From a344ac80356a082f13108428eb6127f1a1c5f63c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 18 Jun 2019 16:08:40 +0900
Subject: [PATCH 001/553] fix install for Darwin

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 6273490e..b639a0bd 100644
--- a/Makefile
+++ b/Makefile
@@ -385,8 +385,8 @@ DEPEND_FILE=$(addprefix $(OBJ_DIR)/, $(addsuffix .d,$(basename $(ALL_SRC))))
 PREFIX?=/usr/local
 install: lib/libmcl.a lib/libmcl.$(LIB_SUF)
 	$(MKDIR) $(PREFIX)/include/mcl
-	cp -a include/mcl/ $(PREFIX)/include/
-	cp -a include/cybozu/ $(PREFIX)/include/
+	cp -a include/mcl $(PREFIX)/include/
+	cp -a include/cybozu $(PREFIX)/include/
 	$(MKDIR) $(PREFIX)/lib
 	cp -a lib/libmcl.a lib/libmcl.$(LIB_SUF) $(PREFIX)/lib/
 

From 0363aee11320f26102a2f411853b257ae5ec376b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 20 Jun 2019 10:10:35 +0900
Subject: [PATCH 002/553] use Jacobi instead of Proj

---
 include/mcl/bn.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 5ebe5d95..eb22c6b7 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -1100,11 +1100,11 @@ struct Param {
 		} else {
 			twist_b_type = tb_generic;
 		}
-		G1::init(0, cp.b, mcl::ec::Proj);
+		G1::init(0, cp.b, mcl::ec::Jacobi);
 		if (isBLS12) {
 			G1::setOrder(r);
 		}
-		G2::init(0, twist_b, mcl::ec::Proj);
+		G2::init(0, twist_b, mcl::ec::Jacobi);
 		G2::setOrder(r);
 
 		const mpz_class largest_c = isBLS12 ? abs_z : gmp::abs(z * 6 + 2);

From 977053396a8dce3e129e7fa3c20de7a4292fc372 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 20 Jun 2019 10:34:31 +0900
Subject: [PATCH 003/553] disable addLine/dblLine test for Proj

---
 test/bls12_test.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index cc8ddef8..5af112f9 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -431,6 +431,7 @@ const char *e1Str =
 	CYBOZU_BENCH_C("finalExp", 100, finalExp, e2, e0);
 }
 
+#if 0 // test for only Proj
 CYBOZU_TEST_AUTO(addLine)
 {
 const char *l0Str=
@@ -566,6 +567,7 @@ const char *q1Str =
 	CYBOZU_TEST_EQUAL(Q, Q1);
 	G1::setOrder(BN::param.r);
 }
+#endif
 
 CYBOZU_TEST_AUTO(mul_012)
 {

From 17d2b0c18dc501c7fc7c9ff311572e08a8699ded Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 24 Jun 2019 05:24:26 +0900
Subject: [PATCH 004/553] [java] jni for BN254(old)

---
 ffi/java/Makefile                            |  1 +
 ffi/java/com/herumi/mcl/Bn256.java           | 88 ++++++++++++++++++++
 ffi/java/com/herumi/mcl/Bn256JNI.java        | 68 +++++++++++++++
 ffi/java/com/herumi/mcl/CipherText.java      | 66 +++++++++++++++
 ffi/java/com/herumi/mcl/Elgamal.java         | 38 +++++++++
 ffi/java/com/herumi/mcl/ElgamalJNI.java      | 51 ++++++++++++
 ffi/java/com/herumi/mcl/Fr.java              | 78 +++++++++++++++++
 ffi/java/com/herumi/mcl/G1.java              | 74 ++++++++++++++++
 ffi/java/com/herumi/mcl/G2.java              | 70 ++++++++++++++++
 ffi/java/com/herumi/mcl/GT.java              | 62 ++++++++++++++
 ffi/java/com/herumi/mcl/PrivateKey.java      | 86 +++++++++++++++++++
 ffi/java/com/herumi/mcl/PublicKey.java       | 82 ++++++++++++++++++
 ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java | 26 ++++++
 13 files changed, 790 insertions(+)
 create mode 100644 ffi/java/com/herumi/mcl/Bn256.java
 create mode 100644 ffi/java/com/herumi/mcl/Bn256JNI.java
 create mode 100644 ffi/java/com/herumi/mcl/CipherText.java
 create mode 100644 ffi/java/com/herumi/mcl/Elgamal.java
 create mode 100644 ffi/java/com/herumi/mcl/ElgamalJNI.java
 create mode 100644 ffi/java/com/herumi/mcl/Fr.java
 create mode 100644 ffi/java/com/herumi/mcl/G1.java
 create mode 100644 ffi/java/com/herumi/mcl/G2.java
 create mode 100644 ffi/java/com/herumi/mcl/GT.java
 create mode 100644 ffi/java/com/herumi/mcl/PrivateKey.java
 create mode 100644 ffi/java/com/herumi/mcl/PublicKey.java
 create mode 100644 ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java

diff --git a/ffi/java/Makefile b/ffi/java/Makefile
index d69c043f..e25b7b99 100644
--- a/ffi/java/Makefile
+++ b/ffi/java/Makefile
@@ -9,6 +9,7 @@ else
   LDFLAGS+=-lrt
 endif
 CFLAGS+=$(JAVA_INC) $(JAVA_INC)/linux -I $(TOP_DIR)/include -I $(TOP_DIR)/../xbyak -I $(TOP_DIR)/../cybozulib/include -Wno-strict-aliasing
+CFLAGS+=-std=c++03
 MCL_LIB=$(TOP_DIR)/lib/libmcl.a
 
 PACKAGE_NAME=com.herumi.mcl
diff --git a/ffi/java/com/herumi/mcl/Bn256.java b/ffi/java/com/herumi/mcl/Bn256.java
new file mode 100644
index 00000000..9da1ffaf
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/Bn256.java
@@ -0,0 +1,88 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class Bn256 {
+  public static void SystemInit() {
+    Bn256JNI.SystemInit();
+  }
+
+  public static void neg(Fr y, Fr x) {
+    Bn256JNI.neg__SWIG_0(Fr.getCPtr(y), y, Fr.getCPtr(x), x);
+  }
+
+  public static void add(Fr z, Fr x, Fr y) {
+    Bn256JNI.add__SWIG_0(Fr.getCPtr(z), z, Fr.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void sub(Fr z, Fr x, Fr y) {
+    Bn256JNI.sub__SWIG_0(Fr.getCPtr(z), z, Fr.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void mul(Fr z, Fr x, Fr y) {
+    Bn256JNI.mul__SWIG_0(Fr.getCPtr(z), z, Fr.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void mul(G1 z, G1 x, Fr y) {
+    Bn256JNI.mul__SWIG_1(G1.getCPtr(z), z, G1.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void mul(G2 z, G2 x, Fr y) {
+    Bn256JNI.mul__SWIG_2(G2.getCPtr(z), z, G2.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void div(Fr z, Fr x, Fr y) {
+    Bn256JNI.div(Fr.getCPtr(z), z, Fr.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void pow(GT z, GT x, Fr y) {
+    Bn256JNI.pow(GT.getCPtr(z), z, GT.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void neg(G1 y, G1 x) {
+    Bn256JNI.neg__SWIG_1(G1.getCPtr(y), y, G1.getCPtr(x), x);
+  }
+
+  public static void dbl(G1 y, G1 x) {
+    Bn256JNI.dbl__SWIG_0(G1.getCPtr(y), y, G1.getCPtr(x), x);
+  }
+
+  public static void add(G1 z, G1 x, G1 y) {
+    Bn256JNI.add__SWIG_1(G1.getCPtr(z), z, G1.getCPtr(x), x, G1.getCPtr(y), y);
+  }
+
+  public static void sub(G1 z, G1 x, G1 y) {
+    Bn256JNI.sub__SWIG_1(G1.getCPtr(z), z, G1.getCPtr(x), x, G1.getCPtr(y), y);
+  }
+
+  public static void pairing(GT e, G1 P, G2 Q) {
+    Bn256JNI.pairing(GT.getCPtr(e), e, G1.getCPtr(P), P, G2.getCPtr(Q), Q);
+  }
+
+  public static void neg(G2 y, G2 x) {
+    Bn256JNI.neg__SWIG_2(G2.getCPtr(y), y, G2.getCPtr(x), x);
+  }
+
+  public static void dbl(G2 y, G2 x) {
+    Bn256JNI.dbl__SWIG_1(G2.getCPtr(y), y, G2.getCPtr(x), x);
+  }
+
+  public static void add(G2 z, G2 x, G2 y) {
+    Bn256JNI.add__SWIG_2(G2.getCPtr(z), z, G2.getCPtr(x), x, G2.getCPtr(y), y);
+  }
+
+  public static void sub(G2 z, G2 x, G2 y) {
+    Bn256JNI.sub__SWIG_2(G2.getCPtr(z), z, G2.getCPtr(x), x, G2.getCPtr(y), y);
+  }
+
+  public static void mul(GT z, GT x, GT y) {
+    Bn256JNI.mul__SWIG_3(GT.getCPtr(z), z, GT.getCPtr(x), x, GT.getCPtr(y), y);
+  }
+
+}
diff --git a/ffi/java/com/herumi/mcl/Bn256JNI.java b/ffi/java/com/herumi/mcl/Bn256JNI.java
new file mode 100644
index 00000000..fa9c43b7
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/Bn256JNI.java
@@ -0,0 +1,68 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class Bn256JNI {
+  public final static native void SystemInit();
+  public final static native void neg__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_);
+  public final static native void add__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
+  public final static native void sub__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
+  public final static native void mul__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
+  public final static native void mul__SWIG_1(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_, long jarg3, Fr jarg3_);
+  public final static native void mul__SWIG_2(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_, long jarg3, Fr jarg3_);
+  public final static native void div(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
+  public final static native void pow(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, Fr jarg3_);
+  public final static native long new_Fr__SWIG_0();
+  public final static native long new_Fr__SWIG_1(long jarg1, Fr jarg1_);
+  public final static native long new_Fr__SWIG_2(int jarg1);
+  public final static native long new_Fr__SWIG_3(String jarg1);
+  public final static native boolean Fr_equals(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_);
+  public final static native void Fr_setStr(long jarg1, Fr jarg1_, String jarg2);
+  public final static native void Fr_setInt(long jarg1, Fr jarg1_, int jarg2);
+  public final static native void Fr_clear(long jarg1, Fr jarg1_);
+  public final static native void Fr_setRand(long jarg1, Fr jarg1_);
+  public final static native String Fr_toString(long jarg1, Fr jarg1_);
+  public final static native void delete_Fr(long jarg1);
+  public final static native void neg__SWIG_1(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_);
+  public final static native void dbl__SWIG_0(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_);
+  public final static native void add__SWIG_1(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_, long jarg3, G1 jarg3_);
+  public final static native void sub__SWIG_1(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_, long jarg3, G1 jarg3_);
+  public final static native void pairing(long jarg1, GT jarg1_, long jarg2, G1 jarg2_, long jarg3, G2 jarg3_);
+  public final static native long new_G1__SWIG_0();
+  public final static native long new_G1__SWIG_1(long jarg1, G1 jarg1_);
+  public final static native long new_G1__SWIG_2(String jarg1, String jarg2);
+  public final static native boolean G1_equals(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_);
+  public final static native void G1_set(long jarg1, G1 jarg1_, String jarg2, String jarg3);
+  public final static native void G1_hashAndMapToG1(long jarg1, G1 jarg1_, String jarg2);
+  public final static native void G1_clear(long jarg1, G1 jarg1_);
+  public final static native void G1_setStr(long jarg1, G1 jarg1_, String jarg2);
+  public final static native String G1_toString(long jarg1, G1 jarg1_);
+  public final static native void delete_G1(long jarg1);
+  public final static native void neg__SWIG_2(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_);
+  public final static native void dbl__SWIG_1(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_);
+  public final static native void add__SWIG_2(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_, long jarg3, G2 jarg3_);
+  public final static native void sub__SWIG_2(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_, long jarg3, G2 jarg3_);
+  public final static native long new_G2__SWIG_0();
+  public final static native long new_G2__SWIG_1(long jarg1, G2 jarg1_);
+  public final static native long new_G2__SWIG_2(String jarg1, String jarg2, String jarg3, String jarg4);
+  public final static native boolean G2_equals(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_);
+  public final static native void G2_set(long jarg1, G2 jarg1_, String jarg2, String jarg3, String jarg4, String jarg5);
+  public final static native void G2_clear(long jarg1, G2 jarg1_);
+  public final static native void G2_setStr(long jarg1, G2 jarg1_, String jarg2);
+  public final static native String G2_toString(long jarg1, G2 jarg1_);
+  public final static native void delete_G2(long jarg1);
+  public final static native void mul__SWIG_3(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, GT jarg3_);
+  public final static native long new_GT__SWIG_0();
+  public final static native long new_GT__SWIG_1(long jarg1, GT jarg1_);
+  public final static native boolean GT_equals(long jarg1, GT jarg1_, long jarg2, GT jarg2_);
+  public final static native void GT_clear(long jarg1, GT jarg1_);
+  public final static native void GT_setStr(long jarg1, GT jarg1_, String jarg2);
+  public final static native String GT_toString(long jarg1, GT jarg1_);
+  public final static native void delete_GT(long jarg1);
+}
diff --git a/ffi/java/com/herumi/mcl/CipherText.java b/ffi/java/com/herumi/mcl/CipherText.java
new file mode 100644
index 00000000..3437695d
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/CipherText.java
@@ -0,0 +1,66 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class CipherText {
+  private transient long swigCPtr;
+  protected transient boolean swigCMemOwn;
+
+  protected CipherText(long cPtr, boolean cMemoryOwn) {
+    swigCMemOwn = cMemoryOwn;
+    swigCPtr = cPtr;
+  }
+
+  protected static long getCPtr(CipherText obj) {
+    return (obj == null) ? 0 : obj.swigCPtr;
+  }
+
+  protected void finalize() {
+    delete();
+  }
+
+  public synchronized void delete() {
+    if (swigCPtr != 0) {
+      if (swigCMemOwn) {
+        swigCMemOwn = false;
+        ElgamalJNI.delete_CipherText(swigCPtr);
+      }
+      swigCPtr = 0;
+    }
+  }
+
+  public String toStr() {
+    return ElgamalJNI.CipherText_toStr(swigCPtr, this);
+  }
+
+  public String toString() {
+    return ElgamalJNI.CipherText_toString(swigCPtr, this);
+  }
+
+  public void fromStr(String str) {
+    ElgamalJNI.CipherText_fromStr(swigCPtr, this, str);
+  }
+
+  public void add(CipherText c) {
+    ElgamalJNI.CipherText_add(swigCPtr, this, CipherText.getCPtr(c), c);
+  }
+
+  public void mul(int m) {
+    ElgamalJNI.CipherText_mul__SWIG_0(swigCPtr, this, m);
+  }
+
+  public void mul(String str) {
+    ElgamalJNI.CipherText_mul__SWIG_1(swigCPtr, this, str);
+  }
+
+  public CipherText() {
+    this(ElgamalJNI.new_CipherText(), true);
+  }
+
+}
diff --git a/ffi/java/com/herumi/mcl/Elgamal.java b/ffi/java/com/herumi/mcl/Elgamal.java
new file mode 100644
index 00000000..ee9e7ec3
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/Elgamal.java
@@ -0,0 +1,38 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class Elgamal {
+  public static SWIGTYPE_p_bool new_p_bool() {
+    long cPtr = ElgamalJNI.new_p_bool();
+    return (cPtr == 0) ? null : new SWIGTYPE_p_bool(cPtr, false);
+  }
+
+  public static SWIGTYPE_p_bool copy_p_bool(boolean value) {
+    long cPtr = ElgamalJNI.copy_p_bool(value);
+    return (cPtr == 0) ? null : new SWIGTYPE_p_bool(cPtr, false);
+  }
+
+  public static void delete_p_bool(SWIGTYPE_p_bool obj) {
+    ElgamalJNI.delete_p_bool(SWIGTYPE_p_bool.getCPtr(obj));
+  }
+
+  public static void p_bool_assign(SWIGTYPE_p_bool obj, boolean value) {
+    ElgamalJNI.p_bool_assign(SWIGTYPE_p_bool.getCPtr(obj), value);
+  }
+
+  public static boolean p_bool_value(SWIGTYPE_p_bool obj) {
+    return ElgamalJNI.p_bool_value(SWIGTYPE_p_bool.getCPtr(obj));
+  }
+
+  public static void SystemInit(String param) {
+    ElgamalJNI.SystemInit(param);
+  }
+
+}
diff --git a/ffi/java/com/herumi/mcl/ElgamalJNI.java b/ffi/java/com/herumi/mcl/ElgamalJNI.java
new file mode 100644
index 00000000..0f9e029e
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/ElgamalJNI.java
@@ -0,0 +1,51 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class ElgamalJNI {
+  public final static native long new_p_bool();
+  public final static native long copy_p_bool(boolean jarg1);
+  public final static native void delete_p_bool(long jarg1);
+  public final static native void p_bool_assign(long jarg1, boolean jarg2);
+  public final static native boolean p_bool_value(long jarg1);
+  public final static native void SystemInit(String jarg1);
+  public final static native String CipherText_toStr(long jarg1, CipherText jarg1_);
+  public final static native String CipherText_toString(long jarg1, CipherText jarg1_);
+  public final static native void CipherText_fromStr(long jarg1, CipherText jarg1_, String jarg2);
+  public final static native void CipherText_add(long jarg1, CipherText jarg1_, long jarg2, CipherText jarg2_);
+  public final static native void CipherText_mul__SWIG_0(long jarg1, CipherText jarg1_, int jarg2);
+  public final static native void CipherText_mul__SWIG_1(long jarg1, CipherText jarg1_, String jarg2);
+  public final static native long new_CipherText();
+  public final static native void delete_CipherText(long jarg1);
+  public final static native String PublicKey_toStr(long jarg1, PublicKey jarg1_);
+  public final static native String PublicKey_toString(long jarg1, PublicKey jarg1_);
+  public final static native void PublicKey_fromStr(long jarg1, PublicKey jarg1_, String jarg2);
+  public final static native void PublicKey_save(long jarg1, PublicKey jarg1_, String jarg2);
+  public final static native void PublicKey_load(long jarg1, PublicKey jarg1_, String jarg2);
+  public final static native void PublicKey_enc__SWIG_0(long jarg1, PublicKey jarg1_, long jarg2, CipherText jarg2_, int jarg3);
+  public final static native void PublicKey_enc__SWIG_1(long jarg1, PublicKey jarg1_, long jarg2, CipherText jarg2_, String jarg3);
+  public final static native void PublicKey_rerandomize(long jarg1, PublicKey jarg1_, long jarg2, CipherText jarg2_);
+  public final static native void PublicKey_add__SWIG_0(long jarg1, PublicKey jarg1_, long jarg2, CipherText jarg2_, int jarg3);
+  public final static native void PublicKey_add__SWIG_1(long jarg1, PublicKey jarg1_, long jarg2, CipherText jarg2_, String jarg3);
+  public final static native long new_PublicKey();
+  public final static native void delete_PublicKey(long jarg1);
+  public final static native String PrivateKey_toStr(long jarg1, PrivateKey jarg1_);
+  public final static native String PrivateKey_toString(long jarg1, PrivateKey jarg1_);
+  public final static native void PrivateKey_fromStr(long jarg1, PrivateKey jarg1_, String jarg2);
+  public final static native void PrivateKey_save(long jarg1, PrivateKey jarg1_, String jarg2);
+  public final static native void PrivateKey_load(long jarg1, PrivateKey jarg1_, String jarg2);
+  public final static native void PrivateKey_init(long jarg1, PrivateKey jarg1_);
+  public final static native long PrivateKey_getPublicKey(long jarg1, PrivateKey jarg1_);
+  public final static native int PrivateKey_dec__SWIG_0(long jarg1, PrivateKey jarg1_, long jarg2, CipherText jarg2_, long jarg3);
+  public final static native int PrivateKey_dec__SWIG_1(long jarg1, PrivateKey jarg1_, long jarg2, CipherText jarg2_);
+  public final static native void PrivateKey_setCache(long jarg1, PrivateKey jarg1_, int jarg2, int jarg3);
+  public final static native void PrivateKey_clearCache(long jarg1, PrivateKey jarg1_);
+  public final static native long new_PrivateKey();
+  public final static native void delete_PrivateKey(long jarg1);
+}
diff --git a/ffi/java/com/herumi/mcl/Fr.java b/ffi/java/com/herumi/mcl/Fr.java
new file mode 100644
index 00000000..84207c03
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/Fr.java
@@ -0,0 +1,78 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class Fr {
+  private transient long swigCPtr;
+  protected transient boolean swigCMemOwn;
+
+  protected Fr(long cPtr, boolean cMemoryOwn) {
+    swigCMemOwn = cMemoryOwn;
+    swigCPtr = cPtr;
+  }
+
+  protected static long getCPtr(Fr obj) {
+    return (obj == null) ? 0 : obj.swigCPtr;
+  }
+
+  protected void finalize() {
+    delete();
+  }
+
+  public synchronized void delete() {
+    if (swigCPtr != 0) {
+      if (swigCMemOwn) {
+        swigCMemOwn = false;
+        Bn256JNI.delete_Fr(swigCPtr);
+      }
+      swigCPtr = 0;
+    }
+  }
+
+  public Fr() {
+    this(Bn256JNI.new_Fr__SWIG_0(), true);
+  }
+
+  public Fr(Fr rhs) {
+    this(Bn256JNI.new_Fr__SWIG_1(Fr.getCPtr(rhs), rhs), true);
+  }
+
+  public Fr(int x) {
+    this(Bn256JNI.new_Fr__SWIG_2(x), true);
+  }
+
+  public Fr(String str) {
+    this(Bn256JNI.new_Fr__SWIG_3(str), true);
+  }
+
+  public boolean equals(Fr rhs) {
+    return Bn256JNI.Fr_equals(swigCPtr, this, Fr.getCPtr(rhs), rhs);
+  }
+
+  public void setStr(String str) {
+    Bn256JNI.Fr_setStr(swigCPtr, this, str);
+  }
+
+  public void setInt(int x) {
+    Bn256JNI.Fr_setInt(swigCPtr, this, x);
+  }
+
+  public void clear() {
+    Bn256JNI.Fr_clear(swigCPtr, this);
+  }
+
+  public void setRand() {
+    Bn256JNI.Fr_setRand(swigCPtr, this);
+  }
+
+  public String toString() {
+    return Bn256JNI.Fr_toString(swigCPtr, this);
+  }
+
+}
diff --git a/ffi/java/com/herumi/mcl/G1.java b/ffi/java/com/herumi/mcl/G1.java
new file mode 100644
index 00000000..377df2ad
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/G1.java
@@ -0,0 +1,74 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class G1 {
+  private transient long swigCPtr;
+  protected transient boolean swigCMemOwn;
+
+  protected G1(long cPtr, boolean cMemoryOwn) {
+    swigCMemOwn = cMemoryOwn;
+    swigCPtr = cPtr;
+  }
+
+  protected static long getCPtr(G1 obj) {
+    return (obj == null) ? 0 : obj.swigCPtr;
+  }
+
+  protected void finalize() {
+    delete();
+  }
+
+  public synchronized void delete() {
+    if (swigCPtr != 0) {
+      if (swigCMemOwn) {
+        swigCMemOwn = false;
+        Bn256JNI.delete_G1(swigCPtr);
+      }
+      swigCPtr = 0;
+    }
+  }
+
+  public G1() {
+    this(Bn256JNI.new_G1__SWIG_0(), true);
+  }
+
+  public G1(G1 rhs) {
+    this(Bn256JNI.new_G1__SWIG_1(G1.getCPtr(rhs), rhs), true);
+  }
+
+  public G1(String x, String y) {
+    this(Bn256JNI.new_G1__SWIG_2(x, y), true);
+  }
+
+  public boolean equals(G1 rhs) {
+    return Bn256JNI.G1_equals(swigCPtr, this, G1.getCPtr(rhs), rhs);
+  }
+
+  public void set(String x, String y) {
+    Bn256JNI.G1_set(swigCPtr, this, x, y);
+  }
+
+  public void hashAndMapToG1(String m) {
+    Bn256JNI.G1_hashAndMapToG1(swigCPtr, this, m);
+  }
+
+  public void clear() {
+    Bn256JNI.G1_clear(swigCPtr, this);
+  }
+
+  public void setStr(String str) {
+    Bn256JNI.G1_setStr(swigCPtr, this, str);
+  }
+
+  public String toString() {
+    return Bn256JNI.G1_toString(swigCPtr, this);
+  }
+
+}
diff --git a/ffi/java/com/herumi/mcl/G2.java b/ffi/java/com/herumi/mcl/G2.java
new file mode 100644
index 00000000..3731295e
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/G2.java
@@ -0,0 +1,70 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class G2 {
+  private transient long swigCPtr;
+  protected transient boolean swigCMemOwn;
+
+  protected G2(long cPtr, boolean cMemoryOwn) {
+    swigCMemOwn = cMemoryOwn;
+    swigCPtr = cPtr;
+  }
+
+  protected static long getCPtr(G2 obj) {
+    return (obj == null) ? 0 : obj.swigCPtr;
+  }
+
+  protected void finalize() {
+    delete();
+  }
+
+  public synchronized void delete() {
+    if (swigCPtr != 0) {
+      if (swigCMemOwn) {
+        swigCMemOwn = false;
+        Bn256JNI.delete_G2(swigCPtr);
+      }
+      swigCPtr = 0;
+    }
+  }
+
+  public G2() {
+    this(Bn256JNI.new_G2__SWIG_0(), true);
+  }
+
+  public G2(G2 rhs) {
+    this(Bn256JNI.new_G2__SWIG_1(G2.getCPtr(rhs), rhs), true);
+  }
+
+  public G2(String xa, String xb, String ya, String yb) {
+    this(Bn256JNI.new_G2__SWIG_2(xa, xb, ya, yb), true);
+  }
+
+  public boolean equals(G2 rhs) {
+    return Bn256JNI.G2_equals(swigCPtr, this, G2.getCPtr(rhs), rhs);
+  }
+
+  public void set(String xa, String xb, String ya, String yb) {
+    Bn256JNI.G2_set(swigCPtr, this, xa, xb, ya, yb);
+  }
+
+  public void clear() {
+    Bn256JNI.G2_clear(swigCPtr, this);
+  }
+
+  public void setStr(String str) {
+    Bn256JNI.G2_setStr(swigCPtr, this, str);
+  }
+
+  public String toString() {
+    return Bn256JNI.G2_toString(swigCPtr, this);
+  }
+
+}
diff --git a/ffi/java/com/herumi/mcl/GT.java b/ffi/java/com/herumi/mcl/GT.java
new file mode 100644
index 00000000..91531f3e
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/GT.java
@@ -0,0 +1,62 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class GT {
+  private transient long swigCPtr;
+  protected transient boolean swigCMemOwn;
+
+  protected GT(long cPtr, boolean cMemoryOwn) {
+    swigCMemOwn = cMemoryOwn;
+    swigCPtr = cPtr;
+  }
+
+  protected static long getCPtr(GT obj) {
+    return (obj == null) ? 0 : obj.swigCPtr;
+  }
+
+  protected void finalize() {
+    delete();
+  }
+
+  public synchronized void delete() {
+    if (swigCPtr != 0) {
+      if (swigCMemOwn) {
+        swigCMemOwn = false;
+        Bn256JNI.delete_GT(swigCPtr);
+      }
+      swigCPtr = 0;
+    }
+  }
+
+  public GT() {
+    this(Bn256JNI.new_GT__SWIG_0(), true);
+  }
+
+  public GT(GT rhs) {
+    this(Bn256JNI.new_GT__SWIG_1(GT.getCPtr(rhs), rhs), true);
+  }
+
+  public boolean equals(GT rhs) {
+    return Bn256JNI.GT_equals(swigCPtr, this, GT.getCPtr(rhs), rhs);
+  }
+
+  public void clear() {
+    Bn256JNI.GT_clear(swigCPtr, this);
+  }
+
+  public void setStr(String str) {
+    Bn256JNI.GT_setStr(swigCPtr, this, str);
+  }
+
+  public String toString() {
+    return Bn256JNI.GT_toString(swigCPtr, this);
+  }
+
+}
diff --git a/ffi/java/com/herumi/mcl/PrivateKey.java b/ffi/java/com/herumi/mcl/PrivateKey.java
new file mode 100644
index 00000000..01487e0b
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/PrivateKey.java
@@ -0,0 +1,86 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class PrivateKey {
+  private transient long swigCPtr;
+  protected transient boolean swigCMemOwn;
+
+  protected PrivateKey(long cPtr, boolean cMemoryOwn) {
+    swigCMemOwn = cMemoryOwn;
+    swigCPtr = cPtr;
+  }
+
+  protected static long getCPtr(PrivateKey obj) {
+    return (obj == null) ? 0 : obj.swigCPtr;
+  }
+
+  protected void finalize() {
+    delete();
+  }
+
+  public synchronized void delete() {
+    if (swigCPtr != 0) {
+      if (swigCMemOwn) {
+        swigCMemOwn = false;
+        ElgamalJNI.delete_PrivateKey(swigCPtr);
+      }
+      swigCPtr = 0;
+    }
+  }
+
+  public String toStr() {
+    return ElgamalJNI.PrivateKey_toStr(swigCPtr, this);
+  }
+
+  public String toString() {
+    return ElgamalJNI.PrivateKey_toString(swigCPtr, this);
+  }
+
+  public void fromStr(String str) {
+    ElgamalJNI.PrivateKey_fromStr(swigCPtr, this, str);
+  }
+
+  public void save(String fileName) {
+    ElgamalJNI.PrivateKey_save(swigCPtr, this, fileName);
+  }
+
+  public void load(String fileName) {
+    ElgamalJNI.PrivateKey_load(swigCPtr, this, fileName);
+  }
+
+  public void init() {
+    ElgamalJNI.PrivateKey_init(swigCPtr, this);
+  }
+
+  public PublicKey getPublicKey() {
+    return new PublicKey(ElgamalJNI.PrivateKey_getPublicKey(swigCPtr, this), true);
+  }
+
+  public int dec(CipherText c, SWIGTYPE_p_bool b) {
+    return ElgamalJNI.PrivateKey_dec__SWIG_0(swigCPtr, this, CipherText.getCPtr(c), c, SWIGTYPE_p_bool.getCPtr(b));
+  }
+
+  public int dec(CipherText c) {
+    return ElgamalJNI.PrivateKey_dec__SWIG_1(swigCPtr, this, CipherText.getCPtr(c), c);
+  }
+
+  public void setCache(int rangeMin, int rangeMax) {
+    ElgamalJNI.PrivateKey_setCache(swigCPtr, this, rangeMin, rangeMax);
+  }
+
+  public void clearCache() {
+    ElgamalJNI.PrivateKey_clearCache(swigCPtr, this);
+  }
+
+  public PrivateKey() {
+    this(ElgamalJNI.new_PrivateKey(), true);
+  }
+
+}
diff --git a/ffi/java/com/herumi/mcl/PublicKey.java b/ffi/java/com/herumi/mcl/PublicKey.java
new file mode 100644
index 00000000..f114666f
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/PublicKey.java
@@ -0,0 +1,82 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class PublicKey { // SWIG-generated Java proxy; every public method forwards to a static ElgamalJNI call
+  private transient long swigCPtr; // handle passed to the ElgamalJNI calls; set to 0 by delete()
+  protected transient boolean swigCMemOwn; // when true, delete() calls ElgamalJNI.delete_PublicKey on the handle
+
+  protected PublicKey(long cPtr, boolean cMemoryOwn) { // wraps an existing handle; cMemoryOwn decides ownership
+    swigCMemOwn = cMemoryOwn;
+    swigCPtr = cPtr;
+  }
+
+  protected static long getCPtr(PublicKey obj) { // returns obj's handle, or 0 when obj is null
+    return (obj == null) ? 0 : obj.swigCPtr;
+  }
+
+  protected void finalize() { // finalizer hook: simply delegates to delete()
+    delete();
+  }
+
+  public synchronized void delete() { // safe to call twice: a later call sees swigCPtr == 0 and does nothing
+    if (swigCPtr != 0) {
+      if (swigCMemOwn) {
+        swigCMemOwn = false; // ownership flag is cleared before the native delete is invoked
+        ElgamalJNI.delete_PublicKey(swigCPtr);
+      }
+      swigCPtr = 0; // handle is cleared whether or not this proxy owned it
+    }
+  }
+
+  public String toStr() { // returns whatever native PublicKey_toStr produces for this handle
+    return ElgamalJNI.PublicKey_toStr(swigCPtr, this);
+  }
+
+  public String toString() { // routed to native PublicKey_toString, a separate entry point from toStr
+    return ElgamalJNI.PublicKey_toString(swigCPtr, this);
+  }
+
+  public void fromStr(String str) { // hands str to native PublicKey_fromStr; presumably the inverse of toStr - confirm in elgamal_impl.hpp
+    ElgamalJNI.PublicKey_fromStr(swigCPtr, this, str);
+  }
+
+  public void save(String fileName) { // hands fileName to native PublicKey_save; file format is defined on the native side
+    ElgamalJNI.PublicKey_save(swigCPtr, this, fileName);
+  }
+
+  public void load(String fileName) { // hands fileName to native PublicKey_load
+    ElgamalJNI.PublicKey_load(swigCPtr, this, fileName);
+  }
+
+  public void enc(CipherText c, int m) { // int overload: passes c's handle (0 if c is null), c itself, and m to the native call
+    ElgamalJNI.PublicKey_enc__SWIG_0(swigCPtr, this, CipherText.getCPtr(c), c, m);
+  }
+
+  public void enc(CipherText c, String str) { // String overload of enc; dispatched to a distinct native symbol (__SWIG_1)
+    ElgamalJNI.PublicKey_enc__SWIG_1(swigCPtr, this, CipherText.getCPtr(c), c, str);
+  }
+
+  public void rerandomize(CipherText c) { // passes c's handle and c to native PublicKey_rerandomize
+    ElgamalJNI.PublicKey_rerandomize(swigCPtr, this, CipherText.getCPtr(c), c);
+  }
+
+  public void add(CipherText c, int m) { // int overload: passes c's handle, c itself, and m to the native call
+    ElgamalJNI.PublicKey_add__SWIG_0(swigCPtr, this, CipherText.getCPtr(c), c, m);
+  }
+
+  public void add(CipherText c, String str) { // String overload of add; dispatched to a distinct native symbol (__SWIG_1)
+    ElgamalJNI.PublicKey_add__SWIG_1(swigCPtr, this, CipherText.getCPtr(c), c, str);
+  }
+
+  public PublicKey() { // wraps the handle returned by ElgamalJNI.new_PublicKey() and takes ownership of it
+    this(ElgamalJNI.new_PublicKey(), true);
+  }
+
+}
diff --git a/ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java b/ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java
new file mode 100644
index 00000000..4ca620d2
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java
@@ -0,0 +1,26 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class SWIGTYPE_p_bool { // SWIG-generated opaque handle wrapper; has no public constructors or methods
+  private transient long swigCPtr; // wrapped handle value; only exposed through getCPtr
+
+  protected SWIGTYPE_p_bool(long cPtr, @SuppressWarnings("unused") boolean futureUse) { // futureUse is never read
+    swigCPtr = cPtr;
+  }
+
+  protected SWIGTYPE_p_bool() { // default instance carries a 0 handle
+    swigCPtr = 0;
+  }
+
+  protected static long getCPtr(SWIGTYPE_p_bool obj) { // returns obj's handle, or 0 when obj is null
+    return (obj == null) ? 0 : obj.swigCPtr;
+  }
+}
+

From 24be4c8de127a4d831ec8742e6d838ac71aa7e2e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 3 Jul 2019 14:38:50 +0900
Subject: [PATCH 005/553] [java] change Bn256 to Mcl

---
 ffi/java/ElgamalTest.java                 |    2 +-
 ffi/java/Makefile                         |   26 +-
 ffi/java/{Bn256Test.java => MclTest.java} |   76 +-
 ffi/java/bn256.i                          |   31 -
 ffi/java/bn256_impl.hpp                   |  249 ---
 ffi/java/bn256_wrap.cxx                   | 1542 -------------
 ffi/java/com/herumi/mcl/Bn256.java        |   88 -
 ffi/java/com/herumi/mcl/Bn256JNI.java     |   68 -
 ffi/java/com/herumi/mcl/Fp.java           |   94 +
 ffi/java/com/herumi/mcl/Fr.java           |   40 +-
 ffi/java/com/herumi/mcl/G1.java           |   36 +-
 ffi/java/com/herumi/mcl/G2.java           |   34 +-
 ffi/java/com/herumi/mcl/GT.java           |   22 +-
 ffi/java/com/herumi/mcl/Mcl.java          |  128 ++
 ffi/java/com/herumi/mcl/MclConstants.java |   14 +
 ffi/java/com/herumi/mcl/MclJNI.java       |  104 +
 ffi/java/java.md                          |    4 +-
 ffi/java/mcl.i                            |   28 +
 ffi/java/mcl_impl.hpp                     |  353 +++
 ffi/java/mcl_wrap.cxx                     | 2432 +++++++++++++++++++++
 ffi/java/{run-bn256.bat => run-mcl.bat}   |    0
 21 files changed, 3304 insertions(+), 2067 deletions(-)
 rename ffi/java/{Bn256Test.java => MclTest.java} (54%)
 delete mode 100644 ffi/java/bn256.i
 delete mode 100644 ffi/java/bn256_impl.hpp
 delete mode 100644 ffi/java/bn256_wrap.cxx
 delete mode 100644 ffi/java/com/herumi/mcl/Bn256.java
 delete mode 100644 ffi/java/com/herumi/mcl/Bn256JNI.java
 create mode 100644 ffi/java/com/herumi/mcl/Fp.java
 create mode 100644 ffi/java/com/herumi/mcl/Mcl.java
 create mode 100644 ffi/java/com/herumi/mcl/MclConstants.java
 create mode 100644 ffi/java/com/herumi/mcl/MclJNI.java
 create mode 100644 ffi/java/mcl.i
 create mode 100644 ffi/java/mcl_impl.hpp
 create mode 100644 ffi/java/mcl_wrap.cxx
 rename ffi/java/{run-bn256.bat => run-mcl.bat} (100%)

diff --git a/ffi/java/ElgamalTest.java b/ffi/java/ElgamalTest.java
index 0cf49e14..d9c6ade2 100644
--- a/ffi/java/ElgamalTest.java
+++ b/ffi/java/ElgamalTest.java
@@ -8,7 +8,7 @@
 */
 public class ElgamalTest {
 	static {
-		String lib = "mcl_elgamal";
+		String lib = "mclelgamaljava";
 		String libName = System.mapLibraryName(lib);
 		System.out.println("libName : " + libName);
 		System.loadLibrary(lib);
diff --git a/ffi/java/Makefile b/ffi/java/Makefile
index e25b7b99..9d2c656f 100644
--- a/ffi/java/Makefile
+++ b/ffi/java/Makefile
@@ -15,33 +15,33 @@ MCL_LIB=$(TOP_DIR)/lib/libmcl.a
 PACKAGE_NAME=com.herumi.mcl
 PACKAGE_DIR=$(subst .,/,$(PACKAGE_NAME))
 
-ELGAMAL_LIB=$(TOP_DIR)/bin/libmcl_elgamal.$(LIB_SUF)
-BN256_LIB=$(TOP_DIR)/bin/libmcl_bn256.$(LIB_SUF)
-JAVA_EXE=cd $(TOP_DIR)/bin && LD_LIBRARY_PATH=./:$(LD_LIBRARY_PATH) java -classpath ../ffi/java
+ELGAMAL_LIB=$(TOP_DIR)/lib/libmclelgamaljava.$(LIB_SUF)
+MCLJAVA_LIB=$(TOP_DIR)/lib/libmcljava.$(LIB_SUF)
+JAVA_EXE=cd $(TOP_DIR)/lib && LD_LIBRARY_PATH=../lib:$(LD_LIBRARY_PATH) java -classpath ../ffi/java
 all: $(ELGAMAL_LIB)
 
 elgamal_wrap.cxx: elgamal.i elgamal_impl.hpp
 	$(MKDIR) $(PACKAGE_DIR)
 	swig -java -package $(PACKAGE_NAME) -outdir $(PACKAGE_DIR) -c++ -Wall elgamal.i
 
-bn256_wrap.cxx: bn256.i bn256_impl.hpp
+mcl_wrap.cxx: mcl.i mcl_impl.hpp
 	$(MKDIR) $(PACKAGE_DIR)
-	swig -java -package $(PACKAGE_NAME) -outdir $(PACKAGE_DIR) -c++ -Wall bn256.i
+	swig -java -package $(PACKAGE_NAME) -outdir $(PACKAGE_DIR) -c++ -Wall mcl.i
 
 $(MCL_LIB):
 	make -C $(TOP_DIR)
 
 $(ELGAMAL_LIB): elgamal_wrap.cxx $(MCL_LIB)
-	$(PRE)$(CXX) $< -o $@ $(CFLAGS) $(LDFLAGS) $(MCL_LIB) -shared
+	$(PRE)$(CXX) $< -o $@ $(CFLAGS) $(MCL_LIB) $(LDFLAGS) -shared
 
-$(BN256_LIB): bn256_wrap.cxx $(MCL_LIB)
-	$(PRE)$(CXX) $< -o $@ $(CFLAGS) $(LDFLAGS) $(MCL_LIB) -shared
+$(MCLJAVA_LIB): mcl_wrap.cxx $(MCL_LIB)
+	$(PRE)$(CXX) $< -o $@ $(CFLAGS) $(MCL_LIB) $(LDFLAGS) -shared
 
 %.class: %.java
 	javac $<
 
 ElgamalTest.class: ElgamalTest.java $(ELGAMAL_LIB)
-Bn256Test.class: Bn256Test.java $(BN256_LIB)
+MclTest.class: MclTest.java $(MCLJAVA_LIB)
 
 jar:
 	jar cvf mcl.jar com
@@ -53,13 +53,13 @@ test_elgamal: ElgamalTest.class $(ELGAMAL_LIB)
 	$(JAVA_EXE) ElgamalTest -e NIST_P384 -h sha384
 	$(JAVA_EXE) ElgamalTest -e NIST_P521 -h sha512
 
-test_bn256: Bn256Test.class $(BN256_LIB)
-	$(JAVA_EXE) Bn256Test
+test_mcl: MclTest.class $(MCLJAVA_LIB)
+	$(JAVA_EXE) MclTest
 
 test:
 	$(MAKE) test_elgamal
-	$(MAKE) test_bn256
+	$(MAKE) test_mcl
 
 clean:
-	rm -rf *.class $(ELGAMAL_LIB) $(PACKAGE_DIR)/*.class *_wrap.cxx
+	rm -rf *.class $(ELGAMAL_LIB) $(MCLJAVA_LIB) $(PACKAGE_DIR)/*.class *_wrap.cxx
 
diff --git a/ffi/java/Bn256Test.java b/ffi/java/MclTest.java
similarity index 54%
rename from ffi/java/Bn256Test.java
rename to ffi/java/MclTest.java
index b1f9f6f3..d8ed0fb5 100644
--- a/ffi/java/Bn256Test.java
+++ b/ffi/java/MclTest.java
@@ -2,11 +2,11 @@
 import com.herumi.mcl.*;
 
 /*
-	Bn256Test
+	MclTest
 */
-public class Bn256Test {
+public class MclTest {
 	static {
-		String lib = "mcl_bn256";
+		String lib = "mcljava";
 		String libName = System.mapLibraryName(lib);
 		System.out.println("libName : " + libName);
 		System.loadLibrary(lib);
@@ -25,34 +25,47 @@ public static void assertBool(String msg, boolean b) {
 			System.out.println("NG : " + msg);
 		}
 	}
-	public static void main(String argv[]) {
+	public static void testCurve(int curveType, String name) {
 		try {
-			Bn256.SystemInit();
+			System.out.println("curve=" + name);
+			Mcl.SystemInit(curveType);
 			Fr x = new Fr(5);
 			Fr y = new Fr(-2);
 			Fr z = new Fr(5);
 			assertBool("x != y", !x.equals(y));
 			assertBool("x == z", x.equals(z));
 			assertEquals("x == 5", x.toString(), "5");
-			Bn256.add(x, x, y);
+			Mcl.add(x, x, y);
 			assertEquals("x == 3", x.toString(), "3");
-			Bn256.mul(x, x, x);
+			Mcl.mul(x, x, x);
 			assertEquals("x == 9", x.toString(), "9");
+			assertEquals("x == 12", (new Fr("12")).toString(), "12");
+			assertEquals("x == 18", (new Fr("12", 16)).toString(), "18");
+			assertEquals("x == ff", (new Fr("255")).toString(16), "ff");
+
+/*
+			{
+				byte[] b = x.serialize();
+				Fr t = new Fr();
+				t.deserialize(b);
+				assertEquals("serialize", x, t);
+			}
+*/
 			G1 P = new G1();
 			System.out.println("P=" + P);
-			P.set("-1", "1");
+			Mcl.hashAndMapToG1(P, "test".getBytes());
 			System.out.println("P=" + P);
-			Bn256.neg(P, P);
+			byte[] buf = { 1, 2, 3, 4 };
+			Mcl.hashAndMapToG1(P, buf);
+			System.out.println("P=" + P);
+			Mcl.neg(P, P);
 			System.out.println("P=" + P);
 
-			String xa = "12723517038133731887338407189719511622662176727675373276651903807414909099441";
-			String xb = "4168783608814932154536427934509895782246573715297911553964171371032945126671";
-			String ya = "13891744915211034074451795021214165905772212241412891944830863846330766296736";
-			String yb = "7937318970632701341203597196594272556916396164729705624521405069090520231616";
-
-			G2 Q = new G2(xa, xb, ya, yb);
+			G2 Q = new G2();
+			Mcl.hashAndMapToG2(Q, "abc".getBytes());
+			System.out.println("Q=" + Q);
 
-			P.hashAndMapToG1("This is a pen");
+			Mcl.hashAndMapToG1(P, "This is a pen".getBytes());
 			{
 				String s = P.toString();
 				G1 P1 = new G1();
@@ -61,19 +74,20 @@ public static void main(String argv[]) {
 			}
 
 			GT e = new GT();
-			Bn256.pairing(e, P, Q);
+			Mcl.pairing(e, P, Q);
 			GT e1 = new GT();
 			GT e2 = new GT();
 			Fr c = new Fr("1234567890123234928348230428394234");
+			System.out.println("c=" + c);
 			G2 cQ = new G2(Q);
-			Bn256.mul(cQ, Q, c); // cQ = Q * c
-			Bn256.pairing(e1, P, cQ);
-			Bn256.pow(e2, e, c); // e2 = e^c
+			Mcl.mul(cQ, Q, c); // cQ = Q * c
+			Mcl.pairing(e1, P, cQ);
+			Mcl.pow(e2, e, c); // e2 = e^c
 			assertBool("e1 == e2", e1.equals(e2));
 
 			G1 cP = new G1(P);
-			Bn256.mul(cP, P, c); // cP = P * c
-			Bn256.pairing(e1, cP, Q);
+			Mcl.mul(cP, P, c); // cP = P * c
+			Mcl.pairing(e1, cP, Q);
 			assertBool("e1 == e2", e1.equals(e2));
 
 			BLSsignature(Q);
@@ -84,21 +98,25 @@ public static void main(String argv[]) {
 	public static void BLSsignature(G2 Q)
 	{
 		Fr s = new Fr();
-		s.setRand(); // secret key
+		s.setByCSPRNG(); // secret key
 		System.out.println("secret key " + s);
 		G2 pub = new G2();
-		Bn256.mul(pub, Q, s); // public key = sQ
+		Mcl.mul(pub, Q, s); // public key = sQ
 
-		String m = "signature test";
+		byte[] m = "signature test".getBytes();
 		G1 H = new G1();
-		H.hashAndMapToG1(m); // H = Hash(m)
+		Mcl.hashAndMapToG1(H, m); // H = Hash(m)
 		G1 sign = new G1();
-		Bn256.mul(sign, H, s); // signature of m = s H
+		Mcl.mul(sign, H, s); // signature of m = s H
 
 		GT e1 = new GT();
 		GT e2 = new GT();
-		Bn256.pairing(e1, H, pub); // e1 = e(H, s Q)
-		Bn256.pairing(e2, sign, Q); // e2 = e(s H, Q);
+		Mcl.pairing(e1, H, pub); // e1 = e(H, s Q)
+		Mcl.pairing(e2, sign, Q); // e2 = e(s H, Q);
 		assertBool("verify signature", e1.equals(e2));
 	}
+	public static void main(String argv[]) { // entry point: runs testCurve once per curve; argv is unused
+		testCurve(Mcl.BN254, "BN254");
+		testCurve(Mcl.BLS12_381, "BLS12_381"); // second call passes a different curve constant to testCurve
+	}
 }
diff --git a/ffi/java/bn256.i b/ffi/java/bn256.i
deleted file mode 100644
index 94a8edb7..00000000
--- a/ffi/java/bn256.i
+++ /dev/null
@@ -1,31 +0,0 @@
-%module Bn256
-
-%include "std_string.i"
-%include "std_except.i"
-
-
-%{
-#include <cybozu/random_generator.hpp>
-#include <cybozu/crypto.hpp>
-#include <mcl/bn256.hpp>
-struct Param {
-    cybozu::RandomGenerator rg;
-    static inline Param& getParam()
-	{
-		static Param p;
-	    return p;
-	}
-};
-
-static void HashAndMapToG1(mcl::bn256::G1& P, const std::string& m)
-{
-	std::string digest = cybozu::crypto::Hash::digest(cybozu::crypto::Hash::N_SHA256, m);
-	mcl::bn256::Fp t;
-	t.setArrayMask(digest.c_str(), digest.size());
-	mcl::bn256::BN::param.mapTo.calcG1(P, t);
-}
-
-#include "bn256_impl.hpp"
-%}
-
-%include "bn256_impl.hpp"
diff --git a/ffi/java/bn256_impl.hpp b/ffi/java/bn256_impl.hpp
deleted file mode 100644
index c4caaf3c..00000000
--- a/ffi/java/bn256_impl.hpp
+++ /dev/null
@@ -1,249 +0,0 @@
-#include <mcl/bn256.hpp>
-#include <stdint.h>
-#include <sstream>
-
-void SystemInit() throw(std::exception)
-{
-	mcl::bn256::initPairing();
-}
-
-class G1;
-class G2;
-class GT;
-/*
-	Fr = Z / rZ
-*/
-class Fr {
-	mcl::bn256::Fr self_;
-	friend class G1;
-	friend class G2;
-	friend class GT;
-	friend void neg(Fr& y, const Fr& x);
-	friend void add(Fr& z, const Fr& x, const Fr& y);
-	friend void sub(Fr& z, const Fr& x, const Fr& y);
-	friend void mul(Fr& z, const Fr& x, const Fr& y);
-	friend void mul(G1& z, const G1& x, const Fr& y);
-	friend void mul(G2& z, const G2& x, const Fr& y);
-	friend void div(Fr& z, const Fr& x, const Fr& y);
-	friend void pow(GT& z, const GT& x, const Fr& y);
-public:
-	Fr() {}
-	Fr(const Fr& rhs) : self_(rhs.self_) {}
-	Fr(int x) : self_(x) {}
-	Fr(const std::string& str) throw(std::exception)
-		: self_(str) {}
-	bool equals(const Fr& rhs) const { return self_ == rhs.self_; }
-	void setStr(const  std::string& str) throw(std::exception)
-	{
-		self_.setStr(str);
-	}
-	void setInt(int x)
-	{
-		self_ = x;
-	}
-	void clear()
-	{
-		self_.clear();
-	}
-	void setRand()
-	{
-		self_.setRand(Param::getParam().rg);
-	}
-	std::string toString() const throw(std::exception)
-	{
-		return self_.getStr();
-	}
-};
-
-void neg(Fr& y, const Fr& x)
-{
-	mcl::bn256::Fr::neg(y.self_, x.self_);
-}
-
-void add(Fr& z, const Fr& x, const Fr& y)
-{
-	mcl::bn256::Fr::add(z.self_, x.self_, y.self_);
-}
-
-void sub(Fr& z, const Fr& x, const Fr& y)
-{
-	mcl::bn256::Fr::sub(z.self_, x.self_, y.self_);
-}
-
-void mul(Fr& z, const Fr& x, const Fr& y)
-{
-	mcl::bn256::Fr::mul(z.self_, x.self_, y.self_);
-}
-
-void div(Fr& z, const Fr& x, const Fr& y)
-{
-	mcl::bn256::Fr::div(z.self_, x.self_, y.self_);
-}
-
-/*
-	#G1 = r
-*/
-class G1 {
-	mcl::bn256::G1 self_;
-	friend void neg(G1& y, const G1& x);
-	friend void dbl(G1& y, const G1& x);
-	friend void add(G1& z, const G1& x, const G1& y);
-	friend void sub(G1& z, const G1& x, const G1& y);
-	friend void mul(G1& z, const G1& x, const Fr& y);
-	friend void pairing(GT& e, const G1& P, const G2& Q);
-public:
-	G1() {}
-	G1(const G1& rhs) : self_(rhs.self_) {}
-	G1(const std::string& x, const std::string& y) throw(std::exception)
-		: self_(mcl::bn256::Fp(x), mcl::bn256::Fp(y))
-	{
-	}
-	bool equals(const G1& rhs) const { return self_ == rhs.self_; }
-	void set(const std::string& x, const std::string& y)
-	{
-		self_.set(mcl::bn256::Fp(x), mcl::bn256::Fp(y));
-	}
-	void hashAndMapToG1(const std::string& m) throw(std::exception)
-	{
-		HashAndMapToG1(self_, m);
-	}
-	void clear()
-	{
-		self_.clear();
-	}
-	/*
-		compressed format
-	*/
-	void setStr(const std::string& str) throw(std::exception)
-	{
-		self_.setStr(str);
-	}
-	std::string toString() const throw(std::exception)
-	{
-		return self_.getStr();
-	}
-};
-
-void neg(G1& y, const G1& x)
-{
-	mcl::bn256::G1::neg(y.self_, x.self_);
-}
-void dbl(G1& y, const G1& x)
-{
-	mcl::bn256::G1::dbl(y.self_, x.self_);
-}
-void add(G1& z, const G1& x, const G1& y)
-{
-	mcl::bn256::G1::add(z.self_, x.self_, y.self_);
-}
-void sub(G1& z, const G1& x, const G1& y)
-{
-	mcl::bn256::G1::sub(z.self_, x.self_, y.self_);
-}
-void mul(G1& z, const G1& x, const Fr& y)
-{
-	mcl::bn256::G1::mul(z.self_, x.self_, y.self_);
-}
-
-/*
-	#G2 = r
-*/
-class G2 {
-	mcl::bn256::G2 self_;
-	friend void neg(G2& y, const G2& x);
-	friend void dbl(G2& y, const G2& x);
-	friend void add(G2& z, const G2& x, const G2& y);
-	friend void sub(G2& z, const G2& x, const G2& y);
-	friend void mul(G2& z, const G2& x, const Fr& y);
-	friend void pairing(GT& e, const G1& P, const G2& Q);
-public:
-	G2() {}
-	G2(const G2& rhs) : self_(rhs.self_) {}
-	G2(const std::string& xa, const std::string& xb, const std::string& ya, const std::string& yb) throw(std::exception)
-		: self_(mcl::bn256::Fp2(xa, xb), mcl::bn256::Fp2(ya, yb))
-	{
-	}
-	bool equals(const G2& rhs) const { return self_ == rhs.self_; }
-	void set(const std::string& xa, const std::string& xb, const std::string& ya, const std::string& yb)
-	{
-		self_.set(mcl::bn256::Fp2(xa, xb), mcl::bn256::Fp2(ya, yb));
-	}
-	void clear()
-	{
-		self_.clear();
-	}
-	/*
-		compressed format
-	*/
-	void setStr(const std::string& str) throw(std::exception)
-	{
-		self_.setStr(str);
-	}
-	std::string toString() const throw(std::exception)
-	{
-		return self_.getStr();
-	}
-};
-
-void neg(G2& y, const G2& x)
-{
-	mcl::bn256::G2::neg(y.self_, x.self_);
-}
-void dbl(G2& y, const G2& x)
-{
-	mcl::bn256::G2::dbl(y.self_, x.self_);
-}
-void add(G2& z, const G2& x, const G2& y)
-{
-	mcl::bn256::G2::add(z.self_, x.self_, y.self_);
-}
-void sub(G2& z, const G2& x, const G2& y)
-{
-	mcl::bn256::G2::sub(z.self_, x.self_, y.self_);
-}
-void mul(G2& z, const G2& x, const Fr& y)
-{
-	mcl::bn256::G2::mul(z.self_, x.self_, y.self_);
-}
-
-/*
-	#GT = r
-*/
-class GT {
-	mcl::bn256::Fp12 self_;
-	friend void mul(GT& z, const GT& x, const GT& y);
-	friend void pow(GT& z, const GT& x, const Fr& y);
-	friend void pairing(GT& e, const G1& P, const G2& Q);
-public:
-	GT() {}
-	GT(const GT& rhs) : self_(rhs.self_) {}
-	bool equals(const GT& rhs) const { return self_ == rhs.self_; }
-	void clear()
-	{
-		self_.clear();
-	}
-	void setStr(const std::string& str) throw(std::exception)
-	{
-		std::istringstream iss(str);
-		iss >> self_;
-	}
-	std::string toString() const throw(std::exception)
-	{
-		std::ostringstream oss;
-		oss << self_;
-		return oss.str();
-	}
-};
-
-void mul(GT& z, const GT& x, const GT& y)
-{
-	mcl::bn256::Fp12::mul(z.self_, x.self_, y.self_);
-}
-void pow(GT& z, const GT& x, const Fr& y)
-{
-	mcl::bn256::Fp12::pow(z.self_, x.self_, y.self_);
-}
-void pairing(GT& e, const G1& P, const G2& Q)
-{
-	mcl::bn256::pairing(e.self_, P.self_, Q.self_);
-}
diff --git a/ffi/java/bn256_wrap.cxx b/ffi/java/bn256_wrap.cxx
deleted file mode 100644
index 0c8257af..00000000
--- a/ffi/java/bn256_wrap.cxx
+++ /dev/null
@@ -1,1542 +0,0 @@
-/* ----------------------------------------------------------------------------
- * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
- *
- * This file is not intended to be easily readable and contains a number of
- * coding conventions designed to improve portability and efficiency. Do not make
- * changes to this file unless you know what you are doing--modify the SWIG
- * interface file instead.
- * ----------------------------------------------------------------------------- */
-
-
-#ifndef SWIGJAVA
-#define SWIGJAVA
-#endif
-
-
-
-#ifdef __cplusplus
-/* SwigValueWrapper is described in swig.swg */
-template<typename T> class SwigValueWrapper {
-  struct SwigMovePointer {
-    T *ptr;
-    SwigMovePointer(T *p) : ptr(p) { }
-    ~SwigMovePointer() { delete ptr; }
-    SwigMovePointer& operator=(SwigMovePointer& rhs) { T* oldptr = ptr; ptr = 0; delete oldptr; ptr = rhs.ptr; rhs.ptr = 0; return *this; }
-  } pointer;
-  SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
-  SwigValueWrapper(const SwigValueWrapper<T>& rhs);
-public:
-  SwigValueWrapper() : pointer(0) { }
-  SwigValueWrapper& operator=(const T& t) { SwigMovePointer tmp(new T(t)); pointer = tmp; return *this; }
-  operator T&() const { return *pointer.ptr; }
-  T *operator&() { return pointer.ptr; }
-};
-
-template <typename T> T SwigValueInit() {
-  return T();
-}
-#endif
-
-/* -----------------------------------------------------------------------------
- *  This section contains generic SWIG labels for method/variable
- *  declarations/attributes, and other compiler dependent labels.
- * ----------------------------------------------------------------------------- */
-
-/* template workaround for compilers that cannot correctly implement the C++ standard */
-#ifndef SWIGTEMPLATEDISAMBIGUATOR
-# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# elif defined(__HP_aCC)
-/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
-/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# else
-#  define SWIGTEMPLATEDISAMBIGUATOR
-# endif
-#endif
-
-/* inline attribute */
-#ifndef SWIGINLINE
-# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
-#   define SWIGINLINE inline
-# else
-#   define SWIGINLINE
-# endif
-#endif
-
-/* attribute recognised by some compilers to avoid 'unused' warnings */
-#ifndef SWIGUNUSED
-# if defined(__GNUC__)
-#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-#     define SWIGUNUSED __attribute__ ((__unused__))
-#   else
-#     define SWIGUNUSED
-#   endif
-# elif defined(__ICC)
-#   define SWIGUNUSED __attribute__ ((__unused__))
-# else
-#   define SWIGUNUSED
-# endif
-#endif
-
-#ifndef SWIG_MSC_UNSUPPRESS_4505
-# if defined(_MSC_VER)
-#   pragma warning(disable : 4505) /* unreferenced local function has been removed */
-# endif
-#endif
-
-#ifndef SWIGUNUSEDPARM
-# ifdef __cplusplus
-#   define SWIGUNUSEDPARM(p)
-# else
-#   define SWIGUNUSEDPARM(p) p SWIGUNUSED
-# endif
-#endif
-
-/* internal SWIG method */
-#ifndef SWIGINTERN
-# define SWIGINTERN static SWIGUNUSED
-#endif
-
-/* internal inline SWIG method */
-#ifndef SWIGINTERNINLINE
-# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
-#endif
-
-/* exporting methods */
-#if defined(__GNUC__)
-#  if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
-#    ifndef GCC_HASCLASSVISIBILITY
-#      define GCC_HASCLASSVISIBILITY
-#    endif
-#  endif
-#endif
-
-#ifndef SWIGEXPORT
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   if defined(STATIC_LINKED)
-#     define SWIGEXPORT
-#   else
-#     define SWIGEXPORT __declspec(dllexport)
-#   endif
-# else
-#   if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
-#     define SWIGEXPORT __attribute__ ((visibility("default")))
-#   else
-#     define SWIGEXPORT
-#   endif
-# endif
-#endif
-
-/* calling conventions for Windows */
-#ifndef SWIGSTDCALL
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   define SWIGSTDCALL __stdcall
-# else
-#   define SWIGSTDCALL
-# endif
-#endif
-
-/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
-#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
-# define _CRT_SECURE_NO_DEPRECATE
-#endif
-
-/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
-#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
-# define _SCL_SECURE_NO_DEPRECATE
-#endif
-
-/* Deal with Apple's deprecated 'AssertMacros.h' from Carbon-framework */
-#if defined(__APPLE__) && !defined(__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES)
-# define __ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES 0
-#endif
-
-/* Intel's compiler complains if a variable which was never initialised is
- * cast to void, which is a common idiom which we use to indicate that we
- * are aware a variable isn't used.  So we just silence that warning.
- * See: https://github.com/swig/swig/issues/192 for more discussion.
- */
-#ifdef __INTEL_COMPILER
-# pragma warning disable 592
-#endif
-
-
-/* Fix for jlong on some versions of gcc on Windows */
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
-  typedef long long __int64;
-#endif
-
-/* Fix for jlong on 64-bit x86 Solaris */
-#if defined(__x86_64)
-# ifdef _LP64
-#   undef _LP64
-# endif
-#endif
-
-#include <jni.h>
-#include <stdlib.h>
-#include <string.h>
-
-
-/* Support for throwing Java exceptions */
-typedef enum {
-  SWIG_JavaOutOfMemoryError = 1, 
-  SWIG_JavaIOException, 
-  SWIG_JavaRuntimeException, 
-  SWIG_JavaIndexOutOfBoundsException,
-  SWIG_JavaArithmeticException,
-  SWIG_JavaIllegalArgumentException,
-  SWIG_JavaNullPointerException,
-  SWIG_JavaDirectorPureVirtual,
-  SWIG_JavaUnknownError
-} SWIG_JavaExceptionCodes;
-
-typedef struct {
-  SWIG_JavaExceptionCodes code;
-  const char *java_exception;
-} SWIG_JavaExceptions_t;
-
-
-static void SWIGUNUSED SWIG_JavaThrowException(JNIEnv *jenv, SWIG_JavaExceptionCodes code, const char *msg) {
-  jclass excep;
-  static const SWIG_JavaExceptions_t java_exceptions[] = {
-    { SWIG_JavaOutOfMemoryError, "java/lang/OutOfMemoryError" },
-    { SWIG_JavaIOException, "java/io/IOException" },
-    { SWIG_JavaRuntimeException, "java/lang/RuntimeException" },
-    { SWIG_JavaIndexOutOfBoundsException, "java/lang/IndexOutOfBoundsException" },
-    { SWIG_JavaArithmeticException, "java/lang/ArithmeticException" },
-    { SWIG_JavaIllegalArgumentException, "java/lang/IllegalArgumentException" },
-    { SWIG_JavaNullPointerException, "java/lang/NullPointerException" },
-    { SWIG_JavaDirectorPureVirtual, "java/lang/RuntimeException" },
-    { SWIG_JavaUnknownError,  "java/lang/UnknownError" },
-    { (SWIG_JavaExceptionCodes)0,  "java/lang/UnknownError" }
-  };
-  const SWIG_JavaExceptions_t *except_ptr = java_exceptions;
-
-  while (except_ptr->code != code && except_ptr->code)
-    except_ptr++;
-
-  jenv->ExceptionClear();
-  excep = jenv->FindClass(except_ptr->java_exception);
-  if (excep)
-    jenv->ThrowNew(excep, msg);
-}
-
-
-/* Contract support */
-
-#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_JavaThrowException(jenv, SWIG_JavaIllegalArgumentException, msg); return nullreturn; } else
-
-
-#include <string>
-
-
-#include <typeinfo>
-#include <stdexcept>
-
-
-#include <cybozu/random_generator.hpp>
-#include <cybozu/crypto.hpp>
-#include <mcl/bn256.hpp>
-struct Param {
-    cybozu::RandomGenerator rg;
-    static inline Param& getParam()
-	{
-		static Param p;
-	    return p;
-	}
-};
-
-static void HashAndMapToG1(mcl::bn256::G1& P, const std::string& m)
-{
-	std::string digest = cybozu::crypto::Hash::digest(cybozu::crypto::Hash::N_SHA256, m);
-	mcl::bn256::Fp t;
-	t.setArrayMask(digest.c_str(), digest.size());
-	mcl::bn256::BN::param.mapTo.calcG1(P, t);
-}
-
-#include "bn256_impl.hpp"
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_SystemInit(JNIEnv *jenv, jclass jcls) {
-  (void)jenv;
-  (void)jcls;
-  try {
-    SystemInit();
-  }
-  catch(std::exception &_e) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
-    return ;
-  }
-  
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_neg_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
-  Fr *arg1 = 0 ;
-  Fr *arg2 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  arg1 = *(Fr **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr & reference is null");
-    return ;
-  } 
-  arg2 = *(Fr **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return ;
-  } 
-  neg(*arg1,(Fr const &)*arg2);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_add_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  Fr *arg1 = 0 ;
-  Fr *arg2 = 0 ;
-  Fr *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(Fr **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr & reference is null");
-    return ;
-  } 
-  arg2 = *(Fr **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return ;
-  } 
-  arg3 = *(Fr **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return ;
-  } 
-  add(*arg1,(Fr const &)*arg2,(Fr const &)*arg3);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_sub_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  Fr *arg1 = 0 ;
-  Fr *arg2 = 0 ;
-  Fr *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(Fr **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr & reference is null");
-    return ;
-  } 
-  arg2 = *(Fr **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return ;
-  } 
-  arg3 = *(Fr **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return ;
-  } 
-  sub(*arg1,(Fr const &)*arg2,(Fr const &)*arg3);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_mul_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  Fr *arg1 = 0 ;
-  Fr *arg2 = 0 ;
-  Fr *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(Fr **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr & reference is null");
-    return ;
-  } 
-  arg2 = *(Fr **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return ;
-  } 
-  arg3 = *(Fr **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return ;
-  } 
-  mul(*arg1,(Fr const &)*arg2,(Fr const &)*arg3);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_mul_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  G1 *arg1 = 0 ;
-  G1 *arg2 = 0 ;
-  Fr *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(G1 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 & reference is null");
-    return ;
-  } 
-  arg2 = *(G1 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
-    return ;
-  } 
-  arg3 = *(Fr **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return ;
-  } 
-  mul(*arg1,(G1 const &)*arg2,(Fr const &)*arg3);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_mul_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  G2 *arg1 = 0 ;
-  G2 *arg2 = 0 ;
-  Fr *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(G2 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 & reference is null");
-    return ;
-  } 
-  arg2 = *(G2 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
-    return ;
-  } 
-  arg3 = *(Fr **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return ;
-  } 
-  mul(*arg1,(G2 const &)*arg2,(Fr const &)*arg3);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_div(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  Fr *arg1 = 0 ;
-  Fr *arg2 = 0 ;
-  Fr *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(Fr **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr & reference is null");
-    return ;
-  } 
-  arg2 = *(Fr **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return ;
-  } 
-  arg3 = *(Fr **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return ;
-  } 
-  div(*arg1,(Fr const &)*arg2,(Fr const &)*arg3);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_pow(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  GT *arg1 = 0 ;
-  GT *arg2 = 0 ;
-  Fr *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(GT **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT & reference is null");
-    return ;
-  } 
-  arg2 = *(GT **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT const & reference is null");
-    return ;
-  } 
-  arg3 = *(Fr **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return ;
-  } 
-  pow(*arg1,(GT const &)*arg2,(Fr const &)*arg3);
-}
-
-
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_Bn256JNI_new_1Fr_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
-  jlong jresult = 0 ;
-  Fr *result = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  result = (Fr *)new Fr();
-  *(Fr **)&jresult = result; 
-  return jresult;
-}
-
-
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_Bn256JNI_new_1Fr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-  jlong jresult = 0 ;
-  Fr *arg1 = 0 ;
-  Fr *result = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(Fr **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return 0;
-  } 
-  result = (Fr *)new Fr((Fr const &)*arg1);
-  *(Fr **)&jresult = result; 
-  return jresult;
-}
-
-
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_Bn256JNI_new_1Fr_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jint jarg1) {
-  jlong jresult = 0 ;
-  int arg1 ;
-  Fr *result = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  arg1 = (int)jarg1; 
-  result = (Fr *)new Fr(arg1);
-  *(Fr **)&jresult = result; 
-  return jresult;
-}
-
-
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_Bn256JNI_new_1Fr_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jstring jarg1) {
-  jlong jresult = 0 ;
-  std::string *arg1 = 0 ;
-  Fr *result = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  if(!jarg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return 0;
-  }
-  const char *arg1_pstr = (const char *)jenv->GetStringUTFChars(jarg1, 0); 
-  if (!arg1_pstr) return 0;
-  std::string arg1_str(arg1_pstr);
-  arg1 = &arg1_str;
-  jenv->ReleaseStringUTFChars(jarg1, arg1_pstr); 
-  try {
-    result = (Fr *)new Fr((std::string const &)*arg1);
-  }
-  catch(std::exception &_e) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
-    return 0;
-  }
-  
-  *(Fr **)&jresult = result; 
-  return jresult;
-}
-
-
-SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_Bn256JNI_Fr_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
-  jboolean jresult = 0 ;
-  Fr *arg1 = (Fr *) 0 ;
-  Fr *arg2 = 0 ;
-  bool result;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  arg1 = *(Fr **)&jarg1; 
-  arg2 = *(Fr **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
-    return 0;
-  } 
-  result = (bool)((Fr const *)arg1)->equals((Fr const &)*arg2);
-  jresult = (jboolean)result; 
-  return jresult;
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_Fr_1setStr(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
-  Fr *arg1 = (Fr *) 0 ;
-  std::string *arg2 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(Fr **)&jarg1; 
-  if(!jarg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return ;
-  }
-  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
-  if (!arg2_pstr) return ;
-  std::string arg2_str(arg2_pstr);
-  arg2 = &arg2_str;
-  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
-  try {
-    (arg1)->setStr((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
-    return ;
-  }
-  
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_Fr_1setInt(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
-  Fr *arg1 = (Fr *) 0 ;
-  int arg2 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(Fr **)&jarg1; 
-  arg2 = (int)jarg2; 
-  (arg1)->setInt(arg2);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_Fr_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-  Fr *arg1 = (Fr *) 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(Fr **)&jarg1; 
-  (arg1)->clear();
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_Fr_1setRand(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-  Fr *arg1 = (Fr *) 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(Fr **)&jarg1; 
-  (arg1)->setRand();
-}
-
-
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_Bn256JNI_Fr_1toString(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-  jstring jresult = 0 ;
-  Fr *arg1 = (Fr *) 0 ;
-  std::string result;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(Fr **)&jarg1; 
-  try {
-    result = ((Fr const *)arg1)->toString();
-  }
-  catch(std::exception &_e) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
-    return 0;
-  }
-  
-  jresult = jenv->NewStringUTF((&result)->c_str()); 
-  return jresult;
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_delete_1Fr(JNIEnv *jenv, jclass jcls, jlong jarg1) {
-  Fr *arg1 = (Fr *) 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  arg1 = *(Fr **)&jarg1; 
-  delete arg1;
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_neg_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
-  G1 *arg1 = 0 ;
-  G1 *arg2 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  arg1 = *(G1 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 & reference is null");
-    return ;
-  } 
-  arg2 = *(G1 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
-    return ;
-  } 
-  neg(*arg1,(G1 const &)*arg2);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_dbl_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
-  G1 *arg1 = 0 ;
-  G1 *arg2 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  arg1 = *(G1 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 & reference is null");
-    return ;
-  } 
-  arg2 = *(G1 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
-    return ;
-  } 
-  dbl(*arg1,(G1 const &)*arg2);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_add_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  G1 *arg1 = 0 ;
-  G1 *arg2 = 0 ;
-  G1 *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(G1 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 & reference is null");
-    return ;
-  } 
-  arg2 = *(G1 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
-    return ;
-  } 
-  arg3 = *(G1 **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
-    return ;
-  } 
-  add(*arg1,(G1 const &)*arg2,(G1 const &)*arg3);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_sub_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  G1 *arg1 = 0 ;
-  G1 *arg2 = 0 ;
-  G1 *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(G1 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 & reference is null");
-    return ;
-  } 
-  arg2 = *(G1 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
-    return ;
-  } 
-  arg3 = *(G1 **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
-    return ;
-  } 
-  sub(*arg1,(G1 const &)*arg2,(G1 const &)*arg3);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_pairing(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  GT *arg1 = 0 ;
-  G1 *arg2 = 0 ;
-  G2 *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(GT **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT & reference is null");
-    return ;
-  } 
-  arg2 = *(G1 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
-    return ;
-  } 
-  arg3 = *(G2 **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
-    return ;
-  } 
-  pairing(*arg1,(G1 const &)*arg2,(G2 const &)*arg3);
-}
-
-
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_Bn256JNI_new_1G1_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
-  jlong jresult = 0 ;
-  G1 *result = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  result = (G1 *)new G1();
-  *(G1 **)&jresult = result; 
-  return jresult;
-}
-
-
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_Bn256JNI_new_1G1_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-  jlong jresult = 0 ;
-  G1 *arg1 = 0 ;
-  G1 *result = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(G1 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
-    return 0;
-  } 
-  result = (G1 *)new G1((G1 const &)*arg1);
-  *(G1 **)&jresult = result; 
-  return jresult;
-}
-
-
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_Bn256JNI_new_1G1_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jstring jarg1, jstring jarg2) {
-  jlong jresult = 0 ;
-  std::string *arg1 = 0 ;
-  std::string *arg2 = 0 ;
-  G1 *result = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  if(!jarg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return 0;
-  }
-  const char *arg1_pstr = (const char *)jenv->GetStringUTFChars(jarg1, 0); 
-  if (!arg1_pstr) return 0;
-  std::string arg1_str(arg1_pstr);
-  arg1 = &arg1_str;
-  jenv->ReleaseStringUTFChars(jarg1, arg1_pstr); 
-  if(!jarg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return 0;
-  }
-  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
-  if (!arg2_pstr) return 0;
-  std::string arg2_str(arg2_pstr);
-  arg2 = &arg2_str;
-  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
-  try {
-    result = (G1 *)new G1((std::string const &)*arg1,(std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
-    return 0;
-  }
-  
-  *(G1 **)&jresult = result; 
-  return jresult;
-}
-
-
-SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_Bn256JNI_G1_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
-  jboolean jresult = 0 ;
-  G1 *arg1 = (G1 *) 0 ;
-  G1 *arg2 = 0 ;
-  bool result;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  arg1 = *(G1 **)&jarg1; 
-  arg2 = *(G1 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
-    return 0;
-  } 
-  result = (bool)((G1 const *)arg1)->equals((G1 const &)*arg2);
-  jresult = (jboolean)result; 
-  return jresult;
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_G1_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jstring jarg3) {
-  G1 *arg1 = (G1 *) 0 ;
-  std::string *arg2 = 0 ;
-  std::string *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(G1 **)&jarg1; 
-  if(!jarg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return ;
-  }
-  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
-  if (!arg2_pstr) return ;
-  std::string arg2_str(arg2_pstr);
-  arg2 = &arg2_str;
-  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
-  if(!jarg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return ;
-  }
-  const char *arg3_pstr = (const char *)jenv->GetStringUTFChars(jarg3, 0); 
-  if (!arg3_pstr) return ;
-  std::string arg3_str(arg3_pstr);
-  arg3 = &arg3_str;
-  jenv->ReleaseStringUTFChars(jarg3, arg3_pstr); 
-  (arg1)->set((std::string const &)*arg2,(std::string const &)*arg3);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_G1_1hashAndMapToG1(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
-  G1 *arg1 = (G1 *) 0 ;
-  std::string *arg2 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(G1 **)&jarg1; 
-  if(!jarg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return ;
-  }
-  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
-  if (!arg2_pstr) return ;
-  std::string arg2_str(arg2_pstr);
-  arg2 = &arg2_str;
-  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
-  try {
-    (arg1)->hashAndMapToG1((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
-    return ;
-  }
-  
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_G1_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-  G1 *arg1 = (G1 *) 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(G1 **)&jarg1; 
-  (arg1)->clear();
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_G1_1setStr(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
-  G1 *arg1 = (G1 *) 0 ;
-  std::string *arg2 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(G1 **)&jarg1; 
-  if(!jarg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return ;
-  }
-  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
-  if (!arg2_pstr) return ;
-  std::string arg2_str(arg2_pstr);
-  arg2 = &arg2_str;
-  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
-  try {
-    (arg1)->setStr((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
-    return ;
-  }
-  
-}
-
-
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_Bn256JNI_G1_1toString(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-  jstring jresult = 0 ;
-  G1 *arg1 = (G1 *) 0 ;
-  std::string result;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(G1 **)&jarg1; 
-  try {
-    result = ((G1 const *)arg1)->toString();
-  }
-  catch(std::exception &_e) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
-    return 0;
-  }
-  
-  jresult = jenv->NewStringUTF((&result)->c_str()); 
-  return jresult;
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_delete_1G1(JNIEnv *jenv, jclass jcls, jlong jarg1) {
-  G1 *arg1 = (G1 *) 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  arg1 = *(G1 **)&jarg1; 
-  delete arg1;
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_neg_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
-  G2 *arg1 = 0 ;
-  G2 *arg2 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  arg1 = *(G2 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 & reference is null");
-    return ;
-  } 
-  arg2 = *(G2 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
-    return ;
-  } 
-  neg(*arg1,(G2 const &)*arg2);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_dbl_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
-  G2 *arg1 = 0 ;
-  G2 *arg2 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  arg1 = *(G2 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 & reference is null");
-    return ;
-  } 
-  arg2 = *(G2 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
-    return ;
-  } 
-  dbl(*arg1,(G2 const &)*arg2);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_add_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  G2 *arg1 = 0 ;
-  G2 *arg2 = 0 ;
-  G2 *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(G2 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 & reference is null");
-    return ;
-  } 
-  arg2 = *(G2 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
-    return ;
-  } 
-  arg3 = *(G2 **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
-    return ;
-  } 
-  add(*arg1,(G2 const &)*arg2,(G2 const &)*arg3);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_sub_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  G2 *arg1 = 0 ;
-  G2 *arg2 = 0 ;
-  G2 *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(G2 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 & reference is null");
-    return ;
-  } 
-  arg2 = *(G2 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
-    return ;
-  } 
-  arg3 = *(G2 **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
-    return ;
-  } 
-  sub(*arg1,(G2 const &)*arg2,(G2 const &)*arg3);
-}
-
-
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_Bn256JNI_new_1G2_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
-  jlong jresult = 0 ;
-  G2 *result = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  result = (G2 *)new G2();
-  *(G2 **)&jresult = result; 
-  return jresult;
-}
-
-
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_Bn256JNI_new_1G2_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-  jlong jresult = 0 ;
-  G2 *arg1 = 0 ;
-  G2 *result = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(G2 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
-    return 0;
-  } 
-  result = (G2 *)new G2((G2 const &)*arg1);
-  *(G2 **)&jresult = result; 
-  return jresult;
-}
-
-
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_Bn256JNI_new_1G2_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jstring jarg1, jstring jarg2, jstring jarg3, jstring jarg4) {
-  jlong jresult = 0 ;
-  std::string *arg1 = 0 ;
-  std::string *arg2 = 0 ;
-  std::string *arg3 = 0 ;
-  std::string *arg4 = 0 ;
-  G2 *result = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  if(!jarg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return 0;
-  }
-  const char *arg1_pstr = (const char *)jenv->GetStringUTFChars(jarg1, 0); 
-  if (!arg1_pstr) return 0;
-  std::string arg1_str(arg1_pstr);
-  arg1 = &arg1_str;
-  jenv->ReleaseStringUTFChars(jarg1, arg1_pstr); 
-  if(!jarg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return 0;
-  }
-  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
-  if (!arg2_pstr) return 0;
-  std::string arg2_str(arg2_pstr);
-  arg2 = &arg2_str;
-  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
-  if(!jarg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return 0;
-  }
-  const char *arg3_pstr = (const char *)jenv->GetStringUTFChars(jarg3, 0); 
-  if (!arg3_pstr) return 0;
-  std::string arg3_str(arg3_pstr);
-  arg3 = &arg3_str;
-  jenv->ReleaseStringUTFChars(jarg3, arg3_pstr); 
-  if(!jarg4) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return 0;
-  }
-  const char *arg4_pstr = (const char *)jenv->GetStringUTFChars(jarg4, 0); 
-  if (!arg4_pstr) return 0;
-  std::string arg4_str(arg4_pstr);
-  arg4 = &arg4_str;
-  jenv->ReleaseStringUTFChars(jarg4, arg4_pstr); 
-  try {
-    result = (G2 *)new G2((std::string const &)*arg1,(std::string const &)*arg2,(std::string const &)*arg3,(std::string const &)*arg4);
-  }
-  catch(std::exception &_e) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
-    return 0;
-  }
-  
-  *(G2 **)&jresult = result; 
-  return jresult;
-}
-
-
-SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_Bn256JNI_G2_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
-  jboolean jresult = 0 ;
-  G2 *arg1 = (G2 *) 0 ;
-  G2 *arg2 = 0 ;
-  bool result;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  arg1 = *(G2 **)&jarg1; 
-  arg2 = *(G2 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
-    return 0;
-  } 
-  result = (bool)((G2 const *)arg1)->equals((G2 const &)*arg2);
-  jresult = (jboolean)result; 
-  return jresult;
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_G2_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jstring jarg3, jstring jarg4, jstring jarg5) {
-  G2 *arg1 = (G2 *) 0 ;
-  std::string *arg2 = 0 ;
-  std::string *arg3 = 0 ;
-  std::string *arg4 = 0 ;
-  std::string *arg5 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(G2 **)&jarg1; 
-  if(!jarg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return ;
-  }
-  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
-  if (!arg2_pstr) return ;
-  std::string arg2_str(arg2_pstr);
-  arg2 = &arg2_str;
-  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
-  if(!jarg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return ;
-  }
-  const char *arg3_pstr = (const char *)jenv->GetStringUTFChars(jarg3, 0); 
-  if (!arg3_pstr) return ;
-  std::string arg3_str(arg3_pstr);
-  arg3 = &arg3_str;
-  jenv->ReleaseStringUTFChars(jarg3, arg3_pstr); 
-  if(!jarg4) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return ;
-  }
-  const char *arg4_pstr = (const char *)jenv->GetStringUTFChars(jarg4, 0); 
-  if (!arg4_pstr) return ;
-  std::string arg4_str(arg4_pstr);
-  arg4 = &arg4_str;
-  jenv->ReleaseStringUTFChars(jarg4, arg4_pstr); 
-  if(!jarg5) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return ;
-  }
-  const char *arg5_pstr = (const char *)jenv->GetStringUTFChars(jarg5, 0); 
-  if (!arg5_pstr) return ;
-  std::string arg5_str(arg5_pstr);
-  arg5 = &arg5_str;
-  jenv->ReleaseStringUTFChars(jarg5, arg5_pstr); 
-  (arg1)->set((std::string const &)*arg2,(std::string const &)*arg3,(std::string const &)*arg4,(std::string const &)*arg5);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_G2_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-  G2 *arg1 = (G2 *) 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(G2 **)&jarg1; 
-  (arg1)->clear();
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_G2_1setStr(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
-  G2 *arg1 = (G2 *) 0 ;
-  std::string *arg2 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(G2 **)&jarg1; 
-  if(!jarg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return ;
-  }
-  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
-  if (!arg2_pstr) return ;
-  std::string arg2_str(arg2_pstr);
-  arg2 = &arg2_str;
-  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
-  try {
-    (arg1)->setStr((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
-    return ;
-  }
-  
-}
-
-
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_Bn256JNI_G2_1toString(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-  jstring jresult = 0 ;
-  G2 *arg1 = (G2 *) 0 ;
-  std::string result;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(G2 **)&jarg1; 
-  try {
-    result = ((G2 const *)arg1)->toString();
-  }
-  catch(std::exception &_e) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
-    return 0;
-  }
-  
-  jresult = jenv->NewStringUTF((&result)->c_str()); 
-  return jresult;
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_delete_1G2(JNIEnv *jenv, jclass jcls, jlong jarg1) {
-  G2 *arg1 = (G2 *) 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  arg1 = *(G2 **)&jarg1; 
-  delete arg1;
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_mul_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  GT *arg1 = 0 ;
-  GT *arg2 = 0 ;
-  GT *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(GT **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT & reference is null");
-    return ;
-  } 
-  arg2 = *(GT **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT const & reference is null");
-    return ;
-  } 
-  arg3 = *(GT **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT const & reference is null");
-    return ;
-  } 
-  mul(*arg1,(GT const &)*arg2,(GT const &)*arg3);
-}
-
-
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_Bn256JNI_new_1GT_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
-  jlong jresult = 0 ;
-  GT *result = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  result = (GT *)new GT();
-  *(GT **)&jresult = result; 
-  return jresult;
-}
-
-
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_Bn256JNI_new_1GT_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-  jlong jresult = 0 ;
-  GT *arg1 = 0 ;
-  GT *result = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(GT **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT const & reference is null");
-    return 0;
-  } 
-  result = (GT *)new GT((GT const &)*arg1);
-  *(GT **)&jresult = result; 
-  return jresult;
-}
-
-
-SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_Bn256JNI_GT_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
-  jboolean jresult = 0 ;
-  GT *arg1 = (GT *) 0 ;
-  GT *arg2 = 0 ;
-  bool result;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  arg1 = *(GT **)&jarg1; 
-  arg2 = *(GT **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT const & reference is null");
-    return 0;
-  } 
-  result = (bool)((GT const *)arg1)->equals((GT const &)*arg2);
-  jresult = (jboolean)result; 
-  return jresult;
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_GT_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-  GT *arg1 = (GT *) 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(GT **)&jarg1; 
-  (arg1)->clear();
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_GT_1setStr(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
-  GT *arg1 = (GT *) 0 ;
-  std::string *arg2 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(GT **)&jarg1; 
-  if(!jarg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
-    return ;
-  }
-  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
-  if (!arg2_pstr) return ;
-  std::string arg2_str(arg2_pstr);
-  arg2 = &arg2_str;
-  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
-  try {
-    (arg1)->setStr((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
-    return ;
-  }
-  
-}
-
-
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_Bn256JNI_GT_1toString(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-  jstring jresult = 0 ;
-  GT *arg1 = (GT *) 0 ;
-  std::string result;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  arg1 = *(GT **)&jarg1; 
-  try {
-    result = ((GT const *)arg1)->toString();
-  }
-  catch(std::exception &_e) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
-    return 0;
-  }
-  
-  jresult = jenv->NewStringUTF((&result)->c_str()); 
-  return jresult;
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_Bn256JNI_delete_1GT(JNIEnv *jenv, jclass jcls, jlong jarg1) {
-  GT *arg1 = (GT *) 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  arg1 = *(GT **)&jarg1; 
-  delete arg1;
-}
-
-
-#ifdef __cplusplus
-}
-#endif
-
diff --git a/ffi/java/com/herumi/mcl/Bn256.java b/ffi/java/com/herumi/mcl/Bn256.java
deleted file mode 100644
index 9da1ffaf..00000000
--- a/ffi/java/com/herumi/mcl/Bn256.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/* ----------------------------------------------------------------------------
- * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
- *
- * Do not make changes to this file unless you know what you are doing--modify
- * the SWIG interface file instead.
- * ----------------------------------------------------------------------------- */
-
-package com.herumi.mcl;
-
-public class Bn256 {
-  public static void SystemInit() {
-    Bn256JNI.SystemInit();
-  }
-
-  public static void neg(Fr y, Fr x) {
-    Bn256JNI.neg__SWIG_0(Fr.getCPtr(y), y, Fr.getCPtr(x), x);
-  }
-
-  public static void add(Fr z, Fr x, Fr y) {
-    Bn256JNI.add__SWIG_0(Fr.getCPtr(z), z, Fr.getCPtr(x), x, Fr.getCPtr(y), y);
-  }
-
-  public static void sub(Fr z, Fr x, Fr y) {
-    Bn256JNI.sub__SWIG_0(Fr.getCPtr(z), z, Fr.getCPtr(x), x, Fr.getCPtr(y), y);
-  }
-
-  public static void mul(Fr z, Fr x, Fr y) {
-    Bn256JNI.mul__SWIG_0(Fr.getCPtr(z), z, Fr.getCPtr(x), x, Fr.getCPtr(y), y);
-  }
-
-  public static void mul(G1 z, G1 x, Fr y) {
-    Bn256JNI.mul__SWIG_1(G1.getCPtr(z), z, G1.getCPtr(x), x, Fr.getCPtr(y), y);
-  }
-
-  public static void mul(G2 z, G2 x, Fr y) {
-    Bn256JNI.mul__SWIG_2(G2.getCPtr(z), z, G2.getCPtr(x), x, Fr.getCPtr(y), y);
-  }
-
-  public static void div(Fr z, Fr x, Fr y) {
-    Bn256JNI.div(Fr.getCPtr(z), z, Fr.getCPtr(x), x, Fr.getCPtr(y), y);
-  }
-
-  public static void pow(GT z, GT x, Fr y) {
-    Bn256JNI.pow(GT.getCPtr(z), z, GT.getCPtr(x), x, Fr.getCPtr(y), y);
-  }
-
-  public static void neg(G1 y, G1 x) {
-    Bn256JNI.neg__SWIG_1(G1.getCPtr(y), y, G1.getCPtr(x), x);
-  }
-
-  public static void dbl(G1 y, G1 x) {
-    Bn256JNI.dbl__SWIG_0(G1.getCPtr(y), y, G1.getCPtr(x), x);
-  }
-
-  public static void add(G1 z, G1 x, G1 y) {
-    Bn256JNI.add__SWIG_1(G1.getCPtr(z), z, G1.getCPtr(x), x, G1.getCPtr(y), y);
-  }
-
-  public static void sub(G1 z, G1 x, G1 y) {
-    Bn256JNI.sub__SWIG_1(G1.getCPtr(z), z, G1.getCPtr(x), x, G1.getCPtr(y), y);
-  }
-
-  public static void pairing(GT e, G1 P, G2 Q) {
-    Bn256JNI.pairing(GT.getCPtr(e), e, G1.getCPtr(P), P, G2.getCPtr(Q), Q);
-  }
-
-  public static void neg(G2 y, G2 x) {
-    Bn256JNI.neg__SWIG_2(G2.getCPtr(y), y, G2.getCPtr(x), x);
-  }
-
-  public static void dbl(G2 y, G2 x) {
-    Bn256JNI.dbl__SWIG_1(G2.getCPtr(y), y, G2.getCPtr(x), x);
-  }
-
-  public static void add(G2 z, G2 x, G2 y) {
-    Bn256JNI.add__SWIG_2(G2.getCPtr(z), z, G2.getCPtr(x), x, G2.getCPtr(y), y);
-  }
-
-  public static void sub(G2 z, G2 x, G2 y) {
-    Bn256JNI.sub__SWIG_2(G2.getCPtr(z), z, G2.getCPtr(x), x, G2.getCPtr(y), y);
-  }
-
-  public static void mul(GT z, GT x, GT y) {
-    Bn256JNI.mul__SWIG_3(GT.getCPtr(z), z, GT.getCPtr(x), x, GT.getCPtr(y), y);
-  }
-
-}
diff --git a/ffi/java/com/herumi/mcl/Bn256JNI.java b/ffi/java/com/herumi/mcl/Bn256JNI.java
deleted file mode 100644
index fa9c43b7..00000000
--- a/ffi/java/com/herumi/mcl/Bn256JNI.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/* ----------------------------------------------------------------------------
- * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
- *
- * Do not make changes to this file unless you know what you are doing--modify
- * the SWIG interface file instead.
- * ----------------------------------------------------------------------------- */
-
-package com.herumi.mcl;
-
-public class Bn256JNI {
-  public final static native void SystemInit();
-  public final static native void neg__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_);
-  public final static native void add__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
-  public final static native void sub__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
-  public final static native void mul__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
-  public final static native void mul__SWIG_1(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_, long jarg3, Fr jarg3_);
-  public final static native void mul__SWIG_2(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_, long jarg3, Fr jarg3_);
-  public final static native void div(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
-  public final static native void pow(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, Fr jarg3_);
-  public final static native long new_Fr__SWIG_0();
-  public final static native long new_Fr__SWIG_1(long jarg1, Fr jarg1_);
-  public final static native long new_Fr__SWIG_2(int jarg1);
-  public final static native long new_Fr__SWIG_3(String jarg1);
-  public final static native boolean Fr_equals(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_);
-  public final static native void Fr_setStr(long jarg1, Fr jarg1_, String jarg2);
-  public final static native void Fr_setInt(long jarg1, Fr jarg1_, int jarg2);
-  public final static native void Fr_clear(long jarg1, Fr jarg1_);
-  public final static native void Fr_setRand(long jarg1, Fr jarg1_);
-  public final static native String Fr_toString(long jarg1, Fr jarg1_);
-  public final static native void delete_Fr(long jarg1);
-  public final static native void neg__SWIG_1(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_);
-  public final static native void dbl__SWIG_0(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_);
-  public final static native void add__SWIG_1(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_, long jarg3, G1 jarg3_);
-  public final static native void sub__SWIG_1(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_, long jarg3, G1 jarg3_);
-  public final static native void pairing(long jarg1, GT jarg1_, long jarg2, G1 jarg2_, long jarg3, G2 jarg3_);
-  public final static native long new_G1__SWIG_0();
-  public final static native long new_G1__SWIG_1(long jarg1, G1 jarg1_);
-  public final static native long new_G1__SWIG_2(String jarg1, String jarg2);
-  public final static native boolean G1_equals(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_);
-  public final static native void G1_set(long jarg1, G1 jarg1_, String jarg2, String jarg3);
-  public final static native void G1_hashAndMapToG1(long jarg1, G1 jarg1_, String jarg2);
-  public final static native void G1_clear(long jarg1, G1 jarg1_);
-  public final static native void G1_setStr(long jarg1, G1 jarg1_, String jarg2);
-  public final static native String G1_toString(long jarg1, G1 jarg1_);
-  public final static native void delete_G1(long jarg1);
-  public final static native void neg__SWIG_2(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_);
-  public final static native void dbl__SWIG_1(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_);
-  public final static native void add__SWIG_2(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_, long jarg3, G2 jarg3_);
-  public final static native void sub__SWIG_2(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_, long jarg3, G2 jarg3_);
-  public final static native long new_G2__SWIG_0();
-  public final static native long new_G2__SWIG_1(long jarg1, G2 jarg1_);
-  public final static native long new_G2__SWIG_2(String jarg1, String jarg2, String jarg3, String jarg4);
-  public final static native boolean G2_equals(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_);
-  public final static native void G2_set(long jarg1, G2 jarg1_, String jarg2, String jarg3, String jarg4, String jarg5);
-  public final static native void G2_clear(long jarg1, G2 jarg1_);
-  public final static native void G2_setStr(long jarg1, G2 jarg1_, String jarg2);
-  public final static native String G2_toString(long jarg1, G2 jarg1_);
-  public final static native void delete_G2(long jarg1);
-  public final static native void mul__SWIG_3(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, GT jarg3_);
-  public final static native long new_GT__SWIG_0();
-  public final static native long new_GT__SWIG_1(long jarg1, GT jarg1_);
-  public final static native boolean GT_equals(long jarg1, GT jarg1_, long jarg2, GT jarg2_);
-  public final static native void GT_clear(long jarg1, GT jarg1_);
-  public final static native void GT_setStr(long jarg1, GT jarg1_, String jarg2);
-  public final static native String GT_toString(long jarg1, GT jarg1_);
-  public final static native void delete_GT(long jarg1);
-}
diff --git a/ffi/java/com/herumi/mcl/Fp.java b/ffi/java/com/herumi/mcl/Fp.java
new file mode 100644
index 00000000..93b97328
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/Fp.java
@@ -0,0 +1,94 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class Fp {
+  private transient long swigCPtr;
+  protected transient boolean swigCMemOwn;
+
+  protected Fp(long cPtr, boolean cMemoryOwn) {
+    swigCMemOwn = cMemoryOwn;
+    swigCPtr = cPtr;
+  }
+
+  protected static long getCPtr(Fp obj) {
+    return (obj == null) ? 0 : obj.swigCPtr;
+  }
+
+  protected void finalize() {
+    delete();
+  }
+
+  public synchronized void delete() {
+    if (swigCPtr != 0) {
+      if (swigCMemOwn) {
+        swigCMemOwn = false;
+        MclJNI.delete_Fp(swigCPtr);
+      }
+      swigCPtr = 0;
+    }
+  }
+
+  public Fp() {
+    this(MclJNI.new_Fp__SWIG_0(), true);
+  }
+
+  public Fp(Fp rhs) {
+    this(MclJNI.new_Fp__SWIG_1(Fp.getCPtr(rhs), rhs), true);
+  }
+
+  public Fp(int x) {
+    this(MclJNI.new_Fp__SWIG_2(x), true);
+  }
+
+  public Fp(String str, int base) {
+    this(MclJNI.new_Fp__SWIG_3(str, base), true);
+  }
+
+  public Fp(String str) {
+    this(MclJNI.new_Fp__SWIG_4(str), true);
+  }
+
+  public boolean equals(Fp rhs) {
+    return MclJNI.Fp_equals(swigCPtr, this, Fp.getCPtr(rhs), rhs);
+  }
+
+  public void setStr(String str, int base) {
+    MclJNI.Fp_setStr__SWIG_0(swigCPtr, this, str, base);
+  }
+
+  public void setStr(String str) {
+    MclJNI.Fp_setStr__SWIG_1(swigCPtr, this, str);
+  }
+
+  public void setInt(int x) {
+    MclJNI.Fp_setInt(swigCPtr, this, x);
+  }
+
+  public void clear() {
+    MclJNI.Fp_clear(swigCPtr, this);
+  }
+
+  public void setByCSPRNG() {
+    MclJNI.Fp_setByCSPRNG(swigCPtr, this);
+  }
+
+  public String toString(int base) {
+    return MclJNI.Fp_toString__SWIG_0(swigCPtr, this, base);
+  }
+
+  public String toString() {
+    return MclJNI.Fp_toString__SWIG_1(swigCPtr, this);
+  }
+
+  public void deserialize(byte[] cbuf) {
+    MclJNI.Fp_deserialize(swigCPtr, this, cbuf);
+  }
+
+}
diff --git a/ffi/java/com/herumi/mcl/Fr.java b/ffi/java/com/herumi/mcl/Fr.java
index 84207c03..a06a97a3 100644
--- a/ffi/java/com/herumi/mcl/Fr.java
+++ b/ffi/java/com/herumi/mcl/Fr.java
@@ -29,50 +29,66 @@ public synchronized void delete() {
     if (swigCPtr != 0) {
       if (swigCMemOwn) {
         swigCMemOwn = false;
-        Bn256JNI.delete_Fr(swigCPtr);
+        MclJNI.delete_Fr(swigCPtr);
       }
       swigCPtr = 0;
     }
   }
 
   public Fr() {
-    this(Bn256JNI.new_Fr__SWIG_0(), true);
+    this(MclJNI.new_Fr__SWIG_0(), true);
   }
 
   public Fr(Fr rhs) {
-    this(Bn256JNI.new_Fr__SWIG_1(Fr.getCPtr(rhs), rhs), true);
+    this(MclJNI.new_Fr__SWIG_1(Fr.getCPtr(rhs), rhs), true);
   }
 
   public Fr(int x) {
-    this(Bn256JNI.new_Fr__SWIG_2(x), true);
+    this(MclJNI.new_Fr__SWIG_2(x), true);
+  }
+
+  public Fr(String str, int base) {
+    this(MclJNI.new_Fr__SWIG_3(str, base), true);
   }
 
   public Fr(String str) {
-    this(Bn256JNI.new_Fr__SWIG_3(str), true);
+    this(MclJNI.new_Fr__SWIG_4(str), true);
   }
 
   public boolean equals(Fr rhs) {
-    return Bn256JNI.Fr_equals(swigCPtr, this, Fr.getCPtr(rhs), rhs);
+    return MclJNI.Fr_equals(swigCPtr, this, Fr.getCPtr(rhs), rhs);
+  }
+
+  public void setStr(String str, int base) {
+    MclJNI.Fr_setStr__SWIG_0(swigCPtr, this, str, base);
   }
 
   public void setStr(String str) {
-    Bn256JNI.Fr_setStr(swigCPtr, this, str);
+    MclJNI.Fr_setStr__SWIG_1(swigCPtr, this, str);
   }
 
   public void setInt(int x) {
-    Bn256JNI.Fr_setInt(swigCPtr, this, x);
+    MclJNI.Fr_setInt(swigCPtr, this, x);
   }
 
   public void clear() {
-    Bn256JNI.Fr_clear(swigCPtr, this);
+    MclJNI.Fr_clear(swigCPtr, this);
   }
 
-  public void setRand() {
-    Bn256JNI.Fr_setRand(swigCPtr, this);
+  public void setByCSPRNG() {
+    MclJNI.Fr_setByCSPRNG(swigCPtr, this);
+  }
+
+  public String toString(int base) {
+    return MclJNI.Fr_toString__SWIG_0(swigCPtr, this, base);
   }
 
   public String toString() {
-    return Bn256JNI.Fr_toString(swigCPtr, this);
+    return MclJNI.Fr_toString__SWIG_1(swigCPtr, this);
+  }
+
+  public void deserialize(byte[] cbuf) {
+    MclJNI.Fr_deserialize(swigCPtr, this, cbuf);
   }
 
 }
diff --git a/ffi/java/com/herumi/mcl/G1.java b/ffi/java/com/herumi/mcl/G1.java
index 377df2ad..146d904e 100644
--- a/ffi/java/com/herumi/mcl/G1.java
+++ b/ffi/java/com/herumi/mcl/G1.java
@@ -29,46 +29,54 @@ public synchronized void delete() {
     if (swigCPtr != 0) {
       if (swigCMemOwn) {
         swigCMemOwn = false;
-        Bn256JNI.delete_G1(swigCPtr);
+        MclJNI.delete_G1(swigCPtr);
       }
       swigCPtr = 0;
     }
   }
 
   public G1() {
-    this(Bn256JNI.new_G1__SWIG_0(), true);
+    this(MclJNI.new_G1__SWIG_0(), true);
   }
 
   public G1(G1 rhs) {
-    this(Bn256JNI.new_G1__SWIG_1(G1.getCPtr(rhs), rhs), true);
+    this(MclJNI.new_G1__SWIG_1(G1.getCPtr(rhs), rhs), true);
   }
 
-  public G1(String x, String y) {
-    this(Bn256JNI.new_G1__SWIG_2(x, y), true);
+  public G1(Fp x, Fp y) {
+    this(MclJNI.new_G1__SWIG_2(Fp.getCPtr(x), x, Fp.getCPtr(y), y), true);
   }
 
   public boolean equals(G1 rhs) {
-    return Bn256JNI.G1_equals(swigCPtr, this, G1.getCPtr(rhs), rhs);
+    return MclJNI.G1_equals(swigCPtr, this, G1.getCPtr(rhs), rhs);
   }
 
-  public void set(String x, String y) {
-    Bn256JNI.G1_set(swigCPtr, this, x, y);
+  public void set(Fp x, Fp y) {
+    MclJNI.G1_set(swigCPtr, this, Fp.getCPtr(x), x, Fp.getCPtr(y), y);
   }
 
-  public void hashAndMapToG1(String m) {
-    Bn256JNI.G1_hashAndMapToG1(swigCPtr, this, m);
+  public void clear() {
+    MclJNI.G1_clear(swigCPtr, this);
   }
 
-  public void clear() {
-    Bn256JNI.G1_clear(swigCPtr, this);
+  public void setStr(String str, int base) {
+    MclJNI.G1_setStr__SWIG_0(swigCPtr, this, str, base);
   }
 
   public void setStr(String str) {
-    Bn256JNI.G1_setStr(swigCPtr, this, str);
+    MclJNI.G1_setStr__SWIG_1(swigCPtr, this, str);
+  }
+
+  public String toString(int base) {
+    return MclJNI.G1_toString__SWIG_0(swigCPtr, this, base);
   }
 
   public String toString() {
-    return Bn256JNI.G1_toString(swigCPtr, this);
+    return MclJNI.G1_toString__SWIG_1(swigCPtr, this);
+  }
+
+  public void deserialize(byte[] cbuf) {
+    MclJNI.G1_deserialize(swigCPtr, this, cbuf);
   }
 
 }
diff --git a/ffi/java/com/herumi/mcl/G2.java b/ffi/java/com/herumi/mcl/G2.java
index 3731295e..5aded6df 100644
--- a/ffi/java/com/herumi/mcl/G2.java
+++ b/ffi/java/com/herumi/mcl/G2.java
@@ -29,42 +29,54 @@ public synchronized void delete() {
     if (swigCPtr != 0) {
       if (swigCMemOwn) {
         swigCMemOwn = false;
-        Bn256JNI.delete_G2(swigCPtr);
+        MclJNI.delete_G2(swigCPtr);
       }
       swigCPtr = 0;
     }
   }
 
   public G2() {
-    this(Bn256JNI.new_G2__SWIG_0(), true);
+    this(MclJNI.new_G2__SWIG_0(), true);
   }
 
   public G2(G2 rhs) {
-    this(Bn256JNI.new_G2__SWIG_1(G2.getCPtr(rhs), rhs), true);
+    this(MclJNI.new_G2__SWIG_1(G2.getCPtr(rhs), rhs), true);
   }
 
-  public G2(String xa, String xb, String ya, String yb) {
-    this(Bn256JNI.new_G2__SWIG_2(xa, xb, ya, yb), true);
+  public G2(Fp ax, Fp ay, Fp bx, Fp by) {
+    this(MclJNI.new_G2__SWIG_2(Fp.getCPtr(ax), ax, Fp.getCPtr(ay), ay, Fp.getCPtr(bx), bx, Fp.getCPtr(by), by), true);
   }
 
   public boolean equals(G2 rhs) {
-    return Bn256JNI.G2_equals(swigCPtr, this, G2.getCPtr(rhs), rhs);
+    return MclJNI.G2_equals(swigCPtr, this, G2.getCPtr(rhs), rhs);
   }
 
-  public void set(String xa, String xb, String ya, String yb) {
-    Bn256JNI.G2_set(swigCPtr, this, xa, xb, ya, yb);
+  public void set(Fp ax, Fp ay, Fp bx, Fp by) {
+    MclJNI.G2_set(swigCPtr, this, Fp.getCPtr(ax), ax, Fp.getCPtr(ay), ay, Fp.getCPtr(bx), bx, Fp.getCPtr(by), by);
   }
 
   public void clear() {
-    Bn256JNI.G2_clear(swigCPtr, this);
+    MclJNI.G2_clear(swigCPtr, this);
+  }
+
+  public void setStr(String str, int base) {
+    MclJNI.G2_setStr__SWIG_0(swigCPtr, this, str, base);
   }
 
   public void setStr(String str) {
-    Bn256JNI.G2_setStr(swigCPtr, this, str);
+    MclJNI.G2_setStr__SWIG_1(swigCPtr, this, str);
+  }
+
+  public String toString(int base) {
+    return MclJNI.G2_toString__SWIG_0(swigCPtr, this, base);
   }
 
   public String toString() {
-    return Bn256JNI.G2_toString(swigCPtr, this);
+    return MclJNI.G2_toString__SWIG_1(swigCPtr, this);
+  }
+
+  public void deserialize(byte[] cbuf) {
+    MclJNI.G2_deserialize(swigCPtr, this, cbuf);
   }
 
 }
diff --git a/ffi/java/com/herumi/mcl/GT.java b/ffi/java/com/herumi/mcl/GT.java
index 91531f3e..187af707 100644
--- a/ffi/java/com/herumi/mcl/GT.java
+++ b/ffi/java/com/herumi/mcl/GT.java
@@ -29,34 +29,42 @@ public synchronized void delete() {
     if (swigCPtr != 0) {
       if (swigCMemOwn) {
         swigCMemOwn = false;
-        Bn256JNI.delete_GT(swigCPtr);
+        MclJNI.delete_GT(swigCPtr);
       }
       swigCPtr = 0;
     }
   }
 
   public GT() {
-    this(Bn256JNI.new_GT__SWIG_0(), true);
+    this(MclJNI.new_GT__SWIG_0(), true);
   }
 
   public GT(GT rhs) {
-    this(Bn256JNI.new_GT__SWIG_1(GT.getCPtr(rhs), rhs), true);
+    this(MclJNI.new_GT__SWIG_1(GT.getCPtr(rhs), rhs), true);
   }
 
   public boolean equals(GT rhs) {
-    return Bn256JNI.GT_equals(swigCPtr, this, GT.getCPtr(rhs), rhs);
+    return MclJNI.GT_equals(swigCPtr, this, GT.getCPtr(rhs), rhs);
   }
 
   public void clear() {
-    Bn256JNI.GT_clear(swigCPtr, this);
+    MclJNI.GT_clear(swigCPtr, this);
+  }
+
+  public void setStr(String str, int base) {
+    MclJNI.GT_setStr__SWIG_0(swigCPtr, this, str, base);
   }
 
   public void setStr(String str) {
-    Bn256JNI.GT_setStr(swigCPtr, this, str);
+    MclJNI.GT_setStr__SWIG_1(swigCPtr, this, str);
+  }
+
+  public String toString(int base) {
+    return MclJNI.GT_toString__SWIG_0(swigCPtr, this, base);
   }
 
   public String toString() {
-    return Bn256JNI.GT_toString(swigCPtr, this);
+    return MclJNI.GT_toString__SWIG_1(swigCPtr, this);
   }
 
 }
diff --git a/ffi/java/com/herumi/mcl/Mcl.java b/ffi/java/com/herumi/mcl/Mcl.java
new file mode 100644
index 00000000..c33e0465
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/Mcl.java
@@ -0,0 +1,128 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class Mcl implements MclConstants {
+  public static void SystemInit(int curveType) {
+    MclJNI.SystemInit(curveType);
+  }
+
+  public static void neg(Fr y, Fr x) {
+    MclJNI.neg__SWIG_0(Fr.getCPtr(y), y, Fr.getCPtr(x), x);
+  }
+
+  public static void add(Fr z, Fr x, Fr y) {
+    MclJNI.add__SWIG_0(Fr.getCPtr(z), z, Fr.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void sub(Fr z, Fr x, Fr y) {
+    MclJNI.sub__SWIG_0(Fr.getCPtr(z), z, Fr.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void mul(Fr z, Fr x, Fr y) {
+    MclJNI.mul__SWIG_0(Fr.getCPtr(z), z, Fr.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void mul(G1 z, G1 x, Fr y) {
+    MclJNI.mul__SWIG_1(G1.getCPtr(z), z, G1.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void mul(G2 z, G2 x, Fr y) {
+    MclJNI.mul__SWIG_2(G2.getCPtr(z), z, G2.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void div(Fr z, Fr x, Fr y) {
+    MclJNI.div__SWIG_0(Fr.getCPtr(z), z, Fr.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void pow(GT z, GT x, Fr y) {
+    MclJNI.pow__SWIG_0(GT.getCPtr(z), z, GT.getCPtr(x), x, Fr.getCPtr(y), y);
+  }
+
+  public static void neg(Fp y, Fp x) {
+    MclJNI.neg__SWIG_1(Fp.getCPtr(y), y, Fp.getCPtr(x), x);
+  }
+
+  public static void add(Fp z, Fp x, Fp y) {
+    MclJNI.add__SWIG_1(Fp.getCPtr(z), z, Fp.getCPtr(x), x, Fp.getCPtr(y), y);
+  }
+
+  public static void sub(Fp z, Fp x, Fp y) {
+    MclJNI.sub__SWIG_1(Fp.getCPtr(z), z, Fp.getCPtr(x), x, Fp.getCPtr(y), y);
+  }
+
+  public static void mul(Fp z, Fp x, Fp y) {
+    MclJNI.mul__SWIG_3(Fp.getCPtr(z), z, Fp.getCPtr(x), x, Fp.getCPtr(y), y);
+  }
+
+  public static void mul(G1 z, G1 x, Fp y) {
+    MclJNI.mul__SWIG_4(G1.getCPtr(z), z, G1.getCPtr(x), x, Fp.getCPtr(y), y);
+  }
+
+  public static void mul(G2 z, G2 x, Fp y) {
+    MclJNI.mul__SWIG_5(G2.getCPtr(z), z, G2.getCPtr(x), x, Fp.getCPtr(y), y);
+  }
+
+  public static void div(Fp z, Fp x, Fp y) {
+    MclJNI.div__SWIG_1(Fp.getCPtr(z), z, Fp.getCPtr(x), x, Fp.getCPtr(y), y);
+  }
+
+  public static void pow(GT z, GT x, Fp y) {
+    MclJNI.pow__SWIG_1(GT.getCPtr(z), z, GT.getCPtr(x), x, Fp.getCPtr(y), y);
+  }
+
+  public static void neg(G1 y, G1 x) {
+    MclJNI.neg__SWIG_2(G1.getCPtr(y), y, G1.getCPtr(x), x);
+  }
+
+  public static void dbl(G1 y, G1 x) {
+    MclJNI.dbl__SWIG_0(G1.getCPtr(y), y, G1.getCPtr(x), x);
+  }
+
+  public static void add(G1 z, G1 x, G1 y) {
+    MclJNI.add__SWIG_2(G1.getCPtr(z), z, G1.getCPtr(x), x, G1.getCPtr(y), y);
+  }
+
+  public static void sub(G1 z, G1 x, G1 y) {
+    MclJNI.sub__SWIG_2(G1.getCPtr(z), z, G1.getCPtr(x), x, G1.getCPtr(y), y);
+  }
+
+  public static void pairing(GT e, G1 P, G2 Q) {
+    MclJNI.pairing(GT.getCPtr(e), e, G1.getCPtr(P), P, G2.getCPtr(Q), Q);
+  }
+
+  public static void hashAndMapToG1(G1 P, byte[] cbuf) {
+    MclJNI.hashAndMapToG1(G1.getCPtr(P), P, cbuf);
+  }
+
+  public static void neg(G2 y, G2 x) {
+    MclJNI.neg__SWIG_3(G2.getCPtr(y), y, G2.getCPtr(x), x);
+  }
+
+  public static void dbl(G2 y, G2 x) {
+    MclJNI.dbl__SWIG_1(G2.getCPtr(y), y, G2.getCPtr(x), x);
+  }
+
+  public static void add(G2 z, G2 x, G2 y) {
+    MclJNI.add__SWIG_3(G2.getCPtr(z), z, G2.getCPtr(x), x, G2.getCPtr(y), y);
+  }
+
+  public static void sub(G2 z, G2 x, G2 y) {
+    MclJNI.sub__SWIG_3(G2.getCPtr(z), z, G2.getCPtr(x), x, G2.getCPtr(y), y);
+  }
+
+  public static void hashAndMapToG2(G2 P, byte[] cbuf) {
+    MclJNI.hashAndMapToG2(G2.getCPtr(P), P, cbuf);
+  }
+
+  public static void mul(GT z, GT x, GT y) {
+    MclJNI.mul__SWIG_6(GT.getCPtr(z), z, GT.getCPtr(x), x, GT.getCPtr(y), y);
+  }
+
+}
diff --git a/ffi/java/com/herumi/mcl/MclConstants.java b/ffi/java/com/herumi/mcl/MclConstants.java
new file mode 100644
index 00000000..c972e0a9
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/MclConstants.java
@@ -0,0 +1,14 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public interface MclConstants {
+  public final static int BN254 = 0;
+  public final static int BLS12_381 = 5;
+}
diff --git a/ffi/java/com/herumi/mcl/MclJNI.java b/ffi/java/com/herumi/mcl/MclJNI.java
new file mode 100644
index 00000000..7181f50a
--- /dev/null
+++ b/ffi/java/com/herumi/mcl/MclJNI.java
@@ -0,0 +1,104 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * Do not make changes to this file unless you know what you are doing--modify
+ * the SWIG interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+package com.herumi.mcl;
+
+public class MclJNI {
+  public final static native void SystemInit(int jarg1);
+  public final static native void neg__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_);
+  public final static native void add__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
+  public final static native void sub__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
+  public final static native void mul__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
+  public final static native void mul__SWIG_1(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_, long jarg3, Fr jarg3_);
+  public final static native void mul__SWIG_2(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_, long jarg3, Fr jarg3_);
+  public final static native void div__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
+  public final static native void pow__SWIG_0(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, Fr jarg3_);
+  public final static native long new_Fr__SWIG_0();
+  public final static native long new_Fr__SWIG_1(long jarg1, Fr jarg1_);
+  public final static native long new_Fr__SWIG_2(int jarg1);
+  public final static native long new_Fr__SWIG_3(String jarg1, int jarg2);
+  public final static native long new_Fr__SWIG_4(String jarg1);
+  public final static native boolean Fr_equals(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_);
+  public final static native void Fr_setStr__SWIG_0(long jarg1, Fr jarg1_, String jarg2, int jarg3);
+  public final static native void Fr_setStr__SWIG_1(long jarg1, Fr jarg1_, String jarg2);
+  public final static native void Fr_setInt(long jarg1, Fr jarg1_, int jarg2);
+  public final static native void Fr_clear(long jarg1, Fr jarg1_);
+  public final static native void Fr_setByCSPRNG(long jarg1, Fr jarg1_);
+  public final static native String Fr_toString__SWIG_0(long jarg1, Fr jarg1_, int jarg2);
+  public final static native String Fr_toString__SWIG_1(long jarg1, Fr jarg1_);
+  public final static native void Fr_deserialize(long jarg1, Fr jarg1_, byte[] jarg2);
+  public final static native void delete_Fr(long jarg1);
+  public final static native void neg__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_);
+  public final static native void add__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
+  public final static native void sub__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
+  public final static native void mul__SWIG_3(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
+  public final static native void mul__SWIG_4(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_, long jarg3, Fp jarg3_);
+  public final static native void mul__SWIG_5(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_, long jarg3, Fp jarg3_);
+  public final static native void div__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
+  public final static native void pow__SWIG_1(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, Fp jarg3_);
+  public final static native long new_Fp__SWIG_0();
+  public final static native long new_Fp__SWIG_1(long jarg1, Fp jarg1_);
+  public final static native long new_Fp__SWIG_2(int jarg1);
+  public final static native long new_Fp__SWIG_3(String jarg1, int jarg2);
+  public final static native long new_Fp__SWIG_4(String jarg1);
+  public final static native boolean Fp_equals(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_);
+  public final static native void Fp_setStr__SWIG_0(long jarg1, Fp jarg1_, String jarg2, int jarg3);
+  public final static native void Fp_setStr__SWIG_1(long jarg1, Fp jarg1_, String jarg2);
+  public final static native void Fp_setInt(long jarg1, Fp jarg1_, int jarg2);
+  public final static native void Fp_clear(long jarg1, Fp jarg1_);
+  public final static native void Fp_setByCSPRNG(long jarg1, Fp jarg1_);
+  public final static native String Fp_toString__SWIG_0(long jarg1, Fp jarg1_, int jarg2);
+  public final static native String Fp_toString__SWIG_1(long jarg1, Fp jarg1_);
+  public final static native void Fp_deserialize(long jarg1, Fp jarg1_, byte[] jarg2);
+  public final static native void delete_Fp(long jarg1);
+  public final static native void neg__SWIG_2(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_);
+  public final static native void dbl__SWIG_0(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_);
+  public final static native void add__SWIG_2(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_, long jarg3, G1 jarg3_);
+  public final static native void sub__SWIG_2(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_, long jarg3, G1 jarg3_);
+  public final static native void pairing(long jarg1, GT jarg1_, long jarg2, G1 jarg2_, long jarg3, G2 jarg3_);
+  public final static native void hashAndMapToG1(long jarg1, G1 jarg1_, byte[] jarg2);
+  public final static native long new_G1__SWIG_0();
+  public final static native long new_G1__SWIG_1(long jarg1, G1 jarg1_);
+  public final static native long new_G1__SWIG_2(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_);
+  public final static native boolean G1_equals(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_);
+  public final static native void G1_set(long jarg1, G1 jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
+  public final static native void G1_clear(long jarg1, G1 jarg1_);
+  public final static native void G1_setStr__SWIG_0(long jarg1, G1 jarg1_, String jarg2, int jarg3);
+  public final static native void G1_setStr__SWIG_1(long jarg1, G1 jarg1_, String jarg2);
+  public final static native String G1_toString__SWIG_0(long jarg1, G1 jarg1_, int jarg2);
+  public final static native String G1_toString__SWIG_1(long jarg1, G1 jarg1_);
+  public final static native void G1_deserialize(long jarg1, G1 jarg1_, byte[] jarg2);
+  public final static native void delete_G1(long jarg1);
+  public final static native void neg__SWIG_3(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_);
+  public final static native void dbl__SWIG_1(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_);
+  public final static native void add__SWIG_3(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_, long jarg3, G2 jarg3_);
+  public final static native void sub__SWIG_3(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_, long jarg3, G2 jarg3_);
+  public final static native void hashAndMapToG2(long jarg1, G2 jarg1_, byte[] jarg2);
+  public final static native long new_G2__SWIG_0();
+  public final static native long new_G2__SWIG_1(long jarg1, G2 jarg1_);
+  public final static native long new_G2__SWIG_2(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_, long jarg4, Fp jarg4_);
+  public final static native boolean G2_equals(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_);
+  public final static native void G2_set(long jarg1, G2 jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_, long jarg4, Fp jarg4_, long jarg5, Fp jarg5_);
+  public final static native void G2_clear(long jarg1, G2 jarg1_);
+  public final static native void G2_setStr__SWIG_0(long jarg1, G2 jarg1_, String jarg2, int jarg3);
+  public final static native void G2_setStr__SWIG_1(long jarg1, G2 jarg1_, String jarg2);
+  public final static native String G2_toString__SWIG_0(long jarg1, G2 jarg1_, int jarg2);
+  public final static native String G2_toString__SWIG_1(long jarg1, G2 jarg1_);
+  public final static native void G2_deserialize(long jarg1, G2 jarg1_, byte[] jarg2);
+  public final static native void delete_G2(long jarg1);
+  public final static native void mul__SWIG_6(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, GT jarg3_);
+  public final static native long new_GT__SWIG_0();
+  public final static native long new_GT__SWIG_1(long jarg1, GT jarg1_);
+  public final static native boolean GT_equals(long jarg1, GT jarg1_, long jarg2, GT jarg2_);
+  public final static native void GT_clear(long jarg1, GT jarg1_);
+  public final static native void GT_setStr__SWIG_0(long jarg1, GT jarg1_, String jarg2, int jarg3);
+  public final static native void GT_setStr__SWIG_1(long jarg1, GT jarg1_, String jarg2);
+  public final static native String GT_toString__SWIG_0(long jarg1, GT jarg1_, int jarg2);
+  public final static native String GT_toString__SWIG_1(long jarg1, GT jarg1_);
+  public final static native void delete_GT(long jarg1);
+}
diff --git a/ffi/java/java.md b/ffi/java/java.md
index 3fe86135..b0132ca8 100644
--- a/ffi/java/java.md
+++ b/ffi/java/java.md
@@ -21,7 +21,7 @@ System.loadLibrary("mcl_bn256");
 ## Fr
 * `Fr::setInt(int x)` ; set by x
 * `Fr::setStr(String str)` ; set by str such as "123", "0xfff", etc.
-* `Fr::setRand()` ; randomly set
+* `Fr::setByCSPRNG()` ; set to a random value drawn from a cryptographically secure PRNG
 * `Bn256.neg(Fr y, Fr x)` ; `y = -x`
 * `Bn256.add(Fr z, Fr x, Fr y)` ; `z = x + y`
 * `Bn256.sub(Fr z, Fr x, Fr y)` ; `z = x - y`
@@ -68,7 +68,7 @@ String yb = "7937318970632701341203597196594272556916396164729705624521405069090
 G2 Q = new G2(xa, xb, ya, yb); // fixed point of G2
 
 Fr s = new Fr();
-s.setRand(); // secret key
+s.setByCSPRNG(); // secret key
 G2 pub = new G2();
 Bn256.mul(pub, Q, s); // public key = sQ
 
diff --git a/ffi/java/mcl.i b/ffi/java/mcl.i
new file mode 100644
index 00000000..6649ca7a
--- /dev/null
+++ b/ffi/java/mcl.i
@@ -0,0 +1,28 @@
+%module Mcl
+// SWIG interface for the Java binding; the generated JNI entry points live in class MclJNI.
+%include "std_string.i"
+%include "std_except.i"
+// Map a Java byte[] onto every (const char *cbuf, size_t bufSize) parameter pair.
+%apply(char *STRING, size_t LENGTH) { (const char *cbuf, size_t bufSize) };
+%{
+#include <mcl/bls12_381.hpp>
+
+#include "mcl_impl.hpp"
+
+%}
+// Parse the same header so the classes and functions declared in it get Java proxies.
+%include "mcl_impl.hpp"
+// Curve ids passed to SystemInit. NOTE(review): must match MCL_BN254 / MCL_BLS12_381 - values not visible here, confirm.
+%javaconst(1);
+#define BN254 0
+#define BLS12_381 5
+// Typemaps for a void serialize(std::string& out) method (none is declared in mcl_impl.hpp yet): return the bytes as byte[].
+%typemap(jtype) void serialize "byte[]"
+%typemap(jstype) void serialize "byte[]"
+%typemap(jni) void serialize "jbyteArray"
+%typemap(javaout) void serialize { return $jnicall; }
+%typemap(in, numinputs=0) std::string& out (std::string temp) "$1=&temp;"
+%typemap(argout) std::string& out {
+  $result = JCALL1(NewByteArray, jenv, $1->size());
+  JCALL4(SetByteArrayRegion, jenv, $result, 0, $1->size(), (const jbyte*)$1->c_str());
+}
diff --git a/ffi/java/mcl_impl.hpp b/ffi/java/mcl_impl.hpp
new file mode 100644
index 00000000..8edb6568
--- /dev/null
+++ b/ffi/java/mcl_impl.hpp
@@ -0,0 +1,353 @@
+#include <mcl/bls12_381.hpp>
+#include <stdint.h>
+#include <sstream>
+
+inline void SystemInit(int curveType) throw(std::exception) // selects the pairing curve; inline because this header defines it (no duplicate symbol if included twice)
+{
+	mcl::CurveParam cp;
+	switch (curveType) {
+	case MCL_BN254: cp = mcl::BN254; break;
+	case MCL_BN_SNARK1: cp = mcl::BN_SNARK1; break;
+	case MCL_BLS12_381: cp = mcl::BLS12_381; break;
+	default: // any other id is rejected before cp is used
+		throw std::runtime_error("bad curveType");
+	}
+	mcl::bn::initPairing(cp); // NOTE(review): presumably sets library-wide curve state used by Fr/Fp/G1/G2/GT - confirm in mcl
+}
+
+class G1;
+class G2;
+class GT;
+/*
+	Fr = Z / rZ; value wrapper over mcl::bn::Fr for the SWIG/Java binding (methods marked throw(std::exception) get a try/catch in the generated JNI wrapper)
+*/
+class Fr {
+	mcl::bn::Fr self_; // wrapped mcl element; the friends below access it directly
+	friend class G1;
+	friend class G2;
+	friend class GT;
+	friend void neg(Fr& y, const Fr& x);
+	friend void add(Fr& z, const Fr& x, const Fr& y);
+	friend void sub(Fr& z, const Fr& x, const Fr& y);
+	friend void mul(Fr& z, const Fr& x, const Fr& y);
+	friend void mul(G1& z, const G1& x, const Fr& y);
+	friend void mul(G2& z, const G2& x, const Fr& y);
+	friend void div(Fr& z, const Fr& x, const Fr& y);
+	friend void pow(GT& z, const GT& x, const Fr& y);
+public:
+	Fr() {} // self_ is default-constructed
+	Fr(const Fr& rhs) : self_(rhs.self_) {}
+	Fr(int x) : self_(x) {} // not explicit: an int converts implicitly to Fr
+	Fr(const std::string& str, int base = 0) throw(std::exception) // parses str through mcl::bn::Fr(str, base)
+		: self_(str, base) {}
+	bool equals(const Fr& rhs) const { return self_ == rhs.self_; } // compares the wrapped values
+	void setStr(const std::string& str, int base = 0) throw(std::exception) // NOTE(review): base 0 presumably lets mcl infer the radix from a prefix such as 0x - confirm
+	{
+		self_.setStr(str, base);
+	}
+	void setInt(int x)
+	{
+		self_ = x;
+	}
+	void clear() // forwards to mcl::bn::Fr::clear()
+	{
+		self_.clear();
+	}
+	void setByCSPRNG() // forwards to mcl::bn::Fr::setByCSPRNG(); the random source is chosen inside mcl
+	{
+		self_.setByCSPRNG();
+	}
+	std::string toString(int base = 0) const throw(std::exception) // text form produced by mcl getStr(base)
+	{
+		return self_.getStr(base);
+	}
+	void deserialize(const char *cbuf, size_t bufSize) throw(std::exception) // loads the value from bufSize bytes at cbuf
+	{
+		if (self_.deserialize(cbuf, bufSize) == 0) { // a result of 0 is treated as failure
+			throw std::runtime_error("deserialize");
+		}
+	}
+};
+
+inline void neg(Fr& y, const Fr& x) // y = -x; inline (here and below) because the header itself holds the definition
+{
+	mcl::bn::Fr::neg(y.self_, x.self_);
+}
+
+inline void add(Fr& z, const Fr& x, const Fr& y) // z = x + y
+{
+	mcl::bn::Fr::add(z.self_, x.self_, y.self_);
+}
+
+inline void sub(Fr& z, const Fr& x, const Fr& y) // z = x - y
+{
+	mcl::bn::Fr::sub(z.self_, x.self_, y.self_);
+}
+
+inline void mul(Fr& z, const Fr& x, const Fr& y) // forwards to mcl::bn::Fr::mul
+{
+	mcl::bn::Fr::mul(z.self_, x.self_, y.self_);
+}
+
+inline void div(Fr& z, const Fr& x, const Fr& y) // forwards to mcl::bn::Fr::div
+{
+	mcl::bn::Fr::div(z.self_, x.self_, y.self_);
+}
+
+class Fp { // value wrapper over mcl::bn::Fp for the SWIG/Java binding; same surface as Fr
+	mcl::bn::Fp self_; // wrapped mcl element; the friends below access it directly
+	friend class G1;
+	friend class G2;
+	friend class GT;
+	friend void neg(Fp& y, const Fp& x);
+	friend void add(Fp& z, const Fp& x, const Fp& y);
+	friend void sub(Fp& z, const Fp& x, const Fp& y);
+	friend void mul(Fp& z, const Fp& x, const Fp& y);
+	friend void mul(G1& z, const G1& x, const Fp& y); // declared only: this header defines no such overload
+	friend void mul(G2& z, const G2& x, const Fp& y); // declared only: this header defines no such overload
+	friend void div(Fp& z, const Fp& x, const Fp& y);
+	friend void pow(GT& z, const GT& x, const Fp& y); // declared only: this header defines no such overload
+public:
+	Fp() {} // self_ is default-constructed
+	Fp(const Fp& rhs) : self_(rhs.self_) {}
+	Fp(int x) : self_(x) {} // not explicit: an int converts implicitly to Fp
+	Fp(const std::string& str, int base = 0) throw(std::exception) // parses str through mcl::bn::Fp(str, base)
+		: self_(str, base) {}
+	bool equals(const Fp& rhs) const { return self_ == rhs.self_; } // compares the wrapped values
+	void setStr(const std::string& str, int base = 0) throw(std::exception) // forwards to mcl::bn::Fp::setStr
+	{
+		self_.setStr(str, base);
+	}
+	void setInt(int x)
+	{
+		self_ = x;
+	}
+	void clear() // forwards to mcl::bn::Fp::clear()
+	{
+		self_.clear();
+	}
+	void setByCSPRNG() // forwards to mcl::bn::Fp::setByCSPRNG(); the random source is chosen inside mcl
+	{
+		self_.setByCSPRNG();
+	}
+	std::string toString(int base = 0) const throw(std::exception) // text form produced by mcl getStr(base)
+	{
+		return self_.getStr(base);
+	}
+	void deserialize(const char *cbuf, size_t bufSize) throw(std::exception) // loads the value from bufSize bytes at cbuf
+	{
+		if (self_.deserialize(cbuf, bufSize) == 0) { // a result of 0 is treated as failure
+			throw std::runtime_error("deserialize");
+		}
+	}
+};
+
+inline void neg(Fp& y, const Fp& x) // forwards to mcl::bn::Fp::neg; inline (here and below) because the header itself holds the definition
+{
+	mcl::bn::Fp::neg(y.self_, x.self_);
+}
+
+inline void add(Fp& z, const Fp& x, const Fp& y) // forwards to mcl::bn::Fp::add
+{
+	mcl::bn::Fp::add(z.self_, x.self_, y.self_);
+}
+
+inline void sub(Fp& z, const Fp& x, const Fp& y) // forwards to mcl::bn::Fp::sub
+{
+	mcl::bn::Fp::sub(z.self_, x.self_, y.self_);
+}
+
+inline void mul(Fp& z, const Fp& x, const Fp& y) // forwards to mcl::bn::Fp::mul
+{
+	mcl::bn::Fp::mul(z.self_, x.self_, y.self_);
+}
+
+inline void div(Fp& z, const Fp& x, const Fp& y) // forwards to mcl::bn::Fp::div
+{
+	mcl::bn::Fp::div(z.self_, x.self_, y.self_);
+}
+
+
+/*
+	#G1 = r; value wrapper over mcl::bn::G1 for the SWIG/Java binding (methods marked throw(std::exception) get a try/catch in the generated JNI wrapper)
+*/
+class G1 {
+	mcl::bn::G1 self_; // wrapped mcl point; the friends below access it directly
+	friend void neg(G1& y, const G1& x);
+	friend void dbl(G1& y, const G1& x);
+	friend void add(G1& z, const G1& x, const G1& y);
+	friend void sub(G1& z, const G1& x, const G1& y);
+	friend void mul(G1& z, const G1& x, const Fr& y);
+	friend void pairing(GT& e, const G1& P, const G2& Q);
+	friend void hashAndMapToG1(G1& P, const char *cbuf, size_t bufSize) throw(std::exception);
+public:
+	G1() {} // self_ is default-constructed
+	G1(const G1& rhs) : self_(rhs.self_) {}
+	G1(const Fp& x, const Fp& y) throw(std::exception) // builds the point through mcl::bn::G1(x, y)
+		: self_(x.self_, y.self_) { }
+	bool equals(const G1& rhs) const { return self_ == rhs.self_; } // compares the wrapped points
+	void set(const Fp& x, const Fp& y) throw(std::exception) // forwards to mcl::bn::G1::set(x, y)
+	{
+		self_.set(x.self_, y.self_);
+	}
+	void clear() // forwards to mcl::bn::G1::clear()
+	{
+		self_.clear();
+	}
+	/*
+		compressed format
+	*/
+	void setStr(const std::string& str, int base = 0) throw(std::exception) // forwards to mcl::bn::G1::setStr
+	{
+		self_.setStr(str, base);
+	}
+	std::string toString(int base = 0) const throw(std::exception) // text form produced by mcl getStr(base)
+	{
+		return self_.getStr(base);
+	}
+	void deserialize(const char *cbuf, size_t bufSize) throw(std::exception) // loads the point from bufSize bytes at cbuf
+	{
+		if (self_.deserialize(cbuf, bufSize) == 0) { // a result of 0 is treated as failure
+			throw std::runtime_error("deserialize");
+		}
+	}
+};
+
+inline void neg(G1& y, const G1& x) // forwards to mcl::bn::G1::neg; inline (here and below) because the header itself holds the definition
+{
+	mcl::bn::G1::neg(y.self_, x.self_);
+}
+inline void dbl(G1& y, const G1& x) // forwards to mcl::bn::G1::dbl
+{
+	mcl::bn::G1::dbl(y.self_, x.self_);
+}
+inline void add(G1& z, const G1& x, const G1& y) // forwards to mcl::bn::G1::add
+{
+	mcl::bn::G1::add(z.self_, x.self_, y.self_);
+}
+inline void sub(G1& z, const G1& x, const G1& y) // forwards to mcl::bn::G1::sub
+{
+	mcl::bn::G1::sub(z.self_, x.self_, y.self_);
+}
+inline void mul(G1& z, const G1& x, const Fr& y) // point x multiplied by scalar y, via mcl::bn::G1::mul
+{
+	mcl::bn::G1::mul(z.self_, x.self_, y.self_);
+}
+
+/*
+	#G2 = r; value wrapper over mcl::bn::G2 for the SWIG/Java binding (methods marked throw(std::exception) get a try/catch in the generated JNI wrapper)
+*/
+class G2 {
+	mcl::bn::G2 self_; // wrapped mcl point; the friends below access it directly
+	friend void neg(G2& y, const G2& x);
+	friend void dbl(G2& y, const G2& x);
+	friend void add(G2& z, const G2& x, const G2& y);
+	friend void sub(G2& z, const G2& x, const G2& y);
+	friend void mul(G2& z, const G2& x, const Fr& y);
+	friend void pairing(GT& e, const G1& P, const G2& Q);
+	friend void hashAndMapToG2(G2& P, const char *cbuf, size_t bufSize) throw(std::exception);
+public:
+	G2() {} // self_ is default-constructed
+	G2(const G2& rhs) : self_(rhs.self_) {}
+	G2(const Fp& ax, const Fp& ay, const Fp& bx, const Fp& by) throw(std::exception) // first coordinate = Fp2(ax, ay), second = Fp2(bx, by)
+		: self_(mcl::bn::Fp2(ax.self_, ay.self_), mcl::bn::Fp2(bx.self_, by.self_))
+	{
+	}
+	bool equals(const G2& rhs) const { return self_ == rhs.self_; } // compares the wrapped points
+	void set(const Fp& ax, const Fp& ay, const Fp& bx, const Fp& by) throw(std::exception) // same argument pairing as the constructor above
+	{
+		self_.set(mcl::bn::Fp2(ax.self_, ay.self_), mcl::bn::Fp2(bx.self_, by.self_));
+	}
+	void clear() // forwards to mcl::bn::G2::clear()
+	{
+		self_.clear();
+	}
+	/*
+		compressed format
+	*/
+	void setStr(const std::string& str, int base = 0) throw(std::exception) // forwards to mcl::bn::G2::setStr
+	{
+		self_.setStr(str, base);
+	}
+	std::string toString(int base = 0) const throw(std::exception) // text form produced by mcl getStr(base)
+	{
+		return self_.getStr(base);
+	}
+	void deserialize(const char *cbuf, size_t bufSize) throw(std::exception) // loads the point from bufSize bytes at cbuf
+	{
+		if (self_.deserialize(cbuf, bufSize) == 0) { // a result of 0 is treated as failure
+			throw std::runtime_error("deserialize");
+		}
+	}
+};
+
+inline void neg(G2& y, const G2& x) // forwards to mcl::bn::G2::neg; inline (here and below) because the header itself holds the definition
+{
+	mcl::bn::G2::neg(y.self_, x.self_);
+}
+inline void dbl(G2& y, const G2& x) // forwards to mcl::bn::G2::dbl
+{
+	mcl::bn::G2::dbl(y.self_, x.self_);
+}
+inline void add(G2& z, const G2& x, const G2& y) // forwards to mcl::bn::G2::add
+{
+	mcl::bn::G2::add(z.self_, x.self_, y.self_);
+}
+inline void sub(G2& z, const G2& x, const G2& y) // forwards to mcl::bn::G2::sub
+{
+	mcl::bn::G2::sub(z.self_, x.self_, y.self_);
+}
+inline void mul(G2& z, const G2& x, const Fr& y) // point x multiplied by scalar y, via mcl::bn::G2::mul
+{
+	mcl::bn::G2::mul(z.self_, x.self_, y.self_);
+}
+
+/*
+	#GT = r; value wrapper over mcl::bn::Fp12 for the SWIG/Java binding (methods marked throw(std::exception) get a try/catch in the generated JNI wrapper)
+*/
+class GT {
+	mcl::bn::Fp12 self_; // wrapped mcl element; the friends below access it directly
+	friend void mul(GT& z, const GT& x, const GT& y);
+	friend void pow(GT& z, const GT& x, const Fr& y);
+	friend void pairing(GT& e, const G1& P, const G2& Q);
+public:
+	GT() {} // self_ is default-constructed
+	GT(const GT& rhs) : self_(rhs.self_) {}
+	bool equals(const GT& rhs) const { return self_ == rhs.self_; } // compares the wrapped values
+	void clear() // forwards to mcl::bn::Fp12::clear()
+	{
+		self_.clear();
+	}
+	void setStr(const std::string& str, int base = 0) throw(std::exception) // forwards to mcl::bn::Fp12::setStr
+	{
+		self_.setStr(str, base);
+	}
+	std::string toString(int base = 0) const throw(std::exception) // text form produced by mcl getStr(base)
+	{
+		return self_.getStr(base);
+	}
+};
+
+inline void mul(GT& z, const GT& x, const GT& y) // forwards to mcl::bn::Fp12::mul; inline (here and below) because the header itself holds the definition
+{
+	mcl::bn::Fp12::mul(z.self_, x.self_, y.self_);
+}
+inline void pow(GT& z, const GT& x, const Fr& y) // x raised to the scalar y, via mcl::bn::Fp12::pow
+{
+	mcl::bn::Fp12::pow(z.self_, x.self_, y.self_);
+}
+inline void pairing(GT& e, const G1& P, const G2& Q) // e receives the result of mcl::bn::pairing(P, Q)
+{
+	mcl::bn::pairing(e.self_, P.self_, Q.self_);
+}
+
+inline void hashAndMapToG1(G1& P, const char *cbuf, size_t bufSize) throw(std::exception) // P derived from bufSize bytes at cbuf by mcl::bn::hashAndMapToG1; inline: defined in a header
+{
+	mcl::bn::hashAndMapToG1(P.self_, cbuf, bufSize);
+}
+
+inline void hashAndMapToG2(G2& P, const char *cbuf, size_t bufSize) throw(std::exception) // P derived from bufSize bytes at cbuf by mcl::bn::hashAndMapToG2; inline: defined in a header
+{
+	mcl::bn::hashAndMapToG2(P.self_, cbuf, bufSize);
+}
+
diff --git a/ffi/java/mcl_wrap.cxx b/ffi/java/mcl_wrap.cxx
new file mode 100644
index 00000000..f9a36a75
--- /dev/null
+++ b/ffi/java/mcl_wrap.cxx
@@ -0,0 +1,2432 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 3.0.12
+ *
+ * This file is not intended to be easily readable and contains a number of
+ * coding conventions designed to improve portability and efficiency. Do not make
+ * changes to this file unless you know what you are doing--modify the SWIG
+ * interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+
+#ifndef SWIGJAVA
+#define SWIGJAVA
+#endif
+
+
+
+#ifdef __cplusplus
+/* SwigValueWrapper is described in swig.swg */
+template<typename T> class SwigValueWrapper { // holds a heap-allocated copy of a T and frees it on destruction
+  struct SwigMovePointer {
+    T *ptr; // owned: released by the destructor
+    SwigMovePointer(T *p) : ptr(p) { }
+    ~SwigMovePointer() { delete ptr; }
+    SwigMovePointer& operator=(SwigMovePointer& rhs) { T* oldptr = ptr; ptr = 0; delete oldptr; ptr = rhs.ptr; rhs.ptr = 0; return *this; } // frees the old pointee, then takes rhs.ptr and nulls it in rhs
+  } pointer;
+  SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs); // private and not defined here: wrapper-to-wrapper assignment is blocked
+  SwigValueWrapper(const SwigValueWrapper<T>& rhs); // private and not defined here: copy construction is blocked
+public:
+  SwigValueWrapper() : pointer(0) { }
+  SwigValueWrapper& operator=(const T& t) { SwigMovePointer tmp(new T(t)); pointer = tmp; return *this; } // stores a fresh copy of t
+  operator T&() const { return *pointer.ptr; } // no null check: only valid after a value has been assigned
+  T *operator&() { return pointer.ptr; }
+};
+
+template <typename T> T SwigValueInit() { // returns a value-initialized T
+  return T();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ *  This section contains generic SWIG labels for method/variable
+ *  declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+#  define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+#  define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+#  define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+#   define SWIGINLINE inline
+# else
+#   define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+#     define SWIGUNUSED __attribute__ ((__unused__))
+#   else
+#     define SWIGUNUSED
+#   endif
+# elif defined(__ICC)
+#   define SWIGUNUSED __attribute__ ((__unused__))
+# else
+#   define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIG_MSC_UNSUPPRESS_4505
+# if defined(_MSC_VER)
+#   pragma warning(disable : 4505) /* unreferenced local function has been removed */
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+#   define SWIGUNUSEDPARM(p)
+# else
+#   define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if defined(__GNUC__)
+#  if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#    ifndef GCC_HASCLASSVISIBILITY
+#      define GCC_HASCLASSVISIBILITY
+#    endif
+#  endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+#   if defined(STATIC_LINKED)
+#     define SWIGEXPORT
+#   else
+#     define SWIGEXPORT __declspec(dllexport)
+#   endif
+# else
+#   if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+#     define SWIGEXPORT __attribute__ ((visibility("default")))
+#   else
+#     define SWIGEXPORT
+#   endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+#   define SWIGSTDCALL __stdcall
+# else
+#   define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Apple's deprecated 'AssertMacros.h' from Carbon-framework */
+#if defined(__APPLE__) && !defined(__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES)
+# define __ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES 0
+#endif
+
+/* Intel's compiler complains if a variable which was never initialised is
+ * cast to void, which is a common idiom which we use to indicate that we
+ * are aware a variable isn't used.  So we just silence that warning.
+ * See: https://github.com/swig/swig/issues/192 for more discussion.
+ */
+#ifdef __INTEL_COMPILER
+# pragma warning disable 592
+#endif
+
+
+/* Fix for jlong on some versions of gcc on Windows */
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+  typedef long long __int64;
+#endif
+
+/* Fix for jlong on 64-bit x86 Solaris */
+#if defined(__x86_64)
+# ifdef _LP64
+#   undef _LP64
+# endif
+#endif
+
+#include <jni.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/* Support for throwing Java exceptions */
+typedef enum { /* codes start at 1; 0 is used as the end marker of the lookup table in SWIG_JavaThrowException */
+  SWIG_JavaOutOfMemoryError = 1, 
+  SWIG_JavaIOException, 
+  SWIG_JavaRuntimeException, 
+  SWIG_JavaIndexOutOfBoundsException,
+  SWIG_JavaArithmeticException,
+  SWIG_JavaIllegalArgumentException,
+  SWIG_JavaNullPointerException,
+  SWIG_JavaDirectorPureVirtual,
+  SWIG_JavaUnknownError
+} SWIG_JavaExceptionCodes;
+
+typedef struct { /* one row of the code -> Java class-name table */
+  SWIG_JavaExceptionCodes code;
+  const char *java_exception; /* class name in the slash-separated form passed to FindClass */
+} SWIG_JavaExceptions_t;
+
+
+static void SWIGUNUSED SWIG_JavaThrowException(JNIEnv *jenv, SWIG_JavaExceptionCodes code, const char *msg) {
+  jclass excep;
+  static const SWIG_JavaExceptions_t java_exceptions[] = {
+    { SWIG_JavaOutOfMemoryError, "java/lang/OutOfMemoryError" },
+    { SWIG_JavaIOException, "java/io/IOException" },
+    { SWIG_JavaRuntimeException, "java/lang/RuntimeException" },
+    { SWIG_JavaIndexOutOfBoundsException, "java/lang/IndexOutOfBoundsException" },
+    { SWIG_JavaArithmeticException, "java/lang/ArithmeticException" },
+    { SWIG_JavaIllegalArgumentException, "java/lang/IllegalArgumentException" },
+    { SWIG_JavaNullPointerException, "java/lang/NullPointerException" },
+    { SWIG_JavaDirectorPureVirtual, "java/lang/RuntimeException" },
+    { SWIG_JavaUnknownError,  "java/lang/UnknownError" },
+    { (SWIG_JavaExceptionCodes)0,  "java/lang/UnknownError" }
+  };
+  const SWIG_JavaExceptions_t *except_ptr = java_exceptions;
+  /* Linear search; an unrecognised code stops on the final 0 row and so maps to UnknownError. */
+  while (except_ptr->code != code && except_ptr->code)
+    except_ptr++;
+  /* Drop any exception already pending, then raise the selected class with msg (skipped if the class lookup fails). */
+  jenv->ExceptionClear();
+  excep = jenv->FindClass(except_ptr->java_exception);
+  if (excep)
+    jenv->ThrowNew(excep, msg);
+}
+
+
+/* Contract support */
+
+#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_JavaThrowException(jenv, SWIG_JavaIllegalArgumentException, msg); return nullreturn; } else
+
+
+#include <string>
+
+
+#include <typeinfo>
+#include <stdexcept>
+
+
+#include <mcl/bls12_381.hpp>
+
+#include "mcl_impl.hpp"
+
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_SystemInit(JNIEnv *jenv, jclass jcls, jint jarg1) {
+  int arg1 ;
+  /* JNI entry for SystemInit(int): forwards the curve id; a std::exception is rethrown as java.lang.RuntimeException. */
+  (void)jenv;
+  (void)jcls;
+  arg1 = (int)jarg1; 
+  try {
+    SystemInit(arg1);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  Fr *arg1 = 0 ;
+  Fr *arg2 = 0 ;
+  /* JNI entry for neg(Fr&, const Fr&): each jlong carries a native Fr*; a zero handle raises NullPointerException. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  arg1 = *(Fr **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr & reference is null");
+    return ;
+  } 
+  arg2 = *(Fr **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return ;
+  } 
+  neg(*arg1,(Fr const &)*arg2);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  Fr *arg1 = 0 ;
+  Fr *arg2 = 0 ;
+  Fr *arg3 = 0 ;
+  /* JNI entry for add(Fr&, const Fr&, const Fr&): each jlong carries a native Fr*; a zero handle raises NullPointerException. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  arg1 = *(Fr **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr & reference is null");
+    return ;
+  } 
+  arg2 = *(Fr **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return ;
+  } 
+  arg3 = *(Fr **)&jarg3;
+  if (!arg3) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return ;
+  } 
+  add(*arg1,(Fr const &)*arg2,(Fr const &)*arg3);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  Fr *arg1 = 0 ;
+  Fr *arg2 = 0 ;
+  Fr *arg3 = 0 ;
+  /* JNI entry for sub(Fr&, const Fr&, const Fr&): each jlong carries a native Fr*; a zero handle raises NullPointerException. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  arg1 = *(Fr **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr & reference is null");
+    return ;
+  } 
+  arg2 = *(Fr **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return ;
+  } 
+  arg3 = *(Fr **)&jarg3;
+  if (!arg3) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return ;
+  } 
+  sub(*arg1,(Fr const &)*arg2,(Fr const &)*arg3);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  Fr *arg1 = 0 ;
+  Fr *arg2 = 0 ;
+  Fr *arg3 = 0 ;
+  /* JNI entry for mul(Fr&, const Fr&, const Fr&): each jlong carries a native Fr*; a zero handle raises NullPointerException. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  arg1 = *(Fr **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr & reference is null");
+    return ;
+  } 
+  arg2 = *(Fr **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return ;
+  } 
+  arg3 = *(Fr **)&jarg3;
+  if (!arg3) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return ;
+  } 
+  mul(*arg1,(Fr const &)*arg2,(Fr const &)*arg3);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  G1 *arg1 = 0 ;
+  G1 *arg2 = 0 ;
+  Fr *arg3 = 0 ;
+  /* JNI entry for mul(G1&, const G1&, const Fr&): the jlongs carry native G1 and Fr pointers; a zero handle raises NullPointerException. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  arg1 = *(G1 **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 & reference is null");
+    return ;
+  } 
+  arg2 = *(G1 **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
+    return ;
+  } 
+  arg3 = *(Fr **)&jarg3;
+  if (!arg3) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return ;
+  } 
+  mul(*arg1,(G1 const &)*arg2,(Fr const &)*arg3);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  G2 *arg1 = 0 ;
+  G2 *arg2 = 0 ;
+  Fr *arg3 = 0 ;
+  /* JNI entry for mul(G2&, const G2&, const Fr&): the jlongs carry native G2 and Fr pointers; a zero handle raises NullPointerException. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  arg1 = *(G2 **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 & reference is null");
+    return ;
+  } 
+  arg2 = *(G2 **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
+    return ;
+  } 
+  arg3 = *(Fr **)&jarg3;
+  if (!arg3) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return ;
+  } 
+  mul(*arg1,(G2 const &)*arg2,(Fr const &)*arg3);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_div_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  Fr *arg1 = 0 ;
+  Fr *arg2 = 0 ;
+  Fr *arg3 = 0 ;
+  /* JNI entry for div(Fr&, const Fr&, const Fr&): each jlong carries a native Fr*; a zero handle raises NullPointerException. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  arg1 = *(Fr **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr & reference is null");
+    return ;
+  } 
+  arg2 = *(Fr **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return ;
+  } 
+  arg3 = *(Fr **)&jarg3;
+  if (!arg3) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return ;
+  } 
+  div(*arg1,(Fr const &)*arg2,(Fr const &)*arg3);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_pow_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  GT *arg1 = 0 ;
+  GT *arg2 = 0 ;
+  Fr *arg3 = 0 ;
+  /* JNI entry for pow(GT&, const GT&, const Fr&): the jlongs carry native GT and Fr pointers; a zero handle raises NullPointerException. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  arg1 = *(GT **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT & reference is null");
+    return ;
+  } 
+  arg2 = *(GT **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT const & reference is null");
+    return ;
+  } 
+  arg3 = *(Fr **)&jarg3;
+  if (!arg3) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return ;
+  } 
+  pow(*arg1,(GT const &)*arg2,(Fr const &)*arg3);
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+  jlong jresult = 0 ;
+  Fr *result = 0 ;
+  /* JNI entry for Fr(): heap-allocates the object and returns its address packed into a jlong handle. */
+  (void)jenv;
+  (void)jcls;
+  result = (Fr *)new Fr();
+  *(Fr **)&jresult = result; 
+  return jresult;
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jlong jresult = 0 ;
+  Fr *arg1 = 0 ;
+  Fr *result = 0 ;
+  /* JNI entry for Fr(const Fr&): copies the native object behind jarg1; a zero handle raises NullPointerException and returns 0. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return 0;
+  } 
+  result = (Fr *)new Fr((Fr const &)*arg1);
+  *(Fr **)&jresult = result; 
+  return jresult;
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jint jarg1) {
+  jlong jresult = 0 ;
+  int arg1 ;
+  Fr *result = 0 ;
+  /* JNI entry for Fr(int): heap-allocates the object and returns its address packed into a jlong handle. */
+  (void)jenv;
+  (void)jcls;
+  arg1 = (int)jarg1; 
+  result = (Fr *)new Fr(arg1);
+  *(Fr **)&jresult = result; 
+  return jresult;
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jstring jarg1, jint jarg2) {
+  jlong jresult = 0 ;
+  std::string *arg1 = 0 ;
+  int arg2 ;
+  Fr *result = 0 ;
+  /* JNI entry for Fr(const std::string&, int): copies the Java string into a std::string first; a std::exception becomes RuntimeException. */
+  (void)jenv;
+  (void)jcls;
+  if(!jarg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return 0;
+  }
+  const char *arg1_pstr = (const char *)jenv->GetStringUTFChars(jarg1, 0); 
+  if (!arg1_pstr) return 0;
+  std::string arg1_str(arg1_pstr);
+  arg1 = &arg1_str;
+  jenv->ReleaseStringUTFChars(jarg1, arg1_pstr); 
+  arg2 = (int)jarg2; 
+  try {
+    result = (Fr *)new Fr((std::string const &)*arg1,arg2);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  *(Fr **)&jresult = result; 
+  return jresult;
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jstring jarg1) {
+  jlong jresult = 0 ;
+  std::string *arg1 = 0 ;
+  Fr *result = 0 ;
+  /* JNI entry for Fr(const std::string&) with the default base; a std::exception becomes RuntimeException. */
+  (void)jenv;
+  (void)jcls;
+  if(!jarg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return 0;
+  }
+  const char *arg1_pstr = (const char *)jenv->GetStringUTFChars(jarg1, 0); 
+  if (!arg1_pstr) return 0;
+  std::string arg1_str(arg1_pstr);
+  arg1 = &arg1_str;
+  jenv->ReleaseStringUTFChars(jarg1, arg1_pstr); 
+  try {
+    result = (Fr *)new Fr((std::string const &)*arg1);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  *(Fr **)&jresult = result; 
+  return jresult;
+}
+
+
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fr_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  jboolean jresult = 0 ;
+  Fr *arg1 = (Fr *) 0 ;
+  Fr *arg2 = 0 ;
+  bool result;
+  /* JNI entry for Fr::equals: only the argument handle (jarg2) is null-checked; the receiver handle (jarg1) is used as-is. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  arg1 = *(Fr **)&jarg1; 
+  arg2 = *(Fr **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return 0;
+  } 
+  result = (bool)((Fr const *)arg1)->equals((Fr const &)*arg2);
+  jresult = (jboolean)result; 
+  return jresult;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+  Fr *arg1 = (Fr *) 0 ;
+  std::string *arg2 = 0 ;
+  int arg3 ;
+  /* JNI entry for Fr::setStr(str, base): copies the Java string into a std::string first; a std::exception becomes RuntimeException. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1; 
+  if(!jarg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return ;
+  }
+  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
+  if (!arg2_pstr) return ;
+  std::string arg2_str(arg2_pstr);
+  arg2 = &arg2_str;
+  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
+  arg3 = (int)jarg3; 
+  try {
+    (arg1)->setStr((std::string const &)*arg2,arg3);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+  Fr *arg1 = (Fr *) 0 ;
+  std::string *arg2 = 0 ;
+  /* JNI entry for Fr::setStr(str) with the default base; a std::exception becomes RuntimeException. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1; 
+  if(!jarg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return ;
+  }
+  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
+  if (!arg2_pstr) return ;
+  std::string arg2_str(arg2_pstr);
+  arg2 = &arg2_str;
+  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
+  try {
+    (arg1)->setStr((std::string const &)*arg2);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setInt(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+  Fr *arg1 = (Fr *) 0 ;
+  int arg2 ;
+  /* JNI entry for Fr::setInt(int); the receiver handle (jarg1) is not null-checked here. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1; 
+  arg2 = (int)jarg2; 
+  (arg1)->setInt(arg2);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  Fr *arg1 = (Fr *) 0 ;
+  /* JNI entry for Fr::clear(); the receiver handle (jarg1) is not null-checked here. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1; 
+  (arg1)->clear();
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setByCSPRNG(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  Fr *arg1 = (Fr *) 0 ;
+  /* JNI entry for Fr::setByCSPRNG(); the receiver handle (jarg1) is not null-checked here. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1; 
+  (arg1)->setByCSPRNG();
+}
+
+
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fr_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+  jstring jresult = 0 ;
+  Fr *arg1 = (Fr *) 0 ;
+  int arg2 ;
+  std::string result;
+  /* JNI entry for Fr::toString(base): returns the text as a new Java string; a std::exception becomes RuntimeException and returns 0. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1; 
+  arg2 = (int)jarg2; 
+  try {
+    result = ((Fr const *)arg1)->toString(arg2);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  jresult = jenv->NewStringUTF((&result)->c_str()); 
+  return jresult;
+}
+
+
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fr_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jstring jresult = 0 ;
+  Fr *arg1 = (Fr *) 0 ;
+  std::string result;
+  /* JNI entry for Fr::toString() with the default base; a std::exception becomes RuntimeException and returns 0. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1; 
+  try {
+    result = ((Fr const *)arg1)->toString();
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  jresult = jenv->NewStringUTF((&result)->c_str()); 
+  return jresult;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+  /* JNI bridge for Fr::deserialize(buf, size); a null Java array is passed on as (0, 0). */
+  Fr *arg1 = (Fr *) 0 ;
+  char *arg2 = (char *) 0 ;
+  size_t arg3 = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1; 
+  if (jarg2) {
+    arg2 = (char *) jenv->GetByteArrayElements(jarg2, 0);
+    /* NULL means the JNI call failed (an exception is presumably pending); nothing to release. */
+    if (!arg2) return ;
+    arg3 = (size_t) jenv->GetArrayLength(jarg2);
+  }
+  try {
+    (arg1)->deserialize((char const *)arg2,arg3);
+  }
+  catch(std::exception &_e) {
+    /* Release the borrowed elements on the error path as well so they are not leaked. */
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+  {
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+  }
+  
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1Fr(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+  /* JNI bridge: destroys the native Fr whose address is packed in jarg1. */
+  (void)jenv;
+  (void)jcls;
+  /* delete on a null pointer is a no-op, so a zero handle is harmless. */
+  Fr *victim = *(Fr **)&jarg1;
+  delete victim;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  /* JNI bridge for neg(Fp&, const Fp&); both handles are null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  /* First argument: passed as a non-const reference. */
+  Fp *dst = *(Fp **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const Fp *src = *(Fp **)&jarg2;
+  if (src == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  neg(*dst, *src);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  /* JNI bridge for add(Fp&, const Fp&, const Fp&); every handle is null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  /* First argument: passed as a non-const reference. */
+  Fp *dst = *(Fp **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const Fp *lhs = *(Fp **)&jarg2;
+  if (lhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  /* Third argument: passed as a const reference. */
+  const Fp *rhs = *(Fp **)&jarg3;
+  if (rhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  add(*dst, *lhs, *rhs);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  /* JNI bridge for sub(Fp&, const Fp&, const Fp&); every handle is null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  /* First argument: passed as a non-const reference. */
+  Fp *dst = *(Fp **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const Fp *lhs = *(Fp **)&jarg2;
+  if (lhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  /* Third argument: passed as a const reference. */
+  const Fp *rhs = *(Fp **)&jarg3;
+  if (rhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  sub(*dst, *lhs, *rhs);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  /* JNI bridge for mul(Fp&, const Fp&, const Fp&); every handle is null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  /* First argument: passed as a non-const reference. */
+  Fp *dst = *(Fp **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const Fp *lhs = *(Fp **)&jarg2;
+  if (lhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  /* Third argument: passed as a const reference. */
+  const Fp *rhs = *(Fp **)&jarg3;
+  if (rhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  mul(*dst, *lhs, *rhs);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  /* JNI bridge for mul(G1&, const G1&, const Fp&); every handle is null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  /* First argument: passed as a non-const reference. */
+  G1 *dst = *(G1 **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const G1 *lhs = *(G1 **)&jarg2;
+  if (lhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
+    return ;
+  }
+  /* Third argument: passed as a const reference. */
+  const Fp *rhs = *(Fp **)&jarg3;
+  if (rhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  mul(*dst, *lhs, *rhs);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_15(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  /* JNI bridge for mul(G2&, const G2&, const Fp&); every handle is null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  /* First argument: passed as a non-const reference. */
+  G2 *dst = *(G2 **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const G2 *lhs = *(G2 **)&jarg2;
+  if (lhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
+    return ;
+  }
+  /* Third argument: passed as a const reference. */
+  const Fp *rhs = *(Fp **)&jarg3;
+  if (rhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  mul(*dst, *lhs, *rhs);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_div_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  /* JNI bridge for div(Fp&, const Fp&, const Fp&); every handle is null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  /* First argument: passed as a non-const reference. */
+  Fp *dst = *(Fp **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const Fp *lhs = *(Fp **)&jarg2;
+  if (lhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  /* Third argument: passed as a const reference. */
+  const Fp *rhs = *(Fp **)&jarg3;
+  if (rhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  div(*dst, *lhs, *rhs);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_pow_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  /* JNI bridge for pow(GT&, const GT&, const Fp&); every handle is null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  /* First argument: passed as a non-const reference. */
+  GT *dst = *(GT **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const GT *lhs = *(GT **)&jarg2;
+  if (lhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT const & reference is null");
+    return ;
+  }
+  /* Third argument: passed as a const reference. */
+  const Fp *rhs = *(Fp **)&jarg3;
+  if (rhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  pow(*dst, *lhs, *rhs);
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+  /* JNI bridge: heap-allocates an Fp via Fp() and returns its address as a jlong. */
+  (void)jenv;
+  (void)jcls;
+  /* Start from 0 so any bits not covered by the pointer stay cleared. */
+  jlong handle = 0;
+  Fp *created = new Fp();
+  *(Fp **)&handle = created;
+  return handle;
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  /* JNI bridge for the Fp copy constructor; returns the new object's address as a jlong. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native pointer of the Fp to copy from. */
+  const Fp *source = *(Fp **)&jarg1;
+  if (source == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return 0;
+  }
+  /* Start from 0 so any bits not covered by the pointer stay cleared. */
+  jlong handle = 0;
+  Fp *copy = new Fp(*source);
+  *(Fp **)&handle = copy;
+  return handle;
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jint jarg1) {
+  /* JNI bridge for Fp(int); returns the new object's address as a jlong. */
+  (void)jenv;
+  (void)jcls;
+  /* The jint is converted to a plain C++ int before the call. */
+  const int value = (int)jarg1;
+  /* Start from 0 so any bits not covered by the pointer stay cleared. */
+  jlong handle = 0;
+  Fp *created = new Fp(value);
+  *(Fp **)&handle = created;
+  return handle;
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jstring jarg1, jint jarg2) {
+  /* JNI bridge for Fp(const std::string&, int); returns the new object's address as a jlong. */
+  (void)jenv;
+  (void)jcls;
+  if (jarg1 == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return 0;
+  }
+  /* Copy the characters into a std::string, then release the JVM buffer right away. */
+  const char *utf = jenv->GetStringUTFChars(jarg1, 0);
+  if (utf == 0) return 0;
+  const std::string text(utf);
+  jenv->ReleaseStringUTFChars(jarg1, utf);
+  const int intArg = (int)jarg2;
+  /* Only std::exception-derived failures are translated; anything else propagates. */
+  Fp *created = 0;
+  try {
+    created = new Fp(text, intArg);
+  }
+  catch(std::exception &err) {
+    /* Hand what() to SWIG_JavaThrowException and return a zero handle. */
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, err.what());
+    return 0;
+  }
+  /* Start from 0 so any bits not covered by the pointer stay cleared. */
+  jlong handle = 0;
+  *(Fp **)&handle = created;
+  return handle;
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jstring jarg1) {
+  /* JNI bridge for Fp(const std::string&); returns the new object's address as a jlong. */
+  (void)jenv;
+  (void)jcls;
+  if (jarg1 == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return 0;
+  }
+  /* Copy the characters into a std::string, then release the JVM buffer right away. */
+  const char *utf = jenv->GetStringUTFChars(jarg1, 0);
+  if (utf == 0) return 0;
+  const std::string text(utf);
+  jenv->ReleaseStringUTFChars(jarg1, utf);
+  Fp *created = 0;
+  try {
+    created = new Fp(text);
+  }
+  catch(std::exception &err) {
+    /* Hand what() to SWIG_JavaThrowException and return a zero handle. */
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, err.what());
+    return 0;
+  }
+  /* Start from 0 so any bits not covered by the pointer stay cleared. */
+  jlong handle = 0;
+  *(Fp **)&handle = created;
+  return handle;
+}
+
+
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fp_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  /* JNI bridge for Fp::equals(const Fp&) const. */
+  /* Returns 0 after reporting a null-pointer error when the argument handle is null. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  /* Receiver: used as-is, no null check is performed on it. */
+  const Fp *self = *(Fp **)&jarg1;
+  /* Argument: passed as a const reference, so it must not be null. */
+  const Fp *other = *(Fp **)&jarg2;
+  if (other == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return 0;
+  }
+  /* Convert the C++ result to bool first, then to jboolean. */
+  const bool same = (bool)self->equals(*other);
+  jboolean answer = (jboolean)same;
+  return answer;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+  /* JNI bridge for Fp::setStr(const std::string&, int). */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native Fp pointer packed into a jlong. */
+  Fp *self = *(Fp **)&jarg1;
+  /* A null Java string is rejected before touching the JVM string API. */
+  if (jarg2 == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return ;
+  }
+  /* Copy the characters into a std::string, then release the JVM buffer right away. */
+  const char *utf = jenv->GetStringUTFChars(jarg2, 0);
+  if (utf == 0) return ;
+  const std::string text(utf);
+  jenv->ReleaseStringUTFChars(jarg2, utf);
+  const int intArg = (int)jarg3;
+  /* Only std::exception-derived failures are translated; anything else propagates. */
+  try {
+    self->setStr(text, intArg);
+  }
+  catch(std::exception &err) {
+    /* Hand what() to SWIG_JavaThrowException and return. */
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, err.what());
+    return ;
+  }
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+  /* JNI bridge for Fp::setStr(const std::string&). */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native Fp pointer packed into a jlong. */
+  Fp *self = *(Fp **)&jarg1;
+  if (jarg2 == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return ;
+  }
+  /* Copy the characters into a std::string, then release the JVM buffer right away. */
+  const char *utf = jenv->GetStringUTFChars(jarg2, 0);
+  if (utf == 0) return ;
+  const std::string text(utf);
+  jenv->ReleaseStringUTFChars(jarg2, utf);
+  /* Only std::exception-derived failures are translated; anything else propagates. */
+  try {
+    self->setStr(text);
+  }
+  catch(std::exception &err) {
+    /* Hand what() to SWIG_JavaThrowException and return. */
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, err.what());
+    return ;
+  }
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setInt(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+  /* JNI bridge for Fp::setInt(int): forwards the Java int to the wrapped Fp. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native Fp pointer packed into a jlong. */
+  Fp *self = *(Fp **)&jarg1;
+  /* The jint is converted to a plain C++ int before the call. */
+  const int value = (int)jarg2;
+  self->setInt(value);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  /* JNI bridge: invokes Fp::clear() on the wrapped native object. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native Fp pointer packed into a jlong. */
+  Fp *self = *(Fp **)&jarg1;
+  self->clear();
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setByCSPRNG(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  /* JNI bridge: invokes Fp::setByCSPRNG() on the wrapped native object. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native Fp pointer packed into a jlong. */
+  Fp *self = *(Fp **)&jarg1;
+  self->setByCSPRNG();
+}
+
+
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fp_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+  /* JNI bridge for Fp::toString(int): returns the result as a new Java string. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native Fp pointer packed into a jlong. */
+  const Fp *self = *(Fp **)&jarg1;
+  const int intArg = (int)jarg2;
+  std::string text;
+  /* Only std::exception-derived failures are translated; anything else propagates. */
+  try {
+    text = self->toString(intArg);
+  }
+  catch(std::exception &err) {
+    /* Hand what() to SWIG_JavaThrowException and return a null string. */
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, err.what());
+    return 0;
+  }
+  /* Build the Java string from the C string held by text. */
+  jstring converted = jenv->NewStringUTF(text.c_str());
+  return converted;
+}
+
+
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fp_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  /* JNI bridge for Fp::toString(): returns the result as a new Java string. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native Fp pointer packed into a jlong. */
+  const Fp *self = *(Fp **)&jarg1;
+  std::string text;
+  try {
+    text = self->toString();
+  }
+  catch(std::exception &err) {
+    /* Hand what() to SWIG_JavaThrowException and return a null string. */
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, err.what());
+    return 0;
+  }
+  /* Build the Java string from the C string held by text. */
+  jstring converted = jenv->NewStringUTF(text.c_str());
+  return converted;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+  /* JNI bridge for Fp::deserialize(buf, size); a null Java array is passed on as (0, 0). */
+  Fp *arg1 = (Fp *) 0 ;
+  char *arg2 = (char *) 0 ;
+  size_t arg3 = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fp **)&jarg1; 
+  if (jarg2) {
+    arg2 = (char *) jenv->GetByteArrayElements(jarg2, 0);
+    /* NULL means the JNI call failed (an exception is presumably pending); nothing to release. */
+    if (!arg2) return ;
+    arg3 = (size_t) jenv->GetArrayLength(jarg2);
+  }
+  try {
+    (arg1)->deserialize((char const *)arg2,arg3);
+  }
+  catch(std::exception &_e) {
+    /* Release the borrowed elements on the error path as well so they are not leaked. */
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+  {
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+  }
+  
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1Fp(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+  /* JNI bridge: destroys the native Fp whose address is packed in jarg1. */
+  (void)jenv;
+  (void)jcls;
+  /* delete on a null pointer is a no-op, so a zero handle is harmless. */
+  Fp *victim = *(Fp **)&jarg1;
+  delete victim;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  /* JNI bridge for neg(G1&, const G1&); both handles are null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  /* First argument: passed as a non-const reference. */
+  G1 *dst = *(G1 **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const G1 *src = *(G1 **)&jarg2;
+  if (src == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
+    return ;
+  }
+  neg(*dst, *src);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_dbl_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  /* JNI bridge for dbl(G1&, const G1&); both handles are null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  /* First argument: passed as a non-const reference. */
+  G1 *dst = *(G1 **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const G1 *src = *(G1 **)&jarg2;
+  if (src == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
+    return ;
+  }
+  dbl(*dst, *src);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  /* JNI bridge for add(G1&, const G1&, const G1&); every handle is null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  /* First argument: passed as a non-const reference. */
+  G1 *dst = *(G1 **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const G1 *lhs = *(G1 **)&jarg2;
+  if (lhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
+    return ;
+  }
+  /* Third argument: passed as a const reference. */
+  const G1 *rhs = *(G1 **)&jarg3;
+  if (rhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
+    return ;
+  }
+  add(*dst, *lhs, *rhs);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  /* JNI bridge for sub(G1&, const G1&, const G1&); every handle is null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  /* First argument: passed as a non-const reference. */
+  G1 *dst = *(G1 **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const G1 *lhs = *(G1 **)&jarg2;
+  if (lhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
+    return ;
+  }
+  /* Third argument: passed as a const reference. */
+  const G1 *rhs = *(G1 **)&jarg3;
+  if (rhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
+    return ;
+  }
+  sub(*dst, *lhs, *rhs);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_pairing(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  /* JNI bridge for pairing(GT&, const G1&, const G2&); every handle is null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  /* First argument: passed as a non-const reference. */
+  GT *dst = *(GT **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const G1 *lhs = *(G1 **)&jarg2;
+  if (lhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
+    return ;
+  }
+  /* Third argument: passed as a const reference. */
+  const G2 *rhs = *(G2 **)&jarg3;
+  if (rhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
+    return ;
+  }
+  pairing(*dst, *lhs, *rhs);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_hashAndMapToG1(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+  /* JNI bridge for hashAndMapToG1(G1&, buf, size); a null Java array is passed on as (0, 0). */
+  G1 *arg1 = 0 ;
+  char *arg2 = (char *) 0 ;
+  size_t arg3 = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(G1 **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 & reference is null");
+    return ;
+  } 
+  if (jarg2) {
+    arg2 = (char *) jenv->GetByteArrayElements(jarg2, 0);
+    /* NULL means the JNI call failed (an exception is presumably pending); nothing to release. */
+    if (!arg2) return ;
+    arg3 = (size_t) jenv->GetArrayLength(jarg2);
+  }
+  try {
+    hashAndMapToG1(*arg1,(char const *)arg2,arg3);
+  }
+  catch(std::exception &_e) {
+    /* Release the borrowed elements on the error path as well so they are not leaked. */
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+  {
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+  }
+  
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G1_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+  /* JNI bridge: heap-allocates a G1 via G1() and returns its address as a jlong. */
+  (void)jenv;
+  (void)jcls;
+  /* Start from 0 so any bits not covered by the pointer stay cleared. */
+  jlong handle = 0;
+  G1 *created = new G1();
+  *(G1 **)&handle = created;
+  return handle;
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G1_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  /* JNI bridge for the G1 copy constructor; returns the new object's address as a jlong. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native pointer of the G1 to copy from. */
+  const G1 *source = *(G1 **)&jarg1;
+  if (source == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
+    return 0;
+  }
+  /* Start from 0 so any bits not covered by the pointer stay cleared. */
+  jlong handle = 0;
+  G1 *copy = new G1(*source);
+  *(G1 **)&handle = copy;
+  return handle;
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G1_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  /* JNI bridge for G1(const Fp&, const Fp&); returns the new object's address as a jlong. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  /* First constructor argument: passed as a const reference. */
+  const Fp *first = *(Fp **)&jarg1;
+  if (first == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return 0;
+  }
+  /* Second constructor argument: passed as a const reference. */
+  const Fp *second = *(Fp **)&jarg2;
+  if (second == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return 0;
+  }
+  G1 *created = 0;
+  try {
+    created = new G1(*first, *second);
+  }
+  catch(std::exception &err) {
+    /* Hand what() to SWIG_JavaThrowException and return a zero handle. */
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, err.what());
+    return 0;
+  }
+  jlong handle = 0;
+  *(G1 **)&handle = created;
+  return handle;
+}
+
+
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  /* JNI bridge for G1::equals(const G1&) const. */
+  /* Returns 0 after reporting a null-pointer error when the argument handle is null. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  /* Receiver: used as-is, no null check is performed on it. */
+  const G1 *self = *(G1 **)&jarg1;
+  /* Argument: passed as a const reference, so it must not be null. */
+  const G1 *other = *(G1 **)&jarg2;
+  if (other == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
+    return 0;
+  }
+  /* Convert the C++ result to bool first, then to jboolean. */
+  const bool same = (bool)self->equals(*other);
+  jboolean answer = (jboolean)same;
+  return answer;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  /* JNI bridge for G1::set(const Fp&, const Fp&). */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  /* Receiver: used as-is, no null check is performed on it. */
+  G1 *self = *(G1 **)&jarg1;
+  /* First call argument: passed as a const reference. */
+  const Fp *first = *(Fp **)&jarg2;
+  if (first == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  /* Second call argument: passed as a const reference. */
+  const Fp *second = *(Fp **)&jarg3;
+  if (second == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  }
+  try {
+    self->set(*first, *second);
+  }
+  catch(std::exception &err) {
+    /* Hand what() to SWIG_JavaThrowException and return. */
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, err.what());
+    return ;
+  }
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  /* JNI bridge: invokes G1::clear() on the wrapped native object. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native G1 pointer packed into a jlong. */
+  G1 *self = *(G1 **)&jarg1;
+  self->clear();
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+  /* JNI bridge for G1::setStr(const std::string&, int). */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native G1 pointer packed into a jlong. */
+  G1 *self = *(G1 **)&jarg1;
+  /* A null Java string is rejected before touching the JVM string API. */
+  if (jarg2 == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return ;
+  }
+  /* Copy the characters into a std::string, then release the JVM buffer right away. */
+  const char *utf = jenv->GetStringUTFChars(jarg2, 0);
+  if (utf == 0) return ;
+  const std::string text(utf);
+  jenv->ReleaseStringUTFChars(jarg2, utf);
+  const int intArg = (int)jarg3;
+  /* Only std::exception-derived failures are translated; anything else propagates. */
+  try {
+    self->setStr(text, intArg);
+  }
+  catch(std::exception &err) {
+    /* Hand what() to SWIG_JavaThrowException and return. */
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, err.what());
+    return ;
+  }
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+  /* JNI bridge for G1::setStr(const std::string&). */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native G1 pointer packed into a jlong. */
+  G1 *self = *(G1 **)&jarg1;
+  if (jarg2 == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return ;
+  }
+  /* Copy the characters into a std::string, then release the JVM buffer right away. */
+  const char *utf = jenv->GetStringUTFChars(jarg2, 0);
+  if (utf == 0) return ;
+  const std::string text(utf);
+  jenv->ReleaseStringUTFChars(jarg2, utf);
+  /* Only std::exception-derived failures are translated; anything else propagates. */
+  try {
+    self->setStr(text);
+  }
+  catch(std::exception &err) {
+    /* Hand what() to SWIG_JavaThrowException and return. */
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, err.what());
+    return ;
+  }
+}
+
+
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G1_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+  /* JNI bridge for G1::toString(int): returns the result as a new Java string. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native G1 pointer packed into a jlong. */
+  const G1 *self = *(G1 **)&jarg1;
+  const int intArg = (int)jarg2;
+  std::string text;
+  /* Only std::exception-derived failures are translated; anything else propagates. */
+  try {
+    text = self->toString(intArg);
+  }
+  catch(std::exception &err) {
+    /* Hand what() to SWIG_JavaThrowException and return a null string. */
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, err.what());
+    return 0;
+  }
+  /* Build the Java string from the C string held by text. */
+  jstring converted = jenv->NewStringUTF(text.c_str());
+  return converted;
+}
+
+
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G1_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  /* JNI bridge for G1::toString(): returns the result as a new Java string. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native G1 pointer packed into a jlong. */
+  const G1 *self = *(G1 **)&jarg1;
+  std::string text;
+  try {
+    text = self->toString();
+  }
+  catch(std::exception &err) {
+    /* Hand what() to SWIG_JavaThrowException and return a null string. */
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, err.what());
+    return 0;
+  }
+  /* Build the Java string from the C string held by text. */
+  jstring converted = jenv->NewStringUTF(text.c_str());
+  return converted;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+  /* JNI bridge for G1::deserialize(buf, size); a null Java array is passed on as (0, 0). */
+  G1 *arg1 = (G1 *) 0 ;
+  char *arg2 = (char *) 0 ;
+  size_t arg3 = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(G1 **)&jarg1; 
+  if (jarg2) {
+    arg2 = (char *) jenv->GetByteArrayElements(jarg2, 0);
+    /* NULL means the JNI call failed (an exception is presumably pending); nothing to release. */
+    if (!arg2) return ;
+    arg3 = (size_t) jenv->GetArrayLength(jarg2);
+  }
+  try {
+    (arg1)->deserialize((char const *)arg2,arg3);
+  }
+  catch(std::exception &_e) {
+    /* Release the borrowed elements on the error path as well so they are not leaked. */
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+  {
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+  }
+  
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1G1(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+  /* JNI bridge: destroys the native G1 whose address is packed in jarg1. */
+  (void)jenv;
+  (void)jcls;
+  /* delete on a null pointer is a no-op, so a zero handle is harmless. */
+  G1 *victim = *(G1 **)&jarg1;
+  delete victim;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  /* JNI bridge for neg(G2&, const G2&); both handles are null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  /* First argument: passed as a non-const reference. */
+  G2 *dst = *(G2 **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const G2 *src = *(G2 **)&jarg2;
+  if (src == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
+    return ;
+  }
+  neg(*dst, *src);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_dbl_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  /* JNI bridge for dbl(G2&, const G2&); both handles are null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  /* First argument: passed as a non-const reference. */
+  G2 *dst = *(G2 **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const G2 *src = *(G2 **)&jarg2;
+  if (src == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
+    return ;
+  }
+  dbl(*dst, *src);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  /* JNI bridge for add(G2&, const G2&, const G2&); every handle is null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  /* First argument: passed as a non-const reference. */
+  G2 *dst = *(G2 **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const G2 *lhs = *(G2 **)&jarg2;
+  if (lhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
+    return ;
+  }
+  /* Third argument: passed as a const reference. */
+  const G2 *rhs = *(G2 **)&jarg3;
+  if (rhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
+    return ;
+  }
+  add(*dst, *lhs, *rhs);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  /* JNI bridge for sub(G2&, const G2&, const G2&); every handle is null-checked in order. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  /* First argument: passed as a non-const reference. */
+  G2 *dst = *(G2 **)&jarg1;
+  if (dst == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 & reference is null");
+    return ;
+  }
+  /* Second argument: passed as a const reference. */
+  const G2 *lhs = *(G2 **)&jarg2;
+  if (lhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
+    return ;
+  }
+  /* Third argument: passed as a const reference. */
+  const G2 *rhs = *(G2 **)&jarg3;
+  if (rhs == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
+    return ;
+  }
+  sub(*dst, *lhs, *rhs);
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_hashAndMapToG2(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+  /* JNI bridge for hashAndMapToG2(G2&, buf, size); a null Java array is passed on as (0, 0). */
+  G2 *arg1 = 0 ;
+  char *arg2 = (char *) 0 ;
+  size_t arg3 = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(G2 **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 & reference is null");
+    return ;
+  } 
+  if (jarg2) {
+    arg2 = (char *) jenv->GetByteArrayElements(jarg2, 0);
+    /* NULL means the JNI call failed (an exception is presumably pending); nothing to release. */
+    if (!arg2) return ;
+    arg3 = (size_t) jenv->GetArrayLength(jarg2);
+  }
+  try {
+    hashAndMapToG2(*arg1,(char const *)arg2,arg3);
+  }
+  catch(std::exception &_e) {
+    /* Release the borrowed elements on the error path as well so they are not leaked. */
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+  {
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+  }
+  
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G2_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+  /* JNI bridge: heap-allocates a G2 via G2() and returns its address as a jlong. */
+  (void)jenv;
+  (void)jcls;
+  /* Start from 0 so any bits not covered by the pointer stay cleared. */
+  jlong handle = 0;
+  G2 *created = new G2();
+  *(G2 **)&handle = created;
+  return handle;
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G2_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  /* JNI bridge for the G2 copy constructor; returns the new object's address as a jlong. */
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  /* jarg1 carries the native pointer of the G2 to copy from. */
+  const G2 *source = *(G2 **)&jarg1;
+  if (source == 0) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
+    return 0;
+  }
+  /* Start from 0 so any bits not covered by the pointer stay cleared. */
+  jlong handle = 0;
+  G2 *copy = new G2(*source);
+  *(G2 **)&handle = copy;
+  return handle;
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G2_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_, jlong jarg4, jobject jarg4_) {
+  jlong jresult = 0 ;
+  Fp *arg1 = 0 ;
+  Fp *arg2 = 0 ;
+  Fp *arg3 = 0 ;
+  Fp *arg4 = 0 ;
+  G2 *result = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  (void)jarg4_;
+  arg1 = *(Fp **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return 0;
+  } 
+  arg2 = *(Fp **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return 0;
+  } 
+  arg3 = *(Fp **)&jarg3;
+  if (!arg3) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return 0;
+  } 
+  arg4 = *(Fp **)&jarg4;
+  if (!arg4) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return 0;
+  } 
+  try {
+    result = (G2 *)new G2((Fp const &)*arg1,(Fp const &)*arg2,(Fp const &)*arg3,(Fp const &)*arg4);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  *(G2 **)&jresult = result; 
+  return jresult;
+}
+
+
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G2_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  jboolean jresult = 0 ;
+  G2 *arg1 = (G2 *) 0 ;
+  G2 *arg2 = 0 ;
+  bool result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  arg1 = *(G2 **)&jarg1; 
+  arg2 = *(G2 **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
+    return 0;
+  } 
+  result = (bool)((G2 const *)arg1)->equals((G2 const &)*arg2);
+  jresult = (jboolean)result; 
+  return jresult;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_, jlong jarg4, jobject jarg4_, jlong jarg5, jobject jarg5_) {
+  G2 *arg1 = (G2 *) 0 ;
+  Fp *arg2 = 0 ;
+  Fp *arg3 = 0 ;
+  Fp *arg4 = 0 ;
+  Fp *arg5 = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  (void)jarg4_;
+  (void)jarg5_;
+  arg1 = *(G2 **)&jarg1; 
+  arg2 = *(Fp **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  } 
+  arg3 = *(Fp **)&jarg3;
+  if (!arg3) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  } 
+  arg4 = *(Fp **)&jarg4;
+  if (!arg4) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  } 
+  arg5 = *(Fp **)&jarg5;
+  if (!arg5) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  } 
+  try {
+    (arg1)->set((Fp const &)*arg2,(Fp const &)*arg3,(Fp const &)*arg4,(Fp const &)*arg5);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  G2 *arg1 = (G2 *) 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(G2 **)&jarg1; 
+  (arg1)->clear();
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+  G2 *arg1 = (G2 *) 0 ;
+  std::string *arg2 = 0 ;
+  int arg3 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(G2 **)&jarg1; 
+  if(!jarg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return ;
+  }
+  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
+  if (!arg2_pstr) return ;
+  std::string arg2_str(arg2_pstr);
+  arg2 = &arg2_str;
+  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
+  arg3 = (int)jarg3; 
+  try {
+    (arg1)->setStr((std::string const &)*arg2,arg3);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+  G2 *arg1 = (G2 *) 0 ;
+  std::string *arg2 = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(G2 **)&jarg1; 
+  if(!jarg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return ;
+  }
+  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
+  if (!arg2_pstr) return ;
+  std::string arg2_str(arg2_pstr);
+  arg2 = &arg2_str;
+  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
+  try {
+    (arg1)->setStr((std::string const &)*arg2);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+}
+
+
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G2_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+  jstring jresult = 0 ;
+  G2 *arg1 = (G2 *) 0 ;
+  int arg2 ;
+  std::string result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(G2 **)&jarg1; 
+  arg2 = (int)jarg2; 
+  try {
+    result = ((G2 const *)arg1)->toString(arg2);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  jresult = jenv->NewStringUTF((&result)->c_str()); 
+  return jresult;
+}
+
+
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G2_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jstring jresult = 0 ;
+  G2 *arg1 = (G2 *) 0 ;
+  std::string result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(G2 **)&jarg1; 
+  try {
+    result = ((G2 const *)arg1)->toString();
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  jresult = jenv->NewStringUTF((&result)->c_str()); 
+  return jresult;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+  G2 *arg1 = (G2 *) 0 ;
+  char *arg2 = (char *) 0 ;
+  size_t arg3 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(G2 **)&jarg1; 
+  {
+    if (jarg2) {
+      arg2 = (char *) jenv->GetByteArrayElements(jarg2, 0);
+      arg3 = (size_t) jenv->GetArrayLength(jarg2);
+    } else {
+      arg2 = 0;
+      arg3 = 0;
+    }
+  }
+  try {
+    (arg1)->deserialize((char const *)arg2,arg3);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+  {
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+  }
+  
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1G2(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+  G2 *arg1 = (G2 *) 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  arg1 = *(G2 **)&jarg1; 
+  delete arg1;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_16(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+  GT *arg1 = 0 ;
+  GT *arg2 = 0 ;
+  GT *arg3 = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  (void)jarg3_;
+  arg1 = *(GT **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT & reference is null");
+    return ;
+  } 
+  arg2 = *(GT **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT const & reference is null");
+    return ;
+  } 
+  arg3 = *(GT **)&jarg3;
+  if (!arg3) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT const & reference is null");
+    return ;
+  } 
+  mul(*arg1,(GT const &)*arg2,(GT const &)*arg3);
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1GT_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+  jlong jresult = 0 ;
+  GT *result = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  result = (GT *)new GT();
+  *(GT **)&jresult = result; 
+  return jresult;
+}
+
+
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1GT_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jlong jresult = 0 ;
+  GT *arg1 = 0 ;
+  GT *result = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(GT **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT const & reference is null");
+    return 0;
+  } 
+  result = (GT *)new GT((GT const &)*arg1);
+  *(GT **)&jresult = result; 
+  return jresult;
+}
+
+
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_GT_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  jboolean jresult = 0 ;
+  GT *arg1 = (GT *) 0 ;
+  GT *arg2 = 0 ;
+  bool result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  arg1 = *(GT **)&jarg1; 
+  arg2 = *(GT **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT const & reference is null");
+    return 0;
+  } 
+  result = (bool)((GT const *)arg1)->equals((GT const &)*arg2);
+  jresult = (jboolean)result; 
+  return jresult;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  GT *arg1 = (GT *) 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(GT **)&jarg1; 
+  (arg1)->clear();
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+  GT *arg1 = (GT *) 0 ;
+  std::string *arg2 = 0 ;
+  int arg3 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(GT **)&jarg1; 
+  if(!jarg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return ;
+  }
+  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
+  if (!arg2_pstr) return ;
+  std::string arg2_str(arg2_pstr);
+  arg2 = &arg2_str;
+  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
+  arg3 = (int)jarg3; 
+  try {
+    (arg1)->setStr((std::string const &)*arg2,arg3);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+  GT *arg1 = (GT *) 0 ;
+  std::string *arg2 = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(GT **)&jarg1; 
+  if(!jarg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string");
+    return ;
+  }
+  const char *arg2_pstr = (const char *)jenv->GetStringUTFChars(jarg2, 0); 
+  if (!arg2_pstr) return ;
+  std::string arg2_str(arg2_pstr);
+  arg2 = &arg2_str;
+  jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
+  try {
+    (arg1)->setStr((std::string const &)*arg2);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+}
+
+
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_GT_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+  jstring jresult = 0 ;
+  GT *arg1 = (GT *) 0 ;
+  int arg2 ;
+  std::string result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(GT **)&jarg1; 
+  arg2 = (int)jarg2; 
+  try {
+    result = ((GT const *)arg1)->toString(arg2);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  jresult = jenv->NewStringUTF((&result)->c_str()); 
+  return jresult;
+}
+
+
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_GT_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jstring jresult = 0 ;
+  GT *arg1 = (GT *) 0 ;
+  std::string result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(GT **)&jarg1; 
+  try {
+    result = ((GT const *)arg1)->toString();
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  jresult = jenv->NewStringUTF((&result)->c_str()); 
+  return jresult;
+}
+
+
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1GT(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+  GT *arg1 = (GT *) 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  arg1 = *(GT **)&jarg1; 
+  delete arg1;
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/ffi/java/run-bn256.bat b/ffi/java/run-mcl.bat
similarity index 100%
rename from ffi/java/run-bn256.bat
rename to ffi/java/run-mcl.bat

From 562337b15212a40dca0db9ace9466a4c165357c1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 3 Jul 2019 22:37:48 +0900
Subject: [PATCH 006/553] v0.95 fix a parser of 0b10 with base = 16

---
 include/mcl/conversion.hpp | 2 --
 include/mcl/op.hpp         | 2 +-
 readme.md                  | 1 +
 test/fp_test.cpp           | 7 ++-----
 4 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/include/mcl/conversion.hpp b/include/mcl/conversion.hpp
index 7a04b7fa..a5420364 100644
--- a/include/mcl/conversion.hpp
+++ b/include/mcl/conversion.hpp
@@ -208,8 +208,6 @@ inline bool parsePrefix(size_t *readSize, bool *isMinus, int *base, const char *
 			if (*base == 0 || *base == 2) {
 				*base = 2;
 				pos += 2;
-			} else {
-				return false;
 			}
 		}
 	}
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index aad9aa7c..c31b7d15 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x094; /* 0xABC = A.BC */
+static const int version = 0x095; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index 2bd603c4..a7035817 100644
--- a/readme.md
+++ b/readme.md
@@ -457,6 +457,7 @@ Y. Sakemi, Y. Nogami, K. Okeya, Y. Morikawa, CANS 2008.
 
 # History
 
+* 2019/Jun/03 v0.95 fix a parser of 0b10 with base = 16
 * 2019/Apr/29 v0.94 mclBn_setETHserialization supports [ETH2.0 serialization of BLS12-381](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#point-representations)
 * 2019/Apr/24 v0.93 support ios
 * 2019/Mar/22 v0.92 shortcut for Ec::mul(Px, P, x) if P = 0
diff --git a/test/fp_test.cpp b/test/fp_test.cpp
index d82c30f5..6b71075c 100644
--- a/test/fp_test.cpp
+++ b/test/fp_test.cpp
@@ -92,6 +92,7 @@ void setStrTest()
 		{ "0b100", 4, 2 },
 		{ "0x100", 256, 0 },
 		{ "0x100", 256, 16 },
+		{ "0b100", 0xb100, 16 }, // hex string
 	};
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 		Fp x;
@@ -101,7 +102,6 @@ void setStrTest()
 	// use prefix if base conflicts with prefix
 	{
 		Fp x;
-		CYBOZU_TEST_EXCEPTION(x.setStr("0b100", 16), cybozu::Exception);
 		CYBOZU_TEST_EXCEPTION(x.setStr("0b100", 10), cybozu::Exception);
 		CYBOZU_TEST_EXCEPTION(x.setStr("0x100", 2), cybozu::Exception);
 		CYBOZU_TEST_EXCEPTION(x.setStr("0x100", 10), cybozu::Exception);
@@ -117,6 +117,7 @@ void streamTest()
 	} tbl[] = {
 		{ "100", 100, 256 }, // set base = 10 if base = 0
 		{ "0x100", 256, 256 },
+		{ "0b100", 4, 0xb100 }, // 0b100 = 0xb100 if std::hex
 	};
 	Fp::setIoMode(0);
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
@@ -133,10 +134,6 @@ void streamTest()
 			CYBOZU_TEST_EQUAL(x, tbl[i].out16);
 		}
 	}
-	// use prefix if base conflicts with prefix
-	std::istringstream is("0b100");
-	Fp x;
-	CYBOZU_TEST_EXCEPTION(is >> std::hex >> x, cybozu::Exception);
 	{
 		std::ostringstream os;
 		os << Fp(123);

From e50de173a320133f36eaa78bf1e3b8e73af73e90 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 4 Jul 2019 07:04:38 +0900
Subject: [PATCH 007/553] [java] add serialize/deserialize

---
 ffi/java/MclTest.java               |  31 ++++-
 ffi/java/com/herumi/mcl/Fp.java     |   2 +
 ffi/java/com/herumi/mcl/Fr.java     |   2 +
 ffi/java/com/herumi/mcl/G1.java     |   2 +
 ffi/java/com/herumi/mcl/G2.java     |   2 +
 ffi/java/com/herumi/mcl/GT.java     |   6 +
 ffi/java/com/herumi/mcl/MclJNI.java |   6 +
 ffi/java/mcl.i                      |  15 +--
 ffi/java/mcl_impl.hpp               |  57 ++++++++--
 ffi/java/mcl_wrap.cxx               | 168 ++++++++++++++++++++++++++++
 ffi/java/run-mcl.bat                |   8 +-
 11 files changed, 272 insertions(+), 27 deletions(-)

diff --git a/ffi/java/MclTest.java b/ffi/java/MclTest.java
index d8ed0fb5..ba8b4c8a 100644
--- a/ffi/java/MclTest.java
+++ b/ffi/java/MclTest.java
@@ -11,11 +11,13 @@ public class MclTest {
 		System.out.println("libName : " + libName);
 		System.loadLibrary(lib);
 	}
+    public static int errN = 0;
 	public static void assertEquals(String msg, String x, String y) {
 		if (x.equals(y)) {
 			System.out.println("OK : " + msg);
 		} else {
 			System.out.println("NG : " + msg + ", x = " + x + ", y = " + y);
+            errN++;
 		}
 	}
 	public static void assertBool(String msg, boolean b) {
@@ -23,6 +25,7 @@ public static void assertBool(String msg, boolean b) {
 			System.out.println("OK : " + msg);
 		} else {
 			System.out.println("NG : " + msg);
+            errN++;
 		}
 	}
 	public static void testCurve(int curveType, String name) {
@@ -43,14 +46,12 @@ public static void testCurve(int curveType, String name) {
 			assertEquals("x == 18", (new Fr("12", 16)).toString(), "18");
 			assertEquals("x == ff", (new Fr("255")).toString(16), "ff");
 
-/*
 			{
 				byte[] b = x.serialize();
 				Fr t = new Fr();
 				t.deserialize(b);
-				assertEquals("serialize", x, t);
+				assertBool("serialize", x.equals(t));
 			}
-*/
 			G1 P = new G1();
 			System.out.println("P=" + P);
 			Mcl.hashAndMapToG1(P, "test".getBytes());
@@ -60,6 +61,12 @@ public static void testCurve(int curveType, String name) {
 			System.out.println("P=" + P);
 			Mcl.neg(P, P);
 			System.out.println("P=" + P);
+			{
+				byte[] b = P.serialize();
+				G1 t = new G1();
+				t.deserialize(b);
+				assertBool("serialize", P.equals(t));
+			}
 
 			G2 Q = new G2();
 			Mcl.hashAndMapToG2(Q, "abc".getBytes());
@@ -72,6 +79,12 @@ public static void testCurve(int curveType, String name) {
 				P1.setStr(s);
 				assertBool("P == P1", P1.equals(P));
 			}
+			{
+				byte[] b = Q.serialize();
+				G2 t = new G2();
+				t.deserialize(b);
+				assertBool("serialize", Q.equals(t));
+			}
 
 			GT e = new GT();
 			Mcl.pairing(e, P, Q);
@@ -84,13 +97,23 @@ public static void testCurve(int curveType, String name) {
 			Mcl.pairing(e1, P, cQ);
 			Mcl.pow(e2, e, c); // e2 = e^c
 			assertBool("e1 == e2", e1.equals(e2));
-
+			{
+				byte[] b = e1.serialize();
+				GT t = new GT();
+				t.deserialize(b);
+				assertBool("serialize", e1.equals(t));
+			}
 			G1 cP = new G1(P);
 			Mcl.mul(cP, P, c); // cP = P * c
 			Mcl.pairing(e1, cP, Q);
 			assertBool("e1 == e2", e1.equals(e2));
 
 			BLSsignature(Q);
+            if (errN == 0) {
+                System.out.println("all test passed");
+            } else {
+                System.out.println("ERR=" + errN);
+            }
 		} catch (RuntimeException e) {
 			System.out.println("unknown exception :" + e);
 		}
diff --git a/ffi/java/com/herumi/mcl/Fp.java b/ffi/java/com/herumi/mcl/Fp.java
index 93b97328..aa4d1e8b 100644
--- a/ffi/java/com/herumi/mcl/Fp.java
+++ b/ffi/java/com/herumi/mcl/Fp.java
@@ -91,4 +91,6 @@ public void deserialize(byte[] cbuf) {
     MclJNI.Fp_deserialize(swigCPtr, this, cbuf);
   }
 
+  public byte[] serialize() { return MclJNI.Fp_serialize(swigCPtr, this); }
+
 }
diff --git a/ffi/java/com/herumi/mcl/Fr.java b/ffi/java/com/herumi/mcl/Fr.java
index a06a97a3..2127f9d0 100644
--- a/ffi/java/com/herumi/mcl/Fr.java
+++ b/ffi/java/com/herumi/mcl/Fr.java
@@ -91,4 +91,6 @@ public void deserialize(byte[] cbuf) {
     MclJNI.Fr_deserialize(swigCPtr, this, cbuf);
   }
 
+  public byte[] serialize() { return MclJNI.Fr_serialize(swigCPtr, this); }
+
 }
diff --git a/ffi/java/com/herumi/mcl/G1.java b/ffi/java/com/herumi/mcl/G1.java
index 146d904e..ec640b6c 100644
--- a/ffi/java/com/herumi/mcl/G1.java
+++ b/ffi/java/com/herumi/mcl/G1.java
@@ -79,4 +79,6 @@ public void deserialize(byte[] cbuf) {
     MclJNI.G1_deserialize(swigCPtr, this, cbuf);
   }
 
+  public byte[] serialize() { return MclJNI.G1_serialize(swigCPtr, this); }
+
 }
diff --git a/ffi/java/com/herumi/mcl/G2.java b/ffi/java/com/herumi/mcl/G2.java
index 5aded6df..2480dd35 100644
--- a/ffi/java/com/herumi/mcl/G2.java
+++ b/ffi/java/com/herumi/mcl/G2.java
@@ -79,4 +79,6 @@ public void deserialize(byte[] cbuf) {
     MclJNI.G2_deserialize(swigCPtr, this, cbuf);
   }
 
+  public byte[] serialize() { return MclJNI.G2_serialize(swigCPtr, this); }
+
 }
diff --git a/ffi/java/com/herumi/mcl/GT.java b/ffi/java/com/herumi/mcl/GT.java
index 187af707..fdc590d5 100644
--- a/ffi/java/com/herumi/mcl/GT.java
+++ b/ffi/java/com/herumi/mcl/GT.java
@@ -67,4 +67,10 @@ public String toString() {
     return MclJNI.GT_toString__SWIG_1(swigCPtr, this);
   }
 
+  public void deserialize(byte[] cbuf) {
+    MclJNI.GT_deserialize(swigCPtr, this, cbuf);
+  }
+
+  public byte[] serialize() { return MclJNI.GT_serialize(swigCPtr, this); }
+
 }
diff --git a/ffi/java/com/herumi/mcl/MclJNI.java b/ffi/java/com/herumi/mcl/MclJNI.java
index 7181f50a..9e7c0086 100644
--- a/ffi/java/com/herumi/mcl/MclJNI.java
+++ b/ffi/java/com/herumi/mcl/MclJNI.java
@@ -32,6 +32,7 @@ public class MclJNI {
   public final static native String Fr_toString__SWIG_0(long jarg1, Fr jarg1_, int jarg2);
   public final static native String Fr_toString__SWIG_1(long jarg1, Fr jarg1_);
   public final static native void Fr_deserialize(long jarg1, Fr jarg1_, byte[] jarg2);
+  public final static native byte[] Fr_serialize(long jarg1, Fr jarg1_);
   public final static native void delete_Fr(long jarg1);
   public final static native void neg__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_);
   public final static native void add__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
@@ -55,6 +56,7 @@ public class MclJNI {
   public final static native String Fp_toString__SWIG_0(long jarg1, Fp jarg1_, int jarg2);
   public final static native String Fp_toString__SWIG_1(long jarg1, Fp jarg1_);
   public final static native void Fp_deserialize(long jarg1, Fp jarg1_, byte[] jarg2);
+  public final static native byte[] Fp_serialize(long jarg1, Fp jarg1_);
   public final static native void delete_Fp(long jarg1);
   public final static native void neg__SWIG_2(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_);
   public final static native void dbl__SWIG_0(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_);
@@ -73,6 +75,7 @@ public class MclJNI {
   public final static native String G1_toString__SWIG_0(long jarg1, G1 jarg1_, int jarg2);
   public final static native String G1_toString__SWIG_1(long jarg1, G1 jarg1_);
   public final static native void G1_deserialize(long jarg1, G1 jarg1_, byte[] jarg2);
+  public final static native byte[] G1_serialize(long jarg1, G1 jarg1_);
   public final static native void delete_G1(long jarg1);
   public final static native void neg__SWIG_3(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_);
   public final static native void dbl__SWIG_1(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_);
@@ -90,6 +93,7 @@ public class MclJNI {
   public final static native String G2_toString__SWIG_0(long jarg1, G2 jarg1_, int jarg2);
   public final static native String G2_toString__SWIG_1(long jarg1, G2 jarg1_);
   public final static native void G2_deserialize(long jarg1, G2 jarg1_, byte[] jarg2);
+  public final static native byte[] G2_serialize(long jarg1, G2 jarg1_);
   public final static native void delete_G2(long jarg1);
   public final static native void mul__SWIG_6(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, GT jarg3_);
   public final static native long new_GT__SWIG_0();
@@ -100,5 +104,7 @@ public class MclJNI {
   public final static native void GT_setStr__SWIG_1(long jarg1, GT jarg1_, String jarg2);
   public final static native String GT_toString__SWIG_0(long jarg1, GT jarg1_, int jarg2);
   public final static native String GT_toString__SWIG_1(long jarg1, GT jarg1_);
+  public final static native void GT_deserialize(long jarg1, GT jarg1_, byte[] jarg2);
+  public final static native byte[] GT_serialize(long jarg1, GT jarg1_);
   public final static native void delete_GT(long jarg1);
 }
diff --git a/ffi/java/mcl.i b/ffi/java/mcl.i
index 6649ca7a..1b1c1cd5 100644
--- a/ffi/java/mcl.i
+++ b/ffi/java/mcl.i
@@ -11,18 +11,19 @@
 
 %}
 
-%include "mcl_impl.hpp"
-
-%javaconst(1);
-#define BN254 0
-#define BLS12_381 5
-
 %typemap(jtype) void serialize "byte[]"
 %typemap(jstype) void serialize "byte[]"
 %typemap(jni) void serialize "jbyteArray"
 %typemap(javaout) void serialize { return $jnicall; }
-%typemap(in, numinputs=0) std::string& out (std::string temp) "$1=&temp;"
+%typemap(in, numinputs=0) std::string& out (std::string buf) "$1=&buf;"
 %typemap(argout) std::string& out {
   $result = JCALL1(NewByteArray, jenv, $1->size());
   JCALL4(SetByteArrayRegion, jenv, $result, 0, $1->size(), (const jbyte*)$1->c_str());
 }
+
+%include "mcl_impl.hpp"
+
+%javaconst(1);
+#define BN254 0
+#define BLS12_381 5
+
diff --git a/ffi/java/mcl_impl.hpp b/ffi/java/mcl_impl.hpp
index 8edb6568..76f4ff9a 100644
--- a/ffi/java/mcl_impl.hpp
+++ b/ffi/java/mcl_impl.hpp
@@ -15,6 +15,23 @@ void SystemInit(int curveType) throw(std::exception)
 	mcl::bn::initPairing(cp);
 }
 
+template<class T>
+void deserializeT(T& x, const char *cbuf, size_t bufSize)
+{
+	if (x.deserialize(cbuf, bufSize) == 0) {
+		throw std::runtime_error("deserialize");
+	}
+}
+
+template<class T>
+void serializeT(std::string& out, const T& x)
+{
+	out.resize(48 * 12);
+	size_t n = x.serialize(&out[0], out.size());
+	if (n == 0) throw std::runtime_error("serializeT");
+	out.resize(n);
+}
+
 class G1;
 class G2;
 class GT;
@@ -63,9 +80,11 @@ class Fr {
 	}
 	void deserialize(const char *cbuf, size_t bufSize) throw(std::exception)
 	{
-		if (self_.deserialize(cbuf, bufSize) == 0) {
-			throw std::runtime_error("deserialize");
-		}
+		deserializeT(self_, cbuf, bufSize);
+	}
+	void serialize(std::string& out) const throw(std::exception)
+	{
+		serializeT(out, self_);
 	}
 };
 
@@ -136,9 +155,11 @@ class Fp {
 	}
 	void deserialize(const char *cbuf, size_t bufSize) throw(std::exception)
 	{
-		if (self_.deserialize(cbuf, bufSize) == 0) {
-			throw std::runtime_error("deserialize");
-		}
+		deserializeT(self_, cbuf, bufSize);
+	}
+	void serialize(std::string& out) const throw(std::exception)
+	{
+		serializeT(out, self_);
 	}
 };
 
@@ -207,9 +228,11 @@ class G1 {
 	}
 	void deserialize(const char *cbuf, size_t bufSize) throw(std::exception)
 	{
-		if (self_.deserialize(cbuf, bufSize) == 0) {
-			throw std::runtime_error("deserialize");
-		}
+		deserializeT(self_, cbuf, bufSize);
+	}
+	void serialize(std::string& out) const throw(std::exception)
+	{
+		serializeT(out, self_);
 	}
 };
 
@@ -275,9 +298,11 @@ class G2 {
 	}
 	void deserialize(const char *cbuf, size_t bufSize) throw(std::exception)
 	{
-		if (self_.deserialize(cbuf, bufSize) == 0) {
-			throw std::runtime_error("deserialize");
-		}
+		deserializeT(self_, cbuf, bufSize);
+	}
+	void serialize(std::string& out) const throw(std::exception)
+	{
+		serializeT(out, self_);
 	}
 };
 
@@ -326,6 +351,14 @@ class GT {
 	{
 		return self_.getStr(base);
 	}
+	void deserialize(const char *cbuf, size_t bufSize) throw(std::exception)
+	{
+		deserializeT(self_, cbuf, bufSize);
+	}
+	void serialize(std::string& out) const throw(std::exception)
+	{
+		serializeT(out, self_);
+	}
 };
 
 void mul(GT& z, const GT& x, const GT& y)
diff --git a/ffi/java/mcl_wrap.cxx b/ffi/java/mcl_wrap.cxx
index f9a36a75..4999bca1 100644
--- a/ffi/java/mcl_wrap.cxx
+++ b/ffi/java/mcl_wrap.cxx
@@ -788,6 +788,33 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1deserialize(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_Fr_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jbyteArray jresult = 0 ;
+  Fr *arg1 = (Fr *) 0 ;
+  std::string *arg2 = 0 ;
+  std::string buf2 ;
+  
+  (void)jenv;
+  (void)jcls;
+  arg2=&buf2;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1; 
+  try {
+    ((Fr const *)arg1)->serialize(*arg2);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  {
+    jresult = jenv->NewByteArray(arg2->size());
+    jenv->SetByteArrayRegion(jresult, 0, arg2->size(), (const jbyte*)arg2->c_str());
+  }
+  return jresult;
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1Fr(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   Fr *arg1 = (Fr *) 0 ;
   
@@ -1322,6 +1349,33 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1deserialize(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_Fp_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jbyteArray jresult = 0 ;
+  Fp *arg1 = (Fp *) 0 ;
+  std::string *arg2 = 0 ;
+  std::string buf2 ;
+  
+  (void)jenv;
+  (void)jcls;
+  arg2=&buf2;
+  (void)jarg1_;
+  arg1 = *(Fp **)&jarg1; 
+  try {
+    ((Fp const *)arg1)->serialize(*arg2);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  {
+    jresult = jenv->NewByteArray(arg2->size());
+    jenv->SetByteArrayRegion(jresult, 0, arg2->size(), (const jbyte*)arg2->c_str());
+  }
+  return jresult;
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1Fp(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   Fp *arg1 = (Fp *) 0 ;
   
@@ -1766,6 +1820,33 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1deserialize(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_G1_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jbyteArray jresult = 0 ;
+  G1 *arg1 = (G1 *) 0 ;
+  std::string *arg2 = 0 ;
+  std::string buf2 ;
+  
+  (void)jenv;
+  (void)jcls;
+  arg2=&buf2;
+  (void)jarg1_;
+  arg1 = *(G1 **)&jarg1; 
+  try {
+    ((G1 const *)arg1)->serialize(*arg2);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  {
+    jresult = jenv->NewByteArray(arg2->size());
+    jenv->SetByteArrayRegion(jresult, 0, arg2->size(), (const jbyte*)arg2->c_str());
+  }
+  return jresult;
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1G1(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   G1 *arg1 = (G1 *) 0 ;
   
@@ -2209,6 +2290,33 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1deserialize(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_G2_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jbyteArray jresult = 0 ;
+  G2 *arg1 = (G2 *) 0 ;
+  std::string *arg2 = 0 ;
+  std::string buf2 ;
+  
+  (void)jenv;
+  (void)jcls;
+  arg2=&buf2;
+  (void)jarg1_;
+  arg1 = *(G2 **)&jarg1; 
+  try {
+    ((G2 const *)arg1)->serialize(*arg2);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  {
+    jresult = jenv->NewByteArray(arg2->size());
+    jenv->SetByteArrayRegion(jresult, 0, arg2->size(), (const jbyte*)arg2->c_str());
+  }
+  return jresult;
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1G2(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   G2 *arg1 = (G2 *) 0 ;
   
@@ -2416,6 +2524,66 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_GT_1toString_1_1SWIG_11(JN
 }
 
 
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+  GT *arg1 = (GT *) 0 ;
+  char *arg2 = (char *) 0 ;
+  size_t arg3 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(GT **)&jarg1; 
+  {
+    if (jarg2) {
+      arg2 = (char *) jenv->GetByteArrayElements(jarg2, 0);
+      arg3 = (size_t) jenv->GetArrayLength(jarg2);
+    } else {
+      arg2 = 0;
+      arg3 = 0;
+    }
+  }
+  try {
+    (arg1)->deserialize((char const *)arg2,arg3);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  
+  {
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+  }
+  
+}
+
+
+SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_GT_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jbyteArray jresult = 0 ;
+  GT *arg1 = (GT *) 0 ;
+  std::string *arg2 = 0 ;
+  std::string buf2 ;
+  
+  (void)jenv;
+  (void)jcls;
+  arg2=&buf2;
+  (void)jarg1_;
+  arg1 = *(GT **)&jarg1; 
+  try {
+    ((GT const *)arg1)->serialize(*arg2);
+  }
+  catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return 0;
+  }
+  
+  {
+    jresult = jenv->NewByteArray(arg2->size());
+    jenv->SetByteArrayRegion(jresult, 0, arg2->size(), (const jbyte*)arg2->c_str());
+  }
+  return jresult;
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1GT(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   GT *arg1 = (GT *) 0 ;
   
diff --git a/ffi/java/run-mcl.bat b/ffi/java/run-mcl.bat
index 903876ec..f3c39cae 100644
--- a/ffi/java/run-mcl.bat
+++ b/ffi/java/run-mcl.bat
@@ -1,9 +1,9 @@
 @echo off
-echo [[compile Bn256Test.java]]
-%JAVA_DIR%\bin\javac Bn256Test.java
+echo [[compile MclTest.java]]
+%JAVA_DIR%\bin\javac MclTest.java
 
-echo [[run Bn256Test]]
+echo [[run MclTest]]
 set TOP_DIR=..\..
 pushd %TOP_DIR%\bin
-%JAVA_DIR%\bin\java -classpath ../ffi/java Bn256Test %1 %2 %3 %4 %5 %6
+%JAVA_DIR%\bin\java -classpath ../ffi/java MclTest %1 %2 %3 %4 %5 %6
 popd

From db79a61ed095cb8d9748964e678ba88fc5f54452 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 4 Jul 2019 09:21:02 +0900
Subject: [PATCH 008/553] [java] fix path for mac

---
 ffi/java/Makefile                   |  4 +-
 ffi/java/com/herumi/mcl/Mcl.java    | 16 +----
 ffi/java/com/herumi/mcl/MclJNI.java |  7 +--
 ffi/java/mcl_impl.hpp               |  3 -
 ffi/java/mcl_wrap.cxx               | 91 +----------------------------
 5 files changed, 8 insertions(+), 113 deletions(-)

diff --git a/ffi/java/Makefile b/ffi/java/Makefile
index 9d2c656f..98dbe63e 100644
--- a/ffi/java/Makefile
+++ b/ffi/java/Makefile
@@ -1,9 +1,9 @@
 TOP_DIR=../..
 include $(TOP_DIR)/common.mk
 ifeq ($(UNAME_S),Darwin)
-  JAVA_INC=-I/System/Library/Frameworks/JavaVM.framework/Versions/Current/Headers/
+  JAVA_INC?=-I/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/JavaVM.framework/Headers/
 else
-  JAVA_INC=-I/usr/lib/jvm/default-java/include
+  JAVA_INC?=-I/usr/lib/jvm/default-java/include
 #JAVA_INC=-I/usr/lib/jvm/java-7-openjdk-amd64/include
   CFLAGS+=-z noexecstack
   LDFLAGS+=-lrt
diff --git a/ffi/java/com/herumi/mcl/Mcl.java b/ffi/java/com/herumi/mcl/Mcl.java
index c33e0465..0088f1c6 100644
--- a/ffi/java/com/herumi/mcl/Mcl.java
+++ b/ffi/java/com/herumi/mcl/Mcl.java
@@ -42,7 +42,7 @@ public static void div(Fr z, Fr x, Fr y) {
   }
 
   public static void pow(GT z, GT x, Fr y) {
-    MclJNI.pow__SWIG_0(GT.getCPtr(z), z, GT.getCPtr(x), x, Fr.getCPtr(y), y);
+    MclJNI.pow(GT.getCPtr(z), z, GT.getCPtr(x), x, Fr.getCPtr(y), y);
   }
 
   public static void neg(Fp y, Fp x) {
@@ -61,22 +61,10 @@ public static void mul(Fp z, Fp x, Fp y) {
     MclJNI.mul__SWIG_3(Fp.getCPtr(z), z, Fp.getCPtr(x), x, Fp.getCPtr(y), y);
   }
 
-  public static void mul(G1 z, G1 x, Fp y) {
-    MclJNI.mul__SWIG_4(G1.getCPtr(z), z, G1.getCPtr(x), x, Fp.getCPtr(y), y);
-  }
-
-  public static void mul(G2 z, G2 x, Fp y) {
-    MclJNI.mul__SWIG_5(G2.getCPtr(z), z, G2.getCPtr(x), x, Fp.getCPtr(y), y);
-  }
-
   public static void div(Fp z, Fp x, Fp y) {
     MclJNI.div__SWIG_1(Fp.getCPtr(z), z, Fp.getCPtr(x), x, Fp.getCPtr(y), y);
   }
 
-  public static void pow(GT z, GT x, Fp y) {
-    MclJNI.pow__SWIG_1(GT.getCPtr(z), z, GT.getCPtr(x), x, Fp.getCPtr(y), y);
-  }
-
   public static void neg(G1 y, G1 x) {
     MclJNI.neg__SWIG_2(G1.getCPtr(y), y, G1.getCPtr(x), x);
   }
@@ -122,7 +110,7 @@ public static void hashAndMapToG2(G2 P, byte[] cbuf) {
   }
 
   public static void mul(GT z, GT x, GT y) {
-    MclJNI.mul__SWIG_6(GT.getCPtr(z), z, GT.getCPtr(x), x, GT.getCPtr(y), y);
+    MclJNI.mul__SWIG_4(GT.getCPtr(z), z, GT.getCPtr(x), x, GT.getCPtr(y), y);
   }
 
 }
diff --git a/ffi/java/com/herumi/mcl/MclJNI.java b/ffi/java/com/herumi/mcl/MclJNI.java
index 9e7c0086..515cba0f 100644
--- a/ffi/java/com/herumi/mcl/MclJNI.java
+++ b/ffi/java/com/herumi/mcl/MclJNI.java
@@ -17,7 +17,7 @@ public class MclJNI {
   public final static native void mul__SWIG_1(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_, long jarg3, Fr jarg3_);
   public final static native void mul__SWIG_2(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_, long jarg3, Fr jarg3_);
   public final static native void div__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
-  public final static native void pow__SWIG_0(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, Fr jarg3_);
+  public final static native void pow(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, Fr jarg3_);
   public final static native long new_Fr__SWIG_0();
   public final static native long new_Fr__SWIG_1(long jarg1, Fr jarg1_);
   public final static native long new_Fr__SWIG_2(int jarg1);
@@ -38,10 +38,7 @@ public class MclJNI {
   public final static native void add__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
   public final static native void sub__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
   public final static native void mul__SWIG_3(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
-  public final static native void mul__SWIG_4(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_, long jarg3, Fp jarg3_);
-  public final static native void mul__SWIG_5(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_, long jarg3, Fp jarg3_);
   public final static native void div__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
-  public final static native void pow__SWIG_1(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, Fp jarg3_);
   public final static native long new_Fp__SWIG_0();
   public final static native long new_Fp__SWIG_1(long jarg1, Fp jarg1_);
   public final static native long new_Fp__SWIG_2(int jarg1);
@@ -95,7 +92,7 @@ public class MclJNI {
   public final static native void G2_deserialize(long jarg1, G2 jarg1_, byte[] jarg2);
   public final static native byte[] G2_serialize(long jarg1, G2 jarg1_);
   public final static native void delete_G2(long jarg1);
-  public final static native void mul__SWIG_6(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, GT jarg3_);
+  public final static native void mul__SWIG_4(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, GT jarg3_);
   public final static native long new_GT__SWIG_0();
   public final static native long new_GT__SWIG_1(long jarg1, GT jarg1_);
   public final static native boolean GT_equals(long jarg1, GT jarg1_, long jarg2, GT jarg2_);
diff --git a/ffi/java/mcl_impl.hpp b/ffi/java/mcl_impl.hpp
index 76f4ff9a..ed9fb07a 100644
--- a/ffi/java/mcl_impl.hpp
+++ b/ffi/java/mcl_impl.hpp
@@ -122,10 +122,7 @@ class Fp {
 	friend void add(Fp& z, const Fp& x, const Fp& y);
 	friend void sub(Fp& z, const Fp& x, const Fp& y);
 	friend void mul(Fp& z, const Fp& x, const Fp& y);
-	friend void mul(G1& z, const G1& x, const Fp& y);
-	friend void mul(G2& z, const G2& x, const Fp& y);
 	friend void div(Fp& z, const Fp& x, const Fp& y);
-	friend void pow(GT& z, const GT& x, const Fp& y);
 public:
 	Fp() {}
 	Fp(const Fp& rhs) : self_(rhs.self_) {}
diff --git a/ffi/java/mcl_wrap.cxx b/ffi/java/mcl_wrap.cxx
index 4999bca1..c4ea085e 100644
--- a/ffi/java/mcl_wrap.cxx
+++ b/ffi/java/mcl_wrap.cxx
@@ -460,7 +460,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_div_1_1SWIG_10(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_pow_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_pow(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   GT *arg1 = 0 ;
   GT *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -934,64 +934,6 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_13(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  G1 *arg1 = 0 ;
-  G1 *arg2 = 0 ;
-  Fp *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(G1 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 & reference is null");
-    return ;
-  } 
-  arg2 = *(G1 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G1 const & reference is null");
-    return ;
-  } 
-  arg3 = *(Fp **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
-    return ;
-  } 
-  mul(*arg1,(G1 const &)*arg2,(Fp const &)*arg3);
-}
-
-
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_15(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  G2 *arg1 = 0 ;
-  G2 *arg2 = 0 ;
-  Fp *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(G2 **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 & reference is null");
-    return ;
-  } 
-  arg2 = *(G2 **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "G2 const & reference is null");
-    return ;
-  } 
-  arg3 = *(Fp **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
-    return ;
-  } 
-  mul(*arg1,(G2 const &)*arg2,(Fp const &)*arg3);
-}
-
-
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_div_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
@@ -1021,35 +963,6 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_div_1_1SWIG_11(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_pow_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
-  GT *arg1 = 0 ;
-  GT *arg2 = 0 ;
-  Fp *arg3 = 0 ;
-  
-  (void)jenv;
-  (void)jcls;
-  (void)jarg1_;
-  (void)jarg2_;
-  (void)jarg3_;
-  arg1 = *(GT **)&jarg1;
-  if (!arg1) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT & reference is null");
-    return ;
-  } 
-  arg2 = *(GT **)&jarg2;
-  if (!arg2) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT const & reference is null");
-    return ;
-  } 
-  arg3 = *(Fp **)&jarg3;
-  if (!arg3) {
-    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
-    return ;
-  } 
-  pow(*arg1,(GT const &)*arg2,(Fp const &)*arg3);
-}
-
-
 SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
   jlong jresult = 0 ;
   Fp *result = 0 ;
@@ -2327,7 +2240,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1G2(JNIEnv *jenv, jcla
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_16(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   GT *arg1 = 0 ;
   GT *arg2 = 0 ;
   GT *arg3 = 0 ;

From d403ab84c63ad8f8dc8aa5d77712aaa6153bb025 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 4 Jul 2019 09:31:32 +0900
Subject: [PATCH 009/553] [java] support windows

---
 ffi/java/make_wrap.bat | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ffi/java/make_wrap.bat b/ffi/java/make_wrap.bat
index b7008bc0..3e6ee700 100644
--- a/ffi/java/make_wrap.bat
+++ b/ffi/java/make_wrap.bat
@@ -5,9 +5,11 @@ set SWIG=..\..\..\..\p\swig\swig.exe
 set PACKAGE_NAME=com.herumi.mcl
 set PACKAGE_DIR=%PACKAGE_NAME:.=\%
 if /i "%1"=="" (
-	set NAME=elgamal
+	set NAME=mcl
+	set LIBNAME=
 ) else (
 	set NAME=%1
+	set LIBNAME=%1
 )
 
 echo [[run swig]]
@@ -15,7 +17,7 @@ mkdir %PACKAGE_DIR%
 set TOP_DIR=../..
 %SWIG% -java -package %PACKAGE_NAME% -outdir %PACKAGE_DIR% -c++ -Wall %NAME%.i
 echo [[make dll]]
-cl /MT /DNOMINMAX /LD /Ox /DNDEBUG /EHsc %NAME%_wrap.cxx %TOP_DIR%/src/fp.cpp -DMCL_NO_AUTOLINK -I%JAVA_INCLUDE% -I%JAVA_INCLUDE%\win32 -I%TOP_DIR%/include -I%TOP_DIR%/../cybozulib/include -I%TOP_DIR%/../cybozulib_ext/include -I%TOP_DIR%/../xbyak /link /LIBPATH:%TOP_DIR%/../cybozulib_ext/lib /OUT:%TOP_DIR%/bin/mcl_%NAME%.dll
+cl /MT /DNOMINMAX /LD /Ox /DNDEBUG /EHsc %NAME%_wrap.cxx %TOP_DIR%/src/fp.cpp -DMCL_NO_AUTOLINK -I%JAVA_INCLUDE% -I%JAVA_INCLUDE%\win32 -I%TOP_DIR%/include -I%TOP_DIR%/../cybozulib/include -I%TOP_DIR%/../cybozulib_ext/include -I%TOP_DIR%/../xbyak /link /LIBPATH:%TOP_DIR%/../cybozulib_ext/lib /OUT:%TOP_DIR%/bin/mcl%LIBNAME%java.dll
 
 call run-%NAME%.bat
 

From dd19f1c3506b836175b786792e9a38332738786a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 4 Jul 2019 14:19:32 +0900
Subject: [PATCH 010/553] [java] fix indent

---
 ffi/java/MclTest.java | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/ffi/java/MclTest.java b/ffi/java/MclTest.java
index ba8b4c8a..c47966dd 100644
--- a/ffi/java/MclTest.java
+++ b/ffi/java/MclTest.java
@@ -11,13 +11,13 @@ public class MclTest {
 		System.out.println("libName : " + libName);
 		System.loadLibrary(lib);
 	}
-    public static int errN = 0;
+	public static int errN = 0;
 	public static void assertEquals(String msg, String x, String y) {
 		if (x.equals(y)) {
 			System.out.println("OK : " + msg);
 		} else {
 			System.out.println("NG : " + msg + ", x = " + x + ", y = " + y);
-            errN++;
+			errN++;
 		}
 	}
 	public static void assertBool(String msg, boolean b) {
@@ -25,7 +25,7 @@ public static void assertBool(String msg, boolean b) {
 			System.out.println("OK : " + msg);
 		} else {
 			System.out.println("NG : " + msg);
-            errN++;
+			errN++;
 		}
 	}
 	public static void testCurve(int curveType, String name) {
@@ -109,11 +109,11 @@ public static void testCurve(int curveType, String name) {
 			assertBool("e1 == e2", e1.equals(e2));
 
 			BLSsignature(Q);
-            if (errN == 0) {
-                System.out.println("all test passed");
-            } else {
-                System.out.println("ERR=" + errN);
-            }
+			if (errN == 0) {
+				System.out.println("all test passed");
+			} else {
+				System.out.println("ERR=" + errN);
+			}
 		} catch (RuntimeException e) {
 			System.out.println("unknown exception :" + e);
 		}

From 4797680018534e497cde399ac0df31274b155b3e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 4 Jul 2019 14:35:39 +0900
Subject: [PATCH 011/553] [she] fix compile error of she-wasm

---
 src/she_c_impl.hpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/she_c_impl.hpp b/src/she_c_impl.hpp
index 8cfa6d0c..8dd0a54f 100644
--- a/src/she_c_impl.hpp
+++ b/src/she_c_impl.hpp
@@ -167,7 +167,7 @@ void sheGetPublicKey(shePublicKey *pub, const sheSecretKey *sec)
 	cast(sec)->getPublicKey(*cast(pub));
 }
 
-static int setRangeForDLP(void (*f)(mclSize), mclSize hashSize)
+static int wrapSetRangeForDLP(void f(size_t), mclSize hashSize)
 	try
 {
 	f(hashSize);
@@ -178,19 +178,19 @@ static int setRangeForDLP(void (*f)(mclSize), mclSize hashSize)
 
 int sheSetRangeForDLP(mclSize hashSize)
 {
-	return setRangeForDLP(SHE::setRangeForDLP, hashSize);
+	return wrapSetRangeForDLP(SHE::setRangeForDLP, hashSize);
 }
-int sheSetRangeForG1DLP(mclSize hashSize)
+int sheSetRangeForG1DLP(mclSize hashSize)
 {
-	return setRangeForDLP(SHE::setRangeForG1DLP, hashSize);
+	return wrapSetRangeForDLP(SHE::setRangeForG1DLP, hashSize);
 }
 int sheSetRangeForG2DLP(mclSize hashSize)
 {
-	return setRangeForDLP(SHE::setRangeForG2DLP, hashSize);
+	return wrapSetRangeForDLP(SHE::setRangeForG2DLP, hashSize);
 }
 int sheSetRangeForGTDLP(mclSize hashSize)
 {
-	return setRangeForDLP(SHE::setRangeForGTDLP, hashSize);
+	return wrapSetRangeForDLP(SHE::setRangeForGTDLP, hashSize);
 }
 
 void sheSetTryNum(mclSize tryNum)

From 4a5202567ce8703c21d6c2e70b44ef71b5ff7412 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 10 Jul 2019 08:29:31 +0900
Subject: [PATCH 012/553] [java] update documents

---
 ffi/java/java.md | 72 +++++++++++++++++++++---------------------------
 readme.md        |  2 +-
 2 files changed, 33 insertions(+), 41 deletions(-)

diff --git a/ffi/java/java.md b/ffi/java/java.md
index b0132ca8..0e6d3cb2 100644
--- a/ffi/java/java.md
+++ b/ffi/java/java.md
@@ -1,13 +1,14 @@
 # JNI for mcl (experimental)
 This library provides functionality to compute the optimal ate pairing
-over Barreto-Naehrig (BN) curves.
+over Barreto-Naehrig (BN) or BLS12-381 curves.
 
 # Initialization
-Load the library `mcl_bn256`.
+Load the library `mcljava`.
 ```
 import com.herumi.mcl.*;
 
-System.loadLibrary("mcl_bn256");
+System.loadLibrary("mcljava");
+Mcl.SystemInit(curveType); // curveType = Mcl.BN254 or Mcl.BLS12_381
 ```
 
 # Classes
@@ -22,74 +23,65 @@ System.loadLibrary("mcl_bn256");
 * `Fr::setInt(int x)` ; set by x
 * `Fr::setStr(String str)` ; set by str such as "123", "0xfff", etc.
 * `Fr::setByCSPRNG()` ; randomly set
-* `Bn256.neg(Fr y, Fr x)` ; `y = -x`
-* `Bn256.add(Fr z, Fr x, Fr y)` ; `z = x + y`
-* `Bn256.sub(Fr z, Fr x, Fr y)` ; `z = x - y`
-* `Bn256.mul(Fr z, Fr x, Fr y)` ; `z = x * y`
-* `Bn256.div(Fr z, Fr x, Fr y)` ; `z = x / y`
+* `Mcl.neg(Fr y, Fr x)` ; `y = -x`
+* `Mcl.add(Fr z, Fr x, Fr y)` ; `z = x + y`
+* `Mcl.sub(Fr z, Fr x, Fr y)` ; `z = x - y`
+* `Mcl.mul(Fr z, Fr x, Fr y)` ; `z = x * y`
+* `Mcl.div(Fr z, Fr x, Fr y)` ; `z = x / y`
 
 ## G1
 
-* `G1::set(String x, String y)` ; set by (x, y)
-* `G1::hashAndMapToG1(String m)` ; take SHA-256 of m and map it to an element of G1
-* `G1::setStr(String str)` ; set by the result of `toString()` method
-* `Bn256.neg(G1 y, G1 x)` ; `y = -x`
-* `Bn256.dbl(G1 y, G1 x)` ; `y = 2x`
-* `Bn256.add(G1 z, G1 x, G1 y)` ; `z = x + y`
-* `Bn256.sub(G1 z, G1 x, G1 y)` ; `z = x - y`
-* `Bn256.mul(G1 z, G1 x, Fr y)` ; `z = x * y`
+* `Mcl.neg(G1 y, G1 x)` ; `y = -x`
+* `Mcl.dbl(G1 y, G1 x)` ; `y = 2x`
+* `Mcl.add(G1 z, G1 x, G1 y)` ; `z = x + y`
+* `Mcl.sub(G1 z, G1 x, G1 y)` ; `z = x - y`
+* `Mcl.mul(G1 z, G1 x, Fr y)` ; `z = x * y`
 
 ## G2
 
-* `G2::set(String xa, String xb, String ya, String yb)` ; set by ((xa, xb), (ya, yb))
-* `G2::setStr(String str)` ; set by the result of `toString()` method
-* `Bn256.neg(G2 y, G2 x)` ; `y = -x`
-* `Bn256.dbl(G2 y, G2 x)` ; `y = 2x`
-* `Bn256.add(G2 z, G2 x, G2 y)` ; `z = x + y`
-* `Bn256.sub(G2 z, G2 x, G2 y)` ; `z = x - y`
-* `Bn256.mul(G2 z, G2 x, Fr y)` ; `z = x * y`
+* `Mcl.neg(G2 y, G2 x)` ; `y = -x`
+* `Mcl.dbl(G2 y, G2 x)` ; `y = 2x`
+* `Mcl.add(G2 z, G2 x, G2 y)` ; `z = x + y`
+* `Mcl.sub(G2 z, G2 x, G2 y)` ; `z = x - y`
+* `Mcl.mul(G2 z, G2 x, Fr y)` ; `z = x * y`
 
 ## GT
 
 * `GT::setStr(String str)` ; set by the result of `toString()` method
-* `Bn256.mul(GT z, GT x, GT y)` ; `z = x * y`
-* `Bn256.pow(GT z, GT x, Fr y)` ; `z = x ^ y`
+* `Mcl.mul(GT z, GT x, GT y)` ; `z = x * y`
+* `Mcl.pow(GT z, GT x, Fr y)` ; `z = x ^ y`
 
 ## pairing
-* `Bn256.pairing(GT e, G1 P, G2 Q)` ; e = e(P, Q)
+* `Mcl.pairing(GT e, G1 P, G2 Q)` ; e = e(P, Q)
 
 # BLS signature sample
 ```
-String xa = "12723517038133731887338407189719511622662176727675373276651903807414909099441";
-String xb = "4168783608814932154536427934509895782246573715297911553964171371032945126671";
-String ya = "13891744915211034074451795021214165905772212241412891944830863846330766296736";
-String yb = "7937318970632701341203597196594272556916396164729705624521405069090520231616";
-
-G2 Q = new G2(xa, xb, ya, yb); // fixed point of G2
+G2 Q = new G2();
+Mcl.hashAndMapToG2(Q, "abc".getBytes());
 
 Fr s = new Fr();
 s.setByCSPRNG(); // secret key
 G2 pub = new G2();
-Bn256.mul(pub, Q, s); // public key = sQ
+Mcl.mul(pub, Q, s); // public key = sQ
 
 String m = "signature test";
 G1 H = new G1();
-H.hashAndMapToG1(m); // H = Hash(m)
+Mcl.hashAndMapToG1(H, m.getBytes()); // H = Hash(m)
 G1 sign = new G1();
-Bn256.mul(sign, H, s); // signature of m = s H
+Mcl.mul(sign, H, s); // signature of m = s H
 
 GT e1 = new GT();
 GT e2 = new GT();
-Bn256.pairing(e1, H, pub); // e1 = e(H, s Q)
-Bn256.pairing(e2, sign, Q); // e2 = e(s H, Q);
+Mcl.pairing(e1, H, pub); // e1 = e(H, s Q)
+Mcl.pairing(e2, sign, Q); // e2 = e(s H, Q);
 assertBool("verify signature", e1.equals(e2));
 ```
 
 # Make test
 ```
-cd java
-make test_bn256
+cd ffi/java
+make test
 ```
 
 # Sample code
-[Bn256Test.java](https://github.com/herumi/mcl/blob/master/java/Bn256Test.java)
+[MclTest.java](https://github.com/herumi/mcl/blob/master/ffi/java/MclTest.java)
diff --git a/readme.md b/readme.md
index a7035817..92467102 100644
--- a/readme.md
+++ b/readme.md
@@ -430,7 +430,7 @@ K. Shimizu, and T. Teruya. ASIACCS 2018
 * [she-api(Japanese)](https://github.com/herumi/mcl/blob/master/misc/she/she-api-ja.md)
 
 # Java API
-See [java.md](https://github.com/herumi/mcl/blob/master/java/java.md)
+See [java.md](https://github.com/herumi/mcl/blob/master/ffi/java/java.md)
 
 # License
 

From 7b21820c8a4c43ad9031517650cb061645ae48a4 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 10 Jul 2019 10:57:06 +0900
Subject: [PATCH 013/553] define hash for g++ of Mac

---
 include/cybozu/hash.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/cybozu/hash.hpp b/include/cybozu/hash.hpp
index 3fd246fa..46d8a119 100644
--- a/include/cybozu/hash.hpp
+++ b/include/cybozu/hash.hpp
@@ -54,7 +54,7 @@ namespace std { CYBOZU_NAMESPACE_TR1_BEGIN
 	#pragma warning(push)
 	#pragma warning(disable : 4099) // missmatch class and struct
 #endif
-#ifndef __APPLE__
+#if !(defined(__APPLE__) && defined(__clang__))
 template<class T>
 struct hash;
 #endif

From a139f3291e1dae6ddad2172114fe519864a948f7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 10 Jul 2019 10:58:11 +0900
Subject: [PATCH 014/553] [java] ignore -Wdeprecated

---
 ffi/java/elgamal_impl.hpp | 9 +++++++++
 ffi/java/mcl_impl.hpp     | 8 ++++++++
 2 files changed, 17 insertions(+)

diff --git a/ffi/java/elgamal_impl.hpp b/ffi/java/elgamal_impl.hpp
index dbf2ba64..d7130c29 100644
--- a/ffi/java/elgamal_impl.hpp
+++ b/ffi/java/elgamal_impl.hpp
@@ -13,6 +13,11 @@ typedef mcl::FpT<mcl::ZnTag, 521> Zn;
 typedef mcl::EcT<Fp> Ec;
 typedef mcl::ElgamalT<Ec, Zn> Elgamal;
 
+#if defined(__GNUC__) && !defined(__EMSCRIPTEN__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated"
+#endif
+
 /*
 	init system
 	@param param [in] string such as "ecParamName hashName"
@@ -145,3 +150,7 @@ class PrivateKey {
 		self_.clearCache();
 	}
 };
+
+#if defined(__GNUC__) && !defined(__EMSCRIPTEN__) && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
diff --git a/ffi/java/mcl_impl.hpp b/ffi/java/mcl_impl.hpp
index ed9fb07a..8eb23737 100644
--- a/ffi/java/mcl_impl.hpp
+++ b/ffi/java/mcl_impl.hpp
@@ -2,6 +2,11 @@
 #include <stdint.h>
 #include <sstream>
 
+#if defined(__GNUC__) && !defined(__EMSCRIPTEN__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated"
+#endif
+
 void SystemInit(int curveType) throw(std::exception)
 {
 	mcl::CurveParam cp;
@@ -381,3 +386,6 @@ void hashAndMapToG2(G2& P, const char *cbuf, size_t bufSize) throw(std::exceptio
 	mcl::bn::hashAndMapToG2(P.self_, cbuf, bufSize);
 }
 
+#if defined(__GNUC__) && !defined(__EMSCRIPTEN__) && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif

From d893c71d870e36d69f81911b70825e0601498768 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 10 Jul 2019 10:58:43 +0900
Subject: [PATCH 015/553] [java] change C++ version

---
 ffi/java/Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ffi/java/Makefile b/ffi/java/Makefile
index 98dbe63e..f8172c31 100644
--- a/ffi/java/Makefile
+++ b/ffi/java/Makefile
@@ -9,7 +9,6 @@ else
   LDFLAGS+=-lrt
 endif
 CFLAGS+=$(JAVA_INC) $(JAVA_INC)/linux -I $(TOP_DIR)/include -I $(TOP_DIR)/../xbyak -I $(TOP_DIR)/../cybozulib/include -Wno-strict-aliasing
-CFLAGS+=-std=c++03
 MCL_LIB=$(TOP_DIR)/lib/libmcl.a
 
 PACKAGE_NAME=com.herumi.mcl

From 9d7d6f50416abc4db9cbf3fc852202bd3e596d3b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 10 Jul 2019 10:59:10 +0900
Subject: [PATCH 016/553] [java] use Swig 4.0.0

---
 ffi/java/com/herumi/mcl/CipherText.java      |   3 +-
 ffi/java/com/herumi/mcl/Elgamal.java         |   2 +-
 ffi/java/com/herumi/mcl/ElgamalJNI.java      |   2 +-
 ffi/java/com/herumi/mcl/Fp.java              |   3 +-
 ffi/java/com/herumi/mcl/Fr.java              |   3 +-
 ffi/java/com/herumi/mcl/G1.java              |   3 +-
 ffi/java/com/herumi/mcl/G2.java              |   3 +-
 ffi/java/com/herumi/mcl/GT.java              |   3 +-
 ffi/java/com/herumi/mcl/Mcl.java             |   2 +-
 ffi/java/com/herumi/mcl/MclConstants.java    |   2 +-
 ffi/java/com/herumi/mcl/MclJNI.java          |   2 +-
 ffi/java/com/herumi/mcl/PrivateKey.java      |   3 +-
 ffi/java/com/herumi/mcl/PublicKey.java       |   3 +-
 ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java |   2 +-
 ffi/java/elgamal_wrap.cxx                    | 114 ++++---------
 ffi/java/mcl_wrap.cxx                        | 166 +++++--------------
 16 files changed, 93 insertions(+), 223 deletions(-)

diff --git a/ffi/java/com/herumi/mcl/CipherText.java b/ffi/java/com/herumi/mcl/CipherText.java
index 3437695d..87175bbb 100644
--- a/ffi/java/com/herumi/mcl/CipherText.java
+++ b/ffi/java/com/herumi/mcl/CipherText.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
@@ -21,6 +21,7 @@ protected static long getCPtr(CipherText obj) {
     return (obj == null) ? 0 : obj.swigCPtr;
   }
 
+  @SuppressWarnings("deprecation")
   protected void finalize() {
     delete();
   }
diff --git a/ffi/java/com/herumi/mcl/Elgamal.java b/ffi/java/com/herumi/mcl/Elgamal.java
index ee9e7ec3..8249c842 100644
--- a/ffi/java/com/herumi/mcl/Elgamal.java
+++ b/ffi/java/com/herumi/mcl/Elgamal.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
diff --git a/ffi/java/com/herumi/mcl/ElgamalJNI.java b/ffi/java/com/herumi/mcl/ElgamalJNI.java
index 0f9e029e..67f0f220 100644
--- a/ffi/java/com/herumi/mcl/ElgamalJNI.java
+++ b/ffi/java/com/herumi/mcl/ElgamalJNI.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
diff --git a/ffi/java/com/herumi/mcl/Fp.java b/ffi/java/com/herumi/mcl/Fp.java
index aa4d1e8b..bac8b549 100644
--- a/ffi/java/com/herumi/mcl/Fp.java
+++ b/ffi/java/com/herumi/mcl/Fp.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
@@ -21,6 +21,7 @@ protected static long getCPtr(Fp obj) {
     return (obj == null) ? 0 : obj.swigCPtr;
   }
 
+  @SuppressWarnings("deprecation")
   protected void finalize() {
     delete();
   }
diff --git a/ffi/java/com/herumi/mcl/Fr.java b/ffi/java/com/herumi/mcl/Fr.java
index 2127f9d0..94656c62 100644
--- a/ffi/java/com/herumi/mcl/Fr.java
+++ b/ffi/java/com/herumi/mcl/Fr.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
@@ -21,6 +21,7 @@ protected static long getCPtr(Fr obj) {
     return (obj == null) ? 0 : obj.swigCPtr;
   }
 
+  @SuppressWarnings("deprecation")
   protected void finalize() {
     delete();
   }
diff --git a/ffi/java/com/herumi/mcl/G1.java b/ffi/java/com/herumi/mcl/G1.java
index ec640b6c..2492a3b5 100644
--- a/ffi/java/com/herumi/mcl/G1.java
+++ b/ffi/java/com/herumi/mcl/G1.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
@@ -21,6 +21,7 @@ protected static long getCPtr(G1 obj) {
     return (obj == null) ? 0 : obj.swigCPtr;
   }
 
+  @SuppressWarnings("deprecation")
   protected void finalize() {
     delete();
   }
diff --git a/ffi/java/com/herumi/mcl/G2.java b/ffi/java/com/herumi/mcl/G2.java
index 2480dd35..5bd7e664 100644
--- a/ffi/java/com/herumi/mcl/G2.java
+++ b/ffi/java/com/herumi/mcl/G2.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
@@ -21,6 +21,7 @@ protected static long getCPtr(G2 obj) {
     return (obj == null) ? 0 : obj.swigCPtr;
   }
 
+  @SuppressWarnings("deprecation")
   protected void finalize() {
     delete();
   }
diff --git a/ffi/java/com/herumi/mcl/GT.java b/ffi/java/com/herumi/mcl/GT.java
index fdc590d5..3aefca68 100644
--- a/ffi/java/com/herumi/mcl/GT.java
+++ b/ffi/java/com/herumi/mcl/GT.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
@@ -21,6 +21,7 @@ protected static long getCPtr(GT obj) {
     return (obj == null) ? 0 : obj.swigCPtr;
   }
 
+  @SuppressWarnings("deprecation")
   protected void finalize() {
     delete();
   }
diff --git a/ffi/java/com/herumi/mcl/Mcl.java b/ffi/java/com/herumi/mcl/Mcl.java
index 0088f1c6..c292c1ea 100644
--- a/ffi/java/com/herumi/mcl/Mcl.java
+++ b/ffi/java/com/herumi/mcl/Mcl.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
diff --git a/ffi/java/com/herumi/mcl/MclConstants.java b/ffi/java/com/herumi/mcl/MclConstants.java
index c972e0a9..6d31b200 100644
--- a/ffi/java/com/herumi/mcl/MclConstants.java
+++ b/ffi/java/com/herumi/mcl/MclConstants.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
diff --git a/ffi/java/com/herumi/mcl/MclJNI.java b/ffi/java/com/herumi/mcl/MclJNI.java
index 515cba0f..ccb56a44 100644
--- a/ffi/java/com/herumi/mcl/MclJNI.java
+++ b/ffi/java/com/herumi/mcl/MclJNI.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
diff --git a/ffi/java/com/herumi/mcl/PrivateKey.java b/ffi/java/com/herumi/mcl/PrivateKey.java
index 01487e0b..96603e52 100644
--- a/ffi/java/com/herumi/mcl/PrivateKey.java
+++ b/ffi/java/com/herumi/mcl/PrivateKey.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
@@ -21,6 +21,7 @@ protected static long getCPtr(PrivateKey obj) {
     return (obj == null) ? 0 : obj.swigCPtr;
   }
 
+  @SuppressWarnings("deprecation")
   protected void finalize() {
     delete();
   }
diff --git a/ffi/java/com/herumi/mcl/PublicKey.java b/ffi/java/com/herumi/mcl/PublicKey.java
index f114666f..8da13c0c 100644
--- a/ffi/java/com/herumi/mcl/PublicKey.java
+++ b/ffi/java/com/herumi/mcl/PublicKey.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
@@ -21,6 +21,7 @@ protected static long getCPtr(PublicKey obj) {
     return (obj == null) ? 0 : obj.swigCPtr;
   }
 
+  @SuppressWarnings("deprecation")
   protected void finalize() {
     delete();
   }
diff --git a/ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java b/ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java
index 4ca620d2..d49f742e 100644
--- a/ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java
+++ b/ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
diff --git a/ffi/java/elgamal_wrap.cxx b/ffi/java/elgamal_wrap.cxx
index 38d05f48..15e29952 100644
--- a/ffi/java/elgamal_wrap.cxx
+++ b/ffi/java/elgamal_wrap.cxx
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * This file is not intended to be easily readable and contains a number of
  * coding conventions designed to improve portability and efficiency. Do not make
@@ -358,12 +358,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_SystemInit(JNIEnv *jenv,
   jenv->ReleaseStringUTFChars(jarg1, arg1_pstr); 
   try {
     SystemInit((std::string const &)*arg1);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -378,12 +376,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_ElgamalJNI_CipherText_1toStr(JNIE
   arg1 = *(CipherText **)&jarg1; 
   try {
     result = ((CipherText const *)arg1)->toStr();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -400,12 +396,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_ElgamalJNI_CipherText_1toString(J
   arg1 = *(CipherText **)&jarg1; 
   try {
     result = ((CipherText const *)arg1)->toString();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -430,12 +424,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_CipherText_1fromStr(JNIEn
   jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
   try {
     (arg1)->fromStr((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -455,12 +447,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_CipherText_1add(JNIEnv *j
   } 
   try {
     (arg1)->add((CipherText const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -475,12 +465,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_CipherText_1mul_1_1SWIG_1
   arg2 = (int)jarg2; 
   try {
     (arg1)->mul(arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -503,12 +491,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_CipherText_1mul_1_1SWIG_1
   jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
   try {
     (arg1)->mul((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -545,12 +531,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_ElgamalJNI_PublicKey_1toStr(JNIEn
   arg1 = *(PublicKey **)&jarg1; 
   try {
     result = ((PublicKey const *)arg1)->toStr();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -567,12 +551,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_ElgamalJNI_PublicKey_1toString(JN
   arg1 = *(PublicKey **)&jarg1; 
   try {
     result = ((PublicKey const *)arg1)->toString();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -597,12 +579,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PublicKey_1fromStr(JNIEnv
   jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
   try {
     (arg1)->fromStr((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -625,12 +605,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PublicKey_1save(JNIEnv *j
   jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
   try {
     ((PublicKey const *)arg1)->save((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -653,12 +631,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PublicKey_1load(JNIEnv *j
   jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
   try {
     (arg1)->load((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -680,12 +656,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PublicKey_1enc_1_1SWIG_10
   arg3 = (int)jarg3; 
   try {
     ((PublicKey const *)arg1)->enc(*arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -715,12 +689,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PublicKey_1enc_1_1SWIG_11
   jenv->ReleaseStringUTFChars(jarg3, arg3_pstr); 
   try {
     ((PublicKey const *)arg1)->enc(*arg2,(std::string const &)*arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -740,12 +712,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PublicKey_1rerandomize(JN
   } 
   try {
     ((PublicKey const *)arg1)->rerandomize(*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -767,12 +737,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PublicKey_1add_1_1SWIG_10
   arg3 = (int)jarg3; 
   try {
     ((PublicKey const *)arg1)->add(*arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -802,12 +770,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PublicKey_1add_1_1SWIG_11
   jenv->ReleaseStringUTFChars(jarg3, arg3_pstr); 
   try {
     ((PublicKey const *)arg1)->add(*arg2,(std::string const &)*arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -844,12 +810,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_ElgamalJNI_PrivateKey_1toStr(JNIE
   arg1 = *(PrivateKey **)&jarg1; 
   try {
     result = ((PrivateKey const *)arg1)->toStr();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -866,12 +830,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_ElgamalJNI_PrivateKey_1toString(J
   arg1 = *(PrivateKey **)&jarg1; 
   try {
     result = ((PrivateKey const *)arg1)->toString();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -896,12 +858,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PrivateKey_1fromStr(JNIEn
   jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
   try {
     (arg1)->fromStr((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -924,12 +884,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PrivateKey_1save(JNIEnv *
   jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
   try {
     ((PrivateKey const *)arg1)->save((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -952,12 +910,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PrivateKey_1load(JNIEnv *
   jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
   try {
     (arg1)->load((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -970,12 +926,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PrivateKey_1init(JNIEnv *
   arg1 = *(PrivateKey **)&jarg1; 
   try {
     (arg1)->init();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -990,12 +944,10 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_ElgamalJNI_PrivateKey_1getPublicKey
   arg1 = *(PrivateKey **)&jarg1; 
   try {
     result = ((PrivateKey const *)arg1)->getPublicKey();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   *(PublicKey **)&jresult = new PublicKey((const PublicKey &)result); 
   return jresult;
 }
@@ -1021,12 +973,10 @@ SWIGEXPORT jint JNICALL Java_com_herumi_mcl_ElgamalJNI_PrivateKey_1dec_1_1SWIG_1
   arg3 = *(bool **)&jarg3; 
   try {
     result = (int)((PrivateKey const *)arg1)->dec((CipherText const &)*arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = (jint)result; 
   return jresult;
 }
@@ -1050,12 +1000,10 @@ SWIGEXPORT jint JNICALL Java_com_herumi_mcl_ElgamalJNI_PrivateKey_1dec_1_1SWIG_1
   } 
   try {
     result = (int)((PrivateKey const *)arg1)->dec((CipherText const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = (jint)result; 
   return jresult;
 }
@@ -1074,12 +1022,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PrivateKey_1setCache(JNIE
   arg3 = (int)jarg3; 
   try {
     (arg1)->setCache(arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -1092,12 +1038,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_ElgamalJNI_PrivateKey_1clearCache(JN
   arg1 = *(PrivateKey **)&jarg1; 
   try {
     (arg1)->clearCache();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
diff --git a/ffi/java/mcl_wrap.cxx b/ffi/java/mcl_wrap.cxx
index c4ea085e..6446e7f8 100644
--- a/ffi/java/mcl_wrap.cxx
+++ b/ffi/java/mcl_wrap.cxx
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 3.0.12
+ * Version 4.0.0
  *
  * This file is not intended to be easily readable and contains a number of
  * coding conventions designed to improve portability and efficiency. Do not make
@@ -255,12 +255,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_SystemInit(JNIEnv *jenv, jcla
   arg1 = (int)jarg1; 
   try {
     SystemInit(arg1);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -554,12 +552,10 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_13(JNIEnv *j
   arg2 = (int)jarg2; 
   try {
     result = (Fr *)new Fr((std::string const &)*arg1,arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   *(Fr **)&jresult = result; 
   return jresult;
 }
@@ -583,12 +579,10 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_14(JNIEnv *j
   jenv->ReleaseStringUTFChars(jarg1, arg1_pstr); 
   try {
     result = (Fr *)new Fr((std::string const &)*arg1);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   *(Fr **)&jresult = result; 
   return jresult;
 }
@@ -637,12 +631,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setStr_1_1SWIG_10(JNIEnv
   arg3 = (int)jarg3; 
   try {
     (arg1)->setStr((std::string const &)*arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -665,12 +657,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setStr_1_1SWIG_11(JNIEnv
   jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
   try {
     (arg1)->setStr((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -722,12 +712,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fr_1toString_1_1SWIG_10(JN
   arg2 = (int)jarg2; 
   try {
     result = ((Fr const *)arg1)->toString(arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -744,12 +732,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fr_1toString_1_1SWIG_11(JN
   arg1 = *(Fr **)&jarg1; 
   try {
     result = ((Fr const *)arg1)->toString();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -775,12 +761,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1deserialize(JNIEnv *jenv,
   }
   try {
     (arg1)->deserialize((char const *)arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
   {
     if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
   }
@@ -801,12 +785,10 @@ SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_Fr_1serialize(JNIEnv *j
   arg1 = *(Fr **)&jarg1; 
   try {
     ((Fr const *)arg1)->serialize(*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   {
     jresult = jenv->NewByteArray(arg2->size());
     jenv->SetByteArrayRegion(jresult, 0, arg2->size(), (const jbyte*)arg2->c_str());
@@ -1028,12 +1010,10 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_13(JNIEnv *j
   arg2 = (int)jarg2; 
   try {
     result = (Fp *)new Fp((std::string const &)*arg1,arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   *(Fp **)&jresult = result; 
   return jresult;
 }
@@ -1057,12 +1037,10 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_14(JNIEnv *j
   jenv->ReleaseStringUTFChars(jarg1, arg1_pstr); 
   try {
     result = (Fp *)new Fp((std::string const &)*arg1);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   *(Fp **)&jresult = result; 
   return jresult;
 }
@@ -1111,12 +1089,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setStr_1_1SWIG_10(JNIEnv
   arg3 = (int)jarg3; 
   try {
     (arg1)->setStr((std::string const &)*arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -1139,12 +1115,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setStr_1_1SWIG_11(JNIEnv
   jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
   try {
     (arg1)->setStr((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -1196,12 +1170,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fp_1toString_1_1SWIG_10(JN
   arg2 = (int)jarg2; 
   try {
     result = ((Fp const *)arg1)->toString(arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -1218,12 +1190,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fp_1toString_1_1SWIG_11(JN
   arg1 = *(Fp **)&jarg1; 
   try {
     result = ((Fp const *)arg1)->toString();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -1249,12 +1219,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1deserialize(JNIEnv *jenv,
   }
   try {
     (arg1)->deserialize((char const *)arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
   {
     if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
   }
@@ -1275,12 +1243,10 @@ SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_Fp_1serialize(JNIEnv *j
   arg1 = *(Fp **)&jarg1; 
   try {
     ((Fp const *)arg1)->serialize(*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   {
     jresult = jenv->NewByteArray(arg2->size());
     jenv->SetByteArrayRegion(jresult, 0, arg2->size(), (const jbyte*)arg2->c_str());
@@ -1454,12 +1420,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_hashAndMapToG1(JNIEnv *jenv,
   }
   try {
     hashAndMapToG1(*arg1,(char const *)arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
   {
     if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
   }
@@ -1520,12 +1484,10 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G1_1_1SWIG_12(JNIEnv *j
   } 
   try {
     result = (G1 *)new G1((Fp const &)*arg1,(Fp const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   *(G1 **)&jresult = result; 
   return jresult;
 }
@@ -1576,12 +1538,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1set(JNIEnv *jenv, jclass
   } 
   try {
     (arg1)->set((Fp const &)*arg2,(Fp const &)*arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -1617,12 +1577,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1setStr_1_1SWIG_10(JNIEnv
   arg3 = (int)jarg3; 
   try {
     (arg1)->setStr((std::string const &)*arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -1645,12 +1603,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1setStr_1_1SWIG_11(JNIEnv
   jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
   try {
     (arg1)->setStr((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -1667,12 +1623,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G1_1toString_1_1SWIG_10(JN
   arg2 = (int)jarg2; 
   try {
     result = ((G1 const *)arg1)->toString(arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -1689,12 +1643,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G1_1toString_1_1SWIG_11(JN
   arg1 = *(G1 **)&jarg1; 
   try {
     result = ((G1 const *)arg1)->toString();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -1720,12 +1672,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1deserialize(JNIEnv *jenv,
   }
   try {
     (arg1)->deserialize((char const *)arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
   {
     if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
   }
@@ -1746,12 +1696,10 @@ SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_G1_1serialize(JNIEnv *j
   arg1 = *(G1 **)&jarg1; 
   try {
     ((G1 const *)arg1)->serialize(*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   {
     jresult = jenv->NewByteArray(arg2->size());
     jenv->SetByteArrayRegion(jresult, 0, arg2->size(), (const jbyte*)arg2->c_str());
@@ -1896,12 +1844,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_hashAndMapToG2(JNIEnv *jenv,
   }
   try {
     hashAndMapToG2(*arg1,(char const *)arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
   {
     if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
   }
@@ -1976,12 +1922,10 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G2_1_1SWIG_12(JNIEnv *j
   } 
   try {
     result = (G2 *)new G2((Fp const &)*arg1,(Fp const &)*arg2,(Fp const &)*arg3,(Fp const &)*arg4);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   *(G2 **)&jresult = result; 
   return jresult;
 }
@@ -2046,12 +1990,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1set(JNIEnv *jenv, jclass
   } 
   try {
     (arg1)->set((Fp const &)*arg2,(Fp const &)*arg3,(Fp const &)*arg4,(Fp const &)*arg5);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -2087,12 +2029,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1setStr_1_1SWIG_10(JNIEnv
   arg3 = (int)jarg3; 
   try {
     (arg1)->setStr((std::string const &)*arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -2115,12 +2055,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1setStr_1_1SWIG_11(JNIEnv
   jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
   try {
     (arg1)->setStr((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -2137,12 +2075,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G2_1toString_1_1SWIG_10(JN
   arg2 = (int)jarg2; 
   try {
     result = ((G2 const *)arg1)->toString(arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -2159,12 +2095,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G2_1toString_1_1SWIG_11(JN
   arg1 = *(G2 **)&jarg1; 
   try {
     result = ((G2 const *)arg1)->toString();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -2190,12 +2124,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1deserialize(JNIEnv *jenv,
   }
   try {
     (arg1)->deserialize((char const *)arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
   {
     if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
   }
@@ -2216,12 +2148,10 @@ SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_G2_1serialize(JNIEnv *j
   arg1 = *(G2 **)&jarg1; 
   try {
     ((G2 const *)arg1)->serialize(*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   {
     jresult = jenv->NewByteArray(arg2->size());
     jenv->SetByteArrayRegion(jresult, 0, arg2->size(), (const jbyte*)arg2->c_str());
@@ -2354,12 +2284,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1setStr_1_1SWIG_10(JNIEnv
   arg3 = (int)jarg3; 
   try {
     (arg1)->setStr((std::string const &)*arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -2382,12 +2310,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1setStr_1_1SWIG_11(JNIEnv
   jenv->ReleaseStringUTFChars(jarg2, arg2_pstr); 
   try {
     (arg1)->setStr((std::string const &)*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
 }
 
 
@@ -2404,12 +2330,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_GT_1toString_1_1SWIG_10(JN
   arg2 = (int)jarg2; 
   try {
     result = ((GT const *)arg1)->toString(arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -2426,12 +2350,10 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_GT_1toString_1_1SWIG_11(JN
   arg1 = *(GT **)&jarg1; 
   try {
     result = ((GT const *)arg1)->toString();
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   jresult = jenv->NewStringUTF((&result)->c_str()); 
   return jresult;
 }
@@ -2457,12 +2379,10 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1deserialize(JNIEnv *jenv,
   }
   try {
     (arg1)->deserialize((char const *)arg2,arg3);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return ;
   }
-  
   {
     if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
   }
@@ -2483,12 +2403,10 @@ SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_GT_1serialize(JNIEnv *j
   arg1 = *(GT **)&jarg1; 
   try {
     ((GT const *)arg1)->serialize(*arg2);
-  }
-  catch(std::exception &_e) {
+  } catch(std::exception &_e) {
     SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
     return 0;
   }
-  
   {
     jresult = jenv->NewByteArray(arg2->size());
     jenv->SetByteArrayRegion(jresult, 0, arg2->size(), (const jbyte*)arg2->c_str());

From 38d82215f69b62f758ebe5ebcc4159815daa149c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 19 Jul 2019 16:13:09 +0900
Subject: [PATCH 017/553] [doc] fix link to reference

---
 readme.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/readme.md b/readme.md
index 92467102..40ae5a8d 100644
--- a/readme.md
+++ b/readme.md
@@ -450,8 +450,8 @@ This library contains some part of the followings software licensed by BSD-3-Cla
 * [_High-Speed Software Implementation of the Optimal Ate Pairing over Barreto-Naehrig Curves_](http://dx.doi.org/10.1007/978-3-642-17455-1_2),
    Jean-Luc Beuchat, Jorge Enrique González Díaz, Shigeo Mitsunari, Eiji Okamoto, Francisco Rodríguez-Henríquez, Tadanori Teruya,
   Pairing 2010, ([preprint](http://eprint.iacr.org/2010/354))
-* [_Faster hashing to G2_](http://dx.doi.org/10.1007/978-3-642-28496-0_25),Laura Fuentes-Castañeda,  Edward Knapp,  Francisco Rodríguez-Henríquez,
-  SAC 2011, ([preprint](https://eprint.iacr.org/2008/530))
+* [_Faster hashing to G2_](https://link.springer.com/chapter/10.1007/978-3-642-28496-0_25),Laura Fuentes-Castañeda,  Edward Knapp,  Francisco Rodríguez-Henríquez,
+  SAC 2011, ([PDF](http://cacr.uwaterloo.ca/techreports/2011/cacr2011-26.pdf))
 * [_Skew Frobenius Map and Efficient Scalar Multiplication for Pairing–Based Cryptography_](https://www.researchgate.net/publication/221282560_Skew_Frobenius_Map_and_Efficient_Scalar_Multiplication_for_Pairing-Based_Cryptography),
 Y. Sakemi, Y. Nogami, K. Okeya, Y. Morikawa, CANS 2008.
 

From c4087c70f71c73dcc8b7d76a34ccffbdc3abed28 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 22 Jul 2019 05:03:55 +0900
Subject: [PATCH 018/553] remove unused code

---
 include/mcl/vint.hpp | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/include/mcl/vint.hpp b/include/mcl/vint.hpp
index bf324f4d..40f43b26 100644
--- a/include/mcl/vint.hpp
+++ b/include/mcl/vint.hpp
@@ -552,15 +552,6 @@ size_t getRealSize(const T *x, size_t xn)
 	return 1;
 }
 
-template<class T>
-size_t getBitSize(const T *x, size_t n)
-{
-	if (n == 1 && x[0] == 0) return 1;
-	T v = x[n - 1];
-	assert(v);
-	return (n - 1) * sizeof(T) * 8 + 1 + cybozu::bsr<Unit>(v);
-}
-
 /*
 	q[qn] = x[xn] / y[yn] ; qn == xn - yn + 1 if xn >= yn if q
 	r[rn] = x[xn] % y[yn] ; rn = yn before getRealSize

From 3354927f26d2958fa717a8eb0044f3ee41e50bff Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 22 Jul 2019 15:40:31 +0900
Subject: [PATCH 019/553] add getNAFwidth

---
 include/mcl/array.hpp    |  9 ++++++
 include/mcl/gmp_util.hpp | 67 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/include/mcl/array.hpp b/include/mcl/array.hpp
index a6d2a8fa..5fa49e64 100644
--- a/include/mcl/array.hpp
+++ b/include/mcl/array.hpp
@@ -125,6 +125,15 @@ class FixedArray {
 		n_ = n;
 		return true;
 	}
+	void push(bool *pb, const T& x)
+	{
+		if (n_ == maxSize) {
+			*pb = false;
+			return;
+		}
+		p_[n_++] = x;
+		*pb = true;
+	}
 	bool copy(const FixedArray<T, maxSize>& rhs)
 	{
 		if (this == &rhs) return true;
diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp
index 117ecff7..742d3d2e 100644
--- a/include/mcl/gmp_util.hpp
+++ b/include/mcl/gmp_util.hpp
@@ -10,6 +10,7 @@
 #include <stdlib.h>
 #include <assert.h>
 #include <stdint.h>
+#include <cybozu/bit_operation.hpp>
 #ifndef CYBOZU_DONT_USE_EXCEPTION
 #include <cybozu/exception.hpp>
 #endif
@@ -434,6 +435,36 @@ inline size_t getUnitSize(const mpz_class& x)
 	return std::abs(x.get_mpz_t()->_mp_size);
 #endif
 }
+
+/*
+	get the number of lower zeros
+*/
+template<class T>
+size_t getLowerZeroBitNum(const T *x, size_t n)
+{
+	size_t ret = 0;
+	for (size_t i = 0; i < n; i++) {
+		T v = x[i];
+		if (v == 0) {
+			ret += sizeof(T) * 8;
+		} else {
+			ret += cybozu::bsf<T>(v);
+			break;
+		}
+	}
+	return ret;
+}
+
+/*
+	get the number of lower zero
+	@note x != 0
+*/
+inline size_t getLowerZeroBitNum(const mpz_class& x)
+{
+	assert(!isZero(x));
+	return getLowerZeroBitNum(getUnit(x), getUnitSize(x));
+}
+
 inline mpz_class abs(const mpz_class& x)
 {
 #ifdef MCL_USE_VINT
@@ -576,6 +607,42 @@ bool getNAF(Vec& v, const mpz_class& x)
 	}
 }
 
+/*
+	v = naf[i]
+	v = 0 or (|v| <= 2^(w-1) - 1 and odd)
+*/
+template<class Vec>
+void getNAFwidth(bool *pb, Vec& naf, mpz_class x, size_t w)
+{
+	assert(w > 0);
+	naf.clear();
+	size_t zeroNum = 0;
+	const int signedMaxW = 1 << (w - 1);
+	const int maxW = signedMaxW * 2;
+	const int maskW = maxW - 1;
+	while (!isZero(x)) {
+		size_t z = gmp::getLowerZeroBitNum(x);
+		if (z) {
+			x >>= z;
+			zeroNum += z;
+		}
+		for (size_t i = 0; i < zeroNum; i++) {
+			naf.push(pb, 0);
+			if (!*pb) return;
+		}
+		assert(!isZero(x));
+		int v = getUnit(x)[0] & maskW;
+		x >>= w;
+		if (v & signedMaxW) {
+			x++;
+			v -= maxW;
+		}
+		naf.push(pb, v);
+		if (!*pb) return;
+		zeroNum = w - 1;
+	}
+}
+
 #ifndef CYBOZU_DONT_USE_EXCEPTION
 inline void setStr(mpz_class& z, const std::string& str, int base = 0)
 {

From 0931176c7620b6e4d43ddca258b73e925f767952 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 22 Jul 2019 17:56:20 +0900
Subject: [PATCH 020/553] use wider naf for G1::mul

---
 include/mcl/bn.hpp       | 57 ++++++++++++++++++++++++++++++++++++++++
 include/mcl/gmp_util.hpp | 10 +++++++
 2 files changed, 67 insertions(+)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index eb22c6b7..a249f014 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -570,6 +570,7 @@ struct GLV1 {
 	mpz_class v0, v1;
 	mpz_class B[2][2];
 	mpz_class r;
+	typedef mcl::FixedArray<int8_t, 512 / 2> NafArray;
 private:
 	bool usePrecomputedTable(int curveType)
 	{
@@ -698,8 +699,63 @@ struct GLV1 {
 		a = x - (t * B[0][0] + b * B[1][0]);
 		b = - (t * B[0][1] + b * B[1][1]);
 	}
+	void addTbl(G1& Q, const G1 *tbl, const NafArray& naf, int i) const
+	{
+		if ((size_t)i >= naf.size()) return;
+		int n = naf[i];
+		if (n > 0) {
+			Q += tbl[(n - 1) / 2];
+		} else if (n < 0) {
+			Q -= tbl[(-n - 1)  / 2];
+		}
+	}
 	void mul(G1& Q, const G1& P, mpz_class x, bool constTime = false) const
 	{
+#if 1
+		(void)constTime;
+		NafArray naf[2];
+		mpz_class u[2];
+		const int w = 5;
+		const size_t tblSize = 1 << (w - 2);
+		G1 tbl0[tblSize];
+		G1 tbl1[tblSize];
+		bool b;
+
+		x %= r;
+		if (x == 0) {
+			Q.clear();
+//			if (constTime) goto DummyLoop;
+			return;
+		}
+		if (x < 0) {
+			x += r;
+		}
+		split(u[0], u[1], x);
+		gmp::getNAFwidth(&b, naf[0], u[0], w);
+		if (!b) puts("ERR");
+		assert(b); (void)b;
+		gmp::getNAFwidth(&b, naf[1], u[1], w);
+		if (!b) puts("ERR");
+		assert(b); (void)b;
+
+		tbl0[0] = P;
+		mulLambda(tbl1[0], tbl0[0]);
+		{
+			G1 P2;
+			G1::dbl(P2, P);
+			for (size_t i = 1; i < tblSize; i++) {
+				G1::add(tbl0[i], tbl0[i - 1], P2);
+				mulLambda(tbl1[i], tbl0[i]);
+			}
+		}
+		const int maxBit = (int)fp::max_(naf[0].size(), naf[1].size());
+		Q.clear();
+		for (int i = maxBit - 1; i >= 0; i--) {
+			G1::dbl(Q, Q);
+			addTbl(Q, tbl0, naf[0], i);
+			addTbl(Q, tbl1, naf[1], i);
+		}
+#else
 		typedef mcl::fp::Unit Unit;
 		const size_t maxUnit = 512 / 2 / mcl::fp::UnitBitSize;
 		const int splitN = 2;
@@ -782,6 +838,7 @@ struct GLV1 {
 			G1::dbl(D, D);
 			D += tbl[0];
 		}
+#endif
 	}
 };
 
diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp
index 742d3d2e..3da87e4a 100644
--- a/include/mcl/gmp_util.hpp
+++ b/include/mcl/gmp_util.hpp
@@ -617,6 +617,11 @@ void getNAFwidth(bool *pb, Vec& naf, mpz_class x, size_t w)
 	assert(w > 0);
 	naf.clear();
 	size_t zeroNum = 0;
+	bool negative = false;
+	if (x < 0) {
+		negative = true;
+		x = -x;
+	}
 	const int signedMaxW = 1 << (w - 1);
 	const int maxW = signedMaxW * 2;
 	const int maskW = maxW - 1;
@@ -641,6 +646,11 @@ void getNAFwidth(bool *pb, Vec& naf, mpz_class x, size_t w)
 		if (!*pb) return;
 		zeroNum = w - 1;
 	}
+	if (negative) {
+		for (size_t i = 0; i < naf.size(); i++) {
+			naf[i] = -naf[i];
+		}
+	}
 }
 
 #ifndef CYBOZU_DONT_USE_EXCEPTION

From b1ef09c6c4bcbb24caea6db3868f2e54d6086111 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 22 Jul 2019 19:46:32 +0900
Subject: [PATCH 021/553] fix GLV1::mul(0)

---
 include/mcl/gmp_util.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp
index 3da87e4a..384c0520 100644
--- a/include/mcl/gmp_util.hpp
+++ b/include/mcl/gmp_util.hpp
@@ -651,6 +651,7 @@ void getNAFwidth(bool *pb, Vec& naf, mpz_class x, size_t w)
 			naf[i] = -naf[i];
 		}
 	}
+	*pb = true;
 }
 
 #ifndef CYBOZU_DONT_USE_EXCEPTION

From cbf3c2ccc2866bd164811b4159f011f55adab115 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Jul 2019 16:14:07 +0900
Subject: [PATCH 022/553] remove old comments

---
 include/mcl/bn.hpp | 122 ++++++---------------------------------------
 1 file changed, 16 insertions(+), 106 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index a249f014..5e46f43e 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -699,146 +699,56 @@ struct GLV1 {
 		a = x - (t * B[0][0] + b * B[1][0]);
 		b = - (t * B[0][1] + b * B[1][1]);
 	}
-	void addTbl(G1& Q, const G1 *tbl, const NafArray& naf, int i) const
+	void addTbl(G1& Q, const G1 *tbl, const NafArray& naf, size_t i) const
 	{
-		if ((size_t)i >= naf.size()) return;
+		if (i >= naf.size()) return;
 		int n = naf[i];
 		if (n > 0) {
-			Q += tbl[(n - 1) / 2];
+			Q += tbl[(n - 1) >> 1];
 		} else if (n < 0) {
-			Q -= tbl[(-n - 1)  / 2];
+			Q -= tbl[(-n - 1) >> 1];
 		}
 	}
 	void mul(G1& Q, const G1& P, mpz_class x, bool constTime = false) const
 	{
-#if 1
-		(void)constTime;
-		NafArray naf[2];
-		mpz_class u[2];
 		const int w = 5;
 		const size_t tblSize = 1 << (w - 2);
-		G1 tbl0[tblSize];
-		G1 tbl1[tblSize];
+		NafArray naf[2];
+		mpz_class u[2];
+		G1 tbl[2][tblSize];
 		bool b;
 
 		x %= r;
 		if (x == 0) {
 			Q.clear();
-//			if (constTime) goto DummyLoop;
-			return;
+			if (!constTime) return;
 		}
 		if (x < 0) {
 			x += r;
 		}
 		split(u[0], u[1], x);
 		gmp::getNAFwidth(&b, naf[0], u[0], w);
-		if (!b) puts("ERR");
 		assert(b); (void)b;
 		gmp::getNAFwidth(&b, naf[1], u[1], w);
-		if (!b) puts("ERR");
 		assert(b); (void)b;
 
-		tbl0[0] = P;
-		mulLambda(tbl1[0], tbl0[0]);
+		tbl[0][0] = P;
+		mulLambda(tbl[1][0], tbl[0][0]);
 		{
 			G1 P2;
 			G1::dbl(P2, P);
 			for (size_t i = 1; i < tblSize; i++) {
-				G1::add(tbl0[i], tbl0[i - 1], P2);
-				mulLambda(tbl1[i], tbl0[i]);
+				G1::add(tbl[0][i], tbl[0][i - 1], P2);
+				mulLambda(tbl[1][i], tbl[0][i]);
 			}
 		}
-		const int maxBit = (int)fp::max_(naf[0].size(), naf[1].size());
+		const size_t maxBit = fp::max_(naf[0].size(), naf[1].size());
 		Q.clear();
-		for (int i = maxBit - 1; i >= 0; i--) {
+		for (size_t i = 0; i < maxBit; i++) {
 			G1::dbl(Q, Q);
-			addTbl(Q, tbl0, naf[0], i);
-			addTbl(Q, tbl1, naf[1], i);
-		}
-#else
-		typedef mcl::fp::Unit Unit;
-		const size_t maxUnit = 512 / 2 / mcl::fp::UnitBitSize;
-		const int splitN = 2;
-		mpz_class u[splitN];
-		G1 in[splitN];
-		G1 tbl[4];
-		int bitTbl[splitN]; // bit size of u[i]
-		Unit w[splitN][maxUnit]; // unit array of u[i]
-		int maxBit = 0; // max bit of u[i]
-		int maxN = 0;
-		int remainBit = 0;
-
-		x %= r;
-		if (x == 0) {
-			Q.clear();
-			if (constTime) goto DummyLoop;
-			return;
-		}
-		if (x < 0) {
-			x += r;
-		}
-		split(u[0], u[1], x);
-		in[0] = P;
-		mulLambda(in[1], in[0]);
-		for (int i = 0; i < splitN; i++) {
-			if (u[i] < 0) {
-				u[i] = -u[i];
-				G1::neg(in[i], in[i]);
-			}
-			in[i].normalize();
+			addTbl(Q, tbl[0], naf[0], maxBit - 1 - i);
+			addTbl(Q, tbl[1], naf[1], maxBit - 1 - i);
 		}
-#if 0
-		G1::mulGeneric(in[0], in[0], u[0]);
-		G1::mulGeneric(in[1], in[1], u[1]);
-		G1::add(Q, in[0], in[1]);
-		return;
-#else
-		tbl[0] = in[0]; // dummy
-		tbl[1] = in[0];
-		tbl[2] = in[1];
-		G1::add(tbl[3], in[0], in[1]);
-		tbl[3].normalize();
-		for (int i = 0; i < splitN; i++) {
-			bool b;
-			mcl::gmp::getArray(&b, w[i], maxUnit, u[i]);
-			assert(b);
-			bitTbl[i] = (int)mcl::gmp::getBitSize(u[i]);
-			maxBit = fp::max_(maxBit, bitTbl[i]);
-		}
-		assert(maxBit > 0);
-		maxBit--;
-		/*
-			maxBit = maxN * UnitBitSize + remainBit
-			0 < remainBit <= UnitBitSize
-		*/
-		maxN = maxBit / mcl::fp::UnitBitSize;
-		remainBit = maxBit % mcl::fp::UnitBitSize;
-		remainBit++;
-		Q.clear();
-		for (int i = maxN; i >= 0; i--) {
-			for (int j = remainBit - 1; j >= 0; j--) {
-				G1::dbl(Q, Q);
-				uint32_t b0 = (w[0][i] >> j) & 1;
-				uint32_t b1 = (w[1][i] >> j) & 1;
-				uint32_t c = b1 * 2 + b0;
-				if (c == 0) {
-					if (constTime) tbl[0] += tbl[1];
-				} else {
-					Q += tbl[c];
-				}
-			}
-			remainBit = (int)mcl::fp::UnitBitSize;
-		}
-#endif
-	DummyLoop:
-		if (!constTime) return;
-		const int limitBit = (int)rBitSize / splitN;
-		G1 D = tbl[0];
-		for (int i = maxBit + 1; i < limitBit; i++) {
-			G1::dbl(D, D);
-			D += tbl[0];
-		}
-#endif
 	}
 };
 

From 38d97e08020828efcd5dd4d53871d082a31f5bda Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Jul 2019 16:23:18 +0900
Subject: [PATCH 023/553] disable many warnings of LSAN

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b639a0bd..39a415cd 100644
--- a/Makefile
+++ b/Makefile
@@ -313,7 +313,7 @@ sample: $(SAMPLE_EXE) $(MCL_LIB)
 
 TEST_EXE=$(addprefix $(EXE_DIR)/,$(TEST_SRC:.cpp=.exe))
 test_ci: $(TEST_EXE)
-	@sh -ec 'for i in $(TEST_EXE); do echo $$i; env LSAN_OPTIONS=verbosity=1:log_threads=1 $$i; done'
+	@sh -ec 'for i in $(TEST_EXE); do echo $$i; env LSAN_OPTIONS=verbosity=0:log_threads=1 $$i; done'
 test: $(TEST_EXE)
 	@echo test $(TEST_EXE)
 	@sh -ec 'for i in $(TEST_EXE); do $$i|grep "ctest:name"; done' > result.txt

From de44c1336da17bd9a300d513bef7e43065563835 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Jul 2019 16:37:28 +0900
Subject: [PATCH 024/553] rewrite GLV2::mul

---
 include/mcl/bn.hpp | 74 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 63 insertions(+), 11 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 5e46f43e..130372f5 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -560,6 +560,19 @@ struct MapTo {
 	}
 };
 
+typedef mcl::FixedArray<int8_t, MCL_MAX_FR_BIT_SIZE / 2 + 2> NafArray;
+template<class G>
+void addTbl(G& Q, const G *tbl, const NafArray& naf, size_t i)
+{
+	if (i >= naf.size()) return;
+	int n = naf[i];
+	if (n > 0) {
+		Q += tbl[(n - 1) >> 1];
+	} else if (n < 0) {
+		Q -= tbl[(-n - 1) >> 1];
+	}
+}
+
 /*
 	Software implementation of Attribute-Based Encryption: Appendixes
 	GLV for G1 on BN/BLS12
@@ -570,7 +583,6 @@ struct GLV1 {
 	mpz_class v0, v1;
 	mpz_class B[2][2];
 	mpz_class r;
-	typedef mcl::FixedArray<int8_t, 512 / 2> NafArray;
 private:
 	bool usePrecomputedTable(int curveType)
 	{
@@ -699,16 +711,6 @@ struct GLV1 {
 		a = x - (t * B[0][0] + b * B[1][0]);
 		b = - (t * B[0][1] + b * B[1][1]);
 	}
-	void addTbl(G1& Q, const G1 *tbl, const NafArray& naf, size_t i) const
-	{
-		if (i >= naf.size()) return;
-		int n = naf[i];
-		if (n > 0) {
-			Q += tbl[(n - 1) >> 1];
-		} else if (n < 0) {
-			Q -= tbl[(-n - 1) >> 1];
-		}
-	}
 	void mul(G1& Q, const G1& P, mpz_class x, bool constTime = false) const
 	{
 		const int w = 5;
@@ -857,6 +859,55 @@ struct GLV2 {
 	template<class T>
 	void mul(T& Q, const T& P, mpz_class x, bool constTime = false) const
 	{
+#if 0
+		const int w = 5;
+		const size_t tblSize = 1 << (w - 2);
+		const size_t splitN = 4;
+		NafArray naf[splitN];
+		mpz_class u[splitN];
+		T tbl[splitN][tblSize];
+		bool b;
+
+		x %= r;
+		if (x == 0) {
+			Q.clear();
+			if (!constTime) return;
+		}
+		if (x < 0) {
+			x += r;
+		}
+		split(u, x);
+		for (size_t i = 0; i < splitN; i++) {
+			gmp::getNAFwidth(&b, naf[i], u[i], w);
+			assert(b); (void)b;
+		}
+		tbl[0][0] = P;
+		Frobenius(tbl[1][0], tbl[0][0]);
+		Frobenius(tbl[2][0], tbl[1][0]);
+		Frobenius(tbl[3][0], tbl[2][0]);
+		{
+			T P2;
+			T::dbl(P2, P);
+			for (size_t i = 1; i < tblSize; i++) {
+				T::add(tbl[0][i], tbl[0][i - 1], P2);
+				Frobenius(tbl[1][i], tbl[0][i]);
+				Frobenius(tbl[2][i], tbl[1][i]);
+				Frobenius(tbl[3][i], tbl[2][i]);
+			}
+		}
+		size_t maxBit = naf[0].size();
+		for (size_t i = 1; i < splitN; i++) {
+			if (naf[i].size() > maxBit) maxBit = naf[i].size();
+		}
+		Q.clear();
+		for (size_t i = 0; i < maxBit; i++) {
+			T::dbl(Q, Q);
+			addTbl(Q, tbl[0], naf[0], maxBit - 1 - i);
+			addTbl(Q, tbl[1], naf[1], maxBit - 1 - i);
+			addTbl(Q, tbl[2], naf[2], maxBit - 1 - i);
+			addTbl(Q, tbl[3], naf[3], maxBit - 1 - i);
+		}
+#else
 #if 0 // #ifndef NDEBUG
 		{
 			T R;
@@ -964,6 +1015,7 @@ struct GLV2 {
 			T::dbl(D, D);
 			D += tbl[0];
 		}
+#endif
 	}
 	void pow(Fp12& z, const Fp12& x, mpz_class y, bool constTime = false) const
 	{

From cc1396f753383d63e7c32f99d3080d4eb4fa14d4 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Jul 2019 16:58:43 +0900
Subject: [PATCH 025/553] GLV2 supports GT::pow

---
 include/mcl/bn.hpp       | 10 +++++-----
 include/mcl/fp_tower.hpp | 10 ++++++++++
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 130372f5..02f58f20 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -859,7 +859,7 @@ struct GLV2 {
 	template<class T>
 	void mul(T& Q, const T& P, mpz_class x, bool constTime = false) const
 	{
-#if 0
+#if 1
 		const int w = 5;
 		const size_t tblSize = 1 << (w - 2);
 		const size_t splitN = 4;
@@ -877,14 +877,14 @@ struct GLV2 {
 			x += r;
 		}
 		split(u, x);
-		for (size_t i = 0; i < splitN; i++) {
-			gmp::getNAFwidth(&b, naf[i], u[i], w);
-			assert(b); (void)b;
-		}
 		tbl[0][0] = P;
 		Frobenius(tbl[1][0], tbl[0][0]);
 		Frobenius(tbl[2][0], tbl[1][0]);
 		Frobenius(tbl[3][0], tbl[2][0]);
+		for (size_t i = 0; i < splitN; i++) {
+			gmp::getNAFwidth(&b, naf[i], u[i], w);
+			assert(b); (void)b;
+		}
 		{
 			T P2;
 			T::dbl(P2, P);
diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 8d79a7ee..dbbfacd3 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -1336,6 +1336,12 @@ struct GroupMtoA : public T {
 	{
 		T::mul(castT(z), castT(x), castT(y));
 	}
+	static void sub(GroupMtoA& z, const GroupMtoA& x, const GroupMtoA& y)
+	{
+		T r;
+		T::unitaryInv(r, castT(y));
+		T::mul(castT(z), castT(x), r);
+	}
 	static void dbl(GroupMtoA& y, const GroupMtoA& x)
 	{
 		T::sqr(castT(y), castT(x));
@@ -1363,6 +1369,10 @@ struct GroupMtoA : public T {
 	{
 		add(*this, *this, rhs);
 	}
+	void operator-=(const GroupMtoA& rhs)
+	{
+		sub(*this, *this, rhs);
+	}
 	void normalize() {}
 private:
 	bool isOne() const;

From 72a3ee5dc9f771de6098a57932946d81c2249e3c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Jul 2019 19:37:39 +0900
Subject: [PATCH 026/553] add FixedVec::value_type

---
 include/mcl/array.hpp    | 1 +
 include/mcl/gmp_util.hpp | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/array.hpp b/include/mcl/array.hpp
index 5fa49e64..33d4aabd 100644
--- a/include/mcl/array.hpp
+++ b/include/mcl/array.hpp
@@ -118,6 +118,7 @@ class FixedArray {
 		y = t;
 	}
 public:
+	typedef T value_type;
 	FixedArray() : n_(0) {}
 	bool resize(size_t n)
 	{
diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp
index 384c0520..0eeeb9c2 100644
--- a/include/mcl/gmp_util.hpp
+++ b/include/mcl/gmp_util.hpp
@@ -642,7 +642,7 @@ void getNAFwidth(bool *pb, Vec& naf, mpz_class x, size_t w)
 			x++;
 			v -= maxW;
 		}
-		naf.push(pb, v);
+		naf.push(pb, Vec::value_type(v));
 		if (!*pb) return;
 		zeroNum = w - 1;
 	}

From 8118e08dc27153dd3bac0264b1774e92e6173ad1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Jul 2019 19:37:39 +0900
Subject: [PATCH 027/553] add FixedVec::value_type

---
 include/mcl/array.hpp    | 1 +
 include/mcl/gmp_util.hpp | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/array.hpp b/include/mcl/array.hpp
index 5fa49e64..33d4aabd 100644
--- a/include/mcl/array.hpp
+++ b/include/mcl/array.hpp
@@ -118,6 +118,7 @@ class FixedArray {
 		y = t;
 	}
 public:
+	typedef T value_type;
 	FixedArray() : n_(0) {}
 	bool resize(size_t n)
 	{
diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp
index 384c0520..42ec6a70 100644
--- a/include/mcl/gmp_util.hpp
+++ b/include/mcl/gmp_util.hpp
@@ -642,7 +642,7 @@ void getNAFwidth(bool *pb, Vec& naf, mpz_class x, size_t w)
 			x++;
 			v -= maxW;
 		}
-		naf.push(pb, v);
+		naf.push(pb, typename Vec::value_type(v));
 		if (!*pb) return;
 		zeroNum = w - 1;
 	}

From 2762669e4c7f11198bce7a86290b80982447320c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 24 Jul 2019 16:58:52 +0900
Subject: [PATCH 028/553] test of GLV for secp256k1

---
 test/ecdsa_test.cpp | 167 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 166 insertions(+), 1 deletion(-)

diff --git a/test/ecdsa_test.cpp b/test/ecdsa_test.cpp
index 332c9ee2..8a766eaa 100644
--- a/test/ecdsa_test.cpp
+++ b/test/ecdsa_test.cpp
@@ -1,4 +1,4 @@
-#define PUT(x) std::cout << #x "=" << x << std::endl;
+#define PUT(x) std::cout << #x "=" << (x) << std::endl;
 #include <stdlib.h>
 #include <stdio.h>
 void put(const void *buf, size_t bufSize)
@@ -15,9 +15,174 @@ void put(const void *buf, size_t bufSize)
 
 using namespace mcl::ecdsa;
 
+typedef mcl::FixedArray<int8_t, 256 / 2 + 2> NafArray;
+
+template<class G>
+void addTbl(G& Q, const G *tbl, const NafArray& naf, size_t i)
+{
+	if (i >= naf.size()) return;
+	int n = naf[i];
+	if (n > 0) {
+		Q += tbl[(n - 1) >> 1];
+	} else if (n < 0) {
+		Q -= tbl[(-n - 1) >> 1];
+	}
+}
+
+using namespace mcl;
+
+template<class G1>
+struct GLV1 {
+	Fp rw; // rw = 1 / w = (-1 - sqrt(-3)) / 2
+	size_t rBitSize;
+	mpz_class v0, v1;
+	mpz_class B[2][2];
+	mpz_class r;
+private:
+public:
+	bool operator==(const GLV1& rhs) const
+	{
+		return rw == rhs.rw && rBitSize == rhs.rBitSize && v0 == rhs.v0 && v1 == rhs.v1
+			&& B[0][0] == rhs.B[0][0] && B[0][1] == rhs.B[0][1] && B[1][0] == rhs.B[1][0]
+			&& B[1][1] == rhs.B[1][1] && r == rhs.r;
+	}
+	bool operator!=(const GLV1& rhs) const { return !operator==(rhs); }
+#ifndef CYBOZU_DONT_USE_STRING
+	void dump(const mpz_class& x) const
+	{
+		printf("\"%s\",\n", mcl::gmp::getStr(x, 16).c_str());
+	}
+	void dump() const
+	{
+		printf("\"%s\",\n", rw.getStr(16).c_str());
+		printf("%d,\n", (int)rBitSize);
+		dump(v0);
+		dump(v1);
+		dump(B[0][0]); dump(B[0][1]); dump(B[1][0]); dump(B[1][1]);
+		dump(r);
+	}
+#endif
+	void init(const mpz_class& r, const mpz_class& z, bool isBLS12 = false, int curveType = -1)
+	{
+	}
+	/*
+		L = lambda = p^4
+		L (x, y) = (rw x, y)
+	*/
+	void mulLambda(G1& Q, const G1& P) const
+	{
+		Fp::mul(Q.x, P.x, rw);
+		Q.y = P.y;
+		Q.z = P.z;
+	}
+	/*
+		x = a + b * lambda mod r
+	*/
+	void split(mpz_class& a, mpz_class& b, const mpz_class& x) const
+	{
+		mpz_class t;
+//		t = (x * v0) >> rBitSize;
+//		b = (x * v1) >> rBitSize;
+t = (B[1][1] * x) / r;
+b = (-B[0][1] * x) / r;
+		a = x - (t * B[0][0] + b * B[1][0]);
+		b = - (t * B[0][1] + b * B[1][1]);
+	}
+	void mul(G1& Q, const G1& P, mpz_class x, bool constTime = false) const
+	{
+		const int w = 5;
+		const size_t tblSize = 1 << (w - 2);
+		NafArray naf[2];
+		mpz_class u[2];
+		G1 tbl[2][tblSize];
+		bool b;
+
+		x %= r;
+		if (x == 0) {
+			Q.clear();
+			if (!constTime) return;
+		}
+		if (x < 0) {
+			x += r;
+		}
+		split(u[0], u[1], x);
+		gmp::getNAFwidth(&b, naf[0], u[0], w);
+		assert(b); (void)b;
+		gmp::getNAFwidth(&b, naf[1], u[1], w);
+		assert(b); (void)b;
+
+		tbl[0][0] = P;
+		mulLambda(tbl[1][0], tbl[0][0]);
+		{
+			G1 P2;
+			G1::dbl(P2, P);
+			for (size_t i = 1; i < tblSize; i++) {
+				G1::add(tbl[0][i], tbl[0][i - 1], P2);
+				mulLambda(tbl[1][i], tbl[0][i]);
+			}
+		}
+		const size_t maxBit = fp::max_(naf[0].size(), naf[1].size());
+		Q.clear();
+		for (size_t i = 0; i < maxBit; i++) {
+			G1::dbl(Q, Q);
+			addTbl(Q, tbl[0], naf[0], maxBit - 1 - i);
+			addTbl(Q, tbl[1], naf[1], maxBit - 1 - i);
+		}
+	}
+};
+
+static GLV1<Ec> glv1;
+
+inline void mulArrayEc(Ec& z, const Ec& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
+{
+	mpz_class s;
+	bool b;
+	mcl::gmp::setArray(&b, s, y, yn);
+	assert(b);
+	if (isNegative) s = -s;
+	glv1.mul(z, x, s, constTime);
+}
+
+void initGLV()
+{
+	const mcl::ecdsa::local::Param& p = mcl::ecdsa::local::getParam();
+	const mcl::EcParam& ecParam = p.ecParam;
+	{
+		Fp& rw = glv1.rw;
+		bool b = Fp::squareRoot(rw, -3);
+		assert(b);
+		printf("b=%d\n", b);
+		if (!b) exit(1);
+		rw = -(rw + 1) / 2;
+		glv1.r = ecParam.n;
+		glv1.rBitSize = gmp::getBitSize(glv1.r);
+		glv1.rBitSize = (glv1.rBitSize + fp::UnitBitSize - 1) & ~(fp::UnitBitSize - 1);
+		gmp::setStr(glv1.B[0][0], "0x3086d221a7d46bcde86c90e49284eb15");
+		gmp::setStr(glv1.B[0][1], "-0xe4437ed6010e88286f547fa90abfe4c3");
+		gmp::setStr(glv1.B[1][0], "0x114ca50f7a8e2f3f657c1108d9d44cfd8");
+		glv1.B[1][1] = glv1.B[0][0];
+		glv1.v0 = ((-glv1.B[1][1]) << glv1.rBitSize) / glv1.r;
+		glv1.v1 = ((glv1.B[1][0]) << glv1.rBitSize) / glv1.r;
+	}
+	PUT(p.P);
+	Ec Q1, Q2;
+	mpz_class L;
+	gmp::setStr(L, "0x5363ad4cc05c30e0a5261c028812645a122e22ea20816678df02967c1b23bd72");
+	PUT(L);
+	Ec::mul(Q1, p.P, L);
+	PUT(Q1);
+	glv1.mulLambda(Q2, p.P);
+	PUT(Q2);
+	PUT(Q1 == Q2);
+	// enable GLV
+	Ec::setMulArrayGLV(mulArrayEc);
+}
+
+
 CYBOZU_TEST_AUTO(ecdsa)
 {
 	init();
+	initGLV();
 	SecretKey sec;
 	PublicKey pub;
 	sec.setByCSPRNG();

From 53028c4a98817ad62ac916ad90c008d4a13cc567 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 25 Jul 2019 14:56:45 +0900
Subject: [PATCH 029/553] GLV1 is template class

---
 include/mcl/bn.hpp  | 86 ++++++++++++++++++++++++---------------------
 test/ecdsa_test.cpp | 12 +++----
 test/glv_test.cpp   | 20 +++++------
 3 files changed, 62 insertions(+), 56 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 02f58f20..3c5959b2 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -577,14 +577,15 @@ void addTbl(G& Q, const G *tbl, const NafArray& naf, size_t i)
 	Software implementation of Attribute-Based Encryption: Appendixes
 	GLV for G1 on BN/BLS12
 */
-struct GLV1 {
-	Fp rw; // rw = 1 / w = (-1 - sqrt(-3)) / 2
-	size_t rBitSize;
-	mpz_class v0, v1;
-	mpz_class B[2][2];
-	mpz_class r;
+template<class F, class G>
+struct GLV1T {
+	static F rw; // rw = 1 / w = (-1 - sqrt(-3)) / 2
+	static size_t rBitSize;
+	static mpz_class v0, v1;
+	static mpz_class B[2][2];
+	static mpz_class r;
 private:
-	bool usePrecomputedTable(int curveType)
+	static bool usePrecomputedTable(int curveType)
 	{
 		if (curveType < 0) return false;
 		const struct Tbl {
@@ -631,19 +632,12 @@ struct GLV1 {
 		return false;
 	}
 public:
-	bool operator==(const GLV1& rhs) const
-	{
-		return rw == rhs.rw && rBitSize == rhs.rBitSize && v0 == rhs.v0 && v1 == rhs.v1
-			&& B[0][0] == rhs.B[0][0] && B[0][1] == rhs.B[0][1] && B[1][0] == rhs.B[1][0]
-			&& B[1][1] == rhs.B[1][1] && r == rhs.r;
-	}
-	bool operator!=(const GLV1& rhs) const { return !operator==(rhs); }
 #ifndef CYBOZU_DONT_USE_STRING
-	void dump(const mpz_class& x) const
+	static void dump(const mpz_class& x)
 	{
 		printf("\"%s\",\n", mcl::gmp::getStr(x, 16).c_str());
 	}
-	void dump() const
+	static void dump()
 	{
 		printf("\"%s\",\n", rw.getStr(16).c_str());
 		printf("%d,\n", (int)rBitSize);
@@ -653,14 +647,14 @@ struct GLV1 {
 		dump(r);
 	}
 #endif
-	void init(const mpz_class& r, const mpz_class& z, bool isBLS12 = false, int curveType = -1)
+	static void init(const mpz_class& _r, const mpz_class& z, bool isBLS12 = false, int curveType = -1)
 	{
 		if (usePrecomputedTable(curveType)) return;
-		bool b = Fp::squareRoot(rw, -3);
+		bool b = F::squareRoot(rw, -3);
 		assert(b);
 		(void)b;
 		rw = -(rw + 1) / 2;
-		this->r = r;
+		r = _r;
 		rBitSize = gmp::getBitSize(r);
 		rBitSize = (rBitSize + fp::UnitBitSize - 1) & ~(fp::UnitBitSize - 1);// a little better size
 		if (isBLS12) {
@@ -694,16 +688,16 @@ struct GLV1 {
 		L = lambda = p^4
 		L (x, y) = (rw x, y)
 	*/
-	void mulLambda(G1& Q, const G1& P) const
+	static void mulLambda(G& Q, const G& P)
 	{
-		Fp::mul(Q.x, P.x, rw);
+		F::mul(Q.x, P.x, rw);
 		Q.y = P.y;
 		Q.z = P.z;
 	}
 	/*
 		x = a + b * lambda mod r
 	*/
-	void split(mpz_class& a, mpz_class& b, const mpz_class& x) const
+	static void split(mpz_class& a, mpz_class& b, const mpz_class& x)
 	{
 		mpz_class t;
 		t = (x * v0) >> rBitSize;
@@ -711,13 +705,13 @@ struct GLV1 {
 		a = x - (t * B[0][0] + b * B[1][0]);
 		b = - (t * B[0][1] + b * B[1][1]);
 	}
-	void mul(G1& Q, const G1& P, mpz_class x, bool constTime = false) const
+	static void mul(G& Q, const G& P, mpz_class x, bool constTime = false)
 	{
 		const int w = 5;
 		const size_t tblSize = 1 << (w - 2);
 		NafArray naf[2];
 		mpz_class u[2];
-		G1 tbl[2][tblSize];
+		G tbl[2][tblSize];
 		bool b;
 
 		x %= r;
@@ -737,22 +731,43 @@ struct GLV1 {
 		tbl[0][0] = P;
 		mulLambda(tbl[1][0], tbl[0][0]);
 		{
-			G1 P2;
-			G1::dbl(P2, P);
+			G P2;
+			G::dbl(P2, P);
 			for (size_t i = 1; i < tblSize; i++) {
-				G1::add(tbl[0][i], tbl[0][i - 1], P2);
+				G::add(tbl[0][i], tbl[0][i - 1], P2);
 				mulLambda(tbl[1][i], tbl[0][i]);
 			}
 		}
 		const size_t maxBit = fp::max_(naf[0].size(), naf[1].size());
 		Q.clear();
 		for (size_t i = 0; i < maxBit; i++) {
-			G1::dbl(Q, Q);
+			G::dbl(Q, Q);
 			addTbl(Q, tbl[0], naf[0], maxBit - 1 - i);
 			addTbl(Q, tbl[1], naf[1], maxBit - 1 - i);
 		}
 	}
+	static void mulArray(G& z, const G& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
+	{
+		mpz_class s;
+		bool b;
+		mcl::gmp::setArray(&b, s, y, yn);
+		assert(b);
+		if (isNegative) s = -s;
+		mul(z, x, s, constTime);
+	}
 };
+template<class F, class G>
+F GLV1T<F, G>::rw; // rw = 1 / w = (-1 - sqrt(-3)) / 2
+template<class F, class G>
+size_t GLV1T<F, G>::rBitSize;
+template<class F, class G>
+mpz_class GLV1T<F, G>::v0;
+template<class F, class G>
+mpz_class GLV1T<F, G>::v1;
+template<class F, class G>
+mpz_class GLV1T<F, G>::B[2][2];
+template<class F, class G>
+mpz_class GLV1T<F, G>::r;
 
 /*
 	GLV method for G2 and GT on BN/BLS12
@@ -1035,7 +1050,7 @@ struct Param {
 	mpz_class p;
 	mpz_class r;
 	local::MapTo mapTo;
-	local::GLV1 glv1;
+	typedef local::GLV1T<Fp, G1> GLV1;
 	local::GLV2 glv2;
 	// for G2 Frobenius
 	Fp2 g2;
@@ -1151,7 +1166,7 @@ struct Param {
 		} else {
 			mapTo.init(2 * p - r, z, cp.curveType);
 		}
-		glv1.init(r, z, isBLS12, cp.curveType);
+		GLV1::init(r, z, isBLS12, cp.curveType);
 		glv2.init(r, z, isBLS12);
 		basePoint.clear();
 		*pb = true;
@@ -1200,15 +1215,6 @@ static const local::Param& param = local::StaticVar<>::param;
 
 namespace local {
 
-inline void mulArrayGLV1(G1& z, const G1& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
-{
-	mpz_class s;
-	bool b;
-	mcl::gmp::setArray(&b, s, y, yn);
-	assert(b);
-	if (isNegative) s = -s;
-	BN::param.glv1.mul(z, x, s, constTime);
-}
 inline void mulArrayGLV2(G2& z, const G2& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
 {
 	mpz_class s;
@@ -2227,7 +2233,7 @@ inline void init(bool *pb, const mcl::CurveParam& cp = mcl::BN254, fp::Mode mode
 {
 	local::StaticVar<>::param.init(pb, cp, mode);
 	if (!*pb) return;
-	G1::setMulArrayGLV(local::mulArrayGLV1);
+	G1::setMulArrayGLV(bn::local::Param::GLV1::mulArray);
 	G2::setMulArrayGLV(local::mulArrayGLV2);
 	Fp12::setPowArrayGLV(local::powArrayGLV2);
 	G1::setCompressedExpression();
diff --git a/test/ecdsa_test.cpp b/test/ecdsa_test.cpp
index 8a766eaa..28f689e4 100644
--- a/test/ecdsa_test.cpp
+++ b/test/ecdsa_test.cpp
@@ -81,10 +81,10 @@ struct GLV1 {
 	void split(mpz_class& a, mpz_class& b, const mpz_class& x) const
 	{
 		mpz_class t;
-//		t = (x * v0) >> rBitSize;
-//		b = (x * v1) >> rBitSize;
-t = (B[1][1] * x) / r;
-b = (-B[0][1] * x) / r;
+		t = (x * v0) >> rBitSize;
+		b = (x * v1) >> rBitSize;
+//t = (B[1][1] * x) / r;
+//b = (-B[0][1] * x) / r;
 		a = x - (t * B[0][0] + b * B[1][0]);
 		b = - (t * B[0][1] + b * B[1][1]);
 	}
@@ -161,8 +161,8 @@ void initGLV()
 		gmp::setStr(glv1.B[0][1], "-0xe4437ed6010e88286f547fa90abfe4c3");
 		gmp::setStr(glv1.B[1][0], "0x114ca50f7a8e2f3f657c1108d9d44cfd8");
 		glv1.B[1][1] = glv1.B[0][0];
-		glv1.v0 = ((-glv1.B[1][1]) << glv1.rBitSize) / glv1.r;
-		glv1.v1 = ((glv1.B[1][0]) << glv1.rBitSize) / glv1.r;
+		glv1.v0 = ((glv1.B[1][1]) << glv1.rBitSize) / glv1.r;
+		glv1.v1 = ((-glv1.B[0][1]) << glv1.rBitSize) / glv1.r;
 	}
 	PUT(p.P);
 	Ec Q1, Q2;
diff --git a/test/glv_test.cpp b/test/glv_test.cpp
index 0e6fccde..79d378f4 100644
--- a/test/glv_test.cpp
+++ b/test/glv_test.cpp
@@ -77,7 +77,7 @@ struct oldGLV {
 };
 
 template<class GLV1, class GLV2>
-void compareLength(const GLV1& rhs, const GLV2& lhs)
+void compareLength(const GLV2& lhs)
 {
 	cybozu::XorShift rg;
 	int lt = 0;
@@ -88,7 +88,7 @@ void compareLength(const GLV1& rhs, const GLV2& lhs)
 	for (int i = 1; i < 1000; i++) {
 		r.setRand(rg);
 		x = r.getMpz();
-		rhs.split(R0, R1, x);
+		GLV1::split(R0, R1, x);
 		lhs.split(L0, L1, x);
 
 		size_t R0n = mcl::gmp::getBitSize(R0);
@@ -121,10 +121,10 @@ void testGLV1()
 		oldGlv.init(BN::param.r, BN::param.z);
 	}
 
-	mcl::bn::local::GLV1 glv;
-	glv.init(BN::param.r, BN::param.z, BN::param.isBLS12);
+	typedef mcl::bn::local::Param::GLV1 GLV1;
+	GLV1::init(BN::param.r, BN::param.z, BN::param.isBLS12);
 	if (!BN::param.isBLS12) {
-		compareLength(glv, oldGlv);
+		compareLength<GLV1>(oldGlv);
 	}
 
 	for (int i = 1; i < 100; i++) {
@@ -133,9 +133,9 @@ void testGLV1()
 		s.setRand(rg);
 		mpz_class ss = s.getMpz();
 		G1::mulGeneric(P1, P0, ss);
-		glv.mul(P2, P0, ss);
+		GLV1::mul(P2, P0, ss);
 		CYBOZU_TEST_EQUAL(P1, P2);
-		glv.mul(P2, P0, ss, true);
+		GLV1::mul(P2, P0, ss, true);
 		CYBOZU_TEST_EQUAL(P1, P2);
 		if (!BN::param.isBLS12) {
 			oldGlv.mul(P2, P0, ss);
@@ -145,15 +145,15 @@ void testGLV1()
 	for (int i = -100; i < 100; i++) {
 		mpz_class ss = i;
 		G1::mulGeneric(P1, P0, ss);
-		glv.mul(P2, P0, ss);
+		GLV1::mul(P2, P0, ss);
 		CYBOZU_TEST_EQUAL(P1, P2);
-		glv.mul(P2, P0, ss, true);
+		GLV1::mul(P2, P0, ss, true);
 		CYBOZU_TEST_EQUAL(P1, P2);
 	}
 	Fr s;
 	mapToG1(P0, 123);
 	CYBOZU_BENCH_C("Ec::mul", 100, P1 = P0; s.setRand(rg); G1::mulGeneric, P2, P1, s.getMpz());
-	CYBOZU_BENCH_C("Ec::glv", 100, P1 = P0; s.setRand(rg); glv.mul, P2, P1, s.getMpz());
+	CYBOZU_BENCH_C("Ec::glv", 100, P1 = P0; s.setRand(rg); GLV1::mul, P2, P1, s.getMpz());
 }
 
 /*

From 14465da20501105f6d492d7d06fef68fa4965b9d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 25 Jul 2019 15:44:59 +0900
Subject: [PATCH 030/553] move GLV1 to ec.hpp

---
 include/mcl/bn.hpp | 263 ++++-----------------------------------------
 include/mcl/ec.hpp | 125 +++++++++++++++++++++
 test/glv_test.cpp  |   6 +-
 3 files changed, 146 insertions(+), 248 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 3c5959b2..d1cb9f7f 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -561,30 +561,13 @@ struct MapTo {
 };
 
 typedef mcl::FixedArray<int8_t, MCL_MAX_FR_BIT_SIZE / 2 + 2> NafArray;
-template<class G>
-void addTbl(G& Q, const G *tbl, const NafArray& naf, size_t i)
-{
-	if (i >= naf.size()) return;
-	int n = naf[i];
-	if (n > 0) {
-		Q += tbl[(n - 1) >> 1];
-	} else if (n < 0) {
-		Q -= tbl[(-n - 1) >> 1];
-	}
-}
 
 /*
 	Software implementation of Attribute-Based Encryption: Appendixes
 	GLV for G1 on BN/BLS12
 */
-template<class F, class G>
-struct GLV1T {
-	static F rw; // rw = 1 / w = (-1 - sqrt(-3)) / 2
-	static size_t rBitSize;
-	static mpz_class v0, v1;
-	static mpz_class B[2][2];
-	static mpz_class r;
-private:
+
+struct GLV1 : mcl::GLV1T<Fp, G1> {
 	static bool usePrecomputedTable(int curveType)
 	{
 		if (curveType < 0) return false;
@@ -620,37 +603,21 @@ struct GLV1T {
 			bool b;
 			rw.setStr(&b, tbl[i].rw, 16); if (!b) continue;
 			rBitSize = tbl[i].rBitSize;
-			mcl::gmp::setStr(&b, v0, tbl[i].v0, 16); if (!b) continue;
-			mcl::gmp::setStr(&b, v1, tbl[i].v1, 16); if (!b) continue;
-			mcl::gmp::setStr(&b, B[0][0], tbl[i].B[0][0], 16); if (!b) continue;
-			mcl::gmp::setStr(&b, B[0][1], tbl[i].B[0][1], 16); if (!b) continue;
-			mcl::gmp::setStr(&b, B[1][0], tbl[i].B[1][0], 16); if (!b) continue;
-			mcl::gmp::setStr(&b, B[1][1], tbl[i].B[1][1], 16); if (!b) continue;
-			mcl::gmp::setStr(&b, r, tbl[i].r, 16); if (!b) continue;
+			gmp::setStr(&b, v0, tbl[i].v0, 16); if (!b) continue;
+			gmp::setStr(&b, v1, tbl[i].v1, 16); if (!b) continue;
+			gmp::setStr(&b, B[0][0], tbl[i].B[0][0], 16); if (!b) continue;
+			gmp::setStr(&b, B[0][1], tbl[i].B[0][1], 16); if (!b) continue;
+			gmp::setStr(&b, B[1][0], tbl[i].B[1][0], 16); if (!b) continue;
+			gmp::setStr(&b, B[1][1], tbl[i].B[1][1], 16); if (!b) continue;
+			gmp::setStr(&b, r, tbl[i].r, 16); if (!b) continue;
 			return true;
 		}
 		return false;
 	}
-public:
-#ifndef CYBOZU_DONT_USE_STRING
-	static void dump(const mpz_class& x)
-	{
-		printf("\"%s\",\n", mcl::gmp::getStr(x, 16).c_str());
-	}
-	static void dump()
-	{
-		printf("\"%s\",\n", rw.getStr(16).c_str());
-		printf("%d,\n", (int)rBitSize);
-		dump(v0);
-		dump(v1);
-		dump(B[0][0]); dump(B[0][1]); dump(B[1][0]); dump(B[1][1]);
-		dump(r);
-	}
-#endif
-	static void init(const mpz_class& _r, const mpz_class& z, bool isBLS12 = false, int curveType = -1)
+	static void initForBN(const mpz_class& _r, const mpz_class& z, bool isBLS12 = false, int curveType = -1)
 	{
 		if (usePrecomputedTable(curveType)) return;
-		bool b = F::squareRoot(rw, -3);
+		bool b = Fp::squareRoot(rw, -3);
 		assert(b);
 		(void)b;
 		rw = -(rw + 1) / 2;
@@ -684,90 +651,7 @@ struct GLV1T {
 		v0 = ((-B[1][1]) << rBitSize) / r;
 		v1 = ((B[1][0]) << rBitSize) / r;
 	}
-	/*
-		L = lambda = p^4
-		L (x, y) = (rw x, y)
-	*/
-	static void mulLambda(G& Q, const G& P)
-	{
-		F::mul(Q.x, P.x, rw);
-		Q.y = P.y;
-		Q.z = P.z;
-	}
-	/*
-		x = a + b * lambda mod r
-	*/
-	static void split(mpz_class& a, mpz_class& b, const mpz_class& x)
-	{
-		mpz_class t;
-		t = (x * v0) >> rBitSize;
-		b = (x * v1) >> rBitSize;
-		a = x - (t * B[0][0] + b * B[1][0]);
-		b = - (t * B[0][1] + b * B[1][1]);
-	}
-	static void mul(G& Q, const G& P, mpz_class x, bool constTime = false)
-	{
-		const int w = 5;
-		const size_t tblSize = 1 << (w - 2);
-		NafArray naf[2];
-		mpz_class u[2];
-		G tbl[2][tblSize];
-		bool b;
-
-		x %= r;
-		if (x == 0) {
-			Q.clear();
-			if (!constTime) return;
-		}
-		if (x < 0) {
-			x += r;
-		}
-		split(u[0], u[1], x);
-		gmp::getNAFwidth(&b, naf[0], u[0], w);
-		assert(b); (void)b;
-		gmp::getNAFwidth(&b, naf[1], u[1], w);
-		assert(b); (void)b;
-
-		tbl[0][0] = P;
-		mulLambda(tbl[1][0], tbl[0][0]);
-		{
-			G P2;
-			G::dbl(P2, P);
-			for (size_t i = 1; i < tblSize; i++) {
-				G::add(tbl[0][i], tbl[0][i - 1], P2);
-				mulLambda(tbl[1][i], tbl[0][i]);
-			}
-		}
-		const size_t maxBit = fp::max_(naf[0].size(), naf[1].size());
-		Q.clear();
-		for (size_t i = 0; i < maxBit; i++) {
-			G::dbl(Q, Q);
-			addTbl(Q, tbl[0], naf[0], maxBit - 1 - i);
-			addTbl(Q, tbl[1], naf[1], maxBit - 1 - i);
-		}
-	}
-	static void mulArray(G& z, const G& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
-	{
-		mpz_class s;
-		bool b;
-		mcl::gmp::setArray(&b, s, y, yn);
-		assert(b);
-		if (isNegative) s = -s;
-		mul(z, x, s, constTime);
-	}
 };
-template<class F, class G>
-F GLV1T<F, G>::rw; // rw = 1 / w = (-1 - sqrt(-3)) / 2
-template<class F, class G>
-size_t GLV1T<F, G>::rBitSize;
-template<class F, class G>
-mpz_class GLV1T<F, G>::v0;
-template<class F, class G>
-mpz_class GLV1T<F, G>::v1;
-template<class F, class G>
-mpz_class GLV1T<F, G>::B[2][2];
-template<class F, class G>
-mpz_class GLV1T<F, G>::r;
 
 /*
 	GLV method for G2 and GT on BN/BLS12
@@ -787,7 +671,7 @@ struct GLV2 {
 		this->z = z;
 		this->abs_z = z < 0 ? -z : z;
 		this->isBLS12 = isBLS12;
-		rBitSize = mcl::gmp::getBitSize(r);
+		rBitSize = gmp::getBitSize(r);
 		rBitSize = (rBitSize + mcl::fp::UnitBitSize - 1) & ~(mcl::fp::UnitBitSize - 1);// a little better size
 		mpz_class z2p1 = z * 2 + 1;
 		B[0][0] = z + 1;
@@ -874,7 +758,6 @@ struct GLV2 {
 	template<class T>
 	void mul(T& Q, const T& P, mpz_class x, bool constTime = false) const
 	{
-#if 1
 		const int w = 5;
 		const size_t tblSize = 1 << (w - 2);
 		const size_t splitN = 4;
@@ -917,120 +800,11 @@ struct GLV2 {
 		Q.clear();
 		for (size_t i = 0; i < maxBit; i++) {
 			T::dbl(Q, Q);
-			addTbl(Q, tbl[0], naf[0], maxBit - 1 - i);
-			addTbl(Q, tbl[1], naf[1], maxBit - 1 - i);
-			addTbl(Q, tbl[2], naf[2], maxBit - 1 - i);
-			addTbl(Q, tbl[3], naf[3], maxBit - 1 - i);
-		}
-#else
-#if 0 // #ifndef NDEBUG
-		{
-			T R;
-			T::mulGeneric(R, P, r);
-			assert(R.isZero());
-		}
-#endif
-		typedef mcl::fp::Unit Unit;
-		const size_t maxUnit = 512 / 2 / mcl::fp::UnitBitSize;
-		const int splitN = 4;
-		mpz_class u[splitN];
-		T in[splitN];
-		T tbl[16];
-		int bitTbl[splitN]; // bit size of u[i]
-		Unit w[splitN][maxUnit]; // unit array of u[i]
-		int maxBit = 0; // max bit of u[i]
-		int maxN = 0;
-		int remainBit = 0;
-
-		x %= r;
-		if (x == 0) {
-			Q.clear();
-			if (constTime) goto DummyLoop;
-			return;
+			mcl::local::addTbl(Q, tbl[0], naf[0], maxBit - 1 - i);
+			mcl::local::addTbl(Q, tbl[1], naf[1], maxBit - 1 - i);
+			mcl::local::addTbl(Q, tbl[2], naf[2], maxBit - 1 - i);
+			mcl::local::addTbl(Q, tbl[3], naf[3], maxBit - 1 - i);
 		}
-		if (x < 0) {
-			x += r;
-		}
-		split(u, x);
-		in[0] = P;
-		Frobenius(in[1], in[0]);
-		Frobenius(in[2], in[1]);
-		Frobenius(in[3], in[2]);
-		for (int i = 0; i < splitN; i++) {
-			if (u[i] < 0) {
-				u[i] = -u[i];
-				T::neg(in[i], in[i]);
-			}
-//			in[i].normalize(); // slow
-		}
-#if 0
-		for (int i = 0; i < splitN; i++) {
-			T::mulGeneric(in[i], in[i], u[i]);
-		}
-		T::add(Q, in[0], in[1]);
-		Q += in[2];
-		Q += in[3];
-		return;
-#else
-		tbl[0] = in[0];
-		for (size_t i = 1; i < 16; i++) {
-			tbl[i].clear();
-			if (i & 1) {
-				tbl[i] += in[0];
-			}
-			if (i & 2) {
-				tbl[i] += in[1];
-			}
-			if (i & 4) {
-				tbl[i] += in[2];
-			}
-			if (i & 8) {
-				tbl[i] += in[3];
-			}
-//			tbl[i].normalize();
-		}
-		for (int i = 0; i < splitN; i++) {
-			bool b;
-			mcl::gmp::getArray(&b, w[i], maxUnit, u[i]);
-			assert(b);
-			bitTbl[i] = (int)mcl::gmp::getBitSize(u[i]);
-			maxBit = fp::max_(maxBit, bitTbl[i]);
-		}
-		maxBit--;
-		/*
-			maxBit = maxN * UnitBitSize + remainBit
-			0 < remainBit <= UnitBitSize
-		*/
-		maxN = maxBit / mcl::fp::UnitBitSize;
-		remainBit = maxBit % mcl::fp::UnitBitSize;
-		remainBit++;
-		Q.clear();
-		for (int i = maxN; i >= 0; i--) {
-			for (int j = remainBit - 1; j >= 0; j--) {
-				T::dbl(Q, Q);
-				uint32_t b0 = (w[0][i] >> j) & 1;
-				uint32_t b1 = (w[1][i] >> j) & 1;
-				uint32_t b2 = (w[2][i] >> j) & 1;
-				uint32_t b3 = (w[3][i] >> j) & 1;
-				uint32_t c = b3 * 8 + b2 * 4 + b1 * 2 + b0;
-				if (c == 0) {
-					if (constTime) tbl[0] += tbl[1];
-				} else {
-					Q += tbl[c];
-				}
-			}
-			remainBit = (int)mcl::fp::UnitBitSize;
-		}
-#endif
-	DummyLoop:
-		if (!constTime) return;
-		const int limitBit = (int)rBitSize / splitN;
-		T D = tbl[0];
-		for (int i = maxBit + 1; i < limitBit; i++) {
-			T::dbl(D, D);
-			D += tbl[0];
-		}
-#endif
 	}
 	void pow(Fp12& z, const Fp12& x, mpz_class y, bool constTime = false) const
 	{
@@ -1050,7 +824,6 @@ struct Param {
 	mpz_class p;
 	mpz_class r;
 	local::MapTo mapTo;
-	typedef local::GLV1T<Fp, G1> GLV1;
 	local::GLV2 glv2;
 	// for G2 Frobenius
 	Fp2 g2;
@@ -1166,7 +939,7 @@ struct Param {
 		} else {
 			mapTo.init(2 * p - r, z, cp.curveType);
 		}
-		GLV1::init(r, z, isBLS12, cp.curveType);
+		GLV1::initForBN(r, z, isBLS12, cp.curveType);
 		glv2.init(r, z, isBLS12);
 		basePoint.clear();
 		*pb = true;
@@ -2233,7 +2006,7 @@ inline void init(bool *pb, const mcl::CurveParam& cp = mcl::BN254, fp::Mode mode
 {
 	local::StaticVar<>::param.init(pb, cp, mode);
 	if (!*pb) return;
-	G1::setMulArrayGLV(bn::local::Param::GLV1::mulArray);
+	G1::setMulArrayGLV(local::GLV1::mulArray);
 	G2::setMulArrayGLV(local::mulArrayGLV2);
 	Fp12::setPowArrayGLV(local::powArrayGLV2);
 	G1::setCompressedExpression();
diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index ad6e6db4..115a8dec 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -10,6 +10,7 @@
 #include <cybozu/exception.hpp>
 #include <mcl/op.hpp>
 #include <mcl/util.hpp>
+#include <mcl/array.hpp>
 
 //#define MCL_EC_USE_AFFINE
 
@@ -1068,6 +1069,130 @@ template<class Fp> void (*EcT<Fp>::mulArrayGLV)(EcT& z, const EcT& x, const fp::
 template<class Fp> int EcT<Fp>::mode_;
 #endif
 
+namespace local {
+
+template<class G, class Vec>
+void addTbl(G& Q, const G *tbl, const Vec& naf, size_t i)
+{
+	if (i >= naf.size()) return;
+	int n = naf[i];
+	if (n > 0) {
+		Q += tbl[(n - 1) >> 1];
+	} else if (n < 0) {
+		Q -= tbl[(-n - 1) >> 1];
+	}
+}
+
+} // mcl::local
+
+template<class F, class G>
+struct GLV1T {
+	static F rw; // rw = 1 / w = (-1 - sqrt(-3)) / 2
+	static size_t rBitSize;
+	static mpz_class v0, v1;
+	static mpz_class B[2][2];
+	static mpz_class r;
+public:
+#ifndef CYBOZU_DONT_USE_STRING
+	static void dump(const mpz_class& x)
+	{
+		printf("\"%s\",\n", mcl::gmp::getStr(x, 16).c_str());
+	}
+	static void dump()
+	{
+		printf("\"%s\",\n", rw.getStr(16).c_str());
+		printf("%d,\n", (int)rBitSize);
+		dump(v0);
+		dump(v1);
+		dump(B[0][0]); dump(B[0][1]); dump(B[1][0]); dump(B[1][1]);
+		dump(r);
+	}
+#endif
+	/*
+		initGLV1() is defined in bn.hpp
+	*/
+	/*
+		L = lambda = p^4
+		L (x, y) = (rw x, y)
+	*/
+	static void mulLambda(G& Q, const G& P)
+	{
+		F::mul(Q.x, P.x, rw);
+		Q.y = P.y;
+		Q.z = P.z;
+	}
+	/*
+		x = a + b * lambda mod r
+	*/
+	static void split(mpz_class& a, mpz_class& b, const mpz_class& x)
+	{
+		mpz_class t;
+		t = (x * v0) >> rBitSize;
+		b = (x * v1) >> rBitSize;
+		a = x - (t * B[0][0] + b * B[1][0]);
+		b = - (t * B[0][1] + b * B[1][1]);
+	}
+	static void mul(G& Q, const G& P, mpz_class x, bool constTime = false)
+	{
+		const int w = 5;
+		const size_t tblSize = 1 << (w - 2);
+		typedef mcl::FixedArray<int8_t, sizeof(G) * 8 / 2 + 2> NafArray;
+		NafArray naf[2];
+		mpz_class u[2];
+		G tbl[2][tblSize];
+		bool b;
+
+		x %= r;
+		if (x == 0) {
+			Q.clear();
+			if (!constTime) return;
+		}
+		if (x < 0) {
+			x += r;
+		}
+		split(u[0], u[1], x);
+		gmp::getNAFwidth(&b, naf[0], u[0], w);
+		assert(b); (void)b;
+		gmp::getNAFwidth(&b, naf[1], u[1], w);
+		assert(b); (void)b;
+
+		tbl[0][0] = P;
+		mulLambda(tbl[1][0], tbl[0][0]);
+		{
+			G P2;
+			G::dbl(P2, P);
+			for (size_t i = 1; i < tblSize; i++) {
+				G::add(tbl[0][i], tbl[0][i - 1], P2);
+				mulLambda(tbl[1][i], tbl[0][i]);
+			}
+		}
+		const size_t maxBit = fp::max_(naf[0].size(), naf[1].size());
+		Q.clear();
+		for (size_t i = 0; i < maxBit; i++) {
+			G::dbl(Q, Q);
+			local::addTbl(Q, tbl[0], naf[0], maxBit - 1 - i);
+			local::addTbl(Q, tbl[1], naf[1], maxBit - 1 - i);
+		}
+	}
+	static void mulArray(G& z, const G& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
+	{
+		mpz_class s;
+		bool b;
+		mcl::gmp::setArray(&b, s, y, yn);
+		assert(b);
+		if (isNegative) s = -s;
+		mul(z, x, s, constTime);
+	}
+};
+
+// rw = 1 / w = (-1 - sqrt(-3)) / 2
+template<class F, class G> F GLV1T<F, G>::rw;
+template<class F, class G> size_t GLV1T<F, G>::rBitSize;
+template<class F, class G> mpz_class GLV1T<F, G>::v0;
+template<class F, class G> mpz_class GLV1T<F, G>::v1;
+template<class F, class G> mpz_class GLV1T<F, G>::B[2][2];
+template<class F, class G> mpz_class GLV1T<F, G>::r;
+
 struct EcParam {
 	const char *name;
 	const char *p;
diff --git a/test/glv_test.cpp b/test/glv_test.cpp
index 79d378f4..61f2062d 100644
--- a/test/glv_test.cpp
+++ b/test/glv_test.cpp
@@ -88,7 +88,7 @@ void compareLength(const GLV2& lhs)
 	for (int i = 1; i < 1000; i++) {
 		r.setRand(rg);
 		x = r.getMpz();
-		GLV1::split(R0, R1, x);
+		mcl::bn::local::GLV1::split(R0, R1, x);
 		lhs.split(L0, L1, x);
 
 		size_t R0n = mcl::gmp::getBitSize(R0);
@@ -121,8 +121,8 @@ void testGLV1()
 		oldGlv.init(BN::param.r, BN::param.z);
 	}
 
-	typedef mcl::bn::local::Param::GLV1 GLV1;
-	GLV1::init(BN::param.r, BN::param.z, BN::param.isBLS12);
+	typedef mcl::bn::local::GLV1 GLV1;
+	GLV1::initForBN(BN::param.r, BN::param.z, BN::param.isBLS12);
 	if (!BN::param.isBLS12) {
 		compareLength<GLV1>(oldGlv);
 	}

From 626663b8db188819985dffe88eddb2c33d20441d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 25 Jul 2019 16:44:21 +0900
Subject: [PATCH 031/553] secp256k1 supports GLV method

---
 include/mcl/ec.hpp  |  27 +++++--
 test/ecdsa_test.cpp | 167 +-------------------------------------------
 2 files changed, 24 insertions(+), 170 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 115a8dec..dd771937 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -10,7 +10,6 @@
 #include <cybozu/exception.hpp>
 #include <mcl/op.hpp>
 #include <mcl/util.hpp>
-#include <mcl/array.hpp>
 
 //#define MCL_EC_USE_AFFINE
 
@@ -1109,10 +1108,6 @@ struct GLV1T {
 	}
 #endif
 	/*
-		initGLV1() is defined in bn.hpp
-	*/
-	/*
-		L = lambda = p^4
 		L (x, y) = (rw x, y)
 	*/
 	static void mulLambda(G& Q, const G& P)
@@ -1183,6 +1178,28 @@ struct GLV1T {
 		if (isNegative) s = -s;
 		mul(z, x, s, constTime);
 	}
+	/*
+		initForBN() is defined in bn.hpp
+	*/
+	static void initForSecp256k1(const mpz_class& _r)
+	{
+		bool b = F::squareRoot(rw, -3);
+		assert(b);
+		(void)b;
+		rw = -(rw + 1) / 2;
+		r = _r;
+		rBitSize = gmp::getBitSize(r);
+		rBitSize = (rBitSize + fp::UnitBitSize - 1) & ~(fp::UnitBitSize - 1);
+		gmp::setStr(&b, B[0][0], "0x3086d221a7d46bcde86c90e49284eb15");
+		assert(b); (void)b;
+		gmp::setStr(&b, B[0][1], "-0xe4437ed6010e88286f547fa90abfe4c3");
+		assert(b); (void)b;
+		gmp::setStr(&b, B[1][0], "0x114ca50f7a8e2f3f657c1108d9d44cfd8");
+		assert(b); (void)b;
+		B[1][1] = B[0][0];
+		v0 = ((B[1][1]) << rBitSize) / r;
+		v1 = ((-B[0][1]) << rBitSize) / r;
+	}
 };
 
 // rw = 1 / w = (-1 - sqrt(-3)) / 2
diff --git a/test/ecdsa_test.cpp b/test/ecdsa_test.cpp
index 28f689e4..f0155148 100644
--- a/test/ecdsa_test.cpp
+++ b/test/ecdsa_test.cpp
@@ -15,174 +15,11 @@ void put(const void *buf, size_t bufSize)
 
 using namespace mcl::ecdsa;
 
-typedef mcl::FixedArray<int8_t, 256 / 2 + 2> NafArray;
-
-template<class G>
-void addTbl(G& Q, const G *tbl, const NafArray& naf, size_t i)
-{
-	if (i >= naf.size()) return;
-	int n = naf[i];
-	if (n > 0) {
-		Q += tbl[(n - 1) >> 1];
-	} else if (n < 0) {
-		Q -= tbl[(-n - 1) >> 1];
-	}
-}
-
-using namespace mcl;
-
-template<class G1>
-struct GLV1 {
-	Fp rw; // rw = 1 / w = (-1 - sqrt(-3)) / 2
-	size_t rBitSize;
-	mpz_class v0, v1;
-	mpz_class B[2][2];
-	mpz_class r;
-private:
-public:
-	bool operator==(const GLV1& rhs) const
-	{
-		return rw == rhs.rw && rBitSize == rhs.rBitSize && v0 == rhs.v0 && v1 == rhs.v1
-			&& B[0][0] == rhs.B[0][0] && B[0][1] == rhs.B[0][1] && B[1][0] == rhs.B[1][0]
-			&& B[1][1] == rhs.B[1][1] && r == rhs.r;
-	}
-	bool operator!=(const GLV1& rhs) const { return !operator==(rhs); }
-#ifndef CYBOZU_DONT_USE_STRING
-	void dump(const mpz_class& x) const
-	{
-		printf("\"%s\",\n", mcl::gmp::getStr(x, 16).c_str());
-	}
-	void dump() const
-	{
-		printf("\"%s\",\n", rw.getStr(16).c_str());
-		printf("%d,\n", (int)rBitSize);
-		dump(v0);
-		dump(v1);
-		dump(B[0][0]); dump(B[0][1]); dump(B[1][0]); dump(B[1][1]);
-		dump(r);
-	}
-#endif
-	void init(const mpz_class& r, const mpz_class& z, bool isBLS12 = false, int curveType = -1)
-	{
-	}
-	/*
-		L = lambda = p^4
-		L (x, y) = (rw x, y)
-	*/
-	void mulLambda(G1& Q, const G1& P) const
-	{
-		Fp::mul(Q.x, P.x, rw);
-		Q.y = P.y;
-		Q.z = P.z;
-	}
-	/*
-		x = a + b * lambda mod r
-	*/
-	void split(mpz_class& a, mpz_class& b, const mpz_class& x) const
-	{
-		mpz_class t;
-		t = (x * v0) >> rBitSize;
-		b = (x * v1) >> rBitSize;
-//t = (B[1][1] * x) / r;
-//b = (-B[0][1] * x) / r;
-		a = x - (t * B[0][0] + b * B[1][0]);
-		b = - (t * B[0][1] + b * B[1][1]);
-	}
-	void mul(G1& Q, const G1& P, mpz_class x, bool constTime = false) const
-	{
-		const int w = 5;
-		const size_t tblSize = 1 << (w - 2);
-		NafArray naf[2];
-		mpz_class u[2];
-		G1 tbl[2][tblSize];
-		bool b;
-
-		x %= r;
-		if (x == 0) {
-			Q.clear();
-			if (!constTime) return;
-		}
-		if (x < 0) {
-			x += r;
-		}
-		split(u[0], u[1], x);
-		gmp::getNAFwidth(&b, naf[0], u[0], w);
-		assert(b); (void)b;
-		gmp::getNAFwidth(&b, naf[1], u[1], w);
-		assert(b); (void)b;
-
-		tbl[0][0] = P;
-		mulLambda(tbl[1][0], tbl[0][0]);
-		{
-			G1 P2;
-			G1::dbl(P2, P);
-			for (size_t i = 1; i < tblSize; i++) {
-				G1::add(tbl[0][i], tbl[0][i - 1], P2);
-				mulLambda(tbl[1][i], tbl[0][i]);
-			}
-		}
-		const size_t maxBit = fp::max_(naf[0].size(), naf[1].size());
-		Q.clear();
-		for (size_t i = 0; i < maxBit; i++) {
-			G1::dbl(Q, Q);
-			addTbl(Q, tbl[0], naf[0], maxBit - 1 - i);
-			addTbl(Q, tbl[1], naf[1], maxBit - 1 - i);
-		}
-	}
-};
-
-static GLV1<Ec> glv1;
-
-inline void mulArrayEc(Ec& z, const Ec& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
-{
-	mpz_class s;
-	bool b;
-	mcl::gmp::setArray(&b, s, y, yn);
-	assert(b);
-	if (isNegative) s = -s;
-	glv1.mul(z, x, s, constTime);
-}
-
-void initGLV()
-{
-	const mcl::ecdsa::local::Param& p = mcl::ecdsa::local::getParam();
-	const mcl::EcParam& ecParam = p.ecParam;
-	{
-		Fp& rw = glv1.rw;
-		bool b = Fp::squareRoot(rw, -3);
-		assert(b);
-		printf("b=%d\n", b);
-		if (!b) exit(1);
-		rw = -(rw + 1) / 2;
-		glv1.r = ecParam.n;
-		glv1.rBitSize = gmp::getBitSize(glv1.r);
-		glv1.rBitSize = (glv1.rBitSize + fp::UnitBitSize - 1) & ~(fp::UnitBitSize - 1);
-		gmp::setStr(glv1.B[0][0], "0x3086d221a7d46bcde86c90e49284eb15");
-		gmp::setStr(glv1.B[0][1], "-0xe4437ed6010e88286f547fa90abfe4c3");
-		gmp::setStr(glv1.B[1][0], "0x114ca50f7a8e2f3f657c1108d9d44cfd8");
-		glv1.B[1][1] = glv1.B[0][0];
-		glv1.v0 = ((glv1.B[1][1]) << glv1.rBitSize) / glv1.r;
-		glv1.v1 = ((-glv1.B[0][1]) << glv1.rBitSize) / glv1.r;
-	}
-	PUT(p.P);
-	Ec Q1, Q2;
-	mpz_class L;
-	gmp::setStr(L, "0x5363ad4cc05c30e0a5261c028812645a122e22ea20816678df02967c1b23bd72");
-	PUT(L);
-	Ec::mul(Q1, p.P, L);
-	PUT(Q1);
-	glv1.mulLambda(Q2, p.P);
-	PUT(Q2);
-	PUT(Q1 == Q2);
-	// enable GLV
-	Ec::setMulArrayGLV(mulArrayEc);
-}
-
-
 CYBOZU_TEST_AUTO(ecdsa)
 {
 	init();
-	initGLV();
+	mcl::GLV1T<Fp, Ec>::initForSecp256k1(Zn::getOp().mp);
+	Ec::setMulArrayGLV(mcl::GLV1T<Fp, Ec>::mulArray);
 	SecretKey sec;
 	PublicKey pub;
 	sec.setByCSPRNG();

From 2053b495a7b90be7268ef74e3a5ce0e84910e202 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 25 Jul 2019 22:10:49 +0900
Subject: [PATCH 032/553] simplify GLV1T class

---
 include/mcl/bn.hpp |  2 +-
 include/mcl/ec.hpp | 43 ++++++++++++++++++++++---------------------
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index d1cb9f7f..147f8bb3 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -567,7 +567,7 @@ typedef mcl::FixedArray<int8_t, MCL_MAX_FR_BIT_SIZE / 2 + 2> NafArray;
 	GLV for G1 on BN/BLS12
 */
 
-struct GLV1 : mcl::GLV1T<Fp, G1> {
+struct GLV1 : mcl::GLV1T<G1> {
 	static bool usePrecomputedTable(int curveType)
 	{
 		if (curveType < 0) return false;
diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index dd771937..1d0ad494 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -1070,8 +1070,8 @@ template<class Fp> int EcT<Fp>::mode_;
 
 namespace local {
 
-template<class G, class Vec>
-void addTbl(G& Q, const G *tbl, const Vec& naf, size_t i)
+template<class Ec, class Vec>
+void addTbl(Ec& Q, const Ec *tbl, const Vec& naf, size_t i)
 {
 	if (i >= naf.size()) return;
 	int n = naf[i];
@@ -1084,9 +1084,10 @@ void addTbl(G& Q, const G *tbl, const Vec& naf, size_t i)
 
 } // mcl::local
 
-template<class F, class G>
+template<class Ec>
 struct GLV1T {
-	static F rw; // rw = 1 / w = (-1 - sqrt(-3)) / 2
+	typedef typename Ec::Fp Fp;
+	static Fp rw; // rw = 1 / w = (-1 - sqrt(-3)) / 2
 	static size_t rBitSize;
 	static mpz_class v0, v1;
 	static mpz_class B[2][2];
@@ -1110,9 +1111,9 @@ struct GLV1T {
 	/*
 		L (x, y) = (rw x, y)
 	*/
-	static void mulLambda(G& Q, const G& P)
+	static void mulLambda(Ec& Q, const Ec& P)
 	{
-		F::mul(Q.x, P.x, rw);
+		Fp::mul(Q.x, P.x, rw);
 		Q.y = P.y;
 		Q.z = P.z;
 	}
@@ -1127,14 +1128,14 @@ struct GLV1T {
 		a = x - (t * B[0][0] + b * B[1][0]);
 		b = - (t * B[0][1] + b * B[1][1]);
 	}
-	static void mul(G& Q, const G& P, mpz_class x, bool constTime = false)
+	static void mul(Ec& Q, const Ec& P, mpz_class x, bool constTime = false)
 	{
 		const int w = 5;
 		const size_t tblSize = 1 << (w - 2);
-		typedef mcl::FixedArray<int8_t, sizeof(G) * 8 / 2 + 2> NafArray;
+		typedef mcl::FixedArray<int8_t, sizeof(Fp) * 8 / 2 + 2> NafArray;
 		NafArray naf[2];
 		mpz_class u[2];
-		G tbl[2][tblSize];
+		Ec tbl[2][tblSize];
 		bool b;
 
 		x %= r;
@@ -1154,22 +1155,22 @@ struct GLV1T {
 		tbl[0][0] = P;
 		mulLambda(tbl[1][0], tbl[0][0]);
 		{
-			G P2;
-			G::dbl(P2, P);
+			Ec P2;
+			Ec::dbl(P2, P);
 			for (size_t i = 1; i < tblSize; i++) {
-				G::add(tbl[0][i], tbl[0][i - 1], P2);
+				Ec::add(tbl[0][i], tbl[0][i - 1], P2);
 				mulLambda(tbl[1][i], tbl[0][i]);
 			}
 		}
 		const size_t maxBit = fp::max_(naf[0].size(), naf[1].size());
 		Q.clear();
 		for (size_t i = 0; i < maxBit; i++) {
-			G::dbl(Q, Q);
+			Ec::dbl(Q, Q);
 			local::addTbl(Q, tbl[0], naf[0], maxBit - 1 - i);
 			local::addTbl(Q, tbl[1], naf[1], maxBit - 1 - i);
 		}
 	}
-	static void mulArray(G& z, const G& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
+	static void mulArray(Ec& z, const Ec& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
 	{
 		mpz_class s;
 		bool b;
@@ -1183,7 +1184,7 @@ struct GLV1T {
 	*/
 	static void initForSecp256k1(const mpz_class& _r)
 	{
-		bool b = F::squareRoot(rw, -3);
+		bool b = Fp::squareRoot(rw, -3);
 		assert(b);
 		(void)b;
 		rw = -(rw + 1) / 2;
@@ -1203,12 +1204,12 @@ struct GLV1T {
 };
 
 // rw = 1 / w = (-1 - sqrt(-3)) / 2
-template<class F, class G> F GLV1T<F, G>::rw;
-template<class F, class G> size_t GLV1T<F, G>::rBitSize;
-template<class F, class G> mpz_class GLV1T<F, G>::v0;
-template<class F, class G> mpz_class GLV1T<F, G>::v1;
-template<class F, class G> mpz_class GLV1T<F, G>::B[2][2];
-template<class F, class G> mpz_class GLV1T<F, G>::r;
+template<class Ec> typename Ec::Fp GLV1T<Ec>::rw;
+template<class Ec> size_t GLV1T<Ec>::rBitSize;
+template<class Ec> mpz_class GLV1T<Ec>::v0;
+template<class Ec> mpz_class GLV1T<Ec>::v1;
+template<class Ec> mpz_class GLV1T<Ec>::B[2][2];
+template<class Ec> mpz_class GLV1T<Ec>::r;
 
 struct EcParam {
 	const char *name;

From 3c98ac00c8209f722f8c718109674bd3b56a458b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 25 Jul 2019 22:10:59 +0900
Subject: [PATCH 033/553] enable GLV for secp256k1

---
 include/mcl/ecdsa.hpp | 2 ++
 include/mcl/she.hpp   | 5 +++++
 test/ecdsa_test.cpp   | 2 --
 test/she_test.cpp     | 5 +++--
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/include/mcl/ecdsa.hpp b/include/mcl/ecdsa.hpp
index cf3ed3f6..6540c19f 100644
--- a/include/mcl/ecdsa.hpp
+++ b/include/mcl/ecdsa.hpp
@@ -99,6 +99,8 @@ inline void init(bool *pb)
 	p.P.set(pb, x, y);
 	if (!*pb) return;
 	p.Pbase.init(pb, p.P, ecParam.bitSize, local::winSize);
+	mcl::GLV1T<Ec>::initForSecp256k1(Zn::getOp().mp);
+	Ec::setMulArrayGLV(mcl::GLV1T<Ec>::mulArray);
 }
 
 #ifndef CYBOZU_DONT_USE_EXCEPTION
diff --git a/include/mcl/she.hpp b/include/mcl/she.hpp
index 282f2fe8..84f3e555 100644
--- a/include/mcl/she.hpp
+++ b/include/mcl/she.hpp
@@ -26,6 +26,7 @@
 #include <mcl/window_method.hpp>
 #include <cybozu/endian.hpp>
 #include <cybozu/serializer.hpp>
+#include <mcl/ecparam.hpp>
 
 namespace mcl { namespace she {
 
@@ -588,6 +589,10 @@ struct SHET {
 		useDecG2ViaGT_ = false;
 		isG1only_ = true;
 		setTryNum(tryNum);
+		if (std::string(para.name) == mcl::ecparam::secp256k1.name) {
+			mcl::GLV1T<G1>::initForSecp256k1(Fr::getOp().mp);
+			G1::setMulArrayGLV(mcl::GLV1T<G1>::mulArray);
+		}
 	}
 	/*
 		set range for G1-DLP
diff --git a/test/ecdsa_test.cpp b/test/ecdsa_test.cpp
index f0155148..80de88a2 100644
--- a/test/ecdsa_test.cpp
+++ b/test/ecdsa_test.cpp
@@ -18,8 +18,6 @@ using namespace mcl::ecdsa;
 CYBOZU_TEST_AUTO(ecdsa)
 {
 	init();
-	mcl::GLV1T<Fp, Ec>::initForSecp256k1(Zn::getOp().mp);
-	Ec::setMulArrayGLV(mcl::GLV1T<Fp, Ec>::mulArray);
 	SecretKey sec;
 	PublicKey pub;
 	sec.setByCSPRNG();
diff --git a/test/she_test.cpp b/test/she_test.cpp
index cb644785..0782eda8 100644
--- a/test/she_test.cpp
+++ b/test/she_test.cpp
@@ -716,8 +716,9 @@ CYBOZU_TEST_AUTO(hashBench)
 CYBOZU_TEST_AUTO(liftedElGamal)
 {
 	const size_t hashSize = 1024;
-	initG1only(mcl::ecparam::secp192k1, hashSize);
-	const size_t byteSize = 192 / 8;
+	const mcl::EcParam& param = mcl::ecparam::secp256k1;
+	initG1only(param, hashSize);
+	const size_t byteSize = (param.bitSize + 7) / 8;
 	SecretKey sec;
 	sec.setByCSPRNG();
 	PublicKey pub;

From 5b453fda2beb773d8cb1a3a80ab4e6848b9011d0 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 25 Jul 2019 22:13:18 +0900
Subject: [PATCH 034/553] v0.96

---
 include/mcl/op.hpp | 2 +-
 readme.md          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index c31b7d15..a8b47e5f 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x095; /* 0xABC = A.BC */
+static const int version = 0x096; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index 40ae5a8d..0510f1fb 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,7 @@ mcl is a library for pairing-based cryptography.
 The current version supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
+* v0.96 improved scalar multiplication
 * mclBn_setETHserialization(true) (de)serialize acoording to [ETH2.0 serialization of BLS12-381](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#point-representations) when BLS12-381 is used.
 * (Break backward compatibility) libmcl_dy.a is renamed to libmcl.a
     * The option SHARE_BASENAME_SUF is removed

From 4c376d551ea3c66de37265fb3699b79247248961 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Jul 2019 11:43:05 +0900
Subject: [PATCH 035/553] test of GT::pow

---
 test/glv_test.cpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/test/glv_test.cpp b/test/glv_test.cpp
index 61f2062d..e1d49411 100644
--- a/test/glv_test.cpp
+++ b/test/glv_test.cpp
@@ -191,6 +191,25 @@ void testGLV2()
 	CYBOZU_BENCH_C("G2::glv", 1000, Q1 = Q0; s.setRand(rg); glv2.mul, Q2, Q1, s.getMpz());
 }
 
+void testGT()
+{
+	G1 P;
+	G2 Q;
+	GT x, y, z;
+	hashAndMapToG1(P, "abc", 3);
+	hashAndMapToG2(Q, "abc", 3);
+	pairing(x, P, Q);
+	int n = 200;
+	y = x;
+	for (int i = 0; i < n; i++) {
+		y *= y;
+	}
+	mpz_class t = 1;
+	t <<= n;
+	GT::pow(z, x, t);
+	CYBOZU_TEST_EQUAL(y, z);
+}
+
 CYBOZU_TEST_AUTO(glv)
 {
 	const mcl::CurveParam tbl[] = {
@@ -204,5 +223,6 @@ CYBOZU_TEST_AUTO(glv)
 		initPairing(cp);
 		testGLV1();
 		testGLV2();
+		testGT();
 	}
 }

From 85163ee0037b87452080c13a31ff1a8ea0318855 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 29 Jul 2019 04:59:03 +0900
Subject: [PATCH 036/553] add initCurve

---
 include/mcl/ahe.hpp     | 76 -----------------------------------------
 include/mcl/ec.hpp      | 50 +++++++++++++++++++++------
 include/mcl/ecdsa.hpp   | 27 +++------------
 include/mcl/ecparam.hpp | 20 +++++++++--
 4 files changed, 62 insertions(+), 111 deletions(-)
 delete mode 100644 include/mcl/ahe.hpp

diff --git a/include/mcl/ahe.hpp b/include/mcl/ahe.hpp
deleted file mode 100644
index 239319d0..00000000
--- a/include/mcl/ahe.hpp
+++ /dev/null
@@ -1,76 +0,0 @@
-#pragma once
-/**
-	@file
-	@brief 192/256-bit additive homomorphic encryption by lifted-ElGamal
-	@author MITSUNARI Shigeo(@herumi)
-	@license modified new BSD license
-	http://opensource.org/licenses/BSD-3-Clause
-*/
-#include <mcl/elgamal.hpp>
-#include <mcl/ecparam.hpp>
-
-namespace mcl {
-
-#ifdef MCL_USE_AHE192
-namespace ahe192 {
-
-const mcl::EcParam& para = mcl::ecparam::NIST_P192;
-
-typedef mcl::FpT<mcl::FpTag, 192> Fp;
-typedef mcl::FpT<mcl::ZnTag, 192> Zn;
-typedef mcl::EcT<Fp> Ec;
-typedef mcl::ElgamalT<Ec, Zn> ElgamalEc;
-typedef ElgamalEc::PrivateKey SecretKey;
-typedef ElgamalEc::PublicKey PublicKey;
-typedef ElgamalEc::CipherText CipherText;
-
-static inline void initAhe()
-{
-	Fp::init(para.p);
-	Zn::init(para.n);
-	Ec::init(para.a, para.b);
-	Ec::setIoMode(16);
-	Zn::setIoMode(16);
-}
-
-static inline void initSecretKey(SecretKey& sec)
-{
-	const Ec P(Fp(para.gx), Fp(para.gy));
-	sec.init(P, Zn::getBitSize());
-}
-
-} //mcl::ahe192
-#endif
-
-#ifdef MCL_USE_AHE256
-namespace ahe256 {
-
-const mcl::EcParam& para = mcl::ecparam::NIST_P256;
-
-typedef mcl::FpT<mcl::FpTag, 256> Fp;
-typedef mcl::FpT<mcl::ZnTag, 256> Zn;
-typedef mcl::EcT<Fp> Ec;
-typedef mcl::ElgamalT<Ec, Zn> ElgamalEc;
-typedef ElgamalEc::PrivateKey SecretKey;
-typedef ElgamalEc::PublicKey PublicKey;
-typedef ElgamalEc::CipherText CipherText;
-
-static inline void initAhe()
-{
-	Fp::init(para.p);
-	Zn::init(para.n);
-	Ec::init(para.a, para.b);
-	Ec::setIoMode(16);
-	Zn::setIoMode(16);
-}
-
-static inline void initSecretKey(SecretKey& sec)
-{
-	const Ec P(Fp(para.gx), Fp(para.gy));
-	sec.init(P, Zn::getBitSize());
-}
-
-} //mcl::ahe256
-#endif
-
-} //  mcl
diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 1d0ad494..1ab40962 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -10,6 +10,7 @@
 #include <cybozu/exception.hpp>
 #include <mcl/op.hpp>
 #include <mcl/util.hpp>
+#include <mcl/ecparam.hpp>
 
 //#define MCL_EC_USE_AFFINE
 
@@ -1211,17 +1212,44 @@ template<class Ec> mpz_class GLV1T<Ec>::v1;
 template<class Ec> mpz_class GLV1T<Ec>::B[2][2];
 template<class Ec> mpz_class GLV1T<Ec>::r;
 
-struct EcParam {
-	const char *name;
-	const char *p;
-	const char *a;
-	const char *b;
-	const char *gx;
-	const char *gy;
-	const char *n;
-	size_t bitSize; // bit length of p
-	int curveType;
-};
+/*
+	Ec : elliptic curve
+	Zn : cyclic group of the order |Ec|
+	P : set the generator of Ec unless NULL
+*/
+template<class Ec, class Zn>
+void initCurve(bool *pb, int curveType, Ec *P = 0)
+{
+	typedef typename Ec::Fp Fp;
+	*pb = false;
+	const EcParam *ecParam = getEcParam(curveType);
+	if (ecParam == 0) return;
+
+	Zn::init(pb, ecParam->n);
+	if (!*pb) return;
+	Fp::init(pb, ecParam->p);
+	if (!*pb) return;
+	Ec::init(pb, ecParam->a, ecParam->b);
+	if (!*pb) return;
+	Zn::setIoMode(16);
+	Fp::setIoMode(16);
+//	Ec::setIoMode(IoEcAffine);
+	if (P) {
+		Fp x, y;
+		x.setStr(pb, ecParam->gx);
+		if (!*pb) return;
+		y.setStr(pb, ecParam->gy);
+		if (!*pb) return;
+		P->set(pb, x, y);
+		if (!*pb) return;
+	}
+	if (curveType == MCL_SECP256K1) {
+		GLV1T<Ec>::initForSecp256k1(Zn::getOp().mp);
+		Ec::setMulArrayGLV(GLV1T<Ec>::mulArray);
+	} else {
+		Ec::setMulArrayGLV(0);
+	}
+}
 
 } // mcl
 
diff --git a/include/mcl/ecdsa.hpp b/include/mcl/ecdsa.hpp
index 6540c19f..c92000ad 100644
--- a/include/mcl/ecdsa.hpp
+++ b/include/mcl/ecdsa.hpp
@@ -32,9 +32,9 @@ typedef mcl::EcT<Fp> Ec;
 namespace local {
 
 struct Param {
-	mcl::EcParam ecParam;
 	Ec P;
 	mcl::fp::WindowMethod<Ec> Pbase;
+	size_t bitSize;
 };
 
 inline Param& getParam()
@@ -79,28 +79,11 @@ const local::Param& param = local::getParam();
 
 inline void init(bool *pb)
 {
-	const mcl::EcParam& ecParam = mcl::ecparam::secp256k1;
-	Zn::init(pb, ecParam.n);
-	if (!*pb) return;
-	Fp::init(pb, ecParam.p);
-	if (!*pb) return;
-	Ec::init(pb, ecParam.a, ecParam.b);
-	if (!*pb) return;
-	Zn::setIoMode(16);
-	Fp::setIoMode(16);
-	Ec::setIoMode(mcl::IoEcAffine);
 	local::Param& p = local::getParam();
-	p.ecParam = ecParam;
-	Fp x, y;
-	x.setStr(pb, ecParam.gx);
-	if (!*pb) return;
-	y.setStr(pb, ecParam.gy);
-	if (!*pb) return;
-	p.P.set(pb, x, y);
+	mcl::initCurve<Ec, Zn>(pb, MCL_SECP256K1, &p.P);
 	if (!*pb) return;
-	p.Pbase.init(pb, p.P, ecParam.bitSize, local::winSize);
-	mcl::GLV1T<Ec>::initForSecp256k1(Zn::getOp().mp);
-	Ec::setMulArrayGLV(mcl::GLV1T<Ec>::mulArray);
+	p.bitSize = 256;
+	p.Pbase.init(pb, p.P, p.bitSize, local::winSize);
 }
 
 #ifndef CYBOZU_DONT_USE_EXCEPTION
@@ -119,7 +102,7 @@ struct PrecomputedPublicKey {
 	mcl::fp::WindowMethod<Ec> pubBase_;
 	void init(bool *pb, const PublicKey& pub)
 	{
-		pubBase_.init(pb, pub, param.ecParam.bitSize, local::winSize);
+		pubBase_.init(pb, pub, param.bitSize, local::winSize);
 	}
 #ifndef CYBOZU_DONT_USE_EXCEPTION
 	void init(const PublicKey& pub)
diff --git a/include/mcl/ecparam.hpp b/include/mcl/ecparam.hpp
index 087bf8b6..9fa4e047 100644
--- a/include/mcl/ecparam.hpp
+++ b/include/mcl/ecparam.hpp
@@ -6,10 +6,23 @@
 	@license modified new BSD license
 	http://opensource.org/licenses/BSD-3-Clause
 */
-#include <mcl/ec.hpp>
 #include <mcl/curve_type.h>
 
-namespace mcl { namespace ecparam {
+namespace mcl {
+
+struct EcParam {
+	const char *name;
+	const char *p;
+	const char *a;
+	const char *b;
+	const char *gx;
+	const char *gy;
+	const char *n;
+	size_t bitSize; // bit length of p
+	int curveType;
+};
+
+namespace ecparam {
 
 const struct mcl::EcParam secp160k1 = {
 	"secp160k1",
@@ -181,6 +194,7 @@ inline const mcl::EcParam* getEcParam(int curve)
 	case MCL_SECP224K1: return &ecparam::secp224k1;
 	case MCL_SECP256K1: return &ecparam::secp256k1;
 	case MCL_SECP384R1: return &ecparam::secp384r1;
+	case MCL_SECP521R1: return &ecparam::secp521r1;
 	case MCL_NIST_P192: return &ecparam::NIST_P192;
 	case MCL_NIST_P224: return &ecparam::NIST_P224;
 	case MCL_NIST_P256: return &ecparam::NIST_P256;
@@ -189,3 +203,5 @@ inline const mcl::EcParam* getEcParam(int curve)
 }
 
 } // mcl
+
+#include <mcl/ec.hpp>

From b57e25f0cf0cca7738c3716b92b247f14184cff4 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 29 Jul 2019 13:01:57 +0900
Subject: [PATCH 037/553] old codes use initCurve

---
 ffi/java/Makefile         |  1 +
 ffi/java/elgamal_impl.hpp | 10 +++++-----
 include/mcl/curve_type.h  |  6 ++++--
 include/mcl/ec.hpp        | 23 +++++++++++++++--------
 include/mcl/ecparam.hpp   |  8 +++++---
 sample/bench.cpp          |  8 ++------
 sample/ecdh.cpp           | 16 +++++-----------
 sample/vote.cpp           |  9 +++------
 test/ec_test.cpp          |  4 +---
 test/elgamal_test.cpp     |  8 ++------
 10 files changed, 43 insertions(+), 50 deletions(-)

diff --git a/ffi/java/Makefile b/ffi/java/Makefile
index f8172c31..bb581dbc 100644
--- a/ffi/java/Makefile
+++ b/ffi/java/Makefile
@@ -49,6 +49,7 @@ test_elgamal: ElgamalTest.class $(ELGAMAL_LIB)
 	$(JAVA_EXE) ElgamalTest
 	$(JAVA_EXE) ElgamalTest -e NIST_P192
 	$(JAVA_EXE) ElgamalTest -e NIST_P256 -h sha256
+	$(JAVA_EXE) ElgamalTest -e secp256k1 -h sha256
 	$(JAVA_EXE) ElgamalTest -e NIST_P384 -h sha384
 	$(JAVA_EXE) ElgamalTest -e NIST_P521 -h sha512
 
diff --git a/ffi/java/elgamal_impl.hpp b/ffi/java/elgamal_impl.hpp
index d7130c29..c54f58fd 100644
--- a/ffi/java/elgamal_impl.hpp
+++ b/ffi/java/elgamal_impl.hpp
@@ -34,11 +34,11 @@ void SystemInit(const std::string& param) throw(std::exception)
 	if (iss >> ecParamStr >> hashNameStr) {
 		Param& p = Param::getParam();
 		p.ecParam = mcl::getEcParam(ecParamStr);
-		Zn::init(p.ecParam->n);
-		Fp::init(p.ecParam->p);
-		Ec::init(p.ecParam->a, p.ecParam->b);
-		p.hashName = cybozu::crypto::Hash::getName(hashNameStr);
-		return;
+		if (p.ecParam) {
+			mcl::initCurve<Ec, Zn>(p.ecParam->curveType);
+			p.hashName = cybozu::crypto::Hash::getName(hashNameStr);
+			return;
+		}
 	}
 	throw cybozu::Exception("SystemInit:bad param") << param;
 }
diff --git a/include/mcl/curve_type.h b/include/mcl/curve_type.h
index 9e4a941a..42ba6a60 100644
--- a/include/mcl/curve_type.h
+++ b/include/mcl/curve_type.h
@@ -18,7 +18,7 @@ enum {
 
 	/*
 		for only G1
-		the size of curve must be less or equal to MCLBN_FP_UNIT_SIZE
+		the size of curve must be <= MCLBN_FP_UNIT_SIZE
 	*/
 	MCL_EC_BEGIN = 100,
 	MCL_SECP192K1 = MCL_EC_BEGIN,
@@ -29,7 +29,9 @@ enum {
 	MCL_NIST_P192 = 105,
 	MCL_NIST_P224 = 106,
 	MCL_NIST_P256 = 107,
-	MCL_EC_END = MCL_NIST_P256 + 1,
+	MCL_SECP160K1 = 108,
+	MCL_P160_1 = 109,
+	MCL_EC_END = MCL_P160_1 + 1,
 	MCL_NIST_P384 = MCL_SECP384R1,
 	MCL_NIST_P521 = MCL_SECP521R1
 };
diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 1ab40962..36862391 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -1215,25 +1215,22 @@ template<class Ec> mpz_class GLV1T<Ec>::r;
 /*
 	Ec : elliptic curve
 	Zn : cyclic group of the order |Ec|
-	P : set the generator of Ec unless NULL
+	set P the generator of Ec if P != 0
 */
 template<class Ec, class Zn>
-void initCurve(bool *pb, int curveType, Ec *P = 0)
+void initCurve(bool *pb, int curveType, Ec *P = 0, mcl::fp::Mode mode = fp::FP_AUTO, mcl::ec::Mode ecMode = ec::Jacobi)
 {
 	typedef typename Ec::Fp Fp;
 	*pb = false;
 	const EcParam *ecParam = getEcParam(curveType);
 	if (ecParam == 0) return;
 
-	Zn::init(pb, ecParam->n);
+	Zn::init(pb, ecParam->n, mode);
 	if (!*pb) return;
-	Fp::init(pb, ecParam->p);
+	Fp::init(pb, ecParam->p, mode);
 	if (!*pb) return;
-	Ec::init(pb, ecParam->a, ecParam->b);
+	Ec::init(pb, ecParam->a, ecParam->b, ecMode);
 	if (!*pb) return;
-	Zn::setIoMode(16);
-	Fp::setIoMode(16);
-//	Ec::setIoMode(IoEcAffine);
 	if (P) {
 		Fp x, y;
 		x.setStr(pb, ecParam->gx);
@@ -1251,6 +1248,16 @@ void initCurve(bool *pb, int curveType, Ec *P = 0)
 	}
 }
 
+#ifndef CYBOZU_DONT_USE_EXCEPTION
+template<class Ec, class Zn>
+void initCurve(int curveType, Ec *P = 0, mcl::fp::Mode mode = fp::FP_AUTO, mcl::ec::Mode ecMode = ec::Jacobi)
+{
+	bool b;
+	initCurve<Ec, Zn>(&b, curveType, P, mode, ecMode);
+	if (!b) throw cybozu::Exception("mcl:initCurve") << curveType << mode << ecMode;
+}
+#endif
+
 } // mcl
 
 #ifdef CYBOZU_USE_BOOST
diff --git a/include/mcl/ecparam.hpp b/include/mcl/ecparam.hpp
index 9fa4e047..dba80630 100644
--- a/include/mcl/ecparam.hpp
+++ b/include/mcl/ecparam.hpp
@@ -33,9 +33,9 @@ const struct mcl::EcParam secp160k1 = {
 	"0x938cf935318fdced6bc28286531733c3f03c4fee",
 	"0x100000000000000000001b8fa16dfab9aca16b6b3",
 	160,
-	-1
+	MCL_SECP160K1
 };
-// p=2^160 + 7
+// p=2^160 + 7 (for test)
 const struct mcl::EcParam p160_1 = {
 	"p160_1",
 	"0x10000000000000000000000000000000000000007",
@@ -45,7 +45,7 @@ const struct mcl::EcParam p160_1 = {
 	"1236612389951462151661156731535316138439983579284",
 	"1461501637330902918203683518218126812711137002561",
 	161,
-	-1
+	MCL_P160_1
 };
 const struct mcl::EcParam secp192k1 = {
 	"secp192k1",
@@ -198,6 +198,8 @@ inline const mcl::EcParam* getEcParam(int curve)
 	case MCL_NIST_P192: return &ecparam::NIST_P192;
 	case MCL_NIST_P224: return &ecparam::NIST_P224;
 	case MCL_NIST_P256: return &ecparam::NIST_P256;
+	case MCL_SECP160K1: return &ecparam::secp160k1;
+	case MCL_P160_1: return &ecparam::p160_1;
 	default: return 0;
 	}
 }
diff --git a/sample/bench.cpp b/sample/bench.cpp
index 0f865b18..de81f258 100644
--- a/sample/bench.cpp
+++ b/sample/bench.cpp
@@ -76,12 +76,8 @@ void benchFp(size_t bitSize, int mode)
 
 void benchEcSub(const mcl::EcParam& para, mcl::fp::Mode mode, mcl::ec::Mode ecMode)
 {
-	Fp::init(para.p, mode);
-	Zn::init(para.n);
-	Ec::init(para.a, para.b, ecMode);
-	Fp x(para.gx);
-	Fp y(para.gy);
-	Ec P(x, y);
+	Ec P;
+	mcl::initCurve<Ec, Zn>(para.curveType, &P, mode, ecMode);
 	Ec P2; Ec::add(P2, P, P);
 	Ec Q = P + P + P;
 	double addT, add2T, subT, dblT, mulT, mulCTT, mulRandT, mulCTRandT, normT;
diff --git a/sample/ecdh.cpp b/sample/ecdh.cpp
index d5c4a31b..4fca3c0c 100644
--- a/sample/ecdh.cpp
+++ b/sample/ecdh.cpp
@@ -7,31 +7,25 @@
 #include <mcl/fp.hpp>
 #include <mcl/ecparam.hpp>
 
-typedef mcl::FpT<> Fp;
+typedef mcl::FpT<mcl::FpTag> Fp;
 typedef mcl::FpT<mcl::ZnTag> Zn;
 typedef mcl::EcT<Fp> Ec;
 
 int main()
 {
-	cybozu::RandomGenerator rg;
 	/*
-		system setup with a parameter secp192k1 recommended by SECG
 		Ec is an elliptic curve over Fp
 		the cyclic group of <P> is isomorphic to Zn
 	*/
-	const mcl::EcParam& para = mcl::ecparam::secp192k1;
-	Zn::init(para.n);
-	Fp::init(para.p);
-	Ec::init(para.a, para.b);
-	const Ec P(Fp(para.gx), Fp(para.gy));
-
+	Ec P;
+	mcl::initCurve<Ec, Zn>(MCL_SECP192K1, &P);
 	/*
 		Alice setups a private key a and public key aP
 	*/
 	Zn a;
 	Ec aP;
 
-	a.setRand(rg);
+	a.setByCSPRNG();
 	Ec::mul(aP, P, a); // aP = a * P;
 
 	std::cout << "aP=" << aP << std::endl;
@@ -42,7 +36,7 @@ int main()
 	Zn b;
 	Ec bP;
 
-	b.setRand(rg);
+	b.setByCSPRNG();
 	Ec::mul(bP, P, b); // bP = b * P;
 
 	std::cout << "bP=" << bP << std::endl;
diff --git a/sample/vote.cpp b/sample/vote.cpp
index 88137187..36b08e25 100644
--- a/sample/vote.cpp
+++ b/sample/vote.cpp
@@ -16,8 +16,8 @@
 #include <mcl/elgamal.hpp>
 #include <mcl/ecparam.hpp>
 
-typedef mcl::FpT<> Fp;
-typedef mcl::FpT<mcl::ZnTag> Zn; // use ZnTag because Zn is different class with Fp
+typedef mcl::FpT<mcl::FpTag> Fp;
+typedef mcl::FpT<mcl::ZnTag> Zn;
 typedef mcl::EcT<Fp> Ec;
 typedef mcl::ElgamalT<Ec, Zn> Elgamal;
 
@@ -59,10 +59,7 @@ struct Param {
 
 void SysInit()
 {
-	const mcl::EcParam& para = mcl::ecparam::secp192k1;
-	Zn::init(para.n);
-	Fp::init(para.p);
-	Ec::init(para.a, para.b);
+	mcl::initCurve<Ec, Zn>(MCL_SECP192K1);
 }
 
 template<class T>
diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index ec49adbf..7999443e 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -32,9 +32,7 @@ struct Test {
 		: para(para)
 	{
 		printf("fpMode=%s\n", mcl::fp::ModeToStr(fpMode));
-		Fp::init(para.p, fpMode);
-		Zn::init(para.n, fpMode);
-		Ec::init(para.a, para.b, ecMode);
+		mcl::initCurve<Ec, Zn>(para.curveType, 0, fpMode, ecMode);
 	}
 	void cstr() const
 	{
diff --git a/test/elgamal_test.cpp b/test/elgamal_test.cpp
index 8f27f901..f021cff4 100644
--- a/test/elgamal_test.cpp
+++ b/test/elgamal_test.cpp
@@ -15,13 +15,9 @@ cybozu::RandomGenerator g_rg;
 
 CYBOZU_TEST_AUTO(testEc)
 {
-	Fp::init(para.p);
-	Zn::init(para.n);
-	Ec::init(para.a, para.b);
-	const Fp x0(para.gx);
-	const Fp y0(para.gy);
+	Ec P;
+	mcl::initCurve<Ec, Zn>(para.curveType, &P);
 	const size_t bitSize = Zn::getBitSize();
-	const Ec P(x0, y0);
 	/*
 		Zn = <P>
 	*/

From 6e9f21d1afbdc207202d4a6e75d0bddc9c1f187b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 30 Jul 2019 13:44:08 +0900
Subject: [PATCH 038/553] [she] use initCurve

---
 include/mcl/she.hpp | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/include/mcl/she.hpp b/include/mcl/she.hpp
index 84f3e555..b95f01c9 100644
--- a/include/mcl/she.hpp
+++ b/include/mcl/she.hpp
@@ -577,22 +577,12 @@ struct SHET {
 	*/
 	static void initG1only(const mcl::EcParam& para, size_t hashSize = 1024, size_t tryNum = local::defaultTryNum)
 	{
-		Fp::init(para.p);
-		Fr::init(para.n);
-		G1::init(para.a, para.b);
-		const Fp x0(para.gx);
-		const Fp y0(para.gy);
-		P_.set(x0, y0);
-
+		mcl::initCurve<G1, Fr>(para.curveType, &P_);
 		setRangeForG1DLP(hashSize);
 		useDecG1ViaGT_ = false;
 		useDecG2ViaGT_ = false;
 		isG1only_ = true;
 		setTryNum(tryNum);
-		if (std::string(para.name) == mcl::ecparam::secp256k1.name) {
-			mcl::GLV1T<G1>::initForSecp256k1(Fr::getOp().mp);
-			G1::setMulArrayGLV(mcl::GLV1T<G1>::mulArray);
-		}
 	}
 	/*
 		set range for G1-DLP

From 0f3aae4febb286bf924660d809dd32c1cae63d70 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 8 Aug 2019 14:16:49 +0900
Subject: [PATCH 039/553] add openmp option to setvar.bat

---
 setvar.bat | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setvar.bat b/setvar.bat
index 1d57fa69..2ceea1fc 100644
--- a/setvar.bat
+++ b/setvar.bat
@@ -1,2 +1,2 @@
-set CFLAGS=/MT /DNOMINMAX /Ox /DNDEBUG /W4 /Zi /EHsc /nologo -I./include -I../cybozulib_ext/include
+set CFLAGS=/MT /DNOMINMAX /Ox /DNDEBUG /openmp /W4 /Zi /EHsc /nologo -I./include -I../cybozulib_ext/include
 set LDFLAGS=/LIBPATH:..\cybozulib_ext\lib /LIBPATH:.\lib

From a6cab0b3856b2ea81a3be7788b9a85be6f48b590 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 11 Aug 2019 10:09:44 +0900
Subject: [PATCH 040/553] common.mk detects armv6l

---
 common.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common.mk b/common.mk
index 5c749e1a..8bbc0325 100644
--- a/common.mk
+++ b/common.mk
@@ -42,7 +42,7 @@ ifeq ($(ARCH),x86)
   BIT_OPT=-m32
   #LOW_ASM_SRC=src/asm/low_x86.asm
 endif
-ifeq ($(ARCH),armv7l)
+ifneq ($(findstring $(ARCH),armv7l/armv6l),)
   CPU=arm
   BIT=32
   #LOW_ASM_SRC=src/asm/low_arm.s

From b84d0c7fe0e549a7f708f5177b9467c670df3eae Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 13 Aug 2019 11:11:47 +0900
Subject: [PATCH 041/553] dump() accepts const void*

---
 include/mcl/op.hpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index a8b47e5f..80119845 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -366,10 +366,11 @@ inline const char* getIoSeparator(int ioMode)
 	return (ioMode & (IoArray | IoArrayRaw | IoSerialize | IoSerializeHexStr)) ? "" : " ";
 }
 
-inline void dump(const char *s, size_t n)
+inline void dump(const void *buf, size_t n)
 {
+	const uint8_t *s = (const uint8_t *)buf;
 	for (size_t i = 0; i < n; i++) {
-		printf("%02x ", (uint8_t)s[i]);
+		printf("%02x ", s[i]);
 	}
 	printf("\n");
 }

From bb287f8f1154acaf8f3d3b449becf77ca7c3ba01 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 13 Aug 2019 11:14:52 +0900
Subject: [PATCH 042/553] add Fp::getLittleEndian

---
 include/mcl/fp.hpp | 23 +++++++++++++++++++++++
 test/fp_test.cpp   | 31 +++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 8c164681..62d592f7 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -366,6 +366,29 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 			b.p = &v_[0];
 		}
 	}
+	/*
+		write a value with little endian
+		write buf[0] = 0 and return 1 if the value is 0
+		return written size if success else 0
+	*/
+	size_t getLittleEndian(void *buf, size_t maxBufSize) const
+	{
+		fp::Block b;
+		getBlock(b);
+		const uint8_t *src = (const uint8_t *)b.p;
+		uint8_t *dst = (uint8_t *)buf;
+		size_t n = b.n * sizeof(b.p[0]);
+		while (n > 0) {
+			if (src[n - 1]) break;
+			n--;
+		}
+		if (n == 0) n = 1; // zero
+		if (maxBufSize < n) return 0;
+		for (size_t i = 0; i < n; i++) {
+			dst[i] = src[i];
+		}
+		return n;
+	}
 	void setByCSPRNG(bool *pb, fp::RandGen rg = fp::RandGen())
 	{
 		if (rg.isZero()) rg = fp::RandGen::get();
diff --git a/test/fp_test.cpp b/test/fp_test.cpp
index 6b71075c..36ba4d69 100644
--- a/test/fp_test.cpp
+++ b/test/fp_test.cpp
@@ -672,6 +672,36 @@ void getInt64Test()
 	}
 }
 
+void getLittleEndianTest()
+{
+	if (Fp::getOp().bitSize < 80) return;
+	const struct {
+		const char *in;
+		uint8_t out[16];
+		size_t size;
+	} tbl[] = {
+		{ "0", { 0 }, 1 },
+		{ "1", { 1 }, 1 },
+		{ "0x1200", { 0x00, 0x12 }, 2 },
+		{ "0x123400", { 0x00, 0x34, 0x12 }, 3 },
+		{ "0x1234567890123456ab", { 0xab, 0x56, 0x34, 0x12, 0x90, 0x78, 0x56, 0x34, 0x12 }, 9 },
+	};
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+		Fp x(tbl[i].in);
+		uint8_t buf[128];
+		size_t n = x.getLittleEndian(buf, tbl[i].size);
+		CYBOZU_TEST_EQUAL(n, tbl[i].size);
+		CYBOZU_TEST_EQUAL_ARRAY(buf, tbl[i].out, n);
+
+		n = x.getLittleEndian(buf, tbl[i].size + 1);
+		CYBOZU_TEST_EQUAL(n, tbl[i].size);
+		CYBOZU_TEST_EQUAL_ARRAY(buf, tbl[i].out, n);
+
+		n = x.getLittleEndian(buf, tbl[i].size - 1);
+		CYBOZU_TEST_EQUAL(n, 0);
+	}
+}
+
 void divBy2Test()
 {
 	const int tbl[] = { -4, -3, -2, -1, 0, 1, 2, 3 };
@@ -951,6 +981,7 @@ void sub(mcl::fp::Mode mode)
 		setArrayModTest();
 		getUint64Test();
 		getInt64Test();
+		getLittleEndianTest();
 		divBy2Test();
 		getStrTest();
 		setHashOfTest();

From 0a638c7ad2c25a45a257691031e3a694d47261f1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 13 Aug 2019 11:37:08 +0900
Subject: [PATCH 043/553] add mclBn{Fr,Fp}_getLittleEndian

---
 include/mcl/bn.h               |  8 ++++++++
 include/mcl/impl/bn_c_impl.hpp |  9 +++++++++
 test/bn_c_test.hpp             | 31 +++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 68053cbf..da2fe2f3 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -245,6 +245,14 @@ MCLBN_DLL_API void mclBnFr_setInt32(mclBnFr *y, int x);
 MCLBN_DLL_API int mclBnFr_setLittleEndian(mclBnFr *x, const void *buf, mclSize bufSize);
 MCLBN_DLL_API int mclBnFp_setLittleEndian(mclBnFp *x, const void *buf, mclSize bufSize);
 
+/*
+	write a value as little endian
+	return written size if success else 0
+	@note buf[0] = 0 and return 1 if the value is zero
+*/
+MCLBN_DLL_API mclSize mclBnFr_getLittleEndian(void *buf, mclSize maxBufSize, const mclBnFr *x);
+MCLBN_DLL_API mclSize mclBnFp_getLittleEndian(void *buf, mclSize maxBufSize, const mclBnFp *x);
+
 // set (buf mod r) to x
 // return 0 if bufSize <= (byte size of Fr * 2) else -1
 MCLBN_DLL_API int mclBnFr_setLittleEndianMod(mclBnFr *x, const void *buf, mclSize bufSize);
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index 7c14f637..2df034d9 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -144,6 +144,10 @@ int mclBnFr_setLittleEndian(mclBnFr *x, const void *buf, mclSize bufSize)
 	cast(x)->setArrayMask((const char *)buf, bufSize);
 	return 0;
 }
+mclSize mclBnFr_getLittleEndian(void *buf, mclSize maxBufSize, const mclBnFr *x)
+{
+	return cast(x)->getLittleEndian(buf, maxBufSize);
+}
 int mclBnFr_setLittleEndianMod(mclBnFr *x, const void *buf, mclSize bufSize)
 {
 	bool b;
@@ -595,6 +599,11 @@ int mclBnFp_setLittleEndianMod(mclBnFp *x, const void *buf, mclSize bufSize)
 	cast(x)->setArray(&b, (const char *)buf, bufSize, mcl::fp::Mod);
 	return b ? 0 : -1;
 }
+
+mclSize mclBnFp_getLittleEndian(void *buf, mclSize maxBufSize, const mclBnFp *x)
+{
+	return cast(x)->getLittleEndian(buf, maxBufSize);
+}
 int mclBnFp_isEqual(const mclBnFp *x, const mclBnFp *y)
 {
 	return *cast(x) == *cast(y);
diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index 8db329dc..cc8ceabe 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -664,6 +664,37 @@ CYBOZU_TEST_AUTO(mapToG2)
 	CYBOZU_TEST_ASSERT(mclBnG2_isEqual(&P1, &P2));
 }
 
+CYBOZU_TEST_AUTO(getLittleEndian)
+{
+	const struct {
+		const char *in;
+		uint8_t out[16];
+		size_t size;
+	} tbl[] = {
+		{ "0", { 0 }, 1 },
+		{ "1", { 1 }, 1 },
+		{ "0x1200", { 0x00, 0x12 }, 2 },
+		{ "0x123400", { 0x00, 0x34, 0x12 }, 3 },
+		{ "0x1234567890123456ab", { 0xab, 0x56, 0x34, 0x12, 0x90, 0x78, 0x56, 0x34, 0x12 }, 9 },
+	};
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+		size_t n;
+		mclBnFr x;
+		CYBOZU_TEST_ASSERT(!mclBnFr_setStr(&x, tbl[i].in, strlen(tbl[i].in), 0));
+		uint8_t buf[128];
+		n = mclBnFr_getLittleEndian(buf, tbl[i].size, &x);
+		CYBOZU_TEST_EQUAL(n, tbl[i].size);
+		CYBOZU_TEST_EQUAL_ARRAY(buf, tbl[i].out, n);
+
+		n = mclBnFr_getLittleEndian(buf, tbl[i].size + 1, &x);
+		CYBOZU_TEST_EQUAL(n, tbl[i].size);
+		CYBOZU_TEST_EQUAL_ARRAY(buf, tbl[i].out, n);
+
+		n = mclBnFr_getLittleEndian(buf, tbl[i].size - 1, &x);
+		CYBOZU_TEST_EQUAL(n, 0);
+	}
+}
+
 void G1onlyTest(int curve)
 {
 	printf("curve=%d\n", curve);

From 55a1de171b64b919a79dd7ce67447401d5e00805 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 13 Aug 2019 15:08:23 +0900
Subject: [PATCH 044/553] [doc] update getStr(mode)

---
 readme.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/readme.md b/readme.md
index 0510f1fb..5efa8740 100644
--- a/readme.md
+++ b/readme.md
@@ -358,14 +358,15 @@ These functions maps x into Gi according to [\[_Faster hashing to G2_\]].
 
 ## String format of G1 and G2
 G1 and G2 have three elements of Fp (x, y, z) for Jacobi coordinate.
-normalize() method normalizes it to affine coordinate (x, y, 1) or (0, 0, 0).
+`normalize()` method normalizes it to affine coordinate (x, y, 1) or (0, 0, 0).
 
-getStr() method gets
+getStr(mode = 0) method gets
 
 * `0` ; infinity
-* `1 <x> <y>` ; not compressed format
-* `2 <x>` ; compressed format for even y
-* `3 <x>` ; compressed format for odd y
+* `1 <x> <y>` ; Affine coordinate with mode = `mcl::IoEcAffine`
+* `4 <x> <y> <z>` ; jacobi/Proj coordinate with mode = `mcl::IoEcProj`
+* `2 <x>` ; compressed format for even y with mode = `mcl::IoEcCompY`
+* `3 <x>` ; compressed format for odd y with mode = `mcl::IoEcCompY`
 
 ## Generator of G1 and G2
 

From 21dedae745b5ce7f7072ffaebdf53eca3b0b4e45 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 13 Aug 2019 16:41:36 +0900
Subject: [PATCH 045/553] add mclBnFp_* like as mclBnFr_*

---
 include/mcl/bn.h               | 14 +++++
 include/mcl/impl/bn_c_impl.hpp | 56 +++++++++++++++++++
 test/bn_c_test.hpp             | 99 ++++++++++++++++++++++++++++++++++
 3 files changed, 169 insertions(+)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index da2fe2f3..e241f6d8 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -238,6 +238,8 @@ MCLBN_DLL_API void mclBnFp2_clear(mclBnFp2 *x);
 // set x to y
 MCLBN_DLL_API void mclBnFr_setInt(mclBnFr *y, mclInt x);
 MCLBN_DLL_API void mclBnFr_setInt32(mclBnFr *y, int x);
+MCLBN_DLL_API void mclBnFp_setInt(mclBnFp *y, mclInt x);
+MCLBN_DLL_API void mclBnFp_setInt32(mclBnFp *y, int x);
 
 // x = buf & (1 << bitLen(r)) - 1
 // if (x >= r) x &= (1 << (bitLen(r) - 1)) - 1
@@ -266,12 +268,16 @@ MCLBN_DLL_API int mclBnFr_isEqual(const mclBnFr *x, const mclBnFr *y);
 MCLBN_DLL_API int mclBnFr_isZero(const mclBnFr *x);
 MCLBN_DLL_API int mclBnFr_isOne(const mclBnFr *x);
 
+MCLBN_DLL_API int mclBnFp_isValid(const mclBnFp *x);
 MCLBN_DLL_API int mclBnFp_isEqual(const mclBnFp *x, const mclBnFp *y);
+MCLBN_DLL_API int mclBnFp_isZero(const mclBnFp *x);
+MCLBN_DLL_API int mclBnFp_isOne(const mclBnFp *x);
 MCLBN_DLL_API int mclBnFp2_isEqual(const mclBnFp2 *x, const mclBnFp2 *y);
 
 #ifndef MCL_DONT_USE_CSRPNG
 // return 0 if success
 MCLBN_DLL_API int mclBnFr_setByCSPRNG(mclBnFr *x);
+MCLBN_DLL_API int mclBnFp_setByCSPRNG(mclBnFp *x);
 
 /*
 	set user-defined random function for setByCSPRNG
@@ -302,6 +308,14 @@ MCLBN_DLL_API void mclBnFr_sub(mclBnFr *z, const mclBnFr *x, const mclBnFr *y);
 MCLBN_DLL_API void mclBnFr_mul(mclBnFr *z, const mclBnFr *x, const mclBnFr *y);
 MCLBN_DLL_API void mclBnFr_div(mclBnFr *z, const mclBnFr *x, const mclBnFr *y);
 
+MCLBN_DLL_API void mclBnFp_neg(mclBnFp *y, const mclBnFp *x);
+MCLBN_DLL_API void mclBnFp_inv(mclBnFp *y, const mclBnFp *x);
+MCLBN_DLL_API void mclBnFp_sqr(mclBnFp *y, const mclBnFp *x);
+MCLBN_DLL_API void mclBnFp_add(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
+MCLBN_DLL_API void mclBnFp_sub(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
+MCLBN_DLL_API void mclBnFp_mul(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
+MCLBN_DLL_API void mclBnFp_div(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
+
 ////////////////////////////////////////////////
 // set zero
 MCLBN_DLL_API void mclBnG1_clear(mclBnG1 *x);
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index 2df034d9..a081ba9e 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -183,6 +183,12 @@ int mclBnFr_setByCSPRNG(mclBnFr *x)
 	cast(x)->setByCSPRNG(&b);
 	return b ? 0 : -1;
 }
+int mclBnFp_setByCSPRNG(mclBnFp *x)
+{
+	bool b;
+	cast(x)->setByCSPRNG(&b);
+	return b ? 0 : -1;
+}
 void mclBn_setRandFunc(void *self, unsigned int (*readFunc)(void *self, void *buf, unsigned int bufSize))
 {
 	mcl::fp::RandGen::setRandFunc(self, readFunc);
@@ -234,6 +240,35 @@ void mclBnFr_div(mclBnFr *z, const mclBnFr *x, const mclBnFr *y)
 	Fr::div(*cast(z),*cast(x), *cast(y));
 }
 
+void mclBnFp_neg(mclBnFp *y, const mclBnFp *x)
+{
+	Fp::neg(*cast(y), *cast(x));
+}
+void mclBnFp_inv(mclBnFp *y, const mclBnFp *x)
+{
+	Fp::inv(*cast(y), *cast(x));
+}
+void mclBnFp_sqr(mclBnFp *y, const mclBnFp *x)
+{
+	Fp::sqr(*cast(y), *cast(x));
+}
+void mclBnFp_add(mclBnFp *z, const mclBnFp *x, const mclBnFp *y)
+{
+	Fp::add(*cast(z),*cast(x), *cast(y));
+}
+void mclBnFp_sub(mclBnFp *z, const mclBnFp *x, const mclBnFp *y)
+{
+	Fp::sub(*cast(z),*cast(x), *cast(y));
+}
+void mclBnFp_mul(mclBnFp *z, const mclBnFp *x, const mclBnFp *y)
+{
+	Fp::mul(*cast(z),*cast(x), *cast(y));
+}
+void mclBnFp_div(mclBnFp *z, const mclBnFp *x, const mclBnFp *y)
+{
+	Fp::div(*cast(z),*cast(x), *cast(y));
+}
+
 ////////////////////////////////////////////////
 // set zero
 void mclBnG1_clear(mclBnG1 *x)
@@ -564,6 +599,15 @@ void mclBn_verifyOrderG2(int doVerify)
 	verifyOrderG2(doVerify != 0);
 }
 
+void mclBnFp_setInt(mclBnFp *y, mclInt x) // y = x, using Fp's assignment from an integer (negative x is accepted)
+{
+	*cast(y) = x;
+}
+void mclBnFp_setInt32(mclBnFp *y, int x) // same assignment as mclBnFp_setInt, but takes a plain int
+{
+	*cast(y) = x;
+}
+
 mclSize mclBnFp_getStr(char *buf, mclSize maxBufSize, const mclBnFp *x, int ioMode)
 {
 	return cast(x)->getStr(buf, maxBufSize, ioMode);
@@ -604,10 +648,22 @@ mclSize mclBnFp_getLittleEndian(void *buf, mclSize maxBufSize, const mclBnFp *x)
 {
 	return cast(x)->getLittleEndian(buf, maxBufSize);
 }
+int mclBnFp_isValid(const mclBnFp *x) // result of Fp::isValid() converted to int (nonzero = valid)
+{
+	return cast(x)->isValid();
+}
 int mclBnFp_isEqual(const mclBnFp *x, const mclBnFp *y)
 {
 	return *cast(x) == *cast(y);
 }
+int mclBnFp_isZero(const mclBnFp *x) // nonzero iff Fp::isZero() holds for *x
+{
+	return cast(x)->isZero();
+}
+int mclBnFp_isOne(const mclBnFp *x) // nonzero iff Fp::isOne() holds for *x
+{
+	return cast(x)->isOne();
+}
 
 int mclBnFp_setHashOf(mclBnFp *x, const void *buf, mclSize bufSize)
 {
diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index cc8ceabe..7aaf94a5 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -568,6 +568,94 @@ CYBOZU_TEST_AUTO(setRandFunc)
 	}
 }
 
+CYBOZU_TEST_AUTO(Fp_1) // exercises the mclBnFp_* C API: validity, set/compare, string I/O, arithmetic, RNG
+{
+	mclBnFp x, y;
+	memset(&x, 0xff, sizeof(x));
+	CYBOZU_TEST_ASSERT(!mclBnFp_isValid(&x));
+	CYBOZU_TEST_ASSERT(!mclBnFp_isZero(&x));
+
+	mclBnFp_clear(&x);
+	CYBOZU_TEST_ASSERT(mclBnFp_isZero(&x));
+
+	mclBnFp_setInt(&x, 1);
+	CYBOZU_TEST_ASSERT(mclBnFp_isOne(&x));
+
+	mclBnFp_setInt(&y, -1);
+	CYBOZU_TEST_ASSERT(!mclBnFp_isEqual(&x, &y));
+
+	y = x;
+	CYBOZU_TEST_ASSERT(mclBnFp_isEqual(&x, &y));
+
+	mclBnFp_setHashOf(&x, "", 0);
+	mclBnFp_setHashOf(&y, "abc", 3);
+	CYBOZU_TEST_ASSERT(!mclBnFp_isEqual(&x, &y));
+	mclBnFp_setHashOf(&x, "abc", 3);
+	CYBOZU_TEST_ASSERT(mclBnFp_isEqual(&x, &y));
+
+	char buf[1024];
+	mclBnFp_setInt(&x, 12345678);
+	size_t size;
+	size = mclBnFp_getStr(buf, sizeof(buf), &x, 10);
+	CYBOZU_TEST_EQUAL(size, 8);
+	CYBOZU_TEST_EQUAL(buf, "12345678");
+
+	mclBnFp_setInt(&x, -7654321);
+	mclBnFp_neg(&x, &x);
+	size = mclBnFp_getStr(buf, sizeof(buf), &x, 10);
+	CYBOZU_TEST_EQUAL(size, 7);
+	CYBOZU_TEST_EQUAL(buf, "7654321");
+
+	mclBnFp_setInt(&y, 123 - 7654321);
+	mclBnFp_add(&x, &x, &y);
+	size = mclBnFp_getStr(buf, sizeof(buf), &x, 10);
+	CYBOZU_TEST_EQUAL(size, 3);
+	CYBOZU_TEST_EQUAL(buf, "123");
+
+	mclBnFp_setInt(&y, 100);
+	mclBnFp_sub(&x, &x, &y);
+	size = mclBnFp_getStr(buf, sizeof(buf), &x, 10);
+	CYBOZU_TEST_EQUAL(size, 2);
+	CYBOZU_TEST_EQUAL(buf, "23");
+
+	mclBnFp_mul(&x, &x, &y);
+	size = mclBnFp_getStr(buf, sizeof(buf), &x, 10);
+	CYBOZU_TEST_EQUAL(size, 4);
+	CYBOZU_TEST_EQUAL(buf, "2300");
+
+	mclBnFp_div(&x, &x, &y);
+	size = mclBnFp_getStr(buf, sizeof(buf), &x, 10);
+	CYBOZU_TEST_EQUAL(size, 2);
+	CYBOZU_TEST_EQUAL(buf, "23");
+
+	mclBnFp_mul(&x, &y, &y);
+	mclBnFp_sqr(&y, &y);
+	CYBOZU_TEST_ASSERT(mclBnFp_isEqual(&x, &y));
+
+	const char *s = "12345678901234567";
+	CYBOZU_TEST_ASSERT(!mclBnFp_setStr(&x, s, strlen(s), 10));
+	s = "20000000000000000";
+	CYBOZU_TEST_ASSERT(!mclBnFp_setStr(&y, s, strlen(s), 10));
+	mclBnFp_add(&x, &x, &y);
+	size = mclBnFp_getStr(buf, sizeof(buf), &x, 10);
+	CYBOZU_TEST_EQUAL(size, 17);
+	CYBOZU_TEST_EQUAL(buf, "32345678901234567");
+
+	mclBnFp_setInt(&x, 1);
+	mclBnFp_neg(&x, &x);
+	size = mclBnFp_getStr(buf, sizeof(buf), &x, 10);
+	CYBOZU_TEST_ASSERT(size > 0);
+	CYBOZU_TEST_EQUAL(size, strlen(buf));
+	CYBOZU_TEST_ASSERT(!mclBnFp_setStr(&y, buf, size, 10));
+	CYBOZU_TEST_ASSERT(mclBnFp_isEqual(&x, &y));
+
+	for (int i = 0; i < 10; i++) {
+		CYBOZU_TEST_ASSERT(mclBnFp_setByCSPRNG(&x) == 0); // 0 is the success code; a failing RNG must fail the test
+		CYBOZU_TEST_ASSERT(mclBnFp_getStr(buf, sizeof(buf), &x, 16) > 0); // otherwise a stale buf would be printed
+		printf("%s\n", buf);
+	}
+}
+
 CYBOZU_TEST_AUTO(Fp)
 {
 	mclBnFp x1, x2;
@@ -593,6 +681,17 @@ CYBOZU_TEST_AUTO(Fp)
 	mclBnFp_clear(&x1);
 	memset(&x2, 0, sizeof(x2));
 	CYBOZU_TEST_ASSERT(mclBnFp_isEqual(&x1, &x2));
+
+	mclBnFp_clear(&x1);
+	CYBOZU_TEST_ASSERT(mclBnFp_isZero(&x1));
+
+	mclBnFp_setInt(&x1, 1);
+	CYBOZU_TEST_ASSERT(mclBnFp_isOne(&x1));
+
+	mclBnFp_setInt(&x1, -1);
+	CYBOZU_TEST_ASSERT(!mclBnFp_isOne(&x1));
+    mclBnFp_neg(&x1, &x1);
+	CYBOZU_TEST_ASSERT(mclBnFp_isOne(&x1));
 }
 
 CYBOZU_TEST_AUTO(mod)

From d493249b39f31d0023582d62ce9bfc755fddfbd9 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 14 Aug 2019 10:56:17 +0900
Subject: [PATCH 046/553] v0.97

---
 include/mcl/op.hpp | 2 +-
 readme.md          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 80119845..0d21c687 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x096; /* 0xABC = A.BC */
+static const int version = 0x097; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index 5efa8740..7088a9fb 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,7 @@ mcl is a library for pairing-based cryptography.
 The current version supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
+* v0.97 add some C api functions
 * v0.96 improved scalar multiplication
 * mclBn_setETHserialization(true) (de)serialize acoording to [ETH2.0 serialization of BLS12-381](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#point-representations) when BLS12-381 is used.
 * (Break backward compatibility) libmcl_dy.a is renamed to libmcl.a

From c0efee00e152c521bbc4d6d8948a5b832715b65a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 16 Aug 2019 17:37:57 +0900
Subject: [PATCH 047/553] [she] add getNonConstRef() for test

---
 include/mcl/she.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/mcl/she.hpp b/include/mcl/she.hpp
index b95f01c9..388b06fc 100644
--- a/include/mcl/she.hpp
+++ b/include/mcl/she.hpp
@@ -367,6 +367,8 @@ struct SHET {
 	public:
 		const G& getS() const { return S_; }
 		const G& getT() const { return T_; }
+		G& getNonConstRefS() { return S_; } // mutable access to S_; added for tests
+		G& getNonConstRefT() { return T_; } // mutable access to T_; added for tests
 		void clear()
 		{
 			S_.clear();

From 572fa8d81688d7e335ec55352ea382f736f22a30 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 17 Aug 2019 18:21:16 +0900
Subject: [PATCH 048/553] add millerLoopVec

---
 include/mcl/bn.hpp | 19 +++++++++++++++++++
 test/bn_test.cpp   | 23 +++++++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 147f8bb3..53a456fd 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -1900,6 +1900,25 @@ inline void precomputedMillerLoop2mixed(Fp12& f, const G1& P1, const G2& Q1, con
 	precomputedMillerLoop2mixed(f, P1, Q1, P2, Q2coeff.data());
 }
 #endif
+
+/*
+	f = prod_{i=0}^{n-1} millerLoop(Pvec[i], Qvec[i]) ; f = 1 if n == 0
+*/
+inline void millerLoopVec(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
+{
+	if (n == 0) {
+		f = 1; // empty product
+		return;
+	}
+	millerLoop(f, Pvec[0], Qvec[0]); // seed f with the first pair
+	Fp12 term;
+	for (size_t idx = 1; idx != n; ++idx) {
+		millerLoop(term, Pvec[idx], Qvec[idx]);
+		f *= term;
+	}
+}
+
+
 inline void mapToG1(bool *pb, G1& P, const Fp& x) { *pb = BN::param.mapTo.calcG1(P, x); }
 inline void mapToG2(bool *pb, G2& P, const Fp2& x) { *pb = BN::param.mapTo.calcG2(P, x); }
 #ifndef CYBOZU_DONT_USE_EXCEPTION
diff --git a/test/bn_test.cpp b/test/bn_test.cpp
index 071ec706..b66cad8c 100644
--- a/test/bn_test.cpp
+++ b/test/bn_test.cpp
@@ -249,6 +249,28 @@ void testMillerLoop2(const G1& P1, const G2& Q1)
 	CYBOZU_TEST_EQUAL(e2, e3);
 }
 
+void testMillerLoopVec()
+{
+	const size_t n = 8;
+	G1 Pvec[n];
+	G2 Qvec[n];
+	// the i-th pair is hashed from the single byte 'a' + i
+	for (size_t i = 0; i < n; i++) {
+		const char msg = char('a' + i);
+		hashAndMapToG1(Pvec[i], &msg, 1);
+		hashAndMapToG2(Qvec[i], &msg, 1);
+	}
+	Fp12 f1, f2;
+	f1 = 1;
+	for (size_t k = 0; k != n; ++k) {
+		Fp12 single;
+		millerLoop(single, Pvec[k], Qvec[k]);
+		f1 *= single;
+	}
+	millerLoopVec(f2, Pvec, Qvec, n);
+	CYBOZU_TEST_EQUAL(f1, f2);
+}
+
 void testPairing(const G1& P, const G2& Q, const char *eStr)
 {
 	Fp12 e1;
@@ -378,6 +400,7 @@ CYBOZU_TEST_AUTO(naive)
 		testPairing(P, Q, ts.e);
 		testPrecomputed(P, Q);
 		testMillerLoop2(P, Q);
+		testMillerLoopVec();
 		testBench(P, Q);
 		benchAddDblG1();
 		benchAddDblG2();

From 73ee7d5bd31c43c69885fb23380176dcf37d4554 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 19 Aug 2019 20:43:52 +0900
Subject: [PATCH 049/553] add cybozu/time.hpp

---
 include/cybozu/time.hpp | 281 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 281 insertions(+)
 create mode 100644 include/cybozu/time.hpp

diff --git a/include/cybozu/time.hpp b/include/cybozu/time.hpp
new file mode 100644
index 00000000..fbf7355a
--- /dev/null
+++ b/include/cybozu/time.hpp
@@ -0,0 +1,281 @@
+#pragma once
+/**
+	@file
+	@brief tiny time class
+
+	@author MITSUNARI Shigeo(@herumi)
+*/
+#include <ctime>
+#include <cybozu/exception.hpp>
+#include <cybozu/atoi.hpp>
+#include <cybozu/itoa.hpp>
+#ifdef _WIN32
+	#include <sys/timeb.h>
+#else
+	#include <sys/time.h>
+#endif
+
+namespace cybozu {
+
+/**
+	time struct with time_t and msec
+	@note time MUST be later than 1970/1/1
+*/
+class Time {
+	static const uint64_t epochBias = 116444736000000000ull;
+	std::time_t time_;
+	int msec_;
+public:
+	explicit Time(std::time_t time = 0, int msec = 0)
+		: time_(time)
+		, msec_(msec)
+	{
+	}
+	explicit Time(bool doSet) : time_(0), msec_(0) // doSet == false now yields the epoch instead of indeterminate members
+	{
+		if (doSet) setCurrentTime();
+	}
+	Time& setTime(std::time_t time, int msec = 0)
+	{
+		time_ = time;
+		msec_ = msec;
+		return *this;
+	}
+	/*
+		Windows FILETIME is defined as
+		struct FILETIME {
+			DWORD dwLowDateTime;
+			DWORD dwHighDateTime;
+		};
+		the value represents the number of 100-nanosecond intervals since January 1, 1601 (UTC).
+	*/
+	void setByFILETIME(uint32_t low, uint32_t high)
+	{
+		const uint64_t msecSince1970 = (((uint64_t(high) << 32) | low) - epochBias) / 10000; // 100ns ticks -> msec
+		time_ = std::time_t(msecSince1970 / 1000);
+		msec_ = int(msecSince1970 % 1000);
+	}
+	/*
+		inverse of setByFILETIME(); dword is a template parameter because DWORD is defined as unsigned long in windows
+	*/
+	template<class dword>
+	void getFILETIME(dword& low, dword& high) const
+	{
+		const uint64_t fileTime = (uint64_t(time_) * 1000 + msec_) * 10000 + epochBias; // widen first: a 32-bit time_t would overflow
+		low = dword(fileTime);
+		high = dword(fileTime >> 32);
+	}
+	explicit Time(const std::string& in)
+	{
+		fromString(in);
+	}
+	explicit Time(const char *in)
+	{
+		fromString(in, in + strlen(in));
+	}
+	const std::time_t& getTime() const { return time_; }
+	int getMsec() const { return msec_; }
+	double getTimeSec() const { return time_ + msec_ * 1e-3; }
+	void addSec(int sec) { time_ += sec; }
+	bool operator<(const Time& rhs) const { return (time_ < rhs.time_) || (time_ == rhs.time_ && msec_ < rhs.msec_); }
+//	bool operator<=(const Time& rhs) const { return (*this < rhs) || (*this == rhs); }
+//	bool operator>(const Time& rhs) const { return rhs < *this; }
+	bool operator==(const Time& rhs) const { return (time_ == rhs.time_) && (msec_ == rhs.msec_); }
+	bool operator!=(const Time& rhs) const { return !(*this == rhs); }
+	/**
+		set time from string such as
+		2009-Jan-23T02:53:44Z
+		2009-Jan-23T02:53:44.078Z
+		2009-01-23T02:53:44Z
+		2009-01-23T02:53:44.078Z
+		@note 'T' may be ' '. '-' may be '/'. last char 'Z' is omissible
+	*/
+	void fromString(bool *pb, const std::string& in) { fromString(pb, &in[0], &in[0] + in.size()); }
+	void fromString(const std::string& in) { fromString(0, in); }
+
+	void fromString(bool *pb, const char *begin, const char *end) // pb == 0: throw on error; otherwise report the result in *pb
+	{
+		const size_t len = end - begin;
+		if (len >= 19) {
+			const char *p = begin;
+			struct tm tm;
+			int num;
+			bool b;
+			tm.tm_year = getNum(&b, p, 4, 1970, 3000) - 1900;
+			if (!b) goto ERR;
+			p += 4;
+			char sep = *p++;
+			if (sep != '-' && sep != '/') goto ERR;
+
+			p = getMonth(&num, p);
+			if (p == 0 || size_t(end - p) < 12) goto ERR; // "?DD?hh:mm:ss" must still fit (a month name is longer than "MM")
+			tm.tm_mon = num;
+			if (*p++ != sep) goto ERR;
+
+			tm.tm_mday = getNum(&b, p, 2, 1, 31);
+			if (!b) goto ERR;
+			p += 2;
+			if (*p != ' ' && *p != 'T') goto ERR;
+			p++;
+
+			tm.tm_hour = getNum(&b, p, 2, 0, 23);
+			if (!b) goto ERR;
+			p += 2;
+			if (*p++ != ':') goto ERR;
+
+			tm.tm_min = getNum(&b, p, 2, 0, 59);
+			if (!b) goto ERR;
+			p += 2;
+			if (*p++ != ':') goto ERR;
+
+			tm.tm_sec = getNum(&b, p, 2, 0, 59);
+			if (!b) goto ERR;
+			p += 2;
+
+			if (p == end) {
+				msec_ = 0;
+			} else if (p + 1 == end && *p == 'Z') {
+				msec_ = 0;
+				p++;
+			} else if (*p == '.' && (p + 4 == end || (p + 5 == end && *(p + 4) == 'Z'))) {
+				msec_ = getNum(&b, p + 1, 3, 0, 999);
+				if (!b) goto ERR;
+//				p += 4;
+			} else {
+				goto ERR;
+			}
+#ifdef _WIN32
+			time_ = _mkgmtime64(&tm);
+			if (time_ == -1) goto ERR;
+#else
+			time_ = timegm(&tm);
+#endif
+			if (pb) {
+				*pb = true;
+			}
+			return;
+		}
+	ERR:
+		if (pb) {
+			*pb = false;
+			return;
+		}
+		throw cybozu::Exception("time::fromString") << std::string(begin, len < 24 ? len : 24); // at most 24 chars, never past end
+	}
+	void fromString(const char *begin, const char *end) { fromString(0, begin, end); }
+
+	/**
+		format the stored time as UTC with strftime
+		@param out [out] output string
+		@param format [in] format for strftime; throws if the result does not fit in the 128-byte buffer
+		@param appendMsec [in] append msec_ as <mmm> via cybozu::itoaWithZero(msec_, 3)
+		@param doClear [in] clear out first (append to out if false)
+		@note ex. "%Y-%b-%d %H:%M:%S." to get 2009-Jan-23 02:53:44.078
+	*/
+	void toString(std::string& out, const char *format, bool appendMsec = true, bool doClear = true) const
+	{
+		if (doClear) out.clear();
+		char buf[128];
+		struct tm tm;
+#ifdef _WIN32
+		bool isOK = _gmtime64_s(&tm, &time_) == 0;
+#else
+		bool isOK = gmtime_r(&time_, &tm) != 0;
+#endif
+		if (!isOK) throw cybozu::Exception("time::toString") << time_;
+#ifdef __GNUC__
+	#pragma GCC diagnostic push
+	#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
+		if (std::strftime(buf, sizeof(buf), format, &tm) == 0) { // 0 also covers an empty result, e.g. an empty format
+			throw cybozu::Exception("time::toString::too long") << format << time_;
+		}
+#ifdef __GNUC__
+	#pragma GCC diagnostic pop
+#endif
+		out += buf;
+		if (appendMsec) {
+			out += cybozu::itoaWithZero(msec_, 3);
+		}
+	}
+
+	/**
+		get current time such as 2009-01-23 02:53:44.078
+		@param out [out] sink string
+	*/
+	void toString(std::string& out, bool appendMsec = true, bool doClear = true) const
+	{
+		const char *format = appendMsec ? "%Y-%m-%d %H:%M:%S." : "%Y-%m-%d %H:%M:%S";
+		toString(out, format, appendMsec, doClear);
+	}
+	std::string toString(bool appendMsec = true) const { std::string out; toString(out, appendMsec); return out; }
+	/**
+		set this object to the current system time (msec resolution) and return *this
+	*/
+	Time& setCurrentTime()
+	{
+#ifdef _WIN32
+		struct _timeb timeb;
+		_ftime_s(&timeb);
+		time_ = timeb.time;
+		msec_ = timeb.millitm;
+#else
+		struct timeval tv;
+		gettimeofday(&tv, 0);
+		time_ = tv.tv_sec;
+		msec_ = tv.tv_usec / 1000; // usec -> msec (truncated)
+#endif
+		return *this;
+	}
+private:
+
+	int getNum(bool *b, const char *in, size_t len, int min, int max) const
+	{
+		// *b is first written by cybozu::atoi, then cleared if the value is outside [min, max]
+		const int value = cybozu::atoi(b, in, len);
+		if (value < min || max < value) {
+			*b = false;
+			return 0;
+		}
+		return value;
+	}
+
+	/*
+		convert month-str to [0, 11]
+		@param ret [out] return idx
+		@param p [in] month-str
+		@retval next pointer or null
+	*/
+	const char *getMonth(int *ret, const char *p) const
+	{
+		static const char monthTbl[12][4] = {
+			"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+		};
+
+		// 1st try : three-letter month name, compared byte-for-byte
+		for (size_t idx = 0; idx < CYBOZU_NUM_OF_ARRAY(monthTbl); idx++) {
+			if (memcmp(p, monthTbl[idx], 3) != 0) continue;
+			*ret = (int)idx;
+			return p + 3;
+		}
+		// 2nd try : two-char month number in [1, 12], stored as [0, 11]
+		// (*ret is not meaningful when 0 is returned)
+		bool isNumber;
+		const int month = getNum(&isNumber, p, 2, 1, 12);
+		*ret = month - 1;
+		if (!isNumber) return 0;
+		return p + 2;
+	}
+};
+
+inline std::ostream& operator<<(std::ostream& os, const cybozu::Time& time)
+{
+	return os << time.toString();
+}
+
+inline double GetCurrentTimeSec()
+{
+	return cybozu::Time(true).getTimeSec();
+}
+
+} // cybozu

From cbc1f48da28c2e072ce28a7ea33363cc346b7f8c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 19 Aug 2019 20:45:51 +0900
Subject: [PATCH 050/553] [cybozu] disable warning

---
 include/cybozu/option.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/cybozu/option.hpp b/include/cybozu/option.hpp
index a5dfd137..f7fa1ba0 100644
--- a/include/cybozu/option.hpp
+++ b/include/cybozu/option.hpp
@@ -345,7 +345,7 @@ class Option {
 	template<class T, class U>
 	void append(T *pvar, const U& defaultVal, bool isMust, const char *opt, const std::string& help = "")
 	{
-		*pvar = defaultVal;
+		*pvar = static_cast<const T&>(defaultVal);
 		appendSub(pvar, N_is1, isMust, opt, help);
 	}
 	/*

From c93811dde153a6db82e0e47c1eb1c8a57f661bf1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 20 Aug 2019 15:19:50 +0900
Subject: [PATCH 051/553] disable warning of cybozu::Option

---
 include/cybozu/option.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/cybozu/option.hpp b/include/cybozu/option.hpp
index a5dfd137..f7fa1ba0 100644
--- a/include/cybozu/option.hpp
+++ b/include/cybozu/option.hpp
@@ -345,7 +345,7 @@ class Option {
 	template<class T, class U>
 	void append(T *pvar, const U& defaultVal, bool isMust, const char *opt, const std::string& help = "")
 	{
-		*pvar = defaultVal;
+		*pvar = static_cast<const T&>(defaultVal);
 		appendSub(pvar, N_is1, isMust, opt, help);
 	}
 	/*

From 2c8d7947b796cfcb444a745f22cb7bf40f2d2806 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 21 Aug 2019 16:31:15 +0900
Subject: [PATCH 052/553] add cybozu/*.hpp

---
 include/cybozu/atomic.hpp | 144 +++++++
 include/cybozu/socket.hpp | 778 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 922 insertions(+)
 create mode 100644 include/cybozu/atomic.hpp
 create mode 100644 include/cybozu/socket.hpp

diff --git a/include/cybozu/atomic.hpp b/include/cybozu/atomic.hpp
new file mode 100644
index 00000000..4ecade13
--- /dev/null
+++ b/include/cybozu/atomic.hpp
@@ -0,0 +1,144 @@
+#pragma once
+/**
+	@file
+	@brief atomic operation
+
+	@author MITSUNARI Shigeo(@herumi)
+	@author MITSUNARI Shigeo
+*/
+#include <cybozu/inttype.hpp>
+#ifdef _WIN32
+#include <winsock2.h>
+#include <windows.h>
+#include <intrin.h>
+#else
+#include <emmintrin.h>
+#endif
+
+namespace cybozu {
+
+namespace atomic_local {
+
+template<size_t S>
+struct Tag {};
+
+template<>
+struct Tag<4> { // implementations selected by AtomicAdd/AtomicCompareExchange/AtomicExchange when sizeof(T) == 4
+	template<class T>
+	static inline T AtomicAddSub(T *p, T y) // *p += y atomically; returns the value *p held before the add
+	{
+#ifdef _WIN32
+		return (T)_InterlockedExchangeAdd((long*)p, (long)y);
+#else
+		return static_cast<T>(__sync_fetch_and_add(p, y));
+#endif
+	}
+
+	template<class T>
+	static inline T AtomicCompareExchangeSub(T *p, T newValue, T oldValue) // returns the previous *p
+	{
+#ifdef _WIN32
+		return (T)_InterlockedCompareExchange((long*)p, (long)newValue, (long)oldValue); // (dest, exchange, comparand)
+#else
+		return static_cast<T>(__sync_val_compare_and_swap(p, oldValue, newValue)); // (ptr, oldval, newval) : order differs from Interlocked
+#endif
+	}
+
+	template<class T>
+	static inline T AtomicExchangeSub(T *p, T newValue) // *p = newValue atomically; returns the previous *p
+	{
+#ifdef _WIN32
+		return (T)_InterlockedExchange((long*)p, (long)newValue);
+#else
+		return static_cast<T>(__sync_lock_test_and_set(p, newValue)); // NOTE(review): GCC documents this builtin as an acquire barrier only
+#endif
+	}
+};
+
+template<>
+struct Tag<8> { // implementations for sizeof(T) == 8; add/exchange exist only when CYBOZU_OS_BIT == 64
+#if (CYBOZU_OS_BIT == 64)
+	template<class T>
+	static inline T AtomicAddSub(T *p, T y) // *p += y atomically; returns the value *p held before the add
+	{
+#ifdef _WIN32
+		return (T)_InterlockedExchangeAdd64((int64_t*)p, (int64_t)y);
+#else
+		return static_cast<T>(__sync_fetch_and_add(p, y));
+#endif
+	}
+#endif
+
+	template<class T>
+	static inline T AtomicCompareExchangeSub(T *p, T newValue, T oldValue) // returns the previous *p
+	{
+#ifdef _WIN32
+		return (T)_InterlockedCompareExchange64((int64_t*)p, (int64_t)newValue, (int64_t)oldValue); // (dest, exchange, comparand)
+#else
+		return static_cast<T>(__sync_val_compare_and_swap(p, oldValue, newValue)); // (ptr, oldval, newval) : order differs from Interlocked
+#endif
+	}
+
+#if (CYBOZU_OS_BIT == 64)
+	template<class T>
+	static inline T AtomicExchangeSub(T *p, T newValue) // *p = newValue atomically; returns the previous *p
+	{
+#ifdef _WIN32
+		return (T)_InterlockedExchange64((int64_t*)p, (int64_t)newValue);
+#else
+		return static_cast<T>(__sync_lock_test_and_set(p, newValue)); // NOTE(review): GCC documents this builtin as an acquire barrier only
+#endif
+	}
+#endif
+};
+
+} // atomic_local
+
+/**
+	atomic operation
+	see http://gcc.gnu.org/onlinedocs/gcc-4.4.0/gcc/Atomic-Builtins.html
+	http://msdn.microsoft.com/en-us/library/ms683504(VS.85).aspx
+*/
+/**
+	tmp = *p;
+	*p += y;
+	return tmp;
+*/
+template<class T>
+T AtomicAdd(T *p, T y)
+{
+	return atomic_local::Tag<sizeof(T)>::AtomicAddSub(p, y);
+}
+
+/**
+	tmp = *p;
+	if (*p == oldValue) *p = newValue;
+	return tmp;
+*/
+template<class T>
+T AtomicCompareExchange(T *p, T newValue, T oldValue)
+{
+	return atomic_local::Tag<sizeof(T)>::AtomicCompareExchangeSub(p, newValue, oldValue);
+}
+
+/**
+	tmp = *p;
+	*p = newValue;
+	return tmp;
+*/
+template<class T>
+T AtomicExchange(T *p, T newValue)
+{
+	return atomic_local::Tag<sizeof(T)>::AtomicExchangeSub(p, newValue);
+}
+
+inline void mfence()
+{
+#ifdef _MSC_VER
+	MemoryBarrier();
+#else
+	_mm_mfence();
+#endif
+}
+
+} // cybozu
diff --git a/include/cybozu/socket.hpp b/include/cybozu/socket.hpp
new file mode 100644
index 00000000..b470c940
--- /dev/null
+++ b/include/cybozu/socket.hpp
@@ -0,0 +1,778 @@
+#pragma once
+/**
+	@file
+	@brief tiny socket class
+
+	@author MITSUNARI Shigeo(@herumi)
+	@author MITSUNARI Shigeo
+*/
+#include <errno.h>
+#include <assert.h>
+#include <stdio.h>
+#ifdef _WIN32
+	#include <winsock2.h>
+	#include <ws2tcpip.h> // for socklen_t
+	#pragma comment(lib, "ws2_32.lib")
+	#pragma comment(lib, "iphlpapi.lib")
+	#pragma warning(push)
+	#pragma warning(disable : 4127) // constant condition
+#else
+	#include <unistd.h>
+	#include <sys/socket.h>
+	#include <sys/ioctl.h>
+	#include <netinet/tcp.h>
+	#include <arpa/inet.h>
+	#include <netdb.h>
+	#include <memory.h>
+	#include <signal.h>
+#endif
+#ifndef NDEBUG
+	#include <stdio.h>
+#endif
+
+#include <cybozu/atomic.hpp>
+#include <cybozu/exception.hpp>
+#include <cybozu/itoa.hpp>
+#include <string>
+
+#ifdef __linux__
+//	#define CYBOZU_SOCKET_USE_EPOLL
+	#include <sys/epoll.h>
+#endif
+
+namespace cybozu {
+
+#ifdef _MSC_VER
+struct NetErrorNo : public cybozu::ErrorNo {
+	NetErrorNo(NativeErrorNo err)
+		: cybozu::ErrorNo(err)
+	{
+	}
+	NetErrorNo()
+		: cybozu::ErrorNo(WSAGetLastError())
+	{
+	}
+};
+#else
+typedef cybozu::ErrorNo NetErrorNo;
+#endif
+
+#ifdef CYBOZU_SOCKET_USE_EPOLL
+
+namespace experimental {
+
+struct EpollEvent {
+	struct epoll_event ev_;
+	EpollEvent()
+	{
+		memset(&ev_, 0, sizeof(ev_));
+	}
+	void set(int fd, uint32_t events = EPOLLIN)
+	{
+		ev_.events = events;
+		ev_.data.fd = fd;
+	}
+	int getFd() const { return ev_.data.fd; }
+};
+
+class Epoll {
+	int efd_; // epoll descriptor; stays negative until init() succeeds
+	bool verify(const char *msg, int ret, int *err) const { // on ret < 0: throw if err == 0, else store errno in *err
+		if (ret >= 0) return true;
+		if (err == 0) throw cybozu::Exception(msg) << cybozu::NetErrorNo();
+		*err = errno;
+		return false;
+	}
+public:
+	Epoll() : efd_(-1) {}
+	bool init(int *err = 0)
+	{
+		efd_ = ::epoll_create1(0);
+		return verify("Epoll:init", efd_, err);
+	}
+	~Epoll()
+	{
+		if (efd_ >= 0) ::close(efd_);
+	}
+	/*
+		throw if err == NULL; ev may be NULL (del() passes NULL)
+	*/
+	bool ctrl(int op, int fd, EpollEvent *ev, int *err = 0) {
+		int ret = ::epoll_ctl(efd_, op, fd, ev ? &ev->ev_ : 0); // never form &ev->ev_ from a null ev
+		return verify("Epoll:ctrl", ret, err);
+	}
+	bool add(int fd, uint32_t events = EPOLLIN, int *err = 0) {
+		EpollEvent ev;
+		ev.set(fd, events);
+		return ctrl(EPOLL_CTL_ADD, fd, &ev, err);
+	}
+	bool del(int fd, int *err = 0) {
+		return ctrl(EPOLL_CTL_DEL, fd, NULL, err);
+	}
+	/*
+		msec : 0 : block, -1 : return immediately; returns #events, 0 on timeout, -errno on error
+	*/
+	int wait(EpollEvent *ev, int maxEv, int msec = 0)
+	{
+		/*
+		epoll_wait uses the opposite convention, so swap the two special values:
+		 0 : return immediately, -1 : block indefinitely
+		*/
+		if (msec == 0) {
+			msec = -1;
+		} else if (msec == -1) {
+			msec = 0;
+		}
+		int ret = ::epoll_wait(efd_, &ev->ev_, maxEv, msec); // NOTE(review): maxEv > 1 relies on EpollEvent matching epoll_event's layout
+		if (ret == 0) return 0; // timeout
+		if (ret < 0) return -errno;
+		return ret;
+	}
+};
+
+struct AutoLock {
+	Epoll& ep_;
+	int fd_;
+	AutoLock(Epoll& ep, int fd, int events = EPOLLIN)
+		: ep_(ep)
+		, fd_(fd)
+	{
+		ep_.add(fd, events);
+	}
+	~AutoLock()
+	{
+		int err;
+		ep_.del(fd_, &err);
+	}
+};
+
+} // cybozu::experimental
+#endif
+
+namespace ssl {
+class ClientSocket;
+};
+
+namespace socket_local {
+
+#ifdef _WIN32
+	typedef SOCKET SocketHandle;
+#else
+	typedef int SocketHandle;
+#endif
+
+struct InitTerm {
+	/** call once for init */
+	InitTerm()
+	{
+#ifdef _WIN32
+		WSADATA data;
+		int err = ::WSAStartup(MAKEWORD(2, 2), &data);
+		if (err) {
+			fprintf(stderr, "WSAStartup failed : %d\n", err);
+			exit(1);
+		}
+#else
+		::signal(SIGPIPE, SIG_IGN);
+#endif
+	}
+	/** call once for term */
+	~InitTerm()
+	{
+#ifdef _WIN32
+		::WSACleanup();
+#endif
+	}
+	void dummyCall() { }
+};
+
+template<int dummy = 0>
+struct InstanceIsHere { static InitTerm it_; };
+
+template<int dummy>
+InitTerm InstanceIsHere<dummy>::it_;
+
+struct DummyCall {
+	DummyCall() { InstanceIsHere<>::it_.dummyCall(); }
+};
+
+} // cybozu::socket_local
+
+class SocketAddr {
+	union {
+		// http://www.coins.tsukuba.ac.jp/~syspro/2010/No6_files/sockaddr.html
+		struct sockaddr sa; /* 16byte */
+		struct sockaddr_in v4; /* 16byte */
+		struct sockaddr_in6 v6;
+	} addr_;
+	socklen_t addrlen_;
+	int family_;
+	friend class Socket;
+	void verify() // call in only Socket::accept
+	{
+		if (addrlen_ == sizeof(addr_.v4) && addr_.sa.sa_family == AF_INET) {
+			family_ = AF_INET;
+			return;
+		}
+		if (addrlen_ == sizeof(addr_.v6) && addr_.sa.sa_family == AF_INET6) {
+			family_ = AF_INET6;
+			return;
+		}
+		throw cybozu::Exception("cybozu:SocketAddr:verify") << addrlen_;
+	}
+public:
+	SocketAddr()
+		: addrlen_(0)
+		, family_(0)
+	{
+	}
+	SocketAddr(const std::string& address, uint16_t port, bool forceIpV6 = false)
+	{
+		set(address, port, forceIpV6);
+	}
+	void set(const std::string& address, uint16_t port, bool forceIpV6 = false) // resolve address:port; throws cybozu::Exception on failure
+	{
+		char portStr[16];
+		CYBOZU_SNPRINTF(portStr, sizeof(portStr), "%d", port);
+		memset(&addr_, 0, sizeof(addr_));
+		addrlen_ = 0;
+		family_ = 0;
+
+		struct addrinfo *result = 0;
+		struct addrinfo hints;
+		memset(&hints, 0, sizeof(struct addrinfo));
+		hints.ai_family = AF_INET;
+		hints.ai_socktype = SOCK_STREAM;
+		hints.ai_protocol = IPPROTO_TCP;
+		hints.ai_flags = AI_NUMERICSERV; // AI_PASSIVE;
+		const int s = getaddrinfo(address.c_str(), portStr, &hints, &result);
+		// if the IPv4 lookup failed (s != 0, e.g. EAI_AGAIN) or IPv6 is forced, look up again as IPv6 with V4MAPPED
+		if (s || forceIpV6) {
+			if (s == 0) freeaddrinfo(result); // forceIpV6: release the IPv4 list before result is overwritten
+			result = 0;
+			hints.ai_family = AF_INET6;
+			hints.ai_flags |= AI_V4MAPPED;
+			if (getaddrinfo(address.c_str(), portStr, &hints, &result)) goto ERR_EXIT;
+		}
+		{
+			bool found = false;
+			for (const struct addrinfo *p = result; p; p = p->ai_next) {
+				const int family = p->ai_family;
+				if (family == hints.ai_family) {
+					if (p->ai_addrlen > sizeof(addr_)) {
+						break;
+					}
+					memcpy(&addr_, p->ai_addr, p->ai_addrlen);
+					addrlen_ = (socklen_t)p->ai_addrlen;
+					family_ = family;
+					found = true;
+					break;
+				}
+			}
+			freeaddrinfo(result);
+			if (found) return;
+		}
+	ERR_EXIT:
+		throw cybozu::Exception("SocketAddr:set") << address << port << cybozu::NetErrorNo();
+	}
+	socklen_t getSize() const { return addrlen_; }
+	int getFamily() const { return family_; }
+	const struct sockaddr *get() const { return &addr_.sa; }
+	uint16_t getPort() const {
+		if (family_ == AF_INET) {
+			return ntohs(addr_.v4.sin_port);
+		} else if (family_ == AF_INET6) {
+			return ntohs(addr_.v6.sin6_port);
+		}
+		throw cybozu::Exception("SocketAddr:getPort:bad family") << family_;
+	}
+	// compare addr without port
+	bool hasSameAddr(const SocketAddr& rhs) const
+	{
+		const uint8_t *v4 = 0;
+		const uint8_t *v6 = 0;
+		if (family_ == AF_INET) {
+			if (rhs.family_ == AF_INET) return memcmp(&addr_.v4.sin_addr, &rhs.addr_.v4.sin_addr, sizeof(in_addr)) == 0;
+			if (rhs.family_ != AF_INET6) return false;
+			v4 = (const uint8_t*)&addr_.v4.sin_addr;
+			v6 = (const uint8_t*)&rhs.addr_.v6.sin6_addr;
+		} else if (family_ != AF_INET6) {
+			return false;
+		} else {
+			if (rhs.family_ == AF_INET6) return memcmp(&addr_.v6.sin6_addr, &rhs.addr_.v6.sin6_addr, sizeof(in6_addr)) == 0;
+			if (rhs.family_ != AF_INET) return false;
+			v4 = (const uint8_t*)&rhs.addr_.v4.sin_addr;
+			v6 = (const uint8_t*)&addr_.v6.sin6_addr;
+		}
+		// Ipv6-mapped?
+		const uint8_t header[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
+		return memcmp(v6, header, 12) == 0 && memcmp(v6 + 12, v4, 4) == 0;
+	}
+	std::string toStr() const
+	{
+		if (family_ == AF_INET || family_ == AF_INET6) {
+			char buf[INET6_ADDRSTRLEN];
+			assert(INET_ADDRSTRLEN <= INET6_ADDRSTRLEN);
+			const bool isIPv4 = family_ == AF_INET;
+			const void *pa = isIPv4 ? (const void*)&addr_.v4.sin_addr : (const void*)&addr_.v6.sin6_addr;
+			// not "const void*" because of vc
+			const char *p = inet_ntop(family_, const_cast<void*>(pa), buf, sizeof(buf));
+			if (!p) throw cybozu::Exception("cybozu:SocketAddr:toStr") << cybozu::NetErrorNo();
+			if (isIPv4) return std::string(p) + ':' + cybozu::itoa(getPort());
+			return std::string("[") + p + "]:" + cybozu::itoa(getPort());
+		}
+		throw cybozu::Exception("cybozu:SocketAddr:toStr:bad family_") << family_;
+	}
+};
+/*
+	socket class
+	@note ownership is moved if copied
+*/
+class Socket {
+	friend class cybozu::ssl::ClientSocket;
+private:
+	cybozu::socket_local::SocketHandle sd_;
+	Socket(const Socket&);
+	void operator=(const Socket&);
+#ifdef _WIN32
+	void setTimeout(int type, int msec) // the option value is passed as msec on Windows
+	{
+		setSocketOption(type, msec);
+	}
+	/* return msec */
+	int getTimeout(int type) const
+	{
+		return getSocketOption(type);
+	}
+#else
+	void setTimeout(int type, int msec) // the option value is a struct timeval here
+	{
+		struct timeval t;
+		t.tv_sec = msec / 1000;
+		t.tv_usec = (msec % 1000) * 1000;
+		setSocketOption(type, t);
+	}
+	/* return msec */
+	int getTimeout(int type) const
+	{
+		struct timeval t;
+		getSocketOption(type, &t);
+		return int(t.tv_sec * 1000 + t.tv_usec / 1000); /* msec */
+	}
+#endif
+	void setBlocking(bool isBlocking)
+	{
+#ifdef _WIN32
+		u_long val = isBlocking ? 0 : 1;
+		int ret = ::ioctlsocket(sd_, FIONBIO, &val);
+#else
+		int val = isBlocking ? 0 : 1;
+		int ret = ::ioctl(sd_, FIONBIO, &val);
+#endif
+		if (ret < 0) throw cybozu::Exception("Socket:setBlocking") << cybozu::NetErrorNo() << isBlocking;
+	}
+public:
+#ifndef _WIN32
+	static const int INVALID_SOCKET = -1;
+#endif
+	Socket()
+		: sd_(INVALID_SOCKET)
+	{
+	}
+
+	bool isValid() const { return sd_ != INVALID_SOCKET; }
+
+	// move
+#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11
+	Socket(Socket&& rhs)
+		: sd_(INVALID_SOCKET)
+	{
+		sd_ = cybozu::AtomicExchange(&rhs.sd_, sd_);
+	}
+#endif
+	// close and move
+	void moveFrom(Socket& rhs)
+	{
+		close();
+		sd_ = cybozu::AtomicExchange(&rhs.sd_, INVALID_SOCKET);
+	}
+#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11
+	void operator=(Socket&& rhs)
+#else
+	void operator=(Socket& rhs)
+#endif
+	{
+		moveFrom(rhs);
+	}
+
+	~Socket()
+	{
+		close(cybozu::DontThrow);
+	}
+
+	bool close(bool dontThrow = false)
+	{
+		cybozu::socket_local::SocketHandle sd = cybozu::AtomicExchange(&sd_, INVALID_SOCKET);
+		if (sd == INVALID_SOCKET) return true;
+#ifdef _WIN32
+		// ::shutdown(sd, SD_SEND);
+		// shutdown is called in closesocket
+		bool isOK = ::closesocket(sd) == 0;
+#else
+		bool isOK = ::close(sd) == 0;
+#endif
+		if (!dontThrow && !isOK) throw cybozu::Exception("Socket:close") << cybozu::NetErrorNo();
+		return isOK;
+	}
+	/*
+		how 0 : SHUT_RD ; disallow read
+		    1 : SHUT_WR ; disallow write
+		    2 : SHUT_RDWR ; disallow read/write
+	*/
+	bool shutdown(int how, bool dontThrow = false)
+	{
+		bool isOK = ::shutdown(sd_, how) == 0;
+		if (!dontThrow && !isOK) throw cybozu::Exception("Socket:waitForClose:shutdown") << cybozu::NetErrorNo();
+		return isOK;
+	}
+	/*
+		send FIN and wait for remote's close().
+		this function is used for the following situation.
+		sock.write()
+		sock.waitForClose()
+		sock.close()
+	*/
+	void waitForClose()
+	{
+		if (sd_ == INVALID_SOCKET) return;
+		//	send FIN and this socket can't write any data.
+		shutdown(1);
+		// wait for FIN from the peer.
+		char buf[1];
+		ssize_t readSize = readSome(buf, sizeof(buf));
+		if (readSize != 0) {
+			throw cybozu::Exception("Socket:waitForClose:readSome:bad size") << readSize;
+		}
+	}
+
+	/*!
+		receive data
+		@param buf [out] receive buffer
+		@param bufSize [in] receive buffer size(byte)
+		@note return read size
+	*/
+	size_t readSome(void *buf, size_t bufSize)
+	{
+		int size = (int)(std::min)((size_t)0x7fffffff, bufSize);
+#ifdef _WIN32
+		int readSize = ::recv(sd_, (char *)buf, size, 0);
+#else
+	RETRY:
+		ssize_t readSize = ::read(sd_, buf, size);
+		if (readSize < 0 && errno == EINTR) goto RETRY;
+#endif
+		if (readSize < 0) throw cybozu::Exception("Socket:readSome") << cybozu::NetErrorNo() << bufSize;
+		return readSize;
+	}
+
+	/*!
+		receive all data unless timeout
+		@param buf [out] receive buffer
+		@param bufSize [in] receive buffer size(byte)
+	*/
+	void read(void *buf, size_t bufSize)
+	{
+		char *p = (char *)buf;
+		while (bufSize > 0) {
+			size_t readSize = readSome(p, bufSize);
+			if (readSize == 0) throw cybozu::Exception("Socket:read:readSize is zero");
+			p += readSize;
+			bufSize -= readSize;
+		}
+	}
+	/*!
+		write all data
+		@param buf [in] send buffer
+		@param bufSize [in] send buffer size(byte)
+	*/
+	void write(bool *pb, const void *buf, size_t bufSize)
+	{
+		const char *p = (const char *)buf;
+		while (bufSize > 0) {
+			int size = (int)(std::min)(size_t(0x7fffffff), bufSize);
+#ifdef _WIN32
+			int writeSize = ::send(sd_, p, size, 0);
+#else
+			int writeSize = ::write(sd_, p, size);
+			if (writeSize < 0 && errno == EINTR) continue;
+#endif
+			if (writeSize < 0) {
+				*pb = false;
+				return;
+			}
+			p += writeSize;
+			bufSize -= writeSize;
+		}
+		*pb = true;
+	}
+	void write(const void *buf, size_t bufSize)
+	{
+		bool b;
+		write(&b, buf, bufSize);
+		if (!b) throw cybozu::Exception("Socket:write") << cybozu::NetErrorNo() << bufSize;
+	}
+	/**
+		connect to address:port
+		@param address [in] address
+		@param port [in] port
+		@param msec: 0 : block
+	*/
+	void connect(const std::string& address, uint16_t port, int msec = 0)
+	{
+		SocketAddr addr;
+		addr.set(address, port);
+		connect(addr, msec);
+	}
+	/**
+		connect to resolved socket addr
+	*/
+	void connect(const cybozu::SocketAddr& addr, int msec = 0)
+	{
+		if (isValid()) throw cybozu::Exception("Socket:connect:already connect");
+		sd_ = ::socket(addr.getFamily(), SOCK_STREAM, IPPROTO_TCP);
+		if (!isValid()) {
+			throw cybozu::Exception("Socket:connect:socket") << cybozu::NetErrorNo();
+		}
+		if (msec == 0) {
+			if (::connect(sd_, addr.get(), addr.getSize()) < 0) {
+				throw cybozu::Exception("Socket:connect") << cybozu::NetErrorNo() << addr.toStr();
+			}
+		} else {
+			setBlocking(false);
+			if (::connect(sd_, addr.get(), addr.getSize()) < 0) {
+#ifdef _WIN32
+				bool inProgress = WSAGetLastError() == WSAEWOULDBLOCK;
+#else
+				bool inProgress = errno == EINPROGRESS;
+#endif
+				if (!inProgress) throw cybozu::Exception("Socket:connect:not in progress") << cybozu::NetErrorNo() << addr.toStr();
+				if (!queryAccept(msec, false)) throw cybozu::Exception("Socket:connect:timeout") << addr.toStr();
+				int err = getSocketOption(SO_ERROR);
+				if (err != 0) throw cybozu::Exception("Socket::connect:bad socket") << cybozu::NetErrorNo(err);
+			}
+			setBlocking(true);
+		}
+	}
+
+	static const int allowIPv4 = 1;
+	static const int allowIPv6 = 2;
+	/**
+		init for server
+		@param port [in] port number
+	*/
+	void bind(uint16_t port, int mode = allowIPv4 | allowIPv6)
+	{
+		const int family = (mode & allowIPv6) ? AF_INET6 : AF_INET;
+		sd_ = ::socket(family, SOCK_STREAM, IPPROTO_TCP);
+		if (!isValid()) {
+			throw cybozu::Exception("Socket:bind:socket") << cybozu::NetErrorNo();
+		}
+		setSocketOption(SO_REUSEADDR, 1);
+		struct sockaddr_in6 addr6;
+		struct sockaddr_in addr4;
+		struct sockaddr *addr;
+		socklen_t addrLen;
+		if (mode & allowIPv6) {
+			setSocketOption(IPV6_V6ONLY, (mode & allowIPv4) ? 0 : 1, IPPROTO_IPV6);
+			memset(&addr6, 0, sizeof(addr6));
+			addr6.sin6_family = AF_INET6;
+			addr6.sin6_port = htons(port);
+			addr = (struct sockaddr*)&addr6;
+			addrLen = sizeof(addr6);
+		} else {
+			memset(&addr4, 0, sizeof(addr4));
+			addr4.sin_family = AF_INET;
+			addr4.sin_port = htons(port);
+			addr = (struct sockaddr*)&addr4;
+			addrLen = sizeof(addr4);
+		}
+		if (::bind(sd_, addr, addrLen) == 0) {
+			if (::listen(sd_, SOMAXCONN) == 0) {
+				return;
+			}
+		}
+		cybozu::NetErrorNo keep;
+		close(cybozu::DontThrow);
+		throw cybozu::Exception("Socket:bind") << keep;
+	}
+
+	/**
+		return positive if accepted
+		return zero if timeout
+		return negative(-errno) if error
+	*/
+	int queryAcceptNoThrow(int msec = 1000, bool checkWrite = true)
+	{
+		if (sd_ < 0) return -EBADF;
+#ifdef CYBOZU_SOCKET_USE_EPOLL
+		int err;
+		experimental::Epoll ep;
+		if (!ep.init(&err)) return -err;
+		uint32_t events = checkWrite ? EPOLLIN : EPOLLOUT;
+		experimental::AutoLock al(ep, sd_, events);
+		experimental::EpollEvent ev;
+		int ret = ep.wait(&ev, 1, msec);
+		if (ret != 1) return ret;
+		assert(ev.getFd() == sd_);
+		return ret;
+#else
+#ifndef _WIN32
+		// https://msdn.microsoft.com/en-us/library/windows/desktop/ms739169.aspx
+		if (sd_ >= FD_SETSIZE) return -EMFILE;
+#endif
+		struct timeval timeout;
+		timeout.tv_sec = msec / 1000;
+		timeout.tv_usec = (msec % 1000) * 1000;
+		fd_set fds;
+		FD_ZERO(&fds);
+		FD_SET((unsigned)sd_, &fds);
+		int fdNum;
+		if (checkWrite) {
+			fdNum = ::select((int)sd_ + 1, &fds, 0, 0, &timeout);
+		} else {
+			fdNum = ::select((int)sd_ + 1, 0, &fds, 0, &timeout);
+		}
+		if (fdNum < 0) return -errno;
+		return fdNum;
+#endif
+	}
+	/**
+		return true if acceptable, otherwise false
+		return false if one second passed
+		while (!server.queryAccept()) {
+		}
+		client.accept(server);
+	*/
+	bool queryAccept(int msec = 1000, bool checkWrite = true)
+	{
+		int ret = queryAcceptNoThrow(msec, checkWrite);
+		if (ret < 0) throw cybozu::Exception("Socket:queryAccept") << cybozu::NetErrorNo(-ret);
+		return ret > 0;
+	}
+
+	/**
+		accept for server
+	*/
+	void accept(Socket& client, SocketAddr *paddr = 0) const
+	{
+		if (paddr) {
+			struct sockaddr *psa = &paddr->addr_.sa;
+			paddr->addrlen_ = sizeof(paddr->addr_);
+			client.sd_ = ::accept(sd_, psa, &paddr->addrlen_);
+			paddr->verify();
+		} else {
+			client.sd_ = ::accept(sd_, 0, 0);
+		}
+		if (!client.isValid()) throw cybozu::Exception("Socket:accept") << cybozu::NetErrorNo();
+	}
+
+	template<typename T>
+	void setSocketOption(int optname, const T& value, int level = SOL_SOCKET)
+	{
+		bool isOK = setsockopt(sd_, level, optname, cybozu::cast<const char*>(&value), sizeof(T)) == 0;
+		if (!isOK) throw cybozu::Exception("Socket:setSocketOption") << cybozu::NetErrorNo();
+	}
+	template<typename T>
+	void getSocketOption(int optname, T* value, int level = SOL_SOCKET) const
+	{
+		socklen_t len = (socklen_t)sizeof(T);
+		bool isOK = getsockopt(sd_, level, optname, cybozu::cast<char*>(value), &len) == 0;
+		if (!isOK) throw cybozu::Exception("Socket:getSocketOption") << cybozu::NetErrorNo();
+	}
+	int getSocketOption(int optname) const
+	{
+		int ret;
+		getSocketOption(optname, &ret);
+		return ret;
+	}
+	/**
+		set SO_LINGER on this socket (l_onoff : nonzero enables linger, l_linger : linger time)
+	*/
+	void setLinger(uint16_t l_onoff, uint16_t l_linger)
+	{
+		struct linger linger;
+		linger.l_onoff = l_onoff;
+		linger.l_linger = l_linger;
+		setSocketOption(SO_LINGER, linger); // pass the struct itself: setSocketOption() takes &value and sizeof(T)
+	}
+	/**
+		get receive buffer size
+		@retval positive buffer size(byte)
+		@retval -1 error
+	*/
+	int getReceiveBufferSize() const
+	{
+		return getSocketOption(SO_RCVBUF);
+	}
+	/**
+		set receive buffer size
+		@param size [in] buffer size(byte)
+	*/
+	void setReceiveBufferSize(int size)
+	{
+		setSocketOption(SO_RCVBUF, size);
+	}
+	/**
+		get send buffer size
+		@retval positive buffer size(byte)
+		@retval -1 error
+	*/
+	int getSendBufferSize() const
+	{
+		return getSocketOption(SO_SNDBUF);
+	}
+	/**
+		set send buffer size
+		@param size [in] buffer size(byte)
+	*/
+	void setSendBufferSize(int size)
+	{
+		setSocketOption(SO_SNDBUF, size);
+	}
+	/**
+		set send timeout
+		@param msec [in] msec
+	*/
+	void setSendTimeout(int msec)
+	{
+		setTimeout(SO_SNDTIMEO, msec);
+	}
+	/**
+		set receive timeout
+		@param msec [in] msec
+	*/
+	void setReceiveTimeout(int msec)
+	{
+		setTimeout(SO_RCVTIMEO, msec);
+	}
+	/**
+		get send timeout(msec)
+	*/
+	int getSendTimeout() const
+	{
+		return getTimeout(SO_SNDTIMEO);
+	}
+	/**
+		get receive timeout(msec)
+	*/
+	int getReceiveTimeout() const
+	{
+		return getTimeout(SO_RCVTIMEO);
+	}
+};
+
+} // cybozu
+
+#ifdef _WIN32
+	#pragma warning(pop)
+#endif

From 5298f0d26063e1811b603c9b7216f8da8f293bb9 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 22 Aug 2019 10:51:10 +0900
Subject: [PATCH 053/553] [xbyak] update

---
 src/xbyak/xbyak.h          |   3 +-
 src/xbyak/xbyak_mnemonic.h |   7 +-
 src/xbyak/xbyak_util.h     | 142 +++++++++++++++++++++++++++++++++++++
 3 files changed, 150 insertions(+), 2 deletions(-)

diff --git a/src/xbyak/xbyak.h b/src/xbyak/xbyak.h
index c28a536a..64b4ee3c 100644
--- a/src/xbyak/xbyak.h
+++ b/src/xbyak/xbyak.h
@@ -113,7 +113,7 @@ namespace Xbyak {
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x5790 /* 0xABCD = A.BC(D) */
+	VERSION = 0x5802 /* 0xABCD = A.BC(D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
@@ -551,6 +551,7 @@ inline void Operand::setBit(int bit)
 		idx_ = idx;
 		kind_ = kind;
 		bit_ = bit;
+		if (bit >= 128) return; // keep mask_ and rounding_
 		mask_ = 0;
 		rounding_ = 0;
 		return;
diff --git a/src/xbyak/xbyak_mnemonic.h b/src/xbyak/xbyak_mnemonic.h
index 2733c612..893a588a 100644
--- a/src/xbyak/xbyak_mnemonic.h
+++ b/src/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "5.79"; }
+const char *getVersionString() const { return "5.802"; }
 void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
 void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
 void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@@ -1684,6 +1684,8 @@ void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 |
 void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); }
 void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); }
 void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
+void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
+void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
 void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
 void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }
 void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
@@ -1709,6 +1711,7 @@ void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T
 void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
 void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
 void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); }
+void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); }
 void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }
 void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }
 void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x88); }
@@ -1769,6 +1772,8 @@ void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3
 void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
 void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
 void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
+void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) throw Error(ERR_OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
+void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) throw Error(ERR_OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }
 void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }
 void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }
 void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }
diff --git a/src/xbyak/xbyak_util.h b/src/xbyak/xbyak_util.h
index c2474c5b..c4e99ae2 100644
--- a/src/xbyak/xbyak_util.h
+++ b/src/xbyak/xbyak_util.h
@@ -54,6 +54,20 @@
 #endif
 #endif
 
+#ifdef XBYAK_USE_VTUNE
+	// -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
+	#include <jitprofiling.h>
+	#ifdef _MSC_VER
+		#pragma comment(lib, "libittnotify.lib")
+	#endif
+	#ifdef __linux__
+		#include <dlfcn.h>
+	#endif
+#endif
+#ifdef __linux__
+	#define XBYAK_USE_PERF
+#endif
+
 namespace Xbyak { namespace util {
 
 typedef enum {
@@ -331,6 +345,8 @@ class Cpu {
 	static const Type tAVX512_VNNI = uint64(1) << 54;
 	static const Type tAVX512_BITALG = uint64(1) << 55;
 	static const Type tAVX512_VPOPCNTDQ = uint64(1) << 56;
+	static const Type tAVX512_BF16 = uint64(1) << 57;
+	static const Type tAVX512_VP2INTERSECT = uint64(1) << 58;
 
 	Cpu()
 		: type_(NONE)
@@ -410,6 +426,12 @@ class Cpu {
 						if (ECX & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ;
 						if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW;
 						if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS;
+						if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT;
+					}
+					// EAX=07H, ECX=1
+					getCpuidEx(7, 1, data);
+					if (type_ & tAVX512F) {
+						if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
 					}
 				}
 			}
@@ -722,5 +744,125 @@ class StackFrame {
 };
 #endif
 
+class Profiler {
+	int mode_;
+	const char *suffix_;
+	const void *startAddr_;
+#ifdef XBYAK_USE_PERF
+	FILE *fp_;
+#endif
+public:
+	enum {
+		None = 0,
+		Perf = 1,
+		VTune = 2
+	};
+	Profiler()
+		: mode_(None)
+		, suffix_("") // never null: set() formats suffix_ with %s even if setNameSuffix() was not called
+		, startAddr_(0)
+#ifdef XBYAK_USE_PERF
+		, fp_(0)
+#endif
+	{
+	}
+	// append suffix to funcName
+	void setNameSuffix(const char *suffix)
+	{
+		suffix_ = suffix;
+	}
+	void setStartAddr(const void *startAddr)
+	{
+		startAddr_ = startAddr;
+	}
+	void init(int mode)
+	{
+		mode_ = None;
+		switch (mode) {
+		default:
+		case None:
+			return;
+		case Perf:
+#ifdef XBYAK_USE_PERF
+			close();
+			{
+				const int pid = getpid();
+				char name[128];
+				snprintf(name, sizeof(name), "/tmp/perf-%d.map", pid);
+				fp_ = fopen(name, "wb");
+				if (fp_ == 0) {
+					fprintf(stderr, "can't open %s\n", name);
+					return;
+				}
+			}
+			mode_ = Perf;
+#endif
+			return;
+		case VTune:
+#ifdef XBYAK_USE_VTUNE
+			dlopen("dummy", RTLD_LAZY); // force to load dlopen to enable jit profiling
+			if (iJIT_IsProfilingActive() != iJIT_SAMPLING_ON) {
+				fprintf(stderr, "VTune profiling is not active\n");
+				return;
+			}
+			mode_ = VTune;
+#endif
+			return;
+		}
+	}
+	~Profiler()
+	{
+		close();
+	}
+	void close()
+	{
+#ifdef XBYAK_USE_PERF
+		if (fp_ == 0) return;
+		fclose(fp_);
+		fp_ = 0;
+#endif
+	}
+	void set(const char *funcName, const void *startAddr, size_t funcSize) const
+	{
+		if (mode_ == None) return;
+#if !defined(XBYAK_USE_PERF) && !defined(XBYAK_USE_VTUNE)
+		(void)funcName;
+		(void)startAddr;
+		(void)funcSize;
+#endif
+#ifdef XBYAK_USE_PERF
+		if (mode_ == Perf) {
+			if (fp_ == 0) return;
+			fprintf(fp_, "%llx %zx %s%s\n", (long long)startAddr, funcSize, funcName, suffix_);
+		}
+#endif
+#ifdef XBYAK_USE_VTUNE
+		if (mode_ != VTune) return;
+		char className[] = "";
+		char fileName[] = "";
+		iJIT_Method_Load jmethod = {};
+		jmethod.method_id = iJIT_GetNewMethodID();
+		jmethod.class_file_name = className;
+		jmethod.source_file_name = fileName;
+		jmethod.method_load_address = const_cast<void*>(startAddr);
+		jmethod.method_size = funcSize;
+		jmethod.line_number_size = 0;
+		char buf[128];
+		snprintf(buf, sizeof(buf), "%s%s", funcName, suffix_);
+		jmethod.method_name = buf;
+		iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&jmethod);
+#endif
+	}
+	/*
+		for continuous set
+		funcSize = endAddr - <previous set endAddr>
+	*/
+	void set(const char *funcName, const void *endAddr)
+	{
+		set(funcName, startAddr_, (size_t)endAddr - (size_t)startAddr_);
+		startAddr_ = endAddr;
+	}
+};
+
 } } // end of util
 #endif

From b26ee426eb94e02931da181050199f9c08a2519c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 22 Aug 2019 13:59:12 +0900
Subject: [PATCH 054/553] support jit profiler for VTune

---
 Makefile               |  7 ++++
 src/fp_generator.hpp   | 78 ++++++------------------------------------
 src/xbyak/xbyak_util.h |  3 +-
 3 files changed, 19 insertions(+), 69 deletions(-)

diff --git a/Makefile b/Makefile
index 39a415cd..364c17e8 100644
--- a/Makefile
+++ b/Makefile
@@ -28,6 +28,13 @@ endif
 ifeq ($(MCL_USE_XBYAK),0)
   CFLAGS+=-DMCL_DONT_USE_XBYAK
 endif
+ifeq ($(MCL_USE_PROF),1)
+  CFLAGS+=-DMCL_USE_PROF
+endif
+ifeq ($(MCL_USE_PROF),2)
+  CFLAGS+=-DMCL_USE_PROF -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/
+  LDFLAGS+=-L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
+endif
 ##################################################################
 MCL_LIB=$(LIB_DIR)/libmcl.a
 MCL_SNAME=mcl
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 97bb8861..6185bb0e 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -127,71 +127,6 @@ if (rm.isReg()) { \
 
 namespace fp {
 
-struct Profiler {
-	FILE *fp_;
-	const char *suf_;
-	const uint8_t *prev_;
-	Profiler()
-		: fp_(0)
-		, suf_(0)
-		, prev_(0)
-	{
-	}
-	void init(const char *suf, const uint8_t *prev)
-	{
-#ifdef __linux__
-		close();
-		const char *s = getenv("MCL_PERF");
-		if (s == 0 || strcmp(s, "1") != 0) return;
-		fprintf(stderr, "use perf suf=%s\n", suf);
-		suf_ = suf;
-		const int pid = getpid();
-		char name[128];
-		snprintf(name, sizeof(name), "/tmp/perf-%d.map", pid);
-		fp_ = fopen(name, "wb");
-		if (fp_ == 0) throw cybozu::Exception("PerMap") << name;
-		prev_ = prev;
-#else
-		(void)suf;
-		(void)prev;
-#endif
-	}
-	~Profiler()
-	{
-		close();
-	}
-	void close()
-	{
-#ifdef __linux__
-		if (fp_ == 0) return;
-		fclose(fp_);
-		fp_ = 0;
-		prev_ = 0;
-#endif
-	}
-	void set(const uint8_t *p, size_t n, const char *name) const
-	{
-#ifdef __linux__
-		if (fp_ == 0) return;
-		fprintf(fp_, "%llx %zx %s%s\n", (long long)p, n, name, suf_);
-#else
-		(void)p;
-		(void)n;
-		(void)name;
-#endif
-	}
-	void set(const char *name, const uint8_t *cur)
-	{
-#ifdef __linux__
-		set(prev_, cur - prev_, name);
-		prev_ = cur;
-#else
-		(void)name;
-		(void)cur;
-#endif
-	}
-};
-
 struct FpGenerator : Xbyak::CodeGenerator {
 	typedef Xbyak::RegExp RegExp;
 	typedef Xbyak::Reg64 Reg64;
@@ -268,7 +203,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	int pn_;
 	int FpByte_;
 	bool isFullBit_;
-	Profiler prof_;
+	Xbyak::util::Profiler prof_;
 
 	/*
 		@param op [in] ; use op.p, op.N, op.isFullBit
@@ -331,9 +266,16 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		FpByte_ = int(op.maxN * sizeof(uint64_t));
 		isFullBit_ = op.isFullBit;
 //		printf("p=%p, pn_=%d, isFullBit_=%d\n", p_, pn_, isFullBit_);
+#ifdef MCL_USE_PROF
 		static char suf[] = "_0";
-		prof_.init(suf, getCurr());
-		suf[1]++;
+		const char *s = getenv("MCL_PROF");
+		if (s && s[0] && s[1] == '\0') {
+			prof_.init(s[0] - '0');
+			prof_.setStartAddr(getCurr());
+			prof_.setNameSuffix(suf);
+			suf[1]++;
+		}
+#endif
 
 		op.fp_addPre = gen_addSubPre(true, pn_);
 		prof_.set("Fp_addPre", getCurr());
diff --git a/src/xbyak/xbyak_util.h b/src/xbyak/xbyak_util.h
index c4e99ae2..04c661c3 100644
--- a/src/xbyak/xbyak_util.h
+++ b/src/xbyak/xbyak_util.h
@@ -789,7 +789,7 @@ class Profiler {
 				const int pid = getpid();
 				char name[128];
 				snprintf(name, sizeof(name), "/tmp/perf-%d.map", pid);
-				fp_ = fopen(name, "wb");
+				fp_ = fopen(name, "a+");
 				if (fp_ == 0) {
 					fprintf(stderr, "can't open %s\n", name);
 					return;
@@ -834,6 +834,7 @@ class Profiler {
 		if (mode_ == Perf) {
 			if (fp_ == 0) return;
 			fprintf(fp_, "%llx %zx %s%s\n", (long long)startAddr, funcSize, funcName, suffix_);
+			fflush(fp_);
 		}
 #endif
 #ifdef XBYAK_USE_VTUNE

From 40a2ec8fd8a2a9dfcf48fb82df094bb78b8a13ee Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 22 Aug 2019 21:26:18 +0900
Subject: [PATCH 055/553] fix include dependency of ec.hpp

---
 include/mcl/ec.hpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 36862391..1ab508a1 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -7,9 +7,7 @@
 	http://opensource.org/licenses/BSD-3-Clause
 */
 #include <stdlib.h>
-#include <cybozu/exception.hpp>
-#include <mcl/op.hpp>
-#include <mcl/util.hpp>
+#include <mcl/fp.hpp>
 #include <mcl/ecparam.hpp>
 
 //#define MCL_EC_USE_AFFINE

From 6761619ba6f7cd79bb215a479cbc20f8fd428874 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 26 Aug 2019 15:25:55 +0900
Subject: [PATCH 056/553] use secp256k1 in sample/ecdh.cpp

---
 sample/ecdh.cpp | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/sample/ecdh.cpp b/sample/ecdh.cpp
index 4fca3c0c..14ab70dd 100644
--- a/sample/ecdh.cpp
+++ b/sample/ecdh.cpp
@@ -3,43 +3,48 @@
 */
 #include <iostream>
 #include <fstream>
-#include <cybozu/random_generator.hpp>
-#include <mcl/fp.hpp>
-#include <mcl/ecparam.hpp>
+#include <mcl/ec.hpp>
 
-typedef mcl::FpT<mcl::FpTag> Fp;
-typedef mcl::FpT<mcl::ZnTag> Zn;
+typedef mcl::FpT<mcl::FpTag, 256> Fp;
+typedef mcl::FpT<mcl::ZnTag, 256> Fr;
 typedef mcl::EcT<Fp> Ec;
 
+void put(const char *msg, const Ec& P)
+{
+	std::cout << msg << P.getStr(mcl::IoEcAffine | 16) << std::endl;
+}
+
 int main()
 {
 	/*
 		Ec is an elliptic curve over Fp
-		the cyclic group of <P> is isomorphic to Zn
+		the cyclic group of <P> is isomorphic to Fr
 	*/
 	Ec P;
-	mcl::initCurve<Ec, Zn>(MCL_SECP192K1, &P);
+	mcl::initCurve<Ec, Fr>(MCL_SECP256K1, &P);
+	put("P=", P);
+
 	/*
 		Alice setups a private key a and public key aP
 	*/
-	Zn a;
+	Fr a;
 	Ec aP;
 
 	a.setByCSPRNG();
 	Ec::mul(aP, P, a); // aP = a * P;
 
-	std::cout << "aP=" << aP << std::endl;
+	put("aP=", aP);
 
 	/*
 		Bob setups a private key b and public key bP
 	*/
-	Zn b;
+	Fr b;
 	Ec bP;
 
 	b.setByCSPRNG();
 	Ec::mul(bP, P, b); // bP = b * P;
 
-	std::cout << "bP=" << bP << std::endl;
+	put("bP=", bP);
 
 	Ec abP, baP;
 

From 3ec6ea8a0e0c4deff15970117293202357ac51aa Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 28 Aug 2019 02:58:03 +0900
Subject: [PATCH 057/553] remove unnecessary set

---
 include/mcl/bn.hpp | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 53a456fd..85951c73 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -1613,23 +1613,22 @@ inline void millerLoop(Fp12& f, const G1& P_, const G2& Q_)
 	if (BN::param.useNAF) {
 		G2::neg(negQ, Q);
 	}
-	Fp6 d, e, l;
-	d = e = l = 1;
+	Fp6 d, e;
 	G1 adjP = makeAdjP(P);
 	dblLine(d, T, adjP);
-	addLine(l, T, Q, P);
-	mulSparse2(f, d, l);
+	addLine(e, T, Q, P);
+	mulSparse2(f, d, e);
 	for (size_t i = 2; i < BN::param.siTbl.size(); i++) {
-		dblLine(l, T, adjP);
+		dblLine(e, T, adjP);
 		Fp12::sqr(f, f);
-		mulSparse(f, l);
+		mulSparse(f, e);
 		if (BN::param.siTbl[i]) {
 			if (BN::param.siTbl[i] > 0) {
-				addLine(l, T, Q, P);
+				addLine(e, T, Q, P);
 			} else {
-				addLine(l, T, negQ, P);
+				addLine(e, T, negQ, P);
 			}
-			mulSparse(f, l);
+			mulSparse(f, e);
 		}
 	}
 	if (BN::param.z < 0) {

From b74ec0cf1be9d53e74bc703928df2f19eb83c63b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 28 Aug 2019 03:52:23 +0900
Subject: [PATCH 058/553] refactor millerLoop

---
 include/mcl/bn.hpp | 104 ++++++++++++++++++++++-----------------------
 1 file changed, 52 insertions(+), 52 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 85951c73..90a202b3 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -1224,7 +1224,7 @@ inline void addLine(Fp6& l, G2& R, const G2& Q, const G1& P)
 inline void mulFp6cb_by_G1xy(Fp6& y, const Fp6& x, const G1& P)
 {
 	assert(P.isNormalized());
-	if (&y != &x) y.a = x.a;
+	y.a = x.a;
 	Fp2::mulFp(y.c, x.c, P.x);
 	Fp2::mulFp(y.b, x.b, P.y);
 }
@@ -1560,16 +1560,16 @@ inline void expHardPartBN(Fp12& y, const Fp12& x)
 #endif
 }
 /*
+	adjP = (P.x * 3, -P.y)
 	remark : returned value is NOT on a curve
 */
-inline G1 makeAdjP(const G1& P)
+inline void makeAdjP(G1& adjP, const G1& P)
 {
-	G1 adjP;
-	Fp::add(adjP.x, P.x, P.x);
-	adjP.x += P.x;
+	Fp x2;
+	Fp::add(x2, P.x, P.x);
+	Fp::add(adjP.x, x2, P.x);
 	Fp::neg(adjP.y, P.y);
-	adjP.z = 1;
-	return adjP;
+	// adjP.z.clear(); // not used
 }
 
 } // mcl::bn::local
@@ -1614,7 +1614,8 @@ inline void millerLoop(Fp12& f, const G1& P_, const G2& Q_)
 		G2::neg(negQ, Q);
 	}
 	Fp6 d, e;
-	G1 adjP = makeAdjP(P);
+	G1 adjP;
+	makeAdjP(adjP, P);
 	dblLine(d, T, adjP);
 	addLine(e, T, Q, P);
 	mulSparse2(f, d, e);
@@ -1636,12 +1637,11 @@ inline void millerLoop(Fp12& f, const G1& P_, const G2& Q_)
 		Fp6::neg(f.b, f.b);
 	}
 	if (BN::param.isBLS12) return;
-	G2 Q1, Q2;
-	Frobenius(Q1, Q);
-	Frobenius(Q2, Q1);
-	G2::neg(Q2, Q2);
-	addLine(d, T, Q1, P);
-	addLine(e, T, Q2, P);
+	Frobenius(Q, Q);
+	addLine(d, T, Q, P);
+	Frobenius(Q, Q);
+	G2::neg(Q, Q);
+	addLine(e, T, Q, P);
 	Fp12 ft;
 	mulSparse2(ft, d, e);
 	f *= ft;
@@ -1687,12 +1687,11 @@ inline void precomputeG2(Fp6 *Qcoeff, const G2& Q_)
 		G2::neg(T, T);
 	}
 	if (BN::param.isBLS12) return;
-	G2 Q1, Q2;
-	Frobenius(Q1, Q);
-	Frobenius(Q2, Q1);
-	G2::neg(Q2, Q2);
-	addLineWithoutP(Qcoeff[idx++], T, Q1);
-	addLineWithoutP(Qcoeff[idx++], T, Q2);
+	Frobenius(Q, Q);
+	addLineWithoutP(Qcoeff[idx++], T, Q);
+	Frobenius(Q, Q);
+	G2::neg(Q, Q);
+	addLineWithoutP(Qcoeff[idx++], T, Q);
 	assert(idx == BN::param.precomputedQcoeffSize);
 }
 /*
@@ -1720,9 +1719,10 @@ inline void precomputedMillerLoop(Fp12& f, const G1& P_, const Fp6* Qcoeff)
 {
 	G1 P(P_);
 	P.normalize();
-	G1 adjP = makeAdjP(P);
+	G1 adjP;
+	makeAdjP(adjP, P);
 	size_t idx = 0;
-	Fp6 d, e, l;
+	Fp6 d, e;
 	mulFp6cb_by_G1xy(d, Qcoeff[idx], adjP);
 	idx++;
 
@@ -1730,14 +1730,14 @@ inline void precomputedMillerLoop(Fp12& f, const G1& P_, const Fp6* Qcoeff)
 	idx++;
 	mulSparse2(f, d, e);
 	for (size_t i = 2; i < BN::param.siTbl.size(); i++) {
-		mulFp6cb_by_G1xy(l, Qcoeff[idx], adjP);
+		mulFp6cb_by_G1xy(e, Qcoeff[idx], adjP);
 		idx++;
 		Fp12::sqr(f, f);
-		mulSparse(f, l);
+		mulSparse(f, e);
 		if (BN::param.siTbl[i]) {
-			mulFp6cb_by_G1xy(l, Qcoeff[idx], P);
+			mulFp6cb_by_G1xy(e, Qcoeff[idx], P);
 			idx++;
-			mulSparse(f, l);
+			mulSparse(f, e);
 		}
 	}
 	if (BN::param.z < 0) {
@@ -1778,16 +1778,16 @@ inline void precomputedMillerLoop2mixed(Fp12& f, const G1& P1_, const G2& Q1_, c
 	if (BN::param.useNAF) {
 		G2::neg(negQ1, Q1);
 	}
-	G1 adjP1 = makeAdjP(P1);
-	G1 adjP2 = makeAdjP(P2);
+	G1 adjP1, adjP2;
+	makeAdjP(adjP1, P1);
+	makeAdjP(adjP2, P2);
 	size_t idx = 0;
-	Fp6 d1, d2, e1, e2, l1, l2;
+	Fp6 d1, d2, e1, e2;
 	dblLine(d1, T, adjP1);
 	mulFp6cb_by_G1xy(d2, Q2coeff[idx], adjP2);
 	idx++;
 
 	Fp12 f1, f2;
-	e1 = 1;
 	addLine(e1, T, Q1, P1);
 	mulSparse2(f1, d1, e1);
 
@@ -1796,21 +1796,21 @@ inline void precomputedMillerLoop2mixed(Fp12& f, const G1& P1_, const G2& Q1_, c
 	Fp12::mul(f, f1, f2);
 	idx++;
 	for (size_t i = 2; i < BN::param.siTbl.size(); i++) {
-		dblLine(l1, T, adjP1);
-		mulFp6cb_by_G1xy(l2, Q2coeff[idx], adjP2);
+		dblLine(e1, T, adjP1);
+		mulFp6cb_by_G1xy(e2, Q2coeff[idx], adjP2);
 		idx++;
 		Fp12::sqr(f, f);
-		mulSparse2(f1, l1, l2);
+		mulSparse2(f1, e1, e2);
 		f *= f1;
 		if (BN::param.siTbl[i]) {
 			if (BN::param.siTbl[i] > 0) {
-				addLine(l1, T, Q1, P1);
+				addLine(e1, T, Q1, P1);
 			} else {
-				addLine(l1, T, negQ1, P1);
+				addLine(e1, T, negQ1, P1);
 			}
-			mulFp6cb_by_G1xy(l2, Q2coeff[idx], P2);
+			mulFp6cb_by_G1xy(e2, Q2coeff[idx], P2);
 			idx++;
-			mulSparse2(f1, l1, l2);
+			mulSparse2(f1, e1, e2);
 			f *= f1;
 		}
 	}
@@ -1819,14 +1819,13 @@ inline void precomputedMillerLoop2mixed(Fp12& f, const G1& P1_, const G2& Q1_, c
 		Fp6::neg(f.b, f.b);
 	}
 	if (BN::param.isBLS12) return;
-	G2 Q11, Q12;
-	Frobenius(Q11, Q1);
-	Frobenius(Q12, Q11);
-	G2::neg(Q12, Q12);
-	addLine(d1, T, Q11, P1);
+	Frobenius(Q1, Q1);
+	addLine(d1, T, Q1, P1);
 	mulFp6cb_by_G1xy(d2, Q2coeff[idx], P2);
 	idx++;
-	addLine(e1, T, Q12, P1);
+	Frobenius(Q1, Q1);
+	G2::neg(Q1, Q1);
+	addLine(e1, T, Q1, P1);
 	mulFp6cb_by_G1xy(e2, Q2coeff[idx], P2);
 	idx++;
 	mulSparse2(f1, d1, e1);
@@ -1843,10 +1842,11 @@ inline void precomputedMillerLoop2(Fp12& f, const G1& P1_, const Fp6* Q1coeff, c
 	G1 P1(P1_), P2(P2_);
 	P1.normalize();
 	P2.normalize();
-	G1 adjP1 = makeAdjP(P1);
-	G1 adjP2 = makeAdjP(P2);
+	G1 adjP1, adjP2;
+	makeAdjP(adjP1, P1);
+	makeAdjP(adjP2, P2);
 	size_t idx = 0;
-	Fp6 d1, d2, e1, e2, l1, l2;
+	Fp6 d1, d2, e1, e2;
 	mulFp6cb_by_G1xy(d1, Q1coeff[idx], adjP1);
 	mulFp6cb_by_G1xy(d2, Q2coeff[idx], adjP2);
 	idx++;
@@ -1860,17 +1860,17 @@ inline void precomputedMillerLoop2(Fp12& f, const G1& P1_, const Fp6* Q1coeff, c
 	Fp12::mul(f, f1, f2);
 	idx++;
 	for (size_t i = 2; i < BN::param.siTbl.size(); i++) {
-		mulFp6cb_by_G1xy(l1, Q1coeff[idx], adjP1);
-		mulFp6cb_by_G1xy(l2, Q2coeff[idx], adjP2);
+		mulFp6cb_by_G1xy(e1, Q1coeff[idx], adjP1);
+		mulFp6cb_by_G1xy(e2, Q2coeff[idx], adjP2);
 		idx++;
 		Fp12::sqr(f, f);
-		mulSparse2(f1, l1, l2);
+		mulSparse2(f1, e1, e2);
 		f *= f1;
 		if (BN::param.siTbl[i]) {
-			mulFp6cb_by_G1xy(l1, Q1coeff[idx], P1);
-			mulFp6cb_by_G1xy(l2, Q2coeff[idx], P2);
+			mulFp6cb_by_G1xy(e1, Q1coeff[idx], P1);
+			mulFp6cb_by_G1xy(e2, Q2coeff[idx], P2);
 			idx++;
-			mulSparse2(f1, l1, l2);
+			mulSparse2(f1, e1, e2);
 			f *= f1;
 		}
 	}

From 1d2f4cb29b522ae93791943e421f8f0639dfaf4c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 28 Aug 2019 05:03:25 +0900
Subject: [PATCH 059/553] optimize millerLoopVec

---
 include/mcl/bn.hpp | 98 ++++++++++++++++++++++++++++++++++++++++------
 test/bench.hpp     | 15 +++++++
 2 files changed, 102 insertions(+), 11 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 90a202b3..15ebca8c 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -1633,10 +1633,12 @@ inline void millerLoop(Fp12& f, const G1& P_, const G2& Q_)
 		}
 	}
 	if (BN::param.z < 0) {
-		G2::neg(T, T);
 		Fp6::neg(f.b, f.b);
 	}
 	if (BN::param.isBLS12) return;
+	if (BN::param.z < 0) {
+		G2::neg(T, T);
+	}
 	Frobenius(Q, Q);
 	addLine(d, T, Q, P);
 	Frobenius(Q, Q);
@@ -1900,24 +1902,98 @@ inline void precomputedMillerLoop2mixed(Fp12& f, const G1& P1, const G2& Q1, con
 }
 #endif
 
+template<size_t N>
+inline void millerLoopVecN(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
+{
+	assert(n <= N);
+	G1 P[N];
+	G2 Q[N];
+	// remove zero elements
+	{
+		size_t realN = 0;
+		for (size_t i = 0; i < n; i++) {
+			if (!Pvec[i].isZero() && !Qvec[i].isZero()) {
+				G1::normalize(P[realN], Pvec[i]);
+				G2::normalize(Q[realN], Qvec[i]);
+				realN++;
+			}
+		}
+		if (realN <= 0) {
+			f = 1;
+			return;
+		}
+		n = realN; // update n
+	}
+	// all P[] and Q[] are not zero
+	G2 T[N], negQ[N];
+	G1 adjP[N];
+	Fp6 d, e;
+	for (size_t i = 0; i < n; i++) {
+		T[i] = Q[i];
+		if (BN::param.useNAF) {
+			G2::neg(negQ[i], Q[i]);
+		}
+		makeAdjP(adjP[i], P[i]);
+		dblLine(d, T[i], adjP[i]);
+		addLine(e, T[i], Q[i], P[i]);
+		if (i == 0) {
+			mulSparse2(f, d, e);
+		} else {
+			Fp12 ft;
+			mulSparse2(ft, d, e);
+			f *= ft;
+		}
+	}
+	for (size_t j = 2; j < BN::param.siTbl.size(); j++) {
+		Fp12::sqr(f, f);
+		for (size_t i = 0; i < n; i++) {
+			dblLine(e, T[i], adjP[i]);
+			mulSparse(f, e);
+			int v = BN::param.siTbl[j];
+			if (v) {
+				if (v > 0) {
+					addLine(e, T[i], Q[i], P[i]);
+				} else {
+					addLine(e, T[i], negQ[i], P[i]);
+				}
+				mulSparse(f, e);
+			}
+		}
+	}
+	if (BN::param.z < 0) {
+		Fp6::neg(f.b, f.b);
+	}
+	if (BN::param.isBLS12) return;
+	for (size_t i = 0; i < n; i++) {
+		if (BN::param.z < 0) {
+			G2::neg(T[i], T[i]);
+		}
+		Frobenius(Q[i], Q[i]);
+		addLine(d, T[i], Q[i], P[i]);
+		Frobenius(Q[i], Q[i]);
+		G2::neg(Q[i], Q[i]);
+		addLine(e, T[i], Q[i], P[i]);
+		Fp12 ft;
+		mulSparse2(ft, d, e);
+		f *= ft;
+	}
+}
 /*
 	f = prod_{i=0}^{n-1} millerLoop(Pvec[i], Qvec[i])
 */
 inline void millerLoopVec(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
 {
-	if (n == 0) {
-		f = 1;
-		return;
-	}
-	millerLoop(f, Pvec[0], Qvec[0]);
-	for (size_t i = 1; i < n; i++) {
-		Fp12 g;
-		millerLoop(g, Pvec[i], Qvec[i]);
-		f *= g;
+	const size_t N = 16;
+	size_t remain = fp::min_(N, n);
+	millerLoopVecN<N>(f, Pvec, Qvec, remain);
+	for (size_t i = remain; i < n; i += N) {
+		remain = fp::min_(n - i, N);
+		Fp12 ft;
+		millerLoopVecN<N>(ft, Pvec + i, Qvec + i, remain);
+		f *= ft;
 	}
 }
 
-
 inline void mapToG1(bool *pb, G1& P, const Fp& x) { *pb = BN::param.mapTo.calcG1(P, x); }
 inline void mapToG2(bool *pb, G2& P, const Fp2& x) { *pb = BN::param.mapTo.calcG2(P, x); }
 #ifndef CYBOZU_DONT_USE_EXCEPTION
diff --git a/test/bench.hpp b/test/bench.hpp
index cc1639e6..cf2a7281 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -141,6 +141,21 @@ void testBench(const G1& P, const G2& Q)
 	CYBOZU_BENCH_C("precomputeG2  ", C, precomputeG2, Qcoeff, Q);
 	precomputeG2(Qcoeff, Q);
 	CYBOZU_BENCH_C("precomputedML ", C, precomputedMillerLoop, e2, P, Qcoeff);
+	const size_t n = 7;
+	G1 Pvec[n];
+	G2 Qvec[n];
+	for (size_t i = 0; i < n; i++) {
+		char d = (char)(i + 1);
+		hashAndMapToG1(Pvec[i], &d, 1);
+		hashAndMapToG2(Qvec[i], &d, 1);
+	}
+	e2 = 1;
+	for (size_t i = 0; i < n; i++) {
+		millerLoop(e1, Pvec[i], Qvec[i]);
+		e2 *= e1;
+	}
+	CYBOZU_BENCH_C("millerLoopVec ", 3000, millerLoopVec, e1, Pvec, Qvec, n);
+	CYBOZU_TEST_EQUAL(e1, e2);
 }
 
 inline void SquareRootPrecomputeTest(const mpz_class& p)

From 7df73796d504fceeebdc695fd56d60b14ebd989c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 28 Aug 2019 05:07:44 +0900
Subject: [PATCH 060/553] remove assert

---
 include/mcl/bn.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 15ebca8c..0a5744be 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -1223,7 +1223,6 @@ inline void addLine(Fp6& l, G2& R, const G2& Q, const G1& P)
 }
 inline void mulFp6cb_by_G1xy(Fp6& y, const Fp6& x, const G1& P)
 {
-	assert(P.isNormalized());
 	y.a = x.a;
 	Fp2::mulFp(y.c, x.c, P.x);
 	Fp2::mulFp(y.b, x.b, P.y);

From 1d7c99e8a7f6cef5484b6a53440d3e398d7b6771 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 28 Aug 2019 05:18:23 +0900
Subject: [PATCH 061/553] add millerLoopVec

---
 include/mcl/bn.h               |  2 ++
 include/mcl/impl/bn_c_impl.hpp |  4 ++++
 test/bn_c_test.hpp             | 20 ++++++++++++++++++++
 3 files changed, 26 insertions(+)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index e241f6d8..20212da1 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -407,6 +407,8 @@ MCLBN_DLL_API void mclBnGT_pow(mclBnGT *z, const mclBnGT *x, const mclBnFr *y);
 MCLBN_DLL_API void mclBn_pairing(mclBnGT *z, const mclBnG1 *x, const mclBnG2 *y);
 MCLBN_DLL_API void mclBn_finalExp(mclBnGT *y, const mclBnGT *x);
 MCLBN_DLL_API void mclBn_millerLoop(mclBnGT *z, const mclBnG1 *x, const mclBnG2 *y);
+// z = prod_{i=0}^{n-1} millerLoop(x[i], y[i])
+MCLBN_DLL_API void mclBn_millerLoopVec(mclBnGT *z, const mclBnG1 *x, const mclBnG2 *y, mclSize n);
 
 // return precomputedQcoeffSize * sizeof(Fp6) / sizeof(uint64_t)
 MCLBN_DLL_API int mclBn_getUint64NumToPrecompute(void);
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index a081ba9e..291ff713 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -527,6 +527,10 @@ void mclBn_millerLoop(mclBnGT *z, const mclBnG1 *x, const mclBnG2 *y)
 {
 	millerLoop(*cast(z), *cast(x), *cast(y));
 }
+void mclBn_millerLoopVec(mclBnGT *z, const mclBnG1 *x, const mclBnG2 *y, mclSize n)
+{
+	millerLoopVec(*cast(z), cast(x), cast(y), n);
+}
 int mclBn_getUint64NumToPrecompute(void)
 {
 	return int(BN::param.precomputedQcoeffSize * sizeof(Fp6) / sizeof(uint64_t));
diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index 7aaf94a5..654ab900 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -367,6 +367,26 @@ CYBOZU_TEST_AUTO(precomputed)
 	CYBOZU_TEST_ASSERT(mclBnGT_isEqual(&e1, &f3));
 }
 
+CYBOZU_TEST_AUTO(millerLoopVec)
+{
+	const size_t n = 7;
+	mclBnG1 Pvec[n];
+	mclBnG2 Qvec[n];
+	for (size_t i = 0; i < n; i++) {
+		char d = (char)(i + 1);
+		mclBnG1_hashAndMapTo(&Pvec[i], &d, 1);
+		mclBnG2_hashAndMapTo(&Qvec[i], &d, 1);
+	}
+	mclBnGT e1, e2;
+	mclBnGT_setInt(&e2, 1);
+	for (size_t i = 0; i < n; i++) {
+		mclBn_millerLoop(&e1, &Pvec[i], &Qvec[i]);
+		mclBnGT_mul(&e2, &e2, &e1);
+	}
+	mclBn_millerLoopVec(&e1, Pvec, Qvec, n);
+	CYBOZU_TEST_ASSERT(mclBnGT_isEqual(&e1, &e2));
+}
+
 CYBOZU_TEST_AUTO(serialize)
 {
 	const size_t FrSize = mclBn_getFrByteSize();

From 68f332d0aeefa70692e6eb45e278c2b9fe1da6bb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 30 Aug 2019 11:13:28 +0900
Subject: [PATCH 062/553] change members of mclBn{Fp,G1,G2}

---
 include/mcl/bn.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 20212da1..42219895 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -75,30 +75,30 @@ typedef struct mclBnFp2 mclBnFp2;
 
 #else
 
+typedef struct {
+	uint64_t d[MCLBN_FP_UNIT_SIZE];
+} mclBnFp;
+
+typedef struct {
+	mclBnFp d[2];
+} mclBnFp2;
+
 typedef struct {
 	uint64_t d[MCLBN_FR_UNIT_SIZE];
 } mclBnFr;
 
 typedef struct {
-	uint64_t d[MCLBN_FP_UNIT_SIZE * 3];
+	mclBnFp x, y, z;
 } mclBnG1;
 
 typedef struct {
-	uint64_t d[MCLBN_FP_UNIT_SIZE * 2 * 3];
+	mclBnFp2 x, y, z;
 } mclBnG2;
 
 typedef struct {
-	uint64_t d[MCLBN_FP_UNIT_SIZE * 12];
+	mclBnFp d[12];
 } mclBnGT;
 
-typedef struct {
-	uint64_t d[MCLBN_FP_UNIT_SIZE];
-} mclBnFp;
-
-typedef struct {
-	mclBnFp d[2];
-} mclBnFp2;
-
 #endif
 
 #include <mcl/curve_type.h>

From e8dd8a68a0f6b6edb00885ed780800476765fdf3 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 3 Sep 2019 14:29:44 +0900
Subject: [PATCH 063/553] add squareRoot into bn.h

---
 include/mcl/bn.h               |  6 +++
 include/mcl/impl/bn_c_impl.hpp | 13 +++++++
 test/bn_c_test.hpp             | 69 ++++++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 42219895..42c566fc 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -316,6 +316,12 @@ MCLBN_DLL_API void mclBnFp_sub(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
 MCLBN_DLL_API void mclBnFp_mul(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
 MCLBN_DLL_API void mclBnFp_div(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
 
+// y is one of square root of x
+// return 0 if success else -1
+MCLBN_DLL_API int mclBnFr_squareRoot(mclBnFr *y, const mclBnFr *x);
+MCLBN_DLL_API int mclBnFp_squareRoot(mclBnFp *y, const mclBnFp *x);
+MCLBN_DLL_API int mclBnFp2_squareRoot(mclBnFp2 *y, const mclBnFp2 *x);
+
 ////////////////////////////////////////////////
 // set zero
 MCLBN_DLL_API void mclBnG1_clear(mclBnG1 *x);
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index 291ff713..ce674555 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -269,6 +269,19 @@ void mclBnFp_div(mclBnFp *z, const mclBnFp *x, const mclBnFp *y)
 	Fp::div(*cast(z),*cast(x), *cast(y));
 }
 
+int mclBnFr_squareRoot(mclBnFr *y, const mclBnFr *x)
+{
+	return Fr::squareRoot(*cast(y), *cast(x)) ? 0 : -1;
+}
+int mclBnFp_squareRoot(mclBnFp *y, const mclBnFp *x)
+{
+	return Fp::squareRoot(*cast(y), *cast(x)) ? 0 : -1;
+}
+int mclBnFp2_squareRoot(mclBnFp2 *y, const mclBnFp2 *x)
+{
+	return Fp2::squareRoot(*cast(y), *cast(x)) ? 0 : -1;
+}
+
 ////////////////////////////////////////////////
 // set zero
 void mclBnG1_clear(mclBnG1 *x)
diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index 654ab900..62d7871a 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -760,6 +760,75 @@ CYBOZU_TEST_AUTO(Fp2)
 	CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&x1, &x2));
 }
 
+CYBOZU_TEST_AUTO(squareRootFr)
+{
+	mclBnFr x, y, y2;
+	for (int i = 0; i < 10; i++) {
+		mclBnFr_setInt(&x, i * i);
+		CYBOZU_TEST_EQUAL(mclBnFr_squareRoot(&y, &x), 0);
+		mclBnFr_sqr(&y2, &y);
+		CYBOZU_TEST_EQUAL(mclBnFr_isEqual(&x, &y2), 1);
+	}
+	char buf[128];
+	mclBnFr_setInt(&x, -1);
+	CYBOZU_TEST_ASSERT(mclBnFr_serialize(buf, sizeof(buf), &x) > 0);
+	int mod8 = (buf[0] + 1) & 7;
+	/*
+		(2)
+		(p) = (-1)^((p^2-1)/8) = 1 if and only if there is x s.t. x^2 = 2 mod p
+	*/
+	bool hasSquareRoot = (((mod8 * mod8 - 1) / 8) & 1) == 0;
+	printf("Fr:hasSquareRoot=%d\n", hasSquareRoot);
+	mclBnFr_setInt(&x, 2);
+	CYBOZU_TEST_EQUAL(mclBnFr_squareRoot(&y, &x), hasSquareRoot ? 0 : -1);
+	if (hasSquareRoot) {
+		mclBnFr_sqr(&y2, &y);
+		CYBOZU_TEST_EQUAL(mclBnFr_isEqual(&x, &y2), 1);
+	}
+}
+
+CYBOZU_TEST_AUTO(squareRootFp)
+{
+	mclBnFp x, y, y2;
+	for (int i = 0; i < 10; i++) {
+		mclBnFp_setInt(&x, i * i);
+		CYBOZU_TEST_EQUAL(mclBnFp_squareRoot(&y, &x), 0);
+		mclBnFp_sqr(&y2, &y);
+		CYBOZU_TEST_EQUAL(mclBnFp_isEqual(&x, &y2), 1);
+	}
+	char buf[128];
+	mclBnFp_setInt(&x, -1);
+	CYBOZU_TEST_ASSERT(mclBnFp_serialize(buf, sizeof(buf), &x) > 0);
+	int mod8 = (buf[0] + 1) & 7;
+	/*
+		(2)
+		(p) = (-1)^((p^2-1)/8) = 1 if and only if there is x s.t. x^2 = 2 mod p
+	*/
+	bool hasSquareRoot = (((mod8 * mod8 - 1) / 8) & 1) == 0;
+	printf("Fp:hasSquareRoot=%d\n", hasSquareRoot);
+	mclBnFp_setInt(&x, 2);
+	CYBOZU_TEST_EQUAL(mclBnFp_squareRoot(&y, &x), hasSquareRoot ? 0 : -1);
+	if (hasSquareRoot) {
+		mclBnFp_sqr(&y2, &y);
+		CYBOZU_TEST_EQUAL(mclBnFp_isEqual(&x, &y2), 1);
+	}
+}
+
+#if 0
+CYBOZU_TEST_AUTO(squareRootFp2)
+{
+	mclBnFp2 x, y, y2;
+	for (int i = 0; i < 10; i++) {
+		mclBnFp_setByCSPRNG(&x.d[0]);
+		mclBnFp_setByCSPRNG(&x.d[1]);
+		mclBnFp2_sqr(&x, &x);
+		CYBOZU_TEST_EQUAL(mclBnFp2_squareRoot(&y, &x), 0);
+		mclBnFp2_sqr(&y2, &y);
+		CYBOZU_TEST_EQUAL(mclBnFp2_isEqual(&x, &y2), 1);
+	}
+}
+#endif
+
 CYBOZU_TEST_AUTO(mapToG1)
 {
 	mclBnFp x;

From 1d6b4302ad2e6c29d3bbc87c155e847afb16877b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 3 Sep 2019 14:39:52 +0900
Subject: [PATCH 064/553] add comments to mcl{Fr,Fp,Fp2,G1,G2}

---
 include/mcl/bn.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 42c566fc..f18c6da5 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -79,14 +79,23 @@ typedef struct {
 	uint64_t d[MCLBN_FP_UNIT_SIZE];
 } mclBnFp;
 
+/*
+	x = d[0] + d[1] i where i^2 = -1
+*/
 typedef struct {
 	mclBnFp d[2];
 } mclBnFp2;
 
+/*
+	G1 and G2 are isomorphic to Fr
+*/
 typedef struct {
 	uint64_t d[MCLBN_FR_UNIT_SIZE];
 } mclBnFr;
 
+/*
+	G1 is defined over Fp
+*/
 typedef struct {
 	mclBnFp x, y, z;
 } mclBnG1;

From daaabddf1dbfe7cb594e7fbe4972c541685fa06c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 3 Sep 2019 15:48:18 +0900
Subject: [PATCH 065/553] add mclBnFp2_{add,sub,mul,div,neg,sqr,inv}

---
 include/mcl/bn.h               | 11 ++++++++++
 include/mcl/impl/bn_c_impl.hpp | 37 ++++++++++++++++++++++++++++++++++
 test/bn_c_test.hpp             | 25 +++++++++++++++++++++++
 3 files changed, 73 insertions(+)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index f18c6da5..68b87210 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -281,7 +281,10 @@ MCLBN_DLL_API int mclBnFp_isValid(const mclBnFp *x);
 MCLBN_DLL_API int mclBnFp_isEqual(const mclBnFp *x, const mclBnFp *y);
 MCLBN_DLL_API int mclBnFp_isZero(const mclBnFp *x);
 MCLBN_DLL_API int mclBnFp_isOne(const mclBnFp *x);
+
 MCLBN_DLL_API int mclBnFp2_isEqual(const mclBnFp2 *x, const mclBnFp2 *y);
+MCLBN_DLL_API int mclBnFp2_isZero(const mclBnFp2 *x);
+MCLBN_DLL_API int mclBnFp2_isOne(const mclBnFp2 *x);
 
 #ifndef MCL_DONT_USE_CSRPNG
 // return 0 if success
@@ -325,6 +328,14 @@ MCLBN_DLL_API void mclBnFp_sub(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
 MCLBN_DLL_API void mclBnFp_mul(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
 MCLBN_DLL_API void mclBnFp_div(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
 
+MCLBN_DLL_API void mclBnFp2_neg(mclBnFp2 *y, const mclBnFp2 *x);
+MCLBN_DLL_API void mclBnFp2_inv(mclBnFp2 *y, const mclBnFp2 *x);
+MCLBN_DLL_API void mclBnFp2_sqr(mclBnFp2 *y, const mclBnFp2 *x);
+MCLBN_DLL_API void mclBnFp2_add(mclBnFp2 *z, const mclBnFp2 *x, const mclBnFp2 *y);
+MCLBN_DLL_API void mclBnFp2_sub(mclBnFp2 *z, const mclBnFp2 *x, const mclBnFp2 *y);
+MCLBN_DLL_API void mclBnFp2_mul(mclBnFp2 *z, const mclBnFp2 *x, const mclBnFp2 *y);
+MCLBN_DLL_API void mclBnFp2_div(mclBnFp2 *z, const mclBnFp2 *x, const mclBnFp2 *y);
+
 // y is one of square root of x
 // return 0 if success else -1
 MCLBN_DLL_API int mclBnFr_squareRoot(mclBnFr *y, const mclBnFr *x);
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index ce674555..c02ab795 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -269,6 +269,35 @@ void mclBnFp_div(mclBnFp *z, const mclBnFp *x, const mclBnFp *y)
 	Fp::div(*cast(z),*cast(x), *cast(y));
 }
 
+void mclBnFp2_neg(mclBnFp2 *y, const mclBnFp2 *x)
+{
+	Fp2::neg(*cast(y), *cast(x));
+}
+void mclBnFp2_inv(mclBnFp2 *y, const mclBnFp2 *x)
+{
+	Fp2::inv(*cast(y), *cast(x));
+}
+void mclBnFp2_sqr(mclBnFp2 *y, const mclBnFp2 *x)
+{
+	Fp2::sqr(*cast(y), *cast(x));
+}
+void mclBnFp2_add(mclBnFp2 *z, const mclBnFp2 *x, const mclBnFp2 *y)
+{
+	Fp2::add(*cast(z),*cast(x), *cast(y));
+}
+void mclBnFp2_sub(mclBnFp2 *z, const mclBnFp2 *x, const mclBnFp2 *y)
+{
+	Fp2::sub(*cast(z),*cast(x), *cast(y));
+}
+void mclBnFp2_mul(mclBnFp2 *z, const mclBnFp2 *x, const mclBnFp2 *y)
+{
+	Fp2::mul(*cast(z),*cast(x), *cast(y));
+}
+void mclBnFp2_div(mclBnFp2 *z, const mclBnFp2 *x, const mclBnFp2 *y)
+{
+	Fp2::div(*cast(z),*cast(x), *cast(y));
+}
+
 int mclBnFr_squareRoot(mclBnFr *y, const mclBnFr *x)
 {
 	return Fr::squareRoot(*cast(y), *cast(x)) ? 0 : -1;
@@ -714,6 +743,14 @@ int mclBnFp2_isEqual(const mclBnFp2 *x, const mclBnFp2 *y)
 {
 	return *cast(x) == *cast(y);
 }
+int mclBnFp2_isZero(const mclBnFp2 *x)
+{
+	return cast(x)->isZero();
+}
+int mclBnFp2_isOne(const mclBnFp2 *x)
+{
+	return cast(x)->isOne();
+}
 
 int mclBnFp2_mapToG2(mclBnG2 *y, const mclBnFp2 *x)
 {
diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index 62d7871a..953c5a45 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -755,9 +755,34 @@ CYBOZU_TEST_AUTO(Fp2)
 	n = mclBnFp2_deserialize(&x2, buf, n);
 	CYBOZU_TEST_ASSERT(n > 0);
 	CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&x1, &x2));
+
+	mclBnFp2 y, z;
+	mclBnFp2_add(&y, &x1, &x2);
+	for (int i = 0; i < 2; i++) {
+		mclBnFp t;
+		mclBnFp_add(&t, &x1.d[i], &x2.d[i]);
+		CYBOZU_TEST_ASSERT(mclBnFp_isEqual(&y.d[i], &t));
+	}
+	mclBnFp2_sub(&y, &y, &x2);
+	CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&y, &x1));
+	mclBnFp2_mul(&y, &x1, &x2);
+	mclBnFp2_div(&y, &y, &x1);
+	CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&y, &x2));
+	mclBnFp2_inv(&y, &x1);
+	mclBnFp2_mul(&y, &y, &x1);
+	CYBOZU_TEST_ASSERT(mclBnFp2_isOne(&y));
+	mclBnFp2_sqr(&y, &x1);
+	mclBnFp2_mul(&z, &x1, &x1);
+	CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&y, &z));
+	mclBnFp2_sub(&y, &x1, &x2);
+	mclBnFp2_sub(&z, &x2, &x1);
+	mclBnFp2_neg(&z, &z);
+	CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&y, &z));
+
 	mclBnFp2_clear(&x1);
 	memset(&x2, 0, sizeof(x2));
 	CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&x1, &x2));
+	CYBOZU_TEST_ASSERT(mclBnFp2_isZero(&x1));
 }
 
 CYBOZU_TEST_AUTO(squareRootFr)

From 43a1e4983d3d23f10e2c31f6b45548c24276245e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 4 Sep 2019 14:36:20 +0900
Subject: [PATCH 066/553] add test of squareRootFp2

---
 test/bn_c_test.hpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index 953c5a45..afa65f1d 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -839,7 +839,6 @@ CYBOZU_TEST_AUTO(squareRootFp)
 	}
 }
 
-#if 0
 CYBOZU_TEST_AUTO(squareRootFp2)
 {
 	mclBnFp2 x, y, y2;
@@ -852,7 +851,6 @@ CYBOZU_TEST_AUTO(squareRootFp2)
 		CYBOZU_TEST_EQUAL(mclBnFp2_isEqual(&x, &y2), 1);
 	}
 }
-#endif
 
 CYBOZU_TEST_AUTO(mapToG1)
 {

From 2c2db2277377cd6818dbaa93ab216ed5a225d7ff Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 4 Sep 2019 16:44:31 +0900
Subject: [PATCH 067/553] start to develop Ec::mulVec

---
 include/mcl/ec.hpp   | 14 ++++++++++++++
 test/bls12_test.cpp  |  2 ++
 test/bn384_test.cpp  |  2 ++
 test/bn512_test.cpp  |  2 ++
 test/bn_test.cpp     |  2 ++
 test/common_test.hpp | 25 +++++++++++++++++++++++++
 6 files changed, 47 insertions(+)
 create mode 100644 test/common_test.hpp

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 1ab508a1..bbaabd73 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -1009,6 +1009,20 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	{
 		mulArrayBase(z, x, gmp::getUnit(y), gmp::getUnitSize(y), y < 0, constTime);
 	}
+	/*
+		z = sum_{i=0}^{n-1} xVec[i] * yVec[i]
+	*/
+	template<class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
+	static inline void mulVec(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
+	{
+		EcT r, t;
+		r.clear();
+		for (size_t i = 0; i < n; i++) {
+			mul(t, xVec[i], yVec[i]);
+			r += t;
+		}
+		z = r;
+	}
 #ifndef CYBOZU_DONT_USE_EXCEPTION
 	static inline void init(const std::string& astr, const std::string& bstr, int mode = ec::Jacobi)
 	{
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index 5af112f9..c86fed2d 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -6,6 +6,7 @@ cybozu::CpuClock clk;
 #include <mcl/bls12_381.hpp>
 #include <cybozu/option.hpp>
 #include <cybozu/xorshift.hpp>
+#include "common_test.hpp"
 
 #if defined(__EMSCRIPTEN__) && !defined(MCL_AVOID_EXCEPTION_TEST)
 	#define MCL_AVOID_EXCEPTION_TEST
@@ -383,6 +384,7 @@ CYBOZU_TEST_AUTO(naive)
 		testPairing(P, Q, ts.e);
 		testPrecomputed(P, Q);
 		testMillerLoop2(P, Q);
+		testCommon();
 		testBench(P, Q);
 	}
 	int count = (int)clk.getCount();
diff --git a/test/bn384_test.cpp b/test/bn384_test.cpp
index b5674a91..a8c7cdbb 100644
--- a/test/bn384_test.cpp
+++ b/test/bn384_test.cpp
@@ -5,6 +5,7 @@
 #include <cybozu/xorshift.hpp>
 #include <mcl/bn384.hpp>
 #include <mcl/bn.hpp>
+#include "common_test.hpp"
 
 using namespace mcl::bn384;
 
@@ -39,6 +40,7 @@ void testCurve(const mcl::CurveParam& cp)
 	pairing(e2, aP, bQ);
 	GT::pow(e1, e1, a * b);
 	CYBOZU_TEST_EQUAL(e1, e2);
+	testCommon();
 	testBench(P, Q);
 	testSquareRoot();
 	testLagrange();
diff --git a/test/bn512_test.cpp b/test/bn512_test.cpp
index 905bfd3d..db2aff16 100644
--- a/test/bn512_test.cpp
+++ b/test/bn512_test.cpp
@@ -5,6 +5,7 @@
 #include <cybozu/xorshift.hpp>
 #include <mcl/bn512.hpp>
 #include <mcl/bn.hpp>
+#include "common_test.hpp"
 
 using namespace mcl::bn512;
 
@@ -33,6 +34,7 @@ void testCurve(const mcl::CurveParam& cp)
 	pairing(e2, aP, bQ);
 	GT::pow(e1, e1, a * b);
 	CYBOZU_TEST_EQUAL(e1, e2);
+	testCommon();
 	testBench(P, Q);
 	testSquareRoot();
 	testLagrange();
diff --git a/test/bn_test.cpp b/test/bn_test.cpp
index b66cad8c..a2557a3e 100644
--- a/test/bn_test.cpp
+++ b/test/bn_test.cpp
@@ -6,6 +6,7 @@ cybozu::CpuClock clk;
 #include <mcl/bn256.hpp>
 #include <cybozu/option.hpp>
 #include <cybozu/xorshift.hpp>
+#include "common_test.hpp"
 
 #if defined(__EMSCRIPTEN__) && !defined(MCL_AVOID_EXCEPTION_TEST)
 	#define MCL_AVOID_EXCEPTION_TEST
@@ -401,6 +402,7 @@ CYBOZU_TEST_AUTO(naive)
 		testPrecomputed(P, Q);
 		testMillerLoop2(P, Q);
 		testMillerLoopVec();
+		testCommon();
 		testBench(P, Q);
 		benchAddDblG1();
 		benchAddDblG2();
diff --git a/test/common_test.hpp b/test/common_test.hpp
new file mode 100644
index 00000000..e29fd1f4
--- /dev/null
+++ b/test/common_test.hpp
@@ -0,0 +1,25 @@
+void testMulVec()
+{
+	using namespace mcl::bn;
+	const size_t n = 5;
+	G1 xVec[n];
+	Fr yVec[n];
+	G1 ok;
+	ok.clear();
+	char c = 'a';
+	for (size_t i = 0; i < n; i++, c++) { // advance c so each xVec[i] is hashed from a different byte (otherwise the sum is invariant under permuting yVec)
+		hashAndMapToG1(xVec[i], &c, 1);
+		yVec[i].setByCSPRNG();
+		G1 t;
+		G1::mul(t, xVec[i], yVec[i]);
+		ok += t;
+	}
+	G1 z;
+	G1::mulVec(z, xVec, yVec, n);
+	CYBOZU_TEST_EQUAL(z, ok);
+}
+
+void testCommon()
+{
+	testMulVec();
+}

From aebcdf1a83d4a543101a178e2f8ab96979f54de8 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 5 Sep 2019 16:21:32 +0900
Subject: [PATCH 068/553] mulVec

---
 include/mcl/ec.hpp   | 115 ++++++++++++++++++++++++++++++++++++-------
 test/common_test.hpp |   4 +-
 2 files changed, 100 insertions(+), 19 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index bbaabd73..a4ced6fa 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -49,6 +49,22 @@ bool get_a_flag(const mcl::Fp2T<Fp>& x)
 
 } // mcl::ec
 
+namespace local {
+
+template<class Ec, class Vec>
+void addTbl(Ec& Q, const Ec *tbl, const Vec& naf, size_t i)
+{
+	if (i >= naf.size()) return;
+	int n = naf[i];
+	if (n > 0) {
+		Q += tbl[(n - 1) >> 1];
+	} else if (n < 0) {
+		Q -= tbl[(-n - 1) >> 1];
+	}
+}
+
+} // mcl::local
+
 /*
 	elliptic curve
 	y^2 = x^3 + ax + b (affine)
@@ -990,6 +1006,33 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	}
 	static inline void mulArrayBase(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime)
 	{
+#if 0
+		mpz_class v;
+		bool b;
+		gmp::setArray(&b, v, y, yn);
+		assert(b); (void)b;
+		const int w = 5;
+		const size_t tblSize = 1 << (w - 2);
+		typedef mcl::FixedArray<int8_t, sizeof(EcT::Fp) * 8 + 1> NafArray;
+		NafArray naf;
+		EcT tbl[tblSize];
+		gmp::getNAFwidth(&b, naf, v, w);
+		assert(b); (void)b;
+		EcT P2;
+		tbl[0] = x;
+		dbl(P2, x);
+		for (size_t i = 1; i < tblSize; i++) {
+			add(tbl[i], tbl[i - 1], P2);
+		}
+		z.clear();
+		for (size_t i = 0; i < naf.size(); i++) {
+			EcT::dbl(z, z);
+			local::addTbl(z, tbl, naf, naf.size() - 1 - i);
+		}
+		if (isNegative) {
+			neg(z, z);
+		}
+#else
 		EcT tmp;
 		const EcT *px = &x;
 		if (&z == &x) {
@@ -1001,6 +1044,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		if (isNegative) {
 			neg(z, z);
 		}
+#endif
 	}
 	/*
 		generic mul
@@ -1010,11 +1054,60 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		mulArrayBase(z, x, gmp::getUnit(y), gmp::getUnitSize(y), y < 0, constTime);
 	}
 	/*
-		z = sum_{i=0}^{n-1} xVec[i] * yVec[i]
+		z += sum_{i=0}^{n-1} xVec[i] * yVec[i]
+		@note &z != xVec[i]
 	*/
+private:
+	template<size_t N, class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
+	static inline void addMulVecN(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
+	{
+		assert(n <= N);
+		EcT t;
+		const int w = 5;
+		const size_t tblSize = 1 << (w - 2);
+		typedef mcl::FixedArray<int8_t, maxBitSize + 1> NafArray;
+		NafArray naf[N];
+		EcT tbl[N][tblSize];
+		bool b;
+		size_t maxBit = 0;
+		for (size_t i = 0; i < n; i++) {
+			gmp::getNAFwidth(&b, naf[i], yVec[i].getMpz(), w);
+			assert(b); (void)b;
+			if (naf[i].size() > maxBit) maxBit = naf[i].size();
+			tbl[i][0] = xVec[i];
+			EcT P2;
+			EcT::dbl(P2, tbl[i][0]);
+			for (size_t j = 1; j < tblSize; j++) {
+				EcT::add(tbl[i][j], tbl[i][j - 1], P2);
+			}
+		}
+		t.clear();
+		for (size_t i = 0; i < maxBit; i++) {
+			EcT::dbl(t, t);
+			for (size_t j = 0; j < n; j++) {
+				local::addTbl(t, tbl[j], naf[j], maxBit - 1 - i);
+			}
+		}
+		z += t;
+	}
+
+public:
 	template<class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
-	static inline void mulVec(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
+	static inline void mulVec(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n, bool old = false)
 	{
+		(void)old;
+#if 0
+if (!old) {
+		const size_t N = 16;
+		EcT r;
+		r.clear();
+		for (size_t i = 0; i < n; i += N) {
+			size_t remain = fp::min_(n - i, N);
+			addMulVecN<N>(r, xVec + i, yVec + i, remain);
+		}
+		z = r;
+} else {
+#else
 		EcT r, t;
 		r.clear();
 		for (size_t i = 0; i < n; i++) {
@@ -1022,6 +1115,8 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 			r += t;
 		}
 		z = r;
+#endif
+//}
 	}
 #ifndef CYBOZU_DONT_USE_EXCEPTION
 	static inline void init(const std::string& astr, const std::string& bstr, int mode = ec::Jacobi)
@@ -1081,22 +1176,6 @@ template<class Fp> void (*EcT<Fp>::mulArrayGLV)(EcT& z, const EcT& x, const fp::
 template<class Fp> int EcT<Fp>::mode_;
 #endif
 
-namespace local {
-
-template<class Ec, class Vec>
-void addTbl(Ec& Q, const Ec *tbl, const Vec& naf, size_t i)
-{
-	if (i >= naf.size()) return;
-	int n = naf[i];
-	if (n > 0) {
-		Q += tbl[(n - 1) >> 1];
-	} else if (n < 0) {
-		Q -= tbl[(-n - 1) >> 1];
-	}
-}
-
-} // mcl::local
-
 template<class Ec>
 struct GLV1T {
 	typedef typename Ec::Fp Fp;
diff --git a/test/common_test.hpp b/test/common_test.hpp
index e29fd1f4..400b5235 100644
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@@ -1,7 +1,7 @@
 void testMulVec()
 {
 	using namespace mcl::bn;
-	const size_t n = 5;
+	const size_t n = 3;
 	G1 xVec[n];
 	Fr yVec[n];
 	G1 ok;
@@ -17,6 +17,8 @@ void testMulVec()
 	G1 z;
 	G1::mulVec(z, xVec, yVec, n);
 	CYBOZU_TEST_EQUAL(z, ok);
+	CYBOZU_BENCH_C("mulVec(new)", 1000, G1::mulVec, z, xVec, yVec, n);
+	CYBOZU_BENCH_C("mulVec(old)", 1000, G1::mulVec, z, xVec, yVec, n, true);
 }
 
 void testCommon()

From 846b4ff8f6e0436eb417be7d8d91cf4db20a8cb1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 6 Sep 2019 21:06:06 +0900
Subject: [PATCH 069/553] add Ec::mulSmallInt

---
 include/mcl/ec.hpp | 126 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 123 insertions(+), 3 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index a4ced6fa..7ff7e8a3 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -994,9 +994,8 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	bool operator<=(const EcT& rhs) const { return !operator>(rhs); }
 	static inline void mulArray(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime = false)
 	{
-		if (!constTime && x.isZero()) {
-			z.clear();
-			return;
+		if (!constTime && yn == 1 && *y <= 16) {
+			if (mulSmallInt(z, x, static_cast<int>(*y), isNegative)) return;
 		}
 		if (mulArrayGLV && (constTime || yn > 1)) {
 			mulArrayGLV(z, x, y, yn, isNegative, constTime);
@@ -1004,6 +1003,127 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		}
 		mulArrayBase(z, x, y, yn, isNegative, constTime);
 	}
+	static inline bool mulSmallInt(EcT& z, const EcT& x, uint32_t y, bool isNegative)
+	{
+		switch (y) {
+		case 0: z.clear(); return true;
+		case 1: z = x; break;
+		case 2: dbl(z, x); break;
+		case 3: {
+			EcT t;
+			dbl(t, x);
+			add(z, t, x);
+			break;
+		}
+		case 4: {
+			dbl(z, x);
+			dbl(z, z);
+			break;
+		}
+		case 5: {
+			EcT t;
+			dbl(t, x);
+			dbl(t, t);
+			add(z, t, x);
+			break;
+		}
+		case 6: {
+			EcT t;
+			dbl(t, x);
+			add(z, t, x);
+			dbl(z, z);
+			break;
+		}
+		case 7: {
+			EcT t;
+			dbl(t, x);
+			dbl(t, t);
+			dbl(t, t);
+			sub(z, t, x);
+			break;
+		}
+		case 8: {
+			dbl(z, x);
+			dbl(z, z);
+			dbl(z, z);
+			break;
+		}
+		case 9: {
+			EcT t;
+			dbl(t, x);
+			dbl(t, t);
+			dbl(t, t);
+			add(z, t, x);
+			break;
+		}
+		case 10: {
+			EcT t;
+			dbl(t, x);
+			dbl(t, t);
+			add(z, t, x);
+			dbl(z, z);
+			break;
+		}
+		case 11: {
+			EcT t1, t2;
+			dbl(t1, x); // 2x
+			dbl(t2, t1);
+			dbl(t2, t2); // 8x
+			add(t2, t2, t1);
+			add(z, t2, x);
+			break;
+		}
+		case 12: {
+			EcT t1, t2;
+			dbl(t1, x);
+			dbl(t1, t1); // 4x
+			dbl(t2, t1); // 8x
+			add(z, t1, t2);
+			break;
+		}
+		case 13: {
+			EcT t1, t2;
+			dbl(t1, x);
+			dbl(t1, t1); // 4x
+			dbl(t2, t1); // 8x
+			add(t1, t1, t2); // 12x
+			add(z, t1, x);
+			break;
+		}
+		case 14: {
+			EcT t;
+			// (8 - 1) * 2
+			dbl(t, x);
+			dbl(t, t);
+			dbl(t, t);
+			sub(t, t, x);
+			dbl(z, t);
+			break;
+		}
+		case 15: {
+			EcT t;
+			dbl(t, x);
+			dbl(t, t);
+			dbl(t, t);
+			dbl(t, t);
+			sub(z, t, x);
+			break;
+		}
+		case 16: {
+			dbl(z, x);
+			dbl(z, z);
+			dbl(z, z);
+			dbl(z, z);
+			break;
+		}
+		default:
+			return false;
+		}
+		if (isNegative) {
+			neg(z, z);
+		}
+		return true;
+	}
 	static inline void mulArrayBase(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime)
 	{
 #if 0

From 13d9500f3f9ac721fe186abdd7d74da5fb1dec9b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 6 Sep 2019 22:17:58 +0900
Subject: [PATCH 070/553] Ec::mulSmallInt errors if negative

---
 include/mcl/ec.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 7ff7e8a3..708b8549 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -994,7 +994,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	bool operator<=(const EcT& rhs) const { return !operator>(rhs); }
 	static inline void mulArray(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime = false)
 	{
-		if (!constTime && yn == 1 && *y <= 16) {
+		if (!constTime && yn == 1 && *y <= 16 && !isNegative) {
 			if (mulSmallInt(z, x, static_cast<int>(*y), isNegative)) return;
 		}
 		if (mulArrayGLV && (constTime || yn > 1)) {

From 6f82259d9ea9ee4e81a397b0722b121a777e60dd Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 8 Sep 2019 15:01:25 +0900
Subject: [PATCH 071/553] [bug] fix Ec::add(A, B, A) for no-normalized A, B

---
 include/mcl/ec.hpp | 18 ++++++++++++------
 test/ec_test.cpp   | 42 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 708b8549..9a802e7e 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -504,12 +504,18 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 			return;
 		}
 		if (isPzOne) {
-			R.z = H;
+			if (isQzOne) {
+				R.z = H;
+			} else {
+				Fp::mul(R.z, H, Q.z);
+			}
 		} else {
-			Fp::mul(R.z, P.z, H);
-		}
-		if (!isQzOne) {
-			R.z *= Q.z;
+			if (isQzOne) {
+				Fp::mul(R.z, P.z, H);
+			} else {
+				Fp::mul(R.z, P.z, Q.z);
+				R.z *= H;
+			}
 		}
 		Fp::sqr(H3, H); // H^2
 		Fp::sqr(R.y, r); // r^2
@@ -994,7 +1000,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	bool operator<=(const EcT& rhs) const { return !operator>(rhs); }
 	static inline void mulArray(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime = false)
 	{
-		if (!constTime && yn == 1 && *y <= 16 && !isNegative) {
+		if (!constTime && yn == 1) {
 			if (mulSmallInt(z, x, static_cast<int>(*y), isNegative)) return;
 		}
 		if (mulArrayGLV && (constTime || yn > 1)) {
diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index 7999443e..3f1e8d42 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -205,11 +205,43 @@ struct Test {
 		Ec R;
 		R.clear();
 		for (int i = 0; i < 100; i++) {
-			Ec::mul(Q, P, i);
+			Q = P;
+			Ec::mul(Q, Q, i);
 			CYBOZU_TEST_EQUAL(Q, R);
+			Q = P;
+			if (Ec::mulSmallInt(Q, Q, i, false)) {
+				CYBOZU_TEST_EQUAL(Q, R);
+			}
 			R += P;
 		}
 	}
+	void add() const
+	{
+		Fp x(para.gx);
+		Fp y(para.gy);
+		Ec P1(x, y);
+		Ec P2, Q1, Q2;
+		Ec::dbl(P1, P1);
+		Ec::normalize(P2, P1);
+		Q1 = P1 + P1;
+		Ec::normalize(Q2, Q1);
+		Ec Ptbl[] = { P1, P2 };
+		Ec Qtbl[] = { Q1, Q2 };
+		for (int i = 0; i < 2; i++) {
+			for (int j = 0; j < 2; j++) {
+				Ec R1, R2, R3, R4;
+				R1 = Ptbl[i];
+				R2 = Qtbl[i];
+				Ec::add(R3, R1, R2);
+				Ec::add(R1, R1, R2);
+				CYBOZU_TEST_EQUAL(R1, R3);
+				R1 = Ptbl[i];
+				R2 = Qtbl[i];
+				Ec::add(R2, R1, R2);
+				CYBOZU_TEST_EQUAL(R2, R3);
+			}
+		}
+	}
 
 	void neg_mul() const
 	{
@@ -220,8 +252,13 @@ struct Test {
 		Ec R;
 		R.clear();
 		for (int i = 0; i < 100; i++) {
-			Ec::mul(Q, P, -i);
+			Q = P;
+			Ec::mul(Q, Q, -i);
 			CYBOZU_TEST_EQUAL(Q, R);
+			Q = P;
+			if (Ec::mulSmallInt(Q, Q, -i, true)) {
+				CYBOZU_TEST_EQUAL(Q, R);
+			}
 			R -= P;
 		}
 	}
@@ -476,6 +513,7 @@ mul 499.00usec
 		cstr();
 		ope();
 		mul();
+		add();
 		neg_mul();
 		mul_fp();
 		squareRoot();

From f4a2d2e3be8f46b274a1353815994233bab58c1e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 8 Sep 2019 15:04:44 +0900
Subject: [PATCH 072/553] add test for Ec::add, dbl

---
 test/ec_test.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index 3f1e8d42..0b12c308 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -215,7 +215,7 @@ struct Test {
 			R += P;
 		}
 	}
-	void add() const
+	void aliasAddDbl() const
 	{
 		Fp x(para.gx);
 		Fp y(para.gy);
@@ -231,15 +231,20 @@ struct Test {
 			for (int j = 0; j < 2; j++) {
 				Ec R1, R2, R3, R4;
 				R1 = Ptbl[i];
-				R2 = Qtbl[i];
+				R2 = Qtbl[j];
 				Ec::add(R3, R1, R2);
 				Ec::add(R1, R1, R2);
 				CYBOZU_TEST_EQUAL(R1, R3);
 				R1 = Ptbl[i];
-				R2 = Qtbl[i];
+				R2 = Qtbl[j];
 				Ec::add(R2, R1, R2);
 				CYBOZU_TEST_EQUAL(R2, R3);
 			}
+			Ec R1, R2;
+			R1 = Ptbl[i];
+			Ec::dbl(R2, R1);
+			Ec::dbl(R1, R1);
+			CYBOZU_TEST_EQUAL(R1, R2);
 		}
 	}
 
@@ -513,7 +518,7 @@ mul 499.00usec
 		cstr();
 		ope();
 		mul();
-		add();
+		aliasAddDbl();
 		neg_mul();
 		mul_fp();
 		squareRoot();

From 018bbe8e88e2470d4d76c656d3a2dd8274e406e7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 8 Sep 2019 15:05:02 +0900
Subject: [PATCH 073/553] [she] add test sheMul

---
 test/she_c_test.hpp | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/test/she_c_test.hpp b/test/she_c_test.hpp
index f7709080..58139f07 100644
--- a/test/she_c_test.hpp
+++ b/test/she_c_test.hpp
@@ -57,14 +57,40 @@ CYBOZU_TEST_AUTO(encDec)
 	CYBOZU_TEST_EQUAL(sheDecGT(&dec, &sec, &ct), 0);
 	CYBOZU_TEST_EQUAL(dec, m);
 
-	for (int m = -3; m < 3; m++) {
+	for (int m = -30; m < 30; m++) {
+		dec = 0;
 		sheEncG1(&c1, &pub, m);
+		CYBOZU_TEST_EQUAL(sheDecG1(&dec, &sec, &c1), 0);
+		CYBOZU_TEST_EQUAL(dec, m);
 		CYBOZU_TEST_EQUAL(sheIsZeroG1(&sec, &c1), m == 0);
+		dec = 0;
 		sheEncG2(&c2, &pub, m);
+		CYBOZU_TEST_EQUAL(sheDecG2(&dec, &sec, &c2), 0);
+		CYBOZU_TEST_EQUAL(dec, m);
 		CYBOZU_TEST_EQUAL(sheIsZeroG2(&sec, &c2), m == 0);
+		dec = 0;
 		sheEncGT(&ct, &pub, m);
+		CYBOZU_TEST_EQUAL(sheDecGT(&dec, &sec, &ct), 0);
+		CYBOZU_TEST_EQUAL(dec, m);
 		CYBOZU_TEST_EQUAL(sheIsZeroGT(&sec, &ct), m == 0);
 	}
+	for (int m = -30; m < 30; m++) {
+		dec = 0;
+		sheEncG1(&c1, &pub, 1);
+		sheMulG1(&c1, &c1, m);
+		CYBOZU_TEST_EQUAL(sheDecG1(&dec, &sec, &c1), 0);
+		CYBOZU_TEST_EQUAL(dec, m);
+		dec = 0;
+		sheEncG2(&c2, &pub, 1);
+		sheMulG2(&c2, &c2, m);
+		CYBOZU_TEST_EQUAL(sheDecG2(&dec, &sec, &c2), 0);
+		CYBOZU_TEST_EQUAL(dec, m);
+		dec = 0;
+		sheEncGT(&ct, &pub, 1);
+		sheMulGT(&ct, &ct, m);
+		CYBOZU_TEST_EQUAL(sheDecGT(&dec, &sec, &ct), 0);
+		CYBOZU_TEST_EQUAL(dec, m);
+	}
 }
 
 CYBOZU_TEST_AUTO(addMul)

From 51a3a60726046feb739a2fd6969afcd41c4e5d7f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 8 Sep 2019 15:08:51 +0900
Subject: [PATCH 074/553] v0.98

---
 include/mcl/op.hpp | 2 +-
 readme.md          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 0d21c687..e4cbedc1 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x097; /* 0xABC = A.BC */
+static const int version = 0x098; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index 7088a9fb..630e2e0a 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,7 @@ mcl is a library for pairing-based cryptography.
 The current version supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
+* v0.98 bugfix Ec::add(P, Q, R) when P == R
 * v0.97 add some C api functions
 * v0.96 improved scalar multiplication
 * mclBn_setETHserialization(true) (de)serialize acoording to [ETH2.0 serialization of BLS12-381](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#point-representations) when BLS12-381 is used.

From 3667c260fd0d498da9fa863e49348a7b1a4a22a6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 10 Sep 2019 13:14:35 +0900
Subject: [PATCH 075/553] [doc] fix supported curveType

---
 readme.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/readme.md b/readme.md
index 630e2e0a..dd1d195e 100644
--- a/readme.md
+++ b/readme.md
@@ -240,9 +240,9 @@ finalExp 546.259Kclk
 
 header        |support curveType        |sizeof Fr|sizeof Fp|
 --------------|-------------------------|---------|---------|
-bn256.hpp     |BN254                    |   32    |   32    |
-bls12_381.hpp |BLS12_381, BN254         |   32    |   48    |
-bn384.hpp     |BN381_1, BLS12_381, BN254|   48    |   48    |
+bn256.hpp     |BN254, BN_SNARK1         |   32    |   32    |
+bls12_381.hpp |the above + BLS12_381    |   32    |   48    |
+bn384.hpp     |the above + BN381_1      |   48    |   48    |
 
 ## C library
 

From b46e0071cefa42a5b1354c87de1e72c602283c38 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 11 Sep 2019 16:21:39 +0900
Subject: [PATCH 076/553] use new mulArray

---
 include/mcl/bn.hpp |  2 +-
 include/mcl/ec.hpp | 33 ++++++++++++++++++++++-----------
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 0a5744be..3a4489a6 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -2099,7 +2099,7 @@ inline void init(bool *pb, const mcl::CurveParam& cp = mcl::BN254, fp::Mode mode
 {
 	local::StaticVar<>::param.init(pb, cp, mode);
 	if (!*pb) return;
-	G1::setMulArrayGLV(local::GLV1::mulArray);
+	G1::setMulArrayGLV(local::GLV1::mulArrayGLV);
 	G2::setMulArrayGLV(local::mulArrayGLV2);
 	Fp12::setPowArrayGLV(local::powArrayGLV2);
 	G1::setCompressedExpression();
diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 9a802e7e..35c8b6f9 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -998,18 +998,22 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	bool operator>=(const EcT& rhs) const { return !operator<(rhs); }
 	bool operator>(const EcT& rhs) const { return rhs < *this; }
 	bool operator<=(const EcT& rhs) const { return !operator>(rhs); }
-	static inline void mulArray(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime = false)
+	static inline void mulArray(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime = false, bool useGLV = true)
 	{
-		if (!constTime && yn == 1) {
-			if (mulSmallInt(z, x, static_cast<int>(*y), isNegative)) return;
+		if (!constTime) {
+			while (yn > 0) {
+				if (y[yn - 1]) break;
+				yn--;
+			}
+			if (yn <= 1 && mulSmallInt(z, x, *y, isNegative)) return;
 		}
-		if (mulArrayGLV && (constTime || yn > 1)) {
+		if (useGLV && mulArrayGLV && (yn * sizeof(fp::Unit) > 8)) {
 			mulArrayGLV(z, x, y, yn, isNegative, constTime);
 			return;
 		}
 		mulArrayBase(z, x, y, yn, isNegative, constTime);
 	}
-	static inline bool mulSmallInt(EcT& z, const EcT& x, uint32_t y, bool isNegative)
+	static inline bool mulSmallInt(EcT& z, const EcT& x, fp::Unit y, bool isNegative)
 	{
 		switch (y) {
 		case 0: z.clear(); return true;
@@ -1132,16 +1136,22 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	}
 	static inline void mulArrayBase(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime)
 	{
-#if 0
+#if 1
+		(void)constTime;
 		mpz_class v;
 		bool b;
 		gmp::setArray(&b, v, y, yn);
 		assert(b); (void)b;
-		const int w = 5;
+		const int maxW = 5;
+		const int maxTblSize = 1 << (maxW - 2);
+		/*
+			L = log2(y), w = (L <= 32) ? 3 : (L <= 128) ? 4 : 5;
+		*/
+		const int w = (yn == 1 && *y <= (1ull << 32)) ? 3 : (yn * sizeof(fp::Unit) > 16) ? 5 : 4;
 		const size_t tblSize = 1 << (w - 2);
 		typedef mcl::FixedArray<int8_t, sizeof(EcT::Fp) * 8 + 1> NafArray;
 		NafArray naf;
-		EcT tbl[tblSize];
+		EcT tbl[maxTblSize];
 		gmp::getNAFwidth(&b, naf, v, w);
 		assert(b); (void)b;
 		EcT P2;
@@ -1174,10 +1184,11 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	}
 	/*
 		generic mul
+		GLV can't be applied in Fp12 - GT
 	*/
 	static inline void mulGeneric(EcT& z, const EcT& x, const mpz_class& y, bool constTime = false)
 	{
-		mulArrayBase(z, x, gmp::getUnit(y), gmp::getUnitSize(y), y < 0, constTime);
+		mulArray(z, x, gmp::getUnit(y), gmp::getUnitSize(y), y < 0, constTime, false);
 	}
 	/*
 		z += sum_{i=0}^{n-1} xVec[i] * yVec[i]
@@ -1388,7 +1399,7 @@ struct GLV1T {
 			local::addTbl(Q, tbl[1], naf[1], maxBit - 1 - i);
 		}
 	}
-	static void mulArray(Ec& z, const Ec& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
+	static void mulArrayGLV(Ec& z, const Ec& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
 	{
 		mpz_class s;
 		bool b;
@@ -1459,7 +1470,7 @@ void initCurve(bool *pb, int curveType, Ec *P = 0, mcl::fp::Mode mode = fp::FP_A
 	}
 	if (curveType == MCL_SECP256K1) {
 		GLV1T<Ec>::initForSecp256k1(Zn::getOp().mp);
-		Ec::setMulArrayGLV(GLV1T<Ec>::mulArray);
+		Ec::setMulArrayGLV(GLV1T<Ec>::mulArrayGLV);
 	} else {
 		Ec::setMulArrayGLV(0);
 	}

From 58265a1e09373b3d62aac176c1b2ac3027717d7a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 12 Sep 2019 10:54:15 +0900
Subject: [PATCH 077/553] remove warning of vc

---
 include/mcl/ec.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 35c8b6f9..bb88c2e8 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -1147,8 +1147,8 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		/*
 			L = log2(y), w = (L <= 32) ? 3 : (L <= 128) ? 4 : 5;
 		*/
-		const int w = (yn == 1 && *y <= (1ull << 32)) ? 3 : (yn * sizeof(fp::Unit) > 16) ? 5 : 4;
-		const size_t tblSize = 1 << (w - 2);
+		const int w = (yn == 1 && *y <= (fp::Unit(1) << 32)) ? 3 : (yn * sizeof(fp::Unit) > 16) ? 5 : 4;
+		const size_t tblSize = size_t(1) << (w - 2);
 		typedef mcl::FixedArray<int8_t, sizeof(EcT::Fp) * 8 + 1> NafArray;
 		NafArray naf;
 		EcT tbl[maxTblSize];

From 0cec8d45efd12d9abe20aea7ee70f8c0a4f07c89 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 12 Sep 2019 11:27:44 +0900
Subject: [PATCH 078/553] add test of mulVec

---
 test/ec_test.cpp | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index 0b12c308..5c4d3c25 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -532,6 +532,51 @@ mul 499.00usec
 	void operator=(const Test&);
 };
 
+void naiveMulVec(Ec& out, const Ec *xVec, const Zn *yVec, size_t n)
+{
+	Ec r, t;
+	r.clear();
+	for (size_t i = 0; i < n; i++) {
+		Ec::mul(t, xVec[i], yVec[i]);
+		r += t;
+	}
+	out = r;
+}
+
+void mulVec(const mcl::EcParam& para)
+{
+	if (para.bitSize > 384) return;
+	const Fp x(para.gx);
+	const Fp y(para.gy);
+	Ec P(x, y);
+	P += P;
+	const int N = 20;
+	Ec xVec[N];
+	Zn yVec[N];
+	Ec Q1, Q2;
+
+	Ec::dbl(xVec[0], P);
+	for (size_t i = 1; i < N; i++) {
+		xVec[i] += P;
+	}
+	const size_t nTbl[] = { 1, 2, 3, 5, 15, 16, 17 };
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(nTbl); i++) {
+		const size_t n = nTbl[i];
+		naiveMulVec(Q1, xVec, yVec, n);
+		Ec::mulVec(Q2, xVec, yVec, n);
+		CYBOZU_TEST_EQUAL(Q1, Q2);
+	}
+	const int C = 1000;
+	CYBOZU_BENCH_C("naive (1)", C, naiveMulVec, Q1, xVec, yVec, 1);
+	CYBOZU_BENCH_C("mulVec(1)", C, Ec::mulVec, Q1, xVec, yVec, 1);
+	CYBOZU_BENCH_C("naive (2)", C, naiveMulVec, Q1, xVec, yVec, 2);
+	CYBOZU_BENCH_C("mulVec(2)", C, Ec::mulVec, Q1, xVec, yVec, 2);
+	CYBOZU_BENCH_C("naive (3)", C, naiveMulVec, Q1, xVec, yVec, 3);
+	CYBOZU_BENCH_C("mulVec(3)", C, Ec::mulVec, Q1, xVec, yVec, 3);
+	CYBOZU_BENCH_C("naive (9)", C, naiveMulVec, Q1, xVec, yVec, 9);
+	CYBOZU_BENCH_C("mulVec(9)", C, Ec::mulVec, Q1, xVec, yVec, 9);
+}
+
 void test_sub_sub(const mcl::EcParam& para, mcl::fp::Mode fpMode)
 {
 	puts("Proj");
@@ -551,6 +596,7 @@ void test_sub(const mcl::EcParam *para, size_t paraNum)
 #endif
 #ifdef MCL_USE_XBYAK
 		test_sub_sub(para[i], mcl::fp::FP_XBYAK);
+		mulVec(para[i]);
 #endif
 	}
 }

From 2f1b4c9a5659cfb9f1a3e56171a2737fc1488346 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 12 Sep 2019 15:54:55 +0900
Subject: [PATCH 079/553] new mulVec

---
 include/mcl/ec.hpp | 51 ++++++++++++++++++----------------------------
 test/ec_test.cpp   | 25 ++++++++++-------------
 2 files changed, 31 insertions(+), 45 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index bb88c2e8..bd919346 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -1155,8 +1155,8 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		gmp::getNAFwidth(&b, naf, v, w);
 		assert(b); (void)b;
 		EcT P2;
-		tbl[0] = x;
 		dbl(P2, x);
+		tbl[0] = x;
 		for (size_t i = 1; i < tblSize; i++) {
 			add(tbl[i], tbl[i - 1], P2);
 		}
@@ -1191,69 +1191,58 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		mulArray(z, x, gmp::getUnit(y), gmp::getUnitSize(y), y < 0, constTime, false);
 	}
 	/*
-		z += sum_{i=0}^{n-1} xVec[i] * yVec[i]
+		z = sum_{i=0}^{n-1} xVec[i] * yVec[i]
+		return min(N, n)
 		@note &z != xVec[i]
 	*/
 private:
-	template<size_t N, class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
-	static inline void addMulVecN(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
+	template<size_t N = 32, class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
+	static inline size_t addMulVecN(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
 	{
-		assert(n <= N);
-		EcT t;
+		if (n > N) n = N;
 		const int w = 5;
 		const size_t tblSize = 1 << (w - 2);
 		typedef mcl::FixedArray<int8_t, maxBitSize + 1> NafArray;
 		NafArray naf[N];
 		EcT tbl[N][tblSize];
-		bool b;
 		size_t maxBit = 0;
 		for (size_t i = 0; i < n; i++) {
+			bool b;
 			gmp::getNAFwidth(&b, naf[i], yVec[i].getMpz(), w);
 			assert(b); (void)b;
 			if (naf[i].size() > maxBit) maxBit = naf[i].size();
-			tbl[i][0] = xVec[i];
 			EcT P2;
-			EcT::dbl(P2, tbl[i][0]);
+			EcT::dbl(P2, xVec[i]);
+			tbl[i][0] = xVec[i];
 			for (size_t j = 1; j < tblSize; j++) {
 				EcT::add(tbl[i][j], tbl[i][j - 1], P2);
 			}
 		}
-		t.clear();
+		z.clear();
 		for (size_t i = 0; i < maxBit; i++) {
-			EcT::dbl(t, t);
+			EcT::dbl(z, z);
 			for (size_t j = 0; j < n; j++) {
-				local::addTbl(t, tbl[j], naf[j], maxBit - 1 - i);
+				local::addTbl(z, tbl[j], naf[j], maxBit - 1 - i);
 			}
 		}
-		z += t;
+		return n;
 	}
 
 public:
 	template<class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
-	static inline void mulVec(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n, bool old = false)
+	static inline void mulVec(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
 	{
-		(void)old;
-#if 0
-if (!old) {
-		const size_t N = 16;
 		EcT r;
 		r.clear();
-		for (size_t i = 0; i < n; i += N) {
-			size_t remain = fp::min_(n - i, N);
-			addMulVecN<N>(r, xVec + i, yVec + i, remain);
-		}
-		z = r;
-} else {
-#else
-		EcT r, t;
-		r.clear();
-		for (size_t i = 0; i < n; i++) {
-			mul(t, xVec[i], yVec[i]);
+		while (n > 0) {
+			EcT t;
+			size_t done = addMulVecN(t, xVec, yVec, n);
 			r += t;
+			xVec += done;
+			yVec += done;
+			n -= done;
 		}
 		z = r;
-#endif
-//}
 	}
 #ifndef CYBOZU_DONT_USE_EXCEPTION
 	static inline void init(const std::string& astr, const std::string& bstr, int mode = ec::Jacobi)
diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index 5c4d3c25..2beac015 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -550,31 +550,28 @@ void mulVec(const mcl::EcParam& para)
 	const Fp y(para.gy);
 	Ec P(x, y);
 	P += P;
-	const int N = 20;
+	const int N = 33;
 	Ec xVec[N];
 	Zn yVec[N];
 	Ec Q1, Q2;
 
-	Ec::dbl(xVec[0], P);
-	for (size_t i = 1; i < N; i++) {
-		xVec[i] += P;
+	Ec::dbl(P, P);
+	for (size_t i = 0; i < N; i++) {
+		Ec::mul(xVec[i], P, i + 3);
+		yVec[i].setByCSPRNG();
 	}
-	const size_t nTbl[] = { 1, 2, 3, 5, 15, 16, 17 };
+	const size_t nTbl[] = { 1, 2, 3, 5, 30, 31, 32, 33 };
+	const int C = 400;
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(nTbl); i++) {
 		const size_t n = nTbl[i];
+		CYBOZU_TEST_ASSERT(n <= N);
 		naiveMulVec(Q1, xVec, yVec, n);
 		Ec::mulVec(Q2, xVec, yVec, n);
 		CYBOZU_TEST_EQUAL(Q1, Q2);
+		printf("n=%zd\n", n);
+		CYBOZU_BENCH_C("naive ", C, naiveMulVec, Q1, xVec, yVec, n);
+		CYBOZU_BENCH_C("mulVec", C, Ec::mulVec, Q1, xVec, yVec, n);
 	}
-	const int C = 1000;
-	CYBOZU_BENCH_C("naive (1)", C, naiveMulVec, Q1, xVec, yVec, 1);
-	CYBOZU_BENCH_C("mulVec(1)", C, Ec::mulVec, Q1, xVec, yVec, 1);
-	CYBOZU_BENCH_C("naive (2)", C, naiveMulVec, Q1, xVec, yVec, 2);
-	CYBOZU_BENCH_C("mulVec(2)", C, Ec::mulVec, Q1, xVec, yVec, 2);
-	CYBOZU_BENCH_C("naive (3)", C, naiveMulVec, Q1, xVec, yVec, 3);
-	CYBOZU_BENCH_C("mulVec(3)", C, Ec::mulVec, Q1, xVec, yVec, 3);
-	CYBOZU_BENCH_C("naive (9)", C, naiveMulVec, Q1, xVec, yVec, 9);
-	CYBOZU_BENCH_C("mulVec(9)", C, Ec::mulVec, Q1, xVec, yVec, 9);
 }
 
 void test_sub_sub(const mcl::EcParam& para, mcl::fp::Mode fpMode)

From 9f09970f70b317959a3421f5831f3df294985beb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 12 Sep 2019 16:29:08 +0900
Subject: [PATCH 080/553] fix for c++03

---
 include/mcl/ec.hpp   |  4 ++--
 test/bls12_test.cpp  |  2 +-
 test/bn384_test.cpp  |  2 +-
 test/bn512_test.cpp  |  2 +-
 test/bn_test.cpp     |  2 +-
 test/common_test.hpp | 55 +++++++++++++++++++++++++++++---------------
 6 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index bd919346..7a648688 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -1196,7 +1196,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		@note &z != xVec[i]
 	*/
 private:
-	template<size_t N = 32, class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
+	template<size_t N, class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
 	static inline size_t addMulVecN(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
 	{
 		if (n > N) n = N;
@@ -1236,7 +1236,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		r.clear();
 		while (n > 0) {
 			EcT t;
-			size_t done = addMulVecN(t, xVec, yVec, n);
+			size_t done = addMulVecN<32>(t, xVec, yVec, n);
 			r += t;
 			xVec += done;
 			yVec += done;
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index c86fed2d..6aac204e 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -384,7 +384,7 @@ CYBOZU_TEST_AUTO(naive)
 		testPairing(P, Q, ts.e);
 		testPrecomputed(P, Q);
 		testMillerLoop2(P, Q);
-		testCommon();
+		testCommon(P, Q);
 		testBench(P, Q);
 	}
 	int count = (int)clk.getCount();
diff --git a/test/bn384_test.cpp b/test/bn384_test.cpp
index a8c7cdbb..e248d48e 100644
--- a/test/bn384_test.cpp
+++ b/test/bn384_test.cpp
@@ -40,7 +40,7 @@ void testCurve(const mcl::CurveParam& cp)
 	pairing(e2, aP, bQ);
 	GT::pow(e1, e1, a * b);
 	CYBOZU_TEST_EQUAL(e1, e2);
-	testCommon();
+	testCommon(P, Q);
 	testBench(P, Q);
 	testSquareRoot();
 	testLagrange();
diff --git a/test/bn512_test.cpp b/test/bn512_test.cpp
index db2aff16..ebbc7c00 100644
--- a/test/bn512_test.cpp
+++ b/test/bn512_test.cpp
@@ -34,7 +34,7 @@ void testCurve(const mcl::CurveParam& cp)
 	pairing(e2, aP, bQ);
 	GT::pow(e1, e1, a * b);
 	CYBOZU_TEST_EQUAL(e1, e2);
-	testCommon();
+	testCommon(P, Q);
 	testBench(P, Q);
 	testSquareRoot();
 	testLagrange();
diff --git a/test/bn_test.cpp b/test/bn_test.cpp
index a2557a3e..397f9a11 100644
--- a/test/bn_test.cpp
+++ b/test/bn_test.cpp
@@ -402,7 +402,7 @@ CYBOZU_TEST_AUTO(naive)
 		testPrecomputed(P, Q);
 		testMillerLoop2(P, Q);
 		testMillerLoopVec();
-		testCommon();
+		testCommon(P, Q);
 		testBench(P, Q);
 		benchAddDblG1();
 		benchAddDblG2();
diff --git a/test/common_test.hpp b/test/common_test.hpp
index 400b5235..e5378e98 100644
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@@ -1,27 +1,44 @@
-void testMulVec()
+template<class G, class F>
+void naiveMulVec(G& out, const G *xVec, const F *yVec, size_t n)
 {
-	using namespace mcl::bn;
-	const size_t n = 3;
-	G1 xVec[n];
-	Fr yVec[n];
-	G1 ok;
-	ok.clear();
-	char c = 'a';
+	G r, t;
+	r.clear();
 	for (size_t i = 0; i < n; i++) {
-		hashAndMapToG1(xVec[i], &c, 1);
+		G::mul(t, xVec[i], yVec[i]);
+		r += t;
+	}
+	out = r;
+}
+
+template<class G>
+void testMulVec(const G& P)
+{
+	using namespace mcl::bn;
+	const int N = 33;
+	G xVec[N];
+	mcl::bn::Fr yVec[N];
+
+	for (size_t i = 0; i < N; i++) {
+		G::mul(xVec[i], P, i + 3);
 		yVec[i].setByCSPRNG();
-		G1 t;
-		G1::mul(t, xVec[i], yVec[i]);
-		ok += t;
 	}
-	G1 z;
-	G1::mulVec(z, xVec, yVec, n);
-	CYBOZU_TEST_EQUAL(z, ok);
-	CYBOZU_BENCH_C("mulVec(new)", 1000, G1::mulVec, z, xVec, yVec, n);
-	CYBOZU_BENCH_C("mulVec(old)", 1000, G1::mulVec, z, xVec, yVec, n, true);
+	const size_t nTbl[] = { 1, 2, 3, 5, 30, 31, 32, 33 };
+	const int C = 400;
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(nTbl); i++) {
+		const size_t n = nTbl[i];
+		G Q1, Q2;
+		CYBOZU_TEST_ASSERT(n <= N);
+		naiveMulVec(Q1, xVec, yVec, n);
+		G::mulVec(Q2, xVec, yVec, n);
+		CYBOZU_TEST_EQUAL(Q1, Q2);
+		printf("n=%zd\n", n);
+		CYBOZU_BENCH_C("naive ", C, naiveMulVec, Q1, xVec, yVec, n);
+		CYBOZU_BENCH_C("mulVec", C, G::mulVec, Q1, xVec, yVec, n);
+	}
 }
 
-void testCommon()
+template<class G1, class G2>
+void testCommon(const G1& P, const G2&)
 {
-	testMulVec();
+	testMulVec(P);
 }

From afb1c812f1133edfeb8466dee6bcb01c8001e1f7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 13 Sep 2019 11:30:54 +0900
Subject: [PATCH 081/553] support mulVec

---
 include/mcl/bn.hpp       | 111 +++++++++++++++++++++++-----
 include/mcl/ec.hpp       | 151 ++++++++++++++++++++-------------------
 include/mcl/gmp_util.hpp |   4 +-
 include/mcl/operator.hpp |  22 +++++-
 include/mcl/util.hpp     |  57 ++++++++++++++-
 test/bls12_test.cpp      |   2 +-
 test/bn384_test.cpp      |   2 +-
 test/bn512_test.cpp      |   2 +-
 test/bn_test.cpp         |   2 +-
 test/common_test.hpp     |  75 ++++++++++++++++---
 test/ec_test.cpp         |   6 +-
 test/fp_util_test.cpp    |  42 +++++++++++
 test/glv_test.cpp        |   6 +-
 test/she_test.cpp        |   2 +-
 14 files changed, 370 insertions(+), 114 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 3a4489a6..461eb195 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -560,14 +560,13 @@ struct MapTo {
 	}
 };
 
-typedef mcl::FixedArray<int8_t, MCL_MAX_FR_BIT_SIZE / 2 + 2> NafArray;
 
 /*
 	Software implementation of Attribute-Based Encryption: Appendixes
 	GLV for G1 on BN/BLS12
 */
 
-struct GLV1 : mcl::GLV1T<G1> {
+struct GLV1 : mcl::GLV1T<G1, Fr> {
 	static bool usePrecomputedTable(int curveType)
 	{
 		if (curveType < 0) return false;
@@ -577,7 +576,6 @@ struct GLV1 : mcl::GLV1T<G1> {
 			size_t rBitSize;
 			const char *v0, *v1;
 			const char *B[2][2];
-			const char *r;
 		} tbl[] = {
 			{
 				MCL_BN254,
@@ -595,7 +593,6 @@ struct GLV1 : mcl::GLV1T<G1> {
 						"-61818000000000020400000000000003",
 					},
 				},
-				"2523648240000001ba344d8000000007ff9f800000000010a10000000000000d",
 			},
 		};
 		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
@@ -609,20 +606,18 @@ struct GLV1 : mcl::GLV1T<G1> {
 			gmp::setStr(&b, B[0][1], tbl[i].B[0][1], 16); if (!b) continue;
 			gmp::setStr(&b, B[1][0], tbl[i].B[1][0], 16); if (!b) continue;
 			gmp::setStr(&b, B[1][1], tbl[i].B[1][1], 16); if (!b) continue;
-			gmp::setStr(&b, r, tbl[i].r, 16); if (!b) continue;
 			return true;
 		}
 		return false;
 	}
-	static void initForBN(const mpz_class& _r, const mpz_class& z, bool isBLS12 = false, int curveType = -1)
+	static void initForBN(const mpz_class& z, bool isBLS12 = false, int curveType = -1)
 	{
 		if (usePrecomputedTable(curveType)) return;
 		bool b = Fp::squareRoot(rw, -3);
 		assert(b);
 		(void)b;
 		rw = -(rw + 1) / 2;
-		r = _r;
-		rBitSize = gmp::getBitSize(r);
+		rBitSize = Fr::getOp().bitSize;
 		rBitSize = (rBitSize + fp::UnitBitSize - 1) & ~(fp::UnitBitSize - 1);// a little better size
 		if (isBLS12) {
 			/*
@@ -648,6 +643,7 @@ struct GLV1 : mcl::GLV1T<G1> {
 			B[1][1] = -6 * z * z - 4 * z - 1;
 		}
 		// [v0 v1] = [r 0] * B^(-1)
+		const mpz_class& r = Fr::getOp().mp;
 		v0 = ((-B[1][1]) << rBitSize) / r;
 		v1 = ((B[1][0]) << rBitSize) / r;
 	}
@@ -656,22 +652,24 @@ struct GLV1 : mcl::GLV1T<G1> {
 /*
 	GLV method for G2 and GT on BN/BLS12
 */
+template<class _Fr>
 struct GLV2 {
+	typedef _Fr Fr;
+	typedef mcl::FixedArray<int8_t, sizeof(Fr) * 8 / 4 + 4> NafArray;
 	size_t rBitSize;
 	mpz_class B[4][4];
-	mpz_class r;
 	mpz_class v[4];
 	mpz_class z;
 	mpz_class abs_z;
 	bool isBLS12;
 	GLV2() : rBitSize(0), isBLS12(false) {}
-	void init(const mpz_class& r, const mpz_class& z, bool isBLS12 = false)
+	void init(const mpz_class& z, bool isBLS12 = false)
 	{
-		this->r = r;
+		const mpz_class& r = Fr::getOp().mp;
 		this->z = z;
 		this->abs_z = z < 0 ? -z : z;
 		this->isBLS12 = isBLS12;
-		rBitSize = gmp::getBitSize(r);
+		rBitSize = Fr::getOp().bitSize;
 		rBitSize = (rBitSize + mcl::fp::UnitBitSize - 1) & ~(mcl::fp::UnitBitSize - 1);// a little better size
 		mpz_class z2p1 = z * 2 + 1;
 		B[0][0] = z + 1;
@@ -758,6 +756,11 @@ struct GLV2 {
 	template<class T>
 	void mul(T& Q, const T& P, mpz_class x, bool constTime = false) const
 	{
+#if 1
+		(void)constTime;
+		mulVecNGLV(Q, &P, &x, 1);
+#else
+		const mpz_class& r = Fr::getOp().mp;
 		const int w = 5;
 		const size_t tblSize = 1 << (w - 2);
 		const size_t splitN = 4;
@@ -805,8 +808,65 @@ struct GLV2 {
 			mcl::local::addTbl(Q, tbl[2], naf[2], maxBit - 1 - i);
 			mcl::local::addTbl(Q, tbl[3], naf[3], maxBit - 1 - i);
 		}
+#endif
 	}
-	void pow(Fp12& z, const Fp12& x, mpz_class y, bool constTime = false) const
+	template<class T>
+	size_t mulVecNGLV(T& z, const T *xVec, const mpz_class *yVec, size_t n) const
+	{
+		const mpz_class& r = Fr::getOp().mp;
+		const size_t N = 16;
+		if (n > N) n = N;
+		const int w = 5;
+		const size_t tblSize = 1 << (w - 2);
+		const int splitN = 4;
+		NafArray naf[N][splitN];
+		T tbl[N][splitN][tblSize];
+		bool b;
+		mpz_class u[splitN], y;
+		size_t maxBit = 0;
+
+		for (size_t i = 0; i < n; i++) {
+			y = yVec[i];
+			y %= r;
+			if (y < 0) {
+				y += r;
+			}
+			split(u, y);
+
+			for (int j = 0; j < splitN; j++) {
+				gmp::getNAFwidth(&b, naf[i][j], u[j], w);
+				assert(b); (void)b;
+				if (naf[i][j].size() > maxBit) maxBit = naf[i][j].size();
+			}
+
+			T P2;
+			T::dbl(P2, xVec[i]);
+			tbl[i][0][0] = xVec[i];
+			Frobenius(tbl[i][1][0], tbl[i][0][0]);
+			Frobenius(tbl[i][2][0], tbl[i][1][0]);
+			Frobenius(tbl[i][3][0], tbl[i][2][0]);
+			for (size_t j = 1; j < tblSize; j++) {
+				T::add(tbl[i][0][j], tbl[i][0][j - 1], P2);
+				Frobenius(tbl[i][1][j], tbl[i][0][j]);
+				Frobenius(tbl[i][2][j], tbl[i][1][j]);
+				Frobenius(tbl[i][3][j], tbl[i][2][j]);
+			}
+		}
+		z.clear();
+		for (size_t i = 0; i < maxBit; i++) {
+			const size_t bit = maxBit - 1 - i;
+			T::dbl(z, z);
+			for (size_t j = 0; j < n; j++) {
+				mcl::local::addTbl(z, tbl[j][0], naf[j][0], bit);
+				mcl::local::addTbl(z, tbl[j][1], naf[j][1], bit);
+				mcl::local::addTbl(z, tbl[j][2], naf[j][2], bit);
+				mcl::local::addTbl(z, tbl[j][3], naf[j][3], bit);
+			}
+		}
+		return n;
+
+	}
+	void pow(Fp12& z, const Fp12& x, const mpz_class& y, bool constTime = false) const
 	{
 		typedef GroupMtoA<Fp12> AG; // as additive group
 		AG& _z = static_cast<AG&>(z);
@@ -824,7 +884,7 @@ struct Param {
 	mpz_class p;
 	mpz_class r;
 	local::MapTo mapTo;
-	local::GLV2 glv2;
+	local::GLV2<Fr> glv2;
 	// for G2 Frobenius
 	Fp2 g2;
 	Fp2 g3;
@@ -939,8 +999,8 @@ struct Param {
 		} else {
 			mapTo.init(2 * p - r, z, cp.curveType);
 		}
-		GLV1::initForBN(r, z, isBLS12, cp.curveType);
-		glv2.init(r, z, isBLS12);
+		GLV1::initForBN(z, isBLS12, cp.curveType);
+		glv2.init(z, isBLS12);
 		basePoint.clear();
 		*pb = true;
 	}
@@ -1007,6 +1067,19 @@ inline void powArrayGLV2(Fp12& z, const Fp12& x, const mcl::fp::Unit *y, size_t
 	BN::param.glv2.pow(z, x, s, constTime);
 }
 
+inline size_t mulVecNGLV2(G2& z, const G2 *xVec, const mpz_class *yVec, size_t n)
+{
+	return BN::param.glv2.mulVecNGLV(z, xVec, yVec, n);
+}
+
+inline size_t powVecNGLV2(Fp12& z, const Fp12 *xVec, const mpz_class *yVec, size_t n)
+{
+	typedef GroupMtoA<Fp12> AG; // as additive group
+	AG& _z = static_cast<AG&>(z);
+	const AG *_xVec = static_cast<const AG*>(xVec);
+	return BN::param.glv2.mulVecNGLV(_z, _xVec, yVec, n);
+}
+
 /*
 	Faster Squaring in the Cyclotomic Subgroup of Sixth Degree Extensions
 	Robert Granger, Michael Scott
@@ -2099,9 +2172,9 @@ inline void init(bool *pb, const mcl::CurveParam& cp = mcl::BN254, fp::Mode mode
 {
 	local::StaticVar<>::param.init(pb, cp, mode);
 	if (!*pb) return;
-	G1::setMulArrayGLV(local::GLV1::mulArrayGLV);
-	G2::setMulArrayGLV(local::mulArrayGLV2);
-	Fp12::setPowArrayGLV(local::powArrayGLV2);
+	G1::setMulArrayGLV(local::GLV1::mulArrayGLV, local::GLV1::mulVecNGLV);
+	G2::setMulArrayGLV(local::mulArrayGLV2, local::mulVecNGLV2);
+	Fp12::setPowArrayGLV(local::powArrayGLV2, local::powVecNGLV2);
 	G1::setCompressedExpression();
 	G2::setCompressedExpression();
 	*pb = true;
diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 7a648688..4adc9ecf 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -30,6 +30,9 @@ enum Mode {
 
 namespace local {
 
+const size_t maxMulVecN = 32; // inner loop of mulVec
+const size_t maxMulVecNGLV = 16; // inner loop of mulVec with GLV
+
 // x is negative <=> x < half(:=(p+1)/2) <=> a = 1
 template<class Fp>
 bool get_a_flag(const Fp& x)
@@ -98,6 +101,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	static bool verifyOrder_;
 	static mpz_class order_;
 	static void (*mulArrayGLV)(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime);
+	static size_t (*mulVecNGLV)(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn);
 	/* default constructor is undefined value */
 	EcT() {}
 	EcT(const Fp& _x, const Fp& _y)
@@ -211,6 +215,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		verifyOrder_ = false;
 		order_ = 0;
 		mulArrayGLV = 0;
+		mulVecNGLV = 0;
 #ifdef MCL_EC_USE_AFFINE
 		cybozu::disable_warning_unused_variable(mode);
 #else
@@ -232,9 +237,10 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 			// don't clear order_ because it is used for isValidOrder()
 		}
 	}
-	static void setMulArrayGLV(void f(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime))
+	static void setMulArrayGLV(void f(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime), size_t g(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn) = 0)
 	{
 		mulArrayGLV = f;
+		mulVecNGLV = g;
 	}
 	static inline void init(bool *pb, const char *astr, const char *bstr, int mode = ec::Jacobi)
 	{
@@ -1001,10 +1007,11 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	static inline void mulArray(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime = false, bool useGLV = true)
 	{
 		if (!constTime) {
-			while (yn > 0) {
-				if (y[yn - 1]) break;
-				yn--;
+			if (yn == 0) {
+				z.clear();
+				return;
 			}
+			yn = fp::getNonZeroArraySize(y, yn);
 			if (yn <= 1 && mulSmallInt(z, x, *y, isNegative)) return;
 		}
 		if (useGLV && mulArrayGLV && (yn * sizeof(fp::Unit) > 8)) {
@@ -1136,12 +1143,12 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	}
 	static inline void mulArrayBase(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime)
 	{
-#if 1
 		(void)constTime;
 		mpz_class v;
 		bool b;
 		gmp::setArray(&b, v, y, yn);
 		assert(b); (void)b;
+		if (isNegative) v = -v;
 		const int maxW = 5;
 		const int maxTblSize = 1 << (maxW - 2);
 		/*
@@ -1165,22 +1172,6 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 			EcT::dbl(z, z);
 			local::addTbl(z, tbl, naf, naf.size() - 1 - i);
 		}
-		if (isNegative) {
-			neg(z, z);
-		}
-#else
-		EcT tmp;
-		const EcT *px = &x;
-		if (&z == &x) {
-			tmp = x;
-			px = &tmp;
-		}
-		z.clear();
-		fp::powGeneric(z, *px, y, yn, EcT::add, EcT::dbl, EcT::normalize, constTime ? Fp::BaseFp::getBitSize() : 0);
-		if (isNegative) {
-			neg(z, z);
-		}
-#endif
 	}
 	/*
 		generic mul
@@ -1196,19 +1187,19 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		@note &z != xVec[i]
 	*/
 private:
-	template<size_t N, class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
-	static inline size_t addMulVecN(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
+	static inline size_t mulVecN(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t n)
 	{
+		const size_t N = mcl::ec::local::maxMulVecN;
 		if (n > N) n = N;
 		const int w = 5;
 		const size_t tblSize = 1 << (w - 2);
-		typedef mcl::FixedArray<int8_t, maxBitSize + 1> NafArray;
+		typedef mcl::FixedArray<int8_t, sizeof(EcT::Fp) * 8 + 1> NafArray;
 		NafArray naf[N];
 		EcT tbl[N][tblSize];
 		size_t maxBit = 0;
 		for (size_t i = 0; i < n; i++) {
 			bool b;
-			gmp::getNAFwidth(&b, naf[i], yVec[i].getMpz(), w);
+			gmp::getNAFwidth(&b, naf[i], yVec[i], w);
 			assert(b); (void)b;
 			if (naf[i].size() > maxBit) maxBit = naf[i].size();
 			EcT P2;
@@ -1229,14 +1220,22 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	}
 
 public:
-	template<class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
-	static inline void mulVec(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
+	static inline void mulVec(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t n)
 	{
+		size_t (*f)(EcT&, const EcT *, const mpz_class *, size_t n) = mulVecN;
+		/*
+			mulVecNGLV is a little slow for large n
+		*/
+		if (mulVecNGLV && n < mcl::ec::local::maxMulVecNGLV) {
+			size_t done = mulVecNGLV(z, xVec, yVec, n);
+			assert(done == n); (void)done;
+			return;
+		}
 		EcT r;
 		r.clear();
 		while (n > 0) {
 			EcT t;
-			size_t done = addMulVecN<32>(t, xVec, yVec, n);
+			size_t done = f(t, xVec, yVec, n);
 			r += t;
 			xVec += done;
 			yVec += done;
@@ -1298,18 +1297,20 @@ template<class Fp> int EcT<Fp>::ioMode_;
 template<class Fp> bool EcT<Fp>::verifyOrder_;
 template<class Fp> mpz_class EcT<Fp>::order_;
 template<class Fp> void (*EcT<Fp>::mulArrayGLV)(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime);
+template<class Fp> size_t (*EcT<Fp>::mulVecNGLV)(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn);
 #ifndef MCL_EC_USE_AFFINE
 template<class Fp> int EcT<Fp>::mode_;
 #endif
 
-template<class Ec>
+// r = the order of Ec
+template<class Ec, class _Fr>
 struct GLV1T {
 	typedef typename Ec::Fp Fp;
+	typedef _Fr Fr;
 	static Fp rw; // rw = 1 / w = (-1 - sqrt(-3)) / 2
 	static size_t rBitSize;
 	static mpz_class v0, v1;
 	static mpz_class B[2][2];
-	static mpz_class r;
 public:
 #ifndef CYBOZU_DONT_USE_STRING
 	static void dump(const mpz_class& x)
@@ -1323,7 +1324,6 @@ struct GLV1T {
 		dump(v0);
 		dump(v1);
 		dump(B[0][0]); dump(B[0][1]); dump(B[1][0]); dump(B[1][1]);
-		dump(r);
 	}
 #endif
 	/*
@@ -1346,47 +1346,55 @@ struct GLV1T {
 		a = x - (t * B[0][0] + b * B[1][0]);
 		b = - (t * B[0][1] + b * B[1][1]);
 	}
-	static void mul(Ec& Q, const Ec& P, mpz_class x, bool constTime = false)
+	static void mul(Ec& Q, const Ec& P, const mpz_class& x, bool /*constTime*/ = false)
 	{
+		mulVecNGLV(Q, &P, &x, 1);
+	}
+	static inline size_t mulVecNGLV(Ec& z, const Ec *xVec, const mpz_class *yVec, size_t n)
+	{
+		const size_t N = mcl::ec::local::maxMulVecNGLV;
+		if (n > N) n = N;
 		const int w = 5;
+		const mpz_class& r = Fr::getOp().mp;
 		const size_t tblSize = 1 << (w - 2);
-		typedef mcl::FixedArray<int8_t, sizeof(Fp) * 8 / 2 + 2> NafArray;
-		NafArray naf[2];
-		mpz_class u[2];
-		Ec tbl[2][tblSize];
+		typedef mcl::FixedArray<int8_t, sizeof(Fr) * 8 / 2 + 2> NafArray;
+		NafArray naf[N][2];
+		Ec tbl[N][2][tblSize];
 		bool b;
+		mpz_class u[2], y;
+		size_t maxBit = 0;
+		for (size_t i = 0; i < n; i++) {
+			y = yVec[i];
+			y %= r;
+			if (y < 0) {
+				y += r;
+			}
+			split(u[0], u[1], y);
 
-		x %= r;
-		if (x == 0) {
-			Q.clear();
-			if (!constTime) return;
-		}
-		if (x < 0) {
-			x += r;
-		}
-		split(u[0], u[1], x);
-		gmp::getNAFwidth(&b, naf[0], u[0], w);
-		assert(b); (void)b;
-		gmp::getNAFwidth(&b, naf[1], u[1], w);
-		assert(b); (void)b;
+			for (int j = 0; j < 2; j++) {
+				gmp::getNAFwidth(&b, naf[i][j], u[j], w);
+				assert(b); (void)b;
+				if (naf[i][j].size() > maxBit) maxBit = naf[i][j].size();
+			}
 
-		tbl[0][0] = P;
-		mulLambda(tbl[1][0], tbl[0][0]);
-		{
 			Ec P2;
-			Ec::dbl(P2, P);
-			for (size_t i = 1; i < tblSize; i++) {
-				Ec::add(tbl[0][i], tbl[0][i - 1], P2);
-				mulLambda(tbl[1][i], tbl[0][i]);
+			Ec::dbl(P2, xVec[i]);
+			tbl[i][0][0] = xVec[i];
+			mulLambda(tbl[i][1][0], tbl[i][0][0]);
+			for (size_t j = 1; j < tblSize; j++) {
+				Ec::add(tbl[i][0][j], tbl[i][0][j - 1], P2);
+				mulLambda(tbl[i][1][j], tbl[i][0][j]);
 			}
 		}
-		const size_t maxBit = fp::max_(naf[0].size(), naf[1].size());
-		Q.clear();
+		z.clear();
 		for (size_t i = 0; i < maxBit; i++) {
-			Ec::dbl(Q, Q);
-			local::addTbl(Q, tbl[0], naf[0], maxBit - 1 - i);
-			local::addTbl(Q, tbl[1], naf[1], maxBit - 1 - i);
+			Ec::dbl(z, z);
+			for (size_t j = 0; j < n; j++) {
+				local::addTbl(z, tbl[j][0], naf[j][0], maxBit - 1 - i);
+				local::addTbl(z, tbl[j][1], naf[j][1], maxBit - 1 - i);
+			}
 		}
+		return n;
 	}
 	static void mulArrayGLV(Ec& z, const Ec& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
 	{
@@ -1400,14 +1408,13 @@ struct GLV1T {
 	/*
 		initForBN() is defined in bn.hpp
 	*/
-	static void initForSecp256k1(const mpz_class& _r)
+	static void initForSecp256k1()
 	{
 		bool b = Fp::squareRoot(rw, -3);
 		assert(b);
 		(void)b;
 		rw = -(rw + 1) / 2;
-		r = _r;
-		rBitSize = gmp::getBitSize(r);
+		rBitSize = Fr::getOp().bitSize;
 		rBitSize = (rBitSize + fp::UnitBitSize - 1) & ~(fp::UnitBitSize - 1);
 		gmp::setStr(&b, B[0][0], "0x3086d221a7d46bcde86c90e49284eb15");
 		assert(b); (void)b;
@@ -1416,18 +1423,18 @@ struct GLV1T {
 		gmp::setStr(&b, B[1][0], "0x114ca50f7a8e2f3f657c1108d9d44cfd8");
 		assert(b); (void)b;
 		B[1][1] = B[0][0];
+		const mpz_class& r = Fr::getOp().mp;
 		v0 = ((B[1][1]) << rBitSize) / r;
 		v1 = ((-B[0][1]) << rBitSize) / r;
 	}
 };
 
 // rw = 1 / w = (-1 - sqrt(-3)) / 2
-template<class Ec> typename Ec::Fp GLV1T<Ec>::rw;
-template<class Ec> size_t GLV1T<Ec>::rBitSize;
-template<class Ec> mpz_class GLV1T<Ec>::v0;
-template<class Ec> mpz_class GLV1T<Ec>::v1;
-template<class Ec> mpz_class GLV1T<Ec>::B[2][2];
-template<class Ec> mpz_class GLV1T<Ec>::r;
+template<class Ec, class Fr> typename Ec::Fp GLV1T<Ec, Fr>::rw;
+template<class Ec, class Fr> size_t GLV1T<Ec, Fr>::rBitSize;
+template<class Ec, class Fr> mpz_class GLV1T<Ec, Fr>::v0;
+template<class Ec, class Fr> mpz_class GLV1T<Ec, Fr>::v1;
+template<class Ec, class Fr> mpz_class GLV1T<Ec, Fr>::B[2][2];
 
 /*
 	Ec : elliptic curve
@@ -1458,8 +1465,8 @@ void initCurve(bool *pb, int curveType, Ec *P = 0, mcl::fp::Mode mode = fp::FP_A
 		if (!*pb) return;
 	}
 	if (curveType == MCL_SECP256K1) {
-		GLV1T<Ec>::initForSecp256k1(Zn::getOp().mp);
-		Ec::setMulArrayGLV(GLV1T<Ec>::mulArrayGLV);
+		GLV1T<Ec, Zn>::initForSecp256k1();
+		Ec::setMulArrayGLV(GLV1T<Ec, Zn>::mulArrayGLV, GLV1T<Ec, Zn>::mulVecNGLV);
 	} else {
 		Ec::setMulArrayGLV(0);
 	}
diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp
index 42ec6a70..2c7938d0 100644
--- a/include/mcl/gmp_util.hpp
+++ b/include/mcl/gmp_util.hpp
@@ -615,13 +615,14 @@ template<class Vec>
 void getNAFwidth(bool *pb, Vec& naf, mpz_class x, size_t w)
 {
 	assert(w > 0);
+	*pb = true;
 	naf.clear();
-	size_t zeroNum = 0;
 	bool negative = false;
 	if (x < 0) {
 		negative = true;
 		x = -x;
 	}
+	size_t zeroNum = 0;
 	const int signedMaxW = 1 << (w - 1);
 	const int maxW = signedMaxW * 2;
 	const int maskW = maxW - 1;
@@ -651,7 +652,6 @@ void getNAFwidth(bool *pb, Vec& naf, mpz_class x, size_t w)
 			naf[i] = -naf[i];
 		}
 	}
-	*pb = true;
 }
 
 #ifndef CYBOZU_DONT_USE_EXCEPTION
diff --git a/include/mcl/operator.hpp b/include/mcl/operator.hpp
index e9bc506d..7a1a02cc 100644
--- a/include/mcl/operator.hpp
+++ b/include/mcl/operator.hpp
@@ -84,12 +84,29 @@ struct Operator : public E {
 	{
 		powArray(z, x, gmp::getUnit(y), gmp::getUnitSize(y), y < 0, true);
 	}
-	static void setPowArrayGLV(void f(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime))
+	static void setPowArrayGLV(void f(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime), size_t g(T& z, const T *xVec, const mpz_class *yVec, size_t n) = 0)
 	{
 		powArrayGLV = f;
+		powVecNGLV = g;
+	}
+	static void powVec(T& z, const T* xVec, const mpz_class *yVec, size_t n)
+	{
+		assert(powVecNGLV);
+		T r;
+		r.setOne();
+		while (n > 0) {
+			T t;
+			size_t done = powVecNGLV(t, xVec, yVec, n);
+			r *= t;
+			xVec += done;
+			yVec += done;
+			n -= done;
+		}
+		z = r;
 	}
 private:
 	static void (*powArrayGLV)(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime);
+	static size_t (*powVecNGLV)(T& z, const T* xVec, const mpz_class *yVec, size_t n);
 	static void powArray(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime)
 	{
 		if (powArrayGLV && (constTime || yn > 1)) {
@@ -117,6 +134,9 @@ struct Operator : public E {
 template<class T, class E>
 void (*Operator<T, E>::powArrayGLV)(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime);
 
+template<class T, class E>
+size_t (*Operator<T, E>::powVecNGLV)(T& z, const T* xVec, const mpz_class *yVec, size_t n);
+
 /*
 	T must have save and load
 */
diff --git a/include/mcl/util.hpp b/include/mcl/util.hpp
index a4062417..b33b10c3 100644
--- a/include/mcl/util.hpp
+++ b/include/mcl/util.hpp
@@ -157,7 +157,6 @@ void maskArray(T *x, size_t n, size_t bitSize)
 template<class T>
 size_t getNonZeroArraySize(const T *x, size_t n)
 {
-	assert(n > 0);
 	while (n > 0) {
 		if (x[n - 1]) return n;
 		n--;
@@ -165,6 +164,62 @@ size_t getNonZeroArraySize(const T *x, size_t n)
 	return 1;
 }
 
+template<class T>
+class BitIterator {
+	const T *x_;
+	size_t bitPos_;
+	size_t bitSize_;
+	static const size_t TbitSize = sizeof(T) * 8;
+public:
+	BitIterator(const T *x, size_t n)
+		: x_(x)
+		, bitPos_(0)
+	{
+		assert(n > 0);
+		n = getNonZeroArraySize(x, n);
+		if (n == 1 && x[0] == 0) {
+			bitSize_ = 1;
+		} else {
+			assert(x_[n - 1]);
+			bitSize_ = (n - 1) * sizeof(T) * 8 + 1 + cybozu::bsr<T>(x_[n - 1]);
+		}
+	}
+	bool hasNext() const { return bitPos_ < bitSize_; }
+	T getNext(size_t w)
+	{
+		assert(0 < w && w <= TbitSize);
+		assert(hasNext());
+		const size_t q = bitPos_ / TbitSize;
+		const size_t r = bitPos_ % TbitSize;
+		const size_t remain = bitSize_ - bitPos_;
+		if (w > remain) w = remain;
+		T v = x_[q] >> r;
+		if (r + w > TbitSize) {
+			v |= x_[q + 1] << (TbitSize - r);
+		}
+		bitPos_ += w;
+		return v & mask(w);
+	}
+	// whether next bit is 1 or 0 (bitPos is not moved)
+	bool peekBit() const
+	{
+		assert(hasNext());
+		const size_t q = bitPos_ / TbitSize;
+		const size_t r = bitPos_ % TbitSize;
+		return (x_[q] >> r) & 1;
+	}
+	void skipBit()
+	{
+		assert(hasNext());
+		bitPos_++;
+	}
+	T mask(size_t w) const
+	{
+		assert(w <= TbitSize);
+		return (w == TbitSize ? T(0) : (T(1) << w)) - 1;
+	}
+};
+
 /*
 	@param out [inout] : set element of G ; out = x^y[]
 	@param x [in]
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index 6aac204e..0379693b 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -384,7 +384,7 @@ CYBOZU_TEST_AUTO(naive)
 		testPairing(P, Q, ts.e);
 		testPrecomputed(P, Q);
 		testMillerLoop2(P, Q);
-		testCommon(P, Q);
+		testCommon<G1, G2, GT>(P, Q);
 		testBench(P, Q);
 	}
 	int count = (int)clk.getCount();
diff --git a/test/bn384_test.cpp b/test/bn384_test.cpp
index e248d48e..13fc0777 100644
--- a/test/bn384_test.cpp
+++ b/test/bn384_test.cpp
@@ -40,7 +40,7 @@ void testCurve(const mcl::CurveParam& cp)
 	pairing(e2, aP, bQ);
 	GT::pow(e1, e1, a * b);
 	CYBOZU_TEST_EQUAL(e1, e2);
-	testCommon(P, Q);
+	testCommon<G1, G2, GT>(P, Q);
 	testBench(P, Q);
 	testSquareRoot();
 	testLagrange();
diff --git a/test/bn512_test.cpp b/test/bn512_test.cpp
index ebbc7c00..8999bc9c 100644
--- a/test/bn512_test.cpp
+++ b/test/bn512_test.cpp
@@ -34,7 +34,7 @@ void testCurve(const mcl::CurveParam& cp)
 	pairing(e2, aP, bQ);
 	GT::pow(e1, e1, a * b);
 	CYBOZU_TEST_EQUAL(e1, e2);
-	testCommon(P, Q);
+	testCommon<G1, G2, GT>(P, Q);
 	testBench(P, Q);
 	testSquareRoot();
 	testLagrange();
diff --git a/test/bn_test.cpp b/test/bn_test.cpp
index 397f9a11..b62bc8d3 100644
--- a/test/bn_test.cpp
+++ b/test/bn_test.cpp
@@ -402,7 +402,7 @@ CYBOZU_TEST_AUTO(naive)
 		testPrecomputed(P, Q);
 		testMillerLoop2(P, Q);
 		testMillerLoopVec();
-		testCommon(P, Q);
+		testCommon<G1, G2, GT>(P, Q);
 		testBench(P, Q);
 		benchAddDblG1();
 		benchAddDblG2();
diff --git a/test/common_test.hpp b/test/common_test.hpp
index e5378e98..30530380 100644
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@@ -1,6 +1,10 @@
-template<class G, class F>
-void naiveMulVec(G& out, const G *xVec, const F *yVec, size_t n)
+template<class G>
+void naiveMulVec(G& out, const G *xVec, const mpz_class *yVec, size_t n)
 {
+	if (n == 1) {
+		G::mul(out, xVec[0], yVec[0]);
+		return;
+	}
 	G r, t;
 	r.clear();
 	for (size_t i = 0; i < n; i++) {
@@ -16,14 +20,13 @@ void testMulVec(const G& P)
 	using namespace mcl::bn;
 	const int N = 33;
 	G xVec[N];
-	mcl::bn::Fr yVec[N];
+	mpz_class yVec[N];
 
 	for (size_t i = 0; i < N; i++) {
 		G::mul(xVec[i], P, i + 3);
-		yVec[i].setByCSPRNG();
+		mcl::gmp::getRand(yVec[i], Fr::getOp().bitSize);
 	}
-	const size_t nTbl[] = { 1, 2, 3, 5, 30, 31, 32, 33 };
-	const int C = 400;
+	const size_t nTbl[] = { 1, 2, 3, 5, 7, 8, 9, 14, 15, 16, 30, 31, 32, 33 };
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(nTbl); i++) {
 		const size_t n = nTbl[i];
 		G Q1, Q2;
@@ -31,14 +34,70 @@ void testMulVec(const G& P)
 		naiveMulVec(Q1, xVec, yVec, n);
 		G::mulVec(Q2, xVec, yVec, n);
 		CYBOZU_TEST_EQUAL(Q1, Q2);
+#if 0//#ifdef NDEBUG
 		printf("n=%zd\n", n);
+		const int C = 400;
 		CYBOZU_BENCH_C("naive ", C, naiveMulVec, Q1, xVec, yVec, n);
 		CYBOZU_BENCH_C("mulVec", C, G::mulVec, Q1, xVec, yVec, n);
+#endif
+	}
+}
+
+template<class G>
+void naivePowVec(G& out, const G *xVec, const mpz_class *yVec, size_t n)
+{
+	if (n == 1) {
+		G::pow(out, xVec[0], yVec[0]);
+		return;
+	}
+	G r, t;
+	r.setOne();
+	for (size_t i = 0; i < n; i++) {
+		G::pow(t, xVec[i], yVec[i]);
+		r *= t;
+	}
+	out = r;
+}
+
+template<class G>
+inline void testPowVec(const G& e)
+{
+	using namespace mcl::bn;
+	const int N = 33;
+	G xVec[N];
+	mpz_class yVec[N];
+
+	xVec[0] = e;
+	for (size_t i = 0; i < N; i++) {
+		if (i > 0) G::mul(xVec[i], xVec[i - 1], e);
+		mcl::gmp::getRand(yVec[i], Fr::getOp().bitSize);
+	}
+	const size_t nTbl[] = { 1, 2, 3, 5, 7, 8, 9, 14, 15, 16, 30, 31, 32, 33 };
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(nTbl); i++) {
+		const size_t n = nTbl[i];
+		G Q1, Q2;
+		CYBOZU_TEST_ASSERT(n <= N);
+		naivePowVec(Q1, xVec, yVec, n);
+		G::powVec(Q2, xVec, yVec, n);
+		CYBOZU_TEST_EQUAL(Q1, Q2);
+#if 0//#ifdef NDEBUG
+		printf("n=%zd\n", n);
+		const int C = 400;
+		CYBOZU_BENCH_C("naive ", C, naivePowVec, Q1, xVec, yVec, n);
+		CYBOZU_BENCH_C("mulVec", C, G::powVec, Q1, xVec, yVec, n);
+#endif
 	}
 }
 
-template<class G1, class G2>
-void testCommon(const G1& P, const G2&)
+template<class G1, class G2, class GT>
+void testCommon(const G1& P, const G2& Q)
 {
+	puts("G1");
 	testMulVec(P);
+	puts("G2");
+	testMulVec(Q);
+	GT e;
+	mcl::bn::pairing(e, P, Q);
+	puts("GT");
+	testPowVec(e);
 }
diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index 2beac015..047e2a7c 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -532,7 +532,7 @@ mul 499.00usec
 	void operator=(const Test&);
 };
 
-void naiveMulVec(Ec& out, const Ec *xVec, const Zn *yVec, size_t n)
+void naiveMulVec(Ec& out, const Ec *xVec, const mpz_class *yVec, size_t n)
 {
 	Ec r, t;
 	r.clear();
@@ -552,13 +552,13 @@ void mulVec(const mcl::EcParam& para)
 	P += P;
 	const int N = 33;
 	Ec xVec[N];
-	Zn yVec[N];
+	mpz_class yVec[N];
 	Ec Q1, Q2;
 
 	Ec::dbl(P, P);
 	for (size_t i = 0; i < N; i++) {
 		Ec::mul(xVec[i], P, i + 3);
-		yVec[i].setByCSPRNG();
+		mcl::gmp::getRand(yVec[i], Zn::getOp().bitSize);
 	}
 	const size_t nTbl[] = { 1, 2, 3, 5, 30, 31, 32, 33 };
 	const int C = 400;
diff --git a/test/fp_util_test.cpp b/test/fp_util_test.cpp
index e8a9f9aa..45b1573a 100644
--- a/test/fp_util_test.cpp
+++ b/test/fp_util_test.cpp
@@ -268,3 +268,45 @@ CYBOZU_TEST_AUTO(stream)
 		}
 	}
 }
+
+CYBOZU_TEST_AUTO(BitIterator)
+{
+	const struct Tbl {
+		uint32_t v[4];
+		uint32_t n;
+	} tbl[] = {
+		{ { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }, 4 },
+		{ { 0 }, 1 },
+		{ { 0x12345678, 0x9abcdef0, 0xfedcba98, 0 }, 4 },
+		{ { 0x12345678, 0x9abcdef0, 0xfed,}, 3 },
+	};
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+		const Tbl& t = tbl[i];
+		for (size_t w = 1; w <= 32; w++) {
+			mcl::fp::BitIterator<uint32_t> bi(t.v, t.n);
+			mpz_class x;
+			mcl::gmp::setArray(x, t.v, t.n);
+			while (bi.hasNext()) {
+				uint32_t v1 = bi.getNext(w);
+				mpz_class v2 = x & bi.mask(w);
+				CYBOZU_TEST_EQUAL(v1, v2);
+				x >>= w;
+			}
+			CYBOZU_TEST_EQUAL(x, 0);
+		}
+		// w = 1
+		{
+			mcl::fp::BitIterator<uint32_t> bi(t.v, t.n);
+			mpz_class x;
+			mcl::gmp::setArray(x, t.v, t.n);
+			while (bi.hasNext()) {
+				uint32_t v1 = bi.peekBit();
+				mpz_class v2 = x & 1;
+				CYBOZU_TEST_EQUAL(v1, v2);
+				x >>= 1;
+				bi.skipBit();
+			}
+			CYBOZU_TEST_EQUAL(x, 0);
+		}
+	}
+}
diff --git a/test/glv_test.cpp b/test/glv_test.cpp
index e1d49411..78bb8218 100644
--- a/test/glv_test.cpp
+++ b/test/glv_test.cpp
@@ -122,7 +122,7 @@ void testGLV1()
 	}
 
 	typedef mcl::bn::local::GLV1 GLV1;
-	GLV1::initForBN(BN::param.r, BN::param.z, BN::param.isBLS12);
+	GLV1::initForBN(BN::param.z, BN::param.isBLS12);
 	if (!BN::param.isBLS12) {
 		compareLength<GLV1>(oldGlv);
 	}
@@ -165,8 +165,8 @@ void testGLV2()
 	G2 Q0, Q1, Q2;
 	mpz_class z = BN::param.z;
 	mpz_class r = BN::param.r;
-	mcl::bn::local::GLV2 glv2;
-	glv2.init(r, z, BN::param.isBLS12);
+	mcl::bn::local::GLV2<Fr> glv2;
+	glv2.init(z, BN::param.isBLS12);
 	mpz_class n;
 	cybozu::XorShift rg;
 	mapToG2(Q0, 1);
diff --git a/test/she_test.cpp b/test/she_test.cpp
index 0782eda8..cec65f27 100644
--- a/test/she_test.cpp
+++ b/test/she_test.cpp
@@ -586,7 +586,7 @@ void decBench(const char *msg, int C, const SecretKey& sec, const PublicKey& pub
 	}
 }
 
-#ifndef PAPER
+#if !defined(PAPER) && defined(NDEBUG)
 CYBOZU_TEST_AUTO(hashBench)
 {
 	SecretKey& sec = g_sec;

From 3039bdb86fbb3ec0e46c66ac311e554fa05ca92f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 22 Sep 2019 15:10:13 +0900
Subject: [PATCH 082/553] change mulVec interface

---
 include/mcl/bn.hpp       |  2 +-
 include/mcl/ec.hpp       | 31 +++++++++++++++++++------------
 include/mcl/op.hpp       |  3 +++
 include/mcl/operator.hpp | 14 ++++++++++++--
 test/bls12_test.cpp      |  5 +++--
 test/bn384_test.cpp      |  5 +++--
 test/bn512_test.cpp      |  5 +++--
 test/bn_test.cpp         |  5 +++--
 test/common_test.hpp     | 13 ++++++-------
 test/ec_test.cpp         | 10 ++++++----
 10 files changed, 59 insertions(+), 34 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 461eb195..2d9eed59 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -814,7 +814,7 @@ struct GLV2 {
 	size_t mulVecNGLV(T& z, const T *xVec, const mpz_class *yVec, size_t n) const
 	{
 		const mpz_class& r = Fr::getOp().mp;
-		const size_t N = 16;
+		const size_t N = mcl::fp::maxMulVecNGLV;
 		if (n > N) n = N;
 		const int w = 5;
 		const size_t tblSize = 1 << (w - 2);
diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 4adc9ecf..1e6172e0 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -30,9 +30,6 @@ enum Mode {
 
 namespace local {
 
-const size_t maxMulVecN = 32; // inner loop of mulVec
-const size_t maxMulVecNGLV = 16; // inner loop of mulVec with GLV
-
 // x is negative <=> x < half(:=(p+1)/2) <=> a = 1
 template<class Fp>
 bool get_a_flag(const Fp& x)
@@ -1187,9 +1184,10 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		@note &z != xVec[i]
 	*/
 private:
-	static inline size_t mulVecN(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t n)
+	template<class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
+	static inline size_t mulVecN(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
 	{
-		const size_t N = mcl::ec::local::maxMulVecN;
+		const size_t N = mcl::fp::maxMulVecN;
 		if (n > N) n = N;
 		const int w = 5;
 		const size_t tblSize = 1 << (w - 2);
@@ -1197,9 +1195,12 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		NafArray naf[N];
 		EcT tbl[N][tblSize];
 		size_t maxBit = 0;
+		mpz_class y;
 		for (size_t i = 0; i < n; i++) {
 			bool b;
-			gmp::getNAFwidth(&b, naf[i], yVec[i], w);
+			yVec[i].getMpz(&b, y);
+			assert(b); (void)b;
+			gmp::getNAFwidth(&b, naf[i], y, w);
 			assert(b); (void)b;
 			if (naf[i].size() > maxBit) maxBit = naf[i].size();
 			EcT P2;
@@ -1220,14 +1221,20 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	}
 
 public:
-	static inline void mulVec(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t n)
+	template<class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
+	static inline void mulVec(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
 	{
-		size_t (*f)(EcT&, const EcT *, const mpz_class *, size_t n) = mulVecN;
 		/*
 			mulVecNGLV is a little slow for large n
 		*/
-		if (mulVecNGLV && n < mcl::ec::local::maxMulVecNGLV) {
-			size_t done = mulVecNGLV(z, xVec, yVec, n);
+		if (mulVecNGLV && n < mcl::fp::maxMulVecNGLV) {
+			mpz_class myVec[mcl::fp::maxMulVecNGLV];
+			for (size_t i = 0; i < n; i++) {
+				bool b;
+				yVec[i].getMpz(&b, myVec[i]);
+				assert(b); (void)b;
+			}
+			size_t done = mulVecNGLV(z, xVec, myVec, n);
 			assert(done == n); (void)done;
 			return;
 		}
@@ -1235,7 +1242,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		r.clear();
 		while (n > 0) {
 			EcT t;
-			size_t done = f(t, xVec, yVec, n);
+			size_t done = mulVecN(t, xVec, yVec, n);
 			r += t;
 			xVec += done;
 			yVec += done;
@@ -1352,7 +1359,7 @@ struct GLV1T {
 	}
 	static inline size_t mulVecNGLV(Ec& z, const Ec *xVec, const mpz_class *yVec, size_t n)
 	{
-		const size_t N = mcl::ec::local::maxMulVecNGLV;
+		const size_t N = mcl::fp::maxMulVecNGLV;
 		if (n > N) n = N;
 		const int w = 5;
 		const mpz_class& r = Fr::getOp().mp;
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index e4cbedc1..2062b611 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -111,6 +111,9 @@ const size_t UnitBitSize = sizeof(Unit) * 8;
 const size_t maxUnitSize = (MCL_MAX_BIT_SIZE + UnitBitSize - 1) / UnitBitSize;
 #define MCL_MAX_UNIT_SIZE ((MCL_MAX_BIT_SIZE + MCL_UNIT_BIT_SIZE - 1) / MCL_UNIT_BIT_SIZE)
 
+const size_t maxMulVecN = 32; // inner loop of mulVec
+const size_t maxMulVecNGLV = 16; // inner loop of mulVec with GLV
+
 struct FpGenerator;
 struct Op;
 
diff --git a/include/mcl/operator.hpp b/include/mcl/operator.hpp
index 7a1a02cc..878afa6e 100644
--- a/include/mcl/operator.hpp
+++ b/include/mcl/operator.hpp
@@ -89,14 +89,24 @@ struct Operator : public E {
 		powArrayGLV = f;
 		powVecNGLV = g;
 	}
-	static void powVec(T& z, const T* xVec, const mpz_class *yVec, size_t n)
+	static const size_t powVecMaxN = 16;
+	template<class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
+	static void powVec(T& z, const T* xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
 	{
 		assert(powVecNGLV);
 		T r;
 		r.setOne();
+		const size_t N = mcl::fp::maxMulVecNGLV;
+		mpz_class myVec[N];
 		while (n > 0) {
 			T t;
-			size_t done = powVecNGLV(t, xVec, yVec, n);
+			size_t tn = fp::min_(n, N);
+			for (size_t i = 0; i < tn; i++) {
+				bool b;
+				yVec[i].getMpz(&b, myVec[i]);
+				assert(b); (void)b;
+			}
+			size_t done = powVecNGLV(t, xVec, myVec, tn);
 			r *= t;
 			xVec += done;
 			yVec += done;
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index 0379693b..3a693930 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -6,7 +6,6 @@ cybozu::CpuClock clk;
 #include <mcl/bls12_381.hpp>
 #include <cybozu/option.hpp>
 #include <cybozu/xorshift.hpp>
-#include "common_test.hpp"
 
 #if defined(__EMSCRIPTEN__) && !defined(MCL_AVOID_EXCEPTION_TEST)
 	#define MCL_AVOID_EXCEPTION_TEST
@@ -14,6 +13,8 @@ cybozu::CpuClock clk;
 
 using namespace mcl::bls12;
 
+#include "common_test.hpp"
+
 mcl::fp::Mode g_mode;
 
 const struct TestSet {
@@ -384,7 +385,7 @@ CYBOZU_TEST_AUTO(naive)
 		testPairing(P, Q, ts.e);
 		testPrecomputed(P, Q);
 		testMillerLoop2(P, Q);
-		testCommon<G1, G2, GT>(P, Q);
+		testCommon(P, Q);
 		testBench(P, Q);
 	}
 	int count = (int)clk.getCount();
diff --git a/test/bn384_test.cpp b/test/bn384_test.cpp
index 13fc0777..09436ff7 100644
--- a/test/bn384_test.cpp
+++ b/test/bn384_test.cpp
@@ -5,7 +5,6 @@
 #include <cybozu/xorshift.hpp>
 #include <mcl/bn384.hpp>
 #include <mcl/bn.hpp>
-#include "common_test.hpp"
 
 using namespace mcl::bn384;
 
@@ -13,6 +12,8 @@ mcl::fp::Mode g_mode;
 
 #include "bench.hpp"
 
+#include "common_test.hpp"
+
 void testCurve(const mcl::CurveParam& cp)
 {
 	initPairing(cp, g_mode);
@@ -40,7 +41,7 @@ void testCurve(const mcl::CurveParam& cp)
 	pairing(e2, aP, bQ);
 	GT::pow(e1, e1, a * b);
 	CYBOZU_TEST_EQUAL(e1, e2);
-	testCommon<G1, G2, GT>(P, Q);
+	testCommon(P, Q);
 	testBench(P, Q);
 	testSquareRoot();
 	testLagrange();
diff --git a/test/bn512_test.cpp b/test/bn512_test.cpp
index 8999bc9c..29697f5a 100644
--- a/test/bn512_test.cpp
+++ b/test/bn512_test.cpp
@@ -5,10 +5,11 @@
 #include <cybozu/xorshift.hpp>
 #include <mcl/bn512.hpp>
 #include <mcl/bn.hpp>
-#include "common_test.hpp"
 
 using namespace mcl::bn512;
 
+#include "common_test.hpp"
+
 mcl::fp::Mode g_mode;
 
 #include "bench.hpp"
@@ -34,7 +35,7 @@ void testCurve(const mcl::CurveParam& cp)
 	pairing(e2, aP, bQ);
 	GT::pow(e1, e1, a * b);
 	CYBOZU_TEST_EQUAL(e1, e2);
-	testCommon<G1, G2, GT>(P, Q);
+	testCommon(P, Q);
 	testBench(P, Q);
 	testSquareRoot();
 	testLagrange();
diff --git a/test/bn_test.cpp b/test/bn_test.cpp
index b62bc8d3..15f6f456 100644
--- a/test/bn_test.cpp
+++ b/test/bn_test.cpp
@@ -6,7 +6,6 @@ cybozu::CpuClock clk;
 #include <mcl/bn256.hpp>
 #include <cybozu/option.hpp>
 #include <cybozu/xorshift.hpp>
-#include "common_test.hpp"
 
 #if defined(__EMSCRIPTEN__) && !defined(MCL_AVOID_EXCEPTION_TEST)
 	#define MCL_AVOID_EXCEPTION_TEST
@@ -15,6 +14,8 @@ cybozu::CpuClock clk;
 typedef mcl::bn::local::Compress Compress;
 using namespace mcl::bn;
 
+#include "common_test.hpp"
+
 mcl::fp::Mode g_mode;
 
 const struct TestSet {
@@ -402,7 +403,7 @@ CYBOZU_TEST_AUTO(naive)
 		testPrecomputed(P, Q);
 		testMillerLoop2(P, Q);
 		testMillerLoopVec();
-		testCommon<G1, G2, GT>(P, Q);
+		testCommon(P, Q);
 		testBench(P, Q);
 		benchAddDblG1();
 		benchAddDblG2();
diff --git a/test/common_test.hpp b/test/common_test.hpp
index 30530380..f6d1dcd5 100644
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@@ -1,5 +1,5 @@
 template<class G>
-void naiveMulVec(G& out, const G *xVec, const mpz_class *yVec, size_t n)
+void naiveMulVec(G& out, const G *xVec, const Fr *yVec, size_t n)
 {
 	if (n == 1) {
 		G::mul(out, xVec[0], yVec[0]);
@@ -20,11 +20,11 @@ void testMulVec(const G& P)
 	using namespace mcl::bn;
 	const int N = 33;
 	G xVec[N];
-	mpz_class yVec[N];
+	Fr yVec[N];
 
 	for (size_t i = 0; i < N; i++) {
 		G::mul(xVec[i], P, i + 3);
-		mcl::gmp::getRand(yVec[i], Fr::getOp().bitSize);
+		yVec[i].setByCSPRNG();
 	}
 	const size_t nTbl[] = { 1, 2, 3, 5, 7, 8, 9, 14, 15, 16, 30, 31, 32, 33 };
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(nTbl); i++) {
@@ -44,7 +44,7 @@ void testMulVec(const G& P)
 }
 
 template<class G>
-void naivePowVec(G& out, const G *xVec, const mpz_class *yVec, size_t n)
+void naivePowVec(G& out, const G *xVec, const Fr *yVec, size_t n)
 {
 	if (n == 1) {
 		G::pow(out, xVec[0], yVec[0]);
@@ -65,12 +65,12 @@ inline void testPowVec(const G& e)
 	using namespace mcl::bn;
 	const int N = 33;
 	G xVec[N];
-	mpz_class yVec[N];
+	Fr yVec[N];
 
 	xVec[0] = e;
 	for (size_t i = 0; i < N; i++) {
 		if (i > 0) G::mul(xVec[i], xVec[i - 1], e);
-		mcl::gmp::getRand(yVec[i], Fr::getOp().bitSize);
+		yVec[i].setByCSPRNG();
 	}
 	const size_t nTbl[] = { 1, 2, 3, 5, 7, 8, 9, 14, 15, 16, 30, 31, 32, 33 };
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(nTbl); i++) {
@@ -89,7 +89,6 @@ inline void testPowVec(const G& e)
 	}
 }
 
-template<class G1, class G2, class GT>
 void testCommon(const G1& P, const G2& Q)
 {
 	puts("G1");
diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index 047e2a7c..ea927a16 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -532,7 +532,7 @@ mul 499.00usec
 	void operator=(const Test&);
 };
 
-void naiveMulVec(Ec& out, const Ec *xVec, const mpz_class *yVec, size_t n)
+void naiveMulVec(Ec& out, const Ec *xVec, const Zn *yVec, size_t n)
 {
 	Ec r, t;
 	r.clear();
@@ -552,25 +552,27 @@ void mulVec(const mcl::EcParam& para)
 	P += P;
 	const int N = 33;
 	Ec xVec[N];
-	mpz_class yVec[N];
+	Zn yVec[N];
 	Ec Q1, Q2;
 
 	Ec::dbl(P, P);
 	for (size_t i = 0; i < N; i++) {
 		Ec::mul(xVec[i], P, i + 3);
-		mcl::gmp::getRand(yVec[i], Zn::getOp().bitSize);
+		yVec[i].setByCSPRNG();
 	}
 	const size_t nTbl[] = { 1, 2, 3, 5, 30, 31, 32, 33 };
-	const int C = 400;
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(nTbl); i++) {
 		const size_t n = nTbl[i];
 		CYBOZU_TEST_ASSERT(n <= N);
 		naiveMulVec(Q1, xVec, yVec, n);
 		Ec::mulVec(Q2, xVec, yVec, n);
 		CYBOZU_TEST_EQUAL(Q1, Q2);
+#ifndef NDEBUG
 		printf("n=%zd\n", n);
+		const int C = 400;
 		CYBOZU_BENCH_C("naive ", C, naiveMulVec, Q1, xVec, yVec, n);
 		CYBOZU_BENCH_C("mulVec", C, Ec::mulVec, Q1, xVec, yVec, n);
+#endif
 	}
 }
 

From e669a7a462c4d2614f8bb75c116acfc65336cb95 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 22 Sep 2019 18:07:43 +0900
Subject: [PATCH 083/553] add mclBnG{1,2,T}_mulVec

---
 include/mcl/bn.h               |  5 +++++
 include/mcl/impl/bn_c_impl.hpp | 13 ++++++++++++
 test/bn_c_test.hpp             | 38 ++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 68b87210..3860bd64 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -430,6 +430,11 @@ MCLBN_DLL_API void mclBnGT_powGeneric(mclBnGT *z, const mclBnGT *x, const mclBnF
 */
 MCLBN_DLL_API void mclBnGT_pow(mclBnGT *z, const mclBnGT *x, const mclBnFr *y);
 
+// z = sum_{i=0}^{n-1} x[i] y[i]
+MCLBN_DLL_API void mclBnG1_mulVec(mclBnG1 *z, const mclBnG1 *x, const mclBnFr *y, mclSize n);
+MCLBN_DLL_API void mclBnG2_mulVec(mclBnG2 *z, const mclBnG2 *x, const mclBnFr *y, mclSize n);
+MCLBN_DLL_API void mclBnGT_powVec(mclBnGT *z, const mclBnGT *x, const mclBnFr *y, mclSize n);
+
 MCLBN_DLL_API void mclBn_pairing(mclBnGT *z, const mclBnG1 *x, const mclBnG2 *y);
 MCLBN_DLL_API void mclBn_finalExp(mclBnGT *y, const mclBnGT *x);
 MCLBN_DLL_API void mclBn_millerLoop(mclBnGT *z, const mclBnG1 *x, const mclBnG2 *y);
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index c02ab795..ad534c75 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -557,6 +557,19 @@ void mclBnGT_powGeneric(mclBnGT *z, const mclBnGT *x, const mclBnFr *y)
 	Fp12::powGeneric(*cast(z), *cast(x), *cast(y));
 }
 
+void mclBnG1_mulVec(mclBnG1 *z, const mclBnG1 *x, const mclBnFr *y, mclSize n)
+{
+	G1::mulVec(*cast(z), cast(x), cast(y), n);
+}
+void mclBnG2_mulVec(mclBnG2 *z, const mclBnG2 *x, const mclBnFr *y, mclSize n)
+{
+	G2::mulVec(*cast(z), cast(x), cast(y), n);
+}
+void mclBnGT_powVec(mclBnGT *z, const mclBnGT *x, const mclBnFr *y, mclSize n)
+{
+	GT::powVec(*cast(z), cast(x), cast(y), n);
+}
+
 void mclBn_pairing(mclBnGT *z, const mclBnG1 *x, const mclBnG2 *y)
 {
 	pairing(*cast(z), *cast(x), *cast(y));
diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index afa65f1d..52d7785f 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -906,6 +906,44 @@ CYBOZU_TEST_AUTO(getLittleEndian)
 	}
 }
 
+CYBOZU_TEST_AUTO(mulVec)
+{
+	const size_t N = 70;
+	mclBnG1 x1Vec[N], z1, w1;
+	mclBnG2 x2Vec[N], z2, w2;
+	mclBnGT xtVec[N], zt, wt;
+	mclBnFr yVec[N];
+
+	for (size_t i = 0; i < N; i++) {
+		char c = 'a' + i;
+		mclBnG1_hashAndMapTo(&x1Vec[i], &c, 1);
+		mclBnG2_hashAndMapTo(&x2Vec[i], &c, 1);
+		mclBn_pairing(&xtVec[i], &x1Vec[i], &x2Vec[i]);
+		mclBnFr_setByCSPRNG(&yVec[i]);
+	}
+	mclBnG1_mulVec(&z1, x1Vec, yVec, N);
+	mclBnG2_mulVec(&z2, x2Vec, yVec, N);
+	mclBnGT_powVec(&zt, xtVec, yVec, N);
+
+	mclBnG1_clear(&w1);
+	mclBnG2_clear(&w2);
+	mclBnGT_setInt(&wt, 1);
+	for (size_t i = 0; i < N; i++) {
+		mclBnG1 t1;
+		mclBnG2 t2;
+		mclBnGT tt;
+		mclBnG1_mul(&t1, &x1Vec[i], &yVec[i]);
+		mclBnG2_mul(&t2, &x2Vec[i], &yVec[i]);
+		mclBnGT_pow(&tt, &xtVec[i], &yVec[i]);
+		mclBnG1_add(&w1, &w1, &t1);
+		mclBnG2_add(&w2, &w2, &t2);
+		mclBnGT_mul(&wt, &wt, &tt);
+	}
+	CYBOZU_TEST_ASSERT(mclBnG1_isEqual(&z1, &w1));
+	CYBOZU_TEST_ASSERT(mclBnG2_isEqual(&z2, &w2));
+	CYBOZU_TEST_ASSERT(mclBnGT_isEqual(&zt, &wt));
+}
+
 void G1onlyTest(int curve)
 {
 	printf("curve=%d\n", curve);

From 3d619e0055cda458e6270ae7a943ccf490230e18 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 22 Sep 2019 18:09:03 +0900
Subject: [PATCH 084/553] v0.99

---
 include/mcl/op.hpp | 2 +-
 readme.md          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 2062b611..787dcd51 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x098; /* 0xABC = A.BC */
+static const int version = 0x099; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index dd1d195e..f2644c66 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,7 @@ mcl is a library for pairing-based cryptography.
 The current version supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
+* v0.99 add mclBnG1_mulVec, etc.
 * v0.98 bugfix Ec::add(P, Q, R) when P == R
 * v0.97 add some C api functions
 * v0.96 improved scalar multiplication

From 5d6d1c51cc7f5ad2d98cd60cd899ddd2428fed99 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 25 Sep 2019 16:42:32 +0900
Subject: [PATCH 085/553] build bn_c384_256.dll

---
 mklib.bat | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mklib.bat b/mklib.bat
index aef14944..4e277e1a 100644
--- a/mklib.bat
+++ b/mklib.bat
@@ -19,6 +19,11 @@ if "%1"=="dll" (
   echo link /nologo /DLL /OUT:bin\mclbn256.dll obj\bn_c256.obj obj\fp.obj %LDFLAGS% /implib:lib\mclbn256.lib
      link /nologo /DLL /OUT:bin\mclbn256.dll obj\bn_c256.obj obj\fp.obj %LDFLAGS% /implib:lib\mclbn256.lib
 
+  echo cl /c %CFLAGS% src\bn_c384_256.cpp /Foobj\bn_c384_256.obj
+     cl /c %CFLAGS% src\bn_c384_256.cpp /Foobj\bn_c384_256.obj /DMCLBN_NO_AUTOLINK
+  echo link /nologo /DLL /OUT:bin\mclbn384_256.dll obj\bn_c384_256.obj obj\fp.obj %LDFLAGS% /implib:lib\mclbn384_256.lib
+     link /nologo /DLL /OUT:bin\mclbn384_256.dll obj\bn_c384_256.obj obj\fp.obj %LDFLAGS% /implib:lib\mclbn384_256.lib
+
   echo cl /c %CFLAGS% src\bn_c384.cpp /Foobj\bn_c384.obj
      cl /c %CFLAGS% src\bn_c384.cpp /Foobj\bn_c384.obj /DMCLBN_NO_AUTOLINK
   echo link /nologo /DLL /OUT:bin\mclbn384.dll obj\bn_c384.obj obj\fp.obj %LDFLAGS% /implib:lib\mclbn384.lib

From 8c617ba67b0d4b57292121c22db5eada2fd8ef1b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 25 Sep 2019 16:42:47 +0900
Subject: [PATCH 086/553] remove warning

---
 test/bn_c_test.hpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index 52d7785f..264e8b6e 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -576,8 +576,8 @@ CYBOZU_TEST_AUTO(setRandFunc)
 			char buf[1024];
 			ret = mclBnFr_setByCSPRNG(&x);
 			CYBOZU_TEST_EQUAL(ret, 0);
-			ret = mclBnFr_getStr(buf, sizeof(buf), &x, 16);
-			CYBOZU_TEST_ASSERT(ret > 0);
+			size_t n = mclBnFr_getStr(buf, sizeof(buf), &x, 16);
+			CYBOZU_TEST_ASSERT(n > 0);
 			printf("%d %s\n", i, buf);
 		}
 		if (j == 0) {
@@ -915,7 +915,7 @@ CYBOZU_TEST_AUTO(mulVec)
 	mclBnFr yVec[N];
 
 	for (size_t i = 0; i < N; i++) {
-		char c = 'a' + i;
+		char c = char('a' + i);
 		mclBnG1_hashAndMapTo(&x1Vec[i], &c, 1);
 		mclBnG2_hashAndMapTo(&x2Vec[i], &c, 1);
 		mclBn_pairing(&xtVec[i], &x1Vec[i], &x2Vec[i]);
@@ -954,8 +954,8 @@ void G1onlyTest(int curve)
 	ret = mclBnG1_getBasePoint(&P0);
 	CYBOZU_TEST_EQUAL(ret, 0);
 	char buf[256];
-	ret = mclBnG1_getStr(buf, sizeof(buf), &P0, 16);
-	CYBOZU_TEST_ASSERT(ret > 0);
+	size_t n = mclBnG1_getStr(buf, sizeof(buf), &P0, 16);
+	CYBOZU_TEST_ASSERT(n > 0);
 	printf("basePoint=%s\n", buf);
 	G1test();
 }

From 26d9039b6c71d41c43507a17e58912f0f9baa08f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 26 Sep 2019 16:31:19 +0900
Subject: [PATCH 087/553] enable profile if MCL_PROF=2

---
 src/fp_generator.hpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 6185bb0e..2576cab9 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -268,9 +268,16 @@ struct FpGenerator : Xbyak::CodeGenerator {
 //		printf("p=%p, pn_=%d, isFullBit_=%d\n", p_, pn_, isFullBit_);
 #ifdef MCL_USE_PROF
 		static char suf[] = "_0";
-		const char *s = getenv("MCL_PROF");
-		if (s && s[0] && s[1] == '\0') {
-			prof_.init(s[0] - '0');
+		int profMode = 0;
+#ifdef XBYAK_USE_VTUNE
+		profMode = 2;
+#endif
+		{
+			const char *s = getenv("MCL_PROF");
+			if (s && s[0] && s[1] == '\0') profMode = s[0] - '0';
+		}
+		if (profMode) {
+			prof_.init(profMode);
 			prof_.setStartAddr(getCurr());
 			prof_.setNameSuffix(suf);
 			suf[1]++;

From 33bfcf4ac59c03370ec1f933ff5e426cbf69eec1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 27 Sep 2019 17:00:20 +0900
Subject: [PATCH 088/553] update doc

---
 readme.md | 300 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 159 insertions(+), 141 deletions(-)

diff --git a/readme.md b/readme.md
index f2644c66..493cd3a8 100644
--- a/readme.md
+++ b/readme.md
@@ -6,177 +6,113 @@ A portable and fast pairing-based cryptography library.
 
 # Abstract
 
-mcl is a library for pairing-based cryptography.
-The current version supports the optimal Ate pairing over BN curves and BLS12-381 curves.
-
-# News
-* v0.99 add mclBnG1_mulVec, etc.
-* v0.98 bugfix Ec::add(P, Q, R) when P == R
-* v0.97 add some C api functions
-* v0.96 improved scalar multiplication
-* mclBn_setETHserialization(true) (de)serialize acoording to [ETH2.0 serialization of BLS12-381](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#point-representations) when BLS12-381 is used.
-* (Break backward compatibility) libmcl_dy.a is renamed to libmcl.a
-    * The option SHARE_BASENAME_SUF is removed
-* 2nd argument of `mclBn_init` is changed from `maxUnitSize` to `compiledTimeVar`, which must be `MCLBN_COMPILED_TIME_VAR`.
-* break backward compatibility of mapToGi for BLS12. A map-to-function for BN is used.
-If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but this will be removed in the future.
+mcl is a library for pairing-based cryptography,
+which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
+
 
 # Support architecture
 
-* x86-64 Windows + Visual Studio
-* x86, x86-64 Linux + gcc/clang
-* ARM Linux
-* ARM64 Linux
-* (maybe any platform to be supported by LLVM)
-* WebAssembly
+- x86-64 Windows + Visual Studio
+- x86, x86-64 Linux + gcc/clang
+- x86-64 macOS
+- ARM / ARM64 Linux
+- WebAssembly
+- Android
+- iPhone
+- (maybe any platform to be supported by LLVM)
 
 # Support curves
 
-p(z) = 36z^4 + 36z^3 + 24z^2 + 6z + 1.
-
-* BN254 ; a BN curve over the 254-bit prime p(z) where z = -(2^62 + 2^55 + 1).
-* BN\_SNARK1 ; a BN curve over a 254-bit prime p such that n := p + 1 - t has high 2-adicity.
-* BN381\_1 ; a BN curve over the 381-bit prime p(z) where z = -(2^94 + 2^76 + 2^72 + 1).
-* BN462 ; a BN curve over the 462-bit prime p(z) where z = 2^114 + 2^101 - 2^14 - 1.
-* BLS12\_381 ; [a BLS12-381 curve](https://blog.z.cash/new-snark-curve/)
-
-# Benchmark
-
-## The latest benchmark(2018/11/7)
-
-### Intel Core i7-6700 3.4GHz(Skylake), Ubuntu 18.04.1 LTS
-
-curveType |              binary|clang-6.0.0|gcc-7.3.0|
-----------|--------------------|-----------|---------|
-BN254     |    bin/bn\_test.exe|    882Kclk|  933Kclk|
-BLS12-381 | bin/bls12\_test.exe|   2290Kclk| 2630Kclk|
-
-### Intel Core i7-7700 3.6GHz(Kaby Lake), Ubuntu 18.04.1 LTS on Windows 10 Vmware
-
-curveType |              binary|clang-6.0.0|gcc-7.3.0|
-----------|--------------------|-----------|---------|
-BN254     |    bin/bn\_test.exe|    900Kclk|  954Kclk|
-BLS12-381 | bin/bls12\_test.exe|   2340Kclk| 2680Kclk|
-
-* now investigating the reason why gcc is slower than clang.
-
-## Higher-bit BN curve benchmark
-
-For JavaScript(WebAssembly), see [ID based encryption demo](https://herumi.github.io/mcl-wasm/ibe-demo.html).
-
-paramter   |  x64| Firefox on x64|Safari on iPhone7|
------------|-----|---------------|-----------------|
-BN254      | 0.25|           2.48|             4.78|
-BN381\_1   | 0.95|           7.91|            11.74|
-BN462      | 2.16|          14.73|            22.77|
-
-* x64 : 'Kaby Lake Core i7-7700(3.6GHz)'.
-* Firefox : 64-bit version 58.
-* iPhone7 : iOS 11.2.1.
-* BN254 is by `test/bn_test.cpp`.
-* BN381\_1 and BN462 are  by `test/bn512_test.cpp`.
-* All the timings  are given in ms(milliseconds).
-
-The other benchmark results are [bench.txt](bench.txt).
+- BN curve ; p(z) = 36z^4 + 36z^3 + 24z^2 + 6z + 1.
+  - BN254 ; a BN curve over the 254-bit prime p(z) where z = -(2^62 + 2^55 + 1).
+  - BN\_SNARK1 ; a BN curve over a 254-bit prime p such that n := p + 1 - t has high 2-adicity.
+  - BN381\_1 ; a BN curve over the 381-bit prime p(z) where z = -(2^94 + 2^76 + 2^72 + 1).
+  - BN462 ; a BN curve over the 462-bit prime p(z) where z = 2^114 + 2^101 - 2^14 - 1.
+- BLS12\_381 ; [a BLS12-381 curve](https://blog.z.cash/new-snark-curve/)
 
-## An old benchmark of a BN curve BN254(2016/12/25).
+# How to build on Linux and macOS
+x86-64/ARM/ARM64 Linux, macOS and mingw64 are supported.
 
-* x64, x86 ; Inte Core i7-6700 3.4GHz(Skylake) upto 4GHz on Ubuntu 16.04.
-    * `sudo cpufreq-set -g performance`
-* arm ; 900MHz quad-core ARM Cortex-A7 on Raspberry Pi2, Linux 4.4.11-v7+
-* arm64 ; 1.2GHz ARM Cortex-A53 [HiKey](http://www.96boards.org/product/hikey/)
+## Installation Requirements
 
-software                                                 |   x64|  x86| arm|arm64(msec)
----------------------------------------------------------|------|-----|----|-----
-[ate-pairing](https://github.com/herumi/ate-pairing)     | 0.21 |   - |  - |    -
-mcl                                                      | 0.31 | 1.6 |22.6|  3.9
-[TEPLA](http://www.cipher.risk.tsukuba.ac.jp/tepla/)     | 1.76 | 3.7 | 37 | 17.9
-[RELIC](https://github.com/relic-toolkit/relic) PRIME=254| 0.30 | 3.5 | 36 |    -
-[MIRACL](https://github.com/miracl/MIRACL) ake12bnx      | 4.2  |   - | 78 |    -
-[NEONabe](http://sandia.cs.cinvestav.mx/Site/NEONabe)    |   -  |   - | 16 |    -
+[GMP](https://gmplib.org/) and [OpenSSL](https://www.openssl.org/) are necessary (default setting).
 
-* compile option for RELIC
 ```
-cmake -DARITH=x64-asm-254 -DFP_PRIME=254 -DFPX_METHD="INTEG;INTEG;LAZYR" -DPP_METHD="LAZYR;OATEP"
+apt install libgmp-dev libssl-dev # on Ubuntu
 ```
 
-# Installation Requirements
+## How to build with Makefile
 
-* [GMP](https://gmplib.org/) and OpenSSL
 ```
-apt install libgmp-dev libssl-dev
+git clone git://github.com/herumi/mcl
+cd mcl
+make -j4
 ```
 
-Create a working directory (e.g., work) and clone the following repositories.
+- `lib/libmcl.*` ; core library
+- `lib/libmclbn384_256.*` ; library to use C-API of BLS12-381 pairing
+
+## How to test BLS12-381 pairing
+
 ```
-mkdir work
-cd work
-git clone git://github.com/herumi/mcl
-git clone git://github.com/herumi/cybozulib_ext ; for only Windows
+# C
+make bin/bn_c384_256_test.exe && bin/bn_c384_256_test.exe
+
+# C++
+make bin/bls12_test.exe && bin/bls12_test.exe
 ```
-* Cybozulib\_ext is a prerequisite for running OpenSSL and GMP on VC (Visual C++).
 
-# (Option) Without GMP
+## How to build without GMP
+
 ```
 make MCL_USE_GMP=0
+
 ```
-Define `MCL_USE_VINT` before including `bn.hpp`
+Define `MCL_USE_VINT` if using C++ header files.
+
+## How to build without OpenSSL
 
-# (Option) Without Openssl
 ```
 make MCL_USE_OPENSSL=0
 ```
-Define `MCL_DONT_USE_OPENSSL` before including `bn.hpp`
+Define `MCL_DONT_USE_OPENSSL` if using C++ header files.
 
-# Build and test on x86-64 Linux, macOS, ARM and ARM64 Linux
-To make lib/libmcl.a and test it:
-```
-cd work/mcl
-make test
-```
-To benchmark a pairing:
-```
-bin/bn_test.exe
-```
-To make sample programs:
-```
-make sample
-```
+## How to build on 32-bit x86 Linux
 
-if you want to change compiler options for optimization, then set `CFLAGS_OPT_USER`.
-```
-make CLFAGS_OPT_USER="-O2"
-```
+Build GMP and OpenSSL in 32-bit mode yourself and install them into `<lib32>`.
 
-## Build for 32-bit Linux
-Build openssl and gmp for 32-bit mode and install `<lib32>`
 ```
 make ARCH=x86 CFLAGS_USER="-I <lib32>/include" LDFLAGS_USER="-L <lib32>/lib -Wl,-rpath,<lib32>/lib"
 ```
 
-## Build for 64-bit Windows
-1) make static library and use it
+# How to build on 64-bit Windows with Visual Studio
 
-```
-mklib
-mk -s test\bn_c256_test.cpp
-bin\bn_c256_test.exe
-```
-2) make dynamic library and use it
+Clone cybozulib\_ext,
+which provides compiled binaries of OpenSSL and [MPIR](http://mpir.org/).
 
 ```
+mkdir work
+cd work
+git clone git://github.com/herumi/mcl
+git clone git://github.com/herumi/cybozulib_ext
+cd mcl
+# static library
+mklib
+mk -s test\bls12_test.cpp && bin\bls12_test.exe
+# dynamic library
 mklib dll
-mk -d test\bn_c256_test.cpp
-bin\bn_c256_test.exe
+mk -d test\bls12_test.cpp && bin\bls12_test.exe
 ```
 
-open mcl.sln and build or if you have msbuild.exe
+(not maintained)
+Open mcl.sln and build or if you have msbuild.exe
 ```
 msbuild /p:Configuration=Release
 ```
 
-## Build with cmake
-For Linux,
+# How to build with CMake
+
+For Linux, macOS, etc.
 ```
 mkdir build
 cd build
@@ -190,7 +126,16 @@ cd build
 cmake .. -A x64
 msbuild mcl.sln /p:Configuration=Release /m
 ```
-## Build for wasm(WebAssembly)
+
+## options
+
+```
+cmake .. -DUSE_GMP=OFF ; without GMP
+cmake .. -DUSE_OPENSSL=OFF ; without OpenSSL
+```
+see `cmake .. -LA`.
+
+# How to build for wasm(WebAssembly)
 mcl supports emcc (Emscripten) and `test/bn_test.cpp` runs on browers such as Firefox, Chrome and Edge.
 
 * [IBE on browser](https://herumi.github.io/mcl-wasm/ibe-demo.html)
@@ -199,13 +144,73 @@ mcl supports emcc (Emscripten) and `test/bn_test.cpp` runs on browers such as Fi
 
 The timing of a pairing on `BN254` is 2.8msec on 64-bit Firefox with Skylake 3.4GHz.
 
-### Node.js
+# Node.js
 
 * [mcl-wasm](https://www.npmjs.com/package/mcl-wasm) pairing library
 * [bls-wasm](https://www.npmjs.com/package/bls-wasm) BLS signature library
 * [she-wasm](https://www.npmjs.com/package/she-wasm) 2 Level Homomorphic Encryption library
 
-### SELinux
+# Benchmark
+
+## The latest benchmark(2018/11/7)
+
+### Intel Core i7-6700 3.4GHz(Skylake), Ubuntu 18.04.1 LTS
+
+curveType |              binary|clang-6.0.0|gcc-7.3.0|
+----------|--------------------|-----------|---------|
+BN254     |    bin/bn\_test.exe|    882Kclk|  933Kclk|
+BLS12-381 | bin/bls12\_test.exe|   2290Kclk| 2630Kclk|
+
+### Intel Core i7-7700 3.6GHz(Kaby Lake), Ubuntu 18.04.1 LTS on Windows 10 Vmware
+
+curveType |              binary|clang-6.0.0|gcc-7.3.0|
+----------|--------------------|-----------|---------|
+BN254     |    bin/bn\_test.exe|    900Kclk|  954Kclk|
+BLS12-381 | bin/bls12\_test.exe|   2340Kclk| 2680Kclk|
+
+* now investigating the reason why gcc is slower than clang.
+
+## Higher-bit BN curve benchmark
+
+For JavaScript(WebAssembly), see [ID based encryption demo](https://herumi.github.io/mcl-wasm/ibe-demo.html).
+
+paramter   |  x64| Firefox on x64|Safari on iPhone7|
+-----------|-----|---------------|-----------------|
+BN254      | 0.25|           2.48|             4.78|
+BN381\_1   | 0.95|           7.91|            11.74|
+BN462      | 2.16|          14.73|            22.77|
+
+* x64 : 'Kaby Lake Core i7-7700(3.6GHz)'.
+* Firefox : 64-bit version 58.
+* iPhone7 : iOS 11.2.1.
+* BN254 is by `test/bn_test.cpp`.
+* BN381\_1 and BN462 are  by `test/bn512_test.cpp`.
+* All the timings  are given in ms(milliseconds).
+
+The other benchmark results are [bench.txt](bench.txt).
+
+## An old benchmark of a BN curve BN254(2016/12/25).
+
+* x64, x86 ; Inte Core i7-6700 3.4GHz(Skylake) upto 4GHz on Ubuntu 16.04.
+    * `sudo cpufreq-set -g performance`
+* arm ; 900MHz quad-core ARM Cortex-A7 on Raspberry Pi2, Linux 4.4.11-v7+
+* arm64 ; 1.2GHz ARM Cortex-A53 [HiKey](http://www.96boards.org/product/hikey/)
+
+software                                                 |   x64|  x86| arm|arm64(msec)
+---------------------------------------------------------|------|-----|----|-----
+[ate-pairing](https://github.com/herumi/ate-pairing)     | 0.21 |   - |  - |    -
+mcl                                                      | 0.31 | 1.6 |22.6|  3.9
+[TEPLA](http://www.cipher.risk.tsukuba.ac.jp/tepla/)     | 1.76 | 3.7 | 37 | 17.9
+[RELIC](https://github.com/relic-toolkit/relic) PRIME=254| 0.30 | 3.5 | 36 |    -
+[MIRACL](https://github.com/miracl/MIRACL) ake12bnx      | 4.2  |   - | 78 |    -
+[NEONabe](http://sandia.cs.cinvestav.mx/Site/NEONabe)    |   -  |   - | 16 |    -
+
+* compile option for RELIC
+```
+cmake -DARITH=x64-asm-254 -DFP_PRIME=254 -DFPX_METHD="INTEG;INTEG;LAZYR" -DPP_METHD="LAZYR;OATEP"
+```
+
+# SELinux
 mcl uses Xbyak JIT engine if it is available on x64 architecture,
 otherwise mcl uses a little slower functions generated by LLVM.
 The default mode enables SELinux security policy on CentOS, then JIT is disabled.
@@ -460,19 +465,32 @@ This library contains some part of the followings software licensed by BSD-3-Cla
 * [_Skew Frobenius Map and Efficient Scalar Multiplication for Pairing–Based Cryptography_](https://www.researchgate.net/publication/221282560_Skew_Frobenius_Map_and_Efficient_Scalar_Multiplication_for_Pairing-Based_Cryptography),
 Y. Sakemi, Y. Nogami, K. Okeya, Y. Morikawa, CANS 2008.
 
+# compatilibity
+
+- mclBn_setETHserialization(true) (de)serialize acoording to [ETH2.0 serialization of BLS12-381](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#point-representations) when BLS12-381 is used.
+- (Break backward compatibility) libmcl_dy.a is renamed to libmcl.a
+  - The option SHARE_BASENAME_SUF is removed
+- 2nd argument of `mclBn_init` is changed from `maxUnitSize` to `compiledTimeVar`, which must be `MCLBN_COMPILED_TIME_VAR`.
+- break backward compatibility of mapToGi for BLS12. A map-to-function for BN is used.
+If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but this will be removed in the future.
+
 # History
 
-* 2019/Jun/03 v0.95 fix a parser of 0b10 with base = 16
-* 2019/Apr/29 v0.94 mclBn_setETHserialization supports [ETH2.0 serialization of BLS12-381](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#point-representations)
-* 2019/Apr/24 v0.93 support ios
-* 2019/Mar/22 v0.92 shortcut for Ec::mul(Px, P, x) if P = 0
-* 2019/Mar/21 python binding of she256 for Linux/Mac/Windows
-* 2019/Mar/14 v0.91 modp supports mcl-wasm
-* 2019/Mar/12 v0.90 fix Vint::setArray(x) for x == this
-* 2019/Mar/07 add mclBnFr_setLittleEndianMod, mclBnFp_setLittleEndianMod
-* 2019/Feb/20 LagrangeInterpolation sets out = yVec[0] if k = 1
-* 2019/Jan/31 add mclBnFp_mapToG1, mclBnFp2_mapToG2
-* 2019/Jan/31 fix crash on x64-CPU without AVX (thanks to mortdeus)
+- 2019/Sep/22 v0.99 add mclBnG1_mulVec, etc.
+- 2019/Sep/08 v0.98 bugfix Ec::add(P, Q, R) when P == R
+- 2019/Aug/14 v0.97 add some C api functions
+- 2019/Jul/26 v0.96 improved scalar multiplication
+- 2019/Jun/03 v0.95 fix a parser of 0b10 with base = 16
+- 2019/Apr/29 v0.94 mclBn_setETHserialization supports [ETH2.0 serialization of BLS12-381](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#point-representations)
+- 2019/Apr/24 v0.93 support ios
+- 2019/Mar/22 v0.92 shortcut for Ec::mul(Px, P, x) if P = 0
+- 2019/Mar/21 python binding of she256 for Linux/Mac/Windows
+- 2019/Mar/14 v0.91 modp supports mcl-wasm
+- 2019/Mar/12 v0.90 fix Vint::setArray(x) for x == this
+- 2019/Mar/07 add mclBnFr_setLittleEndianMod, mclBnFp_setLittleEndianMod
+- 2019/Feb/20 LagrangeInterpolation sets out = yVec[0] if k = 1
+- 2019/Jan/31 add mclBnFp_mapToG1, mclBnFp2_mapToG2
+- 2019/Jan/31 fix crash on x64-CPU without AVX (thanks to mortdeus)
 
 # Author
 

From 1257ac7e361a05eb91200471aaf1b15ceadbb592 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 29 Sep 2019 11:26:13 +0900
Subject: [PATCH 089/553] pairing_c.c uses BLS12-381

---
 Makefile           | 4 ++--
 sample/pairing_c.c | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index 364c17e8..15a90499 100644
--- a/Makefile
+++ b/Makefile
@@ -294,8 +294,8 @@ $(EXE_DIR)/bn_c384_256_test.exe: $(OBJ_DIR)/bn_c384_256_test.o $(BN384_256_LIB)
 $(EXE_DIR)/bn_c512_test.exe: $(OBJ_DIR)/bn_c512_test.o $(BN512_LIB) $(MCL_LIB)
 	$(PRE)$(CXX) $< -o $@ $(BN512_LIB) $(MCL_LIB) $(LDFLAGS)
 
-$(EXE_DIR)/pairing_c.exe: $(OBJ_DIR)/pairing_c.o $(BN256_LIB) $(MCL_LIB)
-	$(PRE)$(CC) $< -o $@ $(BN256_LIB) $(MCL_LIB) $(LDFLAGS) -lstdc++
+$(EXE_DIR)/pairing_c.exe: $(OBJ_DIR)/pairing_c.o $(BN384_256_LIB) $(MCL_LIB)
+	$(PRE)$(CC) $< -o $@ $(BN384_256_LIB) $(MCL_LIB) $(LDFLAGS) -lstdc++
 
 $(EXE_DIR)/she_c256_test.exe: $(OBJ_DIR)/she_c256_test.o $(SHE256_LIB) $(MCL_LIB)
 	$(PRE)$(CXX) $< -o $@ $(SHE256_LIB) $(MCL_LIB) $(LDFLAGS)
diff --git a/sample/pairing_c.c b/sample/pairing_c.c
index ac559087..a669ec7f 100644
--- a/sample/pairing_c.c
+++ b/sample/pairing_c.c
@@ -1,7 +1,6 @@
 #include <stdio.h>
 #include <string.h>
-#define MCLBN_FP_UNIT_SIZE 4
-#include <mcl/bn.h>
+#include <mcl/bn_c384_256.h>
 
 int g_err = 0;
 #define ASSERT(x) { if (!(x)) { printf("err %s:%d\n", __FILE__, __LINE__); g_err++; } }
@@ -11,7 +10,7 @@ int main()
 	char buf[1024];
 	const char *aStr = "123";
 	const char *bStr = "456";
-	int ret = mclBn_init(MCL_BN254, MCLBN_COMPILED_TIME_VAR);
+	int ret = mclBn_init(MCL_BLS12_381, MCLBN_COMPILED_TIME_VAR);
 	if (ret != 0) {
 		printf("err ret=%d\n", ret);
 		return 1;

From 0ddc4778230a61a6e287a9a2b8cf824c120cd9b8 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 29 Sep 2019 11:34:20 +0900
Subject: [PATCH 090/553] sample/pairing.cpp uses BLS12-381

---
 sample/pairing.cpp | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/sample/pairing.cpp b/sample/pairing.cpp
index 230583b6..51ebdc1d 100644
--- a/sample/pairing.cpp
+++ b/sample/pairing.cpp
@@ -1,6 +1,6 @@
-#include <mcl/bn256.hpp>
+#include <mcl/bls12_381.hpp>
 
-using namespace mcl::bn256;
+using namespace mcl::bn;
 
 void minimum_sample(const G1& P, const G2& Q)
 {
@@ -40,14 +40,11 @@ void precomputed(const G1& P, const G2& Q)
 
 int main()
 {
-	const char *aa = "12723517038133731887338407189719511622662176727675373276651903807414909099441";
-	const char *ab = "4168783608814932154536427934509895782246573715297911553964171371032945126671";
-	const char *ba = "13891744915211034074451795021214165905772212241412891944830863846330766296736";
-	const char *bb = "7937318970632701341203597196594272556916396164729705624521405069090520231616";
-
-	initPairing();
-	G2 Q(Fp2(aa, ab), Fp2(ba, bb));
-	G1 P(-1, 1);
+	initPairing(mcl::BLS12_381);
+	G1 P;
+	G2 Q;
+	hashAndMapToG1(P, "abc", 3);
+	hashAndMapToG2(Q, "abc", 3);
 
 	minimum_sample(P, Q);
 	miller_and_finel_exp(P, Q);

From 045de218ffe66da785e53f7f197e80e198711565 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 29 Sep 2019 22:50:30 +0900
Subject: [PATCH 091/553] fix comments of EvaluatePolynomial

---
 include/mcl/bn.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 3860bd64..5bdec0d3 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -465,7 +465,8 @@ MCLBN_DLL_API int mclBn_G2LagrangeInterpolation(mclBnG2 *out, const mclBnFr *xVe
 /*
 	evaluate polynomial
 	out = f(x) = c[0] + c[1] * x + c[2] * x^2 + ... + c[cSize - 1] * x^(cSize - 1)
-	@note cSize >= 2
+	return 0 if success else -1
+	@note cSize >= 1
 */
 MCLBN_DLL_API int mclBn_FrEvaluatePolynomial(mclBnFr *out, const mclBnFr *cVec, mclSize cSize, const mclBnFr *x);
 MCLBN_DLL_API int mclBn_G1EvaluatePolynomial(mclBnG1 *out, const mclBnG1 *cVec, mclSize cSize, const mclBnFr *x);

From 4bb5bb702d41e663148c98117250897eee7b4b52 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 30 Sep 2019 12:35:15 +0900
Subject: [PATCH 092/553] add mclBnGT_invGeneric

---
 include/mcl/bn.h               |  6 +++++-
 include/mcl/impl/bn_c_impl.hpp |  4 ++++
 readme.md                      |  2 ++
 test/bn_c_test.hpp             | 29 +++++++++++++++++++++++++++++
 4 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 5bdec0d3..0a5c3b46 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -413,13 +413,17 @@ MCLBN_DLL_API int mclBnGT_isZero(const mclBnGT *x);
 MCLBN_DLL_API int mclBnGT_isOne(const mclBnGT *x);
 
 MCLBN_DLL_API void mclBnGT_neg(mclBnGT *y, const mclBnGT *x);
-MCLBN_DLL_API void mclBnGT_inv(mclBnGT *y, const mclBnGT *x);
 MCLBN_DLL_API void mclBnGT_sqr(mclBnGT *y, const mclBnGT *x);
 MCLBN_DLL_API void mclBnGT_add(mclBnGT *z, const mclBnGT *x, const mclBnGT *y);
 MCLBN_DLL_API void mclBnGT_sub(mclBnGT *z, const mclBnGT *x, const mclBnGT *y);
 MCLBN_DLL_API void mclBnGT_mul(mclBnGT *z, const mclBnGT *x, const mclBnGT *y);
 MCLBN_DLL_API void mclBnGT_div(mclBnGT *z, const mclBnGT *x, const mclBnGT *y);
 
+// y = conjugate of x in Fp12, which is equal to the inverse of x if |x|^r = 1
+MCLBN_DLL_API void mclBnGT_inv(mclBnGT *y, const mclBnGT *x);
+// use invGeneric when x in Fp12 is not in GT
+MCLBN_DLL_API void mclBnGT_invGeneric(mclBnGT *y, const mclBnGT *x);
+
 /*
 	pow for all elements of Fp12
 */
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index ad534c75..18280e6e 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -524,6 +524,10 @@ void mclBnGT_neg(mclBnGT *y, const mclBnGT *x)
 	Fp12::neg(*cast(y), *cast(x));
 }
 void mclBnGT_inv(mclBnGT *y, const mclBnGT *x)
+{
+	Fp12::unitaryInv(*cast(y), *cast(x));
+}
+void mclBnGT_invGeneric(mclBnGT *y, const mclBnGT *x)
 {
 	Fp12::inv(*cast(y), *cast(x));
 }
diff --git a/readme.md b/readme.md
index 493cd3a8..18791846 100644
--- a/readme.md
+++ b/readme.md
@@ -467,6 +467,8 @@ Y. Sakemi, Y. Nogami, K. Okeya, Y. Morikawa, CANS 2008.
 
 # compatilibity
 
+- mclBnGT_inv returns a - b w, a conjugate of x for x = a + b w in Fp12 = Fp6[w]
+  - use mclBnGT_invGeneric if x is not in GT
 - mclBn_setETHserialization(true) (de)serialize acoording to [ETH2.0 serialization of BLS12-381](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#point-representations) when BLS12-381 is used.
 - (Break backward compatibility) libmcl_dy.a is renamed to libmcl.a
   - The option SHARE_BASENAME_SUF is removed
diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index 264e8b6e..e1f3ff05 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -298,6 +298,35 @@ CYBOZU_TEST_AUTO(GT)
 	CYBOZU_TEST_ASSERT(mclBnGT_isEqual(&x, &y));
 }
 
+CYBOZU_TEST_AUTO(GT_inv)
+{
+	mclBnG1 P;
+	mclBnG2 Q;
+	mclBnGT e, e1, e2, e3, e4;
+	mclBnG1_hashAndMapTo(&P, "1", 1);
+	mclBnG2_hashAndMapTo(&Q, "1", 1);
+	// e is not in GT
+	mclBn_millerLoop(&e, &P, &Q);
+	mclBnGT_inv(&e1, &e); // e1 = a - b w if e = a + b w where Fp12 = Fp6[w]
+	mclBnGT_invGeneric(&e2, &e);
+	mclBnGT_mul(&e3, &e, &e1);
+	mclBnGT_mul(&e4, &e, &e2);
+	CYBOZU_TEST_ASSERT(!mclBnGT_isOne(&e3)); // GT_inv does not give a correct inverse for an element not in GT
+	CYBOZU_TEST_ASSERT(mclBnGT_isOne(&e4));
+
+	mclBn_finalExp(&e3, &e3); // e3 is in GT then e3 = 1
+	CYBOZU_TEST_ASSERT(mclBnGT_isOne(&e3));
+
+	// e is in GT
+	mclBn_finalExp(&e, &e);
+	mclBnGT_inv(&e1, &e);
+	mclBnGT_invGeneric(&e2, &e);
+	mclBnGT_mul(&e3, &e, &e1);
+	mclBnGT_mul(&e4, &e, &e2);
+	CYBOZU_TEST_ASSERT(mclBnGT_isOne(&e3)); // GT_inv gives a correct inverse for an element in GT
+	CYBOZU_TEST_ASSERT(mclBnGT_isOne(&e4));
+}
+
 CYBOZU_TEST_AUTO(pairing)
 {
 	mclBnFr a, b, ab;

From 525946569532251877ad55dbd203ea99d20171ef Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 30 Sep 2019 12:45:58 +0900
Subject: [PATCH 093/553] add mclBnF{r,p}_isNegative

---
 include/mcl/bn.h               |  5 +++++
 include/mcl/impl/bn_c_impl.hpp |  8 ++++++++
 test/bn_c_test.hpp             | 25 +++++++++++++++++++++++++
 3 files changed, 38 insertions(+)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 0a5c3b46..e381c498 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -286,6 +286,11 @@ MCLBN_DLL_API int mclBnFp2_isEqual(const mclBnFp2 *x, const mclBnFp2 *y);
 MCLBN_DLL_API int mclBnFp2_isZero(const mclBnFp2 *x);
 MCLBN_DLL_API int mclBnFp2_isOne(const mclBnFp2 *x);
 
+// return 1 if half <= x < r, where half = (r + 1) / 2 else 0
+MCLBN_DLL_API int mclBnFr_isNegative(const mclBnFr *x);
+// return 1 if half <= x < p, where half = (p + 1) / 2 else 0
+MCLBN_DLL_API int mclBnFp_isNegative(const mclBnFp *x);
+
 #ifndef MCL_DONT_USE_CSRPNG
 // return 0 if success
 MCLBN_DLL_API int mclBnFr_setByCSPRNG(mclBnFr *x);
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index 18280e6e..f9c3f139 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -175,6 +175,10 @@ int mclBnFr_isOne(const mclBnFr *x)
 {
 	return cast(x)->isOne();
 }
+int mclBnFr_isNegative(const mclBnFr *x)
+{
+	return cast(x)->isNegative();
+}
 
 #ifndef MCL_DONT_USE_CSRPNG
 int mclBnFr_setByCSPRNG(mclBnFr *x)
@@ -727,6 +731,10 @@ int mclBnFp_isOne(const mclBnFp *x)
 {
 	return cast(x)->isOne();
 }
+int mclBnFp_isNegative(const mclBnFp *x)
+{
+	return cast(x)->isNegative();
+}
 
 int mclBnFp_setHashOf(mclBnFp *x, const void *buf, mclSize bufSize)
 {
diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index e1f3ff05..45095b7e 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -327,6 +327,31 @@ CYBOZU_TEST_AUTO(GT_inv)
 	CYBOZU_TEST_ASSERT(mclBnGT_isOne(&e4));
 }
 
+CYBOZU_TEST_AUTO(Fr_isNegative)
+{
+	mclBnFr a, half, one;
+	mclBnFr_setInt(&half, 2);
+	mclBnFr_inv(&half, &half); // half = (r + 1) / 2
+	mclBnFr_setInt(&one, 1);
+	mclBnFr_sub(&a, &half, &one);
+	CYBOZU_TEST_ASSERT(!mclBnFr_isNegative(&a));
+	mclBnFr_add(&a, &a, &one);
+	CYBOZU_TEST_ASSERT(mclBnFr_isNegative(&a));
+}
+
+CYBOZU_TEST_AUTO(Fp_isNegative)
+{
+	mclBnFp a, half, one;
+	mclBnFp_setInt(&half, 2);
+	mclBnFp_inv(&half, &half); // half = (p + 1) / 2
+	mclBnFp_setInt(&one, 1);
+	mclBnFp_sub(&a, &half, &one);
+	CYBOZU_TEST_ASSERT(!mclBnFp_isNegative(&a));
+	mclBnFp_add(&a, &a, &one);
+	CYBOZU_TEST_ASSERT(mclBnFp_isNegative(&a));
+}
+
+
 CYBOZU_TEST_AUTO(pairing)
 {
 	mclBnFr a, b, ab;

From b21c80153a0945a4f60581cb71d17de7f352b0f5 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 30 Sep 2019 12:46:30 +0900
Subject: [PATCH 094/553] add macro for ioMode of getStr

---
 include/mcl/bn.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index e381c498..b910025a 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -112,7 +112,10 @@ typedef struct {
 
 #include <mcl/curve_type.h>
 
+#define MCLBN_IO_EC_AFFINE 0
+#define MCLBN_IO_EC_PROJ 1024
 #define MCLBN_IO_SERIALIZE_HEX_STR 2048
+
 // for backword compatibility
 enum {
 	mclBn_CurveFp254BNb = 0,
@@ -434,7 +437,7 @@ MCLBN_DLL_API void mclBnGT_invGeneric(mclBnGT *y, const mclBnGT *x);
 */
 MCLBN_DLL_API void mclBnGT_powGeneric(mclBnGT *z, const mclBnGT *x, const mclBnFr *y);
 /*
-	pow for only {x|x^r = 1} in Fp12 by GLV method
+	pow for only {x|x^r = 1} in GT by GLV method
 	the value generated by pairing satisfies the condition
 */
 MCLBN_DLL_API void mclBnGT_pow(mclBnGT *z, const mclBnGT *x, const mclBnFr *y);

From c68217b9a75d5c6a713b7adb8db8d304ef50411b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 30 Sep 2019 14:20:24 +0900
Subject: [PATCH 095/553] add mclBnF{p,r}_isOdd

---
 include/mcl/bn.h               | 10 ++++++----
 include/mcl/impl/bn_c_impl.hpp |  8 ++++++++
 test/bn_c_test.hpp             | 21 +++++++++++++++++++++
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index b910025a..6e5d1ff1 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -279,20 +279,22 @@ MCLBN_DLL_API int mclBnFr_isValid(const mclBnFr *x);
 MCLBN_DLL_API int mclBnFr_isEqual(const mclBnFr *x, const mclBnFr *y);
 MCLBN_DLL_API int mclBnFr_isZero(const mclBnFr *x);
 MCLBN_DLL_API int mclBnFr_isOne(const mclBnFr *x);
+MCLBN_DLL_API int mclBnFr_isOdd(const mclBnFr *x);
+// return 1 if half <= x < r, where half = (r + 1) / 2 else 0
+MCLBN_DLL_API int mclBnFr_isNegative(const mclBnFr *x);
 
 MCLBN_DLL_API int mclBnFp_isValid(const mclBnFp *x);
 MCLBN_DLL_API int mclBnFp_isEqual(const mclBnFp *x, const mclBnFp *y);
 MCLBN_DLL_API int mclBnFp_isZero(const mclBnFp *x);
 MCLBN_DLL_API int mclBnFp_isOne(const mclBnFp *x);
+MCLBN_DLL_API int mclBnFp_isOdd(const mclBnFp *x);
+// return 1 if half <= x < p, where half = (p + 1) / 2 else 0
+MCLBN_DLL_API int mclBnFp_isNegative(const mclBnFp *x);
 
 MCLBN_DLL_API int mclBnFp2_isEqual(const mclBnFp2 *x, const mclBnFp2 *y);
 MCLBN_DLL_API int mclBnFp2_isZero(const mclBnFp2 *x);
 MCLBN_DLL_API int mclBnFp2_isOne(const mclBnFp2 *x);
 
-// return 1 if half <= x < r, where half = (r + 1) / 2 else 0
-MCLBN_DLL_API int mclBnFr_isNegative(const mclBnFr *x);
-// return 1 if half <= x < p, where half = (p + 1) / 2 else 0
-MCLBN_DLL_API int mclBnFp_isNegative(const mclBnFp *x);
 
 #ifndef MCL_DONT_USE_CSRPNG
 // return 0 if success
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index f9c3f139..51112a70 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -175,6 +175,10 @@ int mclBnFr_isOne(const mclBnFr *x)
 {
 	return cast(x)->isOne();
 }
+int mclBnFr_isOdd(const mclBnFr *x)
+{
+	return cast(x)->isOdd();
+}
 int mclBnFr_isNegative(const mclBnFr *x)
 {
 	return cast(x)->isNegative();
@@ -731,6 +735,10 @@ int mclBnFp_isOne(const mclBnFp *x)
 {
 	return cast(x)->isOne();
 }
+int mclBnFp_isOdd(const mclBnFp *x)
+{
+	return cast(x)->isOdd();
+}
 int mclBnFp_isNegative(const mclBnFp *x)
 {
 	return cast(x)->isNegative();
diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index 45095b7e..9c1818bb 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -351,6 +351,27 @@ CYBOZU_TEST_AUTO(Fp_isNegative)
 	CYBOZU_TEST_ASSERT(mclBnFp_isNegative(&a));
 }
 
+CYBOZU_TEST_AUTO(Fr_isOdd)
+{
+	mclBnFr x, one;
+	mclBnFr_clear(&x);
+	mclBnFr_setInt(&one, 1);
+	for (size_t i = 0; i < 100; i++) {
+		CYBOZU_TEST_EQUAL(mclBnFr_isOdd(&x), i & 1);
+		mclBnFr_add(&x, &x, &one);
+	}
+}
+
+CYBOZU_TEST_AUTO(Fp_isOdd)
+{
+	mclBnFp x, one;
+	mclBnFp_clear(&x);
+	mclBnFp_setInt(&one, 1);
+	for (size_t i = 0; i < 100; i++) {
+		CYBOZU_TEST_EQUAL(mclBnFp_isOdd(&x), i & 1);
+		mclBnFp_add(&x, &x, &one);
+	}
+}
 
 CYBOZU_TEST_AUTO(pairing)
 {

From e41ef6d6c9a3d84fe532eaa81146951f45254528 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 29 Sep 2019 22:51:23 +0900
Subject: [PATCH 096/553] add api.md

---
 api.md    | 538 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 readme.md | 192 +------------------
 2 files changed, 542 insertions(+), 188 deletions(-)
 create mode 100644 api.md

diff --git a/api.md b/api.md
new file mode 100644
index 00000000..1c0f4ea1
--- /dev/null
+++ b/api.md
@@ -0,0 +1,538 @@
+# C API
+
+## Minimum sample
+
+[sample/pairing_c.c](sample/pairing_c.c) is a sample of how to use BLS12-381 pairing.
+
+```
+cd mcl
+make -j4
+make bin/pairing_c.exe && bin/pairing_c.exe
+```
+
+## Header and libraries
+
+To use BLS12-381, include `mcl/bn_c384_256.h` and link
+- libmclbn384_256.{a,so}
+- libmcl.{a,so} ; core library
+
+`384_256` means the max bit size of `Fp` is 384 and that size of `Fr` is 256.
+
+## Notation
+
+The elliptic equation of a curve E is `E: y^2 = x^3 + b`.
+
+- `Fp` ; a finite field of prime order `p`, over which the curve is defined.
+- `Fr` ; a finite field of a prime order `r`.
+- `Fp2` ; the field extension over Fp with degree 2. Fp[i] / (i^2 + 1).
+- `Fp6` ; the field extension over Fp2 with degree 3. Fp2[v] / (v^3 - Xi) where Xi = i + 1.
+- `Fp12` ; the field extension over Fp6 with degree 2. Fp6[w] / (w^2 - v).
+- `G1` ; the cyclic subgroup of E(Fp).
+- `G2` ; the cyclic subgroup of the inverse image of E'(Fp^2) under a twisting isomorphism from E' to E.
+- `GT` ; the cyclic subgroup of Fp12.
+  - `G1`, `G2` and `GT` have the order `r`.
+
+The pairing e: G1 x G2 -> GT is the optimal ate pairing.
+
+mcl treats `G1` and `G2` as an additive group and `GT` as a multiplicative group.
+
+- `mclSize` ; `unsigned int` if WebAssembly else `size_t`
+
+### Curve Parameter
+r = |G1| = |G2| = |GT|
+
+curveType   | b| r and p |
+------------|--|------------------|
+BN254       | 2|r = 0x2523648240000001ba344d8000000007ff9f800000000010a10000000000000d <br> p = 0x2523648240000001ba344d80000000086121000000000013a700000000000013 |
+BLS12-381   | 4|r = 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001 <br> p = 0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab |
+BN381       | 2|r = 0x240026400f3d82b2e42de125b00158405b710818ac000007e0042f008e3e00000000001080046200000000000000000d <br> p = 0x240026400f3d82b2e42de125b00158405b710818ac00000840046200950400000000001380052e000000000000000013 |
+
+## Structures
+
+### `mclBnFp`
+This is a struct of `Fp`. The value is stored as Montgomery representation.
+
+### `mclBnFr`
+This is a struct of `Fr`. The value is stored as Montgomery representation.
+
+### `mclBnFp2`
+This is a struct of `Fp2` which has a member `mclBnFp d[2]`.
+
+An element `x` of `Fp2` is represented as `x = d[0] + d[1] i` where `i^2 = -1`.
+
+### `mclBnG1`
+This is a struct of `G1` which has three members `x`, `y`, `z` of type `mclBnFp`.
+
+An element `P` of `G1` is represented as `P = [x:y:z]` of a Jacobi coordinate.
+
+### `mclBnG2`
+This is a struct of `G2` which has three members `x`, `y`, `z` of type `mclBnFp2`.
+
+An element `Q` of `G2` is represented as `Q = [x:y:z]` of a Jacobi coordinate.
+
+### `mclBnGT`
+
+This is a struct of `GT` which has a member `mclBnFp d[12]`.
+
+### sizeof
+
+library           |MCLBN_FR_UNIT_SIZE|MCLBN_FP_UNIT_SIZE|sizeof Fr|sizeof Fp|
+------------------|------------------|------------------|---------|---------|
+libmclbn256.a     |          4       |         4        |   32    |   32    |
+libmclbn384_256.a |          4       |         6        |   32    |   48    |
+libmclbn384.a     |          6       |         6        |   48    |   48    |
+
+## Thread safety
+All functions except for initialization and changing global setting are thread-safe.
+
+## Initialization
+
+Initialize mcl library. Call this function at first before calling the other functions.
+
+```
+int mclBn_init(int curve, int compiledTimeVar);
+```
+
+- `curve` ; specify the curve type
+  - MCL_BN254 ; BN254 (a little faster if including `mcl/bn_c256.h` and linking `libmclbn256.{a,so}`)
+  - MCL_BN_SNARK1 ; the same parameter used in libsnark
+  - MCL_BLS12_381 ; BLS12-381
+  - MCL_BN381_1 ; BN381 (include `mcl/bn_c384.h` and link `libmclbn384.{a,so}`)
+- `compiledTimeVar` ; set `MCLBN_COMPILED_TIME_VAR`, which macro is used to make sure that
+the values are the same when the library is built and used.
+- return 0 if success.
+- This is not thread safe.
+
+## Global setting
+
+### Control to verify that a point of the elliptic curve has the order `r`.
+
+This function affects `setStr()` and `deserialize()` for G1/G2.
+```
+void mclBn_verifyOrderG1(int doVerify);
+void mclBn_verifyOrderG2(int doVerify);
+```
+- verify if `doVerify` is 1, otherwise do not verify. The default parameter is 1.
+- The cost of verification is not small, so set `doVerify = 0` carefully if necessary.
+- This is not thread safe.
+
+## Setter / Getter
+
+### Clear
+Set `x` to zero.
+```
+void mclBnFr_clear(mclBnFr *x);
+void mclBnFp_clear(mclBnFp *x);
+void mclBnFp2_clear(mclBnFp2 *x);
+void mclBnG1_clear(mclBnG1 *x);
+void mclBnG2_clear(mclBnG2 *x);
+void mclBnGT_clear(mclBnGT *x);
+```
+
+### Set `x` to `y`.
+```
+void mclBnFp_setInt(mclBnFp *y, mclInt x);
+void mclBnFr_setInt(mclBnFr *y, mclInt x);
+void mclBnGT_setInt(mclBnGT *y, mclInt x);
+```
+
+### Set `buf[0..bufSize-1]` to `x` with masking according to the following way.
+```
+int mclBnFp_setLittleEndian(mclBnFp *x, const void *buf, mclSize bufSize);
+int mclBnFr_setLittleEndian(mclBnFr *x, const void *buf, mclSize bufSize);
+```
+1. set x = buf[0..bufSize-1] as little endian
+2. x &= (1 << bitLen(r)) - 1
+3. if (x >= r) x &= (1 << (bitLen(r) - 1)) - 1
+
+- always return 0
+
+### Set (`buf[0..bufSize-1]` mod `p` or `r`) to `x`.
+```
+int mclBnFp_setLittleEndianMod(mclBnFp *x, const void *buf, mclSize bufSize);
+int mclBnFr_setLittleEndianMod(mclBnFr *x, const void *buf, mclSize bufSize);
+```
+- return 0 if bufSize <= (sizeof(*x) * 8 * 2) else -1
+
+### Get the little endian byte sequence of `x` into `buf[0..maxBufSize-1]`
+```
+mclSize mclBnFr_getLittleEndian(void *buf, mclSize maxBufSize, const mclBnFr *x);
+mclSize mclBnFp_getLittleEndian(void *buf, mclSize maxBufSize, const mclBnFp *x);
+```
+- write `x` to `buf` as little endian
+- return the written size if success else 0
+- NOTE: `buf[0] = 0` and return 1 if `x` is zero.
+
+### Serialization
+### Serialize
+```
+mclSize mclBnFr_serialize(void *buf, mclSize maxBufSize, const mclBnFr *x);
+mclSize mclBnG1_serialize(void *buf, mclSize maxBufSize, const mclBnG1 *x);
+mclSize mclBnG2_serialize(void *buf, mclSize maxBufSize, const mclBnG2 *x);
+mclSize mclBnGT_serialize(void *buf, mclSize maxBufSize, const mclBnGT *x);
+mclSize mclBnFp_serialize(void *buf, mclSize maxBufSize, const mclBnFp *x);
+mclSize mclBnFp2_serialize(void *buf, mclSize maxBufSize, const mclBnFp2 *x);
+```
+- serialize `x` into `buf[0..maxBufSize-1]`
+- return written byte size if success else 0
+
+### Serialization format
+- `Fp`(resp.  `Fr`) ; a little endian byte sequence with a fixed size
+  - the size is the return value of `mclBn_getFpByteSize()` (resp. `mclBn_getFrByteSize()`).
+- `G1` ; a compressed fixed size
+  - the size is equal to `mclBn_getG1ByteSize()` (=`mclBn_getFpByteSize()`).
+- `G2` ; a compressed fixed size
+  - the size is equal to `mclBn_getG1ByteSize() * 2`.
+
+pseudo-code to serialize of `P` of `G1` (resp. `G2`)
+```
+size = mclBn_getG1ByteSize() # resp. mclBn_getG1ByteSize() * 2
+if P is zero:
+  return [0] * size
+else:
+  P = P.normalize()
+  s = P.x.serialize()
+  # x in Fp2 is odd <=> x.a is odd
+  if P.y is odd: # resp. P.y.d[0] is odd
+    s[byte-length(s) - 1] |= 0x80
+  return s
+```
+
+### Ethereum serialization mode for BLS12-381 (experimental)
+```
+void mclBn_setETHserialization(int ETHserialization);
+```
+- serialize according to [ETH2.0 serialization of BLS12-381](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#point-representations) if BLS12-381 is used and `ETHserialization = 1` (default 0).
+
+### Deserialize
+```
+mclSize mclBnFr_deserialize(mclBnFr *x, const void *buf, mclSize bufSize);
+mclSize mclBnG1_deserialize(mclBnG1 *x, const void *buf, mclSize bufSize);
+mclSize mclBnG2_deserialize(mclBnG2 *x, const void *buf, mclSize bufSize);
+mclSize mclBnGT_deserialize(mclBnGT *x, const void *buf, mclSize bufSize);
+mclSize mclBnFp_deserialize(mclBnFp *x, const void *buf, mclSize bufSize);
+mclSize mclBnFp2_deserialize(mclBnFp2 *x, const void *buf, mclSize bufSize);
+```
+- deserialize `x` from `buf[0..bufSize-1]`
+- return read size if success else 0
+
+## String conversion
+### Get string
+```
+mclSize mclBnFr_getStr(char *buf, mclSize maxBufSize, const mclBnFr *x, int ioMode);
+mclSize mclBnG1_getStr(char *buf, mclSize maxBufSize, const mclBnG1 *x, int ioMode);
+mclSize mclBnG2_getStr(char *buf, mclSize maxBufSize, const mclBnG2 *x, int ioMode);
+mclSize mclBnGT_getStr(char *buf, mclSize maxBufSize, const mclBnGT *x, int ioMode);
+mclSize mclBnFp_getStr(char *buf, mclSize maxBufSize, const mclBnFp *x, int ioMode);
+```
+- write `x` to `buf` according to `ioMode`
+- `ioMode`
+  - 10 ; decimal number
+  - 16 ; hexadecimal number
+  - `MCLBN_IO_EC_PROJ` ; output as Jacobi coordinate
+- return `strlen(buf)` if success else 0.
+
+The meaning of the output of `G1`.
+- `0` ; infinity
+- `1 <x> <y>` ; affine coordinate
+- `4 <x> <y> <z>` ; Jacobi coordinate
+- the element `<x>` of `G2` outputs `d[0] d[1]`.
+
+### Set string
+```
+int mclBnFr_setStr(mclBnFr *x, const char *buf, mclSize bufSize, int ioMode);
+int mclBnG1_setStr(mclBnG1 *x, const char *buf, mclSize bufSize, int ioMode);
+int mclBnG2_setStr(mclBnG2 *x, const char *buf, mclSize bufSize, int ioMode);
+int mclBnGT_setStr(mclBnGT *x, const char *buf, mclSize bufSize, int ioMode);
+int mclBnFp_setStr(mclBnFp *x, const char *buf, mclSize bufSize, int ioMode);
+```
+- set `buf[0..bufSize-1]` to `x` according to `ioMode`
+- return 0 if success else -1
+
+If you want to use the same generators of BLS12-381 with [zkcrypto](https://github.com/zkcrypto/pairing/tree/master/src/bls12_381#g2) then,
+
+```
+mclBnG1 P;
+mclBnG1_setStr(&P, "1 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569", 10);
+
+mclBnG2 Q;
+mclBnG2_setStr(&Q, "1 352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160 3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758 1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905 927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582");
+```
+
+
+## Set random value
+Set `x` by cryptographically secure pseudo random number generator.
+```
+int mclBnFr_setByCSPRNG(mclBnFr *x);
+int mclBnFp_setByCSPRNG(mclBnFp *x);
+```
+
+### Change random generator function
+```
+void mclBn_setRandFunc(
+  void *self,
+  unsigned int (*readFunc)(void *self, void *buf, unsigned int bufSize)
+);
+```
+- `self` ; user-defined pointer
+- `readFunc` ; user-defined function, which writes random `bufSize` bytes to `buf` and returns `bufSize` if success else returns 0.
+  - `readFunc` must be thread-safe.
+- Set the default random function if `self == 0` and `readFunc == 0`.
+- This is not thread safe.
+
+## Arithmetic operations
+### neg / inv / sqr / add / sub / mul / div of `Fr`, `Fp`, `Fp2`, `GT`.
+```
+void mclBnFr_neg(mclBnFr *y, const mclBnFr *x);
+void mclBnFr_inv(mclBnFr *y, const mclBnFr *x);
+void mclBnFr_sqr(mclBnFr *y, const mclBnFr *x);
+void mclBnFr_add(mclBnFr *z, const mclBnFr *x, const mclBnFr *y);
+void mclBnFr_sub(mclBnFr *z, const mclBnFr *x, const mclBnFr *y);
+void mclBnFr_mul(mclBnFr *z, const mclBnFr *x, const mclBnFr *y);
+void mclBnFr_div(mclBnFr *z, const mclBnFr *x, const mclBnFr *y);
+
+void mclBnFp_neg(mclBnFp *y, const mclBnFp *x);
+void mclBnFp_inv(mclBnFp *y, const mclBnFp *x);
+void mclBnFp_sqr(mclBnFp *y, const mclBnFp *x);
+void mclBnFp_add(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
+void mclBnFp_sub(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
+void mclBnFp_mul(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
+void mclBnFp_div(mclBnFp *z, const mclBnFp *x, const mclBnFp *y);
+
+void mclBnFp2_neg(mclBnFp2 *y, const mclBnFp2 *x);
+void mclBnFp2_inv(mclBnFp2 *y, const mclBnFp2 *x);
+void mclBnFp2_sqr(mclBnFp2 *y, const mclBnFp2 *x);
+void mclBnFp2_add(mclBnFp2 *z, const mclBnFp2 *x, const mclBnFp2 *y);
+void mclBnFp2_sub(mclBnFp2 *z, const mclBnFp2 *x, const mclBnFp2 *y);
+void mclBnFp2_mul(mclBnFp2 *z, const mclBnFp2 *x, const mclBnFp2 *y);
+void mclBnFp2_div(mclBnFp2 *z, const mclBnFp2 *x, const mclBnFp2 *y);
+
+void mclBnGT_inv(mclBnGT *y, const mclBnGT *x); // y = a - bw for x = a + bw where Fp12 = Fp6[w]
+void mclBnGT_sqr(mclBnGT *y, const mclBnGT *x);
+void mclBnGT_mul(mclBnGT *z, const mclBnGT *x, const mclBnGT *y);
+void mclBnGT_div(mclBnGT *z, const mclBnGT *x, const mclBnGT *y);
+```
+- use `mclBnGT_invGeneric` for an element in Fp12 - GT.
+
+- NOTE: The following functions do NOT return a GT element because GT is a multiplicative group.
+
+```
+void mclBnGT_neg(mclBnGT *y, const mclBnGT *x);
+void mclBnGT_add(mclBnGT *z, const mclBnGT *x, const mclBnGT *y);
+void mclBnGT_sub(mclBnGT *z, const mclBnGT *x, const mclBnGT *y);
+```
+
+### Square root of `x`.
+```
+int mclBnFr_squareRoot(mclBnFr *y, const mclBnFr *x);
+int mclBnFp_squareRoot(mclBnFp *y, const mclBnFp *x);
+int mclBnFp2_squareRoot(mclBnFp2 *y, const mclBnFp2 *x);
+```
+- `y` is one of square root of `x` if `y` exists.
+- return 0 if success else -1
+
+### add / sub / dbl / neg for `G1` and `G2`.
+```
+void mclBnG1_neg(mclBnG1 *y, const mclBnG1 *x);
+void mclBnG1_dbl(mclBnG1 *y, const mclBnG1 *x);
+void mclBnG1_add(mclBnG1 *z, const mclBnG1 *x, const mclBnG1 *y);
+void mclBnG1_sub(mclBnG1 *z, const mclBnG1 *x, const mclBnG1 *y);
+
+void mclBnG2_neg(mclBnG2 *y, const mclBnG2 *x);
+void mclBnG2_dbl(mclBnG2 *y, const mclBnG2 *x);
+void mclBnG2_add(mclBnG2 *z, const mclBnG2 *x, const mclBnG2 *y);
+void mclBnG2_sub(mclBnG2 *z, const mclBnG2 *x, const mclBnG2 *y);
+```
+
+### Convert a point from Jacobi coordinate to affine.
+```
+void mclBnG1_normalize(mclBnG1 *y, const mclBnG1 *x);
+void mclBnG2_normalize(mclBnG2 *y, const mclBnG2 *x);
+```
+- convert `[x:y:z]` to `[x:y:1]` if `z != 0` else `[*:*:0]`
+
+### scalar multiplication
+```
+void mclBnG1_mul(mclBnG1 *z, const mclBnG1 *x, const mclBnFr *y);
+void mclBnG2_mul(mclBnG2 *z, const mclBnG2 *x, const mclBnFr *y);
+void mclBnGT_pow(mclBnGT *z, const mclBnGT *x, const mclBnFr *y);
+```
+- z = x * y for G1 / G2
+- z = pow(x, y) for GT
+
+- use `mclBnGT_powGeneric` for an element in Fp12 - GT.
+
+### multi scalar multiplication
+```
+void mclBnG1_mulVec(mclBnG1 *z, const mclBnG1 *x, const mclBnFr *y, mclSize n);
+void mclBnG2_mulVec(mclBnG2 *z, const mclBnG2 *x, const mclBnFr *y, mclSize n);
+void mclBnGT_powVec(mclBnGT *z, const mclBnGT *x, const mclBnFr *y, mclSize n);
+```
+- z = sum_{i=0}^{n-1} mul(x[i], y[i]) for G1 / G2.
+- z = prod_{i=0}^{n-1} pow(x[i], y[i]) for GT.
+
+## hash and mapTo functions
+### Set hash of `buf[0..bufSize-1]` to `x`
+```
+int mclBnFr_setHashOf(mclBnFr *x, const void *buf, mclSize bufSize);
+int mclBnFp_setHashOf(mclBnFp *x, const void *buf, mclSize bufSize);
+```
+- always return 0
+- use SHA-256 if sizeof(*x) <= 256 else SHA-512
+- set according to the same way as `setLittleEndian`
+  - other ways may be supported in the future if requested
+
+### map `x` to G1 / G2.
+```
+int mclBnFp_mapToG1(mclBnG1 *y, const mclBnFp *x);
+int mclBnFp2_mapToG2(mclBnG2 *y, const mclBnFp2 *x);
+```
+- See `struct MapTo` in `mcl/bn.hpp` for the detail of the algorithm.
+- return 0 if success else -1
+
+### hash and map to G1 / G2.
+```
+int mclBnG1_hashAndMapTo(mclBnG1 *x, const void *buf, mclSize bufSize);
+int mclBnG2_hashAndMapTo(mclBnG2 *x, const void *buf, mclSize bufSize);
+```
+- Combine `setHashOf` and `mapTo` functions
+
+## Pairing operations
+The pairing function `e(P, Q)` consists of two parts:
+  - `MillerLoop(P, Q)`
+  - `finalExp(x)`
+
+`finalExp` satisfies the following properties:
+  - `e(P, Q) = finalExp(MillerLoop(P, Q))`
+  - `e(P1, Q1) e(P2, Q2) = finalExp(MillerLoop(P1, Q1) MillerLoop(P2, Q2))`
+
+### pairing
+```
+void mclBn_pairing(mclBnGT *z, const mclBnG1 *x, const mclBnG2 *y);
+```
+### millerLoop
+```
+void mclBn_millerLoop(mclBnGT *z, const mclBnG1 *x, const mclBnG2 *y);
+```
+### finalExp
+```
+void mclBn_finalExp(mclBnGT *y, const mclBnGT *x);
+```
+
+## Variants of MillerLoop
+### multi pairing
+```
+void mclBn_millerLoopVec(mclBnGT *z, const mclBnG1 *x, const mclBnG2 *y, mclSize n);
+```
+- This function is for multi-pairing
+  - computes prod_{i=0}^{n-1} MillerLoop(x[i], y[i])
+  - prod_{i=0}^{n-1} e(x[i], y[i]) = finalExp(prod_{i=0}^{n-1} MillerLoop(x[i], y[i]))
+
+### pairing for a fixed point of G2
+```
+int mclBn_getUint64NumToPrecompute(void);
+void mclBn_precomputeG2(uint64_t *Qbuf, const mclBnG2 *Q);
+void mclBn_precomputedMillerLoop(mclBnGT *f, const mclBnG1 *P, const uint64_t *Qbuf);
+```
+These functions perform the same computation as `pairing(P, Q);` as follows:
+```
+uint64_t *Qbuf = (uint64_t*)malloc(mclBn_getUint64NumToPrecompute() * sizeof(uint64_t));
+mclBn_precomputeG2(Qbuf, Q); // precomputing of Q
+mclBn_precomputedMillerLoop(f, P, Qbuf); // pairing of any P of G1 and the fixed Q
+free(Qbuf);
+```
+
+```
+void mclBn_precomputedMillerLoop2(
+  mclBnGT *f,
+  const mclBnG1 *P1, const uint64_t *Q1buf,
+  const mclBnG1 *P2, const uint64_t *Q2buf
+);
+```
+- compute `MillerLoop(P1, Q1buf) * MillerLoop(P2, Q2buf)`
+
+
+```
+void mclBn_precomputedMillerLoop2mixed(
+  mclBnGT *f,
+  const mclBnG1 *P1, const mclBnG2 *Q1,
+  const mclBnG1 *P2, const uint64_t *Q2buf
+);
+```
+- compute `MillerLoop(P1, Q1) * MillerLoop(P2, Q2buf)`
+
+## Check value
+### Check validness
+```
+int mclBnFr_isValid(const mclBnFr *x);
+int mclBnFp_isValid(const mclBnFp *x);
+int mclBnG1_isValid(const mclBnG1 *x);
+int mclBnG2_isValid(const mclBnG2 *x);
+```
+- return 1 if true else 0
+
+### Check the order of a point
+```
+int mclBnG1_isValidOrder(const mclBnG1 *x);
+int mclBnG2_isValidOrder(const mclBnG2 *x);
+```
+- Check whether the order of `x` is valid or not
+- return 1 if true else 0
+- This function always checks according to `mclBn_verifyOrderG1` and `mclBn_verifyOrderG2`.
+
+### Is equal / zero / one / isOdd
+```
+int mclBnFr_isEqual(const mclBnFr *x, const mclBnFr *y);
+int mclBnFr_isZero(const mclBnFr *x);
+int mclBnFr_isOne(const mclBnFr *x);
+int mclBnFr_isOdd(const mclBnFr *x);
+
+int mclBnFp_isEqual(const mclBnFp *x, const mclBnFp *y);
+int mclBnFp_isZero(const mclBnFp *x);
+int mclBnFp_isOne(const mclBnFp *x);
+int mclBnFp_isOdd(const mclBnFp *x);
+
+int mclBnFp2_isEqual(const mclBnFp2 *x, const mclBnFp2 *y);
+int mclBnFp2_isZero(const mclBnFp2 *x);
+int mclBnFp2_isOne(const mclBnFp2 *x);
+
+int mclBnG1_isEqual(const mclBnG1 *x, const mclBnG1 *y);
+int mclBnG1_isZero(const mclBnG1 *x);
+
+int mclBnG2_isEqual(const mclBnG2 *x, const mclBnG2 *y);
+int mclBnG2_isZero(const mclBnG2 *x);
+
+int mclBnGT_isEqual(const mclBnGT *x, const mclBnGT *y);
+int mclBnGT_isZero(const mclBnGT *x);
+int mclBnGT_isOne(const mclBnGT *x);
+```
+- return 1 if true else 0
+
+### isNegative
+```
+int mclBnFr_isNegative(const mclBnFr *x);
+int mclBnFp_isNegative(const mclBnFp *x);
+```
+return 1 if x >= half where half = (r + 1) / 2 (resp. (p + 1) / 2).
+
+## Lagrange interpolation
+
+```
+int mclBn_FrLagrangeInterpolation(mclBnFr *out, const mclBnFr *xVec, const mclBnFr *yVec, mclSize k);
+int mclBn_G1LagrangeInterpolation(mclBnG1 *out, const mclBnFr *xVec, const mclBnG1 *yVec, mclSize k);
+int mclBn_G2LagrangeInterpolation(mclBnG2 *out, const mclBnFr *xVec, const mclBnG2 *yVec, mclSize k);
+```
+- Lagrange interpolation
+- recover out = y(0) from {(xVec[i], yVec[i])} for {i=0..k-1}
+- return 0 if success else -1
+  - satisfy that xVec[i] != 0, xVec[i] != xVec[j] for i != j
+
+```
+int mclBn_FrEvaluatePolynomial(mclBnFr *out, const mclBnFr *cVec, mclSize cSize, const mclBnFr *x);
+int mclBn_G1EvaluatePolynomial(mclBnG1 *out, const mclBnG1 *cVec, mclSize cSize, const mclBnFr *x);
+int mclBn_G2EvaluatePolynomial(mclBnG2 *out, const mclBnG2 *cVec, mclSize cSize, const mclBnFr *x);
+```
+- Evaluate polynomial
+- out = f(x) = c[0] + c[1] * x + ... + c[cSize - 1] * x^{cSize - 1}
+- return 0 if success else -1
+  - satisfy cSize >= 1
diff --git a/readme.md b/readme.md
index 18791846..3b437723 100644
--- a/readme.md
+++ b/readme.md
@@ -9,7 +9,6 @@ A portable and fast pairing-based cryptography library.
 mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
-
 # Support architecture
 
 - x86-64 Windows + Visual Studio
@@ -30,6 +29,9 @@ which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
   - BN462 ; a BN curve over the 462-bit prime p(z) where z = 2^114 + 2^101 - 2^14 - 1.
 - BLS12\_381 ; [a BLS12-381 curve](https://blog.z.cash/new-snark-curve/)
 
+# C-API
+see [api.md](api.md)
+
 # How to build on Linux and macOS
 x86-64/ARM/ARM64 Linux, macOS and mingw64 are supported.
 
@@ -232,193 +234,6 @@ pairing   1.394Mclk
 finalExp 546.259Kclk
 ```
 
-# Libraries
-
-* G1 and G2 is defined over Fp
-* The order of G1 and G2 is r.
-* Use `bn256.hpp` if only BN254 is used.
-
-## C++ library
-
-* libmcl.a ; static C++ library of mcl
-* libmcl.so ; shared C++ library of mcl
-* the default parameter of curveType is BN254
-
-header        |support curveType        |sizeof Fr|sizeof Fp|
---------------|-------------------------|---------|---------|
-bn256.hpp     |BN254, BN_SNARK1         |   32    |   32    |
-bls12_381.hpp |the above + BLS12_381    |   32    |   48    |
-bn384.hpp     |the above + BN381_1      |   48    |   48    |
-
-## C library
-
-* Define `MCLBN_FR_UNIT_SIZE` and `MCLBN_FP_UNIT_SIZE` and include bn.h
-* set `MCLBN_FR_UNIT_SIZE = MCLBN_FP_UNIT_SIZE` unless `MCLBN_FR_UNIT_SIZE` is defined
-
-
-library           |MCLBN_FR_UNIT_SIZE|MCLBN_FP_UNIT_SIZE|
-------------------|------------------|------------------|
-sizeof            | Fr               |  Fp              |
-libmclbn256.a     |          4       |         4        |
-libmclbn384_256.a |          4       |         6        |
-libmclbn384.a     |          6       |         6        |
-
-
-* libmclbn*.a ; static C library
-* libmclbn*.so ; shared C library
-
-### 2nd argument of `mclBn_init`
-Specify `MCLBN_COMPILED_TIME_VAR` to 2nd argument of `mclBn_init`, which
-is defined as `MCLBN_FR_UNIT_SIZE * 10 + MCLBN_FP_UNIT_SIZE`.
-This parameter is used to make sure that the values are the same when the library is built and used.
-
-# How to initialize pairing library
-Call `mcl::bn256::initPairing` before calling any operations.
-```
-#include <mcl/bn256.hpp>
-mcl::bn::CurveParam cp = mcl::BN254; // or mcl::BN_SNARK1
-mcl::bn256::initPairing(cp);
-mcl::bn256::G1 P(...);
-mcl::bn256::G2 Q(...);
-mcl::bn256::Fp12 e;
-mcl::bn256::pairing(e, P, Q);
-```
-1. (BN254) a BN curve over the 254-bit prime p = p(z) where z = -(2^62 + 2^55 + 1).
-2. (BN_SNARK1) a BN curve over a 254-bit prime p such that n := p + 1 - t has high 2-adicity.
-3. BN381_1 with `mcl/bn384.hpp`.
-4. BN462 with `mcl/bn512.hpp`.
-
-See [test/bn_test.cpp](https://github.com/herumi/mcl/blob/master/test/bn_test.cpp).
-
-## Default constructor of Fp, Ec, etc.
-A default constructor does not initialize the instance.
-Set a valid value before reffering it.
-
-## Definition of groups
-
-The curve equation for a BN curve is:
-
-	E/Fp: y^2 = x^3 + b .
-
-* the cyclic group G1 is instantiated as E(Fp)[n] where n := p + 1 - t;
-* the cyclic group G2 is instantiated as the inverse image of E'(Fp^2)[n] under a twisting isomorphism phi from E' to E; and
-* the pairing e: G1 x G2 -> Fp12 is the optimal ate pairing.
-
-The field Fp12 is constructed via the following tower:
-
-* Fp2 = Fp[u] / (u^2 + 1)
-* Fp6 = Fp2[v] / (v^3 - Xi) where Xi = u + 1
-* Fp12 = Fp6[w] / (w^2 - v)
-* GT = { x in Fp12 | x^r = 1 }
-
-## Curve Parameter
-r = |G1| = |G2| = |GT|
-
-curveType   | hexadecimal number|
-------------|-------------------|
-BN254 r     | 2523648240000001ba344d8000000007ff9f800000000010a10000000000000d |
-BN254 p     | 2523648240000001ba344d80000000086121000000000013a700000000000013 |
-BN381 r     | 240026400f3d82b2e42de125b00158405b710818ac000007e0042f008e3e00000000001080046200000000000000000d |
-BN381 p     | 240026400f3d82b2e42de125b00158405b710818ac00000840046200950400000000001380052e000000000000000013 |
-BN462 r     | 240480360120023ffffffffff6ff0cf6b7d9bfca0000000000d812908ee1c201f7fffffffff6ff66fc7bf717f7c0000000002401b007e010800d |
-BN462 r     | 240480360120023ffffffffff6ff0cf6b7d9bfca0000000000d812908f41c8020ffffffffff6ff66fc6ff687f640000000002401b00840138013 |
-BLS12-381 r | 73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001 |
-BLS12-381 r | 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab |
-
-## Arithmetic operations
-
-G1 and G2 is additive group and has the following operations:
-
-* T::add(T& z, const T& x, const T& y); // z = x + y
-* T::sub(T& z, const T& x, const T& y); // z = x - y
-* T::neg(T& y, const T& x); // y = -x
-* T::mul(T& z, const T& x, const INT& y); // z = y times scalar multiplication of x
-
-Remark: &z == &x or &y are allowed. INT means integer type such as Fr, int and mpz_class.
-
-`T::mul` uses GLV method then `G2::mul` returns wrong value if x is not in G2.
-Use `T::mulGeneric(T& z, const T& x, const INT& y)` for x in phi^-1(E'(Fp^2)) - G2.
-
-Fp, Fp2, Fp6 and Fp12 have the following operations:
-
-* T::add(T& z, const T& x, const T& y); // z = x + y
-* T::sub(T& z, const T& x, const T& y); // z = x - y
-* T::mul(T& z, const T& x, const T& y); // z = x * y
-* T::div(T& z, const T& x, const T& y); // z = x / y
-* T::neg(T& y, const T& x); // y = -x
-* T::inv(T& y, const T& x); // y = 1/x
-* T::pow(T& z, const T& x, const INT& y); // z = x^y
-* Fp12::unitaryInv(T& y, const T& x); // y = conjugate of x
-
-Remark: `Fp12::mul` uses GLV method then returns wrong value if x is not in GT.
-Use `Fp12::mulGeneric` for x in Fp12 - GT.
-
-## Map To points
-
-Use these functions to make a point of G1 and G2.
-
-* mapToG1(G1& P, const Fp& x); // assume x != 0
-* mapToG2(G2& P, const Fp2& x);
-* hashAndMapToG1(G1& P, const void *buf, size_t bufSize); // set P by the hash value of [buf, bufSize)
-* hashAndMapToG2(G2& P, const void *buf, size_t bufSize);
-
-These functions maps x into Gi according to [\[_Faster hashing to G2_\]].
-
-## String format of G1 and G2
-G1 and G2 have three elements of Fp (x, y, z) for Jacobi coordinate.
-`normalize()` method normalizes it to affine coordinate (x, y, 1) or (0, 0, 0).
-
-getStr(mode = 0) method gets
-
-* `0` ; infinity
-* `1 <x> <y>` ; Affine coordinate with mode = `mcl:IoEcAffine`
-* `4 <x> <y> <z>` ; jacobi/Proj coordinate with mode = `mcl::IoEcProj`
-* `2 <x>` ; compressed format for even y with mode = `mcl::IoEcCompY`
-* `3 <x>` ; compressed format for odd y with mode = `mcl::IoEcCompY`
-
-## Generator of G1 and G2
-
-If you want to use the same generators of BLS12-381 with [zkcrypto](https://github.com/zkcrypto/pairing/tree/master/src/bls12_381#g2) then,
-
-```
-// G1 P
-P.setStr('1 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569')
-
-// G2 Q
-Q.setStr('1 352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160 3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758 1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905 927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582')
-```
-
-## Serialization format of G1 and G2
-
-pseudo-code to serialize of p
-```
-if bit-length(p) % 8 != 0:
-  size = Fp::getByteSize()
-  if p is zero:
-    return [0] * size
-  else:
-    s = x.serialize()
-    # x in Fp2 is odd <=> x.a is odd
-    if y is odd:
-      s[byte-length(s) - 1] |= 0x80
-    return s
-else:
-  size = Fp::getByteSize() + 1
-  if p is zero:
-    return [0] * size
-  else:
-    s = x.serialize()
-    if y is odd:
-      return 2:s
-    else:
-      return 3:s
-```
-
-## Verify an element in G2
-`G2::isValid()` checks that the element is in the curve of G2 and the order of it is r for subgroup attack.
-`G2::set()`, `G2::setStr` and `operator<<` also check the order.
-If you check it out of the library, then you can stop the verification by calling `G2::verifyOrderG2(false)`.
-
 # How to make asm files (optional)
 The asm files generated by this way are already put in `src/asm`, then it is not necessary to do this.
 
@@ -478,6 +293,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
+- 2019/Sep/30 v1.00 add some functions to bn.h ; [api.md](api.md).
 - 2019/Sep/22 v0.99 add mclBnG1_mulVec, etc.
 - 2019/Sep/08 v0.98 bugfix Ec::add(P, Q, R) when P == R
 - 2019/Aug/14 v0.97 add some C api functions

From 910993eba89da6a0c2be65c3f3e3e47750b65b1f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 30 Sep 2019 15:03:41 +0900
Subject: [PATCH 097/553] v1.00

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 787dcd51..224e612d 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x099; /* 0xABC = A.BC */
+static const int version = 0x100; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From 675e87005df90ccf4077d0148181e4e6fed069bb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 30 Sep 2019 16:04:15 +0900
Subject: [PATCH 098/553] [go] add G1MulVec, G2MulVec, MillerLoopVec

---
 ffi/go/mcl/mcl.go      | 24 +++++++++++++++
 ffi/go/mcl/mcl_test.go | 66 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)

diff --git a/ffi/go/mcl/mcl.go b/ffi/go/mcl/mcl.go
index fbc439b4..c00bc8ef 100644
--- a/ffi/go/mcl/mcl.go
+++ b/ffi/go/mcl/mcl.go
@@ -311,6 +311,14 @@ func G1Mul(out *G1, x *G1, y *Fr) {
 	C.mclBnG1_mul(out.getPointer(), x.getPointer(), y.getPointer())
 }
 
+// G1MulVec -- multi scalar multiplication out = sum mul(xVec[i], yVec[i])
+func G1MulVec(out *G1, xVec []G1, yVec []Fr) {
+	if len(xVec) != len (yVec) {
+		panic("xVec and yVec have the same size")
+	}
+	C.mclBnG1_mulVec(out.getPointer(), (*C.mclBnG1)(unsafe.Pointer(&xVec[0])), (*C.mclBnFr)(unsafe.Pointer(&yVec[0])), (C.size_t)(len(xVec)))
+}
+
 // G1MulCT -- constant time (depending on bit lengh of y)
 func G1MulCT(out *G1, x *G1, y *Fr) {
 	C.mclBnG1_mulCT(out.getPointer(), x.getPointer(), y.getPointer())
@@ -421,6 +429,14 @@ func G2Mul(out *G2, x *G2, y *Fr) {
 	C.mclBnG2_mul(out.getPointer(), x.getPointer(), y.getPointer())
 }
 
+// G2MulVec -- multi scalar multiplication out = sum mul(xVec[i], yVec[i])
+func G2MulVec(out *G2, xVec []G2, yVec []Fr) {
+	if len(xVec) != len (yVec) {
+		panic("xVec and yVec have the same size")
+	}
+	C.mclBnG2_mulVec(out.getPointer(), (*C.mclBnG2)(unsafe.Pointer(&xVec[0])), (*C.mclBnFr)(unsafe.Pointer(&yVec[0])), (C.size_t)(len(xVec)))
+}
+
 // GT --
 type GT struct {
 	v C.mclBnGT
@@ -552,6 +568,14 @@ func MillerLoop(out *GT, x *G1, y *G2) {
 	C.mclBn_millerLoop(out.getPointer(), x.getPointer(), y.getPointer())
 }
 
+// MillerLoopVec -- multi pairing
+func MillerLoopVec(out *GT, xVec []G1, yVec []G2) {
+	if len(xVec) != len (yVec) {
+		panic("xVec and yVec have the same size")
+	}
+	C.mclBn_millerLoopVec(out.getPointer(), (*C.mclBnG1)(unsafe.Pointer(&xVec[0])), (*C.mclBnG2)(unsafe.Pointer(&yVec[0])), (C.size_t)(len(xVec)))
+}
+
 // GetUint64NumToPrecompute --
 func GetUint64NumToPrecompute() int {
 	return int(C.mclBn_getUint64NumToPrecompute())
diff --git a/ffi/go/mcl/mcl_test.go b/ffi/go/mcl/mcl_test.go
index 16bb6910..aa9b48b6 100644
--- a/ffi/go/mcl/mcl_test.go
+++ b/ffi/go/mcl/mcl_test.go
@@ -71,6 +71,71 @@ func testNegAdd(t *testing.T) {
 	}
 }
 
+func testVecG1(t *testing.T) {
+	N := 50
+	xVec := make([]G1, N)
+	yVec := make([]Fr, N)
+	xVec[0].HashAndMapTo([]byte("aa"))
+	var R1, R2 G1
+	for i := 0; i < N; i++ {
+		if i > 0 {
+			G1Dbl(&xVec[i], &xVec[i - 1])
+		}
+		yVec[i].SetByCSPRNG()
+		G1Mul(&R1, &xVec[i], &yVec[i])
+		G1Add(&R2, &R2, &R1)
+	}
+	G1MulVec(&R1, xVec, yVec)
+	if !R1.IsEqual(&R2) {
+		t.Errorf("wrong G1MulVec")
+	}
+}
+
+func testVecG2(t *testing.T) {
+	N := 50
+	xVec := make([]G2, N)
+	yVec := make([]Fr, N)
+	xVec[0].HashAndMapTo([]byte("aa"))
+	var R1, R2 G2
+	for i := 0; i < N; i++ {
+		if i > 0 {
+			G2Dbl(&xVec[i], &xVec[i - 1])
+		}
+		yVec[i].SetByCSPRNG()
+		G2Mul(&R1, &xVec[i], &yVec[i])
+		G2Add(&R2, &R2, &R1)
+	}
+	G2MulVec(&R1, xVec, yVec)
+	if !R1.IsEqual(&R2) {
+		t.Errorf("wrong G2MulVec")
+	}
+}
+
+func testVecPairing(t *testing.T) { // FinalExp(MillerLoopVec) must equal the product of the N single pairings
+	N := 50
+	xVec := make([]G1, N)
+	yVec := make([]G2, N)
+	var e1, e2 GT
+	e1.SetInt64(1) // multiplicative identity: start value for the running product
+	for i := 0; i < N; i++ {
+		xVec[i].HashAndMapTo([]byte("aa")) // fill slot i; writing only slot 0 left the rest zero-valued
+		yVec[i].HashAndMapTo([]byte("aa"))
+		Pairing(&e2, &xVec[i], &yVec[i])
+		GTMul(&e1, &e1, &e2)
+	}
+	MillerLoopVec(&e2, xVec, yVec)
+	FinalExp(&e2, &e2)
+	if !e1.IsEqual(&e2) {
+		t.Errorf("wrong MillerLoopVec")
+	}
+}
+
+func testVec(t *testing.T) {
+	testVecG1(t)
+	testVecG2(t)
+	testVecPairing(t)
+}
+
 func testPairing(t *testing.T) {
 	var a, b, ab Fr
 	err := a.SetString("123", 10)
@@ -138,6 +203,7 @@ func testMcl(t *testing.T, c int) {
 	testHash(t)
 	testNegAdd(t)
 	testPairing(t)
+	testVec(t)
 	testGT(t)
 	testBadPointOfG2(t)
 }

From 4d3f4009cca71209317fccfd067605f0397edabd Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 30 Sep 2019 16:15:19 +0900
Subject: [PATCH 099/553] fix typo

---
 api.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api.md b/api.md
index 1c0f4ea1..70137a15 100644
--- a/api.md
+++ b/api.md
@@ -314,7 +314,7 @@ void mclBnGT_div(mclBnGT *z, const mclBnGT *x, const mclBnGT *y);
 ```
 - use `mclBnGT_invGeneric` for an element in Fp12 - GT.
 
-- NOTE: The following functions does NOT return a GT element because GT is multiplicative group.
+- NOTE: The following functions do NOT return a GT element because GT is multiplicative group.
 
 ```
 void mclBnGT_neg(mclBnGT *y, const mclBnGT *x);

From cf437ac839a4b1679df1c173f015872fc4ad9630 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 30 Sep 2019 17:03:56 +0900
Subject: [PATCH 100/553] fix typo

---
 api.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api.md b/api.md
index 70137a15..0a6f000e 100644
--- a/api.md
+++ b/api.md
@@ -154,7 +154,7 @@ int mclBnFr_setLittleEndianMod(mclBnFr *x, const void *buf, mclSize bufSize);
 ```
 - return 0 if bufSize <= (sizeof(*x) * 8 * 2) else -1
 
-### Get little endian byte sequence corresponding `buf[0..maxBufSize-1]` to `x`
+### Get little endian byte sequence `buf` corresponding to `x`
 ```
 mclSize mclBnFr_getLittleEndian(void *buf, mclSize maxBufSize, const mclBnFr *x);
 mclSize mclBnFp_getLittleEndian(void *buf, mclSize maxBufSize, const mclBnFp *x);

From 92539b0cd7e8711008a52579022c0e06bd78afa7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 5 Oct 2019 14:54:32 +0900
Subject: [PATCH 101/553] cybozu::Stream supports -fno-exceptions

---
 include/cybozu/stream.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/cybozu/stream.hpp b/include/cybozu/stream.hpp
index bc110bdb..6e03bd09 100644
--- a/include/cybozu/stream.hpp
+++ b/include/cybozu/stream.hpp
@@ -9,7 +9,9 @@
 #include <string>
 #include <iosfwd>
 #endif
+#ifndef CYBOZU_DONT_USE_EXCEPTION
 #include <cybozu/exception.hpp>
+#endif
 #include <memory.h>
 
 namespace cybozu {

From 5e02eafe9e5059a8f37e5690566c2ee8b70559e8 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 7 Oct 2019 15:01:13 +0900
Subject: [PATCH 102/553] fix for 32-bit env

---
 include/mcl/ec.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 1e6172e0..b598db69 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -1151,7 +1151,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		/*
 			L = log2(y), w = (L <= 32) ? 3 : (L <= 128) ? 4 : 5;
 		*/
-		const int w = (yn == 1 && *y <= (fp::Unit(1) << 32)) ? 3 : (yn * sizeof(fp::Unit) > 16) ? 5 : 4;
+		const int w = (yn == 1 && *y <= (1ull << 32)) ? 3 : (yn * sizeof(fp::Unit) > 16) ? 5 : 4;
 		const size_t tblSize = size_t(1) << (w - 2);
 		typedef mcl::FixedArray<int8_t, sizeof(EcT::Fp) * 8 + 1> NafArray;
 		NafArray naf;

From b108b31c17ea2c8889f5a0e3ef987b12475f4916 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 11 Oct 2019 10:24:59 +0900
Subject: [PATCH 103/553] disable hash if DONT_USE_EXCEPTION

---
 include/mcl/ec.hpp | 2 ++
 include/mcl/fp.hpp | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index b598db69..195394db 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -1491,6 +1491,7 @@ void initCurve(int curveType, Ec *P = 0, mcl::fp::Mode mode = fp::FP_AUTO, mcl::
 
 } // mcl
 
+#ifndef CYBOZU_DONT_USE_EXCEPTION
 #ifdef CYBOZU_USE_BOOST
 namespace mcl {
 template<class Fp>
@@ -1517,6 +1518,7 @@ struct hash<mcl::EcT<Fp> > {
 
 CYBOZU_NAMESPACE_TR1_END } // std
 #endif
+#endif
 
 #ifdef _MSC_VER
 	#pragma warning(pop)
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 62d592f7..a1f4b2cf 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -688,6 +688,7 @@ template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::sqr)(FpT& y,
 
 } // mcl
 
+#ifndef CYBOZU_DONT_USE_EXCEPTION
 #ifdef CYBOZU_USE_BOOST
 namespace mcl {
 
@@ -711,6 +712,7 @@ struct hash<mcl::FpT<tag, maxBitSize> > {
 
 CYBOZU_NAMESPACE_TR1_END } // std::tr1
 #endif
+#endif
 
 #ifdef _MSC_VER
 	#pragma warning(pop)

From 402b825fc08ef7022f757c1b2b0d8fed4bdf5e0c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 11 Oct 2019 10:31:07 +0900
Subject: [PATCH 104/553] go fmt

---
 ffi/go/mcl/init.go     | 3 +--
 ffi/go/mcl/mcl.go      | 8 ++++----
 ffi/go/mcl/mcl_test.go | 4 ++--
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/ffi/go/mcl/init.go b/ffi/go/mcl/init.go
index aaa7a7cd..89475ebb 100644
--- a/ffi/go/mcl/init.go
+++ b/ffi/go/mcl/init.go
@@ -11,6 +11,7 @@ package mcl
 */
 import "C"
 import "fmt"
+
 // Init --
 // call this function before calling all the other operations
 // this function is not thread safe
@@ -21,5 +22,3 @@ func Init(curve int) error {
 	}
 	return nil
 }
-
-
diff --git a/ffi/go/mcl/mcl.go b/ffi/go/mcl/mcl.go
index c00bc8ef..4f4a5d42 100644
--- a/ffi/go/mcl/mcl.go
+++ b/ffi/go/mcl/mcl.go
@@ -313,7 +313,7 @@ func G1Mul(out *G1, x *G1, y *Fr) {
 
 // G1MulVec -- multi scalar multiplication out = sum mul(xVec[i], yVec[i])
 func G1MulVec(out *G1, xVec []G1, yVec []Fr) {
-	if len(xVec) != len (yVec) {
+	if len(xVec) != len(yVec) {
 		panic("xVec and yVec have the same size")
 	}
 	C.mclBnG1_mulVec(out.getPointer(), (*C.mclBnG1)(unsafe.Pointer(&xVec[0])), (*C.mclBnFr)(unsafe.Pointer(&yVec[0])), (C.size_t)(len(xVec)))
@@ -431,7 +431,7 @@ func G2Mul(out *G2, x *G2, y *Fr) {
 
 // G2MulVec -- multi scalar multiplication out = sum mul(xVec[i], yVec[i])
 func G2MulVec(out *G2, xVec []G2, yVec []Fr) {
-	if len(xVec) != len (yVec) {
+	if len(xVec) != len(yVec) {
 		panic("xVec and yVec have the same size")
 	}
 	C.mclBnG2_mulVec(out.getPointer(), (*C.mclBnG2)(unsafe.Pointer(&xVec[0])), (*C.mclBnFr)(unsafe.Pointer(&yVec[0])), (C.size_t)(len(xVec)))
@@ -568,9 +568,9 @@ func MillerLoop(out *GT, x *G1, y *G2) {
 	C.mclBn_millerLoop(out.getPointer(), x.getPointer(), y.getPointer())
 }
 
-// MillerLoopVec -- multi pairing
+// MillerLoopVec -- multi pairings ; out = prod_i e(xVec[i], yVec[i])
 func MillerLoopVec(out *GT, xVec []G1, yVec []G2) {
-	if len(xVec) != len (yVec) {
+	if len(xVec) != len(yVec) {
 		panic("xVec and yVec have the same size")
 	}
 	C.mclBn_millerLoopVec(out.getPointer(), (*C.mclBnG1)(unsafe.Pointer(&xVec[0])), (*C.mclBnG2)(unsafe.Pointer(&yVec[0])), (C.size_t)(len(xVec)))
diff --git a/ffi/go/mcl/mcl_test.go b/ffi/go/mcl/mcl_test.go
index aa9b48b6..7146a79f 100644
--- a/ffi/go/mcl/mcl_test.go
+++ b/ffi/go/mcl/mcl_test.go
@@ -79,7 +79,7 @@ func testVecG1(t *testing.T) {
 	var R1, R2 G1
 	for i := 0; i < N; i++ {
 		if i > 0 {
-			G1Dbl(&xVec[i], &xVec[i - 1])
+			G1Dbl(&xVec[i], &xVec[i-1])
 		}
 		yVec[i].SetByCSPRNG()
 		G1Mul(&R1, &xVec[i], &yVec[i])
@@ -99,7 +99,7 @@ func testVecG2(t *testing.T) {
 	var R1, R2 G2
 	for i := 0; i < N; i++ {
 		if i > 0 {
-			G2Dbl(&xVec[i], &xVec[i - 1])
+			G2Dbl(&xVec[i], &xVec[i-1])
 		}
 		yVec[i].SetByCSPRNG()
 		G2Mul(&R1, &xVec[i], &yVec[i])

From 92681e984e34ebe1d3fd3e5709e43e9221ddb087 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 15 Oct 2019 15:51:49 +0900
Subject: [PATCH 105/553] [go] add some operations in bn.h

---
 ffi/go/mcl/mcl.go | 315 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 313 insertions(+), 2 deletions(-)

diff --git a/ffi/go/mcl/mcl.go b/ffi/go/mcl/mcl.go
index 4f4a5d42..7af4f125 100644
--- a/ffi/go/mcl/mcl.go
+++ b/ffi/go/mcl/mcl.go
@@ -125,6 +125,16 @@ func (x *Fr) SetLittleEndian(buf []byte) error {
 	return nil
 }
 
+// SetLittleEndianMod --
+func (x *Fr) SetLittleEndianMod(buf []byte) error {
+	// #nosec
+	err := C.mclBnFr_setLittleEndianMod(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if err != 0 {
+		return fmt.Errorf("err mclBnFr_setLittleEndianMod %x", err)
+	}
+	return nil
+}
+
 // IsEqual --
 func (x *Fr) IsEqual(rhs *Fr) bool {
 	return C.mclBnFr_isEqual(x.getPointer(), rhs.getPointer()) == 1
@@ -135,11 +145,26 @@ func (x *Fr) IsZero() bool {
 	return C.mclBnFr_isZero(x.getPointer()) == 1
 }
 
+// IsValid --
+func (x *Fr) IsValid() bool {
+	return C.mclBnFr_isValid(x.getPointer()) == 1
+}
+
 // IsOne --
 func (x *Fr) IsOne() bool {
 	return C.mclBnFr_isOne(x.getPointer()) == 1
 }
 
+// IsOdd --
+func (x *Fr) IsOdd() bool {
+	return C.mclBnFr_isOdd(x.getPointer()) == 1
+}
+
+// IsNegative -- true if x >= (r + 1) / 2
+func (x *Fr) IsNegative() bool {
+	return C.mclBnFr_isNegative(x.getPointer()) == 1
+}
+
 // SetByCSPRNG --
 func (x *Fr) SetByCSPRNG() {
 	err := C.mclBnFr_setByCSPRNG(x.getPointer())
@@ -186,6 +211,11 @@ func FrInv(out *Fr, x *Fr) {
 	C.mclBnFr_inv(out.getPointer(), x.getPointer())
 }
 
+// FrSqr --
+func FrSqr(out *Fr, x *Fr) {
+	C.mclBnFr_sqr(out.getPointer(), x.getPointer())
+}
+
 // FrAdd --
 func FrAdd(out *Fr, x *Fr, y *Fr) {
 	C.mclBnFr_add(out.getPointer(), x.getPointer(), y.getPointer())
@@ -206,9 +236,278 @@ func FrDiv(out *Fr, x *Fr, y *Fr) {
 	C.mclBnFr_div(out.getPointer(), x.getPointer(), y.getPointer())
 }
 
+// FrSquareRoot --
+func FrSquareRoot(out *Fr, x *Fr) bool {
+	return C.mclBnFr_squareRoot(out.getPointer(), x.getPointer()) == 0
+}
+
+// Fp --
+type Fp struct {
+	v C.mclBnFp
+}
+
+// getPointer --
+func (x *Fp) getPointer() (p *C.mclBnFp) {
+	// #nosec
+	return (*C.mclBnFp)(unsafe.Pointer(x))
+}
+
+// Clear --
+func (x *Fp) Clear() {
+	// #nosec
+	C.mclBnFp_clear(x.getPointer())
+}
+
+// SetInt64 --
+func (x *Fp) SetInt64(v int64) {
+	// #nosec
+	C.mclBnFp_setInt(x.getPointer(), C.int64_t(v))
+}
+
+// SetString --
+func (x *Fp) SetString(s string, base int) error {
+	buf := []byte(s)
+	// #nosec
+	err := C.mclBnFp_setStr(x.getPointer(), (*C.char)(unsafe.Pointer(&buf[0])), C.size_t(len(buf)), C.int(base))
+	if err != 0 {
+		return fmt.Errorf("err mclBnFp_setStr %x", err)
+	}
+	return nil
+}
+
+// Deserialize --
+func (x *Fp) Deserialize(buf []byte) error {
+	// #nosec
+	err := C.mclBnFp_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if err == 0 {
+		return fmt.Errorf("err mclBnFp_deserialize %x", buf)
+	}
+	return nil
+}
+
+// SetLittleEndian --
+func (x *Fp) SetLittleEndian(buf []byte) error {
+	// #nosec
+	err := C.mclBnFp_setLittleEndian(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if err != 0 {
+		return fmt.Errorf("err mclBnFp_setLittleEndian %x", err)
+	}
+	return nil
+}
+
+// SetLittleEndianMod --
+func (x *Fp) SetLittleEndianMod(buf []byte) error {
+	// #nosec
+	err := C.mclBnFp_setLittleEndianMod(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if err != 0 {
+		return fmt.Errorf("err mclBnFp_setLittleEndianMod %x", err)
+	}
+	return nil
+}
+
+// IsEqual --
+func (x *Fp) IsEqual(rhs *Fp) bool {
+	return C.mclBnFp_isEqual(x.getPointer(), rhs.getPointer()) == 1
+}
+
+// IsZero --
+func (x *Fp) IsZero() bool {
+	return C.mclBnFp_isZero(x.getPointer()) == 1
+}
+
+// IsValid --
+func (x *Fp) IsValid() bool {
+	return C.mclBnFp_isValid(x.getPointer()) == 1
+}
+
+// IsOne --
+func (x *Fp) IsOne() bool {
+	return C.mclBnFp_isOne(x.getPointer()) == 1
+}
+
+// IsOdd --
+func (x *Fp) IsOdd() bool {
+	return C.mclBnFp_isOdd(x.getPointer()) == 1
+}
+
+// IsNegative -- true if x >= (p + 1) / 2
+func (x *Fp) IsNegative() bool {
+	return C.mclBnFp_isNegative(x.getPointer()) == 1
+}
+
+// SetByCSPRNG --
+func (x *Fp) SetByCSPRNG() {
+	err := C.mclBnFp_setByCSPRNG(x.getPointer())
+	if err != 0 {
+		panic("err mclBnFp_setByCSPRNG")
+	}
+}
+
+// SetHashOf --
+func (x *Fp) SetHashOf(buf []byte) bool {
+	// #nosec
+	return C.mclBnFp_setHashOf(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf))) == 0
+}
+
+// GetString --
+func (x *Fp) GetString(base int) string {
+	buf := make([]byte, 2048)
+	// #nosec
+	n := C.mclBnFp_getStr((*C.char)(unsafe.Pointer(&buf[0])), C.size_t(len(buf)), x.getPointer(), C.int(base))
+	if n == 0 {
+		panic("err mclBnFp_getStr")
+	}
+	return string(buf[:n])
+}
+
+// Serialize --
+func (x *Fp) Serialize() []byte {
+	buf := make([]byte, 2048)
+	// #nosec
+	n := C.mclBnFp_serialize(unsafe.Pointer(&buf[0]), C.size_t(len(buf)), x.getPointer())
+	if n == 0 {
+		panic("err mclBnFp_serialize")
+	}
+	return buf[:n]
+}
+
+// FpNeg --
+func FpNeg(out *Fp, x *Fp) {
+	C.mclBnFp_neg(out.getPointer(), x.getPointer())
+}
+
+// FpInv --
+func FpInv(out *Fp, x *Fp) {
+	C.mclBnFp_inv(out.getPointer(), x.getPointer())
+}
+
+// FpSqr --
+func FpSqr(out *Fp, x *Fp) {
+	C.mclBnFp_sqr(out.getPointer(), x.getPointer())
+}
+
+// FpAdd --
+func FpAdd(out *Fp, x *Fp, y *Fp) {
+	C.mclBnFp_add(out.getPointer(), x.getPointer(), y.getPointer())
+}
+
+// FpSub --
+func FpSub(out *Fp, x *Fp, y *Fp) {
+	C.mclBnFp_sub(out.getPointer(), x.getPointer(), y.getPointer())
+}
+
+// FpMul --
+func FpMul(out *Fp, x *Fp, y *Fp) {
+	C.mclBnFp_mul(out.getPointer(), x.getPointer(), y.getPointer())
+}
+
+// FpDiv --
+func FpDiv(out *Fp, x *Fp, y *Fp) {
+	C.mclBnFp_div(out.getPointer(), x.getPointer(), y.getPointer())
+}
+
+// FpSquareRoot --
+func FpSquareRoot(out *Fp, x *Fp) bool {
+	return C.mclBnFp_squareRoot(out.getPointer(), x.getPointer()) == 0
+}
+
+// Fp2 --
+type Fp2 struct {
+	d [2]Fp
+}
+// getPointer --
+func (x *Fp2) getPointer() (p *C.mclBnFp2) {
+	// #nosec
+	return (*C.mclBnFp2)(unsafe.Pointer(x))
+}
+
+// Clear --
+func (x *Fp2) Clear() {
+	// #nosec
+	C.mclBnFp2_clear(x.getPointer())
+}
+
+// Deserialize --
+func (x *Fp2) Deserialize(buf []byte) error {
+	// #nosec
+	err := C.mclBnFp2_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if err == 0 {
+		return fmt.Errorf("err mclBnFp2_deserialize %x", buf)
+	}
+	return nil
+}
+
+// IsEqual --
+func (x *Fp2) IsEqual(rhs *Fp2) bool {
+	return C.mclBnFp2_isEqual(x.getPointer(), rhs.getPointer()) == 1
+}
+
+// IsZero --
+func (x *Fp2) IsZero() bool {
+	return C.mclBnFp2_isZero(x.getPointer()) == 1
+}
+
+// IsOne --
+func (x *Fp2) IsOne() bool {
+	return C.mclBnFp2_isOne(x.getPointer()) == 1
+}
+
+// Serialize --
+func (x *Fp2) Serialize() []byte {
+	buf := make([]byte, 2048)
+	// #nosec
+	n := C.mclBnFp2_serialize(unsafe.Pointer(&buf[0]), C.size_t(len(buf)), x.getPointer())
+	if n == 0 {
+		panic("err mclBnFp2_serialize")
+	}
+	return buf[:n]
+}
+
+// Fp2Neg --
+func Fp2Neg(out *Fp2, x *Fp2) {
+	C.mclBnFp2_neg(out.getPointer(), x.getPointer())
+}
+
+// Fp2Inv --
+func Fp2Inv(out *Fp2, x *Fp2) {
+	C.mclBnFp2_inv(out.getPointer(), x.getPointer())
+}
+
+// Fp2Sqr --
+func Fp2Sqr(out *Fp2, x *Fp2) {
+	C.mclBnFp2_sqr(out.getPointer(), x.getPointer())
+}
+
+// Fp2Add --
+func Fp2Add(out *Fp2, x *Fp2, y *Fp2) {
+	C.mclBnFp2_add(out.getPointer(), x.getPointer(), y.getPointer())
+}
+
+// Fp2Sub --
+func Fp2Sub(out *Fp2, x *Fp2, y *Fp2) {
+	C.mclBnFp2_sub(out.getPointer(), x.getPointer(), y.getPointer())
+}
+
+// Fp2Mul --
+func Fp2Mul(out *Fp2, x *Fp2, y *Fp2) {
+	C.mclBnFp2_mul(out.getPointer(), x.getPointer(), y.getPointer())
+}
+
+// Fp2Div --
+func Fp2Div(out *Fp2, x *Fp2, y *Fp2) {
+	C.mclBnFp2_div(out.getPointer(), x.getPointer(), y.getPointer())
+}
+
+// Fp2SquareRoot --
+func Fp2SquareRoot(out *Fp2, x *Fp2) bool {
+	return C.mclBnFp2_squareRoot(out.getPointer(), x.getPointer()) == 0
+}
+
 // G1 --
 type G1 struct {
-	v C.mclBnG1
+	x Fp
+	y Fp
+	z Fp
 }
 
 // getPointer --
@@ -254,6 +553,11 @@ func (x *G1) IsZero() bool {
 	return C.mclBnG1_isZero(x.getPointer()) == 1
 }
 
+// IsValid --
+func (x *G1) IsValid() bool {
+	return C.mclBnG1_isValid(x.getPointer()) == 1
+}
+
 // HashAndMapTo --
 func (x *G1) HashAndMapTo(buf []byte) error {
 	// #nosec
@@ -326,7 +630,9 @@ func G1MulCT(out *G1, x *G1, y *Fr) {
 
 // G2 --
 type G2 struct {
-	v C.mclBnG2
+	x Fp2
+	y Fp2
+	z Fp2
 }
 
 // getPointer --
@@ -372,6 +678,11 @@ func (x *G2) IsZero() bool {
 	return C.mclBnG2_isZero(x.getPointer()) == 1
 }
 
+// IsValid --
+func (x *G2) IsValid() bool {
+	return C.mclBnG2_isValid(x.getPointer()) == 1
+}
+
 // HashAndMapTo --
 func (x *G2) HashAndMapTo(buf []byte) error {
 	// #nosec

From 26c565176d4b4fb388e487b713970488fc92a87a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 16 Oct 2019 21:57:53 +0900
Subject: [PATCH 106/553] [go] public member of G1/G2

---
 ffi/go/mcl/mcl.go | 53 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/ffi/go/mcl/mcl.go b/ffi/go/mcl/mcl.go
index 7af4f125..c13d3f92 100644
--- a/ffi/go/mcl/mcl.go
+++ b/ffi/go/mcl/mcl.go
@@ -19,18 +19,24 @@ const CurveFp382_1 = C.mclBn_CurveFp382_1
 // CurveFp382_2 -- 382 bit curve 2
 const CurveFp382_2 = C.mclBn_CurveFp382_2
 
-// BLS12_381
+// BLS12_381 --
 const BLS12_381 = C.MCL_BLS12_381
 
-// IoSerializeHexStr
+// IoSerializeHexStr --
 const IoSerializeHexStr = C.MCLBN_IO_SERIALIZE_HEX_STR
 
-// GetFrUnitSize() --
+// IO_EC_AFFINE --
+const IO_EC_AFFINE = C.MCLBN_IO_EC_AFFINE
+
+// IO_EC_PROJ --
+const IO_EC_PROJ = C.MCLBN_IO_EC_PROJ
+
+// GetFrUnitSize --
 func GetFrUnitSize() int {
 	return int(C.MCLBN_FR_UNIT_SIZE)
 }
 
-// GetFpUnitSize() --
+// GetFpUnitSize --
 // same as GetMaxOpUnitSize()
 func GetFpUnitSize() int {
 	return int(C.MCLBN_FP_UNIT_SIZE)
@@ -71,6 +77,18 @@ func GetFieldOrder() string {
 	return string(buf[:n])
 }
 
+// SetETHserialization --
+func SetETHserialization(enable bool) {
+	var v C.int
+	if enable {
+		v = 1
+	} else {
+		v = 0
+	}
+	// #nosec
+	C.mclBn_setETHserialization(v)
+}
+
 // Fr --
 type Fr struct {
 	v C.mclBnFr
@@ -411,10 +429,11 @@ func FpSquareRoot(out *Fp, x *Fp) bool {
 	return C.mclBnFp_squareRoot(out.getPointer(), x.getPointer()) == 0
 }
 
-// Fp2 --
+// Fp2 -- x = D[0] + D[1] i where i^2 = -1
 type Fp2 struct {
-	d [2]Fp
+	D [2]Fp
 }
+
 // getPointer --
 func (x *Fp2) getPointer() (p *C.mclBnFp2) {
 	// #nosec
@@ -505,9 +524,9 @@ func Fp2SquareRoot(out *Fp2, x *Fp2) bool {
 
 // G1 --
 type G1 struct {
-	x Fp
-	y Fp
-	z Fp
+	X Fp
+	Y Fp
+	Z Fp
 }
 
 // getPointer --
@@ -590,6 +609,11 @@ func (x *G1) Serialize() []byte {
 	return buf[:n]
 }
 
+// G1Normalize --
+func G1Normalize(out *G1, x *G1) {
+	C.mclBnG1_normalize(out.getPointer(), x.getPointer())
+}
+
 // G1Neg --
 func G1Neg(out *G1, x *G1) {
 	C.mclBnG1_neg(out.getPointer(), x.getPointer())
@@ -630,9 +654,9 @@ func G1MulCT(out *G1, x *G1, y *Fr) {
 
 // G2 --
 type G2 struct {
-	x Fp2
-	y Fp2
-	z Fp2
+	X Fp2
+	Y Fp2
+	Z Fp2
 }
 
 // getPointer --
@@ -715,6 +739,11 @@ func (x *G2) Serialize() []byte {
 	return buf[:n]
 }
 
+// G2Normalize --
+func G2Normalize(out *G2, x *G2) {
+	C.mclBnG2_normalize(out.getPointer(), x.getPointer())
+}
+
 // G2Neg --
 func G2Neg(out *G2, x *G2) {
 	C.mclBnG2_neg(out.getPointer(), x.getPointer())

From 3013b7d90b6f3b2676c7893811e7b75cda36dfa5 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 18 Oct 2019 11:46:32 +0900
Subject: [PATCH 107/553] add dependency of bn_c384_256.cpp

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 15a90499..7c4a0a84 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ include common.mk
 LIB_DIR=lib
 OBJ_DIR=obj
 EXE_DIR=bin
-SRC_SRC=fp.cpp bn_c256.cpp bn_c384.cpp bn_c512.cpp she_c256.cpp
+SRC_SRC=fp.cpp bn_c256.cpp bn_c384.cpp bn_c384_256.cpp bn_c512.cpp she_c256.cpp
 TEST_SRC=fp_test.cpp ec_test.cpp fp_util_test.cpp window_method_test.cpp elgamal_test.cpp fp_tower_test.cpp gmp_test.cpp bn_test.cpp bn384_test.cpp glv_test.cpp paillier_test.cpp she_test.cpp vint_test.cpp bn512_test.cpp ecdsa_test.cpp conversion_test.cpp
 TEST_SRC+=bn_c256_test.cpp bn_c384_test.cpp bn_c384_256_test.cpp bn_c512_test.cpp
 TEST_SRC+=she_c256_test.cpp she_c384_test.cpp she_c384_256_test.cpp

From 432b5457d411b4696f918b62500f5ec8b851d0b7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 18 Oct 2019 14:43:25 +0900
Subject: [PATCH 108/553] split calcGi into mapToEc and mulByCofactor

---
 include/mcl/bn.hpp | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 2d9eed59..3a8d61b8 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -519,13 +519,18 @@ struct MapTo {
 			initBLS12(z);
 		}
 	}
-	bool calcG1(G1& P, const Fp& t) const
+	template<class G, class F>
+	bool mapToEc(G& P, const F& t) const
 	{
 		if (useNaiveMapTo_) {
-			naiveMapTo<G1, Fp>(P, t);
+			naiveMapTo<G, F>(P, t);
 		} else {
-			if (!calcBN<G1, Fp>(P, t)) return false;
+			if (!calcBN<G, F>(P, t)) return false;
 		}
+		return true;
+	}
+	void mulByCofactor(G1& P) const
+	{
 		switch (type_) {
 		case BNtype:
 			// no subgroup
@@ -535,18 +540,9 @@ struct MapTo {
 			break;
 		}
 		assert(P.isValid());
-		return true;
 	}
-	/*
-		get the element in G2 by multiplying the cofactor
-	*/
-	bool calcG2(G2& P, const Fp2& t) const
+	void mulByCofactor(G2& P) const
 	{
-		if (useNaiveMapTo_) {
-			naiveMapTo<G2, Fp2>(P, t);
-		} else {
-			if (!calcBN<G2, Fp2>(P, t)) return false;
-		}
 		switch(type_) {
 		case BNtype:
 			mulByCofactorBN(P, P);
@@ -556,6 +552,12 @@ struct MapTo {
 			break;
 		}
 		assert(P.isValid());
+	}
+	template<class G, class F>
+	bool calc(G& P, const F& t) const
+	{
+		if (!mapToEc(P, t)) return false;
+		mulByCofactor(P);
 		return true;
 	}
 };
@@ -2066,8 +2068,8 @@ inline void millerLoopVec(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
 	}
 }
 
-inline void mapToG1(bool *pb, G1& P, const Fp& x) { *pb = BN::param.mapTo.calcG1(P, x); }
-inline void mapToG2(bool *pb, G2& P, const Fp2& x) { *pb = BN::param.mapTo.calcG2(P, x); }
+inline void mapToG1(bool *pb, G1& P, const Fp& x) { *pb = BN::param.mapTo.calc(P, x); }
+inline void mapToG2(bool *pb, G2& P, const Fp2& x) { *pb = BN::param.mapTo.calc(P, x); }
 #ifndef CYBOZU_DONT_USE_EXCEPTION
 inline void mapToG1(G1& P, const Fp& x)
 {

From 0aface885b6e2cef4c580c3d61085cb523062ced Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 18 Oct 2019 17:14:46 +0900
Subject: [PATCH 109/553] ETH2.0 spec

---
 include/mcl/bn.h               | 10 +++++--
 include/mcl/bn.hpp             | 53 +++++++++++++++++++++++++++-------
 include/mcl/impl/bn_c_impl.hpp |  9 ++++--
 3 files changed, 58 insertions(+), 14 deletions(-)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 6e5d1ff1..78f8f271 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -191,10 +191,16 @@ MCLBN_DLL_API mclSize mclBn_getFieldOrder(char *buf, mclSize maxBufSize);
 
 /*
 	set ETH serialization mode for BLS12-381
-	@param ETHserialization [in] 1:enable,  0:disable
+	@param enable [in] 1:enable,  0:disable
 	@note ignore the flag if curve is not BLS12-381
 */
-MCLBN_DLL_API void mclBn_setETHserialization(int ETHserialization);
+MCLBN_DLL_API void mclBn_setETHserialization(int enable);
+/*
+	use mapToGi according to
+	https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#modular_squareroot
+*/
+MCLBN_DLL_API void mclBn_setETHmaptTo(int enable);
+
 ////////////////////////////////////////////////
 /*
 	deserialize
diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 3a8d61b8..b1b2cf95 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -326,6 +326,7 @@ struct MapTo {
 	mpz_class cofactor_;
 	int type_;
 	bool useNaiveMapTo_;
+	bool useETHsquareRoot_;
 
 	int legendre(bool *pb, const Fp& x) const
 	{
@@ -495,6 +496,23 @@ struct MapTo {
 		(void)b;
 		c2_ = (c1_ - 1) / 2;
 	}
+	// enable if standard Ec
+	void setNaiveMapTo(bool enable)
+	{
+		if (type_ == STD_ECtype) {
+			useNaiveMapTo_ = true;
+		} else {
+			useNaiveMapTo_ = enable;
+		}
+	}
+	void setETHsquareRoot(bool enable)
+	{
+		if (type_ == BLS12type) {
+			useETHsquareRoot_ = enable;
+		} else {
+			useETHsquareRoot_ = false;
+		}
+	}
 	/*
 		if type == STD_ECtype, then cofactor, z are not used.
 	*/
@@ -505,14 +523,8 @@ struct MapTo {
 		} else {
 			type_ = STD_ECtype;
 		}
-		if (type_ == STD_ECtype) {
-			useNaiveMapTo_ = true;
-		} else {
-			useNaiveMapTo_ = false;
-		}
-#ifdef MCL_USE_OLD_MAPTO_FOR_BLS12
-		if (type == BLS12type) useNaiveMapTo_ = true;
-#endif
+		setNaiveMapTo(false);
+		setETHsquareRoot(false);
 		if (type_ == BNtype) {
 			initBN(cofactor, z, curveType);
 		} else if (type_ == BLS12type) {
@@ -553,13 +565,26 @@ struct MapTo {
 		}
 		assert(P.isValid());
 	}
-	template<class G, class F>
-	bool calc(G& P, const F& t) const
+	bool calc(G1& P, const Fp& t) const
 	{
 		if (!mapToEc(P, t)) return false;
 		mulByCofactor(P);
 		return true;
 	}
+	bool calc(G2& P, const Fp2& t) const
+	{
+		if (!mapToEc(P, t)) return false;
+		if (useETHsquareRoot_) {
+			Fp2 negY;
+			Fp2::neg(negY, P.y);
+			int cmp = Fp::compare(P.y.b, negY.b);
+			if (!(cmp > 0 || (cmp == 0 && P.y.a > negY.a))) {
+				P.y = negY;
+			}
+		}
+		mulByCofactor(P);
+		return true;
+	}
 };
 
 
@@ -2068,6 +2093,14 @@ inline void millerLoopVec(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
 	}
 }
 
+inline void setETHmapTo(bool enable)
+{
+	local::StaticVar<>::param.mapTo.setNaiveMapTo(enable);
+}
+inline void setETHsquareRoot(bool enable)
+{
+	local::StaticVar<>::param.mapTo.setETHsquareRoot(enable);
+}
 inline void mapToG1(bool *pb, G1& P, const Fp& x) { *pb = BN::param.mapTo.calc(P, x); }
 inline void mapToG2(bool *pb, G2& P, const Fp2& x) { *pb = BN::param.mapTo.calc(P, x); }
 #ifndef CYBOZU_DONT_USE_EXCEPTION
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index 51112a70..1d8fe043 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -113,9 +113,14 @@ mclSize mclBn_getFieldOrder(char *buf, mclSize maxBufSize)
 	return Fp::getModulo(buf, maxBufSize);
 }
 
-void mclBn_setETHserialization(int ETHserialization)
+void mclBn_setETHserialization(int enable)
 {
-	Fp::setETHserialization(ETHserialization == 1);
+	Fp::setETHserialization(enable == 1);
+}
+
+void mclBn_setETHmaptTo(int enable)
+{
+	setETHmapTo(enable == 1);
 }
 
 ////////////////////////////////////////////////

From 8c56aed56c1af194dd22cbaa3fd1210ecbd7a9cd Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 21 Oct 2019 20:52:42 +0900
Subject: [PATCH 110/553] [go] add DeserializeUncompressed

---
 ffi/go/mcl/mcl.go      | 143 ++++++++++++++++++++++++++++++++++++++---
 ffi/go/mcl/mcl_test.go |  39 +++++++++++
 2 files changed, 173 insertions(+), 9 deletions(-)

diff --git a/ffi/go/mcl/mcl.go b/ffi/go/mcl/mcl.go
index c13d3f92..4134d8ff 100644
--- a/ffi/go/mcl/mcl.go
+++ b/ffi/go/mcl/mcl.go
@@ -53,6 +53,26 @@ func GetOpUnitSize() int {
 	return int(C.mclBn_getOpUnitSize())
 }
 
+// GetFrByteSize -- the serialized size of Fr
+func GetFrByteSize() int {
+	return int(C.mclBn_getFrByteSize())
+}
+
+// GetFpByteSize -- the serialized size of Fp
+func GetFpByteSize() int {
+	return int(C.mclBn_getFpByteSize())
+}
+
+// GetG1ByteSize -- the serialized size of G1
+func GetG1ByteSize() int {
+	return GetFpByteSize()
+}
+
+// GetG2ByteSize -- the serialized size of G2
+func GetG2ByteSize() int {
+	return GetFpByteSize() * 2
+}
+
 // GetCurveOrder --
 // return the order of G1
 func GetCurveOrder() string {
@@ -77,16 +97,30 @@ func GetFieldOrder() string {
 	return string(buf[:n])
 }
 
-// SetETHserialization --
-func SetETHserialization(enable bool) {
-	var v C.int
-	if enable {
-		v = 1
+func bool2Cint(b bool) C.int {
+	if b {
+		return 1
 	} else {
-		v = 0
+		return 0
 	}
+}
+
+// VerifyOrderG1 -- verify order if SetString/Deserialize are called
+func VerifyOrderG1(doVerify bool) {
 	// #nosec
-	C.mclBn_setETHserialization(v)
+	C.mclBn_verifyOrderG1(bool2Cint(doVerify))
+}
+
+// VerifyOrderG2 -- verify order if SetString/Deserialize are called
+func VerifyOrderG2(doVerify bool) {
+	// #nosec
+	C.mclBn_verifyOrderG2(bool2Cint(doVerify))
+}
+
+// SetETHserialization --
+func SetETHserialization(enable bool) {
+	// #nosec
+	C.mclBn_setETHserialization(bool2Cint(enable))
 }
 
 // Fr --
@@ -562,6 +596,26 @@ func (x *G1) Deserialize(buf []byte) error {
 	return nil
 }
 
+// DeserializeUncompressed -- x.Deserialize() + y.Deserialize()
+func (x *G1) DeserializeUncompressed(buf []byte) error {
+	// #nosec
+	var n = C.mclBnFp_deserialize(x.X.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if n == 0 {
+		return fmt.Errorf("err UncompressedDeserialize X %x", buf)
+	}
+	buf = buf[n:]
+	// #nosec
+	n = C.mclBnFp_deserialize(x.Y.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if n == 0 {
+		return fmt.Errorf("err UncompressedDeserialize Y %x", buf)
+	}
+	x.Z.SetInt64(1)
+	if !x.IsValid() {
+		return fmt.Errorf("err invalid point")
+	}
+	return nil
+}
+
 // IsEqual --
 func (x *G1) IsEqual(rhs *G1) bool {
 	return C.mclBnG1_isEqual(x.getPointer(), rhs.getPointer()) == 1
@@ -577,6 +631,11 @@ func (x *G1) IsValid() bool {
 	return C.mclBnG1_isValid(x.getPointer()) == 1
 }
 
+// IsValidOrder --
+func (x *G1) IsValidOrder() bool {
+	return C.mclBnG1_isValidOrder(x.getPointer()) == 1
+}
+
 // HashAndMapTo --
 func (x *G1) HashAndMapTo(buf []byte) error {
 	// #nosec
@@ -600,13 +659,33 @@ func (x *G1) GetString(base int) string {
 
 // Serialize --
 func (x *G1) Serialize() []byte {
-	buf := make([]byte, 2048)
+	buf := make([]byte, GetG1ByteSize())
 	// #nosec
 	n := C.mclBnG1_serialize(unsafe.Pointer(&buf[0]), C.size_t(len(buf)), x.getPointer())
 	if n == 0 {
 		panic("err mclBnG1_serialize")
 	}
-	return buf[:n]
+	return buf
+}
+
+// SerializeUncompressed -- all zero array if x.IsZero()
+func (x *G1) SerializeUncompressed() []byte {
+	buf := make([]byte, GetG1ByteSize()*2)
+	if x.IsZero() {
+		return buf
+	}
+	var nx G1
+	G1Normalize(&nx, x)
+	// #nosec
+	var n = C.mclBnFp_serialize(unsafe.Pointer(&buf[0]), C.size_t(len(buf)), nx.X.getPointer())
+	if n == 0 {
+		panic("err mclBnFp_serialize X")
+	}
+	n = C.mclBnFp_serialize(unsafe.Pointer(&buf[n]), C.size_t(len(buf))-n, nx.Y.getPointer())
+	if n == 0 {
+		panic("err mclBnFp_serialize Y")
+	}
+	return buf
 }
 
 // G1Normalize --
@@ -692,6 +771,27 @@ func (x *G2) Deserialize(buf []byte) error {
 	return nil
 }
 
+// DeserializeUncompressed -- x.Deserialize() + y.Deserialize()
+func (x *G2) DeserializeUncompressed(buf []byte) error {
+	// #nosec
+	var n = C.mclBnFp2_deserialize(x.X.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if n == 0 {
+		return fmt.Errorf("err UncompressedDeserialize X %x", buf)
+	}
+	buf = buf[n:]
+	// #nosec
+	n = C.mclBnFp2_deserialize(x.Y.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if n == 0 {
+		return fmt.Errorf("err UncompressedDeserialize Y %x", buf)
+	}
+	x.Z.D[0].SetInt64(1)
+	x.Z.D[1].Clear()
+	if !x.IsValid() {
+		return fmt.Errorf("err invalid point")
+	}
+	return nil
+}
+
 // IsEqual --
 func (x *G2) IsEqual(rhs *G2) bool {
 	return C.mclBnG2_isEqual(x.getPointer(), rhs.getPointer()) == 1
@@ -707,6 +807,11 @@ func (x *G2) IsValid() bool {
 	return C.mclBnG2_isValid(x.getPointer()) == 1
 }
 
+// IsValidOrder --
+func (x *G2) IsValidOrder() bool {
+	return C.mclBnG2_isValidOrder(x.getPointer()) == 1
+}
+
 // HashAndMapTo --
 func (x *G2) HashAndMapTo(buf []byte) error {
 	// #nosec
@@ -739,6 +844,26 @@ func (x *G2) Serialize() []byte {
 	return buf[:n]
 }
 
+// SerializeUncompressed -- all zero array if x.IsZero()
+func (x *G2) SerializeUncompressed() []byte {
+	buf := make([]byte, GetG2ByteSize()*2)
+	if x.IsZero() {
+		return buf
+	}
+	var nx G2
+	G2Normalize(&nx, x)
+	// #nosec
+	var n = C.mclBnFp2_serialize(unsafe.Pointer(&buf[0]), C.size_t(len(buf)), nx.X.getPointer())
+	if n == 0 {
+		panic("err mclBnFp2_serialize X")
+	}
+	n = C.mclBnFp2_serialize(unsafe.Pointer(&buf[n]), C.size_t(len(buf))-n, nx.Y.getPointer())
+	if n == 0 {
+		panic("err mclBnFp2_serialize Y")
+	}
+	return buf
+}
+
 // G2Normalize --
 func G2Normalize(out *G2, x *G2) {
 	C.mclBnG2_normalize(out.getPointer(), x.getPointer())
diff --git a/ffi/go/mcl/mcl_test.go b/ffi/go/mcl/mcl_test.go
index 7146a79f..51b147e3 100644
--- a/ffi/go/mcl/mcl_test.go
+++ b/ffi/go/mcl/mcl_test.go
@@ -195,6 +195,44 @@ func testPairing(t *testing.T) {
 	}
 }
 
+func testSerialize(t *testing.T) {
+	var x, xx Fr
+	var y, yy Fp
+	var P, PP G1
+	var Q, QQ G2
+	var e, ee GT
+	x.SetByCSPRNG()
+	y.SetByCSPRNG()
+	P.HashAndMapTo([]byte("abc"))
+	G1Dbl(&P, &P)
+	Q.HashAndMapTo([]byte("abc"))
+	G2Dbl(&Q, &Q)
+	Pairing(&e, &P, &Q)
+	if xx.Deserialize(x.Serialize()) != nil || !x.IsEqual(&xx) {
+		t.Error("Serialize Fr")
+	}
+	if yy.Deserialize(y.Serialize()) != nil || !y.IsEqual(&yy) {
+		t.Error("Serialize Fp")
+	}
+	if PP.Deserialize(P.Serialize()) != nil || !P.IsEqual(&PP) {
+		t.Error("Serialize G1")
+	}
+	if QQ.Deserialize(Q.Serialize()) != nil || !Q.IsEqual(&QQ) {
+		t.Error("Serialize G2")
+	}
+	if ee.Deserialize(e.Serialize()) != nil || !e.IsEqual(&ee) {
+		t.Error("Serialize GT")
+	}
+	G1Dbl(&PP, &PP)
+	if PP.DeserializeUncompressed(P.SerializeUncompressed()) != nil || !P.IsEqual(&PP) {
+		t.Error("SerializeUncompressed G1")
+	}
+	G2Dbl(&QQ, &QQ)
+	if QQ.DeserializeUncompressed(Q.SerializeUncompressed()) != nil || !Q.IsEqual(&QQ) {
+		t.Error("SerializeUncompressed G2")
+	}
+}
+
 func testMcl(t *testing.T, c int) {
 	err := Init(c)
 	if err != nil {
@@ -206,6 +244,7 @@ func testMcl(t *testing.T, c int) {
 	testVec(t)
 	testGT(t)
 	testBadPointOfG2(t)
+	testSerialize(t)
 }
 
 func TestMclMain(t *testing.T) {

From 1b4f4335b0b9b6629897334c95d4239f1de6d28d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 22 Oct 2019 11:57:45 +0900
Subject: [PATCH 111/553] change the format of zero

---
 ffi/go/mcl/mcl.go | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/ffi/go/mcl/mcl.go b/ffi/go/mcl/mcl.go
index 4134d8ff..641d39c9 100644
--- a/ffi/go/mcl/mcl.go
+++ b/ffi/go/mcl/mcl.go
@@ -596,8 +596,28 @@ func (x *G1) Deserialize(buf []byte) error {
 	return nil
 }
 
+const ZERO_HEADER = 1 << 6
+func isZeroFormat(buf []byte, n int) bool {
+	if len(buf) < n {
+		return false
+	}
+	if buf[0] != ZERO_HEADER {
+		return false
+	}
+	for i := 1; i < n; i++ {
+		if buf[i] != 0 {
+			return false
+		}
+	}
+	return true
+}
+
 // DeserializeUncompressed -- x.Deserialize() + y.Deserialize()
 func (x *G1) DeserializeUncompressed(buf []byte) error {
+	if isZeroFormat(buf, GetG1ByteSize()*2) {
+		x.Clear()
+		return nil
+	}
 	// #nosec
 	var n = C.mclBnFp_deserialize(x.X.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
 	if n == 0 {
@@ -672,6 +692,7 @@ func (x *G1) Serialize() []byte {
 func (x *G1) SerializeUncompressed() []byte {
 	buf := make([]byte, GetG1ByteSize()*2)
 	if x.IsZero() {
+		buf[0] = ZERO_HEADER
 		return buf
 	}
 	var nx G1
@@ -773,6 +794,10 @@ func (x *G2) Deserialize(buf []byte) error {
 
 // DeserializeUncompressed -- x.Deserialize() + y.Deserialize()
 func (x *G2) DeserializeUncompressed(buf []byte) error {
+	if isZeroFormat(buf, GetG2ByteSize()*2) {
+		x.Clear()
+		return nil
+	}
 	// #nosec
 	var n = C.mclBnFp2_deserialize(x.X.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
 	if n == 0 {
@@ -848,6 +873,7 @@ func (x *G2) Serialize() []byte {
 func (x *G2) SerializeUncompressed() []byte {
 	buf := make([]byte, GetG2ByteSize()*2)
 	if x.IsZero() {
+		buf[0] = ZERO_HEADER
 		return buf
 	}
 	var nx G2

From c0abe65cdf5341cf857dd463738fde2285e14fda Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 23 Oct 2019 21:15:24 +0900
Subject: [PATCH 112/553] enable setETHserialization for Fr

---
 ffi/go/mcl/mcl_test.go         | 15 ++++++++++
 include/mcl/bn.h               |  5 ++++
 include/mcl/fp.hpp             |  5 +++-
 include/mcl/impl/bn_c_impl.hpp | 12 ++++++++
 test/bn_c_test.hpp             | 54 +++++++++++++++++++++++++++++++---
 5 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/ffi/go/mcl/mcl_test.go b/ffi/go/mcl/mcl_test.go
index 51b147e3..e128648f 100644
--- a/ffi/go/mcl/mcl_test.go
+++ b/ffi/go/mcl/mcl_test.go
@@ -247,6 +247,20 @@ func testMcl(t *testing.T, c int) {
 	testSerialize(t)
 }
 
+func testETHserialize(t *testing.T) {
+	b := make([]byte, 32)
+	b[0] = 0x12
+	b[1] = 0x34
+	var x Fr
+	SetETHserialization(false)
+	x.Deserialize(b)
+	fmt.Printf("AAA x=%s\n", x.GetString(16))
+
+	SetETHserialization(true)
+	x.Deserialize(b)
+	fmt.Printf("AAA x=%s\n", x.GetString(16))
+}
+
 func TestMclMain(t *testing.T) {
 	t.Logf("GetMaxOpUnitSize() = %d\n", GetMaxOpUnitSize())
 	t.Log("CurveFp254BNb")
@@ -258,5 +272,6 @@ func TestMclMain(t *testing.T) {
 		}
 		t.Log("BLS12_381")
 		testMcl(t, BLS12_381)
+		testETHserialize(t)
 	}
 }
diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 78f8f271..04f88a3a 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -143,6 +143,7 @@ MCLBN_DLL_API int mclBn_getVersion();
 */
 MCLBN_DLL_API int mclBn_init(int curve, int compiledTimeVar);
 
+MCLBN_DLL_API int mclBn_getCurveType(void);
 
 /*
 	pairing : G1 x G2 -> GT
@@ -195,6 +196,10 @@ MCLBN_DLL_API mclSize mclBn_getFieldOrder(char *buf, mclSize maxBufSize);
 	@note ignore the flag if curve is not BLS12-381
 */
 MCLBN_DLL_API void mclBn_setETHserialization(int enable);
+
+// return 1 if ETH serialization mode else 0
+MCLBN_DLL_API int mclBn_getETHserialization(void);
+
 /*
 	use mapToGi according to
 	https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#modular_squareroot
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index a1f4b2cf..9303ddce 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -549,9 +549,12 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 	}
 	static void setETHserialization(bool ETHserialization)
 	{
-		if (getBitSize() != 381) return;
 		isETHserialization_ = ETHserialization;
 	}
+	static bool getETHserialization()
+	{
+		return isETHserialization_;
+	}
 	static inline bool isETHserialization() { return isETHserialization_; }
 	static inline int getIoMode() { return ioMode_; }
 	static inline size_t getModBitLen() { return getBitSize(); }
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index 1d8fe043..58f62f37 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -83,6 +83,11 @@ int mclBn_init(int curve, int compiledTimeVar)
 	return b ? 0 : -1;
 }
 
+int mclBn_getCurveType()
+{
+	return mcl::bn::BN::param.cp.curveType;
+}
+
 int mclBn_getOpUnitSize()
 {
 	return (int)Fp::getUnitSize() * sizeof(mcl::fp::Unit) / sizeof(uint64_t);
@@ -115,7 +120,14 @@ mclSize mclBn_getFieldOrder(char *buf, mclSize maxBufSize)
 
 void mclBn_setETHserialization(int enable)
 {
+	if (mclBn_getCurveType() != MCL_BLS12_381) return;
 	Fp::setETHserialization(enable == 1);
+	Fr::setETHserialization(enable == 1);
+}
+
+int mclBn_getETHserialization()
+{
+	return Fp::getETHserialization() ? 1 : 0;
 }
 
 void mclBn_setETHmaptTo(int enable)
diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index 9c1818bb..85a81f9f 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -25,25 +25,28 @@ CYBOZU_TEST_AUTO(init)
 	CYBOZU_TEST_EQUAL(sizeof(mclBnG1), sizeof(G1));
 	CYBOZU_TEST_EQUAL(sizeof(mclBnG2), sizeof(G2));
 	CYBOZU_TEST_EQUAL(sizeof(mclBnGT), sizeof(Fp12));
+	int curveType;
 
 #if MCLBN_FP_UNIT_SIZE >= 4
 	printf("test BN254 %d\n", MCLBN_FP_UNIT_SIZE);
-	ret = mclBn_init(MCL_BN254, MCLBN_COMPILED_TIME_VAR);
+	curveType = MCL_BN254;
 #endif
 #if MCLBN_FP_UNIT_SIZE >= 6 && MCLBN_FR_UNIT_SIZE >= 4
 	printf("test BLS12_381 %d\n", MCLBN_FP_UNIT_SIZE);
-	ret = mclBn_init(MCL_BLS12_381, MCLBN_COMPILED_TIME_VAR);
+	curveType = MCL_BLS12_381;
 #endif
 #if MCLBN_FP_UNIT_SIZE >= 6 && MCLBN_FR_UNIT_SIZE >= 6
 	printf("test BN381_1 %d\n", MCLBN_FP_UNIT_SIZE);
-	ret = mclBn_init(MCL_BN381_1, MCLBN_COMPILED_TIME_VAR);
+	curveType = MCL_BN381_1;
 #endif
 #if MCLBN_FP_UNIT_SIZE == 8
 	printf("test BN462 %d\n", MCLBN_FP_UNIT_SIZE);
-	ret = mclBn_init(MCL_BN462, MCLBN_COMPILED_TIME_VAR);
+	curveType = MCL_BN462;
 #endif
+	ret = mclBn_init(curveType, MCLBN_COMPILED_TIME_VAR);
 	CYBOZU_TEST_EQUAL(ret, 0);
 	if (ret != 0) exit(1);
+	CYBOZU_TEST_EQUAL(curveType, mclBn_getCurveType());
 }
 
 CYBOZU_TEST_AUTO(Fr)
@@ -612,6 +615,49 @@ CYBOZU_TEST_AUTO(serializeToHexStr)
 	CYBOZU_TEST_EQUAL(n, expectSize);
 }
 
+CYBOZU_TEST_AUTO(ETHserialization)
+{
+	int curveType = mclBn_getCurveType();
+	if (curveType != MCL_BLS12_381) return;
+	int keepETH = mclBn_getETHserialization();
+	char buf[128] = {};
+	char str[128];
+	buf[0] = 0x12;
+	buf[1] = 0x34;
+	size_t n;
+	mclBnFr x;
+	mclBn_setETHserialization(false);
+	n = mclBnFr_deserialize(&x, buf, 32);
+	CYBOZU_TEST_EQUAL(n, 32);
+	n = mclBnFr_getStr(str, sizeof(str), &x, 16);
+	CYBOZU_TEST_ASSERT(n > 0);
+	CYBOZU_TEST_EQUAL(strcmp(str, "3412"), 0);
+
+	mclBn_setETHserialization(true);
+	n = mclBnFr_deserialize(&x, buf, 32);
+	CYBOZU_TEST_EQUAL(n, 32);
+	n = mclBnFr_getStr(str, sizeof(str), &x, 16);
+	CYBOZU_TEST_ASSERT(n > 0);
+	CYBOZU_TEST_EQUAL(strcmp(str, "1234000000000000000000000000000000000000000000000000000000000000"), 0);
+
+	mclBnFp y;
+	mclBn_setETHserialization(false);
+	n = mclBnFp_deserialize(&y, buf, 48);
+	CYBOZU_TEST_EQUAL(n, 48);
+	n = mclBnFp_getStr(str, sizeof(str), &y, 16);
+	CYBOZU_TEST_ASSERT(n > 0);
+	CYBOZU_TEST_EQUAL(strcmp(str, "3412"), 0);
+
+	mclBn_setETHserialization(true);
+	n = mclBnFp_deserialize(&y, buf, 48);
+	CYBOZU_TEST_EQUAL(n, 48);
+	n = mclBnFp_getStr(str, sizeof(str), &y, 16);
+	CYBOZU_TEST_ASSERT(n > 0);
+	CYBOZU_TEST_EQUAL(strcmp(str, "123400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"), 0);
+
+	mclBn_setETHserialization(keepETH);
+}
+
 #if MCLBN_FP_UNIT_SIZE == 6 && MCLBN_FR_UNIT_SIZE >= 6
 CYBOZU_TEST_AUTO(badG2)
 {

From 2113045aa6139ce20e01142272bad0f06549f6a4 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 25 Oct 2019 11:08:49 +0900
Subject: [PATCH 113/553] add setOriginalG2cofactor

---
 include/mcl/bn.h               | 16 +++++++--
 include/mcl/bn.hpp             | 63 ++++++++++++++++++++++++----------
 include/mcl/curve_type.h       |  6 ++++
 include/mcl/impl/bn_c_impl.hpp |  9 +++--
 test/bls12_test.cpp            | 24 ++++++++++++-
 5 files changed, 95 insertions(+), 23 deletions(-)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 04f88a3a..f4f3383f 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -201,10 +201,22 @@ MCLBN_DLL_API void mclBn_setETHserialization(int enable);
 MCLBN_DLL_API int mclBn_getETHserialization(void);
 
 /*
-	use mapToGi according to
+	use original g2cofactor
+	@param enable [in] 1:enable,  0:disable(default)
+	use faster algorithm for multiplication of G2 with g2cofactor if enable
+	The constant is 0x204d0ec030004ec0600000002fffffffd times original g2cofacotr
+	@see MapTo::mulByCofactorBLS12
+*/
+MCLBN_DLL_API void mclBn_setOriginalG2cofactor(int enable);
+
+/*
+	set map-to-function to mode (defalt:MCL_MAP_TO_MODE_ORIGINAL)
 	https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#modular_squareroot
+	return 0 if success else -1
+	@note call mclBn_setOriginalG2cofactor(true) if MCL_MAP_TO_MODE_ETH2
 */
-MCLBN_DLL_API void mclBn_setETHmaptTo(int enable);
+MCLBN_DLL_API int mclBn_setMapToMode(int mode);
+
 
 ////////////////////////////////////////////////
 /*
diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index b1b2cf95..a472d9d0 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -324,9 +324,16 @@ struct MapTo {
 	Fp c2_; // (-1 + sqrt(-3)) / 2
 	mpz_class z_;
 	mpz_class cofactor_;
+	mpz_class g2cofactor_;
 	int type_;
-	bool useNaiveMapTo_;
-	bool useETHsquareRoot_;
+	int mapToMode_;
+	bool useOriginalG2cofactor_;
+	MapTo()
+		: type_(0)
+		, mapToMode_(MCL_MAP_TO_MODE_ORIGINAL)
+		, useOriginalG2cofactor_(false)
+	{
+	}
 
 	int legendre(bool *pb, const Fp& x) const
 	{
@@ -455,6 +462,10 @@ struct MapTo {
 	*/
 	void mulByCofactorBLS12(G2& Q, const G2& P) const
 	{
+		if (useOriginalG2cofactor_) {
+			G2::mulGeneric(Q, P, g2cofactor_);
+			return;
+		}
 		G2 T0, T1;
 		G2::mulGeneric(T0, P, z_ - 1);
 		G2::mulGeneric(T1, T0, z_);
@@ -491,26 +502,39 @@ struct MapTo {
 		z_ = z;
 		// cofactor for G1
 		cofactor_ = (z - 1) * (z - 1) / 3;
+		const int g2Coff[] = { 13, -4, -4, 6, -4, 0, 5, -4, 1 };
+		g2cofactor_ = local::evalPoly(z, g2Coff) / 9;
 		bool b = Fp::squareRoot(c1_, -3);
 		assert(b);
 		(void)b;
 		c2_ = (c1_ - 1) / 2;
 	}
-	// enable if standard Ec
-	void setNaiveMapTo(bool enable)
+	/*
+		change mapTo function to mode
+	*/
+	bool setMapToMode(int mode)
 	{
 		if (type_ == STD_ECtype) {
-			useNaiveMapTo_ = true;
-		} else {
-			useNaiveMapTo_ = enable;
+			mapToMode_ = MCL_MAP_TO_TRY_AND_INC;
+			return true;
+		}
+		switch (mode) {
+		case MCL_MAP_TO_MODE_ORIGINAL:
+		case MCL_MAP_TO_TRY_AND_INC:
+		case MCL_MAP_TO_MODE_ETH2:
+			mapToMode_ = mode;
+			return true;
+			break;
+		default:
+			return false;
 		}
 	}
-	void setETHsquareRoot(bool enable)
+	void setOriginalG2cofactor(bool enable)
 	{
 		if (type_ == BLS12type) {
-			useETHsquareRoot_ = enable;
+			useOriginalG2cofactor_ = enable;
 		} else {
-			useETHsquareRoot_ = false;
+			useOriginalG2cofactor_ = false;
 		}
 	}
 	/*
@@ -523,8 +547,7 @@ struct MapTo {
 		} else {
 			type_ = STD_ECtype;
 		}
-		setNaiveMapTo(false);
-		setETHsquareRoot(false);
+		setMapToMode(MCL_MAP_TO_MODE_ORIGINAL);
 		if (type_ == BNtype) {
 			initBN(cofactor, z, curveType);
 		} else if (type_ == BLS12type) {
@@ -534,7 +557,7 @@ struct MapTo {
 	template<class G, class F>
 	bool mapToEc(G& P, const F& t) const
 	{
-		if (useNaiveMapTo_) {
+		if (mapToMode_ == MCL_MAP_TO_TRY_AND_INC || mapToMode_ == MCL_MAP_TO_MODE_ETH2) {
 			naiveMapTo<G, F>(P, t);
 		} else {
 			if (!calcBN<G, F>(P, t)) return false;
@@ -574,7 +597,7 @@ struct MapTo {
 	bool calc(G2& P, const Fp2& t) const
 	{
 		if (!mapToEc(P, t)) return false;
-		if (useETHsquareRoot_) {
+		if (mapToMode_ == MCL_MAP_TO_MODE_ETH2) {
 			Fp2 negY;
 			Fp2::neg(negY, P.y);
 			int cmp = Fp::compare(P.y.b, negY.b);
@@ -1070,6 +1093,7 @@ local::Param StaticVar<dummyImpl>::param;
 namespace BN {
 
 static const local::Param& param = local::StaticVar<>::param;
+static local::Param& NonConstParam = local::StaticVar<>::param;
 
 } // mcl::bn::BN
 
@@ -2093,13 +2117,16 @@ inline void millerLoopVec(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
 	}
 }
 
-inline void setETHmapTo(bool enable)
+inline void setOriginalG2cofactor(bool enable)
 {
-	local::StaticVar<>::param.mapTo.setNaiveMapTo(enable);
+	BN::NonConstParam.mapTo.setOriginalG2cofactor(enable);
 }
-inline void setETHsquareRoot(bool enable)
+inline bool setMapToMode(int mode)
 {
-	local::StaticVar<>::param.mapTo.setETHsquareRoot(enable);
+	if (mode == MCL_MAP_TO_MODE_ETH2) {
+		setOriginalG2cofactor(true);
+	}
+	return BN::NonConstParam.mapTo.setMapToMode(mode);
 }
 inline void mapToG1(bool *pb, G1& P, const Fp& x) { *pb = BN::param.mapTo.calc(P, x); }
 inline void mapToG2(bool *pb, G2& P, const Fp2& x) { *pb = BN::param.mapTo.calc(P, x); }
diff --git a/include/mcl/curve_type.h b/include/mcl/curve_type.h
index 42ba6a60..1f551d70 100644
--- a/include/mcl/curve_type.h
+++ b/include/mcl/curve_type.h
@@ -35,3 +35,9 @@ enum {
 	MCL_NIST_P384 = MCL_SECP384R1,
 	MCL_NIST_P521 = MCL_SECP521R1
 };
+
+enum {
+	MCL_MAP_TO_MODE_ORIGINAL, // see MapTo::calcBN
+	MCL_MAP_TO_TRY_AND_INC, // try-and-incremental-x
+	MCL_MAP_TO_MODE_ETH2 // eth2.0 spec
+};
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index 58f62f37..96fdc87f 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -130,9 +130,14 @@ int mclBn_getETHserialization()
 	return Fp::getETHserialization() ? 1 : 0;
 }
 
-void mclBn_setETHmaptTo(int enable)
+int mclBn_setMapToMode(int mode)
 {
-	setETHmapTo(enable == 1);
+	return setMapToMode(mode) ? 0 : -1;
+}
+
+void mclBn_setOriginalG2cofactor(int enable)
+{
+	setOriginalG2cofactor(enable == 1);
 }
 
 ////////////////////////////////////////////////
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index 3a693930..359a1ce3 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -704,9 +704,31 @@ CYBOZU_TEST_AUTO(multi)
 	CYBOZU_BENCH_C("naiveG2", 100, (BN::param.mapTo.naiveMapTo<G2, Fp2>), Q, i++);
 }
 
-CYBOZU_TEST_AUTO(BLS12_G1mulCofactor)
+CYBOZU_TEST_AUTO(eth2)
 {
 	if (BN::param.cp.curveType != MCL_BLS12_381) return;
+	Fp::setETHserialization(true);
+	Fr::setETHserialization(true);
+	setMapToMode(MCL_MAP_TO_MODE_ETH2);
+	Fr sec;
+	sec.setStr("0x47b8192d77bf871b62e87859d653922725724a5c031afeabc60bcef5ff665138");
+	uint8_t msg[] = {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 87, 33, 13, 72, 155, 73, 4, 185, 87, 46, 230, 247, 159, 191, 7, 148, 85, 120, 129, 175, 102, 169, 241, 139, 189, 44, 244, 68, 119, 60, 28, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 225, 95, 237, 38, 188, 142, 181, 147, 233, 183, 232, 13, 219, 92, 94, 79, 19, 174, 172, 105, 133, 207, 4, 113, 115, 242, 140, 138, 44, 215, 244, 77
+	};
+	const uint8_t sigStr[] = {
+		6, 239, 41, 231, 36, 30, 26, 28, 198, 15, 238, 50, 142, 50, 144, 192, 35, 213, 90, 103, 1, 219, 80, 14, 239, 171, 127, 145, 57, 26, 139, 135, 38, 253, 0, 36, 18, 30, 100, 99, 114, 129, 249, 7, 19, 127, 226, 104, 24, 123, 75, 172, 163, 99, 136, 233, 97, 148, 183, 58, 125, 83, 47, 110, 234, 107, 192, 152, 119, 141, 191, 211, 64, 69, 132, 97, 59, 91, 169, 218, 151, 213, 96, 46, 49, 253, 190, 146, 112, 184, 99, 135, 101, 41, 178, 84, 18, 210, 104, 251, 230, 10, 193, 72, 64, 52, 41, 52, 81, 12, 106, 12, 31, 250, 171, 222, 116, 82, 153, 227, 157, 225, 55, 196, 22, 100, 207, 162, 163, 65, 163, 112, 14, 234, 31, 243, 107, 2, 227, 249, 10, 187, 131, 10, 3, 211, 176, 25, 9, 1, 154, 245, 167, 74, 192, 135, 28, 44, 85, 238, 179, 95, 250, 20, 39, 137, 56, 40, 196, 66, 91, 125, 231, 240, 32, 204, 95, 9, 56, 38, 62, 180, 158, 95, 1, 58, 2, 126, 173, 200, 94, 46
+	};
+	(void)sigStr;
+	G1 gen;
+	gen.setStr("1 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569", 10);
+	Fp2 m;
+	CYBOZU_TEST_ASSERT(m.deserialize(msg, sizeof(msg)) > 0);
+	G2 Q;
+	mapToG2(Q, m);
+
+	G2 sig = Q * sec;
+	const char *expectSig = "b9d1bf921b3dd048bdce38c2ceac2a2a8093c864881f2415f22b198de935ffa791707855c1656dc21a7af2d502bb46590151d645f062634c3b2cb79c4ed1c4a4b8b3f19f0f5c76965c651553e83d153ff95353735156eff77692f7a62ae653fb";
+	CYBOZU_TEST_EQUAL(sig.getStr(mcl::IoSerializeHexStr), expectSig);
 }
 
 typedef std::vector<Fp> FpVec;

From f6aaf49fea4884bf40a301c0e212ef8379916686 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 25 Oct 2019 15:14:50 +0900
Subject: [PATCH 114/553] [go] add MapToGi

---
 ffi/go/mcl/mcl.go | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/ffi/go/mcl/mcl.go b/ffi/go/mcl/mcl.go
index 641d39c9..cf5a8fc1 100644
--- a/ffi/go/mcl/mcl.go
+++ b/ffi/go/mcl/mcl.go
@@ -1044,6 +1044,22 @@ func GTPow(out *GT, x *GT, y *Fr) {
 	C.mclBnGT_pow(out.getPointer(), x.getPointer(), y.getPointer())
 }
 
+// MapToG1 --
+func MapToG1(out *G1, x *Fp) error {
+	if C.mclBnFp_mapToG1(out.getPointer(), x.getPointer()) != 0 {
+		return fmt.Errorf("err mclBnFp_mapToG1")
+	}
+	return nil
+}
+
+// MapToG2 --
+func MapToG2(out *G2, x *Fp2) error {
+	if C.mclBnFp2_mapToG2(out.getPointer(), x.getPointer()) != 0 {
+		return fmt.Errorf("err mclBnFp2_mapToG2")
+	}
+	return nil
+}
+
 // Pairing --
 func Pairing(out *GT, x *G1, y *G2) {
 	C.mclBn_pairing(out.getPointer(), x.getPointer(), y.getPointer())

From 506a8c954aed52f3dbb62096ce44f5ae6ef8b104 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 25 Oct 2019 15:27:47 +0900
Subject: [PATCH 115/553] [go] add some configuration functions

---
 ffi/go/mcl/mcl.go | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/ffi/go/mcl/mcl.go b/ffi/go/mcl/mcl.go
index cf5a8fc1..c9658afb 100644
--- a/ffi/go/mcl/mcl.go
+++ b/ffi/go/mcl/mcl.go
@@ -123,6 +123,22 @@ func SetETHserialization(enable bool) {
 	C.mclBn_setETHserialization(bool2Cint(enable))
 }
 
+// SetOriginalG2cofactor -- true if BLS_ETH is defined
+func SetOriginalG2cofactor(enable bool) {
+	// #nosec
+	C.mclBn_setOriginalG2cofactor(bool2Cint(enable))
+}
+
+// SetMapToMode --
+func SetMapToMode(mode int) error {
+	// #nosec
+	err := C.mclBn_setMapToMode((C.int)(mode))
+	if err != 0 {
+		return fmt.Errorf("SetMapToMode mode=%d\n", mode)
+	}
+	return nil
+}
+
 // Fr --
 type Fr struct {
 	v C.mclBnFr
@@ -597,6 +613,7 @@ func (x *G1) Deserialize(buf []byte) error {
 }
 
 const ZERO_HEADER = 1 << 6
+
 func isZeroFormat(buf []byte, n int) bool {
 	if len(buf) < n {
 		return false

From 19b3493db25ddbdff553fb42f52ba8692aca5cd1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 25 Oct 2019 16:51:39 +0900
Subject: [PATCH 116/553] [eth2] add benchmark of mapToG2

---
 test/bls12_test.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index 359a1ce3..caa9cc0c 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -729,6 +729,10 @@ CYBOZU_TEST_AUTO(eth2)
 	G2 sig = Q * sec;
 	const char *expectSig = "b9d1bf921b3dd048bdce38c2ceac2a2a8093c864881f2415f22b198de935ffa791707855c1656dc21a7af2d502bb46590151d645f062634c3b2cb79c4ed1c4a4b8b3f19f0f5c76965c651553e83d153ff95353735156eff77692f7a62ae653fb";
 	CYBOZU_TEST_EQUAL(sig.getStr(mcl::IoSerializeHexStr), expectSig);
+
+	CYBOZU_BENCH_C("mapToG2 org-cofactor", 1000, mapToG2, Q, m);
+	setOriginalG2cofactor(false);
+	CYBOZU_BENCH_C("mapToG2 org-cofactor", 1000, mapToG2, Q, m);
 }
 
 typedef std::vector<Fp> FpVec;

From a549f8dbdcf741fdc3fbf125735ca5d875467492 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 25 Oct 2019 17:00:47 +0900
Subject: [PATCH 117/553] v1.01

---
 api.md             | 20 ++++++++++++++++++++
 include/mcl/op.hpp |  2 +-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/api.md b/api.md
index 0a6f000e..3c016a28 100644
--- a/api.md
+++ b/api.md
@@ -1,5 +1,25 @@
 # C API
 
+## New features
+
+Add compatibility mode with [eth2](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md)
+
+```
+void mclBn_setETHserialization(int enable);
+```
+The serialization/deserialization for `Fp`, `Fr`, `G1`, `G2` if `enable = 1`.
+
+```
+int mclBn_setMapToMode(int mode);
+```
+The map-to-G2 function if `mode = MCL_MAP_TO_MODE_ETH2`.
+
+```
+void mclBn_setOriginalG2cofactor(int enable);
+```
+Use faster multiplication of `G2` with cofactor if `enable = 1`.
+This is disabled if `mclBn_setMapToMode(MCL_MAP_TO_MODE_ETH2)`.
+
 ## Minimum sample
 
 [sample/pairing_c.c](sample/pairing_c.c) is a sample of how to use BLS12-381 pairing.
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 224e612d..7ffe404f 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x100; /* 0xABC = A.BC */
+static const int version = 0x101; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From f796b736299fbd6b3ca526e2246f6f4002918763 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 25 Oct 2019 18:33:52 +0900
Subject: [PATCH 118/553] [doc] add comments for mulByCofactorBLS12

---
 api.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/api.md b/api.md
index 3c016a28..a2e285bf 100644
--- a/api.md
+++ b/api.md
@@ -18,7 +18,8 @@ The map-to-G2 function if `mode = MCL_MAP_TO_MODE_ETH2`.
 void mclBn_setOriginalG2cofactor(int enable);
 ```
 Use faster multiplication of `G2` with cofactor if `enable = 1`.
-This is disabled if `mclBn_setMapToMode(MCL_MAP_TO_MODE_ETH2)`.
+This is enabled if `mclBn_setMapToMode(MCL_MAP_TO_MODE_ETH2)`.
+if `enable = 0`, then [the fast algorithm (mulByCofactorBLS12)](https://github.com/herumi/mcl/blob/master/include/mcl/bn.hpp#L463) is used.
 
 ## Minimum sample
 

From 120ac2b1a9eb9ee8087fa42b0c7cfaf224219346 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 25 Oct 2019 18:58:03 +0900
Subject: [PATCH 119/553] fix typo

---
 test/bls12_test.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index caa9cc0c..3b11974a 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -730,9 +730,9 @@ CYBOZU_TEST_AUTO(eth2)
 	const char *expectSig = "b9d1bf921b3dd048bdce38c2ceac2a2a8093c864881f2415f22b198de935ffa791707855c1656dc21a7af2d502bb46590151d645f062634c3b2cb79c4ed1c4a4b8b3f19f0f5c76965c651553e83d153ff95353735156eff77692f7a62ae653fb";
 	CYBOZU_TEST_EQUAL(sig.getStr(mcl::IoSerializeHexStr), expectSig);
 
-	CYBOZU_BENCH_C("mapToG2 org-cofactor", 1000, mapToG2, Q, m);
+	CYBOZU_BENCH_C("mapToG2  org-cofactor", 1000, mapToG2, Q, m);
 	setOriginalG2cofactor(false);
-	CYBOZU_BENCH_C("mapToG2 org-cofactor", 1000, mapToG2, Q, m);
+	CYBOZU_BENCH_C("mapToG2 fast-cofactor", 1000, mapToG2, Q, m);
 }
 
 typedef std::vector<Fp> FpVec;

From 9a1c70a59e736589a24240a7e500a02c9df96dd8 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 26 Oct 2019 17:29:27 +0900
Subject: [PATCH 120/553] faster multiplication of original G2 cofactor

---
 include/mcl/bn.hpp  | 21 ++++++++++++++++-----
 test/bls12_test.cpp | 13 +++++++++++++
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index a472d9d0..f38bc8b7 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -325,6 +325,7 @@ struct MapTo {
 	mpz_class z_;
 	mpz_class cofactor_;
 	mpz_class g2cofactor_;
+	Fr g2cofactorAdj_;
 	int type_;
 	int mapToMode_;
 	bool useOriginalG2cofactor_;
@@ -459,13 +460,10 @@ struct MapTo {
 		Efficient hash maps to G2 on BLS curves
 		Alessandro Budroni, Federico Pintore
 		Q = (z(z-1)-1)P + Frob((z-1)P) + Frob^2(2P)
+		original G2 cofactor = this cofactor * g2cofactorAdj_
 	*/
-	void mulByCofactorBLS12(G2& Q, const G2& P) const
+	void mulByCofactorBLS12fast(G2& Q, const G2& P) const
 	{
-		if (useOriginalG2cofactor_) {
-			G2::mulGeneric(Q, P, g2cofactor_);
-			return;
-		}
 		G2 T0, T1;
 		G2::mulGeneric(T0, P, z_ - 1);
 		G2::mulGeneric(T1, T0, z_);
@@ -476,6 +474,14 @@ struct MapTo {
 		Frobenius2(T1, T1);
 		G2::add(Q, T0, T1);
 	}
+	void mulByCofactorBLS12(G2& Q, const G2& P) const
+	{
+		mulByCofactorBLS12fast(Q, P);
+		if (useOriginalG2cofactor_) {
+			Q *= g2cofactorAdj_;
+			return;
+		}
+	}
 	/*
 		cofactor_ is for G2(not used now)
 	*/
@@ -508,6 +514,11 @@ struct MapTo {
 		assert(b);
 		(void)b;
 		c2_ = (c1_ - 1) / 2;
+		mpz_class t = (z * z - 1) * 3;;
+		g2cofactorAdj_.setMpz(&b, t);
+		assert(b);
+		(void)b;
+		Fr::inv(g2cofactorAdj_, g2cofactorAdj_);
 	}
 	/*
 		change mapTo function to mode
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index 3b11974a..e45fcbdd 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -733,6 +733,19 @@ CYBOZU_TEST_AUTO(eth2)
 	CYBOZU_BENCH_C("mapToG2  org-cofactor", 1000, mapToG2, Q, m);
 	setOriginalG2cofactor(false);
 	CYBOZU_BENCH_C("mapToG2 fast-cofactor", 1000, mapToG2, Q, m);
+
+	Fp2 x;
+	x.a = 5;
+	x.b = 3;
+	const mpz_class& g2c = BN::param.mapTo.g2cofactor_;
+	const Fr& g2ca = BN::param.mapTo.g2cofactorAdj_;
+	G2 Q1, Q2, Q3;
+	BN::param.mapTo.mapToEc(Q, x);
+	G2::mulGeneric(Q1, Q, g2c);
+	Q2 = Q;
+	BN::param.mapTo.mulByCofactor(Q2);
+	Q2 *= g2ca;
+	CYBOZU_TEST_EQUAL(Q1, Q2);
 }
 
 typedef std::vector<Fp> FpVec;

From df7a1b5ff2052f2e3aafb2d1a9243733c05da21b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 26 Oct 2019 21:45:59 +0900
Subject: [PATCH 121/553] get g2 cofactor adj function

---
 include/mcl/bn.hpp  | 27 +++++++++++++++++++--------
 test/bls12_test.cpp |  2 +-
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index f38bc8b7..f1188c8e 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -326,6 +326,7 @@ struct MapTo {
 	mpz_class cofactor_;
 	mpz_class g2cofactor_;
 	Fr g2cofactorAdj_;
+	Fr g2cofactorAdjInv_;
 	int type_;
 	int mapToMode_;
 	bool useOriginalG2cofactor_;
@@ -515,10 +516,10 @@ struct MapTo {
 		(void)b;
 		c2_ = (c1_ - 1) / 2;
 		mpz_class t = (z * z - 1) * 3;;
-		g2cofactorAdj_.setMpz(&b, t);
+		g2cofactorAdjInv_.setMpz(&b, t);
 		assert(b);
 		(void)b;
-		Fr::inv(g2cofactorAdj_, g2cofactorAdj_);
+		Fr::inv(g2cofactorAdj_, g2cofactorAdjInv_);
 	}
 	/*
 		change mapTo function to mode
@@ -1104,7 +1105,7 @@ local::Param StaticVar<dummyImpl>::param;
 namespace BN {
 
 static const local::Param& param = local::StaticVar<>::param;
-static local::Param& NonConstParam = local::StaticVar<>::param;
+static local::Param& nonConstParam = local::StaticVar<>::param;
 
 } // mcl::bn::BN
 
@@ -2130,14 +2131,14 @@ inline void millerLoopVec(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
 
 inline void setOriginalG2cofactor(bool enable)
 {
-	BN::NonConstParam.mapTo.setOriginalG2cofactor(enable);
+	BN::nonConstParam.mapTo.setOriginalG2cofactor(enable);
 }
 inline bool setMapToMode(int mode)
 {
 	if (mode == MCL_MAP_TO_MODE_ETH2) {
 		setOriginalG2cofactor(true);
 	}
-	return BN::NonConstParam.mapTo.setMapToMode(mode);
+	return BN::nonConstParam.mapTo.setMapToMode(mode);
 }
 inline void mapToG1(bool *pb, G1& P, const Fp& x) { *pb = BN::param.mapTo.calc(P, x); }
 inline void mapToG2(bool *pb, G2& P, const Fp2& x) { *pb = BN::param.mapTo.calc(P, x); }
@@ -2243,7 +2244,7 @@ using namespace mcl::bn; // backward compatibility
 
 inline void init(bool *pb, const mcl::CurveParam& cp = mcl::BN254, fp::Mode mode = fp::FP_AUTO)
 {
-	local::StaticVar<>::param.init(pb, cp, mode);
+	BN::nonConstParam.init(pb, cp, mode);
 	if (!*pb) return;
 	G1::setMulArrayGLV(local::GLV1::mulArrayGLV, local::GLV1::mulVecNGLV);
 	G2::setMulArrayGLV(local::mulArrayGLV2, local::mulVecNGLV2);
@@ -2280,7 +2281,7 @@ inline void initPairing(const mcl::CurveParam& cp = mcl::BN254, fp::Mode mode =
 
 inline void initG1only(bool *pb, const mcl::EcParam& para)
 {
-	local::StaticVar<>::param.initG1only(pb, para);
+	BN::nonConstParam.initG1only(pb, para);
 	if (!*pb) return;
 	G1::setMulArrayGLV(0);
 	G2::setMulArrayGLV(0);
@@ -2291,7 +2292,17 @@ inline void initG1only(bool *pb, const mcl::EcParam& para)
 
 inline const G1& getG1basePoint()
 {
-	return local::StaticVar<>::param.basePoint;
+	return BN::param.basePoint;
+}
+
+inline const Fr& getG2cofactorAdj()
+{
+	return BN::param.mapTo.g2cofactorAdj_;
+}
+
+inline const Fr& getG2cofactorAdjInv()
+{
+	return BN::param.mapTo.g2cofactorAdjInv_;
 }
 
 } } // mcl::bn
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index e45fcbdd..3f056afa 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -738,7 +738,7 @@ CYBOZU_TEST_AUTO(eth2)
 	x.a = 5;
 	x.b = 3;
 	const mpz_class& g2c = BN::param.mapTo.g2cofactor_;
-	const Fr& g2ca = BN::param.mapTo.g2cofactorAdj_;
+	const Fr& g2ca = getG2cofactorAdj();
 	G2 Q1, Q2, Q3;
 	BN::param.mapTo.mapToEc(Q, x);
 	G2::mulGeneric(Q1, Q, g2c);

From d3aaf45e0fc9f38191b61609e71df614b2dcfb9a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 26 Oct 2019 22:11:23 +0900
Subject: [PATCH 122/553] add option to select algorithm for G2::mulByCofactor

---
 include/mcl/bn.hpp  | 18 +++++++++---------
 test/bls12_test.cpp |  7 +++----
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index f1188c8e..74eace9f 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -475,10 +475,10 @@ struct MapTo {
 		Frobenius2(T1, T1);
 		G2::add(Q, T0, T1);
 	}
-	void mulByCofactorBLS12(G2& Q, const G2& P) const
+	void mulByCofactorBLS12(G2& Q, const G2& P, bool fast = false) const
 	{
 		mulByCofactorBLS12fast(Q, P);
-		if (useOriginalG2cofactor_) {
+		if (useOriginalG2cofactor_ && !fast) {
 			Q *= g2cofactorAdj_;
 			return;
 		}
@@ -588,14 +588,14 @@ struct MapTo {
 		}
 		assert(P.isValid());
 	}
-	void mulByCofactor(G2& P) const
+	void mulByCofactor(G2& P, bool fast = false) const
 	{
 		switch(type_) {
 		case BNtype:
 			mulByCofactorBN(P, P);
 			break;
 		case BLS12type:
-			mulByCofactorBLS12(P, P);
+			mulByCofactorBLS12(P, P, fast);
 			break;
 		}
 		assert(P.isValid());
@@ -606,7 +606,7 @@ struct MapTo {
 		mulByCofactor(P);
 		return true;
 	}
-	bool calc(G2& P, const Fp2& t) const
+	bool calc(G2& P, const Fp2& t, bool fast = false) const
 	{
 		if (!mapToEc(P, t)) return false;
 		if (mapToMode_ == MCL_MAP_TO_MODE_ETH2) {
@@ -617,7 +617,7 @@ struct MapTo {
 				P.y = negY;
 			}
 		}
-		mulByCofactor(P);
+		mulByCofactor(P, fast);
 		return true;
 	}
 };
@@ -2141,7 +2141,7 @@ inline bool setMapToMode(int mode)
 	return BN::nonConstParam.mapTo.setMapToMode(mode);
 }
 inline void mapToG1(bool *pb, G1& P, const Fp& x) { *pb = BN::param.mapTo.calc(P, x); }
-inline void mapToG2(bool *pb, G2& P, const Fp2& x) { *pb = BN::param.mapTo.calc(P, x); }
+inline void mapToG2(bool *pb, G2& P, const Fp2& x, bool fast = false) { *pb = BN::param.mapTo.calc(P, x, fast); }
 #ifndef CYBOZU_DONT_USE_EXCEPTION
 inline void mapToG1(G1& P, const Fp& x)
 {
@@ -2149,10 +2149,10 @@ inline void mapToG1(G1& P, const Fp& x)
 	mapToG1(&b, P, x);
 	if (!b) throw cybozu::Exception("mapToG1:bad value") << x;
 }
-inline void mapToG2(G2& P, const Fp2& x)
+inline void mapToG2(G2& P, const Fp2& x, bool fast = false)
 {
 	bool b;
-	mapToG2(&b, P, x);
+	mapToG2(&b, P, x, fast);
 	if (!b) throw cybozu::Exception("mapToG2:bad value") << x;
 }
 #endif
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index 3f056afa..c967006a 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -730,9 +730,8 @@ CYBOZU_TEST_AUTO(eth2)
 	const char *expectSig = "b9d1bf921b3dd048bdce38c2ceac2a2a8093c864881f2415f22b198de935ffa791707855c1656dc21a7af2d502bb46590151d645f062634c3b2cb79c4ed1c4a4b8b3f19f0f5c76965c651553e83d153ff95353735156eff77692f7a62ae653fb";
 	CYBOZU_TEST_EQUAL(sig.getStr(mcl::IoSerializeHexStr), expectSig);
 
-	CYBOZU_BENCH_C("mapToG2  org-cofactor", 1000, mapToG2, Q, m);
-	setOriginalG2cofactor(false);
-	CYBOZU_BENCH_C("mapToG2 fast-cofactor", 1000, mapToG2, Q, m);
+	CYBOZU_BENCH_C("mapToG2  org-cofactor", 1000, mapToG2, Q, m, false);
+	CYBOZU_BENCH_C("mapToG2 fast-cofactor", 1000, mapToG2, Q, m, true);
 
 	Fp2 x;
 	x.a = 5;
@@ -743,7 +742,7 @@ CYBOZU_TEST_AUTO(eth2)
 	BN::param.mapTo.mapToEc(Q, x);
 	G2::mulGeneric(Q1, Q, g2c);
 	Q2 = Q;
-	BN::param.mapTo.mulByCofactor(Q2);
+	BN::param.mapTo.mulByCofactor(Q2, true);
 	Q2 *= g2ca;
 	CYBOZU_TEST_EQUAL(Q1, Q2);
 }

From 0fe07b047c80127dfb6d9cd01e55ae036e449c96 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 30 Oct 2019 11:07:14 +0900
Subject: [PATCH 123/553] fix vint_test with MCL_USE_GMP=0

---
 test/fp_util_test.cpp | 4 ++--
 test/vint_test.cpp    | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/fp_util_test.cpp b/test/fp_util_test.cpp
index 45b1573a..ba221bce 100644
--- a/test/fp_util_test.cpp
+++ b/test/fp_util_test.cpp
@@ -289,7 +289,7 @@ CYBOZU_TEST_AUTO(BitIterator)
 			while (bi.hasNext()) {
 				uint32_t v1 = bi.getNext(w);
 				mpz_class v2 = x & bi.mask(w);
-				CYBOZU_TEST_EQUAL(v1, v2);
+				CYBOZU_TEST_EQUAL(v2, v1);
 				x >>= w;
 			}
 			CYBOZU_TEST_EQUAL(x, 0);
@@ -302,7 +302,7 @@ CYBOZU_TEST_AUTO(BitIterator)
 			while (bi.hasNext()) {
 				uint32_t v1 = bi.peekBit();
 				mpz_class v2 = x & 1;
-				CYBOZU_TEST_EQUAL(v1, v2);
+				CYBOZU_TEST_EQUAL(v2, v1);
 				x >>= 1;
 				bi.skipBit();
 			}
diff --git a/test/vint_test.cpp b/test/vint_test.cpp
index a2d42197..ab378148 100644
--- a/test/vint_test.cpp
+++ b/test/vint_test.cpp
@@ -7,7 +7,7 @@
 #include <cybozu/benchmark.hpp>
 #include <cybozu/test.hpp>
 #include <cybozu/xorshift.hpp>
-#ifndef DONT_USE_GMP_IN_TEST
+#ifndef MCL_USE_VINT
 #include <gmpxx.h>
 #endif
 
@@ -1259,7 +1259,7 @@ CYBOZU_TEST_AUTO(bench)
 		x.setStr(tbl[i].x);
 		y.setStr(tbl[i].y);
 		CYBOZU_BENCH_C("fast div", N, Vint::div, z, x, y);
-#ifndef DONT_USE_GMP_IN_TEST
+#ifndef MCL_USE_VINT
 		{
 			mpz_class mx(tbl[i].x), my(tbl[i].y), mz;
 			CYBOZU_BENCH_C("gmp", N, mpz_div, mz.get_mpz_t(), mx.get_mpz_t(), my.get_mpz_t());

From bfd0459d2feeaf00fd4df3698f21f75ac8e7cc7f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 30 Oct 2019 13:37:32 +0900
Subject: [PATCH 124/553] disable GMP on macOS Catalina

---
 common.mk | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/common.mk b/common.mk
index 8bbc0325..68b6ba0e 100644
--- a/common.mk
+++ b/common.mk
@@ -95,6 +95,12 @@ CFLAGS+=$(CFLAGS_OPT_USER)
 endif
 CFLAGS+=$(CFLAGS_USER)
 MCL_USE_GMP?=1
+ifeq ($(OS),mac)
+  ifeq ($(shell sw_vers -productVersion),10.15)
+    # workaround because of GMP does not run well on Catalina
+    MCL_USE_GMP=0
+  endif
+endif
 MCL_USE_OPENSSL?=1
 ifeq ($(MCL_USE_GMP),0)
   CFLAGS+=-DMCL_USE_VINT

From 54ed316fe1499df9e59df393882ef7873fe5ed90 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 2 Nov 2019 21:27:21 +0900
Subject: [PATCH 125/553] rename TRY_AND_INC

---
 include/mcl/bn.hpp       | 6 +++---
 include/mcl/curve_type.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 74eace9f..c74840c3 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -527,12 +527,12 @@ struct MapTo {
 	bool setMapToMode(int mode)
 	{
 		if (type_ == STD_ECtype) {
-			mapToMode_ = MCL_MAP_TO_TRY_AND_INC;
+			mapToMode_ = MCL_MAP_TO_MODE_TRY_AND_INC;
 			return true;
 		}
 		switch (mode) {
 		case MCL_MAP_TO_MODE_ORIGINAL:
-		case MCL_MAP_TO_TRY_AND_INC:
+		case MCL_MAP_TO_MODE_TRY_AND_INC:
 		case MCL_MAP_TO_MODE_ETH2:
 			mapToMode_ = mode;
 			return true;
@@ -569,7 +569,7 @@ struct MapTo {
 	template<class G, class F>
 	bool mapToEc(G& P, const F& t) const
 	{
-		if (mapToMode_ == MCL_MAP_TO_TRY_AND_INC || mapToMode_ == MCL_MAP_TO_MODE_ETH2) {
+		if (mapToMode_ == MCL_MAP_TO_MODE_TRY_AND_INC || mapToMode_ == MCL_MAP_TO_MODE_ETH2) {
 			naiveMapTo<G, F>(P, t);
 		} else {
 			if (!calcBN<G, F>(P, t)) return false;
diff --git a/include/mcl/curve_type.h b/include/mcl/curve_type.h
index 1f551d70..dca749d2 100644
--- a/include/mcl/curve_type.h
+++ b/include/mcl/curve_type.h
@@ -38,6 +38,6 @@ enum {
 
 enum {
 	MCL_MAP_TO_MODE_ORIGINAL, // see MapTo::calcBN
-	MCL_MAP_TO_TRY_AND_INC, // try-and-incremental-x
+	MCL_MAP_TO_MODE_TRY_AND_INC, // try-and-incremental-x
 	MCL_MAP_TO_MODE_ETH2 // eth2.0 spec
 };

From e27af0393c8fde7a5e0274ef65df3917b10f92d6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 11 Nov 2019 10:53:57 +0900
Subject: [PATCH 126/553] disable OpenSSL(default)

---
 CMakeLists.txt |  2 +-
 common.mk      |  2 +-
 readme.md      | 15 ++++-----------
 3 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bc91a817..c4481d65 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@ option(
 option(
 	USE_OPENSSL
 	"use openssl"
-	ON
+	OFF
 )
 option(
 	USE_GMP
diff --git a/common.mk b/common.mk
index 68b6ba0e..9ea2c0d6 100644
--- a/common.mk
+++ b/common.mk
@@ -101,7 +101,7 @@ ifeq ($(OS),mac)
     MCL_USE_GMP=0
   endif
 endif
-MCL_USE_OPENSSL?=1
+MCL_USE_OPENSSL?=0
 ifeq ($(MCL_USE_GMP),0)
   CFLAGS+=-DMCL_USE_VINT
 endif
diff --git a/readme.md b/readme.md
index 3b437723..60539b53 100644
--- a/readme.md
+++ b/readme.md
@@ -37,10 +37,10 @@ x86-64/ARM/ARM64 Linux, macOS and mingw64 are supported.
 
 ## Installation Requirements
 
-[GMP](https://gmplib.org/) and [OpenSSL](https://www.openssl.org/) are necessary (default setting).
+[GMP](https://gmplib.org/) is necessary (default setting).
 
 ```
-apt install libgmp-dev libssl-dev # on Ubuntu
+apt install libgmp-dev # on Ubuntu
 ```
 
 ## How to build with Makefile
@@ -72,16 +72,10 @@ make MCL_USE_GMP=0
 ```
 Define `MCL_USE_VINT` if using C++ header files.
 
-## How to build without OpenSSL
-
-```
-make MCL_USE_OPENSSL=0
-```
-Define `MCL_DONT_USE_OPENSSL` if using C++ header files.
 
 ## How to build on 32-bit x86 Linux
 
-Build GMP and OpenSSL for 32-bit mode and install `<lib32>` at yourself.
+Build GMP and for 32-bit mode and install `<lib32>` at yourself.
 
 ```
 make ARCH=x86 CFLAGS_USER="-I <lib32>/include" LDFLAGS_USER="-L <lib32>/lib -Wl,-rpath,<lib32>/lib"
@@ -90,7 +84,7 @@ make ARCH=x86 CFLAGS_USER="-I <lib32>/include" LDFLAGS_USER="-L <lib32>/lib -Wl,
 # How to build on 64-bit Windows with Visual Studio
 
 Clone cybozulib\_ext,
-which provides compiled binaries of OpenSSL and [MPIR](http://mpir.org/).
+which provides compiled binaries of [MPIR](http://mpir.org/).
 
 ```
 mkdir work
@@ -133,7 +127,6 @@ msbuild mcl.sln /p:Configuration=Release /m
 
 ```
 cmake .. USE_GMP=OFF ; without GMP
-cmake .. USE_OPENSSL=OFF ; without OpenSSL
 ```
 see `cmake .. -LA`.
 

From 0dfad750693baa7fc89c06c4278972ac11aecf72 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 11 Nov 2019 10:54:08 +0900
Subject: [PATCH 127/553] v1.02

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 7ffe404f..5cf83be9 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x101; /* 0xABC = A.BC */
+static const int version = 0x102; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From b399592446caae925cb775456ac4952da7de3872 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 15 Nov 2019 11:56:02 +0900
Subject: [PATCH 128/553] refactor according to issues/69

---
 src/fp.cpp | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index 08e73882..7eb1e03a 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -387,25 +387,24 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
 
 #if defined(MCL_USE_LLVM) || defined(MCL_USE_XBYAK)
 	if (mode == FP_AUTO || mode == FP_LLVM || mode == FP_XBYAK) {
-		const char *pStr = "0xfffffffffffffffffffffffffffffffeffffffffffffffff";
-		bool b;
-		mpz_class p192;
-		gmp::setStr(&b, p192, pStr);
-		if (b && mp == p192) {
-			primeMode = PM_NIST_P192;
-			isMont = false;
-			isFastMod = true;
-		}
-	}
-	if (mode == FP_AUTO || mode == FP_LLVM || mode == FP_XBYAK) {
-		const char *pStr = "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff";
-		bool b;
-		mpz_class p521;
-		gmp::setStr(&b, p521, pStr);
-		if (b && mp == p521) {
-			primeMode = PM_NIST_P521;
-			isMont = false;
-			isFastMod = true;
+		const struct {
+			PrimeMode mode;
+			const char *str;
+		} tbl[] = {
+			{ PM_NIST_P192, "0xfffffffffffffffffffffffffffffffeffffffffffffffff" },
+			{ PM_NIST_P521, "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" },
+		};
+		// user fastMode for special primes
+		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+			bool b;
+			mpz_class target;
+			gmp::setStr(&b, target, tbl[i].str);
+			if (b && mp == target) {
+				primeMode = tbl[i].mode;
+				isMont = false;
+				isFastMod = true;
+				break;
+			}
 		}
 	}
 #endif

From d26bee4e33794243df1975442e58b4db6363f9fd Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 18 Nov 2019 10:02:39 +0900
Subject: [PATCH 129/553] fix typo

---
 src/fp.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index 7eb1e03a..765a6d75 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -394,7 +394,7 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
 			{ PM_NIST_P192, "0xfffffffffffffffffffffffffffffffeffffffffffffffff" },
 			{ PM_NIST_P521, "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" },
 		};
-		// user fastMode for special primes
+		// use fastMode for special primes
 		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 			bool b;
 			mpz_class target;

From fbe7d2e38d93f68989fe8cfb406cced8e2a22f57 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 3 Dec 2019 16:30:57 +0900
Subject: [PATCH 130/553] enable llvm-bmi2 without xbyak

---
 src/detect_cpu.hpp | 95 ++++++++++++++++++++++++++++++++++++++++++++++
 src/fp.cpp         | 15 ++++++--
 2 files changed, 106 insertions(+), 4 deletions(-)
 create mode 100644 src/detect_cpu.hpp

diff --git a/src/detect_cpu.hpp b/src/detect_cpu.hpp
new file mode 100644
index 00000000..9d63c0e8
--- /dev/null
+++ b/src/detect_cpu.hpp
@@ -0,0 +1,95 @@
+#pragma once
+/**
+	@file
+	@brief detect Intel CPU features
+	@author MITSUNARI Shigeo(@herumi)
+	@license modified new BSD license
+	http://opensource.org/licenses/BSD-3-Clause
+	This code is extracted from xbyak_util.h for compiling without xbyak
+*/
+
+#include <stdint.h>
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
+	#define MCL_INTEL_CPU_SPECIFIC
+#endif
+
+#ifdef MCL_INTEL_CPU_SPECIFIC
+#ifdef _MSC_VER
+	#include <intrin.h> // for __cpuid
+#else
+	#ifndef __GNUC_PREREQ
+		#define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
+	#endif
+	#if __GNUC_PREREQ(4, 3) && !defined(__APPLE__)
+		#include <cpuid.h>
+	#else
+		#ifndef __cpuid
+			#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
+			#define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
+		#endif
+	#endif
+#endif
+#endif
+
+namespace mcl { namespace util {
+
+/**
+	CPU detection class
+*/
+class Cpu {
+	uint64_t type_;
+public:
+	/*
+		data[] = { eax, ebx, ecx, edx }
+	*/
+	static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
+	{
+#ifdef MCL_INTEL_CPU_SPECIFIC
+	#ifdef _MSC_VER
+		__cpuid(reinterpret_cast<int*>(data), eaxIn);
+	#else
+		__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
+	#endif
+#else
+		(void)eaxIn;
+		(void)data;
+#endif
+	}
+	static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
+	{
+#ifdef MCL_INTEL_CPU_SPECIFIC
+	#ifdef _MSC_VER
+		__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
+	#else
+		__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
+	#endif
+#else
+		(void)eaxIn;
+		(void)ecxIn;
+		(void)data;
+#endif
+	}
+	typedef uint64_t Type;
+	static const Type NONE = 0;
+	static const Type tBMI2 = 1 << 22; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx
+	Cpu()
+		: type_(NONE)
+	{
+		unsigned int data[4] = {};
+		const unsigned int& EAX = data[0];
+		const unsigned int& EBX = data[1];
+		getCpuid(0, data);
+		const unsigned int maxNum = EAX;
+		if (maxNum >= 7) {
+			getCpuidEx(7, 0, data);
+			if (EBX & (1U << 8)) type_ |= tBMI2;
+		}
+	}
+	bool has(Type type) const
+	{
+		return (type & type_) != 0;
+	}
+};
+
+} } // mcl::util
+
diff --git a/src/fp.cpp b/src/fp.cpp
index 765a6d75..07dfb78b 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -5,6 +5,8 @@
 #include <mcl/conversion.hpp>
 #ifdef MCL_USE_XBYAK
 #include "fp_generator.hpp"
+#else
+#include "detect_cpu.hpp"
 #endif
 #include "low_func.hpp"
 #ifdef MCL_USE_LLVM
@@ -253,13 +255,18 @@ void setOp(Op& op, Mode mode)
 	if (mode != fp::FP_GMP && mode != fp::FP_GMP_MONT) {
 #if MCL_LLVM_BMI2 == 1
 		const bool gmpIsFasterThanLLVM = false;//(N == 8 && MCL_SIZEOF_UNIT == 8);
-		Xbyak::util::Cpu cpu;
-		if (cpu.has(Xbyak::util::Cpu::tBMI2)) {
-			setOp2<N, LBMI2tag, (N * UnitBitSize <= 256), gmpIsFasterThanLLVM>(op);
+#ifdef MCL_USE_XBYAK
+		using namespace Xbyak;
+#else
+		using namespace mcl;
+#endif
+		util::Cpu cpu;
+		if (cpu.has(util::Cpu::tBMI2)) {
+			setOp2<N, LBMI2tag, (N * UnitBitSize <= 384), gmpIsFasterThanLLVM>(op);
 		} else
 #endif
 		{
-			setOp2<N, Ltag, (N * UnitBitSize <= 256), false>(op);
+			setOp2<N, Ltag, (N * UnitBitSize <= 384), false>(op);
 		}
 	}
 #else

From 0eb04f7e8a0e60feb6754b98f0423684166993cb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 5 Dec 2019 22:19:17 +0900
Subject: [PATCH 131/553] disable verifyOrder by default

---
 api.md              |  4 ++--
 include/mcl/bn.hpp  |  5 -----
 include/mcl/ec.hpp  |  3 ++-
 readme.md           |  1 +
 test/bench.hpp      |  4 ++--
 test/bls12_test.cpp | 25 +++++++++++++++++++++++++
 6 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/api.md b/api.md
index a2e285bf..eee225d7 100644
--- a/api.md
+++ b/api.md
@@ -133,8 +133,8 @@ This function affects `setStr()` and `deserialize()` for G1/G2.
 void mclBn_verifyOrderG1(int doVerify);
 void mclBn_verifyOrderG2(int doVerify);
 ```
-- verify if `doVerify` is 1 or does not. The default parameter is 1.
-- The cost of verification is not small, so set `doVerify = 0` carefully if necessary.
+- verify if `doVerify` is 1 or does not. The default parameter is 0 because the cost of verification is not small.
+- Set `doVerify = 1` if considering subgroup attack is necessary.
 - This is not thread safe.
 
 ## Setter / Getter
diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index c74840c3..b56f6ba9 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -1030,11 +1030,7 @@ struct Param {
 			twist_b_type = tb_generic;
 		}
 		G1::init(0, cp.b, mcl::ec::Jacobi);
-		if (isBLS12) {
-			G1::setOrder(r);
-		}
 		G2::init(0, twist_b, mcl::ec::Jacobi);
-		G2::setOrder(r);
 
 		const mpz_class largest_c = isBLS12 ? abs_z : gmp::abs(z * 6 + 2);
 		useNAF = gmp::getNAF(siTbl, largest_c);
@@ -1074,7 +1070,6 @@ struct Param {
 		if (!*pb) return;
 		G1::init(pb, para.a, para.b);
 		if (!*pb) return;
-		G1::setOrder(Fr::getOp().mp);
 		mapTo.init(0, 0, para.curveType);
 		Fp x0, y0;
 		x0.setStr(pb, para.gx);
diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 195394db..8cf3a49e 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -880,7 +880,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 				if (ec::local::get_a_flag(y) ^ a) {
 					Fp::neg(y, y);
 				}
-				return;
+				goto verifyOrder;
 			}
 			if (fp::isZeroArray(buf, n1)) {
 				clear();
@@ -935,6 +935,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 				return;
 			}
 		}
+	verifyOrder:
 		if (verifyOrder_ && !isValidOrder()) {
 			*pb = false;
 		} else {
diff --git a/readme.md b/readme.md
index 60539b53..5f6a06b9 100644
--- a/readme.md
+++ b/readme.md
@@ -286,6 +286,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
+- 2019/Dec/05 v1.03 disable the order check in setStr by default
 - 2019/Sep/30 v1.00 add some functions to bn.h ; [api.md](api.md).
 - 2019/Sep/22 v0.99 add mclBnG1_mulVec, etc.
 - 2019/Sep/08 v0.98 bugfix Ec::add(P, Q, R) when P == R
diff --git a/test/bench.hpp b/test/bench.hpp
index cf2a7281..c8c3911b 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -82,15 +82,15 @@ void testBench(const G1& P, const G2& Q)
 	G2 QQ;
 	std::string s;
 	s = P.getStr();
+	verifyOrderG1(true);
 	CYBOZU_BENCH_C("G1::setStr chk", C, PP.setStr, s);
 	verifyOrderG1(false);
 	CYBOZU_BENCH_C("G1::setStr    ", C, PP.setStr, s);
-	verifyOrderG1(true);
 	s = Q.getStr();
+	verifyOrderG2(true);
 	CYBOZU_BENCH_C("G2::setStr chk", C, QQ.setStr, s);
 	verifyOrderG2(false);
 	CYBOZU_BENCH_C("G2::setStr    ", C, QQ.setStr, s);
-	verifyOrderG2(true);
 	CYBOZU_BENCH_C("hashAndMapToG1", C, hashAndMapToG1, PP, "abc", 3);
 	CYBOZU_BENCH_C("hashAndMapToG2", C, hashAndMapToG2, QQ, "abc", 3);
 #endif
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index c967006a..fe0ca7a7 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -747,6 +747,31 @@ CYBOZU_TEST_AUTO(eth2)
 	CYBOZU_TEST_EQUAL(Q1, Q2);
 }
 
+CYBOZU_TEST_AUTO(deserialize)
+{
+	if (BN::param.cp.curveType != MCL_BLS12_381) return;
+	G1 P;
+	G2 Q;
+	mapToG1(P, 5);
+	mapToG2(Q, 5);
+	char buf1[128];
+	char buf2[128];
+	size_t n1 = P.serialize(buf1, sizeof(buf1));
+	CYBOZU_TEST_ASSERT(n1 > 0);
+	CYBOZU_TEST_EQUAL(P.deserialize(buf1, n1), n1);
+	size_t n2 = Q.serialize(buf2, sizeof(buf2));
+	CYBOZU_TEST_ASSERT(n2 > 0);
+	CYBOZU_TEST_EQUAL(Q.deserialize(buf2, n2), n2);
+	for (int i = 0; i < 2; i++) {
+		bool doVerify = i == 0;
+		printf("verifyOrder(%d)\n", doVerify);
+		verifyOrderG1(doVerify);
+		verifyOrderG2(doVerify);
+		CYBOZU_BENCH_C("deserializeG1", 1000, P.deserialize, buf1, n1);
+		CYBOZU_BENCH_C("deserializeG2", 1000, Q.deserialize, buf2, n2);
+	}
+}
+
 typedef std::vector<Fp> FpVec;
 
 void f(FpVec& zv, const FpVec& xv, const FpVec& yv)

From 6ff5cfa9fc0a7e1ae5e019d7102840208664e017 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 7 Dec 2019 17:12:50 +0900
Subject: [PATCH 132/553] v1.03

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 5cf83be9..a5506089 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x102; /* 0xABC = A.BC */
+static const int version = 0x103; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From 844d9bb439bddcf35d13860009088ca91a787dc9 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 7 Dec 2019 21:01:21 +0900
Subject: [PATCH 133/553] update xbyak 5.85

---
 src/xbyak/xbyak.h          | 82 +++++++++++++++++++++++++++++++++-----
 src/xbyak/xbyak_mnemonic.h | 65 +++++++++++++++++++++++++++++-
 src/xbyak/xbyak_util.h     |  7 +---
 3 files changed, 137 insertions(+), 17 deletions(-)

diff --git a/src/xbyak/xbyak.h b/src/xbyak/xbyak.h
index 64b4ee3c..939ffee0 100644
--- a/src/xbyak/xbyak.h
+++ b/src/xbyak/xbyak.h
@@ -9,6 +9,9 @@
 	@note modified new BSD license
 	http://opensource.org/licenses/BSD-3-Clause
 */
+#if !defined(XBYAK_USE_OP_NAMES) && !defined(XBYAK_NO_OP_NAMES)
+	#define XBYAK_NO_OP_NAMES
+#endif
 #ifndef XBYAK_NO_OP_NAMES
 	#if not +0 // trick to detect whether 'not' is operator or not
 		#error "use -fno-operator-names option if you want to use and(), or(), xor(), not() as function names, Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_()."
@@ -80,6 +83,10 @@
 	#include <sys/mman.h>
 	#include <stdlib.h>
 #endif
+#if defined(__APPLE__) && defined(MAP_JIT)
+	#define XBYAK_USE_MAP_JIT
+	#include <sys/sysctl.h>
+#endif
 #if !defined(_MSC_VER) || (_MSC_VER >= 1600)
 	#include <stdint.h>
 #endif
@@ -113,7 +120,7 @@ namespace Xbyak {
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x5802 /* 0xABCD = A.BC(D) */
+	VERSION = 0x5850 /* 0xABCD = A.BC(D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
@@ -186,8 +193,8 @@ enum {
 	ERR_INVALID_ZERO,
 	ERR_INVALID_RIP_IN_AUTO_GROW,
 	ERR_INVALID_MIB_ADDRESS,
-	ERR_INTERNAL,
-	ERR_X2APIC_IS_NOT_SUPPORTED
+	ERR_X2APIC_IS_NOT_SUPPORTED,
+	ERR_INTERNAL // Put it at last.
 };
 
 class Error : public std::exception {
@@ -196,8 +203,7 @@ class Error : public std::exception {
 	explicit Error(int err) : err_(err)
 	{
 		if (err_ < 0 || err_ > ERR_INTERNAL) {
-			fprintf(stderr, "bad err=%d in Xbyak::Error\n", err_);
-			exit(1);
+			err_ = ERR_INTERNAL;
 		}
 	}
 	operator int() const { return err_; }
@@ -248,10 +254,11 @@ class Error : public std::exception {
 			"invalid zero",
 			"invalid rip in AutoGrow",
 			"invalid mib address",
-			"internal error",
-			"x2APIC is not supported"
+			"x2APIC is not supported",
+			"internal error"
 		};
-		assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
+		assert(err_ <= ERR_INTERNAL);
+		assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
 		return errTbl[err_];
 	}
 };
@@ -325,6 +332,29 @@ struct Allocator {
 };
 
 #ifdef XBYAK_USE_MMAP_ALLOCATOR
+#ifdef XBYAK_USE_MAP_JIT
+namespace util {
+
+inline int getMacOsVersionPure()
+{
+	char buf[64];
+	size_t size = sizeof(buf);
+	int err = sysctlbyname("kern.osrelease", buf, &size, NULL, 0);
+	if (err != 0) return 0;
+	char *endp;
+	int major = strtol(buf, &endp, 10);
+	if (*endp != '.') return 0;
+	return major;
+}
+
+inline int getMacOsVersion()
+{
+	static const int version = getMacOsVersionPure();
+	return version;
+}
+
+} // util
+#endif
 class MmapAllocator : Allocator {
 	typedef XBYAK_STD_UNORDERED_MAP<uintptr_t, size_t> SizeList;
 	SizeList sizeList_;
@@ -333,7 +363,11 @@ class MmapAllocator : Allocator {
 	{
 		const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1;
 		size = (size + alignedSizeM1) & ~alignedSizeM1;
-#ifdef MAP_ANONYMOUS
+#if defined(XBYAK_USE_MAP_JIT)
+		int mode = MAP_PRIVATE | MAP_ANONYMOUS;
+		const int mojaveVersion = 18;
+		if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT;
+#elif defined(MAP_ANONYMOUS)
 		const int mode = MAP_PRIVATE | MAP_ANONYMOUS;
 #elif defined(MAP_ANON)
 		const int mode = MAP_PRIVATE | MAP_ANON;
@@ -1714,6 +1748,14 @@ class CodeGenerator : public CodeArray {
 		db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
 		opAddr(addr, reg.getIdx(), immSize);
 	}
+	void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE)
+	{
+		if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
+		if (reg.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER);
+		rex(addr, reg);
+		db(code0); if (code1 != NONE) db(code1);
+		opAddr(addr, reg.getIdx());
+	}
 	void opMIB(const Address& addr, const Reg& reg, int code0, int code1)
 	{
 		if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
@@ -2185,6 +2227,28 @@ class CodeGenerator : public CodeArray {
 		if (addr.getRegExp().getIndex().getKind() != kind) throw Error(ERR_BAD_VSIB_ADDRESSING);
 		opVex(x, 0, addr, type, code);
 	}
+	void opInOut(const Reg& a, const Reg& d, uint8 code)
+	{
+		if (a.getIdx() == Operand::AL && d.getIdx() == Operand::DX && d.getBit() == 16) {
+			switch (a.getBit()) {
+			case 8: db(code); return;
+			case 16: db(0x66); db(code + 1); return;
+			case 32: db(code + 1); return;
+			}
+		}
+		throw Error(ERR_BAD_COMBINATION);
+	}
+	void opInOut(const Reg& a, uint8 code, uint8 v)
+	{
+		if (a.getIdx() == Operand::AL) {
+			switch (a.getBit()) {
+			case 8: db(code); db(v); return;
+			case 16: db(0x66); db(code + 1); db(v); return;
+			case 32: db(code + 1); db(v); return;
+			}
+		}
+		throw Error(ERR_BAD_COMBINATION);
+	}
 public:
 	unsigned int getVersion() const { return VERSION; }
 	using CodeArray::db;
diff --git a/src/xbyak/xbyak_mnemonic.h b/src/xbyak/xbyak_mnemonic.h
index 893a588a..457a6414 100644
--- a/src/xbyak/xbyak_mnemonic.h
+++ b/src/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "5.802"; }
+const char *getVersionString() const { return "5.85"; }
 void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
 void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
 void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@@ -58,7 +58,9 @@ void cdq() { db(0x99); }
 void clc() { db(0xF8); }
 void cld() { db(0xFC); }
 void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
+void clflushopt(const Address& addr) { db(0x66); opModM(addr, Reg32(7), 0x0F, 0xAE); }
 void cli() { db(0xFA); }
+void clzero() { db(0x0F); db(0x01); db(0xFC); }
 void cmc() { db(0xF5); }
 void cmova(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }//-V524
 void cmovae(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 3); }//-V524
@@ -170,6 +172,7 @@ void divss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF3, isXMM
 void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
 void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
 void emms() { db(0x0F); db(0x77); }
+void enter(uint16 x, uint8 y) { db(0xC8); dw(x); db(y); }
 void extractps(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x17, imm); }
 void f2xm1() { db(0xD9); db(0xF0); }
 void fabs() { db(0xD9); db(0xE1); }
@@ -179,7 +182,10 @@ void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC
 void faddp() { db(0xDE); db(0xC1); }
 void faddp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); }
 void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); }
+void fbld(const Address& addr) { opModM(addr, Reg32(4), 0xDF, 0x100); }
+void fbstp(const Address& addr) { opModM(addr, Reg32(6), 0xDF, 0x100); }
 void fchs() { db(0xD9); db(0xE0); }
+void fclex() { db(0x9B); db(0xDB); db(0xE2); }
 void fcmovb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); }
 void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); }
 void fcmovbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD0, 0x00D0); }
@@ -240,6 +246,7 @@ void fld(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 0, 0); }
 void fld(const Fpu& reg) { opFpu(reg, 0xD9, 0xC0); }
 void fld1() { db(0xD9); db(0xE8); }
 void fldcw(const Address& addr) { opModM(addr, Reg32(5), 0xD9, 0x100); }
+void fldenv(const Address& addr) { opModM(addr, Reg32(4), 0xD9, 0x100); }
 void fldl2e() { db(0xD9); db(0xEA); }
 void fldl2t() { db(0xD9); db(0xE9); }
 void fldlg2() { db(0xD9); db(0xEC); }
@@ -252,22 +259,33 @@ void fmul(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC
 void fmulp() { db(0xDE); db(0xC9); }
 void fmulp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC8); }
 void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); }
+void fnclex() { db(0xDB); db(0xE2); }
 void fninit() { db(0xDB); db(0xE3); }
 void fnop() { db(0xD9); db(0xD0); }
+void fnsave(const Address& addr) { opModM(addr, Reg32(6), 0xDD, 0x100); }
+void fnstcw(const Address& addr) { opModM(addr, Reg32(7), 0xD9, 0x100); }
+void fnstenv(const Address& addr) { opModM(addr, Reg32(6), 0xD9, 0x100); }
+void fnstsw(const Address& addr) { opModM(addr, Reg32(7), 0xDD, 0x100); }
+void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0xDF); db(0xE0); }
 void fpatan() { db(0xD9); db(0xF3); }
 void fprem() { db(0xD9); db(0xF8); }
 void fprem1() { db(0xD9); db(0xF5); }
 void fptan() { db(0xD9); db(0xF2); }
 void frndint() { db(0xD9); db(0xFC); }
+void frstor(const Address& addr) { opModM(addr, Reg32(4), 0xDD, 0x100); }
+void fsave(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xDD, 0x100); }
 void fscale() { db(0xD9); db(0xFD); }
 void fsin() { db(0xD9); db(0xFE); }
 void fsincos() { db(0xD9); db(0xFB); }
 void fsqrt() { db(0xD9); db(0xFA); }
 void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); }
 void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); }
-void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, NONE); }
+void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, 0x100); }
+void fstenv(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xD9, 0x100); }
 void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); }
 void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); }
+void fstsw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xDD, 0x100); }
+void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0x9B); db(0xDF); db(0xE0); }
 void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); }
 void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); }
 void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); }
@@ -294,6 +312,7 @@ void fwait() { db(0x9B); }
 void fxam() { db(0xD9); db(0xE5); }
 void fxch() { db(0xD9); db(0xC9); }
 void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); }
+void fxrstor(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xAE); }
 void fxtract() { db(0xD9); db(0xF4); }
 void fyl2x() { db(0xD9); db(0xF1); }
 void fyl2xp1() { db(0xD9); db(0xF9); }
@@ -306,8 +325,12 @@ void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXM
 void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); }
 void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); }
 void imul(const Operand& op) { opR_ModM(op, 0, 5, 0xF6); }
+void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); }
+void in_(const Reg& a, uint8 v) { opInOut(a, 0xE4, v); }
 void inc(const Operand& op) { opIncDec(op, 0x40, 0); }
 void insertps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
+void int3() { db(0xCC); }
+void int_(uint8 x) { db(0xCD); db(x); }
 void ja(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }//-V524
 void ja(const char *label, LabelType type = T_AUTO) { ja(std::string(label), type); }//-V524
 void ja(const void *addr) { opJmpAbs(addr, T_NEAR, 0x77, 0x87, 0x0F); }//-V524
@@ -432,8 +455,24 @@ void lahf() { db(0x9F); }
 void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); }
 void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); }
 void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModM(addr, reg, 0x8D); }
+void leave() { db(0xC9); }
 void lfence() { db(0x0F); db(0xAE); db(0xE8); }
+void lfs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB4); }
+void lgs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB5); }
 void lock() { db(0xF0); }
+void lodsb() { db(0xAC); }
+void lodsd() { db(0xAD); }
+void lodsw() { db(0x66); db(0xAD); }
+void loop(const Label& label) { opJmp(label, T_SHORT, 0xE2, 0, 0); }
+void loop(const char *label) { loop(std::string(label)); }
+void loop(std::string label) { opJmp(label, T_SHORT, 0xE2, 0, 0); }
+void loope(const Label& label) { opJmp(label, T_SHORT, 0xE1, 0, 0); }
+void loope(const char *label) { loope(std::string(label)); }
+void loope(std::string label) { opJmp(label, T_SHORT, 0xE1, 0, 0); }
+void loopne(const Label& label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
+void loopne(const char *label) { loopne(std::string(label)); }
+void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
+void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB2); }
 void lzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); }
 void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66);  opModR(reg1, reg2, 0x0F, 0xF7); }
 void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); opModR(reg1, reg2, 0x0F, 0xF7); }
@@ -447,6 +486,7 @@ void minps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x100, isXM
 void minsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF2, isXMM_XMMorMEM); }
 void minss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF3, isXMM_XMMorMEM); }
 void monitor() { db(0x0F); db(0x01); db(0xC8); }
+void monitorx() { db(0x0F); db(0x01); db(0xFA); }
 void movapd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x29); }
 void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x66); }
 void movaps(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x29); }
@@ -503,12 +543,18 @@ void mulsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF2, isXMM
 void mulss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF3, isXMM_XMMorMEM); }
 void mulx(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F2 | T_0F38, 0xf6, true); }
 void mwait() { db(0x0F); db(0x01); db(0xC9); }
+void mwaitx() { db(0x0F); db(0x01); db(0xFB); }
 void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); }
 void not_(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); }
 void or_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); }
 void or_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); }
 void orpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x66, isXMM_XMMorMEM); }
 void orps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x100, isXMM_XMMorMEM); }
+void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); }
+void out_(uint8 v, const Reg& a) { opInOut(a, 0xE6, v); }
+void outsb() { db(0x6E); }
+void outsd() { db(0x6F); }
+void outsw() { db(0x66); db(0x6F); }
 void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, 0x66, NONE, 0x38); }
 void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, 0x66, NONE, 0x38); }
 void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, 0x66, NONE, 0x38); }
@@ -666,6 +712,10 @@ void rdseed(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER
 void rdtsc() { db(0x0F); db(0x31); }
 void rdtscp() { db(0x0F); db(0x01); db(0xF9); }
 void rep() { db(0xF3); }
+void repe() { db(0xF3); }
+void repne() { db(0xF2); }
+void repnz() { db(0xF2); }
+void repz() { db(0xF3); }
 void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }
 void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); }
 void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
@@ -757,6 +807,8 @@ void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM
 void subps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x100, isXMM_XMMorMEM); }
 void subsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF2, isXMM_XMMorMEM); }
 void subss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM); }
+void sysenter() { db(0x0F); db(0x34); }
+void sysexit() { db(0x0F); db(0x35); }
 void tzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); }
 void ucomisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM); }
 void ucomiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM); }
@@ -1554,10 +1606,16 @@ void jrcxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
 void cdqe() { db(0x48); db(0x98); }
 void cqo() { db(0x48); db(0x99); }
 void cmpsq() { db(0x48); db(0xA7); }
+void popfq() { db(0x9D); }
+void pushfq() { db(0x9C); }
+void lodsq() { db(0x48); db(0xAD); }
 void movsq() { db(0x48); db(0xA5); }
 void scasq() { db(0x48); db(0xAF); }
 void stosq() { db(0x48); db(0xAB); }
+void syscall() { db(0x0F); db(0x05); }
+void sysret() { db(0x0F); db(0x07); }
 void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }
+void fxrstor64(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xAE); }
 void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
 void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
 void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }
@@ -1580,12 +1638,15 @@ void aam() { db(0xD4); db(0x0A); }
 void aas() { db(0x3F); }
 void daa() { db(0x27); }
 void das() { db(0x2F); }
+void into() { db(0xCE); }
 void popad() { db(0x61); }
 void popfd() { db(0x9D); }
 void pusha() { db(0x60); }
 void pushad() { db(0x60); }
 void pushfd() { db(0x9C); }
 void popa() { db(0x61); }
+void lds(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC5, 0x100); }
+void les(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC4, 0x100); }
 #endif
 #ifndef XBYAK_NO_OP_NAMES
 void and(const Operand& op1, const Operand& op2) { and_(op1, op2); }
diff --git a/src/xbyak/xbyak_util.h b/src/xbyak/xbyak_util.h
index 04c661c3..eefd1526 100644
--- a/src/xbyak/xbyak_util.h
+++ b/src/xbyak/xbyak_util.h
@@ -704,12 +704,7 @@ class StackFrame {
 	~StackFrame()
 	{
 		if (!makeEpilog_) return;
-		try {
-			close();
-		} catch (std::exception& e) {
-			printf("ERR:StackFrame %s\n", e.what());
-			exit(1);
-		}
+		close();
 	}
 private:
 	const int *getOrderTbl() const

From ecbc4e403cf26d26999f710b2ea1e651f368a978 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 3 Jan 2020 17:15:28 +0900
Subject: [PATCH 134/553] fix Makefile of pairing_c_min.exe

---
 Makefile           | 2 +-
 sample/pairing_c.c | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index 7c4a0a84..e99ff198 100644
--- a/Makefile
+++ b/Makefile
@@ -367,7 +367,7 @@ ecdsa-wasm:
 bin/emu:
 	$(CXX) -g -o $@ src/fp.cpp src/bn_c256.cpp test/bn_c256_test.cpp -DMCL_DONT_USE_XBYAK -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_64BIT_PORTABLE -DMCL_VINT_FIXED_BUFFER -DMCL_MAX_BIT_SIZE=256 -I./include
 bin/pairing_c_min.exe: sample/pairing_c.c include/mcl/vint.hpp src/fp.cpp include/mcl/bn.hpp
-	$(CXX) -o $@ sample/pairing_c.c src/fp.cpp src/bn_c256.cpp -O3 -g -I./include -fno-threadsafe-statics -DMCL_DONT_USE_XBYAK -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -DMCL_MAX_BIT_SIZE=256 -DMCL_VINT_64BIT_PORTABLE -DCYBOZU_DONT_USE_STRING -DCYBOZU_DONT_USE_EXCEPTION -DNDEBUG # -DMCL_DONT_USE_CSPRNG
+	$(CXX) -std=c++03 -O3 -g -fno-threadsafe-statics -fno-exceptions -fno-rtti -o $@ sample/pairing_c.c src/fp.cpp src/bn_c384_256.cpp -I./include -DMCL_DONT_USE_XBYAK -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -DMCL_MAX_BIT_SIZE=384 -DMCL_VINT_64BIT_PORTABLE -DCYBOZU_DONT_USE_STRING -DCYBOZU_DONT_USE_EXCEPTION -DNDEBUG # -DMCL_DONT_USE_CSPRNG
 
 make_tbl:
 	$(MAKE) ../bls/src/qcoeff-bn254.hpp
diff --git a/sample/pairing_c.c b/sample/pairing_c.c
index a669ec7f..b174dbe8 100644
--- a/sample/pairing_c.c
+++ b/sample/pairing_c.c
@@ -7,7 +7,7 @@ int g_err = 0;
 
 int main()
 {
-	char buf[1024];
+	char buf[1600];
 	const char *aStr = "123";
 	const char *bStr = "456";
 	int ret = mclBn_init(MCL_BLS12_381, MCLBN_COMPILED_TIME_VAR);
@@ -27,16 +27,16 @@ int main()
 
 	ASSERT(!mclBnG1_hashAndMapTo(&P, "this", 4));
 	ASSERT(!mclBnG2_hashAndMapTo(&Q, "that", 4));
-	mclBnG1_getStr(buf, sizeof(buf), &P, 16);
+	ASSERT(mclBnG1_getStr(buf, sizeof(buf), &P, 16));
 	printf("P = %s\n", buf);
-	mclBnG2_getStr(buf, sizeof(buf), &Q, 16);
+	ASSERT(mclBnG2_getStr(buf, sizeof(buf), &Q, 16));
 	printf("Q = %s\n", buf);
 
 	mclBnG1_mul(&aP, &P, &a);
 	mclBnG2_mul(&bQ, &Q, &b);
 
 	mclBn_pairing(&e, &P, &Q);
-	mclBnGT_getStr(buf, sizeof(buf), &e, 16);
+	ASSERT(mclBnGT_getStr(buf, sizeof(buf), &e, 16));
 	printf("e = %s\n", buf);
 	mclBnGT_pow(&e1, &e, &a);
 	mclBn_pairing(&e2, &aP, &Q);

From 8ec855f162f97f64b1bab9838068b1425ad0d39a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 3 Jan 2020 18:05:25 +0900
Subject: [PATCH 135/553] add maptg2_wb19

---
 include/mcl/bn.hpp         |   8 +
 include/mcl/curve_type.h   |   3 +-
 include/mcl/mapto_wb19.hpp | 608 +++++++++++++++++++++++++++++++++++++
 3 files changed, 618 insertions(+), 1 deletion(-)
 create mode 100644 include/mcl/mapto_wb19.hpp

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index b56f6ba9..fd69b4b0 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -314,6 +314,8 @@ struct Compress {
 	}
 };
 
+#include <mcl/mapto_wb19.hpp>
+
 struct MapTo {
 	enum {
 		BNtype,
@@ -330,6 +332,7 @@ struct MapTo {
 	int type_;
 	int mapToMode_;
 	bool useOriginalG2cofactor_;
+	MapToG2_WB19 maptog2_wb19_;
 	MapTo()
 		: type_(0)
 		, mapToMode_(MCL_MAP_TO_MODE_ORIGINAL)
@@ -537,6 +540,11 @@ struct MapTo {
 			mapToMode_ = mode;
 			return true;
 			break;
+		case MCL_MAP_TO_MODE_WB19:
+			mapToMode_ = mode;
+			maptog2_wb19_.init();
+			return true;
+			break;
 		default:
 			return false;
 		}
diff --git a/include/mcl/curve_type.h b/include/mcl/curve_type.h
index dca749d2..85ce7a59 100644
--- a/include/mcl/curve_type.h
+++ b/include/mcl/curve_type.h
@@ -39,5 +39,6 @@ enum {
 enum {
 	MCL_MAP_TO_MODE_ORIGINAL, // see MapTo::calcBN
 	MCL_MAP_TO_MODE_TRY_AND_INC, // try-and-incremental-x
-	MCL_MAP_TO_MODE_ETH2 // eth2.0 spec
+	MCL_MAP_TO_MODE_ETH2, // eth2.0 spec
+	MCL_MAP_TO_MODE_WB19
 };
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
new file mode 100644
index 00000000..22f3770d
--- /dev/null
+++ b/include/mcl/mapto_wb19.hpp
@@ -0,0 +1,608 @@
+#pragma once
+/**
+	@file
+	@brief map to G2 on BLS12-381 (must be included from mcl/bn.hpp)
+	@author MITSUNARI Shigeo(@herumi)
+	@license modified new BSD license
+	http://opensource.org/licenses/BSD-3-Clause
+	ref. https://eprint.iacr.org/2019/403 , https://github.com/algorand/bls_sigs_ref
+*/
+
+struct MapToG2_WB19 {
+	Fp2 xi;
+	Fp2 Ell2p_a;
+	Fp2 Ell2p_b;
+	Fp half;
+	mpz_class sqrtConst; // (p^2 - 9) / 16
+	Fp2 root4[4];
+	Fp2 etas[4];
+	Fp2 xnum[4];
+	Fp2 xden[3];
+	Fp2 ynum[4];
+	Fp2 yden[4];
+	struct Point {
+		Fp2 x, y, z;
+		bool isZero() const
+		{
+			return z.isZero();
+		}
+	};
+	// should be merged into ec.hpp
+	template<class G>
+	void neg(G& Q, const G& P) const
+	{
+		Q.x = P.x;
+		Fp2::neg(Q.y, P.y);
+		Q.z = P.z;
+	}
+	template<class G> // G must provide Fp2 members x, y, z and isZero()
+	void add(G& R, const G& P, const G& Q) const // R = P + Q in Jacobian coordinates
+	{
+		if (P.isZero()) {
+			R = Q; // 0 + Q = Q
+			return;
+		}
+		if (Q.isZero()) {
+			R = P; // P + 0 = P (previously `R = Q`, which wrongly returned zero)
+			return;
+		}
+		Fp2 Z1Z1, Z2Z2, U1, U2, S1, S2;
+		Fp2::sqr(Z1Z1, P.z);
+		Fp2::sqr(Z2Z2, Q.z);
+		Fp2::mul(U1, P.x, Z2Z2); // U1 = P.x * Q.z^2
+		Fp2::mul(U2, Q.x, Z1Z1); // U2 = Q.x * P.z^2
+		Fp2::mul(S1, P.y, Q.z);
+		S1 *= Z2Z2; // S1 = P.y * Q.z^3
+		Fp2::mul(S2, Q.y, P.z);
+		S2 *= Z1Z1; // S2 = Q.y * P.z^3
+		if (U1 == U2 && S1 == S2) { // P and Q are the same point: the chord formula degenerates
+			dbl(R, P);
+			return;
+		}
+		Fp2 H, I, J, rr, V;
+		Fp2::sub(H, U2, U1);
+		Fp2::add(I, H, H);
+		Fp2::sqr(I, I); // I = (2H)^2
+		Fp2::mul(J, H, I);
+		Fp2::sub(rr, S2, S1);
+		rr += rr; // rr = 2(S2 - S1)
+		Fp2::mul(V, U1, I);
+		Fp2::mul(R.z, P.z, Q.z);
+		R.z *= H;
+		if (R.z.isZero()) { // P.z and Q.z are nonzero here, so H == 0: same x, different y
+			R.x.clear(); // the sum is the zero point (z == 0)
+			R.y.clear();
+			return;
+		}
+		R.z += R.z; // R.z = 2 * P.z * Q.z * H
+		Fp2::sqr(R.x, rr);
+		R.x -= J;
+		R.x -= V;
+		R.x -= V; // R.x = rr^2 - J - 2V
+		Fp2::sub(R.y, V, R.x);
+		R.y *= rr;
+		S1 *= J;
+		R.y -= S1;
+		R.y -= S1; // R.y = rr(V - R.x) - 2 S1 J
+	}
+	template<class G>
+	void dbl(G& Q, const G& P) const
+	{
+		Fp2 A, B, C, D, E, F;
+		Fp2::sqr(A, P.x);
+		Fp2::sqr(B, P.y);
+		Fp2::sqr(C, B);
+		Fp2::add(D, P.x, B);
+		Fp2::sqr(D, D);
+		D -= A;
+		D -= C;
+		D += D;
+		Fp2::add(E, A, A);
+		E += A;
+		Fp2::sqr(F, E);
+		Fp2::sub(Q.x, F, D);
+		Q.x -= D;
+		Fp2::mul(Q.z, P.y, P.z);
+		if (Q.z.isZero()) {
+			Q.x.clear();
+			Q.y.clear();
+			return;
+		}
+		Q.z += Q.z;
+		Fp2::sub(Q.y, D, Q.x);
+		Q.y *= E;
+		C += C;
+		C += C;
+		C += C;
+		Q.y -= C;
+	}
+	// return true if P (Jacobian coordinates) is on y^2 = x^3 + Ell2p_a x + Ell2p_b
+	bool isValidPoint(const Point& P) const
+	{
+		Fp2 y2, x2, z2, z4, t;
+		Fp2::sqr(x2, P.x);
+		Fp2::sqr(y2, P.y);
+		Fp2::sqr(z2, P.z);
+		Fp2::sqr(z4, z2);
+		Fp2::mul(t, z4, Ell2p_a);
+		t += x2;
+		t *= P.x; // t = x^3 + Ell2p_a x z^4
+		z4 *= z2; // z4 now holds z^6
+		z4 *= Ell2p_b;
+		t += z4; // t = x^3 + Ell2p_a x z^4 + Ell2p_b z^6
+		return y2 == t;
+	}
+	void init()
+	{
+		bool b;
+		xi.a = -2;
+		xi.b = -1;
+		Ell2p_a.a = 0;
+		Ell2p_a.b = 240;
+		Ell2p_b.a = 1012;
+		Ell2p_b.b = 1012;
+		half = -1;
+		half /= 2;
+		sqrtConst = Fp::getOp().mp;
+		sqrtConst *= sqrtConst;
+		sqrtConst -= 9;
+		sqrtConst /= 16;
+		const char *rv1Str = "0x6af0e0437ff400b6831e36d6bd17ffe48395dabc2d3435e77f76e17009241c5ee67992f72ec05f4c81084fbede3cc09";
+		root4[0].a = 1;
+		root4[0].b.clear();
+		root4[1].a.clear();
+		root4[1].b = 1;
+		root4[2].a.setStr(&b, rv1Str);
+		assert(b); (void)b;
+		root4[2].b = root4[2].a;
+		root4[3].a = root4[2].a;
+		Fp::neg(root4[3].b, root4[3].a);
+		const char *ev1Str = "0x699be3b8c6870965e5bf892ad5d2cc7b0e85a117402dfd83b7f4a947e02d978498255a2aaec0ac627b5afbdf1bf1c90";
+		const char *ev2Str = "0x8157cd83046453f5dd0972b6e3949e4288020b5b8a9cc99ca07e27089a2ce2436d965026adad3ef7baba37f2183e9b5";
+		const char *ev3Str = "0xab1c2ffdd6c253ca155231eb3e71ba044fd562f6f72bc5bad5ec46a0b7a3b0247cf08ce6c6317f40edbc653a72dee17";
+		const char *ev4Str = "0xaa404866706722864480885d68ad0ccac1967c7544b447873cc37e0181271e006df72162a3d3e0287bf597fbf7f8fc1";
+		Fp& ev1 = etas[0].a;
+		Fp& ev2 = etas[0].b;
+		Fp& ev3 = etas[2].a;
+		Fp& ev4 = etas[2].b;
+		ev1.setStr(&b, ev1Str);
+		assert(b); (void)b;
+		ev2.setStr(&b, ev2Str);
+		assert(b); (void)b;
+		Fp::neg(etas[1].a, ev2);
+		etas[1].b = ev1;
+		ev3.setStr(&b, ev3Str);
+		assert(b); (void)b;
+		ev4.setStr(&b, ev4Str);
+		assert(b); (void)b;
+		Fp::neg(etas[3].a, ev4);
+		etas[3].b = ev3;
+		init_iso();
+	}
+	void init_iso()
+	{
+		const char *tbl[] = {
+			"0x5c759507e8e333ebb5b7a9a47d7ed8532c52d39fd3a042a88b58423c50ae15d5c2638e343d9c71c6238aaaaaaaa97d6",
+			"0x11560bf17baa99bc32126fced787c88f984f87adf7ae0c7f9a208c6b4f20a4181472aaa9cb8d555526a9ffffffffc71a",
+			"0x11560bf17baa99bc32126fced787c88f984f87adf7ae0c7f9a208c6b4f20a4181472aaa9cb8d555526a9ffffffffc71e",
+			"0x8ab05f8bdd54cde190937e76bc3e447cc27c3d6fbd7063fcd104635a790520c0a395554e5c6aaaa9354ffffffffe38d",
+			"0x171d6541fa38ccfaed6dea691f5fb614cb14b4e7f4e810aa22d6108f142b85757098e38d0f671c7188e2aaaaaaaa5ed1",
+			"0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaa63",
+			"0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaa9f",
+			"0x1530477c7ab4113b59a4c18b076d11930f7da5d4a07f649bf54439d87d27e500fc8c25ebf8c92f6812cfc71c71c6d706",
+			"0x5c759507e8e333ebb5b7a9a47d7ed8532c52d39fd3a042a88b58423c50ae15d5c2638e343d9c71c6238aaaaaaaa97be",
+			"0x11560bf17baa99bc32126fced787c88f984f87adf7ae0c7f9a208c6b4f20a4181472aaa9cb8d555526a9ffffffffc71c",
+			"0x8ab05f8bdd54cde190937e76bc3e447cc27c3d6fbd7063fcd104635a790520c0a395554e5c6aaaa9354ffffffffe38f",
+			"0x124c9ad43b6cf79bfbf7043de3811ad0761b0f37a1e26286b0e977c69aa274524e79097a56dc4bd9e1b371c71c718b10",
+			"0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffa8fb",
+			"0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffa9d3",
+			"0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaa99",
+		};
+		bool b;
+		xnum[0].a.setStr(&b, tbl[0]); assert(b); (void)b;
+		xnum[0].b = xnum[0].a;
+		xnum[1].a.clear();
+		xnum[1].b.setStr(&b, tbl[1]); assert(b); (void)b;
+		xnum[2].a.setStr(&b, tbl[2]); assert(b); (void)b;
+		xnum[2].b.setStr(&b, tbl[3]); assert(b); (void)b;
+		xnum[3].a.setStr(&b, tbl[4]); assert(b); (void)b;
+		xnum[3].b.clear();
+		xden[0].a.clear();
+		xden[0].b.setStr(&b, tbl[5]); assert(b); (void)b;
+		xden[1].a = 0xc;
+		xden[1].b.setStr(&b, tbl[6]); assert(b); (void)b;
+		xden[2].a = 1;
+		xden[2].b = 0;
+		ynum[0].a.setStr(&b, tbl[7]); assert(b); (void)b;
+		ynum[0].b = ynum[0].a;
+		ynum[1].a.clear();
+		ynum[1].b.setStr(&b, tbl[8]); assert(b); (void)b;
+		ynum[2].a.setStr(&b, tbl[9]); assert(b); (void)b;
+		ynum[2].b.setStr(&b, tbl[10]); assert(b); (void)b;
+		ynum[3].a.setStr(&b, tbl[11]); assert(b); (void)b;
+		ynum[3].b.clear();
+		yden[0].a.setStr(&b, tbl[12]); assert(b); (void)b;
+		yden[0].b = yden[0].a;
+		yden[1].a.clear();
+		yden[1].b.setStr(&b, tbl[13]); assert(b); (void)b;
+		yden[2].a = 0x12;
+		yden[2].b.setStr(&b, tbl[14]); assert(b); (void)b;
+		yden[3].a = 1;
+		yden[3].b.clear();
+	}
+	template<size_t N> // y = sum_{k=0}^{N-1} cof[k] * x^k * zpows[N-1-k]; reads zpows[0..N-1]
+	void evalPoly(Fp2& y, const Fp2& x, const Fp2 *zpows, const Fp2 (&cof)[N]) const
+	{
+		Fp2::mul(y, zpows[0], cof[N - 1]); // start from the highest coefficient
+		for (size_t i = 1; i < N; i++) {
+			y *= x; // Horner step: every term gathered so far gains one power of x
+			Fp2 t;
+			Fp2::mul(t, zpows[i], cof[N - 1 - i]); // next lower coefficient, weighted by zpows[i]
+			y += t;
+		}
+	}
+	// set Q from P by evaluating the polynomials whose coefficients are the members (xnum, xden, ynum, yden)
+	void iso3(G2& Q, const Point& P) const
+	{
+		Fp2 zpows[4]; // { 1, z^2, z^4, z^6 } with z = P.z
+		zpows[0] = 1;
+		Fp2::sqr(zpows[1], P.z);
+		Fp2::sqr(zpows[2], zpows[1]);
+		Fp2::mul(zpows[3], zpows[2], zpows[1]);
+		Fp2 mapvals[4]; // evaluations of xnum, xden, ynum, yden at P.x (see evalPoly)
+		evalPoly(mapvals[0], P.x, zpows, xnum);
+		evalPoly(mapvals[1], P.x, zpows, xden);
+		evalPoly(mapvals[2], P.x, zpows, ynum);
+		evalPoly(mapvals[3], P.x, zpows, yden);
+		mapvals[1] *= zpows[1]; // xden value times z^2
+		mapvals[2] *= P.y; // ynum value times y
+		mapvals[3] *= zpows[1];
+		mapvals[3] *= P.z; // yden value times z^3
+		Fp2::mul(Q.z, mapvals[1], mapvals[3]);
+		Fp2::mul(Q.x, mapvals[0], mapvals[3]);
+		Q.x *= Q.z; // so that Q.x / Q.z^2 = mapvals[0] / mapvals[1]
+		Fp2 t;
+		Fp2::sqr(t, Q.z);
+		Fp2::mul(Q.y, mapvals[2], mapvals[1]);
+		Q.y *= t; // so that Q.y / Q.z^3 = mapvals[2] / mapvals[3]
+	}
+	/*
+		(a+bi)*(-2-i) = (b-2a)-(a+2b)i
+	*/
+	void mul_xi(Fp2& y, const Fp2& x) const
+	{
+		Fp t;
+		Fp::sub(t, x.b, x.a);
+		t -= x.a;
+		Fp::add(y.b, x.b, x.b);
+		y.b += x.a;
+		Fp::neg(y.b, y.b);
+		y.a = t;
+	}
+	// sign of x: decided by x.b unless x.b == 0, in which case x.a decides (negative means > half)
+	bool isNegSign(const Fp2& x) const
+	{
+		if (x.b > half) return true;
+		if (!x.b.isZero()) return false;
+		// x.b == 0 from here on, so the former second isZero() test on x.b could never fire
+		return x.a > half;
+	}
+	void osswu2_help(Point& P, const Fp2& t) const // set P = (x, y, z) from the field element t; hits assert(0) if no candidate y matches
+	{
+		Fp2 t2, t2xi;
+		Fp2::sqr(t2, t); // t2 = t^2
+		Fp2 den, den2;
+//		Fp2::mul(t2xi, t2, xi);
+		mul_xi(t2xi, t2); // t2xi = t2 * (-2-i) (see mul_xi); stands in for the generic product with xi left commented out above
+		den = t2xi;
+		Fp2::sqr(den2, den); // den2 = t2xi^2
+		// (t^2 * xi)^2 + (t^2 * xi)
+		den += den2;
+		Fp2 x0_num, x0_den;
+		Fp2::add(x0_num, den, 1);
+		x0_num *= Ell2p_b; // x0_num = Ell2p_b * (den + 1)
+		if (den.isZero()) { // den == 0 would make the usual denominator zero, so another one is used
+			Fp2::mul(x0_den, Ell2p_a, xi);
+		} else {
+			Fp2::mul(x0_den, -Ell2p_a, den);
+		}
+		Fp2 x0_den2, x0_den3, gx0_den, gx0_num;
+		Fp2::sqr(x0_den2, x0_den);
+		Fp2::mul(x0_den3, x0_den2, x0_den);
+		gx0_den = x0_den3; // gx0_den = x0_den^3
+
+		Fp2::mul(gx0_num, Ell2p_b, gx0_den); // the next lines build gx0_num = Ell2p_b * x0_den^3 + Ell2p_a * x0_num * x0_den^2 + x0_num^3
+		Fp2 tmp, tmp1, tmp2;
+		Fp2::mul(tmp, Ell2p_a, x0_num);
+		tmp *= x0_den2;
+		gx0_num += tmp;
+		Fp2::sqr(tmp, x0_num);
+		tmp *= x0_num;
+		gx0_num += tmp;
+
+		Fp2::sqr(tmp1, gx0_den); // x^2, where x = gx0_den
+		Fp2::sqr(tmp2, tmp1); // x^4
+		tmp1 *= tmp2; // x^6
+		tmp1 *= gx0_den; // x^7
+		Fp2::mul(tmp2, gx0_num, tmp1); // tmp2 = gx0_num * x^7
+		tmp1 *= tmp2; // gx0_num * x^14
+		tmp1 *= gx0_den; // gx0_num * x^15
+		Fp2 candi;
+		Fp2::pow(candi, tmp1, sqrtConst);
+		candi *= tmp2; // candi = tmp2 * (gx0_num * x^15)^sqrtConst
+		bool isNegT = isNegSign(t);
+		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(root4); i++) { // first candidate: try candi * root4[i] as y
+			Fp2::mul(P.y, candi, root4[i]);
+			Fp2::sqr(tmp, P.y);
+			tmp *= gx0_den;
+			if (tmp == gx0_num) { // accepted when y^2 * gx0_den == gx0_num
+				if (isNegSign(P.y) != isNegT) { // give y the same sign as t
+					Fp2::neg(P.y, P.y);
+				}
+				Fp2::mul(P.x, x0_num, x0_den); // P = (x0_num * x0_den, y * x0_den^3, x0_den)
+				P.y *= x0_den3;
+				P.z = x0_den;
+				return;
+			}
+		}
+		Fp2 x1_num, x1_den, gx1_num, gx1_den;
+		Fp2::mul(x1_num, t2xi, x0_num); // second candidate: x1_num = t2xi * x0_num with the same denominator
+		x1_den = x0_den;
+		Fp2::mul(gx1_num, den2, t2xi);
+		gx1_num *= gx0_num; // gx1_num = t2xi^3 * gx0_num
+		gx1_den = gx0_den;
+		candi *= t2;
+		candi *= t; // candi is now the previous candi times t^3
+		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(etas); i++) { // try candi * etas[i] as y
+			Fp2::mul(P.y, candi, etas[i]);
+			Fp2::sqr(tmp, P.y);
+			tmp *= gx1_den;
+			if (tmp == gx1_num) { // accepted when y^2 * gx1_den == gx1_num
+				if (isNegSign(P.y) != isNegT) { // give y the same sign as t
+					Fp2::neg(P.y, P.y);
+				}
+				Fp2::mul(P.x, x1_num, x1_den); // P = (x1_num * x1_den, y * x1_den^3, x1_den)
+				Fp2::sqr(tmp, x1_den);
+				P.y *= tmp;
+				P.y *= x1_den;
+				P.z = x1_den;
+				return;
+			}
+		}
+		assert(0); // neither loop found a matching y
+	}
+	void h2_chain(G2& t1, const G2& P) const
+	{
+		G2 t0, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
+		t0 = P;
+		dbl(t1, t0);
+		add(t4, t1, t0);
+		add(t2, t4, t1);
+		add(t3, t2, t1);
+		add(t11, t3, t1);
+		add(t9, t11, t1);
+		add(t10, t9, t1);
+		add(t5, t10, t1);
+		add(t7, t5, t1);
+		add(t15, t7, t1);
+		add(t13, t15, t1);
+		add(t6, t13, t1);
+		add(t14, t6, t1);
+		add(t12, t14, t1);
+		add(t8, t12, t1);
+		dbl(t1, t6);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t13);
+		for (size_t i = 0; i < 2; i++) dbl(t1, t1);
+		add(t1, t1, t0);
+		for (size_t i = 0; i < 9; i++) dbl(t1, t1);
+		add(t1, t1, t8);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t11);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t13);
+		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
+		add(t1, t1, t2);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
+		add(t1, t1, t5);
+		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
+		add(t1, t1, t0);
+		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
+		add(t1, t1, t11);
+		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
+		add(t1, t1, t8);
+		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
+		add(t1, t1, t2);
+		for (size_t i = 0; i < 9; i++) dbl(t1, t1);
+		add(t1, t1, t5);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t11);
+		for (size_t i = 0; i < 2; i++) dbl(t1, t1);
+		add(t1, t1, t0);
+		for (size_t i = 0; i < 9; i++) dbl(t1, t1);
+		add(t1, t1, t8);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t13);
+		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
+		add(t1, t1, t0);
+		for (size_t i = 0; i < 11; i++) dbl(t1, t1);
+		add(t1, t1, t9);
+		for (size_t i = 0; i < 7; i++) dbl(t1, t1);
+		add(t1, t1, t12);
+		for (size_t i = 0; i < 7; i++) dbl(t1, t1);
+		add(t1, t1, t7);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t12);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t14);
+		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
+		add(t1, t1, t13);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t0);
+		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
+		add(t1, t1, t9);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t13);
+		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
+		add(t1, t1, t10);
+		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
+		add(t1, t1, t2);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t10);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t2);
+		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
+		add(t1, t1, t0);
+		for (size_t i = 0; i < 10; i++) dbl(t1, t1);
+		add(t1, t1, t9);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t14);
+		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t9);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t15);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t8);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t12);
+		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
+		add(t1, t1, t5);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t15);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t2);
+		for (size_t i = 0; i < 7; i++) dbl(t1, t1);
+		add(t1, t1, t5);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t9);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t15);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t14);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t8);
+		for (size_t i = 0; i < 10; i++) dbl(t1, t1);
+		add(t1, t1, t6);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t5);
+		for (size_t i = 0; i < 3; i++) dbl(t1, t1);
+		add(t1, t1, t0);
+		for (size_t i = 0; i < 9; i++) dbl(t1, t1);
+		add(t1, t1, t13);
+		for (size_t i = 0; i < 7; i++) dbl(t1, t1);
+		add(t1, t1, t12);
+		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
+		add(t1, t1, t5);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t2);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t11);
+		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
+		add(t1, t1, t10);
+		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
+		add(t1, t1, t4);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t10);
+		for (size_t i = 0; i < 7; i++) dbl(t1, t1);
+		add(t1, t1, t7);
+		for (size_t i = 0; i < 3; i++) dbl(t1, t1);
+		add(t1, t1, t2);
+		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
+		add(t1, t1, t9);
+		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
+		add(t1, t1, t9);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t8);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t7);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t6);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t5);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t4);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t5);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t4);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t4);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t5);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 7; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t4);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 3; i++) dbl(t1, t1);
+		add(t1, t1, t0);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
+		add(t1, t1, t3);
+		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
+		add(t1, t1, t2);
+	}
+	void mx_chain(G2& Q, const G2& P) const // Q = 0xd201000000010000 * P via a fixed double-and-add chain
+	{
+		G2 T; // temporary so that Q may alias P (clear_h2 calls mx_chain(work, work))
+		dbl(T, P);
+		const size_t tbl[] = { 2, 3, 9, 32, 16 }; // number of doublings that follow each addition of P
+		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+			add(T, T, P);
+			for (size_t j = 0; j < tbl[i]; j++) {
+				dbl(T, T);
+			}
+		}
+		Q = T;
+	}
+	void clear_h2(G2& Q, const G2& P) const // Q = 3 * (c^2 - 1) * H, with H = h2_chain(P) and c the scalar applied by mx_chain; Q may alias P
+	{
+#if 0
+		mcl::bn::BN::param.mapTo.mulByCofactorBLS12fast(Q, P); // disabled alternative, kept for reference
+#else
+		G2 work, work2;
+		h2_chain(work, P); // work = H
+		dbl(work2, work);
+		add(work2, work, work2); // work2 = 3H
+		mx_chain(work, work2); // work = c * 3H
+		mx_chain(work, work); // work = c^2 * 3H
+		neg(work2, work2);
+		add(Q, work, work2); // Q = c^2 * 3H - 3H
+#endif
+	}
+	void opt_swu2_map(G2& P, const Fp2& t, const Fp2 *t2 = 0) const // map t (and optionally *t2) to P: osswu2_help, then iso3, then clear_h2
+	{
+		Point Pp;
+		osswu2_help(Pp, t);
+		if (t2) { // a second field element is optional; its point is added to the first one
+			Point P2;
+			osswu2_help(P2, *t2);
+			add(Pp, Pp, P2);
+		}
+		iso3(P, Pp);
+		clear_h2(P, P); // in place: clear_h2 accepts the same object as input and output
+	}
+};
+

From 5200c33d446f2b62a2a80198099d7c2649ef6a95 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 8 Jan 2020 12:58:40 +0900
Subject: [PATCH 136/553] update cybozulib

---
 include/cybozu/option.hpp | 24 ++++++++++++++++++++++--
 include/cybozu/sha2.hpp   | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/include/cybozu/option.hpp b/include/cybozu/option.hpp
index f7fa1ba0..5dac705c 100644
--- a/include/cybozu/option.hpp
+++ b/include/cybozu/option.hpp
@@ -145,7 +145,7 @@ bool convertInt(T* x, const char *str)
 	if (factor > 1) {
 		if ((std::numeric_limits<T>::min)() / factor <= y
 			&& y <= (std::numeric_limits<T>::max)() / factor) {
-			*x = y * factor;
+			*x = static_cast<T>(y * factor);
 		} else {
 			return false;
 		}
@@ -155,9 +155,29 @@ bool convertInt(T* x, const char *str)
 	return true;
 }
 
+template<class T>
+void convertToStr(std::ostream& os, const T* p)
+{
+	os << *p;
+}
+template<>inline void convertToStr(std::ostream& os, const int8_t* p)
+{
+	os << static_cast<int>(*p);
+}
+template<>inline void convertToStr(std::ostream& os, const uint8_t* p)
+{
+	os << static_cast<int>(*p);
+}
+
 #define CYBOZU_OPTION_DEFINE_CONVERT_INT(type) \
 template<>inline bool convert(type* x, const char *str) { return convertInt(x, str); }
 
+CYBOZU_OPTION_DEFINE_CONVERT_INT(int8_t)
+CYBOZU_OPTION_DEFINE_CONVERT_INT(uint8_t)
+
+CYBOZU_OPTION_DEFINE_CONVERT_INT(int16_t)
+CYBOZU_OPTION_DEFINE_CONVERT_INT(uint16_t)
+
 CYBOZU_OPTION_DEFINE_CONVERT_INT(int)
 CYBOZU_OPTION_DEFINE_CONVERT_INT(long)
 CYBOZU_OPTION_DEFINE_CONVERT_INT(long long)
@@ -185,7 +205,7 @@ struct Holder : public HolderBase {
 	std::string toStr() const
 	{
 		std::ostringstream os;
-		os << *p_;
+		convertToStr(os, p_);
 		return os.str();
 	}
 	const void *get() const { return (void*)p_; }
diff --git a/include/cybozu/sha2.hpp b/include/cybozu/sha2.hpp
index 335a8975..f8630bd0 100644
--- a/include/cybozu/sha2.hpp
+++ b/include/cybozu/sha2.hpp
@@ -13,6 +13,7 @@
 #ifndef CYBOZU_DONT_USE_STRING
 #include <string>
 #endif
+#include <memory.h>
 
 #ifdef CYBOZU_USE_OPENSSL_SHA
 #ifdef __APPLE__
@@ -468,3 +469,38 @@ class Sha512 : public sha2_local::Common<Sha512> {
 } // cybozu
 
 #endif
+
+namespace cybozu {
+
+/*
+	HMAC-SHA-256
+	hmac must have 32 bytes buffer
+*/
+inline void hmac256(void *hmac, const void *key, size_t keySize, const void *msg, size_t msgSize)
+{
+	const uint8_t ipad = 0x36;
+	const uint8_t opad = 0x5c;
+	uint8_t k[64];
+	Sha256 hash;
+	if (keySize > 64) {
+		hash.digest(k, 32, key, keySize);
+		hash.clear();
+		keySize = 32;
+	} else {
+		memcpy(k, key, keySize);
+	}
+	for (size_t i = 0; i < keySize; i++) {
+		k[i] = k[i] ^ ipad;
+	}
+	memset(k + keySize, ipad, 64 - keySize);
+	hash.update(k, 64);
+	hash.digest(hmac, 32, msg, msgSize);
+	hash.clear();
+	for (size_t i = 0; i < 64; i++) {
+		k[i] = k[i] ^ (ipad ^ opad);
+	}
+	hash.update(k, 64);
+	hash.digest(hmac, 32, hmac, 32);
+}
+
+} // cybozu

From 5f884f86aef91dd47b622804fd509c7d70526434 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 13 Jan 2020 11:26:07 +0900
Subject: [PATCH 137/553] add hmac256addZeroByte

---
 include/cybozu/sha2.hpp | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/include/cybozu/sha2.hpp b/include/cybozu/sha2.hpp
index f8630bd0..f0029261 100644
--- a/include/cybozu/sha2.hpp
+++ b/include/cybozu/sha2.hpp
@@ -472,11 +472,9 @@ class Sha512 : public sha2_local::Common<Sha512> {
 
 namespace cybozu {
 
-/*
-	HMAC-SHA-256
-	hmac must have 32 bytes buffer
-*/
-inline void hmac256(void *hmac, const void *key, size_t keySize, const void *msg, size_t msgSize)
+namespace sha2_local {
+
+inline void hmac256_inner(void *hmac, const void *key, size_t keySize, const void *msg, size_t msgSize, bool addZeroByte)
 {
 	const uint8_t ipad = 0x36;
 	const uint8_t opad = 0x5c;
@@ -494,7 +492,13 @@ inline void hmac256(void *hmac, const void *key, size_t keySize, const void *msg
 	}
 	memset(k + keySize, ipad, 64 - keySize);
 	hash.update(k, 64);
-	hash.digest(hmac, 32, msg, msgSize);
+	if (addZeroByte) {
+		hash.update(msg, msgSize);
+		const char zero = '\x00';
+		hash.digest(hmac, 32, &zero, 1);
+	} else {
+		hash.digest(hmac, 32, msg, msgSize);
+	}
 	hash.clear();
 	for (size_t i = 0; i < 64; i++) {
 		k[i] = k[i] ^ (ipad ^ opad);
@@ -503,4 +507,23 @@ inline void hmac256(void *hmac, const void *key, size_t keySize, const void *msg
 	hash.digest(hmac, 32, hmac, 32);
 }
 
+} // cybozu::sha2_local
+
+/*
+	HMAC-SHA-256
+	hmac must have 32 bytes buffer
+*/
+inline void hmac256(void *hmac, const void *key, size_t keySize, const void *msg, size_t msgSize)
+{
+	sha2_local::hmac256_inner(hmac, key, keySize, msg, msgSize, false);
+}
+
+/*
+	hmac256 for [msg] + [\x00]
+*/
+inline void hmac256addZeroByte(void *hmac, const void *key, size_t keySize, const void *msg, size_t msgSize)
+{
+	sha2_local::hmac256_inner(hmac, key, keySize, msg, msgSize, true);
+}
+
 } // cybozu

From 88779335927a360244be492ec564c8b8ad1b7fcb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 15 Jan 2020 14:54:40 +0900
Subject: [PATCH 138/553] add cybozu/file.hpp

---
 include/cybozu/file.hpp | 621 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 621 insertions(+)
 create mode 100644 include/cybozu/file.hpp

diff --git a/include/cybozu/file.hpp b/include/cybozu/file.hpp
new file mode 100644
index 00000000..ff17b6fc
--- /dev/null
+++ b/include/cybozu/file.hpp
@@ -0,0 +1,621 @@
+#pragma once
+/**
+	@file
+	@brief file class and operations
+
+	@author MITSUNARI Shigeo(@herumi)
+*/
+
+#include <assert.h>
+#include <sys/stat.h> // for stat
+#include <cybozu/exception.hpp>
+#include <vector>
+#include <ios>
+#ifdef _WIN32
+	#include <shlwapi.h>
+	#include <io.h>
+	#include <fcntl.h>
+	#include <shlobj.h>
+	#include <direct.h>
+	#include <windows.h>
+	#pragma comment(lib, "shlwapi.lib")
+	#pragma comment(lib, "shell32.lib")
+	#pragma comment(lib, "User32.lib")
+#else
+	#include <stdio.h>
+	#include <unistd.h>
+	#include <sys/types.h>
+	#include <fcntl.h>
+	#include <dirent.h>
+#endif
+#ifdef __APPLE__
+	#include <mach-o/dyld.h>
+#endif
+
+namespace cybozu {
+
+class File {
+	std::string name_; // used for only errmsg
+#ifdef _WIN32
+	typedef HANDLE handleType;
+#else
+	typedef int handleType;
+	enum {
+		INVALID_HANDLE_VALUE = -1
+	};
+#endif
+	handleType hdl_;
+	bool doClose_;
+	bool isReadOnly_;
+	File(const File&);
+	void operator=(const File&);
+#ifdef _WIN32
+	void openSub()
+	{
+	}
+#endif
+	void verifyMode(std::ios::openmode mode)
+	{
+		doClose_ = true;
+		bool isCorrectMode = true;
+		if (!!(mode & std::ios::in) == !!(mode & std::ios::out)) {
+			isCorrectMode = false;
+		} else {
+			if (mode & std::ios::in) {
+				isReadOnly_ = true;
+				if ((mode & std::ios::app) || (mode & std::ios::trunc)) isCorrectMode = false;
+			} else {
+				isReadOnly_ = false;
+				if ((mode & std::ios::app) && (mode & std::ios::trunc)) isCorrectMode = false;
+			}
+		}
+		if (!isCorrectMode) {
+			throw cybozu::Exception("File:open:bad mode") << name_ << mode;
+		}
+	}
+#ifdef _WIN32
+	HANDLE createFile(const std::string& name, DWORD access, DWORD share, DWORD disposition)
+	{
+		return  ::CreateFileA(name.c_str(), access, share, NULL, disposition, FILE_ATTRIBUTE_NORMAL, NULL);
+	}
+	HANDLE createFile(const std::wstring& name, DWORD access, DWORD share, DWORD disposition)
+	{
+		return  ::CreateFileW(name.c_str(), access, share, NULL, disposition, FILE_ATTRIBUTE_NORMAL, NULL);
+	}
+	template<class T>
+	void setHandle(const T& name, std::ios::openmode mode)
+	{
+		DWORD access = GENERIC_READ;
+		DWORD disposition = OPEN_EXISTING;
+		DWORD share = FILE_SHARE_READ | FILE_SHARE_WRITE;
+		if (mode & std::ios::out) {
+			access = GENERIC_WRITE;
+			disposition = CREATE_ALWAYS;
+			if (mode & std::ios::app) {
+				disposition = OPEN_ALWAYS;
+			}
+		}
+		hdl_ = createFile(name, access, share, disposition);
+	}
+#else
+	void setHandle(const std::string& name, std::ios::openmode mode)
+	{
+		int flags = O_RDONLY; // | O_NOATIME; /* can't use on NFS */
+		mode_t access = 0644;
+		if (mode & std::ios::out) {
+			flags = O_WRONLY | O_CREAT;
+			if (mode & std::ios::app) {
+				flags |= O_APPEND;
+			} else {
+				flags |= O_TRUNC;
+			}
+		}
+		hdl_ = ::open(name.c_str(), flags, access);
+	}
+#endif
+	template<class T>
+	void openSub(const T& name, std::ios::openmode mode)
+	{
+		if (isOpen()) throw cybozu::Exception("File:open:alread opened") << name_;
+		verifyMode(mode);
+		setHandle(name, mode);
+		if (isOpen()) {
+			if (mode & std::ios::app) {
+				seek(getSize(), std::ios::beg);
+			}
+			return;
+		}
+		throw cybozu::Exception("File:open") << name_ << cybozu::ErrorNo() << static_cast<int>(mode);
+	}
+public:
+	File()
+		: hdl_(INVALID_HANDLE_VALUE)
+		, doClose_(true)
+		, isReadOnly_(false)
+	{
+	}
+	File(const std::string& name, std::ios::openmode mode)
+		: hdl_(INVALID_HANDLE_VALUE)
+		, doClose_(true)
+		, isReadOnly_(false)
+	{
+		open(name, mode);
+	}
+	/*
+		construct with file handle
+		@param hdl [in] file handle
+	*/
+	explicit File(handleType hdl)
+		: hdl_(hdl)
+		, doClose_(false)
+		, isReadOnly_(false)
+
+	{
+	}
+	~File() CYBOZU_NOEXCEPT // sync and close an owned handle; errors are reported on stderr, never thrown
+	{
+		if (!doClose_) return; // handle was supplied by the caller, who keeps ownership
+		try {
+			try { sync(); } catch (...) { close(); throw; } // a failed sync must not leak the handle
+			close();
+		} catch (std::exception& e) {
+			fprintf(stderr, "File:dstr:%s\n", e.what());
+		} catch (...) {
+			fprintf(stderr, "File:dstr:unknown\n");
+		}
+	}
+	bool isOpen() const CYBOZU_NOEXCEPT { return hdl_ != INVALID_HANDLE_VALUE; }
+	/**
+		support mode
+		always binary mode
+		ios::in : read only
+		ios::out : write only(+truncate)
+		ios::out + ios::app(append)
+	*/
+	void open(const std::string& name, std::ios::openmode mode)
+	{
+		name_ = name;
+		openSub(name, mode);
+	}
+	void openW(const std::string& name)
+	{
+		open(name, std::ios::out | std::ios::binary | std::ios::trunc);
+	}
+	void openR(const std::string& name)
+	{
+		open(name, std::ios::in | std::ios::binary);
+	}
+#ifdef _WIN32
+	void open(const std::wstring& name, std::ios::openmode mode)
+	{
+		name_ = cybozu::exception::wstr2str(name);
+		openSub(name, mode);
+	}
+	File(const std::wstring& name, std::ios::openmode mode) // wide-character counterpart of File(const std::string&, openmode)
+		: hdl_(INVALID_HANDLE_VALUE), doClose_(true), isReadOnly_(false) // initialize every member, as the other constructors do
+	{
+		open(name, mode);
+	}
+	void openW(const std::wstring& name)
+	{
+		open(name, std::ios::out | std::ios::binary | std::ios::trunc);
+	}
+	void openR(const std::wstring& name)
+	{
+		open(name, std::ios::in | std::ios::binary);
+	}
+#endif
+	void close()
+	{
+		if (!isOpen()) return;
+#ifdef _WIN32
+		bool isOK = ::CloseHandle(hdl_) != 0;
+#else
+		bool isOK = ::close(hdl_) == 0;
+#endif
+		hdl_ = INVALID_HANDLE_VALUE;
+		if (isOK) return;
+		throw cybozu::Exception("File:close") << name_ << cybozu::ErrorNo();
+	}
+	/*
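+		flush written data to the storage device (does nothing if the file is closed or read-only)
+		@param doFullSync [in] use fsync instead of fdatasync (Linux/Cygwin only; ignored elsewhere)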
+	*/
+	void sync(bool doFullSync = false)
+	{
+		cybozu::disable_warning_unused_variable(doFullSync);
+		if (!isOpen()) return;
+		if (isReadOnly_) return;
+#ifdef _WIN32
+		/* fail if isReadOnly_ */
+		if (!::FlushFileBuffers(hdl_)) goto ERR_EXIT;
+#elif defined(__linux__) || defined(__CYGWIN__)
+		if (doFullSync) {
+			if (::fsync(hdl_)) goto ERR_EXIT;
+		} else {
+			if (::fdatasync(hdl_)) goto ERR_EXIT;
+		}
+#else
+		if (::fcntl(hdl_, F_FULLFSYNC)) goto ERR_EXIT;
+#endif
+		return;
+	ERR_EXIT:
+		throw cybozu::Exception("File:sync") << name_ << cybozu::ErrorNo();
+	}
+	void write(const void *buf, size_t bufSize)
+	{
+		const char *p = static_cast<const char *>(buf);
+		while (bufSize > 0) {
+			uint32_t size = static_cast<uint32_t>(std::min<size_t>(0x7fffffff, bufSize));
+#ifdef _WIN32
+			DWORD writeSize;
+			if (!::WriteFile(hdl_, p, size, &writeSize, NULL)) goto ERR_EXIT;
+#else
+			ssize_t writeSize = ::write(hdl_, p, size);
+			if (writeSize < 0) {
+				if (errno == EINTR) continue;
+				goto ERR_EXIT;
+			}
+#endif
+			p += writeSize;
+			bufSize -= writeSize;
+		}
+		return;
+	ERR_EXIT:
+		throw cybozu::Exception("File:write") << name_ << cybozu::ErrorNo();
+	}
+	size_t readSome(void *buf, size_t bufSize)
+	{
+		uint32_t size = static_cast<uint32_t>(std::min<size_t>(0x7fffffff, bufSize));
+#ifdef _WIN32
+		DWORD readSize;
+		if (!::ReadFile(hdl_, buf, size, &readSize, NULL)) goto ERR_EXIT;
+#else
+	RETRY:
+		ssize_t readSize = ::read(hdl_, buf, size);
+		if (readSize < 0) {
+			if (errno == EINTR) goto RETRY;
+			goto ERR_EXIT;
+		}
+#endif
+		return readSize;
+	ERR_EXIT:
+		throw cybozu::Exception("File:read") << name_ << cybozu::ErrorNo();
+	}
+	void read(void *buf, size_t bufSize) // read exactly bufSize bytes into buf; throws if the file ends first
+	{
+		for (char *p = static_cast<char *>(buf); bufSize > 0; ) {
+			const size_t readSize = readSome(p, bufSize);
+			if (readSize == 0) throw cybozu::Exception("File:read:EOF") << name_; // 0 bytes means end of file; retrying would loop forever
+			p += readSize;
+			bufSize -= readSize;
+		}
+	}
+	void seek(int64_t pos, std::ios::seekdir dir) // move the file position by pos relative to dir; throws on failure (seekdir: the seek_dir alias is gone in C++17)
+	{
+#ifdef _WIN32
+		LARGE_INTEGER largePos;
+		largePos.QuadPart = pos;
+		DWORD posMode = FILE_BEGIN;
+		switch (dir) {
+		case std::ios::beg:
+			posMode = FILE_BEGIN;
+			break;
+		case std::ios::cur:
+			posMode = FILE_CURRENT;
+			break;
+		case std::ios::end:
+			posMode = FILE_END;
+			break;
+		default:
+			__assume(0);
+		}
+		bool isOK = SetFilePointerEx(hdl_, largePos, NULL, posMode) != 0;
+#else
+		int whence;
+		switch (dir) {
+		case std::ios::beg:
+			whence = SEEK_SET;
+			break;
+		case std::ios::cur:
+			whence = SEEK_CUR;
+			break;
+		case std::ios::end:
+		default: // any other value is treated like std::ios::end
+			whence = SEEK_END;
+			break;
+		}
+		bool isOK = lseek(hdl_, pos, whence) >= 0;
+#endif
+		if (isOK) return;
+		throw cybozu::Exception("File:seek") << name_ << cybozu::ErrorNo() << pos << static_cast<int>(dir);
+	}
+	uint64_t getSize() const
+	{
+		uint64_t fileSize;
+#ifdef _WIN32
+		LARGE_INTEGER size;
+		bool isOK = GetFileSizeEx(hdl_, &size) != 0;
+		fileSize = size.QuadPart;
+#else
+		struct stat stat;
+		bool isOK = fstat(hdl_, &stat) == 0;
+		fileSize = stat.st_size;
+#endif
+		if (isOK) return fileSize;
+		throw cybozu::Exception("File:getSize") << name_ << cybozu::ErrorNo();
+	}
+};
+
+/*
+	name has extension
+*/
+inline bool HasExtension(const std::string& name, const std::string& extension)
+{
+	const size_t extensionSize = extension.size();
+	if (extensionSize == 0) return true;
+	const size_t nameSize = name.size();
+	if (nameSize < extensionSize + 1) return false;
+	const char *p = &name[nameSize - extensionSize - 1];
+	return *p == '.' && memcmp(p + 1, &extension[0], extensionSize) == 0;
+}
+/*
+	split name as basename.suffix
+*/
+inline std::string GetBaseName(const std::string& name, std::string *suffix = 0)
+{
+	size_t pos = name.find_last_of('.');
+	if (pos == std::string::npos) {
+		if (suffix) suffix->clear();
+		return name;
+	}
+	if (suffix) {
+		*suffix = name.substr(pos + 1);
+	}
+	return name.substr(0, pos);
+}
+
+/**
+	replace \ with /
+*/
+inline void ReplaceBackSlash(std::string& str)
+{
+	for (size_t i = 0, n = str.size(); i < n; i++) {
+		if (str[i] == '\\') str[i] = '/';
+	}
+}
+
+/**
+	get exe path and baseName
+	@note if the file name has the form "xxx.exe" then baseName = xxx
+*/
+inline std::string GetExePath(std::string *baseName = 0)
+{
+	std::string path;
+	path.resize(4096);
+#ifdef _WIN32
+	if (!GetModuleFileNameA(NULL, &path[0], static_cast<int>(path.size()) - 2)) {
+		return "";
+	}
+	PathRemoveExtensionA(&path[0]);
+	if (baseName) {
+		*baseName = PathFindFileNameA(&path[0]);
+	}
+	if (::PathRemoveFileSpecA(&path[0])) {
+		::PathAddBackslashA(&path[0]);
+		path[0] = static_cast<char>(tolower(path[0]));
+		path.resize(strlen(&path[0]));
+		ReplaceBackSlash(path);
+	}
+#else
+#if defined(__APPLE__)
+	uint32_t size = (uint32_t)path.size();
+	if (_NSGetExecutablePath(&path[0], &size) != 0) {
+		return "";
+	}
+	path.resize(strlen(&path[0]));
+#else
+	int ret = readlink("/proc/self/exe", &path[0], path.size() - 2);
+	if (ret < 0) return "";
+	path.resize(ret);
+#endif
+	size_t pos = path.find_last_of('/');
+	if (pos != std::string::npos) {
+		if (baseName) {
+			const std::string name = path.substr(pos + 1);
+			std::string suffix;
+			std::string base = GetBaseName(name, &suffix);
+			if (suffix == "exe") {
+				*baseName = base;
+			} else {
+				*baseName = name;
+			}
+		}
+		path.resize(pos + 1);
+	}
+#endif
+	return path;
+}
+
+/**
+	get file size
+*/
+inline uint64_t GetFileSize(const std::string& name)
+{
+#ifdef _WIN32
+	struct __stat64 buf;
+	bool isOK = _stat64(name.c_str(), &buf) == 0;
+#else
+	struct stat buf;
+	bool isOK = stat(name.c_str(), &buf) == 0;
+#endif
+	if (isOK) return buf.st_size;
+	throw cybozu::Exception("GetFileSize") << name << cybozu::ErrorNo();
+}
+
+/**
+	verify whether path exists or not
+*/
+inline bool DoesFileExist(const std::string& path)
+{
+	if (path.empty()) return false;
+	std::string p = path;
+	char c = p[p.size() - 1];
+	if (c == '/' || c == '\\') {
+		p.resize(p.size() - 1);
+	}
+#ifdef _WIN32
+	struct _stat buf;
+	return _stat(p.c_str(), &buf) == 0;
+#else
+	struct stat buf;
+	return stat(p.c_str(), &buf) == 0;
+#endif
+}
+
+inline void RenameFile(const std::string& from, const std::string& to)
+{
+	if (DoesFileExist(to)) {
+		throw cybozu::Exception("RenameFile:file already exist") << from << to;
+	}
+#ifdef _WIN32
+	bool isOK = ::MoveFileExA(from.c_str(), to.c_str(), MOVEFILE_COPY_ALLOWED | MOVEFILE_WRITE_THROUGH) != 0;
+#else
+	bool isOK = ::rename(from.c_str(), to.c_str()) == 0;
+#endif
+	if (!isOK) {
+		throw cybozu::Exception("RenameFile") << from << to << cybozu::ErrorNo();
+	}
+}
+
+/**
+	remove file
+*/
+inline void RemoveFile(const std::string& name)
+{
+#ifdef _WIN32
+	bool isOK = DeleteFileA(name.c_str()) != 0;
+#else
+	bool isOK = unlink(name.c_str()) == 0;
+#endif
+	if (!isOK) {
+		throw cybozu::Exception("RemoveFile") << name << cybozu::ErrorNo();
+	}
+}
+
+/*
+	remark on isFile()
+	Windows: true for anything that is not a directory
+	Linux: true only for regular files (symbolic links are not counted)
+*/
+struct FileInfo {
+	std::string name;
+	uint32_t attr; // dwFileAttributes for Windows, d_type for Linux
+#ifdef _WIN32
+	bool isUnknown() const { return attr == 0; }
+	bool isDirectory() const { verify(); return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0; }
+	bool isFile() const { verify(); return !isDirectory(); }
+#else
+	bool isUnknown() const { return attr == DT_UNKNOWN; }
+	bool isDirectory() const { verify(); return attr == DT_DIR; }
+	bool isFile() const { verify(); return attr == DT_REG; }
+#endif
+	FileInfo() : attr(0) {}
+	FileInfo(const std::string& name, uint32_t attr) : name(name), attr(attr) {}
+	void verify() const
+	{
+		if (isUnknown()) throw cybozu::Exception("FileInfo:unknown attr") << name;
+	}
+};
+
+typedef std::vector<FileInfo> FileList;
+
+
+namespace file_local {
+
+inline void filterAndPush(FileList& list, const FileInfo& fi, const std::string& extension, bool cond(const std::string&, const std::string&))
+{
+	if (fi.name == "." || fi.name == "..") {
+		return;
+	}
+	if (cond(fi.name, extension)) {
+		list.push_back(fi);
+	}
+}
+
+} // cybozu::file_local
+
+/**
+	get file names in dir
+	@param list [out] FileList (selected entries are appended)
+	@param dir [in] directory
+	@param extension [in] select entries (files and directories) having this extension, such as "cpp"; select all if extension is empty
+	@param cond [in] filter function (an entry is selected if cond(entryName, extension) is true)
+	@note "." and ".." are excluded
+*/
+inline bool GetFileList(FileList &list, const std::string& dir, const std::string& extension = "", bool (*cond)(const std::string&, const std::string&) = cybozu::HasExtension)
+{
+#ifdef _WIN32
+	std::string path = dir + "/*";
+	WIN32_FIND_DATAA fd;
+	struct Handle {
+		Handle(HANDLE hdl)
+			: hdl_(hdl)
+		{
+		}
+		~Handle()
+		{
+			if (hdl_ != INVALID_HANDLE_VALUE) {
+				FindClose(hdl_);
+			}
+		}
+		HANDLE hdl_;
+	};
+	Handle hdl(FindFirstFileA(path.c_str(), &fd));
+	if (hdl.hdl_ == INVALID_HANDLE_VALUE) {
+		return false;
+	}
+	do {
+		FileInfo fi(fd.cFileName, fd.dwFileAttributes);
+		file_local::filterAndPush(list, fi, extension, cond);
+	} while (FindNextFileA(hdl.hdl_, &fd) != 0);
+	return true;
+#else
+	struct Handle {
+		DIR *dir_;
+		Handle(DIR *dir)
+			: dir_(dir)
+		{
+			if (dir_ == 0) {
+				perror("opendir");
+			}
+		}
+		~Handle()
+		{
+			if (dir_) {
+				if (::closedir(dir_)) {
+					perror("closedir");
+				}
+			}
+		}
+		bool isValid() const { return dir_ != 0; }
+	};
+	Handle hdl(::opendir(dir.c_str()));
+	if (!hdl.isValid()) return false;
+	for (;;) {
+		struct dirent *dp = ::readdir(hdl.dir_);
+		if (dp == 0) return true;
+		FileInfo fi(dp->d_name, (uint8_t)dp->d_type);
+		file_local::filterAndPush(list, fi, extension, cond);
+	}
+#endif
+}
+
+inline FileList GetFileList(const std::string& dir, const std::string& extension = "", bool (*cond)(const std::string&, const std::string&) = cybozu::HasExtension)
+{
+	FileList fl;
+	if (GetFileList(fl, dir, extension, cond)) return fl;
+	throw cybozu::Exception("cybozu:GetFileList") << dir << cybozu::ErrorNo();
+}
+
+} // cybozu

From e6bb40a8382697511d2cf8408f71734f48c3b7e3 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 15 Jan 2020 15:15:43 +0900
Subject: [PATCH 139/553] add Fp::setArrayMod

---
 include/mcl/fp.hpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 9303ddce..c9b10027 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -330,6 +330,7 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 	}
 	/*
 		mode = Mod : set x mod p if sizeof(S) * n <= 64 else error
+		set array x as little endian
 	*/
 	template<class S>
 	void setArray(bool *pb, const S *x, size_t n, mcl::fp::MaskMode mode = fp::NoMask)
@@ -346,6 +347,15 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 		fp::copyAndMask(v_, x, sizeof(S) * n, op_, fp::MaskAndMod);
 		toMont();
 	}
+	/*
+		set (array mod p)
+		error if sizeof(S) * n > 64
+	*/
+	template<class S>
+	void setArrayMod(bool *pb, const S *x, size_t n)
+	{
+		setArray(pb, x, n, fp::Mod);
+	}
 
 	/*
 		mask x with (1 << (bitLen - 1)) - 1 if x >= p

From c4472e1747e05e2b3c8fd125c27e493e23e53e7b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 15 Jan 2020 15:42:26 +0900
Subject: [PATCH 140/553] add map2curve_osswu2

---
 include/mcl/mapto_wb19.hpp | 50 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 22f3770d..4cb58b42 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -8,6 +8,48 @@
 	ref. https://eprint.iacr.org/2019/403 , https://github.com/algorand/bls_sigs_ref
 */
 
+inline void hkdf_extract_addZeroByte(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize)
+{
+	uint8_t saltZero[32];
+	if (salt == 0 || saltSize == 0) {
+		memset(saltZero, 0, sizeof(saltZero));
+		salt = saltZero;
+		saltSize = sizeof(saltZero);
+	}
+	cybozu::hmac256addZeroByte(hmac, salt, saltSize, msg, msgSize);
+}
+
+inline void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6])
+{
+	info[5] = 1;
+	cybozu::hmac256(out, prk, 32, info, 6);
+	info[5] = 2;
+	memcpy(out + 32, info, 6);
+	cybozu::hmac256(out + 32, prk, 32, out, 32 + 6);
+}
+
+// ctr = 0 or 1 or 2
+inline void hashToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
+{
+	assert(ctr <= 2);
+	const size_t degree = 2;
+	uint8_t msg_prime[32];
+	// add '\0' at the end of dst
+	// see. 5.3. Implementation of https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve
+	hkdf_extract_addZeroByte(msg_prime, reinterpret_cast<const uint8_t*>(dst), dstSize, reinterpret_cast<const uint8_t*>(msg), msgSize);
+	char info_pfx[] = "H2C000";
+	info_pfx[3] = ctr;
+	for (size_t i = 0; i < degree; i++) {
+		info_pfx[4] = char(i + 1);
+		uint8_t t[64];
+		hkdf_expand(t, msg_prime, info_pfx);
+		fp::local::byteSwap(t, 64);
+		bool b;
+		out.getFp0()[i].setArrayMod(&b, t, 64);
+		assert(b); (void)b;
+	}
+}
+
 struct MapToG2_WB19 {
 	Fp2 xi;
 	Fp2 Ell2p_a;
@@ -604,5 +646,13 @@ struct MapToG2_WB19 {
 		iso3(P, Pp);
 		clear_h2(P, P);
 	}
+	void map2curve_osswu2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
+	{
+		Fp2 t1, t2;
+		hashToFp2(t1, msg, msgSize, 0, dst, dstSize);
+		hashToFp2(t2, msg, msgSize, 1, dst, dstSize);
+		opt_swu2_map(out, t1, &t2);
+	}
+
 };
 

From 63fb03524b0b89d8cd69ed62fe69d1699e5e7203 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 15 Jan 2020 16:28:54 +0900
Subject: [PATCH 141/553] move hkdf_* to src/fp.cpp

---
 include/mcl/fp.hpp         |  3 +++
 include/mcl/mapto_wb19.hpp | 24 ++----------------------
 src/fp.cpp                 | 20 ++++++++++++++++++++
 3 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index c9b10027..403bcf14 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -73,6 +73,9 @@ bool isEnableJIT(); // 1st call is not threadsafe
 uint32_t sha256(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSize);
 uint32_t sha512(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSize);
 
+void hkdf_extract_addZeroByte(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize);
+void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6]);
+
 namespace local {
 
 inline void byteSwap(void *x, size_t n)
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 4cb58b42..53598dfd 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -8,26 +8,6 @@
 	ref. https://eprint.iacr.org/2019/403 , https://github.com/algorand/bls_sigs_ref
 */
 
-inline void hkdf_extract_addZeroByte(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize)
-{
-	uint8_t saltZero[32];
-	if (salt == 0 || saltSize == 0) {
-		memset(saltZero, 0, sizeof(saltZero));
-		salt = saltZero;
-		saltSize = sizeof(saltZero);
-	}
-	cybozu::hmac256addZeroByte(hmac, salt, saltSize, msg, msgSize);
-}
-
-inline void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6])
-{
-	info[5] = 1;
-	cybozu::hmac256(out, prk, 32, info, 6);
-	info[5] = 2;
-	memcpy(out + 32, info, 6);
-	cybozu::hmac256(out + 32, prk, 32, out, 32 + 6);
-}
-
 // ctr = 0 or 1 or 2
 inline void hashToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
 {
@@ -36,13 +16,13 @@ inline void hashToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, co
 	uint8_t msg_prime[32];
 	// add '\0' at the end of dst
 	// see. 5.3. Implementation of https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve
-	hkdf_extract_addZeroByte(msg_prime, reinterpret_cast<const uint8_t*>(dst), dstSize, reinterpret_cast<const uint8_t*>(msg), msgSize);
+	fp::hkdf_extract_addZeroByte(msg_prime, reinterpret_cast<const uint8_t*>(dst), dstSize, reinterpret_cast<const uint8_t*>(msg), msgSize);
 	char info_pfx[] = "H2C000";
 	info_pfx[3] = ctr;
 	for (size_t i = 0; i < degree; i++) {
 		info_pfx[4] = char(i + 1);
 		uint8_t t[64];
-		hkdf_expand(t, msg_prime, info_pfx);
+		fp::hkdf_expand(t, msg_prime, info_pfx);
 		fp::local::byteSwap(t, 64);
 		bool b;
 		out.getFp0()[i].setArrayMod(&b, t, 64);
diff --git a/src/fp.cpp b/src/fp.cpp
index 07dfb78b..4dce66d0 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -128,6 +128,26 @@ uint32_t sha512(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSiz
 	return (uint32_t)cybozu::Sha512().digest(out, maxOutSize, msg, msgSize);
 }
 
+void hkdf_extract_addZeroByte(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize)
+{
+	uint8_t saltZero[32];
+	if (salt == 0 || saltSize == 0) {
+		memset(saltZero, 0, sizeof(saltZero));
+		salt = saltZero;
+		saltSize = sizeof(saltZero);
+	}
+	cybozu::hmac256addZeroByte(hmac, salt, saltSize, msg, msgSize);
+}
+
+void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6])
+{
+	info[5] = 1;
+	cybozu::hmac256(out, prk, 32, info, 6);
+	info[5] = 2;
+	memcpy(out + 32, info, 6);
+	cybozu::hmac256(out + 32, prk, 32, out, 32 + 6);
+}
+
 #ifndef MCL_USE_VINT
 static inline void set_mpz_t(mpz_t& z, const Unit* p, int n)
 {

From 77e4c0a9d16d282cd45b0b473efb317a20e4e9df Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 18 Jan 2020 11:34:39 +0900
Subject: [PATCH 142/553] compact h2_chain

---
 include/mcl/mapto_wb19.hpp | 236 +++++++------------------------------
 1 file changed, 45 insertions(+), 191 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 53598dfd..08178b51 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -393,198 +393,52 @@ struct MapToG2_WB19 {
 		}
 		assert(0);
 	}
-	void h2_chain(G2& t1, const G2& P) const
+	void h2_chain(G2& out, const G2& P) const
 	{
-		G2 t0, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
-		t0 = P;
-		dbl(t1, t0);
-		add(t4, t1, t0);
-		add(t2, t4, t1);
-		add(t3, t2, t1);
-		add(t11, t3, t1);
-		add(t9, t11, t1);
-		add(t10, t9, t1);
-		add(t5, t10, t1);
-		add(t7, t5, t1);
-		add(t15, t7, t1);
-		add(t13, t15, t1);
-		add(t6, t13, t1);
-		add(t14, t6, t1);
-		add(t12, t14, t1);
-		add(t8, t12, t1);
-		dbl(t1, t6);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t13);
-		for (size_t i = 0; i < 2; i++) dbl(t1, t1);
-		add(t1, t1, t0);
-		for (size_t i = 0; i < 9; i++) dbl(t1, t1);
-		add(t1, t1, t8);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t11);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t13);
-		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
-		add(t1, t1, t2);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
-		add(t1, t1, t5);
-		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
-		add(t1, t1, t0);
-		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
-		add(t1, t1, t11);
-		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
-		add(t1, t1, t8);
-		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
-		add(t1, t1, t2);
-		for (size_t i = 0; i < 9; i++) dbl(t1, t1);
-		add(t1, t1, t5);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t11);
-		for (size_t i = 0; i < 2; i++) dbl(t1, t1);
-		add(t1, t1, t0);
-		for (size_t i = 0; i < 9; i++) dbl(t1, t1);
-		add(t1, t1, t8);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t13);
-		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
-		add(t1, t1, t0);
-		for (size_t i = 0; i < 11; i++) dbl(t1, t1);
-		add(t1, t1, t9);
-		for (size_t i = 0; i < 7; i++) dbl(t1, t1);
-		add(t1, t1, t12);
-		for (size_t i = 0; i < 7; i++) dbl(t1, t1);
-		add(t1, t1, t7);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t12);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t14);
-		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
-		add(t1, t1, t13);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t0);
-		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
-		add(t1, t1, t9);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t13);
-		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
-		add(t1, t1, t10);
-		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
-		add(t1, t1, t2);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t10);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t2);
-		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
-		add(t1, t1, t0);
-		for (size_t i = 0; i < 10; i++) dbl(t1, t1);
-		add(t1, t1, t9);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t14);
-		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t9);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t15);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t8);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t12);
-		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
-		add(t1, t1, t5);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t15);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t2);
-		for (size_t i = 0; i < 7; i++) dbl(t1, t1);
-		add(t1, t1, t5);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t9);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t15);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t14);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t8);
-		for (size_t i = 0; i < 10; i++) dbl(t1, t1);
-		add(t1, t1, t6);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t5);
-		for (size_t i = 0; i < 3; i++) dbl(t1, t1);
-		add(t1, t1, t0);
-		for (size_t i = 0; i < 9; i++) dbl(t1, t1);
-		add(t1, t1, t13);
-		for (size_t i = 0; i < 7; i++) dbl(t1, t1);
-		add(t1, t1, t12);
-		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
-		add(t1, t1, t5);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t2);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t11);
-		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
-		add(t1, t1, t10);
-		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
-		add(t1, t1, t4);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t10);
-		for (size_t i = 0; i < 7; i++) dbl(t1, t1);
-		add(t1, t1, t7);
-		for (size_t i = 0; i < 3; i++) dbl(t1, t1);
-		add(t1, t1, t2);
-		for (size_t i = 0; i < 4; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
-		add(t1, t1, t9);
-		for (size_t i = 0; i < 8; i++) dbl(t1, t1);
-		add(t1, t1, t9);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t8);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t7);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t6);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t5);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t4);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t5);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t4);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t4);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t5);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 7; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t4);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 3; i++) dbl(t1, t1);
-		add(t1, t1, t0);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 6; i++) dbl(t1, t1);
-		add(t1, t1, t3);
-		for (size_t i = 0; i < 5; i++) dbl(t1, t1);
-		add(t1, t1, t2);
+		G2 t[16];
+		t[0] = P;
+		dbl(t[1], t[0]);
+		add(t[4], t[1], t[0]);
+		add(t[2], t[4], t[1]);
+		add(t[3], t[2], t[1]);
+		add(t[11], t[3], t[1]);
+		add(t[9], t[11], t[1]);
+		add(t[10], t[9], t[1]);
+		add(t[5], t[10], t[1]);
+		add(t[7], t[5], t[1]);
+		add(t[15], t[7], t[1]);
+		add(t[13], t[15], t[1]);
+		add(t[6], t[13], t[1]);
+		add(t[14], t[6], t[1]);
+		add(t[12], t[14], t[1]);
+		add(t[8], t[12], t[1]);
+		dbl(t[1], t[6]);
+
+		const struct {
+			uint32_t n;
+			uint32_t idx;
+		} tbl[] = {
+			{ 5, 13 }, { 2, 0 }, { 9, 8 }, { 5, 11 }, { 6, 13 }, { 8, 2 }, { 5, 3 },
+			{ 5, 3 }, { 4, 5 }, { 4, 0 }, { 8, 11 }, { 8, 8 }, { 4, 2 }, { 9, 5 },
+			{ 6, 11 }, { 2, 0 }, { 9, 8 }, { 5, 13 }, { 4, 0 }, { 11, 9 }, { 7, 12 },
+			{ 7, 7 }, { 5, 12 }, { 5, 14 }, { 8, 13 }, { 6, 3 }, { 5, 0 }, { 8, 9 },
+			{ 6, 13 }, { 4, 10 }, { 4, 2 }, { 6, 10 }, { 6, 2 }, { 4, 0 }, { 10, 9 },
+			{ 6, 14 }, { 4, 3 }, { 6, 9 }, { 6, 15 }, { 5, 8 }, { 5, 12 }, { 4, 5 },
+			{ 6, 15 }, { 6, 2 }, { 7, 5 }, { 6, 3 }, { 6, 9 }, { 6, 15 }, { 6, 14 },
+			{ 5, 8 }, { 10, 6 }, { 5, 5 }, { 3, 0 }, { 9, 13 }, { 7, 12 }, { 4, 5 },
+			{ 6, 2 }, { 6, 11 }, { 4, 10 }, { 4, 4 }, { 6, 10 }, { 7, 7 }, { 3, 2 },
+			{ 4, 3 }, { 8, 9 }, { 8, 9 }, { 6, 8 }, { 5, 7 }, { 5, 6 }, { 6, 5 },
+			{ 6, 4 }, { 5, 5 }, { 6, 4 }, { 6, 3 }, { 6, 4 }, { 6, 5 }, { 6, 3 },
+			{ 7, 3 }, { 6, 3 }, { 5, 4 }, { 6, 3 }, { 6, 3 }, { 3, 0 }, { 6, 3 },
+			{ 6, 3 },
+		};
+		for (size_t j = 0; j < CYBOZU_NUM_OF_ARRAY(tbl); j++) {
+			const uint32_t n = tbl[j].n;
+			for (size_t i = 0; i < n; i++) dbl(t[1], t[1]);
+			add(t[1], t[1], t[tbl[j].idx]);
+		}
+		for (size_t i = 0; i < 5; i++) dbl(t[1], t[1]);
+		add(out, t[1], t[2]);
 	}
 	void mx_chain(G2& Q, const G2& P) const
 	{

From 0737cbfa65bf47e2a6980a078977376e1565ff26 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 21 Jan 2020 15:00:27 +0900
Subject: [PATCH 143/553] add getMapToMode

---
 include/mcl/bn.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index fd69b4b0..a71a0077 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -2143,6 +2143,10 @@ inline bool setMapToMode(int mode)
 	}
 	return BN::nonConstParam.mapTo.setMapToMode(mode);
 }
+inline int getMapToMode()
+{
+	return BN::param.mapTo.mapToMode_;
+}
 inline void mapToG1(bool *pb, G1& P, const Fp& x) { *pb = BN::param.mapTo.calc(P, x); }
 inline void mapToG2(bool *pb, G2& P, const Fp2& x, bool fast = false) { *pb = BN::param.mapTo.calc(P, x, fast); }
 #ifndef CYBOZU_DONT_USE_EXCEPTION

From b8fcf48e3bd5858d6a36d03ddd90bc6abe70b069 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 21 Jan 2020 15:13:16 +0900
Subject: [PATCH 144/553] update comments for MAP_TO_MODE

---
 include/mcl/curve_type.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/mcl/curve_type.h b/include/mcl/curve_type.h
index 85ce7a59..10815592 100644
--- a/include/mcl/curve_type.h
+++ b/include/mcl/curve_type.h
@@ -39,6 +39,6 @@ enum {
 enum {
 	MCL_MAP_TO_MODE_ORIGINAL, // see MapTo::calcBN
 	MCL_MAP_TO_MODE_TRY_AND_INC, // try-and-incremental-x
-	MCL_MAP_TO_MODE_ETH2, // eth2.0 spec
-	MCL_MAP_TO_MODE_WB19
+	MCL_MAP_TO_MODE_ETH2, // old eth2.0 spec
+	MCL_MAP_TO_MODE_WB19 // used in new eth2.0 spec
 };

From 2f9f142043de86f14395f95d4d91d50867abd778 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 21 Jan 2020 16:35:19 +0900
Subject: [PATCH 145/553] fix indent

---
 include/mcl/mapto_wb19.hpp | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 08178b51..37c184ef 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -141,18 +141,18 @@ struct MapToG2_WB19 {
 	// P is on y^2 = x^3 + Ell2p_a x + Ell2p_b
 	bool isValidPoint(const Point& P) const
 	{
-    Fp2 y2, x2, z2, z4, t;
-    Fp2::sqr(x2, P.x);
-    Fp2::sqr(y2, P.y);
-    Fp2::sqr(z2, P.z);
-    Fp2::sqr(z4, z2);
-    Fp2::mul(t, z4, Ell2p_a);
-    t += x2;
-    t *= P.x;
-    z4 *= z2;
-    z4 *= Ell2p_b;
-    t += z4;
-    return y2 == t;
+		Fp2 y2, x2, z2, z4, t;
+		Fp2::sqr(x2, P.x);
+		Fp2::sqr(y2, P.y);
+		Fp2::sqr(z2, P.z);
+		Fp2::sqr(z4, z2);
+		Fp2::mul(t, z4, Ell2p_a);
+		t += x2;
+		t *= P.x;
+		z4 *= z2;
+		z4 *= Ell2p_b;
+		t += z4;
+		return y2 == t;
 	}
 	void init()
 	{

From 18b0a3bfa4c746f008083c6565e594ed0185b01f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 22 Jan 2020 09:13:56 +0900
Subject: [PATCH 146/553] add hkdf_extract without adding zero byte

---
 include/mcl/fp.hpp         |  1 +
 include/mcl/mapto_wb19.hpp | 20 ++++++++++++++------
 src/fp.cpp                 | 11 +++++++++++
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 403bcf14..894d939c 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -74,6 +74,7 @@ uint32_t sha256(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSiz
 uint32_t sha512(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSize);
 
 void hkdf_extract_addZeroByte(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize);
+void hkdf_extract(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize);
 void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6]);
 
 namespace local {
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 37c184ef..ba6662bc 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -9,14 +9,18 @@
 */
 
 // ctr = 0 or 1 or 2
-inline void hashToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
+inline void hashToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize, bool addZero = true)
 {
 	assert(ctr <= 2);
 	const size_t degree = 2;
 	uint8_t msg_prime[32];
 	// add '\0' at the end of dst
 	// see. 5.3. Implementation of https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve
-	fp::hkdf_extract_addZeroByte(msg_prime, reinterpret_cast<const uint8_t*>(dst), dstSize, reinterpret_cast<const uint8_t*>(msg), msgSize);
+	if (addZero) {
+		fp::hkdf_extract_addZeroByte(msg_prime, reinterpret_cast<const uint8_t*>(dst), dstSize, reinterpret_cast<const uint8_t*>(msg), msgSize);
+	} else {
+		fp::hkdf_extract(msg_prime, reinterpret_cast<const uint8_t*>(dst), dstSize, reinterpret_cast<const uint8_t*>(msg), msgSize);
+	}
 	char info_pfx[] = "H2C000";
 	info_pfx[3] = ctr;
 	for (size_t i = 0; i < degree; i++) {
@@ -480,13 +484,17 @@ struct MapToG2_WB19 {
 		iso3(P, Pp);
 		clear_h2(P, P);
 	}
-	void map2curve_osswu2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
+	void map2curve_osswu2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize, bool addZero = true) const
 	{
 		Fp2 t1, t2;
-		hashToFp2(t1, msg, msgSize, 0, dst, dstSize);
-		hashToFp2(t2, msg, msgSize, 1, dst, dstSize);
+		hashToFp2(t1, msg, msgSize, 0, dst, dstSize, addZero);
+		hashToFp2(t2, msg, msgSize, 1, dst, dstSize, addZero);
 		opt_swu2_map(out, t1, &t2);
 	}
-
+	void msgToG2(G2& out, const void *msg, size_t msgSize) const
+	{
+		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO-_POP_";
+		map2curve_osswu2(out, msg, msgSize, dst, strlen(dst), false);
+	}
 };
 
diff --git a/src/fp.cpp b/src/fp.cpp
index 4dce66d0..64dc4a16 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -139,6 +139,17 @@ void hkdf_extract_addZeroByte(uint8_t hmac[32], const uint8_t *salt, size_t salt
 	cybozu::hmac256addZeroByte(hmac, salt, saltSize, msg, msgSize);
 }
 
+void hkdf_extract(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize)
+{
+	uint8_t saltZero[32];
+	if (salt == 0 || saltSize == 0) {
+		memset(saltZero, 0, sizeof(saltZero));
+		salt = saltZero;
+		saltSize = sizeof(saltZero);
+	}
+	cybozu::hmac256(hmac, salt, saltSize, msg, msgSize);
+}
+
 void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6])
 {
 	info[5] = 1;

From fc1b24d5e13ca094c13a37cc911bdc13faad222a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 23 Jan 2020 09:16:16 +0900
Subject: [PATCH 147/553] add msgToG2

---
 include/mcl/bn.hpp         | 12 ++++++++++--
 include/mcl/mapto_wb19.hpp | 13 +++++++------
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index a71a0077..32f39285 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -332,7 +332,7 @@ struct MapTo {
 	int type_;
 	int mapToMode_;
 	bool useOriginalG2cofactor_;
-	MapToG2_WB19 maptog2_wb19_;
+	MapToG2_WB19 mapToG2_WB19_;
 	MapTo()
 		: type_(0)
 		, mapToMode_(MCL_MAP_TO_MODE_ORIGINAL)
@@ -542,7 +542,7 @@ struct MapTo {
 			break;
 		case MCL_MAP_TO_MODE_WB19:
 			mapToMode_ = mode;
-			maptog2_wb19_.init();
+			mapToG2_WB19_.init();
 			return true;
 			break;
 		default:
@@ -616,6 +616,10 @@ struct MapTo {
 	}
 	bool calc(G2& P, const Fp2& t, bool fast = false) const
 	{
+		if (mapToMode_ == MCL_MAP_TO_MODE_WB19) {
+			mapToG2_WB19_.opt_swu2_map(P, t);
+			return true;
+		}
 		if (!mapToEc(P, t)) return false;
 		if (mapToMode_ == MCL_MAP_TO_MODE_ETH2) {
 			Fp2 negY;
@@ -2175,6 +2179,10 @@ inline void hashAndMapToG1(G1& P, const void *buf, size_t bufSize)
 }
 inline void hashAndMapToG2(G2& P, const void *buf, size_t bufSize)
 {
+	if (getMapToMode() == MCL_MAP_TO_MODE_WB19) {
+		BN::param.mapTo.mapToG2_WB19_.msgToG2(P, buf, bufSize);
+		return;
+	}
 	Fp2 t;
 	t.a.setHashOf(buf, bufSize);
 	t.b.clear();
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index ba6662bc..2663fe2a 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -9,14 +9,15 @@
 */
 
 // ctr = 0 or 1 or 2
-inline void hashToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize, bool addZero = true)
+inline void hashToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
 {
+	const bool addZeroByte = false; // append zero byte to msg
 	assert(ctr <= 2);
 	const size_t degree = 2;
 	uint8_t msg_prime[32];
 	// add '\0' at the end of dst
 	// see. 5.3. Implementation of https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve
-	if (addZero) {
+	if (addZeroByte) {
 		fp::hkdf_extract_addZeroByte(msg_prime, reinterpret_cast<const uint8_t*>(dst), dstSize, reinterpret_cast<const uint8_t*>(msg), msgSize);
 	} else {
 		fp::hkdf_extract(msg_prime, reinterpret_cast<const uint8_t*>(dst), dstSize, reinterpret_cast<const uint8_t*>(msg), msgSize);
@@ -484,17 +485,17 @@ struct MapToG2_WB19 {
 		iso3(P, Pp);
 		clear_h2(P, P);
 	}
-	void map2curve_osswu2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize, bool addZero = true) const
+	void map2curve_osswu2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
 	{
 		Fp2 t1, t2;
-		hashToFp2(t1, msg, msgSize, 0, dst, dstSize, addZero);
-		hashToFp2(t2, msg, msgSize, 1, dst, dstSize, addZero);
+		hashToFp2(t1, msg, msgSize, 0, dst, dstSize);
+		hashToFp2(t2, msg, msgSize, 1, dst, dstSize);
 		opt_swu2_map(out, t1, &t2);
 	}
 	void msgToG2(G2& out, const void *msg, size_t msgSize) const
 	{
 		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO-_POP_";
-		map2curve_osswu2(out, msg, msgSize, dst, strlen(dst), false);
+		map2curve_osswu2(out, msg, msgSize, dst, strlen(dst));
 	}
 };
 

From 1249f8600c4e1500df3e1e319a132adaa9b62a44 Mon Sep 17 00:00:00 2001
From: prprhyt <prprhyt@gmail.com>
Date: Fri, 24 Jan 2020 18:04:52 +0900
Subject: [PATCH 148/553] Fix she-api documents

---
 misc/she/she-api-ja.md | 2 +-
 misc/she/she-api.md    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/misc/she/she-api-ja.md b/misc/she/she-api-ja.md
index 850f11ff..93dd8f7d 100644
--- a/misc/she/she-api-ja.md
+++ b/misc/she/she-api-ja.md
@@ -250,7 +250,7 @@ PrecomputedPublicKeyはPublicKeyの高速版
 * `CT she.sub(CT x, CT y)`(JS)
     * 暗号文xから暗号文yを引いてzにセットする(またはその値を返す)
 * `void CT::neg(CT& y, const CT& x)`(C++)
-* `void she.neg(CT x)`(JS)
+* `CT she.neg(CT x)`(JS)
     * 暗号文xの符号反転をyにセットする(またはその値を返す)
 * `void CT::mul(CT& z, const CT& x, int y)`(C++)
 * `CT she.mulInt(CT x, int y)`(JS)
diff --git a/misc/she/she-api.md b/misc/she/she-api.md
index af54311e..fd2e0867 100644
--- a/misc/she/she-api.md
+++ b/misc/she/she-api.md
@@ -255,7 +255,7 @@ PK means PublicKey or PrecomputedPublicKey
 * `CT she.sub(CT x, CT y)`(JS)
     * subtract `x` and `y` and set the value to `z`(or return the value)
 * `void CT::neg(CT& y, const CT& x)`(C++)
-* `void she.neg(CT x)`(JS)
+* `CT she.neg(CT x)`(JS)
     * negate `x` and set the value to `y`(or return the value)
 * `void CT::mul(CT& z, const CT& x, int y)`(C++)
 * `CT she.mulInt(CT x, int y)`(JS)

From b5c2edee03804a3e62133596df02a2d5df803ba8 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 25 Jan 2020 12:33:25 +0900
Subject: [PATCH 149/553] init mapToG2_WB19 in initBLS12

---
 include/mcl/bn.hpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 32f39285..e13cf4d0 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -523,6 +523,7 @@ struct MapTo {
 		assert(b);
 		(void)b;
 		Fr::inv(g2cofactorAdj_, g2cofactorAdjInv_);
+		mapToG2_WB19_.init();
 	}
 	/*
 		change mapTo function to mode
@@ -537,12 +538,8 @@ struct MapTo {
 		case MCL_MAP_TO_MODE_ORIGINAL:
 		case MCL_MAP_TO_MODE_TRY_AND_INC:
 		case MCL_MAP_TO_MODE_ETH2:
-			mapToMode_ = mode;
-			return true;
-			break;
 		case MCL_MAP_TO_MODE_WB19:
 			mapToMode_ = mode;
-			mapToG2_WB19_.init();
 			return true;
 			break;
 		default:

From 601abdb2089345aca6694b53478799012a51e854 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 25 Jan 2020 15:52:33 +0900
Subject: [PATCH 150/553] test of mapToG2_WB19

---
 test/mapto_wb19_test.cpp | 486 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 486 insertions(+)
 create mode 100644 test/mapto_wb19_test.cpp

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
new file mode 100644
index 00000000..0afcb784
--- /dev/null
+++ b/test/mapto_wb19_test.cpp
@@ -0,0 +1,486 @@
+#include <cybozu/test.hpp>
+#include <cybozu/sha2.hpp>
+#include <mcl/bls12_381.hpp>
+#include <iostream>
+#include <fstream>
+#include <cybozu/atoi.hpp>
+#include <cybozu/file.hpp>
+
+using namespace mcl;
+using namespace mcl::bn;
+
+void dump(const void *msg, size_t msgSize)
+{
+	const uint8_t *p = (const uint8_t *)msg;
+	for (size_t i = 0; i < msgSize; i++) {
+		printf("%02x", p[i]);
+	}
+	printf("\n");
+}
+
+void dump(const std::string& s)
+{
+	dump(s.c_str(), s.size());
+}
+
+std::string toHexStr(const void *_buf, size_t n)
+{
+	const uint8_t *buf = (const uint8_t*)_buf;
+	std::string out;
+	out.resize(n * 2);
+	for (size_t i = 0; i < n; i++) {
+		cybozu::itohex(&out[i * 2], 2, buf[i], false);
+	}
+	return out;
+}
+
+std::string toHexStr(const std::string& s)
+{
+	return toHexStr(s.c_str(), s.size());
+}
+
+typedef std::vector<uint8_t> Uint8Vec;
+
+Uint8Vec fromHexStr(const std::string& s)
+{
+	Uint8Vec ret(s.size() / 2);
+	for (size_t i = 0; i < s.size(); i += 2) {
+		ret[i / 2] = cybozu::hextoi(&s[i], 2);
+	}
+	return ret;
+}
+
+struct Fp2Str {
+	const char *a;
+	const char *b;
+};
+
+struct PointStr {
+	Fp2Str x;
+	Fp2Str y;
+	Fp2Str z;
+};
+
+void set(Fp2& x, const Fp2Str& s)
+{
+	x.a.setStr(s.a);
+	x.b.setStr(s.b);
+}
+
+template<class Point>
+void set(Point& P, const PointStr& s)
+{
+	set(P.x, s.x);
+	set(P.y, s.y);
+	set(P.z, s.z);
+}
+
+std::string toHexStr(const Fp2& x)
+{
+	uint8_t buf1[96];
+	uint8_t buf2[96];
+	size_t n1 = x.a.serialize(buf1, sizeof(buf1));
+	size_t n2 = x.b.serialize(buf2, sizeof(buf2));
+	return toHexStr(buf1, n1) + " " + toHexStr(buf2, n2);
+}
+
+std::string toHexStr(const G2& P)
+{
+	uint8_t xy[96];
+	size_t n = P.serialize(xy, 96);
+	CYBOZU_TEST_EQUAL(n, 96);
+	return toHexStr(xy, 96);
+}
+
+template<class T>
+void testHash_g2(const T& mapto, const std::string& fileName)
+{
+	const char *dst = "\x02";
+	printf("name=%s\n", fileName.c_str());
+	std::ifstream ifs(fileName.c_str());
+	Uint8Vec buf;
+	G2 out;
+	for (;;) {
+		std::string msg, zero, ret;
+		ifs >> msg >> zero >> ret;
+		if (zero != "00") break;
+		buf = fromHexStr(msg);
+		buf.push_back(0); // add zero byte
+		mapto.map2curve_osswu2(out, buf.data(), buf.size(), dst, strlen(dst));
+		std::string s = toHexStr(out);
+		CYBOZU_TEST_EQUAL(s, ret);
+	}
+}
+
+template<class T>
+void testHash_g2All(const T& mapto, const std::string& dir)
+{
+	cybozu::FileList list = cybozu::GetFileList(dir);
+	for (size_t i = 0; i < list.size(); i++) {
+		const cybozu::FileInfo& info = list[i];
+		testHash_g2(mapto, dir + "/" + info.name);
+	}
+}
+
+void testHMAC()
+{
+	const char *key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b";
+	const char *msg = "Hi There";
+	uint8_t hmac[32];
+	const char *expect = "b0344c61d8db38535ca8afceaf0bf12b881dc200c9833da726e9376c2e32cff7";
+	cybozu::hmac256(hmac, key, strlen(key), msg, strlen(msg));
+	std::string out = toHexStr(hmac, 32);
+	CYBOZU_TEST_EQUAL(out, expect);
+}
+
+void testHashToFp2()
+{
+	const char *msg = "the message to be signed";
+	const char *dst = "\x02";
+	const char *outS = "0xe54bc0f2e26071a79ba5fe7ae5307d39cf5519e581e03b43f39a431eccc258fa1477c517b1268b22986601ee5caa5ea 0x17e8397d5e687ff7f915c23f27fe1ca2c397a7df91de8c88dc82d34c9188a3ef719f9f20436ea8a5fe7d509fbc79214d";
+	Fp2 out, ok;
+	ok.setStr(outS);
+	mcl::bn::local::hashToFp2(out, msg, strlen(msg) + 1, 0, dst, strlen(dst));
+	CYBOZU_TEST_EQUAL(out, ok);
+}
+
+template<class T>
+void testMap2curve_osswu2(const T& mapto)
+{
+	const char *msg = "the message to be signed";
+	const char *dst = "\x02";
+	const PointStr outS = {
+		{
+			"0x29670bca15e948605ae32ac737b719f926bc8cb99e980bf0542cada47f71a9f299f4d8c332776da38c8768ea719911",
+			"0x111b35c14e065f0af7bb2697cba31bd21f629c0d42f75411340ae608df3bc2572b746935a788caa6ef10014ee02a0bf0",
+		},
+		{
+			"0xe99fd88ee5bd8272483b498245a59b34a22d4820cdd564fc044510210e6d8da62752ac467dac6421b330b2f62385305",
+			"0x199c95bcff2d9ae3486d12892740a35904deddc63d33d1080d498fbe1ce468a8efeb9d62e183c71f0a3bf58422e2f1a2",
+		},
+		{
+			"0x147428ea49f35d9864bfc6685e0651f340f1201082c9dce4b99c72d45bf2d4deda4dcb151cefdfd1dd224c8bb85c8a71",
+			"0x7a14a1a0a8a27423e5d912879fec8054ae95f035642e3806fa514b9f1dbbb2bc1144dac067c52305e60e8bc421ad5b4",
+		},
+	};
+	G2 out, ok;
+	set(ok, outS);
+	mapto.map2curve_osswu2(out, msg, strlen(msg) + 1 /* contains zero byte */, dst, strlen(dst));
+	CYBOZU_TEST_EQUAL(out, ok);
+}
+
+template<class T>
+void test2(const T& mapto)
+{
+	/*
+		testHashToBaseFP2
+		https://github.com/status-im/nim-blscurve/blob/de64516a5933a6e8ebb01a346430e61a201b5775/blscurve/hash_to_curve.nim#L492
+	*/
+	{
+		const char *msg = "msg";
+		uint8_t ctr = 0;
+		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_";
+		const char *expect = "18df4dc51885b18ca0082a4966b0def46287930b8f1c0b673b11ac48d19c8899bc150d83fd3a7a1430b0de541742c1d4 14eef8ca34b82d065d187a3904cb313dbb44558917cc5091574d9999b5ecfdd5af2fa3aea6e02fb253bf4ae670e72d55";
+		Fp2 x;
+		mcl::bn::local::hashToFp2(x, msg, strlen(msg) + 1 /* add zero byte */, ctr, dst, strlen(dst));
+		CYBOZU_TEST_EQUAL(toHexStr(x), expect);
+	}
+	/*
+		testMapToCurveG2
+		https://github.com/status-im/nim-blscurve/blob/de64516a5933a6e8ebb01a346430e61a201b5775/blscurve/hash_to_curve.nim#L531
+	*/
+}
+
+template<class T>
+void testSign(const T& mapto)
+{
+	const Fp& H = mapto.half;
+	const size_t N = 4;
+	const Fp tbl[N] = { 0, 1, H, H + 1 };
+	const int expect[N][N] = {
+		{  1, 1, 1, -1 },
+		{  1, 1, 1, -1 },
+		{  1, 1, 1, -1 },
+		{ -1, 1, 1, -1 },
+	};
+	Fp2 t;
+	for (size_t i = 0; i < N; i++) {
+		t.a = tbl[i];
+		for (size_t j = 0; j < N; j++) {
+			t.b = tbl[j];
+			if (mapto.isNegSign(t) != (expect[i][j] < 0)) {
+				printf("err %zd %zd\n", i, j);
+			}
+		}
+	}
+	puts("ok");
+}
+
+template<class T>
+void helpTest(const T& mapto)
+{
+	const struct {
+		const char *ta;
+		const char *tb;
+		const char *xa;
+		const char *xb;
+		const char *ya;
+		const char *yb;
+		const char *za;
+		const char *zb;
+	} tbl[] = {
+		{
+			"0xe54bc0f2e26071a79ba5fe7ae5307d39cf5519e581e03b43f39a431eccc258fa1477c517b1268b22986601ee5caa5ea",
+			"0x17e8397d5e687ff7f915c23f27fe1ca2c397a7df91de8c88dc82d34c9188a3ef719f9f20436ea8a5fe7d509fbc79214d",
+
+			"0x11d568058220b1826cacde2e367beef98ea1edfde5fbf0491231b7ffdfc867e5269f9cfe65347c32ead182ba6b8c3ba1",
+			"0x19f2778213e671ac444b1b579bfdf4e7fabeed9626dc909ce243b60397a6b5f65af0fbbe02a43c1e289f28c927012da1",
+
+			"0xfe17bc695a84ec060b6287a4e77a50f65ba8f2c6c433f8131036ddfe34e3071d1cb71c0000f6bcfada947b19d8588df",
+			"0xb76abd285945f787721e7e306895149523941586ac44f25a294c406a70ed570020992025aa307777cfe6c590567dfbe",
+
+			"0x1910249ae63241608e013eb13578b9b3d96774d35e5732fc75efd17c212dd310d7f4016d6f212f62f33d34f10252e3e3",
+			"0xdcd076cea67c76a6d0594c8f30c8cd8e9ead24f90870f723228f2203a55e04a5517c426ea2c4bae9d37a11c3d0f1912",
+		},
+		{
+			"0x2a8663422cc279aa8591819195a62cfd57357b7bcb6f4a9174275c2e2e754fb23e2f8a444d0d164990dc03dcb95a129",
+			"0x15cf611083511955a70fdcc80cb08c6e22b8043a3038065251d4d3f82c6051bac4933e41d589514c42fba13f78f297ef",
+
+			"0x74ee12dce0c9a8836017172b562ebe491273964dd63df71dea6eb778cd9040e8c9a7136e745013c1def93cc57ef0dae",
+			"0xedce8fa83a2435a796d207943b14ea4d1a9850e10a6c2035912f1c5bd579e9cabc54027b87a779af28f380cc5edc8a6",
+
+			"0x11367627461d742b4afac12bd789f1437787f2dc675cf2c7896f004ab8480c06cd06589748d8b9791b4969763962f73c",
+			"0x101d8e4c1598e72d943dad4695cfa74236d5065345f1e62e62c75ca30cb0c41c3f6197d7c57d46e8cdd07845d77e1e34",
+
+			"0x3952479e45a0826275c1481fbd78a2b4c5076b6a5cd4ad7e132c1ec460dcaef504943e2c6a969ba182e230da3850b4",
+			"0x13b8e64e2e233d1dc4506360c3bff93535642c2d3115c53c049e287e35c03212be882f0618cc50557e55b42be53e4893",
+		},
+	};
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+		Fp2 t(tbl[i].ta, tbl[i].tb);
+		Fp2 x0(tbl[i].xa, tbl[i].xb);
+		Fp2 y0(tbl[i].ya, tbl[i].yb);
+		Fp2 z0(tbl[i].za, tbl[i].zb);
+		typename T::Point P;
+		mapto.osswu2_help(P, t);
+		CYBOZU_TEST_EQUAL(P.x, x0);
+		CYBOZU_TEST_EQUAL(P.y, y0);
+		CYBOZU_TEST_EQUAL(P.z, z0);
+		CYBOZU_TEST_ASSERT(mapto.isValidPoint(P));
+	}
+}
+
+template<class T>
+void addTest(const T& mapto)
+{
+	const struct Tbl {
+		PointStr P;
+		PointStr Q;
+		PointStr R;
+	} tbl[] = {
+		{
+			{
+				{
+					"0x111fe4d895d4a8eb21b87f8717727a638cb3f79b91217ac2b47ea599513a5e9bff14cd85f91e5bef822160e0ad4f6726",
+					"0x29180cfc2d6a6c717ad4b93725475117c959496d3163974cc08068c0319cb47ba7c8d49c0ebb1ed1a4659b91acab3f",
+				},
+				{
+					"0x192e14063ab46786058c355387e4141921a2b0fd1bcecd6bbf6e3e25f972b2b88fe23b1fd6b14f8070c7ada0bbcfb8d7",
+					"0x153bc38ad032b044e55f649b9b1e6384cfe0936b3be350e16a8cf847790bf718e9099b102fbdab5ad8f0acca6b0ac65a",
+				},
+				{
+					"0x119f8d49f20b7a3ef00527779ef9326250a835a742770e9599b3be1939d5e00f8b329781bea38e725e1b0de76354b2ea",
+					"0xd95d36844c2ef0678e3614c0d9698daf7d54cb41322fb6acf90a4fd61122c36213e6f811c81c573385110d98e49136",
+				},
+			},
+			{
+				{
+					"0x738abc340e315a70a95d22c68e4beb8f8ce8cb17ec4d8104285b5770a63b2e9fdceaffb88df1fde2104d807bd0fb5df",
+					"0x19edac9569a018b7a17ddd9554430318500e83e38c798d6f8e0a22e9e54ef2b0ec0cf4866013e3a43237eaf949c4548b",
+				},
+				{
+					"0x12234a4947cf5c0a0fc04edadefa7c3766489d927ad3d7d7236af997b0e0fd7deaaf4ab78aad390c6a8f0088f21256af",
+					"0x4a1cddb800e9fc6fb9f12e036bd0dae9a75c276f8007407cb9be46177e4338ac43d00f3dc413cab629d6305327ffbc",
+				},
+				{
+					"0x187212ac7f7d68aa32dafe6c1c52dc0411ea11cffa4c6a10e0ba407c94b8663376f1642379451a09a4c7ce6e691a557f",
+					"0x1381999b5cc68ae42d64d71ac99a20fb5874f3883a222a9e15c8211610481642b32b85da288872269480383b62696e5a",
+				},
+			},
+			{
+				{
+					"0x1027d652690099dd3bea0c8ec2f8686c8db37444b08067a40780a264f2edd995d3a39941a302289ac8025007e7f08e35",
+					"0xe4c1e12005a577f2a7487bd0bca91253bfff829258e7120716d70133dfc1c8f4aa80d2b4c076f267f3483ec1ca66cdc",
+				},
+				{
+					"0x16bd53f43f8acfb29d3a451a274445ca87d43f0e1a6550c6107654516fda0b4cd1a346369ef0d44d4ee78904ce1b3e4b",
+					"0xf0f67bbce56d7791c676b7af20f0d91382973c6c7b971a920525dbd58b13364ec226651308c8bc56e636d0458d46f50",
+				},
+				{
+					"0x8027cefbfd3e7e7fdc88735eddd7e669520197227bd2a7014078f56489267256fdfb27d080515412d69f86770f3ce",
+					"0x2470e1d8896cfe74ab01b68071b97d121333ebcec7a41cddd4581d736a25ba154ac94321a119906e3f41beec971d082",
+				},
+			},
+		},
+	};
+	typedef typename T::Point Point;
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+		Point P, Q, R;
+		set(P, tbl[i].P);
+		set(Q, tbl[i].Q);
+		set(R, tbl[i].R);
+		Point E;
+		mapto.add(E, P, Q);
+		CYBOZU_TEST_EQUAL(R.x, E.x);
+		CYBOZU_TEST_EQUAL(R.y, E.y);
+		CYBOZU_TEST_EQUAL(R.z, E.z);
+	}
+}
+
+template<class T>
+void iso3Test(const T& mapto)
+{
+	const PointStr Ps = {
+		{
+			"0xf0d9554fa5b04dbc6b106727e987bd68fb8c0cc97226a3845b59cc9d09972f24ea5a0d93cd0eedd18318c0024bf3df0",
+			"0x656650d143a2cf4a913821fa6a90ab6baa0bb063d1207b15108ea919258bfa4bdd1ba7247e8e65300d526801e43dca6",
+		},
+		{
+			"0x13a4b7c833b2702dc6ac4f5ee6ee74923a24c28e5a9b8e3b5626f700489ea47f9b1c3aa8cc0f4b525ae56e1e89aba868",
+			"0x16c0b9a89dcbe4e375f1e4d064013adff8e6e09866d38769c08ce355fbac9c823d52df971286b091b46d2cd49625c09",
+		},
+		{
+			"0x176ce067d52f676d4f6778eda26f2e2e75f9f39712583e60e2b3f345e2b2a84df1ae9ffa241ce89b1a377e4286c85ccf",
+			"0x822bc033cf0eec8bea9037ede74db0a73d932dc9b43f855e1862b747b0e53312dde5ed301e32551a11a5ef2dfe2dbf4",
+		}
+	};
+	const PointStr Qs = {
+		{
+			"0x8d5483693b4cf3fd5c7a62dad4179503094a66a52f2498dcedb5c97a33697ba4110e2da42ddef98beeeab04619ec0fe",
+			"0xd45728bb18737fb6abf8cc94ad37957f95855da867ca718708503fd072d3707ca6059fefb5c52b2745210cdd7991d10",
+		},
+		{
+			"0x17027ae16e10908f87e79c70f96ba44b1b11fa40fb5ac5456162133860f14896ca363b58d81ef8cb068bdaca2e576ed7",
+			"0xfb2d1655b00027d5580bbff8afa6eec6e6caacf5df4020c5255eafb51d50710193a8e39eac760745c45cc6ec556a820",
+		},
+		{
+			"0x376b86a7d664dc080485c29a57618eee792396f154806f75c78599ee223103e77bee223037bb99354114201619ea06",
+			"0xf0c64e52dbb8e2dca3c790993c8f101012c516b2884db16de4d857ae6bfb85e9101ab15906870b3e5a18268a57bfc99",
+		}
+	};
+	const PointStr clearPs = {
+		{
+			"0x6f3d4cbd80011d9cbf0f0772502d1e6571d00bc24efc892659339fc8ae049e757c57d22368c33cfc6c64bc2df59b3da",
+			"0x71e02679953af97ed57d9301d126c3243de7faa3bbebd40b46af880ba3ba608b8c09c0a876401545ce6f901950f192",
+		},
+		{
+			"0x174d1e92bd85b0cf1dd2808bd96a25ed48ba1e8d15c1af5557f62719e9f425bd8df58c900cf036e57bce1b1c78efb859",
+			"0x1cfc358b91d57bf6aa9fa6c688b0ef516fdac0c9bfd9ef310ea11e44aaf778cca99430594a8f5eb37d31c1b1f72c2f6",
+		},
+		{
+			"0x17614e52aacf8804ed2e7509db5b72395e586e2edc92dba02da24e6f73d059226a6deb6e396bd39567cec952f3849a6c",
+			"0xb7b36b9b1bbcf801d21ca5164aa9a0e71df2b4710c67dc0cd275b786800935fc29defbdf9c7e23dc84e26af13ba761d",
+		}
+	};
+	typename T::Point P;
+	G2 Q1, Q2;
+	set(P, Ps);
+	set(Q1, Qs);
+	mapto.iso3(Q2, P);
+	CYBOZU_TEST_EQUAL(Q1, Q2);
+	set(Q1, clearPs);
+	mapto.clear_h2(Q2, Q2);
+	CYBOZU_TEST_EQUAL(Q1, Q2);
+}
+
+template<class T>
+void opt_swu2_mapTest(const T& mapto)
+{
+	const Fp2Str t1s = {
+		"0xafcfb20d836159f0cfb6f48c0ed808fd97a1cd1b9f1eb14451ff59e3884b1bf7665406cce673d434dde6933bdcf0ec9",
+		"0x36714c33fa9c79b0bb9ac963f57b2d2b2659e211893e64292ee2a8c1259b1a834a769782bae17202b537a1fe901c55e",
+	};
+	const Fp2Str t2s = {
+        "0xb9a2f39af0cc3264348ed00845545e2ccbed59ea541c726c8429871f9a0917fb4f7e049ac739065eea8354a2d1b2d21",
+		"0xc8810a06deb536d70531352bd2a3aac7496e187a8fc102d800c5f8ed839bd64d7102197aeb2b6164d20ff920ff63afe",
+	};
+	const PointStr t1t1s = {
+		{
+			"0x13ea937301cfb2a071a265b08e176854034c2e2ae49898e89c042bff176a1be7bf02dfda06f67d38819ca334218b9ff4",
+			"0x180ee537c06213034c842cad3b5a6d0053473e8bb92dd4c5826e59a45268cda3fe28814b1e9f3a58b9db657d9c24a0bd",
+		},
+		{
+			"0x13f4530154b75ce311849e775242b5e791058fd8e1d7df292b8e936e8be05e1cd9fa6eed6280357393d54adf3af0eb9c",
+			"0x10619dc087132cf699b02c905284c3449e80c295c8140345e45e21b7389c8f2cf7b5e223ef87f11f57eb1e689f6c141a",
+		},
+		{
+			"0x40f98938abaece4e47427371b3b6c500f9cdacae9d8b4da79ba9107720bd038057a4cc8ec8427d651760fd795d2415",
+			"0xac9cd43c4ba29f20ed5dd2aa4a634b39810e756313b4826f225efddfb1ae43185ac4f279e628731030e87405a965bf5",
+		},
+	};
+	const PointStr t1t2s = {
+		{
+			"0x126b4982298792ed049850bb92b55d26c33a8e3139f9ca1a20821496c7396ce5ad9042b0da529e60ec9c3ff8e983befe",
+			"0x11c1d2f6a6a81e1f82dee2278968326e23e6ae469252a51d86673bd8fb333b7bca615b63a068692ff419c5f3e388797b",
+		},
+		{
+			"0x92468e5829b26cc976aff103403b4b5304dd206228c6eb84ecf7b45709307390bf29dced39f9aa037b014ad6fb5a6e4",
+			"0x5bd54eef1fdade89c98ab5c27d3dd9e18868af4250ff3a49de71d060ab62b7be039a3b2a8ef0c870d9021f6eae22029",
+		},
+		{
+			"0x154920adb9d857620c2835f4a5445bda35da53411710d559b18430f1b48c7cf2048cc275e0a9e01436d355f76fa0a9ec",
+			"0xccc404e5d17aa51f7669402916cf86587ce7cd9c657e90b05d7c8860940f741e62628df420d92c659d159d4b7683cce",
+		},
+	};
+	Fp2 t1, t2;
+	set(t1, t1s);
+	set(t2, t2s);
+	G2 P1, P2;
+	set(P1, t1t2s);
+	mapto.opt_swu2_map(P2, t1, &t2);
+	CYBOZU_TEST_EQUAL(P1, P2);
+	set(P1, t1t1s);
+	mapto.opt_swu2_map(P2, t1, &t1);
+	CYBOZU_TEST_EQUAL(P1, P2);
+}
+
+template<class T>
+void testVec(const T& mapto, const char *file)
+{
+	std::ifstream ifs(file);
+	Fp2 t1, t2;
+	G2 out, P;
+	std::string s;
+	for (;;) {
+		ifs >> s;
+		if (s != "t1") break;
+		ifs >> t1;
+		ifs >> s;
+		CYBOZU_TEST_EQUAL(s, "t2");
+		ifs >> t2;
+		ifs >> s;
+		CYBOZU_TEST_EQUAL(s, "out");
+		ifs >> out.x >> out.y >> out.z;
+		mapto.opt_swu2_map(P, t1, &t2);
+		CYBOZU_TEST_EQUAL(P, out);
+	}
+}
+
+CYBOZU_TEST_AUTO(test)
+{
+	initPairing(mcl::BLS12_381);
+	Fp::setETHserialization(true);
+	bn::setMapToMode(MCL_MAP_TO_MODE_WB19);
+	const mcl::bn::local::MapToG2_WB19& mapto = BN::param.mapTo.mapToG2_WB19_;
+	test2(mapto);
+	helpTest(mapto);
+	addTest(mapto);
+	iso3Test(mapto);
+	opt_swu2_mapTest(mapto);
+	testHMAC();
+	testHashToFp2();
+	testMap2curve_osswu2(mapto);
+//	testVec(mapto, "fips_186_3_B233.txt");
+//	testVec(mapto, "misc.txt");
+//	testHash_g2All(mapto, "../../bls_sigs_ref/test-vectors/hash_g2/");
+}

From f0df4f202b608e8d861750616140b7fede305961 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 25 Jan 2020 16:08:41 +0900
Subject: [PATCH 151/553] add a test of status-im/nim-blscurve

---
 Makefile                 |  1 +
 test/mapto_wb19_test.cpp | 96 ++++++++++++++++++++++++----------------
 2 files changed, 60 insertions(+), 37 deletions(-)

diff --git a/Makefile b/Makefile
index e99ff198..bea4fbcf 100644
--- a/Makefile
+++ b/Makefile
@@ -8,6 +8,7 @@ TEST_SRC+=bn_c256_test.cpp bn_c384_test.cpp bn_c384_256_test.cpp bn_c512_test.cp
 TEST_SRC+=she_c256_test.cpp she_c384_test.cpp she_c384_256_test.cpp
 TEST_SRC+=aggregate_sig_test.cpp array_test.cpp
 TEST_SRC+=bls12_test.cpp
+TEST_SRC+=mapto_wb19_test.cpp
 TEST_SRC+=ecdsa_c_test.cpp
 TEST_SRC+=modp_test.cpp
 ifeq ($(CPU),x86-64)
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 0afcb784..13be8e65 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -9,6 +9,9 @@
 using namespace mcl;
 using namespace mcl::bn;
 
+typedef mcl::bn::local::MapToG2_WB19 MapTo;
+typedef MapTo::Point Point;
+
 void dump(const void *msg, size_t msgSize)
 {
 	const uint8_t *p = (const uint8_t *)msg;
@@ -189,6 +192,18 @@ void test2(const T& mapto)
 		testMapToCurveG2
 		https://github.com/status-im/nim-blscurve/blob/de64516a5933a6e8ebb01a346430e61a201b5775/blscurve/hash_to_curve.nim#L531
 	*/
+	{
+		const Fp2Str u0s = {
+			"0x004ad233c619209060e40059b81e4c1f92796b05aa1bc6358d65e53dc0d657dfbc713d4030b0b6d9234a6634fd1944e7",
+			"0x0e2386c82713441bc3b06a460bd81850f4bf376ea89c80b18c0881e855c58dc8e83b2fd23af983f4786508e30c42af01",
+		};
+		const Fp2Str u1s = {
+			"0x08a6a75e0a8d32f1e096f29047ea879dd34a5504218d7ce92c32c244786822fb73fbf708d167ad86537468249ec6df48",
+			"0x07016d0e5e13cd65780042c6f7b4c74ae1c58da438c99582696818b5c229895b893318dcb87d2a65e557d4ebeb408b70",
+		};
+		Fp2 u0, u1;
+		(void)mapto;
+	}
 }
 
 template<class T>
@@ -220,52 +235,59 @@ template<class T>
 void helpTest(const T& mapto)
 {
 	const struct {
-		const char *ta;
-		const char *tb;
-		const char *xa;
-		const char *xb;
-		const char *ya;
-		const char *yb;
-		const char *za;
-		const char *zb;
+		Fp2Str t;
+		Fp2Str x;
+		Fp2Str y;
+		Fp2Str z;
 	} tbl[] = {
 		{
-			"0xe54bc0f2e26071a79ba5fe7ae5307d39cf5519e581e03b43f39a431eccc258fa1477c517b1268b22986601ee5caa5ea",
-			"0x17e8397d5e687ff7f915c23f27fe1ca2c397a7df91de8c88dc82d34c9188a3ef719f9f20436ea8a5fe7d509fbc79214d",
-
-			"0x11d568058220b1826cacde2e367beef98ea1edfde5fbf0491231b7ffdfc867e5269f9cfe65347c32ead182ba6b8c3ba1",
-			"0x19f2778213e671ac444b1b579bfdf4e7fabeed9626dc909ce243b60397a6b5f65af0fbbe02a43c1e289f28c927012da1",
-
-			"0xfe17bc695a84ec060b6287a4e77a50f65ba8f2c6c433f8131036ddfe34e3071d1cb71c0000f6bcfada947b19d8588df",
-			"0xb76abd285945f787721e7e306895149523941586ac44f25a294c406a70ed570020992025aa307777cfe6c590567dfbe",
-
-			"0x1910249ae63241608e013eb13578b9b3d96774d35e5732fc75efd17c212dd310d7f4016d6f212f62f33d34f10252e3e3",
-			"0xdcd076cea67c76a6d0594c8f30c8cd8e9ead24f90870f723228f2203a55e04a5517c426ea2c4bae9d37a11c3d0f1912",
+			{
+				"0xe54bc0f2e26071a79ba5fe7ae5307d39cf5519e581e03b43f39a431eccc258fa1477c517b1268b22986601ee5caa5ea",
+				"0x17e8397d5e687ff7f915c23f27fe1ca2c397a7df91de8c88dc82d34c9188a3ef719f9f20436ea8a5fe7d509fbc79214d",
+			},
+			{
+				"0x11d568058220b1826cacde2e367beef98ea1edfde5fbf0491231b7ffdfc867e5269f9cfe65347c32ead182ba6b8c3ba1",
+				"0x19f2778213e671ac444b1b579bfdf4e7fabeed9626dc909ce243b60397a6b5f65af0fbbe02a43c1e289f28c927012da1",
+			},
+			{
+				"0xfe17bc695a84ec060b6287a4e77a50f65ba8f2c6c433f8131036ddfe34e3071d1cb71c0000f6bcfada947b19d8588df",
+				"0xb76abd285945f787721e7e306895149523941586ac44f25a294c406a70ed570020992025aa307777cfe6c590567dfbe",
+			},
+			{
+				"0x1910249ae63241608e013eb13578b9b3d96774d35e5732fc75efd17c212dd310d7f4016d6f212f62f33d34f10252e3e3",
+				"0xdcd076cea67c76a6d0594c8f30c8cd8e9ead24f90870f723228f2203a55e04a5517c426ea2c4bae9d37a11c3d0f1912",
+			},
 		},
 		{
-			"0x2a8663422cc279aa8591819195a62cfd57357b7bcb6f4a9174275c2e2e754fb23e2f8a444d0d164990dc03dcb95a129",
-			"0x15cf611083511955a70fdcc80cb08c6e22b8043a3038065251d4d3f82c6051bac4933e41d589514c42fba13f78f297ef",
-
-			"0x74ee12dce0c9a8836017172b562ebe491273964dd63df71dea6eb778cd9040e8c9a7136e745013c1def93cc57ef0dae",
-			"0xedce8fa83a2435a796d207943b14ea4d1a9850e10a6c2035912f1c5bd579e9cabc54027b87a779af28f380cc5edc8a6",
-
-			"0x11367627461d742b4afac12bd789f1437787f2dc675cf2c7896f004ab8480c06cd06589748d8b9791b4969763962f73c",
-			"0x101d8e4c1598e72d943dad4695cfa74236d5065345f1e62e62c75ca30cb0c41c3f6197d7c57d46e8cdd07845d77e1e34",
-
-			"0x3952479e45a0826275c1481fbd78a2b4c5076b6a5cd4ad7e132c1ec460dcaef504943e2c6a969ba182e230da3850b4",
-			"0x13b8e64e2e233d1dc4506360c3bff93535642c2d3115c53c049e287e35c03212be882f0618cc50557e55b42be53e4893",
+			{
+				"0x2a8663422cc279aa8591819195a62cfd57357b7bcb6f4a9174275c2e2e754fb23e2f8a444d0d164990dc03dcb95a129",
+				"0x15cf611083511955a70fdcc80cb08c6e22b8043a3038065251d4d3f82c6051bac4933e41d589514c42fba13f78f297ef",
+			},
+			{
+				"0x74ee12dce0c9a8836017172b562ebe491273964dd63df71dea6eb778cd9040e8c9a7136e745013c1def93cc57ef0dae",
+				"0xedce8fa83a2435a796d207943b14ea4d1a9850e10a6c2035912f1c5bd579e9cabc54027b87a779af28f380cc5edc8a6",
+			},
+			{
+				"0x11367627461d742b4afac12bd789f1437787f2dc675cf2c7896f004ab8480c06cd06589748d8b9791b4969763962f73c",
+				"0x101d8e4c1598e72d943dad4695cfa74236d5065345f1e62e62c75ca30cb0c41c3f6197d7c57d46e8cdd07845d77e1e34",
+			},
+			{
+				"0x3952479e45a0826275c1481fbd78a2b4c5076b6a5cd4ad7e132c1ec460dcaef504943e2c6a969ba182e230da3850b4",
+				"0x13b8e64e2e233d1dc4506360c3bff93535642c2d3115c53c049e287e35c03212be882f0618cc50557e55b42be53e4893",
+			},
 		},
 	};
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
-		Fp2 t(tbl[i].ta, tbl[i].tb);
-		Fp2 x0(tbl[i].xa, tbl[i].xb);
-		Fp2 y0(tbl[i].ya, tbl[i].yb);
-		Fp2 z0(tbl[i].za, tbl[i].zb);
+		Fp2 t, x, y, z;
 		typename T::Point P;
+		set(t, tbl[i].t);
+		set(x, tbl[i].x);
+		set(y, tbl[i].y);
+		set(z, tbl[i].z);
 		mapto.osswu2_help(P, t);
-		CYBOZU_TEST_EQUAL(P.x, x0);
-		CYBOZU_TEST_EQUAL(P.y, y0);
-		CYBOZU_TEST_EQUAL(P.z, z0);
+		CYBOZU_TEST_EQUAL(P.x, x);
+		CYBOZU_TEST_EQUAL(P.y, y);
+		CYBOZU_TEST_EQUAL(P.z, z);
 		CYBOZU_TEST_ASSERT(mapto.isValidPoint(P));
 	}
 }

From 50dde8fda1a38363c6c4bdb73bc5723749323f19 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 25 Jan 2020 17:37:09 +0900
Subject: [PATCH 152/553] add test of opt_swu2_map

---
 test/mapto_wb19_test.cpp | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 13be8e65..ac2caac2 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -6,6 +6,8 @@
 #include <cybozu/atoi.hpp>
 #include <cybozu/file.hpp>
 
+#define PUT(x) std::cout << #x "=" << (x) << std::endl;
+
 using namespace mcl;
 using namespace mcl::bn;
 
@@ -188,10 +190,6 @@ void test2(const T& mapto)
 		mcl::bn::local::hashToFp2(x, msg, strlen(msg) + 1 /* add zero byte */, ctr, dst, strlen(dst));
 		CYBOZU_TEST_EQUAL(toHexStr(x), expect);
 	}
-	/*
-		testMapToCurveG2
-		https://github.com/status-im/nim-blscurve/blob/de64516a5933a6e8ebb01a346430e61a201b5775/blscurve/hash_to_curve.nim#L531
-	*/
 	{
 		const Fp2Str u0s = {
 			"0x004ad233c619209060e40059b81e4c1f92796b05aa1bc6358d65e53dc0d657dfbc713d4030b0b6d9234a6634fd1944e7",
@@ -201,8 +199,25 @@ void test2(const T& mapto)
 			"0x08a6a75e0a8d32f1e096f29047ea879dd34a5504218d7ce92c32c244786822fb73fbf708d167ad86537468249ec6df48",
 			"0x07016d0e5e13cd65780042c6f7b4c74ae1c58da438c99582696818b5c229895b893318dcb87d2a65e557d4ebeb408b70",
 		};
-		Fp2 u0, u1;
-		(void)mapto;
+		// return value of opt_swu2_map in bls_sigs_ref/python-impl/opt_swu_g2.py
+		const Fp2Str xs = {
+			"0x4861c41efcc5fc56e62273692b48da25d950d2a0aaffb34eff80e8dbdc2d41ca38555ceb8554368436aea47d16056b5",
+			"0x9db5217528c55d982cf05fc54242bdcd25f1ebb73372e00e16d8e0f19dc3aeabdeef2d42d693405a04c37d60961526a",
+		};
+		const Fp2Str ys = {
+			"0x177d05b95e7879a7ddbd83c15114b5a4e9846fde72b2263072dc9e60db548ccbadaacb92cc4952d4f47425fe3c5e0172",
+			"0xfc82c99b928ed9df12a74f9215c3df8ae1e9a3fa54c00897889296890b23a0edcbb9653f9170bf715f882b35c0b4647",
+		};
+		Fp2 u0, u1, x, y;
+		set(u0, u0s);
+		set(u1, u1s);
+		set(x, xs);
+		set(y, ys);
+		G2 P;
+		mapto.opt_swu2_map(P, u0, &u1);
+		P.normalize();
+		CYBOZU_TEST_EQUAL(P.x, x);
+		CYBOZU_TEST_EQUAL(P.y, y);
 	}
 }
 

From 83d3517b10c634c8eb5bd9d419b7bd698d53f7f2 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 25 Jan 2020 18:33:58 +0900
Subject: [PATCH 153/553] add mclBn_eth*

---
 include/mcl/bn.h               | 24 ++++++++++++++
 include/mcl/impl/bn_c_impl.hpp | 21 ++++++++++++
 test/bn_c_test.hpp             | 58 ++++++++++++++++++++++++++++++++++
 3 files changed, 103 insertions(+)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index f4f3383f..96e6b817 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -217,6 +217,30 @@ MCLBN_DLL_API void mclBn_setOriginalG2cofactor(int enable);
 */
 MCLBN_DLL_API int mclBn_setMapToMode(int mode);
 
+/*
+	the next three functions are auxiliary of the new eth 2.0 spec
+	these always return 0 if MCL_BLS12_381 is set
+*/
+/*
+	set out to hash of (msg[msgSize], ctr, dst[dstSize])
+	return 0 if success
+	@note append zero byte to msg if necessary
+*/
+MCLBN_DLL_API int mclBn_ethMsgToFp2(mclBnFp2 *out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize);
+
+/*
+	set out to hash of (t1, t2)
+	allow t2 is NULL
+	return 0 if success
+*/
+MCLBN_DLL_API int mclBn_ethFp2ToG2(mclBnG2 *out, const mclBnFp2 *t1, const mclBnFp2 *t2);
+
+/*
+	set out to hash of (msg[msgSize], dst[dstSize])
+	@note append zero byte to msg if necessary
+	return 0 if success
+*/
+MCLBN_DLL_API int mclBn_ethMsgToG2(mclBnG2 *out, const void *msg, size_t msgSize, const void *dst, size_t dstSize);
 
 ////////////////////////////////////////////////
 /*
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index 96fdc87f..96ae2a8f 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -135,6 +135,27 @@ int mclBn_setMapToMode(int mode)
 	return setMapToMode(mode) ? 0 : -1;
 }
 
+int mclBn_ethMsgToFp2(mclBnFp2 *out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
+{
+	if (mclBn_getCurveType() != MCL_BLS12_381) return -1;
+	mcl::bn::BN::local::hashToFp2(*cast(out), msg, msgSize, ctr, dst, dstSize);
+	return 0;
+}
+
+int mclBn_ethFp2ToG2(mclBnG2 *out, const mclBnFp2 *t1, const mclBnFp2 *t2)
+{
+	if (mclBn_getCurveType() != MCL_BLS12_381) return -1;
+	mcl::bn::BN::param.mapTo.mapToG2_WB19_.opt_swu2_map(*cast(out), *cast(t1), cast(t2));
+	return 0;
+}
+
+int mclBn_ethMsgToG2(mclBnG2 *out, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
+{
+	if (mclBn_getCurveType() != MCL_BLS12_381) return -1;
+	mcl::bn::BN::param.mapTo.mapToG2_WB19_.map2curve_osswu2(*cast(out), msg, msgSize, dst, dstSize);
+	return 0;
+}
+
 void mclBn_setOriginalG2cofactor(int enable)
 {
 	setOriginalG2cofactor(enable == 1);
diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index 85a81f9f..c1a7d244 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -658,6 +658,64 @@ CYBOZU_TEST_AUTO(ETHserialization)
 	mclBn_setETHserialization(keepETH);
 }
 
+struct Fp2Str {
+	const char *a;
+	const char *b;
+};
+
+void setFp2(mclBnFp2 *x, const Fp2Str& s)
+{
+	CYBOZU_TEST_EQUAL(mclBnFp_setStr(&x->d[0], s.a, strlen(s.a), 16), 0);
+	CYBOZU_TEST_EQUAL(mclBnFp_setStr(&x->d[1], s.b, strlen(s.b), 16), 0);
+}
+
+CYBOZU_TEST_AUTO(eth_hash)
+{
+	int curveType = mclBn_getCurveType();
+	if (curveType != MCL_BLS12_381) return;
+	{
+		const char *msg = "msg";
+		uint8_t ctr = 0;
+		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_";
+		const Fp2Str ys = {
+			"18df4dc51885b18ca0082a4966b0def46287930b8f1c0b673b11ac48d19c8899bc150d83fd3a7a1430b0de541742c1d4",
+			"14eef8ca34b82d065d187a3904cb313dbb44558917cc5091574d9999b5ecfdd5af2fa3aea6e02fb253bf4ae670e72d55"
+		};
+		mclBnFp2 x, y;
+		CYBOZU_TEST_EQUAL(mclBn_ethMsgToFp2(&x, msg, strlen(msg) + 1 /* add zero byte */, ctr, dst, strlen(dst)), 0);
+		setFp2(&y, ys);
+		CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&x, &y));
+	}
+	{
+		const Fp2Str u0s = {
+			"0x004ad233c619209060e40059b81e4c1f92796b05aa1bc6358d65e53dc0d657dfbc713d4030b0b6d9234a6634fd1944e7",
+			"0x0e2386c82713441bc3b06a460bd81850f4bf376ea89c80b18c0881e855c58dc8e83b2fd23af983f4786508e30c42af01",
+		};
+		const Fp2Str u1s = {
+			"0x08a6a75e0a8d32f1e096f29047ea879dd34a5504218d7ce92c32c244786822fb73fbf708d167ad86537468249ec6df48",
+			"0x07016d0e5e13cd65780042c6f7b4c74ae1c58da438c99582696818b5c229895b893318dcb87d2a65e557d4ebeb408b70",
+		};
+		const Fp2Str xs = {
+			"0x4861c41efcc5fc56e62273692b48da25d950d2a0aaffb34eff80e8dbdc2d41ca38555ceb8554368436aea47d16056b5",
+			"0x9db5217528c55d982cf05fc54242bdcd25f1ebb73372e00e16d8e0f19dc3aeabdeef2d42d693405a04c37d60961526a",
+		};
+		const Fp2Str ys = {
+			"0x177d05b95e7879a7ddbd83c15114b5a4e9846fde72b2263072dc9e60db548ccbadaacb92cc4952d4f47425fe3c5e0172",
+			"0xfc82c99b928ed9df12a74f9215c3df8ae1e9a3fa54c00897889296890b23a0edcbb9653f9170bf715f882b35c0b4647",
+		};
+		mclBnFp2 u0, u1, x, y;
+		setFp2(&u0, u0s);
+		setFp2(&u1, u1s);
+		setFp2(&x, xs);
+		setFp2(&y, ys);
+		mclBnG2 P;
+		mclBn_ethFp2ToG2(&P, &u0, &u1);
+		mclBnG2_normalize(&P, &P);
+		CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&P.x, &x));
+		CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&P.y, &y));
+	}
+}
+
 #if MCLBN_FP_UNIT_SIZE == 6 && MCLBN_FR_UNIT_SIZE >= 6
 CYBOZU_TEST_AUTO(badG2)
 {

From ede75f1645c4de3cd861b87262cacdb6d335a31b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 25 Jan 2020 21:02:08 +0900
Subject: [PATCH 154/553] add test of mclBn_ethMsgToG2

---
 test/bn_c_test.hpp | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index c1a7d244..c98863bf 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -714,6 +714,28 @@ CYBOZU_TEST_AUTO(eth_hash)
 		CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&P.x, &x));
 		CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&P.y, &y));
 	}
+	{
+		const char *msg = "msg";
+		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_";
+		const Fp2Str xs = {
+			"0xb1871d245d50ec4e5a3ac790628864d24655208812abc420b67a93c5afdd38111137f14ca0f844ddbf69809897ca941",
+			"0xa8b490ae1aac870b16b1a82db2e9653ec14485fc5f38c2ce2926c526537262061d4cd8bc62cc90e98235952a7fe7f13",
+		};
+		const Fp2Str ys = {
+			"0x2c8e9f9d52870075ae5879be5a4994a16db6c93b34453d9c055eb058107a2d805cc307b0ba30144518fb36da5f97d12",
+			"0x344ce62d77dda0b4b509d5b5e6ef08f99c972fc0e5f0c25b25bb881384e85b8b1086043813e674f9bbc4b67dd47d9a7",
+		};
+		const Fp2Str zs = {
+			"0x1515a4d612e48626000f998a220029380a47e9e6c69d497db804e2dfc3dbce5cfb000a559b64f50796f26ddc4cf3be2c",
+			"0x1796ee0f0b9b65802c90e3e1586034f3826ec3538c66525de298d1ff2f7a26f2ec553ec64e5989ed9841c4456d0bddd7",
+		};
+		mclBnG2 P, Q;
+		mclBn_ethMsgToG2(&P, msg, strlen(msg) + 1 /* add zero byte */, dst, strlen(dst));
+		setFp2(&Q.x, xs);
+		setFp2(&Q.y, ys);
+		setFp2(&Q.z, zs);
+		CYBOZU_TEST_ASSERT(mclBnG2_isEqual(&P, &Q));
+	}
 }
 
 #if MCLBN_FP_UNIT_SIZE == 6 && MCLBN_FR_UNIT_SIZE >= 6

From 8f022a3de3f7d286753364d99b1e78a5ec42e92c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 25 Jan 2020 21:10:21 +0900
Subject: [PATCH 155/553] update readme.md

---
 readme.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/readme.md b/readme.md
index 5f6a06b9..9314d85f 100644
--- a/readme.md
+++ b/readme.md
@@ -9,6 +9,12 @@ A portable and fast pairing-based cryptography library.
 mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
+# News
+add new hash functions corresponding to python-impl of [algorand/bls_sig_ref](https://github.com/algorand/bls_sigs_ref).
+* `mclBn_ethMsgToFp2`(resp. `Hp2`)
+* `mclBn_ethFp2ToG2`(resp. `opt_swu2_map`)
+* `mclBn_ethMsgToG2`(resp. `map2curve_osswu2`)
+
 # Support architecture
 
 - x86-64 Windows + Visual Studio

From 124dd293a817b1224139952121b8b7ac2d4c0ee9 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 25 Jan 2020 21:11:51 +0900
Subject: [PATCH 156/553] update version

---
 include/mcl/op.hpp | 2 +-
 readme.md          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index a5506089..d1ad3e43 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x103; /* 0xABC = A.BC */
+static const int version = 0x104; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index 9314d85f..efc2d6d8 100644
--- a/readme.md
+++ b/readme.md
@@ -292,6 +292,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
+- 2020/Jan/25 v1.04 add new hash functions
 - 2019/Dec/05 v1.03 disable to check the order in setStr
 - 2019/Sep/30 v1.00 add some functions to bn.h ; [api.md](api.md).
 - 2019/Sep/22 v0.99 add mclBnG1_mulVec, etc.

From 8e71598093ee63b4704866d2dd5fa393ce2037d2 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 26 Jan 2020 10:08:47 +0900
Subject: [PATCH 157/553] add serializeToHexStr/deserializeHexStr

---
 include/mcl/operator.hpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/mcl/operator.hpp b/include/mcl/operator.hpp
index 878afa6e..84df77f7 100644
--- a/include/mcl/operator.hpp
+++ b/include/mcl/operator.hpp
@@ -184,6 +184,20 @@ struct Serializable : public E {
 		getStr(str, ioMode);
 		return str;
 	}
+	std::string serializeToHexStr() const
+	{
+		std::string str(sizeof(T) * 2, 0);
+		size_t n = serialize(&str[0], str.size(), IoSerializeHexStr);
+		str.resize(n);
+		return str;
+	}
+#ifndef CYBOZU_DONT_USE_EXCEPTION
+	void deserializeHexStr(const std::string& str)
+	{
+		size_t n = deserialize(str.c_str(), str.size(), IoSerializeHexStr);
+		if (n == 0) throw cybozu::Exception("bad str") << str;
+	}
+#endif
 #endif
 	// return written bytes
 	size_t serialize(void *buf, size_t maxBufSize, int ioMode = IoSerialize) const

From 0870bc8251016f9dc1ec77279ed07b892bb3c9f9 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 26 Jan 2020 10:20:17 +0900
Subject: [PATCH 158/553] add eth hash functions for c++

---
 include/mcl/bn.hpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index e13cf4d0..c7e7b4c8 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -2317,5 +2317,26 @@ inline const Fr& getG2cofactorAdjInv()
 	return BN::param.mapTo.g2cofactorAdjInv_;
 }
 
+inline bool ethMsgToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
+{
+	if (!BN::param.isBLS12) return false;
+	BN::local::hashToFp2(out, msg, msgSize, ctr, dst, dstSize);
+	return true;
+}
+
+inline bool ethFp2ToG2(G2& out, const Fp2& t1, const Fp2 *t2 = 0)
+{
+	if (!BN::param.isBLS12) return false;
+	BN::param.mapTo.mapToG2_WB19_.opt_swu2_map(out, t1, t2);
+	return true;
+}
+
+inline bool ethMsgToG2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
+{
+	if (!BN::param.isBLS12) return false;
+	BN::param.mapTo.mapToG2_WB19_.map2curve_osswu2(out, msg, msgSize, dst, dstSize);
+	return true;
+}
+
 } } // mcl::bn
 

From fee5144d0fb21d758f124d173ddd9f3fcd617b31 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 26 Jan 2020 10:34:09 +0900
Subject: [PATCH 159/553] use eth* functions for c api

---
 include/mcl/impl/bn_c_impl.hpp | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index 96ae2a8f..f31380b0 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -137,23 +137,17 @@ int mclBn_setMapToMode(int mode)
 
 int mclBn_ethMsgToFp2(mclBnFp2 *out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
 {
-	if (mclBn_getCurveType() != MCL_BLS12_381) return -1;
-	mcl::bn::BN::local::hashToFp2(*cast(out), msg, msgSize, ctr, dst, dstSize);
-	return 0;
+	return mcl::bn::ethMsgToFp2(*cast(out), msg, msgSize, ctr, dst, dstSize) ? 0 : -1;
 }
 
 int mclBn_ethFp2ToG2(mclBnG2 *out, const mclBnFp2 *t1, const mclBnFp2 *t2)
 {
-	if (mclBn_getCurveType() != MCL_BLS12_381) return -1;
-	mcl::bn::BN::param.mapTo.mapToG2_WB19_.opt_swu2_map(*cast(out), *cast(t1), cast(t2));
-	return 0;
+	return mcl::bn::ethFp2ToG2(*cast(out), *cast(t1), cast(t2)) ? 0 : -1;
 }
 
 int mclBn_ethMsgToG2(mclBnG2 *out, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
 {
-	if (mclBn_getCurveType() != MCL_BLS12_381) return -1;
-	mcl::bn::BN::param.mapTo.mapToG2_WB19_.map2curve_osswu2(*cast(out), msg, msgSize, dst, dstSize);
-	return 0;
+	return mcl::bn::ethMsgToG2(*cast(out), msg, msgSize, dst, dstSize) ? 0 : -1;
 }
 
 void mclBn_setOriginalG2cofactor(int enable)

From f483ec41d761568bed05dae10808cf110abd2107 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 26 Jan 2020 10:34:42 +0900
Subject: [PATCH 160/553] refactor mapto test

---
 test/mapto_wb19_test.cpp | 67 ++++++++++++++++++++++------------------
 1 file changed, 37 insertions(+), 30 deletions(-)

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index ac2caac2..60e1955a 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -97,8 +97,7 @@ std::string toHexStr(const G2& P)
 	return toHexStr(xy, 96);
 }
 
-template<class T>
-void testHash_g2(const T& mapto, const std::string& fileName)
+void testHash_g2(const std::string& fileName)
 {
 	const char *dst = "\x02";
 	printf("name=%s\n", fileName.c_str());
@@ -111,19 +110,18 @@ void testHash_g2(const T& mapto, const std::string& fileName)
 		if (zero != "00") break;
 		buf = fromHexStr(msg);
 		buf.push_back(0); // add zero byte
-		mapto.map2curve_osswu2(out, buf.data(), buf.size(), dst, strlen(dst));
+		ethMsgToG2(out, buf.data(), buf.size(), dst, strlen(dst));
 		std::string s = toHexStr(out);
 		CYBOZU_TEST_EQUAL(s, ret);
 	}
 }
 
-template<class T>
-void testHash_g2All(const T& mapto, const std::string& dir)
+void testHash_g2All(const std::string& dir)
 {
 	cybozu::FileList list = cybozu::GetFileList(dir);
 	for (size_t i = 0; i < list.size(); i++) {
 		const cybozu::FileInfo& info = list[i];
-		testHash_g2(mapto, dir + "/" + info.name);
+		testHash_g2(dir + "/" + info.name);
 	}
 }
 
@@ -149,8 +147,7 @@ void testHashToFp2()
 	CYBOZU_TEST_EQUAL(out, ok);
 }
 
-template<class T>
-void testMap2curve_osswu2(const T& mapto)
+void ethMsgToG2test()
 {
 	const char *msg = "the message to be signed";
 	const char *dst = "\x02";
@@ -170,12 +167,12 @@ void testMap2curve_osswu2(const T& mapto)
 	};
 	G2 out, ok;
 	set(ok, outS);
-	mapto.map2curve_osswu2(out, msg, strlen(msg) + 1 /* contains zero byte */, dst, strlen(dst));
+//	mapto.map2curve_osswu2(out, msg, strlen(msg) + 1 /* contains zero byte */, dst, strlen(dst));
+	ethMsgToG2(out, msg, strlen(msg) + 1 /* contains zero byte */, dst, strlen(dst));
 	CYBOZU_TEST_EQUAL(out, ok);
 }
 
-template<class T>
-void test2(const T& mapto)
+void test2()
 {
 	/*
 		testHashToBaseFP2
@@ -187,7 +184,7 @@ void test2(const T& mapto)
 		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_";
 		const char *expect = "18df4dc51885b18ca0082a4966b0def46287930b8f1c0b673b11ac48d19c8899bc150d83fd3a7a1430b0de541742c1d4 14eef8ca34b82d065d187a3904cb313dbb44558917cc5091574d9999b5ecfdd5af2fa3aea6e02fb253bf4ae670e72d55";
 		Fp2 x;
-		mcl::bn::local::hashToFp2(x, msg, strlen(msg) + 1 /* add zero byte */, ctr, dst, strlen(dst));
+		ethMsgToFp2(x, msg, strlen(msg) + 1 /* add zero byte */, ctr, dst, strlen(dst));
 		CYBOZU_TEST_EQUAL(toHexStr(x), expect);
 	}
 	{
@@ -214,11 +211,23 @@ void test2(const T& mapto)
 		set(x, xs);
 		set(y, ys);
 		G2 P;
-		mapto.opt_swu2_map(P, u0, &u1);
+		ethFp2ToG2(P, u0, &u1);
 		P.normalize();
 		CYBOZU_TEST_EQUAL(P.x, x);
 		CYBOZU_TEST_EQUAL(P.y, y);
 	}
+	{
+		// https://media.githubusercontent.com/media/ethereum/eth2.0-spec-tests/v0.10.1/tests/general/phase0/bls/sign/small/sign_case_11b8c7cad5238946/data.yaml
+		const char *secs = "47b8192d77bf871b62e87859d653922725724a5c031afeabc60bcef5ff665138";
+		const char msg[32] = {};
+		const char *expect = "b2deb7c656c86cb18c43dae94b21b107595486438e0b906f3bdb29fa316d0fc3cab1fc04c6ec9879c773849f2564d39317bfa948b4a35fc8509beafd3a2575c25c077ba8bca4df06cb547fe7ca3b107d49794b7132ef3b5493a6ffb2aad2a441";
+		Fr sec;
+		sec.deserializeHexStr(secs);
+		G2 Q;
+		Q.deserializeHexStr(expect);
+		Q *= (1/sec);
+		printf("Q=%s\n", Q.serializeToHexStr().c_str());
+	}
 }
 
 template<class T>
@@ -247,7 +256,7 @@ void testSign(const T& mapto)
 }
 
 template<class T>
-void helpTest(const T& mapto)
+void osswu2_helpTest(const T& mapto)
 {
 	const struct {
 		Fp2Str t;
@@ -294,7 +303,7 @@ void helpTest(const T& mapto)
 	};
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 		Fp2 t, x, y, z;
-		typename T::Point P;
+		Point P;
 		set(t, tbl[i].t);
 		set(x, tbl[i].x);
 		set(y, tbl[i].y);
@@ -360,7 +369,6 @@ void addTest(const T& mapto)
 			},
 		},
 	};
-	typedef typename T::Point Point;
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 		Point P, Q, R;
 		set(P, tbl[i].P);
@@ -430,8 +438,7 @@ void iso3Test(const T& mapto)
 	CYBOZU_TEST_EQUAL(Q1, Q2);
 }
 
-template<class T>
-void opt_swu2_mapTest(const T& mapto)
+void ethFp2ToG2test()
 {
 	const Fp2Str t1s = {
 		"0xafcfb20d836159f0cfb6f48c0ed808fd97a1cd1b9f1eb14451ff59e3884b1bf7665406cce673d434dde6933bdcf0ec9",
@@ -474,15 +481,14 @@ void opt_swu2_mapTest(const T& mapto)
 	set(t2, t2s);
 	G2 P1, P2;
 	set(P1, t1t2s);
-	mapto.opt_swu2_map(P2, t1, &t2);
+	ethFp2ToG2(P2, t1, &t2);
 	CYBOZU_TEST_EQUAL(P1, P2);
 	set(P1, t1t1s);
-	mapto.opt_swu2_map(P2, t1, &t1);
+	ethFp2ToG2(P2, t1, &t1);
 	CYBOZU_TEST_EQUAL(P1, P2);
 }
 
-template<class T>
-void testVec(const T& mapto, const char *file)
+void testVec(const char *file)
 {
 	std::ifstream ifs(file);
 	Fp2 t1, t2;
@@ -498,7 +504,7 @@ void testVec(const T& mapto, const char *file)
 		ifs >> s;
 		CYBOZU_TEST_EQUAL(s, "out");
 		ifs >> out.x >> out.y >> out.z;
-		mapto.opt_swu2_map(P, t1, &t2);
+		ethFp2ToG2(P, t1, &t2);
 		CYBOZU_TEST_EQUAL(P, out);
 	}
 }
@@ -509,15 +515,16 @@ CYBOZU_TEST_AUTO(test)
 	Fp::setETHserialization(true);
 	bn::setMapToMode(MCL_MAP_TO_MODE_WB19);
 	const mcl::bn::local::MapToG2_WB19& mapto = BN::param.mapTo.mapToG2_WB19_;
-	test2(mapto);
-	helpTest(mapto);
+	test2();
+	osswu2_helpTest(mapto);
 	addTest(mapto);
 	iso3Test(mapto);
-	opt_swu2_mapTest(mapto);
+	testSign(mapto);
+	ethFp2ToG2test();
 	testHMAC();
 	testHashToFp2();
-	testMap2curve_osswu2(mapto);
-//	testVec(mapto, "fips_186_3_B233.txt");
-//	testVec(mapto, "misc.txt");
-//	testHash_g2All(mapto, "../../bls_sigs_ref/test-vectors/hash_g2/");
+	ethMsgToG2test();
+//	testVec("fips_186_3_B233.txt");
+//	testVec("misc.txt");
+//	testHash_g2All("../../bls_sigs_ref/test-vectors/hash_g2/");
 }

From e4f30277377a1fc78e13bbe8d69d36c3351efbcb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 26 Jan 2020 17:25:55 +0900
Subject: [PATCH 161/553] add optimized_swu_G2

---
 include/mcl/mapto_wb19.hpp | 101 ++++++++++++++++++++++++++++++++++++-
 test/mapto_wb19_test.cpp   |  68 +++++++++++++++++++++++--
 2 files changed, 163 insertions(+), 6 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 2663fe2a..180c8240 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -314,12 +314,101 @@ struct MapToG2_WB19 {
 		if (!x.b.isZero()) return false;
 		return false;
 	}
+	/*
+		z = sqrt(u/v) = (uv^7) (uv^15)^((p^2-9)/16) * root4
+		return true if found
+	*/
+	bool sqr_div(Fp2& z, const Fp2& u, const Fp2& v) const
+	{
+		Fp2 gamma, t1, t2;
+		Fp2::sqr(gamma, v); // v^2
+		Fp2::sqr(t2, gamma); // v^4
+		Fp2::mul(t1, u, v); // uv
+		t1 *= gamma; // uv^3
+		t1 *= t2; // uv^7
+		Fp2::sqr(t2, t2); // v^8
+		t2 *= t1;
+		Fp2::pow(gamma, t2, sqrtConst);
+		gamma *= t1;
+		Fp2 candi;
+		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(root4); i++) {
+			Fp2::mul(candi, gamma, root4[i]);
+			Fp2::sqr(t1, candi);
+			t1 *= v;
+			if (t1 == u) {
+				z = candi;
+				return true;
+			}
+		}
+		z = gamma;
+		return false;
+	}
+	// https://github.com/ethereum/py_ecc
+	void optimized_swu_G2(Point& P, const Fp2& t) const
+	{
+		Fp2 t2, t2xi, t2xi2;
+		Fp2::sqr(t2, t);
+		mul_xi(t2xi, t2);
+		Fp2::sqr(t2xi2, t2xi);
+		Fp2 nume, deno;
+		// (t^2 * xi)^2 + (t^2 * xi)
+		Fp2::add(deno, t2xi2, t2xi);
+		Fp2::add(nume, deno, 1);
+		nume *= Ell2p_b;
+		if (deno.isZero()) {
+			Fp2::mul(deno, Ell2p_a, xi);
+		} else {
+			deno *= -Ell2p_a;
+		}
+		Fp2 u, v;
+		{
+			Fp2 deno2, tmp, tmp1, tmp2;
+			Fp2::sqr(deno2, deno);
+			Fp2::mul(v, deno2, deno);
+
+			Fp2::mul(u, Ell2p_b, v);
+			Fp2::mul(tmp, Ell2p_a, nume);
+			tmp *= deno2;
+			u += tmp;
+			Fp2::sqr(tmp, nume);
+			tmp *= nume;
+			u += tmp;
+		}
+		Fp2 candi;
+		bool success = sqr_div(candi, u, v);
+		P.y = candi;
+		candi *= t2;
+		candi *= t;
+		u *= t2xi2;
+		u *= t2xi;
+		bool success2 = false;
+		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(etas); i++) {
+			Fp2 t1;
+			Fp2::mul(t1, etas[i], candi);
+			Fp2::sqr(t2, t1);
+			t2 *= v;
+			if (t2 == u && !success && !success2) {
+				P.y = t1;
+				success2 = true;
+			}
+		}
+		assert(success || success2);
+		if (!success) {
+			nume *= t2xi;
+		}
+		if (isNegSign(t) != isNegSign(P.y)) {
+			Fp2::neg(P.y, P.y);
+		}
+		P.y *= deno;
+		P.x = nume;
+		P.z = deno;
+	}
+	// https://github.com/algorand/bls_sigs_ref
 	void osswu2_help(Point& P, const Fp2& t) const
 	{
 		Fp2 t2, t2xi;
 		Fp2::sqr(t2, t);
 		Fp2 den, den2;
-//		Fp2::mul(t2xi, t2, xi);
 		mul_xi(t2xi, t2);
 		den = t2xi;
 		Fp2::sqr(den2, den);
@@ -473,13 +562,23 @@ struct MapToG2_WB19 {
 		add(Q, work, work2);
 #endif
 	}
+	template<class T>
+	void put(const T& P) const
+	{
+		const int base = 10;
+		printf("x=%s\n", P.x.getStr(base).c_str());
+		printf("y=%s\n", P.y.getStr(base).c_str());
+		printf("z=%s\n", P.z.getStr(base).c_str());
+	}
 	void opt_swu2_map(G2& P, const Fp2& t, const Fp2 *t2 = 0) const
 	{
 		Point Pp;
 		osswu2_help(Pp, t);
+//put(Pp);
 		if (t2) {
 			Point P2;
 			osswu2_help(P2, *t2);
+//put(P2);
 			add(Pp, Pp, P2);
 		}
 		iso3(P, Pp);
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 60e1955a..32ba4a01 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -1,3 +1,4 @@
+#define PUT(x) std::cout << #x "=" << (x) << std::endl;
 #include <cybozu/test.hpp>
 #include <cybozu/sha2.hpp>
 #include <mcl/bls12_381.hpp>
@@ -6,8 +7,6 @@
 #include <cybozu/atoi.hpp>
 #include <cybozu/file.hpp>
 
-#define PUT(x) std::cout << #x "=" << (x) << std::endl;
-
 using namespace mcl;
 using namespace mcl::bn;
 
@@ -172,8 +171,10 @@ void ethMsgToG2test()
 	CYBOZU_TEST_EQUAL(out, ok);
 }
 
-void test2()
+template<class T>
+void test2(const T& mapto)
 {
+	(void)mapto;
 	/*
 		testHashToBaseFP2
 		https://github.com/status-im/nim-blscurve/blob/de64516a5933a6e8ebb01a346430e61a201b5775/blscurve/hash_to_curve.nim#L492
@@ -219,14 +220,38 @@ void test2()
 	{
 		// https://media.githubusercontent.com/media/ethereum/eth2.0-spec-tests/v0.10.1/tests/general/phase0/bls/sign/small/sign_case_11b8c7cad5238946/data.yaml
 		const char *secs = "47b8192d77bf871b62e87859d653922725724a5c031afeabc60bcef5ff665138";
-		const char msg[32] = {};
+		const char msg[33] = {};
 		const char *expect = "b2deb7c656c86cb18c43dae94b21b107595486438e0b906f3bdb29fa316d0fc3cab1fc04c6ec9879c773849f2564d39317bfa948b4a35fc8509beafd3a2575c25c077ba8bca4df06cb547fe7ca3b107d49794b7132ef3b5493a6ffb2aad2a441";
 		Fr sec;
 		sec.deserializeHexStr(secs);
 		G2 Q;
 		Q.deserializeHexStr(expect);
 		Q *= (1/sec);
+		G2 P;
+		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO-_POP_";
+std::cout << std::hex;
+		ethMsgToG2(P, msg, 33, dst, strlen(dst));
+		printf("P=%s\n", P.serializeToHexStr().c_str());
 		printf("Q=%s\n", Q.serializeToHexStr().c_str());
+		mapto.put(P);
+		printf("equal %d\n", P == Q);
+PointStr ss = {
+{
+	"3257676086538823567761244186080544403330427395946948635449582231233180442322077484215757257097813156392664917178234",
+	"228537154970146118588036771068753907531432250550232803895899422656339347346840810590265440478956079727608969412311",
+},
+{
+	"2211656311977487430400091470761449132135875543285725344573261083165139360734602590585740129428161178745780787382986",
+	"40258781102313547933704047733645277081466097003572358028270922475602169023300010845551344432311507156784289541037",
+},
+{
+	"3554635405737095173231135338330740471713348364117258010850826274365262386961694608537862757803628655357449929362973",
+	"3305133470803621861948711123350198492693369595391902116552614265910644738630055172693143208260379598437272858586799",
+},
+};
+		Point RR;
+		set(RR, ss);
+		mapto.put(RR);
 	}
 }
 
@@ -509,13 +534,46 @@ void testVec(const char *file)
 	}
 }
 
+template<class T>
+void test3(const T& mapto)
+{
+	Fp2Str ts = {
+		"1918231859236664604157448091070531325862162392395253569013354101088957561890652491757605826252839368362075816084620",
+		"1765592454498940438559713185757713516213027777891663285362602185795653989012303939547547418058658378320847225866857",
+	};
+	PointStr out1s = {
+		{
+			"3927184272261705576225284664838663573624313247854459615864888213007837227449093837336748448846489186151562481034580",
+			"1903293468617299241460799312855927163610998535569367868293984916087966126786510088134190993502241498025510393259948",
+		},
+		{
+			"3991322739214666504999201807778913642377537002372597995520099276113880862779909709825029178857593814896063515454176",
+			"2999367925154329126226224834594837693635617675385117964685771461463180146028553717562548600391126160503718637741311",
+		},
+		{
+			"2578853905647618145305524664579860566455691148296386065391659245709237478565628968511959291772795541098532647163712",
+			"3910188857576114167072883940429120413632909260968721432280195359371907407125083761682822023489835923188989938783197",
+		},
+	};
+	Fp2 t;
+	set(t, ts);
+	Point P, Q;
+	mapto.optimized_swu_G2(P, t);
+	set(Q, out1s);
+	CYBOZU_TEST_EQUAL(P.x, Q.x);
+	CYBOZU_TEST_EQUAL(P.y, Q.y);
+	CYBOZU_TEST_EQUAL(P.z, Q.z);
+}
+
 CYBOZU_TEST_AUTO(test)
 {
 	initPairing(mcl::BLS12_381);
 	Fp::setETHserialization(true);
 	bn::setMapToMode(MCL_MAP_TO_MODE_WB19);
 	const mcl::bn::local::MapToG2_WB19& mapto = BN::param.mapTo.mapToG2_WB19_;
-	test2();
+	test3(mapto);
+return;
+	test2(mapto);
 	osswu2_helpTest(mapto);
 	addTest(mapto);
 	iso3Test(mapto);

From 6f8d014f2c05e20457bbd15b7f7531e868db1f63 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 27 Jan 2020 17:27:10 +0900
Subject: [PATCH 162/553] add some functions of py_ecc

---
 include/mcl/mapto_wb19.hpp |  93 ++++++++++++++++++---
 test/mapto_wb19_test.cpp   | 166 +++++++++++++++++++++++++------------
 2 files changed, 196 insertions(+), 63 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 180c8240..926f05a6 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -62,6 +62,7 @@ struct MapToG2_WB19 {
 		Fp2::neg(Q.y, P.y);
 		Q.z = P.z;
 	}
+	// Jacobi
 	template<class G>
 	void add(G& R, const G& P, const G& Q) const
 	{
@@ -260,30 +261,29 @@ struct MapToG2_WB19 {
 	template<size_t N>
 	void evalPoly(Fp2& y, const Fp2& x, const Fp2 *zpows, const Fp2 (&cof)[N]) const
 	{
-		Fp2::mul(y, zpows[0], cof[N - 1]);
+		y = cof[N - 1]; // always zpows[0] = 1
 		for (size_t i = 1; i < N; i++) {
 			y *= x;
 			Fp2 t;
-			Fp2::mul(t, zpows[i], cof[N - 1 - i]);
+			Fp2::mul(t, zpows[i - 1], cof[N - 1 - i]);
 			y += t;
 		}
 	}
 	// refer (xnum, xden, ynum, yden)
 	void iso3(G2& Q, const Point& P) const
 	{
-		Fp2 zpows[4];
-		zpows[0] = 1;
-		Fp2::sqr(zpows[1], P.z);
-		Fp2::sqr(zpows[2], zpows[1]);
-		Fp2::mul(zpows[3], zpows[2], zpows[1]);
+		Fp2 zpows[3];
+		Fp2::sqr(zpows[0], P.z);
+		Fp2::sqr(zpows[1], zpows[0]);
+		Fp2::mul(zpows[2], zpows[1], zpows[0]);
 		Fp2 mapvals[4];
 		evalPoly(mapvals[0], P.x, zpows, xnum);
 		evalPoly(mapvals[1], P.x, zpows, xden);
 		evalPoly(mapvals[2], P.x, zpows, ynum);
 		evalPoly(mapvals[3], P.x, zpows, yden);
-		mapvals[1] *= zpows[1];
+		mapvals[1] *= zpows[0];
 		mapvals[2] *= P.y;
-		mapvals[3] *= zpows[1];
+		mapvals[3] *= zpows[0];
 		mapvals[3] *= P.z;
 		Fp2::mul(Q.z, mapvals[1], mapvals[3]);
 		Fp2::mul(Q.x, mapvals[0], mapvals[3]);
@@ -344,7 +344,7 @@ struct MapToG2_WB19 {
 		return false;
 	}
 	// https://github.com/ethereum/py_ecc
-	void optimized_swu_G2(Point& P, const Fp2& t) const
+	void py_ecc_optimized_swu_G2(Point& P, const Fp2& t) const
 	{
 		Fp2 t2, t2xi, t2xi2;
 		Fp2::sqr(t2, t);
@@ -403,6 +403,64 @@ struct MapToG2_WB19 {
 		P.x = nume;
 		P.z = deno;
 	}
+	// Proj
+	void py_ecc_iso_map_G2(G2& Q, const Point& P) const
+	{
+		Fp2 zpows[3];
+		zpows[0] = P.z;
+		Fp2::sqr(zpows[1], zpows[0]);
+		Fp2::mul(zpows[2], zpows[1], zpows[0]);
+		Fp2 mapvals[4];
+		evalPoly(mapvals[0], P.x, zpows, xnum);
+		evalPoly(mapvals[1], P.x, zpows, xden);
+		evalPoly(mapvals[2], P.x, zpows, ynum);
+		evalPoly(mapvals[3], P.x, zpows, yden);
+		mapvals[1] *= P.z;
+		mapvals[2] *= P.y;
+		mapvals[3] *= P.z;
+		Fp2::mul(Q.z, mapvals[1], mapvals[3]);
+		Fp2::mul(Q.x, mapvals[0], mapvals[3]);
+		Fp2::mul(Q.y, mapvals[1], mapvals[2]);
+	}
+	/*
+		in : Jacobi [X:Y:Z]
+		out : Proj [A:B:C]
+		[X:Y:Z] as Jacobi
+		= (X/Z^2, Y/Z^3) as Affine
+		= [X/Z^2:Y/Z^3:1] as Proj
+		= [XZ:Y:Z^3] as Proj
+	*/
+	void toProj(G2& out, const G2& in) const
+	{
+		Fp2 z2;
+		Fp2::sqr(z2, in.z);
+		Fp2::mul(out.x, in.x, in.z);
+		out.y = in.y;
+		Fp2::mul(out.z, in.z, z2);
+	}
+	/*
+		in : Proj [X:Y:Z]
+		out : Jacobi [A:B:C]
+		[X:Y:Z] as Proj
+		= (X/Z, Y/Z) as Affine
+		= [X/Z:Y/Z:1] as Jacobi
+		= [XZ:YZ^2:Z] as Jacobi
+	*/
+	void toJacobi(G2& out, const G2& in) const
+	{
+		Fp2 z2;
+		Fp2::sqr(z2, in.z);
+		Fp2::mul(out.x, in.x, in.z);
+		Fp2::mul(out.y, in.y, z2);
+		out.z = in.z;
+	}
+	// Proj
+	void py_ecc_map_to_curve_G2(G2& out, const Fp2& t) const
+	{
+		Point P;
+		py_ecc_optimized_swu_G2(P, t);
+		py_ecc_iso_map_G2(out, P);
+	}
 	// https://github.com/algorand/bls_sigs_ref
 	void osswu2_help(Point& P, const Fp2& t) const
 	{
@@ -574,16 +632,27 @@ struct MapToG2_WB19 {
 	{
 		Point Pp;
 		osswu2_help(Pp, t);
-//put(Pp);
 		if (t2) {
 			Point P2;
 			osswu2_help(P2, *t2);
-//put(P2);
 			add(Pp, Pp, P2);
 		}
 		iso3(P, Pp);
 		clear_h2(P, P);
 	}
+	void py_ecc_hash_to_G2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
+	{
+		Fp2 t1, t2;
+		hashToFp2(t1, msg, msgSize, 0, dst, dstSize);
+		hashToFp2(t2, msg, msgSize, 1, dst, dstSize);
+		G2 P1, P2;
+		py_ecc_map_to_curve_G2(P1, t1);
+		py_ecc_map_to_curve_G2(P2, t2);
+		toJacobi(P1, P1);
+		toJacobi(P2, P2);
+		P1 += P2;
+		clear_h2(out, P1);
+	}
 	void map2curve_osswu2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
 	{
 		Fp2 t1, t2;
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 32ba4a01..83d269b3 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -96,7 +96,7 @@ std::string toHexStr(const G2& P)
 	return toHexStr(xy, 96);
 }
 
-void testHash_g2(const std::string& fileName)
+void ethMsgToG2test(const std::string& fileName)
 {
 	const char *dst = "\x02";
 	printf("name=%s\n", fileName.c_str());
@@ -115,13 +115,16 @@ void testHash_g2(const std::string& fileName)
 	}
 }
 
-void testHash_g2All(const std::string& dir)
+void ethMsgToG2testAll(const std::string& dir)
+	try
 {
 	cybozu::FileList list = cybozu::GetFileList(dir);
 	for (size_t i = 0; i < list.size(); i++) {
 		const cybozu::FileInfo& info = list[i];
-		testHash_g2(dir + "/" + info.name);
+		ethMsgToG2test(dir + "/" + info.name);
 	}
+} catch (...) {
+	printf("skip test because `%s` is not found\n", dir.c_str());
 }
 
 void testHMAC()
@@ -142,7 +145,7 @@ void testHashToFp2()
 	const char *outS = "0xe54bc0f2e26071a79ba5fe7ae5307d39cf5519e581e03b43f39a431eccc258fa1477c517b1268b22986601ee5caa5ea 0x17e8397d5e687ff7f915c23f27fe1ca2c397a7df91de8c88dc82d34c9188a3ef719f9f20436ea8a5fe7d509fbc79214d";
 	Fp2 out, ok;
 	ok.setStr(outS);
-	mcl::bn::local::hashToFp2(out, msg, strlen(msg) + 1, 0, dst, strlen(dst));
+	ethMsgToFp2(out, msg, strlen(msg) + 1, 0, dst, strlen(dst));
 	CYBOZU_TEST_EQUAL(out, ok);
 }
 
@@ -166,15 +169,13 @@ void ethMsgToG2test()
 	};
 	G2 out, ok;
 	set(ok, outS);
-//	mapto.map2curve_osswu2(out, msg, strlen(msg) + 1 /* contains zero byte */, dst, strlen(dst));
 	ethMsgToG2(out, msg, strlen(msg) + 1 /* contains zero byte */, dst, strlen(dst));
 	CYBOZU_TEST_EQUAL(out, ok);
 }
 
 template<class T>
-void test2(const T& mapto)
+void py_eccTest(const T& mapto)
 {
-	(void)mapto;
 	/*
 		testHashToBaseFP2
 		https://github.com/status-im/nim-blscurve/blob/de64516a5933a6e8ebb01a346430e61a201b5775/blscurve/hash_to_curve.nim#L492
@@ -221,37 +222,82 @@ void test2(const T& mapto)
 		// https://media.githubusercontent.com/media/ethereum/eth2.0-spec-tests/v0.10.1/tests/general/phase0/bls/sign/small/sign_case_11b8c7cad5238946/data.yaml
 		const char *secs = "47b8192d77bf871b62e87859d653922725724a5c031afeabc60bcef5ff665138";
 		const char msg[33] = {};
+		const PointStr sigs = {
+			{
+				"2293012529822761631014706649736058250445440108079005633865844964288531978383212702502746862140143627562812967825888",
+				"1475696770777687381853347234154288535008294218073605500048435508284141334771039537063168112498702685312150787094910",
+			},
+			{
+				"1469299105114671507318396580458717074245984116935623233990667855919962974356517750849608590897738614199799891365360",
+				"2030012464923141446228430710552804525466499055365665031199510204412192520245701820596000835423160058948948207746066",
+			},
+			{
+				"3767430478723640173773019527754919617225964135305264831468522226308636862085707682484234512649553124965049251340541",
+				"1620434249170283311052688271749383011546709139865619017626863134580828776106815964830529695055765742705622363756158",
+			}
+		};
 		const char *expect = "b2deb7c656c86cb18c43dae94b21b107595486438e0b906f3bdb29fa316d0fc3cab1fc04c6ec9879c773849f2564d39317bfa948b4a35fc8509beafd3a2575c25c077ba8bca4df06cb547fe7ca3b107d49794b7132ef3b5493a6ffb2aad2a441";
 		Fr sec;
-		sec.deserializeHexStr(secs);
-		G2 Q;
+		sec.setStr(secs, 16);
+		G2 P1, P2, Q;
+		set(Q, sigs);
 		Q.deserializeHexStr(expect);
-		Q *= (1/sec);
-		G2 P;
 		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO-_POP_";
-std::cout << std::hex;
-		ethMsgToG2(P, msg, 33, dst, strlen(dst));
-		printf("P=%s\n", P.serializeToHexStr().c_str());
-		printf("Q=%s\n", Q.serializeToHexStr().c_str());
-		mapto.put(P);
-		printf("equal %d\n", P == Q);
-PointStr ss = {
-{
-	"3257676086538823567761244186080544403330427395946948635449582231233180442322077484215757257097813156392664917178234",
-	"228537154970146118588036771068753907531432250550232803895899422656339347346840810590265440478956079727608969412311",
-},
-{
-	"2211656311977487430400091470761449132135875543285725344573261083165139360734602590585740129428161178745780787382986",
-	"40258781102313547933704047733645277081466097003572358028270922475602169023300010845551344432311507156784289541037",
-},
-{
-	"3554635405737095173231135338330740471713348364117258010850826274365262386961694608537862757803628655357449929362973",
-	"3305133470803621861948711123350198492693369595391902116552614265910644738630055172693143208260379598437272858586799",
-},
-};
-		Point RR;
-		set(RR, ss);
-		mapto.put(RR);
+		const size_t dstSize = strlen(dst);
+		const size_t msgSize = 32 + 1;
+		Fp2 t1, t2;
+		ethMsgToFp2(t1, msg, msgSize, 0, dst, dstSize);
+		ethMsgToFp2(t2, msg, msgSize, 1, dst, dstSize);
+		mapto.py_ecc_map_to_curve_G2(P1, t1);
+		mapto.py_ecc_map_to_curve_G2(P2, t2);
+		const PointStr ss = {
+			{
+				"1972340536407012813644167184956896760015950618902823780657111692209122974250648595689834944711427684709284318183285",
+				"2952312506825835541808570850755873891927945826649651965587037814445801597710562388482713867284483531575836668891717",
+			},
+			{
+				"2802951456840474233717338518518040462806475389210379447165158098937491293557221993219251045678976553989024259770721",
+				"2695848095528813794114709219550802586214789808214026789183854152760661360110019071654047951530688159586363471282307",
+			},
+			{
+				"1480478729322062079370070638002133449414477155913782123147952976030053267833796311564176542916706247537348236105579",
+				"3253481872910728113595595353980041952789112074899014850028493351493155577726278005524067083458491999010934020984031",
+			}
+		};
+		mapto.toJacobi(P1, P1);
+		mapto.toJacobi(P2, P2);
+		P1 += P2;
+		G2 P11;
+		set(P11, ss);
+		mapto.toJacobi(P11, P11);
+		CYBOZU_TEST_EQUAL(P1, P11);
+		const PointStr clears = {
+			{
+				"1957332172874233660214089655571851577083897125827848734477574606688306573833007308344920242234605652569670194263389",
+				"1116411061540418343539740639798030171984762250397980084002067231825141620343376868772345493606425790045780405764984",
+			},
+			{
+				"1009600579479639236035097803661439342927513547544039095581093451111718225564873663970283187908867141796447259993680",
+				"1036550257360332982249682819433119008785814033355112815293516573225867246356464383591412294871954385805192773093413",
+			},
+			{
+				"1455356692682887406712747484663891805342757123109829795478648571883713143907445859929832639473694165616164972254859",
+				"625703068888812559481386371501827420717093467297957594257224036896125014497486535098535016737064365426613580045089",
+			},
+		};
+		set(P11, clears);
+		mapto.clear_h2(P1, P1);
+		mapto.toJacobi(P11, P11);
+		CYBOZU_TEST_EQUAL(P1, P11);
+		mapto.py_ecc_hash_to_G2(P1, msg, msgSize, dst, dstSize);
+		CYBOZU_TEST_EQUAL(P1, P11);
+		ethMsgToG2(P1, msg, msgSize, dst, dstSize);
+		CYBOZU_TEST_EQUAL(P1, P11);
+		set(P11, sigs);
+		mapto.toJacobi(P11, P11);
+		P1 *= sec;
+		CYBOZU_TEST_EQUAL(P1, P11);
+		CYBOZU_TEST_EQUAL(P1.serializeToHexStr(), expect);
 	}
 }
 
@@ -272,12 +318,9 @@ void testSign(const T& mapto)
 		t.a = tbl[i];
 		for (size_t j = 0; j < N; j++) {
 			t.b = tbl[j];
-			if (mapto.isNegSign(t) != (expect[i][j] < 0)) {
-				printf("err %zd %zd\n", i, j);
-			}
+			CYBOZU_TEST_EQUAL(mapto.isNegSign(t), (expect[i][j] < 0));
 		}
 	}
-	puts("ok");
 }
 
 template<class T>
@@ -516,6 +559,10 @@ void ethFp2ToG2test()
 void testVec(const char *file)
 {
 	std::ifstream ifs(file);
+	if (!ifs) {
+		printf("skip testVec because `%s` is not found\n", file);
+	}
+	printf("testVec %s\n", file);
 	Fp2 t1, t2;
 	G2 out, P;
 	std::string s;
@@ -535,7 +582,7 @@ void testVec(const char *file)
 }
 
 template<class T>
-void test3(const T& mapto)
+void py_eccTest2(const T& mapto)
 {
 	Fp2Str ts = {
 		"1918231859236664604157448091070531325862162392395253569013354101088957561890652491757605826252839368362075816084620",
@@ -555,14 +602,32 @@ void test3(const T& mapto)
 			"3910188857576114167072883940429120413632909260968721432280195359371907407125083761682822023489835923188989938783197",
 		},
 	};
+	PointStr out2s = {
+		{
+			"3257676086538823567761244186080544403330427395946948635449582231233180442322077484215757257097813156392664917178234",
+			"228537154970146118588036771068753907531432250550232803895899422656339347346840810590265440478956079727608969412311",
+		},
+		{
+			"2211656311977487430400091470761449132135875543285725344573261083165139360734602590585740129428161178745780787382986",
+			"40258781102313547933704047733645277081466097003572358028270922475602169023300010845551344432311507156784289541037",
+		},
+		{
+			"3554635405737095173231135338330740471713348364117258010850826274365262386961694608537862757803628655357449929362973",
+			"3305133470803621861948711123350198492693369595391902116552614265910644738630055172693143208260379598437272858586799",
+		},
+	};
 	Fp2 t;
 	set(t, ts);
-	Point P, Q;
-	mapto.optimized_swu_G2(P, t);
-	set(Q, out1s);
-	CYBOZU_TEST_EQUAL(P.x, Q.x);
-	CYBOZU_TEST_EQUAL(P.y, Q.y);
-	CYBOZU_TEST_EQUAL(P.z, Q.z);
+	Point p, q;
+	mapto.py_ecc_optimized_swu_G2(p, t);
+	set(q, out1s);
+	CYBOZU_TEST_EQUAL(p.x, q.x);
+	CYBOZU_TEST_EQUAL(p.y, q.y);
+	CYBOZU_TEST_EQUAL(p.z, q.z);
+	G2 P, Q;
+	set(P, out2s);
+	mapto.py_ecc_map_to_curve_G2(Q, t);
+	CYBOZU_TEST_EQUAL(P, Q);
 }
 
 CYBOZU_TEST_AUTO(test)
@@ -571,9 +636,8 @@ CYBOZU_TEST_AUTO(test)
 	Fp::setETHserialization(true);
 	bn::setMapToMode(MCL_MAP_TO_MODE_WB19);
 	const mcl::bn::local::MapToG2_WB19& mapto = BN::param.mapTo.mapToG2_WB19_;
-	test3(mapto);
-return;
-	test2(mapto);
+	py_eccTest(mapto);
+	py_eccTest2(mapto);
 	osswu2_helpTest(mapto);
 	addTest(mapto);
 	iso3Test(mapto);
@@ -582,7 +646,7 @@ return;
 	testHMAC();
 	testHashToFp2();
 	ethMsgToG2test();
-//	testVec("fips_186_3_B233.txt");
-//	testVec("misc.txt");
-//	testHash_g2All("../../bls_sigs_ref/test-vectors/hash_g2/");
+	testVec("../misc/mapto/fips_186_3_B233.txt");
+	testVec("../misc/mapto/misc.txt");
+	ethMsgToG2testAll("../bls_sigs_ref/test-vectors/hash_g2/");
 }

From 1a7610fc84801bafa2e005613bab419e80e9a302 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 28 Jan 2020 20:24:32 +0900
Subject: [PATCH 163/553] [change] ethMsgToFp2 adds zero byte at the end of
 msg

---
 include/mcl/mapto_wb19.hpp | 2 +-
 test/bn_c_test.hpp         | 4 ++--
 test/mapto_wb19_test.cpp   | 9 ++++-----
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 926f05a6..fc6453da 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -11,7 +11,7 @@
 // ctr = 0 or 1 or 2
 inline void hashToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
 {
-	const bool addZeroByte = false; // append zero byte to msg
+	const bool addZeroByte = true; // append zero byte to msg
 	assert(ctr <= 2);
 	const size_t degree = 2;
 	uint8_t msg_prime[32];
diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index c98863bf..0f871236 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -682,7 +682,7 @@ CYBOZU_TEST_AUTO(eth_hash)
 			"14eef8ca34b82d065d187a3904cb313dbb44558917cc5091574d9999b5ecfdd5af2fa3aea6e02fb253bf4ae670e72d55"
 		};
 		mclBnFp2 x, y;
-		CYBOZU_TEST_EQUAL(mclBn_ethMsgToFp2(&x, msg, strlen(msg) + 1 /* add zero byte */, ctr, dst, strlen(dst)), 0);
+		CYBOZU_TEST_EQUAL(mclBn_ethMsgToFp2(&x, msg, strlen(msg), ctr, dst, strlen(dst)), 0);
 		setFp2(&y, ys);
 		CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&x, &y));
 	}
@@ -730,7 +730,7 @@ CYBOZU_TEST_AUTO(eth_hash)
 			"0x1796ee0f0b9b65802c90e3e1586034f3826ec3538c66525de298d1ff2f7a26f2ec553ec64e5989ed9841c4456d0bddd7",
 		};
 		mclBnG2 P, Q;
-		mclBn_ethMsgToG2(&P, msg, strlen(msg) + 1 /* add zero byte */, dst, strlen(dst));
+		mclBn_ethMsgToG2(&P, msg, strlen(msg), dst, strlen(dst));
 		setFp2(&Q.x, xs);
 		setFp2(&Q.y, ys);
 		setFp2(&Q.z, zs);
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 83d269b3..09f49c4a 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -108,7 +108,6 @@ void ethMsgToG2test(const std::string& fileName)
 		ifs >> msg >> zero >> ret;
 		if (zero != "00") break;
 		buf = fromHexStr(msg);
-		buf.push_back(0); // add zero byte
 		ethMsgToG2(out, buf.data(), buf.size(), dst, strlen(dst));
 		std::string s = toHexStr(out);
 		CYBOZU_TEST_EQUAL(s, ret);
@@ -145,7 +144,7 @@ void testHashToFp2()
 	const char *outS = "0xe54bc0f2e26071a79ba5fe7ae5307d39cf5519e581e03b43f39a431eccc258fa1477c517b1268b22986601ee5caa5ea 0x17e8397d5e687ff7f915c23f27fe1ca2c397a7df91de8c88dc82d34c9188a3ef719f9f20436ea8a5fe7d509fbc79214d";
 	Fp2 out, ok;
 	ok.setStr(outS);
-	ethMsgToFp2(out, msg, strlen(msg) + 1, 0, dst, strlen(dst));
+	ethMsgToFp2(out, msg, strlen(msg), 0, dst, strlen(dst));
 	CYBOZU_TEST_EQUAL(out, ok);
 }
 
@@ -169,7 +168,7 @@ void ethMsgToG2test()
 	};
 	G2 out, ok;
 	set(ok, outS);
-	ethMsgToG2(out, msg, strlen(msg) + 1 /* contains zero byte */, dst, strlen(dst));
+	ethMsgToG2(out, msg, strlen(msg), dst, strlen(dst));
 	CYBOZU_TEST_EQUAL(out, ok);
 }
 
@@ -186,7 +185,7 @@ void py_eccTest(const T& mapto)
 		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_";
 		const char *expect = "18df4dc51885b18ca0082a4966b0def46287930b8f1c0b673b11ac48d19c8899bc150d83fd3a7a1430b0de541742c1d4 14eef8ca34b82d065d187a3904cb313dbb44558917cc5091574d9999b5ecfdd5af2fa3aea6e02fb253bf4ae670e72d55";
 		Fp2 x;
-		ethMsgToFp2(x, msg, strlen(msg) + 1 /* add zero byte */, ctr, dst, strlen(dst));
+		ethMsgToFp2(x, msg, strlen(msg), ctr, dst, strlen(dst));
 		CYBOZU_TEST_EQUAL(toHexStr(x), expect);
 	}
 	{
@@ -244,7 +243,7 @@ void py_eccTest(const T& mapto)
 		Q.deserializeHexStr(expect);
 		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO-_POP_";
 		const size_t dstSize = strlen(dst);
-		const size_t msgSize = 32 + 1;
+		const size_t msgSize = 32;
 		Fp2 t1, t2;
 		ethMsgToFp2(t1, msg, msgSize, 0, dst, dstSize);
 		ethMsgToFp2(t2, msg, msgSize, 1, dst, dstSize);

From 5d2379b2262347528ea9723e232eec38143d8cd6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 31 Jan 2020 14:34:06 +0900
Subject: [PATCH 164/553] v1.05

---
 include/mcl/op.hpp | 2 +-
 readme.md          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index d1ad3e43..d6132a46 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x104; /* 0xABC = A.BC */
+static const int version = 0x105; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index efc2d6d8..73878896 100644
--- a/readme.md
+++ b/readme.md
@@ -292,6 +292,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
+- 2020/Jan/31 v1.05 mclBn_ethMsgToFp2 has changed to append zero byte at the end of msg
 - 2020/Jan/25 v1.04 add new hash functions
 - 2019/Dec/05 v1.03 disable to check the order in setStr
 - 2019/Sep/30 v1.00 add some functions to bn.h ; [api.md](api.md).

From 959a0548d5c675ace725f49ec806144b86b7751d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 10 Feb 2020 14:01:24 +0900
Subject: [PATCH 165/553] add IoEcAffineSerialize for serialize

---
 include/mcl/ec.hpp  | 44 ++++++++++++++++++++++++++++++++++++++++----
 include/mcl/fp.hpp  |  4 ++--
 include/mcl/op.hpp  | 10 ++++++++--
 test/bls12_test.cpp |  2 +-
 test/bn_test.cpp    |  2 +-
 5 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 8cf3a49e..ee747799 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -752,6 +752,23 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		}
 		EcT P(*this);
 		P.normalize();
+		if (ioMode & IoEcAffineSerialize) {
+			if (b_ == 0) { // assume Zero if x = y = 0
+				*pb = false;
+				return;
+			}
+			if (isZero()) {
+				// all zero
+				P.z.save(pb, os, IoSerialize);
+				if (!*pb) return;
+				P.z.save(pb, os, IoSerialize);
+				return;
+			}
+			P.x.save(pb, os, IoSerialize);
+			if (!*pb) return;
+			P.y.save(pb, os, IoSerialize);
+			return;
+		}
 		if (ioMode & (IoSerialize | IoSerializeHexStr)) {
 			const size_t n = Fp::getByteSize();
 			const size_t adj = isMSBserialize() ? 0 : 1;
@@ -835,6 +852,21 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 #else
 		z = 1;
 #endif
+		if (ioMode & IoEcAffineSerialize) {
+			if (b_ == 0) { // assume Zero if x = y = 0
+				*pb = false;
+				return;
+			}
+			x.load(pb, is, IoSerialize);
+			if (!*pb) return;
+			y.load(pb, is, IoSerialize);
+			if (!*pb) return;
+			if (x.isZero() && y.isZero()) {
+				z.clear();
+				return;
+			}
+			goto verifyValidness;
+		}
 		if (ioMode & (IoSerialize | IoSerializeHexStr)) {
 			const size_t n = Fp::getByteSize();
 			const size_t adj = isMSBserialize() ? 0 : 1;
@@ -917,10 +949,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 			x.load(pb, is, ioMode); if (!*pb) return;
 			if (c == '1') {
 				y.load(pb, is, ioMode); if (!*pb) return;
-				if (!isValid(x, y)) {
-					*pb = false;
-					return;
-				}
+				goto verifyValidness;
 			} else if (c == '2' || c == '3') {
 				bool isYodd = c == '3';
 				*pb = getYfromX(y, x, isYodd);
@@ -941,6 +970,13 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		} else {
 			*pb = true;
 		}
+		return;
+	verifyValidness:
+		if (!isValid(x, y)) {
+			*pb = false;
+			return;
+		}
+		goto verifyOrder;
 	}
 	// deplicated
 	static void setCompressedExpression(bool compressedExpression = true)
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 894d939c..d49fd45e 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -263,7 +263,7 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 	{
 		bool isMinus = false;
 		*pb = false;
-		if (ioMode & (IoArray | IoArrayRaw | IoSerialize | IoSerializeHexStr)) {
+		if (fp::isIoSerializeMode(ioMode)) {
 			const size_t n = getByteSize();
 			v_[op_.N - 1] = 0;
 			size_t readSize;
@@ -299,7 +299,7 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 	void save(bool *pb, OutputStream& os, int ioMode) const
 	{
 		const size_t n = getByteSize();
-		if (ioMode & (IoArray | IoArrayRaw | IoSerialize | IoSerializeHexStr)) {
+		if (fp::isIoSerializeMode(ioMode)) {
 			if (ioMode & IoArrayRaw) {
 				cybozu::write(pb, os, v_, n);
 			} else {
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index d6132a46..021d4159 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -101,11 +101,17 @@ enum IoMode {
 	IoSerialize = 512, // use MBS for 1-bit y
 	IoFixedSizeByteSeq = IoSerialize, // obsolete
 	IoEcProj = 1024, // projective or jacobi coordinate
-	IoSerializeHexStr = 2048 // printable hex string
+	IoSerializeHexStr = 2048, // printable hex string
+	IoEcAffineSerialize = 4096 // serialize [x:y]
 };
 
 namespace fp {
 
+inline bool isIoSerializeMode(int ioMode)
+{
+	return ioMode & (IoArray | IoArrayRaw | IoSerialize | IoEcAffineSerialize | IoSerializeHexStr);
+}
+
 const size_t UnitBitSize = sizeof(Unit) * 8;
 
 const size_t maxUnitSize = (MCL_MAX_BIT_SIZE + UnitBitSize - 1) / UnitBitSize;
@@ -366,7 +372,7 @@ struct Op {
 
 inline const char* getIoSeparator(int ioMode)
 {
-	return (ioMode & (IoArray | IoArrayRaw | IoSerialize | IoSerializeHexStr)) ? "" : " ";
+	return (ioMode & (IoArray | IoArrayRaw | IoSerialize | IoSerializeHexStr | IoEcAffineSerialize)) ? "" : " ";
 }
 
 inline void dump(const void *buf, size_t n)
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index fe0ca7a7..df8b1273 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -104,7 +104,7 @@ void pairingC(Fp12& e, const G1& P, const G2& Q)
 void testIoAll(const G1& P, const G2& Q)
 {
 	const int FpTbl[] = { 0, 2, 2|mcl::IoPrefix, 10, 16, 16|mcl::IoPrefix, mcl::IoArray, mcl::IoArrayRaw };
-	const int EcTbl[] = { mcl::IoEcAffine, mcl::IoEcProj, mcl::IoEcCompY, mcl::IoSerialize };
+	const int EcTbl[] = { mcl::IoEcAffine, mcl::IoEcProj, mcl::IoEcCompY, mcl::IoSerialize, mcl::IoEcAffineSerialize };
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(FpTbl); i++) {
 		for (size_t j = 0; j < CYBOZU_NUM_OF_ARRAY(EcTbl); j++) {
 			G1 P2 = P, P3;
diff --git a/test/bn_test.cpp b/test/bn_test.cpp
index 15f6f456..e6139b36 100644
--- a/test/bn_test.cpp
+++ b/test/bn_test.cpp
@@ -338,7 +338,7 @@ void testTrivial(const G1& P, const G2& Q)
 void testIoAll(const G1& P, const G2& Q)
 {
 	const int FpTbl[] = { 0, 2, 2|mcl::IoPrefix, 10, 16, 16|mcl::IoPrefix, mcl::IoArray, mcl::IoArrayRaw };
-	const int EcTbl[] = { mcl::IoEcAffine, mcl::IoEcProj, mcl::IoEcCompY, mcl::IoSerialize };
+	const int EcTbl[] = { mcl::IoEcAffine, mcl::IoEcProj, mcl::IoEcCompY, mcl::IoSerialize, mcl::IoEcAffineSerialize };
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(FpTbl); i++) {
 		for (size_t j = 0; j < CYBOZU_NUM_OF_ARRAY(EcTbl); j++) {
 			G1 P2 = P, P3;

From 04157779d6a69fc91d0f92282ebf21f447c513ab Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 12 Feb 2020 16:38:31 +0900
Subject: [PATCH 166/553] add CipherTextGi::isValid()

---
 include/mcl/she.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/mcl/she.hpp b/include/mcl/she.hpp
index 388b06fc..42dc50b0 100644
--- a/include/mcl/she.hpp
+++ b/include/mcl/she.hpp
@@ -374,6 +374,10 @@ struct SHET {
 			S_.clear();
 			T_.clear();
 		}
+		bool isValid() const
+		{
+			return S_.isValid() && T_.isValid();
+		}
 		static void add(CipherTextAT& z, const CipherTextAT& x, const CipherTextAT& y)
 		{
 			/*

From e0f7f5d0735f9214c157b4b163b3f048375e05c7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 12 Feb 2020 16:38:48 +0900
Subject: [PATCH 167/553] add test of window_method

---
 include/mcl/window_method.hpp | 34 ++++++++++++++++------------------
 test/window_method_test.cpp   | 31 +++++++++++++++++--------------
 2 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/include/mcl/window_method.hpp b/include/mcl/window_method.hpp
index cb4fad37..64ff8699 100644
--- a/include/mcl/window_method.hpp
+++ b/include/mcl/window_method.hpp
@@ -23,35 +23,33 @@ struct ArrayIterator {
 		, bitSize(bitSize)
 		, w(w)
 		, pos(0)
-		, mask((w == TbitSize ? 0 : (T(1) << w)) - 1)
+		, mask(makeMask(w))
 	{
 		assert(w <= TbitSize);
 	}
+	T makeMask(size_t w) const
+	{
+		return (w == TbitSize) ? ~T(0) : (T(1) << w) - 1;
+	}
 	bool hasNext() const { return bitSize > 0; }
 	T getNext()
 	{
-		if (w == TbitSize) {
-			bitSize -= w;
-			return *x++;
+		if (bitSize < w) {
+			w = bitSize;
+			mask = makeMask(w);
 		}
-		if (pos + w < TbitSize) {
-			T v = (*x >> pos) & mask;
-			pos += w;
-			if (bitSize < w) {
-				bitSize = 0;
+		if (pos + w <= TbitSize) {
+			T v = x[0] >> pos;
+			if (pos + w < TbitSize) {
+				pos += w;
+				v &= mask;
 			} else {
-				bitSize -= w;
+				pos = 0;
+				x++;
 			}
+			bitSize -= w;
 			return v;
 		}
-		if (pos + bitSize <= TbitSize) {
-			assert(bitSize <= w);
-			T v = *x >> pos;
-			assert((v >> bitSize) == 0);
-			bitSize = 0;
-			return v & mask;
-		}
-		assert(pos > 0);
 		T v = (x[0] >> pos) | (x[1] << (TbitSize - pos));
 		v &= mask;
 		pos = (pos + w) - TbitSize;
diff --git a/test/window_method_test.cpp b/test/window_method_test.cpp
index 1b0f702a..437ad556 100644
--- a/test/window_method_test.cpp
+++ b/test/window_method_test.cpp
@@ -6,21 +6,24 @@
 
 CYBOZU_TEST_AUTO(ArrayIterator)
 {
-	const uint32_t in[2] = { 0x12345678, 0xabcdef89 };
-	const size_t bitSize = 64;
-	for (size_t w = 1; w <= 32; w++) {
-		const uint32_t mask = uint32_t((uint64_t(1) << w) - 1);
-		mpz_class x;
-		mcl::gmp::setArray(x, in, 2);
-		mcl::fp::ArrayIterator<uint32_t> ai(in, bitSize, w);
-		size_t n = (bitSize + w - 1) / w;
-		for (size_t j = 0; j < n; j++) {
-			CYBOZU_TEST_ASSERT(ai.hasNext());
-			uint32_t v = ai.getNext();
-			CYBOZU_TEST_EQUAL(x & mask, v);
-			x >>= w;
+	const uint32_t in[] = { 0x12345678, 0xabcdef89, 0xaabbccdd };
+	for (size_t bitSize = 1; bitSize <= 64; bitSize++) {
+		for (size_t w = 1; w <= 32; w++) {
+
+			const uint32_t mask = uint32_t((uint64_t(1) << w) - 1);
+			mpz_class x;
+			mcl::gmp::setArray(x, in, CYBOZU_NUM_OF_ARRAY(in));
+			x &= (mpz_class(1) << bitSize) - 1;
+			mcl::fp::ArrayIterator<uint32_t> ai(in, bitSize, w);
+			size_t n = (bitSize + w - 1) / w;
+			for (size_t j = 0; j < n; j++) {
+				CYBOZU_TEST_ASSERT(ai.hasNext());
+				uint32_t v = ai.getNext();
+				CYBOZU_TEST_EQUAL(x & mask, v);
+				x >>= w;
+			}
+			CYBOZU_TEST_ASSERT(!ai.hasNext());
 		}
-		CYBOZU_TEST_ASSERT(!ai.hasNext());
 	}
 }
 

From c155ee3960e6aa33d1ef1f7d337b136158db01e5 Mon Sep 17 00:00:00 2001
From: erik aronesty <erik@q32.com>
Date: Mon, 2 Mar 2020 14:24:51 -0500
Subject: [PATCH 168/553] allow windows users to choose dynamic runtime dll
 build

---
 CMakeLists.txt | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c4481d65..aa1ada71 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -42,6 +42,11 @@ option(
 	"only lib"
 	OFF
 )
+option(
+    MSVC_RUNTIME_DLL
+    "use dynamic runtime /MD in msvc builds"
+	OFF
+)
 
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
@@ -66,8 +71,13 @@ if(USE_LLVM)
 endif()
 
 if(MSVC)
-	set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT /W4 /Oy /Ox /EHsc /GS- /Zi /DNDEBUG /DNOMINMAX")
-	set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MTd /W4 /DNOMINMAX")
+    if(MSVC_RUNTIME_DLL)
+        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MD /W4 /Oy /Ox /EHsc /GS- /Zi /DNDEBUG /DNOMINMAX")
+        set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MDd /W4 /DNOMINMAX")
+    else()
+    	set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT /W4 /Oy /Ox /EHsc /GS- /Zi /DNDEBUG /DNOMINMAX")
+	    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MTd /W4 /DNOMINMAX")
+    endif()
 	link_directories(${CMAKE_SOURCE_DIR}/../cybozulib_ext/lib)
 	link_directories(${CMAKE_SOURCE_DIR}/lib)
 else()

From a004a00952f8201f1c43911395e7b6f019443538 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 4 Mar 2020 09:33:55 +0900
Subject: [PATCH 169/553] remove warning of vc in fp_test

---
 test/fp_test.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/fp_test.cpp b/test/fp_test.cpp
index 36ba4d69..469f35d3 100644
--- a/test/fp_test.cpp
+++ b/test/fp_test.cpp
@@ -651,7 +651,7 @@ void getInt64Test()
 {
 	const int64_t tbl[] = {
 		0, 1, 123, 0xffffffff, int64_t(0x7fffffffffffffffull),
-		-1, -2, -12345678, int64_t(-9223372036854775808ull)/*-int64_t(1) << 63*/,
+		-1, -2, -12345678, -int64_t(0x7fffffffffffffffull) - 1/*-int64_t(1) << 63*/,
 	};
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 		int64_t a = tbl[i];

From a53b6b7f4c6b401029c99ffd3154f5868d868a94 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 4 Mar 2020 09:49:48 +0900
Subject: [PATCH 170/553] use _udiv128 for divUnit on msvc 2019

---
 include/mcl/vint.hpp | 4 ++--
 test/vint_test.cpp   | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/mcl/vint.hpp b/include/mcl/vint.hpp
index 40f43b26..13e24e77 100644
--- a/include/mcl/vint.hpp
+++ b/include/mcl/vint.hpp
@@ -150,7 +150,7 @@ inline uint32_t divUnit(uint32_t *pr, uint32_t H, uint32_t L, uint32_t y)
 #if MCL_SIZEOF_UNIT == 8
 inline uint64_t divUnit(uint64_t *pr, uint64_t H, uint64_t L, uint64_t y)
 {
-#if defined(MCL_VINT_64BIT_PORTABLE)
+#if defined(MCL_VINT_64BIT_PORTABLE) || (defined(_MSC_VER) && _MSC_VER < 1920)
 	uint32_t px[4] = { uint32_t(L), uint32_t(L >> 32), uint32_t(H), uint32_t(H >> 32) };
 	uint32_t py[2] = { uint32_t(y), uint32_t(y >> 32) };
 	size_t xn = 4;
@@ -162,7 +162,7 @@ inline uint64_t divUnit(uint64_t *pr, uint64_t H, uint64_t L, uint64_t y)
 	*pr = make64(r[1], r[0]);
 	return make64(q[1], q[0]);
 #elif defined(_MSC_VER)
-	#error "divUnit for uint64_t is not supported"
+	return _udiv128(H, L, y, pr);
 #else
 	typedef __attribute__((mode(TI))) unsigned int uint128;
 	uint128 t = (uint128(H) << 64) | L;
diff --git a/test/vint_test.cpp b/test/vint_test.cpp
index ab378148..39c36889 100644
--- a/test/vint_test.cpp
+++ b/test/vint_test.cpp
@@ -9,6 +9,7 @@
 #include <cybozu/xorshift.hpp>
 #ifndef MCL_USE_VINT
 #include <gmpxx.h>
+#include <cybozu/link_mpir.hpp>
 #endif
 
 #define PUT(x) std::cout << #x "=" << x << std::endl;

From 1b043ade54bf7e30b8edc29eb01410746ba92d3d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 28 Feb 2020 21:09:11 +0900
Subject: [PATCH 171/553] dirty hack of multi curve instance

---
 include/mcl/bn.hpp         | 65 +++++++-------------------------------
 include/mcl/curve_type.h   | 59 ++++++++++++++++++++++++++++++++++
 include/mcl/mapto_wb19.hpp |  5 +++
 sample/multi.cpp           | 65 ++++++++++++++++++++++++++++++++++++++
 test/mapto_wb19_test.cpp   |  4 +--
 5 files changed, 142 insertions(+), 56 deletions(-)
 create mode 100644 sample/multi.cpp

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index c7e7b4c8..9e361886 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -1,4 +1,6 @@
-#pragma once
+#ifndef MCL_INCLUDE_MCL_BN_HPP
+#define MCL_INCLUDE_MCL_BN_HPP
+// use MCL_INCLUDE_MCL_BN_HPP instead of #pragma once to be able to include twice
 /**
 	@file
 	@brief optimal ate pairing over BN-curve / BLS12-curve
@@ -9,6 +11,7 @@
 #include <mcl/fp_tower.hpp>
 #include <mcl/ec.hpp>
 #include <mcl/curve_type.h>
+#include <mcl/mapto_wb19.hpp>
 #include <assert.h>
 #ifndef CYBOZU_DONT_USE_EXCEPTION
 #include <vector>
@@ -24,58 +27,12 @@
 #ifndef MCL_MAX_FR_BIT_SIZE
 	#define MCL_MAX_FR_BIT_SIZE MCL_MAX_FP_BIT_SIZE
 #endif
+#ifndef MCL_NAMESPACE_BN
+	#define MCL_NAMESPACE_BN bn
+#endif
 namespace mcl {
 
-struct CurveParam {
-	/*
-		y^2 = x^3 + b
-		i^2 = -1
-		xi = xi_a + i
-		v^3 = xi
-		w^2 = v
-	*/
-	const char *z;
-	int b; // y^2 = x^3 + b
-	int xi_a; // xi = xi_a + i
-	/*
-		BN254, BN381 : Dtype
-		BLS12-381 : Mtype
-	*/
-	bool isMtype;
-	int curveType; // same in curve_type.h
-	bool operator==(const CurveParam& rhs) const
-	{
-		return strcmp(z, rhs.z) == 0 && b == rhs.b && xi_a == rhs.xi_a && isMtype == rhs.isMtype;
-	}
-	bool operator!=(const CurveParam& rhs) const { return !operator==(rhs); }
-};
-
-const CurveParam BN254 = { "-0x4080000000000001", 2, 1, false, MCL_BN254 }; // -(2^62 + 2^55 + 1)
-// provisional(experimental) param with maxBitSize = 384
-const CurveParam BN381_1 = { "-0x400011000000000000000001", 2, 1, false, MCL_BN381_1 }; // -(2^94 + 2^76 + 2^72 + 1) // A Family of Implementation-Friendly BN Elliptic Curves
-const CurveParam BN381_2 = { "-0x400040090001000000000001", 2, 1, false, MCL_BN381_2 }; // -(2^94 + 2^78 + 2^67 + 2^64 + 2^48 + 1) // used in relic-toolkit
-const CurveParam BN462 = { "0x4001fffffffffffffffffffffbfff", 5, 2, false, MCL_BN462 }; // 2^114 + 2^101 - 2^14 - 1 // https://eprint.iacr.org/2017/334
-const CurveParam BN_SNARK1 = { "4965661367192848881", 3, 9, false, MCL_BN_SNARK1 };
-const CurveParam BLS12_381 = { "-0xd201000000010000", 4, 1, true, MCL_BLS12_381 };
-const CurveParam BN160 = { "0x4000000031", 3, 4, false, MCL_BN160 };
-
-inline const CurveParam& getCurveParam(int type)
-{
-	switch (type) {
-	case MCL_BN254: return mcl::BN254;
-	case MCL_BN381_1: return mcl::BN381_1;
-	case MCL_BN381_2: return mcl::BN381_2;
-	case MCL_BN462: return mcl::BN462;
-	case MCL_BN_SNARK1: return mcl::BN_SNARK1;
-	case MCL_BLS12_381: return mcl::BLS12_381;
-	case MCL_BN160: return mcl::BN160;
-	default:
-		assert(0);
-		return mcl::BN254;
-	}
-}
-
-namespace bn {
+namespace MCL_NAMESPACE_BN {
 
 namespace local {
 struct FpTag;
@@ -314,7 +271,6 @@ struct Compress {
 	}
 };
 
-#include <mcl/mapto_wb19.hpp>
 
 struct MapTo {
 	enum {
@@ -332,7 +288,7 @@ struct MapTo {
 	int type_;
 	int mapToMode_;
 	bool useOriginalG2cofactor_;
-	MapToG2_WB19 mapToG2_WB19_;
+	MapToG2_WB19<Fp, Fp2, G2> mapToG2_WB19_;
 	MapTo()
 		: type_(0)
 		, mapToMode_(MCL_MAP_TO_MODE_ORIGINAL)
@@ -2320,7 +2276,7 @@ inline const Fr& getG2cofactorAdjInv()
 inline bool ethMsgToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
 {
 	if (!BN::param.isBLS12) return false;
-	BN::local::hashToFp2(out, msg, msgSize, ctr, dst, dstSize);
+	hashToFp2(out, msg, msgSize, ctr, dst, dstSize);
 	return true;
 }
 
@@ -2340,3 +2296,4 @@ inline bool ethMsgToG2(G2& out, const void *msg, size_t msgSize, const void *dst
 
 } } // mcl::bn
 
+#endif
diff --git a/include/mcl/curve_type.h b/include/mcl/curve_type.h
index 10815592..01127cc8 100644
--- a/include/mcl/curve_type.h
+++ b/include/mcl/curve_type.h
@@ -42,3 +42,62 @@ enum {
 	MCL_MAP_TO_MODE_ETH2, // old eth2.0 spec
 	MCL_MAP_TO_MODE_WB19 // used in new eth2.0 spec
 };
+
+#ifdef __cplusplus
+
+#include <string.h>
+#include <assert.h>
+
+namespace mcl {
+
+struct CurveParam {
+	/*
+		y^2 = x^3 + b
+		i^2 = -1
+		xi = xi_a + i
+		v^3 = xi
+		w^2 = v
+	*/
+	const char *z;
+	int b; // y^2 = x^3 + b
+	int xi_a; // xi = xi_a + i
+	/*
+		BN254, BN381 : Dtype
+		BLS12-381 : Mtype
+	*/
+	bool isMtype;
+	int curveType; // same in curve_type.h
+	bool operator==(const CurveParam& rhs) const
+	{
+		return strcmp(z, rhs.z) == 0 && b == rhs.b && xi_a == rhs.xi_a && isMtype == rhs.isMtype;
+	}
+	bool operator!=(const CurveParam& rhs) const { return !operator==(rhs); }
+};
+
+const CurveParam BN254 = { "-0x4080000000000001", 2, 1, false, MCL_BN254 }; // -(2^62 + 2^55 + 1)
+// provisional(experimental) param with maxBitSize = 384
+const CurveParam BN381_1 = { "-0x400011000000000000000001", 2, 1, false, MCL_BN381_1 }; // -(2^94 + 2^76 + 2^72 + 1) // A Family of Implementation-Friendly BN Elliptic Curves
+const CurveParam BN381_2 = { "-0x400040090001000000000001", 2, 1, false, MCL_BN381_2 }; // -(2^94 + 2^78 + 2^67 + 2^64 + 2^48 + 1) // used in relic-toolkit
+const CurveParam BN462 = { "0x4001fffffffffffffffffffffbfff", 5, 2, false, MCL_BN462 }; // 2^114 + 2^101 - 2^14 - 1 // https://eprint.iacr.org/2017/334
+const CurveParam BN_SNARK1 = { "4965661367192848881", 3, 9, false, MCL_BN_SNARK1 };
+const CurveParam BLS12_381 = { "-0xd201000000010000", 4, 1, true, MCL_BLS12_381 };
+const CurveParam BN160 = { "0x4000000031", 3, 4, false, MCL_BN160 };
+
+inline const CurveParam& getCurveParam(int type)
+{
+	switch (type) {
+	case MCL_BN254: return mcl::BN254;
+	case MCL_BN381_1: return mcl::BN381_1;
+	case MCL_BN381_2: return mcl::BN381_2;
+	case MCL_BN462: return mcl::BN462;
+	case MCL_BN_SNARK1: return mcl::BN_SNARK1;
+	case MCL_BLS12_381: return mcl::BLS12_381;
+	case MCL_BN160: return mcl::BN160;
+	default:
+		assert(0);
+		return mcl::BN254;
+	}
+}
+
+} // mcl
+#endif
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index fc6453da..78d2941d 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -7,8 +7,10 @@
 	http://opensource.org/licenses/BSD-3-Clause
 	ref. https://eprint.iacr.org/2019/403 , https://github.com/algorand/bls_sigs_ref
 */
+namespace mcl {
 
 // ctr = 0 or 1 or 2
+template<class Fp2>
 inline void hashToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
 {
 	const bool addZeroByte = true; // append zero byte to msg
@@ -35,6 +37,7 @@ inline void hashToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, co
 	}
 }
 
+template<class Fp, class Fp2, class G2>
 struct MapToG2_WB19 {
 	Fp2 xi;
 	Fp2 Ell2p_a;
@@ -667,3 +670,5 @@ struct MapToG2_WB19 {
 	}
 };
 
+} // mcl
+
diff --git a/sample/multi.cpp b/sample/multi.cpp
new file mode 100644
index 00000000..24591cc4
--- /dev/null
+++ b/sample/multi.cpp
@@ -0,0 +1,65 @@
+/*
+	dirty hack to make multi instance of pairing functions
+*/
+#include <iostream>
+// BLS12-381 ; sizeof(Fp) = 48, sizeof(Fr) = 32
+#define MCL_MAX_FP_BIT_SIZE 384
+#define MCL_MAX_FR_BIT_SIZE 256
+#include <mcl/bn.hpp>
+// remove include gurad of bn.hpp
+#undef MCL_INCLUDE_MCL_BN_HPP
+// define other fp size
+// BN254 ; sizeof(Fp) = 32, sizeof(Fr) = 32
+#undef MCL_MAX_FP_BIT_SIZE
+#define MCL_MAX_FP_BIT_SIZE 256
+// define another namespace instead of bn
+#undef MCL_NAMESPACE_BN
+#define MCL_NAMESPACE_BN bn2
+#include <mcl/bn.hpp>
+
+#define PUT(x) std::cout << #x "=" << (x) << std::endl;
+int main()
+	try
+{
+	using namespace mcl;
+	mpz_class a = 123;
+	mpz_class b = 456;
+	bn::initPairing(mcl::BLS12_381);
+	bn2::initPairing(mcl::BN254);
+
+	bn::G1 P1;
+	bn::G2 Q1;
+	bn::GT e1, f1;
+
+	bn2::G1 P2;
+	bn2::G2 Q2;
+	bn2::GT e2, f2;
+
+	bn::hashAndMapToG1(P1, "abc", 3);
+	bn2::hashAndMapToG1(P2, "abc", 3);
+	PUT(P1);
+	PUT(P2);
+
+	bn::hashAndMapToG2(Q1, "abc", 3);
+	bn2::hashAndMapToG2(Q2, "abc", 3);
+
+	PUT(Q1);
+	PUT(Q2);
+	P1 += P1;
+	Q2 += Q2;
+
+	bn::pairing(e1, P1, Q1);
+	bn2::pairing(e2, P2, Q2);
+	P1 *= a;
+	Q1 *= b;
+	P2 *= a;
+	Q2 *= b;
+	bn::pairing(f1, P1, Q1);
+	bn2::pairing(f2, P2, Q2);
+	bn::GT::pow(e1, e1, a * b);
+	bn2::GT::pow(e2, e2, a * b);
+	printf("eq %d %d\n", e1 == f1, e2 == f2);
+} catch (std::exception& e) {
+	printf("err %s\n", e.what());
+	return 1;
+}
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 09f49c4a..405c3672 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -10,7 +10,7 @@
 using namespace mcl;
 using namespace mcl::bn;
 
-typedef mcl::bn::local::MapToG2_WB19 MapTo;
+typedef mcl::MapToG2_WB19<Fp, Fp2, G2> MapTo;
 typedef MapTo::Point Point;
 
 void dump(const void *msg, size_t msgSize)
@@ -634,7 +634,7 @@ CYBOZU_TEST_AUTO(test)
 	initPairing(mcl::BLS12_381);
 	Fp::setETHserialization(true);
 	bn::setMapToMode(MCL_MAP_TO_MODE_WB19);
-	const mcl::bn::local::MapToG2_WB19& mapto = BN::param.mapTo.mapToG2_WB19_;
+	const MapTo& mapto = BN::param.mapTo.mapToG2_WB19_;
 	py_eccTest(mapto);
 	py_eccTest2(mapto);
 	osswu2_helpTest(mapto);

From 00c9cb00a17818bce55704c0981c24816c21e677 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 9 Mar 2020 17:12:58 +0900
Subject: [PATCH 172/553] remove clang warning

---
 include/mcl/curve_type.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/mcl/curve_type.h b/include/mcl/curve_type.h
index 01127cc8..2b89a4e0 100644
--- a/include/mcl/curve_type.h
+++ b/include/mcl/curve_type.h
@@ -83,6 +83,10 @@ const CurveParam BN_SNARK1 = { "4965661367192848881", 3, 9, false, MCL_BN_SNARK1
 const CurveParam BLS12_381 = { "-0xd201000000010000", 4, 1, true, MCL_BLS12_381 };
 const CurveParam BN160 = { "0x4000000031", 3, 4, false, MCL_BN160 };
 
+#ifdef __clang__
+	#pragma GCC diagnostic push
+	#pragma GCC diagnostic ignored "-Wreturn-type-c-linkage"
+#endif
 inline const CurveParam& getCurveParam(int type)
 {
 	switch (type) {
@@ -98,6 +102,9 @@ inline const CurveParam& getCurveParam(int type)
 		return mcl::BN254;
 	}
 }
+#ifdef __clang__
+	#pragma GCC diagnostic pop
+#endif
 
 } // mcl
 #endif

From 9c3f370c88ff094c29c4bffa4b64eec51b6c09e4 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 13 Mar 2020 11:00:41 +0900
Subject: [PATCH 173/553] support hash-to-curve-06

---
 include/mcl/bn.hpp         | 16 +++++++++++++---
 include/mcl/curve_type.h   | 10 +++++++++-
 include/mcl/fp.hpp         |  2 ++
 include/mcl/mapto_wb19.hpp | 32 ++++++++++++++++++++++++++++----
 include/mcl/op.hpp         |  2 +-
 readme.md                  | 12 ++++++++----
 src/fp.cpp                 | 36 ++++++++++++++++++++++++++++++++++++
 test/mapto_wb19_test.cpp   | 25 +++++++++++++++++++++++++
 8 files changed, 122 insertions(+), 13 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 9e361886..ece87b9d 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -494,10 +494,19 @@ struct MapTo {
 		case MCL_MAP_TO_MODE_ORIGINAL:
 		case MCL_MAP_TO_MODE_TRY_AND_INC:
 		case MCL_MAP_TO_MODE_ETH2:
-		case MCL_MAP_TO_MODE_WB19:
 			mapToMode_ = mode;
 			return true;
 			break;
+		case MCL_MAP_TO_MODE_HASH_TO_CURVE_05:
+			mapToMode_ = mode;
+			mapToG2_WB19_.setDraftVersion(5);
+			return true;
+			break;
+		case MCL_MAP_TO_MODE_HASH_TO_CURVE_06:
+			mapToMode_ = mode;
+			mapToG2_WB19_.setDraftVersion(6);
+			return true;
+			break;
 		default:
 			return false;
 		}
@@ -569,7 +578,7 @@ struct MapTo {
 	}
 	bool calc(G2& P, const Fp2& t, bool fast = false) const
 	{
-		if (mapToMode_ == MCL_MAP_TO_MODE_WB19) {
+		if (mapToMode_ == MCL_MAP_TO_MODE_WB19 || mapToMode_ == MCL_MAP_TO_MODE_HASH_TO_CURVE_06) {
 			mapToG2_WB19_.opt_swu2_map(P, t);
 			return true;
 		}
@@ -2132,7 +2141,8 @@ inline void hashAndMapToG1(G1& P, const void *buf, size_t bufSize)
 }
 inline void hashAndMapToG2(G2& P, const void *buf, size_t bufSize)
 {
-	if (getMapToMode() == MCL_MAP_TO_MODE_WB19) {
+	int mode = getMapToMode();
+	if (mode == MCL_MAP_TO_MODE_WB19 || mode == MCL_MAP_TO_MODE_HASH_TO_CURVE_06) {
 		BN::param.mapTo.mapToG2_WB19_.msgToG2(P, buf, bufSize);
 		return;
 	}
diff --git a/include/mcl/curve_type.h b/include/mcl/curve_type.h
index 2b89a4e0..c0eb8226 100644
--- a/include/mcl/curve_type.h
+++ b/include/mcl/curve_type.h
@@ -36,11 +36,19 @@ enum {
 	MCL_NIST_P521 = MCL_SECP521R1
 };
 
+/*
+	remark : if irtf-cfrg-hash-to-curve is completely fixed, then
+	MCL_MAP_TO_MODE_WB19, MCL_MAP_TO_MODE_HASH_TO_CURVE_0? will be removed and
+	only MCL_MAP_TO_MODE_HASH_TO_CURVE will be available.
+*/
 enum {
 	MCL_MAP_TO_MODE_ORIGINAL, // see MapTo::calcBN
 	MCL_MAP_TO_MODE_TRY_AND_INC, // try-and-incremental-x
 	MCL_MAP_TO_MODE_ETH2, // old eth2.0 spec
-	MCL_MAP_TO_MODE_WB19 // used in new eth2.0 spec
+	MCL_MAP_TO_MODE_WB19, // used in new eth2.0 spec
+	MCL_MAP_TO_MODE_HASH_TO_CURVE_05 = MCL_MAP_TO_MODE_WB19, // draft-irtf-cfrg-hash-to-curve-05
+	MCL_MAP_TO_MODE_HASH_TO_CURVE_06, // draft-irtf-cfrg-hash-to-curve-06
+	MCL_MAP_TO_MODE_HASH_TO_CURVE = MCL_MAP_TO_MODE_HASH_TO_CURVE_06 // the latest version
 };
 
 #ifdef __cplusplus
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index d49fd45e..0564d980 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -77,6 +77,8 @@ void hkdf_extract_addZeroByte(uint8_t hmac[32], const uint8_t *salt, size_t salt
 void hkdf_extract(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize);
 void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6]);
 
+void expand_message_xmd(uint8_t out[256], const void *msg, size_t msgSize, const void *dst, size_t dstSize);
+
 namespace local {
 
 inline void byteSwap(void *x, size_t n)
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 78d2941d..a0c13933 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -50,6 +50,11 @@ struct MapToG2_WB19 {
 	Fp2 xden[3];
 	Fp2 ynum[4];
 	Fp2 yden[4];
+	int draftVersion_;
+	void setDraftVersion(int version)
+	{
+		draftVersion_ = version;
+	}
 	struct Point {
 		Fp2 x, y, z;
 		bool isZero() const
@@ -209,6 +214,7 @@ struct MapToG2_WB19 {
 		Fp::neg(etas[3].a, ev4);
 		etas[3].b = ev3;
 		init_iso();
+		draftVersion_ = 5;
 	}
 	void init_iso()
 	{
@@ -656,12 +662,30 @@ struct MapToG2_WB19 {
 		P1 += P2;
 		clear_h2(out, P1);
 	}
+	// hash-to-curve-06
+	void hashToFp2v6(Fp2 out[2], const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
+	{
+		uint8_t md[256];
+		mcl::fp::expand_message_xmd(md, msg, msgSize, dst, dstSize);
+		Fp *x = out[0].getFp0();
+		for (size_t i = 0; i < 4; i++) {
+			uint8_t *p = &md[64 * i];
+			fp::local::byteSwap(p, 64);
+			bool b;
+			x[i].setArrayMod(&b, p, 64);
+			assert(b); (void)b;
+		}
+	}
 	void map2curve_osswu2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
 	{
-		Fp2 t1, t2;
-		hashToFp2(t1, msg, msgSize, 0, dst, dstSize);
-		hashToFp2(t2, msg, msgSize, 1, dst, dstSize);
-		opt_swu2_map(out, t1, &t2);
+		Fp2 t[2];
+		if (draftVersion_ == 5) {
+			hashToFp2(t[0], msg, msgSize, 0, dst, dstSize);
+			hashToFp2(t[1], msg, msgSize, 1, dst, dstSize);
+		} else {
+			hashToFp2v6(t, msg, msgSize, dst, dstSize);
+		}
+		opt_swu2_map(out, t[0], &t[1]);
 	}
 	void msgToG2(G2& out, const void *msg, size_t msgSize) const
 	{
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 021d4159..3f8476bd 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x105; /* 0xABC = A.BC */
+static const int version = 0x106; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index 73878896..ace5483d 100644
--- a/readme.md
+++ b/readme.md
@@ -10,10 +10,13 @@ mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
-add new hash functions corresponding to python-impl of [algorand/bls_sig_ref](https://github.com/algorand/bls_sigs_ref).
-* `mclBn_ethMsgToFp2`(resp. `Hp2`)
-* `mclBn_ethFp2ToG2`(resp. `opt_swu2_map`)
-* `mclBn_ethMsgToG2`(resp. `map2curve_osswu2`)
+- add new hash-to-curve function of [draft-irtf-cfrg-hash-to-curve](https://cfrg.github.io/draft-irtf-cfrg-hash-to-curve/draft-irtf-cfrg-hash-to-curve.txt) at March 2020.
+  - call `setETHmode(MCL_MAP_TO_MODE_HASH_TO_CURVE_06);`
+  - The older `MAP_TO_MODE` will be removed after the draft is fixed.
+- add new hash functions corresponding to python-impl of [algorand/bls_sig_ref](https://github.com/algorand/bls_sigs_ref).
+  - `mclBn_ethMsgToFp2`(resp. `Hp2`)
+  - `mclBn_ethFp2ToG2`(resp. `opt_swu2_map`)
+  - `mclBn_ethMsgToG2`(resp. `map2curve_osswu2`)
 
 # Support architecture
 
@@ -292,6 +295,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
+- 2020/Mar/15 v1.06 support hash-to-curve-06
 - 2020/Jan/31 v1.05 mclBn_ethMsgToFp2 has changed to append zero byte at the end of msg
 - 2020/Jan/25 v1.04 add new hash functions
 - 2019/Dec/05 v1.03 disable to check the order in setStr
diff --git a/src/fp.cpp b/src/fp.cpp
index 64dc4a16..aab8e1e2 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -159,6 +159,42 @@ void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6])
 	cybozu::hmac256(out + 32, prk, 32, out, 32 + 6);
 }
 
+void expand_message_xmd(uint8_t out[256], const void *msg, size_t msgSize, const void *dst, size_t dstSize)
+{
+	const size_t len_in_bytes = 256;
+	const size_t mdSize = 32;
+	const size_t r_in_bytes = 64;
+	const size_t ell = len_in_bytes / mdSize;
+	uint8_t Z_pad[r_in_bytes] = {};
+	assert(dstSize < 256);
+	// BE(len_in_bytes, 2) + BE(0, 1) + BE(dstSize, 1)
+	uint8_t buf[2 + 1 + 1] = { 1, 0, 0, uint8_t(dstSize) };
+	uint8_t *const buf2 = buf + 2; // BE(0, 1) + BE(dstSize, 1)
+	cybozu::Sha256 h;
+	h.update(Z_pad, r_in_bytes);
+	h.update(msg, msgSize);
+	h.update(buf, sizeof(buf));
+	uint8_t md[mdSize];
+	h.digest(md, mdSize, dst, dstSize);
+	h.clear();
+	buf2[0] = 1;
+	h.update(md, mdSize);
+	h.update(buf2, 2);
+	h.digest(out, mdSize, dst, dstSize);
+	uint8_t mdXor[mdSize];
+	for (size_t i = 1; i < ell; i++) {
+		h.clear();
+		for (size_t j = 0; j < mdSize; j++) {
+			mdXor[j] = md[j] ^ out[mdSize * (i - 1) + j];
+		}
+		h.update(mdXor, mdSize);
+		buf2[0] = uint8_t(i + 1);
+		h.update(buf2, 2);
+		h.digest(out + mdSize * i, mdSize, dst, dstSize);
+	}
+}
+
+
 #ifndef MCL_USE_VINT
 static inline void set_mpz_t(mpz_t& z, const Unit* p, int n)
 {
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 405c3672..559de9c7 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -629,6 +629,30 @@ void py_eccTest2(const T& mapto)
 	CYBOZU_TEST_EQUAL(P, Q);
 }
 
+template<class T>
+void testHashToFp2v6(const T& mapto)
+{
+	const char msg[] = "asdf";
+	const char dst[] = "QUUX-V01-CS02";
+	Fp2 out[2];
+	mapto.hashToFp2v6(out, msg, strlen(msg), dst, strlen(dst));
+	const Fp2Str expectStr[] = {
+		{
+			"2036684013374073670470642478097435082393965905216073159069132582313283074894808330704754509140183015844408257838394",
+			"1442095344782436377607687657711937282361342321405422912347590889376773969332935605209326528060836557922932229521614",
+		},
+		{
+			"712603160732423529538850938327197859251773848793464448294977148617985113767869616209273456982966659285651019780554",
+			"3549454379036632156704729135192770954406411172309331582430747991672599371642148666322072960024366511631069032927782",
+		},
+	};
+	Fp2 expect[2];
+	for (int i = 0; i < 2; i++) {
+		set(expect[i], expectStr[i]);
+		CYBOZU_TEST_EQUAL(out[i], expect[i]);
+	}
+}
+
 CYBOZU_TEST_AUTO(test)
 {
 	initPairing(mcl::BLS12_381);
@@ -648,4 +672,5 @@ CYBOZU_TEST_AUTO(test)
 	testVec("../misc/mapto/fips_186_3_B233.txt");
 	testVec("../misc/mapto/misc.txt");
 	ethMsgToG2testAll("../bls_sigs_ref/test-vectors/hash_g2/");
+	testHashToFp2v6(mapto);
 }

From 9f01ea3ec906f4299df9e2aab99d510872f9924f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 15 Mar 2020 17:43:18 +0900
Subject: [PATCH 174/553] add test of hashToFp2v6

---
 include/mcl/mapto_wb19.hpp | 12 +++++++++
 test/mapto_wb19_test.cpp   | 53 +++++++++++++++++++++++++++++---------
 2 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index a0c13933..364c0a00 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -637,6 +637,18 @@ struct MapToG2_WB19 {
 		printf("y=%s\n", P.y.getStr(base).c_str());
 		printf("z=%s\n", P.z.getStr(base).c_str());
 	}
+	bool normalizeJacobi(Point& out, const Point& in) const
+	{
+		if (in.z.isZero()) return false;
+		Fp2 t;
+		Fp2::inv(t, in.z);
+		Fp2::mul(out.y, in.y, t);
+		Fp2::sqr(t, t);
+		Fp2::mul(out.x, in.x, t);
+		out.y *= t;
+		out.z = 1;
+		return true;
+	}
 	void opt_swu2_map(G2& P, const Fp2& t, const Fp2 *t2 = 0) const
 	{
 		Point Pp;
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 559de9c7..e129de24 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -633,24 +633,53 @@ template<class T>
 void testHashToFp2v6(const T& mapto)
 {
 	const char msg[] = "asdf";
-	const char dst[] = "QUUX-V01-CS02";
-	Fp2 out[2];
-	mapto.hashToFp2v6(out, msg, strlen(msg), dst, strlen(dst));
-	const Fp2Str expectStr[] = {
+	const struct {
+		const char *dst;
+		const Fp2Str s[2];
+	} tbl[] = {
 		{
-			"2036684013374073670470642478097435082393965905216073159069132582313283074894808330704754509140183015844408257838394",
-			"1442095344782436377607687657711937282361342321405422912347590889376773969332935605209326528060836557922932229521614",
+			"QUUX-V01-CS02",
+			{
+				{
+					"2036684013374073670470642478097435082393965905216073159069132582313283074894808330704754509140183015844408257838394",
+					"1442095344782436377607687657711937282361342321405422912347590889376773969332935605209326528060836557922932229521614",
+				},
+				{
+					"712603160732423529538850938327197859251773848793464448294977148617985113767869616209273456982966659285651019780554",
+					"3549454379036632156704729135192770954406411172309331582430747991672599371642148666322072960024366511631069032927782",
+				},
+			}
 		},
 		{
-			"712603160732423529538850938327197859251773848793464448294977148617985113767869616209273456982966659285651019780554",
-			"3549454379036632156704729135192770954406411172309331582430747991672599371642148666322072960024366511631069032927782",
+			"BLS_SIG_BLS12381G2-SHA256-SSWU-RO-_POP_",
+			{
+				{
+					"1184058645632270717238802026167521675640665254051621677891229161275546248273726163051942698406031256547695641333159",
+					"2796840541941870488250990266864713579761728392052042558603386652320835698725612365412314296122895578014688997245820",
+				},
+				{
+					"1432011693332698211658748968085869636612625272476301004513458304498234062483485462991424286092448663756703927705584",
+					"3596297820733241889565943496970554637589864863833863117721478512486741539397910569381754340032782454436609027606827",
+				},
+			}
 		},
 	};
-	Fp2 expect[2];
-	for (int i = 0; i < 2; i++) {
-		set(expect[i], expectStr[i]);
-		CYBOZU_TEST_EQUAL(out[i], expect[i]);
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+		const char *dst = tbl[i].dst;
+		const Fp2Str *expectStr = tbl[i].s;
+		Fp2 out[2];
+		mapto.hashToFp2v6(out, msg, strlen(msg), dst, strlen(dst));
+		Fp2 expect[2];
+		for (int i = 0; i < 2; i++) {
+			set(expect[i], expectStr[i]);
+			CYBOZU_TEST_EQUAL(out[i], expect[i]);
+		}
 	}
+	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_06);
+	G2 P;
+	mcl::bn::hashAndMapToG2(P, "asdf", 4);
+	P.normalize();
+	printf("P=%s %s\n", P.x.getStr(10).c_str(), P.y.getStr(10).c_str());
 }
 
 CYBOZU_TEST_AUTO(test)

From 6544951800c112528110744a4723c29ea389b4bc Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 26 Mar 2020 13:03:49 +0900
Subject: [PATCH 175/553] add one test from draft-irtf-cfrg-hash-to-curve

---
 test/mapto_wb19_test.cpp | 41 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index e129de24..a8e8cbf4 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -632,12 +632,28 @@ void py_eccTest2(const T& mapto)
 template<class T>
 void testHashToFp2v6(const T& mapto)
 {
-	const char msg[] = "asdf";
 	const struct {
+		const char *msg;
 		const char *dst;
 		const Fp2Str s[2];
 	} tbl[] = {
 		{
+			// from draft-irtf-cfrg-hash-to-curve/poc/vectors/BLS12381G2_XMD:SHA-256_SSWU_RO_.json.swp
+			"abc",
+			"BLS12381G2_XMD:SHA-256_SSWU_RO_TESTGEN",
+			{
+				{
+					"0x0b7b2d371fc970671ddf7bc9ca4a70a1bd286af4487b497e460c0b44d405d73db576f8a08d59416cc976d4b1d0100775",
+					"0x0e86d0eb2d34c34fe8b2a1f2d999fa3dabcd504fdb4beb57e79756b08fd75b0a82660abc6026ecc4ccf327a522587b38",
+				},
+				{
+					"0x10376d048c060df1c5017a363144c482892fe2ce0061094327b8bbe49a713ce795726aa23b5402a271e9f1e7b9b6c7ba",
+					"0x0117f2ea63015e192d759f11a658a002e06112147d90f00d7429722456b9a1c63fef2dbe8df13168e3bd40af2fb959f3",
+				},
+			}
+		},
+		{
+			"asdf",
 			"QUUX-V01-CS02",
 			{
 				{
@@ -651,6 +667,7 @@ void testHashToFp2v6(const T& mapto)
 			}
 		},
 		{
+			"asdf",
 			"BLS_SIG_BLS12381G2-SHA256-SSWU-RO-_POP_",
 			{
 				{
@@ -665,6 +682,7 @@ void testHashToFp2v6(const T& mapto)
 		},
 	};
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+		const char *msg = tbl[i].msg;
 		const char *dst = tbl[i].dst;
 		const Fp2Str *expectStr = tbl[i].s;
 		Fp2 out[2];
@@ -674,6 +692,27 @@ void testHashToFp2v6(const T& mapto)
 			set(expect[i], expectStr[i]);
 			CYBOZU_TEST_EQUAL(out[i], expect[i]);
 		}
+		if (i == 0) {
+			// from draft-irtf-cfrg-hash-to-curve/poc/vectors/BLS12381G2_XMD:SHA-256_SSWU_RO_.json.swp
+			const Fp2Str xys[] = {
+				{
+					"0x0b6d276d0bfbddde617a9ab4c175b07c9c4aecad2cdd6cc9ca541b61334a69c58680ef5692bbad03d2f572838df32b66",
+					"0x139e9d78ff6d9d163f979d14a64c5e57f82f1ef7e42ece338b571a9e92c0666f0f6bf1a5fc21e2d32bcb6432eab7037c",
+				},
+				{
+					"0x022f9ee5d596d06c5f2f735c3c5f743978f79fd57bf7d4291e221227f490d3f276066de9f9edc89c57e048ef4cf0ef72",
+					"0x14dd23517516a80d1d840e34f51dfb76946c7670fca0f36ad8ec9bde4ea82dfae119a21b076519bcc1c00152989a4d45",
+				},
+			};
+			G2 P;
+			mapto.opt_swu2_map(P, out[0], &out[1]);
+			P.normalize();
+			Fp2 t;
+			set(t, xys[0]);
+			CYBOZU_TEST_EQUAL(P.x, t);
+			set(t, xys[1]);
+			CYBOZU_TEST_EQUAL(P.y, t);
+		}
 	}
 	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_06);
 	G2 P;

From b96acf6e867479304f0e9246e8cdb966752e368b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 26 Mar 2020 14:02:04 +0900
Subject: [PATCH 176/553] DST for hash-to-curve is changed

---
 include/mcl/mapto_wb19.hpp | 7 ++++++-
 include/mcl/op.hpp         | 2 +-
 readme.md                  | 2 ++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 364c0a00..b77810fc 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -701,7 +701,12 @@ struct MapToG2_WB19 {
 	}
 	void msgToG2(G2& out, const void *msg, size_t msgSize) const
 	{
-		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO-_POP_";
+		const char *dst;
+		if (draftVersion_ == 5) {
+			dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO-_POP_";
+		} else {
+			dst = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_";
+		}
 		map2curve_osswu2(out, msg, msgSize, dst, strlen(dst));
 	}
 };
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 3f8476bd..f6e64e68 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x106; /* 0xABC = A.BC */
+static const int version = 0x107; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index ace5483d..f755abfb 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,7 @@ mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
+- change DST of hash-to-curve for `MCL_MAP_TO_MODE_HASH_TO_CURVE_06`.
 - add new hash-to-curve function of [draft-irtf-cfrg-hash-to-curve](https://cfrg.github.io/draft-irtf-cfrg-hash-to-curve/draft-irtf-cfrg-hash-to-curve.txt) at March 2020.
   - call `setETHmode(MCL_MAP_TO_MODE_HASH_TO_CURVE_06);`
   - The older `MAP_TO_MODE` will be removed after the draft is fixed.
@@ -295,6 +296,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
+- 2020/Mar/26 v1.07 change DST for hash-to-curve-06
 - 2020/Mar/15 v1.06 support hash-to-curve-06
 - 2020/Jan/31 v1.05 mclBn_ethMsgToFp2 has changed to append zero byte at the end of msg
 - 2020/Jan/25 v1.04 add new hash functions

From 16fe1740de0a470717ed0b80ba79233ee0847b0e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 7 Apr 2020 10:28:27 +0900
Subject: [PATCH 177/553] remove unused code

---
 include/mcl/bn.hpp | 51 ----------------------------------------------
 1 file changed, 51 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index ece87b9d..05d1b6cf 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -792,59 +792,8 @@ struct GLV2 {
 	template<class T>
 	void mul(T& Q, const T& P, mpz_class x, bool constTime = false) const
 	{
-#if 1
 		(void)constTime;
 		mulVecNGLV(Q, &P, &x, 1);
-#else
-		const mpz_class& r = Fr::getOp().mp;
-		const int w = 5;
-		const size_t tblSize = 1 << (w - 2);
-		const size_t splitN = 4;
-		NafArray naf[splitN];
-		mpz_class u[splitN];
-		T tbl[splitN][tblSize];
-		bool b;
-
-		x %= r;
-		if (x == 0) {
-			Q.clear();
-			if (!constTime) return;
-		}
-		if (x < 0) {
-			x += r;
-		}
-		split(u, x);
-		tbl[0][0] = P;
-		Frobenius(tbl[1][0], tbl[0][0]);
-		Frobenius(tbl[2][0], tbl[1][0]);
-		Frobenius(tbl[3][0], tbl[2][0]);
-		for (size_t i = 0; i < splitN; i++) {
-			gmp::getNAFwidth(&b, naf[i], u[i], w);
-			assert(b); (void)b;
-		}
-		{
-			T P2;
-			T::dbl(P2, P);
-			for (size_t i = 1; i < tblSize; i++) {
-				T::add(tbl[0][i], tbl[0][i - 1], P2);
-				Frobenius(tbl[1][i], tbl[0][i]);
-				Frobenius(tbl[2][i], tbl[1][i]);
-				Frobenius(tbl[3][i], tbl[2][i]);
-			}
-		}
-		size_t maxBit = naf[0].size();
-		for (size_t i = 1; i < splitN; i++) {
-			if (naf[i].size() > maxBit) maxBit = naf[i].size();
-		}
-		Q.clear();
-		for (size_t i = 0; i < maxBit; i++) {
-			T::dbl(Q, Q);
-			mcl::local::addTbl(Q, tbl[0], naf[0], maxBit - 1 - i);
-			mcl::local::addTbl(Q, tbl[1], naf[1], maxBit - 1 - i);
-			mcl::local::addTbl(Q, tbl[2], naf[2], maxBit - 1 - i);
-			mcl::local::addTbl(Q, tbl[3], naf[3], maxBit - 1 - i);
-		}
-#endif
 	}
 	template<class T>
 	size_t mulVecNGLV(T& z, const T *xVec, const mpz_class *yVec, size_t n) const

From 823490974426780b5d1aa37d873472b2991a1ef7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 7 Apr 2020 13:26:34 +0900
Subject: [PATCH 178/553] move some methods to global

---
 include/mcl/ec.hpp | 167 ++++++++++++++++++++++++++-------------------
 1 file changed, 97 insertions(+), 70 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index ee747799..e44464f5 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -21,6 +21,22 @@ namespace mcl {
 
 template<class _Fp> class Fp2T;
 
+namespace local {
+
+template<class Ec, class Vec>
+void addTbl(Ec& Q, const Ec *tbl, const Vec& naf, size_t i)
+{
+	if (i >= naf.size()) return;
+	int n = naf[i];
+	if (n > 0) {
+		Q += tbl[(n - 1) >> 1];
+	} else if (n < 0) {
+		Q -= tbl[(-n - 1) >> 1];
+	}
+}
+
+} // mcl::local
+
 namespace ec {
 
 enum Mode {
@@ -47,23 +63,78 @@ bool get_a_flag(const mcl::Fp2T<Fp>& x)
 
 } // mcl::ec::local
 
-} // mcl::ec
+template<class T>
+void normalizeJacobi(T& x, T& y, T& z)
+{
+	assert(!z.isZero());
+	T rz2;
+	T::inv(z, z);
+	T::sqr(rz2, z);
+	x *= rz2;
+	y *= rz2;
+	y *= z;
+	z = 1;
+}
 
-namespace local {
+// Y^2 == X(X^2 + aZ^4) + bZ^6
+template<class T>
+bool isValidJacobi(const T& a, const T& b, const T& x, const T& y, const T& z)
+{
+	T y2, x2, z2, z4, t;
+	T::sqr(x2, x);
+	T::sqr(y2, y);
+	T::sqr(z2, z);
+	T::sqr(z4, z2);
+	T::mul(t, z4, a);
+	t += x2;
+	t *= x;
+	z4 *= z2;
+	z4 *= b;
+	t += z4;
+	return y2 == t;
+}
 
-template<class Ec, class Vec>
-void addTbl(Ec& Q, const Ec *tbl, const Vec& naf, size_t i)
+template<class T>
+void normalizeProj(T& x, T& y, T& z)
 {
-	if (i >= naf.size()) return;
-	int n = naf[i];
-	if (n > 0) {
-		Q += tbl[(n - 1) >> 1];
-	} else if (n < 0) {
-		Q -= tbl[(-n - 1) >> 1];
-	}
+	assert(!z.isZero());
+	T::inv(z, z);
+	x *= z;
+	y *= z;
+	z = 1;
 }
 
-} // mcl::local
+// (Y^2 - bZ^2)Z = X(X^2 + aZ^2)
+template<class T>
+bool isValidProj(const T& a, const T& b, const T& x, const T& y, const T& z)
+{
+	T y2, x2, z2, t;
+	T::sqr(x2, x);
+	T::sqr(y2, y);
+	T::sqr(z2, z);
+	T::mul(t, a, z2);
+	t += x2;
+	t *= x;
+	z2 *= b;
+	y2 -= z2;
+	y2 *= z;
+	return y2 == t;
+}
+
+// y^2 == (x^2 + a)x + b
+template<class T>
+bool isValidAffine(const T& a, const T& b, const T& x, const T& y)
+{
+	T y2, t;
+	T::sqr(y2, y);
+	T::sqr(t, x);
+	t += a;
+	t *= x;
+	t += b;
+	return y2 == t;
+}
+
+} // mcl::ec
 
 /*
 	elliptic curve
@@ -117,65 +188,24 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 private:
 	void normalizeJacobi()
 	{
-		assert(!z.isZero());
-		Fp rz2;
-		Fp::inv(z, z);
-		Fp::sqr(rz2, z);
-		x *= rz2;
-		y *= rz2;
-		y *= z;
-		z = 1;
+		ec::normalizeJacobi(x, y, z);
 	}
 	void normalizeProj()
 	{
-		assert(!z.isZero());
-		Fp::inv(z, z);
-		x *= z;
-		y *= z;
-		z = 1;
+		ec::normalizeProj(x, y, z);
 	}
-	// Y^2 == X(X^2 + aZ^4) + bZ^6
 	bool isValidJacobi() const
 	{
-		Fp y2, x2, z2, z4, t;
-		Fp::sqr(x2, x);
-		Fp::sqr(y2, y);
-		Fp::sqr(z2, z);
-		Fp::sqr(z4, z2);
-		Fp::mul(t, z4, a_);
-		t += x2;
-		t *= x;
-		z4 *= z2;
-		z4 *= b_;
-		t += z4;
-		return y2 == t;
+		return ec::isValidJacobi(a_, b_, x, y, z);
 	}
-	// (Y^2 - bZ^2)Z = X(X^2 + aZ^2)
 	bool isValidProj() const
 	{
-		Fp y2, x2, z2, t;
-		Fp::sqr(x2, x);
-		Fp::sqr(y2, y);
-		Fp::sqr(z2, z);
-		Fp::mul(t, a_, z2);
-		t += x2;
-		t *= x;
-		z2 *= b_;
-		y2 -= z2;
-		y2 *= z;
-		return y2 == t;
+		return ec::isValidProj(a_, b_, x, y, z);
 	}
 #endif
-	// y^2 == (x^2 + a)x + b
-	static inline bool isValid(const Fp& _x, const Fp& _y)
+	bool isValidAffine() const
 	{
-		Fp y2, t;
-		Fp::sqr(y2, _y);
-		Fp::sqr(t, _x);
-		t += a_;
-		t *= _x;
-		t += b_;
-		return y2 == t;
+		return ec::isValidAffine(a_, b_, x, y);
 	}
 public:
 	void normalize()
@@ -272,29 +302,26 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		} else
 #endif
 		{
-			isOK = isValid(x, y);
+			isOK = isValidAffine();
 		}
 		if (!isOK) return false;
 		if (verifyOrder_) return isValidOrder();
 		return true;
 	}
-	void set(bool *pb, const Fp& _x, const Fp& _y, bool verify = true)
+	void set(bool *pb, const Fp& x, const Fp& y, bool verify = true)
 	{
-		if (verify && !isValid(_x, _y)) {
-			*pb = false;
-			return;
-		}
-		x = _x; y = _y;
+		this->x = x; this->y = y;
 #ifdef MCL_EC_USE_AFFINE
 		inf_ = false;
 #else
 		z = 1;
 #endif
-		if (verify && verifyOrder_ && !isValidOrder()) {
-			*pb = false;
-		} else {
+		if (!verify || (isValidAffine() && (!verifyOrder_ || isValidOrder()))) {
 			*pb = true;
+			return;
 		}
+		*pb = false;
+		clear();
 	}
 	void clear()
 	{
@@ -972,7 +999,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		}
 		return;
 	verifyValidness:
-		if (!isValid(x, y)) {
+		if (!isValidAffine()) {
 			*pb = false;
 			return;
 		}

From 14f9e64a544e843bf6c28e3e126c25883eb4ab7d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 8 Apr 2020 13:51:47 +0900
Subject: [PATCH 179/553] add affine test

---
 include/mcl/ec.hpp | 137 ++++++++++++++++-----------------------------
 test/ec_test.cpp   |  40 +++++++------
 2 files changed, 72 insertions(+), 105 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index e44464f5..ce630535 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -10,8 +10,6 @@
 #include <mcl/fp.hpp>
 #include <mcl/ecparam.hpp>
 
-//#define MCL_EC_USE_AFFINE
-
 #ifdef _MSC_VER
 	#pragma warning(push)
 	#pragma warning(disable : 4458)
@@ -41,7 +39,8 @@ namespace ec {
 
 enum Mode {
 	Jacobi = 0,
-	Proj = 1
+	Proj = 1,
+	Affine
 };
 
 namespace local {
@@ -151,13 +150,8 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 public:
 	typedef _Fp Fp;
 	typedef _Fp BaseFp;
-#ifdef MCL_EC_USE_AFFINE
-	Fp x, y;
-	bool inf_;
-#else
 	Fp x, y, z;
 	static int mode_;
-#endif
 	static Fp a_;
 	static Fp b_;
 	static int specialA_;
@@ -178,13 +172,8 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	}
 	bool isNormalized() const
 	{
-#ifdef MCL_EC_USE_AFFINE
-		return true;
-#else
 		return isZero() || z.isOne();
-#endif
 	}
-#ifndef MCL_EC_USE_AFFINE
 private:
 	void normalizeJacobi()
 	{
@@ -202,7 +191,6 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	{
 		return ec::isValidProj(a_, b_, x, y, z);
 	}
-#endif
 	bool isValidAffine() const
 	{
 		return ec::isValidAffine(a_, b_, x, y);
@@ -210,7 +198,6 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 public:
 	void normalize()
 	{
-#ifndef MCL_EC_USE_AFFINE
 		if (isNormalized()) return;
 		switch (mode_) {
 		case ec::Jacobi:
@@ -220,7 +207,6 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 			normalizeProj();
 			break;
 		}
-#endif
 	}
 	static void normalize(EcT& y, const EcT& x)
 	{
@@ -243,13 +229,9 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		order_ = 0;
 		mulArrayGLV = 0;
 		mulVecNGLV = 0;
-#ifdef MCL_EC_USE_AFFINE
-		cybozu::disable_warning_unused_variable(mode);
-#else
-		assert(mode == ec::Jacobi || mode == ec::Proj);
 		mode_ = mode;
-#endif
 	}
+	static inline int getMode() { return mode_; }
 	/*
 		verify the order of *this is equal to order if order != 0
 		in constructor, set, setStr, operator<<().
@@ -288,34 +270,25 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	bool isValid() const
 	{
 		if (isZero()) return true;
-		bool isOK = false;
-#ifndef MCL_EC_USE_AFFINE
-		if (!z.isOne()) {
-			switch (mode_) {
-			case ec::Jacobi:
-				isOK = isValidJacobi();
-				break;
-			case ec::Proj:
-				isOK = isValidProj();
-				break;
-			}
-		} else
-#endif
-		{
-			isOK = isValidAffine();
+		switch (mode_) {
+		case ec::Jacobi:
+			if (!isValidJacobi()) return false;
+			break;
+		case ec::Proj:
+			if (!isValidProj()) return false;
+			break;
+		case ec::Affine:
+			if (!isValidAffine()) return false;
+			break;
 		}
-		if (!isOK) return false;
 		if (verifyOrder_) return isValidOrder();
 		return true;
 	}
 	void set(bool *pb, const Fp& x, const Fp& y, bool verify = true)
 	{
-		this->x = x; this->y = y;
-#ifdef MCL_EC_USE_AFFINE
-		inf_ = false;
-#else
+		this->x = x;
+		this->y = y;
 		z = 1;
-#endif
 		if (!verify || (isValidAffine() && (!verifyOrder_ || isValidOrder()))) {
 			*pb = true;
 			return;
@@ -325,15 +298,10 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	}
 	void clear()
 	{
-#ifdef MCL_EC_USE_AFFINE
-		inf_ = true;
-#else
-		z.clear();
-#endif
 		x.clear();
 		y.clear();
+		z.clear();
 	}
-#ifndef MCL_EC_USE_AFFINE
 	static inline void dblNoVerifyInfJacobi(EcT& R, const EcT& P)
 	{
 		Fp S, M, t, y2;
@@ -449,10 +417,8 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		Fp::sub(R.y, t, w);
 		R.y -= w;
 	}
-#endif
-	static inline void dblNoVerifyInf(EcT& R, const EcT& P)
+	static inline void dblNoVerifyInfAffine(EcT& R, const EcT& P)
 	{
-#ifdef MCL_EC_USE_AFFINE
 		Fp t, s;
 		Fp::sqr(t, P.x);
 		Fp::add(s, t, t);
@@ -468,8 +434,10 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		s *= t;
 		Fp::sub(R.y, s, P.y);
 		R.x = x3;
-		R.inf_ = false;
-#else
+		R.z = 1;
+	}
+	static inline void dblNoVerifyInf(EcT& R, const EcT& P)
+	{
 		switch (mode_) {
 		case ec::Jacobi:
 			dblNoVerifyInfJacobi(R, P);
@@ -477,8 +445,10 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		case ec::Proj:
 			dblNoVerifyInfProj(R, P);
 			break;
+		case ec::Affine:
+			dblNoVerifyInfAffine(R, P);
+			break;
 		}
-#endif
 	}
 	static inline void dbl(EcT& R, const EcT& P)
 	{
@@ -488,7 +458,6 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		}
 		dblNoVerifyInf(R, P);
 	}
-#ifndef MCL_EC_USE_AFFINE
 	static inline void addJacobi(EcT& R, const EcT& P, const EcT& Q, bool isPzOne, bool isQzOne)
 	{
 		Fp r, U1, S1, H, H3;
@@ -615,27 +584,22 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		R.y *= r;
 		R.y -= vv;
 	}
-#endif
-	static inline void add(EcT& R, const EcT& P, const EcT& Q) {
-		if (P.isZero()) { R = Q; return; }
-		if (Q.isZero()) { R = P; return; }
-		if (&P == &Q) {
-			dblNoVerifyInf(R, P);
-			return;
-		}
-#ifdef MCL_EC_USE_AFFINE
+	static inline void addAffine(EcT& R, const EcT& P, const EcT& Q)
+	{
 		Fp t;
-		Fp::neg(t, Q.y);
-		if (P.y == t) { R.clear(); return; }
 		Fp::sub(t, Q.x, P.x);
 		if (t.isZero()) {
-			dblNoVerifyInf(R, P);
+			if (P.y == Q.y) {
+				dblNoVerifyInf(R, P);
+			} else {
+				R.clear();
+			}
 			return;
 		}
 		Fp s;
 		Fp::sub(s, Q.y, P.y);
 		Fp::div(t, s, t);
-		R.inf_ = false;
+		R.z = 1;
 		Fp x3;
 		Fp::sqr(x3, t);
 		x3 -= P.x;
@@ -644,7 +608,14 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		s *= t;
 		Fp::sub(R.y, s, P.y);
 		R.x = x3;
-#else
+	}
+	static inline void add(EcT& R, const EcT& P, const EcT& Q) {
+		if (P.isZero()) { R = Q; return; }
+		if (Q.isZero()) { R = P; return; }
+		if (&P == &Q) {
+			dblNoVerifyInf(R, P);
+			return;
+		}
 		bool isPzOne = P.z.isOne();
 		bool isQzOne = Q.z.isOne();
 		switch (mode_) {
@@ -654,8 +625,10 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		case ec::Proj:
 			addProj(R, P, Q, isPzOne, isQzOne);
 			break;
+		case ec::Affine:
+			addAffine(R, P, Q);
+			break;
 		}
-#endif
 	}
 	static inline void sub(EcT& R, const EcT& P, const EcT& Q)
 	{
@@ -671,11 +644,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		}
 		R.x = P.x;
 		Fp::neg(R.y, P.y);
-#ifdef MCL_EC_USE_AFFINE
-		R.inf_ = false;
-#else
 		R.z = P.z;
-#endif
 	}
 	template<class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
 	static inline void mul(EcT& z, const EcT& x, const FpT<tag, maxBitSize>& y)
@@ -742,11 +711,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	}
 	bool isZero() const
 	{
-#ifdef MCL_EC_USE_AFFINE
-		return inf_;
-#else
 		return z.isZero();
-#endif
 	}
 	static inline bool isMSBserialize()
 	{
@@ -772,9 +737,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 				cybozu::writeChar(pb, os, sep);
 				if (!*pb) return;
 			}
-#ifndef MCL_EC_USE_AFFINE
 			z.save(pb, os, ioMode);
-#endif
 			return;
 		}
 		EcT P(*this);
@@ -874,11 +837,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	template<class InputStream>
 	void load(bool *pb, InputStream& is, int ioMode)
 	{
-#ifdef MCL_EC_USE_AFFINE
-		inf_ = false;
-#else
 		z = 1;
-#endif
 		if (ioMode & IoEcAffineSerialize) {
 			if (b_ == 0) { // assume Zero if x = y = 0
 				*pb = false;
@@ -983,9 +942,13 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 				if (!*pb) return;
 			} else if (c == '4') {
 				y.load(pb, is, ioMode); if (!*pb) return;
-#ifndef MCL_EC_USE_AFFINE
 				z.load(pb, is, ioMode); if (!*pb) return;
-#endif
+				if (mode_ == ec::Affine) {
+					if (!z.isZero() && !z.isOne()) {
+						*pb = false;
+						return;
+					}
+				}
 			} else {
 				*pb = false;
 				return;
@@ -1369,9 +1332,7 @@ template<class Fp> bool EcT<Fp>::verifyOrder_;
 template<class Fp> mpz_class EcT<Fp>::order_;
 template<class Fp> void (*EcT<Fp>::mulArrayGLV)(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime);
 template<class Fp> size_t (*EcT<Fp>::mulVecNGLV)(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn);
-#ifndef MCL_EC_USE_AFFINE
 template<class Fp> int EcT<Fp>::mode_;
-#endif
 
 // r = the order of Ec
 template<class Ec, class _Fr>
diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index ea927a16..27c5a297 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -1,8 +1,8 @@
-//#define MCL_EC_USE_AFFINE
 #define PUT(x) std::cout << #x "=" << (x) << std::endl
 #define CYBOZU_TEST_DISABLE_AUTO_RUN
 #include <cybozu/test.hpp>
 #include <cybozu/benchmark.hpp>
+#include <cybozu/xorshift.hpp>
 #include <mcl/gmp_util.hpp>
 
 #include <mcl/fp.hpp>
@@ -19,11 +19,7 @@ typedef mcl::EcT<Fp> Ec;
 CYBOZU_TEST_AUTO(sizeof)
 {
 	CYBOZU_TEST_EQUAL(sizeof(Fp), sizeof(mcl::fp::Unit) * Fp::maxSize);
-#ifdef MCL_EC_USE_AFFINE
-	CYBOZU_TEST_EQUAL(sizeof(Ec), sizeof(Fp) * 2 + sizeof(mcl::fp::Unit));
-#else
 	CYBOZU_TEST_EQUAL(sizeof(Ec), sizeof(Fp) * 3);
-#endif
 }
 
 struct Test {
@@ -96,9 +92,9 @@ struct Test {
 
 		{
 			Ec::dbl(R, P);
-#ifndef MCL_EC_USE_AFFINE
-			CYBOZU_TEST_ASSERT(!R.isNormalized());
-#endif
+			if (Ec::getMode() != mcl::ec::Affine) {
+				CYBOZU_TEST_ASSERT(!R.isNormalized());
+			}
 			CYBOZU_TEST_ASSERT(R.isValid());
 			Ec R2 = P + P;
 			CYBOZU_TEST_EQUAL(R, R2);
@@ -162,13 +158,17 @@ struct Test {
 			Ec R2;
 			P += P;
 			Q += P;
-			CYBOZU_TEST_ASSERT(!P.z.isOne());
-			CYBOZU_TEST_ASSERT(!Q.z.isOne());
+			if (Ec::getMode() == mcl::ec::Affine) {
+				CYBOZU_TEST_ASSERT(P.z.isOne());
+				CYBOZU_TEST_ASSERT(Q.z.isOne());
+			} else {
+				CYBOZU_TEST_ASSERT(!P.z.isOne());
+				CYBOZU_TEST_ASSERT(!Q.z.isOne());
+			}
 			Ec::add(R2, P, Q);
 
 			P.normalize();
 			CYBOZU_TEST_ASSERT(P.z.isOne());
-			CYBOZU_TEST_ASSERT(!Q.z.isOne());
 			// affine + generic
 			Ec::add(R, P, Q);
 			CYBOZU_TEST_EQUAL(R, R2);
@@ -177,14 +177,17 @@ struct Test {
 			CYBOZU_TEST_EQUAL(R, R2);
 
 			Q.normalize();
-			CYBOZU_TEST_ASSERT(P.z.isOne());
 			CYBOZU_TEST_ASSERT(Q.z.isOne());
 			// affine + affine
 			Ec::add(R, P, Q);
 			CYBOZU_TEST_EQUAL(R, R2);
 
 			P += P;
-			CYBOZU_TEST_ASSERT(!P.z.isOne());
+			if (Ec::getMode() == mcl::ec::Affine) {
+				CYBOZU_TEST_ASSERT(P.z.isOne());
+			} else {
+				CYBOZU_TEST_ASSERT(!P.z.isOne());
+			}
 			// generic
 			Ec::dbl(R2, P);
 
@@ -546,6 +549,7 @@ void naiveMulVec(Ec& out, const Ec *xVec, const Zn *yVec, size_t n)
 void mulVec(const mcl::EcParam& para)
 {
 	if (para.bitSize > 384) return;
+	cybozu::XorShift rg;
 	const Fp x(para.gx);
 	const Fp y(para.gy);
 	Ec P(x, y);
@@ -558,7 +562,7 @@ void mulVec(const mcl::EcParam& para)
 	Ec::dbl(P, P);
 	for (size_t i = 0; i < N; i++) {
 		Ec::mul(xVec[i], P, i + 3);
-		yVec[i].setByCSPRNG();
+		yVec[i].setByCSPRNG(rg);
 	}
 	const size_t nTbl[] = { 1, 2, 3, 5, 30, 31, 32, 33 };
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(nTbl); i++) {
@@ -582,6 +586,8 @@ void test_sub_sub(const mcl::EcParam& para, mcl::fp::Mode fpMode)
 	Test(para, fpMode, mcl::ec::Proj).run();
 	puts("Jacobi");
 	Test(para, fpMode, mcl::ec::Jacobi).run();
+	puts("Affine");
+	Test(para, fpMode, mcl::ec::Affine).run();
 }
 
 void test_sub(const mcl::EcParam *para, size_t paraNum)
@@ -595,8 +601,8 @@ void test_sub(const mcl::EcParam *para, size_t paraNum)
 #endif
 #ifdef MCL_USE_XBYAK
 		test_sub_sub(para[i], mcl::fp::FP_XBYAK);
-		mulVec(para[i]);
 #endif
+		mulVec(para[i]);
 	}
 }
 
@@ -606,7 +612,7 @@ CYBOZU_TEST_AUTO(all)
 {
 	if (g_partial & (1 << 3)) {
 		const struct mcl::EcParam para3[] = {
-	//		mcl::ecparam::p160_1,
+//			mcl::ecparam::p160_1,
 			mcl::ecparam::secp160k1,
 			mcl::ecparam::secp192k1,
 			mcl::ecparam::NIST_P192,
@@ -627,7 +633,7 @@ CYBOZU_TEST_AUTO(all)
 #if MCL_MAX_BIT_SIZE >= 384
 	if (g_partial & (1 << 6)) {
 		const struct mcl::EcParam para6[] = {
-	//		mcl::ecparam::secp384r1,
+//			mcl::ecparam::secp384r1,
 			mcl::ecparam::NIST_P384,
 		};
 		test_sub(para6, CYBOZU_NUM_OF_ARRAY(para6));

From 88688c210cc2039353cee9da8a4973b55c46f269 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 8 Apr 2020 17:03:08 +0900
Subject: [PATCH 180/553] change name of test var

---
 test/she_test.cpp | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/test/she_test.cpp b/test/she_test.cpp
index cec65f27..f2651d19 100644
--- a/test/she_test.cpp
+++ b/test/she_test.cpp
@@ -666,9 +666,9 @@ CYBOZU_TEST_AUTO(hashBench)
 	CYBOZU_BENCH_C("finalExp", C, finalExp, e, e);
 	CYBOZU_BENCH_C("precomML", C, precomputedMillerLoop, e, P, SHE::Qcoeff_);
 
-	CipherTextG1 c1;
-	CipherTextG2 c2;
-	CipherTextGT ct;
+	CipherTextG1 c1, c11;
+	CipherTextG2 c2, c21;
+	CipherTextGT ct, ct1;
 
 	int m = int(hashSize - 1);
 	printf("small m = %d\n", m);
@@ -695,9 +695,12 @@ CYBOZU_TEST_AUTO(hashBench)
 	CYBOZU_BENCH_C("CT:mulML", C, CipherTextGT::mulML, ct, c1, c2);
 	CYBOZU_BENCH_C("CT:finalExp", C, CipherTextGT::finalExp, ct, ct);
 
-	CYBOZU_BENCH_C("addG1   ", C, CipherTextG1::add, c1, c1, c1);
-	CYBOZU_BENCH_C("addG2   ", C, CipherTextG2::add, c2, c2, c2);
-	CYBOZU_BENCH_C("addGT   ", C, CipherTextGT::add, ct, ct, ct);
+	c11 = c1;
+	c21 = c2;
+	ct1 = ct;
+	CYBOZU_BENCH_C("addG1   ", C, CipherTextG1::add, c1, c1, c11);
+	CYBOZU_BENCH_C("addG2   ", C, CipherTextG2::add, c2, c2, c21);
+	CYBOZU_BENCH_C("addGT   ", C, CipherTextGT::add, ct, ct, ct1);
 	CYBOZU_BENCH_C("reRandG1", C, pub.reRand, c1);
 	CYBOZU_BENCH_C("reRandG2", C, pub.reRand, c2);
 	CYBOZU_BENCH_C("reRandGT", C, pub.reRand, ct);

From a03605c57f3b9058f822e35d45a9b6e53653e341 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 9 Apr 2020 14:21:20 +0900
Subject: [PATCH 181/553] move ec operation to global

---
 include/mcl/ec.hpp | 754 ++++++++++++++++++++++++---------------------
 1 file changed, 403 insertions(+), 351 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index ce630535..31c4c96b 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -43,32 +43,38 @@ enum Mode {
 	Affine
 };
 
+enum ModeCoeffA {
+	zero,
+	minus3,
+	generic
+};
+
 namespace local {
 
 // x is negative <=> x < half(:=(p+1)/2) <=> a = 1
-template<class Fp>
-bool get_a_flag(const Fp& x)
+template<class F>
+bool get_a_flag(const F& x)
 {
 	return x.isNegative();
 }
 
 // Im(x) is negative <=> Im(x)  < half(:=(p+1)/2) <=> a = 1
 
-template<class Fp>
-bool get_a_flag(const mcl::Fp2T<Fp>& x)
+template<class F>
+bool get_a_flag(const mcl::Fp2T<F>& x)
 {
 	return get_a_flag(x.b); // x = a + bi
 }
 
 } // mcl::ec::local
 
-template<class T>
-void normalizeJacobi(T& x, T& y, T& z)
+template<class F>
+void normalizeJacobi(F& x, F& y, F& z)
 {
 	assert(!z.isZero());
-	T rz2;
-	T::inv(z, z);
-	T::sqr(rz2, z);
+	F rz2;
+	F::inv(z, z);
+	F::sqr(rz2, z);
 	x *= rz2;
 	y *= rz2;
 	y *= z;
@@ -76,15 +82,15 @@ void normalizeJacobi(T& x, T& y, T& z)
 }
 
 // Y^2 == X(X^2 + aZ^4) + bZ^6
-template<class T>
-bool isValidJacobi(const T& a, const T& b, const T& x, const T& y, const T& z)
+template<class F>
+bool isValidJacobi(const F& x, const F& y, const F& z, const F& a, const F& b)
 {
-	T y2, x2, z2, z4, t;
-	T::sqr(x2, x);
-	T::sqr(y2, y);
-	T::sqr(z2, z);
-	T::sqr(z4, z2);
-	T::mul(t, z4, a);
+	F y2, x2, z2, z4, t;
+	F::sqr(x2, x);
+	F::sqr(y2, y);
+	F::sqr(z2, z);
+	F::sqr(z4, z2);
+	F::mul(t, z4, a);
 	t += x2;
 	t *= x;
 	z4 *= z2;
@@ -93,25 +99,176 @@ bool isValidJacobi(const T& a, const T& b, const T& x, const T& y, const T& z)
 	return y2 == t;
 }
 
-template<class T>
-void normalizeProj(T& x, T& y, T& z)
+/*
+	   |a=0|-3| generic
+	sqr|  4| 6| 6
+	mul|  3| 3| 4
+	add| 12|13|13
+*/
+template<class E>
+void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
+{
+	typedef typename E::Fp F;
+	if (P.isZero()) {
+		R.clear();
+		return;
+	}
+	F S, M, t, y2;
+	F::sqr(y2, P.y);
+	F::mul(S, P.x, y2);
+	const bool isPzOne = P.z.isOne();
+	S += S;
+	S += S;
+	F::sqr(M, P.x);
+	switch (specialA) {
+	case zero:
+		F::add(t, M, M);
+		M += t;
+		break;
+	case minus3:
+		if (isPzOne) {
+			M -= P.z;
+		} else {
+			F::sqr(t, P.z);
+			F::sqr(t, t);
+			M -= t;
+		}
+		F::add(t, M, M);
+		M += t;
+		break;
+	case generic:
+	default:
+		if (isPzOne) {
+			t = a;
+		} else {
+			F::sqr(t, P.z);
+			F::sqr(t, t);
+			t *= a;
+		}
+		t += M;
+		M += M;
+		M += t;
+		break;
+	}
+	F::sqr(R.x, M);
+	R.x -= S;
+	R.x -= S;
+	if (isPzOne) {
+		R.z = P.y;
+	} else {
+		F::mul(R.z, P.y, P.z);
+	}
+	R.z += R.z;
+	F::sqr(y2, y2);
+	y2 += y2;
+	y2 += y2;
+	y2 += y2;
+	F::sub(R.y, S, R.x);
+	R.y *= M;
+	R.y -= y2;
+}
+
+/*
+	sqr|  4
+	mul| 12
+	add|  7
+*/
+template<class E>
+void addJacobi(E& R, const E& P, const E& Q, int specialA, const typename E::Fp& a)
+{
+	typedef typename E::Fp F;
+	if (P.isZero()) { R = Q; return; }
+	if (Q.isZero()) { R = P; return; }
+	bool isPzOne = P.z.isOne();
+	bool isQzOne = Q.z.isOne();
+	F r, U1, S1, H, H3;
+	if (isPzOne) {
+		// r = 1;
+	} else {
+		F::sqr(r, P.z);
+	}
+	if (isQzOne) {
+		U1 = P.x;
+		if (isPzOne) {
+			H = Q.x;
+		} else {
+			F::mul(H, Q.x, r);
+		}
+		H -= U1;
+		S1 = P.y;
+	} else {
+		F::sqr(S1, Q.z);
+		F::mul(U1, P.x, S1);
+		if (isPzOne) {
+			H = Q.x;
+		} else {
+			F::mul(H, Q.x, r);
+		}
+		H -= U1;
+		S1 *= Q.z;
+		S1 *= P.y;
+	}
+	if (isPzOne) {
+		r = Q.y;
+	} else {
+		r *= P.z;
+		r *= Q.y;
+	}
+	r -= S1;
+	if (H.isZero()) {
+		if (r.isZero()) {
+			ec::dblJacobi(R, P, specialA, a);
+		} else {
+			R.clear();
+		}
+		return;
+	}
+	if (isPzOne) {
+		if (isQzOne) {
+			R.z = H;
+		} else {
+			F::mul(R.z, H, Q.z);
+		}
+	} else {
+		if (isQzOne) {
+			F::mul(R.z, P.z, H);
+		} else {
+			F::mul(R.z, P.z, Q.z);
+			R.z *= H;
+		}
+	}
+	F::sqr(H3, H); // H^2
+	F::sqr(R.y, r); // r^2
+	U1 *= H3; // U1 H^2
+	H3 *= H; // H^3
+	R.y -= U1;
+	R.y -= U1;
+	F::sub(R.x, R.y, H3);
+	U1 -= R.x;
+	U1 *= r;
+	H3 *= S1;
+	F::sub(R.y, U1, H3);
+}
+
+template<class F>
+void normalizeProj(F& x, F& y, F& z)
 {
 	assert(!z.isZero());
-	T::inv(z, z);
+	F::inv(z, z);
 	x *= z;
 	y *= z;
 	z = 1;
 }
 
 // (Y^2 - bZ^2)Z = X(X^2 + aZ^2)
-template<class T>
-bool isValidProj(const T& a, const T& b, const T& x, const T& y, const T& z)
+template<class F>
+bool isValidProj(const F& x, const F& y, const F& z, const F& a, const F& b)
 {
-	T y2, x2, z2, t;
-	T::sqr(x2, x);
-	T::sqr(y2, y);
-	T::sqr(z2, z);
-	T::mul(t, a, z2);
+	F y2, x2, z2, t;
+	F::sqr(x2, x);
+	F::sqr(y2, y);
+	F::sqr(z2, z);
+	F::mul(t, a, z2);
 	t += x2;
 	t *= x;
 	z2 *= b;
@@ -120,19 +277,220 @@ bool isValidProj(const T& a, const T& b, const T& x, const T& y, const T& z)
 	return y2 == t;
 }
 
+/*
+	   |a=0|-3| generic
+	sqr|  4| 5| 5
+	mul|  8| 8| 9
+	add| 11|12|12
+*/
+template<class E>
+void dblProj(E& R, const E& P, int specialA, const typename E::Fp& a)
+{
+	typedef typename E::Fp F;
+	if (P.isZero()) {
+		R.clear();
+		return;
+	}
+	const bool isPzOne = P.z.isOne();
+	F w, t, h;
+	switch (specialA) {
+	case zero:
+		F::sqr(w, P.x);
+		F::add(t, w, w);
+		w += t;
+		break;
+	case minus3:
+		F::sqr(w, P.x);
+		if (isPzOne) {
+			w -= P.z;
+		} else {
+			F::sqr(t, P.z);
+			w -= t;
+		}
+		F::add(t, w, w);
+		w += t;
+		break;
+	case generic:
+	default:
+		if (isPzOne) {
+			w = a;
+		} else {
+			F::sqr(w, P.z);
+			w *= a;
+		}
+		F::sqr(t, P.x);
+		w += t;
+		w += t;
+		w += t; // w = a z^2 + 3x^2
+		break;
+	}
+	if (isPzOne) {
+		R.z = P.y;
+	} else {
+		F::mul(R.z, P.y, P.z); // s = yz
+	}
+	F::mul(t, R.z, P.x);
+	t *= P.y; // xys
+	t += t;
+	t += t; // 4(xys) ; 4B
+	F::sqr(h, w);
+	h -= t;
+	h -= t; // w^2 - 8B
+	F::mul(R.x, h, R.z);
+	t -= h; // h is free
+	t *= w;
+	F::sqr(w, P.y);
+	R.x += R.x;
+	R.z += R.z;
+	F::sqr(h, R.z);
+	w *= h;
+	R.z *= h;
+	F::sub(R.y, t, w);
+	R.y -= w;
+}
+
+/*
+	sqr|  2
+	mul| 12
+	add|  7
+*/
+template<class E>
+void addProj(E& R, const E& P, const E& Q, int specialA, const typename E::Fp& a)
+{
+	typedef typename E::Fp F;
+	if (P.isZero()) { R = Q; return; }
+	if (Q.isZero()) { R = P; return; }
+	bool isPzOne = P.z.isOne();
+	bool isQzOne = Q.z.isOne();
+	F r, PyQz, v, A, vv;
+	if (isQzOne) {
+		r = P.x;
+		PyQz = P.y;
+	} else {
+		F::mul(r, P.x, Q.z);
+		F::mul(PyQz, P.y, Q.z);
+	}
+	if (isPzOne) {
+		A = Q.y;
+		v = Q.x;
+	} else {
+		F::mul(A, Q.y, P.z);
+		F::mul(v, Q.x, P.z);
+	}
+	v -= r;
+	if (v.isZero()) {
+		if (A == PyQz) {
+			dblProj(R, P, specialA, a);
+		} else {
+			R.clear();
+		}
+		return;
+	}
+	F::sub(R.y, A, PyQz);
+	F::sqr(A, R.y);
+	F::sqr(vv, v);
+	r *= vv;
+	vv *= v;
+	if (isQzOne) {
+		R.z = P.z;
+	} else {
+		if (isPzOne) {
+			R.z = Q.z;
+		} else {
+			F::mul(R.z, P.z, Q.z);
+		}
+	}
+	// R.z = 1 if isPzOne && isQzOne
+	if (isPzOne && isQzOne) {
+		R.z = vv;
+	} else {
+		A *= R.z;
+		R.z *= vv;
+	}
+	A -= vv;
+	vv *= PyQz;
+	A -= r;
+	A -= r;
+	F::mul(R.x, v, A);
+	r -= A;
+	R.y *= r;
+	R.y -= vv;
+}
+
 // y^2 == (x^2 + a)x + b
-template<class T>
-bool isValidAffine(const T& a, const T& b, const T& x, const T& y)
+template<class F>
+bool isValidAffine(const F& x, const F& y, const F& a, const F& b)
 {
-	T y2, t;
-	T::sqr(y2, y);
-	T::sqr(t, x);
+	F y2, t;
+	F::sqr(y2, y);
+	F::sqr(t, x);
 	t += a;
 	t *= x;
 	t += b;
 	return y2 == t;
 }
 
+// y^2 = x^3 + ax + b
+template<class E>
+static inline void dblAffine(E& R, const E& P, const typename E::Fp& a)
+{
+	typedef typename E::Fp F;
+	if (P.isZero()) {
+		R.clear();
+		return;
+	}
+	if (P.y.isZero()) {
+		R.clear();
+		return;
+	}
+	F t, s;
+	F::sqr(t, P.x);
+	F::add(s, t, t);
+	t += s;
+	t += a;
+	F::add(s, P.y, P.y);
+	t /= s;
+	F::sqr(s, t);
+	s -= P.x;
+	F x3;
+	F::sub(x3, s, P.x);
+	F::sub(s, P.x, x3);
+	s *= t;
+	F::sub(R.y, s, P.y);
+	R.x = x3;
+	R.z = 1;
+}
+
+template<class E>
+void addAffine(E& R, const E& P, const E& Q, const typename E::Fp& a)
+{
+	typedef typename E::Fp F;
+	if (P.isZero()) { R = Q; return; }
+	if (Q.isZero()) { R = P; return; }
+	F t;
+	F::sub(t, Q.x, P.x);
+	if (t.isZero()) {
+		if (P.y == Q.y) {
+			dblAffine(R, P, a);
+		} else {
+			R.clear();
+		}
+		return;
+	}
+	F s;
+	F::sub(s, Q.y, P.y);
+	F::div(t, s, t);
+	R.z = 1;
+	F x3;
+	F::sqr(x3, t);
+	x3 -= P.x;
+	x3 -= Q.x;
+	F::sub(s, P.x, x3);
+	s *= t;
+	F::sub(R.y, s, P.y);
+	R.x = x3;
+}
+
 } // mcl::ec
 
 /*
@@ -142,11 +500,6 @@ bool isValidAffine(const T& a, const T& b, const T& x, const T& y)
 */
 template<class _Fp>
 class EcT : public fp::Serializable<EcT<_Fp> > {
-	enum {
-		zero,
-		minus3,
-		generic
-	};
 public:
 	typedef _Fp Fp;
 	typedef _Fp BaseFp;
@@ -185,15 +538,15 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	}
 	bool isValidJacobi() const
 	{
-		return ec::isValidJacobi(a_, b_, x, y, z);
+		return ec::isValidJacobi(x, y, z, a_, b_);
 	}
 	bool isValidProj() const
 	{
-		return ec::isValidProj(a_, b_, x, y, z);
+		return ec::isValidProj(x, y, z, a_, b_);
 	}
 	bool isValidAffine() const
 	{
-		return ec::isValidAffine(a_, b_, x, y);
+		return ec::isValidAffine(x, y, a_, b_);
 	}
 public:
 	void normalize()
@@ -218,11 +571,11 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		a_ = a;
 		b_ = b;
 		if (a_.isZero()) {
-			specialA_ = zero;
+			specialA_ = ec::zero;
 		} else if (a_ == -3) {
-			specialA_ = minus3;
+			specialA_ = ec::minus3;
 		} else {
-			specialA_ = generic;
+			specialA_ = ec::generic;
 		}
 		ioMode_ = 0;
 		verifyOrder_ = false;
@@ -302,331 +655,30 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		y.clear();
 		z.clear();
 	}
-	static inline void dblNoVerifyInfJacobi(EcT& R, const EcT& P)
-	{
-		Fp S, M, t, y2;
-		Fp::sqr(y2, P.y);
-		Fp::mul(S, P.x, y2);
-		const bool isPzOne = P.z.isOne();
-		S += S;
-		S += S;
-		Fp::sqr(M, P.x);
-		switch (specialA_) {
-		case zero:
-			Fp::add(t, M, M);
-			M += t;
-			break;
-		case minus3:
-			if (isPzOne) {
-				M -= P.z;
-			} else {
-				Fp::sqr(t, P.z);
-				Fp::sqr(t, t);
-				M -= t;
-			}
-			Fp::add(t, M, M);
-			M += t;
-			break;
-		case generic:
-		default:
-			if (isPzOne) {
-				t = a_;
-			} else {
-				Fp::sqr(t, P.z);
-				Fp::sqr(t, t);
-				t *= a_;
-			}
-			t += M;
-			M += M;
-			M += t;
-			break;
-		}
-		Fp::sqr(R.x, M);
-		R.x -= S;
-		R.x -= S;
-		if (isPzOne) {
-			R.z = P.y;
-		} else {
-			Fp::mul(R.z, P.y, P.z);
-		}
-		R.z += R.z;
-		Fp::sqr(y2, y2);
-		y2 += y2;
-		y2 += y2;
-		y2 += y2;
-		Fp::sub(R.y, S, R.x);
-		R.y *= M;
-		R.y -= y2;
-	}
-	static inline void dblNoVerifyInfProj(EcT& R, const EcT& P)
-	{
-		const bool isPzOne = P.z.isOne();
-		Fp w, t, h;
-		switch (specialA_) {
-		case zero:
-			Fp::sqr(w, P.x);
-			Fp::add(t, w, w);
-			w += t;
-			break;
-		case minus3:
-			Fp::sqr(w, P.x);
-			if (isPzOne) {
-				w -= P.z;
-			} else {
-				Fp::sqr(t, P.z);
-				w -= t;
-			}
-			Fp::add(t, w, w);
-			w += t;
-			break;
-		case generic:
-		default:
-			if (isPzOne) {
-				w = a_;
-			} else {
-				Fp::sqr(w, P.z);
-				w *= a_;
-			}
-			Fp::sqr(t, P.x);
-			w += t;
-			w += t;
-			w += t; // w = a z^2 + 3x^2
-			break;
-		}
-		if (isPzOne) {
-			R.z = P.y;
-		} else {
-			Fp::mul(R.z, P.y, P.z); // s = yz
-		}
-		Fp::mul(t, R.z, P.x);
-		t *= P.y; // xys
-		t += t;
-		t += t; // 4(xys) ; 4B
-		Fp::sqr(h, w);
-		h -= t;
-		h -= t; // w^2 - 8B
-		Fp::mul(R.x, h, R.z);
-		t -= h; // h is free
-		t *= w;
-		Fp::sqr(w, P.y);
-		R.x += R.x;
-		R.z += R.z;
-		Fp::sqr(h, R.z);
-		w *= h;
-		R.z *= h;
-		Fp::sub(R.y, t, w);
-		R.y -= w;
-	}
-	static inline void dblNoVerifyInfAffine(EcT& R, const EcT& P)
-	{
-		Fp t, s;
-		Fp::sqr(t, P.x);
-		Fp::add(s, t, t);
-		t += s;
-		t += a_;
-		Fp::add(s, P.y, P.y);
-		t /= s;
-		Fp::sqr(s, t);
-		s -= P.x;
-		Fp x3;
-		Fp::sub(x3, s, P.x);
-		Fp::sub(s, P.x, x3);
-		s *= t;
-		Fp::sub(R.y, s, P.y);
-		R.x = x3;
-		R.z = 1;
-	}
-	static inline void dblNoVerifyInf(EcT& R, const EcT& P)
+	static inline void dbl(EcT& R, const EcT& P)
 	{
 		switch (mode_) {
 		case ec::Jacobi:
-			dblNoVerifyInfJacobi(R, P);
+			ec::dblJacobi(R, P, specialA_, a_);
 			break;
 		case ec::Proj:
-			dblNoVerifyInfProj(R, P);
+			ec::dblProj(R, P, specialA_, a_);
 			break;
 		case ec::Affine:
-			dblNoVerifyInfAffine(R, P);
+			ec::dblAffine(R, P, a_);
 			break;
 		}
 	}
-	static inline void dbl(EcT& R, const EcT& P)
-	{
-		if (P.isZero()) {
-			R.clear();
-			return;
-		}
-		dblNoVerifyInf(R, P);
-	}
-	static inline void addJacobi(EcT& R, const EcT& P, const EcT& Q, bool isPzOne, bool isQzOne)
-	{
-		Fp r, U1, S1, H, H3;
-		if (isPzOne) {
-			// r = 1;
-		} else {
-			Fp::sqr(r, P.z);
-		}
-		if (isQzOne) {
-			U1 = P.x;
-			if (isPzOne) {
-				H = Q.x;
-			} else {
-				Fp::mul(H, Q.x, r);
-			}
-			H -= U1;
-			S1 = P.y;
-		} else {
-			Fp::sqr(S1, Q.z);
-			Fp::mul(U1, P.x, S1);
-			if (isPzOne) {
-				H = Q.x;
-			} else {
-				Fp::mul(H, Q.x, r);
-			}
-			H -= U1;
-			S1 *= Q.z;
-			S1 *= P.y;
-		}
-		if (isPzOne) {
-			r = Q.y;
-		} else {
-			r *= P.z;
-			r *= Q.y;
-		}
-		r -= S1;
-		if (H.isZero()) {
-			if (r.isZero()) {
-				dblNoVerifyInf(R, P);
-			} else {
-				R.clear();
-			}
-			return;
-		}
-		if (isPzOne) {
-			if (isQzOne) {
-				R.z = H;
-			} else {
-				Fp::mul(R.z, H, Q.z);
-			}
-		} else {
-			if (isQzOne) {
-				Fp::mul(R.z, P.z, H);
-			} else {
-				Fp::mul(R.z, P.z, Q.z);
-				R.z *= H;
-			}
-		}
-		Fp::sqr(H3, H); // H^2
-		Fp::sqr(R.y, r); // r^2
-		U1 *= H3; // U1 H^2
-		H3 *= H; // H^3
-		R.y -= U1;
-		R.y -= U1;
-		Fp::sub(R.x, R.y, H3);
-		U1 -= R.x;
-		U1 *= r;
-		H3 *= S1;
-		Fp::sub(R.y, U1, H3);
-	}
-	static inline void addProj(EcT& R, const EcT& P, const EcT& Q, bool isPzOne, bool isQzOne)
-	{
-		Fp r, PyQz, v, A, vv;
-		if (isQzOne) {
-			r = P.x;
-			PyQz = P.y;
-		} else {
-			Fp::mul(r, P.x, Q.z);
-			Fp::mul(PyQz, P.y, Q.z);
-		}
-		if (isPzOne) {
-			A = Q.y;
-			v = Q.x;
-		} else {
-			Fp::mul(A, Q.y, P.z);
-			Fp::mul(v, Q.x, P.z);
-		}
-		v -= r;
-		if (v.isZero()) {
-			if (A == PyQz) {
-				dblNoVerifyInf(R, P);
-			} else {
-				R.clear();
-			}
-			return;
-		}
-		Fp::sub(R.y, A, PyQz);
-		Fp::sqr(A, R.y);
-		Fp::sqr(vv, v);
-		r *= vv;
-		vv *= v;
-		if (isQzOne) {
-			R.z = P.z;
-		} else {
-			if (isPzOne) {
-				R.z = Q.z;
-			} else {
-				Fp::mul(R.z, P.z, Q.z);
-			}
-		}
-		// R.z = 1 if isPzOne && isQzOne
-		if (isPzOne && isQzOne) {
-			R.z = vv;
-		} else {
-			A *= R.z;
-			R.z *= vv;
-		}
-		A -= vv;
-		vv *= PyQz;
-		A -= r;
-		A -= r;
-		Fp::mul(R.x, v, A);
-		r -= A;
-		R.y *= r;
-		R.y -= vv;
-	}
-	static inline void addAffine(EcT& R, const EcT& P, const EcT& Q)
-	{
-		Fp t;
-		Fp::sub(t, Q.x, P.x);
-		if (t.isZero()) {
-			if (P.y == Q.y) {
-				dblNoVerifyInf(R, P);
-			} else {
-				R.clear();
-			}
-			return;
-		}
-		Fp s;
-		Fp::sub(s, Q.y, P.y);
-		Fp::div(t, s, t);
-		R.z = 1;
-		Fp x3;
-		Fp::sqr(x3, t);
-		x3 -= P.x;
-		x3 -= Q.x;
-		Fp::sub(s, P.x, x3);
-		s *= t;
-		Fp::sub(R.y, s, P.y);
-		R.x = x3;
-	}
 	static inline void add(EcT& R, const EcT& P, const EcT& Q) {
-		if (P.isZero()) { R = Q; return; }
-		if (Q.isZero()) { R = P; return; }
-		if (&P == &Q) {
-			dblNoVerifyInf(R, P);
-			return;
-		}
-		bool isPzOne = P.z.isOne();
-		bool isQzOne = Q.z.isOne();
 		switch (mode_) {
 		case ec::Jacobi:
-			addJacobi(R, P, Q, isPzOne, isQzOne);
+			ec::addJacobi(R, P, Q, specialA_, a_);
 			break;
 		case ec::Proj:
-			addProj(R, P, Q, isPzOne, isQzOne);
+			ec::addProj(R, P, Q, specialA_, a_);
 			break;
 		case ec::Affine:
-			addAffine(R, P, Q);
+			ec::addAffine(R, P, Q, a_);
 			break;
 		}
 	}

From 75b1a8adc7e6857023fa1c4b22ec797e33992da7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 9 Apr 2020 17:09:31 +0900
Subject: [PATCH 182/553] rename tag name of special a

---
 include/mcl/ec.hpp | 49 +++++++++++++++++++++-------------------------
 1 file changed, 22 insertions(+), 27 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 31c4c96b..0444683d 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -44,9 +44,9 @@ enum Mode {
 };
 
 enum ModeCoeffA {
-	zero,
-	minus3,
-	generic
+	Zero,
+	Minus3,
+	GenericA
 };
 
 namespace local {
@@ -121,11 +121,11 @@ void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
 	S += S;
 	F::sqr(M, P.x);
 	switch (specialA) {
-	case zero:
+	case Zero:
 		F::add(t, M, M);
 		M += t;
 		break;
-	case minus3:
+	case Minus3:
 		if (isPzOne) {
 			M -= P.z;
 		} else {
@@ -136,7 +136,7 @@ void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
 		F::add(t, M, M);
 		M += t;
 		break;
-	case generic:
+	case GenericA:
 	default:
 		if (isPzOne) {
 			t = a;
@@ -173,10 +173,9 @@ void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
 	mul| 12
 	add|  7
 */
-template<class E>
-void addJacobi(E& R, const E& P, const E& Q, int specialA, const typename E::Fp& a)
+template<class E, class F>
+void addJacobi(E& R, const E& P, const E& Q, int specialA, const F& a)
 {
-	typedef typename E::Fp F;
 	if (P.isZero()) { R = Q; return; }
 	if (Q.isZero()) { R = P; return; }
 	bool isPzOne = P.z.isOne();
@@ -283,10 +282,9 @@ bool isValidProj(const F& x, const F& y, const F& z, const F& a, const F& b)
 	mul|  8| 8| 9
 	add| 11|12|12
 */
-template<class E>
-void dblProj(E& R, const E& P, int specialA, const typename E::Fp& a)
+template<class E, class F>
+void dblProj(E& R, const E& P, int specialA, const F& a)
 {
-	typedef typename E::Fp F;
 	if (P.isZero()) {
 		R.clear();
 		return;
@@ -294,12 +292,12 @@ void dblProj(E& R, const E& P, int specialA, const typename E::Fp& a)
 	const bool isPzOne = P.z.isOne();
 	F w, t, h;
 	switch (specialA) {
-	case zero:
+	case Zero:
 		F::sqr(w, P.x);
 		F::add(t, w, w);
 		w += t;
 		break;
-	case minus3:
+	case Minus3:
 		F::sqr(w, P.x);
 		if (isPzOne) {
 			w -= P.z;
@@ -310,7 +308,7 @@ void dblProj(E& R, const E& P, int specialA, const typename E::Fp& a)
 		F::add(t, w, w);
 		w += t;
 		break;
-	case generic:
+	case GenericA:
 	default:
 		if (isPzOne) {
 			w = a;
@@ -354,10 +352,9 @@ void dblProj(E& R, const E& P, int specialA, const typename E::Fp& a)
 	mul| 12
 	add|  7
 */
-template<class E>
-void addProj(E& R, const E& P, const E& Q, int specialA, const typename E::Fp& a)
+template<class E, class F>
+void addProj(E& R, const E& P, const E& Q, int specialA, const F& a)
 {
-	typedef typename E::Fp F;
 	if (P.isZero()) { R = Q; return; }
 	if (Q.isZero()) { R = P; return; }
 	bool isPzOne = P.z.isOne();
@@ -431,10 +428,9 @@ bool isValidAffine(const F& x, const F& y, const F& a, const F& b)
 }
 
 // y^2 = x^3 + ax + b
-template<class E>
-static inline void dblAffine(E& R, const E& P, const typename E::Fp& a)
+template<class E, class F>
+static inline void dblAffine(E& R, const E& P, const F& a)
 {
-	typedef typename E::Fp F;
 	if (P.isZero()) {
 		R.clear();
 		return;
@@ -461,10 +457,9 @@ static inline void dblAffine(E& R, const E& P, const typename E::Fp& a)
 	R.z = 1;
 }
 
-template<class E>
-void addAffine(E& R, const E& P, const E& Q, const typename E::Fp& a)
+template<class E, class F>
+void addAffine(E& R, const E& P, const E& Q, const F& a)
 {
-	typedef typename E::Fp F;
 	if (P.isZero()) { R = Q; return; }
 	if (Q.isZero()) { R = P; return; }
 	F t;
@@ -571,11 +566,11 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		a_ = a;
 		b_ = b;
 		if (a_.isZero()) {
-			specialA_ = ec::zero;
+			specialA_ = ec::Zero;
 		} else if (a_ == -3) {
-			specialA_ = ec::minus3;
+			specialA_ = ec::Minus3;
 		} else {
-			specialA_ = ec::generic;
+			specialA_ = ec::GenericA;
 		}
 		ioMode_ = 0;
 		verifyOrder_ = false;

From 9c62b3d9640cce131901c56c199d67fc3f2679d3 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 11 Apr 2020 11:14:41 +0900
Subject: [PATCH 183/553] refactor ec

---
 include/mcl/ec.hpp         | 158 +++++++++++++++++++++++++++----------
 include/mcl/mapto_wb19.hpp |  64 +++++++++------
 test/ec_test.cpp           |   2 +-
 3 files changed, 159 insertions(+), 65 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 0444683d..a31cd6f6 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -71,9 +71,9 @@ bool get_a_flag(const mcl::Fp2T<F>& x)
 template<class F>
 void normalizeJacobi(F& x, F& y, F& z)
 {
-	assert(!z.isZero());
-	F rz2;
+	if (z.isZero()) return;
 	F::inv(z, z);
+	F rz2;
 	F::sqr(rz2, z);
 	x *= rz2;
 	y *= rz2;
@@ -81,6 +81,23 @@ void normalizeJacobi(F& x, F& y, F& z)
 	z = 1;
 }
 
+// (x/z^2, y/z^3)
+template<class F>
+bool isEqualJacobi(const F& x1, const F& y1, const F& z1, const F& x2, const F& y2, const F& z2)
+{
+	F s1, s2, t1, t2;
+	F::sqr(s1, z1);
+	F::sqr(s2, z2);
+	F::mul(t1, x1, s2);
+	F::mul(t2, x2, s1);
+	if (t1 != t2) return false;
+	F::mul(t1, y1, s2);
+	F::mul(t2, y2, s1);
+	t1 *= z2;
+	t2 *= z1;
+	return t1 == t2;
+}
+
 // Y^2 == X(X^2 + aZ^4) + bZ^6
 template<class F>
 bool isValidJacobi(const F& x, const F& y, const F& z, const F& a, const F& b)
@@ -100,10 +117,9 @@ bool isValidJacobi(const F& x, const F& y, const F& z, const F& a, const F& b)
 }
 
 /*
-	   |a=0|-3| generic
-	sqr|  4| 6| 6
-	mul|  3| 3| 4
-	add| 12|13|13
+	a = 0   3M + 4S + 12A
+	a = -3  3M + 6S + 13A
+	generic 4M + 6S + 13A
 */
 template<class E>
 void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
@@ -113,6 +129,65 @@ void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
 		R.clear();
 		return;
 	}
+#if 0
+	// a = 0    M + 7S + 15A
+	// a = -3   M + 8S + 18A
+	// generic 2M + 8S + 16A
+	F x2, y2, y4, z2, s, m, t;
+    F::sqr(x2, P.x);
+    F::sqr(y2, P.y);
+    F::sqr(y4, y2);
+	const bool isPzOne = P.z.isOne();
+	if (isPzOne) {
+		z2 = P.z;
+	} else {
+	    F::sqr(z2, P.z);
+	}
+    F::add(s, P.x, y2);
+    F::sqr(s, s);
+    s -= x2;
+    s -= y4;
+    s += s;
+	F::add(m, x2, x2);
+	m += x2;
+	switch (specialA) {
+	case Zero:
+		break;
+	case Minus3:
+		if (isPzOne) {
+			t = z2;
+		} else {
+			F::sqr(t, z2);
+		}
+		m -= t;
+		m -= t;
+		m -= t;
+		break;
+	case GenericA:
+	default:
+		if (isPzOne) {
+			m += a;
+		} else {
+			F::sqr(t, z2);
+			t *= a;
+			m += t;
+		}
+		break;
+	}
+    F::sqr(t, m);
+    t -= s;
+    F::sub(R.x, t, s); // m^2 - 2s
+	F::add(R.z, P.y, P.z);
+	F::sqr(R.z, R.z);
+	R.z -= y2;
+	R.z -= z2;
+	F::sub(R.y, s, R.x);
+	R.y *= m;
+	F::add(t, y4, y4);
+	t += t;
+	t += t;
+	R.y -= t;
+#else
 	F S, M, t, y2;
 	F::sqr(y2, P.y);
 	F::mul(S, P.x, y2);
@@ -166,13 +241,10 @@ void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
 	F::sub(R.y, S, R.x);
 	R.y *= M;
 	R.y -= y2;
+#endif
 }
 
-/*
-	sqr|  4
-	mul| 12
-	add|  7
-*/
+// 7M + 4S + 7A
 template<class E, class F>
 void addJacobi(E& R, const E& P, const E& Q, int specialA, const F& a)
 {
@@ -252,7 +324,7 @@ void addJacobi(E& R, const E& P, const E& Q, int specialA, const F& a)
 template<class F>
 void normalizeProj(F& x, F& y, F& z)
 {
-	assert(!z.isZero());
+	if (z.isZero()) return;
 	F::inv(z, z);
 	x *= z;
 	y *= z;
@@ -276,10 +348,23 @@ bool isValidProj(const F& x, const F& y, const F& z, const F& a, const F& b)
 	return y2 == t;
 }
 
+// (x/z, y/z)
+template<class F>
+bool isEqualProj(const F& x1, const F& y1, const F& z1, const F& x2, const F& y2, const F& z2)
+{
+	F t1, t2;
+	F::mul(t1, x1, z2);
+	F::mul(t2, x2, z1);
+	if (t1 != t2) return false;
+	F::mul(t1, y1, z2);
+	F::mul(t2, y2, z1);
+	return t1 == t2;
+}
+
 /*
 	   |a=0|-3| generic
-	sqr|  4| 5| 5
 	mul|  8| 8| 9
+	sqr|  4| 5| 5
 	add| 11|12|12
 */
 template<class E, class F>
@@ -348,8 +433,8 @@ void dblProj(E& R, const E& P, int specialA, const F& a)
 }
 
 /*
-	sqr|  2
 	mul| 12
+	sqr|  2
 	add|  7
 */
 template<class E, class F>
@@ -523,22 +608,6 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 		return isZero() || z.isOne();
 	}
 private:
-	void normalizeJacobi()
-	{
-		ec::normalizeJacobi(x, y, z);
-	}
-	void normalizeProj()
-	{
-		ec::normalizeProj(x, y, z);
-	}
-	bool isValidJacobi() const
-	{
-		return ec::isValidJacobi(x, y, z, a_, b_);
-	}
-	bool isValidProj() const
-	{
-		return ec::isValidProj(x, y, z, a_, b_);
-	}
 	bool isValidAffine() const
 	{
 		return ec::isValidAffine(x, y, a_, b_);
@@ -546,13 +615,12 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 public:
 	void normalize()
 	{
-		if (isNormalized()) return;
 		switch (mode_) {
 		case ec::Jacobi:
-			normalizeJacobi();
+			ec::normalizeJacobi(x, y, z);
 			break;
 		case ec::Proj:
-			normalizeProj();
+			ec::normalizeProj(x, y, z);
 			break;
 		}
 	}
@@ -617,15 +685,15 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	}
 	bool isValid() const
 	{
-		if (isZero()) return true;
 		switch (mode_) {
 		case ec::Jacobi:
-			if (!isValidJacobi()) return false;
+			if (!ec::isValidJacobi(x, y, z, a_, b_)) return false;
 			break;
 		case ec::Proj:
-			if (!isValidProj()) return false;
+			if (!ec::isValidProj(x, y, z, a_, b_)) return false;
 			break;
 		case ec::Affine:
+			if (z.isZero()) return true;
 			if (!isValidAffine()) return false;
 			break;
 		}
@@ -898,7 +966,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 				z.clear();
 				return;
 			}
-			goto verifyValidness;
+			goto verifyValidAffine;
 		}
 		if (ioMode & (IoSerialize | IoSerializeHexStr)) {
 			const size_t n = Fp::getByteSize();
@@ -982,7 +1050,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 			x.load(pb, is, ioMode); if (!*pb) return;
 			if (c == '1') {
 				y.load(pb, is, ioMode); if (!*pb) return;
-				goto verifyValidness;
+				goto verifyValidAffine;
 			} else if (c == '2' || c == '3') {
 				bool isYodd = c == '3';
 				*pb = getYfromX(y, x, isYodd);
@@ -1008,7 +1076,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 			*pb = true;
 		}
 		return;
-	verifyValidness:
+	verifyValidAffine:
 		if (!isValidAffine()) {
 			*pb = false;
 			return;
@@ -1063,9 +1131,15 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	EcT operator-() const { EcT x; neg(x, *this); return x; }
 	bool operator==(const EcT& rhs) const
 	{
-		EcT R;
-		sub(R, *this, rhs); // QQQ : optimized later
-		return R.isZero();
+		switch (mode_) {
+		case ec::Jacobi:
+			return ec::isEqualJacobi(x, y, z, rhs.x, rhs.y, rhs.z);
+		case ec::Proj:
+			return ec::isEqualProj(x, y, z, rhs.x, rhs.y, rhs.z);
+		case ec::Affine:
+		default:
+			return x == rhs.x && y == rhs.y && z == rhs.z;
+		}
 	}
 	bool operator!=(const EcT& rhs) const { return !operator==(rhs); }
 	bool operator<(const EcT& rhs) const
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index b77810fc..98e94a98 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -37,6 +37,26 @@ inline void hashToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, co
 	}
 }
 
+namespace local {
+
+template<class F>
+struct PointT {
+	typedef F Fp;
+	F x, y, z;
+	bool isZero() const
+	{
+		return z.isZero();
+	}
+	void clear()
+	{
+		x.clear();
+		y.clear();
+		z.clear();
+	}
+};
+
+} // mcl::local
+
 template<class Fp, class Fp2, class G2>
 struct MapToG2_WB19 {
 	Fp2 xi;
@@ -51,17 +71,11 @@ struct MapToG2_WB19 {
 	Fp2 ynum[4];
 	Fp2 yden[4];
 	int draftVersion_;
+	typedef local::PointT<Fp2> Point;
 	void setDraftVersion(int version)
 	{
 		draftVersion_ = version;
 	}
-	struct Point {
-		Fp2 x, y, z;
-		bool isZero() const
-		{
-			return z.isZero();
-		}
-	};
 	// should be merged into ec.hpp
 	template<class G>
 	void neg(G& Q, const G& P) const
@@ -70,7 +84,7 @@ struct MapToG2_WB19 {
 		Fp2::neg(Q.y, P.y);
 		Q.z = P.z;
 	}
-	// Jacobi
+	// Jacobi : sqr 4, mul 12, add 11
 	template<class G>
 	void add(G& R, const G& P, const G& Q) const
 	{
@@ -121,9 +135,13 @@ struct MapToG2_WB19 {
 		R.y -= S1;
 		R.y -= S1;
 	}
+	// jacobi : sqr 5, mul 2, add 14
 	template<class G>
-	void dbl(G& Q, const G& P) const
+	void dblT(G& Q, const G& P) const
 	{
+#if 0
+		ec::dblJacobi(Q, P, ec::GenericA, Ell2p_a);
+#else
 		Fp2 A, B, C, D, E, F;
 		Fp2::sqr(A, P.x);
 		Fp2::sqr(B, P.y);
@@ -151,22 +169,24 @@ struct MapToG2_WB19 {
 		C += C;
 		C += C;
 		Q.y -= C;
+#endif
+	}
+	void dbl(Point& Q, const Point& P) const
+	{
+		dblT(Q, P);
+	}
+	void dbl(G2& Q, const G2& P) const
+	{
+		dblT(Q, P);
 	}
 	// P is on y^2 = x^3 + Ell2p_a x + Ell2p_b
 	bool isValidPoint(const Point& P) const
 	{
-		Fp2 y2, x2, z2, z4, t;
-		Fp2::sqr(x2, P.x);
-		Fp2::sqr(y2, P.y);
-		Fp2::sqr(z2, P.z);
-		Fp2::sqr(z4, z2);
-		Fp2::mul(t, z4, Ell2p_a);
-		t += x2;
-		t *= P.x;
-		z4 *= z2;
-		z4 *= Ell2p_b;
-		t += z4;
-		return y2 == t;
+		return ec::isValidJacobi(P.x, P.y, P.z, Ell2p_a, Ell2p_b);
+	}
+	bool isValidPoint(const G2& P) const
+	{
+		return P.isValid();
 	}
 	void init()
 	{
@@ -617,7 +637,7 @@ struct MapToG2_WB19 {
 	void clear_h2(G2& Q, const G2& P) const
 	{
 #if 0
-		mcl::bn::BN::param.mapTo.mulByCofactorBLS12fast(Q, P);
+		bn::param.mapTo.mulByCofactorBLS12fast(Q, P);
 #else
 		G2 work, work2;
 		h2_chain(work, P);
diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index 27c5a297..4c93e42b 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -612,7 +612,7 @@ CYBOZU_TEST_AUTO(all)
 {
 	if (g_partial & (1 << 3)) {
 		const struct mcl::EcParam para3[] = {
-//			mcl::ecparam::p160_1,
+			mcl::ecparam::p160_1,
 			mcl::ecparam::secp160k1,
 			mcl::ecparam::secp192k1,
 			mcl::ecparam::NIST_P192,

From 7e6ab212fbcc160de823fc21dd25e3bf363cd792 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 12 Apr 2020 10:36:13 +0900
Subject: [PATCH 184/553] refactor ec::dbl

---
 include/mcl/ec.hpp         | 108 +++++++++----------------------------
 include/mcl/mapto_wb19.hpp |  17 +++---
 test/mapto_wb19_test.cpp   |   6 +--
 3 files changed, 38 insertions(+), 93 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index a31cd6f6..0b9376b5 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -117,9 +117,9 @@ bool isValidJacobi(const F& x, const F& y, const F& z, const F& a, const F& b)
 }
 
 /*
-	a = 0   3M + 4S + 12A
-	a = -3  3M + 6S + 13A
-	generic 4M + 6S + 13A
+	a = 0   2M + 5S + 14A
+	a = -3  2M + 7S + 15A
+	generic 3M + 7S + 15A
 */
 template<class E>
 void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
@@ -129,87 +129,31 @@ void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
 		R.clear();
 		return;
 	}
-#if 0
-	// a = 0    M + 7S + 15A
-	// a = -3   M + 8S + 18A
-	// generic 2M + 8S + 16A
-	F x2, y2, y4, z2, s, m, t;
-    F::sqr(x2, P.x);
-    F::sqr(y2, P.y);
-    F::sqr(y4, y2);
 	const bool isPzOne = P.z.isOne();
-	if (isPzOne) {
-		z2 = P.z;
-	} else {
-	    F::sqr(z2, P.z);
-	}
-    F::add(s, P.x, y2);
-    F::sqr(s, s);
-    s -= x2;
-    s -= y4;
-    s += s;
-	F::add(m, x2, x2);
-	m += x2;
-	switch (specialA) {
-	case Zero:
-		break;
-	case Minus3:
-		if (isPzOne) {
-			t = z2;
-		} else {
-			F::sqr(t, z2);
-		}
-		m -= t;
-		m -= t;
-		m -= t;
-		break;
-	case GenericA:
-	default:
-		if (isPzOne) {
-			m += a;
-		} else {
-			F::sqr(t, z2);
-			t *= a;
-			m += t;
-		}
-		break;
-	}
-    F::sqr(t, m);
-    t -= s;
-    F::sub(R.x, t, s); // m^2 - 2s
-	F::add(R.z, P.y, P.z);
-	F::sqr(R.z, R.z);
-	R.z -= y2;
-	R.z -= z2;
-	F::sub(R.y, s, R.x);
-	R.y *= m;
-	F::add(t, y4, y4);
-	t += t;
-	t += t;
-	R.y -= t;
-#else
-	F S, M, t, y2;
+	F x2, y2, xy, t;
+	F::sqr(x2, P.x);
 	F::sqr(y2, P.y);
-	F::mul(S, P.x, y2);
-	const bool isPzOne = P.z.isOne();
-	S += S;
-	S += S;
-	F::sqr(M, P.x);
+	F::add(xy, P.x, y2);
+	F::sqr(y2, y2);
+	F::sqr(xy, xy);
+	xy -= x2;
+	xy -= y2;
+	xy += xy;
 	switch (specialA) {
 	case Zero:
-		F::add(t, M, M);
-		M += t;
+		F::add(t, x2, x2);
+		x2 += t;
 		break;
 	case Minus3:
 		if (isPzOne) {
-			M -= P.z;
+			x2 -= P.z;
 		} else {
 			F::sqr(t, P.z);
 			F::sqr(t, t);
-			M -= t;
+			x2 -= t;
 		}
-		F::add(t, M, M);
-		M += t;
+		F::add(t, x2, x2);
+		x2 += t;
 		break;
 	case GenericA:
 	default:
@@ -220,28 +164,26 @@ void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
 			F::sqr(t, t);
 			t *= a;
 		}
-		t += M;
-		M += M;
-		M += t;
+		t += x2;
+		x2 += x2;
+		x2 += t;
 		break;
 	}
-	F::sqr(R.x, M);
-	R.x -= S;
-	R.x -= S;
+	F::sqr(R.x, x2);
+	R.x -= xy;
+	R.x -= xy;
 	if (isPzOne) {
 		R.z = P.y;
 	} else {
 		F::mul(R.z, P.y, P.z);
 	}
 	R.z += R.z;
-	F::sqr(y2, y2);
+	F::sub(R.y, xy, R.x);
+	R.y *= x2;
 	y2 += y2;
 	y2 += y2;
 	y2 += y2;
-	F::sub(R.y, S, R.x);
-	R.y *= M;
 	R.y -= y2;
-#endif
 }
 
 // 7M + 4S + 7A
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 98e94a98..3c96d411 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -135,14 +135,14 @@ struct MapToG2_WB19 {
 		R.y -= S1;
 		R.y -= S1;
 	}
-	// jacobi : sqr 5, mul 2, add 14
+	// jacobi : 2M + 5S + 14A
 	template<class G>
 	void dblT(G& Q, const G& P) const
 	{
 #if 0
 		ec::dblJacobi(Q, P, ec::GenericA, Ell2p_a);
 #else
-		Fp2 A, B, C, D, E, F;
+		Fp2 A, B, C, D, e, f;
 		Fp2::sqr(A, P.x);
 		Fp2::sqr(B, P.y);
 		Fp2::sqr(C, B);
@@ -151,10 +151,10 @@ struct MapToG2_WB19 {
 		D -= A;
 		D -= C;
 		D += D;
-		Fp2::add(E, A, A);
-		E += A;
-		Fp2::sqr(F, E);
-		Fp2::sub(Q.x, F, D);
+		Fp2::add(e, A, A);
+		e += A;
+		Fp2::sqr(f, e);
+		Fp2::sub(Q.x, f, D);
 		Q.x -= D;
 		Fp2::mul(Q.z, P.y, P.z);
 		if (Q.z.isZero()) {
@@ -164,7 +164,7 @@ struct MapToG2_WB19 {
 		}
 		Q.z += Q.z;
 		Fp2::sub(Q.y, D, Q.x);
-		Q.y *= E;
+		Q.y *= e;
 		C += C;
 		C += C;
 		C += C;
@@ -178,6 +178,7 @@ struct MapToG2_WB19 {
 	void dbl(G2& Q, const G2& P) const
 	{
 		dblT(Q, P);
+//		G2::dbl(Q, P);
 	}
 	// P is on y^2 = x^3 + Ell2p_a x + Ell2p_b
 	bool isValidPoint(const Point& P) const
@@ -301,6 +302,7 @@ struct MapToG2_WB19 {
 	// refer (xnum, xden, ynum, yden)
 	void iso3(G2& Q, const Point& P) const
 	{
+//		assert(isValidPoint(P));
 		Fp2 zpows[3];
 		Fp2::sqr(zpows[0], P.z);
 		Fp2::sqr(zpows[1], zpows[0]);
@@ -321,6 +323,7 @@ struct MapToG2_WB19 {
 		Fp2::sqr(t, Q.z);
 		Fp2::mul(Q.y, mapvals[2], mapvals[1]);
 		Q.y *= t;
+//		assert(Q.isValid());
 	}
 	/*
 		(a+bi)*(-2-i) = (b-2a)-(a+2b)i
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index a8e8cbf4..0d69a561 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -688,9 +688,9 @@ void testHashToFp2v6(const T& mapto)
 		Fp2 out[2];
 		mapto.hashToFp2v6(out, msg, strlen(msg), dst, strlen(dst));
 		Fp2 expect[2];
-		for (int i = 0; i < 2; i++) {
-			set(expect[i], expectStr[i]);
-			CYBOZU_TEST_EQUAL(out[i], expect[i]);
+		for (int j = 0; j < 2; j++) {
+			set(expect[j], expectStr[j]);
+			CYBOZU_TEST_EQUAL(out[j], expect[j]);
 		}
 		if (i == 0) {
 			// from draft-irtf-cfrg-hash-to-curve/poc/vectors/BLS12381G2_XMD:SHA-256_SSWU_RO_.json.swp

From a0dcc763f8f4325e4ecf8be573f8fd5ad7076df6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 12 Apr 2020 18:18:31 +0900
Subject: [PATCH 185/553] refactor mapto

---
 include/mcl/ec.hpp         | 224 +++++++++++++++++++++----------------
 include/mcl/mapto_wb19.hpp | 108 +++++++++---------
 2 files changed, 185 insertions(+), 147 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 0b9376b5..20cc7ab7 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -51,6 +51,11 @@ enum ModeCoeffA {
 
 namespace local {
 
+/*
+	elliptic class E must have
+	member variables of type Fp x, y, z
+	static member a_, b_, specialA_
+*/
 // x is negative <=> x < half(:=(p+1)/2) <=> a = 1
 template<class F>
 bool get_a_flag(const F& x)
@@ -68,61 +73,69 @@ bool get_a_flag(const mcl::Fp2T<F>& x)
 
 } // mcl::ec::local
 
-template<class F>
-void normalizeJacobi(F& x, F& y, F& z)
+template<class E>
+void normalizeJacobi(E& P)
 {
-	if (z.isZero()) return;
-	F::inv(z, z);
+	typedef typename E::Fp F;
+	if (P.z.isZero()) return;
+	F::inv(P.z, P.z);
 	F rz2;
-	F::sqr(rz2, z);
-	x *= rz2;
-	y *= rz2;
-	y *= z;
-	z = 1;
+	F::sqr(rz2, P.z);
+	P.x *= rz2;
+	P.y *= rz2;
+	P.y *= P.z;
+	P.z = 1;
 }
 
 // (x/z^2, y/z^3)
-template<class F>
-bool isEqualJacobi(const F& x1, const F& y1, const F& z1, const F& x2, const F& y2, const F& z2)
+template<class E>
+bool isEqualJacobi(const E& P1, const E& P2)
 {
+	typedef typename E::Fp F;
 	F s1, s2, t1, t2;
-	F::sqr(s1, z1);
-	F::sqr(s2, z2);
-	F::mul(t1, x1, s2);
-	F::mul(t2, x2, s1);
+	F::sqr(s1, P1.z);
+	F::sqr(s2, P2.z);
+	F::mul(t1, P1.x, s2);
+	F::mul(t2, P2.x, s1);
 	if (t1 != t2) return false;
-	F::mul(t1, y1, s2);
-	F::mul(t2, y2, s1);
-	t1 *= z2;
-	t2 *= z1;
+	F::mul(t1, P1.y, s2);
+	F::mul(t2, P2.y, s1);
+	t1 *= P2.z;
+	t2 *= P1.z;
 	return t1 == t2;
 }
 
 // Y^2 == X(X^2 + aZ^4) + bZ^6
-template<class F>
-bool isValidJacobi(const F& x, const F& y, const F& z, const F& a, const F& b)
+template<class E>
+bool isValidJacobi(const E& P)
 {
+	typedef typename E::Fp F;
 	F y2, x2, z2, z4, t;
-	F::sqr(x2, x);
-	F::sqr(y2, y);
-	F::sqr(z2, z);
+	F::sqr(x2, P.x);
+	F::sqr(y2, P.y);
+	F::sqr(z2, P.z);
 	F::sqr(z4, z2);
-	F::mul(t, z4, a);
+	F::mul(t, z4, E::a_);
 	t += x2;
-	t *= x;
+	t *= P.x;
 	z4 *= z2;
-	z4 *= b;
+	z4 *= E::b_;
 	t += z4;
 	return y2 == t;
 }
 
 /*
+	M > S + A
 	a = 0   2M + 5S + 14A
 	a = -3  2M + 7S + 15A
 	generic 3M + 7S + 15A
+	M == S
+	a = 0   3M + 4S + 13A
+	a = -3  3M + 6S + 14A
+	generic 4M + 6S + 14A
 */
 template<class E>
-void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
+void dblJacobi(E& R, const E& P)
 {
 	typedef typename E::Fp F;
 	if (P.isZero()) {
@@ -133,13 +146,19 @@ void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
 	F x2, y2, xy, t;
 	F::sqr(x2, P.x);
 	F::sqr(y2, P.y);
-	F::add(xy, P.x, y2);
-	F::sqr(y2, y2);
-	F::sqr(xy, xy);
-	xy -= x2;
-	xy -= y2;
-	xy += xy;
-	switch (specialA) {
+	if (sizeof(F) <= 32) { // M == S
+		F::mul(xy, P.x, y2);
+		xy += xy;
+		F::sqr(y2, y2);
+	} else { // M > S + A
+		F::add(xy, P.x, y2);
+		F::sqr(y2, y2);
+		F::sqr(xy, xy);
+		xy -= x2;
+		xy -= y2;
+	}
+	xy += xy; // 4xy^2
+	switch (E::specialA_) {
 	case Zero:
 		F::add(t, x2, x2);
 		x2 += t;
@@ -158,11 +177,11 @@ void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
 	case GenericA:
 	default:
 		if (isPzOne) {
-			t = a;
+			t = E::a_;
 		} else {
 			F::sqr(t, P.z);
 			F::sqr(t, t);
-			t *= a;
+			t *= E::a_;
 		}
 		t += x2;
 		x2 += x2;
@@ -187,9 +206,10 @@ void dblJacobi(E& R, const E& P, int specialA, const typename E::Fp& a)
 }
 
 // 7M + 4S + 7A
-template<class E, class F>
-void addJacobi(E& R, const E& P, const E& Q, int specialA, const F& a)
+template<class E>
+void addJacobi(E& R, const E& P, const E& Q)
 {
+	typedef typename E::Fp F;
 	if (P.isZero()) { R = Q; return; }
 	if (Q.isZero()) { R = P; return; }
 	bool isPzOne = P.z.isOne();
@@ -230,7 +250,7 @@ void addJacobi(E& R, const E& P, const E& Q, int specialA, const F& a)
 	r -= S1;
 	if (H.isZero()) {
 		if (r.isZero()) {
-			ec::dblJacobi(R, P, specialA, a);
+			ec::dblJacobi(R, P);
 		} else {
 			R.clear();
 		}
@@ -263,43 +283,46 @@ void addJacobi(E& R, const E& P, const E& Q, int specialA, const F& a)
 	F::sub(R.y, U1, H3);
 }
 
-template<class F>
-void normalizeProj(F& x, F& y, F& z)
+template<class E>
+void normalizeProj(E& P)
 {
-	if (z.isZero()) return;
-	F::inv(z, z);
-	x *= z;
-	y *= z;
-	z = 1;
+	typedef typename E::Fp F;
+	if (P.z.isZero()) return;
+	F::inv(P.z, P.z);
+	P.x *= P.z;
+	P.y *= P.z;
+	P.z = 1;
 }
 
 // (Y^2 - bZ^2)Z = X(X^2 + aZ^2)
-template<class F>
-bool isValidProj(const F& x, const F& y, const F& z, const F& a, const F& b)
+template<class E>
+bool isValidProj(const E& P)
 {
+	typedef typename E::Fp F;
 	F y2, x2, z2, t;
-	F::sqr(x2, x);
-	F::sqr(y2, y);
-	F::sqr(z2, z);
-	F::mul(t, a, z2);
+	F::sqr(x2, P.x);
+	F::sqr(y2, P.y);
+	F::sqr(z2, P.z);
+	F::mul(t, E::a_, z2);
 	t += x2;
-	t *= x;
-	z2 *= b;
+	t *= P.x;
+	z2 *= E::b_;
 	y2 -= z2;
-	y2 *= z;
+	y2 *= P.z;
 	return y2 == t;
 }
 
 // (x/z, y/z)
-template<class F>
-bool isEqualProj(const F& x1, const F& y1, const F& z1, const F& x2, const F& y2, const F& z2)
+template<class E>
+bool isEqualProj(const E& P1, const E& P2)
 {
+	typedef typename E::Fp F;
 	F t1, t2;
-	F::mul(t1, x1, z2);
-	F::mul(t2, x2, z1);
+	F::mul(t1, P1.x, P2.z);
+	F::mul(t2, P2.x, P1.z);
 	if (t1 != t2) return false;
-	F::mul(t1, y1, z2);
-	F::mul(t2, y2, z1);
+	F::mul(t1, P1.y, P2.z);
+	F::mul(t2, P2.y, P1.z);
 	return t1 == t2;
 }
 
@@ -309,16 +332,17 @@ bool isEqualProj(const F& x1, const F& y1, const F& z1, const F& x2, const F& y2
 	sqr|  4| 5| 5
 	add| 11|12|12
 */
-template<class E, class F>
-void dblProj(E& R, const E& P, int specialA, const F& a)
+template<class E>
+void dblProj(E& R, const E& P)
 {
+	typedef typename E::Fp F;
 	if (P.isZero()) {
 		R.clear();
 		return;
 	}
 	const bool isPzOne = P.z.isOne();
 	F w, t, h;
-	switch (specialA) {
+	switch (E::specialA_) {
 	case Zero:
 		F::sqr(w, P.x);
 		F::add(t, w, w);
@@ -338,10 +362,10 @@ void dblProj(E& R, const E& P, int specialA, const F& a)
 	case GenericA:
 	default:
 		if (isPzOne) {
-			w = a;
+			w = E::a_;
 		} else {
 			F::sqr(w, P.z);
-			w *= a;
+			w *= E::a_;
 		}
 		F::sqr(t, P.x);
 		w += t;
@@ -379,9 +403,10 @@ void dblProj(E& R, const E& P, int specialA, const F& a)
 	sqr|  2
 	add|  7
 */
-template<class E, class F>
-void addProj(E& R, const E& P, const E& Q, int specialA, const F& a)
+template<class E>
+void addProj(E& R, const E& P, const E& Q)
 {
+	typedef typename E::Fp F;
 	if (P.isZero()) { R = Q; return; }
 	if (Q.isZero()) { R = P; return; }
 	bool isPzOne = P.z.isOne();
@@ -404,7 +429,7 @@ void addProj(E& R, const E& P, const E& Q, int specialA, const F& a)
 	v -= r;
 	if (v.isZero()) {
 		if (A == PyQz) {
-			dblProj(R, P, specialA, a);
+			dblProj(R, P);
 		} else {
 			R.clear();
 		}
@@ -442,22 +467,25 @@ void addProj(E& R, const E& P, const E& Q, int specialA, const F& a)
 }
 
 // y^2 == (x^2 + a)x + b
-template<class F>
-bool isValidAffine(const F& x, const F& y, const F& a, const F& b)
+template<class E>
+bool isValidAffine(const E& P)
 {
+	typedef typename E::Fp F;
+	assert(!P.z.isZero());
 	F y2, t;
-	F::sqr(y2, y);
-	F::sqr(t, x);
-	t += a;
-	t *= x;
-	t += b;
+	F::sqr(y2, P.y);
+	F::sqr(t, P.x);
+	t += E::a_;
+	t *= P.x;
+	t += E::b_;
 	return y2 == t;
 }
 
 // y^2 = x^3 + ax + b
-template<class E, class F>
-static inline void dblAffine(E& R, const E& P, const F& a)
+template<class E>
+static inline void dblAffine(E& R, const E& P)
 {
+	typedef typename E::Fp F;
 	if (P.isZero()) {
 		R.clear();
 		return;
@@ -470,7 +498,7 @@ static inline void dblAffine(E& R, const E& P, const F& a)
 	F::sqr(t, P.x);
 	F::add(s, t, t);
 	t += s;
-	t += a;
+	t += E::a_;
 	F::add(s, P.y, P.y);
 	t /= s;
 	F::sqr(s, t);
@@ -484,16 +512,17 @@ static inline void dblAffine(E& R, const E& P, const F& a)
 	R.z = 1;
 }
 
-template<class E, class F>
-void addAffine(E& R, const E& P, const E& Q, const F& a)
+template<class E>
+void addAffine(E& R, const E& P, const E& Q)
 {
+	typedef typename E::Fp F;
 	if (P.isZero()) { R = Q; return; }
 	if (Q.isZero()) { R = P; return; }
 	F t;
 	F::sub(t, Q.x, P.x);
 	if (t.isZero()) {
 		if (P.y == Q.y) {
-			dblAffine(R, P, a);
+			dblAffine(R, P);
 		} else {
 			R.clear();
 		}
@@ -552,17 +581,17 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 private:
 	bool isValidAffine() const
 	{
-		return ec::isValidAffine(x, y, a_, b_);
+		return ec::isValidAffine(*this);
 	}
 public:
 	void normalize()
 	{
 		switch (mode_) {
 		case ec::Jacobi:
-			ec::normalizeJacobi(x, y, z);
+			ec::normalizeJacobi(*this);
 			break;
 		case ec::Proj:
-			ec::normalizeProj(x, y, z);
+			ec::normalizeProj(*this);
 			break;
 		}
 	}
@@ -629,10 +658,10 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	{
 		switch (mode_) {
 		case ec::Jacobi:
-			if (!ec::isValidJacobi(x, y, z, a_, b_)) return false;
+			if (!ec::isValidJacobi(*this)) return false;
 			break;
 		case ec::Proj:
-			if (!ec::isValidProj(x, y, z, a_, b_)) return false;
+			if (!ec::isValidProj(*this)) return false;
 			break;
 		case ec::Affine:
 			if (z.isZero()) return true;
@@ -664,26 +693,27 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	{
 		switch (mode_) {
 		case ec::Jacobi:
-			ec::dblJacobi(R, P, specialA_, a_);
+			ec::dblJacobi(R, P);
 			break;
 		case ec::Proj:
-			ec::dblProj(R, P, specialA_, a_);
+			ec::dblProj(R, P);
 			break;
 		case ec::Affine:
-			ec::dblAffine(R, P, a_);
+			ec::dblAffine(R, P);
 			break;
 		}
 	}
-	static inline void add(EcT& R, const EcT& P, const EcT& Q) {
+	static inline void add(EcT& R, const EcT& P, const EcT& Q)
+	{
 		switch (mode_) {
 		case ec::Jacobi:
-			ec::addJacobi(R, P, Q, specialA_, a_);
+			ec::addJacobi(R, P, Q);
 			break;
 		case ec::Proj:
-			ec::addProj(R, P, Q, specialA_, a_);
+			ec::addProj(R, P, Q);
 			break;
 		case ec::Affine:
-			ec::addAffine(R, P, Q, a_);
+			ec::addAffine(R, P, Q);
 			break;
 		}
 	}
@@ -1075,9 +1105,9 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	{
 		switch (mode_) {
 		case ec::Jacobi:
-			return ec::isEqualJacobi(x, y, z, rhs.x, rhs.y, rhs.z);
+			return ec::isEqualJacobi(*this, rhs);
 		case ec::Proj:
-			return ec::isEqualProj(x, y, z, rhs.x, rhs.y, rhs.z);
+			return ec::isEqualProj(*this, rhs);
 		case ec::Affine:
 		default:
 			return x == rhs.x && y == rhs.y && z == rhs.z;
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 3c96d411..92ab315b 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -43,6 +43,9 @@ template<class F>
 struct PointT {
 	typedef F Fp;
 	F x, y, z;
+	static F a_;
+	static F b_;
+	static int specialA_;
 	bool isZero() const
 	{
 		return z.isZero();
@@ -53,15 +56,24 @@ struct PointT {
 		y.clear();
 		z.clear();
 	}
+#if 0
+	bool isEqual(const PointT<F>& rhs) const
+	{
+		return ec::isEqualJacobi(*this, rhs);
+	}
+#endif
 };
 
+template<class F> F PointT<F>::a_;
+template<class F> F PointT<F>::b_;
+template<class F> int PointT<F>::specialA_;
+
 } // mcl::local
 
 template<class Fp, class Fp2, class G2>
 struct MapToG2_WB19 {
+	typedef local::PointT<Fp2> Point;
 	Fp2 xi;
-	Fp2 Ell2p_a;
-	Fp2 Ell2p_b;
 	Fp half;
 	mpz_class sqrtConst; // (p^2 - 9) / 16
 	Fp2 root4[4];
@@ -71,7 +83,6 @@ struct MapToG2_WB19 {
 	Fp2 ynum[4];
 	Fp2 yden[4];
 	int draftVersion_;
-	typedef local::PointT<Fp2> Point;
 	void setDraftVersion(int version)
 	{
 		draftVersion_ = version;
@@ -174,16 +185,12 @@ struct MapToG2_WB19 {
 	void dbl(Point& Q, const Point& P) const
 	{
 		dblT(Q, P);
-	}
-	void dbl(G2& Q, const G2& P) const
-	{
-		dblT(Q, P);
-//		G2::dbl(Q, P);
+//		ec::dblJacobi(Q, P);
 	}
 	// P is on y^2 = x^3 + Ell2p_a x + Ell2p_b
 	bool isValidPoint(const Point& P) const
 	{
-		return ec::isValidJacobi(P.x, P.y, P.z, Ell2p_a, Ell2p_b);
+		return ec::isValidJacobi(P);
 	}
 	bool isValidPoint(const G2& P) const
 	{
@@ -194,10 +201,11 @@ struct MapToG2_WB19 {
 		bool b;
 		xi.a = -2;
 		xi.b = -1;
-		Ell2p_a.a = 0;
-		Ell2p_a.b = 240;
-		Ell2p_b.a = 1012;
-		Ell2p_b.b = 1012;
+		Point::a_.a = 0;
+		Point::a_.b = 240;
+		Point::b_.a = 1012;
+		Point::b_.b = 1012;
+		Point::specialA_ = ec::GenericA;
 		half = -1;
 		half /= 2;
 		sqrtConst = Fp::getOp().mp;
@@ -386,11 +394,11 @@ struct MapToG2_WB19 {
 		// (t^2 * xi)^2 + (t^2 * xi)
 		Fp2::add(deno, t2xi2, t2xi);
 		Fp2::add(nume, deno, 1);
-		nume *= Ell2p_b;
+		nume *= Point::b_;
 		if (deno.isZero()) {
-			Fp2::mul(deno, Ell2p_a, xi);
+			Fp2::mul(deno, Point::a_, xi);
 		} else {
-			deno *= -Ell2p_a;
+			deno *= -Point::a_;
 		}
 		Fp2 u, v;
 		{
@@ -398,8 +406,8 @@ struct MapToG2_WB19 {
 			Fp2::sqr(deno2, deno);
 			Fp2::mul(v, deno2, deno);
 
-			Fp2::mul(u, Ell2p_b, v);
-			Fp2::mul(tmp, Ell2p_a, nume);
+			Fp2::mul(u, Point::b_, v);
+			Fp2::mul(tmp, Point::a_, nume);
 			tmp *= deno2;
 			u += tmp;
 			Fp2::sqr(tmp, nume);
@@ -506,20 +514,20 @@ struct MapToG2_WB19 {
 		den += den2;
 		Fp2 x0_num, x0_den;
 		Fp2::add(x0_num, den, 1);
-		x0_num *= Ell2p_b;
+		x0_num *= Point::b_;
 		if (den.isZero()) {
-			Fp2::mul(x0_den, Ell2p_a, xi);
+			Fp2::mul(x0_den, Point::a_, xi);
 		} else {
-			Fp2::mul(x0_den, -Ell2p_a, den);
+			Fp2::mul(x0_den, -Point::a_, den);
 		}
 		Fp2 x0_den2, x0_den3, gx0_den, gx0_num;
 		Fp2::sqr(x0_den2, x0_den);
 		Fp2::mul(x0_den3, x0_den2, x0_den);
 		gx0_den = x0_den3;
 
-		Fp2::mul(gx0_num, Ell2p_b, gx0_den);
+		Fp2::mul(gx0_num, Point::b_, gx0_den);
 		Fp2 tmp, tmp1, tmp2;
-		Fp2::mul(tmp, Ell2p_a, x0_num);
+		Fp2::mul(tmp, Point::a_, x0_num);
 		tmp *= x0_den2;
 		gx0_num += tmp;
 		Fp2::sqr(tmp, x0_num);
@@ -581,22 +589,22 @@ struct MapToG2_WB19 {
 	{
 		G2 t[16];
 		t[0] = P;
-		dbl(t[1], t[0]);
-		add(t[4], t[1], t[0]);
-		add(t[2], t[4], t[1]);
-		add(t[3], t[2], t[1]);
-		add(t[11], t[3], t[1]);
-		add(t[9], t[11], t[1]);
-		add(t[10], t[9], t[1]);
-		add(t[5], t[10], t[1]);
-		add(t[7], t[5], t[1]);
-		add(t[15], t[7], t[1]);
-		add(t[13], t[15], t[1]);
-		add(t[6], t[13], t[1]);
-		add(t[14], t[6], t[1]);
-		add(t[12], t[14], t[1]);
-		add(t[8], t[12], t[1]);
-		dbl(t[1], t[6]);
+		G2::dbl(t[1], t[0]);
+		G2::add(t[4], t[1], t[0]);
+		G2::add(t[2], t[4], t[1]);
+		G2::add(t[3], t[2], t[1]);
+		G2::add(t[11], t[3], t[1]);
+		G2::add(t[9], t[11], t[1]);
+		G2::add(t[10], t[9], t[1]);
+		G2::add(t[5], t[10], t[1]);
+		G2::add(t[7], t[5], t[1]);
+		G2::add(t[15], t[7], t[1]);
+		G2::add(t[13], t[15], t[1]);
+		G2::add(t[6], t[13], t[1]);
+		G2::add(t[14], t[6], t[1]);
+		G2::add(t[12], t[14], t[1]);
+		G2::add(t[8], t[12], t[1]);
+		G2::dbl(t[1], t[6]);
 
 		const struct {
 			uint32_t n;
@@ -618,21 +626,21 @@ struct MapToG2_WB19 {
 		};
 		for (size_t j = 0; j < CYBOZU_NUM_OF_ARRAY(tbl); j++) {
 			const uint32_t n = tbl[j].n;
-			for (size_t i = 0; i < n; i++) dbl(t[1], t[1]);
-			add(t[1], t[1], t[tbl[j].idx]);
+			for (size_t i = 0; i < n; i++) G2::dbl(t[1], t[1]);
+			G2::add(t[1], t[1], t[tbl[j].idx]);
 		}
-		for (size_t i = 0; i < 5; i++) dbl(t[1], t[1]);
-		add(out, t[1], t[2]);
+		for (size_t i = 0; i < 5; i++) G2::dbl(t[1], t[1]);
+		G2::add(out, t[1], t[2]);
 	}
 	void mx_chain(G2& Q, const G2& P) const
 	{
 		G2 T;
-		dbl(T, P);
+		G2::dbl(T, P);
 		const size_t tbl[] = { 2, 3, 9, 32, 16 };
 		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
-			add(T, T, P);
+			G2::add(T, T, P);
 			for (size_t j = 0; j < tbl[i]; j++) {
-				dbl(T, T);
+				G2::dbl(T, T);
 			}
 		}
 		Q = T;
@@ -644,12 +652,12 @@ struct MapToG2_WB19 {
 #else
 		G2 work, work2;
 		h2_chain(work, P);
-		dbl(work2, work);
-		add(work2, work, work2);
+		G2::dbl(work2, work);
+		G2::add(work2, work, work2);
 		mx_chain(work, work2);
 		mx_chain(work, work);
-		neg(work2, work2);
-		add(Q, work, work2);
+		G2::neg(work2, work2);
+		G2::add(Q, work, work2);
 #endif
 	}
 	template<class T>

From 41c11a0f758251caecb07bb4f8e6c075c739f878 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 12 Apr 2020 19:54:20 +0900
Subject: [PATCH 186/553] move unused functions to test.cpp

---
 include/mcl/mapto_wb19.hpp | 144 -------------------------------
 test/mapto_wb19_test.cpp   | 168 ++++++++++++++++++++++++++++++++++---
 2 files changed, 158 insertions(+), 154 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 92ab315b..269ee122 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -354,114 +354,6 @@ struct MapToG2_WB19 {
 		if (!x.b.isZero()) return false;
 		return false;
 	}
-	/*
-		z = sqrt(u/v) = (uv^7) (uv^15)^((p^2-9)/16) * root4
-		return true if found
-	*/
-	bool sqr_div(Fp2& z, const Fp2& u, const Fp2& v) const
-	{
-		Fp2 gamma, t1, t2;
-		Fp2::sqr(gamma, v); // v^2
-		Fp2::sqr(t2, gamma); // v^4
-		Fp2::mul(t1, u, v); // uv
-		t1 *= gamma; // uv^3
-		t1 *= t2; // uv^7
-		Fp2::sqr(t2, t2); // v^8
-		t2 *= t1;
-		Fp2::pow(gamma, t2, sqrtConst);
-		gamma *= t1;
-		Fp2 candi;
-		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(root4); i++) {
-			Fp2::mul(candi, gamma, root4[i]);
-			Fp2::sqr(t1, candi);
-			t1 *= v;
-			if (t1 == u) {
-				z = candi;
-				return true;
-			}
-		}
-		z = gamma;
-		return false;
-	}
-	// https://github.com/ethereum/py_ecc
-	void py_ecc_optimized_swu_G2(Point& P, const Fp2& t) const
-	{
-		Fp2 t2, t2xi, t2xi2;
-		Fp2::sqr(t2, t);
-		mul_xi(t2xi, t2);
-		Fp2::sqr(t2xi2, t2xi);
-		Fp2 nume, deno;
-		// (t^2 * xi)^2 + (t^2 * xi)
-		Fp2::add(deno, t2xi2, t2xi);
-		Fp2::add(nume, deno, 1);
-		nume *= Point::b_;
-		if (deno.isZero()) {
-			Fp2::mul(deno, Point::a_, xi);
-		} else {
-			deno *= -Point::a_;
-		}
-		Fp2 u, v;
-		{
-			Fp2 deno2, tmp, tmp1, tmp2;
-			Fp2::sqr(deno2, deno);
-			Fp2::mul(v, deno2, deno);
-
-			Fp2::mul(u, Point::b_, v);
-			Fp2::mul(tmp, Point::a_, nume);
-			tmp *= deno2;
-			u += tmp;
-			Fp2::sqr(tmp, nume);
-			tmp *= nume;
-			u += tmp;
-		}
-		Fp2 candi;
-		bool success = sqr_div(candi, u, v);
-		P.y = candi;
-		candi *= t2;
-		candi *= t;
-		u *= t2xi2;
-		u *= t2xi;
-		bool success2 = false;
-		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(etas); i++) {
-			Fp2 t1;
-			Fp2::mul(t1, etas[i], candi);
-			Fp2::sqr(t2, t1);
-			t2 *= v;
-			if (t2 == u && !success && !success2) {
-				P.y = t1;
-				success2 = true;
-			}
-		}
-		assert(success || success2);
-		if (!success) {
-			nume *= t2xi;
-		}
-		if (isNegSign(t) != isNegSign(P.y)) {
-			Fp2::neg(P.y, P.y);
-		}
-		P.y *= deno;
-		P.x = nume;
-		P.z = deno;
-	}
-	// Proj
-	void py_ecc_iso_map_G2(G2& Q, const Point& P) const
-	{
-		Fp2 zpows[3];
-		zpows[0] = P.z;
-		Fp2::sqr(zpows[1], zpows[0]);
-		Fp2::mul(zpows[2], zpows[1], zpows[0]);
-		Fp2 mapvals[4];
-		evalPoly(mapvals[0], P.x, zpows, xnum);
-		evalPoly(mapvals[1], P.x, zpows, xden);
-		evalPoly(mapvals[2], P.x, zpows, ynum);
-		evalPoly(mapvals[3], P.x, zpows, yden);
-		mapvals[1] *= P.z;
-		mapvals[2] *= P.y;
-		mapvals[3] *= P.z;
-		Fp2::mul(Q.z, mapvals[1], mapvals[3]);
-		Fp2::mul(Q.x, mapvals[0], mapvals[3]);
-		Fp2::mul(Q.y, mapvals[1], mapvals[2]);
-	}
 	/*
 		in : Jacobi [X:Y:Z]
 		out : Proj [A:B:C]
@@ -478,29 +370,6 @@ struct MapToG2_WB19 {
 		out.y = in.y;
 		Fp2::mul(out.z, in.z, z2);
 	}
-	/*
-		in : Proj [X:Y:Z]
-		out : Jacobi [A:B:C]
-		[X:Y:Z] as Proj
-		= (X/Z, Y/Z) as Affine
-		= [X/Z:Y/Z:1] as Jacobi
-		= [XZ:YZ^2:Z] as Jacobi
-	*/
-	void toJacobi(G2& out, const G2& in) const
-	{
-		Fp2 z2;
-		Fp2::sqr(z2, in.z);
-		Fp2::mul(out.x, in.x, in.z);
-		Fp2::mul(out.y, in.y, z2);
-		out.z = in.z;
-	}
-	// Proj
-	void py_ecc_map_to_curve_G2(G2& out, const Fp2& t) const
-	{
-		Point P;
-		py_ecc_optimized_swu_G2(P, t);
-		py_ecc_iso_map_G2(out, P);
-	}
 	// https://github.com/algorand/bls_sigs_ref
 	void osswu2_help(Point& P, const Fp2& t) const
 	{
@@ -692,19 +561,6 @@ struct MapToG2_WB19 {
 		iso3(P, Pp);
 		clear_h2(P, P);
 	}
-	void py_ecc_hash_to_G2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
-	{
-		Fp2 t1, t2;
-		hashToFp2(t1, msg, msgSize, 0, dst, dstSize);
-		hashToFp2(t2, msg, msgSize, 1, dst, dstSize);
-		G2 P1, P2;
-		py_ecc_map_to_curve_G2(P1, t1);
-		py_ecc_map_to_curve_G2(P2, t2);
-		toJacobi(P1, P1);
-		toJacobi(P2, P2);
-		P1 += P2;
-		clear_h2(out, P1);
-	}
 	// hash-to-curve-06
 	void hashToFp2v6(Fp2 out[2], const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
 	{
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 0d69a561..8d32c6f4 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -96,6 +96,154 @@ std::string toHexStr(const G2& P)
 	return toHexStr(xy, 96);
 }
 
+/*
+	z = sqrt(u/v) = (uv^7) (uv^15)^((p^2-9)/16) * root4
+	return true if found
+*/
+bool sqr_div(const MapTo& mapto, Fp2& z, const Fp2& u, const Fp2& v)
+{
+	Fp2 gamma, t1, t2;
+	Fp2::sqr(gamma, v); // v^2
+	Fp2::sqr(t2, gamma); // v^4
+	Fp2::mul(t1, u, v); // uv
+	t1 *= gamma; // uv^3
+	t1 *= t2; // uv^7
+	Fp2::sqr(t2, t2); // v^8
+	t2 *= t1;
+	Fp2::pow(gamma, t2, mapto.sqrtConst);
+	gamma *= t1;
+	Fp2 candi;
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(mapto.root4); i++) {
+		Fp2::mul(candi, gamma, mapto.root4[i]);
+		Fp2::sqr(t1, candi);
+		t1 *= v;
+		if (t1 == u) {
+			z = candi;
+			return true;
+		}
+	}
+	z = gamma;
+	return false;
+}
+
+// Proj
+void py_ecc_iso_map_G2(const MapTo& mapto, G2& Q, const Point& P)
+{
+	Fp2 zpows[3];
+	zpows[0] = P.z;
+	Fp2::sqr(zpows[1], zpows[0]);
+	Fp2::mul(zpows[2], zpows[1], zpows[0]);
+	Fp2 mapvals[4];
+	mapto.evalPoly(mapvals[0], P.x, zpows, mapto.xnum);
+	mapto.evalPoly(mapvals[1], P.x, zpows, mapto.xden);
+	mapto.evalPoly(mapvals[2], P.x, zpows, mapto.ynum);
+	mapto.evalPoly(mapvals[3], P.x, zpows, mapto.yden);
+	mapvals[1] *= P.z;
+	mapvals[2] *= P.y;
+	mapvals[3] *= P.z;
+	Fp2::mul(Q.z, mapvals[1], mapvals[3]);
+	Fp2::mul(Q.x, mapvals[0], mapvals[3]);
+	Fp2::mul(Q.y, mapvals[1], mapvals[2]);
+}
+
+// https://github.com/ethereum/py_ecc
+void py_ecc_optimized_swu_G2(const MapTo& mapto, Point& P, const Fp2& t)
+{
+	Fp2 t2, t2xi, t2xi2;
+	Fp2::sqr(t2, t);
+	mapto.mul_xi(t2xi, t2);
+	Fp2::sqr(t2xi2, t2xi);
+	Fp2 nume, deno;
+	// (t^2 * xi)^2 + (t^2 * xi)
+	Fp2::add(deno, t2xi2, t2xi);
+	Fp2::add(nume, deno, 1);
+	nume *= Point::b_;
+	if (deno.isZero()) {
+		Fp2::mul(deno, Point::a_, mapto.xi);
+	} else {
+		deno *= -Point::a_;
+	}
+	Fp2 u, v;
+	{
+		Fp2 deno2, tmp, tmp1, tmp2;
+		Fp2::sqr(deno2, deno);
+		Fp2::mul(v, deno2, deno);
+
+		Fp2::mul(u, Point::b_, v);
+		Fp2::mul(tmp, Point::a_, nume);
+		tmp *= deno2;
+		u += tmp;
+		Fp2::sqr(tmp, nume);
+		tmp *= nume;
+		u += tmp;
+	}
+	Fp2 candi;
+	bool success = sqr_div(mapto, candi, u, v);
+	P.y = candi;
+	candi *= t2;
+	candi *= t;
+	u *= t2xi2;
+	u *= t2xi;
+	bool success2 = false;
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(mapto.etas); i++) {
+		Fp2 t1;
+		Fp2::mul(t1, mapto.etas[i], candi);
+		Fp2::sqr(t2, t1);
+		t2 *= v;
+		if (t2 == u && !success && !success2) {
+			P.y = t1;
+			success2 = true;
+		}
+	}
+	assert(success || success2);
+	if (!success) {
+		nume *= t2xi;
+	}
+	if (mapto.isNegSign(t) != mapto.isNegSign(P.y)) {
+		Fp2::neg(P.y, P.y);
+	}
+	P.y *= deno;
+	P.x = nume;
+	P.z = deno;
+}
+// Proj
+void py_ecc_map_to_curve_G2(const MapTo& mapto, G2& out, const Fp2& t)
+{
+	Point P;
+	py_ecc_optimized_swu_G2(mapto, P, t);
+	py_ecc_iso_map_G2(mapto, out, P);
+}
+/*
+	in : Proj [X:Y:Z]
+	out : Jacobi [A:B:C]
+	[X:Y:Z] as Proj
+	= (X/Z, Y/Z) as Affine
+	= [X/Z:Y/Z:1] as Jacobi
+	= [XZ:YZ^2:Z] as Jacobi
+*/
+void toJacobi(G2& out, const G2& in)
+{
+	Fp2 z2;
+	Fp2::sqr(z2, in.z);
+	Fp2::mul(out.x, in.x, in.z);
+	Fp2::mul(out.y, in.y, z2);
+	out.z = in.z;
+}
+
+void py_ecc_hash_to_G2(const MapTo& mapto, G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
+{
+	Fp2 t1, t2;
+	hashToFp2(t1, msg, msgSize, 0, dst, dstSize);
+	hashToFp2(t2, msg, msgSize, 1, dst, dstSize);
+	G2 P1, P2;
+	py_ecc_map_to_curve_G2(mapto, P1, t1);
+	py_ecc_map_to_curve_G2(mapto, P2, t2);
+	toJacobi(P1, P1);
+	toJacobi(P2, P2);
+	P1 += P2;
+	mapto.clear_h2(out, P1);
+}
+
 void ethMsgToG2test(const std::string& fileName)
 {
 	const char *dst = "\x02";
@@ -247,8 +395,8 @@ void py_eccTest(const T& mapto)
 		Fp2 t1, t2;
 		ethMsgToFp2(t1, msg, msgSize, 0, dst, dstSize);
 		ethMsgToFp2(t2, msg, msgSize, 1, dst, dstSize);
-		mapto.py_ecc_map_to_curve_G2(P1, t1);
-		mapto.py_ecc_map_to_curve_G2(P2, t2);
+		py_ecc_map_to_curve_G2(mapto, P1, t1);
+		py_ecc_map_to_curve_G2(mapto, P2, t2);
 		const PointStr ss = {
 			{
 				"1972340536407012813644167184956896760015950618902823780657111692209122974250648595689834944711427684709284318183285",
@@ -263,12 +411,12 @@ void py_eccTest(const T& mapto)
 				"3253481872910728113595595353980041952789112074899014850028493351493155577726278005524067083458491999010934020984031",
 			}
 		};
-		mapto.toJacobi(P1, P1);
-		mapto.toJacobi(P2, P2);
+		toJacobi(P1, P1);
+		toJacobi(P2, P2);
 		P1 += P2;
 		G2 P11;
 		set(P11, ss);
-		mapto.toJacobi(P11, P11);
+		toJacobi(P11, P11);
 		CYBOZU_TEST_EQUAL(P1, P11);
 		const PointStr clears = {
 			{
@@ -286,14 +434,14 @@ void py_eccTest(const T& mapto)
 		};
 		set(P11, clears);
 		mapto.clear_h2(P1, P1);
-		mapto.toJacobi(P11, P11);
+		toJacobi(P11, P11);
 		CYBOZU_TEST_EQUAL(P1, P11);
-		mapto.py_ecc_hash_to_G2(P1, msg, msgSize, dst, dstSize);
+		py_ecc_hash_to_G2(mapto, P1, msg, msgSize, dst, dstSize);
 		CYBOZU_TEST_EQUAL(P1, P11);
 		ethMsgToG2(P1, msg, msgSize, dst, dstSize);
 		CYBOZU_TEST_EQUAL(P1, P11);
 		set(P11, sigs);
-		mapto.toJacobi(P11, P11);
+		toJacobi(P11, P11);
 		P1 *= sec;
 		CYBOZU_TEST_EQUAL(P1, P11);
 		CYBOZU_TEST_EQUAL(P1.serializeToHexStr(), expect);
@@ -618,14 +766,14 @@ void py_eccTest2(const T& mapto)
 	Fp2 t;
 	set(t, ts);
 	Point p, q;
-	mapto.py_ecc_optimized_swu_G2(p, t);
+	py_ecc_optimized_swu_G2(mapto, p, t);
 	set(q, out1s);
 	CYBOZU_TEST_EQUAL(p.x, q.x);
 	CYBOZU_TEST_EQUAL(p.y, q.y);
 	CYBOZU_TEST_EQUAL(p.z, q.z);
 	G2 P, Q;
 	set(P, out2s);
-	mapto.py_ecc_map_to_curve_G2(Q, t);
+	py_ecc_map_to_curve_G2(mapto, Q, t);
 	CYBOZU_TEST_EQUAL(P, Q);
 }
 

From 2dd76c7b670c3cc830fef8d6948d5f33624619d2 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 12 Apr 2020 21:43:00 +0900
Subject: [PATCH 187/553] unify ec operations

---
 include/mcl/mapto_wb19.hpp | 174 ++++---------------------------------
 test/mapto_wb19_test.cpp   |  23 +++--
 2 files changed, 29 insertions(+), 168 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 269ee122..07f5dfd3 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -39,6 +39,7 @@ inline void hashToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, co
 
 namespace local {
 
+// y^2 = x^3 + 4(1 + i)
 template<class F>
 struct PointT {
 	typedef F Fp;
@@ -56,12 +57,10 @@ struct PointT {
 		y.clear();
 		z.clear();
 	}
-#if 0
 	bool isEqual(const PointT<F>& rhs) const
 	{
 		return ec::isEqualJacobi(*this, rhs);
 	}
-#endif
 };
 
 template<class F> F PointT<F>::a_;
@@ -73,9 +72,10 @@ template<class F> int PointT<F>::specialA_;
 template<class Fp, class Fp2, class G2>
 struct MapToG2_WB19 {
 	typedef local::PointT<Fp2> Point;
-	Fp2 xi;
 	Fp half;
 	mpz_class sqrtConst; // (p^2 - 9) / 16
+	Fp2 Ep_a;
+	Fp2 Ep_b;
 	Fp2 root4[4];
 	Fp2 etas[4];
 	Fp2 xnum[4];
@@ -87,125 +87,17 @@ struct MapToG2_WB19 {
 	{
 		draftVersion_ = version;
 	}
-	// should be merged into ec.hpp
-	template<class G>
-	void neg(G& Q, const G& P) const
-	{
-		Q.x = P.x;
-		Fp2::neg(Q.y, P.y);
-		Q.z = P.z;
-	}
-	// Jacobi : sqr 4, mul 12, add 11
-	template<class G>
-	void add(G& R, const G& P, const G& Q) const
-	{
-		if (P.isZero()) {
-			R = Q;
-			return;
-		}
-		if (Q.isZero()) {
-			R = Q;
-			return;
-		}
-		Fp2 Z1Z1, Z2Z2, U1, U2, S1, S2;
-		Fp2::sqr(Z1Z1, P.z);
-		Fp2::sqr(Z2Z2, Q.z);
-		Fp2::mul(U1, P.x, Z2Z2);
-		Fp2::mul(U2, Q.x, Z1Z1);
-		Fp2::mul(S1, P.y, Q.z);
-		S1 *= Z2Z2;
-		Fp2::mul(S2, Q.y, P.z);
-		S2 *= Z1Z1;
-		if (U1 == U2 && S1 == S2) {
-			dbl(R, P);
-			return;
-		}
-		Fp2 H, I, J, rr, V;
-		Fp2::sub(H, U2, U1);
-		Fp2::add(I, H, H);
-		Fp2::sqr(I, I);
-		Fp2::mul(J, H, I);
-		Fp2::sub(rr, S2, S1);
-		rr += rr;
-		Fp2::mul(V, U1, I);
-		Fp2::mul(R.z, P.z, Q.z);
-		R.z *= H;
-		if (R.z.isZero()) {
-			R.x.clear();
-			R.y.clear();
-			return;
-		}
-		R.z += R.z;
-		Fp2::sqr(R.x, rr);
-		R.x -= J;
-		R.x -= V;
-		R.x -= V;
-		Fp2::sub(R.y, V, R.x);
-		R.y *= rr;
-		S1 *= J;
-		R.y -= S1;
-		R.y -= S1;
-	}
-	// jacobi : 2M + 5S + 14A
-	template<class G>
-	void dblT(G& Q, const G& P) const
-	{
-#if 0
-		ec::dblJacobi(Q, P, ec::GenericA, Ell2p_a);
-#else
-		Fp2 A, B, C, D, e, f;
-		Fp2::sqr(A, P.x);
-		Fp2::sqr(B, P.y);
-		Fp2::sqr(C, B);
-		Fp2::add(D, P.x, B);
-		Fp2::sqr(D, D);
-		D -= A;
-		D -= C;
-		D += D;
-		Fp2::add(e, A, A);
-		e += A;
-		Fp2::sqr(f, e);
-		Fp2::sub(Q.x, f, D);
-		Q.x -= D;
-		Fp2::mul(Q.z, P.y, P.z);
-		if (Q.z.isZero()) {
-			Q.x.clear();
-			Q.y.clear();
-			return;
-		}
-		Q.z += Q.z;
-		Fp2::sub(Q.y, D, Q.x);
-		Q.y *= e;
-		C += C;
-		C += C;
-		C += C;
-		Q.y -= C;
-#endif
-	}
-	void dbl(Point& Q, const Point& P) const
-	{
-		dblT(Q, P);
-//		ec::dblJacobi(Q, P);
-	}
-	// P is on y^2 = x^3 + Ell2p_a x + Ell2p_b
-	bool isValidPoint(const Point& P) const
-	{
-		return ec::isValidJacobi(P);
-	}
-	bool isValidPoint(const G2& P) const
-	{
-		return P.isValid();
-	}
 	void init()
 	{
 		bool b;
-		xi.a = -2;
-		xi.b = -1;
-		Point::a_.a = 0;
-		Point::a_.b = 240;
-		Point::b_.a = 1012;
-		Point::b_.b = 1012;
-		Point::specialA_ = ec::GenericA;
+		Ep_a.a = 0;
+		Ep_a.b = 240;
+		Ep_b.a = 1012;
+		Ep_b.b = 1012;
+		Point::a_.clear();
+		Point::b_.a = 4;
+		Point::b_.b = 4;
+		Point::specialA_ = ec::Zero;
 		half = -1;
 		half /= 2;
 		sqrtConst = Fp::getOp().mp;
@@ -310,7 +202,6 @@ struct MapToG2_WB19 {
 	// refer (xnum, xden, ynum, yden)
 	void iso3(G2& Q, const Point& P) const
 	{
-//		assert(isValidPoint(P));
 		Fp2 zpows[3];
 		Fp2::sqr(zpows[0], P.z);
 		Fp2::sqr(zpows[1], zpows[0]);
@@ -331,9 +222,9 @@ struct MapToG2_WB19 {
 		Fp2::sqr(t, Q.z);
 		Fp2::mul(Q.y, mapvals[2], mapvals[1]);
 		Q.y *= t;
-//		assert(Q.isValid());
 	}
 	/*
+		xi = -2-i
 		(a+bi)*(-2-i) = (b-2a)-(a+2b)i
 	*/
 	void mul_xi(Fp2& y, const Fp2& x) const
@@ -354,22 +245,6 @@ struct MapToG2_WB19 {
 		if (!x.b.isZero()) return false;
 		return false;
 	}
-	/*
-		in : Jacobi [X:Y:Z]
-		out : Proj [A:B:C]
-		[X:Y:Z] as Jacobi
-		= (X/Z^2, Y/Z^3) as Affine
-		= [X/Z^2:Y/Z^3:1] as Proj
-		= [XZ:Y:Z^3] as Proj
-	*/
-	void toProj(G2& out, const G2& in) const
-	{
-		Fp2 z2;
-		Fp2::sqr(z2, in.z);
-		Fp2::mul(out.x, in.x, in.z);
-		out.y = in.y;
-		Fp2::mul(out.z, in.z, z2);
-	}
 	// https://github.com/algorand/bls_sigs_ref
 	void osswu2_help(Point& P, const Fp2& t) const
 	{
@@ -383,20 +258,20 @@ struct MapToG2_WB19 {
 		den += den2;
 		Fp2 x0_num, x0_den;
 		Fp2::add(x0_num, den, 1);
-		x0_num *= Point::b_;
+		x0_num *= Ep_b;
 		if (den.isZero()) {
-			Fp2::mul(x0_den, Point::a_, xi);
+			mul_xi(x0_den, Ep_a);
 		} else {
-			Fp2::mul(x0_den, -Point::a_, den);
+			Fp2::mul(x0_den, -Ep_a, den);
 		}
 		Fp2 x0_den2, x0_den3, gx0_den, gx0_num;
 		Fp2::sqr(x0_den2, x0_den);
 		Fp2::mul(x0_den3, x0_den2, x0_den);
 		gx0_den = x0_den3;
 
-		Fp2::mul(gx0_num, Point::b_, gx0_den);
+		Fp2::mul(gx0_num, Ep_b, gx0_den);
 		Fp2 tmp, tmp1, tmp2;
-		Fp2::mul(tmp, Point::a_, x0_num);
+		Fp2::mul(tmp, Ep_a, x0_num);
 		tmp *= x0_den2;
 		gx0_num += tmp;
 		Fp2::sqr(tmp, x0_num);
@@ -537,18 +412,6 @@ struct MapToG2_WB19 {
 		printf("y=%s\n", P.y.getStr(base).c_str());
 		printf("z=%s\n", P.z.getStr(base).c_str());
 	}
-	bool normalizeJacobi(Point& out, const Point& in) const
-	{
-		if (in.z.isZero()) return false;
-		Fp2 t;
-		Fp2::inv(t, in.z);
-		Fp2::mul(out.y, in.y, t);
-		Fp2::sqr(t, t);
-		Fp2::mul(out.x, in.x, t);
-		out.y *= t;
-		out.z = 1;
-		return true;
-	}
 	void opt_swu2_map(G2& P, const Fp2& t, const Fp2 *t2 = 0) const
 	{
 		Point Pp;
@@ -556,10 +419,11 @@ struct MapToG2_WB19 {
 		if (t2) {
 			Point P2;
 			osswu2_help(P2, *t2);
-			add(Pp, Pp, P2);
+			ec::addJacobi(Pp, Pp, P2);
 		}
 		iso3(P, Pp);
 		clear_h2(P, P);
+		// if (t2 && !ec::isValidJacobi(P)) { puts("QQQ"); }
 	}
 	// hash-to-curve-06
 	void hashToFp2v6(Fp2 out[2], const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 8d32c6f4..30a166b2 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -157,11 +157,11 @@ void py_ecc_optimized_swu_G2(const MapTo& mapto, Point& P, const Fp2& t)
 	// (t^2 * xi)^2 + (t^2 * xi)
 	Fp2::add(deno, t2xi2, t2xi);
 	Fp2::add(nume, deno, 1);
-	nume *= Point::b_;
+	nume *= mapto.Ep_b;
 	if (deno.isZero()) {
-		Fp2::mul(deno, Point::a_, mapto.xi);
+		mapto.mul_xi(deno, mapto.Ep_a);
 	} else {
-		deno *= -Point::a_;
+		deno *= -mapto.Ep_a;
 	}
 	Fp2 u, v;
 	{
@@ -169,8 +169,8 @@ void py_ecc_optimized_swu_G2(const MapTo& mapto, Point& P, const Fp2& t)
 		Fp2::sqr(deno2, deno);
 		Fp2::mul(v, deno2, deno);
 
-		Fp2::mul(u, Point::b_, v);
-		Fp2::mul(tmp, Point::a_, nume);
+		Fp2::mul(u, mapto.Ep_b, v);
+		Fp2::mul(tmp, mapto.Ep_a, nume);
 		tmp *= deno2;
 		u += tmp;
 		Fp2::sqr(tmp, nume);
@@ -527,12 +527,11 @@ void osswu2_helpTest(const T& mapto)
 		CYBOZU_TEST_EQUAL(P.x, x);
 		CYBOZU_TEST_EQUAL(P.y, y);
 		CYBOZU_TEST_EQUAL(P.z, z);
-		CYBOZU_TEST_ASSERT(mapto.isValidPoint(P));
+//		CYBOZU_TEST_ASSERT(P.isValid());
 	}
 }
 
-template<class T>
-void addTest(const T& mapto)
+void addTest()
 {
 	const struct Tbl {
 		PointStr P;
@@ -590,10 +589,8 @@ void addTest(const T& mapto)
 		set(Q, tbl[i].Q);
 		set(R, tbl[i].R);
 		Point E;
-		mapto.add(E, P, Q);
-		CYBOZU_TEST_EQUAL(R.x, E.x);
-		CYBOZU_TEST_EQUAL(R.y, E.y);
-		CYBOZU_TEST_EQUAL(R.z, E.z);
+		ec::addJacobi(E, P, Q);
+		CYBOZU_TEST_ASSERT(R.isEqual(E));
 	}
 }
 
@@ -878,7 +875,7 @@ CYBOZU_TEST_AUTO(test)
 	py_eccTest(mapto);
 	py_eccTest2(mapto);
 	osswu2_helpTest(mapto);
-	addTest(mapto);
+	addTest();
 	iso3Test(mapto);
 	testSign(mapto);
 	ethFp2ToG2test();

From 3bfc2d3457b74e13032233244b42f481372e0dfb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 13 Apr 2020 09:26:35 +0900
Subject: [PATCH 188/553] [she] fix compile error on clang

---
 include/mcl/she.hpp | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/mcl/she.hpp b/include/mcl/she.hpp
index 42dc50b0..d441e1c0 100644
--- a/include/mcl/she.hpp
+++ b/include/mcl/she.hpp
@@ -1810,7 +1810,14 @@ struct SHET {
 		template<class InputStream>
 		void load(bool *pb, InputStream& is, int ioMode = IoSerialize)
 		{
-			cybozu::writeChar(pb, isMultiplied_ ? '0' : '1', is); if (!*pb) return;
+			char c;
+			if (!cybozu::readChar(&c, is)) return;
+			if (c == '0' || c == '1') {
+				isMultiplied_ = c == '0';
+			} else {
+				*pb = false;
+				return;
+			}
 			if (isMultiplied()) {
 				m_.load(pb, is, ioMode);
 			} else {
@@ -1820,14 +1827,7 @@ struct SHET {
 		template<class OutputStream>
 		void save(bool *pb, OutputStream& os, int ioMode = IoSerialize) const
 		{
-			char c;
-			if (!cybozu::readChar(&c, os)) return;
-			if (c == '0' || c == '1') {
-				isMultiplied_ = c == '0';
-			} else {
-				*pb = false;
-				return;
-			}
+			cybozu::writeChar(pb, os, isMultiplied_ ? '0' : '1'); if (!*pb) return;
 			if (isMultiplied()) {
 				m_.save(pb, os, ioMode);
 			} else {

From 21f666d80cd01378fd2118065aab063dc8ea2c6d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 13 Apr 2020 10:40:24 +0900
Subject: [PATCH 189/553]  [wasm] remove warning of function signature mismatch

---
 src/gen.cpp | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/gen.cpp b/src/gen.cpp
index cd369014..2f9ae194 100644
--- a/src/gen.cpp
+++ b/src/gen.cpp
@@ -235,7 +235,8 @@ struct Code : public mcl::Generator {
 		resetGlobalIdx();
 		Operand out(IntPtr, unit);
 		Operand px(IntPtr, unit);
-		mcl_fpDbl_mod_NIST_P192 = Function("mcl_fpDbl_mod_NIST_P192L" + suf, Void, out, px);
+		Operand dummy(IntPtr, unit);
+		mcl_fpDbl_mod_NIST_P192 = Function("mcl_fpDbl_mod_NIST_P192L" + suf, Void, out, px, dummy);
 		verifyAndSetPrivate(mcl_fpDbl_mod_NIST_P192);
 		beginFunc(mcl_fpDbl_mod_NIST_P192);
 
@@ -294,7 +295,8 @@ struct Code : public mcl::Generator {
 		const size_t mask = -(1 << rem);
 		const Operand py(IntPtr, unit);
 		const Operand px(IntPtr, unit);
-		Function f("mcl_fpDbl_mod_NIST_P521L" + suf, Void, py, px);
+		const Operand dummy(IntPtr, unit);
+		Function f("mcl_fpDbl_mod_NIST_P521L" + suf, Void, py, px, dummy);
 		verifyAndSetPrivate(f);
 		beginFunc(f);
 		Operand x = loadN(px, n * 2 + 1);
@@ -333,14 +335,15 @@ struct Code : public mcl::Generator {
 		resetGlobalIdx();
 		Operand py(IntPtr, unit);
 		Operand px(IntPtr, unit);
-		mcl_fp_sqr_NIST_P192 = Function("mcl_fp_sqr_NIST_P192L" + suf, Void, py, px);
+		Operand dummy(IntPtr, unit);
+		mcl_fp_sqr_NIST_P192 = Function("mcl_fp_sqr_NIST_P192L" + suf, Void, py, px, dummy);
 		verifyAndSetPrivate(mcl_fp_sqr_NIST_P192);
 		beginFunc(mcl_fp_sqr_NIST_P192);
 		Operand buf = alloca_(unit, 192 * 2 / unit);
 		// QQQ define later
 		Function mcl_fpDbl_sqrPre("mcl_fpDbl_sqrPre" + cybozu::itoa(192 / unit) + "L" + suf, Void, buf, px);
 		call(mcl_fpDbl_sqrPre, buf, px);
-		call(mcl_fpDbl_mod_NIST_P192, py, buf);
+		call(mcl_fpDbl_mod_NIST_P192, py, buf, buf/*dummy*/);
 		ret(Void);
 		endFunc();
 	}
@@ -350,14 +353,15 @@ struct Code : public mcl::Generator {
 		Operand pz(IntPtr, unit);
 		Operand px(IntPtr, unit);
 		Operand py(IntPtr, unit);
-		Function f("mcl_fp_mulNIST_P192L" + suf, Void, pz, px, py);
+		Operand dummy(IntPtr, unit);
+		Function f("mcl_fp_mulNIST_P192L" + suf, Void, pz, px, py, dummy);
 		verifyAndSetPrivate(f);
 		beginFunc(f);
 		Operand buf = alloca_(unit, 192 * 2 / unit);
 		// QQQ define later
 		Function mcl_fpDbl_mulPre("mcl_fpDbl_mulPre" + cybozu::itoa(192 / unit) + "L" + suf, Void, buf, px, py);
 		call(mcl_fpDbl_mulPre, buf, px, py);
-		call(mcl_fpDbl_mod_NIST_P192, pz, buf);
+		call(mcl_fpDbl_mod_NIST_P192, pz, buf, buf/*dummy*/);
 		ret(Void);
 		endFunc();
 	}

From b23b7e1088ac16d9154892f0b729a07410cc76d6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 13 Apr 2020 12:09:51 +0900
Subject: [PATCH 190/553] [wasm] add __multi3

---
 include/mcl/bn.h |  2 +-
 src/gen.cpp      | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 96e6b817..b2211b72 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -127,7 +127,7 @@ enum {
 };
 
 // return 0xABC which means A.BC
-MCLBN_DLL_API int mclBn_getVersion();
+MCLBN_DLL_API int mclBn_getVersion(void);
 /*
 	init library
 	@param curve [in] type of bn curve
diff --git a/src/gen.cpp b/src/gen.cpp
index 2f9ae194..3edcfc59 100644
--- a/src/gen.cpp
+++ b/src/gen.cpp
@@ -120,10 +120,27 @@ struct Code : public mcl::Generator {
 		ac = shl(zext(ac, 128), 32);
 		z = add(ac, ad);
 	}
+	void gen_multi3() // emit a function named __multi3; gen_mulUU calls this only when wasm is set
+	{
+		resetGlobalIdx();
+		Operand z(Int, unit2); // return value, unit2 bits wide
+		Operand x(Int, unit); // first argument, unit bits wide
+		Operand y(Int, unit); // second argument, unit bits wide
+		std::string name = "__multi3";
+		Function f(name, z, x, y);
+//		f.setPrivate();
+		verifyAndSetPrivate(f);
+		beginFunc(f);
+
+		gen_mul64x64(z, x, y); // body is produced by gen_mul64x64, which fills in z from x and y
+		ret(z);
+		endFunc();
+	}
 	void gen_mulUU()
 	{
 		if (wasm) {
 			gen_mul32x32();
+			gen_multi3();
 		}
 		resetGlobalIdx();
 		Operand z(Int, unit2);

From d3f19c82cca60d3654d14ba73641af1ef70f1d64 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 14 Apr 2020 16:36:09 +0900
Subject: [PATCH 191/553] add mapto benchmark

---
 test/mapto_wb19_test.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 30a166b2..5e12290b 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -6,6 +6,7 @@
 #include <fstream>
 #include <cybozu/atoi.hpp>
 #include <cybozu/file.hpp>
+#include <cybozu/benchmark.hpp>
 
 using namespace mcl;
 using namespace mcl::bn;
@@ -437,6 +438,7 @@ void py_eccTest(const T& mapto)
 		toJacobi(P11, P11);
 		CYBOZU_TEST_EQUAL(P1, P11);
 		py_ecc_hash_to_G2(mapto, P1, msg, msgSize, dst, dstSize);
+		CYBOZU_BENCH_C("py_ecc_hash_to_G2", 1000, py_ecc_hash_to_G2, mapto, P1, msg, msgSize, dst, dstSize);
 		CYBOZU_TEST_EQUAL(P1, P11);
 		ethMsgToG2(P1, msg, msgSize, dst, dstSize);
 		CYBOZU_TEST_EQUAL(P1, P11);
@@ -862,6 +864,7 @@ void testHashToFp2v6(const T& mapto)
 	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_06);
 	G2 P;
 	mcl::bn::hashAndMapToG2(P, "asdf", 4);
+	CYBOZU_BENCH_C("draft06 hashAndMapToG2", 1000, mcl::bn::hashAndMapToG2, P, "asdf", 4);
 	P.normalize();
 	printf("P=%s %s\n", P.x.getStr(10).c_str(), P.y.getStr(10).c_str());
 }

From 90a594a8020b0788ab4233315c1daa5da832368c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 14 Apr 2020 16:37:04 +0900
Subject: [PATCH 192/553] use Fp::isNegative()

---
 include/mcl/mapto_wb19.hpp | 8 +++-----
 test/mapto_wb19_test.cpp   | 3 ++-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 07f5dfd3..f6ceb812 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -72,7 +72,6 @@ template<class F> int PointT<F>::specialA_;
 template<class Fp, class Fp2, class G2>
 struct MapToG2_WB19 {
 	typedef local::PointT<Fp2> Point;
-	Fp half;
 	mpz_class sqrtConst; // (p^2 - 9) / 16
 	Fp2 Ep_a;
 	Fp2 Ep_b;
@@ -98,8 +97,6 @@ struct MapToG2_WB19 {
 		Point::b_.a = 4;
 		Point::b_.b = 4;
 		Point::specialA_ = ec::Zero;
-		half = -1;
-		half /= 2;
 		sqrtConst = Fp::getOp().mp;
 		sqrtConst *= sqrtConst;
 		sqrtConst -= 9;
@@ -239,9 +236,10 @@ struct MapToG2_WB19 {
 	}
 	bool isNegSign(const Fp2& x) const
 	{
-		if (x.b > half) return true;
+		// x.isNegative() <=> x > (p-1)/2 <=> x >= (p+1)/2
+		if (x.b.isNegative()) return true;
 		if (!x.b.isZero()) return false;
-		if (x.a > half) return true;
+		if (x.a.isNegative()) return true;
 		if (!x.b.isZero()) return false;
 		return false;
 	}
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 5e12290b..33212335 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -453,7 +453,8 @@ void py_eccTest(const T& mapto)
 template<class T>
 void testSign(const T& mapto)
 {
-	const Fp& H = mapto.half;
+	Fp H = -1;
+	H /= 2;
 	const size_t N = 4;
 	const Fp tbl[N] = { 0, 1, H, H + 1 };
 	const int expect[N][N] = {

From 753e322c8060e5881ad3498813ebb7fdc8195a62 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 14 Apr 2020 16:37:55 +0900
Subject: [PATCH 193/553] remove unnecessary test

---
 include/mcl/mapto_wb19.hpp |  1 -
 test/mapto_wb19_test.cpp   | 17 -----------------
 2 files changed, 18 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index f6ceb812..612d4676 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -421,7 +421,6 @@ struct MapToG2_WB19 {
 		}
 		iso3(P, Pp);
 		clear_h2(P, P);
-		// if (t2 && !ec::isValidJacobi(P)) { puts("QQQ"); }
 	}
 	// hash-to-curve-06
 	void hashToFp2v6(Fp2 out[2], const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 33212335..65117f33 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -663,20 +663,6 @@ void ethFp2ToG2test()
         "0xb9a2f39af0cc3264348ed00845545e2ccbed59ea541c726c8429871f9a0917fb4f7e049ac739065eea8354a2d1b2d21",
 		"0xc8810a06deb536d70531352bd2a3aac7496e187a8fc102d800c5f8ed839bd64d7102197aeb2b6164d20ff920ff63afe",
 	};
-	const PointStr t1t1s = {
-		{
-			"0x13ea937301cfb2a071a265b08e176854034c2e2ae49898e89c042bff176a1be7bf02dfda06f67d38819ca334218b9ff4",
-			"0x180ee537c06213034c842cad3b5a6d0053473e8bb92dd4c5826e59a45268cda3fe28814b1e9f3a58b9db657d9c24a0bd",
-		},
-		{
-			"0x13f4530154b75ce311849e775242b5e791058fd8e1d7df292b8e936e8be05e1cd9fa6eed6280357393d54adf3af0eb9c",
-			"0x10619dc087132cf699b02c905284c3449e80c295c8140345e45e21b7389c8f2cf7b5e223ef87f11f57eb1e689f6c141a",
-		},
-		{
-			"0x40f98938abaece4e47427371b3b6c500f9cdacae9d8b4da79ba9107720bd038057a4cc8ec8427d651760fd795d2415",
-			"0xac9cd43c4ba29f20ed5dd2aa4a634b39810e756313b4826f225efddfb1ae43185ac4f279e628731030e87405a965bf5",
-		},
-	};
 	const PointStr t1t2s = {
 		{
 			"0x126b4982298792ed049850bb92b55d26c33a8e3139f9ca1a20821496c7396ce5ad9042b0da529e60ec9c3ff8e983befe",
@@ -698,9 +684,6 @@ void ethFp2ToG2test()
 	set(P1, t1t2s);
 	ethFp2ToG2(P2, t1, &t2);
 	CYBOZU_TEST_EQUAL(P1, P2);
-	set(P1, t1t1s);
-	ethFp2ToG2(P2, t1, &t1);
-	CYBOZU_TEST_EQUAL(P1, P2);
 }
 
 void testVec(const char *file)

From 59418d662111caeae715eecdab3212e45df28d96 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 14 Apr 2020 16:38:09 +0900
Subject: [PATCH 194/553] use mulByCofactorBLS12fast

---
 include/mcl/bn.hpp         | 14 ++++++++++++++
 include/mcl/mapto_wb19.hpp | 23 +++++++++++++----------
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 05d1b6cf..dbea7f68 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -11,6 +11,13 @@
 #include <mcl/fp_tower.hpp>
 #include <mcl/ec.hpp>
 #include <mcl/curve_type.h>
+namespace mcl { namespace local {
+
+// to export fast cofactor multiplication to mapto_wb19
+template<class T>
+void mulByCofactorBLS12fast(T& Q, const T& P);
+
+} } // mcl::local
 #include <mcl/mapto_wb19.hpp>
 #include <assert.h>
 #ifndef CYBOZU_DONT_USE_EXCEPTION
@@ -2255,4 +2262,11 @@ inline bool ethMsgToG2(G2& out, const void *msg, size_t msgSize, const void *dst
 
 } } // mcl::bn
 
+namespace mcl { namespace local {
+template<> // specialization for G2: forwards to the mapTo member of BN::param
+inline void mulByCofactorBLS12fast(mcl::MCL_NAMESPACE_BN::G2& Q, const mcl::MCL_NAMESPACE_BN::G2& P)
+{
+	mcl::MCL_NAMESPACE_BN::BN::param.mapTo.mulByCofactorBLS12fast(Q, P);
+}
+} } // mcl::local
 #endif
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 612d4676..8ea55421 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -327,6 +327,7 @@ struct MapToG2_WB19 {
 		}
 		assert(0);
 	}
+#if 0
 	void h2_chain(G2& out, const G2& P) const
 	{
 		G2 t[16];
@@ -387,19 +388,21 @@ struct MapToG2_WB19 {
 		}
 		Q = T;
 	}
+#endif
 	void clear_h2(G2& Q, const G2& P) const
 	{
-#if 0
-		bn::param.mapTo.mulByCofactorBLS12fast(Q, P);
+#if 1
+		// 1.9Mclk can be reduced
+		mcl::local::mulByCofactorBLS12fast(Q, P);
 #else
-		G2 work, work2;
-		h2_chain(work, P);
-		G2::dbl(work2, work);
-		G2::add(work2, work, work2);
-		mx_chain(work, work2);
-		mx_chain(work, work);
-		G2::neg(work2, work2);
-		G2::add(Q, work, work2);
+		G2 T0, T1;
+		h2_chain(T0, P);
+		G2::dbl(T1, T0);
+		G2::add(T1, T0, T1);
+		mx_chain(T0, T1);
+		mx_chain(T0, T0);
+		G2::neg(T1, T1);
+		G2::add(Q, T0, T1);
 #endif
 	}
 	template<class T>

From bd66a8b07b70e3422029a353f5fd3a99bc5e59c6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 15 Apr 2020 16:45:56 +0900
Subject: [PATCH 195/553] remove stack-protector option

---
 common.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common.mk b/common.mk
index 9ea2c0d6..193e3934 100644
--- a/common.mk
+++ b/common.mk
@@ -66,7 +66,7 @@ ifeq ($(DEBUG),1)
     LDFLAGS+=-fsanitize=address
   endif
 else
-  CFLAGS_OPT+=-fomit-frame-pointer -DNDEBUG
+  CFLAGS_OPT+=-fomit-frame-pointer -DNDEBUG -fno-stack-protector
   ifeq ($(CXX),clang++)
     CFLAGS_OPT+=-O3
   else

From 74342272a3d310f2645217042cb018dfc967a001 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 15 Apr 2020 17:26:36 +0900
Subject: [PATCH 196/553] 4% speedup by removing unnecessary cstr

---
 include/mcl/fp_tower.hpp | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index dbbfacd3..48020380 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -16,16 +16,6 @@ class FpDblT : public fp::Serializable<FpDblT<Fp> > {
 	Unit v_[Fp::maxSize * 2];
 public:
 	static size_t getUnitSize() { return Fp::op_.N * 2; }
-	FpDblT() : v_()
-	{
-	}
-	FpDblT(const FpDblT& rhs)
-	{
-		const size_t n = getUnitSize();
-		for (size_t i = 0; i < n; i++) {
-			v_[i] = rhs.v_[i];
-		}
-	}
 	void dump() const
 	{
 		const size_t n = getUnitSize();

From 3b8caf72bcc4c427f1cfc4d608d792d487708645 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 16 Apr 2020 12:40:35 +0900
Subject: [PATCH 197/553] support llvm-9 or 10 but the generated code is slow

---
 Makefile         |  6 +++---
 src/gen.cpp      |  9 ++++-----
 src/llvm_gen.hpp | 25 +++++++++++++++++--------
 3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/Makefile b/Makefile
index bea4fbcf..29bc0ad5 100644
--- a/Makefile
+++ b/Makefile
@@ -68,13 +68,13 @@ all: $(MCL_LIB) $(MCL_SLIB) $(BN256_LIB) $(BN256_SLIB) $(BN384_LIB) $(BN384_SLIB
 #LLVM_VER=-3.8
 LLVM_LLC=llc$(LLVM_VER)
 LLVM_OPT=opt$(LLVM_VER)
-LLVM_OPT_VERSION=$(shell $(LLVM_OPT) --version 2>/dev/null | awk '/version/ {print $$3}')
+LLVM_OPT_VERSION=$(shell $(LLVM_OPT) --version 2>/dev/null | awk '/version/ { split($$3,a,"."); print a[1]}')
 GEN_EXE=src/gen
 GEN_EXE_OPT=-u $(BIT)
 # incompatibility between llvm 3.4 and the later version
 ifneq ($(LLVM_OPT_VERSION),)
-ifeq ($(shell expr $(LLVM_OPT_VERSION) \< 3.5.0),1)
-  GEN_EXE_OPT+=-old
+ifeq ($(shell expr $(LLVM_OPT_VERSION) \>= 9),1)
+  GEN_EXE_OPT+=-ver 0x90
 endif
 endif
 ifeq ($(OS),mac)
diff --git a/src/gen.cpp b/src/gen.cpp
index 3edcfc59..e60d404d 100644
--- a/src/gen.cpp
+++ b/src/gen.cpp
@@ -982,13 +982,13 @@ int main(int argc, char *argv[])
 	try
 {
 	uint32_t unit;
-	bool oldLLVM;
+	int llvmVer;
 	bool wasm;
 	std::string suf;
 	std::string privateFile;
 	cybozu::Option opt;
 	opt.appendOpt(&unit, uint32_t(sizeof(void*)) * 8, "u", ": unit");
-	opt.appendBoolOpt(&oldLLVM, "old", ": old LLVM(before 3.8)");
+	opt.appendOpt(&llvmVer, 0x70, "ver", ": llvm version");
 	opt.appendBoolOpt(&wasm, "wasm", ": for wasm");
 	opt.appendOpt(&suf, "", "s", ": suffix of function name");
 	opt.appendOpt(&privateFile, "", "f", ": private function list file");
@@ -1006,9 +1006,8 @@ int main(int argc, char *argv[])
 		}
 	}
 	Code c;
-	if (oldLLVM) {
-		c.setOldLLVM();
-	}
+	fprintf(stderr, "llvmVer=0x%02x\n", llvmVer);
+	c.setLlvmVer(llvmVer);
 	c.wasm = wasm;
 	c.setUnit(unit);
 	uint32_t maxBitSize = MCL_MAX_BIT_SIZE;
diff --git a/src/llvm_gen.hpp b/src/llvm_gen.hpp
index e60b8e9a..a0acd604 100644
--- a/src/llvm_gen.hpp
+++ b/src/llvm_gen.hpp
@@ -46,16 +46,14 @@ struct File {
 template<size_t dummy=0>
 struct Param {
 	static File f;
+	static int llvmVer;
 };
 
 template<size_t dummy>
 File Param<dummy>::f;
 
-bool isOldLLVM = false;
-
 } // mcl::impl
 
-inline bool isOldLLVM() { return impl::isOldLLVM; }
 
 struct Generator {
 	static const uint8_t None = 0;
@@ -63,7 +61,14 @@ struct Generator {
 	static const uint8_t Imm = 2;
 	static const uint8_t Ptr = 1 << 7;
 	static const uint8_t IntPtr = Int | Ptr;
-	void setOldLLVM() { impl::isOldLLVM = true; }
+	int llvmVer;
+	void setOldLLVM() { llvmVer = 0x37; }
+	static const int V8 = 0x90;
+	bool isNewer(int ver) const
+	{
+		return llvmVer > ver;
+	}
+	void setLlvmVer(int ver) { llvmVer = ver; }
 	struct Type {
 		uint8_t type;
 		bool isPtr;
@@ -474,9 +479,11 @@ inline Generator::Eval Generator::getelementptr(const Generator::Operand& p, con
 	Eval e;
 	e.op = p;
 	e.s = "getelementptr ";
-	if (!isOldLLVM()) {
-		e.s += "i" + cybozu::itoa(p.bit) + ", ";
+	const std::string bit = cybozu::itoa(p.bit);
+	if (isNewer(V8)) {
+		e.s += " inbounds " + bit + ", ";
 	}
+	e.s += "i" + bit + ", ";
 	e.s += p.toStr() + ", " + i.toStr();
 	return e;
 }
@@ -493,9 +500,11 @@ inline Generator::Eval Generator::load(const Generator::Operand& p)
 	e.op = p;
 	e.op.type.isPtr = false;
 	e.s = "load ";
-	if (!isOldLLVM()) {
-		e.s += "i" + cybozu::itoa(p.bit) + ", ";
+	const std::string bit = cybozu::itoa(p.bit);
+	if (isNewer(V8)) {
+		e.s += "i" + bit + ", ";
 	}
+	e.s += "i" + bit + ", ";
 	e.s += p.toStr();
 	return e;
 }

From bff17bffc7b802beef64377ad624cd7760e6a41f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 16 Apr 2020 14:11:27 +0900
Subject: [PATCH 198/553] how to profile by perf/vtune

---
 readme.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/readme.md b/readme.md
index f755abfb..ea4233ad 100644
--- a/readme.md
+++ b/readme.md
@@ -82,6 +82,20 @@ make MCL_USE_GMP=0
 ```
 Define `MCL_USE_VINT` if using C++ header files.
 
+## How to profile on Linux
+
+### Use perf
+```
+make MCL_USE_PROF=1 bin/bls12_test.exe
+env MCL_PROF=1 bin/bls12_test.exe
+```
+
+### Use Intel VTune profiler
+Suppose VTune is installed in `/opt/intel/vtune_amplifier/`.
+```
+make MCL_USE_PROF=2 bin/bls12_test.exe
+env MCL_PROF=2 bin/bls12_test.exe
+```
 
 ## How to build on 32-bit x86 Linux
 

From 03a9c3f486f2e168cfc803105b3e5e536c7a2e8e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 2 May 2020 11:07:24 +0900
Subject: [PATCH 199/553] add link to GitHub Sponsor

---
 readme.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/readme.md b/readme.md
index ea4233ad..038219d6 100644
--- a/readme.md
+++ b/readme.md
@@ -334,4 +334,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # Author
 
-光成滋生 MITSUNARI Shigeo(herumi@nifty.com)
+MITSUNARI Shigeo(herumi@nifty.com)
+
+# Sponsors welcome
+[GitHub Sponsor](https://github.com/sponsors/herumi)

From 95e80e3c7b3d3ba0e56ae4d8fdc75f80318fab6a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 6 May 2020 18:14:01 +0900
Subject: [PATCH 200/553] remove warning of vc

---
 include/mcl/util.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/util.hpp b/include/mcl/util.hpp
index b33b10c3..0b5a7296 100644
--- a/include/mcl/util.hpp
+++ b/include/mcl/util.hpp
@@ -286,7 +286,7 @@ void powGeneric(G& out, const G& x, const T *y, size_t n, const Mul& mul, const
 		out = x;
 	}
 	for (int i = (int)n - 1; i >= 0; i--) {
-		T v = y[i];
+		v = y[i];
 		for (int j = m - 2; j >= 0; j -= 2) {
 			sqr(out, out);
 			sqr(out, out);

From 2c24950b039fc8327bca1b0e16acbfb5e58e7f7a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 8 May 2020 17:37:38 +0900
Subject: [PATCH 201/553] Z_pad is static constant

---
 src/fp.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index aab8e1e2..cc39188e 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -165,7 +165,7 @@ void expand_message_xmd(uint8_t out[256], const void *msg, size_t msgSize, const
 	const size_t mdSize = 32;
 	const size_t r_in_bytes = 64;
 	const size_t ell = len_in_bytes / mdSize;
-	uint8_t Z_pad[r_in_bytes] = {};
+	static const uint8_t Z_pad[r_in_bytes] = {};
 	assert(dstSize < 256);
 	// BE(len_in_bytes, 2) + BE(0, 1) + BE(dstSize, 1)
 	uint8_t buf[2 + 1 + 1] = { 1, 0, 0, uint8_t(dstSize) };

From 336c69d799f3d76d80e0e82977c1498631d6ec72 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 10 May 2020 16:54:19 +0900
Subject: [PATCH 202/553] expand_message_xmd for draft-07

---
 include/mcl/fp.hpp         |  3 +++
 include/mcl/mapto_wb19.hpp |  2 +-
 src/fp.cpp                 | 44 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 0564d980..5de0dfbf 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -77,8 +77,11 @@ void hkdf_extract_addZeroByte(uint8_t hmac[32], const uint8_t *salt, size_t salt
 void hkdf_extract(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize);
 void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6]);
 
+// draft-07
 void expand_message_xmd(uint8_t out[256], const void *msg, size_t msgSize, const void *dst, size_t dstSize);
 
+void expand_message_xmd06(uint8_t out[256], const void *msg, size_t msgSize, const void *dst, size_t dstSize);
+
 namespace local {
 
 inline void byteSwap(void *x, size_t n)
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 8ea55421..8eff6ba3 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -429,7 +429,7 @@ struct MapToG2_WB19 {
 	void hashToFp2v6(Fp2 out[2], const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
 	{
 		uint8_t md[256];
-		mcl::fp::expand_message_xmd(md, msg, msgSize, dst, dstSize);
+		mcl::fp::expand_message_xmd06(md, msg, msgSize, dst, dstSize);
 		Fp *x = out[0].getFp0();
 		for (size_t i = 0; i < 4; i++) {
 			uint8_t *p = &md[64 * i];
diff --git a/src/fp.cpp b/src/fp.cpp
index cc39188e..f9307acd 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -159,7 +159,7 @@ void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6])
 	cybozu::hmac256(out + 32, prk, 32, out, 32 + 6);
 }
 
-void expand_message_xmd(uint8_t out[256], const void *msg, size_t msgSize, const void *dst, size_t dstSize)
+void expand_message_xmd06(uint8_t out[256], const void *msg, size_t msgSize, const void *dst, size_t dstSize)
 {
 	const size_t len_in_bytes = 256;
 	const size_t mdSize = 32;
@@ -194,6 +194,48 @@ void expand_message_xmd(uint8_t out[256], const void *msg, size_t msgSize, const
 	}
 }
 
+void expand_message_xmd(uint8_t out[256], const void *msg, size_t msgSize, const void *dst, size_t dstSize)
+{
+	const size_t len_in_bytes = 256;
+	const size_t mdSize = 32;
+	const size_t r_in_bytes = 64;
+	const size_t ell = len_in_bytes / mdSize;
+	static const uint8_t Z_pad[r_in_bytes] = {};
+	assert(dstSize < 256);
+	/* input of the first hash (the trailing dstSize byte is passed via digest()):
+		Z_pad | msg | BE(len_in_bytes, 2) | BE(0, 1) | DST | BE(dstSize, 1)
+	*/
+	static const uint8_t lenBuf[2] = { 1, 0 }; // BE(len_in_bytes, 2) : 0x0100 = 256
+	uint8_t iBuf = 0;
+	uint8_t dstSizeBuf = uint8_t(dstSize);
+	cybozu::Sha256 h;
+	h.update(Z_pad, r_in_bytes);
+	h.update(msg, msgSize);
+	h.update(lenBuf, sizeof(lenBuf));
+	h.update(&iBuf, 1);
+	h.update(dst, dstSize);
+	uint8_t md[mdSize];
+	h.digest(md, mdSize, &dstSizeBuf, 1);
+	h.clear();
+	h.update(md, mdSize);
+	iBuf = 1;
+	h.update(&iBuf, 1);
+	h.update(dst, dstSize);
+	h.digest(out, mdSize, &dstSizeBuf, 1);
+	uint8_t mdXor[mdSize];
+	for (size_t i = 1; i < ell; i++) {
+		h.clear();
+		for (size_t j = 0; j < mdSize; j++) {
+			mdXor[j] = md[j] ^ out[mdSize * (i - 1) + j];
+		}
+		h.update(mdXor, mdSize);
+		iBuf = uint8_t(i + 1);
+		h.update(&iBuf, 1);
+		h.update(dst, dstSize);
+		h.digest(out + mdSize * i, mdSize, &dstSizeBuf, 1);
+	}
+}
+
 
 #ifndef MCL_USE_VINT
 static inline void set_mpz_t(mpz_t& z, const Unit* p, int n)

From f1313a683182989bca3acf52f960ea6def82165a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 10 May 2020 18:07:36 +0900
Subject: [PATCH 203/553] add test of draft-07

---
 include/mcl/curve_type.h |  3 ++-
 test/mapto_wb19_test.cpp | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/include/mcl/curve_type.h b/include/mcl/curve_type.h
index c0eb8226..dae261c6 100644
--- a/include/mcl/curve_type.h
+++ b/include/mcl/curve_type.h
@@ -48,7 +48,8 @@ enum {
 	MCL_MAP_TO_MODE_WB19, // used in new eth2.0 spec
 	MCL_MAP_TO_MODE_HASH_TO_CURVE_05 = MCL_MAP_TO_MODE_WB19, // draft-irtf-cfrg-hash-to-curve-05
 	MCL_MAP_TO_MODE_HASH_TO_CURVE_06, // draft-irtf-cfrg-hash-to-curve-06
-	MCL_MAP_TO_MODE_HASH_TO_CURVE = MCL_MAP_TO_MODE_HASH_TO_CURVE_06 // the latset version
+	MCL_MAP_TO_MODE_HASH_TO_CURVE_07, // draft-irtf-cfrg-hash-to-curve-07
+	MCL_MAP_TO_MODE_HASH_TO_CURVE = MCL_MAP_TO_MODE_HASH_TO_CURVE_07 // the latset version
 };
 
 #ifdef __cplusplus
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 65117f33..debdab71 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -853,6 +853,27 @@ void testHashToFp2v6(const T& mapto)
 	printf("P=%s %s\n", P.x.getStr(10).c_str(), P.y.getStr(10).c_str());
 }
 
+template<class T>
+void testHashToFp2v7(const T&/* mapto*/)
+{
+	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_07);
+	{
+		char msg[] = "asdf";
+		char dst[] = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_";
+		/*
+			https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve
+			tag: draft-irtf-cfrg-hash-to-curve-07
+			the return value of expand_message_xmd in hash_to_field.py
+		*/
+		char expect[] = "ca53fcd6f140590d19138f38819eb13330c014a1670e40f0f8e991de7b35e21a1fca52a14486c8e8acc9d865718cd41fe3638c2fb50fdc75b95690dc58f86494005fb37fc330366a7fef5f6e26bb631f4a5462affab2b9a9630c3b1c63621875baf782dd435500fda05ba7a9e86a766eeffe259128dc6e43c1852c58034856c4c4e2158c3414a881c17b727be5400432bf5c0cd02066a3b763e25e3ca32f19ca69a807bbc14c7c8c7988915fb1df523c536f744aa8b9bd0bbcea9800a236355690a4765491cd8969ca2f8cac8b021d97306e6ce6a2126b2868cf57f59f5fc416385bc1c2ae396c62608adc6b9174bbdb981a4601c3bd81bbe086e385d9a909aa";
+		size_t msgSize = strlen(msg);
+		size_t dstSize = strlen(dst);
+		uint8_t md[256];
+		mcl::fp::expand_message_xmd(md, msg, msgSize, dst, dstSize);
+		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
+	}
+}
+
 CYBOZU_TEST_AUTO(test)
 {
 	initPairing(mcl::BLS12_381);
@@ -873,4 +894,5 @@ CYBOZU_TEST_AUTO(test)
 	testVec("../misc/mapto/misc.txt");
 	ethMsgToG2testAll("../bls_sigs_ref/test-vectors/hash_g2/");
 	testHashToFp2v6(mapto);
+	testHashToFp2v7(mapto);
 }

From 63cb8d0d24950b1500a7337067ac7c16e3cae7d1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 13 May 2020 13:54:48 +0900
Subject: [PATCH 204/553] add sgn0 of draft-07

---
 include/mcl/mapto_wb19.hpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 8eff6ba3..306ff68f 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -234,8 +234,22 @@ struct MapToG2_WB19 {
 		Fp::neg(y.b, y.b);
 		y.a = t;
 	}
+	bool sgn0(const Fp& x) const
+	{
+		return x.isOdd();
+	}
+	bool sgn0(const Fp2& x) const
+	{
+		bool sign0 = sgn0(x.a);
+		bool zero0 = x.a.isZero();
+		bool sign1 = sgn0(x.b);
+		return sign0 || (zero0 & sign1);
+	}
 	bool isNegSign(const Fp2& x) const
 	{
+		if (draftVersion_ == 7) {
+			return sgn0(x);
+		}
 		// x.isNegative() <=> x > (p-1)/2 <=> x >= (p+1)/2
 		if (x.b.isNegative()) return true;
 		if (!x.b.isZero()) return false;

From 711ae4207263d52724abc324b44fa1414e3e165e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 13 May 2020 14:11:50 +0900
Subject: [PATCH 205/553] rename old hashTo function

---
 include/mcl/bn.hpp         |  2 +-
 include/mcl/mapto_wb19.hpp | 10 +++++-----
 test/mapto_wb19_test.cpp   |  6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index dbea7f68..e4081378 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -2242,7 +2242,7 @@ inline const Fr& getG2cofactorAdjInv()
 inline bool ethMsgToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
 {
 	if (!BN::param.isBLS12) return false;
-	hashToFp2(out, msg, msgSize, ctr, dst, dstSize);
+	hashToFp2old(out, msg, msgSize, ctr, dst, dstSize);
 	return true;
 }
 
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 306ff68f..b53478bc 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -11,7 +11,7 @@ namespace mcl {
 
 // ctr = 0 or 1 or 2
 template<class Fp2>
-inline void hashToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
+inline void hashToFp2old(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
 {
 	const bool addZeroByte = true; // append zero byte to msg
 	assert(ctr <= 2);
@@ -440,7 +440,7 @@ struct MapToG2_WB19 {
 		clear_h2(P, P);
 	}
 	// hash-to-curve-06
-	void hashToFp2v6(Fp2 out[2], const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
+	void hashToFp2(Fp2 out[2], const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
 	{
 		uint8_t md[256];
 		mcl::fp::expand_message_xmd06(md, msg, msgSize, dst, dstSize);
@@ -457,10 +457,10 @@ struct MapToG2_WB19 {
 	{
 		Fp2 t[2];
 		if (draftVersion_ == 5) {
-			hashToFp2(t[0], msg, msgSize, 0, dst, dstSize);
-			hashToFp2(t[1], msg, msgSize, 1, dst, dstSize);
+			hashToFp2old(t[0], msg, msgSize, 0, dst, dstSize);
+			hashToFp2old(t[1], msg, msgSize, 1, dst, dstSize);
 		} else {
-			hashToFp2v6(t, msg, msgSize, dst, dstSize);
+			hashToFp2(t, msg, msgSize, dst, dstSize);
 		}
 		opt_swu2_map(out, t[0], &t[1]);
 	}
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index debdab71..1278a040 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -234,8 +234,8 @@ void toJacobi(G2& out, const G2& in)
 void py_ecc_hash_to_G2(const MapTo& mapto, G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
 {
 	Fp2 t1, t2;
-	hashToFp2(t1, msg, msgSize, 0, dst, dstSize);
-	hashToFp2(t2, msg, msgSize, 1, dst, dstSize);
+	hashToFp2old(t1, msg, msgSize, 0, dst, dstSize);
+	hashToFp2old(t2, msg, msgSize, 1, dst, dstSize);
 	G2 P1, P2;
 	py_ecc_map_to_curve_G2(mapto, P1, t1);
 	py_ecc_map_to_curve_G2(mapto, P2, t2);
@@ -817,7 +817,7 @@ void testHashToFp2v6(const T& mapto)
 		const char *dst = tbl[i].dst;
 		const Fp2Str *expectStr = tbl[i].s;
 		Fp2 out[2];
-		mapto.hashToFp2v6(out, msg, strlen(msg), dst, strlen(dst));
+		mapto.hashToFp2(out, msg, strlen(msg), dst, strlen(dst));
 		Fp2 expect[2];
 		for (int j = 0; j < 2; j++) {
 			set(expect[j], expectStr[j]);

From eb5f51a9af49f174cab429e4a0bc346183ae82c6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 13 May 2020 14:20:37 +0900
Subject: [PATCH 206/553] hashToFp2 supports draft-07

---
 include/mcl/bn.hpp         | 5 +++++
 include/mcl/mapto_wb19.hpp | 6 +++++-
 test/mapto_wb19_test.cpp   | 2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index e4081378..b429d2de 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -514,6 +514,11 @@ struct MapTo {
 			mapToG2_WB19_.setDraftVersion(6);
 			return true;
 			break;
+		case MCL_MAP_TO_MODE_HASH_TO_CURVE_07:
+			mapToMode_ = mode;
+			mapToG2_WB19_.setDraftVersion(7);
+			return true;
+			break;
 		default:
 			return false;
 		}
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index b53478bc..45e31c85 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -443,7 +443,11 @@ struct MapToG2_WB19 {
 	void hashToFp2(Fp2 out[2], const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
 	{
 		uint8_t md[256];
-		mcl::fp::expand_message_xmd06(md, msg, msgSize, dst, dstSize);
+		if (draftVersion_ == 6) {
+			mcl::fp::expand_message_xmd06(md, msg, msgSize, dst, dstSize);
+		} else {
+			mcl::fp::expand_message_xmd(md, msg, msgSize, dst, dstSize);
+		}
 		Fp *x = out[0].getFp0();
 		for (size_t i = 0; i < 4; i++) {
 			uint8_t *p = &md[64 * i];
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 1278a040..72349429 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -812,6 +812,7 @@ void testHashToFp2v6(const T& mapto)
 			}
 		},
 	};
+	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_06);
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 		const char *msg = tbl[i].msg;
 		const char *dst = tbl[i].dst;
@@ -845,7 +846,6 @@ void testHashToFp2v6(const T& mapto)
 			CYBOZU_TEST_EQUAL(P.y, t);
 		}
 	}
-	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_06);
 	G2 P;
 	mcl::bn::hashAndMapToG2(P, "asdf", 4);
 	CYBOZU_BENCH_C("draft06 hashAndMapToG2", 1000, mcl::bn::hashAndMapToG2, P, "asdf", 4);

From d5f287bcc7050c8c79885c22043ef24a317f0fcc Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 13 May 2020 15:45:19 +0900
Subject: [PATCH 207/553] test of msgToG2 for draft-07

---
 test/mapto_wb19_test.cpp | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 72349429..13a3b3e5 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -854,9 +854,30 @@ void testHashToFp2v6(const T& mapto)
 }
 
 template<class T>
-void testHashToFp2v7(const T&/* mapto*/)
+void testHashToFp2v7(const T& mapto)
 {
 	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_07);
+	{
+		const char *msg = "asdf";
+		PointStr s = {
+			{
+				"2525875563870715639912451285996878827057943937903727288399283574780255586622124951113038778168766058972461529282986",
+				"3132482115871619853374334004070359337604487429071253737901486558733107203612153024147084489564256619439711974285977",
+			},
+			{
+				"2106640002084734620850657217129389007976098691731730501862206029008913488613958311385644530040820978748080676977912",
+				"2882649322619140307052211460282445786973517746532934590265600680988689024512167659295505342688129634612479405019290",
+			},
+			{
+				"1",
+				"0",
+			}
+		};
+		G2 P1, P2;
+		mapto.msgToG2(P1, msg, strlen(msg));
+		set(P2, s);
+		CYBOZU_TEST_EQUAL(P1, P2);
+	}
 	{
 		char msg[] = "asdf";
 		char dst[] = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_";

From 19ece2524b1fc8db89117e1105155d8d71133d62 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 13 May 2020 16:24:49 +0900
Subject: [PATCH 208/553] add test of draft-07

---
 test/mapto_wb19_test.cpp | 65 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 13a3b3e5..192fa025 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -893,6 +893,71 @@ void testHashToFp2v7(const T& mapto)
 		mcl::fp::expand_message_xmd(md, msg, msgSize, dst, dstSize);
 		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
 	}
+	{
+		const char *dst = "BLS12381G2_XMD:SHA-256_SSWU_RO_TESTGEN";
+		size_t dstSize = strlen(dst);
+		const struct {
+			const char *msg;
+			Fp2Str x;
+			Fp2Str y;
+		} tbl[] = {
+			// fd12ba0 : https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve/poc/vectors/BLS12381G2_XMD:SHA-256_SSWU_RO_.json
+			{
+				"", // msg
+				{ // P.x
+					"0x0a650bd36ae7455cb3fe5d8bb1310594551456f5c6593aec9ee0c03d2f6cb693bd2c5e99d4e23cbaec767609314f51d3",
+					"0x0fbdae26f9f9586a46d4b0b70390d09064ef2afe5c99348438a3c7d9756471e015cb534204c1b6824617a85024c772dc",
+				},
+				{ // P.y
+					"0x0d8d49e7737d8f9fc5cef7c4b8817633103faf2613016cb86a1f3fc29968fe2413e232d9208d2d74a89bf7a48ac36f83",
+					"0x02e5cf8f9b7348428cc9e66b9a9b36fe45ba0b0a146290c3a68d92895b1af0e1f2d9f889fb412670ae8478d8abd4c5aa",
+				}
+			},
+			{
+				"abc",
+				{
+					"0x1953ce6d4267939c7360756d9cca8eb34aac4633ef35369a7dc249445069888e7d1b3f9d2e75fbd468fbcbba7110ea02",
+					"0x03578447618463deb106b60e609c6f7cc446dc6035f84a72801ba17c94cd800583b493b948eff0033f09086fdd7f6175",
+				},
+				{
+					"0x0882ab045b8fe4d7d557ebb59a63a35ac9f3d312581b509af0f8eaa2960cbc5e1e36bb969b6e22980b5cbdd0787fcf4e",
+					"0x0184d26779ae9d4670aca9b267dbd4d3b30443ad05b8546d36a195686e1ccc3a59194aea05ed5bce7c3144a29ec047c4",
+				},
+			},
+			{
+				"abcdef0123456789",
+				{
+					"0x17b461fc3b96a30c2408958cbfa5f5927b6063a8ad199d5ebf2d7cdeffa9c20c85487204804fab53f950b2f87db365aa",
+					"0x195fad48982e186ce3c5c82133aefc9b26d55979b6f530992a8849d4263ec5d57f7a181553c8799bcc83da44847bdc8d",
+				},
+				{
+					"0x174a3473a3af2d0302b9065e895ca4adba4ece6ce0b41148ba597001abb152f852dd9a96fb45c9de0a43d944746f833e",
+					"0x005cdf3d984e3391e7e969276fb4bc02323c5924a4449af167030d855acc2600cf3d4fab025432c6d868c79571a95bef",
+				},
+			},
+			{
+				"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+				{
+					"0x0a162306f3b0f2bb326f0c4fb0e1fea020019c3af796dcd1d7264f50ddae94cacf3cade74603834d44b9ab3d5d0a6c98",
+					"0x123b6bd9feeba26dd4ad00f8bfda2718c9700dc093ea5287d7711844644eb981848316d3f3f57d5d3a652c6cdc816aca",
+				},
+				{
+					"0x15c1d4f1a685bb63ee67ca1fd96155e3d091e852a684b78d085fd34f6091e5249ddddbdcf2e7ec82ce6c04c63647eeb7",
+					"0x05483f3b96d9252dd4fc0868344dfaf3c9d145e3387db23fa8e449304fab6a7b6ec9c15f05c0a1ea66ff0efcc03e001a",
+				},
+			},
+		};
+		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+			const char *msg = tbl[i].msg;
+			size_t msgSize = strlen(msg);
+			G2 P1, P2;
+			set(P1.x, tbl[i].x);
+			set(P1.y, tbl[i].y);
+			P1.z = 1;
+			mapto.map2curve_osswu2(P2, msg, msgSize, dst, dstSize);
+			CYBOZU_TEST_EQUAL(P1, P2);
+		}
+	}
 }
 
 CYBOZU_TEST_AUTO(test)

From cb4fea5d66a3518d6ecf6877e024b48cdb553490 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 13 May 2020 16:28:29 +0900
Subject: [PATCH 209/553] update link to test

---
 test/mapto_wb19_test.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 192fa025..b05e4b46 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -901,7 +901,7 @@ void testHashToFp2v7(const T& mapto)
 			Fp2Str x;
 			Fp2Str y;
 		} tbl[] = {
-			// fd12ba0 : https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve/poc/vectors/BLS12381G2_XMD:SHA-256_SSWU_RO_.json
+			// https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-07.html#name-bls12381g2_xmdsha-256_sswu_
 			{
 				"", // msg
 				{ // P.x

From 21dbb7ab8bad3526a0708b2ae94836d4da13ab98 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 13 May 2020 16:32:24 +0900
Subject: [PATCH 210/553] v1.08

---
 include/mcl/op.hpp       | 2 +-
 readme.md                | 3 +++
 test/mapto_wb19_test.cpp | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index f6e64e68..5e357003 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x107; /* 0xABC = A.BC */
+static const int version = 0x108; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index 038219d6..b6c782e4 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,8 @@ mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
+- `MCL_MAP_TO_MODE_HASH_TO_CURVE_07` is added for [draft-07](https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-07.txt).
+  - The older version will be removed in the future.
 - change DST of hash-to-curve for `MCL_MAP_TO_MODE_HASH_TO_CURVE_06`.
 - add new hash-to-curve function of [draft-irtf-cfrg-hash-to-curve](https://cfrg.github.io/draft-irtf-cfrg-hash-to-curve/draft-irtf-cfrg-hash-to-curve.txt) at March 2020.
   - call `setETHmode(MCL_MAP_TO_MODE_HASH_TO_CURVE_06);`
@@ -310,6 +312,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
+- 2020/May/13 v1.08 support draft-irtf-cfrg-hash-to-curve-07
 - 2020/Mar/26 v1.07 change DST for hash-to-curve-06
 - 2020/Mar/15 v1.06 support hash-to-curve-06
 - 2020/Jan/31 v1.05 mclBn_ethMsgToFp2 has changed to append zero byte at the end of msg
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index b05e4b46..b7bfe634 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -850,7 +850,7 @@ void testHashToFp2v6(const T& mapto)
 	mcl::bn::hashAndMapToG2(P, "asdf", 4);
 	CYBOZU_BENCH_C("draft06 hashAndMapToG2", 1000, mcl::bn::hashAndMapToG2, P, "asdf", 4);
 	P.normalize();
-	printf("P=%s %s\n", P.x.getStr(10).c_str(), P.y.getStr(10).c_str());
+//	printf("P=%s %s\n", P.x.getStr(10).c_str(), P.y.getStr(10).c_str());
 }
 
 template<class T>

From 7b4eb83d5bf0940504bfe891f70335d41f5a6037 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 13 May 2020 20:14:03 +0900
Subject: [PATCH 211/553] hashAndMapToG2 support draft-07

---
 include/mcl/bn.hpp       | 4 ++--
 include/mcl/op.hpp       | 2 +-
 readme.md                | 2 +-
 test/mapto_wb19_test.cpp | 7 +++++++
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index b429d2de..1e526f5b 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -590,7 +590,7 @@ struct MapTo {
 	}
 	bool calc(G2& P, const Fp2& t, bool fast = false) const
 	{
-		if (mapToMode_ == MCL_MAP_TO_MODE_WB19 || mapToMode_ == MCL_MAP_TO_MODE_HASH_TO_CURVE_06) {
+		if (mapToMode_ == MCL_MAP_TO_MODE_WB19 || mapToMode_ >= MCL_MAP_TO_MODE_HASH_TO_CURVE_06) {
 			mapToG2_WB19_.opt_swu2_map(P, t);
 			return true;
 		}
@@ -2103,7 +2103,7 @@ inline void hashAndMapToG1(G1& P, const void *buf, size_t bufSize)
 inline void hashAndMapToG2(G2& P, const void *buf, size_t bufSize)
 {
 	int mode = getMapToMode();
-	if (mode == MCL_MAP_TO_MODE_WB19 || mode == MCL_MAP_TO_MODE_HASH_TO_CURVE_06) {
+	if (mode == MCL_MAP_TO_MODE_WB19 || mode >= MCL_MAP_TO_MODE_HASH_TO_CURVE_06) {
 		BN::param.mapTo.mapToG2_WB19_.msgToG2(P, buf, bufSize);
 		return;
 	}
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 5e357003..db1dd7c3 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x108; /* 0xABC = A.BC */
+static const int version = 0x109; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index b6c782e4..34ae2c6f 100644
--- a/readme.md
+++ b/readme.md
@@ -312,7 +312,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
-- 2020/May/13 v1.08 support draft-irtf-cfrg-hash-to-curve-07
+- 2020/May/13 v1.09 support draft-irtf-cfrg-hash-to-curve-07
 - 2020/Mar/26 v1.07 change DST for hash-to-curve-06
 - 2020/Mar/15 v1.06 support hash-to-curve-06
 - 2020/Jan/31 v1.05 mclBn_ethMsgToFp2 has changed to append zero byte at the end of msg
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index b7bfe634..d5393829 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -957,6 +957,13 @@ void testHashToFp2v7(const T& mapto)
 			mapto.map2curve_osswu2(P2, msg, msgSize, dst, dstSize);
 			CYBOZU_TEST_EQUAL(P1, P2);
 		}
+		{
+			G2 P;
+			mcl::bn::hashAndMapToG2(P, "asdf", 4);
+			CYBOZU_BENCH_C("draft07 hashAndMapToG2", 1000, mcl::bn::hashAndMapToG2, P, "asdf", 4);
+			P.normalize();
+			printf("P=%s %s\n", P.x.getStr(10).c_str(), P.y.getStr(10).c_str());
+		}
 	}
 }
 

From d51fd79c86954a443b1c7ce67d7bcdb8a63ddedb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 26 May 2020 11:10:56 +0900
Subject: [PATCH 212/553] setBigEndianMod

---
 include/mcl/bn.h               |  2 ++
 include/mcl/fp.hpp             | 37 ++++++++++++++++++++++++++++++++++
 include/mcl/impl/bn_c_impl.hpp |  9 ++++++++-
 include/mcl/mapto_wb19.hpp     |  7 ++-----
 4 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index b2211b72..905733db 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -317,9 +317,11 @@ MCLBN_DLL_API mclSize mclBnFp_getLittleEndian(void *buf, mclSize maxBufSize, con
 // set (buf mod r) to x
 // return 0 if bufSize <= (byte size of Fr * 2) else -1
 MCLBN_DLL_API int mclBnFr_setLittleEndianMod(mclBnFr *x, const void *buf, mclSize bufSize);
+MCLBN_DLL_API int mclBnFr_setBigEndianMod(mclBnFr *x, const void *buf, mclSize bufSize);
 // set (buf mod p) to x
 // return 0 if bufSize <= (byte size of Fp * 2) else -1
 MCLBN_DLL_API int mclBnFp_setLittleEndianMod(mclBnFp *x, const void *buf, mclSize bufSize);
+MCLBN_DLL_API int mclBnFp_setBigEndianMod(mclBnFp *x, const void *buf, mclSize bufSize);
 
 // return 1 if true and 0 otherwise
 MCLBN_DLL_API int mclBnFr_isValid(const mclBnFr *x);
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 5de0dfbf..96110a8b 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -408,6 +408,31 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 		}
 		return n;
 	}
+	/*
+		set (little endian % p)
+		error if bufSize > 64
+	*/
+	void setLittleEndianMod(bool *pb, const void *buf, size_t bufSize)
+	{
+		setArray(pb, (const char *)buf, bufSize, mcl::fp::Mod);
+	}
+	/*
+		set (big endian % p)
+		error if bufSize > 64
+	*/
+	void setBigEndianMod(bool *pb, const void *buf, size_t bufSize)
+	{
+		if (bufSize > 64) {
+			*pb = false;
+			return;
+		}
+		const uint8_t *p = (const uint8_t*)buf;
+		uint8_t swapBuf[64];
+		for (size_t i = 0; i < bufSize; i++) {
+			swapBuf[bufSize - 1 - i] = p[i];
+		}
+		setArray(pb, swapBuf, bufSize, mcl::fp::Mod);
+	}
 	void setByCSPRNG(bool *pb, fp::RandGen rg = fp::RandGen())
 	{
 		if (rg.isZero()) rg = fp::RandGen::get();
@@ -416,6 +441,18 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 		setArrayMask(v_, op_.N);
 	}
 #ifndef CYBOZU_DONT_USE_EXCEPTION
+	void setLittleEndianMod(const void *buf, size_t bufSize) // throwing variant; throws cybozu::Exception if the bool* overload fails
+	{
+		bool b;
+		setLittleEndianMod(&b, buf, bufSize); // the bool* overload takes exactly (pb, buf, bufSize); it selects mcl::fp::Mod itself
+		if (!b) throw cybozu::Exception("setLittleEndianMod");
+	}
+	void setBigEndianMod(const void *buf, size_t bufSize) // throwing variant; throws cybozu::Exception if the bool* overload fails
+	{
+		bool b;
+		setBigEndianMod(&b, buf, bufSize); // the bool* overload takes exactly (pb, buf, bufSize); it selects mcl::fp::Mod itself
+		if (!b) throw cybozu::Exception("setBigEndianMod");
+	}
 	void setByCSPRNG(fp::RandGen rg = fp::RandGen())
 	{
 		bool b;
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index f31380b0..d6018617 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -748,7 +748,14 @@ int mclBnFp_setLittleEndian(mclBnFp *x, const void *buf, mclSize bufSize)
 int mclBnFp_setLittleEndianMod(mclBnFp *x, const void *buf, mclSize bufSize)
 {
 	bool b;
-	cast(x)->setArray(&b, (const char *)buf, bufSize, mcl::fp::Mod);
+	cast(x)->setLittleEndianMod(&b, buf, bufSize);
+	return b ? 0 : -1;
+}
+
+int mclBnFp_setBigEndianMod(mclBnFp *x, const void *buf, mclSize bufSize)
+{
+	bool b;
+	cast(x)->setBigEndianMod(&b, buf, bufSize);
 	return b ? 0 : -1;
 }
 
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 45e31c85..6b353bc3 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -30,9 +30,8 @@ inline void hashToFp2old(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr,
 		info_pfx[4] = char(i + 1);
 		uint8_t t[64];
 		fp::hkdf_expand(t, msg_prime, info_pfx);
-		fp::local::byteSwap(t, 64);
 		bool b;
-		out.getFp0()[i].setArrayMod(&b, t, 64);
+		out.getFp0()[i].setBigEndianMod(&b, t, 64);
 		assert(b); (void)b;
 	}
 }
@@ -450,10 +449,8 @@ struct MapToG2_WB19 {
 		}
 		Fp *x = out[0].getFp0();
 		for (size_t i = 0; i < 4; i++) {
-			uint8_t *p = &md[64 * i];
-			fp::local::byteSwap(p, 64);
 			bool b;
-			x[i].setArrayMod(&b, p, 64);
+			x[i].setBigEndianMod(&b, &md[64 * i], 64);
 			assert(b); (void)b;
 		}
 	}

From 2ede71f70cf434fd648d1ccf7eabbb7d02e44618 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 2 Jun 2020 14:02:24 +0900
Subject: [PATCH 213/553] refactor mulCT

---
 include/mcl/bn.hpp       | 26 +++++++-----
 include/mcl/ec.hpp       | 87 ++++++++++++++++++++++++++++++++++++----
 include/mcl/operator.hpp | 10 ++---
 test/common_test.hpp     | 15 +++++++
 4 files changed, 116 insertions(+), 22 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 1e526f5b..2927ec25 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -802,18 +802,26 @@ struct GLV2 {
 		}
 	}
 	template<class T>
-	void mul(T& Q, const T& P, mpz_class x, bool constTime = false) const
+	void mul(T& Q, const T& P, const mpz_class& x, bool constTime = false) const
 	{
-		(void)constTime;
-		mulVecNGLV(Q, &P, &x, 1);
+		mulVecNGLV(Q, &P, &x, 1, constTime);
 	}
 	template<class T>
-	size_t mulVecNGLV(T& z, const T *xVec, const mpz_class *yVec, size_t n) const
+	static void mulLambda(T& Q, const T& P)
 	{
+		Frobenius(Q, P);
+	}
+	template<class T>
+	size_t mulVecNGLV(T& z, const T *xVec, const mpz_class *yVec, size_t n, bool constTime) const
+	{
+		if (n == 1 && constTime) {
+			ec::local::mul1CT<GLV2<_Fr>, T, Fr, 4, 4>(*this, z, *xVec, *yVec);
+			return 1;
+		}
 		const mpz_class& r = Fr::getOp().mp;
 		const size_t N = mcl::fp::maxMulVecNGLV;
 		if (n > N) n = N;
-		const int w = 5;
+		const int w = 4;
 		const size_t tblSize = 1 << (w - 2);
 		const int splitN = 4;
 		NafArray naf[N][splitN];
@@ -1060,17 +1068,17 @@ inline void powArrayGLV2(Fp12& z, const Fp12& x, const mcl::fp::Unit *y, size_t
 	BN::param.glv2.pow(z, x, s, constTime);
 }
 
-inline size_t mulVecNGLV2(G2& z, const G2 *xVec, const mpz_class *yVec, size_t n)
+inline size_t mulVecNGLV2(G2& z, const G2 *xVec, const mpz_class *yVec, size_t n, bool constTime)
 {
-	return BN::param.glv2.mulVecNGLV(z, xVec, yVec, n);
+	return BN::param.glv2.mulVecNGLV(z, xVec, yVec, n, constTime);
 }
 
-inline size_t powVecNGLV2(Fp12& z, const Fp12 *xVec, const mpz_class *yVec, size_t n)
+inline size_t powVecNGLV2(Fp12& z, const Fp12 *xVec, const mpz_class *yVec, size_t n, bool constTime)
 {
 	typedef GroupMtoA<Fp12> AG; // as additive group
 	AG& _z = static_cast<AG&>(z);
 	const AG *_xVec = static_cast<const AG*>(xVec);
-	return BN::param.glv2.mulVecNGLV(_z, _xVec, yVec, n);
+	return BN::param.glv2.mulVecNGLV(_z, _xVec, yVec, n, constTime);
 }
 
 /*
diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 20cc7ab7..36789938 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -9,6 +9,7 @@
 #include <stdlib.h>
 #include <mcl/fp.hpp>
 #include <mcl/ecparam.hpp>
+#include <mcl/window_method.hpp>
 
 #ifdef _MSC_VER
 	#pragma warning(push)
@@ -71,6 +72,76 @@ bool get_a_flag(const mcl::Fp2T<F>& x)
 	return get_a_flag(x.b); // x = a + bi
 }
 
+template<class GLV, class G, class F, int splitN, size_t w>
+void mul1CT(const GLV& glv, G& Q, const G& P, const mpz_class& x)
+{
+	const mpz_class& r = F::getOp().mp;
+	const size_t tblSize = 1 << w;
+	G tbl[splitN][tblSize];
+	bool negTbl[splitN];
+	mpz_class u[splitN];
+	mpz_class y = x % r;
+	if (y < 0) {
+		y += r;
+	}
+	glv.split(u, y);
+	for (int i = 0; i < splitN; i++) {
+		if (u[i] < 0) {
+			gmp::neg(u[i], u[i]);
+			negTbl[i] = true;
+		} else {
+			negTbl[i] = false;
+		}
+		tbl[i][0].clear();
+	}
+	tbl[0][1] = P;
+	for (size_t j = 2; j < tblSize; j++) {
+		G::add(tbl[0][j], tbl[0][j - 1], P);
+	}
+	for (int i = 1; i < splitN; i++) {
+		for (size_t j = 1; j < tblSize; j++) {
+			GLV::mulLambda(tbl[i][j], tbl[i - 1][j]);
+		}
+	}
+	for (int i = 0; i < splitN; i++) {
+		if (negTbl[i]) {
+			for (size_t j = 0; j < tblSize; j++) {
+				G::neg(tbl[i][j], tbl[i][j]);
+			}
+		}
+	}
+	mcl::FixedArray<int8_t, sizeof(F) * 8 / w + 1> vTbl[splitN];
+	size_t bitSizeTbl[splitN];
+	size_t maxBitSize = 0;
+	for (size_t i = 0; i < splitN; i++) {
+		size_t bitSize = gmp::getBitSize(u[i]);
+		bitSizeTbl[i] = bitSize;
+		if (bitSize > maxBitSize) {
+			maxBitSize = bitSize;
+		}
+	}
+	int loopN = (maxBitSize + w - 1) / w;
+	for (int i = 0; i < splitN; i++) {
+		fp::ArrayIterator<fp::Unit> itr(gmp::getUnit(u[i]), bitSizeTbl[i], w);
+		bool b = vTbl[i].resize(loopN);
+		assert(b);
+		(void)b;
+		for (int j = 0; j < loopN; j++) {
+			vTbl[i][loopN - 1 - j] = itr.getNext();
+		}
+	}
+	Q.clear();
+	for (int k = 0; k < loopN; k++) {
+		for (size_t i = 0; i < w; i++) {
+			G::dbl(Q, Q);
+		}
+		for (size_t i = 0; i < splitN; i++) {
+			uint8_t v = vTbl[i][k];
+			G::add(Q, Q, tbl[i][v]);
+		}
+	}
+}
+
 } // mcl::ec::local
 
 template<class E>
@@ -567,7 +638,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	static bool verifyOrder_;
 	static mpz_class order_;
 	static void (*mulArrayGLV)(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime);
-	static size_t (*mulVecNGLV)(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn);
+	static size_t (*mulVecNGLV)(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn, bool constTime);
 	/* default constructor is undefined value */
 	EcT() {}
 	EcT(const Fp& _x, const Fp& _y)
@@ -633,7 +704,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 			// don't clear order_ because it is used for isValidOrder()
 		}
 	}
-	static void setMulArrayGLV(void f(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime), size_t g(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn) = 0)
+	static void setMulArrayGLV(void f(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime), size_t g(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn, bool constTime) = 0)
 	{
 		mulArrayGLV = f;
 		mulVecNGLV = g;
@@ -1342,7 +1413,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 
 public:
 	template<class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
-	static inline void mulVec(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
+	static inline void mulVec(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n, bool constTime = false)
 	{
 		/*
 			mulVecNGLV is a little slow for large n
@@ -1354,7 +1425,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 				yVec[i].getMpz(&b, myVec[i]);
 				assert(b); (void)b;
 			}
-			size_t done = mulVecNGLV(z, xVec, myVec, n);
+			size_t done = mulVecNGLV(z, xVec, myVec, n, constTime);
 			assert(done == n); (void)done;
 			return;
 		}
@@ -1424,7 +1495,7 @@ template<class Fp> int EcT<Fp>::ioMode_;
 template<class Fp> bool EcT<Fp>::verifyOrder_;
 template<class Fp> mpz_class EcT<Fp>::order_;
 template<class Fp> void (*EcT<Fp>::mulArrayGLV)(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime);
-template<class Fp> size_t (*EcT<Fp>::mulVecNGLV)(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn);
+template<class Fp> size_t (*EcT<Fp>::mulVecNGLV)(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn, bool constTime);
 template<class Fp> int EcT<Fp>::mode_;
 
 // r = the order of Ec
@@ -1471,11 +1542,11 @@ struct GLV1T {
 		a = x - (t * B[0][0] + b * B[1][0]);
 		b = - (t * B[0][1] + b * B[1][1]);
 	}
-	static void mul(Ec& Q, const Ec& P, const mpz_class& x, bool /*constTime*/ = false)
+	static void mul(Ec& Q, const Ec& P, const mpz_class& x, bool constTime = false)
 	{
-		mulVecNGLV(Q, &P, &x, 1);
+		mulVecNGLV(Q, &P, &x, 1, constTime);
 	}
-	static inline size_t mulVecNGLV(Ec& z, const Ec *xVec, const mpz_class *yVec, size_t n)
+	static inline size_t mulVecNGLV(Ec& z, const Ec *xVec, const mpz_class *yVec, size_t n, bool /*constTime*/ = false)
 	{
 		const size_t N = mcl::fp::maxMulVecNGLV;
 		if (n > N) n = N;
diff --git a/include/mcl/operator.hpp b/include/mcl/operator.hpp
index 84df77f7..1658c2df 100644
--- a/include/mcl/operator.hpp
+++ b/include/mcl/operator.hpp
@@ -84,14 +84,14 @@ struct Operator : public E {
 	{
 		powArray(z, x, gmp::getUnit(y), gmp::getUnitSize(y), y < 0, true);
 	}
-	static void setPowArrayGLV(void f(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime), size_t g(T& z, const T *xVec, const mpz_class *yVec, size_t n) = 0)
+	static void setPowArrayGLV(void f(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime), size_t g(T& z, const T *xVec, const mpz_class *yVec, size_t n, bool constTime) = 0)
 	{
 		powArrayGLV = f;
 		powVecNGLV = g;
 	}
 	static const size_t powVecMaxN = 16;
 	template<class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
-	static void powVec(T& z, const T* xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
+	static void powVec(T& z, const T* xVec, const FpT<tag, maxBitSize> *yVec, size_t n, bool constTime = false)
 	{
 		assert(powVecNGLV);
 		T r;
@@ -106,7 +106,7 @@ struct Operator : public E {
 				yVec[i].getMpz(&b, myVec[i]);
 				assert(b); (void)b;
 			}
-			size_t done = powVecNGLV(t, xVec, myVec, tn);
+			size_t done = powVecNGLV(t, xVec, myVec, tn, constTime);
 			r *= t;
 			xVec += done;
 			yVec += done;
@@ -116,7 +116,7 @@ struct Operator : public E {
 	}
 private:
 	static void (*powArrayGLV)(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime);
-	static size_t (*powVecNGLV)(T& z, const T* xVec, const mpz_class *yVec, size_t n);
+	static size_t (*powVecNGLV)(T& z, const T* xVec, const mpz_class *yVec, size_t n, bool constTime);
 	static void powArray(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime)
 	{
 		if (powArrayGLV && (constTime || yn > 1)) {
@@ -145,7 +145,7 @@ template<class T, class E>
 void (*Operator<T, E>::powArrayGLV)(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime);
 
 template<class T, class E>
-size_t (*Operator<T, E>::powVecNGLV)(T& z, const T* xVec, const mpz_class *yVec, size_t n);
+size_t (*Operator<T, E>::powVecNGLV)(T& z, const T* xVec, const mpz_class *yVec, size_t n, bool constTime);
 
 /*
 	T must have save and load
diff --git a/test/common_test.hpp b/test/common_test.hpp
index f6d1dcd5..54d3beda 100644
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@@ -89,12 +89,27 @@ inline void testPowVec(const G& e)
 	}
 }
 
+template<class G>
+void testMulCT(const G& P)
+{
+	cybozu::XorShift rg;
+	G Q1, Q2;
+	for (int i = 0; i < 100; i++) {
+		Fr x;
+		x.setByCSPRNG(rg);
+		G::mul(Q1, P, x);
+		G::mulCT(Q2, P, x);
+		CYBOZU_TEST_EQUAL(Q1, Q2);
+	}
+}
+
 void testCommon(const G1& P, const G2& Q)
 {
 	puts("G1");
 	testMulVec(P);
 	puts("G2");
 	testMulVec(Q);
+	testMulCT(Q);
 	GT e;
 	mcl::bn::pairing(e, P, Q);
 	puts("GT");

From 67672f4050347afc834f966633daa3134e60c926 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 2 Jun 2020 16:07:05 +0900
Subject: [PATCH 214/553] refactor mul

---
 include/mcl/bn.hpp | 73 +++++++++++++++++++++++++---------------------
 include/mcl/ec.hpp | 26 ++++++++++++-----
 test/bn_test.cpp   |  2 +-
 test/glv_test.cpp  | 19 +++++++-----
 4 files changed, 71 insertions(+), 49 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 2927ec25..21540511 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -701,22 +701,22 @@ struct GLV1 : mcl::GLV1T<G1, Fr> {
 	GLV method for G2 and GT on BN/BLS12
 */
 template<class _Fr>
-struct GLV2 {
+struct GLV2T {
+	typedef GLV2T<_Fr> GLV2;
 	typedef _Fr Fr;
 	typedef mcl::FixedArray<int8_t, sizeof(Fr) * 8 / 4 + 4> NafArray;
-	size_t rBitSize;
-	mpz_class B[4][4];
-	mpz_class v[4];
-	mpz_class z;
-	mpz_class abs_z;
-	bool isBLS12;
-	GLV2() : rBitSize(0), isBLS12(false) {}
-	void init(const mpz_class& z, bool isBLS12 = false)
+	static size_t rBitSize;
+	static mpz_class B[4][4];
+	static mpz_class v[4];
+	static mpz_class z;
+	static mpz_class abs_z;
+	static bool isBLS12;
+	static void init(const mpz_class& z, bool isBLS12 = false)
 	{
 		const mpz_class& r = Fr::getOp().mp;
-		this->z = z;
-		this->abs_z = z < 0 ? -z : z;
-		this->isBLS12 = isBLS12;
+		GLV2::z = z;
+		GLV2::abs_z = z < 0 ? -z : z;
+		GLV2::isBLS12 = isBLS12;
 		rBitSize = Fr::getOp().bitSize;
 		rBitSize = (rBitSize + mcl::fp::UnitBitSize - 1) & ~(mcl::fp::UnitBitSize - 1);// a little better size
 		mpz_class z2p1 = z * 2 + 1;
@@ -767,7 +767,7 @@ struct GLV2 {
 	/*
 		u[] = [x, 0, 0, 0] - v[] * x * B
 	*/
-	void split(mpz_class u[4], const mpz_class& x) const
+	static void split(mpz_class u[4], const mpz_class& x)
 	{
 		if (isBLS12) {
 			/*
@@ -802,7 +802,7 @@ struct GLV2 {
 		}
 	}
 	template<class T>
-	void mul(T& Q, const T& P, const mpz_class& x, bool constTime = false) const
+	static void mul(T& Q, const T& P, const mpz_class& x, bool constTime = false)
 	{
 		mulVecNGLV(Q, &P, &x, 1, constTime);
 	}
@@ -812,10 +812,10 @@ struct GLV2 {
 		Frobenius(Q, P);
 	}
 	template<class T>
-	size_t mulVecNGLV(T& z, const T *xVec, const mpz_class *yVec, size_t n, bool constTime) const
+	static size_t mulVecNGLV(T& z, const T *xVec, const mpz_class *yVec, size_t n, bool constTime)
 	{
 		if (n == 1 && constTime) {
-			ec::local::mul1CT<GLV2<_Fr>, T, Fr, 4, 4>(*this, z, *xVec, *yVec);
+			ec::local::mul1CT<GLV2, T, Fr, 4, 4>(z, *xVec, *yVec);
 			return 1;
 		}
 		const mpz_class& r = Fr::getOp().mp;
@@ -847,14 +847,14 @@ struct GLV2 {
 			T P2;
 			T::dbl(P2, xVec[i]);
 			tbl[i][0][0] = xVec[i];
-			Frobenius(tbl[i][1][0], tbl[i][0][0]);
-			Frobenius(tbl[i][2][0], tbl[i][1][0]);
-			Frobenius(tbl[i][3][0], tbl[i][2][0]);
+			for (int k = 1; k < w; k++) {
+				mulLambda(tbl[i][k][0], tbl[i][k - 1][0]);
+			}
 			for (size_t j = 1; j < tblSize; j++) {
 				T::add(tbl[i][0][j], tbl[i][0][j - 1], P2);
-				Frobenius(tbl[i][1][j], tbl[i][0][j]);
-				Frobenius(tbl[i][2][j], tbl[i][1][j]);
-				Frobenius(tbl[i][3][j], tbl[i][2][j]);
+				for (int k = 1; k < w; k++) {
+					mulLambda(tbl[i][k][j], tbl[i][k - 1][j]);
+				}
 			}
 		}
 		z.clear();
@@ -862,16 +862,15 @@ struct GLV2 {
 			const size_t bit = maxBit - 1 - i;
 			T::dbl(z, z);
 			for (size_t j = 0; j < n; j++) {
-				mcl::local::addTbl(z, tbl[j][0], naf[j][0], bit);
-				mcl::local::addTbl(z, tbl[j][1], naf[j][1], bit);
-				mcl::local::addTbl(z, tbl[j][2], naf[j][2], bit);
-				mcl::local::addTbl(z, tbl[j][3], naf[j][3], bit);
+				for (int k = 0; k < w; k++) {
+					mcl::local::addTbl(z, tbl[j][k], naf[j][k], bit);
+				}
 			}
 		}
 		return n;
 
 	}
-	void pow(Fp12& z, const Fp12& x, const mpz_class& y, bool constTime = false) const
+	static void pow(Fp12& z, const Fp12& x, const mpz_class& y, bool constTime = false)
 	{
 		typedef GroupMtoA<Fp12> AG; // as additive group
 		AG& _z = static_cast<AG&>(z);
@@ -880,6 +879,13 @@ struct GLV2 {
 	}
 };
 
+template<class Fr> size_t GLV2T<Fr>::rBitSize = 0;
+template<class Fr> mpz_class GLV2T<Fr>::B[4][4];
+template<class Fr> mpz_class GLV2T<Fr>::v[4];
+template<class Fr> mpz_class GLV2T<Fr>::z;
+template<class Fr> mpz_class GLV2T<Fr>::abs_z;
+template<class Fr> bool GLV2T<Fr>::isBLS12 = false;
+
 struct Param {
 	CurveParam cp;
 	mpz_class z;
@@ -889,7 +895,6 @@ struct Param {
 	mpz_class p;
 	mpz_class r;
 	local::MapTo mapTo;
-	local::GLV2<Fr> glv2;
 	// for G2 Frobenius
 	Fp2 g2;
 	Fp2 g3;
@@ -1001,7 +1006,7 @@ struct Param {
 			mapTo.init(2 * p - r, z, cp.curveType);
 		}
 		GLV1::initForBN(z, isBLS12, cp.curveType);
-		glv2.init(z, isBLS12);
+		GLV2T<Fr>::init(z, isBLS12);
 		basePoint.clear();
 		*pb = true;
 	}
@@ -1049,6 +1054,8 @@ static local::Param& nonConstParam = local::StaticVar<>::param;
 
 namespace local {
 
+typedef GLV2T<Fr> GLV2;
+
 inline void mulArrayGLV2(G2& z, const G2& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
 {
 	mpz_class s;
@@ -1056,7 +1063,7 @@ inline void mulArrayGLV2(G2& z, const G2& x, const mcl::fp::Unit *y, size_t yn,
 	mcl::gmp::setArray(&b, s, y, yn);
 	assert(b);
 	if (isNegative) s = -s;
-	BN::param.glv2.mul(z, x, s, constTime);
+	GLV2::mul(z, x, s, constTime);
 }
 inline void powArrayGLV2(Fp12& z, const Fp12& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
 {
@@ -1065,12 +1072,12 @@ inline void powArrayGLV2(Fp12& z, const Fp12& x, const mcl::fp::Unit *y, size_t
 	mcl::gmp::setArray(&b, s, y, yn);
 	assert(b);
 	if (isNegative) s = -s;
-	BN::param.glv2.pow(z, x, s, constTime);
+	GLV2::pow(z, x, s, constTime);
 }
 
 inline size_t mulVecNGLV2(G2& z, const G2 *xVec, const mpz_class *yVec, size_t n, bool constTime)
 {
-	return BN::param.glv2.mulVecNGLV(z, xVec, yVec, n, constTime);
+	return GLV2::mulVecNGLV(z, xVec, yVec, n, constTime);
 }
 
 inline size_t powVecNGLV2(Fp12& z, const Fp12 *xVec, const mpz_class *yVec, size_t n, bool constTime)
@@ -1078,7 +1085,7 @@ inline size_t powVecNGLV2(Fp12& z, const Fp12 *xVec, const mpz_class *yVec, size
 	typedef GroupMtoA<Fp12> AG; // as additive group
 	AG& _z = static_cast<AG&>(z);
 	const AG *_xVec = static_cast<const AG*>(xVec);
-	return BN::param.glv2.mulVecNGLV(_z, _xVec, yVec, n, constTime);
+	return GLV2::mulVecNGLV(_z, _xVec, yVec, n, constTime);
 }
 
 /*
diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 36789938..33e02dfc 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -72,19 +72,25 @@ bool get_a_flag(const mcl::Fp2T<F>& x)
 	return get_a_flag(x.b); // x = a + bi
 }
 
+/*
+	Q = x P
+	splitN = 2(G1) or 4(G2)
+	w : window size
+*/
 template<class GLV, class G, class F, int splitN, size_t w>
-void mul1CT(const GLV& glv, G& Q, const G& P, const mpz_class& x)
+void mul1CT(G& Q, const G& P, const mpz_class& x)
 {
 	const mpz_class& r = F::getOp().mp;
 	const size_t tblSize = 1 << w;
 	G tbl[splitN][tblSize];
 	bool negTbl[splitN];
 	mpz_class u[splitN];
-	mpz_class y = x % r;
+	mpz_class y;
+	F::getOp().modp.modp(y, x);
 	if (y < 0) {
 		y += r;
 	}
-	glv.split(u, y);
+	GLV::split(u, y);
 	for (int i = 0; i < splitN; i++) {
 		if (u[i] < 0) {
 			gmp::neg(u[i], u[i]);
@@ -1532,10 +1538,12 @@ struct GLV1T {
 		Q.z = P.z;
 	}
 	/*
-		x = a + b * lambda mod r
+		x = u[0] + u[1] * lambda mod r
 	*/
-	static void split(mpz_class& a, mpz_class& b, const mpz_class& x)
+	static void split(mpz_class u[2], const mpz_class& x)
 	{
+		mpz_class& a = u[0];
+		mpz_class& b = u[1];
 		mpz_class t;
 		t = (x * v0) >> rBitSize;
 		b = (x * v1) >> rBitSize;
@@ -1546,8 +1554,12 @@ struct GLV1T {
 	{
 		mulVecNGLV(Q, &P, &x, 1, constTime);
 	}
-	static inline size_t mulVecNGLV(Ec& z, const Ec *xVec, const mpz_class *yVec, size_t n, bool /*constTime*/ = false)
+	static inline size_t mulVecNGLV(Ec& z, const Ec *xVec, const mpz_class *yVec, size_t n, bool constTime)
 	{
+		if (n == 1 && constTime) {
+			ec::local::mul1CT<GLV1T<Ec, _Fr>, Ec, _Fr, 2, 4>(z, *xVec, *yVec);
+			return 1;
+		}
 		const size_t N = mcl::fp::maxMulVecNGLV;
 		if (n > N) n = N;
 		const int w = 5;
@@ -1565,7 +1577,7 @@ struct GLV1T {
 			if (y < 0) {
 				y += r;
 			}
-			split(u[0], u[1], y);
+			split(u, y);
 
 			for (int j = 0; j < 2; j++) {
 				gmp::getNAFwidth(&b, naf[i][j], u[j], w);
diff --git a/test/bn_test.cpp b/test/bn_test.cpp
index e6139b36..1a503c5d 100644
--- a/test/bn_test.cpp
+++ b/test/bn_test.cpp
@@ -211,7 +211,7 @@ void testFp12pow(const G1& P, const G2& Q)
 		x.setRand(rg);
 		mpz_class xm = x.getMpz();
 		Fp12::pow(e1, e, xm);
-		BN::param.glv2.pow(e2, e, xm);
+		local::GLV2::pow(e2, e, xm);
 		CYBOZU_TEST_EQUAL(e1, e2);
 	}
 }
diff --git a/test/glv_test.cpp b/test/glv_test.cpp
index 78bb8218..59bdcdd2 100644
--- a/test/glv_test.cpp
+++ b/test/glv_test.cpp
@@ -83,12 +83,15 @@ void compareLength(const GLV2& lhs)
 	int lt = 0;
 	int eq = 0;
 	int gt = 0;
-	mpz_class R0, R1, L0, L1, x;
+	mpz_class R[2];
+	mpz_class L0, L1, x;
+	mpz_class& R0 = R[0];
+	mpz_class& R1 = R[1];
 	Fr r;
 	for (int i = 1; i < 1000; i++) {
 		r.setRand(rg);
 		x = r.getMpz();
-		mcl::bn::local::GLV1::split(R0, R1, x);
+		mcl::bn::local::GLV1::split(R,x);
 		lhs.split(L0, L1, x);
 
 		size_t R0n = mcl::gmp::getBitSize(R0);
@@ -162,33 +165,33 @@ void testGLV1()
 */
 void testGLV2()
 {
+	typedef local::GLV2 GLV2;
 	G2 Q0, Q1, Q2;
 	mpz_class z = BN::param.z;
 	mpz_class r = BN::param.r;
-	mcl::bn::local::GLV2<Fr> glv2;
-	glv2.init(z, BN::param.isBLS12);
+	GLV2::init(z, BN::param.isBLS12);
 	mpz_class n;
 	cybozu::XorShift rg;
 	mapToG2(Q0, 1);
 	for (int i = -10; i < 10; i++) {
 		n = i;
 		G2::mulGeneric(Q1, Q0, n);
-		glv2.mul(Q2, Q0, n);
+		GLV2::mul(Q2, Q0, n);
 		CYBOZU_TEST_EQUAL(Q1, Q2);
 	}
 	for (int i = 1; i < 100; i++) {
-		mcl::gmp::getRand(n, glv2.rBitSize, rg);
+		mcl::gmp::getRand(n, GLV2::rBitSize, rg);
 		n %= r;
 		n -= r/2;
 		mapToG2(Q0, i);
 		G2::mulGeneric(Q1, Q0, n);
-		glv2.mul(Q2, Q0, n);
+		GLV2::mul(Q2, Q0, n);
 		CYBOZU_TEST_EQUAL(Q1, Q2);
 	}
 	Fr s;
 	mapToG2(Q0, 123);
 	CYBOZU_BENCH_C("G2::mul", 1000, Q2 = Q0; s.setRand(rg); G2::mulGeneric, Q2, Q1, s.getMpz());
-	CYBOZU_BENCH_C("G2::glv", 1000, Q1 = Q0; s.setRand(rg); glv2.mul, Q2, Q1, s.getMpz());
+	CYBOZU_BENCH_C("G2::glv", 1000, Q1 = Q0; s.setRand(rg); GLV2::mul, Q2, Q1, s.getMpz());
 }
 
 void testGT()

From 4527c4aa08ad7ff714400bb06e8ebd10093b85a6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 2 Jun 2020 16:39:04 +0900
Subject: [PATCH 215/553] unify mulVecNGLV

---
 include/mcl/bn.hpp |  53 +----------------------
 include/mcl/ec.hpp | 105 ++++++++++++++++++++++++++-------------------
 2 files changed, 62 insertions(+), 96 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 21540511..08467ad0 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -704,7 +704,6 @@ template<class _Fr>
 struct GLV2T {
 	typedef GLV2T<_Fr> GLV2;
 	typedef _Fr Fr;
-	typedef mcl::FixedArray<int8_t, sizeof(Fr) * 8 / 4 + 4> NafArray;
 	static size_t rBitSize;
 	static mpz_class B[4][4];
 	static mpz_class v[4];
@@ -818,57 +817,7 @@ struct GLV2T {
 			ec::local::mul1CT<GLV2, T, Fr, 4, 4>(z, *xVec, *yVec);
 			return 1;
 		}
-		const mpz_class& r = Fr::getOp().mp;
-		const size_t N = mcl::fp::maxMulVecNGLV;
-		if (n > N) n = N;
-		const int w = 4;
-		const size_t tblSize = 1 << (w - 2);
-		const int splitN = 4;
-		NafArray naf[N][splitN];
-		T tbl[N][splitN][tblSize];
-		bool b;
-		mpz_class u[splitN], y;
-		size_t maxBit = 0;
-
-		for (size_t i = 0; i < n; i++) {
-			y = yVec[i];
-			y %= r;
-			if (y < 0) {
-				y += r;
-			}
-			split(u, y);
-
-			for (int j = 0; j < splitN; j++) {
-				gmp::getNAFwidth(&b, naf[i][j], u[j], w);
-				assert(b); (void)b;
-				if (naf[i][j].size() > maxBit) maxBit = naf[i][j].size();
-			}
-
-			T P2;
-			T::dbl(P2, xVec[i]);
-			tbl[i][0][0] = xVec[i];
-			for (int k = 1; k < w; k++) {
-				mulLambda(tbl[i][k][0], tbl[i][k - 1][0]);
-			}
-			for (size_t j = 1; j < tblSize; j++) {
-				T::add(tbl[i][0][j], tbl[i][0][j - 1], P2);
-				for (int k = 1; k < w; k++) {
-					mulLambda(tbl[i][k][j], tbl[i][k - 1][j]);
-				}
-			}
-		}
-		z.clear();
-		for (size_t i = 0; i < maxBit; i++) {
-			const size_t bit = maxBit - 1 - i;
-			T::dbl(z, z);
-			for (size_t j = 0; j < n; j++) {
-				for (int k = 0; k < w; k++) {
-					mcl::local::addTbl(z, tbl[j][k], naf[j][k], bit);
-				}
-			}
-		}
-		return n;
-
+		return ec::local::mulVecNGLVT<GLV2, T, Fr, 4, 5, fp::maxMulVecNGLV>(z, xVec, yVec, n);
 	}
 	static void pow(Fp12& z, const Fp12& x, const mpz_class& y, bool constTime = false)
 	{
diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 33e02dfc..ce23f5be 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -148,6 +148,64 @@ void mul1CT(G& Q, const G& P, const mpz_class& x)
 	}
 }
 
+/*
+	z = sum of xVec[i] * yVec[i] for i = 0, ..., min(N, n) - 1 (z is overwritten); returns min(N, n)
+	splitN = 2(G1) or 4(G2)
+	w : window size
+*/
+template<class GLV, class G, class F, int splitN, int w, size_t N>
+static size_t mulVecNGLVT(G& z, const G *xVec, const mpz_class *yVec, size_t n)
+{
+	const mpz_class& r = F::getOp().mp;
+	const size_t tblSize = 1 << (w - 2);
+	typedef mcl::FixedArray<int8_t, sizeof(F) * 8 / splitN + splitN> NafArray;
+	NafArray naf[N][splitN];
+	G tbl[N][splitN][tblSize];
+	bool b;
+	mpz_class u[splitN], y;
+	size_t maxBit = 0;
+
+	if (n > N) n = N;
+	for (size_t i = 0; i < n; i++) {
+		y = yVec[i];
+		y %= r;
+		if (y < 0) {
+			y += r;
+		}
+		GLV::split(u, y);
+
+		for (int j = 0; j < splitN; j++) {
+			gmp::getNAFwidth(&b, naf[i][j], u[j], w);
+			assert(b); (void)b;
+			if (naf[i][j].size() > maxBit) maxBit = naf[i][j].size();
+		}
+
+		G P2;
+		G::dbl(P2, xVec[i]);
+		tbl[i][0][0] = xVec[i];
+		for (int k = 1; k < splitN; k++) {
+			GLV::mulLambda(tbl[i][k][0], tbl[i][k - 1][0]);
+		}
+		for (size_t j = 1; j < tblSize; j++) {
+			G::add(tbl[i][0][j], tbl[i][0][j - 1], P2);
+			for (int k = 1; k < splitN; k++) {
+				GLV::mulLambda(tbl[i][k][j], tbl[i][k - 1][j]);
+			}
+		}
+	}
+	z.clear();
+	for (size_t i = 0; i < maxBit; i++) {
+		const size_t bit = maxBit - 1 - i;
+		G::dbl(z, z);
+		for (size_t j = 0; j < n; j++) {
+			for (int k = 0; k < splitN; k++) {
+				mcl::local::addTbl(z, tbl[j][k], naf[j][k], bit);
+			}
+		}
+	}
+	return n;
+}
+
 } // mcl::ec::local
 
 template<class E>
@@ -1507,6 +1565,7 @@ template<class Fp> int EcT<Fp>::mode_;
 // r = the order of Ec
 template<class Ec, class _Fr>
 struct GLV1T {
+	typedef GLV1T<Ec, _Fr> GLV1;
 	typedef typename Ec::Fp Fp;
 	typedef _Fr Fr;
 	static Fp rw; // rw = 1 / w = (-1 - sqrt(-3)) / 2
@@ -1557,52 +1616,10 @@ struct GLV1T {
 	static inline size_t mulVecNGLV(Ec& z, const Ec *xVec, const mpz_class *yVec, size_t n, bool constTime)
 	{
 		if (n == 1 && constTime) {
-			ec::local::mul1CT<GLV1T<Ec, _Fr>, Ec, _Fr, 2, 4>(z, *xVec, *yVec);
+			ec::local::mul1CT<GLV1, Ec, _Fr, 2, 4>(z, *xVec, *yVec);
 			return 1;
 		}
-		const size_t N = mcl::fp::maxMulVecNGLV;
-		if (n > N) n = N;
-		const int w = 5;
-		const mpz_class& r = Fr::getOp().mp;
-		const size_t tblSize = 1 << (w - 2);
-		typedef mcl::FixedArray<int8_t, sizeof(Fr) * 8 / 2 + 2> NafArray;
-		NafArray naf[N][2];
-		Ec tbl[N][2][tblSize];
-		bool b;
-		mpz_class u[2], y;
-		size_t maxBit = 0;
-		for (size_t i = 0; i < n; i++) {
-			y = yVec[i];
-			y %= r;
-			if (y < 0) {
-				y += r;
-			}
-			split(u, y);
-
-			for (int j = 0; j < 2; j++) {
-				gmp::getNAFwidth(&b, naf[i][j], u[j], w);
-				assert(b); (void)b;
-				if (naf[i][j].size() > maxBit) maxBit = naf[i][j].size();
-			}
-
-			Ec P2;
-			Ec::dbl(P2, xVec[i]);
-			tbl[i][0][0] = xVec[i];
-			mulLambda(tbl[i][1][0], tbl[i][0][0]);
-			for (size_t j = 1; j < tblSize; j++) {
-				Ec::add(tbl[i][0][j], tbl[i][0][j - 1], P2);
-				mulLambda(tbl[i][1][j], tbl[i][0][j]);
-			}
-		}
-		z.clear();
-		for (size_t i = 0; i < maxBit; i++) {
-			Ec::dbl(z, z);
-			for (size_t j = 0; j < n; j++) {
-				local::addTbl(z, tbl[j][0], naf[j][0], maxBit - 1 - i);
-				local::addTbl(z, tbl[j][1], naf[j][1], maxBit - 1 - i);
-			}
-		}
-		return n;
+		return ec::local::mulVecNGLVT<GLV1, Ec, _Fr, 2, 5, mcl::fp::maxMulVecNGLV>(z, xVec, yVec, n);
 	}
 	static void mulArrayGLV(Ec& z, const Ec& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
 	{

From fd8352e1a594cceb458555165e1abc78d2a04ba0 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 2 Jun 2020 20:00:39 +0900
Subject: [PATCH 216/553] v1.10

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index db1dd7c3..958d438f 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x109; /* 0xABC = A.BC */
+static const int version = 0x110; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From b27ecb2fc491cc8b5b8e480d520fa973fd323f1e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 5 Jun 2020 17:32:45 +0900
Subject: [PATCH 217/553] add test of hash-to-curve-08

---
 test/mapto_wb19_test.cpp | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index d5393829..b289ae48 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -894,16 +894,16 @@ void testHashToFp2v7(const T& mapto)
 		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
 	}
 	{
-		const char *dst = "BLS12381G2_XMD:SHA-256_SSWU_RO_TESTGEN";
-		size_t dstSize = strlen(dst);
 		const struct {
 			const char *msg;
+			const char *dst;
 			Fp2Str x;
 			Fp2Str y;
 		} tbl[] = {
-			// https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-07.html#name-bls12381g2_xmdsha-256_sswu_
+			// https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-hash-to-curve-07#appendix-G.10.1
 			{
 				"", // msg
+				"BLS12381G2_XMD:SHA-256_SSWU_RO_TESTGEN",
 				{ // P.x
 					"0x0a650bd36ae7455cb3fe5d8bb1310594551456f5c6593aec9ee0c03d2f6cb693bd2c5e99d4e23cbaec767609314f51d3",
 					"0x0fbdae26f9f9586a46d4b0b70390d09064ef2afe5c99348438a3c7d9756471e015cb534204c1b6824617a85024c772dc",
@@ -915,6 +915,7 @@ void testHashToFp2v7(const T& mapto)
 			},
 			{
 				"abc",
+				"BLS12381G2_XMD:SHA-256_SSWU_RO_TESTGEN",
 				{
 					"0x1953ce6d4267939c7360756d9cca8eb34aac4633ef35369a7dc249445069888e7d1b3f9d2e75fbd468fbcbba7110ea02",
 					"0x03578447618463deb106b60e609c6f7cc446dc6035f84a72801ba17c94cd800583b493b948eff0033f09086fdd7f6175",
@@ -926,6 +927,7 @@ void testHashToFp2v7(const T& mapto)
 			},
 			{
 				"abcdef0123456789",
+				"BLS12381G2_XMD:SHA-256_SSWU_RO_TESTGEN",
 				{
 					"0x17b461fc3b96a30c2408958cbfa5f5927b6063a8ad199d5ebf2d7cdeffa9c20c85487204804fab53f950b2f87db365aa",
 					"0x195fad48982e186ce3c5c82133aefc9b26d55979b6f530992a8849d4263ec5d57f7a181553c8799bcc83da44847bdc8d",
@@ -937,6 +939,7 @@ void testHashToFp2v7(const T& mapto)
 			},
 			{
 				"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+				"BLS12381G2_XMD:SHA-256_SSWU_RO_TESTGEN",
 				{
 					"0x0a162306f3b0f2bb326f0c4fb0e1fea020019c3af796dcd1d7264f50ddae94cacf3cade74603834d44b9ab3d5d0a6c98",
 					"0x123b6bd9feeba26dd4ad00f8bfda2718c9700dc093ea5287d7711844644eb981848316d3f3f57d5d3a652c6cdc816aca",
@@ -946,10 +949,25 @@ void testHashToFp2v7(const T& mapto)
 					"0x05483f3b96d9252dd4fc0868344dfaf3c9d145e3387db23fa8e449304fab6a7b6ec9c15f05c0a1ea66ff0efcc03e001a",
 				},
 			},
+			// https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-08.html#name-bls12381g2_xmdsha-256_sswu_
+			{
+				"", // msg
+				"QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_",
+				{ // P.x
+					"0x0141ebfbdca40eb85b87142e130ab689c673cf60f1a3e98d69335266f30d9b8d4ac44c1038e9dcdd5393faf5c41fb78a",
+					"0x05cb8437535e20ecffaef7752baddf98034139c38452458baeefab379ba13dff5bf5dd71b72418717047f5b0f37da03d",
+				},
+				{ // P.y
+					"0x0503921d7f6a12805e72940b963c0cf3471c7b2a524950ca195d11062ee75ec076daf2d4bc358c4b190c0c98064fdd92",
+					"0x12424ac32561493f3fe3c260708a12b7c620e7be00099a974e259ddc7d1f6395c3c811cdd19f1e8dbf3e9ecfdcbab8d6",
+				}
+			},
 		};
 		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 			const char *msg = tbl[i].msg;
 			size_t msgSize = strlen(msg);
+			const char *dst = tbl[i].dst;
+			size_t dstSize = strlen(dst);
 			G2 P1, P2;
 			set(P1.x, tbl[i].x);
 			set(P1.y, tbl[i].y);

From 2095a80e17e4e90fea3361d9190f0cebd5982aa2 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 10 Jun 2020 14:07:58 +0900
Subject: [PATCH 218/553] aggs supports serialize

---
 include/mcl/aggregate_sig.hpp | 60 +++++++++++++++++++++++++++++++----
 test/aggregate_sig_test.cpp   | 30 ++++++++++++++++++
 2 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/include/mcl/aggregate_sig.hpp b/include/mcl/aggregate_sig.hpp
index f3140570..9ee824c8 100644
--- a/include/mcl/aggregate_sig.hpp
+++ b/include/mcl/aggregate_sig.hpp
@@ -62,16 +62,32 @@ struct AGGST {
 		friend class SecretKey;
 		friend class PublicKey;
 	public:
+		template<class InputStream>
+		void load(bool *pb, InputStream& is, int ioMode = IoSerialize)
+		{
+			S_.load(pb, is, ioMode);
+		}
+		template<class OutputStream>
+		void save(bool *pb, OutputStream& os, int ioMode = IoSerialize) const
+		{
+			S_.save(pb, os, ioMode);
+		}
+#ifndef CYBOZU_DONT_USE_EXCEPTION
 		template<class InputStream>
 		void load(InputStream& is, int ioMode = IoSerialize)
 		{
-			S_.load(is, ioMode);
+			bool b;
+			load(&b, is, ioMode);
+			if (!b) throw cybozu::Exception("Signature:load");
 		}
 		template<class OutputStream>
 		void save(OutputStream& os, int ioMode = IoSerialize) const
 		{
-			S_.save(os, ioMode);
+			bool b;
+			save(&b, os, ioMode);
+			if (!b) throw cybozu::Exception("Signature:save");
 		}
+#endif
 		friend std::istream& operator>>(std::istream& is, Signature& self)
 		{
 			self.load(is, fp::detectIoMode(G1::getIoMode(), is));
@@ -155,16 +171,32 @@ struct AGGST {
 		friend class SecretKey;
 		friend class Signature;
 	public:
+		template<class InputStream>
+		void load(bool *pb, InputStream& is, int ioMode = IoSerialize)
+		{
+			xQ_.load(pb, is, ioMode);
+		}
+		template<class OutputStream>
+		void save(bool *pb, OutputStream& os, int ioMode = IoSerialize) const
+		{
+			xQ_.save(pb, os, ioMode);
+		}
+#ifndef CYBOZU_DONT_USE_EXCEPTION
 		template<class InputStream>
 		void load(InputStream& is, int ioMode = IoSerialize)
 		{
-			xQ_.load(is, ioMode);
+			bool b;
+			load(&b, is, ioMode);
+			if (!b) throw cybozu::Exception("PublicKey:load");
 		}
 		template<class OutputStream>
 		void save(OutputStream& os, int ioMode = IoSerialize) const
 		{
-			xQ_.save(os, ioMode);
+			bool b;
+			save(&b, os, ioMode);
+			if (!b) throw cybozu::Exception("PublicKey:save");
 		}
+#endif
 		friend std::istream& operator>>(std::istream& is, PublicKey& self)
 		{
 			self.load(is, fp::detectIoMode(G2::getIoMode(), is));
@@ -208,16 +240,32 @@ struct AGGST {
 		friend class PublicKey;
 		friend class Signature;
 	public:
+		template<class InputStream>
+		void load(bool *pb, InputStream& is, int ioMode = IoSerialize)
+		{
+			x_.load(pb, is, ioMode);
+		}
+		template<class OutputStream>
+		void save(bool *pb, OutputStream& os, int ioMode = IoSerialize) const
+		{
+			x_.save(pb, os, ioMode);
+		}
+#ifndef CYBOZU_DONT_USE_EXCEPTION
 		template<class InputStream>
 		void load(InputStream& is, int ioMode = IoSerialize)
 		{
-			x_.load(is, ioMode);
+			bool b;
+			load(&b, is, ioMode);
+			if (!b) throw cybozu::Exception("SecretKey:load");
 		}
 		template<class OutputStream>
 		void save(OutputStream& os, int ioMode = IoSerialize) const
 		{
-			x_.save(os, ioMode);
+			bool b;
+			save(&b, os, ioMode);
+			if (!b) throw cybozu::Exception("SecretKey:save");
 		}
+#endif
 		friend std::istream& operator>>(std::istream& is, SecretKey& self)
 		{
 			self.load(is, fp::detectIoMode(Fr::getIoMode(), is));
diff --git a/test/aggregate_sig_test.cpp b/test/aggregate_sig_test.cpp
index c3a0e758..33b86092 100644
--- a/test/aggregate_sig_test.cpp
+++ b/test/aggregate_sig_test.cpp
@@ -21,6 +21,36 @@ CYBOZU_TEST_AUTO(init)
 	CYBOZU_TEST_ASSERT(pub.verify(sig, m));
 }
 
+template<class T>
+void serializeTest(const T& x)
+{
+	std::stringstream ss;
+	ss << x;
+	T y;
+	ss >> y;
+	CYBOZU_TEST_EQUAL(x, y);
+	char buf[1024];
+	size_t n;
+	n = x.serialize(buf, sizeof(buf));
+	CYBOZU_TEST_ASSERT(n > 0);
+	T z;
+	CYBOZU_TEST_EQUAL(z.deserialize(buf, n), n);
+	CYBOZU_TEST_EQUAL(x, z);
+}
+
+CYBOZU_TEST_AUTO(serialize)
+{
+	SecretKey sec;
+	sec.init();
+	PublicKey pub;
+	sec.getPublicKey(pub);
+	Signature sig;
+	sec.sign(sig, "abc");
+	serializeTest(sec);
+	serializeTest(pub);
+	serializeTest(sig);
+}
+
 void aggregateTest(const std::vector<std::string>& msgVec)
 {
 	const size_t n = msgVec.size();

From 42b0fb05adc42988962212c8feadd074139ae0f7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 10 Jun 2020 14:13:51 +0900
Subject: [PATCH 219/553] remove vc warnings

---
 include/mcl/ec.hpp         | 10 +++++-----
 include/mcl/mapto_wb19.hpp |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index ce23f5be..d7d1b8e0 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -116,7 +116,7 @@ void mul1CT(G& Q, const G& P, const mpz_class& x)
 			}
 		}
 	}
-	mcl::FixedArray<int8_t, sizeof(F) * 8 / w + 1> vTbl[splitN];
+	mcl::FixedArray<uint8_t, sizeof(F) * 8 / w + 1> vTbl[splitN];
 	size_t bitSizeTbl[splitN];
 	size_t maxBitSize = 0;
 	for (size_t i = 0; i < splitN; i++) {
@@ -126,18 +126,18 @@ void mul1CT(G& Q, const G& P, const mpz_class& x)
 			maxBitSize = bitSize;
 		}
 	}
-	int loopN = (maxBitSize + w - 1) / w;
+	size_t loopN = (maxBitSize + w - 1) / w;
 	for (int i = 0; i < splitN; i++) {
 		fp::ArrayIterator<fp::Unit> itr(gmp::getUnit(u[i]), bitSizeTbl[i], w);
 		bool b = vTbl[i].resize(loopN);
 		assert(b);
 		(void)b;
-		for (int j = 0; j < loopN; j++) {
-			vTbl[i][loopN - 1 - j] = itr.getNext();
+		for (size_t j = 0; j < loopN; j++) {
+			vTbl[i][loopN - 1 - j] = (uint8_t)itr.getNext();
 		}
 	}
 	Q.clear();
-	for (int k = 0; k < loopN; k++) {
+	for (size_t k = 0; k < loopN; k++) {
 		for (size_t i = 0; i < w; i++) {
 			G::dbl(Q, Q);
 		}
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 6b353bc3..dd11e099 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -81,9 +81,9 @@ struct MapToG2_WB19 {
 	Fp2 ynum[4];
 	Fp2 yden[4];
 	int draftVersion_;
-	void setDraftVersion(int version)
+	void setDraftVersion(int draftVersion)
 	{
-		draftVersion_ = version;
+		draftVersion_ = draftVersion;
 	}
 	void init()
 	{

From 66716376b1c48c7aef8f173cac13d1f3b775959d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 13 Jun 2020 17:48:02 +0900
Subject: [PATCH 220/553] Azure App Service requires CRYPT_MACHINE_KEYSET

---
 include/cybozu/random_generator.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/cybozu/random_generator.hpp b/include/cybozu/random_generator.hpp
index 375db06a..d0627f8d 100644
--- a/include/cybozu/random_generator.hpp
+++ b/include/cybozu/random_generator.hpp
@@ -34,8 +34,8 @@ class RandomGenerator {
 		: prov_(0)
 		, pos_(bufSize)
 	{
-		DWORD flagTbl[] = { 0, CRYPT_NEWKEYSET };
-		for (int i = 0; i < 2; i++) {
+		DWORD flagTbl[] = { 0, CRYPT_NEWKEYSET, CRYPT_MACHINE_KEYSET };
+		for (int i = 0; i < 3; i++) {
 			if (CryptAcquireContext(&prov_, NULL, NULL, PROV_RSA_FULL, flagTbl[i]) != 0) return;
 		}
 #ifdef CYBOZU_DONT_USE_EXCEPTION

From 7fe3e07cdbe52e202a4001a7fb27595cabd1fe90 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 19 Jun 2020 11:49:49 +0900
Subject: [PATCH 221/553] [doc] fix link

---
 api.md | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/api.md b/api.md
index eee225d7..52498264 100644
--- a/api.md
+++ b/api.md
@@ -2,18 +2,6 @@
 
 ## New features
 
-Add compatibility mode with [eth2](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md)
-
-```
-void mclBn_setETHserialization(int enable);
-```
-The serialization/deserialization for `Fp`, `Fr`, `G1`, `G2` if `enable = 1`.
-
-```
-int mclBn_setMapToMode(int mode);
-```
-The map-to-G2 function if `mode = MCL_MAP_TO_MODE_ETH2`.
-
 ```
 void mclBn_setOriginalG2cofactor(int enable);
 ```
@@ -126,6 +114,11 @@ the values are the same when the library is built and used.
 
 ## Global setting
 
+```
+int mclBn_setMapToMode(int mode);
+```
+The map-to-G2 function if `mode = MCL_MAP_TO_MODE_HASH_TO_CURVE`.
+
 ### Control to verify that a point of the elliptic curve has the order `r`.
 
 This function affects `setStr()` and `deserialize()` for G1/G2.
@@ -219,11 +212,11 @@ else:
   return s
 ```
 
-### Ethereum serialization mode for BLS12-381 (experimental)
+### Ethereum serialization mode for BLS12-381
 ```
 void mclBn_setETHserialization(int ETHserialization);
 ```
-- serialize according to [ETH2.0 serialization of BLS12-381](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#point-representations) if BLS12-381 is used and `ETHserialization = 1` (default 0).
+- serialize according to [serialization of BLS12-381](https://github.com/zkcrypto/pairing/blob/master/src/bls12_381/README.md#serialization) if BLS12-381 is used and `ETHserialization = 1` (default 0).
 
 ### Deserialize
 ```

From 3acf697650d5934da130c5b6ba13bac2e25d1ba3 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 19 Jun 2020 11:50:58 +0900
Subject: [PATCH 222/553] reduce stack size for Gi::mul

---
 include/mcl/bn.hpp       | 20 ++++++++++----------
 include/mcl/ec.hpp       | 22 +++++++++++-----------
 include/mcl/operator.hpp | 10 +++++-----
 3 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 08467ad0..d274679c 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -803,7 +803,11 @@ struct GLV2T {
 	template<class T>
 	static void mul(T& Q, const T& P, const mpz_class& x, bool constTime = false)
 	{
-		mulVecNGLV(Q, &P, &x, 1, constTime);
+		if (constTime) {
+			ec::local::mul1CT<GLV2, T, Fr, 4, 4>(Q, P, x);
+		} else {
+			ec::local::mulVecNGLVT<GLV2, T, Fr, 4, 5, 1>(Q, &P, &x, 1);
+		}
 	}
 	template<class T>
 	static void mulLambda(T& Q, const T& P)
@@ -811,12 +815,8 @@ struct GLV2T {
 		Frobenius(Q, P);
 	}
 	template<class T>
-	static size_t mulVecNGLV(T& z, const T *xVec, const mpz_class *yVec, size_t n, bool constTime)
+	static size_t mulVecNGLV(T& z, const T *xVec, const mpz_class *yVec, size_t n)
 	{
-		if (n == 1 && constTime) {
-			ec::local::mul1CT<GLV2, T, Fr, 4, 4>(z, *xVec, *yVec);
-			return 1;
-		}
 		return ec::local::mulVecNGLVT<GLV2, T, Fr, 4, 5, fp::maxMulVecNGLV>(z, xVec, yVec, n);
 	}
 	static void pow(Fp12& z, const Fp12& x, const mpz_class& y, bool constTime = false)
@@ -1024,17 +1024,17 @@ inline void powArrayGLV2(Fp12& z, const Fp12& x, const mcl::fp::Unit *y, size_t
 	GLV2::pow(z, x, s, constTime);
 }
 
-inline size_t mulVecNGLV2(G2& z, const G2 *xVec, const mpz_class *yVec, size_t n, bool constTime)
+inline size_t mulVecNGLV2(G2& z, const G2 *xVec, const mpz_class *yVec, size_t n)
 {
-	return GLV2::mulVecNGLV(z, xVec, yVec, n, constTime);
+	return GLV2::mulVecNGLV(z, xVec, yVec, n);
 }
 
-inline size_t powVecNGLV2(Fp12& z, const Fp12 *xVec, const mpz_class *yVec, size_t n, bool constTime)
+inline size_t powVecNGLV2(Fp12& z, const Fp12 *xVec, const mpz_class *yVec, size_t n)
 {
 	typedef GroupMtoA<Fp12> AG; // as additive group
 	AG& _z = static_cast<AG&>(z);
 	const AG *_xVec = static_cast<const AG*>(xVec);
-	return GLV2::mulVecNGLV(_z, _xVec, yVec, n, constTime);
+	return GLV2::mulVecNGLV(_z, _xVec, yVec, n);
 }
 
 /*
diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index d7d1b8e0..4a29cfa3 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -702,7 +702,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 	static bool verifyOrder_;
 	static mpz_class order_;
 	static void (*mulArrayGLV)(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime);
-	static size_t (*mulVecNGLV)(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn, bool constTime);
+	static size_t (*mulVecNGLV)(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn);
 	/* default constructor is undefined value */
 	EcT() {}
 	EcT(const Fp& _x, const Fp& _y)
@@ -768,7 +768,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 			// don't clear order_ because it is used for isValidOrder()
 		}
 	}
-	static void setMulArrayGLV(void f(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime), size_t g(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn, bool constTime) = 0)
+	static void setMulArrayGLV(void f(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime), size_t g(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn) = 0)
 	{
 		mulArrayGLV = f;
 		mulVecNGLV = g;
@@ -1477,7 +1477,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 
 public:
 	template<class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
-	static inline void mulVec(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n, bool constTime = false)
+	static inline void mulVec(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
 	{
 		/*
 			mulVecNGLV is a little slow for large n
@@ -1489,7 +1489,7 @@ class EcT : public fp::Serializable<EcT<_Fp> > {
 				yVec[i].getMpz(&b, myVec[i]);
 				assert(b); (void)b;
 			}
-			size_t done = mulVecNGLV(z, xVec, myVec, n, constTime);
+			size_t done = mulVecNGLV(z, xVec, myVec, n);
 			assert(done == n); (void)done;
 			return;
 		}
@@ -1559,7 +1559,7 @@ template<class Fp> int EcT<Fp>::ioMode_;
 template<class Fp> bool EcT<Fp>::verifyOrder_;
 template<class Fp> mpz_class EcT<Fp>::order_;
 template<class Fp> void (*EcT<Fp>::mulArrayGLV)(EcT& z, const EcT& x, const fp::Unit *y, size_t yn, bool isNegative, bool constTime);
-template<class Fp> size_t (*EcT<Fp>::mulVecNGLV)(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn, bool constTime);
+template<class Fp> size_t (*EcT<Fp>::mulVecNGLV)(EcT& z, const EcT *xVec, const mpz_class *yVec, size_t yn);
 template<class Fp> int EcT<Fp>::mode_;
 
 // r = the order of Ec
@@ -1611,14 +1611,14 @@ struct GLV1T {
 	}
 	static void mul(Ec& Q, const Ec& P, const mpz_class& x, bool constTime = false)
 	{
-		mulVecNGLV(Q, &P, &x, 1, constTime);
+		if (constTime) {
+			ec::local::mul1CT<GLV1, Ec, _Fr, 2, 4>(Q, P, x);
+		} else {
+			ec::local::mulVecNGLVT<GLV1, Ec, _Fr, 2, 5, 1>(Q, &P, &x, 1);
+		}
 	}
-	static inline size_t mulVecNGLV(Ec& z, const Ec *xVec, const mpz_class *yVec, size_t n, bool constTime)
+	static inline size_t mulVecNGLV(Ec& z, const Ec *xVec, const mpz_class *yVec, size_t n)
 	{
-		if (n == 1 && constTime) {
-			ec::local::mul1CT<GLV1, Ec, _Fr, 2, 4>(z, *xVec, *yVec);
-			return 1;
-		}
 		return ec::local::mulVecNGLVT<GLV1, Ec, _Fr, 2, 5, mcl::fp::maxMulVecNGLV>(z, xVec, yVec, n);
 	}
 	static void mulArrayGLV(Ec& z, const Ec& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
diff --git a/include/mcl/operator.hpp b/include/mcl/operator.hpp
index 1658c2df..84df77f7 100644
--- a/include/mcl/operator.hpp
+++ b/include/mcl/operator.hpp
@@ -84,14 +84,14 @@ struct Operator : public E {
 	{
 		powArray(z, x, gmp::getUnit(y), gmp::getUnitSize(y), y < 0, true);
 	}
-	static void setPowArrayGLV(void f(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime), size_t g(T& z, const T *xVec, const mpz_class *yVec, size_t n, bool constTime) = 0)
+	static void setPowArrayGLV(void f(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime), size_t g(T& z, const T *xVec, const mpz_class *yVec, size_t n) = 0)
 	{
 		powArrayGLV = f;
 		powVecNGLV = g;
 	}
 	static const size_t powVecMaxN = 16;
 	template<class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
-	static void powVec(T& z, const T* xVec, const FpT<tag, maxBitSize> *yVec, size_t n, bool constTime = false)
+	static void powVec(T& z, const T* xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
 	{
 		assert(powVecNGLV);
 		T r;
@@ -106,7 +106,7 @@ struct Operator : public E {
 				yVec[i].getMpz(&b, myVec[i]);
 				assert(b); (void)b;
 			}
-			size_t done = powVecNGLV(t, xVec, myVec, tn, constTime);
+			size_t done = powVecNGLV(t, xVec, myVec, tn);
 			r *= t;
 			xVec += done;
 			yVec += done;
@@ -116,7 +116,7 @@ struct Operator : public E {
 	}
 private:
 	static void (*powArrayGLV)(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime);
-	static size_t (*powVecNGLV)(T& z, const T* xVec, const mpz_class *yVec, size_t n, bool constTime);
+	static size_t (*powVecNGLV)(T& z, const T* xVec, const mpz_class *yVec, size_t n);
 	static void powArray(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime)
 	{
 		if (powArrayGLV && (constTime || yn > 1)) {
@@ -145,7 +145,7 @@ template<class T, class E>
 void (*Operator<T, E>::powArrayGLV)(T& z, const T& x, const Unit *y, size_t yn, bool isNegative, bool constTime);
 
 template<class T, class E>
-size_t (*Operator<T, E>::powVecNGLV)(T& z, const T* xVec, const mpz_class *yVec, size_t n, bool constTime);
+size_t (*Operator<T, E>::powVecNGLV)(T& z, const T* xVec, const mpz_class *yVec, size_t n);
 
 /*
 	T must have save and load

From 74f1b42f722c6322ffc08f90a9d5a40924cb7724 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 19 Jun 2020 15:08:09 +0900
Subject: [PATCH 223/553] some old eth* functions will be deprecated

---
 include/mcl/bn.h   | 2 ++
 include/mcl/bn.hpp | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 905733db..04d12aea 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -226,6 +226,7 @@ MCLBN_DLL_API int mclBn_setMapToMode(int mode);
 	return 0 if success
 	@note append zero byte to msg if necessary
 */
+// deprecated
 MCLBN_DLL_API int mclBn_ethMsgToFp2(mclBnFp2 *out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize);
 
 /*
@@ -240,6 +241,7 @@ MCLBN_DLL_API int mclBn_ethFp2ToG2(mclBnG2 *out, const mclBnFp2 *t1, const mclBn
 	@note append zero byte to msg if necessary
 	return 0 if success
 */
+// deprecated
 MCLBN_DLL_API int mclBn_ethMsgToG2(mclBnG2 *out, const void *msg, size_t msgSize, const void *dst, size_t dstSize);
 
 ////////////////////////////////////////////////
diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index d274679c..ee8df173 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -2208,6 +2208,7 @@ inline const Fr& getG2cofactorAdjInv()
 	return BN::param.mapTo.g2cofactorAdjInv_;
 }
 
+// deprecated
 inline bool ethMsgToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
 {
 	if (!BN::param.isBLS12) return false;
@@ -2222,6 +2223,7 @@ inline bool ethFp2ToG2(G2& out, const Fp2& t1, const Fp2 *t2 = 0)
 	return true;
 }
 
+// deprecated
 inline bool ethMsgToG2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
 {
 	if (!BN::param.isBLS12) return false;

From f6c46616ed82fb489c16123255f06e815afe4a40 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 19 Jun 2020 15:08:50 +0900
Subject: [PATCH 224/553] update xbyak

---
 src/xbyak/xbyak.h          | 37 ++++++++++++++++++++++---------------
 src/xbyak/xbyak_mnemonic.h | 10 +++++-----
 src/xbyak/xbyak_util.h     | 19 +++++++++++++++++--
 3 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/src/xbyak/xbyak.h b/src/xbyak/xbyak.h
index 939ffee0..63efccd7 100644
--- a/src/xbyak/xbyak.h
+++ b/src/xbyak/xbyak.h
@@ -9,14 +9,9 @@
 	@note modified new BSD license
 	http://opensource.org/licenses/BSD-3-Clause
 */
-#if !defined(XBYAK_USE_OP_NAMES) && !defined(XBYAK_NO_OP_NAMES)
+#if (not +0) && !defined(XBYAK_NO_OP_NAMES) // trick to detect whether 'not' is operator or not
 	#define XBYAK_NO_OP_NAMES
 #endif
-#ifndef XBYAK_NO_OP_NAMES
-	#if not +0 // trick to detect whether 'not' is operator or not
-		#error "use -fno-operator-names option if you want to use and(), or(), xor(), not() as function names, Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_()."
-	#endif
-#endif
 
 #include <stdio.h> // for debug print
 #include <assert.h>
@@ -29,7 +24,9 @@
 
 // #define XBYAK_DISABLE_AVX512
 
-//#define XBYAK_USE_MMAP_ALLOCATOR
+#if !defined(XBYAK_USE_MMAP_ALLOCATOR) && !defined(XBYAK_DONT_USE_MMAP_ALLOCATOR)
+	#define XBYAK_USE_MMAP_ALLOCATOR
+#endif
 #if !defined(__GNUC__) || defined(__MINGW32__)
 	#undef XBYAK_USE_MMAP_ALLOCATOR
 #endif
@@ -83,9 +80,12 @@
 	#include <sys/mman.h>
 	#include <stdlib.h>
 #endif
-#if defined(__APPLE__) && defined(MAP_JIT)
+#if defined(__APPLE__) && !defined(XBYAK_DONT_USE_MAP_JIT)
 	#define XBYAK_USE_MAP_JIT
 	#include <sys/sysctl.h>
+	#ifndef MAP_JIT
+		#define MAP_JIT 0x800
+	#endif
 #endif
 #if !defined(_MSC_VER) || (_MSC_VER >= 1600)
 	#include <stdint.h>
@@ -120,7 +120,7 @@ namespace Xbyak {
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x5850 /* 0xABCD = A.BC(D) */
+	VERSION = 0x5912 /* 0xABCD = A.BC(D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
@@ -469,9 +469,8 @@ class Operand {
 	}
 	// err if MMX/FPU/OPMASK/BNDREG
 	void setBit(int bit);
-	void setOpmaskIdx(int idx, bool ignore_idx0 = false)
+	void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true)
 	{
-		if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID);
 		if (mask_) throw Error(ERR_OPMASK_IS_ALREADY_SET);
 		mask_ = idx;
 	}
@@ -555,7 +554,7 @@ inline void Operand::setBit(int bit)
 {
 	if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512) goto ERR;
 	if (isBit(bit)) return;
-	if (is(MEM)) {
+	if (is(MEM | OPMASK)) {
 		bit_ = bit;
 		return;
 	}
@@ -1667,6 +1666,7 @@ class CodeGenerator : public CodeArray {
 		bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
 		bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
 		if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
+		if (aaa == 0) z = 0; // clear T_z if mask is not set
 		db(0x62);
 		db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | (mm & 3));
 		db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));
@@ -1780,6 +1780,7 @@ class CodeGenerator : public CodeArray {
 			db(longCode); dd(disp - longJmpSize);
 		}
 	}
+	bool isNEAR(LabelType type) const { return type == T_NEAR || (type == T_AUTO && isDefaultJmpNEAR_); }
 	template<class T>
 	void opJmp(T& label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref)
 	{
@@ -1789,7 +1790,7 @@ class CodeGenerator : public CodeArray {
 			makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref);
 		} else {
 			int jmpSize = 0;
-			if (type == T_NEAR) {
+			if (isNEAR(type)) {
 				jmpSize = 4;
 				if (longPref) db(longPref);
 				db(longCode); dd(0);
@@ -1804,7 +1805,7 @@ class CodeGenerator : public CodeArray {
 	void opJmpAbs(const void *addr, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref = 0)
 	{
 		if (isAutoGrow()) {
-			if (type != T_NEAR) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW);
+			if (!isNEAR(type)) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW);
 			if (size_ + 16 >= maxSize_) growMemory();
 			if (longPref) db(longPref);
 			db(longCode);
@@ -2258,7 +2259,7 @@ class CodeGenerator : public CodeArray {
 	const Zmm zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7;
 	const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7;
 	const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7;
-	const Ymm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7;
+	const Zmm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7;
 	const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi;
 	const Reg16 ax, cx, dx, bx, sp, bp, si, di;
 	const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
@@ -2298,6 +2299,9 @@ class CodeGenerator : public CodeArray {
 #ifndef XBYAK_DISABLE_SEGMENT
 	const Segment es, cs, ss, ds, fs, gs;
 #endif
+private:
+	bool isDefaultJmpNEAR_;
+public:
 	void L(const std::string& label) { labelMgr_.defineSlabel(label); }
 	void L(Label& label) { labelMgr_.defineClabel(label); }
 	Label L() { Label label; L(label); return label; }
@@ -2317,6 +2321,8 @@ class CodeGenerator : public CodeArray {
 	void putL(std::string label) { putL_inner(label); }
 	void putL(const Label& label) { putL_inner(label); }
 
+	// set default type of `jmp` of undefined label to T_NEAR
+	void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; }
 	void jmp(const Operand& op) { opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true); }
 	void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
 	void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); }
@@ -2575,6 +2581,7 @@ class CodeGenerator : public CodeArray {
 #ifndef XBYAK_DISABLE_SEGMENT
 		, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
 #endif
+		, isDefaultJmpNEAR_(false)
 	{
 		labelMgr_.set(this);
 	}
diff --git a/src/xbyak/xbyak_mnemonic.h b/src/xbyak/xbyak_mnemonic.h
index 457a6414..2de6ec23 100644
--- a/src/xbyak/xbyak_mnemonic.h
+++ b/src/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "5.85"; }
+const char *getVersionString() const { return "5.912"; }
 void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
 void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
 void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@@ -1671,17 +1671,17 @@ void kandnw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r
 void kandq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x41); }
 void kandw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x41); }
 void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }
-void kmovb(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }
+void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) throw Error(ERR_BAD_COMBINATION); opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }
 void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }
 void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }
 void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }
-void kmovd(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }
+void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) throw Error(ERR_BAD_COMBINATION); opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }
 void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }
 void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }
 void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }
-void kmovq(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }
+void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) throw Error(ERR_BAD_COMBINATION); opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }
 void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }
-void kmovw(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }
+void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) throw Error(ERR_BAD_COMBINATION); opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }
 void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }
 void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }
 void knotb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x44); }
diff --git a/src/xbyak/xbyak_util.h b/src/xbyak/xbyak_util.h
index eefd1526..4f79d8f9 100644
--- a/src/xbyak/xbyak_util.h
+++ b/src/xbyak/xbyak_util.h
@@ -1,5 +1,6 @@
 #ifndef XBYAK_XBYAK_UTIL_H_
 #define XBYAK_XBYAK_UTIL_H_
+#include <string.h>
 
 /**
 	utility class and functions for Xbyak
@@ -146,6 +147,11 @@ class Cpu {
 					numCores_[level - 1] = extractBit(data[1], 0, 15);
 				}
 			}
+			/*
+				Fallback values in case a hypervisor has 0xB leaf zeroed-out.
+			*/
+			numCores_[SmtLevel - 1] = (std::max)(1u, numCores_[SmtLevel - 1]);
+			numCores_[CoreLevel - 1] = (std::max)(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]);
 		} else {
 			/*
 				Failed to deremine num of cores without x2APIC support.
@@ -754,7 +760,7 @@ class Profiler {
 	};
 	Profiler()
 		: mode_(None)
-		, suffix_(0)
+		, suffix_("")
 		, startAddr_(0)
 #ifdef XBYAK_USE_PERF
 		, fp_(0)
@@ -828,7 +834,16 @@ class Profiler {
 #ifdef XBYAK_USE_PERF
 		if (mode_ == Perf) {
 			if (fp_ == 0) return;
-			fprintf(fp_, "%llx %zx %s%s\n", (long long)startAddr, funcSize, funcName, suffix_);
+			fprintf(fp_, "%llx %zx %s%s", (long long)startAddr, funcSize, funcName, suffix_);
+			/*
+				perf does not recognize the function name which is less than 3,
+				so append '_' at the end of the name if necessary
+			*/
+			size_t n = strlen(funcName) + strlen(suffix_);
+			for (size_t i = n; i < 3; i++) {
+				fprintf(fp_, "_");
+			}
+			fprintf(fp_, "\n");
 			fflush(fp_);
 		}
 #endif

From f3f27d5b0d5299da8486c6248541b1248d17c8fa Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 19 Jun 2020 15:24:11 +0900
Subject: [PATCH 225/553] maxMulVecNGLV can be redefined

---
 include/mcl/op.hpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 958d438f..56db4960 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -118,7 +118,11 @@ const size_t maxUnitSize = (MCL_MAX_BIT_SIZE + UnitBitSize - 1) / UnitBitSize;
 #define MCL_MAX_UNIT_SIZE ((MCL_MAX_BIT_SIZE + MCL_UNIT_BIT_SIZE - 1) / MCL_UNIT_BIT_SIZE)
 
 const size_t maxMulVecN = 32; // inner loop of mulVec
-const size_t maxMulVecNGLV = 16; // inner loop of mulVec with GLV
+
+#ifndef MCL_MAX_MUL_VEC_NGLV
+	#define MCL_MAX_MUL_VEC_NGLV 16
+#endif
+const size_t maxMulVecNGLV = MCL_MAX_MUL_VEC_NGLV; // inner loop of mulVec with GLV
 
 struct FpGenerator;
 struct Op;

From 69f890ae17ba1ca0334bafbaa3103421e2136700 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 19 Jun 2020 17:19:41 +0900
Subject: [PATCH 226/553] v1.11

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 56db4960..df8fe7e6 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x110; /* 0xABC = A.BC */
+static const int version = 0x111; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From 0741627756b201ca696289fda3745dfcc672a052 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 21 Jun 2020 20:04:53 +0900
Subject: [PATCH 227/553] add CRYPT_VERIFYCONTEXT to CryptAcquireContext

---
 include/cybozu/random_generator.hpp | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/include/cybozu/random_generator.hpp b/include/cybozu/random_generator.hpp
index d0627f8d..66900f97 100644
--- a/include/cybozu/random_generator.hpp
+++ b/include/cybozu/random_generator.hpp
@@ -17,7 +17,6 @@
 #ifdef _MSC_VER
 #pragma comment (lib, "advapi32.lib")
 #endif
-#include <cybozu/critical_section.hpp>
 #else
 #include <sys/types.h>
 #include <fcntl.h>
@@ -32,10 +31,9 @@ class RandomGenerator {
 #ifdef _WIN32
 	RandomGenerator()
 		: prov_(0)
-		, pos_(bufSize)
 	{
-		DWORD flagTbl[] = { 0, CRYPT_NEWKEYSET, CRYPT_MACHINE_KEYSET };
-		for (int i = 0; i < 3; i++) {
+		DWORD flagTbl[] = { CRYPT_VERIFYCONTEXT | CRYPT_SILENT, 0, CRYPT_MACHINE_KEYSET };
+		for (int i = 0; i < CYBOZU_NUM_OF_ARRAY(flagTbl); i++) {
 			if (CryptAcquireContext(&prov_, NULL, NULL, PROV_RSA_FULL, flagTbl[i]) != 0) return;
 		}
 #ifdef CYBOZU_DONT_USE_EXCEPTION
@@ -62,29 +60,11 @@ class RandomGenerator {
 	template<class T>
 	void read(bool *pb, T *buf, size_t bufNum)
 	{
-		cybozu::AutoLockCs al(cs_);
 		const size_t byteSize = sizeof(T) * bufNum;
-		if (byteSize > bufSize) {
-			if (!read_inner(buf, byteSize)) {
-				*pb = false;
-				return;
-			}
-		} else {
-			if (pos_ + byteSize > bufSize) {
-				read_inner(buf_, bufSize);
-				pos_ = 0;
-			}
-			memcpy(buf, buf_ + pos_, byteSize);
-			pos_ += byteSize;
-		}
-		*pb = true;
+		*pb = read_inner(buf, byteSize);
 	}
 private:
 	HCRYPTPROV prov_;
-	static const size_t bufSize = 1024;
-	char buf_[bufSize];
-	size_t pos_;
-	cybozu::CriticalSection cs_;
 #else
 	RandomGenerator()
 		: fp_(::fopen("/dev/urandom", "rb"))

From 0928a1764765609a74bd16bcd6c09467f3d959db Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 22 Jun 2020 15:54:00 +0900
Subject: [PATCH 228/553] cmake install bn_c*.h headers

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index aa1ada71..d56f24c3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -190,7 +190,7 @@ foreach(bit IN ITEMS 256 384 384_256 512)
 	target_link_libraries(bn_c${bit}_test mclbn${bit})
 endforeach()
 
-file(GLOB MCL_HEADERS include/mcl/*.hpp include/mcl/bn.h include/mcl/curve_type.h)
+file(GLOB MCL_HEADERS include/mcl/*.hpp include/mcl/bn.h include/mcl/curve_type.h include/mcl/bn_c256.h include/mcl/bn_c384_256.h include/mcl/bn_c384.h)
 file(GLOB CYBOZULIB_HEADERS include/cybozu/*.hpp)
 
 install(TARGETS mcl DESTINATION lib)

From 34afe9cddab7cd5ae4a954935cad7fd143f77c91 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 22 Jun 2020 17:35:30 +0900
Subject: [PATCH 229/553] mcl-wasm generates mcl_c384_256.{js,wasm}

---
 Makefile | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 29bc0ad5..ebcb69cd 100644
--- a/Makefile
+++ b/Makefile
@@ -344,6 +344,9 @@ endif
 ../she-wasm/she_c384.js: src/she_c384.cpp $(SHE_C_DEP)
 	emcc -o $@ src/fp.cpp src/she_c384.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=384 -s TOTAL_MEMORY=67108864 -s DISABLE_EXCEPTION_CATCHING=0
 
+../mcl-wasm/mcl_c384_256.js: src/bn_c384_256.cpp $(MCL_C_DEP)
+	emcc -o $@ src/fp.cpp src/bn_c384_256.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=384 -DMCL_USE_WEB_CRYPTO_API -s DISABLE_EXCEPTION_CATCHING=1 -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -fno-exceptions -MD -MP -MF obj/mcl_c384_256.d
+
 ../mcl-wasm/mcl_c.js: src/bn_c256.cpp $(MCL_C_DEP)
 	emcc -o $@ src/fp.cpp src/bn_c256.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=256 -DMCL_USE_WEB_CRYPTO_API -s DISABLE_EXCEPTION_CATCHING=1 -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -fno-exceptions -MD -MP -MF obj/mcl_c.d
 
@@ -354,8 +357,9 @@ endif
 	emcc -o $@ src/fp.cpp src/ecdsa_c.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=256 -DMCL_USE_WEB_CRYPTO_API -s DISABLE_EXCEPTION_CATCHING=1 -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -fno-exceptions
 
 mcl-wasm:
-	$(MAKE) ../mcl-wasm/mcl_c.js
-	$(MAKE) ../mcl-wasm/mcl_c512.js
+	$(MAKE) ../mcl-wasm/mcl_c384_256.js
+#	$(MAKE) ../mcl-wasm/mcl_c.js
+#	$(MAKE) ../mcl-wasm/mcl_c512.js
 
 she-wasm:
 	$(MAKE) ../she-wasm/she_c.js

From 2f3ccdde992a2e8f031f9e422e05f3dcc253c127 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 22 Jun 2020 18:17:14 +0900
Subject: [PATCH 230/553] modify emcc option for mcl-wasm

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index ebcb69cd..32b2efe4 100644
--- a/Makefile
+++ b/Makefile
@@ -329,7 +329,7 @@ test: $(TEST_EXE)
 
 EMCC_OPT=-I./include -I./src -Wall -Wextra
 EMCC_OPT+=-O3 -DNDEBUG -DMCLSHE_WIN_SIZE=8
-EMCC_OPT+=-s WASM=1 -s NO_EXIT_RUNTIME=1 -s MODULARIZE=1 #-s ASSERTIONS=1
+EMCC_OPT+=-s WASM=1 -s NO_EXIT_RUNTIME=1 -s NODEJS_CATCH_EXIT=0 -s NODEJS_CATCH_REJECTION=0  -s MODULARIZE=1 #-s ASSERTIONS=1
 EMCC_OPT+=-DCYBOZU_MINIMUM_EXCEPTION
 EMCC_OPT+=-s ABORTING_MALLOC=0
 SHE_C_DEP=src/fp.cpp src/she_c_impl.hpp include/mcl/she.hpp include/mcl/fp.hpp include/mcl/op.hpp include/mcl/she.h Makefile

From e48973af5019ed698df6a28458688a26ec9ca34a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Jun 2020 11:05:11 +0900
Subject: [PATCH 231/553] add mclBnFr_setBigEndianMod

---
 include/mcl/impl/bn_c_impl.hpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index d6018617..52eaa504 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -181,6 +181,13 @@ int mclBnFr_setLittleEndian(mclBnFr *x, const void *buf, mclSize bufSize)
 	cast(x)->setArrayMask((const char *)buf, bufSize);
 	return 0;
 }
+int mclBnFr_setBigEndianMod(mclBnFr *x, const void *buf, mclSize bufSize)
+{
+	bool b;
+	cast(x)->setBigEndianMod(&b, buf, bufSize);
+	return b ? 0 : -1;
+}
+
 mclSize mclBnFr_getLittleEndian(void *buf, mclSize maxBufSize, const mclBnFr *x)
 {
 	return cast(x)->getLittleEndian(buf, maxBufSize);

From ef4a9de2571469861dc18ff73613a79b655de1d2 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Jun 2020 14:19:30 +0900
Subject: [PATCH 232/553] fix ec comparison with zero

---
 include/mcl/ec.hpp | 12 ++++++++++++
 test/ec_test.cpp   |  3 +++
 2 files changed, 15 insertions(+)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 4a29cfa3..e1db56df 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -227,6 +227,12 @@ template<class E>
 bool isEqualJacobi(const E& P1, const E& P2)
 {
 	typedef typename E::Fp F;
+	bool zero1 = P1.isZero();
+	bool zero2 = P2.isZero();
+	if (zero1) {
+		return zero2;
+	}
+	if (zero2) return false;
 	F s1, s2, t1, t2;
 	F::sqr(s1, P1.z);
 	F::sqr(s2, P2.z);
@@ -452,6 +458,12 @@ template<class E>
 bool isEqualProj(const E& P1, const E& P2)
 {
 	typedef typename E::Fp F;
+	bool zero1 = P1.isZero();
+	bool zero2 = P2.isZero();
+	if (zero1) {
+		return zero2;
+	}
+	if (zero2) return false;
 	F t1, t2;
 	F::mul(t1, P1.x, P2.z);
 	F::mul(t2, P2.x, P1.z);
diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index 4c93e42b..a3e79e52 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -95,6 +95,9 @@ struct Test {
 			if (Ec::getMode() != mcl::ec::Affine) {
 				CYBOZU_TEST_ASSERT(!R.isNormalized());
 			}
+			CYBOZU_TEST_ASSERT(O == O);
+			CYBOZU_TEST_ASSERT(R != O);
+			CYBOZU_TEST_ASSERT(O != R);
 			CYBOZU_TEST_ASSERT(R.isValid());
 			Ec R2 = P + P;
 			CYBOZU_TEST_EQUAL(R, R2);

From 574cabc15f733baabe8951afe1970a7314f82748 Mon Sep 17 00:00:00 2001
From: Kelly Olson <ineffectualproperty@users.noreply.github.com>
Date: Tue, 23 Jun 2020 21:35:41 -0700
Subject: [PATCH 233/553] Update readme.md to fix URLs

---
 readme.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/readme.md b/readme.md
index 34ae2c6f..51ecd8fc 100644
--- a/readme.md
+++ b/readme.md
@@ -282,8 +282,8 @@ modified new BSD License
 http://opensource.org/licenses/BSD-3-Clause
 
 This library contains some part of the followings software licensed by BSD-3-Clause.
-* [xbyak](https://github.com/heurmi/xbyak)
-* [cybozulib](https://github.com/heurmi/cybozulib)
+* [xbyak](https://github.com/herumi/xbyak)
+* [cybozulib](https://github.com/herumi/cybozulib)
 * [Lifted-ElGamal](https://github.com/aistcrypt/Lifted-ElGamal)
 
 # References

From 7c3f6c3c63db5cde0943583872e0999b8ebe1a0d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 24 Jun 2020 15:26:43 +0900
Subject: [PATCH 234/553] fix link of url in readme.md

---
 readme.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/readme.md b/readme.md
index 51ecd8fc..f56e64f9 100644
--- a/readme.md
+++ b/readme.md
@@ -10,10 +10,10 @@ mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
-- `MCL_MAP_TO_MODE_HASH_TO_CURVE_07` is added for [draft-07](https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-07.txt).
+- `MCL_MAP_TO_MODE_HASH_TO_CURVE_07` is added for [hash-to-curve-draft-07](https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/07/).
   - The older version will be removed in the future.
 - change DST of hash-to-curve for `MCL_MAP_TO_MODE_HASH_TO_CURVE_06`.
-- add new hash-to-curve function of [draft-irtf-cfrg-hash-to-curve](https://cfrg.github.io/draft-irtf-cfrg-hash-to-curve/draft-irtf-cfrg-hash-to-curve.txt) at March 2020.
+- add new hash-to-curve function of [hash-to-curve-draft-06](https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/06/) at March 2020.
   - call `setETHmode(MCL_MAP_TO_MODE_HASH_TO_CURVE_06);`
   - The older `MAP_TO_MODE` will be removed after the draft is fixed.
 - add new hash functions corresponding to python-impl of [algorand/bls_sig_ref](https://github.com/algorand/bls_sigs_ref).

From 9e0148698d8215f720aaf6808dceb5bcf4c6b577 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 25 Jun 2020 13:48:06 +0900
Subject: [PATCH 235/553] add test of draft07

---
 test/mapto_wb19_test.cpp | 70 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index b289ae48..3041419f 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -985,6 +985,75 @@ void testHashToFp2v7(const T& mapto)
 	}
 }
 
+void testEth2phase0()
+{
+	const struct {
+		const char *sec;
+		const char *msg;
+		const char *out;
+	} tbl[] = {
+		{
+			"328388aff0d4a5b7dc9205abd374e7e98f3cd9f3418edb4eafda5fb16473d216",
+			"abababababababababababababababababababababababababababababababab",
+			"ae82747ddeefe4fd64cf9cedb9b04ae3e8a43420cd255e3c7cd06a8d88b7c7f8638543719981c5d16fa3527c468c25f0026704a6951bde891360c7e8d12ddee0559004ccdbe6046b55bae1b257ee97f7cdb955773d7cf29adf3ccbb9975e4eb9",
+		},
+		{
+			"47b8192d77bf871b62e87859d653922725724a5c031afeabc60bcef5ff665138",
+			"abababababababababababababababababababababababababababababababab",
+			"9674e2228034527f4c083206032b020310face156d4a4685e2fcaec2f6f3665aa635d90347b6ce124eb879266b1e801d185de36a0a289b85e9039662634f2eea1e02e670bc7ab849d006a70b2f93b84597558a05b879c8d445f387a5d5b653df",
+		},
+		{
+			"328388aff0d4a5b7dc9205abd374e7e98f3cd9f3418edb4eafda5fb16473d216",
+			"5656565656565656565656565656565656565656565656565656565656565656",
+			"a4efa926610b8bd1c8330c918b7a5e9bf374e53435ef8b7ec186abf62e1b1f65aeaaeb365677ac1d1172a1f5b44b4e6d022c252c58486c0a759fbdc7de15a756acc4d343064035667a594b4c2a6f0b0b421975977f297dba63ee2f63ffe47bb6",
+		},
+		{
+			"47b8192d77bf871b62e87859d653922725724a5c031afeabc60bcef5ff665138",
+			"5656565656565656565656565656565656565656565656565656565656565656",
+			"af1390c3c47acdb37131a51216da683c509fce0e954328a59f93aebda7e4ff974ba208d9a4a2a2389f892a9d418d618418dd7f7a6bc7aa0da999a9d3a5b815bc085e14fd001f6a1948768a3f4afefc8b8240dda329f984cb345c6363272ba4fe",
+		},
+		{
+			"263dbd792f5b1be47ed85f8938c0f29586af0d3ac7b977f21c278fe1462040e3",
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			"b6ed936746e01f8ecf281f020953fbf1f01debd5657c4a383940b020b26507f6076334f91e2366c96e9ab279fb5158090352ea1c5b0c9274504f4f0e7053af24802e51e4568d164fe986834f41e55c8e850ce1f98458c0cfc9ab380b55285a55",
+		},
+		{
+			"47b8192d77bf871b62e87859d653922725724a5c031afeabc60bcef5ff665138",
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			"b23c46be3a001c63ca711f87a005c200cc550b9429d5f4eb38d74322144f1b63926da3388979e5321012fb1a0526bcd100b5ef5fe72628ce4cd5e904aeaa3279527843fae5ca9ca675f4f51ed8f83bbf7155da9ecc9663100a885d5dc6df96d9",
+		},
+		{
+			"328388aff0d4a5b7dc9205abd374e7e98f3cd9f3418edb4eafda5fb16473d216",
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			"948a7cb99f76d616c2c564ce9bf4a519f1bea6b0a624a02276443c245854219fabb8d4ce061d255af5330b078d5380681751aa7053da2c98bae898edc218c75f07e24d8802a17cd1f6833b71e58f5eb5b94208b4d0bb3848cecb075ea21be115",
+		},
+		{
+			"263dbd792f5b1be47ed85f8938c0f29586af0d3ac7b977f21c278fe1462040e3",
+			"abababababababababababababababababababababababababababababababab",
+			"91347bccf740d859038fcdcaf233eeceb2a436bcaaee9b2aa3bfb70efe29dfb2677562ccbea1c8e061fb9971b0753c240622fab78489ce96768259fc01360346da5b9f579e5da0d941e4c6ba18a0e64906082375394f337fa1af2b7127b0d121",
+		},
+		{
+			"263dbd792f5b1be47ed85f8938c0f29586af0d3ac7b977f21c278fe1462040e3",
+			"5656565656565656565656565656565656565656565656565656565656565656",
+			"882730e5d03f6b42c3abc26d3372625034e1d871b65a8a6b900a56dae22da98abbe1b68f85e49fe7652a55ec3d0591c20767677e33e5cbb1207315c41a9ac03be39c2e7668edc043d6cb1d9fd93033caa8a1c5b0e84bedaeb6c64972503a43eb",
+		},
+	};
+	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_07);
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+		const Uint8Vec msg = fromHexStr(tbl[i].msg);
+		const Uint8Vec out = fromHexStr(tbl[i].out);
+		Fr r;
+		r.setStr(tbl[i].sec, 16);
+		G2 P;
+		mcl::bn::hashAndMapToG2(P, msg.data(), msg.size());
+		P *= r;
+		P.normalize();
+		uint8_t buf[256];
+		size_t n = P.serialize(buf, sizeof(buf));
+		CYBOZU_TEST_EQUAL_ARRAY(out.data(), buf, n);
+	}
+}
+
 CYBOZU_TEST_AUTO(test)
 {
 	initPairing(mcl::BLS12_381);
@@ -1006,4 +1075,5 @@ CYBOZU_TEST_AUTO(test)
 	ethMsgToG2testAll("../bls_sigs_ref/test-vectors/hash_g2/");
 	testHashToFp2v6(mapto);
 	testHashToFp2v7(mapto);
+	testEth2phase0();
 }

From eb4cdddf02af78f62c2481ffe41a46781c405c05 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 25 Jun 2020 15:02:46 +0900
Subject: [PATCH 236/553] expand_message_xmd for G1

---
 include/mcl/fp.hpp         |  4 ++--
 include/mcl/mapto_wb19.hpp |  2 +-
 src/fp.cpp                 | 13 +++++++------
 test/mapto_wb19_test.cpp   | 19 ++++++++++++++++++-
 4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 96110a8b..4f3c35b6 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -77,8 +77,8 @@ void hkdf_extract_addZeroByte(uint8_t hmac[32], const uint8_t *salt, size_t salt
 void hkdf_extract(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize);
 void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6]);
 
-// draft-07
-void expand_message_xmd(uint8_t out[256], const void *msg, size_t msgSize, const void *dst, size_t dstSize);
+// draft-07 outSize = 128 or 256
+void expand_message_xmd(uint8_t out[], size_t outSize, const void *msg, size_t msgSize, const void *dst, size_t dstSize);
 
 void expand_message_xmd06(uint8_t out[256], const void *msg, size_t msgSize, const void *dst, size_t dstSize);
 
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index dd11e099..bb7df524 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -445,7 +445,7 @@ struct MapToG2_WB19 {
 		if (draftVersion_ == 6) {
 			mcl::fp::expand_message_xmd06(md, msg, msgSize, dst, dstSize);
 		} else {
-			mcl::fp::expand_message_xmd(md, msg, msgSize, dst, dstSize);
+			mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
 		}
 		Fp *x = out[0].getFp0();
 		for (size_t i = 0; i < 4; i++) {
diff --git a/src/fp.cpp b/src/fp.cpp
index f9307acd..343d2427 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -194,20 +194,21 @@ void expand_message_xmd06(uint8_t out[256], const void *msg, size_t msgSize, con
 	}
 }
 
-void expand_message_xmd(uint8_t out[256], const void *msg, size_t msgSize, const void *dst, size_t dstSize)
+void expand_message_xmd(uint8_t out[], size_t outSize, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
 {
-	const size_t len_in_bytes = 256;
+	assert(outSize == 128 || outSize == 256);
 	const size_t mdSize = 32;
 	const size_t r_in_bytes = 64;
-	const size_t ell = len_in_bytes / mdSize;
+	const size_t n = outSize / mdSize;
 	static const uint8_t Z_pad[r_in_bytes] = {};
 	assert(dstSize < 256);
 	/*
-		Z_apd | msg | BE(len_in_bytes, 2) | BE(0, 1) | DST | BE(dstSize, 1)
+		Z_apd | msg | BE(outSize, 2) | BE(0, 1) | DST | BE(dstSize, 1)
 	*/
-	static const uint8_t lenBuf[2] = { 1, 0 }; // 256 = len_in_bytes
+	uint8_t lenBuf[2] = { 1, 0 }; // 256 = outSize
 	uint8_t iBuf = 0;
 	uint8_t dstSizeBuf = uint8_t(dstSize);
+	cybozu::Set16bitAsBE(lenBuf, uint16_t(outSize));
 	cybozu::Sha256 h;
 	h.update(Z_pad, r_in_bytes);
 	h.update(msg, msgSize);
@@ -223,7 +224,7 @@ void expand_message_xmd(uint8_t out[256], const void *msg, size_t msgSize, const
 	h.update(dst, dstSize);
 	h.digest(out, mdSize, &dstSizeBuf, 1);
 	uint8_t mdXor[mdSize];
-	for (size_t i = 1; i < ell; i++) {
+	for (size_t i = 1; i < n; i++) {
 		h.clear();
 		for (size_t j = 0; j < mdSize; j++) {
 			mdXor[j] = md[j] ^ out[mdSize * (i - 1) + j];
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 3041419f..3dbf8f93 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -890,7 +890,17 @@ void testHashToFp2v7(const T& mapto)
 		size_t msgSize = strlen(msg);
 		size_t dstSize = strlen(dst);
 		uint8_t md[256];
-		mcl::fp::expand_message_xmd(md, msg, msgSize, dst, dstSize);
+		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
+		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
+	}
+	{
+		char msg[] = "asdf";
+		char dst[] = "QUUX-V01-CS02-with-BLS12381G1_XMD:SHA-256_SSWU_RO_";
+		char expect[] = "ecc25edef8f6b277e27a88cf5ca0cdd4c4a49e8ba273d6069a4f0c9db05d37b78e700a875f4bb5972bfce49a867172ec1cb8c5524b1853994bb8af52a8ad2338d2cf688cf788b732372c10013445cd2c16a08a462028ae8ffff3082c8e47e8437dee5a58801e03ee8320980ae7c071ab022473231789d543d56defe9ff53bdba";
+		size_t msgSize = strlen(msg);
+		size_t dstSize = strlen(dst);
+		uint8_t md[128];
+		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
 		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
 	}
 	{
@@ -1054,6 +1064,12 @@ void testEth2phase0()
 	}
 }
 
+template<class T>
+void testHashToG1(const T& mapto)
+{
+	(void)mapto;
+}
+
 CYBOZU_TEST_AUTO(test)
 {
 	initPairing(mcl::BLS12_381);
@@ -1076,4 +1092,5 @@ CYBOZU_TEST_AUTO(test)
 	testHashToFp2v6(mapto);
 	testHashToFp2v7(mapto);
 	testEth2phase0();
+	testHashToG1(mapto);
 }

From e1237ab19bd8820f9b9a66069e759bbcccf03fbf Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Jun 2020 10:57:28 +0900
Subject: [PATCH 237/553] remove unused code

---
 include/mcl/mapto_wb19.hpp | 73 --------------------------------------
 1 file changed, 73 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index bb7df524..970d37df 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -340,83 +340,10 @@ struct MapToG2_WB19 {
 		}
 		assert(0);
 	}
-#if 0
-	void h2_chain(G2& out, const G2& P) const
-	{
-		G2 t[16];
-		t[0] = P;
-		G2::dbl(t[1], t[0]);
-		G2::add(t[4], t[1], t[0]);
-		G2::add(t[2], t[4], t[1]);
-		G2::add(t[3], t[2], t[1]);
-		G2::add(t[11], t[3], t[1]);
-		G2::add(t[9], t[11], t[1]);
-		G2::add(t[10], t[9], t[1]);
-		G2::add(t[5], t[10], t[1]);
-		G2::add(t[7], t[5], t[1]);
-		G2::add(t[15], t[7], t[1]);
-		G2::add(t[13], t[15], t[1]);
-		G2::add(t[6], t[13], t[1]);
-		G2::add(t[14], t[6], t[1]);
-		G2::add(t[12], t[14], t[1]);
-		G2::add(t[8], t[12], t[1]);
-		G2::dbl(t[1], t[6]);
-
-		const struct {
-			uint32_t n;
-			uint32_t idx;
-		} tbl[] = {
-			{ 5, 13 }, { 2, 0 }, { 9, 8 }, { 5, 11 }, { 6, 13 }, { 8, 2 }, { 5, 3 },
-			{ 5, 3 }, { 4, 5 }, { 4, 0 }, { 8, 11 }, { 8, 8 }, { 4, 2 }, { 9, 5 },
-			{ 6, 11 }, { 2, 0 }, { 9, 8 }, { 5, 13 }, { 4, 0 }, { 11, 9 }, { 7, 12 },
-			{ 7, 7 }, { 5, 12 }, { 5, 14 }, { 8, 13 }, { 6, 3 }, { 5, 0 }, { 8, 9 },
-			{ 6, 13 }, { 4, 10 }, { 4, 2 }, { 6, 10 }, { 6, 2 }, { 4, 0 }, { 10, 9 },
-			{ 6, 14 }, { 4, 3 }, { 6, 9 }, { 6, 15 }, { 5, 8 }, { 5, 12 }, { 4, 5 },
-			{ 6, 15 }, { 6, 2 }, { 7, 5 }, { 6, 3 }, { 6, 9 }, { 6, 15 }, { 6, 14 },
-			{ 5, 8 }, { 10, 6 }, { 5, 5 }, { 3, 0 }, { 9, 13 }, { 7, 12 }, { 4, 5 },
-			{ 6, 2 }, { 6, 11 }, { 4, 10 }, { 4, 4 }, { 6, 10 }, { 7, 7 }, { 3, 2 },
-			{ 4, 3 }, { 8, 9 }, { 8, 9 }, { 6, 8 }, { 5, 7 }, { 5, 6 }, { 6, 5 },
-			{ 6, 4 }, { 5, 5 }, { 6, 4 }, { 6, 3 }, { 6, 4 }, { 6, 5 }, { 6, 3 },
-			{ 7, 3 }, { 6, 3 }, { 5, 4 }, { 6, 3 }, { 6, 3 }, { 3, 0 }, { 6, 3 },
-			{ 6, 3 },
-		};
-		for (size_t j = 0; j < CYBOZU_NUM_OF_ARRAY(tbl); j++) {
-			const uint32_t n = tbl[j].n;
-			for (size_t i = 0; i < n; i++) G2::dbl(t[1], t[1]);
-			G2::add(t[1], t[1], t[tbl[j].idx]);
-		}
-		for (size_t i = 0; i < 5; i++) G2::dbl(t[1], t[1]);
-		G2::add(out, t[1], t[2]);
-	}
-	void mx_chain(G2& Q, const G2& P) const
-	{
-		G2 T;
-		G2::dbl(T, P);
-		const size_t tbl[] = { 2, 3, 9, 32, 16 };
-		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
-			G2::add(T, T, P);
-			for (size_t j = 0; j < tbl[i]; j++) {
-				G2::dbl(T, T);
-			}
-		}
-		Q = T;
-	}
-#endif
 	void clear_h2(G2& Q, const G2& P) const
 	{
-#if 1
 		// 1.9Mclk can be reduced
 		mcl::local::mulByCofactorBLS12fast(Q, P);
-#else
-		G2 T0, T1;
-		h2_chain(T0, P);
-		G2::dbl(T1, T0);
-		G2::add(T1, T0, T1);
-		mx_chain(T0, T1);
-		mx_chain(T0, T0);
-		G2::neg(T1, T1);
-		G2::add(Q, T0, T1);
-#endif
 	}
 	template<class T>
 	void put(const T& P) const

From 066d3accf5a135bcdfcc9f2ddf1b413226e01f34 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Jun 2020 11:56:33 +0900
Subject: [PATCH 238/553] add 11 to mulSmallUnit

---
 include/mcl/util.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/mcl/util.hpp b/include/mcl/util.hpp
index 0b5a7296..01324055 100644
--- a/include/mcl/util.hpp
+++ b/include/mcl/util.hpp
@@ -327,6 +327,7 @@ bool mulSmallUnit(T& z, const T& x, U y)
 	case 8: T::add(z, x, x); T::add(z, z, z); T::add(z, z, z); break;
 	case 9: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, t); T::add(z, t, x); break; }
 	case 10: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, x); T::add(z, t, t); break; }
+	case 11: { T t; T::add(t, x, x); T::add(t, t, x); T::add(t, t, t); T::add(t, t, t); T::sub(z, t, x); break; }
 	default:
 		return false;
 	}

From 6a71548dde2e42faf7e24b3fa7e57b1604bb4a1c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Jun 2020 17:19:23 +0900
Subject: [PATCH 239/553] sswuG1

---
 include/mcl/mapto_wb19.hpp | 65 ++++++++++++++++++++++++++++++++++++++
 test/mapto_wb19_test.cpp   | 39 +++++++++++++++++++++--
 2 files changed, 101 insertions(+), 3 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 970d37df..113cbbda 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -80,6 +80,8 @@ struct MapToG2_WB19 {
 	Fp2 xden[3];
 	Fp2 ynum[4];
 	Fp2 yden[4];
+	Fp g1A, g1B, g1c1, g1c2;
+	int g1Z;
 	int draftVersion_;
 	void setDraftVersion(int draftVersion)
 	{
@@ -132,6 +134,21 @@ struct MapToG2_WB19 {
 		etas[3].b = ev3;
 		init_iso();
 		draftVersion_ = 5;
+		{
+			const char *A = "0x144698a3b8e9433d693a02c96d4982b0ea985383ee66a8d8e8981aefd881ac98936f8da0e0f97f5cf428082d584c1d";
+			const char *B = "0x12e2908d11688030018b12e8753eee3b2016c1f0f24f4070a0b9c14fcef35ef55a23215a316ceaa5d1cc48e98e172be0";
+			const char *c1 = "0x680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffeaaa";
+			const char *c2 = "0x3d689d1e0e762cef9f2bec6130316806b4c80eda6fc10ce77ae83eab1ea8b8b8a407c9c6db195e06f2dbeabc2baeff5";
+			g1A.setStr(&b, A);
+			assert(b); (void)b;
+			g1B.setStr(&b, B);
+			assert(b); (void)b;
+			g1c1.setStr(&b, c1);
+			assert(b); (void)b;
+			g1c2.setStr(&b, c2);
+			assert(b); (void)b;
+			g1Z = 11;
+		}
 	}
 	void init_iso()
 	{
@@ -256,6 +273,54 @@ struct MapToG2_WB19 {
 		if (!x.b.isZero()) return false;
 		return false;
 	}
+	// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-07#appendix-D.3.5
+	void sswuG1(Fp& xn, Fp& xd, Fp& y, const Fp& u) const
+	{
+		const Fp& A = g1A;
+		const Fp& B = g1B;
+		const Fp& c1 = g1c1;
+		const Fp& c2 = g1c2;
+		const int Z = g1Z;
+		Fp u2, u2Z, t, t2, t3;
+
+		Fp::sqr(u2, u);
+		Fp::mulUnit(u2Z, u2, Z);
+		Fp::sqr(t, u2Z);
+		Fp::add(xd, t, u2Z);
+		if (xd.isZero()) {
+			Fp::mulUnit(xd, A, Z);
+			xn = B;
+		} else {
+			Fp::add(xn, xd, Fp::one());
+			xn *= B;
+			xd *= A;
+			Fp::neg(xd, xd);
+		}
+		Fp::sqr(t, xd);
+		Fp::mul(t2, t, xd);
+		t *= A;
+		Fp::sqr(t3, xn);
+		t3 += t;
+		t3 *= xn;
+		Fp::mul(t, t2, B);
+		t3 += t;
+		Fp::sqr(y, t2);
+		Fp::mul(t, t3, t2);
+		y *= t;
+		Fp::pow(y, y, c1);
+		y *= t;
+		Fp::sqr(t, y);
+		t *= t2;
+		if (t != t3) {
+			xn *= u2Z;
+			y *= c2;
+			y *= u2;
+			y *= u;
+		}
+		if (sgn0(u) != sgn0(y)) {
+			Fp::neg(y, y);
+		}
+	}
 	// https://github.com/algorand/bls_sigs_ref
 	void osswu2_help(Point& P, const Fp2& t) const
 	{
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 3dbf8f93..82d4a9cc 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -1065,9 +1065,42 @@ void testEth2phase0()
 }
 
 template<class T>
-void testHashToG1(const T& mapto)
+void testSswuG1(const T& mapto)
 {
-	(void)mapto;
+	const struct {
+		const char *u;
+		const char *xn;
+		const char *xd;
+		const char *y;
+	} tbl[] = {
+		{
+			"0",
+			"2906670324641927570491258158026293881577086121416628140204402091718288198173574630967936031029026176254968826637280",
+			"134093699507829814821517650980559345626771735832728306571853989028117161444712301203928819168120125800913069360447",
+			"883926319761702754759909536142450234040420493353017578303105057331414514426056372828799438842649753623273850162620",
+		},
+		{
+			"1",
+			"1899737305729263819017890260937734483867440857300594896394519620134021106669873067956151260450660652775675911846846",
+			"2393285161127709615559578013969192009035621989946268206469810267786625713154290249995541799111574154426937440234423",
+			"930707443353688021592152842018127582116075842630002779852379799673382026358889394936840703051493045692645732041175",
+		},
+		{
+			"2445954111132780748727614926881625117054159133000189976501123519233969822355358926084559381412726536178576396564099",
+			"1380948948858039589493865757655255282539355225819860723137103295095584615993188368169864518071716731687572756871254",
+			"3943815976847699234459109633672806041428347164453405394564656059649800794974863796342327007702642595444543195342842",
+			"2822129059347872230939996033946474192520362213555773694753196763199812747558444338256205967106315253391997542043187",
+		},
+	};
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+		Fp u;
+		u.setStr(tbl[i].u);
+		Fp xn, xd, y;
+		mapto.sswuG1(xn, xd, y, u);
+		CYBOZU_TEST_EQUAL(xn.getStr(), tbl[i].xn);
+		CYBOZU_TEST_EQUAL(xd.getStr(), tbl[i].xd);
+		CYBOZU_TEST_EQUAL(y.getStr(), tbl[i].y);
+	}
 }
 
 CYBOZU_TEST_AUTO(test)
@@ -1092,5 +1125,5 @@ CYBOZU_TEST_AUTO(test)
 	testHashToFp2v6(mapto);
 	testHashToFp2v7(mapto);
 	testEth2phase0();
-	testHashToG1(mapto);
+	testSswuG1(mapto);
 }

From 2488fa3f6d57c3f0579b11d5047988690b9d372c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 28 Jun 2020 13:59:04 +0900
Subject: [PATCH 240/553] add init_iso11

---
 include/mcl/bn.hpp         |  18 +++---
 include/mcl/mapto_wb19.hpp | 126 +++++++++++++++++++++++++++++++++++--
 test/mapto_wb19_test.cpp   |   4 +-
 3 files changed, 133 insertions(+), 15 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index ee8df173..657c8fac 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -295,7 +295,7 @@ struct MapTo {
 	int type_;
 	int mapToMode_;
 	bool useOriginalG2cofactor_;
-	MapToG2_WB19<Fp, Fp2, G2> mapToG2_WB19_;
+	MapTo_WB19<Fp, G1, Fp2, G2> mapTo_WB19_;
 	MapTo()
 		: type_(0)
 		, mapToMode_(MCL_MAP_TO_MODE_ORIGINAL)
@@ -486,7 +486,7 @@ struct MapTo {
 		assert(b);
 		(void)b;
 		Fr::inv(g2cofactorAdj_, g2cofactorAdjInv_);
-		mapToG2_WB19_.init();
+		mapTo_WB19_.init();
 	}
 	/*
 		change mapTo function to mode
@@ -506,17 +506,17 @@ struct MapTo {
 			break;
 		case MCL_MAP_TO_MODE_HASH_TO_CURVE_05:
 			mapToMode_ = mode;
-			mapToG2_WB19_.setDraftVersion(5);
+			mapTo_WB19_.setDraftVersion(5);
 			return true;
 			break;
 		case MCL_MAP_TO_MODE_HASH_TO_CURVE_06:
 			mapToMode_ = mode;
-			mapToG2_WB19_.setDraftVersion(6);
+			mapTo_WB19_.setDraftVersion(6);
 			return true;
 			break;
 		case MCL_MAP_TO_MODE_HASH_TO_CURVE_07:
 			mapToMode_ = mode;
-			mapToG2_WB19_.setDraftVersion(7);
+			mapTo_WB19_.setDraftVersion(7);
 			return true;
 			break;
 		default:
@@ -591,7 +591,7 @@ struct MapTo {
 	bool calc(G2& P, const Fp2& t, bool fast = false) const
 	{
 		if (mapToMode_ == MCL_MAP_TO_MODE_WB19 || mapToMode_ >= MCL_MAP_TO_MODE_HASH_TO_CURVE_06) {
-			mapToG2_WB19_.opt_swu2_map(P, t);
+			mapTo_WB19_.opt_swu2_map(P, t);
 			return true;
 		}
 		if (!mapToEc(P, t)) return false;
@@ -2068,7 +2068,7 @@ inline void hashAndMapToG2(G2& P, const void *buf, size_t bufSize)
 {
 	int mode = getMapToMode();
 	if (mode == MCL_MAP_TO_MODE_WB19 || mode >= MCL_MAP_TO_MODE_HASH_TO_CURVE_06) {
-		BN::param.mapTo.mapToG2_WB19_.msgToG2(P, buf, bufSize);
+		BN::param.mapTo.mapTo_WB19_.msgToG2(P, buf, bufSize);
 		return;
 	}
 	Fp2 t;
@@ -2219,7 +2219,7 @@ inline bool ethMsgToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr,
 inline bool ethFp2ToG2(G2& out, const Fp2& t1, const Fp2 *t2 = 0)
 {
 	if (!BN::param.isBLS12) return false;
-	BN::param.mapTo.mapToG2_WB19_.opt_swu2_map(out, t1, t2);
+	BN::param.mapTo.mapTo_WB19_.opt_swu2_map(out, t1, t2);
 	return true;
 }
 
@@ -2227,7 +2227,7 @@ inline bool ethFp2ToG2(G2& out, const Fp2& t1, const Fp2 *t2 = 0)
 inline bool ethMsgToG2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
 {
 	if (!BN::param.isBLS12) return false;
-	BN::param.mapTo.mapToG2_WB19_.map2curve_osswu2(out, msg, msgSize, dst, dstSize);
+	BN::param.mapTo.mapTo_WB19_.map2curve_osswu2(out, msg, msgSize, dst, dstSize);
 	return true;
 }
 
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 113cbbda..c5da8746 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -68,8 +68,8 @@ template<class F> int PointT<F>::specialA_;
 
 } // mcl::local
 
-template<class Fp, class Fp2, class G2>
-struct MapToG2_WB19 {
+template<class Fp, class G1, class Fp2, class G2>
+struct MapTo_WB19 {
 	typedef local::PointT<Fp2> Point;
 	mpz_class sqrtConst; // (p^2 - 9) / 16
 	Fp2 Ep_a;
@@ -81,6 +81,10 @@ struct MapToG2_WB19 {
 	Fp2 ynum[4];
 	Fp2 yden[4];
 	Fp g1A, g1B, g1c1, g1c2;
+	Fp g1xnum[11];
+	Fp g1xden[11];
+	Fp g1ynum[16];
+	Fp g1yden[16];
 	int g1Z;
 	int draftVersion_;
 	void setDraftVersion(int draftVersion)
@@ -132,7 +136,7 @@ struct MapToG2_WB19 {
 		assert(b); (void)b;
 		Fp::neg(etas[3].a, ev4);
 		etas[3].b = ev3;
-		init_iso();
+		init_iso3();
 		draftVersion_ = 5;
 		{
 			const char *A = "0x144698a3b8e9433d693a02c96d4982b0ea985383ee66a8d8e8981aefd881ac98936f8da0e0f97f5cf428082d584c1d";
@@ -149,8 +153,18 @@ struct MapToG2_WB19 {
 			assert(b); (void)b;
 			g1Z = 11;
 		}
+		init_iso11();
 	}
-	void init_iso()
+	void initArray(Fp *dst, const char **s, size_t n) const
+	{
+		bool b;
+		for (size_t i = 0; i < n; i++) {
+			dst[i].setStr(&b, s[i]);
+			assert(b);
+			(void)b;
+		}
+	}
+	void init_iso3()
 	{
 		const char *tbl[] = {
 			"0x5c759507e8e333ebb5b7a9a47d7ed8532c52d39fd3a042a88b58423c50ae15d5c2638e343d9c71c6238aaaaaaaa97d6",
@@ -201,6 +215,76 @@ struct MapToG2_WB19 {
 		yden[3].a = 1;
 		yden[3].b.clear();
 	}
+	void init_iso11()
+	{
+		const char *xnumStr[] = {
+			"0x11a05f2b1e833340b809101dd99815856b303e88a2d7005ff2627b56cdb4e2c85610c2d5f2e62d6eaeac1662734649b7",
+			"0x17294ed3e943ab2f0588bab22147a81c7c17e75b2f6a8417f565e33c70d1e86b4838f2a6f318c356e834eef1b3cb83bb",
+			"0xd54005db97678ec1d1048c5d10a9a1bce032473295983e56878e501ec68e25c958c3e3d2a09729fe0179f9dac9edcb0",
+			"0x1778e7166fcc6db74e0609d307e55412d7f5e4656a8dbf25f1b33289f1b330835336e25ce3107193c5b388641d9b6861",
+			"0xe99726a3199f4436642b4b3e4118e5499db995a1257fb3f086eeb65982fac18985a286f301e77c451154ce9ac8895d9",
+			"0x1630c3250d7313ff01d1201bf7a74ab5db3cb17dd952799b9ed3ab9097e68f90a0870d2dcae73d19cd13c1c66f652983",
+			"0xd6ed6553fe44d296a3726c38ae652bfb11586264f0f8ce19008e218f9c86b2a8da25128c1052ecaddd7f225a139ed84",
+			"0x17b81e7701abdbe2e8743884d1117e53356de5ab275b4db1a682c62ef0f2753339b7c8f8c8f475af9ccb5618e3f0c88e",
+			"0x80d3cf1f9a78fc47b90b33563be990dc43b756ce79f5574a2c596c928c5d1de4fa295f296b74e956d71986a8497e317",
+			"0x169b1f8e1bcfa7c42e0c37515d138f22dd2ecb803a0c5c99676314baf4bb1b7fa3190b2edc0327797f241067be390c9e",
+			"0x10321da079ce07e272d8ec09d2565b0dfa7dccdde6787f96d50af36003b14866f69b771f8c285decca67df3f1605fb7b",
+			"0x6e08c248e260e70bd1e962381edee3d31d79d7e22c837bc23c0bf1bc24c6b68c24b1b80b64d391fa9c8ba2e8ba2d229",
+		};
+		const char *xdenStr[] = {
+			"0x8ca8d548cff19ae18b2e62f4bd3fa6f01d5ef4ba35b48ba9c9588617fc8ac62b558d681be343df8993cf9fa40d21b1c",
+			"0x12561a5deb559c4348b4711298e536367041e8ca0cf0800c0126c2588c48bf5713daa8846cb026e9e5c8276ec82b3bff",
+			"0xb2962fe57a3225e8137e629bff2991f6f89416f5a718cd1fca64e00b11aceacd6a3d0967c94fedcfcc239ba5cb83e19",
+			"0x3425581a58ae2fec83aafef7c40eb545b08243f16b1655154cca8abc28d6fd04976d5243eecf5c4130de8938dc62cd8",
+			"0x13a8e162022914a80a6f1d5f43e7a07dffdfc759a12062bb8d6b44e833b306da9bd29ba81f35781d539d395b3532a21e",
+			"0xe7355f8e4e667b955390f7f0506c6e9395735e9ce9cad4d0a43bcef24b8982f7400d24bc4228f11c02df9a29f6304a5",
+			"0x772caacf16936190f3e0c63e0596721570f5799af53a1894e2e073062aede9cea73b3538f0de06cec2574496ee84a3a",
+			"0x14a7ac2a9d64a8b230b3f5b074cf01996e7f63c21bca68a81996e1cdf9822c580fa5b9489d11e2d311f7d99bbdcc5a5e",
+			"0xa10ecf6ada54f825e920b3dafc7a3cce07f8d1d7161366b74100da67f39883503826692abba43704776ec3a79a1d641",
+			"0x95fc13ab9e92ad4476d6e3eb3a56680f682b4ee96f7d03776df533978f31c1593174e4b4b7865002d6384d168ecdd0a",
+			"0x1",
+		};
+		const char *ynumStr[] = {
+			"0x90d97c81ba24ee0259d1f094980dcfa11ad138e48a869522b52af6c956543d3cd0c7aee9b3ba3c2be9845719707bb33",
+			"0x134996a104ee5811d51036d776fb46831223e96c254f383d0f906343eb67ad34d6c56711962fa8bfe097e75a2e41c696",
+			"0xcc786baa966e66f4a384c86a3b49942552e2d658a31ce2c344be4b91400da7d26d521628b00523b8dfe240c72de1f6",
+			"0x1f86376e8981c217898751ad8746757d42aa7b90eeb791c09e4a3ec03251cf9de405aba9ec61deca6355c77b0e5f4cb",
+			"0x8cc03fdefe0ff135caf4fe2a21529c4195536fbe3ce50b879833fd221351adc2ee7f8dc099040a841b6daecf2e8fedb",
+			"0x16603fca40634b6a2211e11db8f0a6a074a7d0d4afadb7bd76505c3d3ad5544e203f6326c95a807299b23ab13633a5f0",
+			"0x4ab0b9bcfac1bbcb2c977d027796b3ce75bb8ca2be184cb5231413c4d634f3747a87ac2460f415ec961f8855fe9d6f2",
+			"0x987c8d5333ab86fde9926bd2ca6c674170a05bfe3bdd81ffd038da6c26c842642f64550fedfe935a15e4ca31870fb29",
+			"0x9fc4018bd96684be88c9e221e4da1bb8f3abd16679dc26c1e8b6e6a1f20cabe69d65201c78607a360370e577bdba587",
+			"0xe1bba7a1186bdb5223abde7ada14a23c42a0ca7915af6fe06985e7ed1e4d43b9b3f7055dd4eba6f2bafaaebca731c30",
+			"0x19713e47937cd1be0dfd0b8f1d43fb93cd2fcbcb6caf493fd1183e416389e61031bf3a5cce3fbafce813711ad011c132",
+			"0x18b46a908f36f6deb918c143fed2edcc523559b8aaf0c2462e6bfe7f911f643249d9cdf41b44d606ce07c8a4d0074d8e",
+			"0xb182cac101b9399d155096004f53f447aa7b12a3426b08ec02710e807b4633f06c851c1919211f20d4c04f00b971ef8",
+			"0x245a394ad1eca9b72fc00ae7be315dc757b3b080d4c158013e6632d3c40659cc6cf90ad1c232a6442d9d3f5db980133",
+			"0x5c129645e44cf1102a159f748c4a3fc5e673d81d7e86568d9ab0f5d396a7ce46ba1049b6579afb7866b1e715475224b",
+			"0x15e6be4e990f03ce4ea50b3b42df2eb5cb181d8f84965a3957add4fa95af01b2b665027efec01c7704b456be69c8b604",
+		};
+		const char *ydenStr[] = {
+			"0x16112c4c3a9c98b252181140fad0eae9601a6de578980be6eec3232b5be72e7a07f3688ef60c206d01479253b03663c1",
+			"0x1962d75c2381201e1a0cbd6c43c348b885c84ff731c4d59ca4a10356f453e01f78a4260763529e3532f6102c2e49a03d",
+			"0x58df3306640da276faaae7d6e8eb15778c4855551ae7f310c35a5dd279cd2eca6757cd636f96f891e2538b53dbf67f2",
+			"0x16b7d288798e5395f20d23bf89edb4d1d115c5dbddbcd30e123da489e726af41727364f2c28297ada8d26d98445f5416",
+			"0xbe0e079545f43e4b00cc912f8228ddcc6d19c9f0f69bbb0542eda0fc9dec916a20b15dc0fd2ededda39142311a5001d",
+			"0x8d9e5297186db2d9fb266eaac783182b70152c65550d881c5ecd87b6f0f5a6449f38db9dfa9cce202c6477faaf9b7ac",
+			"0x166007c08a99db2fc3ba8734ace9824b5eecfdfa8d0cf8ef5dd365bc400a0051d5fa9c01a58b1fb93d1a1399126a775c",
+			"0x16a3ef08be3ea7ea03bcddfabba6ff6ee5a4375efa1f4fd7feb34fd206357132b920f5b00801dee460ee415a15812ed9",
+			"0x1866c8ed336c61231a1be54fd1d74cc4f9fb0ce4c6af5920abc5750c4bf39b4852cfe2f7bb9248836b233d9d55535d4a",
+			"0x167a55cda70a6e1cea820597d94a84903216f763e13d87bb5308592e7ea7d4fbc7385ea3d529b35e346ef48bb8913f55",
+			"0x4d2f259eea405bd48f010a01ad2911d9c6dd039bb61a6290e591b36e636a5c871a5c29f4f83060400f8b49cba8f6aa8",
+			"0xaccbb67481d033ff5852c1e48c50c477f94ff8aefce42d28c0f9a88cea7913516f968986f7ebbea9684b529e2561092",
+			"0xad6b9514c767fe3c3613144b45f1496543346d98adf02267d5ceef9a00d9b8693000763e3b90ac11e99b138573345cc",
+			"0x2660400eb2e4f3b628bdd0d53cd76f2bf565b94e72927c1cb748df27942480e420517bd8714cc80d1fadc1326ed06f7",
+			"0xe0fa1d816ddc03e6b24255e0d7819c171c40f65e273b853324efcd6356caa205ca2f570f13497804415473a1d634b8f",
+			"0x1",
+		};
+		initArray(g1xnum, xnumStr, CYBOZU_NUM_OF_ARRAY(xnumStr));
+		initArray(g1xden, xdenStr, CYBOZU_NUM_OF_ARRAY(xdenStr));
+		initArray(g1ynum, ynumStr, CYBOZU_NUM_OF_ARRAY(ynumStr));
+		initArray(g1yden, ydenStr, CYBOZU_NUM_OF_ARRAY(ydenStr));
+	}
 	template<size_t N>
 	void evalPoly(Fp2& y, const Fp2& x, const Fp2 *zpows, const Fp2 (&cof)[N]) const
 	{
@@ -236,6 +320,30 @@ struct MapToG2_WB19 {
 		Fp2::mul(Q.y, mapvals[2], mapvals[1]);
 		Q.y *= t;
 	}
+	// refer (g1xnum, g1xden, g1ynum, g1yden)
+	void iso11(G1& Q, const Point& P) const
+	{
+		Fp2 zpows[3];
+		Fp2::sqr(zpows[0], P.z);
+		Fp2::sqr(zpows[1], zpows[0]);
+		Fp2::mul(zpows[2], zpows[1], zpows[0]);
+		Fp2 mapvals[4];
+		evalPoly(mapvals[0], P.x, zpows, xnum);
+		evalPoly(mapvals[1], P.x, zpows, xden);
+		evalPoly(mapvals[2], P.x, zpows, ynum);
+		evalPoly(mapvals[3], P.x, zpows, yden);
+		mapvals[1] *= zpows[0];
+		mapvals[2] *= P.y;
+		mapvals[3] *= zpows[0];
+		mapvals[3] *= P.z;
+		Fp2::mul(Q.z, mapvals[1], mapvals[3]);
+		Fp2::mul(Q.x, mapvals[0], mapvals[3]);
+		Q.x *= Q.z;
+		Fp2 t;
+		Fp2::sqr(t, Q.z);
+		Fp2::mul(Q.y, mapvals[2], mapvals[1]);
+		Q.y *= t;
+	}
 	/*
 		xi = -2-i
 		(a+bi)*(-2-i) = (b-2a)-(a+2b)i
@@ -321,6 +429,16 @@ struct MapToG2_WB19 {
 			Fp::neg(y, y);
 		}
 	}
+	void sswuG1(Fp pt[3], const Fp& u) const
+	{
+		Fp xn, y;
+		Fp& xd = pt[2];
+		sswuG1(xn, xd, y, u);
+		Fp::mul(pt[0], xn, xd);
+		Fp::sqr(pt[1], xd);
+		pt[1] *= xd;
+		pt[1] *= y;
+	}
 	// https://github.com/algorand/bls_sigs_ref
 	void osswu2_help(Point& P, const Fp2& t) const
 	{
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 82d4a9cc..8ea5f58f 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -11,7 +11,7 @@
 using namespace mcl;
 using namespace mcl::bn;
 
-typedef mcl::MapToG2_WB19<Fp, Fp2, G2> MapTo;
+typedef mcl::MapTo_WB19<Fp, G1, Fp2, G2> MapTo;
 typedef MapTo::Point Point;
 
 void dump(const void *msg, size_t msgSize)
@@ -1108,7 +1108,7 @@ CYBOZU_TEST_AUTO(test)
 	initPairing(mcl::BLS12_381);
 	Fp::setETHserialization(true);
 	bn::setMapToMode(MCL_MAP_TO_MODE_WB19);
-	const MapTo& mapto = BN::param.mapTo.mapToG2_WB19_;
+	const MapTo& mapto = BN::param.mapTo.mapTo_WB19_;
 	py_eccTest(mapto);
 	py_eccTest2(mapto);
 	osswu2_helpTest(mapto);

From 73337422293c5af62e4068bc0d8e712e29cb58fa Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 29 Jun 2020 11:41:45 +0900
Subject: [PATCH 241/553] rename vars in mapto

---
 include/mcl/mapto_wb19.hpp | 46 +++++++++++++++++++++-----------------
 test/mapto_wb19_test.cpp   | 32 +++++++++++++-------------
 2 files changed, 41 insertions(+), 37 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index c5da8746..53430b24 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -70,10 +70,11 @@ template<class F> int PointT<F>::specialA_;
 
 template<class Fp, class G1, class Fp2, class G2>
 struct MapTo_WB19 {
-	typedef local::PointT<Fp2> Point;
+	typedef local::PointT<Fp> E1;
+	typedef local::PointT<Fp2> E2;
 	mpz_class sqrtConst; // (p^2 - 9) / 16
-	Fp2 Ep_a;
-	Fp2 Ep_b;
+	Fp2 g2A;
+	Fp2 g2B;
 	Fp2 root4[4];
 	Fp2 etas[4];
 	Fp2 xnum[4];
@@ -94,14 +95,17 @@ struct MapTo_WB19 {
 	void init()
 	{
 		bool b;
-		Ep_a.a = 0;
-		Ep_a.b = 240;
-		Ep_b.a = 1012;
-		Ep_b.b = 1012;
-		Point::a_.clear();
-		Point::b_.a = 4;
-		Point::b_.b = 4;
-		Point::specialA_ = ec::Zero;
+		g2A.a = 0;
+		g2A.b = 240;
+		g2B.a = 1012;
+		g2B.b = 1012;
+		E1::a_.clear();
+		E1::b_ = 4;
+		E1::specialA_ = ec::Zero;
+		E2::a_.clear();
+		E2::b_.a = 4;
+		E2::b_.b = 4;
+		E2::specialA_ = ec::Zero;
 		sqrtConst = Fp::getOp().mp;
 		sqrtConst *= sqrtConst;
 		sqrtConst -= 9;
@@ -297,7 +301,7 @@ struct MapTo_WB19 {
 		}
 	}
 	// refer (xnum, xden, ynum, yden)
-	void iso3(G2& Q, const Point& P) const
+	void iso3(G2& Q, const E2& P) const
 	{
 		Fp2 zpows[3];
 		Fp2::sqr(zpows[0], P.z);
@@ -321,7 +325,7 @@ struct MapTo_WB19 {
 		Q.y *= t;
 	}
 	// refer (g1xnum, g1xden, g1ynum, g1yden)
-	void iso11(G1& Q, const Point& P) const
+	void iso11(G1& Q, const E2& P) const
 	{
 		Fp2 zpows[3];
 		Fp2::sqr(zpows[0], P.z);
@@ -440,7 +444,7 @@ struct MapTo_WB19 {
 		pt[1] *= y;
 	}
 	// https://github.com/algorand/bls_sigs_ref
-	void osswu2_help(Point& P, const Fp2& t) const
+	void osswu2_help(E2& P, const Fp2& t) const
 	{
 		Fp2 t2, t2xi;
 		Fp2::sqr(t2, t);
@@ -452,20 +456,20 @@ struct MapTo_WB19 {
 		den += den2;
 		Fp2 x0_num, x0_den;
 		Fp2::add(x0_num, den, 1);
-		x0_num *= Ep_b;
+		x0_num *= g2B;
 		if (den.isZero()) {
-			mul_xi(x0_den, Ep_a);
+			mul_xi(x0_den, g2A);
 		} else {
-			Fp2::mul(x0_den, -Ep_a, den);
+			Fp2::mul(x0_den, -g2A, den);
 		}
 		Fp2 x0_den2, x0_den3, gx0_den, gx0_num;
 		Fp2::sqr(x0_den2, x0_den);
 		Fp2::mul(x0_den3, x0_den2, x0_den);
 		gx0_den = x0_den3;
 
-		Fp2::mul(gx0_num, Ep_b, gx0_den);
+		Fp2::mul(gx0_num, g2B, gx0_den);
 		Fp2 tmp, tmp1, tmp2;
-		Fp2::mul(tmp, Ep_a, x0_num);
+		Fp2::mul(tmp, g2A, x0_num);
 		tmp *= x0_den2;
 		gx0_num += tmp;
 		Fp2::sqr(tmp, x0_num);
@@ -538,10 +542,10 @@ struct MapTo_WB19 {
 	}
 	void opt_swu2_map(G2& P, const Fp2& t, const Fp2 *t2 = 0) const
 	{
-		Point Pp;
+		E2 Pp;
 		osswu2_help(Pp, t);
 		if (t2) {
-			Point P2;
+			E2 P2;
 			osswu2_help(P2, *t2);
 			ec::addJacobi(Pp, Pp, P2);
 		}
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 8ea5f58f..9652c691 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -12,7 +12,7 @@ using namespace mcl;
 using namespace mcl::bn;
 
 typedef mcl::MapTo_WB19<Fp, G1, Fp2, G2> MapTo;
-typedef MapTo::Point Point;
+typedef MapTo::E2 E2;
 
 void dump(const void *msg, size_t msgSize)
 {
@@ -72,8 +72,8 @@ void set(Fp2& x, const Fp2Str& s)
 	x.b.setStr(s.b);
 }
 
-template<class Point>
-void set(Point& P, const PointStr& s)
+template<class E2>
+void set(E2& P, const PointStr& s)
 {
 	set(P.x, s.x);
 	set(P.y, s.y);
@@ -128,7 +128,7 @@ bool sqr_div(const MapTo& mapto, Fp2& z, const Fp2& u, const Fp2& v)
 }
 
 // Proj
-void py_ecc_iso_map_G2(const MapTo& mapto, G2& Q, const Point& P)
+void py_ecc_iso_map_G2(const MapTo& mapto, G2& Q, const E2& P)
 {
 	Fp2 zpows[3];
 	zpows[0] = P.z;
@@ -148,7 +148,7 @@ void py_ecc_iso_map_G2(const MapTo& mapto, G2& Q, const Point& P)
 }
 
 // https://github.com/ethereum/py_ecc
-void py_ecc_optimized_swu_G2(const MapTo& mapto, Point& P, const Fp2& t)
+void py_ecc_optimized_swu_G2(const MapTo& mapto, E2& P, const Fp2& t)
 {
 	Fp2 t2, t2xi, t2xi2;
 	Fp2::sqr(t2, t);
@@ -158,11 +158,11 @@ void py_ecc_optimized_swu_G2(const MapTo& mapto, Point& P, const Fp2& t)
 	// (t^2 * xi)^2 + (t^2 * xi)
 	Fp2::add(deno, t2xi2, t2xi);
 	Fp2::add(nume, deno, 1);
-	nume *= mapto.Ep_b;
+	nume *= mapto.g2B;
 	if (deno.isZero()) {
-		mapto.mul_xi(deno, mapto.Ep_a);
+		mapto.mul_xi(deno, mapto.g2A);
 	} else {
-		deno *= -mapto.Ep_a;
+		deno *= -mapto.g2A;
 	}
 	Fp2 u, v;
 	{
@@ -170,8 +170,8 @@ void py_ecc_optimized_swu_G2(const MapTo& mapto, Point& P, const Fp2& t)
 		Fp2::sqr(deno2, deno);
 		Fp2::mul(v, deno2, deno);
 
-		Fp2::mul(u, mapto.Ep_b, v);
-		Fp2::mul(tmp, mapto.Ep_a, nume);
+		Fp2::mul(u, mapto.g2B, v);
+		Fp2::mul(tmp, mapto.g2A, nume);
 		tmp *= deno2;
 		u += tmp;
 		Fp2::sqr(tmp, nume);
@@ -210,7 +210,7 @@ void py_ecc_optimized_swu_G2(const MapTo& mapto, Point& P, const Fp2& t)
 // Proj
 void py_ecc_map_to_curve_G2(const MapTo& mapto, G2& out, const Fp2& t)
 {
-	Point P;
+	E2 P;
 	py_ecc_optimized_swu_G2(mapto, P, t);
 	py_ecc_iso_map_G2(mapto, out, P);
 }
@@ -521,7 +521,7 @@ void osswu2_helpTest(const T& mapto)
 	};
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 		Fp2 t, x, y, z;
-		Point P;
+		E2 P;
 		set(t, tbl[i].t);
 		set(x, tbl[i].x);
 		set(y, tbl[i].y);
@@ -587,11 +587,11 @@ void addTest()
 		},
 	};
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
-		Point P, Q, R;
+		E2 P, Q, R;
 		set(P, tbl[i].P);
 		set(Q, tbl[i].Q);
 		set(R, tbl[i].R);
-		Point E;
+		E2 E;
 		ec::addJacobi(E, P, Q);
 		CYBOZU_TEST_ASSERT(R.isEqual(E));
 	}
@@ -642,7 +642,7 @@ void iso3Test(const T& mapto)
 			"0xb7b36b9b1bbcf801d21ca5164aa9a0e71df2b4710c67dc0cd275b786800935fc29defbdf9c7e23dc84e26af13ba761d",
 		}
 	};
-	typename T::Point P;
+	typename T::E2 P;
 	G2 Q1, Q2;
 	set(P, Ps);
 	set(Q1, Qs);
@@ -748,7 +748,7 @@ void py_eccTest2(const T& mapto)
 	};
 	Fp2 t;
 	set(t, ts);
-	Point p, q;
+	E2 p, q;
 	py_ecc_optimized_swu_G2(mapto, p, t);
 	set(q, out1s);
 	CYBOZU_TEST_EQUAL(p.x, q.x);

From 732c6b09934901f0b63fda447e458e82a416b82d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 29 Jun 2020 15:09:58 +0900
Subject: [PATCH 242/553] add sswuG1 for E1

---
 include/mcl/mapto_wb19.hpp | 33 +++++++++++++++++++++++++++------
 test/mapto_wb19_test.cpp   | 30 +++++++++++++++++++++++++++---
 2 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 53430b24..bb97655b 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -433,15 +433,15 @@ struct MapTo_WB19 {
 			Fp::neg(y, y);
 		}
 	}
-	void sswuG1(Fp pt[3], const Fp& u) const
+	void sswuG1(E1& pt, const Fp& u) const
 	{
 		Fp xn, y;
-		Fp& xd = pt[2];
+		Fp& xd = pt.z;
 		sswuG1(xn, xd, y, u);
-		Fp::mul(pt[0], xn, xd);
-		Fp::sqr(pt[1], xd);
-		pt[1] *= xd;
-		pt[1] *= y;
+		Fp::mul(pt.x, xn, xd);
+		Fp::sqr(pt.y, xd);
+		pt.y *= xd;
+		pt.y *= y;
 	}
 	// https://github.com/algorand/bls_sigs_ref
 	void osswu2_help(E2& P, const Fp2& t) const
@@ -589,6 +589,27 @@ struct MapTo_WB19 {
 		}
 		map2curve_osswu2(out, msg, msgSize, dst, strlen(dst));
 	}
+#if 0
+	void msgToG1(G1& out, const void *msg, size_t msgSize) const
+	{
+		assert(draftVersion_ == 7);
+		const char *dst = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_";
+		const size_t dstSize = strlen(dst);
+		uint8_t md[128];
+		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
+		Fp u[2];
+		for (size_t i = 0; i < 2; i++) {
+			bool b;
+			u[i].setBigEndianMod(&b, &md[64 * i], 64);
+			assert(b); (void)b;
+		}
+		E1 P1, P2;
+		sswuG1(P1, u[0]);
+		sswuG1(P2, u[1]);
+		ec::addJacobi(P1, P1, P2); // ok
+		// ec::normalizeJacobi(P1);
+	}
+#endif
 };
 
 } // mcl
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 9652c691..fee202e1 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -812,7 +812,6 @@ void testHashToFp2v6(const T& mapto)
 			}
 		},
 	};
-	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_06);
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 		const char *msg = tbl[i].msg;
 		const char *dst = tbl[i].dst;
@@ -856,7 +855,6 @@ void testHashToFp2v6(const T& mapto)
 template<class T>
 void testHashToFp2v7(const T& mapto)
 {
-	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_07);
 	{
 		const char *msg = "asdf";
 		PointStr s = {
@@ -1048,7 +1046,6 @@ void testEth2phase0()
 			"882730e5d03f6b42c3abc26d3372625034e1d871b65a8a6b900a56dae22da98abbe1b68f85e49fe7652a55ec3d0591c20767677e33e5cbb1207315c41a9ac03be39c2e7668edc043d6cb1d9fd93033caa8a1c5b0e84bedaeb6c64972503a43eb",
 		},
 	};
-	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_07);
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 		const Uint8Vec msg = fromHexStr(tbl[i].msg);
 		const Uint8Vec out = fromHexStr(tbl[i].out);
@@ -1103,6 +1100,30 @@ void testSswuG1(const T& mapto)
 	}
 }
 
+template<class T>
+void testMsgToG1(const T& mapto)
+{
+	const struct {
+		const char *msg;
+		const char *x;
+		const char *y;
+		const char *z;
+	} tbl[] = {
+		{
+			"asdf",
+			"0",
+			"0",
+			"0",
+		},
+	};
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+		const char *msg = tbl[i].msg;
+		const size_t msgSize = strlen(msg);
+		G1 P;
+		mapto.msgToG1(P, msg, msgSize);
+	}
+}
+
 CYBOZU_TEST_AUTO(test)
 {
 	initPairing(mcl::BLS12_381);
@@ -1122,8 +1143,11 @@ CYBOZU_TEST_AUTO(test)
 	testVec("../misc/mapto/fips_186_3_B233.txt");
 	testVec("../misc/mapto/misc.txt");
 	ethMsgToG2testAll("../bls_sigs_ref/test-vectors/hash_g2/");
+	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_06);
 	testHashToFp2v6(mapto);
+	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_07);
 	testHashToFp2v7(mapto);
 	testEth2phase0();
 	testSswuG1(mapto);
+//	testMsgToG1(mapto);
 }

From 65bb0890cae8250b2145db36a0d0d59686d739c5 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 30 Jun 2020 10:31:27 +0900
Subject: [PATCH 243/553] add iso11

---
 include/mcl/mapto_wb19.hpp | 67 +++++++++++++++++++++++++-------------
 test/mapto_wb19_test.cpp   | 14 +++++---
 2 files changed, 53 insertions(+), 28 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index bb97655b..42fd6197 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -82,7 +82,7 @@ struct MapTo_WB19 {
 	Fp2 ynum[4];
 	Fp2 yden[4];
 	Fp g1A, g1B, g1c1, g1c2;
-	Fp g1xnum[11];
+	Fp g1xnum[12];
 	Fp g1xden[11];
 	Fp g1ynum[16];
 	Fp g1yden[16];
@@ -289,14 +289,14 @@ struct MapTo_WB19 {
 		initArray(g1ynum, ynumStr, CYBOZU_NUM_OF_ARRAY(ynumStr));
 		initArray(g1yden, ydenStr, CYBOZU_NUM_OF_ARRAY(ydenStr));
 	}
-	template<size_t N>
-	void evalPoly(Fp2& y, const Fp2& x, const Fp2 *zpows, const Fp2 (&cof)[N]) const
+	template<class F, size_t N>
+	void evalPoly(F& y, const F& x, const F *zpows, const F (&cof)[N]) const
 	{
 		y = cof[N - 1]; // always zpows[0] = 1
 		for (size_t i = 1; i < N; i++) {
 			y *= x;
-			Fp2 t;
-			Fp2::mul(t, zpows[i - 1], cof[N - 1 - i]);
+			F t;
+			F::mul(t, zpows[i - 1], cof[N - 1 - i]);
 			y += t;
 		}
 	}
@@ -324,29 +324,52 @@ struct MapTo_WB19 {
 		Fp2::mul(Q.y, mapvals[2], mapvals[1]);
 		Q.y *= t;
 	}
+	template<class X, class C, size_t N>
+	X evalPoly2(const X& x, const C (&c)[N]) const
+	{
+		X ret = c[N - 1];
+		for (size_t i = 1; i < N; i++) {
+			ret *= x;
+			ret += c[N - 1 - i];
+		}
+		return ret;
+	}
 	// refer (g1xnum, g1xden, g1ynum, g1yden)
-	void iso11(G1& Q, const E2& P) const
+	void iso11(G1& Q, E1& P) const
 	{
-		Fp2 zpows[3];
-		Fp2::sqr(zpows[0], P.z);
-		Fp2::sqr(zpows[1], zpows[0]);
-		Fp2::mul(zpows[2], zpows[1], zpows[0]);
-		Fp2 mapvals[4];
-		evalPoly(mapvals[0], P.x, zpows, xnum);
-		evalPoly(mapvals[1], P.x, zpows, xden);
-		evalPoly(mapvals[2], P.x, zpows, ynum);
-		evalPoly(mapvals[3], P.x, zpows, yden);
+#if 1
+		ec::normalizeJacobi(P);
+		Fp xn, xd, yn, yd;
+		xn = evalPoly2(P.x, g1xnum);
+		xd = evalPoly2(P.x, g1xden);
+		yn = evalPoly2(P.x, g1ynum);
+		yd = evalPoly2(P.x, g1yden);
+		Fp::div(Q.x, xn, xd);
+		Fp::div(Q.y, yn, yd);
+		Q.y *= P.y;
+		Q.z = 1;
+#else
+		Fp zpows[3];
+		Fp::sqr(zpows[0], P.z);
+		Fp::sqr(zpows[1], zpows[0]);
+		Fp::mul(zpows[2], zpows[1], zpows[0]);
+		Fp mapvals[4];
+		evalPoly(mapvals[0], P.x, zpows, g1xnum);
+		evalPoly(mapvals[1], P.x, zpows, g1xden);
+		evalPoly(mapvals[2], P.x, zpows, g1ynum);
+		evalPoly(mapvals[3], P.x, zpows, g1yden);
 		mapvals[1] *= zpows[0];
 		mapvals[2] *= P.y;
 		mapvals[3] *= zpows[0];
 		mapvals[3] *= P.z;
-		Fp2::mul(Q.z, mapvals[1], mapvals[3]);
-		Fp2::mul(Q.x, mapvals[0], mapvals[3]);
+		Fp::mul(Q.z, mapvals[1], mapvals[3]);
+		Fp::mul(Q.x, mapvals[0], mapvals[3]);
 		Q.x *= Q.z;
-		Fp2 t;
-		Fp2::sqr(t, Q.z);
-		Fp2::mul(Q.y, mapvals[2], mapvals[1]);
+		Fp t;
+		Fp::sqr(t, Q.z);
+		Fp::mul(Q.y, mapvals[2], mapvals[1]);
 		Q.y *= t;
+#endif
 	}
 	/*
 		xi = -2-i
@@ -589,7 +612,6 @@ struct MapTo_WB19 {
 		}
 		map2curve_osswu2(out, msg, msgSize, dst, strlen(dst));
 	}
-#if 0
 	void msgToG1(G1& out, const void *msg, size_t msgSize) const
 	{
 		assert(draftVersion_ == 7);
@@ -607,9 +629,8 @@ struct MapTo_WB19 {
 		sswuG1(P1, u[0]);
 		sswuG1(P2, u[1]);
 		ec::addJacobi(P1, P1, P2); // ok
-		// ec::normalizeJacobi(P1);
+		iso11(out, P1);
 	}
-#endif
 };
 
 } // mcl
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index fee202e1..1938cb9e 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -1111,16 +1111,20 @@ void testMsgToG1(const T& mapto)
 	} tbl[] = {
 		{
 			"asdf",
-			"0",
-			"0",
-			"0",
+			"14f99d14fa81bad3cc6232c0dee394235fb61287be4a262085604684a20790fbc7954ae6b2d545f05f967c9f624a116a",
+			"acfaebe113b047b38d8eb3a37bbdf77ed0d392289f642e6e7b1611305ae537fa0a574a8235042672b49f44f54d00646",
+			"1",
 		},
 	};
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 		const char *msg = tbl[i].msg;
 		const size_t msgSize = strlen(msg);
-		G1 P;
+		G1 P, Q;
 		mapto.msgToG1(P, msg, msgSize);
+		Q.x.setStr(tbl[i].x, 16);
+		Q.y.setStr(tbl[i].y, 16);
+		Q.z.setStr(tbl[i].z, 16);
+		CYBOZU_TEST_EQUAL(P, Q);
 	}
 }
 
@@ -1149,5 +1153,5 @@ CYBOZU_TEST_AUTO(test)
 	testHashToFp2v7(mapto);
 	testEth2phase0();
 	testSswuG1(mapto);
-//	testMsgToG1(mapto);
+	testMsgToG1(mapto);
 }

From 61e29716112677a5e560f60d596040f6f7c6d1d7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 30 Jun 2020 12:17:44 +0900
Subject: [PATCH 244/553] add g1cofactor

---
 include/mcl/mapto_wb19.hpp | 17 +++++++++++++----
 test/mapto_wb19_test.cpp   | 22 ++++++++++++++++------
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 42fd6197..a35d79c3 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -86,6 +86,7 @@ struct MapTo_WB19 {
 	Fp g1xden[11];
 	Fp g1ynum[16];
 	Fp g1yden[16];
+	mpz_class g1cofactor;
 	int g1Z;
 	int draftVersion_;
 	void setDraftVersion(int draftVersion)
@@ -156,6 +157,8 @@ struct MapTo_WB19 {
 			g1c2.setStr(&b, c2);
 			assert(b); (void)b;
 			g1Z = 11;
+			gmp::setStr(&b, g1cofactor, "d201000000010001", 16);
+			assert(b); (void)b;
 		}
 		init_iso11();
 	}
@@ -612,11 +615,9 @@ struct MapTo_WB19 {
 		}
 		map2curve_osswu2(out, msg, msgSize, dst, strlen(dst));
 	}
-	void msgToG1(G1& out, const void *msg, size_t msgSize) const
+	void msgToG1(G1& out, const void *msg, size_t msgSize, const char *dst, size_t dstSize) const
 	{
 		assert(draftVersion_ == 7);
-		const char *dst = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_";
-		const size_t dstSize = strlen(dst);
 		uint8_t md[128];
 		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
 		Fp u[2];
@@ -628,8 +629,16 @@ struct MapTo_WB19 {
 		E1 P1, P2;
 		sswuG1(P1, u[0]);
 		sswuG1(P2, u[1]);
-		ec::addJacobi(P1, P1, P2); // ok
+		ec::addJacobi(P1, P1, P2);
 		iso11(out, P1);
+		G1::mulGeneric(out, out, g1cofactor);
+	}
+	void msgToG1(G1& out, const void *msg, size_t msgSize) const
+	{
+		assert(draftVersion_ == 7);
+		const char *dst = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_";
+		const size_t dstSize = strlen(dst);
+		msgToG1(out, msg, msgSize, dst, dstSize);
 	}
 };
 
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 1938cb9e..cc278e1c 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -1105,25 +1105,35 @@ void testMsgToG1(const T& mapto)
 {
 	const struct {
 		const char *msg;
+		const char *dst;
 		const char *x;
 		const char *y;
-		const char *z;
 	} tbl[] = {
 		{
 			"asdf",
-			"14f99d14fa81bad3cc6232c0dee394235fb61287be4a262085604684a20790fbc7954ae6b2d545f05f967c9f624a116a",
-			"acfaebe113b047b38d8eb3a37bbdf77ed0d392289f642e6e7b1611305ae537fa0a574a8235042672b49f44f54d00646",
-			"1",
+			"BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_",
+			"bc73d15443009a8ff2ddce864136d892274dd8365c60d0d2d44cc543387348e366a8f1e1401427e37743c29ed2c939a",
+			"101e26428a1b78c05458cb1cc37d2d87876ad3437096d2827f376702d4451667fe1fa82e82795495d33d466133ed1862",
 		},
+		// https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-09.txt
+		// H.9.1.  BLS12381G1_XMD:SHA-256_SSWU_RO_
+		{
+			"",
+			"QUUX-V01-CS02-with-BLS12381G1_XMD:SHA-256_SSWU_RO_",
+			"052926add2207b76ca4fa57a8734416c8dc95e24501772c814278700eed6d1e4e8cf62d9c09db0fac349612b759e79a1",
+			"08ba738453bfed09cb546dbb0783dbb3a5f1f566ed67bb6be0e8c67e2e81a4cc68ee29813bb7994998f3eae0c9c6a265",
+		}
 	};
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 		const char *msg = tbl[i].msg;
 		const size_t msgSize = strlen(msg);
+		const char *dst = tbl[i].dst;
+		const size_t dstSize = strlen(dst);
 		G1 P, Q;
-		mapto.msgToG1(P, msg, msgSize);
+		mapto.msgToG1(P, msg, msgSize, dst, dstSize);
 		Q.x.setStr(tbl[i].x, 16);
 		Q.y.setStr(tbl[i].y, 16);
-		Q.z.setStr(tbl[i].z, 16);
+		Q.z = 1;
 		CYBOZU_TEST_EQUAL(P, Q);
 	}
 }

From 718de1493c98c850e1d9066ce5f1fdc6b736d9f2 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 30 Jun 2020 12:21:59 +0900
Subject: [PATCH 245/553] add hash-to-curve g1 test

---
 test/mapto_wb19_test.cpp | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index cc278e1c..59754a17 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -1122,7 +1122,31 @@ void testMsgToG1(const T& mapto)
 			"QUUX-V01-CS02-with-BLS12381G1_XMD:SHA-256_SSWU_RO_",
 			"052926add2207b76ca4fa57a8734416c8dc95e24501772c814278700eed6d1e4e8cf62d9c09db0fac349612b759e79a1",
 			"08ba738453bfed09cb546dbb0783dbb3a5f1f566ed67bb6be0e8c67e2e81a4cc68ee29813bb7994998f3eae0c9c6a265",
-		}
+		},
+		{
+			"abc",
+			"QUUX-V01-CS02-with-BLS12381G1_XMD:SHA-256_SSWU_RO_",
+			"03567bc5ef9c690c2ab2ecdf6a96ef1c139cc0b2f284dca0a9a7943388a49a3aee664ba5379a7655d3c68900be2f6903",
+			"0b9c15f3fe6e5cf4211f346271d7b01c8f3b28be689c8429c85b67af215533311f0b8dfaaa154fa6b88176c229f2885d",
+		},
+		{
+			"abcdef0123456789",
+			"QUUX-V01-CS02-with-BLS12381G1_XMD:SHA-256_SSWU_RO_",
+			"11e0b079dea29a68f0383ee94fed1b940995272407e3bb916bbf268c263ddd57a6a27200a784cbc248e84f357ce82d98",
+			"03a87ae2caf14e8ee52e51fa2ed8eefe80f02457004ba4d486d6aa1f517c0889501dc7413753f9599b099ebcbbd2d709",
+		},
+		{
+			"q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq",
+			"QUUX-V01-CS02-with-BLS12381G1_XMD:SHA-256_SSWU_RO_",
+			"15f68eaa693b95ccb85215dc65fa81038d69629f70aeee0d0f677cf22285e7bf58d7cb86eefe8f2e9bc3f8cb84fac488",
+			"1807a1d50c29f430b8cafc4f8638dfeeadf51211e1602a5f184443076715f91bb90a48ba1e370edce6ae1062f5e6dd38",
+		},
+		{
+			"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+			"QUUX-V01-CS02-with-BLS12381G1_XMD:SHA-256_SSWU_RO_",
+			"082aabae8b7dedb0e78aeb619ad3bfd9277a2f77ba7fad20ef6aabdc6c31d19ba5a6d12283553294c1825c4b3ca2dcfe",
+			"05b84ae5a942248eea39e1d91030458c40153f3b654ab7872d779ad1e942856a20c438e8d99bc8abfbf74729ce1f7ac8",
+		},
 	};
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 		const char *msg = tbl[i].msg;

From c7120c896344f38a2d186b079ccde2c24210e00a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 30 Jun 2020 14:02:32 +0900
Subject: [PATCH 246/553] reduce Fp::div in iso11

---
 include/mcl/mapto_wb19.hpp | 36 ++++++++++--------------------------
 test/mapto_wb19_test.cpp   |  1 +
 2 files changed, 11 insertions(+), 26 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index a35d79c3..17246d95 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -340,39 +340,23 @@ struct MapTo_WB19 {
 	// refer (g1xnum, g1xden, g1ynum, g1yden)
 	void iso11(G1& Q, E1& P) const
 	{
-#if 1
 		ec::normalizeJacobi(P);
 		Fp xn, xd, yn, yd;
 		xn = evalPoly2(P.x, g1xnum);
 		xd = evalPoly2(P.x, g1xden);
 		yn = evalPoly2(P.x, g1ynum);
 		yd = evalPoly2(P.x, g1yden);
-		Fp::div(Q.x, xn, xd);
-		Fp::div(Q.y, yn, yd);
-		Q.y *= P.y;
-		Q.z = 1;
-#else
-		Fp zpows[3];
-		Fp::sqr(zpows[0], P.z);
-		Fp::sqr(zpows[1], zpows[0]);
-		Fp::mul(zpows[2], zpows[1], zpows[0]);
-		Fp mapvals[4];
-		evalPoly(mapvals[0], P.x, zpows, g1xnum);
-		evalPoly(mapvals[1], P.x, zpows, g1xden);
-		evalPoly(mapvals[2], P.x, zpows, g1ynum);
-		evalPoly(mapvals[3], P.x, zpows, g1yden);
-		mapvals[1] *= zpows[0];
-		mapvals[2] *= P.y;
-		mapvals[3] *= zpows[0];
-		mapvals[3] *= P.z;
-		Fp::mul(Q.z, mapvals[1], mapvals[3]);
-		Fp::mul(Q.x, mapvals[0], mapvals[3]);
+		/*
+			[xn/xd:y * yn/yd:1] = [xn xd yd^2:y yn xd^3 yd^2:xd yd]
+			=[xn yd z:y yn xd z^2:z] where z = xd yd
+		*/
+		Fp::mul(Q.z, xd, yd);
+		Fp::mul(Q.x, xn, yd);
 		Q.x *= Q.z;
-		Fp t;
-		Fp::sqr(t, Q.z);
-		Fp::mul(Q.y, mapvals[2], mapvals[1]);
-		Q.y *= t;
-#endif
+		Fp::mul(Q.y, P.y, yn);
+		Q.y *= xd;
+		Fp::sqr(xd, Q.z);
+		Q.y *= xd;
 	}
 	/*
 		xi = -2-i
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 59754a17..5b3e547e 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -1159,6 +1159,7 @@ void testMsgToG1(const T& mapto)
 		Q.y.setStr(tbl[i].y, 16);
 		Q.z = 1;
 		CYBOZU_TEST_EQUAL(P, Q);
+CYBOZU_BENCH_C("msgToG1", 1000, mapto.msgToG1, P, msg, msgSize, dst, dstSize);
 	}
 }
 

From 258b63c48768471452293e021b1c3dafef5e6a31 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 30 Jun 2020 14:22:16 +0900
Subject: [PATCH 247/553] change mode of hashAndMapToG1

---
 include/mcl/bn.hpp       | 5 +++++
 test/mapto_wb19_test.cpp | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 657c8fac..61886d05 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -2056,6 +2056,11 @@ inline void mapToG2(G2& P, const Fp2& x, bool fast = false)
 #endif
 inline void hashAndMapToG1(G1& P, const void *buf, size_t bufSize)
 {
+	int mode = getMapToMode();
+	if (mode == MCL_MAP_TO_MODE_HASH_TO_CURVE_07) {
+		BN::param.mapTo.mapTo_WB19_.msgToG1(P, buf, bufSize);
+		return;
+	}
 	Fp t;
 	t.setHashOf(buf, bufSize);
 	bool b;
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 5b3e547e..80a96636 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -1160,6 +1160,11 @@ void testMsgToG1(const T& mapto)
 		Q.z = 1;
 		CYBOZU_TEST_EQUAL(P, Q);
 CYBOZU_BENCH_C("msgToG1", 1000, mapto.msgToG1, P, msg, msgSize, dst, dstSize);
+		if (i == 0) { // correct dst
+			P.clear();
+			bn::hashAndMapToG1(P, msg, msgSize);
+			CYBOZU_TEST_EQUAL(P, Q);
+		}
 	}
 }
 

From e9e5f9fcf1b8b50d379a3a0fd84c6221a0324638 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 30 Jun 2020 12:25:10 +0900
Subject: [PATCH 248/553] v1.20

---
 include/mcl/op.hpp | 2 +-
 readme.md          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index df8fe7e6..3993f1e4 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x111; /* 0xABC = A.BC */
+static const int version = 0x120; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index f56e64f9..742cb800 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,7 @@ mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
+- `hashAndMapToG1` is compatible with [hash-to-curve-09 BLS12381G1_XMD:SHA-256_SSWU_RO_](https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-09.html#name-bls12381g1_xmdsha-256_sswu_)
 - `MCL_MAP_TO_MODE_HASH_TO_CURVE_07` is added for [hash-to-curve-draft-07](https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/07/).
   - The older version will be removed in the future.
 - change DST of hash-to-curve for `MCL_MAP_TO_MODE_HASH_TO_CURVE_06`.

From 322b90c1601acd51a2bfdd5a9ed45759526620f3 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 4 Jul 2020 10:42:24 +0900
Subject: [PATCH 249/553] remove unnecessary param

---
 include/mcl/fp.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 4f3c35b6..cfe40e9a 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -444,13 +444,13 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 	void setLittleEndianMod(const void *buf, size_t bufSize)
 	{
 		bool b;
-		setLittleEndianMod(&b, buf, bufSize, mcl::fp::Mod);
+		setLittleEndianMod(&b, buf, bufSize);
 		if (!b) throw cybozu::Exception("setLittleEndianMod");
 	}
 	void setBigEndianMod(const void *buf, size_t bufSize)
 	{
 		bool b;
-		setBigEndianMod(&b, buf, bufSize, mcl::fp::Mod);
+		setBigEndianMod(&b, buf, bufSize);
 		if (!b) throw cybozu::Exception("setBigEndianMod");
 	}
 	void setByCSPRNG(fp::RandGen rg = fp::RandGen())

From 7909bd297bd6b9652e9d89ce19b04feb4edfabf6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 4 Jul 2020 11:07:06 +0900
Subject: [PATCH 250/553] mcl::bn::mapToG1 is compatible with MAP_TO_G1 in
 eip-2537

---
 include/mcl/bn.hpp         |  4 +++
 include/mcl/mapto_wb19.hpp | 19 +++++++++-----
 include/mcl/op.hpp         |  2 +-
 readme.md                  |  4 ++-
 test/mapto_wb19_test.cpp   | 54 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 61886d05..7a19d0b9 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -584,6 +584,10 @@ struct MapTo {
 	}
 	bool calc(G1& P, const Fp& t) const
 	{
+		if (mapToMode_ == MCL_MAP_TO_MODE_HASH_TO_CURVE_07) {
+			mapTo_WB19_.FpToG1(P, t);
+			return true;
+		}
 		if (!mapToEc(P, t)) return false;
 		mulByCofactor(P);
 		return true;
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 17246d95..4ecaed97 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -599,6 +599,18 @@ struct MapTo_WB19 {
 		}
 		map2curve_osswu2(out, msg, msgSize, dst, strlen(dst));
 	}
+	void FpToG1(G1& out, const Fp& u0, const Fp *u1 = 0) const
+	{
+		E1 P1;
+		sswuG1(P1, u0);
+		if (u1) {
+			E1 P2;
+			sswuG1(P2, *u1);
+			ec::addJacobi(P1, P1, P2);
+		}
+		iso11(out, P1);
+		G1::mulGeneric(out, out, g1cofactor);
+	}
 	void msgToG1(G1& out, const void *msg, size_t msgSize, const char *dst, size_t dstSize) const
 	{
 		assert(draftVersion_ == 7);
@@ -610,12 +622,7 @@ struct MapTo_WB19 {
 			u[i].setBigEndianMod(&b, &md[64 * i], 64);
 			assert(b); (void)b;
 		}
-		E1 P1, P2;
-		sswuG1(P1, u[0]);
-		sswuG1(P2, u[1]);
-		ec::addJacobi(P1, P1, P2);
-		iso11(out, P1);
-		G1::mulGeneric(out, out, g1cofactor);
+		FpToG1(out, u[0], &u[1]);
 	}
 	void msgToG1(G1& out, const void *msg, size_t msgSize) const
 	{
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 3993f1e4..2d2a72eb 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x120; /* 0xABC = A.BC */
+static const int version = 0x121; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index 742cb800..c1b1b569 100644
--- a/readme.md
+++ b/readme.md
@@ -10,7 +10,8 @@ mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
-- `hashAndMapToG1` is compatible with [hash-to-curve-09 BLS12381G1_XMD:SHA-256_SSWU_RO_](https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-09.html#name-bls12381g1_xmdsha-256_sswu_)
+- `mcl::bn::mapToG1(G1& out, const Fp& v)` supports `BLS12_MAP_FP_TO_G1` in [EIP 2537](https://eips.ethereum.org/EIPS/eip-2537).
+- `mcl::bn::hashAndMapToG1(G1& out, const void *msg, size_t msgSize)` supports ([hash-to-curve-09 BLS12381G1_XMD:SHA-256_SSWU_RO_](https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-09.html#name-bls12381g1_xmdsha-256_sswu_))
 - `MCL_MAP_TO_MODE_HASH_TO_CURVE_07` is added for [hash-to-curve-draft-07](https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/07/).
   - The older version will be removed in the future.
 - change DST of hash-to-curve for `MCL_MAP_TO_MODE_HASH_TO_CURVE_06`.
@@ -313,6 +314,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
+- 2020/Jun/04 v1.21 mapToG1 and hashAndMapToG1 are compatible to irtf/eip-2537
 - 2020/May/13 v1.09 support draft-irtf-cfrg-hash-to-curve-07
 - 2020/Mar/26 v1.07 change DST for hash-to-curve-06
 - 2020/Mar/15 v1.06 support hash-to-curve-06
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 80a96636..7528ce12 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -1168,6 +1168,59 @@ CYBOZU_BENCH_C("msgToG1", 1000, mapto.msgToG1, P, msg, msgSize, dst, dstSize);
 	}
 }
 
+std::string appendZeroToRight(const std::string& s, size_t n)
+{
+	if (s.size() >= n) return s;
+	return std::string(n - s.size(), '0') + s;
+}
+
+template<class T>
+void testFpToG1(const T& mapto)
+{
+	const struct {
+		const char *in;
+		const char *out;
+	} tbl[] = {
+		// https://github.com/matter-labs/eip1962/blob/master/src/test/test_vectors/eip2537/fp_to_g1.csv
+		{
+			"0000000000000000000000000000000014406e5bfb9209256a3820879a29ac2f62d6aca82324bf3ae2aa7d3c54792043bd8c791fccdb080c1a52dc68b8b69350",
+			"000000000000000000000000000000000d7721bcdb7ce1047557776eb2659a444166dc6dd55c7ca6e240e21ae9aa18f529f04ac31d861b54faf3307692545db700000000000000000000000000000000108286acbdf4384f67659a8abe89e712a504cb3ce1cba07a716869025d60d499a00d1da8cdc92958918c222ea93d87f0",
+		},
+		{
+			"000000000000000000000000000000000e885bb33996e12f07da69073e2c0cc880bc8eff26d2a724299eb12d54f4bcf26f4748bb020e80a7e3794a7b0e47a641",
+			"00000000000000000000000000000000191ba6e4c4dafa22c03d41b050fe8782629337641be21e0397dc2553eb8588318a21d30647182782dee7f62a22fd020c000000000000000000000000000000000a721510a67277eabed3f153bd91df0074e1cbd37ef65b85226b1ce4fb5346d943cf21c388f0c5edbc753888254c760a",
+		},
+		{
+			"000000000000000000000000000000000ba1b6d79150bdc368a14157ebfe8b5f691cf657a6bbe30e79b6654691136577d2ef1b36bfb232e3336e7e4c9352a8ed",
+			"000000000000000000000000000000001658c31c0db44b5f029dba56786776358f184341458577b94d3a53c877af84ffbb1a13cc47d228a76abb4b67912991850000000000000000000000000000000018cf1f27eab0a1a66f28a227bd624b7d1286af8f85562c3f03950879dd3b8b4b72e74b034223c6fd93de7cd1ade367cb",
+		},
+	};
+	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+		Uint8Vec v = fromHexStr(tbl[i].in);
+		Fp x;
+		/*
+			serialize() accepts only values in [0, p).
+			The test values exceed p, so use setBigEndianMod to set (v mod p).
+		*/
+		x.setBigEndianMod(v.data(), v.size());
+		G1 P;
+		mapto.FpToG1(P, x);
+		/*
+			The test value is the form such as "000...<x>000...<y>".
+			So normalize P and pad zeros to compare it.
+		*/
+		P.normalize();
+		const size_t L = 128;
+		std::string s = appendZeroToRight(P.x.getStr(16), L) + appendZeroToRight(P.y.getStr(16), L);
+		CYBOZU_TEST_EQUAL(s, tbl[i].out);
+		{
+			G1 Q;
+			mcl::bn::mapToG1(Q, x);
+			CYBOZU_TEST_EQUAL(P, Q);
+		}
+	}
+}
+
 CYBOZU_TEST_AUTO(test)
 {
 	initPairing(mcl::BLS12_381);
@@ -1194,4 +1247,5 @@ CYBOZU_TEST_AUTO(test)
 	testEth2phase0();
 	testSswuG1(mapto);
 	testMsgToG1(mapto);
+	testFpToG1(mapto);
 }

From 730c50d4eaff1e0d685a92ac8c896e873749471b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 6 Jul 2020 18:17:43 +0900
Subject: [PATCH 251/553] remove old eth hash-functions

---
 api.md                         |  10 +-
 ffi/go/mcl/mcl.go              |   6 -
 include/mcl/bn.h               |  13 +-
 include/mcl/bn.hpp             |  40 +--
 include/mcl/curve_type.h       |   8 +-
 include/mcl/fp.hpp             |   2 -
 include/mcl/impl/bn_c_impl.hpp |   5 -
 include/mcl/mapto_wb19.hpp     |  38 +--
 src/fp.cpp                     |  35 --
 test/bls12_test.cpp            |  43 ---
 test/bn_c_test.hpp             |  69 ----
 test/mapto_wb19_test.cpp       | 598 +--------------------------------
 12 files changed, 16 insertions(+), 851 deletions(-)

diff --git a/api.md b/api.md
index 52498264..0045b7a5 100644
--- a/api.md
+++ b/api.md
@@ -1,13 +1,9 @@
 # C API
 
-## New features
+## News
 
-```
-void mclBn_setOriginalG2cofactor(int enable);
-```
-Use faster multiplication of `G2` with cofactor if `enable = 1`.
-This is enabled if `mclBn_setMapToMode(MCL_MAP_TO_MODE_ETH2)`.
-if `enable = 0`, then [the fast algorithm (mulByCofactorBLS12)](https://github.com/herumi/mcl/blob/master/include/mcl/bn.hpp#L463) is used.
+APIs for old Ethereum hash functions are removed.
+`mclBn_setMapToMode` no longer accepts `MCL_MAP_TO_MODE_ETH2`; use `MCL_MAP_TO_MODE_HASH_TO_CURVE_07` for hash-to-curve.
 
 ## Minimum sample
 
diff --git a/ffi/go/mcl/mcl.go b/ffi/go/mcl/mcl.go
index c9658afb..97f0c7d7 100644
--- a/ffi/go/mcl/mcl.go
+++ b/ffi/go/mcl/mcl.go
@@ -123,12 +123,6 @@ func SetETHserialization(enable bool) {
 	C.mclBn_setETHserialization(bool2Cint(enable))
 }
 
-// SetOriginalG2cofactor -- true if BLS_ETH is defined
-func SetOriginalG2cofactor(enable bool) {
-	// #nosec
-	C.mclBn_setOriginalG2cofactor(bool2Cint(enable))
-}
-
 // SetMapToMode --
 func SetMapToMode(mode int) error {
 	// #nosec
diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 04d12aea..1396d6b1 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -201,19 +201,8 @@ MCLBN_DLL_API void mclBn_setETHserialization(int enable);
 MCLBN_DLL_API int mclBn_getETHserialization(void);
 
 /*
-	use original g2cofactor
-	@param enable [in] 1:enable,  0:disable(default)
-	use faster algorithm for multiplication of G2 with g2cofactor if enable
-	The constant is 0x204d0ec030004ec0600000002fffffffd times original g2cofacotr
-	@see MapTo::mulByCofactorBLS12
-*/
-MCLBN_DLL_API void mclBn_setOriginalG2cofactor(int enable);
-
-/*
-	set map-to-function to mode (defalt:MCL_MAP_TO_MODE_ORIGINAL)
-	https://github.com/ethereum/eth2.0-specs/blob/dev/specs/bls_signature.md#modular_squareroot
+	set map-to-function to mode (of the hash-to-curve modes, only MCL_MAP_TO_MODE_HASH_TO_CURVE_07 is supported)
 	return 0 if success else -1
-	@note call mclBn_setOriginalG2cofactor(true) if MCL_MAP_TO_MODE_ETH2
 */
 MCLBN_DLL_API int mclBn_setMapToMode(int mode);
 
diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 7a19d0b9..13e43e91 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -500,37 +500,18 @@ struct MapTo {
 		switch (mode) {
 		case MCL_MAP_TO_MODE_ORIGINAL:
 		case MCL_MAP_TO_MODE_TRY_AND_INC:
-		case MCL_MAP_TO_MODE_ETH2:
+//		case MCL_MAP_TO_MODE_ETH2:
 			mapToMode_ = mode;
 			return true;
 			break;
-		case MCL_MAP_TO_MODE_HASH_TO_CURVE_05:
-			mapToMode_ = mode;
-			mapTo_WB19_.setDraftVersion(5);
-			return true;
-			break;
-		case MCL_MAP_TO_MODE_HASH_TO_CURVE_06:
-			mapToMode_ = mode;
-			mapTo_WB19_.setDraftVersion(6);
-			return true;
-			break;
 		case MCL_MAP_TO_MODE_HASH_TO_CURVE_07:
 			mapToMode_ = mode;
-			mapTo_WB19_.setDraftVersion(7);
 			return true;
 			break;
 		default:
 			return false;
 		}
 	}
-	void setOriginalG2cofactor(bool enable)
-	{
-		if (type_ == BLS12type) {
-			useOriginalG2cofactor_ = enable;
-		} else {
-			useOriginalG2cofactor_ = false;
-		}
-	}
 	/*
 		if type == STD_ECtype, then cofactor, z are not used.
 	*/
@@ -551,7 +532,7 @@ struct MapTo {
 	template<class G, class F>
 	bool mapToEc(G& P, const F& t) const
 	{
-		if (mapToMode_ == MCL_MAP_TO_MODE_TRY_AND_INC || mapToMode_ == MCL_MAP_TO_MODE_ETH2) {
+		if (mapToMode_ == MCL_MAP_TO_MODE_TRY_AND_INC) {
 			naiveMapTo<G, F>(P, t);
 		} else {
 			if (!calcBN<G, F>(P, t)) return false;
@@ -594,19 +575,11 @@ struct MapTo {
 	}
 	bool calc(G2& P, const Fp2& t, bool fast = false) const
 	{
-		if (mapToMode_ == MCL_MAP_TO_MODE_WB19 || mapToMode_ >= MCL_MAP_TO_MODE_HASH_TO_CURVE_06) {
+		if (mapToMode_ == MCL_MAP_TO_MODE_HASH_TO_CURVE_07) {
 			mapTo_WB19_.opt_swu2_map(P, t);
 			return true;
 		}
 		if (!mapToEc(P, t)) return false;
-		if (mapToMode_ == MCL_MAP_TO_MODE_ETH2) {
-			Fp2 negY;
-			Fp2::neg(negY, P.y);
-			int cmp = Fp::compare(P.y.b, negY.b);
-			if (!(cmp > 0 || (cmp == 0 && P.y.a > negY.a))) {
-				P.y = negY;
-			}
-		}
 		mulByCofactor(P, fast);
 		return true;
 	}
@@ -2027,15 +2000,8 @@ inline void millerLoopVec(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
 	}
 }
 
-inline void setOriginalG2cofactor(bool enable)
-{
-	BN::nonConstParam.mapTo.setOriginalG2cofactor(enable);
-}
 inline bool setMapToMode(int mode)
 {
-	if (mode == MCL_MAP_TO_MODE_ETH2) {
-		setOriginalG2cofactor(true);
-	}
 	return BN::nonConstParam.mapTo.setMapToMode(mode);
 }
 inline int getMapToMode()
diff --git a/include/mcl/curve_type.h b/include/mcl/curve_type.h
index dae261c6..454f8d8c 100644
--- a/include/mcl/curve_type.h
+++ b/include/mcl/curve_type.h
@@ -44,10 +44,10 @@ enum {
 enum {
 	MCL_MAP_TO_MODE_ORIGINAL, // see MapTo::calcBN
 	MCL_MAP_TO_MODE_TRY_AND_INC, // try-and-incremental-x
-	MCL_MAP_TO_MODE_ETH2, // old eth2.0 spec
-	MCL_MAP_TO_MODE_WB19, // used in new eth2.0 spec
-	MCL_MAP_TO_MODE_HASH_TO_CURVE_05 = MCL_MAP_TO_MODE_WB19, // draft-irtf-cfrg-hash-to-curve-05
-	MCL_MAP_TO_MODE_HASH_TO_CURVE_06, // draft-irtf-cfrg-hash-to-curve-06
+	MCL_MAP_TO_MODE_ETH2, // (deprecated) old eth2.0 spec
+	MCL_MAP_TO_MODE_WB19, // (deprecated) used in new eth2.0 spec
+	MCL_MAP_TO_MODE_HASH_TO_CURVE_05 = MCL_MAP_TO_MODE_WB19, // (deprecated) draft-irtf-cfrg-hash-to-curve-05
+	MCL_MAP_TO_MODE_HASH_TO_CURVE_06, // (deprecated) draft-irtf-cfrg-hash-to-curve-06
 	MCL_MAP_TO_MODE_HASH_TO_CURVE_07, // draft-irtf-cfrg-hash-to-curve-07
 	MCL_MAP_TO_MODE_HASH_TO_CURVE = MCL_MAP_TO_MODE_HASH_TO_CURVE_07 // the latset version
 };
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index cfe40e9a..a0af7477 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -80,8 +80,6 @@ void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6]);
 // draft-07 outSize = 128 or 256
 void expand_message_xmd(uint8_t out[], size_t outSize, const void *msg, size_t msgSize, const void *dst, size_t dstSize);
 
-void expand_message_xmd06(uint8_t out[256], const void *msg, size_t msgSize, const void *dst, size_t dstSize);
-
 namespace local {
 
 inline void byteSwap(void *x, size_t n)
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index 52eaa504..1cca8383 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -150,11 +150,6 @@ int mclBn_ethMsgToG2(mclBnG2 *out, const void *msg, size_t msgSize, const void *
 	return mcl::bn::ethMsgToG2(*cast(out), msg, msgSize, dst, dstSize) ? 0 : -1;
 }
 
-void mclBn_setOriginalG2cofactor(int enable)
-{
-	setOriginalG2cofactor(enable == 1);
-}
-
 ////////////////////////////////////////////////
 // set zero
 void mclBnFr_clear(mclBnFr *x)
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 4ecaed97..cefd3ac4 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -88,11 +88,6 @@ struct MapTo_WB19 {
 	Fp g1yden[16];
 	mpz_class g1cofactor;
 	int g1Z;
-	int draftVersion_;
-	void setDraftVersion(int draftVersion)
-	{
-		draftVersion_ = draftVersion;
-	}
 	void init()
 	{
 		bool b;
@@ -142,7 +137,6 @@ struct MapTo_WB19 {
 		Fp::neg(etas[3].a, ev4);
 		etas[3].b = ev3;
 		init_iso3();
-		draftVersion_ = 5;
 		{
 			const char *A = "0x144698a3b8e9433d693a02c96d4982b0ea985383ee66a8d8e8981aefd881ac98936f8da0e0f97f5cf428082d584c1d";
 			const char *B = "0x12e2908d11688030018b12e8753eee3b2016c1f0f24f4070a0b9c14fcef35ef55a23215a316ceaa5d1cc48e98e172be0";
@@ -385,15 +379,7 @@ struct MapTo_WB19 {
 	}
 	bool isNegSign(const Fp2& x) const
 	{
-		if (draftVersion_ == 7) {
-			return sgn0(x);
-		}
-		// x.isNegative() <=> x > (p-1)/2 <=> x >= (p+1)/2
-		if (x.b.isNegative()) return true;
-		if (!x.b.isZero()) return false;
-		if (x.a.isNegative()) return true;
-		if (!x.b.isZero()) return false;
-		return false;
+		return sgn0(x);
 	}
 	// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-07#appendix-D.3.5
 	void sswuG1(Fp& xn, Fp& xd, Fp& y, const Fp& u) const
@@ -566,11 +552,7 @@ struct MapTo_WB19 {
 	void hashToFp2(Fp2 out[2], const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
 	{
 		uint8_t md[256];
-		if (draftVersion_ == 6) {
-			mcl::fp::expand_message_xmd06(md, msg, msgSize, dst, dstSize);
-		} else {
-			mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
-		}
+		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
 		Fp *x = out[0].getFp0();
 		for (size_t i = 0; i < 4; i++) {
 			bool b;
@@ -581,22 +563,12 @@ struct MapTo_WB19 {
 	void map2curve_osswu2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
 	{
 		Fp2 t[2];
-		if (draftVersion_ == 5) {
-			hashToFp2old(t[0], msg, msgSize, 0, dst, dstSize);
-			hashToFp2old(t[1], msg, msgSize, 1, dst, dstSize);
-		} else {
-			hashToFp2(t, msg, msgSize, dst, dstSize);
-		}
+		hashToFp2(t, msg, msgSize, dst, dstSize);
 		opt_swu2_map(out, t[0], &t[1]);
 	}
 	void msgToG2(G2& out, const void *msg, size_t msgSize) const
 	{
-		const char *dst;
-		if (draftVersion_ == 5) {
-			dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO-_POP_";
-		} else {
-			dst = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_";
-		}
+		const char *dst = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_";
 		map2curve_osswu2(out, msg, msgSize, dst, strlen(dst));
 	}
 	void FpToG1(G1& out, const Fp& u0, const Fp *u1 = 0) const
@@ -613,7 +585,6 @@ struct MapTo_WB19 {
 	}
 	void msgToG1(G1& out, const void *msg, size_t msgSize, const char *dst, size_t dstSize) const
 	{
-		assert(draftVersion_ == 7);
 		uint8_t md[128];
 		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
 		Fp u[2];
@@ -626,7 +597,6 @@ struct MapTo_WB19 {
 	}
 	void msgToG1(G1& out, const void *msg, size_t msgSize) const
 	{
-		assert(draftVersion_ == 7);
 		const char *dst = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_";
 		const size_t dstSize = strlen(dst);
 		msgToG1(out, msg, msgSize, dst, dstSize);
diff --git a/src/fp.cpp b/src/fp.cpp
index 343d2427..ab09ff1b 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -159,41 +159,6 @@ void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6])
 	cybozu::hmac256(out + 32, prk, 32, out, 32 + 6);
 }
 
-void expand_message_xmd06(uint8_t out[256], const void *msg, size_t msgSize, const void *dst, size_t dstSize)
-{
-	const size_t len_in_bytes = 256;
-	const size_t mdSize = 32;
-	const size_t r_in_bytes = 64;
-	const size_t ell = len_in_bytes / mdSize;
-	static const uint8_t Z_pad[r_in_bytes] = {};
-	assert(dstSize < 256);
-	// BE(len_in_bytes, 2) + BE(0, 1) + BE(dstSize, 1)
-	uint8_t buf[2 + 1 + 1] = { 1, 0, 0, uint8_t(dstSize) };
-	uint8_t *const buf2 = buf + 2; // BE(0, 1) + BE(dstSize, 1)
-	cybozu::Sha256 h;
-	h.update(Z_pad, r_in_bytes);
-	h.update(msg, msgSize);
-	h.update(buf, sizeof(buf));
-	uint8_t md[mdSize];
-	h.digest(md, mdSize, dst, dstSize);
-	h.clear();
-	buf2[0] = 1;
-	h.update(md, mdSize);
-	h.update(buf2, 2);
-	h.digest(out, mdSize, dst, dstSize);
-	uint8_t mdXor[mdSize];
-	for (size_t i = 1; i < ell; i++) {
-		h.clear();
-		for (size_t j = 0; j < mdSize; j++) {
-			mdXor[j] = md[j] ^ out[mdSize * (i - 1) + j];
-		}
-		h.update(mdXor, mdSize);
-		buf2[0] = uint8_t(i + 1);
-		h.update(buf2, 2);
-		h.digest(out + mdSize * i, mdSize, dst, dstSize);
-	}
-}
-
 void expand_message_xmd(uint8_t out[], size_t outSize, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
 {
 	assert(outSize == 128 || outSize == 256);
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index df8b1273..1ea05186 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -704,49 +704,6 @@ CYBOZU_TEST_AUTO(multi)
 	CYBOZU_BENCH_C("naiveG2", 100, (BN::param.mapTo.naiveMapTo<G2, Fp2>), Q, i++);
 }
 
-CYBOZU_TEST_AUTO(eth2)
-{
-	if (BN::param.cp.curveType != MCL_BLS12_381) return;
-	Fp::setETHserialization(true);
-	Fr::setETHserialization(true);
-	setMapToMode(MCL_MAP_TO_MODE_ETH2);
-	Fr sec;
-	sec.setStr("0x47b8192d77bf871b62e87859d653922725724a5c031afeabc60bcef5ff665138");
-	uint8_t msg[] = {
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 87, 33, 13, 72, 155, 73, 4, 185, 87, 46, 230, 247, 159, 191, 7, 148, 85, 120, 129, 175, 102, 169, 241, 139, 189, 44, 244, 68, 119, 60, 28, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 225, 95, 237, 38, 188, 142, 181, 147, 233, 183, 232, 13, 219, 92, 94, 79, 19, 174, 172, 105, 133, 207, 4, 113, 115, 242, 140, 138, 44, 215, 244, 77
-	};
-	const uint8_t sigStr[] = {
-		6, 239, 41, 231, 36, 30, 26, 28, 198, 15, 238, 50, 142, 50, 144, 192, 35, 213, 90, 103, 1, 219, 80, 14, 239, 171, 127, 145, 57, 26, 139, 135, 38, 253, 0, 36, 18, 30, 100, 99, 114, 129, 249, 7, 19, 127, 226, 104, 24, 123, 75, 172, 163, 99, 136, 233, 97, 148, 183, 58, 125, 83, 47, 110, 234, 107, 192, 152, 119, 141, 191, 211, 64, 69, 132, 97, 59, 91, 169, 218, 151, 213, 96, 46, 49, 253, 190, 146, 112, 184, 99, 135, 101, 41, 178, 84, 18, 210, 104, 251, 230, 10, 193, 72, 64, 52, 41, 52, 81, 12, 106, 12, 31, 250, 171, 222, 116, 82, 153, 227, 157, 225, 55, 196, 22, 100, 207, 162, 163, 65, 163, 112, 14, 234, 31, 243, 107, 2, 227, 249, 10, 187, 131, 10, 3, 211, 176, 25, 9, 1, 154, 245, 167, 74, 192, 135, 28, 44, 85, 238, 179, 95, 250, 20, 39, 137, 56, 40, 196, 66, 91, 125, 231, 240, 32, 204, 95, 9, 56, 38, 62, 180, 158, 95, 1, 58, 2, 126, 173, 200, 94, 46
-	};
-	(void)sigStr;
-	G1 gen;
-	gen.setStr("1 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569", 10);
-	Fp2 m;
-	CYBOZU_TEST_ASSERT(m.deserialize(msg, sizeof(msg)) > 0);
-	G2 Q;
-	mapToG2(Q, m);
-
-	G2 sig = Q * sec;
-	const char *expectSig = "b9d1bf921b3dd048bdce38c2ceac2a2a8093c864881f2415f22b198de935ffa791707855c1656dc21a7af2d502bb46590151d645f062634c3b2cb79c4ed1c4a4b8b3f19f0f5c76965c651553e83d153ff95353735156eff77692f7a62ae653fb";
-	CYBOZU_TEST_EQUAL(sig.getStr(mcl::IoSerializeHexStr), expectSig);
-
-	CYBOZU_BENCH_C("mapToG2  org-cofactor", 1000, mapToG2, Q, m, false);
-	CYBOZU_BENCH_C("mapToG2 fast-cofactor", 1000, mapToG2, Q, m, true);
-
-	Fp2 x;
-	x.a = 5;
-	x.b = 3;
-	const mpz_class& g2c = BN::param.mapTo.g2cofactor_;
-	const Fr& g2ca = getG2cofactorAdj();
-	G2 Q1, Q2, Q3;
-	BN::param.mapTo.mapToEc(Q, x);
-	G2::mulGeneric(Q1, Q, g2c);
-	Q2 = Q;
-	BN::param.mapTo.mulByCofactor(Q2, true);
-	Q2 *= g2ca;
-	CYBOZU_TEST_EQUAL(Q1, Q2);
-}
-
 CYBOZU_TEST_AUTO(deserialize)
 {
 	if (BN::param.cp.curveType != MCL_BLS12_381) return;
diff --git a/test/bn_c_test.hpp b/test/bn_c_test.hpp
index 0f871236..446f72e0 100644
--- a/test/bn_c_test.hpp
+++ b/test/bn_c_test.hpp
@@ -669,75 +669,6 @@ void setFp2(mclBnFp2 *x, const Fp2Str& s)
 	CYBOZU_TEST_EQUAL(mclBnFp_setStr(&x->d[1], s.b, strlen(s.b), 16), 0);
 }
 
-CYBOZU_TEST_AUTO(eth_hash)
-{
-	int curveType = mclBn_getCurveType();
-	if (curveType != MCL_BLS12_381) return;
-	{
-		const char *msg = "msg";
-		uint8_t ctr = 0;
-		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_";
-		const Fp2Str ys = {
-			"18df4dc51885b18ca0082a4966b0def46287930b8f1c0b673b11ac48d19c8899bc150d83fd3a7a1430b0de541742c1d4",
-			"14eef8ca34b82d065d187a3904cb313dbb44558917cc5091574d9999b5ecfdd5af2fa3aea6e02fb253bf4ae670e72d55"
-		};
-		mclBnFp2 x, y;
-		CYBOZU_TEST_EQUAL(mclBn_ethMsgToFp2(&x, msg, strlen(msg), ctr, dst, strlen(dst)), 0);
-		setFp2(&y, ys);
-		CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&x, &y));
-	}
-	{
-		const Fp2Str u0s = {
-			"0x004ad233c619209060e40059b81e4c1f92796b05aa1bc6358d65e53dc0d657dfbc713d4030b0b6d9234a6634fd1944e7",
-			"0x0e2386c82713441bc3b06a460bd81850f4bf376ea89c80b18c0881e855c58dc8e83b2fd23af983f4786508e30c42af01",
-		};
-		const Fp2Str u1s = {
-			"0x08a6a75e0a8d32f1e096f29047ea879dd34a5504218d7ce92c32c244786822fb73fbf708d167ad86537468249ec6df48",
-			"0x07016d0e5e13cd65780042c6f7b4c74ae1c58da438c99582696818b5c229895b893318dcb87d2a65e557d4ebeb408b70",
-		};
-		const Fp2Str xs = {
-			"0x4861c41efcc5fc56e62273692b48da25d950d2a0aaffb34eff80e8dbdc2d41ca38555ceb8554368436aea47d16056b5",
-			"0x9db5217528c55d982cf05fc54242bdcd25f1ebb73372e00e16d8e0f19dc3aeabdeef2d42d693405a04c37d60961526a",
-		};
-		const Fp2Str ys = {
-			"0x177d05b95e7879a7ddbd83c15114b5a4e9846fde72b2263072dc9e60db548ccbadaacb92cc4952d4f47425fe3c5e0172",
-			"0xfc82c99b928ed9df12a74f9215c3df8ae1e9a3fa54c00897889296890b23a0edcbb9653f9170bf715f882b35c0b4647",
-		};
-		mclBnFp2 u0, u1, x, y;
-		setFp2(&u0, u0s);
-		setFp2(&u1, u1s);
-		setFp2(&x, xs);
-		setFp2(&y, ys);
-		mclBnG2 P;
-		mclBn_ethFp2ToG2(&P, &u0, &u1);
-		mclBnG2_normalize(&P, &P);
-		CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&P.x, &x));
-		CYBOZU_TEST_ASSERT(mclBnFp2_isEqual(&P.y, &y));
-	}
-	{
-		const char *msg = "msg";
-		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_";
-		const Fp2Str xs = {
-			"0xb1871d245d50ec4e5a3ac790628864d24655208812abc420b67a93c5afdd38111137f14ca0f844ddbf69809897ca941",
-			"0xa8b490ae1aac870b16b1a82db2e9653ec14485fc5f38c2ce2926c526537262061d4cd8bc62cc90e98235952a7fe7f13",
-		};
-		const Fp2Str ys = {
-			"0x2c8e9f9d52870075ae5879be5a4994a16db6c93b34453d9c055eb058107a2d805cc307b0ba30144518fb36da5f97d12",
-			"0x344ce62d77dda0b4b509d5b5e6ef08f99c972fc0e5f0c25b25bb881384e85b8b1086043813e674f9bbc4b67dd47d9a7",
-		};
-		const Fp2Str zs = {
-			"0x1515a4d612e48626000f998a220029380a47e9e6c69d497db804e2dfc3dbce5cfb000a559b64f50796f26ddc4cf3be2c",
-			"0x1796ee0f0b9b65802c90e3e1586034f3826ec3538c66525de298d1ff2f7a26f2ec553ec64e5989ed9841c4456d0bddd7",
-		};
-		mclBnG2 P, Q;
-		mclBn_ethMsgToG2(&P, msg, strlen(msg), dst, strlen(dst));
-		setFp2(&Q.x, xs);
-		setFp2(&Q.y, ys);
-		setFp2(&Q.z, zs);
-		CYBOZU_TEST_ASSERT(mclBnG2_isEqual(&P, &Q));
-	}
-}
-
 #if MCLBN_FP_UNIT_SIZE == 6 && MCLBN_FR_UNIT_SIZE >= 6
 CYBOZU_TEST_AUTO(badG2)
 {
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 7528ce12..7cd6b2db 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -97,123 +97,6 @@ std::string toHexStr(const G2& P)
 	return toHexStr(xy, 96);
 }
 
-/*
-	z = sqrt(u/v) = (uv^7) (uv^15)^((p^2-9)/16) * root4
-	return true if found
-*/
-bool sqr_div(const MapTo& mapto, Fp2& z, const Fp2& u, const Fp2& v)
-{
-	Fp2 gamma, t1, t2;
-	Fp2::sqr(gamma, v); // v^2
-	Fp2::sqr(t2, gamma); // v^4
-	Fp2::mul(t1, u, v); // uv
-	t1 *= gamma; // uv^3
-	t1 *= t2; // uv^7
-	Fp2::sqr(t2, t2); // v^8
-	t2 *= t1;
-	Fp2::pow(gamma, t2, mapto.sqrtConst);
-	gamma *= t1;
-	Fp2 candi;
-	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(mapto.root4); i++) {
-		Fp2::mul(candi, gamma, mapto.root4[i]);
-		Fp2::sqr(t1, candi);
-		t1 *= v;
-		if (t1 == u) {
-			z = candi;
-			return true;
-		}
-	}
-	z = gamma;
-	return false;
-}
-
-// Proj
-void py_ecc_iso_map_G2(const MapTo& mapto, G2& Q, const E2& P)
-{
-	Fp2 zpows[3];
-	zpows[0] = P.z;
-	Fp2::sqr(zpows[1], zpows[0]);
-	Fp2::mul(zpows[2], zpows[1], zpows[0]);
-	Fp2 mapvals[4];
-	mapto.evalPoly(mapvals[0], P.x, zpows, mapto.xnum);
-	mapto.evalPoly(mapvals[1], P.x, zpows, mapto.xden);
-	mapto.evalPoly(mapvals[2], P.x, zpows, mapto.ynum);
-	mapto.evalPoly(mapvals[3], P.x, zpows, mapto.yden);
-	mapvals[1] *= P.z;
-	mapvals[2] *= P.y;
-	mapvals[3] *= P.z;
-	Fp2::mul(Q.z, mapvals[1], mapvals[3]);
-	Fp2::mul(Q.x, mapvals[0], mapvals[3]);
-	Fp2::mul(Q.y, mapvals[1], mapvals[2]);
-}
-
-// https://github.com/ethereum/py_ecc
-void py_ecc_optimized_swu_G2(const MapTo& mapto, E2& P, const Fp2& t)
-{
-	Fp2 t2, t2xi, t2xi2;
-	Fp2::sqr(t2, t);
-	mapto.mul_xi(t2xi, t2);
-	Fp2::sqr(t2xi2, t2xi);
-	Fp2 nume, deno;
-	// (t^2 * xi)^2 + (t^2 * xi)
-	Fp2::add(deno, t2xi2, t2xi);
-	Fp2::add(nume, deno, 1);
-	nume *= mapto.g2B;
-	if (deno.isZero()) {
-		mapto.mul_xi(deno, mapto.g2A);
-	} else {
-		deno *= -mapto.g2A;
-	}
-	Fp2 u, v;
-	{
-		Fp2 deno2, tmp, tmp1, tmp2;
-		Fp2::sqr(deno2, deno);
-		Fp2::mul(v, deno2, deno);
-
-		Fp2::mul(u, mapto.g2B, v);
-		Fp2::mul(tmp, mapto.g2A, nume);
-		tmp *= deno2;
-		u += tmp;
-		Fp2::sqr(tmp, nume);
-		tmp *= nume;
-		u += tmp;
-	}
-	Fp2 candi;
-	bool success = sqr_div(mapto, candi, u, v);
-	P.y = candi;
-	candi *= t2;
-	candi *= t;
-	u *= t2xi2;
-	u *= t2xi;
-	bool success2 = false;
-	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(mapto.etas); i++) {
-		Fp2 t1;
-		Fp2::mul(t1, mapto.etas[i], candi);
-		Fp2::sqr(t2, t1);
-		t2 *= v;
-		if (t2 == u && !success && !success2) {
-			P.y = t1;
-			success2 = true;
-		}
-	}
-	assert(success || success2);
-	if (!success) {
-		nume *= t2xi;
-	}
-	if (mapto.isNegSign(t) != mapto.isNegSign(P.y)) {
-		Fp2::neg(P.y, P.y);
-	}
-	P.y *= deno;
-	P.x = nume;
-	P.z = deno;
-}
-// Proj
-void py_ecc_map_to_curve_G2(const MapTo& mapto, G2& out, const Fp2& t)
-{
-	E2 P;
-	py_ecc_optimized_swu_G2(mapto, P, t);
-	py_ecc_iso_map_G2(mapto, out, P);
-}
 /*
 	in : Proj [X:Y:Z]
 	out : Jacobi [A:B:C]
@@ -231,50 +114,6 @@ void toJacobi(G2& out, const G2& in)
 	out.z = in.z;
 }
 
-void py_ecc_hash_to_G2(const MapTo& mapto, G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
-{
-	Fp2 t1, t2;
-	hashToFp2old(t1, msg, msgSize, 0, dst, dstSize);
-	hashToFp2old(t2, msg, msgSize, 1, dst, dstSize);
-	G2 P1, P2;
-	py_ecc_map_to_curve_G2(mapto, P1, t1);
-	py_ecc_map_to_curve_G2(mapto, P2, t2);
-	toJacobi(P1, P1);
-	toJacobi(P2, P2);
-	P1 += P2;
-	mapto.clear_h2(out, P1);
-}
-
-void ethMsgToG2test(const std::string& fileName)
-{
-	const char *dst = "\x02";
-	printf("name=%s\n", fileName.c_str());
-	std::ifstream ifs(fileName.c_str());
-	Uint8Vec buf;
-	G2 out;
-	for (;;) {
-		std::string msg, zero, ret;
-		ifs >> msg >> zero >> ret;
-		if (zero != "00") break;
-		buf = fromHexStr(msg);
-		ethMsgToG2(out, buf.data(), buf.size(), dst, strlen(dst));
-		std::string s = toHexStr(out);
-		CYBOZU_TEST_EQUAL(s, ret);
-	}
-}
-
-void ethMsgToG2testAll(const std::string& dir)
-	try
-{
-	cybozu::FileList list = cybozu::GetFileList(dir);
-	for (size_t i = 0; i < list.size(); i++) {
-		const cybozu::FileInfo& info = list[i];
-		ethMsgToG2test(dir + "/" + info.name);
-	}
-} catch (...) {
-	printf("skip test because `%s` is not found\n", dir.c_str());
-}
-
 void testHMAC()
 {
 	const char *key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b";
@@ -286,254 +125,6 @@ void testHMAC()
 	CYBOZU_TEST_EQUAL(out, expect);
 }
 
-void testHashToFp2()
-{
-	const char *msg = "the message to be signed";
-	const char *dst = "\x02";
-	const char *outS = "0xe54bc0f2e26071a79ba5fe7ae5307d39cf5519e581e03b43f39a431eccc258fa1477c517b1268b22986601ee5caa5ea 0x17e8397d5e687ff7f915c23f27fe1ca2c397a7df91de8c88dc82d34c9188a3ef719f9f20436ea8a5fe7d509fbc79214d";
-	Fp2 out, ok;
-	ok.setStr(outS);
-	ethMsgToFp2(out, msg, strlen(msg), 0, dst, strlen(dst));
-	CYBOZU_TEST_EQUAL(out, ok);
-}
-
-void ethMsgToG2test()
-{
-	const char *msg = "the message to be signed";
-	const char *dst = "\x02";
-	const PointStr outS = {
-		{
-			"0x29670bca15e948605ae32ac737b719f926bc8cb99e980bf0542cada47f71a9f299f4d8c332776da38c8768ea719911",
-			"0x111b35c14e065f0af7bb2697cba31bd21f629c0d42f75411340ae608df3bc2572b746935a788caa6ef10014ee02a0bf0",
-		},
-		{
-			"0xe99fd88ee5bd8272483b498245a59b34a22d4820cdd564fc044510210e6d8da62752ac467dac6421b330b2f62385305",
-			"0x199c95bcff2d9ae3486d12892740a35904deddc63d33d1080d498fbe1ce468a8efeb9d62e183c71f0a3bf58422e2f1a2",
-		},
-		{
-			"0x147428ea49f35d9864bfc6685e0651f340f1201082c9dce4b99c72d45bf2d4deda4dcb151cefdfd1dd224c8bb85c8a71",
-			"0x7a14a1a0a8a27423e5d912879fec8054ae95f035642e3806fa514b9f1dbbb2bc1144dac067c52305e60e8bc421ad5b4",
-		},
-	};
-	G2 out, ok;
-	set(ok, outS);
-	ethMsgToG2(out, msg, strlen(msg), dst, strlen(dst));
-	CYBOZU_TEST_EQUAL(out, ok);
-}
-
-template<class T>
-void py_eccTest(const T& mapto)
-{
-	/*
-		testHashToBaseFP2
-		https://github.com/status-im/nim-blscurve/blob/de64516a5933a6e8ebb01a346430e61a201b5775/blscurve/hash_to_curve.nim#L492
-	*/
-	{
-		const char *msg = "msg";
-		uint8_t ctr = 0;
-		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_";
-		const char *expect = "18df4dc51885b18ca0082a4966b0def46287930b8f1c0b673b11ac48d19c8899bc150d83fd3a7a1430b0de541742c1d4 14eef8ca34b82d065d187a3904cb313dbb44558917cc5091574d9999b5ecfdd5af2fa3aea6e02fb253bf4ae670e72d55";
-		Fp2 x;
-		ethMsgToFp2(x, msg, strlen(msg), ctr, dst, strlen(dst));
-		CYBOZU_TEST_EQUAL(toHexStr(x), expect);
-	}
-	{
-		const Fp2Str u0s = {
-			"0x004ad233c619209060e40059b81e4c1f92796b05aa1bc6358d65e53dc0d657dfbc713d4030b0b6d9234a6634fd1944e7",
-			"0x0e2386c82713441bc3b06a460bd81850f4bf376ea89c80b18c0881e855c58dc8e83b2fd23af983f4786508e30c42af01",
-		};
-		const Fp2Str u1s = {
-			"0x08a6a75e0a8d32f1e096f29047ea879dd34a5504218d7ce92c32c244786822fb73fbf708d167ad86537468249ec6df48",
-			"0x07016d0e5e13cd65780042c6f7b4c74ae1c58da438c99582696818b5c229895b893318dcb87d2a65e557d4ebeb408b70",
-		};
-		// return value of opt_swu2_map in bls_sigs_ref/python-impl/opt_swu_g2.py
-		const Fp2Str xs = {
-			"0x4861c41efcc5fc56e62273692b48da25d950d2a0aaffb34eff80e8dbdc2d41ca38555ceb8554368436aea47d16056b5",
-			"0x9db5217528c55d982cf05fc54242bdcd25f1ebb73372e00e16d8e0f19dc3aeabdeef2d42d693405a04c37d60961526a",
-		};
-		const Fp2Str ys = {
-			"0x177d05b95e7879a7ddbd83c15114b5a4e9846fde72b2263072dc9e60db548ccbadaacb92cc4952d4f47425fe3c5e0172",
-			"0xfc82c99b928ed9df12a74f9215c3df8ae1e9a3fa54c00897889296890b23a0edcbb9653f9170bf715f882b35c0b4647",
-		};
-		Fp2 u0, u1, x, y;
-		set(u0, u0s);
-		set(u1, u1s);
-		set(x, xs);
-		set(y, ys);
-		G2 P;
-		ethFp2ToG2(P, u0, &u1);
-		P.normalize();
-		CYBOZU_TEST_EQUAL(P.x, x);
-		CYBOZU_TEST_EQUAL(P.y, y);
-	}
-	{
-		// https://media.githubusercontent.com/media/ethereum/eth2.0-spec-tests/v0.10.1/tests/general/phase0/bls/sign/small/sign_case_11b8c7cad5238946/data.yaml
-		const char *secs = "47b8192d77bf871b62e87859d653922725724a5c031afeabc60bcef5ff665138";
-		const char msg[33] = {};
-		const PointStr sigs = {
-			{
-				"2293012529822761631014706649736058250445440108079005633865844964288531978383212702502746862140143627562812967825888",
-				"1475696770777687381853347234154288535008294218073605500048435508284141334771039537063168112498702685312150787094910",
-			},
-			{
-				"1469299105114671507318396580458717074245984116935623233990667855919962974356517750849608590897738614199799891365360",
-				"2030012464923141446228430710552804525466499055365665031199510204412192520245701820596000835423160058948948207746066",
-			},
-			{
-				"3767430478723640173773019527754919617225964135305264831468522226308636862085707682484234512649553124965049251340541",
-				"1620434249170283311052688271749383011546709139865619017626863134580828776106815964830529695055765742705622363756158",
-			}
-		};
-		const char *expect = "b2deb7c656c86cb18c43dae94b21b107595486438e0b906f3bdb29fa316d0fc3cab1fc04c6ec9879c773849f2564d39317bfa948b4a35fc8509beafd3a2575c25c077ba8bca4df06cb547fe7ca3b107d49794b7132ef3b5493a6ffb2aad2a441";
-		Fr sec;
-		sec.setStr(secs, 16);
-		G2 P1, P2, Q;
-		set(Q, sigs);
-		Q.deserializeHexStr(expect);
-		const char *dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO-_POP_";
-		const size_t dstSize = strlen(dst);
-		const size_t msgSize = 32;
-		Fp2 t1, t2;
-		ethMsgToFp2(t1, msg, msgSize, 0, dst, dstSize);
-		ethMsgToFp2(t2, msg, msgSize, 1, dst, dstSize);
-		py_ecc_map_to_curve_G2(mapto, P1, t1);
-		py_ecc_map_to_curve_G2(mapto, P2, t2);
-		const PointStr ss = {
-			{
-				"1972340536407012813644167184956896760015950618902823780657111692209122974250648595689834944711427684709284318183285",
-				"2952312506825835541808570850755873891927945826649651965587037814445801597710562388482713867284483531575836668891717",
-			},
-			{
-				"2802951456840474233717338518518040462806475389210379447165158098937491293557221993219251045678976553989024259770721",
-				"2695848095528813794114709219550802586214789808214026789183854152760661360110019071654047951530688159586363471282307",
-			},
-			{
-				"1480478729322062079370070638002133449414477155913782123147952976030053267833796311564176542916706247537348236105579",
-				"3253481872910728113595595353980041952789112074899014850028493351493155577726278005524067083458491999010934020984031",
-			}
-		};
-		toJacobi(P1, P1);
-		toJacobi(P2, P2);
-		P1 += P2;
-		G2 P11;
-		set(P11, ss);
-		toJacobi(P11, P11);
-		CYBOZU_TEST_EQUAL(P1, P11);
-		const PointStr clears = {
-			{
-				"1957332172874233660214089655571851577083897125827848734477574606688306573833007308344920242234605652569670194263389",
-				"1116411061540418343539740639798030171984762250397980084002067231825141620343376868772345493606425790045780405764984",
-			},
-			{
-				"1009600579479639236035097803661439342927513547544039095581093451111718225564873663970283187908867141796447259993680",
-				"1036550257360332982249682819433119008785814033355112815293516573225867246356464383591412294871954385805192773093413",
-			},
-			{
-				"1455356692682887406712747484663891805342757123109829795478648571883713143907445859929832639473694165616164972254859",
-				"625703068888812559481386371501827420717093467297957594257224036896125014497486535098535016737064365426613580045089",
-			},
-		};
-		set(P11, clears);
-		mapto.clear_h2(P1, P1);
-		toJacobi(P11, P11);
-		CYBOZU_TEST_EQUAL(P1, P11);
-		py_ecc_hash_to_G2(mapto, P1, msg, msgSize, dst, dstSize);
-		CYBOZU_BENCH_C("py_ecc_hash_to_G2", 1000, py_ecc_hash_to_G2, mapto, P1, msg, msgSize, dst, dstSize);
-		CYBOZU_TEST_EQUAL(P1, P11);
-		ethMsgToG2(P1, msg, msgSize, dst, dstSize);
-		CYBOZU_TEST_EQUAL(P1, P11);
-		set(P11, sigs);
-		toJacobi(P11, P11);
-		P1 *= sec;
-		CYBOZU_TEST_EQUAL(P1, P11);
-		CYBOZU_TEST_EQUAL(P1.serializeToHexStr(), expect);
-	}
-}
-
-template<class T>
-void testSign(const T& mapto)
-{
-	Fp H = -1;
-	H /= 2;
-	const size_t N = 4;
-	const Fp tbl[N] = { 0, 1, H, H + 1 };
-	const int expect[N][N] = {
-		{  1, 1, 1, -1 },
-		{  1, 1, 1, -1 },
-		{  1, 1, 1, -1 },
-		{ -1, 1, 1, -1 },
-	};
-	Fp2 t;
-	for (size_t i = 0; i < N; i++) {
-		t.a = tbl[i];
-		for (size_t j = 0; j < N; j++) {
-			t.b = tbl[j];
-			CYBOZU_TEST_EQUAL(mapto.isNegSign(t), (expect[i][j] < 0));
-		}
-	}
-}
-
-template<class T>
-void osswu2_helpTest(const T& mapto)
-{
-	const struct {
-		Fp2Str t;
-		Fp2Str x;
-		Fp2Str y;
-		Fp2Str z;
-	} tbl[] = {
-		{
-			{
-				"0xe54bc0f2e26071a79ba5fe7ae5307d39cf5519e581e03b43f39a431eccc258fa1477c517b1268b22986601ee5caa5ea",
-				"0x17e8397d5e687ff7f915c23f27fe1ca2c397a7df91de8c88dc82d34c9188a3ef719f9f20436ea8a5fe7d509fbc79214d",
-			},
-			{
-				"0x11d568058220b1826cacde2e367beef98ea1edfde5fbf0491231b7ffdfc867e5269f9cfe65347c32ead182ba6b8c3ba1",
-				"0x19f2778213e671ac444b1b579bfdf4e7fabeed9626dc909ce243b60397a6b5f65af0fbbe02a43c1e289f28c927012da1",
-			},
-			{
-				"0xfe17bc695a84ec060b6287a4e77a50f65ba8f2c6c433f8131036ddfe34e3071d1cb71c0000f6bcfada947b19d8588df",
-				"0xb76abd285945f787721e7e306895149523941586ac44f25a294c406a70ed570020992025aa307777cfe6c590567dfbe",
-			},
-			{
-				"0x1910249ae63241608e013eb13578b9b3d96774d35e5732fc75efd17c212dd310d7f4016d6f212f62f33d34f10252e3e3",
-				"0xdcd076cea67c76a6d0594c8f30c8cd8e9ead24f90870f723228f2203a55e04a5517c426ea2c4bae9d37a11c3d0f1912",
-			},
-		},
-		{
-			{
-				"0x2a8663422cc279aa8591819195a62cfd57357b7bcb6f4a9174275c2e2e754fb23e2f8a444d0d164990dc03dcb95a129",
-				"0x15cf611083511955a70fdcc80cb08c6e22b8043a3038065251d4d3f82c6051bac4933e41d589514c42fba13f78f297ef",
-			},
-			{
-				"0x74ee12dce0c9a8836017172b562ebe491273964dd63df71dea6eb778cd9040e8c9a7136e745013c1def93cc57ef0dae",
-				"0xedce8fa83a2435a796d207943b14ea4d1a9850e10a6c2035912f1c5bd579e9cabc54027b87a779af28f380cc5edc8a6",
-			},
-			{
-				"0x11367627461d742b4afac12bd789f1437787f2dc675cf2c7896f004ab8480c06cd06589748d8b9791b4969763962f73c",
-				"0x101d8e4c1598e72d943dad4695cfa74236d5065345f1e62e62c75ca30cb0c41c3f6197d7c57d46e8cdd07845d77e1e34",
-			},
-			{
-				"0x3952479e45a0826275c1481fbd78a2b4c5076b6a5cd4ad7e132c1ec460dcaef504943e2c6a969ba182e230da3850b4",
-				"0x13b8e64e2e233d1dc4506360c3bff93535642c2d3115c53c049e287e35c03212be882f0618cc50557e55b42be53e4893",
-			},
-		},
-	};
-	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
-		Fp2 t, x, y, z;
-		E2 P;
-		set(t, tbl[i].t);
-		set(x, tbl[i].x);
-		set(y, tbl[i].y);
-		set(z, tbl[i].z);
-		mapto.osswu2_help(P, t);
-		CYBOZU_TEST_EQUAL(P.x, x);
-		CYBOZU_TEST_EQUAL(P.y, y);
-		CYBOZU_TEST_EQUAL(P.z, z);
-//		CYBOZU_TEST_ASSERT(P.isValid());
-	}
-}
-
 void addTest()
 {
 	const struct Tbl {
@@ -653,39 +244,6 @@ void iso3Test(const T& mapto)
 	CYBOZU_TEST_EQUAL(Q1, Q2);
 }
 
-void ethFp2ToG2test()
-{
-	const Fp2Str t1s = {
-		"0xafcfb20d836159f0cfb6f48c0ed808fd97a1cd1b9f1eb14451ff59e3884b1bf7665406cce673d434dde6933bdcf0ec9",
-		"0x36714c33fa9c79b0bb9ac963f57b2d2b2659e211893e64292ee2a8c1259b1a834a769782bae17202b537a1fe901c55e",
-	};
-	const Fp2Str t2s = {
-        "0xb9a2f39af0cc3264348ed00845545e2ccbed59ea541c726c8429871f9a0917fb4f7e049ac739065eea8354a2d1b2d21",
-		"0xc8810a06deb536d70531352bd2a3aac7496e187a8fc102d800c5f8ed839bd64d7102197aeb2b6164d20ff920ff63afe",
-	};
-	const PointStr t1t2s = {
-		{
-			"0x126b4982298792ed049850bb92b55d26c33a8e3139f9ca1a20821496c7396ce5ad9042b0da529e60ec9c3ff8e983befe",
-			"0x11c1d2f6a6a81e1f82dee2278968326e23e6ae469252a51d86673bd8fb333b7bca615b63a068692ff419c5f3e388797b",
-		},
-		{
-			"0x92468e5829b26cc976aff103403b4b5304dd206228c6eb84ecf7b45709307390bf29dced39f9aa037b014ad6fb5a6e4",
-			"0x5bd54eef1fdade89c98ab5c27d3dd9e18868af4250ff3a49de71d060ab62b7be039a3b2a8ef0c870d9021f6eae22029",
-		},
-		{
-			"0x154920adb9d857620c2835f4a5445bda35da53411710d559b18430f1b48c7cf2048cc275e0a9e01436d355f76fa0a9ec",
-			"0xccc404e5d17aa51f7669402916cf86587ce7cd9c657e90b05d7c8860940f741e62628df420d92c659d159d4b7683cce",
-		},
-	};
-	Fp2 t1, t2;
-	set(t1, t1s);
-	set(t2, t2s);
-	G2 P1, P2;
-	set(P1, t1t2s);
-	ethFp2ToG2(P2, t1, &t2);
-	CYBOZU_TEST_EQUAL(P1, P2);
-}
-
 void testVec(const char *file)
 {
 	std::ifstream ifs(file);
@@ -711,147 +269,6 @@ void testVec(const char *file)
 	}
 }
 
-template<class T>
-void py_eccTest2(const T& mapto)
-{
-	Fp2Str ts = {
-		"1918231859236664604157448091070531325862162392395253569013354101088957561890652491757605826252839368362075816084620",
-		"1765592454498940438559713185757713516213027777891663285362602185795653989012303939547547418058658378320847225866857",
-	};
-	PointStr out1s = {
-		{
-			"3927184272261705576225284664838663573624313247854459615864888213007837227449093837336748448846489186151562481034580",
-			"1903293468617299241460799312855927163610998535569367868293984916087966126786510088134190993502241498025510393259948",
-		},
-		{
-			"3991322739214666504999201807778913642377537002372597995520099276113880862779909709825029178857593814896063515454176",
-			"2999367925154329126226224834594837693635617675385117964685771461463180146028553717562548600391126160503718637741311",
-		},
-		{
-			"2578853905647618145305524664579860566455691148296386065391659245709237478565628968511959291772795541098532647163712",
-			"3910188857576114167072883940429120413632909260968721432280195359371907407125083761682822023489835923188989938783197",
-		},
-	};
-	PointStr out2s = {
-		{
-			"3257676086538823567761244186080544403330427395946948635449582231233180442322077484215757257097813156392664917178234",
-			"228537154970146118588036771068753907531432250550232803895899422656339347346840810590265440478956079727608969412311",
-		},
-		{
-			"2211656311977487430400091470761449132135875543285725344573261083165139360734602590585740129428161178745780787382986",
-			"40258781102313547933704047733645277081466097003572358028270922475602169023300010845551344432311507156784289541037",
-		},
-		{
-			"3554635405737095173231135338330740471713348364117258010850826274365262386961694608537862757803628655357449929362973",
-			"3305133470803621861948711123350198492693369595391902116552614265910644738630055172693143208260379598437272858586799",
-		},
-	};
-	Fp2 t;
-	set(t, ts);
-	E2 p, q;
-	py_ecc_optimized_swu_G2(mapto, p, t);
-	set(q, out1s);
-	CYBOZU_TEST_EQUAL(p.x, q.x);
-	CYBOZU_TEST_EQUAL(p.y, q.y);
-	CYBOZU_TEST_EQUAL(p.z, q.z);
-	G2 P, Q;
-	set(P, out2s);
-	py_ecc_map_to_curve_G2(mapto, Q, t);
-	CYBOZU_TEST_EQUAL(P, Q);
-}
-
-template<class T>
-void testHashToFp2v6(const T& mapto)
-{
-	const struct {
-		const char *msg;
-		const char *dst;
-		const Fp2Str s[2];
-	} tbl[] = {
-		{
-			// from draft-irtf-cfrg-hash-to-curve/poc/vectors/BLS12381G2_XMD:SHA-256_SSWU_RO_.json.swp
-			"abc",
-			"BLS12381G2_XMD:SHA-256_SSWU_RO_TESTGEN",
-			{
-				{
-					"0x0b7b2d371fc970671ddf7bc9ca4a70a1bd286af4487b497e460c0b44d405d73db576f8a08d59416cc976d4b1d0100775",
-					"0x0e86d0eb2d34c34fe8b2a1f2d999fa3dabcd504fdb4beb57e79756b08fd75b0a82660abc6026ecc4ccf327a522587b38",
-				},
-				{
-					"0x10376d048c060df1c5017a363144c482892fe2ce0061094327b8bbe49a713ce795726aa23b5402a271e9f1e7b9b6c7ba",
-					"0x0117f2ea63015e192d759f11a658a002e06112147d90f00d7429722456b9a1c63fef2dbe8df13168e3bd40af2fb959f3",
-				},
-			}
-		},
-		{
-			"asdf",
-			"QUUX-V01-CS02",
-			{
-				{
-					"2036684013374073670470642478097435082393965905216073159069132582313283074894808330704754509140183015844408257838394",
-					"1442095344782436377607687657711937282361342321405422912347590889376773969332935605209326528060836557922932229521614",
-				},
-				{
-					"712603160732423529538850938327197859251773848793464448294977148617985113767869616209273456982966659285651019780554",
-					"3549454379036632156704729135192770954406411172309331582430747991672599371642148666322072960024366511631069032927782",
-				},
-			}
-		},
-		{
-			"asdf",
-			"BLS_SIG_BLS12381G2-SHA256-SSWU-RO-_POP_",
-			{
-				{
-					"1184058645632270717238802026167521675640665254051621677891229161275546248273726163051942698406031256547695641333159",
-					"2796840541941870488250990266864713579761728392052042558603386652320835698725612365412314296122895578014688997245820",
-				},
-				{
-					"1432011693332698211658748968085869636612625272476301004513458304498234062483485462991424286092448663756703927705584",
-					"3596297820733241889565943496970554637589864863833863117721478512486741539397910569381754340032782454436609027606827",
-				},
-			}
-		},
-	};
-	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
-		const char *msg = tbl[i].msg;
-		const char *dst = tbl[i].dst;
-		const Fp2Str *expectStr = tbl[i].s;
-		Fp2 out[2];
-		mapto.hashToFp2(out, msg, strlen(msg), dst, strlen(dst));
-		Fp2 expect[2];
-		for (int j = 0; j < 2; j++) {
-			set(expect[j], expectStr[j]);
-			CYBOZU_TEST_EQUAL(out[j], expect[j]);
-		}
-		if (i == 0) {
-			// from draft-irtf-cfrg-hash-to-curve/poc/vectors/BLS12381G2_XMD:SHA-256_SSWU_RO_.json.swp
-			const Fp2Str xys[] = {
-				{
-					"0x0b6d276d0bfbddde617a9ab4c175b07c9c4aecad2cdd6cc9ca541b61334a69c58680ef5692bbad03d2f572838df32b66",
-					"0x139e9d78ff6d9d163f979d14a64c5e57f82f1ef7e42ece338b571a9e92c0666f0f6bf1a5fc21e2d32bcb6432eab7037c",
-				},
-				{
-					"0x022f9ee5d596d06c5f2f735c3c5f743978f79fd57bf7d4291e221227f490d3f276066de9f9edc89c57e048ef4cf0ef72",
-					"0x14dd23517516a80d1d840e34f51dfb76946c7670fca0f36ad8ec9bde4ea82dfae119a21b076519bcc1c00152989a4d45",
-				},
-			};
-			G2 P;
-			mapto.opt_swu2_map(P, out[0], &out[1]);
-			P.normalize();
-			Fp2 t;
-			set(t, xys[0]);
-			CYBOZU_TEST_EQUAL(P.x, t);
-			set(t, xys[1]);
-			CYBOZU_TEST_EQUAL(P.y, t);
-		}
-	}
-	G2 P;
-	mcl::bn::hashAndMapToG2(P, "asdf", 4);
-	CYBOZU_BENCH_C("draft06 hashAndMapToG2", 1000, mcl::bn::hashAndMapToG2, P, "asdf", 4);
-	P.normalize();
-//	printf("P=%s %s\n", P.x.getStr(10).c_str(), P.y.getStr(10).c_str());
-}
-
 template<class T>
 void testHashToFp2v7(const T& mapto)
 {
@@ -1225,24 +642,11 @@ CYBOZU_TEST_AUTO(test)
 {
 	initPairing(mcl::BLS12_381);
 	Fp::setETHserialization(true);
-	bn::setMapToMode(MCL_MAP_TO_MODE_WB19);
+	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_07);
 	const MapTo& mapto = BN::param.mapTo.mapTo_WB19_;
-	py_eccTest(mapto);
-	py_eccTest2(mapto);
-	osswu2_helpTest(mapto);
 	addTest();
 	iso3Test(mapto);
-	testSign(mapto);
-	ethFp2ToG2test();
 	testHMAC();
-	testHashToFp2();
-	ethMsgToG2test();
-	testVec("../misc/mapto/fips_186_3_B233.txt");
-	testVec("../misc/mapto/misc.txt");
-	ethMsgToG2testAll("../bls_sigs_ref/test-vectors/hash_g2/");
-	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_06);
-	testHashToFp2v6(mapto);
-	bn::setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE_07);
 	testHashToFp2v7(mapto);
 	testEth2phase0();
 	testSswuG1(mapto);

From c193bb4cfe6fb5d69feb5e8e359117a83e9f28a5 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 6 Jul 2020 18:30:08 +0900
Subject: [PATCH 252/553] mclBn_eth* functions are removed

---
 include/mcl/bn.h               | 27 ---------------------------
 include/mcl/bn.hpp             | 33 ---------------------------------
 include/mcl/impl/bn_c_impl.hpp | 15 ---------------
 readme.md                      |  1 +
 test/mapto_wb19_test.cpp       | 25 -------------------------
 5 files changed, 1 insertion(+), 100 deletions(-)

diff --git a/include/mcl/bn.h b/include/mcl/bn.h
index 1396d6b1..d553066e 100644
--- a/include/mcl/bn.h
+++ b/include/mcl/bn.h
@@ -206,33 +206,6 @@ MCLBN_DLL_API int mclBn_getETHserialization(void);
 */
 MCLBN_DLL_API int mclBn_setMapToMode(int mode);
 
-/*
-	the next three functions are auxiliary of the new eth 2.0 spec
-	these always return 0 if MCL_BLS12_381 is set
-*/
-/*
-	set out to hash of (msg[msgSize], ctr, dst[dstSize])
-	return 0 if success
-	@note append zero byte to msg if necessary
-*/
-// deprecated
-MCLBN_DLL_API int mclBn_ethMsgToFp2(mclBnFp2 *out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize);
-
-/*
-	set out to hash of (t1, t2)
-	allow t2 is NULL
-	return 0 if success
-*/
-MCLBN_DLL_API int mclBn_ethFp2ToG2(mclBnG2 *out, const mclBnFp2 *t1, const mclBnFp2 *t2);
-
-/*
-	set out to hash of (msg[msgSize], dst[dstSize])
-	@note append zero byte to msg if necessary
-	return 0 if success
-*/
-// deprecated
-MCLBN_DLL_API int mclBn_ethMsgToG2(mclBnG2 *out, const void *msg, size_t msgSize, const void *dst, size_t dstSize);
-
 ////////////////////////////////////////////////
 /*
 	deserialize
diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 13e43e91..427ddf39 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -2173,39 +2173,6 @@ inline const G1& getG1basePoint()
 	return BN::param.basePoint;
 }
 
-inline const Fr& getG2cofactorAdj()
-{
-	return BN::param.mapTo.g2cofactorAdj_;
-}
-
-inline const Fr& getG2cofactorAdjInv()
-{
-	return BN::param.mapTo.g2cofactorAdjInv_;
-}
-
-// deprecated
-inline bool ethMsgToFp2(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
-{
-	if (!BN::param.isBLS12) return false;
-	hashToFp2old(out, msg, msgSize, ctr, dst, dstSize);
-	return true;
-}
-
-inline bool ethFp2ToG2(G2& out, const Fp2& t1, const Fp2 *t2 = 0)
-{
-	if (!BN::param.isBLS12) return false;
-	BN::param.mapTo.mapTo_WB19_.opt_swu2_map(out, t1, t2);
-	return true;
-}
-
-// deprecated
-inline bool ethMsgToG2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
-{
-	if (!BN::param.isBLS12) return false;
-	BN::param.mapTo.mapTo_WB19_.map2curve_osswu2(out, msg, msgSize, dst, dstSize);
-	return true;
-}
-
 } } // mcl::bn
 
 namespace mcl { namespace local {
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index 1cca8383..cf0b5475 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -135,21 +135,6 @@ int mclBn_setMapToMode(int mode)
 	return setMapToMode(mode) ? 0 : -1;
 }
 
-int mclBn_ethMsgToFp2(mclBnFp2 *out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
-{
-	return mcl::bn::ethMsgToFp2(*cast(out), msg, msgSize, ctr, dst, dstSize) ? 0 : -1;
-}
-
-int mclBn_ethFp2ToG2(mclBnG2 *out, const mclBnFp2 *t1, const mclBnFp2 *t2)
-{
-	return mcl::bn::ethFp2ToG2(*cast(out), *cast(t1), cast(t2)) ? 0 : -1;
-}
-
-int mclBn_ethMsgToG2(mclBnG2 *out, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
-{
-	return mcl::bn::ethMsgToG2(*cast(out), msg, msgSize, dst, dstSize) ? 0 : -1;
-}
-
 ////////////////////////////////////////////////
 // set zero
 void mclBnFr_clear(mclBnFr *x)
diff --git a/readme.md b/readme.md
index c1b1b569..1762e11c 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,7 @@ mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
+- `mclBn_eth*` functions are removed.
 - `mcl::bn::mapToG1(G1& out, const Fp& v)` supports `BLS12_MAP_FP_TO_G1` in [EIP 2537](https://eips.ethereum.org/EIPS/eip-2537).
 - `mcl::bn::hashAndMapToG1(G1& out, const void *msg, size_t msgSize)` supports ([hash-to-curve-09 BLS12381G1_XMD:SHA-256_SSWU_RO_](https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-09.html#name-bls12381g1_xmdsha-256_sswu_))
 - `MCL_MAP_TO_MODE_HASH_TO_CURVE_07` is added for [hash-to-curve-draft-07](https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/07/).
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 7cd6b2db..c40c1298 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -244,31 +244,6 @@ void iso3Test(const T& mapto)
 	CYBOZU_TEST_EQUAL(Q1, Q2);
 }
 
-void testVec(const char *file)
-{
-	std::ifstream ifs(file);
-	if (!ifs) {
-		printf("skip testVec because `%s` is not found\n", file);
-	}
-	printf("testVec %s\n", file);
-	Fp2 t1, t2;
-	G2 out, P;
-	std::string s;
-	for (;;) {
-		ifs >> s;
-		if (s != "t1") break;
-		ifs >> t1;
-		ifs >> s;
-		CYBOZU_TEST_EQUAL(s, "t2");
-		ifs >> t2;
-		ifs >> s;
-		CYBOZU_TEST_EQUAL(s, "out");
-		ifs >> out.x >> out.y >> out.z;
-		ethFp2ToG2(P, t1, &t2);
-		CYBOZU_TEST_EQUAL(P, out);
-	}
-}
-
 template<class T>
 void testHashToFp2v7(const T& mapto)
 {

From d79c5acb489ac54a7bd2544f8210c732c0caaa12 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 7 Jul 2020 12:17:19 +0900
Subject: [PATCH 253/553] rename inner functions of MapTo

---
 include/mcl/bn.hpp         |  2 +-
 include/mcl/mapto_wb19.hpp | 38 +++++++++++++++-----------------------
 include/mcl/op.hpp         |  2 +-
 readme.md                  |  1 +
 test/mapto_wb19_test.cpp   |  4 ++--
 5 files changed, 20 insertions(+), 27 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 427ddf39..921685f1 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -576,7 +576,7 @@ struct MapTo {
 	bool calc(G2& P, const Fp2& t, bool fast = false) const
 	{
 		if (mapToMode_ == MCL_MAP_TO_MODE_HASH_TO_CURVE_07) {
-			mapTo_WB19_.opt_swu2_map(P, t);
+			mapTo_WB19_.Fp2ToG2(P, t);
 			return true;
 		}
 		if (!mapToEc(P, t)) return false;
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index cefd3ac4..a212811e 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -366,21 +366,17 @@ struct MapTo_WB19 {
 		Fp::neg(y.b, y.b);
 		y.a = t;
 	}
-	bool sgn0(const Fp& x) const
+	bool isNegSign(const Fp& x) const
 	{
 		return x.isOdd();
 	}
-	bool sgn0(const Fp2& x) const
+	bool isNegSign(const Fp2& x) const
 	{
-		bool sign0 = sgn0(x.a);
+		bool sign0 = isNegSign(x.a);
 		bool zero0 = x.a.isZero();
-		bool sign1 = sgn0(x.b);
+		bool sign1 = isNegSign(x.b);
 		return sign0 || (zero0 & sign1);
 	}
-	bool isNegSign(const Fp2& x) const
-	{
-		return sgn0(x);
-	}
 	// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-07#appendix-D.3.5
 	void sswuG1(Fp& xn, Fp& xd, Fp& y, const Fp& u) const
 	{
@@ -425,7 +421,7 @@ struct MapTo_WB19 {
 			y *= u2;
 			y *= u;
 		}
-		if (sgn0(u) != sgn0(y)) {
+		if (isNegSign(u) != isNegSign(y)) {
 			Fp::neg(y, y);
 		}
 	}
@@ -440,7 +436,7 @@ struct MapTo_WB19 {
 		pt.y *= y;
 	}
 	// https://github.com/algorand/bls_sigs_ref
-	void osswu2_help(E2& P, const Fp2& t) const
+	void sswuG2(E2& P, const Fp2& t) const
 	{
 		Fp2 t2, t2xi;
 		Fp2::sqr(t2, t);
@@ -523,11 +519,6 @@ struct MapTo_WB19 {
 		}
 		assert(0);
 	}
-	void clear_h2(G2& Q, const G2& P) const
-	{
-		// 1.9Mclk can be reduced
-		mcl::local::mulByCofactorBLS12fast(Q, P);
-	}
 	template<class T>
 	void put(const T& P) const
 	{
@@ -536,19 +527,18 @@ struct MapTo_WB19 {
 		printf("y=%s\n", P.y.getStr(base).c_str());
 		printf("z=%s\n", P.z.getStr(base).c_str());
 	}
-	void opt_swu2_map(G2& P, const Fp2& t, const Fp2 *t2 = 0) const
+	void Fp2ToG2(G2& P, const Fp2& t, const Fp2 *t2 = 0) const
 	{
 		E2 Pp;
-		osswu2_help(Pp, t);
+		sswuG2(Pp, t);
 		if (t2) {
 			E2 P2;
-			osswu2_help(P2, *t2);
+			sswuG2(P2, *t2);
 			ec::addJacobi(Pp, Pp, P2);
 		}
 		iso3(P, Pp);
-		clear_h2(P, P);
+		mcl::local::mulByCofactorBLS12fast(P, P);
 	}
-	// hash-to-curve-06
 	void hashToFp2(Fp2 out[2], const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
 	{
 		uint8_t md[256];
@@ -560,16 +550,17 @@ struct MapTo_WB19 {
 			assert(b); (void)b;
 		}
 	}
-	void map2curve_osswu2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
+	void msgToG2(G2& out, const void *msg, size_t msgSize, const void *dst, size_t dstSize) const
 	{
 		Fp2 t[2];
 		hashToFp2(t, msg, msgSize, dst, dstSize);
-		opt_swu2_map(out, t[0], &t[1]);
+		Fp2ToG2(out, t[0], &t[1]);
 	}
 	void msgToG2(G2& out, const void *msg, size_t msgSize) const
 	{
 		const char *dst = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_";
-		map2curve_osswu2(out, msg, msgSize, dst, strlen(dst));
+		const size_t dstSize = strlen(dst);
+		msgToG2(out, msg, msgSize, dst, dstSize);
 	}
 	void FpToG1(G1& out, const Fp& u0, const Fp *u1 = 0) const
 	{
@@ -595,6 +586,7 @@ struct MapTo_WB19 {
 		}
 		FpToG1(out, u[0], &u[1]);
 	}
+
 	void msgToG1(G1& out, const void *msg, size_t msgSize) const
 	{
 		const char *dst = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_";
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 2d2a72eb..ddf9122b 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x121; /* 0xABC = A.BC */
+static const int version = 0x122; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index 1762e11c..45480866 100644
--- a/readme.md
+++ b/readme.md
@@ -315,6 +315,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
+- 2020/Jun/07 v1.22 remove old hash-to-curve functions
 - 2020/Jun/04 v1.21 mapToG1 and hashAndMapToG1 are compatible to irtf/eip-2537
 - 2020/May/13 v1.09 support draft-irtf-cfrg-hash-to-curve-07
 - 2020/Mar/26 v1.07 change DST for hash-to-curve-06
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index c40c1298..e7bee9eb 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -240,7 +240,7 @@ void iso3Test(const T& mapto)
 	mapto.iso3(Q2, P);
 	CYBOZU_TEST_EQUAL(Q1, Q2);
 	set(Q1, clearPs);
-	mapto.clear_h2(Q2, Q2);
+	mcl::local::mulByCofactorBLS12fast(Q2, Q2);
 	CYBOZU_TEST_EQUAL(Q1, Q2);
 }
 
@@ -372,7 +372,7 @@ void testHashToFp2v7(const T& mapto)
 			set(P1.x, tbl[i].x);
 			set(P1.y, tbl[i].y);
 			P1.z = 1;
-			mapto.map2curve_osswu2(P2, msg, msgSize, dst, dstSize);
+			mapto.msgToG2(P2, msg, msgSize, dst, dstSize);
 			CYBOZU_TEST_EQUAL(P1, P2);
 		}
 		{

From 526d39af3d0799cad9f21946d88573984160e3e3 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 20 Jul 2020 08:43:09 +0900
Subject: [PATCH 254/553] [she] disable exception catching

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 32b2efe4..eef4ccc9 100644
--- a/Makefile
+++ b/Makefile
@@ -339,10 +339,10 @@ ifeq ($(MCL_USE_LLVM),2)
   SHE_C_DEP+=src/base64m.ll
 endif
 ../she-wasm/she_c.js: src/she_c256.cpp $(SHE_C_DEP)
-	emcc -o $@ src/fp.cpp src/she_c256.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=256 -s TOTAL_MEMORY=67108864 -s DISABLE_EXCEPTION_CATCHING=0
+	emcc -o $@ src/fp.cpp src/she_c256.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=256 -s TOTAL_MEMORY=67108864 -s DISABLE_EXCEPTION_CATCHING=1
 
 ../she-wasm/she_c384.js: src/she_c384.cpp $(SHE_C_DEP)
-	emcc -o $@ src/fp.cpp src/she_c384.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=384 -s TOTAL_MEMORY=67108864 -s DISABLE_EXCEPTION_CATCHING=0
+	emcc -o $@ src/fp.cpp src/she_c384.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=384 -s TOTAL_MEMORY=67108864 -s DISABLE_EXCEPTION_CATCHING=1
 
 ../mcl-wasm/mcl_c384_256.js: src/bn_c384_256.cpp $(MCL_C_DEP)
 	emcc -o $@ src/fp.cpp src/bn_c384_256.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=384 -DMCL_USE_WEB_CRYPTO_API -s DISABLE_EXCEPTION_CATCHING=1 -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -fno-exceptions -MD -MP -MF obj/mcl_c384_256.d

From d309a4a28a5b4f422c7cc764206a9c2c0160c7ce Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 19 Jul 2020 18:37:58 +0900
Subject: [PATCH 255/553] add C# bindings

---
 ffi/cs/mcl.sln          |  31 +++
 ffi/cs/mcl/mcl.cs       | 565 ++++++++++++++++++++++++++++++++++++++++
 ffi/cs/mcl/mcl.csproj   |  13 +
 ffi/cs/test/test.cs     | 161 ++++++++++++
 ffi/cs/test/test.csproj |  28 ++
 5 files changed, 798 insertions(+)
 create mode 100644 ffi/cs/mcl.sln
 create mode 100644 ffi/cs/mcl/mcl.cs
 create mode 100644 ffi/cs/mcl/mcl.csproj
 create mode 100644 ffi/cs/test/test.cs
 create mode 100644 ffi/cs/test/test.csproj

diff --git a/ffi/cs/mcl.sln b/ffi/cs/mcl.sln
new file mode 100644
index 00000000..182d04e5
--- /dev/null
+++ b/ffi/cs/mcl.sln
@@ -0,0 +1,31 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.30309.148
+MinimumVisualStudioVersion = 15.0.26124.0
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "test", "test\test.csproj", "{31ABF32C-3DAC-47EF-8B99-B531F88B6FDC}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mcl", "mcl\mcl.csproj", "{73288FB5-7173-4AE9-86C6-F76DF219C37B}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{31ABF32C-3DAC-47EF-8B99-B531F88B6FDC}.Debug|x64.ActiveCfg = Debug|Any CPU
+		{31ABF32C-3DAC-47EF-8B99-B531F88B6FDC}.Debug|x64.Build.0 = Debug|Any CPU
+		{31ABF32C-3DAC-47EF-8B99-B531F88B6FDC}.Release|x64.ActiveCfg = Release|Any CPU
+		{31ABF32C-3DAC-47EF-8B99-B531F88B6FDC}.Release|x64.Build.0 = Release|Any CPU
+		{73288FB5-7173-4AE9-86C6-F76DF219C37B}.Debug|x64.ActiveCfg = Debug|Any CPU
+		{73288FB5-7173-4AE9-86C6-F76DF219C37B}.Debug|x64.Build.0 = Debug|Any CPU
+		{73288FB5-7173-4AE9-86C6-F76DF219C37B}.Release|x64.ActiveCfg = Release|Any CPU
+		{73288FB5-7173-4AE9-86C6-F76DF219C37B}.Release|x64.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {ACBC75BC-5FB9-48DF-A78D-7C6BAD21E647}
+	EndGlobalSection
+EndGlobal
diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
new file mode 100644
index 00000000..d508a485
--- /dev/null
+++ b/ffi/cs/mcl/mcl.cs
@@ -0,0 +1,565 @@
+using System;
+using System.Text;
+using System.Runtime.InteropServices;
+
+namespace mcl {
+    public class MCL {
+        public const int BN254 = 0;
+        public const int BN_SNARK = 4;
+        public const int BLS12_381 = 5;
+        public const int FR_UNIT_SIZE = 4;
+        public const int FP_UNIT_SIZE = 6; // 4 if mclbn256.dll is used
+
+        public const int G1_UNIT_SIZE = FP_UNIT_SIZE * 3;
+        public const int G2_UNIT_SIZE = FP_UNIT_SIZE * 2 * 3;
+        public const int GT_UNIT_SIZE = FP_UNIT_SIZE * 12;
+
+        public const string dllName = "mclbn384_256";
+        [DllImport(dllName)] public static extern int mclBn_init(int curve, int compiledTimeVar);
+        [DllImport(dllName)] public static extern void mclBnFr_clear(ref Fr x);
+        [DllImport(dllName)] public static extern void mclBnFr_setInt(ref Fr y, int x);
+        [DllImport(dllName)] public static extern int mclBnFr_setStr(ref Fr x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
+        [DllImport(dllName)] public static extern int mclBnFr_isValid(ref Fr x);
+        [DllImport(dllName)] public static extern int mclBnFr_isEqual(ref Fr x, ref Fr y);
+        [DllImport(dllName)] public static extern int mclBnFr_isZero(ref Fr x);
+        [DllImport(dllName)] public static extern int mclBnFr_isOne(ref Fr x);
+        [DllImport(dllName)] public static extern void mclBnFr_setByCSPRNG(ref Fr x);
+
+        [DllImport(dllName)] public static extern int mclBnFr_setHashOf(ref Fr x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
+        [DllImport(dllName)] public static extern long mclBnFr_getStr([Out] StringBuilder buf, long maxBufSize, ref Fr x, int ioMode); // result is a size (0 is treated as failure by Fr.GetStr); long matches the G1/G2/GT getStr imports
+
+        [DllImport(dllName)] public static extern void mclBnFr_neg(ref Fr y, ref Fr x);
+        [DllImport(dllName)] public static extern void mclBnFr_inv(ref Fr y, ref Fr x);
+        [DllImport(dllName)] public static extern void mclBnFr_add(ref Fr z, ref Fr x, ref Fr y);
+        [DllImport(dllName)] public static extern void mclBnFr_sub(ref Fr z, ref Fr x, ref Fr y);
+        [DllImport(dllName)] public static extern void mclBnFr_mul(ref Fr z, ref Fr x, ref Fr y);
+        [DllImport(dllName)] public static extern void mclBnFr_div(ref Fr z, ref Fr x, ref Fr y);
+
+        [DllImport(dllName)] public static extern void mclBnFp_clear(ref Fp x);
+        [DllImport(dllName)] public static extern void mclBnFp_setInt(ref Fp y, int x);
+        [DllImport(dllName)] public static extern int mclBnFp_setStr(ref Fp x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
+        [DllImport(dllName)] public static extern int mclBnFp_isValid(ref Fp x);
+        [DllImport(dllName)] public static extern int mclBnFp_isEqual(ref Fp x, ref Fp y);
+        [DllImport(dllName)] public static extern int mclBnFp_isZero(ref Fp x);
+        [DllImport(dllName)] public static extern int mclBnFp_isOne(ref Fp x);
+        [DllImport(dllName)] public static extern void mclBnFp_setByCSPRNG(ref Fp x);
+
+        [DllImport(dllName)] public static extern int mclBnFp_setHashOf(ref Fp x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
+        [DllImport(dllName)] public static extern long mclBnFp_getStr([Out] StringBuilder buf, long maxBufSize, ref Fp x, int ioMode); // result is a size (0 is treated as failure by Fp.GetStr); long matches the G1/G2/GT getStr imports
+
+        [DllImport(dllName)] public static extern void mclBnFp_neg(ref Fp y, ref Fp x);
+        [DllImport(dllName)] public static extern void mclBnFp_inv(ref Fp y, ref Fp x);
+        [DllImport(dllName)] public static extern void mclBnFp_add(ref Fp z, ref Fp x, ref Fp y);
+        [DllImport(dllName)] public static extern void mclBnFp_sub(ref Fp z, ref Fp x, ref Fp y);
+        [DllImport(dllName)] public static extern void mclBnFp_mul(ref Fp z, ref Fp x, ref Fp y);
+        [DllImport(dllName)] public static extern void mclBnFp_div(ref Fp z, ref Fp x, ref Fp y);
+        [DllImport(dllName)] public static extern void mclBnG1_clear(ref G1 x);
+        [DllImport(dllName)] public static extern int mclBnG1_setStr(ref G1 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
+        [DllImport(dllName)] public static extern int mclBnG1_isValid(ref G1 x);
+        [DllImport(dllName)] public static extern int mclBnG1_isEqual(ref G1 x, ref G1 y);
+        [DllImport(dllName)] public static extern int mclBnG1_isZero(ref G1 x);
+        [DllImport(dllName)] public static extern int mclBnG1_hashAndMapTo(ref G1 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
+        [DllImport(dllName)] public static extern long mclBnG1_getStr([Out] StringBuilder buf, long maxBufSize, ref G1 x, int ioMode);
+        [DllImport(dllName)] public static extern void mclBnG1_neg(ref G1 y, ref G1 x);
+        [DllImport(dllName)] public static extern void mclBnG1_dbl(ref G1 y, ref G1 x);
+        [DllImport(dllName)] public static extern void mclBnG1_add(ref G1 z, ref G1 x, ref G1 y);
+        [DllImport(dllName)] public static extern void mclBnG1_sub(ref G1 z, ref G1 x, ref G1 y);
+        [DllImport(dllName)] public static extern void mclBnG1_mul(ref G1 z, ref G1 x, ref Fr y);
+        [DllImport(dllName)] public static extern void mclBnG2_clear(ref G2 x);
+        [DllImport(dllName)] public static extern int mclBnG2_setStr(ref G2 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
+        [DllImport(dllName)] public static extern int mclBnG2_isValid(ref G2 x);
+        [DllImport(dllName)] public static extern int mclBnG2_isEqual(ref G2 x, ref G2 y);
+        [DllImport(dllName)] public static extern int mclBnG2_isZero(ref G2 x);
+        [DllImport(dllName)] public static extern int mclBnG2_hashAndMapTo(ref G2 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
+        [DllImport(dllName)] public static extern long mclBnG2_getStr([Out] StringBuilder buf, long maxBufSize, ref G2 x, int ioMode);
+        [DllImport(dllName)] public static extern void mclBnG2_neg(ref G2 y, ref G2 x);
+        [DllImport(dllName)] public static extern void mclBnG2_dbl(ref G2 y, ref G2 x);
+        [DllImport(dllName)] public static extern void mclBnG2_add(ref G2 z, ref G2 x, ref G2 y);
+        [DllImport(dllName)] public static extern void mclBnG2_sub(ref G2 z, ref G2 x, ref G2 y);
+        [DllImport(dllName)] public static extern void mclBnG2_mul(ref G2 z, ref G2 x, ref Fr y);
+
+        [DllImport(dllName)] public static extern void mclBnGT_clear(ref GT x);
+        [DllImport(dllName)] public static extern int mclBnGT_setStr(ref GT x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
+        [DllImport(dllName)] public static extern int mclBnGT_isEqual(ref GT x, ref GT y);
+        [DllImport(dllName)] public static extern int mclBnGT_isZero(ref GT x);
+        [DllImport(dllName)] public static extern int mclBnGT_isOne(ref GT x);
+        [DllImport(dllName)] public static extern long mclBnGT_getStr([Out] StringBuilder buf, long maxBufSize, ref GT x, int ioMode);
+        [DllImport(dllName)] public static extern void mclBnGT_neg(ref GT y, ref GT x);
+        [DllImport(dllName)] public static extern void mclBnGT_inv(ref GT y, ref GT x);
+        [DllImport(dllName)] public static extern void mclBnGT_add(ref GT z, ref GT x, ref GT y);
+        [DllImport(dllName)] public static extern void mclBnGT_sub(ref GT z, ref GT x, ref GT y);
+        [DllImport(dllName)] public static extern void mclBnGT_mul(ref GT z, ref GT x, ref GT y);
+        [DllImport(dllName)] public static extern void mclBnGT_div(ref GT z, ref GT x, ref GT y);
+        [DllImport(dllName)] public static extern void mclBnGT_pow(ref GT z, ref GT x, ref Fr y);
+        [DllImport(dllName)] public static extern void mclBn_pairing(ref GT z, ref G1 x, ref G2 y);
+        [DllImport(dllName)] public static extern void mclBn_finalExp(ref GT y, ref GT x);
+        [DllImport(dllName)] public static extern void mclBn_millerLoop(ref GT z, ref G1 x, ref G2 y);
+        [DllImport(dllName)] public static extern int mclBnFp_setLittleEndianMod(ref Fp y, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
+        [DllImport(dllName)] public static extern int mclBnFr_setLittleEndianMod(ref Fr y, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
+        [DllImport(dllName)] public static extern int mclBnFp_serialize([Out] StringBuilder buf, long maxBufSiz, ref Fp x); // operand is an Fp, mirroring mclBnFp_deserialize(ref Fp ...)
+        [DllImport(dllName)] public static extern int mclBnFr_serialize([Out] StringBuilder buf, long maxBufSiz, ref Fr x);
+        [DllImport(dllName)] public static extern int mclBnG1_serialize([Out] StringBuilder buf, long maxBufSiz, ref G1 x);
+        [DllImport(dllName)] public static extern int mclBnG2_serialize([Out] StringBuilder buf, long maxBufSiz, ref G2 x);
+        [DllImport(dllName)] public static extern int mclBnGT_serialize([Out] StringBuilder buf, long maxBufSiz, ref GT x);
+        [DllImport(dllName)] public static extern int mclBnFr_deserialize(ref Fr x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
+        [DllImport(dllName)] public static extern int mclBnFp_deserialize(ref Fp x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
+        [DllImport(dllName)] public static extern int mclBnG1_deserialize(ref G1 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
+        [DllImport(dllName)] public static extern int mclBnG2_deserialize(ref G2 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
+        [DllImport(dllName)] public static extern int mclBnGT_deserialize(ref GT x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
+
+        public static void Init(int curveType = BN254) // forwards curveType (BN254 by default) to mclBn_init; throws on failure
+        {
+            if (!System.Environment.Is64BitProcess) { // refuse to continue in a non-64-bit process
+                throw new PlatformNotSupportedException("not 64-bit system");
+            }
+            const int COMPILED_TIME_VAR = FR_UNIT_SIZE * 10 + FP_UNIT_SIZE; // packs both unit sizes into one int; presumably compared by the native library against its own build settings - TODO confirm
+            if (mclBn_init(curveType, COMPILED_TIME_VAR) != 0) { // any non-zero return is reported as an error
+                throw new ArgumentException("mclBn_init");
+            }
+        }
+        [StructLayout(LayoutKind.Sequential)]
+        struct U128 {
+            private ulong v0, v1;
+        }
+
+        [StructLayout(LayoutKind.Sequential)]
+        public struct Fr {
+            private U128 v0, v1;
+            public void Clear()
+            {
+                mclBnFr_clear(ref this);
+            }
+            public void SetInt(int x)
+            {
+                mclBnFr_setInt(ref this, x);
+            }
+            public void SetStr(string s, int ioMode)
+            {
+                if (mclBnFr_setStr(ref this, s, s.Length, ioMode) != 0) {
+                    throw new ArgumentException("mclBnFr_setStr" + s);
+                }
+            }
+            public bool IsValid()
+            {
+                return mclBnFr_isValid(ref this) == 1;
+            }
+            public bool Equals(Fr rhs)
+            {
+                return mclBnFr_isEqual(ref this, ref rhs) == 1;
+            }
+            public bool IsZero()
+            {
+                return mclBnFr_isZero(ref this) == 1;
+            }
+            public bool IsOne()
+            {
+                return mclBnFr_isOne(ref this) == 1;
+            }
+            public void SetByCSPRNG()
+            {
+                mclBnFr_setByCSPRNG(ref this);
+            }
+            public void SetHashOf(String s)
+            {
+                if (mclBnFr_setHashOf(ref this, s, s.Length) != 0) {
+                    throw new InvalidOperationException("mclBnFr_setHashOf:" + s);
+                }
+            }
+            public string GetStr(int ioMode)
+            {
+                StringBuilder sb = new StringBuilder(1024);
+                long size = mclBnFr_getStr(sb, sb.Capacity, ref this, ioMode);
+                if (size == 0) {
+                    throw new InvalidOperationException("mclBnFr_getStr:");
+                }
+                return sb.ToString();
+            }
+            public void Neg(Fr x)
+            {
+                mclBnFr_neg(ref this, ref x);
+            }
+            public void Inv(Fr x)
+            {
+                mclBnFr_inv(ref this, ref x);
+            }
+            public void Add(Fr x, Fr y)
+            {
+                mclBnFr_add(ref this, ref x, ref y);
+            }
+            public void Sub(Fr x, Fr y)
+            {
+                mclBnFr_sub(ref this, ref x, ref y);
+            }
+            public void Mul(Fr x, Fr y)
+            {
+                mclBnFr_mul(ref this, ref x, ref y);
+            }
+            public void Div(Fr x, Fr y)
+            {
+                mclBnFr_div(ref this, ref x, ref y);
+            }
+            public static Fr operator -(Fr x)
+            {
+                Fr y = new Fr();
+                y.Neg(x);
+                return y;
+            }
+            public static Fr operator +(Fr x, Fr y)
+            {
+                Fr z = new Fr();
+                z.Add(x, y);
+                return z;
+            }
+            public static Fr operator -(Fr x, Fr y)
+            {
+                Fr z = new Fr();
+                z.Sub(x, y);
+                return z;
+            }
+            public static Fr operator *(Fr x, Fr y)
+            {
+                Fr z = new Fr();
+                z.Mul(x, y);
+                return z;
+            }
+            public static Fr operator /(Fr x, Fr y)
+            {
+                Fr z = new Fr();
+                z.Div(x, y);
+                return z;
+            }
+        }
+        [StructLayout(LayoutKind.Sequential)]
+        public struct Fp {
+            private U128 v0, v1, v2;
+            public void Clear()
+            {
+                mclBnFp_clear(ref this);
+            }
+            public void SetInt(int x)
+            {
+                mclBnFp_setInt(ref this, x);
+            }
+            public void SetStr(string s, int ioMode)
+            {
+                if (mclBnFp_setStr(ref this, s, s.Length, ioMode) != 0) {
+                    throw new ArgumentException("mclBnFp_setStr" + s);
+                }
+            }
+            public bool IsValid()
+            {
+                return mclBnFp_isValid(ref this) == 1;
+            }
+            public bool Equals(Fp rhs)
+            {
+                return mclBnFp_isEqual(ref this, ref rhs) == 1;
+            }
+            public bool IsZero()
+            {
+                return mclBnFp_isZero(ref this) == 1;
+            }
+            public bool IsOne()
+            {
+                return mclBnFp_isOne(ref this) == 1;
+            }
+            public void SetByCSPRNG()
+            {
+                mclBnFp_setByCSPRNG(ref this);
+            }
+            public string GetStr(int ioMode)
+            {
+                StringBuilder sb = new StringBuilder(1024);
+                long size = mclBnFp_getStr(sb, sb.Capacity, ref this, ioMode);
+                if (size == 0) {
+                    throw new InvalidOperationException("mclBnFp_getStr:");
+                }
+                return sb.ToString();
+            }
+            public void Neg(Fp x)
+            {
+                mclBnFp_neg(ref this, ref x);
+            }
+            public void Inv(Fp x)
+            {
+                mclBnFp_inv(ref this, ref x);
+            }
+            public void Add(Fp x, Fp y)
+            {
+                mclBnFp_add(ref this, ref x, ref y);
+            }
+            public void Sub(Fp x, Fp y)
+            {
+                mclBnFp_sub(ref this, ref x, ref y);
+            }
+            public void Mul(Fp x, Fp y)
+            {
+                mclBnFp_mul(ref this, ref x, ref y);
+            }
+            public void Div(Fp x, Fp y)
+            {
+                mclBnFp_div(ref this, ref x, ref y);
+            }
+            public static Fp operator -(Fp x)
+            {
+                Fp y = new Fp();
+                y.Neg(x);
+                return y;
+            }
+            public static Fp operator +(Fp x, Fp y)
+            {
+                Fp z = new Fp();
+                z.Add(x, y);
+                return z;
+            }
+            public static Fp operator -(Fp x, Fp y)
+            {
+                Fp z = new Fp();
+                z.Sub(x, y);
+                return z;
+            }
+            public static Fp operator *(Fp x, Fp y)
+            {
+                Fp z = new Fp();
+                z.Mul(x, y);
+                return z;
+            }
+            public static Fp operator /(Fp x, Fp y)
+            {
+                Fp z = new Fp();
+                z.Div(x, y);
+                return z;
+            }
+        }
+        [StructLayout(LayoutKind.Sequential)]
+        public struct Fp2 {
+            private Fp a, b;
+        }
+        [StructLayout(LayoutKind.Sequential)]
+        public struct G1 {
+            private Fp x, y, z;
+            public void Clear()
+            {
+                mclBnG1_clear(ref this);
+            }
+            public void SetStr(String s, int ioMode)
+            {
+                if (mclBnG1_setStr(ref this, s, s.Length, ioMode) != 0) {
+                    throw new ArgumentException("mclBnG1_setStr:" + s);
+                }
+            }
+            public bool IsValid()
+            {
+                return mclBnG1_isValid(ref this) == 1;
+            }
+            public bool Equals(G1 rhs)
+            {
+                return mclBnG1_isEqual(ref this, ref rhs) == 1;
+            }
+            public bool IsZero()
+            {
+                return mclBnG1_isZero(ref this) == 1;
+            }
+            public void HashAndMapTo(String s)
+            {
+                if (mclBnG1_hashAndMapTo(ref this, s, s.Length) != 0) {
+                    throw new ArgumentException("mclBnG1_hashAndMapTo:" + s);
+                }
+            }
+            public string GetStr(int ioMode)
+            {
+                StringBuilder sb = new StringBuilder(1024);
+                long size = mclBnG1_getStr(sb, sb.Capacity, ref this, ioMode);
+                if (size == 0) {
+                    throw new InvalidOperationException("mclBnG1_getStr:");
+                }
+                return sb.ToString();
+            }
+            public void Neg(G1 x)
+            {
+                mclBnG1_neg(ref this, ref x);
+            }
+            public void Dbl(G1 x)
+            {
+                mclBnG1_dbl(ref this, ref x);
+            }
+            public void Add(G1 x, G1 y)
+            {
+                mclBnG1_add(ref this, ref x, ref y);
+            }
+            public void Sub(G1 x, G1 y)
+            {
+                mclBnG1_sub(ref this, ref x, ref y);
+            }
+            public void Mul(G1 x, Fr y)
+            {
+                mclBnG1_mul(ref this, ref x, ref y);
+            }
+        }
+        [StructLayout(LayoutKind.Sequential)]
+        public struct G2 {
+            private Fp2 x, y, z;
+            public void Clear()
+            {
+                mclBnG2_clear(ref this);
+            }
+            public void SetStr(String s, int ioMode)
+            {
+                if (mclBnG2_setStr(ref this, s, s.Length, ioMode) != 0) {
+                    throw new ArgumentException("mclBnG2_setStr:" + s);
+                }
+            }
+            public bool IsValid()
+            {
+                return mclBnG2_isValid(ref this) == 1;
+            }
+            public bool Equals(G2 rhs)
+            {
+                return mclBnG2_isEqual(ref this, ref rhs) == 1;
+            }
+            public bool IsZero()
+            {
+                return mclBnG2_isZero(ref this) == 1;
+            }
+            public void HashAndMapTo(String s)
+            {
+                if (mclBnG2_hashAndMapTo(ref this, s, s.Length) != 0) {
+                    throw new ArgumentException("mclBnG2_hashAndMapTo:" + s);
+                }
+            }
+            public string GetStr(int ioMode)
+            {
+                StringBuilder sb = new StringBuilder(1024);
+                long size = mclBnG2_getStr(sb, sb.Capacity, ref this, ioMode);
+                if (size == 0) {
+                    throw new InvalidOperationException("mclBnG2_getStr:");
+                }
+                return sb.ToString();
+            }
+            public void Neg(G2 x)
+            {
+                mclBnG2_neg(ref this, ref x);
+            }
+            public void Dbl(G2 x)
+            {
+                mclBnG2_dbl(ref this, ref x);
+            }
+            public void Add(G2 x, G2 y)
+            {
+                mclBnG2_add(ref this, ref x, ref y);
+            }
+            public void Sub(G2 x, G2 y)
+            {
+                mclBnG2_sub(ref this, ref x, ref y);
+            }
+            public void Mul(G2 x, Fr y)
+            {
+                mclBnG2_mul(ref this, ref x, ref y);
+            }
+        }
+        [StructLayout(LayoutKind.Sequential)]
+        public struct GT {
+            private Fp v00, v01, v02, v03, v04, v05, v06, v07, v08, v09, v10, v11;
+            public void Clear()
+            {
+                mclBnGT_clear(ref this);
+            }
+            public void SetStr(String s, int ioMode)
+            {
+                if (mclBnGT_setStr(ref this, s, s.Length, ioMode) != 0) {
+                    throw new ArgumentException("mclBnGT_setStr:" + s);
+                }
+            }
+            public bool Equals(GT rhs)
+            {
+                return mclBnGT_isEqual(ref this, ref rhs) == 1;
+            }
+            public bool IsZero()
+            {
+                return mclBnGT_isZero(ref this) == 1;
+            }
+            public bool IsOne()
+            {
+                return mclBnGT_isOne(ref this) == 1;
+            }
+            public string GetStr(int ioMode)
+            {
+                StringBuilder sb = new StringBuilder(1024);
+                long size = mclBnGT_getStr(sb, sb.Capacity, ref this, ioMode);
+                if (size == 0) {
+                    throw new InvalidOperationException("mclBnGT_getStr:");
+                }
+                return sb.ToString();
+            }
+            public void Neg(GT x)
+            {
+                mclBnGT_neg(ref this, ref x);
+            }
+            public void Inv(GT x)
+            {
+                mclBnGT_inv(ref this, ref x);
+            }
+            public void Add(GT x, GT y)
+            {
+                mclBnGT_add(ref this, ref x, ref y);
+            }
+            public void Sub(GT x, GT y)
+            {
+                mclBnGT_sub(ref this, ref x, ref y);
+            }
+            public void Mul(GT x, GT y)
+            {
+                mclBnGT_mul(ref this, ref x, ref y);
+            }
+            public void Div(GT x, GT y)
+            {
+                mclBnGT_div(ref this, ref x, ref y);
+            }
+            public static GT operator -(GT x)
+            {
+                GT y = new GT();
+                y.Neg(x);
+                return y;
+            }
+            public static GT operator +(GT x, GT y)
+            {
+                GT z = new GT();
+                z.Add(x, y);
+                return z;
+            }
+            public static GT operator -(GT x, GT y)
+            {
+                GT z = new GT();
+                z.Sub(x, y);
+                return z;
+            }
+            public static GT operator *(GT x, GT y)
+            {
+                GT z = new GT();
+                z.Mul(x, y);
+                return z;
+            }
+            public static GT operator /(GT x, GT y)
+            {
+                GT z = new GT();
+                z.Div(x, y);
+                return z;
+            }
+            public void Pow(GT x, Fr y)
+            {
+                mclBnGT_pow(ref this, ref x, ref y);
+            }
+            public void Pairing(G1 x, G2 y)
+            {
+                mclBn_pairing(ref this, ref x, ref y);
+            }
+            public void FinalExp(GT x)
+            {
+                mclBn_finalExp(ref this, ref x);
+            }
+            public void MillerLoop(G1 x, G2 y)
+            {
+                mclBn_millerLoop(ref this, ref x, ref y);
+            }
+        }
+    }
+}
diff --git a/ffi/cs/mcl/mcl.csproj b/ffi/cs/mcl/mcl.csproj
new file mode 100644
index 00000000..ec210537
--- /dev/null
+++ b/ffi/cs/mcl/mcl.csproj
@@ -0,0 +1,13 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>netstandard2.1</TargetFramework>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <Compile Remove="CVS\**" />
+    <EmbeddedResource Remove="CVS\**" />
+    <None Remove="CVS\**" />
+  </ItemGroup>
+
+</Project>
diff --git a/ffi/cs/test/test.cs b/ffi/cs/test/test.cs
new file mode 100644
index 00000000..bf1002e5
--- /dev/null
+++ b/ffi/cs/test/test.cs
@@ -0,0 +1,161 @@
+using System;
+
+namespace mcl {
+    using static MCL;
+    class MCLTest {
+        static int err = 0;
+        static void assert(string msg, bool b)
+        {
+            if (b) return;
+            Console.WriteLine("ERR {0}", msg);
+            err++;
+        }
+        static void Main(string[] args)
+        {
+            Console.WriteLine("BN254");
+            TestCurve(BN254);
+            Console.WriteLine("BN_SNARK");
+            TestCurve(BN_SNARK);
+            Console.WriteLine("BLS12_381");
+            TestCurve(BLS12_381);
+        }
+
+        static void TestCurve(int curveType)
+
+        {
+            err = 0;
+            try {
+                Init(curveType);
+                TestFr();
+                TestG1();
+                TestG2();
+                TestPairing();
+                if (err == 0) {
+                    Console.WriteLine("all tests succeed");
+                } else {
+                    Console.WriteLine("err={0}", err);
+                }
+            } catch (Exception e) {
+                Console.WriteLine("ERR={0}", e);
+            }
+        }
+        static void TestFr() // exercises Fr: clear/set, string conversion, negation, multiplication, equality, and SetStr error handling
+        {
+            Console.WriteLine("TestFr");
+            Fr x = new Fr();
+            x.Clear();
+            assert("0", x.GetStr(10) == "0");
+            assert("0.IzZero", x.IsZero());
+            assert("!0.IzOne", !x.IsOne());
+            x.SetInt(1);
+            assert("1", x.GetStr(10) == "1");
+            assert("!1.IzZero", !x.IsZero());
+            assert("1.IzOne", x.IsOne());
+            x.SetInt(3);
+            assert("3", x.GetStr(10) == "3");
+            assert("!3.IzZero", !x.IsZero());
+            assert("!3.IzOne", !x.IsOne());
+            x.SetInt(-5);
+            x = -x; // unary minus goes through Fr.Neg
+            assert("5", x.GetStr(10) == "5");
+            x.SetInt(4);
+            x = x * x;
+            assert("16", x.GetStr(10) == "16");
+            assert("10", x.GetStr(16) == "10"); // same value printed in base 16
+            Fr y;
+            y = x;
+            assert("x == y", x.Equals(y));
+            x.SetInt(123);
+            assert("123", x.GetStr(10) == "123");
+            assert("7b", x.GetStr(16) == "7b");
+            assert("y != x", !x.Equals(y));
+            Console.WriteLine("exception test");
+            try {
+                x.SetStr("1234567891234x", 10);
+                assert("SetStr must reject invalid input", false); // reaching this line means no exception was thrown
+            } catch (Exception e) {
+                Console.WriteLine("OK ; expected exception: {0}", e);
+            }
+            x.SetStr("1234567891234", 10);
+            assert("1234567891234", x.GetStr(10) == "1234567891234");
+        }
+        static void TestG1()
+        {
+            Console.WriteLine("TestG1");
+            G1 P = new G1();
+            P.Clear();
+            assert("P.IsValid", P.IsValid());
+            assert("P.IsZero", P.IsZero());
+            P.HashAndMapTo("abc");
+            assert("P.IsValid", P.IsValid());
+            assert("!P.IsZero", !P.IsZero());
+            G1 Q = new G1();
+            Q = P;
+            assert("P == Q", Q.Equals(P));
+            Q.Neg(P);
+            Q.Add(Q, P);
+            assert("P = Q", Q.IsZero());
+            Q.Dbl(P);
+            G1 R = new G1();
+            R.Add(P, P);
+            assert("Q == R", Q.Equals(R));
+            Fr x = new Fr();
+            x.SetInt(3);
+            R.Add(R, P);
+            Q.Mul(P, x);
+            assert("Q == R", Q.Equals(R));
+        }
+        static void TestG2()
+        {
+            Console.WriteLine("TestG2");
+            G2 P = new G2();
+            P.Clear();
+            assert("P is valid", P.IsValid());
+            assert("P is zero", P.IsZero());
+            P.HashAndMapTo("abc");
+            assert("P is valid", P.IsValid());
+            assert("P is not zero", !P.IsZero());
+            G2 Q = new G2();
+            Q = P;
+            assert("P == Q", Q.Equals(P));
+            Q.Neg(P);
+            Q.Add(Q, P);
+            assert("Q is zero", Q.IsZero());
+            Q.Dbl(P);
+            G2 R = new G2();
+            R.Add(P, P);
+            assert("Q == R", Q.Equals(R));
+            Fr x = new Fr();
+            x.SetInt(3);
+            R.Add(R, P);
+            Q.Mul(P, x);
+            assert("Q == R", Q.Equals(R));
+        }
+        static void TestPairing() // checks that Pairing agrees with Pow when either argument is scaled by an Fr value
+        {
+            Console.WriteLine("TestPairing");
+            G1 P = new G1();
+            P.HashAndMapTo("123");
+            G2 Q = new G2();
+            Q.HashAndMapTo("1");
+            Fr a = new Fr();
+            Fr b = new Fr();
+            a.SetStr("12345678912345673453", 10);
+            b.SetStr("230498230982394243424", 10);
+            G1 aP = new G1();
+            G2 bQ = new G2();
+            aP.Mul(P, a); // aP = P scaled by a
+            bQ.Mul(Q, b); // bQ = Q scaled by b
+            GT e1 = new GT();
+            GT e2 = new GT();
+            GT e3 = new GT();
+            e1.Pairing(P, Q);
+            e2.Pairing(aP, Q);
+            e3.Pow(e1, a);
+            assert("e2.Equals(e3)", e2.Equals(e3)); // Pairing(aP, Q) must equal Pairing(P, Q) raised to a
+            e2.Pairing(P, bQ);
+            e3.Pow(e1, b);
+            assert("e2.Equals(e3)", e2.Equals(e3)); // Pairing(P, bQ) must equal Pairing(P, Q) raised to b
+        }
+    }
+}
diff --git a/ffi/cs/test/test.csproj b/ffi/cs/test/test.csproj
new file mode 100644
index 00000000..ddabc650
--- /dev/null
+++ b/ffi/cs/test/test.csproj
@@ -0,0 +1,28 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <ItemGroup>
+    <Compile Remove="CVS\**" />
+    <EmbeddedResource Remove="CVS\**" />
+    <None Remove="CVS\**" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\mcl\mcl.csproj" />
+  </ItemGroup>
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>netcoreapp3.1</TargetFramework>
+  </PropertyGroup>
+
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
+    <PlatformTarget>x64</PlatformTarget>
+    <OutputPath></OutputPath>
+  </PropertyGroup>
+
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
+    <PlatformTarget>x64</PlatformTarget>
+    <OutputPath></OutputPath>
+  </PropertyGroup>
+
+</Project>

From 6ddebac588881e6f6fc94d1deb2c8017d1aa8e79 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 19 Jul 2020 22:11:18 +0900
Subject: [PATCH 256/553] remove warning of vc

---
 include/mcl/gmp_util.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp
index 2c7938d0..ed0880ba 100644
--- a/include/mcl/gmp_util.hpp
+++ b/include/mcl/gmp_util.hpp
@@ -97,7 +97,7 @@ template<class T>
 void getArray(bool *pb, T *buf, size_t maxSize, const mpz_class& x)
 {
 #ifdef MCL_USE_VINT
-	*pb = getArray_(buf, maxSize, x.getUnit(), x.getUnitSize());
+	*pb = getArray_(buf, maxSize, x.getUnit(), (int)x.getUnitSize());
 #else
 	*pb = getArray_(buf, maxSize, x.get_mpz_t()->_mp_d, x.get_mpz_t()->_mp_size);
 #endif

From 0901e76cc2bac59034abbdf73750a2a1a7ade8a2 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 19 Jul 2020 22:11:46 +0900
Subject: [PATCH 257/553] mklib makes dll without OpenSSL/GMP

---
 mklib.bat  | 28 +++++++++++-----------------
 setvar.bat |  4 ++--
 2 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/mklib.bat b/mklib.bat
index 4e277e1a..a283d4fa 100644
--- a/mklib.bat
+++ b/mklib.bat
@@ -8,6 +8,10 @@ if "%1"=="dll" (
 rem nasm -f win64 -D_WIN64 src\asm\low_x86-64.asm
 rem lib /OUT:lib\mcl.lib /nodefaultlib fp.obj src\asm\low_x86-64.obj
 
+if "%1"=="dll" (
+  set CFLAGS=%CFLAGS% /DMCL_NO_AUTOLINK /DMCLBN_NO_AUTOLINK
+)
+
 echo cl /c %CFLAGS% src\fp.cpp /Foobj\fp.obj
      cl /c %CFLAGS% src\fp.cpp /Foobj\fp.obj
 echo lib /nologo /OUT:lib\mcl.lib /nodefaultlib obj\fp.obj
@@ -15,27 +19,17 @@ echo lib /nologo /OUT:lib\mcl.lib /nodefaultlib obj\fp.obj
 
 if "%1"=="dll" (
   echo cl /c %CFLAGS% src\bn_c256.cpp /Foobj\bn_c256.obj
-     cl /c %CFLAGS% src\bn_c256.cpp /Foobj\bn_c256.obj /DMCLBN_NO_AUTOLINK
+     cl /c %CFLAGS% src\bn_c256.cpp /Foobj\bn_c256.obj
   echo link /nologo /DLL /OUT:bin\mclbn256.dll obj\bn_c256.obj obj\fp.obj %LDFLAGS% /implib:lib\mclbn256.lib
      link /nologo /DLL /OUT:bin\mclbn256.dll obj\bn_c256.obj obj\fp.obj %LDFLAGS% /implib:lib\mclbn256.lib
 
   echo cl /c %CFLAGS% src\bn_c384_256.cpp /Foobj\bn_c384_256.obj
-     cl /c %CFLAGS% src\bn_c384_256.cpp /Foobj\bn_c384_256.obj /DMCLBN_NO_AUTOLINK
+     cl /c %CFLAGS% src\bn_c384_256.cpp /Foobj\bn_c384_256.obj
   echo link /nologo /DLL /OUT:bin\mclbn384_256.dll obj\bn_c384_256.obj obj\fp.obj %LDFLAGS% /implib:lib\mclbn384_256.lib
      link /nologo /DLL /OUT:bin\mclbn384_256.dll obj\bn_c384_256.obj obj\fp.obj %LDFLAGS% /implib:lib\mclbn384_256.lib
 
-  echo cl /c %CFLAGS% src\bn_c384.cpp /Foobj\bn_c384.obj
-     cl /c %CFLAGS% src\bn_c384.cpp /Foobj\bn_c384.obj /DMCLBN_NO_AUTOLINK
-  echo link /nologo /DLL /OUT:bin\mclbn384.dll obj\bn_c384.obj obj\fp.obj %LDFLAGS% /implib:lib\mclbn384.lib
-     link /nologo /DLL /OUT:bin\mclbn384.dll obj\bn_c384.obj obj\fp.obj %LDFLAGS% /implib:lib\mclbn384.lib
-
-  echo cl /c %CFLAGS% src\she_c256.cpp /Foobj\she_c256.obj /DMCLBN_NO_AUTOLINK
-     cl /c %CFLAGS% src\she_c256.cpp /Foobj\she_c256.obj /DMCLBN_NO_AUTOLINK
-  echo link /nologo /DLL /OUT:bin\mclshe256.dll obj\she_c256.obj obj\fp.obj %LDFLAGS% /implib:lib\mclshe_c256.lib
-     link /nologo /DLL /OUT:bin\mclshe256.dll obj\she_c256.obj obj\fp.obj %LDFLAGS% /implib:lib\mclshe_c256.lib
-
-  echo cl /c %CFLAGS% src\she_c384_256.cpp /Foobj\she_c384_256.obj /DMCLBN_NO_AUTOLINK
-     cl /c %CFLAGS% src\she_c384_256.cpp /Foobj\she_c384_256.obj /DMCLBN_NO_AUTOLINK
+  echo cl /c %CFLAGS% src\she_c384_256.cpp /Foobj\she_c384_256.obj /DMCL_NO_AUTOLINK
+     cl /c %CFLAGS% src\she_c384_256.cpp /Foobj\she_c384_256.obj /DMCL_NO_AUTOLINK
   echo link /nologo /DLL /OUT:bin\mclshe384_256.dll obj\she_c384_256.obj obj\fp.obj %LDFLAGS% /implib:lib\mclshe_c384_256.lib
      link /nologo /DLL /OUT:bin\mclshe384_256.dll obj\she_c384_256.obj obj\fp.obj %LDFLAGS% /implib:lib\mclshe_c384_256.lib
 ) else (
@@ -43,7 +37,7 @@ if "%1"=="dll" (
      cl /c %CFLAGS% src\bn_c256.cpp /Foobj\bn_c256.obj
      lib /nologo /OUT:lib\mclbn256.lib /nodefaultlib obj\bn_c256.obj lib\mcl.lib
 
-  echo cl /c %CFLAGS% src\bn_c384.cpp /Foobj\bn_c384.obj
-     cl /c %CFLAGS% src\bn_c384.cpp /Foobj\bn_c384.obj
-     lib /nologo /OUT:lib\mclbn384.lib /nodefaultlib obj\bn_c384.obj lib\mcl.lib
+  echo cl /c %CFLAGS% src\bn_c384_256.cpp /Foobj\bn_c384_256.obj
+     cl /c %CFLAGS% src\bn_c384_256.cpp /Foobj\bn_c384_256.obj
+     lib /nologo /OUT:lib\mclbn384_256.lib /nodefaultlib obj\bn_c384_256.obj lib\mcl.lib
 )
diff --git a/setvar.bat b/setvar.bat
index 2ceea1fc..c679580b 100644
--- a/setvar.bat
+++ b/setvar.bat
@@ -1,2 +1,2 @@
-set CFLAGS=/MT /DNOMINMAX /Ox /DNDEBUG /openmp /W4 /Zi /EHsc /nologo -I./include -I../cybozulib_ext/include
-set LDFLAGS=/LIBPATH:..\cybozulib_ext\lib /LIBPATH:.\lib
+set CFLAGS=/MT /DNOMINMAX /Ox /DNDEBUG /openmp /W4 /Zi /EHsc /nologo -I./include -I../cybozulib_ext/include -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -DMCL_MAX_BIT_SIZE=384
+set LDFLAGS=/LIBPATH:.\lib

From ae670c82f79e12b23aa7a5c0358bf0775956ad7e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 20 Jul 2020 15:21:38 +0900
Subject: [PATCH 258/553] [C#] add Serialize/Deserialize

---
 ffi/cs/mcl/mcl.cs   | 405 +++++++++++++++++++++++++-------------------
 ffi/cs/test/test.cs |  95 +++++++++--
 2 files changed, 314 insertions(+), 186 deletions(-)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index d508a485..e0c778e8 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -19,93 +19,93 @@ public class MCL {
         [DllImport(dllName)] public static extern void mclBnFr_clear(ref Fr x);
         [DllImport(dllName)] public static extern void mclBnFr_setInt(ref Fr y, int x);
         [DllImport(dllName)] public static extern int mclBnFr_setStr(ref Fr x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
-        [DllImport(dllName)] public static extern int mclBnFr_isValid(ref Fr x);
-        [DllImport(dllName)] public static extern int mclBnFr_isEqual(ref Fr x, ref Fr y);
-        [DllImport(dllName)] public static extern int mclBnFr_isZero(ref Fr x);
-        [DllImport(dllName)] public static extern int mclBnFr_isOne(ref Fr x);
+        [DllImport(dllName)] public static extern int mclBnFr_isValid(in Fr x);
+        [DllImport(dllName)] public static extern int mclBnFr_isEqual(in Fr x, in Fr y);
+        [DllImport(dllName)] public static extern int mclBnFr_isZero(in Fr x);
+        [DllImport(dllName)] public static extern int mclBnFr_isOne(in Fr x);
         [DllImport(dllName)] public static extern void mclBnFr_setByCSPRNG(ref Fr x);
 
         [DllImport(dllName)] public static extern int mclBnFr_setHashOf(ref Fr x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
-        [DllImport(dllName)] public static extern int mclBnFr_getStr([Out] StringBuilder buf, long maxBufSize, ref Fr x, int ioMode);
+        [DllImport(dllName)] public static extern int mclBnFr_getStr([Out] StringBuilder buf, long maxBufSize, in Fr x, int ioMode);
 
-        [DllImport(dllName)] public static extern void mclBnFr_neg(ref Fr y, ref Fr x);
-        [DllImport(dllName)] public static extern void mclBnFr_inv(ref Fr y, ref Fr x);
-        [DllImport(dllName)] public static extern void mclBnFr_add(ref Fr z, ref Fr x, ref Fr y);
-        [DllImport(dllName)] public static extern void mclBnFr_sub(ref Fr z, ref Fr x, ref Fr y);
-        [DllImport(dllName)] public static extern void mclBnFr_mul(ref Fr z, ref Fr x, ref Fr y);
-        [DllImport(dllName)] public static extern void mclBnFr_div(ref Fr z, ref Fr x, ref Fr y);
+        [DllImport(dllName)] public static extern void mclBnFr_neg(ref Fr y, in Fr x);
+        [DllImport(dllName)] public static extern void mclBnFr_inv(ref Fr y, in Fr x);
+        [DllImport(dllName)] public static extern void mclBnFr_add(ref Fr z, in Fr x, in Fr y);
+        [DllImport(dllName)] public static extern void mclBnFr_sub(ref Fr z, in Fr x, in Fr y);
+        [DllImport(dllName)] public static extern void mclBnFr_mul(ref Fr z, in Fr x, in Fr y);
+        [DllImport(dllName)] public static extern void mclBnFr_div(ref Fr z, in Fr x, in Fr y);
 
         [DllImport(dllName)] public static extern void mclBnFp_clear(ref Fp x);
         [DllImport(dllName)] public static extern void mclBnFp_setInt(ref Fp y, int x);
         [DllImport(dllName)] public static extern int mclBnFp_setStr(ref Fp x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
-        [DllImport(dllName)] public static extern int mclBnFp_isValid(ref Fp x);
-        [DllImport(dllName)] public static extern int mclBnFp_isEqual(ref Fp x, ref Fp y);
-        [DllImport(dllName)] public static extern int mclBnFp_isZero(ref Fp x);
-        [DllImport(dllName)] public static extern int mclBnFp_isOne(ref Fp x);
+        [DllImport(dllName)] public static extern int mclBnFp_isValid(in Fp x);
+        [DllImport(dllName)] public static extern int mclBnFp_isEqual(in Fp x, in Fp y);
+        [DllImport(dllName)] public static extern int mclBnFp_isZero(in Fp x);
+        [DllImport(dllName)] public static extern int mclBnFp_isOne(in Fp x);
         [DllImport(dllName)] public static extern void mclBnFp_setByCSPRNG(ref Fp x);
 
         [DllImport(dllName)] public static extern int mclBnFp_setHashOf(ref Fp x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
-        [DllImport(dllName)] public static extern int mclBnFp_getStr([Out] StringBuilder buf, long maxBufSize, ref Fp x, int ioMode);
+        [DllImport(dllName)] public static extern int mclBnFp_getStr([Out] StringBuilder buf, long maxBufSize, in Fp x, int ioMode);
 
-        [DllImport(dllName)] public static extern void mclBnFp_neg(ref Fp y, ref Fp x);
-        [DllImport(dllName)] public static extern void mclBnFp_inv(ref Fp y, ref Fp x);
-        [DllImport(dllName)] public static extern void mclBnFp_add(ref Fp z, ref Fp x, ref Fp y);
-        [DllImport(dllName)] public static extern void mclBnFp_sub(ref Fp z, ref Fp x, ref Fp y);
-        [DllImport(dllName)] public static extern void mclBnFp_mul(ref Fp z, ref Fp x, ref Fp y);
-        [DllImport(dllName)] public static extern void mclBnFp_div(ref Fp z, ref Fp x, ref Fp y);
+        [DllImport(dllName)] public static extern void mclBnFp_neg(ref Fp y, in Fp x);
+        [DllImport(dllName)] public static extern void mclBnFp_inv(ref Fp y, in Fp x);
+        [DllImport(dllName)] public static extern void mclBnFp_add(ref Fp z, in Fp x, in Fp y);
+        [DllImport(dllName)] public static extern void mclBnFp_sub(ref Fp z, in Fp x, in Fp y);
+        [DllImport(dllName)] public static extern void mclBnFp_mul(ref Fp z, in Fp x, in Fp y);
+        [DllImport(dllName)] public static extern void mclBnFp_div(ref Fp z, in Fp x, in Fp y);
         [DllImport(dllName)] public static extern void mclBnG1_clear(ref G1 x);
         [DllImport(dllName)] public static extern int mclBnG1_setStr(ref G1 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
-        [DllImport(dllName)] public static extern int mclBnG1_isValid(ref G1 x);
-        [DllImport(dllName)] public static extern int mclBnG1_isEqual(ref G1 x, ref G1 y);
-        [DllImport(dllName)] public static extern int mclBnG1_isZero(ref G1 x);
+        [DllImport(dllName)] public static extern int mclBnG1_isValid(in G1 x);
+        [DllImport(dllName)] public static extern int mclBnG1_isEqual(in G1 x, in G1 y);
+        [DllImport(dllName)] public static extern int mclBnG1_isZero(in G1 x);
         [DllImport(dllName)] public static extern int mclBnG1_hashAndMapTo(ref G1 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
-        [DllImport(dllName)] public static extern long mclBnG1_getStr([Out] StringBuilder buf, long maxBufSize, ref G1 x, int ioMode);
-        [DllImport(dllName)] public static extern void mclBnG1_neg(ref G1 y, ref G1 x);
-        [DllImport(dllName)] public static extern void mclBnG1_dbl(ref G1 y, ref G1 x);
-        [DllImport(dllName)] public static extern void mclBnG1_add(ref G1 z, ref G1 x, ref G1 y);
-        [DllImport(dllName)] public static extern void mclBnG1_sub(ref G1 z, ref G1 x, ref G1 y);
-        [DllImport(dllName)] public static extern void mclBnG1_mul(ref G1 z, ref G1 x, ref Fr y);
+        [DllImport(dllName)] public static extern long mclBnG1_getStr([Out] StringBuilder buf, long maxBufSize, in G1 x, int ioMode);
+        [DllImport(dllName)] public static extern void mclBnG1_neg(ref G1 y, in G1 x);
+        [DllImport(dllName)] public static extern void mclBnG1_dbl(ref G1 y, in G1 x);
+        [DllImport(dllName)] public static extern void mclBnG1_add(ref G1 z, in G1 x, in G1 y);
+        [DllImport(dllName)] public static extern void mclBnG1_sub(ref G1 z, in G1 x, in G1 y);
+        [DllImport(dllName)] public static extern void mclBnG1_mul(ref G1 z, in G1 x, in Fr y);
         [DllImport(dllName)] public static extern void mclBnG2_clear(ref G2 x);
         [DllImport(dllName)] public static extern int mclBnG2_setStr(ref G2 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
-        [DllImport(dllName)] public static extern int mclBnG2_isValid(ref G2 x);
-        [DllImport(dllName)] public static extern int mclBnG2_isEqual(ref G2 x, ref G2 y);
-        [DllImport(dllName)] public static extern int mclBnG2_isZero(ref G2 x);
+        [DllImport(dllName)] public static extern int mclBnG2_isValid(in G2 x);
+        [DllImport(dllName)] public static extern int mclBnG2_isEqual(in G2 x, in G2 y);
+        [DllImport(dllName)] public static extern int mclBnG2_isZero(in G2 x);
         [DllImport(dllName)] public static extern int mclBnG2_hashAndMapTo(ref G2 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
-        [DllImport(dllName)] public static extern long mclBnG2_getStr([Out] StringBuilder buf, long maxBufSize, ref G2 x, int ioMode);
-        [DllImport(dllName)] public static extern void mclBnG2_neg(ref G2 y, ref G2 x);
-        [DllImport(dllName)] public static extern void mclBnG2_dbl(ref G2 y, ref G2 x);
-        [DllImport(dllName)] public static extern void mclBnG2_add(ref G2 z, ref G2 x, ref G2 y);
-        [DllImport(dllName)] public static extern void mclBnG2_sub(ref G2 z, ref G2 x, ref G2 y);
-        [DllImport(dllName)] public static extern void mclBnG2_mul(ref G2 z, ref G2 x, ref Fr y);
+        [DllImport(dllName)] public static extern long mclBnG2_getStr([Out] StringBuilder buf, long maxBufSize, in G2 x, int ioMode);
+        [DllImport(dllName)] public static extern void mclBnG2_neg(ref G2 y, in G2 x);
+        [DllImport(dllName)] public static extern void mclBnG2_dbl(ref G2 y, in G2 x);
+        [DllImport(dllName)] public static extern void mclBnG2_add(ref G2 z, in G2 x, in G2 y);
+        [DllImport(dllName)] public static extern void mclBnG2_sub(ref G2 z, in G2 x, in G2 y);
+        [DllImport(dllName)] public static extern void mclBnG2_mul(ref G2 z, in G2 x, in Fr y);
 
         [DllImport(dllName)] public static extern void mclBnGT_clear(ref GT x);
         [DllImport(dllName)] public static extern int mclBnGT_setStr(ref GT x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
-        [DllImport(dllName)] public static extern int mclBnGT_isEqual(ref GT x, ref GT y);
-        [DllImport(dllName)] public static extern int mclBnGT_isZero(ref GT x);
-        [DllImport(dllName)] public static extern int mclBnGT_isOne(ref GT x);
-        [DllImport(dllName)] public static extern long mclBnGT_getStr([Out] StringBuilder buf, long maxBufSize, ref GT x, int ioMode);
-        [DllImport(dllName)] public static extern void mclBnGT_neg(ref GT y, ref GT x);
-        [DllImport(dllName)] public static extern void mclBnGT_inv(ref GT y, ref GT x);
-        [DllImport(dllName)] public static extern void mclBnGT_add(ref GT z, ref GT x, ref GT y);
-        [DllImport(dllName)] public static extern void mclBnGT_sub(ref GT z, ref GT x, ref GT y);
-        [DllImport(dllName)] public static extern void mclBnGT_mul(ref GT z, ref GT x, ref GT y);
-        [DllImport(dllName)] public static extern void mclBnGT_div(ref GT z, ref GT x, ref GT y);
-        [DllImport(dllName)] public static extern void mclBnGT_pow(ref GT z, ref GT x, ref Fr y);
-        [DllImport(dllName)] public static extern void mclBn_pairing(ref GT z, ref G1 x, ref G2 y);
-        [DllImport(dllName)] public static extern void mclBn_finalExp(ref GT y, ref GT x);
-        [DllImport(dllName)] public static extern void mclBn_millerLoop(ref GT z, ref G1 x, ref G2 y);
+        [DllImport(dllName)] public static extern int mclBnGT_isEqual(in GT x, in GT y);
+        [DllImport(dllName)] public static extern int mclBnGT_isZero(in GT x);
+        [DllImport(dllName)] public static extern int mclBnGT_isOne(in GT x);
+        [DllImport(dllName)] public static extern long mclBnGT_getStr([Out] StringBuilder buf, long maxBufSize, in GT x, int ioMode);
+        [DllImport(dllName)] public static extern void mclBnGT_neg(ref GT y, in GT x);
+        [DllImport(dllName)] public static extern void mclBnGT_inv(ref GT y, in GT x);
+        [DllImport(dllName)] public static extern void mclBnGT_add(ref GT z, in GT x, in GT y);
+        [DllImport(dllName)] public static extern void mclBnGT_sub(ref GT z, in GT x, in GT y);
+        [DllImport(dllName)] public static extern void mclBnGT_mul(ref GT z, in GT x, in GT y);
+        [DllImport(dllName)] public static extern void mclBnGT_div(ref GT z, in GT x, in GT y);
+        [DllImport(dllName)] public static extern void mclBnGT_pow(ref GT z, in GT x, in Fr y);
+        [DllImport(dllName)] public static extern void mclBn_pairing(ref GT z, in G1 x, in G2 y);
+        [DllImport(dllName)] public static extern void mclBn_finalExp(ref GT y, in GT x);
+        [DllImport(dllName)] public static extern void mclBn_millerLoop(ref GT z, in G1 x, in G2 y);
         [DllImport(dllName)] public static extern int mclBnFp_setLittleEndianMod(ref Fp y, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
         [DllImport(dllName)] public static extern int mclBnFr_setLittleEndianMod(ref Fr y, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
-        [DllImport(dllName)] public static extern int mclBnFp_serialize([Out] StringBuilder buf, long maxBufSiz, ref Fr x);
-        [DllImport(dllName)] public static extern int mclBnFr_serialize([Out] StringBuilder buf, long maxBufSiz, ref Fr x);
-        [DllImport(dllName)] public static extern int mclBnG1_serialize([Out] StringBuilder buf, long maxBufSiz, ref G1 x);
-        [DllImport(dllName)] public static extern int mclBnG2_serialize([Out] StringBuilder buf, long maxBufSiz, ref G2 x);
-        [DllImport(dllName)] public static extern int mclBnGT_serialize([Out] StringBuilder buf, long maxBufSiz, ref GT x);
-        [DllImport(dllName)] public static extern int mclBnFr_deserialize(ref Fr x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
-        [DllImport(dllName)] public static extern int mclBnFp_deserialize(ref Fp x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
-        [DllImport(dllName)] public static extern int mclBnG1_deserialize(ref G1 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
-        [DllImport(dllName)] public static extern int mclBnG2_deserialize(ref G2 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
-        [DllImport(dllName)] public static extern int mclBnGT_deserialize(ref GT x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
+        [DllImport(dllName)] public static extern int mclBn_getFrByteSize();
+        [DllImport(dllName)] public static extern int mclBn_getFpByteSize();
+        [DllImport(dllName)] public static extern ulong mclBnFp_serialize([Out] byte[] buf, ulong maxBufSize, in Fp x);
+        [DllImport(dllName)] public static extern ulong mclBnFr_serialize([Out] byte[] buf, ulong maxBufSize, in Fr x);
+        [DllImport(dllName)] public static extern ulong mclBnG1_serialize([Out]byte[] buf, ulong maxBufSize, in G1 x);
+        [DllImport(dllName)] public static extern ulong mclBnG2_serialize([Out]byte[] buf, ulong maxBufSize, in G2 x);
+        [DllImport(dllName)] public static extern ulong mclBnFr_deserialize(ref Fr x, [In]byte[] buf, ulong bufSize);
+        [DllImport(dllName)] public static extern ulong mclBnFp_deserialize(ref Fp x, [In]byte[] buf, ulong bufSize);
+        [DllImport(dllName)] public static extern ulong mclBnG1_deserialize(ref G1 x, [In]byte[] buf, ulong bufSize);
+        [DllImport(dllName)] public static extern ulong mclBnG2_deserialize(ref G2 x, [In]byte[] buf, ulong bufSize);
 
         public static void Init(int curveType = BN254)
         {
@@ -141,19 +141,19 @@ public void SetStr(string s, int ioMode)
             }
             public bool IsValid()
             {
-                return mclBnFr_isValid(ref this) == 1;
+                return mclBnFr_isValid(this) == 1;
             }
-            public bool Equals(Fr rhs)
+            public bool Equals(in Fr rhs)
             {
-                return mclBnFr_isEqual(ref this, ref rhs) == 1;
+                return mclBnFr_isEqual(this, rhs) == 1;
             }
             public bool IsZero()
             {
-                return mclBnFr_isZero(ref this) == 1;
+                return mclBnFr_isZero(this) == 1;
             }
             public bool IsOne()
             {
-                return mclBnFr_isOne(ref this) == 1;
+                return mclBnFr_isOne(this) == 1;
             }
             public void SetByCSPRNG()
             {
@@ -168,61 +168,78 @@ public void SetHashOf(String s)
             public string GetStr(int ioMode)
             {
                 StringBuilder sb = new StringBuilder(1024);
-                long size = mclBnFr_getStr(sb, sb.Capacity, ref this, ioMode);
+                long size = mclBnFr_getStr(sb, sb.Capacity, this, ioMode);
                 if (size == 0) {
                     throw new InvalidOperationException("mclBnFr_getStr:");
                 }
                 return sb.ToString();
             }
-            public void Neg(Fr x)
+            public byte[] Serialize()
             {
-                mclBnFr_neg(ref this, ref x);
+                byte[] buf = new byte[mclBn_getFrByteSize()];
+                ulong n = mclBnFr_serialize(buf, (ulong)buf.Length, this);
+                if (n != (ulong)buf.Length) {
+                    throw new ArithmeticException("mclBnFr_serialize");
+                }
+                return buf;
+            }
+            public void Deserialize(byte[] buf)
+            {
+                ulong n = mclBnFr_deserialize(ref this, buf, (ulong)buf.Length);
+                if (n == 0) {
+                    throw new ArithmeticException("mclBnFr_deserialize");
+                }
+            }
+
+            public void Neg(in Fr x)
+            {
+                mclBnFr_neg(ref this, x);
             }
-            public void Inv(Fr x)
+            public void Inv(in Fr x)
             {
-                mclBnFr_inv(ref this, ref x);
+                mclBnFr_inv(ref this, x);
             }
-            public void Add(Fr x, Fr y)
+            public void Add(in Fr x, in Fr y)
             {
-                mclBnFr_add(ref this, ref x, ref y);
+                mclBnFr_add(ref this, x, y);
             }
-            public void Sub(Fr x, Fr y)
+            public void Sub(in Fr x, in Fr y)
             {
-                mclBnFr_sub(ref this, ref x, ref y);
+                mclBnFr_sub(ref this, x, y);
             }
-            public void Mul(Fr x, Fr y)
+            public void Mul(in Fr x, in Fr y)
             {
-                mclBnFr_mul(ref this, ref x, ref y);
+                mclBnFr_mul(ref this, x, y);
             }
-            public void Div(Fr x, Fr y)
+            public void Div(in Fr x, in Fr y)
             {
-                mclBnFr_div(ref this, ref x, ref y);
+                mclBnFr_div(ref this, x, y);
             }
-            public static Fr operator -(Fr x)
+            public static Fr operator -(in Fr x)
             {
                 Fr y = new Fr();
                 y.Neg(x);
                 return y;
             }
-            public static Fr operator +(Fr x, Fr y)
+            public static Fr operator +(in Fr x, in Fr y)
             {
                 Fr z = new Fr();
                 z.Add(x, y);
                 return z;
             }
-            public static Fr operator -(Fr x, Fr y)
+            public static Fr operator -(in Fr x, in Fr y)
             {
                 Fr z = new Fr();
                 z.Sub(x, y);
                 return z;
             }
-            public static Fr operator *(Fr x, Fr y)
+            public static Fr operator *(in Fr x, in Fr y)
             {
                 Fr z = new Fr();
                 z.Mul(x, y);
                 return z;
             }
-            public static Fr operator /(Fr x, Fr y)
+            public static Fr operator /(in Fr x, in Fr y)
             {
                 Fr z = new Fr();
                 z.Div(x, y);
@@ -248,19 +265,19 @@ public void SetStr(string s, int ioMode)
             }
             public bool IsValid()
             {
-                return mclBnFp_isValid(ref this) == 1;
+                return mclBnFp_isValid(this) == 1;
             }
-            public bool Equals(Fp rhs)
+            public bool Equals(in Fp rhs)
             {
-                return mclBnFp_isEqual(ref this, ref rhs) == 1;
+                return mclBnFp_isEqual(this, rhs) == 1;
             }
             public bool IsZero()
             {
-                return mclBnFp_isZero(ref this) == 1;
+                return mclBnFp_isZero(this) == 1;
             }
             public bool IsOne()
             {
-                return mclBnFp_isOne(ref this) == 1;
+                return mclBnFp_isOne(this) == 1;
             }
             public void SetByCSPRNG()
             {
@@ -269,61 +286,77 @@ public void SetByCSPRNG()
             public string GetStr(int ioMode)
             {
                 StringBuilder sb = new StringBuilder(1024);
-                long size = mclBnFp_getStr(sb, sb.Capacity, ref this, ioMode);
+                long size = mclBnFp_getStr(sb, sb.Capacity, this, ioMode);
                 if (size == 0) {
                     throw new InvalidOperationException("mclBnFp_getStr:");
                 }
                 return sb.ToString();
             }
-            public void Neg(Fp x)
+            public byte[] Serialize()
+            {
+                byte[] buf = new byte[mclBn_getFpByteSize()];
+                ulong n = mclBnFp_serialize(buf, (ulong)buf.Length, this);
+                if (n != (ulong)buf.Length) {
+                    throw new ArithmeticException("mclBnFp_serialize");
+                }
+                return buf;
+            }
+            public void Deserialize(byte[] buf)
+            {
+                ulong n = mclBnFp_deserialize(ref this, buf, (ulong)buf.Length);
+                if (n == 0) {
+                    throw new ArithmeticException("mclBnFp_deserialize");
+                }
+            }
+            public void Neg(in Fp x)
             {
-                mclBnFp_neg(ref this, ref x);
+                mclBnFp_neg(ref this, x);
             }
-            public void Inv(Fp x)
+            public void Inv(in Fp x)
             {
-                mclBnFp_inv(ref this, ref x);
+                mclBnFp_inv(ref this, x);
             }
-            public void Add(Fp x, Fp y)
+            public void Add(in Fp x, in Fp y)
             {
-                mclBnFp_add(ref this, ref x, ref y);
+                mclBnFp_add(ref this, x, y);
             }
-            public void Sub(Fp x, Fp y)
+            public void Sub(in Fp x, in Fp y)
             {
-                mclBnFp_sub(ref this, ref x, ref y);
+                mclBnFp_sub(ref this, x, y);
             }
-            public void Mul(Fp x, Fp y)
+            public void Mul(in Fp x, in Fp y)
             {
-                mclBnFp_mul(ref this, ref x, ref y);
+                mclBnFp_mul(ref this, x, y);
             }
-            public void Div(Fp x, Fp y)
+            public void Div(in Fp x, in Fp y)
             {
-                mclBnFp_div(ref this, ref x, ref y);
+                mclBnFp_div(ref this, x, y);
             }
-            public static Fp operator -(Fp x)
+            public static Fp operator -(in Fp x)
             {
                 Fp y = new Fp();
                 y.Neg(x);
                 return y;
             }
-            public static Fp operator +(Fp x, Fp y)
+            public static Fp operator +(in Fp x, in Fp y)
             {
                 Fp z = new Fp();
                 z.Add(x, y);
                 return z;
             }
-            public static Fp operator -(Fp x, Fp y)
+            public static Fp operator -(in Fp x, in Fp y)
             {
                 Fp z = new Fp();
                 z.Sub(x, y);
                 return z;
             }
-            public static Fp operator *(Fp x, Fp y)
+            public static Fp operator *(in Fp x, in Fp y)
             {
                 Fp z = new Fp();
                 z.Mul(x, y);
                 return z;
             }
-            public static Fp operator /(Fp x, Fp y)
+            public static Fp operator /(in Fp x, in Fp y)
             {
                 Fp z = new Fp();
                 z.Div(x, y);
@@ -349,15 +382,15 @@ public void SetStr(String s, int ioMode)
             }
             public bool IsValid()
             {
-                return mclBnG1_isValid(ref this) == 1;
+                return mclBnG1_isValid(this) == 1;
             }
-            public bool Equals(G1 rhs)
+            public bool Equals(in G1 rhs)
             {
-                return mclBnG1_isEqual(ref this, ref rhs) == 1;
+                return mclBnG1_isEqual(this, rhs) == 1;
             }
             public bool IsZero()
             {
-                return mclBnG1_isZero(ref this) == 1;
+                return mclBnG1_isZero(this) == 1;
             }
             public void HashAndMapTo(String s)
             {
@@ -368,31 +401,47 @@ public void HashAndMapTo(String s)
             public string GetStr(int ioMode)
             {
                 StringBuilder sb = new StringBuilder(1024);
-                long size = mclBnG1_getStr(sb, sb.Capacity, ref this, ioMode);
+                long size = mclBnG1_getStr(sb, sb.Capacity, this, ioMode);
                 if (size == 0) {
                     throw new InvalidOperationException("mclBnG1_getStr:");
                 }
                 return sb.ToString();
             }
-            public void Neg(G1 x)
+            public byte[] Serialize()
             {
-                mclBnG1_neg(ref this, ref x);
+                byte[] buf = new byte[mclBn_getFpByteSize()];
+                ulong n = mclBnG1_serialize(buf, (ulong)buf.Length, this);
+                if (n != (ulong)buf.Length) {
+                    throw new ArithmeticException("mclBnG1_serialize");
+                }
+                return buf;
+            }
+            public void Deserialize(byte[] buf)
+            {
+                ulong n = mclBnG1_deserialize(ref this, buf, (ulong)buf.Length);
+                if (n == 0) {
+                    throw new ArithmeticException("mclBnG1_deserialize");
+                }
+            }
+            public void Neg(in G1 x)
+            {
+                mclBnG1_neg(ref this, x);
             }
-            public void Dbl(G1 x)
+            public void Dbl(in G1 x)
             {
-                mclBnG1_dbl(ref this, ref x);
+                mclBnG1_dbl(ref this, x);
             }
-            public void Add(G1 x, G1 y)
+            public void Add(in G1 x, in G1 y)
             {
-                mclBnG1_add(ref this, ref x, ref y);
+                mclBnG1_add(ref this, x, y);
             }
-            public void Sub(G1 x, G1 y)
+            public void Sub(in G1 x, in G1 y)
             {
-                mclBnG1_sub(ref this, ref x, ref y);
+                mclBnG1_sub(ref this, x, y);
             }
-            public void Mul(G1 x, Fr y)
+            public void Mul(in G1 x, in Fr y)
             {
-                mclBnG1_mul(ref this, ref x, ref y);
+                mclBnG1_mul(ref this, x, y);
             }
         }
         [StructLayout(LayoutKind.Sequential)]
@@ -410,15 +459,15 @@ public void SetStr(String s, int ioMode)
             }
             public bool IsValid()
             {
-                return mclBnG2_isValid(ref this) == 1;
+                return mclBnG2_isValid(this) == 1;
             }
-            public bool Equals(G2 rhs)
+            public bool Equals(in G2 rhs)
             {
-                return mclBnG2_isEqual(ref this, ref rhs) == 1;
+                return mclBnG2_isEqual(this, rhs) == 1;
             }
             public bool IsZero()
             {
-                return mclBnG2_isZero(ref this) == 1;
+                return mclBnG2_isZero(this) == 1;
             }
             public void HashAndMapTo(String s)
             {
@@ -429,31 +478,47 @@ public void HashAndMapTo(String s)
             public string GetStr(int ioMode)
             {
                 StringBuilder sb = new StringBuilder(1024);
-                long size = mclBnG2_getStr(sb, sb.Capacity, ref this, ioMode);
+                long size = mclBnG2_getStr(sb, sb.Capacity, this, ioMode);
                 if (size == 0) {
                     throw new InvalidOperationException("mclBnG2_getStr:");
                 }
                 return sb.ToString();
             }
-            public void Neg(G2 x)
+            public byte[] Serialize()
+            {
+                byte[] buf = new byte[mclBn_getFpByteSize() * 2];
+                ulong n = mclBnG2_serialize(buf, (ulong)buf.Length, this);
+                if (n != (ulong)buf.Length) {
+                    throw new ArithmeticException("mclBnG2_serialize");
+                }
+                return buf;
+            }
+            public void Deserialize(byte[] buf)
+            {
+                ulong n = mclBnG2_deserialize(ref this, buf, (ulong)buf.Length);
+                if (n == 0) {
+                    throw new ArithmeticException("mclBnG2_deserialize");
+                }
+            }
+            public void Neg(in G2 x)
             {
-                mclBnG2_neg(ref this, ref x);
+                mclBnG2_neg(ref this, x);
             }
-            public void Dbl(G2 x)
+            public void Dbl(in G2 x)
             {
-                mclBnG2_dbl(ref this, ref x);
+                mclBnG2_dbl(ref this, x);
             }
-            public void Add(G2 x, G2 y)
+            public void Add(in G2 x, in G2 y)
             {
-                mclBnG2_add(ref this, ref x, ref y);
+                mclBnG2_add(ref this, x, y);
             }
-            public void Sub(G2 x, G2 y)
+            public void Sub(in G2 x, in G2 y)
             {
-                mclBnG2_sub(ref this, ref x, ref y);
+                mclBnG2_sub(ref this, x, y);
             }
-            public void Mul(G2 x, Fr y)
+            public void Mul(in G2 x, Fr y)
             {
-                mclBnG2_mul(ref this, ref x, ref y);
+                mclBnG2_mul(ref this, x, y);
             }
         }
         [StructLayout(LayoutKind.Sequential)]
@@ -469,96 +534,96 @@ public void SetStr(String s, int ioMode)
                     throw new ArgumentException("mclBnGT_setStr:" + s);
                 }
             }
-            public bool Equals(GT rhs)
+            public bool Equals(in GT rhs)
             {
-                return mclBnGT_isEqual(ref this, ref rhs) == 1;
+                return mclBnGT_isEqual(this, rhs) == 1;
             }
             public bool IsZero()
             {
-                return mclBnGT_isZero(ref this) == 1;
+                return mclBnGT_isZero(this) == 1;
             }
             public bool IsOne()
             {
-                return mclBnGT_isOne(ref this) == 1;
+                return mclBnGT_isOne(this) == 1;
             }
             public string GetStr(int ioMode)
             {
                 StringBuilder sb = new StringBuilder(1024);
-                long size = mclBnGT_getStr(sb, sb.Capacity, ref this, ioMode);
+                long size = mclBnGT_getStr(sb, sb.Capacity, this, ioMode);
                 if (size == 0) {
                     throw new InvalidOperationException("mclBnGT_getStr:");
                 }
                 return sb.ToString();
             }
-            public void Neg(GT x)
+            public void Neg(in GT x)
             {
-                mclBnGT_neg(ref this, ref x);
+                mclBnGT_neg(ref this, x);
             }
-            public void Inv(GT x)
+            public void Inv(in GT x)
             {
-                mclBnGT_inv(ref this, ref x);
+                mclBnGT_inv(ref this, x);
             }
-            public void Add(GT x, GT y)
+            public void Add(in GT x, in GT y)
             {
-                mclBnGT_add(ref this, ref x, ref y);
+                mclBnGT_add(ref this, x, y);
             }
-            public void Sub(GT x, GT y)
+            public void Sub(in GT x, in GT y)
             {
-                mclBnGT_sub(ref this, ref x, ref y);
+                mclBnGT_sub(ref this, x, y);
             }
-            public void Mul(GT x, GT y)
+            public void Mul(in GT x, in GT y)
             {
-                mclBnGT_mul(ref this, ref x, ref y);
+                mclBnGT_mul(ref this, x, y);
             }
-            public void Div(GT x, GT y)
+            public void Div(in GT x, in GT y)
             {
-                mclBnGT_div(ref this, ref x, ref y);
+                mclBnGT_div(ref this, x, y);
             }
-            public static GT operator -(GT x)
+            public static GT operator -(in GT x)
             {
                 GT y = new GT();
                 y.Neg(x);
                 return y;
             }
-            public static GT operator +(GT x, GT y)
+            public static GT operator +(in GT x, in GT y)
             {
                 GT z = new GT();
                 z.Add(x, y);
                 return z;
             }
-            public static GT operator -(GT x, GT y)
+            public static GT operator -(in GT x, in GT y)
             {
                 GT z = new GT();
                 z.Sub(x, y);
                 return z;
             }
-            public static GT operator *(GT x, GT y)
+            public static GT operator *(in GT x, in GT y)
             {
                 GT z = new GT();
                 z.Mul(x, y);
                 return z;
             }
-            public static GT operator /(GT x, GT y)
+            public static GT operator /(in GT x, in GT y)
             {
                 GT z = new GT();
                 z.Div(x, y);
                 return z;
             }
-            public void Pow(GT x, Fr y)
+            public void Pow(in GT x, in Fr y)
             {
-                mclBnGT_pow(ref this, ref x, ref y);
+                mclBnGT_pow(ref this, x, y);
             }
-            public void Pairing(G1 x, G2 y)
+            public void Pairing(in G1 x, in G2 y)
             {
-                mclBn_pairing(ref this, ref x, ref y);
+                mclBn_pairing(ref this, x, y);
             }
-            public void FinalExp(GT x)
+            public void FinalExp(in GT x)
             {
-                mclBn_finalExp(ref this, ref x);
+                mclBn_finalExp(ref this, x);
             }
-            public void MillerLoop(G1 x, G2 y)
+            public void MillerLoop(in G1 x, in G2 y)
             {
-                mclBn_millerLoop(ref this, ref x, ref y);
+                mclBn_millerLoop(ref this, x, y);
             }
         }
     }
diff --git a/ffi/cs/test/test.cs b/ffi/cs/test/test.cs
index bf1002e5..6e854fa5 100644
--- a/ffi/cs/test/test.cs
+++ b/ffi/cs/test/test.cs
@@ -11,25 +11,15 @@ static void assert(string msg, bool b)
             err++;
         }
         static void Main(string[] args)
-        {
-            Console.WriteLine("BN254");
-            TestCurve(BN254);
-            Console.WriteLine("BN_SNARK");
-            TestCurve(BN_SNARK);
-            Console.WriteLine("BLS12_381");
-            TestCurve(BLS12_381);
-        }
-
-        static void TestCurve(int curveType)
-
         {
             err = 0;
             try {
-                Init(curveType);
-                TestFr();
-                TestG1();
-                TestG2();
-                TestPairing();
+                Console.WriteLine("BN254");
+                TestCurve(BN254);
+                Console.WriteLine("BN_SNARK");
+                TestCurve(BN_SNARK);
+                Console.WriteLine("BLS12_381");
+                TestCurve(BLS12_381);
                 if (err == 0) {
                     Console.WriteLine("all tests succeed");
                 } else {
@@ -39,6 +29,17 @@ static void TestCurve(int curveType)
                 Console.WriteLine("ERR={0}", e);
             }
         }
+
+        static void TestCurve(int curveType)
+
+        {
+            Init(curveType);
+            TestFr();
+            TestFp();
+            TestG1();
+            TestG2();
+            TestPairing();
+        }
         static void TestFr()
         {
             Console.WriteLine("TestFr");
@@ -78,6 +79,56 @@ static void TestFr()
             }
             x.SetStr("1234567891234", 10);
             assert("1234567891234", x.GetStr(10) == "1234567891234");
+            {
+                byte[] buf = x.Serialize();
+                y.Deserialize(buf);
+                assert("x == y", x.Equals(y));
+            }
+        }
+        static void TestFp()
+        {
+            Console.WriteLine("TestFp");
+            Fp x = new Fp();
+            x.Clear();
+            assert("0", x.GetStr(10) == "0");
+            assert("0.IzZero", x.IsZero());
+            assert("!0.IzOne", !x.IsOne());
+            x.SetInt(1);
+            assert("1", x.GetStr(10) == "1");
+            assert("!1.IzZero", !x.IsZero());
+            assert("1.IzOne", x.IsOne());
+            x.SetInt(3);
+            assert("3", x.GetStr(10) == "3");
+            assert("!3.IzZero", !x.IsZero());
+            assert("!3.IzOne", !x.IsOne());
+            x.SetInt(-5);
+            x = -x;
+            assert("5", x.GetStr(10) == "5");
+            x.SetInt(4);
+            x = x * x;
+            assert("16", x.GetStr(10) == "16");
+            assert("10", x.GetStr(16) == "10");
+            Fp y;
+            y = x;
+            assert("x == y", x.Equals(y));
+            x.SetInt(123);
+            assert("123", x.GetStr(10) == "123");
+            assert("7b", x.GetStr(16) == "7b");
+            assert("y != x", !x.Equals(y));
+            Console.WriteLine("exception test");
+            try {
+                x.SetStr("1234567891234x", 10);
+                Console.WriteLine("x = {0}", x);
+            } catch (Exception e) {
+                Console.WriteLine("OK ; expected exception: {0}", e);
+            }
+            x.SetStr("1234567891234", 10);
+            assert("1234567891234", x.GetStr(10) == "1234567891234");
+            {
+                byte[] buf = x.Serialize();
+                y.Deserialize(buf);
+                assert("x == y", x.Equals(y));
+            }
         }
         static void TestG1()
         {
@@ -104,6 +155,12 @@ static void TestG1()
             R.Add(R, P);
             Q.Mul(P, x);
             assert("Q == R", Q.Equals(R));
+            {
+                byte[] buf = P.Serialize();
+                Q.Clear();
+                Q.Deserialize(buf);
+                assert("P == Q", P.Equals(Q));
+            }
         }
         static void TestG2()
         {
@@ -130,6 +187,12 @@ static void TestG2()
             R.Add(R, P);
             Q.Mul(P, x);
             assert("Q == R", Q.Equals(R));
+            {
+                byte[] buf = P.Serialize();
+                Q.Clear();
+                Q.Deserialize(buf);
+                assert("P == Q", P.Equals(Q));
+            }
         }
         static void TestPairing()
         {

From 58898ff5c7c52b6421c12445349978913242d39e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 20 Jul 2020 15:42:28 +0900
Subject: [PATCH 259/553] [C#] add Normalize

---
 ffi/cs/mcl/mcl.cs | 54 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index e0c778e8..84caac3d 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -7,6 +7,7 @@ public class MCL {
         public const int BN254 = 0;
         public const int BN_SNARK = 4;
         public const int BLS12_381 = 5;
+        public const int MCL_MAP_TO_MODE_HASH_TO_CURVE = 5;
         public const int FR_UNIT_SIZE = 4;
         public const int FP_UNIT_SIZE = 6; // 4 if mclbn256.dll is used
 
@@ -16,6 +17,8 @@ public class MCL {
 
         public const string dllName = "mclbn384_256";
         [DllImport(dllName)] public static extern int mclBn_init(int curve, int compiledTimeVar);
+        [DllImport(dllName)] public static extern void mclBn_setETHserialization(int enable);
+        [DllImport(dllName)] public static extern int mclBn_setMapToMode(int mode);
         [DllImport(dllName)] public static extern void mclBnFr_clear(ref Fr x);
         [DllImport(dllName)] public static extern void mclBnFr_setInt(ref Fr y, int x);
         [DllImport(dllName)] public static extern int mclBnFr_setStr(ref Fr x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
@@ -53,6 +56,7 @@ public class MCL {
         [DllImport(dllName)] public static extern void mclBnFp_sub(ref Fp z, in Fp x, in Fp y);
         [DllImport(dllName)] public static extern void mclBnFp_mul(ref Fp z, in Fp x, in Fp y);
         [DllImport(dllName)] public static extern void mclBnFp_div(ref Fp z, in Fp x, in Fp y);
+
         [DllImport(dllName)] public static extern void mclBnG1_clear(ref G1 x);
         [DllImport(dllName)] public static extern int mclBnG1_setStr(ref G1 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
         [DllImport(dllName)] public static extern int mclBnG1_isValid(in G1 x);
@@ -62,9 +66,11 @@ public class MCL {
         [DllImport(dllName)] public static extern long mclBnG1_getStr([Out] StringBuilder buf, long maxBufSize, in G1 x, int ioMode);
         [DllImport(dllName)] public static extern void mclBnG1_neg(ref G1 y, in G1 x);
         [DllImport(dllName)] public static extern void mclBnG1_dbl(ref G1 y, in G1 x);
+        [DllImport(dllName)] public static extern void mclBnG1_normalize(ref G1 y, in G1 x);
         [DllImport(dllName)] public static extern void mclBnG1_add(ref G1 z, in G1 x, in G1 y);
         [DllImport(dllName)] public static extern void mclBnG1_sub(ref G1 z, in G1 x, in G1 y);
         [DllImport(dllName)] public static extern void mclBnG1_mul(ref G1 z, in G1 x, in Fr y);
+
         [DllImport(dllName)] public static extern void mclBnG2_clear(ref G2 x);
         [DllImport(dllName)] public static extern int mclBnG2_setStr(ref G2 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
         [DllImport(dllName)] public static extern int mclBnG2_isValid(in G2 x);
@@ -74,6 +80,7 @@ public class MCL {
         [DllImport(dllName)] public static extern long mclBnG2_getStr([Out] StringBuilder buf, long maxBufSize, in G2 x, int ioMode);
         [DllImport(dllName)] public static extern void mclBnG2_neg(ref G2 y, in G2 x);
         [DllImport(dllName)] public static extern void mclBnG2_dbl(ref G2 y, in G2 x);
+        [DllImport(dllName)] public static extern void mclBnG2_normalize(ref G2 y, in G2 x);
         [DllImport(dllName)] public static extern void mclBnG2_add(ref G2 z, in G2 x, in G2 y);
         [DllImport(dllName)] public static extern void mclBnG2_sub(ref G2 z, in G2 x, in G2 y);
         [DllImport(dllName)] public static extern void mclBnG2_mul(ref G2 z, in G2 x, in Fr y);
@@ -94,8 +101,10 @@ public class MCL {
         [DllImport(dllName)] public static extern void mclBn_pairing(ref GT z, in G1 x, in G2 y);
         [DllImport(dllName)] public static extern void mclBn_finalExp(ref GT y, in GT x);
         [DllImport(dllName)] public static extern void mclBn_millerLoop(ref GT z, in G1 x, in G2 y);
-        [DllImport(dllName)] public static extern int mclBnFp_setLittleEndianMod(ref Fp y, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
-        [DllImport(dllName)] public static extern int mclBnFr_setLittleEndianMod(ref Fr y, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
+        [DllImport(dllName)] public static extern ulong mclBnFp_setLittleEndianMod(ref Fp y, [In] byte[] buf, ulong bufSize);
+        [DllImport(dllName)] public static extern ulong mclBnFr_setLittleEndianMod(ref Fr y, [In] byte[] buf, ulong bufSize);
+        [DllImport(dllName)] public static extern ulong mclBnFp_setBigEndianMod(ref Fp y, [In] byte[] buf, ulong bufSize);
+        [DllImport(dllName)] public static extern ulong mclBnFr_setBigEndianMod(ref Fr y, [In] byte[] buf, ulong bufSize);
         [DllImport(dllName)] public static extern int mclBn_getFrByteSize();
         [DllImport(dllName)] public static extern int mclBn_getFpByteSize();
         [DllImport(dllName)] public static extern ulong mclBnFp_serialize([Out] byte[] buf, ulong maxBufSize, in Fp x);
@@ -117,6 +126,11 @@ public static void Init(int curveType = BN254)
                 throw new ArgumentException("mclBn_init");
             }
         }
+        public static void ETHmode()
+        {
+            mclBn_setETHserialization(1);
+            mclBn_setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE);
+        }
         [StructLayout(LayoutKind.Sequential)]
         struct U128 {
             private ulong v0, v1;
@@ -190,6 +204,20 @@ public void Deserialize(byte[] buf)
                     throw new ArithmeticException("mclBnFr_deserialize");
                 }
             }
+            public void SetLittleEndianMod(byte[] buf)
+            {
+                ulong n = mclBnFr_setLittleEndianMod(ref this, buf, (ulong)buf.Length);
+                if (n == 0) {
+                    throw new ArithmeticException("mclBnFr_setLittleEndianMod");
+                }
+            }
+            public void SetBigEndianMod(byte[] buf)
+            {
+                ulong n = mclBnFr_setBigEndianMod(ref this, buf, (ulong)buf.Length);
+                if (n == 0) {
+                    throw new ArithmeticException("mclBnFr_setBigEndianMod");
+                }
+            }
 
             public void Neg(in Fr x)
             {
@@ -308,6 +336,20 @@ public void Deserialize(byte[] buf)
                     throw new ArithmeticException("mclBnFp_deserialize");
                 }
             }
+            public void SetLittleEndianMod(byte[] buf)
+            {
+                ulong n = mclBnFp_setLittleEndianMod(ref this, buf, (ulong)buf.Length);
+                if (n == 0) {
+                    throw new ArithmeticException("mclBnFp_setLittleEndianMod");
+                }
+            }
+            public void SetBigEndianMod(byte[] buf)
+            {
+                ulong n = mclBnFp_setBigEndianMod(ref this, buf, (ulong)buf.Length);
+                if (n == 0) {
+                    throw new ArithmeticException("mclBnFp_setBigEndianMod");
+                }
+            }
             public void Neg(in Fp x)
             {
                 mclBnFp_neg(ref this, x);
@@ -431,6 +473,10 @@ public void Dbl(in G1 x)
             {
                 mclBnG1_dbl(ref this, x);
             }
+            public void Normalize(in G1 x)
+            {
+                mclBnG1_normalize(ref this, x);
+            }
             public void Add(in G1 x, in G1 y)
             {
                 mclBnG1_add(ref this, x, y);
@@ -508,6 +554,10 @@ public void Dbl(in G2 x)
             {
                 mclBnG2_dbl(ref this, x);
             }
+            public void Normalize(in G2 x)
+            {
+                mclBnG2_normalize(ref this, x);
+            }
             public void Add(in G2 x, in G2 y)
             {
                 mclBnG2_add(ref this, x, y);

From aa9ace2f82a5130d050f32f8e3f7be07ac21156a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 20 Jul 2020 15:46:46 +0900
Subject: [PATCH 260/553] [C#] add readme.md

---
 ffi/cs/readme.md | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 ffi/cs/readme.md

diff --git a/ffi/cs/readme.md b/ffi/cs/readme.md
new file mode 100644
index 00000000..e5f27856
--- /dev/null
+++ b/ffi/cs/readme.md
@@ -0,0 +1,11 @@
+# C# binding of mcl library
+
+# How to build `bin/mclbn384_256.dll`.
+
+```
+git clone https://github.com/herumi/mcl
+cd mcl
+mklib dll
+```
+
+Open `ffi/cs/mcl.sln` and Set the directory of `mcl/bin` to `workingDirectory` at `Debug` of test project.

From d4d9091e540f30bccc0493ec246dc03c7fea26dd Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 21 Jul 2020 15:41:37 +0900
Subject: [PATCH 261/553] change the return type of getCurveParam

---
 include/mcl/curve_type.h       | 19 +++++++++----------
 include/mcl/impl/bn_c_impl.hpp |  5 +++--
 sample/she_make_dlp_table.cpp  |  2 +-
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/include/mcl/curve_type.h b/include/mcl/curve_type.h
index 454f8d8c..1a2d7593 100644
--- a/include/mcl/curve_type.h
+++ b/include/mcl/curve_type.h
@@ -96,19 +96,18 @@ const CurveParam BN160 = { "0x4000000031", 3, 4, false, MCL_BN160 };
 	#pragma GCC diagnostic push
 	#pragma GCC diagnostic ignored "-Wreturn-type-c-linkage"
 #endif
-inline const CurveParam& getCurveParam(int type)
+inline const CurveParam* getCurveParam(int type)
 {
 	switch (type) {
-	case MCL_BN254: return mcl::BN254;
-	case MCL_BN381_1: return mcl::BN381_1;
-	case MCL_BN381_2: return mcl::BN381_2;
-	case MCL_BN462: return mcl::BN462;
-	case MCL_BN_SNARK1: return mcl::BN_SNARK1;
-	case MCL_BLS12_381: return mcl::BLS12_381;
-	case MCL_BN160: return mcl::BN160;
+	case MCL_BN254: return &mcl::BN254;
+	case MCL_BN381_1: return &mcl::BN381_1;
+	case MCL_BN381_2: return &mcl::BN381_2;
+	case MCL_BN462: return &mcl::BN462;
+	case MCL_BN_SNARK1: return &mcl::BN_SNARK1;
+	case MCL_BLS12_381: return &mcl::BLS12_381;
+	case MCL_BN160: return &mcl::BN160;
 	default:
-		assert(0);
-		return mcl::BN254;
+		return 0;
 	}
 }
 #ifdef __clang__
diff --git a/include/mcl/impl/bn_c_impl.hpp b/include/mcl/impl/bn_c_impl.hpp
index cf0b5475..fcaa151b 100644
--- a/include/mcl/impl/bn_c_impl.hpp
+++ b/include/mcl/impl/bn_c_impl.hpp
@@ -77,9 +77,10 @@ int mclBn_init(int curve, int compiledTimeVar)
 		initG1only(&b, *para);
 		return b ? 0 : -1;
 	}
-	const mcl::CurveParam& cp = mcl::getCurveParam(curve);
+	const mcl::CurveParam* cp = mcl::getCurveParam(curve);
+	if (cp == 0) return -1;
 	bool b;
-	initPairing(&b, cp);
+	initPairing(&b, *cp);
 	return b ? 0 : -1;
 }
 
diff --git a/sample/she_make_dlp_table.cpp b/sample/she_make_dlp_table.cpp
index 41f18e22..86dfff2c 100644
--- a/sample/she_make_dlp_table.cpp
+++ b/sample/she_make_dlp_table.cpp
@@ -30,7 +30,7 @@ void makeTable(const Param& param, const char *groupStr, HashTable& hashTbl, con
 
 void run(const Param& param)
 {
-	SHE::init(mcl::getCurveParam(param.curveType));
+	SHE::init(*mcl::getCurveParam(param.curveType));
 
 	switch (param.group) {
 	case 1:

From 5b221f05eb1ff23d0002ff27a4df81e9e431b173 Mon Sep 17 00:00:00 2001
From: jonny rhea <jonathan.rhea@gmail.com>
Date: Thu, 16 Jul 2020 16:10:16 -0500
Subject: [PATCH 262/553] fixes a CALL16 reloc error when compiling with mips
 toolchain.

---
 src/gen.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gen.cpp b/src/gen.cpp
index e60d404d..3ede9fe4 100644
--- a/src/gen.cpp
+++ b/src/gen.cpp
@@ -671,7 +671,6 @@ struct Code : public mcl::Generator {
 		Operand y(Int, unit);
 		std::string name = "mulPv" + cybozu::itoa(bit) + "x" + cybozu::itoa(unit);
 		mulPvM[bit] = Function(name, z, px, y);
-		mulPvM[bit].setPrivate();
 		verifyAndSetPrivate(mulPvM[bit]);
 		beginFunc(mulPvM[bit]);
 		OperandVec L(N), H(N);

From 4a23b96b30fa0d4fd122bc03a806d6efa8ef54e5 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 24 Jul 2020 17:22:17 +0900
Subject: [PATCH 263/553] link to pull/82

---
 src/gen.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gen.cpp b/src/gen.cpp
index 3ede9fe4..ca8af98c 100644
--- a/src/gen.cpp
+++ b/src/gen.cpp
@@ -671,6 +671,8 @@ struct Code : public mcl::Generator {
 		Operand y(Int, unit);
 		std::string name = "mulPv" + cybozu::itoa(bit) + "x" + cybozu::itoa(unit);
 		mulPvM[bit] = Function(name, z, px, y);
+		// workaround at https://github.com/herumi/mcl/pull/82
+//		mulPvM[bit].setPrivate();
 		verifyAndSetPrivate(mulPvM[bit]);
 		beginFunc(mulPvM[bit]);
 		OperandVec L(N), H(N);

From b4d8a29e5d312e255919e7a0491cb66a9f92a698 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 24 Jul 2020 18:08:29 +0900
Subject: [PATCH 264/553] [C#] add static member

---
 ffi/cs/mcl/mcl.cs | 230 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 197 insertions(+), 33 deletions(-)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index 84caac3d..49483eee 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -33,6 +33,7 @@ public class MCL {
 
         [DllImport(dllName)] public static extern void mclBnFr_neg(ref Fr y, in Fr x);
         [DllImport(dllName)] public static extern void mclBnFr_inv(ref Fr y, in Fr x);
+        [DllImport(dllName)] public static extern void mclBnFr_sqr(ref Fr y, in Fr x);
         [DllImport(dllName)] public static extern void mclBnFr_add(ref Fr z, in Fr x, in Fr y);
         [DllImport(dllName)] public static extern void mclBnFr_sub(ref Fr z, in Fr x, in Fr y);
         [DllImport(dllName)] public static extern void mclBnFr_mul(ref Fr z, in Fr x, in Fr y);
@@ -52,6 +53,7 @@ public class MCL {
 
         [DllImport(dllName)] public static extern void mclBnFp_neg(ref Fp y, in Fp x);
         [DllImport(dllName)] public static extern void mclBnFp_inv(ref Fp y, in Fp x);
+        [DllImport(dllName)] public static extern void mclBnFp_sqr(ref Fp y, in Fp x);
         [DllImport(dllName)] public static extern void mclBnFp_add(ref Fp z, in Fp x, in Fp y);
         [DllImport(dllName)] public static extern void mclBnFp_sub(ref Fp z, in Fp x, in Fp y);
         [DllImport(dllName)] public static extern void mclBnFp_mul(ref Fp z, in Fp x, in Fp y);
@@ -93,6 +95,7 @@ public class MCL {
         [DllImport(dllName)] public static extern long mclBnGT_getStr([Out] StringBuilder buf, long maxBufSize, in GT x, int ioMode);
         [DllImport(dllName)] public static extern void mclBnGT_neg(ref GT y, in GT x);
         [DllImport(dllName)] public static extern void mclBnGT_inv(ref GT y, in GT x);
+        [DllImport(dllName)] public static extern void mclBnGT_sqr(ref GT y, in GT x);
         [DllImport(dllName)] public static extern void mclBnGT_add(ref GT z, in GT x, in GT y);
         [DllImport(dllName)] public static extern void mclBnGT_sub(ref GT z, in GT x, in GT y);
         [DllImport(dllName)] public static extern void mclBnGT_mul(ref GT z, in GT x, in GT y);
@@ -131,6 +134,155 @@ public static void ETHmode()
             mclBn_setETHserialization(1);
             mclBn_setMapToMode(MCL_MAP_TO_MODE_HASH_TO_CURVE);
         }
+        public static void Add(ref Fr z, in Fr x, in Fr y)
+        {
+            mclBnFr_add(ref z, x, y);
+        }
+        public static void Sub(ref Fr z, in Fr x, in Fr y)
+        {
+            mclBnFr_sub(ref z, x, y);
+        }
+        public static void Mul(ref Fr z, in Fr x, in Fr y)
+        {
+            mclBnFr_mul(ref z, x, y);
+        }
+        public static void Div(ref Fr z, in Fr x, in Fr y)
+        {
+            mclBnFr_div(ref z, x, y);
+        }
+        public static void Neg(ref Fr y, in Fr x)
+        {
+            mclBnFr_neg(ref y, x);
+        }
+        public static void Inv(ref Fr y, in Fr x)
+        {
+            mclBnFr_inv(ref y, x);
+        }
+        public static void Sqr(ref Fr y, in Fr x)
+        {
+            mclBnFr_sqr(ref y, x);
+        }
+
+        public static void Add(ref Fp z, in Fp x, in Fp y)
+        {
+            mclBnFp_add(ref z, x, y);
+        }
+        public static void Sub(ref Fp z, in Fp x, in Fp y)
+        {
+            mclBnFp_sub(ref z, x, y);
+        }
+        public static void Mul(ref Fp z, in Fp x, in Fp y)
+        {
+            mclBnFp_mul(ref z, x, y);
+        }
+        public static void Div(ref Fp z, in Fp x, in Fp y)
+        {
+            mclBnFp_div(ref z, x, y);
+        }
+        public static void Neg(ref Fp y, in Fp x)
+        {
+            mclBnFp_neg(ref y, x);
+        }
+        public static void Inv(ref Fp y, in Fp x)
+        {
+            mclBnFp_inv(ref y, x);
+        }
+        public static void Sqr(ref Fp y, in Fp x)
+        {
+            mclBnFp_sqr(ref y, x);
+        }
+        public static void Add(ref G1 z, in G1 x, in G1 y)
+        {
+            mclBnG1_add(ref z, x, y);
+        }
+        public static void Sub(ref G1 z, in G1 x, in G1 y)
+        {
+            mclBnG1_sub(ref z, x, y);
+        }
+        public static void Mul(ref G1 z, in G1 x, in Fr y)
+        {
+            mclBnG1_mul(ref z, x, y);
+        }
+        public static void Neg(ref G1 y, in G1 x)
+        {
+            mclBnG1_neg(ref y, x);
+        }
+        public static void Dbl(ref G1 y, in G1 x)
+        {
+            mclBnG1_dbl(ref y, x);
+        }
+        public static void Normalize(ref G1 y, in G1 x)
+        {
+            mclBnG1_normalize(ref y, x);
+        }
+        public static void Add(ref G2 z, in G2 x, in G2 y)
+        {
+            mclBnG2_add(ref z, x, y);
+        }
+        public static void Sub(ref G2 z, in G2 x, in G2 y)
+        {
+            mclBnG2_sub(ref z, x, y);
+        }
+        public static void Mul(ref G2 z, in G2 x, in Fr y)
+        {
+            mclBnG2_mul(ref z, x, y);
+        }
+        public static void Neg(ref G2 y, in G2 x)
+        {
+            mclBnG2_neg(ref y, x);
+        }
+        public static void Dbl(ref G2 y, in G2 x)
+        {
+            mclBnG2_dbl(ref y, x);
+        }
+        public static void Normalize(ref G2 y, in G2 x)
+        {
+            mclBnG2_normalize(ref y, x);
+        }
+        public static void Add(ref GT z, in GT x, in GT y)
+        {
+            mclBnGT_add(ref z, x, y);
+        }
+        public static void Sub(ref GT z, in GT x, in GT y)
+        {
+            mclBnGT_sub(ref z, x, y);
+        }
+        public static void Mul(ref GT z, in GT x, in GT y)
+        {
+            mclBnGT_mul(ref z, x, y);
+        }
+        public static void Div(ref GT z, in GT x, in GT y)
+        {
+            mclBnGT_div(ref z, x, y);
+        }
+        public static void Neg(ref GT y, in GT x)
+        {
+            mclBnGT_neg(ref y, x);
+        }
+        public static void Inv(ref GT y, in GT x)
+        {
+            mclBnGT_inv(ref y, x);
+        }
+        public static void Sqr(ref GT y, in GT x)
+        {
+            mclBnGT_sqr(ref y, x);
+        }
+        public static void Pow(ref GT z, in GT x, in Fr y)
+        {
+            mclBnGT_pow(ref z, x, y);
+        }
+        public static void Pairing(ref GT z, in G1 x, in G2 y)
+        {
+            mclBn_pairing(ref z, x, y);
+        }
+        public static void FinalExp(ref GT y, in GT x)
+        {
+            mclBn_finalExp(ref y, x);
+        }
+        public static void MillerLoop(ref GT z, in G1 x, in G2 y)
+        {
+            mclBn_millerLoop(ref z, x, y);
+        }
         [StructLayout(LayoutKind.Sequential)]
         struct U128 {
             private ulong v0, v1;
@@ -227,21 +379,25 @@ public void Inv(in Fr x)
             {
                 mclBnFr_inv(ref this, x);
             }
+            public void Sqr(in Fr x)
+            {
+                MCL.Sqr(ref this, x);
+            }
             public void Add(in Fr x, in Fr y)
             {
-                mclBnFr_add(ref this, x, y);
+                MCL.Add(ref this, x, y);
             }
             public void Sub(in Fr x, in Fr y)
             {
-                mclBnFr_sub(ref this, x, y);
+                MCL.Sub(ref this, x, y);
             }
             public void Mul(in Fr x, in Fr y)
             {
-                mclBnFr_mul(ref this, x, y);
+                MCL.Mul(ref this, x, y);
             }
             public void Div(in Fr x, in Fr y)
             {
-                mclBnFr_div(ref this, x, y);
+                MCL.Div(ref this, x, y);
             }
             public static Fr operator -(in Fr x)
             {
@@ -352,27 +508,31 @@ public void SetBigEndianMod(byte[] buf)
             }
             public void Neg(in Fp x)
             {
-                mclBnFp_neg(ref this, x);
+                MCL.Neg(ref this, x);
             }
             public void Inv(in Fp x)
             {
-                mclBnFp_inv(ref this, x);
+                MCL.Inv(ref this, x);
+            }
+            public void Sqr(in Fp x)
+            {
+                MCL.Sqr(ref this, x);
             }
             public void Add(in Fp x, in Fp y)
             {
-                mclBnFp_add(ref this, x, y);
+                MCL.Add(ref this, x, y);
             }
             public void Sub(in Fp x, in Fp y)
             {
-                mclBnFp_sub(ref this, x, y);
+                MCL.Sub(ref this, x, y);
             }
             public void Mul(in Fp x, in Fp y)
             {
-                mclBnFp_mul(ref this, x, y);
+                MCL.Mul(ref this, x, y);
             }
             public void Div(in Fp x, in Fp y)
             {
-                mclBnFp_div(ref this, x, y);
+                MCL.Div(ref this, x, y);
             }
             public static Fp operator -(in Fp x)
             {
@@ -467,27 +627,27 @@ public void Deserialize(byte[] buf)
             }
             public void Neg(in G1 x)
             {
-                mclBnG1_neg(ref this, x);
+                MCL.Neg(ref this, x);
             }
             public void Dbl(in G1 x)
             {
-                mclBnG1_dbl(ref this, x);
+                MCL.Dbl(ref this, x);
             }
             public void Normalize(in G1 x)
             {
-                mclBnG1_normalize(ref this, x);
+                MCL.Normalize(ref this, x);
             }
             public void Add(in G1 x, in G1 y)
             {
-                mclBnG1_add(ref this, x, y);
+                MCL.Add(ref this, x, y);
             }
             public void Sub(in G1 x, in G1 y)
             {
-                mclBnG1_sub(ref this, x, y);
+                MCL.Sub(ref this, x, y);
             }
             public void Mul(in G1 x, in Fr y)
             {
-                mclBnG1_mul(ref this, x, y);
+                MCL.Mul(ref this, x, y);
             }
         }
         [StructLayout(LayoutKind.Sequential)]
@@ -548,27 +708,27 @@ public void Deserialize(byte[] buf)
             }
             public void Neg(in G2 x)
             {
-                mclBnG2_neg(ref this, x);
+                MCL.Neg(ref this, x);
             }
             public void Dbl(in G2 x)
             {
-                mclBnG2_dbl(ref this, x);
+                MCL.Dbl(ref this, x);
             }
             public void Normalize(in G2 x)
             {
-                mclBnG2_normalize(ref this, x);
+                MCL.Normalize(ref this, x);
             }
             public void Add(in G2 x, in G2 y)
             {
-                mclBnG2_add(ref this, x, y);
+                MCL.Add(ref this, x, y);
             }
             public void Sub(in G2 x, in G2 y)
             {
-                mclBnG2_sub(ref this, x, y);
+                MCL.Sub(ref this, x, y);
             }
-            public void Mul(in G2 x, Fr y)
+            public void Mul(in G2 x, in Fr y)
             {
-                mclBnG2_mul(ref this, x, y);
+                MCL.Mul(ref this, x, y);
             }
         }
         [StructLayout(LayoutKind.Sequential)]
@@ -607,27 +767,31 @@ public string GetStr(int ioMode)
             }
             public void Neg(in GT x)
             {
-                mclBnGT_neg(ref this, x);
+                MCL.Neg(ref this, x);
             }
             public void Inv(in GT x)
             {
-                mclBnGT_inv(ref this, x);
+                MCL.Inv(ref this, x);
+            }
+            public void Sqr(in GT x)
+            {
+                MCL.Sqr(ref this, x);
             }
             public void Add(in GT x, in GT y)
             {
-                mclBnGT_add(ref this, x, y);
+                MCL.Add(ref this, x, y);
             }
             public void Sub(in GT x, in GT y)
             {
-                mclBnGT_sub(ref this, x, y);
+                MCL.Sub(ref this, x, y);
             }
             public void Mul(in GT x, in GT y)
             {
-                mclBnGT_mul(ref this, x, y);
+                MCL.Mul(ref this, x, y);
             }
             public void Div(in GT x, in GT y)
             {
-                mclBnGT_div(ref this, x, y);
+                MCL.Div(ref this, x, y);
             }
             public static GT operator -(in GT x)
             {
@@ -661,19 +825,19 @@ public void Div(in GT x, in GT y)
             }
             public void Pow(in GT x, in Fr y)
             {
-                mclBnGT_pow(ref this, x, y);
+                MCL.Pow(ref this, x, y);
             }
             public void Pairing(in G1 x, in G2 y)
             {
-                mclBn_pairing(ref this, x, y);
+                MCL.Pairing(ref this, x, y);
             }
             public void FinalExp(in GT x)
             {
-                mclBn_finalExp(ref this, x);
+                MCL.FinalExp(ref this, x);
             }
             public void MillerLoop(in G1 x, in G2 y)
             {
-                mclBn_millerLoop(ref this, x, y);
+                MCL.MillerLoop(ref this, x, y);
             }
         }
     }

From 68ec65aec1c54def219dc0b1df31a8e958d42cf3 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 24 Jul 2020 18:14:34 +0900
Subject: [PATCH 265/553] [C#] update doc

---
 ffi/cs/readme.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ffi/cs/readme.md b/ffi/cs/readme.md
index e5f27856..a9872a74 100644
--- a/ffi/cs/readme.md
+++ b/ffi/cs/readme.md
@@ -9,3 +9,7 @@ mklib dll
 ```
 
 Open `ffi/cs/mcl.sln` and Set the directory of `mcl/bin` to `workingDirectory` at `Debug` of test project.
+
+# Remark
+- `bn256.cs` is an old code. It will be removed in the future.
+- `mcl/mcl.cs` is a new version. It support `BN254`, `BN_SNARK` and `BLS12_381` curve, which requires `mclbn384_256.dll`.
\ No newline at end of file

From 2e989e6d7c539981727dd4b13da4e62de21fd195 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 24 Jul 2020 20:48:41 +0900
Subject: [PATCH 266/553] [C#] how to init for ETH

---
 ffi/cs/readme.md | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/ffi/cs/readme.md b/ffi/cs/readme.md
index a9872a74..9fcae915 100644
--- a/ffi/cs/readme.md
+++ b/ffi/cs/readme.md
@@ -8,8 +8,17 @@ cd mcl
 mklib dll
 ```
 
-Open `ffi/cs/mcl.sln` and Set the directory of `mcl/bin` to `workingDirectory` at `Debug` of test project.
+Open `mcl/ffi/cs/mcl.sln` and set `workingDirectory` in the `Debug` settings of the test project to the `mcl/bin` directory.
 
 # Remark
 - `bn256.cs` is an old code. It will be removed in the future.
-- `mcl/mcl.cs` is a new version. It support `BN254`, `BN_SNARK` and `BLS12_381` curve, which requires `mclbn384_256.dll`.
\ No newline at end of file
+- `mcl/mcl.cs` is the new version. It supports the `BN254`, `BN_SNARK` and `BLS12_381` curves and requires `mclbn384_256.dll`.
+
+# `ETHmode` with `BLS12_381`
+
+If you need the map-to-G1/G2 function defined in [Hashing to Elliptic Curves](https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-09.html),
+then initialize this library as follows:
+```
+MCL.Init(BLS12_381);
+MCL.ETHmode();
+```

From b3c2480266eb4a203bcadefae2baac9c12efcbb2 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 28 Jul 2020 11:42:23 +0900
Subject: [PATCH 267/553] [C#] add test of mapToG1

---
 ffi/cs/mcl/mcl.cs   |  8 ++++----
 ffi/cs/test/test.cs | 31 +++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index 49483eee..035b225b 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -567,11 +567,11 @@ public void Div(in Fp x, in Fp y)
         }
         [StructLayout(LayoutKind.Sequential)]
         public struct Fp2 {
-            private Fp a, b;
+            public Fp a, b;
         }
         [StructLayout(LayoutKind.Sequential)]
         public struct G1 {
-            private Fp x, y, z;
+            public Fp x, y, z;
             public void Clear()
             {
                 mclBnG1_clear(ref this);
@@ -652,7 +652,7 @@ public void Mul(in G1 x, in Fr y)
         }
         [StructLayout(LayoutKind.Sequential)]
         public struct G2 {
-            private Fp2 x, y, z;
+            public Fp2 x, y, z;
             public void Clear()
             {
                 mclBnG2_clear(ref this);
@@ -733,7 +733,7 @@ public void Mul(in G2 x, in Fr y)
         }
         [StructLayout(LayoutKind.Sequential)]
         public struct GT {
-            private Fp v00, v01, v02, v03, v04, v05, v06, v07, v08, v09, v10, v11;
+            public Fp v00, v01, v02, v03, v04, v05, v06, v07, v08, v09, v10, v11;
             public void Clear()
             {
                 mclBnGT_clear(ref this);
diff --git a/ffi/cs/test/test.cs b/ffi/cs/test/test.cs
index 6e854fa5..832d9697 100644
--- a/ffi/cs/test/test.cs
+++ b/ffi/cs/test/test.cs
@@ -20,6 +20,9 @@ static void Main(string[] args)
                 TestCurve(BN_SNARK);
                 Console.WriteLine("BLS12_381");
                 TestCurve(BLS12_381);
+                Console.WriteLine("BLS12_381 eth");
+                ETHmode();
+                TestETH();
                 if (err == 0) {
                     Console.WriteLine("all tests succeed");
                 } else {
@@ -220,5 +223,33 @@ static void TestPairing()
             e3.Pow(e1, b);
             assert("e2.Equals(e3)", e2.Equals(e3));
         }
+        static void TestETH_mapToG1() // checks G1.HashAndMapTo against a fixed expected point; reached via TestETH(), which Main calls after ETHmode()
+        {
+            var tbl = new[] { // test vectors: input message plus the expected x/y coordinates as hex strings
+                new {
+                    msg = "asdf",
+                    dst = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_", // NOTE(review): dst is never read in the loop below
+                    x = "bc73d15443009a8ff2ddce864136d892274dd8365c60d0d2d44cc543387348e366a8f1e1401427e37743c29ed2c939a",
+                    y = "101e26428a1b78c05458cb1cc37d2d87876ad3437096d2827f376702d4451667fe1fa82e82795495d33d466133ed1862",
+                },
+           };
+            G1 P = new G1();
+            Fp x = new Fp();
+            Fp y = new Fp();
+            foreach (var v in tbl) {
+                P.HashAndMapTo(v.msg); // only the message is passed to the mapping
+                x.SetStr(v.x, 16); // parse the expected coordinates as base-16
+                y.SetStr(v.y, 16);
+                Normalize(ref P, P); // normalize P in place before comparing its x/y fields with the expected values
+                Console.WriteLine("x={0}", P.x.GetStr(16));
+                Console.WriteLine("y={0}", P.y.GetStr(16));
+                assert("P.x", P.x.Equals(x));
+                assert("P.y", P.y.Equals(y));
+            }
+        }
+        static void TestETH() // entry point for the ETH-mode tests; Main calls it right after ETHmode()
+        {
+            TestETH_mapToG1(); // currently the only ETH-mode test
+        }
     }
 }

From 9eb8c7d1f57e60e34c970decc0594244581d491b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 28 Jul 2020 15:36:31 +0900
Subject: [PATCH 268/553] update cybozulib

_
---
 include/cybozu/atomic.hpp           |  4 +++-
 include/cybozu/exception.hpp        |  4 +++-
 include/cybozu/file.hpp             | 17 +++++++++++------
 include/cybozu/mutex.hpp            |  3 +++
 include/cybozu/random_generator.hpp |  6 ++++--
 include/cybozu/socket.hpp           | 19 +++++++++++++------
 6 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/include/cybozu/atomic.hpp b/include/cybozu/atomic.hpp
index 4ecade13..d26c8251 100644
--- a/include/cybozu/atomic.hpp
+++ b/include/cybozu/atomic.hpp
@@ -8,7 +8,9 @@
 */
 #include <cybozu/inttype.hpp>
 #ifdef _WIN32
-#include <winsock2.h>
+#ifndef WIN32_LEAN_AND_MEAN
+	#define WIN32_LEAN_AND_MEAN
+#endif
 #include <windows.h>
 #include <intrin.h>
 #else
diff --git a/include/cybozu/exception.hpp b/include/cybozu/exception.hpp
index 247ba4de..f5b044f6 100644
--- a/include/cybozu/exception.hpp
+++ b/include/cybozu/exception.hpp
@@ -42,7 +42,9 @@ class Exception {
 #include <errno.h>
 #include <stdio.h>
 #ifdef _WIN32
-	#include <winsock2.h>
+	#ifndef WIN32_LEAN_AND_MEAN
+		#define WIN32_LEAN_AND_MEAN
+	#endif
 	#include <windows.h>
 #else
 	#include <string.h> // for strerror_r
diff --git a/include/cybozu/file.hpp b/include/cybozu/file.hpp
index ff17b6fc..dd96dce9 100644
--- a/include/cybozu/file.hpp
+++ b/include/cybozu/file.hpp
@@ -4,6 +4,7 @@
 	@brief file class and operations
 
 	@author MITSUNARI Shigeo(@herumi)
+	@remark mingw requires -lshlwapi option
 */
 
 #include <assert.h>
@@ -17,10 +18,15 @@
 	#include <fcntl.h>
 	#include <shlobj.h>
 	#include <direct.h>
+	#ifndef WIN32_LEAN_AND_MEAN
+		#define WIN32_LEAN_AND_MEAN
+	#endif
 	#include <windows.h>
-	#pragma comment(lib, "shlwapi.lib")
-	#pragma comment(lib, "shell32.lib")
-	#pragma comment(lib, "User32.lib")
+	#ifdef _MSC_VER
+		#pragma comment(lib, "shlwapi.lib")
+		#pragma comment(lib, "shell32.lib")
+		#pragma comment(lib, "User32.lib")
+	#endif
 #else
 	#include <stdio.h>
 	#include <unistd.h>
@@ -302,13 +308,12 @@ class File {
 			posMode = FILE_BEGIN;
 			break;
 		case std::ios::cur:
+		default:
 			posMode = FILE_CURRENT;
 			break;
 		case std::ios::end:
 			posMode = FILE_END;
 			break;
-		default:
-			__assume(0);
 		}
 		bool isOK = SetFilePointerEx(hdl_, largePos, NULL, posMode) != 0;
 #else
@@ -318,10 +323,10 @@ class File {
 			whence = SEEK_SET;
 			break;
 		case std::ios::cur:
+		default:
 			whence = SEEK_CUR;
 			break;
 		case std::ios::end:
-		default:
 			whence = SEEK_END;
 			break;
 		}
diff --git a/include/cybozu/mutex.hpp b/include/cybozu/mutex.hpp
index acde6bcb..508fb57b 100644
--- a/include/cybozu/mutex.hpp
+++ b/include/cybozu/mutex.hpp
@@ -8,6 +8,9 @@
 */
 
 #ifdef _WIN32
+	#ifndef WIN32_LEAN_AND_MEAN
+		#define WIN32_LEAN_AND_MEAN
+	#endif
 	#include <windows.h>
 #else
 	#include <pthread.h>
diff --git a/include/cybozu/random_generator.hpp b/include/cybozu/random_generator.hpp
index 66900f97..a711916f 100644
--- a/include/cybozu/random_generator.hpp
+++ b/include/cybozu/random_generator.hpp
@@ -11,7 +11,9 @@
 #include <cybozu/exception.hpp>
 #endif
 #ifdef _WIN32
-#include <winsock2.h>
+#ifndef WIN32_LEAN_AND_MEAN
+	#define WIN32_LEAN_AND_MEAN
+#endif
 #include <windows.h>
 #include <wincrypt.h>
 #ifdef _MSC_VER
@@ -33,7 +35,7 @@ class RandomGenerator {
 		: prov_(0)
 	{
 		DWORD flagTbl[] = { CRYPT_VERIFYCONTEXT | CRYPT_SILENT, 0, CRYPT_MACHINE_KEYSET };
-		for (int i = 0; i < CYBOZU_NUM_OF_ARRAY(flagTbl); i++) {
+		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(flagTbl); i++) {
 			if (CryptAcquireContext(&prov_, NULL, NULL, PROV_RSA_FULL, flagTbl[i]) != 0) return;
 		}
 #ifdef CYBOZU_DONT_USE_EXCEPTION
diff --git a/include/cybozu/socket.hpp b/include/cybozu/socket.hpp
index b470c940..c7493bd2 100644
--- a/include/cybozu/socket.hpp
+++ b/include/cybozu/socket.hpp
@@ -5,17 +5,24 @@
 
 	@author MITSUNARI Shigeo(@herumi)
 	@author MITSUNARI Shigeo
+	@remark mingw requires -lws2_32 option
 */
 #include <errno.h>
 #include <assert.h>
 #include <stdio.h>
 #ifdef _WIN32
+	#ifndef WIN32_LEAN_AND_MEAN
+		#define WIN32_LEAN_AND_MEAN
+	#endif
+	#include <windows.h>
 	#include <winsock2.h>
 	#include <ws2tcpip.h> // for socklen_t
-	#pragma comment(lib, "ws2_32.lib")
-	#pragma comment(lib, "iphlpapi.lib")
-	#pragma warning(push)
-	#pragma warning(disable : 4127) // constant condition
+	#ifdef _MSC_VER
+		#pragma comment(lib, "ws2_32.lib")
+		#pragma comment(lib, "iphlpapi.lib")
+		#pragma warning(push)
+		#pragma warning(disable : 4127) // constant condition
+	#endif
 #else
 	#include <unistd.h>
 	#include <sys/socket.h>
@@ -613,7 +620,7 @@ class Socket {
 	*/
 	int queryAcceptNoThrow(int msec = 1000, bool checkWrite = true)
 	{
-		if (sd_ < 0) return -EBADF;
+		if (sd_ == INVALID_SOCKET) return -EBADF;
 #ifdef CYBOZU_SOCKET_USE_EPOLL
 		int err;
 		experimental::Epoll ep;
@@ -773,6 +780,6 @@ class Socket {
 
 } // cybozu
 
-#ifdef _WIN32
+#ifdef _MSC_VER
 	#pragma warning(pop)
 #endif

From 49e6633eaa9bda8a9713fb9ad3a23c1fcf205329 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 29 Jul 2020 10:45:24 +0900
Subject: [PATCH 269/553] add test of isValidOrder

---
 test/bls12_test.cpp | 113 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index 1ea05186..723bf3ac 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -729,6 +729,119 @@ CYBOZU_TEST_AUTO(deserialize)
 	}
 }
 
+CYBOZU_TEST_AUTO(verifyG1) // exercises isValid / isValidOrder / deserialize on G1 with verifyOrderG1 switched off and on
+{
+	const char *ok_x = "ad50e39253e0de4fad89440f01f1874c8bc91fdcd59ad66162984b10690e51ccf4d95e4222df14549d745d8b971199"; // hex coordinates of a point expected to pass isValid() and isValidOrder()
+	const char *ok_y = "2f76c6f3a006f0bbfb88c02a4643702ff52ff34c1fcb59af611b7f1cf47938ffbf2c68a6e31a40bf668544087374f70";
+
+	const char *ng_x = "1534fc82e2566c826b195314b32bf47576c24632444450d701de2601cec0c0d6b6090e7227850005e81f54039066602b"; // hex coordinates of a point expected to fail isValidOrder()
+	const char *ng_y = "15899715142d265027d1a9fba8f2f10a3f21938071b4bbdb5dce8c5caa0d93588482d33d9a62bcbbd23ab6af6d689710";
+
+	Fp x, y; // NOTE(review): x and y are never used in this test
+	G1 P, Q;
+	char buf[128];
+	size_t n;
+	P.x.setStr(ok_x, 16);
+	P.y.setStr(ok_y, 16);
+	P.z = 1;
+
+	// valid point, valid order
+	verifyOrderG1(false);
+	CYBOZU_TEST_ASSERT(P.isValid());
+	CYBOZU_TEST_ASSERT(P.isValidOrder());
+	n = P.serialize(buf, sizeof(buf)); // n is reused as the input size for deserialize
+	n = Q.deserialize(buf, n);
+	CYBOZU_TEST_ASSERT(n > 0);
+	CYBOZU_TEST_EQUAL(P, Q);
+
+	verifyOrderG1(true);
+	CYBOZU_TEST_ASSERT(P.isValid());
+	CYBOZU_TEST_ASSERT(P.isValidOrder());
+	Q.clear();
+	n = Q.deserialize(buf, n); // same round trip, now with the order check enabled
+	CYBOZU_TEST_ASSERT(n > 0);
+	CYBOZU_TEST_EQUAL(P, Q);
+
+	// invalid point
+	P.z = 2;
+	CYBOZU_TEST_ASSERT(!P.isValid());
+
+	// valid point, invalid order
+	verifyOrderG1(false);
+	P.x.setStr(ng_x, 16);
+	P.y.setStr(ng_y, 16);
+	P.z = 1;
+	CYBOZU_TEST_ASSERT(P.isValid());
+	CYBOZU_TEST_ASSERT(!P.isValidOrder());
+	n = P.serialize(buf, sizeof(buf));
+	n = Q.deserialize(buf, n);
+	CYBOZU_TEST_ASSERT(n > 0); // success because of no-check the order
+	CYBOZU_TEST_EQUAL(P, Q);
+
+	verifyOrderG1(true); // NOTE(review): the flag is left set to true when the test returns
+	CYBOZU_TEST_ASSERT(!P.isValid()); // fail because of invalid order
+	Q.clear();
+	n = Q.deserialize(buf, n); // fail because of invalid order
+	CYBOZU_TEST_ASSERT(n == 0);
+}
+
+CYBOZU_TEST_AUTO(verifyG2) // same scenario as verifyG1, for G2 and verifyOrderG2
+{
+	const char *ok_x = "1400ddb63494b2f3717d8706a834f928323cef590dd1f2bc8edaf857889e82c9b4cf242324526c9045bc8fec05f98fe9 14b38e10fd6d2d63dfe704c3f0b1741474dfeaef88d6cdca4334413320701c74e5df8c7859947f6901c0a3c30dba23c9"; // each coordinate is two space-separated hex numbers; point expected to pass isValid() and isValidOrder()
+	const char *ok_y = "187452296c28d5206880d2a86e8c7fc79df88e20b906a1fc1d5855da6b2b4ae6f8c83a591e2e5350753d2d7fe3c7b4 9c205210f33e9cdaaa4630b3f6fad29744224e5100456973fcaf031cdbce8ad3f71d42af3f7733a3985d3a3d2f4be53";
+
+	const char *ng_x = "717f18d36bd40d090948f2d4dac2a03f6469d234f4beb75f67e66d51ea5540652189c61d01d1cfe3f5e9318e48bdf8a 13fc0389cb74ad6c8875c34f85e2bb93ca1bed48c14f2dd0f5cd741853014fe278c9551a9ac5850f678a423664f8287f"; // point expected to fail isValidOrder()
+	const char *ng_y = "5412e6cef6b7189f31810c0cbac6b6350b18691be1fefed131a033f2df393b9c3a423c605666226c1efa833de11363b 101ed6eafbf85be7273ec5aec3471aa2c1018d7463cc48dfe9a7c872a7745e81317c88ce0c89a9086975feb4a2749074";
+
+	Fp x, y; // NOTE(review): x and y are never used in this test
+	G2 P, Q;
+	char buf[128];
+	size_t n;
+	P.x.setStr(ok_x, 16);
+	P.y.setStr(ok_y, 16);
+	P.z = 1;
+
+	// valid point, valid order
+	verifyOrderG2(false);
+	CYBOZU_TEST_ASSERT(P.isValid());
+	CYBOZU_TEST_ASSERT(P.isValidOrder());
+	n = P.serialize(buf, sizeof(buf)); // n is reused as the input size for deserialize
+	n = Q.deserialize(buf, n);
+	CYBOZU_TEST_ASSERT(n > 0);
+	CYBOZU_TEST_EQUAL(P, Q);
+
+	verifyOrderG2(true);
+	CYBOZU_TEST_ASSERT(P.isValid());
+	CYBOZU_TEST_ASSERT(P.isValidOrder());
+	Q.clear();
+	n = Q.deserialize(buf, n); // same round trip, now with the order check enabled
+	CYBOZU_TEST_ASSERT(n > 0);
+	CYBOZU_TEST_EQUAL(P, Q);
+
+	// invalid point
+	P.z = 2;
+	CYBOZU_TEST_ASSERT(!P.isValid());
+
+	// valid point, invalid order
+	verifyOrderG2(false);
+	P.x.setStr(ng_x, 16);
+	P.y.setStr(ng_y, 16);
+	P.z = 1;
+	CYBOZU_TEST_ASSERT(P.isValid());
+	CYBOZU_TEST_ASSERT(!P.isValidOrder());
+	n = P.serialize(buf, sizeof(buf));
+	n = Q.deserialize(buf, n);
+	CYBOZU_TEST_ASSERT(n > 0); // success because of no-check the order
+	CYBOZU_TEST_EQUAL(P, Q);
+
+	verifyOrderG2(true); // NOTE(review): the flag is left set to true when the test returns
+	CYBOZU_TEST_ASSERT(!P.isValid()); // fail because of invalid order
+	Q.clear();
+	n = Q.deserialize(buf, n); // fail because of invalid order
+	CYBOZU_TEST_ASSERT(n == 0);
+}
+
+
 typedef std::vector<Fp> FpVec;
 
 void f(FpVec& zv, const FpVec& xv, const FpVec& yv)

From 9b8677498998faa323ef699256ee1b89db27093f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 4 Aug 2020 14:15:46 +0900
Subject: [PATCH 270/553] update xbyak v5.941

---
 src/xbyak/xbyak.h          | 377 +++++++++++++++++++++----------------
 src/xbyak/xbyak_mnemonic.h | 156 ++++++++-------
 src/xbyak/xbyak_util.h     |  26 ++-
 3 files changed, 319 insertions(+), 240 deletions(-)

diff --git a/src/xbyak/xbyak.h b/src/xbyak/xbyak.h
index 63efccd7..41894d00 100644
--- a/src/xbyak/xbyak.h
+++ b/src/xbyak/xbyak.h
@@ -72,13 +72,17 @@
 	#define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
 #endif
 #ifdef _WIN32
-	#include <winsock2.h>
+	#ifndef WIN32_LEAN_AND_MEAN
+		#define WIN32_LEAN_AND_MEAN
+	#endif
 	#include <windows.h>
 	#include <malloc.h>
+	#define XBYAK_TLS __declspec(thread)
 #elif defined(__GNUC__)
 	#include <unistd.h>
 	#include <sys/mman.h>
 	#include <stdlib.h>
+	#define XBYAK_TLS __thread
 #endif
 #if defined(__APPLE__) && !defined(XBYAK_DONT_USE_MAP_JIT)
 	#define XBYAK_USE_MAP_JIT
@@ -120,7 +124,7 @@ namespace Xbyak {
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x5912 /* 0xABCD = A.BC(D) */
+	VERSION = 0x5941 /* 0xABCD = A.BC(D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
@@ -194,9 +198,80 @@ enum {
 	ERR_INVALID_RIP_IN_AUTO_GROW,
 	ERR_INVALID_MIB_ADDRESS,
 	ERR_X2APIC_IS_NOT_SUPPORTED,
+	ERR_NOT_SUPPORTED,
 	ERR_INTERNAL // Put it at last.
 };
 
+inline const char *ConvertErrorToString(int err) // returns a static message for an ERR_* code; any code outside [0, ERR_INTERNAL] yields "unknown err"
+{
+	static const char *errTbl[] = { // indexed by error code; the assert below checks it has ERR_INTERNAL + 1 entries
+		"none",
+		"bad addressing",
+		"code is too big",
+		"bad scale",
+		"esp can't be index",
+		"bad combination",
+		"bad size of register",
+		"imm is too big",
+		"bad align",
+		"label is redefined",
+		"label is too far",
+		"label is not found",
+		"code is not copyable",
+		"bad parameter",
+		"can't protect",
+		"can't use 64bit disp(use (void*))",
+		"offset is too big",
+		"MEM size is not specified",
+		"bad mem size",
+		"bad st combination",
+		"over local label",
+		"under local label",
+		"can't alloc",
+		"T_SHORT is not supported in AutoGrow",
+		"bad protect mode",
+		"bad pNum",
+		"bad tNum",
+		"bad vsib addressing",
+		"can't convert",
+		"label is not set by L()",
+		"label is already set by L()",
+		"bad label string",
+		"err munmap",
+		"opmask is already set",
+		"rounding is already set",
+		"k0 is invalid",
+		"evex is invalid",
+		"sae(suppress all exceptions) is invalid",
+		"er(embedded rounding) is invalid",
+		"invalid broadcast",
+		"invalid opmask with memory",
+		"invalid zero",
+		"invalid rip in AutoGrow",
+		"invalid mib address",
+		"x2APIC is not supported",
+		"not supported",
+		"internal error"
+	};
+	assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
+	return (0 <= err && err <= ERR_INTERNAL) ? errTbl[err] : "unknown err"; // err is a plain int: reject negative codes as well as too-large ones
+}
+
+#ifdef XBYAK_NO_EXCEPTION
+namespace local {
+
+static XBYAK_TLS int l_err = 0;
+inline void SetError(int err) { if (err) l_err = err; } // keep the first err code
+
+} // local
+
+inline void ClearError() { local::l_err = 0; }
+inline int GetError() { return local::l_err; }
+
+#define XBYAK_THROW(err) { local::SetError(err); return; }
+#define XBYAK_THROW_RET(err, r) { local::SetError(err); return r; }
+
+#else
 class Error : public std::exception {
 	int err_;
 public:
@@ -209,65 +284,24 @@ class Error : public std::exception {
 	operator int() const { return err_; }
 	const char *what() const throw()
 	{
-		static const char *errTbl[] = {
-			"none",
-			"bad addressing",
-			"code is too big",
-			"bad scale",
-			"esp can't be index",
-			"bad combination",
-			"bad size of register",
-			"imm is too big",
-			"bad align",
-			"label is redefined",
-			"label is too far",
-			"label is not found",
-			"code is not copyable",
-			"bad parameter",
-			"can't protect",
-			"can't use 64bit disp(use (void*))",
-			"offset is too big",
-			"MEM size is not specified",
-			"bad mem size",
-			"bad st combination",
-			"over local label",
-			"under local label",
-			"can't alloc",
-			"T_SHORT is not supported in AutoGrow",
-			"bad protect mode",
-			"bad pNum",
-			"bad tNum",
-			"bad vsib addressing",
-			"can't convert",
-			"label is not set by L()",
-			"label is already set by L()",
-			"bad label string",
-			"err munmap",
-			"opmask is already set",
-			"rounding is already set",
-			"k0 is invalid",
-			"evex is invalid",
-			"sae(suppress all exceptions) is invalid",
-			"er(embedded rounding) is invalid",
-			"invalid broadcast",
-			"invalid opmask with memory",
-			"invalid zero",
-			"invalid rip in AutoGrow",
-			"invalid mib address",
-			"x2APIC is not supported",
-			"internal error"
-		};
-		assert(err_ <= ERR_INTERNAL);
-		assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
-		return errTbl[err_];
+		return ConvertErrorToString(err_);
 	}
 };
 
+// dummy functions
+inline void ClearError() { }
+inline int GetError() { return 0; }
+
 inline const char *ConvertErrorToString(const Error& err)
 {
 	return err.what();
 }
 
+#define XBYAK_THROW(err) { throw Error(err); }
+#define XBYAK_THROW_RET(err, r) { throw Error(err); }
+
+#endif
+
 inline void *AlignedMalloc(size_t size, size_t alignment)
 {
 #ifdef __MINGW32__
@@ -307,7 +341,7 @@ inline bool IsInInt32(uint64 x) { return ~uint64(0x7fffffffu) <= x || x <= 0x7FF
 inline uint32 VerifyInInt32(uint64 x)
 {
 #ifdef XBYAK64
-	if (!IsInInt32(x)) throw Error(ERR_OFFSET_IS_TOO_BIG);
+	if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0)
 #endif
 	return static_cast<uint32>(x);
 }
@@ -375,7 +409,7 @@ class MmapAllocator : Allocator {
 		#error "not supported"
 #endif
 		void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, -1, 0);
-		if (p == MAP_FAILED) throw Error(ERR_CANT_ALLOC);
+		if (p == MAP_FAILED) XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
 		assert(p);
 		sizeList_[(uintptr_t)p] = size;
 		return (uint8*)p;
@@ -384,8 +418,8 @@ class MmapAllocator : Allocator {
 	{
 		if (p == 0) return;
 		SizeList::iterator i = sizeList_.find((uintptr_t)p);
-		if (i == sizeList_.end()) throw Error(ERR_BAD_PARAMETER);
-		if (munmap((void*)i->first, i->second) < 0) throw Error(ERR_MUNMAP);
+		if (i == sizeList_.end()) XBYAK_THROW(ERR_BAD_PARAMETER)
+		if (munmap((void*)i->first, i->second) < 0) XBYAK_THROW(ERR_MUNMAP)
 		sizeList_.erase(i);
 	}
 };
@@ -397,8 +431,8 @@ class Reg;
 class Operand {
 	static const uint8 EXT8BIT = 0x20;
 	unsigned int idx_:6; // 0..31 + EXT8BIT = 1 if spl/bpl/sil/dil
-	unsigned int kind_:9;
-	unsigned int bit_:10;
+	unsigned int kind_:10;
+	unsigned int bit_:14;
 protected:
 	unsigned int zero_:1;
 	unsigned int mask_:3;
@@ -415,7 +449,8 @@ class Operand {
 		YMM = 1 << 5,
 		ZMM = 1 << 6,
 		OPMASK = 1 << 7,
-		BNDREG = 1 << 8
+		BNDREG = 1 << 8,
+		TMM = 1 << 9
 	};
 	enum Code {
 #ifdef XBYAK64
@@ -445,6 +480,7 @@ class Operand {
 	bool isXMM() const { return is(XMM); }
 	bool isYMM() const { return is(YMM); }
 	bool isZMM() const { return is(ZMM); }
+	bool isTMM() const { return is(TMM); }
 	bool isXMEM() const { return is(XMM | MEM); }
 	bool isYMEM() const { return is(YMM | MEM); }
 	bool isZMEM() const { return is(ZMM | MEM); }
@@ -463,20 +499,20 @@ class Operand {
 	int getRounding() const { return rounding_; }
 	void setKind(Kind kind)
 	{
-		if ((kind & (XMM|YMM|ZMM)) == 0) return;
+		if ((kind & (XMM|YMM|ZMM|TMM)) == 0) return;
 		kind_ = kind;
-		bit_ = kind == XMM ? 128 : kind == YMM ? 256 : 512;
+		bit_ = kind == XMM ? 128 : kind == YMM ? 256 : kind == ZMM ? 512 : 8192;
 	}
 	// err if MMX/FPU/OPMASK/BNDREG
 	void setBit(int bit);
 	void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true)
 	{
-		if (mask_) throw Error(ERR_OPMASK_IS_ALREADY_SET);
+		if (mask_) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET)
 		mask_ = idx;
 	}
 	void setRounding(int idx)
 	{
-		if (rounding_) throw Error(ERR_ROUNDING_IS_ALREADY_SET);
+		if (rounding_) XBYAK_THROW(ERR_ROUNDING_IS_ALREADY_SET)
 		rounding_ = idx;
 	}
 	void setZero() { zero_ = true; }
@@ -513,6 +549,11 @@ class Operand {
 		} else if (isOPMASK()) {
 			static const char *tbl[8] = { "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7" };
 			return tbl[idx];
+		} else if (isTMM()) {
+			static const char *tbl[8] = {
+				"tmm0", "tmm1", "tmm2", "tmm3", "tmm4", "tmm5", "tmm6", "tmm7"
+			};
+			return tbl[idx];
 		} else if (isZMM()) {
 			static const char *tbl[32] = {
 				"zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15",
@@ -541,7 +582,7 @@ class Operand {
 			static const char *tbl[4] = { "bnd0", "bnd1", "bnd2", "bnd3" };
 			return tbl[idx];
 		}
-		throw Error(ERR_INTERNAL);
+		XBYAK_THROW_RET(ERR_INTERNAL, 0);
 	}
 	bool isEqualIfNotInherited(const Operand& rhs) const { return idx_ == rhs.idx_ && kind_ == rhs.kind_ && bit_ == rhs.bit_ && zero_ == rhs.zero_ && mask_ == rhs.mask_ && rounding_ == rhs.rounding_; }
 	bool operator==(const Operand& rhs) const;
@@ -552,13 +593,13 @@ class Operand {
 
 inline void Operand::setBit(int bit)
 {
-	if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512) goto ERR;
+	if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512 && bit != 8192) goto ERR;
 	if (isBit(bit)) return;
 	if (is(MEM | OPMASK)) {
 		bit_ = bit;
 		return;
 	}
-	if (is(REG | XMM | YMM | ZMM)) {
+	if (is(REG | XMM | YMM | ZMM | TMM)) {
 		int idx = getIdx();
 		// err if converting ah, bh, ch, dh
 		if (isREG(8) && (4 <= idx && idx < 8) && !isExt8bit()) goto ERR;
@@ -580,6 +621,7 @@ inline void Operand::setBit(int bit)
 		case 128: kind = XMM; break;
 		case 256: kind = YMM; break;
 		case 512: kind = ZMM; break;
+		case 8192: kind = TMM; break;
 		}
 		idx_ = idx;
 		kind_ = kind;
@@ -590,7 +632,7 @@ inline void Operand::setBit(int bit)
 		return;
 	}
 ERR:
-	throw Error(ERR_CANT_CONVERT);
+	XBYAK_THROW(ERR_CANT_CONVERT)
 }
 
 class Label;
@@ -674,6 +716,12 @@ struct Zmm : public Ymm {
 	Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; }
 };
 
+#ifdef XBYAK64
+struct Tmm : public Reg { // register operand of kind Operand::TMM; only declared when XBYAK64 is defined
+	explicit Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { } // defaults: index 0, kind TMM, 8192 bits
+};
+#endif
+
 struct Opmask : public Reg {
 	explicit Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {}
 };
@@ -718,11 +766,11 @@ struct RegRip {
 		return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
 	}
 	friend const RegRip operator+(const RegRip& r, const Label& label) {
-		if (r.label_ || r.isAddr_) throw Error(ERR_BAD_ADDRESSING);
+		if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip());
 		return RegRip(r.disp_, &label);
 	}
 	friend const RegRip operator+(const RegRip& r, const void *addr) {
-		if (r.label_ || r.isAddr_) throw Error(ERR_BAD_ADDRESSING);
+		if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip());
 		return RegRip(r.disp_ + (sint64)addr, 0, true);
 	}
 };
@@ -782,9 +830,9 @@ class RegExp {
 		: scale_(scale)
 		, disp_(0)
 	{
-		if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM)) throw Error(ERR_BAD_SIZE_OF_REGISTER);
+		if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM|Reg::TMM)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
 		if (scale == 0) return;
-		if (scale != 1 && scale != 2 && scale != 4 && scale != 8) throw Error(ERR_BAD_SCALE);
+		if (scale != 1 && scale != 2 && scale != 4 && scale != 8) XBYAK_THROW(ERR_BAD_SCALE)
 		if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index
 			index_ = r;
 		} else {
@@ -812,10 +860,10 @@ class RegExp {
 	size_t getDisp() const { return disp_; }
 	void verify() const
 	{
-		if (base_.getBit() >= 128) throw Error(ERR_BAD_SIZE_OF_REGISTER);
+		if (base_.getBit() >= 128) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
 		if (index_.getBit() && index_.getBit() <= 64) {
-			if (index_.getIdx() == Operand::ESP) throw Error(ERR_ESP_CANT_BE_INDEX);
-			if (base_.getBit() && base_.getBit() != index_.getBit()) throw Error(ERR_BAD_SIZE_OF_REGISTER);
+			if (index_.getIdx() == Operand::ESP) XBYAK_THROW(ERR_ESP_CANT_BE_INDEX)
+			if (base_.getBit() && base_.getBit() != index_.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
 		}
 	}
 	friend RegExp operator+(const RegExp& a, const RegExp& b);
@@ -838,12 +886,12 @@ class RegExp {
 
 inline RegExp operator+(const RegExp& a, const RegExp& b)
 {
-	if (a.index_.getBit() && b.index_.getBit()) throw Error(ERR_BAD_ADDRESSING);
+	if (a.index_.getBit() && b.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp())
 	RegExp ret = a;
 	if (!ret.index_.getBit()) { ret.index_ = b.index_; ret.scale_ = b.scale_; }
 	if (b.base_.getBit()) {
 		if (ret.base_.getBit()) {
-			if (ret.index_.getBit()) throw Error(ERR_BAD_ADDRESSING);
+			if (ret.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp())
 			// base + base => base + index * 1
 			ret.index_ = b.base_;
 			// [reg + esp] => [esp + reg]
@@ -917,7 +965,7 @@ class CodeArray {
 	{
 		const size_t newSize = (std::max<size_t>)(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2);
 		uint8 *newTop = alloc_->alloc(newSize);
-		if (newTop == 0) throw Error(ERR_CANT_ALLOC);
+		if (newTop == 0) XBYAK_THROW(ERR_CANT_ALLOC)
 		for (size_t i = 0; i < size_; i++) newTop[i] = top_[i];
 		alloc_->free(top_);
 		top_ = newTop;
@@ -949,10 +997,10 @@ class CodeArray {
 		, size_(0)
 		, isCalledCalcJmpAddress_(false)
 	{
-		if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC);
+		if (maxSize_ > 0 && top_ == 0) XBYAK_THROW(ERR_CANT_ALLOC)
 		if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) {
 			alloc_->free(top_);
-			throw Error(ERR_CANT_PROTECT);
+			XBYAK_THROW(ERR_CANT_PROTECT)
 		}
 	}
 	virtual ~CodeArray()
@@ -966,7 +1014,7 @@ class CodeArray {
 	{
 		bool isOK = protect(top_, maxSize_, mode);
 		if (isOK) return true;
-		if (throwException) throw Error(ERR_CANT_PROTECT);
+		if (throwException) XBYAK_THROW_RET(ERR_CANT_PROTECT, false)
 		return false;
 	}
 	bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); }
@@ -983,7 +1031,7 @@ class CodeArray {
 			if (type_ == AUTO_GROW) {
 				growMemory();
 			} else {
-				throw Error(ERR_CODE_IS_TOO_BIG);
+				XBYAK_THROW(ERR_CODE_IS_TOO_BIG)
 			}
 		}
 		top_[size_++] = static_cast<uint8>(code);
@@ -994,7 +1042,7 @@ class CodeArray {
 	}
 	void db(uint64 code, size_t codeSize)
 	{
-		if (codeSize > 8) throw Error(ERR_BAD_PARAMETER);
+		if (codeSize > 8) XBYAK_THROW(ERR_BAD_PARAMETER)
 		for (size_t i = 0; i < codeSize; i++) db(static_cast<uint8>(code >> (i * 8)));
 	}
 	void dw(uint32 code) { db(code, 2); }
@@ -1009,7 +1057,7 @@ class CodeArray {
 	size_t getSize() const { return size_; }
 	void setSize(size_t size)
 	{
-		if (size > maxSize_) throw Error(ERR_OFFSET_IS_TOO_BIG);
+		if (size > maxSize_) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
 		size_ = size;
 	}
 	void dump() const
@@ -1042,7 +1090,7 @@ class CodeArray {
 	void rewrite(size_t offset, uint64 disp, size_t size)
 	{
 		assert(offset < maxSize_);
-		if (size != 1 && size != 2 && size != 4 && size != 8) throw Error(ERR_BAD_PARAMETER);
+		if (size != 1 && size != 2 && size != 4 && size != 8) XBYAK_THROW(ERR_BAD_PARAMETER)
 		uint8 *const data = top_ + offset;
 		for (size_t i = 0; i < size; i++) {
 			data[i] = static_cast<uint8>(disp >> (i * 8));
@@ -1271,7 +1319,7 @@ class LabelManager {
 		// add label
 		typename DefList::value_type item(labelId, addrOffset);
 		std::pair<typename DefList::iterator, bool> ret = defList.insert(item);
-		if (!ret.second) throw Error(ERR_LABEL_IS_REDEFINED);
+		if (!ret.second) XBYAK_THROW(ERR_LABEL_IS_REDEFINED)
 		// search undefined label
 		for (;;) {
 			typename UndefList::iterator itr = undefList.find(labelId);
@@ -1286,9 +1334,9 @@ class LabelManager {
 			} else {
 				disp = addrOffset - jmp->endOfJmp + jmp->disp;
 #ifdef XBYAK64
-				if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) throw Error(ERR_OFFSET_IS_TOO_BIG);
+				if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
 #endif
-				if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32)disp)) throw Error(ERR_LABEL_IS_TOO_FAR);
+				if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32)disp)) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR)
 			}
 			if (base_->isAutoGrow()) {
 				base_->save(offset, disp, jmp->jmpSize, jmp->mode);
@@ -1360,6 +1408,7 @@ class LabelManager {
 		clabelDefList_.clear();
 		clabelUndefList_.clear();
 		resetLabelPtrList();
+		ClearError();
 	}
 	void enterLocal()
 	{
@@ -1367,14 +1416,14 @@ class LabelManager {
 	}
 	void leaveLocal()
 	{
-		if (stateList_.size() <= 2) throw Error(ERR_UNDER_LOCAL_LABEL);
-		if (hasUndefinedLabel_inner(stateList_.back().undefList)) throw Error(ERR_LABEL_IS_NOT_FOUND);
+		if (stateList_.size() <= 2) XBYAK_THROW(ERR_UNDER_LOCAL_LABEL)
+		if (hasUndefinedLabel_inner(stateList_.back().undefList)) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND)
 		stateList_.pop_back();
 	}
 	void set(CodeArray *base) { base_ = base; }
 	void defineSlabel(std::string label)
 	{
-		if (label == "@b" || label == "@f") throw Error(ERR_BAD_LABEL_STR);
+		if (label == "@b" || label == "@f") XBYAK_THROW(ERR_BAD_LABEL_STR)
 		if (label == "@@") {
 			SlabelDefList& defList = stateList_.front().defList;
 			SlabelDefList::iterator i = defList.find("@f");
@@ -1401,7 +1450,7 @@ class LabelManager {
 	void assign(Label& dst, const Label& src)
 	{
 		ClabelDefList::const_iterator i = clabelDefList_.find(src.id);
-		if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L);
+		if (i == clabelDefList_.end()) XBYAK_THROW(ERR_LABEL_ISNOT_SET_BY_L)
 		define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
 		dst.mgr = this;
 		labelPtrList_.insert(&dst);
@@ -1413,7 +1462,7 @@ class LabelManager {
 			if (defList.find("@f") != defList.end()) {
 				label = "@f";
 			} else if (defList.find("@b") == defList.end()) {
-				throw Error(ERR_LABEL_IS_NOT_FOUND);
+				XBYAK_THROW_RET(ERR_LABEL_IS_NOT_FOUND, false)
 			}
 		} else if (label == "@f") {
 			if (defList.find("@f") != defList.end()) {
@@ -1456,7 +1505,7 @@ inline Label::Label(const Label& rhs)
 }
 inline Label& Label::operator=(const Label& rhs)
 {
-	if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L);
+	if (id) XBYAK_THROW_RET(ERR_LABEL_IS_ALREADY_SET_BY_L, *this)
 	id = rhs.id;
 	mgr = rhs.mgr;
 	if (mgr) mgr->incRefCount(id, this);
@@ -1485,7 +1534,7 @@ class CodeGenerator : public CodeArray {
 	CodeGenerator operator=(const CodeGenerator&); // don't call
 #ifdef XBYAK64
 	enum { i32e = 32 | 64, BIT = 64 };
-	static const size_t dummyAddr = (size_t(0x11223344) << 32) | 55667788;
+	static const uint64 dummyAddr = uint64(0x1122334455667788ull);
 	typedef Reg64 NativeReg;
 #else
 	enum { i32e = 32, BIT = 32 };
@@ -1532,7 +1581,7 @@ class CodeGenerator : public CodeArray {
 		uint8 rex = 0;
 		const Operand *p1 = &op1, *p2 = &op2;
 		if (p1->isMEM()) std::swap(p1, p2);
-		if (p1->isMEM()) throw Error(ERR_BAD_COMBINATION);
+		if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
 		if (p2->isMEM()) {
 			const Address& addr = p2->getAddress();
 			if (BIT == 64 && addr.is32bit()) db(0x67);
@@ -1592,7 +1641,7 @@ class CodeGenerator : public CodeArray {
 		bool r = reg.isExtIdx();
 		bool b = base.isExtIdx();
 		int idx = v ? v->getIdx() : 0;
-		if ((idx | reg.getIdx() | base.getIdx()) >= 16) throw Error(ERR_BAD_COMBINATION);
+		if ((idx | reg.getIdx() | base.getIdx()) >= 16) XBYAK_THROW(ERR_BAD_COMBINATION)
 		uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
 		uint32 vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp;
 		if (!b && !x && !w && (type & T_0F)) {
@@ -1606,23 +1655,23 @@ class CodeGenerator : public CodeArray {
 	void verifySAE(const Reg& r, int type) const
 	{
 		if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return;
-		throw Error(ERR_SAE_IS_INVALID);
+		XBYAK_THROW(ERR_SAE_IS_INVALID)
 	}
 	void verifyER(const Reg& r, int type) const
 	{
 		if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
-		throw Error(ERR_ER_IS_INVALID);
+		XBYAK_THROW(ERR_ER_IS_INVALID)
 	}
 	// (a, b, c) contains non zero two or three values then err
 	int verifyDuplicate(int a, int b, int c, int err)
 	{
 		int v = a | b | c;
-		if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err);
+		if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) XBYAK_THROW_RET(err, 0)
 		return v;
 	}
 	int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = false)
 	{
-		if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID);
+		if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0)
 		int w = (type & T_EW1) ? 1 : 0;
 		uint32 mm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
 		uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
@@ -1680,10 +1729,10 @@ class CodeGenerator : public CodeArray {
 	}
 	void setSIB(const RegExp& e, int reg, int disp8N = 0)
 	{
-		size_t disp64 = e.getDisp();
+		uint64 disp64 = e.getDisp();
 #ifdef XBYAK64
-		size_t high = disp64 >> 32;
-		if (high != 0 && high != 0xFFFFFFFF) throw Error(ERR_OFFSET_IS_TOO_BIG);
+		uint64 high = disp64 >> 32;
+		if (high != 0 && high != 0xFFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
 #endif
 		uint32 disp = static_cast<uint32>(disp64);
 		const Reg& base = e.getBase();
@@ -1743,23 +1792,23 @@ class CodeGenerator : public CodeArray {
 	}
 	void opModM(const Address& addr, const Reg& reg, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
 	{
-		if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
+		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
 		rex(addr, reg);
 		db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
 		opAddr(addr, reg.getIdx(), immSize);
 	}
 	void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE)
 	{
-		if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
-		if (reg.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER);
+		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
+		if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
 		rex(addr, reg);
 		db(code0); if (code1 != NONE) db(code1);
 		opAddr(addr, reg.getIdx());
 	}
 	void opMIB(const Address& addr, const Reg& reg, int code0, int code1)
 	{
-		if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
-		if (addr.getMode() != Address::M_ModRM) throw Error(ERR_INVALID_MIB_ADDRESS);
+		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
+		if (addr.getMode() != Address::M_ModRM) XBYAK_THROW(ERR_INVALID_MIB_ADDRESS)
 		if (BIT == 64 && addr.is32bit()) db(0x67);
 		const RegExp& regExp = addr.getRegExp(false);
 		uint8 rex = regExp.getRex();
@@ -1775,7 +1824,7 @@ class CodeGenerator : public CodeArray {
 		if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) {
 			db(shortCode); db(disp - shortJmpSize);
 		} else {
-			if (type == T_SHORT) throw Error(ERR_LABEL_IS_TOO_FAR);
+			if (type == T_SHORT) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR)
 			if (longPref) db(longPref);
 			db(longCode); dd(disp - longJmpSize);
 		}
@@ -1805,7 +1854,7 @@ class CodeGenerator : public CodeArray {
 	void opJmpAbs(const void *addr, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref = 0)
 	{
 		if (isAutoGrow()) {
-			if (!isNEAR(type)) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW);
+			if (!isNEAR(type)) XBYAK_THROW(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW)
 			if (size_ + 16 >= maxSize_) growMemory();
 			if (longPref) db(longPref);
 			db(longCode);
@@ -1821,7 +1870,7 @@ class CodeGenerator : public CodeArray {
 	// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
 	void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false)
 	{
-		if (!permitVisb && addr.isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING);
+		if (!permitVisb && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
 		if (addr.getMode() == Address::M_ModRM) {
 			setSIB(addr.getRegExp(), reg, disp8N);
 		} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) {
@@ -1831,7 +1880,7 @@ class CodeGenerator : public CodeArray {
 			} else {
 				size_t disp = addr.getDisp();
 				if (addr.getMode() == Address::M_ripAddr) {
-					if (isAutoGrow()) throw Error(ERR_INVALID_RIP_IN_AUTO_GROW);
+					if (isAutoGrow()) XBYAK_THROW(ERR_INVALID_RIP_IN_AUTO_GROW)
 					disp -= (size_t)getCurr() + 4 + immSize;
 				}
 				dd(inner::VerifyInInt32(disp));
@@ -1841,7 +1890,7 @@ class CodeGenerator : public CodeArray {
 	/* preCode is for SSSE3/SSE4 */
 	void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE)
 	{
-		if (isValid && !isValid(reg, op)) throw Error(ERR_BAD_COMBINATION);
+		if (isValid && !isValid(reg, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
 		if (pref != NONE) db(pref);
 		if (op.isMEM()) {
 			opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, (imm8 != NONE) ? 1 : 0);
@@ -1868,7 +1917,7 @@ class CodeGenerator : public CodeArray {
 		} else if (op1.isMEM() && op2.isXMM()) {
 			opModM(op1.getAddress(), op2.getReg(), 0x0F, code | 1);
 		} else {
-			throw Error(ERR_BAD_COMBINATION);
+			XBYAK_THROW(ERR_BAD_COMBINATION)
 		}
 	}
 	void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false)
@@ -1889,7 +1938,7 @@ class CodeGenerator : public CodeArray {
 		} else if (op.isMEM()) {
 			opModM(op.getAddress(), Reg(ext, Operand::REG, opBit), code0, code1, code2, immSize);
 		} else {
-			throw Error(ERR_BAD_COMBINATION);
+			XBYAK_THROW(ERR_BAD_COMBINATION)
 		}
 	}
 	void opShift(const Operand& op, int imm, int ext)
@@ -1900,7 +1949,7 @@ class CodeGenerator : public CodeArray {
 	}
 	void opShift(const Operand& op, const Reg8& _cl, int ext)
 	{
-		if (_cl.getIdx() != Operand::CL) throw Error(ERR_BAD_COMBINATION);
+		if (_cl.getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION)
 		opR_ModM(op, 0, ext, 0xD2);
 	}
 	void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
@@ -1910,12 +1959,12 @@ class CodeGenerator : public CodeArray {
 		} else if (condM) {
 			opModM(op2.getAddress(), op1.getReg(), code0, code1, code2, immSize);
 		} else {
-			throw Error(ERR_BAD_COMBINATION);
+			XBYAK_THROW(ERR_BAD_COMBINATION)
 		}
 	}
 	void opShxd(const Operand& op, const Reg& reg, uint8 imm, int code, const Reg8 *_cl = 0)
 	{
-		if (_cl && _cl->getIdx() != Operand::CL) throw Error(ERR_BAD_COMBINATION);
+		if (_cl && _cl->getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION)
 		opModRM(reg, op, (op.isREG(16 | i32e) && op.getBit() == reg.getBit()), op.isMEM() && (reg.isREG(16 | i32e)), 0x0F, code | (_cl ? 1 : 0), NONE, _cl ? 0 : 1);
 		if (!_cl) db(imm);
 	}
@@ -1934,7 +1983,7 @@ class CodeGenerator : public CodeArray {
 		verifyMemHasSize(op);
 		uint32 immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
 		if (op.isBit(8)) immBit = 8;
-		if (op.getBit() < immBit) throw Error(ERR_IMM_IS_TOO_BIG);
+		if (op.getBit() < immBit) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
 		if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */
 		if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al
 			rex(op);
@@ -1976,21 +2025,21 @@ class CodeGenerator : public CodeArray {
 				return;
 			}
 		}
-		throw Error(ERR_BAD_COMBINATION);
+		XBYAK_THROW(ERR_BAD_COMBINATION)
 	}
 	void verifyMemHasSize(const Operand& op) const
 	{
-		if (op.isMEM() && op.getBit() == 0) throw Error(ERR_MEM_SIZE_IS_NOT_SPECIFIED);
+		if (op.isMEM() && op.getBit() == 0) XBYAK_THROW(ERR_MEM_SIZE_IS_NOT_SPECIFIED)
 	}
 	/*
 		mov(r, imm) = db(imm, mov_imm(r, imm))
 	*/
-	int mov_imm(const Reg& reg, size_t imm)
+	int mov_imm(const Reg& reg, uint64 imm)
 	{
 		int bit = reg.getBit();
 		const int idx = reg.getIdx();
 		int code = 0xB0 | ((bit == 8 ? 0 : 1) << 3);
-		if (bit == 64 && (imm & ~size_t(0xffffffffu)) == 0) {
+		if (bit == 64 && (imm & ~uint64(0xffffffffu)) == 0) {
 			rex(Reg32(idx));
 			bit = 32;
 		} else {
@@ -2027,19 +2076,19 @@ class CodeGenerator : public CodeArray {
 	}
 	void opMovxx(const Reg& reg, const Operand& op, uint8 code)
 	{
-		if (op.isBit(32)) throw Error(ERR_BAD_COMBINATION);
+		if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION)
 		int w = op.isBit(16);
 #ifdef XBYAK64
-		if (op.isHigh8bit()) throw Error(ERR_BAD_COMBINATION);
+		if (op.isHigh8bit()) XBYAK_THROW(ERR_BAD_COMBINATION)
 #endif
 		bool cond = reg.isREG() && (reg.getBit() > op.getBit());
 		opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w);
 	}
 	void opFpuMem(const Address& addr, uint8 m16, uint8 m32, uint8 m64, uint8 ext, uint8 m64ext)
 	{
-		if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
+		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
 		uint8 code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0;
-		if (!code) throw Error(ERR_BAD_MEM_SIZE);
+		if (!code) XBYAK_THROW(ERR_BAD_MEM_SIZE)
 		if (m64ext && addr.isBit(64)) ext = m64ext;
 
 		rex(addr, st0);
@@ -2051,7 +2100,7 @@ class CodeGenerator : public CodeArray {
 	void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32 code1, uint32 code2)
 	{
 		uint32 code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0;
-		if (!code) throw Error(ERR_BAD_ST_COMBINATION);
+		if (!code) XBYAK_THROW(ERR_BAD_ST_COMBINATION)
 		db(uint8(code >> 8));
 		db(uint8(code | (reg1.getIdx() | reg2.getIdx())));
 	}
@@ -2071,10 +2120,10 @@ class CodeGenerator : public CodeArray {
 			bool x = index.isExtIdx();
 			if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
 				int aaa = addr.getOpmaskIdx();
-				if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY);
+				if (aaa && !(type & T_M_K)) XBYAK_THROW(ERR_INVALID_OPMASK_WITH_MEMORY)
 				bool b = false;
 				if (addr.isBroadcast()) {
-					if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
+					if (!(type & (T_B32 | T_B64))) XBYAK_THROW(ERR_INVALID_BROADCAST)
 					b = true;
 				}
 				int VL = regExp.isVsib() ? index.getBit() : 0;
@@ -2102,7 +2151,7 @@ class CodeGenerator : public CodeArray {
 		const Operand *p2 = &op2;
 		if (!isR_R_RM) std::swap(p1, p2);
 		const unsigned int bit = r.getBit();
-		if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) throw Error(ERR_BAD_COMBINATION);
+		if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) XBYAK_THROW(ERR_BAD_COMBINATION)
 		type |= (bit == 64) ? T_W1 : T_W0;
 		opVex(r, p1, *p2, type, code, imm8);
 	}
@@ -2115,23 +2164,23 @@ class CodeGenerator : public CodeArray {
 			op = &op1;
 		}
 		// (x1, x2, op)
-		if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) throw Error(ERR_BAD_COMBINATION);
+		if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) XBYAK_THROW(ERR_BAD_COMBINATION)
 		opVex(x1, x2, *op, type, code0, imm8);
 	}
 	void opAVX_K_X_XM(const Opmask& k, const Xmm& x2, const Operand& op3, int type, int code0, int imm8 = NONE)
 	{
-		if (!op3.isMEM() && (x2.getKind() != op3.getKind())) throw Error(ERR_BAD_COMBINATION);
+		if (!op3.isMEM() && (x2.getKind() != op3.getKind())) XBYAK_THROW(ERR_BAD_COMBINATION)
 		opVex(k, &x2, op3, type, code0, imm8);
 	}
 	// (x, x/m), (y, x/m256), (z, y/m)
 	void checkCvt1(const Operand& x, const Operand& op) const
 	{
-		if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) throw Error(ERR_BAD_COMBINATION);
+		if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION)
 	}
 	// (x, x/m), (x, y/m256), (y, z/m)
 	void checkCvt2(const Xmm& x, const Operand& op) const
 	{
-		if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) throw Error(ERR_BAD_COMBINATION);
+		if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
 	}
 	void opCvt2(const Xmm& x, const Operand& op, int type, int code)
 	{
@@ -2141,7 +2190,7 @@ class CodeGenerator : public CodeArray {
 	}
 	void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8 code)
 	{
-		if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) throw Error(ERR_BAD_SIZE_OF_REGISTER);
+		if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
 		Xmm x(op.getIdx());
 		const Operand *p = op.isREG() ? &x : &op;
 		opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code);
@@ -2158,16 +2207,16 @@ class CodeGenerator : public CodeArray {
 	// QQQ:need to refactor
 	void opSp1(const Reg& reg, const Operand& op, uint8 pref, uint8 code0, uint8 code1)
 	{
-		if (reg.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER);
+		if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
 		bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM());
-		if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION);
+		if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
 		if (is16bit) db(0x66);
 		db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1);
 	}
 	void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int mode)
 	{
 		const RegExp& regExp = addr.getRegExp();
-		if (!regExp.isVsib(128 | 256)) throw Error(ERR_BAD_VSIB_ADDRESSING);
+		if (!regExp.isVsib(128 | 256)) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
 		const int y_vx_y = 0;
 		const int y_vy_y = 1;
 //		const int x_vy_x = 2;
@@ -2181,7 +2230,7 @@ class CodeGenerator : public CodeArray {
 			} else { // x_vy_x
 				isOK = !x1.isYMM() && isAddrYMM && !x2.isYMM();
 			}
-			if (!isOK) throw Error(ERR_BAD_VSIB_ADDRESSING);
+			if (!isOK) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
 		}
 		opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type, code);
 	}
@@ -2201,11 +2250,11 @@ class CodeGenerator : public CodeArray {
 		case xx_xy_yz: if ((x1.isXMM() && x2.isYMM()) || (x1.isYMM() && x2.isZMM())) return;
 			break;
 		}
-		throw Error(ERR_BAD_VSIB_ADDRESSING);
+		XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
 	}
 	void opGather2(const Xmm& x, const Address& addr, int type, uint8 code, int mode)
 	{
-		if (x.hasZero()) throw Error(ERR_INVALID_ZERO);
+		if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
 		checkGather2(x, addr.getRegExp().getIndex(), mode);
 		opVex(x, 0, addr, type, code);
 	}
@@ -2216,16 +2265,16 @@ class CodeGenerator : public CodeArray {
 	void opVmov(const Operand& op, const Xmm& x, int type, uint8 code, bool mode)
 	{
 		if (mode) {
-			if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM())))  throw Error(ERR_BAD_COMBINATION);
+			if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
 		} else {
-			if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION);
+			if (!op.isMEM() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION)
 		}
 		opVex(x, 0, op, type, code);
 	}
 	void opGatherFetch(const Address& addr, const Xmm& x, int type, uint8 code, Operand::Kind kind)
 	{
-		if (addr.hasZero()) throw Error(ERR_INVALID_ZERO);
-		if (addr.getRegExp().getIndex().getKind() != kind) throw Error(ERR_BAD_VSIB_ADDRESSING);
+		if (addr.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
+		if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
 		opVex(x, 0, addr, type, code);
 	}
 	void opInOut(const Reg& a, const Reg& d, uint8 code)
@@ -2237,7 +2286,7 @@ class CodeGenerator : public CodeArray {
 			case 32: db(code + 1); return;
 			}
 		}
-		throw Error(ERR_BAD_COMBINATION);
+		XBYAK_THROW(ERR_BAD_COMBINATION)
 	}
 	void opInOut(const Reg& a, uint8 code, uint8 v)
 	{
@@ -2248,8 +2297,17 @@ class CodeGenerator : public CodeArray {
 			case 32: db(code + 1); db(v); return;
 			}
 		}
-		throw Error(ERR_BAD_COMBINATION);
+		XBYAK_THROW(ERR_BAD_COMBINATION)
+	}
+#ifdef XBYAK64
+	void opAMX(const Tmm& t1, const Address& addr, int type, int code0)
+	{
+		// require both base and index
+		const RegExp exp = addr.getRegExp(false);
+		if (exp.getBase().getBit() == 0 || exp.getIndex().getBit() == 0) XBYAK_THROW(ERR_NOT_SUPPORTED)
+		opVex(t1, &tmm0, addr, type, code0);
 	}
+#endif
 public:
 	unsigned int getVersion() const { return VERSION; }
 	using CodeArray::db;
@@ -2285,6 +2343,7 @@ class CodeGenerator : public CodeArray {
 	const Zmm zmm8, zmm9, zmm10, zmm11, zmm12, zmm13, zmm14, zmm15;
 	const Zmm zmm16, zmm17, zmm18, zmm19, zmm20, zmm21, zmm22, zmm23;
 	const Zmm zmm24, zmm25, zmm26, zmm27, zmm28, zmm29, zmm30, zmm31;
+	const Tmm tmm0, tmm1, tmm2, tmm3, tmm4, tmm5, tmm6, tmm7;
 	const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience
 	const Xmm &xm16, &xm17, &xm18, &xm19, &xm20, &xm21, &xm22, &xm23;
 	const Xmm &xm24, &xm25, &xm26, &xm27, &xm28, &xm29, &xm30, &xm31;
@@ -2411,7 +2470,7 @@ class CodeGenerator : public CodeArray {
 				db(reg1.isREG(8) ? 0xA0 : reg1.isREG() ? 0xA1 : reg2.isREG(8) ? 0xA2 : 0xA3);
 				db(addr->getDisp(), 8);
 			} else {
-				throw Error(ERR_BAD_COMBINATION);
+				XBYAK_THROW(ERR_BAD_COMBINATION)
 			}
 		} else
 #else
@@ -2425,7 +2484,7 @@ class CodeGenerator : public CodeArray {
 			opRM_RM(reg1, reg2, 0x88);
 		}
 	}
-	void mov(const Operand& op, size_t imm)
+	void mov(const Operand& op, uint64 imm)
 	{
 		if (op.isREG()) {
 			const int size = mov_imm(op.getReg(), imm);
@@ -2435,15 +2494,15 @@ class CodeGenerator : public CodeArray {
 			int immSize = op.getBit() / 8;
 			if (immSize <= 4) {
 				sint64 s = sint64(imm) >> (immSize * 8);
-				if (s != 0 && s != -1) throw Error(ERR_IMM_IS_TOO_BIG);
+				if (s != 0 && s != -1) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
 			} else {
-				if (!inner::IsInInt32(imm)) throw Error(ERR_IMM_IS_TOO_BIG);
+				if (!inner::IsInInt32(imm)) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
 				immSize = 4;
 			}
 			opModM(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0xC6, NONE, NONE, immSize);
 			db(static_cast<uint32>(imm), immSize);
 		} else {
-			throw Error(ERR_BAD_COMBINATION);
+			XBYAK_THROW(ERR_BAD_COMBINATION)
 		}
 	}
 	void mov(const NativeReg& reg, const char *label) // can't use std::string
@@ -2466,7 +2525,7 @@ class CodeGenerator : public CodeArray {
 		if (p1->isMEM() || (p2->isREG(16 | i32e) && p2->getIdx() == 0)) {
 			p1 = &op2; p2 = &op1;
 		}
-		if (p1->isMEM()) throw Error(ERR_BAD_COMBINATION);
+		if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
 		if (p2->isREG() && (p1->isREG(16 | i32e) && p1->getIdx() == 0)
 #ifdef XBYAK64
 			&& (p2->getIdx() != 0 || !p1->isREG(32))
@@ -2496,7 +2555,7 @@ class CodeGenerator : public CodeArray {
 	{
 		switch (seg.getIdx()) {
 		case Segment::es: db(0x07); break;
-		case Segment::cs: throw Error(ERR_BAD_COMBINATION);
+		case Segment::cs: XBYAK_THROW(ERR_BAD_COMBINATION)
 		case Segment::ss: db(0x17); break;
 		case Segment::ds: db(0x1F); break;
 		case Segment::fs: db(0x0F); db(0xA1); break;
@@ -2566,6 +2625,7 @@ class CodeGenerator : public CodeArray {
 		, zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15)
 		, zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23)
 		, zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31)
+		, tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7)
 		// for my convenience
 		, xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15)
 		, xm16(xmm16), xm17(xmm17), xm18(xmm18), xm19(xmm19), xm20(xmm20), xm21(xmm21), xm22(xmm22), xm23(xmm23)
@@ -2598,7 +2658,7 @@ class CodeGenerator : public CodeArray {
 	*/
 	void ready(ProtectMode mode = PROTECT_RWE)
 	{
-		if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND);
+		if (hasUndefinedLabel()) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND)
 		if (isAutoGrow()) {
 			calcJmpAddress();
 			if (useProtect()) setProtectMode(mode);
@@ -2663,7 +2723,7 @@ class CodeGenerator : public CodeArray {
 	void align(size_t x = 16, bool useMultiByteNop = true)
 	{
 		if (x == 1) return;
-		if (x < 1 || (x & (x - 1))) throw Error(ERR_BAD_ALIGN);
+		if (x < 1 || (x & (x - 1))) XBYAK_THROW(ERR_BAD_ALIGN)
 		if (isAutoGrow() && x > inner::ALIGN_PAGE_SIZE) fprintf(stderr, "warning:autoGrow mode does not support %d align\n", (int)x);
 		size_t remain = size_t(getCurr()) % x;
 		if (remain) {
@@ -2702,6 +2762,7 @@ static const Ymm ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29
 static const Zmm zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15);
 static const Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23);
 static const Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31);
+static const Tmm tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7);
 static const RegRip rip;
 #endif
 #ifndef XBYAK_DISABLE_SEGMENT
diff --git a/src/xbyak/xbyak_mnemonic.h b/src/xbyak/xbyak_mnemonic.h
index 2de6ec23..393a8dcc 100644
--- a/src/xbyak/xbyak_mnemonic.h
+++ b/src/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "5.912"; }
+const char *getVersionString() const { return "5.941"; }
 void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
 void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
 void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@@ -266,7 +266,7 @@ void fnsave(const Address& addr) { opModM(addr, Reg32(6), 0xDD, 0x100); }
 void fnstcw(const Address& addr) { opModM(addr, Reg32(7), 0xD9, 0x100); }
 void fnstenv(const Address& addr) { opModM(addr, Reg32(6), 0xD9, 0x100); }
 void fnstsw(const Address& addr) { opModM(addr, Reg32(7), 0xDD, 0x100); }
-void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0xDF); db(0xE0); }
+void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xDF); db(0xE0); }
 void fpatan() { db(0xD9); db(0xF3); }
 void fprem() { db(0xD9); db(0xF8); }
 void fprem1() { db(0xD9); db(0xF5); }
@@ -285,7 +285,7 @@ void fstenv(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xD9, 0x100)
 void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); }
 void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); }
 void fstsw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xDD, 0x100); }
-void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0x9B); db(0xDF); db(0xE0); }
+void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x9B); db(0xDF); db(0xE0); }
 void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); }
 void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); }
 void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); }
@@ -454,7 +454,7 @@ void jz(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0
 void lahf() { db(0x9F); }
 void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); }
 void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); }
-void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModM(addr, reg, 0x8D); }
+void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); }
 void leave() { db(0xC9); }
 void lfence() { db(0x0F); db(0xAE); db(0xE8); }
 void lfs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB4); }
@@ -475,7 +475,7 @@ void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
 void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB2); }
 void lzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); }
 void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66);  opModR(reg1, reg2, 0x0F, 0xF7); }
-void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); opModR(reg1, reg2, 0x0F, 0xF7); }
+void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModR(reg1, reg2, 0x0F, 0xF7); }
 void maxpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x66, isXMM_XMMorMEM); }
 void maxps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x100, isXMM_XMMorMEM); }
 void maxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0xF2, isXMM_XMMorMEM); }
@@ -516,7 +516,7 @@ void movntdqa(const Xmm& xmm, const Address& addr) { db(0x66); opModM(addr, xmm,
 void movnti(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0xC3); }
 void movntpd(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x2B); }
 void movntps(const Address& addr, const Xmm& xmm) { opModM(addr, Mmx(xmm.getIdx()), 0x0F, 0x2B); }
-void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) throw Error(ERR_BAD_COMBINATION); opModM(addr, mmx, 0x0F, 0xE7); }
+void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModM(addr, mmx, 0x0F, 0xE7); }
 void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, mmx.isXMM() ? 0xD6 : 0x7F); }
 void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opModRM(mmx, op, (mmx.getKind() == op.getKind()), op.isMEM(), 0x0F, mmx.isXMM() ? 0x7E : 0x6F); }
 void movq2dq(const Xmm& xmm, const Mmx& mmx) { db(0xF3); opModR(xmm, mmx, 0x0F, 0xD6); }
@@ -609,7 +609,7 @@ void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, 0x66, NON
 void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, 0x66, NONE, 0x38); }
 void pinsrb(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); }
 void pinsrd(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); }
-void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(mmx, op, 0xC4, mmx.isXMM() ? 0x66 : NONE, 0, imm); }
+void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(mmx, op, 0xC4, mmx.isXMM() ? 0x66 : NONE, 0, imm); }
 void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, 0x66, NONE, 0x38); }
 void pmaddwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF5); }
 void pmaxsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3C, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
@@ -707,8 +707,8 @@ void rcr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 3); }
 void rcr(const Operand& op, int imm) { opShift(op, imm, 3); }
 void rdmsr() { db(0x0F); db(0x32); }
 void rdpmc() { db(0x0F); db(0x33); }
-void rdrand(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }
-void rdseed(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }
+void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }
+void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }
 void rdtsc() { db(0x0F); db(0x31); }
 void rdtscp() { db(0x0F); db(0x01); db(0xF9); }
 void rep() { db(0xF3); }
@@ -839,8 +839,8 @@ void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) {
 void vblendvps(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4A, x4.getIdx() << 4); }
 void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A); }
 void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x5A); }
-void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); }
-void vbroadcastss(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x18); }
+void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); }
+void vbroadcastss(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x18); }
 void vcmpeq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 16); }
 void vcmpeq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 16); }
 void vcmpeq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 16); }
@@ -999,9 +999,9 @@ void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
 void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5E); }
 void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x41, imm); }
 void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x40, imm); }
-void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) throw Error(ERR_BAD_COMBINATION); opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }
-void vextracti128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) throw Error(ERR_BAD_COMBINATION); opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }
-void vextractps(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }
+void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }
+void vextracti128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }
+void vextractps(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }
 void vfmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x98); }
 void vfmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x98); }
 void vfmadd132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0x99); }
@@ -1073,8 +1073,8 @@ void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
 void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7C); }
 void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7D); }
 void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D); }
-void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }
-void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }
+void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }
+void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }
 void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
 void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }
 void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }
@@ -1095,25 +1095,25 @@ void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_
 void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28); }
 void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x29); }
 void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x28); }
-void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }
-void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }
+void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }
+void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }
 void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_DUP | T_F2 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z, 0x12); }
 void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_YMM, 0x7F); }
 void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM, 0x6F); }
 void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_F3 | T_0F | T_YMM, 0x7F); }
 void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM, 0x6F); }
-void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); }
+void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); }
 void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x17); }
-void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x16); }
+void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x16); }
 void vmovhps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_EVEX | T_EW0 | T_N8, 0x17); }
-void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x16); }
-void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); }
+void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x16); }
+void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); }
 void vmovlpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x13); }
-void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x12); }
+void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x12); }
 void vmovlps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_EVEX | T_EW0 | T_N8, 0x13); }
-void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x12); }
-void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }
-void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }
+void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x12); }
+void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }
+void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }
 void vmovntdq(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); }
 void vmovntdqa(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, 0x2A); }
 void vmovntpd(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); }
@@ -1123,12 +1123,12 @@ void vmovq(const Xmm& x, const Address& addr) { int type, code; if (x.getIdx() <
 void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }
 void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_M_K, 0x11); }
 void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX, 0x10); }
-void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX, 0x10); }
+void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX, 0x10); }
 void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x16); }
 void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x12); }
 void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX | T_M_K, 0x11); }
 void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); }
-void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); }
+void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); }
 void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x11); }
 void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x10); }
 void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x11); }
@@ -1163,10 +1163,10 @@ void vpavgw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1,
 void vpblendd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x02, imm); }
 void vpblendvb(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4C, x4.getIdx() << 4); }
 void vpblendw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0E, imm); }
-void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x78); }
-void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x58); }
-void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX, 0x59); }
-void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x79); }
+void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x78); }
+void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x58); }
+void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX, 0x59); }
+void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x79); }
 void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM | T_EVEX, 0x44, imm); }
 void vpcmpeqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x74); }
 void vpcmpeqd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x76); }
@@ -1180,8 +1180,8 @@ void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1
 void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x65); }
 void vpcmpistri(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); }
 void vpcmpistrm(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); }
-void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
-void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
+void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
+void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
 void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); }
 void vpermilpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x0D); }
 void vpermilpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_EVEX | T_B64, 0x05, imm); }
@@ -1192,10 +1192,10 @@ void vpermpd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1,
 void vpermps(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x16); }
 void vpermq(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x00, imm); }
 void vpermq(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x36); }
-void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }
-void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }
-void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }
-void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }
+void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }
+void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }
+void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }
+void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }
 void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x90, 1); }
 void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x90, 0); }
 void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x91, 2); }
@@ -1207,10 +1207,10 @@ void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66
 void vphsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x06); }
 void vphsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x07); }
 void vphsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x05); }
-void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }
-void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }
-void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }
-void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }
+void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }
+void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }
+void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }
+void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }
 void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x04); }
 void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF5); }
 void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); }
@@ -1229,7 +1229,7 @@ void vpminsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1,
 void vpminub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDA); }
 void vpminud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3B); }
 void vpminuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x3A); }
-void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); }
+void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); }
 void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x21); }
 void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N2 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x22); }
 void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x20); }
@@ -1618,15 +1618,27 @@ void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }
 void fxrstor64(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xAE); }
 void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
 void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
-void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }
-void pextrq(const Operand& op, const Xmm& xmm, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }
-void pinsrq(const Xmm& xmm, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }
+void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }
+void pextrq(const Operand& op, const Xmm& xmm, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }
+void pinsrq(const Xmm& xmm, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }
 void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); }
 void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); }
 void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_ER_X, 0x2D); }
 void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); }
 void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }
 void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }
+void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }
+void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }
+void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }
+void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); }
+void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }
+void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }
+void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }
+void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }
+void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }
+void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }
+void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }
+void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }
 #else
 void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
 void jcxz(const Label& label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
@@ -1671,17 +1683,17 @@ void kandnw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r
 void kandq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x41); }
 void kandw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x41); }
 void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }
-void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) throw Error(ERR_BAD_COMBINATION); opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }
+void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }
 void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }
 void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }
 void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }
-void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) throw Error(ERR_BAD_COMBINATION); opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }
+void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }
 void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }
 void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }
 void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }
-void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) throw Error(ERR_BAD_COMBINATION); opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }
+void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }
 void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }
-void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) throw Error(ERR_BAD_COMBINATION); opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }
+void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }
 void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }
 void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }
 void knotb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x44); }
@@ -1777,22 +1789,22 @@ void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0
 void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }
 void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x88); }
 void vexpandps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x88); }
-void vextractf32x4(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x19, imm); }
-void vextractf32x8(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1B, imm); }
-void vextractf64x2(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x19, imm); }
-void vextractf64x4(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1B, imm); }
-void vextracti32x4(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x39, imm); }
-void vextracti32x8(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
-void vextracti64x2(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); }
-void vextracti64x4(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
+void vextractf32x4(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x19, imm); }
+void vextractf32x8(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1B, imm); }
+void vextractf64x2(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x19, imm); }
+void vextractf64x4(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1B, imm); }
+void vextracti32x4(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x39, imm); }
+void vextracti32x8(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
+void vextracti64x2(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); }
+void vextracti64x4(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
 void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); }
 void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
 void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
 void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
-void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
-void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
-void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
-void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
+void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
+void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
+void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
+void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
 void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); }
 void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); }
 void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
@@ -1813,14 +1825,14 @@ void vgetmantpd(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x,
 void vgetmantps(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x26, imm); }
 void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
 void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
-void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x18, imm); }
-void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
-void vinsertf64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x18, imm); }
-void vinsertf64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
-void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x38, imm); }
-void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
-void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); }
-void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
+void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x18, imm); }
+void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
+void vinsertf64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x18, imm); }
+void vinsertf64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
+void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x38, imm); }
+void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
+void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); }
+void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
 void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
 void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
 void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
@@ -1833,8 +1845,8 @@ void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3
 void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
 void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
 void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
-void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) throw Error(ERR_OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
-void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) throw Error(ERR_OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }
+void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
+void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }
 void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }
 void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }
 void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }
diff --git a/src/xbyak/xbyak_util.h b/src/xbyak/xbyak_util.h
index 4f79d8f9..5caed058 100644
--- a/src/xbyak/xbyak_util.h
+++ b/src/xbyak/xbyak_util.h
@@ -220,23 +220,23 @@ class Cpu {
 	int displayModel; // model + extModel
 
 	unsigned int getNumCores(IntelCpuTopologyLevel level) {
-		if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
+		if (!x2APIC_supported_) XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
 		switch (level) {
 		case SmtLevel: return numCores_[level - 1];
 		case CoreLevel: return numCores_[level - 1] / numCores_[SmtLevel - 1];
-		default: throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
+		default: XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
 		}
 	}
 
 	unsigned int getDataCacheLevels() const { return dataCacheLevels_; }
 	unsigned int getCoresSharingDataCache(unsigned int i) const
 	{
-		if (i >= dataCacheLevels_) throw  Error(ERR_BAD_PARAMETER);
+		if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
 		return coresSharignDataCache_[i];
 	}
 	unsigned int getDataCacheSize(unsigned int i) const
 	{
-		if (i >= dataCacheLevels_) throw  Error(ERR_BAD_PARAMETER);
+		if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
 		return dataCacheSize_[i];
 	}
 
@@ -353,6 +353,9 @@ class Cpu {
 	static const Type tAVX512_VPOPCNTDQ = uint64(1) << 56;
 	static const Type tAVX512_BF16 = uint64(1) << 57;
 	static const Type tAVX512_VP2INTERSECT = uint64(1) << 58;
+	static const Type tAMX_TILE = uint64(1) << 59;
+	static const Type tAMX_INT8 = uint64(1) << 60;
+	static const Type tAMX_BF16 = uint64(1) << 61;
 
 	Cpu()
 		: type_(NONE)
@@ -456,6 +459,9 @@ class Cpu {
 			if (EBX & (1U << 14)) type_ |= tMPX;
 			if (EBX & (1U << 29)) type_ |= tSHA;
 			if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
+			if (EDX & (1U << 24)) type_ |= tAMX_TILE;
+			if (EDX & (1U << 25)) type_ |= tAMX_INT8;
+			if (EDX & (1U << 22)) type_ |= tAMX_BF16;
 		}
 		setFamily();
 		setNumCores();
@@ -558,7 +564,7 @@ class Pack {
 	{
 		if (n_ == maxTblNum) {
 			fprintf(stderr, "ERR Pack::can't append\n");
-			throw Error(ERR_BAD_PARAMETER);
+			XBYAK_THROW_RET(ERR_BAD_PARAMETER, *this)
 		}
 		tbl_[n_++] = &t;
 		return *this;
@@ -567,7 +573,7 @@ class Pack {
 	{
 		if (n > maxTblNum) {
 			fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n);
-			throw Error(ERR_BAD_PARAMETER);
+			XBYAK_THROW(ERR_BAD_PARAMETER)
 		}
 		n_ = n;
 		for (size_t i = 0; i < n; i++) {
@@ -578,7 +584,7 @@ class Pack {
 	{
 		if (n >= n_) {
 			fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
-			throw Error(ERR_BAD_PARAMETER);
+			XBYAK_THROW_RET(ERR_BAD_PARAMETER, rax)
 		}
 		return *tbl_[n];
 	}
@@ -591,7 +597,7 @@ class Pack {
 		if (num == size_t(-1)) num = n_ - pos;
 		if (pos + num > n_) {
 			fprintf(stderr, "ERR Pack::sub bad pos=%d, num=%d\n", (int)pos, (int)num);
-			throw Error(ERR_BAD_PARAMETER);
+			XBYAK_THROW_RET(ERR_BAD_PARAMETER, Pack())
 		}
 		Pack pack;
 		pack.n_ = num;
@@ -666,9 +672,9 @@ class StackFrame {
 		, t(t_)
 	{
 		using namespace Xbyak;
-		if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
+		if (pNum < 0 || pNum > 4) XBYAK_THROW(ERR_BAD_PNUM)
 		const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
-		if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM);
+		if (tNum_ < 0 || allRegNum > maxRegNum) XBYAK_THROW(ERR_BAD_TNUM)
 		const Reg64& _rsp = code->rsp;
 		saveNum_ = (std::max)(0, allRegNum - noSaveNum);
 		const int *tbl = getOrderTbl() + noSaveNum;

From a145a2144ab03c8b7e585d784ef851da8515002f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 4 Aug 2020 14:43:11 +0900
Subject: [PATCH 271/553] nothrow version(TBD)

---
 Makefile             |  2 +-
 src/fp_generator.hpp | 12 +++++-------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index eef4ccc9..346be6ab 100644
--- a/Makefile
+++ b/Makefile
@@ -372,7 +372,7 @@ ecdsa-wasm:
 bin/emu:
 	$(CXX) -g -o $@ src/fp.cpp src/bn_c256.cpp test/bn_c256_test.cpp -DMCL_DONT_USE_XBYAK -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_64BIT_PORTABLE -DMCL_VINT_FIXED_BUFFER -DMCL_MAX_BIT_SIZE=256 -I./include
 bin/pairing_c_min.exe: sample/pairing_c.c include/mcl/vint.hpp src/fp.cpp include/mcl/bn.hpp
-	$(CXX) -std=c++03 -O3 -g -fno-threadsafe-statics -fno-exceptions -fno-rtti -o $@ sample/pairing_c.c src/fp.cpp src/bn_c384_256.cpp -I./include -DMCL_DONT_USE_XBYAK -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -DMCL_MAX_BIT_SIZE=384 -DMCL_VINT_64BIT_PORTABLE -DCYBOZU_DONT_USE_STRING -DCYBOZU_DONT_USE_EXCEPTION -DNDEBUG # -DMCL_DONT_USE_CSPRNG
+	$(CXX) -std=c++03 -O3 -g -fno-threadsafe-statics -fno-exceptions -fno-rtti -o $@ sample/pairing_c.c src/fp.cpp src/bn_c384_256.cpp -I./include -DXBYAK_NO_EXCEPTION -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -DMCL_MAX_BIT_SIZE=384 -DMCL_VINT_64BIT_PORTABLE -DCYBOZU_DONT_USE_STRING -DCYBOZU_DONT_USE_EXCEPTION -DNDEBUG # -DMCL_DONT_USE_CSPRNG
 
 make_tbl:
 	$(MAKE) ../bls/src/qcoeff-bn254.hpp
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 2576cab9..97ce9ae2 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -78,7 +78,7 @@ struct MixPack {
 	}
 	void removeLast()
 	{
-		if (!size()) throw cybozu::Exception("MixPack:removeLast:empty");
+		assert(size());
 		if (mn > 0) {
 			mn--;
 		} else {
@@ -248,6 +248,8 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		reset(); // reset jit code for reuse
 		setProtectModeRW(); // read/write memory
 		init_inner(op);
+		// ToDo : recover op if false
+		if (Xbyak::GetError()) return false;
 //		printf("code size=%d\n", (int)getSize());
 		setProtectModeRE(); // set read/exec memory
 		return true;
@@ -1822,9 +1824,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void sqr2(const Reg64& y3, const Reg64& y2, const Reg64& y1, const Reg64& y0, const Reg64& x1, const Reg64& x0, const Reg64& t1, const Reg64& t0)
 	{
-		if (!useMulx_) {
-			throw cybozu::Exception("sqr2:not support mulx");
-		}
+		assert(useMulx_);
 		mov(rdx, x0);
 		mulx(y1, y0, x0); // x0^2
 		mov(rdx, x1);
@@ -1843,9 +1843,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void mul2x2(const RegExp& px, const RegExp& py, const Reg64& t4, const Reg64& t3, const Reg64& t2, const Reg64& t1, const Reg64& t0)
 	{
-		if (!useMulx_) {
-			throw cybozu::Exception("mul2x2:not support mulx");
-		}
+		assert(useMulx_);
 #if 0
 		// # of add is less, but a little slower
 		mov(t4, ptr [py + 8 * 0]);

From c6c6e49eb894fa3c46271b0c7fa03d6c590f284c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 19 Aug 2020 10:10:58 +0900
Subject: [PATCH 272/553] remove unused old functions

---
 include/mcl/fp.hpp         |  4 ----
 include/mcl/mapto_wb19.hpp | 27 ---------------------------
 src/fp.cpp                 | 31 -------------------------------
 3 files changed, 62 deletions(-)

diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index a0af7477..6c5b0b05 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -73,10 +73,6 @@ bool isEnableJIT(); // 1st call is not threadsafe
 uint32_t sha256(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSize);
 uint32_t sha512(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSize);
 
-void hkdf_extract_addZeroByte(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize);
-void hkdf_extract(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize);
-void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6]);
-
 // draft-07 outSize = 128 or 256
 void expand_message_xmd(uint8_t out[], size_t outSize, const void *msg, size_t msgSize, const void *dst, size_t dstSize);
 
diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index a212811e..814baaa5 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -9,33 +9,6 @@
 */
 namespace mcl {
 
-// ctr = 0 or 1 or 2
-template<class Fp2>
-inline void hashToFp2old(Fp2& out, const void *msg, size_t msgSize, uint8_t ctr, const void *dst, size_t dstSize)
-{
-	const bool addZeroByte = true; // append zero byte to msg
-	assert(ctr <= 2);
-	const size_t degree = 2;
-	uint8_t msg_prime[32];
-	// add '\0' at the end of dst
-	// see. 5.3. Implementation of https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve
-	if (addZeroByte) {
-		fp::hkdf_extract_addZeroByte(msg_prime, reinterpret_cast<const uint8_t*>(dst), dstSize, reinterpret_cast<const uint8_t*>(msg), msgSize);
-	} else {
-		fp::hkdf_extract(msg_prime, reinterpret_cast<const uint8_t*>(dst), dstSize, reinterpret_cast<const uint8_t*>(msg), msgSize);
-	}
-	char info_pfx[] = "H2C000";
-	info_pfx[3] = ctr;
-	for (size_t i = 0; i < degree; i++) {
-		info_pfx[4] = char(i + 1);
-		uint8_t t[64];
-		fp::hkdf_expand(t, msg_prime, info_pfx);
-		bool b;
-		out.getFp0()[i].setBigEndianMod(&b, t, 64);
-		assert(b); (void)b;
-	}
-}
-
 namespace local {
 
 // y^2 = x^3 + 4(1 + i)
diff --git a/src/fp.cpp b/src/fp.cpp
index ab09ff1b..2b201081 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -128,37 +128,6 @@ uint32_t sha512(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSiz
 	return (uint32_t)cybozu::Sha512().digest(out, maxOutSize, msg, msgSize);
 }
 
-void hkdf_extract_addZeroByte(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize)
-{
-	uint8_t saltZero[32];
-	if (salt == 0 || saltSize == 0) {
-		memset(saltZero, 0, sizeof(saltZero));
-		salt = saltZero;
-		saltSize = sizeof(saltZero);
-	}
-	cybozu::hmac256addZeroByte(hmac, salt, saltSize, msg, msgSize);
-}
-
-void hkdf_extract(uint8_t hmac[32], const uint8_t *salt, size_t saltSize, const uint8_t *msg, size_t msgSize)
-{
-	uint8_t saltZero[32];
-	if (salt == 0 || saltSize == 0) {
-		memset(saltZero, 0, sizeof(saltZero));
-		salt = saltZero;
-		saltSize = sizeof(saltZero);
-	}
-	cybozu::hmac256(hmac, salt, saltSize, msg, msgSize);
-}
-
-void hkdf_expand(uint8_t out[64], const uint8_t prk[32], char info[6])
-{
-	info[5] = 1;
-	cybozu::hmac256(out, prk, 32, info, 6);
-	info[5] = 2;
-	memcpy(out + 32, info, 6);
-	cybozu::hmac256(out + 32, prk, 32, out, 32 + 6);
-}
-
 void expand_message_xmd(uint8_t out[], size_t outSize, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
 {
 	assert(outSize == 128 || outSize == 256);

From 90b4dc254ee25d9b0298828e5333b9ebd239f533 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 19 Aug 2020 10:20:24 +0900
Subject: [PATCH 273/553] init is not necessary

---
 src/fp.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index 2b201081..edf47d21 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -139,7 +139,7 @@ void expand_message_xmd(uint8_t out[], size_t outSize, const void *msg, size_t m
 	/*
 		Z_apd | msg | BE(outSize, 2) | BE(0, 1) | DST | BE(dstSize, 1)
 	*/
-	uint8_t lenBuf[2] = { 1, 0 }; // 256 = outSize
+	uint8_t lenBuf[2];
 	uint8_t iBuf = 0;
 	uint8_t dstSizeBuf = uint8_t(dstSize);
 	cybozu::Set16bitAsBE(lenBuf, uint16_t(outSize));

From b575c9c18821704e1fa84f7517bff3cd578340ba Mon Sep 17 00:00:00 2001
From: Alexander van der Meij <github@vandermeij.tech>
Date: Tue, 25 Aug 2020 11:16:09 +0200
Subject: [PATCH 274/553] allow building on openbsd using clang

---
 common.mk | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/common.mk b/common.mk
index 193e3934..5e3d6307 100644
--- a/common.mk
+++ b/common.mk
@@ -23,6 +23,13 @@ ifeq ($(UNAME_S),Darwin)
 else
   LIB_SUF=so
 endif
+ifeq ($(UNAME_S),OpenBSD)
+  OS=openbsd
+  CXX=clang++
+  CFLAGS+=-I/usr/local/include
+  LDFLAGS+=-L/usr/local/lib
+endif
+
 ARCH?=$(shell uname -m)
 ifneq ($(findstring $(ARCH),x86_64/amd64),)
   CPU=x86-64
@@ -51,7 +58,7 @@ ifeq ($(ARCH),aarch64)
   CPU=aarch64
   BIT=64
 endif
-ifeq ($(findstring $(OS),mac/mingw64),)
+ifeq ($(findstring $(OS),mac/mingw64/openbsd),)
   LDFLAGS+=-lrt
 endif
 

From 50170e0d570521ee0c424d27127a24a4c8f77c39 Mon Sep 17 00:00:00 2001
From: Mohamed Amin JABRI <mohamed-amin.jabri@bitflyer.com>
Date: Thu, 27 Aug 2020 17:16:22 +0900
Subject: [PATCH 275/553] Fix BOM at beginning of file.

---
 ffi/cs/mcl/mcl.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index 035b225b..8a7b6fea 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -1,4 +1,4 @@
-����using System;
+﻿using System;
 using System.Text;
 using System.Runtime.InteropServices;
 

From 0f6f5a148b9d8db4b879f4d809ee94208ac75b53 Mon Sep 17 00:00:00 2001
From: Mohamed Amin JABRI <mohamed-amin.jabri@bitflyer.com>
Date: Thu, 27 Aug 2020 17:17:53 +0900
Subject: [PATCH 276/553] Make DllImport for mclBn256 cross-platform (remove
 .dll extension)

---
 ffi/cs/bn256.cs | 115 ++++++++++++++++++++++++------------------------
 1 file changed, 58 insertions(+), 57 deletions(-)

diff --git a/ffi/cs/bn256.cs b/ffi/cs/bn256.cs
index 22169d1b..afa34919 100644
--- a/ffi/cs/bn256.cs
+++ b/ffi/cs/bn256.cs
@@ -4,125 +4,126 @@
 
 namespace mcl {
 	public class BN256 {
-		[DllImport("mclBn256.dll")]
+		public const string dllName = "mclBn256";
+		[DllImport(dllName)]
 		public static extern int mclBn_init(int curve, int compiledTimeVar);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnFr_clear(ref Fr x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnFr_setInt(ref Fr y, int x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnFr_setStr(ref Fr x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnFr_isValid(ref Fr x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnFr_isEqual(ref Fr x, ref Fr y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnFr_isZero(ref Fr x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnFr_isOne(ref Fr x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnFr_setByCSPRNG(ref Fr x);
 
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnFr_setHashOf(ref Fr x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnFr_getStr([Out]StringBuilder buf, long maxBufSize, ref Fr x, int ioMode);
 
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnFr_neg(ref Fr y, ref Fr x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnFr_inv(ref Fr y, ref Fr x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnFr_add(ref Fr z, ref Fr x, ref Fr y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnFr_sub(ref Fr z, ref Fr x, ref Fr y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnFr_mul(ref Fr z, ref Fr x, ref Fr y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnFr_div(ref Fr z, ref Fr x, ref Fr y);
 
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnG1_clear(ref G1 x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnG1_setStr(ref G1 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnG1_isValid(ref G1 x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnG1_isEqual(ref G1 x, ref G1 y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnG1_isZero(ref G1 x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnG1_hashAndMapTo(ref G1 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern long mclBnG1_getStr([Out]StringBuilder buf, long maxBufSize, ref G1 x, int ioMode);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnG1_neg(ref G1 y, ref G1 x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnG1_dbl(ref G1 y, ref G1 x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnG1_add(ref G1 z, ref G1 x, ref G1 y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnG1_sub(ref G1 z, ref G1 x, ref G1 y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnG1_mul(ref G1 z, ref G1 x, ref Fr y);
 
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnG2_clear(ref G2 x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnG2_setStr(ref G2 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnG2_isValid(ref G2 x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnG2_isEqual(ref G2 x, ref G2 y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnG2_isZero(ref G2 x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnG2_hashAndMapTo(ref G2 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern long mclBnG2_getStr([Out]StringBuilder buf, long maxBufSize, ref G2 x, int ioMode);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnG2_neg(ref G2 y, ref G2 x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnG2_dbl(ref G2 y, ref G2 x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnG2_add(ref G2 z, ref G2 x, ref G2 y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnG2_sub(ref G2 z, ref G2 x, ref G2 y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnG2_mul(ref G2 z, ref G2 x, ref Fr y);
 
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnGT_clear(ref GT x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnGT_setStr(ref GT x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnGT_isEqual(ref GT x, ref GT y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnGT_isZero(ref GT x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern int mclBnGT_isOne(ref GT x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern long mclBnGT_getStr([Out]StringBuilder buf, long maxBufSize, ref GT x, int ioMode);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnGT_neg(ref GT y, ref GT x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnGT_inv(ref GT y, ref GT x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnGT_add(ref GT z, ref GT x, ref GT y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnGT_sub(ref GT z, ref GT x, ref GT y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnGT_mul(ref GT z, ref GT x, ref GT y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnGT_div(ref GT z, ref GT x, ref GT y);
 
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBnGT_pow(ref GT z, ref GT x, ref Fr y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBn_pairing(ref GT z, ref G1 x, ref G2 y);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBn_finalExp(ref GT y, ref GT x);
-		[DllImport("mclBn256.dll")]
+		[DllImport(dllName)]
 		public static extern void mclBn_millerLoop(ref GT z, ref G1 x, ref G2 y);
 
 		public static void init()

From 852b2a53e1f1b3657f70d3d31c8336c2d117c5fb Mon Sep 17 00:00:00 2001
From: Mohamed Amin JABRI <mohamed-amin.jabri@bitflyer.com>
Date: Thu, 27 Aug 2020 17:20:16 +0900
Subject: [PATCH 277/553] Improve cmake build.

Use a target-based CMake approach and provide exports for the CMake
targets. Also add a build shell script for Linux, macOS and Windows
(using Git Bash).
---
 .gitignore            |   3 +
 CMakeLists.txt        | 431 ++++++++++++++++++++++++++++++------------
 build.sh              |  67 +++++++
 cmake/FindGMP.cmake   |  88 +++++++++
 sample/CMakeLists.txt |  17 ++
 test/CMakeLists.txt   |  62 ++++++
 6 files changed, 543 insertions(+), 125 deletions(-)
 create mode 100755 build.sh
 create mode 100644 cmake/FindGMP.cmake
 create mode 100644 sample/CMakeLists.txt
 create mode 100644 test/CMakeLists.txt

diff --git a/.gitignore b/.gitignore
index f5edb370..547e7a02 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,6 @@ GTAGS
 *.exe
 *.swp
 .cvsignore
+build/
+external/
+Testing/
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d56f24c3..45fd3105 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,219 +1,400 @@
-cmake_minimum_required (VERSION 3.0)
-project(mcl CXX ASM)
-set(SRCS src/fp.cpp)
+cmake_minimum_required (VERSION 3.8)
+project(mcl CXX C ASM)
 
 option(
 	MCL_MAX_BIT_SIZE
 	"max bit size for Fp"
 	0
 )
+if(MSVC)
+	option(
+		MCL_DOWNLOAD_SOURCE
+		"download cybozulib_ext"
+		OFF
+		)
+endif()
 option(
-	DOWNLOAD_SOURCE
-	"download cybozulib_ext"
-	OFF
-)
-option(
-	USE_OPENSSL
+	MCL_USE_OPENSSL
 	"use openssl"
 	OFF
 )
 option(
-	USE_GMP
+	MCL_USE_GMP
 	"use gmp"
 	ON
 )
 option(
-	USE_ASM
+	MCL_USE_ASM
 	"use asm"
 	ON
 )
 option(
-	USE_XBYAK
+	MCL_USE_XBYAK
 	"use xbyak"
 	ON
 )
 option(
-	USE_LLVM
+	MCL_USE_LLVM
 	"use base64.ll with -DCMAKE_CXX_COMPILER=clang++"
 	OFF
 )
 option(
-	ONLY_LIB
-	"only lib"
+	MCL_BUILD_SAMPLE
+	"Build mcl samples"
 	OFF
 )
 option(
-    MSVC_RUNTIME_DLL
-    "use dynamic runtime /MD in msvc builds"
+	MCL_BUILD_TESTING
+	"Build mcl tests"
 	OFF
 )
+if(MSVC)
+	option(
+		MCL_MSVC_RUNTIME_DLL
+		"use dynamic runtime /MD in msvc builds"
+		OFF
+	)
+endif()
+
+
+if(MSVC)
+	set(MCL_CYBOZULIB_EXT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../cybozulib_ext"
+		CACHE PATH "external cybozulib_ext directory")
+endif()
+
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
 
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 
-if(USE_LLVM)
+if(MCL_USE_LLVM AND NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+	message(WARNING "MCL_USE_LLVM will not be used: requiring clang/clang++.")
+endif()
+
+add_library(mcl SHARED src/fp.cpp)
+add_library(mcl::mcl ALIAS mcl)
+target_compile_definitions(mcl PUBLIC MCL_NO_AUTOLINK MCLBN_NO_AUTOLINK)
+target_include_directories(mcl PUBLIC
+	$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+	$<INSTALL_INTERFACE:$CMAKE_INSTALL_DIR/include>)
+set_target_properties(mcl PROPERTIES
+	POSITION_INDEPENDENT_CODE ON)
+#set_target_properties(mcl PROPERTIES OUTPUT_NAME mcl VERSION 1.0.0 SOVERSION 1)
+# For semantics of ABI compatibility including when you must bump SOVERSION, see:
+# https://community.kde.org/Policies/Binary_Compatibility_Issues_With_C%2B%2B#The_Do.27s_and_Don.27ts
+
+add_library(mcl_st STATIC src/fp.cpp)
+add_library(mcl::mcl_st ALIAS mcl_st)
+target_compile_definitions(mcl_st PUBLIC MCL_NO_AUTOLINK MCLBN_NO_AUTOLINK)
+target_include_directories(mcl_st PUBLIC
+	$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+	$<INSTALL_INTERFACE:$CMAKE_INSTALL_DIR/include>)
+set_target_properties(mcl_st PROPERTIES
+	OUTPUT_NAME mcl
+	POSITION_INDEPENDENT_CODE ON)
+#set_target_properties(mcl_st PROPERTIES PREFIX "lib")
+
+if(MCL_USE_LLVM AND CMAKE_CXX_COMPILER_ID MATCHES "Clang")
 	add_executable(gen src/gen.cpp)
-	add_custom_target(base64.ll
-		DEPENDS gen
-		SOURCES base64.ll
-	)
+	target_include_directories(gen PRIVATE
+		${CMAKE_CURRENT_SOURCE_DIR}/include)
+	if(MCL_USE_GMP)
+		find_package(GMP REQUIRED)
+		target_link_libraries(gen PRIVATE GMP::GMPXX GMP::GMP)
+	endif()
+
 	add_custom_command(OUTPUT base64.ll
 		COMMAND gen > base64.ll
-	)
-	add_custom_target(base64.o
-		DEPENDS base64.ll
-		SOURCES base64.o
-	)
+		DEPENDS gen
+		WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+	add_custom_target(gen_base64.ll
+		SOURCES ${CMAKE_CURRENT_BINARY_DIR}/base64.ll)
+
 	add_custom_command(OUTPUT base64.o
 		COMMAND ${CMAKE_CXX_COMPILER} -c -o base64.o base64.ll -O3 -fPIC
-	)
+		DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/base64.ll
+		WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+	add_custom_target(gen_base64.o
+		SOURCES ${CMAKE_CURRENT_BINARY_DIR}/base64.o)
+
+	target_link_libraries(mcl PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/base64.o)
+	add_dependencies(mcl gen_base64.o)
+	target_link_libraries(mcl_st PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/base64.o)
+	add_dependencies(mcl_st gen_base64.o)
 endif()
 
 if(MSVC)
-    if(MSVC_RUNTIME_DLL)
-        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MD /W4 /Oy /Ox /EHsc /GS- /Zi /DNDEBUG /DNOMINMAX")
-        set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MDd /W4 /DNOMINMAX")
-    else()
-    	set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT /W4 /Oy /Ox /EHsc /GS- /Zi /DNDEBUG /DNOMINMAX")
-	    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MTd /W4 /DNOMINMAX")
-    endif()
-	link_directories(${CMAKE_SOURCE_DIR}/../cybozulib_ext/lib)
-	link_directories(${CMAKE_SOURCE_DIR}/lib)
-else()
-	if("${CFLAGS_OPT_USER}" STREQUAL "")
-		set(CFLAGS_OPT_USER "-O3 -DNDEBUG -march=native")
+	if(MCL_MSVC_RUNTIME_DLL)
+		set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MD /Oy /Ox /EHsc /GS- /Zi /DNDEBUG")
+		set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MDd")
+	else()
+		set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT /Oy /Ox /EHsc /GS- /Zi /DNDEBUG")
+		set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MTd")
 	endif()
-	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith ${CFLAGS_OPT_USER}")
+	target_compile_definitions(mcl PUBLIC NOMINMAX)
+	target_compile_definitions(mcl_st PUBLIC NOMINMAX)
+	# set compiler flags for warnings level
+	set(MCL_COMPILE_OPTIONS /W4)
+	target_compile_options(mcl PRIVATE ${MCL_COMPILE_OPTIONS})
+	target_compile_options(mcl_st PRIVATE ${MCL_COMPILE_OPTIONS})
+else()
+	# Set compiler flags for warnings
+	set(MCL_COMPILE_OPTIONS -Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align
+		-Wwrite-strings -Wfloat-equal -Wpointer-arith -march=native)
+
+	target_compile_options(mcl PRIVATE ${MCL_COMPILE_OPTIONS})
+	target_compile_options(mcl_st PRIVATE ${MCL_COMPILE_OPTIONS})
+	set_target_properties(mcl PROPERTIES
+		CXX_STANDARD 11
+		CXX_STANDARD_REQUIRED YES
+		CXX_EXTENSIONS NO)
+	set_target_properties(mcl_st PROPERTIES
+		CXX_STANDARD 11
+		CXX_STANDARD_REQUIRED YES
+		CXX_EXTENSIONS NO)
+	target_compile_features(mcl PUBLIC cxx_std_11)
+	target_compile_features(mcl_st PUBLIC cxx_std_11)
 
 	if(${MCL_MAX_BIT_SIZE} GREATER 0)
-		add_definitions(-DMCL_MAX_BIT_SIZE=${MCL_MAX_BIT_SIZE})
+		target_compile_definitions(mcl PUBLIC MCL_MAX_BIT_SIZE=${MCL_MAX_BIT_SIZE})
+		target_compile_definitions(mcl_st PUBLIC MCL_MAX_BIT_SIZE=${MCL_MAX_BIT_SIZE})
 	endif()
 
-	if(USE_LLVM)
-		add_definitions(-DMCL_USE_LLVM=1 -DMCL_LLVM_BMI2=0)
-	elseif(USE_ASM)
+	if(MCL_USE_LLVM AND CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+		target_compile_definitions(mcl PUBLIC MCL_USE_LLVM=1 MCL_LLVM_BMI2=0)
+		target_compile_definitions(mcl_st PUBLIC MCL_USE_LLVM=1 MCL_LLVM_BMI2=0)
+	elseif(MCL_USE_ASM)
 		if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
-			add_definitions(-DMCL_USE_LLVM=1)
-			set(SRCS ${SRCS} src/asm/aarch64.s)
+			target_compile_definitions(mcl PUBLIC MCL_USE_LLVM=1)
+			target_compile_definitions(mcl_st PUBLIC MCL_USE_LLVM=1)
+			target_sources(mcl PRIVATE src/asm/aarch64.s)
+			target_sources(mcl_st PRIVATE src/asm/aarch64.s)
 			set(CPU arch64)
 		elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^arm")
-			add_definitions(-DMCL_USE_LLVM=1)
-			set(SRCS ${SRCS} src/asm/arm.s)
+			target_compile_definitions(mcl PUBLIC MCL_USE_LLVM=1)
+			target_compile_definitions(mcl_st PUBLIC MCL_USE_LLVM=1)
+			target_sources(mcl PRIVATE src/asm/arm.s)
+			target_sources(mcl_st PRIVATE src/asm/arm.s)
 			set(CPU arm)
 		elseif(APPLE)
-			add_definitions(-DMCL_USE_LLVM=1)
-			set(SRCS ${SRCS} src/asm/x86-64mac.s src/asm/x86-64mac.bmi2.s)
+			target_compile_definitions(mcl PUBLIC MCL_USE_LLVM=1)
+			target_compile_definitions(mcl_st PUBLIC MCL_USE_LLVM=1)
+			target_sources(mcl PRIVATE src/asm/x86-64mac.s src/asm/x86-64mac.bmi2.s)
+			target_sources(mcl_st PRIVATE src/asm/x86-64mac.s src/asm/x86-64mac.bmi2.s)
 			set(CPU x86-64)
 		elseif(UNIX)
-			add_definitions(-DMCL_USE_LLVM=1)
-			set(SRCS ${SRCS} src/asm/x86-64.s src/asm/x86-64.bmi2.s)
+			target_compile_definitions(mcl PUBLIC MCL_USE_LLVM=1)
+			target_compile_definitions(mcl_st PUBLIC MCL_USE_LLVM=1)
+			target_sources(mcl PRIVATE src/asm/x86-64.s src/asm/x86-64.bmi2.s)
+			target_sources(mcl_st PRIVATE src/asm/x86-64.s src/asm/x86-64.bmi2.s)
 			set(CPU x86-64)
 		endif()
 	endif()
-	if(USE_GMP)
-		set(EXT_LIBS ${EXT_LIBS} gmp gmpxx)
+	if(MCL_USE_GMP)
+		find_package(GMP REQUIRED)
+		target_link_libraries(mcl PUBLIC GMP::GMPXX GMP::GMP)
+		target_link_libraries(mcl_st PUBLIC GMP::GMPXX GMP::GMP)
 	endif()
-	if(USE_OPENSSL)
-		set(EXT_LIBS ${EXT_LIBS} crypto)
+	if(MCL_USE_OPENSSL)
+		find_package(OpenSSL REQUIRED)
+		target_link_libraries(mcl PUBLIC OpenSSL::Crypto)
+		target_link_libraries(mcl_st PUBLIC OpenSSL::Crypto)
 	endif()
 endif()
 
-if(NOT USE_GMP)
-	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMCL_USE_VINT -DMCL_VINT_FIXED_BUFFER")
+if(NOT MCL_USE_GMP)
+	target_compile_definitions(mcl PUBLIC MCL_USE_VINT MCL_VINT_FIXED_BUFFER)
+	target_compile_definitions(mcl_st PUBLIC MCL_USE_VINT MCL_VINT_FIXED_BUFFER)
 endif()
-if(NOT USE_OPENSSL)
-	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMCL_DONT_USE_OPENSSL")
+if(NOT MCL_USE_OPENSSL)
+	target_compile_definitions(mcl PUBLIC MCL_DONT_USE_OPENSSL)
+	target_compile_definitions(mcl_st PUBLIC MCL_DONT_USE_OPENSSL)
 endif()
-if(NOT USE_XBYAK)
-	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMCL_DONT_USE_XBYAK")
+if(NOT MCL_USE_XBYAK)
+	target_compile_definitions(mcl PUBLIC MCL_DONT_USE_XBYAK)
+	target_compile_definitions(mcl_st PUBLIC MCL_DONT_USE_XBYAK)
 endif()
 
-if(DOWNLOAD_SOURCE)
+if(MCL_DOWNLOAD_SOURCE)
 	if(MSVC)
+		set(CYBOZULIB_EXT_DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cybozulib_ext)
 		set(CYBOZULIB_EXT_TAG release20170521)
 		set(FILES config.h gmp-impl.h gmp-mparam.h gmp.h gmpxx.h longlong.h mpir.h mpirxx.h)
 		foreach(file IN ITEMS ${FILES})
-			file(DOWNLOAD https://raw.githubusercontent.com/herumi/cybozulib_ext/${CYBOZULIB_EXT_TAG}/include/${file} ${mcl_SOURCE_DIR}/include/cybozulib_ext/${file})
+			file(DOWNLOAD https://raw.githubusercontent.com/herumi/cybozulib_ext/${CYBOZULIB_EXT_TAG}/include/${file} ${CYBOZULIB_EXT_DOWNLOAD_DIR}/include/${file})
 			message("download cybozulib_ext/" ${file})
 		endforeach()
 		set(FILES aes.h applink.c asn1.h asn1_mac.h asn1t.h bio.h blowfish.h bn.h buffer.h camellia.h cast.h cmac.h cms.h comp.h conf.h conf_api.h crypto.h des.h des_old.h dh.h dsa.h dso.h dtls1.h e_os2.h ebcdic.h ec.h ecdh.h ecdsa.h engine.h err.h evp.h hmac.h idea.h krb5_asn.h kssl.h lhash.h md4.h md5.h mdc2.h modes.h obj_mac.h objects.h ocsp.h opensslconf.h opensslv.h ossl_typ.h pem.h pem2.h pkcs12.h pkcs7.h pqueue.h rand.h rc2.h rc4.h ripemd.h rsa.h safestack.h seed.h sha.h srp.h srtp.h ssl.h ssl2.h ssl23.h ssl3.h stack.h symhacks.h tls1.h ts.h txt_db.h ui.h ui_compat.h whrlpool.h x509.h x509_vfy.h x509v3.h)
 		foreach(file IN ITEMS ${FILES})
-			file(DOWNLOAD https://raw.githubusercontent.com/herumi/cybozulib_ext/${CYBOZULIB_EXT_TAG}/include/openssl/${file} ${mcl_SOURCE_DIR}/include/cybozulib_ext/openssl/${file})
+			file(DOWNLOAD
+				https://raw.githubusercontent.com/herumi/cybozulib_ext/${CYBOZULIB_EXT_TAG}/include/openssl/${file} ${CYBOZULIB_EXT_DOWNLOAD_DIR}/include/openssl/${file})
 			message("download cybozulib_ext/openssl/" ${file})
 		endforeach()
 		set(FILES mpir.lib mpirxx.lib mpirxx.pdb ssleay32.lib libeay32.lib mpir.pdb)
 		foreach(file IN ITEMS ${FILES})
-			file(DOWNLOAD https://raw.githubusercontent.com/herumi/cybozulib_ext/${CYBOZULIB_EXT_TAG}/lib/mt/14/${file} ${mcl_SOURCE_DIR}/lib/mt/14/${file})
+			file(DOWNLOAD
+			    https://raw.githubusercontent.com/herumi/cybozulib_ext/${CYBOZULIB_EXT_TAG}/lib/mt/14/${file} ${CYBOZULIB_EXT_DOWNLOAD_DIR}/lib/mt/14/${file})
 			message("download lib/mt/14/" ${file})
 		endforeach()
-		if(MSVC)
-			include_directories(
-				${mcl_SOURCE_DIR}/include/cybozulib_ext
-			)
-		endif()
+
+		# mpir
+		add_library(cybozulib_ext::mpir STATIC IMPORTED)
+		set_target_properties(cybozulib_ext::mpir PROPERTIES
+			INTERFACE_INCLUDE_DIRECTORIES ${CYBOZULIB_EXT_DOWNLOAD_DIR}/include
+			IMPORTED_LOCATION ${CYBOZULIB_EXT_DOWNLOAD_DIR}/lib/mt/14/mpir.lib)
+		# mpirxx
+		add_library(cybozulib_ext::mpirxx STATIC IMPORTED)
+		set_target_properties(cybozulib_ext::mpirxx PROPERTIES
+			INTERFACE_INCLUDE_DIRECTORIES ${CYBOZULIB_EXT_DOWNLOAD_DIR}/include
+			IMPORTED_LOCATION ${CYBOZULIB_EXT_DOWNLOAD_DIR}/lib/mt/14/mpirxx.lib)
+		# libeay32
+		add_library(cybozulib_ext::libeay32 STATIC IMPORTED)
+		set_target_properties(cybozulib_ext::libeay32 PROPERTIES
+			INTERFACE_INCLUDE_DIRECTORIES ${CYBOZULIB_EXT_DOWNLOAD_DIR}/include
+			IMPORTED_LOCATION ${CYBOZULIB_EXT_DOWNLOAD_DIR}/lib/mt/14/libeay32.lib)
+		# ssleay32
+		add_library(cybozulib_ext::ssleay32 STATIC IMPORTED)
+		set_target_properties(cybozulib_ext::ssleay32 PROPERTIES
+			INTERFACE_INCLUDE_DIRECTORIES ${CYBOZULIB_EXT_DOWNLOAD_DIR}/include
+			IMPORTED_LOCATION ${CYBOZULIB_EXT_DOWNLOAD_DIR}/lib/mt/14/ssleay32.lib)
+		# abstracted cybozulib_ext libraries
+		add_library(windows_specific INTERFACE)
+		add_library(mcl::windows_specific ALIAS windows_specific)
+		target_link_libraries(windows_specific INTERFACE
+			-LIBPATH:${CYBOZULIB_EXT_DOWNLOAD_DIR}/lib
+			-LIBPATH:${CYBOZULIB_EXT_DOWNLOAD_DIR}/lib/mt/14
+			cybozulib_ext::mpir
+			cybozulib_ext::mpirxx
+			cybozulib_ext::libeay32
+			cybozulib_ext::ssleay32)
+
+		target_link_libraries(mcl PUBLIC mcl::windows_specific)
+		target_link_libraries(mcl_st PUBLIC mcl::windows_specific)
 	endif()
 else()
 	if(MSVC)
-		include_directories(
-			${mcl_SOURCE_DIR}/../cybozulib_ext/include
-		)
+		# mpir
+		add_library(cybozulib_ext::mpir STATIC IMPORTED)
+		set_target_properties(cybozulib_ext::mpir PROPERTIES
+			INTERFACE_INCLUDE_DIRECTORIES ${MCL_CYBOZULIB_EXT_DIR}/include
+			IMPORTED_LOCATION ${MCL_CYBOZULIB_EXT_DIR}/lib/mt/14/mpir.lib)
+		# mpirxx
+		add_library(cybozulib_ext::mpirxx STATIC IMPORTED)
+		set_target_properties(cybozulib_ext::mpirxx PROPERTIES
+			INTERFACE_INCLUDE_DIRECTORIES ${MCL_CYBOZULIB_EXT_DIR}/include
+			IMPORTED_LOCATION ${MCL_CYBOZULIB_EXT_DIR}/lib/mt/14/mpirxx.lib)
+		# libeay32
+		add_library(cybozulib_ext::libeay32 STATIC IMPORTED)
+		set_target_properties(cybozulib_ext::libeay32 PROPERTIES
+			INTERFACE_INCLUDE_DIRECTORIES ${MCL_CYBOZULIB_EXT_DIR}/include
+			IMPORTED_LOCATION ${MCL_CYBOZULIB_EXT_DIR}/lib/mt/14/libeay32.lib)
+		# ssleay32
+		add_library(cybozulib_ext::ssleay32 STATIC IMPORTED)
+		set_target_properties(cybozulib_ext::ssleay32 PROPERTIES
+			INTERFACE_INCLUDE_DIRECTORIES ${MCL_CYBOZULIB_EXT_DIR}/include
+			IMPORTED_LOCATION ${MCL_CYBOZULIB_EXT_DIR}/lib/mt/14/ssleay32.lib)
+		 # abstracted cybozulib_ext libraries
+		add_library(windows_specific INTERFACE)
+		add_library(mcl::windows_specific ALIAS windows_specific)
+		target_link_libraries(windows_specific INTERFACE
+			-LIBPATH:${MCL_CYBOZULIB_EXT_DIR}/lib
+			-LIBPATH:${MCL_CYBOZULIB_EXT_DIR}/lib/mt/14
+			cybozulib_ext::mpir
+			cybozulib_ext::mpirxx
+			cybozulib_ext::libeay32
+			cybozulib_ext::ssleay32)
+
+		target_link_libraries(mcl PUBLIC mcl::windows_specific)
+		target_link_libraries(mcl_st PUBLIC mcl::windows_specific)
 	endif()
 endif()
 
-include_directories(
-	${mcl_SOURCE_DIR}/include
-)
+# mclbnXXX
+foreach(bit IN ITEMS 256 384 384_256 512)
+	add_library(mclbn${bit} SHARED src/bn_c${bit}.cpp)
+	add_library(mcl::mclbn${bit} ALIAS mclbn${bit})
+	set_target_properties(mclbn${bit} PROPERTIES
+		CXX_STANDARD 11
+		CXX_STANDARD_REQUIRED YES
+		CXX_EXTENSIONS NO)
+	target_compile_options(mclbn${bit} PRIVATE ${MCL_COMPILE_OPTIONS})
+	target_compile_definitions(mclbn${bit}
+		PUBLIC MCL_NO_AUTOLINK MCLBN_NO_AUTOLINK)
+	target_link_libraries(mclbn${bit} PUBLIC mcl::mcl)
+endforeach()
+
+# mclsheXXX
+foreach(bit IN ITEMS 256 384 384_256)
+	add_library(mclshe${bit} SHARED src/she_c${bit}.cpp)
+	add_library(mcl::mclshe${bit} ALIAS mclshe${bit})
+	set_target_properties(mclshe${bit} PROPERTIES
+		CXX_STANDARD 11
+		CXX_STANDARD_REQUIRED YES
+		CXX_EXTENSIONS NO)
+	target_compile_options(mclshe${bit} PRIVATE ${MCL_COMPILE_OPTIONS})
+	target_compile_definitions(mclshe${bit}
+		PUBLIC MCL_NO_AUTOLINK MCLBN_NO_AUTOLINK MCLSHE_NO_AUTOLINK)
+	target_link_libraries(mclshe${bit} PUBLIC mcl::mcl)
+endforeach()
 
-if(USE_LLVM)
-	add_library(mcl SHARED ${SRCS} base64.o)
-	add_library(mcl_st STATIC ${SRCS} base64.o)
-	add_dependencies(mcl base64.o)
-	add_dependencies(mcl_st base64.o)
+# ECDSA
+add_library(mclecdsa SHARED src/ecdsa_c.cpp)
+add_library(mcl::mclecdsa ALIAS mclecdsa)
+set_target_properties(mclecdsa PROPERTIES
+	CXX_STANDARD 11
+	CXX_STANDARD_REQUIRED YES
+	CXX_EXTENSIONS NO)
+target_link_libraries(mclecdsa PUBLIC mcl::mcl)
+
+if(MSVC)
+	install(TARGETS mcl mcl_st mclbn256 mclbn384 mclbn384_256 mclbn512 mclshe256
+		mclshe384 mclshe384_256 windows_specific
+		EXPORT mclTargets
+		LIBRARY DESTINATION lib
+		ARCHIVE DESTINATION lib
+		RUNTIME DESTINATION lib)
 else()
-	add_library(mcl SHARED ${SRCS})
-	add_library(mcl_st STATIC ${SRCS})
+	install(TARGETS mcl mcl_st mclbn256 mclbn384 mclbn384_256 mclbn512 mclshe256
+		mclshe384 mclshe384_256
+		EXPORT mclTargets
+		LIBRARY DESTINATION lib
+		ARCHIVE DESTINATION lib
+		RUNTIME DESTINATION lib)
 endif()
-target_link_libraries(mcl ${EXT_LIBS})
-target_link_libraries(mcl_st ${EXT_LIBS})
-set_target_properties(mcl_st PROPERTIES OUTPUT_NAME mcl)
-#set_target_properties(mcl_st PROPERTIES PREFIX "lib")
-#set_target_properties(mcl PROPERTIES OUTPUT_NAME mcl VERSION 1.0.0 SOVERSION 1)
-# For semantics of ABI compatibility including when you must bump SOVERSION, see:
-# https://community.kde.org/Policies/Binary_Compatibility_Issues_With_C%2B%2B#The_Do.27s_and_Don.27ts
+install(DIRECTORY include/mcl
+	DESTINATION include
+	FILES_MATCHING PATTERN "*.hpp"
+	PATTERN "curve_type.h"
+	PATTERN "bn.h"
+	PATTERN "bn_c256.h"
+	PATTERN "bn_c384_256.h"
+	PATTERN "bn_c384.h")
+install(DIRECTORY include/cybozu
+	DESTINATION include
+	FILES_MATCHING PATTERN "*.hpp")
 
-set(LIBS mcl ${EXT_LIBS})
-foreach(bit IN ITEMS 256 384 384_256 512)
-	add_library(mclbn${bit} SHARED src/bn_c${bit}.cpp)
-	target_link_libraries(mclbn${bit} ${LIBS})
-	add_executable(bn_c${bit}_test test/bn_c${bit}_test.cpp)
-	target_link_libraries(bn_c${bit}_test mclbn${bit})
-endforeach()
+install(EXPORT mclTargets
+	FILE mclTargets.cmake
+	NAMESPACE mcl::
+	DESTINATION lib/cmake/mcl)
 
-file(GLOB MCL_HEADERS include/mcl/*.hpp include/mcl/bn.h include/mcl/curve_type.h include/mcl/bn_c256.h include/mcl/bn_c384_256.h include/mcl/bn_c384.h)
-file(GLOB CYBOZULIB_HEADERS include/cybozu/*.hpp)
-
-install(TARGETS mcl DESTINATION lib)
-install(TARGETS mcl_st DESTINATION lib)
-install(TARGETS mclbn256 DESTINATION lib)
-install(TARGETS mclbn384 DESTINATION lib)
-install(TARGETS mclbn384_256 DESTINATION lib)
-install(TARGETS mclbn512 DESTINATION lib)
-install(FILES ${MCL_HEADERS} DESTINATION include/mcl)
-install(FILES include/mcl/impl/bn_c_impl.hpp DESTINATION include/mcl/impl)
-install(FILES ${CYBOZULIB_HEADERS} DESTINATION include/cybozu)
-
-if(NOT ONLY_LIB)
-	set(TEST_BASE fp_test ec_test fp_util_test window_method_test elgamal_test fp_tower_test gmp_test bn_test glv_test)
-	#set(TEST_BASE bn_test)
-	foreach(base IN ITEMS ${TEST_BASE})
-		add_executable(
-			${base}
-			test/${base}.cpp
-		)
-		target_link_libraries(
-			${base}
-			${LIBS}
-		)
-	endforeach()
+# support local build-tree export to allow import from external projects
+export(EXPORT mclTargets
+	FILE mclTargets.cmake
+	NAMESPACE mcl::)
+set(CMAKE_EXPORT_PACKAGE_REGISTRY ON)
+export(PACKAGE mcl)
+
+
+# Tests
+if((CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME OR MCL_BUILD_TESTING)
+	AND BUILD_TESTING)
+	enable_testing()
+	add_subdirectory(test)
+endif()
+
+if(MCL_BUILD_SAMPLE)
+	# sample code
+	add_subdirectory(sample)
 endif()
diff --git a/build.sh b/build.sh
new file mode 100755
index 00000000..a2041a24
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+BUILD_DIR=${1:-build}
+
+
+windows_build()
+{
+  if [ -d "${SCRIPT_DIR}/../cybozulib_ext"  ]; then
+      DOWNLOAD_CYBOZULIB_EXT="OFF"
+      CYBOZULIB_EXT_OPTION="-DMCL_CYBOZULIB_EXT_DIR:PATH=${SCRIPT_DIR}/../cybozulib_ext"
+  else
+      DOWNLOAD_CYBOZULIB_EXT="ON"
+      CYBOZULIB_EXT_OPTION=""
+  fi
+
+  cmake -E remove_directory ${BUILD_DIR}
+  cmake -E make_directory ${BUILD_DIR}
+  cmake -H${SCRIPT_DIR} -B${BUILD_DIR} -A x64 \
+    -DBUILD_TESTING=ON \
+    -DMCL_BUILD_SAMPLE=ON \
+    -DCMAKE_INSTALL_PREFIX=${BUILD_DIR}/install \
+    -DMCL_DOWNLOAD_SOURCE=${DOWNLOAD_CYBOZULIB_EXT} ${CYBOZULIB_EXT_OPTION}
+  cmake --build ${BUILD_DIR} --clean-first --config Release --parallel
+}
+
+linux_build()
+{
+  cmake -E remove_directory ${BUILD_DIR}
+  cmake -E make_directory ${BUILD_DIR}
+  cmake -H${SCRIPT_DIR} -B${BUILD_DIR} -DCMAKE_BUILD_TYPE=Release \
+    -DBUILD_TESTING=ON \
+    -DMCL_BUILD_SAMPLE=ON \
+    -DMCL_USE_LLVM=ON \
+    -DMCL_USE_OPENSSL=ON \
+    -DCMAKE_INSTALL_PREFIX=${BUILD_DIR}/install
+  cmake --build ${BUILD_DIR} --clean-first -- -j
+}
+
+osx_build()
+{
+  OPENSSL_ROOT_DIR="/usr/local/opt/openssl"
+
+  cmake -E remove_directory ${BUILD_DIR}
+  cmake -E make_directory ${BUILD_DIR}
+  cmake -H${SCRIPT_DIR} -B${BUILD_DIR} -DCMAKE_BUILD_TYPE=Release \
+    -DBUILD_TESTING=ON \
+    -DMCL_BUILD_SAMPLE=ON \
+    -DMCL_USE_LLVM=ON \
+    -DMCL_USE_OPENSSL=ON \
+    -DOPENSSL_ROOT_DIR="${OPENSSL_ROOT_DIR}" \
+    -DCMAKE_INSTALL_PREFIX=${BUILD_DIR}/install
+  cmake --build ${BUILD_DIR} --clean-first -- -j
+}
+
+os=`uname -s`
+case "${os}" in
+  CYGWIN*|MINGW32*|MSYS*|MINGW*)
+    windows_build
+    ;;
+  Darwin*)
+    osx_build
+    ;;
+  *)
+    linux_build
+    ;;
+esac
diff --git a/cmake/FindGMP.cmake b/cmake/FindGMP.cmake
new file mode 100644
index 00000000..58c9b96b
--- /dev/null
+++ b/cmake/FindGMP.cmake
@@ -0,0 +1,88 @@
+# FindGMP.cmake
+#
+# Finds the GNU Multiple Precision Arithmetic Library (GMP)
+# See http://gmplib.org/
+#
+# This will define the following variables::
+#
+#    GMP_FOUND
+#    GMP_VERSION
+#    GMP_DEFINITIONS
+#    GMP_INCLUDE_DIR
+#    GMP_LIBRARY
+#    GMP_GMPXX_DEFINITIONS
+#    GMP_GMPXX_INCLUDE_DIR
+#    GMP_GMPXX_LIBRARY
+#
+# and the following imported targets::
+#
+#     GMP::GMP
+#     GMP::GMPXX
+
+find_package(PkgConfig QUIET)
+pkg_check_modules(PC_GMP QUIET gmp gmpxx)
+
+set(GMP_VERSION ${PC_GMP_gmp_VERSION})
+
+find_library(GMP_LIBRARY
+	NAMES gmp libgmp
+	HINTS
+		${PC_GMP_gmp_LIBDIR}
+		${PC_GMP_gmp_LIBRARY_DIRS})
+
+find_path(GMP_INCLUDE_DIR
+	NAMES gmp.h
+	HINTS
+		${PC_GMP_gmp_INCLUDEDIR}
+		${PC_GMP_gmp_INCLUDE_DIRS})
+
+find_library(GMP_GMPXX_LIBRARY
+	NAMES gmpxx libgmpxx
+	HINTS
+		${PC_GMP_gmpxx_LIBDIR}
+		${PC_GMP_gmpxx_LIBRARY_DIRS})
+
+find_path(GMP_GMPXX_INCLUDE_DIR
+	NAMES gmpxx.h
+	HINTS
+		${PC_GMP_gmpxx_INCLUDEDIR}
+		${PC_GMP_gmpxx_INCLUDE_DIRS})
+
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(GMP
+	REQUIRED_VARS
+		GMP_INCLUDE_DIR
+		GMP_LIBRARY
+	GMP_GMPXX_INCLUDE_DIR
+	GMP_GMPXX_LIBRARY
+	VERSION_VAR GMP_VERSION)
+
+if(GMP_FOUND)
+	set(GMP_LIBRARIES ${GMP_LIBRARY})
+	set(GMP_INCLUDE_DIRS ${GMP_INCLUDE_DIR})
+	set(GMP_DEFINITIONS ${PC_GMP_gmp_CFLAGS_OTHER})
+	set(GMP_GMPXX_LIBRARIES ${GMP_GMPXX_LIBRARY})
+	set(GMP_GMPXX_INCLUDE_DIRS ${GMP_GMPXX_INCLUDE_DIR})
+	set(GMP_GMPXX_DEFINITIONS ${PC_GMP_gmpxx_CFLAGS_OTHER})
+
+	if(NOT TARGET GMP::GMP)
+		add_library(GMP::GMP UNKNOWN IMPORTED)
+		set_target_properties(GMP::GMP PROPERTIES
+			INTERFACE_COMPILE_OPTIONS "${PC_GMP_gmp_CFLAGS_OTHER}"
+			INTERFACE_INCLUDE_DIRECTORIES "${GMP_INCLUDE_DIR}"
+			IMPORTED_LOCATION "${GMP_LIBRARY}")
+	endif()
+
+	if(NOT TARGET GMP::GMPXX)
+		add_library(GMP::GMPXX UNKNOWN IMPORTED)
+		set_target_properties(GMP::GMPXX PROPERTIES
+			INTERFACE_COMPILE_OPTIONS "${PC_GMP_gmpxx_CFLAGS_OTHER}"
+			INTERFACE_INCLUDE_DIRECTORIES "${GMP_GMPXX_INCLUDE_DIR}"
+			INTERFACE_LINK_LIBRARIES GMP::GMP
+			IMPORTED_LOCATION "${GMP_GMPXX_LIBRARY}")
+	endif()
+endif()
+
+mark_as_advanced(GMP_FOUND GMP_INCLUDE_DIR GMP_LIBRARY)
+mark_as_advanced(GMP_GMPXX_INCLUDE_DIR GMP_GMPXX_LIBRARY)
diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt
new file mode 100644
index 00000000..f0fffbd2
--- /dev/null
+++ b/sample/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Samples
+set(MCL_SAMPLE bench ecdh random rawbench vote pairing large tri-dh bls_sig she_smpl)
+foreach(sample IN ITEMS ${MCL_SAMPLE})
+	add_executable(sample_${sample} ${sample}.cpp)
+	target_link_libraries(sample_${sample} PRIVATE mcl::mcl)
+	set_target_properties(sample_${sample} PROPERTIES
+		CXX_STANDARD 11
+		CXX_STANDARD_REQUIRED YES
+	CXX_EXTENSIONS NO)
+	target_compile_options(sample_${sample} PRIVATE ${MCL_COMPILE_OPTIONS})
+	target_compile_definitions(sample_${sample} PRIVATE MCL_DONT_EXPORT)
+endforeach()
+
+# C interface Sample
+add_executable(sample_pairing_c_min pairing_c.c)
+target_link_libraries(sample_pairing_c_min PRIVATE mcl::mclbn384_256)
+target_compile_definitions(sample_pairing_c_min PRIVATE MCL_DONT_EXPORT)
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 00000000..22efd786
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,62 @@
+# Tests
+set(MCL_TEST_BASE fp_test ec_test fp_util_test window_method_test elgamal_test
+	fp_tower_test gmp_test bn_test glv_test)
+foreach(base IN ITEMS ${MCL_TEST_BASE})
+	add_executable(${base} ${base}.cpp)
+	target_link_libraries(${base} PRIVATE mcl::mcl)
+	set_target_properties(${base} PROPERTIES
+		CXX_STANDARD 11
+		CXX_STANDARD_REQUIRED YES
+		CXX_EXTENSIONS NO)
+	target_compile_options(${base} PRIVATE ${MCL_COMPILE_OPTIONS})
+	target_compile_definitions(${base} PRIVATE MCL_DONT_EXPORT)
+	add_test(NAME ${base} COMMAND ${base})
+endforeach()
+
+# Tests for mclbnXXX
+foreach(bit IN ITEMS 256 384 384_256 512)
+	add_executable(bn_c${bit}_test bn_c${bit}_test.cpp)
+	target_link_libraries(bn_c${bit}_test PRIVATE mcl::mclbn${bit})
+	set_target_properties(bn_c${bit}_test PROPERTIES
+		CXX_STANDARD 11
+		CXX_STANDARD_REQUIRED YES
+		CXX_EXTENSIONS NO)
+	target_compile_options(bn_c${bit}_test PRIVATE ${MCL_COMPILE_OPTIONS})
+	target_compile_definitions(bn_c${bit}_test PRIVATE MCLBN_DONT_EXPORT)
+	add_test(NAME bn_c${bit}_test COMMAND bn_c${bit}_test)
+endforeach()
+
+# Tests for mclsheXXX
+foreach(bit IN ITEMS 256 384 384_256)
+	add_executable(she_c${bit}_test she_c${bit}_test.cpp)
+	target_link_libraries(she_c${bit}_test PRIVATE mcl::mclshe${bit})
+	set_target_properties(she_c${bit}_test PROPERTIES
+		CXX_STANDARD 11
+		CXX_STANDARD_REQUIRED YES
+		CXX_EXTENSIONS NO)
+	target_compile_options(she_c${bit}_test PRIVATE ${MCL_COMPILE_OPTIONS})
+	target_compile_definitions(she_c${bit}_test PRIVATE MCLBN_DONT_EXPORT)
+	add_test(NAME she_c${bit}_test COMMAND she_c${bit}_test)
+endforeach()
+
+# ecdsa_test
+add_executable(ecdsa_test ecdsa_test.cpp)
+target_link_libraries(ecdsa_test PRIVATE mcl::mclecdsa)
+set_target_properties(ecdsa_test PROPERTIES
+	CXX_STANDARD 11
+	CXX_STANDARD_REQUIRED YES
+	CXX_EXTENSIONS NO)
+target_compile_options(ecdsa_test PRIVATE ${MCL_COMPILE_OPTIONS})
+target_compile_definitions(ecdsa_test PRIVATE MCLBN_DONT_EXPORT)
+add_test(NAME ecdsa_test COMMAND ecdsa_test)
+
+# ecdsa_c_test
+add_executable(ecdsa_c_test ecdsa_c_test.cpp)
+target_link_libraries(ecdsa_c_test PRIVATE mcl::mclecdsa)
+set_target_properties(ecdsa_c_test PROPERTIES
+	CXX_STANDARD 11
+	CXX_STANDARD_REQUIRED YES
+	CXX_EXTENSIONS NO)
+target_compile_options(ecdsa_c_test PRIVATE ${MCL_COMPILE_OPTIONS})
+target_compile_definitions(ecdsa_c_test PRIVATE MCLBN_DONT_EXPORT)
+add_test(NAME ecdsa_c_test COMMAND ecdsa_c_test)

From 45cde9162677e16d535737e875f97f0a4303f5f3 Mon Sep 17 00:00:00 2001
From: Mohamed Amin JABRI <mohamed-amin.jabri@bitflyer.com>
Date: Thu, 27 Aug 2020 18:28:50 +0900
Subject: [PATCH 278/553] Update readme: mention build.sh convenience script to
 build mcl on Linux, macOS and Windows.

---
 readme.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/readme.md b/readme.md
index 45480866..cf6ed155 100644
--- a/readme.md
+++ b/readme.md
@@ -152,6 +152,11 @@ cmake .. -A x64
 msbuild mcl.sln /p:Configuration=Release /m
 ```
 
+For your convenience you could use the build script `build.sh` on Linux, macOS and
+Windows (requires Git Bash).
+
+On Windows, `build.sh` expects [cybozulib_ext](https://github.com/herumi/cybozulib_ext) to be within the same parent directory, otherwise, it will be downloaded into `external\cybozulib_ext` directory.
+
 ## options
 
 ```

From 4797e01714265c9697966913cc47953337e7c21b Mon Sep 17 00:00:00 2001
From: Mohamed Amin JABRI <mohamed-amin.jabri@bitflyer.com>
Date: Sun, 30 Aug 2020 18:04:50 +0900
Subject: [PATCH 279/553] Add project/soname version.

---
 CMakeLists.txt | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 45fd3105..fd0237ee 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,8 @@
 cmake_minimum_required (VERSION 3.8)
-project(mcl CXX C ASM)
+
+project(mcl
+	VERSION 1.22
+	LANGUAGES CXX C ASM)
 
 option(
 	MCL_MAX_BIT_SIZE
@@ -80,7 +83,10 @@ target_include_directories(mcl PUBLIC
 	$<INSTALL_INTERFACE:$CMAKE_INSTALL_DIR/include>)
 set_target_properties(mcl PROPERTIES
 	POSITION_INDEPENDENT_CODE ON)
-#set_target_properties(mcl PROPERTIES OUTPUT_NAME mcl VERSION 1.0.0 SOVERSION 1)
+set_target_properties(mcl PROPERTIES
+	OUTPUT_NAME mcl
+	VERSION ${mcl_VERSION}
+	SOVERSION ${mcl_VERSION_MAJOR})
 # For semantics of ABI compatibility including when you must bump SOVERSION, see:
 # https://community.kde.org/Policies/Binary_Compatibility_Issues_With_C%2B%2B#The_Do.27s_and_Don.27ts
 
@@ -322,6 +328,9 @@ foreach(bit IN ITEMS 256 384 384_256 512)
 	target_compile_definitions(mclbn${bit}
 		PUBLIC MCL_NO_AUTOLINK MCLBN_NO_AUTOLINK)
 	target_link_libraries(mclbn${bit} PUBLIC mcl::mcl)
+	set_target_properties(mclbn${bit} PROPERTIES
+		VERSION ${mcl_VERSION}
+		SOVERSION ${mcl_VERSION_MAJOR})
 endforeach()
 
 # mclsheXXX
@@ -336,6 +345,9 @@ foreach(bit IN ITEMS 256 384 384_256)
 	target_compile_definitions(mclshe${bit}
 		PUBLIC MCL_NO_AUTOLINK MCLBN_NO_AUTOLINK MCLSHE_NO_AUTOLINK)
 	target_link_libraries(mclshe${bit} PUBLIC mcl::mcl)
+	set_target_properties(mclshe${bit} PROPERTIES
+		VERSION ${mcl_VERSION}
+		SOVERSION ${mcl_VERSION_MAJOR})
 endforeach()
 
 # ECDSA
@@ -345,6 +357,9 @@ set_target_properties(mclecdsa PROPERTIES
 	CXX_STANDARD 11
 	CXX_STANDARD_REQUIRED YES
 	CXX_EXTENSIONS NO)
+set_target_properties(mclecdsa PROPERTIES
+	VERSION ${mcl_VERSION}
+	SOVERSION ${mcl_VERSION_MAJOR})
 target_link_libraries(mclecdsa PUBLIC mcl::mcl)
 
 if(MSVC)

From 793ef212c90a6ce6cebfb70e8d02c12486414b41 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 1 Sep 2020 18:36:22 +0900
Subject: [PATCH 280/553] if CXX is defined then use it on OpenBSD

---
 common.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common.mk b/common.mk
index 5e3d6307..4816049f 100644
--- a/common.mk
+++ b/common.mk
@@ -25,7 +25,7 @@ else
 endif
 ifeq ($(UNAME_S),OpenBSD)
   OS=openbsd
-  CXX=clang++
+  CXX?=clang++
   CFLAGS+=-I/usr/local/include
   LDFLAGS+=-L/usr/local/lib
 endif

From c0b7fb89f713da879283a37d0aa5d24791e45d35 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 3 Sep 2020 16:18:09 +0900
Subject: [PATCH 281/553] remove mclshe and mclecdsa

---
 CMakeLists.txt | 37 +++----------------------------------
 1 file changed, 3 insertions(+), 34 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fd0237ee..90c2df4c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -317,7 +317,7 @@ else()
 endif()
 
 # mclbnXXX
-foreach(bit IN ITEMS 256 384 384_256 512)
+foreach(bit IN ITEMS 256 384 384_256)
 	add_library(mclbn${bit} SHARED src/bn_c${bit}.cpp)
 	add_library(mcl::mclbn${bit} ALIAS mclbn${bit})
 	set_target_properties(mclbn${bit} PROPERTIES
@@ -333,45 +333,14 @@ foreach(bit IN ITEMS 256 384 384_256 512)
 		SOVERSION ${mcl_VERSION_MAJOR})
 endforeach()
 
-# mclsheXXX
-foreach(bit IN ITEMS 256 384 384_256)
-	add_library(mclshe${bit} SHARED src/she_c${bit}.cpp)
-	add_library(mcl::mclshe${bit} ALIAS mclshe${bit})
-	set_target_properties(mclshe${bit} PROPERTIES
-		CXX_STANDARD 11
-		CXX_STANDARD_REQUIRED YES
-		CXX_EXTENSIONS NO)
-	target_compile_options(mclshe${bit} PRIVATE ${MCL_COMPILE_OPTIONS})
-	target_compile_definitions(mclshe${bit}
-		PUBLIC MCL_NO_AUTOLINK MCLBN_NO_AUTOLINK MCLSHE_NO_AUTOLINK)
-	target_link_libraries(mclshe${bit} PUBLIC mcl::mcl)
-	set_target_properties(mclshe${bit} PROPERTIES
-		VERSION ${mcl_VERSION}
-		SOVERSION ${mcl_VERSION_MAJOR})
-endforeach()
-
-# ECDSA
-add_library(mclecdsa SHARED src/ecdsa_c.cpp)
-add_library(mcl::mclecdsa ALIAS mclecdsa)
-set_target_properties(mclecdsa PROPERTIES
-	CXX_STANDARD 11
-	CXX_STANDARD_REQUIRED YES
-	CXX_EXTENSIONS NO)
-set_target_properties(mclecdsa PROPERTIES
-	VERSION ${mcl_VERSION}
-	SOVERSION ${mcl_VERSION_MAJOR})
-target_link_libraries(mclecdsa PUBLIC mcl::mcl)
-
 if(MSVC)
-	install(TARGETS mcl mcl_st mclbn256 mclbn384 mclbn384_256 mclbn512 mclshe256
-		mclshe384 mclshe384_256 windows_specific
+	install(TARGETS mcl mcl_st mclbn256 mclbn384 mclbn384_256 windows_specific
 		EXPORT mclTargets
 		LIBRARY DESTINATION lib
 		ARCHIVE DESTINATION lib
 		RUNTIME DESTINATION lib)
 else()
-	install(TARGETS mcl mcl_st mclbn256 mclbn384 mclbn384_256 mclbn512 mclshe256
-		mclshe384 mclshe384_256
+	install(TARGETS mcl mcl_st mclbn256 mclbn384 mclbn384_256
 		EXPORT mclTargets
 		LIBRARY DESTINATION lib
 		ARCHIVE DESTINATION lib

From 384b600009b6dd5d84eb16d0f656d52813fdb2fa Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 3 Sep 2020 17:02:14 +0900
Subject: [PATCH 282/553] remove mclshe and ecdsa test from cmake

---
 test/CMakeLists.txt | 37 +------------------------------------
 1 file changed, 1 insertion(+), 36 deletions(-)

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 22efd786..98d911c8 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -14,7 +14,7 @@ foreach(base IN ITEMS ${MCL_TEST_BASE})
 endforeach()
 
 # Tests for mclbnXXX
-foreach(bit IN ITEMS 256 384 384_256 512)
+foreach(bit IN ITEMS 256 384 384_256)
 	add_executable(bn_c${bit}_test bn_c${bit}_test.cpp)
 	target_link_libraries(bn_c${bit}_test PRIVATE mcl::mclbn${bit})
 	set_target_properties(bn_c${bit}_test PROPERTIES
@@ -25,38 +25,3 @@ foreach(bit IN ITEMS 256 384 384_256 512)
 	target_compile_definitions(bn_c${bit}_test PRIVATE MCLBN_DONT_EXPORT)
 	add_test(NAME bn_c${bit}_test COMMAND bn_c${bit}_test)
 endforeach()
-
-# Tests for mclsheXXX
-foreach(bit IN ITEMS 256 384 384_256)
-	add_executable(she_c${bit}_test she_c${bit}_test.cpp)
-	target_link_libraries(she_c${bit}_test PRIVATE mcl::mclshe${bit})
-	set_target_properties(she_c${bit}_test PROPERTIES
-		CXX_STANDARD 11
-		CXX_STANDARD_REQUIRED YES
-		CXX_EXTENSIONS NO)
-	target_compile_options(she_c${bit}_test PRIVATE ${MCL_COMPILE_OPTIONS})
-	target_compile_definitions(she_c${bit}_test PRIVATE MCLBN_DONT_EXPORT)
-	add_test(NAME she_c${bit}_test COMMAND she_c${bit}_test)
-endforeach()
-
-# ecdsa_test
-add_executable(ecdsa_test ecdsa_test.cpp)
-target_link_libraries(ecdsa_test PRIVATE mcl::mclecdsa)
-set_target_properties(ecdsa_test PROPERTIES
-	CXX_STANDARD 11
-	CXX_STANDARD_REQUIRED YES
-	CXX_EXTENSIONS NO)
-target_compile_options(ecdsa_test PRIVATE ${MCL_COMPILE_OPTIONS})
-target_compile_definitions(ecdsa_test PRIVATE MCLBN_DONT_EXPORT)
-add_test(NAME ecdsa_test COMMAND ecdsa_test)
-
-# ecdsa_c_test
-add_executable(ecdsa_c_test ecdsa_c_test.cpp)
-target_link_libraries(ecdsa_c_test PRIVATE mcl::mclecdsa)
-set_target_properties(ecdsa_c_test PROPERTIES
-	CXX_STANDARD 11
-	CXX_STANDARD_REQUIRED YES
-	CXX_EXTENSIONS NO)
-target_compile_options(ecdsa_c_test PRIVATE ${MCL_COMPILE_OPTIONS})
-target_compile_definitions(ecdsa_c_test PRIVATE MCLBN_DONT_EXPORT)
-add_test(NAME ecdsa_c_test COMMAND ecdsa_c_test)

From c060dea39d35845408114a4eef147d96f3bf284d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 3 Sep 2020 17:13:19 +0900
Subject: [PATCH 283/553] add default value to cmake

---
 CMakeLists.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 90c2df4c..c4eb3e89 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,10 +4,12 @@ project(mcl
 	VERSION 1.22
 	LANGUAGES CXX C ASM)
 
+set(CMAKE_BUILD_TYPE "Release")
+
 option(
 	MCL_MAX_BIT_SIZE
 	"max bit size for Fp"
-	0
+	384
 )
 if(MSVC)
 	option(

From 1c402095e2b8855af112f3fa4fb37c317ffec7a7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 3 Sep 2020 17:19:53 +0900
Subject: [PATCH 284/553] update doc for cmake

---
 readme.md           | 9 ++++++++-
 test/CMakeLists.txt | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/readme.md b/readme.md
index cf6ed155..94be8441 100644
--- a/readme.md
+++ b/readme.md
@@ -160,10 +160,17 @@ On Windows, `build.sh` expects [cybozulib_ext](https://github.com/herumi/cybozul
 ## options
 
 ```
-cmake .. USE_GMP=OFF ; without GMP
+cmake .. -DMCL_USE_GMP=OFF ; without GMP
 ```
 see `cmake .. -LA`.
 
+## tests
+make test binaries in `./bin`.
+```
+cmake .. -DBUILD_TESTING=ON
+make -j4
+```
+
 # How to build for wasm(WebAssembly)
 mcl supports emcc (Emscripten) and `test/bn_test.cpp` runs on browers such as Firefox, Chrome and Edge.
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 98d911c8..9a4f6350 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,5 +1,5 @@
 # Tests
-set(MCL_TEST_BASE fp_test ec_test fp_util_test window_method_test elgamal_test
+set(MCL_TEST_BASE fp_test ec_test fp_util_test window_method_test elgamal_test bls12_test
 	fp_tower_test gmp_test bn_test glv_test)
 foreach(base IN ITEMS ${MCL_TEST_BASE})
 	add_executable(${base} ${base}.cpp)

From 485c5c709d1ae61bd26346c1d5a4bb548d9fb83f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 8 Sep 2020 16:02:06 +0900
Subject: [PATCH 285/553] update xbyak

---
 src/detect_cpu.hpp         |  95 --------
 src/fp.cpp                 |  13 +-
 src/xbyak/xbyak.h          | 444 ++++++++++++++++++-----------------
 src/xbyak/xbyak_mnemonic.h | 468 ++++++++++++++++++-------------------
 src/xbyak/xbyak_util.h     |  99 ++++----
 5 files changed, 525 insertions(+), 594 deletions(-)
 delete mode 100644 src/detect_cpu.hpp

diff --git a/src/detect_cpu.hpp b/src/detect_cpu.hpp
deleted file mode 100644
index 9d63c0e8..00000000
--- a/src/detect_cpu.hpp
+++ /dev/null
@@ -1,95 +0,0 @@
-#pragma once
-/**
-	@file
-	@brief detect Intel CPU features
-	@author MITSUNARI Shigeo(@herumi)
-	@license modified new BSD license
-	http://opensource.org/licenses/BSD-3-Clause
-	This code is extracted from xbyak_util.h for compiling without xbyak
-*/
-
-#include <stdint.h>
-#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
-	#define MCL_INTEL_CPU_SPECIFIC
-#endif
-
-#ifdef MCL_INTEL_CPU_SPECIFIC
-#ifdef _MSC_VER
-	#include <intrin.h> // for __cpuid
-#else
-	#ifndef __GNUC_PREREQ
-		#define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
-	#endif
-	#if __GNUC_PREREQ(4, 3) && !defined(__APPLE__)
-		#include <cpuid.h>
-	#else
-		#ifndef __cpuid
-			#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
-			#define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
-		#endif
-	#endif
-#endif
-#endif
-
-namespace mcl { namespace util {
-
-/**
-	CPU detection class
-*/
-class Cpu {
-	uint64_t type_;
-public:
-	/*
-		data[] = { eax, ebx, ecx, edx }
-	*/
-	static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
-	{
-#ifdef MCL_INTEL_CPU_SPECIFIC
-	#ifdef _MSC_VER
-		__cpuid(reinterpret_cast<int*>(data), eaxIn);
-	#else
-		__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
-	#endif
-#else
-		(void)eaxIn;
-		(void)data;
-#endif
-	}
-	static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
-	{
-#ifdef MCL_INTEL_CPU_SPECIFIC
-	#ifdef _MSC_VER
-		__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
-	#else
-		__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
-	#endif
-#else
-		(void)eaxIn;
-		(void)ecxIn;
-		(void)data;
-#endif
-	}
-	typedef uint64_t Type;
-	static const Type NONE = 0;
-	static const Type tBMI2 = 1 << 22; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx
-	Cpu()
-		: type_(NONE)
-	{
-		unsigned int data[4] = {};
-		const unsigned int& EAX = data[0];
-		const unsigned int& EBX = data[1];
-		getCpuid(0, data);
-		const unsigned int maxNum = EAX;
-		if (maxNum >= 7) {
-			getCpuidEx(7, 0, data);
-			if (EBX & (1U << 8)) type_ |= tBMI2;
-		}
-	}
-	bool has(Type type) const
-	{
-		return (type & type_) != 0;
-	}
-};
-
-} } // mcl::util
-
diff --git a/src/fp.cpp b/src/fp.cpp
index edf47d21..b3b07d19 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -6,7 +6,9 @@
 #ifdef MCL_USE_XBYAK
 #include "fp_generator.hpp"
 #else
-#include "detect_cpu.hpp"
+#define XBYAK_ONLY_CLASS_CPU
+#include "xbyak/xbyak_util.h"
+//#include "detect_cpu.hpp"
 #endif
 #include "low_func.hpp"
 #ifdef MCL_USE_LLVM
@@ -299,13 +301,8 @@ void setOp(Op& op, Mode mode)
 	if (mode != fp::FP_GMP && mode != fp::FP_GMP_MONT) {
 #if MCL_LLVM_BMI2 == 1
 		const bool gmpIsFasterThanLLVM = false;//(N == 8 && MCL_SIZEOF_UNIT == 8);
-#ifdef MCL_USE_XBYAK
-		using namespace Xbyak;
-#else
-		using namespace mcl;
-#endif
-		util::Cpu cpu;
-		if (cpu.has(util::Cpu::tBMI2)) {
+		Xbyak::util::Cpu cpu;
+		if (cpu.has(Xbyak::util::Cpu::tBMI2)) {
 			setOp2<N, LBMI2tag, (N * UnitBitSize <= 384), gmpIsFasterThanLLVM>(op);
 		} else
 #endif
diff --git a/src/xbyak/xbyak.h b/src/xbyak/xbyak.h
index 41894d00..67b75f30 100644
--- a/src/xbyak/xbyak.h
+++ b/src/xbyak/xbyak.h
@@ -109,7 +109,18 @@
 #endif
 
 #if (__cplusplus >= 201103) || (_MSC_VER >= 1800)
+	#undef XBYAK_TLS
+	#define XBYAK_TLS thread_local
 	#define XBYAK_VARIADIC_TEMPLATE
+	#define XBYAK_NOEXCEPT noexcept
+#else
+	#define XBYAK_NOEXCEPT throw()
+#endif
+
+#if (__cplusplus >= 201402L) || (_MSC_VER >= 1910) // Visual Studio 2017 version 15.0
+	#define XBYAK_CONSTEXPR constexpr // require c++14 or later
+#else
+	#define XBYAK_CONSTEXPR
 #endif
 
 #ifdef _MSC_VER
@@ -124,21 +135,17 @@ namespace Xbyak {
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x5941 /* 0xABCD = A.BC(D) */
+	VERSION = 0x5970 /* 0xABCD = A.BC(D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
 #define MIE_INTEGER_TYPE_DEFINED
-#ifdef _MSC_VER
-	typedef unsigned __int64 uint64;
-	typedef __int64 sint64;
-#else
-	typedef uint64_t uint64;
-	typedef int64_t sint64;
-#endif
-typedef unsigned int uint32;
-typedef unsigned short uint16;
-typedef unsigned char uint8;
+// for backward compatibility
+typedef uint64_t uint64;
+typedef int64_t sint64;
+typedef uint32_t uint32;
+typedef uint16_t uint16;
+typedef uint8_t uint8;
 #endif
 
 #ifndef MIE_ALIGN
@@ -282,7 +289,7 @@ class Error : public std::exception {
 		}
 	}
 	operator int() const { return err_; }
-	const char *what() const throw()
+	const char *what() const XBYAK_NOEXCEPT
 	{
 		return ConvertErrorToString(err_);
 	}
@@ -327,7 +334,7 @@ inline void AlignedFree(void *p)
 }
 
 template<class To, class From>
-inline const To CastTo(From p) throw()
+inline const To CastTo(From p) XBYAK_NOEXCEPT
 {
 	return (const To)(size_t)(p);
 }
@@ -335,15 +342,15 @@ namespace inner {
 
 static const size_t ALIGN_PAGE_SIZE = 4096;
 
-inline bool IsInDisp8(uint32 x) { return 0xFFFFFF80 <= x || x <= 0x7F; }
-inline bool IsInInt32(uint64 x) { return ~uint64(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; }
+inline bool IsInDisp8(uint32_t x) { return 0xFFFFFF80 <= x || x <= 0x7F; }
+inline bool IsInInt32(uint64_t x) { return ~uint64_t(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; }
 
-inline uint32 VerifyInInt32(uint64 x)
+inline uint32_t VerifyInInt32(uint64_t x)
 {
 #ifdef XBYAK64
 	if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0)
 #endif
-	return static_cast<uint32>(x);
+	return static_cast<uint32_t>(x);
 }
 
 enum LabelMode {
@@ -358,8 +365,8 @@ enum LabelMode {
 	custom allocator
 */
 struct Allocator {
-	virtual uint8 *alloc(size_t size) { return reinterpret_cast<uint8*>(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); }
-	virtual void free(uint8 *p) { AlignedFree(p); }
+	virtual uint8_t *alloc(size_t size) { return reinterpret_cast<uint8_t*>(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); }
+	virtual void free(uint8_t *p) { AlignedFree(p); }
 	virtual ~Allocator() {}
 	/* override to return false if you call protect() manually */
 	virtual bool useProtect() const { return true; }
@@ -393,7 +400,7 @@ class MmapAllocator : Allocator {
 	typedef XBYAK_STD_UNORDERED_MAP<uintptr_t, size_t> SizeList;
 	SizeList sizeList_;
 public:
-	uint8 *alloc(size_t size)
+	uint8_t *alloc(size_t size)
 	{
 		const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1;
 		size = (size + alignedSizeM1) & ~alignedSizeM1;
@@ -412,9 +419,9 @@ class MmapAllocator : Allocator {
 		if (p == MAP_FAILED) XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
 		assert(p);
 		sizeList_[(uintptr_t)p] = size;
-		return (uint8*)p;
+		return (uint8_t*)p;
 	}
-	void free(uint8 *p)
+	void free(uint8_t *p)
 	{
 		if (p == 0) return;
 		SizeList::iterator i = sizeList_.find((uintptr_t)p);
@@ -429,7 +436,7 @@ class Address;
 class Reg;
 
 class Operand {
-	static const uint8 EXT8BIT = 0x20;
+	static const uint8_t EXT8BIT = 0x20;
 	unsigned int idx_:6; // 0..31 + EXT8BIT = 1 if spl/bpl/sil/dil
 	unsigned int kind_:10;
 	unsigned int bit_:14;
@@ -464,39 +471,39 @@ class Operand {
 		AX = 0, CX, DX, BX, SP, BP, SI, DI,
 		AL = 0, CL, DL, BL, AH, CH, DH, BH
 	};
-	Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0) { }
-	Operand(int idx, Kind kind, int bit, bool ext8bit = 0)
-		: idx_(static_cast<uint8>(idx | (ext8bit ? EXT8BIT : 0)))
+	XBYAK_CONSTEXPR Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0) { }
+	XBYAK_CONSTEXPR Operand(int idx, Kind kind, int bit, bool ext8bit = 0)
+		: idx_(static_cast<uint8_t>(idx | (ext8bit ? EXT8BIT : 0)))
 		, kind_(kind)
 		, bit_(bit)
 		, zero_(0), mask_(0), rounding_(0)
 	{
 		assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two
 	}
-	Kind getKind() const { return static_cast<Kind>(kind_); }
-	int getIdx() const { return idx_ & (EXT8BIT - 1); }
-	bool isNone() const { return kind_ == 0; }
-	bool isMMX() const { return is(MMX); }
-	bool isXMM() const { return is(XMM); }
-	bool isYMM() const { return is(YMM); }
-	bool isZMM() const { return is(ZMM); }
-	bool isTMM() const { return is(TMM); }
-	bool isXMEM() const { return is(XMM | MEM); }
-	bool isYMEM() const { return is(YMM | MEM); }
-	bool isZMEM() const { return is(ZMM | MEM); }
-	bool isOPMASK() const { return is(OPMASK); }
-	bool isBNDREG() const { return is(BNDREG); }
-	bool isREG(int bit = 0) const { return is(REG, bit); }
-	bool isMEM(int bit = 0) const { return is(MEM, bit); }
-	bool isFPU() const { return is(FPU); }
-	bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; }
-	bool isExtIdx() const { return (getIdx() & 8) != 0; }
-	bool isExtIdx2() const { return (getIdx() & 16) != 0; }
-	bool hasEvex() const { return isZMM() || isExtIdx2() || getOpmaskIdx() || getRounding(); }
-	bool hasRex() const { return isExt8bit() || isREG(64) || isExtIdx(); }
-	bool hasZero() const { return zero_; }
-	int getOpmaskIdx() const { return mask_; }
-	int getRounding() const { return rounding_; }
+	XBYAK_CONSTEXPR Kind getKind() const { return static_cast<Kind>(kind_); }
+	XBYAK_CONSTEXPR int getIdx() const { return idx_ & (EXT8BIT - 1); }
+	XBYAK_CONSTEXPR bool isNone() const { return kind_ == 0; }
+	XBYAK_CONSTEXPR bool isMMX() const { return is(MMX); }
+	XBYAK_CONSTEXPR bool isXMM() const { return is(XMM); }
+	XBYAK_CONSTEXPR bool isYMM() const { return is(YMM); }
+	XBYAK_CONSTEXPR bool isZMM() const { return is(ZMM); }
+	XBYAK_CONSTEXPR bool isTMM() const { return is(TMM); }
+	XBYAK_CONSTEXPR bool isXMEM() const { return is(XMM | MEM); }
+	XBYAK_CONSTEXPR bool isYMEM() const { return is(YMM | MEM); }
+	XBYAK_CONSTEXPR bool isZMEM() const { return is(ZMM | MEM); }
+	XBYAK_CONSTEXPR bool isOPMASK() const { return is(OPMASK); }
+	XBYAK_CONSTEXPR bool isBNDREG() const { return is(BNDREG); }
+	XBYAK_CONSTEXPR bool isREG(int bit = 0) const { return is(REG, bit); }
+	XBYAK_CONSTEXPR bool isMEM(int bit = 0) const { return is(MEM, bit); }
+	XBYAK_CONSTEXPR bool isFPU() const { return is(FPU); }
+	XBYAK_CONSTEXPR bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; }
+	XBYAK_CONSTEXPR bool isExtIdx() const { return (getIdx() & 8) != 0; }
+	XBYAK_CONSTEXPR bool isExtIdx2() const { return (getIdx() & 16) != 0; }
+	XBYAK_CONSTEXPR bool hasEvex() const { return isZMM() || isExtIdx2() || getOpmaskIdx() || getRounding(); }
+	XBYAK_CONSTEXPR bool hasRex() const { return isExt8bit() || isREG(64) || isExtIdx(); }
+	XBYAK_CONSTEXPR bool hasZero() const { return zero_; }
+	XBYAK_CONSTEXPR int getOpmaskIdx() const { return mask_; }
+	XBYAK_CONSTEXPR int getRounding() const { return rounding_; }
 	void setKind(Kind kind)
 	{
 		if ((kind & (XMM|YMM|ZMM|TMM)) == 0) return;
@@ -525,12 +532,12 @@ class Operand {
 		return AH <= idx && idx <= BH;
 	}
 	// any bit is accetable if bit == 0
-	bool is(int kind, uint32 bit = 0) const
+	XBYAK_CONSTEXPR bool is(int kind, uint32_t bit = 0) const
 	{
 		return (kind == 0 || (kind_ & kind)) && (bit == 0 || (bit_ & bit)); // cf. you can set (8|16)
 	}
-	bool isBit(uint32 bit) const { return (bit_ & bit) != 0; }
-	uint32 getBit() const { return bit_; }
+	XBYAK_CONSTEXPR bool isBit(uint32_t bit) const { return (bit_ & bit) != 0; }
+	XBYAK_CONSTEXPR uint32_t getBit() const { return bit_; }
 	const char *toString() const
 	{
 		const int idx = getIdx();
@@ -645,17 +652,17 @@ struct Reg64;
 #endif
 class Reg : public Operand {
 public:
-	Reg() { }
-	Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { }
+	XBYAK_CONSTEXPR Reg() { }
+	XBYAK_CONSTEXPR Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { }
 	// convert to Reg8/Reg16/Reg32/Reg64/XMM/YMM/ZMM
 	Reg changeBit(int bit) const { Reg r(*this); r.setBit(bit); return r; }
-	uint8 getRexW() const { return isREG(64) ? 8 : 0; }
-	uint8 getRexR() const { return isExtIdx() ? 4 : 0; }
-	uint8 getRexX() const { return isExtIdx() ? 2 : 0; }
-	uint8 getRexB() const { return isExtIdx() ? 1 : 0; }
-	uint8 getRex(const Reg& base = Reg()) const
+	uint8_t getRexW() const { return isREG(64) ? 8 : 0; }
+	uint8_t getRexR() const { return isExtIdx() ? 4 : 0; }
+	uint8_t getRexX() const { return isExtIdx() ? 2 : 0; }
+	uint8_t getRexB() const { return isExtIdx() ? 1 : 0; }
+	uint8_t getRex(const Reg& base = Reg()) const
 	{
-		uint8 rex = getRexW() | getRexR() | base.getRexW() | base.getRexB();
+		uint8_t rex = getRexW() | getRexR() | base.getRexW() | base.getRexB();
 		if (rex || isExt8bit() || base.isExt8bit()) rex |= 0x40;
 		return rex;
 	}
@@ -674,15 +681,15 @@ inline const Reg& Operand::getReg() const
 }
 
 struct Reg8 : public Reg {
-	explicit Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { }
+	explicit XBYAK_CONSTEXPR Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { }
 };
 
 struct Reg16 : public Reg {
-	explicit Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { }
+	explicit XBYAK_CONSTEXPR Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { }
 };
 
 struct Mmx : public Reg {
-	explicit Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { }
+	explicit XBYAK_CONSTEXPR Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { }
 };
 
 struct EvexModifierRounding {
@@ -693,41 +700,41 @@ struct EvexModifierRounding {
 		T_RZ_SAE = 4,
 		T_SAE = 5
 	};
-	explicit EvexModifierRounding(int rounding) : rounding(rounding) {}
+	explicit XBYAK_CONSTEXPR EvexModifierRounding(int rounding) : rounding(rounding) {}
 	int rounding;
 };
-struct EvexModifierZero{EvexModifierZero() {}};
+struct EvexModifierZero{ XBYAK_CONSTEXPR EvexModifierZero() {}};
 
 struct Xmm : public Mmx {
-	explicit Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { }
-	Xmm(Kind kind, int idx) : Mmx(idx, kind, kind == XMM ? 128 : kind == YMM ? 256 : 512) { }
+	explicit XBYAK_CONSTEXPR Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { }
+	XBYAK_CONSTEXPR Xmm(Kind kind, int idx) : Mmx(idx, kind, kind == XMM ? 128 : kind == YMM ? 256 : 512) { }
 	Xmm operator|(const EvexModifierRounding& emr) const { Xmm r(*this); r.setRounding(emr.rounding); return r; }
 	Xmm copyAndSetIdx(int idx) const { Xmm ret(*this); ret.setIdx(idx); return ret; }
 	Xmm copyAndSetKind(Operand::Kind kind) const { Xmm ret(*this); ret.setKind(kind); return ret; }
 };
 
 struct Ymm : public Xmm {
-	explicit Ymm(int idx = 0, Kind kind = Operand::YMM, int bit = 256) : Xmm(idx, kind, bit) { }
+	explicit XBYAK_CONSTEXPR Ymm(int idx = 0, Kind kind = Operand::YMM, int bit = 256) : Xmm(idx, kind, bit) { }
 	Ymm operator|(const EvexModifierRounding& emr) const { Ymm r(*this); r.setRounding(emr.rounding); return r; }
 };
 
 struct Zmm : public Ymm {
-	explicit Zmm(int idx = 0) : Ymm(idx, Operand::ZMM, 512) { }
+	explicit XBYAK_CONSTEXPR Zmm(int idx = 0) : Ymm(idx, Operand::ZMM, 512) { }
 	Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; }
 };
 
 #ifdef XBYAK64
 struct Tmm : public Reg {
-	explicit Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { }
+	explicit XBYAK_CONSTEXPR Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { }
 };
 #endif
 
 struct Opmask : public Reg {
-	explicit Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {}
+	explicit XBYAK_CONSTEXPR Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {}
 };
 
 struct BoundsReg : public Reg {
-	explicit BoundsReg(int idx = 0) : Reg(idx, Operand::BNDREG, 128) {}
+	explicit XBYAK_CONSTEXPR BoundsReg(int idx = 0) : Reg(idx, Operand::BNDREG, 128) {}
 };
 
 template<class T>T operator|(const T& x, const Opmask& k) { T r(x); r.setOpmaskIdx(k.getIdx()); return r; }
@@ -735,34 +742,34 @@ template<class T>T operator|(const T& x, const EvexModifierZero&) { T r(x); r.se
 template<class T>T operator|(const T& x, const EvexModifierRounding& emr) { T r(x); r.setRounding(emr.rounding); return r; }
 
 struct Fpu : public Reg {
-	explicit Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { }
+	explicit XBYAK_CONSTEXPR Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { }
 };
 
 struct Reg32e : public Reg {
-	explicit Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {}
+	explicit XBYAK_CONSTEXPR Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {}
 };
 struct Reg32 : public Reg32e {
-	explicit Reg32(int idx = 0) : Reg32e(idx, 32) {}
+	explicit XBYAK_CONSTEXPR Reg32(int idx = 0) : Reg32e(idx, 32) {}
 };
 #ifdef XBYAK64
 struct Reg64 : public Reg32e {
-	explicit Reg64(int idx = 0) : Reg32e(idx, 64) {}
+	explicit XBYAK_CONSTEXPR Reg64(int idx = 0) : Reg32e(idx, 64) {}
 };
 struct RegRip {
-	sint64 disp_;
+	int64_t disp_;
 	const Label* label_;
 	bool isAddr_;
-	explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
+	explicit XBYAK_CONSTEXPR RegRip(int64_t disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
 	friend const RegRip operator+(const RegRip& r, int disp) {
 		return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
 	}
 	friend const RegRip operator-(const RegRip& r, int disp) {
 		return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
 	}
-	friend const RegRip operator+(const RegRip& r, sint64 disp) {
+	friend const RegRip operator+(const RegRip& r, int64_t disp) {
 		return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
 	}
-	friend const RegRip operator-(const RegRip& r, sint64 disp) {
+	friend const RegRip operator-(const RegRip& r, int64_t disp) {
 		return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
 	}
 	friend const RegRip operator+(const RegRip& r, const Label& label) {
@@ -771,7 +778,7 @@ struct RegRip {
 	}
 	friend const RegRip operator+(const RegRip& r, const void *addr) {
 		if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip());
-		return RegRip(r.disp_ + (sint64)addr, 0, true);
+		return RegRip(r.disp_ + (int64_t)addr, 0, true);
 	}
 };
 #endif
@@ -806,7 +813,7 @@ class Segment {
 	enum {
 		es, cs, ss, ds, fs, gs
 	};
-	explicit Segment(int idx) : idx_(idx) { assert(0 <= idx_ && idx_ < 6); }
+	explicit XBYAK_CONSTEXPR Segment(int idx) : idx_(idx) { assert(0 <= idx_ && idx_ < 6); }
 	int getIdx() const { return idx_; }
 	const char *toString() const
 	{
@@ -825,8 +832,8 @@ class RegExp {
 #else
 	enum { i32e = 32 };
 #endif
-	RegExp(size_t disp = 0) : scale_(0), disp_(disp) { }
-	RegExp(const Reg& r, int scale = 1)
+	XBYAK_CONSTEXPR RegExp(size_t disp = 0) : scale_(0), disp_(disp) { }
+	XBYAK_CONSTEXPR RegExp(const Reg& r, int scale = 1)
 		: scale_(scale)
 		, disp_(0)
 	{
@@ -858,7 +865,7 @@ class RegExp {
 	const Reg& getIndex() const { return index_; }
 	int getScale() const { return scale_; }
 	size_t getDisp() const { return disp_; }
-	void verify() const
+	XBYAK_CONSTEXPR void verify() const
 	{
 		if (base_.getBit() >= 128) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
 		if (index_.getBit() && index_.getBit() <= 64) {
@@ -868,10 +875,10 @@ class RegExp {
 	}
 	friend RegExp operator+(const RegExp& a, const RegExp& b);
 	friend RegExp operator-(const RegExp& e, size_t disp);
-	uint8 getRex() const
+	uint8_t getRex() const
 	{
-		uint8 rex = index_.getRexX() | base_.getRexB();
-		return rex ? uint8(rex | 0x40) : 0;
+		uint8_t rex = index_.getRexX() | base_.getRexB();
+		return rex ? uint8_t(rex | 0x40) : 0;
 	}
 private:
 	/*
@@ -935,9 +942,9 @@ class CodeArray {
 		inner::LabelMode mode;
 		AddrInfo(size_t _codeOffset, size_t _jmpAddr, int _jmpSize, inner::LabelMode _mode)
 			: codeOffset(_codeOffset), jmpAddr(_jmpAddr), jmpSize(_jmpSize), mode(_mode) {}
-		uint64 getVal(const uint8 *top) const
+		uint64_t getVal(const uint8_t *top) const
 		{
-			uint64 disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? jmpAddr : jmpAddr - size_t(top);
+			uint64_t disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? jmpAddr : jmpAddr - size_t(top);
 			if (jmpSize == 4) disp = inner::VerifyInInt32(disp);
 			return disp;
 		}
@@ -953,7 +960,7 @@ class CodeArray {
 	Allocator *alloc_;
 protected:
 	size_t maxSize_;
-	uint8 *top_;
+	uint8_t *top_;
 	size_t size_;
 	bool isCalledCalcJmpAddress_;
 
@@ -964,7 +971,7 @@ class CodeArray {
 	void growMemory()
 	{
 		const size_t newSize = (std::max<size_t>)(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2);
-		uint8 *newTop = alloc_->alloc(newSize);
+		uint8_t *newTop = alloc_->alloc(newSize);
 		if (newTop == 0) XBYAK_THROW(ERR_CANT_ALLOC)
 		for (size_t i = 0; i < size_; i++) newTop[i] = top_[i];
 		alloc_->free(top_);
@@ -978,7 +985,7 @@ class CodeArray {
 	{
 		if (isCalledCalcJmpAddress_) return;
 		for (AddrInfoList::const_iterator i = addrInfoList_.begin(), ie = addrInfoList_.end(); i != ie; ++i) {
-			uint64 disp = i->getVal(top_);
+			uint64_t disp = i->getVal(top_);
 			rewrite(i->codeOffset, disp, i->jmpSize);
 		}
 		isCalledCalcJmpAddress_ = true;
@@ -993,7 +1000,7 @@ class CodeArray {
 		: type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF)
 		, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
 		, maxSize_(maxSize)
-		, top_(type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
+		, top_(type_ == USER_BUF ? reinterpret_cast<uint8_t*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
 		, size_(0)
 		, isCalledCalcJmpAddress_(false)
 	{
@@ -1034,24 +1041,24 @@ class CodeArray {
 				XBYAK_THROW(ERR_CODE_IS_TOO_BIG)
 			}
 		}
-		top_[size_++] = static_cast<uint8>(code);
+		top_[size_++] = static_cast<uint8_t>(code);
 	}
-	void db(const uint8 *code, size_t codeSize)
+	void db(const uint8_t *code, size_t codeSize)
 	{
 		for (size_t i = 0; i < codeSize; i++) db(code[i]);
 	}
-	void db(uint64 code, size_t codeSize)
+	void db(uint64_t code, size_t codeSize)
 	{
 		if (codeSize > 8) XBYAK_THROW(ERR_BAD_PARAMETER)
-		for (size_t i = 0; i < codeSize; i++) db(static_cast<uint8>(code >> (i * 8)));
+		for (size_t i = 0; i < codeSize; i++) db(static_cast<uint8_t>(code >> (i * 8)));
 	}
-	void dw(uint32 code) { db(code, 2); }
-	void dd(uint32 code) { db(code, 4); }
-	void dq(uint64 code) { db(code, 8); }
-	const uint8 *getCode() const { return top_; }
+	void dw(uint32_t code) { db(code, 2); }
+	void dd(uint32_t code) { db(code, 4); }
+	void dq(uint64_t code) { db(code, 8); }
+	const uint8_t *getCode() const { return top_; }
 	template<class F>
 	const F getCode() const { return reinterpret_cast<F>(top_); }
-	const uint8 *getCurr() const { return &top_[size_]; }
+	const uint8_t *getCurr() const { return &top_[size_]; }
 	template<class F>
 	const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); }
 	size_t getSize() const { return size_; }
@@ -1062,7 +1069,7 @@ class CodeArray {
 	}
 	void dump() const
 	{
-		const uint8 *p = getCode();
+		const uint8_t *p = getCode();
 		size_t bufSize = getSize();
 		size_t remain = bufSize;
 		for (int i = 0; i < 4; i++) {
@@ -1087,13 +1094,13 @@ class CodeArray {
 		@param disp [in] offset from the next of jmp
 		@param size [in] write size(1, 2, 4, 8)
 	*/
-	void rewrite(size_t offset, uint64 disp, size_t size)
+	void rewrite(size_t offset, uint64_t disp, size_t size)
 	{
 		assert(offset < maxSize_);
 		if (size != 1 && size != 2 && size != 4 && size != 8) XBYAK_THROW(ERR_BAD_PARAMETER)
-		uint8 *const data = top_ + offset;
+		uint8_t *const data = top_ + offset;
 		for (size_t i = 0; i < size; i++) {
-			data[i] = static_cast<uint8>(disp >> (i * 8));
+			data[i] = static_cast<uint8_t>(disp >> (i * 8));
 		}
 	}
 	void save(size_t offset, size_t val, int size, inner::LabelMode mode)
@@ -1150,9 +1157,9 @@ class CodeArray {
 		@param alignedSize [in] power of two
 		@return aligned addr by alingedSize
 	*/
-	static inline uint8 *getAlignedAddress(uint8 *addr, size_t alignedSize = 16)
+	static inline uint8_t *getAlignedAddress(uint8_t *addr, size_t alignedSize = 16)
 	{
-		return reinterpret_cast<uint8*>((reinterpret_cast<size_t>(addr) + alignedSize - 1) & ~(alignedSize - static_cast<size_t>(1)));
+		return reinterpret_cast<uint8_t*>((reinterpret_cast<size_t>(addr) + alignedSize - 1) & ~(alignedSize - static_cast<size_t>(1)));
 	}
 };
 
@@ -1164,15 +1171,15 @@ class Address : public Operand {
 		M_rip,
 		M_ripAddr
 	};
-	Address(uint32 sizeBit, bool broadcast, const RegExp& e)
+	XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e)
 		: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast)
 	{
 		e_.verify();
 	}
 #ifdef XBYAK64
-	explicit Address(size_t disp)
+	explicit XBYAK_CONSTEXPR Address(size_t disp)
 		: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ }
-	Address(uint32 sizeBit, bool broadcast, const RegRip& addr)
+	XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegRip& addr)
 		: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { }
 #endif
 	RegExp getRegExp(bool optimize = true) const
@@ -1183,7 +1190,7 @@ class Address : public Operand {
 	bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
 	bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
 	size_t getDisp() const { return e_.getDisp(); }
-	uint8 getRex() const
+	uint8_t getRex() const
 	{
 		if (mode_ != M_ModRM) return 0;
 		return getRegExp().getRex();
@@ -1220,9 +1227,9 @@ class AddressFrame {
 	void operator=(const AddressFrame&);
 	AddressFrame(const AddressFrame&);
 public:
-	const uint32 bit_;
+	const uint32_t bit_;
 	const bool broadcast_;
-	explicit AddressFrame(uint32 bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { }
+	explicit XBYAK_CONSTEXPR AddressFrame(uint32_t bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { }
 	Address operator[](const RegExp& e) const
 	{
 		return Address(bit_, broadcast_, e);
@@ -1232,7 +1239,7 @@ class AddressFrame {
 		return Address(bit_, broadcast_, RegExp(reinterpret_cast<size_t>(disp)));
 	}
 #ifdef XBYAK64
-	Address operator[](uint64 disp) const { return Address(disp); }
+	Address operator[](uint64_t disp) const { return Address(disp); }
 	Address operator[](const RegRip& addr) const { return Address(bit_, broadcast_, addr); }
 #endif
 };
@@ -1261,7 +1268,7 @@ class Label {
 	~Label();
 	void clear() { mgr = 0; id = 0; }
 	int getId() const { return id; }
-	const uint8 *getAddress() const;
+	const uint8_t *getAddress() const;
 
 	// backward compatibility
 	static inline std::string toStr(int num)
@@ -1336,7 +1343,7 @@ class LabelManager {
 #ifdef XBYAK64
 				if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
 #endif
-				if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32)disp)) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR)
+				if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32_t)disp)) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR)
 			}
 			if (base_->isAutoGrow()) {
 				base_->save(offset, disp, jmp->jmpSize, jmp->mode);
@@ -1493,7 +1500,7 @@ class LabelManager {
 		return false;
 	}
 	bool hasUndefClabel() const { return hasUndefinedLabel_inner(clabelUndefList_); }
-	const uint8 *getCode() const { return base_->getCode(); }
+	const uint8_t *getCode() const { return base_->getCode(); }
 	bool isReady() const { return !base_->isAutoGrow() || base_->isCalledCalcJmpAddress(); }
 };
 
@@ -1515,7 +1522,7 @@ inline Label::~Label()
 {
 	if (id && mgr) mgr->decRefCount(id, this);
 }
-inline const uint8* Label::getAddress() const
+inline const uint8_t* Label::getAddress() const
 {
 	if (mgr == 0 || !mgr->isReady()) return 0;
 	size_t offset;
@@ -1534,7 +1541,7 @@ class CodeGenerator : public CodeArray {
 	CodeGenerator operator=(const CodeGenerator&); // don't call
 #ifdef XBYAK64
 	enum { i32e = 32 | 64, BIT = 64 };
-	static const uint64 dummyAddr = uint64(0x1122334455667788ull);
+	static const uint64_t dummyAddr = uint64_t(0x1122334455667788ull);
 	typedef Reg64 NativeReg;
 #else
 	enum { i32e = 32, BIT = 32 };
@@ -1578,7 +1585,7 @@ class CodeGenerator : public CodeArray {
 	}
 	void rex(const Operand& op1, const Operand& op2 = Operand())
 	{
-		uint8 rex = 0;
+		uint8_t rex = 0;
 		const Operand *p1 = &op1, *p2 = &op2;
 		if (p1->isMEM()) std::swap(p1, p2);
 		if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
@@ -1642,12 +1649,12 @@ class CodeGenerator : public CodeArray {
 		bool b = base.isExtIdx();
 		int idx = v ? v->getIdx() : 0;
 		if ((idx | reg.getIdx() | base.getIdx()) >= 16) XBYAK_THROW(ERR_BAD_COMBINATION)
-		uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
-		uint32 vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp;
+		uint32_t pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
+		uint32_t vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp;
 		if (!b && !x && !w && (type & T_0F)) {
 			db(0xC5); db((r ? 0 : 0x80) | vvvv);
 		} else {
-			uint32 mmmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
+			uint32_t mmmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
 			db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv);
 		}
 		db(code);
@@ -1669,15 +1676,15 @@ class CodeGenerator : public CodeArray {
 		if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) XBYAK_THROW_RET(err, 0)
 		return v;
 	}
-	int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = false)
+	int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32_t VL = 0, bool Hi16Vidx = false)
 	{
 		if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0)
 		int w = (type & T_EW1) ? 1 : 0;
-		uint32 mm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
-		uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
+		uint32_t mm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
+		uint32_t pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
 
 		int idx = v ? v->getIdx() : 0;
-		uint32 vvvv = ~idx;
+		uint32_t vvvv = ~idx;
 
 		bool R = !reg.isExtIdx();
 		bool X = x ? false : !base.isExtIdx2();
@@ -1725,16 +1732,16 @@ class CodeGenerator : public CodeArray {
 	}
 	void setModRM(int mod, int r1, int r2)
 	{
-		db(static_cast<uint8>((mod << 6) | ((r1 & 7) << 3) | (r2 & 7)));
+		db(static_cast<uint8_t>((mod << 6) | ((r1 & 7) << 3) | (r2 & 7)));
 	}
 	void setSIB(const RegExp& e, int reg, int disp8N = 0)
 	{
-		uint64 disp64 = e.getDisp();
+		uint64_t disp64 = e.getDisp();
 #ifdef XBYAK64
-		uint64 high = disp64 >> 32;
+		uint64_t high = disp64 >> 32;
 		if (high != 0 && high != 0xFFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
 #endif
-		uint32 disp = static_cast<uint32>(disp64);
+		uint32_t disp = static_cast<uint32_t>(disp64);
 		const Reg& base = e.getBase();
 		const Reg& index = e.getIndex();
 		const int baseIdx = base.getIdx();
@@ -1753,7 +1760,7 @@ class CodeGenerator : public CodeArray {
 				}
 			} else {
 				// disp must be casted to signed
-				uint32 t = static_cast<uint32>(static_cast<int>(disp) / disp8N);
+				uint32_t t = static_cast<uint32_t>(static_cast<int>(disp) / disp8N);
 				if ((disp % disp8N) == 0 && inner::IsInDisp8(t)) {
 					disp = t;
 					mod = mod01;
@@ -1783,7 +1790,7 @@ class CodeGenerator : public CodeArray {
 		}
 	}
 	LabelManager labelMgr_;
-	bool isInDisp16(uint32 x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; }
+	bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; }
 	void opModR(const Reg& reg1, const Reg& reg2, int code0, int code1 = NONE, int code2 = NONE)
 	{
 		rex(reg2, reg1);
@@ -1811,12 +1818,12 @@ class CodeGenerator : public CodeArray {
 		if (addr.getMode() != Address::M_ModRM) XBYAK_THROW(ERR_INVALID_MIB_ADDRESS)
 		if (BIT == 64 && addr.is32bit()) db(0x67);
 		const RegExp& regExp = addr.getRegExp(false);
-		uint8 rex = regExp.getRex();
+		uint8_t rex = regExp.getRex();
 		if (rex) db(rex);
 		db(code0); db(code1);
 		setSIB(regExp, reg.getIdx());
 	}
-	void makeJmp(uint32 disp, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref)
+	void makeJmp(uint32_t disp, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
 	{
 		const int shortJmpSize = 2;
 		const int longHeaderSize = longPref ? 2 : 1;
@@ -1831,7 +1838,7 @@ class CodeGenerator : public CodeArray {
 	}
 	bool isNEAR(LabelType type) const { return type == T_NEAR || (type == T_AUTO && isDefaultJmpNEAR_); }
 	template<class T>
-	void opJmp(T& label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref)
+	void opJmp(T& label, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
 	{
 		if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */
 		size_t offset = 0;
@@ -1851,7 +1858,7 @@ class CodeGenerator : public CodeArray {
 			labelMgr_.addUndefinedLabel(label, jmp);
 		}
 	}
-	void opJmpAbs(const void *addr, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref = 0)
+	void opJmpAbs(const void *addr, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref = 0)
 	{
 		if (isAutoGrow()) {
 			if (!isNEAR(type)) XBYAK_THROW(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW)
@@ -1861,7 +1868,7 @@ class CodeGenerator : public CodeArray {
 			dd(0);
 			save(size_ - 4, size_t(addr) - size_, 4, inner::Labs);
 		} else {
-			makeJmp(inner::VerifyInInt32(reinterpret_cast<const uint8*>(addr) - getCurr()), type, shortCode, longCode, longPref);
+			makeJmp(inner::VerifyInInt32(reinterpret_cast<const uint8_t*>(addr) - getCurr()), type, shortCode, longCode, longPref);
 		}
 
 	}
@@ -1962,7 +1969,7 @@ class CodeGenerator : public CodeArray {
 			XBYAK_THROW(ERR_BAD_COMBINATION)
 		}
 	}
-	void opShxd(const Operand& op, const Reg& reg, uint8 imm, int code, const Reg8 *_cl = 0)
+	void opShxd(const Operand& op, const Reg& reg, uint8_t imm, int code, const Reg8 *_cl = 0)
 	{
 		if (_cl && _cl->getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION)
 		opModRM(reg, op, (op.isREG(16 | i32e) && op.getBit() == reg.getBit()), op.isMEM() && (reg.isREG(16 | i32e)), 0x0F, code | (_cl ? 1 : 0), NONE, _cl ? 0 : 1);
@@ -1978,10 +1985,10 @@ class CodeGenerator : public CodeArray {
 		}
 	}
 	// (REG|MEM, IMM)
-	void opRM_I(const Operand& op, uint32 imm, int code, int ext)
+	void opRM_I(const Operand& op, uint32_t imm, int code, int ext)
 	{
 		verifyMemHasSize(op);
-		uint32 immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
+		uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
 		if (op.isBit(8)) immBit = 8;
 		if (op.getBit() < immBit) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
 		if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */
@@ -2034,12 +2041,12 @@ class CodeGenerator : public CodeArray {
 	/*
 		mov(r, imm) = db(imm, mov_imm(r, imm))
 	*/
-	int mov_imm(const Reg& reg, uint64 imm)
+	int mov_imm(const Reg& reg, uint64_t imm)
 	{
 		int bit = reg.getBit();
 		const int idx = reg.getIdx();
 		int code = 0xB0 | ((bit == 8 ? 0 : 1) << 3);
-		if (bit == 64 && (imm & ~uint64(0xffffffffu)) == 0) {
+		if (bit == 64 && (imm & ~uint64_t(0xffffffffu)) == 0) {
 			rex(Reg32(idx));
 			bit = 32;
 		} else {
@@ -2063,18 +2070,18 @@ class CodeGenerator : public CodeArray {
 			if (relative) {
 				db(inner::VerifyInInt32(offset + disp - size_ - jmpSize), jmpSize);
 			} else if (isAutoGrow()) {
-				db(uint64(0), jmpSize);
+				db(uint64_t(0), jmpSize);
 				save(size_ - jmpSize, offset, jmpSize, inner::LaddTop);
 			} else {
 				db(size_t(top_) + offset, jmpSize);
 			}
 			return;
 		}
-		db(uint64(0), jmpSize);
+		db(uint64_t(0), jmpSize);
 		JmpLabel jmp(size_, jmpSize, (relative ? inner::LasIs : isAutoGrow() ? inner::LaddTop : inner::Labs), disp);
 		labelMgr_.addUndefinedLabel(label, jmp);
 	}
-	void opMovxx(const Reg& reg, const Operand& op, uint8 code)
+	void opMovxx(const Reg& reg, const Operand& op, uint8_t code)
 	{
 		if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION)
 		int w = op.isBit(16);
@@ -2084,10 +2091,10 @@ class CodeGenerator : public CodeArray {
 		bool cond = reg.isREG() && (reg.getBit() > op.getBit());
 		opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w);
 	}
-	void opFpuMem(const Address& addr, uint8 m16, uint8 m32, uint8 m64, uint8 ext, uint8 m64ext)
+	void opFpuMem(const Address& addr, uint8_t m16, uint8_t m32, uint8_t m64, uint8_t ext, uint8_t m64ext)
 	{
 		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
-		uint8 code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0;
+		uint8_t code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0;
 		if (!code) XBYAK_THROW(ERR_BAD_MEM_SIZE)
 		if (m64ext && addr.isBit(64)) ext = m64ext;
 
@@ -2097,14 +2104,14 @@ class CodeGenerator : public CodeArray {
 	}
 	// use code1 if reg1 == st0
 	// use code2 if reg1 != st0 && reg2 == st0
-	void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32 code1, uint32 code2)
+	void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32_t code1, uint32_t code2)
 	{
-		uint32 code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0;
+		uint32_t code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0;
 		if (!code) XBYAK_THROW(ERR_BAD_ST_COMBINATION)
-		db(uint8(code >> 8));
-		db(uint8(code | (reg1.getIdx() | reg2.getIdx())));
+		db(uint8_t(code >> 8));
+		db(uint8_t(code | (reg1.getIdx() | reg2.getIdx())));
 	}
-	void opFpu(const Fpu& reg, uint8 code1, uint8 code2)
+	void opFpu(const Fpu& reg, uint8_t code1, uint8_t code2)
 	{
 		db(code1); db(code2 | reg.getIdx());
 	}
@@ -2145,7 +2152,7 @@ class CodeGenerator : public CodeArray {
 	}
 	// (r, r, r/m) if isR_R_RM
 	// (r, r/m, r)
-	void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8 code, bool isR_R_RM, int imm8 = NONE)
+	void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8_t code, bool isR_R_RM, int imm8 = NONE)
 	{
 		const Operand *p1 = &op1;
 		const Operand *p2 = &op2;
@@ -2188,7 +2195,7 @@ class CodeGenerator : public CodeArray {
 		Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM;
 		opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
 	}
-	void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8 code)
+	void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8_t code)
 	{
 		if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
 		Xmm x(op.getIdx());
@@ -2205,7 +2212,7 @@ class CodeGenerator : public CodeArray {
 		opAVX_X_X_XM(x, cvtIdx0(x), op, type, code, imm8);
 	}
 	// QQQ:need to refactor
-	void opSp1(const Reg& reg, const Operand& op, uint8 pref, uint8 code0, uint8 code1)
+	void opSp1(const Reg& reg, const Operand& op, uint8_t pref, uint8_t code0, uint8_t code1)
 	{
 		if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
 		bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM());
@@ -2213,7 +2220,7 @@ class CodeGenerator : public CodeArray {
 		if (is16bit) db(0x66);
 		db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1);
 	}
-	void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int mode)
+	void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8_t code, int mode)
 	{
 		const RegExp& regExp = addr.getRegExp();
 		if (!regExp.isVsib(128 | 256)) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
@@ -2252,7 +2259,7 @@ class CodeGenerator : public CodeArray {
 		}
 		XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
 	}
-	void opGather2(const Xmm& x, const Address& addr, int type, uint8 code, int mode)
+	void opGather2(const Xmm& x, const Address& addr, int type, uint8_t code, int mode)
 	{
 		if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
 		checkGather2(x, addr.getRegExp().getIndex(), mode);
@@ -2262,7 +2269,7 @@ class CodeGenerator : public CodeArray {
 		xx_xy_yz ; mode = true
 		xx_xy_xz ; mode = false
 	*/
-	void opVmov(const Operand& op, const Xmm& x, int type, uint8 code, bool mode)
+	void opVmov(const Operand& op, const Xmm& x, int type, uint8_t code, bool mode)
 	{
 		if (mode) {
 			if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
@@ -2271,13 +2278,13 @@ class CodeGenerator : public CodeArray {
 		}
 		opVex(x, 0, op, type, code);
 	}
-	void opGatherFetch(const Address& addr, const Xmm& x, int type, uint8 code, Operand::Kind kind)
+	void opGatherFetch(const Address& addr, const Xmm& x, int type, uint8_t code, Operand::Kind kind)
 	{
 		if (addr.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
 		if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
 		opVex(x, 0, addr, type, code);
 	}
-	void opInOut(const Reg& a, const Reg& d, uint8 code)
+	void opInOut(const Reg& a, const Reg& d, uint8_t code)
 	{
 		if (a.getIdx() == Operand::AL && d.getIdx() == Operand::DX && d.getBit() == 16) {
 			switch (a.getBit()) {
@@ -2288,7 +2295,7 @@ class CodeGenerator : public CodeArray {
 		}
 		XBYAK_THROW(ERR_BAD_COMBINATION)
 	}
-	void opInOut(const Reg& a, uint8 code, uint8 v)
+	void opInOut(const Reg& a, uint8_t code, uint8_t v)
 	{
 		if (a.getIdx() == Operand::AL) {
 			switch (a.getBit()) {
@@ -2404,7 +2411,7 @@ class CodeGenerator : public CodeArray {
 	{
 		opModRM(reg, op, op.isREG() && (op.getKind() == reg.getKind()), op.isMEM(), 0x84);
 	}
-	void test(const Operand& op, uint32 imm)
+	void test(const Operand& op, uint32_t imm)
 	{
 		verifyMemHasSize(op);
         int immSize = (std::min)(op.getBit() / 8, 4U);
@@ -2429,18 +2436,18 @@ class CodeGenerator : public CodeArray {
 	}
 	void push(const Operand& op) { opPushPop(op, 0xFF, 6, 0x50); }
 	void pop(const Operand& op) { opPushPop(op, 0x8F, 0, 0x58); }
-	void push(const AddressFrame& af, uint32 imm)
+	void push(const AddressFrame& af, uint32_t imm)
 	{
-		if (af.bit_ == 8 && inner::IsInDisp8(imm)) {
+		if (af.bit_ == 8) {
 			db(0x6A); db(imm);
-		} else if (af.bit_ == 16 && isInDisp16(imm)) {
+		} else if (af.bit_ == 16) {
 			db(0x66); db(0x68); dw(imm);
 		} else {
 			db(0x68); dd(imm);
 		}
 	}
 	/* use "push(word, 4)" if you want "push word 4" */
-	void push(uint32 imm)
+	void push(uint32_t imm)
 	{
 		if (inner::IsInDisp8(imm)) {
 			push(byte, imm);
@@ -2452,7 +2459,7 @@ class CodeGenerator : public CodeArray {
 	{
 		const Reg *reg = 0;
 		const Address *addr = 0;
-		uint8 code = 0;
+		uint8_t code = 0;
 		if (reg1.isREG() && reg1.getIdx() == 0 && reg2.isMEM()) { // mov eax|ax|al, [disp]
 			reg = &reg1.getReg();
 			addr= &reg2.getAddress();
@@ -2477,14 +2484,14 @@ class CodeGenerator : public CodeArray {
 		if (code && addr->isOnlyDisp()) {
 			rex(*reg, *addr);
 			db(code | (reg->isBit(8) ? 0 : 1));
-			dd(static_cast<uint32>(addr->getDisp()));
+			dd(static_cast<uint32_t>(addr->getDisp()));
 		} else
 #endif
 		{
 			opRM_RM(reg1, reg2, 0x88);
 		}
 	}
-	void mov(const Operand& op, uint64 imm)
+	void mov(const Operand& op, uint64_t imm)
 	{
 		if (op.isREG()) {
 			const int size = mov_imm(op.getReg(), imm);
@@ -2493,27 +2500,24 @@ class CodeGenerator : public CodeArray {
 			verifyMemHasSize(op);
 			int immSize = op.getBit() / 8;
 			if (immSize <= 4) {
-				sint64 s = sint64(imm) >> (immSize * 8);
+				int64_t s = int64_t(imm) >> (immSize * 8);
 				if (s != 0 && s != -1) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
 			} else {
 				if (!inner::IsInInt32(imm)) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
 				immSize = 4;
 			}
 			opModM(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0xC6, NONE, NONE, immSize);
-			db(static_cast<uint32>(imm), immSize);
+			db(static_cast<uint32_t>(imm), immSize);
 		} else {
 			XBYAK_THROW(ERR_BAD_COMBINATION)
 		}
 	}
-	void mov(const NativeReg& reg, const char *label) // can't use std::string
-	{
-		if (label == 0) {
-			mov(static_cast<const Operand&>(reg), 0); // call imm
-			return;
-		}
-		mov_imm(reg, dummyAddr);
-		putL(label);
-	}
+
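+	// mov(reg, pointer) is a template to avoid ambiguity when the 2nd argument is 0:
+	// a literal 0 cannot deduce T2, so that call goes to `void mov(const Operand& op, uint64_t imm)`.
+	// Only the (NativeReg, const char *) specialization is usable; any other instantiation fails to compile on `T1::unexpected`.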
+	template <typename T1, typename T2>
+	void mov(const T1&, const T2 *) { T1::unexpected; }
 	void mov(const NativeReg& reg, const Label& label)
 	{
 		mov_imm(reg, dummyAddr);
@@ -2695,7 +2699,7 @@ class CodeGenerator : public CodeArray {
 			AMD and Intel seem to agree on the same sequences for up to 9 bytes:
 			https://support.amd.com/TechDocs/55723_SOG_Fam_17h_Processors_3.00.pdf
 		*/
-		static const uint8 nopTbl[9][9] = {
+		static const uint8_t nopTbl[9][9] = {
 			{0x90},
 			{0x66, 0x90},
 			{0x0F, 0x1F, 0x00},
@@ -2709,7 +2713,7 @@ class CodeGenerator : public CodeArray {
 		const size_t n = sizeof(nopTbl) / sizeof(nopTbl[0]);
 		while (size > 0) {
 			size_t len = (std::min)(n, size);
-			const uint8 *seq = nopTbl[len - 1];
+			const uint8_t *seq = nopTbl[len - 1];
 			db(seq, len);
 			size -= len;
 		}
@@ -2733,40 +2737,48 @@ class CodeGenerator : public CodeArray {
 #endif
 };
 
+template <>
+inline void CodeGenerator::mov(const NativeReg& reg, const char *label) // can't use std::string
+{
+	assert(label);
+	mov_imm(reg, dummyAddr);
+	putL(label);
+}
+
 namespace util {
-static const Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7);
-static const Xmm xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7);
-static const Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7);
-static const Zmm zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7);
-static const Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI);
-static const Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI);
-static const Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH);
-static const AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512);
-static const AddressFrame ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true);
-static const Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7);
-static const Opmask k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7);
-static const BoundsReg bnd0(0), bnd1(1), bnd2(2), bnd3(3);
-static const EvexModifierRounding T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE);
-static const EvexModifierZero T_z;
+static const XBYAK_CONSTEXPR Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7);
+static const XBYAK_CONSTEXPR Xmm xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7);
+static const XBYAK_CONSTEXPR Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7);
+static const XBYAK_CONSTEXPR Zmm zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7);
+static const XBYAK_CONSTEXPR Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI);
+static const XBYAK_CONSTEXPR Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI);
+static const XBYAK_CONSTEXPR Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH);
+static const XBYAK_CONSTEXPR AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512);
+static const XBYAK_CONSTEXPR AddressFrame ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true);
+static const XBYAK_CONSTEXPR Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7);
+static const XBYAK_CONSTEXPR Opmask k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7);
+static const XBYAK_CONSTEXPR BoundsReg bnd0(0), bnd1(1), bnd2(2), bnd3(3);
+static const XBYAK_CONSTEXPR EvexModifierRounding T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE);
+static const XBYAK_CONSTEXPR EvexModifierZero T_z;
 #ifdef XBYAK64
-static const Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15);
-static const Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15);
-static const Reg16 r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15);
-static const Reg8 r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15), spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true);
-static const Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15);
-static const Xmm xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23);
-static const Xmm xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31);
-static const Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15);
-static const Ymm ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23);
-static const Ymm ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31);
-static const Zmm zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15);
-static const Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23);
-static const Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31);
-static const Tmm tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7);
-static const RegRip rip;
+static const XBYAK_CONSTEXPR Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15);
+static const XBYAK_CONSTEXPR Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15);
+static const XBYAK_CONSTEXPR Reg16 r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15);
+static const XBYAK_CONSTEXPR Reg8 r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15), spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true);
+static const XBYAK_CONSTEXPR Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15);
+static const XBYAK_CONSTEXPR Xmm xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23);
+static const XBYAK_CONSTEXPR Xmm xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31);
+static const XBYAK_CONSTEXPR Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15);
+static const XBYAK_CONSTEXPR Ymm ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23);
+static const XBYAK_CONSTEXPR Ymm ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31);
+static const XBYAK_CONSTEXPR Zmm zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15);
+static const XBYAK_CONSTEXPR Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23);
+static const XBYAK_CONSTEXPR Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31);
+static const XBYAK_CONSTEXPR Zmm tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7);
+static const XBYAK_CONSTEXPR RegRip rip;
 #endif
 #ifndef XBYAK_DISABLE_SEGMENT
-static const Segment es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs);
+static const XBYAK_CONSTEXPR Segment es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs);
 #endif
 } // util
 
diff --git a/src/xbyak/xbyak_mnemonic.h b/src/xbyak/xbyak_mnemonic.h
index 393a8dcc..85e8bed5 100644
--- a/src/xbyak/xbyak_mnemonic.h
+++ b/src/xbyak/xbyak_mnemonic.h
@@ -1,8 +1,8 @@
-const char *getVersionString() const { return "5.941"; }
-void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
+const char *getVersionString() const { return "5.97"; }
+void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
 void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
 void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
-void add(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x00, 0); }
+void add(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x00, 0); }
 void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); }
 void addpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x66, isXMM_XMMorMEM); }
 void addps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x100, isXMM_XMMorMEM); }
@@ -16,8 +16,8 @@ void aesdeclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDF, 0x66,
 void aesenc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDC, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
 void aesenclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDD, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
 void aesimc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDB, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void aeskeygenassist(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
-void and_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); }
+void aeskeygenassist(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
+void and_(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x20, 4); }
 void and_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); }
 void andn(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_0F38, 0xf2, true); }
 void andnpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x66, isXMM_XMMorMEM); }
@@ -25,8 +25,8 @@ void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isX
 void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); }
 void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); }
 void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf7, false); }
-void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
-void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
+void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
+void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
 void blendvpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
 void blendvps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
 void blsi(const Reg32e& r, const Operand& op) { opGpr(Reg32e(3, r.getBit()), op, r, T_0F38, 0xf3, false); }
@@ -45,13 +45,13 @@ void bsf(const Reg&reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e
 void bsr(const Reg&reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); }
 void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }
 void bt(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xA3); }
-void bt(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 4, 0x0f, 0xba, NONE, false, 1); db(imm); }
+void bt(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 4, 0x0f, 0xba, NONE, false, 1); db(imm); }
 void btc(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xBB); }
-void btc(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 7, 0x0f, 0xba, NONE, false, 1); db(imm); }
+void btc(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 7, 0x0f, 0xba, NONE, false, 1); db(imm); }
 void btr(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xB3); }
-void btr(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 6, 0x0f, 0xba, NONE, false, 1); db(imm); }
+void btr(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 6, 0x0f, 0xba, NONE, false, 1); db(imm); }
 void bts(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xAB); }
-void bts(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 5, 0x0f, 0xba, NONE, false, 1); db(imm); }
+void bts(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 5, 0x0f, 0xba, NONE, false, 1); db(imm); }
 void bzhi(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf5, false); }
 void cbw() { db(0x66); db(0x98); }
 void cdq() { db(0x99); }
@@ -92,7 +92,7 @@ void cmovpe(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 |
 void cmovpo(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 11); }//-V524
 void cmovs(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 8); }//-V524
 void cmovz(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 4); }//-V524
-void cmp(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x38, 7); }
+void cmp(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x38, 7); }
 void cmp(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x38); }
 void cmpeqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 0); }
 void cmpeqps(const Xmm& x, const Operand& op) { cmpps(x, op, 0); }
@@ -122,12 +122,12 @@ void cmpordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 7); }
 void cmpordps(const Xmm& x, const Operand& op) { cmpps(x, op, 7); }
 void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); }
 void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); }
-void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); }
-void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); }
+void cmppd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); }
+void cmpps(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); }
 void cmpsb() { db(0xA6); }
 void cmpsd() { db(0xA7); }
-void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); }
-void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); }
+void cmpsd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); }
+void cmpss(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); }
 void cmpsw() { db(0x66); db(0xA7); }
 void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); }
 void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); }
@@ -169,11 +169,11 @@ void divpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x66, isXMM
 void divps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x100, isXMM_XMMorMEM); }
 void divsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF2, isXMM_XMMorMEM); }
 void divss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF3, isXMM_XMMorMEM); }
-void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
-void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
+void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
+void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
 void emms() { db(0x0F); db(0x77); }
-void enter(uint16 x, uint8 y) { db(0xC8); dw(x); db(y); }
-void extractps(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x17, imm); }
+void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); }
+void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); }
 void f2xm1() { db(0xD9); db(0xF0); }
 void fabs() { db(0xD9); db(0xE1); }
 void fadd(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 0, 0); }
@@ -316,8 +316,8 @@ void fxrstor(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xAE); }
 void fxtract() { db(0xD9); db(0xF4); }
 void fyl2x() { db(0xD9); db(0xF1); }
 void fyl2xp1() { db(0xD9); db(0xF9); }
-void gf2p8affineinvqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
-void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCE, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
+void gf2p8affineinvqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
+void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCE, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
 void gf2p8mulb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
 void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); }
 void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); }
@@ -326,11 +326,11 @@ void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXM
 void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); }
 void imul(const Operand& op) { opR_ModM(op, 0, 5, 0xF6); }
 void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); }
-void in_(const Reg& a, uint8 v) { opInOut(a, 0xE4, v); }
+void in_(const Reg& a, uint8_t v) { opInOut(a, 0xE4, v); }
 void inc(const Operand& op) { opIncDec(op, 0x40, 0); }
-void insertps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
+void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
 void int3() { db(0xCC); }
-void int_(uint8 x) { db(0xCD); db(x); }
+void int_(uint8_t x) { db(0xCD); db(x); }
 void ja(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }//-V524
 void ja(const char *label, LabelType type = T_AUTO) { ja(std::string(label), type); }//-V524
 void ja(const void *addr) { opJmpAbs(addr, T_NEAR, 0x77, 0x87, 0x0F); }//-V524
@@ -535,7 +535,7 @@ void movupd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x66); }
 void movups(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x11); }
 void movups(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x100); }
 void movzx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xB6); }
-void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x42, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
+void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x42, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
 void mul(const Operand& op) { opR_ModM(op, 0, 4, 0xF6); }
 void mulpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x66, isXMM_XMMorMEM); }
 void mulps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x100, isXMM_XMMorMEM); }
@@ -546,12 +546,12 @@ void mwait() { db(0x0F); db(0x01); db(0xC9); }
 void mwaitx() { db(0x0F); db(0x01); db(0xFB); }
 void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); }
 void not_(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); }
-void or_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); }
+void or_(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x08, 1); }
 void or_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); }
 void orpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x66, isXMM_XMMorMEM); }
 void orps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x100, isXMM_XMMorMEM); }
 void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); }
-void out_(uint8 v, const Reg& a) { opInOut(a, 0xE6, v); }
+void out_(uint8_t v, const Reg& a) { opInOut(a, 0xE6, v); }
 void outsb() { db(0x6E); }
 void outsd() { db(0x6F); }
 void outsw() { db(0x66); db(0x6F); }
@@ -570,36 +570,36 @@ void paddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xED); }
 void paddusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDC); }
 void paddusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDD); }
 void paddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFD); }
-void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast<uint8>(imm), 0x3a); }
+void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast<uint8_t>(imm), 0x3a); }
 void pand(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDB); }
 void pandn(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDF); }
 void pause() { db(0xF3); db(0x90); }
 void pavgb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE0); }
 void pavgw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE3); }
 void pblendvb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x10, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pblendw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0E, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
+void pblendw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0E, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
 void pclmulhqhdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x11); }
 void pclmulhqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x01); }
 void pclmullqhdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x10); }
 void pclmullqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x00); }
-void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x44, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
+void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x44, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
 void pcmpeqb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x74); }
 void pcmpeqd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x76); }
 void pcmpeqq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x29, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
 void pcmpeqw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x75); }
-void pcmpestri(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
-void pcmpestrm(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x60, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
+void pcmpestri(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
+void pcmpestrm(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x60, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
 void pcmpgtb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x64); }
 void pcmpgtd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x66); }
 void pcmpgtq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x37, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
 void pcmpgtw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x65); }
-void pcmpistri(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
-void pcmpistrm(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
+void pcmpistri(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
+void pcmpistrm(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
 void pdep(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F2 | T_0F38, 0xf5, true); }
 void pext(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F3 | T_0F38, 0xf5, true); }
-void pextrb(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x14, imm); }
-void pextrd(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x16, imm); }
-void pextrw(const Operand& op, const Mmx& xmm, uint8 imm) { opExt(op, xmm, 0x15, imm, true); }
+void pextrb(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x14, imm); }
+void pextrd(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x16, imm); }
+void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); }
 void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, 0x66, NONE, 0x38); }
 void phaddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x03, 0x66, NONE, 0x38); }
 void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, 0x66, NONE, 0x38); }
@@ -607,8 +607,8 @@ void phminposuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x41, 0x66,
 void phsubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x06, 0x66, NONE, 0x38); }
 void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, 0x66, NONE, 0x38); }
 void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, 0x66, NONE, 0x38); }
-void pinsrb(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); }
-void pinsrd(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); }
+void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); }
+void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); }
 void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(mmx, op, 0xC4, mmx.isXMM() ? 0x66 : NONE, 0, imm); }
 void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, 0x66, NONE, 0x38); }
 void pmaddwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF5); }
@@ -655,10 +655,10 @@ void prefetchw(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0x0D); }
 void prefetchwt1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0x0D); }
 void psadbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF6); }
 void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, 0x66, NONE, 0x38); }
-void pshufd(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x66, imm8); }
-void pshufhw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF3, imm8); }
-void pshuflw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF2, imm8); }
-void pshufw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x00, imm8); }
+void pshufd(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0x66, imm8); }
+void pshufhw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0xF3, imm8); }
+void pshuflw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0xF2, imm8); }
+void pshufw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0x00, imm8); }
 void psignb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x08, 0x66, NONE, 0x38); }
 void psignd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0A, 0x66, NONE, 0x38); }
 void psignw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x09, 0x66, NONE, 0x38); }
@@ -721,11 +721,11 @@ void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); }
 void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
 void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 1); }
 void ror(const Operand& op, int imm) { opShift(op, imm, 1); }
-void rorx(const Reg32e& r, const Operand& op, uint8 imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); }
-void roundpd(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x09, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
-void roundps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x08, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
-void roundsd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0B, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
-void roundss(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0A, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
+void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); }
+void roundpd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x09, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
+void roundps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x08, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
+void roundsd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0B, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
+void roundss(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0A, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
 void rsqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0x100, isXMM_XMMorMEM); }
 void rsqrtss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0xF3, isXMM_XMMorMEM); }
 void sahf() { db(0x9E); }
@@ -734,7 +734,7 @@ void sal(const Operand& op, int imm) { opShift(op, imm, 4); }
 void sar(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 7); }
 void sar(const Operand& op, int imm) { opShift(op, imm, 7); }
 void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F3 | T_0F38, 0xf7, false); }
-void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); }
+void sbb(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x18, 3); }
 void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
 void scasb() { db(0xAE); }
 void scasd() { db(0xAF); }
@@ -773,22 +773,22 @@ void sfence() { db(0x0F); db(0xAE); db(0xF8); }
 void sha1msg1(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xC9, NONE, isXMM_XMMorMEM, NONE, 0x38); }
 void sha1msg2(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCA, NONE, isXMM_XMMorMEM, NONE, 0x38); }
 void sha1nexte(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xC8, NONE, isXMM_XMMorMEM, NONE, 0x38); }
-void sha1rnds4(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0xCC, NONE, isXMM_XMMorMEM, imm, 0x3A); }
+void sha1rnds4(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0xCC, NONE, isXMM_XMMorMEM, imm, 0x3A); }
 void sha256msg1(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCC, NONE, isXMM_XMMorMEM, NONE, 0x38); }
 void sha256msg2(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCD, NONE, isXMM_XMMorMEM, NONE, 0x38); }
 void sha256rnds2(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCB, NONE, isXMM_XMMorMEM, NONE, 0x38); }
 void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 4); }
 void shl(const Operand& op, int imm) { opShift(op, imm, 4); }
 void shld(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0xA4, &_cl); }
-void shld(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xA4); }
+void shld(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0xA4); }
 void shlx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_66 | T_0F38, 0xf7, false); }
 void shr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 5); }
 void shr(const Operand& op, int imm) { opShift(op, imm, 5); }
 void shrd(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0xAC, &_cl); }
-void shrd(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xAC); }
+void shrd(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0xAC); }
 void shrx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F2 | T_0F38, 0xf7, false); }
-void shufpd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x66, isXMM_XMMorMEM, imm8); }
-void shufps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x100, isXMM_XMMorMEM, imm8); }
+void shufpd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC6, 0x66, isXMM_XMMorMEM, imm8); }
+void shufps(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC6, 0x100, isXMM_XMMorMEM, imm8); }
 void sqrtpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x66, isXMM_XMMorMEM); }
 void sqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x100, isXMM_XMMorMEM); }
 void sqrtsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0xF2, isXMM_XMMorMEM); }
@@ -801,7 +801,7 @@ void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
 void stosb() { db(0xAA); }
 void stosd() { db(0xAB); }
 void stosw() { db(0x66); db(0xAB); }
-void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); }
+void sub(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x28, 5); }
 void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); }
 void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); }
 void subps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x100, isXMM_XMMorMEM); }
@@ -828,13 +828,13 @@ void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operan
 void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDC); }
 void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDD); }
 void vaesimc(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_W0, 0xDB); }
-void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0xDF, imm); }
+void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0xDF, imm); }
 void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x55); }
 void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55); }
 void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54); }
 void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54); }
-void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); }
-void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); }
+void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); }
+void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); }
 void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4B, x4.getIdx() << 4); }
 void vblendvps(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4A, x4.getIdx() << 4); }
 void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A); }
@@ -953,10 +953,10 @@ void vcmpordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2,
 void vcmpordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 7); }
 void vcmpordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 7); }
 void vcmpordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 7); }
-void vcmppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xC2, imm); }
-void vcmpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_YMM, 0xC2, imm); }
-void vcmpsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F, 0xC2, imm); }
-void vcmpss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0xC2, imm); }
+void vcmppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xC2, imm); }
+void vcmpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_YMM, 0xC2, imm); }
+void vcmpsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F, 0xC2, imm); }
+void vcmpss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0xC2, imm); }
 void vcmptrue_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 31); }
 void vcmptrue_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 31); }
 void vcmptrue_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 31); }
@@ -982,7 +982,7 @@ void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_
 void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
 void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); }
 void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); }
-void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }
+void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }
 void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); }
 void vcvtsd2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x5A); }
 void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }
@@ -997,11 +997,11 @@ void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
 void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5E); }
 void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5E); }
 void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5E); }
-void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x41, imm); }
-void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x40, imm); }
-void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }
-void vextracti128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }
-void vextractps(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }
+void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x41, imm); }
+void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x40, imm); }
+void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }
+void vextracti128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }
+void vextractps(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }
 void vfmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x98); }
 void vfmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x98); }
 void vfmadd132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0x99); }
@@ -1066,16 +1066,16 @@ void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1
 void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x92, 1); }
 void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x93, 1); }
 void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x93, 2); }
-void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); }
-void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); }
+void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); }
+void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); }
 void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF); }
 void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7C); }
 void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7C); }
 void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7D); }
 void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D); }
-void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }
-void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }
-void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
+void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }
+void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }
+void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
 void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }
 void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }
 void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }
@@ -1133,7 +1133,7 @@ void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_
 void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x10); }
 void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x11); }
 void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); }
-void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); }
+void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); }
 void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); }
 void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x59); }
 void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x59); }
@@ -1155,47 +1155,47 @@ void vpaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1,
 void vpaddusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDC); }
 void vpaddusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDD); }
 void vpaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xFD); }
-void vpalignr(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_YMM | T_EVEX, 0x0F, imm); }
+void vpalignr(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_YMM | T_EVEX, 0x0F, imm); }
 void vpand(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xDB); }
 void vpandn(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xDF); }
 void vpavgb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE0); }
 void vpavgw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE3); }
-void vpblendd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x02, imm); }
+void vpblendd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x02, imm); }
 void vpblendvb(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4C, x4.getIdx() << 4); }
-void vpblendw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0E, imm); }
+void vpblendw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0E, imm); }
 void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x78); }
 void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x58); }
 void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX, 0x59); }
 void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x79); }
-void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM | T_EVEX, 0x44, imm); }
+void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM | T_EVEX, 0x44, imm); }
 void vpcmpeqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x74); }
 void vpcmpeqd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x76); }
 void vpcmpeqq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x29); }
 void vpcmpeqw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x75); }
-void vpcmpestri(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x61, imm); }
-void vpcmpestrm(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x60, imm); }
+void vpcmpestri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x61, imm); }
+void vpcmpestrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x60, imm); }
 void vpcmpgtb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x64); }
 void vpcmpgtd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x66); }
 void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x37); }
 void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x65); }
-void vpcmpistri(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); }
-void vpcmpistrm(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); }
-void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
-void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
+void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); }
+void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); }
+void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
+void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
 void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); }
 void vpermilpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x0D); }
-void vpermilpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_EVEX | T_B64, 0x05, imm); }
+void vpermilpd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_EVEX | T_B64, 0x05, imm); }
 void vpermilps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x0C); }
-void vpermilps(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_EVEX | T_B32, 0x04, imm); }
-void vpermpd(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x01, imm); }
+void vpermilps(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_EVEX | T_B32, 0x04, imm); }
+void vpermpd(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x01, imm); }
 void vpermpd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x16); }
 void vpermps(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x16); }
-void vpermq(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x00, imm); }
+void vpermq(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x00, imm); }
 void vpermq(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x36); }
-void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }
-void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }
-void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }
-void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }
+void vpextrb(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }
+void vpextrd(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }
+void vpextrq(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }
+void vpextrw(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }
 void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x90, 1); }
 void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x90, 0); }
 void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x91, 2); }
@@ -1207,10 +1207,10 @@ void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66
 void vphsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x06); }
 void vphsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x07); }
 void vphsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x05); }
-void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }
-void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }
-void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }
-void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }
+void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }
+void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }
+void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }
+void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }
 void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x04); }
 void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF5); }
 void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); }
@@ -1252,34 +1252,34 @@ void vpmuludq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1
 void vpor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xEB); }
 void vpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF6); }
 void vpshufb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x00); }
-void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x70, imm); }
-void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_EVEX, 0x70, imm); }
-void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_YMM | T_EVEX, 0x70, imm); }
+void vpshufd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x70, imm); }
+void vpshufhw(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_EVEX, 0x70, imm); }
+void vpshuflw(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_YMM | T_EVEX, 0x70, imm); }
 void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); }
 void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); }
 void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); }
-void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
+void vpslld(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
 void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); }
-void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
-void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
+void vpslldq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
+void vpsllq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
 void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); }
 void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); }
 void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); }
-void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
+void vpsllw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
 void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); }
-void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
+void vpsrad(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
 void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); }
 void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); }
-void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
+void vpsraw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
 void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); }
-void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
+void vpsrld(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
 void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); }
-void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
-void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
+void vpsrldq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
+void vpsrlq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
 void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); }
 void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); }
 void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); }
-void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
+void vpsrlw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
 void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); }
 void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); }
 void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); }
@@ -1301,14 +1301,14 @@ void vpunpcklwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(
 void vpxor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xEF); }
 void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x53); }
 void vrcpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0x53); }
-void vroundpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x09, imm); }
-void vroundps(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x08, imm); }
-void vroundsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0B, imm); }
-void vroundss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0A, imm); }
+void vroundpd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x09, imm); }
+void vroundps(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x08, imm); }
+void vroundsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0B, imm); }
+void vroundss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0A, imm); }
 void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x52); }
 void vrsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0x52); }
-void vshufpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xC6, imm); }
-void vshufps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xC6, imm); }
+void vshufpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xC6, imm); }
+void vshufps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xC6, imm); }
 void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x51); }
 void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x51); }
 void vsqrtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x51); }
@@ -1336,13 +1336,13 @@ void wrmsr() { db(0x0F); db(0x30); }
 void xadd(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); }
 void xgetbv() { db(0x0F); db(0x01); db(0xD0); }
 void xlatb() { db(0xD7); }
-void xor_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); }
+void xor_(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x30, 6); }
 void xor_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); }
 void xorpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x66, isXMM_XMMorMEM); }
 void xorps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x100, isXMM_XMMorMEM); }
 #ifdef XBYAK_ENABLE_OMITTED_OPERAND
-void vblendpd(const Xmm& x, const Operand& op, uint8 imm) { vblendpd(x, x, op, imm); }
-void vblendps(const Xmm& x, const Operand& op, uint8 imm) { vblendps(x, x, op, imm); }
+void vblendpd(const Xmm& x, const Operand& op, uint8_t imm) { vblendpd(x, x, op, imm); }
+void vblendps(const Xmm& x, const Operand& op, uint8_t imm) { vblendps(x, x, op, imm); }
 void vblendvpd(const Xmm& x1, const Operand& op, const Xmm& x4) { vblendvpd(x1, x1, op, x4); }
 void vblendvps(const Xmm& x1, const Operand& op, const Xmm& x4) { vblendvps(x1, x1, op, x4); }
 void vcmpeq_ospd(const Xmm& x, const Operand& op) { vcmpeq_ospd(x, x, op); }
@@ -1457,10 +1457,10 @@ void vcmpordpd(const Xmm& x, const Operand& op) { vcmpordpd(x, x, op); }
 void vcmpordps(const Xmm& x, const Operand& op) { vcmpordps(x, x, op); }
 void vcmpordsd(const Xmm& x, const Operand& op) { vcmpordsd(x, x, op); }
 void vcmpordss(const Xmm& x, const Operand& op) { vcmpordss(x, x, op); }
-void vcmppd(const Xmm& x, const Operand& op, uint8 imm) { vcmppd(x, x, op, imm); }
-void vcmpps(const Xmm& x, const Operand& op, uint8 imm) { vcmpps(x, x, op, imm); }
-void vcmpsd(const Xmm& x, const Operand& op, uint8 imm) { vcmpsd(x, x, op, imm); }
-void vcmpss(const Xmm& x, const Operand& op, uint8 imm) { vcmpss(x, x, op, imm); }
+void vcmppd(const Xmm& x, const Operand& op, uint8_t imm) { vcmppd(x, x, op, imm); }
+void vcmpps(const Xmm& x, const Operand& op, uint8_t imm) { vcmpps(x, x, op, imm); }
+void vcmpsd(const Xmm& x, const Operand& op, uint8_t imm) { vcmpsd(x, x, op, imm); }
+void vcmpss(const Xmm& x, const Operand& op, uint8_t imm) { vcmpss(x, x, op, imm); }
 void vcmptrue_uspd(const Xmm& x, const Operand& op) { vcmptrue_uspd(x, x, op); }
 void vcmptrue_usps(const Xmm& x, const Operand& op) { vcmptrue_usps(x, x, op); }
 void vcmptrue_ussd(const Xmm& x, const Operand& op) { vcmptrue_ussd(x, x, op); }
@@ -1481,10 +1481,10 @@ void vcvtsd2ss(const Xmm& x, const Operand& op) { vcvtsd2ss(x, x, op); }
 void vcvtsi2sd(const Xmm& x, const Operand& op) { vcvtsi2sd(x, x, op); }
 void vcvtsi2ss(const Xmm& x, const Operand& op) { vcvtsi2ss(x, x, op); }
 void vcvtss2sd(const Xmm& x, const Operand& op) { vcvtss2sd(x, x, op); }
-void vdppd(const Xmm& x, const Operand& op, uint8 imm) { vdppd(x, x, op, imm); }
-void vdpps(const Xmm& x, const Operand& op, uint8 imm) { vdpps(x, x, op, imm); }
-void vinsertps(const Xmm& x, const Operand& op, uint8 imm) { vinsertps(x, x, op, imm); }
-void vmpsadbw(const Xmm& x, const Operand& op, uint8 imm) { vmpsadbw(x, x, op, imm); }
+void vdppd(const Xmm& x, const Operand& op, uint8_t imm) { vdppd(x, x, op, imm); }
+void vdpps(const Xmm& x, const Operand& op, uint8_t imm) { vdpps(x, x, op, imm); }
+void vinsertps(const Xmm& x, const Operand& op, uint8_t imm) { vinsertps(x, x, op, imm); }
+void vmpsadbw(const Xmm& x, const Operand& op, uint8_t imm) { vmpsadbw(x, x, op, imm); }
 void vpackssdw(const Xmm& x, const Operand& op) { vpackssdw(x, x, op); }
 void vpacksswb(const Xmm& x, const Operand& op) { vpacksswb(x, x, op); }
 void vpackusdw(const Xmm& x, const Operand& op) { vpackusdw(x, x, op); }
@@ -1497,15 +1497,15 @@ void vpaddsw(const Xmm& x, const Operand& op) { vpaddsw(x, x, op); }
 void vpaddusb(const Xmm& x, const Operand& op) { vpaddusb(x, x, op); }
 void vpaddusw(const Xmm& x, const Operand& op) { vpaddusw(x, x, op); }
 void vpaddw(const Xmm& x, const Operand& op) { vpaddw(x, x, op); }
-void vpalignr(const Xmm& x, const Operand& op, uint8 imm) { vpalignr(x, x, op, imm); }
+void vpalignr(const Xmm& x, const Operand& op, uint8_t imm) { vpalignr(x, x, op, imm); }
 void vpand(const Xmm& x, const Operand& op) { vpand(x, x, op); }
 void vpandn(const Xmm& x, const Operand& op) { vpandn(x, x, op); }
 void vpavgb(const Xmm& x, const Operand& op) { vpavgb(x, x, op); }
 void vpavgw(const Xmm& x, const Operand& op) { vpavgw(x, x, op); }
-void vpblendd(const Xmm& x, const Operand& op, uint8 imm) { vpblendd(x, x, op, imm); }
+void vpblendd(const Xmm& x, const Operand& op, uint8_t imm) { vpblendd(x, x, op, imm); }
 void vpblendvb(const Xmm& x1, const Operand& op, const Xmm& x4) { vpblendvb(x1, x1, op, x4); }
-void vpblendw(const Xmm& x, const Operand& op, uint8 imm) { vpblendw(x, x, op, imm); }
-void vpclmulqdq(const Xmm& x, const Operand& op, uint8 imm) { vpclmulqdq(x, x, op, imm); }
+void vpblendw(const Xmm& x, const Operand& op, uint8_t imm) { vpblendw(x, x, op, imm); }
+void vpclmulqdq(const Xmm& x, const Operand& op, uint8_t imm) { vpclmulqdq(x, x, op, imm); }
 void vpcmpeqb(const Xmm& x, const Operand& op) { vpcmpeqb(x, x, op); }
 void vpcmpeqd(const Xmm& x, const Operand& op) { vpcmpeqd(x, x, op); }
 void vpcmpeqq(const Xmm& x, const Operand& op) { vpcmpeqq(x, x, op); }
@@ -1520,10 +1520,10 @@ void vphaddw(const Xmm& x, const Operand& op) { vphaddw(x, x, op); }
 void vphsubd(const Xmm& x, const Operand& op) { vphsubd(x, x, op); }
 void vphsubsw(const Xmm& x, const Operand& op) { vphsubsw(x, x, op); }
 void vphsubw(const Xmm& x, const Operand& op) { vphsubw(x, x, op); }
-void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { vpinsrb(x, x, op, imm); }
-void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { vpinsrd(x, x, op, imm); }
-void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) { vpinsrq(x, x, op, imm); }
-void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { vpinsrw(x, x, op, imm); }
+void vpinsrb(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrb(x, x, op, imm); }
+void vpinsrd(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrd(x, x, op, imm); }
+void vpinsrq(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrq(x, x, op, imm); }
+void vpinsrw(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrw(x, x, op, imm); }
 void vpmaddubsw(const Xmm& x, const Operand& op) { vpmaddubsw(x, x, op); }
 void vpmaddwd(const Xmm& x, const Operand& op) { vpmaddwd(x, x, op); }
 void vpmaxsb(const Xmm& x, const Operand& op) { vpmaxsb(x, x, op); }
@@ -1551,23 +1551,23 @@ void vpsignb(const Xmm& x, const Operand& op) { vpsignb(x, x, op); }
 void vpsignd(const Xmm& x, const Operand& op) { vpsignd(x, x, op); }
 void vpsignw(const Xmm& x, const Operand& op) { vpsignw(x, x, op); }
 void vpslld(const Xmm& x, const Operand& op) { vpslld(x, x, op); }
-void vpslld(const Xmm& x, uint8 imm) { vpslld(x, x, imm); }
-void vpslldq(const Xmm& x, uint8 imm) { vpslldq(x, x, imm); }
+void vpslld(const Xmm& x, uint8_t imm) { vpslld(x, x, imm); }
+void vpslldq(const Xmm& x, uint8_t imm) { vpslldq(x, x, imm); }
 void vpsllq(const Xmm& x, const Operand& op) { vpsllq(x, x, op); }
-void vpsllq(const Xmm& x, uint8 imm) { vpsllq(x, x, imm); }
+void vpsllq(const Xmm& x, uint8_t imm) { vpsllq(x, x, imm); }
 void vpsllw(const Xmm& x, const Operand& op) { vpsllw(x, x, op); }
-void vpsllw(const Xmm& x, uint8 imm) { vpsllw(x, x, imm); }
+void vpsllw(const Xmm& x, uint8_t imm) { vpsllw(x, x, imm); }
 void vpsrad(const Xmm& x, const Operand& op) { vpsrad(x, x, op); }
-void vpsrad(const Xmm& x, uint8 imm) { vpsrad(x, x, imm); }
+void vpsrad(const Xmm& x, uint8_t imm) { vpsrad(x, x, imm); }
 void vpsraw(const Xmm& x, const Operand& op) { vpsraw(x, x, op); }
-void vpsraw(const Xmm& x, uint8 imm) { vpsraw(x, x, imm); }
+void vpsraw(const Xmm& x, uint8_t imm) { vpsraw(x, x, imm); }
 void vpsrld(const Xmm& x, const Operand& op) { vpsrld(x, x, op); }
-void vpsrld(const Xmm& x, uint8 imm) { vpsrld(x, x, imm); }
-void vpsrldq(const Xmm& x, uint8 imm) { vpsrldq(x, x, imm); }
+void vpsrld(const Xmm& x, uint8_t imm) { vpsrld(x, x, imm); }
+void vpsrldq(const Xmm& x, uint8_t imm) { vpsrldq(x, x, imm); }
 void vpsrlq(const Xmm& x, const Operand& op) { vpsrlq(x, x, op); }
-void vpsrlq(const Xmm& x, uint8 imm) { vpsrlq(x, x, imm); }
+void vpsrlq(const Xmm& x, uint8_t imm) { vpsrlq(x, x, imm); }
 void vpsrlw(const Xmm& x, const Operand& op) { vpsrlw(x, x, op); }
-void vpsrlw(const Xmm& x, uint8 imm) { vpsrlw(x, x, imm); }
+void vpsrlw(const Xmm& x, uint8_t imm) { vpsrlw(x, x, imm); }
 void vpsubb(const Xmm& x, const Operand& op) { vpsubb(x, x, op); }
 void vpsubd(const Xmm& x, const Operand& op) { vpsubd(x, x, op); }
 void vpsubq(const Xmm& x, const Operand& op) { vpsubq(x, x, op); }
@@ -1586,11 +1586,11 @@ void vpunpcklqdq(const Xmm& x, const Operand& op) { vpunpcklqdq(x, x, op); }
 void vpunpcklwd(const Xmm& x, const Operand& op) { vpunpcklwd(x, x, op); }
 void vpxor(const Xmm& x, const Operand& op) { vpxor(x, x, op); }
 void vrcpss(const Xmm& x, const Operand& op) { vrcpss(x, x, op); }
-void vroundsd(const Xmm& x, const Operand& op, uint8 imm) { vroundsd(x, x, op, imm); }
-void vroundss(const Xmm& x, const Operand& op, uint8 imm) { vroundss(x, x, op, imm); }
+void vroundsd(const Xmm& x, const Operand& op, uint8_t imm) { vroundsd(x, x, op, imm); }
+void vroundss(const Xmm& x, const Operand& op, uint8_t imm) { vroundss(x, x, op, imm); }
 void vrsqrtss(const Xmm& x, const Operand& op) { vrsqrtss(x, x, op); }
-void vshufpd(const Xmm& x, const Operand& op, uint8 imm) { vshufpd(x, x, op, imm); }
-void vshufps(const Xmm& x, const Operand& op, uint8 imm) { vshufps(x, x, op, imm); }
+void vshufpd(const Xmm& x, const Operand& op, uint8_t imm) { vshufpd(x, x, op, imm); }
+void vshufps(const Xmm& x, const Operand& op, uint8_t imm) { vshufps(x, x, op, imm); }
 void vsqrtsd(const Xmm& x, const Operand& op) { vsqrtsd(x, x, op); }
 void vsqrtss(const Xmm& x, const Operand& op) { vsqrtss(x, x, op); }
 void vunpckhpd(const Xmm& x, const Operand& op) { vunpckhpd(x, x, op); }
@@ -1619,8 +1619,8 @@ void fxrstor64(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xAE); }
 void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
 void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
 void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }
-void pextrq(const Operand& op, const Xmm& xmm, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }
-void pinsrq(const Xmm& xmm, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }
+void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }
+void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }
 void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); }
 void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); }
 void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_ER_X, 0x2D); }
@@ -1662,11 +1662,11 @@ void les(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC4, 0x100
 #endif
 #ifndef XBYAK_NO_OP_NAMES
 void and(const Operand& op1, const Operand& op2) { and_(op1, op2); }
-void and(const Operand& op, uint32 imm) { and_(op, imm); }
+void and(const Operand& op, uint32_t imm) { and_(op, imm); }
 void or(const Operand& op1, const Operand& op2) { or_(op1, op2); }
-void or(const Operand& op, uint32 imm) { or_(op, imm); }
+void or(const Operand& op, uint32_t imm) { or_(op, imm); }
 void xor(const Operand& op1, const Operand& op2) { xor_(op1, op2); }
-void xor(const Operand& op, uint32 imm) { xor_(op, imm); }
+void xor(const Operand& op, uint32_t imm) { xor_(op, imm); }
 void not(const Operand& op) { not_(op); }
 #endif
 #ifndef XBYAK_DISABLE_AVX512
@@ -1708,14 +1708,14 @@ void kortestd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66
 void kortestq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x98); }
 void kortestw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x98); }
 void korw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x45); }
-void kshiftlb(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x32, imm); }
-void kshiftld(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x33, imm); }
-void kshiftlq(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x33, imm); }
-void kshiftlw(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x32, imm); }
-void kshiftrb(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x30, imm); }
-void kshiftrd(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x31, imm); }
-void kshiftrq(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x31, imm); }
-void kshiftrw(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x30, imm); }
+void kshiftlb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x32, imm); }
+void kshiftld(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x33, imm); }
+void kshiftlq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x33, imm); }
+void kshiftlw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x32, imm); }
+void kshiftrb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x30, imm); }
+void kshiftrd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x31, imm); }
+void kshiftrq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x31, imm); }
+void kshiftrw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x30, imm); }
 void ktestb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x99); }
 void ktestd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x99); }
 void ktestq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x99); }
@@ -1735,8 +1735,8 @@ void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM
 void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }
 void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }
 void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }
-void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x03, imm); }
-void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x03, imm); }
+void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x03, imm); }
+void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x03, imm); }
 void vblendmpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x65); }
 void vblendmps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x65); }
 void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x19); }
@@ -1749,10 +1749,10 @@ void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_
 void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }
 void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }
 void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }
-void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0xC2, imm); }
-void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0xC2, imm); }
-void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
-void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
+void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0xC2, imm); }
+void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0xC2, imm); }
+void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
+void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
 void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x63); }
 void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); }
 void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); }
@@ -1783,28 +1783,28 @@ void vcvtuqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 |
 void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x7A); }
 void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
 void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
-void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); }
+void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); }
 void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); }
 void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }
 void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }
 void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x88); }
 void vexpandps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x88); }
-void vextractf32x4(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x19, imm); }
-void vextractf32x8(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1B, imm); }
-void vextractf64x2(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x19, imm); }
-void vextractf64x4(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1B, imm); }
-void vextracti32x4(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x39, imm); }
-void vextracti32x8(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
-void vextracti64x2(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); }
-void vextracti64x4(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
-void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); }
-void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
-void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
-void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
-void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
-void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
-void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
-void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
+void vextractf32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x19, imm); }
+void vextractf32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1B, imm); }
+void vextractf64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x19, imm); }
+void vextractf64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1B, imm); }
+void vextracti32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x39, imm); }
+void vextracti32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
+void vextracti64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); }
+void vextracti64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
+void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); }
+void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
+void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
+void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
+void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
+void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
+void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
+void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
 void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); }
 void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); }
 void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
@@ -1821,18 +1821,18 @@ void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T
 void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); }
 void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); }
 void vgetexpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x43); }
-void vgetmantpd(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x26, imm); }
-void vgetmantps(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x26, imm); }
-void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
-void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
-void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x18, imm); }
-void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
-void vinsertf64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x18, imm); }
-void vinsertf64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
-void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x38, imm); }
-void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
-void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); }
-void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
+void vgetmantpd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x26, imm); }
+void vgetmantps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x26, imm); }
+void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
+void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
+void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x18, imm); }
+void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
+void vinsertf64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x18, imm); }
+void vinsertf64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
+void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x38, imm); }
+void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
+void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); }
+void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
 void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
 void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
 void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
@@ -1863,8 +1863,8 @@ void vpbroadcastd(const Xmm& x, const Reg32& r) { opVex(x, 0, r, T_66 | T_0F38 |
 void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }
 void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }
 void vpbroadcastw(const Xmm& x, const Reg16& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7B); }
-void vpcmpb(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3F, imm); }
-void vpcmpd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1F, imm); }
+void vpcmpb(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3F, imm); }
+void vpcmpd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1F, imm); }
 void vpcmpeqb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x74); }
 void vpcmpeqd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_B32, 0x76); }
 void vpcmpeqq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x29); }
@@ -1873,12 +1873,12 @@ void vpcmpgtb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k
 void vpcmpgtd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x66); }
 void vpcmpgtq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x37); }
 void vpcmpgtw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x65); }
-void vpcmpq(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1F, imm); }
-void vpcmpub(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3E, imm); }
-void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1E, imm); }
-void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1E, imm); }
-void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3E, imm); }
-void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3F, imm); }
+void vpcmpq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1F, imm); }
+void vpcmpub(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3E, imm); }
+void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1E, imm); }
+void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1E, imm); }
+void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3E, imm); }
+void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3F, imm); }
 void vpcompressd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8B); }
 void vpcompressq(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8B); }
 void vpconflictd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xC4); }
@@ -1951,39 +1951,39 @@ void vpopcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_
 void vpopcntw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); }
 void vpord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEB); }
 void vporq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEB); }
-void vprold(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); }
-void vprolq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
+void vprold(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); }
+void vprolq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
 void vprolvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x15); }
 void vprolvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x15); }
-void vprord(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); }
-void vprorq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
+void vprord(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); }
+void vprorq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
 void vprorvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x14); }
 void vprorvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x14); }
 void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 0); }
 void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 1); }
 void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 2); }
 void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 0); }
-void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); }
-void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); }
+void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); }
+void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); }
 void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71); }
 void vpshldvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71); }
 void vpshldvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70); }
-void vpshldw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70, imm); }
-void vpshrdd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73, imm); }
-void vpshrdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73, imm); }
+void vpshldw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70, imm); }
+void vpshrdd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73, imm); }
+void vpshrdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73, imm); }
 void vpshrdvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73); }
 void vpshrdvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73); }
 void vpshrdvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72); }
-void vpshrdw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72, imm); }
+void vpshrdw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72, imm); }
 void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }
 void vpsllvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x12); }
-void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
+void vpsraq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
 void vpsraq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX, 0xE2); }
 void vpsravq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x46); }
 void vpsravw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x11); }
 void vpsrlvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x10); }
-void vpternlogd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x25, imm); }
-void vpternlogq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x25, imm); }
+void vpternlogd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x25, imm); }
+void vpternlogq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x25, imm); }
 void vptestmb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x26); }
 void vptestmd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x27); }
 void vptestmq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x27); }
@@ -1994,10 +1994,10 @@ void vptestnmq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(
 void vptestnmw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x26); }
 void vpxord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEF); }
 void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEF); }
-void vrangepd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x50, imm); }
-void vrangeps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x50, imm); }
-void vrangesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x51, imm); }
-void vrangess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x51, imm); }
+void vrangepd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x50, imm); }
+void vrangeps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x50, imm); }
+void vrangesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x51, imm); }
+void vrangess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x51, imm); }
 void vrcp14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4C); }
 void vrcp14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4C); }
 void vrcp14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX, 0x4D); }
@@ -2006,14 +2006,14 @@ void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_
 void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }
 void vrcp28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCB); }
 void vrcp28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCB); }
-void vreducepd(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x56, imm); }
-void vreduceps(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x56, imm); }
-void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
-void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
-void vrndscalepd(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x09, imm); }
-void vrndscaleps(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x08, imm); }
-void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_MUST_EVEX, 0x0B, imm); }
-void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_MUST_EVEX, 0x0A, imm); }
+void vreducepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x56, imm); }
+void vreduceps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x56, imm); }
+void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
+void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
+void vrndscalepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x09, imm); }
+void vrndscaleps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x08, imm); }
+void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_MUST_EVEX, 0x0B, imm); }
+void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_MUST_EVEX, 0x0A, imm); }
 void vrsqrt14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4E); }
 void vrsqrt14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4E); }
 void vrsqrt14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x4F); }
@@ -2038,10 +2038,10 @@ void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66
 void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
 void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 0); }
 void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 2); }
-void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }
-void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
-void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
-void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
+void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }
+void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
+void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
+void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
 #ifdef XBYAK64
 void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
 void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }
diff --git a/src/xbyak/xbyak_util.h b/src/xbyak/xbyak_util.h
index 5caed058..1516fc33 100644
--- a/src/xbyak/xbyak_util.h
+++ b/src/xbyak/xbyak_util.h
@@ -1,5 +1,16 @@
 #ifndef XBYAK_XBYAK_UTIL_H_
 #define XBYAK_XBYAK_UTIL_H_
+
+#ifdef XBYAK_ONLY_CLASS_CPU
+#include <stdint.h>
+#include <stdlib.h>
+#include <algorithm>
+#include <assert.h>
+#ifndef XBYAK_THROW
+	#define XBYAK_THROW(x) ;
+	#define XBYAK_THROW_RET(x, y) return y;
+#endif
+#else
 #include <string.h>
 
 /**
@@ -9,6 +20,7 @@
 	@note this header is UNDER CONSTRUCTION!
 */
 #include "xbyak.h"
+#endif // XBYAK_ONLY_CLASS_CPU
 
 #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
 	#define XBYAK_INTEL_CPU_SPECIFIC
@@ -80,7 +92,7 @@ typedef enum {
 	CPU detection class
 */
 class Cpu {
-	uint64 type_;
+	uint64_t type_;
 	//system topology
 	bool x2APIC_supported_;
 	static const size_t maxTopologyLevels = 2;
@@ -219,7 +231,7 @@ class Cpu {
 	int displayFamily; // family + extFamily
 	int displayModel; // model + extModel
 
-	unsigned int getNumCores(IntelCpuTopologyLevel level) {
+	unsigned int getNumCores(IntelCpuTopologyLevel level) const {
 		if (!x2APIC_supported_) XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
 		switch (level) {
 		case SmtLevel: return numCores_[level - 1];
@@ -270,7 +282,7 @@ class Cpu {
 		(void)data;
 #endif
 	}
-	static inline uint64 getXfeature()
+	static inline uint64_t getXfeature()
 	{
 #ifdef XBYAK_INTEL_CPU_SPECIFIC
 	#ifdef _MSC_VER
@@ -280,13 +292,13 @@ class Cpu {
 		// xgetvb is not support on gcc 4.2
 //		__asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
 		__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
-		return ((uint64)edx << 32) | eax;
+		return ((uint64_t)edx << 32) | eax;
 	#endif
 #else
 		return 0;
 #endif
 	}
-	typedef uint64 Type;
+	typedef uint64_t Type;
 
 	static const Type NONE = 0;
 	static const Type tMMX = 1 << 0;
@@ -323,39 +335,39 @@ class Cpu {
 	static const Type tADX = 1 << 28; // adcx, adox
 	static const Type tRDSEED = 1 << 29; // rdseed
 	static const Type tSMAP = 1 << 30; // stac
-	static const Type tHLE = uint64(1) << 31; // xacquire, xrelease, xtest
-	static const Type tRTM = uint64(1) << 32; // xbegin, xend, xabort
-	static const Type tF16C = uint64(1) << 33; // vcvtph2ps, vcvtps2ph
-	static const Type tMOVBE = uint64(1) << 34; // mobve
-	static const Type tAVX512F = uint64(1) << 35;
-	static const Type tAVX512DQ = uint64(1) << 36;
-	static const Type tAVX512_IFMA = uint64(1) << 37;
+	static const Type tHLE = uint64_t(1) << 31; // xacquire, xrelease, xtest
+	static const Type tRTM = uint64_t(1) << 32; // xbegin, xend, xabort
+	static const Type tF16C = uint64_t(1) << 33; // vcvtph2ps, vcvtps2ph
+	static const Type tMOVBE = uint64_t(1) << 34; // mobve
+	static const Type tAVX512F = uint64_t(1) << 35;
+	static const Type tAVX512DQ = uint64_t(1) << 36;
+	static const Type tAVX512_IFMA = uint64_t(1) << 37;
 	static const Type tAVX512IFMA = tAVX512_IFMA;
-	static const Type tAVX512PF = uint64(1) << 38;
-	static const Type tAVX512ER = uint64(1) << 39;
-	static const Type tAVX512CD = uint64(1) << 40;
-	static const Type tAVX512BW = uint64(1) << 41;
-	static const Type tAVX512VL = uint64(1) << 42;
-	static const Type tAVX512_VBMI = uint64(1) << 43;
+	static const Type tAVX512PF = uint64_t(1) << 38;
+	static const Type tAVX512ER = uint64_t(1) << 39;
+	static const Type tAVX512CD = uint64_t(1) << 40;
+	static const Type tAVX512BW = uint64_t(1) << 41;
+	static const Type tAVX512VL = uint64_t(1) << 42;
+	static const Type tAVX512_VBMI = uint64_t(1) << 43;
 	static const Type tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual
-	static const Type tAVX512_4VNNIW = uint64(1) << 44;
-	static const Type tAVX512_4FMAPS = uint64(1) << 45;
-	static const Type tPREFETCHWT1 = uint64(1) << 46;
-	static const Type tPREFETCHW = uint64(1) << 47;
-	static const Type tSHA = uint64(1) << 48;
-	static const Type tMPX = uint64(1) << 49;
-	static const Type tAVX512_VBMI2 = uint64(1) << 50;
-	static const Type tGFNI = uint64(1) << 51;
-	static const Type tVAES = uint64(1) << 52;
-	static const Type tVPCLMULQDQ = uint64(1) << 53;
-	static const Type tAVX512_VNNI = uint64(1) << 54;
-	static const Type tAVX512_BITALG = uint64(1) << 55;
-	static const Type tAVX512_VPOPCNTDQ = uint64(1) << 56;
-	static const Type tAVX512_BF16 = uint64(1) << 57;
-	static const Type tAVX512_VP2INTERSECT = uint64(1) << 58;
-	static const Type tAMX_TILE = uint64(1) << 59;
-	static const Type tAMX_INT8 = uint64(1) << 60;
-	static const Type tAMX_BF16 = uint64(1) << 61;
+	static const Type tAVX512_4VNNIW = uint64_t(1) << 44;
+	static const Type tAVX512_4FMAPS = uint64_t(1) << 45;
+	static const Type tPREFETCHWT1 = uint64_t(1) << 46;
+	static const Type tPREFETCHW = uint64_t(1) << 47;
+	static const Type tSHA = uint64_t(1) << 48;
+	static const Type tMPX = uint64_t(1) << 49;
+	static const Type tAVX512_VBMI2 = uint64_t(1) << 50;
+	static const Type tGFNI = uint64_t(1) << 51;
+	static const Type tVAES = uint64_t(1) << 52;
+	static const Type tVPCLMULQDQ = uint64_t(1) << 53;
+	static const Type tAVX512_VNNI = uint64_t(1) << 54;
+	static const Type tAVX512_BITALG = uint64_t(1) << 55;
+	static const Type tAVX512_VPOPCNTDQ = uint64_t(1) << 56;
+	static const Type tAVX512_BF16 = uint64_t(1) << 57;
+	static const Type tAVX512_VP2INTERSECT = uint64_t(1) << 58;
+	static const Type tAMX_TILE = uint64_t(1) << 59;
+	static const Type tAMX_INT8 = uint64_t(1) << 60;
+	static const Type tAMX_BF16 = uint64_t(1) << 61;
 
 	Cpu()
 		: type_(NONE)
@@ -410,7 +422,7 @@ class Cpu {
 
 		if (type_ & tOSXSAVE) {
 			// check XFEATURE_ENABLED_MASK[2:1] = '11b'
-			uint64 bv = getXfeature();
+			uint64_t bv = getXfeature();
 			if ((bv & 6) == 6) {
 				if (ECX & (1U << 28)) type_ |= tAVX;
 				if (ECX & (1U << 12)) type_ |= tFMA;
@@ -469,9 +481,11 @@ class Cpu {
 	}
 	void putFamily() const
 	{
+#ifndef XBYAK_ONLY_CLASS_CPU
 		printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n",
 			family, model, stepping, extFamily, extModel);
 		printf("display:family=%X, model=%X\n", displayFamily, displayModel);
+#endif
 	}
 	bool has(Type type) const
 	{
@@ -479,9 +493,10 @@ class Cpu {
 	}
 };
 
+#ifndef XBYAK_ONLY_CLASS_CPU
 class Clock {
 public:
-	static inline uint64 getRdtsc()
+	static inline uint64_t getRdtsc()
 	{
 #ifdef XBYAK_INTEL_CPU_SPECIFIC
 	#ifdef _MSC_VER
@@ -489,7 +504,7 @@ class Clock {
 	#else
 		unsigned int eax, edx;
 		__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
-		return ((uint64)edx << 32) | eax;
+		return ((uint64_t)edx << 32) | eax;
 	#endif
 #else
 		// TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu
@@ -511,10 +526,10 @@ class Clock {
 		count_++;
 	}
 	int getCount() const { return count_; }
-	uint64 getClock() const { return clock_; }
+	uint64_t getClock() const { return clock_; }
 	void clear() { count_ = 0; clock_ = 0; }
 private:
-	uint64 clock_;
+	uint64_t clock_;
 	int count_;
 };
 
@@ -880,6 +895,8 @@ class Profiler {
 		startAddr_ = endAddr;
 	}
 };
+#endif // XBYAK_ONLY_CLASS_CPU
 
 } } // end of util
+
 #endif

From 5acf84b9c45ebf16b4beaddcd43b1d6e383e7a78 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 14 Sep 2020 15:56:59 +0900
Subject: [PATCH 286/553] remove old unused code

---
 include/mcl/bn.hpp | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 921685f1..3a32bb45 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -1460,7 +1460,6 @@ inline void expHardPartBLS12(Fp12& y, const Fp12& x)
 	Fp12::pow(y, x, (p4 - p2 + 1) / param.r * 3);
 	return;
 #endif
-#if 1
 	Fp12 a0, a1, a2, a3, a4, a5, a6, a7;
 	Fp12::unitaryInv(a0, x); // a0 = x^-1
 	fasterSqr(a1, a0); // x^-2
@@ -1485,19 +1484,6 @@ inline void expHardPartBLS12(Fp12& y, const Fp12& x)
 	a7 *= x; // x^(z^2-2z+1) = x^c3
 	Fp12::Frobenius3(y, a7);
 	y *= a1;
-#else
-	Fp12 t1, t2, t3;
-	Fp12::Frobenius(t1, x);
-	Fp12::Frobenius(t2, t1);
-	Fp12::Frobenius(t3, t2);
-	Fp12::pow(t1, t1, param.exp_c1);
-	Fp12::pow(t2, t2, param.exp_c2);
-	Fp12::pow(t3, t3, param.exp_c3);
-	Fp12::pow(y, x, param.exp_c0);
-	y *= t1;
-	y *= t2;
-	y *= t3;
-#endif
 }
 /*
 	Faster Hashing to G2

From c278054238c7f8ab353992a198ed41501b719582 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 14 Sep 2020 16:58:35 +0900
Subject: [PATCH 287/553] finalExp by 2020/875.pdf

---
 include/mcl/bn.hpp | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 3a32bb45..9721a739 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -1460,6 +1460,33 @@ inline void expHardPartBLS12(Fp12& y, const Fp12& x)
 	Fp12::pow(y, x, (p4 - p2 + 1) / param.r * 3);
 	return;
 #endif
+#if 1
+	/*
+		Efficient Final Exponentiation via Cyclotomic Structure
+		for Pairings over Families of Elliptic Curves
+		https://eprint.iacr.org/2020/875.pdf p.13
+		(z-1)^2 (z+p)(z^2+p^2-1)+3
+	*/
+	Fp12 a0, a1, a2, a3, a4;
+	pow_z(a0, x); // z
+	Fp12::unitaryInv(a1, x); // -1
+	a0 *= a1; // z-1
+	pow_z(a1, a0); // (z-1)^z
+	Fp12::unitaryInv(a0, a0); // -(z-1)
+	a0 *= a1; // (z-1)^2
+	pow_z(a1, a0); // z
+	Fp12::Frobenius(a0, a0); // p
+	a0 *=a1; // (z-1)^2 (z+p)
+	pow_z(a1, a0); // z
+	pow_z(a1, a1); // z^2
+	Fp12::Frobenius2(a2, a0); // p^2
+	Fp12::unitaryInv(a0, a0); // -1
+	a0 *= a1;
+	a0 *= a2; // z^2+p^2-1
+	fasterSqr(a1, x);
+	a1 *= x; // x^3
+	Fp12::mul(y, a0, a1);
+#else
 	Fp12 a0, a1, a2, a3, a4, a5, a6, a7;
 	Fp12::unitaryInv(a0, x); // a0 = x^-1
 	fasterSqr(a1, a0); // x^-2
@@ -1484,6 +1511,7 @@ inline void expHardPartBLS12(Fp12& y, const Fp12& x)
 	a7 *= x; // x^(z^2-2z+1) = x^c3
 	Fp12::Frobenius3(y, a7);
 	y *= a1;
+#endif
 }
 /*
 	Faster Hashing to G2

From dc65ed2ccc413d6236b30b627e4e34050f04a5fe Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 14 Sep 2020 16:59:16 +0900
Subject: [PATCH 288/553] update version

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index ddf9122b..99c0e4d8 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -23,7 +23,7 @@
 
 namespace mcl {
 
-static const int version = 0x122; /* 0xABC = A.BC */
+static const int version = 0x123; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From 9bfc47ca8da25668ec305647b19ab4302fe32c1b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 14 Sep 2020 20:47:03 +0900
Subject: [PATCH 289/553] remove unused var

---
 include/mcl/bn.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 9721a739..3668da26 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -1467,7 +1467,7 @@ inline void expHardPartBLS12(Fp12& y, const Fp12& x)
 		https://eprint.iacr.org/2020/875.pdf p.13
 		(z-1)^2 (z+p)(z^2+p^2-1)+3
 	*/
-	Fp12 a0, a1, a2, a3, a4;
+	Fp12 a0, a1, a2;
 	pow_z(a0, x); // z
 	Fp12::unitaryInv(a1, x); // -1
 	a0 *= a1; // z-1

From 10621c6299d3db1c88fd0c27e63654edada08049 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 15 Sep 2020 16:20:14 +0900
Subject: [PATCH 290/553] refactor profiler

---
 include/mcl/bn.hpp   |   4 +-
 include/mcl/fp.hpp   |  12 ++---
 include/mcl/op.hpp   |   2 +-
 src/fp.cpp           |  10 ++--
 src/fp_generator.hpp | 111 ++++++++++++++++++++++++++++++++-----------
 5 files changed, 97 insertions(+), 42 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 3668da26..8710f554 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -875,9 +875,9 @@ struct Param {
 			assert((p % 6) == 1);
 			r = local::evalPoly(z, rCoff);
 		}
-		Fr::init(pb, r, mode);
+		Fr::init(pb, r, mode, "Fr");
 		if (!*pb) return;
-		Fp::init(pb, cp.xi_a, p, mode);
+		Fp::init(pb, cp.xi_a, p, mode, "Fp");
 		if (!*pb) return;
 		Fp2::init();
 		const Fp2 xi(cp.xi_a, 1);
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 6c5b0b05..c8b5a6d8 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -130,10 +130,10 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 		xi_a is used for Fp2::mul_xi(), where xi = xi_a + i and i^2 = -1
 		if xi_a = 0 then asm functions for Fp2 are not generated.
 	*/
-	static inline void init(bool *pb, int xi_a, const mpz_class& p, fp::Mode mode = fp::FP_AUTO)
+	static inline void init(bool *pb, int xi_a, const mpz_class& p, fp::Mode mode = fp::FP_AUTO, const char *suf = 0)
 	{
 		assert(maxBitSize <= MCL_MAX_BIT_SIZE);
-		*pb = op_.init(p, maxBitSize, xi_a, mode);
+		*pb = op_.init(p, maxBitSize, xi_a, mode, suf);
 		if (!*pb) return;
 		{ // set oneRep
 			FpT& one = *reinterpret_cast<FpT*>(op_.oneRep);
@@ -163,16 +163,16 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 #endif
 		*pb = true;
 	}
-	static inline void init(bool *pb, const mpz_class& p, fp::Mode mode = fp::FP_AUTO)
+	static inline void init(bool *pb, const mpz_class& p, fp::Mode mode = fp::FP_AUTO, const char *suf = 0)
 	{
-		init(pb, 0, p, mode);
+		init(pb, 0, p, mode, suf);
 	}
-	static inline void init(bool *pb, const char *mstr, fp::Mode mode = fp::FP_AUTO)
+	static inline void init(bool *pb, const char *mstr, fp::Mode mode = fp::FP_AUTO, const char *suf = 0)
 	{
 		mpz_class p;
 		gmp::setStr(pb, p, mstr);
 		if (!*pb) return;
-		init(pb, p, mode);
+		init(pb, p, mode, suf);
 	}
 	static inline size_t getModulo(char *buf, size_t bufSize)
 	{
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 99c0e4d8..45320e50 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -364,7 +364,7 @@ struct Op {
 		*/
 		fp_mul(y, x, R2, p);
 	}
-	bool init(const mpz_class& p, size_t maxBitSize, int xi_a, Mode mode, size_t mclMaxBitSize = MCL_MAX_BIT_SIZE);
+	bool init(const mpz_class& p, size_t maxBitSize, int xi_a, Mode mode, const char *suf = 0, size_t mclMaxBitSize = MCL_MAX_BIT_SIZE);
 #ifdef MCL_USE_XBYAK
 	static FpGenerator* createFpGenerator();
 	static void destroyFpGenerator(FpGenerator *fg);
diff --git a/src/fp.cpp b/src/fp.cpp
index b3b07d19..998a53b6 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -346,7 +346,7 @@ static void initInvTbl(Op& op)
 }
 #endif
 
-static bool initForMont(Op& op, const Unit *p, Mode mode)
+static bool initForMont(Op& op, const Unit *p, Mode mode, const char *suf)
 {
 	const size_t N = op.N;
 	bool b;
@@ -366,17 +366,19 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
 	if (mode != FP_XBYAK) return true;
 #ifdef MCL_USE_XBYAK
 	if (op.fg == 0) op.fg = Op::createFpGenerator();
-	bool useXbyak = op.fg->init(op);
+	bool useXbyak = op.fg->init(op, suf);
 
 	if (useXbyak && op.isMont && N <= 4) {
 		op.fp_invOp = &invOpForMontC;
 		initInvTbl(op);
 	}
+#else
+	(void)suf;
 #endif
 	return true;
 }
 
-bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size_t mclMaxBitSize)
+bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, const char *suf, size_t mclMaxBitSize)
 {
 	if (mclMaxBitSize != MCL_MAX_BIT_SIZE) return false;
 #ifdef MCL_USE_VINT
@@ -534,7 +536,7 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
 		if (!b) return false;
 	}
 	modp.init(mp);
-	return fp::initForMont(*this, p, mode);
+	return fp::initForMont(*this, p, mode, suf);
 }
 
 void copyUnitToByteAsLE(uint8_t *dst, const Unit *src, size_t byteSize)
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 97ce9ae2..08d58445 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -23,8 +23,59 @@
 	#pragma warning(disable : 4458)
 #endif
 
+//#define MCL_FREEZE_JIT
+
 namespace mcl {
 
+#ifdef MCL_FREEZE_JIT
+struct Profiler {
+	FILE *fp_;
+	const uint8_t *prev_;
+	std::string suf_;
+	Profiler()
+		: fp_(0)
+		, prev_(0)
+	{
+	}
+	~Profiler()
+	{
+		if (fp_) fclose(fp_);
+	}
+	void open(const std::string& fileName)
+	{
+		fp_ = fopen(fileName.c_str(), "wb");
+	}
+	void setStartAddr(const uint8_t *addr)
+	{
+		prev_ = addr;
+	}
+	void setNameSuffix(const char *suf)
+	{
+		suf_ = suf;
+	}
+	void set(const char *name, const uint8_t *end)
+	{
+		fprintf(fp_, "global %s%s\n", suf_.c_str(), name);
+		fprintf(fp_, "align 16\n");
+		fprintf(fp_, "%s%s:\n", suf_.c_str(), name);
+		const uint8_t *p = prev_;
+		size_t remain = end - prev_;
+		while (remain > 0) {
+			size_t n = remain >= 16 ? 16 : remain;
+			fprintf(fp_, "db ");
+			for (size_t i = 0; i < n; i++) {
+				fprintf(fp_, "0x%02x,", *p++);
+			}
+			fprintf(fp_, "\n");
+			remain -= n;
+		}
+		prev_ = end;
+	}
+};
+#else
+typedef Xbyak::util::Profiler Profiler;
+#endif
+
 namespace fp_gen_local {
 
 class MemReg {
@@ -203,7 +254,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	int pn_;
 	int FpByte_;
 	bool isFullBit_;
-	Xbyak::util::Profiler prof_;
+	Profiler prof_;
 
 	/*
 		@param op [in] ; use op.p, op.N, op.isFullBit
@@ -242,12 +293,12 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		useMulx_ = cpu_.has(Xbyak::util::Cpu::tBMI2);
 		useAdx_ = cpu_.has(Xbyak::util::Cpu::tADX);
 	}
-	bool init(Op& op)
+	bool init(Op& op, const char *suf)
 	{
 		if (!cpu_.has(Xbyak::util::Cpu::tAVX)) return false;
 		reset(); // reset jit code for reuse
 		setProtectModeRW(); // read/write memory
-		init_inner(op);
+		init_inner(op, suf);
 		// ToDo : recover op if false
 		if (Xbyak::GetError()) return false;
 //		printf("code size=%d\n", (int)getSize());
@@ -255,7 +306,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		return true;
 	}
 private:
-	void init_inner(Op& op)
+	void init_inner(Op& op, const char *suf)
 	{
 		op_ = &op;
 		L(pL_);
@@ -269,7 +320,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		isFullBit_ = op.isFullBit;
 //		printf("p=%p, pn_=%d, isFullBit_=%d\n", p_, pn_, isFullBit_);
 #ifdef MCL_USE_PROF
-		static char suf[] = "_0";
 		int profMode = 0;
 #ifdef XBYAK_USE_VTUNE
 		profMode = 2;
@@ -281,89 +331,92 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		if (profMode) {
 			prof_.init(profMode);
 			prof_.setStartAddr(getCurr());
+			if (suf == 0) suf = "fp";
 			prof_.setNameSuffix(suf);
 			suf[1]++;
 		}
+#else
+		(void)suf;
 #endif
 
 		op.fp_addPre = gen_addSubPre(true, pn_);
-		prof_.set("Fp_addPre", getCurr());
+		prof_.set("_addPre", getCurr());
 
 		op.fp_subPre = gen_addSubPre(false, pn_);
-		prof_.set("Fp_subPre", getCurr());
+		prof_.set("_subPre", getCurr());
 
 		op.fp_addA_ = gen_fp_add();
-		prof_.set("Fp_add", getCurr());
+		prof_.set("_add", getCurr());
 
 		op.fp_subA_ = gen_fp_sub();
-		prof_.set("Fp_sub", getCurr());
+		prof_.set("_sub", getCurr());
 
 		op.fp_shr1 = gen_shr1();
-		prof_.set("Fp_shr1", getCurr());
+		prof_.set("_shr1", getCurr());
 
 		op.fp_negA_ = gen_fp_neg();
-		prof_.set("Fp_neg", getCurr());
+		prof_.set("_neg", getCurr());
 
 		op.fpDbl_addA_ = gen_fpDbl_add();
-		prof_.set("FpDbl_add", getCurr());
+		prof_.set("Dbl_add", getCurr());
 
 		op.fpDbl_subA_ = gen_fpDbl_sub();
-		prof_.set("FpDbl_sub", getCurr());
+		prof_.set("Dbl_sub", getCurr());
 
 		op.fpDbl_addPre = gen_addSubPre(true, pn_ * 2);
-		prof_.set("FpDbl_addPre", getCurr());
+		prof_.set("Dbl_addPre", getCurr());
 
 		op.fpDbl_subPre = gen_addSubPre(false, pn_ * 2);
-		prof_.set("FpDbl_subPre", getCurr());
+		prof_.set("Dbl_subPre", getCurr());
 
 		op.fpDbl_mulPreA_ = gen_fpDbl_mulPre();
-		prof_.set("FpDbl_mulPre", getCurr());
+		prof_.set("Dbl_mulPre", getCurr());
 
 		op.fpDbl_sqrPreA_ = gen_fpDbl_sqrPre();
-		prof_.set("FpDbl_sqrPre", getCurr());
+		prof_.set("Dbl_sqrPre", getCurr());
 
 		op.fpDbl_modA_ = gen_fpDbl_mod(op);
-		prof_.set("FpDbl_mod", getCurr());
+		prof_.set("Dbl_mod", getCurr());
 
 		op.fp_mulA_ = gen_mul();
-		prof_.set("Fp_mul", getCurr());
+		prof_.set("_mul", getCurr());
 		if (op.fp_mulA_) {
 			op.fp_mul = fp::func_ptr_cast<void4u>(op.fp_mulA_); // used in toMont/fromMont
 		}
 		op.fp_sqrA_ = gen_sqr();
-		prof_.set("Fp_sqr", getCurr());
+		prof_.set("_sqr", getCurr());
 
 		if (op.primeMode != PM_NIST_P192 && op.N <= 4) { // support general op.N but not fast for op.N > 4
 			align(16);
 			op.fp_preInv = getCurr<int2u>();
 			gen_preInv();
-			prof_.set("preInv", getCurr());
+			prof_.set("_preInv", getCurr());
 		}
 		if (op.xi_a == 0) return; // Fp2 is not used
 		op.fp2_addA_ = gen_fp2_add();
-		prof_.set("Fp2_add", getCurr());
+		prof_.set("2_add", getCurr());
 
 		op.fp2_subA_ = gen_fp2_sub();
-		prof_.set("Fp2_sub", getCurr());
+		prof_.set("2_sub", getCurr());
 
 		op.fp2_negA_ = gen_fp2_neg();
-		prof_.set("Fp2_neg", getCurr());
+		prof_.set("2_neg", getCurr());
 
 		op.fp2_mulNF = 0;
 		op.fp2Dbl_mulPreA_ = gen_fp2Dbl_mulPre();
-		prof_.set("Fp2Dbl_mulPre", getCurr());
+		prof_.set("2Dbl_mulPre", getCurr());
 
 		op.fp2Dbl_sqrPreA_ = gen_fp2Dbl_sqrPre();
-		prof_.set("Fp2Dbl_sqrPre", getCurr());
+		prof_.set("2Dbl_sqrPre", getCurr());
 
 		op.fp2_mulA_ = gen_fp2_mul();
-		prof_.set("Fp2_mul", getCurr());
+		prof_.set("2_mul", getCurr());
 
 		op.fp2_sqrA_ = gen_fp2_sqr();
-		prof_.set("Fp2_sqr", getCurr());
+		prof_.set("2_sqr", getCurr());
 
 		op.fp2_mul_xiA_ = gen_fp2_mul_xi();
-		prof_.set("Fp2_mul_xi", getCurr());
+		prof_.set("2_mul_xi", getCurr());
 	}
 	u3u gen_addSubPre(bool isAdd, int n)
 	{

From 7146cfd0f425acb17f4aff1951dd307388da9075 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 15 Sep 2020 16:43:55 +0900
Subject: [PATCH 291/553] add dump_code

---
 Makefile             |  3 +++
 src/dump_code.cpp    |  7 +++++++
 src/fp_generator.hpp | 26 +++++++++++++++++++++++---
 3 files changed, 33 insertions(+), 3 deletions(-)
 create mode 100644 src/dump_code.cpp

diff --git a/Makefile b/Makefile
index 346be6ab..75cca34c 100644
--- a/Makefile
+++ b/Makefile
@@ -237,6 +237,9 @@ endif
 $(GEN_EXE): src/gen.cpp src/llvm_gen.hpp
 	$(CXX) -o $@ $< $(CFLAGS)
 
+src/dump_code: src/dump_code.cpp src/fp.cpp src/fp_generator.hpp
+	$(CXX) -o $@ src/dump_code.cpp src/fp.cpp -I include -DMCL_FREEZE_JIT -DMCL_MAX_BIT_SIZE=384 -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER
+
 asm: $(LLVM_SRC)
 	$(LLVM_OPT) -O3 -o - $(LLVM_SRC) | $(LLVM_LLC) -O3 $(LLVM_FLAGS) -x86-asm-syntax=intel
 
diff --git a/src/dump_code.cpp b/src/dump_code.cpp
new file mode 100644
index 00000000..f1655e91
--- /dev/null
+++ b/src/dump_code.cpp
@@ -0,0 +1,7 @@
+#include <mcl/bls12_381.hpp>
+
+int main()
+{
+	mcl::bn::initPairing(mcl::BLS12_381);
+}
+
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 08d58445..5b00fa68 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -28,28 +28,31 @@
 namespace mcl {
 
 #ifdef MCL_FREEZE_JIT
+// not profiler, but dump jit code
 struct Profiler {
 	FILE *fp_;
 	const uint8_t *prev_;
 	std::string suf_;
 	Profiler()
-		: fp_(0)
+		: fp_(stdout)
 		, prev_(0)
 	{
 	}
 	~Profiler()
 	{
-		if (fp_) fclose(fp_);
+//		if (fp_) fclose(fp_);
 	}
+#if 0
 	void open(const std::string& fileName)
 	{
 		fp_ = fopen(fileName.c_str(), "wb");
 	}
+#endif
 	void setStartAddr(const uint8_t *addr)
 	{
 		prev_ = addr;
 	}
-	void setNameSuffix(const char *suf)
+	void setNameSuffix(const std::string& suf)
 	{
 		suf_ = suf;
 	}
@@ -71,6 +74,18 @@ struct Profiler {
 		}
 		prev_ = end;
 	}
+	void dumpData(const void *begin, const void *end)
+	{
+		fprintf(fp_, "align 16\n");
+		fprintf(fp_, "dq ");
+		const uint64_t *p = (const uint64_t*)begin;
+		const uint64_t *pe = (const uint64_t*)end;
+		const size_t n = pe - p;
+		for (size_t i = 0; i < n; i++) {
+			fprintf(fp_, "0x%016llx,", (unsigned long long)*p++);
+		}
+		fprintf(fp_, "\n");
+	}
 };
 #else
 typedef Xbyak::util::Profiler Profiler;
@@ -314,6 +329,11 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		for (size_t i = 0; i < op.N; i++) {
 			dq(op.p[i]);
 		}
+#ifdef MCL_FREEZE_JIT
+		prof_.dumpData(p_, getCurr());
+		prof_.setStartAddr(getCurr());
+		prof_.setNameSuffix(std::string("mclx_") + suf);
+#endif
 		rp_ = fp::getMontgomeryCoeff(p_[0]);
 		pn_ = (int)op.N;
 		FpByte_ = int(op.maxN * sizeof(uint64_t));

From c29157cc9a17a36e80cd73d4c32cd7d40220d508 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 15 Sep 2020 17:23:08 +0900
Subject: [PATCH 292/553] test static_code

---
 Makefile                  | 13 +++++++++++--
 src/fp_generator.hpp      |  6 +++---
 test/static_code_test.cpp | 11 +++++++++++
 3 files changed, 25 insertions(+), 5 deletions(-)
 create mode 100644 test/static_code_test.cpp

diff --git a/Makefile b/Makefile
index 75cca34c..dbc2a411 100644
--- a/Makefile
+++ b/Makefile
@@ -238,8 +238,17 @@ $(GEN_EXE): src/gen.cpp src/llvm_gen.hpp
 	$(CXX) -o $@ $< $(CFLAGS)
 
 src/dump_code: src/dump_code.cpp src/fp.cpp src/fp_generator.hpp
-	$(CXX) -o $@ src/dump_code.cpp src/fp.cpp -I include -DMCL_FREEZE_JIT -DMCL_MAX_BIT_SIZE=384 -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER
+	$(CXX) -o $@ src/dump_code.cpp src/fp.cpp -I include -DMCL_DUMP_JIT -DMCL_MAX_BIT_SIZE=384 -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER
 
+src/static_code.asm: src/dump_code
+	$< > $@
+
+obj/static_code.o: src/static_code.asm
+	nasm -felf64 -o $@ $<
+
+bin/static_code_test.exe: test/static_code_test.cpp src/fp.cpp obj/static_code.o
+	$(CXX) -o $@ -O3 $^ -DMCL_STATIC_JIT -DMCL_MAX_BIT_SIZE=384 -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -I include -Wall -Wextra
+ 
 asm: $(LLVM_SRC)
 	$(LLVM_OPT) -O3 -o - $(LLVM_SRC) | $(LLVM_LLC) -O3 $(LLVM_FLAGS) -x86-asm-syntax=intel
 
@@ -391,7 +400,7 @@ update_cybozulib:
 	cp -a $(addprefix ../cybozulib/,$(wildcard include/cybozu/*.hpp)) include/cybozu/
 
 clean:
-	$(RM) $(LIB_DIR)/*.a $(LIB_DIR)/*.$(LIB_SUF) $(OBJ_DIR)/*.o $(OBJ_DIR)/*.obj $(OBJ_DIR)/*.d $(EXE_DIR)/*.exe $(GEN_EXE) $(ASM_OBJ) $(LIB_OBJ) $(BN256_OBJ) $(BN384_OBJ) $(BN512_OBJ) $(FUNC_LIST) src/*.ll lib/*.a
+	$(RM) $(LIB_DIR)/*.a $(LIB_DIR)/*.$(LIB_SUF) $(OBJ_DIR)/*.o $(OBJ_DIR)/*.obj $(OBJ_DIR)/*.d $(EXE_DIR)/*.exe $(GEN_EXE) $(ASM_OBJ) $(LIB_OBJ) $(BN256_OBJ) $(BN384_OBJ) $(BN512_OBJ) $(FUNC_LIST) src/*.ll lib/*.a src/static_code.asm src/dump_code
 
 ALL_SRC=$(SRC_SRC) $(TEST_SRC) $(SAMPLE_SRC)
 DEPEND_FILE=$(addprefix $(OBJ_DIR)/, $(addsuffix .d,$(basename $(ALL_SRC))))
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 5b00fa68..7a3771f0 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -23,11 +23,11 @@
 	#pragma warning(disable : 4458)
 #endif
 
-//#define MCL_FREEZE_JIT
+//#define MCL_DUMP_JIT
 
 namespace mcl {
 
-#ifdef MCL_FREEZE_JIT
+#ifdef MCL_DUMP_JIT
 // not profiler, but dump jit code
 struct Profiler {
 	FILE *fp_;
@@ -329,7 +329,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		for (size_t i = 0; i < op.N; i++) {
 			dq(op.p[i]);
 		}
-#ifdef MCL_FREEZE_JIT
+#ifdef MCL_DUMP_JIT
 		prof_.dumpData(p_, getCurr());
 		prof_.setStartAddr(getCurr());
 		prof_.setNameSuffix(std::string("mclx_") + suf);
diff --git a/test/static_code_test.cpp b/test/static_code_test.cpp
new file mode 100644
index 00000000..56d8420f
--- /dev/null
+++ b/test/static_code_test.cpp
@@ -0,0 +1,11 @@
+#include <mcl/bls12_381.hpp>
+
+using namespace mcl::bn;
+
+int main()
+{
+	initPairing(mcl::BLS12_381);
+	Fr x;
+	x = 3;
+	printf("%s\n", x.getStr(16).c_str());
+}

From f11b3be1ab34abe0b15e56e605749030202f22ae Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 16 Sep 2020 17:39:28 +0900
Subject: [PATCH 293/553] refactor DumpCode

---
 src/fp_generator.hpp      | 182 ++++++++++++++++++++------------------
 test/static_code_test.cpp |   8 +-
 2 files changed, 104 insertions(+), 86 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 7a3771f0..2ce90ba1 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -23,46 +23,34 @@
 	#pragma warning(disable : 4458)
 #endif
 
-//#define MCL_DUMP_JIT
-
 namespace mcl {
 
+#ifdef MCL_STATIC_JIT
+typedef fp::Unit Unit;
+extern "C" {
+Unit mclx_Fr_addPre(Unit*, const Unit*, const Unit*);
+void mclx_Fr_add(Unit*, const Unit*, const Unit*);
+
+Unit mclx_Fp_addPre(Unit*, const Unit*, const Unit*);
+void mclx_Fp_add(Unit*, const Unit*, const Unit*);
+}
+#endif
+
 #ifdef MCL_DUMP_JIT
-// not profiler, but dump jit code
-struct Profiler {
+struct DumpCode {
 	FILE *fp_;
-	const uint8_t *prev_;
-	std::string suf_;
-	Profiler()
+	DumpCode()
 		: fp_(stdout)
-		, prev_(0)
-	{
-	}
-	~Profiler()
-	{
-//		if (fp_) fclose(fp_);
-	}
-#if 0
-	void open(const std::string& fileName)
 	{
-		fp_ = fopen(fileName.c_str(), "wb");
 	}
-#endif
-	void setStartAddr(const uint8_t *addr)
+	void set(const std::string& name, const uint8_t *begin, const size_t size)
 	{
-		prev_ = addr;
-	}
-	void setNameSuffix(const std::string& suf)
-	{
-		suf_ = suf;
-	}
-	void set(const char *name, const uint8_t *end)
-	{
-		fprintf(fp_, "global %s%s\n", suf_.c_str(), name);
+		fprintf(fp_, "segment .text\n");
+		fprintf(fp_, "global %s\n", name.c_str());
 		fprintf(fp_, "align 16\n");
-		fprintf(fp_, "%s%s:\n", suf_.c_str(), name);
-		const uint8_t *p = prev_;
-		size_t remain = end - prev_;
+		fprintf(fp_, "%s:\n", name.c_str());
+		const uint8_t *p = begin;
+		size_t remain = size;
 		while (remain > 0) {
 			size_t n = remain >= 16 ? 16 : remain;
 			fprintf(fp_, "db ");
@@ -72,7 +60,6 @@ struct Profiler {
 			fprintf(fp_, "\n");
 			remain -= n;
 		}
-		prev_ = end;
 	}
 	void dumpData(const void *begin, const void *end)
 	{
@@ -87,8 +74,19 @@ struct Profiler {
 		fprintf(fp_, "\n");
 	}
 };
+template<class T>
+void setFuncInfo(DumpCode& prof, const char *suf, const char *name, const T& begin, const uint8_t* end)
+{
+	const uint8_t*p = (const uint8_t*)begin; // start address of the generated function
+	prof.set(std::string("mclx_") + (suf ? suf : "") + name, p, end - p); // suf may be 0 (default of init)
+}
 #else
-typedef Xbyak::util::Profiler Profiler;
+template<class T>
+void setFuncInfo(Xbyak::util::Profiler& prof, const char *suf, const char *name, const T& begin, const uint8_t* end)
+{
+	const uint8_t*p = (const uint8_t*)begin; // start address of the generated function
+	prof.set((std::string("mclx_") + (suf ? suf : "") + name).c_str(), p, end - p); // suf may be 0 (default of init)
+}
 #endif
 
 namespace fp_gen_local {
@@ -269,7 +267,11 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	int pn_;
 	int FpByte_;
 	bool isFullBit_;
-	Profiler prof_;
+#ifdef MCL_DUMP_JIT
+	DumpCode prof_;
+#else
+	Xbyak::util::Profiler prof_;
+#endif
 
 	/*
 		@param op [in] ; use op.p, op.N, op.isFullBit
@@ -331,8 +333,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		}
 #ifdef MCL_DUMP_JIT
 		prof_.dumpData(p_, getCurr());
-		prof_.setStartAddr(getCurr());
-		prof_.setNameSuffix(std::string("mclx_") + suf);
 #endif
 		rp_ = fp::getMontgomeryCoeff(p_[0]);
 		pn_ = (int)op.N;
@@ -351,97 +351,130 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		if (profMode) {
 			prof_.init(profMode);
 			prof_.setStartAddr(getCurr());
-			if (suf == 0) suf = "fp";
-			prof_.setNameSuffix(suf);
-			suf[1]++;
 		}
 #else
 		(void)suf;
 #endif
 
+		align(16);
 		op.fp_addPre = gen_addSubPre(true, pn_);
-		prof_.set("_addPre", getCurr());
+		setFuncInfo(prof_, suf, "_addPre", op.fp_addPre, getCurr());
 
+		align(16);
 		op.fp_subPre = gen_addSubPre(false, pn_);
-		prof_.set("_subPre", getCurr());
+		setFuncInfo(prof_, suf, "_subPre", op.fp_subPre, getCurr());
 
+		align(16);
 		op.fp_addA_ = gen_fp_add();
-		prof_.set("_add", getCurr());
+		setFuncInfo(prof_, suf, "_add", op.fp_addA_, getCurr());
 
 		op.fp_subA_ = gen_fp_sub();
-		prof_.set("_sub", getCurr());
+		setFuncInfo(prof_, suf, "_sub", op.fp_subA_, getCurr());
+		align(16);
 
 		op.fp_shr1 = gen_shr1();
-		prof_.set("_shr1", getCurr());
+		setFuncInfo(prof_, suf, "_shr1", op.fp_shr1, getCurr());
+		align(16);
 
 		op.fp_negA_ = gen_fp_neg();
-		prof_.set("_neg", getCurr());
+		setFuncInfo(prof_, suf, "_neg", op.fp_negA_, getCurr());
+		align(16);
 
 		op.fpDbl_addA_ = gen_fpDbl_add();
-		prof_.set("Dbl_add", getCurr());
+		setFuncInfo(prof_, suf, "Dbl_add", op.fpDbl_addA_, getCurr());
+		align(16);
 
 		op.fpDbl_subA_ = gen_fpDbl_sub();
-		prof_.set("Dbl_sub", getCurr());
+		setFuncInfo(prof_, suf, "Dbl_sub", op.fpDbl_subA_, getCurr());
+		align(16);
 
 		op.fpDbl_addPre = gen_addSubPre(true, pn_ * 2);
-		prof_.set("Dbl_addPre", getCurr());
+		setFuncInfo(prof_, suf, "Dbl_addPre", op.fpDbl_addPre, getCurr());
+		align(16);
 
 		op.fpDbl_subPre = gen_addSubPre(false, pn_ * 2);
-		prof_.set("Dbl_subPre", getCurr());
+		setFuncInfo(prof_, suf, "Dbl_subPre", op.fpDbl_subPre, getCurr());
+		align(16);
 
 		op.fpDbl_mulPreA_ = gen_fpDbl_mulPre();
-		prof_.set("Dbl_mulPre", getCurr());
+		setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPreA_, getCurr());
+		align(16);
 
 		op.fpDbl_sqrPreA_ = gen_fpDbl_sqrPre();
-		prof_.set("Dbl_sqrPre", getCurr());
+		setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPreA_, getCurr());
+		align(16);
 
 		op.fpDbl_modA_ = gen_fpDbl_mod(op);
-		prof_.set("Dbl_mod", getCurr());
+		setFuncInfo(prof_, suf, "Dbl_mod", op.fpDbl_modA_, getCurr());
+		align(16);
 
 		op.fp_mulA_ = gen_mul();
-		prof_.set("_mul", getCurr());
+		setFuncInfo(prof_, suf, "_mul", op.fp_mulA_, getCurr());
+		align(16);
+
 		if (op.fp_mulA_) {
 			op.fp_mul = fp::func_ptr_cast<void4u>(op.fp_mulA_); // used in toMont/fromMont
 		}
+
 		op.fp_sqrA_ = gen_sqr();
-		prof_.set("_sqr", getCurr());
+		setFuncInfo(prof_, suf, "_sqr", op.fp_sqrA_, getCurr());
+		align(16);
 
 		if (op.primeMode != PM_NIST_P192 && op.N <= 4) { // support general op.N but not fast for op.N > 4
-			align(16);
 			op.fp_preInv = getCurr<int2u>();
 			gen_preInv();
-			prof_.set("_preInv", getCurr());
+			setFuncInfo(prof_, suf, "_preInv", op.fp_preInv, getCurr());
+			align(16);
 		}
 		if (op.xi_a == 0) return; // Fp2 is not used
 		op.fp2_addA_ = gen_fp2_add();
-		prof_.set("2_add", getCurr());
+		setFuncInfo(prof_, suf, "2_add", op.fp2_addA_, getCurr());
+		align(16);
 
 		op.fp2_subA_ = gen_fp2_sub();
-		prof_.set("2_sub", getCurr());
+		setFuncInfo(prof_, suf, "2_sub", op.fp2_subA_, getCurr());
+		align(16);
 
 		op.fp2_negA_ = gen_fp2_neg();
-		prof_.set("2_neg", getCurr());
+		setFuncInfo(prof_, suf, "2_neg", op.fp2_negA_, getCurr());
+		align(16);
 
 		op.fp2_mulNF = 0;
 		op.fp2Dbl_mulPreA_ = gen_fp2Dbl_mulPre();
-		prof_.set("2Dbl_mulPre", getCurr());
+		if (op.fp2Dbl_mulPreA_) setFuncInfo(prof_, suf, "2Dbl_mulPre", op.fp2Dbl_mulPreA_, getCurr());
+		align(16);
 
 		op.fp2Dbl_sqrPreA_ = gen_fp2Dbl_sqrPre();
-		prof_.set("2Dbl_sqrPre", getCurr());
+		if (op.fp2Dbl_sqrPreA_) setFuncInfo(prof_, suf, "2Dbl_sqrPre", op.fp2Dbl_sqrPreA_, getCurr());
+		align(16);
 
 		op.fp2_mulA_ = gen_fp2_mul();
-		prof_.set("2_mul", getCurr());
+		setFuncInfo(prof_, suf, "2_mul", op.fp2_mulA_, getCurr());
+		align(16);
 
 		op.fp2_sqrA_ = gen_fp2_sqr();
-		prof_.set("2_sqr", getCurr());
+		setFuncInfo(prof_, suf, "2_sqr", op.fp2_sqrA_, getCurr());
+		align(16);
 
 		op.fp2_mul_xiA_ = gen_fp2_mul_xi();
-		prof_.set("2_mul_xi", getCurr());
+		setFuncInfo(prof_, suf, "2_mul_xi", op.fp2_mul_xiA_, getCurr());
+		align(16);
+
+#ifdef MCL_STATIC_JIT
+		const bool isFp = strcmp(suf, "Fp") == 0;
+printf("isFp=%d\n", isFp);
+		if (isFp) {
+			op.fp_addPre = mclx_Fp_addPre;
+			op.fp_addA_ = mclx_Fp_add; // was mclx_Fr_add: Fp must use the routine dumped for Fp
+		} else {
+			op.fp_addPre = mclx_Fr_addPre;
+			op.fp_addA_ = mclx_Fr_add;
+		}
+#endif
 	}
 	u3u gen_addSubPre(bool isAdd, int n)
 	{
 //		if (isFullBit_) return 0;
-		align(16);
 		u3u func = getCurr<u3u>();
 		StackFrame sf(this, 3);
 		if (isAdd) {
@@ -721,7 +754,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void3u gen_fp_add()
 	{
-		align(16);
 		void3u func = getCurr<void3u>();
 		if (pn_ <= 4) {
 			gen_fp_add_le4();
@@ -769,7 +801,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void3u gen_fpDbl_add()
 	{
-		align(16);
 		void3u func = getCurr<void3u>();
 		if (pn_ <= 4) {
 			int tn = pn_ * 2 + (isFullBit_ ? 1 : 0);
@@ -797,7 +828,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void3u gen_fpDbl_sub()
 	{
-		align(16);
 		void3u func = getCurr<void3u>();
 		if (pn_ <= 4) {
 			int tn = pn_ * 2;
@@ -847,7 +877,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void3u gen_fp_sub()
 	{
-		align(16);
 		void3u func = getCurr<void3u>();
 		if (pn_ <= 4) {
 			gen_fp_sub_le4();
@@ -872,7 +901,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void2u gen_fp_neg()
 	{
-		align(16);
 		void2u func = getCurr<void2u>();
 		StackFrame sf(this, 2, UseRDX | pn_);
 		gen_raw_neg(sf.p[0], sf.p[1], sf.t);
@@ -880,7 +908,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void2u gen_shr1()
 	{
-		align(16);
 		void2u func = getCurr<void2u>();
 		const int c = 1;
 		StackFrame sf(this, 2, 1);
@@ -901,7 +928,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void3u gen_mul()
 	{
-		align(16);
 		void3u func = getCurr<void3u>();
 		if (op_->primeMode == PM_NIST_P192) {
 			StackFrame sf(this, 3, 10 | UseRDX, 8 * 6);
@@ -1214,7 +1240,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void2u gen_fpDbl_mod(const fp::Op& op)
 	{
-		align(16);
 		void2u func = getCurr<void2u>();
 		if (op.primeMode == PM_NIST_P192) {
 			StackFrame sf(this, 2, 6 | UseRDX);
@@ -1260,7 +1285,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void2u gen_sqr()
 	{
-		align(16);
 		void2u func = getCurr<void2u>();
 		if (op_->primeMode == PM_NIST_P192) {
 			StackFrame sf(this, 3, 10 | UseRDX, 6 * 8);
@@ -2364,7 +2388,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void2u gen_fpDbl_sqrPre()
 	{
-		align(16);
 		void2u func = getCurr<void2u>();
 		if (pn_ == 2 && useMulx_) {
 			StackFrame sf(this, 2, 7 | UseRDX);
@@ -2405,7 +2428,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void3u gen_fpDbl_mulPre()
 	{
-		align(16);
 		void3u func = getCurr<void3u>();
 		if (pn_ == 2 && useMulx_) {
 			StackFrame sf(this, 3, 5 | UseRDX);
@@ -3446,7 +3468,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 //		if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0;
 		// almost same for pn_ == 6
 		if (pn_ != 4) return 0;
-		align(16);
 		void3u func = getCurr<void3u>();
 
 		const RegExp z = rsp + 0 * 8;
@@ -3511,7 +3532,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 //		if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0;
 		// almost same for pn_ == 6
 		if (pn_ != 4) return 0;
-		align(16);
 		void2u func = getCurr<void2u>();
 		// almost same for pn_ == 6
 		if (pn_ != 4) return 0;
@@ -3597,7 +3617,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void3u gen_fp2_add()
 	{
-		align(16);
 		void3u func = getCurr<void3u>();
 		if (pn_ == 4 && !isFullBit_) {
 			gen_fp2_add4();
@@ -3611,7 +3630,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void3u gen_fp2_sub()
 	{
-		align(16);
 		void3u func = getCurr<void3u>();
 		if (pn_ == 4 && !isFullBit_) {
 			gen_fp2_sub4();
@@ -3697,7 +3715,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	{
 		if (isFullBit_) return 0;
 		if (op_->xi_a != 1) return 0;
-		align(16);
 		void2u func = getCurr<void2u>();
 		if (pn_ == 4) {
 			gen_fp2_mul_xi4();
@@ -3711,7 +3728,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void2u gen_fp2_neg()
 	{
-		align(16);
 		void2u func = getCurr<void2u>();
 		if (pn_ <= 6) {
 			StackFrame sf(this, 2, UseRDX | pn_);
@@ -3725,7 +3741,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	{
 		if (isFullBit_) return 0;
 		if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0;
-		align(16);
 		void3u func = getCurr<void3u>();
 		bool embedded = pn_ == 4;
 
@@ -3802,7 +3817,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	{
 		if (isFullBit_) return 0;
 		if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0;
-		align(16);
 		void2u func = getCurr<void2u>();
 
 		const RegExp y = rsp + 0 * 8;
diff --git a/test/static_code_test.cpp b/test/static_code_test.cpp
index 56d8420f..93dc223f 100644
--- a/test/static_code_test.cpp
+++ b/test/static_code_test.cpp
@@ -5,7 +5,11 @@ using namespace mcl::bn;
 int main()
 {
 	initPairing(mcl::BLS12_381);
-	Fr x;
+	Fp x, y, z;
 	x = 3;
-	printf("%s\n", x.getStr(16).c_str());
+	y = 5;
+	z = x + y;
+	printf("x=%s\n", x.getStr(16).c_str());
+	printf("y=%s\n", y.getStr(16).c_str());
+	printf("z=%s\n", z.getStr(16).c_str());
 }

From 0c6b2c59b8630a3e74aca267d546b52e3c3a1ad4 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 16 Sep 2020 17:49:01 +0900
Subject: [PATCH 294/553] use rip instead of abs addr

---
 src/fp_generator.hpp | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 2ce90ba1..a7536e6e 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -77,6 +77,7 @@ struct DumpCode {
 template<class T>
 void setFuncInfo(DumpCode& prof, const char *suf, const char *name, const T& begin, const uint8_t* end)
 {
+	if (suf == 0) suf = "";
 	const uint8_t*p = (const uint8_t*)begin;
 	prof.set(std::string("mclx_") + suf + name, p, end - p);
 }
@@ -84,6 +85,7 @@ void setFuncInfo(DumpCode& prof, const char *suf, const char *name, const T& beg
 template<class T>
 void setFuncInfo(Xbyak::util::Profiler& prof, const char *suf, const char *name, const T& begin, const uint8_t* end)
 {
+	if (suf == 0) suf = "";
 	const uint8_t*p = (const uint8_t*)begin;
 	prof.set((std::string("mclx_") + suf + name).c_str(), p, end - p);
 }
@@ -535,7 +537,7 @@ printf("isFp=%d\n", isFp);
 		}
 		jmp(exit);
 	L(nonZero);
-		mov(rax, pL_);
+		lea(rax, ptr[rip+pL_]);
 		for (size_t i = 0; i < t.size(); i++) {
 			mov(rdx, ptr [rax + i * 8]);
 			if (i == 0) {
@@ -663,7 +665,7 @@ printf("isFp=%d\n", isFp);
 			mov(*fullReg, 0);
 			adc(*fullReg, 0);
 		}
-		mov(rax, pL_);
+		lea(rax, ptr[rip+pL_]);
 		sub_rm(p1, rax);
 		if (fullReg) {
 			sbb(*fullReg, 0);
@@ -683,7 +685,7 @@ printf("isFp=%d\n", isFp);
 		const Pack& p1 = t.sub(pn_, pn_);
 		load_rm(p0, px);
 		sub_rm(p0, py, withCarry);
-		mov(rax, pL_);
+		lea(rax, ptr[rip+pL_]);
 		load_rm(p1, rax);
 		sbb(rax, rax); // rax = (x > y) ? 0 : -1
 		for (size_t i = 0; i < p1.size(); i++) {
@@ -724,7 +726,7 @@ printf("isFp=%d\n", isFp);
 		Label exit;
 		if (isFullBit_) {
 			jnc("@f");
-			mov(t2[0], pL_); // t2 is not used
+			lea(t2[0], ptr[rip+pL_]); // t2[0] is not used
 			sub_rm(t1, t2[0]);
 			jmp(exit);
 		L("@@");
@@ -771,7 +773,7 @@ printf("isFp=%d\n", isFp);
 
 		inLocalLabel();
 		gen_raw_add(pz, px, py, rax, pn_);
-		mov(px, pL_); // destroy px
+		lea(px, ptr[rip+pL_]);
 		if (isFullBit_) {
 			jc(".over", jmpMode);
 		}
@@ -894,7 +896,7 @@ printf("isFp=%d\n", isFp);
 		Label exit;
 		gen_raw_sub(pz, px, py, rax, pn_);
 		jnc(exit, jmpMode);
-		mov(px, pL_);
+		lea(px, ptr[rip+pL_]);
 		gen_raw_add(pz, pz, px, rax, pn_);
 	L(exit);
 		return func;
@@ -1000,7 +1002,7 @@ printf("isFp=%d\n", isFp);
 
 		mov(a, rp_);
 		mul(t6);
-		mov(t0, pL_);
+		lea(t0, ptr[rip+pL_]);
 		mov(t7, a); // q
 
 		// [d:t7:t1] = p * q
@@ -1069,7 +1071,7 @@ printf("isFp=%d\n", isFp);
 
 		mov(a, rp_);
 		mul(t10);
-		mov(t0, pL_);
+		lea(t0, ptr[rip+pL_]);
 		mov(t7, a); // q
 
 		// [d:t7:t2:t1] = p * q
@@ -1149,7 +1151,7 @@ printf("isFp=%d\n", isFp);
 
 		mov(a, rp_);
 		mul(z);
-		mov(t0, pL_);
+		lea(t0, ptr[rip+pL_]);
 		mov(t7, a); // q
 
 		// [d:t7:t3:t2:t1] = p * q
@@ -1405,7 +1407,7 @@ printf("isFp=%d\n", isFp);
 
 	L(fp_mulL);
 		vmovq(xm0, p0); // save p0
-		mov(p0, pL_);
+		lea(p0, ptr[rip+pL_]);
 		vmovq(xm1, p2);
 		mov(p2, ptr [p2]);
 		montgomery4_1(rp_, t0, t7, t3, t2, t1, p1, p2, p0, t4, t5, t6, t8, t9, true, xm2);
@@ -1501,7 +1503,7 @@ printf("isFp=%d\n", isFp);
 		mov(a, rp_);
 		mul(c[0]); // q = a
 		mov(d, a);
-		mov(t1, pL_);
+		lea(t1, ptr[rip+pL_]);
 		// c += p * q
 		mulAdd(c, 6, t1);
 	}
@@ -1547,7 +1549,7 @@ printf("isFp=%d\n", isFp);
 		const Pack z = Pack(t3, t2, t1, t0, t7, t6);
 		const Pack keep = Pack(rdx, rax, px, py, t8, t9);
 		mov_rr(keep, z);
-		mov(t5, pL_);
+		lea(t5, ptr[rip+pL_]);
 		sub_rm(z, t5);
 		cmovc_rr(z, keep);
 		store_mr(pz, z);
@@ -1577,7 +1579,7 @@ printf("isFp=%d\n", isFp);
 		const Reg64& t9 = sf.t[9];
 
 		vmovq(xm0, p0); // save p0
-		mov(t7, pL_);
+		lea(t7, ptr[rip+pL_]);
 		mov(t9, ptr [p2]);
 		//                c3, c2, c1, c0, px, y,  p,
 		montgomery3_1(rp_, t0, t3, t2, t1, p1, t9, t7, t4, t5, t6, t8, p0, true);
@@ -1623,7 +1625,7 @@ printf("isFp=%d\n", isFp);
 		const Reg64& t9 = sf.t[9];
 
 		vmovq(xm0, pz); // save pz
-		mov(t7, pL_);
+		lea(t7, ptr[rip+pL_]);
 		mov(t9, ptr [px]);
 		mul3x1_sqr1(px, t9, t3, t2, t1, t0);
 		mov(t0, rdx);

From 3768ebfedf27cbf94b572ad900131e931bed7268 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 16 Sep 2020 18:39:41 +0900
Subject: [PATCH 295/553] test mclx_Fp_mul

---
 src/fp_generator.hpp      | 93 ++++++++++++++++++++++++++++++++++++---
 test/bench.hpp            |  1 +
 test/static_code_test.cpp | 35 ++++++++++++---
 3 files changed, 118 insertions(+), 11 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index a7536e6e..2feaf7bc 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -28,11 +28,53 @@ namespace mcl {
 #ifdef MCL_STATIC_JIT
 typedef fp::Unit Unit;
 extern "C" {
-Unit mclx_Fr_addPre(Unit*, const Unit*, const Unit*);
-void mclx_Fr_add(Unit*, const Unit*, const Unit*);
-
 Unit mclx_Fp_addPre(Unit*, const Unit*, const Unit*);
+Unit mclx_Fp_subPre(Unit*, const Unit*, const Unit*);
 void mclx_Fp_add(Unit*, const Unit*, const Unit*);
+void mclx_Fp_sub(Unit*, const Unit*, const Unit*);
+void mclx_Fp_shr1(Unit*, const Unit*);
+void mclx_Fp_neg(Unit*, const Unit*);
+void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
+void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
+void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
+void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
+Unit mclx_FpDbl_addPre(Unit*, const Unit*, const Unit*);
+Unit mclx_FpDbl_subPre(Unit*, const Unit*, const Unit*);
+void mclx_FpDbl_mulPre(Unit*, const Unit*, const Unit*);
+void mclx_FpDbl_sqrPre(Unit*, const Unit*);
+void mclx_FpDbl_mod(Unit*, const Unit*);
+void mclx_Fp_mul(Unit*, const Unit*, const Unit*);
+void mclx_Fp_sqr(Unit*, const Unit*);
+void mclx_Fp2_add(Unit*, const Unit*, const Unit*);
+void mclx_Fp2_sub(Unit*, const Unit*, const Unit*);
+void mclx_Fp2_neg(Unit*, const Unit*);
+void mclx_Fp2_mul(Unit*, const Unit*, const Unit*);
+void mclx_Fp2_sqr(Unit*, const Unit*);
+void mclx_Fp2_mul_xi(Unit*, const Unit*);
+
+Unit mclx_Fr_addPre(Unit*, const Unit*, const Unit*);
+Unit mclx_Fr_subPre(Unit*, const Unit*, const Unit*);
+void mclx_Fr_add(Unit*, const Unit*, const Unit*);
+void mclx_Fr_sub(Unit*, const Unit*, const Unit*);
+void mclx_Fr_shr1(Unit*, const Unit*);
+void mclx_Fr_neg(Unit*, const Unit*);
+void mclx_FrDbl_add(Unit*, const Unit*, const Unit*);
+void mclx_FrDbl_sub(Unit*, const Unit*, const Unit*);
+void mclx_FrDbl_add(Unit*, const Unit*, const Unit*);
+void mclx_FrDbl_sub(Unit*, const Unit*, const Unit*);
+Unit mclx_FrDbl_addPre(Unit*, const Unit*, const Unit*);
+Unit mclx_FrDbl_subPre(Unit*, const Unit*, const Unit*);
+void mclx_FrDbl_mulPre(Unit*, const Unit*, const Unit*);
+void mclx_FrDbl_sqrPre(Unit*, const Unit*);
+void mclx_FrDbl_mod(Unit*, const Unit*);
+void mclx_Fr_mul(Unit*, const Unit*, const Unit*);
+void mclx_Fr_sqr(Unit*, const Unit*);
+void mclx_Fr2_add(Unit*, const Unit*, const Unit*);
+void mclx_Fr2_sub(Unit*, const Unit*, const Unit*);
+void mclx_Fr2_neg(Unit*, const Unit*);
+void mclx_Fr2_mul(Unit*, const Unit*, const Unit*);
+void mclx_Fr2_sqr(Unit*, const Unit*);
+void mclx_Fr2_mul_xi(Unit*, const Unit*);
 }
 #endif
 
@@ -327,6 +369,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 private:
 	void init_inner(Op& op, const char *suf)
 	{
+		const bool isFp = suf && suf[0] == 'F' && suf[1] == 'p';
 		op_ = &op;
 		L(pL_);
 		p_ = reinterpret_cast<const uint64_t*>(getCurr());
@@ -382,6 +425,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		setFuncInfo(prof_, suf, "_neg", op.fp_negA_, getCurr());
 		align(16);
 
+if (op.xi_a) {
 		op.fpDbl_addA_ = gen_fpDbl_add();
 		setFuncInfo(prof_, suf, "Dbl_add", op.fpDbl_addA_, getCurr());
 		align(16);
@@ -409,6 +453,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		op.fpDbl_modA_ = gen_fpDbl_mod(op);
 		setFuncInfo(prof_, suf, "Dbl_mod", op.fpDbl_modA_, getCurr());
 		align(16);
+}
 
 		op.fp_mulA_ = gen_mul();
 		setFuncInfo(prof_, suf, "_mul", op.fp_mulA_, getCurr());
@@ -463,14 +508,50 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		align(16);
 
 #ifdef MCL_STATIC_JIT
-		const bool isFp = strcmp(suf, "Fp") == 0;
-printf("isFp=%d\n", isFp);
 		if (isFp) {
+			// Fp, sizeof(Fp) = 48
 			op.fp_addPre = mclx_Fp_addPre;
-			op.fp_addA_ = mclx_Fr_add;
+			op.fp_subPre = mclx_Fp_subPre;
+			op.fp_addA_ = mclx_Fp_add;
+			op.fp_subA_ = mclx_Fp_sub;
+			op.fp_shr1 = mclx_Fp_shr1;
+			op.fp_negA_ = mclx_Fp_neg;
+			op.fpDbl_addA_ = mclx_FpDbl_add;
+			op.fpDbl_subA_ = mclx_FpDbl_sub;
+			op.fpDbl_addPre = mclx_FpDbl_addPre;
+			op.fpDbl_subPre = mclx_FpDbl_subPre;
+			op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre;
+			op.fpDbl_sqrPreA_ = mclx_FpDbl_sqrPre;
+			op.fpDbl_modA_ = mclx_FpDbl_mod;
+			op.fp_mulA_ = mclx_Fp_mul;
+			op.fp_sqrA_ = mclx_Fp_sqr;
+#if 0
+//			op.fp_preInv = mclx_Fp_preInv;
+			op.fp2_addA_ = mclx_Fp2_add;
+			op.fp2_subA_ = mclx_Fp2_sub;
+			op.fp2_negA_ = mclx_Fp2_neg;
+			op.fp2_mulA_ = mclx_Fp2_mul;
+			op.fp2_sqrA_ = mclx_Fp2_sqr;
+			op.fp2_mul_xiA_ = mclx_Fp2_mul_xi;
+#endif
 		} else {
+			// Fr, sizeof(Fr) = 32
 			op.fp_addPre = mclx_Fr_addPre;
+			op.fp_subPre = mclx_Fr_subPre;
 			op.fp_addA_ = mclx_Fr_add;
+			op.fp_subA_ = mclx_Fr_sub;
+			op.fp_shr1 = mclx_Fr_shr1;
+			op.fp_negA_ = mclx_Fr_neg;
+			op.fpDbl_addA_ = mclx_FpDbl_add;
+			op.fpDbl_subA_ = mclx_FpDbl_sub;
+			op.fpDbl_addPre = mclx_FpDbl_addPre;
+			op.fpDbl_subPre = mclx_FpDbl_subPre;
+			op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre;
+			op.fpDbl_sqrPreA_ = mclx_FpDbl_sqrPre;
+			op.fpDbl_modA_ = mclx_FpDbl_mod;
+			op.fp_mulA_ = mclx_Fr_mul;
+			op.fp_sqrA_ = mclx_Fr_sqr;
+			op.fp_preInv = mclx_Fr_preInv;
 		}
 #endif
 	}
diff --git a/test/bench.hpp b/test/bench.hpp
index c8c3911b..b4a8bd29 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -100,6 +100,7 @@ void testBench(const G1& P, const G2& Q)
 	CYBOZU_BENCH_C("Fp::mul       ", C3, Fp::mul, x, x, y);
 	CYBOZU_BENCH_C("Fp::sqr       ", C3, Fp::sqr, x, x);
 	CYBOZU_BENCH_C("Fp::inv       ", C3, Fp::inv, x, x);
+	CYBOZU_BENCH_C("Fp::pow       ", C3, Fp::pow, x, x, y);
 	Fp2 xx, yy;
 	xx.a = x;
 	xx.b = 3;
diff --git a/test/static_code_test.cpp b/test/static_code_test.cpp
index 93dc223f..e69fda7b 100644
--- a/test/static_code_test.cpp
+++ b/test/static_code_test.cpp
@@ -2,14 +2,39 @@
 
 using namespace mcl::bn;
 
-int main()
+void testFr()
+{
+	Fr x, y, z;
+	x = 3;
+	y = 5;
+	z = x + y;
+	printf("x=%s\n", x.getStr().c_str());
+	printf("y=%s\n", y.getStr().c_str());
+	printf("z=%s\n", z.getStr().c_str());
+	z = x * y;
+	printf("z=%s\n", z.getStr().c_str());
+	Fr::sqr(z, x);
+	printf("z=%s\n", z.getStr().c_str());
+}
+
+void testFp()
 {
-	initPairing(mcl::BLS12_381);
 	Fp x, y, z;
 	x = 3;
 	y = 5;
 	z = x + y;
-	printf("x=%s\n", x.getStr(16).c_str());
-	printf("y=%s\n", y.getStr(16).c_str());
-	printf("z=%s\n", z.getStr(16).c_str());
+	printf("x=%s\n", x.getStr().c_str());
+	printf("y=%s\n", y.getStr().c_str());
+	printf("z=%s\n", z.getStr().c_str());
+	z = x * y;
+	printf("z=%s\n", z.getStr().c_str());
+	Fp::sqr(z, x);
+	printf("z=%s\n", z.getStr().c_str());
+}
+
+int main()
+{
+	initPairing(mcl::BLS12_381);
+	testFr();
+	testFp();
 }

From df3e118538c40072e6b1a4cab65ff07d18b62fd2 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 17 Sep 2020 10:16:53 +0900
Subject: [PATCH 296/553] remove suf

---
 include/mcl/bn.hpp   |   4 +-
 include/mcl/fp.hpp   |  12 ++---
 include/mcl/op.hpp   |   2 +-
 src/fp.cpp           |  10 ++--
 src/fp_generator.hpp | 118 ++++++++++++++++---------------------------
 5 files changed, 56 insertions(+), 90 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 8710f554..3668da26 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -875,9 +875,9 @@ struct Param {
 			assert((p % 6) == 1);
 			r = local::evalPoly(z, rCoff);
 		}
-		Fr::init(pb, r, mode, "Fr");
+		Fr::init(pb, r, mode);
 		if (!*pb) return;
-		Fp::init(pb, cp.xi_a, p, mode, "Fp");
+		Fp::init(pb, cp.xi_a, p, mode);
 		if (!*pb) return;
 		Fp2::init();
 		const Fp2 xi(cp.xi_a, 1);
diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index c8b5a6d8..6c5b0b05 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -130,10 +130,10 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 		xi_a is used for Fp2::mul_xi(), where xi = xi_a + i and i^2 = -1
 		if xi_a = 0 then asm functions for Fp2 are not generated.
 	*/
-	static inline void init(bool *pb, int xi_a, const mpz_class& p, fp::Mode mode = fp::FP_AUTO, const char *suf = 0)
+	static inline void init(bool *pb, int xi_a, const mpz_class& p, fp::Mode mode = fp::FP_AUTO)
 	{
 		assert(maxBitSize <= MCL_MAX_BIT_SIZE);
-		*pb = op_.init(p, maxBitSize, xi_a, mode, suf);
+		*pb = op_.init(p, maxBitSize, xi_a, mode);
 		if (!*pb) return;
 		{ // set oneRep
 			FpT& one = *reinterpret_cast<FpT*>(op_.oneRep);
@@ -163,16 +163,16 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 #endif
 		*pb = true;
 	}
-	static inline void init(bool *pb, const mpz_class& p, fp::Mode mode = fp::FP_AUTO, const char *suf = 0)
+	static inline void init(bool *pb, const mpz_class& p, fp::Mode mode = fp::FP_AUTO)
 	{
-		init(pb, 0, p, mode, suf);
+		init(pb, 0, p, mode);
 	}
-	static inline void init(bool *pb, const char *mstr, fp::Mode mode = fp::FP_AUTO, const char *suf = 0)
+	static inline void init(bool *pb, const char *mstr, fp::Mode mode = fp::FP_AUTO)
 	{
 		mpz_class p;
 		gmp::setStr(pb, p, mstr);
 		if (!*pb) return;
-		init(pb, p, mode, suf);
+		init(pb, p, mode);
 	}
 	static inline size_t getModulo(char *buf, size_t bufSize)
 	{
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 45320e50..99c0e4d8 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -364,7 +364,7 @@ struct Op {
 		*/
 		fp_mul(y, x, R2, p);
 	}
-	bool init(const mpz_class& p, size_t maxBitSize, int xi_a, Mode mode, const char *suf = 0, size_t mclMaxBitSize = MCL_MAX_BIT_SIZE);
+	bool init(const mpz_class& p, size_t maxBitSize, int xi_a, Mode mode, size_t mclMaxBitSize = MCL_MAX_BIT_SIZE);
 #ifdef MCL_USE_XBYAK
 	static FpGenerator* createFpGenerator();
 	static void destroyFpGenerator(FpGenerator *fg);
diff --git a/src/fp.cpp b/src/fp.cpp
index 998a53b6..b3b07d19 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -346,7 +346,7 @@ static void initInvTbl(Op& op)
 }
 #endif
 
-static bool initForMont(Op& op, const Unit *p, Mode mode, const char *suf)
+static bool initForMont(Op& op, const Unit *p, Mode mode)
 {
 	const size_t N = op.N;
 	bool b;
@@ -366,19 +366,17 @@ static bool initForMont(Op& op, const Unit *p, Mode mode, const char *suf)
 	if (mode != FP_XBYAK) return true;
 #ifdef MCL_USE_XBYAK
 	if (op.fg == 0) op.fg = Op::createFpGenerator();
-	bool useXbyak = op.fg->init(op, suf);
+	bool useXbyak = op.fg->init(op);
 
 	if (useXbyak && op.isMont && N <= 4) {
 		op.fp_invOp = &invOpForMontC;
 		initInvTbl(op);
 	}
-#else
-	(void)suf;
 #endif
 	return true;
 }
 
-bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, const char *suf, size_t mclMaxBitSize)
+bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size_t mclMaxBitSize)
 {
 	if (mclMaxBitSize != MCL_MAX_BIT_SIZE) return false;
 #ifdef MCL_USE_VINT
@@ -536,7 +534,7 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, cons
 		if (!b) return false;
 	}
 	modp.init(mp);
-	return fp::initForMont(*this, p, mode, suf);
+	return fp::initForMont(*this, p, mode);
 }
 
 void copyUnitToByteAsLE(uint8_t *dst, const Unit *src, size_t byteSize)
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 2feaf7bc..f4a626a5 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -34,6 +34,8 @@ void mclx_Fp_add(Unit*, const Unit*, const Unit*);
 void mclx_Fp_sub(Unit*, const Unit*, const Unit*);
 void mclx_Fp_shr1(Unit*, const Unit*);
 void mclx_Fp_neg(Unit*, const Unit*);
+void mclx_Fp_mul(Unit*, const Unit*, const Unit*);
+void mclx_Fp_sqr(Unit*, const Unit*);
 void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
 void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
 void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
@@ -43,8 +45,6 @@ Unit mclx_FpDbl_subPre(Unit*, const Unit*, const Unit*);
 void mclx_FpDbl_mulPre(Unit*, const Unit*, const Unit*);
 void mclx_FpDbl_sqrPre(Unit*, const Unit*);
 void mclx_FpDbl_mod(Unit*, const Unit*);
-void mclx_Fp_mul(Unit*, const Unit*, const Unit*);
-void mclx_Fp_sqr(Unit*, const Unit*);
 void mclx_Fp2_add(Unit*, const Unit*, const Unit*);
 void mclx_Fp2_sub(Unit*, const Unit*, const Unit*);
 void mclx_Fp2_neg(Unit*, const Unit*);
@@ -58,23 +58,9 @@ void mclx_Fr_add(Unit*, const Unit*, const Unit*);
 void mclx_Fr_sub(Unit*, const Unit*, const Unit*);
 void mclx_Fr_shr1(Unit*, const Unit*);
 void mclx_Fr_neg(Unit*, const Unit*);
-void mclx_FrDbl_add(Unit*, const Unit*, const Unit*);
-void mclx_FrDbl_sub(Unit*, const Unit*, const Unit*);
-void mclx_FrDbl_add(Unit*, const Unit*, const Unit*);
-void mclx_FrDbl_sub(Unit*, const Unit*, const Unit*);
-Unit mclx_FrDbl_addPre(Unit*, const Unit*, const Unit*);
-Unit mclx_FrDbl_subPre(Unit*, const Unit*, const Unit*);
-void mclx_FrDbl_mulPre(Unit*, const Unit*, const Unit*);
-void mclx_FrDbl_sqrPre(Unit*, const Unit*);
-void mclx_FrDbl_mod(Unit*, const Unit*);
 void mclx_Fr_mul(Unit*, const Unit*, const Unit*);
 void mclx_Fr_sqr(Unit*, const Unit*);
-void mclx_Fr2_add(Unit*, const Unit*, const Unit*);
-void mclx_Fr2_sub(Unit*, const Unit*, const Unit*);
-void mclx_Fr2_neg(Unit*, const Unit*);
-void mclx_Fr2_mul(Unit*, const Unit*, const Unit*);
-void mclx_Fr2_sqr(Unit*, const Unit*);
-void mclx_Fr2_mul_xi(Unit*, const Unit*);
+int mclx_Fr_preInv(Unit*, const Unit*);
 }
 #endif
 
@@ -354,12 +340,12 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		useMulx_ = cpu_.has(Xbyak::util::Cpu::tBMI2);
 		useAdx_ = cpu_.has(Xbyak::util::Cpu::tADX);
 	}
-	bool init(Op& op, const char *suf)
+	bool init(Op& op)
 	{
 		if (!cpu_.has(Xbyak::util::Cpu::tAVX)) return false;
 		reset(); // reset jit code for reuse
 		setProtectModeRW(); // read/write memory
-		init_inner(op, suf);
+		init_inner(op);
 		// ToDo : recover op if false
 		if (Xbyak::GetError()) return false;
 //		printf("code size=%d\n", (int)getSize());
@@ -367,9 +353,9 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		return true;
 	}
 private:
-	void init_inner(Op& op, const char *suf)
+	void init_inner(Op& op)
 	{
-		const bool isFp = suf && suf[0] == 'F' && suf[1] == 'p';
+		const char *suf = op.xi_a ? "Fp" : "Fr";
 		op_ = &op;
 		L(pL_);
 		p_ = reinterpret_cast<const uint64_t*>(getCurr());
@@ -413,102 +399,100 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		op.fp_addA_ = gen_fp_add();
 		setFuncInfo(prof_, suf, "_add", op.fp_addA_, getCurr());
 
+		align(16);
 		op.fp_subA_ = gen_fp_sub();
 		setFuncInfo(prof_, suf, "_sub", op.fp_subA_, getCurr());
-		align(16);
 
+		align(16);
 		op.fp_shr1 = gen_shr1();
 		setFuncInfo(prof_, suf, "_shr1", op.fp_shr1, getCurr());
-		align(16);
 
+		align(16);
 		op.fp_negA_ = gen_fp_neg();
 		setFuncInfo(prof_, suf, "_neg", op.fp_negA_, getCurr());
+
 		align(16);
+		op.fp_mulA_ = gen_mul();
+		setFuncInfo(prof_, suf, "_mul", op.fp_mulA_, getCurr());
+
+		if (op.fp_mulA_) {
+			op.fp_mul = fp::func_ptr_cast<void4u>(op.fp_mulA_); // used in toMont/fromMont
+		}
 
-if (op.xi_a) {
+		align(16);
+		op.fp_sqrA_ = gen_sqr();
+		setFuncInfo(prof_, suf, "_sqr", op.fp_sqrA_, getCurr());
+
+		if (op.primeMode != PM_NIST_P192 && op.N <= 4) { // support general op.N but not fast for op.N > 4
+			align(16);
+			op.fp_preInv = getCurr<int2u>();
+			gen_preInv();
+			setFuncInfo(prof_, suf, "_preInv", op.fp_preInv, getCurr());
+		}
+		if (op.xi_a == 0) return; // Fp2 is not used
+		align(16);
 		op.fpDbl_addA_ = gen_fpDbl_add();
 		setFuncInfo(prof_, suf, "Dbl_add", op.fpDbl_addA_, getCurr());
-		align(16);
 
+		align(16);
 		op.fpDbl_subA_ = gen_fpDbl_sub();
 		setFuncInfo(prof_, suf, "Dbl_sub", op.fpDbl_subA_, getCurr());
-		align(16);
 
+		align(16);
 		op.fpDbl_addPre = gen_addSubPre(true, pn_ * 2);
 		setFuncInfo(prof_, suf, "Dbl_addPre", op.fpDbl_addPre, getCurr());
-		align(16);
 
+		align(16);
 		op.fpDbl_subPre = gen_addSubPre(false, pn_ * 2);
 		setFuncInfo(prof_, suf, "Dbl_subPre", op.fpDbl_subPre, getCurr());
-		align(16);
 
+		align(16);
 		op.fpDbl_mulPreA_ = gen_fpDbl_mulPre();
 		setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPreA_, getCurr());
-		align(16);
 
+		align(16);
 		op.fpDbl_sqrPreA_ = gen_fpDbl_sqrPre();
 		setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPreA_, getCurr());
-		align(16);
 
+		align(16);
 		op.fpDbl_modA_ = gen_fpDbl_mod(op);
 		setFuncInfo(prof_, suf, "Dbl_mod", op.fpDbl_modA_, getCurr());
-		align(16);
-}
 
-		op.fp_mulA_ = gen_mul();
-		setFuncInfo(prof_, suf, "_mul", op.fp_mulA_, getCurr());
 		align(16);
-
-		if (op.fp_mulA_) {
-			op.fp_mul = fp::func_ptr_cast<void4u>(op.fp_mulA_); // used in toMont/fromMont
-		}
-
-		op.fp_sqrA_ = gen_sqr();
-		setFuncInfo(prof_, suf, "_sqr", op.fp_sqrA_, getCurr());
-		align(16);
-
-		if (op.primeMode != PM_NIST_P192 && op.N <= 4) { // support general op.N but not fast for op.N > 4
-			op.fp_preInv = getCurr<int2u>();
-			gen_preInv();
-			setFuncInfo(prof_, suf, "_preInv", op.fp_preInv, getCurr());
-			align(16);
-		}
-		if (op.xi_a == 0) return; // Fp2 is not used
 		op.fp2_addA_ = gen_fp2_add();
 		setFuncInfo(prof_, suf, "2_add", op.fp2_addA_, getCurr());
-		align(16);
 
+		align(16);
 		op.fp2_subA_ = gen_fp2_sub();
 		setFuncInfo(prof_, suf, "2_sub", op.fp2_subA_, getCurr());
-		align(16);
 
+		align(16);
 		op.fp2_negA_ = gen_fp2_neg();
 		setFuncInfo(prof_, suf, "2_neg", op.fp2_negA_, getCurr());
-		align(16);
 
 		op.fp2_mulNF = 0;
+		align(16);
 		op.fp2Dbl_mulPreA_ = gen_fp2Dbl_mulPre();
 		if (op.fp2Dbl_mulPreA_) setFuncInfo(prof_, suf, "2Dbl_mulPre", op.fp2Dbl_mulPreA_, getCurr());
-		align(16);
 
+		align(16);
 		op.fp2Dbl_sqrPreA_ = gen_fp2Dbl_sqrPre();
 		if (op.fp2Dbl_sqrPreA_) setFuncInfo(prof_, suf, "2Dbl_sqrPre", op.fp2Dbl_sqrPreA_, getCurr());
-		align(16);
 
+		align(16);
 		op.fp2_mulA_ = gen_fp2_mul();
 		setFuncInfo(prof_, suf, "2_mul", op.fp2_mulA_, getCurr());
-		align(16);
 
+		align(16);
 		op.fp2_sqrA_ = gen_fp2_sqr();
 		setFuncInfo(prof_, suf, "2_sqr", op.fp2_sqrA_, getCurr());
-		align(16);
 
+		align(16);
 		op.fp2_mul_xiA_ = gen_fp2_mul_xi();
 		setFuncInfo(prof_, suf, "2_mul_xi", op.fp2_mul_xiA_, getCurr());
-		align(16);
 
 #ifdef MCL_STATIC_JIT
-		if (isFp) {
+		if (op.xi_a) {
 			// Fp, sizeof(Fp) = 48
 			op.fp_addPre = mclx_Fp_addPre;
 			op.fp_subPre = mclx_Fp_subPre;
@@ -525,15 +509,6 @@ if (op.xi_a) {
 			op.fpDbl_modA_ = mclx_FpDbl_mod;
 			op.fp_mulA_ = mclx_Fp_mul;
 			op.fp_sqrA_ = mclx_Fp_sqr;
-#if 0
-//			op.fp_preInv = mclx_Fp_preInv;
-			op.fp2_addA_ = mclx_Fp2_add;
-			op.fp2_subA_ = mclx_Fp2_sub;
-			op.fp2_negA_ = mclx_Fp2_neg;
-			op.fp2_mulA_ = mclx_Fp2_mul;
-			op.fp2_sqrA_ = mclx_Fp2_sqr;
-			op.fp2_mul_xiA_ = mclx_Fp2_mul_xi;
-#endif
 		} else {
 			// Fr, sizeof(Fr) = 32
 			op.fp_addPre = mclx_Fr_addPre;
@@ -542,13 +517,6 @@ if (op.xi_a) {
 			op.fp_subA_ = mclx_Fr_sub;
 			op.fp_shr1 = mclx_Fr_shr1;
 			op.fp_negA_ = mclx_Fr_neg;
-			op.fpDbl_addA_ = mclx_FpDbl_add;
-			op.fpDbl_subA_ = mclx_FpDbl_sub;
-			op.fpDbl_addPre = mclx_FpDbl_addPre;
-			op.fpDbl_subPre = mclx_FpDbl_subPre;
-			op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre;
-			op.fpDbl_sqrPreA_ = mclx_FpDbl_sqrPre;
-			op.fpDbl_modA_ = mclx_FpDbl_mod;
 			op.fp_mulA_ = mclx_Fr_mul;
 			op.fp_sqrA_ = mclx_Fr_sqr;
 			op.fp_preInv = mclx_Fr_preInv;

From 11a752cb6cefded11603a54d5be594fd81626a44 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 17 Sep 2020 10:41:07 +0900
Subject: [PATCH 297/553] Fr, Fp, Fp2 test ok

---
 Makefile                  |  2 +-
 src/fp_generator.hpp      | 10 +++-----
 test/static_code_test.cpp | 53 +++++++++++++++++++++------------------
 3 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/Makefile b/Makefile
index dbc2a411..9e37876b 100644
--- a/Makefile
+++ b/Makefile
@@ -238,7 +238,7 @@ $(GEN_EXE): src/gen.cpp src/llvm_gen.hpp
 	$(CXX) -o $@ $< $(CFLAGS)
 
 src/dump_code: src/dump_code.cpp src/fp.cpp src/fp_generator.hpp
-	$(CXX) -o $@ src/dump_code.cpp src/fp.cpp -I include -DMCL_DUMP_JIT -DMCL_MAX_BIT_SIZE=384 -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER
+	$(CXX) -o $@ src/dump_code.cpp src/fp.cpp -g -I include -DMCL_DUMP_JIT -DMCL_MAX_BIT_SIZE=384 -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER
 
 src/static_code.asm: src/dump_code
 	$< > $@
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index f4a626a5..b5d4628e 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -410,7 +410,9 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		align(16);
 		op.fp_negA_ = gen_fp_neg();
 		setFuncInfo(prof_, suf, "_neg", op.fp_negA_, getCurr());
-
+		align(16);
+		op.fpDbl_modA_ = gen_fpDbl_mod(op);
+		setFuncInfo(prof_, suf, "Dbl_mod", op.fpDbl_modA_, getCurr());
 		align(16);
 		op.fp_mulA_ = gen_mul();
 		setFuncInfo(prof_, suf, "_mul", op.fp_mulA_, getCurr());
@@ -454,10 +456,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		op.fpDbl_sqrPreA_ = gen_fpDbl_sqrPre();
 		setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPreA_, getCurr());
 
-		align(16);
-		op.fpDbl_modA_ = gen_fpDbl_mod(op);
-		setFuncInfo(prof_, suf, "Dbl_mod", op.fpDbl_modA_, getCurr());
-
 		align(16);
 		op.fp2_addA_ = gen_fp2_add();
 		setFuncInfo(prof_, suf, "2_add", op.fp2_addA_, getCurr());
@@ -493,7 +491,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 
 #ifdef MCL_STATIC_JIT
 		if (op.xi_a) {
-			// Fp, sizeof(Fp) = 48
+			// Fp, sizeof(Fp) = 48, supports Fp2
 			op.fp_addPre = mclx_Fp_addPre;
 			op.fp_subPre = mclx_Fp_subPre;
 			op.fp_addA_ = mclx_Fp_add;
diff --git a/test/static_code_test.cpp b/test/static_code_test.cpp
index e69fda7b..8238e481 100644
--- a/test/static_code_test.cpp
+++ b/test/static_code_test.cpp
@@ -1,40 +1,45 @@
+#include <cybozu/test.hpp>
 #include <mcl/bls12_381.hpp>
 
 using namespace mcl::bn;
 
-void testFr()
+CYBOZU_TEST_AUTO(init)
 {
-	Fr x, y, z;
+	initPairing(mcl::BLS12_381);
+}
+
+CYBOZU_TEST_AUTO(Fr)
+{
+	Fr x, y;
 	x = 3;
 	y = 5;
-	z = x + y;
-	printf("x=%s\n", x.getStr().c_str());
-	printf("y=%s\n", y.getStr().c_str());
-	printf("z=%s\n", z.getStr().c_str());
-	z = x * y;
-	printf("z=%s\n", z.getStr().c_str());
-	Fr::sqr(z, x);
-	printf("z=%s\n", z.getStr().c_str());
+	CYBOZU_TEST_EQUAL(x + y, 8);
+	CYBOZU_TEST_EQUAL(x - y, -2);
+	CYBOZU_TEST_EQUAL(x * y, 15);
 }
 
-void testFp()
+CYBOZU_TEST_AUTO(Fp)
 {
-	Fp x, y, z;
+	Fp x, y;
 	x = 3;
 	y = 5;
-	z = x + y;
-	printf("x=%s\n", x.getStr().c_str());
-	printf("y=%s\n", y.getStr().c_str());
-	printf("z=%s\n", z.getStr().c_str());
-	z = x * y;
-	printf("z=%s\n", z.getStr().c_str());
-	Fp::sqr(z, x);
-	printf("z=%s\n", z.getStr().c_str());
+	CYBOZU_TEST_EQUAL(x + y, 8);
+	CYBOZU_TEST_EQUAL(x - y, -2);
+	CYBOZU_TEST_EQUAL(x * y, 15);
 }
 
-int main()
+CYBOZU_TEST_AUTO(Fp2)
 {
-	initPairing(mcl::BLS12_381);
-	testFr();
-	testFp();
+	Fp2 x, y;
+	x.a = 3;
+	x.b = 2;
+	y.a = 1;
+	y.b = 4;
+	/*
+		(3+2i)(1+4i)=3-8+(12+2)i
+	*/
+	CYBOZU_TEST_EQUAL(x + y, Fp2(4, 6));
+	CYBOZU_TEST_EQUAL(x - y, Fp2(2, -2));
+	CYBOZU_TEST_EQUAL(x * y, Fp2(-5, 14));
 }
+

From a522fd532d207e698665efa631166a26e96b9f35 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 17 Sep 2020 11:07:09 +0900
Subject: [PATCH 298/553] test of G1 and G2

---
 test/static_code_test.cpp | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/test/static_code_test.cpp b/test/static_code_test.cpp
index 8238e481..3751741e 100644
--- a/test/static_code_test.cpp
+++ b/test/static_code_test.cpp
@@ -43,3 +43,28 @@ CYBOZU_TEST_AUTO(Fp2)
 	CYBOZU_TEST_EQUAL(x * y, Fp2(-5, 14));
 }
 
+CYBOZU_TEST_AUTO(G1)
+{
+	G1 P, Q;
+	hashAndMapToG1(P, "abc", 3);
+	Fr r1, r2;
+	r1.setHashOf("abc", 3);
+	r2 = -r1;
+	G1::mul(Q, P, r1);
+	Q = -Q;
+	P *= r2;
+	CYBOZU_TEST_EQUAL(P, Q);
+}
+
+CYBOZU_TEST_AUTO(G2)
+{
+	G2 P, Q;
+	hashAndMapToG2(P, "abc", 3);
+	Fr r1, r2;
+	r1.setHashOf("abc", 3);
+	r2 = -r1;
+	G2::mul(Q, P, r1);
+	Q = -Q;
+	P *= r2;
+	CYBOZU_TEST_EQUAL(P, Q);
+}

From eaabb2337b011fb4989752a42fcf2d4eefa65fcf Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 17 Sep 2020 16:44:20 +0900
Subject: [PATCH 299/553] fix pic code

---
 Makefile               |  2 +-
 include/mcl/op.hpp     |  7 +++-
 sample/bench.cpp       |  4 +-
 sample/rawbench.cpp    |  2 +-
 src/fp.cpp             | 25 ++++++++++--
 src/fp_generator.hpp   | 84 +++-------------------------------------
 src/fp_static_code.hpp | 87 ++++++++++++++++++++++++++++++++++++++++++
 src/low_func.hpp       |  2 +-
 test/ec_test.cpp       |  2 +-
 test/fp_test.cpp       |  6 +--
 test/fp_tower_test.cpp |  2 +-
 11 files changed, 131 insertions(+), 92 deletions(-)
 create mode 100644 src/fp_static_code.hpp

diff --git a/Makefile b/Makefile
index 9e37876b..1b59ce78 100644
--- a/Makefile
+++ b/Makefile
@@ -247,7 +247,7 @@ obj/static_code.o: src/static_code.asm
 	nasm -felf64 -o $@ $<
 
 bin/static_code_test.exe: test/static_code_test.cpp src/fp.cpp obj/static_code.o
-	$(CXX) -o $@ -O3 $^ -DMCL_STATIC_JIT -DMCL_MAX_BIT_SIZE=384 -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -I include -Wall -Wextra
+	$(CXX) -o $@ -O3 $^ -g -DMCL_DONT_USE_XBYAK -DMCL_STATIC_CODE -DMCL_MAX_BIT_SIZE=384 -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -I include -Wall -Wextra
  
 asm: $(LLVM_SRC)
 	$(LLVM_OPT) -O3 -o - $(LLVM_SRC) | $(LLVM_LLC) -O3 $(LLVM_FLAGS) -x86-asm-syntax=intel
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 99c0e4d8..22a78b18 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -16,6 +16,9 @@
 #endif
 #if !defined(MCL_DONT_USE_XBYAK) && (defined(_WIN64) || defined(__x86_64__)) && (MCL_SIZEOF_UNIT == 8)
 	#define MCL_USE_XBYAK
+#endif
+#if defined(MCL_USE_XBYAK) || defined(MCL_STATIC_CODE)
+	#define MCL_X64_ASM
 	#define MCL_XBYAK_DIRECT_CALL
 #endif
 
@@ -202,6 +205,8 @@ struct Op {
 	Unit R3[maxUnitSize];
 #ifdef MCL_USE_XBYAK
 	FpGenerator *fg;
+#endif
+#ifdef MCL_X64_ASM
 	mcl::Array<Unit> invTbl;
 #endif
 	void3u fp_addA_;
@@ -288,7 +293,7 @@ struct Op {
 		memset(one, 0, sizeof(one));
 		memset(R2, 0, sizeof(R2));
 		memset(R3, 0, sizeof(R3));
-#ifdef MCL_USE_XBYAK
+#ifdef MCL_X64_ASM
 		invTbl.clear();
 #endif
 		fp_addA_ = 0;
diff --git a/sample/bench.cpp b/sample/bench.cpp
index de81f258..d3c101ce 100644
--- a/sample/bench.cpp
+++ b/sample/bench.cpp
@@ -68,7 +68,7 @@ void benchFp(size_t bitSize, int mode)
 		if (mode & 4) benchFpSub(tbl[i].p, tbl[i].x, tbl[i].y, mcl::fp::FP_LLVM);
 		if (mode & 8) benchFpSub(tbl[i].p, tbl[i].x, tbl[i].y, mcl::fp::FP_LLVM_MONT);
 #endif
-#ifdef MCL_USE_XBYAK
+#ifdef MCL_X64_ASM
 		if (mode & 16) benchFpSub(tbl[i].p, tbl[i].x, tbl[i].y, mcl::fp::FP_XBYAK);
 #endif
 	}
@@ -122,7 +122,7 @@ void benchEc(size_t bitSize, int mode, mcl::ec::Mode ecMode)
 		if (mode & 4) benchEcSub(tbl[i], mcl::fp::FP_LLVM, ecMode);
 		if (mode & 8) benchEcSub(tbl[i], mcl::fp::FP_LLVM_MONT, ecMode);
 #endif
-#ifdef MCL_USE_XBYAK
+#ifdef MCL_X64_ASM
 		if (mode & 16) benchEcSub(tbl[i], mcl::fp::FP_XBYAK, ecMode);
 #endif
 	}
diff --git a/sample/rawbench.cpp b/sample/rawbench.cpp
index 4d7506ef..cc74bc3a 100644
--- a/sample/rawbench.cpp
+++ b/sample/rawbench.cpp
@@ -168,7 +168,7 @@ int main(int argc, char *argv[])
 		benchRaw(tbl[i], mcl::fp::FP_LLVM);
 		benchRaw(tbl[i], mcl::fp::FP_LLVM_MONT);
 #endif
-#ifdef MCL_USE_XBYAK
+#ifdef MCL_X64_ASM
 		if (bitSize <= 384) {
 			benchRaw(tbl[i], mcl::fp::FP_XBYAK);
 		}
diff --git a/src/fp.cpp b/src/fp.cpp
index b3b07d19..ab3a1a7e 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -3,12 +3,14 @@
 #include <cybozu/sha2.hpp>
 #include <cybozu/endian.hpp>
 #include <mcl/conversion.hpp>
+#ifdef MCL_STATIC_CODE
+#include "fp_static_code.hpp"
+#endif
 #ifdef MCL_USE_XBYAK
 #include "fp_generator.hpp"
 #else
 #define XBYAK_ONLY_CLASS_CPU
 #include "xbyak/xbyak_util.h"
-//#include "detect_cpu.hpp"
 #endif
 #include "low_func.hpp"
 #ifdef MCL_USE_LLVM
@@ -315,7 +317,7 @@ void setOp(Op& op, Mode mode)
 #endif
 }
 
-#ifdef MCL_USE_XBYAK
+#ifdef MCL_X64_ASM
 inline void invOpForMontC(Unit *y, const Unit *x, const Op& op)
 {
 	Unit r[maxUnitSize];
@@ -372,6 +374,12 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
 		op.fp_invOp = &invOpForMontC;
 		initInvTbl(op);
 	}
+#elif defined(MCL_STATIC_CODE)
+	fp::setStaticCode(op);
+	if (op.isMont && N <= 4) {
+		op.fp_invOp = &invOpForMontC;
+		initInvTbl(op);
+	}
 #endif
 	return true;
 }
@@ -403,14 +411,25 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
 	priority : MCL_USE_XBYAK > MCL_USE_LLVM > none
 	Xbyak > llvm_mont > llvm > gmp_mont > gmp
 */
-#ifdef MCL_USE_XBYAK
+#ifdef MCL_X64_ASM
 	if (mode == FP_AUTO) mode = FP_XBYAK;
 	if (mode == FP_XBYAK && bitSize > 384) {
 		mode = FP_AUTO;
 	}
+#ifdef MCL_USE_XBYAK
 	if (!isEnableJIT()) {
 		mode = FP_AUTO;
 	}
+#elif MCL_STATIC_CODE
+	{
+		// static jit code uses avx, mulx, adox, adcx
+		using namespace Xbyak::util;
+		Cpu cpu;
+		if (!(cpu.has(Cpu::tAVX) && cpu.has(Cpu::tBMI2) && cpu.has(Cpu::tADX))) {
+			mode = FP_AUTO;
+		}
+	}
+#endif
 #else
 	if (mode == FP_XBYAK) mode = FP_AUTO;
 #endif
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index b5d4628e..42433682 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -7,7 +7,6 @@
 	http://opensource.org/licenses/BSD-3-Clause
 */
 #if CYBOZU_HOST == CYBOZU_HOST_INTEL
-#define XBYAK_NO_OP_NAMES
 #define XBYAK_DISABLE_AVX512
 #include "xbyak/xbyak_util.h"
 
@@ -25,45 +24,6 @@
 
 namespace mcl {
 
-#ifdef MCL_STATIC_JIT
-typedef fp::Unit Unit;
-extern "C" {
-Unit mclx_Fp_addPre(Unit*, const Unit*, const Unit*);
-Unit mclx_Fp_subPre(Unit*, const Unit*, const Unit*);
-void mclx_Fp_add(Unit*, const Unit*, const Unit*);
-void mclx_Fp_sub(Unit*, const Unit*, const Unit*);
-void mclx_Fp_shr1(Unit*, const Unit*);
-void mclx_Fp_neg(Unit*, const Unit*);
-void mclx_Fp_mul(Unit*, const Unit*, const Unit*);
-void mclx_Fp_sqr(Unit*, const Unit*);
-void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
-void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
-void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
-void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
-Unit mclx_FpDbl_addPre(Unit*, const Unit*, const Unit*);
-Unit mclx_FpDbl_subPre(Unit*, const Unit*, const Unit*);
-void mclx_FpDbl_mulPre(Unit*, const Unit*, const Unit*);
-void mclx_FpDbl_sqrPre(Unit*, const Unit*);
-void mclx_FpDbl_mod(Unit*, const Unit*);
-void mclx_Fp2_add(Unit*, const Unit*, const Unit*);
-void mclx_Fp2_sub(Unit*, const Unit*, const Unit*);
-void mclx_Fp2_neg(Unit*, const Unit*);
-void mclx_Fp2_mul(Unit*, const Unit*, const Unit*);
-void mclx_Fp2_sqr(Unit*, const Unit*);
-void mclx_Fp2_mul_xi(Unit*, const Unit*);
-
-Unit mclx_Fr_addPre(Unit*, const Unit*, const Unit*);
-Unit mclx_Fr_subPre(Unit*, const Unit*, const Unit*);
-void mclx_Fr_add(Unit*, const Unit*, const Unit*);
-void mclx_Fr_sub(Unit*, const Unit*, const Unit*);
-void mclx_Fr_shr1(Unit*, const Unit*);
-void mclx_Fr_neg(Unit*, const Unit*);
-void mclx_Fr_mul(Unit*, const Unit*, const Unit*);
-void mclx_Fr_sqr(Unit*, const Unit*);
-int mclx_Fr_preInv(Unit*, const Unit*);
-}
-#endif
-
 #ifdef MCL_DUMP_JIT
 struct DumpCode {
 	FILE *fp_;
@@ -488,38 +448,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		align(16);
 		op.fp2_mul_xiA_ = gen_fp2_mul_xi();
 		setFuncInfo(prof_, suf, "2_mul_xi", op.fp2_mul_xiA_, getCurr());
-
-#ifdef MCL_STATIC_JIT
-		if (op.xi_a) {
-			// Fp, sizeof(Fp) = 48, supports Fp2
-			op.fp_addPre = mclx_Fp_addPre;
-			op.fp_subPre = mclx_Fp_subPre;
-			op.fp_addA_ = mclx_Fp_add;
-			op.fp_subA_ = mclx_Fp_sub;
-			op.fp_shr1 = mclx_Fp_shr1;
-			op.fp_negA_ = mclx_Fp_neg;
-			op.fpDbl_addA_ = mclx_FpDbl_add;
-			op.fpDbl_subA_ = mclx_FpDbl_sub;
-			op.fpDbl_addPre = mclx_FpDbl_addPre;
-			op.fpDbl_subPre = mclx_FpDbl_subPre;
-			op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre;
-			op.fpDbl_sqrPreA_ = mclx_FpDbl_sqrPre;
-			op.fpDbl_modA_ = mclx_FpDbl_mod;
-			op.fp_mulA_ = mclx_Fp_mul;
-			op.fp_sqrA_ = mclx_Fp_sqr;
-		} else {
-			// Fr, sizeof(Fr) = 32
-			op.fp_addPre = mclx_Fr_addPre;
-			op.fp_subPre = mclx_Fr_subPre;
-			op.fp_addA_ = mclx_Fr_add;
-			op.fp_subA_ = mclx_Fr_sub;
-			op.fp_shr1 = mclx_Fr_shr1;
-			op.fp_negA_ = mclx_Fr_neg;
-			op.fp_mulA_ = mclx_Fr_mul;
-			op.fp_sqrA_ = mclx_Fr_sqr;
-			op.fp_preInv = mclx_Fr_preInv;
-		}
-#endif
 	}
 	u3u gen_addSubPre(bool isAdd, int n)
 	{
@@ -2774,7 +2702,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(rax, px);
 		// px is free frome here
 		load_mp(vv, rax, t); // v = x
-		mov(rax, pL_);
+		lea(rax, ptr[rip+pL_]);
 		load_mp(uu, rax, t); // u = p_
 		// k = 0
 		xor_(rax, rax);
@@ -2852,7 +2780,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& t2 = ss.getReg(0);
 		const Reg64& t3 = rdx;
 
-		mov(t2, pL_);
+		lea(t2, ptr[rip+pL_]);
 		if (isFullBit_) {
 			mov(t, ptr [rTop]);
 			test(t, t);
@@ -3724,7 +3652,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			}
 		}
 		sub_rr(a, b);
-		mov(rax, pL_);
+		lea(rax, ptr[rip+pL_]);
 		load_rm(b, rax);
 		sbb(rax, rax);
 		for (int i = 0; i < pn_; i++) {
@@ -3732,7 +3660,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		}
 		add_rr(a, b);
 		store_mr(py, a);
-		mov(rax, pL_);
+		lea(rax, ptr[rip+pL_]);
 		mov_rr(a, t);
 		sub_rm(t, rax);
 		cmovc_rr(t, a);
@@ -3750,7 +3678,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov_rr(b, a);
 		add_rm(b, px + FpByte_);
 		sub_rm(a, px + FpByte_);
-		mov(rax, pL_);
+		lea(rax, ptr[rip+pL_]);
 		jnc("@f");
 		add_rm(a, rax);
 	L("@@");
@@ -3925,7 +3853,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 				mov(ptr [(RegExp)t2 + i * 8], rax);
 			}
 			// t3 = a + p - b
-			mov(rax, pL_);
+			lea(rax, ptr[rip+pL_]);
 			add_rm(a, rax);
 			sub_rr(a, b);
 			store_mr(t3, a);
diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp
new file mode 100644
index 00000000..0da39cbe
--- /dev/null
+++ b/src/fp_static_code.hpp
@@ -0,0 +1,87 @@
+#pragma once
+/**
+	@file
+	@brief Fp generator
+	@author MITSUNARI Shigeo(@herumi)
+	@license modified new BSD license
+	http://opensource.org/licenses/BSD-3-Clause
+*/
+#ifndef MCL_STATIC_CODE
+	#error "define MCL_STATIC_CODE"
+#endif
+
+namespace mcl { namespace fp {
+
+extern "C" {
+
+Unit mclx_Fp_addPre(Unit*, const Unit*, const Unit*);
+Unit mclx_Fp_subPre(Unit*, const Unit*, const Unit*);
+void mclx_Fp_add(Unit*, const Unit*, const Unit*);
+void mclx_Fp_sub(Unit*, const Unit*, const Unit*);
+void mclx_Fp_shr1(Unit*, const Unit*);
+void mclx_Fp_neg(Unit*, const Unit*);
+void mclx_Fp_mul(Unit*, const Unit*, const Unit*);
+void mclx_Fp_sqr(Unit*, const Unit*);
+void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
+void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
+void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
+void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
+Unit mclx_FpDbl_addPre(Unit*, const Unit*, const Unit*);
+Unit mclx_FpDbl_subPre(Unit*, const Unit*, const Unit*);
+void mclx_FpDbl_mulPre(Unit*, const Unit*, const Unit*);
+void mclx_FpDbl_sqrPre(Unit*, const Unit*);
+void mclx_FpDbl_mod(Unit*, const Unit*);
+void mclx_Fp2_add(Unit*, const Unit*, const Unit*);
+void mclx_Fp2_sub(Unit*, const Unit*, const Unit*);
+void mclx_Fp2_neg(Unit*, const Unit*);
+void mclx_Fp2_mul(Unit*, const Unit*, const Unit*);
+void mclx_Fp2_sqr(Unit*, const Unit*);
+void mclx_Fp2_mul_xi(Unit*, const Unit*);
+
+Unit mclx_Fr_addPre(Unit*, const Unit*, const Unit*);
+Unit mclx_Fr_subPre(Unit*, const Unit*, const Unit*);
+void mclx_Fr_add(Unit*, const Unit*, const Unit*);
+void mclx_Fr_sub(Unit*, const Unit*, const Unit*);
+void mclx_Fr_shr1(Unit*, const Unit*);
+void mclx_Fr_neg(Unit*, const Unit*);
+void mclx_Fr_mul(Unit*, const Unit*, const Unit*);
+void mclx_Fr_sqr(Unit*, const Unit*);
+int mclx_Fr_preInv(Unit*, const Unit*);
+} // extern "C"
+
+void setStaticCode(mcl::fp::Op& op)
+{
+	if (op.xi_a) {
+		// Fp, sizeof(Fp) = 48, supports Fp2
+		op.fp_addPre = mclx_Fp_addPre;
+		op.fp_subPre = mclx_Fp_subPre;
+		op.fp_addA_ = mclx_Fp_add;
+		op.fp_subA_ = mclx_Fp_sub;
+		op.fp_shr1 = mclx_Fp_shr1;
+		op.fp_negA_ = mclx_Fp_neg;
+		op.fpDbl_addA_ = mclx_FpDbl_add;
+		op.fpDbl_subA_ = mclx_FpDbl_sub;
+		op.fpDbl_addPre = mclx_FpDbl_addPre;
+		op.fpDbl_subPre = mclx_FpDbl_subPre;
+		op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre;
+		op.fpDbl_sqrPreA_ = mclx_FpDbl_sqrPre;
+		op.fpDbl_modA_ = mclx_FpDbl_mod;
+		op.fp_mulA_ = mclx_Fp_mul;
+		op.fp_sqrA_ = mclx_Fp_sqr;
+	} else {
+		// Fr, sizeof(Fr) = 32
+		op.fp_addPre = mclx_Fr_addPre;
+		op.fp_subPre = mclx_Fr_subPre;
+		op.fp_addA_ = mclx_Fr_add;
+		op.fp_subA_ = mclx_Fr_sub;
+		op.fp_shr1 = mclx_Fr_shr1;
+		op.fp_negA_ = mclx_Fr_neg;
+		op.fp_mulA_ = mclx_Fr_mul;
+		op.fp_sqrA_ = mclx_Fr_sqr;
+		op.fp_preInv = mclx_Fr_preInv;
+	}
+	op.fp_mul = fp::func_ptr_cast<void4u>(op.fp_mulA_);
+}
+
+} } // mcl::fp
+
diff --git a/src/low_func.hpp b/src/low_func.hpp
index 89a748e5..2db815e9 100644
--- a/src/low_func.hpp
+++ b/src/low_func.hpp
@@ -16,7 +16,7 @@
 #endif
 
 #ifndef MCL_LLVM_BMI2
-	#if (CYBOZU_HOST == CYBOZU_HOST_INTEL) && defined(MCL_USE_XBYAK) && !defined(MCL_USE_VINT)
+	#if (CYBOZU_HOST == CYBOZU_HOST_INTEL) && defined(MCL_X64_ASM) && !defined(MCL_USE_VINT)
 		#define MCL_LLVM_BMI2 1
 	#endif
 #endif
diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index a3e79e52..855ceba8 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -602,7 +602,7 @@ void test_sub(const mcl::EcParam *para, size_t paraNum)
 		test_sub_sub(para[i], mcl::fp::FP_LLVM);
 		test_sub_sub(para[i], mcl::fp::FP_LLVM_MONT);
 #endif
-#ifdef MCL_USE_XBYAK
+#ifdef MCL_X64_ASM
 		test_sub_sub(para[i], mcl::fp::FP_XBYAK);
 #endif
 		mulVec(para[i]);
diff --git a/test/fp_test.cpp b/test/fp_test.cpp
index 469f35d3..70fef8a8 100644
--- a/test/fp_test.cpp
+++ b/test/fp_test.cpp
@@ -876,7 +876,7 @@ void modpTest()
 }
 
 #include <iostream>
-#if (defined(MCL_USE_LLVM) || defined(MCL_USE_XBYAK)) && (MCL_MAX_BIT_SIZE >= 521)
+#if (defined(MCL_USE_LLVM) || defined(MCL_X64_ASM)) && (MCL_MAX_BIT_SIZE >= 521)
 CYBOZU_TEST_AUTO(mod_NIST_P521)
 {
 	const size_t len = 521;
@@ -908,7 +908,7 @@ CYBOZU_TEST_AUTO(mod_NIST_P521)
 		mcl_fpDbl_mod_NIST_P521L(ex, in, Fp::getOp().p);
 		CYBOZU_TEST_EQUAL_ARRAY(ex, ok, N + 1);
 #endif
-#ifdef MCL_USE_XBYAK
+#ifdef MCL_X64_ASM
 		const mcl::fp::Op& op = Fp::getOp();
 		if (!op.isMont) {
 			op.fpDbl_mod(ex, in, op.p);
@@ -1014,7 +1014,7 @@ CYBOZU_TEST_AUTO(main)
 		sub(mcl::fp::FP_LLVM_MONT);
 	}
 #endif
-#ifdef MCL_USE_XBYAK
+#ifdef MCL_X64_ASM
 	if (g_mode.empty() || g_mode == "xbyak") {
 		sub(mcl::fp::FP_XBYAK);
 	}
diff --git a/test/fp_tower_test.cpp b/test/fp_tower_test.cpp
index c26c5d7e..45763762 100644
--- a/test/fp_tower_test.cpp
+++ b/test/fp_tower_test.cpp
@@ -465,7 +465,7 @@ void testAll()
 		test(p, mcl::fp::FP_LLVM);
 		test(p, mcl::fp::FP_LLVM_MONT);
 #endif
-#ifdef MCL_USE_XBYAK
+#ifdef MCL_X64_ASM
 		test(p, mcl::fp::FP_XBYAK);
 #endif
 	}

From 32453e25a20c382ba6ceff3a078cd201115ada07 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 17 Sep 2020 16:59:46 +0900
Subject: [PATCH 300/553] add MCL_STATIC_CODE

---
 Makefile | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 1b59ce78..123249fd 100644
--- a/Makefile
+++ b/Makefile
@@ -11,6 +11,13 @@ TEST_SRC+=bls12_test.cpp
 TEST_SRC+=mapto_wb19_test.cpp
 TEST_SRC+=ecdsa_c_test.cpp
 TEST_SRC+=modp_test.cpp
+ifeq ($(MCL_STATIC_CODE),1)
+  MCL_USE_XBYAK=0
+  MCL_MAX_BIT_SIZE=384
+  CFLAGS+=-DMCL_STATI_CODE
+  LIB_OBJ=obj/static_code.o
+  TEST_SRC=bls12_test.cpp
+endif
 ifeq ($(CPU),x86-64)
   MCL_USE_XBYAK?=1
   TEST_SRC+=mont_fp_test.cpp sq_test.cpp
@@ -86,7 +93,7 @@ ifneq ($(CPU),)
   ASM_SRC=$(ASM_SRC_PATH_NAME).s
 endif
 ASM_OBJ=$(OBJ_DIR)/$(CPU).o
-LIB_OBJ=$(OBJ_DIR)/fp.o
+LIB_OBJ+=$(OBJ_DIR)/fp.o
 BN256_OBJ=$(OBJ_DIR)/bn_c256.o
 BN384_OBJ=$(OBJ_DIR)/bn_c384.o
 BN384_256_OBJ=$(OBJ_DIR)/bn_c384_256.o

From 938e15432a82c91c526168ea790fa3084b96702e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 18 Sep 2020 11:15:42 +0900
Subject: [PATCH 301/553] fix typo

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 123249fd..ce7fc341 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@ TEST_SRC+=modp_test.cpp
 ifeq ($(MCL_STATIC_CODE),1)
   MCL_USE_XBYAK=0
   MCL_MAX_BIT_SIZE=384
-  CFLAGS+=-DMCL_STATI_CODE
+  CFLAGS+=-DMCL_STATIC_CODE
   LIB_OBJ=obj/static_code.o
   TEST_SRC=bls12_test.cpp
 endif

From ad7b7891fa66388a36387769290ff8658a8620c1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 18 Sep 2020 11:16:01 +0900
Subject: [PATCH 302/553] fix bls12_test for static code

---
 include/mcl/bn.hpp     |  6 ++++++
 src/fp_static_code.hpp | 17 +++++++++++------
 test/bls12_test.cpp    |  3 +++
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 3668da26..ab9e15cf 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -854,6 +854,12 @@ struct Param {
 	{
 		this->cp = cp;
 		isBLS12 = cp.curveType == MCL_BLS12_381;
+#ifdef MCL_STATIC_CODE
+		if (!isBLS12) {
+			*pb = false;
+			return;
+		}
+#endif
 		gmp::setStr(pb, z, cp.z);
 		if (!*pb) return;
 		isNegative = z < 0;
diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp
index 0da39cbe..832062e4 100644
--- a/src/fp_static_code.hpp
+++ b/src/fp_static_code.hpp
@@ -20,17 +20,15 @@ void mclx_Fp_add(Unit*, const Unit*, const Unit*);
 void mclx_Fp_sub(Unit*, const Unit*, const Unit*);
 void mclx_Fp_shr1(Unit*, const Unit*);
 void mclx_Fp_neg(Unit*, const Unit*);
+void mclx_FpDbl_mod(Unit*, const Unit*);
 void mclx_Fp_mul(Unit*, const Unit*, const Unit*);
 void mclx_Fp_sqr(Unit*, const Unit*);
 void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
 void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
-void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
-void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
 Unit mclx_FpDbl_addPre(Unit*, const Unit*, const Unit*);
 Unit mclx_FpDbl_subPre(Unit*, const Unit*, const Unit*);
 void mclx_FpDbl_mulPre(Unit*, const Unit*, const Unit*);
 void mclx_FpDbl_sqrPre(Unit*, const Unit*);
-void mclx_FpDbl_mod(Unit*, const Unit*);
 void mclx_Fp2_add(Unit*, const Unit*, const Unit*);
 void mclx_Fp2_sub(Unit*, const Unit*, const Unit*);
 void mclx_Fp2_neg(Unit*, const Unit*);
@@ -59,15 +57,22 @@ void setStaticCode(mcl::fp::Op& op)
 		op.fp_subA_ = mclx_Fp_sub;
 		op.fp_shr1 = mclx_Fp_shr1;
 		op.fp_negA_ = mclx_Fp_neg;
+		op.fpDbl_modA_ = mclx_FpDbl_mod;
+		op.fp_mulA_ = mclx_Fp_mul;
+		op.fp_sqrA_ = mclx_Fp_sqr;
 		op.fpDbl_addA_ = mclx_FpDbl_add;
 		op.fpDbl_subA_ = mclx_FpDbl_sub;
 		op.fpDbl_addPre = mclx_FpDbl_addPre;
 		op.fpDbl_subPre = mclx_FpDbl_subPre;
 		op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre;
 		op.fpDbl_sqrPreA_ = mclx_FpDbl_sqrPre;
-		op.fpDbl_modA_ = mclx_FpDbl_mod;
-		op.fp_mulA_ = mclx_Fp_mul;
-		op.fp_sqrA_ = mclx_Fp_sqr;
+		op.fp2_addA_ = mclx_Fp2_add;
+		op.fp2_subA_ = mclx_Fp2_sub;
+		op.fp2_negA_ = mclx_Fp2_neg;
+		op.fp2_mulNF = 0;
+		op.fp2_mulA_ = mclx_Fp2_mul;
+		op.fp2_sqrA_ = mclx_Fp2_sqr;
+		op.fp2_mul_xiA_ = mclx_Fp2_mul_xi;
 	} else {
 		// Fr, sizeof(Fr) = 32
 		op.fp_addPre = mclx_Fr_addPre;
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index 723bf3ac..94dce59d 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -688,6 +688,8 @@ CYBOZU_TEST_AUTO(multi)
 	G1 P;
 	G2 Q;
 	int i;
+
+#ifndef MCL_STATIC_CODE
 	puts("BN254");
 	testCurve(mcl::BN254);
 	i = 1;
@@ -695,6 +697,7 @@ CYBOZU_TEST_AUTO(multi)
 	CYBOZU_BENCH_C("naiveG2", 100, (BN::param.mapTo.naiveMapTo<G1, Fp>), P, i++);
 	CYBOZU_BENCH_C("calcBN2", 100, (BN::param.mapTo.calcBN<G2, Fp2>), Q, i++);
 	CYBOZU_BENCH_C("naiveG2", 100, (BN::param.mapTo.naiveMapTo<G2, Fp2>), Q, i++);
+#endif
 	puts("BLS12_381");
 	testCurve(mcl::BLS12_381);
 	i = 1;

From 4ee23f5fd3797e556898c22ce849871e72b7f342 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 18 Sep 2020 11:18:31 +0900
Subject: [PATCH 303/553] add -Wundef

---
 common.mk           | 2 +-
 test/bls12_test.cpp | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/common.mk b/common.mk
index 4816049f..6f4ed72b 100644
--- a/common.mk
+++ b/common.mk
@@ -91,7 +91,7 @@ else
     CFLAGS_OPT+=$(MARCH)
   endif
 endif
-CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
+CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith -Wundef
 CFLAGS+=-g3
 INC_OPT=-I include -I test
 CFLAGS+=$(CFLAGS_WARN) $(BIT_OPT) $(INC_OPT)
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index 94dce59d..ec4204ca 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -864,7 +864,11 @@ int main(int argc, char *argv[])
 		return 1;
 	}
 	g_mode = mcl::fp::StrToMode(mode);
+#ifdef MCL_STATIC_CODE
+	printf("static code for BLS12-381\n");
+#else
 	printf("JIT %d\n", mcl::fp::isEnableJIT());
+#endif
 #if 0
 	initPairing(mcl::BLS12_381);
 	cybozu::XorShift rg;

From 4fd5fef8edbe886989dbff4b4d91ccd5bfdc2dca Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 18 Sep 2020 11:55:55 +0900
Subject: [PATCH 304/553] static_code does not need llvm-bmi2

---
 Makefile         |  4 +++-
 src/low_func.hpp |  4 +++-
 test/bench.hpp   | 12 ++++++++++++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index ce7fc341..21f5f150 100644
--- a/Makefile
+++ b/Makefile
@@ -113,7 +113,9 @@ ifeq ($(MCL_USE_LLVM),1)
   LIB_OBJ+=$(ASM_OBJ)
   # special case for intel with bmi2
   ifeq ($(INTEL),1)
-    LIB_OBJ+=$(OBJ_DIR)/$(CPU).bmi2.o
+    ifneq ($(MCL_STATIC_CODE),1)
+      LIB_OBJ+=$(OBJ_DIR)/$(CPU).bmi2.o
+    endif
   endif
 endif
 LLVM_SRC=src/base$(BIT).ll
diff --git a/src/low_func.hpp b/src/low_func.hpp
index 2db815e9..9192e51d 100644
--- a/src/low_func.hpp
+++ b/src/low_func.hpp
@@ -16,8 +16,10 @@
 #endif
 
 #ifndef MCL_LLVM_BMI2
-	#if (CYBOZU_HOST == CYBOZU_HOST_INTEL) && defined(MCL_X64_ASM) && !defined(MCL_USE_VINT)
+	#if (CYBOZU_HOST == CYBOZU_HOST_INTEL) && !defined(MCL_STATIC_CODE) && !defined(MCL_USE_VINT)
 		#define MCL_LLVM_BMI2 1
+	#else
+		#define MCL_LLVM_BMI2 0
 	#endif
 #endif
 
diff --git a/test/bench.hpp b/test/bench.hpp
index b4a8bd29..f7acfced 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -101,6 +101,18 @@ void testBench(const G1& P, const G2& Q)
 	CYBOZU_BENCH_C("Fp::sqr       ", C3, Fp::sqr, x, x);
 	CYBOZU_BENCH_C("Fp::inv       ", C3, Fp::inv, x, x);
 	CYBOZU_BENCH_C("Fp::pow       ", C3, Fp::pow, x, x, y);
+	{
+		Fr a, b, c;
+		a.setHashOf("abc", 3);
+		b.setHashOf("123", 3);
+		CYBOZU_BENCH_C("Fr::add       ", C3, Fr::add, a, a, b);
+		CYBOZU_BENCH_C("Fr::sub       ", C3, Fr::sub, a, a, b);
+		CYBOZU_BENCH_C("Fr::neg       ", C3, Fr::neg, a, a);
+		CYBOZU_BENCH_C("Fr::mul       ", C3, Fr::mul, a, a, b);
+		CYBOZU_BENCH_C("Fr::sqr       ", C3, Fr::sqr, a, a);
+		CYBOZU_BENCH_C("Fr::inv       ", C3, Fr::inv, a, a);
+		CYBOZU_BENCH_C("Fr::pow       ", C3, Fr::pow, a, a, b);
+	}
 	Fp2 xx, yy;
 	xx.a = x;
 	xx.b = 3;

From eca0e51959704f02e63dbf1b9ffde7383b902514 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 23 Sep 2020 12:03:36 +0900
Subject: [PATCH 305/553] DUMP_JIT always dump_code regardless of cpu

---
 src/fp.cpp                 | 35 +++++++++++++++++++++++++----------
 src/fp_generator.hpp       | 21 ++++++++++++---------
 test/fp_generator_test.cpp |  5 +----
 3 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index ab3a1a7e..1913559a 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -3,15 +3,27 @@
 #include <cybozu/sha2.hpp>
 #include <cybozu/endian.hpp>
 #include <mcl/conversion.hpp>
+
+#if defined(MCL_STATIC_CODE) || defined(MCL_USE_XBYAK) || defined(MCL_USE_LLVM)
+
+#ifdef MCL_USE_XBYAK
+	#define XBYAK_DISABLE_AVX512
+#else
+	#define XBYAK_ONLY_CLASS_CPU
+#endif
+
+#include "xbyak/xbyak_util.h"
+Xbyak::util::Cpu g_cpu;
+
 #ifdef MCL_STATIC_CODE
 #include "fp_static_code.hpp"
 #endif
 #ifdef MCL_USE_XBYAK
 #include "fp_generator.hpp"
-#else
-#define XBYAK_ONLY_CLASS_CPU
-#include "xbyak/xbyak_util.h"
 #endif
+
+#endif
+
 #include "low_func.hpp"
 #ifdef MCL_USE_LLVM
 #include "proto.hpp"
@@ -303,8 +315,7 @@ void setOp(Op& op, Mode mode)
 	if (mode != fp::FP_GMP && mode != fp::FP_GMP_MONT) {
 #if MCL_LLVM_BMI2 == 1
 		const bool gmpIsFasterThanLLVM = false;//(N == 8 && MCL_SIZEOF_UNIT == 8);
-		Xbyak::util::Cpu cpu;
-		if (cpu.has(Xbyak::util::Cpu::tBMI2)) {
+		if (g_cpu.has(Xbyak::util::Cpu::tBMI2)) {
 			setOp2<N, LBMI2tag, (N * UnitBitSize <= 384), gmpIsFasterThanLLVM>(op);
 		} else
 #endif
@@ -368,9 +379,14 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
 	if (mode != FP_XBYAK) return true;
 #ifdef MCL_USE_XBYAK
 	if (op.fg == 0) op.fg = Op::createFpGenerator();
-	bool useXbyak = op.fg->init(op);
+	bool useXbyak = op.fg->init(op, g_cpu);
+#ifdef MCL_USE_VINT
+	const int maxN = 6;
+#else
+	const int maxN = 4;
+#endif
 
-	if (useXbyak && op.isMont && N <= 4) {
+	if (useXbyak && op.isMont && N <= maxN) {
 		op.fp_invOp = &invOpForMontC;
 		initInvTbl(op);
 	}
@@ -420,12 +436,11 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
 	if (!isEnableJIT()) {
 		mode = FP_AUTO;
 	}
-#elif MCL_STATIC_CODE
+#elif defined(MCL_STATIC_CODE)
 	{
 		// static jit code uses avx, mulx, adox, adcx
 		using namespace Xbyak::util;
-		Cpu cpu;
-		if (!(cpu.has(Cpu::tAVX) && cpu.has(Cpu::tBMI2) && cpu.has(Cpu::tADX))) {
+		if (!(g_cpu.has(Cpu::tAVX) && g_cpu.has(Cpu::tBMI2) && g_cpu.has(Cpu::tADX))) {
 			mode = FP_AUTO;
 		}
 	}
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 42433682..32130290 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -7,8 +7,6 @@
 	http://opensource.org/licenses/BSD-3-Clause
 */
 #if CYBOZU_HOST == CYBOZU_HOST_INTEL
-#define XBYAK_DISABLE_AVX512
-#include "xbyak/xbyak_util.h"
 
 #if MCL_SIZEOF_UNIT == 8
 #include <stdio.h>
@@ -230,9 +228,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		Ext2(const Ext2&);
 		void operator=(const Ext2&);
 	};
-	Xbyak::util::Cpu cpu_;
-	bool useMulx_;
-	bool  useAdx_;
 	const Reg64& gp0;
 	const Reg64& gp1;
 	const Reg64& gp2;
@@ -257,6 +252,8 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	int pn_;
 	int FpByte_;
 	bool isFullBit_;
+	bool useMulx_;
+	bool useAdx_;
 #ifdef MCL_DUMP_JIT
 	DumpCode prof_;
 #else
@@ -297,12 +294,18 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		, pn_(0)
 		, FpByte_(0)
 	{
-		useMulx_ = cpu_.has(Xbyak::util::Cpu::tBMI2);
-		useAdx_ = cpu_.has(Xbyak::util::Cpu::tADX);
 	}
-	bool init(Op& op)
+	bool init(Op& op, const Xbyak::util::Cpu& cpu)
 	{
-		if (!cpu_.has(Xbyak::util::Cpu::tAVX)) return false;
+#ifdef MCL_DUMP_JIT
+		useMulx_ = true;
+		useAdx_ = true;
+		(void)cpu;
+#else
+		if (!cpu.has(Xbyak::util::Cpu::tAVX)) return false;
+		useMulx_ = cpu.has(Xbyak::util::Cpu::tBMI2);
+		useAdx_ = cpu.has(Xbyak::util::Cpu::tADX);
+#endif
 		reset(); // reset jit code for reuse
 		setProtectModeRW(); // read/write memory
 		init_inner(op);
diff --git a/test/fp_generator_test.cpp b/test/fp_generator_test.cpp
index 60ec5cd4..39cfa27f 100644
--- a/test/fp_generator_test.cpp
+++ b/test/fp_generator_test.cpp
@@ -1,12 +1,10 @@
 #include <cybozu/test.hpp>
-#if MCL_SIZEOF_UNIT == 4
-// not support
-#else
 #include <mcl/gmp_util.hpp>
 #include <stdint.h>
 #include <string>
 #include <cybozu/itoa.hpp>
 #include <mcl/fp.hpp>
+#include "../src/xbyak/xbyak_util.h"
 #include "../src/fp_generator.hpp"
 #include <iostream>
 #include <cybozu/xorshift.hpp>
@@ -204,4 +202,3 @@ CYBOZU_TEST_AUTO(all)
 		test(primeTable[i]);
 	}
 }
-#endif

From d0e2d6ce15ba22dd2c660e7b88c67fcb7deb939c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 23 Sep 2020 12:34:26 +0900
Subject: [PATCH 306/553] update cybozulib

---
 include/cybozu/atoi.hpp      | 5 ++---
 include/cybozu/exception.hpp | 1 -
 include/cybozu/inttype.hpp   | 7 ++++++-
 include/cybozu/itoa.hpp      | 3 +--
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/include/cybozu/atoi.hpp b/include/cybozu/atoi.hpp
index a22853a1..3930571c 100644
--- a/include/cybozu/atoi.hpp
+++ b/include/cybozu/atoi.hpp
@@ -7,7 +7,6 @@
 */
 
 #include <memory.h>
-#include <limits.h>
 #include <limits>
 #include <cybozu/exception.hpp>
 
@@ -173,7 +172,7 @@ class atoi {
 	}
 	inline operator int() const
 	{
-		return atoi_local::convertToInt<int>(b_, p_, size_, "2147483648", INT_MIN, 214748364, '8');
+		return atoi_local::convertToInt<int>(b_, p_, size_, "2147483648", /*INT_MIN*/-2147483648, 214748364, '8');
 	}
 	inline operator unsigned int() const
 	{
@@ -181,7 +180,7 @@ class atoi {
 	}
 	inline operator long long() const
 	{
-		return atoi_local::convertToInt<long long>(b_, p_, size_, "9223372036854775808", LLONG_MIN, 922337203685477580LL, '8');
+		return atoi_local::convertToInt<long long>(b_, p_, size_, "9223372036854775808", CYBOZU_LLONG_MIN, 922337203685477580LL, '8');
 	}
 	inline operator unsigned long long() const
 	{
diff --git a/include/cybozu/exception.hpp b/include/cybozu/exception.hpp
index f5b044f6..a9668ba5 100644
--- a/include/cybozu/exception.hpp
+++ b/include/cybozu/exception.hpp
@@ -203,7 +203,6 @@ class ErrorNo {
 	}
 	/**
 		convert NativeErrNo to string(maybe UTF8)
-		@param err [in] errno
 		@note Linux   : same as ConvertErrorNoToString
 			  Windows : for Win32 API(use en-us)
 	*/
diff --git a/include/cybozu/inttype.hpp b/include/cybozu/inttype.hpp
index 62856bdb..f74a1d19 100644
--- a/include/cybozu/inttype.hpp
+++ b/include/cybozu/inttype.hpp
@@ -72,6 +72,9 @@
 	#endif
 #endif
 
+// LLONG_MIN in limits.h is not defined in some env.
+#define CYBOZU_LLONG_MIN (-9223372036854775807ll-1)
+
 #define CYBOZU_CPP_VERSION_CPP03 0
 #define CYBOZU_CPP_VERSION_TR1 1
 #define CYBOZU_CPP_VERSION_CPP11 2
@@ -88,7 +91,7 @@
 	#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP17
 #elif (__cplusplus >= 201402)
 	#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP14
-#elif (__cplusplus >= 201103) || (_MSC_VER >= 1500) || defined(__GXX_EXPERIMENTAL_CXX0X__)
+#elif (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1500) || defined(__GXX_EXPERIMENTAL_CXX0X__)
 	#if defined(_MSC_VER) && (_MSC_VER <= 1600)
 		#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
 	#else
@@ -150,8 +153,10 @@
 
 #if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11
 	#define CYBOZU_NOEXCEPT noexcept
+	#define CYBOZU_NULLPTR nullptr
 #else
 	#define CYBOZU_NOEXCEPT throw()
+	#define CYBOZU_NULLPTR 0
 #endif
 namespace cybozu {
 template<class T>
diff --git a/include/cybozu/itoa.hpp b/include/cybozu/itoa.hpp
index 072e5b8b..96869386 100644
--- a/include/cybozu/itoa.hpp
+++ b/include/cybozu/itoa.hpp
@@ -5,7 +5,6 @@
 
 	@author MITSUNARI Shigeo(@herumi)
 */
-#include <limits.h>
 #ifndef CYBOZU_DONT_USE_STRING
 #include <string>
 #endif
@@ -122,7 +121,7 @@ size_t uintToBin(char *buf, size_t bufSize, UT x)
 template<class T>
 size_t intToDec(char *buf, size_t bufSize, T x)
 {
-	if (x == LLONG_MIN) {
+	if (x == CYBOZU_LLONG_MIN) {
 		const char minStr[] = "-9223372036854775808";
 		const size_t minStrLen = sizeof(minStr) - 1;
 		if (bufSize < minStrLen) {

From 5a2f20f6500e8dbf77942799a131e85a6983d6c1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 23 Sep 2020 13:00:26 +0900
Subject: [PATCH 307/553] fix typo

---
 include/mcl/elgamal.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/elgamal.hpp b/include/mcl/elgamal.hpp
index 9cf29198..58d05712 100644
--- a/include/mcl/elgamal.hpp
+++ b/include/mcl/elgamal.hpp
@@ -386,7 +386,7 @@ struct ElgamalT {
 		create table g^i for i in [rangeMin, rangeMax]
 	*/
 	struct PowerCache {
-#if (CYBOZU_CPP_VERSION > CYBOZU_CPP_VERSION_CP03)
+#if (CYBOZU_CPP_VERSION > CYBOZU_CPP_VERSION_CPP03)
 		typedef CYBOZU_NAMESPACE_STD::unordered_map<Ec, int> Cache;
 #else
 		typedef std::map<Ec, int> Cache;

From 5af07a5522e585df5e71053f038b2813a8358a73 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 23 Sep 2020 14:34:09 +0900
Subject: [PATCH 308/553] update xbyak

---
 src/xbyak/xbyak.h | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/xbyak/xbyak.h b/src/xbyak/xbyak.h
index 67b75f30..481ec9bd 100644
--- a/src/xbyak/xbyak.h
+++ b/src/xbyak/xbyak.h
@@ -108,7 +108,7 @@
 	#endif
 #endif
 
-#if (__cplusplus >= 201103) || (_MSC_VER >= 1800)
+#if (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1800)
 	#undef XBYAK_TLS
 	#define XBYAK_TLS thread_local
 	#define XBYAK_VARIADIC_TEMPLATE
@@ -117,7 +117,7 @@
 	#define XBYAK_NOEXCEPT throw()
 #endif
 
-#if (__cplusplus >= 201402L) || (_MSC_VER >= 1910) // Visual Studio 2017 version 15.0
+#if (__cplusplus >= 201402L) || (defined(_MSC_VER) && _MSC_VER >= 1910) // Visual Studio 2017 version 15.0
 	#define XBYAK_CONSTEXPR constexpr // require c++14 or later
 #else
 	#define XBYAK_CONSTEXPR
@@ -267,13 +267,22 @@ inline const char *ConvertErrorToString(int err)
 #ifdef XBYAK_NO_EXCEPTION
 namespace local {
 
-static XBYAK_TLS int l_err = 0;
-inline void SetError(int err) { if (err) l_err = err; } // keep the first err code
+inline int& GetErrorRef() {
+	static XBYAK_TLS int err = 0;
+	return err;
+}
+
+inline void SetError(int err) {
+	if (local::GetErrorRef()) return; // keep the first err code
+	local::GetErrorRef() = err;
+}
 
 } // local
 
-inline void ClearError() { local::l_err = 0; }
-inline int GetError() { return local::l_err; }
+inline void ClearError() {
+	local::GetErrorRef() = 0;
+}
+inline int GetError() { return local::GetErrorRef(); }
 
 #define XBYAK_THROW(err) { local::SetError(err); return; }
 #define XBYAK_THROW_RET(err, r) { local::SetError(err); return r; }

From 251bf1de3bf4a3806b22c432469f2a526d7d3827 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 23 Sep 2020 16:00:46 +0900
Subject: [PATCH 309/553] return true if DUMP_JIT

---
 include/mcl/bn.hpp | 4 ++++
 src/fp.cpp         | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index ab9e15cf..68accfb8 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -885,6 +885,10 @@ struct Param {
 		if (!*pb) return;
 		Fp::init(pb, cp.xi_a, p, mode);
 		if (!*pb) return;
+#ifdef MCL_DUMP_JIT
+		*pb = true;
+		return;
+#endif
 		Fp2::init();
 		const Fp2 xi(cp.xi_a, 1);
 		g2 = Fp2::get_gTbl()[0];
diff --git a/src/fp.cpp b/src/fp.cpp
index 1913559a..d46eb6e9 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -380,6 +380,9 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
 #ifdef MCL_USE_XBYAK
 	if (op.fg == 0) op.fg = Op::createFpGenerator();
 	bool useXbyak = op.fg->init(op, g_cpu);
+#ifdef MCL_DUMP_JIT
+	return true;
+#endif
 #ifdef MCL_USE_VINT
 	const int maxN = 6;
 #else

From af1950d8fb7f16b5d21f81fdfa2006fc9ecf6511 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 18 Sep 2020 14:31:28 +0900
Subject: [PATCH 310/553] STATIC_CODE supports mac

---
 Makefile             | 6 +++---
 common.mk            | 3 +++
 src/fp_generator.hpp | 7 ++++++-
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index 21f5f150..58e0750e 100644
--- a/Makefile
+++ b/Makefile
@@ -11,11 +11,12 @@ TEST_SRC+=bls12_test.cpp
 TEST_SRC+=mapto_wb19_test.cpp
 TEST_SRC+=ecdsa_c_test.cpp
 TEST_SRC+=modp_test.cpp
+LIB_OBJ=$(OBJ_DIR)/fp.o
 ifeq ($(MCL_STATIC_CODE),1)
   MCL_USE_XBYAK=0
   MCL_MAX_BIT_SIZE=384
   CFLAGS+=-DMCL_STATIC_CODE
-  LIB_OBJ=obj/static_code.o
+  LIB_OBJ+=obj/static_code.o
   TEST_SRC=bls12_test.cpp
 endif
 ifeq ($(CPU),x86-64)
@@ -93,7 +94,6 @@ ifneq ($(CPU),)
   ASM_SRC=$(ASM_SRC_PATH_NAME).s
 endif
 ASM_OBJ=$(OBJ_DIR)/$(CPU).o
-LIB_OBJ+=$(OBJ_DIR)/fp.o
 BN256_OBJ=$(OBJ_DIR)/bn_c256.o
 BN384_OBJ=$(OBJ_DIR)/bn_c384.o
 BN384_256_OBJ=$(OBJ_DIR)/bn_c384_256.o
@@ -253,7 +253,7 @@ src/static_code.asm: src/dump_code
 	$< > $@
 
 obj/static_code.o: src/static_code.asm
-	nasm -felf64 -o $@ $<
+	nasm $(NASM_ELF_OPT) -o $@ $<
 
 bin/static_code_test.exe: test/static_code_test.cpp src/fp.cpp obj/static_code.o
 	$(CXX) -o $@ -O3 $^ -g -DMCL_DONT_USE_XBYAK -DMCL_STATIC_CODE -DMCL_MAX_BIT_SIZE=384 -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -I include -Wall -Wextra
diff --git a/common.mk b/common.mk
index 6f4ed72b..2870ac94 100644
--- a/common.mk
+++ b/common.mk
@@ -1,11 +1,13 @@
 GCC_VER=$(shell $(PRE)$(CC) -dumpversion)
 UNAME_S=$(shell uname -s)
+NASM_ELF_OPT=-felf64
 ifeq ($(UNAME_S),Linux)
   OS=Linux
 endif
 ifeq ($(findstring MINGW64,$(UNAME_S)),MINGW64)
   OS=mingw64
   CFLAGS+=-D__USE_MINGW_ANSI_STDIO=1
+  NASM_ELF_OPT=-fwin64
 endif
 ifeq ($(findstring CYGWIN,$(UNAME_S)),CYGWIN)
   OS=cygwin
@@ -20,6 +22,7 @@ ifeq ($(UNAME_S),Darwin)
   GMP_DIR?=/usr/local/opt/gmp
   CFLAGS+=-I$(GMP_DIR)/include
   LDFLAGS+=-L$(GMP_DIR)/lib
+  NASM_ELF_OPT=-fmacho64
 else
   LIB_SUF=so
 endif
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 32130290..34a6dede 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -65,7 +65,12 @@ void setFuncInfo(DumpCode& prof, const char *suf, const char *name, const T& beg
 {
 	if (suf == 0) suf = "";
 	const uint8_t*p = (const uint8_t*)begin;
-	prof.set(std::string("mclx_") + suf + name, p, end - p);
+#ifdef __APPLE__
+	std::string pre = "_mclx_";
+#else
+	std::string pre = "mclx_";
+#endif
+	prof.set(pre + suf + name, p, end - p);
 }
 #else
 template<class T>

From eb1f0002d0b7c5bd494f0c07e01972b2f705d3c2 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 23 Sep 2020 19:58:51 +0900
Subject: [PATCH 311/553] gen.cpp does not require GMP

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 21f5f150..61330cee 100644
--- a/Makefile
+++ b/Makefile
@@ -244,7 +244,7 @@ else
 endif
 
 $(GEN_EXE): src/gen.cpp src/llvm_gen.hpp
-	$(CXX) -o $@ $< $(CFLAGS)
+	$(CXX) -o $@ $< $(CFLAGS) -DMCL_USE_VINT -DMCL_VINT_FIXED_BUFFER
 
 src/dump_code: src/dump_code.cpp src/fp.cpp src/fp_generator.hpp
 	$(CXX) -o $@ src/dump_code.cpp src/fp.cpp -g -I include -DMCL_DUMP_JIT -DMCL_MAX_BIT_SIZE=384 -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER

From c07c4a6b9fcad98028c47a05b36c35f81bbcb701 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 24 Sep 2020 13:48:19 +0900
Subject: [PATCH 312/553] test of binary method of inv

---
 include/mcl/vint.hpp | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/include/mcl/vint.hpp b/include/mcl/vint.hpp
index 13e24e77..13c4483a 100644
--- a/include/mcl/vint.hpp
+++ b/include/mcl/vint.hpp
@@ -1746,6 +1746,46 @@ class VintT {
 	static void invMod(VintT& y, const VintT& x, const VintT& m)
 	{
 		assert(!x.isZero() && !m.isZero());
+#if 0
+		VintT u = x;
+		VintT v = m;
+		VintT x1 = 1, x2 = 0;
+		VintT t;
+		while (u != 1 && v != 1) {
+			while (u.isEven()) {
+				u >>= 1;
+				if (x1.isOdd()) {
+					x1 += m;
+				}
+				x1 >>= 1;
+			}
+			while (v.isEven()) {
+				v >>= 1;
+				if (x2.isOdd()) {
+					x2 += m;
+				}
+				x2 >>= 1;
+			}
+			if (u >= v) {
+				u -= v;
+				x1 -= x2;
+				if (x1 < 0) {
+					x1 += m;
+				}
+			} else {
+				v -= u;
+				x2 -= x1;
+				if (x2 < 0) {
+					x2 += m;
+				}
+			}
+		}
+		if (u == 1) {
+			y = x1;
+		} else {
+			y = x2;
+		}
+#else
 		if (x == 1) {
 			y = 1;
 			return;
@@ -1778,6 +1818,7 @@ class VintT {
 			}
 			b -= a * q;
 		}
+#endif
 	}
 	/*
 		Miller-Rabin

From c456490f7364af9598c4a8ae6e81e3930ed53d4c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 24 Sep 2020 13:48:39 +0900
Subject: [PATCH 313/553] bench of inv

---
 test/bench.hpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/test/bench.hpp b/test/bench.hpp
index f7acfced..09af59e2 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -52,6 +52,12 @@ void benchAddDblG2()
 	CYBOZU_BENCH_C("G2::dbl(2)", C, G2::dbl, P3, P1);
 }
 
+template<class T>
+void invAdd(T& out, const T& x, const T& y)
+{
+	T::inv(out, x);
+	out += y;
+}
 
 void testBench(const G1& P, const G2& Q)
 {
@@ -99,7 +105,7 @@ void testBench(const G1& P, const G2& Q)
 	CYBOZU_BENCH_C("Fp::neg       ", C3, Fp::neg, x, x);
 	CYBOZU_BENCH_C("Fp::mul       ", C3, Fp::mul, x, x, y);
 	CYBOZU_BENCH_C("Fp::sqr       ", C3, Fp::sqr, x, x);
-	CYBOZU_BENCH_C("Fp::inv       ", C3, Fp::inv, x, x);
+	CYBOZU_BENCH_C("Fp::inv       ", C3, invAdd, x, x, y);
 	CYBOZU_BENCH_C("Fp::pow       ", C3, Fp::pow, x, x, y);
 	{
 		Fr a, b, c;
@@ -110,7 +116,7 @@ void testBench(const G1& P, const G2& Q)
 		CYBOZU_BENCH_C("Fr::neg       ", C3, Fr::neg, a, a);
 		CYBOZU_BENCH_C("Fr::mul       ", C3, Fr::mul, a, a, b);
 		CYBOZU_BENCH_C("Fr::sqr       ", C3, Fr::sqr, a, a);
-		CYBOZU_BENCH_C("Fr::inv       ", C3, Fr::inv, a, a);
+		CYBOZU_BENCH_C("Fr::inv       ", C3, invAdd, a, a, b);
 		CYBOZU_BENCH_C("Fr::pow       ", C3, Fr::pow, a, a, b);
 	}
 	Fp2 xx, yy;
@@ -128,7 +134,7 @@ void testBench(const G1& P, const G2& Q)
 	CYBOZU_BENCH_C("Fp2::mul      ", C3, Fp2::mul, xx, xx, yy);
 	CYBOZU_BENCH_C("Fp2::mul_xi   ", C3, Fp2::mul_xi, xx, xx);
 	CYBOZU_BENCH_C("Fp2::sqr      ", C3, Fp2::sqr, xx, xx);
-	CYBOZU_BENCH_C("Fp2::inv      ", C3, Fp2::inv, xx, xx);
+	CYBOZU_BENCH_C("Fp2::inv      ", C3, invAdd, xx, xx, yy);
 	CYBOZU_BENCH_C("FpDbl::addPre ", C3, FpDbl::addPre, d1, d1, d0);
 	CYBOZU_BENCH_C("FpDbl::subPre ", C3, FpDbl::subPre, d1, d1, d0);
 	CYBOZU_BENCH_C("FpDbl::add    ", C3, FpDbl::add, d1, d1, d0);

From fce9599a59d4a1b80d0c8c41e6f46f8bb4295c39 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 29 Sep 2020 15:49:42 +0900
Subject: [PATCH 314/553] change internal millerLoopVecN api

---
 include/mcl/bn.hpp | 23 ++++++++++++++++-------
 test/bn_test.cpp   | 21 ++++++++++++---------
 2 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 68accfb8..1b7c614f 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -1932,8 +1932,15 @@ inline void precomputedMillerLoop2mixed(Fp12& f, const G1& P1, const G2& Q1, con
 }
 #endif
 
+/*
+	e = prod_i ML(Pvec[i], Qvec[i])
+	if initF:
+	  _f = e
+	else:
+	  _f *= e
+*/
 template<size_t N>
-inline void millerLoopVecN(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
+inline void millerLoopVecN(Fp12& _f, const G1* Pvec, const G2* Qvec, size_t n, bool initF)
 {
 	assert(n <= N);
 	G1 P[N];
@@ -1949,11 +1956,13 @@ inline void millerLoopVecN(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
 			}
 		}
 		if (realN <= 0) {
-			f = 1;
+			if (initF) _f = 1;
 			return;
 		}
 		n = realN; // update n
 	}
+	Fp12 ff;
+	Fp12& f(initF ? _f : ff);
 	// all P[] and Q[] are not zero
 	G2 T[N], negQ[N];
 	G1 adjP[N];
@@ -1993,7 +2002,7 @@ inline void millerLoopVecN(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
 	if (BN::param.z < 0) {
 		Fp6::neg(f.b, f.b);
 	}
-	if (BN::param.isBLS12) return;
+	if (BN::param.isBLS12) goto EXIT;
 	for (size_t i = 0; i < n; i++) {
 		if (BN::param.z < 0) {
 			G2::neg(T[i], T[i]);
@@ -2007,6 +2016,8 @@ inline void millerLoopVecN(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
 		mulSparse2(ft, d, e);
 		f *= ft;
 	}
+EXIT:
+	if (!initF) _f *= f;
 }
 /*
 	f = prod_{i=0}^{n-1} millerLoop(Pvec[i], Qvec[i])
@@ -2015,12 +2026,10 @@ inline void millerLoopVec(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
 {
 	const size_t N = 16;
 	size_t remain = fp::min_(N, n);
-	millerLoopVecN<N>(f, Pvec, Qvec, remain);
+	millerLoopVecN<N>(f, Pvec, Qvec, remain, true);
 	for (size_t i = remain; i < n; i += N) {
 		remain = fp::min_(n - i, N);
-		Fp12 ft;
-		millerLoopVecN<N>(ft, Pvec + i, Qvec + i, remain);
-		f *= ft;
+		millerLoopVecN<N>(f, Pvec + i, Qvec + i, remain, false);
 	}
 }
 
diff --git a/test/bn_test.cpp b/test/bn_test.cpp
index 1a503c5d..f9a2914f 100644
--- a/test/bn_test.cpp
+++ b/test/bn_test.cpp
@@ -253,7 +253,7 @@ void testMillerLoop2(const G1& P1, const G2& Q1)
 
 void testMillerLoopVec()
 {
-	const size_t n = 8;
+	const size_t n = 40;
 	G1 Pvec[n];
 	G2 Qvec[n];
 	char c = 'a';
@@ -262,15 +262,18 @@ void testMillerLoopVec()
 		hashAndMapToG2(Qvec[i], &c, 1);
 		c++;
 	}
-	Fp12 f1, f2;
-	f1 = 1;
-	for (size_t i = 0; i < n; i++) {
-		Fp12 e;
-		millerLoop(e, Pvec[i], Qvec[i]);
-		f1 *= e;
+	for (size_t m = 0; m < n; m++) {
+		Fp12 f1, f2;
+		f1 = 1;
+		f2.clear();
+		for (size_t i = 0; i < m; i++) {
+			Fp12 e;
+			millerLoop(e, Pvec[i], Qvec[i]);
+			f1 *= e;
+		}
+		millerLoopVec(f2, Pvec, Qvec, m);
+		CYBOZU_TEST_EQUAL(f1, f2);
 	}
-	millerLoopVec(f2, Pvec, Qvec, n);
-	CYBOZU_TEST_EQUAL(f1, f2);
 }
 
 void testPairing(const G1& P, const G2& Q, const char *eStr)

From 02cce0ed478449afd3839ee05aee52a9225129b2 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 29 Sep 2020 16:01:27 +0900
Subject: [PATCH 315/553] add initF to millerLoopVec

---
 include/mcl/bn.hpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 1b7c614f..072efab5 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -2020,13 +2020,17 @@ inline void millerLoopVecN(Fp12& _f, const G1* Pvec, const G2* Qvec, size_t n, b
 	if (!initF) _f *= f;
 }
 /*
-	f = prod_{i=0}^{n-1} millerLoop(Pvec[i], Qvec[i])
+	_f = prod_{i=0}^{n-1} millerLoop(Pvec[i], Qvec[i])
+	if initF:
+	  f = _f
+	else:
+	  f *= _f
 */
-inline void millerLoopVec(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n)
+inline void millerLoopVec(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n, bool initF = true)
 {
 	const size_t N = 16;
 	size_t remain = fp::min_(N, n);
-	millerLoopVecN<N>(f, Pvec, Qvec, remain, true);
+	millerLoopVecN<N>(f, Pvec, Qvec, remain, initF);
 	for (size_t i = remain; i < n; i += N) {
 		remain = fp::min_(n - i, N);
 		millerLoopVecN<N>(f, Pvec + i, Qvec + i, remain, false);

From 512a0abd44ab9123f7bdebaff87587e839af3a2d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 1 Oct 2020 12:23:17 +0900
Subject: [PATCH 316/553] Fp::inv uses generated code

---
 include/mcl/op.hpp     |  2 +-
 src/fp.cpp             | 28 +++++++++++++++-------------
 src/fp_generator.hpp   |  4 ++--
 src/fp_static_code.hpp |  2 ++
 4 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 22a78b18..e8f572ff 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -14,7 +14,7 @@
 	#define MCL_DONT_USE_XBYAK
 	#define MCL_DONT_USE_OPENSSL
 #endif
-#if !defined(MCL_DONT_USE_XBYAK) && (defined(_WIN64) || defined(__x86_64__)) && (MCL_SIZEOF_UNIT == 8)
+#if !defined(MCL_DONT_USE_XBYAK) && (defined(_WIN64) || defined(__x86_64__)) && (MCL_SIZEOF_UNIT == 8) && !defined(MCL_STATIC_CODE)
 	#define MCL_USE_XBYAK
 #endif
 #if defined(MCL_USE_XBYAK) || defined(MCL_STATIC_CODE)
diff --git a/src/fp.cpp b/src/fp.cpp
index d46eb6e9..442ae43e 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -377,29 +377,31 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
 	}
 	op.rp = getMontgomeryCoeff(p[0]);
 	if (mode != FP_XBYAK) return true;
+
+#ifdef MCL_USE_VINT
+	const int maxInvN = 6;
+#else
+	const int maxInvN = 4;
+#endif
+
+#ifdef MCL_X64_ASM
+
 #ifdef MCL_USE_XBYAK
 	if (op.fg == 0) op.fg = Op::createFpGenerator();
-	bool useXbyak = op.fg->init(op, g_cpu);
+	bool enableInv = op.fg->init(op, g_cpu);
 #ifdef MCL_DUMP_JIT
 	return true;
 #endif
-#ifdef MCL_USE_VINT
-	const int maxN = 6;
-#else
-	const int maxN = 4;
-#endif
-
-	if (useXbyak && op.isMont && N <= maxN) {
-		op.fp_invOp = &invOpForMontC;
-		initInvTbl(op);
-	}
 #elif defined(MCL_STATIC_CODE)
 	fp::setStaticCode(op);
-	if (op.isMont && N <= 4) {
+	bool enableInv = true;
+#endif // MCL_USE_XBYAK
+
+	if (enableInv && op.isMont && N <= maxInvN) {
 		op.fp_invOp = &invOpForMontC;
 		initInvTbl(op);
 	}
-#endif
+#endif // MCL_X64_ASM
 	return true;
 }
 
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 34a6dede..8002a9a0 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -393,7 +393,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		op.fp_sqrA_ = gen_sqr();
 		setFuncInfo(prof_, suf, "_sqr", op.fp_sqrA_, getCurr());
 
-		if (op.primeMode != PM_NIST_P192 && op.N <= 4) { // support general op.N but not fast for op.N > 4
+		if (op.primeMode != PM_NIST_P192 && op.N <= 6) { // support general op.N but not fast for op.N > 4
 			align(16);
 			op.fp_preInv = getCurr<int2u>();
 			gen_preInv();
@@ -2676,7 +2676,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void gen_preInv()
 	{
-		assert(1 <= pn_ && pn_ <= 4);
+		assert(1 <= pn_ && pn_ <= 6);
 		const int freeRegNum = 13;
 		StackFrame sf(this, 2, 10 | UseRDX | UseRCX, (std::max<int>(0, pn_ * 5 - freeRegNum) + 1 + (isFullBit_ ? 1 : 0)) * 8);
 		const Reg64& pr = sf.p[0];
diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp
index 832062e4..09d4d01d 100644
--- a/src/fp_static_code.hpp
+++ b/src/fp_static_code.hpp
@@ -25,6 +25,7 @@ void mclx_Fp_mul(Unit*, const Unit*, const Unit*);
 void mclx_Fp_sqr(Unit*, const Unit*);
 void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
 void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
+int mclx_Fp_preInv(Unit*, const Unit*);
 Unit mclx_FpDbl_addPre(Unit*, const Unit*, const Unit*);
 Unit mclx_FpDbl_subPre(Unit*, const Unit*, const Unit*);
 void mclx_FpDbl_mulPre(Unit*, const Unit*, const Unit*);
@@ -73,6 +74,7 @@ void setStaticCode(mcl::fp::Op& op)
 		op.fp2_mulA_ = mclx_Fp2_mul;
 		op.fp2_sqrA_ = mclx_Fp2_sqr;
 		op.fp2_mul_xiA_ = mclx_Fp2_mul_xi;
+		op.fp_preInv = mclx_Fp_preInv;
 	} else {
 		// Fr, sizeof(Fr) = 32
 		op.fp_addPre = mclx_Fr_addPre;

From 0e64636878ea411c7fa0bd750a3b03866cd92c21 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 1 Oct 2020 12:24:52 +0900
Subject: [PATCH 317/553] move MCL_STATIC_CODE flag to common.mk

---
 Makefile  | 3 ---
 common.mk | 5 +++++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 92377dbc..2edb3b48 100644
--- a/Makefile
+++ b/Makefile
@@ -13,9 +13,6 @@ TEST_SRC+=ecdsa_c_test.cpp
 TEST_SRC+=modp_test.cpp
 LIB_OBJ=$(OBJ_DIR)/fp.o
 ifeq ($(MCL_STATIC_CODE),1)
-  MCL_USE_XBYAK=0
-  MCL_MAX_BIT_SIZE=384
-  CFLAGS+=-DMCL_STATIC_CODE
   LIB_OBJ+=obj/static_code.o
   TEST_SRC=bls12_test.cpp
 endif
diff --git a/common.mk b/common.mk
index 2870ac94..ec7d0e65 100644
--- a/common.mk
+++ b/common.mk
@@ -127,6 +127,11 @@ endif
 ifeq ($(MCL_USE_OPENSSL),1)
   OPENSSL_LIB=-lcrypto
 endif
+ifeq ($(MCL_STATIC_CODE),1)
+  MCL_USE_XBYAK=0
+  MCL_MAX_BIT_SIZE=384
+  CFLAGS+=-DMCL_STATIC_CODE
+endif
 LDFLAGS+=$(GMP_LIB) $(OPENSSL_LIB) $(BIT_OPT) $(LDFLAGS_USER)
 
 CFLAGS+=-fPIC

From 7f86eafef4ee928665c0374b1d47f91af249f44f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Laurent=20Gr=C3=A9my?= <lgremy@quarkslab.com>
Date: Fri, 2 Oct 2020 17:06:16 +0200
Subject: [PATCH 318/553] Add test vectors in section H.10.1 of
 draft-irtf-cfrg-hash-to-curve-09.

---
 test/mapto_wb19_test.cpp | 49 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index e7bee9eb..534919a0 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -362,6 +362,55 @@ void testHashToFp2v7(const T& mapto)
 					"0x12424ac32561493f3fe3c260708a12b7c620e7be00099a974e259ddc7d1f6395c3c811cdd19f1e8dbf3e9ecfdcbab8d6",
 				}
 			},
+			// https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-09.html#name-bls12381g2_xmdsha-256_sswu_
+			{
+				"abc", // msg
+        "QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_",
+				{ // P.x
+					"0x02c2d18e033b960562aae3cab37a27ce00d80ccd5ba4b7fe0e7a210245129dbec7780ccc7954725f4168aff2787776e6",
+					"0x139cddbccdc5e91b9623efd38c49f81a6f83f175e80b06fc374de9eb4b41dfe4ca3a230ed250fbe3a2acf73a41177fd8",
+				},
+				{ // P.y
+					"0x1787327b68159716a37440985269cf584bcb1e621d3a7202be6ea05c4cfe244aeb197642555a0645fb87bf7466b2ba48",
+					"0x00aa65dae3c8d732d10ecd2c50f8a1baf3001578f71c694e03866e9f3d49ac1e1ce70dd94a733534f106d4cec0eddd16",
+				}
+			},
+			{
+				"abcdef0123456789", // msg
+        "QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_",
+				{ // P.x
+					"0x121982811d2491fde9ba7ed31ef9ca474f0e1501297f68c298e9f4c0028add35aea8bb83d53c08cfc007c1e005723cd0",
+					"0x190d119345b94fbd15497bcba94ecf7db2cbfd1e1fe7da034d26cbba169fb3968288b3fafb265f9ebd380512a71c3f2c",
+				},
+				{ // P.y
+					"0x05571a0f8d3c08d094576981f4a3b8eda0a8e771fcdcc8ecceaf1356a6acf17574518acb506e435b639353c2e14827c8",
+					"0x0bb5e7572275c567462d91807de765611490205a941a5a6af3b1691bfe596c31225d3aabdf15faff860cb4ef17c7c3be",
+				}
+			},
+			{
+				"q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq", // msg
+        "QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_",
+				{ // P.x
+					"0x19a84dd7248a1066f737cc34502ee5555bd3c19f2ecdb3c7d9e24dc65d4e25e50d83f0f77105e955d78f4762d33c17da",
+					"0x0934aba516a52d8ae479939a91998299c76d39cc0c035cd18813bec433f587e2d7a4fef038260eef0cef4d02aae3eb91",
+				},
+				{ // P.y
+					"0x14f81cd421617428bc3b9fe25afbb751d934a00493524bc4e065635b0555084dd54679df1536101b2c979c0152d09192",
+					"0x09bcccfa036b4847c9950780733633f13619994394c23ff0b32fa6b795844f4a0673e20282d07bc69641cee04f5e5662",
+				}
+			},
+			{
+				"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // msg
+        "QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_",
+				{ // P.x
+					"0x01a6ba2f9a11fa5598b2d8ace0fbe0a0eacb65deceb476fbbcb64fd24557c2f4b18ecfc5663e54ae16a84f5ab7f62534",
+					"0x11fca2ff525572795a801eed17eb12785887c7b63fb77a42be46ce4a34131d71f7a73e95fee3f812aea3de78b4d01569",
+				},
+				{ // P.y
+					"0x0b6798718c8aed24bc19cb27f866f1c9effcdbf92397ad6448b5c9db90d2b9da6cbabf48adc1adf59a1a28344e79d57e",
+					"0x03a47f8e6d1763ba0cad63d6114c0accbef65707825a511b251a660a9b3994249ae4e63fac38b23da0c398689ee2ab52",
+				}
+			},
 		};
 		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 			const char *msg = tbl[i].msg;

From 81f624864561b636b85f6c8d40f96a1fa20f000b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Laurent=20Gr=C3=A9my?= <lgremy@quarkslab.com>
Date: Fri, 2 Oct 2020 17:08:13 +0200
Subject: [PATCH 319/553] Add test vectors in section I.1. of
 draft-irtf-cfrg-hash-to-curve-09.

---
 test/mapto_wb19_test.cpp | 91 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 534919a0..ec005f2a 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -293,6 +293,97 @@ void testHashToFp2v7(const T& mapto)
 		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
 		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
 	}
+	// Test coming from https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-09#appendix-I.1
+	{
+		char msg[] = "";
+		char dst[] = "QUUX-V01-CS02-with-expander";
+		char expect[] = "f659819a6473c1835b25ea59e3d38914c98b374f0970b7e4c92181df928fca88";
+		size_t msgSize = strlen(msg);
+		size_t dstSize = strlen(dst);
+		uint8_t md[32];
+		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
+		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
+	}
+	{
+		char msg[] = "abc";
+		char dst[] = "QUUX-V01-CS02-with-expander";
+		char expect[] = "1c38f7c211ef233367b2420d04798fa4698080a8901021a795a1151775fe4da7";
+		size_t msgSize = strlen(msg);
+		size_t dstSize = strlen(dst);
+		uint8_t md[32];
+		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
+		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
+	}
+	{
+		char msg[] = "abcdef0123456789";
+		char dst[] = "QUUX-V01-CS02-with-expander";
+		char expect[] = "8f7e7b66791f0da0dbb5ec7c22ec637f79758c0a48170bfb7c4611bd304ece89";
+		size_t msgSize = strlen(msg);
+		size_t dstSize = strlen(dst);
+		uint8_t md[32];
+		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
+		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
+	}
+	{
+		char msg[] = "q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq";
+		char dst[] = "QUUX-V01-CS02-with-expander";
+		char expect[] = "72d5aa5ec810370d1f0013c0df2f1d65699494ee2a39f72e1716b1b964e1c642";
+		size_t msgSize = strlen(msg);
+		size_t dstSize = strlen(dst);
+		uint8_t md[32];
+		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
+		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
+	}
+	{
+		char msg[] = "";
+		char dst[] = "QUUX-V01-CS02-with-expander";
+		char expect[] = "8bcffd1a3cae24cf9cd7ab85628fd111bb17e3739d3b53f89580d217aa79526f1708354a76a402d3569d6a9d19ef3de4d0b991e4f54b9f20dcde9b95a66824cbdf6c1a963a1913d43fd7ac443a02fc5d9d8d77e2071b86ab114a9f34150954a7531da568a1ea8c760861c0cde2005afc2c114042ee7b5848f5303f0611cf297f";
+		size_t msgSize = strlen(msg);
+		size_t dstSize = strlen(dst);
+		uint8_t md[128];
+		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
+		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
+	}
+	{
+		char msg[] = "abc";
+		char dst[] = "QUUX-V01-CS02-with-expander";
+		char expect[] = "fe994ec51bdaa821598047b3121c149b364b178606d5e72bfbb713933acc29c186f316baecf7ea22212f2496ef3f785a27e84a40d8b299cec56032763eceeff4c61bd1fe65ed81decafff4a31d0198619c0aa0c6c51fca15520789925e813dcfd318b542f8799441271f4db9ee3b8092a7a2e8d5b75b73e28fb1ab6b4573c192";
+		size_t msgSize = strlen(msg);
+		size_t dstSize = strlen(dst);
+		uint8_t md[128];
+		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
+		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
+	}
+	{
+		char msg[] = "abcdef0123456789";
+		char dst[] = "QUUX-V01-CS02-with-expander";
+		char expect[] = "c9ec7941811b1e19ce98e21db28d22259354d4d0643e301175e2f474e030d32694e9dd5520dde93f3600d8edad94e5c364903088a7228cc9eff685d7eaac50d5a5a8229d083b51de4ccc3733917f4b9535a819b445814890b7029b5de805bf62b33a4dc7e24acdf2c924e9fe50d55a6b832c8c84c7f82474b34e48c6d43867be";
+		size_t msgSize = strlen(msg);
+		size_t dstSize = strlen(dst);
+		uint8_t md[128];
+		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
+		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
+	}
+	{
+		char msg[] = "q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq";
+		char dst[] = "QUUX-V01-CS02-with-expander";
+		char expect[] = "48e256ddba722053ba462b2b93351fc966026e6d6db493189798181c5f3feea377b5a6f1d8368d7453faef715f9aecb078cd402cbd548c0e179c4ed1e4c7e5b048e0a39d31817b5b24f50db58bb3720fe96ba53db947842120a068816ac05c159bb5266c63658b4f000cbf87b1209a225def8ef1dca917bcda79a1e42acd8069";
+		size_t msgSize = strlen(msg);
+		size_t dstSize = strlen(dst);
+		uint8_t md[128];
+		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
+		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
+	}
+	{
+		char msg[] = "a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+		char dst[] = "QUUX-V01-CS02-with-expander";
+		char expect[] = "396962db47f749ec3b5042ce2452b619607f27fd3939ece2746a7614fb83a1d097f554df3927b084e55de92c7871430d6b95c2a13896d8a33bc48587b1f66d21b128a1a8240d5b0c26dfe795a1a842a0807bb148b77c2ef82ed4b6c9f7fcb732e7f94466c8b51e52bf378fba044a31f5cb44583a892f5969dcd73b3fa128816e";
+		size_t msgSize = strlen(msg);
+		size_t dstSize = strlen(dst);
+		uint8_t md[128];
+		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
+		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
+	}
 	{
 		const struct {
 			const char *msg;

From 6427215b98e7f431f23344d34eaef7de52a1a308 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 7 Oct 2020 12:17:22 +0900
Subject: [PATCH 320/553] remove warning for mcl-wasm

---
 src/fp.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index 442ae43e..710869ac 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -378,14 +378,14 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
 	op.rp = getMontgomeryCoeff(p[0]);
 	if (mode != FP_XBYAK) return true;
 
+#ifdef MCL_X64_ASM
+
 #ifdef MCL_USE_VINT
 	const int maxInvN = 6;
 #else
 	const int maxInvN = 4;
 #endif
 
-#ifdef MCL_X64_ASM
-
 #ifdef MCL_USE_XBYAK
 	if (op.fg == 0) op.fg = Op::createFpGenerator();
 	bool enableInv = op.fg->init(op, g_cpu);

From 0114a3029f74829e79dc51de6dfb28f5da580632 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 7 Oct 2020 12:17:45 +0900
Subject: [PATCH 321/553] version 1.23

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index e8f572ff..6da18c0c 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x123; /* 0xABC = A.BC */
+static const int version = 0x124; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From a3cb8ff42172cd730e834a8ad56c2d35e5f45c9d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 8 Oct 2020 14:11:46 +0900
Subject: [PATCH 322/553] remove warning by -Wundef

---
 include/cybozu/inttype.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/cybozu/inttype.hpp b/include/cybozu/inttype.hpp
index f74a1d19..b6f99afb 100644
--- a/include/cybozu/inttype.hpp
+++ b/include/cybozu/inttype.hpp
@@ -97,7 +97,7 @@
 	#else
 		#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP11
 	#endif
-#elif CYBOZU_GNUC_PREREQ(4, 5) || (CYBOZU_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || (__clang_major__ >= 3)
+#elif CYBOZU_GNUC_PREREQ(4, 5) || (CYBOZU_GNUC_PREREQ(4, 2) && (defined(__GLIBCXX__) &&__GLIBCXX__ >= 20070719)) || defined(__INTEL_COMPILER) || (__clang_major__ >= 3)
 	#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
 #else
 	#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP03

From e1efcbe55f63ceaa491f2e3df96ff60665fc7046 Mon Sep 17 00:00:00 2001
From: raphael <rheitjoh@mail.uni-paderborn.de>
Date: Mon, 12 Oct 2020 12:24:42 +0200
Subject: [PATCH 323/553] Add GT inv to java ffi

---
 ffi/java/com/herumi/mcl/Fp.java           |   3 +-
 ffi/java/com/herumi/mcl/Fr.java           |   3 +-
 ffi/java/com/herumi/mcl/G1.java           |   3 +-
 ffi/java/com/herumi/mcl/G2.java           |   3 +-
 ffi/java/com/herumi/mcl/GT.java           |   3 +-
 ffi/java/com/herumi/mcl/Mcl.java          |   7 +-
 ffi/java/com/herumi/mcl/MclConstants.java |   3 +-
 ffi/java/com/herumi/mcl/MclJNI.java       |   4 +-
 ffi/java/mcl_impl.hpp                     |   5 +
 ffi/java/mcl_wrap.cxx                     | 224 ++++++++++++----------
 10 files changed, 142 insertions(+), 116 deletions(-)

diff --git a/ffi/java/com/herumi/mcl/Fp.java b/ffi/java/com/herumi/mcl/Fp.java
index bac8b549..b9b6acb7 100644
--- a/ffi/java/com/herumi/mcl/Fp.java
+++ b/ffi/java/com/herumi/mcl/Fp.java
@@ -1,12 +1,11 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
-package com.herumi.mcl;
 
 public class Fp {
   private transient long swigCPtr;
diff --git a/ffi/java/com/herumi/mcl/Fr.java b/ffi/java/com/herumi/mcl/Fr.java
index 94656c62..1436a360 100644
--- a/ffi/java/com/herumi/mcl/Fr.java
+++ b/ffi/java/com/herumi/mcl/Fr.java
@@ -1,12 +1,11 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
-package com.herumi.mcl;
 
 public class Fr {
   private transient long swigCPtr;
diff --git a/ffi/java/com/herumi/mcl/G1.java b/ffi/java/com/herumi/mcl/G1.java
index 2492a3b5..58464177 100644
--- a/ffi/java/com/herumi/mcl/G1.java
+++ b/ffi/java/com/herumi/mcl/G1.java
@@ -1,12 +1,11 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
-package com.herumi.mcl;
 
 public class G1 {
   private transient long swigCPtr;
diff --git a/ffi/java/com/herumi/mcl/G2.java b/ffi/java/com/herumi/mcl/G2.java
index 5bd7e664..5dfb9fe6 100644
--- a/ffi/java/com/herumi/mcl/G2.java
+++ b/ffi/java/com/herumi/mcl/G2.java
@@ -1,12 +1,11 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
-package com.herumi.mcl;
 
 public class G2 {
   private transient long swigCPtr;
diff --git a/ffi/java/com/herumi/mcl/GT.java b/ffi/java/com/herumi/mcl/GT.java
index 3aefca68..a5ae08ea 100644
--- a/ffi/java/com/herumi/mcl/GT.java
+++ b/ffi/java/com/herumi/mcl/GT.java
@@ -1,12 +1,11 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
-package com.herumi.mcl;
 
 public class GT {
   private transient long swigCPtr;
diff --git a/ffi/java/com/herumi/mcl/Mcl.java b/ffi/java/com/herumi/mcl/Mcl.java
index c292c1ea..1bccc916 100644
--- a/ffi/java/com/herumi/mcl/Mcl.java
+++ b/ffi/java/com/herumi/mcl/Mcl.java
@@ -1,12 +1,11 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
-package com.herumi.mcl;
 
 public class Mcl implements MclConstants {
   public static void SystemInit(int curveType) {
@@ -113,4 +112,8 @@ public static void mul(GT z, GT x, GT y) {
     MclJNI.mul__SWIG_4(GT.getCPtr(z), z, GT.getCPtr(x), x, GT.getCPtr(y), y);
   }
 
+  public static void inv(GT y, GT x) {
+    MclJNI.inv(GT.getCPtr(y), y, GT.getCPtr(x), x);
+  }
+
 }
diff --git a/ffi/java/com/herumi/mcl/MclConstants.java b/ffi/java/com/herumi/mcl/MclConstants.java
index 6d31b200..376f1422 100644
--- a/ffi/java/com/herumi/mcl/MclConstants.java
+++ b/ffi/java/com/herumi/mcl/MclConstants.java
@@ -1,12 +1,11 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
-package com.herumi.mcl;
 
 public interface MclConstants {
   public final static int BN254 = 0;
diff --git a/ffi/java/com/herumi/mcl/MclJNI.java b/ffi/java/com/herumi/mcl/MclJNI.java
index ccb56a44..b6928fc2 100644
--- a/ffi/java/com/herumi/mcl/MclJNI.java
+++ b/ffi/java/com/herumi/mcl/MclJNI.java
@@ -1,12 +1,11 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
-package com.herumi.mcl;
 
 public class MclJNI {
   public final static native void SystemInit(int jarg1);
@@ -93,6 +92,7 @@ public class MclJNI {
   public final static native byte[] G2_serialize(long jarg1, G2 jarg1_);
   public final static native void delete_G2(long jarg1);
   public final static native void mul__SWIG_4(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, GT jarg3_);
+  public final static native void inv(long jarg1, GT jarg1_, long jarg2, GT jarg2_);
   public final static native long new_GT__SWIG_0();
   public final static native long new_GT__SWIG_1(long jarg1, GT jarg1_);
   public final static native boolean GT_equals(long jarg1, GT jarg1_, long jarg2, GT jarg2_);
diff --git a/ffi/java/mcl_impl.hpp b/ffi/java/mcl_impl.hpp
index 8eb23737..1c206d32 100644
--- a/ffi/java/mcl_impl.hpp
+++ b/ffi/java/mcl_impl.hpp
@@ -335,6 +335,7 @@ void mul(G2& z, const G2& x, const Fr& y)
 class GT {
 	mcl::bn::Fp12 self_;
 	friend void mul(GT& z, const GT& x, const GT& y);
+	friend void inv(GT& y, GT& x);
 	friend void pow(GT& z, const GT& x, const Fr& y);
 	friend void pairing(GT& e, const G1& P, const G2& Q);
 public:
@@ -367,6 +368,10 @@ void mul(GT& z, const GT& x, const GT& y)
 {
 	mcl::bn::Fp12::mul(z.self_, x.self_, y.self_);
 }
+void inv(GT& y, GT& x) 
+{
+	mcl::bn::inv(y.self_, x.self_);
+}
 void pow(GT& z, const GT& x, const Fr& y)
 {
 	mcl::bn::Fp12::pow(z.self_, x.self_, y.self_);
diff --git a/ffi/java/mcl_wrap.cxx b/ffi/java/mcl_wrap.cxx
index 6446e7f8..753cf59c 100644
--- a/ffi/java/mcl_wrap.cxx
+++ b/ffi/java/mcl_wrap.cxx
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * This file is not intended to be easily readable and contains a number of
  * coding conventions designed to improve portability and efficiency. Do not make
@@ -182,15 +182,16 @@ template <typename T> T SwigValueInit() {
 
 /* Support for throwing Java exceptions */
 typedef enum {
-  SWIG_JavaOutOfMemoryError = 1, 
-  SWIG_JavaIOException, 
-  SWIG_JavaRuntimeException, 
+  SWIG_JavaOutOfMemoryError = 1,
+  SWIG_JavaIOException,
+  SWIG_JavaRuntimeException,
   SWIG_JavaIndexOutOfBoundsException,
   SWIG_JavaArithmeticException,
   SWIG_JavaIllegalArgumentException,
   SWIG_JavaNullPointerException,
   SWIG_JavaDirectorPureVirtual,
-  SWIG_JavaUnknownError
+  SWIG_JavaUnknownError,
+  SWIG_JavaIllegalStateException,
 } SWIG_JavaExceptionCodes;
 
 typedef struct {
@@ -211,6 +212,7 @@ static void SWIGUNUSED SWIG_JavaThrowException(JNIEnv *jenv, SWIG_JavaExceptionC
     { SWIG_JavaNullPointerException, "java/lang/NullPointerException" },
     { SWIG_JavaDirectorPureVirtual, "java/lang/RuntimeException" },
     { SWIG_JavaUnknownError,  "java/lang/UnknownError" },
+    { SWIG_JavaIllegalStateException, "java/lang/IllegalStateException" },
     { (SWIG_JavaExceptionCodes)0,  "java/lang/UnknownError" }
   };
   const SWIG_JavaExceptions_t *except_ptr = java_exceptions;
@@ -247,7 +249,7 @@ static void SWIGUNUSED SWIG_JavaThrowException(JNIEnv *jenv, SWIG_JavaExceptionC
 extern "C" {
 #endif
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_SystemInit(JNIEnv *jenv, jclass jcls, jint jarg1) {
+SWIGEXPORT void JNICALL Java_MclJNI_SystemInit(JNIEnv *jenv, jclass jcls, jint jarg1) {
   int arg1 ;
   
   (void)jenv;
@@ -262,7 +264,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_SystemInit(JNIEnv *jenv, jcla
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_MclJNI_neg_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   Fr *arg1 = 0 ;
   Fr *arg2 = 0 ;
   
@@ -284,7 +286,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_10(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_add_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fr *arg1 = 0 ;
   Fr *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -313,7 +315,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_10(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_sub_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fr *arg1 = 0 ;
   Fr *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -342,7 +344,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_10(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fr *arg1 = 0 ;
   Fr *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -371,7 +373,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_10(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G1 *arg1 = 0 ;
   G1 *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -400,7 +402,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_11(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G2 *arg1 = 0 ;
   G2 *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -429,7 +431,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_12(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_div_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_div_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fr *arg1 = 0 ;
   Fr *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -458,7 +460,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_div_1_1SWIG_10(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_pow(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_pow(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   GT *arg1 = 0 ;
   GT *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -487,7 +489,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_pow(JNIEnv *jenv, jclass jcls
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
   jlong jresult = 0 ;
   Fr *result = 0 ;
   
@@ -499,7 +501,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_10(JNIEnv *j
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jlong jresult = 0 ;
   Fr *arg1 = 0 ;
   Fr *result = 0 ;
@@ -518,7 +520,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_11(JNIEnv *j
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jint jarg1) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jint jarg1) {
   jlong jresult = 0 ;
   int arg1 ;
   Fr *result = 0 ;
@@ -532,7 +534,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_12(JNIEnv *j
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jstring jarg1, jint jarg2) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jstring jarg1, jint jarg2) {
   jlong jresult = 0 ;
   std::string *arg1 = 0 ;
   int arg2 ;
@@ -561,7 +563,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_13(JNIEnv *j
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jstring jarg1) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jstring jarg1) {
   jlong jresult = 0 ;
   std::string *arg1 = 0 ;
   Fr *result = 0 ;
@@ -588,7 +590,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_14(JNIEnv *j
 }
 
 
-SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fr_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT jboolean JNICALL Java_MclJNI_Fr_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   jboolean jresult = 0 ;
   Fr *arg1 = (Fr *) 0 ;
   Fr *arg2 = 0 ;
@@ -610,7 +612,7 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fr_1equals(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+SWIGEXPORT void JNICALL Java_MclJNI_Fr_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   Fr *arg1 = (Fr *) 0 ;
   std::string *arg2 = 0 ;
   int arg3 ;
@@ -638,7 +640,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setStr_1_1SWIG_10(JNIEnv
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_Fr_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
   Fr *arg1 = (Fr *) 0 ;
   std::string *arg2 = 0 ;
   
@@ -664,7 +666,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setStr_1_1SWIG_11(JNIEnv
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setInt(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_Fr_1setInt(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   Fr *arg1 = (Fr *) 0 ;
   int arg2 ;
   
@@ -677,7 +679,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setInt(JNIEnv *jenv, jcla
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_MclJNI_Fr_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   Fr *arg1 = (Fr *) 0 ;
   
   (void)jenv;
@@ -688,7 +690,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1clear(JNIEnv *jenv, jclas
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setByCSPRNG(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_MclJNI_Fr_1setByCSPRNG(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   Fr *arg1 = (Fr *) 0 ;
   
   (void)jenv;
@@ -699,7 +701,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setByCSPRNG(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fr_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT jstring JNICALL Java_MclJNI_Fr_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   jstring jresult = 0 ;
   Fr *arg1 = (Fr *) 0 ;
   int arg2 ;
@@ -721,7 +723,7 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fr_1toString_1_1SWIG_10(JN
 }
 
 
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fr_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jstring JNICALL Java_MclJNI_Fr_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jstring jresult = 0 ;
   Fr *arg1 = (Fr *) 0 ;
   std::string result;
@@ -741,7 +743,7 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fr_1toString_1_1SWIG_11(JN
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_Fr_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   Fr *arg1 = (Fr *) 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -772,7 +774,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1deserialize(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_Fr_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jbyteArray JNICALL Java_MclJNI_Fr_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jbyteArray jresult = 0 ;
   Fr *arg1 = (Fr *) 0 ;
   std::string *arg2 = 0 ;
@@ -797,7 +799,7 @@ SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_Fr_1serialize(JNIEnv *j
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1Fr(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+SWIGEXPORT void JNICALL Java_MclJNI_delete_1Fr(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   Fr *arg1 = (Fr *) 0 ;
   
   (void)jenv;
@@ -807,7 +809,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1Fr(JNIEnv *jenv, jcla
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_MclJNI_neg_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
   
@@ -829,7 +831,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_11(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_add_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
   Fp *arg3 = 0 ;
@@ -858,7 +860,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_11(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_sub_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
   Fp *arg3 = 0 ;
@@ -887,7 +889,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_11(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
   Fp *arg3 = 0 ;
@@ -916,7 +918,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_13(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_div_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_div_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
   Fp *arg3 = 0 ;
@@ -945,7 +947,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_div_1_1SWIG_11(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
   jlong jresult = 0 ;
   Fp *result = 0 ;
   
@@ -957,7 +959,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_10(JNIEnv *j
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jlong jresult = 0 ;
   Fp *arg1 = 0 ;
   Fp *result = 0 ;
@@ -976,7 +978,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_11(JNIEnv *j
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jint jarg1) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jint jarg1) {
   jlong jresult = 0 ;
   int arg1 ;
   Fp *result = 0 ;
@@ -990,7 +992,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_12(JNIEnv *j
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jstring jarg1, jint jarg2) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jstring jarg1, jint jarg2) {
   jlong jresult = 0 ;
   std::string *arg1 = 0 ;
   int arg2 ;
@@ -1019,7 +1021,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_13(JNIEnv *j
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jstring jarg1) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jstring jarg1) {
   jlong jresult = 0 ;
   std::string *arg1 = 0 ;
   Fp *result = 0 ;
@@ -1046,7 +1048,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_14(JNIEnv *j
 }
 
 
-SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fp_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT jboolean JNICALL Java_MclJNI_Fp_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   jboolean jresult = 0 ;
   Fp *arg1 = (Fp *) 0 ;
   Fp *arg2 = 0 ;
@@ -1068,7 +1070,7 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fp_1equals(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+SWIGEXPORT void JNICALL Java_MclJNI_Fp_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   Fp *arg1 = (Fp *) 0 ;
   std::string *arg2 = 0 ;
   int arg3 ;
@@ -1096,7 +1098,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setStr_1_1SWIG_10(JNIEnv
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_Fp_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
   Fp *arg1 = (Fp *) 0 ;
   std::string *arg2 = 0 ;
   
@@ -1122,7 +1124,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setStr_1_1SWIG_11(JNIEnv
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setInt(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_Fp_1setInt(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   Fp *arg1 = (Fp *) 0 ;
   int arg2 ;
   
@@ -1135,7 +1137,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setInt(JNIEnv *jenv, jcla
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_MclJNI_Fp_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   Fp *arg1 = (Fp *) 0 ;
   
   (void)jenv;
@@ -1146,7 +1148,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1clear(JNIEnv *jenv, jclas
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setByCSPRNG(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_MclJNI_Fp_1setByCSPRNG(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   Fp *arg1 = (Fp *) 0 ;
   
   (void)jenv;
@@ -1157,7 +1159,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setByCSPRNG(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fp_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT jstring JNICALL Java_MclJNI_Fp_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   jstring jresult = 0 ;
   Fp *arg1 = (Fp *) 0 ;
   int arg2 ;
@@ -1179,7 +1181,7 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fp_1toString_1_1SWIG_10(JN
 }
 
 
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fp_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jstring JNICALL Java_MclJNI_Fp_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jstring jresult = 0 ;
   Fp *arg1 = (Fp *) 0 ;
   std::string result;
@@ -1199,7 +1201,7 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fp_1toString_1_1SWIG_11(JN
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_Fp_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   Fp *arg1 = (Fp *) 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -1230,7 +1232,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1deserialize(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_Fp_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jbyteArray JNICALL Java_MclJNI_Fp_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jbyteArray jresult = 0 ;
   Fp *arg1 = (Fp *) 0 ;
   std::string *arg2 = 0 ;
@@ -1255,7 +1257,7 @@ SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_Fp_1serialize(JNIEnv *j
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1Fp(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+SWIGEXPORT void JNICALL Java_MclJNI_delete_1Fp(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   Fp *arg1 = (Fp *) 0 ;
   
   (void)jenv;
@@ -1265,7 +1267,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1Fp(JNIEnv *jenv, jcla
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_MclJNI_neg_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   G1 *arg1 = 0 ;
   G1 *arg2 = 0 ;
   
@@ -1287,7 +1289,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_12(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_dbl_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_MclJNI_dbl_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   G1 *arg1 = 0 ;
   G1 *arg2 = 0 ;
   
@@ -1309,7 +1311,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_dbl_1_1SWIG_10(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_add_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G1 *arg1 = 0 ;
   G1 *arg2 = 0 ;
   G1 *arg3 = 0 ;
@@ -1338,7 +1340,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_12(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_sub_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G1 *arg1 = 0 ;
   G1 *arg2 = 0 ;
   G1 *arg3 = 0 ;
@@ -1367,7 +1369,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_12(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_pairing(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_pairing(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   GT *arg1 = 0 ;
   G1 *arg2 = 0 ;
   G2 *arg3 = 0 ;
@@ -1396,7 +1398,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_pairing(JNIEnv *jenv, jclass
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_hashAndMapToG1(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_hashAndMapToG1(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   G1 *arg1 = 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -1431,7 +1433,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_hashAndMapToG1(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G1_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G1_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
   jlong jresult = 0 ;
   G1 *result = 0 ;
   
@@ -1443,7 +1445,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G1_1_1SWIG_10(JNIEnv *j
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G1_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G1_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jlong jresult = 0 ;
   G1 *arg1 = 0 ;
   G1 *result = 0 ;
@@ -1462,7 +1464,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G1_1_1SWIG_11(JNIEnv *j
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G1_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G1_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   jlong jresult = 0 ;
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
@@ -1493,7 +1495,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G1_1_1SWIG_12(JNIEnv *j
 }
 
 
-SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT jboolean JNICALL Java_MclJNI_G1_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   jboolean jresult = 0 ;
   G1 *arg1 = (G1 *) 0 ;
   G1 *arg2 = 0 ;
@@ -1515,7 +1517,7 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1equals(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_G1_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G1 *arg1 = (G1 *) 0 ;
   Fp *arg2 = 0 ;
   Fp *arg3 = 0 ;
@@ -1545,7 +1547,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1set(JNIEnv *jenv, jclass
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_MclJNI_G1_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   G1 *arg1 = (G1 *) 0 ;
   
   (void)jenv;
@@ -1556,7 +1558,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1clear(JNIEnv *jenv, jclas
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+SWIGEXPORT void JNICALL Java_MclJNI_G1_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   G1 *arg1 = (G1 *) 0 ;
   std::string *arg2 = 0 ;
   int arg3 ;
@@ -1584,7 +1586,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1setStr_1_1SWIG_10(JNIEnv
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_G1_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
   G1 *arg1 = (G1 *) 0 ;
   std::string *arg2 = 0 ;
   
@@ -1610,7 +1612,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1setStr_1_1SWIG_11(JNIEnv
 }
 
 
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G1_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT jstring JNICALL Java_MclJNI_G1_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   jstring jresult = 0 ;
   G1 *arg1 = (G1 *) 0 ;
   int arg2 ;
@@ -1632,7 +1634,7 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G1_1toString_1_1SWIG_10(JN
 }
 
 
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G1_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jstring JNICALL Java_MclJNI_G1_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jstring jresult = 0 ;
   G1 *arg1 = (G1 *) 0 ;
   std::string result;
@@ -1652,7 +1654,7 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G1_1toString_1_1SWIG_11(JN
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_G1_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   G1 *arg1 = (G1 *) 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -1683,7 +1685,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1deserialize(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_G1_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jbyteArray JNICALL Java_MclJNI_G1_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jbyteArray jresult = 0 ;
   G1 *arg1 = (G1 *) 0 ;
   std::string *arg2 = 0 ;
@@ -1708,7 +1710,7 @@ SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_G1_1serialize(JNIEnv *j
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1G1(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+SWIGEXPORT void JNICALL Java_MclJNI_delete_1G1(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   G1 *arg1 = (G1 *) 0 ;
   
   (void)jenv;
@@ -1718,7 +1720,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1G1(JNIEnv *jenv, jcla
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_MclJNI_neg_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   G2 *arg1 = 0 ;
   G2 *arg2 = 0 ;
   
@@ -1740,7 +1742,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_13(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_dbl_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_MclJNI_dbl_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   G2 *arg1 = 0 ;
   G2 *arg2 = 0 ;
   
@@ -1762,7 +1764,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_dbl_1_1SWIG_11(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_add_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G2 *arg1 = 0 ;
   G2 *arg2 = 0 ;
   G2 *arg3 = 0 ;
@@ -1791,7 +1793,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_13(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_sub_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G2 *arg1 = 0 ;
   G2 *arg2 = 0 ;
   G2 *arg3 = 0 ;
@@ -1820,7 +1822,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_13(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_hashAndMapToG2(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_hashAndMapToG2(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   G2 *arg1 = 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -1855,7 +1857,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_hashAndMapToG2(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G2_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G2_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
   jlong jresult = 0 ;
   G2 *result = 0 ;
   
@@ -1867,7 +1869,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G2_1_1SWIG_10(JNIEnv *j
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G2_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G2_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jlong jresult = 0 ;
   G2 *arg1 = 0 ;
   G2 *result = 0 ;
@@ -1886,7 +1888,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G2_1_1SWIG_11(JNIEnv *j
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G2_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_, jlong jarg4, jobject jarg4_) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G2_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_, jlong jarg4, jobject jarg4_) {
   jlong jresult = 0 ;
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
@@ -1931,7 +1933,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G2_1_1SWIG_12(JNIEnv *j
 }
 
 
-SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G2_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT jboolean JNICALL Java_MclJNI_G2_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   jboolean jresult = 0 ;
   G2 *arg1 = (G2 *) 0 ;
   G2 *arg2 = 0 ;
@@ -1953,7 +1955,7 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G2_1equals(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_, jlong jarg4, jobject jarg4_, jlong jarg5, jobject jarg5_) {
+SWIGEXPORT void JNICALL Java_MclJNI_G2_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_, jlong jarg4, jobject jarg4_, jlong jarg5, jobject jarg5_) {
   G2 *arg1 = (G2 *) 0 ;
   Fp *arg2 = 0 ;
   Fp *arg3 = 0 ;
@@ -1997,7 +1999,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1set(JNIEnv *jenv, jclass
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_MclJNI_G2_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   G2 *arg1 = (G2 *) 0 ;
   
   (void)jenv;
@@ -2008,7 +2010,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1clear(JNIEnv *jenv, jclas
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+SWIGEXPORT void JNICALL Java_MclJNI_G2_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   G2 *arg1 = (G2 *) 0 ;
   std::string *arg2 = 0 ;
   int arg3 ;
@@ -2036,7 +2038,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1setStr_1_1SWIG_10(JNIEnv
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_G2_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
   G2 *arg1 = (G2 *) 0 ;
   std::string *arg2 = 0 ;
   
@@ -2062,7 +2064,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1setStr_1_1SWIG_11(JNIEnv
 }
 
 
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G2_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT jstring JNICALL Java_MclJNI_G2_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   jstring jresult = 0 ;
   G2 *arg1 = (G2 *) 0 ;
   int arg2 ;
@@ -2084,7 +2086,7 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G2_1toString_1_1SWIG_10(JN
 }
 
 
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G2_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jstring JNICALL Java_MclJNI_G2_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jstring jresult = 0 ;
   G2 *arg1 = (G2 *) 0 ;
   std::string result;
@@ -2104,7 +2106,7 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G2_1toString_1_1SWIG_11(JN
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_G2_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   G2 *arg1 = (G2 *) 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -2135,7 +2137,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1deserialize(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_G2_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jbyteArray JNICALL Java_MclJNI_G2_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jbyteArray jresult = 0 ;
   G2 *arg1 = (G2 *) 0 ;
   std::string *arg2 = 0 ;
@@ -2160,7 +2162,7 @@ SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_G2_1serialize(JNIEnv *j
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1G2(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+SWIGEXPORT void JNICALL Java_MclJNI_delete_1G2(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   G2 *arg1 = (G2 *) 0 ;
   
   (void)jenv;
@@ -2170,7 +2172,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1G2(JNIEnv *jenv, jcla
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   GT *arg1 = 0 ;
   GT *arg2 = 0 ;
   GT *arg3 = 0 ;
@@ -2199,7 +2201,29 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_14(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1GT_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+SWIGEXPORT void JNICALL Java_MclJNI_inv(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  GT *arg1 = 0 ;
+  GT *arg2 = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  arg1 = *(GT **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT & reference is null");
+    return ;
+  } 
+  arg2 = *(GT **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "GT & reference is null");
+    return ;
+  } 
+  inv(*arg1,*arg2);
+}
+
+
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1GT_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
   jlong jresult = 0 ;
   GT *result = 0 ;
   
@@ -2211,7 +2235,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1GT_1_1SWIG_10(JNIEnv *j
 }
 
 
-SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1GT_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jlong JNICALL Java_MclJNI_new_1GT_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jlong jresult = 0 ;
   GT *arg1 = 0 ;
   GT *result = 0 ;
@@ -2230,7 +2254,7 @@ SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1GT_1_1SWIG_11(JNIEnv *j
 }
 
 
-SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_GT_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT jboolean JNICALL Java_MclJNI_GT_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   jboolean jresult = 0 ;
   GT *arg1 = (GT *) 0 ;
   GT *arg2 = 0 ;
@@ -2252,7 +2276,7 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_GT_1equals(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_MclJNI_GT_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   GT *arg1 = (GT *) 0 ;
   
   (void)jenv;
@@ -2263,7 +2287,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1clear(JNIEnv *jenv, jclas
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+SWIGEXPORT void JNICALL Java_MclJNI_GT_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   GT *arg1 = (GT *) 0 ;
   std::string *arg2 = 0 ;
   int arg3 ;
@@ -2291,7 +2315,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1setStr_1_1SWIG_10(JNIEnv
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_GT_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
   GT *arg1 = (GT *) 0 ;
   std::string *arg2 = 0 ;
   
@@ -2317,7 +2341,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1setStr_1_1SWIG_11(JNIEnv
 }
 
 
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_GT_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT jstring JNICALL Java_MclJNI_GT_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   jstring jresult = 0 ;
   GT *arg1 = (GT *) 0 ;
   int arg2 ;
@@ -2339,7 +2363,7 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_GT_1toString_1_1SWIG_10(JN
 }
 
 
-SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_GT_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jstring JNICALL Java_MclJNI_GT_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jstring jresult = 0 ;
   GT *arg1 = (GT *) 0 ;
   std::string result;
@@ -2359,7 +2383,7 @@ SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_GT_1toString_1_1SWIG_11(JN
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_MclJNI_GT_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   GT *arg1 = (GT *) 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -2390,7 +2414,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1deserialize(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_GT_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jbyteArray JNICALL Java_MclJNI_GT_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jbyteArray jresult = 0 ;
   GT *arg1 = (GT *) 0 ;
   std::string *arg2 = 0 ;
@@ -2415,7 +2439,7 @@ SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_GT_1serialize(JNIEnv *j
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1GT(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+SWIGEXPORT void JNICALL Java_MclJNI_delete_1GT(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   GT *arg1 = (GT *) 0 ;
   
   (void)jenv;

From 0d9af2d2032960919fc6e656262e3a318922b249 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 13 Oct 2020 10:38:11 +0900
Subject: [PATCH 324/553] dst for mapToG1 has changed

---
 include/mcl/mapto_wb19.hpp | 2 +-
 readme.md                  | 1 +
 test/mapto_wb19_test.cpp   | 7 ++++---
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/mcl/mapto_wb19.hpp b/include/mcl/mapto_wb19.hpp
index 814baaa5..216a4218 100644
--- a/include/mcl/mapto_wb19.hpp
+++ b/include/mcl/mapto_wb19.hpp
@@ -562,7 +562,7 @@ struct MapTo_WB19 {
 
 	void msgToG1(G1& out, const void *msg, size_t msgSize) const
 	{
-		const char *dst = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_";
+		const char *dst = "BLS_SIG_BLS12381G1_XMD:SHA-256_SSWU_RO_POP_";
 		const size_t dstSize = strlen(dst);
 		msgToG1(out, msg, msgSize, dst, dstSize);
 	}
diff --git a/readme.md b/readme.md
index 94be8441..b8fa2647 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,7 @@ mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
+- dst for mapToG1 has changed to `BLS_SIG_BLS12381G1_XMD:SHA-256_SSWU_RO_POP_`.
 - `mclBn_eth*` functions are removed.
 - `mcl::bn::mapToG1(G1& out, const Fp& v)` supports `BLS12_MAP_FP_TO_G1` in [EIP 2537](https://eips.ethereum.org/EIPS/eip-2537).
 - `mcl::bn::hashAndMapToG1(G1& out, const void *msg, size_t msgSize)` supports ([hash-to-curve-09 BLS12381G1_XMD:SHA-256_SSWU_RO_](https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-09.html#name-bls12381g1_xmdsha-256_sswu_))
diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index e7bee9eb..db4586b6 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -502,10 +502,11 @@ void testMsgToG1(const T& mapto)
 		const char *y;
 	} tbl[] = {
 		{
+			// generated by draft-irtf-cfrg-hash-to-curve/poc/suite_bls12381g1.sage
 			"asdf",
-			"BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_",
-			"bc73d15443009a8ff2ddce864136d892274dd8365c60d0d2d44cc543387348e366a8f1e1401427e37743c29ed2c939a",
-			"101e26428a1b78c05458cb1cc37d2d87876ad3437096d2827f376702d4451667fe1fa82e82795495d33d466133ed1862",
+			"BLS_SIG_BLS12381G1_XMD:SHA-256_SSWU_RO_POP_",
+			"a72df17570d0eb81260042edbea415ad49bdb94a1bc1ce9d1bf147d0d48268170764bb513a3b994d662e1faba137106",
+			"122b77eca1ed58795b7cd456576362f4f7bd7a572a29334b4817898a42414d31e9c0267f2dc481a4daf8bcf4a460322",
 		},
 		// https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-09.txt
 		// H.9.1.  BLS12381G1_XMD:SHA-256_SSWU_RO_

From 2758b01440744a5e52371a6ab55ae05b26ed5955 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 13 Oct 2020 11:22:44 +0900
Subject: [PATCH 325/553] refine tests

---
 test/mapto_wb19_test.cpp | 212 +++++++++++++++++----------------------
 1 file changed, 94 insertions(+), 118 deletions(-)

diff --git a/test/mapto_wb19_test.cpp b/test/mapto_wb19_test.cpp
index 37719953..3c78b93c 100644
--- a/test/mapto_wb19_test.cpp
+++ b/test/mapto_wb19_test.cpp
@@ -269,120 +269,96 @@ void testHashToFp2v7(const T& mapto)
 		CYBOZU_TEST_EQUAL(P1, P2);
 	}
 	{
-		char msg[] = "asdf";
-		char dst[] = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_";
-		/*
-			https://github.com:cfrg/draft-irtf-cfrg-hash-to-curve
-			tag: draft-irtf-cfrg-hash-to-curve-07
-			the return value of expand_message_xmd in hash_to_field.py
-		*/
-		char expect[] = "ca53fcd6f140590d19138f38819eb13330c014a1670e40f0f8e991de7b35e21a1fca52a14486c8e8acc9d865718cd41fe3638c2fb50fdc75b95690dc58f86494005fb37fc330366a7fef5f6e26bb631f4a5462affab2b9a9630c3b1c63621875baf782dd435500fda05ba7a9e86a766eeffe259128dc6e43c1852c58034856c4c4e2158c3414a881c17b727be5400432bf5c0cd02066a3b763e25e3ca32f19ca69a807bbc14c7c8c7988915fb1df523c536f744aa8b9bd0bbcea9800a236355690a4765491cd8969ca2f8cac8b021d97306e6ce6a2126b2868cf57f59f5fc416385bc1c2ae396c62608adc6b9174bbdb981a4601c3bd81bbe086e385d9a909aa";
-		size_t msgSize = strlen(msg);
-		size_t dstSize = strlen(dst);
-		uint8_t md[256];
-		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
-		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
-	}
-	{
-		char msg[] = "asdf";
-		char dst[] = "QUUX-V01-CS02-with-BLS12381G1_XMD:SHA-256_SSWU_RO_";
-		char expect[] = "ecc25edef8f6b277e27a88cf5ca0cdd4c4a49e8ba273d6069a4f0c9db05d37b78e700a875f4bb5972bfce49a867172ec1cb8c5524b1853994bb8af52a8ad2338d2cf688cf788b732372c10013445cd2c16a08a462028ae8ffff3082c8e47e8437dee5a58801e03ee8320980ae7c071ab022473231789d543d56defe9ff53bdba";
-		size_t msgSize = strlen(msg);
-		size_t dstSize = strlen(dst);
-		uint8_t md[128];
-		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
-		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
-	}
-	// Test coming from https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-09#appendix-I.1
-	{
-		char msg[] = "";
-		char dst[] = "QUUX-V01-CS02-with-expander";
-		char expect[] = "f659819a6473c1835b25ea59e3d38914c98b374f0970b7e4c92181df928fca88";
-		size_t msgSize = strlen(msg);
-		size_t dstSize = strlen(dst);
-		uint8_t md[32];
-		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
-		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
-	}
-	{
-		char msg[] = "abc";
-		char dst[] = "QUUX-V01-CS02-with-expander";
-		char expect[] = "1c38f7c211ef233367b2420d04798fa4698080a8901021a795a1151775fe4da7";
-		size_t msgSize = strlen(msg);
-		size_t dstSize = strlen(dst);
-		uint8_t md[32];
-		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
-		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
-	}
-	{
-		char msg[] = "abcdef0123456789";
-		char dst[] = "QUUX-V01-CS02-with-expander";
-		char expect[] = "8f7e7b66791f0da0dbb5ec7c22ec637f79758c0a48170bfb7c4611bd304ece89";
-		size_t msgSize = strlen(msg);
-		size_t dstSize = strlen(dst);
-		uint8_t md[32];
-		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
-		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
-	}
-	{
-		char msg[] = "q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq";
-		char dst[] = "QUUX-V01-CS02-with-expander";
-		char expect[] = "72d5aa5ec810370d1f0013c0df2f1d65699494ee2a39f72e1716b1b964e1c642";
-		size_t msgSize = strlen(msg);
-		size_t dstSize = strlen(dst);
-		uint8_t md[32];
-		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
-		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
-	}
-	{
-		char msg[] = "";
-		char dst[] = "QUUX-V01-CS02-with-expander";
-		char expect[] = "8bcffd1a3cae24cf9cd7ab85628fd111bb17e3739d3b53f89580d217aa79526f1708354a76a402d3569d6a9d19ef3de4d0b991e4f54b9f20dcde9b95a66824cbdf6c1a963a1913d43fd7ac443a02fc5d9d8d77e2071b86ab114a9f34150954a7531da568a1ea8c760861c0cde2005afc2c114042ee7b5848f5303f0611cf297f";
-		size_t msgSize = strlen(msg);
-		size_t dstSize = strlen(dst);
-		uint8_t md[128];
-		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
-		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
-	}
-	{
-		char msg[] = "abc";
-		char dst[] = "QUUX-V01-CS02-with-expander";
-		char expect[] = "fe994ec51bdaa821598047b3121c149b364b178606d5e72bfbb713933acc29c186f316baecf7ea22212f2496ef3f785a27e84a40d8b299cec56032763eceeff4c61bd1fe65ed81decafff4a31d0198619c0aa0c6c51fca15520789925e813dcfd318b542f8799441271f4db9ee3b8092a7a2e8d5b75b73e28fb1ab6b4573c192";
-		size_t msgSize = strlen(msg);
-		size_t dstSize = strlen(dst);
-		uint8_t md[128];
-		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
-		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
-	}
-	{
-		char msg[] = "abcdef0123456789";
-		char dst[] = "QUUX-V01-CS02-with-expander";
-		char expect[] = "c9ec7941811b1e19ce98e21db28d22259354d4d0643e301175e2f474e030d32694e9dd5520dde93f3600d8edad94e5c364903088a7228cc9eff685d7eaac50d5a5a8229d083b51de4ccc3733917f4b9535a819b445814890b7029b5de805bf62b33a4dc7e24acdf2c924e9fe50d55a6b832c8c84c7f82474b34e48c6d43867be";
-		size_t msgSize = strlen(msg);
-		size_t dstSize = strlen(dst);
-		uint8_t md[128];
-		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
-		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
-	}
-	{
-		char msg[] = "q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq";
-		char dst[] = "QUUX-V01-CS02-with-expander";
-		char expect[] = "48e256ddba722053ba462b2b93351fc966026e6d6db493189798181c5f3feea377b5a6f1d8368d7453faef715f9aecb078cd402cbd548c0e179c4ed1e4c7e5b048e0a39d31817b5b24f50db58bb3720fe96ba53db947842120a068816ac05c159bb5266c63658b4f000cbf87b1209a225def8ef1dca917bcda79a1e42acd8069";
-		size_t msgSize = strlen(msg);
-		size_t dstSize = strlen(dst);
-		uint8_t md[128];
-		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
-		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
-	}
-	{
-		char msg[] = "a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
-		char dst[] = "QUUX-V01-CS02-with-expander";
-		char expect[] = "396962db47f749ec3b5042ce2452b619607f27fd3939ece2746a7614fb83a1d097f554df3927b084e55de92c7871430d6b95c2a13896d8a33bc48587b1f66d21b128a1a8240d5b0c26dfe795a1a842a0807bb148b77c2ef82ed4b6c9f7fcb732e7f94466c8b51e52bf378fba044a31f5cb44583a892f5969dcd73b3fa128816e";
-		size_t msgSize = strlen(msg);
-		size_t dstSize = strlen(dst);
-		uint8_t md[128];
-		mcl::fp::expand_message_xmd(md, sizeof(md), msg, msgSize, dst, dstSize);
-		CYBOZU_TEST_EQUAL(toHexStr(md, sizeof(md)), expect);
+		struct Tbl {
+			const char *msg;
+			const char *dst;
+			const char *expect;
+			size_t mdSize;
+		} tbl[] = {
+			{
+				/*
+					https://github.com:cfrg/draft-irtf-cfrg-hash-to-curve
+					tag: draft-irtf-cfrg-hash-to-curve-07
+					the return value of expand_message_xmd in hash_to_field.py
+				*/
+				"asdf",
+				"BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_",
+				"ca53fcd6f140590d19138f38819eb13330c014a1670e40f0f8e991de7b35e21a1fca52a14486c8e8acc9d865718cd41fe3638c2fb50fdc75b95690dc58f86494005fb37fc330366a7fef5f6e26bb631f4a5462affab2b9a9630c3b1c63621875baf782dd435500fda05ba7a9e86a766eeffe259128dc6e43c1852c58034856c4c4e2158c3414a881c17b727be5400432bf5c0cd02066a3b763e25e3ca32f19ca69a807bbc14c7c8c7988915fb1df523c536f744aa8b9bd0bbcea9800a236355690a4765491cd8969ca2f8cac8b021d97306e6ce6a2126b2868cf57f59f5fc416385bc1c2ae396c62608adc6b9174bbdb981a4601c3bd81bbe086e385d9a909aa",
+				256,
+			},
+			{
+				"asdf",
+				"QUUX-V01-CS02-with-BLS12381G1_XMD:SHA-256_SSWU_RO_",
+				"ecc25edef8f6b277e27a88cf5ca0cdd4c4a49e8ba273d6069a4f0c9db05d37b78e700a875f4bb5972bfce49a867172ec1cb8c5524b1853994bb8af52a8ad2338d2cf688cf788b732372c10013445cd2c16a08a462028ae8ffff3082c8e47e8437dee5a58801e03ee8320980ae7c071ab022473231789d543d56defe9ff53bdba",
+				128,
+			},
+			// Test coming from https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-09#appendix-I.1
+			{
+				"",
+				"QUUX-V01-CS02-with-expander",
+				"f659819a6473c1835b25ea59e3d38914c98b374f0970b7e4c92181df928fca88",
+				32,
+			},
+			{
+				"abc",
+				"QUUX-V01-CS02-with-expander",
+				"1c38f7c211ef233367b2420d04798fa4698080a8901021a795a1151775fe4da7",
+				32,
+			},
+			{
+				"abcdef0123456789",
+				"QUUX-V01-CS02-with-expander",
+				"8f7e7b66791f0da0dbb5ec7c22ec637f79758c0a48170bfb7c4611bd304ece89",
+				32,
+			},
+			{
+				"q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq",
+				"QUUX-V01-CS02-with-expander",
+				"72d5aa5ec810370d1f0013c0df2f1d65699494ee2a39f72e1716b1b964e1c642",
+				32,
+			},
+			{
+				"",
+				"QUUX-V01-CS02-with-expander",
+				"8bcffd1a3cae24cf9cd7ab85628fd111bb17e3739d3b53f89580d217aa79526f1708354a76a402d3569d6a9d19ef3de4d0b991e4f54b9f20dcde9b95a66824cbdf6c1a963a1913d43fd7ac443a02fc5d9d8d77e2071b86ab114a9f34150954a7531da568a1ea8c760861c0cde2005afc2c114042ee7b5848f5303f0611cf297f",
+				128,
+			},
+			{
+				"abc",
+				"QUUX-V01-CS02-with-expander",
+				"fe994ec51bdaa821598047b3121c149b364b178606d5e72bfbb713933acc29c186f316baecf7ea22212f2496ef3f785a27e84a40d8b299cec56032763eceeff4c61bd1fe65ed81decafff4a31d0198619c0aa0c6c51fca15520789925e813dcfd318b542f8799441271f4db9ee3b8092a7a2e8d5b75b73e28fb1ab6b4573c192",
+				128,
+			},
+			{
+				"abcdef0123456789",
+				"QUUX-V01-CS02-with-expander",
+				"c9ec7941811b1e19ce98e21db28d22259354d4d0643e301175e2f474e030d32694e9dd5520dde93f3600d8edad94e5c364903088a7228cc9eff685d7eaac50d5a5a8229d083b51de4ccc3733917f4b9535a819b445814890b7029b5de805bf62b33a4dc7e24acdf2c924e9fe50d55a6b832c8c84c7f82474b34e48c6d43867be",
+				128,
+			},
+			{
+				"q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq",
+				"QUUX-V01-CS02-with-expander",
+				"48e256ddba722053ba462b2b93351fc966026e6d6db493189798181c5f3feea377b5a6f1d8368d7453faef715f9aecb078cd402cbd548c0e179c4ed1e4c7e5b048e0a39d31817b5b24f50db58bb3720fe96ba53db947842120a068816ac05c159bb5266c63658b4f000cbf87b1209a225def8ef1dca917bcda79a1e42acd8069",
+				128,
+			},
+			{
+				"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+				"QUUX-V01-CS02-with-expander",
+				"396962db47f749ec3b5042ce2452b619607f27fd3939ece2746a7614fb83a1d097f554df3927b084e55de92c7871430d6b95c2a13896d8a33bc48587b1f66d21b128a1a8240d5b0c26dfe795a1a842a0807bb148b77c2ef82ed4b6c9f7fcb732e7f94466c8b51e52bf378fba044a31f5cb44583a892f5969dcd73b3fa128816e",
+				128,
+			},
+		};
+		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
+			const char *msg = tbl[i].msg;
+			const char *dst = tbl[i].dst;
+			const char *expect = tbl[i].expect;
+			size_t mdSize = tbl[i].mdSize;
+			uint8_t md[256];
+			size_t msgSize = strlen(msg);
+			size_t dstSize = strlen(dst);
+			mcl::fp::expand_message_xmd(md, mdSize, msg, msgSize, dst, dstSize);
+			CYBOZU_TEST_EQUAL(toHexStr(md, mdSize), expect);
+		}
 	}
 	{
 		const struct {
@@ -456,7 +432,7 @@ void testHashToFp2v7(const T& mapto)
 			// https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-09.html#name-bls12381g2_xmdsha-256_sswu_
 			{
 				"abc", // msg
-        "QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_",
+				"QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_",
 				{ // P.x
 					"0x02c2d18e033b960562aae3cab37a27ce00d80ccd5ba4b7fe0e7a210245129dbec7780ccc7954725f4168aff2787776e6",
 					"0x139cddbccdc5e91b9623efd38c49f81a6f83f175e80b06fc374de9eb4b41dfe4ca3a230ed250fbe3a2acf73a41177fd8",
@@ -468,7 +444,7 @@ void testHashToFp2v7(const T& mapto)
 			},
 			{
 				"abcdef0123456789", // msg
-        "QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_",
+				"QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_",
 				{ // P.x
 					"0x121982811d2491fde9ba7ed31ef9ca474f0e1501297f68c298e9f4c0028add35aea8bb83d53c08cfc007c1e005723cd0",
 					"0x190d119345b94fbd15497bcba94ecf7db2cbfd1e1fe7da034d26cbba169fb3968288b3fafb265f9ebd380512a71c3f2c",
@@ -480,7 +456,7 @@ void testHashToFp2v7(const T& mapto)
 			},
 			{
 				"q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq", // msg
-        "QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_",
+				"QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_",
 				{ // P.x
 					"0x19a84dd7248a1066f737cc34502ee5555bd3c19f2ecdb3c7d9e24dc65d4e25e50d83f0f77105e955d78f4762d33c17da",
 					"0x0934aba516a52d8ae479939a91998299c76d39cc0c035cd18813bec433f587e2d7a4fef038260eef0cef4d02aae3eb91",
@@ -492,7 +468,7 @@ void testHashToFp2v7(const T& mapto)
 			},
 			{
 				"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // msg
-        "QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_",
+				"QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_",
 				{ // P.x
 					"0x01a6ba2f9a11fa5598b2d8ace0fbe0a0eacb65deceb476fbbcb64fd24557c2f4b18ecfc5663e54ae16a84f5ab7f62534",
 					"0x11fca2ff525572795a801eed17eb12785887c7b63fb77a42be46ce4a34131d71f7a73e95fee3f812aea3de78b4d01569",

From 99757f9f0f48dde35f20f5298685fc191bda38e8 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 13 Oct 2020 11:58:48 +0900
Subject: [PATCH 326/553] fix assert of expand_message_xmd

---
 src/fp.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index 710869ac..9bcd3717 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -146,8 +146,8 @@ uint32_t sha512(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSiz
 
 void expand_message_xmd(uint8_t out[], size_t outSize, const void *msg, size_t msgSize, const void *dst, size_t dstSize)
 {
-	assert(outSize == 128 || outSize == 256);
 	const size_t mdSize = 32;
+	assert((outSize % mdSize) == 0 && 0 < outSize && outSize <= 256);
 	const size_t r_in_bytes = 64;
 	const size_t n = outSize / mdSize;
 	static const uint8_t Z_pad[r_in_bytes] = {};

From 74eae4c83b2f7a8dc5fd9e09dea850460f4df670 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 13 Oct 2020 13:48:23 +0900
Subject: [PATCH 327/553] update to v1.25

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 6da18c0c..28a3d38a 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x124; /* 0xABC = A.BC */
+static const int version = 0x125; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From dacfdaefea7e8b92e8c02ae3cb92ba7ed29ba6bd Mon Sep 17 00:00:00 2001
From: raphael <rheitjoh@mail.uni-paderborn.de>
Date: Tue, 13 Oct 2020 10:45:38 +0200
Subject: [PATCH 328/553] Regenerate java ffi to try out different inv location

---
 ffi/java/mcl_impl.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ffi/java/mcl_impl.hpp b/ffi/java/mcl_impl.hpp
index 1c206d32..18590488 100644
--- a/ffi/java/mcl_impl.hpp
+++ b/ffi/java/mcl_impl.hpp
@@ -370,7 +370,7 @@ void mul(GT& z, const GT& x, const GT& y)
 }
 void inv(GT& y, GT& x) 
 {
-	mcl::bn::inv(y.self_, x.self_);
+	mcl::bn::Fp12::inv(y.self_, x.self_);
 }
 void pow(GT& z, const GT& x, const Fr& y)
 {

From 016fd377dd14955d8ac1a635c5d36c0e0be10126 Mon Sep 17 00:00:00 2001
From: raphael <rheitjoh@mail.uni-paderborn.de>
Date: Tue, 13 Oct 2020 10:56:13 +0200
Subject: [PATCH 329/553] Regenerate java ffi to fix missing package
 declarations

---
 ffi/java/com/herumi/mcl/Fp.java           |   1 +
 ffi/java/com/herumi/mcl/Fr.java           |   1 +
 ffi/java/com/herumi/mcl/G1.java           |   1 +
 ffi/java/com/herumi/mcl/G2.java           |   1 +
 ffi/java/com/herumi/mcl/GT.java           |   1 +
 ffi/java/com/herumi/mcl/Mcl.java          |   1 +
 ffi/java/com/herumi/mcl/MclConstants.java |   1 +
 ffi/java/com/herumi/mcl/MclJNI.java       |   1 +
 ffi/java/mcl_wrap.cxx                     | 192 +++++++++++-----------
 9 files changed, 104 insertions(+), 96 deletions(-)

diff --git a/ffi/java/com/herumi/mcl/Fp.java b/ffi/java/com/herumi/mcl/Fp.java
index b9b6acb7..ea83f384 100644
--- a/ffi/java/com/herumi/mcl/Fp.java
+++ b/ffi/java/com/herumi/mcl/Fp.java
@@ -6,6 +6,7 @@
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
+package com.herumi.mcl;
 
 public class Fp {
   private transient long swigCPtr;
diff --git a/ffi/java/com/herumi/mcl/Fr.java b/ffi/java/com/herumi/mcl/Fr.java
index 1436a360..5d1e00a4 100644
--- a/ffi/java/com/herumi/mcl/Fr.java
+++ b/ffi/java/com/herumi/mcl/Fr.java
@@ -6,6 +6,7 @@
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
+package com.herumi.mcl;
 
 public class Fr {
   private transient long swigCPtr;
diff --git a/ffi/java/com/herumi/mcl/G1.java b/ffi/java/com/herumi/mcl/G1.java
index 58464177..a101f516 100644
--- a/ffi/java/com/herumi/mcl/G1.java
+++ b/ffi/java/com/herumi/mcl/G1.java
@@ -6,6 +6,7 @@
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
+package com.herumi.mcl;
 
 public class G1 {
   private transient long swigCPtr;
diff --git a/ffi/java/com/herumi/mcl/G2.java b/ffi/java/com/herumi/mcl/G2.java
index 5dfb9fe6..8ac8c481 100644
--- a/ffi/java/com/herumi/mcl/G2.java
+++ b/ffi/java/com/herumi/mcl/G2.java
@@ -6,6 +6,7 @@
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
+package com.herumi.mcl;
 
 public class G2 {
   private transient long swigCPtr;
diff --git a/ffi/java/com/herumi/mcl/GT.java b/ffi/java/com/herumi/mcl/GT.java
index a5ae08ea..06df6bb9 100644
--- a/ffi/java/com/herumi/mcl/GT.java
+++ b/ffi/java/com/herumi/mcl/GT.java
@@ -6,6 +6,7 @@
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
+package com.herumi.mcl;
 
 public class GT {
   private transient long swigCPtr;
diff --git a/ffi/java/com/herumi/mcl/Mcl.java b/ffi/java/com/herumi/mcl/Mcl.java
index 1bccc916..8074e393 100644
--- a/ffi/java/com/herumi/mcl/Mcl.java
+++ b/ffi/java/com/herumi/mcl/Mcl.java
@@ -6,6 +6,7 @@
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
+package com.herumi.mcl;
 
 public class Mcl implements MclConstants {
   public static void SystemInit(int curveType) {
diff --git a/ffi/java/com/herumi/mcl/MclConstants.java b/ffi/java/com/herumi/mcl/MclConstants.java
index 376f1422..8068eca9 100644
--- a/ffi/java/com/herumi/mcl/MclConstants.java
+++ b/ffi/java/com/herumi/mcl/MclConstants.java
@@ -6,6 +6,7 @@
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
+package com.herumi.mcl;
 
 public interface MclConstants {
   public final static int BN254 = 0;
diff --git a/ffi/java/com/herumi/mcl/MclJNI.java b/ffi/java/com/herumi/mcl/MclJNI.java
index b6928fc2..7d60c90d 100644
--- a/ffi/java/com/herumi/mcl/MclJNI.java
+++ b/ffi/java/com/herumi/mcl/MclJNI.java
@@ -6,6 +6,7 @@
  * the SWIG interface file instead.
  * ----------------------------------------------------------------------------- */
 
+package com.herumi.mcl;
 
 public class MclJNI {
   public final static native void SystemInit(int jarg1);
diff --git a/ffi/java/mcl_wrap.cxx b/ffi/java/mcl_wrap.cxx
index 753cf59c..fbd267f1 100644
--- a/ffi/java/mcl_wrap.cxx
+++ b/ffi/java/mcl_wrap.cxx
@@ -249,7 +249,7 @@ static void SWIGUNUSED SWIG_JavaThrowException(JNIEnv *jenv, SWIG_JavaExceptionC
 extern "C" {
 #endif
 
-SWIGEXPORT void JNICALL Java_MclJNI_SystemInit(JNIEnv *jenv, jclass jcls, jint jarg1) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_SystemInit(JNIEnv *jenv, jclass jcls, jint jarg1) {
   int arg1 ;
   
   (void)jenv;
@@ -264,7 +264,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_SystemInit(JNIEnv *jenv, jclass jcls, jint j
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_neg_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   Fr *arg1 = 0 ;
   Fr *arg2 = 0 ;
   
@@ -286,7 +286,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_neg_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_add_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fr *arg1 = 0 ;
   Fr *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -315,7 +315,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_add_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_sub_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fr *arg1 = 0 ;
   Fr *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -344,7 +344,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_sub_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fr *arg1 = 0 ;
   Fr *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -373,7 +373,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G1 *arg1 = 0 ;
   G1 *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -402,7 +402,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G2 *arg1 = 0 ;
   G2 *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -431,7 +431,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_div_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_div_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fr *arg1 = 0 ;
   Fr *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -460,7 +460,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_div_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_pow(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_pow(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   GT *arg1 = 0 ;
   GT *arg2 = 0 ;
   Fr *arg3 = 0 ;
@@ -489,7 +489,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_pow(JNIEnv *jenv, jclass jcls, jlong jarg1,
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
   jlong jresult = 0 ;
   Fr *result = 0 ;
   
@@ -501,7 +501,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_10(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jlong jresult = 0 ;
   Fr *arg1 = 0 ;
   Fr *result = 0 ;
@@ -520,7 +520,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_11(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jint jarg1) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jint jarg1) {
   jlong jresult = 0 ;
   int arg1 ;
   Fr *result = 0 ;
@@ -534,7 +534,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_12(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jstring jarg1, jint jarg2) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jstring jarg1, jint jarg2) {
   jlong jresult = 0 ;
   std::string *arg1 = 0 ;
   int arg2 ;
@@ -563,7 +563,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_13(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jstring jarg1) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fr_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jstring jarg1) {
   jlong jresult = 0 ;
   std::string *arg1 = 0 ;
   Fr *result = 0 ;
@@ -590,7 +590,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fr_1_1SWIG_14(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jboolean JNICALL Java_MclJNI_Fr_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fr_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   jboolean jresult = 0 ;
   Fr *arg1 = (Fr *) 0 ;
   Fr *arg2 = 0 ;
@@ -612,7 +612,7 @@ SWIGEXPORT jboolean JNICALL Java_MclJNI_Fr_1equals(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_Fr_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   Fr *arg1 = (Fr *) 0 ;
   std::string *arg2 = 0 ;
   int arg3 ;
@@ -640,7 +640,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_Fr_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass j
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_Fr_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
   Fr *arg1 = (Fr *) 0 ;
   std::string *arg2 = 0 ;
   
@@ -666,7 +666,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_Fr_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass j
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_Fr_1setInt(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setInt(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   Fr *arg1 = (Fr *) 0 ;
   int arg2 ;
   
@@ -679,7 +679,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_Fr_1setInt(JNIEnv *jenv, jclass jcls, jlong
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_Fr_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   Fr *arg1 = (Fr *) 0 ;
   
   (void)jenv;
@@ -690,7 +690,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_Fr_1clear(JNIEnv *jenv, jclass jcls, jlong j
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_Fr_1setByCSPRNG(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setByCSPRNG(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   Fr *arg1 = (Fr *) 0 ;
   
   (void)jenv;
@@ -701,7 +701,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_Fr_1setByCSPRNG(JNIEnv *jenv, jclass jcls, j
 }
 
 
-SWIGEXPORT jstring JNICALL Java_MclJNI_Fr_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fr_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   jstring jresult = 0 ;
   Fr *arg1 = (Fr *) 0 ;
   int arg2 ;
@@ -723,7 +723,7 @@ SWIGEXPORT jstring JNICALL Java_MclJNI_Fr_1toString_1_1SWIG_10(JNIEnv *jenv, jcl
 }
 
 
-SWIGEXPORT jstring JNICALL Java_MclJNI_Fr_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fr_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jstring jresult = 0 ;
   Fr *arg1 = (Fr *) 0 ;
   std::string result;
@@ -743,7 +743,7 @@ SWIGEXPORT jstring JNICALL Java_MclJNI_Fr_1toString_1_1SWIG_11(JNIEnv *jenv, jcl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_Fr_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   Fr *arg1 = (Fr *) 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -774,7 +774,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_Fr_1deserialize(JNIEnv *jenv, jclass jcls, j
 }
 
 
-SWIGEXPORT jbyteArray JNICALL Java_MclJNI_Fr_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_Fr_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jbyteArray jresult = 0 ;
   Fr *arg1 = (Fr *) 0 ;
   std::string *arg2 = 0 ;
@@ -799,7 +799,7 @@ SWIGEXPORT jbyteArray JNICALL Java_MclJNI_Fr_1serialize(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_delete_1Fr(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1Fr(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   Fr *arg1 = (Fr *) 0 ;
   
   (void)jenv;
@@ -809,7 +809,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_delete_1Fr(JNIEnv *jenv, jclass jcls, jlong
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_neg_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
   
@@ -831,7 +831,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_neg_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_add_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
   Fp *arg3 = 0 ;
@@ -860,7 +860,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_add_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_sub_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
   Fp *arg3 = 0 ;
@@ -889,7 +889,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_sub_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
   Fp *arg3 = 0 ;
@@ -918,7 +918,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_div_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_div_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
   Fp *arg3 = 0 ;
@@ -947,7 +947,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_div_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
   jlong jresult = 0 ;
   Fp *result = 0 ;
   
@@ -959,7 +959,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_10(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jlong jresult = 0 ;
   Fp *arg1 = 0 ;
   Fp *result = 0 ;
@@ -978,7 +978,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_11(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jint jarg1) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jint jarg1) {
   jlong jresult = 0 ;
   int arg1 ;
   Fp *result = 0 ;
@@ -992,7 +992,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_12(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jstring jarg1, jint jarg2) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jstring jarg1, jint jarg2) {
   jlong jresult = 0 ;
   std::string *arg1 = 0 ;
   int arg2 ;
@@ -1021,7 +1021,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_13(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jstring jarg1) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1Fp_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jstring jarg1) {
   jlong jresult = 0 ;
   std::string *arg1 = 0 ;
   Fp *result = 0 ;
@@ -1048,7 +1048,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1Fp_1_1SWIG_14(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jboolean JNICALL Java_MclJNI_Fp_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fp_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   jboolean jresult = 0 ;
   Fp *arg1 = (Fp *) 0 ;
   Fp *arg2 = 0 ;
@@ -1070,7 +1070,7 @@ SWIGEXPORT jboolean JNICALL Java_MclJNI_Fp_1equals(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_Fp_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   Fp *arg1 = (Fp *) 0 ;
   std::string *arg2 = 0 ;
   int arg3 ;
@@ -1098,7 +1098,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_Fp_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass j
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_Fp_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
   Fp *arg1 = (Fp *) 0 ;
   std::string *arg2 = 0 ;
   
@@ -1124,7 +1124,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_Fp_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass j
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_Fp_1setInt(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setInt(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   Fp *arg1 = (Fp *) 0 ;
   int arg2 ;
   
@@ -1137,7 +1137,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_Fp_1setInt(JNIEnv *jenv, jclass jcls, jlong
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_Fp_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   Fp *arg1 = (Fp *) 0 ;
   
   (void)jenv;
@@ -1148,7 +1148,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_Fp_1clear(JNIEnv *jenv, jclass jcls, jlong j
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_Fp_1setByCSPRNG(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setByCSPRNG(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   Fp *arg1 = (Fp *) 0 ;
   
   (void)jenv;
@@ -1159,7 +1159,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_Fp_1setByCSPRNG(JNIEnv *jenv, jclass jcls, j
 }
 
 
-SWIGEXPORT jstring JNICALL Java_MclJNI_Fp_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fp_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   jstring jresult = 0 ;
   Fp *arg1 = (Fp *) 0 ;
   int arg2 ;
@@ -1181,7 +1181,7 @@ SWIGEXPORT jstring JNICALL Java_MclJNI_Fp_1toString_1_1SWIG_10(JNIEnv *jenv, jcl
 }
 
 
-SWIGEXPORT jstring JNICALL Java_MclJNI_Fp_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_Fp_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jstring jresult = 0 ;
   Fp *arg1 = (Fp *) 0 ;
   std::string result;
@@ -1201,7 +1201,7 @@ SWIGEXPORT jstring JNICALL Java_MclJNI_Fp_1toString_1_1SWIG_11(JNIEnv *jenv, jcl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_Fp_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   Fp *arg1 = (Fp *) 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -1232,7 +1232,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_Fp_1deserialize(JNIEnv *jenv, jclass jcls, j
 }
 
 
-SWIGEXPORT jbyteArray JNICALL Java_MclJNI_Fp_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_Fp_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jbyteArray jresult = 0 ;
   Fp *arg1 = (Fp *) 0 ;
   std::string *arg2 = 0 ;
@@ -1257,7 +1257,7 @@ SWIGEXPORT jbyteArray JNICALL Java_MclJNI_Fp_1serialize(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_delete_1Fp(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1Fp(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   Fp *arg1 = (Fp *) 0 ;
   
   (void)jenv;
@@ -1267,7 +1267,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_delete_1Fp(JNIEnv *jenv, jclass jcls, jlong
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_neg_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   G1 *arg1 = 0 ;
   G1 *arg2 = 0 ;
   
@@ -1289,7 +1289,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_neg_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_dbl_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_dbl_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   G1 *arg1 = 0 ;
   G1 *arg2 = 0 ;
   
@@ -1311,7 +1311,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_dbl_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_add_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G1 *arg1 = 0 ;
   G1 *arg2 = 0 ;
   G1 *arg3 = 0 ;
@@ -1340,7 +1340,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_add_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_sub_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G1 *arg1 = 0 ;
   G1 *arg2 = 0 ;
   G1 *arg3 = 0 ;
@@ -1369,7 +1369,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_sub_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_pairing(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_pairing(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   GT *arg1 = 0 ;
   G1 *arg2 = 0 ;
   G2 *arg3 = 0 ;
@@ -1398,7 +1398,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_pairing(JNIEnv *jenv, jclass jcls, jlong jar
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_hashAndMapToG1(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_hashAndMapToG1(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   G1 *arg1 = 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -1433,7 +1433,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_hashAndMapToG1(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G1_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G1_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
   jlong jresult = 0 ;
   G1 *result = 0 ;
   
@@ -1445,7 +1445,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G1_1_1SWIG_10(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G1_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G1_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jlong jresult = 0 ;
   G1 *arg1 = 0 ;
   G1 *result = 0 ;
@@ -1464,7 +1464,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G1_1_1SWIG_11(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G1_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G1_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   jlong jresult = 0 ;
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
@@ -1495,7 +1495,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G1_1_1SWIG_12(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jboolean JNICALL Java_MclJNI_G1_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   jboolean jresult = 0 ;
   G1 *arg1 = (G1 *) 0 ;
   G1 *arg2 = 0 ;
@@ -1517,7 +1517,7 @@ SWIGEXPORT jboolean JNICALL Java_MclJNI_G1_1equals(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_G1_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G1 *arg1 = (G1 *) 0 ;
   Fp *arg2 = 0 ;
   Fp *arg3 = 0 ;
@@ -1547,7 +1547,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_G1_1set(JNIEnv *jenv, jclass jcls, jlong jar
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_G1_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   G1 *arg1 = (G1 *) 0 ;
   
   (void)jenv;
@@ -1558,7 +1558,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_G1_1clear(JNIEnv *jenv, jclass jcls, jlong j
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_G1_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   G1 *arg1 = (G1 *) 0 ;
   std::string *arg2 = 0 ;
   int arg3 ;
@@ -1586,7 +1586,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_G1_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass j
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_G1_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
   G1 *arg1 = (G1 *) 0 ;
   std::string *arg2 = 0 ;
   
@@ -1612,7 +1612,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_G1_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass j
 }
 
 
-SWIGEXPORT jstring JNICALL Java_MclJNI_G1_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G1_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   jstring jresult = 0 ;
   G1 *arg1 = (G1 *) 0 ;
   int arg2 ;
@@ -1634,7 +1634,7 @@ SWIGEXPORT jstring JNICALL Java_MclJNI_G1_1toString_1_1SWIG_10(JNIEnv *jenv, jcl
 }
 
 
-SWIGEXPORT jstring JNICALL Java_MclJNI_G1_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G1_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jstring jresult = 0 ;
   G1 *arg1 = (G1 *) 0 ;
   std::string result;
@@ -1654,7 +1654,7 @@ SWIGEXPORT jstring JNICALL Java_MclJNI_G1_1toString_1_1SWIG_11(JNIEnv *jenv, jcl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_G1_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   G1 *arg1 = (G1 *) 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -1685,7 +1685,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_G1_1deserialize(JNIEnv *jenv, jclass jcls, j
 }
 
 
-SWIGEXPORT jbyteArray JNICALL Java_MclJNI_G1_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_G1_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jbyteArray jresult = 0 ;
   G1 *arg1 = (G1 *) 0 ;
   std::string *arg2 = 0 ;
@@ -1710,7 +1710,7 @@ SWIGEXPORT jbyteArray JNICALL Java_MclJNI_G1_1serialize(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_delete_1G1(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1G1(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   G1 *arg1 = (G1 *) 0 ;
   
   (void)jenv;
@@ -1720,7 +1720,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_delete_1G1(JNIEnv *jenv, jclass jcls, jlong
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_neg_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   G2 *arg1 = 0 ;
   G2 *arg2 = 0 ;
   
@@ -1742,7 +1742,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_neg_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_dbl_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_dbl_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   G2 *arg1 = 0 ;
   G2 *arg2 = 0 ;
   
@@ -1764,7 +1764,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_dbl_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_add_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G2 *arg1 = 0 ;
   G2 *arg2 = 0 ;
   G2 *arg3 = 0 ;
@@ -1793,7 +1793,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_add_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_sub_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_sub_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G2 *arg1 = 0 ;
   G2 *arg2 = 0 ;
   G2 *arg3 = 0 ;
@@ -1822,7 +1822,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_sub_1_1SWIG_13(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_hashAndMapToG2(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_hashAndMapToG2(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   G2 *arg1 = 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -1857,7 +1857,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_hashAndMapToG2(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G2_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G2_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
   jlong jresult = 0 ;
   G2 *result = 0 ;
   
@@ -1869,7 +1869,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G2_1_1SWIG_10(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G2_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G2_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jlong jresult = 0 ;
   G2 *arg1 = 0 ;
   G2 *result = 0 ;
@@ -1888,7 +1888,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G2_1_1SWIG_11(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G2_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_, jlong jarg4, jobject jarg4_) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1G2_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_, jlong jarg4, jobject jarg4_) {
   jlong jresult = 0 ;
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
@@ -1933,7 +1933,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1G2_1_1SWIG_12(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jboolean JNICALL Java_MclJNI_G2_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G2_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   jboolean jresult = 0 ;
   G2 *arg1 = (G2 *) 0 ;
   G2 *arg2 = 0 ;
@@ -1955,7 +1955,7 @@ SWIGEXPORT jboolean JNICALL Java_MclJNI_G2_1equals(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_G2_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_, jlong jarg4, jobject jarg4_, jlong jarg5, jobject jarg5_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_, jlong jarg4, jobject jarg4_, jlong jarg5, jobject jarg5_) {
   G2 *arg1 = (G2 *) 0 ;
   Fp *arg2 = 0 ;
   Fp *arg3 = 0 ;
@@ -1999,7 +1999,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_G2_1set(JNIEnv *jenv, jclass jcls, jlong jar
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_G2_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   G2 *arg1 = (G2 *) 0 ;
   
   (void)jenv;
@@ -2010,7 +2010,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_G2_1clear(JNIEnv *jenv, jclass jcls, jlong j
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_G2_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   G2 *arg1 = (G2 *) 0 ;
   std::string *arg2 = 0 ;
   int arg3 ;
@@ -2038,7 +2038,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_G2_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass j
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_G2_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
   G2 *arg1 = (G2 *) 0 ;
   std::string *arg2 = 0 ;
   
@@ -2064,7 +2064,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_G2_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass j
 }
 
 
-SWIGEXPORT jstring JNICALL Java_MclJNI_G2_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G2_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   jstring jresult = 0 ;
   G2 *arg1 = (G2 *) 0 ;
   int arg2 ;
@@ -2086,7 +2086,7 @@ SWIGEXPORT jstring JNICALL Java_MclJNI_G2_1toString_1_1SWIG_10(JNIEnv *jenv, jcl
 }
 
 
-SWIGEXPORT jstring JNICALL Java_MclJNI_G2_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_G2_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jstring jresult = 0 ;
   G2 *arg1 = (G2 *) 0 ;
   std::string result;
@@ -2106,7 +2106,7 @@ SWIGEXPORT jstring JNICALL Java_MclJNI_G2_1toString_1_1SWIG_11(JNIEnv *jenv, jcl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_G2_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   G2 *arg1 = (G2 *) 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -2137,7 +2137,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_G2_1deserialize(JNIEnv *jenv, jclass jcls, j
 }
 
 
-SWIGEXPORT jbyteArray JNICALL Java_MclJNI_G2_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_G2_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jbyteArray jresult = 0 ;
   G2 *arg1 = (G2 *) 0 ;
   std::string *arg2 = 0 ;
@@ -2162,7 +2162,7 @@ SWIGEXPORT jbyteArray JNICALL Java_MclJNI_G2_1serialize(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_delete_1G2(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1G2(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   G2 *arg1 = (G2 *) 0 ;
   
   (void)jenv;
@@ -2172,7 +2172,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_delete_1G2(JNIEnv *jenv, jclass jcls, jlong
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   GT *arg1 = 0 ;
   GT *arg2 = 0 ;
   GT *arg3 = 0 ;
@@ -2201,7 +2201,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_mul_1_1SWIG_14(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_inv(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_inv(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   GT *arg1 = 0 ;
   GT *arg2 = 0 ;
   
@@ -2223,7 +2223,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_inv(JNIEnv *jenv, jclass jcls, jlong jarg1,
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1GT_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1GT_1_1SWIG_10(JNIEnv *jenv, jclass jcls) {
   jlong jresult = 0 ;
   GT *result = 0 ;
   
@@ -2235,7 +2235,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1GT_1_1SWIG_10(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jlong JNICALL Java_MclJNI_new_1GT_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jlong JNICALL Java_com_herumi_mcl_MclJNI_new_1GT_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jlong jresult = 0 ;
   GT *arg1 = 0 ;
   GT *result = 0 ;
@@ -2254,7 +2254,7 @@ SWIGEXPORT jlong JNICALL Java_MclJNI_new_1GT_1_1SWIG_11(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT jboolean JNICALL Java_MclJNI_GT_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_GT_1equals(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   jboolean jresult = 0 ;
   GT *arg1 = (GT *) 0 ;
   GT *arg2 = 0 ;
@@ -2276,7 +2276,7 @@ SWIGEXPORT jboolean JNICALL Java_MclJNI_GT_1equals(JNIEnv *jenv, jclass jcls, jl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_GT_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   GT *arg1 = (GT *) 0 ;
   
   (void)jenv;
@@ -2287,7 +2287,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_GT_1clear(JNIEnv *jenv, jclass jcls, jlong j
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_GT_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   GT *arg1 = (GT *) 0 ;
   std::string *arg2 = 0 ;
   int arg3 ;
@@ -2315,7 +2315,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_GT_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass j
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_GT_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2) {
   GT *arg1 = (GT *) 0 ;
   std::string *arg2 = 0 ;
   
@@ -2341,7 +2341,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_GT_1setStr_1_1SWIG_11(JNIEnv *jenv, jclass j
 }
 
 
-SWIGEXPORT jstring JNICALL Java_MclJNI_GT_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_GT_1toString_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jint jarg2) {
   jstring jresult = 0 ;
   GT *arg1 = (GT *) 0 ;
   int arg2 ;
@@ -2363,7 +2363,7 @@ SWIGEXPORT jstring JNICALL Java_MclJNI_GT_1toString_1_1SWIG_10(JNIEnv *jenv, jcl
 }
 
 
-SWIGEXPORT jstring JNICALL Java_MclJNI_GT_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jstring JNICALL Java_com_herumi_mcl_MclJNI_GT_1toString_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jstring jresult = 0 ;
   GT *arg1 = (GT *) 0 ;
   std::string result;
@@ -2383,7 +2383,7 @@ SWIGEXPORT jstring JNICALL Java_MclJNI_GT_1toString_1_1SWIG_11(JNIEnv *jenv, jcl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_GT_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1deserialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
   GT *arg1 = (GT *) 0 ;
   char *arg2 = (char *) 0 ;
   size_t arg3 ;
@@ -2414,7 +2414,7 @@ SWIGEXPORT void JNICALL Java_MclJNI_GT_1deserialize(JNIEnv *jenv, jclass jcls, j
 }
 
 
-SWIGEXPORT jbyteArray JNICALL Java_MclJNI_GT_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_GT_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jbyteArray jresult = 0 ;
   GT *arg1 = (GT *) 0 ;
   std::string *arg2 = 0 ;
@@ -2439,7 +2439,7 @@ SWIGEXPORT jbyteArray JNICALL Java_MclJNI_GT_1serialize(JNIEnv *jenv, jclass jcl
 }
 
 
-SWIGEXPORT void JNICALL Java_MclJNI_delete_1GT(JNIEnv *jenv, jclass jcls, jlong jarg1) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_delete_1GT(JNIEnv *jenv, jclass jcls, jlong jarg1) {
   GT *arg1 = (GT *) 0 ;
   
   (void)jenv;

From e3bbe3fd758e7152d71025468698fe24120f9f99 Mon Sep 17 00:00:00 2001
From: raphael <rheitjoh@mail.uni-paderborn.de>
Date: Tue, 13 Oct 2020 11:36:50 +0200
Subject: [PATCH 330/553] Add Mcl.inv for GT to java ffi docs

---
 ffi/java/java.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ffi/java/java.md b/ffi/java/java.md
index 0e6d3cb2..3d23fc38 100644
--- a/ffi/java/java.md
+++ b/ffi/java/java.md
@@ -50,6 +50,7 @@ Mcl.SystemInit(curveType); // curveType = Mcl.BN254 or Mcl.BLS12_381
 * `GT::setStr(String str)` ; set by the result of `toString()` method
 * `Mcl.mul(GT z, GT x, GT y)` ; `z = x * y`
 * `Mcl.pow(GT z, GT x, Fr y)` ; `z = x ^ y`
+* `Mcl.inv(GT y, GT x)` ; `y = x^{-1}`
 
 ## pairing
 * `Mcl.pairing(GT e, G1 P, G2 Q)` ; e = e(P, Q)

From 0d436b2378e582a3979ab2f9a7fbb6b04f786391 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 15 Oct 2020 16:07:24 +0900
Subject: [PATCH 331/553] [java] remove space

---
 ffi/java/mcl_impl.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ffi/java/mcl_impl.hpp b/ffi/java/mcl_impl.hpp
index 18590488..bc3bd9f3 100644
--- a/ffi/java/mcl_impl.hpp
+++ b/ffi/java/mcl_impl.hpp
@@ -368,7 +368,7 @@ void mul(GT& z, const GT& x, const GT& y)
 {
 	mcl::bn::Fp12::mul(z.self_, x.self_, y.self_);
 }
-void inv(GT& y, GT& x) 
+void inv(GT& y, GT& x)
 {
 	mcl::bn::Fp12::inv(y.self_, x.self_);
 }

From 44f3595a6d7412561de57315914ed5c5784aa237 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 15 Oct 2020 16:27:38 +0900
Subject: [PATCH 332/553] [java] use unitaryInv instead of inv

---
 ffi/java/MclTest.java | 4 ++++
 ffi/java/mcl_impl.hpp | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/ffi/java/MclTest.java b/ffi/java/MclTest.java
index c47966dd..8438e846 100644
--- a/ffi/java/MclTest.java
+++ b/ffi/java/MclTest.java
@@ -107,6 +107,10 @@ public static void testCurve(int curveType, String name) {
 			Mcl.mul(cP, P, c); // cP = P * c
 			Mcl.pairing(e1, cP, Q);
 			assertBool("e1 == e2", e1.equals(e2));
+			Mcl.inv(e1, e1);
+			Mcl.mul(e1, e1, e2);
+			e2.setStr("1 0 0 0 0 0 0 0 0 0 0 0");
+			assertBool("e1 == 1", e1.equals(e2));
 
 			BLSsignature(Q);
 			if (errN == 0) {
diff --git a/ffi/java/mcl_impl.hpp b/ffi/java/mcl_impl.hpp
index bc3bd9f3..668910d7 100644
--- a/ffi/java/mcl_impl.hpp
+++ b/ffi/java/mcl_impl.hpp
@@ -370,7 +370,7 @@ void mul(GT& z, const GT& x, const GT& y)
 }
 void inv(GT& y, GT& x)
 {
-	mcl::bn::Fp12::inv(y.self_, x.self_);
+	mcl::bn::Fp12::unitaryInv(y.self_, x.self_);
 }
 void pow(GT& z, const GT& x, const Fr& y)
 {

From bd5a3686924d4ef38f994bb400f87a684ee65fe8 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 15 Oct 2020 16:30:19 +0900
Subject: [PATCH 333/553] [java] add isZero, isOne

---
 ffi/java/MclTest.java               |  1 +
 ffi/java/com/herumi/mcl/Fp.java     |  4 ++
 ffi/java/com/herumi/mcl/Fr.java     |  4 ++
 ffi/java/com/herumi/mcl/G1.java     |  4 ++
 ffi/java/com/herumi/mcl/G2.java     |  4 ++
 ffi/java/com/herumi/mcl/GT.java     |  4 ++
 ffi/java/com/herumi/mcl/MclJNI.java |  5 ++
 ffi/java/mcl_impl.hpp               |  5 ++
 ffi/java/mcl_wrap.cxx               | 75 +++++++++++++++++++++++++++++
 9 files changed, 106 insertions(+)

diff --git a/ffi/java/MclTest.java b/ffi/java/MclTest.java
index 8438e846..3dd6dc73 100644
--- a/ffi/java/MclTest.java
+++ b/ffi/java/MclTest.java
@@ -111,6 +111,7 @@ public static void testCurve(int curveType, String name) {
 			Mcl.mul(e1, e1, e2);
 			e2.setStr("1 0 0 0 0 0 0 0 0 0 0 0");
 			assertBool("e1 == 1", e1.equals(e2));
+			assertBool("e1 == 1", e1.isOne());
 
 			BLSsignature(Q);
 			if (errN == 0) {
diff --git a/ffi/java/com/herumi/mcl/Fp.java b/ffi/java/com/herumi/mcl/Fp.java
index ea83f384..5d50e988 100644
--- a/ffi/java/com/herumi/mcl/Fp.java
+++ b/ffi/java/com/herumi/mcl/Fp.java
@@ -60,6 +60,10 @@ public boolean equals(Fp rhs) {
     return MclJNI.Fp_equals(swigCPtr, this, Fp.getCPtr(rhs), rhs);
   }
 
+  public boolean isZero() {
+    return MclJNI.Fp_isZero(swigCPtr, this);
+  }
+
   public void setStr(String str, int base) {
     MclJNI.Fp_setStr__SWIG_0(swigCPtr, this, str, base);
   }
diff --git a/ffi/java/com/herumi/mcl/Fr.java b/ffi/java/com/herumi/mcl/Fr.java
index 5d1e00a4..8ed95dfa 100644
--- a/ffi/java/com/herumi/mcl/Fr.java
+++ b/ffi/java/com/herumi/mcl/Fr.java
@@ -60,6 +60,10 @@ public boolean equals(Fr rhs) {
     return MclJNI.Fr_equals(swigCPtr, this, Fr.getCPtr(rhs), rhs);
   }
 
+  public boolean isZero() {
+    return MclJNI.Fr_isZero(swigCPtr, this);
+  }
+
   public void setStr(String str, int base) {
     MclJNI.Fr_setStr__SWIG_0(swigCPtr, this, str, base);
   }
diff --git a/ffi/java/com/herumi/mcl/G1.java b/ffi/java/com/herumi/mcl/G1.java
index a101f516..d46e3f14 100644
--- a/ffi/java/com/herumi/mcl/G1.java
+++ b/ffi/java/com/herumi/mcl/G1.java
@@ -52,6 +52,10 @@ public boolean equals(G1 rhs) {
     return MclJNI.G1_equals(swigCPtr, this, G1.getCPtr(rhs), rhs);
   }
 
+  public boolean isZero() {
+    return MclJNI.G1_isZero(swigCPtr, this);
+  }
+
   public void set(Fp x, Fp y) {
     MclJNI.G1_set(swigCPtr, this, Fp.getCPtr(x), x, Fp.getCPtr(y), y);
   }
diff --git a/ffi/java/com/herumi/mcl/G2.java b/ffi/java/com/herumi/mcl/G2.java
index 8ac8c481..aed241e0 100644
--- a/ffi/java/com/herumi/mcl/G2.java
+++ b/ffi/java/com/herumi/mcl/G2.java
@@ -52,6 +52,10 @@ public boolean equals(G2 rhs) {
     return MclJNI.G2_equals(swigCPtr, this, G2.getCPtr(rhs), rhs);
   }
 
+  public boolean isZero() {
+    return MclJNI.G2_isZero(swigCPtr, this);
+  }
+
   public void set(Fp ax, Fp ay, Fp bx, Fp by) {
     MclJNI.G2_set(swigCPtr, this, Fp.getCPtr(ax), ax, Fp.getCPtr(ay), ay, Fp.getCPtr(bx), bx, Fp.getCPtr(by), by);
   }
diff --git a/ffi/java/com/herumi/mcl/GT.java b/ffi/java/com/herumi/mcl/GT.java
index 06df6bb9..64386ca2 100644
--- a/ffi/java/com/herumi/mcl/GT.java
+++ b/ffi/java/com/herumi/mcl/GT.java
@@ -48,6 +48,10 @@ public boolean equals(GT rhs) {
     return MclJNI.GT_equals(swigCPtr, this, GT.getCPtr(rhs), rhs);
   }
 
+  public boolean isOne() {
+    return MclJNI.GT_isOne(swigCPtr, this);
+  }
+
   public void clear() {
     MclJNI.GT_clear(swigCPtr, this);
   }
diff --git a/ffi/java/com/herumi/mcl/MclJNI.java b/ffi/java/com/herumi/mcl/MclJNI.java
index 7d60c90d..24e34cf0 100644
--- a/ffi/java/com/herumi/mcl/MclJNI.java
+++ b/ffi/java/com/herumi/mcl/MclJNI.java
@@ -24,6 +24,7 @@ public class MclJNI {
   public final static native long new_Fr__SWIG_3(String jarg1, int jarg2);
   public final static native long new_Fr__SWIG_4(String jarg1);
   public final static native boolean Fr_equals(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_);
+  public final static native boolean Fr_isZero(long jarg1, Fr jarg1_);
   public final static native void Fr_setStr__SWIG_0(long jarg1, Fr jarg1_, String jarg2, int jarg3);
   public final static native void Fr_setStr__SWIG_1(long jarg1, Fr jarg1_, String jarg2);
   public final static native void Fr_setInt(long jarg1, Fr jarg1_, int jarg2);
@@ -45,6 +46,7 @@ public class MclJNI {
   public final static native long new_Fp__SWIG_3(String jarg1, int jarg2);
   public final static native long new_Fp__SWIG_4(String jarg1);
   public final static native boolean Fp_equals(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_);
+  public final static native boolean Fp_isZero(long jarg1, Fp jarg1_);
   public final static native void Fp_setStr__SWIG_0(long jarg1, Fp jarg1_, String jarg2, int jarg3);
   public final static native void Fp_setStr__SWIG_1(long jarg1, Fp jarg1_, String jarg2);
   public final static native void Fp_setInt(long jarg1, Fp jarg1_, int jarg2);
@@ -65,6 +67,7 @@ public class MclJNI {
   public final static native long new_G1__SWIG_1(long jarg1, G1 jarg1_);
   public final static native long new_G1__SWIG_2(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_);
   public final static native boolean G1_equals(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_);
+  public final static native boolean G1_isZero(long jarg1, G1 jarg1_);
   public final static native void G1_set(long jarg1, G1 jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
   public final static native void G1_clear(long jarg1, G1 jarg1_);
   public final static native void G1_setStr__SWIG_0(long jarg1, G1 jarg1_, String jarg2, int jarg3);
@@ -83,6 +86,7 @@ public class MclJNI {
   public final static native long new_G2__SWIG_1(long jarg1, G2 jarg1_);
   public final static native long new_G2__SWIG_2(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_, long jarg4, Fp jarg4_);
   public final static native boolean G2_equals(long jarg1, G2 jarg1_, long jarg2, G2 jarg2_);
+  public final static native boolean G2_isZero(long jarg1, G2 jarg1_);
   public final static native void G2_set(long jarg1, G2 jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_, long jarg4, Fp jarg4_, long jarg5, Fp jarg5_);
   public final static native void G2_clear(long jarg1, G2 jarg1_);
   public final static native void G2_setStr__SWIG_0(long jarg1, G2 jarg1_, String jarg2, int jarg3);
@@ -97,6 +101,7 @@ public class MclJNI {
   public final static native long new_GT__SWIG_0();
   public final static native long new_GT__SWIG_1(long jarg1, GT jarg1_);
   public final static native boolean GT_equals(long jarg1, GT jarg1_, long jarg2, GT jarg2_);
+  public final static native boolean GT_isOne(long jarg1, GT jarg1_);
   public final static native void GT_clear(long jarg1, GT jarg1_);
   public final static native void GT_setStr__SWIG_0(long jarg1, GT jarg1_, String jarg2, int jarg3);
   public final static native void GT_setStr__SWIG_1(long jarg1, GT jarg1_, String jarg2);
diff --git a/ffi/java/mcl_impl.hpp b/ffi/java/mcl_impl.hpp
index 668910d7..9bd1ef62 100644
--- a/ffi/java/mcl_impl.hpp
+++ b/ffi/java/mcl_impl.hpp
@@ -63,6 +63,7 @@ class Fr {
 	Fr(const std::string& str, int base = 0) throw(std::exception)
 		: self_(str, base) {}
 	bool equals(const Fr& rhs) const { return self_ == rhs.self_; }
+	bool isZero() const { return self_.isZero(); }
 	void setStr(const std::string& str, int base = 0) throw(std::exception)
 	{
 		self_.setStr(str, base);
@@ -135,6 +136,7 @@ class Fp {
 	Fp(const std::string& str, int base = 0) throw(std::exception)
 		: self_(str, base) {}
 	bool equals(const Fp& rhs) const { return self_ == rhs.self_; }
+	bool isZero() const { return self_.isZero(); }
 	void setStr(const std::string& str, int base = 0) throw(std::exception)
 	{
 		self_.setStr(str, base);
@@ -209,6 +211,7 @@ class G1 {
 	G1(const Fp& x, const Fp& y) throw(std::exception)
 		: self_(x.self_, y.self_) { }
 	bool equals(const G1& rhs) const { return self_ == rhs.self_; }
+	bool isZero() const { return self_.isZero(); }
 	void set(const Fp& x, const Fp& y) throw(std::exception)
 	{
 		self_.set(x.self_, y.self_);
@@ -279,6 +282,7 @@ class G2 {
 	{
 	}
 	bool equals(const G2& rhs) const { return self_ == rhs.self_; }
+	bool isZero() const { return self_.isZero(); }
 	void set(const Fp& ax, const Fp& ay, const Fp& bx, const Fp& by) throw(std::exception)
 	{
 		self_.set(mcl::bn::Fp2(ax.self_, ay.self_), mcl::bn::Fp2(bx.self_, by.self_));
@@ -342,6 +346,7 @@ class GT {
 	GT() {}
 	GT(const GT& rhs) : self_(rhs.self_) {}
 	bool equals(const GT& rhs) const { return self_ == rhs.self_; }
+	bool isOne() const { return self_.isOne(); }
 	void clear()
 	{
 		self_.clear();
diff --git a/ffi/java/mcl_wrap.cxx b/ffi/java/mcl_wrap.cxx
index fbd267f1..1caec48f 100644
--- a/ffi/java/mcl_wrap.cxx
+++ b/ffi/java/mcl_wrap.cxx
@@ -612,6 +612,21 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fr_1equals(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fr_1isZero(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jboolean jresult = 0 ;
+  Fr *arg1 = (Fr *) 0 ;
+  bool result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1; 
+  result = (bool)((Fr const *)arg1)->isZero();
+  jresult = (jboolean)result; 
+  return jresult;
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   Fr *arg1 = (Fr *) 0 ;
   std::string *arg2 = 0 ;
@@ -1070,6 +1085,21 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fp_1equals(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fp_1isZero(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jboolean jresult = 0 ;
+  Fp *arg1 = (Fp *) 0 ;
+  bool result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fp **)&jarg1; 
+  result = (bool)((Fp const *)arg1)->isZero();
+  jresult = (jboolean)result; 
+  return jresult;
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   Fp *arg1 = (Fp *) 0 ;
   std::string *arg2 = 0 ;
@@ -1517,6 +1547,21 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1equals(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1isZero(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jboolean jresult = 0 ;
+  G1 *arg1 = (G1 *) 0 ;
+  bool result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(G1 **)&jarg1; 
+  result = (bool)((G1 const *)arg1)->isZero();
+  jresult = (jboolean)result; 
+  return jresult;
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G1 *arg1 = (G1 *) 0 ;
   Fp *arg2 = 0 ;
@@ -1955,6 +2000,21 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G2_1equals(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G2_1isZero(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jboolean jresult = 0 ;
+  G2 *arg1 = (G2 *) 0 ;
+  bool result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(G2 **)&jarg1; 
+  result = (bool)((G2 const *)arg1)->isZero();
+  jresult = (jboolean)result; 
+  return jresult;
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G2_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_, jlong jarg4, jobject jarg4_, jlong jarg5, jobject jarg5_) {
   G2 *arg1 = (G2 *) 0 ;
   Fp *arg2 = 0 ;
@@ -2276,6 +2336,21 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_GT_1equals(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_GT_1isOne(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jboolean jresult = 0 ;
+  GT *arg1 = (GT *) 0 ;
+  bool result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(GT **)&jarg1; 
+  result = (bool)((GT const *)arg1)->isOne();
+  jresult = (jboolean)result; 
+  return jresult;
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_GT_1clear(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   GT *arg1 = (GT *) 0 ;
   

From 5e1c3fc2102c7847dba1736d50351a64deb3fcb3 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 20 Oct 2020 11:43:18 +0900
Subject: [PATCH 334/553] [doc] modify how to build GMP

---
 readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readme.md b/readme.md
index b8fa2647..c3f175dd 100644
--- a/readme.md
+++ b/readme.md
@@ -105,7 +105,7 @@ env MCL_PROF=2 bin/bls12_test.exe
 
 ## How to build on 32-bit x86 Linux
 
-Build GMP and for 32-bit mode and install `<lib32>` at yourself.
+Build GMP for 32-bit mode (`env ABI=32 ./configure --enable-cxx ...`) and install `<lib32>` at yourself.
 
 ```
 make ARCH=x86 CFLAGS_USER="-I <lib32>/include" LDFLAGS_USER="-L <lib32>/lib -Wl,-rpath,<lib32>/lib"

From 776fab5ca6c108a56d06c721780636fb0a997a48 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 21 Oct 2020 10:33:59 +0900
Subject: [PATCH 335/553] mulSmallUnit supports 12

---
 include/mcl/util.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/mcl/util.hpp b/include/mcl/util.hpp
index 01324055..8915c885 100644
--- a/include/mcl/util.hpp
+++ b/include/mcl/util.hpp
@@ -328,6 +328,7 @@ bool mulSmallUnit(T& z, const T& x, U y)
 	case 9: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, t); T::add(z, t, x); break; }
 	case 10: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, x); T::add(z, t, t); break; }
 	case 11: { T t; T::add(t, x, x); T::add(t, t, x); T::add(t, t, t); T::add(t, t, t); T::sub(z, t, x); break; }
+	case 12: { T t; T::add(t, x, x); T::add(t, t, t); T::add(z, t, t); T::add(z, z, t); break; }
 	default:
 		return false;
 	}

From 34fdf9a67ea818db590f43eea48684fc40f2b61b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 21 Oct 2020 15:30:41 +0900
Subject: [PATCH 336/553] add addCTProj

---
 include/mcl/ec.hpp | 48 ++++++++++++++++++++++++++++++++++++++++++++++
 test/ec_test.cpp   | 27 ++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index e1db56df..7ae672a5 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -424,6 +424,54 @@ void addJacobi(E& R, const E& P, const E& Q)
 	F::sub(R.y, U1, H3);
 }
 
+/*
+	accept P == Q
+	https://github.com/apache/incubator-milagro-crypto-c/blob/fa0a45a3/src/ecp.c.in#L767-L976
+*/
+template<class E>
+void addCTProj(E& R, const E& P, const E& Q)
+{
+	typedef typename E::Fp F;
+	assert(E::a_ == 0);
+	F b3;
+	F::add(b3, E::b_, E::b_);
+	b3 += E::b_;
+	F t0, t1, t2, t3, t4, x3, y3, z3;
+	F::mul(t0, P.x, Q.x);
+	F::mul(t1, P.y, Q.y);
+	F::mul(t2, P.z, Q.z);
+	F::add(t3, P.x, P.y);
+	F::add(t4, Q.x, Q.y);
+	F::mul(t3, t3, t4);
+	F::add(t4, t0, t1);
+	F::sub(t3, t3, t4);
+	F::add(t4, P.y, P.z);
+	F::add(x3, Q.y, Q.z);
+	F::mul(t4, t4, x3);
+	F::add(x3, t1, t2);
+	F::sub(t4, t4, x3);
+	F::add(x3, P.x, P.z);
+	F::add(y3, Q.x, Q.z);
+	F::mul(x3, x3, y3);
+	F::add(y3, t0, t2);
+	F::sub(y3, x3, y3);
+	F::add(x3, t0, t0);
+	F::add(t0, t0, x3);
+	t2 *= b3;
+	F::add(z3, t1, t2);
+	F::sub(t1, t1, t2);
+	y3 *= b3;
+	F::mul(x3, y3, t4);
+	F::mul(t2, t3, t1);
+	F::sub(R.x, t2, x3);
+	F::mul(y3, y3, t0);
+	F::mul(t1, t1, z3);
+	F::add(R.y, y3, t1);
+	F::mul(t0, t0, t3);
+	F::mul(z3, z3, t4);
+	F::add(R.z, z3, t0);
+}
+
 template<class E>
 void normalizeProj(E& P)
 {
diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index 855ceba8..1aa241f3 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -491,6 +491,32 @@ struct Test {
 		CYBOZU_TEST_ASSERT(!(P1 < P1));
 		CYBOZU_TEST_ASSERT((P1 <= P1));
 	}
+	void addCT() const
+	{
+		if (Ec::getMode() != mcl::ec::Proj) return;
+		if (Ec::a_ != 0) return;
+		Fp x(para.gx);
+		Fp y(para.gy);
+		Ec P(x, y), Q, R, Zero;
+		Zero.clear();
+		mcl::ec::addCTProj(Q, P, P);
+		Ec::add(R, P, P);
+		CYBOZU_TEST_EQUAL(Q, R);
+		mcl::ec::addCTProj(Q, Q, P);
+		Ec::add(R, R, P);
+		CYBOZU_TEST_EQUAL(Q, R);
+/*
+		mcl::ec::addCTProj(Q, Q, Zero);
+		Ec::add(R, R, Zero);
+		CYBOZU_TEST_EQUAL(Q, R);
+		mcl::ec::addCTProj(Q, Zero, Q);
+		Ec::add(R, Zero, R);
+		CYBOZU_TEST_EQUAL(Q, R);
+*/
+		mcl::ec::addCTProj(Q, Zero, Zero);
+		Ec::add(R, Zero, Zero);
+		CYBOZU_TEST_EQUAL(Q, R);
+	}
 
 	template<class F>
 	void test(F f, const char *msg) const
@@ -532,6 +558,7 @@ mul 499.00usec
 		ioMode();
 		mulCT();
 		compare();
+		addCT();
 	}
 private:
 	Test(const Test&);

From ad59c9eff7fe7a83e966a42376643fb85f309c8f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 25 Oct 2020 20:08:54 +0900
Subject: [PATCH 337/553] test addCTProj for Zero

---
 include/mcl/ec.hpp | 1 +
 test/ec_test.cpp   | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index 7ae672a5..d8a0fc18 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -427,6 +427,7 @@ void addJacobi(E& R, const E& P, const E& Q)
 /*
 	accept P == Q
 	https://github.com/apache/incubator-milagro-crypto-c/blob/fa0a45a3/src/ecp.c.in#L767-L976
+	(x, y, z) is zero <=> x = 0, y = 1, z = 0
 */
 template<class E>
 void addCTProj(E& R, const E& P, const E& Q)
diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index 1aa241f3..f5447140 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -499,20 +499,19 @@ struct Test {
 		Fp y(para.gy);
 		Ec P(x, y), Q, R, Zero;
 		Zero.clear();
+		Zero.y = 1;
 		mcl::ec::addCTProj(Q, P, P);
 		Ec::add(R, P, P);
 		CYBOZU_TEST_EQUAL(Q, R);
 		mcl::ec::addCTProj(Q, Q, P);
 		Ec::add(R, R, P);
 		CYBOZU_TEST_EQUAL(Q, R);
-/*
 		mcl::ec::addCTProj(Q, Q, Zero);
 		Ec::add(R, R, Zero);
 		CYBOZU_TEST_EQUAL(Q, R);
 		mcl::ec::addCTProj(Q, Zero, Q);
 		Ec::add(R, Zero, R);
 		CYBOZU_TEST_EQUAL(Q, R);
-*/
 		mcl::ec::addCTProj(Q, Zero, Zero);
 		Ec::add(R, Zero, Zero);
 		CYBOZU_TEST_EQUAL(Q, R);

From 2497535f8cb72bab8a2e3a9dbecd2fec4dae2eb3 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 27 Oct 2020 22:51:52 +0900
Subject: [PATCH 338/553] check FreeBSD in common.mk

---
 common.mk | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/common.mk b/common.mk
index ec7d0e65..29cd6505 100644
--- a/common.mk
+++ b/common.mk
@@ -32,6 +32,12 @@ ifeq ($(UNAME_S),OpenBSD)
   CFLAGS+=-I/usr/local/include
   LDFLAGS+=-L/usr/local/lib
 endif
+ifeq ($(UNAME_S),FreeBSD)
+  OS=freebsd
+  CXX?=clang++
+  CFLAGS+=-I/usr/local/include
+  LDFLAGS+=-L/usr/local/lib
+endif
 
 ARCH?=$(shell uname -m)
 ifneq ($(findstring $(ARCH),x86_64/amd64),)

From 8e986394ae9b242072f2107818c35e9239b3de26 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 27 Oct 2020 22:53:56 +0900
Subject: [PATCH 339/553] default build without GMP

---
 common.mk |  2 +-
 readme.md | 17 +++++++----------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/common.mk b/common.mk
index 29cd6505..5aca290b 100644
--- a/common.mk
+++ b/common.mk
@@ -110,7 +110,7 @@ ifeq ($(DEBUG),0)
 CFLAGS+=$(CFLAGS_OPT_USER)
 endif
 CFLAGS+=$(CFLAGS_USER)
-MCL_USE_GMP?=1
+MCL_USE_GMP?=0
 ifeq ($(OS),mac)
   ifeq ($(shell sw_vers -productVersion),10.15)
     # workaround because of GMP does not run well on Catalina
diff --git a/readme.md b/readme.md
index b8fa2647..d99acba8 100644
--- a/readme.md
+++ b/readme.md
@@ -51,14 +51,6 @@ see [api.md](api.md)
 # How to build on Linux and macOS
 x86-64/ARM/ARM64 Linux, macOS and mingw64 are supported.
 
-## Installation Requirements
-
-[GMP](https://gmplib.org/) is necessary (default setting).
-
-```
-apt install libgmp-dev # on Ubuntu
-```
-
 ## How to build with Makefile
 
 ```
@@ -80,10 +72,15 @@ make bin/bn_c384_256_test.exe && bin/bn_c384_256_test.exe
 make bin/bls12_test.exe && bin/bls12_test.exe
 ```
 
-## How to build without GMP
+## How to build with GMP
+Install [GMP](https://gmplib.org/).
+
+```
+apt install libgmp-dev # on Ubuntu
+```
 
 ```
-make MCL_USE_GMP=0
+make MCL_USE_GMP=1
 
 ```
 Define `MCL_USE_VINT` if using C++ header files.

From 9f97f255006ceb5185ed63ea868602eae9950631 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 27 Oct 2020 23:47:26 +0900
Subject: [PATCH 340/553] Revert "default build without GMP"

This reverts commit 8e986394ae9b242072f2107818c35e9239b3de26.
---
 common.mk |  2 +-
 readme.md | 17 ++++++++++-------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/common.mk b/common.mk
index 5aca290b..29cd6505 100644
--- a/common.mk
+++ b/common.mk
@@ -110,7 +110,7 @@ ifeq ($(DEBUG),0)
 CFLAGS+=$(CFLAGS_OPT_USER)
 endif
 CFLAGS+=$(CFLAGS_USER)
-MCL_USE_GMP?=0
+MCL_USE_GMP?=1
 ifeq ($(OS),mac)
   ifeq ($(shell sw_vers -productVersion),10.15)
     # workaround because of GMP does not run well on Catalina
diff --git a/readme.md b/readme.md
index d1c5619a..c3f175dd 100644
--- a/readme.md
+++ b/readme.md
@@ -51,6 +51,14 @@ see [api.md](api.md)
 # How to build on Linux and macOS
 x86-64/ARM/ARM64 Linux, macOS and mingw64 are supported.
 
+## Installation Requirements
+
+[GMP](https://gmplib.org/) is necessary (default setting).
+
+```
+apt install libgmp-dev # on Ubuntu
+```
+
 ## How to build with Makefile
 
 ```
@@ -72,15 +80,10 @@ make bin/bn_c384_256_test.exe && bin/bn_c384_256_test.exe
 make bin/bls12_test.exe && bin/bls12_test.exe
 ```
 
-## How to build with GMP
-Install [GMP](https://gmplib.org/).
-
-```
-apt install libgmp-dev # on Ubuntu
-```
+## How to build without GMP
 
 ```
-make MCL_USE_GMP=1
+make MCL_USE_GMP=0
 
 ```
 Define `MCL_USE_VINT` if using C++ header files.

From 95b32ec6e9c9830d685177c92d1e3ce46215df8c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 27 Oct 2020 23:51:02 +0900
Subject: [PATCH 341/553] modify condition for DUMP_JIT

---
 src/fp.cpp | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index 9bcd3717..24617ad9 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -376,17 +376,14 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
 		if (!b) return false;
 	}
 	op.rp = getMontgomeryCoeff(p[0]);
-	if (mode != FP_XBYAK) return true;
 
+	(void)mode;
 #ifdef MCL_X64_ASM
 
-#ifdef MCL_USE_VINT
-	const int maxInvN = 6;
-#else
-	const int maxInvN = 4;
-#endif
-
 #ifdef MCL_USE_XBYAK
+#ifndef MCL_DUMP_JIT
+	if (mode != FP_XBYAK) return true;
+#endif
 	if (op.fg == 0) op.fg = Op::createFpGenerator();
 	bool enableInv = op.fg->init(op, g_cpu);
 #ifdef MCL_DUMP_JIT
@@ -397,6 +394,11 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
 	bool enableInv = true;
 #endif // MCL_USE_XBYAK
 
+#ifdef MCL_USE_VINT
+	const int maxInvN = 6;
+#else
+	const int maxInvN = 4;
+#endif
 	if (enableInv && op.isMont && N <= maxInvN) {
 		op.fp_invOp = &invOpForMontC;
 		initInvTbl(op);

From b32aecd583af99ee6a63c12e14224140fde442d9 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 28 Oct 2020 01:42:35 +0900
Subject: [PATCH 342/553] v1.26

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 28a3d38a..27a5c52e 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x125; /* 0xABC = A.BC */
+static const int version = 0x126; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From 3af5eaa48fc1536f4223460fbf8b88211489affb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 5 Nov 2020 09:13:59 +0900
Subject: [PATCH 343/553] [she] add decWithZkpDec

---
 include/mcl/she.hpp | 82 +++++++++++++++++++++++++++++++++++++++++++++
 test/she_test.cpp   | 19 +++++++++++
 2 files changed, 101 insertions(+)

diff --git a/include/mcl/she.hpp b/include/mcl/she.hpp
index d441e1c0..90db0f40 100644
--- a/include/mcl/she.hpp
+++ b/include/mcl/she.hpp
@@ -544,6 +544,7 @@ struct SHET {
 	struct ZkpBinTag;
 	struct ZkpEqTag; // d_[] = { c, sp, ss, sm }
 	struct ZkpBinEqTag; // d_[] = { d0, d1, sp0, sp1, ss, sp, sm }
+	struct ZkpDecTag; // d_[] = { c, h }
 public:
 	/*
 		Zkp for m = 0 or 1
@@ -557,6 +558,10 @@ struct SHET {
 		Zkp for (m = 0 or 1) and decG1(c1) == decG2(c2)
 	*/
 	typedef ZkpT<ZkpBinEqTag, 7> ZkpBinEq;
+	/*
+		Zkp for Dec(c) = m
+	*/
+	typedef ZkpT<ZkpDecTag, 2> ZkpDec;
 
 	typedef CipherTextAT<G1> CipherTextG1;
 	typedef CipherTextAT<G2> CipherTextG2;
@@ -775,6 +780,50 @@ struct SHET {
 				return isZero(c.a_);
 			}
 		}
+		int64_t decWithZkpDec(bool *pok, ZkpDec& zkp, const CipherTextG1& c, const PublicKey& pub) const
+		{
+			/*
+				c = (S, T)
+				S = mP + rxP
+				T = rP
+				R = S - xT = mP
+			*/
+			G1 R;
+			G1::mul(R, c.T_, x_);
+			G1::sub(R, c.S_, R);
+			int64_t m = PhashTbl_.log(R, pok);
+			if (!*pok) return 0;
+			const G1& P1 = P_;
+			const G1& P2 = c.T_; // rP
+			const G1& A1 = pub.xP_;
+			G1 A2;
+			G1::sub(A2, c.S_, R); // rxP
+			Fr b;
+			b.setRand();
+			G1 B1, B2;
+			G1::mul(B1, P1, b);
+			G1::mul(B2, P2, b);
+			char buf[sizeof(G1) * 5];
+			cybozu::MemoryOutputStream os(buf, sizeof(buf));
+			P2.save(os);
+			A1.save(os);
+			A2.save(os);
+			B1.save(os);
+			B2.save(os);
+			Fr& d = zkp.d_[0];
+			Fr& h = zkp.d_[1];
+			h.setHashOf(buf, os.getPos());
+			Fr::mul(d, h, x_);
+			d += b;
+			return m;
+		}
+		int64_t decWithZkpDec(ZkpDec& zkp, const CipherTextG1& c, const PublicKey& pub) const
+		{
+			bool b;
+			int64_t ret = decWithZkpDec(&b, zkp, c, pub);
+			if (!b) throw cybozu::Exception("she:SecretKey:decWithZkpDec");
+			return ret;
+		}
 		template<class InputStream>
 		void load(bool *pb, InputStream& is, int ioMode = IoSerialize)
 		{
@@ -1288,6 +1337,38 @@ struct SHET {
 			const MulG<G1> xPmul(xP_);
 			return verifyZkpBin(c.S_, c.T_, P_, zkp, PhashTbl_.getWM(), xPmul);
 		}
+		bool verify(const CipherTextG1& c, int64_t m, const ZkpDec& zkp) const
+		{
+			/*
+				Enc(m;r) - Enc(m;0) = (S, T) - (mP, 0) = (S - mP, T)
+			*/
+			const Fr& d = zkp.d_[0];
+			const Fr& h = zkp.d_[1];
+			const G1& P1 = P_;
+			const G1& P2 = c.T_; // rP
+			const G1& A1 = xP_;
+			G1 A2;
+			G1::mul(A2, P_, m);
+//			PhashTbl_.getWM().mul(A2, m);
+			G1::sub(A2, c.S_, A2); // S - mP = xrP
+			G1 B1, B2, T;
+			G1::mul(B1, P1, d);
+			G1::mul(B2, P2, d);
+			G1::mul(T, A1, h);
+			B1 -= T;
+			G1::mul(T, A2, h);
+			B2 -= T;
+			char buf[sizeof(G1) * 5];
+			cybozu::MemoryOutputStream os(buf, sizeof(buf));
+			P2.save(os);
+			A1.save(os);
+			A2.save(os);
+			B1.save(os);
+			B2.save(os);
+			Fr h2;
+			h2.setHashOf(buf, os.getPos());
+			return h == h2;
+		}
 		bool verify(const CipherTextG2& c, const ZkpBin& zkp) const
 		{
 			const MulG<G2> yQmul(yQ_);
@@ -1896,6 +1977,7 @@ typedef SHE::CipherText CipherText;
 typedef SHE::ZkpBin ZkpBin;
 typedef SHE::ZkpEq ZkpEq;
 typedef SHE::ZkpBinEq ZkpBinEq;
+typedef SHE::ZkpDec ZkpDec;
 
 inline void init(const mcl::CurveParam& cp = mcl::BN254, size_t hashSize = 1024, size_t tryNum = local::defaultTryNum)
 {
diff --git a/test/she_test.cpp b/test/she_test.cpp
index f2651d19..f7095ec4 100644
--- a/test/she_test.cpp
+++ b/test/she_test.cpp
@@ -331,6 +331,25 @@ CYBOZU_TEST_AUTO(ZkpBinEq)
 	ZkpBinEqTest(sec, ppub);
 }
 
+CYBOZU_TEST_AUTO(ZkpDec)
+{
+	const SecretKey& sec = g_sec;
+	PublicKey pub;
+	sec.getPublicKey(pub);
+	CipherTextG1 c;
+	int m = 123;
+	pub.enc(c, m);
+	ZkpDec zkp;
+	CYBOZU_TEST_EQUAL(sec.decWithZkpDec(zkp, c, pub), m);
+	CYBOZU_TEST_ASSERT(pub.verify(c, m, zkp));
+	CYBOZU_TEST_ASSERT(!pub.verify(c, m + 1, zkp));
+	CipherTextG1 c2;
+	pub.enc(c2, m);
+	CYBOZU_TEST_ASSERT(!pub.verify(c2, m, zkp));
+	zkp.d_[0] += 1;
+	CYBOZU_TEST_ASSERT(!pub.verify(c, m, zkp));
+}
+
 CYBOZU_TEST_AUTO(add_sub_mul)
 {
 	const SecretKey& sec = g_sec;

From 0704eb875bf1219a4e0a41c076d45f82ee19a1f0 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 5 Nov 2020 09:43:57 +0900
Subject: [PATCH 344/553] [she] add decWithZkpDec C api

---
 include/mcl/she.h   | 13 +++++++++++++
 src/she_c_impl.hpp  | 25 +++++++++++++++++++++++++
 test/she_c_test.hpp | 22 ++++++++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/include/mcl/she.h b/include/mcl/she.h
index d474216b..84a25a72 100644
--- a/include/mcl/she.h
+++ b/include/mcl/she.h
@@ -74,6 +74,10 @@ typedef struct {
 typedef struct {
 	mclBnFr d[7];
 } sheZkpBinEq;
+
+typedef struct {
+	mclBnFr d[7];
+} sheZkpDec;
 /*
 	initialize this library
 	call this once before using the other functions
@@ -96,6 +100,7 @@ MCLSHE_DLL_API mclSize sheCipherTextGTSerialize(void *buf, mclSize maxBufSize, c
 MCLSHE_DLL_API mclSize sheZkpBinSerialize(void *buf, mclSize maxBufSize, const sheZkpBin *zkp);
 MCLSHE_DLL_API mclSize sheZkpEqSerialize(void *buf, mclSize maxBufSize, const sheZkpEq *zkp);
 MCLSHE_DLL_API mclSize sheZkpBinEqSerialize(void *buf, mclSize maxBufSize, const sheZkpBinEq *zkp);
+MCLSHE_DLL_API mclSize sheZkpDecSerialize(void *buf, mclSize maxBufSize, const sheZkpDec *zkp);
 
 // return read byte size if sucess else 0
 MCLSHE_DLL_API mclSize sheSecretKeyDeserialize(sheSecretKey* sec, const void *buf, mclSize bufSize);
@@ -106,6 +111,7 @@ MCLSHE_DLL_API mclSize sheCipherTextGTDeserialize(sheCipherTextGT* c, const void
 MCLSHE_DLL_API mclSize sheZkpBinDeserialize(sheZkpBin* zkp, const void *buf, mclSize bufSize);
 MCLSHE_DLL_API mclSize sheZkpEqDeserialize(sheZkpEq* zkp, const void *buf, mclSize bufSize);
 MCLSHE_DLL_API mclSize sheZkpBinEqDeserialize(sheZkpBinEq* zkp, const void *buf, mclSize bufSize);
+MCLSHE_DLL_API mclSize sheZkpDecDeserialize(sheZkpDec* zkp, const void *buf, mclSize bufSize);
 
 /*
 	set secretKey if system has /dev/urandom or CryptGenRandom
@@ -192,6 +198,12 @@ MCLSHE_DLL_API int shePrecomputedPublicKeyEncWithZkpBinEq(sheCipherTextG1 *c1, s
 MCLSHE_DLL_API int sheEncWithZkpEq(sheCipherTextG1 *c1, sheCipherTextG2 *c2, sheZkpEq *zkp, const shePublicKey *pub, mclInt m);
 MCLSHE_DLL_API int shePrecomputedPublicKeyEncWithZkpEq(sheCipherTextG1 *c1, sheCipherTextG2 *c2, sheZkpEq *zkp, const shePrecomputedPublicKey *ppub, mclInt m);
 
+/*
+	Zkp s.t. Dec(c) = m
+	return 0 if success
+*/
+MCLSHE_DLL_API int sheDecWithZkpDecG1(mclInt *m, sheZkpDec *zkp, const sheSecretKey *sec, const sheCipherTextG1 *c, const shePublicKey *pub);
+
 /*
 	decode c and set m
 	return 0 if success
@@ -211,6 +223,7 @@ MCLSHE_DLL_API int shePrecomputedPublicKeyVerifyZkpBinG1(const shePrecomputedPub
 MCLSHE_DLL_API int shePrecomputedPublicKeyVerifyZkpBinG2(const shePrecomputedPublicKey *ppub, const sheCipherTextG2 *c, const sheZkpBin *zkp);
 MCLSHE_DLL_API int shePrecomputedPublicKeyVerifyZkpEq(const shePrecomputedPublicKey *ppub, const sheCipherTextG1 *c1, const sheCipherTextG2 *c2, const sheZkpEq *zkp);
 MCLSHE_DLL_API int shePrecomputedPublicKeyVerifyZkpBinEq(const shePrecomputedPublicKey *ppub, const sheCipherTextG1 *c1, const sheCipherTextG2 *c2, const sheZkpBinEq *zkp);
+MCLSHE_DLL_API int sheVerifyZkpDecG1(const shePublicKey *pub, const sheCipherTextG1 *c1, mclInt m, const sheZkpDec *zkp);
 /*
 	decode c via GT and set m
 	return 0 if success
diff --git a/src/she_c_impl.hpp b/src/she_c_impl.hpp
index 8dd0a54f..6fcb2d38 100644
--- a/src/she_c_impl.hpp
+++ b/src/she_c_impl.hpp
@@ -41,6 +41,9 @@ static const ZkpEq *cast(const sheZkpEq *p) { return reinterpret_cast<const ZkpE
 static ZkpBinEq *cast(sheZkpBinEq *p) { return reinterpret_cast<ZkpBinEq*>(p); }
 static const ZkpBinEq *cast(const sheZkpBinEq *p) { return reinterpret_cast<const ZkpBinEq*>(p); }
 
+static ZkpDec *cast(sheZkpDec *p) { return reinterpret_cast<ZkpDec*>(p); }
+static const ZkpDec *cast(const sheZkpDec *p) { return reinterpret_cast<const ZkpDec*>(p); }
+
 int sheInit(int curve, int compiledTimeVar)
 	try
 {
@@ -116,6 +119,11 @@ mclSize sheZkpBinEqSerialize(void *buf, mclSize maxBufSize, const sheZkpBinEq *z
 	return (mclSize)cast(zkp)->serialize(buf, maxBufSize);
 }
 
+mclSize sheZkpDecSerialize(void *buf, mclSize maxBufSize, const sheZkpDec *zkp)
+{
+	return (mclSize)cast(zkp)->serialize(buf, maxBufSize);
+}
+
 mclSize sheSecretKeyDeserialize(sheSecretKey* sec, const void *buf, mclSize bufSize)
 {
 	return (mclSize)cast(sec)->deserialize(buf, bufSize);
@@ -156,6 +164,11 @@ mclSize sheZkpBinEqDeserialize(sheZkpBinEq* zkp, const void *buf, mclSize bufSiz
 	return (mclSize)cast(zkp)->deserialize(buf, bufSize);
 }
 
+mclSize sheZkpDecDeserialize(sheZkpDec* zkp, const void *buf, mclSize bufSize)
+{
+	return (mclSize)cast(zkp)->deserialize(buf, bufSize);
+}
+
 int sheSecretKeySetByCSPRNG(sheSecretKey *sec)
 {
 	cast(sec)->setByCSPRNG();
@@ -768,3 +781,15 @@ int shePrecomputedPublicKeyVerifyZkpBinEq(const shePrecomputedPublicKey *ppub, c
 	return verifyT(*cast(ppub), *cast(c1), *cast(c2), *cast(zkp));
 }
 
+int sheDecWithZkpDecG1(mclInt *m, sheZkpDec *zkp, const sheSecretKey *sec, const sheCipherTextG1 *c, const shePublicKey *pub)
+{
+	bool b;
+	*m = cast(sec)->decWithZkpDec(&b, *cast(zkp), *cast(c), *cast(pub));
+	return b ? 0 : -1;
+}
+
+int sheVerifyZkpDecG1(const shePublicKey *pub, const sheCipherTextG1 *c1, mclInt m, const sheZkpDec *zkp)
+{
+	return cast(pub)->verify(*cast(c1), m, *cast(zkp));
+}
+
diff --git a/test/she_c_test.hpp b/test/she_c_test.hpp
index 58139f07..b00b6a2c 100644
--- a/test/she_c_test.hpp
+++ b/test/she_c_test.hpp
@@ -443,6 +443,28 @@ void ZkpEqTest(const sheSecretKey *sec, const PK *pub, encWithZkpFunc encWithZkp
 }
 
 CYBOZU_TEST_AUTO(ZkpEq)
+{
+	sheSecretKey sec;
+	sheSecretKeySetByCSPRNG(&sec);
+	shePublicKey pub;
+	sheGetPublicKey(&pub, &sec);
+	int m = 123;
+	sheCipherTextG1 c1;
+	sheEncG1(&c1, &pub, m);
+	sheZkpDec zkp;
+	int64_t dec;
+	CYBOZU_TEST_EQUAL(sheDecWithZkpDecG1(&dec, &zkp, &sec, &c1, &pub), 0);
+	CYBOZU_TEST_EQUAL(m, dec);
+	CYBOZU_TEST_EQUAL(sheVerifyZkpDecG1(&pub, &c1, m, &zkp), 1);
+	CYBOZU_TEST_EQUAL(sheVerifyZkpDecG1(&pub, &c1, m + 1, &zkp), 0);
+	sheCipherTextG1 c2;
+	sheEncG1(&c2, &pub, m);
+	CYBOZU_TEST_EQUAL(sheVerifyZkpDecG1(&pub, &c2, m, &zkp), 0);
+	zkp.d[0].d[0]++;
+	CYBOZU_TEST_EQUAL(sheVerifyZkpDecG1(&pub, &c1, m, &zkp), 0);
+}
+
+CYBOZU_TEST_AUTO(ZkpDec)
 {
 	sheSecretKey sec;
 	sheSecretKeySetByCSPRNG(&sec);

From 2420cee2c3a770d673a8db99ffc9f7e34bcf4a29 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 6 Nov 2020 09:30:09 +0900
Subject: [PATCH 345/553] [she] algo of decWithZkpDec

---
 misc/she/memo.txt | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 misc/she/memo.txt

diff --git a/misc/she/memo.txt b/misc/she/memo.txt
new file mode 100644
index 00000000..68ebe85e
--- /dev/null
+++ b/misc/she/memo.txt
@@ -0,0 +1,31 @@
+P ; generator
+x ; secret key
+xP ; public key
+Enc(m;r) = (mP + rxP, rP)
+
+c = (S, T)
+dec(c) := S - xT
+dec(Enc(m;r)) = (mP + rxP) - x(rP) = mP
+DLP(mP) := m
+Dec(c) := DLP(dec(c))
+
+ZKP of dec(c) = m
+
+z := Enc(m;0) = (mP, 0)
+
+c - z = (rxP, rP) ; r is unknown
+
+ZKP of dec(c - z) = 0
+(P1, P2) := (P, rP)
+(A1, A2) := (xP, xrP)
+
+Prover shows that x(P1, P2) = (A1, A2) without revealing x.
+b ; rand
+B = (b P1, b P2)
+h = Hash(P2, A1, A2, B1, B2)
+d = b + h x
+pi = (d, h)
+
+Verifier
+Bi := d Pi - h Ai
+verify h = Hash(P2, A1, A2, B1, B2)

From 17f3e80c58a8c720c73cd5c5b2132766e81e25eb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 10 Nov 2020 14:23:19 +0900
Subject: [PATCH 346/553] fix macro condition

---
 src/fp.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index 24617ad9..eb8a7de8 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -4,7 +4,7 @@
 #include <cybozu/endian.hpp>
 #include <mcl/conversion.hpp>
 
-#if defined(MCL_STATIC_CODE) || defined(MCL_USE_XBYAK) || defined(MCL_USE_LLVM)
+#if defined(MCL_STATIC_CODE) || defined(MCL_USE_XBYAK) || (defined(MCL_USE_LLVM) && (CYBOZU_HOST == CYBOZU_HOST_INTEL))
 
 #ifdef MCL_USE_XBYAK
 	#define XBYAK_DISABLE_AVX512

From cb16b0af7243ea523201e0210e49334db53467ec Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 10 Nov 2020 14:24:34 +0900
Subject: [PATCH 347/553] v1.27

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 27a5c52e..72e40a32 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x126; /* 0xABC = A.BC */
+static const int version = 0x127; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From 702b390ccbf080535e62606dc50a5d388ffbfb33 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 10 Nov 2020 18:25:20 +0900
Subject: [PATCH 348/553] mcl.js contains mcl.wasm

---
 Makefile | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index 2edb3b48..b60492d6 100644
--- a/Makefile
+++ b/Makefile
@@ -363,13 +363,7 @@ endif
 	emcc -o $@ src/fp.cpp src/she_c384.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=384 -s TOTAL_MEMORY=67108864 -s DISABLE_EXCEPTION_CATCHING=1
 
 ../mcl-wasm/mcl_c384_256.js: src/bn_c384_256.cpp $(MCL_C_DEP)
-	emcc -o $@ src/fp.cpp src/bn_c384_256.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=384 -DMCL_USE_WEB_CRYPTO_API -s DISABLE_EXCEPTION_CATCHING=1 -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -fno-exceptions -MD -MP -MF obj/mcl_c384_256.d
-
-../mcl-wasm/mcl_c.js: src/bn_c256.cpp $(MCL_C_DEP)
-	emcc -o $@ src/fp.cpp src/bn_c256.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=256 -DMCL_USE_WEB_CRYPTO_API -s DISABLE_EXCEPTION_CATCHING=1 -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -fno-exceptions -MD -MP -MF obj/mcl_c.d
-
-../mcl-wasm/mcl_c512.js: src/bn_c512.cpp $(MCL_C_DEP)
-	emcc -o $@ src/fp.cpp src/bn_c512.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=512 -DMCL_USE_WEB_CRYPTO_API -s DISABLE_EXCEPTION_CATCHING=1 -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -fno-exceptions
+	emcc -o $@ src/fp.cpp src/bn_c384_256.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=384 -DMCL_USE_WEB_CRYPTO_API -s DISABLE_EXCEPTION_CATCHING=1 -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -fno-exceptions -MD -MP -MF obj/mcl_c384_256.d -s SINGLE_FILE=1
 
 ../ecdsa-wasm/ecdsa_c.js: src/ecdsa_c.cpp src/fp.cpp include/mcl/ecdsa.hpp include/mcl/ecdsa.h Makefile
 	emcc -o $@ src/fp.cpp src/ecdsa_c.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=256 -DMCL_USE_WEB_CRYPTO_API -s DISABLE_EXCEPTION_CATCHING=1 -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -fno-exceptions

From edc48bc1abec9b3e28c98368a983c0041c131d07 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 22 Nov 2020 17:33:42 +0900
Subject: [PATCH 349/553] [she] add makeHash for ZKP

---
 include/mcl/she.hpp | 105 +++++++++++++-------------------------------
 1 file changed, 30 insertions(+), 75 deletions(-)

diff --git a/include/mcl/she.hpp b/include/mcl/she.hpp
index 90db0f40..e1f8a87b 100644
--- a/include/mcl/she.hpp
+++ b/include/mcl/she.hpp
@@ -325,6 +325,27 @@ int log(const G& P, const G& xP)
 	throw cybozu::Exception("she:log:not found");
 }
 
+struct DummyOut {
+	template<class OutStream>
+	void save(OutStream&) const {}
+};
+template<class F, class T0, class T1, class T2, class T3, class T4 = DummyOut, class T5 = DummyOut, class T6 = DummyOut, class T7 = DummyOut, class T8 = DummyOut, class T9 = DummyOut>
+void makeHash(F& h, char *buf, const size_t bufSize, const T0 *t0, const T1 *t1, const T2 *t2, const T3 *t3, const T4 *t4, const T5 *t5 = 0, const T6 *t6 = 0, const T7 *t7 = 0, const T8 *t8 = 0, const T9 *t9 = 0)
+{
+	cybozu::MemoryOutputStream os(buf, bufSize);
+	if (t0) t0->save(os);
+	if (t1) t1->save(os);
+	if (t2) t2->save(os);
+	if (t3) t3->save(os);
+	if (t4) t4->save(os);
+	if (t5) t5->save(os);
+	if (t6) t6->save(os);
+	if (t7) t7->save(os);
+	if (t8) t8->save(os);
+	if (t9) t9->save(os);
+	h.setHashOf(buf, os.getPos());
+}
+
 } // mcl::she::local
 
 template<size_t dummyInpl = 0>
@@ -803,16 +824,10 @@ struct SHET {
 			G1 B1, B2;
 			G1::mul(B1, P1, b);
 			G1::mul(B2, P2, b);
-			char buf[sizeof(G1) * 5];
-			cybozu::MemoryOutputStream os(buf, sizeof(buf));
-			P2.save(os);
-			A1.save(os);
-			A2.save(os);
-			B1.save(os);
-			B2.save(os);
 			Fr& d = zkp.d_[0];
 			Fr& h = zkp.d_[1];
-			h.setHashOf(buf, os.getPos());
+			char buf[sizeof(G1) * 5];
+			local::makeHash(h, buf, sizeof(buf), &P2, &A1, &A2, &B1, &B2);
 			Fr::mul(d, h, x_);
 			d += b;
 			return m;
@@ -937,15 +952,8 @@ struct SHET {
 		Pmul.mul(static_cast<I&>(R[0][m]), r); // R[0][m] = r P
 		xPmul.mul(R[1][m], r); // R[1][m] = r xP
 		char buf[sizeof(G) * 2];
-		cybozu::MemoryOutputStream os(buf, sizeof(buf));
-		S.save(os);
-		T.save(os);
-		R[0][0].save(os);
-		R[0][1].save(os);
-		R[1][0].save(os);
-		R[1][1].save(os);
 		Fr c;
-		c.setHashOf(buf, os.getPos());
+		local::makeHash(c, buf, sizeof(buf), &S, &T, &R[0][0], &R[0][1], &R[1][0], &R[1][1]);
 		d[m] = c - d[1-m];
 		s[m] = r + d[m] * encRand;
 	}
@@ -976,15 +984,8 @@ struct SHET {
 		G::mul(T2, T2, d[1]);
 		G::sub(R[1][1], T1, T2);
 		char buf[sizeof(G) * 2];
-		cybozu::MemoryOutputStream os(buf, sizeof(buf));
-		S.save(os);
-		T.save(os);
-		R[0][0].save(os);
-		R[0][1].save(os);
-		R[1][0].save(os);
-		R[1][1].save(os);
 		Fr c;
-		c.setHashOf(buf, os.getPos());
+		local::makeHash(c, buf, sizeof(buf), &S, &T, &R[0][0], &R[0][1], &R[1][0], &R[1][1]);
 		return c == d[0] + d[1];
 	}
 	/*
@@ -1007,20 +1008,11 @@ struct SHET {
 		ElGamalEnc(R1, R2, rm, Pmul, xPmul, &rp);
 		ElGamalEnc(R3, R4, rm, Qmul, yQmul, &rs);
 		char buf[sizeof(G1) * 4 + sizeof(G2) * 4];
-		cybozu::MemoryOutputStream os(buf, sizeof(buf));
-		S1.save(os);
-		T1.save(os);
-		S2.save(os);
-		T2.save(os);
-		R1.save(os);
-		R2.save(os);
-		R3.save(os);
-		R4.save(os);
 		Fr& c = zkp.d_[0];
 		Fr& sp = zkp.d_[1];
 		Fr& ss = zkp.d_[2];
 		Fr& sm = zkp.d_[3];
-		c.setHashOf(buf, os.getPos());
+		local::makeHash(c, buf, sizeof(buf), &S1, &T1, &S2, &T2, &R1, &R2, &R3, &R4);
 		Fr::mul(sp, c, p);
 		sp += rp;
 		Fr::mul(ss, c, s);
@@ -1048,17 +1040,8 @@ struct SHET {
 		G2::mul(X2, T2, c);
 		R4 -= X2;
 		char buf[sizeof(G1) * 4 + sizeof(G2) * 4];
-		cybozu::MemoryOutputStream os(buf, sizeof(buf));
-		S1.save(os);
-		T1.save(os);
-		S2.save(os);
-		T2.save(os);
-		R1.save(os);
-		R2.save(os);
-		R3.save(os);
-		R4.save(os);
 		Fr c2;
-		c2.setHashOf(buf, os.getPos());
+		local::makeHash(c2, buf, sizeof(buf), &S1, &T1, &S2, &T2, &R1, &R2, &R3, &R4);
 		return c == c2;
 	}
 	/*
@@ -1103,19 +1086,8 @@ struct SHET {
 		ElGamalEnc(R4, R3, rm, Pmul, xPmul, &rp);
 		ElGamalEnc(R6, R5, rm, Qmul, yQmul, &rs);
 		char buf[sizeof(Fp) * 12];
-		cybozu::MemoryOutputStream os(buf, sizeof(buf));
-		S1.save(os);
-		T1.save(os);
-		R1[0].save(os);
-		R1[1].save(os);
-		R2[0].save(os);
-		R2[1].save(os);
-		R3.save(os);
-		R4.save(os);
-		R5.save(os);
-		R6.save(os);
 		Fr c;
-		c.setHashOf(buf, os.getPos());
+		local::makeHash(c, buf, sizeof(buf), &S1, &T1, &R1[0], &R1[1], &R2[0], &R2[1], &R3, &R4, &R5, &R6);
 		Fr::sub(d[m], c, d[1-m]);
 		Fr::mul(spm[m], d[m], p);
 		spm[m] += rpm;
@@ -1163,19 +1135,8 @@ struct SHET {
 		G2::mul(X2, S2, c);
 		R6 -= X2;
 		char buf[sizeof(Fp) * 12];
-		cybozu::MemoryOutputStream os(buf, sizeof(buf));
-		S1.save(os);
-		T1.save(os);
-		R1[0].save(os);
-		R1[1].save(os);
-		R2[0].save(os);
-		R2[1].save(os);
-		R3.save(os);
-		R4.save(os);
-		R5.save(os);
-		R6.save(os);
 		Fr c2;
-		c2.setHashOf(buf, os.getPos());
+		local::makeHash(c2, buf, sizeof(buf), &S1, &T1, &R1[0], &R1[1], &R2[0], &R2[1], &R3, &R4, &R5, &R6);
 		return c == c2;
 	}
 	/*
@@ -1359,14 +1320,8 @@ struct SHET {
 			G1::mul(T, A2, h);
 			B2 -= T;
 			char buf[sizeof(G1) * 5];
-			cybozu::MemoryOutputStream os(buf, sizeof(buf));
-			P2.save(os);
-			A1.save(os);
-			A2.save(os);
-			B1.save(os);
-			B2.save(os);
 			Fr h2;
-			h2.setHashOf(buf, os.getPos());
+			local::makeHash(h2, buf, sizeof(buf), &P2, &A1, &A2, &B1, &B2);
 			return h == h2;
 		}
 		bool verify(const CipherTextG2& c, const ZkpBin& zkp) const

From 467fa05e64333311d9af88a104ce4deda563249d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 22 Nov 2020 18:13:32 +0900
Subject: [PATCH 350/553] [she] makeHash supports C++03

---
 include/mcl/she.hpp | 70 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 53 insertions(+), 17 deletions(-)

diff --git a/include/mcl/she.hpp b/include/mcl/she.hpp
index e1f8a87b..ddbe86ee 100644
--- a/include/mcl/she.hpp
+++ b/include/mcl/she.hpp
@@ -324,25 +324,61 @@ int log(const G& P, const G& xP)
 	}
 	throw cybozu::Exception("she:log:not found");
 }
-
-struct DummyOut {
-	template<class OutStream>
-	void save(OutStream&) const {}
-};
-template<class F, class T0, class T1, class T2, class T3, class T4 = DummyOut, class T5 = DummyOut, class T6 = DummyOut, class T7 = DummyOut, class T8 = DummyOut, class T9 = DummyOut>
-void makeHash(F& h, char *buf, const size_t bufSize, const T0 *t0, const T1 *t1, const T2 *t2, const T3 *t3, const T4 *t4, const T5 *t5 = 0, const T6 *t6 = 0, const T7 *t7 = 0, const T8 *t8 = 0, const T9 *t9 = 0)
+// 5
+template<class F, class T0, class T1, class T2, class T3, class T4>
+void makeHash(F& h, char *buf, const size_t bufSize, const T0 *t0, const T1 *t1, const T2 *t2, const T3 *t3, const T4 *t4)
+{
+	cybozu::MemoryOutputStream os(buf, bufSize);
+	t0->save(os);
+	t1->save(os);
+	t2->save(os);
+	t3->save(os);
+	t4->save(os);
+	h.setHashOf(buf, os.getPos());
+}
+// 6
+template<class F, class T0, class T1, class T2, class T3, class T4, class T5>
+void makeHash(F& h, char *buf, const size_t bufSize, const T0 *t0, const T1 *t1, const T2 *t2, const T3 *t3, const T4 *t4, const T5 *t5)
+{
+	cybozu::MemoryOutputStream os(buf, bufSize);
+	t0->save(os);
+	t1->save(os);
+	t2->save(os);
+	t3->save(os);
+	t4->save(os);
+	t5->save(os);
+	h.setHashOf(buf, os.getPos());
+}
+// 8
+template<class F, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7>
+void makeHash(F& h, char *buf, const size_t bufSize, const T0 *t0, const T1 *t1, const T2 *t2, const T3 *t3, const T4 *t4, const T5 *t5, const T6 *t6, const T7 *t7)
+{
+	cybozu::MemoryOutputStream os(buf, bufSize);
+	t0->save(os);
+	t1->save(os);
+	t2->save(os);
+	t3->save(os);
+	t4->save(os);
+	t5->save(os);
+	t6->save(os);
+	t7->save(os);
+	h.setHashOf(buf, os.getPos());
+}
+// 10
+template<class F, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
+void makeHash(F& h, char *buf, const size_t bufSize, const T0 *t0, const T1 *t1, const T2 *t2, const T3 *t3, const T4 *t4, const T5 *t5, const T6 *t6, const T7 *t7, const T8 *t8, const T9 *t9)
 {
 	cybozu::MemoryOutputStream os(buf, bufSize);
-	if (t0) t0->save(os);
-	if (t1) t1->save(os);
-	if (t2) t2->save(os);
-	if (t3) t3->save(os);
-	if (t4) t4->save(os);
-	if (t5) t5->save(os);
-	if (t6) t6->save(os);
-	if (t7) t7->save(os);
-	if (t8) t8->save(os);
-	if (t9) t9->save(os);
+	t0->save(os);
+	t1->save(os);
+	t2->save(os);
+	t3->save(os);
+	t4->save(os);
+	t5->save(os);
+	t6->save(os);
+	t7->save(os);
+	t8->save(os);
+	t9->save(os);
 	h.setHashOf(buf, os.getPos());
 }
 

From 28af4a6b2595d75cbc22c088069c9a700cd486d8 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 24 Nov 2020 10:12:49 +0900
Subject: [PATCH 351/553] add llvm-IR for easy build

---
 src/base32.ll | 54255 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/base64.ll | 15383 ++++++++++++++
 2 files changed, 69638 insertions(+)
 create mode 100644 src/base32.ll
 create mode 100644 src/base64.ll

diff --git a/src/base32.ll b/src/base32.ll
new file mode 100644
index 00000000..1cfbbe8e
--- /dev/null
+++ b/src/base32.ll
@@ -0,0 +1,54255 @@
+define private i64 @mul32x32L(i32 %r2, i32 %r3)
+{
+%r4 = zext i32 %r2 to i64
+%r5 = zext i32 %r3 to i64
+%r6 = mul i64 %r4, %r5
+ret i64 %r6
+}
+define private i32 @extractHigh32(i64 %r2)
+{
+%r3 = lshr i64 %r2, 32
+%r4 = trunc i64 %r3 to i32
+ret i32 %r4
+}
+define private i64 @mulPos32x32(i32* noalias  %r2, i32 %r3, i32 %r4)
+{
+%r5 = getelementptr i32, i32* %r2, i32 %r4
+%r6 = load i32, i32* %r5
+%r7 = call i64 @mul32x32L(i32 %r6, i32 %r3)
+ret i64 %r7
+}
+define i192 @makeNIST_P192L()
+{
+%r8 = sub i64 0, 1
+%r9 = sub i64 0, 2
+%r10 = sub i64 0, 1
+%r11 = zext i64 %r8 to i192
+%r12 = zext i64 %r9 to i192
+%r13 = zext i64 %r10 to i192
+%r14 = shl i192 %r12, 64
+%r15 = shl i192 %r13, 128
+%r16 = add i192 %r11, %r14
+%r17 = add i192 %r16, %r15
+ret i192 %r17
+}
+define void @mcl_fpDbl_mod_NIST_P192L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r4 = load i32, i32* %r2
+%r5 = zext i32 %r4 to i64
+%r7 = getelementptr i32, i32* %r2, i32 1
+%r8 = load i32, i32* %r7
+%r9 = zext i32 %r8 to i64
+%r10 = shl i64 %r9, 32
+%r11 = or i64 %r5, %r10
+%r12 = zext i64 %r11 to i96
+%r14 = getelementptr i32, i32* %r2, i32 2
+%r15 = load i32, i32* %r14
+%r16 = zext i32 %r15 to i96
+%r17 = shl i96 %r16, 64
+%r18 = or i96 %r12, %r17
+%r19 = zext i96 %r18 to i128
+%r21 = getelementptr i32, i32* %r2, i32 3
+%r22 = load i32, i32* %r21
+%r23 = zext i32 %r22 to i128
+%r24 = shl i128 %r23, 96
+%r25 = or i128 %r19, %r24
+%r26 = zext i128 %r25 to i160
+%r28 = getelementptr i32, i32* %r2, i32 4
+%r29 = load i32, i32* %r28
+%r30 = zext i32 %r29 to i160
+%r31 = shl i160 %r30, 128
+%r32 = or i160 %r26, %r31
+%r33 = zext i160 %r32 to i192
+%r35 = getelementptr i32, i32* %r2, i32 5
+%r36 = load i32, i32* %r35
+%r37 = zext i32 %r36 to i192
+%r38 = shl i192 %r37, 160
+%r39 = or i192 %r33, %r38
+%r40 = zext i192 %r39 to i256
+%r42 = getelementptr i32, i32* %r2, i32 6
+%r43 = load i32, i32* %r42
+%r44 = zext i32 %r43 to i64
+%r46 = getelementptr i32, i32* %r42, i32 1
+%r47 = load i32, i32* %r46
+%r48 = zext i32 %r47 to i64
+%r49 = shl i64 %r48, 32
+%r50 = or i64 %r44, %r49
+%r51 = zext i64 %r50 to i96
+%r53 = getelementptr i32, i32* %r42, i32 2
+%r54 = load i32, i32* %r53
+%r55 = zext i32 %r54 to i96
+%r56 = shl i96 %r55, 64
+%r57 = or i96 %r51, %r56
+%r58 = zext i96 %r57 to i128
+%r60 = getelementptr i32, i32* %r42, i32 3
+%r61 = load i32, i32* %r60
+%r62 = zext i32 %r61 to i128
+%r63 = shl i128 %r62, 96
+%r64 = or i128 %r58, %r63
+%r65 = zext i128 %r64 to i160
+%r67 = getelementptr i32, i32* %r42, i32 4
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i160
+%r70 = shl i160 %r69, 128
+%r71 = or i160 %r65, %r70
+%r72 = zext i160 %r71 to i192
+%r74 = getelementptr i32, i32* %r42, i32 5
+%r75 = load i32, i32* %r74
+%r76 = zext i32 %r75 to i192
+%r77 = shl i192 %r76, 160
+%r78 = or i192 %r72, %r77
+%r79 = zext i192 %r78 to i256
+%r80 = shl i192 %r78, 64
+%r81 = zext i192 %r80 to i256
+%r82 = lshr i192 %r78, 128
+%r83 = trunc i192 %r82 to i64
+%r84 = zext i64 %r83 to i256
+%r85 = or i256 %r81, %r84
+%r86 = shl i256 %r84, 64
+%r87 = add i256 %r40, %r79
+%r88 = add i256 %r87, %r85
+%r89 = add i256 %r88, %r86
+%r90 = lshr i256 %r89, 192
+%r91 = trunc i256 %r90 to i64
+%r92 = zext i64 %r91 to i256
+%r93 = shl i256 %r92, 64
+%r94 = or i256 %r92, %r93
+%r95 = trunc i256 %r89 to i192
+%r96 = zext i192 %r95 to i256
+%r97 = add i256 %r96, %r94
+%r98 = call i192 @makeNIST_P192L()
+%r99 = zext i192 %r98 to i256
+%r100 = sub i256 %r97, %r99
+%r101 = lshr i256 %r100, 192
+%r102 = trunc i256 %r101 to i1
+%r103 = select i1 %r102, i256 %r97, i256 %r100
+%r104 = trunc i256 %r103 to i192
+%r105 = trunc i192 %r104 to i32
+%r107 = getelementptr i32, i32* %r1, i32 0
+store i32 %r105, i32* %r107
+%r108 = lshr i192 %r104, 32
+%r109 = trunc i192 %r108 to i32
+%r111 = getelementptr i32, i32* %r1, i32 1
+store i32 %r109, i32* %r111
+%r112 = lshr i192 %r108, 32
+%r113 = trunc i192 %r112 to i32
+%r115 = getelementptr i32, i32* %r1, i32 2
+store i32 %r113, i32* %r115
+%r116 = lshr i192 %r112, 32
+%r117 = trunc i192 %r116 to i32
+%r119 = getelementptr i32, i32* %r1, i32 3
+store i32 %r117, i32* %r119
+%r120 = lshr i192 %r116, 32
+%r121 = trunc i192 %r120 to i32
+%r123 = getelementptr i32, i32* %r1, i32 4
+store i32 %r121, i32* %r123
+%r124 = lshr i192 %r120, 32
+%r125 = trunc i192 %r124 to i32
+%r127 = getelementptr i32, i32* %r1, i32 5
+store i32 %r125, i32* %r127
+ret void
+}
+define void @mcl_fp_sqr_NIST_P192L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = alloca i32, i32 12
+call void @mcl_fpDbl_sqrPre6L(i32* %r5, i32* %r2)
+call void @mcl_fpDbl_mod_NIST_P192L(i32* %r1, i32* %r5, i32* %r5)
+ret void
+}
+define void @mcl_fp_mulNIST_P192L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r6 = alloca i32, i32 12
+call void @mcl_fpDbl_mulPre6L(i32* %r6, i32* %r2, i32* %r3)
+call void @mcl_fpDbl_mod_NIST_P192L(i32* %r1, i32* %r6, i32* %r6)
+ret void
+}
+define void @mcl_fpDbl_mod_NIST_P521L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r4 = load i32, i32* %r2
+%r5 = zext i32 %r4 to i64
+%r7 = getelementptr i32, i32* %r2, i32 1
+%r8 = load i32, i32* %r7
+%r9 = zext i32 %r8 to i64
+%r10 = shl i64 %r9, 32
+%r11 = or i64 %r5, %r10
+%r12 = zext i64 %r11 to i96
+%r14 = getelementptr i32, i32* %r2, i32 2
+%r15 = load i32, i32* %r14
+%r16 = zext i32 %r15 to i96
+%r17 = shl i96 %r16, 64
+%r18 = or i96 %r12, %r17
+%r19 = zext i96 %r18 to i128
+%r21 = getelementptr i32, i32* %r2, i32 3
+%r22 = load i32, i32* %r21
+%r23 = zext i32 %r22 to i128
+%r24 = shl i128 %r23, 96
+%r25 = or i128 %r19, %r24
+%r26 = zext i128 %r25 to i160
+%r28 = getelementptr i32, i32* %r2, i32 4
+%r29 = load i32, i32* %r28
+%r30 = zext i32 %r29 to i160
+%r31 = shl i160 %r30, 128
+%r32 = or i160 %r26, %r31
+%r33 = zext i160 %r32 to i192
+%r35 = getelementptr i32, i32* %r2, i32 5
+%r36 = load i32, i32* %r35
+%r37 = zext i32 %r36 to i192
+%r38 = shl i192 %r37, 160
+%r39 = or i192 %r33, %r38
+%r40 = zext i192 %r39 to i224
+%r42 = getelementptr i32, i32* %r2, i32 6
+%r43 = load i32, i32* %r42
+%r44 = zext i32 %r43 to i224
+%r45 = shl i224 %r44, 192
+%r46 = or i224 %r40, %r45
+%r47 = zext i224 %r46 to i256
+%r49 = getelementptr i32, i32* %r2, i32 7
+%r50 = load i32, i32* %r49
+%r51 = zext i32 %r50 to i256
+%r52 = shl i256 %r51, 224
+%r53 = or i256 %r47, %r52
+%r54 = zext i256 %r53 to i288
+%r56 = getelementptr i32, i32* %r2, i32 8
+%r57 = load i32, i32* %r56
+%r58 = zext i32 %r57 to i288
+%r59 = shl i288 %r58, 256
+%r60 = or i288 %r54, %r59
+%r61 = zext i288 %r60 to i320
+%r63 = getelementptr i32, i32* %r2, i32 9
+%r64 = load i32, i32* %r63
+%r65 = zext i32 %r64 to i320
+%r66 = shl i320 %r65, 288
+%r67 = or i320 %r61, %r66
+%r68 = zext i320 %r67 to i352
+%r70 = getelementptr i32, i32* %r2, i32 10
+%r71 = load i32, i32* %r70
+%r72 = zext i32 %r71 to i352
+%r73 = shl i352 %r72, 320
+%r74 = or i352 %r68, %r73
+%r75 = zext i352 %r74 to i384
+%r77 = getelementptr i32, i32* %r2, i32 11
+%r78 = load i32, i32* %r77
+%r79 = zext i32 %r78 to i384
+%r80 = shl i384 %r79, 352
+%r81 = or i384 %r75, %r80
+%r82 = zext i384 %r81 to i416
+%r84 = getelementptr i32, i32* %r2, i32 12
+%r85 = load i32, i32* %r84
+%r86 = zext i32 %r85 to i416
+%r87 = shl i416 %r86, 384
+%r88 = or i416 %r82, %r87
+%r89 = zext i416 %r88 to i448
+%r91 = getelementptr i32, i32* %r2, i32 13
+%r92 = load i32, i32* %r91
+%r93 = zext i32 %r92 to i448
+%r94 = shl i448 %r93, 416
+%r95 = or i448 %r89, %r94
+%r96 = zext i448 %r95 to i480
+%r98 = getelementptr i32, i32* %r2, i32 14
+%r99 = load i32, i32* %r98
+%r100 = zext i32 %r99 to i480
+%r101 = shl i480 %r100, 448
+%r102 = or i480 %r96, %r101
+%r103 = zext i480 %r102 to i512
+%r105 = getelementptr i32, i32* %r2, i32 15
+%r106 = load i32, i32* %r105
+%r107 = zext i32 %r106 to i512
+%r108 = shl i512 %r107, 480
+%r109 = or i512 %r103, %r108
+%r110 = zext i512 %r109 to i544
+%r112 = getelementptr i32, i32* %r2, i32 16
+%r113 = load i32, i32* %r112
+%r114 = zext i32 %r113 to i544
+%r115 = shl i544 %r114, 512
+%r116 = or i544 %r110, %r115
+%r117 = zext i544 %r116 to i576
+%r119 = getelementptr i32, i32* %r2, i32 17
+%r120 = load i32, i32* %r119
+%r121 = zext i32 %r120 to i576
+%r122 = shl i576 %r121, 544
+%r123 = or i576 %r117, %r122
+%r124 = zext i576 %r123 to i608
+%r126 = getelementptr i32, i32* %r2, i32 18
+%r127 = load i32, i32* %r126
+%r128 = zext i32 %r127 to i608
+%r129 = shl i608 %r128, 576
+%r130 = or i608 %r124, %r129
+%r131 = zext i608 %r130 to i640
+%r133 = getelementptr i32, i32* %r2, i32 19
+%r134 = load i32, i32* %r133
+%r135 = zext i32 %r134 to i640
+%r136 = shl i640 %r135, 608
+%r137 = or i640 %r131, %r136
+%r138 = zext i640 %r137 to i672
+%r140 = getelementptr i32, i32* %r2, i32 20
+%r141 = load i32, i32* %r140
+%r142 = zext i32 %r141 to i672
+%r143 = shl i672 %r142, 640
+%r144 = or i672 %r138, %r143
+%r145 = zext i672 %r144 to i704
+%r147 = getelementptr i32, i32* %r2, i32 21
+%r148 = load i32, i32* %r147
+%r149 = zext i32 %r148 to i704
+%r150 = shl i704 %r149, 672
+%r151 = or i704 %r145, %r150
+%r152 = zext i704 %r151 to i736
+%r154 = getelementptr i32, i32* %r2, i32 22
+%r155 = load i32, i32* %r154
+%r156 = zext i32 %r155 to i736
+%r157 = shl i736 %r156, 704
+%r158 = or i736 %r152, %r157
+%r159 = zext i736 %r158 to i768
+%r161 = getelementptr i32, i32* %r2, i32 23
+%r162 = load i32, i32* %r161
+%r163 = zext i32 %r162 to i768
+%r164 = shl i768 %r163, 736
+%r165 = or i768 %r159, %r164
+%r166 = zext i768 %r165 to i800
+%r168 = getelementptr i32, i32* %r2, i32 24
+%r169 = load i32, i32* %r168
+%r170 = zext i32 %r169 to i800
+%r171 = shl i800 %r170, 768
+%r172 = or i800 %r166, %r171
+%r173 = zext i800 %r172 to i832
+%r175 = getelementptr i32, i32* %r2, i32 25
+%r176 = load i32, i32* %r175
+%r177 = zext i32 %r176 to i832
+%r178 = shl i832 %r177, 800
+%r179 = or i832 %r173, %r178
+%r180 = zext i832 %r179 to i864
+%r182 = getelementptr i32, i32* %r2, i32 26
+%r183 = load i32, i32* %r182
+%r184 = zext i32 %r183 to i864
+%r185 = shl i864 %r184, 832
+%r186 = or i864 %r180, %r185
+%r187 = zext i864 %r186 to i896
+%r189 = getelementptr i32, i32* %r2, i32 27
+%r190 = load i32, i32* %r189
+%r191 = zext i32 %r190 to i896
+%r192 = shl i896 %r191, 864
+%r193 = or i896 %r187, %r192
+%r194 = zext i896 %r193 to i928
+%r196 = getelementptr i32, i32* %r2, i32 28
+%r197 = load i32, i32* %r196
+%r198 = zext i32 %r197 to i928
+%r199 = shl i928 %r198, 896
+%r200 = or i928 %r194, %r199
+%r201 = zext i928 %r200 to i960
+%r203 = getelementptr i32, i32* %r2, i32 29
+%r204 = load i32, i32* %r203
+%r205 = zext i32 %r204 to i960
+%r206 = shl i960 %r205, 928
+%r207 = or i960 %r201, %r206
+%r208 = zext i960 %r207 to i992
+%r210 = getelementptr i32, i32* %r2, i32 30
+%r211 = load i32, i32* %r210
+%r212 = zext i32 %r211 to i992
+%r213 = shl i992 %r212, 960
+%r214 = or i992 %r208, %r213
+%r215 = zext i992 %r214 to i1024
+%r217 = getelementptr i32, i32* %r2, i32 31
+%r218 = load i32, i32* %r217
+%r219 = zext i32 %r218 to i1024
+%r220 = shl i1024 %r219, 992
+%r221 = or i1024 %r215, %r220
+%r222 = zext i1024 %r221 to i1056
+%r224 = getelementptr i32, i32* %r2, i32 32
+%r225 = load i32, i32* %r224
+%r226 = zext i32 %r225 to i1056
+%r227 = shl i1056 %r226, 1024
+%r228 = or i1056 %r222, %r227
+%r229 = trunc i1056 %r228 to i521
+%r230 = zext i521 %r229 to i544
+%r231 = lshr i1056 %r228, 521
+%r232 = trunc i1056 %r231 to i544
+%r233 = add i544 %r230, %r232
+%r234 = lshr i544 %r233, 521
+%r236 = and i544 %r234, 1
+%r237 = add i544 %r233, %r236
+%r238 = trunc i544 %r237 to i521
+%r239 = zext i521 %r238 to i544
+%r240 = lshr i544 %r239, 512
+%r241 = trunc i544 %r240 to i32
+%r243 = or i32 %r241, -512
+%r244 = lshr i544 %r239, 0
+%r245 = trunc i544 %r244 to i32
+%r246 = and i32 %r243, %r245
+%r247 = lshr i544 %r239, 32
+%r248 = trunc i544 %r247 to i32
+%r249 = and i32 %r246, %r248
+%r250 = lshr i544 %r239, 64
+%r251 = trunc i544 %r250 to i32
+%r252 = and i32 %r249, %r251
+%r253 = lshr i544 %r239, 96
+%r254 = trunc i544 %r253 to i32
+%r255 = and i32 %r252, %r254
+%r256 = lshr i544 %r239, 128
+%r257 = trunc i544 %r256 to i32
+%r258 = and i32 %r255, %r257
+%r259 = lshr i544 %r239, 160
+%r260 = trunc i544 %r259 to i32
+%r261 = and i32 %r258, %r260
+%r262 = lshr i544 %r239, 192
+%r263 = trunc i544 %r262 to i32
+%r264 = and i32 %r261, %r263
+%r265 = lshr i544 %r239, 224
+%r266 = trunc i544 %r265 to i32
+%r267 = and i32 %r264, %r266
+%r268 = lshr i544 %r239, 256
+%r269 = trunc i544 %r268 to i32
+%r270 = and i32 %r267, %r269
+%r271 = lshr i544 %r239, 288
+%r272 = trunc i544 %r271 to i32
+%r273 = and i32 %r270, %r272
+%r274 = lshr i544 %r239, 320
+%r275 = trunc i544 %r274 to i32
+%r276 = and i32 %r273, %r275
+%r277 = lshr i544 %r239, 352
+%r278 = trunc i544 %r277 to i32
+%r279 = and i32 %r276, %r278
+%r280 = lshr i544 %r239, 384
+%r281 = trunc i544 %r280 to i32
+%r282 = and i32 %r279, %r281
+%r283 = lshr i544 %r239, 416
+%r284 = trunc i544 %r283 to i32
+%r285 = and i32 %r282, %r284
+%r286 = lshr i544 %r239, 448
+%r287 = trunc i544 %r286 to i32
+%r288 = and i32 %r285, %r287
+%r289 = lshr i544 %r239, 480
+%r290 = trunc i544 %r289 to i32
+%r291 = and i32 %r288, %r290
+%r293 = icmp eq i32 %r291, -1
+br i1%r293, label %zero, label %nonzero
+zero:
+store i32 0, i32* %r1
+%r297 = getelementptr i32, i32* %r1, i32 1
+store i32 0, i32* %r297
+%r300 = getelementptr i32, i32* %r1, i32 2
+store i32 0, i32* %r300
+%r303 = getelementptr i32, i32* %r1, i32 3
+store i32 0, i32* %r303
+%r306 = getelementptr i32, i32* %r1, i32 4
+store i32 0, i32* %r306
+%r309 = getelementptr i32, i32* %r1, i32 5
+store i32 0, i32* %r309
+%r312 = getelementptr i32, i32* %r1, i32 6
+store i32 0, i32* %r312
+%r315 = getelementptr i32, i32* %r1, i32 7
+store i32 0, i32* %r315
+%r318 = getelementptr i32, i32* %r1, i32 8
+store i32 0, i32* %r318
+%r321 = getelementptr i32, i32* %r1, i32 9
+store i32 0, i32* %r321
+%r324 = getelementptr i32, i32* %r1, i32 10
+store i32 0, i32* %r324
+%r327 = getelementptr i32, i32* %r1, i32 11
+store i32 0, i32* %r327
+%r330 = getelementptr i32, i32* %r1, i32 12
+store i32 0, i32* %r330
+%r333 = getelementptr i32, i32* %r1, i32 13
+store i32 0, i32* %r333
+%r336 = getelementptr i32, i32* %r1, i32 14
+store i32 0, i32* %r336
+%r339 = getelementptr i32, i32* %r1, i32 15
+store i32 0, i32* %r339
+%r342 = getelementptr i32, i32* %r1, i32 16
+store i32 0, i32* %r342
+ret void
+nonzero:
+%r343 = trunc i544 %r239 to i32
+%r345 = getelementptr i32, i32* %r1, i32 0
+store i32 %r343, i32* %r345
+%r346 = lshr i544 %r239, 32
+%r347 = trunc i544 %r346 to i32
+%r349 = getelementptr i32, i32* %r1, i32 1
+store i32 %r347, i32* %r349
+%r350 = lshr i544 %r346, 32
+%r351 = trunc i544 %r350 to i32
+%r353 = getelementptr i32, i32* %r1, i32 2
+store i32 %r351, i32* %r353
+%r354 = lshr i544 %r350, 32
+%r355 = trunc i544 %r354 to i32
+%r357 = getelementptr i32, i32* %r1, i32 3
+store i32 %r355, i32* %r357
+%r358 = lshr i544 %r354, 32
+%r359 = trunc i544 %r358 to i32
+%r361 = getelementptr i32, i32* %r1, i32 4
+store i32 %r359, i32* %r361
+%r362 = lshr i544 %r358, 32
+%r363 = trunc i544 %r362 to i32
+%r365 = getelementptr i32, i32* %r1, i32 5
+store i32 %r363, i32* %r365
+%r366 = lshr i544 %r362, 32
+%r367 = trunc i544 %r366 to i32
+%r369 = getelementptr i32, i32* %r1, i32 6
+store i32 %r367, i32* %r369
+%r370 = lshr i544 %r366, 32
+%r371 = trunc i544 %r370 to i32
+%r373 = getelementptr i32, i32* %r1, i32 7
+store i32 %r371, i32* %r373
+%r374 = lshr i544 %r370, 32
+%r375 = trunc i544 %r374 to i32
+%r377 = getelementptr i32, i32* %r1, i32 8
+store i32 %r375, i32* %r377
+%r378 = lshr i544 %r374, 32
+%r379 = trunc i544 %r378 to i32
+%r381 = getelementptr i32, i32* %r1, i32 9
+store i32 %r379, i32* %r381
+%r382 = lshr i544 %r378, 32
+%r383 = trunc i544 %r382 to i32
+%r385 = getelementptr i32, i32* %r1, i32 10
+store i32 %r383, i32* %r385
+%r386 = lshr i544 %r382, 32
+%r387 = trunc i544 %r386 to i32
+%r389 = getelementptr i32, i32* %r1, i32 11
+store i32 %r387, i32* %r389
+%r390 = lshr i544 %r386, 32
+%r391 = trunc i544 %r390 to i32
+%r393 = getelementptr i32, i32* %r1, i32 12
+store i32 %r391, i32* %r393
+%r394 = lshr i544 %r390, 32
+%r395 = trunc i544 %r394 to i32
+%r397 = getelementptr i32, i32* %r1, i32 13
+store i32 %r395, i32* %r397
+%r398 = lshr i544 %r394, 32
+%r399 = trunc i544 %r398 to i32
+%r401 = getelementptr i32, i32* %r1, i32 14
+store i32 %r399, i32* %r401
+%r402 = lshr i544 %r398, 32
+%r403 = trunc i544 %r402 to i32
+%r405 = getelementptr i32, i32* %r1, i32 15
+store i32 %r403, i32* %r405
+%r406 = lshr i544 %r402, 32
+%r407 = trunc i544 %r406 to i32
+%r409 = getelementptr i32, i32* %r1, i32 16
+store i32 %r407, i32* %r409
+ret void
+}
+define i64 @mulPv32x32(i32* noalias  %r2, i32 %r3)
+{
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r8 = zext i32 %r6 to i64
+%r9 = zext i32 %r7 to i64
+%r10 = shl i64 %r9, 32
+%r11 = add i64 %r8, %r10
+ret i64 %r11
+}
+define void @mcl_fp_mulUnitPre1L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
+{
+%r4 = call i64 @mulPv32x32(i32* %r2, i32 %r3)
+%r5 = trunc i64 %r4 to i32
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i64 %r4, 32
+%r9 = trunc i64 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+ret void
+}
+define void @mcl_fpDbl_mulPre1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r4 = load i32, i32* %r2
+%r5 = load i32, i32* %r3
+%r6 = zext i32 %r4 to i64
+%r7 = zext i32 %r5 to i64
+%r8 = mul i64 %r6, %r7
+%r9 = trunc i64 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 0
+store i32 %r9, i32* %r11
+%r12 = lshr i64 %r8, 32
+%r13 = trunc i64 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 1
+store i32 %r13, i32* %r15
+ret void
+}
+define void @mcl_fpDbl_sqrPre1L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r3 = load i32, i32* %r2
+%r4 = load i32, i32* %r2
+%r5 = zext i32 %r3 to i64
+%r6 = zext i32 %r4 to i64
+%r7 = mul i64 %r5, %r6
+%r8 = trunc i64 %r7 to i32
+%r10 = getelementptr i32, i32* %r1, i32 0
+store i32 %r8, i32* %r10
+%r11 = lshr i64 %r7, 32
+%r12 = trunc i64 %r11 to i32
+%r14 = getelementptr i32, i32* %r1, i32 1
+store i32 %r12, i32* %r14
+ret void
+}
+define void @mcl_fp_mont1L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6
+%r9 = getelementptr i32, i32* %r3, i32 0
+%r10 = load i32, i32* %r9
+%r11 = call i64 @mulPv32x32(i32* %r2, i32 %r10)
+%r12 = zext i64 %r11 to i96
+%r13 = trunc i64 %r11 to i32
+%r14 = mul i32 %r13, %r7
+%r15 = call i64 @mulPv32x32(i32* %r4, i32 %r14)
+%r16 = zext i64 %r15 to i96
+%r17 = add i96 %r12, %r16
+%r18 = lshr i96 %r17, 32
+%r19 = trunc i96 %r18 to i64
+%r20 = load i32, i32* %r4
+%r21 = zext i32 %r20 to i64
+%r22 = sub i64 %r19, %r21
+%r23 = lshr i64 %r22, 32
+%r24 = trunc i64 %r23 to i1
+%r25 = select i1 %r24, i64 %r19, i64 %r22
+%r26 = trunc i64 %r25 to i32
+store i32 %r26, i32* %r1
+ret void
+}
+define void @mcl_fp_montNF1L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6
+%r8 = load i32, i32* %r3
+%r9 = call i64 @mulPv32x32(i32* %r2, i32 %r8)
+%r10 = trunc i64 %r9 to i32
+%r11 = mul i32 %r10, %r7
+%r12 = call i64 @mulPv32x32(i32* %r4, i32 %r11)
+%r13 = add i64 %r9, %r12
+%r14 = lshr i64 %r13, 32
+%r15 = trunc i64 %r14 to i32
+%r16 = load i32, i32* %r4
+%r17 = sub i32 %r15, %r16
+%r18 = lshr i32 %r17, 31
+%r19 = trunc i32 %r18 to i1
+%r20 = select i1 %r19, i32 %r15, i32 %r17
+store i32 %r20, i32* %r1
+ret void
+}
+define void @mcl_fp_montRed1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = load i32, i32* %r2
+%r9 = zext i32 %r8 to i64
+%r11 = getelementptr i32, i32* %r2, i32 1
+%r12 = load i32, i32* %r11
+%r13 = zext i32 %r12 to i64
+%r14 = shl i64 %r13, 32
+%r15 = or i64 %r9, %r14
+%r16 = zext i64 %r15 to i96
+%r17 = trunc i96 %r16 to i32
+%r18 = mul i32 %r17, %r6
+%r19 = call i64 @mulPv32x32(i32* %r3, i32 %r18)
+%r20 = zext i64 %r19 to i96
+%r21 = add i96 %r16, %r20
+%r22 = lshr i96 %r21, 32
+%r23 = trunc i96 %r22 to i64
+%r24 = zext i32 %r7 to i64
+%r25 = sub i64 %r23, %r24
+%r26 = lshr i64 %r25, 32
+%r27 = trunc i64 %r26 to i1
+%r28 = select i1 %r27, i64 %r23, i64 %r25
+%r29 = trunc i64 %r28 to i32
+store i32 %r29, i32* %r1
+ret void
+}
+define i32 @mcl_fp_addPre1L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r3
+%r6 = zext i32 %r5 to i64
+%r7 = load i32, i32* %r4
+%r8 = zext i32 %r7 to i64
+%r9 = add i64 %r6, %r8
+%r10 = trunc i64 %r9 to i32
+store i32 %r10, i32* %r2
+%r11 = lshr i64 %r9, 32
+%r12 = trunc i64 %r11 to i32
+ret i32 %r12
+}
+define i32 @mcl_fp_subPre1L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r3
+%r6 = zext i32 %r5 to i64
+%r7 = load i32, i32* %r4
+%r8 = zext i32 %r7 to i64
+%r9 = sub i64 %r6, %r8
+%r10 = trunc i64 %r9 to i32
+store i32 %r10, i32* %r2
+%r11 = lshr i64 %r9, 32
+%r12 = trunc i64 %r11 to i32
+%r14 = and i32 %r12, 1
+ret i32 %r14
+}
+define void @mcl_fp_shr1_1L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r3 = load i32, i32* %r2
+%r4 = lshr i32 %r3, 1
+store i32 %r4, i32* %r1
+ret void
+}
+define void @mcl_fp_add1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = load i32, i32* %r3
+%r7 = zext i32 %r5 to i64
+%r8 = zext i32 %r6 to i64
+%r9 = add i64 %r7, %r8
+%r10 = trunc i64 %r9 to i32
+store i32 %r10, i32* %r1
+%r11 = load i32, i32* %r4
+%r12 = zext i32 %r11 to i64
+%r13 = sub i64 %r9, %r12
+%r14 = lshr i64 %r13, 32
+%r15 = trunc i64 %r14 to i1
+br i1%r15, label %carry, label %nocarry
+nocarry:
+%r16 = trunc i64 %r13 to i32
+store i32 %r16, i32* %r1
+ret void
+carry:
+ret void
+}
+define void @mcl_fp_addNF1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = load i32, i32* %r3
+%r7 = add i32 %r5, %r6
+%r8 = load i32, i32* %r4
+%r9 = sub i32 %r7, %r8
+%r10 = lshr i32 %r9, 31
+%r11 = trunc i32 %r10 to i1
+%r12 = select i1 %r11, i32 %r7, i32 %r9
+store i32 %r12, i32* %r1
+ret void
+}
+define void @mcl_fp_sub1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = load i32, i32* %r3
+%r7 = zext i32 %r5 to i64
+%r8 = zext i32 %r6 to i64
+%r9 = sub i64 %r7, %r8
+%r10 = trunc i64 %r9 to i32
+%r11 = lshr i64 %r9, 32
+%r12 = trunc i64 %r11 to i1
+store i32 %r10, i32* %r1
+br i1%r12, label %carry, label %nocarry
+nocarry:
+ret void
+carry:
+%r13 = load i32, i32* %r4
+%r14 = add i32 %r10, %r13
+store i32 %r14, i32* %r1
+ret void
+}
+define void @mcl_fp_subNF1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = load i32, i32* %r3
+%r7 = sub i32 %r5, %r6
+%r8 = lshr i32 %r7, 31
+%r9 = trunc i32 %r8 to i1
+%r10 = load i32, i32* %r4
+%r12 = select i1 %r9, i32 %r10, i32 0
+%r13 = add i32 %r7, %r12
+store i32 %r13, i32* %r1
+ret void
+}
+define void @mcl_fpDbl_add1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = load i32, i32* %r3
+%r14 = zext i32 %r13 to i64
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = zext i32 %r17 to i64
+%r19 = shl i64 %r18, 32
+%r20 = or i64 %r14, %r19
+%r21 = zext i64 %r12 to i96
+%r22 = zext i64 %r20 to i96
+%r23 = add i96 %r21, %r22
+%r24 = trunc i96 %r23 to i32
+store i32 %r24, i32* %r1
+%r25 = lshr i96 %r23, 32
+%r26 = trunc i96 %r25 to i64
+%r27 = load i32, i32* %r4
+%r28 = zext i32 %r27 to i64
+%r29 = sub i64 %r26, %r28
+%r30 = lshr i64 %r29, 32
+%r31 = trunc i64 %r30 to i1
+%r32 = select i1 %r31, i64 %r26, i64 %r29
+%r33 = trunc i64 %r32 to i32
+%r35 = getelementptr i32, i32* %r1, i32 1
+store i32 %r33, i32* %r35
+ret void
+}
+define void @mcl_fpDbl_sub1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = load i32, i32* %r3
+%r14 = zext i32 %r13 to i64
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = zext i32 %r17 to i64
+%r19 = shl i64 %r18, 32
+%r20 = or i64 %r14, %r19
+%r21 = zext i64 %r12 to i96
+%r22 = zext i64 %r20 to i96
+%r23 = sub i96 %r21, %r22
+%r24 = trunc i96 %r23 to i32
+store i32 %r24, i32* %r1
+%r25 = lshr i96 %r23, 32
+%r26 = trunc i96 %r25 to i32
+%r27 = lshr i96 %r23, 64
+%r28 = trunc i96 %r27 to i1
+%r29 = load i32, i32* %r4
+%r31 = select i1 %r28, i32 %r29, i32 0
+%r32 = add i32 %r26, %r31
+%r34 = getelementptr i32, i32* %r1, i32 1
+store i32 %r32, i32* %r34
+ret void
+}
+define i96 @mulPv64x32(i32* noalias  %r2, i32 %r3)
+{
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r12 = zext i32 %r6 to i64
+%r13 = zext i32 %r10 to i64
+%r14 = shl i64 %r13, 32
+%r15 = or i64 %r12, %r14
+%r16 = zext i32 %r7 to i64
+%r17 = zext i32 %r11 to i64
+%r18 = shl i64 %r17, 32
+%r19 = or i64 %r16, %r18
+%r20 = zext i64 %r15 to i96
+%r21 = zext i64 %r19 to i96
+%r22 = shl i96 %r21, 32
+%r23 = add i96 %r20, %r22
+ret i96 %r23
+}
+define void @mcl_fp_mulUnitPre2L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
+{
+%r4 = call i96 @mulPv64x32(i32* %r2, i32 %r3)
+%r5 = trunc i96 %r4 to i32
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i96 %r4, 32
+%r9 = trunc i96 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i96 %r8, 32
+%r13 = trunc i96 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+ret void
+}
+define void @mcl_fpDbl_mulPre2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r4 = load i32, i32* %r3
+%r5 = call i96 @mulPv64x32(i32* %r2, i32 %r4)
+%r6 = trunc i96 %r5 to i32
+store i32 %r6, i32* %r1
+%r7 = lshr i96 %r5, 32
+%r9 = getelementptr i32, i32* %r3, i32 1
+%r10 = load i32, i32* %r9
+%r11 = call i96 @mulPv64x32(i32* %r2, i32 %r10)
+%r12 = add i96 %r7, %r11
+%r14 = getelementptr i32, i32* %r1, i32 1
+%r15 = trunc i96 %r12 to i32
+%r17 = getelementptr i32, i32* %r14, i32 0
+store i32 %r15, i32* %r17
+%r18 = lshr i96 %r12, 32
+%r19 = trunc i96 %r18 to i32
+%r21 = getelementptr i32, i32* %r14, i32 1
+store i32 %r19, i32* %r21
+%r22 = lshr i96 %r18, 32
+%r23 = trunc i96 %r22 to i32
+%r25 = getelementptr i32, i32* %r14, i32 2
+store i32 %r23, i32* %r25
+ret void
+}
+define void @mcl_fpDbl_sqrPre2L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r3 = load i32, i32* %r2
+%r4 = call i96 @mulPv64x32(i32* %r2, i32 %r3)
+%r5 = trunc i96 %r4 to i32
+store i32 %r5, i32* %r1
+%r6 = lshr i96 %r4, 32
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = call i96 @mulPv64x32(i32* %r2, i32 %r9)
+%r11 = add i96 %r6, %r10
+%r13 = getelementptr i32, i32* %r1, i32 1
+%r14 = trunc i96 %r11 to i32
+%r16 = getelementptr i32, i32* %r13, i32 0
+store i32 %r14, i32* %r16
+%r17 = lshr i96 %r11, 32
+%r18 = trunc i96 %r17 to i32
+%r20 = getelementptr i32, i32* %r13, i32 1
+store i32 %r18, i32* %r20
+%r21 = lshr i96 %r17, 32
+%r22 = trunc i96 %r21 to i32
+%r24 = getelementptr i32, i32* %r13, i32 2
+store i32 %r22, i32* %r24
+ret void
+}
+define void @mcl_fp_mont2L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6
+%r9 = getelementptr i32, i32* %r3, i32 0
+%r10 = load i32, i32* %r9
+%r11 = call i96 @mulPv64x32(i32* %r2, i32 %r10)
+%r12 = zext i96 %r11 to i128
+%r13 = trunc i96 %r11 to i32
+%r14 = mul i32 %r13, %r7
+%r15 = call i96 @mulPv64x32(i32* %r4, i32 %r14)
+%r16 = zext i96 %r15 to i128
+%r17 = add i128 %r12, %r16
+%r18 = lshr i128 %r17, 32
+%r20 = getelementptr i32, i32* %r3, i32 1
+%r21 = load i32, i32* %r20
+%r22 = call i96 @mulPv64x32(i32* %r2, i32 %r21)
+%r23 = zext i96 %r22 to i128
+%r24 = add i128 %r18, %r23
+%r25 = trunc i128 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i96 @mulPv64x32(i32* %r4, i32 %r26)
+%r28 = zext i96 %r27 to i128
+%r29 = add i128 %r24, %r28
+%r30 = lshr i128 %r29, 32
+%r31 = trunc i128 %r30 to i96
+%r32 = load i32, i32* %r4
+%r33 = zext i32 %r32 to i64
+%r35 = getelementptr i32, i32* %r4, i32 1
+%r36 = load i32, i32* %r35
+%r37 = zext i32 %r36 to i64
+%r38 = shl i64 %r37, 32
+%r39 = or i64 %r33, %r38
+%r40 = zext i64 %r39 to i96
+%r41 = sub i96 %r31, %r40
+%r42 = lshr i96 %r41, 64
+%r43 = trunc i96 %r42 to i1
+%r44 = select i1 %r43, i96 %r31, i96 %r41
+%r45 = trunc i96 %r44 to i64
+%r46 = trunc i64 %r45 to i32
+%r48 = getelementptr i32, i32* %r1, i32 0
+store i32 %r46, i32* %r48
+%r49 = lshr i64 %r45, 32
+%r50 = trunc i64 %r49 to i32
+%r52 = getelementptr i32, i32* %r1, i32 1
+store i32 %r50, i32* %r52
+ret void
+}
+define void @mcl_fp_montNF2L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6
+%r8 = load i32, i32* %r3
+%r9 = call i96 @mulPv64x32(i32* %r2, i32 %r8)
+%r10 = trunc i96 %r9 to i32
+%r11 = mul i32 %r10, %r7
+%r12 = call i96 @mulPv64x32(i32* %r4, i32 %r11)
+%r13 = add i96 %r9, %r12
+%r14 = lshr i96 %r13, 32
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = call i96 @mulPv64x32(i32* %r2, i32 %r17)
+%r19 = add i96 %r14, %r18
+%r20 = trunc i96 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i96 @mulPv64x32(i32* %r4, i32 %r21)
+%r23 = add i96 %r19, %r22
+%r24 = lshr i96 %r23, 32
+%r25 = trunc i96 %r24 to i64
+%r26 = load i32, i32* %r4
+%r27 = zext i32 %r26 to i64
+%r29 = getelementptr i32, i32* %r4, i32 1
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i64
+%r32 = shl i64 %r31, 32
+%r33 = or i64 %r27, %r32
+%r34 = sub i64 %r25, %r33
+%r35 = lshr i64 %r34, 63
+%r36 = trunc i64 %r35 to i1
+%r37 = select i1 %r36, i64 %r25, i64 %r34
+%r38 = trunc i64 %r37 to i32
+%r40 = getelementptr i32, i32* %r1, i32 0
+store i32 %r38, i32* %r40
+%r41 = lshr i64 %r37, 32
+%r42 = trunc i64 %r41 to i32
+%r44 = getelementptr i32, i32* %r1, i32 1
+store i32 %r42, i32* %r44
+ret void
+}
+define void @mcl_fp_montRed2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = load i32, i32* %r2
+%r16 = zext i32 %r15 to i64
+%r18 = getelementptr i32, i32* %r2, i32 1
+%r19 = load i32, i32* %r18
+%r20 = zext i32 %r19 to i64
+%r21 = shl i64 %r20, 32
+%r22 = or i64 %r16, %r21
+%r23 = zext i64 %r22 to i96
+%r25 = getelementptr i32, i32* %r2, i32 2
+%r26 = load i32, i32* %r25
+%r27 = zext i32 %r26 to i96
+%r28 = shl i96 %r27, 64
+%r29 = or i96 %r23, %r28
+%r30 = zext i96 %r29 to i128
+%r32 = getelementptr i32, i32* %r2, i32 3
+%r33 = load i32, i32* %r32
+%r34 = zext i32 %r33 to i128
+%r35 = shl i128 %r34, 96
+%r36 = or i128 %r30, %r35
+%r37 = zext i128 %r36 to i160
+%r38 = trunc i160 %r37 to i32
+%r39 = mul i32 %r38, %r6
+%r40 = call i96 @mulPv64x32(i32* %r3, i32 %r39)
+%r41 = zext i96 %r40 to i160
+%r42 = add i160 %r37, %r41
+%r43 = lshr i160 %r42, 32
+%r44 = trunc i160 %r43 to i128
+%r45 = trunc i128 %r44 to i32
+%r46 = mul i32 %r45, %r6
+%r47 = call i96 @mulPv64x32(i32* %r3, i32 %r46)
+%r48 = zext i96 %r47 to i128
+%r49 = add i128 %r44, %r48
+%r50 = lshr i128 %r49, 32
+%r51 = trunc i128 %r50 to i96
+%r52 = zext i64 %r14 to i96
+%r53 = sub i96 %r51, %r52
+%r54 = lshr i96 %r53, 64
+%r55 = trunc i96 %r54 to i1
+%r56 = select i1 %r55, i96 %r51, i96 %r53
+%r57 = trunc i96 %r56 to i64
+%r58 = trunc i64 %r57 to i32
+%r60 = getelementptr i32, i32* %r1, i32 0
+store i32 %r58, i32* %r60
+%r61 = lshr i64 %r57, 32
+%r62 = trunc i64 %r61 to i32
+%r64 = getelementptr i32, i32* %r1, i32 1
+store i32 %r62, i32* %r64
+ret void
+}
+define i32 @mcl_fp_addPre2L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r3
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r14 = load i32, i32* %r4
+%r15 = zext i32 %r14 to i64
+%r17 = getelementptr i32, i32* %r4, i32 1
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i64
+%r20 = shl i64 %r19, 32
+%r21 = or i64 %r15, %r20
+%r22 = zext i64 %r21 to i96
+%r23 = add i96 %r13, %r22
+%r24 = trunc i96 %r23 to i64
+%r25 = trunc i64 %r24 to i32
+%r27 = getelementptr i32, i32* %r2, i32 0
+store i32 %r25, i32* %r27
+%r28 = lshr i64 %r24, 32
+%r29 = trunc i64 %r28 to i32
+%r31 = getelementptr i32, i32* %r2, i32 1
+store i32 %r29, i32* %r31
+%r32 = lshr i96 %r23, 64
+%r33 = trunc i96 %r32 to i32
+ret i32 %r33
+}
+define i32 @mcl_fp_subPre2L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r3
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r14 = load i32, i32* %r4
+%r15 = zext i32 %r14 to i64
+%r17 = getelementptr i32, i32* %r4, i32 1
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i64
+%r20 = shl i64 %r19, 32
+%r21 = or i64 %r15, %r20
+%r22 = zext i64 %r21 to i96
+%r23 = sub i96 %r13, %r22
+%r24 = trunc i96 %r23 to i64
+%r25 = trunc i64 %r24 to i32
+%r27 = getelementptr i32, i32* %r2, i32 0
+store i32 %r25, i32* %r27
+%r28 = lshr i64 %r24, 32
+%r29 = trunc i64 %r28 to i32
+%r31 = getelementptr i32, i32* %r2, i32 1
+store i32 %r29, i32* %r31
+%r32 = lshr i96 %r23, 64
+%r33 = trunc i96 %r32 to i32
+%r35 = and i32 %r33, 1
+ret i32 %r35
+}
+define void @mcl_fp_shr1_2L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r3 = load i32, i32* %r2
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = lshr i64 %r10, 1
+%r12 = trunc i64 %r11 to i32
+%r14 = getelementptr i32, i32* %r1, i32 0
+store i32 %r12, i32* %r14
+%r15 = lshr i64 %r11, 32
+%r16 = trunc i64 %r15 to i32
+%r18 = getelementptr i32, i32* %r1, i32 1
+store i32 %r16, i32* %r18
+ret void
+}
+define void @mcl_fp_add2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = load i32, i32* %r3
+%r14 = zext i32 %r13 to i64
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = zext i32 %r17 to i64
+%r19 = shl i64 %r18, 32
+%r20 = or i64 %r14, %r19
+%r21 = zext i64 %r12 to i96
+%r22 = zext i64 %r20 to i96
+%r23 = add i96 %r21, %r22
+%r24 = trunc i96 %r23 to i64
+%r25 = trunc i64 %r24 to i32
+%r27 = getelementptr i32, i32* %r1, i32 0
+store i32 %r25, i32* %r27
+%r28 = lshr i64 %r24, 32
+%r29 = trunc i64 %r28 to i32
+%r31 = getelementptr i32, i32* %r1, i32 1
+store i32 %r29, i32* %r31
+%r32 = load i32, i32* %r4
+%r33 = zext i32 %r32 to i64
+%r35 = getelementptr i32, i32* %r4, i32 1
+%r36 = load i32, i32* %r35
+%r37 = zext i32 %r36 to i64
+%r38 = shl i64 %r37, 32
+%r39 = or i64 %r33, %r38
+%r40 = zext i64 %r39 to i96
+%r41 = sub i96 %r23, %r40
+%r42 = lshr i96 %r41, 64
+%r43 = trunc i96 %r42 to i1
+br i1%r43, label %carry, label %nocarry
+nocarry:
+%r44 = trunc i96 %r41 to i64
+%r45 = trunc i64 %r44 to i32
+%r47 = getelementptr i32, i32* %r1, i32 0
+store i32 %r45, i32* %r47
+%r48 = lshr i64 %r44, 32
+%r49 = trunc i64 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 1
+store i32 %r49, i32* %r51
+ret void
+carry:
+ret void
+}
+define void @mcl_fp_addNF2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = load i32, i32* %r3
+%r14 = zext i32 %r13 to i64
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = zext i32 %r17 to i64
+%r19 = shl i64 %r18, 32
+%r20 = or i64 %r14, %r19
+%r21 = add i64 %r12, %r20
+%r22 = load i32, i32* %r4
+%r23 = zext i32 %r22 to i64
+%r25 = getelementptr i32, i32* %r4, i32 1
+%r26 = load i32, i32* %r25
+%r27 = zext i32 %r26 to i64
+%r28 = shl i64 %r27, 32
+%r29 = or i64 %r23, %r28
+%r30 = sub i64 %r21, %r29
+%r31 = lshr i64 %r30, 63
+%r32 = trunc i64 %r31 to i1
+%r33 = select i1 %r32, i64 %r21, i64 %r30
+%r34 = trunc i64 %r33 to i32
+%r36 = getelementptr i32, i32* %r1, i32 0
+store i32 %r34, i32* %r36
+%r37 = lshr i64 %r33, 32
+%r38 = trunc i64 %r37 to i32
+%r40 = getelementptr i32, i32* %r1, i32 1
+store i32 %r38, i32* %r40
+ret void
+}
+define void @mcl_fp_sub2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = load i32, i32* %r3
+%r14 = zext i32 %r13 to i64
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = zext i32 %r17 to i64
+%r19 = shl i64 %r18, 32
+%r20 = or i64 %r14, %r19
+%r21 = zext i64 %r12 to i96
+%r22 = zext i64 %r20 to i96
+%r23 = sub i96 %r21, %r22
+%r24 = trunc i96 %r23 to i64
+%r25 = lshr i96 %r23, 64
+%r26 = trunc i96 %r25 to i1
+%r27 = trunc i64 %r24 to i32
+%r29 = getelementptr i32, i32* %r1, i32 0
+store i32 %r27, i32* %r29
+%r30 = lshr i64 %r24, 32
+%r31 = trunc i64 %r30 to i32
+%r33 = getelementptr i32, i32* %r1, i32 1
+store i32 %r31, i32* %r33
+br i1%r26, label %carry, label %nocarry
+nocarry:
+ret void
+carry:
+%r34 = load i32, i32* %r4
+%r35 = zext i32 %r34 to i64
+%r37 = getelementptr i32, i32* %r4, i32 1
+%r38 = load i32, i32* %r37
+%r39 = zext i32 %r38 to i64
+%r40 = shl i64 %r39, 32
+%r41 = or i64 %r35, %r40
+%r42 = add i64 %r24, %r41
+%r43 = trunc i64 %r42 to i32
+%r45 = getelementptr i32, i32* %r1, i32 0
+store i32 %r43, i32* %r45
+%r46 = lshr i64 %r42, 32
+%r47 = trunc i64 %r46 to i32
+%r49 = getelementptr i32, i32* %r1, i32 1
+store i32 %r47, i32* %r49
+ret void
+}
+define void @mcl_fp_subNF2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = load i32, i32* %r3
+%r14 = zext i32 %r13 to i64
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = zext i32 %r17 to i64
+%r19 = shl i64 %r18, 32
+%r20 = or i64 %r14, %r19
+%r21 = sub i64 %r12, %r20
+%r22 = lshr i64 %r21, 63
+%r23 = trunc i64 %r22 to i1
+%r24 = load i32, i32* %r4
+%r25 = zext i32 %r24 to i64
+%r27 = getelementptr i32, i32* %r4, i32 1
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i64
+%r30 = shl i64 %r29, 32
+%r31 = or i64 %r25, %r30
+%r33 = select i1 %r23, i64 %r31, i64 0
+%r34 = add i64 %r21, %r33
+%r35 = trunc i64 %r34 to i32
+%r37 = getelementptr i32, i32* %r1, i32 0
+store i32 %r35, i32* %r37
+%r38 = lshr i64 %r34, 32
+%r39 = trunc i64 %r38 to i32
+%r41 = getelementptr i32, i32* %r1, i32 1
+store i32 %r39, i32* %r41
+ret void
+}
+define void @mcl_fpDbl_add2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = load i32, i32* %r3
+%r28 = zext i32 %r27 to i64
+%r30 = getelementptr i32, i32* %r3, i32 1
+%r31 = load i32, i32* %r30
+%r32 = zext i32 %r31 to i64
+%r33 = shl i64 %r32, 32
+%r34 = or i64 %r28, %r33
+%r35 = zext i64 %r34 to i96
+%r37 = getelementptr i32, i32* %r3, i32 2
+%r38 = load i32, i32* %r37
+%r39 = zext i32 %r38 to i96
+%r40 = shl i96 %r39, 64
+%r41 = or i96 %r35, %r40
+%r42 = zext i96 %r41 to i128
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i128
+%r47 = shl i128 %r46, 96
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r26 to i160
+%r50 = zext i128 %r48 to i160
+%r51 = add i160 %r49, %r50
+%r52 = trunc i160 %r51 to i64
+%r53 = trunc i64 %r52 to i32
+%r55 = getelementptr i32, i32* %r1, i32 0
+store i32 %r53, i32* %r55
+%r56 = lshr i64 %r52, 32
+%r57 = trunc i64 %r56 to i32
+%r59 = getelementptr i32, i32* %r1, i32 1
+store i32 %r57, i32* %r59
+%r60 = lshr i160 %r51, 64
+%r61 = trunc i160 %r60 to i96
+%r62 = load i32, i32* %r4
+%r63 = zext i32 %r62 to i64
+%r65 = getelementptr i32, i32* %r4, i32 1
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i64
+%r68 = shl i64 %r67, 32
+%r69 = or i64 %r63, %r68
+%r70 = zext i64 %r69 to i96
+%r71 = sub i96 %r61, %r70
+%r72 = lshr i96 %r71, 64
+%r73 = trunc i96 %r72 to i1
+%r74 = select i1 %r73, i96 %r61, i96 %r71
+%r75 = trunc i96 %r74 to i64
+%r77 = getelementptr i32, i32* %r1, i32 2
+%r78 = trunc i64 %r75 to i32
+%r80 = getelementptr i32, i32* %r77, i32 0
+store i32 %r78, i32* %r80
+%r81 = lshr i64 %r75, 32
+%r82 = trunc i64 %r81 to i32
+%r84 = getelementptr i32, i32* %r77, i32 1
+store i32 %r82, i32* %r84
+ret void
+}
+define void @mcl_fpDbl_sub2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = load i32, i32* %r3
+%r28 = zext i32 %r27 to i64
+%r30 = getelementptr i32, i32* %r3, i32 1
+%r31 = load i32, i32* %r30
+%r32 = zext i32 %r31 to i64
+%r33 = shl i64 %r32, 32
+%r34 = or i64 %r28, %r33
+%r35 = zext i64 %r34 to i96
+%r37 = getelementptr i32, i32* %r3, i32 2
+%r38 = load i32, i32* %r37
+%r39 = zext i32 %r38 to i96
+%r40 = shl i96 %r39, 64
+%r41 = or i96 %r35, %r40
+%r42 = zext i96 %r41 to i128
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i128
+%r47 = shl i128 %r46, 96
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r26 to i160
+%r50 = zext i128 %r48 to i160
+%r51 = sub i160 %r49, %r50
+%r52 = trunc i160 %r51 to i64
+%r53 = trunc i64 %r52 to i32
+%r55 = getelementptr i32, i32* %r1, i32 0
+store i32 %r53, i32* %r55
+%r56 = lshr i64 %r52, 32
+%r57 = trunc i64 %r56 to i32
+%r59 = getelementptr i32, i32* %r1, i32 1
+store i32 %r57, i32* %r59
+%r60 = lshr i160 %r51, 64
+%r61 = trunc i160 %r60 to i64
+%r62 = lshr i160 %r51, 128
+%r63 = trunc i160 %r62 to i1
+%r64 = load i32, i32* %r4
+%r65 = zext i32 %r64 to i64
+%r67 = getelementptr i32, i32* %r4, i32 1
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i64
+%r70 = shl i64 %r69, 32
+%r71 = or i64 %r65, %r70
+%r73 = select i1 %r63, i64 %r71, i64 0
+%r74 = add i64 %r61, %r73
+%r76 = getelementptr i32, i32* %r1, i32 2
+%r77 = trunc i64 %r74 to i32
+%r79 = getelementptr i32, i32* %r76, i32 0
+store i32 %r77, i32* %r79
+%r80 = lshr i64 %r74, 32
+%r81 = trunc i64 %r80 to i32
+%r83 = getelementptr i32, i32* %r76, i32 1
+store i32 %r81, i32* %r83
+ret void
+}
+define i128 @mulPv96x32(i32* noalias  %r2, i32 %r3)
+{
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r16 = zext i32 %r6 to i64
+%r17 = zext i32 %r10 to i64
+%r18 = shl i64 %r17, 32
+%r19 = or i64 %r16, %r18
+%r20 = zext i64 %r19 to i96
+%r21 = zext i32 %r14 to i96
+%r22 = shl i96 %r21, 64
+%r23 = or i96 %r20, %r22
+%r24 = zext i32 %r7 to i64
+%r25 = zext i32 %r11 to i64
+%r26 = shl i64 %r25, 32
+%r27 = or i64 %r24, %r26
+%r28 = zext i64 %r27 to i96
+%r29 = zext i32 %r15 to i96
+%r30 = shl i96 %r29, 64
+%r31 = or i96 %r28, %r30
+%r32 = zext i96 %r23 to i128
+%r33 = zext i96 %r31 to i128
+%r34 = shl i128 %r33, 32
+%r35 = add i128 %r32, %r34
+ret i128 %r35
+}
+define void @mcl_fp_mulUnitPre3L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
+{
+%r4 = call i128 @mulPv96x32(i32* %r2, i32 %r3)
+%r5 = trunc i128 %r4 to i32
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i128 %r4, 32
+%r9 = trunc i128 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i128 %r8, 32
+%r13 = trunc i128 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i128 %r12, 32
+%r17 = trunc i128 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+ret void
+}
+define void @mcl_fpDbl_mulPre3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r4 = load i32, i32* %r3
+%r5 = call i128 @mulPv96x32(i32* %r2, i32 %r4)
+%r6 = trunc i128 %r5 to i32
+store i32 %r6, i32* %r1
+%r7 = lshr i128 %r5, 32
+%r9 = getelementptr i32, i32* %r3, i32 1
+%r10 = load i32, i32* %r9
+%r11 = call i128 @mulPv96x32(i32* %r2, i32 %r10)
+%r12 = add i128 %r7, %r11
+%r13 = trunc i128 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 1
+store i32 %r13, i32* %r15
+%r16 = lshr i128 %r12, 32
+%r18 = getelementptr i32, i32* %r3, i32 2
+%r19 = load i32, i32* %r18
+%r20 = call i128 @mulPv96x32(i32* %r2, i32 %r19)
+%r21 = add i128 %r16, %r20
+%r23 = getelementptr i32, i32* %r1, i32 2
+%r24 = trunc i128 %r21 to i32
+%r26 = getelementptr i32, i32* %r23, i32 0
+store i32 %r24, i32* %r26
+%r27 = lshr i128 %r21, 32
+%r28 = trunc i128 %r27 to i32
+%r30 = getelementptr i32, i32* %r23, i32 1
+store i32 %r28, i32* %r30
+%r31 = lshr i128 %r27, 32
+%r32 = trunc i128 %r31 to i32
+%r34 = getelementptr i32, i32* %r23, i32 2
+store i32 %r32, i32* %r34
+%r35 = lshr i128 %r31, 32
+%r36 = trunc i128 %r35 to i32
+%r38 = getelementptr i32, i32* %r23, i32 3
+store i32 %r36, i32* %r38
+ret void
+}
+define void @mcl_fpDbl_sqrPre3L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r3 = load i32, i32* %r2
+%r4 = call i128 @mulPv96x32(i32* %r2, i32 %r3)
+%r5 = trunc i128 %r4 to i32
+store i32 %r5, i32* %r1
+%r6 = lshr i128 %r4, 32
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = call i128 @mulPv96x32(i32* %r2, i32 %r9)
+%r11 = add i128 %r6, %r10
+%r12 = trunc i128 %r11 to i32
+%r14 = getelementptr i32, i32* %r1, i32 1
+store i32 %r12, i32* %r14
+%r15 = lshr i128 %r11, 32
+%r17 = getelementptr i32, i32* %r2, i32 2
+%r18 = load i32, i32* %r17
+%r19 = call i128 @mulPv96x32(i32* %r2, i32 %r18)
+%r20 = add i128 %r15, %r19
+%r22 = getelementptr i32, i32* %r1, i32 2
+%r23 = trunc i128 %r20 to i32
+%r25 = getelementptr i32, i32* %r22, i32 0
+store i32 %r23, i32* %r25
+%r26 = lshr i128 %r20, 32
+%r27 = trunc i128 %r26 to i32
+%r29 = getelementptr i32, i32* %r22, i32 1
+store i32 %r27, i32* %r29
+%r30 = lshr i128 %r26, 32
+%r31 = trunc i128 %r30 to i32
+%r33 = getelementptr i32, i32* %r22, i32 2
+store i32 %r31, i32* %r33
+%r34 = lshr i128 %r30, 32
+%r35 = trunc i128 %r34 to i32
+%r37 = getelementptr i32, i32* %r22, i32 3
+store i32 %r35, i32* %r37
+ret void
+}
+define void @mcl_fp_mont3L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6
+%r9 = getelementptr i32, i32* %r3, i32 0
+%r10 = load i32, i32* %r9
+%r11 = call i128 @mulPv96x32(i32* %r2, i32 %r10)
+%r12 = zext i128 %r11 to i160
+%r13 = trunc i128 %r11 to i32
+%r14 = mul i32 %r13, %r7
+%r15 = call i128 @mulPv96x32(i32* %r4, i32 %r14)
+%r16 = zext i128 %r15 to i160
+%r17 = add i160 %r12, %r16
+%r18 = lshr i160 %r17, 32
+%r20 = getelementptr i32, i32* %r3, i32 1
+%r21 = load i32, i32* %r20
+%r22 = call i128 @mulPv96x32(i32* %r2, i32 %r21)
+%r23 = zext i128 %r22 to i160
+%r24 = add i160 %r18, %r23
+%r25 = trunc i160 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i128 @mulPv96x32(i32* %r4, i32 %r26)
+%r28 = zext i128 %r27 to i160
+%r29 = add i160 %r24, %r28
+%r30 = lshr i160 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2
+%r33 = load i32, i32* %r32
+%r34 = call i128 @mulPv96x32(i32* %r2, i32 %r33)
+%r35 = zext i128 %r34 to i160
+%r36 = add i160 %r30, %r35
+%r37 = trunc i160 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i128 @mulPv96x32(i32* %r4, i32 %r38)
+%r40 = zext i128 %r39 to i160
+%r41 = add i160 %r36, %r40
+%r42 = lshr i160 %r41, 32
+%r43 = trunc i160 %r42 to i128
+%r44 = load i32, i32* %r4
+%r45 = zext i32 %r44 to i64
+%r47 = getelementptr i32, i32* %r4, i32 1
+%r48 = load i32, i32* %r47
+%r49 = zext i32 %r48 to i64
+%r50 = shl i64 %r49, 32
+%r51 = or i64 %r45, %r50
+%r52 = zext i64 %r51 to i96
+%r54 = getelementptr i32, i32* %r4, i32 2
+%r55 = load i32, i32* %r54
+%r56 = zext i32 %r55 to i96
+%r57 = shl i96 %r56, 64
+%r58 = or i96 %r52, %r57
+%r59 = zext i96 %r58 to i128
+%r60 = sub i128 %r43, %r59
+%r61 = lshr i128 %r60, 96
+%r62 = trunc i128 %r61 to i1
+%r63 = select i1 %r62, i128 %r43, i128 %r60
+%r64 = trunc i128 %r63 to i96
+%r65 = trunc i96 %r64 to i32
+%r67 = getelementptr i32, i32* %r1, i32 0
+store i32 %r65, i32* %r67
+%r68 = lshr i96 %r64, 32
+%r69 = trunc i96 %r68 to i32
+%r71 = getelementptr i32, i32* %r1, i32 1
+store i32 %r69, i32* %r71
+%r72 = lshr i96 %r68, 32
+%r73 = trunc i96 %r72 to i32
+%r75 = getelementptr i32, i32* %r1, i32 2
+store i32 %r73, i32* %r75
+ret void
+}
+define void @mcl_fp_montNF3L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6
+%r8 = load i32, i32* %r3
+%r9 = call i128 @mulPv96x32(i32* %r2, i32 %r8)
+%r10 = trunc i128 %r9 to i32
+%r11 = mul i32 %r10, %r7
+%r12 = call i128 @mulPv96x32(i32* %r4, i32 %r11)
+%r13 = add i128 %r9, %r12
+%r14 = lshr i128 %r13, 32
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = call i128 @mulPv96x32(i32* %r2, i32 %r17)
+%r19 = add i128 %r14, %r18
+%r20 = trunc i128 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i128 @mulPv96x32(i32* %r4, i32 %r21)
+%r23 = add i128 %r19, %r22
+%r24 = lshr i128 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2
+%r27 = load i32, i32* %r26
+%r28 = call i128 @mulPv96x32(i32* %r2, i32 %r27)
+%r29 = add i128 %r24, %r28
+%r30 = trunc i128 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i128 @mulPv96x32(i32* %r4, i32 %r31)
+%r33 = add i128 %r29, %r32
+%r34 = lshr i128 %r33, 32
+%r35 = trunc i128 %r34 to i96
+%r36 = load i32, i32* %r4
+%r37 = zext i32 %r36 to i64
+%r39 = getelementptr i32, i32* %r4, i32 1
+%r40 = load i32, i32* %r39
+%r41 = zext i32 %r40 to i64
+%r42 = shl i64 %r41, 32
+%r43 = or i64 %r37, %r42
+%r44 = zext i64 %r43 to i96
+%r46 = getelementptr i32, i32* %r4, i32 2
+%r47 = load i32, i32* %r46
+%r48 = zext i32 %r47 to i96
+%r49 = shl i96 %r48, 64
+%r50 = or i96 %r44, %r49
+%r51 = sub i96 %r35, %r50
+%r52 = lshr i96 %r51, 95
+%r53 = trunc i96 %r52 to i1
+%r54 = select i1 %r53, i96 %r35, i96 %r51
+%r55 = trunc i96 %r54 to i32
+%r57 = getelementptr i32, i32* %r1, i32 0
+store i32 %r55, i32* %r57
+%r58 = lshr i96 %r54, 32
+%r59 = trunc i96 %r58 to i32
+%r61 = getelementptr i32, i32* %r1, i32 1
+store i32 %r59, i32* %r61
+%r62 = lshr i96 %r58, 32
+%r63 = trunc i96 %r62 to i32
+%r65 = getelementptr i32, i32* %r1, i32 2
+store i32 %r63, i32* %r65
+ret void
+}
+define void @mcl_fp_montRed3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = load i32, i32* %r2
+%r23 = zext i32 %r22 to i64
+%r25 = getelementptr i32, i32* %r2, i32 1
+%r26 = load i32, i32* %r25
+%r27 = zext i32 %r26 to i64
+%r28 = shl i64 %r27, 32
+%r29 = or i64 %r23, %r28
+%r30 = zext i64 %r29 to i96
+%r32 = getelementptr i32, i32* %r2, i32 2
+%r33 = load i32, i32* %r32
+%r34 = zext i32 %r33 to i96
+%r35 = shl i96 %r34, 64
+%r36 = or i96 %r30, %r35
+%r37 = zext i96 %r36 to i128
+%r39 = getelementptr i32, i32* %r2, i32 3
+%r40 = load i32, i32* %r39
+%r41 = zext i32 %r40 to i128
+%r42 = shl i128 %r41, 96
+%r43 = or i128 %r37, %r42
+%r44 = zext i128 %r43 to i160
+%r46 = getelementptr i32, i32* %r2, i32 4
+%r47 = load i32, i32* %r46
+%r48 = zext i32 %r47 to i160
+%r49 = shl i160 %r48, 128
+%r50 = or i160 %r44, %r49
+%r51 = zext i160 %r50 to i192
+%r53 = getelementptr i32, i32* %r2, i32 5
+%r54 = load i32, i32* %r53
+%r55 = zext i32 %r54 to i192
+%r56 = shl i192 %r55, 160
+%r57 = or i192 %r51, %r56
+%r58 = zext i192 %r57 to i224
+%r59 = trunc i224 %r58 to i32
+%r60 = mul i32 %r59, %r6
+%r61 = call i128 @mulPv96x32(i32* %r3, i32 %r60)
+%r62 = zext i128 %r61 to i224
+%r63 = add i224 %r58, %r62
+%r64 = lshr i224 %r63, 32
+%r65 = trunc i224 %r64 to i192
+%r66 = trunc i192 %r65 to i32
+%r67 = mul i32 %r66, %r6
+%r68 = call i128 @mulPv96x32(i32* %r3, i32 %r67)
+%r69 = zext i128 %r68 to i192
+%r70 = add i192 %r65, %r69
+%r71 = lshr i192 %r70, 32
+%r72 = trunc i192 %r71 to i160
+%r73 = trunc i160 %r72 to i32
+%r74 = mul i32 %r73, %r6
+%r75 = call i128 @mulPv96x32(i32* %r3, i32 %r74)
+%r76 = zext i128 %r75 to i160
+%r77 = add i160 %r72, %r76
+%r78 = lshr i160 %r77, 32
+%r79 = trunc i160 %r78 to i128
+%r80 = zext i96 %r21 to i128
+%r81 = sub i128 %r79, %r80
+%r82 = lshr i128 %r81, 96
+%r83 = trunc i128 %r82 to i1
+%r84 = select i1 %r83, i128 %r79, i128 %r81
+%r85 = trunc i128 %r84 to i96
+%r86 = trunc i96 %r85 to i32
+%r88 = getelementptr i32, i32* %r1, i32 0
+store i32 %r86, i32* %r88
+%r89 = lshr i96 %r85, 32
+%r90 = trunc i96 %r89 to i32
+%r92 = getelementptr i32, i32* %r1, i32 1
+store i32 %r90, i32* %r92
+%r93 = lshr i96 %r89, 32
+%r94 = trunc i96 %r93 to i32
+%r96 = getelementptr i32, i32* %r1, i32 2
+store i32 %r94, i32* %r96
+ret void
+}
+define i32 @mcl_fp_addPre3L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r3
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r21 = load i32, i32* %r4
+%r22 = zext i32 %r21 to i64
+%r24 = getelementptr i32, i32* %r4, i32 1
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i64
+%r27 = shl i64 %r26, 32
+%r28 = or i64 %r22, %r27
+%r29 = zext i64 %r28 to i96
+%r31 = getelementptr i32, i32* %r4, i32 2
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i96
+%r34 = shl i96 %r33, 64
+%r35 = or i96 %r29, %r34
+%r36 = zext i96 %r35 to i128
+%r37 = add i128 %r20, %r36
+%r38 = trunc i128 %r37 to i96
+%r39 = trunc i96 %r38 to i32
+%r41 = getelementptr i32, i32* %r2, i32 0
+store i32 %r39, i32* %r41
+%r42 = lshr i96 %r38, 32
+%r43 = trunc i96 %r42 to i32
+%r45 = getelementptr i32, i32* %r2, i32 1
+store i32 %r43, i32* %r45
+%r46 = lshr i96 %r42, 32
+%r47 = trunc i96 %r46 to i32
+%r49 = getelementptr i32, i32* %r2, i32 2
+store i32 %r47, i32* %r49
+%r50 = lshr i128 %r37, 96
+%r51 = trunc i128 %r50 to i32
+ret i32 %r51
+}
+define i32 @mcl_fp_subPre3L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; writes the low 96 bits of (%r3 - %r4) to %r2 as three i32 limbs (least-significant first) and returns the borrow (0 or 1)
+{
+%r5 = load i32, i32* %r3 ; limb 0 of the minuend
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18 ; minuend packed into one 96-bit integer
+%r20 = zext i96 %r19 to i128 ; widened so that the borrow shows up in bit 96
+%r21 = load i32, i32* %r4 ; limb 0 of the subtrahend
+%r22 = zext i32 %r21 to i64
+%r24 = getelementptr i32, i32* %r4, i32 1
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i64
+%r27 = shl i64 %r26, 32
+%r28 = or i64 %r22, %r27
+%r29 = zext i64 %r28 to i96
+%r31 = getelementptr i32, i32* %r4, i32 2
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i96
+%r34 = shl i96 %r33, 64
+%r35 = or i96 %r29, %r34 ; subtrahend packed into one 96-bit integer
+%r36 = zext i96 %r35 to i128
+%r37 = sub i128 %r20, %r36 ; wraps modulo 2^128 when the minuend is smaller
+%r38 = trunc i128 %r37 to i96 ; low 96 bits are the stored result
+%r39 = trunc i96 %r38 to i32
+%r41 = getelementptr i32, i32* %r2, i32 0
+store i32 %r39, i32* %r41
+%r42 = lshr i96 %r38, 32
+%r43 = trunc i96 %r42 to i32
+%r45 = getelementptr i32, i32* %r2, i32 1
+store i32 %r43, i32* %r45
+%r46 = lshr i96 %r42, 32
+%r47 = trunc i96 %r46 to i32
+%r49 = getelementptr i32, i32* %r2, i32 2
+store i32 %r47, i32* %r49
+%r50 = lshr i128 %r37, 96 ; upper 32 bits are all ones after a wrap, all zeros otherwise
+%r51 = trunc i128 %r50 to i32
+%r53 = and i32 %r51, 1 ; reduce the all-ones pattern to a single borrow bit
+ret i32 %r53
+}
+define void @mcl_fp_shr1_3L(i32* noalias  %r1, i32* noalias  %r2) ; reads three i32 limbs (least-significant first) from %r2, shifts the 96-bit value right by one bit, and writes three limbs to %r1
+{
+%r3 = load i32, i32* %r2 ; limb 0 of the input
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16 ; input packed into one 96-bit integer
+%r18 = lshr i96 %r17, 1 ; logical shift: the top bit of the result is zero, the lowest input bit is discarded
+%r19 = trunc i96 %r18 to i32
+%r21 = getelementptr i32, i32* %r1, i32 0
+store i32 %r19, i32* %r21
+%r22 = lshr i96 %r18, 32
+%r23 = trunc i96 %r22 to i32
+%r25 = getelementptr i32, i32* %r1, i32 1
+store i32 %r23, i32* %r25
+%r26 = lshr i96 %r22, 32
+%r27 = trunc i96 %r26 to i32
+%r29 = getelementptr i32, i32* %r1, i32 2
+store i32 %r27, i32* %r29
+ret void
+}
+define void @mcl_fp_add3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r1 = %r2 + %r3, then minus %r4 when the sum is not smaller than %r4; all operands are three i32 limbs, least-significant first (NOTE(review): %r4 is presumably the field modulus - confirm)
+{
+%r5 = load i32, i32* %r2 ; limb 0 of the first addend
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18 ; first addend as a 96-bit integer
+%r20 = load i32, i32* %r3 ; limb 0 of the second addend
+%r21 = zext i32 %r20 to i64
+%r23 = getelementptr i32, i32* %r3, i32 1
+%r24 = load i32, i32* %r23
+%r25 = zext i32 %r24 to i64
+%r26 = shl i64 %r25, 32
+%r27 = or i64 %r21, %r26
+%r28 = zext i64 %r27 to i96
+%r30 = getelementptr i32, i32* %r3, i32 2
+%r31 = load i32, i32* %r30
+%r32 = zext i32 %r31 to i96
+%r33 = shl i96 %r32, 64
+%r34 = or i96 %r28, %r33 ; second addend as a 96-bit integer
+%r35 = zext i96 %r19 to i128
+%r36 = zext i96 %r34 to i128
+%r37 = add i128 %r35, %r36 ; full sum, including a possible carry in bit 96
+%r38 = trunc i128 %r37 to i96
+%r39 = trunc i96 %r38 to i32
+%r41 = getelementptr i32, i32* %r1, i32 0
+store i32 %r39, i32* %r41 ; the unreduced sum is stored first; it is overwritten below if a subtraction is needed
+%r42 = lshr i96 %r38, 32
+%r43 = trunc i96 %r42 to i32
+%r45 = getelementptr i32, i32* %r1, i32 1
+store i32 %r43, i32* %r45
+%r46 = lshr i96 %r42, 32
+%r47 = trunc i96 %r46 to i32
+%r49 = getelementptr i32, i32* %r1, i32 2
+store i32 %r47, i32* %r49
+%r50 = load i32, i32* %r4 ; limb 0 of the value to subtract
+%r51 = zext i32 %r50 to i64
+%r53 = getelementptr i32, i32* %r4, i32 1
+%r54 = load i32, i32* %r53
+%r55 = zext i32 %r54 to i64
+%r56 = shl i64 %r55, 32
+%r57 = or i64 %r51, %r56
+%r58 = zext i64 %r57 to i96
+%r60 = getelementptr i32, i32* %r4, i32 2
+%r61 = load i32, i32* %r60
+%r62 = zext i32 %r61 to i96
+%r63 = shl i96 %r62, 64
+%r64 = or i96 %r58, %r63
+%r65 = zext i96 %r64 to i128
+%r66 = sub i128 %r37, %r65 ; sum - %r4, evaluated on the full-width sum
+%r67 = lshr i128 %r66, 96
+%r68 = trunc i128 %r67 to i1 ; bit 96 of the difference: 1 when the subtraction borrowed
+br i1%r68, label %carry, label %nocarry ; despite its name, %carry is the borrow path (sum < %r4)
+nocarry:
+%r69 = trunc i128 %r66 to i96 ; no borrow: replace the stored sum with sum - %r4
+%r70 = trunc i96 %r69 to i32
+%r72 = getelementptr i32, i32* %r1, i32 0
+store i32 %r70, i32* %r72
+%r73 = lshr i96 %r69, 32
+%r74 = trunc i96 %r73 to i32
+%r76 = getelementptr i32, i32* %r1, i32 1
+store i32 %r74, i32* %r76
+%r77 = lshr i96 %r73, 32
+%r78 = trunc i96 %r77 to i32
+%r80 = getelementptr i32, i32* %r1, i32 2
+store i32 %r78, i32* %r80
+ret void
+carry:
+ret void ; borrow: the sum already stored in %r1 is kept as the result
+}
+define void @mcl_fp_addNF3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; branch-free variant of the 3-limb add: %r1 = %r2 + %r3, reduced by %r4 when the difference is non-negative; arithmetic stays in 96 bits (NOTE(review): presumably requires the top bit of %r4 to be clear so no carry is lost - confirm)
+{
+%r5 = load i32, i32* %r2 ; limb 0 of the first addend
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18 ; first addend as a 96-bit integer
+%r20 = load i32, i32* %r3 ; limb 0 of the second addend
+%r21 = zext i32 %r20 to i64
+%r23 = getelementptr i32, i32* %r3, i32 1
+%r24 = load i32, i32* %r23
+%r25 = zext i32 %r24 to i64
+%r26 = shl i64 %r25, 32
+%r27 = or i64 %r21, %r26
+%r28 = zext i64 %r27 to i96
+%r30 = getelementptr i32, i32* %r3, i32 2
+%r31 = load i32, i32* %r30
+%r32 = zext i32 %r31 to i96
+%r33 = shl i96 %r32, 64
+%r34 = or i96 %r28, %r33 ; second addend as a 96-bit integer
+%r35 = add i96 %r19, %r34 ; sum kept in 96 bits; any carry out of bit 95 is discarded
+%r36 = load i32, i32* %r4 ; limb 0 of the value to subtract
+%r37 = zext i32 %r36 to i64
+%r39 = getelementptr i32, i32* %r4, i32 1
+%r40 = load i32, i32* %r39
+%r41 = zext i32 %r40 to i64
+%r42 = shl i64 %r41, 32
+%r43 = or i64 %r37, %r42
+%r44 = zext i64 %r43 to i96
+%r46 = getelementptr i32, i32* %r4, i32 2
+%r47 = load i32, i32* %r46
+%r48 = zext i32 %r47 to i96
+%r49 = shl i96 %r48, 64
+%r50 = or i96 %r44, %r49
+%r51 = sub i96 %r35, %r50 ; sum - %r4
+%r52 = lshr i96 %r51, 95
+%r53 = trunc i96 %r52 to i1 ; top (sign) bit of the difference
+%r54 = select i1 %r53, i96 %r35, i96 %r51 ; sign bit set: keep the sum; otherwise take sum - %r4
+%r55 = trunc i96 %r54 to i32
+%r57 = getelementptr i32, i32* %r1, i32 0
+store i32 %r55, i32* %r57
+%r58 = lshr i96 %r54, 32
+%r59 = trunc i96 %r58 to i32
+%r61 = getelementptr i32, i32* %r1, i32 1
+store i32 %r59, i32* %r61
+%r62 = lshr i96 %r58, 32
+%r63 = trunc i96 %r62 to i32
+%r65 = getelementptr i32, i32* %r1, i32 2
+store i32 %r63, i32* %r65
+ret void
+}
+define void @mcl_fp_sub3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r1 = %r2 - %r3, with %r4 added back when the subtraction borrows; all operands are three i32 limbs, least-significant first (NOTE(review): %r4 is presumably the field modulus - confirm)
+{
+%r5 = load i32, i32* %r2 ; limb 0 of the minuend
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18 ; minuend as a 96-bit integer
+%r20 = load i32, i32* %r3 ; limb 0 of the subtrahend
+%r21 = zext i32 %r20 to i64
+%r23 = getelementptr i32, i32* %r3, i32 1
+%r24 = load i32, i32* %r23
+%r25 = zext i32 %r24 to i64
+%r26 = shl i64 %r25, 32
+%r27 = or i64 %r21, %r26
+%r28 = zext i64 %r27 to i96
+%r30 = getelementptr i32, i32* %r3, i32 2
+%r31 = load i32, i32* %r30
+%r32 = zext i32 %r31 to i96
+%r33 = shl i96 %r32, 64
+%r34 = or i96 %r28, %r33 ; subtrahend as a 96-bit integer
+%r35 = zext i96 %r19 to i128
+%r36 = zext i96 %r34 to i128
+%r37 = sub i128 %r35, %r36 ; widened so that the borrow shows up in bit 96
+%r38 = trunc i128 %r37 to i96
+%r39 = lshr i128 %r37, 96
+%r40 = trunc i128 %r39 to i1 ; 1 when %r2 < %r3
+%r41 = trunc i96 %r38 to i32
+%r43 = getelementptr i32, i32* %r1, i32 0
+store i32 %r41, i32* %r43 ; the raw difference is stored first; it is overwritten below on borrow
+%r44 = lshr i96 %r38, 32
+%r45 = trunc i96 %r44 to i32
+%r47 = getelementptr i32, i32* %r1, i32 1
+store i32 %r45, i32* %r47
+%r48 = lshr i96 %r44, 32
+%r49 = trunc i96 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 2
+store i32 %r49, i32* %r51
+br i1%r40, label %carry, label %nocarry ; %carry is taken when the subtraction borrowed
+nocarry:
+ret void ; no borrow: the stored difference is the result
+carry:
+%r52 = load i32, i32* %r4 ; limb 0 of the value to add back
+%r53 = zext i32 %r52 to i64
+%r55 = getelementptr i32, i32* %r4, i32 1
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i64
+%r58 = shl i64 %r57, 32
+%r59 = or i64 %r53, %r58
+%r60 = zext i64 %r59 to i96
+%r62 = getelementptr i32, i32* %r4, i32 2
+%r63 = load i32, i32* %r62
+%r64 = zext i32 %r63 to i96
+%r65 = shl i96 %r64, 64
+%r66 = or i96 %r60, %r65
+%r67 = add i96 %r38, %r66 ; wrapped difference + %r4, modulo 2^96
+%r68 = trunc i96 %r67 to i32
+%r70 = getelementptr i32, i32* %r1, i32 0
+store i32 %r68, i32* %r70
+%r71 = lshr i96 %r67, 32
+%r72 = trunc i96 %r71 to i32
+%r74 = getelementptr i32, i32* %r1, i32 1
+store i32 %r72, i32* %r74
+%r75 = lshr i96 %r71, 32
+%r76 = trunc i96 %r75 to i32
+%r78 = getelementptr i32, i32* %r1, i32 2
+store i32 %r76, i32* %r78
+ret void
+}
+define void @mcl_fp_subNF3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; branch-free variant of the 3-limb subtract: %r1 = %r2 - %r3, plus %r4 when the 96-bit difference has its top bit set (NOTE(review): presumably requires operands whose top bit is clear - confirm)
+{
+%r5 = load i32, i32* %r2 ; limb 0 of the minuend
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18 ; minuend as a 96-bit integer
+%r20 = load i32, i32* %r3 ; limb 0 of the subtrahend
+%r21 = zext i32 %r20 to i64
+%r23 = getelementptr i32, i32* %r3, i32 1
+%r24 = load i32, i32* %r23
+%r25 = zext i32 %r24 to i64
+%r26 = shl i64 %r25, 32
+%r27 = or i64 %r21, %r26
+%r28 = zext i64 %r27 to i96
+%r30 = getelementptr i32, i32* %r3, i32 2
+%r31 = load i32, i32* %r30
+%r32 = zext i32 %r31 to i96
+%r33 = shl i96 %r32, 64
+%r34 = or i96 %r28, %r33 ; subtrahend as a 96-bit integer
+%r35 = sub i96 %r19, %r34 ; difference modulo 2^96
+%r36 = lshr i96 %r35, 95
+%r37 = trunc i96 %r36 to i1 ; top (sign) bit of the difference
+%r38 = load i32, i32* %r4 ; limb 0 of the value to add back
+%r39 = zext i32 %r38 to i64
+%r41 = getelementptr i32, i32* %r4, i32 1
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i64
+%r44 = shl i64 %r43, 32
+%r45 = or i64 %r39, %r44
+%r46 = zext i64 %r45 to i96
+%r48 = getelementptr i32, i32* %r4, i32 2
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i96
+%r51 = shl i96 %r50, 64
+%r52 = or i96 %r46, %r51
+%r54 = select i1 %r37, i96 %r52, i96 0 ; correction term: %r4 when the sign bit is set, else 0
+%r55 = add i96 %r35, %r54
+%r56 = trunc i96 %r55 to i32
+%r58 = getelementptr i32, i32* %r1, i32 0
+store i32 %r56, i32* %r58
+%r59 = lshr i96 %r55, 32
+%r60 = trunc i96 %r59 to i32
+%r62 = getelementptr i32, i32* %r1, i32 1
+store i32 %r60, i32* %r62
+%r63 = lshr i96 %r59, 32
+%r64 = trunc i96 %r63 to i32
+%r66 = getelementptr i32, i32* %r1, i32 2
+store i32 %r64, i32* %r66
+ret void
+}
+define void @mcl_fpDbl_add3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; adds two 6-limb (192-bit) values %r2 and %r3 into %r1; the low three limbs are stored as-is, the upper part is reduced by the 3-limb value %r4 when it is not smaller than %r4 (NOTE(review): %r4 is presumably the field modulus - confirm)
+{
+%r5 = load i32, i32* %r2 ; limb 0 of the first operand
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39 ; first operand as a 192-bit integer
+%r41 = load i32, i32* %r3 ; limb 0 of the second operand
+%r42 = zext i32 %r41 to i64
+%r44 = getelementptr i32, i32* %r3, i32 1
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i64
+%r47 = shl i64 %r46, 32
+%r48 = or i64 %r42, %r47
+%r49 = zext i64 %r48 to i96
+%r51 = getelementptr i32, i32* %r3, i32 2
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i96
+%r54 = shl i96 %r53, 64
+%r55 = or i96 %r49, %r54
+%r56 = zext i96 %r55 to i128
+%r58 = getelementptr i32, i32* %r3, i32 3
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i128
+%r61 = shl i128 %r60, 96
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i160
+%r65 = getelementptr i32, i32* %r3, i32 4
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i160
+%r68 = shl i160 %r67, 128
+%r69 = or i160 %r63, %r68
+%r70 = zext i160 %r69 to i192
+%r72 = getelementptr i32, i32* %r3, i32 5
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i192
+%r75 = shl i192 %r74, 160
+%r76 = or i192 %r70, %r75 ; second operand as a 192-bit integer
+%r77 = zext i192 %r40 to i224
+%r78 = zext i192 %r76 to i224
+%r79 = add i224 %r77, %r78 ; full sum, including a possible carry in bit 192
+%r80 = trunc i224 %r79 to i96 ; low 96 bits go to the output without reduction
+%r81 = trunc i96 %r80 to i32
+%r83 = getelementptr i32, i32* %r1, i32 0
+store i32 %r81, i32* %r83
+%r84 = lshr i96 %r80, 32
+%r85 = trunc i96 %r84 to i32
+%r87 = getelementptr i32, i32* %r1, i32 1
+store i32 %r85, i32* %r87
+%r88 = lshr i96 %r84, 32
+%r89 = trunc i96 %r88 to i32
+%r91 = getelementptr i32, i32* %r1, i32 2
+store i32 %r89, i32* %r91
+%r92 = lshr i224 %r79, 96
+%r93 = trunc i224 %r92 to i128 ; upper part of the sum (bits 96..192), carry bit included
+%r94 = load i32, i32* %r4 ; limb 0 of the value to subtract
+%r95 = zext i32 %r94 to i64
+%r97 = getelementptr i32, i32* %r4, i32 1
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i64
+%r100 = shl i64 %r99, 32
+%r101 = or i64 %r95, %r100
+%r102 = zext i64 %r101 to i96
+%r104 = getelementptr i32, i32* %r4, i32 2
+%r105 = load i32, i32* %r104
+%r106 = zext i32 %r105 to i96
+%r107 = shl i96 %r106, 64
+%r108 = or i96 %r102, %r107
+%r109 = zext i96 %r108 to i128
+%r110 = sub i128 %r93, %r109 ; upper part - %r4
+%r111 = lshr i128 %r110, 96
+%r112 = trunc i128 %r111 to i1 ; bit 96 of the difference, used as the "upper part < %r4" flag
+%r113 = select i1 %r112, i128 %r93, i128 %r110 ; flag set: keep the upper part; otherwise take the reduced value
+%r114 = trunc i128 %r113 to i96
+%r116 = getelementptr i32, i32* %r1, i32 3 ; the upper half of the output starts at limb 3
+%r117 = trunc i96 %r114 to i32
+%r119 = getelementptr i32, i32* %r116, i32 0
+store i32 %r117, i32* %r119
+%r120 = lshr i96 %r114, 32
+%r121 = trunc i96 %r120 to i32
+%r123 = getelementptr i32, i32* %r116, i32 1
+store i32 %r121, i32* %r123
+%r124 = lshr i96 %r120, 32
+%r125 = trunc i96 %r124 to i32
+%r127 = getelementptr i32, i32* %r116, i32 2
+store i32 %r125, i32* %r127
+ret void
+}
+define void @mcl_fpDbl_sub3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; subtracts the 6-limb value %r3 from the 6-limb value %r2 into %r1; the low three limbs are stored as-is, and the 3-limb value %r4 is added to the upper three limbs when the subtraction borrows (NOTE(review): %r4 is presumably the field modulus - confirm)
+{
+%r5 = load i32, i32* %r2 ; limb 0 of the minuend
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39 ; minuend as a 192-bit integer
+%r41 = load i32, i32* %r3 ; limb 0 of the subtrahend
+%r42 = zext i32 %r41 to i64
+%r44 = getelementptr i32, i32* %r3, i32 1
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i64
+%r47 = shl i64 %r46, 32
+%r48 = or i64 %r42, %r47
+%r49 = zext i64 %r48 to i96
+%r51 = getelementptr i32, i32* %r3, i32 2
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i96
+%r54 = shl i96 %r53, 64
+%r55 = or i96 %r49, %r54
+%r56 = zext i96 %r55 to i128
+%r58 = getelementptr i32, i32* %r3, i32 3
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i128
+%r61 = shl i128 %r60, 96
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i160
+%r65 = getelementptr i32, i32* %r3, i32 4
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i160
+%r68 = shl i160 %r67, 128
+%r69 = or i160 %r63, %r68
+%r70 = zext i160 %r69 to i192
+%r72 = getelementptr i32, i32* %r3, i32 5
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i192
+%r75 = shl i192 %r74, 160
+%r76 = or i192 %r70, %r75 ; subtrahend as a 192-bit integer
+%r77 = zext i192 %r40 to i224
+%r78 = zext i192 %r76 to i224
+%r79 = sub i224 %r77, %r78 ; widened so that the borrow shows up in bit 192
+%r80 = trunc i224 %r79 to i96 ; low 96 bits go to the output without correction
+%r81 = trunc i96 %r80 to i32
+%r83 = getelementptr i32, i32* %r1, i32 0
+store i32 %r81, i32* %r83
+%r84 = lshr i96 %r80, 32
+%r85 = trunc i96 %r84 to i32
+%r87 = getelementptr i32, i32* %r1, i32 1
+store i32 %r85, i32* %r87
+%r88 = lshr i96 %r84, 32
+%r89 = trunc i96 %r88 to i32
+%r91 = getelementptr i32, i32* %r1, i32 2
+store i32 %r89, i32* %r91
+%r92 = lshr i224 %r79, 96
+%r93 = trunc i224 %r92 to i96 ; upper 96 bits of the 192-bit difference
+%r94 = lshr i224 %r79, 192
+%r95 = trunc i224 %r94 to i1 ; 1 when %r2 < %r3
+%r96 = load i32, i32* %r4 ; limb 0 of the value to add back
+%r97 = zext i32 %r96 to i64
+%r99 = getelementptr i32, i32* %r4, i32 1
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i64
+%r102 = shl i64 %r101, 32
+%r103 = or i64 %r97, %r102
+%r104 = zext i64 %r103 to i96
+%r106 = getelementptr i32, i32* %r4, i32 2
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i96
+%r109 = shl i96 %r108, 64
+%r110 = or i96 %r104, %r109
+%r112 = select i1 %r95, i96 %r110, i96 0 ; correction term: %r4 on borrow, else 0
+%r113 = add i96 %r93, %r112
+%r115 = getelementptr i32, i32* %r1, i32 3 ; the upper half of the output starts at limb 3
+%r116 = trunc i96 %r113 to i32
+%r118 = getelementptr i32, i32* %r115, i32 0
+store i32 %r116, i32* %r118
+%r119 = lshr i96 %r113, 32
+%r120 = trunc i96 %r119 to i32
+%r122 = getelementptr i32, i32* %r115, i32 1
+store i32 %r120, i32* %r122
+%r123 = lshr i96 %r119, 32
+%r124 = trunc i96 %r123 to i32
+%r126 = getelementptr i32, i32* %r115, i32 2
+store i32 %r124, i32* %r126
+ret void
+}
+define i160 @mulPv128x32(i32* noalias  %r2, i32 %r3) ; combines four mulPos32x32 results (indices 0..3) into one i160 value (NOTE(review): presumably the product of the 4-limb number at %r2 and the 32-bit scalar %r3 - confirm)
+{
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0) ; helper defined outside this chunk (NOTE(review): presumably the 64-bit product of limb i of %r2 and %r3 - confirm)
+%r6 = trunc i64 %r5 to i32 ; low 32 bits of result 0
+%r7 = call i32 @extractHigh32(i64 %r5) ; helper defined outside this chunk; by its name, the high 32 bits of result 0
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r20 = zext i32 %r6 to i64 ; pack the four low halves into a 128-bit value, result 0 lowest
+%r21 = zext i32 %r10 to i64
+%r22 = shl i64 %r21, 32
+%r23 = or i64 %r20, %r22
+%r24 = zext i64 %r23 to i96
+%r25 = zext i32 %r14 to i96
+%r26 = shl i96 %r25, 64
+%r27 = or i96 %r24, %r26
+%r28 = zext i96 %r27 to i128
+%r29 = zext i32 %r18 to i128
+%r30 = shl i128 %r29, 96
+%r31 = or i128 %r28, %r30
+%r32 = zext i32 %r7 to i64 ; pack the four high halves the same way
+%r33 = zext i32 %r11 to i64
+%r34 = shl i64 %r33, 32
+%r35 = or i64 %r32, %r34
+%r36 = zext i64 %r35 to i96
+%r37 = zext i32 %r15 to i96
+%r38 = shl i96 %r37, 64
+%r39 = or i96 %r36, %r38
+%r40 = zext i96 %r39 to i128
+%r41 = zext i32 %r19 to i128
+%r42 = shl i128 %r41, 96
+%r43 = or i128 %r40, %r42
+%r44 = zext i128 %r31 to i160
+%r45 = zext i128 %r43 to i160
+%r46 = shl i160 %r45, 32 ; each high half belongs one limb above its low half
+%r47 = add i160 %r44, %r46 ; a single wide add propagates all carries between limbs
+ret i160 %r47
+}
+define void @mcl_fp_mulUnitPre4L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3) ; stores the i160 result of mulPv128x32(%r2, %r3) to %r1 as five i32 limbs, least-significant first
+{
+%r4 = call i160 @mulPv128x32(i32* %r2, i32 %r3)
+%r5 = trunc i160 %r4 to i32 ; limb 0
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i160 %r4, 32 ; shift the next limb into the low position
+%r9 = trunc i160 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i160 %r8, 32
+%r13 = trunc i160 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i160 %r12, 32
+%r17 = trunc i160 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i160 %r16, 32
+%r21 = trunc i160 %r20 to i32 ; limb 4: the top 32 bits of the i160 result
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+ret void
+}
+define void @mcl_fpDbl_mulPre4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; writes eight i32 limbs to %r1, built row by row from mulPv128x32(%r2, limb i of %r3) for i = 0..3 (NOTE(review): presumably the full 256-bit product of two 4-limb numbers - confirm)
+{
+%r4 = load i32, i32* %r3 ; limb 0 of the second operand
+%r5 = call i160 @mulPv128x32(i32* %r2, i32 %r4)
+%r6 = trunc i160 %r5 to i32
+store i32 %r6, i32* %r1 ; output limb 0 is final after the first row
+%r7 = lshr i160 %r5, 32 ; drop the finished limb; the remainder is the running accumulator
+%r9 = getelementptr i32, i32* %r3, i32 1
+%r10 = load i32, i32* %r9
+%r11 = call i160 @mulPv128x32(i32* %r2, i32 %r10)
+%r12 = add i160 %r7, %r11 ; accumulator + next row
+%r13 = trunc i160 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 1
+store i32 %r13, i32* %r15
+%r16 = lshr i160 %r12, 32
+%r18 = getelementptr i32, i32* %r3, i32 2
+%r19 = load i32, i32* %r18
+%r20 = call i160 @mulPv128x32(i32* %r2, i32 %r19)
+%r21 = add i160 %r16, %r20
+%r22 = trunc i160 %r21 to i32
+%r24 = getelementptr i32, i32* %r1, i32 2
+store i32 %r22, i32* %r24
+%r25 = lshr i160 %r21, 32
+%r27 = getelementptr i32, i32* %r3, i32 3
+%r28 = load i32, i32* %r27
+%r29 = call i160 @mulPv128x32(i32* %r2, i32 %r28)
+%r30 = add i160 %r25, %r29 ; last row: all five limbs of the accumulator are output
+%r32 = getelementptr i32, i32* %r1, i32 3 ; remaining output goes to limbs 3..7
+%r33 = trunc i160 %r30 to i32
+%r35 = getelementptr i32, i32* %r32, i32 0
+store i32 %r33, i32* %r35
+%r36 = lshr i160 %r30, 32
+%r37 = trunc i160 %r36 to i32
+%r39 = getelementptr i32, i32* %r32, i32 1
+store i32 %r37, i32* %r39
+%r40 = lshr i160 %r36, 32
+%r41 = trunc i160 %r40 to i32
+%r43 = getelementptr i32, i32* %r32, i32 2
+store i32 %r41, i32* %r43
+%r44 = lshr i160 %r40, 32
+%r45 = trunc i160 %r44 to i32
+%r47 = getelementptr i32, i32* %r32, i32 3
+store i32 %r45, i32* %r47
+%r48 = lshr i160 %r44, 32
+%r49 = trunc i160 %r48 to i32
+%r51 = getelementptr i32, i32* %r32, i32 4
+store i32 %r49, i32* %r51
+ret void
+}
+define void @mcl_fpDbl_sqrPre4L(i32* noalias  %r1, i32* noalias  %r2) ; writes eight i32 limbs to %r1, built row by row from mulPv128x32(%r2, limb i of %r2) for i = 0..3, i.e. the same scheme as a general multiply with both operands equal (NOTE(review): presumably the 256-bit square of a 4-limb number - confirm)
+{
+%r3 = load i32, i32* %r2 ; limb 0 of the operand, used as the first row multiplier
+%r4 = call i160 @mulPv128x32(i32* %r2, i32 %r3)
+%r5 = trunc i160 %r4 to i32
+store i32 %r5, i32* %r1 ; output limb 0 is final after the first row
+%r6 = lshr i160 %r4, 32 ; drop the finished limb; the remainder is the running accumulator
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = call i160 @mulPv128x32(i32* %r2, i32 %r9)
+%r11 = add i160 %r6, %r10 ; accumulator + next row
+%r12 = trunc i160 %r11 to i32
+%r14 = getelementptr i32, i32* %r1, i32 1
+store i32 %r12, i32* %r14
+%r15 = lshr i160 %r11, 32
+%r17 = getelementptr i32, i32* %r2, i32 2
+%r18 = load i32, i32* %r17
+%r19 = call i160 @mulPv128x32(i32* %r2, i32 %r18)
+%r20 = add i160 %r15, %r19
+%r21 = trunc i160 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 2
+store i32 %r21, i32* %r23
+%r24 = lshr i160 %r20, 32
+%r26 = getelementptr i32, i32* %r2, i32 3
+%r27 = load i32, i32* %r26
+%r28 = call i160 @mulPv128x32(i32* %r2, i32 %r27)
+%r29 = add i160 %r24, %r28 ; last row: all five limbs of the accumulator are output
+%r31 = getelementptr i32, i32* %r1, i32 3 ; remaining output goes to limbs 3..7
+%r32 = trunc i160 %r29 to i32
+%r34 = getelementptr i32, i32* %r31, i32 0
+store i32 %r32, i32* %r34
+%r35 = lshr i160 %r29, 32
+%r36 = trunc i160 %r35 to i32
+%r38 = getelementptr i32, i32* %r31, i32 1
+store i32 %r36, i32* %r38
+%r39 = lshr i160 %r35, 32
+%r40 = trunc i160 %r39 to i32
+%r42 = getelementptr i32, i32* %r31, i32 2
+store i32 %r40, i32* %r42
+%r43 = lshr i160 %r39, 32
+%r44 = trunc i160 %r43 to i32
+%r46 = getelementptr i32, i32* %r31, i32 3
+store i32 %r44, i32* %r46
+%r47 = lshr i160 %r43, 32
+%r48 = trunc i160 %r47 to i32
+%r50 = getelementptr i32, i32* %r31, i32 4
+store i32 %r48, i32* %r50
+ret void
+}
+define void @mcl_fp_mont4L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; 4-limb interleaved multiply-and-reduce of %r2 and %r3 with respect to %r4, result in %r1 (NOTE(review): presumably Montgomery multiplication with %r4 the modulus p - confirm)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; the word stored immediately before %r4[0]
+%r7 = load i32, i32* %r6 ; per-round multiplier constant (NOTE(review): presumably -p^-1 mod 2^32 - confirm against the caller's table layout)
+%r9 = getelementptr i32, i32* %r3, i32 0
+%r10 = load i32, i32* %r9
+%r11 = call i160 @mulPv128x32(i32* %r2, i32 %r10) ; round 0: %r2 times limb 0 of %r3
+%r12 = zext i160 %r11 to i192 ; i192 accumulator leaves room for the carry of the additions
+%r13 = trunc i160 %r11 to i32
+%r14 = mul i32 %r13, %r7 ; q = low limb * constant, modulo 2^32
+%r15 = call i160 @mulPv128x32(i32* %r4, i32 %r14) ; q * %r4
+%r16 = zext i160 %r15 to i192
+%r17 = add i192 %r12, %r16
+%r18 = lshr i192 %r17, 32 ; drop the low limb (NOTE(review): it is zero if the constant is as presumed above)
+%r20 = getelementptr i32, i32* %r3, i32 1
+%r21 = load i32, i32* %r20
+%r22 = call i160 @mulPv128x32(i32* %r2, i32 %r21) ; round 1
+%r23 = zext i160 %r22 to i192
+%r24 = add i192 %r18, %r23
+%r25 = trunc i192 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i160 @mulPv128x32(i32* %r4, i32 %r26)
+%r28 = zext i160 %r27 to i192
+%r29 = add i192 %r24, %r28
+%r30 = lshr i192 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2
+%r33 = load i32, i32* %r32
+%r34 = call i160 @mulPv128x32(i32* %r2, i32 %r33) ; round 2
+%r35 = zext i160 %r34 to i192
+%r36 = add i192 %r30, %r35
+%r37 = trunc i192 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i160 @mulPv128x32(i32* %r4, i32 %r38)
+%r40 = zext i160 %r39 to i192
+%r41 = add i192 %r36, %r40
+%r42 = lshr i192 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = call i160 @mulPv128x32(i32* %r2, i32 %r45) ; round 3
+%r47 = zext i160 %r46 to i192
+%r48 = add i192 %r42, %r47
+%r49 = trunc i192 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i160 @mulPv128x32(i32* %r4, i32 %r50)
+%r52 = zext i160 %r51 to i192
+%r53 = add i192 %r48, %r52
+%r54 = lshr i192 %r53, 32
+%r55 = trunc i192 %r54 to i160 ; candidate result before the final conditional subtraction
+%r56 = load i32, i32* %r4 ; reload %r4 as a 128-bit integer for the final comparison
+%r57 = zext i32 %r56 to i64
+%r59 = getelementptr i32, i32* %r4, i32 1
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i64
+%r62 = shl i64 %r61, 32
+%r63 = or i64 %r57, %r62
+%r64 = zext i64 %r63 to i96
+%r66 = getelementptr i32, i32* %r4, i32 2
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i96
+%r69 = shl i96 %r68, 64
+%r70 = or i96 %r64, %r69
+%r71 = zext i96 %r70 to i128
+%r73 = getelementptr i32, i32* %r4, i32 3
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i128
+%r76 = shl i128 %r75, 96
+%r77 = or i128 %r71, %r76
+%r78 = zext i128 %r77 to i160
+%r79 = sub i160 %r55, %r78 ; candidate - %r4
+%r80 = lshr i160 %r79, 128
+%r81 = trunc i160 %r80 to i1 ; bit 128 of the difference, used as the "candidate < %r4" flag
+%r82 = select i1 %r81, i160 %r55, i160 %r79 ; flag set: keep the candidate; otherwise take candidate - %r4
+%r83 = trunc i160 %r82 to i128
+%r84 = trunc i128 %r83 to i32
+%r86 = getelementptr i32, i32* %r1, i32 0
+store i32 %r84, i32* %r86
+%r87 = lshr i128 %r83, 32
+%r88 = trunc i128 %r87 to i32
+%r90 = getelementptr i32, i32* %r1, i32 1
+store i32 %r88, i32* %r90
+%r91 = lshr i128 %r87, 32
+%r92 = trunc i128 %r91 to i32
+%r94 = getelementptr i32, i32* %r1, i32 2
+store i32 %r92, i32* %r94
+%r95 = lshr i128 %r91, 32
+%r96 = trunc i128 %r95 to i32
+%r98 = getelementptr i32, i32* %r1, i32 3
+store i32 %r96, i32* %r98
+ret void
+}
+define void @mcl_fp_montNF4L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; same round structure as the 4-limb interleaved multiply-and-reduce, but the accumulator stays in i160 and the final test uses the sign bit (NOTE(review): presumably valid only when the top bit of %r4 is clear - confirm)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; the word stored immediately before %r4[0]
+%r7 = load i32, i32* %r6 ; per-round multiplier constant (NOTE(review): presumably -p^-1 mod 2^32 - confirm against the caller's table layout)
+%r8 = load i32, i32* %r3
+%r9 = call i160 @mulPv128x32(i32* %r2, i32 %r8) ; round 0: %r2 times limb 0 of %r3
+%r10 = trunc i160 %r9 to i32
+%r11 = mul i32 %r10, %r7 ; q = low limb * constant, modulo 2^32
+%r12 = call i160 @mulPv128x32(i32* %r4, i32 %r11) ; q * %r4
+%r13 = add i160 %r9, %r12 ; no widening here: a carry out of bit 159 would be discarded
+%r14 = lshr i160 %r13, 32 ; drop the low limb
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = call i160 @mulPv128x32(i32* %r2, i32 %r17) ; round 1
+%r19 = add i160 %r14, %r18
+%r20 = trunc i160 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i160 @mulPv128x32(i32* %r4, i32 %r21)
+%r23 = add i160 %r19, %r22
+%r24 = lshr i160 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2
+%r27 = load i32, i32* %r26
+%r28 = call i160 @mulPv128x32(i32* %r2, i32 %r27) ; round 2
+%r29 = add i160 %r24, %r28
+%r30 = trunc i160 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i160 @mulPv128x32(i32* %r4, i32 %r31)
+%r33 = add i160 %r29, %r32
+%r34 = lshr i160 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3
+%r37 = load i32, i32* %r36
+%r38 = call i160 @mulPv128x32(i32* %r2, i32 %r37) ; round 3
+%r39 = add i160 %r34, %r38
+%r40 = trunc i160 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i160 @mulPv128x32(i32* %r4, i32 %r41)
+%r43 = add i160 %r39, %r42
+%r44 = lshr i160 %r43, 32
+%r45 = trunc i160 %r44 to i128 ; candidate result before the final conditional subtraction
+%r46 = load i32, i32* %r4 ; reload %r4 as a 128-bit integer for the final comparison
+%r47 = zext i32 %r46 to i64
+%r49 = getelementptr i32, i32* %r4, i32 1
+%r50 = load i32, i32* %r49
+%r51 = zext i32 %r50 to i64
+%r52 = shl i64 %r51, 32
+%r53 = or i64 %r47, %r52
+%r54 = zext i64 %r53 to i96
+%r56 = getelementptr i32, i32* %r4, i32 2
+%r57 = load i32, i32* %r56
+%r58 = zext i32 %r57 to i96
+%r59 = shl i96 %r58, 64
+%r60 = or i96 %r54, %r59
+%r61 = zext i96 %r60 to i128
+%r63 = getelementptr i32, i32* %r4, i32 3
+%r64 = load i32, i32* %r63
+%r65 = zext i32 %r64 to i128
+%r66 = shl i128 %r65, 96
+%r67 = or i128 %r61, %r66
+%r68 = sub i128 %r45, %r67 ; candidate - %r4, modulo 2^128
+%r69 = lshr i128 %r68, 127
+%r70 = trunc i128 %r69 to i1 ; top (sign) bit of the difference
+%r71 = select i1 %r70, i128 %r45, i128 %r68 ; sign bit set: keep the candidate; otherwise take candidate - %r4
+%r72 = trunc i128 %r71 to i32
+%r74 = getelementptr i32, i32* %r1, i32 0
+store i32 %r72, i32* %r74
+%r75 = lshr i128 %r71, 32
+%r76 = trunc i128 %r75 to i32
+%r78 = getelementptr i32, i32* %r1, i32 1
+store i32 %r76, i32* %r78
+%r79 = lshr i128 %r75, 32
+%r80 = trunc i128 %r79 to i32
+%r82 = getelementptr i32, i32* %r1, i32 2
+store i32 %r80, i32* %r82
+%r83 = lshr i128 %r79, 32
+%r84 = trunc i128 %r83 to i32
+%r86 = getelementptr i32, i32* %r1, i32 3
+store i32 %r84, i32* %r86
+ret void
+}
+define void @mcl_fp_montRed4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; reduces the 8-limb value at %r2 to 4 limbs in %r1 using four multiply-add-shift rounds against the 4-limb value %r3 (NOTE(review): presumably Montgomery reduction with %r3 the modulus p - confirm)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1 ; the word stored immediately before %r3[0]
+%r6 = load i32, i32* %r5 ; per-round multiplier constant (NOTE(review): presumably -p^-1 mod 2^32 - confirm against the caller's table layout)
+%r7 = load i32, i32* %r3 ; limb 0 of %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27 ; %r3 as a 128-bit integer, used only in the final conditional subtraction
+%r29 = load i32, i32* %r2 ; limb 0 of the 8-limb input
+%r30 = zext i32 %r29 to i64
+%r32 = getelementptr i32, i32* %r2, i32 1
+%r33 = load i32, i32* %r32
+%r34 = zext i32 %r33 to i64
+%r35 = shl i64 %r34, 32
+%r36 = or i64 %r30, %r35
+%r37 = zext i64 %r36 to i96
+%r39 = getelementptr i32, i32* %r2, i32 2
+%r40 = load i32, i32* %r39
+%r41 = zext i32 %r40 to i96
+%r42 = shl i96 %r41, 64
+%r43 = or i96 %r37, %r42
+%r44 = zext i96 %r43 to i128
+%r46 = getelementptr i32, i32* %r2, i32 3
+%r47 = load i32, i32* %r46
+%r48 = zext i32 %r47 to i128
+%r49 = shl i128 %r48, 96
+%r50 = or i128 %r44, %r49
+%r51 = zext i128 %r50 to i160
+%r53 = getelementptr i32, i32* %r2, i32 4
+%r54 = load i32, i32* %r53
+%r55 = zext i32 %r54 to i160
+%r56 = shl i160 %r55, 128
+%r57 = or i160 %r51, %r56
+%r58 = zext i160 %r57 to i192
+%r60 = getelementptr i32, i32* %r2, i32 5
+%r61 = load i32, i32* %r60
+%r62 = zext i32 %r61 to i192
+%r63 = shl i192 %r62, 160
+%r64 = or i192 %r58, %r63
+%r65 = zext i192 %r64 to i224
+%r67 = getelementptr i32, i32* %r2, i32 6
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i224
+%r70 = shl i224 %r69, 192
+%r71 = or i224 %r65, %r70
+%r72 = zext i224 %r71 to i256
+%r74 = getelementptr i32, i32* %r2, i32 7
+%r75 = load i32, i32* %r74
+%r76 = zext i32 %r75 to i256
+%r77 = shl i256 %r76, 224
+%r78 = or i256 %r72, %r77 ; input as a 256-bit integer
+%r79 = zext i256 %r78 to i288 ; one extra limb of headroom for the carry of the first addition
+%r80 = trunc i288 %r79 to i32
+%r81 = mul i32 %r80, %r6 ; round 0: q = low limb * constant, modulo 2^32
+%r82 = call i160 @mulPv128x32(i32* %r3, i32 %r81) ; q * %r3
+%r83 = zext i160 %r82 to i288
+%r84 = add i288 %r79, %r83
+%r85 = lshr i288 %r84, 32 ; drop the low limb; the working width shrinks by 32 bits each round
+%r86 = trunc i288 %r85 to i256
+%r87 = trunc i256 %r86 to i32
+%r88 = mul i32 %r87, %r6 ; round 1
+%r89 = call i160 @mulPv128x32(i32* %r3, i32 %r88)
+%r90 = zext i160 %r89 to i256
+%r91 = add i256 %r86, %r90
+%r92 = lshr i256 %r91, 32
+%r93 = trunc i256 %r92 to i224
+%r94 = trunc i224 %r93 to i32
+%r95 = mul i32 %r94, %r6 ; round 2
+%r96 = call i160 @mulPv128x32(i32* %r3, i32 %r95)
+%r97 = zext i160 %r96 to i224
+%r98 = add i224 %r93, %r97
+%r99 = lshr i224 %r98, 32
+%r100 = trunc i224 %r99 to i192
+%r101 = trunc i192 %r100 to i32
+%r102 = mul i32 %r101, %r6 ; round 3
+%r103 = call i160 @mulPv128x32(i32* %r3, i32 %r102)
+%r104 = zext i160 %r103 to i192
+%r105 = add i192 %r100, %r104
+%r106 = lshr i192 %r105, 32
+%r107 = trunc i192 %r106 to i160 ; candidate result before the final conditional subtraction
+%r108 = zext i128 %r28 to i160
+%r109 = sub i160 %r107, %r108 ; candidate - %r3
+%r110 = lshr i160 %r109, 128
+%r111 = trunc i160 %r110 to i1 ; bit 128 of the difference, used as the "candidate < %r3" flag
+%r112 = select i1 %r111, i160 %r107, i160 %r109 ; flag set: keep the candidate; otherwise take candidate - %r3
+%r113 = trunc i160 %r112 to i128
+%r114 = trunc i128 %r113 to i32
+%r116 = getelementptr i32, i32* %r1, i32 0
+store i32 %r114, i32* %r116
+%r117 = lshr i128 %r113, 32
+%r118 = trunc i128 %r117 to i32
+%r120 = getelementptr i32, i32* %r1, i32 1
+store i32 %r118, i32* %r120
+%r121 = lshr i128 %r117, 32
+%r122 = trunc i128 %r121 to i32
+%r124 = getelementptr i32, i32* %r1, i32 2
+store i32 %r122, i32* %r124
+%r125 = lshr i128 %r121, 32
+%r126 = trunc i128 %r125 to i32
+%r128 = getelementptr i32, i32* %r1, i32 3
+store i32 %r126, i32* %r128
+ret void
+}
+define i32 @mcl_fp_addPre4L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; writes the low 128 bits of (%r3 + %r4) to %r2 as four i32 limbs (least-significant first) and returns the carry out of bit 127 (0 or 1)
+{
+%r5 = load i32, i32* %r3 ; limb 0 of the first addend
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25 ; first addend as a 128-bit integer
+%r27 = zext i128 %r26 to i160 ; widened so that the carry shows up in bit 128
+%r28 = load i32, i32* %r4 ; limb 0 of the second addend
+%r29 = zext i32 %r28 to i64
+%r31 = getelementptr i32, i32* %r4, i32 1
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i64
+%r34 = shl i64 %r33, 32
+%r35 = or i64 %r29, %r34
+%r36 = zext i64 %r35 to i96
+%r38 = getelementptr i32, i32* %r4, i32 2
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i96
+%r41 = shl i96 %r40, 64
+%r42 = or i96 %r36, %r41
+%r43 = zext i96 %r42 to i128
+%r45 = getelementptr i32, i32* %r4, i32 3
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i128
+%r48 = shl i128 %r47, 96
+%r49 = or i128 %r43, %r48 ; second addend as a 128-bit integer
+%r50 = zext i128 %r49 to i160
+%r51 = add i160 %r27, %r50 ; full sum; cannot overflow i160
+%r52 = trunc i160 %r51 to i128 ; low 128 bits are the stored result
+%r53 = trunc i128 %r52 to i32
+%r55 = getelementptr i32, i32* %r2, i32 0
+store i32 %r53, i32* %r55
+%r56 = lshr i128 %r52, 32
+%r57 = trunc i128 %r56 to i32
+%r59 = getelementptr i32, i32* %r2, i32 1
+store i32 %r57, i32* %r59
+%r60 = lshr i128 %r56, 32
+%r61 = trunc i128 %r60 to i32
+%r63 = getelementptr i32, i32* %r2, i32 2
+store i32 %r61, i32* %r63
+%r64 = lshr i128 %r60, 32
+%r65 = trunc i128 %r64 to i32
+%r67 = getelementptr i32, i32* %r2, i32 3
+store i32 %r65, i32* %r67
+%r68 = lshr i160 %r51, 128 ; bits 128 and up: the carry
+%r69 = trunc i160 %r68 to i32
+ret i32 %r69
+}
+define i32 @mcl_fp_subPre4L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 4-limb subtract with no reduction: %r2[0..3] = low 128 bits of %r3[0..3] - %r4[0..3]; returns the borrow
+{
+%r5 = load i32, i32* %r3 ; assemble the minuend from the four 32-bit limbs at %r3 (limb 0 is least significant)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160 ; widen by one limb so a borrow shows up above bit 127
+%r28 = load i32, i32* %r4 ; assemble the subtrahend from %r4 in the same way
+%r29 = zext i32 %r28 to i64
+%r31 = getelementptr i32, i32* %r4, i32 1
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i64
+%r34 = shl i64 %r33, 32
+%r35 = or i64 %r29, %r34
+%r36 = zext i64 %r35 to i96
+%r38 = getelementptr i32, i32* %r4, i32 2
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i96
+%r41 = shl i96 %r40, 64
+%r42 = or i96 %r36, %r41
+%r43 = zext i96 %r42 to i128
+%r45 = getelementptr i32, i32* %r4, i32 3
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i128
+%r48 = shl i128 %r47, 96
+%r49 = or i128 %r43, %r48
+%r50 = zext i128 %r49 to i160
+%r51 = sub i160 %r27, %r50 ; 160-bit difference; the bits above 127 are all ones when the subtraction wrapped
+%r52 = trunc i160 %r51 to i128 ; low 128 bits, written to %r2 one limb at a time below
+%r53 = trunc i128 %r52 to i32
+%r55 = getelementptr i32, i32* %r2, i32 0
+store i32 %r53, i32* %r55
+%r56 = lshr i128 %r52, 32
+%r57 = trunc i128 %r56 to i32
+%r59 = getelementptr i32, i32* %r2, i32 1
+store i32 %r57, i32* %r59
+%r60 = lshr i128 %r56, 32
+%r61 = trunc i128 %r60 to i32
+%r63 = getelementptr i32, i32* %r2, i32 2
+store i32 %r61, i32* %r63
+%r64 = lshr i128 %r60, 32
+%r65 = trunc i128 %r64 to i32
+%r67 = getelementptr i32, i32* %r2, i32 3
+store i32 %r65, i32* %r67
+%r68 = lshr i160 %r51, 128 ; bring the top limb (0 or all ones) down
+%r69 = trunc i160 %r68 to i32
+%r71 = and i32 %r69, 1 ; keep only bit 128 so the result is exactly 0 or 1
+ret i32 %r71 ; borrow of the 128-bit subtraction
+}
+define void @mcl_fp_shr1_4L(i32* noalias  %r1, i32* noalias  %r2) ; %r1[0..3] = %r2[0..3] logically shifted right by one bit, treated as one 128-bit value
+{
+%r3 = load i32, i32* %r2 ; assemble the four 32-bit limbs at %r2 into one i128 (limb 0 is least significant)
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = lshr i128 %r24, 1 ; the shift itself; bit 0 is discarded and a zero enters at bit 127
+%r26 = trunc i128 %r25 to i32 ; write the shifted value back to %r1 one limb at a time
+%r28 = getelementptr i32, i32* %r1, i32 0
+store i32 %r26, i32* %r28
+%r29 = lshr i128 %r25, 32
+%r30 = trunc i128 %r29 to i32
+%r32 = getelementptr i32, i32* %r1, i32 1
+store i32 %r30, i32* %r32
+%r33 = lshr i128 %r29, 32
+%r34 = trunc i128 %r33 to i32
+%r36 = getelementptr i32, i32* %r1, i32 2
+store i32 %r34, i32* %r36
+%r37 = lshr i128 %r33, 32
+%r38 = trunc i128 %r37 to i32
+%r40 = getelementptr i32, i32* %r1, i32 3
+store i32 %r38, i32* %r40
+ret void
+}
+define void @mcl_fp_add4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 4-limb add with one conditional subtraction: %r1 = %r2 + %r3, minus the value at %r4 when the sum is not smaller than it
+{
+%r5 = load i32, i32* %r2 ; assemble the first operand from the four 32-bit limbs at %r2 (limb 0 is least significant)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = load i32, i32* %r3 ; assemble the second operand from %r3 in the same way
+%r28 = zext i32 %r27 to i64
+%r30 = getelementptr i32, i32* %r3, i32 1
+%r31 = load i32, i32* %r30
+%r32 = zext i32 %r31 to i64
+%r33 = shl i64 %r32, 32
+%r34 = or i64 %r28, %r33
+%r35 = zext i64 %r34 to i96
+%r37 = getelementptr i32, i32* %r3, i32 2
+%r38 = load i32, i32* %r37
+%r39 = zext i32 %r38 to i96
+%r40 = shl i96 %r39, 64
+%r41 = or i96 %r35, %r40
+%r42 = zext i96 %r41 to i128
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i128
+%r47 = shl i128 %r46, 96
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r26 to i160 ; widen both operands so the sum keeps its carry bit
+%r50 = zext i128 %r48 to i160
+%r51 = add i160 %r49, %r50 ; unreduced sum
+%r52 = trunc i160 %r51 to i128 ; store the low 128 bits of the unreduced sum to %r1 first; overwritten below if the subtraction applies
+%r53 = trunc i128 %r52 to i32
+%r55 = getelementptr i32, i32* %r1, i32 0
+store i32 %r53, i32* %r55
+%r56 = lshr i128 %r52, 32
+%r57 = trunc i128 %r56 to i32
+%r59 = getelementptr i32, i32* %r1, i32 1
+store i32 %r57, i32* %r59
+%r60 = lshr i128 %r56, 32
+%r61 = trunc i128 %r60 to i32
+%r63 = getelementptr i32, i32* %r1, i32 2
+store i32 %r61, i32* %r63
+%r64 = lshr i128 %r60, 32
+%r65 = trunc i128 %r64 to i32
+%r67 = getelementptr i32, i32* %r1, i32 3
+store i32 %r65, i32* %r67
+%r68 = load i32, i32* %r4 ; assemble the 4-limb value at %r4 (presumably the field modulus p - confirm against the caller)
+%r69 = zext i32 %r68 to i64
+%r71 = getelementptr i32, i32* %r4, i32 1
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i64
+%r74 = shl i64 %r73, 32
+%r75 = or i64 %r69, %r74
+%r76 = zext i64 %r75 to i96
+%r78 = getelementptr i32, i32* %r4, i32 2
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i96
+%r81 = shl i96 %r80, 64
+%r82 = or i96 %r76, %r81
+%r83 = zext i96 %r82 to i128
+%r85 = getelementptr i32, i32* %r4, i32 3
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i128
+%r88 = shl i128 %r87, 96
+%r89 = or i128 %r83, %r88
+%r90 = zext i128 %r89 to i160
+%r91 = sub i160 %r51, %r90 ; trial subtraction: unreduced sum minus the value at %r4
+%r92 = lshr i160 %r91, 128 ; bit 128 is set exactly when the trial subtraction wrapped (sum smaller than the value at %r4)
+%r93 = trunc i160 %r92 to i1
+br i1%r93, label %carry, label %nocarry ; despite its name, %carry is taken when the subtraction borrowed
+nocarry: ; no borrow: replace the stored sum with the reduced difference
+%r94 = trunc i160 %r91 to i128
+%r95 = trunc i128 %r94 to i32
+%r97 = getelementptr i32, i32* %r1, i32 0
+store i32 %r95, i32* %r97
+%r98 = lshr i128 %r94, 32
+%r99 = trunc i128 %r98 to i32
+%r101 = getelementptr i32, i32* %r1, i32 1
+store i32 %r99, i32* %r101
+%r102 = lshr i128 %r98, 32
+%r103 = trunc i128 %r102 to i32
+%r105 = getelementptr i32, i32* %r1, i32 2
+store i32 %r103, i32* %r105
+%r106 = lshr i128 %r102, 32
+%r107 = trunc i128 %r106 to i32
+%r109 = getelementptr i32, i32* %r1, i32 3
+store i32 %r107, i32* %r109
+ret void
+carry: ; borrow: the unreduced sum already stored in %r1 is the result
+ret void
+}
+define void @mcl_fp_addNF4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; branch-free 4-limb add with conditional subtraction, computed entirely in 128 bits (no extra carry limb)
+{
+%r5 = load i32, i32* %r2 ; assemble the first operand from the four 32-bit limbs at %r2 (limb 0 is least significant)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = load i32, i32* %r3 ; assemble the second operand from %r3 in the same way
+%r28 = zext i32 %r27 to i64
+%r30 = getelementptr i32, i32* %r3, i32 1
+%r31 = load i32, i32* %r30
+%r32 = zext i32 %r31 to i64
+%r33 = shl i64 %r32, 32
+%r34 = or i64 %r28, %r33
+%r35 = zext i64 %r34 to i96
+%r37 = getelementptr i32, i32* %r3, i32 2
+%r38 = load i32, i32* %r37
+%r39 = zext i32 %r38 to i96
+%r40 = shl i96 %r39, 64
+%r41 = or i96 %r35, %r40
+%r42 = zext i96 %r41 to i128
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i128
+%r47 = shl i128 %r46, 96
+%r48 = or i128 %r42, %r47
+%r49 = add i128 %r26, %r48 ; sum kept in 128 bits; NOTE(review): appears to rely on the operands leaving the top bit free so this cannot wrap - confirm against the generator
+%r50 = load i32, i32* %r4 ; assemble the 4-limb value at %r4 (presumably the field modulus p - confirm against the caller)
+%r51 = zext i32 %r50 to i64
+%r53 = getelementptr i32, i32* %r4, i32 1
+%r54 = load i32, i32* %r53
+%r55 = zext i32 %r54 to i64
+%r56 = shl i64 %r55, 32
+%r57 = or i64 %r51, %r56
+%r58 = zext i64 %r57 to i96
+%r60 = getelementptr i32, i32* %r4, i32 2
+%r61 = load i32, i32* %r60
+%r62 = zext i32 %r61 to i96
+%r63 = shl i96 %r62, 64
+%r64 = or i96 %r58, %r63
+%r65 = zext i96 %r64 to i128
+%r67 = getelementptr i32, i32* %r4, i32 3
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i128
+%r70 = shl i128 %r69, 96
+%r71 = or i128 %r65, %r70
+%r72 = sub i128 %r49, %r71 ; trial subtraction of the value at %r4
+%r73 = lshr i128 %r72, 127 ; bit 127 of the difference is used as its sign
+%r74 = trunc i128 %r73 to i1
+%r75 = select i1 %r74, i128 %r49, i128 %r72 ; sign bit set: keep the sum; otherwise keep the difference
+%r76 = trunc i128 %r75 to i32 ; write the selected value to %r1 one limb at a time
+%r78 = getelementptr i32, i32* %r1, i32 0
+store i32 %r76, i32* %r78
+%r79 = lshr i128 %r75, 32
+%r80 = trunc i128 %r79 to i32
+%r82 = getelementptr i32, i32* %r1, i32 1
+store i32 %r80, i32* %r82
+%r83 = lshr i128 %r79, 32
+%r84 = trunc i128 %r83 to i32
+%r86 = getelementptr i32, i32* %r1, i32 2
+store i32 %r84, i32* %r86
+%r87 = lshr i128 %r83, 32
+%r88 = trunc i128 %r87 to i32
+%r90 = getelementptr i32, i32* %r1, i32 3
+store i32 %r88, i32* %r90
+ret void
+}
+define void @mcl_fp_sub4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 4-limb subtract with conditional add-back: %r1 = %r2 - %r3, plus the value at %r4 when the subtraction borrowed
+{
+%r5 = load i32, i32* %r2 ; assemble the minuend from the four 32-bit limbs at %r2 (limb 0 is least significant)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = load i32, i32* %r3 ; assemble the subtrahend from %r3 in the same way
+%r28 = zext i32 %r27 to i64
+%r30 = getelementptr i32, i32* %r3, i32 1
+%r31 = load i32, i32* %r30
+%r32 = zext i32 %r31 to i64
+%r33 = shl i64 %r32, 32
+%r34 = or i64 %r28, %r33
+%r35 = zext i64 %r34 to i96
+%r37 = getelementptr i32, i32* %r3, i32 2
+%r38 = load i32, i32* %r37
+%r39 = zext i32 %r38 to i96
+%r40 = shl i96 %r39, 64
+%r41 = or i96 %r35, %r40
+%r42 = zext i96 %r41 to i128
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i128
+%r47 = shl i128 %r46, 96
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r26 to i160 ; widen both operands so a borrow is visible above bit 127
+%r50 = zext i128 %r48 to i160
+%r51 = sub i160 %r49, %r50
+%r52 = trunc i160 %r51 to i128 ; low 128 bits of the difference
+%r53 = lshr i160 %r51, 128 ; bit 128 is set exactly when the subtraction wrapped
+%r54 = trunc i160 %r53 to i1
+%r55 = trunc i128 %r52 to i32 ; store the raw difference to %r1 first; overwritten in the carry block when a borrow occurred
+%r57 = getelementptr i32, i32* %r1, i32 0
+store i32 %r55, i32* %r57
+%r58 = lshr i128 %r52, 32
+%r59 = trunc i128 %r58 to i32
+%r61 = getelementptr i32, i32* %r1, i32 1
+store i32 %r59, i32* %r61
+%r62 = lshr i128 %r58, 32
+%r63 = trunc i128 %r62 to i32
+%r65 = getelementptr i32, i32* %r1, i32 2
+store i32 %r63, i32* %r65
+%r66 = lshr i128 %r62, 32
+%r67 = trunc i128 %r66 to i32
+%r69 = getelementptr i32, i32* %r1, i32 3
+store i32 %r67, i32* %r69
+br i1%r54, label %carry, label %nocarry ; %carry is taken when the subtraction borrowed
+nocarry: ; no borrow: the difference already stored in %r1 is the result
+ret void
+carry: ; borrow: add the value at %r4 back and store again
+%r70 = load i32, i32* %r4 ; assemble the 4-limb value at %r4 (presumably the field modulus p - confirm against the caller)
+%r71 = zext i32 %r70 to i64
+%r73 = getelementptr i32, i32* %r4, i32 1
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i64
+%r76 = shl i64 %r75, 32
+%r77 = or i64 %r71, %r76
+%r78 = zext i64 %r77 to i96
+%r80 = getelementptr i32, i32* %r4, i32 2
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i96
+%r83 = shl i96 %r82, 64
+%r84 = or i96 %r78, %r83
+%r85 = zext i96 %r84 to i128
+%r87 = getelementptr i32, i32* %r4, i32 3
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i128
+%r90 = shl i128 %r89, 96
+%r91 = or i128 %r85, %r90
+%r92 = add i128 %r52, %r91 ; 128-bit add; the wrap-around cancels the earlier borrow
+%r93 = trunc i128 %r92 to i32
+%r95 = getelementptr i32, i32* %r1, i32 0
+store i32 %r93, i32* %r95
+%r96 = lshr i128 %r92, 32
+%r97 = trunc i128 %r96 to i32
+%r99 = getelementptr i32, i32* %r1, i32 1
+store i32 %r97, i32* %r99
+%r100 = lshr i128 %r96, 32
+%r101 = trunc i128 %r100 to i32
+%r103 = getelementptr i32, i32* %r1, i32 2
+store i32 %r101, i32* %r103
+%r104 = lshr i128 %r100, 32
+%r105 = trunc i128 %r104 to i32
+%r107 = getelementptr i32, i32* %r1, i32 3
+store i32 %r105, i32* %r107
+ret void
+}
+define void @mcl_fp_subNF4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; branch-free 4-limb subtract with conditional add-back, computed entirely in 128 bits
+{
+%r5 = load i32, i32* %r2 ; assemble the minuend from the four 32-bit limbs at %r2 (limb 0 is least significant)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = load i32, i32* %r3 ; assemble the subtrahend from %r3 in the same way
+%r28 = zext i32 %r27 to i64
+%r30 = getelementptr i32, i32* %r3, i32 1
+%r31 = load i32, i32* %r30
+%r32 = zext i32 %r31 to i64
+%r33 = shl i64 %r32, 32
+%r34 = or i64 %r28, %r33
+%r35 = zext i64 %r34 to i96
+%r37 = getelementptr i32, i32* %r3, i32 2
+%r38 = load i32, i32* %r37
+%r39 = zext i32 %r38 to i96
+%r40 = shl i96 %r39, 64
+%r41 = or i96 %r35, %r40
+%r42 = zext i96 %r41 to i128
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i128
+%r47 = shl i128 %r46, 96
+%r48 = or i128 %r42, %r47
+%r49 = sub i128 %r26, %r48 ; difference kept in 128 bits
+%r50 = lshr i128 %r49, 127 ; bit 127 is used as the sign; NOTE(review): appears to rely on the operands leaving the top bit free - confirm against the generator
+%r51 = trunc i128 %r50 to i1
+%r52 = load i32, i32* %r4 ; assemble the 4-limb value at %r4 (presumably the field modulus p - confirm against the caller)
+%r53 = zext i32 %r52 to i64
+%r55 = getelementptr i32, i32* %r4, i32 1
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i64
+%r58 = shl i64 %r57, 32
+%r59 = or i64 %r53, %r58
+%r60 = zext i64 %r59 to i96
+%r62 = getelementptr i32, i32* %r4, i32 2
+%r63 = load i32, i32* %r62
+%r64 = zext i32 %r63 to i96
+%r65 = shl i96 %r64, 64
+%r66 = or i96 %r60, %r65
+%r67 = zext i96 %r66 to i128
+%r69 = getelementptr i32, i32* %r4, i32 3
+%r70 = load i32, i32* %r69
+%r71 = zext i32 %r70 to i128
+%r72 = shl i128 %r71, 96
+%r73 = or i128 %r67, %r72
+%r75 = select i1 %r51, i128 %r73, i128 0 ; addend is the value at %r4 when the sign bit is set, zero otherwise
+%r76 = add i128 %r49, %r75 ; add-back performed unconditionally, so there is no branch
+%r77 = trunc i128 %r76 to i32 ; write the result to %r1 one limb at a time
+%r79 = getelementptr i32, i32* %r1, i32 0
+store i32 %r77, i32* %r79
+%r80 = lshr i128 %r76, 32
+%r81 = trunc i128 %r80 to i32
+%r83 = getelementptr i32, i32* %r1, i32 1
+store i32 %r81, i32* %r83
+%r84 = lshr i128 %r80, 32
+%r85 = trunc i128 %r84 to i32
+%r87 = getelementptr i32, i32* %r1, i32 2
+store i32 %r85, i32* %r87
+%r88 = lshr i128 %r84, 32
+%r89 = trunc i128 %r88 to i32
+%r91 = getelementptr i32, i32* %r1, i32 3
+store i32 %r89, i32* %r91
+ret void
+}
+define void @mcl_fpDbl_add4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; double-width add: %r1[0..7] = %r2[0..7] + %r3[0..7], with the upper half conditionally reduced by the 4-limb value at %r4
+{
+%r5 = load i32, i32* %r2 ; assemble the first operand from the eight 32-bit limbs at %r2 into one i256 (limb 0 is least significant)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = load i32, i32* %r3 ; assemble the second 8-limb operand from %r3 in the same way
+%r56 = zext i32 %r55 to i64
+%r58 = getelementptr i32, i32* %r3, i32 1
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i64
+%r61 = shl i64 %r60, 32
+%r62 = or i64 %r56, %r61
+%r63 = zext i64 %r62 to i96
+%r65 = getelementptr i32, i32* %r3, i32 2
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i96
+%r68 = shl i96 %r67, 64
+%r69 = or i96 %r63, %r68
+%r70 = zext i96 %r69 to i128
+%r72 = getelementptr i32, i32* %r3, i32 3
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i128
+%r75 = shl i128 %r74, 96
+%r76 = or i128 %r70, %r75
+%r77 = zext i128 %r76 to i160
+%r79 = getelementptr i32, i32* %r3, i32 4
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i160
+%r82 = shl i160 %r81, 128
+%r83 = or i160 %r77, %r82
+%r84 = zext i160 %r83 to i192
+%r86 = getelementptr i32, i32* %r3, i32 5
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i192
+%r89 = shl i192 %r88, 160
+%r90 = or i192 %r84, %r89
+%r91 = zext i192 %r90 to i224
+%r93 = getelementptr i32, i32* %r3, i32 6
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i224
+%r96 = shl i224 %r95, 192
+%r97 = or i224 %r91, %r96
+%r98 = zext i224 %r97 to i256
+%r100 = getelementptr i32, i32* %r3, i32 7
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i256
+%r103 = shl i256 %r102, 224
+%r104 = or i256 %r98, %r103
+%r105 = zext i256 %r54 to i288 ; widen both operands by one limb so the carry out of bit 255 is kept
+%r106 = zext i256 %r104 to i288
+%r107 = add i288 %r105, %r106
+%r108 = trunc i288 %r107 to i128 ; the low 128 bits go to %r1[0..3] unchanged
+%r109 = trunc i128 %r108 to i32
+%r111 = getelementptr i32, i32* %r1, i32 0
+store i32 %r109, i32* %r111
+%r112 = lshr i128 %r108, 32
+%r113 = trunc i128 %r112 to i32
+%r115 = getelementptr i32, i32* %r1, i32 1
+store i32 %r113, i32* %r115
+%r116 = lshr i128 %r112, 32
+%r117 = trunc i128 %r116 to i32
+%r119 = getelementptr i32, i32* %r1, i32 2
+store i32 %r117, i32* %r119
+%r120 = lshr i128 %r116, 32
+%r121 = trunc i128 %r120 to i32
+%r123 = getelementptr i32, i32* %r1, i32 3
+store i32 %r121, i32* %r123
+%r124 = lshr i288 %r107, 128 ; upper half of the sum, including the carry bit, as a 160-bit value
+%r125 = trunc i288 %r124 to i160
+%r126 = load i32, i32* %r4 ; assemble the 4-limb value at %r4 (presumably the field modulus p - confirm against the caller)
+%r127 = zext i32 %r126 to i64
+%r129 = getelementptr i32, i32* %r4, i32 1
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i64
+%r132 = shl i64 %r131, 32
+%r133 = or i64 %r127, %r132
+%r134 = zext i64 %r133 to i96
+%r136 = getelementptr i32, i32* %r4, i32 2
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i96
+%r139 = shl i96 %r138, 64
+%r140 = or i96 %r134, %r139
+%r141 = zext i96 %r140 to i128
+%r143 = getelementptr i32, i32* %r4, i32 3
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i128
+%r146 = shl i128 %r145, 96
+%r147 = or i128 %r141, %r146
+%r148 = zext i128 %r147 to i160
+%r149 = sub i160 %r125, %r148 ; trial subtraction of the value at %r4 from the upper half
+%r150 = lshr i160 %r149, 128 ; bit 128 of the difference is the selector
+%r151 = trunc i160 %r150 to i1
+%r152 = select i1 %r151, i160 %r125, i160 %r149 ; selector set: keep the unreduced upper half; otherwise keep the difference
+%r153 = trunc i160 %r152 to i128
+%r155 = getelementptr i32, i32* %r1, i32 4 ; the selected upper half is written to %r1[4..7]
+%r156 = trunc i128 %r153 to i32
+%r158 = getelementptr i32, i32* %r155, i32 0
+store i32 %r156, i32* %r158
+%r159 = lshr i128 %r153, 32
+%r160 = trunc i128 %r159 to i32
+%r162 = getelementptr i32, i32* %r155, i32 1
+store i32 %r160, i32* %r162
+%r163 = lshr i128 %r159, 32
+%r164 = trunc i128 %r163 to i32
+%r166 = getelementptr i32, i32* %r155, i32 2
+store i32 %r164, i32* %r166
+%r167 = lshr i128 %r163, 32
+%r168 = trunc i128 %r167 to i32
+%r170 = getelementptr i32, i32* %r155, i32 3
+store i32 %r168, i32* %r170
+ret void
+}
+define void @mcl_fpDbl_sub4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; double-width subtract: %r1[0..7] = %r2[0..7] - %r3[0..7], with the 4-limb value at %r4 added to the upper half on borrow
+{
+%r5 = load i32, i32* %r2 ; assemble the minuend from the eight 32-bit limbs at %r2 into one i256 (limb 0 is least significant)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = load i32, i32* %r3 ; assemble the 8-limb subtrahend from %r3 in the same way
+%r56 = zext i32 %r55 to i64
+%r58 = getelementptr i32, i32* %r3, i32 1
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i64
+%r61 = shl i64 %r60, 32
+%r62 = or i64 %r56, %r61
+%r63 = zext i64 %r62 to i96
+%r65 = getelementptr i32, i32* %r3, i32 2
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i96
+%r68 = shl i96 %r67, 64
+%r69 = or i96 %r63, %r68
+%r70 = zext i96 %r69 to i128
+%r72 = getelementptr i32, i32* %r3, i32 3
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i128
+%r75 = shl i128 %r74, 96
+%r76 = or i128 %r70, %r75
+%r77 = zext i128 %r76 to i160
+%r79 = getelementptr i32, i32* %r3, i32 4
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i160
+%r82 = shl i160 %r81, 128
+%r83 = or i160 %r77, %r82
+%r84 = zext i160 %r83 to i192
+%r86 = getelementptr i32, i32* %r3, i32 5
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i192
+%r89 = shl i192 %r88, 160
+%r90 = or i192 %r84, %r89
+%r91 = zext i192 %r90 to i224
+%r93 = getelementptr i32, i32* %r3, i32 6
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i224
+%r96 = shl i224 %r95, 192
+%r97 = or i224 %r91, %r96
+%r98 = zext i224 %r97 to i256
+%r100 = getelementptr i32, i32* %r3, i32 7
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i256
+%r103 = shl i256 %r102, 224
+%r104 = or i256 %r98, %r103
+%r105 = zext i256 %r54 to i288 ; widen both operands by one limb so a borrow is visible above bit 255
+%r106 = zext i256 %r104 to i288
+%r107 = sub i288 %r105, %r106
+%r108 = trunc i288 %r107 to i128 ; the low 128 bits go to %r1[0..3] unchanged
+%r109 = trunc i128 %r108 to i32
+%r111 = getelementptr i32, i32* %r1, i32 0
+store i32 %r109, i32* %r111
+%r112 = lshr i128 %r108, 32
+%r113 = trunc i128 %r112 to i32
+%r115 = getelementptr i32, i32* %r1, i32 1
+store i32 %r113, i32* %r115
+%r116 = lshr i128 %r112, 32
+%r117 = trunc i128 %r116 to i32
+%r119 = getelementptr i32, i32* %r1, i32 2
+store i32 %r117, i32* %r119
+%r120 = lshr i128 %r116, 32
+%r121 = trunc i128 %r120 to i32
+%r123 = getelementptr i32, i32* %r1, i32 3
+store i32 %r121, i32* %r123
+%r124 = lshr i288 %r107, 128 ; bits 128..255 of the difference: the upper half
+%r125 = trunc i288 %r124 to i128
+%r126 = lshr i288 %r107, 256 ; bit 256 is set exactly when the 256-bit subtraction wrapped
+%r127 = trunc i288 %r126 to i1
+%r128 = load i32, i32* %r4 ; assemble the 4-limb value at %r4 (presumably the field modulus p - confirm against the caller)
+%r129 = zext i32 %r128 to i64
+%r131 = getelementptr i32, i32* %r4, i32 1
+%r132 = load i32, i32* %r131
+%r133 = zext i32 %r132 to i64
+%r134 = shl i64 %r133, 32
+%r135 = or i64 %r129, %r134
+%r136 = zext i64 %r135 to i96
+%r138 = getelementptr i32, i32* %r4, i32 2
+%r139 = load i32, i32* %r138
+%r140 = zext i32 %r139 to i96
+%r141 = shl i96 %r140, 64
+%r142 = or i96 %r136, %r141
+%r143 = zext i96 %r142 to i128
+%r145 = getelementptr i32, i32* %r4, i32 3
+%r146 = load i32, i32* %r145
+%r147 = zext i32 %r146 to i128
+%r148 = shl i128 %r147, 96
+%r149 = or i128 %r143, %r148
+%r151 = select i1 %r127, i128 %r149, i128 0 ; addend is the value at %r4 on borrow, zero otherwise
+%r152 = add i128 %r125, %r151 ; corrected upper half, computed without a branch
+%r154 = getelementptr i32, i32* %r1, i32 4 ; the corrected upper half is written to %r1[4..7]
+%r155 = trunc i128 %r152 to i32
+%r157 = getelementptr i32, i32* %r154, i32 0
+store i32 %r155, i32* %r157
+%r158 = lshr i128 %r152, 32
+%r159 = trunc i128 %r158 to i32
+%r161 = getelementptr i32, i32* %r154, i32 1
+store i32 %r159, i32* %r161
+%r162 = lshr i128 %r158, 32
+%r163 = trunc i128 %r162 to i32
+%r165 = getelementptr i32, i32* %r154, i32 2
+store i32 %r163, i32* %r165
+%r166 = lshr i128 %r162, 32
+%r167 = trunc i128 %r166 to i32
+%r169 = getelementptr i32, i32* %r154, i32 3
+store i32 %r167, i32* %r169
+ret void
+}
+define i192 @mulPv160x32(i32* noalias  %r2, i32 %r3) ; combines five 64-bit partial results from mulPos32x32(%r2, %r3, 0..4) into one i192; presumably the 5-limb value at %r2 times the 32-bit scalar %r3 - mulPos32x32 is defined outside this chunk
+{
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0) ; partial result for position 0; split into low word (trunc) and high word (extractHigh32)
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1) ; position 1
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2) ; position 2
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3) ; position 3
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4) ; position 4
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r24 = zext i32 %r6 to i64 ; pack the five low words into one i160, word i at bit offset 32*i
+%r25 = zext i32 %r10 to i64
+%r26 = shl i64 %r25, 32
+%r27 = or i64 %r24, %r26
+%r28 = zext i64 %r27 to i96
+%r29 = zext i32 %r14 to i96
+%r30 = shl i96 %r29, 64
+%r31 = or i96 %r28, %r30
+%r32 = zext i96 %r31 to i128
+%r33 = zext i32 %r18 to i128
+%r34 = shl i128 %r33, 96
+%r35 = or i128 %r32, %r34
+%r36 = zext i128 %r35 to i160
+%r37 = zext i32 %r22 to i160
+%r38 = shl i160 %r37, 128
+%r39 = or i160 %r36, %r38
+%r40 = zext i32 %r7 to i64 ; pack the five high words into a second i160 in the same layout
+%r41 = zext i32 %r11 to i64
+%r42 = shl i64 %r41, 32
+%r43 = or i64 %r40, %r42
+%r44 = zext i64 %r43 to i96
+%r45 = zext i32 %r15 to i96
+%r46 = shl i96 %r45, 64
+%r47 = or i96 %r44, %r46
+%r48 = zext i96 %r47 to i128
+%r49 = zext i32 %r19 to i128
+%r50 = shl i128 %r49, 96
+%r51 = or i128 %r48, %r50
+%r52 = zext i128 %r51 to i160
+%r53 = zext i32 %r23 to i160
+%r54 = shl i160 %r53, 128
+%r55 = or i160 %r52, %r54
+%r56 = zext i160 %r39 to i192
+%r57 = zext i160 %r55 to i192
+%r58 = shl i192 %r57, 32 ; each high word belongs one limb above its low word
+%r59 = add i192 %r56, %r58 ; a single wide add propagates all carries between the two packed values
+ret i192 %r59
+}
+define void @mcl_fp_mulUnitPre5L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3) ; stores the 192-bit result of mulPv160x32(%r2, %r3) to %r1 as six 32-bit limbs, least significant first
+{
+%r4 = call i192 @mulPv160x32(i32* %r2, i32 %r3)
+%r5 = trunc i192 %r4 to i32 ; limb 0
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i192 %r4, 32 ; each following limb is obtained by shifting the previous remainder right by 32
+%r9 = trunc i192 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i192 %r8, 32
+%r13 = trunc i192 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i192 %r12, 32
+%r17 = trunc i192 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i192 %r16, 32
+%r21 = trunc i192 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+%r24 = lshr i192 %r20, 32
+%r25 = trunc i192 %r24 to i32 ; limb 5, the top word of the result
+%r27 = getelementptr i32, i32* %r1, i32 5
+store i32 %r25, i32* %r27
+ret void
+}
+define void @mcl_fpDbl_mulPre5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; row-by-row multiplication with no reduction: accumulates mulPv160x32(%r2, %r3[j]) for j = 0..4 and writes ten 32-bit limbs to %r1
+{
+%r4 = load i32, i32* %r3 ; row 0: %r3[0]
+%r5 = call i192 @mulPv160x32(i32* %r2, i32 %r4)
+%r6 = trunc i192 %r5 to i32 ; the lowest limb of the accumulator is final after each row
+store i32 %r6, i32* %r1
+%r7 = lshr i192 %r5, 32 ; drop the emitted limb; the remainder carries into the next row
+%r9 = getelementptr i32, i32* %r3, i32 1
+%r10 = load i32, i32* %r9 ; row 1: %r3[1]
+%r11 = call i192 @mulPv160x32(i32* %r2, i32 %r10)
+%r12 = add i192 %r7, %r11
+%r13 = trunc i192 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 1
+store i32 %r13, i32* %r15
+%r16 = lshr i192 %r12, 32
+%r18 = getelementptr i32, i32* %r3, i32 2
+%r19 = load i32, i32* %r18 ; row 2: %r3[2]
+%r20 = call i192 @mulPv160x32(i32* %r2, i32 %r19)
+%r21 = add i192 %r16, %r20
+%r22 = trunc i192 %r21 to i32
+%r24 = getelementptr i32, i32* %r1, i32 2
+store i32 %r22, i32* %r24
+%r25 = lshr i192 %r21, 32
+%r27 = getelementptr i32, i32* %r3, i32 3
+%r28 = load i32, i32* %r27 ; row 3: %r3[3]
+%r29 = call i192 @mulPv160x32(i32* %r2, i32 %r28)
+%r30 = add i192 %r25, %r29
+%r31 = trunc i192 %r30 to i32
+%r33 = getelementptr i32, i32* %r1, i32 3
+store i32 %r31, i32* %r33
+%r34 = lshr i192 %r30, 32
+%r36 = getelementptr i32, i32* %r3, i32 4
+%r37 = load i32, i32* %r36 ; row 4 (last): %r3[4]
+%r38 = call i192 @mulPv160x32(i32* %r2, i32 %r37)
+%r39 = add i192 %r34, %r38
+%r41 = getelementptr i32, i32* %r1, i32 4 ; after the last row the whole 192-bit accumulator is written out as %r1[4..9]
+%r42 = trunc i192 %r39 to i32
+%r44 = getelementptr i32, i32* %r41, i32 0
+store i32 %r42, i32* %r44
+%r45 = lshr i192 %r39, 32
+%r46 = trunc i192 %r45 to i32
+%r48 = getelementptr i32, i32* %r41, i32 1
+store i32 %r46, i32* %r48
+%r49 = lshr i192 %r45, 32
+%r50 = trunc i192 %r49 to i32
+%r52 = getelementptr i32, i32* %r41, i32 2
+store i32 %r50, i32* %r52
+%r53 = lshr i192 %r49, 32
+%r54 = trunc i192 %r53 to i32
+%r56 = getelementptr i32, i32* %r41, i32 3
+store i32 %r54, i32* %r56
+%r57 = lshr i192 %r53, 32
+%r58 = trunc i192 %r57 to i32
+%r60 = getelementptr i32, i32* %r41, i32 4
+store i32 %r58, i32* %r60
+%r61 = lshr i192 %r57, 32
+%r62 = trunc i192 %r61 to i32
+%r64 = getelementptr i32, i32* %r41, i32 5
+store i32 %r62, i32* %r64
+ret void
+}
+define void @mcl_fpDbl_sqrPre5L(i32* noalias  %r1, i32* noalias  %r2) ; squaring with no reduction: same row-by-row scheme as a general multiply with %r2 used for both operands; writes ten 32-bit limbs to %r1
+{
+%r3 = load i32, i32* %r2 ; row 0: %r2[0]
+%r4 = call i192 @mulPv160x32(i32* %r2, i32 %r3)
+%r5 = trunc i192 %r4 to i32 ; the lowest limb of the accumulator is final after each row
+store i32 %r5, i32* %r1
+%r6 = lshr i192 %r4, 32 ; drop the emitted limb; the remainder carries into the next row
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8 ; row 1: %r2[1]
+%r10 = call i192 @mulPv160x32(i32* %r2, i32 %r9)
+%r11 = add i192 %r6, %r10
+%r12 = trunc i192 %r11 to i32
+%r14 = getelementptr i32, i32* %r1, i32 1
+store i32 %r12, i32* %r14
+%r15 = lshr i192 %r11, 32
+%r17 = getelementptr i32, i32* %r2, i32 2
+%r18 = load i32, i32* %r17 ; row 2: %r2[2]
+%r19 = call i192 @mulPv160x32(i32* %r2, i32 %r18)
+%r20 = add i192 %r15, %r19
+%r21 = trunc i192 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 2
+store i32 %r21, i32* %r23
+%r24 = lshr i192 %r20, 32
+%r26 = getelementptr i32, i32* %r2, i32 3
+%r27 = load i32, i32* %r26 ; row 3: %r2[3]
+%r28 = call i192 @mulPv160x32(i32* %r2, i32 %r27)
+%r29 = add i192 %r24, %r28
+%r30 = trunc i192 %r29 to i32
+%r32 = getelementptr i32, i32* %r1, i32 3
+store i32 %r30, i32* %r32
+%r33 = lshr i192 %r29, 32
+%r35 = getelementptr i32, i32* %r2, i32 4
+%r36 = load i32, i32* %r35 ; row 4 (last): %r2[4]
+%r37 = call i192 @mulPv160x32(i32* %r2, i32 %r36)
+%r38 = add i192 %r33, %r37
+%r40 = getelementptr i32, i32* %r1, i32 4 ; after the last row the whole 192-bit accumulator is written out as %r1[4..9]
+%r41 = trunc i192 %r38 to i32
+%r43 = getelementptr i32, i32* %r40, i32 0
+store i32 %r41, i32* %r43
+%r44 = lshr i192 %r38, 32
+%r45 = trunc i192 %r44 to i32
+%r47 = getelementptr i32, i32* %r40, i32 1
+store i32 %r45, i32* %r47
+%r48 = lshr i192 %r44, 32
+%r49 = trunc i192 %r48 to i32
+%r51 = getelementptr i32, i32* %r40, i32 2
+store i32 %r49, i32* %r51
+%r52 = lshr i192 %r48, 32
+%r53 = trunc i192 %r52 to i32
+%r55 = getelementptr i32, i32* %r40, i32 3
+store i32 %r53, i32* %r55
+%r56 = lshr i192 %r52, 32
+%r57 = trunc i192 %r56 to i32
+%r59 = getelementptr i32, i32* %r40, i32 4
+store i32 %r57, i32* %r59
+%r60 = lshr i192 %r56, 32
+%r61 = trunc i192 %r60 to i32
+%r63 = getelementptr i32, i32* %r40, i32 5
+store i32 %r61, i32* %r63
+ret void
+}
+define void @mcl_fp_mont5L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; word-by-word multiply/reduce over five 32-bit limbs: reads %r2, %r3, %r4; writes %r1[0..4]
+{ ; NOTE(review): mulPv160x32 is defined elsewhere; presumably (5-limb value at ptr) * (32-bit word) as i192 - confirm
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; address of the word stored immediately before %r4[0]
+%r7 = load i32, i32* %r6 ; per-round multiplier constant (presumably the Montgomery factor for the modulus at %r4 - confirm with caller)
+%r9 = getelementptr i32, i32* %r3, i32 0
+%r10 = load i32, i32* %r9
+%r11 = call i192 @mulPv160x32(i32* %r2, i32 %r10) ; round 0: product of %r2 with %r3[0]
+%r12 = zext i192 %r11 to i224
+%r13 = trunc i192 %r11 to i32
+%r14 = mul i32 %r13, %r7 ; q = low word of the accumulator * %r7 (mod 2^32)
+%r15 = call i192 @mulPv160x32(i32* %r4, i32 %r14)
+%r16 = zext i192 %r15 to i224
+%r17 = add i224 %r12, %r16 ; accumulate the %r4 * q term in 224 bits
+%r18 = lshr i224 %r17, 32 ; discard the low 32 bits before the next round
+%r20 = getelementptr i32, i32* %r3, i32 1
+%r21 = load i32, i32* %r20
+%r22 = call i192 @mulPv160x32(i32* %r2, i32 %r21) ; round 1: same steps with %r3[1]
+%r23 = zext i192 %r22 to i224
+%r24 = add i224 %r18, %r23
+%r25 = trunc i224 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i192 @mulPv160x32(i32* %r4, i32 %r26)
+%r28 = zext i192 %r27 to i224
+%r29 = add i224 %r24, %r28
+%r30 = lshr i224 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2
+%r33 = load i32, i32* %r32
+%r34 = call i192 @mulPv160x32(i32* %r2, i32 %r33) ; round 2: same steps with %r3[2]
+%r35 = zext i192 %r34 to i224
+%r36 = add i224 %r30, %r35
+%r37 = trunc i224 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i192 @mulPv160x32(i32* %r4, i32 %r38)
+%r40 = zext i192 %r39 to i224
+%r41 = add i224 %r36, %r40
+%r42 = lshr i224 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = call i192 @mulPv160x32(i32* %r2, i32 %r45) ; round 3: same steps with %r3[3]
+%r47 = zext i192 %r46 to i224
+%r48 = add i224 %r42, %r47
+%r49 = trunc i224 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i192 @mulPv160x32(i32* %r4, i32 %r50)
+%r52 = zext i192 %r51 to i224
+%r53 = add i224 %r48, %r52
+%r54 = lshr i224 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4
+%r57 = load i32, i32* %r56
+%r58 = call i192 @mulPv160x32(i32* %r2, i32 %r57) ; round 4: same steps with %r3[4]
+%r59 = zext i192 %r58 to i224
+%r60 = add i224 %r54, %r59
+%r61 = trunc i224 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i192 @mulPv160x32(i32* %r4, i32 %r62)
+%r64 = zext i192 %r63 to i224
+%r65 = add i224 %r60, %r64
+%r66 = lshr i224 %r65, 32
+%r67 = trunc i224 %r66 to i192 ; accumulator after the five rounds
+%r68 = load i32, i32* %r4 ; from here to %r96: pack %r4[0..4] little-endian into one i160
+%r69 = zext i32 %r68 to i64
+%r71 = getelementptr i32, i32* %r4, i32 1
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i64
+%r74 = shl i64 %r73, 32
+%r75 = or i64 %r69, %r74
+%r76 = zext i64 %r75 to i96
+%r78 = getelementptr i32, i32* %r4, i32 2
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i96
+%r81 = shl i96 %r80, 64
+%r82 = or i96 %r76, %r81
+%r83 = zext i96 %r82 to i128
+%r85 = getelementptr i32, i32* %r4, i32 3
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i128
+%r88 = shl i128 %r87, 96
+%r89 = or i128 %r83, %r88
+%r90 = zext i128 %r89 to i160
+%r92 = getelementptr i32, i32* %r4, i32 4
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i160
+%r95 = shl i160 %r94, 128
+%r96 = or i160 %r90, %r95
+%r97 = zext i160 %r96 to i192
+%r98 = sub i192 %r67, %r97 ; trial subtraction of the packed %r4 value
+%r99 = lshr i192 %r98, 160
+%r100 = trunc i192 %r99 to i1 ; bit 160 of the difference is used as the borrow flag
+%r101 = select i1 %r100, i192 %r67, i192 %r98 ; flag set: keep the unsubtracted value; otherwise keep the difference
+%r102 = trunc i192 %r101 to i160
+%r103 = trunc i160 %r102 to i32 ; from here on: store the low 160 bits to %r1[0..4], 32 bits per store
+%r105 = getelementptr i32, i32* %r1, i32 0
+store i32 %r103, i32* %r105
+%r106 = lshr i160 %r102, 32
+%r107 = trunc i160 %r106 to i32
+%r109 = getelementptr i32, i32* %r1, i32 1
+store i32 %r107, i32* %r109
+%r110 = lshr i160 %r106, 32
+%r111 = trunc i160 %r110 to i32
+%r113 = getelementptr i32, i32* %r1, i32 2
+store i32 %r111, i32* %r113
+%r114 = lshr i160 %r110, 32
+%r115 = trunc i160 %r114 to i32
+%r117 = getelementptr i32, i32* %r1, i32 3
+store i32 %r115, i32* %r117
+%r118 = lshr i160 %r114, 32
+%r119 = trunc i160 %r118 to i32
+%r121 = getelementptr i32, i32* %r1, i32 4
+store i32 %r119, i32* %r121
+ret void
+}
+define void @mcl_fp_montNF5L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; same round structure as mcl_fp_mont5L but with a 192-bit accumulator and a sign-bit test at the end; writes %r1[0..4]
+{ ; NOTE(review): no widening to i224 here, so this presumably relies on the modulus at %r4 leaving headroom in its top limb - confirm with caller
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; address of the word stored immediately before %r4[0]
+%r7 = load i32, i32* %r6 ; per-round multiplier constant
+%r8 = load i32, i32* %r3
+%r9 = call i192 @mulPv160x32(i32* %r2, i32 %r8) ; round 0: product of %r2 with %r3[0] (mulPv160x32 is defined elsewhere)
+%r10 = trunc i192 %r9 to i32
+%r11 = mul i32 %r10, %r7 ; q = low word of the accumulator * %r7 (mod 2^32)
+%r12 = call i192 @mulPv160x32(i32* %r4, i32 %r11)
+%r13 = add i192 %r9, %r12 ; accumulate the %r4 * q term
+%r14 = lshr i192 %r13, 32 ; discard the low 32 bits before the next round
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = call i192 @mulPv160x32(i32* %r2, i32 %r17) ; round 1: same steps with %r3[1]
+%r19 = add i192 %r14, %r18
+%r20 = trunc i192 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i192 @mulPv160x32(i32* %r4, i32 %r21)
+%r23 = add i192 %r19, %r22
+%r24 = lshr i192 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2
+%r27 = load i32, i32* %r26
+%r28 = call i192 @mulPv160x32(i32* %r2, i32 %r27) ; round 2: same steps with %r3[2]
+%r29 = add i192 %r24, %r28
+%r30 = trunc i192 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i192 @mulPv160x32(i32* %r4, i32 %r31)
+%r33 = add i192 %r29, %r32
+%r34 = lshr i192 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3
+%r37 = load i32, i32* %r36
+%r38 = call i192 @mulPv160x32(i32* %r2, i32 %r37) ; round 3: same steps with %r3[3]
+%r39 = add i192 %r34, %r38
+%r40 = trunc i192 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i192 @mulPv160x32(i32* %r4, i32 %r41)
+%r43 = add i192 %r39, %r42
+%r44 = lshr i192 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4
+%r47 = load i32, i32* %r46
+%r48 = call i192 @mulPv160x32(i32* %r2, i32 %r47) ; round 4: same steps with %r3[4]
+%r49 = add i192 %r44, %r48
+%r50 = trunc i192 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i192 @mulPv160x32(i32* %r4, i32 %r51)
+%r53 = add i192 %r49, %r52
+%r54 = lshr i192 %r53, 32
+%r55 = trunc i192 %r54 to i160 ; accumulator after the five rounds, narrowed to 160 bits
+%r56 = load i32, i32* %r4 ; from here to %r84: pack %r4[0..4] little-endian into one i160
+%r57 = zext i32 %r56 to i64
+%r59 = getelementptr i32, i32* %r4, i32 1
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i64
+%r62 = shl i64 %r61, 32
+%r63 = or i64 %r57, %r62
+%r64 = zext i64 %r63 to i96
+%r66 = getelementptr i32, i32* %r4, i32 2
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i96
+%r69 = shl i96 %r68, 64
+%r70 = or i96 %r64, %r69
+%r71 = zext i96 %r70 to i128
+%r73 = getelementptr i32, i32* %r4, i32 3
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i128
+%r76 = shl i128 %r75, 96
+%r77 = or i128 %r71, %r76
+%r78 = zext i128 %r77 to i160
+%r80 = getelementptr i32, i32* %r4, i32 4
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i160
+%r83 = shl i160 %r82, 128
+%r84 = or i160 %r78, %r83
+%r85 = sub i160 %r55, %r84 ; trial subtraction of the packed %r4 value, in 160 bits
+%r86 = lshr i160 %r85, 159
+%r87 = trunc i160 %r86 to i1 ; top bit (bit 159) of the difference is used as the "went negative" flag
+%r88 = select i1 %r87, i160 %r55, i160 %r85 ; flag set: keep the unsubtracted value; otherwise keep the difference
+%r89 = trunc i160 %r88 to i32 ; from here on: store the 160-bit result to %r1[0..4], 32 bits per store
+%r91 = getelementptr i32, i32* %r1, i32 0
+store i32 %r89, i32* %r91
+%r92 = lshr i160 %r88, 32
+%r93 = trunc i160 %r92 to i32
+%r95 = getelementptr i32, i32* %r1, i32 1
+store i32 %r93, i32* %r95
+%r96 = lshr i160 %r92, 32
+%r97 = trunc i160 %r96 to i32
+%r99 = getelementptr i32, i32* %r1, i32 2
+store i32 %r97, i32* %r99
+%r100 = lshr i160 %r96, 32
+%r101 = trunc i160 %r100 to i32
+%r103 = getelementptr i32, i32* %r1, i32 3
+store i32 %r101, i32* %r103
+%r104 = lshr i160 %r100, 32
+%r105 = trunc i160 %r104 to i32
+%r107 = getelementptr i32, i32* %r1, i32 4
+store i32 %r105, i32* %r107
+ret void
+}
+define void @mcl_fp_montRed5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; reduces the ten 32-bit limbs at %r2 to five limbs in %r1, using the value at %r3[0..4]
+{ ; NOTE(review): mulPv160x32 is defined elsewhere; presumably (5-limb value at ptr) * (32-bit word) as i192 - confirm
+%r5 = getelementptr i32, i32* %r3, i32 -1 ; address of the word stored immediately before %r3[0]
+%r6 = load i32, i32* %r5 ; per-round multiplier constant
+%r7 = load i32, i32* %r3 ; from here to %r35: pack %r3[0..4] little-endian into one i160 (used by the final trial subtraction)
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = load i32, i32* %r2 ; from here to %r99: pack %r2[0..9] little-endian into one i320
+%r37 = zext i32 %r36 to i64
+%r39 = getelementptr i32, i32* %r2, i32 1
+%r40 = load i32, i32* %r39
+%r41 = zext i32 %r40 to i64
+%r42 = shl i64 %r41, 32
+%r43 = or i64 %r37, %r42
+%r44 = zext i64 %r43 to i96
+%r46 = getelementptr i32, i32* %r2, i32 2
+%r47 = load i32, i32* %r46
+%r48 = zext i32 %r47 to i96
+%r49 = shl i96 %r48, 64
+%r50 = or i96 %r44, %r49
+%r51 = zext i96 %r50 to i128
+%r53 = getelementptr i32, i32* %r2, i32 3
+%r54 = load i32, i32* %r53
+%r55 = zext i32 %r54 to i128
+%r56 = shl i128 %r55, 96
+%r57 = or i128 %r51, %r56
+%r58 = zext i128 %r57 to i160
+%r60 = getelementptr i32, i32* %r2, i32 4
+%r61 = load i32, i32* %r60
+%r62 = zext i32 %r61 to i160
+%r63 = shl i160 %r62, 128
+%r64 = or i160 %r58, %r63
+%r65 = zext i160 %r64 to i192
+%r67 = getelementptr i32, i32* %r2, i32 5
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i192
+%r70 = shl i192 %r69, 160
+%r71 = or i192 %r65, %r70
+%r72 = zext i192 %r71 to i224
+%r74 = getelementptr i32, i32* %r2, i32 6
+%r75 = load i32, i32* %r74
+%r76 = zext i32 %r75 to i224
+%r77 = shl i224 %r76, 192
+%r78 = or i224 %r72, %r77
+%r79 = zext i224 %r78 to i256
+%r81 = getelementptr i32, i32* %r2, i32 7
+%r82 = load i32, i32* %r81
+%r83 = zext i32 %r82 to i256
+%r84 = shl i256 %r83, 224
+%r85 = or i256 %r79, %r84
+%r86 = zext i256 %r85 to i288
+%r88 = getelementptr i32, i32* %r2, i32 8
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i288
+%r91 = shl i288 %r90, 256
+%r92 = or i288 %r86, %r91
+%r93 = zext i288 %r92 to i320
+%r95 = getelementptr i32, i32* %r2, i32 9
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i320
+%r98 = shl i320 %r97, 288
+%r99 = or i320 %r93, %r98
+%r100 = zext i320 %r99 to i352 ; widen by one limb so the additions below cannot wrap
+%r101 = trunc i352 %r100 to i32
+%r102 = mul i32 %r101, %r6 ; round 0: q = low word * %r6 (mod 2^32)
+%r103 = call i192 @mulPv160x32(i32* %r3, i32 %r102)
+%r104 = zext i192 %r103 to i352
+%r105 = add i352 %r100, %r104 ; add the %r3 * q term
+%r106 = lshr i352 %r105, 32 ; discard the low 32 bits
+%r107 = trunc i352 %r106 to i320 ; working width shrinks by 32 bits each round
+%r108 = trunc i320 %r107 to i32
+%r109 = mul i32 %r108, %r6 ; round 1
+%r110 = call i192 @mulPv160x32(i32* %r3, i32 %r109)
+%r111 = zext i192 %r110 to i320
+%r112 = add i320 %r107, %r111
+%r113 = lshr i320 %r112, 32
+%r114 = trunc i320 %r113 to i288
+%r115 = trunc i288 %r114 to i32
+%r116 = mul i32 %r115, %r6 ; round 2
+%r117 = call i192 @mulPv160x32(i32* %r3, i32 %r116)
+%r118 = zext i192 %r117 to i288
+%r119 = add i288 %r114, %r118
+%r120 = lshr i288 %r119, 32
+%r121 = trunc i288 %r120 to i256
+%r122 = trunc i256 %r121 to i32
+%r123 = mul i32 %r122, %r6 ; round 3
+%r124 = call i192 @mulPv160x32(i32* %r3, i32 %r123)
+%r125 = zext i192 %r124 to i256
+%r126 = add i256 %r121, %r125
+%r127 = lshr i256 %r126, 32
+%r128 = trunc i256 %r127 to i224
+%r129 = trunc i224 %r128 to i32
+%r130 = mul i32 %r129, %r6 ; round 4
+%r131 = call i192 @mulPv160x32(i32* %r3, i32 %r130)
+%r132 = zext i192 %r131 to i224
+%r133 = add i224 %r128, %r132
+%r134 = lshr i224 %r133, 32
+%r135 = trunc i224 %r134 to i192 ; value remaining after the five rounds
+%r136 = zext i160 %r35 to i192
+%r137 = sub i192 %r135, %r136 ; trial subtraction of the packed %r3 value
+%r138 = lshr i192 %r137, 160
+%r139 = trunc i192 %r138 to i1 ; bit 160 of the difference is used as the borrow flag
+%r140 = select i1 %r139, i192 %r135, i192 %r137 ; flag set: keep the unsubtracted value; otherwise keep the difference
+%r141 = trunc i192 %r140 to i160
+%r142 = trunc i160 %r141 to i32 ; from here on: store the low 160 bits to %r1[0..4], 32 bits per store
+%r144 = getelementptr i32, i32* %r1, i32 0
+store i32 %r142, i32* %r144
+%r145 = lshr i160 %r141, 32
+%r146 = trunc i160 %r145 to i32
+%r148 = getelementptr i32, i32* %r1, i32 1
+store i32 %r146, i32* %r148
+%r149 = lshr i160 %r145, 32
+%r150 = trunc i160 %r149 to i32
+%r152 = getelementptr i32, i32* %r1, i32 2
+store i32 %r150, i32* %r152
+%r153 = lshr i160 %r149, 32
+%r154 = trunc i160 %r153 to i32
+%r156 = getelementptr i32, i32* %r1, i32 3
+store i32 %r154, i32* %r156
+%r157 = lshr i160 %r153, 32
+%r158 = trunc i160 %r157 to i32
+%r160 = getelementptr i32, i32* %r1, i32 4
+store i32 %r158, i32* %r160
+ret void
+}
+define i32 @mcl_fp_addPre5L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; plain 5-limb addition with no reduction: %r2[0..4] = low 160 bits of %r3 + %r4; returns the carry out
+{
+%r5 = load i32, i32* %r3 ; from here to %r33: pack %r3[0..4] little-endian into one i160
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192 ; widen so the carry lands in bit 160
+%r35 = load i32, i32* %r4 ; from here to %r63: pack %r4[0..4] the same way
+%r36 = zext i32 %r35 to i64
+%r38 = getelementptr i32, i32* %r4, i32 1
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i64
+%r41 = shl i64 %r40, 32
+%r42 = or i64 %r36, %r41
+%r43 = zext i64 %r42 to i96
+%r45 = getelementptr i32, i32* %r4, i32 2
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i96
+%r48 = shl i96 %r47, 64
+%r49 = or i96 %r43, %r48
+%r50 = zext i96 %r49 to i128
+%r52 = getelementptr i32, i32* %r4, i32 3
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i128
+%r55 = shl i128 %r54, 96
+%r56 = or i128 %r50, %r55
+%r57 = zext i128 %r56 to i160
+%r59 = getelementptr i32, i32* %r4, i32 4
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i160
+%r62 = shl i160 %r61, 128
+%r63 = or i160 %r57, %r62
+%r64 = zext i160 %r63 to i192
+%r65 = add i192 %r34, %r64 ; 192-bit sum of the two zero-extended operands
+%r66 = trunc i192 %r65 to i160
+%r67 = trunc i160 %r66 to i32 ; from here to the last store: write the low 160 bits to %r2[0..4], 32 bits per store
+%r69 = getelementptr i32, i32* %r2, i32 0
+store i32 %r67, i32* %r69
+%r70 = lshr i160 %r66, 32
+%r71 = trunc i160 %r70 to i32
+%r73 = getelementptr i32, i32* %r2, i32 1
+store i32 %r71, i32* %r73
+%r74 = lshr i160 %r70, 32
+%r75 = trunc i160 %r74 to i32
+%r77 = getelementptr i32, i32* %r2, i32 2
+store i32 %r75, i32* %r77
+%r78 = lshr i160 %r74, 32
+%r79 = trunc i160 %r78 to i32
+%r81 = getelementptr i32, i32* %r2, i32 3
+store i32 %r79, i32* %r81
+%r82 = lshr i160 %r78, 32
+%r83 = trunc i160 %r82 to i32
+%r85 = getelementptr i32, i32* %r2, i32 4
+store i32 %r83, i32* %r85
+%r86 = lshr i192 %r65, 160 ; bits 160 and above of the sum (0 or 1 for zero-extended 160-bit inputs)
+%r87 = trunc i192 %r86 to i32
+ret i32 %r87 ; carry out of the 160-bit addition
+}
+define i32 @mcl_fp_subPre5L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; plain 5-limb subtraction with no reduction: %r2[0..4] = low 160 bits of %r3 - %r4; returns the borrow (0 or 1)
+{
+%r5 = load i32, i32* %r3 ; from here to %r33: pack %r3[0..4] little-endian into one i160
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192 ; widen so a borrow shows up in the bits above 159
+%r35 = load i32, i32* %r4 ; from here to %r63: pack %r4[0..4] the same way
+%r36 = zext i32 %r35 to i64
+%r38 = getelementptr i32, i32* %r4, i32 1
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i64
+%r41 = shl i64 %r40, 32
+%r42 = or i64 %r36, %r41
+%r43 = zext i64 %r42 to i96
+%r45 = getelementptr i32, i32* %r4, i32 2
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i96
+%r48 = shl i96 %r47, 64
+%r49 = or i96 %r43, %r48
+%r50 = zext i96 %r49 to i128
+%r52 = getelementptr i32, i32* %r4, i32 3
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i128
+%r55 = shl i128 %r54, 96
+%r56 = or i128 %r50, %r55
+%r57 = zext i128 %r56 to i160
+%r59 = getelementptr i32, i32* %r4, i32 4
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i160
+%r62 = shl i160 %r61, 128
+%r63 = or i160 %r57, %r62
+%r64 = zext i160 %r63 to i192
+%r65 = sub i192 %r34, %r64 ; 192-bit difference; wraps (high bits all ones) when %r3 < %r4
+%r66 = trunc i192 %r65 to i160
+%r67 = trunc i160 %r66 to i32 ; from here to the last store: write the low 160 bits to %r2[0..4], 32 bits per store
+%r69 = getelementptr i32, i32* %r2, i32 0
+store i32 %r67, i32* %r69
+%r70 = lshr i160 %r66, 32
+%r71 = trunc i160 %r70 to i32
+%r73 = getelementptr i32, i32* %r2, i32 1
+store i32 %r71, i32* %r73
+%r74 = lshr i160 %r70, 32
+%r75 = trunc i160 %r74 to i32
+%r77 = getelementptr i32, i32* %r2, i32 2
+store i32 %r75, i32* %r77
+%r78 = lshr i160 %r74, 32
+%r79 = trunc i160 %r78 to i32
+%r81 = getelementptr i32, i32* %r2, i32 3
+store i32 %r79, i32* %r81
+%r82 = lshr i160 %r78, 32
+%r83 = trunc i160 %r82 to i32
+%r85 = getelementptr i32, i32* %r2, i32 4
+store i32 %r83, i32* %r85
+%r86 = lshr i192 %r65, 160 ; bits 160..191 of the difference
+%r87 = trunc i192 %r86 to i32
+%r89 = and i32 %r87, 1 ; keep only bit 160 so the return value is 0 or 1
+ret i32 %r89
+}
+define void @mcl_fp_shr1_5L(i32* noalias  %r1, i32* noalias  %r2) ; %r1[0..4] = (160-bit value at %r2[0..4]) logically shifted right by one bit
+{
+%r3 = load i32, i32* %r2 ; from here to %r31: pack %r2[0..4] little-endian into one i160
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = lshr i160 %r31, 1 ; the shift itself: bit 0 is dropped, bit 159 becomes zero
+%r33 = trunc i160 %r32 to i32 ; from here on: write the result to %r1[0..4], 32 bits per store
+%r35 = getelementptr i32, i32* %r1, i32 0
+store i32 %r33, i32* %r35
+%r36 = lshr i160 %r32, 32
+%r37 = trunc i160 %r36 to i32
+%r39 = getelementptr i32, i32* %r1, i32 1
+store i32 %r37, i32* %r39
+%r40 = lshr i160 %r36, 32
+%r41 = trunc i160 %r40 to i32
+%r43 = getelementptr i32, i32* %r1, i32 2
+store i32 %r41, i32* %r43
+%r44 = lshr i160 %r40, 32
+%r45 = trunc i160 %r44 to i32
+%r47 = getelementptr i32, i32* %r1, i32 3
+store i32 %r45, i32* %r47
+%r48 = lshr i160 %r44, 32
+%r49 = trunc i160 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 4
+store i32 %r49, i32* %r51
+ret void
+}
+define void @mcl_fp_add5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 5-limb addition with one conditional subtraction: %r1 = %r2 + %r3, minus the value at %r4 when that does not borrow
+{
+%r5 = load i32, i32* %r2 ; from here to %r33: pack %r2[0..4] little-endian into one i160
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = load i32, i32* %r3 ; from here to %r62: pack %r3[0..4] the same way
+%r35 = zext i32 %r34 to i64
+%r37 = getelementptr i32, i32* %r3, i32 1
+%r38 = load i32, i32* %r37
+%r39 = zext i32 %r38 to i64
+%r40 = shl i64 %r39, 32
+%r41 = or i64 %r35, %r40
+%r42 = zext i64 %r41 to i96
+%r44 = getelementptr i32, i32* %r3, i32 2
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i96
+%r47 = shl i96 %r46, 64
+%r48 = or i96 %r42, %r47
+%r49 = zext i96 %r48 to i128
+%r51 = getelementptr i32, i32* %r3, i32 3
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i128
+%r54 = shl i128 %r53, 96
+%r55 = or i128 %r49, %r54
+%r56 = zext i128 %r55 to i160
+%r58 = getelementptr i32, i32* %r3, i32 4
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i160
+%r61 = shl i160 %r60, 128
+%r62 = or i160 %r56, %r61
+%r63 = zext i160 %r33 to i192
+%r64 = zext i160 %r62 to i192
+%r65 = add i192 %r63, %r64 ; 192-bit sum, so the carry out of bit 159 is retained
+%r66 = trunc i192 %r65 to i160
+%r67 = trunc i160 %r66 to i32 ; first write: store the low 160 bits of the raw sum to %r1[0..4]
+%r69 = getelementptr i32, i32* %r1, i32 0
+store i32 %r67, i32* %r69
+%r70 = lshr i160 %r66, 32
+%r71 = trunc i160 %r70 to i32
+%r73 = getelementptr i32, i32* %r1, i32 1
+store i32 %r71, i32* %r73
+%r74 = lshr i160 %r70, 32
+%r75 = trunc i160 %r74 to i32
+%r77 = getelementptr i32, i32* %r1, i32 2
+store i32 %r75, i32* %r77
+%r78 = lshr i160 %r74, 32
+%r79 = trunc i160 %r78 to i32
+%r81 = getelementptr i32, i32* %r1, i32 3
+store i32 %r79, i32* %r81
+%r82 = lshr i160 %r78, 32
+%r83 = trunc i160 %r82 to i32
+%r85 = getelementptr i32, i32* %r1, i32 4
+store i32 %r83, i32* %r85
+%r86 = load i32, i32* %r4 ; from here to %r114: pack %r4[0..4] into one i160
+%r87 = zext i32 %r86 to i64
+%r89 = getelementptr i32, i32* %r4, i32 1
+%r90 = load i32, i32* %r89
+%r91 = zext i32 %r90 to i64
+%r92 = shl i64 %r91, 32
+%r93 = or i64 %r87, %r92
+%r94 = zext i64 %r93 to i96
+%r96 = getelementptr i32, i32* %r4, i32 2
+%r97 = load i32, i32* %r96
+%r98 = zext i32 %r97 to i96
+%r99 = shl i96 %r98, 64
+%r100 = or i96 %r94, %r99
+%r101 = zext i96 %r100 to i128
+%r103 = getelementptr i32, i32* %r4, i32 3
+%r104 = load i32, i32* %r103
+%r105 = zext i32 %r104 to i128
+%r106 = shl i128 %r105, 96
+%r107 = or i128 %r101, %r106
+%r108 = zext i128 %r107 to i160
+%r110 = getelementptr i32, i32* %r4, i32 4
+%r111 = load i32, i32* %r110
+%r112 = zext i32 %r111 to i160
+%r113 = shl i160 %r112, 128
+%r114 = or i160 %r108, %r113
+%r115 = zext i160 %r114 to i192
+%r116 = sub i192 %r65, %r115 ; trial subtraction of the packed %r4 value from the full-width sum
+%r117 = lshr i192 %r116, 160
+%r118 = trunc i192 %r117 to i1 ; bit 160 of the difference is used as the borrow flag
+br i1%r118, label %carry, label %nocarry
+nocarry: ; flag clear: overwrite %r1[0..4] with the low 160 bits of the difference
+%r119 = trunc i192 %r116 to i160
+%r120 = trunc i160 %r119 to i32
+%r122 = getelementptr i32, i32* %r1, i32 0
+store i32 %r120, i32* %r122
+%r123 = lshr i160 %r119, 32
+%r124 = trunc i160 %r123 to i32
+%r126 = getelementptr i32, i32* %r1, i32 1
+store i32 %r124, i32* %r126
+%r127 = lshr i160 %r123, 32
+%r128 = trunc i160 %r127 to i32
+%r130 = getelementptr i32, i32* %r1, i32 2
+store i32 %r128, i32* %r130
+%r131 = lshr i160 %r127, 32
+%r132 = trunc i160 %r131 to i32
+%r134 = getelementptr i32, i32* %r1, i32 3
+store i32 %r132, i32* %r134
+%r135 = lshr i160 %r131, 32
+%r136 = trunc i160 %r135 to i32
+%r138 = getelementptr i32, i32* %r1, i32 4
+store i32 %r136, i32* %r138
+ret void
+carry: ; flag set: the raw sum already stored in %r1 is left as the result
+ret void
+}
+define void @mcl_fp_addNF5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; branch-free variant of the 5-limb add: all arithmetic in 160 bits, result chosen by select
+{ ; NOTE(review): the sum is not widened, so this presumably relies on the operands leaving headroom in bit 159 - confirm with caller
+%r5 = load i32, i32* %r2 ; from here to %r33: pack %r2[0..4] little-endian into one i160
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = load i32, i32* %r3 ; from here to %r62: pack %r3[0..4] the same way
+%r35 = zext i32 %r34 to i64
+%r37 = getelementptr i32, i32* %r3, i32 1
+%r38 = load i32, i32* %r37
+%r39 = zext i32 %r38 to i64
+%r40 = shl i64 %r39, 32
+%r41 = or i64 %r35, %r40
+%r42 = zext i64 %r41 to i96
+%r44 = getelementptr i32, i32* %r3, i32 2
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i96
+%r47 = shl i96 %r46, 64
+%r48 = or i96 %r42, %r47
+%r49 = zext i96 %r48 to i128
+%r51 = getelementptr i32, i32* %r3, i32 3
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i128
+%r54 = shl i128 %r53, 96
+%r55 = or i128 %r49, %r54
+%r56 = zext i128 %r55 to i160
+%r58 = getelementptr i32, i32* %r3, i32 4
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i160
+%r61 = shl i160 %r60, 128
+%r62 = or i160 %r56, %r61
+%r63 = add i160 %r33, %r62 ; 160-bit sum (any carry out of bit 159 is discarded)
+%r64 = load i32, i32* %r4 ; from here to %r92: pack %r4[0..4] into one i160
+%r65 = zext i32 %r64 to i64
+%r67 = getelementptr i32, i32* %r4, i32 1
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i64
+%r70 = shl i64 %r69, 32
+%r71 = or i64 %r65, %r70
+%r72 = zext i64 %r71 to i96
+%r74 = getelementptr i32, i32* %r4, i32 2
+%r75 = load i32, i32* %r74
+%r76 = zext i32 %r75 to i96
+%r77 = shl i96 %r76, 64
+%r78 = or i96 %r72, %r77
+%r79 = zext i96 %r78 to i128
+%r81 = getelementptr i32, i32* %r4, i32 3
+%r82 = load i32, i32* %r81
+%r83 = zext i32 %r82 to i128
+%r84 = shl i128 %r83, 96
+%r85 = or i128 %r79, %r84
+%r86 = zext i128 %r85 to i160
+%r88 = getelementptr i32, i32* %r4, i32 4
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i160
+%r91 = shl i160 %r90, 128
+%r92 = or i160 %r86, %r91
+%r93 = sub i160 %r63, %r92 ; trial subtraction of the packed %r4 value
+%r94 = lshr i160 %r93, 159
+%r95 = trunc i160 %r94 to i1 ; top bit (bit 159) of the difference is used as the "went negative" flag
+%r96 = select i1 %r95, i160 %r63, i160 %r93 ; flag set: keep the raw sum; otherwise keep the difference
+%r97 = trunc i160 %r96 to i32 ; from here on: write the result to %r1[0..4], 32 bits per store
+%r99 = getelementptr i32, i32* %r1, i32 0
+store i32 %r97, i32* %r99
+%r100 = lshr i160 %r96, 32
+%r101 = trunc i160 %r100 to i32
+%r103 = getelementptr i32, i32* %r1, i32 1
+store i32 %r101, i32* %r103
+%r104 = lshr i160 %r100, 32
+%r105 = trunc i160 %r104 to i32
+%r107 = getelementptr i32, i32* %r1, i32 2
+store i32 %r105, i32* %r107
+%r108 = lshr i160 %r104, 32
+%r109 = trunc i160 %r108 to i32
+%r111 = getelementptr i32, i32* %r1, i32 3
+store i32 %r109, i32* %r111
+%r112 = lshr i160 %r108, 32
+%r113 = trunc i160 %r112 to i32
+%r115 = getelementptr i32, i32* %r1, i32 4
+store i32 %r113, i32* %r115
+ret void
+}
+define void @mcl_fp_sub5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 5-limb subtraction with one conditional add-back: %r1 = %r2 - %r3, plus the value at %r4 when the subtraction borrowed
+{
+%r5 = load i32, i32* %r2 ; from here to %r33: pack %r2[0..4] little-endian into one i160
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = load i32, i32* %r3 ; from here to %r62: pack %r3[0..4] the same way
+%r35 = zext i32 %r34 to i64
+%r37 = getelementptr i32, i32* %r3, i32 1
+%r38 = load i32, i32* %r37
+%r39 = zext i32 %r38 to i64
+%r40 = shl i64 %r39, 32
+%r41 = or i64 %r35, %r40
+%r42 = zext i64 %r41 to i96
+%r44 = getelementptr i32, i32* %r3, i32 2
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i96
+%r47 = shl i96 %r46, 64
+%r48 = or i96 %r42, %r47
+%r49 = zext i96 %r48 to i128
+%r51 = getelementptr i32, i32* %r3, i32 3
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i128
+%r54 = shl i128 %r53, 96
+%r55 = or i128 %r49, %r54
+%r56 = zext i128 %r55 to i160
+%r58 = getelementptr i32, i32* %r3, i32 4
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i160
+%r61 = shl i160 %r60, 128
+%r62 = or i160 %r56, %r61
+%r63 = zext i160 %r33 to i192
+%r64 = zext i160 %r62 to i192
+%r65 = sub i192 %r63, %r64 ; 192-bit difference; wraps (high bits all ones) when %r2 < %r3
+%r66 = trunc i192 %r65 to i160
+%r67 = lshr i192 %r65, 160
+%r68 = trunc i192 %r67 to i1 ; bit 160 of the difference is the borrow flag tested below
+%r69 = trunc i160 %r66 to i32 ; first write: store the low 160 bits of the raw difference to %r1[0..4]
+%r71 = getelementptr i32, i32* %r1, i32 0
+store i32 %r69, i32* %r71
+%r72 = lshr i160 %r66, 32
+%r73 = trunc i160 %r72 to i32
+%r75 = getelementptr i32, i32* %r1, i32 1
+store i32 %r73, i32* %r75
+%r76 = lshr i160 %r72, 32
+%r77 = trunc i160 %r76 to i32
+%r79 = getelementptr i32, i32* %r1, i32 2
+store i32 %r77, i32* %r79
+%r80 = lshr i160 %r76, 32
+%r81 = trunc i160 %r80 to i32
+%r83 = getelementptr i32, i32* %r1, i32 3
+store i32 %r81, i32* %r83
+%r84 = lshr i160 %r80, 32
+%r85 = trunc i160 %r84 to i32
+%r87 = getelementptr i32, i32* %r1, i32 4
+store i32 %r85, i32* %r87
+br i1%r68, label %carry, label %nocarry
+nocarry: ; no borrow: the difference already stored in %r1 is the result
+ret void
+carry: ; borrow: pack %r4[0..4] into one i160, add it to the wrapped difference and overwrite %r1
+%r88 = load i32, i32* %r4
+%r89 = zext i32 %r88 to i64
+%r91 = getelementptr i32, i32* %r4, i32 1
+%r92 = load i32, i32* %r91
+%r93 = zext i32 %r92 to i64
+%r94 = shl i64 %r93, 32
+%r95 = or i64 %r89, %r94
+%r96 = zext i64 %r95 to i96
+%r98 = getelementptr i32, i32* %r4, i32 2
+%r99 = load i32, i32* %r98
+%r100 = zext i32 %r99 to i96
+%r101 = shl i96 %r100, 64
+%r102 = or i96 %r96, %r101
+%r103 = zext i96 %r102 to i128
+%r105 = getelementptr i32, i32* %r4, i32 3
+%r106 = load i32, i32* %r105
+%r107 = zext i32 %r106 to i128
+%r108 = shl i128 %r107, 96
+%r109 = or i128 %r103, %r108
+%r110 = zext i128 %r109 to i160
+%r112 = getelementptr i32, i32* %r4, i32 4
+%r113 = load i32, i32* %r112
+%r114 = zext i32 %r113 to i160
+%r115 = shl i160 %r114, 128
+%r116 = or i160 %r110, %r115
+%r117 = add i160 %r66, %r116 ; add-back in 160 bits; the carry out of bit 159 is discarded
+%r118 = trunc i160 %r117 to i32
+%r120 = getelementptr i32, i32* %r1, i32 0
+store i32 %r118, i32* %r120
+%r121 = lshr i160 %r117, 32
+%r122 = trunc i160 %r121 to i32
+%r124 = getelementptr i32, i32* %r1, i32 1
+store i32 %r122, i32* %r124
+%r125 = lshr i160 %r121, 32
+%r126 = trunc i160 %r125 to i32
+%r128 = getelementptr i32, i32* %r1, i32 2
+store i32 %r126, i32* %r128
+%r129 = lshr i160 %r125, 32
+%r130 = trunc i160 %r129 to i32
+%r132 = getelementptr i32, i32* %r1, i32 3
+store i32 %r130, i32* %r132
+%r133 = lshr i160 %r129, 32
+%r134 = trunc i160 %r133 to i32
+%r136 = getelementptr i32, i32* %r1, i32 4
+store i32 %r134, i32* %r136
+ret void
+}
+define void @mcl_fp_subNF5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; branch-free variant of the 5-limb subtract: %r1 = (%r2 - %r3) + (%r4 value or 0), all in 160 bits
+{ ; NOTE(review): the sign test uses bit 159, so this presumably relies on the operands leaving headroom in that bit - confirm with caller
+%r5 = load i32, i32* %r2 ; from here to %r33: pack %r2[0..4] little-endian into one i160
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = load i32, i32* %r3 ; from here to %r62: pack %r3[0..4] the same way
+%r35 = zext i32 %r34 to i64
+%r37 = getelementptr i32, i32* %r3, i32 1
+%r38 = load i32, i32* %r37
+%r39 = zext i32 %r38 to i64
+%r40 = shl i64 %r39, 32
+%r41 = or i64 %r35, %r40
+%r42 = zext i64 %r41 to i96
+%r44 = getelementptr i32, i32* %r3, i32 2
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i96
+%r47 = shl i96 %r46, 64
+%r48 = or i96 %r42, %r47
+%r49 = zext i96 %r48 to i128
+%r51 = getelementptr i32, i32* %r3, i32 3
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i128
+%r54 = shl i128 %r53, 96
+%r55 = or i128 %r49, %r54
+%r56 = zext i128 %r55 to i160
+%r58 = getelementptr i32, i32* %r3, i32 4
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i160
+%r61 = shl i160 %r60, 128
+%r62 = or i160 %r56, %r61
+%r63 = sub i160 %r33, %r62 ; 160-bit difference (wraps modulo 2^160)
+%r64 = lshr i160 %r63, 159
+%r65 = trunc i160 %r64 to i1 ; top bit (bit 159) of the difference is used as the "went negative" flag
+%r66 = load i32, i32* %r4 ; from here to %r94: pack %r4[0..4] into one i160
+%r67 = zext i32 %r66 to i64
+%r69 = getelementptr i32, i32* %r4, i32 1
+%r70 = load i32, i32* %r69
+%r71 = zext i32 %r70 to i64
+%r72 = shl i64 %r71, 32
+%r73 = or i64 %r67, %r72
+%r74 = zext i64 %r73 to i96
+%r76 = getelementptr i32, i32* %r4, i32 2
+%r77 = load i32, i32* %r76
+%r78 = zext i32 %r77 to i96
+%r79 = shl i96 %r78, 64
+%r80 = or i96 %r74, %r79
+%r81 = zext i96 %r80 to i128
+%r83 = getelementptr i32, i32* %r4, i32 3
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i128
+%r86 = shl i128 %r85, 96
+%r87 = or i128 %r81, %r86
+%r88 = zext i128 %r87 to i160
+%r90 = getelementptr i32, i32* %r4, i32 4
+%r91 = load i32, i32* %r90
+%r92 = zext i32 %r91 to i160
+%r93 = shl i160 %r92, 128
+%r94 = or i160 %r88, %r93
+%r96 = select i1 %r65, i160 %r94, i160 0 ; addend is the packed %r4 value when the flag is set, otherwise zero
+%r97 = add i160 %r63, %r96 ; unconditional add keeps the function branch-free
+%r98 = trunc i160 %r97 to i32 ; from here on: write the result to %r1[0..4], 32 bits per store
+%r100 = getelementptr i32, i32* %r1, i32 0
+store i32 %r98, i32* %r100
+%r101 = lshr i160 %r97, 32
+%r102 = trunc i160 %r101 to i32
+%r104 = getelementptr i32, i32* %r1, i32 1
+store i32 %r102, i32* %r104
+%r105 = lshr i160 %r101, 32
+%r106 = trunc i160 %r105 to i32
+%r108 = getelementptr i32, i32* %r1, i32 2
+store i32 %r106, i32* %r108
+%r109 = lshr i160 %r105, 32
+%r110 = trunc i160 %r109 to i32
+%r112 = getelementptr i32, i32* %r1, i32 3
+store i32 %r110, i32* %r112
+%r113 = lshr i160 %r109, 32
+%r114 = trunc i160 %r113 to i32
+%r116 = getelementptr i32, i32* %r1, i32 4
+store i32 %r114, i32* %r116
+ret void
+}
+define void @mcl_fpDbl_add5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r1[0..9] = %r2[0..9] + %r3[0..9]; the high half has %r4[0..4] conditionally subtracted
+{ ; all multi-word values are packed little-endian: word i occupies bits 32*i .. 32*i+31
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67 ; %r68 = the ten words %r2[0..9] as one 320-bit integer
+%r69 = load i32, i32* %r3
+%r70 = zext i32 %r69 to i64
+%r72 = getelementptr i32, i32* %r3, i32 1
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i64
+%r75 = shl i64 %r74, 32
+%r76 = or i64 %r70, %r75
+%r77 = zext i64 %r76 to i96
+%r79 = getelementptr i32, i32* %r3, i32 2
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i96
+%r82 = shl i96 %r81, 64
+%r83 = or i96 %r77, %r82
+%r84 = zext i96 %r83 to i128
+%r86 = getelementptr i32, i32* %r3, i32 3
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i128
+%r89 = shl i128 %r88, 96
+%r90 = or i128 %r84, %r89
+%r91 = zext i128 %r90 to i160
+%r93 = getelementptr i32, i32* %r3, i32 4
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i160
+%r96 = shl i160 %r95, 128
+%r97 = or i160 %r91, %r96
+%r98 = zext i160 %r97 to i192
+%r100 = getelementptr i32, i32* %r3, i32 5
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i192
+%r103 = shl i192 %r102, 160
+%r104 = or i192 %r98, %r103
+%r105 = zext i192 %r104 to i224
+%r107 = getelementptr i32, i32* %r3, i32 6
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i224
+%r110 = shl i224 %r109, 192
+%r111 = or i224 %r105, %r110
+%r112 = zext i224 %r111 to i256
+%r114 = getelementptr i32, i32* %r3, i32 7
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i256
+%r117 = shl i256 %r116, 224
+%r118 = or i256 %r112, %r117
+%r119 = zext i256 %r118 to i288
+%r121 = getelementptr i32, i32* %r3, i32 8
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i288
+%r124 = shl i288 %r123, 256
+%r125 = or i288 %r119, %r124
+%r126 = zext i288 %r125 to i320
+%r128 = getelementptr i32, i32* %r3, i32 9
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i320
+%r131 = shl i320 %r130, 288
+%r132 = or i320 %r126, %r131 ; %r132 = the ten words %r3[0..9] as one 320-bit integer
+%r133 = zext i320 %r68 to i352
+%r134 = zext i320 %r132 to i352
+%r135 = add i352 %r133, %r134 ; widened to 352 bits so the carry out of bit 319 is not lost
+%r136 = trunc i352 %r135 to i160 ; low 160 bits of the sum are written to %r1[0..4] unchanged
+%r137 = trunc i160 %r136 to i32
+%r139 = getelementptr i32, i32* %r1, i32 0
+store i32 %r137, i32* %r139
+%r140 = lshr i160 %r136, 32
+%r141 = trunc i160 %r140 to i32
+%r143 = getelementptr i32, i32* %r1, i32 1
+store i32 %r141, i32* %r143
+%r144 = lshr i160 %r140, 32
+%r145 = trunc i160 %r144 to i32
+%r147 = getelementptr i32, i32* %r1, i32 2
+store i32 %r145, i32* %r147
+%r148 = lshr i160 %r144, 32
+%r149 = trunc i160 %r148 to i32
+%r151 = getelementptr i32, i32* %r1, i32 3
+store i32 %r149, i32* %r151
+%r152 = lshr i160 %r148, 32
+%r153 = trunc i160 %r152 to i32
+%r155 = getelementptr i32, i32* %r1, i32 4
+store i32 %r153, i32* %r155
+%r156 = lshr i352 %r135, 160
+%r157 = trunc i352 %r156 to i192 ; %r157 = bits 160 and up of the sum (including the carry), held in 192 bits
+%r158 = load i32, i32* %r4
+%r159 = zext i32 %r158 to i64
+%r161 = getelementptr i32, i32* %r4, i32 1
+%r162 = load i32, i32* %r161
+%r163 = zext i32 %r162 to i64
+%r164 = shl i64 %r163, 32
+%r165 = or i64 %r159, %r164
+%r166 = zext i64 %r165 to i96
+%r168 = getelementptr i32, i32* %r4, i32 2
+%r169 = load i32, i32* %r168
+%r170 = zext i32 %r169 to i96
+%r171 = shl i96 %r170, 64
+%r172 = or i96 %r166, %r171
+%r173 = zext i96 %r172 to i128
+%r175 = getelementptr i32, i32* %r4, i32 3
+%r176 = load i32, i32* %r175
+%r177 = zext i32 %r176 to i128
+%r178 = shl i128 %r177, 96
+%r179 = or i128 %r173, %r178
+%r180 = zext i128 %r179 to i160
+%r182 = getelementptr i32, i32* %r4, i32 4
+%r183 = load i32, i32* %r182
+%r184 = zext i32 %r183 to i160
+%r185 = shl i160 %r184, 128
+%r186 = or i160 %r180, %r185 ; %r186 = the five words %r4[0..4] as one 160-bit integer
+%r187 = zext i160 %r186 to i192
+%r188 = sub i192 %r157, %r187 ; trial subtraction: high part minus %r4[0..4]
+%r189 = lshr i192 %r188, 160
+%r190 = trunc i192 %r189 to i1 ; bit 160 of the difference is the selector; NOTE(review): presumably equals "subtraction borrowed" only when the high part is < 2 * %r4 - confirm the input range with callers
+%r191 = select i1 %r190, i192 %r157, i192 %r188 ; selector set -> keep the unsubtracted high part, otherwise keep the difference
+%r192 = trunc i192 %r191 to i160
+%r194 = getelementptr i32, i32* %r1, i32 5 ; the selected 160-bit value goes to %r1[5..9]
+%r195 = trunc i160 %r192 to i32
+%r197 = getelementptr i32, i32* %r194, i32 0
+store i32 %r195, i32* %r197
+%r198 = lshr i160 %r192, 32
+%r199 = trunc i160 %r198 to i32
+%r201 = getelementptr i32, i32* %r194, i32 1
+store i32 %r199, i32* %r201
+%r202 = lshr i160 %r198, 32
+%r203 = trunc i160 %r202 to i32
+%r205 = getelementptr i32, i32* %r194, i32 2
+store i32 %r203, i32* %r205
+%r206 = lshr i160 %r202, 32
+%r207 = trunc i160 %r206 to i32
+%r209 = getelementptr i32, i32* %r194, i32 3
+store i32 %r207, i32* %r209
+%r210 = lshr i160 %r206, 32
+%r211 = trunc i160 %r210 to i32
+%r213 = getelementptr i32, i32* %r194, i32 4
+store i32 %r211, i32* %r213
+ret void
+}
+define void @mcl_fpDbl_sub5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r1[0..9] = %r2[0..9] - %r3[0..9]; %r4[0..4] is added to the high half when the subtraction borrows
+{ ; all multi-word values are packed little-endian: word i occupies bits 32*i .. 32*i+31
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67 ; %r68 = the ten words %r2[0..9] as one 320-bit integer
+%r69 = load i32, i32* %r3
+%r70 = zext i32 %r69 to i64
+%r72 = getelementptr i32, i32* %r3, i32 1
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i64
+%r75 = shl i64 %r74, 32
+%r76 = or i64 %r70, %r75
+%r77 = zext i64 %r76 to i96
+%r79 = getelementptr i32, i32* %r3, i32 2
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i96
+%r82 = shl i96 %r81, 64
+%r83 = or i96 %r77, %r82
+%r84 = zext i96 %r83 to i128
+%r86 = getelementptr i32, i32* %r3, i32 3
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i128
+%r89 = shl i128 %r88, 96
+%r90 = or i128 %r84, %r89
+%r91 = zext i128 %r90 to i160
+%r93 = getelementptr i32, i32* %r3, i32 4
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i160
+%r96 = shl i160 %r95, 128
+%r97 = or i160 %r91, %r96
+%r98 = zext i160 %r97 to i192
+%r100 = getelementptr i32, i32* %r3, i32 5
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i192
+%r103 = shl i192 %r102, 160
+%r104 = or i192 %r98, %r103
+%r105 = zext i192 %r104 to i224
+%r107 = getelementptr i32, i32* %r3, i32 6
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i224
+%r110 = shl i224 %r109, 192
+%r111 = or i224 %r105, %r110
+%r112 = zext i224 %r111 to i256
+%r114 = getelementptr i32, i32* %r3, i32 7
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i256
+%r117 = shl i256 %r116, 224
+%r118 = or i256 %r112, %r117
+%r119 = zext i256 %r118 to i288
+%r121 = getelementptr i32, i32* %r3, i32 8
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i288
+%r124 = shl i288 %r123, 256
+%r125 = or i288 %r119, %r124
+%r126 = zext i288 %r125 to i320
+%r128 = getelementptr i32, i32* %r3, i32 9
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i320
+%r131 = shl i320 %r130, 288
+%r132 = or i320 %r126, %r131 ; %r132 = the ten words %r3[0..9] as one 320-bit integer
+%r133 = zext i320 %r68 to i352
+%r134 = zext i320 %r132 to i352
+%r135 = sub i352 %r133, %r134 ; done in 352 bits so a borrow shows up in the bits above 319
+%r136 = trunc i352 %r135 to i160 ; low 160 bits of the difference are written to %r1[0..4] unchanged
+%r137 = trunc i160 %r136 to i32
+%r139 = getelementptr i32, i32* %r1, i32 0
+store i32 %r137, i32* %r139
+%r140 = lshr i160 %r136, 32
+%r141 = trunc i160 %r140 to i32
+%r143 = getelementptr i32, i32* %r1, i32 1
+store i32 %r141, i32* %r143
+%r144 = lshr i160 %r140, 32
+%r145 = trunc i160 %r144 to i32
+%r147 = getelementptr i32, i32* %r1, i32 2
+store i32 %r145, i32* %r147
+%r148 = lshr i160 %r144, 32
+%r149 = trunc i160 %r148 to i32
+%r151 = getelementptr i32, i32* %r1, i32 3
+store i32 %r149, i32* %r151
+%r152 = lshr i160 %r148, 32
+%r153 = trunc i160 %r152 to i32
+%r155 = getelementptr i32, i32* %r1, i32 4
+store i32 %r153, i32* %r155
+%r156 = lshr i352 %r135, 160
+%r157 = trunc i352 %r156 to i160 ; %r157 = bits 160..319 of the difference
+%r158 = lshr i352 %r135, 320
+%r159 = trunc i352 %r158 to i1 ; bit 320 of the difference: set exactly when %r2[0..9] < %r3[0..9]
+%r160 = load i32, i32* %r4
+%r161 = zext i32 %r160 to i64
+%r163 = getelementptr i32, i32* %r4, i32 1
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i64
+%r166 = shl i64 %r165, 32
+%r167 = or i64 %r161, %r166
+%r168 = zext i64 %r167 to i96
+%r170 = getelementptr i32, i32* %r4, i32 2
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i96
+%r173 = shl i96 %r172, 64
+%r174 = or i96 %r168, %r173
+%r175 = zext i96 %r174 to i128
+%r177 = getelementptr i32, i32* %r4, i32 3
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i128
+%r180 = shl i128 %r179, 96
+%r181 = or i128 %r175, %r180
+%r182 = zext i128 %r181 to i160
+%r184 = getelementptr i32, i32* %r4, i32 4
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i160
+%r187 = shl i160 %r186, 128
+%r188 = or i160 %r182, %r187 ; %r188 = the five words %r4[0..4] as one 160-bit integer
+%r190 = select i1 %r159, i160 %r188, i160 0 ; correction term: %r4[0..4] on borrow, 0 otherwise (no branch)
+%r191 = add i160 %r157, %r190 ; addition wraps modulo 2^160
+%r193 = getelementptr i32, i32* %r1, i32 5 ; the corrected high half goes to %r1[5..9]
+%r194 = trunc i160 %r191 to i32
+%r196 = getelementptr i32, i32* %r193, i32 0
+store i32 %r194, i32* %r196
+%r197 = lshr i160 %r191, 32
+%r198 = trunc i160 %r197 to i32
+%r200 = getelementptr i32, i32* %r193, i32 1
+store i32 %r198, i32* %r200
+%r201 = lshr i160 %r197, 32
+%r202 = trunc i160 %r201 to i32
+%r204 = getelementptr i32, i32* %r193, i32 2
+store i32 %r202, i32* %r204
+%r205 = lshr i160 %r201, 32
+%r206 = trunc i160 %r205 to i32
+%r208 = getelementptr i32, i32* %r193, i32 3
+store i32 %r206, i32* %r208
+%r209 = lshr i160 %r205, 32
+%r210 = trunc i160 %r209 to i32
+%r212 = getelementptr i32, i32* %r193, i32 4
+store i32 %r210, i32* %r212
+ret void
+}
+define i224 @mulPv192x32(i32* noalias  %r2, i32 %r3) ; combines six mulPos32x32(%r2, %r3, i) results, i = 0..5, into one 224-bit value
+{ ; NOTE(review): mulPos32x32 / extractHigh32 are defined outside this view; presumably they give the 64-bit product %r2[i] * %r3 and its upper 32 bits, making the result the 6-word-by-1-word product - confirm
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r28 = zext i32 %r6 to i64
+%r29 = zext i32 %r10 to i64
+%r30 = shl i64 %r29, 32
+%r31 = or i64 %r28, %r30
+%r32 = zext i64 %r31 to i96
+%r33 = zext i32 %r14 to i96
+%r34 = shl i96 %r33, 64
+%r35 = or i96 %r32, %r34
+%r36 = zext i96 %r35 to i128
+%r37 = zext i32 %r18 to i128
+%r38 = shl i128 %r37, 96
+%r39 = or i128 %r36, %r38
+%r40 = zext i128 %r39 to i160
+%r41 = zext i32 %r22 to i160
+%r42 = shl i160 %r41, 128
+%r43 = or i160 %r40, %r42
+%r44 = zext i160 %r43 to i192
+%r45 = zext i32 %r26 to i192
+%r46 = shl i192 %r45, 160
+%r47 = or i192 %r44, %r46 ; %r47 = the six truncated (low 32-bit) call results, result i at bits 32*i
+%r48 = zext i32 %r7 to i64
+%r49 = zext i32 %r11 to i64
+%r50 = shl i64 %r49, 32
+%r51 = or i64 %r48, %r50
+%r52 = zext i64 %r51 to i96
+%r53 = zext i32 %r15 to i96
+%r54 = shl i96 %r53, 64
+%r55 = or i96 %r52, %r54
+%r56 = zext i96 %r55 to i128
+%r57 = zext i32 %r19 to i128
+%r58 = shl i128 %r57, 96
+%r59 = or i128 %r56, %r58
+%r60 = zext i128 %r59 to i160
+%r61 = zext i32 %r23 to i160
+%r62 = shl i160 %r61, 128
+%r63 = or i160 %r60, %r62
+%r64 = zext i160 %r63 to i192
+%r65 = zext i32 %r27 to i192
+%r66 = shl i192 %r65, 160
+%r67 = or i192 %r64, %r66 ; %r67 = the six extractHigh32 results, result i at bits 32*i
+%r68 = zext i192 %r47 to i224
+%r69 = zext i192 %r67 to i224
+%r70 = shl i224 %r69, 32 ; each high half belongs one word above its low half
+%r71 = add i224 %r68, %r70 ; a single wide add propagates every inter-word carry
+ret i224 %r71
+}
+define void @mcl_fp_mulUnitPre6L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3) ; writes the 224-bit result of mulPv192x32(%r2, %r3) to the seven words %r1[0..6]
+{
+%r4 = call i224 @mulPv192x32(i32* %r2, i32 %r3)
+%r5 = trunc i224 %r4 to i32 ; from here on: store the low word, shift right by 32, repeat (least significant word first)
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i224 %r4, 32
+%r9 = trunc i224 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i224 %r8, 32
+%r13 = trunc i224 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i224 %r12, 32
+%r17 = trunc i224 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i224 %r16, 32
+%r21 = trunc i224 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+%r24 = lshr i224 %r20, 32
+%r25 = trunc i224 %r24 to i32
+%r27 = getelementptr i32, i32* %r1, i32 5
+store i32 %r25, i32* %r27
+%r28 = lshr i224 %r24, 32
+%r29 = trunc i224 %r28 to i32
+%r31 = getelementptr i32, i32* %r1, i32 6
+store i32 %r29, i32* %r31
+ret void
+}
+define void @mcl_fpDbl_mulPre6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; row-by-row accumulation of mulPv192x32(%r2, %r3[i]), i = 0..5, written to the twelve words %r1[0..11]
+{ ; NOTE(review): given the presumed meaning of mulPv192x32 this is the full 12-word product %r2[0..5] * %r3[0..5] - confirm
+%r4 = load i32, i32* %r3
+%r5 = call i224 @mulPv192x32(i32* %r2, i32 %r4)
+%r6 = trunc i224 %r5 to i32
+store i32 %r6, i32* %r1 ; the low word of the first row is final: %r1[0]
+%r7 = lshr i224 %r5, 32 ; drop the finished word; the remainder is the running accumulator
+%r9 = getelementptr i32, i32* %r3, i32 1
+%r10 = load i32, i32* %r9
+%r11 = call i224 @mulPv192x32(i32* %r2, i32 %r10)
+%r12 = add i224 %r7, %r11 ; accumulator + row for %r3[1]
+%r13 = trunc i224 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 1
+store i32 %r13, i32* %r15
+%r16 = lshr i224 %r12, 32
+%r18 = getelementptr i32, i32* %r3, i32 2
+%r19 = load i32, i32* %r18
+%r20 = call i224 @mulPv192x32(i32* %r2, i32 %r19)
+%r21 = add i224 %r16, %r20 ; accumulator + row for %r3[2]
+%r22 = trunc i224 %r21 to i32
+%r24 = getelementptr i32, i32* %r1, i32 2
+store i32 %r22, i32* %r24
+%r25 = lshr i224 %r21, 32
+%r27 = getelementptr i32, i32* %r3, i32 3
+%r28 = load i32, i32* %r27
+%r29 = call i224 @mulPv192x32(i32* %r2, i32 %r28)
+%r30 = add i224 %r25, %r29 ; accumulator + row for %r3[3]
+%r31 = trunc i224 %r30 to i32
+%r33 = getelementptr i32, i32* %r1, i32 3
+store i32 %r31, i32* %r33
+%r34 = lshr i224 %r30, 32
+%r36 = getelementptr i32, i32* %r3, i32 4
+%r37 = load i32, i32* %r36
+%r38 = call i224 @mulPv192x32(i32* %r2, i32 %r37)
+%r39 = add i224 %r34, %r38 ; accumulator + row for %r3[4]
+%r40 = trunc i224 %r39 to i32
+%r42 = getelementptr i32, i32* %r1, i32 4
+store i32 %r40, i32* %r42
+%r43 = lshr i224 %r39, 32
+%r45 = getelementptr i32, i32* %r3, i32 5
+%r46 = load i32, i32* %r45
+%r47 = call i224 @mulPv192x32(i32* %r2, i32 %r46)
+%r48 = add i224 %r43, %r47 ; accumulator + last row (%r3[5])
+%r50 = getelementptr i32, i32* %r1, i32 5 ; all seven words of the final accumulator go to %r1[5..11]
+%r51 = trunc i224 %r48 to i32
+%r53 = getelementptr i32, i32* %r50, i32 0
+store i32 %r51, i32* %r53
+%r54 = lshr i224 %r48, 32
+%r55 = trunc i224 %r54 to i32
+%r57 = getelementptr i32, i32* %r50, i32 1
+store i32 %r55, i32* %r57
+%r58 = lshr i224 %r54, 32
+%r59 = trunc i224 %r58 to i32
+%r61 = getelementptr i32, i32* %r50, i32 2
+store i32 %r59, i32* %r61
+%r62 = lshr i224 %r58, 32
+%r63 = trunc i224 %r62 to i32
+%r65 = getelementptr i32, i32* %r50, i32 3
+store i32 %r63, i32* %r65
+%r66 = lshr i224 %r62, 32
+%r67 = trunc i224 %r66 to i32
+%r69 = getelementptr i32, i32* %r50, i32 4
+store i32 %r67, i32* %r69
+%r70 = lshr i224 %r66, 32
+%r71 = trunc i224 %r70 to i32
+%r73 = getelementptr i32, i32* %r50, i32 5
+store i32 %r71, i32* %r73
+%r74 = lshr i224 %r70, 32
+%r75 = trunc i224 %r74 to i32
+%r77 = getelementptr i32, i32* %r50, i32 6
+store i32 %r75, i32* %r77
+ret void
+}
+define void @mcl_fpDbl_sqrPre6L(i32* noalias  %r1, i32* noalias  %r2) ; same row-by-row accumulation as a 6x6-word multiply, with %r2 supplying both operands; result in %r1[0..11]
+{ ; no squaring shortcut is taken: every row is a full mulPv192x32(%r2, %r2[i]) call
+%r3 = load i32, i32* %r2
+%r4 = call i224 @mulPv192x32(i32* %r2, i32 %r3)
+%r5 = trunc i224 %r4 to i32
+store i32 %r5, i32* %r1 ; the low word of the first row is final: %r1[0]
+%r6 = lshr i224 %r4, 32 ; drop the finished word; the remainder is the running accumulator
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = call i224 @mulPv192x32(i32* %r2, i32 %r9)
+%r11 = add i224 %r6, %r10 ; accumulator + row for %r2[1]
+%r12 = trunc i224 %r11 to i32
+%r14 = getelementptr i32, i32* %r1, i32 1
+store i32 %r12, i32* %r14
+%r15 = lshr i224 %r11, 32
+%r17 = getelementptr i32, i32* %r2, i32 2
+%r18 = load i32, i32* %r17
+%r19 = call i224 @mulPv192x32(i32* %r2, i32 %r18)
+%r20 = add i224 %r15, %r19 ; accumulator + row for %r2[2]
+%r21 = trunc i224 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 2
+store i32 %r21, i32* %r23
+%r24 = lshr i224 %r20, 32
+%r26 = getelementptr i32, i32* %r2, i32 3
+%r27 = load i32, i32* %r26
+%r28 = call i224 @mulPv192x32(i32* %r2, i32 %r27)
+%r29 = add i224 %r24, %r28 ; accumulator + row for %r2[3]
+%r30 = trunc i224 %r29 to i32
+%r32 = getelementptr i32, i32* %r1, i32 3
+store i32 %r30, i32* %r32
+%r33 = lshr i224 %r29, 32
+%r35 = getelementptr i32, i32* %r2, i32 4
+%r36 = load i32, i32* %r35
+%r37 = call i224 @mulPv192x32(i32* %r2, i32 %r36)
+%r38 = add i224 %r33, %r37 ; accumulator + row for %r2[4]
+%r39 = trunc i224 %r38 to i32
+%r41 = getelementptr i32, i32* %r1, i32 4
+store i32 %r39, i32* %r41
+%r42 = lshr i224 %r38, 32
+%r44 = getelementptr i32, i32* %r2, i32 5
+%r45 = load i32, i32* %r44
+%r46 = call i224 @mulPv192x32(i32* %r2, i32 %r45)
+%r47 = add i224 %r42, %r46 ; accumulator + last row (%r2[5])
+%r49 = getelementptr i32, i32* %r1, i32 5 ; all seven words of the final accumulator go to %r1[5..11]
+%r50 = trunc i224 %r47 to i32
+%r52 = getelementptr i32, i32* %r49, i32 0
+store i32 %r50, i32* %r52
+%r53 = lshr i224 %r47, 32
+%r54 = trunc i224 %r53 to i32
+%r56 = getelementptr i32, i32* %r49, i32 1
+store i32 %r54, i32* %r56
+%r57 = lshr i224 %r53, 32
+%r58 = trunc i224 %r57 to i32
+%r60 = getelementptr i32, i32* %r49, i32 2
+store i32 %r58, i32* %r60
+%r61 = lshr i224 %r57, 32
+%r62 = trunc i224 %r61 to i32
+%r64 = getelementptr i32, i32* %r49, i32 3
+store i32 %r62, i32* %r64
+%r65 = lshr i224 %r61, 32
+%r66 = trunc i224 %r65 to i32
+%r68 = getelementptr i32, i32* %r49, i32 4
+store i32 %r66, i32* %r68
+%r69 = lshr i224 %r65, 32
+%r70 = trunc i224 %r69 to i32
+%r72 = getelementptr i32, i32* %r49, i32 5
+store i32 %r70, i32* %r72
+%r73 = lshr i224 %r69, 32
+%r74 = trunc i224 %r73 to i32
+%r76 = getelementptr i32, i32* %r49, i32 6
+store i32 %r74, i32* %r76
+ret void
+}
+define void @mcl_fp_mont6L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; six interleaved multiply/reduce rounds over %r2[0..5], %r3[0..5] and %r4[0..5]; 6-word result in %r1[0..5]
+{ ; NOTE(review): the structure matches word-by-word Montgomery multiplication with modulus %r4 - confirm against the generator/callers
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; the word stored immediately before %r4[0] is read as the per-round factor
+%r7 = load i32, i32* %r6 ; NOTE(review): presumably -(%r4)^-1 mod 2^32, precomputed by the caller - confirm
+%r9 = getelementptr i32, i32* %r3, i32 0
+%r10 = load i32, i32* %r9
+%r11 = call i224 @mulPv192x32(i32* %r2, i32 %r10)
+%r12 = zext i224 %r11 to i256 ; the accumulator is 256 bits wide, one word more than a single row
+%r13 = trunc i224 %r11 to i32
+%r14 = mul i32 %r13, %r7 ; round multiplier = low accumulator word * %r7 (mod 2^32)
+%r15 = call i224 @mulPv192x32(i32* %r4, i32 %r14)
+%r16 = zext i224 %r15 to i256
+%r17 = add i256 %r12, %r16
+%r18 = lshr i256 %r17, 32 ; end of round 0: discard the low word
+%r20 = getelementptr i32, i32* %r3, i32 1
+%r21 = load i32, i32* %r20
+%r22 = call i224 @mulPv192x32(i32* %r2, i32 %r21)
+%r23 = zext i224 %r22 to i256
+%r24 = add i256 %r18, %r23
+%r25 = trunc i256 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i224 @mulPv192x32(i32* %r4, i32 %r26)
+%r28 = zext i224 %r27 to i256
+%r29 = add i256 %r24, %r28
+%r30 = lshr i256 %r29, 32 ; end of round 1
+%r32 = getelementptr i32, i32* %r3, i32 2
+%r33 = load i32, i32* %r32
+%r34 = call i224 @mulPv192x32(i32* %r2, i32 %r33)
+%r35 = zext i224 %r34 to i256
+%r36 = add i256 %r30, %r35
+%r37 = trunc i256 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i224 @mulPv192x32(i32* %r4, i32 %r38)
+%r40 = zext i224 %r39 to i256
+%r41 = add i256 %r36, %r40
+%r42 = lshr i256 %r41, 32 ; end of round 2
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = call i224 @mulPv192x32(i32* %r2, i32 %r45)
+%r47 = zext i224 %r46 to i256
+%r48 = add i256 %r42, %r47
+%r49 = trunc i256 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i224 @mulPv192x32(i32* %r4, i32 %r50)
+%r52 = zext i224 %r51 to i256
+%r53 = add i256 %r48, %r52
+%r54 = lshr i256 %r53, 32 ; end of round 3
+%r56 = getelementptr i32, i32* %r3, i32 4
+%r57 = load i32, i32* %r56
+%r58 = call i224 @mulPv192x32(i32* %r2, i32 %r57)
+%r59 = zext i224 %r58 to i256
+%r60 = add i256 %r54, %r59
+%r61 = trunc i256 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i224 @mulPv192x32(i32* %r4, i32 %r62)
+%r64 = zext i224 %r63 to i256
+%r65 = add i256 %r60, %r64
+%r66 = lshr i256 %r65, 32 ; end of round 4
+%r68 = getelementptr i32, i32* %r3, i32 5
+%r69 = load i32, i32* %r68
+%r70 = call i224 @mulPv192x32(i32* %r2, i32 %r69)
+%r71 = zext i224 %r70 to i256
+%r72 = add i256 %r66, %r71
+%r73 = trunc i256 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i224 @mulPv192x32(i32* %r4, i32 %r74)
+%r76 = zext i224 %r75 to i256
+%r77 = add i256 %r72, %r76
+%r78 = lshr i256 %r77, 32 ; end of round 5
+%r79 = trunc i256 %r78 to i224 ; %r79 = value before the final conditional subtraction
+%r80 = load i32, i32* %r4
+%r81 = zext i32 %r80 to i64
+%r83 = getelementptr i32, i32* %r4, i32 1
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i64
+%r86 = shl i64 %r85, 32
+%r87 = or i64 %r81, %r86
+%r88 = zext i64 %r87 to i96
+%r90 = getelementptr i32, i32* %r4, i32 2
+%r91 = load i32, i32* %r90
+%r92 = zext i32 %r91 to i96
+%r93 = shl i96 %r92, 64
+%r94 = or i96 %r88, %r93
+%r95 = zext i96 %r94 to i128
+%r97 = getelementptr i32, i32* %r4, i32 3
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i128
+%r100 = shl i128 %r99, 96
+%r101 = or i128 %r95, %r100
+%r102 = zext i128 %r101 to i160
+%r104 = getelementptr i32, i32* %r4, i32 4
+%r105 = load i32, i32* %r104
+%r106 = zext i32 %r105 to i160
+%r107 = shl i160 %r106, 128
+%r108 = or i160 %r102, %r107
+%r109 = zext i160 %r108 to i192
+%r111 = getelementptr i32, i32* %r4, i32 5
+%r112 = load i32, i32* %r111
+%r113 = zext i32 %r112 to i192
+%r114 = shl i192 %r113, 160
+%r115 = or i192 %r109, %r114 ; %r115 = the six words %r4[0..5] as one 192-bit integer (little-endian words)
+%r116 = zext i192 %r115 to i224
+%r117 = sub i224 %r79, %r116 ; trial subtraction of %r4[0..5]
+%r118 = lshr i224 %r117, 192
+%r119 = trunc i224 %r118 to i1 ; bit 192 of the difference is the selector; NOTE(review): presumably the borrow flag, assuming %r79 < 2 * %r4 - confirm
+%r120 = select i1 %r119, i224 %r79, i224 %r117 ; selector set -> keep %r79, otherwise keep the difference (no branch)
+%r121 = trunc i224 %r120 to i192
+%r122 = trunc i192 %r121 to i32
+%r124 = getelementptr i32, i32* %r1, i32 0
+store i32 %r122, i32* %r124
+%r125 = lshr i192 %r121, 32
+%r126 = trunc i192 %r125 to i32
+%r128 = getelementptr i32, i32* %r1, i32 1
+store i32 %r126, i32* %r128
+%r129 = lshr i192 %r125, 32
+%r130 = trunc i192 %r129 to i32
+%r132 = getelementptr i32, i32* %r1, i32 2
+store i32 %r130, i32* %r132
+%r133 = lshr i192 %r129, 32
+%r134 = trunc i192 %r133 to i32
+%r136 = getelementptr i32, i32* %r1, i32 3
+store i32 %r134, i32* %r136
+%r137 = lshr i192 %r133, 32
+%r138 = trunc i192 %r137 to i32
+%r140 = getelementptr i32, i32* %r1, i32 4
+store i32 %r138, i32* %r140
+%r141 = lshr i192 %r137, 32
+%r142 = trunc i192 %r141 to i32
+%r144 = getelementptr i32, i32* %r1, i32 5
+store i32 %r142, i32* %r144
+ret void
+}
+define void @mcl_fp_montNF6L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; same six multiply/reduce rounds as a word-by-word Montgomery-style multiply, but the accumulator stays 224 bits wide; result in %r1[0..5]
+{ ; NOTE(review): the 224-bit accumulator and the bit-191 test below presumably rely on the top bit of %r4[5] being clear ("NF") - confirm against the generator/callers
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; the word stored immediately before %r4[0] is read as the per-round factor
+%r7 = load i32, i32* %r6
+%r8 = load i32, i32* %r3
+%r9 = call i224 @mulPv192x32(i32* %r2, i32 %r8)
+%r10 = trunc i224 %r9 to i32
+%r11 = mul i32 %r10, %r7 ; round multiplier = low accumulator word * %r7 (mod 2^32)
+%r12 = call i224 @mulPv192x32(i32* %r4, i32 %r11)
+%r13 = add i224 %r9, %r12
+%r14 = lshr i224 %r13, 32 ; end of round 0: discard the low word
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = call i224 @mulPv192x32(i32* %r2, i32 %r17)
+%r19 = add i224 %r14, %r18
+%r20 = trunc i224 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i224 @mulPv192x32(i32* %r4, i32 %r21)
+%r23 = add i224 %r19, %r22
+%r24 = lshr i224 %r23, 32 ; end of round 1
+%r26 = getelementptr i32, i32* %r3, i32 2
+%r27 = load i32, i32* %r26
+%r28 = call i224 @mulPv192x32(i32* %r2, i32 %r27)
+%r29 = add i224 %r24, %r28
+%r30 = trunc i224 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i224 @mulPv192x32(i32* %r4, i32 %r31)
+%r33 = add i224 %r29, %r32
+%r34 = lshr i224 %r33, 32 ; end of round 2
+%r36 = getelementptr i32, i32* %r3, i32 3
+%r37 = load i32, i32* %r36
+%r38 = call i224 @mulPv192x32(i32* %r2, i32 %r37)
+%r39 = add i224 %r34, %r38
+%r40 = trunc i224 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i224 @mulPv192x32(i32* %r4, i32 %r41)
+%r43 = add i224 %r39, %r42
+%r44 = lshr i224 %r43, 32 ; end of round 3
+%r46 = getelementptr i32, i32* %r3, i32 4
+%r47 = load i32, i32* %r46
+%r48 = call i224 @mulPv192x32(i32* %r2, i32 %r47)
+%r49 = add i224 %r44, %r48
+%r50 = trunc i224 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i224 @mulPv192x32(i32* %r4, i32 %r51)
+%r53 = add i224 %r49, %r52
+%r54 = lshr i224 %r53, 32 ; end of round 4
+%r56 = getelementptr i32, i32* %r3, i32 5
+%r57 = load i32, i32* %r56
+%r58 = call i224 @mulPv192x32(i32* %r2, i32 %r57)
+%r59 = add i224 %r54, %r58
+%r60 = trunc i224 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i224 @mulPv192x32(i32* %r4, i32 %r61)
+%r63 = add i224 %r59, %r62
+%r64 = lshr i224 %r63, 32 ; end of round 5
+%r65 = trunc i224 %r64 to i192 ; %r65 = value before the final conditional subtraction
+%r66 = load i32, i32* %r4
+%r67 = zext i32 %r66 to i64
+%r69 = getelementptr i32, i32* %r4, i32 1
+%r70 = load i32, i32* %r69
+%r71 = zext i32 %r70 to i64
+%r72 = shl i64 %r71, 32
+%r73 = or i64 %r67, %r72
+%r74 = zext i64 %r73 to i96
+%r76 = getelementptr i32, i32* %r4, i32 2
+%r77 = load i32, i32* %r76
+%r78 = zext i32 %r77 to i96
+%r79 = shl i96 %r78, 64
+%r80 = or i96 %r74, %r79
+%r81 = zext i96 %r80 to i128
+%r83 = getelementptr i32, i32* %r4, i32 3
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i128
+%r86 = shl i128 %r85, 96
+%r87 = or i128 %r81, %r86
+%r88 = zext i128 %r87 to i160
+%r90 = getelementptr i32, i32* %r4, i32 4
+%r91 = load i32, i32* %r90
+%r92 = zext i32 %r91 to i160
+%r93 = shl i160 %r92, 128
+%r94 = or i160 %r88, %r93
+%r95 = zext i160 %r94 to i192
+%r97 = getelementptr i32, i32* %r4, i32 5
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i192
+%r100 = shl i192 %r99, 160
+%r101 = or i192 %r95, %r100 ; %r101 = the six words %r4[0..5] as one 192-bit integer (little-endian words)
+%r102 = sub i192 %r65, %r101 ; trial subtraction of %r4[0..5], done at 192 bits (no extra word)
+%r103 = lshr i192 %r102, 191
+%r104 = trunc i192 %r103 to i1 ; top bit (bit 191) of the difference is the selector, i.e. its sign when read as two's complement
+%r105 = select i1 %r104, i192 %r65, i192 %r102 ; selector set -> keep %r65, otherwise keep the difference (no branch)
+%r106 = trunc i192 %r105 to i32
+%r108 = getelementptr i32, i32* %r1, i32 0
+store i32 %r106, i32* %r108
+%r109 = lshr i192 %r105, 32
+%r110 = trunc i192 %r109 to i32
+%r112 = getelementptr i32, i32* %r1, i32 1
+store i32 %r110, i32* %r112
+%r113 = lshr i192 %r109, 32
+%r114 = trunc i192 %r113 to i32
+%r116 = getelementptr i32, i32* %r1, i32 2
+store i32 %r114, i32* %r116
+%r117 = lshr i192 %r113, 32
+%r118 = trunc i192 %r117 to i32
+%r120 = getelementptr i32, i32* %r1, i32 3
+store i32 %r118, i32* %r120
+%r121 = lshr i192 %r117, 32
+%r122 = trunc i192 %r121 to i32
+%r124 = getelementptr i32, i32* %r1, i32 4
+store i32 %r122, i32* %r124
+%r125 = lshr i192 %r121, 32
+%r126 = trunc i192 %r125 to i32
+%r128 = getelementptr i32, i32* %r1, i32 5
+store i32 %r126, i32* %r128
+ret void
+}
+define void @mcl_fp_montRed6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; reduces the twelve words %r2[0..11] to six words in %r1[0..5] using %r3[0..5] and the word at %r3[-1]
+{ ; NOTE(review): the structure matches word-by-word Montgomery reduction with modulus %r3 - confirm against the generator/callers
+%r5 = getelementptr i32, i32* %r3, i32 -1 ; the word stored immediately before %r3[0] is read as the per-round factor
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41 ; %r42 = the six words %r3[0..5] as one 192-bit integer (little-endian words); used only in the final subtraction
+%r43 = load i32, i32* %r2
+%r44 = zext i32 %r43 to i64
+%r46 = getelementptr i32, i32* %r2, i32 1
+%r47 = load i32, i32* %r46
+%r48 = zext i32 %r47 to i64
+%r49 = shl i64 %r48, 32
+%r50 = or i64 %r44, %r49
+%r51 = zext i64 %r50 to i96
+%r53 = getelementptr i32, i32* %r2, i32 2
+%r54 = load i32, i32* %r53
+%r55 = zext i32 %r54 to i96
+%r56 = shl i96 %r55, 64
+%r57 = or i96 %r51, %r56
+%r58 = zext i96 %r57 to i128
+%r60 = getelementptr i32, i32* %r2, i32 3
+%r61 = load i32, i32* %r60
+%r62 = zext i32 %r61 to i128
+%r63 = shl i128 %r62, 96
+%r64 = or i128 %r58, %r63
+%r65 = zext i128 %r64 to i160
+%r67 = getelementptr i32, i32* %r2, i32 4
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i160
+%r70 = shl i160 %r69, 128
+%r71 = or i160 %r65, %r70
+%r72 = zext i160 %r71 to i192
+%r74 = getelementptr i32, i32* %r2, i32 5
+%r75 = load i32, i32* %r74
+%r76 = zext i32 %r75 to i192
+%r77 = shl i192 %r76, 160
+%r78 = or i192 %r72, %r77
+%r79 = zext i192 %r78 to i224
+%r81 = getelementptr i32, i32* %r2, i32 6
+%r82 = load i32, i32* %r81
+%r83 = zext i32 %r82 to i224
+%r84 = shl i224 %r83, 192
+%r85 = or i224 %r79, %r84
+%r86 = zext i224 %r85 to i256
+%r88 = getelementptr i32, i32* %r2, i32 7
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i256
+%r91 = shl i256 %r90, 224
+%r92 = or i256 %r86, %r91
+%r93 = zext i256 %r92 to i288
+%r95 = getelementptr i32, i32* %r2, i32 8
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i288
+%r98 = shl i288 %r97, 256
+%r99 = or i288 %r93, %r98
+%r100 = zext i288 %r99 to i320
+%r102 = getelementptr i32, i32* %r2, i32 9
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i320
+%r105 = shl i320 %r104, 288
+%r106 = or i320 %r100, %r105
+%r107 = zext i320 %r106 to i352
+%r109 = getelementptr i32, i32* %r2, i32 10
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i352
+%r112 = shl i352 %r111, 320
+%r113 = or i352 %r107, %r112
+%r114 = zext i352 %r113 to i384
+%r116 = getelementptr i32, i32* %r2, i32 11
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i384
+%r119 = shl i384 %r118, 352
+%r120 = or i384 %r114, %r119 ; %r120 = the twelve words %r2[0..11] as one 384-bit integer
+%r121 = zext i384 %r120 to i416 ; one spare word of headroom for the first round's carry
+%r122 = trunc i416 %r121 to i32
+%r123 = mul i32 %r122, %r6 ; round multiplier = low accumulator word * %r6 (mod 2^32)
+%r124 = call i224 @mulPv192x32(i32* %r3, i32 %r123)
+%r125 = zext i224 %r124 to i416
+%r126 = add i416 %r121, %r125
+%r127 = lshr i416 %r126, 32 ; end of round 0: discard the low word ...
+%r128 = trunc i416 %r127 to i384 ; ... and narrow the accumulator by 32 bits (repeated every round)
+%r129 = trunc i384 %r128 to i32
+%r130 = mul i32 %r129, %r6
+%r131 = call i224 @mulPv192x32(i32* %r3, i32 %r130)
+%r132 = zext i224 %r131 to i384
+%r133 = add i384 %r128, %r132
+%r134 = lshr i384 %r133, 32 ; end of round 1
+%r135 = trunc i384 %r134 to i352
+%r136 = trunc i352 %r135 to i32
+%r137 = mul i32 %r136, %r6
+%r138 = call i224 @mulPv192x32(i32* %r3, i32 %r137)
+%r139 = zext i224 %r138 to i352
+%r140 = add i352 %r135, %r139
+%r141 = lshr i352 %r140, 32 ; end of round 2
+%r142 = trunc i352 %r141 to i320
+%r143 = trunc i320 %r142 to i32
+%r144 = mul i32 %r143, %r6
+%r145 = call i224 @mulPv192x32(i32* %r3, i32 %r144)
+%r146 = zext i224 %r145 to i320
+%r147 = add i320 %r142, %r146
+%r148 = lshr i320 %r147, 32 ; end of round 3
+%r149 = trunc i320 %r148 to i288
+%r150 = trunc i288 %r149 to i32
+%r151 = mul i32 %r150, %r6
+%r152 = call i224 @mulPv192x32(i32* %r3, i32 %r151)
+%r153 = zext i224 %r152 to i288
+%r154 = add i288 %r149, %r153
+%r155 = lshr i288 %r154, 32 ; end of round 4
+%r156 = trunc i288 %r155 to i256
+%r157 = trunc i256 %r156 to i32
+%r158 = mul i32 %r157, %r6
+%r159 = call i224 @mulPv192x32(i32* %r3, i32 %r158)
+%r160 = zext i224 %r159 to i256
+%r161 = add i256 %r156, %r160
+%r162 = lshr i256 %r161, 32 ; end of round 5
+%r163 = trunc i256 %r162 to i224 ; %r163 = value before the final conditional subtraction
+%r164 = zext i192 %r42 to i224
+%r165 = sub i224 %r163, %r164 ; trial subtraction of %r3[0..5]
+%r166 = lshr i224 %r165, 192
+%r167 = trunc i224 %r166 to i1 ; bit 192 of the difference is the selector; NOTE(review): presumably the borrow flag, assuming %r163 < 2 * %r3 - confirm
+%r168 = select i1 %r167, i224 %r163, i224 %r165 ; selector set -> keep %r163, otherwise keep the difference (no branch)
+%r169 = trunc i224 %r168 to i192
+%r170 = trunc i192 %r169 to i32
+%r172 = getelementptr i32, i32* %r1, i32 0
+store i32 %r170, i32* %r172
+%r173 = lshr i192 %r169, 32
+%r174 = trunc i192 %r173 to i32
+%r176 = getelementptr i32, i32* %r1, i32 1
+store i32 %r174, i32* %r176
+%r177 = lshr i192 %r173, 32
+%r178 = trunc i192 %r177 to i32
+%r180 = getelementptr i32, i32* %r1, i32 2
+store i32 %r178, i32* %r180
+%r181 = lshr i192 %r177, 32
+%r182 = trunc i192 %r181 to i32
+%r184 = getelementptr i32, i32* %r1, i32 3
+store i32 %r182, i32* %r184
+%r185 = lshr i192 %r181, 32
+%r186 = trunc i192 %r185 to i32
+%r188 = getelementptr i32, i32* %r1, i32 4
+store i32 %r186, i32* %r188
+%r189 = lshr i192 %r185, 32
+%r190 = trunc i192 %r189 to i32
+%r192 = getelementptr i32, i32* %r1, i32 5
+store i32 %r190, i32* %r192
+ret void
+}
+define i32 @mcl_fp_addPre6L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; plain 6-limb addition with no reduction: [%r2] = [%r3] + [%r4], returns the carry (0 or 1)
+{ ; %r2 = destination, %r3 / %r4 = sources; each is accessed as six consecutive i32 limbs
+%r5 = load i32, i32* %r3 ; assemble the six limbs at %r3 into one integer, limb 0 in the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224 ; widen by one limb so the carry out of bit 191 is not lost
+%r42 = load i32, i32* %r4 ; same limb-by-limb assembly for the operand at %r4
+%r43 = zext i32 %r42 to i64
+%r45 = getelementptr i32, i32* %r4, i32 1
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i64
+%r48 = shl i64 %r47, 32
+%r49 = or i64 %r43, %r48
+%r50 = zext i64 %r49 to i96
+%r52 = getelementptr i32, i32* %r4, i32 2
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i96
+%r55 = shl i96 %r54, 64
+%r56 = or i96 %r50, %r55
+%r57 = zext i96 %r56 to i128
+%r59 = getelementptr i32, i32* %r4, i32 3
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i128
+%r62 = shl i128 %r61, 96
+%r63 = or i128 %r57, %r62
+%r64 = zext i128 %r63 to i160
+%r66 = getelementptr i32, i32* %r4, i32 4
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i160
+%r69 = shl i160 %r68, 128
+%r70 = or i160 %r64, %r69
+%r71 = zext i160 %r70 to i192
+%r73 = getelementptr i32, i32* %r4, i32 5
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i192
+%r76 = shl i192 %r75, 160
+%r77 = or i192 %r71, %r76
+%r78 = zext i192 %r77 to i224
+%r79 = add i224 %r41, %r78 ; 224-bit sum; bit 192 holds the carry
+%r80 = trunc i224 %r79 to i192 ; low 192 bits are written to %r2 one limb at a time, shifting right by 32 between stores
+%r81 = trunc i192 %r80 to i32
+%r83 = getelementptr i32, i32* %r2, i32 0
+store i32 %r81, i32* %r83
+%r84 = lshr i192 %r80, 32
+%r85 = trunc i192 %r84 to i32
+%r87 = getelementptr i32, i32* %r2, i32 1
+store i32 %r85, i32* %r87
+%r88 = lshr i192 %r84, 32
+%r89 = trunc i192 %r88 to i32
+%r91 = getelementptr i32, i32* %r2, i32 2
+store i32 %r89, i32* %r91
+%r92 = lshr i192 %r88, 32
+%r93 = trunc i192 %r92 to i32
+%r95 = getelementptr i32, i32* %r2, i32 3
+store i32 %r93, i32* %r95
+%r96 = lshr i192 %r92, 32
+%r97 = trunc i192 %r96 to i32
+%r99 = getelementptr i32, i32* %r2, i32 4
+store i32 %r97, i32* %r99
+%r100 = lshr i192 %r96, 32
+%r101 = trunc i192 %r100 to i32
+%r103 = getelementptr i32, i32* %r2, i32 5
+store i32 %r101, i32* %r103
+%r104 = lshr i224 %r79, 192 ; return value = sum >> 192, i.e. the carry out of the 192-bit addition
+%r105 = trunc i224 %r104 to i32
+ret i32 %r105
+}
+define i32 @mcl_fp_subPre6L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; plain 6-limb subtraction with no reduction: [%r2] = [%r3] - [%r4], returns the borrow (0 or 1)
+{ ; %r2 = destination, %r3 = minuend, %r4 = subtrahend; each is accessed as six consecutive i32 limbs
+%r5 = load i32, i32* %r3 ; assemble the six limbs at %r3 into one integer, limb 0 in the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224 ; widen by one limb so a borrow shows up in bit 192
+%r42 = load i32, i32* %r4 ; same limb-by-limb assembly for the operand at %r4
+%r43 = zext i32 %r42 to i64
+%r45 = getelementptr i32, i32* %r4, i32 1
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i64
+%r48 = shl i64 %r47, 32
+%r49 = or i64 %r43, %r48
+%r50 = zext i64 %r49 to i96
+%r52 = getelementptr i32, i32* %r4, i32 2
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i96
+%r55 = shl i96 %r54, 64
+%r56 = or i96 %r50, %r55
+%r57 = zext i96 %r56 to i128
+%r59 = getelementptr i32, i32* %r4, i32 3
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i128
+%r62 = shl i128 %r61, 96
+%r63 = or i128 %r57, %r62
+%r64 = zext i128 %r63 to i160
+%r66 = getelementptr i32, i32* %r4, i32 4
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i160
+%r69 = shl i160 %r68, 128
+%r70 = or i160 %r64, %r69
+%r71 = zext i160 %r70 to i192
+%r73 = getelementptr i32, i32* %r4, i32 5
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i192
+%r76 = shl i192 %r75, 160
+%r77 = or i192 %r71, %r76
+%r78 = zext i192 %r77 to i224
+%r79 = sub i224 %r41, %r78 ; 224-bit difference; wraps (upper bits all ones) when [%r3] < [%r4]
+%r80 = trunc i224 %r79 to i192 ; low 192 bits are written to %r2 one limb at a time, shifting right by 32 between stores
+%r81 = trunc i192 %r80 to i32
+%r83 = getelementptr i32, i32* %r2, i32 0
+store i32 %r81, i32* %r83
+%r84 = lshr i192 %r80, 32
+%r85 = trunc i192 %r84 to i32
+%r87 = getelementptr i32, i32* %r2, i32 1
+store i32 %r85, i32* %r87
+%r88 = lshr i192 %r84, 32
+%r89 = trunc i192 %r88 to i32
+%r91 = getelementptr i32, i32* %r2, i32 2
+store i32 %r89, i32* %r91
+%r92 = lshr i192 %r88, 32
+%r93 = trunc i192 %r92 to i32
+%r95 = getelementptr i32, i32* %r2, i32 3
+store i32 %r93, i32* %r95
+%r96 = lshr i192 %r92, 32
+%r97 = trunc i192 %r96 to i32
+%r99 = getelementptr i32, i32* %r2, i32 4
+store i32 %r97, i32* %r99
+%r100 = lshr i192 %r96, 32
+%r101 = trunc i192 %r100 to i32
+%r103 = getelementptr i32, i32* %r2, i32 5
+store i32 %r101, i32* %r103
+%r104 = lshr i224 %r79, 192 ; upper limb of the difference: all ones after a borrow, zero otherwise
+%r105 = trunc i224 %r104 to i32
+%r107 = and i32 %r105, 1 ; keep only bit 192 so the returned borrow is 0 or 1
+ret i32 %r107
+}
+define void @mcl_fp_shr1_6L(i32* noalias  %r1, i32* noalias  %r2) ; logical right shift by one bit of a 6-limb value: [%r1] = [%r2] >> 1
+{ ; %r1 = destination, %r2 = source; each is accessed as six consecutive i32 limbs
+%r3 = load i32, i32* %r2 ; assemble the six limbs at %r2 into one i192, limb 0 in the lowest 32 bits
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = lshr i192 %r38, 1 ; the shift itself: bit 0 is dropped, a zero enters at bit 191
+%r40 = trunc i192 %r39 to i32 ; write the shifted value to %r1 one limb at a time, shifting right by 32 between stores
+%r42 = getelementptr i32, i32* %r1, i32 0
+store i32 %r40, i32* %r42
+%r43 = lshr i192 %r39, 32
+%r44 = trunc i192 %r43 to i32
+%r46 = getelementptr i32, i32* %r1, i32 1
+store i32 %r44, i32* %r46
+%r47 = lshr i192 %r43, 32
+%r48 = trunc i192 %r47 to i32
+%r50 = getelementptr i32, i32* %r1, i32 2
+store i32 %r48, i32* %r50
+%r51 = lshr i192 %r47, 32
+%r52 = trunc i192 %r51 to i32
+%r54 = getelementptr i32, i32* %r1, i32 3
+store i32 %r52, i32* %r54
+%r55 = lshr i192 %r51, 32
+%r56 = trunc i192 %r55 to i32
+%r58 = getelementptr i32, i32* %r1, i32 4
+store i32 %r56, i32* %r58
+%r59 = lshr i192 %r55, 32
+%r60 = trunc i192 %r59 to i32
+%r62 = getelementptr i32, i32* %r1, i32 5
+store i32 %r60, i32* %r62
+ret void
+}
+define void @mcl_fp_add6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 6-limb add followed by one conditional subtraction of [%r4]: [%r1] = [%r2] + [%r3], minus [%r4] when that does not go negative
+{ ; %r1 = destination, %r2 / %r3 = addends, %r4 = 6-limb value subtracted afterwards (presumably the field modulus p - confirm with callers)
+%r5 = load i32, i32* %r2 ; assemble the six limbs at %r2 into one i192, limb 0 in the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = load i32, i32* %r3 ; same limb-by-limb assembly for the operand at %r3
+%r42 = zext i32 %r41 to i64
+%r44 = getelementptr i32, i32* %r3, i32 1
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i64
+%r47 = shl i64 %r46, 32
+%r48 = or i64 %r42, %r47
+%r49 = zext i64 %r48 to i96
+%r51 = getelementptr i32, i32* %r3, i32 2
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i96
+%r54 = shl i96 %r53, 64
+%r55 = or i96 %r49, %r54
+%r56 = zext i96 %r55 to i128
+%r58 = getelementptr i32, i32* %r3, i32 3
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i128
+%r61 = shl i128 %r60, 96
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i160
+%r65 = getelementptr i32, i32* %r3, i32 4
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i160
+%r68 = shl i160 %r67, 128
+%r69 = or i160 %r63, %r68
+%r70 = zext i160 %r69 to i192
+%r72 = getelementptr i32, i32* %r3, i32 5
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i192
+%r75 = shl i192 %r74, 160
+%r76 = or i192 %r70, %r75
+%r77 = zext i192 %r40 to i224 ; both addends are widened by one limb so the sum cannot overflow
+%r78 = zext i192 %r76 to i224
+%r79 = add i224 %r77, %r78 ; full 224-bit sum
+%r80 = trunc i224 %r79 to i192 ; the low 192 bits of the unreduced sum are stored to %r1 first; the nocarry path below overwrites them
+%r81 = trunc i192 %r80 to i32
+%r83 = getelementptr i32, i32* %r1, i32 0
+store i32 %r81, i32* %r83
+%r84 = lshr i192 %r80, 32
+%r85 = trunc i192 %r84 to i32
+%r87 = getelementptr i32, i32* %r1, i32 1
+store i32 %r85, i32* %r87
+%r88 = lshr i192 %r84, 32
+%r89 = trunc i192 %r88 to i32
+%r91 = getelementptr i32, i32* %r1, i32 2
+store i32 %r89, i32* %r91
+%r92 = lshr i192 %r88, 32
+%r93 = trunc i192 %r92 to i32
+%r95 = getelementptr i32, i32* %r1, i32 3
+store i32 %r93, i32* %r95
+%r96 = lshr i192 %r92, 32
+%r97 = trunc i192 %r96 to i32
+%r99 = getelementptr i32, i32* %r1, i32 4
+store i32 %r97, i32* %r99
+%r100 = lshr i192 %r96, 32
+%r101 = trunc i192 %r100 to i32
+%r103 = getelementptr i32, i32* %r1, i32 5
+store i32 %r101, i32* %r103
+%r104 = load i32, i32* %r4 ; assemble the six limbs at %r4; this load happens after the stores to %r1 above
+%r105 = zext i32 %r104 to i64
+%r107 = getelementptr i32, i32* %r4, i32 1
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i64
+%r110 = shl i64 %r109, 32
+%r111 = or i64 %r105, %r110
+%r112 = zext i64 %r111 to i96
+%r114 = getelementptr i32, i32* %r4, i32 2
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i96
+%r117 = shl i96 %r116, 64
+%r118 = or i96 %r112, %r117
+%r119 = zext i96 %r118 to i128
+%r121 = getelementptr i32, i32* %r4, i32 3
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i128
+%r124 = shl i128 %r123, 96
+%r125 = or i128 %r119, %r124
+%r126 = zext i128 %r125 to i160
+%r128 = getelementptr i32, i32* %r4, i32 4
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i160
+%r131 = shl i160 %r130, 128
+%r132 = or i160 %r126, %r131
+%r133 = zext i160 %r132 to i192
+%r135 = getelementptr i32, i32* %r4, i32 5
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i192
+%r138 = shl i192 %r137, 160
+%r139 = or i192 %r133, %r138
+%r140 = zext i192 %r139 to i224
+%r141 = sub i224 %r79, %r140 ; trial subtraction: sum - [%r4]
+%r142 = lshr i224 %r141, 192 ; bit 192 of the difference is used as the borrow flag
+%r143 = trunc i224 %r142 to i1 ; NOTE(review): bit 192 equals "sum < [%r4]" only when both addends are already below [%r4] - confirm callers guarantee reduced inputs
+br i1%r143, label %carry, label %nocarry
+nocarry: ; flag clear: replace the stored sum with the low 192 bits of sum - [%r4]
+%r144 = trunc i224 %r141 to i192
+%r145 = trunc i192 %r144 to i32
+%r147 = getelementptr i32, i32* %r1, i32 0
+store i32 %r145, i32* %r147
+%r148 = lshr i192 %r144, 32
+%r149 = trunc i192 %r148 to i32
+%r151 = getelementptr i32, i32* %r1, i32 1
+store i32 %r149, i32* %r151
+%r152 = lshr i192 %r148, 32
+%r153 = trunc i192 %r152 to i32
+%r155 = getelementptr i32, i32* %r1, i32 2
+store i32 %r153, i32* %r155
+%r156 = lshr i192 %r152, 32
+%r157 = trunc i192 %r156 to i32
+%r159 = getelementptr i32, i32* %r1, i32 3
+store i32 %r157, i32* %r159
+%r160 = lshr i192 %r156, 32
+%r161 = trunc i192 %r160 to i32
+%r163 = getelementptr i32, i32* %r1, i32 4
+store i32 %r161, i32* %r163
+%r164 = lshr i192 %r160, 32
+%r165 = trunc i192 %r164 to i32
+%r167 = getelementptr i32, i32* %r1, i32 5
+store i32 %r165, i32* %r167
+ret void
+carry: ; flag set: the unreduced sum already stored in %r1 is left as the result
+ret void
+}
+define void @mcl_fp_addNF6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; branch-free 6-limb add with conditional subtraction of [%r4], done entirely in 192 bits; result stored to [%r1]
+{ ; %r1 = destination, %r2 / %r3 = addends, %r4 = 6-limb value subtracted afterwards (presumably the field modulus p - confirm with callers)
+%r5 = load i32, i32* %r2 ; assemble the six limbs at %r2 into one i192, limb 0 in the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = load i32, i32* %r3 ; same limb-by-limb assembly for the operand at %r3
+%r42 = zext i32 %r41 to i64
+%r44 = getelementptr i32, i32* %r3, i32 1
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i64
+%r47 = shl i64 %r46, 32
+%r48 = or i64 %r42, %r47
+%r49 = zext i64 %r48 to i96
+%r51 = getelementptr i32, i32* %r3, i32 2
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i96
+%r54 = shl i96 %r53, 64
+%r55 = or i96 %r49, %r54
+%r56 = zext i96 %r55 to i128
+%r58 = getelementptr i32, i32* %r3, i32 3
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i128
+%r61 = shl i128 %r60, 96
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i160
+%r65 = getelementptr i32, i32* %r3, i32 4
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i160
+%r68 = shl i160 %r67, 128
+%r69 = or i160 %r63, %r68
+%r70 = zext i160 %r69 to i192
+%r72 = getelementptr i32, i32* %r3, i32 5
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i192
+%r75 = shl i192 %r74, 160
+%r76 = or i192 %r70, %r75
+%r77 = add i192 %r40, %r76 ; sum computed in 192 bits with no extra carry limb, so any carry out of bit 191 is discarded
+%r78 = load i32, i32* %r4 ; assemble the six limbs at %r4
+%r79 = zext i32 %r78 to i64
+%r81 = getelementptr i32, i32* %r4, i32 1
+%r82 = load i32, i32* %r81
+%r83 = zext i32 %r82 to i64
+%r84 = shl i64 %r83, 32
+%r85 = or i64 %r79, %r84
+%r86 = zext i64 %r85 to i96
+%r88 = getelementptr i32, i32* %r4, i32 2
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i96
+%r91 = shl i96 %r90, 64
+%r92 = or i96 %r86, %r91
+%r93 = zext i96 %r92 to i128
+%r95 = getelementptr i32, i32* %r4, i32 3
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i128
+%r98 = shl i128 %r97, 96
+%r99 = or i128 %r93, %r98
+%r100 = zext i128 %r99 to i160
+%r102 = getelementptr i32, i32* %r4, i32 4
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i160
+%r105 = shl i160 %r104, 128
+%r106 = or i160 %r100, %r105
+%r107 = zext i160 %r106 to i192
+%r109 = getelementptr i32, i32* %r4, i32 5
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i192
+%r112 = shl i192 %r111, 160
+%r113 = or i192 %r107, %r112
+%r114 = sub i192 %r77, %r113 ; trial subtraction: sum - [%r4], also in 192 bits
+%r115 = lshr i192 %r114, 191 ; the top bit (bit 191) of the difference is read as its sign
+%r116 = trunc i192 %r115 to i1 ; NOTE(review): a valid sign test only if [%r4] and the sum leave bit 191 unused - presumably what the "NF" variant requires; confirm
+%r117 = select i1 %r116, i192 %r77, i192 %r114 ; negative difference -> keep the plain sum, otherwise keep sum - [%r4]
+%r118 = trunc i192 %r117 to i32 ; write the selected value to %r1 one limb at a time, shifting right by 32 between stores
+%r120 = getelementptr i32, i32* %r1, i32 0
+store i32 %r118, i32* %r120
+%r121 = lshr i192 %r117, 32
+%r122 = trunc i192 %r121 to i32
+%r124 = getelementptr i32, i32* %r1, i32 1
+store i32 %r122, i32* %r124
+%r125 = lshr i192 %r121, 32
+%r126 = trunc i192 %r125 to i32
+%r128 = getelementptr i32, i32* %r1, i32 2
+store i32 %r126, i32* %r128
+%r129 = lshr i192 %r125, 32
+%r130 = trunc i192 %r129 to i32
+%r132 = getelementptr i32, i32* %r1, i32 3
+store i32 %r130, i32* %r132
+%r133 = lshr i192 %r129, 32
+%r134 = trunc i192 %r133 to i32
+%r136 = getelementptr i32, i32* %r1, i32 4
+store i32 %r134, i32* %r136
+%r137 = lshr i192 %r133, 32
+%r138 = trunc i192 %r137 to i32
+%r140 = getelementptr i32, i32* %r1, i32 5
+store i32 %r138, i32* %r140
+ret void
+}
+define void @mcl_fp_sub6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 6-limb subtract with conditional add-back of [%r4]: [%r1] = [%r2] - [%r3], plus [%r4] when the subtraction borrowed
+{ ; %r1 = destination, %r2 = minuend, %r3 = subtrahend, %r4 = 6-limb value added back on borrow (presumably the field modulus p - confirm with callers)
+%r5 = load i32, i32* %r2 ; assemble the six limbs at %r2 into one i192, limb 0 in the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = load i32, i32* %r3 ; same limb-by-limb assembly for the operand at %r3
+%r42 = zext i32 %r41 to i64
+%r44 = getelementptr i32, i32* %r3, i32 1
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i64
+%r47 = shl i64 %r46, 32
+%r48 = or i64 %r42, %r47
+%r49 = zext i64 %r48 to i96
+%r51 = getelementptr i32, i32* %r3, i32 2
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i96
+%r54 = shl i96 %r53, 64
+%r55 = or i96 %r49, %r54
+%r56 = zext i96 %r55 to i128
+%r58 = getelementptr i32, i32* %r3, i32 3
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i128
+%r61 = shl i128 %r60, 96
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i160
+%r65 = getelementptr i32, i32* %r3, i32 4
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i160
+%r68 = shl i160 %r67, 128
+%r69 = or i160 %r63, %r68
+%r70 = zext i160 %r69 to i192
+%r72 = getelementptr i32, i32* %r3, i32 5
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i192
+%r75 = shl i192 %r74, 160
+%r76 = or i192 %r70, %r75
+%r77 = zext i192 %r40 to i224 ; both operands are widened by one limb so a borrow shows up in bit 192
+%r78 = zext i192 %r76 to i224
+%r79 = sub i224 %r77, %r78 ; 224-bit difference; wraps (upper bits all ones) when [%r2] < [%r3]
+%r80 = trunc i224 %r79 to i192
+%r81 = lshr i224 %r79, 192
+%r82 = trunc i224 %r81 to i1 ; borrow flag = bit 192, set exactly when [%r2] < [%r3]
+%r83 = trunc i192 %r80 to i32 ; the raw 192-bit difference is stored to %r1 first; the carry path below overwrites it
+%r85 = getelementptr i32, i32* %r1, i32 0
+store i32 %r83, i32* %r85
+%r86 = lshr i192 %r80, 32
+%r87 = trunc i192 %r86 to i32
+%r89 = getelementptr i32, i32* %r1, i32 1
+store i32 %r87, i32* %r89
+%r90 = lshr i192 %r86, 32
+%r91 = trunc i192 %r90 to i32
+%r93 = getelementptr i32, i32* %r1, i32 2
+store i32 %r91, i32* %r93
+%r94 = lshr i192 %r90, 32
+%r95 = trunc i192 %r94 to i32
+%r97 = getelementptr i32, i32* %r1, i32 3
+store i32 %r95, i32* %r97
+%r98 = lshr i192 %r94, 32
+%r99 = trunc i192 %r98 to i32
+%r101 = getelementptr i32, i32* %r1, i32 4
+store i32 %r99, i32* %r101
+%r102 = lshr i192 %r98, 32
+%r103 = trunc i192 %r102 to i32
+%r105 = getelementptr i32, i32* %r1, i32 5
+store i32 %r103, i32* %r105
+br i1%r82, label %carry, label %nocarry
+nocarry: ; no borrow: the difference already stored in %r1 is the result
+ret void
+carry: ; borrow: [%r4] is only loaded on this path, then added to the wrapped difference
+%r106 = load i32, i32* %r4
+%r107 = zext i32 %r106 to i64
+%r109 = getelementptr i32, i32* %r4, i32 1
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i64
+%r112 = shl i64 %r111, 32
+%r113 = or i64 %r107, %r112
+%r114 = zext i64 %r113 to i96
+%r116 = getelementptr i32, i32* %r4, i32 2
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i96
+%r119 = shl i96 %r118, 64
+%r120 = or i96 %r114, %r119
+%r121 = zext i96 %r120 to i128
+%r123 = getelementptr i32, i32* %r4, i32 3
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i128
+%r126 = shl i128 %r125, 96
+%r127 = or i128 %r121, %r126
+%r128 = zext i128 %r127 to i160
+%r130 = getelementptr i32, i32* %r4, i32 4
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i160
+%r133 = shl i160 %r132, 128
+%r134 = or i160 %r128, %r133
+%r135 = zext i160 %r134 to i192
+%r137 = getelementptr i32, i32* %r4, i32 5
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i192
+%r140 = shl i192 %r139, 160
+%r141 = or i192 %r135, %r140
+%r142 = add i192 %r80, %r141 ; 192-bit wrapping add: (x - y mod 2^192) + [%r4]
+%r143 = trunc i192 %r142 to i32 ; overwrite %r1 with the corrected value, one limb at a time
+%r145 = getelementptr i32, i32* %r1, i32 0
+store i32 %r143, i32* %r145
+%r146 = lshr i192 %r142, 32
+%r147 = trunc i192 %r146 to i32
+%r149 = getelementptr i32, i32* %r1, i32 1
+store i32 %r147, i32* %r149
+%r150 = lshr i192 %r146, 32
+%r151 = trunc i192 %r150 to i32
+%r153 = getelementptr i32, i32* %r1, i32 2
+store i32 %r151, i32* %r153
+%r154 = lshr i192 %r150, 32
+%r155 = trunc i192 %r154 to i32
+%r157 = getelementptr i32, i32* %r1, i32 3
+store i32 %r155, i32* %r157
+%r158 = lshr i192 %r154, 32
+%r159 = trunc i192 %r158 to i32
+%r161 = getelementptr i32, i32* %r1, i32 4
+store i32 %r159, i32* %r161
+%r162 = lshr i192 %r158, 32
+%r163 = trunc i192 %r162 to i32
+%r165 = getelementptr i32, i32* %r1, i32 5
+store i32 %r163, i32* %r165
+ret void
+}
+define void @mcl_fp_subNF6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; branch-free 6-limb subtract with conditional add-back of [%r4], done entirely in 192 bits; result stored to [%r1]
+{ ; %r1 = destination, %r2 = minuend, %r3 = subtrahend, %r4 = 6-limb value added back when the difference is negative (presumably the field modulus p - confirm with callers)
+%r5 = load i32, i32* %r2 ; assemble the six limbs at %r2 into one i192, limb 0 in the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = load i32, i32* %r3 ; same limb-by-limb assembly for the operand at %r3
+%r42 = zext i32 %r41 to i64
+%r44 = getelementptr i32, i32* %r3, i32 1
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i64
+%r47 = shl i64 %r46, 32
+%r48 = or i64 %r42, %r47
+%r49 = zext i64 %r48 to i96
+%r51 = getelementptr i32, i32* %r3, i32 2
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i96
+%r54 = shl i96 %r53, 64
+%r55 = or i96 %r49, %r54
+%r56 = zext i96 %r55 to i128
+%r58 = getelementptr i32, i32* %r3, i32 3
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i128
+%r61 = shl i128 %r60, 96
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i160
+%r65 = getelementptr i32, i32* %r3, i32 4
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i160
+%r68 = shl i160 %r67, 128
+%r69 = or i160 %r63, %r68
+%r70 = zext i160 %r69 to i192
+%r72 = getelementptr i32, i32* %r3, i32 5
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i192
+%r75 = shl i192 %r74, 160
+%r76 = or i192 %r70, %r75
+%r77 = sub i192 %r40, %r76 ; difference computed in 192 bits with no extra borrow limb
+%r78 = lshr i192 %r77, 191 ; the top bit (bit 191) of the difference is read as its sign
+%r79 = trunc i192 %r78 to i1 ; NOTE(review): a valid sign test only if both operands leave bit 191 unused - presumably what the "NF" variant requires; confirm
+%r80 = load i32, i32* %r4 ; assemble the six limbs at %r4 (loaded unconditionally, used via the select below)
+%r81 = zext i32 %r80 to i64
+%r83 = getelementptr i32, i32* %r4, i32 1
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i64
+%r86 = shl i64 %r85, 32
+%r87 = or i64 %r81, %r86
+%r88 = zext i64 %r87 to i96
+%r90 = getelementptr i32, i32* %r4, i32 2
+%r91 = load i32, i32* %r90
+%r92 = zext i32 %r91 to i96
+%r93 = shl i96 %r92, 64
+%r94 = or i96 %r88, %r93
+%r95 = zext i96 %r94 to i128
+%r97 = getelementptr i32, i32* %r4, i32 3
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i128
+%r100 = shl i128 %r99, 96
+%r101 = or i128 %r95, %r100
+%r102 = zext i128 %r101 to i160
+%r104 = getelementptr i32, i32* %r4, i32 4
+%r105 = load i32, i32* %r104
+%r106 = zext i32 %r105 to i160
+%r107 = shl i160 %r106, 128
+%r108 = or i160 %r102, %r107
+%r109 = zext i160 %r108 to i192
+%r111 = getelementptr i32, i32* %r4, i32 5
+%r112 = load i32, i32* %r111
+%r113 = zext i32 %r112 to i192
+%r114 = shl i192 %r113, 160
+%r115 = or i192 %r109, %r114
+%r117 = select i1 %r79, i192 %r115, i192 0 ; addend is [%r4] when the difference is negative, otherwise 0
+%r118 = add i192 %r77, %r117 ; wrapping 192-bit add of the selected correction
+%r119 = trunc i192 %r118 to i32 ; write the result to %r1 one limb at a time, shifting right by 32 between stores
+%r121 = getelementptr i32, i32* %r1, i32 0
+store i32 %r119, i32* %r121
+%r122 = lshr i192 %r118, 32
+%r123 = trunc i192 %r122 to i32
+%r125 = getelementptr i32, i32* %r1, i32 1
+store i32 %r123, i32* %r125
+%r126 = lshr i192 %r122, 32
+%r127 = trunc i192 %r126 to i32
+%r129 = getelementptr i32, i32* %r1, i32 2
+store i32 %r127, i32* %r129
+%r130 = lshr i192 %r126, 32
+%r131 = trunc i192 %r130 to i32
+%r133 = getelementptr i32, i32* %r1, i32 3
+store i32 %r131, i32* %r133
+%r134 = lshr i192 %r130, 32
+%r135 = trunc i192 %r134 to i32
+%r137 = getelementptr i32, i32* %r1, i32 4
+store i32 %r135, i32* %r137
+%r138 = lshr i192 %r134, 32
+%r139 = trunc i192 %r138 to i32
+%r141 = getelementptr i32, i32* %r1, i32 5
+store i32 %r139, i32* %r141
+ret void
+}
+define void @mcl_fpDbl_add6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 12-limb add whose upper half gets one conditional subtraction of [%r4]; low 6 limbs go to %r1[0..5], corrected high part to %r1[6..11]
+{ ; %r1 = 12-limb destination, %r2 / %r3 = 12-limb addends, %r4 = 6-limb value (presumably the field modulus p - confirm with callers)
+%r5 = load i32, i32* %r2 ; assemble the twelve limbs at %r2 into one i384, limb 0 in the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = load i32, i32* %r3 ; same twelve-limb assembly for the operand at %r3
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r3, i32 1
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r3, i32 2
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r3, i32 3
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r3, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r3, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r121 = getelementptr i32, i32* %r3, i32 6
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i224
+%r124 = shl i224 %r123, 192
+%r125 = or i224 %r119, %r124
+%r126 = zext i224 %r125 to i256
+%r128 = getelementptr i32, i32* %r3, i32 7
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i256
+%r131 = shl i256 %r130, 224
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i288
+%r135 = getelementptr i32, i32* %r3, i32 8
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i288
+%r138 = shl i288 %r137, 256
+%r139 = or i288 %r133, %r138
+%r140 = zext i288 %r139 to i320
+%r142 = getelementptr i32, i32* %r3, i32 9
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i320
+%r145 = shl i320 %r144, 288
+%r146 = or i320 %r140, %r145
+%r147 = zext i320 %r146 to i352
+%r149 = getelementptr i32, i32* %r3, i32 10
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i352
+%r152 = shl i352 %r151, 320
+%r153 = or i352 %r147, %r152
+%r154 = zext i352 %r153 to i384
+%r156 = getelementptr i32, i32* %r3, i32 11
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i384
+%r159 = shl i384 %r158, 352
+%r160 = or i384 %r154, %r159
+%r161 = zext i384 %r82 to i416 ; both addends are widened by one limb so the 384-bit sum cannot overflow
+%r162 = zext i384 %r160 to i416
+%r163 = add i416 %r161, %r162 ; full 416-bit sum
+%r164 = trunc i416 %r163 to i192 ; low 192 bits of the sum go to %r1[0..5] unchanged
+%r165 = trunc i192 %r164 to i32
+%r167 = getelementptr i32, i32* %r1, i32 0
+store i32 %r165, i32* %r167
+%r168 = lshr i192 %r164, 32
+%r169 = trunc i192 %r168 to i32
+%r171 = getelementptr i32, i32* %r1, i32 1
+store i32 %r169, i32* %r171
+%r172 = lshr i192 %r168, 32
+%r173 = trunc i192 %r172 to i32
+%r175 = getelementptr i32, i32* %r1, i32 2
+store i32 %r173, i32* %r175
+%r176 = lshr i192 %r172, 32
+%r177 = trunc i192 %r176 to i32
+%r179 = getelementptr i32, i32* %r1, i32 3
+store i32 %r177, i32* %r179
+%r180 = lshr i192 %r176, 32
+%r181 = trunc i192 %r180 to i32
+%r183 = getelementptr i32, i32* %r1, i32 4
+store i32 %r181, i32* %r183
+%r184 = lshr i192 %r180, 32
+%r185 = trunc i192 %r184 to i32
+%r187 = getelementptr i32, i32* %r1, i32 5
+store i32 %r185, i32* %r187
+%r188 = lshr i416 %r163, 192 ; upper part of the sum: bits 192..415, kept as a 224-bit value (includes the carry limb)
+%r189 = trunc i416 %r188 to i224
+%r190 = load i32, i32* %r4 ; assemble the six limbs at %r4; this load happens after the stores to %r1[0..5] above
+%r191 = zext i32 %r190 to i64
+%r193 = getelementptr i32, i32* %r4, i32 1
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i64
+%r196 = shl i64 %r195, 32
+%r197 = or i64 %r191, %r196
+%r198 = zext i64 %r197 to i96
+%r200 = getelementptr i32, i32* %r4, i32 2
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i96
+%r203 = shl i96 %r202, 64
+%r204 = or i96 %r198, %r203
+%r205 = zext i96 %r204 to i128
+%r207 = getelementptr i32, i32* %r4, i32 3
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i128
+%r210 = shl i128 %r209, 96
+%r211 = or i128 %r205, %r210
+%r212 = zext i128 %r211 to i160
+%r214 = getelementptr i32, i32* %r4, i32 4
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i160
+%r217 = shl i160 %r216, 128
+%r218 = or i160 %r212, %r217
+%r219 = zext i160 %r218 to i192
+%r221 = getelementptr i32, i32* %r4, i32 5
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i192
+%r224 = shl i192 %r223, 160
+%r225 = or i192 %r219, %r224
+%r226 = zext i192 %r225 to i224
+%r227 = sub i224 %r189, %r226 ; trial subtraction: upper part - [%r4]
+%r228 = lshr i224 %r227, 192 ; bit 192 of the difference is used as the borrow flag
+%r229 = trunc i224 %r228 to i1 ; NOTE(review): bit 192 equals "upper part < [%r4]" only when the upper halves of both inputs are already below [%r4] - confirm with callers
+%r230 = select i1 %r229, i224 %r189, i224 %r227 ; flag set -> keep the upper part as is, otherwise keep upper part - [%r4]
+%r231 = trunc i224 %r230 to i192
+%r233 = getelementptr i32, i32* %r1, i32 6 ; base pointer for the upper half of the destination
+%r234 = trunc i192 %r231 to i32 ; write the selected value to %r1[6..11] one limb at a time
+%r236 = getelementptr i32, i32* %r233, i32 0
+store i32 %r234, i32* %r236
+%r237 = lshr i192 %r231, 32
+%r238 = trunc i192 %r237 to i32
+%r240 = getelementptr i32, i32* %r233, i32 1
+store i32 %r238, i32* %r240
+%r241 = lshr i192 %r237, 32
+%r242 = trunc i192 %r241 to i32
+%r244 = getelementptr i32, i32* %r233, i32 2
+store i32 %r242, i32* %r244
+%r245 = lshr i192 %r241, 32
+%r246 = trunc i192 %r245 to i32
+%r248 = getelementptr i32, i32* %r233, i32 3
+store i32 %r246, i32* %r248
+%r249 = lshr i192 %r245, 32
+%r250 = trunc i192 %r249 to i32
+%r252 = getelementptr i32, i32* %r233, i32 4
+store i32 %r250, i32* %r252
+%r253 = lshr i192 %r249, 32
+%r254 = trunc i192 %r253 to i32
+%r256 = getelementptr i32, i32* %r233, i32 5
+store i32 %r254, i32* %r256
+ret void
+}
+define void @mcl_fpDbl_sub6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r1[0..11] = %r2[0..11] - %r3[0..11]; when that borrows, %r4[0..5] is added into the upper six words
+{
+%r5 = load i32, i32* %r2 ; pack the twelve i32 words at %r2 (word 0 least significant) into the i384 value %r82
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = load i32, i32* %r3 ; pack the twelve i32 words at %r3 the same way into the i384 value %r160
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r3, i32 1
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r3, i32 2
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r3, i32 3
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r3, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r3, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r121 = getelementptr i32, i32* %r3, i32 6
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i224
+%r124 = shl i224 %r123, 192
+%r125 = or i224 %r119, %r124
+%r126 = zext i224 %r125 to i256
+%r128 = getelementptr i32, i32* %r3, i32 7
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i256
+%r131 = shl i256 %r130, 224
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i288
+%r135 = getelementptr i32, i32* %r3, i32 8
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i288
+%r138 = shl i288 %r137, 256
+%r139 = or i288 %r133, %r138
+%r140 = zext i288 %r139 to i320
+%r142 = getelementptr i32, i32* %r3, i32 9
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i320
+%r145 = shl i320 %r144, 288
+%r146 = or i320 %r140, %r145
+%r147 = zext i320 %r146 to i352
+%r149 = getelementptr i32, i32* %r3, i32 10
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i352
+%r152 = shl i352 %r151, 320
+%r153 = or i352 %r147, %r152
+%r154 = zext i352 %r153 to i384
+%r156 = getelementptr i32, i32* %r3, i32 11
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i384
+%r159 = shl i384 %r158, 352
+%r160 = or i384 %r154, %r159
+%r161 = zext i384 %r82 to i416 ; widen both operands by one word so a borrow shows up in bits 384 and above
+%r162 = zext i384 %r160 to i416
+%r163 = sub i416 %r161, %r162 ; %r163 = %r82 - %r160, wrapping modulo 2^416
+%r164 = trunc i416 %r163 to i192 ; low 192 bits of the difference are stored to %r1[0..5] without correction
+%r165 = trunc i192 %r164 to i32
+%r167 = getelementptr i32, i32* %r1, i32 0
+store i32 %r165, i32* %r167
+%r168 = lshr i192 %r164, 32
+%r169 = trunc i192 %r168 to i32
+%r171 = getelementptr i32, i32* %r1, i32 1
+store i32 %r169, i32* %r171
+%r172 = lshr i192 %r168, 32
+%r173 = trunc i192 %r172 to i32
+%r175 = getelementptr i32, i32* %r1, i32 2
+store i32 %r173, i32* %r175
+%r176 = lshr i192 %r172, 32
+%r177 = trunc i192 %r176 to i32
+%r179 = getelementptr i32, i32* %r1, i32 3
+store i32 %r177, i32* %r179
+%r180 = lshr i192 %r176, 32
+%r181 = trunc i192 %r180 to i32
+%r183 = getelementptr i32, i32* %r1, i32 4
+store i32 %r181, i32* %r183
+%r184 = lshr i192 %r180, 32
+%r185 = trunc i192 %r184 to i32
+%r187 = getelementptr i32, i32* %r1, i32 5
+store i32 %r185, i32* %r187
+%r188 = lshr i416 %r163, 192
+%r189 = trunc i416 %r188 to i192 ; %r189 = bits 192..383 of the difference (the upper six words)
+%r190 = lshr i416 %r163, 384
+%r191 = trunc i416 %r190 to i1 ; %r191 = bit 384 of the difference; it is 1 exactly when %r82 < %r160
+%r192 = load i32, i32* %r4 ; pack the six i32 words at %r4 into the i192 value %r227
+%r193 = zext i32 %r192 to i64
+%r195 = getelementptr i32, i32* %r4, i32 1
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i64
+%r198 = shl i64 %r197, 32
+%r199 = or i64 %r193, %r198
+%r200 = zext i64 %r199 to i96
+%r202 = getelementptr i32, i32* %r4, i32 2
+%r203 = load i32, i32* %r202
+%r204 = zext i32 %r203 to i96
+%r205 = shl i96 %r204, 64
+%r206 = or i96 %r200, %r205
+%r207 = zext i96 %r206 to i128
+%r209 = getelementptr i32, i32* %r4, i32 3
+%r210 = load i32, i32* %r209
+%r211 = zext i32 %r210 to i128
+%r212 = shl i128 %r211, 96
+%r213 = or i128 %r207, %r212
+%r214 = zext i128 %r213 to i160
+%r216 = getelementptr i32, i32* %r4, i32 4
+%r217 = load i32, i32* %r216
+%r218 = zext i32 %r217 to i160
+%r219 = shl i160 %r218, 128
+%r220 = or i160 %r214, %r219
+%r221 = zext i160 %r220 to i192
+%r223 = getelementptr i32, i32* %r4, i32 5
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i192
+%r226 = shl i192 %r225, 160
+%r227 = or i192 %r221, %r226
+%r229 = select i1 %r191, i192 %r227, i192 0 ; branch-free correction term: %r227 on borrow, 0 otherwise
+%r230 = add i192 %r189, %r229 ; i192 add wraps modulo 2^192; any carry out is discarded
+%r232 = getelementptr i32, i32* %r1, i32 6 ; the corrected upper half is stored to %r1[6..11]
+%r233 = trunc i192 %r230 to i32
+%r235 = getelementptr i32, i32* %r232, i32 0
+store i32 %r233, i32* %r235
+%r236 = lshr i192 %r230, 32
+%r237 = trunc i192 %r236 to i32
+%r239 = getelementptr i32, i32* %r232, i32 1
+store i32 %r237, i32* %r239
+%r240 = lshr i192 %r236, 32
+%r241 = trunc i192 %r240 to i32
+%r243 = getelementptr i32, i32* %r232, i32 2
+store i32 %r241, i32* %r243
+%r244 = lshr i192 %r240, 32
+%r245 = trunc i192 %r244 to i32
+%r247 = getelementptr i32, i32* %r232, i32 3
+store i32 %r245, i32* %r247
+%r248 = lshr i192 %r244, 32
+%r249 = trunc i192 %r248 to i32
+%r251 = getelementptr i32, i32* %r232, i32 4
+store i32 %r249, i32* %r251
+%r252 = lshr i192 %r248, 32
+%r253 = trunc i192 %r252 to i32
+%r255 = getelementptr i32, i32* %r232, i32 5
+store i32 %r253, i32* %r255
+ret void
+}
+define i256 @mulPv224x32(i32* noalias  %r2, i32 %r3) ; returns low + (high << 32) as i256, where low/high pack the low/high 32-bit halves of mulPos32x32(%r2, %r3, i) for i = 0..6
+{
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0) ; mulPos32x32 is defined outside this view; presumably it returns the 64-bit product %r2[i] * %r3 -- confirm
+%r6 = trunc i64 %r5 to i32 ; low 32 bits of the i = 0 result
+%r7 = call i32 @extractHigh32(i64 %r5) ; extractHigh32 is defined outside this view; presumably the upper 32 bits -- confirm
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r32 = zext i32 %r6 to i64 ; pack the seven low halves into the i224 value %r55, index 0 least significant
+%r33 = zext i32 %r10 to i64
+%r34 = shl i64 %r33, 32
+%r35 = or i64 %r32, %r34
+%r36 = zext i64 %r35 to i96
+%r37 = zext i32 %r14 to i96
+%r38 = shl i96 %r37, 64
+%r39 = or i96 %r36, %r38
+%r40 = zext i96 %r39 to i128
+%r41 = zext i32 %r18 to i128
+%r42 = shl i128 %r41, 96
+%r43 = or i128 %r40, %r42
+%r44 = zext i128 %r43 to i160
+%r45 = zext i32 %r22 to i160
+%r46 = shl i160 %r45, 128
+%r47 = or i160 %r44, %r46
+%r48 = zext i160 %r47 to i192
+%r49 = zext i32 %r26 to i192
+%r50 = shl i192 %r49, 160
+%r51 = or i192 %r48, %r50
+%r52 = zext i192 %r51 to i224
+%r53 = zext i32 %r30 to i224
+%r54 = shl i224 %r53, 192
+%r55 = or i224 %r52, %r54
+%r56 = zext i32 %r7 to i64 ; pack the seven high halves the same way into the i224 value %r79
+%r57 = zext i32 %r11 to i64
+%r58 = shl i64 %r57, 32
+%r59 = or i64 %r56, %r58
+%r60 = zext i64 %r59 to i96
+%r61 = zext i32 %r15 to i96
+%r62 = shl i96 %r61, 64
+%r63 = or i96 %r60, %r62
+%r64 = zext i96 %r63 to i128
+%r65 = zext i32 %r19 to i128
+%r66 = shl i128 %r65, 96
+%r67 = or i128 %r64, %r66
+%r68 = zext i128 %r67 to i160
+%r69 = zext i32 %r23 to i160
+%r70 = shl i160 %r69, 128
+%r71 = or i160 %r68, %r70
+%r72 = zext i160 %r71 to i192
+%r73 = zext i32 %r27 to i192
+%r74 = shl i192 %r73, 160
+%r75 = or i192 %r72, %r74
+%r76 = zext i192 %r75 to i224
+%r77 = zext i32 %r31 to i224
+%r78 = shl i224 %r77, 192
+%r79 = or i224 %r76, %r78
+%r80 = zext i224 %r55 to i256
+%r81 = zext i224 %r79 to i256
+%r82 = shl i256 %r81, 32 ; the high halves sit one word above the low halves they belong to
+%r83 = add i256 %r80, %r82 ; a single wide add propagates every inter-word carry
+ret i256 %r83
+}
+define void @mcl_fp_mulUnitPre7L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3) ; writes the i256 result of mulPv224x32(%r2, %r3) to %r1[0..7], least-significant word first
+{
+%r4 = call i256 @mulPv224x32(i32* %r2, i32 %r3)
+%r5 = trunc i256 %r4 to i32 ; each step below stores the current low word, then shifts the value right by 32
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i256 %r4, 32
+%r9 = trunc i256 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i256 %r8, 32
+%r13 = trunc i256 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i256 %r12, 32
+%r17 = trunc i256 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i256 %r16, 32
+%r21 = trunc i256 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+%r24 = lshr i256 %r20, 32
+%r25 = trunc i256 %r24 to i32
+%r27 = getelementptr i32, i32* %r1, i32 5
+store i32 %r25, i32* %r27
+%r28 = lshr i256 %r24, 32
+%r29 = trunc i256 %r28 to i32
+%r31 = getelementptr i32, i32* %r1, i32 6
+store i32 %r29, i32* %r31
+%r32 = lshr i256 %r28, 32
+%r33 = trunc i256 %r32 to i32
+%r35 = getelementptr i32, i32* %r1, i32 7 ; eighth (most significant) word of the result
+store i32 %r33, i32* %r35
+ret void
+}
+define void @mcl_fpDbl_mulPre7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; row-by-row multiply: %r1[0..13] is built from mulPv224x32(%r2, %r3[i]) for i = 0..6; no modular reduction is performed here
+{
+%r4 = load i32, i32* %r3 ; row 0: running sum starts as mulPv224x32(%r2, %r3[0])
+%r5 = call i256 @mulPv224x32(i32* %r2, i32 %r4)
+%r6 = trunc i256 %r5 to i32
+store i32 %r6, i32* %r1 ; the low word of the running sum is final, so it is stored as %r1[0]
+%r7 = lshr i256 %r5, 32 ; drop the stored word; the remaining words carry into the next row
+%r9 = getelementptr i32, i32* %r3, i32 1 ; row 1: add mulPv224x32(%r2, %r3[1]), store %r1[1], shift
+%r10 = load i32, i32* %r9
+%r11 = call i256 @mulPv224x32(i32* %r2, i32 %r10)
+%r12 = add i256 %r7, %r11
+%r13 = trunc i256 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 1
+store i32 %r13, i32* %r15
+%r16 = lshr i256 %r12, 32
+%r18 = getelementptr i32, i32* %r3, i32 2 ; row 2
+%r19 = load i32, i32* %r18
+%r20 = call i256 @mulPv224x32(i32* %r2, i32 %r19)
+%r21 = add i256 %r16, %r20
+%r22 = trunc i256 %r21 to i32
+%r24 = getelementptr i32, i32* %r1, i32 2
+store i32 %r22, i32* %r24
+%r25 = lshr i256 %r21, 32
+%r27 = getelementptr i32, i32* %r3, i32 3 ; row 3
+%r28 = load i32, i32* %r27
+%r29 = call i256 @mulPv224x32(i32* %r2, i32 %r28)
+%r30 = add i256 %r25, %r29
+%r31 = trunc i256 %r30 to i32
+%r33 = getelementptr i32, i32* %r1, i32 3
+store i32 %r31, i32* %r33
+%r34 = lshr i256 %r30, 32
+%r36 = getelementptr i32, i32* %r3, i32 4 ; row 4
+%r37 = load i32, i32* %r36
+%r38 = call i256 @mulPv224x32(i32* %r2, i32 %r37)
+%r39 = add i256 %r34, %r38
+%r40 = trunc i256 %r39 to i32
+%r42 = getelementptr i32, i32* %r1, i32 4
+store i32 %r40, i32* %r42
+%r43 = lshr i256 %r39, 32
+%r45 = getelementptr i32, i32* %r3, i32 5 ; row 5
+%r46 = load i32, i32* %r45
+%r47 = call i256 @mulPv224x32(i32* %r2, i32 %r46)
+%r48 = add i256 %r43, %r47
+%r49 = trunc i256 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 5
+store i32 %r49, i32* %r51
+%r52 = lshr i256 %r48, 32
+%r54 = getelementptr i32, i32* %r3, i32 6 ; row 6 (last): add mulPv224x32(%r2, %r3[6]) and keep all eight words
+%r55 = load i32, i32* %r54
+%r56 = call i256 @mulPv224x32(i32* %r2, i32 %r55)
+%r57 = add i256 %r52, %r56
+%r59 = getelementptr i32, i32* %r1, i32 6 ; the whole i256 running sum is stored to %r1[6..13]
+%r60 = trunc i256 %r57 to i32
+%r62 = getelementptr i32, i32* %r59, i32 0
+store i32 %r60, i32* %r62
+%r63 = lshr i256 %r57, 32
+%r64 = trunc i256 %r63 to i32
+%r66 = getelementptr i32, i32* %r59, i32 1
+store i32 %r64, i32* %r66
+%r67 = lshr i256 %r63, 32
+%r68 = trunc i256 %r67 to i32
+%r70 = getelementptr i32, i32* %r59, i32 2
+store i32 %r68, i32* %r70
+%r71 = lshr i256 %r67, 32
+%r72 = trunc i256 %r71 to i32
+%r74 = getelementptr i32, i32* %r59, i32 3
+store i32 %r72, i32* %r74
+%r75 = lshr i256 %r71, 32
+%r76 = trunc i256 %r75 to i32
+%r78 = getelementptr i32, i32* %r59, i32 4
+store i32 %r76, i32* %r78
+%r79 = lshr i256 %r75, 32
+%r80 = trunc i256 %r79 to i32
+%r82 = getelementptr i32, i32* %r59, i32 5
+store i32 %r80, i32* %r82
+%r83 = lshr i256 %r79, 32
+%r84 = trunc i256 %r83 to i32
+%r86 = getelementptr i32, i32* %r59, i32 6
+store i32 %r84, i32* %r86
+%r87 = lshr i256 %r83, 32
+%r88 = trunc i256 %r87 to i32
+%r90 = getelementptr i32, i32* %r59, i32 7
+store i32 %r88, i32* %r90
+ret void
+}
+define void @mcl_fpDbl_sqrPre7L(i32* noalias  %r1, i32* noalias  %r2) ; row-by-row multiply with %r2 supplying both operands: %r1[0..13] is built from mulPv224x32(%r2, %r2[i]) for i = 0..6
+{
+%r3 = load i32, i32* %r2 ; row 0: running sum starts as mulPv224x32(%r2, %r2[0]); no squaring-specific shortcut is used
+%r4 = call i256 @mulPv224x32(i32* %r2, i32 %r3)
+%r5 = trunc i256 %r4 to i32
+store i32 %r5, i32* %r1 ; the low word of the running sum is final, so it is stored as %r1[0]
+%r6 = lshr i256 %r4, 32 ; drop the stored word; the remaining words carry into the next row
+%r8 = getelementptr i32, i32* %r2, i32 1 ; row 1: add mulPv224x32(%r2, %r2[1]), store %r1[1], shift
+%r9 = load i32, i32* %r8
+%r10 = call i256 @mulPv224x32(i32* %r2, i32 %r9)
+%r11 = add i256 %r6, %r10
+%r12 = trunc i256 %r11 to i32
+%r14 = getelementptr i32, i32* %r1, i32 1
+store i32 %r12, i32* %r14
+%r15 = lshr i256 %r11, 32
+%r17 = getelementptr i32, i32* %r2, i32 2 ; row 2
+%r18 = load i32, i32* %r17
+%r19 = call i256 @mulPv224x32(i32* %r2, i32 %r18)
+%r20 = add i256 %r15, %r19
+%r21 = trunc i256 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 2
+store i32 %r21, i32* %r23
+%r24 = lshr i256 %r20, 32
+%r26 = getelementptr i32, i32* %r2, i32 3 ; row 3
+%r27 = load i32, i32* %r26
+%r28 = call i256 @mulPv224x32(i32* %r2, i32 %r27)
+%r29 = add i256 %r24, %r28
+%r30 = trunc i256 %r29 to i32
+%r32 = getelementptr i32, i32* %r1, i32 3
+store i32 %r30, i32* %r32
+%r33 = lshr i256 %r29, 32
+%r35 = getelementptr i32, i32* %r2, i32 4 ; row 4
+%r36 = load i32, i32* %r35
+%r37 = call i256 @mulPv224x32(i32* %r2, i32 %r36)
+%r38 = add i256 %r33, %r37
+%r39 = trunc i256 %r38 to i32
+%r41 = getelementptr i32, i32* %r1, i32 4
+store i32 %r39, i32* %r41
+%r42 = lshr i256 %r38, 32
+%r44 = getelementptr i32, i32* %r2, i32 5 ; row 5
+%r45 = load i32, i32* %r44
+%r46 = call i256 @mulPv224x32(i32* %r2, i32 %r45)
+%r47 = add i256 %r42, %r46
+%r48 = trunc i256 %r47 to i32
+%r50 = getelementptr i32, i32* %r1, i32 5
+store i32 %r48, i32* %r50
+%r51 = lshr i256 %r47, 32
+%r53 = getelementptr i32, i32* %r2, i32 6 ; row 6 (last): add mulPv224x32(%r2, %r2[6]) and keep all eight words
+%r54 = load i32, i32* %r53
+%r55 = call i256 @mulPv224x32(i32* %r2, i32 %r54)
+%r56 = add i256 %r51, %r55
+%r58 = getelementptr i32, i32* %r1, i32 6 ; the whole i256 running sum is stored to %r1[6..13]
+%r59 = trunc i256 %r56 to i32
+%r61 = getelementptr i32, i32* %r58, i32 0
+store i32 %r59, i32* %r61
+%r62 = lshr i256 %r56, 32
+%r63 = trunc i256 %r62 to i32
+%r65 = getelementptr i32, i32* %r58, i32 1
+store i32 %r63, i32* %r65
+%r66 = lshr i256 %r62, 32
+%r67 = trunc i256 %r66 to i32
+%r69 = getelementptr i32, i32* %r58, i32 2
+store i32 %r67, i32* %r69
+%r70 = lshr i256 %r66, 32
+%r71 = trunc i256 %r70 to i32
+%r73 = getelementptr i32, i32* %r58, i32 3
+store i32 %r71, i32* %r73
+%r74 = lshr i256 %r70, 32
+%r75 = trunc i256 %r74 to i32
+%r77 = getelementptr i32, i32* %r58, i32 4
+store i32 %r75, i32* %r77
+%r78 = lshr i256 %r74, 32
+%r79 = trunc i256 %r78 to i32
+%r81 = getelementptr i32, i32* %r58, i32 5
+store i32 %r79, i32* %r81
+%r82 = lshr i256 %r78, 32
+%r83 = trunc i256 %r82 to i32
+%r85 = getelementptr i32, i32* %r58, i32 6
+store i32 %r83, i32* %r85
+%r86 = lshr i256 %r82, 32
+%r87 = trunc i256 %r86 to i32
+%r89 = getelementptr i32, i32* %r58, i32 7
+store i32 %r87, i32* %r89
+ret void
+}
+define void @mcl_fp_mont7L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; seven interleaved multiply/reduce rounds over %r2, %r3[i] and %r4, then one conditional subtraction of %r4[0..6]; result to %r1[0..6]. Presumably Montgomery multiplication modulo the value at %r4 -- confirm against the generator
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; per-round multiplier constant is read from the word just before %r4[0]
+%r7 = load i32, i32* %r6
+%r9 = getelementptr i32, i32* %r3, i32 0 ; round 0: accumulator = mulPv224x32(%r2, %r3[0]), widened to i288
+%r10 = load i32, i32* %r9
+%r11 = call i256 @mulPv224x32(i32* %r2, i32 %r10)
+%r12 = zext i256 %r11 to i288
+%r13 = trunc i256 %r11 to i32
+%r14 = mul i32 %r13, %r7 ; q = (low word of accumulator) * %r7, wrapping modulo 2^32
+%r15 = call i256 @mulPv224x32(i32* %r4, i32 %r14)
+%r16 = zext i256 %r15 to i288
+%r17 = add i288 %r12, %r16 ; accumulator += mulPv224x32(%r4, q)
+%r18 = lshr i288 %r17, 32 ; discard the low word (presumably zero by the choice of %r7 -- confirm)
+%r20 = getelementptr i32, i32* %r3, i32 1 ; round 1: add mulPv224x32(%r2, %r3[1]), then reduce and shift as in round 0
+%r21 = load i32, i32* %r20
+%r22 = call i256 @mulPv224x32(i32* %r2, i32 %r21)
+%r23 = zext i256 %r22 to i288
+%r24 = add i288 %r18, %r23
+%r25 = trunc i288 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i256 @mulPv224x32(i32* %r4, i32 %r26)
+%r28 = zext i256 %r27 to i288
+%r29 = add i288 %r24, %r28
+%r30 = lshr i288 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2 ; round 2
+%r33 = load i32, i32* %r32
+%r34 = call i256 @mulPv224x32(i32* %r2, i32 %r33)
+%r35 = zext i256 %r34 to i288
+%r36 = add i288 %r30, %r35
+%r37 = trunc i288 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i256 @mulPv224x32(i32* %r4, i32 %r38)
+%r40 = zext i256 %r39 to i288
+%r41 = add i288 %r36, %r40
+%r42 = lshr i288 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3 ; round 3
+%r45 = load i32, i32* %r44
+%r46 = call i256 @mulPv224x32(i32* %r2, i32 %r45)
+%r47 = zext i256 %r46 to i288
+%r48 = add i288 %r42, %r47
+%r49 = trunc i288 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i256 @mulPv224x32(i32* %r4, i32 %r50)
+%r52 = zext i256 %r51 to i288
+%r53 = add i288 %r48, %r52
+%r54 = lshr i288 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4 ; round 4
+%r57 = load i32, i32* %r56
+%r58 = call i256 @mulPv224x32(i32* %r2, i32 %r57)
+%r59 = zext i256 %r58 to i288
+%r60 = add i288 %r54, %r59
+%r61 = trunc i288 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i256 @mulPv224x32(i32* %r4, i32 %r62)
+%r64 = zext i256 %r63 to i288
+%r65 = add i288 %r60, %r64
+%r66 = lshr i288 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5 ; round 5
+%r69 = load i32, i32* %r68
+%r70 = call i256 @mulPv224x32(i32* %r2, i32 %r69)
+%r71 = zext i256 %r70 to i288
+%r72 = add i288 %r66, %r71
+%r73 = trunc i288 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i256 @mulPv224x32(i32* %r4, i32 %r74)
+%r76 = zext i256 %r75 to i288
+%r77 = add i288 %r72, %r76
+%r78 = lshr i288 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6 ; round 6 (last)
+%r81 = load i32, i32* %r80
+%r82 = call i256 @mulPv224x32(i32* %r2, i32 %r81)
+%r83 = zext i256 %r82 to i288
+%r84 = add i288 %r78, %r83
+%r85 = trunc i288 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i256 @mulPv224x32(i32* %r4, i32 %r86)
+%r88 = zext i256 %r87 to i288
+%r89 = add i288 %r84, %r88
+%r90 = lshr i288 %r89, 32
+%r91 = trunc i288 %r90 to i256 ; %r91 = accumulator after the seven rounds, kept as i256
+%r92 = load i32, i32* %r4 ; pack %r4[0..6] (word 0 least significant) into the i224 value %r134
+%r93 = zext i32 %r92 to i64
+%r95 = getelementptr i32, i32* %r4, i32 1
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i64
+%r98 = shl i64 %r97, 32
+%r99 = or i64 %r93, %r98
+%r100 = zext i64 %r99 to i96
+%r102 = getelementptr i32, i32* %r4, i32 2
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i96
+%r105 = shl i96 %r104, 64
+%r106 = or i96 %r100, %r105
+%r107 = zext i96 %r106 to i128
+%r109 = getelementptr i32, i32* %r4, i32 3
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i128
+%r112 = shl i128 %r111, 96
+%r113 = or i128 %r107, %r112
+%r114 = zext i128 %r113 to i160
+%r116 = getelementptr i32, i32* %r4, i32 4
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i160
+%r119 = shl i160 %r118, 128
+%r120 = or i160 %r114, %r119
+%r121 = zext i160 %r120 to i192
+%r123 = getelementptr i32, i32* %r4, i32 5
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i192
+%r126 = shl i192 %r125, 160
+%r127 = or i192 %r121, %r126
+%r128 = zext i192 %r127 to i224
+%r130 = getelementptr i32, i32* %r4, i32 6
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i224
+%r133 = shl i224 %r132, 192
+%r134 = or i224 %r128, %r133
+%r135 = zext i224 %r134 to i256
+%r136 = sub i256 %r91, %r135 ; trial subtraction of the packed %r4 value, wrapping modulo 2^256
+%r137 = lshr i256 %r136, 224
+%r138 = trunc i256 %r137 to i1 ; bit 224 of the difference is used as the borrow flag
+%r139 = select i1 %r138, i256 %r91, i256 %r136 ; branch-free: keep %r91 if the flag is set, else the difference
+%r140 = trunc i256 %r139 to i224 ; only the low 224 bits are stored, to %r1[0..6]
+%r141 = trunc i224 %r140 to i32
+%r143 = getelementptr i32, i32* %r1, i32 0
+store i32 %r141, i32* %r143
+%r144 = lshr i224 %r140, 32
+%r145 = trunc i224 %r144 to i32
+%r147 = getelementptr i32, i32* %r1, i32 1
+store i32 %r145, i32* %r147
+%r148 = lshr i224 %r144, 32
+%r149 = trunc i224 %r148 to i32
+%r151 = getelementptr i32, i32* %r1, i32 2
+store i32 %r149, i32* %r151
+%r152 = lshr i224 %r148, 32
+%r153 = trunc i224 %r152 to i32
+%r155 = getelementptr i32, i32* %r1, i32 3
+store i32 %r153, i32* %r155
+%r156 = lshr i224 %r152, 32
+%r157 = trunc i224 %r156 to i32
+%r159 = getelementptr i32, i32* %r1, i32 4
+store i32 %r157, i32* %r159
+%r160 = lshr i224 %r156, 32
+%r161 = trunc i224 %r160 to i32
+%r163 = getelementptr i32, i32* %r1, i32 5
+store i32 %r161, i32* %r163
+%r164 = lshr i224 %r160, 32
+%r165 = trunc i224 %r164 to i32
+%r167 = getelementptr i32, i32* %r1, i32 6
+store i32 %r165, i32* %r167
+ret void
+}
+define void @mcl_fp_montNF7L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; seven interleaved multiply/reduce rounds kept entirely in i256, then a sign-bit-selected subtraction of %r4[0..6]; result to %r1[0..6]. NOTE(review): the i256 accumulator and bit-223 test presumably require the top bit of the value at %r4 to be clear -- confirm
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; per-round multiplier constant is read from the word just before %r4[0]
+%r7 = load i32, i32* %r6
+%r8 = load i32, i32* %r3 ; round 0: accumulator = mulPv224x32(%r2, %r3[0])
+%r9 = call i256 @mulPv224x32(i32* %r2, i32 %r8)
+%r10 = trunc i256 %r9 to i32
+%r11 = mul i32 %r10, %r7 ; q = (low word of accumulator) * %r7, wrapping modulo 2^32
+%r12 = call i256 @mulPv224x32(i32* %r4, i32 %r11)
+%r13 = add i256 %r9, %r12 ; accumulator += mulPv224x32(%r4, q); the add is i256 with no extra carry word
+%r14 = lshr i256 %r13, 32 ; discard the low word (presumably zero by the choice of %r7 -- confirm)
+%r16 = getelementptr i32, i32* %r3, i32 1 ; round 1: add mulPv224x32(%r2, %r3[1]), then reduce and shift as in round 0
+%r17 = load i32, i32* %r16
+%r18 = call i256 @mulPv224x32(i32* %r2, i32 %r17)
+%r19 = add i256 %r14, %r18
+%r20 = trunc i256 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i256 @mulPv224x32(i32* %r4, i32 %r21)
+%r23 = add i256 %r19, %r22
+%r24 = lshr i256 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2 ; round 2
+%r27 = load i32, i32* %r26
+%r28 = call i256 @mulPv224x32(i32* %r2, i32 %r27)
+%r29 = add i256 %r24, %r28
+%r30 = trunc i256 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i256 @mulPv224x32(i32* %r4, i32 %r31)
+%r33 = add i256 %r29, %r32
+%r34 = lshr i256 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3 ; round 3
+%r37 = load i32, i32* %r36
+%r38 = call i256 @mulPv224x32(i32* %r2, i32 %r37)
+%r39 = add i256 %r34, %r38
+%r40 = trunc i256 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i256 @mulPv224x32(i32* %r4, i32 %r41)
+%r43 = add i256 %r39, %r42
+%r44 = lshr i256 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4 ; round 4
+%r47 = load i32, i32* %r46
+%r48 = call i256 @mulPv224x32(i32* %r2, i32 %r47)
+%r49 = add i256 %r44, %r48
+%r50 = trunc i256 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i256 @mulPv224x32(i32* %r4, i32 %r51)
+%r53 = add i256 %r49, %r52
+%r54 = lshr i256 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5 ; round 5
+%r57 = load i32, i32* %r56
+%r58 = call i256 @mulPv224x32(i32* %r2, i32 %r57)
+%r59 = add i256 %r54, %r58
+%r60 = trunc i256 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i256 @mulPv224x32(i32* %r4, i32 %r61)
+%r63 = add i256 %r59, %r62
+%r64 = lshr i256 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6 ; round 6 (last)
+%r67 = load i32, i32* %r66
+%r68 = call i256 @mulPv224x32(i32* %r2, i32 %r67)
+%r69 = add i256 %r64, %r68
+%r70 = trunc i256 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i256 @mulPv224x32(i32* %r4, i32 %r71)
+%r73 = add i256 %r69, %r72
+%r74 = lshr i256 %r73, 32
+%r75 = trunc i256 %r74 to i224 ; %r75 = accumulator after the seven rounds, kept as i224
+%r76 = load i32, i32* %r4 ; pack %r4[0..6] (word 0 least significant) into the i224 value %r118
+%r77 = zext i32 %r76 to i64
+%r79 = getelementptr i32, i32* %r4, i32 1
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i64
+%r82 = shl i64 %r81, 32
+%r83 = or i64 %r77, %r82
+%r84 = zext i64 %r83 to i96
+%r86 = getelementptr i32, i32* %r4, i32 2
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i96
+%r89 = shl i96 %r88, 64
+%r90 = or i96 %r84, %r89
+%r91 = zext i96 %r90 to i128
+%r93 = getelementptr i32, i32* %r4, i32 3
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i128
+%r96 = shl i128 %r95, 96
+%r97 = or i128 %r91, %r96
+%r98 = zext i128 %r97 to i160
+%r100 = getelementptr i32, i32* %r4, i32 4
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i160
+%r103 = shl i160 %r102, 128
+%r104 = or i160 %r98, %r103
+%r105 = zext i160 %r104 to i192
+%r107 = getelementptr i32, i32* %r4, i32 5
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i192
+%r110 = shl i192 %r109, 160
+%r111 = or i192 %r105, %r110
+%r112 = zext i192 %r111 to i224
+%r114 = getelementptr i32, i32* %r4, i32 6
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i224
+%r117 = shl i224 %r116, 192
+%r118 = or i224 %r112, %r117
+%r119 = sub i224 %r75, %r118 ; trial subtraction of the packed %r4 value, wrapping modulo 2^224
+%r120 = lshr i224 %r119, 223
+%r121 = trunc i224 %r120 to i1 ; top bit (bit 223) of the difference is used as the sign flag
+%r122 = select i1 %r121, i224 %r75, i224 %r119 ; branch-free: keep %r75 if the flag is set, else the difference
+%r123 = trunc i224 %r122 to i32 ; store the selected value to %r1[0..6], least-significant word first
+%r125 = getelementptr i32, i32* %r1, i32 0
+store i32 %r123, i32* %r125
+%r126 = lshr i224 %r122, 32
+%r127 = trunc i224 %r126 to i32
+%r129 = getelementptr i32, i32* %r1, i32 1
+store i32 %r127, i32* %r129
+%r130 = lshr i224 %r126, 32
+%r131 = trunc i224 %r130 to i32
+%r133 = getelementptr i32, i32* %r1, i32 2
+store i32 %r131, i32* %r133
+%r134 = lshr i224 %r130, 32
+%r135 = trunc i224 %r134 to i32
+%r137 = getelementptr i32, i32* %r1, i32 3
+store i32 %r135, i32* %r137
+%r138 = lshr i224 %r134, 32
+%r139 = trunc i224 %r138 to i32
+%r141 = getelementptr i32, i32* %r1, i32 4
+store i32 %r139, i32* %r141
+%r142 = lshr i224 %r138, 32
+%r143 = trunc i224 %r142 to i32
+%r145 = getelementptr i32, i32* %r1, i32 5
+store i32 %r143, i32* %r145
+%r146 = lshr i224 %r142, 32
+%r147 = trunc i224 %r146 to i32
+%r149 = getelementptr i32, i32* %r1, i32 6
+store i32 %r147, i32* %r149
+ret void
+}
+define void @mcl_fp_montRed7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = load i32, i32* %r2
+%r51 = zext i32 %r50 to i64
+%r53 = getelementptr i32, i32* %r2, i32 1
+%r54 = load i32, i32* %r53
+%r55 = zext i32 %r54 to i64
+%r56 = shl i64 %r55, 32
+%r57 = or i64 %r51, %r56
+%r58 = zext i64 %r57 to i96
+%r60 = getelementptr i32, i32* %r2, i32 2
+%r61 = load i32, i32* %r60
+%r62 = zext i32 %r61 to i96
+%r63 = shl i96 %r62, 64
+%r64 = or i96 %r58, %r63
+%r65 = zext i96 %r64 to i128
+%r67 = getelementptr i32, i32* %r2, i32 3
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i128
+%r70 = shl i128 %r69, 96
+%r71 = or i128 %r65, %r70
+%r72 = zext i128 %r71 to i160
+%r74 = getelementptr i32, i32* %r2, i32 4
+%r75 = load i32, i32* %r74
+%r76 = zext i32 %r75 to i160
+%r77 = shl i160 %r76, 128
+%r78 = or i160 %r72, %r77
+%r79 = zext i160 %r78 to i192
+%r81 = getelementptr i32, i32* %r2, i32 5
+%r82 = load i32, i32* %r81
+%r83 = zext i32 %r82 to i192
+%r84 = shl i192 %r83, 160
+%r85 = or i192 %r79, %r84
+%r86 = zext i192 %r85 to i224
+%r88 = getelementptr i32, i32* %r2, i32 6
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i224
+%r91 = shl i224 %r90, 192
+%r92 = or i224 %r86, %r91
+%r93 = zext i224 %r92 to i256
+%r95 = getelementptr i32, i32* %r2, i32 7
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i256
+%r98 = shl i256 %r97, 224
+%r99 = or i256 %r93, %r98
+%r100 = zext i256 %r99 to i288
+%r102 = getelementptr i32, i32* %r2, i32 8
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i288
+%r105 = shl i288 %r104, 256
+%r106 = or i288 %r100, %r105
+%r107 = zext i288 %r106 to i320
+%r109 = getelementptr i32, i32* %r2, i32 9
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i320
+%r112 = shl i320 %r111, 288
+%r113 = or i320 %r107, %r112
+%r114 = zext i320 %r113 to i352
+%r116 = getelementptr i32, i32* %r2, i32 10
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i352
+%r119 = shl i352 %r118, 320
+%r120 = or i352 %r114, %r119
+%r121 = zext i352 %r120 to i384
+%r123 = getelementptr i32, i32* %r2, i32 11
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i384
+%r126 = shl i384 %r125, 352
+%r127 = or i384 %r121, %r126
+%r128 = zext i384 %r127 to i416
+%r130 = getelementptr i32, i32* %r2, i32 12
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i416
+%r133 = shl i416 %r132, 384
+%r134 = or i416 %r128, %r133
+%r135 = zext i416 %r134 to i448
+%r137 = getelementptr i32, i32* %r2, i32 13
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i448
+%r140 = shl i448 %r139, 416
+%r141 = or i448 %r135, %r140
+%r142 = zext i448 %r141 to i480
+%r143 = trunc i480 %r142 to i32
+%r144 = mul i32 %r143, %r6
+%r145 = call i256 @mulPv224x32(i32* %r3, i32 %r144)
+%r146 = zext i256 %r145 to i480
+%r147 = add i480 %r142, %r146
+%r148 = lshr i480 %r147, 32
+%r149 = trunc i480 %r148 to i448
+%r150 = trunc i448 %r149 to i32
+%r151 = mul i32 %r150, %r6
+%r152 = call i256 @mulPv224x32(i32* %r3, i32 %r151)
+%r153 = zext i256 %r152 to i448
+%r154 = add i448 %r149, %r153
+%r155 = lshr i448 %r154, 32
+%r156 = trunc i448 %r155 to i416
+%r157 = trunc i416 %r156 to i32
+%r158 = mul i32 %r157, %r6
+%r159 = call i256 @mulPv224x32(i32* %r3, i32 %r158)
+%r160 = zext i256 %r159 to i416
+%r161 = add i416 %r156, %r160
+%r162 = lshr i416 %r161, 32
+%r163 = trunc i416 %r162 to i384
+%r164 = trunc i384 %r163 to i32
+%r165 = mul i32 %r164, %r6
+%r166 = call i256 @mulPv224x32(i32* %r3, i32 %r165)
+%r167 = zext i256 %r166 to i384
+%r168 = add i384 %r163, %r167
+%r169 = lshr i384 %r168, 32
+%r170 = trunc i384 %r169 to i352
+%r171 = trunc i352 %r170 to i32
+%r172 = mul i32 %r171, %r6
+%r173 = call i256 @mulPv224x32(i32* %r3, i32 %r172)
+%r174 = zext i256 %r173 to i352
+%r175 = add i352 %r170, %r174
+%r176 = lshr i352 %r175, 32
+%r177 = trunc i352 %r176 to i320
+%r178 = trunc i320 %r177 to i32
+%r179 = mul i32 %r178, %r6
+%r180 = call i256 @mulPv224x32(i32* %r3, i32 %r179)
+%r181 = zext i256 %r180 to i320
+%r182 = add i320 %r177, %r181
+%r183 = lshr i320 %r182, 32
+%r184 = trunc i320 %r183 to i288
+%r185 = trunc i288 %r184 to i32
+%r186 = mul i32 %r185, %r6
+%r187 = call i256 @mulPv224x32(i32* %r3, i32 %r186)
+%r188 = zext i256 %r187 to i288
+%r189 = add i288 %r184, %r188
+%r190 = lshr i288 %r189, 32
+%r191 = trunc i288 %r190 to i256
+%r192 = zext i224 %r49 to i256
+%r193 = sub i256 %r191, %r192
+%r194 = lshr i256 %r193, 224
+%r195 = trunc i256 %r194 to i1
+%r196 = select i1 %r195, i256 %r191, i256 %r193
+%r197 = trunc i256 %r196 to i224
+%r198 = trunc i224 %r197 to i32
+%r200 = getelementptr i32, i32* %r1, i32 0
+store i32 %r198, i32* %r200
+%r201 = lshr i224 %r197, 32
+%r202 = trunc i224 %r201 to i32
+%r204 = getelementptr i32, i32* %r1, i32 1
+store i32 %r202, i32* %r204
+%r205 = lshr i224 %r201, 32
+%r206 = trunc i224 %r205 to i32
+%r208 = getelementptr i32, i32* %r1, i32 2
+store i32 %r206, i32* %r208
+%r209 = lshr i224 %r205, 32
+%r210 = trunc i224 %r209 to i32
+%r212 = getelementptr i32, i32* %r1, i32 3
+store i32 %r210, i32* %r212
+%r213 = lshr i224 %r209, 32
+%r214 = trunc i224 %r213 to i32
+%r216 = getelementptr i32, i32* %r1, i32 4
+store i32 %r214, i32* %r216
+%r217 = lshr i224 %r213, 32
+%r218 = trunc i224 %r217 to i32
+%r220 = getelementptr i32, i32* %r1, i32 5
+store i32 %r218, i32* %r220
+%r221 = lshr i224 %r217, 32
+%r222 = trunc i224 %r221 to i32
+%r224 = getelementptr i32, i32* %r1, i32 6
+store i32 %r222, i32* %r224
+ret void
+}
+define i32 @mcl_fp_addPre7L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 7-word (224-bit) add with no reduction: stores the low 224 bits of [%r3] + [%r4] to %r2 and returns the carry-out.
+{
+%r5 = load i32, i32* %r3 ; assemble the 7 words at %r3 into one i224; word 0 is the least significant
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256 ; widen to 256 bits so the carry of the add lands in bit 224
+%r49 = load i32, i32* %r4 ; same assembly for the 7 words at %r4
+%r50 = zext i32 %r49 to i64
+%r52 = getelementptr i32, i32* %r4, i32 1
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i64
+%r55 = shl i64 %r54, 32
+%r56 = or i64 %r50, %r55
+%r57 = zext i64 %r56 to i96
+%r59 = getelementptr i32, i32* %r4, i32 2
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i96
+%r62 = shl i96 %r61, 64
+%r63 = or i96 %r57, %r62
+%r64 = zext i96 %r63 to i128
+%r66 = getelementptr i32, i32* %r4, i32 3
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i128
+%r69 = shl i128 %r68, 96
+%r70 = or i128 %r64, %r69
+%r71 = zext i128 %r70 to i160
+%r73 = getelementptr i32, i32* %r4, i32 4
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i160
+%r76 = shl i160 %r75, 128
+%r77 = or i160 %r71, %r76
+%r78 = zext i160 %r77 to i192
+%r80 = getelementptr i32, i32* %r4, i32 5
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i192
+%r83 = shl i192 %r82, 160
+%r84 = or i192 %r78, %r83
+%r85 = zext i192 %r84 to i224
+%r87 = getelementptr i32, i32* %r4, i32 6
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i224
+%r90 = shl i224 %r89, 192
+%r91 = or i224 %r85, %r90
+%r92 = zext i224 %r91 to i256
+%r93 = add i256 %r48, %r92 ; full sum; both addends are below 2^224, so it cannot overflow 256 bits
+%r94 = trunc i256 %r93 to i224 ; low 224 bits are the value written to %r2
+%r95 = trunc i224 %r94 to i32 ; write back one 32-bit word at a time, least significant first
+%r97 = getelementptr i32, i32* %r2, i32 0
+store i32 %r95, i32* %r97
+%r98 = lshr i224 %r94, 32
+%r99 = trunc i224 %r98 to i32
+%r101 = getelementptr i32, i32* %r2, i32 1
+store i32 %r99, i32* %r101
+%r102 = lshr i224 %r98, 32
+%r103 = trunc i224 %r102 to i32
+%r105 = getelementptr i32, i32* %r2, i32 2
+store i32 %r103, i32* %r105
+%r106 = lshr i224 %r102, 32
+%r107 = trunc i224 %r106 to i32
+%r109 = getelementptr i32, i32* %r2, i32 3
+store i32 %r107, i32* %r109
+%r110 = lshr i224 %r106, 32
+%r111 = trunc i224 %r110 to i32
+%r113 = getelementptr i32, i32* %r2, i32 4
+store i32 %r111, i32* %r113
+%r114 = lshr i224 %r110, 32
+%r115 = trunc i224 %r114 to i32
+%r117 = getelementptr i32, i32* %r2, i32 5
+store i32 %r115, i32* %r117
+%r118 = lshr i224 %r114, 32
+%r119 = trunc i224 %r118 to i32
+%r121 = getelementptr i32, i32* %r2, i32 6
+store i32 %r119, i32* %r121
+%r122 = lshr i256 %r93, 224 ; move the carry (bit 224 of the sum) down to bit 0
+%r123 = trunc i256 %r122 to i32
+ret i32 %r123 ; 0 or 1
+}
+define i32 @mcl_fp_subPre7L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 7-word (224-bit) subtract with no reduction: stores the low 224 bits of [%r3] - [%r4] to %r2 and returns the borrow.
+{
+%r5 = load i32, i32* %r3 ; assemble the 7 words at %r3 (minuend) into one i224; word 0 is the least significant
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256 ; widen to 256 bits so a borrow shows up in the bits above 223
+%r49 = load i32, i32* %r4 ; same assembly for the 7 words at %r4 (subtrahend)
+%r50 = zext i32 %r49 to i64
+%r52 = getelementptr i32, i32* %r4, i32 1
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i64
+%r55 = shl i64 %r54, 32
+%r56 = or i64 %r50, %r55
+%r57 = zext i64 %r56 to i96
+%r59 = getelementptr i32, i32* %r4, i32 2
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i96
+%r62 = shl i96 %r61, 64
+%r63 = or i96 %r57, %r62
+%r64 = zext i96 %r63 to i128
+%r66 = getelementptr i32, i32* %r4, i32 3
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i128
+%r69 = shl i128 %r68, 96
+%r70 = or i128 %r64, %r69
+%r71 = zext i128 %r70 to i160
+%r73 = getelementptr i32, i32* %r4, i32 4
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i160
+%r76 = shl i160 %r75, 128
+%r77 = or i160 %r71, %r76
+%r78 = zext i160 %r77 to i192
+%r80 = getelementptr i32, i32* %r4, i32 5
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i192
+%r83 = shl i192 %r82, 160
+%r84 = or i192 %r78, %r83
+%r85 = zext i192 %r84 to i224
+%r87 = getelementptr i32, i32* %r4, i32 6
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i224
+%r90 = shl i224 %r89, 192
+%r91 = or i224 %r85, %r90
+%r92 = zext i224 %r91 to i256
+%r93 = sub i256 %r48, %r92 ; wraps modulo 2^256 when [%r3] < [%r4], setting every bit from 224 upward
+%r94 = trunc i256 %r93 to i224 ; low 224 bits are the value written to %r2
+%r95 = trunc i224 %r94 to i32 ; write back one 32-bit word at a time, least significant first
+%r97 = getelementptr i32, i32* %r2, i32 0
+store i32 %r95, i32* %r97
+%r98 = lshr i224 %r94, 32
+%r99 = trunc i224 %r98 to i32
+%r101 = getelementptr i32, i32* %r2, i32 1
+store i32 %r99, i32* %r101
+%r102 = lshr i224 %r98, 32
+%r103 = trunc i224 %r102 to i32
+%r105 = getelementptr i32, i32* %r2, i32 2
+store i32 %r103, i32* %r105
+%r106 = lshr i224 %r102, 32
+%r107 = trunc i224 %r106 to i32
+%r109 = getelementptr i32, i32* %r2, i32 3
+store i32 %r107, i32* %r109
+%r110 = lshr i224 %r106, 32
+%r111 = trunc i224 %r110 to i32
+%r113 = getelementptr i32, i32* %r2, i32 4
+store i32 %r111, i32* %r113
+%r114 = lshr i224 %r110, 32
+%r115 = trunc i224 %r114 to i32
+%r117 = getelementptr i32, i32* %r2, i32 5
+store i32 %r115, i32* %r117
+%r118 = lshr i224 %r114, 32
+%r119 = trunc i224 %r118 to i32
+%r121 = getelementptr i32, i32* %r2, i32 6
+store i32 %r119, i32* %r121
+%r122 = lshr i256 %r93, 224 ; top 32 bits: all ones after a borrow, zero otherwise
+%r123 = trunc i256 %r122 to i32
+%r125 = and i32 %r123, 1 ; keep only bit 0 so the result is exactly 0 or 1
+ret i32 %r125 ; 1 if the subtraction borrowed, else 0
+}
+define void @mcl_fp_shr1_7L(i32* noalias  %r1, i32* noalias  %r2) ; Shifts the 7-word (224-bit) value at %r2 right by one bit and stores the result to %r1.
+{
+%r3 = load i32, i32* %r2 ; assemble the 7 words at %r2 into one i224; word 0 is the least significant
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = zext i192 %r38 to i224
+%r41 = getelementptr i32, i32* %r2, i32 6
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i224
+%r44 = shl i224 %r43, 192
+%r45 = or i224 %r39, %r44
+%r46 = lshr i224 %r45, 1 ; logical shift: bit 0 is dropped and a zero enters at bit 223
+%r47 = trunc i224 %r46 to i32 ; write back one 32-bit word at a time, least significant first
+%r49 = getelementptr i32, i32* %r1, i32 0
+store i32 %r47, i32* %r49
+%r50 = lshr i224 %r46, 32
+%r51 = trunc i224 %r50 to i32
+%r53 = getelementptr i32, i32* %r1, i32 1
+store i32 %r51, i32* %r53
+%r54 = lshr i224 %r50, 32
+%r55 = trunc i224 %r54 to i32
+%r57 = getelementptr i32, i32* %r1, i32 2
+store i32 %r55, i32* %r57
+%r58 = lshr i224 %r54, 32
+%r59 = trunc i224 %r58 to i32
+%r61 = getelementptr i32, i32* %r1, i32 3
+store i32 %r59, i32* %r61
+%r62 = lshr i224 %r58, 32
+%r63 = trunc i224 %r62 to i32
+%r65 = getelementptr i32, i32* %r1, i32 4
+store i32 %r63, i32* %r65
+%r66 = lshr i224 %r62, 32
+%r67 = trunc i224 %r66 to i32
+%r69 = getelementptr i32, i32* %r1, i32 5
+store i32 %r67, i32* %r69
+%r70 = lshr i224 %r66, 32
+%r71 = trunc i224 %r70 to i32
+%r73 = getelementptr i32, i32* %r1, i32 6
+store i32 %r71, i32* %r73
+ret void
+}
+define void @mcl_fp_add7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 7-word add with one conditional subtraction: %r1 = [%r2] + [%r3], then minus [%r4] if the sum is >= [%r4]. NOTE(review): %r4 presumably holds the field modulus - confirm against callers.
+{
+%r5 = load i32, i32* %r2 ; assemble the 7 words at %r2 into one i224; word 0 is the least significant
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = load i32, i32* %r3 ; same assembly for the 7 words at %r3
+%r49 = zext i32 %r48 to i64
+%r51 = getelementptr i32, i32* %r3, i32 1
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i64
+%r54 = shl i64 %r53, 32
+%r55 = or i64 %r49, %r54
+%r56 = zext i64 %r55 to i96
+%r58 = getelementptr i32, i32* %r3, i32 2
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i96
+%r61 = shl i96 %r60, 64
+%r62 = or i96 %r56, %r61
+%r63 = zext i96 %r62 to i128
+%r65 = getelementptr i32, i32* %r3, i32 3
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i128
+%r68 = shl i128 %r67, 96
+%r69 = or i128 %r63, %r68
+%r70 = zext i128 %r69 to i160
+%r72 = getelementptr i32, i32* %r3, i32 4
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i160
+%r75 = shl i160 %r74, 128
+%r76 = or i160 %r70, %r75
+%r77 = zext i160 %r76 to i192
+%r79 = getelementptr i32, i32* %r3, i32 5
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i192
+%r82 = shl i192 %r81, 160
+%r83 = or i192 %r77, %r82
+%r84 = zext i192 %r83 to i224
+%r86 = getelementptr i32, i32* %r3, i32 6
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i224
+%r89 = shl i224 %r88, 192
+%r90 = or i224 %r84, %r89
+%r91 = zext i224 %r47 to i256 ; widen both addends so the sum keeps its carry in bit 224
+%r92 = zext i224 %r90 to i256
+%r93 = add i256 %r91, %r92 ; full, unreduced sum
+%r94 = trunc i256 %r93 to i224
+%r95 = trunc i224 %r94 to i32 ; store the unreduced sum to %r1 first; it is overwritten below if the subtraction succeeds
+%r97 = getelementptr i32, i32* %r1, i32 0
+store i32 %r95, i32* %r97
+%r98 = lshr i224 %r94, 32
+%r99 = trunc i224 %r98 to i32
+%r101 = getelementptr i32, i32* %r1, i32 1
+store i32 %r99, i32* %r101
+%r102 = lshr i224 %r98, 32
+%r103 = trunc i224 %r102 to i32
+%r105 = getelementptr i32, i32* %r1, i32 2
+store i32 %r103, i32* %r105
+%r106 = lshr i224 %r102, 32
+%r107 = trunc i224 %r106 to i32
+%r109 = getelementptr i32, i32* %r1, i32 3
+store i32 %r107, i32* %r109
+%r110 = lshr i224 %r106, 32
+%r111 = trunc i224 %r110 to i32
+%r113 = getelementptr i32, i32* %r1, i32 4
+store i32 %r111, i32* %r113
+%r114 = lshr i224 %r110, 32
+%r115 = trunc i224 %r114 to i32
+%r117 = getelementptr i32, i32* %r1, i32 5
+store i32 %r115, i32* %r117
+%r118 = lshr i224 %r114, 32
+%r119 = trunc i224 %r118 to i32
+%r121 = getelementptr i32, i32* %r1, i32 6
+store i32 %r119, i32* %r121
+%r122 = load i32, i32* %r4 ; assemble the 7 words at %r4 into one i224
+%r123 = zext i32 %r122 to i64
+%r125 = getelementptr i32, i32* %r4, i32 1
+%r126 = load i32, i32* %r125
+%r127 = zext i32 %r126 to i64
+%r128 = shl i64 %r127, 32
+%r129 = or i64 %r123, %r128
+%r130 = zext i64 %r129 to i96
+%r132 = getelementptr i32, i32* %r4, i32 2
+%r133 = load i32, i32* %r132
+%r134 = zext i32 %r133 to i96
+%r135 = shl i96 %r134, 64
+%r136 = or i96 %r130, %r135
+%r137 = zext i96 %r136 to i128
+%r139 = getelementptr i32, i32* %r4, i32 3
+%r140 = load i32, i32* %r139
+%r141 = zext i32 %r140 to i128
+%r142 = shl i128 %r141, 96
+%r143 = or i128 %r137, %r142
+%r144 = zext i128 %r143 to i160
+%r146 = getelementptr i32, i32* %r4, i32 4
+%r147 = load i32, i32* %r146
+%r148 = zext i32 %r147 to i160
+%r149 = shl i160 %r148, 128
+%r150 = or i160 %r144, %r149
+%r151 = zext i160 %r150 to i192
+%r153 = getelementptr i32, i32* %r4, i32 5
+%r154 = load i32, i32* %r153
+%r155 = zext i32 %r154 to i192
+%r156 = shl i192 %r155, 160
+%r157 = or i192 %r151, %r156
+%r158 = zext i192 %r157 to i224
+%r160 = getelementptr i32, i32* %r4, i32 6
+%r161 = load i32, i32* %r160
+%r162 = zext i32 %r161 to i224
+%r163 = shl i224 %r162, 192
+%r164 = or i224 %r158, %r163
+%r165 = zext i224 %r164 to i256
+%r166 = sub i256 %r93, %r165 ; trial subtraction from the full 256-bit sum
+%r167 = lshr i256 %r166, 224 ; inspect bit 224 of the difference
+%r168 = trunc i256 %r167 to i1
+br i1%r168, label %carry, label %nocarry ; taken to %carry when bit 224 of the difference is set, else to %nocarry
+nocarry: ; the subtraction did not borrow: replace the stored sum with the difference
+%r169 = trunc i256 %r166 to i224
+%r170 = trunc i224 %r169 to i32
+%r172 = getelementptr i32, i32* %r1, i32 0
+store i32 %r170, i32* %r172
+%r173 = lshr i224 %r169, 32
+%r174 = trunc i224 %r173 to i32
+%r176 = getelementptr i32, i32* %r1, i32 1
+store i32 %r174, i32* %r176
+%r177 = lshr i224 %r173, 32
+%r178 = trunc i224 %r177 to i32
+%r180 = getelementptr i32, i32* %r1, i32 2
+store i32 %r178, i32* %r180
+%r181 = lshr i224 %r177, 32
+%r182 = trunc i224 %r181 to i32
+%r184 = getelementptr i32, i32* %r1, i32 3
+store i32 %r182, i32* %r184
+%r185 = lshr i224 %r181, 32
+%r186 = trunc i224 %r185 to i32
+%r188 = getelementptr i32, i32* %r1, i32 4
+store i32 %r186, i32* %r188
+%r189 = lshr i224 %r185, 32
+%r190 = trunc i224 %r189 to i32
+%r192 = getelementptr i32, i32* %r1, i32 5
+store i32 %r190, i32* %r192
+%r193 = lshr i224 %r189, 32
+%r194 = trunc i224 %r193 to i32
+%r196 = getelementptr i32, i32* %r1, i32 6
+store i32 %r194, i32* %r196
+ret void
+carry: ; the subtraction borrowed (sum < [%r4]): keep the sum already stored in %r1
+ret void
+}
+define void @mcl_fp_addNF7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Branch-free 7-word add: computes s = [%r2] + [%r3] and d = s - [%r4] in 224 bits, then stores s to %r1 if bit 223 of d is set, else d. NOTE(review): presumably requires s to fit in 224 bits - confirm against callers.
+{
+%r5 = load i32, i32* %r2 ; assemble the 7 words at %r2 into one i224; word 0 is the least significant
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = load i32, i32* %r3 ; same assembly for the 7 words at %r3
+%r49 = zext i32 %r48 to i64
+%r51 = getelementptr i32, i32* %r3, i32 1
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i64
+%r54 = shl i64 %r53, 32
+%r55 = or i64 %r49, %r54
+%r56 = zext i64 %r55 to i96
+%r58 = getelementptr i32, i32* %r3, i32 2
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i96
+%r61 = shl i96 %r60, 64
+%r62 = or i96 %r56, %r61
+%r63 = zext i96 %r62 to i128
+%r65 = getelementptr i32, i32* %r3, i32 3
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i128
+%r68 = shl i128 %r67, 96
+%r69 = or i128 %r63, %r68
+%r70 = zext i128 %r69 to i160
+%r72 = getelementptr i32, i32* %r3, i32 4
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i160
+%r75 = shl i160 %r74, 128
+%r76 = or i160 %r70, %r75
+%r77 = zext i160 %r76 to i192
+%r79 = getelementptr i32, i32* %r3, i32 5
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i192
+%r82 = shl i192 %r81, 160
+%r83 = or i192 %r77, %r82
+%r84 = zext i192 %r83 to i224
+%r86 = getelementptr i32, i32* %r3, i32 6
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i224
+%r89 = shl i224 %r88, 192
+%r90 = or i224 %r84, %r89
+%r91 = add i224 %r47, %r90 ; sum kept at 224 bits: any carry out of bit 223 is discarded
+%r92 = load i32, i32* %r4 ; assemble the 7 words at %r4 into one i224
+%r93 = zext i32 %r92 to i64
+%r95 = getelementptr i32, i32* %r4, i32 1
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i64
+%r98 = shl i64 %r97, 32
+%r99 = or i64 %r93, %r98
+%r100 = zext i64 %r99 to i96
+%r102 = getelementptr i32, i32* %r4, i32 2
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i96
+%r105 = shl i96 %r104, 64
+%r106 = or i96 %r100, %r105
+%r107 = zext i96 %r106 to i128
+%r109 = getelementptr i32, i32* %r4, i32 3
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i128
+%r112 = shl i128 %r111, 96
+%r113 = or i128 %r107, %r112
+%r114 = zext i128 %r113 to i160
+%r116 = getelementptr i32, i32* %r4, i32 4
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i160
+%r119 = shl i160 %r118, 128
+%r120 = or i160 %r114, %r119
+%r121 = zext i160 %r120 to i192
+%r123 = getelementptr i32, i32* %r4, i32 5
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i192
+%r126 = shl i192 %r125, 160
+%r127 = or i192 %r121, %r126
+%r128 = zext i192 %r127 to i224
+%r130 = getelementptr i32, i32* %r4, i32 6
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i224
+%r133 = shl i224 %r132, 192
+%r134 = or i224 %r128, %r133
+%r135 = sub i224 %r91, %r134 ; trial subtraction, wrapping modulo 2^224
+%r136 = lshr i224 %r135, 223 ; isolate the top bit (bit 223) of the difference
+%r137 = trunc i224 %r136 to i1
+%r138 = select i1 %r137, i224 %r91, i224 %r135 ; top bit set: keep the sum; otherwise take the difference
+%r139 = trunc i224 %r138 to i32 ; write back one 32-bit word at a time, least significant first
+%r141 = getelementptr i32, i32* %r1, i32 0
+store i32 %r139, i32* %r141
+%r142 = lshr i224 %r138, 32
+%r143 = trunc i224 %r142 to i32
+%r145 = getelementptr i32, i32* %r1, i32 1
+store i32 %r143, i32* %r145
+%r146 = lshr i224 %r142, 32
+%r147 = trunc i224 %r146 to i32
+%r149 = getelementptr i32, i32* %r1, i32 2
+store i32 %r147, i32* %r149
+%r150 = lshr i224 %r146, 32
+%r151 = trunc i224 %r150 to i32
+%r153 = getelementptr i32, i32* %r1, i32 3
+store i32 %r151, i32* %r153
+%r154 = lshr i224 %r150, 32
+%r155 = trunc i224 %r154 to i32
+%r157 = getelementptr i32, i32* %r1, i32 4
+store i32 %r155, i32* %r157
+%r158 = lshr i224 %r154, 32
+%r159 = trunc i224 %r158 to i32
+%r161 = getelementptr i32, i32* %r1, i32 5
+store i32 %r159, i32* %r161
+%r162 = lshr i224 %r158, 32
+%r163 = trunc i224 %r162 to i32
+%r165 = getelementptr i32, i32* %r1, i32 6
+store i32 %r163, i32* %r165
+ret void
+}
+define void @mcl_fp_sub7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 7-word subtract with one conditional add-back: %r1 = [%r2] - [%r3], plus [%r4] if the subtraction borrowed. NOTE(review): %r4 presumably holds the field modulus - confirm against callers.
+{
+%r5 = load i32, i32* %r2 ; assemble the 7 words at %r2 (minuend) into one i224; word 0 is the least significant
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = load i32, i32* %r3 ; same assembly for the 7 words at %r3 (subtrahend)
+%r49 = zext i32 %r48 to i64
+%r51 = getelementptr i32, i32* %r3, i32 1
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i64
+%r54 = shl i64 %r53, 32
+%r55 = or i64 %r49, %r54
+%r56 = zext i64 %r55 to i96
+%r58 = getelementptr i32, i32* %r3, i32 2
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i96
+%r61 = shl i96 %r60, 64
+%r62 = or i96 %r56, %r61
+%r63 = zext i96 %r62 to i128
+%r65 = getelementptr i32, i32* %r3, i32 3
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i128
+%r68 = shl i128 %r67, 96
+%r69 = or i128 %r63, %r68
+%r70 = zext i128 %r69 to i160
+%r72 = getelementptr i32, i32* %r3, i32 4
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i160
+%r75 = shl i160 %r74, 128
+%r76 = or i160 %r70, %r75
+%r77 = zext i160 %r76 to i192
+%r79 = getelementptr i32, i32* %r3, i32 5
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i192
+%r82 = shl i192 %r81, 160
+%r83 = or i192 %r77, %r82
+%r84 = zext i192 %r83 to i224
+%r86 = getelementptr i32, i32* %r3, i32 6
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i224
+%r89 = shl i224 %r88, 192
+%r90 = or i224 %r84, %r89
+%r91 = zext i224 %r47 to i256 ; widen both operands so a borrow shows up in bit 224
+%r92 = zext i224 %r90 to i256
+%r93 = sub i256 %r91, %r92 ; wraps modulo 2^256 when [%r2] < [%r3]
+%r94 = trunc i256 %r93 to i224 ; low 224 bits of the difference
+%r95 = lshr i256 %r93, 224 ; bit 224 is the borrow flag
+%r96 = trunc i256 %r95 to i1
+%r97 = trunc i224 %r94 to i32 ; store the raw difference to %r1 first; it is overwritten below if a borrow occurred
+%r99 = getelementptr i32, i32* %r1, i32 0
+store i32 %r97, i32* %r99
+%r100 = lshr i224 %r94, 32
+%r101 = trunc i224 %r100 to i32
+%r103 = getelementptr i32, i32* %r1, i32 1
+store i32 %r101, i32* %r103
+%r104 = lshr i224 %r100, 32
+%r105 = trunc i224 %r104 to i32
+%r107 = getelementptr i32, i32* %r1, i32 2
+store i32 %r105, i32* %r107
+%r108 = lshr i224 %r104, 32
+%r109 = trunc i224 %r108 to i32
+%r111 = getelementptr i32, i32* %r1, i32 3
+store i32 %r109, i32* %r111
+%r112 = lshr i224 %r108, 32
+%r113 = trunc i224 %r112 to i32
+%r115 = getelementptr i32, i32* %r1, i32 4
+store i32 %r113, i32* %r115
+%r116 = lshr i224 %r112, 32
+%r117 = trunc i224 %r116 to i32
+%r119 = getelementptr i32, i32* %r1, i32 5
+store i32 %r117, i32* %r119
+%r120 = lshr i224 %r116, 32
+%r121 = trunc i224 %r120 to i32
+%r123 = getelementptr i32, i32* %r1, i32 6
+store i32 %r121, i32* %r123
+br i1%r96, label %carry, label %nocarry ; taken to %carry when the subtraction borrowed, else to %nocarry
+nocarry: ; no borrow: the difference already stored in %r1 is the result
+ret void
+carry: ; borrow: add [%r4] to the wrapped difference and store that instead
+%r124 = load i32, i32* %r4 ; assemble the 7 words at %r4 into one i224
+%r125 = zext i32 %r124 to i64
+%r127 = getelementptr i32, i32* %r4, i32 1
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i64
+%r130 = shl i64 %r129, 32
+%r131 = or i64 %r125, %r130
+%r132 = zext i64 %r131 to i96
+%r134 = getelementptr i32, i32* %r4, i32 2
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i96
+%r137 = shl i96 %r136, 64
+%r138 = or i96 %r132, %r137
+%r139 = zext i96 %r138 to i128
+%r141 = getelementptr i32, i32* %r4, i32 3
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i128
+%r144 = shl i128 %r143, 96
+%r145 = or i128 %r139, %r144
+%r146 = zext i128 %r145 to i160
+%r148 = getelementptr i32, i32* %r4, i32 4
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i160
+%r151 = shl i160 %r150, 128
+%r152 = or i160 %r146, %r151
+%r153 = zext i160 %r152 to i192
+%r155 = getelementptr i32, i32* %r4, i32 5
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i192
+%r158 = shl i192 %r157, 160
+%r159 = or i192 %r153, %r158
+%r160 = zext i192 %r159 to i224
+%r162 = getelementptr i32, i32* %r4, i32 6
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i224
+%r165 = shl i224 %r164, 192
+%r166 = or i224 %r160, %r165
+%r167 = add i224 %r94, %r166 ; 224-bit add, wrapping modulo 2^224
+%r168 = trunc i224 %r167 to i32 ; write back one 32-bit word at a time, least significant first
+%r170 = getelementptr i32, i32* %r1, i32 0
+store i32 %r168, i32* %r170
+%r171 = lshr i224 %r167, 32
+%r172 = trunc i224 %r171 to i32
+%r174 = getelementptr i32, i32* %r1, i32 1
+store i32 %r172, i32* %r174
+%r175 = lshr i224 %r171, 32
+%r176 = trunc i224 %r175 to i32
+%r178 = getelementptr i32, i32* %r1, i32 2
+store i32 %r176, i32* %r178
+%r179 = lshr i224 %r175, 32
+%r180 = trunc i224 %r179 to i32
+%r182 = getelementptr i32, i32* %r1, i32 3
+store i32 %r180, i32* %r182
+%r183 = lshr i224 %r179, 32
+%r184 = trunc i224 %r183 to i32
+%r186 = getelementptr i32, i32* %r1, i32 4
+store i32 %r184, i32* %r186
+%r187 = lshr i224 %r183, 32
+%r188 = trunc i224 %r187 to i32
+%r190 = getelementptr i32, i32* %r1, i32 5
+store i32 %r188, i32* %r190
+%r191 = lshr i224 %r187, 32
+%r192 = trunc i224 %r191 to i32
+%r194 = getelementptr i32, i32* %r1, i32 6
+store i32 %r192, i32* %r194
+ret void
+}
+define void @mcl_fp_subNF7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Branch-free 7-word subtract: computes d = [%r2] - [%r3] in 224 bits and stores d + [%r4] to %r1 if bit 223 of d is set, else d. NOTE(review): presumably requires operands whose top bit is clear - confirm against callers.
+{
+%r5 = load i32, i32* %r2 ; assemble the 7 words at %r2 (minuend) into one i224; word 0 is the least significant
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = load i32, i32* %r3 ; same assembly for the 7 words at %r3 (subtrahend)
+%r49 = zext i32 %r48 to i64
+%r51 = getelementptr i32, i32* %r3, i32 1
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i64
+%r54 = shl i64 %r53, 32
+%r55 = or i64 %r49, %r54
+%r56 = zext i64 %r55 to i96
+%r58 = getelementptr i32, i32* %r3, i32 2
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i96
+%r61 = shl i96 %r60, 64
+%r62 = or i96 %r56, %r61
+%r63 = zext i96 %r62 to i128
+%r65 = getelementptr i32, i32* %r3, i32 3
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i128
+%r68 = shl i128 %r67, 96
+%r69 = or i128 %r63, %r68
+%r70 = zext i128 %r69 to i160
+%r72 = getelementptr i32, i32* %r3, i32 4
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i160
+%r75 = shl i160 %r74, 128
+%r76 = or i160 %r70, %r75
+%r77 = zext i160 %r76 to i192
+%r79 = getelementptr i32, i32* %r3, i32 5
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i192
+%r82 = shl i192 %r81, 160
+%r83 = or i192 %r77, %r82
+%r84 = zext i192 %r83 to i224
+%r86 = getelementptr i32, i32* %r3, i32 6
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i224
+%r89 = shl i224 %r88, 192
+%r90 = or i224 %r84, %r89
+%r91 = sub i224 %r47, %r90 ; difference kept at 224 bits, wrapping modulo 2^224
+%r92 = lshr i224 %r91, 223 ; isolate the top bit (bit 223) of the difference
+%r93 = trunc i224 %r92 to i1
+%r94 = load i32, i32* %r4 ; assemble the 7 words at %r4 into one i224 (loaded unconditionally)
+%r95 = zext i32 %r94 to i64
+%r97 = getelementptr i32, i32* %r4, i32 1
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i64
+%r100 = shl i64 %r99, 32
+%r101 = or i64 %r95, %r100
+%r102 = zext i64 %r101 to i96
+%r104 = getelementptr i32, i32* %r4, i32 2
+%r105 = load i32, i32* %r104
+%r106 = zext i32 %r105 to i96
+%r107 = shl i96 %r106, 64
+%r108 = or i96 %r102, %r107
+%r109 = zext i96 %r108 to i128
+%r111 = getelementptr i32, i32* %r4, i32 3
+%r112 = load i32, i32* %r111
+%r113 = zext i32 %r112 to i128
+%r114 = shl i128 %r113, 96
+%r115 = or i128 %r109, %r114
+%r116 = zext i128 %r115 to i160
+%r118 = getelementptr i32, i32* %r4, i32 4
+%r119 = load i32, i32* %r118
+%r120 = zext i32 %r119 to i160
+%r121 = shl i160 %r120, 128
+%r122 = or i160 %r116, %r121
+%r123 = zext i160 %r122 to i192
+%r125 = getelementptr i32, i32* %r4, i32 5
+%r126 = load i32, i32* %r125
+%r127 = zext i32 %r126 to i192
+%r128 = shl i192 %r127, 160
+%r129 = or i192 %r123, %r128
+%r130 = zext i192 %r129 to i224
+%r132 = getelementptr i32, i32* %r4, i32 6
+%r133 = load i32, i32* %r132
+%r134 = zext i32 %r133 to i224
+%r135 = shl i224 %r134, 192
+%r136 = or i224 %r130, %r135
+%r138 = select i1 %r93, i224 %r136, i224 0 ; addend is [%r4] when the top bit of the difference is set, otherwise 0
+%r139 = add i224 %r91, %r138 ; 224-bit add, wrapping modulo 2^224
+%r140 = trunc i224 %r139 to i32 ; write back one 32-bit word at a time, least significant first
+%r142 = getelementptr i32, i32* %r1, i32 0
+store i32 %r140, i32* %r142
+%r143 = lshr i224 %r139, 32
+%r144 = trunc i224 %r143 to i32
+%r146 = getelementptr i32, i32* %r1, i32 1
+store i32 %r144, i32* %r146
+%r147 = lshr i224 %r143, 32
+%r148 = trunc i224 %r147 to i32
+%r150 = getelementptr i32, i32* %r1, i32 2
+store i32 %r148, i32* %r150
+%r151 = lshr i224 %r147, 32
+%r152 = trunc i224 %r151 to i32
+%r154 = getelementptr i32, i32* %r1, i32 3
+store i32 %r152, i32* %r154
+%r155 = lshr i224 %r151, 32
+%r156 = trunc i224 %r155 to i32
+%r158 = getelementptr i32, i32* %r1, i32 4
+store i32 %r156, i32* %r158
+%r159 = lshr i224 %r155, 32
+%r160 = trunc i224 %r159 to i32
+%r162 = getelementptr i32, i32* %r1, i32 5
+store i32 %r160, i32* %r162
+%r163 = lshr i224 %r159, 32
+%r164 = trunc i224 %r163 to i32
+%r166 = getelementptr i32, i32* %r1, i32 6
+store i32 %r164, i32* %r166
+ret void
+}
+define void @mcl_fpDbl_add7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 14-limb add: %r1[0..13] = %r2[0..13] + %r3[0..13], with the upper 7 limbs conditionally reduced by the 7-limb value at %r4
+{ ; limbs are i32, least-significant first. NOTE(review): %r4 is presumably the field modulus p - confirm against the generator
+%r5 = load i32, i32* %r2 ; --- pack the 14 limbs at %r2 into one i448 (limb k lands at bit 32*k) ---
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95 ; %r96 = first operand as a single i448
+%r97 = load i32, i32* %r3 ; --- pack the 14 limbs at %r3 the same way ---
+%r98 = zext i32 %r97 to i64
+%r100 = getelementptr i32, i32* %r3, i32 1
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i64
+%r103 = shl i64 %r102, 32
+%r104 = or i64 %r98, %r103
+%r105 = zext i64 %r104 to i96
+%r107 = getelementptr i32, i32* %r3, i32 2
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i96
+%r110 = shl i96 %r109, 64
+%r111 = or i96 %r105, %r110
+%r112 = zext i96 %r111 to i128
+%r114 = getelementptr i32, i32* %r3, i32 3
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i128
+%r117 = shl i128 %r116, 96
+%r118 = or i128 %r112, %r117
+%r119 = zext i128 %r118 to i160
+%r121 = getelementptr i32, i32* %r3, i32 4
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i160
+%r124 = shl i160 %r123, 128
+%r125 = or i160 %r119, %r124
+%r126 = zext i160 %r125 to i192
+%r128 = getelementptr i32, i32* %r3, i32 5
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i192
+%r131 = shl i192 %r130, 160
+%r132 = or i192 %r126, %r131
+%r133 = zext i192 %r132 to i224
+%r135 = getelementptr i32, i32* %r3, i32 6
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i224
+%r138 = shl i224 %r137, 192
+%r139 = or i224 %r133, %r138
+%r140 = zext i224 %r139 to i256
+%r142 = getelementptr i32, i32* %r3, i32 7
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i256
+%r145 = shl i256 %r144, 224
+%r146 = or i256 %r140, %r145
+%r147 = zext i256 %r146 to i288
+%r149 = getelementptr i32, i32* %r3, i32 8
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i288
+%r152 = shl i288 %r151, 256
+%r153 = or i288 %r147, %r152
+%r154 = zext i288 %r153 to i320
+%r156 = getelementptr i32, i32* %r3, i32 9
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i320
+%r159 = shl i320 %r158, 288
+%r160 = or i320 %r154, %r159
+%r161 = zext i320 %r160 to i352
+%r163 = getelementptr i32, i32* %r3, i32 10
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i352
+%r166 = shl i352 %r165, 320
+%r167 = or i352 %r161, %r166
+%r168 = zext i352 %r167 to i384
+%r170 = getelementptr i32, i32* %r3, i32 11
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i384
+%r173 = shl i384 %r172, 352
+%r174 = or i384 %r168, %r173
+%r175 = zext i384 %r174 to i416
+%r177 = getelementptr i32, i32* %r3, i32 12
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i416
+%r180 = shl i416 %r179, 384
+%r181 = or i416 %r175, %r180
+%r182 = zext i416 %r181 to i448
+%r184 = getelementptr i32, i32* %r3, i32 13
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i448
+%r187 = shl i448 %r186, 416
+%r188 = or i448 %r182, %r187 ; %r188 = second operand as a single i448
+%r189 = zext i448 %r96 to i480 ; widen both operands so the carry out of bit 447 is not lost
+%r190 = zext i448 %r188 to i480
+%r191 = add i480 %r189, %r190 ; full sum, carry included
+%r192 = trunc i480 %r191 to i224 ; low 224 bits of the sum are stored as-is to %r1[0..6]
+%r193 = trunc i224 %r192 to i32
+%r195 = getelementptr i32, i32* %r1, i32 0
+store i32 %r193, i32* %r195
+%r196 = lshr i224 %r192, 32
+%r197 = trunc i224 %r196 to i32
+%r199 = getelementptr i32, i32* %r1, i32 1
+store i32 %r197, i32* %r199
+%r200 = lshr i224 %r196, 32
+%r201 = trunc i224 %r200 to i32
+%r203 = getelementptr i32, i32* %r1, i32 2
+store i32 %r201, i32* %r203
+%r204 = lshr i224 %r200, 32
+%r205 = trunc i224 %r204 to i32
+%r207 = getelementptr i32, i32* %r1, i32 3
+store i32 %r205, i32* %r207
+%r208 = lshr i224 %r204, 32
+%r209 = trunc i224 %r208 to i32
+%r211 = getelementptr i32, i32* %r1, i32 4
+store i32 %r209, i32* %r211
+%r212 = lshr i224 %r208, 32
+%r213 = trunc i224 %r212 to i32
+%r215 = getelementptr i32, i32* %r1, i32 5
+store i32 %r213, i32* %r215
+%r216 = lshr i224 %r212, 32
+%r217 = trunc i224 %r216 to i32
+%r219 = getelementptr i32, i32* %r1, i32 6
+store i32 %r217, i32* %r219
+%r220 = lshr i480 %r191, 224 ; upper part of the sum (bit 224 and above, carry included)
+%r221 = trunc i480 %r220 to i256
+%r222 = load i32, i32* %r4 ; --- pack the 7 limbs at %r4 into an i224 ---
+%r223 = zext i32 %r222 to i64
+%r225 = getelementptr i32, i32* %r4, i32 1
+%r226 = load i32, i32* %r225
+%r227 = zext i32 %r226 to i64
+%r228 = shl i64 %r227, 32
+%r229 = or i64 %r223, %r228
+%r230 = zext i64 %r229 to i96
+%r232 = getelementptr i32, i32* %r4, i32 2
+%r233 = load i32, i32* %r232
+%r234 = zext i32 %r233 to i96
+%r235 = shl i96 %r234, 64
+%r236 = or i96 %r230, %r235
+%r237 = zext i96 %r236 to i128
+%r239 = getelementptr i32, i32* %r4, i32 3
+%r240 = load i32, i32* %r239
+%r241 = zext i32 %r240 to i128
+%r242 = shl i128 %r241, 96
+%r243 = or i128 %r237, %r242
+%r244 = zext i128 %r243 to i160
+%r246 = getelementptr i32, i32* %r4, i32 4
+%r247 = load i32, i32* %r246
+%r248 = zext i32 %r247 to i160
+%r249 = shl i160 %r248, 128
+%r250 = or i160 %r244, %r249
+%r251 = zext i160 %r250 to i192
+%r253 = getelementptr i32, i32* %r4, i32 5
+%r254 = load i32, i32* %r253
+%r255 = zext i32 %r254 to i192
+%r256 = shl i192 %r255, 160
+%r257 = or i192 %r251, %r256
+%r258 = zext i192 %r257 to i224
+%r260 = getelementptr i32, i32* %r4, i32 6
+%r261 = load i32, i32* %r260
+%r262 = zext i32 %r261 to i224
+%r263 = shl i224 %r262, 192
+%r264 = or i224 %r258, %r263
+%r265 = zext i224 %r264 to i256
+%r266 = sub i256 %r221, %r265 ; trial subtraction: upper part minus the %r4 value
+%r267 = lshr i256 %r266, 224 ; bit 224 of the difference is the select flag (NOTE(review): treated as the borrow/sign bit - confirm)
+%r268 = trunc i256 %r267 to i1
+%r269 = select i1 %r268, i256 %r221, i256 %r266 ; flag set: keep the unsubtracted upper part; flag clear: keep the difference
+%r270 = trunc i256 %r269 to i224
+%r272 = getelementptr i32, i32* %r1, i32 7 ; the selected 224-bit value is written to %r1[7..13]
+%r273 = trunc i224 %r270 to i32
+%r275 = getelementptr i32, i32* %r272, i32 0
+store i32 %r273, i32* %r275
+%r276 = lshr i224 %r270, 32
+%r277 = trunc i224 %r276 to i32
+%r279 = getelementptr i32, i32* %r272, i32 1
+store i32 %r277, i32* %r279
+%r280 = lshr i224 %r276, 32
+%r281 = trunc i224 %r280 to i32
+%r283 = getelementptr i32, i32* %r272, i32 2
+store i32 %r281, i32* %r283
+%r284 = lshr i224 %r280, 32
+%r285 = trunc i224 %r284 to i32
+%r287 = getelementptr i32, i32* %r272, i32 3
+store i32 %r285, i32* %r287
+%r288 = lshr i224 %r284, 32
+%r289 = trunc i224 %r288 to i32
+%r291 = getelementptr i32, i32* %r272, i32 4
+store i32 %r289, i32* %r291
+%r292 = lshr i224 %r288, 32
+%r293 = trunc i224 %r292 to i32
+%r295 = getelementptr i32, i32* %r272, i32 5
+store i32 %r293, i32* %r295
+%r296 = lshr i224 %r292, 32
+%r297 = trunc i224 %r296 to i32
+%r299 = getelementptr i32, i32* %r272, i32 6
+store i32 %r297, i32* %r299
+ret void
+}
+define void @mcl_fpDbl_sub7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 14-limb subtract: %r1[0..13] = %r2[0..13] - %r3[0..13]; on borrow the 7-limb value at %r4 is added back into the upper 7 limbs
+{ ; limbs are i32, least-significant first. NOTE(review): %r4 is presumably the field modulus p - confirm against the generator
+%r5 = load i32, i32* %r2 ; --- pack the 14 limbs at %r2 into one i448 (limb k lands at bit 32*k) ---
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95 ; %r96 = minuend as a single i448
+%r97 = load i32, i32* %r3 ; --- pack the 14 limbs at %r3 the same way ---
+%r98 = zext i32 %r97 to i64
+%r100 = getelementptr i32, i32* %r3, i32 1
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i64
+%r103 = shl i64 %r102, 32
+%r104 = or i64 %r98, %r103
+%r105 = zext i64 %r104 to i96
+%r107 = getelementptr i32, i32* %r3, i32 2
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i96
+%r110 = shl i96 %r109, 64
+%r111 = or i96 %r105, %r110
+%r112 = zext i96 %r111 to i128
+%r114 = getelementptr i32, i32* %r3, i32 3
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i128
+%r117 = shl i128 %r116, 96
+%r118 = or i128 %r112, %r117
+%r119 = zext i128 %r118 to i160
+%r121 = getelementptr i32, i32* %r3, i32 4
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i160
+%r124 = shl i160 %r123, 128
+%r125 = or i160 %r119, %r124
+%r126 = zext i160 %r125 to i192
+%r128 = getelementptr i32, i32* %r3, i32 5
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i192
+%r131 = shl i192 %r130, 160
+%r132 = or i192 %r126, %r131
+%r133 = zext i192 %r132 to i224
+%r135 = getelementptr i32, i32* %r3, i32 6
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i224
+%r138 = shl i224 %r137, 192
+%r139 = or i224 %r133, %r138
+%r140 = zext i224 %r139 to i256
+%r142 = getelementptr i32, i32* %r3, i32 7
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i256
+%r145 = shl i256 %r144, 224
+%r146 = or i256 %r140, %r145
+%r147 = zext i256 %r146 to i288
+%r149 = getelementptr i32, i32* %r3, i32 8
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i288
+%r152 = shl i288 %r151, 256
+%r153 = or i288 %r147, %r152
+%r154 = zext i288 %r153 to i320
+%r156 = getelementptr i32, i32* %r3, i32 9
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i320
+%r159 = shl i320 %r158, 288
+%r160 = or i320 %r154, %r159
+%r161 = zext i320 %r160 to i352
+%r163 = getelementptr i32, i32* %r3, i32 10
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i352
+%r166 = shl i352 %r165, 320
+%r167 = or i352 %r161, %r166
+%r168 = zext i352 %r167 to i384
+%r170 = getelementptr i32, i32* %r3, i32 11
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i384
+%r173 = shl i384 %r172, 352
+%r174 = or i384 %r168, %r173
+%r175 = zext i384 %r174 to i416
+%r177 = getelementptr i32, i32* %r3, i32 12
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i416
+%r180 = shl i416 %r179, 384
+%r181 = or i416 %r175, %r180
+%r182 = zext i416 %r181 to i448
+%r184 = getelementptr i32, i32* %r3, i32 13
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i448
+%r187 = shl i448 %r186, 416
+%r188 = or i448 %r182, %r187 ; %r188 = subtrahend as a single i448
+%r189 = zext i448 %r96 to i480 ; widen both operands so a borrow shows up above bit 447
+%r190 = zext i448 %r188 to i480
+%r191 = sub i480 %r189, %r190 ; full difference; wraps modulo 2^480 when the %r2 value is smaller than the %r3 value
+%r192 = trunc i480 %r191 to i224 ; low 224 bits of the difference are stored as-is to %r1[0..6]
+%r193 = trunc i224 %r192 to i32
+%r195 = getelementptr i32, i32* %r1, i32 0
+store i32 %r193, i32* %r195
+%r196 = lshr i224 %r192, 32
+%r197 = trunc i224 %r196 to i32
+%r199 = getelementptr i32, i32* %r1, i32 1
+store i32 %r197, i32* %r199
+%r200 = lshr i224 %r196, 32
+%r201 = trunc i224 %r200 to i32
+%r203 = getelementptr i32, i32* %r1, i32 2
+store i32 %r201, i32* %r203
+%r204 = lshr i224 %r200, 32
+%r205 = trunc i224 %r204 to i32
+%r207 = getelementptr i32, i32* %r1, i32 3
+store i32 %r205, i32* %r207
+%r208 = lshr i224 %r204, 32
+%r209 = trunc i224 %r208 to i32
+%r211 = getelementptr i32, i32* %r1, i32 4
+store i32 %r209, i32* %r211
+%r212 = lshr i224 %r208, 32
+%r213 = trunc i224 %r212 to i32
+%r215 = getelementptr i32, i32* %r1, i32 5
+store i32 %r213, i32* %r215
+%r216 = lshr i224 %r212, 32
+%r217 = trunc i224 %r216 to i32
+%r219 = getelementptr i32, i32* %r1, i32 6
+store i32 %r217, i32* %r219
+%r220 = lshr i480 %r191, 224 ; bits 224..447 of the difference = upper half before correction
+%r221 = trunc i480 %r220 to i224
+%r222 = lshr i480 %r191, 448 ; bit 448 is set exactly when the subtraction borrowed (both inputs were zero-extended from i448)
+%r223 = trunc i480 %r222 to i1
+%r224 = load i32, i32* %r4 ; --- pack the 7 limbs at %r4 into an i224 ---
+%r225 = zext i32 %r224 to i64
+%r227 = getelementptr i32, i32* %r4, i32 1
+%r228 = load i32, i32* %r227
+%r229 = zext i32 %r228 to i64
+%r230 = shl i64 %r229, 32
+%r231 = or i64 %r225, %r230
+%r232 = zext i64 %r231 to i96
+%r234 = getelementptr i32, i32* %r4, i32 2
+%r235 = load i32, i32* %r234
+%r236 = zext i32 %r235 to i96
+%r237 = shl i96 %r236, 64
+%r238 = or i96 %r232, %r237
+%r239 = zext i96 %r238 to i128
+%r241 = getelementptr i32, i32* %r4, i32 3
+%r242 = load i32, i32* %r241
+%r243 = zext i32 %r242 to i128
+%r244 = shl i128 %r243, 96
+%r245 = or i128 %r239, %r244
+%r246 = zext i128 %r245 to i160
+%r248 = getelementptr i32, i32* %r4, i32 4
+%r249 = load i32, i32* %r248
+%r250 = zext i32 %r249 to i160
+%r251 = shl i160 %r250, 128
+%r252 = or i160 %r246, %r251
+%r253 = zext i160 %r252 to i192
+%r255 = getelementptr i32, i32* %r4, i32 5
+%r256 = load i32, i32* %r255
+%r257 = zext i32 %r256 to i192
+%r258 = shl i192 %r257, 160
+%r259 = or i192 %r253, %r258
+%r260 = zext i192 %r259 to i224
+%r262 = getelementptr i32, i32* %r4, i32 6
+%r263 = load i32, i32* %r262
+%r264 = zext i32 %r263 to i224
+%r265 = shl i224 %r264, 192
+%r266 = or i224 %r260, %r265
+%r268 = select i1 %r223, i224 %r266, i224 0 ; addend is the %r4 value on borrow, otherwise 0 (branch-free select)
+%r269 = add i224 %r221, %r268 ; corrected upper half; the add wraps modulo 2^224
+%r271 = getelementptr i32, i32* %r1, i32 7 ; corrected upper half is written to %r1[7..13]
+%r272 = trunc i224 %r269 to i32
+%r274 = getelementptr i32, i32* %r271, i32 0
+store i32 %r272, i32* %r274
+%r275 = lshr i224 %r269, 32
+%r276 = trunc i224 %r275 to i32
+%r278 = getelementptr i32, i32* %r271, i32 1
+store i32 %r276, i32* %r278
+%r279 = lshr i224 %r275, 32
+%r280 = trunc i224 %r279 to i32
+%r282 = getelementptr i32, i32* %r271, i32 2
+store i32 %r280, i32* %r282
+%r283 = lshr i224 %r279, 32
+%r284 = trunc i224 %r283 to i32
+%r286 = getelementptr i32, i32* %r271, i32 3
+store i32 %r284, i32* %r286
+%r287 = lshr i224 %r283, 32
+%r288 = trunc i224 %r287 to i32
+%r290 = getelementptr i32, i32* %r271, i32 4
+store i32 %r288, i32* %r290
+%r291 = lshr i224 %r287, 32
+%r292 = trunc i224 %r291 to i32
+%r294 = getelementptr i32, i32* %r271, i32 5
+store i32 %r292, i32* %r294
+%r295 = lshr i224 %r291, 32
+%r296 = trunc i224 %r295 to i32
+%r298 = getelementptr i32, i32* %r271, i32 6
+store i32 %r296, i32* %r298
+ret void
+}
+define i288 @mulPv256x32(i32* noalias  %r2, i32 %r3) ; returns an i288 combined from the eight i64 results of mulPos32x32(%r2, %r3, k), k = 0..7
+{ ; NOTE(review): mulPos32x32 and extractHigh32 are defined outside this view; presumably the 64-bit product %r2[k] * %r3 and the upper 32 bits of an i64 - confirm
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0) ; --- for each k: keep the low word (trunc) and the high word (extractHigh32) of the i64 result ---
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
+%r34 = trunc i64 %r33 to i32
+%r35 = call i32 @extractHigh32(i64 %r33)
+%r36 = zext i32 %r6 to i64 ; --- pack the eight low words into an i256 (word k at bit 32*k) ---
+%r37 = zext i32 %r10 to i64
+%r38 = shl i64 %r37, 32
+%r39 = or i64 %r36, %r38
+%r40 = zext i64 %r39 to i96
+%r41 = zext i32 %r14 to i96
+%r42 = shl i96 %r41, 64
+%r43 = or i96 %r40, %r42
+%r44 = zext i96 %r43 to i128
+%r45 = zext i32 %r18 to i128
+%r46 = shl i128 %r45, 96
+%r47 = or i128 %r44, %r46
+%r48 = zext i128 %r47 to i160
+%r49 = zext i32 %r22 to i160
+%r50 = shl i160 %r49, 128
+%r51 = or i160 %r48, %r50
+%r52 = zext i160 %r51 to i192
+%r53 = zext i32 %r26 to i192
+%r54 = shl i192 %r53, 160
+%r55 = or i192 %r52, %r54
+%r56 = zext i192 %r55 to i224
+%r57 = zext i32 %r30 to i224
+%r58 = shl i224 %r57, 192
+%r59 = or i224 %r56, %r58
+%r60 = zext i224 %r59 to i256
+%r61 = zext i32 %r34 to i256
+%r62 = shl i256 %r61, 224
+%r63 = or i256 %r60, %r62 ; %r63 = all low words packed
+%r64 = zext i32 %r7 to i64 ; --- pack the eight high words into a second i256 the same way ---
+%r65 = zext i32 %r11 to i64
+%r66 = shl i64 %r65, 32
+%r67 = or i64 %r64, %r66
+%r68 = zext i64 %r67 to i96
+%r69 = zext i32 %r15 to i96
+%r70 = shl i96 %r69, 64
+%r71 = or i96 %r68, %r70
+%r72 = zext i96 %r71 to i128
+%r73 = zext i32 %r19 to i128
+%r74 = shl i128 %r73, 96
+%r75 = or i128 %r72, %r74
+%r76 = zext i128 %r75 to i160
+%r77 = zext i32 %r23 to i160
+%r78 = shl i160 %r77, 128
+%r79 = or i160 %r76, %r78
+%r80 = zext i160 %r79 to i192
+%r81 = zext i32 %r27 to i192
+%r82 = shl i192 %r81, 160
+%r83 = or i192 %r80, %r82
+%r84 = zext i192 %r83 to i224
+%r85 = zext i32 %r31 to i224
+%r86 = shl i224 %r85, 192
+%r87 = or i224 %r84, %r86
+%r88 = zext i224 %r87 to i256
+%r89 = zext i32 %r35 to i256
+%r90 = shl i256 %r89, 224
+%r91 = or i256 %r88, %r90 ; %r91 = all high words packed
+%r92 = zext i256 %r63 to i288
+%r93 = zext i256 %r91 to i288
+%r94 = shl i288 %r93, 32 ; high word k is placed one 32-bit position above low word k
+%r95 = add i288 %r92, %r94 ; a single wide add combines both halves and propagates carries
+ret i288 %r95
+}
+define void @mcl_fp_mulUnitPre8L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3) ; writes the i288 result of mulPv256x32(%r2, %r3) to %r1[0..8] as nine i32 limbs, least-significant first
+{ ; %r1 therefore needs room for 9 limbs
+%r4 = call i288 @mulPv256x32(i32* %r2, i32 %r3)
+%r5 = trunc i288 %r4 to i32 ; limb 0 = lowest 32 bits
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i288 %r4, 32 ; each following step shifts the remaining value right by 32 and stores the next limb
+%r9 = trunc i288 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i288 %r8, 32
+%r13 = trunc i288 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i288 %r12, 32
+%r17 = trunc i288 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i288 %r16, 32
+%r21 = trunc i288 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+%r24 = lshr i288 %r20, 32
+%r25 = trunc i288 %r24 to i32
+%r27 = getelementptr i32, i32* %r1, i32 5
+store i32 %r25, i32* %r27
+%r28 = lshr i288 %r24, 32
+%r29 = trunc i288 %r28 to i32
+%r31 = getelementptr i32, i32* %r1, i32 6
+store i32 %r29, i32* %r31
+%r32 = lshr i288 %r28, 32
+%r33 = trunc i288 %r32 to i32
+%r35 = getelementptr i32, i32* %r1, i32 7
+store i32 %r33, i32* %r35
+%r36 = lshr i288 %r32, 32
+%r37 = trunc i288 %r36 to i32 ; limb 8 = top 32 bits of the i288 result
+%r39 = getelementptr i32, i32* %r1, i32 8
+store i32 %r37, i32* %r39
+ret void
+}
+define void @mcl_fpDbl_mulPre8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r2, i32 4
+%r7 = getelementptr i32, i32* %r3, i32 4
+%r9 = getelementptr i32, i32* %r1, i32 8
+call void @mcl_fpDbl_mulPre4L(i32* %r1, i32* %r2, i32* %r3)
+call void @mcl_fpDbl_mulPre4L(i32* %r9, i32* %r5, i32* %r7)
+%r10 = load i32, i32* %r5
+%r11 = zext i32 %r10 to i64
+%r13 = getelementptr i32, i32* %r5, i32 1
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i64
+%r16 = shl i64 %r15, 32
+%r17 = or i64 %r11, %r16
+%r18 = zext i64 %r17 to i96
+%r20 = getelementptr i32, i32* %r5, i32 2
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i96
+%r23 = shl i96 %r22, 64
+%r24 = or i96 %r18, %r23
+%r25 = zext i96 %r24 to i128
+%r27 = getelementptr i32, i32* %r5, i32 3
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i128
+%r30 = shl i128 %r29, 96
+%r31 = or i128 %r25, %r30
+%r32 = zext i128 %r31 to i160
+%r33 = load i32, i32* %r2
+%r34 = zext i32 %r33 to i64
+%r36 = getelementptr i32, i32* %r2, i32 1
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i64
+%r39 = shl i64 %r38, 32
+%r40 = or i64 %r34, %r39
+%r41 = zext i64 %r40 to i96
+%r43 = getelementptr i32, i32* %r2, i32 2
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i96
+%r46 = shl i96 %r45, 64
+%r47 = or i96 %r41, %r46
+%r48 = zext i96 %r47 to i128
+%r50 = getelementptr i32, i32* %r2, i32 3
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i128
+%r53 = shl i128 %r52, 96
+%r54 = or i128 %r48, %r53
+%r55 = zext i128 %r54 to i160
+%r56 = load i32, i32* %r7
+%r57 = zext i32 %r56 to i64
+%r59 = getelementptr i32, i32* %r7, i32 1
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i64
+%r62 = shl i64 %r61, 32
+%r63 = or i64 %r57, %r62
+%r64 = zext i64 %r63 to i96
+%r66 = getelementptr i32, i32* %r7, i32 2
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i96
+%r69 = shl i96 %r68, 64
+%r70 = or i96 %r64, %r69
+%r71 = zext i96 %r70 to i128
+%r73 = getelementptr i32, i32* %r7, i32 3
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i128
+%r76 = shl i128 %r75, 96
+%r77 = or i128 %r71, %r76
+%r78 = zext i128 %r77 to i160
+%r79 = load i32, i32* %r3
+%r80 = zext i32 %r79 to i64
+%r82 = getelementptr i32, i32* %r3, i32 1
+%r83 = load i32, i32* %r82
+%r84 = zext i32 %r83 to i64
+%r85 = shl i64 %r84, 32
+%r86 = or i64 %r80, %r85
+%r87 = zext i64 %r86 to i96
+%r89 = getelementptr i32, i32* %r3, i32 2
+%r90 = load i32, i32* %r89
+%r91 = zext i32 %r90 to i96
+%r92 = shl i96 %r91, 64
+%r93 = or i96 %r87, %r92
+%r94 = zext i96 %r93 to i128
+%r96 = getelementptr i32, i32* %r3, i32 3
+%r97 = load i32, i32* %r96
+%r98 = zext i32 %r97 to i128
+%r99 = shl i128 %r98, 96
+%r100 = or i128 %r94, %r99
+%r101 = zext i128 %r100 to i160
+%r102 = add i160 %r32, %r55
+%r103 = add i160 %r78, %r101
+%r105 = alloca i32, i32 8
+%r106 = trunc i160 %r102 to i128
+%r107 = trunc i160 %r103 to i128
+%r108 = lshr i160 %r102, 128
+%r109 = trunc i160 %r108 to i1
+%r110 = lshr i160 %r103, 128
+%r111 = trunc i160 %r110 to i1
+%r112 = and i1 %r109, %r111
+%r114 = select i1 %r109, i128 %r107, i128 0
+%r116 = select i1 %r111, i128 %r106, i128 0
+%r118 = alloca i32, i32 4
+%r120 = alloca i32, i32 4
+%r121 = trunc i128 %r106 to i32
+%r123 = getelementptr i32, i32* %r118, i32 0
+store i32 %r121, i32* %r123
+%r124 = lshr i128 %r106, 32
+%r125 = trunc i128 %r124 to i32
+%r127 = getelementptr i32, i32* %r118, i32 1
+store i32 %r125, i32* %r127
+%r128 = lshr i128 %r124, 32
+%r129 = trunc i128 %r128 to i32
+%r131 = getelementptr i32, i32* %r118, i32 2
+store i32 %r129, i32* %r131
+%r132 = lshr i128 %r128, 32
+%r133 = trunc i128 %r132 to i32
+%r135 = getelementptr i32, i32* %r118, i32 3
+store i32 %r133, i32* %r135
+%r136 = trunc i128 %r107 to i32
+%r138 = getelementptr i32, i32* %r120, i32 0
+store i32 %r136, i32* %r138
+%r139 = lshr i128 %r107, 32
+%r140 = trunc i128 %r139 to i32
+%r142 = getelementptr i32, i32* %r120, i32 1
+store i32 %r140, i32* %r142
+%r143 = lshr i128 %r139, 32
+%r144 = trunc i128 %r143 to i32
+%r146 = getelementptr i32, i32* %r120, i32 2
+store i32 %r144, i32* %r146
+%r147 = lshr i128 %r143, 32
+%r148 = trunc i128 %r147 to i32
+%r150 = getelementptr i32, i32* %r120, i32 3
+store i32 %r148, i32* %r150
+call void @mcl_fpDbl_mulPre4L(i32* %r105, i32* %r118, i32* %r120)
+%r151 = load i32, i32* %r105
+%r152 = zext i32 %r151 to i64
+%r154 = getelementptr i32, i32* %r105, i32 1
+%r155 = load i32, i32* %r154
+%r156 = zext i32 %r155 to i64
+%r157 = shl i64 %r156, 32
+%r158 = or i64 %r152, %r157
+%r159 = zext i64 %r158 to i96
+%r161 = getelementptr i32, i32* %r105, i32 2
+%r162 = load i32, i32* %r161
+%r163 = zext i32 %r162 to i96
+%r164 = shl i96 %r163, 64
+%r165 = or i96 %r159, %r164
+%r166 = zext i96 %r165 to i128
+%r168 = getelementptr i32, i32* %r105, i32 3
+%r169 = load i32, i32* %r168
+%r170 = zext i32 %r169 to i128
+%r171 = shl i128 %r170, 96
+%r172 = or i128 %r166, %r171
+%r173 = zext i128 %r172 to i160
+%r175 = getelementptr i32, i32* %r105, i32 4
+%r176 = load i32, i32* %r175
+%r177 = zext i32 %r176 to i160
+%r178 = shl i160 %r177, 128
+%r179 = or i160 %r173, %r178
+%r180 = zext i160 %r179 to i192
+%r182 = getelementptr i32, i32* %r105, i32 5
+%r183 = load i32, i32* %r182
+%r184 = zext i32 %r183 to i192
+%r185 = shl i192 %r184, 160
+%r186 = or i192 %r180, %r185
+%r187 = zext i192 %r186 to i224
+%r189 = getelementptr i32, i32* %r105, i32 6
+%r190 = load i32, i32* %r189
+%r191 = zext i32 %r190 to i224
+%r192 = shl i224 %r191, 192
+%r193 = or i224 %r187, %r192
+%r194 = zext i224 %r193 to i256
+%r196 = getelementptr i32, i32* %r105, i32 7
+%r197 = load i32, i32* %r196
+%r198 = zext i32 %r197 to i256
+%r199 = shl i256 %r198, 224
+%r200 = or i256 %r194, %r199
+%r201 = zext i256 %r200 to i288
+%r202 = zext i1 %r112 to i288
+%r203 = shl i288 %r202, 256
+%r204 = or i288 %r201, %r203
+%r205 = zext i128 %r114 to i288
+%r206 = zext i128 %r116 to i288
+%r207 = shl i288 %r205, 128
+%r208 = shl i288 %r206, 128
+%r209 = add i288 %r204, %r207
+%r210 = add i288 %r209, %r208
+%r211 = load i32, i32* %r1
+%r212 = zext i32 %r211 to i64
+%r214 = getelementptr i32, i32* %r1, i32 1
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i64
+%r217 = shl i64 %r216, 32
+%r218 = or i64 %r212, %r217
+%r219 = zext i64 %r218 to i96
+%r221 = getelementptr i32, i32* %r1, i32 2
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i96
+%r224 = shl i96 %r223, 64
+%r225 = or i96 %r219, %r224
+%r226 = zext i96 %r225 to i128
+%r228 = getelementptr i32, i32* %r1, i32 3
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i128
+%r231 = shl i128 %r230, 96
+%r232 = or i128 %r226, %r231
+%r233 = zext i128 %r232 to i160
+%r235 = getelementptr i32, i32* %r1, i32 4
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i160
+%r238 = shl i160 %r237, 128
+%r239 = or i160 %r233, %r238
+%r240 = zext i160 %r239 to i192
+%r242 = getelementptr i32, i32* %r1, i32 5
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i192
+%r245 = shl i192 %r244, 160
+%r246 = or i192 %r240, %r245
+%r247 = zext i192 %r246 to i224
+%r249 = getelementptr i32, i32* %r1, i32 6
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i224
+%r252 = shl i224 %r251, 192
+%r253 = or i224 %r247, %r252
+%r254 = zext i224 %r253 to i256
+%r256 = getelementptr i32, i32* %r1, i32 7
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i256
+%r259 = shl i256 %r258, 224
+%r260 = or i256 %r254, %r259
+%r261 = zext i256 %r260 to i288
+%r262 = sub i288 %r210, %r261
+%r264 = getelementptr i32, i32* %r1, i32 8
+%r265 = load i32, i32* %r264
+%r266 = zext i32 %r265 to i64
+%r268 = getelementptr i32, i32* %r264, i32 1
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i64
+%r271 = shl i64 %r270, 32
+%r272 = or i64 %r266, %r271
+%r273 = zext i64 %r272 to i96
+%r275 = getelementptr i32, i32* %r264, i32 2
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i96
+%r278 = shl i96 %r277, 64
+%r279 = or i96 %r273, %r278
+%r280 = zext i96 %r279 to i128
+%r282 = getelementptr i32, i32* %r264, i32 3
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i128
+%r285 = shl i128 %r284, 96
+%r286 = or i128 %r280, %r285
+%r287 = zext i128 %r286 to i160
+%r289 = getelementptr i32, i32* %r264, i32 4
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i160
+%r292 = shl i160 %r291, 128
+%r293 = or i160 %r287, %r292
+%r294 = zext i160 %r293 to i192
+%r296 = getelementptr i32, i32* %r264, i32 5
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i192
+%r299 = shl i192 %r298, 160
+%r300 = or i192 %r294, %r299
+%r301 = zext i192 %r300 to i224
+%r303 = getelementptr i32, i32* %r264, i32 6
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i224
+%r306 = shl i224 %r305, 192
+%r307 = or i224 %r301, %r306
+%r308 = zext i224 %r307 to i256
+%r310 = getelementptr i32, i32* %r264, i32 7
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i256
+%r313 = shl i256 %r312, 224
+%r314 = or i256 %r308, %r313
+%r315 = zext i256 %r314 to i288
+%r316 = sub i288 %r262, %r315
+%r317 = zext i288 %r316 to i384
+%r319 = getelementptr i32, i32* %r1, i32 4
+%r320 = load i32, i32* %r319
+%r321 = zext i32 %r320 to i64
+%r323 = getelementptr i32, i32* %r319, i32 1
+%r324 = load i32, i32* %r323
+%r325 = zext i32 %r324 to i64
+%r326 = shl i64 %r325, 32
+%r327 = or i64 %r321, %r326
+%r328 = zext i64 %r327 to i96
+%r330 = getelementptr i32, i32* %r319, i32 2
+%r331 = load i32, i32* %r330
+%r332 = zext i32 %r331 to i96
+%r333 = shl i96 %r332, 64
+%r334 = or i96 %r328, %r333
+%r335 = zext i96 %r334 to i128
+%r337 = getelementptr i32, i32* %r319, i32 3
+%r338 = load i32, i32* %r337
+%r339 = zext i32 %r338 to i128
+%r340 = shl i128 %r339, 96
+%r341 = or i128 %r335, %r340
+%r342 = zext i128 %r341 to i160
+%r344 = getelementptr i32, i32* %r319, i32 4
+%r345 = load i32, i32* %r344
+%r346 = zext i32 %r345 to i160
+%r347 = shl i160 %r346, 128
+%r348 = or i160 %r342, %r347
+%r349 = zext i160 %r348 to i192
+%r351 = getelementptr i32, i32* %r319, i32 5
+%r352 = load i32, i32* %r351
+%r353 = zext i32 %r352 to i192
+%r354 = shl i192 %r353, 160
+%r355 = or i192 %r349, %r354
+%r356 = zext i192 %r355 to i224
+%r358 = getelementptr i32, i32* %r319, i32 6
+%r359 = load i32, i32* %r358
+%r360 = zext i32 %r359 to i224
+%r361 = shl i224 %r360, 192
+%r362 = or i224 %r356, %r361
+%r363 = zext i224 %r362 to i256
+%r365 = getelementptr i32, i32* %r319, i32 7
+%r366 = load i32, i32* %r365
+%r367 = zext i32 %r366 to i256
+%r368 = shl i256 %r367, 224
+%r369 = or i256 %r363, %r368
+%r370 = zext i256 %r369 to i288
+%r372 = getelementptr i32, i32* %r319, i32 8
+%r373 = load i32, i32* %r372
+%r374 = zext i32 %r373 to i288
+%r375 = shl i288 %r374, 256
+%r376 = or i288 %r370, %r375
+%r377 = zext i288 %r376 to i320
+%r379 = getelementptr i32, i32* %r319, i32 9
+%r380 = load i32, i32* %r379
+%r381 = zext i32 %r380 to i320
+%r382 = shl i320 %r381, 288
+%r383 = or i320 %r377, %r382
+%r384 = zext i320 %r383 to i352
+%r386 = getelementptr i32, i32* %r319, i32 10
+%r387 = load i32, i32* %r386
+%r388 = zext i32 %r387 to i352
+%r389 = shl i352 %r388, 320
+%r390 = or i352 %r384, %r389
+%r391 = zext i352 %r390 to i384
+%r393 = getelementptr i32, i32* %r319, i32 11
+%r394 = load i32, i32* %r393
+%r395 = zext i32 %r394 to i384
+%r396 = shl i384 %r395, 352
+%r397 = or i384 %r391, %r396
+%r398 = add i384 %r317, %r397
+%r400 = getelementptr i32, i32* %r1, i32 4
+%r401 = trunc i384 %r398 to i32
+%r403 = getelementptr i32, i32* %r400, i32 0
+store i32 %r401, i32* %r403
+%r404 = lshr i384 %r398, 32
+%r405 = trunc i384 %r404 to i32
+%r407 = getelementptr i32, i32* %r400, i32 1
+store i32 %r405, i32* %r407
+%r408 = lshr i384 %r404, 32
+%r409 = trunc i384 %r408 to i32
+%r411 = getelementptr i32, i32* %r400, i32 2
+store i32 %r409, i32* %r411
+%r412 = lshr i384 %r408, 32
+%r413 = trunc i384 %r412 to i32
+%r415 = getelementptr i32, i32* %r400, i32 3
+store i32 %r413, i32* %r415
+%r416 = lshr i384 %r412, 32
+%r417 = trunc i384 %r416 to i32
+%r419 = getelementptr i32, i32* %r400, i32 4
+store i32 %r417, i32* %r419
+%r420 = lshr i384 %r416, 32
+%r421 = trunc i384 %r420 to i32
+%r423 = getelementptr i32, i32* %r400, i32 5
+store i32 %r421, i32* %r423
+%r424 = lshr i384 %r420, 32
+%r425 = trunc i384 %r424 to i32
+%r427 = getelementptr i32, i32* %r400, i32 6
+store i32 %r425, i32* %r427
+%r428 = lshr i384 %r424, 32
+%r429 = trunc i384 %r428 to i32
+%r431 = getelementptr i32, i32* %r400, i32 7
+store i32 %r429, i32* %r431
+%r432 = lshr i384 %r428, 32
+%r433 = trunc i384 %r432 to i32
+%r435 = getelementptr i32, i32* %r400, i32 8
+store i32 %r433, i32* %r435
+%r436 = lshr i384 %r432, 32
+%r437 = trunc i384 %r436 to i32
+%r439 = getelementptr i32, i32* %r400, i32 9
+store i32 %r437, i32* %r439
+%r440 = lshr i384 %r436, 32
+%r441 = trunc i384 %r440 to i32
+%r443 = getelementptr i32, i32* %r400, i32 10
+store i32 %r441, i32* %r443
+%r444 = lshr i384 %r440, 32
+%r445 = trunc i384 %r444 to i32
+%r447 = getelementptr i32, i32* %r400, i32 11
+store i32 %r445, i32* %r447
+ret void
+}
+define void @mcl_fpDbl_sqrPre8L(i32* noalias  %r1, i32* noalias  %r2) ; stores the 16-word square of the 8-word value at %r2 into %r1 (32-bit words, low word first)
+{ ; Karatsuba split: lo = %r2[0..3], hi = %r2[4..7]; uses three calls to mcl_fpDbl_mulPre4L
+%r4 = getelementptr i32, i32* %r2, i32 4 ; hi half of the input (words 4..7)
+%r6 = getelementptr i32, i32* %r2, i32 4 ; second pointer to the same hi half
+%r8 = getelementptr i32, i32* %r1, i32 8 ; upper 8 words of the output
+call void @mcl_fpDbl_mulPre4L(i32* %r1, i32* %r2, i32* %r2) ; %r1[0..7] = lo * lo
+call void @mcl_fpDbl_mulPre4L(i32* %r8, i32* %r4, i32* %r6) ; %r1[8..15] = hi * hi
+%r9 = load i32, i32* %r4 ; gather hi (%r4[0..3]) into one 128-bit integer
+%r10 = zext i32 %r9 to i64
+%r12 = getelementptr i32, i32* %r4, i32 1
+%r13 = load i32, i32* %r12
+%r14 = zext i32 %r13 to i64
+%r15 = shl i64 %r14, 32
+%r16 = or i64 %r10, %r15
+%r17 = zext i64 %r16 to i96
+%r19 = getelementptr i32, i32* %r4, i32 2
+%r20 = load i32, i32* %r19
+%r21 = zext i32 %r20 to i96
+%r22 = shl i96 %r21, 64
+%r23 = or i96 %r17, %r22
+%r24 = zext i96 %r23 to i128
+%r26 = getelementptr i32, i32* %r4, i32 3
+%r27 = load i32, i32* %r26
+%r28 = zext i32 %r27 to i128
+%r29 = shl i128 %r28, 96
+%r30 = or i128 %r24, %r29
+%r31 = zext i128 %r30 to i160 ; widened so that hi + lo keeps its carry bit
+%r32 = load i32, i32* %r2 ; gather lo (%r2[0..3]) into one 128-bit integer
+%r33 = zext i32 %r32 to i64
+%r35 = getelementptr i32, i32* %r2, i32 1
+%r36 = load i32, i32* %r35
+%r37 = zext i32 %r36 to i64
+%r38 = shl i64 %r37, 32
+%r39 = or i64 %r33, %r38
+%r40 = zext i64 %r39 to i96
+%r42 = getelementptr i32, i32* %r2, i32 2
+%r43 = load i32, i32* %r42
+%r44 = zext i32 %r43 to i96
+%r45 = shl i96 %r44, 64
+%r46 = or i96 %r40, %r45
+%r47 = zext i96 %r46 to i128
+%r49 = getelementptr i32, i32* %r2, i32 3
+%r50 = load i32, i32* %r49
+%r51 = zext i32 %r50 to i128
+%r52 = shl i128 %r51, 96
+%r53 = or i128 %r47, %r52
+%r54 = zext i128 %r53 to i160
+%r55 = load i32, i32* %r6 ; gather hi a second time (through %r6) for the other factor
+%r56 = zext i32 %r55 to i64
+%r58 = getelementptr i32, i32* %r6, i32 1
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i64
+%r61 = shl i64 %r60, 32
+%r62 = or i64 %r56, %r61
+%r63 = zext i64 %r62 to i96
+%r65 = getelementptr i32, i32* %r6, i32 2
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i96
+%r68 = shl i96 %r67, 64
+%r69 = or i96 %r63, %r68
+%r70 = zext i96 %r69 to i128
+%r72 = getelementptr i32, i32* %r6, i32 3
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i128
+%r75 = shl i128 %r74, 96
+%r76 = or i128 %r70, %r75
+%r77 = zext i128 %r76 to i160
+%r78 = load i32, i32* %r2 ; gather lo a second time for the other factor
+%r79 = zext i32 %r78 to i64
+%r81 = getelementptr i32, i32* %r2, i32 1
+%r82 = load i32, i32* %r81
+%r83 = zext i32 %r82 to i64
+%r84 = shl i64 %r83, 32
+%r85 = or i64 %r79, %r84
+%r86 = zext i64 %r85 to i96
+%r88 = getelementptr i32, i32* %r2, i32 2
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i96
+%r91 = shl i96 %r90, 64
+%r92 = or i96 %r86, %r91
+%r93 = zext i96 %r92 to i128
+%r95 = getelementptr i32, i32* %r2, i32 3
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i128
+%r98 = shl i128 %r97, 96
+%r99 = or i128 %r93, %r98
+%r100 = zext i128 %r99 to i160
+%r101 = add i160 %r31, %r54 ; first factor: hi + lo (up to 129 bits)
+%r102 = add i160 %r77, %r100 ; second factor: hi + lo again, same value as %r101
+%r104 = alloca i32, i32 8 ; stack scratch for the 8-word product of the low parts
+%r105 = trunc i160 %r101 to i128 ; low 128 bits of the first factor
+%r106 = trunc i160 %r102 to i128 ; low 128 bits of the second factor
+%r107 = lshr i160 %r101, 128
+%r108 = trunc i160 %r107 to i1 ; carry (bit 128) of the first factor
+%r109 = lshr i160 %r102, 128
+%r110 = trunc i160 %r109 to i1 ; carry (bit 128) of the second factor
+%r111 = and i1 %r108, %r110 ; carry * carry, weighted 2^256 below
+%r113 = select i1 %r108, i128 %r106, i128 0 ; cross term: first carry times low part of second factor
+%r115 = select i1 %r110, i128 %r105, i128 0 ; cross term: second carry times low part of first factor
+%r117 = alloca i32, i32 4 ; stack copy of %r105 as four words
+%r119 = alloca i32, i32 4 ; stack copy of %r106 as four words
+%r120 = trunc i128 %r105 to i32 ; spill %r105 to %r117, low word first
+%r122 = getelementptr i32, i32* %r117, i32 0
+store i32 %r120, i32* %r122
+%r123 = lshr i128 %r105, 32
+%r124 = trunc i128 %r123 to i32
+%r126 = getelementptr i32, i32* %r117, i32 1
+store i32 %r124, i32* %r126
+%r127 = lshr i128 %r123, 32
+%r128 = trunc i128 %r127 to i32
+%r130 = getelementptr i32, i32* %r117, i32 2
+store i32 %r128, i32* %r130
+%r131 = lshr i128 %r127, 32
+%r132 = trunc i128 %r131 to i32
+%r134 = getelementptr i32, i32* %r117, i32 3
+store i32 %r132, i32* %r134
+%r135 = trunc i128 %r106 to i32 ; spill %r106 to %r119, low word first
+%r137 = getelementptr i32, i32* %r119, i32 0
+store i32 %r135, i32* %r137
+%r138 = lshr i128 %r106, 32
+%r139 = trunc i128 %r138 to i32
+%r141 = getelementptr i32, i32* %r119, i32 1
+store i32 %r139, i32* %r141
+%r142 = lshr i128 %r138, 32
+%r143 = trunc i128 %r142 to i32
+%r145 = getelementptr i32, i32* %r119, i32 2
+store i32 %r143, i32* %r145
+%r146 = lshr i128 %r142, 32
+%r147 = trunc i128 %r146 to i32
+%r149 = getelementptr i32, i32* %r119, i32 3
+store i32 %r147, i32* %r149
+call void @mcl_fpDbl_mulPre4L(i32* %r104, i32* %r117, i32* %r119) ; %r104[0..7] = product of the two low parts
+%r150 = load i32, i32* %r104 ; reload that product as one 256-bit integer
+%r151 = zext i32 %r150 to i64
+%r153 = getelementptr i32, i32* %r104, i32 1
+%r154 = load i32, i32* %r153
+%r155 = zext i32 %r154 to i64
+%r156 = shl i64 %r155, 32
+%r157 = or i64 %r151, %r156
+%r158 = zext i64 %r157 to i96
+%r160 = getelementptr i32, i32* %r104, i32 2
+%r161 = load i32, i32* %r160
+%r162 = zext i32 %r161 to i96
+%r163 = shl i96 %r162, 64
+%r164 = or i96 %r158, %r163
+%r165 = zext i96 %r164 to i128
+%r167 = getelementptr i32, i32* %r104, i32 3
+%r168 = load i32, i32* %r167
+%r169 = zext i32 %r168 to i128
+%r170 = shl i128 %r169, 96
+%r171 = or i128 %r165, %r170
+%r172 = zext i128 %r171 to i160
+%r174 = getelementptr i32, i32* %r104, i32 4
+%r175 = load i32, i32* %r174
+%r176 = zext i32 %r175 to i160
+%r177 = shl i160 %r176, 128
+%r178 = or i160 %r172, %r177
+%r179 = zext i160 %r178 to i192
+%r181 = getelementptr i32, i32* %r104, i32 5
+%r182 = load i32, i32* %r181
+%r183 = zext i32 %r182 to i192
+%r184 = shl i192 %r183, 160
+%r185 = or i192 %r179, %r184
+%r186 = zext i192 %r185 to i224
+%r188 = getelementptr i32, i32* %r104, i32 6
+%r189 = load i32, i32* %r188
+%r190 = zext i32 %r189 to i224
+%r191 = shl i224 %r190, 192
+%r192 = or i224 %r186, %r191
+%r193 = zext i224 %r192 to i256
+%r195 = getelementptr i32, i32* %r104, i32 7
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i256
+%r198 = shl i256 %r197, 224
+%r199 = or i256 %r193, %r198
+%r200 = zext i256 %r199 to i288 ; room for the carry contributions added below
+%r201 = zext i1 %r111 to i288
+%r202 = shl i288 %r201, 256
+%r203 = or i288 %r200, %r202 ; place carry*carry at bit 256 (bits above 255 of %r200 are zero)
+%r204 = zext i128 %r113 to i288
+%r205 = zext i128 %r115 to i288
+%r206 = shl i288 %r204, 128 ; cross terms carry weight 2^128
+%r207 = shl i288 %r205, 128
+%r208 = add i288 %r203, %r206
+%r209 = add i288 %r208, %r207 ; %r209 = (lo + hi)^2
+%r210 = load i32, i32* %r1 ; gather %r1[0..7] (lo * lo written by the first call)
+%r211 = zext i32 %r210 to i64
+%r213 = getelementptr i32, i32* %r1, i32 1
+%r214 = load i32, i32* %r213
+%r215 = zext i32 %r214 to i64
+%r216 = shl i64 %r215, 32
+%r217 = or i64 %r211, %r216
+%r218 = zext i64 %r217 to i96
+%r220 = getelementptr i32, i32* %r1, i32 2
+%r221 = load i32, i32* %r220
+%r222 = zext i32 %r221 to i96
+%r223 = shl i96 %r222, 64
+%r224 = or i96 %r218, %r223
+%r225 = zext i96 %r224 to i128
+%r227 = getelementptr i32, i32* %r1, i32 3
+%r228 = load i32, i32* %r227
+%r229 = zext i32 %r228 to i128
+%r230 = shl i128 %r229, 96
+%r231 = or i128 %r225, %r230
+%r232 = zext i128 %r231 to i160
+%r234 = getelementptr i32, i32* %r1, i32 4
+%r235 = load i32, i32* %r234
+%r236 = zext i32 %r235 to i160
+%r237 = shl i160 %r236, 128
+%r238 = or i160 %r232, %r237
+%r239 = zext i160 %r238 to i192
+%r241 = getelementptr i32, i32* %r1, i32 5
+%r242 = load i32, i32* %r241
+%r243 = zext i32 %r242 to i192
+%r244 = shl i192 %r243, 160
+%r245 = or i192 %r239, %r244
+%r246 = zext i192 %r245 to i224
+%r248 = getelementptr i32, i32* %r1, i32 6
+%r249 = load i32, i32* %r248
+%r250 = zext i32 %r249 to i224
+%r251 = shl i224 %r250, 192
+%r252 = or i224 %r246, %r251
+%r253 = zext i224 %r252 to i256
+%r255 = getelementptr i32, i32* %r1, i32 7
+%r256 = load i32, i32* %r255
+%r257 = zext i32 %r256 to i256
+%r258 = shl i256 %r257, 224
+%r259 = or i256 %r253, %r258
+%r260 = zext i256 %r259 to i288
+%r261 = sub i288 %r209, %r260 ; (lo + hi)^2 - lo*lo
+%r263 = getelementptr i32, i32* %r1, i32 8 ; gather %r1[8..15] (hi * hi written by the second call)
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i64
+%r267 = getelementptr i32, i32* %r263, i32 1
+%r268 = load i32, i32* %r267
+%r269 = zext i32 %r268 to i64
+%r270 = shl i64 %r269, 32
+%r271 = or i64 %r265, %r270
+%r272 = zext i64 %r271 to i96
+%r274 = getelementptr i32, i32* %r263, i32 2
+%r275 = load i32, i32* %r274
+%r276 = zext i32 %r275 to i96
+%r277 = shl i96 %r276, 64
+%r278 = or i96 %r272, %r277
+%r279 = zext i96 %r278 to i128
+%r281 = getelementptr i32, i32* %r263, i32 3
+%r282 = load i32, i32* %r281
+%r283 = zext i32 %r282 to i128
+%r284 = shl i128 %r283, 96
+%r285 = or i128 %r279, %r284
+%r286 = zext i128 %r285 to i160
+%r288 = getelementptr i32, i32* %r263, i32 4
+%r289 = load i32, i32* %r288
+%r290 = zext i32 %r289 to i160
+%r291 = shl i160 %r290, 128
+%r292 = or i160 %r286, %r291
+%r293 = zext i160 %r292 to i192
+%r295 = getelementptr i32, i32* %r263, i32 5
+%r296 = load i32, i32* %r295
+%r297 = zext i32 %r296 to i192
+%r298 = shl i192 %r297, 160
+%r299 = or i192 %r293, %r298
+%r300 = zext i192 %r299 to i224
+%r302 = getelementptr i32, i32* %r263, i32 6
+%r303 = load i32, i32* %r302
+%r304 = zext i32 %r303 to i224
+%r305 = shl i224 %r304, 192
+%r306 = or i224 %r300, %r305
+%r307 = zext i224 %r306 to i256
+%r309 = getelementptr i32, i32* %r263, i32 7
+%r310 = load i32, i32* %r309
+%r311 = zext i32 %r310 to i256
+%r312 = shl i256 %r311, 224
+%r313 = or i256 %r307, %r312
+%r314 = zext i256 %r313 to i288
+%r315 = sub i288 %r261, %r314 ; middle term: (lo + hi)^2 - lo*lo - hi*hi = 2 * lo * hi
+%r316 = zext i288 %r315 to i384 ; widened to the 12-word window it is added into
+%r318 = getelementptr i32, i32* %r1, i32 4 ; gather %r1[4..15] (12 words) as one 384-bit integer
+%r319 = load i32, i32* %r318
+%r320 = zext i32 %r319 to i64
+%r322 = getelementptr i32, i32* %r318, i32 1
+%r323 = load i32, i32* %r322
+%r324 = zext i32 %r323 to i64
+%r325 = shl i64 %r324, 32
+%r326 = or i64 %r320, %r325
+%r327 = zext i64 %r326 to i96
+%r329 = getelementptr i32, i32* %r318, i32 2
+%r330 = load i32, i32* %r329
+%r331 = zext i32 %r330 to i96
+%r332 = shl i96 %r331, 64
+%r333 = or i96 %r327, %r332
+%r334 = zext i96 %r333 to i128
+%r336 = getelementptr i32, i32* %r318, i32 3
+%r337 = load i32, i32* %r336
+%r338 = zext i32 %r337 to i128
+%r339 = shl i128 %r338, 96
+%r340 = or i128 %r334, %r339
+%r341 = zext i128 %r340 to i160
+%r343 = getelementptr i32, i32* %r318, i32 4
+%r344 = load i32, i32* %r343
+%r345 = zext i32 %r344 to i160
+%r346 = shl i160 %r345, 128
+%r347 = or i160 %r341, %r346
+%r348 = zext i160 %r347 to i192
+%r350 = getelementptr i32, i32* %r318, i32 5
+%r351 = load i32, i32* %r350
+%r352 = zext i32 %r351 to i192
+%r353 = shl i192 %r352, 160
+%r354 = or i192 %r348, %r353
+%r355 = zext i192 %r354 to i224
+%r357 = getelementptr i32, i32* %r318, i32 6
+%r358 = load i32, i32* %r357
+%r359 = zext i32 %r358 to i224
+%r360 = shl i224 %r359, 192
+%r361 = or i224 %r355, %r360
+%r362 = zext i224 %r361 to i256
+%r364 = getelementptr i32, i32* %r318, i32 7
+%r365 = load i32, i32* %r364
+%r366 = zext i32 %r365 to i256
+%r367 = shl i256 %r366, 224
+%r368 = or i256 %r362, %r367
+%r369 = zext i256 %r368 to i288
+%r371 = getelementptr i32, i32* %r318, i32 8
+%r372 = load i32, i32* %r371
+%r373 = zext i32 %r372 to i288
+%r374 = shl i288 %r373, 256
+%r375 = or i288 %r369, %r374
+%r376 = zext i288 %r375 to i320
+%r378 = getelementptr i32, i32* %r318, i32 9
+%r379 = load i32, i32* %r378
+%r380 = zext i32 %r379 to i320
+%r381 = shl i320 %r380, 288
+%r382 = or i320 %r376, %r381
+%r383 = zext i320 %r382 to i352
+%r385 = getelementptr i32, i32* %r318, i32 10
+%r386 = load i32, i32* %r385
+%r387 = zext i32 %r386 to i352
+%r388 = shl i352 %r387, 320
+%r389 = or i352 %r383, %r388
+%r390 = zext i352 %r389 to i384
+%r392 = getelementptr i32, i32* %r318, i32 11
+%r393 = load i32, i32* %r392
+%r394 = zext i32 %r393 to i384
+%r395 = shl i384 %r394, 352
+%r396 = or i384 %r390, %r395
+%r397 = add i384 %r316, %r396 ; fold the middle term in at word offset 4 (weight 2^128)
+%r399 = getelementptr i32, i32* %r1, i32 4 ; write the 12 updated words back to %r1[4..15]
+%r400 = trunc i384 %r397 to i32
+%r402 = getelementptr i32, i32* %r399, i32 0
+store i32 %r400, i32* %r402
+%r403 = lshr i384 %r397, 32
+%r404 = trunc i384 %r403 to i32
+%r406 = getelementptr i32, i32* %r399, i32 1
+store i32 %r404, i32* %r406
+%r407 = lshr i384 %r403, 32
+%r408 = trunc i384 %r407 to i32
+%r410 = getelementptr i32, i32* %r399, i32 2
+store i32 %r408, i32* %r410
+%r411 = lshr i384 %r407, 32
+%r412 = trunc i384 %r411 to i32
+%r414 = getelementptr i32, i32* %r399, i32 3
+store i32 %r412, i32* %r414
+%r415 = lshr i384 %r411, 32
+%r416 = trunc i384 %r415 to i32
+%r418 = getelementptr i32, i32* %r399, i32 4
+store i32 %r416, i32* %r418
+%r419 = lshr i384 %r415, 32
+%r420 = trunc i384 %r419 to i32
+%r422 = getelementptr i32, i32* %r399, i32 5
+store i32 %r420, i32* %r422
+%r423 = lshr i384 %r419, 32
+%r424 = trunc i384 %r423 to i32
+%r426 = getelementptr i32, i32* %r399, i32 6
+store i32 %r424, i32* %r426
+%r427 = lshr i384 %r423, 32
+%r428 = trunc i384 %r427 to i32
+%r430 = getelementptr i32, i32* %r399, i32 7
+store i32 %r428, i32* %r430
+%r431 = lshr i384 %r427, 32
+%r432 = trunc i384 %r431 to i32
+%r434 = getelementptr i32, i32* %r399, i32 8
+store i32 %r432, i32* %r434
+%r435 = lshr i384 %r431, 32
+%r436 = trunc i384 %r435 to i32
+%r438 = getelementptr i32, i32* %r399, i32 9
+store i32 %r436, i32* %r438
+%r439 = lshr i384 %r435, 32
+%r440 = trunc i384 %r439 to i32
+%r442 = getelementptr i32, i32* %r399, i32 10
+store i32 %r440, i32* %r442
+%r443 = lshr i384 %r439, 32
+%r444 = trunc i384 %r443 to i32
+%r446 = getelementptr i32, i32* %r399, i32 11
+store i32 %r444, i32* %r446
+ret void
+}
+define void @mcl_fp_mont8L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; writes 8 words to %r1 from operands %r2, %r3 and %r4; presumably Montgomery multiplication with modulus %r4 - confirm against caller
+{ ; eight rounds, one per word of %r3: add %r2 * word, add %r4 * q, shift right by 32; then one conditional subtract of %r4
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; the word stored immediately before the %r4 array
+%r7 = load i32, i32* %r6 ; per-round multiplier; NOTE(review): presumably the negated inverse of %r4[0] mod 2^32 - confirm
+%r9 = getelementptr i32, i32* %r3, i32 0 ; round 0: word 0 of %r3
+%r10 = load i32, i32* %r9
+%r11 = call i288 @mulPv256x32(i32* %r2, i32 %r10) ; mulPv256x32 is defined elsewhere; presumably (8 words at %r2) * %r10
+%r12 = zext i288 %r11 to i320 ; accumulator is kept in 320 bits
+%r13 = trunc i288 %r11 to i32
+%r14 = mul i32 %r13, %r7 ; q = low word of the accumulator * %r7, mod 2^32
+%r15 = call i288 @mulPv256x32(i32* %r4, i32 %r14)
+%r16 = zext i288 %r15 to i320
+%r17 = add i320 %r12, %r16
+%r18 = lshr i320 %r17, 32 ; drop the low word before the next round
+%r20 = getelementptr i32, i32* %r3, i32 1 ; round 1: word 1 of %r3
+%r21 = load i32, i32* %r20
+%r22 = call i288 @mulPv256x32(i32* %r2, i32 %r21)
+%r23 = zext i288 %r22 to i320
+%r24 = add i320 %r18, %r23
+%r25 = trunc i320 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i288 @mulPv256x32(i32* %r4, i32 %r26)
+%r28 = zext i288 %r27 to i320
+%r29 = add i320 %r24, %r28
+%r30 = lshr i320 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2 ; round 2: word 2 of %r3
+%r33 = load i32, i32* %r32
+%r34 = call i288 @mulPv256x32(i32* %r2, i32 %r33)
+%r35 = zext i288 %r34 to i320
+%r36 = add i320 %r30, %r35
+%r37 = trunc i320 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i288 @mulPv256x32(i32* %r4, i32 %r38)
+%r40 = zext i288 %r39 to i320
+%r41 = add i320 %r36, %r40
+%r42 = lshr i320 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3 ; round 3: word 3 of %r3
+%r45 = load i32, i32* %r44
+%r46 = call i288 @mulPv256x32(i32* %r2, i32 %r45)
+%r47 = zext i288 %r46 to i320
+%r48 = add i320 %r42, %r47
+%r49 = trunc i320 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i288 @mulPv256x32(i32* %r4, i32 %r50)
+%r52 = zext i288 %r51 to i320
+%r53 = add i320 %r48, %r52
+%r54 = lshr i320 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4 ; round 4: word 4 of %r3
+%r57 = load i32, i32* %r56
+%r58 = call i288 @mulPv256x32(i32* %r2, i32 %r57)
+%r59 = zext i288 %r58 to i320
+%r60 = add i320 %r54, %r59
+%r61 = trunc i320 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i288 @mulPv256x32(i32* %r4, i32 %r62)
+%r64 = zext i288 %r63 to i320
+%r65 = add i320 %r60, %r64
+%r66 = lshr i320 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5 ; round 5: word 5 of %r3
+%r69 = load i32, i32* %r68
+%r70 = call i288 @mulPv256x32(i32* %r2, i32 %r69)
+%r71 = zext i288 %r70 to i320
+%r72 = add i320 %r66, %r71
+%r73 = trunc i320 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i288 @mulPv256x32(i32* %r4, i32 %r74)
+%r76 = zext i288 %r75 to i320
+%r77 = add i320 %r72, %r76
+%r78 = lshr i320 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6 ; round 6: word 6 of %r3
+%r81 = load i32, i32* %r80
+%r82 = call i288 @mulPv256x32(i32* %r2, i32 %r81)
+%r83 = zext i288 %r82 to i320
+%r84 = add i320 %r78, %r83
+%r85 = trunc i320 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i288 @mulPv256x32(i32* %r4, i32 %r86)
+%r88 = zext i288 %r87 to i320
+%r89 = add i320 %r84, %r88
+%r90 = lshr i320 %r89, 32
+%r92 = getelementptr i32, i32* %r3, i32 7 ; round 7: word 7 of %r3 (last round)
+%r93 = load i32, i32* %r92
+%r94 = call i288 @mulPv256x32(i32* %r2, i32 %r93)
+%r95 = zext i288 %r94 to i320
+%r96 = add i320 %r90, %r95
+%r97 = trunc i320 %r96 to i32
+%r98 = mul i32 %r97, %r7
+%r99 = call i288 @mulPv256x32(i32* %r4, i32 %r98)
+%r100 = zext i288 %r99 to i320
+%r101 = add i320 %r96, %r100
+%r102 = lshr i320 %r101, 32
+%r103 = trunc i320 %r102 to i288 ; unreduced result, kept with one extra 32-bit word
+%r104 = load i32, i32* %r4 ; gather %r4[0..7] into one 256-bit integer
+%r105 = zext i32 %r104 to i64
+%r107 = getelementptr i32, i32* %r4, i32 1
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i64
+%r110 = shl i64 %r109, 32
+%r111 = or i64 %r105, %r110
+%r112 = zext i64 %r111 to i96
+%r114 = getelementptr i32, i32* %r4, i32 2
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i96
+%r117 = shl i96 %r116, 64
+%r118 = or i96 %r112, %r117
+%r119 = zext i96 %r118 to i128
+%r121 = getelementptr i32, i32* %r4, i32 3
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i128
+%r124 = shl i128 %r123, 96
+%r125 = or i128 %r119, %r124
+%r126 = zext i128 %r125 to i160
+%r128 = getelementptr i32, i32* %r4, i32 4
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i160
+%r131 = shl i160 %r130, 128
+%r132 = or i160 %r126, %r131
+%r133 = zext i160 %r132 to i192
+%r135 = getelementptr i32, i32* %r4, i32 5
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i192
+%r138 = shl i192 %r137, 160
+%r139 = or i192 %r133, %r138
+%r140 = zext i192 %r139 to i224
+%r142 = getelementptr i32, i32* %r4, i32 6
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i224
+%r145 = shl i224 %r144, 192
+%r146 = or i224 %r140, %r145
+%r147 = zext i224 %r146 to i256
+%r149 = getelementptr i32, i32* %r4, i32 7
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i256
+%r152 = shl i256 %r151, 224
+%r153 = or i256 %r147, %r152
+%r154 = zext i256 %r153 to i288
+%r155 = sub i288 %r103, %r154 ; trial subtraction of the %r4 value
+%r156 = lshr i288 %r155, 256
+%r157 = trunc i288 %r156 to i1 ; bit 256 of the difference, used as the borrow flag
+%r158 = select i1 %r157, i288 %r103, i288 %r155 ; flag set: keep the unsubtracted value; otherwise take the difference
+%r159 = trunc i288 %r158 to i256
+%r160 = trunc i256 %r159 to i32 ; store the 8 result words to %r1, low word first
+%r162 = getelementptr i32, i32* %r1, i32 0
+store i32 %r160, i32* %r162
+%r163 = lshr i256 %r159, 32
+%r164 = trunc i256 %r163 to i32
+%r166 = getelementptr i32, i32* %r1, i32 1
+store i32 %r164, i32* %r166
+%r167 = lshr i256 %r163, 32
+%r168 = trunc i256 %r167 to i32
+%r170 = getelementptr i32, i32* %r1, i32 2
+store i32 %r168, i32* %r170
+%r171 = lshr i256 %r167, 32
+%r172 = trunc i256 %r171 to i32
+%r174 = getelementptr i32, i32* %r1, i32 3
+store i32 %r172, i32* %r174
+%r175 = lshr i256 %r171, 32
+%r176 = trunc i256 %r175 to i32
+%r178 = getelementptr i32, i32* %r1, i32 4
+store i32 %r176, i32* %r178
+%r179 = lshr i256 %r175, 32
+%r180 = trunc i256 %r179 to i32
+%r182 = getelementptr i32, i32* %r1, i32 5
+store i32 %r180, i32* %r182
+%r183 = lshr i256 %r179, 32
+%r184 = trunc i256 %r183 to i32
+%r186 = getelementptr i32, i32* %r1, i32 6
+store i32 %r184, i32* %r186
+%r187 = lshr i256 %r183, 32
+%r188 = trunc i256 %r187 to i32
+%r190 = getelementptr i32, i32* %r1, i32 7
+store i32 %r188, i32* %r190
+ret void
+}
+define void @mcl_fp_montNF8L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; same round structure as mcl_fp_mont8L but with a 288-bit accumulator; writes 8 words to %r1
+{ ; NOTE(review): presumably the variant for a modulus whose top bit is clear, since no carry word beyond 288 bits is kept - confirm
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; the word stored immediately before the %r4 array
+%r7 = load i32, i32* %r6 ; per-round multiplier; NOTE(review): presumably the negated inverse of %r4[0] mod 2^32 - confirm
+%r8 = load i32, i32* %r3 ; round 0: word 0 of %r3
+%r9 = call i288 @mulPv256x32(i32* %r2, i32 %r8) ; mulPv256x32 is defined elsewhere; presumably (8 words at %r2) * %r8
+%r10 = trunc i288 %r9 to i32
+%r11 = mul i32 %r10, %r7 ; q = low word of the accumulator * %r7, mod 2^32
+%r12 = call i288 @mulPv256x32(i32* %r4, i32 %r11)
+%r13 = add i288 %r9, %r12 ; i288 add wraps; no wider accumulator is used here
+%r14 = lshr i288 %r13, 32 ; drop the low word before the next round
+%r16 = getelementptr i32, i32* %r3, i32 1 ; round 1: word 1 of %r3
+%r17 = load i32, i32* %r16
+%r18 = call i288 @mulPv256x32(i32* %r2, i32 %r17)
+%r19 = add i288 %r14, %r18
+%r20 = trunc i288 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i288 @mulPv256x32(i32* %r4, i32 %r21)
+%r23 = add i288 %r19, %r22
+%r24 = lshr i288 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2 ; round 2: word 2 of %r3
+%r27 = load i32, i32* %r26
+%r28 = call i288 @mulPv256x32(i32* %r2, i32 %r27)
+%r29 = add i288 %r24, %r28
+%r30 = trunc i288 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i288 @mulPv256x32(i32* %r4, i32 %r31)
+%r33 = add i288 %r29, %r32
+%r34 = lshr i288 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3 ; round 3: word 3 of %r3
+%r37 = load i32, i32* %r36
+%r38 = call i288 @mulPv256x32(i32* %r2, i32 %r37)
+%r39 = add i288 %r34, %r38
+%r40 = trunc i288 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i288 @mulPv256x32(i32* %r4, i32 %r41)
+%r43 = add i288 %r39, %r42
+%r44 = lshr i288 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4 ; round 4: word 4 of %r3
+%r47 = load i32, i32* %r46
+%r48 = call i288 @mulPv256x32(i32* %r2, i32 %r47)
+%r49 = add i288 %r44, %r48
+%r50 = trunc i288 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i288 @mulPv256x32(i32* %r4, i32 %r51)
+%r53 = add i288 %r49, %r52
+%r54 = lshr i288 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5 ; round 5: word 5 of %r3
+%r57 = load i32, i32* %r56
+%r58 = call i288 @mulPv256x32(i32* %r2, i32 %r57)
+%r59 = add i288 %r54, %r58
+%r60 = trunc i288 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i288 @mulPv256x32(i32* %r4, i32 %r61)
+%r63 = add i288 %r59, %r62
+%r64 = lshr i288 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6 ; round 6: word 6 of %r3
+%r67 = load i32, i32* %r66
+%r68 = call i288 @mulPv256x32(i32* %r2, i32 %r67)
+%r69 = add i288 %r64, %r68
+%r70 = trunc i288 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i288 @mulPv256x32(i32* %r4, i32 %r71)
+%r73 = add i288 %r69, %r72
+%r74 = lshr i288 %r73, 32
+%r76 = getelementptr i32, i32* %r3, i32 7 ; round 7: word 7 of %r3 (last round)
+%r77 = load i32, i32* %r76
+%r78 = call i288 @mulPv256x32(i32* %r2, i32 %r77)
+%r79 = add i288 %r74, %r78
+%r80 = trunc i288 %r79 to i32
+%r81 = mul i32 %r80, %r7
+%r82 = call i288 @mulPv256x32(i32* %r4, i32 %r81)
+%r83 = add i288 %r79, %r82
+%r84 = lshr i288 %r83, 32
+%r85 = trunc i288 %r84 to i256 ; unreduced result, taken as 256 bits
+%r86 = load i32, i32* %r4 ; gather %r4[0..7] into one 256-bit integer
+%r87 = zext i32 %r86 to i64
+%r89 = getelementptr i32, i32* %r4, i32 1
+%r90 = load i32, i32* %r89
+%r91 = zext i32 %r90 to i64
+%r92 = shl i64 %r91, 32
+%r93 = or i64 %r87, %r92
+%r94 = zext i64 %r93 to i96
+%r96 = getelementptr i32, i32* %r4, i32 2
+%r97 = load i32, i32* %r96
+%r98 = zext i32 %r97 to i96
+%r99 = shl i96 %r98, 64
+%r100 = or i96 %r94, %r99
+%r101 = zext i96 %r100 to i128
+%r103 = getelementptr i32, i32* %r4, i32 3
+%r104 = load i32, i32* %r103
+%r105 = zext i32 %r104 to i128
+%r106 = shl i128 %r105, 96
+%r107 = or i128 %r101, %r106
+%r108 = zext i128 %r107 to i160
+%r110 = getelementptr i32, i32* %r4, i32 4
+%r111 = load i32, i32* %r110
+%r112 = zext i32 %r111 to i160
+%r113 = shl i160 %r112, 128
+%r114 = or i160 %r108, %r113
+%r115 = zext i160 %r114 to i192
+%r117 = getelementptr i32, i32* %r4, i32 5
+%r118 = load i32, i32* %r117
+%r119 = zext i32 %r118 to i192
+%r120 = shl i192 %r119, 160
+%r121 = or i192 %r115, %r120
+%r122 = zext i192 %r121 to i224
+%r124 = getelementptr i32, i32* %r4, i32 6
+%r125 = load i32, i32* %r124
+%r126 = zext i32 %r125 to i224
+%r127 = shl i224 %r126, 192
+%r128 = or i224 %r122, %r127
+%r129 = zext i224 %r128 to i256
+%r131 = getelementptr i32, i32* %r4, i32 7
+%r132 = load i32, i32* %r131
+%r133 = zext i32 %r132 to i256
+%r134 = shl i256 %r133, 224
+%r135 = or i256 %r129, %r134
+%r136 = sub i256 %r85, %r135 ; trial subtraction of the %r4 value, in 256 bits
+%r137 = lshr i256 %r136, 255
+%r138 = trunc i256 %r137 to i1 ; top bit (bit 255) of the difference, used as the sign flag
+%r139 = select i1 %r138, i256 %r85, i256 %r136 ; flag set: keep the unsubtracted value; otherwise take the difference
+%r140 = trunc i256 %r139 to i32 ; store the 8 result words to %r1, low word first
+%r142 = getelementptr i32, i32* %r1, i32 0
+store i32 %r140, i32* %r142
+%r143 = lshr i256 %r139, 32
+%r144 = trunc i256 %r143 to i32
+%r146 = getelementptr i32, i32* %r1, i32 1
+store i32 %r144, i32* %r146
+%r147 = lshr i256 %r143, 32
+%r148 = trunc i256 %r147 to i32
+%r150 = getelementptr i32, i32* %r1, i32 2
+store i32 %r148, i32* %r150
+%r151 = lshr i256 %r147, 32
+%r152 = trunc i256 %r151 to i32
+%r154 = getelementptr i32, i32* %r1, i32 3
+store i32 %r152, i32* %r154
+%r155 = lshr i256 %r151, 32
+%r156 = trunc i256 %r155 to i32
+%r158 = getelementptr i32, i32* %r1, i32 4
+store i32 %r156, i32* %r158
+%r159 = lshr i256 %r155, 32
+%r160 = trunc i256 %r159 to i32
+%r162 = getelementptr i32, i32* %r1, i32 5
+store i32 %r160, i32* %r162
+%r163 = lshr i256 %r159, 32
+%r164 = trunc i256 %r163 to i32
+%r166 = getelementptr i32, i32* %r1, i32 6
+store i32 %r164, i32* %r166
+%r167 = lshr i256 %r163, 32
+%r168 = trunc i256 %r167 to i32
+%r170 = getelementptr i32, i32* %r1, i32 7
+store i32 %r168, i32* %r170
+ret void
+}
+define void @mcl_fp_montRed8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = load i32, i32* %r2
+%r58 = zext i32 %r57 to i64
+%r60 = getelementptr i32, i32* %r2, i32 1
+%r61 = load i32, i32* %r60
+%r62 = zext i32 %r61 to i64
+%r63 = shl i64 %r62, 32
+%r64 = or i64 %r58, %r63
+%r65 = zext i64 %r64 to i96
+%r67 = getelementptr i32, i32* %r2, i32 2
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i96
+%r70 = shl i96 %r69, 64
+%r71 = or i96 %r65, %r70
+%r72 = zext i96 %r71 to i128
+%r74 = getelementptr i32, i32* %r2, i32 3
+%r75 = load i32, i32* %r74
+%r76 = zext i32 %r75 to i128
+%r77 = shl i128 %r76, 96
+%r78 = or i128 %r72, %r77
+%r79 = zext i128 %r78 to i160
+%r81 = getelementptr i32, i32* %r2, i32 4
+%r82 = load i32, i32* %r81
+%r83 = zext i32 %r82 to i160
+%r84 = shl i160 %r83, 128
+%r85 = or i160 %r79, %r84
+%r86 = zext i160 %r85 to i192
+%r88 = getelementptr i32, i32* %r2, i32 5
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i192
+%r91 = shl i192 %r90, 160
+%r92 = or i192 %r86, %r91
+%r93 = zext i192 %r92 to i224
+%r95 = getelementptr i32, i32* %r2, i32 6
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i224
+%r98 = shl i224 %r97, 192
+%r99 = or i224 %r93, %r98
+%r100 = zext i224 %r99 to i256
+%r102 = getelementptr i32, i32* %r2, i32 7
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i256
+%r105 = shl i256 %r104, 224
+%r106 = or i256 %r100, %r105
+%r107 = zext i256 %r106 to i288
+%r109 = getelementptr i32, i32* %r2, i32 8
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i288
+%r112 = shl i288 %r111, 256
+%r113 = or i288 %r107, %r112
+%r114 = zext i288 %r113 to i320
+%r116 = getelementptr i32, i32* %r2, i32 9
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i320
+%r119 = shl i320 %r118, 288
+%r120 = or i320 %r114, %r119
+%r121 = zext i320 %r120 to i352
+%r123 = getelementptr i32, i32* %r2, i32 10
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i352
+%r126 = shl i352 %r125, 320
+%r127 = or i352 %r121, %r126
+%r128 = zext i352 %r127 to i384
+%r130 = getelementptr i32, i32* %r2, i32 11
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i384
+%r133 = shl i384 %r132, 352
+%r134 = or i384 %r128, %r133
+%r135 = zext i384 %r134 to i416
+%r137 = getelementptr i32, i32* %r2, i32 12
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i416
+%r140 = shl i416 %r139, 384
+%r141 = or i416 %r135, %r140
+%r142 = zext i416 %r141 to i448
+%r144 = getelementptr i32, i32* %r2, i32 13
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i448
+%r147 = shl i448 %r146, 416
+%r148 = or i448 %r142, %r147
+%r149 = zext i448 %r148 to i480
+%r151 = getelementptr i32, i32* %r2, i32 14
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i480
+%r154 = shl i480 %r153, 448
+%r155 = or i480 %r149, %r154
+%r156 = zext i480 %r155 to i512
+%r158 = getelementptr i32, i32* %r2, i32 15
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i512
+%r161 = shl i512 %r160, 480
+%r162 = or i512 %r156, %r161
+%r163 = zext i512 %r162 to i544
+%r164 = trunc i544 %r163 to i32
+%r165 = mul i32 %r164, %r6
+%r166 = call i288 @mulPv256x32(i32* %r3, i32 %r165)
+%r167 = zext i288 %r166 to i544
+%r168 = add i544 %r163, %r167
+%r169 = lshr i544 %r168, 32
+%r170 = trunc i544 %r169 to i512
+%r171 = trunc i512 %r170 to i32
+%r172 = mul i32 %r171, %r6
+%r173 = call i288 @mulPv256x32(i32* %r3, i32 %r172)
+%r174 = zext i288 %r173 to i512
+%r175 = add i512 %r170, %r174
+%r176 = lshr i512 %r175, 32
+%r177 = trunc i512 %r176 to i480
+%r178 = trunc i480 %r177 to i32
+%r179 = mul i32 %r178, %r6
+%r180 = call i288 @mulPv256x32(i32* %r3, i32 %r179)
+%r181 = zext i288 %r180 to i480
+%r182 = add i480 %r177, %r181
+%r183 = lshr i480 %r182, 32
+%r184 = trunc i480 %r183 to i448
+%r185 = trunc i448 %r184 to i32
+%r186 = mul i32 %r185, %r6
+%r187 = call i288 @mulPv256x32(i32* %r3, i32 %r186)
+%r188 = zext i288 %r187 to i448
+%r189 = add i448 %r184, %r188
+%r190 = lshr i448 %r189, 32
+%r191 = trunc i448 %r190 to i416
+%r192 = trunc i416 %r191 to i32
+%r193 = mul i32 %r192, %r6
+%r194 = call i288 @mulPv256x32(i32* %r3, i32 %r193)
+%r195 = zext i288 %r194 to i416
+%r196 = add i416 %r191, %r195
+%r197 = lshr i416 %r196, 32
+%r198 = trunc i416 %r197 to i384
+%r199 = trunc i384 %r198 to i32
+%r200 = mul i32 %r199, %r6
+%r201 = call i288 @mulPv256x32(i32* %r3, i32 %r200)
+%r202 = zext i288 %r201 to i384
+%r203 = add i384 %r198, %r202
+%r204 = lshr i384 %r203, 32
+%r205 = trunc i384 %r204 to i352
+%r206 = trunc i352 %r205 to i32
+%r207 = mul i32 %r206, %r6
+%r208 = call i288 @mulPv256x32(i32* %r3, i32 %r207)
+%r209 = zext i288 %r208 to i352
+%r210 = add i352 %r205, %r209
+%r211 = lshr i352 %r210, 32
+%r212 = trunc i352 %r211 to i320
+%r213 = trunc i320 %r212 to i32
+%r214 = mul i32 %r213, %r6
+%r215 = call i288 @mulPv256x32(i32* %r3, i32 %r214)
+%r216 = zext i288 %r215 to i320
+%r217 = add i320 %r212, %r216
+%r218 = lshr i320 %r217, 32
+%r219 = trunc i320 %r218 to i288
+%r220 = zext i256 %r56 to i288
+%r221 = sub i288 %r219, %r220
+%r222 = lshr i288 %r221, 256
+%r223 = trunc i288 %r222 to i1
+%r224 = select i1 %r223, i288 %r219, i288 %r221
+%r225 = trunc i288 %r224 to i256
+%r226 = trunc i256 %r225 to i32
+%r228 = getelementptr i32, i32* %r1, i32 0
+store i32 %r226, i32* %r228
+%r229 = lshr i256 %r225, 32
+%r230 = trunc i256 %r229 to i32
+%r232 = getelementptr i32, i32* %r1, i32 1
+store i32 %r230, i32* %r232
+%r233 = lshr i256 %r229, 32
+%r234 = trunc i256 %r233 to i32
+%r236 = getelementptr i32, i32* %r1, i32 2
+store i32 %r234, i32* %r236
+%r237 = lshr i256 %r233, 32
+%r238 = trunc i256 %r237 to i32
+%r240 = getelementptr i32, i32* %r1, i32 3
+store i32 %r238, i32* %r240
+%r241 = lshr i256 %r237, 32
+%r242 = trunc i256 %r241 to i32
+%r244 = getelementptr i32, i32* %r1, i32 4
+store i32 %r242, i32* %r244
+%r245 = lshr i256 %r241, 32
+%r246 = trunc i256 %r245 to i32
+%r248 = getelementptr i32, i32* %r1, i32 5
+store i32 %r246, i32* %r248
+%r249 = lshr i256 %r245, 32
+%r250 = trunc i256 %r249 to i32
+%r252 = getelementptr i32, i32* %r1, i32 6
+store i32 %r250, i32* %r252
+%r253 = lshr i256 %r249, 32
+%r254 = trunc i256 %r253 to i32
+%r256 = getelementptr i32, i32* %r1, i32 7
+store i32 %r254, i32* %r256
+ret void
+}
+define i32 @mcl_fp_addPre8L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; [%r2] = low 256 bits of [%r3] + [%r4] (each operand is eight i32 words); returns the carry out of bit 255 (0 or 1)
+{
+%r5 = load i32, i32* %r3 ; word 0 of the first operand -> bits 0..31
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32 ; word i is shifted to bits 32*i..32*i+31 and OR-ed in; the pattern repeats for words 2..7
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53 ; %r54 = all eight words of [%r3] packed into one i256
+%r55 = zext i256 %r54 to i288 ; widened by 32 bits so the carry of the addition lands in bit 256
+%r56 = load i32, i32* %r4 ; second operand: same word-packing sequence applied to [%r4]
+%r57 = zext i32 %r56 to i64
+%r59 = getelementptr i32, i32* %r4, i32 1
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i64
+%r62 = shl i64 %r61, 32
+%r63 = or i64 %r57, %r62
+%r64 = zext i64 %r63 to i96
+%r66 = getelementptr i32, i32* %r4, i32 2
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i96
+%r69 = shl i96 %r68, 64
+%r70 = or i96 %r64, %r69
+%r71 = zext i96 %r70 to i128
+%r73 = getelementptr i32, i32* %r4, i32 3
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i128
+%r76 = shl i128 %r75, 96
+%r77 = or i128 %r71, %r76
+%r78 = zext i128 %r77 to i160
+%r80 = getelementptr i32, i32* %r4, i32 4
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i160
+%r83 = shl i160 %r82, 128
+%r84 = or i160 %r78, %r83
+%r85 = zext i160 %r84 to i192
+%r87 = getelementptr i32, i32* %r4, i32 5
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i192
+%r90 = shl i192 %r89, 160
+%r91 = or i192 %r85, %r90
+%r92 = zext i192 %r91 to i224
+%r94 = getelementptr i32, i32* %r4, i32 6
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i224
+%r97 = shl i224 %r96, 192
+%r98 = or i224 %r92, %r97
+%r99 = zext i224 %r98 to i256
+%r101 = getelementptr i32, i32* %r4, i32 7
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i256
+%r104 = shl i256 %r103, 224
+%r105 = or i256 %r99, %r104 ; %r105 = all eight words of [%r4] packed into one i256
+%r106 = zext i256 %r105 to i288
+%r107 = add i288 %r55, %r106 ; full sum; both inputs are zero-extended 256-bit values, so only bit 256 can be set above bit 255
+%r108 = trunc i288 %r107 to i256 ; low 256 bits = the value written to [%r2]
+%r109 = trunc i256 %r108 to i32
+%r111 = getelementptr i32, i32* %r2, i32 0
+store i32 %r109, i32* %r111 ; result word 0 (bits 0..31)
+%r112 = lshr i256 %r108, 32 ; shift the next word down into the low 32 bits; repeated for words 1..7
+%r113 = trunc i256 %r112 to i32
+%r115 = getelementptr i32, i32* %r2, i32 1
+store i32 %r113, i32* %r115
+%r116 = lshr i256 %r112, 32
+%r117 = trunc i256 %r116 to i32
+%r119 = getelementptr i32, i32* %r2, i32 2
+store i32 %r117, i32* %r119
+%r120 = lshr i256 %r116, 32
+%r121 = trunc i256 %r120 to i32
+%r123 = getelementptr i32, i32* %r2, i32 3
+store i32 %r121, i32* %r123
+%r124 = lshr i256 %r120, 32
+%r125 = trunc i256 %r124 to i32
+%r127 = getelementptr i32, i32* %r2, i32 4
+store i32 %r125, i32* %r127
+%r128 = lshr i256 %r124, 32
+%r129 = trunc i256 %r128 to i32
+%r131 = getelementptr i32, i32* %r2, i32 5
+store i32 %r129, i32* %r131
+%r132 = lshr i256 %r128, 32
+%r133 = trunc i256 %r132 to i32
+%r135 = getelementptr i32, i32* %r2, i32 6
+store i32 %r133, i32* %r135
+%r136 = lshr i256 %r132, 32
+%r137 = trunc i256 %r136 to i32
+%r139 = getelementptr i32, i32* %r2, i32 7
+store i32 %r137, i32* %r139 ; result word 7 (bits 224..255)
+%r140 = lshr i288 %r107, 256 ; bits 256..287 of the sum = the carry
+%r141 = trunc i288 %r140 to i32
+ret i32 %r141 ; carry-out: 0 or 1
+}
+define i32 @mcl_fp_subPre8L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; [%r2] = low 256 bits of [%r3] - [%r4] (each operand is eight i32 words); returns the borrow (0 or 1)
+{
+%r5 = load i32, i32* %r3 ; word 0 of the minuend -> bits 0..31
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32 ; word i is shifted to bits 32*i..32*i+31 and OR-ed in; the pattern repeats for words 2..7
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53 ; %r54 = all eight words of [%r3] packed into one i256
+%r55 = zext i256 %r54 to i288 ; widened by 32 bits so a borrow shows up above bit 255
+%r56 = load i32, i32* %r4 ; subtrahend: same word-packing sequence applied to [%r4]
+%r57 = zext i32 %r56 to i64
+%r59 = getelementptr i32, i32* %r4, i32 1
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i64
+%r62 = shl i64 %r61, 32
+%r63 = or i64 %r57, %r62
+%r64 = zext i64 %r63 to i96
+%r66 = getelementptr i32, i32* %r4, i32 2
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i96
+%r69 = shl i96 %r68, 64
+%r70 = or i96 %r64, %r69
+%r71 = zext i96 %r70 to i128
+%r73 = getelementptr i32, i32* %r4, i32 3
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i128
+%r76 = shl i128 %r75, 96
+%r77 = or i128 %r71, %r76
+%r78 = zext i128 %r77 to i160
+%r80 = getelementptr i32, i32* %r4, i32 4
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i160
+%r83 = shl i160 %r82, 128
+%r84 = or i160 %r78, %r83
+%r85 = zext i160 %r84 to i192
+%r87 = getelementptr i32, i32* %r4, i32 5
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i192
+%r90 = shl i192 %r89, 160
+%r91 = or i192 %r85, %r90
+%r92 = zext i192 %r91 to i224
+%r94 = getelementptr i32, i32* %r4, i32 6
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i224
+%r97 = shl i224 %r96, 192
+%r98 = or i224 %r92, %r97
+%r99 = zext i224 %r98 to i256
+%r101 = getelementptr i32, i32* %r4, i32 7
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i256
+%r104 = shl i256 %r103, 224
+%r105 = or i256 %r99, %r104 ; %r105 = all eight words of [%r4] packed into one i256
+%r106 = zext i256 %r105 to i288
+%r107 = sub i288 %r55, %r106 ; wraps modulo 2^288 when [%r3] < [%r4], setting every bit from 256 upward
+%r108 = trunc i288 %r107 to i256 ; low 256 bits = the value written to [%r2]
+%r109 = trunc i256 %r108 to i32
+%r111 = getelementptr i32, i32* %r2, i32 0
+store i32 %r109, i32* %r111 ; result word 0 (bits 0..31)
+%r112 = lshr i256 %r108, 32 ; shift the next word down into the low 32 bits; repeated for words 1..7
+%r113 = trunc i256 %r112 to i32
+%r115 = getelementptr i32, i32* %r2, i32 1
+store i32 %r113, i32* %r115
+%r116 = lshr i256 %r112, 32
+%r117 = trunc i256 %r116 to i32
+%r119 = getelementptr i32, i32* %r2, i32 2
+store i32 %r117, i32* %r119
+%r120 = lshr i256 %r116, 32
+%r121 = trunc i256 %r120 to i32
+%r123 = getelementptr i32, i32* %r2, i32 3
+store i32 %r121, i32* %r123
+%r124 = lshr i256 %r120, 32
+%r125 = trunc i256 %r124 to i32
+%r127 = getelementptr i32, i32* %r2, i32 4
+store i32 %r125, i32* %r127
+%r128 = lshr i256 %r124, 32
+%r129 = trunc i256 %r128 to i32
+%r131 = getelementptr i32, i32* %r2, i32 5
+store i32 %r129, i32* %r131
+%r132 = lshr i256 %r128, 32
+%r133 = trunc i256 %r132 to i32
+%r135 = getelementptr i32, i32* %r2, i32 6
+store i32 %r133, i32* %r135
+%r136 = lshr i256 %r132, 32
+%r137 = trunc i256 %r136 to i32
+%r139 = getelementptr i32, i32* %r2, i32 7
+store i32 %r137, i32* %r139 ; result word 7 (bits 224..255)
+%r140 = lshr i288 %r107, 256 ; bits 256..287: all zero without a borrow, all ones with one
+%r141 = trunc i288 %r140 to i32
+%r143 = and i32 %r141, 1 ; reduce the all-ones pattern to a single 0/1 borrow flag
+ret i32 %r143
+}
+define void @mcl_fp_shr1_8L(i32* noalias  %r1, i32* noalias  %r2) ; [%r1] = [%r2] >> 1, a logical right shift of the 256-bit value held as eight i32 words
+{
+%r3 = load i32, i32* %r2 ; word 0 of the input -> bits 0..31
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32 ; word i is shifted to bits 32*i..32*i+31 and OR-ed in; the pattern repeats for words 2..7
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = zext i192 %r38 to i224
+%r41 = getelementptr i32, i32* %r2, i32 6
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i224
+%r44 = shl i224 %r43, 192
+%r45 = or i224 %r39, %r44
+%r46 = zext i224 %r45 to i256
+%r48 = getelementptr i32, i32* %r2, i32 7
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i256
+%r51 = shl i256 %r50, 224
+%r52 = or i256 %r46, %r51 ; %r52 = all eight words of [%r2] packed into one i256
+%r53 = lshr i256 %r52, 1 ; the actual operation: bit 0 is dropped, bit 255 becomes 0
+%r54 = trunc i256 %r53 to i32
+%r56 = getelementptr i32, i32* %r1, i32 0
+store i32 %r54, i32* %r56 ; result word 0 (bits 0..31)
+%r57 = lshr i256 %r53, 32 ; shift the next word down into the low 32 bits; repeated for words 1..7
+%r58 = trunc i256 %r57 to i32
+%r60 = getelementptr i32, i32* %r1, i32 1
+store i32 %r58, i32* %r60
+%r61 = lshr i256 %r57, 32
+%r62 = trunc i256 %r61 to i32
+%r64 = getelementptr i32, i32* %r1, i32 2
+store i32 %r62, i32* %r64
+%r65 = lshr i256 %r61, 32
+%r66 = trunc i256 %r65 to i32
+%r68 = getelementptr i32, i32* %r1, i32 3
+store i32 %r66, i32* %r68
+%r69 = lshr i256 %r65, 32
+%r70 = trunc i256 %r69 to i32
+%r72 = getelementptr i32, i32* %r1, i32 4
+store i32 %r70, i32* %r72
+%r73 = lshr i256 %r69, 32
+%r74 = trunc i256 %r73 to i32
+%r76 = getelementptr i32, i32* %r1, i32 5
+store i32 %r74, i32* %r76
+%r77 = lshr i256 %r73, 32
+%r78 = trunc i256 %r77 to i32
+%r80 = getelementptr i32, i32* %r1, i32 6
+store i32 %r78, i32* %r80
+%r81 = lshr i256 %r77, 32
+%r82 = trunc i256 %r81 to i32
+%r84 = getelementptr i32, i32* %r1, i32 7
+store i32 %r82, i32* %r84 ; result word 7 (bits 224..255)
+ret void
+}
+define void @mcl_fp_add8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; [%r1] = [%r2] + [%r3], minus [%r4] once when the sum is >= [%r4]; all operands are eight i32 words. NOTE(review): %r4 is presumably the field modulus - inferred from the name, confirm against the generator
+{
+%r5 = load i32, i32* %r2 ; word 0 of the first addend -> bits 0..31
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32 ; word i is shifted to bits 32*i..32*i+31 and OR-ed in; the pattern repeats for words 2..7
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53 ; %r54 = all eight words of [%r2] packed into one i256
+%r55 = load i32, i32* %r3 ; second addend: same word-packing sequence applied to [%r3]
+%r56 = zext i32 %r55 to i64
+%r58 = getelementptr i32, i32* %r3, i32 1
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i64
+%r61 = shl i64 %r60, 32
+%r62 = or i64 %r56, %r61
+%r63 = zext i64 %r62 to i96
+%r65 = getelementptr i32, i32* %r3, i32 2
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i96
+%r68 = shl i96 %r67, 64
+%r69 = or i96 %r63, %r68
+%r70 = zext i96 %r69 to i128
+%r72 = getelementptr i32, i32* %r3, i32 3
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i128
+%r75 = shl i128 %r74, 96
+%r76 = or i128 %r70, %r75
+%r77 = zext i128 %r76 to i160
+%r79 = getelementptr i32, i32* %r3, i32 4
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i160
+%r82 = shl i160 %r81, 128
+%r83 = or i160 %r77, %r82
+%r84 = zext i160 %r83 to i192
+%r86 = getelementptr i32, i32* %r3, i32 5
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i192
+%r89 = shl i192 %r88, 160
+%r90 = or i192 %r84, %r89
+%r91 = zext i192 %r90 to i224
+%r93 = getelementptr i32, i32* %r3, i32 6
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i224
+%r96 = shl i224 %r95, 192
+%r97 = or i224 %r91, %r96
+%r98 = zext i224 %r97 to i256
+%r100 = getelementptr i32, i32* %r3, i32 7
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i256
+%r103 = shl i256 %r102, 224
+%r104 = or i256 %r98, %r103 ; %r104 = all eight words of [%r3] packed into one i256
+%r105 = zext i256 %r54 to i288 ; both addends widened by 32 bits so the sum keeps its carry
+%r106 = zext i256 %r104 to i288
+%r107 = add i288 %r105, %r106 ; full, unreduced sum
+%r108 = trunc i288 %r107 to i256
+%r109 = trunc i256 %r108 to i32
+%r111 = getelementptr i32, i32* %r1, i32 0
+store i32 %r109, i32* %r111 ; the low 256 bits of the unreduced sum are written to [%r1] first; they stay there if the subtraction below borrows
+%r112 = lshr i256 %r108, 32 ; shift the next word down into the low 32 bits; repeated for words 1..7
+%r113 = trunc i256 %r112 to i32
+%r115 = getelementptr i32, i32* %r1, i32 1
+store i32 %r113, i32* %r115
+%r116 = lshr i256 %r112, 32
+%r117 = trunc i256 %r116 to i32
+%r119 = getelementptr i32, i32* %r1, i32 2
+store i32 %r117, i32* %r119
+%r120 = lshr i256 %r116, 32
+%r121 = trunc i256 %r120 to i32
+%r123 = getelementptr i32, i32* %r1, i32 3
+store i32 %r121, i32* %r123
+%r124 = lshr i256 %r120, 32
+%r125 = trunc i256 %r124 to i32
+%r127 = getelementptr i32, i32* %r1, i32 4
+store i32 %r125, i32* %r127
+%r128 = lshr i256 %r124, 32
+%r129 = trunc i256 %r128 to i32
+%r131 = getelementptr i32, i32* %r1, i32 5
+store i32 %r129, i32* %r131
+%r132 = lshr i256 %r128, 32
+%r133 = trunc i256 %r132 to i32
+%r135 = getelementptr i32, i32* %r1, i32 6
+store i32 %r133, i32* %r135
+%r136 = lshr i256 %r132, 32
+%r137 = trunc i256 %r136 to i32
+%r139 = getelementptr i32, i32* %r1, i32 7
+store i32 %r137, i32* %r139
+%r140 = load i32, i32* %r4 ; third operand: same word-packing sequence applied to [%r4]
+%r141 = zext i32 %r140 to i64
+%r143 = getelementptr i32, i32* %r4, i32 1
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i64
+%r146 = shl i64 %r145, 32
+%r147 = or i64 %r141, %r146
+%r148 = zext i64 %r147 to i96
+%r150 = getelementptr i32, i32* %r4, i32 2
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i96
+%r153 = shl i96 %r152, 64
+%r154 = or i96 %r148, %r153
+%r155 = zext i96 %r154 to i128
+%r157 = getelementptr i32, i32* %r4, i32 3
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i128
+%r160 = shl i128 %r159, 96
+%r161 = or i128 %r155, %r160
+%r162 = zext i128 %r161 to i160
+%r164 = getelementptr i32, i32* %r4, i32 4
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i160
+%r167 = shl i160 %r166, 128
+%r168 = or i160 %r162, %r167
+%r169 = zext i160 %r168 to i192
+%r171 = getelementptr i32, i32* %r4, i32 5
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i192
+%r174 = shl i192 %r173, 160
+%r175 = or i192 %r169, %r174
+%r176 = zext i192 %r175 to i224
+%r178 = getelementptr i32, i32* %r4, i32 6
+%r179 = load i32, i32* %r178
+%r180 = zext i32 %r179 to i224
+%r181 = shl i224 %r180, 192
+%r182 = or i224 %r176, %r181
+%r183 = zext i224 %r182 to i256
+%r185 = getelementptr i32, i32* %r4, i32 7
+%r186 = load i32, i32* %r185
+%r187 = zext i32 %r186 to i256
+%r188 = shl i256 %r187, 224
+%r189 = or i256 %r183, %r188 ; %r189 = all eight words of [%r4] packed into one i256
+%r190 = zext i256 %r189 to i288
+%r191 = sub i288 %r107, %r190 ; trial subtraction: sum - [%r4], using the full 288-bit sum
+%r192 = lshr i288 %r191, 256
+%r193 = trunc i288 %r192 to i1 ; bit 256 of the difference: 1 exactly when the subtraction borrowed (sum < [%r4])
+br i1%r193, label %carry, label %nocarry
+nocarry: ; no borrow (sum >= [%r4]): overwrite [%r1] with sum - [%r4]
+%r194 = trunc i288 %r191 to i256
+%r195 = trunc i256 %r194 to i32
+%r197 = getelementptr i32, i32* %r1, i32 0
+store i32 %r195, i32* %r197
+%r198 = lshr i256 %r194, 32
+%r199 = trunc i256 %r198 to i32
+%r201 = getelementptr i32, i32* %r1, i32 1
+store i32 %r199, i32* %r201
+%r202 = lshr i256 %r198, 32
+%r203 = trunc i256 %r202 to i32
+%r205 = getelementptr i32, i32* %r1, i32 2
+store i32 %r203, i32* %r205
+%r206 = lshr i256 %r202, 32
+%r207 = trunc i256 %r206 to i32
+%r209 = getelementptr i32, i32* %r1, i32 3
+store i32 %r207, i32* %r209
+%r210 = lshr i256 %r206, 32
+%r211 = trunc i256 %r210 to i32
+%r213 = getelementptr i32, i32* %r1, i32 4
+store i32 %r211, i32* %r213
+%r214 = lshr i256 %r210, 32
+%r215 = trunc i256 %r214 to i32
+%r217 = getelementptr i32, i32* %r1, i32 5
+store i32 %r215, i32* %r217
+%r218 = lshr i256 %r214, 32
+%r219 = trunc i256 %r218 to i32
+%r221 = getelementptr i32, i32* %r1, i32 6
+store i32 %r219, i32* %r221
+%r222 = lshr i256 %r218, 32
+%r223 = trunc i256 %r222 to i32
+%r225 = getelementptr i32, i32* %r1, i32 7
+store i32 %r223, i32* %r225
+ret void
+carry: ; the trial subtraction borrowed: the sum already stored in [%r1] is the result
+ret void
+}
+define void @mcl_fp_addNF8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; branch-free variant: s = [%r2] + [%r3] and d = s - [%r4], both in 256 bits; [%r1] = s if bit 255 of d is set, else d. NOTE(review): %r4 is presumably the field modulus - confirm against the generator
+{
+%r5 = load i32, i32* %r2 ; word 0 of the first addend -> bits 0..31
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32 ; word i is shifted to bits 32*i..32*i+31 and OR-ed in; the pattern repeats for words 2..7
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53 ; %r54 = all eight words of [%r2] packed into one i256
+%r55 = load i32, i32* %r3 ; second addend: same word-packing sequence applied to [%r3]
+%r56 = zext i32 %r55 to i64
+%r58 = getelementptr i32, i32* %r3, i32 1
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i64
+%r61 = shl i64 %r60, 32
+%r62 = or i64 %r56, %r61
+%r63 = zext i64 %r62 to i96
+%r65 = getelementptr i32, i32* %r3, i32 2
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i96
+%r68 = shl i96 %r67, 64
+%r69 = or i96 %r63, %r68
+%r70 = zext i96 %r69 to i128
+%r72 = getelementptr i32, i32* %r3, i32 3
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i128
+%r75 = shl i128 %r74, 96
+%r76 = or i128 %r70, %r75
+%r77 = zext i128 %r76 to i160
+%r79 = getelementptr i32, i32* %r3, i32 4
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i160
+%r82 = shl i160 %r81, 128
+%r83 = or i160 %r77, %r82
+%r84 = zext i160 %r83 to i192
+%r86 = getelementptr i32, i32* %r3, i32 5
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i192
+%r89 = shl i192 %r88, 160
+%r90 = or i192 %r84, %r89
+%r91 = zext i192 %r90 to i224
+%r93 = getelementptr i32, i32* %r3, i32 6
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i224
+%r96 = shl i224 %r95, 192
+%r97 = or i224 %r91, %r96
+%r98 = zext i224 %r97 to i256
+%r100 = getelementptr i32, i32* %r3, i32 7
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i256
+%r103 = shl i256 %r102, 224
+%r104 = or i256 %r98, %r103 ; %r104 = all eight words of [%r3] packed into one i256
+%r105 = add i256 %r54, %r104 ; sum stays at 256 bits, so a carry out of bit 255 would be lost. NOTE(review): presumably callers guarantee the sum fits (top bit of the modulus unused) - confirm
+%r106 = load i32, i32* %r4 ; third operand: same word-packing sequence applied to [%r4]
+%r107 = zext i32 %r106 to i64
+%r109 = getelementptr i32, i32* %r4, i32 1
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i64
+%r112 = shl i64 %r111, 32
+%r113 = or i64 %r107, %r112
+%r114 = zext i64 %r113 to i96
+%r116 = getelementptr i32, i32* %r4, i32 2
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i96
+%r119 = shl i96 %r118, 64
+%r120 = or i96 %r114, %r119
+%r121 = zext i96 %r120 to i128
+%r123 = getelementptr i32, i32* %r4, i32 3
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i128
+%r126 = shl i128 %r125, 96
+%r127 = or i128 %r121, %r126
+%r128 = zext i128 %r127 to i160
+%r130 = getelementptr i32, i32* %r4, i32 4
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i160
+%r133 = shl i160 %r132, 128
+%r134 = or i160 %r128, %r133
+%r135 = zext i160 %r134 to i192
+%r137 = getelementptr i32, i32* %r4, i32 5
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i192
+%r140 = shl i192 %r139, 160
+%r141 = or i192 %r135, %r140
+%r142 = zext i192 %r141 to i224
+%r144 = getelementptr i32, i32* %r4, i32 6
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i224
+%r147 = shl i224 %r146, 192
+%r148 = or i224 %r142, %r147
+%r149 = zext i224 %r148 to i256
+%r151 = getelementptr i32, i32* %r4, i32 7
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i256
+%r154 = shl i256 %r153, 224
+%r155 = or i256 %r149, %r154 ; %r155 = all eight words of [%r4] packed into one i256
+%r156 = sub i256 %r105, %r155 ; trial subtraction in 256 bits (wraps on underflow)
+%r157 = lshr i256 %r156, 255
+%r158 = trunc i256 %r157 to i1 ; bit 255 of the difference, used here as its sign
+%r159 = select i1 %r158, i256 %r105, i256 %r156 ; sign set -> keep the plain sum, otherwise take sum - [%r4]; no branch involved
+%r160 = trunc i256 %r159 to i32
+%r162 = getelementptr i32, i32* %r1, i32 0
+store i32 %r160, i32* %r162 ; result word 0 (bits 0..31)
+%r163 = lshr i256 %r159, 32 ; shift the next word down into the low 32 bits; repeated for words 1..7
+%r164 = trunc i256 %r163 to i32
+%r166 = getelementptr i32, i32* %r1, i32 1
+store i32 %r164, i32* %r166
+%r167 = lshr i256 %r163, 32
+%r168 = trunc i256 %r167 to i32
+%r170 = getelementptr i32, i32* %r1, i32 2
+store i32 %r168, i32* %r170
+%r171 = lshr i256 %r167, 32
+%r172 = trunc i256 %r171 to i32
+%r174 = getelementptr i32, i32* %r1, i32 3
+store i32 %r172, i32* %r174
+%r175 = lshr i256 %r171, 32
+%r176 = trunc i256 %r175 to i32
+%r178 = getelementptr i32, i32* %r1, i32 4
+store i32 %r176, i32* %r178
+%r179 = lshr i256 %r175, 32
+%r180 = trunc i256 %r179 to i32
+%r182 = getelementptr i32, i32* %r1, i32 5
+store i32 %r180, i32* %r182
+%r183 = lshr i256 %r179, 32
+%r184 = trunc i256 %r183 to i32
+%r186 = getelementptr i32, i32* %r1, i32 6
+store i32 %r184, i32* %r186
+%r187 = lshr i256 %r183, 32
+%r188 = trunc i256 %r187 to i32
+%r190 = getelementptr i32, i32* %r1, i32 7
+store i32 %r188, i32* %r190 ; result word 7 (bits 224..255)
+ret void
+}
+define void @mcl_fp_sub8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; [%r1] = [%r2] - [%r3], plus [%r4] once when the subtraction borrows; all operands are eight i32 words. NOTE(review): %r4 is presumably the field modulus - inferred from the name, confirm against the generator
+{
+%r5 = load i32, i32* %r2 ; word 0 of the minuend -> bits 0..31
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32 ; word i is shifted to bits 32*i..32*i+31 and OR-ed in; the pattern repeats for words 2..7
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53 ; %r54 = all eight words of [%r2] packed into one i256
+%r55 = load i32, i32* %r3 ; subtrahend: same word-packing sequence applied to [%r3]
+%r56 = zext i32 %r55 to i64
+%r58 = getelementptr i32, i32* %r3, i32 1
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i64
+%r61 = shl i64 %r60, 32
+%r62 = or i64 %r56, %r61
+%r63 = zext i64 %r62 to i96
+%r65 = getelementptr i32, i32* %r3, i32 2
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i96
+%r68 = shl i96 %r67, 64
+%r69 = or i96 %r63, %r68
+%r70 = zext i96 %r69 to i128
+%r72 = getelementptr i32, i32* %r3, i32 3
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i128
+%r75 = shl i128 %r74, 96
+%r76 = or i128 %r70, %r75
+%r77 = zext i128 %r76 to i160
+%r79 = getelementptr i32, i32* %r3, i32 4
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i160
+%r82 = shl i160 %r81, 128
+%r83 = or i160 %r77, %r82
+%r84 = zext i160 %r83 to i192
+%r86 = getelementptr i32, i32* %r3, i32 5
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i192
+%r89 = shl i192 %r88, 160
+%r90 = or i192 %r84, %r89
+%r91 = zext i192 %r90 to i224
+%r93 = getelementptr i32, i32* %r3, i32 6
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i224
+%r96 = shl i224 %r95, 192
+%r97 = or i224 %r91, %r96
+%r98 = zext i224 %r97 to i256
+%r100 = getelementptr i32, i32* %r3, i32 7
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i256
+%r103 = shl i256 %r102, 224
+%r104 = or i256 %r98, %r103 ; %r104 = all eight words of [%r3] packed into one i256
+%r105 = zext i256 %r54 to i288 ; both operands widened by 32 bits so a borrow shows up above bit 255
+%r106 = zext i256 %r104 to i288
+%r107 = sub i288 %r105, %r106 ; wraps modulo 2^288 when [%r2] < [%r3]
+%r108 = trunc i288 %r107 to i256 ; low 256 bits of the raw difference
+%r109 = lshr i288 %r107, 256
+%r110 = trunc i288 %r109 to i1 ; bit 256 of the difference: 1 exactly when the subtraction borrowed
+%r111 = trunc i256 %r108 to i32
+%r113 = getelementptr i32, i32* %r1, i32 0
+store i32 %r111, i32* %r113 ; the raw difference is written to [%r1] first; it is the final result when there is no borrow
+%r114 = lshr i256 %r108, 32 ; shift the next word down into the low 32 bits; repeated for words 1..7
+%r115 = trunc i256 %r114 to i32
+%r117 = getelementptr i32, i32* %r1, i32 1
+store i32 %r115, i32* %r117
+%r118 = lshr i256 %r114, 32
+%r119 = trunc i256 %r118 to i32
+%r121 = getelementptr i32, i32* %r1, i32 2
+store i32 %r119, i32* %r121
+%r122 = lshr i256 %r118, 32
+%r123 = trunc i256 %r122 to i32
+%r125 = getelementptr i32, i32* %r1, i32 3
+store i32 %r123, i32* %r125
+%r126 = lshr i256 %r122, 32
+%r127 = trunc i256 %r126 to i32
+%r129 = getelementptr i32, i32* %r1, i32 4
+store i32 %r127, i32* %r129
+%r130 = lshr i256 %r126, 32
+%r131 = trunc i256 %r130 to i32
+%r133 = getelementptr i32, i32* %r1, i32 5
+store i32 %r131, i32* %r133
+%r134 = lshr i256 %r130, 32
+%r135 = trunc i256 %r134 to i32
+%r137 = getelementptr i32, i32* %r1, i32 6
+store i32 %r135, i32* %r137
+%r138 = lshr i256 %r134, 32
+%r139 = trunc i256 %r138 to i32
+%r141 = getelementptr i32, i32* %r1, i32 7
+store i32 %r139, i32* %r141
+br i1%r110, label %carry, label %nocarry
+nocarry: ; no borrow: the difference already stored in [%r1] is the result
+ret void
+carry: ; borrow ([%r2] < [%r3]): add [%r4] to the wrapped difference and store that instead
+%r142 = load i32, i32* %r4 ; same word-packing sequence applied to [%r4]
+%r143 = zext i32 %r142 to i64
+%r145 = getelementptr i32, i32* %r4, i32 1
+%r146 = load i32, i32* %r145
+%r147 = zext i32 %r146 to i64
+%r148 = shl i64 %r147, 32
+%r149 = or i64 %r143, %r148
+%r150 = zext i64 %r149 to i96
+%r152 = getelementptr i32, i32* %r4, i32 2
+%r153 = load i32, i32* %r152
+%r154 = zext i32 %r153 to i96
+%r155 = shl i96 %r154, 64
+%r156 = or i96 %r150, %r155
+%r157 = zext i96 %r156 to i128
+%r159 = getelementptr i32, i32* %r4, i32 3
+%r160 = load i32, i32* %r159
+%r161 = zext i32 %r160 to i128
+%r162 = shl i128 %r161, 96
+%r163 = or i128 %r157, %r162
+%r164 = zext i128 %r163 to i160
+%r166 = getelementptr i32, i32* %r4, i32 4
+%r167 = load i32, i32* %r166
+%r168 = zext i32 %r167 to i160
+%r169 = shl i160 %r168, 128
+%r170 = or i160 %r164, %r169
+%r171 = zext i160 %r170 to i192
+%r173 = getelementptr i32, i32* %r4, i32 5
+%r174 = load i32, i32* %r173
+%r175 = zext i32 %r174 to i192
+%r176 = shl i192 %r175, 160
+%r177 = or i192 %r171, %r176
+%r178 = zext i192 %r177 to i224
+%r180 = getelementptr i32, i32* %r4, i32 6
+%r181 = load i32, i32* %r180
+%r182 = zext i32 %r181 to i224
+%r183 = shl i224 %r182, 192
+%r184 = or i224 %r178, %r183
+%r185 = zext i224 %r184 to i256
+%r187 = getelementptr i32, i32* %r4, i32 7
+%r188 = load i32, i32* %r187
+%r189 = zext i32 %r188 to i256
+%r190 = shl i256 %r189, 224
+%r191 = or i256 %r185, %r190 ; %r191 = all eight words of [%r4] packed into one i256
+%r192 = add i256 %r108, %r191 ; 256-bit add; the carry out of bit 255 is intentionally not kept
+%r193 = trunc i256 %r192 to i32
+%r195 = getelementptr i32, i32* %r1, i32 0
+store i32 %r193, i32* %r195 ; corrected result word 0 (bits 0..31)
+%r196 = lshr i256 %r192, 32
+%r197 = trunc i256 %r196 to i32
+%r199 = getelementptr i32, i32* %r1, i32 1
+store i32 %r197, i32* %r199
+%r200 = lshr i256 %r196, 32
+%r201 = trunc i256 %r200 to i32
+%r203 = getelementptr i32, i32* %r1, i32 2
+store i32 %r201, i32* %r203
+%r204 = lshr i256 %r200, 32
+%r205 = trunc i256 %r204 to i32
+%r207 = getelementptr i32, i32* %r1, i32 3
+store i32 %r205, i32* %r207
+%r208 = lshr i256 %r204, 32
+%r209 = trunc i256 %r208 to i32
+%r211 = getelementptr i32, i32* %r1, i32 4
+store i32 %r209, i32* %r211
+%r212 = lshr i256 %r208, 32
+%r213 = trunc i256 %r212 to i32
+%r215 = getelementptr i32, i32* %r1, i32 5
+store i32 %r213, i32* %r215
+%r216 = lshr i256 %r212, 32
+%r217 = trunc i256 %r216 to i32
+%r219 = getelementptr i32, i32* %r1, i32 6
+store i32 %r217, i32* %r219
+%r220 = lshr i256 %r216, 32
+%r221 = trunc i256 %r220 to i32
+%r223 = getelementptr i32, i32* %r1, i32 7
+store i32 %r221, i32* %r223 ; corrected result word 7 (bits 224..255)
+ret void
+}
+define void @mcl_fp_subNF8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = load i32, i32* %r3
+%r56 = zext i32 %r55 to i64
+%r58 = getelementptr i32, i32* %r3, i32 1
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i64
+%r61 = shl i64 %r60, 32
+%r62 = or i64 %r56, %r61
+%r63 = zext i64 %r62 to i96
+%r65 = getelementptr i32, i32* %r3, i32 2
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i96
+%r68 = shl i96 %r67, 64
+%r69 = or i96 %r63, %r68
+%r70 = zext i96 %r69 to i128
+%r72 = getelementptr i32, i32* %r3, i32 3
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i128
+%r75 = shl i128 %r74, 96
+%r76 = or i128 %r70, %r75
+%r77 = zext i128 %r76 to i160
+%r79 = getelementptr i32, i32* %r3, i32 4
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i160
+%r82 = shl i160 %r81, 128
+%r83 = or i160 %r77, %r82
+%r84 = zext i160 %r83 to i192
+%r86 = getelementptr i32, i32* %r3, i32 5
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i192
+%r89 = shl i192 %r88, 160
+%r90 = or i192 %r84, %r89
+%r91 = zext i192 %r90 to i224
+%r93 = getelementptr i32, i32* %r3, i32 6
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i224
+%r96 = shl i224 %r95, 192
+%r97 = or i224 %r91, %r96
+%r98 = zext i224 %r97 to i256
+%r100 = getelementptr i32, i32* %r3, i32 7
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i256
+%r103 = shl i256 %r102, 224
+%r104 = or i256 %r98, %r103
+%r105 = sub i256 %r54, %r104
+%r106 = lshr i256 %r105, 255
+%r107 = trunc i256 %r106 to i1
+%r108 = load i32, i32* %r4
+%r109 = zext i32 %r108 to i64
+%r111 = getelementptr i32, i32* %r4, i32 1
+%r112 = load i32, i32* %r111
+%r113 = zext i32 %r112 to i64
+%r114 = shl i64 %r113, 32
+%r115 = or i64 %r109, %r114
+%r116 = zext i64 %r115 to i96
+%r118 = getelementptr i32, i32* %r4, i32 2
+%r119 = load i32, i32* %r118
+%r120 = zext i32 %r119 to i96
+%r121 = shl i96 %r120, 64
+%r122 = or i96 %r116, %r121
+%r123 = zext i96 %r122 to i128
+%r125 = getelementptr i32, i32* %r4, i32 3
+%r126 = load i32, i32* %r125
+%r127 = zext i32 %r126 to i128
+%r128 = shl i128 %r127, 96
+%r129 = or i128 %r123, %r128
+%r130 = zext i128 %r129 to i160
+%r132 = getelementptr i32, i32* %r4, i32 4
+%r133 = load i32, i32* %r132
+%r134 = zext i32 %r133 to i160
+%r135 = shl i160 %r134, 128
+%r136 = or i160 %r130, %r135
+%r137 = zext i160 %r136 to i192
+%r139 = getelementptr i32, i32* %r4, i32 5
+%r140 = load i32, i32* %r139
+%r141 = zext i32 %r140 to i192
+%r142 = shl i192 %r141, 160
+%r143 = or i192 %r137, %r142
+%r144 = zext i192 %r143 to i224
+%r146 = getelementptr i32, i32* %r4, i32 6
+%r147 = load i32, i32* %r146
+%r148 = zext i32 %r147 to i224
+%r149 = shl i224 %r148, 192
+%r150 = or i224 %r144, %r149
+%r151 = zext i224 %r150 to i256
+%r153 = getelementptr i32, i32* %r4, i32 7
+%r154 = load i32, i32* %r153
+%r155 = zext i32 %r154 to i256
+%r156 = shl i256 %r155, 224
+%r157 = or i256 %r151, %r156
+%r159 = select i1 %r107, i256 %r157, i256 0
+%r160 = add i256 %r105, %r159
+%r161 = trunc i256 %r160 to i32
+%r163 = getelementptr i32, i32* %r1, i32 0
+store i32 %r161, i32* %r163
+%r164 = lshr i256 %r160, 32
+%r165 = trunc i256 %r164 to i32
+%r167 = getelementptr i32, i32* %r1, i32 1
+store i32 %r165, i32* %r167
+%r168 = lshr i256 %r164, 32
+%r169 = trunc i256 %r168 to i32
+%r171 = getelementptr i32, i32* %r1, i32 2
+store i32 %r169, i32* %r171
+%r172 = lshr i256 %r168, 32
+%r173 = trunc i256 %r172 to i32
+%r175 = getelementptr i32, i32* %r1, i32 3
+store i32 %r173, i32* %r175
+%r176 = lshr i256 %r172, 32
+%r177 = trunc i256 %r176 to i32
+%r179 = getelementptr i32, i32* %r1, i32 4
+store i32 %r177, i32* %r179
+%r180 = lshr i256 %r176, 32
+%r181 = trunc i256 %r180 to i32
+%r183 = getelementptr i32, i32* %r1, i32 5
+store i32 %r181, i32* %r183
+%r184 = lshr i256 %r180, 32
+%r185 = trunc i256 %r184 to i32
+%r187 = getelementptr i32, i32* %r1, i32 6
+store i32 %r185, i32* %r187
+%r188 = lshr i256 %r184, 32
+%r189 = trunc i256 %r188 to i32
+%r191 = getelementptr i32, i32* %r1, i32 7
+store i32 %r189, i32* %r191
+ret void
+}
+define void @mcl_fpDbl_add8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = load i32, i32* %r3
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r3, i32 1
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r3, i32 2
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r3, i32 3
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r3, i32 4
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r3, i32 5
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r3, i32 6
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r3, i32 7
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r163 = getelementptr i32, i32* %r3, i32 8
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i288
+%r166 = shl i288 %r165, 256
+%r167 = or i288 %r161, %r166
+%r168 = zext i288 %r167 to i320
+%r170 = getelementptr i32, i32* %r3, i32 9
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i320
+%r173 = shl i320 %r172, 288
+%r174 = or i320 %r168, %r173
+%r175 = zext i320 %r174 to i352
+%r177 = getelementptr i32, i32* %r3, i32 10
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i352
+%r180 = shl i352 %r179, 320
+%r181 = or i352 %r175, %r180
+%r182 = zext i352 %r181 to i384
+%r184 = getelementptr i32, i32* %r3, i32 11
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i384
+%r187 = shl i384 %r186, 352
+%r188 = or i384 %r182, %r187
+%r189 = zext i384 %r188 to i416
+%r191 = getelementptr i32, i32* %r3, i32 12
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i416
+%r194 = shl i416 %r193, 384
+%r195 = or i416 %r189, %r194
+%r196 = zext i416 %r195 to i448
+%r198 = getelementptr i32, i32* %r3, i32 13
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i448
+%r201 = shl i448 %r200, 416
+%r202 = or i448 %r196, %r201
+%r203 = zext i448 %r202 to i480
+%r205 = getelementptr i32, i32* %r3, i32 14
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i480
+%r208 = shl i480 %r207, 448
+%r209 = or i480 %r203, %r208
+%r210 = zext i480 %r209 to i512
+%r212 = getelementptr i32, i32* %r3, i32 15
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i512
+%r215 = shl i512 %r214, 480
+%r216 = or i512 %r210, %r215
+%r217 = zext i512 %r110 to i544
+%r218 = zext i512 %r216 to i544
+%r219 = add i544 %r217, %r218
+%r220 = trunc i544 %r219 to i256
+%r221 = trunc i256 %r220 to i32
+%r223 = getelementptr i32, i32* %r1, i32 0
+store i32 %r221, i32* %r223
+%r224 = lshr i256 %r220, 32
+%r225 = trunc i256 %r224 to i32
+%r227 = getelementptr i32, i32* %r1, i32 1
+store i32 %r225, i32* %r227
+%r228 = lshr i256 %r224, 32
+%r229 = trunc i256 %r228 to i32
+%r231 = getelementptr i32, i32* %r1, i32 2
+store i32 %r229, i32* %r231
+%r232 = lshr i256 %r228, 32
+%r233 = trunc i256 %r232 to i32
+%r235 = getelementptr i32, i32* %r1, i32 3
+store i32 %r233, i32* %r235
+%r236 = lshr i256 %r232, 32
+%r237 = trunc i256 %r236 to i32
+%r239 = getelementptr i32, i32* %r1, i32 4
+store i32 %r237, i32* %r239
+%r240 = lshr i256 %r236, 32
+%r241 = trunc i256 %r240 to i32
+%r243 = getelementptr i32, i32* %r1, i32 5
+store i32 %r241, i32* %r243
+%r244 = lshr i256 %r240, 32
+%r245 = trunc i256 %r244 to i32
+%r247 = getelementptr i32, i32* %r1, i32 6
+store i32 %r245, i32* %r247
+%r248 = lshr i256 %r244, 32
+%r249 = trunc i256 %r248 to i32
+%r251 = getelementptr i32, i32* %r1, i32 7
+store i32 %r249, i32* %r251
+%r252 = lshr i544 %r219, 256
+%r253 = trunc i544 %r252 to i288
+%r254 = load i32, i32* %r4
+%r255 = zext i32 %r254 to i64
+%r257 = getelementptr i32, i32* %r4, i32 1
+%r258 = load i32, i32* %r257
+%r259 = zext i32 %r258 to i64
+%r260 = shl i64 %r259, 32
+%r261 = or i64 %r255, %r260
+%r262 = zext i64 %r261 to i96
+%r264 = getelementptr i32, i32* %r4, i32 2
+%r265 = load i32, i32* %r264
+%r266 = zext i32 %r265 to i96
+%r267 = shl i96 %r266, 64
+%r268 = or i96 %r262, %r267
+%r269 = zext i96 %r268 to i128
+%r271 = getelementptr i32, i32* %r4, i32 3
+%r272 = load i32, i32* %r271
+%r273 = zext i32 %r272 to i128
+%r274 = shl i128 %r273, 96
+%r275 = or i128 %r269, %r274
+%r276 = zext i128 %r275 to i160
+%r278 = getelementptr i32, i32* %r4, i32 4
+%r279 = load i32, i32* %r278
+%r280 = zext i32 %r279 to i160
+%r281 = shl i160 %r280, 128
+%r282 = or i160 %r276, %r281
+%r283 = zext i160 %r282 to i192
+%r285 = getelementptr i32, i32* %r4, i32 5
+%r286 = load i32, i32* %r285
+%r287 = zext i32 %r286 to i192
+%r288 = shl i192 %r287, 160
+%r289 = or i192 %r283, %r288
+%r290 = zext i192 %r289 to i224
+%r292 = getelementptr i32, i32* %r4, i32 6
+%r293 = load i32, i32* %r292
+%r294 = zext i32 %r293 to i224
+%r295 = shl i224 %r294, 192
+%r296 = or i224 %r290, %r295
+%r297 = zext i224 %r296 to i256
+%r299 = getelementptr i32, i32* %r4, i32 7
+%r300 = load i32, i32* %r299
+%r301 = zext i32 %r300 to i256
+%r302 = shl i256 %r301, 224
+%r303 = or i256 %r297, %r302
+%r304 = zext i256 %r303 to i288
+%r305 = sub i288 %r253, %r304
+%r306 = lshr i288 %r305, 256
+%r307 = trunc i288 %r306 to i1
+%r308 = select i1 %r307, i288 %r253, i288 %r305
+%r309 = trunc i288 %r308 to i256
+%r311 = getelementptr i32, i32* %r1, i32 8
+%r312 = trunc i256 %r309 to i32
+%r314 = getelementptr i32, i32* %r311, i32 0
+store i32 %r312, i32* %r314
+%r315 = lshr i256 %r309, 32
+%r316 = trunc i256 %r315 to i32
+%r318 = getelementptr i32, i32* %r311, i32 1
+store i32 %r316, i32* %r318
+%r319 = lshr i256 %r315, 32
+%r320 = trunc i256 %r319 to i32
+%r322 = getelementptr i32, i32* %r311, i32 2
+store i32 %r320, i32* %r322
+%r323 = lshr i256 %r319, 32
+%r324 = trunc i256 %r323 to i32
+%r326 = getelementptr i32, i32* %r311, i32 3
+store i32 %r324, i32* %r326
+%r327 = lshr i256 %r323, 32
+%r328 = trunc i256 %r327 to i32
+%r330 = getelementptr i32, i32* %r311, i32 4
+store i32 %r328, i32* %r330
+%r331 = lshr i256 %r327, 32
+%r332 = trunc i256 %r331 to i32
+%r334 = getelementptr i32, i32* %r311, i32 5
+store i32 %r332, i32* %r334
+%r335 = lshr i256 %r331, 32
+%r336 = trunc i256 %r335 to i32
+%r338 = getelementptr i32, i32* %r311, i32 6
+store i32 %r336, i32* %r338
+%r339 = lshr i256 %r335, 32
+%r340 = trunc i256 %r339 to i32
+%r342 = getelementptr i32, i32* %r311, i32 7
+store i32 %r340, i32* %r342
+ret void
+}
+define void @mcl_fpDbl_sub8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = load i32, i32* %r3
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r3, i32 1
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r3, i32 2
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r3, i32 3
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r3, i32 4
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r3, i32 5
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r3, i32 6
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r3, i32 7
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r163 = getelementptr i32, i32* %r3, i32 8
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i288
+%r166 = shl i288 %r165, 256
+%r167 = or i288 %r161, %r166
+%r168 = zext i288 %r167 to i320
+%r170 = getelementptr i32, i32* %r3, i32 9
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i320
+%r173 = shl i320 %r172, 288
+%r174 = or i320 %r168, %r173
+%r175 = zext i320 %r174 to i352
+%r177 = getelementptr i32, i32* %r3, i32 10
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i352
+%r180 = shl i352 %r179, 320
+%r181 = or i352 %r175, %r180
+%r182 = zext i352 %r181 to i384
+%r184 = getelementptr i32, i32* %r3, i32 11
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i384
+%r187 = shl i384 %r186, 352
+%r188 = or i384 %r182, %r187
+%r189 = zext i384 %r188 to i416
+%r191 = getelementptr i32, i32* %r3, i32 12
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i416
+%r194 = shl i416 %r193, 384
+%r195 = or i416 %r189, %r194
+%r196 = zext i416 %r195 to i448
+%r198 = getelementptr i32, i32* %r3, i32 13
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i448
+%r201 = shl i448 %r200, 416
+%r202 = or i448 %r196, %r201
+%r203 = zext i448 %r202 to i480
+%r205 = getelementptr i32, i32* %r3, i32 14
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i480
+%r208 = shl i480 %r207, 448
+%r209 = or i480 %r203, %r208
+%r210 = zext i480 %r209 to i512
+%r212 = getelementptr i32, i32* %r3, i32 15
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i512
+%r215 = shl i512 %r214, 480
+%r216 = or i512 %r210, %r215
+%r217 = zext i512 %r110 to i544
+%r218 = zext i512 %r216 to i544
+%r219 = sub i544 %r217, %r218
+%r220 = trunc i544 %r219 to i256
+%r221 = trunc i256 %r220 to i32
+%r223 = getelementptr i32, i32* %r1, i32 0
+store i32 %r221, i32* %r223
+%r224 = lshr i256 %r220, 32
+%r225 = trunc i256 %r224 to i32
+%r227 = getelementptr i32, i32* %r1, i32 1
+store i32 %r225, i32* %r227
+%r228 = lshr i256 %r224, 32
+%r229 = trunc i256 %r228 to i32
+%r231 = getelementptr i32, i32* %r1, i32 2
+store i32 %r229, i32* %r231
+%r232 = lshr i256 %r228, 32
+%r233 = trunc i256 %r232 to i32
+%r235 = getelementptr i32, i32* %r1, i32 3
+store i32 %r233, i32* %r235
+%r236 = lshr i256 %r232, 32
+%r237 = trunc i256 %r236 to i32
+%r239 = getelementptr i32, i32* %r1, i32 4
+store i32 %r237, i32* %r239
+%r240 = lshr i256 %r236, 32
+%r241 = trunc i256 %r240 to i32
+%r243 = getelementptr i32, i32* %r1, i32 5
+store i32 %r241, i32* %r243
+%r244 = lshr i256 %r240, 32
+%r245 = trunc i256 %r244 to i32
+%r247 = getelementptr i32, i32* %r1, i32 6
+store i32 %r245, i32* %r247
+%r248 = lshr i256 %r244, 32
+%r249 = trunc i256 %r248 to i32
+%r251 = getelementptr i32, i32* %r1, i32 7
+store i32 %r249, i32* %r251
+%r252 = lshr i544 %r219, 256
+%r253 = trunc i544 %r252 to i256
+%r254 = lshr i544 %r219, 512
+%r255 = trunc i544 %r254 to i1
+%r256 = load i32, i32* %r4
+%r257 = zext i32 %r256 to i64
+%r259 = getelementptr i32, i32* %r4, i32 1
+%r260 = load i32, i32* %r259
+%r261 = zext i32 %r260 to i64
+%r262 = shl i64 %r261, 32
+%r263 = or i64 %r257, %r262
+%r264 = zext i64 %r263 to i96
+%r266 = getelementptr i32, i32* %r4, i32 2
+%r267 = load i32, i32* %r266
+%r268 = zext i32 %r267 to i96
+%r269 = shl i96 %r268, 64
+%r270 = or i96 %r264, %r269
+%r271 = zext i96 %r270 to i128
+%r273 = getelementptr i32, i32* %r4, i32 3
+%r274 = load i32, i32* %r273
+%r275 = zext i32 %r274 to i128
+%r276 = shl i128 %r275, 96
+%r277 = or i128 %r271, %r276
+%r278 = zext i128 %r277 to i160
+%r280 = getelementptr i32, i32* %r4, i32 4
+%r281 = load i32, i32* %r280
+%r282 = zext i32 %r281 to i160
+%r283 = shl i160 %r282, 128
+%r284 = or i160 %r278, %r283
+%r285 = zext i160 %r284 to i192
+%r287 = getelementptr i32, i32* %r4, i32 5
+%r288 = load i32, i32* %r287
+%r289 = zext i32 %r288 to i192
+%r290 = shl i192 %r289, 160
+%r291 = or i192 %r285, %r290
+%r292 = zext i192 %r291 to i224
+%r294 = getelementptr i32, i32* %r4, i32 6
+%r295 = load i32, i32* %r294
+%r296 = zext i32 %r295 to i224
+%r297 = shl i224 %r296, 192
+%r298 = or i224 %r292, %r297
+%r299 = zext i224 %r298 to i256
+%r301 = getelementptr i32, i32* %r4, i32 7
+%r302 = load i32, i32* %r301
+%r303 = zext i32 %r302 to i256
+%r304 = shl i256 %r303, 224
+%r305 = or i256 %r299, %r304
+%r307 = select i1 %r255, i256 %r305, i256 0
+%r308 = add i256 %r253, %r307
+%r310 = getelementptr i32, i32* %r1, i32 8
+%r311 = trunc i256 %r308 to i32
+%r313 = getelementptr i32, i32* %r310, i32 0
+store i32 %r311, i32* %r313
+%r314 = lshr i256 %r308, 32
+%r315 = trunc i256 %r314 to i32
+%r317 = getelementptr i32, i32* %r310, i32 1
+store i32 %r315, i32* %r317
+%r318 = lshr i256 %r314, 32
+%r319 = trunc i256 %r318 to i32
+%r321 = getelementptr i32, i32* %r310, i32 2
+store i32 %r319, i32* %r321
+%r322 = lshr i256 %r318, 32
+%r323 = trunc i256 %r322 to i32
+%r325 = getelementptr i32, i32* %r310, i32 3
+store i32 %r323, i32* %r325
+%r326 = lshr i256 %r322, 32
+%r327 = trunc i256 %r326 to i32
+%r329 = getelementptr i32, i32* %r310, i32 4
+store i32 %r327, i32* %r329
+%r330 = lshr i256 %r326, 32
+%r331 = trunc i256 %r330 to i32
+%r333 = getelementptr i32, i32* %r310, i32 5
+store i32 %r331, i32* %r333
+%r334 = lshr i256 %r330, 32
+%r335 = trunc i256 %r334 to i32
+%r337 = getelementptr i32, i32* %r310, i32 6
+store i32 %r335, i32* %r337
+%r338 = lshr i256 %r334, 32
+%r339 = trunc i256 %r338 to i32
+%r341 = getelementptr i32, i32* %r310, i32 7
+store i32 %r339, i32* %r341
+ret void
+}
+define i320 @mulPv288x32(i32* noalias  %r2, i32 %r3)
+{
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
+%r34 = trunc i64 %r33 to i32
+%r35 = call i32 @extractHigh32(i64 %r33)
+%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
+%r38 = trunc i64 %r37 to i32
+%r39 = call i32 @extractHigh32(i64 %r37)
+%r40 = zext i32 %r6 to i64
+%r41 = zext i32 %r10 to i64
+%r42 = shl i64 %r41, 32
+%r43 = or i64 %r40, %r42
+%r44 = zext i64 %r43 to i96
+%r45 = zext i32 %r14 to i96
+%r46 = shl i96 %r45, 64
+%r47 = or i96 %r44, %r46
+%r48 = zext i96 %r47 to i128
+%r49 = zext i32 %r18 to i128
+%r50 = shl i128 %r49, 96
+%r51 = or i128 %r48, %r50
+%r52 = zext i128 %r51 to i160
+%r53 = zext i32 %r22 to i160
+%r54 = shl i160 %r53, 128
+%r55 = or i160 %r52, %r54
+%r56 = zext i160 %r55 to i192
+%r57 = zext i32 %r26 to i192
+%r58 = shl i192 %r57, 160
+%r59 = or i192 %r56, %r58
+%r60 = zext i192 %r59 to i224
+%r61 = zext i32 %r30 to i224
+%r62 = shl i224 %r61, 192
+%r63 = or i224 %r60, %r62
+%r64 = zext i224 %r63 to i256
+%r65 = zext i32 %r34 to i256
+%r66 = shl i256 %r65, 224
+%r67 = or i256 %r64, %r66
+%r68 = zext i256 %r67 to i288
+%r69 = zext i32 %r38 to i288
+%r70 = shl i288 %r69, 256
+%r71 = or i288 %r68, %r70
+%r72 = zext i32 %r7 to i64
+%r73 = zext i32 %r11 to i64
+%r74 = shl i64 %r73, 32
+%r75 = or i64 %r72, %r74
+%r76 = zext i64 %r75 to i96
+%r77 = zext i32 %r15 to i96
+%r78 = shl i96 %r77, 64
+%r79 = or i96 %r76, %r78
+%r80 = zext i96 %r79 to i128
+%r81 = zext i32 %r19 to i128
+%r82 = shl i128 %r81, 96
+%r83 = or i128 %r80, %r82
+%r84 = zext i128 %r83 to i160
+%r85 = zext i32 %r23 to i160
+%r86 = shl i160 %r85, 128
+%r87 = or i160 %r84, %r86
+%r88 = zext i160 %r87 to i192
+%r89 = zext i32 %r27 to i192
+%r90 = shl i192 %r89, 160
+%r91 = or i192 %r88, %r90
+%r92 = zext i192 %r91 to i224
+%r93 = zext i32 %r31 to i224
+%r94 = shl i224 %r93, 192
+%r95 = or i224 %r92, %r94
+%r96 = zext i224 %r95 to i256
+%r97 = zext i32 %r35 to i256
+%r98 = shl i256 %r97, 224
+%r99 = or i256 %r96, %r98
+%r100 = zext i256 %r99 to i288
+%r101 = zext i32 %r39 to i288
+%r102 = shl i288 %r101, 256
+%r103 = or i288 %r100, %r102
+%r104 = zext i288 %r71 to i320
+%r105 = zext i288 %r103 to i320
+%r106 = shl i320 %r105, 32
+%r107 = add i320 %r104, %r106
+ret i320 %r107
+}
+define void @mcl_fp_mulUnitPre9L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
+{
+%r4 = call i320 @mulPv288x32(i32* %r2, i32 %r3)
+%r5 = trunc i320 %r4 to i32
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i320 %r4, 32
+%r9 = trunc i320 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i320 %r8, 32
+%r13 = trunc i320 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i320 %r12, 32
+%r17 = trunc i320 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i320 %r16, 32
+%r21 = trunc i320 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+%r24 = lshr i320 %r20, 32
+%r25 = trunc i320 %r24 to i32
+%r27 = getelementptr i32, i32* %r1, i32 5
+store i32 %r25, i32* %r27
+%r28 = lshr i320 %r24, 32
+%r29 = trunc i320 %r28 to i32
+%r31 = getelementptr i32, i32* %r1, i32 6
+store i32 %r29, i32* %r31
+%r32 = lshr i320 %r28, 32
+%r33 = trunc i320 %r32 to i32
+%r35 = getelementptr i32, i32* %r1, i32 7
+store i32 %r33, i32* %r35
+%r36 = lshr i320 %r32, 32
+%r37 = trunc i320 %r36 to i32
+%r39 = getelementptr i32, i32* %r1, i32 8
+store i32 %r37, i32* %r39
+%r40 = lshr i320 %r36, 32
+%r41 = trunc i320 %r40 to i32
+%r43 = getelementptr i32, i32* %r1, i32 9
+store i32 %r41, i32* %r43
+ret void
+}
+define void @mcl_fpDbl_mulPre9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r4 = load i32, i32* %r3
+%r5 = call i320 @mulPv288x32(i32* %r2, i32 %r4)
+%r6 = trunc i320 %r5 to i32
+store i32 %r6, i32* %r1
+%r7 = lshr i320 %r5, 32
+%r9 = getelementptr i32, i32* %r3, i32 1
+%r10 = load i32, i32* %r9
+%r11 = call i320 @mulPv288x32(i32* %r2, i32 %r10)
+%r12 = add i320 %r7, %r11
+%r13 = trunc i320 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 1
+store i32 %r13, i32* %r15
+%r16 = lshr i320 %r12, 32
+%r18 = getelementptr i32, i32* %r3, i32 2
+%r19 = load i32, i32* %r18
+%r20 = call i320 @mulPv288x32(i32* %r2, i32 %r19)
+%r21 = add i320 %r16, %r20
+%r22 = trunc i320 %r21 to i32
+%r24 = getelementptr i32, i32* %r1, i32 2
+store i32 %r22, i32* %r24
+%r25 = lshr i320 %r21, 32
+%r27 = getelementptr i32, i32* %r3, i32 3
+%r28 = load i32, i32* %r27
+%r29 = call i320 @mulPv288x32(i32* %r2, i32 %r28)
+%r30 = add i320 %r25, %r29
+%r31 = trunc i320 %r30 to i32
+%r33 = getelementptr i32, i32* %r1, i32 3
+store i32 %r31, i32* %r33
+%r34 = lshr i320 %r30, 32
+%r36 = getelementptr i32, i32* %r3, i32 4
+%r37 = load i32, i32* %r36
+%r38 = call i320 @mulPv288x32(i32* %r2, i32 %r37)
+%r39 = add i320 %r34, %r38
+%r40 = trunc i320 %r39 to i32
+%r42 = getelementptr i32, i32* %r1, i32 4
+store i32 %r40, i32* %r42
+%r43 = lshr i320 %r39, 32
+%r45 = getelementptr i32, i32* %r3, i32 5
+%r46 = load i32, i32* %r45
+%r47 = call i320 @mulPv288x32(i32* %r2, i32 %r46)
+%r48 = add i320 %r43, %r47
+%r49 = trunc i320 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 5
+store i32 %r49, i32* %r51
+%r52 = lshr i320 %r48, 32
+%r54 = getelementptr i32, i32* %r3, i32 6
+%r55 = load i32, i32* %r54
+%r56 = call i320 @mulPv288x32(i32* %r2, i32 %r55)
+%r57 = add i320 %r52, %r56
+%r58 = trunc i320 %r57 to i32
+%r60 = getelementptr i32, i32* %r1, i32 6
+store i32 %r58, i32* %r60
+%r61 = lshr i320 %r57, 32
+%r63 = getelementptr i32, i32* %r3, i32 7
+%r64 = load i32, i32* %r63
+%r65 = call i320 @mulPv288x32(i32* %r2, i32 %r64)
+%r66 = add i320 %r61, %r65
+%r67 = trunc i320 %r66 to i32
+%r69 = getelementptr i32, i32* %r1, i32 7
+store i32 %r67, i32* %r69
+%r70 = lshr i320 %r66, 32
+%r72 = getelementptr i32, i32* %r3, i32 8
+%r73 = load i32, i32* %r72
+%r74 = call i320 @mulPv288x32(i32* %r2, i32 %r73)
+%r75 = add i320 %r70, %r74
+%r77 = getelementptr i32, i32* %r1, i32 8
+%r78 = trunc i320 %r75 to i32
+%r80 = getelementptr i32, i32* %r77, i32 0
+store i32 %r78, i32* %r80
+%r81 = lshr i320 %r75, 32
+%r82 = trunc i320 %r81 to i32
+%r84 = getelementptr i32, i32* %r77, i32 1
+store i32 %r82, i32* %r84
+%r85 = lshr i320 %r81, 32
+%r86 = trunc i320 %r85 to i32
+%r88 = getelementptr i32, i32* %r77, i32 2
+store i32 %r86, i32* %r88
+%r89 = lshr i320 %r85, 32
+%r90 = trunc i320 %r89 to i32
+%r92 = getelementptr i32, i32* %r77, i32 3
+store i32 %r90, i32* %r92
+%r93 = lshr i320 %r89, 32
+%r94 = trunc i320 %r93 to i32
+%r96 = getelementptr i32, i32* %r77, i32 4
+store i32 %r94, i32* %r96
+%r97 = lshr i320 %r93, 32
+%r98 = trunc i320 %r97 to i32
+%r100 = getelementptr i32, i32* %r77, i32 5
+store i32 %r98, i32* %r100
+%r101 = lshr i320 %r97, 32
+%r102 = trunc i320 %r101 to i32
+%r104 = getelementptr i32, i32* %r77, i32 6
+store i32 %r102, i32* %r104
+%r105 = lshr i320 %r101, 32
+%r106 = trunc i320 %r105 to i32
+%r108 = getelementptr i32, i32* %r77, i32 7
+store i32 %r106, i32* %r108
+%r109 = lshr i320 %r105, 32
+%r110 = trunc i320 %r109 to i32
+%r112 = getelementptr i32, i32* %r77, i32 8
+store i32 %r110, i32* %r112
+%r113 = lshr i320 %r109, 32
+%r114 = trunc i320 %r113 to i32
+%r116 = getelementptr i32, i32* %r77, i32 9
+store i32 %r114, i32* %r116
+ret void
+}
+define void @mcl_fpDbl_sqrPre9L(i32* noalias  %r1, i32* noalias  %r2)  ; Writes 18 words to %r1, built from nine calls mulPv288x32(%r2, %r2[i]), i = 0..8.
+{  ; NOTE(review): presumably the double-width square of the 9-word value at %r2; mulPv288x32 is defined outside this chunk - confirm.
+%r3 = load i32, i32* %r2  ; %r2[0]
+%r4 = call i320 @mulPv288x32(i32* %r2, i32 %r3)  ; row 0
+%r5 = trunc i320 %r4 to i32
+store i32 %r5, i32* %r1  ; low word of the accumulator -> %r1[0]
+%r6 = lshr i320 %r4, 32  ; drop the stored word; the rest is added to the next row
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = call i320 @mulPv288x32(i32* %r2, i32 %r9)  ; row 1
+%r11 = add i320 %r6, %r10  ; shifted accumulator + new row
+%r12 = trunc i320 %r11 to i32
+%r14 = getelementptr i32, i32* %r1, i32 1
+store i32 %r12, i32* %r14  ; -> %r1[1]
+%r15 = lshr i320 %r11, 32
+%r17 = getelementptr i32, i32* %r2, i32 2
+%r18 = load i32, i32* %r17
+%r19 = call i320 @mulPv288x32(i32* %r2, i32 %r18)  ; row 2
+%r20 = add i320 %r15, %r19
+%r21 = trunc i320 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 2
+store i32 %r21, i32* %r23
+%r24 = lshr i320 %r20, 32
+%r26 = getelementptr i32, i32* %r2, i32 3
+%r27 = load i32, i32* %r26
+%r28 = call i320 @mulPv288x32(i32* %r2, i32 %r27)  ; row 3
+%r29 = add i320 %r24, %r28
+%r30 = trunc i320 %r29 to i32
+%r32 = getelementptr i32, i32* %r1, i32 3
+store i32 %r30, i32* %r32
+%r33 = lshr i320 %r29, 32
+%r35 = getelementptr i32, i32* %r2, i32 4
+%r36 = load i32, i32* %r35
+%r37 = call i320 @mulPv288x32(i32* %r2, i32 %r36)  ; row 4
+%r38 = add i320 %r33, %r37
+%r39 = trunc i320 %r38 to i32
+%r41 = getelementptr i32, i32* %r1, i32 4
+store i32 %r39, i32* %r41
+%r42 = lshr i320 %r38, 32
+%r44 = getelementptr i32, i32* %r2, i32 5
+%r45 = load i32, i32* %r44
+%r46 = call i320 @mulPv288x32(i32* %r2, i32 %r45)  ; row 5
+%r47 = add i320 %r42, %r46
+%r48 = trunc i320 %r47 to i32
+%r50 = getelementptr i32, i32* %r1, i32 5
+store i32 %r48, i32* %r50
+%r51 = lshr i320 %r47, 32
+%r53 = getelementptr i32, i32* %r2, i32 6
+%r54 = load i32, i32* %r53
+%r55 = call i320 @mulPv288x32(i32* %r2, i32 %r54)  ; row 6
+%r56 = add i320 %r51, %r55
+%r57 = trunc i320 %r56 to i32
+%r59 = getelementptr i32, i32* %r1, i32 6
+store i32 %r57, i32* %r59
+%r60 = lshr i320 %r56, 32
+%r62 = getelementptr i32, i32* %r2, i32 7
+%r63 = load i32, i32* %r62
+%r64 = call i320 @mulPv288x32(i32* %r2, i32 %r63)  ; row 7
+%r65 = add i320 %r60, %r64
+%r66 = trunc i320 %r65 to i32
+%r68 = getelementptr i32, i32* %r1, i32 7
+store i32 %r66, i32* %r68
+%r69 = lshr i320 %r65, 32
+%r71 = getelementptr i32, i32* %r2, i32 8
+%r72 = load i32, i32* %r71
+%r73 = call i320 @mulPv288x32(i32* %r2, i32 %r72)  ; row 8 (last)
+%r74 = add i320 %r69, %r73
+%r76 = getelementptr i32, i32* %r1, i32 8  ; the final 320-bit accumulator is written out whole, as %r1[8..17]
+%r77 = trunc i320 %r74 to i32
+%r79 = getelementptr i32, i32* %r76, i32 0
+store i32 %r77, i32* %r79
+%r80 = lshr i320 %r74, 32
+%r81 = trunc i320 %r80 to i32
+%r83 = getelementptr i32, i32* %r76, i32 1
+store i32 %r81, i32* %r83
+%r84 = lshr i320 %r80, 32
+%r85 = trunc i320 %r84 to i32
+%r87 = getelementptr i32, i32* %r76, i32 2
+store i32 %r85, i32* %r87
+%r88 = lshr i320 %r84, 32
+%r89 = trunc i320 %r88 to i32
+%r91 = getelementptr i32, i32* %r76, i32 3
+store i32 %r89, i32* %r91
+%r92 = lshr i320 %r88, 32
+%r93 = trunc i320 %r92 to i32
+%r95 = getelementptr i32, i32* %r76, i32 4
+store i32 %r93, i32* %r95
+%r96 = lshr i320 %r92, 32
+%r97 = trunc i320 %r96 to i32
+%r99 = getelementptr i32, i32* %r76, i32 5
+store i32 %r97, i32* %r99
+%r100 = lshr i320 %r96, 32
+%r101 = trunc i320 %r100 to i32
+%r103 = getelementptr i32, i32* %r76, i32 6
+store i32 %r101, i32* %r103
+%r104 = lshr i320 %r100, 32
+%r105 = trunc i320 %r104 to i32
+%r107 = getelementptr i32, i32* %r76, i32 7
+store i32 %r105, i32* %r107
+%r108 = lshr i320 %r104, 32
+%r109 = trunc i320 %r108 to i32
+%r111 = getelementptr i32, i32* %r76, i32 8
+store i32 %r109, i32* %r111
+%r112 = lshr i320 %r108, 32
+%r113 = trunc i320 %r112 to i32
+%r115 = getelementptr i32, i32* %r76, i32 9
+store i32 %r113, i32* %r115  ; top word -> %r1[17]
+ret void
+}
+define void @mcl_fp_mont9L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)  ; Nine multiply/reduce rounds (one per word of %r3), then one conditional subtraction of the 9-word value at %r4; writes 9 words to %r1.
+{  ; NOTE(review): looks like Montgomery multiplication of %r2 by %r3 with the modulus at %r4; mulPv288x32 is defined outside this chunk - confirm.
+%r6 = getelementptr i32, i32* %r4, i32 -1  ; the word stored immediately before %r4[0]
+%r7 = load i32, i32* %r6  ; %r7 = %r4[-1], multiplied into the low word every round (presumably the Montgomery constant - confirm)
+%r9 = getelementptr i32, i32* %r3, i32 0
+%r10 = load i32, i32* %r9
+%r11 = call i320 @mulPv288x32(i32* %r2, i32 %r10)  ; round 0: row for %r3[0]
+%r12 = zext i320 %r11 to i352  ; this function accumulates in i352
+%r13 = trunc i320 %r11 to i32
+%r14 = mul i32 %r13, %r7  ; q = low word * %r7, wrapping at 32 bits
+%r15 = call i320 @mulPv288x32(i32* %r4, i32 %r14)  ; row for q against the value at %r4
+%r16 = zext i320 %r15 to i352
+%r17 = add i352 %r12, %r16
+%r18 = lshr i352 %r17, 32  ; discard the low word before the next round
+%r20 = getelementptr i32, i32* %r3, i32 1
+%r21 = load i32, i32* %r20
+%r22 = call i320 @mulPv288x32(i32* %r2, i32 %r21)  ; round 1: same pattern with %r3[1]
+%r23 = zext i320 %r22 to i352
+%r24 = add i352 %r18, %r23
+%r25 = trunc i352 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i320 @mulPv288x32(i32* %r4, i32 %r26)
+%r28 = zext i320 %r27 to i352
+%r29 = add i352 %r24, %r28
+%r30 = lshr i352 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2
+%r33 = load i32, i32* %r32
+%r34 = call i320 @mulPv288x32(i32* %r2, i32 %r33)  ; round 2
+%r35 = zext i320 %r34 to i352
+%r36 = add i352 %r30, %r35
+%r37 = trunc i352 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i320 @mulPv288x32(i32* %r4, i32 %r38)
+%r40 = zext i320 %r39 to i352
+%r41 = add i352 %r36, %r40
+%r42 = lshr i352 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = call i320 @mulPv288x32(i32* %r2, i32 %r45)  ; round 3
+%r47 = zext i320 %r46 to i352
+%r48 = add i352 %r42, %r47
+%r49 = trunc i352 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i320 @mulPv288x32(i32* %r4, i32 %r50)
+%r52 = zext i320 %r51 to i352
+%r53 = add i352 %r48, %r52
+%r54 = lshr i352 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4
+%r57 = load i32, i32* %r56
+%r58 = call i320 @mulPv288x32(i32* %r2, i32 %r57)  ; round 4
+%r59 = zext i320 %r58 to i352
+%r60 = add i352 %r54, %r59
+%r61 = trunc i352 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i320 @mulPv288x32(i32* %r4, i32 %r62)
+%r64 = zext i320 %r63 to i352
+%r65 = add i352 %r60, %r64
+%r66 = lshr i352 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5
+%r69 = load i32, i32* %r68
+%r70 = call i320 @mulPv288x32(i32* %r2, i32 %r69)  ; round 5
+%r71 = zext i320 %r70 to i352
+%r72 = add i352 %r66, %r71
+%r73 = trunc i352 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i320 @mulPv288x32(i32* %r4, i32 %r74)
+%r76 = zext i320 %r75 to i352
+%r77 = add i352 %r72, %r76
+%r78 = lshr i352 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6
+%r81 = load i32, i32* %r80
+%r82 = call i320 @mulPv288x32(i32* %r2, i32 %r81)  ; round 6
+%r83 = zext i320 %r82 to i352
+%r84 = add i352 %r78, %r83
+%r85 = trunc i352 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i320 @mulPv288x32(i32* %r4, i32 %r86)
+%r88 = zext i320 %r87 to i352
+%r89 = add i352 %r84, %r88
+%r90 = lshr i352 %r89, 32
+%r92 = getelementptr i32, i32* %r3, i32 7
+%r93 = load i32, i32* %r92
+%r94 = call i320 @mulPv288x32(i32* %r2, i32 %r93)  ; round 7
+%r95 = zext i320 %r94 to i352
+%r96 = add i352 %r90, %r95
+%r97 = trunc i352 %r96 to i32
+%r98 = mul i32 %r97, %r7
+%r99 = call i320 @mulPv288x32(i32* %r4, i32 %r98)
+%r100 = zext i320 %r99 to i352
+%r101 = add i352 %r96, %r100
+%r102 = lshr i352 %r101, 32
+%r104 = getelementptr i32, i32* %r3, i32 8
+%r105 = load i32, i32* %r104
+%r106 = call i320 @mulPv288x32(i32* %r2, i32 %r105)  ; round 8 (last)
+%r107 = zext i320 %r106 to i352
+%r108 = add i352 %r102, %r107
+%r109 = trunc i352 %r108 to i32
+%r110 = mul i32 %r109, %r7
+%r111 = call i320 @mulPv288x32(i32* %r4, i32 %r110)
+%r112 = zext i320 %r111 to i352
+%r113 = add i352 %r108, %r112
+%r114 = lshr i352 %r113, 32
+%r115 = trunc i352 %r114 to i320  ; candidate result, kept at 320 bits
+%r116 = load i32, i32* %r4  ; from here to %r172: pack %r4[0..8] into one i288, word i shifted left by 32*i
+%r117 = zext i32 %r116 to i64
+%r119 = getelementptr i32, i32* %r4, i32 1
+%r120 = load i32, i32* %r119
+%r121 = zext i32 %r120 to i64
+%r122 = shl i64 %r121, 32
+%r123 = or i64 %r117, %r122
+%r124 = zext i64 %r123 to i96
+%r126 = getelementptr i32, i32* %r4, i32 2
+%r127 = load i32, i32* %r126
+%r128 = zext i32 %r127 to i96
+%r129 = shl i96 %r128, 64
+%r130 = or i96 %r124, %r129
+%r131 = zext i96 %r130 to i128
+%r133 = getelementptr i32, i32* %r4, i32 3
+%r134 = load i32, i32* %r133
+%r135 = zext i32 %r134 to i128
+%r136 = shl i128 %r135, 96
+%r137 = or i128 %r131, %r136
+%r138 = zext i128 %r137 to i160
+%r140 = getelementptr i32, i32* %r4, i32 4
+%r141 = load i32, i32* %r140
+%r142 = zext i32 %r141 to i160
+%r143 = shl i160 %r142, 128
+%r144 = or i160 %r138, %r143
+%r145 = zext i160 %r144 to i192
+%r147 = getelementptr i32, i32* %r4, i32 5
+%r148 = load i32, i32* %r147
+%r149 = zext i32 %r148 to i192
+%r150 = shl i192 %r149, 160
+%r151 = or i192 %r145, %r150
+%r152 = zext i192 %r151 to i224
+%r154 = getelementptr i32, i32* %r4, i32 6
+%r155 = load i32, i32* %r154
+%r156 = zext i32 %r155 to i224
+%r157 = shl i224 %r156, 192
+%r158 = or i224 %r152, %r157
+%r159 = zext i224 %r158 to i256
+%r161 = getelementptr i32, i32* %r4, i32 7
+%r162 = load i32, i32* %r161
+%r163 = zext i32 %r162 to i256
+%r164 = shl i256 %r163, 224
+%r165 = or i256 %r159, %r164
+%r166 = zext i256 %r165 to i288
+%r168 = getelementptr i32, i32* %r4, i32 8
+%r169 = load i32, i32* %r168
+%r170 = zext i32 %r169 to i288
+%r171 = shl i288 %r170, 256
+%r172 = or i288 %r166, %r171
+%r173 = zext i288 %r172 to i320
+%r174 = sub i320 %r115, %r173  ; candidate minus the value at %r4
+%r175 = lshr i320 %r174, 288
+%r176 = trunc i320 %r175 to i1  ; bit 288 of the difference
+%r177 = select i1 %r176, i320 %r115, i320 %r174  ; bit set -> keep the candidate, otherwise keep the difference
+%r178 = trunc i320 %r177 to i288  ; low 288 bits are stored as %r1[0..8]
+%r179 = trunc i288 %r178 to i32
+%r181 = getelementptr i32, i32* %r1, i32 0
+store i32 %r179, i32* %r181
+%r182 = lshr i288 %r178, 32
+%r183 = trunc i288 %r182 to i32
+%r185 = getelementptr i32, i32* %r1, i32 1
+store i32 %r183, i32* %r185
+%r186 = lshr i288 %r182, 32
+%r187 = trunc i288 %r186 to i32
+%r189 = getelementptr i32, i32* %r1, i32 2
+store i32 %r187, i32* %r189
+%r190 = lshr i288 %r186, 32
+%r191 = trunc i288 %r190 to i32
+%r193 = getelementptr i32, i32* %r1, i32 3
+store i32 %r191, i32* %r193
+%r194 = lshr i288 %r190, 32
+%r195 = trunc i288 %r194 to i32
+%r197 = getelementptr i32, i32* %r1, i32 4
+store i32 %r195, i32* %r197
+%r198 = lshr i288 %r194, 32
+%r199 = trunc i288 %r198 to i32
+%r201 = getelementptr i32, i32* %r1, i32 5
+store i32 %r199, i32* %r201
+%r202 = lshr i288 %r198, 32
+%r203 = trunc i288 %r202 to i32
+%r205 = getelementptr i32, i32* %r1, i32 6
+store i32 %r203, i32* %r205
+%r206 = lshr i288 %r202, 32
+%r207 = trunc i288 %r206 to i32
+%r209 = getelementptr i32, i32* %r1, i32 7
+store i32 %r207, i32* %r209
+%r210 = lshr i288 %r206, 32
+%r211 = trunc i288 %r210 to i32
+%r213 = getelementptr i32, i32* %r1, i32 8
+store i32 %r211, i32* %r213
+ret void
+}
+define void @mcl_fp_montNF9L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)  ; Same round structure as mcl_fp_mont9L, but accumulates in i320 (no widening to i352) and selects on bit 287 of a 288-bit difference; writes 9 words to %r1.
+{  ; NOTE(review): presumably the Montgomery-multiplication variant for moduli whose top bit is clear ("NF") - the IR does not state this; confirm against the generator.
+%r6 = getelementptr i32, i32* %r4, i32 -1  ; the word stored immediately before %r4[0]
+%r7 = load i32, i32* %r6  ; %r7 = %r4[-1], multiplied into the low word every round
+%r8 = load i32, i32* %r3
+%r9 = call i320 @mulPv288x32(i32* %r2, i32 %r8)  ; round 0: row for %r3[0]
+%r10 = trunc i320 %r9 to i32
+%r11 = mul i32 %r10, %r7  ; q = low word * %r7, wrapping at 32 bits
+%r12 = call i320 @mulPv288x32(i32* %r4, i32 %r11)  ; row for q against the value at %r4
+%r13 = add i320 %r9, %r12
+%r14 = lshr i320 %r13, 32  ; discard the low word before the next round
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = call i320 @mulPv288x32(i32* %r2, i32 %r17)  ; round 1
+%r19 = add i320 %r14, %r18
+%r20 = trunc i320 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i320 @mulPv288x32(i32* %r4, i32 %r21)
+%r23 = add i320 %r19, %r22
+%r24 = lshr i320 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2
+%r27 = load i32, i32* %r26
+%r28 = call i320 @mulPv288x32(i32* %r2, i32 %r27)  ; round 2
+%r29 = add i320 %r24, %r28
+%r30 = trunc i320 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i320 @mulPv288x32(i32* %r4, i32 %r31)
+%r33 = add i320 %r29, %r32
+%r34 = lshr i320 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3
+%r37 = load i32, i32* %r36
+%r38 = call i320 @mulPv288x32(i32* %r2, i32 %r37)  ; round 3
+%r39 = add i320 %r34, %r38
+%r40 = trunc i320 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i320 @mulPv288x32(i32* %r4, i32 %r41)
+%r43 = add i320 %r39, %r42
+%r44 = lshr i320 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4
+%r47 = load i32, i32* %r46
+%r48 = call i320 @mulPv288x32(i32* %r2, i32 %r47)  ; round 4
+%r49 = add i320 %r44, %r48
+%r50 = trunc i320 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i320 @mulPv288x32(i32* %r4, i32 %r51)
+%r53 = add i320 %r49, %r52
+%r54 = lshr i320 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5
+%r57 = load i32, i32* %r56
+%r58 = call i320 @mulPv288x32(i32* %r2, i32 %r57)  ; round 5
+%r59 = add i320 %r54, %r58
+%r60 = trunc i320 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i320 @mulPv288x32(i32* %r4, i32 %r61)
+%r63 = add i320 %r59, %r62
+%r64 = lshr i320 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6
+%r67 = load i32, i32* %r66
+%r68 = call i320 @mulPv288x32(i32* %r2, i32 %r67)  ; round 6
+%r69 = add i320 %r64, %r68
+%r70 = trunc i320 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i320 @mulPv288x32(i32* %r4, i32 %r71)
+%r73 = add i320 %r69, %r72
+%r74 = lshr i320 %r73, 32
+%r76 = getelementptr i32, i32* %r3, i32 7
+%r77 = load i32, i32* %r76
+%r78 = call i320 @mulPv288x32(i32* %r2, i32 %r77)  ; round 7
+%r79 = add i320 %r74, %r78
+%r80 = trunc i320 %r79 to i32
+%r81 = mul i32 %r80, %r7
+%r82 = call i320 @mulPv288x32(i32* %r4, i32 %r81)
+%r83 = add i320 %r79, %r82
+%r84 = lshr i320 %r83, 32
+%r86 = getelementptr i32, i32* %r3, i32 8
+%r87 = load i32, i32* %r86
+%r88 = call i320 @mulPv288x32(i32* %r2, i32 %r87)  ; round 8 (last)
+%r89 = add i320 %r84, %r88
+%r90 = trunc i320 %r89 to i32
+%r91 = mul i32 %r90, %r7
+%r92 = call i320 @mulPv288x32(i32* %r4, i32 %r91)
+%r93 = add i320 %r89, %r92
+%r94 = lshr i320 %r93, 32
+%r95 = trunc i320 %r94 to i288  ; candidate result, narrowed to 288 bits
+%r96 = load i32, i32* %r4  ; from here to %r152: pack %r4[0..8] into one i288, word i shifted left by 32*i
+%r97 = zext i32 %r96 to i64
+%r99 = getelementptr i32, i32* %r4, i32 1
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i64
+%r102 = shl i64 %r101, 32
+%r103 = or i64 %r97, %r102
+%r104 = zext i64 %r103 to i96
+%r106 = getelementptr i32, i32* %r4, i32 2
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i96
+%r109 = shl i96 %r108, 64
+%r110 = or i96 %r104, %r109
+%r111 = zext i96 %r110 to i128
+%r113 = getelementptr i32, i32* %r4, i32 3
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i128
+%r116 = shl i128 %r115, 96
+%r117 = or i128 %r111, %r116
+%r118 = zext i128 %r117 to i160
+%r120 = getelementptr i32, i32* %r4, i32 4
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i160
+%r123 = shl i160 %r122, 128
+%r124 = or i160 %r118, %r123
+%r125 = zext i160 %r124 to i192
+%r127 = getelementptr i32, i32* %r4, i32 5
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i192
+%r130 = shl i192 %r129, 160
+%r131 = or i192 %r125, %r130
+%r132 = zext i192 %r131 to i224
+%r134 = getelementptr i32, i32* %r4, i32 6
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i224
+%r137 = shl i224 %r136, 192
+%r138 = or i224 %r132, %r137
+%r139 = zext i224 %r138 to i256
+%r141 = getelementptr i32, i32* %r4, i32 7
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i256
+%r144 = shl i256 %r143, 224
+%r145 = or i256 %r139, %r144
+%r146 = zext i256 %r145 to i288
+%r148 = getelementptr i32, i32* %r4, i32 8
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i288
+%r151 = shl i288 %r150, 256
+%r152 = or i288 %r146, %r151
+%r153 = sub i288 %r95, %r152  ; candidate minus the value at %r4, wrapping at 288 bits
+%r154 = lshr i288 %r153, 287
+%r155 = trunc i288 %r154 to i1  ; top bit (bit 287) of the difference
+%r156 = select i1 %r155, i288 %r95, i288 %r153  ; bit set -> keep the candidate, otherwise keep the difference
+%r157 = trunc i288 %r156 to i32  ; selected value is stored as %r1[0..8]
+%r159 = getelementptr i32, i32* %r1, i32 0
+store i32 %r157, i32* %r159
+%r160 = lshr i288 %r156, 32
+%r161 = trunc i288 %r160 to i32
+%r163 = getelementptr i32, i32* %r1, i32 1
+store i32 %r161, i32* %r163
+%r164 = lshr i288 %r160, 32
+%r165 = trunc i288 %r164 to i32
+%r167 = getelementptr i32, i32* %r1, i32 2
+store i32 %r165, i32* %r167
+%r168 = lshr i288 %r164, 32
+%r169 = trunc i288 %r168 to i32
+%r171 = getelementptr i32, i32* %r1, i32 3
+store i32 %r169, i32* %r171
+%r172 = lshr i288 %r168, 32
+%r173 = trunc i288 %r172 to i32
+%r175 = getelementptr i32, i32* %r1, i32 4
+store i32 %r173, i32* %r175
+%r176 = lshr i288 %r172, 32
+%r177 = trunc i288 %r176 to i32
+%r179 = getelementptr i32, i32* %r1, i32 5
+store i32 %r177, i32* %r179
+%r180 = lshr i288 %r176, 32
+%r181 = trunc i288 %r180 to i32
+%r183 = getelementptr i32, i32* %r1, i32 6
+store i32 %r181, i32* %r183
+%r184 = lshr i288 %r180, 32
+%r185 = trunc i288 %r184 to i32
+%r187 = getelementptr i32, i32* %r1, i32 7
+store i32 %r185, i32* %r187
+%r188 = lshr i288 %r184, 32
+%r189 = trunc i288 %r188 to i32
+%r191 = getelementptr i32, i32* %r1, i32 8
+store i32 %r189, i32* %r191
+ret void
+}
+define void @mcl_fp_montRed9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)  ; Reads 18 words from %r2 and 9 words from %r3, runs nine reduce rounds, then conditionally subtracts the %r3 value; writes 9 words to %r1.
+{  ; NOTE(review): looks like Montgomery reduction of a double-width value with the modulus at %r3; mulPv288x32 is defined outside this chunk - confirm.
+%r5 = getelementptr i32, i32* %r3, i32 -1  ; the word stored immediately before %r3[0]
+%r6 = load i32, i32* %r5  ; %r6 = %r3[-1], multiplied into the low word every round
+%r7 = load i32, i32* %r3  ; from here to %r63: pack %r3[0..8] into one i288, word i shifted left by 32*i
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i288
+%r59 = getelementptr i32, i32* %r3, i32 8
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i288
+%r62 = shl i288 %r61, 256
+%r63 = or i288 %r57, %r62
+%r64 = load i32, i32* %r2  ; from here to %r183: pack %r2[0..17] into one i576, word i shifted left by 32*i
+%r65 = zext i32 %r64 to i64
+%r67 = getelementptr i32, i32* %r2, i32 1
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i64
+%r70 = shl i64 %r69, 32
+%r71 = or i64 %r65, %r70
+%r72 = zext i64 %r71 to i96
+%r74 = getelementptr i32, i32* %r2, i32 2
+%r75 = load i32, i32* %r74
+%r76 = zext i32 %r75 to i96
+%r77 = shl i96 %r76, 64
+%r78 = or i96 %r72, %r77
+%r79 = zext i96 %r78 to i128
+%r81 = getelementptr i32, i32* %r2, i32 3
+%r82 = load i32, i32* %r81
+%r83 = zext i32 %r82 to i128
+%r84 = shl i128 %r83, 96
+%r85 = or i128 %r79, %r84
+%r86 = zext i128 %r85 to i160
+%r88 = getelementptr i32, i32* %r2, i32 4
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i160
+%r91 = shl i160 %r90, 128
+%r92 = or i160 %r86, %r91
+%r93 = zext i160 %r92 to i192
+%r95 = getelementptr i32, i32* %r2, i32 5
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i192
+%r98 = shl i192 %r97, 160
+%r99 = or i192 %r93, %r98
+%r100 = zext i192 %r99 to i224
+%r102 = getelementptr i32, i32* %r2, i32 6
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i224
+%r105 = shl i224 %r104, 192
+%r106 = or i224 %r100, %r105
+%r107 = zext i224 %r106 to i256
+%r109 = getelementptr i32, i32* %r2, i32 7
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i256
+%r112 = shl i256 %r111, 224
+%r113 = or i256 %r107, %r112
+%r114 = zext i256 %r113 to i288
+%r116 = getelementptr i32, i32* %r2, i32 8
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i288
+%r119 = shl i288 %r118, 256
+%r120 = or i288 %r114, %r119
+%r121 = zext i288 %r120 to i320
+%r123 = getelementptr i32, i32* %r2, i32 9
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i320
+%r126 = shl i320 %r125, 288
+%r127 = or i320 %r121, %r126
+%r128 = zext i320 %r127 to i352
+%r130 = getelementptr i32, i32* %r2, i32 10
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i352
+%r133 = shl i352 %r132, 320
+%r134 = or i352 %r128, %r133
+%r135 = zext i352 %r134 to i384
+%r137 = getelementptr i32, i32* %r2, i32 11
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i384
+%r140 = shl i384 %r139, 352
+%r141 = or i384 %r135, %r140
+%r142 = zext i384 %r141 to i416
+%r144 = getelementptr i32, i32* %r2, i32 12
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i416
+%r147 = shl i416 %r146, 384
+%r148 = or i416 %r142, %r147
+%r149 = zext i416 %r148 to i448
+%r151 = getelementptr i32, i32* %r2, i32 13
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i448
+%r154 = shl i448 %r153, 416
+%r155 = or i448 %r149, %r154
+%r156 = zext i448 %r155 to i480
+%r158 = getelementptr i32, i32* %r2, i32 14
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i480
+%r161 = shl i480 %r160, 448
+%r162 = or i480 %r156, %r161
+%r163 = zext i480 %r162 to i512
+%r165 = getelementptr i32, i32* %r2, i32 15
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i512
+%r168 = shl i512 %r167, 480
+%r169 = or i512 %r163, %r168
+%r170 = zext i512 %r169 to i544
+%r172 = getelementptr i32, i32* %r2, i32 16
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i544
+%r175 = shl i544 %r174, 512
+%r176 = or i544 %r170, %r175
+%r177 = zext i544 %r176 to i576
+%r179 = getelementptr i32, i32* %r2, i32 17
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i576
+%r182 = shl i576 %r181, 544
+%r183 = or i576 %r177, %r182
+%r184 = zext i576 %r183 to i608  ; one extra word of headroom above the 18 input words
+%r185 = trunc i608 %r184 to i32
+%r186 = mul i32 %r185, %r6  ; round 0: q = low word * %r6, wrapping at 32 bits
+%r187 = call i320 @mulPv288x32(i32* %r3, i32 %r186)  ; row for q against the value at %r3
+%r188 = zext i320 %r187 to i608
+%r189 = add i608 %r184, %r188
+%r190 = lshr i608 %r189, 32  ; discard the low word ...
+%r191 = trunc i608 %r190 to i576  ; ... and narrow the accumulator by 32 bits; every round repeats this
+%r192 = trunc i576 %r191 to i32
+%r193 = mul i32 %r192, %r6  ; round 1
+%r194 = call i320 @mulPv288x32(i32* %r3, i32 %r193)
+%r195 = zext i320 %r194 to i576
+%r196 = add i576 %r191, %r195
+%r197 = lshr i576 %r196, 32
+%r198 = trunc i576 %r197 to i544
+%r199 = trunc i544 %r198 to i32
+%r200 = mul i32 %r199, %r6  ; round 2
+%r201 = call i320 @mulPv288x32(i32* %r3, i32 %r200)
+%r202 = zext i320 %r201 to i544
+%r203 = add i544 %r198, %r202
+%r204 = lshr i544 %r203, 32
+%r205 = trunc i544 %r204 to i512
+%r206 = trunc i512 %r205 to i32
+%r207 = mul i32 %r206, %r6  ; round 3
+%r208 = call i320 @mulPv288x32(i32* %r3, i32 %r207)
+%r209 = zext i320 %r208 to i512
+%r210 = add i512 %r205, %r209
+%r211 = lshr i512 %r210, 32
+%r212 = trunc i512 %r211 to i480
+%r213 = trunc i480 %r212 to i32
+%r214 = mul i32 %r213, %r6  ; round 4
+%r215 = call i320 @mulPv288x32(i32* %r3, i32 %r214)
+%r216 = zext i320 %r215 to i480
+%r217 = add i480 %r212, %r216
+%r218 = lshr i480 %r217, 32
+%r219 = trunc i480 %r218 to i448
+%r220 = trunc i448 %r219 to i32
+%r221 = mul i32 %r220, %r6  ; round 5
+%r222 = call i320 @mulPv288x32(i32* %r3, i32 %r221)
+%r223 = zext i320 %r222 to i448
+%r224 = add i448 %r219, %r223
+%r225 = lshr i448 %r224, 32
+%r226 = trunc i448 %r225 to i416
+%r227 = trunc i416 %r226 to i32
+%r228 = mul i32 %r227, %r6  ; round 6
+%r229 = call i320 @mulPv288x32(i32* %r3, i32 %r228)
+%r230 = zext i320 %r229 to i416
+%r231 = add i416 %r226, %r230
+%r232 = lshr i416 %r231, 32
+%r233 = trunc i416 %r232 to i384
+%r234 = trunc i384 %r233 to i32
+%r235 = mul i32 %r234, %r6  ; round 7
+%r236 = call i320 @mulPv288x32(i32* %r3, i32 %r235)
+%r237 = zext i320 %r236 to i384
+%r238 = add i384 %r233, %r237
+%r239 = lshr i384 %r238, 32
+%r240 = trunc i384 %r239 to i352
+%r241 = trunc i352 %r240 to i32
+%r242 = mul i32 %r241, %r6  ; round 8 (last)
+%r243 = call i320 @mulPv288x32(i32* %r3, i32 %r242)
+%r244 = zext i320 %r243 to i352
+%r245 = add i352 %r240, %r244
+%r246 = lshr i352 %r245, 32
+%r247 = trunc i352 %r246 to i320  ; candidate result, kept at 320 bits
+%r248 = zext i288 %r63 to i320
+%r249 = sub i320 %r247, %r248  ; candidate minus the value at %r3
+%r250 = lshr i320 %r249, 288
+%r251 = trunc i320 %r250 to i1  ; bit 288 of the difference
+%r252 = select i1 %r251, i320 %r247, i320 %r249  ; bit set -> keep the candidate, otherwise keep the difference
+%r253 = trunc i320 %r252 to i288  ; low 288 bits are stored as %r1[0..8]
+%r254 = trunc i288 %r253 to i32
+%r256 = getelementptr i32, i32* %r1, i32 0
+store i32 %r254, i32* %r256
+%r257 = lshr i288 %r253, 32
+%r258 = trunc i288 %r257 to i32
+%r260 = getelementptr i32, i32* %r1, i32 1
+store i32 %r258, i32* %r260
+%r261 = lshr i288 %r257, 32
+%r262 = trunc i288 %r261 to i32
+%r264 = getelementptr i32, i32* %r1, i32 2
+store i32 %r262, i32* %r264
+%r265 = lshr i288 %r261, 32
+%r266 = trunc i288 %r265 to i32
+%r268 = getelementptr i32, i32* %r1, i32 3
+store i32 %r266, i32* %r268
+%r269 = lshr i288 %r265, 32
+%r270 = trunc i288 %r269 to i32
+%r272 = getelementptr i32, i32* %r1, i32 4
+store i32 %r270, i32* %r272
+%r273 = lshr i288 %r269, 32
+%r274 = trunc i288 %r273 to i32
+%r276 = getelementptr i32, i32* %r1, i32 5
+store i32 %r274, i32* %r276
+%r277 = lshr i288 %r273, 32
+%r278 = trunc i288 %r277 to i32
+%r280 = getelementptr i32, i32* %r1, i32 6
+store i32 %r278, i32* %r280
+%r281 = lshr i288 %r277, 32
+%r282 = trunc i288 %r281 to i32
+%r284 = getelementptr i32, i32* %r1, i32 7
+store i32 %r282, i32* %r284
+%r285 = lshr i288 %r281, 32
+%r286 = trunc i288 %r285 to i32
+%r288 = getelementptr i32, i32* %r1, i32 8
+store i32 %r286, i32* %r288
+ret void
+}
+define i32 @mcl_fp_addPre9L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)  ; Adds the 9-word values at %r3 and %r4, stores the low 9 words of the sum at %r2, and returns the bits from position 288 upward (the carry out).
+{  ; No reduction is performed here: the sum is stored as-is, truncated to 288 bits.
+%r5 = load i32, i32* %r3  ; from here to %r61: pack %r3[0..8] into one i288, word i shifted left by 32*i
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320  ; widened so the addition below cannot lose its carry
+%r63 = load i32, i32* %r4  ; from here to %r119: pack %r4[0..8] the same way
+%r64 = zext i32 %r63 to i64
+%r66 = getelementptr i32, i32* %r4, i32 1
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i64
+%r69 = shl i64 %r68, 32
+%r70 = or i64 %r64, %r69
+%r71 = zext i64 %r70 to i96
+%r73 = getelementptr i32, i32* %r4, i32 2
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i96
+%r76 = shl i96 %r75, 64
+%r77 = or i96 %r71, %r76
+%r78 = zext i96 %r77 to i128
+%r80 = getelementptr i32, i32* %r4, i32 3
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i128
+%r83 = shl i128 %r82, 96
+%r84 = or i128 %r78, %r83
+%r85 = zext i128 %r84 to i160
+%r87 = getelementptr i32, i32* %r4, i32 4
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i160
+%r90 = shl i160 %r89, 128
+%r91 = or i160 %r85, %r90
+%r92 = zext i160 %r91 to i192
+%r94 = getelementptr i32, i32* %r4, i32 5
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i192
+%r97 = shl i192 %r96, 160
+%r98 = or i192 %r92, %r97
+%r99 = zext i192 %r98 to i224
+%r101 = getelementptr i32, i32* %r4, i32 6
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i224
+%r104 = shl i224 %r103, 192
+%r105 = or i224 %r99, %r104
+%r106 = zext i224 %r105 to i256
+%r108 = getelementptr i32, i32* %r4, i32 7
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i256
+%r111 = shl i256 %r110, 224
+%r112 = or i256 %r106, %r111
+%r113 = zext i256 %r112 to i288
+%r115 = getelementptr i32, i32* %r4, i32 8
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i288
+%r118 = shl i288 %r117, 256
+%r119 = or i288 %r113, %r118
+%r120 = zext i288 %r119 to i320
+%r121 = add i320 %r62, %r120  ; full sum, carry included
+%r122 = trunc i320 %r121 to i288  ; low 288 bits are stored as %r2[0..8]
+%r123 = trunc i288 %r122 to i32
+%r125 = getelementptr i32, i32* %r2, i32 0
+store i32 %r123, i32* %r125
+%r126 = lshr i288 %r122, 32
+%r127 = trunc i288 %r126 to i32
+%r129 = getelementptr i32, i32* %r2, i32 1
+store i32 %r127, i32* %r129
+%r130 = lshr i288 %r126, 32
+%r131 = trunc i288 %r130 to i32
+%r133 = getelementptr i32, i32* %r2, i32 2
+store i32 %r131, i32* %r133
+%r134 = lshr i288 %r130, 32
+%r135 = trunc i288 %r134 to i32
+%r137 = getelementptr i32, i32* %r2, i32 3
+store i32 %r135, i32* %r137
+%r138 = lshr i288 %r134, 32
+%r139 = trunc i288 %r138 to i32
+%r141 = getelementptr i32, i32* %r2, i32 4
+store i32 %r139, i32* %r141
+%r142 = lshr i288 %r138, 32
+%r143 = trunc i288 %r142 to i32
+%r145 = getelementptr i32, i32* %r2, i32 5
+store i32 %r143, i32* %r145
+%r146 = lshr i288 %r142, 32
+%r147 = trunc i288 %r146 to i32
+%r149 = getelementptr i32, i32* %r2, i32 6
+store i32 %r147, i32* %r149
+%r150 = lshr i288 %r146, 32
+%r151 = trunc i288 %r150 to i32
+%r153 = getelementptr i32, i32* %r2, i32 7
+store i32 %r151, i32* %r153
+%r154 = lshr i288 %r150, 32
+%r155 = trunc i288 %r154 to i32
+%r157 = getelementptr i32, i32* %r2, i32 8
+store i32 %r155, i32* %r157
+%r158 = lshr i320 %r121, 288  ; bits 288 and above of the sum
+%r159 = trunc i320 %r158 to i32
+ret i32 %r159  ; carry out of the 288-bit addition
+}
+define i32 @mcl_fp_subPre9L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r3
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r63 = load i32, i32* %r4
+%r64 = zext i32 %r63 to i64
+%r66 = getelementptr i32, i32* %r4, i32 1
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i64
+%r69 = shl i64 %r68, 32
+%r70 = or i64 %r64, %r69
+%r71 = zext i64 %r70 to i96
+%r73 = getelementptr i32, i32* %r4, i32 2
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i96
+%r76 = shl i96 %r75, 64
+%r77 = or i96 %r71, %r76
+%r78 = zext i96 %r77 to i128
+%r80 = getelementptr i32, i32* %r4, i32 3
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i128
+%r83 = shl i128 %r82, 96
+%r84 = or i128 %r78, %r83
+%r85 = zext i128 %r84 to i160
+%r87 = getelementptr i32, i32* %r4, i32 4
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i160
+%r90 = shl i160 %r89, 128
+%r91 = or i160 %r85, %r90
+%r92 = zext i160 %r91 to i192
+%r94 = getelementptr i32, i32* %r4, i32 5
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i192
+%r97 = shl i192 %r96, 160
+%r98 = or i192 %r92, %r97
+%r99 = zext i192 %r98 to i224
+%r101 = getelementptr i32, i32* %r4, i32 6
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i224
+%r104 = shl i224 %r103, 192
+%r105 = or i224 %r99, %r104
+%r106 = zext i224 %r105 to i256
+%r108 = getelementptr i32, i32* %r4, i32 7
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i256
+%r111 = shl i256 %r110, 224
+%r112 = or i256 %r106, %r111
+%r113 = zext i256 %r112 to i288
+%r115 = getelementptr i32, i32* %r4, i32 8
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i288
+%r118 = shl i288 %r117, 256
+%r119 = or i288 %r113, %r118
+%r120 = zext i288 %r119 to i320
+%r121 = sub i320 %r62, %r120
+%r122 = trunc i320 %r121 to i288
+%r123 = trunc i288 %r122 to i32
+%r125 = getelementptr i32, i32* %r2, i32 0
+store i32 %r123, i32* %r125
+%r126 = lshr i288 %r122, 32
+%r127 = trunc i288 %r126 to i32
+%r129 = getelementptr i32, i32* %r2, i32 1
+store i32 %r127, i32* %r129
+%r130 = lshr i288 %r126, 32
+%r131 = trunc i288 %r130 to i32
+%r133 = getelementptr i32, i32* %r2, i32 2
+store i32 %r131, i32* %r133
+%r134 = lshr i288 %r130, 32
+%r135 = trunc i288 %r134 to i32
+%r137 = getelementptr i32, i32* %r2, i32 3
+store i32 %r135, i32* %r137
+%r138 = lshr i288 %r134, 32
+%r139 = trunc i288 %r138 to i32
+%r141 = getelementptr i32, i32* %r2, i32 4
+store i32 %r139, i32* %r141
+%r142 = lshr i288 %r138, 32
+%r143 = trunc i288 %r142 to i32
+%r145 = getelementptr i32, i32* %r2, i32 5
+store i32 %r143, i32* %r145
+%r146 = lshr i288 %r142, 32
+%r147 = trunc i288 %r146 to i32
+%r149 = getelementptr i32, i32* %r2, i32 6
+store i32 %r147, i32* %r149
+%r150 = lshr i288 %r146, 32
+%r151 = trunc i288 %r150 to i32
+%r153 = getelementptr i32, i32* %r2, i32 7
+store i32 %r151, i32* %r153
+%r154 = lshr i288 %r150, 32
+%r155 = trunc i288 %r154 to i32
+%r157 = getelementptr i32, i32* %r2, i32 8
+store i32 %r155, i32* %r157
+%r158 = lshr i320 %r121, 288
+%r159 = trunc i320 %r158 to i32
+%r161 = and i32 %r159, 1
+ret i32 %r161
+}
+define void @mcl_fp_shr1_9L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r3 = load i32, i32* %r2
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = zext i192 %r38 to i224
+%r41 = getelementptr i32, i32* %r2, i32 6
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i224
+%r44 = shl i224 %r43, 192
+%r45 = or i224 %r39, %r44
+%r46 = zext i224 %r45 to i256
+%r48 = getelementptr i32, i32* %r2, i32 7
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i256
+%r51 = shl i256 %r50, 224
+%r52 = or i256 %r46, %r51
+%r53 = zext i256 %r52 to i288
+%r55 = getelementptr i32, i32* %r2, i32 8
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i288
+%r58 = shl i288 %r57, 256
+%r59 = or i288 %r53, %r58
+%r60 = lshr i288 %r59, 1
+%r61 = trunc i288 %r60 to i32
+%r63 = getelementptr i32, i32* %r1, i32 0
+store i32 %r61, i32* %r63
+%r64 = lshr i288 %r60, 32
+%r65 = trunc i288 %r64 to i32
+%r67 = getelementptr i32, i32* %r1, i32 1
+store i32 %r65, i32* %r67
+%r68 = lshr i288 %r64, 32
+%r69 = trunc i288 %r68 to i32
+%r71 = getelementptr i32, i32* %r1, i32 2
+store i32 %r69, i32* %r71
+%r72 = lshr i288 %r68, 32
+%r73 = trunc i288 %r72 to i32
+%r75 = getelementptr i32, i32* %r1, i32 3
+store i32 %r73, i32* %r75
+%r76 = lshr i288 %r72, 32
+%r77 = trunc i288 %r76 to i32
+%r79 = getelementptr i32, i32* %r1, i32 4
+store i32 %r77, i32* %r79
+%r80 = lshr i288 %r76, 32
+%r81 = trunc i288 %r80 to i32
+%r83 = getelementptr i32, i32* %r1, i32 5
+store i32 %r81, i32* %r83
+%r84 = lshr i288 %r80, 32
+%r85 = trunc i288 %r84 to i32
+%r87 = getelementptr i32, i32* %r1, i32 6
+store i32 %r85, i32* %r87
+%r88 = lshr i288 %r84, 32
+%r89 = trunc i288 %r88 to i32
+%r91 = getelementptr i32, i32* %r1, i32 7
+store i32 %r89, i32* %r91
+%r92 = lshr i288 %r88, 32
+%r93 = trunc i288 %r92 to i32
+%r95 = getelementptr i32, i32* %r1, i32 8
+store i32 %r93, i32* %r95
+ret void
+}
+define void @mcl_fp_add9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = load i32, i32* %r3
+%r63 = zext i32 %r62 to i64
+%r65 = getelementptr i32, i32* %r3, i32 1
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i64
+%r68 = shl i64 %r67, 32
+%r69 = or i64 %r63, %r68
+%r70 = zext i64 %r69 to i96
+%r72 = getelementptr i32, i32* %r3, i32 2
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i96
+%r75 = shl i96 %r74, 64
+%r76 = or i96 %r70, %r75
+%r77 = zext i96 %r76 to i128
+%r79 = getelementptr i32, i32* %r3, i32 3
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i128
+%r82 = shl i128 %r81, 96
+%r83 = or i128 %r77, %r82
+%r84 = zext i128 %r83 to i160
+%r86 = getelementptr i32, i32* %r3, i32 4
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i160
+%r89 = shl i160 %r88, 128
+%r90 = or i160 %r84, %r89
+%r91 = zext i160 %r90 to i192
+%r93 = getelementptr i32, i32* %r3, i32 5
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i192
+%r96 = shl i192 %r95, 160
+%r97 = or i192 %r91, %r96
+%r98 = zext i192 %r97 to i224
+%r100 = getelementptr i32, i32* %r3, i32 6
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i224
+%r103 = shl i224 %r102, 192
+%r104 = or i224 %r98, %r103
+%r105 = zext i224 %r104 to i256
+%r107 = getelementptr i32, i32* %r3, i32 7
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i256
+%r110 = shl i256 %r109, 224
+%r111 = or i256 %r105, %r110
+%r112 = zext i256 %r111 to i288
+%r114 = getelementptr i32, i32* %r3, i32 8
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i288
+%r117 = shl i288 %r116, 256
+%r118 = or i288 %r112, %r117
+%r119 = zext i288 %r61 to i320
+%r120 = zext i288 %r118 to i320
+%r121 = add i320 %r119, %r120
+%r122 = trunc i320 %r121 to i288
+%r123 = trunc i288 %r122 to i32
+%r125 = getelementptr i32, i32* %r1, i32 0
+store i32 %r123, i32* %r125
+%r126 = lshr i288 %r122, 32
+%r127 = trunc i288 %r126 to i32
+%r129 = getelementptr i32, i32* %r1, i32 1
+store i32 %r127, i32* %r129
+%r130 = lshr i288 %r126, 32
+%r131 = trunc i288 %r130 to i32
+%r133 = getelementptr i32, i32* %r1, i32 2
+store i32 %r131, i32* %r133
+%r134 = lshr i288 %r130, 32
+%r135 = trunc i288 %r134 to i32
+%r137 = getelementptr i32, i32* %r1, i32 3
+store i32 %r135, i32* %r137
+%r138 = lshr i288 %r134, 32
+%r139 = trunc i288 %r138 to i32
+%r141 = getelementptr i32, i32* %r1, i32 4
+store i32 %r139, i32* %r141
+%r142 = lshr i288 %r138, 32
+%r143 = trunc i288 %r142 to i32
+%r145 = getelementptr i32, i32* %r1, i32 5
+store i32 %r143, i32* %r145
+%r146 = lshr i288 %r142, 32
+%r147 = trunc i288 %r146 to i32
+%r149 = getelementptr i32, i32* %r1, i32 6
+store i32 %r147, i32* %r149
+%r150 = lshr i288 %r146, 32
+%r151 = trunc i288 %r150 to i32
+%r153 = getelementptr i32, i32* %r1, i32 7
+store i32 %r151, i32* %r153
+%r154 = lshr i288 %r150, 32
+%r155 = trunc i288 %r154 to i32
+%r157 = getelementptr i32, i32* %r1, i32 8
+store i32 %r155, i32* %r157
+%r158 = load i32, i32* %r4
+%r159 = zext i32 %r158 to i64
+%r161 = getelementptr i32, i32* %r4, i32 1
+%r162 = load i32, i32* %r161
+%r163 = zext i32 %r162 to i64
+%r164 = shl i64 %r163, 32
+%r165 = or i64 %r159, %r164
+%r166 = zext i64 %r165 to i96
+%r168 = getelementptr i32, i32* %r4, i32 2
+%r169 = load i32, i32* %r168
+%r170 = zext i32 %r169 to i96
+%r171 = shl i96 %r170, 64
+%r172 = or i96 %r166, %r171
+%r173 = zext i96 %r172 to i128
+%r175 = getelementptr i32, i32* %r4, i32 3
+%r176 = load i32, i32* %r175
+%r177 = zext i32 %r176 to i128
+%r178 = shl i128 %r177, 96
+%r179 = or i128 %r173, %r178
+%r180 = zext i128 %r179 to i160
+%r182 = getelementptr i32, i32* %r4, i32 4
+%r183 = load i32, i32* %r182
+%r184 = zext i32 %r183 to i160
+%r185 = shl i160 %r184, 128
+%r186 = or i160 %r180, %r185
+%r187 = zext i160 %r186 to i192
+%r189 = getelementptr i32, i32* %r4, i32 5
+%r190 = load i32, i32* %r189
+%r191 = zext i32 %r190 to i192
+%r192 = shl i192 %r191, 160
+%r193 = or i192 %r187, %r192
+%r194 = zext i192 %r193 to i224
+%r196 = getelementptr i32, i32* %r4, i32 6
+%r197 = load i32, i32* %r196
+%r198 = zext i32 %r197 to i224
+%r199 = shl i224 %r198, 192
+%r200 = or i224 %r194, %r199
+%r201 = zext i224 %r200 to i256
+%r203 = getelementptr i32, i32* %r4, i32 7
+%r204 = load i32, i32* %r203
+%r205 = zext i32 %r204 to i256
+%r206 = shl i256 %r205, 224
+%r207 = or i256 %r201, %r206
+%r208 = zext i256 %r207 to i288
+%r210 = getelementptr i32, i32* %r4, i32 8
+%r211 = load i32, i32* %r210
+%r212 = zext i32 %r211 to i288
+%r213 = shl i288 %r212, 256
+%r214 = or i288 %r208, %r213
+%r215 = zext i288 %r214 to i320
+%r216 = sub i320 %r121, %r215
+%r217 = lshr i320 %r216, 288
+%r218 = trunc i320 %r217 to i1
+br i1%r218, label %carry, label %nocarry
+nocarry:
+%r219 = trunc i320 %r216 to i288
+%r220 = trunc i288 %r219 to i32
+%r222 = getelementptr i32, i32* %r1, i32 0
+store i32 %r220, i32* %r222
+%r223 = lshr i288 %r219, 32
+%r224 = trunc i288 %r223 to i32
+%r226 = getelementptr i32, i32* %r1, i32 1
+store i32 %r224, i32* %r226
+%r227 = lshr i288 %r223, 32
+%r228 = trunc i288 %r227 to i32
+%r230 = getelementptr i32, i32* %r1, i32 2
+store i32 %r228, i32* %r230
+%r231 = lshr i288 %r227, 32
+%r232 = trunc i288 %r231 to i32
+%r234 = getelementptr i32, i32* %r1, i32 3
+store i32 %r232, i32* %r234
+%r235 = lshr i288 %r231, 32
+%r236 = trunc i288 %r235 to i32
+%r238 = getelementptr i32, i32* %r1, i32 4
+store i32 %r236, i32* %r238
+%r239 = lshr i288 %r235, 32
+%r240 = trunc i288 %r239 to i32
+%r242 = getelementptr i32, i32* %r1, i32 5
+store i32 %r240, i32* %r242
+%r243 = lshr i288 %r239, 32
+%r244 = trunc i288 %r243 to i32
+%r246 = getelementptr i32, i32* %r1, i32 6
+store i32 %r244, i32* %r246
+%r247 = lshr i288 %r243, 32
+%r248 = trunc i288 %r247 to i32
+%r250 = getelementptr i32, i32* %r1, i32 7
+store i32 %r248, i32* %r250
+%r251 = lshr i288 %r247, 32
+%r252 = trunc i288 %r251 to i32
+%r254 = getelementptr i32, i32* %r1, i32 8
+store i32 %r252, i32* %r254
+ret void
+carry:
+ret void
+}
+define void @mcl_fp_addNF9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = load i32, i32* %r3
+%r63 = zext i32 %r62 to i64
+%r65 = getelementptr i32, i32* %r3, i32 1
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i64
+%r68 = shl i64 %r67, 32
+%r69 = or i64 %r63, %r68
+%r70 = zext i64 %r69 to i96
+%r72 = getelementptr i32, i32* %r3, i32 2
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i96
+%r75 = shl i96 %r74, 64
+%r76 = or i96 %r70, %r75
+%r77 = zext i96 %r76 to i128
+%r79 = getelementptr i32, i32* %r3, i32 3
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i128
+%r82 = shl i128 %r81, 96
+%r83 = or i128 %r77, %r82
+%r84 = zext i128 %r83 to i160
+%r86 = getelementptr i32, i32* %r3, i32 4
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i160
+%r89 = shl i160 %r88, 128
+%r90 = or i160 %r84, %r89
+%r91 = zext i160 %r90 to i192
+%r93 = getelementptr i32, i32* %r3, i32 5
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i192
+%r96 = shl i192 %r95, 160
+%r97 = or i192 %r91, %r96
+%r98 = zext i192 %r97 to i224
+%r100 = getelementptr i32, i32* %r3, i32 6
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i224
+%r103 = shl i224 %r102, 192
+%r104 = or i224 %r98, %r103
+%r105 = zext i224 %r104 to i256
+%r107 = getelementptr i32, i32* %r3, i32 7
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i256
+%r110 = shl i256 %r109, 224
+%r111 = or i256 %r105, %r110
+%r112 = zext i256 %r111 to i288
+%r114 = getelementptr i32, i32* %r3, i32 8
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i288
+%r117 = shl i288 %r116, 256
+%r118 = or i288 %r112, %r117
+%r119 = add i288 %r61, %r118
+%r120 = load i32, i32* %r4
+%r121 = zext i32 %r120 to i64
+%r123 = getelementptr i32, i32* %r4, i32 1
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i64
+%r126 = shl i64 %r125, 32
+%r127 = or i64 %r121, %r126
+%r128 = zext i64 %r127 to i96
+%r130 = getelementptr i32, i32* %r4, i32 2
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i96
+%r133 = shl i96 %r132, 64
+%r134 = or i96 %r128, %r133
+%r135 = zext i96 %r134 to i128
+%r137 = getelementptr i32, i32* %r4, i32 3
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i128
+%r140 = shl i128 %r139, 96
+%r141 = or i128 %r135, %r140
+%r142 = zext i128 %r141 to i160
+%r144 = getelementptr i32, i32* %r4, i32 4
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i160
+%r147 = shl i160 %r146, 128
+%r148 = or i160 %r142, %r147
+%r149 = zext i160 %r148 to i192
+%r151 = getelementptr i32, i32* %r4, i32 5
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i192
+%r154 = shl i192 %r153, 160
+%r155 = or i192 %r149, %r154
+%r156 = zext i192 %r155 to i224
+%r158 = getelementptr i32, i32* %r4, i32 6
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i224
+%r161 = shl i224 %r160, 192
+%r162 = or i224 %r156, %r161
+%r163 = zext i224 %r162 to i256
+%r165 = getelementptr i32, i32* %r4, i32 7
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i256
+%r168 = shl i256 %r167, 224
+%r169 = or i256 %r163, %r168
+%r170 = zext i256 %r169 to i288
+%r172 = getelementptr i32, i32* %r4, i32 8
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i288
+%r175 = shl i288 %r174, 256
+%r176 = or i288 %r170, %r175
+%r177 = sub i288 %r119, %r176
+%r178 = lshr i288 %r177, 287
+%r179 = trunc i288 %r178 to i1
+%r180 = select i1 %r179, i288 %r119, i288 %r177
+%r181 = trunc i288 %r180 to i32
+%r183 = getelementptr i32, i32* %r1, i32 0
+store i32 %r181, i32* %r183
+%r184 = lshr i288 %r180, 32
+%r185 = trunc i288 %r184 to i32
+%r187 = getelementptr i32, i32* %r1, i32 1
+store i32 %r185, i32* %r187
+%r188 = lshr i288 %r184, 32
+%r189 = trunc i288 %r188 to i32
+%r191 = getelementptr i32, i32* %r1, i32 2
+store i32 %r189, i32* %r191
+%r192 = lshr i288 %r188, 32
+%r193 = trunc i288 %r192 to i32
+%r195 = getelementptr i32, i32* %r1, i32 3
+store i32 %r193, i32* %r195
+%r196 = lshr i288 %r192, 32
+%r197 = trunc i288 %r196 to i32
+%r199 = getelementptr i32, i32* %r1, i32 4
+store i32 %r197, i32* %r199
+%r200 = lshr i288 %r196, 32
+%r201 = trunc i288 %r200 to i32
+%r203 = getelementptr i32, i32* %r1, i32 5
+store i32 %r201, i32* %r203
+%r204 = lshr i288 %r200, 32
+%r205 = trunc i288 %r204 to i32
+%r207 = getelementptr i32, i32* %r1, i32 6
+store i32 %r205, i32* %r207
+%r208 = lshr i288 %r204, 32
+%r209 = trunc i288 %r208 to i32
+%r211 = getelementptr i32, i32* %r1, i32 7
+store i32 %r209, i32* %r211
+%r212 = lshr i288 %r208, 32
+%r213 = trunc i288 %r212 to i32
+%r215 = getelementptr i32, i32* %r1, i32 8
+store i32 %r213, i32* %r215
+ret void
+}
+define void @mcl_fp_sub9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = load i32, i32* %r3
+%r63 = zext i32 %r62 to i64
+%r65 = getelementptr i32, i32* %r3, i32 1
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i64
+%r68 = shl i64 %r67, 32
+%r69 = or i64 %r63, %r68
+%r70 = zext i64 %r69 to i96
+%r72 = getelementptr i32, i32* %r3, i32 2
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i96
+%r75 = shl i96 %r74, 64
+%r76 = or i96 %r70, %r75
+%r77 = zext i96 %r76 to i128
+%r79 = getelementptr i32, i32* %r3, i32 3
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i128
+%r82 = shl i128 %r81, 96
+%r83 = or i128 %r77, %r82
+%r84 = zext i128 %r83 to i160
+%r86 = getelementptr i32, i32* %r3, i32 4
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i160
+%r89 = shl i160 %r88, 128
+%r90 = or i160 %r84, %r89
+%r91 = zext i160 %r90 to i192
+%r93 = getelementptr i32, i32* %r3, i32 5
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i192
+%r96 = shl i192 %r95, 160
+%r97 = or i192 %r91, %r96
+%r98 = zext i192 %r97 to i224
+%r100 = getelementptr i32, i32* %r3, i32 6
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i224
+%r103 = shl i224 %r102, 192
+%r104 = or i224 %r98, %r103
+%r105 = zext i224 %r104 to i256
+%r107 = getelementptr i32, i32* %r3, i32 7
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i256
+%r110 = shl i256 %r109, 224
+%r111 = or i256 %r105, %r110
+%r112 = zext i256 %r111 to i288
+%r114 = getelementptr i32, i32* %r3, i32 8
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i288
+%r117 = shl i288 %r116, 256
+%r118 = or i288 %r112, %r117
+%r119 = zext i288 %r61 to i320
+%r120 = zext i288 %r118 to i320
+%r121 = sub i320 %r119, %r120
+%r122 = trunc i320 %r121 to i288
+%r123 = lshr i320 %r121, 288
+%r124 = trunc i320 %r123 to i1
+%r125 = trunc i288 %r122 to i32
+%r127 = getelementptr i32, i32* %r1, i32 0
+store i32 %r125, i32* %r127
+%r128 = lshr i288 %r122, 32
+%r129 = trunc i288 %r128 to i32
+%r131 = getelementptr i32, i32* %r1, i32 1
+store i32 %r129, i32* %r131
+%r132 = lshr i288 %r128, 32
+%r133 = trunc i288 %r132 to i32
+%r135 = getelementptr i32, i32* %r1, i32 2
+store i32 %r133, i32* %r135
+%r136 = lshr i288 %r132, 32
+%r137 = trunc i288 %r136 to i32
+%r139 = getelementptr i32, i32* %r1, i32 3
+store i32 %r137, i32* %r139
+%r140 = lshr i288 %r136, 32
+%r141 = trunc i288 %r140 to i32
+%r143 = getelementptr i32, i32* %r1, i32 4
+store i32 %r141, i32* %r143
+%r144 = lshr i288 %r140, 32
+%r145 = trunc i288 %r144 to i32
+%r147 = getelementptr i32, i32* %r1, i32 5
+store i32 %r145, i32* %r147
+%r148 = lshr i288 %r144, 32
+%r149 = trunc i288 %r148 to i32
+%r151 = getelementptr i32, i32* %r1, i32 6
+store i32 %r149, i32* %r151
+%r152 = lshr i288 %r148, 32
+%r153 = trunc i288 %r152 to i32
+%r155 = getelementptr i32, i32* %r1, i32 7
+store i32 %r153, i32* %r155
+%r156 = lshr i288 %r152, 32
+%r157 = trunc i288 %r156 to i32
+%r159 = getelementptr i32, i32* %r1, i32 8
+store i32 %r157, i32* %r159
+br i1%r124, label %carry, label %nocarry
+nocarry:
+ret void
+carry:
+%r160 = load i32, i32* %r4
+%r161 = zext i32 %r160 to i64
+%r163 = getelementptr i32, i32* %r4, i32 1
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i64
+%r166 = shl i64 %r165, 32
+%r167 = or i64 %r161, %r166
+%r168 = zext i64 %r167 to i96
+%r170 = getelementptr i32, i32* %r4, i32 2
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i96
+%r173 = shl i96 %r172, 64
+%r174 = or i96 %r168, %r173
+%r175 = zext i96 %r174 to i128
+%r177 = getelementptr i32, i32* %r4, i32 3
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i128
+%r180 = shl i128 %r179, 96
+%r181 = or i128 %r175, %r180
+%r182 = zext i128 %r181 to i160
+%r184 = getelementptr i32, i32* %r4, i32 4
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i160
+%r187 = shl i160 %r186, 128
+%r188 = or i160 %r182, %r187
+%r189 = zext i160 %r188 to i192
+%r191 = getelementptr i32, i32* %r4, i32 5
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i192
+%r194 = shl i192 %r193, 160
+%r195 = or i192 %r189, %r194
+%r196 = zext i192 %r195 to i224
+%r198 = getelementptr i32, i32* %r4, i32 6
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i224
+%r201 = shl i224 %r200, 192
+%r202 = or i224 %r196, %r201
+%r203 = zext i224 %r202 to i256
+%r205 = getelementptr i32, i32* %r4, i32 7
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i256
+%r208 = shl i256 %r207, 224
+%r209 = or i256 %r203, %r208
+%r210 = zext i256 %r209 to i288
+%r212 = getelementptr i32, i32* %r4, i32 8
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i288
+%r215 = shl i288 %r214, 256
+%r216 = or i288 %r210, %r215
+%r217 = add i288 %r122, %r216
+%r218 = trunc i288 %r217 to i32
+%r220 = getelementptr i32, i32* %r1, i32 0
+store i32 %r218, i32* %r220
+%r221 = lshr i288 %r217, 32
+%r222 = trunc i288 %r221 to i32
+%r224 = getelementptr i32, i32* %r1, i32 1
+store i32 %r222, i32* %r224
+%r225 = lshr i288 %r221, 32
+%r226 = trunc i288 %r225 to i32
+%r228 = getelementptr i32, i32* %r1, i32 2
+store i32 %r226, i32* %r228
+%r229 = lshr i288 %r225, 32
+%r230 = trunc i288 %r229 to i32
+%r232 = getelementptr i32, i32* %r1, i32 3
+store i32 %r230, i32* %r232
+%r233 = lshr i288 %r229, 32
+%r234 = trunc i288 %r233 to i32
+%r236 = getelementptr i32, i32* %r1, i32 4
+store i32 %r234, i32* %r236
+%r237 = lshr i288 %r233, 32
+%r238 = trunc i288 %r237 to i32
+%r240 = getelementptr i32, i32* %r1, i32 5
+store i32 %r238, i32* %r240
+%r241 = lshr i288 %r237, 32
+%r242 = trunc i288 %r241 to i32
+%r244 = getelementptr i32, i32* %r1, i32 6
+store i32 %r242, i32* %r244
+%r245 = lshr i288 %r241, 32
+%r246 = trunc i288 %r245 to i32
+%r248 = getelementptr i32, i32* %r1, i32 7
+store i32 %r246, i32* %r248
+%r249 = lshr i288 %r245, 32
+%r250 = trunc i288 %r249 to i32
+%r252 = getelementptr i32, i32* %r1, i32 8
+store i32 %r250, i32* %r252
+ret void
+}
+define void @mcl_fp_subNF9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = load i32, i32* %r3
+%r63 = zext i32 %r62 to i64
+%r65 = getelementptr i32, i32* %r3, i32 1
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i64
+%r68 = shl i64 %r67, 32
+%r69 = or i64 %r63, %r68
+%r70 = zext i64 %r69 to i96
+%r72 = getelementptr i32, i32* %r3, i32 2
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i96
+%r75 = shl i96 %r74, 64
+%r76 = or i96 %r70, %r75
+%r77 = zext i96 %r76 to i128
+%r79 = getelementptr i32, i32* %r3, i32 3
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i128
+%r82 = shl i128 %r81, 96
+%r83 = or i128 %r77, %r82
+%r84 = zext i128 %r83 to i160
+%r86 = getelementptr i32, i32* %r3, i32 4
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i160
+%r89 = shl i160 %r88, 128
+%r90 = or i160 %r84, %r89
+%r91 = zext i160 %r90 to i192
+%r93 = getelementptr i32, i32* %r3, i32 5
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i192
+%r96 = shl i192 %r95, 160
+%r97 = or i192 %r91, %r96
+%r98 = zext i192 %r97 to i224
+%r100 = getelementptr i32, i32* %r3, i32 6
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i224
+%r103 = shl i224 %r102, 192
+%r104 = or i224 %r98, %r103
+%r105 = zext i224 %r104 to i256
+%r107 = getelementptr i32, i32* %r3, i32 7
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i256
+%r110 = shl i256 %r109, 224
+%r111 = or i256 %r105, %r110
+%r112 = zext i256 %r111 to i288
+%r114 = getelementptr i32, i32* %r3, i32 8
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i288
+%r117 = shl i288 %r116, 256
+%r118 = or i288 %r112, %r117
+%r119 = sub i288 %r61, %r118
+%r120 = lshr i288 %r119, 287
+%r121 = trunc i288 %r120 to i1
+%r122 = load i32, i32* %r4
+%r123 = zext i32 %r122 to i64
+%r125 = getelementptr i32, i32* %r4, i32 1
+%r126 = load i32, i32* %r125
+%r127 = zext i32 %r126 to i64
+%r128 = shl i64 %r127, 32
+%r129 = or i64 %r123, %r128
+%r130 = zext i64 %r129 to i96
+%r132 = getelementptr i32, i32* %r4, i32 2
+%r133 = load i32, i32* %r132
+%r134 = zext i32 %r133 to i96
+%r135 = shl i96 %r134, 64
+%r136 = or i96 %r130, %r135
+%r137 = zext i96 %r136 to i128
+%r139 = getelementptr i32, i32* %r4, i32 3
+%r140 = load i32, i32* %r139
+%r141 = zext i32 %r140 to i128
+%r142 = shl i128 %r141, 96
+%r143 = or i128 %r137, %r142
+%r144 = zext i128 %r143 to i160
+%r146 = getelementptr i32, i32* %r4, i32 4
+%r147 = load i32, i32* %r146
+%r148 = zext i32 %r147 to i160
+%r149 = shl i160 %r148, 128
+%r150 = or i160 %r144, %r149
+%r151 = zext i160 %r150 to i192
+%r153 = getelementptr i32, i32* %r4, i32 5
+%r154 = load i32, i32* %r153
+%r155 = zext i32 %r154 to i192
+%r156 = shl i192 %r155, 160
+%r157 = or i192 %r151, %r156
+%r158 = zext i192 %r157 to i224
+%r160 = getelementptr i32, i32* %r4, i32 6
+%r161 = load i32, i32* %r160
+%r162 = zext i32 %r161 to i224
+%r163 = shl i224 %r162, 192
+%r164 = or i224 %r158, %r163
+%r165 = zext i224 %r164 to i256
+%r167 = getelementptr i32, i32* %r4, i32 7
+%r168 = load i32, i32* %r167
+%r169 = zext i32 %r168 to i256
+%r170 = shl i256 %r169, 224
+%r171 = or i256 %r165, %r170
+%r172 = zext i256 %r171 to i288
+%r174 = getelementptr i32, i32* %r4, i32 8
+%r175 = load i32, i32* %r174
+%r176 = zext i32 %r175 to i288
+%r177 = shl i288 %r176, 256
+%r178 = or i288 %r172, %r177
+%r180 = select i1 %r121, i288 %r178, i288 0
+%r181 = add i288 %r119, %r180
+%r182 = trunc i288 %r181 to i32
+%r184 = getelementptr i32, i32* %r1, i32 0
+store i32 %r182, i32* %r184
+%r185 = lshr i288 %r181, 32
+%r186 = trunc i288 %r185 to i32
+%r188 = getelementptr i32, i32* %r1, i32 1
+store i32 %r186, i32* %r188
+%r189 = lshr i288 %r185, 32
+%r190 = trunc i288 %r189 to i32
+%r192 = getelementptr i32, i32* %r1, i32 2
+store i32 %r190, i32* %r192
+%r193 = lshr i288 %r189, 32
+%r194 = trunc i288 %r193 to i32
+%r196 = getelementptr i32, i32* %r1, i32 3
+store i32 %r194, i32* %r196
+%r197 = lshr i288 %r193, 32
+%r198 = trunc i288 %r197 to i32
+%r200 = getelementptr i32, i32* %r1, i32 4
+store i32 %r198, i32* %r200
+%r201 = lshr i288 %r197, 32
+%r202 = trunc i288 %r201 to i32
+%r204 = getelementptr i32, i32* %r1, i32 5
+store i32 %r202, i32* %r204
+%r205 = lshr i288 %r201, 32
+%r206 = trunc i288 %r205 to i32
+%r208 = getelementptr i32, i32* %r1, i32 6
+store i32 %r206, i32* %r208
+%r209 = lshr i288 %r205, 32
+%r210 = trunc i288 %r209 to i32
+%r212 = getelementptr i32, i32* %r1, i32 7
+store i32 %r210, i32* %r212
+%r213 = lshr i288 %r209, 32
+%r214 = trunc i288 %r213 to i32
+%r216 = getelementptr i32, i32* %r1, i32 8
+store i32 %r214, i32* %r216
+ret void
+}
+define void @mcl_fpDbl_add9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)  ; double-width add: 18 words at %r2 + 18 words at %r3 -> 18 words at %r1
+{  ; the upper 9 output words get one conditional subtraction of the 9-word value at %r4 (presumably the modulus -- TODO confirm against caller)
+%r5 = load i32, i32* %r2  ; --- pack the 18 words at %r2 into a single i576, word i placed at bit 32*i
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123  ; %r124 = complete 576-bit operand read from %r2
+%r125 = load i32, i32* %r3  ; --- same packing for the 18 words at %r3
+%r126 = zext i32 %r125 to i64
+%r128 = getelementptr i32, i32* %r3, i32 1
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i64
+%r131 = shl i64 %r130, 32
+%r132 = or i64 %r126, %r131
+%r133 = zext i64 %r132 to i96
+%r135 = getelementptr i32, i32* %r3, i32 2
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i96
+%r138 = shl i96 %r137, 64
+%r139 = or i96 %r133, %r138
+%r140 = zext i96 %r139 to i128
+%r142 = getelementptr i32, i32* %r3, i32 3
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i128
+%r145 = shl i128 %r144, 96
+%r146 = or i128 %r140, %r145
+%r147 = zext i128 %r146 to i160
+%r149 = getelementptr i32, i32* %r3, i32 4
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i160
+%r152 = shl i160 %r151, 128
+%r153 = or i160 %r147, %r152
+%r154 = zext i160 %r153 to i192
+%r156 = getelementptr i32, i32* %r3, i32 5
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i192
+%r159 = shl i192 %r158, 160
+%r160 = or i192 %r154, %r159
+%r161 = zext i192 %r160 to i224
+%r163 = getelementptr i32, i32* %r3, i32 6
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i224
+%r166 = shl i224 %r165, 192
+%r167 = or i224 %r161, %r166
+%r168 = zext i224 %r167 to i256
+%r170 = getelementptr i32, i32* %r3, i32 7
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i256
+%r173 = shl i256 %r172, 224
+%r174 = or i256 %r168, %r173
+%r175 = zext i256 %r174 to i288
+%r177 = getelementptr i32, i32* %r3, i32 8
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i288
+%r180 = shl i288 %r179, 256
+%r181 = or i288 %r175, %r180
+%r182 = zext i288 %r181 to i320
+%r184 = getelementptr i32, i32* %r3, i32 9
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i320
+%r187 = shl i320 %r186, 288
+%r188 = or i320 %r182, %r187
+%r189 = zext i320 %r188 to i352
+%r191 = getelementptr i32, i32* %r3, i32 10
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i352
+%r194 = shl i352 %r193, 320
+%r195 = or i352 %r189, %r194
+%r196 = zext i352 %r195 to i384
+%r198 = getelementptr i32, i32* %r3, i32 11
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i384
+%r201 = shl i384 %r200, 352
+%r202 = or i384 %r196, %r201
+%r203 = zext i384 %r202 to i416
+%r205 = getelementptr i32, i32* %r3, i32 12
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i416
+%r208 = shl i416 %r207, 384
+%r209 = or i416 %r203, %r208
+%r210 = zext i416 %r209 to i448
+%r212 = getelementptr i32, i32* %r3, i32 13
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i448
+%r215 = shl i448 %r214, 416
+%r216 = or i448 %r210, %r215
+%r217 = zext i448 %r216 to i480
+%r219 = getelementptr i32, i32* %r3, i32 14
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i480
+%r222 = shl i480 %r221, 448
+%r223 = or i480 %r217, %r222
+%r224 = zext i480 %r223 to i512
+%r226 = getelementptr i32, i32* %r3, i32 15
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i512
+%r229 = shl i512 %r228, 480
+%r230 = or i512 %r224, %r229
+%r231 = zext i512 %r230 to i544
+%r233 = getelementptr i32, i32* %r3, i32 16
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i544
+%r236 = shl i544 %r235, 512
+%r237 = or i544 %r231, %r236
+%r238 = zext i544 %r237 to i576
+%r240 = getelementptr i32, i32* %r3, i32 17
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i576
+%r243 = shl i576 %r242, 544
+%r244 = or i576 %r238, %r243  ; %r244 = complete 576-bit operand read from %r3
+%r245 = zext i576 %r124 to i608  ; widen both operands by one word so the carry out of bit 575 is not lost
+%r246 = zext i576 %r244 to i608
+%r247 = add i608 %r245, %r246  ; %r247 = full sum including the carry bit
+%r248 = trunc i608 %r247 to i288  ; low 288 bits of the sum are written out as-is
+%r249 = trunc i288 %r248 to i32  ; --- store %r248 to %r1[0..8], one 32-bit word per step (shift right 32, truncate)
+%r251 = getelementptr i32, i32* %r1, i32 0
+store i32 %r249, i32* %r251
+%r252 = lshr i288 %r248, 32
+%r253 = trunc i288 %r252 to i32
+%r255 = getelementptr i32, i32* %r1, i32 1
+store i32 %r253, i32* %r255
+%r256 = lshr i288 %r252, 32
+%r257 = trunc i288 %r256 to i32
+%r259 = getelementptr i32, i32* %r1, i32 2
+store i32 %r257, i32* %r259
+%r260 = lshr i288 %r256, 32
+%r261 = trunc i288 %r260 to i32
+%r263 = getelementptr i32, i32* %r1, i32 3
+store i32 %r261, i32* %r263
+%r264 = lshr i288 %r260, 32
+%r265 = trunc i288 %r264 to i32
+%r267 = getelementptr i32, i32* %r1, i32 4
+store i32 %r265, i32* %r267
+%r268 = lshr i288 %r264, 32
+%r269 = trunc i288 %r268 to i32
+%r271 = getelementptr i32, i32* %r1, i32 5
+store i32 %r269, i32* %r271
+%r272 = lshr i288 %r268, 32
+%r273 = trunc i288 %r272 to i32
+%r275 = getelementptr i32, i32* %r1, i32 6
+store i32 %r273, i32* %r275
+%r276 = lshr i288 %r272, 32
+%r277 = trunc i288 %r276 to i32
+%r279 = getelementptr i32, i32* %r1, i32 7
+store i32 %r277, i32* %r279
+%r280 = lshr i288 %r276, 32
+%r281 = trunc i288 %r280 to i32
+%r283 = getelementptr i32, i32* %r1, i32 8
+store i32 %r281, i32* %r283
+%r284 = lshr i608 %r247, 288  ; move bits 288 and above of the sum down to bit 0
+%r285 = trunc i608 %r284 to i320  ; %r285 = upper part of the sum, carry bit included (i320 leaves room for it)
+%r286 = load i32, i32* %r4  ; --- pack the 9 words at %r4 into an i288 (presumably the modulus -- TODO confirm)
+%r287 = zext i32 %r286 to i64
+%r289 = getelementptr i32, i32* %r4, i32 1
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i64
+%r292 = shl i64 %r291, 32
+%r293 = or i64 %r287, %r292
+%r294 = zext i64 %r293 to i96
+%r296 = getelementptr i32, i32* %r4, i32 2
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i96
+%r299 = shl i96 %r298, 64
+%r300 = or i96 %r294, %r299
+%r301 = zext i96 %r300 to i128
+%r303 = getelementptr i32, i32* %r4, i32 3
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i128
+%r306 = shl i128 %r305, 96
+%r307 = or i128 %r301, %r306
+%r308 = zext i128 %r307 to i160
+%r310 = getelementptr i32, i32* %r4, i32 4
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i160
+%r313 = shl i160 %r312, 128
+%r314 = or i160 %r308, %r313
+%r315 = zext i160 %r314 to i192
+%r317 = getelementptr i32, i32* %r4, i32 5
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i192
+%r320 = shl i192 %r319, 160
+%r321 = or i192 %r315, %r320
+%r322 = zext i192 %r321 to i224
+%r324 = getelementptr i32, i32* %r4, i32 6
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i224
+%r327 = shl i224 %r326, 192
+%r328 = or i224 %r322, %r327
+%r329 = zext i224 %r328 to i256
+%r331 = getelementptr i32, i32* %r4, i32 7
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i256
+%r334 = shl i256 %r333, 224
+%r335 = or i256 %r329, %r334
+%r336 = zext i256 %r335 to i288
+%r338 = getelementptr i32, i32* %r4, i32 8
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i288
+%r341 = shl i288 %r340, 256
+%r342 = or i288 %r336, %r341
+%r343 = zext i288 %r342 to i320
+%r344 = sub i320 %r285, %r343  ; trial subtraction: upper part of the sum minus the %r4 value
+%r345 = lshr i320 %r344, 288  ; bit 288 of the difference is set when the subtraction wrapped below zero
+%r346 = trunc i320 %r345 to i1  ; %r346 = that bit, used as the borrow flag
+%r347 = select i1 %r346, i320 %r285, i320 %r344  ; borrow -> keep the unsubtracted value, otherwise take the difference
+%r348 = trunc i320 %r347 to i288  ; only the low 288 bits are written back
+%r350 = getelementptr i32, i32* %r1, i32 9  ; %r350 = &%r1[9], start of the upper output half
+%r351 = trunc i288 %r348 to i32  ; --- store %r348 to %r1[9..17], one 32-bit word per step
+%r353 = getelementptr i32, i32* %r350, i32 0
+store i32 %r351, i32* %r353
+%r354 = lshr i288 %r348, 32
+%r355 = trunc i288 %r354 to i32
+%r357 = getelementptr i32, i32* %r350, i32 1
+store i32 %r355, i32* %r357
+%r358 = lshr i288 %r354, 32
+%r359 = trunc i288 %r358 to i32
+%r361 = getelementptr i32, i32* %r350, i32 2
+store i32 %r359, i32* %r361
+%r362 = lshr i288 %r358, 32
+%r363 = trunc i288 %r362 to i32
+%r365 = getelementptr i32, i32* %r350, i32 3
+store i32 %r363, i32* %r365
+%r366 = lshr i288 %r362, 32
+%r367 = trunc i288 %r366 to i32
+%r369 = getelementptr i32, i32* %r350, i32 4
+store i32 %r367, i32* %r369
+%r370 = lshr i288 %r366, 32
+%r371 = trunc i288 %r370 to i32
+%r373 = getelementptr i32, i32* %r350, i32 5
+store i32 %r371, i32* %r373
+%r374 = lshr i288 %r370, 32
+%r375 = trunc i288 %r374 to i32
+%r377 = getelementptr i32, i32* %r350, i32 6
+store i32 %r375, i32* %r377
+%r378 = lshr i288 %r374, 32
+%r379 = trunc i288 %r378 to i32
+%r381 = getelementptr i32, i32* %r350, i32 7
+store i32 %r379, i32* %r381
+%r382 = lshr i288 %r378, 32
+%r383 = trunc i288 %r382 to i32
+%r385 = getelementptr i32, i32* %r350, i32 8
+store i32 %r383, i32* %r385
+ret void
+}
+define void @mcl_fpDbl_sub9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)  ; double-width subtract: 18 words at %r2 - 18 words at %r3 -> 18 words at %r1
+{  ; on borrow, the 9-word value at %r4 (presumably the modulus -- TODO confirm against caller) is added back into the upper 9 output words
+%r5 = load i32, i32* %r2  ; --- pack the 18 words at %r2 into a single i576, word i placed at bit 32*i
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123  ; %r124 = complete 576-bit minuend read from %r2
+%r125 = load i32, i32* %r3  ; --- same packing for the 18 words at %r3
+%r126 = zext i32 %r125 to i64
+%r128 = getelementptr i32, i32* %r3, i32 1
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i64
+%r131 = shl i64 %r130, 32
+%r132 = or i64 %r126, %r131
+%r133 = zext i64 %r132 to i96
+%r135 = getelementptr i32, i32* %r3, i32 2
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i96
+%r138 = shl i96 %r137, 64
+%r139 = or i96 %r133, %r138
+%r140 = zext i96 %r139 to i128
+%r142 = getelementptr i32, i32* %r3, i32 3
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i128
+%r145 = shl i128 %r144, 96
+%r146 = or i128 %r140, %r145
+%r147 = zext i128 %r146 to i160
+%r149 = getelementptr i32, i32* %r3, i32 4
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i160
+%r152 = shl i160 %r151, 128
+%r153 = or i160 %r147, %r152
+%r154 = zext i160 %r153 to i192
+%r156 = getelementptr i32, i32* %r3, i32 5
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i192
+%r159 = shl i192 %r158, 160
+%r160 = or i192 %r154, %r159
+%r161 = zext i192 %r160 to i224
+%r163 = getelementptr i32, i32* %r3, i32 6
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i224
+%r166 = shl i224 %r165, 192
+%r167 = or i224 %r161, %r166
+%r168 = zext i224 %r167 to i256
+%r170 = getelementptr i32, i32* %r3, i32 7
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i256
+%r173 = shl i256 %r172, 224
+%r174 = or i256 %r168, %r173
+%r175 = zext i256 %r174 to i288
+%r177 = getelementptr i32, i32* %r3, i32 8
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i288
+%r180 = shl i288 %r179, 256
+%r181 = or i288 %r175, %r180
+%r182 = zext i288 %r181 to i320
+%r184 = getelementptr i32, i32* %r3, i32 9
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i320
+%r187 = shl i320 %r186, 288
+%r188 = or i320 %r182, %r187
+%r189 = zext i320 %r188 to i352
+%r191 = getelementptr i32, i32* %r3, i32 10
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i352
+%r194 = shl i352 %r193, 320
+%r195 = or i352 %r189, %r194
+%r196 = zext i352 %r195 to i384
+%r198 = getelementptr i32, i32* %r3, i32 11
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i384
+%r201 = shl i384 %r200, 352
+%r202 = or i384 %r196, %r201
+%r203 = zext i384 %r202 to i416
+%r205 = getelementptr i32, i32* %r3, i32 12
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i416
+%r208 = shl i416 %r207, 384
+%r209 = or i416 %r203, %r208
+%r210 = zext i416 %r209 to i448
+%r212 = getelementptr i32, i32* %r3, i32 13
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i448
+%r215 = shl i448 %r214, 416
+%r216 = or i448 %r210, %r215
+%r217 = zext i448 %r216 to i480
+%r219 = getelementptr i32, i32* %r3, i32 14
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i480
+%r222 = shl i480 %r221, 448
+%r223 = or i480 %r217, %r222
+%r224 = zext i480 %r223 to i512
+%r226 = getelementptr i32, i32* %r3, i32 15
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i512
+%r229 = shl i512 %r228, 480
+%r230 = or i512 %r224, %r229
+%r231 = zext i512 %r230 to i544
+%r233 = getelementptr i32, i32* %r3, i32 16
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i544
+%r236 = shl i544 %r235, 512
+%r237 = or i544 %r231, %r236
+%r238 = zext i544 %r237 to i576
+%r240 = getelementptr i32, i32* %r3, i32 17
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i576
+%r243 = shl i576 %r242, 544
+%r244 = or i576 %r238, %r243  ; %r244 = complete 576-bit subtrahend read from %r3
+%r245 = zext i576 %r124 to i608  ; widen both operands by one word so a borrow shows up above bit 575
+%r246 = zext i576 %r244 to i608
+%r247 = sub i608 %r245, %r246  ; %r247 = difference; bits 576 and up are all ones if it went negative
+%r248 = trunc i608 %r247 to i288  ; low 288 bits of the difference are written out as-is
+%r249 = trunc i288 %r248 to i32  ; --- store %r248 to %r1[0..8], one 32-bit word per step (shift right 32, truncate)
+%r251 = getelementptr i32, i32* %r1, i32 0
+store i32 %r249, i32* %r251
+%r252 = lshr i288 %r248, 32
+%r253 = trunc i288 %r252 to i32
+%r255 = getelementptr i32, i32* %r1, i32 1
+store i32 %r253, i32* %r255
+%r256 = lshr i288 %r252, 32
+%r257 = trunc i288 %r256 to i32
+%r259 = getelementptr i32, i32* %r1, i32 2
+store i32 %r257, i32* %r259
+%r260 = lshr i288 %r256, 32
+%r261 = trunc i288 %r260 to i32
+%r263 = getelementptr i32, i32* %r1, i32 3
+store i32 %r261, i32* %r263
+%r264 = lshr i288 %r260, 32
+%r265 = trunc i288 %r264 to i32
+%r267 = getelementptr i32, i32* %r1, i32 4
+store i32 %r265, i32* %r267
+%r268 = lshr i288 %r264, 32
+%r269 = trunc i288 %r268 to i32
+%r271 = getelementptr i32, i32* %r1, i32 5
+store i32 %r269, i32* %r271
+%r272 = lshr i288 %r268, 32
+%r273 = trunc i288 %r272 to i32
+%r275 = getelementptr i32, i32* %r1, i32 6
+store i32 %r273, i32* %r275
+%r276 = lshr i288 %r272, 32
+%r277 = trunc i288 %r276 to i32
+%r279 = getelementptr i32, i32* %r1, i32 7
+store i32 %r277, i32* %r279
+%r280 = lshr i288 %r276, 32
+%r281 = trunc i288 %r280 to i32
+%r283 = getelementptr i32, i32* %r1, i32 8
+store i32 %r281, i32* %r283
+%r284 = lshr i608 %r247, 288  ; move bits 288 and above of the difference down to bit 0
+%r285 = trunc i608 %r284 to i288  ; %r285 = bits 288..575 of the difference (upper half of the result)
+%r286 = lshr i608 %r247, 576  ; isolate bit 576 of the difference
+%r287 = trunc i608 %r286 to i1  ; %r287 = borrow flag (1 when the %r3 value exceeded the %r2 value)
+%r288 = load i32, i32* %r4  ; --- pack the 9 words at %r4 into an i288 (presumably the modulus -- TODO confirm)
+%r289 = zext i32 %r288 to i64
+%r291 = getelementptr i32, i32* %r4, i32 1
+%r292 = load i32, i32* %r291
+%r293 = zext i32 %r292 to i64
+%r294 = shl i64 %r293, 32
+%r295 = or i64 %r289, %r294
+%r296 = zext i64 %r295 to i96
+%r298 = getelementptr i32, i32* %r4, i32 2
+%r299 = load i32, i32* %r298
+%r300 = zext i32 %r299 to i96
+%r301 = shl i96 %r300, 64
+%r302 = or i96 %r296, %r301
+%r303 = zext i96 %r302 to i128
+%r305 = getelementptr i32, i32* %r4, i32 3
+%r306 = load i32, i32* %r305
+%r307 = zext i32 %r306 to i128
+%r308 = shl i128 %r307, 96
+%r309 = or i128 %r303, %r308
+%r310 = zext i128 %r309 to i160
+%r312 = getelementptr i32, i32* %r4, i32 4
+%r313 = load i32, i32* %r312
+%r314 = zext i32 %r313 to i160
+%r315 = shl i160 %r314, 128
+%r316 = or i160 %r310, %r315
+%r317 = zext i160 %r316 to i192
+%r319 = getelementptr i32, i32* %r4, i32 5
+%r320 = load i32, i32* %r319
+%r321 = zext i32 %r320 to i192
+%r322 = shl i192 %r321, 160
+%r323 = or i192 %r317, %r322
+%r324 = zext i192 %r323 to i224
+%r326 = getelementptr i32, i32* %r4, i32 6
+%r327 = load i32, i32* %r326
+%r328 = zext i32 %r327 to i224
+%r329 = shl i224 %r328, 192
+%r330 = or i224 %r324, %r329
+%r331 = zext i224 %r330 to i256
+%r333 = getelementptr i32, i32* %r4, i32 7
+%r334 = load i32, i32* %r333
+%r335 = zext i32 %r334 to i256
+%r336 = shl i256 %r335, 224
+%r337 = or i256 %r331, %r336
+%r338 = zext i256 %r337 to i288
+%r340 = getelementptr i32, i32* %r4, i32 8
+%r341 = load i32, i32* %r340
+%r342 = zext i32 %r341 to i288
+%r343 = shl i288 %r342, 256
+%r344 = or i288 %r338, %r343
+%r346 = select i1 %r287, i288 %r344, i288 0  ; correction term: the %r4 value on borrow, zero otherwise (no branch)
+%r347 = add i288 %r285, %r346  ; add the correction to the upper half; the add wraps at 288 bits
+%r349 = getelementptr i32, i32* %r1, i32 9  ; %r349 = &%r1[9], start of the upper output half
+%r350 = trunc i288 %r347 to i32  ; --- store %r347 to %r1[9..17], one 32-bit word per step
+%r352 = getelementptr i32, i32* %r349, i32 0
+store i32 %r350, i32* %r352
+%r353 = lshr i288 %r347, 32
+%r354 = trunc i288 %r353 to i32
+%r356 = getelementptr i32, i32* %r349, i32 1
+store i32 %r354, i32* %r356
+%r357 = lshr i288 %r353, 32
+%r358 = trunc i288 %r357 to i32
+%r360 = getelementptr i32, i32* %r349, i32 2
+store i32 %r358, i32* %r360
+%r361 = lshr i288 %r357, 32
+%r362 = trunc i288 %r361 to i32
+%r364 = getelementptr i32, i32* %r349, i32 3
+store i32 %r362, i32* %r364
+%r365 = lshr i288 %r361, 32
+%r366 = trunc i288 %r365 to i32
+%r368 = getelementptr i32, i32* %r349, i32 4
+store i32 %r366, i32* %r368
+%r369 = lshr i288 %r365, 32
+%r370 = trunc i288 %r369 to i32
+%r372 = getelementptr i32, i32* %r349, i32 5
+store i32 %r370, i32* %r372
+%r373 = lshr i288 %r369, 32
+%r374 = trunc i288 %r373 to i32
+%r376 = getelementptr i32, i32* %r349, i32 6
+store i32 %r374, i32* %r376
+%r377 = lshr i288 %r373, 32
+%r378 = trunc i288 %r377 to i32
+%r380 = getelementptr i32, i32* %r349, i32 7
+store i32 %r378, i32* %r380
+%r381 = lshr i288 %r377, 32
+%r382 = trunc i288 %r381 to i32
+%r384 = getelementptr i32, i32* %r349, i32 8
+store i32 %r382, i32* %r384
+ret void
+}
+define i352 @mulPv320x32(i32* noalias  %r2, i32 %r3)  ; combines ten mulPos32x32(%r2, %r3, i) results, i = 0..9, into one i352 return value
+{  ; NOTE(review): mulPos32x32 is defined elsewhere; presumably it returns the 64-bit product %r2[i] * %r3 -- confirm
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)  ; --- for each index: keep the low 32 bits (trunc) and the high 32 bits (extractHigh32) separately
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
+%r34 = trunc i64 %r33 to i32
+%r35 = call i32 @extractHigh32(i64 %r33)
+%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
+%r38 = trunc i64 %r37 to i32
+%r39 = call i32 @extractHigh32(i64 %r37)
+%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
+%r42 = trunc i64 %r41 to i32
+%r43 = call i32 @extractHigh32(i64 %r41)
+%r44 = zext i32 %r6 to i64  ; --- pack the ten low halves into an i320, half i placed at bit 32*i
+%r45 = zext i32 %r10 to i64
+%r46 = shl i64 %r45, 32
+%r47 = or i64 %r44, %r46
+%r48 = zext i64 %r47 to i96
+%r49 = zext i32 %r14 to i96
+%r50 = shl i96 %r49, 64
+%r51 = or i96 %r48, %r50
+%r52 = zext i96 %r51 to i128
+%r53 = zext i32 %r18 to i128
+%r54 = shl i128 %r53, 96
+%r55 = or i128 %r52, %r54
+%r56 = zext i128 %r55 to i160
+%r57 = zext i32 %r22 to i160
+%r58 = shl i160 %r57, 128
+%r59 = or i160 %r56, %r58
+%r60 = zext i160 %r59 to i192
+%r61 = zext i32 %r26 to i192
+%r62 = shl i192 %r61, 160
+%r63 = or i192 %r60, %r62
+%r64 = zext i192 %r63 to i224
+%r65 = zext i32 %r30 to i224
+%r66 = shl i224 %r65, 192
+%r67 = or i224 %r64, %r66
+%r68 = zext i224 %r67 to i256
+%r69 = zext i32 %r34 to i256
+%r70 = shl i256 %r69, 224
+%r71 = or i256 %r68, %r70
+%r72 = zext i256 %r71 to i288
+%r73 = zext i32 %r38 to i288
+%r74 = shl i288 %r73, 256
+%r75 = or i288 %r72, %r74
+%r76 = zext i288 %r75 to i320
+%r77 = zext i32 %r42 to i320
+%r78 = shl i320 %r77, 288
+%r79 = or i320 %r76, %r78  ; %r79 = all ten low halves concatenated
+%r80 = zext i32 %r7 to i64  ; --- pack the ten high halves the same way
+%r81 = zext i32 %r11 to i64
+%r82 = shl i64 %r81, 32
+%r83 = or i64 %r80, %r82
+%r84 = zext i64 %r83 to i96
+%r85 = zext i32 %r15 to i96
+%r86 = shl i96 %r85, 64
+%r87 = or i96 %r84, %r86
+%r88 = zext i96 %r87 to i128
+%r89 = zext i32 %r19 to i128
+%r90 = shl i128 %r89, 96
+%r91 = or i128 %r88, %r90
+%r92 = zext i128 %r91 to i160
+%r93 = zext i32 %r23 to i160
+%r94 = shl i160 %r93, 128
+%r95 = or i160 %r92, %r94
+%r96 = zext i160 %r95 to i192
+%r97 = zext i32 %r27 to i192
+%r98 = shl i192 %r97, 160
+%r99 = or i192 %r96, %r98
+%r100 = zext i192 %r99 to i224
+%r101 = zext i32 %r31 to i224
+%r102 = shl i224 %r101, 192
+%r103 = or i224 %r100, %r102
+%r104 = zext i224 %r103 to i256
+%r105 = zext i32 %r35 to i256
+%r106 = shl i256 %r105, 224
+%r107 = or i256 %r104, %r106
+%r108 = zext i256 %r107 to i288
+%r109 = zext i32 %r39 to i288
+%r110 = shl i288 %r109, 256
+%r111 = or i288 %r108, %r110
+%r112 = zext i288 %r111 to i320
+%r113 = zext i32 %r43 to i320
+%r114 = shl i320 %r113, 288
+%r115 = or i320 %r112, %r114  ; %r115 = all ten high halves concatenated
+%r116 = zext i320 %r79 to i352  ; widen both packed values to the 352-bit result width
+%r117 = zext i320 %r115 to i352
+%r118 = shl i352 %r117, 32  ; each high half belongs one word above its matching low half
+%r119 = add i352 %r116, %r118  ; a single wide add merges the two and propagates carries between words
+ret i352 %r119
+}
+define void @mcl_fp_mulUnitPre10L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)  ; writes the 352-bit result of mulPv320x32(%r2, %r3) to %r1 as 11 words
+{  ; output is least-significant word first: %r1[i] receives bits 32*i .. 32*i+31
+%r4 = call i352 @mulPv320x32(i32* %r2, i32 %r3)  ; presumably the 10-word value at %r2 times the single word %r3 -- TODO confirm
+%r5 = trunc i352 %r4 to i32  ; --- peel off one 32-bit word per step: truncate, store, shift right by 32
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i352 %r4, 32
+%r9 = trunc i352 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i352 %r8, 32
+%r13 = trunc i352 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i352 %r12, 32
+%r17 = trunc i352 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i352 %r16, 32
+%r21 = trunc i352 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+%r24 = lshr i352 %r20, 32
+%r25 = trunc i352 %r24 to i32
+%r27 = getelementptr i32, i32* %r1, i32 5
+store i32 %r25, i32* %r27
+%r28 = lshr i352 %r24, 32
+%r29 = trunc i352 %r28 to i32
+%r31 = getelementptr i32, i32* %r1, i32 6
+store i32 %r29, i32* %r31
+%r32 = lshr i352 %r28, 32
+%r33 = trunc i352 %r32 to i32
+%r35 = getelementptr i32, i32* %r1, i32 7
+store i32 %r33, i32* %r35
+%r36 = lshr i352 %r32, 32
+%r37 = trunc i352 %r36 to i32
+%r39 = getelementptr i32, i32* %r1, i32 8
+store i32 %r37, i32* %r39
+%r40 = lshr i352 %r36, 32
+%r41 = trunc i352 %r40 to i32
+%r43 = getelementptr i32, i32* %r1, i32 9
+store i32 %r41, i32* %r43
+%r44 = lshr i352 %r40, 32
+%r45 = trunc i352 %r44 to i32  ; the eleventh word is the top 32 bits of the i352 result
+%r47 = getelementptr i32, i32* %r1, i32 10
+store i32 %r45, i32* %r47
+ret void
+}
+define void @mcl_fpDbl_mulPre10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r2, i32 5
+%r7 = getelementptr i32, i32* %r3, i32 5
+%r9 = getelementptr i32, i32* %r1, i32 10
+call void @mcl_fpDbl_mulPre5L(i32* %r1, i32* %r2, i32* %r3)
+call void @mcl_fpDbl_mulPre5L(i32* %r9, i32* %r5, i32* %r7)
+%r10 = load i32, i32* %r5
+%r11 = zext i32 %r10 to i64
+%r13 = getelementptr i32, i32* %r5, i32 1
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i64
+%r16 = shl i64 %r15, 32
+%r17 = or i64 %r11, %r16
+%r18 = zext i64 %r17 to i96
+%r20 = getelementptr i32, i32* %r5, i32 2
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i96
+%r23 = shl i96 %r22, 64
+%r24 = or i96 %r18, %r23
+%r25 = zext i96 %r24 to i128
+%r27 = getelementptr i32, i32* %r5, i32 3
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i128
+%r30 = shl i128 %r29, 96
+%r31 = or i128 %r25, %r30
+%r32 = zext i128 %r31 to i160
+%r34 = getelementptr i32, i32* %r5, i32 4
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i160
+%r37 = shl i160 %r36, 128
+%r38 = or i160 %r32, %r37
+%r39 = zext i160 %r38 to i192
+%r40 = load i32, i32* %r2
+%r41 = zext i32 %r40 to i64
+%r43 = getelementptr i32, i32* %r2, i32 1
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i64
+%r46 = shl i64 %r45, 32
+%r47 = or i64 %r41, %r46
+%r48 = zext i64 %r47 to i96
+%r50 = getelementptr i32, i32* %r2, i32 2
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i96
+%r53 = shl i96 %r52, 64
+%r54 = or i96 %r48, %r53
+%r55 = zext i96 %r54 to i128
+%r57 = getelementptr i32, i32* %r2, i32 3
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i128
+%r60 = shl i128 %r59, 96
+%r61 = or i128 %r55, %r60
+%r62 = zext i128 %r61 to i160
+%r64 = getelementptr i32, i32* %r2, i32 4
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i160
+%r67 = shl i160 %r66, 128
+%r68 = or i160 %r62, %r67
+%r69 = zext i160 %r68 to i192
+%r70 = load i32, i32* %r7
+%r71 = zext i32 %r70 to i64
+%r73 = getelementptr i32, i32* %r7, i32 1
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i64
+%r76 = shl i64 %r75, 32
+%r77 = or i64 %r71, %r76
+%r78 = zext i64 %r77 to i96
+%r80 = getelementptr i32, i32* %r7, i32 2
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i96
+%r83 = shl i96 %r82, 64
+%r84 = or i96 %r78, %r83
+%r85 = zext i96 %r84 to i128
+%r87 = getelementptr i32, i32* %r7, i32 3
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i128
+%r90 = shl i128 %r89, 96
+%r91 = or i128 %r85, %r90
+%r92 = zext i128 %r91 to i160
+%r94 = getelementptr i32, i32* %r7, i32 4
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i160
+%r97 = shl i160 %r96, 128
+%r98 = or i160 %r92, %r97
+%r99 = zext i160 %r98 to i192
+%r100 = load i32, i32* %r3
+%r101 = zext i32 %r100 to i64
+%r103 = getelementptr i32, i32* %r3, i32 1
+%r104 = load i32, i32* %r103
+%r105 = zext i32 %r104 to i64
+%r106 = shl i64 %r105, 32
+%r107 = or i64 %r101, %r106
+%r108 = zext i64 %r107 to i96
+%r110 = getelementptr i32, i32* %r3, i32 2
+%r111 = load i32, i32* %r110
+%r112 = zext i32 %r111 to i96
+%r113 = shl i96 %r112, 64
+%r114 = or i96 %r108, %r113
+%r115 = zext i96 %r114 to i128
+%r117 = getelementptr i32, i32* %r3, i32 3
+%r118 = load i32, i32* %r117
+%r119 = zext i32 %r118 to i128
+%r120 = shl i128 %r119, 96
+%r121 = or i128 %r115, %r120
+%r122 = zext i128 %r121 to i160
+%r124 = getelementptr i32, i32* %r3, i32 4
+%r125 = load i32, i32* %r124
+%r126 = zext i32 %r125 to i160
+%r127 = shl i160 %r126, 128
+%r128 = or i160 %r122, %r127
+%r129 = zext i160 %r128 to i192
+%r130 = add i192 %r39, %r69
+%r131 = add i192 %r99, %r129
+%r133 = alloca i32, i32 10
+%r134 = trunc i192 %r130 to i160
+%r135 = trunc i192 %r131 to i160
+%r136 = lshr i192 %r130, 160
+%r137 = trunc i192 %r136 to i1
+%r138 = lshr i192 %r131, 160
+%r139 = trunc i192 %r138 to i1
+%r140 = and i1 %r137, %r139
+%r142 = select i1 %r137, i160 %r135, i160 0
+%r144 = select i1 %r139, i160 %r134, i160 0
+%r146 = alloca i32, i32 5
+%r148 = alloca i32, i32 5
+%r149 = trunc i160 %r134 to i32
+%r151 = getelementptr i32, i32* %r146, i32 0
+store i32 %r149, i32* %r151
+%r152 = lshr i160 %r134, 32
+%r153 = trunc i160 %r152 to i32
+%r155 = getelementptr i32, i32* %r146, i32 1
+store i32 %r153, i32* %r155
+%r156 = lshr i160 %r152, 32
+%r157 = trunc i160 %r156 to i32
+%r159 = getelementptr i32, i32* %r146, i32 2
+store i32 %r157, i32* %r159
+%r160 = lshr i160 %r156, 32
+%r161 = trunc i160 %r160 to i32
+%r163 = getelementptr i32, i32* %r146, i32 3
+store i32 %r161, i32* %r163
+%r164 = lshr i160 %r160, 32
+%r165 = trunc i160 %r164 to i32
+%r167 = getelementptr i32, i32* %r146, i32 4
+store i32 %r165, i32* %r167
+%r168 = trunc i160 %r135 to i32
+%r170 = getelementptr i32, i32* %r148, i32 0
+store i32 %r168, i32* %r170
+%r171 = lshr i160 %r135, 32
+%r172 = trunc i160 %r171 to i32
+%r174 = getelementptr i32, i32* %r148, i32 1
+store i32 %r172, i32* %r174
+%r175 = lshr i160 %r171, 32
+%r176 = trunc i160 %r175 to i32
+%r178 = getelementptr i32, i32* %r148, i32 2
+store i32 %r176, i32* %r178
+%r179 = lshr i160 %r175, 32
+%r180 = trunc i160 %r179 to i32
+%r182 = getelementptr i32, i32* %r148, i32 3
+store i32 %r180, i32* %r182
+%r183 = lshr i160 %r179, 32
+%r184 = trunc i160 %r183 to i32
+%r186 = getelementptr i32, i32* %r148, i32 4
+store i32 %r184, i32* %r186
+call void @mcl_fpDbl_mulPre5L(i32* %r133, i32* %r146, i32* %r148)
+%r187 = load i32, i32* %r133
+%r188 = zext i32 %r187 to i64
+%r190 = getelementptr i32, i32* %r133, i32 1
+%r191 = load i32, i32* %r190
+%r192 = zext i32 %r191 to i64
+%r193 = shl i64 %r192, 32
+%r194 = or i64 %r188, %r193
+%r195 = zext i64 %r194 to i96
+%r197 = getelementptr i32, i32* %r133, i32 2
+%r198 = load i32, i32* %r197
+%r199 = zext i32 %r198 to i96
+%r200 = shl i96 %r199, 64
+%r201 = or i96 %r195, %r200
+%r202 = zext i96 %r201 to i128
+%r204 = getelementptr i32, i32* %r133, i32 3
+%r205 = load i32, i32* %r204
+%r206 = zext i32 %r205 to i128
+%r207 = shl i128 %r206, 96
+%r208 = or i128 %r202, %r207
+%r209 = zext i128 %r208 to i160
+%r211 = getelementptr i32, i32* %r133, i32 4
+%r212 = load i32, i32* %r211
+%r213 = zext i32 %r212 to i160
+%r214 = shl i160 %r213, 128
+%r215 = or i160 %r209, %r214
+%r216 = zext i160 %r215 to i192
+%r218 = getelementptr i32, i32* %r133, i32 5
+%r219 = load i32, i32* %r218
+%r220 = zext i32 %r219 to i192
+%r221 = shl i192 %r220, 160
+%r222 = or i192 %r216, %r221
+%r223 = zext i192 %r222 to i224
+%r225 = getelementptr i32, i32* %r133, i32 6
+%r226 = load i32, i32* %r225
+%r227 = zext i32 %r226 to i224
+%r228 = shl i224 %r227, 192
+%r229 = or i224 %r223, %r228
+%r230 = zext i224 %r229 to i256
+%r232 = getelementptr i32, i32* %r133, i32 7
+%r233 = load i32, i32* %r232
+%r234 = zext i32 %r233 to i256
+%r235 = shl i256 %r234, 224
+%r236 = or i256 %r230, %r235
+%r237 = zext i256 %r236 to i288
+%r239 = getelementptr i32, i32* %r133, i32 8
+%r240 = load i32, i32* %r239
+%r241 = zext i32 %r240 to i288
+%r242 = shl i288 %r241, 256
+%r243 = or i288 %r237, %r242
+%r244 = zext i288 %r243 to i320
+%r246 = getelementptr i32, i32* %r133, i32 9
+%r247 = load i32, i32* %r246
+%r248 = zext i32 %r247 to i320
+%r249 = shl i320 %r248, 288
+%r250 = or i320 %r244, %r249
+%r251 = zext i320 %r250 to i352
+%r252 = zext i1 %r140 to i352
+%r253 = shl i352 %r252, 320
+%r254 = or i352 %r251, %r253
+%r255 = zext i160 %r142 to i352
+%r256 = zext i160 %r144 to i352
+%r257 = shl i352 %r255, 160
+%r258 = shl i352 %r256, 160
+%r259 = add i352 %r254, %r257
+%r260 = add i352 %r259, %r258
+%r261 = load i32, i32* %r1
+%r262 = zext i32 %r261 to i64
+%r264 = getelementptr i32, i32* %r1, i32 1
+%r265 = load i32, i32* %r264
+%r266 = zext i32 %r265 to i64
+%r267 = shl i64 %r266, 32
+%r268 = or i64 %r262, %r267
+%r269 = zext i64 %r268 to i96
+%r271 = getelementptr i32, i32* %r1, i32 2
+%r272 = load i32, i32* %r271
+%r273 = zext i32 %r272 to i96
+%r274 = shl i96 %r273, 64
+%r275 = or i96 %r269, %r274
+%r276 = zext i96 %r275 to i128
+%r278 = getelementptr i32, i32* %r1, i32 3
+%r279 = load i32, i32* %r278
+%r280 = zext i32 %r279 to i128
+%r281 = shl i128 %r280, 96
+%r282 = or i128 %r276, %r281
+%r283 = zext i128 %r282 to i160
+%r285 = getelementptr i32, i32* %r1, i32 4
+%r286 = load i32, i32* %r285
+%r287 = zext i32 %r286 to i160
+%r288 = shl i160 %r287, 128
+%r289 = or i160 %r283, %r288
+%r290 = zext i160 %r289 to i192
+%r292 = getelementptr i32, i32* %r1, i32 5
+%r293 = load i32, i32* %r292
+%r294 = zext i32 %r293 to i192
+%r295 = shl i192 %r294, 160
+%r296 = or i192 %r290, %r295
+%r297 = zext i192 %r296 to i224
+%r299 = getelementptr i32, i32* %r1, i32 6
+%r300 = load i32, i32* %r299
+%r301 = zext i32 %r300 to i224
+%r302 = shl i224 %r301, 192
+%r303 = or i224 %r297, %r302
+%r304 = zext i224 %r303 to i256
+%r306 = getelementptr i32, i32* %r1, i32 7
+%r307 = load i32, i32* %r306
+%r308 = zext i32 %r307 to i256
+%r309 = shl i256 %r308, 224
+%r310 = or i256 %r304, %r309
+%r311 = zext i256 %r310 to i288
+%r313 = getelementptr i32, i32* %r1, i32 8
+%r314 = load i32, i32* %r313
+%r315 = zext i32 %r314 to i288
+%r316 = shl i288 %r315, 256
+%r317 = or i288 %r311, %r316
+%r318 = zext i288 %r317 to i320
+%r320 = getelementptr i32, i32* %r1, i32 9
+%r321 = load i32, i32* %r320
+%r322 = zext i32 %r321 to i320
+%r323 = shl i320 %r322, 288
+%r324 = or i320 %r318, %r323
+%r325 = zext i320 %r324 to i352
+%r326 = sub i352 %r260, %r325
+%r328 = getelementptr i32, i32* %r1, i32 10
+%r329 = load i32, i32* %r328
+%r330 = zext i32 %r329 to i64
+%r332 = getelementptr i32, i32* %r328, i32 1
+%r333 = load i32, i32* %r332
+%r334 = zext i32 %r333 to i64
+%r335 = shl i64 %r334, 32
+%r336 = or i64 %r330, %r335
+%r337 = zext i64 %r336 to i96
+%r339 = getelementptr i32, i32* %r328, i32 2
+%r340 = load i32, i32* %r339
+%r341 = zext i32 %r340 to i96
+%r342 = shl i96 %r341, 64
+%r343 = or i96 %r337, %r342
+%r344 = zext i96 %r343 to i128
+%r346 = getelementptr i32, i32* %r328, i32 3
+%r347 = load i32, i32* %r346
+%r348 = zext i32 %r347 to i128
+%r349 = shl i128 %r348, 96
+%r350 = or i128 %r344, %r349
+%r351 = zext i128 %r350 to i160
+%r353 = getelementptr i32, i32* %r328, i32 4
+%r354 = load i32, i32* %r353
+%r355 = zext i32 %r354 to i160
+%r356 = shl i160 %r355, 128
+%r357 = or i160 %r351, %r356
+%r358 = zext i160 %r357 to i192
+%r360 = getelementptr i32, i32* %r328, i32 5
+%r361 = load i32, i32* %r360
+%r362 = zext i32 %r361 to i192
+%r363 = shl i192 %r362, 160
+%r364 = or i192 %r358, %r363
+%r365 = zext i192 %r364 to i224
+%r367 = getelementptr i32, i32* %r328, i32 6
+%r368 = load i32, i32* %r367
+%r369 = zext i32 %r368 to i224
+%r370 = shl i224 %r369, 192
+%r371 = or i224 %r365, %r370
+%r372 = zext i224 %r371 to i256
+%r374 = getelementptr i32, i32* %r328, i32 7
+%r375 = load i32, i32* %r374
+%r376 = zext i32 %r375 to i256
+%r377 = shl i256 %r376, 224
+%r378 = or i256 %r372, %r377
+%r379 = zext i256 %r378 to i288
+%r381 = getelementptr i32, i32* %r328, i32 8
+%r382 = load i32, i32* %r381
+%r383 = zext i32 %r382 to i288
+%r384 = shl i288 %r383, 256
+%r385 = or i288 %r379, %r384
+%r386 = zext i288 %r385 to i320
+%r388 = getelementptr i32, i32* %r328, i32 9
+%r389 = load i32, i32* %r388
+%r390 = zext i32 %r389 to i320
+%r391 = shl i320 %r390, 288
+%r392 = or i320 %r386, %r391
+%r393 = zext i320 %r392 to i352
+%r394 = sub i352 %r326, %r393
+%r395 = zext i352 %r394 to i480
+%r397 = getelementptr i32, i32* %r1, i32 5
+%r398 = load i32, i32* %r397
+%r399 = zext i32 %r398 to i64
+%r401 = getelementptr i32, i32* %r397, i32 1
+%r402 = load i32, i32* %r401
+%r403 = zext i32 %r402 to i64
+%r404 = shl i64 %r403, 32
+%r405 = or i64 %r399, %r404
+%r406 = zext i64 %r405 to i96
+%r408 = getelementptr i32, i32* %r397, i32 2
+%r409 = load i32, i32* %r408
+%r410 = zext i32 %r409 to i96
+%r411 = shl i96 %r410, 64
+%r412 = or i96 %r406, %r411
+%r413 = zext i96 %r412 to i128
+%r415 = getelementptr i32, i32* %r397, i32 3
+%r416 = load i32, i32* %r415
+%r417 = zext i32 %r416 to i128
+%r418 = shl i128 %r417, 96
+%r419 = or i128 %r413, %r418
+%r420 = zext i128 %r419 to i160
+%r422 = getelementptr i32, i32* %r397, i32 4
+%r423 = load i32, i32* %r422
+%r424 = zext i32 %r423 to i160
+%r425 = shl i160 %r424, 128
+%r426 = or i160 %r420, %r425
+%r427 = zext i160 %r426 to i192
+%r429 = getelementptr i32, i32* %r397, i32 5
+%r430 = load i32, i32* %r429
+%r431 = zext i32 %r430 to i192
+%r432 = shl i192 %r431, 160
+%r433 = or i192 %r427, %r432
+%r434 = zext i192 %r433 to i224
+%r436 = getelementptr i32, i32* %r397, i32 6
+%r437 = load i32, i32* %r436
+%r438 = zext i32 %r437 to i224
+%r439 = shl i224 %r438, 192
+%r440 = or i224 %r434, %r439
+%r441 = zext i224 %r440 to i256
+%r443 = getelementptr i32, i32* %r397, i32 7
+%r444 = load i32, i32* %r443
+%r445 = zext i32 %r444 to i256
+%r446 = shl i256 %r445, 224
+%r447 = or i256 %r441, %r446
+%r448 = zext i256 %r447 to i288
+%r450 = getelementptr i32, i32* %r397, i32 8
+%r451 = load i32, i32* %r450
+%r452 = zext i32 %r451 to i288
+%r453 = shl i288 %r452, 256
+%r454 = or i288 %r448, %r453
+%r455 = zext i288 %r454 to i320
+%r457 = getelementptr i32, i32* %r397, i32 9
+%r458 = load i32, i32* %r457
+%r459 = zext i32 %r458 to i320
+%r460 = shl i320 %r459, 288
+%r461 = or i320 %r455, %r460
+%r462 = zext i320 %r461 to i352
+%r464 = getelementptr i32, i32* %r397, i32 10
+%r465 = load i32, i32* %r464
+%r466 = zext i32 %r465 to i352
+%r467 = shl i352 %r466, 320
+%r468 = or i352 %r462, %r467
+%r469 = zext i352 %r468 to i384
+%r471 = getelementptr i32, i32* %r397, i32 11
+%r472 = load i32, i32* %r471
+%r473 = zext i32 %r472 to i384
+%r474 = shl i384 %r473, 352
+%r475 = or i384 %r469, %r474
+%r476 = zext i384 %r475 to i416
+%r478 = getelementptr i32, i32* %r397, i32 12
+%r479 = load i32, i32* %r478
+%r480 = zext i32 %r479 to i416
+%r481 = shl i416 %r480, 384
+%r482 = or i416 %r476, %r481
+%r483 = zext i416 %r482 to i448
+%r485 = getelementptr i32, i32* %r397, i32 13
+%r486 = load i32, i32* %r485
+%r487 = zext i32 %r486 to i448
+%r488 = shl i448 %r487, 416
+%r489 = or i448 %r483, %r488
+%r490 = zext i448 %r489 to i480
+%r492 = getelementptr i32, i32* %r397, i32 14
+%r493 = load i32, i32* %r492
+%r494 = zext i32 %r493 to i480
+%r495 = shl i480 %r494, 448
+%r496 = or i480 %r490, %r495
+%r497 = add i480 %r395, %r496
+%r499 = getelementptr i32, i32* %r1, i32 5
+%r500 = trunc i480 %r497 to i32
+%r502 = getelementptr i32, i32* %r499, i32 0
+store i32 %r500, i32* %r502
+%r503 = lshr i480 %r497, 32
+%r504 = trunc i480 %r503 to i32
+%r506 = getelementptr i32, i32* %r499, i32 1
+store i32 %r504, i32* %r506
+%r507 = lshr i480 %r503, 32
+%r508 = trunc i480 %r507 to i32
+%r510 = getelementptr i32, i32* %r499, i32 2
+store i32 %r508, i32* %r510
+%r511 = lshr i480 %r507, 32
+%r512 = trunc i480 %r511 to i32
+%r514 = getelementptr i32, i32* %r499, i32 3
+store i32 %r512, i32* %r514
+%r515 = lshr i480 %r511, 32
+%r516 = trunc i480 %r515 to i32
+%r518 = getelementptr i32, i32* %r499, i32 4
+store i32 %r516, i32* %r518
+%r519 = lshr i480 %r515, 32
+%r520 = trunc i480 %r519 to i32
+%r522 = getelementptr i32, i32* %r499, i32 5
+store i32 %r520, i32* %r522
+%r523 = lshr i480 %r519, 32
+%r524 = trunc i480 %r523 to i32
+%r526 = getelementptr i32, i32* %r499, i32 6
+store i32 %r524, i32* %r526
+%r527 = lshr i480 %r523, 32
+%r528 = trunc i480 %r527 to i32
+%r530 = getelementptr i32, i32* %r499, i32 7
+store i32 %r528, i32* %r530
+%r531 = lshr i480 %r527, 32
+%r532 = trunc i480 %r531 to i32
+%r534 = getelementptr i32, i32* %r499, i32 8
+store i32 %r532, i32* %r534
+%r535 = lshr i480 %r531, 32
+%r536 = trunc i480 %r535 to i32
+%r538 = getelementptr i32, i32* %r499, i32 9
+store i32 %r536, i32* %r538
+%r539 = lshr i480 %r535, 32
+%r540 = trunc i480 %r539 to i32
+%r542 = getelementptr i32, i32* %r499, i32 10
+store i32 %r540, i32* %r542
+%r543 = lshr i480 %r539, 32
+%r544 = trunc i480 %r543 to i32
+%r546 = getelementptr i32, i32* %r499, i32 11
+store i32 %r544, i32* %r546
+%r547 = lshr i480 %r543, 32
+%r548 = trunc i480 %r547 to i32
+%r550 = getelementptr i32, i32* %r499, i32 12
+store i32 %r548, i32* %r550
+%r551 = lshr i480 %r547, 32
+%r552 = trunc i480 %r551 to i32
+%r554 = getelementptr i32, i32* %r499, i32 13
+store i32 %r552, i32* %r554
+%r555 = lshr i480 %r551, 32
+%r556 = trunc i480 %r555 to i32
+%r558 = getelementptr i32, i32* %r499, i32 14
+store i32 %r556, i32* %r558
+ret void
+}
+define void @mcl_fpDbl_sqrPre10L(i32* noalias  %r1, i32* noalias  %r2) ; Combines three mcl_fpDbl_mulPre5L calls on the 5-word halves of the 10-word input %r2 into output %r1 (by its name, a full-width square; mulPre5L is defined elsewhere and assumed to write a 10-word product).
+{ ; In this body %r2 is only read (words 0..9); %r1 is read at words 0..19 and stored at words 5..19. Words are i32, least-significant word first.
+%r4 = getelementptr i32, i32* %r2, i32 5 ; %r4 = high half of the input (words 5..9)
+%r6 = getelementptr i32, i32* %r2, i32 5 ; %r6 = same address as %r4, used as the second operand
+%r8 = getelementptr i32, i32* %r1, i32 10 ; %r8 = output words 10..19
+call void @mcl_fpDbl_mulPre5L(i32* %r1, i32* %r2, i32* %r2) ; low half with itself, result at %r1 (presumably words 0..9)
+call void @mcl_fpDbl_mulPre5L(i32* %r8, i32* %r4, i32* %r6) ; high half with itself, result at %r8 (presumably words 10..19)
+%r9 = load i32, i32* %r4 ; assemble the high half (5 words) into one integer, word by word
+%r10 = zext i32 %r9 to i64
+%r12 = getelementptr i32, i32* %r4, i32 1
+%r13 = load i32, i32* %r12
+%r14 = zext i32 %r13 to i64
+%r15 = shl i64 %r14, 32
+%r16 = or i64 %r10, %r15
+%r17 = zext i64 %r16 to i96
+%r19 = getelementptr i32, i32* %r4, i32 2
+%r20 = load i32, i32* %r19
+%r21 = zext i32 %r20 to i96
+%r22 = shl i96 %r21, 64
+%r23 = or i96 %r17, %r22
+%r24 = zext i96 %r23 to i128
+%r26 = getelementptr i32, i32* %r4, i32 3
+%r27 = load i32, i32* %r26
+%r28 = zext i32 %r27 to i128
+%r29 = shl i128 %r28, 96
+%r30 = or i128 %r24, %r29
+%r31 = zext i128 %r30 to i160
+%r33 = getelementptr i32, i32* %r4, i32 4
+%r34 = load i32, i32* %r33
+%r35 = zext i32 %r34 to i160
+%r36 = shl i160 %r35, 128
+%r37 = or i160 %r31, %r36
+%r38 = zext i160 %r37 to i192 ; %r38 = high half, widened so the addition below cannot wrap
+%r39 = load i32, i32* %r2 ; assemble the low half (words 0..4)
+%r40 = zext i32 %r39 to i64
+%r42 = getelementptr i32, i32* %r2, i32 1
+%r43 = load i32, i32* %r42
+%r44 = zext i32 %r43 to i64
+%r45 = shl i64 %r44, 32
+%r46 = or i64 %r40, %r45
+%r47 = zext i64 %r46 to i96
+%r49 = getelementptr i32, i32* %r2, i32 2
+%r50 = load i32, i32* %r49
+%r51 = zext i32 %r50 to i96
+%r52 = shl i96 %r51, 64
+%r53 = or i96 %r47, %r52
+%r54 = zext i96 %r53 to i128
+%r56 = getelementptr i32, i32* %r2, i32 3
+%r57 = load i32, i32* %r56
+%r58 = zext i32 %r57 to i128
+%r59 = shl i128 %r58, 96
+%r60 = or i128 %r54, %r59
+%r61 = zext i128 %r60 to i160
+%r63 = getelementptr i32, i32* %r2, i32 4
+%r64 = load i32, i32* %r63
+%r65 = zext i32 %r64 to i160
+%r66 = shl i160 %r65, 128
+%r67 = or i160 %r61, %r66
+%r68 = zext i160 %r67 to i192 ; %r68 = low half, widened to i192
+%r69 = load i32, i32* %r6 ; second load of the high half, this time through %r6
+%r70 = zext i32 %r69 to i64
+%r72 = getelementptr i32, i32* %r6, i32 1
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i64
+%r75 = shl i64 %r74, 32
+%r76 = or i64 %r70, %r75
+%r77 = zext i64 %r76 to i96
+%r79 = getelementptr i32, i32* %r6, i32 2
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i96
+%r82 = shl i96 %r81, 64
+%r83 = or i96 %r77, %r82
+%r84 = zext i96 %r83 to i128
+%r86 = getelementptr i32, i32* %r6, i32 3
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i128
+%r89 = shl i128 %r88, 96
+%r90 = or i128 %r84, %r89
+%r91 = zext i128 %r90 to i160
+%r93 = getelementptr i32, i32* %r6, i32 4
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i160
+%r96 = shl i160 %r95, 128
+%r97 = or i160 %r91, %r96
+%r98 = zext i160 %r97 to i192 ; %r98 = high half again
+%r99 = load i32, i32* %r2 ; second load of the low half
+%r100 = zext i32 %r99 to i64
+%r102 = getelementptr i32, i32* %r2, i32 1
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i64
+%r105 = shl i64 %r104, 32
+%r106 = or i64 %r100, %r105
+%r107 = zext i64 %r106 to i96
+%r109 = getelementptr i32, i32* %r2, i32 2
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i96
+%r112 = shl i96 %r111, 64
+%r113 = or i96 %r107, %r112
+%r114 = zext i96 %r113 to i128
+%r116 = getelementptr i32, i32* %r2, i32 3
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i128
+%r119 = shl i128 %r118, 96
+%r120 = or i128 %r114, %r119
+%r121 = zext i128 %r120 to i160
+%r123 = getelementptr i32, i32* %r2, i32 4
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i160
+%r126 = shl i160 %r125, 128
+%r127 = or i160 %r121, %r126
+%r128 = zext i160 %r127 to i192 ; %r128 = low half again
+%r129 = add i192 %r38, %r68 ; first sum: high + low (may carry into bit 160)
+%r130 = add i192 %r98, %r128 ; second sum: the same two halves, from the second pair of loads
+%r132 = alloca i32, i32 10 ; 10-word scratch buffer receiving the product of the truncated sums
+%r133 = trunc i192 %r129 to i160 ; low 160 bits of the first sum
+%r134 = trunc i192 %r130 to i160 ; low 160 bits of the second sum
+%r135 = lshr i192 %r129, 160
+%r136 = trunc i192 %r135 to i1 ; %r136 = carry (bit 160) of the first sum
+%r137 = lshr i192 %r130, 160
+%r138 = trunc i192 %r137 to i1 ; %r138 = carry (bit 160) of the second sum
+%r139 = and i1 %r136, %r138 ; set when both sums carried; placed at bit 320 below
+%r141 = select i1 %r136, i160 %r134, i160 0 ; cross term: first carry times truncated second sum
+%r143 = select i1 %r138, i160 %r133, i160 0 ; cross term: second carry times truncated first sum
+%r145 = alloca i32, i32 5 ; 5-word scratch operand for the truncated first sum
+%r147 = alloca i32, i32 5 ; 5-word scratch operand for the truncated second sum
+%r148 = trunc i160 %r133 to i32 ; write %r133 to %r145 one 32-bit word at a time
+%r150 = getelementptr i32, i32* %r145, i32 0
+store i32 %r148, i32* %r150
+%r151 = lshr i160 %r133, 32
+%r152 = trunc i160 %r151 to i32
+%r154 = getelementptr i32, i32* %r145, i32 1
+store i32 %r152, i32* %r154
+%r155 = lshr i160 %r151, 32
+%r156 = trunc i160 %r155 to i32
+%r158 = getelementptr i32, i32* %r145, i32 2
+store i32 %r156, i32* %r158
+%r159 = lshr i160 %r155, 32
+%r160 = trunc i160 %r159 to i32
+%r162 = getelementptr i32, i32* %r145, i32 3
+store i32 %r160, i32* %r162
+%r163 = lshr i160 %r159, 32
+%r164 = trunc i160 %r163 to i32
+%r166 = getelementptr i32, i32* %r145, i32 4
+store i32 %r164, i32* %r166
+%r167 = trunc i160 %r134 to i32 ; write %r134 to %r147 one 32-bit word at a time
+%r169 = getelementptr i32, i32* %r147, i32 0
+store i32 %r167, i32* %r169
+%r170 = lshr i160 %r134, 32
+%r171 = trunc i160 %r170 to i32
+%r173 = getelementptr i32, i32* %r147, i32 1
+store i32 %r171, i32* %r173
+%r174 = lshr i160 %r170, 32
+%r175 = trunc i160 %r174 to i32
+%r177 = getelementptr i32, i32* %r147, i32 2
+store i32 %r175, i32* %r177
+%r178 = lshr i160 %r174, 32
+%r179 = trunc i160 %r178 to i32
+%r181 = getelementptr i32, i32* %r147, i32 3
+store i32 %r179, i32* %r181
+%r182 = lshr i160 %r178, 32
+%r183 = trunc i160 %r182 to i32
+%r185 = getelementptr i32, i32* %r147, i32 4
+store i32 %r183, i32* %r185
+call void @mcl_fpDbl_mulPre5L(i32* %r132, i32* %r145, i32* %r147) ; third call: truncated sums, result in scratch %r132
+%r186 = load i32, i32* %r132 ; reassemble the 10 scratch words into a single i320
+%r187 = zext i32 %r186 to i64
+%r189 = getelementptr i32, i32* %r132, i32 1
+%r190 = load i32, i32* %r189
+%r191 = zext i32 %r190 to i64
+%r192 = shl i64 %r191, 32
+%r193 = or i64 %r187, %r192
+%r194 = zext i64 %r193 to i96
+%r196 = getelementptr i32, i32* %r132, i32 2
+%r197 = load i32, i32* %r196
+%r198 = zext i32 %r197 to i96
+%r199 = shl i96 %r198, 64
+%r200 = or i96 %r194, %r199
+%r201 = zext i96 %r200 to i128
+%r203 = getelementptr i32, i32* %r132, i32 3
+%r204 = load i32, i32* %r203
+%r205 = zext i32 %r204 to i128
+%r206 = shl i128 %r205, 96
+%r207 = or i128 %r201, %r206
+%r208 = zext i128 %r207 to i160
+%r210 = getelementptr i32, i32* %r132, i32 4
+%r211 = load i32, i32* %r210
+%r212 = zext i32 %r211 to i160
+%r213 = shl i160 %r212, 128
+%r214 = or i160 %r208, %r213
+%r215 = zext i160 %r214 to i192
+%r217 = getelementptr i32, i32* %r132, i32 5
+%r218 = load i32, i32* %r217
+%r219 = zext i32 %r218 to i192
+%r220 = shl i192 %r219, 160
+%r221 = or i192 %r215, %r220
+%r222 = zext i192 %r221 to i224
+%r224 = getelementptr i32, i32* %r132, i32 6
+%r225 = load i32, i32* %r224
+%r226 = zext i32 %r225 to i224
+%r227 = shl i224 %r226, 192
+%r228 = or i224 %r222, %r227
+%r229 = zext i224 %r228 to i256
+%r231 = getelementptr i32, i32* %r132, i32 7
+%r232 = load i32, i32* %r231
+%r233 = zext i32 %r232 to i256
+%r234 = shl i256 %r233, 224
+%r235 = or i256 %r229, %r234
+%r236 = zext i256 %r235 to i288
+%r238 = getelementptr i32, i32* %r132, i32 8
+%r239 = load i32, i32* %r238
+%r240 = zext i32 %r239 to i288
+%r241 = shl i288 %r240, 256
+%r242 = or i288 %r236, %r241
+%r243 = zext i288 %r242 to i320
+%r245 = getelementptr i32, i32* %r132, i32 9
+%r246 = load i32, i32* %r245
+%r247 = zext i32 %r246 to i320
+%r248 = shl i320 %r247, 288
+%r249 = or i320 %r243, %r248
+%r250 = zext i320 %r249 to i352 ; scratch product widened by one word to hold the carry terms
+%r251 = zext i1 %r139 to i352
+%r252 = shl i352 %r251, 320 ; carry-times-carry term sits at bit 320
+%r253 = or i352 %r250, %r252
+%r254 = zext i160 %r141 to i352
+%r255 = zext i160 %r143 to i352
+%r256 = shl i352 %r254, 160 ; each cross term is weighted by 2^160
+%r257 = shl i352 %r255, 160
+%r258 = add i352 %r253, %r256
+%r259 = add i352 %r258, %r257 ; %r259 = scratch product + both cross terms + carry term
+%r260 = load i32, i32* %r1 ; load output words 0..9 (written by the first call) as an i320
+%r261 = zext i32 %r260 to i64
+%r263 = getelementptr i32, i32* %r1, i32 1
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i64
+%r266 = shl i64 %r265, 32
+%r267 = or i64 %r261, %r266
+%r268 = zext i64 %r267 to i96
+%r270 = getelementptr i32, i32* %r1, i32 2
+%r271 = load i32, i32* %r270
+%r272 = zext i32 %r271 to i96
+%r273 = shl i96 %r272, 64
+%r274 = or i96 %r268, %r273
+%r275 = zext i96 %r274 to i128
+%r277 = getelementptr i32, i32* %r1, i32 3
+%r278 = load i32, i32* %r277
+%r279 = zext i32 %r278 to i128
+%r280 = shl i128 %r279, 96
+%r281 = or i128 %r275, %r280
+%r282 = zext i128 %r281 to i160
+%r284 = getelementptr i32, i32* %r1, i32 4
+%r285 = load i32, i32* %r284
+%r286 = zext i32 %r285 to i160
+%r287 = shl i160 %r286, 128
+%r288 = or i160 %r282, %r287
+%r289 = zext i160 %r288 to i192
+%r291 = getelementptr i32, i32* %r1, i32 5
+%r292 = load i32, i32* %r291
+%r293 = zext i32 %r292 to i192
+%r294 = shl i192 %r293, 160
+%r295 = or i192 %r289, %r294
+%r296 = zext i192 %r295 to i224
+%r298 = getelementptr i32, i32* %r1, i32 6
+%r299 = load i32, i32* %r298
+%r300 = zext i32 %r299 to i224
+%r301 = shl i224 %r300, 192
+%r302 = or i224 %r296, %r301
+%r303 = zext i224 %r302 to i256
+%r305 = getelementptr i32, i32* %r1, i32 7
+%r306 = load i32, i32* %r305
+%r307 = zext i32 %r306 to i256
+%r308 = shl i256 %r307, 224
+%r309 = or i256 %r303, %r308
+%r310 = zext i256 %r309 to i288
+%r312 = getelementptr i32, i32* %r1, i32 8
+%r313 = load i32, i32* %r312
+%r314 = zext i32 %r313 to i288
+%r315 = shl i288 %r314, 256
+%r316 = or i288 %r310, %r315
+%r317 = zext i288 %r316 to i320
+%r319 = getelementptr i32, i32* %r1, i32 9
+%r320 = load i32, i32* %r319
+%r321 = zext i32 %r320 to i320
+%r322 = shl i320 %r321, 288
+%r323 = or i320 %r317, %r322
+%r324 = zext i320 %r323 to i352
+%r325 = sub i352 %r259, %r324 ; subtract output words 0..9 from the combined product
+%r327 = getelementptr i32, i32* %r1, i32 10 ; load output words 10..19 (written by the second call) as an i320
+%r328 = load i32, i32* %r327
+%r329 = zext i32 %r328 to i64
+%r331 = getelementptr i32, i32* %r327, i32 1
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i64
+%r334 = shl i64 %r333, 32
+%r335 = or i64 %r329, %r334
+%r336 = zext i64 %r335 to i96
+%r338 = getelementptr i32, i32* %r327, i32 2
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i96
+%r341 = shl i96 %r340, 64
+%r342 = or i96 %r336, %r341
+%r343 = zext i96 %r342 to i128
+%r345 = getelementptr i32, i32* %r327, i32 3
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i128
+%r348 = shl i128 %r347, 96
+%r349 = or i128 %r343, %r348
+%r350 = zext i128 %r349 to i160
+%r352 = getelementptr i32, i32* %r327, i32 4
+%r353 = load i32, i32* %r352
+%r354 = zext i32 %r353 to i160
+%r355 = shl i160 %r354, 128
+%r356 = or i160 %r350, %r355
+%r357 = zext i160 %r356 to i192
+%r359 = getelementptr i32, i32* %r327, i32 5
+%r360 = load i32, i32* %r359
+%r361 = zext i32 %r360 to i192
+%r362 = shl i192 %r361, 160
+%r363 = or i192 %r357, %r362
+%r364 = zext i192 %r363 to i224
+%r366 = getelementptr i32, i32* %r327, i32 6
+%r367 = load i32, i32* %r366
+%r368 = zext i32 %r367 to i224
+%r369 = shl i224 %r368, 192
+%r370 = or i224 %r364, %r369
+%r371 = zext i224 %r370 to i256
+%r373 = getelementptr i32, i32* %r327, i32 7
+%r374 = load i32, i32* %r373
+%r375 = zext i32 %r374 to i256
+%r376 = shl i256 %r375, 224
+%r377 = or i256 %r371, %r376
+%r378 = zext i256 %r377 to i288
+%r380 = getelementptr i32, i32* %r327, i32 8
+%r381 = load i32, i32* %r380
+%r382 = zext i32 %r381 to i288
+%r383 = shl i288 %r382, 256
+%r384 = or i288 %r378, %r383
+%r385 = zext i288 %r384 to i320
+%r387 = getelementptr i32, i32* %r327, i32 9
+%r388 = load i32, i32* %r387
+%r389 = zext i32 %r388 to i320
+%r390 = shl i320 %r389, 288
+%r391 = or i320 %r385, %r390
+%r392 = zext i320 %r391 to i352
+%r393 = sub i352 %r325, %r392 ; subtract output words 10..19 as well; %r393 is the middle term
+%r394 = zext i352 %r393 to i480 ; widen the middle term to 15 words
+%r396 = getelementptr i32, i32* %r1, i32 5 ; load output words 5..19 as an i480
+%r397 = load i32, i32* %r396
+%r398 = zext i32 %r397 to i64
+%r400 = getelementptr i32, i32* %r396, i32 1
+%r401 = load i32, i32* %r400
+%r402 = zext i32 %r401 to i64
+%r403 = shl i64 %r402, 32
+%r404 = or i64 %r398, %r403
+%r405 = zext i64 %r404 to i96
+%r407 = getelementptr i32, i32* %r396, i32 2
+%r408 = load i32, i32* %r407
+%r409 = zext i32 %r408 to i96
+%r410 = shl i96 %r409, 64
+%r411 = or i96 %r405, %r410
+%r412 = zext i96 %r411 to i128
+%r414 = getelementptr i32, i32* %r396, i32 3
+%r415 = load i32, i32* %r414
+%r416 = zext i32 %r415 to i128
+%r417 = shl i128 %r416, 96
+%r418 = or i128 %r412, %r417
+%r419 = zext i128 %r418 to i160
+%r421 = getelementptr i32, i32* %r396, i32 4
+%r422 = load i32, i32* %r421
+%r423 = zext i32 %r422 to i160
+%r424 = shl i160 %r423, 128
+%r425 = or i160 %r419, %r424
+%r426 = zext i160 %r425 to i192
+%r428 = getelementptr i32, i32* %r396, i32 5
+%r429 = load i32, i32* %r428
+%r430 = zext i32 %r429 to i192
+%r431 = shl i192 %r430, 160
+%r432 = or i192 %r426, %r431
+%r433 = zext i192 %r432 to i224
+%r435 = getelementptr i32, i32* %r396, i32 6
+%r436 = load i32, i32* %r435
+%r437 = zext i32 %r436 to i224
+%r438 = shl i224 %r437, 192
+%r439 = or i224 %r433, %r438
+%r440 = zext i224 %r439 to i256
+%r442 = getelementptr i32, i32* %r396, i32 7
+%r443 = load i32, i32* %r442
+%r444 = zext i32 %r443 to i256
+%r445 = shl i256 %r444, 224
+%r446 = or i256 %r440, %r445
+%r447 = zext i256 %r446 to i288
+%r449 = getelementptr i32, i32* %r396, i32 8
+%r450 = load i32, i32* %r449
+%r451 = zext i32 %r450 to i288
+%r452 = shl i288 %r451, 256
+%r453 = or i288 %r447, %r452
+%r454 = zext i288 %r453 to i320
+%r456 = getelementptr i32, i32* %r396, i32 9
+%r457 = load i32, i32* %r456
+%r458 = zext i32 %r457 to i320
+%r459 = shl i320 %r458, 288
+%r460 = or i320 %r454, %r459
+%r461 = zext i320 %r460 to i352
+%r463 = getelementptr i32, i32* %r396, i32 10
+%r464 = load i32, i32* %r463
+%r465 = zext i32 %r464 to i352
+%r466 = shl i352 %r465, 320
+%r467 = or i352 %r461, %r466
+%r468 = zext i352 %r467 to i384
+%r470 = getelementptr i32, i32* %r396, i32 11
+%r471 = load i32, i32* %r470
+%r472 = zext i32 %r471 to i384
+%r473 = shl i384 %r472, 352
+%r474 = or i384 %r468, %r473
+%r475 = zext i384 %r474 to i416
+%r477 = getelementptr i32, i32* %r396, i32 12
+%r478 = load i32, i32* %r477
+%r479 = zext i32 %r478 to i416
+%r480 = shl i416 %r479, 384
+%r481 = or i416 %r475, %r480
+%r482 = zext i416 %r481 to i448
+%r484 = getelementptr i32, i32* %r396, i32 13
+%r485 = load i32, i32* %r484
+%r486 = zext i32 %r485 to i448
+%r487 = shl i448 %r486, 416
+%r488 = or i448 %r482, %r487
+%r489 = zext i448 %r488 to i480
+%r491 = getelementptr i32, i32* %r396, i32 14
+%r492 = load i32, i32* %r491
+%r493 = zext i32 %r492 to i480
+%r494 = shl i480 %r493, 448
+%r495 = or i480 %r489, %r494
+%r496 = add i480 %r394, %r495 ; add the middle term to output words 5..19
+%r498 = getelementptr i32, i32* %r1, i32 5 ; store the 15-word sum back to output words 5..19, one word per step
+%r499 = trunc i480 %r496 to i32
+%r501 = getelementptr i32, i32* %r498, i32 0
+store i32 %r499, i32* %r501
+%r502 = lshr i480 %r496, 32
+%r503 = trunc i480 %r502 to i32
+%r505 = getelementptr i32, i32* %r498, i32 1
+store i32 %r503, i32* %r505
+%r506 = lshr i480 %r502, 32
+%r507 = trunc i480 %r506 to i32
+%r509 = getelementptr i32, i32* %r498, i32 2
+store i32 %r507, i32* %r509
+%r510 = lshr i480 %r506, 32
+%r511 = trunc i480 %r510 to i32
+%r513 = getelementptr i32, i32* %r498, i32 3
+store i32 %r511, i32* %r513
+%r514 = lshr i480 %r510, 32
+%r515 = trunc i480 %r514 to i32
+%r517 = getelementptr i32, i32* %r498, i32 4
+store i32 %r515, i32* %r517
+%r518 = lshr i480 %r514, 32
+%r519 = trunc i480 %r518 to i32
+%r521 = getelementptr i32, i32* %r498, i32 5
+store i32 %r519, i32* %r521
+%r522 = lshr i480 %r518, 32
+%r523 = trunc i480 %r522 to i32
+%r525 = getelementptr i32, i32* %r498, i32 6
+store i32 %r523, i32* %r525
+%r526 = lshr i480 %r522, 32
+%r527 = trunc i480 %r526 to i32
+%r529 = getelementptr i32, i32* %r498, i32 7
+store i32 %r527, i32* %r529
+%r530 = lshr i480 %r526, 32
+%r531 = trunc i480 %r530 to i32
+%r533 = getelementptr i32, i32* %r498, i32 8
+store i32 %r531, i32* %r533
+%r534 = lshr i480 %r530, 32
+%r535 = trunc i480 %r534 to i32
+%r537 = getelementptr i32, i32* %r498, i32 9
+store i32 %r535, i32* %r537
+%r538 = lshr i480 %r534, 32
+%r539 = trunc i480 %r538 to i32
+%r541 = getelementptr i32, i32* %r498, i32 10
+store i32 %r539, i32* %r541
+%r542 = lshr i480 %r538, 32
+%r543 = trunc i480 %r542 to i32
+%r545 = getelementptr i32, i32* %r498, i32 11
+store i32 %r543, i32* %r545
+%r546 = lshr i480 %r542, 32
+%r547 = trunc i480 %r546 to i32
+%r549 = getelementptr i32, i32* %r498, i32 12
+store i32 %r547, i32* %r549
+%r550 = lshr i480 %r546, 32
+%r551 = trunc i480 %r550 to i32
+%r553 = getelementptr i32, i32* %r498, i32 13
+store i32 %r551, i32* %r553
+%r554 = lshr i480 %r550, 32
+%r555 = trunc i480 %r554 to i32
+%r557 = getelementptr i32, i32* %r498, i32 14
+store i32 %r555, i32* %r557
+ret void
+}
+define void @mcl_fp_mont10L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6
+%r9 = getelementptr i32, i32* %r3, i32 0
+%r10 = load i32, i32* %r9
+%r11 = call i352 @mulPv320x32(i32* %r2, i32 %r10)
+%r12 = zext i352 %r11 to i384
+%r13 = trunc i352 %r11 to i32
+%r14 = mul i32 %r13, %r7
+%r15 = call i352 @mulPv320x32(i32* %r4, i32 %r14)
+%r16 = zext i352 %r15 to i384
+%r17 = add i384 %r12, %r16
+%r18 = lshr i384 %r17, 32
+%r20 = getelementptr i32, i32* %r3, i32 1
+%r21 = load i32, i32* %r20
+%r22 = call i352 @mulPv320x32(i32* %r2, i32 %r21)
+%r23 = zext i352 %r22 to i384
+%r24 = add i384 %r18, %r23
+%r25 = trunc i384 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i352 @mulPv320x32(i32* %r4, i32 %r26)
+%r28 = zext i352 %r27 to i384
+%r29 = add i384 %r24, %r28
+%r30 = lshr i384 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2
+%r33 = load i32, i32* %r32
+%r34 = call i352 @mulPv320x32(i32* %r2, i32 %r33)
+%r35 = zext i352 %r34 to i384
+%r36 = add i384 %r30, %r35
+%r37 = trunc i384 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i352 @mulPv320x32(i32* %r4, i32 %r38)
+%r40 = zext i352 %r39 to i384
+%r41 = add i384 %r36, %r40
+%r42 = lshr i384 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = call i352 @mulPv320x32(i32* %r2, i32 %r45)
+%r47 = zext i352 %r46 to i384
+%r48 = add i384 %r42, %r47
+%r49 = trunc i384 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i352 @mulPv320x32(i32* %r4, i32 %r50)
+%r52 = zext i352 %r51 to i384
+%r53 = add i384 %r48, %r52
+%r54 = lshr i384 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4
+%r57 = load i32, i32* %r56
+%r58 = call i352 @mulPv320x32(i32* %r2, i32 %r57)
+%r59 = zext i352 %r58 to i384
+%r60 = add i384 %r54, %r59
+%r61 = trunc i384 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i352 @mulPv320x32(i32* %r4, i32 %r62)
+%r64 = zext i352 %r63 to i384
+%r65 = add i384 %r60, %r64
+%r66 = lshr i384 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5
+%r69 = load i32, i32* %r68
+%r70 = call i352 @mulPv320x32(i32* %r2, i32 %r69)
+%r71 = zext i352 %r70 to i384
+%r72 = add i384 %r66, %r71
+%r73 = trunc i384 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i352 @mulPv320x32(i32* %r4, i32 %r74)
+%r76 = zext i352 %r75 to i384
+%r77 = add i384 %r72, %r76
+%r78 = lshr i384 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6
+%r81 = load i32, i32* %r80
+%r82 = call i352 @mulPv320x32(i32* %r2, i32 %r81)
+%r83 = zext i352 %r82 to i384
+%r84 = add i384 %r78, %r83
+%r85 = trunc i384 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i352 @mulPv320x32(i32* %r4, i32 %r86)
+%r88 = zext i352 %r87 to i384
+%r89 = add i384 %r84, %r88
+%r90 = lshr i384 %r89, 32
+%r92 = getelementptr i32, i32* %r3, i32 7
+%r93 = load i32, i32* %r92
+%r94 = call i352 @mulPv320x32(i32* %r2, i32 %r93)
+%r95 = zext i352 %r94 to i384
+%r96 = add i384 %r90, %r95
+%r97 = trunc i384 %r96 to i32
+%r98 = mul i32 %r97, %r7
+%r99 = call i352 @mulPv320x32(i32* %r4, i32 %r98)
+%r100 = zext i352 %r99 to i384
+%r101 = add i384 %r96, %r100
+%r102 = lshr i384 %r101, 32
+%r104 = getelementptr i32, i32* %r3, i32 8
+%r105 = load i32, i32* %r104
+%r106 = call i352 @mulPv320x32(i32* %r2, i32 %r105)
+%r107 = zext i352 %r106 to i384
+%r108 = add i384 %r102, %r107
+%r109 = trunc i384 %r108 to i32
+%r110 = mul i32 %r109, %r7
+%r111 = call i352 @mulPv320x32(i32* %r4, i32 %r110)
+%r112 = zext i352 %r111 to i384
+%r113 = add i384 %r108, %r112
+%r114 = lshr i384 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 9
+%r117 = load i32, i32* %r116
+%r118 = call i352 @mulPv320x32(i32* %r2, i32 %r117)
+%r119 = zext i352 %r118 to i384
+%r120 = add i384 %r114, %r119
+%r121 = trunc i384 %r120 to i32
+%r122 = mul i32 %r121, %r7
+%r123 = call i352 @mulPv320x32(i32* %r4, i32 %r122)
+%r124 = zext i352 %r123 to i384
+%r125 = add i384 %r120, %r124
+%r126 = lshr i384 %r125, 32
+%r127 = trunc i384 %r126 to i352
+%r128 = load i32, i32* %r4
+%r129 = zext i32 %r128 to i64
+%r131 = getelementptr i32, i32* %r4, i32 1
+%r132 = load i32, i32* %r131
+%r133 = zext i32 %r132 to i64
+%r134 = shl i64 %r133, 32
+%r135 = or i64 %r129, %r134
+%r136 = zext i64 %r135 to i96
+%r138 = getelementptr i32, i32* %r4, i32 2
+%r139 = load i32, i32* %r138
+%r140 = zext i32 %r139 to i96
+%r141 = shl i96 %r140, 64
+%r142 = or i96 %r136, %r141
+%r143 = zext i96 %r142 to i128
+%r145 = getelementptr i32, i32* %r4, i32 3
+%r146 = load i32, i32* %r145
+%r147 = zext i32 %r146 to i128
+%r148 = shl i128 %r147, 96
+%r149 = or i128 %r143, %r148
+%r150 = zext i128 %r149 to i160
+%r152 = getelementptr i32, i32* %r4, i32 4
+%r153 = load i32, i32* %r152
+%r154 = zext i32 %r153 to i160
+%r155 = shl i160 %r154, 128
+%r156 = or i160 %r150, %r155
+%r157 = zext i160 %r156 to i192
+%r159 = getelementptr i32, i32* %r4, i32 5
+%r160 = load i32, i32* %r159
+%r161 = zext i32 %r160 to i192
+%r162 = shl i192 %r161, 160
+%r163 = or i192 %r157, %r162
+%r164 = zext i192 %r163 to i224
+%r166 = getelementptr i32, i32* %r4, i32 6
+%r167 = load i32, i32* %r166
+%r168 = zext i32 %r167 to i224
+%r169 = shl i224 %r168, 192
+%r170 = or i224 %r164, %r169
+%r171 = zext i224 %r170 to i256
+%r173 = getelementptr i32, i32* %r4, i32 7
+%r174 = load i32, i32* %r173
+%r175 = zext i32 %r174 to i256
+%r176 = shl i256 %r175, 224
+%r177 = or i256 %r171, %r176
+%r178 = zext i256 %r177 to i288
+%r180 = getelementptr i32, i32* %r4, i32 8
+%r181 = load i32, i32* %r180
+%r182 = zext i32 %r181 to i288
+%r183 = shl i288 %r182, 256
+%r184 = or i288 %r178, %r183
+%r185 = zext i288 %r184 to i320
+%r187 = getelementptr i32, i32* %r4, i32 9
+%r188 = load i32, i32* %r187
+%r189 = zext i32 %r188 to i320
+%r190 = shl i320 %r189, 288
+%r191 = or i320 %r185, %r190
+%r192 = zext i320 %r191 to i352
+%r193 = sub i352 %r127, %r192
+%r194 = lshr i352 %r193, 320
+%r195 = trunc i352 %r194 to i1
+%r196 = select i1 %r195, i352 %r127, i352 %r193
+%r197 = trunc i352 %r196 to i320
+%r198 = trunc i320 %r197 to i32
+%r200 = getelementptr i32, i32* %r1, i32 0
+store i32 %r198, i32* %r200
+%r201 = lshr i320 %r197, 32
+%r202 = trunc i320 %r201 to i32
+%r204 = getelementptr i32, i32* %r1, i32 1
+store i32 %r202, i32* %r204
+%r205 = lshr i320 %r201, 32
+%r206 = trunc i320 %r205 to i32
+%r208 = getelementptr i32, i32* %r1, i32 2
+store i32 %r206, i32* %r208
+%r209 = lshr i320 %r205, 32
+%r210 = trunc i320 %r209 to i32
+%r212 = getelementptr i32, i32* %r1, i32 3
+store i32 %r210, i32* %r212
+%r213 = lshr i320 %r209, 32
+%r214 = trunc i320 %r213 to i32
+%r216 = getelementptr i32, i32* %r1, i32 4
+store i32 %r214, i32* %r216
+%r217 = lshr i320 %r213, 32
+%r218 = trunc i320 %r217 to i32
+%r220 = getelementptr i32, i32* %r1, i32 5
+store i32 %r218, i32* %r220
+%r221 = lshr i320 %r217, 32
+%r222 = trunc i320 %r221 to i32
+%r224 = getelementptr i32, i32* %r1, i32 6
+store i32 %r222, i32* %r224
+%r225 = lshr i320 %r221, 32
+%r226 = trunc i320 %r225 to i32
+%r228 = getelementptr i32, i32* %r1, i32 7
+store i32 %r226, i32* %r228
+%r229 = lshr i320 %r225, 32
+%r230 = trunc i320 %r229 to i32
+%r232 = getelementptr i32, i32* %r1, i32 8
+store i32 %r230, i32* %r232
+%r233 = lshr i320 %r229, 32
+%r234 = trunc i320 %r233 to i32
+%r236 = getelementptr i32, i32* %r1, i32 9
+store i32 %r234, i32* %r236
+ret void
+}
+define void @mcl_fp_montNF10L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6
+%r8 = load i32, i32* %r3
+%r9 = call i352 @mulPv320x32(i32* %r2, i32 %r8)
+%r10 = trunc i352 %r9 to i32
+%r11 = mul i32 %r10, %r7
+%r12 = call i352 @mulPv320x32(i32* %r4, i32 %r11)
+%r13 = add i352 %r9, %r12
+%r14 = lshr i352 %r13, 32
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = call i352 @mulPv320x32(i32* %r2, i32 %r17)
+%r19 = add i352 %r14, %r18
+%r20 = trunc i352 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i352 @mulPv320x32(i32* %r4, i32 %r21)
+%r23 = add i352 %r19, %r22
+%r24 = lshr i352 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2
+%r27 = load i32, i32* %r26
+%r28 = call i352 @mulPv320x32(i32* %r2, i32 %r27)
+%r29 = add i352 %r24, %r28
+%r30 = trunc i352 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i352 @mulPv320x32(i32* %r4, i32 %r31)
+%r33 = add i352 %r29, %r32
+%r34 = lshr i352 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3
+%r37 = load i32, i32* %r36
+%r38 = call i352 @mulPv320x32(i32* %r2, i32 %r37)
+%r39 = add i352 %r34, %r38
+%r40 = trunc i352 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i352 @mulPv320x32(i32* %r4, i32 %r41)
+%r43 = add i352 %r39, %r42
+%r44 = lshr i352 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4
+%r47 = load i32, i32* %r46
+%r48 = call i352 @mulPv320x32(i32* %r2, i32 %r47)
+%r49 = add i352 %r44, %r48
+%r50 = trunc i352 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i352 @mulPv320x32(i32* %r4, i32 %r51)
+%r53 = add i352 %r49, %r52
+%r54 = lshr i352 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5
+%r57 = load i32, i32* %r56
+%r58 = call i352 @mulPv320x32(i32* %r2, i32 %r57)
+%r59 = add i352 %r54, %r58
+%r60 = trunc i352 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i352 @mulPv320x32(i32* %r4, i32 %r61)
+%r63 = add i352 %r59, %r62
+%r64 = lshr i352 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6
+%r67 = load i32, i32* %r66
+%r68 = call i352 @mulPv320x32(i32* %r2, i32 %r67)
+%r69 = add i352 %r64, %r68
+%r70 = trunc i352 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i352 @mulPv320x32(i32* %r4, i32 %r71)
+%r73 = add i352 %r69, %r72
+%r74 = lshr i352 %r73, 32
+%r76 = getelementptr i32, i32* %r3, i32 7
+%r77 = load i32, i32* %r76
+%r78 = call i352 @mulPv320x32(i32* %r2, i32 %r77)
+%r79 = add i352 %r74, %r78
+%r80 = trunc i352 %r79 to i32
+%r81 = mul i32 %r80, %r7
+%r82 = call i352 @mulPv320x32(i32* %r4, i32 %r81)
+%r83 = add i352 %r79, %r82
+%r84 = lshr i352 %r83, 32
+%r86 = getelementptr i32, i32* %r3, i32 8
+%r87 = load i32, i32* %r86
+%r88 = call i352 @mulPv320x32(i32* %r2, i32 %r87)
+%r89 = add i352 %r84, %r88
+%r90 = trunc i352 %r89 to i32
+%r91 = mul i32 %r90, %r7
+%r92 = call i352 @mulPv320x32(i32* %r4, i32 %r91)
+%r93 = add i352 %r89, %r92
+%r94 = lshr i352 %r93, 32
+%r96 = getelementptr i32, i32* %r3, i32 9
+%r97 = load i32, i32* %r96
+%r98 = call i352 @mulPv320x32(i32* %r2, i32 %r97)
+%r99 = add i352 %r94, %r98
+%r100 = trunc i352 %r99 to i32
+%r101 = mul i32 %r100, %r7
+%r102 = call i352 @mulPv320x32(i32* %r4, i32 %r101)
+%r103 = add i352 %r99, %r102
+%r104 = lshr i352 %r103, 32
+%r105 = trunc i352 %r104 to i320
+%r106 = load i32, i32* %r4
+%r107 = zext i32 %r106 to i64
+%r109 = getelementptr i32, i32* %r4, i32 1
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i64
+%r112 = shl i64 %r111, 32
+%r113 = or i64 %r107, %r112
+%r114 = zext i64 %r113 to i96
+%r116 = getelementptr i32, i32* %r4, i32 2
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i96
+%r119 = shl i96 %r118, 64
+%r120 = or i96 %r114, %r119
+%r121 = zext i96 %r120 to i128
+%r123 = getelementptr i32, i32* %r4, i32 3
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i128
+%r126 = shl i128 %r125, 96
+%r127 = or i128 %r121, %r126
+%r128 = zext i128 %r127 to i160
+%r130 = getelementptr i32, i32* %r4, i32 4
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i160
+%r133 = shl i160 %r132, 128
+%r134 = or i160 %r128, %r133
+%r135 = zext i160 %r134 to i192
+%r137 = getelementptr i32, i32* %r4, i32 5
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i192
+%r140 = shl i192 %r139, 160
+%r141 = or i192 %r135, %r140
+%r142 = zext i192 %r141 to i224
+%r144 = getelementptr i32, i32* %r4, i32 6
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i224
+%r147 = shl i224 %r146, 192
+%r148 = or i224 %r142, %r147
+%r149 = zext i224 %r148 to i256
+%r151 = getelementptr i32, i32* %r4, i32 7
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i256
+%r154 = shl i256 %r153, 224
+%r155 = or i256 %r149, %r154
+%r156 = zext i256 %r155 to i288
+%r158 = getelementptr i32, i32* %r4, i32 8
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i288
+%r161 = shl i288 %r160, 256
+%r162 = or i288 %r156, %r161
+%r163 = zext i288 %r162 to i320
+%r165 = getelementptr i32, i32* %r4, i32 9
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i320
+%r168 = shl i320 %r167, 288
+%r169 = or i320 %r163, %r168
+%r170 = sub i320 %r105, %r169
+%r171 = lshr i320 %r170, 319
+%r172 = trunc i320 %r171 to i1
+%r173 = select i1 %r172, i320 %r105, i320 %r170
+%r174 = trunc i320 %r173 to i32
+%r176 = getelementptr i32, i32* %r1, i32 0
+store i32 %r174, i32* %r176
+%r177 = lshr i320 %r173, 32
+%r178 = trunc i320 %r177 to i32
+%r180 = getelementptr i32, i32* %r1, i32 1
+store i32 %r178, i32* %r180
+%r181 = lshr i320 %r177, 32
+%r182 = trunc i320 %r181 to i32
+%r184 = getelementptr i32, i32* %r1, i32 2
+store i32 %r182, i32* %r184
+%r185 = lshr i320 %r181, 32
+%r186 = trunc i320 %r185 to i32
+%r188 = getelementptr i32, i32* %r1, i32 3
+store i32 %r186, i32* %r188
+%r189 = lshr i320 %r185, 32
+%r190 = trunc i320 %r189 to i32
+%r192 = getelementptr i32, i32* %r1, i32 4
+store i32 %r190, i32* %r192
+%r193 = lshr i320 %r189, 32
+%r194 = trunc i320 %r193 to i32
+%r196 = getelementptr i32, i32* %r1, i32 5
+store i32 %r194, i32* %r196
+%r197 = lshr i320 %r193, 32
+%r198 = trunc i320 %r197 to i32
+%r200 = getelementptr i32, i32* %r1, i32 6
+store i32 %r198, i32* %r200
+%r201 = lshr i320 %r197, 32
+%r202 = trunc i320 %r201 to i32
+%r204 = getelementptr i32, i32* %r1, i32 7
+store i32 %r202, i32* %r204
+%r205 = lshr i320 %r201, 32
+%r206 = trunc i320 %r205 to i32
+%r208 = getelementptr i32, i32* %r1, i32 8
+store i32 %r206, i32* %r208
+%r209 = lshr i320 %r205, 32
+%r210 = trunc i320 %r209 to i32
+%r212 = getelementptr i32, i32* %r1, i32 9
+store i32 %r210, i32* %r212
+ret void
+}
+define void @mcl_fp_montRed10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i288
+%r59 = getelementptr i32, i32* %r3, i32 8
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i288
+%r62 = shl i288 %r61, 256
+%r63 = or i288 %r57, %r62
+%r64 = zext i288 %r63 to i320
+%r66 = getelementptr i32, i32* %r3, i32 9
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i320
+%r69 = shl i320 %r68, 288
+%r70 = or i320 %r64, %r69
+%r71 = load i32, i32* %r2
+%r72 = zext i32 %r71 to i64
+%r74 = getelementptr i32, i32* %r2, i32 1
+%r75 = load i32, i32* %r74
+%r76 = zext i32 %r75 to i64
+%r77 = shl i64 %r76, 32
+%r78 = or i64 %r72, %r77
+%r79 = zext i64 %r78 to i96
+%r81 = getelementptr i32, i32* %r2, i32 2
+%r82 = load i32, i32* %r81
+%r83 = zext i32 %r82 to i96
+%r84 = shl i96 %r83, 64
+%r85 = or i96 %r79, %r84
+%r86 = zext i96 %r85 to i128
+%r88 = getelementptr i32, i32* %r2, i32 3
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i128
+%r91 = shl i128 %r90, 96
+%r92 = or i128 %r86, %r91
+%r93 = zext i128 %r92 to i160
+%r95 = getelementptr i32, i32* %r2, i32 4
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i160
+%r98 = shl i160 %r97, 128
+%r99 = or i160 %r93, %r98
+%r100 = zext i160 %r99 to i192
+%r102 = getelementptr i32, i32* %r2, i32 5
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i192
+%r105 = shl i192 %r104, 160
+%r106 = or i192 %r100, %r105
+%r107 = zext i192 %r106 to i224
+%r109 = getelementptr i32, i32* %r2, i32 6
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i224
+%r112 = shl i224 %r111, 192
+%r113 = or i224 %r107, %r112
+%r114 = zext i224 %r113 to i256
+%r116 = getelementptr i32, i32* %r2, i32 7
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i256
+%r119 = shl i256 %r118, 224
+%r120 = or i256 %r114, %r119
+%r121 = zext i256 %r120 to i288
+%r123 = getelementptr i32, i32* %r2, i32 8
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i288
+%r126 = shl i288 %r125, 256
+%r127 = or i288 %r121, %r126
+%r128 = zext i288 %r127 to i320
+%r130 = getelementptr i32, i32* %r2, i32 9
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i320
+%r133 = shl i320 %r132, 288
+%r134 = or i320 %r128, %r133
+%r135 = zext i320 %r134 to i352
+%r137 = getelementptr i32, i32* %r2, i32 10
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i352
+%r140 = shl i352 %r139, 320
+%r141 = or i352 %r135, %r140
+%r142 = zext i352 %r141 to i384
+%r144 = getelementptr i32, i32* %r2, i32 11
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i384
+%r147 = shl i384 %r146, 352
+%r148 = or i384 %r142, %r147
+%r149 = zext i384 %r148 to i416
+%r151 = getelementptr i32, i32* %r2, i32 12
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i416
+%r154 = shl i416 %r153, 384
+%r155 = or i416 %r149, %r154
+%r156 = zext i416 %r155 to i448
+%r158 = getelementptr i32, i32* %r2, i32 13
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i448
+%r161 = shl i448 %r160, 416
+%r162 = or i448 %r156, %r161
+%r163 = zext i448 %r162 to i480
+%r165 = getelementptr i32, i32* %r2, i32 14
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i480
+%r168 = shl i480 %r167, 448
+%r169 = or i480 %r163, %r168
+%r170 = zext i480 %r169 to i512
+%r172 = getelementptr i32, i32* %r2, i32 15
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i512
+%r175 = shl i512 %r174, 480
+%r176 = or i512 %r170, %r175
+%r177 = zext i512 %r176 to i544
+%r179 = getelementptr i32, i32* %r2, i32 16
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i544
+%r182 = shl i544 %r181, 512
+%r183 = or i544 %r177, %r182
+%r184 = zext i544 %r183 to i576
+%r186 = getelementptr i32, i32* %r2, i32 17
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i576
+%r189 = shl i576 %r188, 544
+%r190 = or i576 %r184, %r189
+%r191 = zext i576 %r190 to i608
+%r193 = getelementptr i32, i32* %r2, i32 18
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i608
+%r196 = shl i608 %r195, 576
+%r197 = or i608 %r191, %r196
+%r198 = zext i608 %r197 to i640
+%r200 = getelementptr i32, i32* %r2, i32 19
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i640
+%r203 = shl i640 %r202, 608
+%r204 = or i640 %r198, %r203
+%r205 = zext i640 %r204 to i672
+%r206 = trunc i672 %r205 to i32
+%r207 = mul i32 %r206, %r6
+%r208 = call i352 @mulPv320x32(i32* %r3, i32 %r207)
+%r209 = zext i352 %r208 to i672
+%r210 = add i672 %r205, %r209
+%r211 = lshr i672 %r210, 32
+%r212 = trunc i672 %r211 to i640
+%r213 = trunc i640 %r212 to i32
+%r214 = mul i32 %r213, %r6
+%r215 = call i352 @mulPv320x32(i32* %r3, i32 %r214)
+%r216 = zext i352 %r215 to i640
+%r217 = add i640 %r212, %r216
+%r218 = lshr i640 %r217, 32
+%r219 = trunc i640 %r218 to i608
+%r220 = trunc i608 %r219 to i32
+%r221 = mul i32 %r220, %r6
+%r222 = call i352 @mulPv320x32(i32* %r3, i32 %r221)
+%r223 = zext i352 %r222 to i608
+%r224 = add i608 %r219, %r223
+%r225 = lshr i608 %r224, 32
+%r226 = trunc i608 %r225 to i576
+%r227 = trunc i576 %r226 to i32
+%r228 = mul i32 %r227, %r6
+%r229 = call i352 @mulPv320x32(i32* %r3, i32 %r228)
+%r230 = zext i352 %r229 to i576
+%r231 = add i576 %r226, %r230
+%r232 = lshr i576 %r231, 32
+%r233 = trunc i576 %r232 to i544
+%r234 = trunc i544 %r233 to i32
+%r235 = mul i32 %r234, %r6
+%r236 = call i352 @mulPv320x32(i32* %r3, i32 %r235)
+%r237 = zext i352 %r236 to i544
+%r238 = add i544 %r233, %r237
+%r239 = lshr i544 %r238, 32
+%r240 = trunc i544 %r239 to i512
+%r241 = trunc i512 %r240 to i32
+%r242 = mul i32 %r241, %r6
+%r243 = call i352 @mulPv320x32(i32* %r3, i32 %r242)
+%r244 = zext i352 %r243 to i512
+%r245 = add i512 %r240, %r244
+%r246 = lshr i512 %r245, 32
+%r247 = trunc i512 %r246 to i480
+%r248 = trunc i480 %r247 to i32
+%r249 = mul i32 %r248, %r6
+%r250 = call i352 @mulPv320x32(i32* %r3, i32 %r249)
+%r251 = zext i352 %r250 to i480
+%r252 = add i480 %r247, %r251
+%r253 = lshr i480 %r252, 32
+%r254 = trunc i480 %r253 to i448
+%r255 = trunc i448 %r254 to i32
+%r256 = mul i32 %r255, %r6
+%r257 = call i352 @mulPv320x32(i32* %r3, i32 %r256)
+%r258 = zext i352 %r257 to i448
+%r259 = add i448 %r254, %r258
+%r260 = lshr i448 %r259, 32
+%r261 = trunc i448 %r260 to i416
+%r262 = trunc i416 %r261 to i32
+%r263 = mul i32 %r262, %r6
+%r264 = call i352 @mulPv320x32(i32* %r3, i32 %r263)
+%r265 = zext i352 %r264 to i416
+%r266 = add i416 %r261, %r265
+%r267 = lshr i416 %r266, 32
+%r268 = trunc i416 %r267 to i384
+%r269 = trunc i384 %r268 to i32
+%r270 = mul i32 %r269, %r6
+%r271 = call i352 @mulPv320x32(i32* %r3, i32 %r270)
+%r272 = zext i352 %r271 to i384
+%r273 = add i384 %r268, %r272
+%r274 = lshr i384 %r273, 32
+%r275 = trunc i384 %r274 to i352
+%r276 = zext i320 %r70 to i352
+%r277 = sub i352 %r275, %r276
+%r278 = lshr i352 %r277, 320
+%r279 = trunc i352 %r278 to i1
+%r280 = select i1 %r279, i352 %r275, i352 %r277
+%r281 = trunc i352 %r280 to i320
+%r282 = trunc i320 %r281 to i32
+%r284 = getelementptr i32, i32* %r1, i32 0
+store i32 %r282, i32* %r284
+%r285 = lshr i320 %r281, 32
+%r286 = trunc i320 %r285 to i32
+%r288 = getelementptr i32, i32* %r1, i32 1
+store i32 %r286, i32* %r288
+%r289 = lshr i320 %r285, 32
+%r290 = trunc i320 %r289 to i32
+%r292 = getelementptr i32, i32* %r1, i32 2
+store i32 %r290, i32* %r292
+%r293 = lshr i320 %r289, 32
+%r294 = trunc i320 %r293 to i32
+%r296 = getelementptr i32, i32* %r1, i32 3
+store i32 %r294, i32* %r296
+%r297 = lshr i320 %r293, 32
+%r298 = trunc i320 %r297 to i32
+%r300 = getelementptr i32, i32* %r1, i32 4
+store i32 %r298, i32* %r300
+%r301 = lshr i320 %r297, 32
+%r302 = trunc i320 %r301 to i32
+%r304 = getelementptr i32, i32* %r1, i32 5
+store i32 %r302, i32* %r304
+%r305 = lshr i320 %r301, 32
+%r306 = trunc i320 %r305 to i32
+%r308 = getelementptr i32, i32* %r1, i32 6
+store i32 %r306, i32* %r308
+%r309 = lshr i320 %r305, 32
+%r310 = trunc i320 %r309 to i32
+%r312 = getelementptr i32, i32* %r1, i32 7
+store i32 %r310, i32* %r312
+%r313 = lshr i320 %r309, 32
+%r314 = trunc i320 %r313 to i32
+%r316 = getelementptr i32, i32* %r1, i32 8
+store i32 %r314, i32* %r316
+%r317 = lshr i320 %r313, 32
+%r318 = trunc i320 %r317 to i32
+%r320 = getelementptr i32, i32* %r1, i32 9
+store i32 %r318, i32* %r320
+ret void
+}
+define i32 @mcl_fp_addPre10L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r3
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r70 = load i32, i32* %r4
+%r71 = zext i32 %r70 to i64
+%r73 = getelementptr i32, i32* %r4, i32 1
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i64
+%r76 = shl i64 %r75, 32
+%r77 = or i64 %r71, %r76
+%r78 = zext i64 %r77 to i96
+%r80 = getelementptr i32, i32* %r4, i32 2
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i96
+%r83 = shl i96 %r82, 64
+%r84 = or i96 %r78, %r83
+%r85 = zext i96 %r84 to i128
+%r87 = getelementptr i32, i32* %r4, i32 3
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i128
+%r90 = shl i128 %r89, 96
+%r91 = or i128 %r85, %r90
+%r92 = zext i128 %r91 to i160
+%r94 = getelementptr i32, i32* %r4, i32 4
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i160
+%r97 = shl i160 %r96, 128
+%r98 = or i160 %r92, %r97
+%r99 = zext i160 %r98 to i192
+%r101 = getelementptr i32, i32* %r4, i32 5
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i192
+%r104 = shl i192 %r103, 160
+%r105 = or i192 %r99, %r104
+%r106 = zext i192 %r105 to i224
+%r108 = getelementptr i32, i32* %r4, i32 6
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i224
+%r111 = shl i224 %r110, 192
+%r112 = or i224 %r106, %r111
+%r113 = zext i224 %r112 to i256
+%r115 = getelementptr i32, i32* %r4, i32 7
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i256
+%r118 = shl i256 %r117, 224
+%r119 = or i256 %r113, %r118
+%r120 = zext i256 %r119 to i288
+%r122 = getelementptr i32, i32* %r4, i32 8
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i288
+%r125 = shl i288 %r124, 256
+%r126 = or i288 %r120, %r125
+%r127 = zext i288 %r126 to i320
+%r129 = getelementptr i32, i32* %r4, i32 9
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i320
+%r132 = shl i320 %r131, 288
+%r133 = or i320 %r127, %r132
+%r134 = zext i320 %r133 to i352
+%r135 = add i352 %r69, %r134
+%r136 = trunc i352 %r135 to i320
+%r137 = trunc i320 %r136 to i32
+%r139 = getelementptr i32, i32* %r2, i32 0
+store i32 %r137, i32* %r139
+%r140 = lshr i320 %r136, 32
+%r141 = trunc i320 %r140 to i32
+%r143 = getelementptr i32, i32* %r2, i32 1
+store i32 %r141, i32* %r143
+%r144 = lshr i320 %r140, 32
+%r145 = trunc i320 %r144 to i32
+%r147 = getelementptr i32, i32* %r2, i32 2
+store i32 %r145, i32* %r147
+%r148 = lshr i320 %r144, 32
+%r149 = trunc i320 %r148 to i32
+%r151 = getelementptr i32, i32* %r2, i32 3
+store i32 %r149, i32* %r151
+%r152 = lshr i320 %r148, 32
+%r153 = trunc i320 %r152 to i32
+%r155 = getelementptr i32, i32* %r2, i32 4
+store i32 %r153, i32* %r155
+%r156 = lshr i320 %r152, 32
+%r157 = trunc i320 %r156 to i32
+%r159 = getelementptr i32, i32* %r2, i32 5
+store i32 %r157, i32* %r159
+%r160 = lshr i320 %r156, 32
+%r161 = trunc i320 %r160 to i32
+%r163 = getelementptr i32, i32* %r2, i32 6
+store i32 %r161, i32* %r163
+%r164 = lshr i320 %r160, 32
+%r165 = trunc i320 %r164 to i32
+%r167 = getelementptr i32, i32* %r2, i32 7
+store i32 %r165, i32* %r167
+%r168 = lshr i320 %r164, 32
+%r169 = trunc i320 %r168 to i32
+%r171 = getelementptr i32, i32* %r2, i32 8
+store i32 %r169, i32* %r171
+%r172 = lshr i320 %r168, 32
+%r173 = trunc i320 %r172 to i32
+%r175 = getelementptr i32, i32* %r2, i32 9
+store i32 %r173, i32* %r175
+%r176 = lshr i352 %r135, 320
+%r177 = trunc i352 %r176 to i32
+ret i32 %r177
+}
+define i32 @mcl_fp_subPre10L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r3
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r70 = load i32, i32* %r4
+%r71 = zext i32 %r70 to i64
+%r73 = getelementptr i32, i32* %r4, i32 1
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i64
+%r76 = shl i64 %r75, 32
+%r77 = or i64 %r71, %r76
+%r78 = zext i64 %r77 to i96
+%r80 = getelementptr i32, i32* %r4, i32 2
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i96
+%r83 = shl i96 %r82, 64
+%r84 = or i96 %r78, %r83
+%r85 = zext i96 %r84 to i128
+%r87 = getelementptr i32, i32* %r4, i32 3
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i128
+%r90 = shl i128 %r89, 96
+%r91 = or i128 %r85, %r90
+%r92 = zext i128 %r91 to i160
+%r94 = getelementptr i32, i32* %r4, i32 4
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i160
+%r97 = shl i160 %r96, 128
+%r98 = or i160 %r92, %r97
+%r99 = zext i160 %r98 to i192
+%r101 = getelementptr i32, i32* %r4, i32 5
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i192
+%r104 = shl i192 %r103, 160
+%r105 = or i192 %r99, %r104
+%r106 = zext i192 %r105 to i224
+%r108 = getelementptr i32, i32* %r4, i32 6
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i224
+%r111 = shl i224 %r110, 192
+%r112 = or i224 %r106, %r111
+%r113 = zext i224 %r112 to i256
+%r115 = getelementptr i32, i32* %r4, i32 7
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i256
+%r118 = shl i256 %r117, 224
+%r119 = or i256 %r113, %r118
+%r120 = zext i256 %r119 to i288
+%r122 = getelementptr i32, i32* %r4, i32 8
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i288
+%r125 = shl i288 %r124, 256
+%r126 = or i288 %r120, %r125
+%r127 = zext i288 %r126 to i320
+%r129 = getelementptr i32, i32* %r4, i32 9
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i320
+%r132 = shl i320 %r131, 288
+%r133 = or i320 %r127, %r132
+%r134 = zext i320 %r133 to i352
+%r135 = sub i352 %r69, %r134
+%r136 = trunc i352 %r135 to i320
+%r137 = trunc i320 %r136 to i32
+%r139 = getelementptr i32, i32* %r2, i32 0
+store i32 %r137, i32* %r139
+%r140 = lshr i320 %r136, 32
+%r141 = trunc i320 %r140 to i32
+%r143 = getelementptr i32, i32* %r2, i32 1
+store i32 %r141, i32* %r143
+%r144 = lshr i320 %r140, 32
+%r145 = trunc i320 %r144 to i32
+%r147 = getelementptr i32, i32* %r2, i32 2
+store i32 %r145, i32* %r147
+%r148 = lshr i320 %r144, 32
+%r149 = trunc i320 %r148 to i32
+%r151 = getelementptr i32, i32* %r2, i32 3
+store i32 %r149, i32* %r151
+%r152 = lshr i320 %r148, 32
+%r153 = trunc i320 %r152 to i32
+%r155 = getelementptr i32, i32* %r2, i32 4
+store i32 %r153, i32* %r155
+%r156 = lshr i320 %r152, 32
+%r157 = trunc i320 %r156 to i32
+%r159 = getelementptr i32, i32* %r2, i32 5
+store i32 %r157, i32* %r159
+%r160 = lshr i320 %r156, 32
+%r161 = trunc i320 %r160 to i32
+%r163 = getelementptr i32, i32* %r2, i32 6
+store i32 %r161, i32* %r163
+%r164 = lshr i320 %r160, 32
+%r165 = trunc i320 %r164 to i32
+%r167 = getelementptr i32, i32* %r2, i32 7
+store i32 %r165, i32* %r167
+%r168 = lshr i320 %r164, 32
+%r169 = trunc i320 %r168 to i32
+%r171 = getelementptr i32, i32* %r2, i32 8
+store i32 %r169, i32* %r171
+%r172 = lshr i320 %r168, 32
+%r173 = trunc i320 %r172 to i32
+%r175 = getelementptr i32, i32* %r2, i32 9
+store i32 %r173, i32* %r175
+%r176 = lshr i352 %r135, 320
+%r177 = trunc i352 %r176 to i32
+%r179 = and i32 %r177, 1
+ret i32 %r179
+}
+define void @mcl_fp_shr1_10L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r3 = load i32, i32* %r2
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = zext i192 %r38 to i224
+%r41 = getelementptr i32, i32* %r2, i32 6
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i224
+%r44 = shl i224 %r43, 192
+%r45 = or i224 %r39, %r44
+%r46 = zext i224 %r45 to i256
+%r48 = getelementptr i32, i32* %r2, i32 7
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i256
+%r51 = shl i256 %r50, 224
+%r52 = or i256 %r46, %r51
+%r53 = zext i256 %r52 to i288
+%r55 = getelementptr i32, i32* %r2, i32 8
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i288
+%r58 = shl i288 %r57, 256
+%r59 = or i288 %r53, %r58
+%r60 = zext i288 %r59 to i320
+%r62 = getelementptr i32, i32* %r2, i32 9
+%r63 = load i32, i32* %r62
+%r64 = zext i32 %r63 to i320
+%r65 = shl i320 %r64, 288
+%r66 = or i320 %r60, %r65
+%r67 = lshr i320 %r66, 1
+%r68 = trunc i320 %r67 to i32
+%r70 = getelementptr i32, i32* %r1, i32 0
+store i32 %r68, i32* %r70
+%r71 = lshr i320 %r67, 32
+%r72 = trunc i320 %r71 to i32
+%r74 = getelementptr i32, i32* %r1, i32 1
+store i32 %r72, i32* %r74
+%r75 = lshr i320 %r71, 32
+%r76 = trunc i320 %r75 to i32
+%r78 = getelementptr i32, i32* %r1, i32 2
+store i32 %r76, i32* %r78
+%r79 = lshr i320 %r75, 32
+%r80 = trunc i320 %r79 to i32
+%r82 = getelementptr i32, i32* %r1, i32 3
+store i32 %r80, i32* %r82
+%r83 = lshr i320 %r79, 32
+%r84 = trunc i320 %r83 to i32
+%r86 = getelementptr i32, i32* %r1, i32 4
+store i32 %r84, i32* %r86
+%r87 = lshr i320 %r83, 32
+%r88 = trunc i320 %r87 to i32
+%r90 = getelementptr i32, i32* %r1, i32 5
+store i32 %r88, i32* %r90
+%r91 = lshr i320 %r87, 32
+%r92 = trunc i320 %r91 to i32
+%r94 = getelementptr i32, i32* %r1, i32 6
+store i32 %r92, i32* %r94
+%r95 = lshr i320 %r91, 32
+%r96 = trunc i320 %r95 to i32
+%r98 = getelementptr i32, i32* %r1, i32 7
+store i32 %r96, i32* %r98
+%r99 = lshr i320 %r95, 32
+%r100 = trunc i320 %r99 to i32
+%r102 = getelementptr i32, i32* %r1, i32 8
+store i32 %r100, i32* %r102
+%r103 = lshr i320 %r99, 32
+%r104 = trunc i320 %r103 to i32
+%r106 = getelementptr i32, i32* %r1, i32 9
+store i32 %r104, i32* %r106
+ret void
+}
+define void @mcl_fp_add10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)  ; 10-word (320-bit) add: writes [%r2]+[%r3] to [%r1], then overwrites it with the sum minus [%r4] when that subtraction does not borrow
+{  ; %r1 = output (10 x i32); %r2, %r3 = operands (10 x i32 each); %r4 = 10 x i32 value subtracted from the sum (presumably the field modulus - confirm with callers)
+%r5 = load i32, i32* %r2  ; assemble the ten words at %r2 into one i320; word 0 becomes the least significant 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67  ; %r68 = complete 320-bit first operand
+%r69 = load i32, i32* %r3  ; assemble the ten words at %r3 the same way
+%r70 = zext i32 %r69 to i64
+%r72 = getelementptr i32, i32* %r3, i32 1
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i64
+%r75 = shl i64 %r74, 32
+%r76 = or i64 %r70, %r75
+%r77 = zext i64 %r76 to i96
+%r79 = getelementptr i32, i32* %r3, i32 2
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i96
+%r82 = shl i96 %r81, 64
+%r83 = or i96 %r77, %r82
+%r84 = zext i96 %r83 to i128
+%r86 = getelementptr i32, i32* %r3, i32 3
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i128
+%r89 = shl i128 %r88, 96
+%r90 = or i128 %r84, %r89
+%r91 = zext i128 %r90 to i160
+%r93 = getelementptr i32, i32* %r3, i32 4
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i160
+%r96 = shl i160 %r95, 128
+%r97 = or i160 %r91, %r96
+%r98 = zext i160 %r97 to i192
+%r100 = getelementptr i32, i32* %r3, i32 5
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i192
+%r103 = shl i192 %r102, 160
+%r104 = or i192 %r98, %r103
+%r105 = zext i192 %r104 to i224
+%r107 = getelementptr i32, i32* %r3, i32 6
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i224
+%r110 = shl i224 %r109, 192
+%r111 = or i224 %r105, %r110
+%r112 = zext i224 %r111 to i256
+%r114 = getelementptr i32, i32* %r3, i32 7
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i256
+%r117 = shl i256 %r116, 224
+%r118 = or i256 %r112, %r117
+%r119 = zext i256 %r118 to i288
+%r121 = getelementptr i32, i32* %r3, i32 8
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i288
+%r124 = shl i288 %r123, 256
+%r125 = or i288 %r119, %r124
+%r126 = zext i288 %r125 to i320
+%r128 = getelementptr i32, i32* %r3, i32 9
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i320
+%r131 = shl i320 %r130, 288
+%r132 = or i320 %r126, %r131  ; %r132 = complete 320-bit second operand
+%r133 = zext i320 %r68 to i352  ; widen both operands by one word so the carry out of bit 319 is kept
+%r134 = zext i320 %r132 to i352
+%r135 = add i352 %r133, %r134  ; %r135 = full sum including the carry bit
+%r136 = trunc i352 %r135 to i320  ; low 320 bits of the sum
+%r137 = trunc i320 %r136 to i32  ; store the unreduced sum to %r1 word by word, shifting right 32 bits each step
+%r139 = getelementptr i32, i32* %r1, i32 0
+store i32 %r137, i32* %r139
+%r140 = lshr i320 %r136, 32
+%r141 = trunc i320 %r140 to i32
+%r143 = getelementptr i32, i32* %r1, i32 1
+store i32 %r141, i32* %r143
+%r144 = lshr i320 %r140, 32
+%r145 = trunc i320 %r144 to i32
+%r147 = getelementptr i32, i32* %r1, i32 2
+store i32 %r145, i32* %r147
+%r148 = lshr i320 %r144, 32
+%r149 = trunc i320 %r148 to i32
+%r151 = getelementptr i32, i32* %r1, i32 3
+store i32 %r149, i32* %r151
+%r152 = lshr i320 %r148, 32
+%r153 = trunc i320 %r152 to i32
+%r155 = getelementptr i32, i32* %r1, i32 4
+store i32 %r153, i32* %r155
+%r156 = lshr i320 %r152, 32
+%r157 = trunc i320 %r156 to i32
+%r159 = getelementptr i32, i32* %r1, i32 5
+store i32 %r157, i32* %r159
+%r160 = lshr i320 %r156, 32
+%r161 = trunc i320 %r160 to i32
+%r163 = getelementptr i32, i32* %r1, i32 6
+store i32 %r161, i32* %r163
+%r164 = lshr i320 %r160, 32
+%r165 = trunc i320 %r164 to i32
+%r167 = getelementptr i32, i32* %r1, i32 7
+store i32 %r165, i32* %r167
+%r168 = lshr i320 %r164, 32
+%r169 = trunc i320 %r168 to i32
+%r171 = getelementptr i32, i32* %r1, i32 8
+store i32 %r169, i32* %r171
+%r172 = lshr i320 %r168, 32
+%r173 = trunc i320 %r172 to i32
+%r175 = getelementptr i32, i32* %r1, i32 9
+store i32 %r173, i32* %r175
+%r176 = load i32, i32* %r4  ; assemble the ten words at %r4 into an i320
+%r177 = zext i32 %r176 to i64
+%r179 = getelementptr i32, i32* %r4, i32 1
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i64
+%r182 = shl i64 %r181, 32
+%r183 = or i64 %r177, %r182
+%r184 = zext i64 %r183 to i96
+%r186 = getelementptr i32, i32* %r4, i32 2
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i96
+%r189 = shl i96 %r188, 64
+%r190 = or i96 %r184, %r189
+%r191 = zext i96 %r190 to i128
+%r193 = getelementptr i32, i32* %r4, i32 3
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i128
+%r196 = shl i128 %r195, 96
+%r197 = or i128 %r191, %r196
+%r198 = zext i128 %r197 to i160
+%r200 = getelementptr i32, i32* %r4, i32 4
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i160
+%r203 = shl i160 %r202, 128
+%r204 = or i160 %r198, %r203
+%r205 = zext i160 %r204 to i192
+%r207 = getelementptr i32, i32* %r4, i32 5
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i192
+%r210 = shl i192 %r209, 160
+%r211 = or i192 %r205, %r210
+%r212 = zext i192 %r211 to i224
+%r214 = getelementptr i32, i32* %r4, i32 6
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i224
+%r217 = shl i224 %r216, 192
+%r218 = or i224 %r212, %r217
+%r219 = zext i224 %r218 to i256
+%r221 = getelementptr i32, i32* %r4, i32 7
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i256
+%r224 = shl i256 %r223, 224
+%r225 = or i256 %r219, %r224
+%r226 = zext i256 %r225 to i288
+%r228 = getelementptr i32, i32* %r4, i32 8
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i288
+%r231 = shl i288 %r230, 256
+%r232 = or i288 %r226, %r231
+%r233 = zext i288 %r232 to i320
+%r235 = getelementptr i32, i32* %r4, i32 9
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i320
+%r238 = shl i320 %r237, 288
+%r239 = or i320 %r233, %r238
+%r240 = zext i320 %r239 to i352
+%r241 = sub i352 %r135, %r240  ; full sum minus the value at %r4, computed in 352 bits
+%r242 = lshr i352 %r241, 320  ; bit 320 of the difference is used as the borrow flag
+%r243 = trunc i352 %r242 to i1  ; NOTE(review): this flag equals "sum < [%r4]" only if both inputs are already below [%r4] - confirm with callers
+br i1%r243, label %carry, label %nocarry  ; flag set -> keep the sum already stored; flag clear -> store the difference
+nocarry:  ; no borrow: overwrite %r1 with the low 320 bits of the difference
+%r244 = trunc i352 %r241 to i320
+%r245 = trunc i320 %r244 to i32
+%r247 = getelementptr i32, i32* %r1, i32 0
+store i32 %r245, i32* %r247
+%r248 = lshr i320 %r244, 32
+%r249 = trunc i320 %r248 to i32
+%r251 = getelementptr i32, i32* %r1, i32 1
+store i32 %r249, i32* %r251
+%r252 = lshr i320 %r248, 32
+%r253 = trunc i320 %r252 to i32
+%r255 = getelementptr i32, i32* %r1, i32 2
+store i32 %r253, i32* %r255
+%r256 = lshr i320 %r252, 32
+%r257 = trunc i320 %r256 to i32
+%r259 = getelementptr i32, i32* %r1, i32 3
+store i32 %r257, i32* %r259
+%r260 = lshr i320 %r256, 32
+%r261 = trunc i320 %r260 to i32
+%r263 = getelementptr i32, i32* %r1, i32 4
+store i32 %r261, i32* %r263
+%r264 = lshr i320 %r260, 32
+%r265 = trunc i320 %r264 to i32
+%r267 = getelementptr i32, i32* %r1, i32 5
+store i32 %r265, i32* %r267
+%r268 = lshr i320 %r264, 32
+%r269 = trunc i320 %r268 to i32
+%r271 = getelementptr i32, i32* %r1, i32 6
+store i32 %r269, i32* %r271
+%r272 = lshr i320 %r268, 32
+%r273 = trunc i320 %r272 to i32
+%r275 = getelementptr i32, i32* %r1, i32 7
+store i32 %r273, i32* %r275
+%r276 = lshr i320 %r272, 32
+%r277 = trunc i320 %r276 to i32
+%r279 = getelementptr i32, i32* %r1, i32 8
+store i32 %r277, i32* %r279
+%r280 = lshr i320 %r276, 32
+%r281 = trunc i320 %r280 to i32
+%r283 = getelementptr i32, i32* %r1, i32 9
+store i32 %r281, i32* %r283
+ret void
+carry:  ; borrow: %r1 already holds the plain sum, nothing more to write
+ret void
+}
+define void @mcl_fp_addNF10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)  ; branch-free 10-word add: writes to [%r1] either [%r2]+[%r3] or that sum minus [%r4], chosen by the sign bit of the difference
+{  ; %r1 = output (10 x i32); %r2, %r3 = operands; %r4 = value subtracted from the sum (presumably the field modulus - confirm with callers)
+%r5 = load i32, i32* %r2  ; assemble the ten words at %r2 into one i320; word 0 becomes the least significant 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67  ; %r68 = complete 320-bit first operand
+%r69 = load i32, i32* %r3  ; assemble the ten words at %r3 the same way
+%r70 = zext i32 %r69 to i64
+%r72 = getelementptr i32, i32* %r3, i32 1
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i64
+%r75 = shl i64 %r74, 32
+%r76 = or i64 %r70, %r75
+%r77 = zext i64 %r76 to i96
+%r79 = getelementptr i32, i32* %r3, i32 2
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i96
+%r82 = shl i96 %r81, 64
+%r83 = or i96 %r77, %r82
+%r84 = zext i96 %r83 to i128
+%r86 = getelementptr i32, i32* %r3, i32 3
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i128
+%r89 = shl i128 %r88, 96
+%r90 = or i128 %r84, %r89
+%r91 = zext i128 %r90 to i160
+%r93 = getelementptr i32, i32* %r3, i32 4
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i160
+%r96 = shl i160 %r95, 128
+%r97 = or i160 %r91, %r96
+%r98 = zext i160 %r97 to i192
+%r100 = getelementptr i32, i32* %r3, i32 5
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i192
+%r103 = shl i192 %r102, 160
+%r104 = or i192 %r98, %r103
+%r105 = zext i192 %r104 to i224
+%r107 = getelementptr i32, i32* %r3, i32 6
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i224
+%r110 = shl i224 %r109, 192
+%r111 = or i224 %r105, %r110
+%r112 = zext i224 %r111 to i256
+%r114 = getelementptr i32, i32* %r3, i32 7
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i256
+%r117 = shl i256 %r116, 224
+%r118 = or i256 %r112, %r117
+%r119 = zext i256 %r118 to i288
+%r121 = getelementptr i32, i32* %r3, i32 8
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i288
+%r124 = shl i288 %r123, 256
+%r125 = or i288 %r119, %r124
+%r126 = zext i288 %r125 to i320
+%r128 = getelementptr i32, i32* %r3, i32 9
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i320
+%r131 = shl i320 %r130, 288
+%r132 = or i320 %r126, %r131  ; %r132 = complete 320-bit second operand
+%r133 = add i320 %r68, %r132  ; sum kept in 320 bits, no extra carry word; NOTE(review): presumably relies on the top bit of [%r4] being clear so the sum cannot wrap - confirm
+%r134 = load i32, i32* %r4  ; assemble the ten words at %r4 into an i320
+%r135 = zext i32 %r134 to i64
+%r137 = getelementptr i32, i32* %r4, i32 1
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i64
+%r140 = shl i64 %r139, 32
+%r141 = or i64 %r135, %r140
+%r142 = zext i64 %r141 to i96
+%r144 = getelementptr i32, i32* %r4, i32 2
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i96
+%r147 = shl i96 %r146, 64
+%r148 = or i96 %r142, %r147
+%r149 = zext i96 %r148 to i128
+%r151 = getelementptr i32, i32* %r4, i32 3
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i128
+%r154 = shl i128 %r153, 96
+%r155 = or i128 %r149, %r154
+%r156 = zext i128 %r155 to i160
+%r158 = getelementptr i32, i32* %r4, i32 4
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i160
+%r161 = shl i160 %r160, 128
+%r162 = or i160 %r156, %r161
+%r163 = zext i160 %r162 to i192
+%r165 = getelementptr i32, i32* %r4, i32 5
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i192
+%r168 = shl i192 %r167, 160
+%r169 = or i192 %r163, %r168
+%r170 = zext i192 %r169 to i224
+%r172 = getelementptr i32, i32* %r4, i32 6
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i224
+%r175 = shl i224 %r174, 192
+%r176 = or i224 %r170, %r175
+%r177 = zext i224 %r176 to i256
+%r179 = getelementptr i32, i32* %r4, i32 7
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i256
+%r182 = shl i256 %r181, 224
+%r183 = or i256 %r177, %r182
+%r184 = zext i256 %r183 to i288
+%r186 = getelementptr i32, i32* %r4, i32 8
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i288
+%r189 = shl i288 %r188, 256
+%r190 = or i288 %r184, %r189
+%r191 = zext i288 %r190 to i320
+%r193 = getelementptr i32, i32* %r4, i32 9
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i320
+%r196 = shl i320 %r195, 288
+%r197 = or i320 %r191, %r196
+%r198 = sub i320 %r133, %r197  ; sum minus the value at %r4, in 320 bits
+%r199 = lshr i320 %r198, 319  ; isolate bit 319, the top (sign) bit of the difference
+%r200 = trunc i320 %r199 to i1
+%r201 = select i1 %r200, i320 %r133, i320 %r198  ; top bit set -> keep the plain sum; clear -> take the difference
+%r202 = trunc i320 %r201 to i32  ; store the selected value to %r1 word by word, shifting right 32 bits each step
+%r204 = getelementptr i32, i32* %r1, i32 0
+store i32 %r202, i32* %r204
+%r205 = lshr i320 %r201, 32
+%r206 = trunc i320 %r205 to i32
+%r208 = getelementptr i32, i32* %r1, i32 1
+store i32 %r206, i32* %r208
+%r209 = lshr i320 %r205, 32
+%r210 = trunc i320 %r209 to i32
+%r212 = getelementptr i32, i32* %r1, i32 2
+store i32 %r210, i32* %r212
+%r213 = lshr i320 %r209, 32
+%r214 = trunc i320 %r213 to i32
+%r216 = getelementptr i32, i32* %r1, i32 3
+store i32 %r214, i32* %r216
+%r217 = lshr i320 %r213, 32
+%r218 = trunc i320 %r217 to i32
+%r220 = getelementptr i32, i32* %r1, i32 4
+store i32 %r218, i32* %r220
+%r221 = lshr i320 %r217, 32
+%r222 = trunc i320 %r221 to i32
+%r224 = getelementptr i32, i32* %r1, i32 5
+store i32 %r222, i32* %r224
+%r225 = lshr i320 %r221, 32
+%r226 = trunc i320 %r225 to i32
+%r228 = getelementptr i32, i32* %r1, i32 6
+store i32 %r226, i32* %r228
+%r229 = lshr i320 %r225, 32
+%r230 = trunc i320 %r229 to i32
+%r232 = getelementptr i32, i32* %r1, i32 7
+store i32 %r230, i32* %r232
+%r233 = lshr i320 %r229, 32
+%r234 = trunc i320 %r233 to i32
+%r236 = getelementptr i32, i32* %r1, i32 8
+store i32 %r234, i32* %r236
+%r237 = lshr i320 %r233, 32
+%r238 = trunc i320 %r237 to i32
+%r240 = getelementptr i32, i32* %r1, i32 9
+store i32 %r238, i32* %r240
+ret void
+}
+define void @mcl_fp_sub10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)  ; 10-word (320-bit) subtract: writes [%r2]-[%r3] to [%r1], then adds [%r4] back when the subtraction borrowed
+{  ; %r1 = output (10 x i32); %r2 = minuend; %r3 = subtrahend; %r4 = value added back on borrow (presumably the field modulus - confirm with callers)
+%r5 = load i32, i32* %r2  ; assemble the ten words at %r2 into one i320; word 0 becomes the least significant 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67  ; %r68 = complete 320-bit minuend
+%r69 = load i32, i32* %r3  ; assemble the ten words at %r3 the same way
+%r70 = zext i32 %r69 to i64
+%r72 = getelementptr i32, i32* %r3, i32 1
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i64
+%r75 = shl i64 %r74, 32
+%r76 = or i64 %r70, %r75
+%r77 = zext i64 %r76 to i96
+%r79 = getelementptr i32, i32* %r3, i32 2
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i96
+%r82 = shl i96 %r81, 64
+%r83 = or i96 %r77, %r82
+%r84 = zext i96 %r83 to i128
+%r86 = getelementptr i32, i32* %r3, i32 3
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i128
+%r89 = shl i128 %r88, 96
+%r90 = or i128 %r84, %r89
+%r91 = zext i128 %r90 to i160
+%r93 = getelementptr i32, i32* %r3, i32 4
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i160
+%r96 = shl i160 %r95, 128
+%r97 = or i160 %r91, %r96
+%r98 = zext i160 %r97 to i192
+%r100 = getelementptr i32, i32* %r3, i32 5
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i192
+%r103 = shl i192 %r102, 160
+%r104 = or i192 %r98, %r103
+%r105 = zext i192 %r104 to i224
+%r107 = getelementptr i32, i32* %r3, i32 6
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i224
+%r110 = shl i224 %r109, 192
+%r111 = or i224 %r105, %r110
+%r112 = zext i224 %r111 to i256
+%r114 = getelementptr i32, i32* %r3, i32 7
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i256
+%r117 = shl i256 %r116, 224
+%r118 = or i256 %r112, %r117
+%r119 = zext i256 %r118 to i288
+%r121 = getelementptr i32, i32* %r3, i32 8
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i288
+%r124 = shl i288 %r123, 256
+%r125 = or i288 %r119, %r124
+%r126 = zext i288 %r125 to i320
+%r128 = getelementptr i32, i32* %r3, i32 9
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i320
+%r131 = shl i320 %r130, 288
+%r132 = or i320 %r126, %r131  ; %r132 = complete 320-bit subtrahend
+%r133 = zext i320 %r68 to i352  ; widen both operands by one word so the borrow shows up above bit 319
+%r134 = zext i320 %r132 to i352
+%r135 = sub i352 %r133, %r134
+%r136 = trunc i352 %r135 to i320  ; low 320 bits of the difference
+%r137 = lshr i352 %r135, 320  ; bit 320 is set exactly when the zero-extended subtraction wrapped, i.e. [%r2] < [%r3]
+%r138 = trunc i352 %r137 to i1  ; %r138 = borrow flag, consumed by the branch after the stores
+%r139 = trunc i320 %r136 to i32  ; store the raw difference to %r1 word by word, shifting right 32 bits each step
+%r141 = getelementptr i32, i32* %r1, i32 0
+store i32 %r139, i32* %r141
+%r142 = lshr i320 %r136, 32
+%r143 = trunc i320 %r142 to i32
+%r145 = getelementptr i32, i32* %r1, i32 1
+store i32 %r143, i32* %r145
+%r146 = lshr i320 %r142, 32
+%r147 = trunc i320 %r146 to i32
+%r149 = getelementptr i32, i32* %r1, i32 2
+store i32 %r147, i32* %r149
+%r150 = lshr i320 %r146, 32
+%r151 = trunc i320 %r150 to i32
+%r153 = getelementptr i32, i32* %r1, i32 3
+store i32 %r151, i32* %r153
+%r154 = lshr i320 %r150, 32
+%r155 = trunc i320 %r154 to i32
+%r157 = getelementptr i32, i32* %r1, i32 4
+store i32 %r155, i32* %r157
+%r158 = lshr i320 %r154, 32
+%r159 = trunc i320 %r158 to i32
+%r161 = getelementptr i32, i32* %r1, i32 5
+store i32 %r159, i32* %r161
+%r162 = lshr i320 %r158, 32
+%r163 = trunc i320 %r162 to i32
+%r165 = getelementptr i32, i32* %r1, i32 6
+store i32 %r163, i32* %r165
+%r166 = lshr i320 %r162, 32
+%r167 = trunc i320 %r166 to i32
+%r169 = getelementptr i32, i32* %r1, i32 7
+store i32 %r167, i32* %r169
+%r170 = lshr i320 %r166, 32
+%r171 = trunc i320 %r170 to i32
+%r173 = getelementptr i32, i32* %r1, i32 8
+store i32 %r171, i32* %r173
+%r174 = lshr i320 %r170, 32
+%r175 = trunc i320 %r174 to i32
+%r177 = getelementptr i32, i32* %r1, i32 9
+store i32 %r175, i32* %r177
+br i1%r138, label %carry, label %nocarry  ; borrow -> add the value at %r4 back; no borrow -> the stored difference is final
+nocarry:  ; no borrow: %r1 already holds the result
+ret void
+carry:  ; borrow: load the ten words at %r4, add them to the wrapped difference, and rewrite %r1
+%r178 = load i32, i32* %r4
+%r179 = zext i32 %r178 to i64
+%r181 = getelementptr i32, i32* %r4, i32 1
+%r182 = load i32, i32* %r181
+%r183 = zext i32 %r182 to i64
+%r184 = shl i64 %r183, 32
+%r185 = or i64 %r179, %r184
+%r186 = zext i64 %r185 to i96
+%r188 = getelementptr i32, i32* %r4, i32 2
+%r189 = load i32, i32* %r188
+%r190 = zext i32 %r189 to i96
+%r191 = shl i96 %r190, 64
+%r192 = or i96 %r186, %r191
+%r193 = zext i96 %r192 to i128
+%r195 = getelementptr i32, i32* %r4, i32 3
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i128
+%r198 = shl i128 %r197, 96
+%r199 = or i128 %r193, %r198
+%r200 = zext i128 %r199 to i160
+%r202 = getelementptr i32, i32* %r4, i32 4
+%r203 = load i32, i32* %r202
+%r204 = zext i32 %r203 to i160
+%r205 = shl i160 %r204, 128
+%r206 = or i160 %r200, %r205
+%r207 = zext i160 %r206 to i192
+%r209 = getelementptr i32, i32* %r4, i32 5
+%r210 = load i32, i32* %r209
+%r211 = zext i32 %r210 to i192
+%r212 = shl i192 %r211, 160
+%r213 = or i192 %r207, %r212
+%r214 = zext i192 %r213 to i224
+%r216 = getelementptr i32, i32* %r4, i32 6
+%r217 = load i32, i32* %r216
+%r218 = zext i32 %r217 to i224
+%r219 = shl i224 %r218, 192
+%r220 = or i224 %r214, %r219
+%r221 = zext i224 %r220 to i256
+%r223 = getelementptr i32, i32* %r4, i32 7
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i256
+%r226 = shl i256 %r225, 224
+%r227 = or i256 %r221, %r226
+%r228 = zext i256 %r227 to i288
+%r230 = getelementptr i32, i32* %r4, i32 8
+%r231 = load i32, i32* %r230
+%r232 = zext i32 %r231 to i288
+%r233 = shl i288 %r232, 256
+%r234 = or i288 %r228, %r233
+%r235 = zext i288 %r234 to i320
+%r237 = getelementptr i32, i32* %r4, i32 9
+%r238 = load i32, i32* %r237
+%r239 = zext i32 %r238 to i320
+%r240 = shl i320 %r239, 288
+%r241 = or i320 %r235, %r240
+%r242 = add i320 %r136, %r241  ; wrapped difference plus the value at %r4, modulo 2^320
+%r243 = trunc i320 %r242 to i32
+%r245 = getelementptr i32, i32* %r1, i32 0
+store i32 %r243, i32* %r245
+%r246 = lshr i320 %r242, 32
+%r247 = trunc i320 %r246 to i32
+%r249 = getelementptr i32, i32* %r1, i32 1
+store i32 %r247, i32* %r249
+%r250 = lshr i320 %r246, 32
+%r251 = trunc i320 %r250 to i32
+%r253 = getelementptr i32, i32* %r1, i32 2
+store i32 %r251, i32* %r253
+%r254 = lshr i320 %r250, 32
+%r255 = trunc i320 %r254 to i32
+%r257 = getelementptr i32, i32* %r1, i32 3
+store i32 %r255, i32* %r257
+%r258 = lshr i320 %r254, 32
+%r259 = trunc i320 %r258 to i32
+%r261 = getelementptr i32, i32* %r1, i32 4
+store i32 %r259, i32* %r261
+%r262 = lshr i320 %r258, 32
+%r263 = trunc i320 %r262 to i32
+%r265 = getelementptr i32, i32* %r1, i32 5
+store i32 %r263, i32* %r265
+%r266 = lshr i320 %r262, 32
+%r267 = trunc i320 %r266 to i32
+%r269 = getelementptr i32, i32* %r1, i32 6
+store i32 %r267, i32* %r269
+%r270 = lshr i320 %r266, 32
+%r271 = trunc i320 %r270 to i32
+%r273 = getelementptr i32, i32* %r1, i32 7
+store i32 %r271, i32* %r273
+%r274 = lshr i320 %r270, 32
+%r275 = trunc i320 %r274 to i32
+%r277 = getelementptr i32, i32* %r1, i32 8
+store i32 %r275, i32* %r277
+%r278 = lshr i320 %r274, 32
+%r279 = trunc i320 %r278 to i32
+%r281 = getelementptr i32, i32* %r1, i32 9
+store i32 %r279, i32* %r281
+ret void
+}
+define void @mcl_fp_subNF10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)  ; branch-free 10-word subtract: writes [%r2]-[%r3] to [%r1], adding [%r4] when the top bit of the difference is set
+{  ; %r1 = output (10 x i32); %r2 = minuend; %r3 = subtrahend; %r4 = value conditionally added back (presumably the field modulus - confirm with callers)
+%r5 = load i32, i32* %r2  ; assemble the ten words at %r2 into one i320; word 0 becomes the least significant 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67  ; %r68 = complete 320-bit minuend
+%r69 = load i32, i32* %r3  ; assemble the ten words at %r3 the same way
+%r70 = zext i32 %r69 to i64
+%r72 = getelementptr i32, i32* %r3, i32 1
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i64
+%r75 = shl i64 %r74, 32
+%r76 = or i64 %r70, %r75
+%r77 = zext i64 %r76 to i96
+%r79 = getelementptr i32, i32* %r3, i32 2
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i96
+%r82 = shl i96 %r81, 64
+%r83 = or i96 %r77, %r82
+%r84 = zext i96 %r83 to i128
+%r86 = getelementptr i32, i32* %r3, i32 3
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i128
+%r89 = shl i128 %r88, 96
+%r90 = or i128 %r84, %r89
+%r91 = zext i128 %r90 to i160
+%r93 = getelementptr i32, i32* %r3, i32 4
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i160
+%r96 = shl i160 %r95, 128
+%r97 = or i160 %r91, %r96
+%r98 = zext i160 %r97 to i192
+%r100 = getelementptr i32, i32* %r3, i32 5
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i192
+%r103 = shl i192 %r102, 160
+%r104 = or i192 %r98, %r103
+%r105 = zext i192 %r104 to i224
+%r107 = getelementptr i32, i32* %r3, i32 6
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i224
+%r110 = shl i224 %r109, 192
+%r111 = or i224 %r105, %r110
+%r112 = zext i224 %r111 to i256
+%r114 = getelementptr i32, i32* %r3, i32 7
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i256
+%r117 = shl i256 %r116, 224
+%r118 = or i256 %r112, %r117
+%r119 = zext i256 %r118 to i288
+%r121 = getelementptr i32, i32* %r3, i32 8
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i288
+%r124 = shl i288 %r123, 256
+%r125 = or i288 %r119, %r124
+%r126 = zext i288 %r125 to i320
+%r128 = getelementptr i32, i32* %r3, i32 9
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i320
+%r131 = shl i320 %r130, 288
+%r132 = or i320 %r126, %r131  ; %r132 = complete 320-bit subtrahend
+%r133 = sub i320 %r68, %r132  ; difference kept in 320 bits, wrapping modulo 2^320 when negative
+%r134 = lshr i320 %r133, 319  ; isolate bit 319, the top (sign) bit of the difference
+%r135 = trunc i320 %r134 to i1  ; NOTE(review): used as the "negative" flag; presumably valid because both inputs have their top bit clear - confirm
+%r136 = load i32, i32* %r4  ; assemble the ten words at %r4 into an i320 (loaded unconditionally, used only through the select below)
+%r137 = zext i32 %r136 to i64
+%r139 = getelementptr i32, i32* %r4, i32 1
+%r140 = load i32, i32* %r139
+%r141 = zext i32 %r140 to i64
+%r142 = shl i64 %r141, 32
+%r143 = or i64 %r137, %r142
+%r144 = zext i64 %r143 to i96
+%r146 = getelementptr i32, i32* %r4, i32 2
+%r147 = load i32, i32* %r146
+%r148 = zext i32 %r147 to i96
+%r149 = shl i96 %r148, 64
+%r150 = or i96 %r144, %r149
+%r151 = zext i96 %r150 to i128
+%r153 = getelementptr i32, i32* %r4, i32 3
+%r154 = load i32, i32* %r153
+%r155 = zext i32 %r154 to i128
+%r156 = shl i128 %r155, 96
+%r157 = or i128 %r151, %r156
+%r158 = zext i128 %r157 to i160
+%r160 = getelementptr i32, i32* %r4, i32 4
+%r161 = load i32, i32* %r160
+%r162 = zext i32 %r161 to i160
+%r163 = shl i160 %r162, 128
+%r164 = or i160 %r158, %r163
+%r165 = zext i160 %r164 to i192
+%r167 = getelementptr i32, i32* %r4, i32 5
+%r168 = load i32, i32* %r167
+%r169 = zext i32 %r168 to i192
+%r170 = shl i192 %r169, 160
+%r171 = or i192 %r165, %r170
+%r172 = zext i192 %r171 to i224
+%r174 = getelementptr i32, i32* %r4, i32 6
+%r175 = load i32, i32* %r174
+%r176 = zext i32 %r175 to i224
+%r177 = shl i224 %r176, 192
+%r178 = or i224 %r172, %r177
+%r179 = zext i224 %r178 to i256
+%r181 = getelementptr i32, i32* %r4, i32 7
+%r182 = load i32, i32* %r181
+%r183 = zext i32 %r182 to i256
+%r184 = shl i256 %r183, 224
+%r185 = or i256 %r179, %r184
+%r186 = zext i256 %r185 to i288
+%r188 = getelementptr i32, i32* %r4, i32 8
+%r189 = load i32, i32* %r188
+%r190 = zext i32 %r189 to i288
+%r191 = shl i288 %r190, 256
+%r192 = or i288 %r186, %r191
+%r193 = zext i288 %r192 to i320
+%r195 = getelementptr i32, i32* %r4, i32 9
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i320
+%r198 = shl i320 %r197, 288
+%r199 = or i320 %r193, %r198
+%r201 = select i1 %r135, i320 %r199, i320 0  ; correction term: the value at %r4 when the flag is set, otherwise 0
+%r202 = add i320 %r133, %r201  ; difference plus the correction term, modulo 2^320
+%r203 = trunc i320 %r202 to i32  ; store the result to %r1 word by word, shifting right 32 bits each step
+%r205 = getelementptr i32, i32* %r1, i32 0
+store i32 %r203, i32* %r205
+%r206 = lshr i320 %r202, 32
+%r207 = trunc i320 %r206 to i32
+%r209 = getelementptr i32, i32* %r1, i32 1
+store i32 %r207, i32* %r209
+%r210 = lshr i320 %r206, 32
+%r211 = trunc i320 %r210 to i32
+%r213 = getelementptr i32, i32* %r1, i32 2
+store i32 %r211, i32* %r213
+%r214 = lshr i320 %r210, 32
+%r215 = trunc i320 %r214 to i32
+%r217 = getelementptr i32, i32* %r1, i32 3
+store i32 %r215, i32* %r217
+%r218 = lshr i320 %r214, 32
+%r219 = trunc i320 %r218 to i32
+%r221 = getelementptr i32, i32* %r1, i32 4
+store i32 %r219, i32* %r221
+%r222 = lshr i320 %r218, 32
+%r223 = trunc i320 %r222 to i32
+%r225 = getelementptr i32, i32* %r1, i32 5
+store i32 %r223, i32* %r225
+%r226 = lshr i320 %r222, 32
+%r227 = trunc i320 %r226 to i32
+%r229 = getelementptr i32, i32* %r1, i32 6
+store i32 %r227, i32* %r229
+%r230 = lshr i320 %r226, 32
+%r231 = trunc i320 %r230 to i32
+%r233 = getelementptr i32, i32* %r1, i32 7
+store i32 %r231, i32* %r233
+%r234 = lshr i320 %r230, 32
+%r235 = trunc i320 %r234 to i32
+%r237 = getelementptr i32, i32* %r1, i32 8
+store i32 %r235, i32* %r237
+%r238 = lshr i320 %r234, 32
+%r239 = trunc i320 %r238 to i32
+%r241 = getelementptr i32, i32* %r1, i32 9
+store i32 %r239, i32* %r241
+ret void
+}
+define void @mcl_fpDbl_add10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)  ; r1[0..19] = r2[0..19] + r3[0..19]; the upper 10 words are conditionally reduced by r4[0..9]
+{
+%r5 = load i32, i32* %r2  ; build %r138: the 20 words at r2 packed into an i640, word 0 least significant
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = load i32, i32* %r3  ; build %r272: the 20 words at r3 packed the same way
+%r140 = zext i32 %r139 to i64
+%r142 = getelementptr i32, i32* %r3, i32 1
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i64
+%r145 = shl i64 %r144, 32
+%r146 = or i64 %r140, %r145
+%r147 = zext i64 %r146 to i96
+%r149 = getelementptr i32, i32* %r3, i32 2
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i96
+%r152 = shl i96 %r151, 64
+%r153 = or i96 %r147, %r152
+%r154 = zext i96 %r153 to i128
+%r156 = getelementptr i32, i32* %r3, i32 3
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i128
+%r159 = shl i128 %r158, 96
+%r160 = or i128 %r154, %r159
+%r161 = zext i128 %r160 to i160
+%r163 = getelementptr i32, i32* %r3, i32 4
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i160
+%r166 = shl i160 %r165, 128
+%r167 = or i160 %r161, %r166
+%r168 = zext i160 %r167 to i192
+%r170 = getelementptr i32, i32* %r3, i32 5
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i192
+%r173 = shl i192 %r172, 160
+%r174 = or i192 %r168, %r173
+%r175 = zext i192 %r174 to i224
+%r177 = getelementptr i32, i32* %r3, i32 6
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i224
+%r180 = shl i224 %r179, 192
+%r181 = or i224 %r175, %r180
+%r182 = zext i224 %r181 to i256
+%r184 = getelementptr i32, i32* %r3, i32 7
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i256
+%r187 = shl i256 %r186, 224
+%r188 = or i256 %r182, %r187
+%r189 = zext i256 %r188 to i288
+%r191 = getelementptr i32, i32* %r3, i32 8
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i288
+%r194 = shl i288 %r193, 256
+%r195 = or i288 %r189, %r194
+%r196 = zext i288 %r195 to i320
+%r198 = getelementptr i32, i32* %r3, i32 9
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i320
+%r201 = shl i320 %r200, 288
+%r202 = or i320 %r196, %r201
+%r203 = zext i320 %r202 to i352
+%r205 = getelementptr i32, i32* %r3, i32 10
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i352
+%r208 = shl i352 %r207, 320
+%r209 = or i352 %r203, %r208
+%r210 = zext i352 %r209 to i384
+%r212 = getelementptr i32, i32* %r3, i32 11
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i384
+%r215 = shl i384 %r214, 352
+%r216 = or i384 %r210, %r215
+%r217 = zext i384 %r216 to i416
+%r219 = getelementptr i32, i32* %r3, i32 12
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i416
+%r222 = shl i416 %r221, 384
+%r223 = or i416 %r217, %r222
+%r224 = zext i416 %r223 to i448
+%r226 = getelementptr i32, i32* %r3, i32 13
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i448
+%r229 = shl i448 %r228, 416
+%r230 = or i448 %r224, %r229
+%r231 = zext i448 %r230 to i480
+%r233 = getelementptr i32, i32* %r3, i32 14
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i480
+%r236 = shl i480 %r235, 448
+%r237 = or i480 %r231, %r236
+%r238 = zext i480 %r237 to i512
+%r240 = getelementptr i32, i32* %r3, i32 15
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i512
+%r243 = shl i512 %r242, 480
+%r244 = or i512 %r238, %r243
+%r245 = zext i512 %r244 to i544
+%r247 = getelementptr i32, i32* %r3, i32 16
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i544
+%r250 = shl i544 %r249, 512
+%r251 = or i544 %r245, %r250
+%r252 = zext i544 %r251 to i576
+%r254 = getelementptr i32, i32* %r3, i32 17
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i576
+%r257 = shl i576 %r256, 544
+%r258 = or i576 %r252, %r257
+%r259 = zext i576 %r258 to i608
+%r261 = getelementptr i32, i32* %r3, i32 18
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i608
+%r264 = shl i608 %r263, 576
+%r265 = or i608 %r259, %r264
+%r266 = zext i608 %r265 to i640
+%r268 = getelementptr i32, i32* %r3, i32 19
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i640
+%r271 = shl i640 %r270, 608
+%r272 = or i640 %r266, %r271
+%r273 = zext i640 %r138 to i672  ; widen both operands to i672 so the carry out of bit 639 is kept
+%r274 = zext i640 %r272 to i672
+%r275 = add i672 %r273, %r274
+%r276 = trunc i672 %r275 to i320  ; low 320 bits of the sum go to r1[0..9] unchanged
+%r277 = trunc i320 %r276 to i32
+%r279 = getelementptr i32, i32* %r1, i32 0
+store i32 %r277, i32* %r279
+%r280 = lshr i320 %r276, 32
+%r281 = trunc i320 %r280 to i32
+%r283 = getelementptr i32, i32* %r1, i32 1
+store i32 %r281, i32* %r283
+%r284 = lshr i320 %r280, 32
+%r285 = trunc i320 %r284 to i32
+%r287 = getelementptr i32, i32* %r1, i32 2
+store i32 %r285, i32* %r287
+%r288 = lshr i320 %r284, 32
+%r289 = trunc i320 %r288 to i32
+%r291 = getelementptr i32, i32* %r1, i32 3
+store i32 %r289, i32* %r291
+%r292 = lshr i320 %r288, 32
+%r293 = trunc i320 %r292 to i32
+%r295 = getelementptr i32, i32* %r1, i32 4
+store i32 %r293, i32* %r295
+%r296 = lshr i320 %r292, 32
+%r297 = trunc i320 %r296 to i32
+%r299 = getelementptr i32, i32* %r1, i32 5
+store i32 %r297, i32* %r299
+%r300 = lshr i320 %r296, 32
+%r301 = trunc i320 %r300 to i32
+%r303 = getelementptr i32, i32* %r1, i32 6
+store i32 %r301, i32* %r303
+%r304 = lshr i320 %r300, 32
+%r305 = trunc i320 %r304 to i32
+%r307 = getelementptr i32, i32* %r1, i32 7
+store i32 %r305, i32* %r307
+%r308 = lshr i320 %r304, 32
+%r309 = trunc i320 %r308 to i32
+%r311 = getelementptr i32, i32* %r1, i32 8
+store i32 %r309, i32* %r311
+%r312 = lshr i320 %r308, 32
+%r313 = trunc i320 %r312 to i32
+%r315 = getelementptr i32, i32* %r1, i32 9
+store i32 %r313, i32* %r315
+%r316 = lshr i672 %r275, 320
+%r317 = trunc i672 %r316 to i352  ; %r317: bits 320..671 of the sum, i.e. the upper half together with the carry bit
+%r318 = load i32, i32* %r4  ; build %r381: the 10 words at r4 packed into an i320
+%r319 = zext i32 %r318 to i64
+%r321 = getelementptr i32, i32* %r4, i32 1
+%r322 = load i32, i32* %r321
+%r323 = zext i32 %r322 to i64
+%r324 = shl i64 %r323, 32
+%r325 = or i64 %r319, %r324
+%r326 = zext i64 %r325 to i96
+%r328 = getelementptr i32, i32* %r4, i32 2
+%r329 = load i32, i32* %r328
+%r330 = zext i32 %r329 to i96
+%r331 = shl i96 %r330, 64
+%r332 = or i96 %r326, %r331
+%r333 = zext i96 %r332 to i128
+%r335 = getelementptr i32, i32* %r4, i32 3
+%r336 = load i32, i32* %r335
+%r337 = zext i32 %r336 to i128
+%r338 = shl i128 %r337, 96
+%r339 = or i128 %r333, %r338
+%r340 = zext i128 %r339 to i160
+%r342 = getelementptr i32, i32* %r4, i32 4
+%r343 = load i32, i32* %r342
+%r344 = zext i32 %r343 to i160
+%r345 = shl i160 %r344, 128
+%r346 = or i160 %r340, %r345
+%r347 = zext i160 %r346 to i192
+%r349 = getelementptr i32, i32* %r4, i32 5
+%r350 = load i32, i32* %r349
+%r351 = zext i32 %r350 to i192
+%r352 = shl i192 %r351, 160
+%r353 = or i192 %r347, %r352
+%r354 = zext i192 %r353 to i224
+%r356 = getelementptr i32, i32* %r4, i32 6
+%r357 = load i32, i32* %r356
+%r358 = zext i32 %r357 to i224
+%r359 = shl i224 %r358, 192
+%r360 = or i224 %r354, %r359
+%r361 = zext i224 %r360 to i256
+%r363 = getelementptr i32, i32* %r4, i32 7
+%r364 = load i32, i32* %r363
+%r365 = zext i32 %r364 to i256
+%r366 = shl i256 %r365, 224
+%r367 = or i256 %r361, %r366
+%r368 = zext i256 %r367 to i288
+%r370 = getelementptr i32, i32* %r4, i32 8
+%r371 = load i32, i32* %r370
+%r372 = zext i32 %r371 to i288
+%r373 = shl i288 %r372, 256
+%r374 = or i288 %r368, %r373
+%r375 = zext i288 %r374 to i320
+%r377 = getelementptr i32, i32* %r4, i32 9
+%r378 = load i32, i32* %r377
+%r379 = zext i32 %r378 to i320
+%r380 = shl i320 %r379, 288
+%r381 = or i320 %r375, %r380
+%r382 = zext i320 %r381 to i352
+%r383 = sub i352 %r317, %r382  ; trial subtraction of r4 from the upper part
+%r384 = lshr i352 %r383, 320  ; bit 320 of the difference is set when the subtraction wrapped below zero
+%r385 = trunc i352 %r384 to i1
+%r386 = select i1 %r385, i352 %r317, i352 %r383  ; flag set: keep the unsubtracted value; otherwise take the difference
+%r387 = trunc i352 %r386 to i320
+%r389 = getelementptr i32, i32* %r1, i32 10  ; the selected 320-bit value is written to r1[10..19]
+%r390 = trunc i320 %r387 to i32
+%r392 = getelementptr i32, i32* %r389, i32 0
+store i32 %r390, i32* %r392
+%r393 = lshr i320 %r387, 32
+%r394 = trunc i320 %r393 to i32
+%r396 = getelementptr i32, i32* %r389, i32 1
+store i32 %r394, i32* %r396
+%r397 = lshr i320 %r393, 32
+%r398 = trunc i320 %r397 to i32
+%r400 = getelementptr i32, i32* %r389, i32 2
+store i32 %r398, i32* %r400
+%r401 = lshr i320 %r397, 32
+%r402 = trunc i320 %r401 to i32
+%r404 = getelementptr i32, i32* %r389, i32 3
+store i32 %r402, i32* %r404
+%r405 = lshr i320 %r401, 32
+%r406 = trunc i320 %r405 to i32
+%r408 = getelementptr i32, i32* %r389, i32 4
+store i32 %r406, i32* %r408
+%r409 = lshr i320 %r405, 32
+%r410 = trunc i320 %r409 to i32
+%r412 = getelementptr i32, i32* %r389, i32 5
+store i32 %r410, i32* %r412
+%r413 = lshr i320 %r409, 32
+%r414 = trunc i320 %r413 to i32
+%r416 = getelementptr i32, i32* %r389, i32 6
+store i32 %r414, i32* %r416
+%r417 = lshr i320 %r413, 32
+%r418 = trunc i320 %r417 to i32
+%r420 = getelementptr i32, i32* %r389, i32 7
+store i32 %r418, i32* %r420
+%r421 = lshr i320 %r417, 32
+%r422 = trunc i320 %r421 to i32
+%r424 = getelementptr i32, i32* %r389, i32 8
+store i32 %r422, i32* %r424
+%r425 = lshr i320 %r421, 32
+%r426 = trunc i320 %r425 to i32
+%r428 = getelementptr i32, i32* %r389, i32 9
+store i32 %r426, i32* %r428
+ret void
+}
+define void @mcl_fpDbl_sub10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)  ; r1[0..19] = r2[0..19] - r3[0..19]; r4[0..9] is added back to the upper 10 words when the subtraction borrows
+{
+%r5 = load i32, i32* %r2  ; build %r138: the 20 words at r2 packed into an i640, word 0 least significant
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = load i32, i32* %r3  ; build %r272: the 20 words at r3 packed the same way
+%r140 = zext i32 %r139 to i64
+%r142 = getelementptr i32, i32* %r3, i32 1
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i64
+%r145 = shl i64 %r144, 32
+%r146 = or i64 %r140, %r145
+%r147 = zext i64 %r146 to i96
+%r149 = getelementptr i32, i32* %r3, i32 2
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i96
+%r152 = shl i96 %r151, 64
+%r153 = or i96 %r147, %r152
+%r154 = zext i96 %r153 to i128
+%r156 = getelementptr i32, i32* %r3, i32 3
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i128
+%r159 = shl i128 %r158, 96
+%r160 = or i128 %r154, %r159
+%r161 = zext i128 %r160 to i160
+%r163 = getelementptr i32, i32* %r3, i32 4
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i160
+%r166 = shl i160 %r165, 128
+%r167 = or i160 %r161, %r166
+%r168 = zext i160 %r167 to i192
+%r170 = getelementptr i32, i32* %r3, i32 5
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i192
+%r173 = shl i192 %r172, 160
+%r174 = or i192 %r168, %r173
+%r175 = zext i192 %r174 to i224
+%r177 = getelementptr i32, i32* %r3, i32 6
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i224
+%r180 = shl i224 %r179, 192
+%r181 = or i224 %r175, %r180
+%r182 = zext i224 %r181 to i256
+%r184 = getelementptr i32, i32* %r3, i32 7
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i256
+%r187 = shl i256 %r186, 224
+%r188 = or i256 %r182, %r187
+%r189 = zext i256 %r188 to i288
+%r191 = getelementptr i32, i32* %r3, i32 8
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i288
+%r194 = shl i288 %r193, 256
+%r195 = or i288 %r189, %r194
+%r196 = zext i288 %r195 to i320
+%r198 = getelementptr i32, i32* %r3, i32 9
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i320
+%r201 = shl i320 %r200, 288
+%r202 = or i320 %r196, %r201
+%r203 = zext i320 %r202 to i352
+%r205 = getelementptr i32, i32* %r3, i32 10
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i352
+%r208 = shl i352 %r207, 320
+%r209 = or i352 %r203, %r208
+%r210 = zext i352 %r209 to i384
+%r212 = getelementptr i32, i32* %r3, i32 11
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i384
+%r215 = shl i384 %r214, 352
+%r216 = or i384 %r210, %r215
+%r217 = zext i384 %r216 to i416
+%r219 = getelementptr i32, i32* %r3, i32 12
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i416
+%r222 = shl i416 %r221, 384
+%r223 = or i416 %r217, %r222
+%r224 = zext i416 %r223 to i448
+%r226 = getelementptr i32, i32* %r3, i32 13
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i448
+%r229 = shl i448 %r228, 416
+%r230 = or i448 %r224, %r229
+%r231 = zext i448 %r230 to i480
+%r233 = getelementptr i32, i32* %r3, i32 14
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i480
+%r236 = shl i480 %r235, 448
+%r237 = or i480 %r231, %r236
+%r238 = zext i480 %r237 to i512
+%r240 = getelementptr i32, i32* %r3, i32 15
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i512
+%r243 = shl i512 %r242, 480
+%r244 = or i512 %r238, %r243
+%r245 = zext i512 %r244 to i544
+%r247 = getelementptr i32, i32* %r3, i32 16
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i544
+%r250 = shl i544 %r249, 512
+%r251 = or i544 %r245, %r250
+%r252 = zext i544 %r251 to i576
+%r254 = getelementptr i32, i32* %r3, i32 17
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i576
+%r257 = shl i576 %r256, 544
+%r258 = or i576 %r252, %r257
+%r259 = zext i576 %r258 to i608
+%r261 = getelementptr i32, i32* %r3, i32 18
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i608
+%r264 = shl i608 %r263, 576
+%r265 = or i608 %r259, %r264
+%r266 = zext i608 %r265 to i640
+%r268 = getelementptr i32, i32* %r3, i32 19
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i640
+%r271 = shl i640 %r270, 608
+%r272 = or i640 %r266, %r271
+%r273 = zext i640 %r138 to i672  ; widen both operands to i672 so a borrow shows up in bit 640
+%r274 = zext i640 %r272 to i672
+%r275 = sub i672 %r273, %r274
+%r276 = trunc i672 %r275 to i320  ; low 320 bits of the difference go to r1[0..9] unchanged
+%r277 = trunc i320 %r276 to i32
+%r279 = getelementptr i32, i32* %r1, i32 0
+store i32 %r277, i32* %r279
+%r280 = lshr i320 %r276, 32
+%r281 = trunc i320 %r280 to i32
+%r283 = getelementptr i32, i32* %r1, i32 1
+store i32 %r281, i32* %r283
+%r284 = lshr i320 %r280, 32
+%r285 = trunc i320 %r284 to i32
+%r287 = getelementptr i32, i32* %r1, i32 2
+store i32 %r285, i32* %r287
+%r288 = lshr i320 %r284, 32
+%r289 = trunc i320 %r288 to i32
+%r291 = getelementptr i32, i32* %r1, i32 3
+store i32 %r289, i32* %r291
+%r292 = lshr i320 %r288, 32
+%r293 = trunc i320 %r292 to i32
+%r295 = getelementptr i32, i32* %r1, i32 4
+store i32 %r293, i32* %r295
+%r296 = lshr i320 %r292, 32
+%r297 = trunc i320 %r296 to i32
+%r299 = getelementptr i32, i32* %r1, i32 5
+store i32 %r297, i32* %r299
+%r300 = lshr i320 %r296, 32
+%r301 = trunc i320 %r300 to i32
+%r303 = getelementptr i32, i32* %r1, i32 6
+store i32 %r301, i32* %r303
+%r304 = lshr i320 %r300, 32
+%r305 = trunc i320 %r304 to i32
+%r307 = getelementptr i32, i32* %r1, i32 7
+store i32 %r305, i32* %r307
+%r308 = lshr i320 %r304, 32
+%r309 = trunc i320 %r308 to i32
+%r311 = getelementptr i32, i32* %r1, i32 8
+store i32 %r309, i32* %r311
+%r312 = lshr i320 %r308, 32
+%r313 = trunc i320 %r312 to i32
+%r315 = getelementptr i32, i32* %r1, i32 9
+store i32 %r313, i32* %r315
+%r316 = lshr i672 %r275, 320
+%r317 = trunc i672 %r316 to i320  ; %r317: bits 320..639 of the difference (the upper half)
+%r318 = lshr i672 %r275, 640  ; %r319: bit 640 of the difference, set when r2 < r3 as 640-bit values
+%r319 = trunc i672 %r318 to i1
+%r320 = load i32, i32* %r4  ; build %r383: the 10 words at r4 packed into an i320
+%r321 = zext i32 %r320 to i64
+%r323 = getelementptr i32, i32* %r4, i32 1
+%r324 = load i32, i32* %r323
+%r325 = zext i32 %r324 to i64
+%r326 = shl i64 %r325, 32
+%r327 = or i64 %r321, %r326
+%r328 = zext i64 %r327 to i96
+%r330 = getelementptr i32, i32* %r4, i32 2
+%r331 = load i32, i32* %r330
+%r332 = zext i32 %r331 to i96
+%r333 = shl i96 %r332, 64
+%r334 = or i96 %r328, %r333
+%r335 = zext i96 %r334 to i128
+%r337 = getelementptr i32, i32* %r4, i32 3
+%r338 = load i32, i32* %r337
+%r339 = zext i32 %r338 to i128
+%r340 = shl i128 %r339, 96
+%r341 = or i128 %r335, %r340
+%r342 = zext i128 %r341 to i160
+%r344 = getelementptr i32, i32* %r4, i32 4
+%r345 = load i32, i32* %r344
+%r346 = zext i32 %r345 to i160
+%r347 = shl i160 %r346, 128
+%r348 = or i160 %r342, %r347
+%r349 = zext i160 %r348 to i192
+%r351 = getelementptr i32, i32* %r4, i32 5
+%r352 = load i32, i32* %r351
+%r353 = zext i32 %r352 to i192
+%r354 = shl i192 %r353, 160
+%r355 = or i192 %r349, %r354
+%r356 = zext i192 %r355 to i224
+%r358 = getelementptr i32, i32* %r4, i32 6
+%r359 = load i32, i32* %r358
+%r360 = zext i32 %r359 to i224
+%r361 = shl i224 %r360, 192
+%r362 = or i224 %r356, %r361
+%r363 = zext i224 %r362 to i256
+%r365 = getelementptr i32, i32* %r4, i32 7
+%r366 = load i32, i32* %r365
+%r367 = zext i32 %r366 to i256
+%r368 = shl i256 %r367, 224
+%r369 = or i256 %r363, %r368
+%r370 = zext i256 %r369 to i288
+%r372 = getelementptr i32, i32* %r4, i32 8
+%r373 = load i32, i32* %r372
+%r374 = zext i32 %r373 to i288
+%r375 = shl i288 %r374, 256
+%r376 = or i288 %r370, %r375
+%r377 = zext i288 %r376 to i320
+%r379 = getelementptr i32, i32* %r4, i32 9
+%r380 = load i32, i32* %r379
+%r381 = zext i32 %r380 to i320
+%r382 = shl i320 %r381, 288
+%r383 = or i320 %r377, %r382
+%r385 = select i1 %r319, i320 %r383, i320 0  ; addend is r4 when the subtraction borrowed, 0 otherwise
+%r386 = add i320 %r317, %r385  ; 320-bit add; any carry out of bit 319 is discarded
+%r388 = getelementptr i32, i32* %r1, i32 10  ; the corrected upper half is written to r1[10..19]
+%r389 = trunc i320 %r386 to i32
+%r391 = getelementptr i32, i32* %r388, i32 0
+store i32 %r389, i32* %r391
+%r392 = lshr i320 %r386, 32
+%r393 = trunc i320 %r392 to i32
+%r395 = getelementptr i32, i32* %r388, i32 1
+store i32 %r393, i32* %r395
+%r396 = lshr i320 %r392, 32
+%r397 = trunc i320 %r396 to i32
+%r399 = getelementptr i32, i32* %r388, i32 2
+store i32 %r397, i32* %r399
+%r400 = lshr i320 %r396, 32
+%r401 = trunc i320 %r400 to i32
+%r403 = getelementptr i32, i32* %r388, i32 3
+store i32 %r401, i32* %r403
+%r404 = lshr i320 %r400, 32
+%r405 = trunc i320 %r404 to i32
+%r407 = getelementptr i32, i32* %r388, i32 4
+store i32 %r405, i32* %r407
+%r408 = lshr i320 %r404, 32
+%r409 = trunc i320 %r408 to i32
+%r411 = getelementptr i32, i32* %r388, i32 5
+store i32 %r409, i32* %r411
+%r412 = lshr i320 %r408, 32
+%r413 = trunc i320 %r412 to i32
+%r415 = getelementptr i32, i32* %r388, i32 6
+store i32 %r413, i32* %r415
+%r416 = lshr i320 %r412, 32
+%r417 = trunc i320 %r416 to i32
+%r419 = getelementptr i32, i32* %r388, i32 7
+store i32 %r417, i32* %r419
+%r420 = lshr i320 %r416, 32
+%r421 = trunc i320 %r420 to i32
+%r423 = getelementptr i32, i32* %r388, i32 8
+store i32 %r421, i32* %r423
+%r424 = lshr i320 %r420, 32
+%r425 = trunc i320 %r424 to i32
+%r427 = getelementptr i32, i32* %r388, i32 9
+store i32 %r425, i32* %r427
+ret void
+}
+define i384 @mulPv352x32(i32* noalias  %r2, i32 %r3)  ; returns lo + (hi << 32) as an i384, where lo/hi pack the low/high 32-bit halves of 11 @mulPos32x32 results (indices 0..10)
+{
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)  ; NOTE(review): @mulPos32x32 and @extractHigh32 are defined outside this chunk; presumably this yields r2[i] * r3 as an i64 - confirm
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
+%r34 = trunc i64 %r33 to i32
+%r35 = call i32 @extractHigh32(i64 %r33)
+%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
+%r38 = trunc i64 %r37 to i32
+%r39 = call i32 @extractHigh32(i64 %r37)
+%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
+%r42 = trunc i64 %r41 to i32
+%r43 = call i32 @extractHigh32(i64 %r41)
+%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
+%r46 = trunc i64 %r45 to i32
+%r47 = call i32 @extractHigh32(i64 %r45)
+%r48 = zext i32 %r6 to i64  ; build %r87: the 11 truncated low halves packed into an i352, index 0 least significant
+%r49 = zext i32 %r10 to i64
+%r50 = shl i64 %r49, 32
+%r51 = or i64 %r48, %r50
+%r52 = zext i64 %r51 to i96
+%r53 = zext i32 %r14 to i96
+%r54 = shl i96 %r53, 64
+%r55 = or i96 %r52, %r54
+%r56 = zext i96 %r55 to i128
+%r57 = zext i32 %r18 to i128
+%r58 = shl i128 %r57, 96
+%r59 = or i128 %r56, %r58
+%r60 = zext i128 %r59 to i160
+%r61 = zext i32 %r22 to i160
+%r62 = shl i160 %r61, 128
+%r63 = or i160 %r60, %r62
+%r64 = zext i160 %r63 to i192
+%r65 = zext i32 %r26 to i192
+%r66 = shl i192 %r65, 160
+%r67 = or i192 %r64, %r66
+%r68 = zext i192 %r67 to i224
+%r69 = zext i32 %r30 to i224
+%r70 = shl i224 %r69, 192
+%r71 = or i224 %r68, %r70
+%r72 = zext i224 %r71 to i256
+%r73 = zext i32 %r34 to i256
+%r74 = shl i256 %r73, 224
+%r75 = or i256 %r72, %r74
+%r76 = zext i256 %r75 to i288
+%r77 = zext i32 %r38 to i288
+%r78 = shl i288 %r77, 256
+%r79 = or i288 %r76, %r78
+%r80 = zext i288 %r79 to i320
+%r81 = zext i32 %r42 to i320
+%r82 = shl i320 %r81, 288
+%r83 = or i320 %r80, %r82
+%r84 = zext i320 %r83 to i352
+%r85 = zext i32 %r46 to i352
+%r86 = shl i352 %r85, 320
+%r87 = or i352 %r84, %r86
+%r88 = zext i32 %r7 to i64  ; build %r127: the 11 @extractHigh32 results packed into an i352 the same way
+%r89 = zext i32 %r11 to i64
+%r90 = shl i64 %r89, 32
+%r91 = or i64 %r88, %r90
+%r92 = zext i64 %r91 to i96
+%r93 = zext i32 %r15 to i96
+%r94 = shl i96 %r93, 64
+%r95 = or i96 %r92, %r94
+%r96 = zext i96 %r95 to i128
+%r97 = zext i32 %r19 to i128
+%r98 = shl i128 %r97, 96
+%r99 = or i128 %r96, %r98
+%r100 = zext i128 %r99 to i160
+%r101 = zext i32 %r23 to i160
+%r102 = shl i160 %r101, 128
+%r103 = or i160 %r100, %r102
+%r104 = zext i160 %r103 to i192
+%r105 = zext i32 %r27 to i192
+%r106 = shl i192 %r105, 160
+%r107 = or i192 %r104, %r106
+%r108 = zext i192 %r107 to i224
+%r109 = zext i32 %r31 to i224
+%r110 = shl i224 %r109, 192
+%r111 = or i224 %r108, %r110
+%r112 = zext i224 %r111 to i256
+%r113 = zext i32 %r35 to i256
+%r114 = shl i256 %r113, 224
+%r115 = or i256 %r112, %r114
+%r116 = zext i256 %r115 to i288
+%r117 = zext i32 %r39 to i288
+%r118 = shl i288 %r117, 256
+%r119 = or i288 %r116, %r118
+%r120 = zext i288 %r119 to i320
+%r121 = zext i32 %r43 to i320
+%r122 = shl i320 %r121, 288
+%r123 = or i320 %r120, %r122
+%r124 = zext i320 %r123 to i352
+%r125 = zext i32 %r47 to i352
+%r126 = shl i352 %r125, 320
+%r127 = or i352 %r124, %r126
+%r128 = zext i352 %r87 to i384
+%r129 = zext i352 %r127 to i384
+%r130 = shl i384 %r129, 32  ; each high half belongs one 32-bit word above its low half
+%r131 = add i384 %r128, %r130  ; a single wide add propagates the carries between words
+ret i384 %r131
+}
+define void @mcl_fp_mulUnitPre11L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)  ; writes the i384 returned by @mulPv352x32(r2, r3) to r1[0..11], least significant word first
+{
+%r4 = call i384 @mulPv352x32(i32* %r2, i32 %r3)
+%r5 = trunc i384 %r4 to i32
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i384 %r4, 32  ; from here on: shift the next word down, truncate it, store it at the next index
+%r9 = trunc i384 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i384 %r8, 32
+%r13 = trunc i384 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i384 %r12, 32
+%r17 = trunc i384 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i384 %r16, 32
+%r21 = trunc i384 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+%r24 = lshr i384 %r20, 32
+%r25 = trunc i384 %r24 to i32
+%r27 = getelementptr i32, i32* %r1, i32 5
+store i32 %r25, i32* %r27
+%r28 = lshr i384 %r24, 32
+%r29 = trunc i384 %r28 to i32
+%r31 = getelementptr i32, i32* %r1, i32 6
+store i32 %r29, i32* %r31
+%r32 = lshr i384 %r28, 32
+%r33 = trunc i384 %r32 to i32
+%r35 = getelementptr i32, i32* %r1, i32 7
+store i32 %r33, i32* %r35
+%r36 = lshr i384 %r32, 32
+%r37 = trunc i384 %r36 to i32
+%r39 = getelementptr i32, i32* %r1, i32 8
+store i32 %r37, i32* %r39
+%r40 = lshr i384 %r36, 32
+%r41 = trunc i384 %r40 to i32
+%r43 = getelementptr i32, i32* %r1, i32 9
+store i32 %r41, i32* %r43
+%r44 = lshr i384 %r40, 32
+%r45 = trunc i384 %r44 to i32
+%r47 = getelementptr i32, i32* %r1, i32 10
+store i32 %r45, i32* %r47
+%r48 = lshr i384 %r44, 32
+%r49 = trunc i384 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 11
+store i32 %r49, i32* %r51
+ret void
+}
+define void @mcl_fpDbl_mulPre11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)  ; writes r1[0..21]: accumulates the 11 rows @mulPv352x32(r2, r3[i]), i = 0..10, with row i offset by i words
+{
+%r4 = load i32, i32* %r3  ; row 0 uses r3[0]
+%r5 = call i384 @mulPv352x32(i32* %r2, i32 %r4)
+%r6 = trunc i384 %r5 to i32
+store i32 %r6, i32* %r1  ; the lowest word of the accumulator is final once its row has been added
+%r7 = lshr i384 %r5, 32  ; drop the stored word; the remaining bits carry into the next row
+%r9 = getelementptr i32, i32* %r3, i32 1  ; rows 1..9 repeat: load r3[i], add its row to the carried bits, store word i, shift
+%r10 = load i32, i32* %r9
+%r11 = call i384 @mulPv352x32(i32* %r2, i32 %r10)
+%r12 = add i384 %r7, %r11
+%r13 = trunc i384 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 1
+store i32 %r13, i32* %r15
+%r16 = lshr i384 %r12, 32
+%r18 = getelementptr i32, i32* %r3, i32 2
+%r19 = load i32, i32* %r18
+%r20 = call i384 @mulPv352x32(i32* %r2, i32 %r19)
+%r21 = add i384 %r16, %r20
+%r22 = trunc i384 %r21 to i32
+%r24 = getelementptr i32, i32* %r1, i32 2
+store i32 %r22, i32* %r24
+%r25 = lshr i384 %r21, 32
+%r27 = getelementptr i32, i32* %r3, i32 3
+%r28 = load i32, i32* %r27
+%r29 = call i384 @mulPv352x32(i32* %r2, i32 %r28)
+%r30 = add i384 %r25, %r29
+%r31 = trunc i384 %r30 to i32
+%r33 = getelementptr i32, i32* %r1, i32 3
+store i32 %r31, i32* %r33
+%r34 = lshr i384 %r30, 32
+%r36 = getelementptr i32, i32* %r3, i32 4
+%r37 = load i32, i32* %r36
+%r38 = call i384 @mulPv352x32(i32* %r2, i32 %r37)
+%r39 = add i384 %r34, %r38
+%r40 = trunc i384 %r39 to i32
+%r42 = getelementptr i32, i32* %r1, i32 4
+store i32 %r40, i32* %r42
+%r43 = lshr i384 %r39, 32
+%r45 = getelementptr i32, i32* %r3, i32 5
+%r46 = load i32, i32* %r45
+%r47 = call i384 @mulPv352x32(i32* %r2, i32 %r46)
+%r48 = add i384 %r43, %r47
+%r49 = trunc i384 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 5
+store i32 %r49, i32* %r51
+%r52 = lshr i384 %r48, 32
+%r54 = getelementptr i32, i32* %r3, i32 6
+%r55 = load i32, i32* %r54
+%r56 = call i384 @mulPv352x32(i32* %r2, i32 %r55)
+%r57 = add i384 %r52, %r56
+%r58 = trunc i384 %r57 to i32
+%r60 = getelementptr i32, i32* %r1, i32 6
+store i32 %r58, i32* %r60
+%r61 = lshr i384 %r57, 32
+%r63 = getelementptr i32, i32* %r3, i32 7
+%r64 = load i32, i32* %r63
+%r65 = call i384 @mulPv352x32(i32* %r2, i32 %r64)
+%r66 = add i384 %r61, %r65
+%r67 = trunc i384 %r66 to i32
+%r69 = getelementptr i32, i32* %r1, i32 7
+store i32 %r67, i32* %r69
+%r70 = lshr i384 %r66, 32
+%r72 = getelementptr i32, i32* %r3, i32 8
+%r73 = load i32, i32* %r72
+%r74 = call i384 @mulPv352x32(i32* %r2, i32 %r73)
+%r75 = add i384 %r70, %r74
+%r76 = trunc i384 %r75 to i32
+%r78 = getelementptr i32, i32* %r1, i32 8
+store i32 %r76, i32* %r78
+%r79 = lshr i384 %r75, 32
+%r81 = getelementptr i32, i32* %r3, i32 9
+%r82 = load i32, i32* %r81
+%r83 = call i384 @mulPv352x32(i32* %r2, i32 %r82)
+%r84 = add i384 %r79, %r83
+%r85 = trunc i384 %r84 to i32
+%r87 = getelementptr i32, i32* %r1, i32 9
+store i32 %r85, i32* %r87
+%r88 = lshr i384 %r84, 32
+%r90 = getelementptr i32, i32* %r3, i32 10  ; row 10 is the last one
+%r91 = load i32, i32* %r90
+%r92 = call i384 @mulPv352x32(i32* %r2, i32 %r91)
+%r93 = add i384 %r88, %r92
+%r95 = getelementptr i32, i32* %r1, i32 10  ; no rows remain, so all 12 words of the accumulator are written to r1[10..21]
+%r96 = trunc i384 %r93 to i32
+%r98 = getelementptr i32, i32* %r95, i32 0
+store i32 %r96, i32* %r98
+%r99 = lshr i384 %r93, 32
+%r100 = trunc i384 %r99 to i32
+%r102 = getelementptr i32, i32* %r95, i32 1
+store i32 %r100, i32* %r102
+%r103 = lshr i384 %r99, 32
+%r104 = trunc i384 %r103 to i32
+%r106 = getelementptr i32, i32* %r95, i32 2
+store i32 %r104, i32* %r106
+%r107 = lshr i384 %r103, 32
+%r108 = trunc i384 %r107 to i32
+%r110 = getelementptr i32, i32* %r95, i32 3
+store i32 %r108, i32* %r110
+%r111 = lshr i384 %r107, 32
+%r112 = trunc i384 %r111 to i32
+%r114 = getelementptr i32, i32* %r95, i32 4
+store i32 %r112, i32* %r114
+%r115 = lshr i384 %r111, 32
+%r116 = trunc i384 %r115 to i32
+%r118 = getelementptr i32, i32* %r95, i32 5
+store i32 %r116, i32* %r118
+%r119 = lshr i384 %r115, 32
+%r120 = trunc i384 %r119 to i32
+%r122 = getelementptr i32, i32* %r95, i32 6
+store i32 %r120, i32* %r122
+%r123 = lshr i384 %r119, 32
+%r124 = trunc i384 %r123 to i32
+%r126 = getelementptr i32, i32* %r95, i32 7
+store i32 %r124, i32* %r126
+%r127 = lshr i384 %r123, 32
+%r128 = trunc i384 %r127 to i32
+%r130 = getelementptr i32, i32* %r95, i32 8
+store i32 %r128, i32* %r130
+%r131 = lshr i384 %r127, 32
+%r132 = trunc i384 %r131 to i32
+%r134 = getelementptr i32, i32* %r95, i32 9
+store i32 %r132, i32* %r134
+%r135 = lshr i384 %r131, 32
+%r136 = trunc i384 %r135 to i32
+%r138 = getelementptr i32, i32* %r95, i32 10
+store i32 %r136, i32* %r138
+%r139 = lshr i384 %r135, 32
+%r140 = trunc i384 %r139 to i32
+%r142 = getelementptr i32, i32* %r95, i32 11
+store i32 %r140, i32* %r142
+ret void
+}
+define void @mcl_fpDbl_sqrPre11L(i32* noalias  %r1, i32* noalias  %r2) ; Stores 22 i32 words at %r1, computed from the 11 i32 words read at %r2.
+{ ; NOTE(review): presumably the double-width square of a 352-bit value; relies on mulPv352x32, which is not defined in this block - confirm.
+%r3 = load i32, i32* %r2 ; step 0: multiplier word %r2[0]
+%r4 = call i384 @mulPv352x32(i32* %r2, i32 %r3) ; helper gets the whole array plus one word; assumed to return their product as i384 - verify
+%r5 = trunc i384 %r4 to i32
+store i32 %r5, i32* %r1 ; low 32 bits become output word 0
+%r6 = lshr i384 %r4, 32 ; remaining high bits carry into the next step
+%r8 = getelementptr i32, i32* %r2, i32 1 ; step 1: multiplier word %r2[1]
+%r9 = load i32, i32* %r8
+%r10 = call i384 @mulPv352x32(i32* %r2, i32 %r9)
+%r11 = add i384 %r6, %r10 ; carry + new partial result
+%r12 = trunc i384 %r11 to i32
+%r14 = getelementptr i32, i32* %r1, i32 1
+store i32 %r12, i32* %r14 ; output word 1
+%r15 = lshr i384 %r11, 32
+%r17 = getelementptr i32, i32* %r2, i32 2 ; step 2: multiplier word %r2[2]
+%r18 = load i32, i32* %r17
+%r19 = call i384 @mulPv352x32(i32* %r2, i32 %r18)
+%r20 = add i384 %r15, %r19
+%r21 = trunc i384 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 2
+store i32 %r21, i32* %r23
+%r24 = lshr i384 %r20, 32
+%r26 = getelementptr i32, i32* %r2, i32 3 ; step 3: multiplier word %r2[3]
+%r27 = load i32, i32* %r26
+%r28 = call i384 @mulPv352x32(i32* %r2, i32 %r27)
+%r29 = add i384 %r24, %r28
+%r30 = trunc i384 %r29 to i32
+%r32 = getelementptr i32, i32* %r1, i32 3
+store i32 %r30, i32* %r32
+%r33 = lshr i384 %r29, 32
+%r35 = getelementptr i32, i32* %r2, i32 4 ; step 4: multiplier word %r2[4]
+%r36 = load i32, i32* %r35
+%r37 = call i384 @mulPv352x32(i32* %r2, i32 %r36)
+%r38 = add i384 %r33, %r37
+%r39 = trunc i384 %r38 to i32
+%r41 = getelementptr i32, i32* %r1, i32 4
+store i32 %r39, i32* %r41
+%r42 = lshr i384 %r38, 32
+%r44 = getelementptr i32, i32* %r2, i32 5 ; step 5: multiplier word %r2[5]
+%r45 = load i32, i32* %r44
+%r46 = call i384 @mulPv352x32(i32* %r2, i32 %r45)
+%r47 = add i384 %r42, %r46
+%r48 = trunc i384 %r47 to i32
+%r50 = getelementptr i32, i32* %r1, i32 5
+store i32 %r48, i32* %r50
+%r51 = lshr i384 %r47, 32
+%r53 = getelementptr i32, i32* %r2, i32 6 ; step 6: multiplier word %r2[6]
+%r54 = load i32, i32* %r53
+%r55 = call i384 @mulPv352x32(i32* %r2, i32 %r54)
+%r56 = add i384 %r51, %r55
+%r57 = trunc i384 %r56 to i32
+%r59 = getelementptr i32, i32* %r1, i32 6
+store i32 %r57, i32* %r59
+%r60 = lshr i384 %r56, 32
+%r62 = getelementptr i32, i32* %r2, i32 7 ; step 7: multiplier word %r2[7]
+%r63 = load i32, i32* %r62
+%r64 = call i384 @mulPv352x32(i32* %r2, i32 %r63)
+%r65 = add i384 %r60, %r64
+%r66 = trunc i384 %r65 to i32
+%r68 = getelementptr i32, i32* %r1, i32 7
+store i32 %r66, i32* %r68
+%r69 = lshr i384 %r65, 32
+%r71 = getelementptr i32, i32* %r2, i32 8 ; step 8: multiplier word %r2[8]
+%r72 = load i32, i32* %r71
+%r73 = call i384 @mulPv352x32(i32* %r2, i32 %r72)
+%r74 = add i384 %r69, %r73
+%r75 = trunc i384 %r74 to i32
+%r77 = getelementptr i32, i32* %r1, i32 8
+store i32 %r75, i32* %r77
+%r78 = lshr i384 %r74, 32
+%r80 = getelementptr i32, i32* %r2, i32 9 ; step 9: multiplier word %r2[9]
+%r81 = load i32, i32* %r80
+%r82 = call i384 @mulPv352x32(i32* %r2, i32 %r81)
+%r83 = add i384 %r78, %r82
+%r84 = trunc i384 %r83 to i32
+%r86 = getelementptr i32, i32* %r1, i32 9
+store i32 %r84, i32* %r86
+%r87 = lshr i384 %r83, 32
+%r89 = getelementptr i32, i32* %r2, i32 10 ; step 10 (last): multiplier word %r2[10]
+%r90 = load i32, i32* %r89
+%r91 = call i384 @mulPv352x32(i32* %r2, i32 %r90)
+%r92 = add i384 %r87, %r91
+%r94 = getelementptr i32, i32* %r1, i32 10 ; the whole remaining 384-bit accumulator is written as output words 10..21
+%r95 = trunc i384 %r92 to i32
+%r97 = getelementptr i32, i32* %r94, i32 0
+store i32 %r95, i32* %r97
+%r98 = lshr i384 %r92, 32
+%r99 = trunc i384 %r98 to i32
+%r101 = getelementptr i32, i32* %r94, i32 1
+store i32 %r99, i32* %r101
+%r102 = lshr i384 %r98, 32
+%r103 = trunc i384 %r102 to i32
+%r105 = getelementptr i32, i32* %r94, i32 2
+store i32 %r103, i32* %r105
+%r106 = lshr i384 %r102, 32
+%r107 = trunc i384 %r106 to i32
+%r109 = getelementptr i32, i32* %r94, i32 3
+store i32 %r107, i32* %r109
+%r110 = lshr i384 %r106, 32
+%r111 = trunc i384 %r110 to i32
+%r113 = getelementptr i32, i32* %r94, i32 4
+store i32 %r111, i32* %r113
+%r114 = lshr i384 %r110, 32
+%r115 = trunc i384 %r114 to i32
+%r117 = getelementptr i32, i32* %r94, i32 5
+store i32 %r115, i32* %r117
+%r118 = lshr i384 %r114, 32
+%r119 = trunc i384 %r118 to i32
+%r121 = getelementptr i32, i32* %r94, i32 6
+store i32 %r119, i32* %r121
+%r122 = lshr i384 %r118, 32
+%r123 = trunc i384 %r122 to i32
+%r125 = getelementptr i32, i32* %r94, i32 7
+store i32 %r123, i32* %r125
+%r126 = lshr i384 %r122, 32
+%r127 = trunc i384 %r126 to i32
+%r129 = getelementptr i32, i32* %r94, i32 8
+store i32 %r127, i32* %r129
+%r130 = lshr i384 %r126, 32
+%r131 = trunc i384 %r130 to i32
+%r133 = getelementptr i32, i32* %r94, i32 9
+store i32 %r131, i32* %r133
+%r134 = lshr i384 %r130, 32
+%r135 = trunc i384 %r134 to i32
+%r137 = getelementptr i32, i32* %r94, i32 10
+store i32 %r135, i32* %r137
+%r138 = lshr i384 %r134, 32
+%r139 = trunc i384 %r138 to i32
+%r141 = getelementptr i32, i32* %r94, i32 11
+store i32 %r139, i32* %r141 ; %r1[21], the most significant output word
+ret void
+}
+define void @mcl_fp_mont11L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; Stores 11 i32 words at %r1; reads %r3[0..10] and %r4[-1..10]; %r2 is only handed to mulPv352x32.
+{ ; NOTE(review): the round structure looks like word-by-word Montgomery multiplication of %r2 by %r3 with modulus %r4 - confirm against the caller and mulPv352x32.
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; the word stored immediately before the %r4 array
+%r7 = load i32, i32* %r6 ; per-round factor; presumably -1/%r4 mod 2^32 - verify
+%r9 = getelementptr i32, i32* %r3, i32 0 ; round 0: multiplier word %r3[0]
+%r10 = load i32, i32* %r9
+%r11 = call i384 @mulPv352x32(i32* %r2, i32 %r10) ; assumed to return (352-bit value at %r2) * word as i384 - verify
+%r12 = zext i384 %r11 to i416 ; widen so the following add cannot lose a carry
+%r13 = trunc i384 %r11 to i32
+%r14 = mul i32 %r13, %r7 ; low word * %r7, wrapped to 32 bits
+%r15 = call i384 @mulPv352x32(i32* %r4, i32 %r14) ; multiple of the %r4 value to add in
+%r16 = zext i384 %r15 to i416
+%r17 = add i416 %r12, %r16
+%r18 = lshr i416 %r17, 32 ; discard the low 32 bits of the sum
+%r20 = getelementptr i32, i32* %r3, i32 1 ; round 1: multiplier word %r3[1]
+%r21 = load i32, i32* %r20
+%r22 = call i384 @mulPv352x32(i32* %r2, i32 %r21)
+%r23 = zext i384 %r22 to i416
+%r24 = add i416 %r18, %r23 ; accumulator += %r2-product
+%r25 = trunc i416 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i384 @mulPv352x32(i32* %r4, i32 %r26)
+%r28 = zext i384 %r27 to i416
+%r29 = add i416 %r24, %r28 ; accumulator += %r4-product
+%r30 = lshr i416 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2 ; round 2: multiplier word %r3[2]
+%r33 = load i32, i32* %r32
+%r34 = call i384 @mulPv352x32(i32* %r2, i32 %r33)
+%r35 = zext i384 %r34 to i416
+%r36 = add i416 %r30, %r35
+%r37 = trunc i416 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i384 @mulPv352x32(i32* %r4, i32 %r38)
+%r40 = zext i384 %r39 to i416
+%r41 = add i416 %r36, %r40
+%r42 = lshr i416 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3 ; round 3: multiplier word %r3[3]
+%r45 = load i32, i32* %r44
+%r46 = call i384 @mulPv352x32(i32* %r2, i32 %r45)
+%r47 = zext i384 %r46 to i416
+%r48 = add i416 %r42, %r47
+%r49 = trunc i416 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i384 @mulPv352x32(i32* %r4, i32 %r50)
+%r52 = zext i384 %r51 to i416
+%r53 = add i416 %r48, %r52
+%r54 = lshr i416 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4 ; round 4: multiplier word %r3[4]
+%r57 = load i32, i32* %r56
+%r58 = call i384 @mulPv352x32(i32* %r2, i32 %r57)
+%r59 = zext i384 %r58 to i416
+%r60 = add i416 %r54, %r59
+%r61 = trunc i416 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i384 @mulPv352x32(i32* %r4, i32 %r62)
+%r64 = zext i384 %r63 to i416
+%r65 = add i416 %r60, %r64
+%r66 = lshr i416 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5 ; round 5: multiplier word %r3[5]
+%r69 = load i32, i32* %r68
+%r70 = call i384 @mulPv352x32(i32* %r2, i32 %r69)
+%r71 = zext i384 %r70 to i416
+%r72 = add i416 %r66, %r71
+%r73 = trunc i416 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i384 @mulPv352x32(i32* %r4, i32 %r74)
+%r76 = zext i384 %r75 to i416
+%r77 = add i416 %r72, %r76
+%r78 = lshr i416 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6 ; round 6: multiplier word %r3[6]
+%r81 = load i32, i32* %r80
+%r82 = call i384 @mulPv352x32(i32* %r2, i32 %r81)
+%r83 = zext i384 %r82 to i416
+%r84 = add i416 %r78, %r83
+%r85 = trunc i416 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i384 @mulPv352x32(i32* %r4, i32 %r86)
+%r88 = zext i384 %r87 to i416
+%r89 = add i416 %r84, %r88
+%r90 = lshr i416 %r89, 32
+%r92 = getelementptr i32, i32* %r3, i32 7 ; round 7: multiplier word %r3[7]
+%r93 = load i32, i32* %r92
+%r94 = call i384 @mulPv352x32(i32* %r2, i32 %r93)
+%r95 = zext i384 %r94 to i416
+%r96 = add i416 %r90, %r95
+%r97 = trunc i416 %r96 to i32
+%r98 = mul i32 %r97, %r7
+%r99 = call i384 @mulPv352x32(i32* %r4, i32 %r98)
+%r100 = zext i384 %r99 to i416
+%r101 = add i416 %r96, %r100
+%r102 = lshr i416 %r101, 32
+%r104 = getelementptr i32, i32* %r3, i32 8 ; round 8: multiplier word %r3[8]
+%r105 = load i32, i32* %r104
+%r106 = call i384 @mulPv352x32(i32* %r2, i32 %r105)
+%r107 = zext i384 %r106 to i416
+%r108 = add i416 %r102, %r107
+%r109 = trunc i416 %r108 to i32
+%r110 = mul i32 %r109, %r7
+%r111 = call i384 @mulPv352x32(i32* %r4, i32 %r110)
+%r112 = zext i384 %r111 to i416
+%r113 = add i416 %r108, %r112
+%r114 = lshr i416 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 9 ; round 9: multiplier word %r3[9]
+%r117 = load i32, i32* %r116
+%r118 = call i384 @mulPv352x32(i32* %r2, i32 %r117)
+%r119 = zext i384 %r118 to i416
+%r120 = add i416 %r114, %r119
+%r121 = trunc i416 %r120 to i32
+%r122 = mul i32 %r121, %r7
+%r123 = call i384 @mulPv352x32(i32* %r4, i32 %r122)
+%r124 = zext i384 %r123 to i416
+%r125 = add i416 %r120, %r124
+%r126 = lshr i416 %r125, 32
+%r128 = getelementptr i32, i32* %r3, i32 10 ; round 10 (last): multiplier word %r3[10]
+%r129 = load i32, i32* %r128
+%r130 = call i384 @mulPv352x32(i32* %r2, i32 %r129)
+%r131 = zext i384 %r130 to i416
+%r132 = add i416 %r126, %r131
+%r133 = trunc i416 %r132 to i32
+%r134 = mul i32 %r133, %r7
+%r135 = call i384 @mulPv352x32(i32* %r4, i32 %r134)
+%r136 = zext i384 %r135 to i416
+%r137 = add i416 %r132, %r136
+%r138 = lshr i416 %r137, 32
+%r139 = trunc i416 %r138 to i384 ; accumulator after all 11 rounds, kept at 384 bits
+%r140 = load i32, i32* %r4 ; from here to %r210: pack %r4[0..10] into one i352, word i at bits 32*i..32*i+31
+%r141 = zext i32 %r140 to i64
+%r143 = getelementptr i32, i32* %r4, i32 1
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i64
+%r146 = shl i64 %r145, 32
+%r147 = or i64 %r141, %r146
+%r148 = zext i64 %r147 to i96
+%r150 = getelementptr i32, i32* %r4, i32 2
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i96
+%r153 = shl i96 %r152, 64
+%r154 = or i96 %r148, %r153
+%r155 = zext i96 %r154 to i128
+%r157 = getelementptr i32, i32* %r4, i32 3
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i128
+%r160 = shl i128 %r159, 96
+%r161 = or i128 %r155, %r160
+%r162 = zext i128 %r161 to i160
+%r164 = getelementptr i32, i32* %r4, i32 4
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i160
+%r167 = shl i160 %r166, 128
+%r168 = or i160 %r162, %r167
+%r169 = zext i160 %r168 to i192
+%r171 = getelementptr i32, i32* %r4, i32 5
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i192
+%r174 = shl i192 %r173, 160
+%r175 = or i192 %r169, %r174
+%r176 = zext i192 %r175 to i224
+%r178 = getelementptr i32, i32* %r4, i32 6
+%r179 = load i32, i32* %r178
+%r180 = zext i32 %r179 to i224
+%r181 = shl i224 %r180, 192
+%r182 = or i224 %r176, %r181
+%r183 = zext i224 %r182 to i256
+%r185 = getelementptr i32, i32* %r4, i32 7
+%r186 = load i32, i32* %r185
+%r187 = zext i32 %r186 to i256
+%r188 = shl i256 %r187, 224
+%r189 = or i256 %r183, %r188
+%r190 = zext i256 %r189 to i288
+%r192 = getelementptr i32, i32* %r4, i32 8
+%r193 = load i32, i32* %r192
+%r194 = zext i32 %r193 to i288
+%r195 = shl i288 %r194, 256
+%r196 = or i288 %r190, %r195
+%r197 = zext i288 %r196 to i320
+%r199 = getelementptr i32, i32* %r4, i32 9
+%r200 = load i32, i32* %r199
+%r201 = zext i32 %r200 to i320
+%r202 = shl i320 %r201, 288
+%r203 = or i320 %r197, %r202
+%r204 = zext i320 %r203 to i352
+%r206 = getelementptr i32, i32* %r4, i32 10
+%r207 = load i32, i32* %r206
+%r208 = zext i32 %r207 to i352
+%r209 = shl i352 %r208, 320
+%r210 = or i352 %r204, %r209
+%r211 = zext i352 %r210 to i384
+%r212 = sub i384 %r139, %r211 ; accumulator minus the packed %r4 value
+%r213 = lshr i384 %r212, 352
+%r214 = trunc i384 %r213 to i1 ; bit 352 of the difference; presumably signals that the subtraction borrowed
+%r215 = select i1 %r214, i384 %r139, i384 %r212 ; bit set -> keep the unsubtracted accumulator, else keep the difference
+%r216 = trunc i384 %r215 to i352 ; only the low 352 bits are written out
+%r217 = trunc i352 %r216 to i32
+%r219 = getelementptr i32, i32* %r1, i32 0
+store i32 %r217, i32* %r219 ; %r1[0] = least significant word
+%r220 = lshr i352 %r216, 32
+%r221 = trunc i352 %r220 to i32
+%r223 = getelementptr i32, i32* %r1, i32 1
+store i32 %r221, i32* %r223
+%r224 = lshr i352 %r220, 32
+%r225 = trunc i352 %r224 to i32
+%r227 = getelementptr i32, i32* %r1, i32 2
+store i32 %r225, i32* %r227
+%r228 = lshr i352 %r224, 32
+%r229 = trunc i352 %r228 to i32
+%r231 = getelementptr i32, i32* %r1, i32 3
+store i32 %r229, i32* %r231
+%r232 = lshr i352 %r228, 32
+%r233 = trunc i352 %r232 to i32
+%r235 = getelementptr i32, i32* %r1, i32 4
+store i32 %r233, i32* %r235
+%r236 = lshr i352 %r232, 32
+%r237 = trunc i352 %r236 to i32
+%r239 = getelementptr i32, i32* %r1, i32 5
+store i32 %r237, i32* %r239
+%r240 = lshr i352 %r236, 32
+%r241 = trunc i352 %r240 to i32
+%r243 = getelementptr i32, i32* %r1, i32 6
+store i32 %r241, i32* %r243
+%r244 = lshr i352 %r240, 32
+%r245 = trunc i352 %r244 to i32
+%r247 = getelementptr i32, i32* %r1, i32 7
+store i32 %r245, i32* %r247
+%r248 = lshr i352 %r244, 32
+%r249 = trunc i352 %r248 to i32
+%r251 = getelementptr i32, i32* %r1, i32 8
+store i32 %r249, i32* %r251
+%r252 = lshr i352 %r248, 32
+%r253 = trunc i352 %r252 to i32
+%r255 = getelementptr i32, i32* %r1, i32 9
+store i32 %r253, i32* %r255
+%r256 = lshr i352 %r252, 32
+%r257 = trunc i352 %r256 to i32
+%r259 = getelementptr i32, i32* %r1, i32 10
+store i32 %r257, i32* %r259 ; %r1[10] = most significant word
+ret void
+}
+define void @mcl_fp_montNF11L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; Stores 11 i32 words at %r1; reads %r3[0..10] and %r4[-1..10]; %r2 is only handed to mulPv352x32.
+{ ; NOTE(review): same round structure as a word-by-word Montgomery multiplication, but sums stay in i384 with no widening; presumably valid only when the top bit of the %r4 value is clear - confirm.
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; the word stored immediately before the %r4 array
+%r7 = load i32, i32* %r6 ; per-round factor; presumably -1/%r4 mod 2^32 - verify
+%r8 = load i32, i32* %r3 ; round 0: multiplier word %r3[0]
+%r9 = call i384 @mulPv352x32(i32* %r2, i32 %r8) ; assumed to return (352-bit value at %r2) * word as i384 - verify
+%r10 = trunc i384 %r9 to i32
+%r11 = mul i32 %r10, %r7 ; low word * %r7, wrapped to 32 bits
+%r12 = call i384 @mulPv352x32(i32* %r4, i32 %r11) ; multiple of the %r4 value to add in
+%r13 = add i384 %r9, %r12
+%r14 = lshr i384 %r13, 32 ; discard the low 32 bits of the sum
+%r16 = getelementptr i32, i32* %r3, i32 1 ; round 1: multiplier word %r3[1]
+%r17 = load i32, i32* %r16
+%r18 = call i384 @mulPv352x32(i32* %r2, i32 %r17)
+%r19 = add i384 %r14, %r18 ; accumulator += %r2-product
+%r20 = trunc i384 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i384 @mulPv352x32(i32* %r4, i32 %r21)
+%r23 = add i384 %r19, %r22 ; accumulator += %r4-product
+%r24 = lshr i384 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2 ; round 2: multiplier word %r3[2]
+%r27 = load i32, i32* %r26
+%r28 = call i384 @mulPv352x32(i32* %r2, i32 %r27)
+%r29 = add i384 %r24, %r28
+%r30 = trunc i384 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i384 @mulPv352x32(i32* %r4, i32 %r31)
+%r33 = add i384 %r29, %r32
+%r34 = lshr i384 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3 ; round 3: multiplier word %r3[3]
+%r37 = load i32, i32* %r36
+%r38 = call i384 @mulPv352x32(i32* %r2, i32 %r37)
+%r39 = add i384 %r34, %r38
+%r40 = trunc i384 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i384 @mulPv352x32(i32* %r4, i32 %r41)
+%r43 = add i384 %r39, %r42
+%r44 = lshr i384 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4 ; round 4: multiplier word %r3[4]
+%r47 = load i32, i32* %r46
+%r48 = call i384 @mulPv352x32(i32* %r2, i32 %r47)
+%r49 = add i384 %r44, %r48
+%r50 = trunc i384 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i384 @mulPv352x32(i32* %r4, i32 %r51)
+%r53 = add i384 %r49, %r52
+%r54 = lshr i384 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5 ; round 5: multiplier word %r3[5]
+%r57 = load i32, i32* %r56
+%r58 = call i384 @mulPv352x32(i32* %r2, i32 %r57)
+%r59 = add i384 %r54, %r58
+%r60 = trunc i384 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i384 @mulPv352x32(i32* %r4, i32 %r61)
+%r63 = add i384 %r59, %r62
+%r64 = lshr i384 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6 ; round 6: multiplier word %r3[6]
+%r67 = load i32, i32* %r66
+%r68 = call i384 @mulPv352x32(i32* %r2, i32 %r67)
+%r69 = add i384 %r64, %r68
+%r70 = trunc i384 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i384 @mulPv352x32(i32* %r4, i32 %r71)
+%r73 = add i384 %r69, %r72
+%r74 = lshr i384 %r73, 32
+%r76 = getelementptr i32, i32* %r3, i32 7 ; round 7: multiplier word %r3[7]
+%r77 = load i32, i32* %r76
+%r78 = call i384 @mulPv352x32(i32* %r2, i32 %r77)
+%r79 = add i384 %r74, %r78
+%r80 = trunc i384 %r79 to i32
+%r81 = mul i32 %r80, %r7
+%r82 = call i384 @mulPv352x32(i32* %r4, i32 %r81)
+%r83 = add i384 %r79, %r82
+%r84 = lshr i384 %r83, 32
+%r86 = getelementptr i32, i32* %r3, i32 8 ; round 8: multiplier word %r3[8]
+%r87 = load i32, i32* %r86
+%r88 = call i384 @mulPv352x32(i32* %r2, i32 %r87)
+%r89 = add i384 %r84, %r88
+%r90 = trunc i384 %r89 to i32
+%r91 = mul i32 %r90, %r7
+%r92 = call i384 @mulPv352x32(i32* %r4, i32 %r91)
+%r93 = add i384 %r89, %r92
+%r94 = lshr i384 %r93, 32
+%r96 = getelementptr i32, i32* %r3, i32 9 ; round 9: multiplier word %r3[9]
+%r97 = load i32, i32* %r96
+%r98 = call i384 @mulPv352x32(i32* %r2, i32 %r97)
+%r99 = add i384 %r94, %r98
+%r100 = trunc i384 %r99 to i32
+%r101 = mul i32 %r100, %r7
+%r102 = call i384 @mulPv352x32(i32* %r4, i32 %r101)
+%r103 = add i384 %r99, %r102
+%r104 = lshr i384 %r103, 32
+%r106 = getelementptr i32, i32* %r3, i32 10 ; round 10 (last): multiplier word %r3[10]
+%r107 = load i32, i32* %r106
+%r108 = call i384 @mulPv352x32(i32* %r2, i32 %r107)
+%r109 = add i384 %r104, %r108
+%r110 = trunc i384 %r109 to i32
+%r111 = mul i32 %r110, %r7
+%r112 = call i384 @mulPv352x32(i32* %r4, i32 %r111)
+%r113 = add i384 %r109, %r112
+%r114 = lshr i384 %r113, 32
+%r115 = trunc i384 %r114 to i352 ; accumulator after all 11 rounds, narrowed to 352 bits
+%r116 = load i32, i32* %r4 ; from here to %r186: pack %r4[0..10] into one i352, word i at bits 32*i..32*i+31
+%r117 = zext i32 %r116 to i64
+%r119 = getelementptr i32, i32* %r4, i32 1
+%r120 = load i32, i32* %r119
+%r121 = zext i32 %r120 to i64
+%r122 = shl i64 %r121, 32
+%r123 = or i64 %r117, %r122
+%r124 = zext i64 %r123 to i96
+%r126 = getelementptr i32, i32* %r4, i32 2
+%r127 = load i32, i32* %r126
+%r128 = zext i32 %r127 to i96
+%r129 = shl i96 %r128, 64
+%r130 = or i96 %r124, %r129
+%r131 = zext i96 %r130 to i128
+%r133 = getelementptr i32, i32* %r4, i32 3
+%r134 = load i32, i32* %r133
+%r135 = zext i32 %r134 to i128
+%r136 = shl i128 %r135, 96
+%r137 = or i128 %r131, %r136
+%r138 = zext i128 %r137 to i160
+%r140 = getelementptr i32, i32* %r4, i32 4
+%r141 = load i32, i32* %r140
+%r142 = zext i32 %r141 to i160
+%r143 = shl i160 %r142, 128
+%r144 = or i160 %r138, %r143
+%r145 = zext i160 %r144 to i192
+%r147 = getelementptr i32, i32* %r4, i32 5
+%r148 = load i32, i32* %r147
+%r149 = zext i32 %r148 to i192
+%r150 = shl i192 %r149, 160
+%r151 = or i192 %r145, %r150
+%r152 = zext i192 %r151 to i224
+%r154 = getelementptr i32, i32* %r4, i32 6
+%r155 = load i32, i32* %r154
+%r156 = zext i32 %r155 to i224
+%r157 = shl i224 %r156, 192
+%r158 = or i224 %r152, %r157
+%r159 = zext i224 %r158 to i256
+%r161 = getelementptr i32, i32* %r4, i32 7
+%r162 = load i32, i32* %r161
+%r163 = zext i32 %r162 to i256
+%r164 = shl i256 %r163, 224
+%r165 = or i256 %r159, %r164
+%r166 = zext i256 %r165 to i288
+%r168 = getelementptr i32, i32* %r4, i32 8
+%r169 = load i32, i32* %r168
+%r170 = zext i32 %r169 to i288
+%r171 = shl i288 %r170, 256
+%r172 = or i288 %r166, %r171
+%r173 = zext i288 %r172 to i320
+%r175 = getelementptr i32, i32* %r4, i32 9
+%r176 = load i32, i32* %r175
+%r177 = zext i32 %r176 to i320
+%r178 = shl i320 %r177, 288
+%r179 = or i320 %r173, %r178
+%r180 = zext i320 %r179 to i352
+%r182 = getelementptr i32, i32* %r4, i32 10
+%r183 = load i32, i32* %r182
+%r184 = zext i32 %r183 to i352
+%r185 = shl i352 %r184, 320
+%r186 = or i352 %r180, %r185
+%r187 = sub i352 %r115, %r186 ; accumulator minus the packed %r4 value, at 352 bits
+%r188 = lshr i352 %r187, 351
+%r189 = trunc i352 %r188 to i1 ; top bit (bit 351) of the difference, i.e. its sign when read as two's complement
+%r190 = select i1 %r189, i352 %r115, i352 %r187 ; bit set -> keep the unsubtracted accumulator, else keep the difference
+%r191 = trunc i352 %r190 to i32
+%r193 = getelementptr i32, i32* %r1, i32 0
+store i32 %r191, i32* %r193 ; %r1[0] = least significant word
+%r194 = lshr i352 %r190, 32
+%r195 = trunc i352 %r194 to i32
+%r197 = getelementptr i32, i32* %r1, i32 1
+store i32 %r195, i32* %r197
+%r198 = lshr i352 %r194, 32
+%r199 = trunc i352 %r198 to i32
+%r201 = getelementptr i32, i32* %r1, i32 2
+store i32 %r199, i32* %r201
+%r202 = lshr i352 %r198, 32
+%r203 = trunc i352 %r202 to i32
+%r205 = getelementptr i32, i32* %r1, i32 3
+store i32 %r203, i32* %r205
+%r206 = lshr i352 %r202, 32
+%r207 = trunc i352 %r206 to i32
+%r209 = getelementptr i32, i32* %r1, i32 4
+store i32 %r207, i32* %r209
+%r210 = lshr i352 %r206, 32
+%r211 = trunc i352 %r210 to i32
+%r213 = getelementptr i32, i32* %r1, i32 5
+store i32 %r211, i32* %r213
+%r214 = lshr i352 %r210, 32
+%r215 = trunc i352 %r214 to i32
+%r217 = getelementptr i32, i32* %r1, i32 6
+store i32 %r215, i32* %r217
+%r218 = lshr i352 %r214, 32
+%r219 = trunc i352 %r218 to i32
+%r221 = getelementptr i32, i32* %r1, i32 7
+store i32 %r219, i32* %r221
+%r222 = lshr i352 %r218, 32
+%r223 = trunc i352 %r222 to i32
+%r225 = getelementptr i32, i32* %r1, i32 8
+store i32 %r223, i32* %r225
+%r226 = lshr i352 %r222, 32
+%r227 = trunc i352 %r226 to i32
+%r229 = getelementptr i32, i32* %r1, i32 9
+store i32 %r227, i32* %r229
+%r230 = lshr i352 %r226, 32
+%r231 = trunc i352 %r230 to i32
+%r233 = getelementptr i32, i32* %r1, i32 10
+store i32 %r231, i32* %r233 ; %r1[10] = most significant word
+ret void
+}
+define void @mcl_fp_montRed11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; Stores 11 i32 words at %r1; reads the 22 words %r2[0..21] and %r3[-1..10].
+{ ; NOTE(review): the round structure looks like Montgomery reduction of a 704-bit input at %r2 modulo the 352-bit value at %r3 - confirm against the caller and mulPv352x32.
+%r5 = getelementptr i32, i32* %r3, i32 -1 ; the word stored immediately before the %r3 array
+%r6 = load i32, i32* %r5 ; per-round factor; presumably -1/%r3 mod 2^32 - verify
+%r7 = load i32, i32* %r3 ; from here to %r77: pack %r3[0..10] into one i352, word i at bits 32*i..32*i+31
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i288
+%r59 = getelementptr i32, i32* %r3, i32 8
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i288
+%r62 = shl i288 %r61, 256
+%r63 = or i288 %r57, %r62
+%r64 = zext i288 %r63 to i320
+%r66 = getelementptr i32, i32* %r3, i32 9
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i320
+%r69 = shl i320 %r68, 288
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i352
+%r73 = getelementptr i32, i32* %r3, i32 10
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i352
+%r76 = shl i352 %r75, 320
+%r77 = or i352 %r71, %r76 ; packed %r3 value, used only in the final subtraction
+%r78 = load i32, i32* %r2 ; from here to %r225: pack %r2[0..21] into one i704, word i at bits 32*i..32*i+31
+%r79 = zext i32 %r78 to i64
+%r81 = getelementptr i32, i32* %r2, i32 1
+%r82 = load i32, i32* %r81
+%r83 = zext i32 %r82 to i64
+%r84 = shl i64 %r83, 32
+%r85 = or i64 %r79, %r84
+%r86 = zext i64 %r85 to i96
+%r88 = getelementptr i32, i32* %r2, i32 2
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i96
+%r91 = shl i96 %r90, 64
+%r92 = or i96 %r86, %r91
+%r93 = zext i96 %r92 to i128
+%r95 = getelementptr i32, i32* %r2, i32 3
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i128
+%r98 = shl i128 %r97, 96
+%r99 = or i128 %r93, %r98
+%r100 = zext i128 %r99 to i160
+%r102 = getelementptr i32, i32* %r2, i32 4
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i160
+%r105 = shl i160 %r104, 128
+%r106 = or i160 %r100, %r105
+%r107 = zext i160 %r106 to i192
+%r109 = getelementptr i32, i32* %r2, i32 5
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i192
+%r112 = shl i192 %r111, 160
+%r113 = or i192 %r107, %r112
+%r114 = zext i192 %r113 to i224
+%r116 = getelementptr i32, i32* %r2, i32 6
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i224
+%r119 = shl i224 %r118, 192
+%r120 = or i224 %r114, %r119
+%r121 = zext i224 %r120 to i256
+%r123 = getelementptr i32, i32* %r2, i32 7
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i256
+%r126 = shl i256 %r125, 224
+%r127 = or i256 %r121, %r126
+%r128 = zext i256 %r127 to i288
+%r130 = getelementptr i32, i32* %r2, i32 8
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i288
+%r133 = shl i288 %r132, 256
+%r134 = or i288 %r128, %r133
+%r135 = zext i288 %r134 to i320
+%r137 = getelementptr i32, i32* %r2, i32 9
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i320
+%r140 = shl i320 %r139, 288
+%r141 = or i320 %r135, %r140
+%r142 = zext i320 %r141 to i352
+%r144 = getelementptr i32, i32* %r2, i32 10
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i352
+%r147 = shl i352 %r146, 320
+%r148 = or i352 %r142, %r147
+%r149 = zext i352 %r148 to i384
+%r151 = getelementptr i32, i32* %r2, i32 11
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i384
+%r154 = shl i384 %r153, 352
+%r155 = or i384 %r149, %r154
+%r156 = zext i384 %r155 to i416
+%r158 = getelementptr i32, i32* %r2, i32 12
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i416
+%r161 = shl i416 %r160, 384
+%r162 = or i416 %r156, %r161
+%r163 = zext i416 %r162 to i448
+%r165 = getelementptr i32, i32* %r2, i32 13
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i448
+%r168 = shl i448 %r167, 416
+%r169 = or i448 %r163, %r168
+%r170 = zext i448 %r169 to i480
+%r172 = getelementptr i32, i32* %r2, i32 14
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i480
+%r175 = shl i480 %r174, 448
+%r176 = or i480 %r170, %r175
+%r177 = zext i480 %r176 to i512
+%r179 = getelementptr i32, i32* %r2, i32 15
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i512
+%r182 = shl i512 %r181, 480
+%r183 = or i512 %r177, %r182
+%r184 = zext i512 %r183 to i544
+%r186 = getelementptr i32, i32* %r2, i32 16
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i544
+%r189 = shl i544 %r188, 512
+%r190 = or i544 %r184, %r189
+%r191 = zext i544 %r190 to i576
+%r193 = getelementptr i32, i32* %r2, i32 17
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i576
+%r196 = shl i576 %r195, 544
+%r197 = or i576 %r191, %r196
+%r198 = zext i576 %r197 to i608
+%r200 = getelementptr i32, i32* %r2, i32 18
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i608
+%r203 = shl i608 %r202, 576
+%r204 = or i608 %r198, %r203
+%r205 = zext i608 %r204 to i640
+%r207 = getelementptr i32, i32* %r2, i32 19
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i640
+%r210 = shl i640 %r209, 608
+%r211 = or i640 %r205, %r210
+%r212 = zext i640 %r211 to i672
+%r214 = getelementptr i32, i32* %r2, i32 20
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i672
+%r217 = shl i672 %r216, 640
+%r218 = or i672 %r212, %r217
+%r219 = zext i672 %r218 to i704
+%r221 = getelementptr i32, i32* %r2, i32 21
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i704
+%r224 = shl i704 %r223, 672
+%r225 = or i704 %r219, %r224
+%r226 = zext i704 %r225 to i736 ; one spare 32-bit word of headroom for the first add
+%r227 = trunc i736 %r226 to i32 ; round 1 of 11: take the current low word
+%r228 = mul i32 %r227, %r6 ; low word * %r6, wrapped to 32 bits
+%r229 = call i384 @mulPv352x32(i32* %r3, i32 %r228) ; assumed to return (352-bit value at %r3) * word as i384 - verify
+%r230 = zext i384 %r229 to i736
+%r231 = add i736 %r226, %r230
+%r232 = lshr i736 %r231, 32 ; discard the low 32 bits of the sum
+%r233 = trunc i736 %r232 to i704 ; each round narrows the accumulator type by 32 bits
+%r234 = trunc i704 %r233 to i32 ; round 2
+%r235 = mul i32 %r234, %r6
+%r236 = call i384 @mulPv352x32(i32* %r3, i32 %r235)
+%r237 = zext i384 %r236 to i704
+%r238 = add i704 %r233, %r237
+%r239 = lshr i704 %r238, 32
+%r240 = trunc i704 %r239 to i672
+%r241 = trunc i672 %r240 to i32 ; round 3
+%r242 = mul i32 %r241, %r6
+%r243 = call i384 @mulPv352x32(i32* %r3, i32 %r242)
+%r244 = zext i384 %r243 to i672
+%r245 = add i672 %r240, %r244
+%r246 = lshr i672 %r245, 32
+%r247 = trunc i672 %r246 to i640
+%r248 = trunc i640 %r247 to i32 ; round 4
+%r249 = mul i32 %r248, %r6
+%r250 = call i384 @mulPv352x32(i32* %r3, i32 %r249)
+%r251 = zext i384 %r250 to i640
+%r252 = add i640 %r247, %r251
+%r253 = lshr i640 %r252, 32
+%r254 = trunc i640 %r253 to i608
+%r255 = trunc i608 %r254 to i32 ; round 5
+%r256 = mul i32 %r255, %r6
+%r257 = call i384 @mulPv352x32(i32* %r3, i32 %r256)
+%r258 = zext i384 %r257 to i608
+%r259 = add i608 %r254, %r258
+%r260 = lshr i608 %r259, 32
+%r261 = trunc i608 %r260 to i576
+%r262 = trunc i576 %r261 to i32 ; round 6
+%r263 = mul i32 %r262, %r6
+%r264 = call i384 @mulPv352x32(i32* %r3, i32 %r263)
+%r265 = zext i384 %r264 to i576
+%r266 = add i576 %r261, %r265
+%r267 = lshr i576 %r266, 32
+%r268 = trunc i576 %r267 to i544
+%r269 = trunc i544 %r268 to i32 ; round 7
+%r270 = mul i32 %r269, %r6
+%r271 = call i384 @mulPv352x32(i32* %r3, i32 %r270)
+%r272 = zext i384 %r271 to i544
+%r273 = add i544 %r268, %r272
+%r274 = lshr i544 %r273, 32
+%r275 = trunc i544 %r274 to i512
+%r276 = trunc i512 %r275 to i32 ; round 8
+%r277 = mul i32 %r276, %r6
+%r278 = call i384 @mulPv352x32(i32* %r3, i32 %r277)
+%r279 = zext i384 %r278 to i512
+%r280 = add i512 %r275, %r279
+%r281 = lshr i512 %r280, 32
+%r282 = trunc i512 %r281 to i480
+%r283 = trunc i480 %r282 to i32 ; round 9
+%r284 = mul i32 %r283, %r6
+%r285 = call i384 @mulPv352x32(i32* %r3, i32 %r284)
+%r286 = zext i384 %r285 to i480
+%r287 = add i480 %r282, %r286
+%r288 = lshr i480 %r287, 32
+%r289 = trunc i480 %r288 to i448
+%r290 = trunc i448 %r289 to i32 ; round 10
+%r291 = mul i32 %r290, %r6
+%r292 = call i384 @mulPv352x32(i32* %r3, i32 %r291)
+%r293 = zext i384 %r292 to i448
+%r294 = add i448 %r289, %r293
+%r295 = lshr i448 %r294, 32
+%r296 = trunc i448 %r295 to i416
+%r297 = trunc i416 %r296 to i32 ; round 11 (last)
+%r298 = mul i32 %r297, %r6
+%r299 = call i384 @mulPv352x32(i32* %r3, i32 %r298)
+%r300 = zext i384 %r299 to i416
+%r301 = add i416 %r296, %r300
+%r302 = lshr i416 %r301, 32
+%r303 = trunc i416 %r302 to i384 ; accumulator after all 11 rounds, kept at 384 bits
+%r304 = zext i352 %r77 to i384
+%r305 = sub i384 %r303, %r304 ; accumulator minus the packed %r3 value
+%r306 = lshr i384 %r305, 352
+%r307 = trunc i384 %r306 to i1 ; bit 352 of the difference; presumably signals that the subtraction borrowed
+%r308 = select i1 %r307, i384 %r303, i384 %r305 ; bit set -> keep the unsubtracted accumulator, else keep the difference
+%r309 = trunc i384 %r308 to i352 ; only the low 352 bits are written out
+%r310 = trunc i352 %r309 to i32
+%r312 = getelementptr i32, i32* %r1, i32 0
+store i32 %r310, i32* %r312 ; %r1[0] = least significant word
+%r313 = lshr i352 %r309, 32
+%r314 = trunc i352 %r313 to i32
+%r316 = getelementptr i32, i32* %r1, i32 1
+store i32 %r314, i32* %r316
+%r317 = lshr i352 %r313, 32
+%r318 = trunc i352 %r317 to i32
+%r320 = getelementptr i32, i32* %r1, i32 2
+store i32 %r318, i32* %r320
+%r321 = lshr i352 %r317, 32
+%r322 = trunc i352 %r321 to i32
+%r324 = getelementptr i32, i32* %r1, i32 3
+store i32 %r322, i32* %r324
+%r325 = lshr i352 %r321, 32
+%r326 = trunc i352 %r325 to i32
+%r328 = getelementptr i32, i32* %r1, i32 4
+store i32 %r326, i32* %r328
+%r329 = lshr i352 %r325, 32
+%r330 = trunc i352 %r329 to i32
+%r332 = getelementptr i32, i32* %r1, i32 5
+store i32 %r330, i32* %r332
+%r333 = lshr i352 %r329, 32
+%r334 = trunc i352 %r333 to i32
+%r336 = getelementptr i32, i32* %r1, i32 6
+store i32 %r334, i32* %r336
+%r337 = lshr i352 %r333, 32
+%r338 = trunc i352 %r337 to i32
+%r340 = getelementptr i32, i32* %r1, i32 7
+store i32 %r338, i32* %r340
+%r341 = lshr i352 %r337, 32
+%r342 = trunc i352 %r341 to i32
+%r344 = getelementptr i32, i32* %r1, i32 8
+store i32 %r342, i32* %r344
+%r345 = lshr i352 %r341, 32
+%r346 = trunc i352 %r345 to i32
+%r348 = getelementptr i32, i32* %r1, i32 9
+store i32 %r346, i32* %r348
+%r349 = lshr i352 %r345, 32
+%r350 = trunc i352 %r349 to i32
+%r352 = getelementptr i32, i32* %r1, i32 10
+store i32 %r350, i32* %r352 ; %r1[10] = most significant word
+ret void
+}
+define i32 @mcl_fp_addPre11L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Writes the low 352 bits of (value at %r3) + (value at %r4) to %r2 and returns the carry-out.
+{ ; Each operand is read as eleven i32 words, word 0 least significant.
+%r5 = load i32, i32* %r3 ; --- assemble the first operand from %r3[0..10] ---
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384 ; widen by one word so the add below cannot lose its carry
+%r77 = load i32, i32* %r4 ; --- assemble the second operand from %r4[0..10] ---
+%r78 = zext i32 %r77 to i64
+%r80 = getelementptr i32, i32* %r4, i32 1
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i64
+%r83 = shl i64 %r82, 32
+%r84 = or i64 %r78, %r83
+%r85 = zext i64 %r84 to i96
+%r87 = getelementptr i32, i32* %r4, i32 2
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i96
+%r90 = shl i96 %r89, 64
+%r91 = or i96 %r85, %r90
+%r92 = zext i96 %r91 to i128
+%r94 = getelementptr i32, i32* %r4, i32 3
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i128
+%r97 = shl i128 %r96, 96
+%r98 = or i128 %r92, %r97
+%r99 = zext i128 %r98 to i160
+%r101 = getelementptr i32, i32* %r4, i32 4
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i160
+%r104 = shl i160 %r103, 128
+%r105 = or i160 %r99, %r104
+%r106 = zext i160 %r105 to i192
+%r108 = getelementptr i32, i32* %r4, i32 5
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i192
+%r111 = shl i192 %r110, 160
+%r112 = or i192 %r106, %r111
+%r113 = zext i192 %r112 to i224
+%r115 = getelementptr i32, i32* %r4, i32 6
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i224
+%r118 = shl i224 %r117, 192
+%r119 = or i224 %r113, %r118
+%r120 = zext i224 %r119 to i256
+%r122 = getelementptr i32, i32* %r4, i32 7
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i256
+%r125 = shl i256 %r124, 224
+%r126 = or i256 %r120, %r125
+%r127 = zext i256 %r126 to i288
+%r129 = getelementptr i32, i32* %r4, i32 8
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i288
+%r132 = shl i288 %r131, 256
+%r133 = or i288 %r127, %r132
+%r134 = zext i288 %r133 to i320
+%r136 = getelementptr i32, i32* %r4, i32 9
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i320
+%r139 = shl i320 %r138, 288
+%r140 = or i320 %r134, %r139
+%r141 = zext i320 %r140 to i352
+%r143 = getelementptr i32, i32* %r4, i32 10
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i352
+%r146 = shl i352 %r145, 320
+%r147 = or i352 %r141, %r146
+%r148 = zext i352 %r147 to i384
+%r149 = add i384 %r76, %r148 ; 384-bit sum; the carry out of the low 352 bits lands in bit 352
+%r150 = trunc i384 %r149 to i352 ; low 352 bits are the stored result
+%r151 = trunc i352 %r150 to i32 ; --- store the result to %r2[0..10], least significant word first ---
+%r153 = getelementptr i32, i32* %r2, i32 0
+store i32 %r151, i32* %r153
+%r154 = lshr i352 %r150, 32
+%r155 = trunc i352 %r154 to i32
+%r157 = getelementptr i32, i32* %r2, i32 1
+store i32 %r155, i32* %r157
+%r158 = lshr i352 %r154, 32
+%r159 = trunc i352 %r158 to i32
+%r161 = getelementptr i32, i32* %r2, i32 2
+store i32 %r159, i32* %r161
+%r162 = lshr i352 %r158, 32
+%r163 = trunc i352 %r162 to i32
+%r165 = getelementptr i32, i32* %r2, i32 3
+store i32 %r163, i32* %r165
+%r166 = lshr i352 %r162, 32
+%r167 = trunc i352 %r166 to i32
+%r169 = getelementptr i32, i32* %r2, i32 4
+store i32 %r167, i32* %r169
+%r170 = lshr i352 %r166, 32
+%r171 = trunc i352 %r170 to i32
+%r173 = getelementptr i32, i32* %r2, i32 5
+store i32 %r171, i32* %r173
+%r174 = lshr i352 %r170, 32
+%r175 = trunc i352 %r174 to i32
+%r177 = getelementptr i32, i32* %r2, i32 6
+store i32 %r175, i32* %r177
+%r178 = lshr i352 %r174, 32
+%r179 = trunc i352 %r178 to i32
+%r181 = getelementptr i32, i32* %r2, i32 7
+store i32 %r179, i32* %r181
+%r182 = lshr i352 %r178, 32
+%r183 = trunc i352 %r182 to i32
+%r185 = getelementptr i32, i32* %r2, i32 8
+store i32 %r183, i32* %r185
+%r186 = lshr i352 %r182, 32
+%r187 = trunc i352 %r186 to i32
+%r189 = getelementptr i32, i32* %r2, i32 9
+store i32 %r187, i32* %r189
+%r190 = lshr i352 %r186, 32
+%r191 = trunc i352 %r190 to i32
+%r193 = getelementptr i32, i32* %r2, i32 10
+store i32 %r191, i32* %r193
+%r194 = lshr i384 %r149, 352 ; move the carry bit down to bit 0
+%r195 = trunc i384 %r194 to i32
+ret i32 %r195 ; 1 if the sum overflowed 352 bits, else 0
+}
+define i32 @mcl_fp_subPre11L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Writes the low 352 bits of (value at %r3) - (value at %r4) to %r2 and returns the borrow.
+{ ; Each operand is read as eleven i32 words, word 0 least significant.
+%r5 = load i32, i32* %r3 ; --- assemble the minuend from %r3[0..10] ---
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384 ; widen by one word so a borrow is visible above bit 351
+%r77 = load i32, i32* %r4 ; --- assemble the subtrahend from %r4[0..10] ---
+%r78 = zext i32 %r77 to i64
+%r80 = getelementptr i32, i32* %r4, i32 1
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i64
+%r83 = shl i64 %r82, 32
+%r84 = or i64 %r78, %r83
+%r85 = zext i64 %r84 to i96
+%r87 = getelementptr i32, i32* %r4, i32 2
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i96
+%r90 = shl i96 %r89, 64
+%r91 = or i96 %r85, %r90
+%r92 = zext i96 %r91 to i128
+%r94 = getelementptr i32, i32* %r4, i32 3
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i128
+%r97 = shl i128 %r96, 96
+%r98 = or i128 %r92, %r97
+%r99 = zext i128 %r98 to i160
+%r101 = getelementptr i32, i32* %r4, i32 4
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i160
+%r104 = shl i160 %r103, 128
+%r105 = or i160 %r99, %r104
+%r106 = zext i160 %r105 to i192
+%r108 = getelementptr i32, i32* %r4, i32 5
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i192
+%r111 = shl i192 %r110, 160
+%r112 = or i192 %r106, %r111
+%r113 = zext i192 %r112 to i224
+%r115 = getelementptr i32, i32* %r4, i32 6
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i224
+%r118 = shl i224 %r117, 192
+%r119 = or i224 %r113, %r118
+%r120 = zext i224 %r119 to i256
+%r122 = getelementptr i32, i32* %r4, i32 7
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i256
+%r125 = shl i256 %r124, 224
+%r126 = or i256 %r120, %r125
+%r127 = zext i256 %r126 to i288
+%r129 = getelementptr i32, i32* %r4, i32 8
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i288
+%r132 = shl i288 %r131, 256
+%r133 = or i288 %r127, %r132
+%r134 = zext i288 %r133 to i320
+%r136 = getelementptr i32, i32* %r4, i32 9
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i320
+%r139 = shl i320 %r138, 288
+%r140 = or i320 %r134, %r139
+%r141 = zext i320 %r140 to i352
+%r143 = getelementptr i32, i32* %r4, i32 10
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i352
+%r146 = shl i352 %r145, 320
+%r147 = or i352 %r141, %r146
+%r148 = zext i352 %r147 to i384
+%r149 = sub i384 %r76, %r148 ; 384-bit difference; on borrow it wraps and bits 352..383 become all ones
+%r150 = trunc i384 %r149 to i352 ; low 352 bits are the stored result
+%r151 = trunc i352 %r150 to i32 ; --- store the result to %r2[0..10], least significant word first ---
+%r153 = getelementptr i32, i32* %r2, i32 0
+store i32 %r151, i32* %r153
+%r154 = lshr i352 %r150, 32
+%r155 = trunc i352 %r154 to i32
+%r157 = getelementptr i32, i32* %r2, i32 1
+store i32 %r155, i32* %r157
+%r158 = lshr i352 %r154, 32
+%r159 = trunc i352 %r158 to i32
+%r161 = getelementptr i32, i32* %r2, i32 2
+store i32 %r159, i32* %r161
+%r162 = lshr i352 %r158, 32
+%r163 = trunc i352 %r162 to i32
+%r165 = getelementptr i32, i32* %r2, i32 3
+store i32 %r163, i32* %r165
+%r166 = lshr i352 %r162, 32
+%r167 = trunc i352 %r166 to i32
+%r169 = getelementptr i32, i32* %r2, i32 4
+store i32 %r167, i32* %r169
+%r170 = lshr i352 %r166, 32
+%r171 = trunc i352 %r170 to i32
+%r173 = getelementptr i32, i32* %r2, i32 5
+store i32 %r171, i32* %r173
+%r174 = lshr i352 %r170, 32
+%r175 = trunc i352 %r174 to i32
+%r177 = getelementptr i32, i32* %r2, i32 6
+store i32 %r175, i32* %r177
+%r178 = lshr i352 %r174, 32
+%r179 = trunc i352 %r178 to i32
+%r181 = getelementptr i32, i32* %r2, i32 7
+store i32 %r179, i32* %r181
+%r182 = lshr i352 %r178, 32
+%r183 = trunc i352 %r182 to i32
+%r185 = getelementptr i32, i32* %r2, i32 8
+store i32 %r183, i32* %r185
+%r186 = lshr i352 %r182, 32
+%r187 = trunc i352 %r186 to i32
+%r189 = getelementptr i32, i32* %r2, i32 9
+store i32 %r187, i32* %r189
+%r190 = lshr i352 %r186, 32
+%r191 = trunc i352 %r190 to i32
+%r193 = getelementptr i32, i32* %r2, i32 10
+store i32 %r191, i32* %r193
+%r194 = lshr i384 %r149, 352 ; bring the top word (all ones on borrow, zero otherwise) down
+%r195 = trunc i384 %r194 to i32
+%r197 = and i32 %r195, 1 ; keep only bit 0 so the borrow flag is exactly 0 or 1
+ret i32 %r197 ; 1 if the %r3 value was smaller than the %r4 value, else 0
+}
+define void @mcl_fp_shr1_11L(i32* noalias  %r1, i32* noalias  %r2) ; Shifts the 352-bit value at %r2 right by one bit and stores it to %r1.
+{ ; The value is read and written as eleven i32 words, word 0 least significant.
+%r3 = load i32, i32* %r2 ; --- assemble the input from %r2[0..10] ---
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = zext i192 %r38 to i224
+%r41 = getelementptr i32, i32* %r2, i32 6
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i224
+%r44 = shl i224 %r43, 192
+%r45 = or i224 %r39, %r44
+%r46 = zext i224 %r45 to i256
+%r48 = getelementptr i32, i32* %r2, i32 7
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i256
+%r51 = shl i256 %r50, 224
+%r52 = or i256 %r46, %r51
+%r53 = zext i256 %r52 to i288
+%r55 = getelementptr i32, i32* %r2, i32 8
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i288
+%r58 = shl i288 %r57, 256
+%r59 = or i288 %r53, %r58
+%r60 = zext i288 %r59 to i320
+%r62 = getelementptr i32, i32* %r2, i32 9
+%r63 = load i32, i32* %r62
+%r64 = zext i32 %r63 to i320
+%r65 = shl i320 %r64, 288
+%r66 = or i320 %r60, %r65
+%r67 = zext i320 %r66 to i352
+%r69 = getelementptr i32, i32* %r2, i32 10
+%r70 = load i32, i32* %r69
+%r71 = zext i32 %r70 to i352
+%r72 = shl i352 %r71, 320
+%r73 = or i352 %r67, %r72
+%r74 = lshr i352 %r73, 1 ; logical shift: a zero enters at bit 351 and bit 0 is dropped
+%r75 = trunc i352 %r74 to i32 ; --- store the shifted value to %r1[0..10], least significant word first ---
+%r77 = getelementptr i32, i32* %r1, i32 0
+store i32 %r75, i32* %r77
+%r78 = lshr i352 %r74, 32
+%r79 = trunc i352 %r78 to i32
+%r81 = getelementptr i32, i32* %r1, i32 1
+store i32 %r79, i32* %r81
+%r82 = lshr i352 %r78, 32
+%r83 = trunc i352 %r82 to i32
+%r85 = getelementptr i32, i32* %r1, i32 2
+store i32 %r83, i32* %r85
+%r86 = lshr i352 %r82, 32
+%r87 = trunc i352 %r86 to i32
+%r89 = getelementptr i32, i32* %r1, i32 3
+store i32 %r87, i32* %r89
+%r90 = lshr i352 %r86, 32
+%r91 = trunc i352 %r90 to i32
+%r93 = getelementptr i32, i32* %r1, i32 4
+store i32 %r91, i32* %r93
+%r94 = lshr i352 %r90, 32
+%r95 = trunc i352 %r94 to i32
+%r97 = getelementptr i32, i32* %r1, i32 5
+store i32 %r95, i32* %r97
+%r98 = lshr i352 %r94, 32
+%r99 = trunc i352 %r98 to i32
+%r101 = getelementptr i32, i32* %r1, i32 6
+store i32 %r99, i32* %r101
+%r102 = lshr i352 %r98, 32
+%r103 = trunc i352 %r102 to i32
+%r105 = getelementptr i32, i32* %r1, i32 7
+store i32 %r103, i32* %r105
+%r106 = lshr i352 %r102, 32
+%r107 = trunc i352 %r106 to i32
+%r109 = getelementptr i32, i32* %r1, i32 8
+store i32 %r107, i32* %r109
+%r110 = lshr i352 %r106, 32
+%r111 = trunc i352 %r110 to i32
+%r113 = getelementptr i32, i32* %r1, i32 9
+store i32 %r111, i32* %r113
+%r114 = lshr i352 %r110, 32
+%r115 = trunc i352 %r114 to i32
+%r117 = getelementptr i32, i32* %r1, i32 10
+store i32 %r115, i32* %r117
+ret void
+}
+define void @mcl_fp_add11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Stores (value at %r2) + (value at %r3) to %r1, then replaces it with that sum minus (value at %r4) when the subtraction does not borrow.
+{ ; Values are eleven i32 words, word 0 least significant. NOTE(review): %r4 is presumably the field modulus - confirm against callers.
+%r5 = load i32, i32* %r2 ; --- assemble the first addend from %r2[0..10] ---
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = load i32, i32* %r3 ; --- assemble the second addend from %r3[0..10] ---
+%r77 = zext i32 %r76 to i64
+%r79 = getelementptr i32, i32* %r3, i32 1
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i64
+%r82 = shl i64 %r81, 32
+%r83 = or i64 %r77, %r82
+%r84 = zext i64 %r83 to i96
+%r86 = getelementptr i32, i32* %r3, i32 2
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i96
+%r89 = shl i96 %r88, 64
+%r90 = or i96 %r84, %r89
+%r91 = zext i96 %r90 to i128
+%r93 = getelementptr i32, i32* %r3, i32 3
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i128
+%r96 = shl i128 %r95, 96
+%r97 = or i128 %r91, %r96
+%r98 = zext i128 %r97 to i160
+%r100 = getelementptr i32, i32* %r3, i32 4
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i160
+%r103 = shl i160 %r102, 128
+%r104 = or i160 %r98, %r103
+%r105 = zext i160 %r104 to i192
+%r107 = getelementptr i32, i32* %r3, i32 5
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i192
+%r110 = shl i192 %r109, 160
+%r111 = or i192 %r105, %r110
+%r112 = zext i192 %r111 to i224
+%r114 = getelementptr i32, i32* %r3, i32 6
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i224
+%r117 = shl i224 %r116, 192
+%r118 = or i224 %r112, %r117
+%r119 = zext i224 %r118 to i256
+%r121 = getelementptr i32, i32* %r3, i32 7
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i256
+%r124 = shl i256 %r123, 224
+%r125 = or i256 %r119, %r124
+%r126 = zext i256 %r125 to i288
+%r128 = getelementptr i32, i32* %r3, i32 8
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i288
+%r131 = shl i288 %r130, 256
+%r132 = or i288 %r126, %r131
+%r133 = zext i288 %r132 to i320
+%r135 = getelementptr i32, i32* %r3, i32 9
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i320
+%r138 = shl i320 %r137, 288
+%r139 = or i320 %r133, %r138
+%r140 = zext i320 %r139 to i352
+%r142 = getelementptr i32, i32* %r3, i32 10
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i352
+%r145 = shl i352 %r144, 320
+%r146 = or i352 %r140, %r145
+%r147 = zext i352 %r75 to i384 ; widen both addends so the sum keeps its carry in bit 352
+%r148 = zext i352 %r146 to i384
+%r149 = add i384 %r147, %r148 ; full 384-bit sum, reused below for the trial subtraction
+%r150 = trunc i384 %r149 to i352
+%r151 = trunc i352 %r150 to i32 ; --- store the low 352 bits of the sum to %r1[0..10] ---
+%r153 = getelementptr i32, i32* %r1, i32 0
+store i32 %r151, i32* %r153
+%r154 = lshr i352 %r150, 32
+%r155 = trunc i352 %r154 to i32
+%r157 = getelementptr i32, i32* %r1, i32 1
+store i32 %r155, i32* %r157
+%r158 = lshr i352 %r154, 32
+%r159 = trunc i352 %r158 to i32
+%r161 = getelementptr i32, i32* %r1, i32 2
+store i32 %r159, i32* %r161
+%r162 = lshr i352 %r158, 32
+%r163 = trunc i352 %r162 to i32
+%r165 = getelementptr i32, i32* %r1, i32 3
+store i32 %r163, i32* %r165
+%r166 = lshr i352 %r162, 32
+%r167 = trunc i352 %r166 to i32
+%r169 = getelementptr i32, i32* %r1, i32 4
+store i32 %r167, i32* %r169
+%r170 = lshr i352 %r166, 32
+%r171 = trunc i352 %r170 to i32
+%r173 = getelementptr i32, i32* %r1, i32 5
+store i32 %r171, i32* %r173
+%r174 = lshr i352 %r170, 32
+%r175 = trunc i352 %r174 to i32
+%r177 = getelementptr i32, i32* %r1, i32 6
+store i32 %r175, i32* %r177
+%r178 = lshr i352 %r174, 32
+%r179 = trunc i352 %r178 to i32
+%r181 = getelementptr i32, i32* %r1, i32 7
+store i32 %r179, i32* %r181
+%r182 = lshr i352 %r178, 32
+%r183 = trunc i352 %r182 to i32
+%r185 = getelementptr i32, i32* %r1, i32 8
+store i32 %r183, i32* %r185
+%r186 = lshr i352 %r182, 32
+%r187 = trunc i352 %r186 to i32
+%r189 = getelementptr i32, i32* %r1, i32 9
+store i32 %r187, i32* %r189
+%r190 = lshr i352 %r186, 32
+%r191 = trunc i352 %r190 to i32
+%r193 = getelementptr i32, i32* %r1, i32 10
+store i32 %r191, i32* %r193
+%r194 = load i32, i32* %r4 ; --- assemble the value to subtract from %r4[0..10] ---
+%r195 = zext i32 %r194 to i64
+%r197 = getelementptr i32, i32* %r4, i32 1
+%r198 = load i32, i32* %r197
+%r199 = zext i32 %r198 to i64
+%r200 = shl i64 %r199, 32
+%r201 = or i64 %r195, %r200
+%r202 = zext i64 %r201 to i96
+%r204 = getelementptr i32, i32* %r4, i32 2
+%r205 = load i32, i32* %r204
+%r206 = zext i32 %r205 to i96
+%r207 = shl i96 %r206, 64
+%r208 = or i96 %r202, %r207
+%r209 = zext i96 %r208 to i128
+%r211 = getelementptr i32, i32* %r4, i32 3
+%r212 = load i32, i32* %r211
+%r213 = zext i32 %r212 to i128
+%r214 = shl i128 %r213, 96
+%r215 = or i128 %r209, %r214
+%r216 = zext i128 %r215 to i160
+%r218 = getelementptr i32, i32* %r4, i32 4
+%r219 = load i32, i32* %r218
+%r220 = zext i32 %r219 to i160
+%r221 = shl i160 %r220, 128
+%r222 = or i160 %r216, %r221
+%r223 = zext i160 %r222 to i192
+%r225 = getelementptr i32, i32* %r4, i32 5
+%r226 = load i32, i32* %r225
+%r227 = zext i32 %r226 to i192
+%r228 = shl i192 %r227, 160
+%r229 = or i192 %r223, %r228
+%r230 = zext i192 %r229 to i224
+%r232 = getelementptr i32, i32* %r4, i32 6
+%r233 = load i32, i32* %r232
+%r234 = zext i32 %r233 to i224
+%r235 = shl i224 %r234, 192
+%r236 = or i224 %r230, %r235
+%r237 = zext i224 %r236 to i256
+%r239 = getelementptr i32, i32* %r4, i32 7
+%r240 = load i32, i32* %r239
+%r241 = zext i32 %r240 to i256
+%r242 = shl i256 %r241, 224
+%r243 = or i256 %r237, %r242
+%r244 = zext i256 %r243 to i288
+%r246 = getelementptr i32, i32* %r4, i32 8
+%r247 = load i32, i32* %r246
+%r248 = zext i32 %r247 to i288
+%r249 = shl i288 %r248, 256
+%r250 = or i288 %r244, %r249
+%r251 = zext i288 %r250 to i320
+%r253 = getelementptr i32, i32* %r4, i32 9
+%r254 = load i32, i32* %r253
+%r255 = zext i32 %r254 to i320
+%r256 = shl i320 %r255, 288
+%r257 = or i320 %r251, %r256
+%r258 = zext i320 %r257 to i352
+%r260 = getelementptr i32, i32* %r4, i32 10
+%r261 = load i32, i32* %r260
+%r262 = zext i32 %r261 to i352
+%r263 = shl i352 %r262, 320
+%r264 = or i352 %r258, %r263
+%r265 = zext i352 %r264 to i384
+%r266 = sub i384 %r149, %r265 ; trial subtraction: full sum minus the %r4 value, in 384 bits
+%r267 = lshr i384 %r266, 352 ; bit 352 is set exactly when the subtraction borrowed
+%r268 = trunc i384 %r267 to i1
+br i1%r268, label %carry, label %nocarry ; borrow -> keep the sum already stored in %r1
+nocarry: ; no borrow: overwrite %r1 with the difference
+%r269 = trunc i384 %r266 to i352
+%r270 = trunc i352 %r269 to i32
+%r272 = getelementptr i32, i32* %r1, i32 0
+store i32 %r270, i32* %r272
+%r273 = lshr i352 %r269, 32
+%r274 = trunc i352 %r273 to i32
+%r276 = getelementptr i32, i32* %r1, i32 1
+store i32 %r274, i32* %r276
+%r277 = lshr i352 %r273, 32
+%r278 = trunc i352 %r277 to i32
+%r280 = getelementptr i32, i32* %r1, i32 2
+store i32 %r278, i32* %r280
+%r281 = lshr i352 %r277, 32
+%r282 = trunc i352 %r281 to i32
+%r284 = getelementptr i32, i32* %r1, i32 3
+store i32 %r282, i32* %r284
+%r285 = lshr i352 %r281, 32
+%r286 = trunc i352 %r285 to i32
+%r288 = getelementptr i32, i32* %r1, i32 4
+store i32 %r286, i32* %r288
+%r289 = lshr i352 %r285, 32
+%r290 = trunc i352 %r289 to i32
+%r292 = getelementptr i32, i32* %r1, i32 5
+store i32 %r290, i32* %r292
+%r293 = lshr i352 %r289, 32
+%r294 = trunc i352 %r293 to i32
+%r296 = getelementptr i32, i32* %r1, i32 6
+store i32 %r294, i32* %r296
+%r297 = lshr i352 %r293, 32
+%r298 = trunc i352 %r297 to i32
+%r300 = getelementptr i32, i32* %r1, i32 7
+store i32 %r298, i32* %r300
+%r301 = lshr i352 %r297, 32
+%r302 = trunc i352 %r301 to i32
+%r304 = getelementptr i32, i32* %r1, i32 8
+store i32 %r302, i32* %r304
+%r305 = lshr i352 %r301, 32
+%r306 = trunc i352 %r305 to i32
+%r308 = getelementptr i32, i32* %r1, i32 9
+store i32 %r306, i32* %r308
+%r309 = lshr i352 %r305, 32
+%r310 = trunc i352 %r309 to i32
+%r312 = getelementptr i32, i32* %r1, i32 10
+store i32 %r310, i32* %r312
+ret void
+carry: ; borrow: nothing more to write, %r1 keeps the sum
+ret void
+}
+define void @mcl_fp_addNF11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Stores to %r1 either (value at %r2) + (value at %r3) or that sum minus (value at %r4), chosen branch-free by the top bit of the difference.
+{ ; Values are eleven i32 words, word 0 least significant. NOTE(review): %r4 is presumably the field modulus - confirm against callers.
+%r5 = load i32, i32* %r2 ; --- assemble the first addend from %r2[0..10] ---
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = load i32, i32* %r3 ; --- assemble the second addend from %r3[0..10] ---
+%r77 = zext i32 %r76 to i64
+%r79 = getelementptr i32, i32* %r3, i32 1
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i64
+%r82 = shl i64 %r81, 32
+%r83 = or i64 %r77, %r82
+%r84 = zext i64 %r83 to i96
+%r86 = getelementptr i32, i32* %r3, i32 2
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i96
+%r89 = shl i96 %r88, 64
+%r90 = or i96 %r84, %r89
+%r91 = zext i96 %r90 to i128
+%r93 = getelementptr i32, i32* %r3, i32 3
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i128
+%r96 = shl i128 %r95, 96
+%r97 = or i128 %r91, %r96
+%r98 = zext i128 %r97 to i160
+%r100 = getelementptr i32, i32* %r3, i32 4
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i160
+%r103 = shl i160 %r102, 128
+%r104 = or i160 %r98, %r103
+%r105 = zext i160 %r104 to i192
+%r107 = getelementptr i32, i32* %r3, i32 5
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i192
+%r110 = shl i192 %r109, 160
+%r111 = or i192 %r105, %r110
+%r112 = zext i192 %r111 to i224
+%r114 = getelementptr i32, i32* %r3, i32 6
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i224
+%r117 = shl i224 %r116, 192
+%r118 = or i224 %r112, %r117
+%r119 = zext i224 %r118 to i256
+%r121 = getelementptr i32, i32* %r3, i32 7
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i256
+%r124 = shl i256 %r123, 224
+%r125 = or i256 %r119, %r124
+%r126 = zext i256 %r125 to i288
+%r128 = getelementptr i32, i32* %r3, i32 8
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i288
+%r131 = shl i288 %r130, 256
+%r132 = or i288 %r126, %r131
+%r133 = zext i288 %r132 to i320
+%r135 = getelementptr i32, i32* %r3, i32 9
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i320
+%r138 = shl i320 %r137, 288
+%r139 = or i320 %r133, %r138
+%r140 = zext i320 %r139 to i352
+%r142 = getelementptr i32, i32* %r3, i32 10
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i352
+%r145 = shl i352 %r144, 320
+%r146 = or i352 %r140, %r145
+%r147 = add i352 %r75, %r146 ; 352-bit add with no widening: a carry out of bit 351 would be discarded
+%r148 = load i32, i32* %r4 ; --- assemble the value to subtract from %r4[0..10] ---
+%r149 = zext i32 %r148 to i64
+%r151 = getelementptr i32, i32* %r4, i32 1
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i64
+%r154 = shl i64 %r153, 32
+%r155 = or i64 %r149, %r154
+%r156 = zext i64 %r155 to i96
+%r158 = getelementptr i32, i32* %r4, i32 2
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i96
+%r161 = shl i96 %r160, 64
+%r162 = or i96 %r156, %r161
+%r163 = zext i96 %r162 to i128
+%r165 = getelementptr i32, i32* %r4, i32 3
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i128
+%r168 = shl i128 %r167, 96
+%r169 = or i128 %r163, %r168
+%r170 = zext i128 %r169 to i160
+%r172 = getelementptr i32, i32* %r4, i32 4
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i160
+%r175 = shl i160 %r174, 128
+%r176 = or i160 %r170, %r175
+%r177 = zext i160 %r176 to i192
+%r179 = getelementptr i32, i32* %r4, i32 5
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i192
+%r182 = shl i192 %r181, 160
+%r183 = or i192 %r177, %r182
+%r184 = zext i192 %r183 to i224
+%r186 = getelementptr i32, i32* %r4, i32 6
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i224
+%r189 = shl i224 %r188, 192
+%r190 = or i224 %r184, %r189
+%r191 = zext i224 %r190 to i256
+%r193 = getelementptr i32, i32* %r4, i32 7
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i256
+%r196 = shl i256 %r195, 224
+%r197 = or i256 %r191, %r196
+%r198 = zext i256 %r197 to i288
+%r200 = getelementptr i32, i32* %r4, i32 8
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i288
+%r203 = shl i288 %r202, 256
+%r204 = or i288 %r198, %r203
+%r205 = zext i288 %r204 to i320
+%r207 = getelementptr i32, i32* %r4, i32 9
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i320
+%r210 = shl i320 %r209, 288
+%r211 = or i320 %r205, %r210
+%r212 = zext i320 %r211 to i352
+%r214 = getelementptr i32, i32* %r4, i32 10
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i352
+%r217 = shl i352 %r216, 320
+%r218 = or i352 %r212, %r217
+%r219 = sub i352 %r147, %r218 ; 352-bit difference: sum minus the %r4 value
+%r220 = lshr i352 %r219, 351 ; isolate bit 351, the top bit of the difference
+%r221 = trunc i352 %r220 to i1 ; NOTE(review): using the top bit as a "went negative" flag presumably assumes the %r4 value leaves bit 351 clear - confirm
+%r222 = select i1 %r221, i352 %r147, i352 %r219 ; top bit set -> keep the sum, otherwise keep the difference
+%r223 = trunc i352 %r222 to i32 ; --- store the selected value to %r1[0..10], least significant word first ---
+%r225 = getelementptr i32, i32* %r1, i32 0
+store i32 %r223, i32* %r225
+%r226 = lshr i352 %r222, 32
+%r227 = trunc i352 %r226 to i32
+%r229 = getelementptr i32, i32* %r1, i32 1
+store i32 %r227, i32* %r229
+%r230 = lshr i352 %r226, 32
+%r231 = trunc i352 %r230 to i32
+%r233 = getelementptr i32, i32* %r1, i32 2
+store i32 %r231, i32* %r233
+%r234 = lshr i352 %r230, 32
+%r235 = trunc i352 %r234 to i32
+%r237 = getelementptr i32, i32* %r1, i32 3
+store i32 %r235, i32* %r237
+%r238 = lshr i352 %r234, 32
+%r239 = trunc i352 %r238 to i32
+%r241 = getelementptr i32, i32* %r1, i32 4
+store i32 %r239, i32* %r241
+%r242 = lshr i352 %r238, 32
+%r243 = trunc i352 %r242 to i32
+%r245 = getelementptr i32, i32* %r1, i32 5
+store i32 %r243, i32* %r245
+%r246 = lshr i352 %r242, 32
+%r247 = trunc i352 %r246 to i32
+%r249 = getelementptr i32, i32* %r1, i32 6
+store i32 %r247, i32* %r249
+%r250 = lshr i352 %r246, 32
+%r251 = trunc i352 %r250 to i32
+%r253 = getelementptr i32, i32* %r1, i32 7
+store i32 %r251, i32* %r253
+%r254 = lshr i352 %r250, 32
+%r255 = trunc i352 %r254 to i32
+%r257 = getelementptr i32, i32* %r1, i32 8
+store i32 %r255, i32* %r257
+%r258 = lshr i352 %r254, 32
+%r259 = trunc i352 %r258 to i32
+%r261 = getelementptr i32, i32* %r1, i32 9
+store i32 %r259, i32* %r261
+%r262 = lshr i352 %r258, 32
+%r263 = trunc i352 %r262 to i32
+%r265 = getelementptr i32, i32* %r1, i32 10
+store i32 %r263, i32* %r265
+ret void
+}
+define void @mcl_fp_sub11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r1 = %r2 - %r3 over 11 32-bit limbs; the value at %r4 is added back when the subtraction borrows
+{
+%r5 = load i32, i32* %r2 ; pack the 11 limbs at %r2 into one i352 (limb i lands at bit 32*i)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74 ; %r75 = first operand as a single i352
+%r76 = load i32, i32* %r3 ; same packing for the 11 limbs at %r3
+%r77 = zext i32 %r76 to i64
+%r79 = getelementptr i32, i32* %r3, i32 1
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i64
+%r82 = shl i64 %r81, 32
+%r83 = or i64 %r77, %r82
+%r84 = zext i64 %r83 to i96
+%r86 = getelementptr i32, i32* %r3, i32 2
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i96
+%r89 = shl i96 %r88, 64
+%r90 = or i96 %r84, %r89
+%r91 = zext i96 %r90 to i128
+%r93 = getelementptr i32, i32* %r3, i32 3
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i128
+%r96 = shl i128 %r95, 96
+%r97 = or i128 %r91, %r96
+%r98 = zext i128 %r97 to i160
+%r100 = getelementptr i32, i32* %r3, i32 4
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i160
+%r103 = shl i160 %r102, 128
+%r104 = or i160 %r98, %r103
+%r105 = zext i160 %r104 to i192
+%r107 = getelementptr i32, i32* %r3, i32 5
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i192
+%r110 = shl i192 %r109, 160
+%r111 = or i192 %r105, %r110
+%r112 = zext i192 %r111 to i224
+%r114 = getelementptr i32, i32* %r3, i32 6
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i224
+%r117 = shl i224 %r116, 192
+%r118 = or i224 %r112, %r117
+%r119 = zext i224 %r118 to i256
+%r121 = getelementptr i32, i32* %r3, i32 7
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i256
+%r124 = shl i256 %r123, 224
+%r125 = or i256 %r119, %r124
+%r126 = zext i256 %r125 to i288
+%r128 = getelementptr i32, i32* %r3, i32 8
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i288
+%r131 = shl i288 %r130, 256
+%r132 = or i288 %r126, %r131
+%r133 = zext i288 %r132 to i320
+%r135 = getelementptr i32, i32* %r3, i32 9
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i320
+%r138 = shl i320 %r137, 288
+%r139 = or i320 %r133, %r138
+%r140 = zext i320 %r139 to i352
+%r142 = getelementptr i32, i32* %r3, i32 10
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i352
+%r145 = shl i352 %r144, 320
+%r146 = or i352 %r140, %r145 ; %r146 = second operand as a single i352
+%r147 = zext i352 %r75 to i384 ; widen both operands to i384 so a borrow shows up above bit 351
+%r148 = zext i352 %r146 to i384
+%r149 = sub i384 %r147, %r148
+%r150 = trunc i384 %r149 to i352 ; low 352 bits of the difference
+%r151 = lshr i384 %r149, 352
+%r152 = trunc i384 %r151 to i1 ; bit 352 of the widened difference: 1 iff %r75 < %r146 (unsigned)
+%r153 = trunc i352 %r150 to i32 ; store the raw difference to %r1, lowest limb first, shifting right 32 bits per limb
+%r155 = getelementptr i32, i32* %r1, i32 0
+store i32 %r153, i32* %r155
+%r156 = lshr i352 %r150, 32
+%r157 = trunc i352 %r156 to i32
+%r159 = getelementptr i32, i32* %r1, i32 1
+store i32 %r157, i32* %r159
+%r160 = lshr i352 %r156, 32
+%r161 = trunc i352 %r160 to i32
+%r163 = getelementptr i32, i32* %r1, i32 2
+store i32 %r161, i32* %r163
+%r164 = lshr i352 %r160, 32
+%r165 = trunc i352 %r164 to i32
+%r167 = getelementptr i32, i32* %r1, i32 3
+store i32 %r165, i32* %r167
+%r168 = lshr i352 %r164, 32
+%r169 = trunc i352 %r168 to i32
+%r171 = getelementptr i32, i32* %r1, i32 4
+store i32 %r169, i32* %r171
+%r172 = lshr i352 %r168, 32
+%r173 = trunc i352 %r172 to i32
+%r175 = getelementptr i32, i32* %r1, i32 5
+store i32 %r173, i32* %r175
+%r176 = lshr i352 %r172, 32
+%r177 = trunc i352 %r176 to i32
+%r179 = getelementptr i32, i32* %r1, i32 6
+store i32 %r177, i32* %r179
+%r180 = lshr i352 %r176, 32
+%r181 = trunc i352 %r180 to i32
+%r183 = getelementptr i32, i32* %r1, i32 7
+store i32 %r181, i32* %r183
+%r184 = lshr i352 %r180, 32
+%r185 = trunc i352 %r184 to i32
+%r187 = getelementptr i32, i32* %r1, i32 8
+store i32 %r185, i32* %r187
+%r188 = lshr i352 %r184, 32
+%r189 = trunc i352 %r188 to i32
+%r191 = getelementptr i32, i32* %r1, i32 9
+store i32 %r189, i32* %r191
+%r192 = lshr i352 %r188, 32
+%r193 = trunc i352 %r192 to i32
+%r195 = getelementptr i32, i32* %r1, i32 10
+store i32 %r193, i32* %r195
+br i1%r152, label %carry, label %nocarry ; borrow set => fix up in %carry; otherwise the stored difference is final
+nocarry:
+ret void
+carry:
+%r196 = load i32, i32* %r4 ; pack the 11 limbs at %r4 (presumably the field modulus - confirm against callers)
+%r197 = zext i32 %r196 to i64
+%r199 = getelementptr i32, i32* %r4, i32 1
+%r200 = load i32, i32* %r199
+%r201 = zext i32 %r200 to i64
+%r202 = shl i64 %r201, 32
+%r203 = or i64 %r197, %r202
+%r204 = zext i64 %r203 to i96
+%r206 = getelementptr i32, i32* %r4, i32 2
+%r207 = load i32, i32* %r206
+%r208 = zext i32 %r207 to i96
+%r209 = shl i96 %r208, 64
+%r210 = or i96 %r204, %r209
+%r211 = zext i96 %r210 to i128
+%r213 = getelementptr i32, i32* %r4, i32 3
+%r214 = load i32, i32* %r213
+%r215 = zext i32 %r214 to i128
+%r216 = shl i128 %r215, 96
+%r217 = or i128 %r211, %r216
+%r218 = zext i128 %r217 to i160
+%r220 = getelementptr i32, i32* %r4, i32 4
+%r221 = load i32, i32* %r220
+%r222 = zext i32 %r221 to i160
+%r223 = shl i160 %r222, 128
+%r224 = or i160 %r218, %r223
+%r225 = zext i160 %r224 to i192
+%r227 = getelementptr i32, i32* %r4, i32 5
+%r228 = load i32, i32* %r227
+%r229 = zext i32 %r228 to i192
+%r230 = shl i192 %r229, 160
+%r231 = or i192 %r225, %r230
+%r232 = zext i192 %r231 to i224
+%r234 = getelementptr i32, i32* %r4, i32 6
+%r235 = load i32, i32* %r234
+%r236 = zext i32 %r235 to i224
+%r237 = shl i224 %r236, 192
+%r238 = or i224 %r232, %r237
+%r239 = zext i224 %r238 to i256
+%r241 = getelementptr i32, i32* %r4, i32 7
+%r242 = load i32, i32* %r241
+%r243 = zext i32 %r242 to i256
+%r244 = shl i256 %r243, 224
+%r245 = or i256 %r239, %r244
+%r246 = zext i256 %r245 to i288
+%r248 = getelementptr i32, i32* %r4, i32 8
+%r249 = load i32, i32* %r248
+%r250 = zext i32 %r249 to i288
+%r251 = shl i288 %r250, 256
+%r252 = or i288 %r246, %r251
+%r253 = zext i288 %r252 to i320
+%r255 = getelementptr i32, i32* %r4, i32 9
+%r256 = load i32, i32* %r255
+%r257 = zext i32 %r256 to i320
+%r258 = shl i320 %r257, 288
+%r259 = or i320 %r253, %r258
+%r260 = zext i320 %r259 to i352
+%r262 = getelementptr i32, i32* %r4, i32 10
+%r263 = load i32, i32* %r262
+%r264 = zext i32 %r263 to i352
+%r265 = shl i352 %r264, 320
+%r266 = or i352 %r260, %r265
+%r267 = add i352 %r150, %r266 ; add the %r4 value to the truncated difference (wraps modulo 2^352)
+%r268 = trunc i352 %r267 to i32 ; overwrite %r1 with the corrected value, lowest limb first
+%r270 = getelementptr i32, i32* %r1, i32 0
+store i32 %r268, i32* %r270
+%r271 = lshr i352 %r267, 32
+%r272 = trunc i352 %r271 to i32
+%r274 = getelementptr i32, i32* %r1, i32 1
+store i32 %r272, i32* %r274
+%r275 = lshr i352 %r271, 32
+%r276 = trunc i352 %r275 to i32
+%r278 = getelementptr i32, i32* %r1, i32 2
+store i32 %r276, i32* %r278
+%r279 = lshr i352 %r275, 32
+%r280 = trunc i352 %r279 to i32
+%r282 = getelementptr i32, i32* %r1, i32 3
+store i32 %r280, i32* %r282
+%r283 = lshr i352 %r279, 32
+%r284 = trunc i352 %r283 to i32
+%r286 = getelementptr i32, i32* %r1, i32 4
+store i32 %r284, i32* %r286
+%r287 = lshr i352 %r283, 32
+%r288 = trunc i352 %r287 to i32
+%r290 = getelementptr i32, i32* %r1, i32 5
+store i32 %r288, i32* %r290
+%r291 = lshr i352 %r287, 32
+%r292 = trunc i352 %r291 to i32
+%r294 = getelementptr i32, i32* %r1, i32 6
+store i32 %r292, i32* %r294
+%r295 = lshr i352 %r291, 32
+%r296 = trunc i352 %r295 to i32
+%r298 = getelementptr i32, i32* %r1, i32 7
+store i32 %r296, i32* %r298
+%r299 = lshr i352 %r295, 32
+%r300 = trunc i352 %r299 to i32
+%r302 = getelementptr i32, i32* %r1, i32 8
+store i32 %r300, i32* %r302
+%r303 = lshr i352 %r299, 32
+%r304 = trunc i352 %r303 to i32
+%r306 = getelementptr i32, i32* %r1, i32 9
+store i32 %r304, i32* %r306
+%r307 = lshr i352 %r303, 32
+%r308 = trunc i352 %r307 to i32
+%r310 = getelementptr i32, i32* %r1, i32 10
+store i32 %r308, i32* %r310
+ret void
+}
+define void @mcl_fp_subNF11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; branch-free %r1 = %r2 - %r3 over 11 32-bit limbs; the value at %r4 is added when the difference's top bit is set
+{
+%r5 = load i32, i32* %r2 ; pack the 11 limbs at %r2 into one i352 (limb i lands at bit 32*i)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74 ; %r75 = first operand as a single i352
+%r76 = load i32, i32* %r3 ; same packing for the 11 limbs at %r3
+%r77 = zext i32 %r76 to i64
+%r79 = getelementptr i32, i32* %r3, i32 1
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i64
+%r82 = shl i64 %r81, 32
+%r83 = or i64 %r77, %r82
+%r84 = zext i64 %r83 to i96
+%r86 = getelementptr i32, i32* %r3, i32 2
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i96
+%r89 = shl i96 %r88, 64
+%r90 = or i96 %r84, %r89
+%r91 = zext i96 %r90 to i128
+%r93 = getelementptr i32, i32* %r3, i32 3
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i128
+%r96 = shl i128 %r95, 96
+%r97 = or i128 %r91, %r96
+%r98 = zext i128 %r97 to i160
+%r100 = getelementptr i32, i32* %r3, i32 4
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i160
+%r103 = shl i160 %r102, 128
+%r104 = or i160 %r98, %r103
+%r105 = zext i160 %r104 to i192
+%r107 = getelementptr i32, i32* %r3, i32 5
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i192
+%r110 = shl i192 %r109, 160
+%r111 = or i192 %r105, %r110
+%r112 = zext i192 %r111 to i224
+%r114 = getelementptr i32, i32* %r3, i32 6
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i224
+%r117 = shl i224 %r116, 192
+%r118 = or i224 %r112, %r117
+%r119 = zext i224 %r118 to i256
+%r121 = getelementptr i32, i32* %r3, i32 7
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i256
+%r124 = shl i256 %r123, 224
+%r125 = or i256 %r119, %r124
+%r126 = zext i256 %r125 to i288
+%r128 = getelementptr i32, i32* %r3, i32 8
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i288
+%r131 = shl i288 %r130, 256
+%r132 = or i288 %r126, %r131
+%r133 = zext i288 %r132 to i320
+%r135 = getelementptr i32, i32* %r3, i32 9
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i320
+%r138 = shl i320 %r137, 288
+%r139 = or i320 %r133, %r138
+%r140 = zext i320 %r139 to i352
+%r142 = getelementptr i32, i32* %r3, i32 10
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i352
+%r145 = shl i352 %r144, 320
+%r146 = or i352 %r140, %r145 ; %r146 = second operand as a single i352
+%r147 = sub i352 %r75, %r146 ; wrapping difference modulo 2^352 (no widening, unlike mcl_fp_sub11L)
+%r148 = lshr i352 %r147, 351
+%r149 = trunc i352 %r148 to i1 ; bit 351 (top bit) of the difference, used as the "needs correction" flag
+%r150 = load i32, i32* %r4 ; pack the 11 limbs at %r4 (presumably the field modulus - confirm against callers); loaded unconditionally
+%r151 = zext i32 %r150 to i64
+%r153 = getelementptr i32, i32* %r4, i32 1
+%r154 = load i32, i32* %r153
+%r155 = zext i32 %r154 to i64
+%r156 = shl i64 %r155, 32
+%r157 = or i64 %r151, %r156
+%r158 = zext i64 %r157 to i96
+%r160 = getelementptr i32, i32* %r4, i32 2
+%r161 = load i32, i32* %r160
+%r162 = zext i32 %r161 to i96
+%r163 = shl i96 %r162, 64
+%r164 = or i96 %r158, %r163
+%r165 = zext i96 %r164 to i128
+%r167 = getelementptr i32, i32* %r4, i32 3
+%r168 = load i32, i32* %r167
+%r169 = zext i32 %r168 to i128
+%r170 = shl i128 %r169, 96
+%r171 = or i128 %r165, %r170
+%r172 = zext i128 %r171 to i160
+%r174 = getelementptr i32, i32* %r4, i32 4
+%r175 = load i32, i32* %r174
+%r176 = zext i32 %r175 to i160
+%r177 = shl i160 %r176, 128
+%r178 = or i160 %r172, %r177
+%r179 = zext i160 %r178 to i192
+%r181 = getelementptr i32, i32* %r4, i32 5
+%r182 = load i32, i32* %r181
+%r183 = zext i32 %r182 to i192
+%r184 = shl i192 %r183, 160
+%r185 = or i192 %r179, %r184
+%r186 = zext i192 %r185 to i224
+%r188 = getelementptr i32, i32* %r4, i32 6
+%r189 = load i32, i32* %r188
+%r190 = zext i32 %r189 to i224
+%r191 = shl i224 %r190, 192
+%r192 = or i224 %r186, %r191
+%r193 = zext i224 %r192 to i256
+%r195 = getelementptr i32, i32* %r4, i32 7
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i256
+%r198 = shl i256 %r197, 224
+%r199 = or i256 %r193, %r198
+%r200 = zext i256 %r199 to i288
+%r202 = getelementptr i32, i32* %r4, i32 8
+%r203 = load i32, i32* %r202
+%r204 = zext i32 %r203 to i288
+%r205 = shl i288 %r204, 256
+%r206 = or i288 %r200, %r205
+%r207 = zext i288 %r206 to i320
+%r209 = getelementptr i32, i32* %r4, i32 9
+%r210 = load i32, i32* %r209
+%r211 = zext i32 %r210 to i320
+%r212 = shl i320 %r211, 288
+%r213 = or i320 %r207, %r212
+%r214 = zext i320 %r213 to i352
+%r216 = getelementptr i32, i32* %r4, i32 10
+%r217 = load i32, i32* %r216
+%r218 = zext i32 %r217 to i352
+%r219 = shl i352 %r218, 320
+%r220 = or i352 %r214, %r219
+%r222 = select i1 %r149, i352 %r220, i352 0 ; addend is the %r4 value when the flag is set, otherwise 0
+%r223 = add i352 %r147, %r222 ; corrected result (wraps modulo 2^352)
+%r224 = trunc i352 %r223 to i32 ; store the result to %r1, lowest limb first, shifting right 32 bits per limb
+%r226 = getelementptr i32, i32* %r1, i32 0
+store i32 %r224, i32* %r226
+%r227 = lshr i352 %r223, 32
+%r228 = trunc i352 %r227 to i32
+%r230 = getelementptr i32, i32* %r1, i32 1
+store i32 %r228, i32* %r230
+%r231 = lshr i352 %r227, 32
+%r232 = trunc i352 %r231 to i32
+%r234 = getelementptr i32, i32* %r1, i32 2
+store i32 %r232, i32* %r234
+%r235 = lshr i352 %r231, 32
+%r236 = trunc i352 %r235 to i32
+%r238 = getelementptr i32, i32* %r1, i32 3
+store i32 %r236, i32* %r238
+%r239 = lshr i352 %r235, 32
+%r240 = trunc i352 %r239 to i32
+%r242 = getelementptr i32, i32* %r1, i32 4
+store i32 %r240, i32* %r242
+%r243 = lshr i352 %r239, 32
+%r244 = trunc i352 %r243 to i32
+%r246 = getelementptr i32, i32* %r1, i32 5
+store i32 %r244, i32* %r246
+%r247 = lshr i352 %r243, 32
+%r248 = trunc i352 %r247 to i32
+%r250 = getelementptr i32, i32* %r1, i32 6
+store i32 %r248, i32* %r250
+%r251 = lshr i352 %r247, 32
+%r252 = trunc i352 %r251 to i32
+%r254 = getelementptr i32, i32* %r1, i32 7
+store i32 %r252, i32* %r254
+%r255 = lshr i352 %r251, 32
+%r256 = trunc i352 %r255 to i32
+%r258 = getelementptr i32, i32* %r1, i32 8
+store i32 %r256, i32* %r258
+%r259 = lshr i352 %r255, 32
+%r260 = trunc i352 %r259 to i32
+%r262 = getelementptr i32, i32* %r1, i32 9
+store i32 %r260, i32* %r262
+%r263 = lshr i352 %r259, 32
+%r264 = trunc i352 %r263 to i32
+%r266 = getelementptr i32, i32* %r1, i32 10
+store i32 %r264, i32* %r266
+ret void
+}
+define void @mcl_fpDbl_add11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 22-limb add %r1 = %r2 + %r3; the low 11 limbs are stored as is, the upper half is conditionally reduced by the 11-limb value at %r4
+{
+%r5 = load i32, i32* %r2 ; pack the 22 limbs at %r2 into one i704 (limb i lands at bit 32*i)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151 ; %r152 = first operand as a single i704
+%r153 = load i32, i32* %r3 ; same packing for the 22 limbs at %r3
+%r154 = zext i32 %r153 to i64
+%r156 = getelementptr i32, i32* %r3, i32 1
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i64
+%r159 = shl i64 %r158, 32
+%r160 = or i64 %r154, %r159
+%r161 = zext i64 %r160 to i96
+%r163 = getelementptr i32, i32* %r3, i32 2
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i96
+%r166 = shl i96 %r165, 64
+%r167 = or i96 %r161, %r166
+%r168 = zext i96 %r167 to i128
+%r170 = getelementptr i32, i32* %r3, i32 3
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i128
+%r173 = shl i128 %r172, 96
+%r174 = or i128 %r168, %r173
+%r175 = zext i128 %r174 to i160
+%r177 = getelementptr i32, i32* %r3, i32 4
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i160
+%r180 = shl i160 %r179, 128
+%r181 = or i160 %r175, %r180
+%r182 = zext i160 %r181 to i192
+%r184 = getelementptr i32, i32* %r3, i32 5
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i192
+%r187 = shl i192 %r186, 160
+%r188 = or i192 %r182, %r187
+%r189 = zext i192 %r188 to i224
+%r191 = getelementptr i32, i32* %r3, i32 6
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i224
+%r194 = shl i224 %r193, 192
+%r195 = or i224 %r189, %r194
+%r196 = zext i224 %r195 to i256
+%r198 = getelementptr i32, i32* %r3, i32 7
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i256
+%r201 = shl i256 %r200, 224
+%r202 = or i256 %r196, %r201
+%r203 = zext i256 %r202 to i288
+%r205 = getelementptr i32, i32* %r3, i32 8
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i288
+%r208 = shl i288 %r207, 256
+%r209 = or i288 %r203, %r208
+%r210 = zext i288 %r209 to i320
+%r212 = getelementptr i32, i32* %r3, i32 9
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i320
+%r215 = shl i320 %r214, 288
+%r216 = or i320 %r210, %r215
+%r217 = zext i320 %r216 to i352
+%r219 = getelementptr i32, i32* %r3, i32 10
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i352
+%r222 = shl i352 %r221, 320
+%r223 = or i352 %r217, %r222
+%r224 = zext i352 %r223 to i384
+%r226 = getelementptr i32, i32* %r3, i32 11
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i384
+%r229 = shl i384 %r228, 352
+%r230 = or i384 %r224, %r229
+%r231 = zext i384 %r230 to i416
+%r233 = getelementptr i32, i32* %r3, i32 12
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i416
+%r236 = shl i416 %r235, 384
+%r237 = or i416 %r231, %r236
+%r238 = zext i416 %r237 to i448
+%r240 = getelementptr i32, i32* %r3, i32 13
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i448
+%r243 = shl i448 %r242, 416
+%r244 = or i448 %r238, %r243
+%r245 = zext i448 %r244 to i480
+%r247 = getelementptr i32, i32* %r3, i32 14
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i480
+%r250 = shl i480 %r249, 448
+%r251 = or i480 %r245, %r250
+%r252 = zext i480 %r251 to i512
+%r254 = getelementptr i32, i32* %r3, i32 15
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i512
+%r257 = shl i512 %r256, 480
+%r258 = or i512 %r252, %r257
+%r259 = zext i512 %r258 to i544
+%r261 = getelementptr i32, i32* %r3, i32 16
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i544
+%r264 = shl i544 %r263, 512
+%r265 = or i544 %r259, %r264
+%r266 = zext i544 %r265 to i576
+%r268 = getelementptr i32, i32* %r3, i32 17
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i576
+%r271 = shl i576 %r270, 544
+%r272 = or i576 %r266, %r271
+%r273 = zext i576 %r272 to i608
+%r275 = getelementptr i32, i32* %r3, i32 18
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i608
+%r278 = shl i608 %r277, 576
+%r279 = or i608 %r273, %r278
+%r280 = zext i608 %r279 to i640
+%r282 = getelementptr i32, i32* %r3, i32 19
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i640
+%r285 = shl i640 %r284, 608
+%r286 = or i640 %r280, %r285
+%r287 = zext i640 %r286 to i672
+%r289 = getelementptr i32, i32* %r3, i32 20
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i672
+%r292 = shl i672 %r291, 640
+%r293 = or i672 %r287, %r292
+%r294 = zext i672 %r293 to i704
+%r296 = getelementptr i32, i32* %r3, i32 21
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i704
+%r299 = shl i704 %r298, 672
+%r300 = or i704 %r294, %r299 ; %r300 = second operand as a single i704
+%r301 = zext i704 %r152 to i736 ; widen both operands to i736 so the carry out of bit 703 is kept
+%r302 = zext i704 %r300 to i736
+%r303 = add i736 %r301, %r302
+%r304 = trunc i736 %r303 to i352 ; low 352 bits of the sum
+%r305 = trunc i352 %r304 to i32 ; store the low half to %r1[0..10] without any reduction
+%r307 = getelementptr i32, i32* %r1, i32 0
+store i32 %r305, i32* %r307
+%r308 = lshr i352 %r304, 32
+%r309 = trunc i352 %r308 to i32
+%r311 = getelementptr i32, i32* %r1, i32 1
+store i32 %r309, i32* %r311
+%r312 = lshr i352 %r308, 32
+%r313 = trunc i352 %r312 to i32
+%r315 = getelementptr i32, i32* %r1, i32 2
+store i32 %r313, i32* %r315
+%r316 = lshr i352 %r312, 32
+%r317 = trunc i352 %r316 to i32
+%r319 = getelementptr i32, i32* %r1, i32 3
+store i32 %r317, i32* %r319
+%r320 = lshr i352 %r316, 32
+%r321 = trunc i352 %r320 to i32
+%r323 = getelementptr i32, i32* %r1, i32 4
+store i32 %r321, i32* %r323
+%r324 = lshr i352 %r320, 32
+%r325 = trunc i352 %r324 to i32
+%r327 = getelementptr i32, i32* %r1, i32 5
+store i32 %r325, i32* %r327
+%r328 = lshr i352 %r324, 32
+%r329 = trunc i352 %r328 to i32
+%r331 = getelementptr i32, i32* %r1, i32 6
+store i32 %r329, i32* %r331
+%r332 = lshr i352 %r328, 32
+%r333 = trunc i352 %r332 to i32
+%r335 = getelementptr i32, i32* %r1, i32 7
+store i32 %r333, i32* %r335
+%r336 = lshr i352 %r332, 32
+%r337 = trunc i352 %r336 to i32
+%r339 = getelementptr i32, i32* %r1, i32 8
+store i32 %r337, i32* %r339
+%r340 = lshr i352 %r336, 32
+%r341 = trunc i352 %r340 to i32
+%r343 = getelementptr i32, i32* %r1, i32 9
+store i32 %r341, i32* %r343
+%r344 = lshr i352 %r340, 32
+%r345 = trunc i352 %r344 to i32
+%r347 = getelementptr i32, i32* %r1, i32 10
+store i32 %r345, i32* %r347
+%r348 = lshr i736 %r303, 352
+%r349 = trunc i736 %r348 to i384 ; bits 352..735 of the sum: the upper half including the carry, held in i384
+%r350 = load i32, i32* %r4 ; pack the 11 limbs at %r4 (presumably the field modulus - confirm against callers)
+%r351 = zext i32 %r350 to i64
+%r353 = getelementptr i32, i32* %r4, i32 1
+%r354 = load i32, i32* %r353
+%r355 = zext i32 %r354 to i64
+%r356 = shl i64 %r355, 32
+%r357 = or i64 %r351, %r356
+%r358 = zext i64 %r357 to i96
+%r360 = getelementptr i32, i32* %r4, i32 2
+%r361 = load i32, i32* %r360
+%r362 = zext i32 %r361 to i96
+%r363 = shl i96 %r362, 64
+%r364 = or i96 %r358, %r363
+%r365 = zext i96 %r364 to i128
+%r367 = getelementptr i32, i32* %r4, i32 3
+%r368 = load i32, i32* %r367
+%r369 = zext i32 %r368 to i128
+%r370 = shl i128 %r369, 96
+%r371 = or i128 %r365, %r370
+%r372 = zext i128 %r371 to i160
+%r374 = getelementptr i32, i32* %r4, i32 4
+%r375 = load i32, i32* %r374
+%r376 = zext i32 %r375 to i160
+%r377 = shl i160 %r376, 128
+%r378 = or i160 %r372, %r377
+%r379 = zext i160 %r378 to i192
+%r381 = getelementptr i32, i32* %r4, i32 5
+%r382 = load i32, i32* %r381
+%r383 = zext i32 %r382 to i192
+%r384 = shl i192 %r383, 160
+%r385 = or i192 %r379, %r384
+%r386 = zext i192 %r385 to i224
+%r388 = getelementptr i32, i32* %r4, i32 6
+%r389 = load i32, i32* %r388
+%r390 = zext i32 %r389 to i224
+%r391 = shl i224 %r390, 192
+%r392 = or i224 %r386, %r391
+%r393 = zext i224 %r392 to i256
+%r395 = getelementptr i32, i32* %r4, i32 7
+%r396 = load i32, i32* %r395
+%r397 = zext i32 %r396 to i256
+%r398 = shl i256 %r397, 224
+%r399 = or i256 %r393, %r398
+%r400 = zext i256 %r399 to i288
+%r402 = getelementptr i32, i32* %r4, i32 8
+%r403 = load i32, i32* %r402
+%r404 = zext i32 %r403 to i288
+%r405 = shl i288 %r404, 256
+%r406 = or i288 %r400, %r405
+%r407 = zext i288 %r406 to i320
+%r409 = getelementptr i32, i32* %r4, i32 9
+%r410 = load i32, i32* %r409
+%r411 = zext i32 %r410 to i320
+%r412 = shl i320 %r411, 288
+%r413 = or i320 %r407, %r412
+%r414 = zext i320 %r413 to i352
+%r416 = getelementptr i32, i32* %r4, i32 10
+%r417 = load i32, i32* %r416
+%r418 = zext i32 %r417 to i352
+%r419 = shl i352 %r418, 320
+%r420 = or i352 %r414, %r419
+%r421 = zext i352 %r420 to i384
+%r422 = sub i384 %r349, %r421 ; trial subtraction of the %r4 value from the upper half
+%r423 = lshr i384 %r422, 352
+%r424 = trunc i384 %r423 to i1 ; bit 352 of the trial difference, used as the borrow indicator
+%r425 = select i1 %r424, i384 %r349, i384 %r422 ; indicator set => keep the unsubtracted upper half, otherwise take the reduced value
+%r426 = trunc i384 %r425 to i352
+%r428 = getelementptr i32, i32* %r1, i32 11 ; %r428 = &%r1[11], base of the upper output half
+%r429 = trunc i352 %r426 to i32 ; store the selected upper half to %r1[11..21], lowest limb first
+%r431 = getelementptr i32, i32* %r428, i32 0
+store i32 %r429, i32* %r431
+%r432 = lshr i352 %r426, 32
+%r433 = trunc i352 %r432 to i32
+%r435 = getelementptr i32, i32* %r428, i32 1
+store i32 %r433, i32* %r435
+%r436 = lshr i352 %r432, 32
+%r437 = trunc i352 %r436 to i32
+%r439 = getelementptr i32, i32* %r428, i32 2
+store i32 %r437, i32* %r439
+%r440 = lshr i352 %r436, 32
+%r441 = trunc i352 %r440 to i32
+%r443 = getelementptr i32, i32* %r428, i32 3
+store i32 %r441, i32* %r443
+%r444 = lshr i352 %r440, 32
+%r445 = trunc i352 %r444 to i32
+%r447 = getelementptr i32, i32* %r428, i32 4
+store i32 %r445, i32* %r447
+%r448 = lshr i352 %r444, 32
+%r449 = trunc i352 %r448 to i32
+%r451 = getelementptr i32, i32* %r428, i32 5
+store i32 %r449, i32* %r451
+%r452 = lshr i352 %r448, 32
+%r453 = trunc i352 %r452 to i32
+%r455 = getelementptr i32, i32* %r428, i32 6
+store i32 %r453, i32* %r455
+%r456 = lshr i352 %r452, 32
+%r457 = trunc i352 %r456 to i32
+%r459 = getelementptr i32, i32* %r428, i32 7
+store i32 %r457, i32* %r459
+%r460 = lshr i352 %r456, 32
+%r461 = trunc i352 %r460 to i32
+%r463 = getelementptr i32, i32* %r428, i32 8
+store i32 %r461, i32* %r463
+%r464 = lshr i352 %r460, 32
+%r465 = trunc i352 %r464 to i32
+%r467 = getelementptr i32, i32* %r428, i32 9
+store i32 %r465, i32* %r467
+%r468 = lshr i352 %r464, 32
+%r469 = trunc i352 %r468 to i32
+%r471 = getelementptr i32, i32* %r428, i32 10
+store i32 %r469, i32* %r471
+ret void
+}
+define void @mcl_fpDbl_sub11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = load i32, i32* %r3
+%r154 = zext i32 %r153 to i64
+%r156 = getelementptr i32, i32* %r3, i32 1
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i64
+%r159 = shl i64 %r158, 32
+%r160 = or i64 %r154, %r159
+%r161 = zext i64 %r160 to i96
+%r163 = getelementptr i32, i32* %r3, i32 2
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i96
+%r166 = shl i96 %r165, 64
+%r167 = or i96 %r161, %r166
+%r168 = zext i96 %r167 to i128
+%r170 = getelementptr i32, i32* %r3, i32 3
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i128
+%r173 = shl i128 %r172, 96
+%r174 = or i128 %r168, %r173
+%r175 = zext i128 %r174 to i160
+%r177 = getelementptr i32, i32* %r3, i32 4
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i160
+%r180 = shl i160 %r179, 128
+%r181 = or i160 %r175, %r180
+%r182 = zext i160 %r181 to i192
+%r184 = getelementptr i32, i32* %r3, i32 5
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i192
+%r187 = shl i192 %r186, 160
+%r188 = or i192 %r182, %r187
+%r189 = zext i192 %r188 to i224
+%r191 = getelementptr i32, i32* %r3, i32 6
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i224
+%r194 = shl i224 %r193, 192
+%r195 = or i224 %r189, %r194
+%r196 = zext i224 %r195 to i256
+%r198 = getelementptr i32, i32* %r3, i32 7
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i256
+%r201 = shl i256 %r200, 224
+%r202 = or i256 %r196, %r201
+%r203 = zext i256 %r202 to i288
+%r205 = getelementptr i32, i32* %r3, i32 8
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i288
+%r208 = shl i288 %r207, 256
+%r209 = or i288 %r203, %r208
+%r210 = zext i288 %r209 to i320
+%r212 = getelementptr i32, i32* %r3, i32 9
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i320
+%r215 = shl i320 %r214, 288
+%r216 = or i320 %r210, %r215
+%r217 = zext i320 %r216 to i352
+%r219 = getelementptr i32, i32* %r3, i32 10
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i352
+%r222 = shl i352 %r221, 320
+%r223 = or i352 %r217, %r222
+%r224 = zext i352 %r223 to i384
+%r226 = getelementptr i32, i32* %r3, i32 11
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i384
+%r229 = shl i384 %r228, 352
+%r230 = or i384 %r224, %r229
+%r231 = zext i384 %r230 to i416
+%r233 = getelementptr i32, i32* %r3, i32 12
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i416
+%r236 = shl i416 %r235, 384
+%r237 = or i416 %r231, %r236
+%r238 = zext i416 %r237 to i448
+%r240 = getelementptr i32, i32* %r3, i32 13
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i448
+%r243 = shl i448 %r242, 416
+%r244 = or i448 %r238, %r243
+%r245 = zext i448 %r244 to i480
+%r247 = getelementptr i32, i32* %r3, i32 14
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i480
+%r250 = shl i480 %r249, 448
+%r251 = or i480 %r245, %r250
+%r252 = zext i480 %r251 to i512
+%r254 = getelementptr i32, i32* %r3, i32 15
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i512
+%r257 = shl i512 %r256, 480
+%r258 = or i512 %r252, %r257
+%r259 = zext i512 %r258 to i544
+%r261 = getelementptr i32, i32* %r3, i32 16
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i544
+%r264 = shl i544 %r263, 512
+%r265 = or i544 %r259, %r264
+%r266 = zext i544 %r265 to i576
+%r268 = getelementptr i32, i32* %r3, i32 17
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i576
+%r271 = shl i576 %r270, 544
+%r272 = or i576 %r266, %r271
+%r273 = zext i576 %r272 to i608
+%r275 = getelementptr i32, i32* %r3, i32 18
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i608
+%r278 = shl i608 %r277, 576
+%r279 = or i608 %r273, %r278
+%r280 = zext i608 %r279 to i640
+%r282 = getelementptr i32, i32* %r3, i32 19
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i640
+%r285 = shl i640 %r284, 608
+%r286 = or i640 %r280, %r285
+%r287 = zext i640 %r286 to i672
+%r289 = getelementptr i32, i32* %r3, i32 20
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i672
+%r292 = shl i672 %r291, 640
+%r293 = or i672 %r287, %r292
+%r294 = zext i672 %r293 to i704
+%r296 = getelementptr i32, i32* %r3, i32 21
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i704
+%r299 = shl i704 %r298, 672
+%r300 = or i704 %r294, %r299
+%r301 = zext i704 %r152 to i736
+%r302 = zext i704 %r300 to i736
+%r303 = sub i736 %r301, %r302
+%r304 = trunc i736 %r303 to i352
+%r305 = trunc i352 %r304 to i32
+%r307 = getelementptr i32, i32* %r1, i32 0
+store i32 %r305, i32* %r307
+%r308 = lshr i352 %r304, 32
+%r309 = trunc i352 %r308 to i32
+%r311 = getelementptr i32, i32* %r1, i32 1
+store i32 %r309, i32* %r311
+%r312 = lshr i352 %r308, 32
+%r313 = trunc i352 %r312 to i32
+%r315 = getelementptr i32, i32* %r1, i32 2
+store i32 %r313, i32* %r315
+%r316 = lshr i352 %r312, 32
+%r317 = trunc i352 %r316 to i32
+%r319 = getelementptr i32, i32* %r1, i32 3
+store i32 %r317, i32* %r319
+%r320 = lshr i352 %r316, 32
+%r321 = trunc i352 %r320 to i32
+%r323 = getelementptr i32, i32* %r1, i32 4
+store i32 %r321, i32* %r323
+%r324 = lshr i352 %r320, 32
+%r325 = trunc i352 %r324 to i32
+%r327 = getelementptr i32, i32* %r1, i32 5
+store i32 %r325, i32* %r327
+%r328 = lshr i352 %r324, 32
+%r329 = trunc i352 %r328 to i32
+%r331 = getelementptr i32, i32* %r1, i32 6
+store i32 %r329, i32* %r331
+%r332 = lshr i352 %r328, 32
+%r333 = trunc i352 %r332 to i32
+%r335 = getelementptr i32, i32* %r1, i32 7
+store i32 %r333, i32* %r335
+%r336 = lshr i352 %r332, 32
+%r337 = trunc i352 %r336 to i32
+%r339 = getelementptr i32, i32* %r1, i32 8
+store i32 %r337, i32* %r339
+%r340 = lshr i352 %r336, 32
+%r341 = trunc i352 %r340 to i32
+%r343 = getelementptr i32, i32* %r1, i32 9
+store i32 %r341, i32* %r343
+%r344 = lshr i352 %r340, 32
+%r345 = trunc i352 %r344 to i32
+%r347 = getelementptr i32, i32* %r1, i32 10
+store i32 %r345, i32* %r347
+%r348 = lshr i736 %r303, 352
+%r349 = trunc i736 %r348 to i352
+%r350 = lshr i736 %r303, 704
+%r351 = trunc i736 %r350 to i1
+%r352 = load i32, i32* %r4
+%r353 = zext i32 %r352 to i64
+%r355 = getelementptr i32, i32* %r4, i32 1
+%r356 = load i32, i32* %r355
+%r357 = zext i32 %r356 to i64
+%r358 = shl i64 %r357, 32
+%r359 = or i64 %r353, %r358
+%r360 = zext i64 %r359 to i96
+%r362 = getelementptr i32, i32* %r4, i32 2
+%r363 = load i32, i32* %r362
+%r364 = zext i32 %r363 to i96
+%r365 = shl i96 %r364, 64
+%r366 = or i96 %r360, %r365
+%r367 = zext i96 %r366 to i128
+%r369 = getelementptr i32, i32* %r4, i32 3
+%r370 = load i32, i32* %r369
+%r371 = zext i32 %r370 to i128
+%r372 = shl i128 %r371, 96
+%r373 = or i128 %r367, %r372
+%r374 = zext i128 %r373 to i160
+%r376 = getelementptr i32, i32* %r4, i32 4
+%r377 = load i32, i32* %r376
+%r378 = zext i32 %r377 to i160
+%r379 = shl i160 %r378, 128
+%r380 = or i160 %r374, %r379
+%r381 = zext i160 %r380 to i192
+%r383 = getelementptr i32, i32* %r4, i32 5
+%r384 = load i32, i32* %r383
+%r385 = zext i32 %r384 to i192
+%r386 = shl i192 %r385, 160
+%r387 = or i192 %r381, %r386
+%r388 = zext i192 %r387 to i224
+%r390 = getelementptr i32, i32* %r4, i32 6
+%r391 = load i32, i32* %r390
+%r392 = zext i32 %r391 to i224
+%r393 = shl i224 %r392, 192
+%r394 = or i224 %r388, %r393
+%r395 = zext i224 %r394 to i256
+%r397 = getelementptr i32, i32* %r4, i32 7
+%r398 = load i32, i32* %r397
+%r399 = zext i32 %r398 to i256
+%r400 = shl i256 %r399, 224
+%r401 = or i256 %r395, %r400
+%r402 = zext i256 %r401 to i288
+%r404 = getelementptr i32, i32* %r4, i32 8
+%r405 = load i32, i32* %r404
+%r406 = zext i32 %r405 to i288
+%r407 = shl i288 %r406, 256
+%r408 = or i288 %r402, %r407
+%r409 = zext i288 %r408 to i320
+%r411 = getelementptr i32, i32* %r4, i32 9
+%r412 = load i32, i32* %r411
+%r413 = zext i32 %r412 to i320
+%r414 = shl i320 %r413, 288
+%r415 = or i320 %r409, %r414
+%r416 = zext i320 %r415 to i352
+%r418 = getelementptr i32, i32* %r4, i32 10
+%r419 = load i32, i32* %r418
+%r420 = zext i32 %r419 to i352
+%r421 = shl i352 %r420, 320
+%r422 = or i352 %r416, %r421
+%r424 = select i1 %r351, i352 %r422, i352 0
+%r425 = add i352 %r349, %r424
+%r427 = getelementptr i32, i32* %r1, i32 11
+%r428 = trunc i352 %r425 to i32
+%r430 = getelementptr i32, i32* %r427, i32 0
+store i32 %r428, i32* %r430
+%r431 = lshr i352 %r425, 32
+%r432 = trunc i352 %r431 to i32
+%r434 = getelementptr i32, i32* %r427, i32 1
+store i32 %r432, i32* %r434
+%r435 = lshr i352 %r431, 32
+%r436 = trunc i352 %r435 to i32
+%r438 = getelementptr i32, i32* %r427, i32 2
+store i32 %r436, i32* %r438
+%r439 = lshr i352 %r435, 32
+%r440 = trunc i352 %r439 to i32
+%r442 = getelementptr i32, i32* %r427, i32 3
+store i32 %r440, i32* %r442
+%r443 = lshr i352 %r439, 32
+%r444 = trunc i352 %r443 to i32
+%r446 = getelementptr i32, i32* %r427, i32 4
+store i32 %r444, i32* %r446
+%r447 = lshr i352 %r443, 32
+%r448 = trunc i352 %r447 to i32
+%r450 = getelementptr i32, i32* %r427, i32 5
+store i32 %r448, i32* %r450
+%r451 = lshr i352 %r447, 32
+%r452 = trunc i352 %r451 to i32
+%r454 = getelementptr i32, i32* %r427, i32 6
+store i32 %r452, i32* %r454
+%r455 = lshr i352 %r451, 32
+%r456 = trunc i352 %r455 to i32
+%r458 = getelementptr i32, i32* %r427, i32 7
+store i32 %r456, i32* %r458
+%r459 = lshr i352 %r455, 32
+%r460 = trunc i352 %r459 to i32
+%r462 = getelementptr i32, i32* %r427, i32 8
+store i32 %r460, i32* %r462
+%r463 = lshr i352 %r459, 32
+%r464 = trunc i352 %r463 to i32
+%r466 = getelementptr i32, i32* %r427, i32 9
+store i32 %r464, i32* %r466
+%r467 = lshr i352 %r463, 32
+%r468 = trunc i352 %r467 to i32
+%r470 = getelementptr i32, i32* %r427, i32 10
+store i32 %r468, i32* %r470
+ret void
+}
+define i416 @mulPv384x32(i32* noalias  %r2, i32 %r3) ; builds an i416 from 12 mulPos32x32(%r2, %r3, i) results, i = 0..11; presumably (12-limb value at %r2) * %r3 - confirm against mulPos32x32
+{
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0) ; index 0; helper is defined outside this view (presumably %r2[0] * %r3 as a 64-bit product)
+%r6 = trunc i64 %r5 to i32 ; low 32 bits of the index-0 result
+%r7 = call i32 @extractHigh32(i64 %r5) ; presumably the upper 32 bits of the same result (helper not visible here)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1) ; same three-step pattern repeats for indices 1..11
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
+%r34 = trunc i64 %r33 to i32
+%r35 = call i32 @extractHigh32(i64 %r33)
+%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
+%r38 = trunc i64 %r37 to i32
+%r39 = call i32 @extractHigh32(i64 %r37)
+%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
+%r42 = trunc i64 %r41 to i32
+%r43 = call i32 @extractHigh32(i64 %r41)
+%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
+%r46 = trunc i64 %r45 to i32
+%r47 = call i32 @extractHigh32(i64 %r45)
+%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
+%r50 = trunc i64 %r49 to i32
+%r51 = call i32 @extractHigh32(i64 %r49)
+%r52 = zext i32 %r6 to i64 ; start packing the 12 low halves: the index-i low half lands at bit offset 32*i
+%r53 = zext i32 %r10 to i64
+%r54 = shl i64 %r53, 32
+%r55 = or i64 %r52, %r54
+%r56 = zext i64 %r55 to i96 ; widen by 32 bits before OR-ing in the next half (pattern repeats up to i384)
+%r57 = zext i32 %r14 to i96
+%r58 = shl i96 %r57, 64
+%r59 = or i96 %r56, %r58
+%r60 = zext i96 %r59 to i128
+%r61 = zext i32 %r18 to i128
+%r62 = shl i128 %r61, 96
+%r63 = or i128 %r60, %r62
+%r64 = zext i128 %r63 to i160
+%r65 = zext i32 %r22 to i160
+%r66 = shl i160 %r65, 128
+%r67 = or i160 %r64, %r66
+%r68 = zext i160 %r67 to i192
+%r69 = zext i32 %r26 to i192
+%r70 = shl i192 %r69, 160
+%r71 = or i192 %r68, %r70
+%r72 = zext i192 %r71 to i224
+%r73 = zext i32 %r30 to i224
+%r74 = shl i224 %r73, 192
+%r75 = or i224 %r72, %r74
+%r76 = zext i224 %r75 to i256
+%r77 = zext i32 %r34 to i256
+%r78 = shl i256 %r77, 224
+%r79 = or i256 %r76, %r78
+%r80 = zext i256 %r79 to i288
+%r81 = zext i32 %r38 to i288
+%r82 = shl i288 %r81, 256
+%r83 = or i288 %r80, %r82
+%r84 = zext i288 %r83 to i320
+%r85 = zext i32 %r42 to i320
+%r86 = shl i320 %r85, 288
+%r87 = or i320 %r84, %r86
+%r88 = zext i320 %r87 to i352
+%r89 = zext i32 %r46 to i352
+%r90 = shl i352 %r89, 320
+%r91 = or i352 %r88, %r90
+%r92 = zext i352 %r91 to i384
+%r93 = zext i32 %r50 to i384
+%r94 = shl i384 %r93, 352
+%r95 = or i384 %r92, %r94 ; %r95 = all 12 low halves packed into one i384
+%r96 = zext i32 %r7 to i64 ; now pack the 12 extractHigh32 results with the same layout
+%r97 = zext i32 %r11 to i64
+%r98 = shl i64 %r97, 32
+%r99 = or i64 %r96, %r98
+%r100 = zext i64 %r99 to i96
+%r101 = zext i32 %r15 to i96
+%r102 = shl i96 %r101, 64
+%r103 = or i96 %r100, %r102
+%r104 = zext i96 %r103 to i128
+%r105 = zext i32 %r19 to i128
+%r106 = shl i128 %r105, 96
+%r107 = or i128 %r104, %r106
+%r108 = zext i128 %r107 to i160
+%r109 = zext i32 %r23 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r108, %r110
+%r112 = zext i160 %r111 to i192
+%r113 = zext i32 %r27 to i192
+%r114 = shl i192 %r113, 160
+%r115 = or i192 %r112, %r114
+%r116 = zext i192 %r115 to i224
+%r117 = zext i32 %r31 to i224
+%r118 = shl i224 %r117, 192
+%r119 = or i224 %r116, %r118
+%r120 = zext i224 %r119 to i256
+%r121 = zext i32 %r35 to i256
+%r122 = shl i256 %r121, 224
+%r123 = or i256 %r120, %r122
+%r124 = zext i256 %r123 to i288
+%r125 = zext i32 %r39 to i288
+%r126 = shl i288 %r125, 256
+%r127 = or i288 %r124, %r126
+%r128 = zext i288 %r127 to i320
+%r129 = zext i32 %r43 to i320
+%r130 = shl i320 %r129, 288
+%r131 = or i320 %r128, %r130
+%r132 = zext i320 %r131 to i352
+%r133 = zext i32 %r47 to i352
+%r134 = shl i352 %r133, 320
+%r135 = or i352 %r132, %r134
+%r136 = zext i352 %r135 to i384
+%r137 = zext i32 %r51 to i384
+%r138 = shl i384 %r137, 352
+%r139 = or i384 %r136, %r138 ; %r139 = all 12 high halves packed into one i384
+%r140 = zext i384 %r95 to i416 ; widen both packed values to the i416 return width
+%r141 = zext i384 %r139 to i416
+%r142 = shl i416 %r141, 32 ; each high half sits one 32-bit position above its matching low half
+%r143 = add i416 %r140, %r142 ; result = packed low halves + (packed high halves << 32)
+ret i416 %r143
+}
+define void @mcl_fp_mulUnitPre12L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3) ; stores the i416 result of mulPv384x32(%r2, %r3) into %r1[0..12] as 13 32-bit words, least-significant word first
+{
+%r4 = call i416 @mulPv384x32(i32* %r2, i32 %r3) ; whole result as a single wide integer
+%r5 = trunc i416 %r4 to i32 ; word 0 = lowest 32 bits
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i416 %r4, 32 ; shift right by one word to expose the next 32 bits (repeated for every word below)
+%r9 = trunc i416 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i416 %r8, 32
+%r13 = trunc i416 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i416 %r12, 32
+%r17 = trunc i416 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i416 %r16, 32
+%r21 = trunc i416 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+%r24 = lshr i416 %r20, 32
+%r25 = trunc i416 %r24 to i32
+%r27 = getelementptr i32, i32* %r1, i32 5
+store i32 %r25, i32* %r27
+%r28 = lshr i416 %r24, 32
+%r29 = trunc i416 %r28 to i32
+%r31 = getelementptr i32, i32* %r1, i32 6
+store i32 %r29, i32* %r31
+%r32 = lshr i416 %r28, 32
+%r33 = trunc i416 %r32 to i32
+%r35 = getelementptr i32, i32* %r1, i32 7
+store i32 %r33, i32* %r35
+%r36 = lshr i416 %r32, 32
+%r37 = trunc i416 %r36 to i32
+%r39 = getelementptr i32, i32* %r1, i32 8
+store i32 %r37, i32* %r39
+%r40 = lshr i416 %r36, 32
+%r41 = trunc i416 %r40 to i32
+%r43 = getelementptr i32, i32* %r1, i32 9
+store i32 %r41, i32* %r43
+%r44 = lshr i416 %r40, 32
+%r45 = trunc i416 %r44 to i32
+%r47 = getelementptr i32, i32* %r1, i32 10
+store i32 %r45, i32* %r47
+%r48 = lshr i416 %r44, 32
+%r49 = trunc i416 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 11
+store i32 %r49, i32* %r51
+%r52 = lshr i416 %r48, 32
+%r53 = trunc i416 %r52 to i32 ; word 12 = top 32 bits of the i416 result
+%r55 = getelementptr i32, i32* %r1, i32 12
+store i32 %r53, i32* %r55
+ret void
+}
+define void @mcl_fpDbl_mulPre12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r2, i32 6
+%r7 = getelementptr i32, i32* %r3, i32 6
+%r9 = getelementptr i32, i32* %r1, i32 12
+call void @mcl_fpDbl_mulPre6L(i32* %r1, i32* %r2, i32* %r3)
+call void @mcl_fpDbl_mulPre6L(i32* %r9, i32* %r5, i32* %r7)
+%r10 = load i32, i32* %r5
+%r11 = zext i32 %r10 to i64
+%r13 = getelementptr i32, i32* %r5, i32 1
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i64
+%r16 = shl i64 %r15, 32
+%r17 = or i64 %r11, %r16
+%r18 = zext i64 %r17 to i96
+%r20 = getelementptr i32, i32* %r5, i32 2
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i96
+%r23 = shl i96 %r22, 64
+%r24 = or i96 %r18, %r23
+%r25 = zext i96 %r24 to i128
+%r27 = getelementptr i32, i32* %r5, i32 3
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i128
+%r30 = shl i128 %r29, 96
+%r31 = or i128 %r25, %r30
+%r32 = zext i128 %r31 to i160
+%r34 = getelementptr i32, i32* %r5, i32 4
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i160
+%r37 = shl i160 %r36, 128
+%r38 = or i160 %r32, %r37
+%r39 = zext i160 %r38 to i192
+%r41 = getelementptr i32, i32* %r5, i32 5
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i192
+%r44 = shl i192 %r43, 160
+%r45 = or i192 %r39, %r44
+%r46 = zext i192 %r45 to i224
+%r47 = load i32, i32* %r2
+%r48 = zext i32 %r47 to i64
+%r50 = getelementptr i32, i32* %r2, i32 1
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i64
+%r53 = shl i64 %r52, 32
+%r54 = or i64 %r48, %r53
+%r55 = zext i64 %r54 to i96
+%r57 = getelementptr i32, i32* %r2, i32 2
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i96
+%r60 = shl i96 %r59, 64
+%r61 = or i96 %r55, %r60
+%r62 = zext i96 %r61 to i128
+%r64 = getelementptr i32, i32* %r2, i32 3
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i128
+%r67 = shl i128 %r66, 96
+%r68 = or i128 %r62, %r67
+%r69 = zext i128 %r68 to i160
+%r71 = getelementptr i32, i32* %r2, i32 4
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i160
+%r74 = shl i160 %r73, 128
+%r75 = or i160 %r69, %r74
+%r76 = zext i160 %r75 to i192
+%r78 = getelementptr i32, i32* %r2, i32 5
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i192
+%r81 = shl i192 %r80, 160
+%r82 = or i192 %r76, %r81
+%r83 = zext i192 %r82 to i224
+%r84 = load i32, i32* %r7
+%r85 = zext i32 %r84 to i64
+%r87 = getelementptr i32, i32* %r7, i32 1
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i64
+%r90 = shl i64 %r89, 32
+%r91 = or i64 %r85, %r90
+%r92 = zext i64 %r91 to i96
+%r94 = getelementptr i32, i32* %r7, i32 2
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i96
+%r97 = shl i96 %r96, 64
+%r98 = or i96 %r92, %r97
+%r99 = zext i96 %r98 to i128
+%r101 = getelementptr i32, i32* %r7, i32 3
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i128
+%r104 = shl i128 %r103, 96
+%r105 = or i128 %r99, %r104
+%r106 = zext i128 %r105 to i160
+%r108 = getelementptr i32, i32* %r7, i32 4
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i160
+%r111 = shl i160 %r110, 128
+%r112 = or i160 %r106, %r111
+%r113 = zext i160 %r112 to i192
+%r115 = getelementptr i32, i32* %r7, i32 5
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i192
+%r118 = shl i192 %r117, 160
+%r119 = or i192 %r113, %r118
+%r120 = zext i192 %r119 to i224
+%r121 = load i32, i32* %r3
+%r122 = zext i32 %r121 to i64
+%r124 = getelementptr i32, i32* %r3, i32 1
+%r125 = load i32, i32* %r124
+%r126 = zext i32 %r125 to i64
+%r127 = shl i64 %r126, 32
+%r128 = or i64 %r122, %r127
+%r129 = zext i64 %r128 to i96
+%r131 = getelementptr i32, i32* %r3, i32 2
+%r132 = load i32, i32* %r131
+%r133 = zext i32 %r132 to i96
+%r134 = shl i96 %r133, 64
+%r135 = or i96 %r129, %r134
+%r136 = zext i96 %r135 to i128
+%r138 = getelementptr i32, i32* %r3, i32 3
+%r139 = load i32, i32* %r138
+%r140 = zext i32 %r139 to i128
+%r141 = shl i128 %r140, 96
+%r142 = or i128 %r136, %r141
+%r143 = zext i128 %r142 to i160
+%r145 = getelementptr i32, i32* %r3, i32 4
+%r146 = load i32, i32* %r145
+%r147 = zext i32 %r146 to i160
+%r148 = shl i160 %r147, 128
+%r149 = or i160 %r143, %r148
+%r150 = zext i160 %r149 to i192
+%r152 = getelementptr i32, i32* %r3, i32 5
+%r153 = load i32, i32* %r152
+%r154 = zext i32 %r153 to i192
+%r155 = shl i192 %r154, 160
+%r156 = or i192 %r150, %r155
+%r157 = zext i192 %r156 to i224
+%r158 = add i224 %r46, %r83
+%r159 = add i224 %r120, %r157
+%r161 = alloca i32, i32 12
+%r162 = trunc i224 %r158 to i192
+%r163 = trunc i224 %r159 to i192
+%r164 = lshr i224 %r158, 192
+%r165 = trunc i224 %r164 to i1
+%r166 = lshr i224 %r159, 192
+%r167 = trunc i224 %r166 to i1
+%r168 = and i1 %r165, %r167
+%r170 = select i1 %r165, i192 %r163, i192 0
+%r172 = select i1 %r167, i192 %r162, i192 0
+%r174 = alloca i32, i32 6
+%r176 = alloca i32, i32 6
+%r177 = trunc i192 %r162 to i32
+%r179 = getelementptr i32, i32* %r174, i32 0
+store i32 %r177, i32* %r179
+%r180 = lshr i192 %r162, 32
+%r181 = trunc i192 %r180 to i32
+%r183 = getelementptr i32, i32* %r174, i32 1
+store i32 %r181, i32* %r183
+%r184 = lshr i192 %r180, 32
+%r185 = trunc i192 %r184 to i32
+%r187 = getelementptr i32, i32* %r174, i32 2
+store i32 %r185, i32* %r187
+%r188 = lshr i192 %r184, 32
+%r189 = trunc i192 %r188 to i32
+%r191 = getelementptr i32, i32* %r174, i32 3
+store i32 %r189, i32* %r191
+%r192 = lshr i192 %r188, 32
+%r193 = trunc i192 %r192 to i32
+%r195 = getelementptr i32, i32* %r174, i32 4
+store i32 %r193, i32* %r195
+%r196 = lshr i192 %r192, 32
+%r197 = trunc i192 %r196 to i32
+%r199 = getelementptr i32, i32* %r174, i32 5
+store i32 %r197, i32* %r199
+%r200 = trunc i192 %r163 to i32
+%r202 = getelementptr i32, i32* %r176, i32 0
+store i32 %r200, i32* %r202
+%r203 = lshr i192 %r163, 32
+%r204 = trunc i192 %r203 to i32
+%r206 = getelementptr i32, i32* %r176, i32 1
+store i32 %r204, i32* %r206
+%r207 = lshr i192 %r203, 32
+%r208 = trunc i192 %r207 to i32
+%r210 = getelementptr i32, i32* %r176, i32 2
+store i32 %r208, i32* %r210
+%r211 = lshr i192 %r207, 32
+%r212 = trunc i192 %r211 to i32
+%r214 = getelementptr i32, i32* %r176, i32 3
+store i32 %r212, i32* %r214
+%r215 = lshr i192 %r211, 32
+%r216 = trunc i192 %r215 to i32
+%r218 = getelementptr i32, i32* %r176, i32 4
+store i32 %r216, i32* %r218
+%r219 = lshr i192 %r215, 32
+%r220 = trunc i192 %r219 to i32
+%r222 = getelementptr i32, i32* %r176, i32 5
+store i32 %r220, i32* %r222
+call void @mcl_fpDbl_mulPre6L(i32* %r161, i32* %r174, i32* %r176)
+%r223 = load i32, i32* %r161
+%r224 = zext i32 %r223 to i64
+%r226 = getelementptr i32, i32* %r161, i32 1
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i64
+%r229 = shl i64 %r228, 32
+%r230 = or i64 %r224, %r229
+%r231 = zext i64 %r230 to i96
+%r233 = getelementptr i32, i32* %r161, i32 2
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i96
+%r236 = shl i96 %r235, 64
+%r237 = or i96 %r231, %r236
+%r238 = zext i96 %r237 to i128
+%r240 = getelementptr i32, i32* %r161, i32 3
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i128
+%r243 = shl i128 %r242, 96
+%r244 = or i128 %r238, %r243
+%r245 = zext i128 %r244 to i160
+%r247 = getelementptr i32, i32* %r161, i32 4
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i160
+%r250 = shl i160 %r249, 128
+%r251 = or i160 %r245, %r250
+%r252 = zext i160 %r251 to i192
+%r254 = getelementptr i32, i32* %r161, i32 5
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i192
+%r257 = shl i192 %r256, 160
+%r258 = or i192 %r252, %r257
+%r259 = zext i192 %r258 to i224
+%r261 = getelementptr i32, i32* %r161, i32 6
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i224
+%r264 = shl i224 %r263, 192
+%r265 = or i224 %r259, %r264
+%r266 = zext i224 %r265 to i256
+%r268 = getelementptr i32, i32* %r161, i32 7
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i256
+%r271 = shl i256 %r270, 224
+%r272 = or i256 %r266, %r271
+%r273 = zext i256 %r272 to i288
+%r275 = getelementptr i32, i32* %r161, i32 8
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i288
+%r278 = shl i288 %r277, 256
+%r279 = or i288 %r273, %r278
+%r280 = zext i288 %r279 to i320
+%r282 = getelementptr i32, i32* %r161, i32 9
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i320
+%r285 = shl i320 %r284, 288
+%r286 = or i320 %r280, %r285
+%r287 = zext i320 %r286 to i352
+%r289 = getelementptr i32, i32* %r161, i32 10
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i352
+%r292 = shl i352 %r291, 320
+%r293 = or i352 %r287, %r292
+%r294 = zext i352 %r293 to i384
+%r296 = getelementptr i32, i32* %r161, i32 11
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i384
+%r299 = shl i384 %r298, 352
+%r300 = or i384 %r294, %r299
+%r301 = zext i384 %r300 to i416
+%r302 = zext i1 %r168 to i416
+%r303 = shl i416 %r302, 384
+%r304 = or i416 %r301, %r303
+%r305 = zext i192 %r170 to i416
+%r306 = zext i192 %r172 to i416
+%r307 = shl i416 %r305, 192
+%r308 = shl i416 %r306, 192
+%r309 = add i416 %r304, %r307
+%r310 = add i416 %r309, %r308
+%r311 = load i32, i32* %r1
+%r312 = zext i32 %r311 to i64
+%r314 = getelementptr i32, i32* %r1, i32 1
+%r315 = load i32, i32* %r314
+%r316 = zext i32 %r315 to i64
+%r317 = shl i64 %r316, 32
+%r318 = or i64 %r312, %r317
+%r319 = zext i64 %r318 to i96
+%r321 = getelementptr i32, i32* %r1, i32 2
+%r322 = load i32, i32* %r321
+%r323 = zext i32 %r322 to i96
+%r324 = shl i96 %r323, 64
+%r325 = or i96 %r319, %r324
+%r326 = zext i96 %r325 to i128
+%r328 = getelementptr i32, i32* %r1, i32 3
+%r329 = load i32, i32* %r328
+%r330 = zext i32 %r329 to i128
+%r331 = shl i128 %r330, 96
+%r332 = or i128 %r326, %r331
+%r333 = zext i128 %r332 to i160
+%r335 = getelementptr i32, i32* %r1, i32 4
+%r336 = load i32, i32* %r335
+%r337 = zext i32 %r336 to i160
+%r338 = shl i160 %r337, 128
+%r339 = or i160 %r333, %r338
+%r340 = zext i160 %r339 to i192
+%r342 = getelementptr i32, i32* %r1, i32 5
+%r343 = load i32, i32* %r342
+%r344 = zext i32 %r343 to i192
+%r345 = shl i192 %r344, 160
+%r346 = or i192 %r340, %r345
+%r347 = zext i192 %r346 to i224
+%r349 = getelementptr i32, i32* %r1, i32 6
+%r350 = load i32, i32* %r349
+%r351 = zext i32 %r350 to i224
+%r352 = shl i224 %r351, 192
+%r353 = or i224 %r347, %r352
+%r354 = zext i224 %r353 to i256
+%r356 = getelementptr i32, i32* %r1, i32 7
+%r357 = load i32, i32* %r356
+%r358 = zext i32 %r357 to i256
+%r359 = shl i256 %r358, 224
+%r360 = or i256 %r354, %r359
+%r361 = zext i256 %r360 to i288
+%r363 = getelementptr i32, i32* %r1, i32 8
+%r364 = load i32, i32* %r363
+%r365 = zext i32 %r364 to i288
+%r366 = shl i288 %r365, 256
+%r367 = or i288 %r361, %r366
+%r368 = zext i288 %r367 to i320
+%r370 = getelementptr i32, i32* %r1, i32 9
+%r371 = load i32, i32* %r370
+%r372 = zext i32 %r371 to i320
+%r373 = shl i320 %r372, 288
+%r374 = or i320 %r368, %r373
+%r375 = zext i320 %r374 to i352
+%r377 = getelementptr i32, i32* %r1, i32 10
+%r378 = load i32, i32* %r377
+%r379 = zext i32 %r378 to i352
+%r380 = shl i352 %r379, 320
+%r381 = or i352 %r375, %r380
+%r382 = zext i352 %r381 to i384
+%r384 = getelementptr i32, i32* %r1, i32 11
+%r385 = load i32, i32* %r384
+%r386 = zext i32 %r385 to i384
+%r387 = shl i384 %r386, 352
+%r388 = or i384 %r382, %r387
+%r389 = zext i384 %r388 to i416
+%r390 = sub i416 %r310, %r389
+%r392 = getelementptr i32, i32* %r1, i32 12
+%r393 = load i32, i32* %r392
+%r394 = zext i32 %r393 to i64
+%r396 = getelementptr i32, i32* %r392, i32 1
+%r397 = load i32, i32* %r396
+%r398 = zext i32 %r397 to i64
+%r399 = shl i64 %r398, 32
+%r400 = or i64 %r394, %r399
+%r401 = zext i64 %r400 to i96
+%r403 = getelementptr i32, i32* %r392, i32 2
+%r404 = load i32, i32* %r403
+%r405 = zext i32 %r404 to i96
+%r406 = shl i96 %r405, 64
+%r407 = or i96 %r401, %r406
+%r408 = zext i96 %r407 to i128
+%r410 = getelementptr i32, i32* %r392, i32 3
+%r411 = load i32, i32* %r410
+%r412 = zext i32 %r411 to i128
+%r413 = shl i128 %r412, 96
+%r414 = or i128 %r408, %r413
+%r415 = zext i128 %r414 to i160
+%r417 = getelementptr i32, i32* %r392, i32 4
+%r418 = load i32, i32* %r417
+%r419 = zext i32 %r418 to i160
+%r420 = shl i160 %r419, 128
+%r421 = or i160 %r415, %r420
+%r422 = zext i160 %r421 to i192
+%r424 = getelementptr i32, i32* %r392, i32 5
+%r425 = load i32, i32* %r424
+%r426 = zext i32 %r425 to i192
+%r427 = shl i192 %r426, 160
+%r428 = or i192 %r422, %r427
+%r429 = zext i192 %r428 to i224
+%r431 = getelementptr i32, i32* %r392, i32 6
+%r432 = load i32, i32* %r431
+%r433 = zext i32 %r432 to i224
+%r434 = shl i224 %r433, 192
+%r435 = or i224 %r429, %r434
+%r436 = zext i224 %r435 to i256
+%r438 = getelementptr i32, i32* %r392, i32 7
+%r439 = load i32, i32* %r438
+%r440 = zext i32 %r439 to i256
+%r441 = shl i256 %r440, 224
+%r442 = or i256 %r436, %r441
+%r443 = zext i256 %r442 to i288
+%r445 = getelementptr i32, i32* %r392, i32 8
+%r446 = load i32, i32* %r445
+%r447 = zext i32 %r446 to i288
+%r448 = shl i288 %r447, 256
+%r449 = or i288 %r443, %r448
+%r450 = zext i288 %r449 to i320
+%r452 = getelementptr i32, i32* %r392, i32 9
+%r453 = load i32, i32* %r452
+%r454 = zext i32 %r453 to i320
+%r455 = shl i320 %r454, 288
+%r456 = or i320 %r450, %r455
+%r457 = zext i320 %r456 to i352
+%r459 = getelementptr i32, i32* %r392, i32 10
+%r460 = load i32, i32* %r459
+%r461 = zext i32 %r460 to i352
+%r462 = shl i352 %r461, 320
+%r463 = or i352 %r457, %r462
+%r464 = zext i352 %r463 to i384
+%r466 = getelementptr i32, i32* %r392, i32 11
+%r467 = load i32, i32* %r466
+%r468 = zext i32 %r467 to i384
+%r469 = shl i384 %r468, 352
+%r470 = or i384 %r464, %r469
+%r471 = zext i384 %r470 to i416
+%r472 = sub i416 %r390, %r471
+%r473 = zext i416 %r472 to i576
+%r475 = getelementptr i32, i32* %r1, i32 6
+%r476 = load i32, i32* %r475
+%r477 = zext i32 %r476 to i64
+%r479 = getelementptr i32, i32* %r475, i32 1
+%r480 = load i32, i32* %r479
+%r481 = zext i32 %r480 to i64
+%r482 = shl i64 %r481, 32
+%r483 = or i64 %r477, %r482
+%r484 = zext i64 %r483 to i96
+%r486 = getelementptr i32, i32* %r475, i32 2
+%r487 = load i32, i32* %r486
+%r488 = zext i32 %r487 to i96
+%r489 = shl i96 %r488, 64
+%r490 = or i96 %r484, %r489
+%r491 = zext i96 %r490 to i128
+%r493 = getelementptr i32, i32* %r475, i32 3
+%r494 = load i32, i32* %r493
+%r495 = zext i32 %r494 to i128
+%r496 = shl i128 %r495, 96
+%r497 = or i128 %r491, %r496
+%r498 = zext i128 %r497 to i160
+%r500 = getelementptr i32, i32* %r475, i32 4
+%r501 = load i32, i32* %r500
+%r502 = zext i32 %r501 to i160
+%r503 = shl i160 %r502, 128
+%r504 = or i160 %r498, %r503
+%r505 = zext i160 %r504 to i192
+%r507 = getelementptr i32, i32* %r475, i32 5
+%r508 = load i32, i32* %r507
+%r509 = zext i32 %r508 to i192
+%r510 = shl i192 %r509, 160
+%r511 = or i192 %r505, %r510
+%r512 = zext i192 %r511 to i224
+%r514 = getelementptr i32, i32* %r475, i32 6
+%r515 = load i32, i32* %r514
+%r516 = zext i32 %r515 to i224
+%r517 = shl i224 %r516, 192
+%r518 = or i224 %r512, %r517
+%r519 = zext i224 %r518 to i256
+%r521 = getelementptr i32, i32* %r475, i32 7
+%r522 = load i32, i32* %r521
+%r523 = zext i32 %r522 to i256
+%r524 = shl i256 %r523, 224
+%r525 = or i256 %r519, %r524
+%r526 = zext i256 %r525 to i288
+%r528 = getelementptr i32, i32* %r475, i32 8
+%r529 = load i32, i32* %r528
+%r530 = zext i32 %r529 to i288
+%r531 = shl i288 %r530, 256
+%r532 = or i288 %r526, %r531
+%r533 = zext i288 %r532 to i320
+%r535 = getelementptr i32, i32* %r475, i32 9
+%r536 = load i32, i32* %r535
+%r537 = zext i32 %r536 to i320
+%r538 = shl i320 %r537, 288
+%r539 = or i320 %r533, %r538
+%r540 = zext i320 %r539 to i352
+%r542 = getelementptr i32, i32* %r475, i32 10
+%r543 = load i32, i32* %r542
+%r544 = zext i32 %r543 to i352
+%r545 = shl i352 %r544, 320
+%r546 = or i352 %r540, %r545
+%r547 = zext i352 %r546 to i384
+%r549 = getelementptr i32, i32* %r475, i32 11
+%r550 = load i32, i32* %r549
+%r551 = zext i32 %r550 to i384
+%r552 = shl i384 %r551, 352
+%r553 = or i384 %r547, %r552
+%r554 = zext i384 %r553 to i416
+%r556 = getelementptr i32, i32* %r475, i32 12
+%r557 = load i32, i32* %r556
+%r558 = zext i32 %r557 to i416
+%r559 = shl i416 %r558, 384
+%r560 = or i416 %r554, %r559
+%r561 = zext i416 %r560 to i448
+%r563 = getelementptr i32, i32* %r475, i32 13
+%r564 = load i32, i32* %r563
+%r565 = zext i32 %r564 to i448
+%r566 = shl i448 %r565, 416
+%r567 = or i448 %r561, %r566
+%r568 = zext i448 %r567 to i480
+%r570 = getelementptr i32, i32* %r475, i32 14
+%r571 = load i32, i32* %r570
+%r572 = zext i32 %r571 to i480
+%r573 = shl i480 %r572, 448
+%r574 = or i480 %r568, %r573
+%r575 = zext i480 %r574 to i512
+%r577 = getelementptr i32, i32* %r475, i32 15
+%r578 = load i32, i32* %r577
+%r579 = zext i32 %r578 to i512
+%r580 = shl i512 %r579, 480
+%r581 = or i512 %r575, %r580
+%r582 = zext i512 %r581 to i544
+%r584 = getelementptr i32, i32* %r475, i32 16
+%r585 = load i32, i32* %r584
+%r586 = zext i32 %r585 to i544
+%r587 = shl i544 %r586, 512
+%r588 = or i544 %r582, %r587
+%r589 = zext i544 %r588 to i576
+%r591 = getelementptr i32, i32* %r475, i32 17
+%r592 = load i32, i32* %r591
+%r593 = zext i32 %r592 to i576
+%r594 = shl i576 %r593, 544
+%r595 = or i576 %r589, %r594
+%r596 = add i576 %r473, %r595
+%r598 = getelementptr i32, i32* %r1, i32 6
+%r599 = trunc i576 %r596 to i32
+%r601 = getelementptr i32, i32* %r598, i32 0
+store i32 %r599, i32* %r601
+%r602 = lshr i576 %r596, 32
+%r603 = trunc i576 %r602 to i32
+%r605 = getelementptr i32, i32* %r598, i32 1
+store i32 %r603, i32* %r605
+%r606 = lshr i576 %r602, 32
+%r607 = trunc i576 %r606 to i32
+%r609 = getelementptr i32, i32* %r598, i32 2
+store i32 %r607, i32* %r609
+%r610 = lshr i576 %r606, 32
+%r611 = trunc i576 %r610 to i32
+%r613 = getelementptr i32, i32* %r598, i32 3
+store i32 %r611, i32* %r613
+%r614 = lshr i576 %r610, 32
+%r615 = trunc i576 %r614 to i32
+%r617 = getelementptr i32, i32* %r598, i32 4
+store i32 %r615, i32* %r617
+%r618 = lshr i576 %r614, 32
+%r619 = trunc i576 %r618 to i32
+%r621 = getelementptr i32, i32* %r598, i32 5
+store i32 %r619, i32* %r621
+%r622 = lshr i576 %r618, 32
+%r623 = trunc i576 %r622 to i32
+%r625 = getelementptr i32, i32* %r598, i32 6
+store i32 %r623, i32* %r625
+%r626 = lshr i576 %r622, 32
+%r627 = trunc i576 %r626 to i32
+%r629 = getelementptr i32, i32* %r598, i32 7
+store i32 %r627, i32* %r629
+%r630 = lshr i576 %r626, 32
+%r631 = trunc i576 %r630 to i32
+%r633 = getelementptr i32, i32* %r598, i32 8
+store i32 %r631, i32* %r633
+%r634 = lshr i576 %r630, 32
+%r635 = trunc i576 %r634 to i32
+%r637 = getelementptr i32, i32* %r598, i32 9
+store i32 %r635, i32* %r637
+%r638 = lshr i576 %r634, 32
+%r639 = trunc i576 %r638 to i32
+%r641 = getelementptr i32, i32* %r598, i32 10
+store i32 %r639, i32* %r641
+%r642 = lshr i576 %r638, 32
+%r643 = trunc i576 %r642 to i32
+%r645 = getelementptr i32, i32* %r598, i32 11
+store i32 %r643, i32* %r645
+%r646 = lshr i576 %r642, 32
+%r647 = trunc i576 %r646 to i32
+%r649 = getelementptr i32, i32* %r598, i32 12
+store i32 %r647, i32* %r649
+%r650 = lshr i576 %r646, 32
+%r651 = trunc i576 %r650 to i32
+%r653 = getelementptr i32, i32* %r598, i32 13
+store i32 %r651, i32* %r653
+%r654 = lshr i576 %r650, 32
+%r655 = trunc i576 %r654 to i32
+%r657 = getelementptr i32, i32* %r598, i32 14
+store i32 %r655, i32* %r657
+%r658 = lshr i576 %r654, 32
+%r659 = trunc i576 %r658 to i32
+%r661 = getelementptr i32, i32* %r598, i32 15
+store i32 %r659, i32* %r661
+%r662 = lshr i576 %r658, 32
+%r663 = trunc i576 %r662 to i32
+%r665 = getelementptr i32, i32* %r598, i32 16
+store i32 %r663, i32* %r665
+%r666 = lshr i576 %r662, 32
+%r667 = trunc i576 %r666 to i32
+%r669 = getelementptr i32, i32* %r598, i32 17
+store i32 %r667, i32* %r669
+ret void
+}
+define void @mcl_fpDbl_sqrPre12L(i32* noalias  %r1, i32* noalias  %r2) ; squares the 12-word value at %r2 into %r1 using three 6-word multiplications (Karatsuba-style split); reads %r2[0..11], stores %r1[6..23] directly, the rest is written by the callees
+{
+%r4 = getelementptr i32, i32* %r2, i32 6 ; upper six words of the input
+%r6 = getelementptr i32, i32* %r2, i32 6 ; same address as %r4 (both operands of a square are the same value)
+%r8 = getelementptr i32, i32* %r1, i32 12 ; output position 12 words into %r1
+call void @mcl_fpDbl_mulPre6L(i32* %r1, i32* %r2, i32* %r2) ; lower half times itself, result at %r1 (callee not visible here; presumably writes 12 words)
+call void @mcl_fpDbl_mulPre6L(i32* %r8, i32* %r4, i32* %r6) ; upper half times itself, result at %r1+12
+%r9 = load i32, i32* %r4 ; assemble %r4[0..5] into one 192-bit integer, word 0 least significant
+%r10 = zext i32 %r9 to i64
+%r12 = getelementptr i32, i32* %r4, i32 1
+%r13 = load i32, i32* %r12
+%r14 = zext i32 %r13 to i64
+%r15 = shl i64 %r14, 32
+%r16 = or i64 %r10, %r15
+%r17 = zext i64 %r16 to i96
+%r19 = getelementptr i32, i32* %r4, i32 2
+%r20 = load i32, i32* %r19
+%r21 = zext i32 %r20 to i96
+%r22 = shl i96 %r21, 64
+%r23 = or i96 %r17, %r22
+%r24 = zext i96 %r23 to i128
+%r26 = getelementptr i32, i32* %r4, i32 3
+%r27 = load i32, i32* %r26
+%r28 = zext i32 %r27 to i128
+%r29 = shl i128 %r28, 96
+%r30 = or i128 %r24, %r29
+%r31 = zext i128 %r30 to i160
+%r33 = getelementptr i32, i32* %r4, i32 4
+%r34 = load i32, i32* %r33
+%r35 = zext i32 %r34 to i160
+%r36 = shl i160 %r35, 128
+%r37 = or i160 %r31, %r36
+%r38 = zext i160 %r37 to i192
+%r40 = getelementptr i32, i32* %r4, i32 5
+%r41 = load i32, i32* %r40
+%r42 = zext i32 %r41 to i192
+%r43 = shl i192 %r42, 160
+%r44 = or i192 %r38, %r43
+%r45 = zext i192 %r44 to i224 ; upper half, widened so a later add cannot wrap
+%r46 = load i32, i32* %r2 ; assemble %r2[0..5] (lower half) the same way
+%r47 = zext i32 %r46 to i64
+%r49 = getelementptr i32, i32* %r2, i32 1
+%r50 = load i32, i32* %r49
+%r51 = zext i32 %r50 to i64
+%r52 = shl i64 %r51, 32
+%r53 = or i64 %r47, %r52
+%r54 = zext i64 %r53 to i96
+%r56 = getelementptr i32, i32* %r2, i32 2
+%r57 = load i32, i32* %r56
+%r58 = zext i32 %r57 to i96
+%r59 = shl i96 %r58, 64
+%r60 = or i96 %r54, %r59
+%r61 = zext i96 %r60 to i128
+%r63 = getelementptr i32, i32* %r2, i32 3
+%r64 = load i32, i32* %r63
+%r65 = zext i32 %r64 to i128
+%r66 = shl i128 %r65, 96
+%r67 = or i128 %r61, %r66
+%r68 = zext i128 %r67 to i160
+%r70 = getelementptr i32, i32* %r2, i32 4
+%r71 = load i32, i32* %r70
+%r72 = zext i32 %r71 to i160
+%r73 = shl i160 %r72, 128
+%r74 = or i160 %r68, %r73
+%r75 = zext i160 %r74 to i192
+%r77 = getelementptr i32, i32* %r2, i32 5
+%r78 = load i32, i32* %r77
+%r79 = zext i32 %r78 to i192
+%r80 = shl i192 %r79, 160
+%r81 = or i192 %r75, %r80
+%r82 = zext i192 %r81 to i224 ; lower half, widened to 224 bits
+%r83 = load i32, i32* %r6 ; second copy of the upper half (read through %r6)
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r6, i32 1
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r6, i32 2
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r6, i32 3
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r6, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r6, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r120 = load i32, i32* %r2 ; second copy of the lower half
+%r121 = zext i32 %r120 to i64
+%r123 = getelementptr i32, i32* %r2, i32 1
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i64
+%r126 = shl i64 %r125, 32
+%r127 = or i64 %r121, %r126
+%r128 = zext i64 %r127 to i96
+%r130 = getelementptr i32, i32* %r2, i32 2
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i96
+%r133 = shl i96 %r132, 64
+%r134 = or i96 %r128, %r133
+%r135 = zext i96 %r134 to i128
+%r137 = getelementptr i32, i32* %r2, i32 3
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i128
+%r140 = shl i128 %r139, 96
+%r141 = or i128 %r135, %r140
+%r142 = zext i128 %r141 to i160
+%r144 = getelementptr i32, i32* %r2, i32 4
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i160
+%r147 = shl i160 %r146, 128
+%r148 = or i160 %r142, %r147
+%r149 = zext i160 %r148 to i192
+%r151 = getelementptr i32, i32* %r2, i32 5
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i192
+%r154 = shl i192 %r153, 160
+%r155 = or i192 %r149, %r154
+%r156 = zext i192 %r155 to i224
+%r157 = add i224 %r45, %r82 ; upper + lower half; any carry lands in bit 192
+%r158 = add i224 %r119, %r156 ; same sum again, computed from the second copies
+%r160 = alloca i32, i32 12 ; stack buffer for the product of the two truncated sums
+%r161 = trunc i224 %r157 to i192 ; low 192 bits of the first sum
+%r162 = trunc i224 %r158 to i192 ; low 192 bits of the second sum
+%r163 = lshr i224 %r157, 192
+%r164 = trunc i224 %r163 to i1 ; carry bit of the first sum
+%r165 = lshr i224 %r158, 192
+%r166 = trunc i224 %r165 to i1 ; carry bit of the second sum
+%r167 = and i1 %r164, %r166 ; set only when both sums carried
+%r169 = select i1 %r164, i192 %r162, i192 0 ; cross term: second sum's low bits if the first sum carried, else 0
+%r171 = select i1 %r166, i192 %r161, i192 0 ; cross term: first sum's low bits if the second sum carried, else 0
+%r173 = alloca i32, i32 6 ; stack copy of %r161 as six words
+%r175 = alloca i32, i32 6 ; stack copy of %r162 as six words
+%r176 = trunc i192 %r161 to i32 ; spill %r161 to %r173[0..5], least significant word first
+%r178 = getelementptr i32, i32* %r173, i32 0
+store i32 %r176, i32* %r178
+%r179 = lshr i192 %r161, 32
+%r180 = trunc i192 %r179 to i32
+%r182 = getelementptr i32, i32* %r173, i32 1
+store i32 %r180, i32* %r182
+%r183 = lshr i192 %r179, 32
+%r184 = trunc i192 %r183 to i32
+%r186 = getelementptr i32, i32* %r173, i32 2
+store i32 %r184, i32* %r186
+%r187 = lshr i192 %r183, 32
+%r188 = trunc i192 %r187 to i32
+%r190 = getelementptr i32, i32* %r173, i32 3
+store i32 %r188, i32* %r190
+%r191 = lshr i192 %r187, 32
+%r192 = trunc i192 %r191 to i32
+%r194 = getelementptr i32, i32* %r173, i32 4
+store i32 %r192, i32* %r194
+%r195 = lshr i192 %r191, 32
+%r196 = trunc i192 %r195 to i32
+%r198 = getelementptr i32, i32* %r173, i32 5
+store i32 %r196, i32* %r198
+%r199 = trunc i192 %r162 to i32 ; spill %r162 to %r175[0..5]
+%r201 = getelementptr i32, i32* %r175, i32 0
+store i32 %r199, i32* %r201
+%r202 = lshr i192 %r162, 32
+%r203 = trunc i192 %r202 to i32
+%r205 = getelementptr i32, i32* %r175, i32 1
+store i32 %r203, i32* %r205
+%r206 = lshr i192 %r202, 32
+%r207 = trunc i192 %r206 to i32
+%r209 = getelementptr i32, i32* %r175, i32 2
+store i32 %r207, i32* %r209
+%r210 = lshr i192 %r206, 32
+%r211 = trunc i192 %r210 to i32
+%r213 = getelementptr i32, i32* %r175, i32 3
+store i32 %r211, i32* %r213
+%r214 = lshr i192 %r210, 32
+%r215 = trunc i192 %r214 to i32
+%r217 = getelementptr i32, i32* %r175, i32 4
+store i32 %r215, i32* %r217
+%r218 = lshr i192 %r214, 32
+%r219 = trunc i192 %r218 to i32
+%r221 = getelementptr i32, i32* %r175, i32 5
+store i32 %r219, i32* %r221
+call void @mcl_fpDbl_mulPre6L(i32* %r160, i32* %r173, i32* %r175) ; product of the two truncated sums into %r160
+%r222 = load i32, i32* %r160 ; assemble %r160[0..11] into one 384-bit integer
+%r223 = zext i32 %r222 to i64
+%r225 = getelementptr i32, i32* %r160, i32 1
+%r226 = load i32, i32* %r225
+%r227 = zext i32 %r226 to i64
+%r228 = shl i64 %r227, 32
+%r229 = or i64 %r223, %r228
+%r230 = zext i64 %r229 to i96
+%r232 = getelementptr i32, i32* %r160, i32 2
+%r233 = load i32, i32* %r232
+%r234 = zext i32 %r233 to i96
+%r235 = shl i96 %r234, 64
+%r236 = or i96 %r230, %r235
+%r237 = zext i96 %r236 to i128
+%r239 = getelementptr i32, i32* %r160, i32 3
+%r240 = load i32, i32* %r239
+%r241 = zext i32 %r240 to i128
+%r242 = shl i128 %r241, 96
+%r243 = or i128 %r237, %r242
+%r244 = zext i128 %r243 to i160
+%r246 = getelementptr i32, i32* %r160, i32 4
+%r247 = load i32, i32* %r246
+%r248 = zext i32 %r247 to i160
+%r249 = shl i160 %r248, 128
+%r250 = or i160 %r244, %r249
+%r251 = zext i160 %r250 to i192
+%r253 = getelementptr i32, i32* %r160, i32 5
+%r254 = load i32, i32* %r253
+%r255 = zext i32 %r254 to i192
+%r256 = shl i192 %r255, 160
+%r257 = or i192 %r251, %r256
+%r258 = zext i192 %r257 to i224
+%r260 = getelementptr i32, i32* %r160, i32 6
+%r261 = load i32, i32* %r260
+%r262 = zext i32 %r261 to i224
+%r263 = shl i224 %r262, 192
+%r264 = or i224 %r258, %r263
+%r265 = zext i224 %r264 to i256
+%r267 = getelementptr i32, i32* %r160, i32 7
+%r268 = load i32, i32* %r267
+%r269 = zext i32 %r268 to i256
+%r270 = shl i256 %r269, 224
+%r271 = or i256 %r265, %r270
+%r272 = zext i256 %r271 to i288
+%r274 = getelementptr i32, i32* %r160, i32 8
+%r275 = load i32, i32* %r274
+%r276 = zext i32 %r275 to i288
+%r277 = shl i288 %r276, 256
+%r278 = or i288 %r272, %r277
+%r279 = zext i288 %r278 to i320
+%r281 = getelementptr i32, i32* %r160, i32 9
+%r282 = load i32, i32* %r281
+%r283 = zext i32 %r282 to i320
+%r284 = shl i320 %r283, 288
+%r285 = or i320 %r279, %r284
+%r286 = zext i320 %r285 to i352
+%r288 = getelementptr i32, i32* %r160, i32 10
+%r289 = load i32, i32* %r288
+%r290 = zext i32 %r289 to i352
+%r291 = shl i352 %r290, 320
+%r292 = or i352 %r286, %r291
+%r293 = zext i352 %r292 to i384
+%r295 = getelementptr i32, i32* %r160, i32 11
+%r296 = load i32, i32* %r295
+%r297 = zext i32 %r296 to i384
+%r298 = shl i384 %r297, 352
+%r299 = or i384 %r293, %r298
+%r300 = zext i384 %r299 to i416
+%r301 = zext i1 %r167 to i416
+%r302 = shl i416 %r301, 384 ; carry*carry contributes at bit 384
+%r303 = or i416 %r300, %r302
+%r304 = zext i192 %r169 to i416
+%r305 = zext i192 %r171 to i416
+%r306 = shl i416 %r304, 192 ; the two cross terms contribute at bit 192
+%r307 = shl i416 %r305, 192
+%r308 = add i416 %r303, %r306
+%r309 = add i416 %r308, %r307 ; full square of (upper + lower), including the carry bits
+%r310 = load i32, i32* %r1 ; assemble %r1[0..11] (written by the first call above) into a 384-bit integer
+%r311 = zext i32 %r310 to i64
+%r313 = getelementptr i32, i32* %r1, i32 1
+%r314 = load i32, i32* %r313
+%r315 = zext i32 %r314 to i64
+%r316 = shl i64 %r315, 32
+%r317 = or i64 %r311, %r316
+%r318 = zext i64 %r317 to i96
+%r320 = getelementptr i32, i32* %r1, i32 2
+%r321 = load i32, i32* %r320
+%r322 = zext i32 %r321 to i96
+%r323 = shl i96 %r322, 64
+%r324 = or i96 %r318, %r323
+%r325 = zext i96 %r324 to i128
+%r327 = getelementptr i32, i32* %r1, i32 3
+%r328 = load i32, i32* %r327
+%r329 = zext i32 %r328 to i128
+%r330 = shl i128 %r329, 96
+%r331 = or i128 %r325, %r330
+%r332 = zext i128 %r331 to i160
+%r334 = getelementptr i32, i32* %r1, i32 4
+%r335 = load i32, i32* %r334
+%r336 = zext i32 %r335 to i160
+%r337 = shl i160 %r336, 128
+%r338 = or i160 %r332, %r337
+%r339 = zext i160 %r338 to i192
+%r341 = getelementptr i32, i32* %r1, i32 5
+%r342 = load i32, i32* %r341
+%r343 = zext i32 %r342 to i192
+%r344 = shl i192 %r343, 160
+%r345 = or i192 %r339, %r344
+%r346 = zext i192 %r345 to i224
+%r348 = getelementptr i32, i32* %r1, i32 6
+%r349 = load i32, i32* %r348
+%r350 = zext i32 %r349 to i224
+%r351 = shl i224 %r350, 192
+%r352 = or i224 %r346, %r351
+%r353 = zext i224 %r352 to i256
+%r355 = getelementptr i32, i32* %r1, i32 7
+%r356 = load i32, i32* %r355
+%r357 = zext i32 %r356 to i256
+%r358 = shl i256 %r357, 224
+%r359 = or i256 %r353, %r358
+%r360 = zext i256 %r359 to i288
+%r362 = getelementptr i32, i32* %r1, i32 8
+%r363 = load i32, i32* %r362
+%r364 = zext i32 %r363 to i288
+%r365 = shl i288 %r364, 256
+%r366 = or i288 %r360, %r365
+%r367 = zext i288 %r366 to i320
+%r369 = getelementptr i32, i32* %r1, i32 9
+%r370 = load i32, i32* %r369
+%r371 = zext i32 %r370 to i320
+%r372 = shl i320 %r371, 288
+%r373 = or i320 %r367, %r372
+%r374 = zext i320 %r373 to i352
+%r376 = getelementptr i32, i32* %r1, i32 10
+%r377 = load i32, i32* %r376
+%r378 = zext i32 %r377 to i352
+%r379 = shl i352 %r378, 320
+%r380 = or i352 %r374, %r379
+%r381 = zext i352 %r380 to i384
+%r383 = getelementptr i32, i32* %r1, i32 11
+%r384 = load i32, i32* %r383
+%r385 = zext i32 %r384 to i384
+%r386 = shl i384 %r385, 352
+%r387 = or i384 %r381, %r386
+%r388 = zext i384 %r387 to i416
+%r389 = sub i416 %r309, %r388 ; remove the lower-half square from the sum's square
+%r391 = getelementptr i32, i32* %r1, i32 12 ; assemble %r1[12..23] (written by the second call above) into a 384-bit integer
+%r392 = load i32, i32* %r391
+%r393 = zext i32 %r392 to i64
+%r395 = getelementptr i32, i32* %r391, i32 1
+%r396 = load i32, i32* %r395
+%r397 = zext i32 %r396 to i64
+%r398 = shl i64 %r397, 32
+%r399 = or i64 %r393, %r398
+%r400 = zext i64 %r399 to i96
+%r402 = getelementptr i32, i32* %r391, i32 2
+%r403 = load i32, i32* %r402
+%r404 = zext i32 %r403 to i96
+%r405 = shl i96 %r404, 64
+%r406 = or i96 %r400, %r405
+%r407 = zext i96 %r406 to i128
+%r409 = getelementptr i32, i32* %r391, i32 3
+%r410 = load i32, i32* %r409
+%r411 = zext i32 %r410 to i128
+%r412 = shl i128 %r411, 96
+%r413 = or i128 %r407, %r412
+%r414 = zext i128 %r413 to i160
+%r416 = getelementptr i32, i32* %r391, i32 4
+%r417 = load i32, i32* %r416
+%r418 = zext i32 %r417 to i160
+%r419 = shl i160 %r418, 128
+%r420 = or i160 %r414, %r419
+%r421 = zext i160 %r420 to i192
+%r423 = getelementptr i32, i32* %r391, i32 5
+%r424 = load i32, i32* %r423
+%r425 = zext i32 %r424 to i192
+%r426 = shl i192 %r425, 160
+%r427 = or i192 %r421, %r426
+%r428 = zext i192 %r427 to i224
+%r430 = getelementptr i32, i32* %r391, i32 6
+%r431 = load i32, i32* %r430
+%r432 = zext i32 %r431 to i224
+%r433 = shl i224 %r432, 192
+%r434 = or i224 %r428, %r433
+%r435 = zext i224 %r434 to i256
+%r437 = getelementptr i32, i32* %r391, i32 7
+%r438 = load i32, i32* %r437
+%r439 = zext i32 %r438 to i256
+%r440 = shl i256 %r439, 224
+%r441 = or i256 %r435, %r440
+%r442 = zext i256 %r441 to i288
+%r444 = getelementptr i32, i32* %r391, i32 8
+%r445 = load i32, i32* %r444
+%r446 = zext i32 %r445 to i288
+%r447 = shl i288 %r446, 256
+%r448 = or i288 %r442, %r447
+%r449 = zext i288 %r448 to i320
+%r451 = getelementptr i32, i32* %r391, i32 9
+%r452 = load i32, i32* %r451
+%r453 = zext i32 %r452 to i320
+%r454 = shl i320 %r453, 288
+%r455 = or i320 %r449, %r454
+%r456 = zext i320 %r455 to i352
+%r458 = getelementptr i32, i32* %r391, i32 10
+%r459 = load i32, i32* %r458
+%r460 = zext i32 %r459 to i352
+%r461 = shl i352 %r460, 320
+%r462 = or i352 %r456, %r461
+%r463 = zext i352 %r462 to i384
+%r465 = getelementptr i32, i32* %r391, i32 11
+%r466 = load i32, i32* %r465
+%r467 = zext i32 %r466 to i384
+%r468 = shl i384 %r467, 352
+%r469 = or i384 %r463, %r468
+%r470 = zext i384 %r469 to i416
+%r471 = sub i416 %r389, %r470 ; remove the upper-half square too; what remains is the middle term
+%r472 = zext i416 %r471 to i576 ; widen to the 18 words it will be added into
+%r474 = getelementptr i32, i32* %r1, i32 6 ; assemble %r1[6..23] into one 576-bit integer
+%r475 = load i32, i32* %r474
+%r476 = zext i32 %r475 to i64
+%r478 = getelementptr i32, i32* %r474, i32 1
+%r479 = load i32, i32* %r478
+%r480 = zext i32 %r479 to i64
+%r481 = shl i64 %r480, 32
+%r482 = or i64 %r476, %r481
+%r483 = zext i64 %r482 to i96
+%r485 = getelementptr i32, i32* %r474, i32 2
+%r486 = load i32, i32* %r485
+%r487 = zext i32 %r486 to i96
+%r488 = shl i96 %r487, 64
+%r489 = or i96 %r483, %r488
+%r490 = zext i96 %r489 to i128
+%r492 = getelementptr i32, i32* %r474, i32 3
+%r493 = load i32, i32* %r492
+%r494 = zext i32 %r493 to i128
+%r495 = shl i128 %r494, 96
+%r496 = or i128 %r490, %r495
+%r497 = zext i128 %r496 to i160
+%r499 = getelementptr i32, i32* %r474, i32 4
+%r500 = load i32, i32* %r499
+%r501 = zext i32 %r500 to i160
+%r502 = shl i160 %r501, 128
+%r503 = or i160 %r497, %r502
+%r504 = zext i160 %r503 to i192
+%r506 = getelementptr i32, i32* %r474, i32 5
+%r507 = load i32, i32* %r506
+%r508 = zext i32 %r507 to i192
+%r509 = shl i192 %r508, 160
+%r510 = or i192 %r504, %r509
+%r511 = zext i192 %r510 to i224
+%r513 = getelementptr i32, i32* %r474, i32 6
+%r514 = load i32, i32* %r513
+%r515 = zext i32 %r514 to i224
+%r516 = shl i224 %r515, 192
+%r517 = or i224 %r511, %r516
+%r518 = zext i224 %r517 to i256
+%r520 = getelementptr i32, i32* %r474, i32 7
+%r521 = load i32, i32* %r520
+%r522 = zext i32 %r521 to i256
+%r523 = shl i256 %r522, 224
+%r524 = or i256 %r518, %r523
+%r525 = zext i256 %r524 to i288
+%r527 = getelementptr i32, i32* %r474, i32 8
+%r528 = load i32, i32* %r527
+%r529 = zext i32 %r528 to i288
+%r530 = shl i288 %r529, 256
+%r531 = or i288 %r525, %r530
+%r532 = zext i288 %r531 to i320
+%r534 = getelementptr i32, i32* %r474, i32 9
+%r535 = load i32, i32* %r534
+%r536 = zext i32 %r535 to i320
+%r537 = shl i320 %r536, 288
+%r538 = or i320 %r532, %r537
+%r539 = zext i320 %r538 to i352
+%r541 = getelementptr i32, i32* %r474, i32 10
+%r542 = load i32, i32* %r541
+%r543 = zext i32 %r542 to i352
+%r544 = shl i352 %r543, 320
+%r545 = or i352 %r539, %r544
+%r546 = zext i352 %r545 to i384
+%r548 = getelementptr i32, i32* %r474, i32 11
+%r549 = load i32, i32* %r548
+%r550 = zext i32 %r549 to i384
+%r551 = shl i384 %r550, 352
+%r552 = or i384 %r546, %r551
+%r553 = zext i384 %r552 to i416
+%r555 = getelementptr i32, i32* %r474, i32 12
+%r556 = load i32, i32* %r555
+%r557 = zext i32 %r556 to i416
+%r558 = shl i416 %r557, 384
+%r559 = or i416 %r553, %r558
+%r560 = zext i416 %r559 to i448
+%r562 = getelementptr i32, i32* %r474, i32 13
+%r563 = load i32, i32* %r562
+%r564 = zext i32 %r563 to i448
+%r565 = shl i448 %r564, 416
+%r566 = or i448 %r560, %r565
+%r567 = zext i448 %r566 to i480
+%r569 = getelementptr i32, i32* %r474, i32 14
+%r570 = load i32, i32* %r569
+%r571 = zext i32 %r570 to i480
+%r572 = shl i480 %r571, 448
+%r573 = or i480 %r567, %r572
+%r574 = zext i480 %r573 to i512
+%r576 = getelementptr i32, i32* %r474, i32 15
+%r577 = load i32, i32* %r576
+%r578 = zext i32 %r577 to i512
+%r579 = shl i512 %r578, 480
+%r580 = or i512 %r574, %r579
+%r581 = zext i512 %r580 to i544
+%r583 = getelementptr i32, i32* %r474, i32 16
+%r584 = load i32, i32* %r583
+%r585 = zext i32 %r584 to i544
+%r586 = shl i544 %r585, 512
+%r587 = or i544 %r581, %r586
+%r588 = zext i544 %r587 to i576
+%r590 = getelementptr i32, i32* %r474, i32 17
+%r591 = load i32, i32* %r590
+%r592 = zext i32 %r591 to i576
+%r593 = shl i576 %r592, 544
+%r594 = or i576 %r588, %r593
+%r595 = add i576 %r472, %r594 ; add the middle term into output words 6..23
+%r597 = getelementptr i32, i32* %r1, i32 6 ; write the 576-bit sum back to %r1[6..23], least significant word first
+%r598 = trunc i576 %r595 to i32
+%r600 = getelementptr i32, i32* %r597, i32 0
+store i32 %r598, i32* %r600
+%r601 = lshr i576 %r595, 32
+%r602 = trunc i576 %r601 to i32
+%r604 = getelementptr i32, i32* %r597, i32 1
+store i32 %r602, i32* %r604
+%r605 = lshr i576 %r601, 32
+%r606 = trunc i576 %r605 to i32
+%r608 = getelementptr i32, i32* %r597, i32 2
+store i32 %r606, i32* %r608
+%r609 = lshr i576 %r605, 32
+%r610 = trunc i576 %r609 to i32
+%r612 = getelementptr i32, i32* %r597, i32 3
+store i32 %r610, i32* %r612
+%r613 = lshr i576 %r609, 32
+%r614 = trunc i576 %r613 to i32
+%r616 = getelementptr i32, i32* %r597, i32 4
+store i32 %r614, i32* %r616
+%r617 = lshr i576 %r613, 32
+%r618 = trunc i576 %r617 to i32
+%r620 = getelementptr i32, i32* %r597, i32 5
+store i32 %r618, i32* %r620
+%r621 = lshr i576 %r617, 32
+%r622 = trunc i576 %r621 to i32
+%r624 = getelementptr i32, i32* %r597, i32 6
+store i32 %r622, i32* %r624
+%r625 = lshr i576 %r621, 32
+%r626 = trunc i576 %r625 to i32
+%r628 = getelementptr i32, i32* %r597, i32 7
+store i32 %r626, i32* %r628
+%r629 = lshr i576 %r625, 32
+%r630 = trunc i576 %r629 to i32
+%r632 = getelementptr i32, i32* %r597, i32 8
+store i32 %r630, i32* %r632
+%r633 = lshr i576 %r629, 32
+%r634 = trunc i576 %r633 to i32
+%r636 = getelementptr i32, i32* %r597, i32 9
+store i32 %r634, i32* %r636
+%r637 = lshr i576 %r633, 32
+%r638 = trunc i576 %r637 to i32
+%r640 = getelementptr i32, i32* %r597, i32 10
+store i32 %r638, i32* %r640
+%r641 = lshr i576 %r637, 32
+%r642 = trunc i576 %r641 to i32
+%r644 = getelementptr i32, i32* %r597, i32 11
+store i32 %r642, i32* %r644
+%r645 = lshr i576 %r641, 32
+%r646 = trunc i576 %r645 to i32
+%r648 = getelementptr i32, i32* %r597, i32 12
+store i32 %r646, i32* %r648
+%r649 = lshr i576 %r645, 32
+%r650 = trunc i576 %r649 to i32
+%r652 = getelementptr i32, i32* %r597, i32 13
+store i32 %r650, i32* %r652
+%r653 = lshr i576 %r649, 32
+%r654 = trunc i576 %r653 to i32
+%r656 = getelementptr i32, i32* %r597, i32 14
+store i32 %r654, i32* %r656
+%r657 = lshr i576 %r653, 32
+%r658 = trunc i576 %r657 to i32
+%r660 = getelementptr i32, i32* %r597, i32 15
+store i32 %r658, i32* %r660
+%r661 = lshr i576 %r657, 32
+%r662 = trunc i576 %r661 to i32
+%r664 = getelementptr i32, i32* %r597, i32 16
+store i32 %r662, i32* %r664
+%r665 = lshr i576 %r661, 32
+%r666 = trunc i576 %r665 to i32
+%r668 = getelementptr i32, i32* %r597, i32 17
+store i32 %r666, i32* %r668
+ret void
+}
+define void @mcl_fp_mont12L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; word-by-word Montgomery-style multiply: stores 12 words to %r1 from operands %r2 and %r3 (12 words of %r3 are read here); %r4 is presumably the modulus (12 words) - confirm against callers
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; the word stored just before %r4[0]
+%r7 = load i32, i32* %r6 ; per-round multiplier constant; presumably -1/p mod 2^32 - confirm against the code that lays out this table
+%r9 = getelementptr i32, i32* %r3, i32 0 ; round 0: word %r3[0]
+%r10 = load i32, i32* %r9
+%r11 = call i416 @mulPv384x32(i32* %r2, i32 %r10) ; %r2 times that word (callee defined elsewhere; returns i416)
+%r12 = zext i416 %r11 to i448
+%r13 = trunc i416 %r11 to i32 ; low word of the running value
+%r14 = mul i32 %r13, %r7 ; this round's reduction factor (product taken mod 2^32)
+%r15 = call i416 @mulPv384x32(i32* %r4, i32 %r14) ; %r4 times the reduction factor
+%r16 = zext i416 %r15 to i448
+%r17 = add i448 %r12, %r16
+%r18 = lshr i448 %r17, 32 ; discard the low word (presumably zero by construction of %r7 - confirm)
+%r20 = getelementptr i32, i32* %r3, i32 1 ; round 1: same steps with %r3[1], accumulating onto %r18
+%r21 = load i32, i32* %r20
+%r22 = call i416 @mulPv384x32(i32* %r2, i32 %r21)
+%r23 = zext i416 %r22 to i448
+%r24 = add i448 %r18, %r23
+%r25 = trunc i448 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i416 @mulPv384x32(i32* %r4, i32 %r26)
+%r28 = zext i416 %r27 to i448
+%r29 = add i448 %r24, %r28
+%r30 = lshr i448 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2 ; round 2
+%r33 = load i32, i32* %r32
+%r34 = call i416 @mulPv384x32(i32* %r2, i32 %r33)
+%r35 = zext i416 %r34 to i448
+%r36 = add i448 %r30, %r35
+%r37 = trunc i448 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i416 @mulPv384x32(i32* %r4, i32 %r38)
+%r40 = zext i416 %r39 to i448
+%r41 = add i448 %r36, %r40
+%r42 = lshr i448 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3 ; round 3
+%r45 = load i32, i32* %r44
+%r46 = call i416 @mulPv384x32(i32* %r2, i32 %r45)
+%r47 = zext i416 %r46 to i448
+%r48 = add i448 %r42, %r47
+%r49 = trunc i448 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i416 @mulPv384x32(i32* %r4, i32 %r50)
+%r52 = zext i416 %r51 to i448
+%r53 = add i448 %r48, %r52
+%r54 = lshr i448 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4 ; round 4
+%r57 = load i32, i32* %r56
+%r58 = call i416 @mulPv384x32(i32* %r2, i32 %r57)
+%r59 = zext i416 %r58 to i448
+%r60 = add i448 %r54, %r59
+%r61 = trunc i448 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i416 @mulPv384x32(i32* %r4, i32 %r62)
+%r64 = zext i416 %r63 to i448
+%r65 = add i448 %r60, %r64
+%r66 = lshr i448 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5 ; round 5
+%r69 = load i32, i32* %r68
+%r70 = call i416 @mulPv384x32(i32* %r2, i32 %r69)
+%r71 = zext i416 %r70 to i448
+%r72 = add i448 %r66, %r71
+%r73 = trunc i448 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i416 @mulPv384x32(i32* %r4, i32 %r74)
+%r76 = zext i416 %r75 to i448
+%r77 = add i448 %r72, %r76
+%r78 = lshr i448 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6 ; round 6
+%r81 = load i32, i32* %r80
+%r82 = call i416 @mulPv384x32(i32* %r2, i32 %r81)
+%r83 = zext i416 %r82 to i448
+%r84 = add i448 %r78, %r83
+%r85 = trunc i448 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i416 @mulPv384x32(i32* %r4, i32 %r86)
+%r88 = zext i416 %r87 to i448
+%r89 = add i448 %r84, %r88
+%r90 = lshr i448 %r89, 32
+%r92 = getelementptr i32, i32* %r3, i32 7 ; round 7
+%r93 = load i32, i32* %r92
+%r94 = call i416 @mulPv384x32(i32* %r2, i32 %r93)
+%r95 = zext i416 %r94 to i448
+%r96 = add i448 %r90, %r95
+%r97 = trunc i448 %r96 to i32
+%r98 = mul i32 %r97, %r7
+%r99 = call i416 @mulPv384x32(i32* %r4, i32 %r98)
+%r100 = zext i416 %r99 to i448
+%r101 = add i448 %r96, %r100
+%r102 = lshr i448 %r101, 32
+%r104 = getelementptr i32, i32* %r3, i32 8 ; round 8
+%r105 = load i32, i32* %r104
+%r106 = call i416 @mulPv384x32(i32* %r2, i32 %r105)
+%r107 = zext i416 %r106 to i448
+%r108 = add i448 %r102, %r107
+%r109 = trunc i448 %r108 to i32
+%r110 = mul i32 %r109, %r7
+%r111 = call i416 @mulPv384x32(i32* %r4, i32 %r110)
+%r112 = zext i416 %r111 to i448
+%r113 = add i448 %r108, %r112
+%r114 = lshr i448 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 9 ; round 9
+%r117 = load i32, i32* %r116
+%r118 = call i416 @mulPv384x32(i32* %r2, i32 %r117)
+%r119 = zext i416 %r118 to i448
+%r120 = add i448 %r114, %r119
+%r121 = trunc i448 %r120 to i32
+%r122 = mul i32 %r121, %r7
+%r123 = call i416 @mulPv384x32(i32* %r4, i32 %r122)
+%r124 = zext i416 %r123 to i448
+%r125 = add i448 %r120, %r124
+%r126 = lshr i448 %r125, 32
+%r128 = getelementptr i32, i32* %r3, i32 10 ; round 10
+%r129 = load i32, i32* %r128
+%r130 = call i416 @mulPv384x32(i32* %r2, i32 %r129)
+%r131 = zext i416 %r130 to i448
+%r132 = add i448 %r126, %r131
+%r133 = trunc i448 %r132 to i32
+%r134 = mul i32 %r133, %r7
+%r135 = call i416 @mulPv384x32(i32* %r4, i32 %r134)
+%r136 = zext i416 %r135 to i448
+%r137 = add i448 %r132, %r136
+%r138 = lshr i448 %r137, 32
+%r140 = getelementptr i32, i32* %r3, i32 11 ; round 11 (last word of %r3 read here)
+%r141 = load i32, i32* %r140
+%r142 = call i416 @mulPv384x32(i32* %r2, i32 %r141)
+%r143 = zext i416 %r142 to i448
+%r144 = add i448 %r138, %r143
+%r145 = trunc i448 %r144 to i32
+%r146 = mul i32 %r145, %r7
+%r147 = call i416 @mulPv384x32(i32* %r4, i32 %r146)
+%r148 = zext i416 %r147 to i448
+%r149 = add i448 %r144, %r148
+%r150 = lshr i448 %r149, 32
+%r151 = trunc i448 %r150 to i416 ; value after all 12 rounds, before the final correction
+%r152 = load i32, i32* %r4 ; assemble %r4[0..11] into one 384-bit integer, word 0 least significant
+%r153 = zext i32 %r152 to i64
+%r155 = getelementptr i32, i32* %r4, i32 1
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i64
+%r158 = shl i64 %r157, 32
+%r159 = or i64 %r153, %r158
+%r160 = zext i64 %r159 to i96
+%r162 = getelementptr i32, i32* %r4, i32 2
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i96
+%r165 = shl i96 %r164, 64
+%r166 = or i96 %r160, %r165
+%r167 = zext i96 %r166 to i128
+%r169 = getelementptr i32, i32* %r4, i32 3
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i128
+%r172 = shl i128 %r171, 96
+%r173 = or i128 %r167, %r172
+%r174 = zext i128 %r173 to i160
+%r176 = getelementptr i32, i32* %r4, i32 4
+%r177 = load i32, i32* %r176
+%r178 = zext i32 %r177 to i160
+%r179 = shl i160 %r178, 128
+%r180 = or i160 %r174, %r179
+%r181 = zext i160 %r180 to i192
+%r183 = getelementptr i32, i32* %r4, i32 5
+%r184 = load i32, i32* %r183
+%r185 = zext i32 %r184 to i192
+%r186 = shl i192 %r185, 160
+%r187 = or i192 %r181, %r186
+%r188 = zext i192 %r187 to i224
+%r190 = getelementptr i32, i32* %r4, i32 6
+%r191 = load i32, i32* %r190
+%r192 = zext i32 %r191 to i224
+%r193 = shl i224 %r192, 192
+%r194 = or i224 %r188, %r193
+%r195 = zext i224 %r194 to i256
+%r197 = getelementptr i32, i32* %r4, i32 7
+%r198 = load i32, i32* %r197
+%r199 = zext i32 %r198 to i256
+%r200 = shl i256 %r199, 224
+%r201 = or i256 %r195, %r200
+%r202 = zext i256 %r201 to i288
+%r204 = getelementptr i32, i32* %r4, i32 8
+%r205 = load i32, i32* %r204
+%r206 = zext i32 %r205 to i288
+%r207 = shl i288 %r206, 256
+%r208 = or i288 %r202, %r207
+%r209 = zext i288 %r208 to i320
+%r211 = getelementptr i32, i32* %r4, i32 9
+%r212 = load i32, i32* %r211
+%r213 = zext i32 %r212 to i320
+%r214 = shl i320 %r213, 288
+%r215 = or i320 %r209, %r214
+%r216 = zext i320 %r215 to i352
+%r218 = getelementptr i32, i32* %r4, i32 10
+%r219 = load i32, i32* %r218
+%r220 = zext i32 %r219 to i352
+%r221 = shl i352 %r220, 320
+%r222 = or i352 %r216, %r221
+%r223 = zext i352 %r222 to i384
+%r225 = getelementptr i32, i32* %r4, i32 11
+%r226 = load i32, i32* %r225
+%r227 = zext i32 %r226 to i384
+%r228 = shl i384 %r227, 352
+%r229 = or i384 %r223, %r228
+%r230 = zext i384 %r229 to i416
+%r231 = sub i416 %r151, %r230 ; trial subtraction of the %r4 value
+%r232 = lshr i416 %r231, 384
+%r233 = trunc i416 %r232 to i1 ; bit 384 of the difference, used as the borrow indicator
+%r234 = select i1 %r233, i416 %r151, i416 %r231 ; keep the unsubtracted value when that bit is set, otherwise the difference
+%r235 = trunc i416 %r234 to i384
+%r236 = trunc i384 %r235 to i32 ; write the 384-bit result to %r1[0..11], least significant word first
+%r238 = getelementptr i32, i32* %r1, i32 0
+store i32 %r236, i32* %r238
+%r239 = lshr i384 %r235, 32
+%r240 = trunc i384 %r239 to i32
+%r242 = getelementptr i32, i32* %r1, i32 1
+store i32 %r240, i32* %r242
+%r243 = lshr i384 %r239, 32
+%r244 = trunc i384 %r243 to i32
+%r246 = getelementptr i32, i32* %r1, i32 2
+store i32 %r244, i32* %r246
+%r247 = lshr i384 %r243, 32
+%r248 = trunc i384 %r247 to i32
+%r250 = getelementptr i32, i32* %r1, i32 3
+store i32 %r248, i32* %r250
+%r251 = lshr i384 %r247, 32
+%r252 = trunc i384 %r251 to i32
+%r254 = getelementptr i32, i32* %r1, i32 4
+store i32 %r252, i32* %r254
+%r255 = lshr i384 %r251, 32
+%r256 = trunc i384 %r255 to i32
+%r258 = getelementptr i32, i32* %r1, i32 5
+store i32 %r256, i32* %r258
+%r259 = lshr i384 %r255, 32
+%r260 = trunc i384 %r259 to i32
+%r262 = getelementptr i32, i32* %r1, i32 6
+store i32 %r260, i32* %r262
+%r263 = lshr i384 %r259, 32
+%r264 = trunc i384 %r263 to i32
+%r266 = getelementptr i32, i32* %r1, i32 7
+store i32 %r264, i32* %r266
+%r267 = lshr i384 %r263, 32
+%r268 = trunc i384 %r267 to i32
+%r270 = getelementptr i32, i32* %r1, i32 8
+store i32 %r268, i32* %r270
+%r271 = lshr i384 %r267, 32
+%r272 = trunc i384 %r271 to i32
+%r274 = getelementptr i32, i32* %r1, i32 9
+store i32 %r272, i32* %r274
+%r275 = lshr i384 %r271, 32
+%r276 = trunc i384 %r275 to i32
+%r278 = getelementptr i32, i32* %r1, i32 10
+store i32 %r276, i32* %r278
+%r279 = lshr i384 %r275, 32
+%r280 = trunc i384 %r279 to i32
+%r282 = getelementptr i32, i32* %r1, i32 11
+store i32 %r280, i32* %r282
+ret void
+}
+define void @mcl_fp_montNF12L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; writes 12 words to %r1 after 12 multiply/reduce rounds over %r2 and %r3 with the 12 words at %r4 (presumably Montgomery multiplication mod the value at %r4 - confirm against caller)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; address of the word stored immediately before %r4[0]
+%r7 = load i32, i32* %r6 ; per-round multiplier constant (presumably -p^-1 mod 2^32 - confirm against caller)
+%r8 = load i32, i32* %r3 ; round 0: word %r3[0]
+%r9 = call i416 @mulPv384x32(i32* %r2, i32 %r8) ; helper defined elsewhere; presumably the 416-bit product of the 12 words at %r2 and one 32-bit word
+%r10 = trunc i416 %r9 to i32 ; low word of the accumulator
+%r11 = mul i32 %r10, %r7 ; q = low word * constant (mod 2^32)
+%r12 = call i416 @mulPv384x32(i32* %r4, i32 %r11) ; q times the words at %r4
+%r13 = add i416 %r9, %r12 ; presumably makes the low 32 bits zero - relies on %r7 being the right constant
+%r14 = lshr i416 %r13, 32 ; drop the low word; this is the accumulator for the next round
+%r16 = getelementptr i32, i32* %r3, i32 1 ; round 1: same steps with %r3[1]
+%r17 = load i32, i32* %r16
+%r18 = call i416 @mulPv384x32(i32* %r2, i32 %r17)
+%r19 = add i416 %r14, %r18 ; accumulator += %r2 * %r3[1]
+%r20 = trunc i416 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i416 @mulPv384x32(i32* %r4, i32 %r21)
+%r23 = add i416 %r19, %r22
+%r24 = lshr i416 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2 ; round 2
+%r27 = load i32, i32* %r26
+%r28 = call i416 @mulPv384x32(i32* %r2, i32 %r27)
+%r29 = add i416 %r24, %r28
+%r30 = trunc i416 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i416 @mulPv384x32(i32* %r4, i32 %r31)
+%r33 = add i416 %r29, %r32
+%r34 = lshr i416 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3 ; round 3
+%r37 = load i32, i32* %r36
+%r38 = call i416 @mulPv384x32(i32* %r2, i32 %r37)
+%r39 = add i416 %r34, %r38
+%r40 = trunc i416 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i416 @mulPv384x32(i32* %r4, i32 %r41)
+%r43 = add i416 %r39, %r42
+%r44 = lshr i416 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4 ; round 4
+%r47 = load i32, i32* %r46
+%r48 = call i416 @mulPv384x32(i32* %r2, i32 %r47)
+%r49 = add i416 %r44, %r48
+%r50 = trunc i416 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i416 @mulPv384x32(i32* %r4, i32 %r51)
+%r53 = add i416 %r49, %r52
+%r54 = lshr i416 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5 ; round 5
+%r57 = load i32, i32* %r56
+%r58 = call i416 @mulPv384x32(i32* %r2, i32 %r57)
+%r59 = add i416 %r54, %r58
+%r60 = trunc i416 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i416 @mulPv384x32(i32* %r4, i32 %r61)
+%r63 = add i416 %r59, %r62
+%r64 = lshr i416 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6 ; round 6
+%r67 = load i32, i32* %r66
+%r68 = call i416 @mulPv384x32(i32* %r2, i32 %r67)
+%r69 = add i416 %r64, %r68
+%r70 = trunc i416 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i416 @mulPv384x32(i32* %r4, i32 %r71)
+%r73 = add i416 %r69, %r72
+%r74 = lshr i416 %r73, 32
+%r76 = getelementptr i32, i32* %r3, i32 7 ; round 7
+%r77 = load i32, i32* %r76
+%r78 = call i416 @mulPv384x32(i32* %r2, i32 %r77)
+%r79 = add i416 %r74, %r78
+%r80 = trunc i416 %r79 to i32
+%r81 = mul i32 %r80, %r7
+%r82 = call i416 @mulPv384x32(i32* %r4, i32 %r81)
+%r83 = add i416 %r79, %r82
+%r84 = lshr i416 %r83, 32
+%r86 = getelementptr i32, i32* %r3, i32 8 ; round 8
+%r87 = load i32, i32* %r86
+%r88 = call i416 @mulPv384x32(i32* %r2, i32 %r87)
+%r89 = add i416 %r84, %r88
+%r90 = trunc i416 %r89 to i32
+%r91 = mul i32 %r90, %r7
+%r92 = call i416 @mulPv384x32(i32* %r4, i32 %r91)
+%r93 = add i416 %r89, %r92
+%r94 = lshr i416 %r93, 32
+%r96 = getelementptr i32, i32* %r3, i32 9 ; round 9
+%r97 = load i32, i32* %r96
+%r98 = call i416 @mulPv384x32(i32* %r2, i32 %r97)
+%r99 = add i416 %r94, %r98
+%r100 = trunc i416 %r99 to i32
+%r101 = mul i32 %r100, %r7
+%r102 = call i416 @mulPv384x32(i32* %r4, i32 %r101)
+%r103 = add i416 %r99, %r102
+%r104 = lshr i416 %r103, 32
+%r106 = getelementptr i32, i32* %r3, i32 10 ; round 10
+%r107 = load i32, i32* %r106
+%r108 = call i416 @mulPv384x32(i32* %r2, i32 %r107)
+%r109 = add i416 %r104, %r108
+%r110 = trunc i416 %r109 to i32
+%r111 = mul i32 %r110, %r7
+%r112 = call i416 @mulPv384x32(i32* %r4, i32 %r111)
+%r113 = add i416 %r109, %r112
+%r114 = lshr i416 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 11 ; round 11 (last)
+%r117 = load i32, i32* %r116
+%r118 = call i416 @mulPv384x32(i32* %r2, i32 %r117)
+%r119 = add i416 %r114, %r118
+%r120 = trunc i416 %r119 to i32
+%r121 = mul i32 %r120, %r7
+%r122 = call i416 @mulPv384x32(i32* %r4, i32 %r121)
+%r123 = add i416 %r119, %r122
+%r124 = lshr i416 %r123, 32
+%r125 = trunc i416 %r124 to i384 ; keep only the low 384 bits of the final accumulator
+%r126 = load i32, i32* %r4 ; from here to %r203: pack %r4[0..11] into one i384, word i at bit 32*i
+%r127 = zext i32 %r126 to i64
+%r129 = getelementptr i32, i32* %r4, i32 1
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i64
+%r132 = shl i64 %r131, 32
+%r133 = or i64 %r127, %r132
+%r134 = zext i64 %r133 to i96
+%r136 = getelementptr i32, i32* %r4, i32 2
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i96
+%r139 = shl i96 %r138, 64
+%r140 = or i96 %r134, %r139
+%r141 = zext i96 %r140 to i128
+%r143 = getelementptr i32, i32* %r4, i32 3
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i128
+%r146 = shl i128 %r145, 96
+%r147 = or i128 %r141, %r146
+%r148 = zext i128 %r147 to i160
+%r150 = getelementptr i32, i32* %r4, i32 4
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i160
+%r153 = shl i160 %r152, 128
+%r154 = or i160 %r148, %r153
+%r155 = zext i160 %r154 to i192
+%r157 = getelementptr i32, i32* %r4, i32 5
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i192
+%r160 = shl i192 %r159, 160
+%r161 = or i192 %r155, %r160
+%r162 = zext i192 %r161 to i224
+%r164 = getelementptr i32, i32* %r4, i32 6
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i224
+%r167 = shl i224 %r166, 192
+%r168 = or i224 %r162, %r167
+%r169 = zext i224 %r168 to i256
+%r171 = getelementptr i32, i32* %r4, i32 7
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i256
+%r174 = shl i256 %r173, 224
+%r175 = or i256 %r169, %r174
+%r176 = zext i256 %r175 to i288
+%r178 = getelementptr i32, i32* %r4, i32 8
+%r179 = load i32, i32* %r178
+%r180 = zext i32 %r179 to i288
+%r181 = shl i288 %r180, 256
+%r182 = or i288 %r176, %r181
+%r183 = zext i288 %r182 to i320
+%r185 = getelementptr i32, i32* %r4, i32 9
+%r186 = load i32, i32* %r185
+%r187 = zext i32 %r186 to i320
+%r188 = shl i320 %r187, 288
+%r189 = or i320 %r183, %r188
+%r190 = zext i320 %r189 to i352
+%r192 = getelementptr i32, i32* %r4, i32 10
+%r193 = load i32, i32* %r192
+%r194 = zext i32 %r193 to i352
+%r195 = shl i352 %r194, 320
+%r196 = or i352 %r190, %r195
+%r197 = zext i352 %r196 to i384
+%r199 = getelementptr i32, i32* %r4, i32 11
+%r200 = load i32, i32* %r199
+%r201 = zext i32 %r200 to i384
+%r202 = shl i384 %r201, 352
+%r203 = or i384 %r197, %r202
+%r204 = sub i384 %r125, %r203 ; candidate result: accumulator minus the value at %r4, computed in 384 bits
+%r205 = lshr i384 %r204, 383 ; top bit of the 384-bit difference is used as the sign
+%r206 = trunc i384 %r205 to i1
+%r207 = select i1 %r206, i384 %r125, i384 %r204 ; top bit set: keep the unsubtracted value, otherwise keep the difference
+%r208 = trunc i384 %r207 to i32 ; from here on: store the selected value to %r1[0..11], lowest word first
+%r210 = getelementptr i32, i32* %r1, i32 0
+store i32 %r208, i32* %r210
+%r211 = lshr i384 %r207, 32
+%r212 = trunc i384 %r211 to i32
+%r214 = getelementptr i32, i32* %r1, i32 1
+store i32 %r212, i32* %r214
+%r215 = lshr i384 %r211, 32
+%r216 = trunc i384 %r215 to i32
+%r218 = getelementptr i32, i32* %r1, i32 2
+store i32 %r216, i32* %r218
+%r219 = lshr i384 %r215, 32
+%r220 = trunc i384 %r219 to i32
+%r222 = getelementptr i32, i32* %r1, i32 3
+store i32 %r220, i32* %r222
+%r223 = lshr i384 %r219, 32
+%r224 = trunc i384 %r223 to i32
+%r226 = getelementptr i32, i32* %r1, i32 4
+store i32 %r224, i32* %r226
+%r227 = lshr i384 %r223, 32
+%r228 = trunc i384 %r227 to i32
+%r230 = getelementptr i32, i32* %r1, i32 5
+store i32 %r228, i32* %r230
+%r231 = lshr i384 %r227, 32
+%r232 = trunc i384 %r231 to i32
+%r234 = getelementptr i32, i32* %r1, i32 6
+store i32 %r232, i32* %r234
+%r235 = lshr i384 %r231, 32
+%r236 = trunc i384 %r235 to i32
+%r238 = getelementptr i32, i32* %r1, i32 7
+store i32 %r236, i32* %r238
+%r239 = lshr i384 %r235, 32
+%r240 = trunc i384 %r239 to i32
+%r242 = getelementptr i32, i32* %r1, i32 8
+store i32 %r240, i32* %r242
+%r243 = lshr i384 %r239, 32
+%r244 = trunc i384 %r243 to i32
+%r246 = getelementptr i32, i32* %r1, i32 9
+store i32 %r244, i32* %r246
+%r247 = lshr i384 %r243, 32
+%r248 = trunc i384 %r247 to i32
+%r250 = getelementptr i32, i32* %r1, i32 10
+store i32 %r248, i32* %r250
+%r251 = lshr i384 %r247, 32
+%r252 = trunc i384 %r251 to i32
+%r254 = getelementptr i32, i32* %r1, i32 11
+store i32 %r252, i32* %r254
+ret void
+}
+define void @mcl_fp_montRed12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; reduces the 24 words at %r2 in 12 rounds using the 12 words at %r3 and writes 12 words to %r1 (presumably Montgomery reduction mod the value at %r3 - confirm against caller)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1 ; address of the word stored immediately before %r3[0]
+%r6 = load i32, i32* %r5 ; per-round multiplier constant (presumably -p^-1 mod 2^32 - confirm against caller)
+%r7 = load i32, i32* %r3 ; from here to %r84: pack %r3[0..11] into one i384, word i at bit 32*i
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i288
+%r59 = getelementptr i32, i32* %r3, i32 8
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i288
+%r62 = shl i288 %r61, 256
+%r63 = or i288 %r57, %r62
+%r64 = zext i288 %r63 to i320
+%r66 = getelementptr i32, i32* %r3, i32 9
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i320
+%r69 = shl i320 %r68, 288
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i352
+%r73 = getelementptr i32, i32* %r3, i32 10
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i352
+%r76 = shl i352 %r75, 320
+%r77 = or i352 %r71, %r76
+%r78 = zext i352 %r77 to i384
+%r80 = getelementptr i32, i32* %r3, i32 11
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i384
+%r83 = shl i384 %r82, 352
+%r84 = or i384 %r78, %r83 ; %r84 = the 12 words at %r3 as one 384-bit value; used only in the final subtraction
+%r85 = load i32, i32* %r2 ; from here to %r246: pack %r2[0..23] into one i768, word i at bit 32*i
+%r86 = zext i32 %r85 to i64
+%r88 = getelementptr i32, i32* %r2, i32 1
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i64
+%r91 = shl i64 %r90, 32
+%r92 = or i64 %r86, %r91
+%r93 = zext i64 %r92 to i96
+%r95 = getelementptr i32, i32* %r2, i32 2
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i96
+%r98 = shl i96 %r97, 64
+%r99 = or i96 %r93, %r98
+%r100 = zext i96 %r99 to i128
+%r102 = getelementptr i32, i32* %r2, i32 3
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i128
+%r105 = shl i128 %r104, 96
+%r106 = or i128 %r100, %r105
+%r107 = zext i128 %r106 to i160
+%r109 = getelementptr i32, i32* %r2, i32 4
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i160
+%r112 = shl i160 %r111, 128
+%r113 = or i160 %r107, %r112
+%r114 = zext i160 %r113 to i192
+%r116 = getelementptr i32, i32* %r2, i32 5
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i192
+%r119 = shl i192 %r118, 160
+%r120 = or i192 %r114, %r119
+%r121 = zext i192 %r120 to i224
+%r123 = getelementptr i32, i32* %r2, i32 6
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i224
+%r126 = shl i224 %r125, 192
+%r127 = or i224 %r121, %r126
+%r128 = zext i224 %r127 to i256
+%r130 = getelementptr i32, i32* %r2, i32 7
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i256
+%r133 = shl i256 %r132, 224
+%r134 = or i256 %r128, %r133
+%r135 = zext i256 %r134 to i288
+%r137 = getelementptr i32, i32* %r2, i32 8
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i288
+%r140 = shl i288 %r139, 256
+%r141 = or i288 %r135, %r140
+%r142 = zext i288 %r141 to i320
+%r144 = getelementptr i32, i32* %r2, i32 9
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i320
+%r147 = shl i320 %r146, 288
+%r148 = or i320 %r142, %r147
+%r149 = zext i320 %r148 to i352
+%r151 = getelementptr i32, i32* %r2, i32 10
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i352
+%r154 = shl i352 %r153, 320
+%r155 = or i352 %r149, %r154
+%r156 = zext i352 %r155 to i384
+%r158 = getelementptr i32, i32* %r2, i32 11
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i384
+%r161 = shl i384 %r160, 352
+%r162 = or i384 %r156, %r161
+%r163 = zext i384 %r162 to i416
+%r165 = getelementptr i32, i32* %r2, i32 12
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i416
+%r168 = shl i416 %r167, 384
+%r169 = or i416 %r163, %r168
+%r170 = zext i416 %r169 to i448
+%r172 = getelementptr i32, i32* %r2, i32 13
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i448
+%r175 = shl i448 %r174, 416
+%r176 = or i448 %r170, %r175
+%r177 = zext i448 %r176 to i480
+%r179 = getelementptr i32, i32* %r2, i32 14
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i480
+%r182 = shl i480 %r181, 448
+%r183 = or i480 %r177, %r182
+%r184 = zext i480 %r183 to i512
+%r186 = getelementptr i32, i32* %r2, i32 15
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i512
+%r189 = shl i512 %r188, 480
+%r190 = or i512 %r184, %r189
+%r191 = zext i512 %r190 to i544
+%r193 = getelementptr i32, i32* %r2, i32 16
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i544
+%r196 = shl i544 %r195, 512
+%r197 = or i544 %r191, %r196
+%r198 = zext i544 %r197 to i576
+%r200 = getelementptr i32, i32* %r2, i32 17
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i576
+%r203 = shl i576 %r202, 544
+%r204 = or i576 %r198, %r203
+%r205 = zext i576 %r204 to i608
+%r207 = getelementptr i32, i32* %r2, i32 18
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i608
+%r210 = shl i608 %r209, 576
+%r211 = or i608 %r205, %r210
+%r212 = zext i608 %r211 to i640
+%r214 = getelementptr i32, i32* %r2, i32 19
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i640
+%r217 = shl i640 %r216, 608
+%r218 = or i640 %r212, %r217
+%r219 = zext i640 %r218 to i672
+%r221 = getelementptr i32, i32* %r2, i32 20
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i672
+%r224 = shl i672 %r223, 640
+%r225 = or i672 %r219, %r224
+%r226 = zext i672 %r225 to i704
+%r228 = getelementptr i32, i32* %r2, i32 21
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i704
+%r231 = shl i704 %r230, 672
+%r232 = or i704 %r226, %r231
+%r233 = zext i704 %r232 to i736
+%r235 = getelementptr i32, i32* %r2, i32 22
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i736
+%r238 = shl i736 %r237, 704
+%r239 = or i736 %r233, %r238
+%r240 = zext i736 %r239 to i768
+%r242 = getelementptr i32, i32* %r2, i32 23
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i768
+%r245 = shl i768 %r244, 736
+%r246 = or i768 %r240, %r245
+%r247 = zext i768 %r246 to i800 ; one extra 32-bit word of headroom for the carry out of the first addition
+%r248 = trunc i800 %r247 to i32 ; round 0: low word of the accumulator
+%r249 = mul i32 %r248, %r6 ; q = low word * constant (mod 2^32)
+%r250 = call i416 @mulPv384x32(i32* %r3, i32 %r249) ; helper defined elsewhere; presumably the 416-bit product of the 12 words at %r3 and q
+%r251 = zext i416 %r250 to i800
+%r252 = add i800 %r247, %r251 ; presumably makes the low 32 bits zero - relies on %r6 being the right constant
+%r253 = lshr i800 %r252, 32 ; drop the low word
+%r254 = trunc i800 %r253 to i768 ; accumulator type shrinks by 32 bits each round
+%r255 = trunc i768 %r254 to i32 ; round 1
+%r256 = mul i32 %r255, %r6
+%r257 = call i416 @mulPv384x32(i32* %r3, i32 %r256)
+%r258 = zext i416 %r257 to i768
+%r259 = add i768 %r254, %r258
+%r260 = lshr i768 %r259, 32
+%r261 = trunc i768 %r260 to i736
+%r262 = trunc i736 %r261 to i32 ; round 2
+%r263 = mul i32 %r262, %r6
+%r264 = call i416 @mulPv384x32(i32* %r3, i32 %r263)
+%r265 = zext i416 %r264 to i736
+%r266 = add i736 %r261, %r265
+%r267 = lshr i736 %r266, 32
+%r268 = trunc i736 %r267 to i704
+%r269 = trunc i704 %r268 to i32 ; round 3
+%r270 = mul i32 %r269, %r6
+%r271 = call i416 @mulPv384x32(i32* %r3, i32 %r270)
+%r272 = zext i416 %r271 to i704
+%r273 = add i704 %r268, %r272
+%r274 = lshr i704 %r273, 32
+%r275 = trunc i704 %r274 to i672
+%r276 = trunc i672 %r275 to i32 ; round 4
+%r277 = mul i32 %r276, %r6
+%r278 = call i416 @mulPv384x32(i32* %r3, i32 %r277)
+%r279 = zext i416 %r278 to i672
+%r280 = add i672 %r275, %r279
+%r281 = lshr i672 %r280, 32
+%r282 = trunc i672 %r281 to i640
+%r283 = trunc i640 %r282 to i32 ; round 5
+%r284 = mul i32 %r283, %r6
+%r285 = call i416 @mulPv384x32(i32* %r3, i32 %r284)
+%r286 = zext i416 %r285 to i640
+%r287 = add i640 %r282, %r286
+%r288 = lshr i640 %r287, 32
+%r289 = trunc i640 %r288 to i608
+%r290 = trunc i608 %r289 to i32 ; round 6
+%r291 = mul i32 %r290, %r6
+%r292 = call i416 @mulPv384x32(i32* %r3, i32 %r291)
+%r293 = zext i416 %r292 to i608
+%r294 = add i608 %r289, %r293
+%r295 = lshr i608 %r294, 32
+%r296 = trunc i608 %r295 to i576
+%r297 = trunc i576 %r296 to i32 ; round 7
+%r298 = mul i32 %r297, %r6
+%r299 = call i416 @mulPv384x32(i32* %r3, i32 %r298)
+%r300 = zext i416 %r299 to i576
+%r301 = add i576 %r296, %r300
+%r302 = lshr i576 %r301, 32
+%r303 = trunc i576 %r302 to i544
+%r304 = trunc i544 %r303 to i32 ; round 8
+%r305 = mul i32 %r304, %r6
+%r306 = call i416 @mulPv384x32(i32* %r3, i32 %r305)
+%r307 = zext i416 %r306 to i544
+%r308 = add i544 %r303, %r307
+%r309 = lshr i544 %r308, 32
+%r310 = trunc i544 %r309 to i512
+%r311 = trunc i512 %r310 to i32 ; round 9
+%r312 = mul i32 %r311, %r6
+%r313 = call i416 @mulPv384x32(i32* %r3, i32 %r312)
+%r314 = zext i416 %r313 to i512
+%r315 = add i512 %r310, %r314
+%r316 = lshr i512 %r315, 32
+%r317 = trunc i512 %r316 to i480
+%r318 = trunc i480 %r317 to i32 ; round 10
+%r319 = mul i32 %r318, %r6
+%r320 = call i416 @mulPv384x32(i32* %r3, i32 %r319)
+%r321 = zext i416 %r320 to i480
+%r322 = add i480 %r317, %r321
+%r323 = lshr i480 %r322, 32
+%r324 = trunc i480 %r323 to i448
+%r325 = trunc i448 %r324 to i32 ; round 11 (last)
+%r326 = mul i32 %r325, %r6
+%r327 = call i416 @mulPv384x32(i32* %r3, i32 %r326)
+%r328 = zext i416 %r327 to i448
+%r329 = add i448 %r324, %r328
+%r330 = lshr i448 %r329, 32
+%r331 = trunc i448 %r330 to i416 ; final accumulator, kept in 416 bits
+%r332 = zext i384 %r84 to i416
+%r333 = sub i416 %r331, %r332 ; candidate result: accumulator minus the value at %r3
+%r334 = lshr i416 %r333, 384 ; bit 384 of the difference is used as the borrow indicator
+%r335 = trunc i416 %r334 to i1
+%r336 = select i1 %r335, i416 %r331, i416 %r333 ; bit set: keep the unsubtracted value, otherwise keep the difference
+%r337 = trunc i416 %r336 to i384
+%r338 = trunc i384 %r337 to i32 ; from here on: store the selected value to %r1[0..11], lowest word first
+%r340 = getelementptr i32, i32* %r1, i32 0
+store i32 %r338, i32* %r340
+%r341 = lshr i384 %r337, 32
+%r342 = trunc i384 %r341 to i32
+%r344 = getelementptr i32, i32* %r1, i32 1
+store i32 %r342, i32* %r344
+%r345 = lshr i384 %r341, 32
+%r346 = trunc i384 %r345 to i32
+%r348 = getelementptr i32, i32* %r1, i32 2
+store i32 %r346, i32* %r348
+%r349 = lshr i384 %r345, 32
+%r350 = trunc i384 %r349 to i32
+%r352 = getelementptr i32, i32* %r1, i32 3
+store i32 %r350, i32* %r352
+%r353 = lshr i384 %r349, 32
+%r354 = trunc i384 %r353 to i32
+%r356 = getelementptr i32, i32* %r1, i32 4
+store i32 %r354, i32* %r356
+%r357 = lshr i384 %r353, 32
+%r358 = trunc i384 %r357 to i32
+%r360 = getelementptr i32, i32* %r1, i32 5
+store i32 %r358, i32* %r360
+%r361 = lshr i384 %r357, 32
+%r362 = trunc i384 %r361 to i32
+%r364 = getelementptr i32, i32* %r1, i32 6
+store i32 %r362, i32* %r364
+%r365 = lshr i384 %r361, 32
+%r366 = trunc i384 %r365 to i32
+%r368 = getelementptr i32, i32* %r1, i32 7
+store i32 %r366, i32* %r368
+%r369 = lshr i384 %r365, 32
+%r370 = trunc i384 %r369 to i32
+%r372 = getelementptr i32, i32* %r1, i32 8
+store i32 %r370, i32* %r372
+%r373 = lshr i384 %r369, 32
+%r374 = trunc i384 %r373 to i32
+%r376 = getelementptr i32, i32* %r1, i32 9
+store i32 %r374, i32* %r376
+%r377 = lshr i384 %r373, 32
+%r378 = trunc i384 %r377 to i32
+%r380 = getelementptr i32, i32* %r1, i32 10
+store i32 %r378, i32* %r380
+%r381 = lshr i384 %r377, 32
+%r382 = trunc i384 %r381 to i32
+%r384 = getelementptr i32, i32* %r1, i32 11
+store i32 %r382, i32* %r384
+ret void
+}
+define i32 @mcl_fp_addPre12L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r2[0..11] = low 384 bits of %r3[0..11] + %r4[0..11]; returns the carry out of bit 383 (0 or 1); no modular reduction is done here
+{
+%r5 = load i32, i32* %r3 ; from here to %r82: pack %r3[0..11] into one i384, word i at bit 32*i
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416 ; widen by one word so the carry out of the addition is representable
+%r84 = load i32, i32* %r4 ; from here to %r161: pack %r4[0..11] the same way
+%r85 = zext i32 %r84 to i64
+%r87 = getelementptr i32, i32* %r4, i32 1
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i64
+%r90 = shl i64 %r89, 32
+%r91 = or i64 %r85, %r90
+%r92 = zext i64 %r91 to i96
+%r94 = getelementptr i32, i32* %r4, i32 2
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i96
+%r97 = shl i96 %r96, 64
+%r98 = or i96 %r92, %r97
+%r99 = zext i96 %r98 to i128
+%r101 = getelementptr i32, i32* %r4, i32 3
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i128
+%r104 = shl i128 %r103, 96
+%r105 = or i128 %r99, %r104
+%r106 = zext i128 %r105 to i160
+%r108 = getelementptr i32, i32* %r4, i32 4
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i160
+%r111 = shl i160 %r110, 128
+%r112 = or i160 %r106, %r111
+%r113 = zext i160 %r112 to i192
+%r115 = getelementptr i32, i32* %r4, i32 5
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i192
+%r118 = shl i192 %r117, 160
+%r119 = or i192 %r113, %r118
+%r120 = zext i192 %r119 to i224
+%r122 = getelementptr i32, i32* %r4, i32 6
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i224
+%r125 = shl i224 %r124, 192
+%r126 = or i224 %r120, %r125
+%r127 = zext i224 %r126 to i256
+%r129 = getelementptr i32, i32* %r4, i32 7
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i256
+%r132 = shl i256 %r131, 224
+%r133 = or i256 %r127, %r132
+%r134 = zext i256 %r133 to i288
+%r136 = getelementptr i32, i32* %r4, i32 8
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i288
+%r139 = shl i288 %r138, 256
+%r140 = or i288 %r134, %r139
+%r141 = zext i288 %r140 to i320
+%r143 = getelementptr i32, i32* %r4, i32 9
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i320
+%r146 = shl i320 %r145, 288
+%r147 = or i320 %r141, %r146
+%r148 = zext i320 %r147 to i352
+%r150 = getelementptr i32, i32* %r4, i32 10
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i352
+%r153 = shl i352 %r152, 320
+%r154 = or i352 %r148, %r153
+%r155 = zext i352 %r154 to i384
+%r157 = getelementptr i32, i32* %r4, i32 11
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i384
+%r160 = shl i384 %r159, 352
+%r161 = or i384 %r155, %r160
+%r162 = zext i384 %r161 to i416
+%r163 = add i416 %r83, %r162 ; full sum; it fits in 385 bits, so nothing is lost in i416
+%r164 = trunc i416 %r163 to i384 ; low 384 bits are what gets stored
+%r165 = trunc i384 %r164 to i32 ; from here to the last store: write the low 384 bits to %r2[0..11], lowest word first
+%r167 = getelementptr i32, i32* %r2, i32 0
+store i32 %r165, i32* %r167
+%r168 = lshr i384 %r164, 32
+%r169 = trunc i384 %r168 to i32
+%r171 = getelementptr i32, i32* %r2, i32 1
+store i32 %r169, i32* %r171
+%r172 = lshr i384 %r168, 32
+%r173 = trunc i384 %r172 to i32
+%r175 = getelementptr i32, i32* %r2, i32 2
+store i32 %r173, i32* %r175
+%r176 = lshr i384 %r172, 32
+%r177 = trunc i384 %r176 to i32
+%r179 = getelementptr i32, i32* %r2, i32 3
+store i32 %r177, i32* %r179
+%r180 = lshr i384 %r176, 32
+%r181 = trunc i384 %r180 to i32
+%r183 = getelementptr i32, i32* %r2, i32 4
+store i32 %r181, i32* %r183
+%r184 = lshr i384 %r180, 32
+%r185 = trunc i384 %r184 to i32
+%r187 = getelementptr i32, i32* %r2, i32 5
+store i32 %r185, i32* %r187
+%r188 = lshr i384 %r184, 32
+%r189 = trunc i384 %r188 to i32
+%r191 = getelementptr i32, i32* %r2, i32 6
+store i32 %r189, i32* %r191
+%r192 = lshr i384 %r188, 32
+%r193 = trunc i384 %r192 to i32
+%r195 = getelementptr i32, i32* %r2, i32 7
+store i32 %r193, i32* %r195
+%r196 = lshr i384 %r192, 32
+%r197 = trunc i384 %r196 to i32
+%r199 = getelementptr i32, i32* %r2, i32 8
+store i32 %r197, i32* %r199
+%r200 = lshr i384 %r196, 32
+%r201 = trunc i384 %r200 to i32
+%r203 = getelementptr i32, i32* %r2, i32 9
+store i32 %r201, i32* %r203
+%r204 = lshr i384 %r200, 32
+%r205 = trunc i384 %r204 to i32
+%r207 = getelementptr i32, i32* %r2, i32 10
+store i32 %r205, i32* %r207
+%r208 = lshr i384 %r204, 32
+%r209 = trunc i384 %r208 to i32
+%r211 = getelementptr i32, i32* %r2, i32 11
+store i32 %r209, i32* %r211
+%r212 = lshr i416 %r163, 384 ; bits 384 and up of the sum: the carry
+%r213 = trunc i416 %r212 to i32
+ret i32 %r213
+}
+define i32 @mcl_fp_subPre12L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r2[0..11] = low 384 bits of %r3[0..11] - %r4[0..11]; returns 1 if the subtraction borrowed, else 0; no modular reduction is done here
+{
+%r5 = load i32, i32* %r3 ; from here to %r82: pack %r3[0..11] into one i384, word i at bit 32*i
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416 ; widen by one word so a borrow shows up in the bits above 383
+%r84 = load i32, i32* %r4 ; from here to %r161: pack %r4[0..11] the same way
+%r85 = zext i32 %r84 to i64
+%r87 = getelementptr i32, i32* %r4, i32 1
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i64
+%r90 = shl i64 %r89, 32
+%r91 = or i64 %r85, %r90
+%r92 = zext i64 %r91 to i96
+%r94 = getelementptr i32, i32* %r4, i32 2
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i96
+%r97 = shl i96 %r96, 64
+%r98 = or i96 %r92, %r97
+%r99 = zext i96 %r98 to i128
+%r101 = getelementptr i32, i32* %r4, i32 3
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i128
+%r104 = shl i128 %r103, 96
+%r105 = or i128 %r99, %r104
+%r106 = zext i128 %r105 to i160
+%r108 = getelementptr i32, i32* %r4, i32 4
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i160
+%r111 = shl i160 %r110, 128
+%r112 = or i160 %r106, %r111
+%r113 = zext i160 %r112 to i192
+%r115 = getelementptr i32, i32* %r4, i32 5
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i192
+%r118 = shl i192 %r117, 160
+%r119 = or i192 %r113, %r118
+%r120 = zext i192 %r119 to i224
+%r122 = getelementptr i32, i32* %r4, i32 6
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i224
+%r125 = shl i224 %r124, 192
+%r126 = or i224 %r120, %r125
+%r127 = zext i224 %r126 to i256
+%r129 = getelementptr i32, i32* %r4, i32 7
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i256
+%r132 = shl i256 %r131, 224
+%r133 = or i256 %r127, %r132
+%r134 = zext i256 %r133 to i288
+%r136 = getelementptr i32, i32* %r4, i32 8
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i288
+%r139 = shl i288 %r138, 256
+%r140 = or i288 %r134, %r139
+%r141 = zext i288 %r140 to i320
+%r143 = getelementptr i32, i32* %r4, i32 9
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i320
+%r146 = shl i320 %r145, 288
+%r147 = or i320 %r141, %r146
+%r148 = zext i320 %r147 to i352
+%r150 = getelementptr i32, i32* %r4, i32 10
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i352
+%r153 = shl i352 %r152, 320
+%r154 = or i352 %r148, %r153
+%r155 = zext i352 %r154 to i384
+%r157 = getelementptr i32, i32* %r4, i32 11
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i384
+%r160 = shl i384 %r159, 352
+%r161 = or i384 %r155, %r160
+%r162 = zext i384 %r161 to i416
+%r163 = sub i416 %r83, %r162 ; wraps modulo 2^416 when the value from %r4 is larger, setting all bits above 383
+%r164 = trunc i416 %r163 to i384 ; low 384 bits are what gets stored
+%r165 = trunc i384 %r164 to i32 ; from here to the last store: write the low 384 bits to %r2[0..11], lowest word first
+%r167 = getelementptr i32, i32* %r2, i32 0
+store i32 %r165, i32* %r167
+%r168 = lshr i384 %r164, 32
+%r169 = trunc i384 %r168 to i32
+%r171 = getelementptr i32, i32* %r2, i32 1
+store i32 %r169, i32* %r171
+%r172 = lshr i384 %r168, 32
+%r173 = trunc i384 %r172 to i32
+%r175 = getelementptr i32, i32* %r2, i32 2
+store i32 %r173, i32* %r175
+%r176 = lshr i384 %r172, 32
+%r177 = trunc i384 %r176 to i32
+%r179 = getelementptr i32, i32* %r2, i32 3
+store i32 %r177, i32* %r179
+%r180 = lshr i384 %r176, 32
+%r181 = trunc i384 %r180 to i32
+%r183 = getelementptr i32, i32* %r2, i32 4
+store i32 %r181, i32* %r183
+%r184 = lshr i384 %r180, 32
+%r185 = trunc i384 %r184 to i32
+%r187 = getelementptr i32, i32* %r2, i32 5
+store i32 %r185, i32* %r187
+%r188 = lshr i384 %r184, 32
+%r189 = trunc i384 %r188 to i32
+%r191 = getelementptr i32, i32* %r2, i32 6
+store i32 %r189, i32* %r191
+%r192 = lshr i384 %r188, 32
+%r193 = trunc i384 %r192 to i32
+%r195 = getelementptr i32, i32* %r2, i32 7
+store i32 %r193, i32* %r195
+%r196 = lshr i384 %r192, 32
+%r197 = trunc i384 %r196 to i32
+%r199 = getelementptr i32, i32* %r2, i32 8
+store i32 %r197, i32* %r199
+%r200 = lshr i384 %r196, 32
+%r201 = trunc i384 %r200 to i32
+%r203 = getelementptr i32, i32* %r2, i32 9
+store i32 %r201, i32* %r203
+%r204 = lshr i384 %r200, 32
+%r205 = trunc i384 %r204 to i32
+%r207 = getelementptr i32, i32* %r2, i32 10
+store i32 %r205, i32* %r207
+%r208 = lshr i384 %r204, 32
+%r209 = trunc i384 %r208 to i32
+%r211 = getelementptr i32, i32* %r2, i32 11
+store i32 %r209, i32* %r211
+%r212 = lshr i416 %r163, 384 ; top word of the difference: all ones on borrow, zero otherwise
+%r213 = trunc i416 %r212 to i32
+%r215 = and i32 %r213, 1 ; reduce the all-ones pattern to a 0/1 borrow flag
+ret i32 %r215
+}
+define void @mcl_fp_shr1_12L(i32* noalias  %r1, i32* noalias  %r2) ; Writes (x >> 1) to %r1[0..11], where x is the 384-bit value read from %r2[0..11].
+{ ; Both arrays hold twelve i32 limbs, least-significant limb at index 0.
+%r3 = load i32, i32* %r2 ; --- assemble x from %r2: limb i is OR-ed in at bit 32*i, widening by 32 bits per step ---
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96 ; widen the accumulator by one limb before OR-ing in the next
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = zext i192 %r38 to i224
+%r41 = getelementptr i32, i32* %r2, i32 6
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i224
+%r44 = shl i224 %r43, 192
+%r45 = or i224 %r39, %r44
+%r46 = zext i224 %r45 to i256
+%r48 = getelementptr i32, i32* %r2, i32 7
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i256
+%r51 = shl i256 %r50, 224
+%r52 = or i256 %r46, %r51
+%r53 = zext i256 %r52 to i288
+%r55 = getelementptr i32, i32* %r2, i32 8
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i288
+%r58 = shl i288 %r57, 256
+%r59 = or i288 %r53, %r58
+%r60 = zext i288 %r59 to i320
+%r62 = getelementptr i32, i32* %r2, i32 9
+%r63 = load i32, i32* %r62
+%r64 = zext i32 %r63 to i320
+%r65 = shl i320 %r64, 288
+%r66 = or i320 %r60, %r65
+%r67 = zext i320 %r66 to i352
+%r69 = getelementptr i32, i32* %r2, i32 10
+%r70 = load i32, i32* %r69
+%r71 = zext i32 %r70 to i352
+%r72 = shl i352 %r71, 320
+%r73 = or i352 %r67, %r72
+%r74 = zext i352 %r73 to i384
+%r76 = getelementptr i32, i32* %r2, i32 11
+%r77 = load i32, i32* %r76
+%r78 = zext i32 %r77 to i384
+%r79 = shl i384 %r78, 352
+%r80 = or i384 %r74, %r79 ; %r80 = the complete 384-bit x
+%r81 = lshr i384 %r80, 1 ; logical shift right by one bit: the vacated top bit is zero
+%r82 = trunc i384 %r81 to i32 ; --- scatter the result to %r1: store the low 32 bits, shift right by 32, repeat ---
+%r84 = getelementptr i32, i32* %r1, i32 0
+store i32 %r82, i32* %r84
+%r85 = lshr i384 %r81, 32
+%r86 = trunc i384 %r85 to i32
+%r88 = getelementptr i32, i32* %r1, i32 1
+store i32 %r86, i32* %r88
+%r89 = lshr i384 %r85, 32
+%r90 = trunc i384 %r89 to i32
+%r92 = getelementptr i32, i32* %r1, i32 2
+store i32 %r90, i32* %r92
+%r93 = lshr i384 %r89, 32
+%r94 = trunc i384 %r93 to i32
+%r96 = getelementptr i32, i32* %r1, i32 3
+store i32 %r94, i32* %r96
+%r97 = lshr i384 %r93, 32
+%r98 = trunc i384 %r97 to i32
+%r100 = getelementptr i32, i32* %r1, i32 4
+store i32 %r98, i32* %r100
+%r101 = lshr i384 %r97, 32
+%r102 = trunc i384 %r101 to i32
+%r104 = getelementptr i32, i32* %r1, i32 5
+store i32 %r102, i32* %r104
+%r105 = lshr i384 %r101, 32
+%r106 = trunc i384 %r105 to i32
+%r108 = getelementptr i32, i32* %r1, i32 6
+store i32 %r106, i32* %r108
+%r109 = lshr i384 %r105, 32
+%r110 = trunc i384 %r109 to i32
+%r112 = getelementptr i32, i32* %r1, i32 7
+store i32 %r110, i32* %r112
+%r113 = lshr i384 %r109, 32
+%r114 = trunc i384 %r113 to i32
+%r116 = getelementptr i32, i32* %r1, i32 8
+store i32 %r114, i32* %r116
+%r117 = lshr i384 %r113, 32
+%r118 = trunc i384 %r117 to i32
+%r120 = getelementptr i32, i32* %r1, i32 9
+store i32 %r118, i32* %r120
+%r121 = lshr i384 %r117, 32
+%r122 = trunc i384 %r121 to i32
+%r124 = getelementptr i32, i32* %r1, i32 10
+store i32 %r122, i32* %r124
+%r125 = lshr i384 %r121, 32
+%r126 = trunc i384 %r125 to i32
+%r128 = getelementptr i32, i32* %r1, i32 11
+store i32 %r126, i32* %r128
+ret void
+}
+define void @mcl_fp_add12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Stores x + y into %r1, then overwrites it with x + y - m unless that subtraction sets bit 384 (x = %r2, y = %r3, m = %r4; twelve i32 limbs each, least-significant first).
+{ ; NOTE(review): %r4 is presumably the field modulus, which would make this a modular addition - confirm against the caller.
+%r5 = load i32, i32* %r2 ; --- assemble x (i384) from %r2[0..11]: limb i is OR-ed in at bit 32*i ---
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81 ; %r82 = x
+%r83 = load i32, i32* %r3 ; --- assemble y (i384) from %r3[0..11] the same way ---
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r3, i32 1
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r3, i32 2
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r3, i32 3
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r3, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r3, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r121 = getelementptr i32, i32* %r3, i32 6
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i224
+%r124 = shl i224 %r123, 192
+%r125 = or i224 %r119, %r124
+%r126 = zext i224 %r125 to i256
+%r128 = getelementptr i32, i32* %r3, i32 7
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i256
+%r131 = shl i256 %r130, 224
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i288
+%r135 = getelementptr i32, i32* %r3, i32 8
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i288
+%r138 = shl i288 %r137, 256
+%r139 = or i288 %r133, %r138
+%r140 = zext i288 %r139 to i320
+%r142 = getelementptr i32, i32* %r3, i32 9
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i320
+%r145 = shl i320 %r144, 288
+%r146 = or i320 %r140, %r145
+%r147 = zext i320 %r146 to i352
+%r149 = getelementptr i32, i32* %r3, i32 10
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i352
+%r152 = shl i352 %r151, 320
+%r153 = or i352 %r147, %r152
+%r154 = zext i352 %r153 to i384
+%r156 = getelementptr i32, i32* %r3, i32 11
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i384
+%r159 = shl i384 %r158, 352
+%r160 = or i384 %r154, %r159 ; %r160 = y
+%r161 = zext i384 %r82 to i416 ; widen both operands to i416 so the carry out of bit 383 is not lost
+%r162 = zext i384 %r160 to i416
+%r163 = add i416 %r161, %r162 ; %r163 = x + y, exact
+%r164 = trunc i416 %r163 to i384 ; low 384 bits of the sum
+%r165 = trunc i384 %r164 to i32 ; --- store the unreduced sum to %r1[0..11] first: low 32 bits, shift right by 32, repeat ---
+%r167 = getelementptr i32, i32* %r1, i32 0
+store i32 %r165, i32* %r167
+%r168 = lshr i384 %r164, 32
+%r169 = trunc i384 %r168 to i32
+%r171 = getelementptr i32, i32* %r1, i32 1
+store i32 %r169, i32* %r171
+%r172 = lshr i384 %r168, 32
+%r173 = trunc i384 %r172 to i32
+%r175 = getelementptr i32, i32* %r1, i32 2
+store i32 %r173, i32* %r175
+%r176 = lshr i384 %r172, 32
+%r177 = trunc i384 %r176 to i32
+%r179 = getelementptr i32, i32* %r1, i32 3
+store i32 %r177, i32* %r179
+%r180 = lshr i384 %r176, 32
+%r181 = trunc i384 %r180 to i32
+%r183 = getelementptr i32, i32* %r1, i32 4
+store i32 %r181, i32* %r183
+%r184 = lshr i384 %r180, 32
+%r185 = trunc i384 %r184 to i32
+%r187 = getelementptr i32, i32* %r1, i32 5
+store i32 %r185, i32* %r187
+%r188 = lshr i384 %r184, 32
+%r189 = trunc i384 %r188 to i32
+%r191 = getelementptr i32, i32* %r1, i32 6
+store i32 %r189, i32* %r191
+%r192 = lshr i384 %r188, 32
+%r193 = trunc i384 %r192 to i32
+%r195 = getelementptr i32, i32* %r1, i32 7
+store i32 %r193, i32* %r195
+%r196 = lshr i384 %r192, 32
+%r197 = trunc i384 %r196 to i32
+%r199 = getelementptr i32, i32* %r1, i32 8
+store i32 %r197, i32* %r199
+%r200 = lshr i384 %r196, 32
+%r201 = trunc i384 %r200 to i32
+%r203 = getelementptr i32, i32* %r1, i32 9
+store i32 %r201, i32* %r203
+%r204 = lshr i384 %r200, 32
+%r205 = trunc i384 %r204 to i32
+%r207 = getelementptr i32, i32* %r1, i32 10
+store i32 %r205, i32* %r207
+%r208 = lshr i384 %r204, 32
+%r209 = trunc i384 %r208 to i32
+%r211 = getelementptr i32, i32* %r1, i32 11
+store i32 %r209, i32* %r211
+%r212 = load i32, i32* %r4 ; --- assemble m (i384) from %r4[0..11] ---
+%r213 = zext i32 %r212 to i64
+%r215 = getelementptr i32, i32* %r4, i32 1
+%r216 = load i32, i32* %r215
+%r217 = zext i32 %r216 to i64
+%r218 = shl i64 %r217, 32
+%r219 = or i64 %r213, %r218
+%r220 = zext i64 %r219 to i96
+%r222 = getelementptr i32, i32* %r4, i32 2
+%r223 = load i32, i32* %r222
+%r224 = zext i32 %r223 to i96
+%r225 = shl i96 %r224, 64
+%r226 = or i96 %r220, %r225
+%r227 = zext i96 %r226 to i128
+%r229 = getelementptr i32, i32* %r4, i32 3
+%r230 = load i32, i32* %r229
+%r231 = zext i32 %r230 to i128
+%r232 = shl i128 %r231, 96
+%r233 = or i128 %r227, %r232
+%r234 = zext i128 %r233 to i160
+%r236 = getelementptr i32, i32* %r4, i32 4
+%r237 = load i32, i32* %r236
+%r238 = zext i32 %r237 to i160
+%r239 = shl i160 %r238, 128
+%r240 = or i160 %r234, %r239
+%r241 = zext i160 %r240 to i192
+%r243 = getelementptr i32, i32* %r4, i32 5
+%r244 = load i32, i32* %r243
+%r245 = zext i32 %r244 to i192
+%r246 = shl i192 %r245, 160
+%r247 = or i192 %r241, %r246
+%r248 = zext i192 %r247 to i224
+%r250 = getelementptr i32, i32* %r4, i32 6
+%r251 = load i32, i32* %r250
+%r252 = zext i32 %r251 to i224
+%r253 = shl i224 %r252, 192
+%r254 = or i224 %r248, %r253
+%r255 = zext i224 %r254 to i256
+%r257 = getelementptr i32, i32* %r4, i32 7
+%r258 = load i32, i32* %r257
+%r259 = zext i32 %r258 to i256
+%r260 = shl i256 %r259, 224
+%r261 = or i256 %r255, %r260
+%r262 = zext i256 %r261 to i288
+%r264 = getelementptr i32, i32* %r4, i32 8
+%r265 = load i32, i32* %r264
+%r266 = zext i32 %r265 to i288
+%r267 = shl i288 %r266, 256
+%r268 = or i288 %r262, %r267
+%r269 = zext i288 %r268 to i320
+%r271 = getelementptr i32, i32* %r4, i32 9
+%r272 = load i32, i32* %r271
+%r273 = zext i32 %r272 to i320
+%r274 = shl i320 %r273, 288
+%r275 = or i320 %r269, %r274
+%r276 = zext i320 %r275 to i352
+%r278 = getelementptr i32, i32* %r4, i32 10
+%r279 = load i32, i32* %r278
+%r280 = zext i32 %r279 to i352
+%r281 = shl i352 %r280, 320
+%r282 = or i352 %r276, %r281
+%r283 = zext i352 %r282 to i384
+%r285 = getelementptr i32, i32* %r4, i32 11
+%r286 = load i32, i32* %r285
+%r287 = zext i32 %r286 to i384
+%r288 = shl i384 %r287, 352
+%r289 = or i384 %r283, %r288 ; %r289 = m
+%r290 = zext i384 %r289 to i416
+%r291 = sub i416 %r163, %r290 ; %r291 = (x + y) - m, computed in i416
+%r292 = lshr i416 %r291, 384 ; move bit 384 of the difference down to bit 0
+%r293 = trunc i416 %r292 to i1 ; that bit is 1 if the subtraction wrapped below zero (x + y < m)
+br i1%r293, label %carry, label %nocarry ; bit set -> keep the sum already stored; bit clear -> overwrite it below
+nocarry: ; --- no wrap (x + y >= m): overwrite %r1[0..11] with the low 384 bits of x + y - m ---
+%r294 = trunc i416 %r291 to i384
+%r295 = trunc i384 %r294 to i32
+%r297 = getelementptr i32, i32* %r1, i32 0
+store i32 %r295, i32* %r297
+%r298 = lshr i384 %r294, 32
+%r299 = trunc i384 %r298 to i32
+%r301 = getelementptr i32, i32* %r1, i32 1
+store i32 %r299, i32* %r301
+%r302 = lshr i384 %r298, 32
+%r303 = trunc i384 %r302 to i32
+%r305 = getelementptr i32, i32* %r1, i32 2
+store i32 %r303, i32* %r305
+%r306 = lshr i384 %r302, 32
+%r307 = trunc i384 %r306 to i32
+%r309 = getelementptr i32, i32* %r1, i32 3
+store i32 %r307, i32* %r309
+%r310 = lshr i384 %r306, 32
+%r311 = trunc i384 %r310 to i32
+%r313 = getelementptr i32, i32* %r1, i32 4
+store i32 %r311, i32* %r313
+%r314 = lshr i384 %r310, 32
+%r315 = trunc i384 %r314 to i32
+%r317 = getelementptr i32, i32* %r1, i32 5
+store i32 %r315, i32* %r317
+%r318 = lshr i384 %r314, 32
+%r319 = trunc i384 %r318 to i32
+%r321 = getelementptr i32, i32* %r1, i32 6
+store i32 %r319, i32* %r321
+%r322 = lshr i384 %r318, 32
+%r323 = trunc i384 %r322 to i32
+%r325 = getelementptr i32, i32* %r1, i32 7
+store i32 %r323, i32* %r325
+%r326 = lshr i384 %r322, 32
+%r327 = trunc i384 %r326 to i32
+%r329 = getelementptr i32, i32* %r1, i32 8
+store i32 %r327, i32* %r329
+%r330 = lshr i384 %r326, 32
+%r331 = trunc i384 %r330 to i32
+%r333 = getelementptr i32, i32* %r1, i32 9
+store i32 %r331, i32* %r333
+%r334 = lshr i384 %r330, 32
+%r335 = trunc i384 %r334 to i32
+%r337 = getelementptr i32, i32* %r1, i32 10
+store i32 %r335, i32* %r337
+%r338 = lshr i384 %r334, 32
+%r339 = trunc i384 %r338 to i32
+%r341 = getelementptr i32, i32* %r1, i32 11
+store i32 %r339, i32* %r341
+ret void
+carry: ; bit 384 was set: %r1 keeps the unreduced sum stored earlier
+ret void
+}
+define void @mcl_fp_addNF12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Stores into %r1 either s = x + y or d = s - m, chosen by the top bit of d (x = %r2, y = %r3, m = %r4; twelve i32 limbs each, least-significant first).
+{ ; All arithmetic stays in i384 with no extra carry bit. NOTE(review): presumably relies on x + y fitting in 384 bits - confirm against the caller.
+%r5 = load i32, i32* %r2 ; --- assemble x (i384) from %r2[0..11]: limb i is OR-ed in at bit 32*i ---
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81 ; %r82 = x
+%r83 = load i32, i32* %r3 ; --- assemble y (i384) from %r3[0..11] the same way ---
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r3, i32 1
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r3, i32 2
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r3, i32 3
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r3, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r3, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r121 = getelementptr i32, i32* %r3, i32 6
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i224
+%r124 = shl i224 %r123, 192
+%r125 = or i224 %r119, %r124
+%r126 = zext i224 %r125 to i256
+%r128 = getelementptr i32, i32* %r3, i32 7
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i256
+%r131 = shl i256 %r130, 224
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i288
+%r135 = getelementptr i32, i32* %r3, i32 8
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i288
+%r138 = shl i288 %r137, 256
+%r139 = or i288 %r133, %r138
+%r140 = zext i288 %r139 to i320
+%r142 = getelementptr i32, i32* %r3, i32 9
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i320
+%r145 = shl i320 %r144, 288
+%r146 = or i320 %r140, %r145
+%r147 = zext i320 %r146 to i352
+%r149 = getelementptr i32, i32* %r3, i32 10
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i352
+%r152 = shl i352 %r151, 320
+%r153 = or i352 %r147, %r152
+%r154 = zext i352 %r153 to i384
+%r156 = getelementptr i32, i32* %r3, i32 11
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i384
+%r159 = shl i384 %r158, 352
+%r160 = or i384 %r154, %r159 ; %r160 = y
+%r161 = add i384 %r82, %r160 ; s = x + y, wrapping modulo 2^384 (operands are not widened here)
+%r162 = load i32, i32* %r4 ; --- assemble m (i384) from %r4[0..11] ---
+%r163 = zext i32 %r162 to i64
+%r165 = getelementptr i32, i32* %r4, i32 1
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i64
+%r168 = shl i64 %r167, 32
+%r169 = or i64 %r163, %r168
+%r170 = zext i64 %r169 to i96
+%r172 = getelementptr i32, i32* %r4, i32 2
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i96
+%r175 = shl i96 %r174, 64
+%r176 = or i96 %r170, %r175
+%r177 = zext i96 %r176 to i128
+%r179 = getelementptr i32, i32* %r4, i32 3
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i128
+%r182 = shl i128 %r181, 96
+%r183 = or i128 %r177, %r182
+%r184 = zext i128 %r183 to i160
+%r186 = getelementptr i32, i32* %r4, i32 4
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i160
+%r189 = shl i160 %r188, 128
+%r190 = or i160 %r184, %r189
+%r191 = zext i160 %r190 to i192
+%r193 = getelementptr i32, i32* %r4, i32 5
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i192
+%r196 = shl i192 %r195, 160
+%r197 = or i192 %r191, %r196
+%r198 = zext i192 %r197 to i224
+%r200 = getelementptr i32, i32* %r4, i32 6
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i224
+%r203 = shl i224 %r202, 192
+%r204 = or i224 %r198, %r203
+%r205 = zext i224 %r204 to i256
+%r207 = getelementptr i32, i32* %r4, i32 7
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i256
+%r210 = shl i256 %r209, 224
+%r211 = or i256 %r205, %r210
+%r212 = zext i256 %r211 to i288
+%r214 = getelementptr i32, i32* %r4, i32 8
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i288
+%r217 = shl i288 %r216, 256
+%r218 = or i288 %r212, %r217
+%r219 = zext i288 %r218 to i320
+%r221 = getelementptr i32, i32* %r4, i32 9
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i320
+%r224 = shl i320 %r223, 288
+%r225 = or i320 %r219, %r224
+%r226 = zext i320 %r225 to i352
+%r228 = getelementptr i32, i32* %r4, i32 10
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i352
+%r231 = shl i352 %r230, 320
+%r232 = or i352 %r226, %r231
+%r233 = zext i352 %r232 to i384
+%r235 = getelementptr i32, i32* %r4, i32 11
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i384
+%r238 = shl i384 %r237, 352
+%r239 = or i384 %r233, %r238 ; %r239 = m
+%r240 = sub i384 %r161, %r239 ; d = s - m, wrapping modulo 2^384
+%r241 = lshr i384 %r240, 383 ; move the top bit (bit 383) of d down to bit 0
+%r242 = trunc i384 %r241 to i1 ; top bit of d, used here as its sign
+%r243 = select i1 %r242, i384 %r161, i384 %r240 ; top bit set -> keep s, otherwise take d; no branch is used
+%r244 = trunc i384 %r243 to i32 ; --- scatter the chosen value to %r1[0..11]: low 32 bits, shift right by 32, repeat ---
+%r246 = getelementptr i32, i32* %r1, i32 0
+store i32 %r244, i32* %r246
+%r247 = lshr i384 %r243, 32
+%r248 = trunc i384 %r247 to i32
+%r250 = getelementptr i32, i32* %r1, i32 1
+store i32 %r248, i32* %r250
+%r251 = lshr i384 %r247, 32
+%r252 = trunc i384 %r251 to i32
+%r254 = getelementptr i32, i32* %r1, i32 2
+store i32 %r252, i32* %r254
+%r255 = lshr i384 %r251, 32
+%r256 = trunc i384 %r255 to i32
+%r258 = getelementptr i32, i32* %r1, i32 3
+store i32 %r256, i32* %r258
+%r259 = lshr i384 %r255, 32
+%r260 = trunc i384 %r259 to i32
+%r262 = getelementptr i32, i32* %r1, i32 4
+store i32 %r260, i32* %r262
+%r263 = lshr i384 %r259, 32
+%r264 = trunc i384 %r263 to i32
+%r266 = getelementptr i32, i32* %r1, i32 5
+store i32 %r264, i32* %r266
+%r267 = lshr i384 %r263, 32
+%r268 = trunc i384 %r267 to i32
+%r270 = getelementptr i32, i32* %r1, i32 6
+store i32 %r268, i32* %r270
+%r271 = lshr i384 %r267, 32
+%r272 = trunc i384 %r271 to i32
+%r274 = getelementptr i32, i32* %r1, i32 7
+store i32 %r272, i32* %r274
+%r275 = lshr i384 %r271, 32
+%r276 = trunc i384 %r275 to i32
+%r278 = getelementptr i32, i32* %r1, i32 8
+store i32 %r276, i32* %r278
+%r279 = lshr i384 %r275, 32
+%r280 = trunc i384 %r279 to i32
+%r282 = getelementptr i32, i32* %r1, i32 9
+store i32 %r280, i32* %r282
+%r283 = lshr i384 %r279, 32
+%r284 = trunc i384 %r283 to i32
+%r286 = getelementptr i32, i32* %r1, i32 10
+store i32 %r284, i32* %r286
+%r287 = lshr i384 %r283, 32
+%r288 = trunc i384 %r287 to i32
+%r290 = getelementptr i32, i32* %r1, i32 11
+store i32 %r288, i32* %r290
+ret void
+}
+define void @mcl_fp_sub12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Stores x - y into %r1 and, if that subtraction borrowed, overwrites it with x - y + m (x = %r2, y = %r3, m = %r4; twelve i32 limbs each, least-significant first).
+{ ; NOTE(review): %r4 is presumably the field modulus, which would make this a modular subtraction - confirm against the caller.
+%r5 = load i32, i32* %r2 ; --- assemble x (i384) from %r2[0..11]: limb i is OR-ed in at bit 32*i ---
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81 ; %r82 = x
+%r83 = load i32, i32* %r3 ; --- assemble y (i384) from %r3[0..11] the same way ---
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r3, i32 1
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r3, i32 2
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r3, i32 3
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r3, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r3, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r121 = getelementptr i32, i32* %r3, i32 6
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i224
+%r124 = shl i224 %r123, 192
+%r125 = or i224 %r119, %r124
+%r126 = zext i224 %r125 to i256
+%r128 = getelementptr i32, i32* %r3, i32 7
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i256
+%r131 = shl i256 %r130, 224
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i288
+%r135 = getelementptr i32, i32* %r3, i32 8
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i288
+%r138 = shl i288 %r137, 256
+%r139 = or i288 %r133, %r138
+%r140 = zext i288 %r139 to i320
+%r142 = getelementptr i32, i32* %r3, i32 9
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i320
+%r145 = shl i320 %r144, 288
+%r146 = or i320 %r140, %r145
+%r147 = zext i320 %r146 to i352
+%r149 = getelementptr i32, i32* %r3, i32 10
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i352
+%r152 = shl i352 %r151, 320
+%r153 = or i352 %r147, %r152
+%r154 = zext i352 %r153 to i384
+%r156 = getelementptr i32, i32* %r3, i32 11
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i384
+%r159 = shl i384 %r158, 352
+%r160 = or i384 %r154, %r159 ; %r160 = y
+%r161 = zext i384 %r82 to i416 ; widen both operands to i416 so a borrow shows up above bit 383
+%r162 = zext i384 %r160 to i416
+%r163 = sub i416 %r161, %r162 ; %r163 = x - y, computed in i416
+%r164 = trunc i416 %r163 to i384 ; low 384 bits of the difference
+%r165 = lshr i416 %r163, 384 ; move bit 384 of the difference down to bit 0
+%r166 = trunc i416 %r165 to i1 ; borrow flag: 1 exactly when x < y
+%r167 = trunc i384 %r164 to i32 ; --- store the raw difference to %r1[0..11] first: low 32 bits, shift right by 32, repeat ---
+%r169 = getelementptr i32, i32* %r1, i32 0
+store i32 %r167, i32* %r169
+%r170 = lshr i384 %r164, 32
+%r171 = trunc i384 %r170 to i32
+%r173 = getelementptr i32, i32* %r1, i32 1
+store i32 %r171, i32* %r173
+%r174 = lshr i384 %r170, 32
+%r175 = trunc i384 %r174 to i32
+%r177 = getelementptr i32, i32* %r1, i32 2
+store i32 %r175, i32* %r177
+%r178 = lshr i384 %r174, 32
+%r179 = trunc i384 %r178 to i32
+%r181 = getelementptr i32, i32* %r1, i32 3
+store i32 %r179, i32* %r181
+%r182 = lshr i384 %r178, 32
+%r183 = trunc i384 %r182 to i32
+%r185 = getelementptr i32, i32* %r1, i32 4
+store i32 %r183, i32* %r185
+%r186 = lshr i384 %r182, 32
+%r187 = trunc i384 %r186 to i32
+%r189 = getelementptr i32, i32* %r1, i32 5
+store i32 %r187, i32* %r189
+%r190 = lshr i384 %r186, 32
+%r191 = trunc i384 %r190 to i32
+%r193 = getelementptr i32, i32* %r1, i32 6
+store i32 %r191, i32* %r193
+%r194 = lshr i384 %r190, 32
+%r195 = trunc i384 %r194 to i32
+%r197 = getelementptr i32, i32* %r1, i32 7
+store i32 %r195, i32* %r197
+%r198 = lshr i384 %r194, 32
+%r199 = trunc i384 %r198 to i32
+%r201 = getelementptr i32, i32* %r1, i32 8
+store i32 %r199, i32* %r201
+%r202 = lshr i384 %r198, 32
+%r203 = trunc i384 %r202 to i32
+%r205 = getelementptr i32, i32* %r1, i32 9
+store i32 %r203, i32* %r205
+%r206 = lshr i384 %r202, 32
+%r207 = trunc i384 %r206 to i32
+%r209 = getelementptr i32, i32* %r1, i32 10
+store i32 %r207, i32* %r209
+%r210 = lshr i384 %r206, 32
+%r211 = trunc i384 %r210 to i32
+%r213 = getelementptr i32, i32* %r1, i32 11
+store i32 %r211, i32* %r213
+br i1%r166, label %carry, label %nocarry ; borrow -> add m back below; no borrow -> the stored difference is final
+nocarry: ; x >= y: %r1 already holds x - y
+ret void
+carry: ; x < y: m is read only on this path
+%r214 = load i32, i32* %r4 ; --- assemble m (i384) from %r4[0..11] ---
+%r215 = zext i32 %r214 to i64
+%r217 = getelementptr i32, i32* %r4, i32 1
+%r218 = load i32, i32* %r217
+%r219 = zext i32 %r218 to i64
+%r220 = shl i64 %r219, 32
+%r221 = or i64 %r215, %r220
+%r222 = zext i64 %r221 to i96
+%r224 = getelementptr i32, i32* %r4, i32 2
+%r225 = load i32, i32* %r224
+%r226 = zext i32 %r225 to i96
+%r227 = shl i96 %r226, 64
+%r228 = or i96 %r222, %r227
+%r229 = zext i96 %r228 to i128
+%r231 = getelementptr i32, i32* %r4, i32 3
+%r232 = load i32, i32* %r231
+%r233 = zext i32 %r232 to i128
+%r234 = shl i128 %r233, 96
+%r235 = or i128 %r229, %r234
+%r236 = zext i128 %r235 to i160
+%r238 = getelementptr i32, i32* %r4, i32 4
+%r239 = load i32, i32* %r238
+%r240 = zext i32 %r239 to i160
+%r241 = shl i160 %r240, 128
+%r242 = or i160 %r236, %r241
+%r243 = zext i160 %r242 to i192
+%r245 = getelementptr i32, i32* %r4, i32 5
+%r246 = load i32, i32* %r245
+%r247 = zext i32 %r246 to i192
+%r248 = shl i192 %r247, 160
+%r249 = or i192 %r243, %r248
+%r250 = zext i192 %r249 to i224
+%r252 = getelementptr i32, i32* %r4, i32 6
+%r253 = load i32, i32* %r252
+%r254 = zext i32 %r253 to i224
+%r255 = shl i224 %r254, 192
+%r256 = or i224 %r250, %r255
+%r257 = zext i224 %r256 to i256
+%r259 = getelementptr i32, i32* %r4, i32 7
+%r260 = load i32, i32* %r259
+%r261 = zext i32 %r260 to i256
+%r262 = shl i256 %r261, 224
+%r263 = or i256 %r257, %r262
+%r264 = zext i256 %r263 to i288
+%r266 = getelementptr i32, i32* %r4, i32 8
+%r267 = load i32, i32* %r266
+%r268 = zext i32 %r267 to i288
+%r269 = shl i288 %r268, 256
+%r270 = or i288 %r264, %r269
+%r271 = zext i288 %r270 to i320
+%r273 = getelementptr i32, i32* %r4, i32 9
+%r274 = load i32, i32* %r273
+%r275 = zext i32 %r274 to i320
+%r276 = shl i320 %r275, 288
+%r277 = or i320 %r271, %r276
+%r278 = zext i320 %r277 to i352
+%r280 = getelementptr i32, i32* %r4, i32 10
+%r281 = load i32, i32* %r280
+%r282 = zext i32 %r281 to i352
+%r283 = shl i352 %r282, 320
+%r284 = or i352 %r278, %r283
+%r285 = zext i352 %r284 to i384
+%r287 = getelementptr i32, i32* %r4, i32 11
+%r288 = load i32, i32* %r287
+%r289 = zext i32 %r288 to i384
+%r290 = shl i384 %r289, 352
+%r291 = or i384 %r285, %r290 ; %r291 = m
+%r292 = add i384 %r164, %r291 ; (x - y) + m, wrapping modulo 2^384
+%r293 = trunc i384 %r292 to i32 ; --- overwrite %r1[0..11] with the corrected value ---
+%r295 = getelementptr i32, i32* %r1, i32 0
+store i32 %r293, i32* %r295
+%r296 = lshr i384 %r292, 32
+%r297 = trunc i384 %r296 to i32
+%r299 = getelementptr i32, i32* %r1, i32 1
+store i32 %r297, i32* %r299
+%r300 = lshr i384 %r296, 32
+%r301 = trunc i384 %r300 to i32
+%r303 = getelementptr i32, i32* %r1, i32 2
+store i32 %r301, i32* %r303
+%r304 = lshr i384 %r300, 32
+%r305 = trunc i384 %r304 to i32
+%r307 = getelementptr i32, i32* %r1, i32 3
+store i32 %r305, i32* %r307
+%r308 = lshr i384 %r304, 32
+%r309 = trunc i384 %r308 to i32
+%r311 = getelementptr i32, i32* %r1, i32 4
+store i32 %r309, i32* %r311
+%r312 = lshr i384 %r308, 32
+%r313 = trunc i384 %r312 to i32
+%r315 = getelementptr i32, i32* %r1, i32 5
+store i32 %r313, i32* %r315
+%r316 = lshr i384 %r312, 32
+%r317 = trunc i384 %r316 to i32
+%r319 = getelementptr i32, i32* %r1, i32 6
+store i32 %r317, i32* %r319
+%r320 = lshr i384 %r316, 32
+%r321 = trunc i384 %r320 to i32
+%r323 = getelementptr i32, i32* %r1, i32 7
+store i32 %r321, i32* %r323
+%r324 = lshr i384 %r320, 32
+%r325 = trunc i384 %r324 to i32
+%r327 = getelementptr i32, i32* %r1, i32 8
+store i32 %r325, i32* %r327
+%r328 = lshr i384 %r324, 32
+%r329 = trunc i384 %r328 to i32
+%r331 = getelementptr i32, i32* %r1, i32 9
+store i32 %r329, i32* %r331
+%r332 = lshr i384 %r328, 32
+%r333 = trunc i384 %r332 to i32
+%r335 = getelementptr i32, i32* %r1, i32 10
+store i32 %r333, i32* %r335
+%r336 = lshr i384 %r332, 32
+%r337 = trunc i384 %r336 to i32
+%r339 = getelementptr i32, i32* %r1, i32 11
+store i32 %r337, i32* %r339
+ret void
+}
+define void @mcl_fp_subNF12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r1[0..11] = %r2[0..11] - %r3[0..11] as 384-bit integers, plus %r4[0..11] when the difference has bit 383 set. NOTE(review): presumably %r4 is the field modulus p and this is z = x - y mod p - confirm against the generator.
+{
+%r5 = load i32, i32* %r2 ; start packing the twelve 32-bit words of %r2 into one i384 (word 0 = least significant)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32 ; place word 1 in bits 32..63
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96 ; widen by 32 bits to make room for the next word; same pattern repeats up to i384
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81 ; %r82 = 384-bit value read from %r2
+%r83 = load i32, i32* %r3 ; pack the twelve words of %r3 the same way
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r3, i32 1
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r3, i32 2
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r3, i32 3
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r3, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r3, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r121 = getelementptr i32, i32* %r3, i32 6
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i224
+%r124 = shl i224 %r123, 192
+%r125 = or i224 %r119, %r124
+%r126 = zext i224 %r125 to i256
+%r128 = getelementptr i32, i32* %r3, i32 7
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i256
+%r131 = shl i256 %r130, 224
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i288
+%r135 = getelementptr i32, i32* %r3, i32 8
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i288
+%r138 = shl i288 %r137, 256
+%r139 = or i288 %r133, %r138
+%r140 = zext i288 %r139 to i320
+%r142 = getelementptr i32, i32* %r3, i32 9
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i320
+%r145 = shl i320 %r144, 288
+%r146 = or i320 %r140, %r145
+%r147 = zext i320 %r146 to i352
+%r149 = getelementptr i32, i32* %r3, i32 10
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i352
+%r152 = shl i352 %r151, 320
+%r153 = or i352 %r147, %r152
+%r154 = zext i352 %r153 to i384
+%r156 = getelementptr i32, i32* %r3, i32 11
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i384
+%r159 = shl i384 %r158, 352
+%r160 = or i384 %r154, %r159 ; %r160 = 384-bit value read from %r3
+%r161 = sub i384 %r82, %r160 ; wrapping 384-bit difference (%r2 value - %r3 value)
+%r162 = lshr i384 %r161, 383 ; bring the top bit (bit 383) of the difference down to bit 0
+%r163 = trunc i384 %r162 to i1 ; flag = top bit of the difference, used below to decide whether to add the %r4 value
+%r164 = load i32, i32* %r4 ; pack the twelve words of %r4 the same way
+%r165 = zext i32 %r164 to i64
+%r167 = getelementptr i32, i32* %r4, i32 1
+%r168 = load i32, i32* %r167
+%r169 = zext i32 %r168 to i64
+%r170 = shl i64 %r169, 32
+%r171 = or i64 %r165, %r170
+%r172 = zext i64 %r171 to i96
+%r174 = getelementptr i32, i32* %r4, i32 2
+%r175 = load i32, i32* %r174
+%r176 = zext i32 %r175 to i96
+%r177 = shl i96 %r176, 64
+%r178 = or i96 %r172, %r177
+%r179 = zext i96 %r178 to i128
+%r181 = getelementptr i32, i32* %r4, i32 3
+%r182 = load i32, i32* %r181
+%r183 = zext i32 %r182 to i128
+%r184 = shl i128 %r183, 96
+%r185 = or i128 %r179, %r184
+%r186 = zext i128 %r185 to i160
+%r188 = getelementptr i32, i32* %r4, i32 4
+%r189 = load i32, i32* %r188
+%r190 = zext i32 %r189 to i160
+%r191 = shl i160 %r190, 128
+%r192 = or i160 %r186, %r191
+%r193 = zext i160 %r192 to i192
+%r195 = getelementptr i32, i32* %r4, i32 5
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i192
+%r198 = shl i192 %r197, 160
+%r199 = or i192 %r193, %r198
+%r200 = zext i192 %r199 to i224
+%r202 = getelementptr i32, i32* %r4, i32 6
+%r203 = load i32, i32* %r202
+%r204 = zext i32 %r203 to i224
+%r205 = shl i224 %r204, 192
+%r206 = or i224 %r200, %r205
+%r207 = zext i224 %r206 to i256
+%r209 = getelementptr i32, i32* %r4, i32 7
+%r210 = load i32, i32* %r209
+%r211 = zext i32 %r210 to i256
+%r212 = shl i256 %r211, 224
+%r213 = or i256 %r207, %r212
+%r214 = zext i256 %r213 to i288
+%r216 = getelementptr i32, i32* %r4, i32 8
+%r217 = load i32, i32* %r216
+%r218 = zext i32 %r217 to i288
+%r219 = shl i288 %r218, 256
+%r220 = or i288 %r214, %r219
+%r221 = zext i288 %r220 to i320
+%r223 = getelementptr i32, i32* %r4, i32 9
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i320
+%r226 = shl i320 %r225, 288
+%r227 = or i320 %r221, %r226
+%r228 = zext i320 %r227 to i352
+%r230 = getelementptr i32, i32* %r4, i32 10
+%r231 = load i32, i32* %r230
+%r232 = zext i32 %r231 to i352
+%r233 = shl i352 %r232, 320
+%r234 = or i352 %r228, %r233
+%r235 = zext i352 %r234 to i384
+%r237 = getelementptr i32, i32* %r4, i32 11
+%r238 = load i32, i32* %r237
+%r239 = zext i32 %r238 to i384
+%r240 = shl i384 %r239, 352
+%r241 = or i384 %r235, %r240 ; %r241 = 384-bit value read from %r4
+%r243 = select i1 %r163, i384 %r241, i384 0 ; branch-free correction term: the %r4 value when the flag is set, otherwise 0
+%r244 = add i384 %r161, %r243 ; final result = difference + correction term (wrapping)
+%r245 = trunc i384 %r244 to i32 ; write the result to %r1 one 32-bit word at a time, lowest word first
+%r247 = getelementptr i32, i32* %r1, i32 0
+store i32 %r245, i32* %r247
+%r248 = lshr i384 %r244, 32 ; discard the word just stored; same step repeats for every remaining word
+%r249 = trunc i384 %r248 to i32
+%r251 = getelementptr i32, i32* %r1, i32 1
+store i32 %r249, i32* %r251
+%r252 = lshr i384 %r248, 32
+%r253 = trunc i384 %r252 to i32
+%r255 = getelementptr i32, i32* %r1, i32 2
+store i32 %r253, i32* %r255
+%r256 = lshr i384 %r252, 32
+%r257 = trunc i384 %r256 to i32
+%r259 = getelementptr i32, i32* %r1, i32 3
+store i32 %r257, i32* %r259
+%r260 = lshr i384 %r256, 32
+%r261 = trunc i384 %r260 to i32
+%r263 = getelementptr i32, i32* %r1, i32 4
+store i32 %r261, i32* %r263
+%r264 = lshr i384 %r260, 32
+%r265 = trunc i384 %r264 to i32
+%r267 = getelementptr i32, i32* %r1, i32 5
+store i32 %r265, i32* %r267
+%r268 = lshr i384 %r264, 32
+%r269 = trunc i384 %r268 to i32
+%r271 = getelementptr i32, i32* %r1, i32 6
+store i32 %r269, i32* %r271
+%r272 = lshr i384 %r268, 32
+%r273 = trunc i384 %r272 to i32
+%r275 = getelementptr i32, i32* %r1, i32 7
+store i32 %r273, i32* %r275
+%r276 = lshr i384 %r272, 32
+%r277 = trunc i384 %r276 to i32
+%r279 = getelementptr i32, i32* %r1, i32 8
+store i32 %r277, i32* %r279
+%r280 = lshr i384 %r276, 32
+%r281 = trunc i384 %r280 to i32
+%r283 = getelementptr i32, i32* %r1, i32 9
+store i32 %r281, i32* %r283
+%r284 = lshr i384 %r280, 32
+%r285 = trunc i384 %r284 to i32
+%r287 = getelementptr i32, i32* %r1, i32 10
+store i32 %r285, i32* %r287
+%r288 = lshr i384 %r284, 32
+%r289 = trunc i384 %r288 to i32
+%r291 = getelementptr i32, i32* %r1, i32 11
+store i32 %r289, i32* %r291
+ret void
+}
+define void @mcl_fpDbl_add12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Adds the 24-word values at %r2 and %r3; stores the low 12 words of the sum to %r1[0..11] unchanged and the upper part, after a conditional subtraction of the 12-word value at %r4, to %r1[12..23]. NOTE(review): presumably %r4 is the field modulus p - confirm against the generator.
+{
+%r5 = load i32, i32* %r2 ; start packing the twenty-four 32-bit words of %r2 into one i768 (word 0 = least significant)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32 ; place word 1 in bits 32..63
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96 ; widen by 32 bits to make room for the next word; same pattern repeats up to i768
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165 ; %r166 = 768-bit value read from %r2
+%r167 = load i32, i32* %r3 ; pack the twenty-four words of %r3 the same way
+%r168 = zext i32 %r167 to i64
+%r170 = getelementptr i32, i32* %r3, i32 1
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i64
+%r173 = shl i64 %r172, 32
+%r174 = or i64 %r168, %r173
+%r175 = zext i64 %r174 to i96
+%r177 = getelementptr i32, i32* %r3, i32 2
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i96
+%r180 = shl i96 %r179, 64
+%r181 = or i96 %r175, %r180
+%r182 = zext i96 %r181 to i128
+%r184 = getelementptr i32, i32* %r3, i32 3
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i128
+%r187 = shl i128 %r186, 96
+%r188 = or i128 %r182, %r187
+%r189 = zext i128 %r188 to i160
+%r191 = getelementptr i32, i32* %r3, i32 4
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i160
+%r194 = shl i160 %r193, 128
+%r195 = or i160 %r189, %r194
+%r196 = zext i160 %r195 to i192
+%r198 = getelementptr i32, i32* %r3, i32 5
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i192
+%r201 = shl i192 %r200, 160
+%r202 = or i192 %r196, %r201
+%r203 = zext i192 %r202 to i224
+%r205 = getelementptr i32, i32* %r3, i32 6
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i224
+%r208 = shl i224 %r207, 192
+%r209 = or i224 %r203, %r208
+%r210 = zext i224 %r209 to i256
+%r212 = getelementptr i32, i32* %r3, i32 7
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i256
+%r215 = shl i256 %r214, 224
+%r216 = or i256 %r210, %r215
+%r217 = zext i256 %r216 to i288
+%r219 = getelementptr i32, i32* %r3, i32 8
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i288
+%r222 = shl i288 %r221, 256
+%r223 = or i288 %r217, %r222
+%r224 = zext i288 %r223 to i320
+%r226 = getelementptr i32, i32* %r3, i32 9
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i320
+%r229 = shl i320 %r228, 288
+%r230 = or i320 %r224, %r229
+%r231 = zext i320 %r230 to i352
+%r233 = getelementptr i32, i32* %r3, i32 10
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i352
+%r236 = shl i352 %r235, 320
+%r237 = or i352 %r231, %r236
+%r238 = zext i352 %r237 to i384
+%r240 = getelementptr i32, i32* %r3, i32 11
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i384
+%r243 = shl i384 %r242, 352
+%r244 = or i384 %r238, %r243
+%r245 = zext i384 %r244 to i416
+%r247 = getelementptr i32, i32* %r3, i32 12
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i416
+%r250 = shl i416 %r249, 384
+%r251 = or i416 %r245, %r250
+%r252 = zext i416 %r251 to i448
+%r254 = getelementptr i32, i32* %r3, i32 13
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i448
+%r257 = shl i448 %r256, 416
+%r258 = or i448 %r252, %r257
+%r259 = zext i448 %r258 to i480
+%r261 = getelementptr i32, i32* %r3, i32 14
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i480
+%r264 = shl i480 %r263, 448
+%r265 = or i480 %r259, %r264
+%r266 = zext i480 %r265 to i512
+%r268 = getelementptr i32, i32* %r3, i32 15
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i512
+%r271 = shl i512 %r270, 480
+%r272 = or i512 %r266, %r271
+%r273 = zext i512 %r272 to i544
+%r275 = getelementptr i32, i32* %r3, i32 16
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i544
+%r278 = shl i544 %r277, 512
+%r279 = or i544 %r273, %r278
+%r280 = zext i544 %r279 to i576
+%r282 = getelementptr i32, i32* %r3, i32 17
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i576
+%r285 = shl i576 %r284, 544
+%r286 = or i576 %r280, %r285
+%r287 = zext i576 %r286 to i608
+%r289 = getelementptr i32, i32* %r3, i32 18
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i608
+%r292 = shl i608 %r291, 576
+%r293 = or i608 %r287, %r292
+%r294 = zext i608 %r293 to i640
+%r296 = getelementptr i32, i32* %r3, i32 19
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i640
+%r299 = shl i640 %r298, 608
+%r300 = or i640 %r294, %r299
+%r301 = zext i640 %r300 to i672
+%r303 = getelementptr i32, i32* %r3, i32 20
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i672
+%r306 = shl i672 %r305, 640
+%r307 = or i672 %r301, %r306
+%r308 = zext i672 %r307 to i704
+%r310 = getelementptr i32, i32* %r3, i32 21
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i704
+%r313 = shl i704 %r312, 672
+%r314 = or i704 %r308, %r313
+%r315 = zext i704 %r314 to i736
+%r317 = getelementptr i32, i32* %r3, i32 22
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i736
+%r320 = shl i736 %r319, 704
+%r321 = or i736 %r315, %r320
+%r322 = zext i736 %r321 to i768
+%r324 = getelementptr i32, i32* %r3, i32 23
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i768
+%r327 = shl i768 %r326, 736
+%r328 = or i768 %r322, %r327 ; %r328 = 768-bit value read from %r3
+%r329 = zext i768 %r166 to i800 ; widen both operands to 800 bits so the carry out of bit 767 is kept
+%r330 = zext i768 %r328 to i800
+%r331 = add i800 %r329, %r330 ; full sum including the carry bit
+%r332 = trunc i800 %r331 to i384 ; low 384 bits of the sum
+%r333 = trunc i384 %r332 to i32 ; write the low 384 bits to %r1[0..11] one word at a time, lowest word first
+%r335 = getelementptr i32, i32* %r1, i32 0
+store i32 %r333, i32* %r335
+%r336 = lshr i384 %r332, 32 ; discard the word just stored; same step repeats for every remaining word
+%r337 = trunc i384 %r336 to i32
+%r339 = getelementptr i32, i32* %r1, i32 1
+store i32 %r337, i32* %r339
+%r340 = lshr i384 %r336, 32
+%r341 = trunc i384 %r340 to i32
+%r343 = getelementptr i32, i32* %r1, i32 2
+store i32 %r341, i32* %r343
+%r344 = lshr i384 %r340, 32
+%r345 = trunc i384 %r344 to i32
+%r347 = getelementptr i32, i32* %r1, i32 3
+store i32 %r345, i32* %r347
+%r348 = lshr i384 %r344, 32
+%r349 = trunc i384 %r348 to i32
+%r351 = getelementptr i32, i32* %r1, i32 4
+store i32 %r349, i32* %r351
+%r352 = lshr i384 %r348, 32
+%r353 = trunc i384 %r352 to i32
+%r355 = getelementptr i32, i32* %r1, i32 5
+store i32 %r353, i32* %r355
+%r356 = lshr i384 %r352, 32
+%r357 = trunc i384 %r356 to i32
+%r359 = getelementptr i32, i32* %r1, i32 6
+store i32 %r357, i32* %r359
+%r360 = lshr i384 %r356, 32
+%r361 = trunc i384 %r360 to i32
+%r363 = getelementptr i32, i32* %r1, i32 7
+store i32 %r361, i32* %r363
+%r364 = lshr i384 %r360, 32
+%r365 = trunc i384 %r364 to i32
+%r367 = getelementptr i32, i32* %r1, i32 8
+store i32 %r365, i32* %r367
+%r368 = lshr i384 %r364, 32
+%r369 = trunc i384 %r368 to i32
+%r371 = getelementptr i32, i32* %r1, i32 9
+store i32 %r369, i32* %r371
+%r372 = lshr i384 %r368, 32
+%r373 = trunc i384 %r372 to i32
+%r375 = getelementptr i32, i32* %r1, i32 10
+store i32 %r373, i32* %r375
+%r376 = lshr i384 %r372, 32
+%r377 = trunc i384 %r376 to i32
+%r379 = getelementptr i32, i32* %r1, i32 11
+store i32 %r377, i32* %r379
+%r380 = lshr i800 %r331, 384 ; shift the upper part of the sum (bits 384 and above) down to bit 0
+%r381 = trunc i800 %r380 to i416 ; upper part as i416: 384 bits plus room for the carry
+%r382 = load i32, i32* %r4 ; pack the twelve words of %r4 into one i384
+%r383 = zext i32 %r382 to i64
+%r385 = getelementptr i32, i32* %r4, i32 1
+%r386 = load i32, i32* %r385
+%r387 = zext i32 %r386 to i64
+%r388 = shl i64 %r387, 32
+%r389 = or i64 %r383, %r388
+%r390 = zext i64 %r389 to i96
+%r392 = getelementptr i32, i32* %r4, i32 2
+%r393 = load i32, i32* %r392
+%r394 = zext i32 %r393 to i96
+%r395 = shl i96 %r394, 64
+%r396 = or i96 %r390, %r395
+%r397 = zext i96 %r396 to i128
+%r399 = getelementptr i32, i32* %r4, i32 3
+%r400 = load i32, i32* %r399
+%r401 = zext i32 %r400 to i128
+%r402 = shl i128 %r401, 96
+%r403 = or i128 %r397, %r402
+%r404 = zext i128 %r403 to i160
+%r406 = getelementptr i32, i32* %r4, i32 4
+%r407 = load i32, i32* %r406
+%r408 = zext i32 %r407 to i160
+%r409 = shl i160 %r408, 128
+%r410 = or i160 %r404, %r409
+%r411 = zext i160 %r410 to i192
+%r413 = getelementptr i32, i32* %r4, i32 5
+%r414 = load i32, i32* %r413
+%r415 = zext i32 %r414 to i192
+%r416 = shl i192 %r415, 160
+%r417 = or i192 %r411, %r416
+%r418 = zext i192 %r417 to i224
+%r420 = getelementptr i32, i32* %r4, i32 6
+%r421 = load i32, i32* %r420
+%r422 = zext i32 %r421 to i224
+%r423 = shl i224 %r422, 192
+%r424 = or i224 %r418, %r423
+%r425 = zext i224 %r424 to i256
+%r427 = getelementptr i32, i32* %r4, i32 7
+%r428 = load i32, i32* %r427
+%r429 = zext i32 %r428 to i256
+%r430 = shl i256 %r429, 224
+%r431 = or i256 %r425, %r430
+%r432 = zext i256 %r431 to i288
+%r434 = getelementptr i32, i32* %r4, i32 8
+%r435 = load i32, i32* %r434
+%r436 = zext i32 %r435 to i288
+%r437 = shl i288 %r436, 256
+%r438 = or i288 %r432, %r437
+%r439 = zext i288 %r438 to i320
+%r441 = getelementptr i32, i32* %r4, i32 9
+%r442 = load i32, i32* %r441
+%r443 = zext i32 %r442 to i320
+%r444 = shl i320 %r443, 288
+%r445 = or i320 %r439, %r444
+%r446 = zext i320 %r445 to i352
+%r448 = getelementptr i32, i32* %r4, i32 10
+%r449 = load i32, i32* %r448
+%r450 = zext i32 %r449 to i352
+%r451 = shl i352 %r450, 320
+%r452 = or i352 %r446, %r451
+%r453 = zext i352 %r452 to i384
+%r455 = getelementptr i32, i32* %r4, i32 11
+%r456 = load i32, i32* %r455
+%r457 = zext i32 %r456 to i384
+%r458 = shl i384 %r457, 352
+%r459 = or i384 %r453, %r458 ; %r459 = 384-bit value read from %r4
+%r460 = zext i384 %r459 to i416
+%r461 = sub i416 %r381, %r460 ; trial subtraction: upper part of the sum minus the %r4 value, in 416 bits
+%r462 = lshr i416 %r461, 384 ; bring bit 384 of the trial difference down to bit 0
+%r463 = trunc i416 %r462 to i1 ; flag = bit 384 of the trial difference. NOTE(review): presumably a borrow indicator (upper part < %r4 value); that reading relies on the inputs being small enough that the upper part stays below 2^384 + %r4 value - confirm.
+%r464 = select i1 %r463, i416 %r381, i416 %r461 ; branch-free choice: keep the unsubtracted upper part when the flag is set, otherwise the trial difference
+%r465 = trunc i416 %r464 to i384 ; only the low 384 bits of the chosen value are stored
+%r467 = getelementptr i32, i32* %r1, i32 12 ; %r467 = address of %r1[12], base for the upper half of the output
+%r468 = trunc i384 %r465 to i32 ; write the chosen value to %r1[12..23] one word at a time, lowest word first
+%r470 = getelementptr i32, i32* %r467, i32 0
+store i32 %r468, i32* %r470
+%r471 = lshr i384 %r465, 32
+%r472 = trunc i384 %r471 to i32
+%r474 = getelementptr i32, i32* %r467, i32 1
+store i32 %r472, i32* %r474
+%r475 = lshr i384 %r471, 32
+%r476 = trunc i384 %r475 to i32
+%r478 = getelementptr i32, i32* %r467, i32 2
+store i32 %r476, i32* %r478
+%r479 = lshr i384 %r475, 32
+%r480 = trunc i384 %r479 to i32
+%r482 = getelementptr i32, i32* %r467, i32 3
+store i32 %r480, i32* %r482
+%r483 = lshr i384 %r479, 32
+%r484 = trunc i384 %r483 to i32
+%r486 = getelementptr i32, i32* %r467, i32 4
+store i32 %r484, i32* %r486
+%r487 = lshr i384 %r483, 32
+%r488 = trunc i384 %r487 to i32
+%r490 = getelementptr i32, i32* %r467, i32 5
+store i32 %r488, i32* %r490
+%r491 = lshr i384 %r487, 32
+%r492 = trunc i384 %r491 to i32
+%r494 = getelementptr i32, i32* %r467, i32 6
+store i32 %r492, i32* %r494
+%r495 = lshr i384 %r491, 32
+%r496 = trunc i384 %r495 to i32
+%r498 = getelementptr i32, i32* %r467, i32 7
+store i32 %r496, i32* %r498
+%r499 = lshr i384 %r495, 32
+%r500 = trunc i384 %r499 to i32
+%r502 = getelementptr i32, i32* %r467, i32 8
+store i32 %r500, i32* %r502
+%r503 = lshr i384 %r499, 32
+%r504 = trunc i384 %r503 to i32
+%r506 = getelementptr i32, i32* %r467, i32 9
+store i32 %r504, i32* %r506
+%r507 = lshr i384 %r503, 32
+%r508 = trunc i384 %r507 to i32
+%r510 = getelementptr i32, i32* %r467, i32 10
+store i32 %r508, i32* %r510
+%r511 = lshr i384 %r507, 32
+%r512 = trunc i384 %r511 to i32
+%r514 = getelementptr i32, i32* %r467, i32 11
+store i32 %r512, i32* %r514
+ret void
+}
+define void @mcl_fpDbl_sub12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165
+%r167 = load i32, i32* %r3
+%r168 = zext i32 %r167 to i64
+%r170 = getelementptr i32, i32* %r3, i32 1
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i64
+%r173 = shl i64 %r172, 32
+%r174 = or i64 %r168, %r173
+%r175 = zext i64 %r174 to i96
+%r177 = getelementptr i32, i32* %r3, i32 2
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i96
+%r180 = shl i96 %r179, 64
+%r181 = or i96 %r175, %r180
+%r182 = zext i96 %r181 to i128
+%r184 = getelementptr i32, i32* %r3, i32 3
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i128
+%r187 = shl i128 %r186, 96
+%r188 = or i128 %r182, %r187
+%r189 = zext i128 %r188 to i160
+%r191 = getelementptr i32, i32* %r3, i32 4
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i160
+%r194 = shl i160 %r193, 128
+%r195 = or i160 %r189, %r194
+%r196 = zext i160 %r195 to i192
+%r198 = getelementptr i32, i32* %r3, i32 5
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i192
+%r201 = shl i192 %r200, 160
+%r202 = or i192 %r196, %r201
+%r203 = zext i192 %r202 to i224
+%r205 = getelementptr i32, i32* %r3, i32 6
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i224
+%r208 = shl i224 %r207, 192
+%r209 = or i224 %r203, %r208
+%r210 = zext i224 %r209 to i256
+%r212 = getelementptr i32, i32* %r3, i32 7
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i256
+%r215 = shl i256 %r214, 224
+%r216 = or i256 %r210, %r215
+%r217 = zext i256 %r216 to i288
+%r219 = getelementptr i32, i32* %r3, i32 8
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i288
+%r222 = shl i288 %r221, 256
+%r223 = or i288 %r217, %r222
+%r224 = zext i288 %r223 to i320
+%r226 = getelementptr i32, i32* %r3, i32 9
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i320
+%r229 = shl i320 %r228, 288
+%r230 = or i320 %r224, %r229
+%r231 = zext i320 %r230 to i352
+%r233 = getelementptr i32, i32* %r3, i32 10
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i352
+%r236 = shl i352 %r235, 320
+%r237 = or i352 %r231, %r236
+%r238 = zext i352 %r237 to i384
+%r240 = getelementptr i32, i32* %r3, i32 11
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i384
+%r243 = shl i384 %r242, 352
+%r244 = or i384 %r238, %r243
+%r245 = zext i384 %r244 to i416
+%r247 = getelementptr i32, i32* %r3, i32 12
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i416
+%r250 = shl i416 %r249, 384
+%r251 = or i416 %r245, %r250
+%r252 = zext i416 %r251 to i448
+%r254 = getelementptr i32, i32* %r3, i32 13
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i448
+%r257 = shl i448 %r256, 416
+%r258 = or i448 %r252, %r257
+%r259 = zext i448 %r258 to i480
+%r261 = getelementptr i32, i32* %r3, i32 14
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i480
+%r264 = shl i480 %r263, 448
+%r265 = or i480 %r259, %r264
+%r266 = zext i480 %r265 to i512
+%r268 = getelementptr i32, i32* %r3, i32 15
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i512
+%r271 = shl i512 %r270, 480
+%r272 = or i512 %r266, %r271
+%r273 = zext i512 %r272 to i544
+%r275 = getelementptr i32, i32* %r3, i32 16
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i544
+%r278 = shl i544 %r277, 512
+%r279 = or i544 %r273, %r278
+%r280 = zext i544 %r279 to i576
+%r282 = getelementptr i32, i32* %r3, i32 17
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i576
+%r285 = shl i576 %r284, 544
+%r286 = or i576 %r280, %r285
+%r287 = zext i576 %r286 to i608
+%r289 = getelementptr i32, i32* %r3, i32 18
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i608
+%r292 = shl i608 %r291, 576
+%r293 = or i608 %r287, %r292
+%r294 = zext i608 %r293 to i640
+%r296 = getelementptr i32, i32* %r3, i32 19
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i640
+%r299 = shl i640 %r298, 608
+%r300 = or i640 %r294, %r299
+%r301 = zext i640 %r300 to i672
+%r303 = getelementptr i32, i32* %r3, i32 20
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i672
+%r306 = shl i672 %r305, 640
+%r307 = or i672 %r301, %r306
+%r308 = zext i672 %r307 to i704
+%r310 = getelementptr i32, i32* %r3, i32 21
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i704
+%r313 = shl i704 %r312, 672
+%r314 = or i704 %r308, %r313
+%r315 = zext i704 %r314 to i736
+%r317 = getelementptr i32, i32* %r3, i32 22
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i736
+%r320 = shl i736 %r319, 704
+%r321 = or i736 %r315, %r320
+%r322 = zext i736 %r321 to i768
+%r324 = getelementptr i32, i32* %r3, i32 23
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i768
+%r327 = shl i768 %r326, 736
+%r328 = or i768 %r322, %r327
+%r329 = zext i768 %r166 to i800
+%r330 = zext i768 %r328 to i800
+%r331 = sub i800 %r329, %r330
+%r332 = trunc i800 %r331 to i384
+%r333 = trunc i384 %r332 to i32
+%r335 = getelementptr i32, i32* %r1, i32 0
+store i32 %r333, i32* %r335
+%r336 = lshr i384 %r332, 32
+%r337 = trunc i384 %r336 to i32
+%r339 = getelementptr i32, i32* %r1, i32 1
+store i32 %r337, i32* %r339
+%r340 = lshr i384 %r336, 32
+%r341 = trunc i384 %r340 to i32
+%r343 = getelementptr i32, i32* %r1, i32 2
+store i32 %r341, i32* %r343
+%r344 = lshr i384 %r340, 32
+%r345 = trunc i384 %r344 to i32
+%r347 = getelementptr i32, i32* %r1, i32 3
+store i32 %r345, i32* %r347
+%r348 = lshr i384 %r344, 32
+%r349 = trunc i384 %r348 to i32
+%r351 = getelementptr i32, i32* %r1, i32 4
+store i32 %r349, i32* %r351
+%r352 = lshr i384 %r348, 32
+%r353 = trunc i384 %r352 to i32
+%r355 = getelementptr i32, i32* %r1, i32 5
+store i32 %r353, i32* %r355
+%r356 = lshr i384 %r352, 32
+%r357 = trunc i384 %r356 to i32
+%r359 = getelementptr i32, i32* %r1, i32 6
+store i32 %r357, i32* %r359
+%r360 = lshr i384 %r356, 32
+%r361 = trunc i384 %r360 to i32
+%r363 = getelementptr i32, i32* %r1, i32 7
+store i32 %r361, i32* %r363
+%r364 = lshr i384 %r360, 32
+%r365 = trunc i384 %r364 to i32
+%r367 = getelementptr i32, i32* %r1, i32 8
+store i32 %r365, i32* %r367
+%r368 = lshr i384 %r364, 32
+%r369 = trunc i384 %r368 to i32
+%r371 = getelementptr i32, i32* %r1, i32 9
+store i32 %r369, i32* %r371
+%r372 = lshr i384 %r368, 32
+%r373 = trunc i384 %r372 to i32
+%r375 = getelementptr i32, i32* %r1, i32 10
+store i32 %r373, i32* %r375
+%r376 = lshr i384 %r372, 32
+%r377 = trunc i384 %r376 to i32
+%r379 = getelementptr i32, i32* %r1, i32 11
+store i32 %r377, i32* %r379
+%r380 = lshr i800 %r331, 384
+%r381 = trunc i800 %r380 to i384
+%r382 = lshr i800 %r331, 768
+%r383 = trunc i800 %r382 to i1
+%r384 = load i32, i32* %r4
+%r385 = zext i32 %r384 to i64
+%r387 = getelementptr i32, i32* %r4, i32 1
+%r388 = load i32, i32* %r387
+%r389 = zext i32 %r388 to i64
+%r390 = shl i64 %r389, 32
+%r391 = or i64 %r385, %r390
+%r392 = zext i64 %r391 to i96
+%r394 = getelementptr i32, i32* %r4, i32 2
+%r395 = load i32, i32* %r394
+%r396 = zext i32 %r395 to i96
+%r397 = shl i96 %r396, 64
+%r398 = or i96 %r392, %r397
+%r399 = zext i96 %r398 to i128
+%r401 = getelementptr i32, i32* %r4, i32 3
+%r402 = load i32, i32* %r401
+%r403 = zext i32 %r402 to i128
+%r404 = shl i128 %r403, 96
+%r405 = or i128 %r399, %r404
+%r406 = zext i128 %r405 to i160
+%r408 = getelementptr i32, i32* %r4, i32 4
+%r409 = load i32, i32* %r408
+%r410 = zext i32 %r409 to i160
+%r411 = shl i160 %r410, 128
+%r412 = or i160 %r406, %r411
+%r413 = zext i160 %r412 to i192
+%r415 = getelementptr i32, i32* %r4, i32 5
+%r416 = load i32, i32* %r415
+%r417 = zext i32 %r416 to i192
+%r418 = shl i192 %r417, 160
+%r419 = or i192 %r413, %r418
+%r420 = zext i192 %r419 to i224
+%r422 = getelementptr i32, i32* %r4, i32 6
+%r423 = load i32, i32* %r422
+%r424 = zext i32 %r423 to i224
+%r425 = shl i224 %r424, 192
+%r426 = or i224 %r420, %r425
+%r427 = zext i224 %r426 to i256
+%r429 = getelementptr i32, i32* %r4, i32 7
+%r430 = load i32, i32* %r429
+%r431 = zext i32 %r430 to i256
+%r432 = shl i256 %r431, 224
+%r433 = or i256 %r427, %r432
+%r434 = zext i256 %r433 to i288
+%r436 = getelementptr i32, i32* %r4, i32 8
+%r437 = load i32, i32* %r436
+%r438 = zext i32 %r437 to i288
+%r439 = shl i288 %r438, 256
+%r440 = or i288 %r434, %r439
+%r441 = zext i288 %r440 to i320
+%r443 = getelementptr i32, i32* %r4, i32 9
+%r444 = load i32, i32* %r443
+%r445 = zext i32 %r444 to i320
+%r446 = shl i320 %r445, 288
+%r447 = or i320 %r441, %r446
+%r448 = zext i320 %r447 to i352
+%r450 = getelementptr i32, i32* %r4, i32 10
+%r451 = load i32, i32* %r450
+%r452 = zext i32 %r451 to i352
+%r453 = shl i352 %r452, 320
+%r454 = or i352 %r448, %r453
+%r455 = zext i352 %r454 to i384
+%r457 = getelementptr i32, i32* %r4, i32 11
+%r458 = load i32, i32* %r457
+%r459 = zext i32 %r458 to i384
+%r460 = shl i384 %r459, 352
+%r461 = or i384 %r455, %r460
+%r463 = select i1 %r383, i384 %r461, i384 0
+%r464 = add i384 %r381, %r463
+%r466 = getelementptr i32, i32* %r1, i32 12
+%r467 = trunc i384 %r464 to i32
+%r469 = getelementptr i32, i32* %r466, i32 0
+store i32 %r467, i32* %r469
+%r470 = lshr i384 %r464, 32
+%r471 = trunc i384 %r470 to i32
+%r473 = getelementptr i32, i32* %r466, i32 1
+store i32 %r471, i32* %r473
+%r474 = lshr i384 %r470, 32
+%r475 = trunc i384 %r474 to i32
+%r477 = getelementptr i32, i32* %r466, i32 2
+store i32 %r475, i32* %r477
+%r478 = lshr i384 %r474, 32
+%r479 = trunc i384 %r478 to i32
+%r481 = getelementptr i32, i32* %r466, i32 3
+store i32 %r479, i32* %r481
+%r482 = lshr i384 %r478, 32
+%r483 = trunc i384 %r482 to i32
+%r485 = getelementptr i32, i32* %r466, i32 4
+store i32 %r483, i32* %r485
+%r486 = lshr i384 %r482, 32
+%r487 = trunc i384 %r486 to i32
+%r489 = getelementptr i32, i32* %r466, i32 5
+store i32 %r487, i32* %r489
+%r490 = lshr i384 %r486, 32
+%r491 = trunc i384 %r490 to i32
+%r493 = getelementptr i32, i32* %r466, i32 6
+store i32 %r491, i32* %r493
+%r494 = lshr i384 %r490, 32
+%r495 = trunc i384 %r494 to i32
+%r497 = getelementptr i32, i32* %r466, i32 7
+store i32 %r495, i32* %r497
+%r498 = lshr i384 %r494, 32
+%r499 = trunc i384 %r498 to i32
+%r501 = getelementptr i32, i32* %r466, i32 8
+store i32 %r499, i32* %r501
+%r502 = lshr i384 %r498, 32
+%r503 = trunc i384 %r502 to i32
+%r505 = getelementptr i32, i32* %r466, i32 9
+store i32 %r503, i32* %r505
+%r506 = lshr i384 %r502, 32
+%r507 = trunc i384 %r506 to i32
+%r509 = getelementptr i32, i32* %r466, i32 10
+store i32 %r507, i32* %r509
+%r510 = lshr i384 %r506, 32
+%r511 = trunc i384 %r510 to i32
+%r513 = getelementptr i32, i32* %r466, i32 11
+store i32 %r511, i32* %r513
+ret void
+}
+define i448 @mulPv416x32(i32* noalias  %r2, i32 %r3) ; Builds one 448-bit value from the 13 results of @mulPos32x32(%r2, %r3, i), i = 0..12.
+{ ; NOTE(review): presumably mulPos32x32 yields the 64-bit product of 32-bit limb i at %r2 with %r3, making the result (13-limb number) * %r3 -- confirm against mulPos32x32.
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0) ; 64-bit partial result for index 0
+%r6 = trunc i64 %r5 to i32 ; its low 32 bits
+%r7 = call i32 @extractHigh32(i64 %r5) ; its high part, obtained through a helper defined elsewhere
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1) ; the same three steps repeat for indices 1..12
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
+%r34 = trunc i64 %r33 to i32
+%r35 = call i32 @extractHigh32(i64 %r33)
+%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
+%r38 = trunc i64 %r37 to i32
+%r39 = call i32 @extractHigh32(i64 %r37)
+%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
+%r42 = trunc i64 %r41 to i32
+%r43 = call i32 @extractHigh32(i64 %r41)
+%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
+%r46 = trunc i64 %r45 to i32
+%r47 = call i32 @extractHigh32(i64 %r45)
+%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
+%r50 = trunc i64 %r49 to i32
+%r51 = call i32 @extractHigh32(i64 %r49)
+%r53 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 12)
+%r54 = trunc i64 %r53 to i32
+%r55 = call i32 @extractHigh32(i64 %r53)
+%r56 = zext i32 %r6 to i64 ; begin concatenating the 13 low words into one i416, word 0 at bit 0
+%r57 = zext i32 %r10 to i64
+%r58 = shl i64 %r57, 32
+%r59 = or i64 %r56, %r58
+%r60 = zext i64 %r59 to i96 ; widen by 32 bits before inserting each further word
+%r61 = zext i32 %r14 to i96
+%r62 = shl i96 %r61, 64
+%r63 = or i96 %r60, %r62
+%r64 = zext i96 %r63 to i128
+%r65 = zext i32 %r18 to i128
+%r66 = shl i128 %r65, 96
+%r67 = or i128 %r64, %r66
+%r68 = zext i128 %r67 to i160
+%r69 = zext i32 %r22 to i160
+%r70 = shl i160 %r69, 128
+%r71 = or i160 %r68, %r70
+%r72 = zext i160 %r71 to i192
+%r73 = zext i32 %r26 to i192
+%r74 = shl i192 %r73, 160
+%r75 = or i192 %r72, %r74
+%r76 = zext i192 %r75 to i224
+%r77 = zext i32 %r30 to i224
+%r78 = shl i224 %r77, 192
+%r79 = or i224 %r76, %r78
+%r80 = zext i224 %r79 to i256
+%r81 = zext i32 %r34 to i256
+%r82 = shl i256 %r81, 224
+%r83 = or i256 %r80, %r82
+%r84 = zext i256 %r83 to i288
+%r85 = zext i32 %r38 to i288
+%r86 = shl i288 %r85, 256
+%r87 = or i288 %r84, %r86
+%r88 = zext i288 %r87 to i320
+%r89 = zext i32 %r42 to i320
+%r90 = shl i320 %r89, 288
+%r91 = or i320 %r88, %r90
+%r92 = zext i320 %r91 to i352
+%r93 = zext i32 %r46 to i352
+%r94 = shl i352 %r93, 320
+%r95 = or i352 %r92, %r94
+%r96 = zext i352 %r95 to i384
+%r97 = zext i32 %r50 to i384
+%r98 = shl i384 %r97, 352
+%r99 = or i384 %r96, %r98
+%r100 = zext i384 %r99 to i416
+%r101 = zext i32 %r54 to i416
+%r102 = shl i416 %r101, 384
+%r103 = or i416 %r100, %r102 ; %r103 = all 13 low words packed into 416 bits
+%r104 = zext i32 %r7 to i64 ; same packing, now for the 13 high words
+%r105 = zext i32 %r11 to i64
+%r106 = shl i64 %r105, 32
+%r107 = or i64 %r104, %r106
+%r108 = zext i64 %r107 to i96
+%r109 = zext i32 %r15 to i96
+%r110 = shl i96 %r109, 64
+%r111 = or i96 %r108, %r110
+%r112 = zext i96 %r111 to i128
+%r113 = zext i32 %r19 to i128
+%r114 = shl i128 %r113, 96
+%r115 = or i128 %r112, %r114
+%r116 = zext i128 %r115 to i160
+%r117 = zext i32 %r23 to i160
+%r118 = shl i160 %r117, 128
+%r119 = or i160 %r116, %r118
+%r120 = zext i160 %r119 to i192
+%r121 = zext i32 %r27 to i192
+%r122 = shl i192 %r121, 160
+%r123 = or i192 %r120, %r122
+%r124 = zext i192 %r123 to i224
+%r125 = zext i32 %r31 to i224
+%r126 = shl i224 %r125, 192
+%r127 = or i224 %r124, %r126
+%r128 = zext i224 %r127 to i256
+%r129 = zext i32 %r35 to i256
+%r130 = shl i256 %r129, 224
+%r131 = or i256 %r128, %r130
+%r132 = zext i256 %r131 to i288
+%r133 = zext i32 %r39 to i288
+%r134 = shl i288 %r133, 256
+%r135 = or i288 %r132, %r134
+%r136 = zext i288 %r135 to i320
+%r137 = zext i32 %r43 to i320
+%r138 = shl i320 %r137, 288
+%r139 = or i320 %r136, %r138
+%r140 = zext i320 %r139 to i352
+%r141 = zext i32 %r47 to i352
+%r142 = shl i352 %r141, 320
+%r143 = or i352 %r140, %r142
+%r144 = zext i352 %r143 to i384
+%r145 = zext i32 %r51 to i384
+%r146 = shl i384 %r145, 352
+%r147 = or i384 %r144, %r146
+%r148 = zext i384 %r147 to i416
+%r149 = zext i32 %r55 to i416
+%r150 = shl i416 %r149, 384
+%r151 = or i416 %r148, %r150 ; %r151 = all 13 high words packed into 416 bits
+%r152 = zext i416 %r103 to i448
+%r153 = zext i416 %r151 to i448
+%r154 = shl i448 %r153, 32 ; each high word is weighted one 32-bit position above its low word
+%r155 = add i448 %r152, %r154 ; one wide add combines both halves and propagates all carries
+ret i448 %r155
+}
+define void @mcl_fp_mulUnitPre13L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3) ; Writes the 448-bit result of mulPv416x32(%r2, %r3) to %r1[0..13] as 14 32-bit words.
+{ ; Words are stored least-significant first: %r1[0] receives bits 0..31.
+%r4 = call i448 @mulPv416x32(i32* %r2, i32 %r3)
+%r5 = trunc i448 %r4 to i32 ; lowest word
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i448 %r4, 32 ; shift the next word down; the trunc/store/shift pattern repeats below
+%r9 = trunc i448 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i448 %r8, 32
+%r13 = trunc i448 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i448 %r12, 32
+%r17 = trunc i448 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i448 %r16, 32
+%r21 = trunc i448 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+%r24 = lshr i448 %r20, 32
+%r25 = trunc i448 %r24 to i32
+%r27 = getelementptr i32, i32* %r1, i32 5
+store i32 %r25, i32* %r27
+%r28 = lshr i448 %r24, 32
+%r29 = trunc i448 %r28 to i32
+%r31 = getelementptr i32, i32* %r1, i32 6
+store i32 %r29, i32* %r31
+%r32 = lshr i448 %r28, 32
+%r33 = trunc i448 %r32 to i32
+%r35 = getelementptr i32, i32* %r1, i32 7
+store i32 %r33, i32* %r35
+%r36 = lshr i448 %r32, 32
+%r37 = trunc i448 %r36 to i32
+%r39 = getelementptr i32, i32* %r1, i32 8
+store i32 %r37, i32* %r39
+%r40 = lshr i448 %r36, 32
+%r41 = trunc i448 %r40 to i32
+%r43 = getelementptr i32, i32* %r1, i32 9
+store i32 %r41, i32* %r43
+%r44 = lshr i448 %r40, 32
+%r45 = trunc i448 %r44 to i32
+%r47 = getelementptr i32, i32* %r1, i32 10
+store i32 %r45, i32* %r47
+%r48 = lshr i448 %r44, 32
+%r49 = trunc i448 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 11
+store i32 %r49, i32* %r51
+%r52 = lshr i448 %r48, 32
+%r53 = trunc i448 %r52 to i32
+%r55 = getelementptr i32, i32* %r1, i32 12
+store i32 %r53, i32* %r55
+%r56 = lshr i448 %r52, 32
+%r57 = trunc i448 %r56 to i32 ; highest (14th) word, bits 416..447
+%r59 = getelementptr i32, i32* %r1, i32 13
+store i32 %r57, i32* %r59
+ret void
+}
+define void @mcl_fpDbl_mulPre13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; Row-by-row multiply: for each word %r3[i], i = 0..12, adds mulPv416x32(%r2, %r3[i]) into a running i448 accumulator and writes 26 words to %r1.
+{ ; After each row the accumulator's low word is stored to %r1[i] and the accumulator is shifted right by 32; the last row's full 14 words go to %r1[12..25].
+%r4 = load i32, i32* %r3 ; word 0 of %r3
+%r5 = call i448 @mulPv416x32(i32* %r2, i32 %r4) ; row 0
+%r6 = trunc i448 %r5 to i32
+store i32 %r6, i32* %r1 ; output word 0 is final after row 0
+%r7 = lshr i448 %r5, 32 ; remaining bits carry into the next row
+%r9 = getelementptr i32, i32* %r3, i32 1
+%r10 = load i32, i32* %r9
+%r11 = call i448 @mulPv416x32(i32* %r2, i32 %r10) ; row 1
+%r12 = add i448 %r7, %r11 ; accumulate: shifted previous rows + this row
+%r13 = trunc i448 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 1
+store i32 %r13, i32* %r15
+%r16 = lshr i448 %r12, 32
+%r18 = getelementptr i32, i32* %r3, i32 2
+%r19 = load i32, i32* %r18
+%r20 = call i448 @mulPv416x32(i32* %r2, i32 %r19) ; row 2
+%r21 = add i448 %r16, %r20
+%r22 = trunc i448 %r21 to i32
+%r24 = getelementptr i32, i32* %r1, i32 2
+store i32 %r22, i32* %r24
+%r25 = lshr i448 %r21, 32
+%r27 = getelementptr i32, i32* %r3, i32 3
+%r28 = load i32, i32* %r27
+%r29 = call i448 @mulPv416x32(i32* %r2, i32 %r28) ; row 3
+%r30 = add i448 %r25, %r29
+%r31 = trunc i448 %r30 to i32
+%r33 = getelementptr i32, i32* %r1, i32 3
+store i32 %r31, i32* %r33
+%r34 = lshr i448 %r30, 32
+%r36 = getelementptr i32, i32* %r3, i32 4
+%r37 = load i32, i32* %r36
+%r38 = call i448 @mulPv416x32(i32* %r2, i32 %r37) ; row 4
+%r39 = add i448 %r34, %r38
+%r40 = trunc i448 %r39 to i32
+%r42 = getelementptr i32, i32* %r1, i32 4
+store i32 %r40, i32* %r42
+%r43 = lshr i448 %r39, 32
+%r45 = getelementptr i32, i32* %r3, i32 5
+%r46 = load i32, i32* %r45
+%r47 = call i448 @mulPv416x32(i32* %r2, i32 %r46) ; row 5
+%r48 = add i448 %r43, %r47
+%r49 = trunc i448 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 5
+store i32 %r49, i32* %r51
+%r52 = lshr i448 %r48, 32
+%r54 = getelementptr i32, i32* %r3, i32 6
+%r55 = load i32, i32* %r54
+%r56 = call i448 @mulPv416x32(i32* %r2, i32 %r55) ; row 6
+%r57 = add i448 %r52, %r56
+%r58 = trunc i448 %r57 to i32
+%r60 = getelementptr i32, i32* %r1, i32 6
+store i32 %r58, i32* %r60
+%r61 = lshr i448 %r57, 32
+%r63 = getelementptr i32, i32* %r3, i32 7
+%r64 = load i32, i32* %r63
+%r65 = call i448 @mulPv416x32(i32* %r2, i32 %r64) ; row 7
+%r66 = add i448 %r61, %r65
+%r67 = trunc i448 %r66 to i32
+%r69 = getelementptr i32, i32* %r1, i32 7
+store i32 %r67, i32* %r69
+%r70 = lshr i448 %r66, 32
+%r72 = getelementptr i32, i32* %r3, i32 8
+%r73 = load i32, i32* %r72
+%r74 = call i448 @mulPv416x32(i32* %r2, i32 %r73) ; row 8
+%r75 = add i448 %r70, %r74
+%r76 = trunc i448 %r75 to i32
+%r78 = getelementptr i32, i32* %r1, i32 8
+store i32 %r76, i32* %r78
+%r79 = lshr i448 %r75, 32
+%r81 = getelementptr i32, i32* %r3, i32 9
+%r82 = load i32, i32* %r81
+%r83 = call i448 @mulPv416x32(i32* %r2, i32 %r82) ; row 9
+%r84 = add i448 %r79, %r83
+%r85 = trunc i448 %r84 to i32
+%r87 = getelementptr i32, i32* %r1, i32 9
+store i32 %r85, i32* %r87
+%r88 = lshr i448 %r84, 32
+%r90 = getelementptr i32, i32* %r3, i32 10
+%r91 = load i32, i32* %r90
+%r92 = call i448 @mulPv416x32(i32* %r2, i32 %r91) ; row 10
+%r93 = add i448 %r88, %r92
+%r94 = trunc i448 %r93 to i32
+%r96 = getelementptr i32, i32* %r1, i32 10
+store i32 %r94, i32* %r96
+%r97 = lshr i448 %r93, 32
+%r99 = getelementptr i32, i32* %r3, i32 11
+%r100 = load i32, i32* %r99
+%r101 = call i448 @mulPv416x32(i32* %r2, i32 %r100) ; row 11
+%r102 = add i448 %r97, %r101
+%r103 = trunc i448 %r102 to i32
+%r105 = getelementptr i32, i32* %r1, i32 11
+store i32 %r103, i32* %r105
+%r106 = lshr i448 %r102, 32
+%r108 = getelementptr i32, i32* %r3, i32 12
+%r109 = load i32, i32* %r108
+%r110 = call i448 @mulPv416x32(i32* %r2, i32 %r109) ; row 12 (last)
+%r111 = add i448 %r106, %r110 ; final accumulator: holds output words 12..25
+%r113 = getelementptr i32, i32* %r1, i32 12 ; base pointer for the upper 14 output words
+%r114 = trunc i448 %r111 to i32
+%r116 = getelementptr i32, i32* %r113, i32 0
+store i32 %r114, i32* %r116
+%r117 = lshr i448 %r111, 32 ; trunc/store/shift repeats for each remaining word
+%r118 = trunc i448 %r117 to i32
+%r120 = getelementptr i32, i32* %r113, i32 1
+store i32 %r118, i32* %r120
+%r121 = lshr i448 %r117, 32
+%r122 = trunc i448 %r121 to i32
+%r124 = getelementptr i32, i32* %r113, i32 2
+store i32 %r122, i32* %r124
+%r125 = lshr i448 %r121, 32
+%r126 = trunc i448 %r125 to i32
+%r128 = getelementptr i32, i32* %r113, i32 3
+store i32 %r126, i32* %r128
+%r129 = lshr i448 %r125, 32
+%r130 = trunc i448 %r129 to i32
+%r132 = getelementptr i32, i32* %r113, i32 4
+store i32 %r130, i32* %r132
+%r133 = lshr i448 %r129, 32
+%r134 = trunc i448 %r133 to i32
+%r136 = getelementptr i32, i32* %r113, i32 5
+store i32 %r134, i32* %r136
+%r137 = lshr i448 %r133, 32
+%r138 = trunc i448 %r137 to i32
+%r140 = getelementptr i32, i32* %r113, i32 6
+store i32 %r138, i32* %r140
+%r141 = lshr i448 %r137, 32
+%r142 = trunc i448 %r141 to i32
+%r144 = getelementptr i32, i32* %r113, i32 7
+store i32 %r142, i32* %r144
+%r145 = lshr i448 %r141, 32
+%r146 = trunc i448 %r145 to i32
+%r148 = getelementptr i32, i32* %r113, i32 8
+store i32 %r146, i32* %r148
+%r149 = lshr i448 %r145, 32
+%r150 = trunc i448 %r149 to i32
+%r152 = getelementptr i32, i32* %r113, i32 9
+store i32 %r150, i32* %r152
+%r153 = lshr i448 %r149, 32
+%r154 = trunc i448 %r153 to i32
+%r156 = getelementptr i32, i32* %r113, i32 10
+store i32 %r154, i32* %r156
+%r157 = lshr i448 %r153, 32
+%r158 = trunc i448 %r157 to i32
+%r160 = getelementptr i32, i32* %r113, i32 11
+store i32 %r158, i32* %r160
+%r161 = lshr i448 %r157, 32
+%r162 = trunc i448 %r161 to i32
+%r164 = getelementptr i32, i32* %r113, i32 12
+store i32 %r162, i32* %r164
+%r165 = lshr i448 %r161, 32
+%r166 = trunc i448 %r165 to i32 ; top word of the accumulator
+%r168 = getelementptr i32, i32* %r113, i32 13 ; i.e. %r1[25]
+store i32 %r166, i32* %r168
+ret void
+}
+define void @mcl_fpDbl_sqrPre13L(i32* noalias  %r1, i32* noalias  %r2) ; Same row-by-row scheme as a general multiply, but %r2 supplies both operands: row i is mulPv416x32(%r2, %r2[i]); 26 words are written to %r1.
+{ ; No squaring-specific shortcut is taken here: all 13 full rows are computed and accumulated in an i448.
+%r3 = load i32, i32* %r2 ; word 0 of the operand
+%r4 = call i448 @mulPv416x32(i32* %r2, i32 %r3) ; row 0
+%r5 = trunc i448 %r4 to i32
+store i32 %r5, i32* %r1 ; output word 0 is final after row 0
+%r6 = lshr i448 %r4, 32 ; remaining bits carry into the next row
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = call i448 @mulPv416x32(i32* %r2, i32 %r9) ; row 1
+%r11 = add i448 %r6, %r10 ; accumulate: shifted previous rows + this row
+%r12 = trunc i448 %r11 to i32
+%r14 = getelementptr i32, i32* %r1, i32 1
+store i32 %r12, i32* %r14
+%r15 = lshr i448 %r11, 32
+%r17 = getelementptr i32, i32* %r2, i32 2
+%r18 = load i32, i32* %r17
+%r19 = call i448 @mulPv416x32(i32* %r2, i32 %r18) ; row 2
+%r20 = add i448 %r15, %r19
+%r21 = trunc i448 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 2
+store i32 %r21, i32* %r23
+%r24 = lshr i448 %r20, 32
+%r26 = getelementptr i32, i32* %r2, i32 3
+%r27 = load i32, i32* %r26
+%r28 = call i448 @mulPv416x32(i32* %r2, i32 %r27) ; row 3
+%r29 = add i448 %r24, %r28
+%r30 = trunc i448 %r29 to i32
+%r32 = getelementptr i32, i32* %r1, i32 3
+store i32 %r30, i32* %r32
+%r33 = lshr i448 %r29, 32
+%r35 = getelementptr i32, i32* %r2, i32 4
+%r36 = load i32, i32* %r35
+%r37 = call i448 @mulPv416x32(i32* %r2, i32 %r36) ; row 4
+%r38 = add i448 %r33, %r37
+%r39 = trunc i448 %r38 to i32
+%r41 = getelementptr i32, i32* %r1, i32 4
+store i32 %r39, i32* %r41
+%r42 = lshr i448 %r38, 32
+%r44 = getelementptr i32, i32* %r2, i32 5
+%r45 = load i32, i32* %r44
+%r46 = call i448 @mulPv416x32(i32* %r2, i32 %r45) ; row 5
+%r47 = add i448 %r42, %r46
+%r48 = trunc i448 %r47 to i32
+%r50 = getelementptr i32, i32* %r1, i32 5
+store i32 %r48, i32* %r50
+%r51 = lshr i448 %r47, 32
+%r53 = getelementptr i32, i32* %r2, i32 6
+%r54 = load i32, i32* %r53
+%r55 = call i448 @mulPv416x32(i32* %r2, i32 %r54) ; row 6
+%r56 = add i448 %r51, %r55
+%r57 = trunc i448 %r56 to i32
+%r59 = getelementptr i32, i32* %r1, i32 6
+store i32 %r57, i32* %r59
+%r60 = lshr i448 %r56, 32
+%r62 = getelementptr i32, i32* %r2, i32 7
+%r63 = load i32, i32* %r62
+%r64 = call i448 @mulPv416x32(i32* %r2, i32 %r63) ; row 7
+%r65 = add i448 %r60, %r64
+%r66 = trunc i448 %r65 to i32
+%r68 = getelementptr i32, i32* %r1, i32 7
+store i32 %r66, i32* %r68
+%r69 = lshr i448 %r65, 32
+%r71 = getelementptr i32, i32* %r2, i32 8
+%r72 = load i32, i32* %r71
+%r73 = call i448 @mulPv416x32(i32* %r2, i32 %r72) ; row 8
+%r74 = add i448 %r69, %r73
+%r75 = trunc i448 %r74 to i32
+%r77 = getelementptr i32, i32* %r1, i32 8
+store i32 %r75, i32* %r77
+%r78 = lshr i448 %r74, 32
+%r80 = getelementptr i32, i32* %r2, i32 9
+%r81 = load i32, i32* %r80
+%r82 = call i448 @mulPv416x32(i32* %r2, i32 %r81) ; row 9
+%r83 = add i448 %r78, %r82
+%r84 = trunc i448 %r83 to i32
+%r86 = getelementptr i32, i32* %r1, i32 9
+store i32 %r84, i32* %r86
+%r87 = lshr i448 %r83, 32
+%r89 = getelementptr i32, i32* %r2, i32 10
+%r90 = load i32, i32* %r89
+%r91 = call i448 @mulPv416x32(i32* %r2, i32 %r90) ; row 10
+%r92 = add i448 %r87, %r91
+%r93 = trunc i448 %r92 to i32
+%r95 = getelementptr i32, i32* %r1, i32 10
+store i32 %r93, i32* %r95
+%r96 = lshr i448 %r92, 32
+%r98 = getelementptr i32, i32* %r2, i32 11
+%r99 = load i32, i32* %r98
+%r100 = call i448 @mulPv416x32(i32* %r2, i32 %r99) ; row 11
+%r101 = add i448 %r96, %r100
+%r102 = trunc i448 %r101 to i32
+%r104 = getelementptr i32, i32* %r1, i32 11
+store i32 %r102, i32* %r104
+%r105 = lshr i448 %r101, 32
+%r107 = getelementptr i32, i32* %r2, i32 12
+%r108 = load i32, i32* %r107
+%r109 = call i448 @mulPv416x32(i32* %r2, i32 %r108) ; row 12 (last)
+%r110 = add i448 %r105, %r109 ; final accumulator: holds output words 12..25
+%r112 = getelementptr i32, i32* %r1, i32 12 ; base pointer for the upper 14 output words
+%r113 = trunc i448 %r110 to i32
+%r115 = getelementptr i32, i32* %r112, i32 0
+store i32 %r113, i32* %r115
+%r116 = lshr i448 %r110, 32 ; trunc/store/shift repeats for each remaining word
+%r117 = trunc i448 %r116 to i32
+%r119 = getelementptr i32, i32* %r112, i32 1
+store i32 %r117, i32* %r119
+%r120 = lshr i448 %r116, 32
+%r121 = trunc i448 %r120 to i32
+%r123 = getelementptr i32, i32* %r112, i32 2
+store i32 %r121, i32* %r123
+%r124 = lshr i448 %r120, 32
+%r125 = trunc i448 %r124 to i32
+%r127 = getelementptr i32, i32* %r112, i32 3
+store i32 %r125, i32* %r127
+%r128 = lshr i448 %r124, 32
+%r129 = trunc i448 %r128 to i32
+%r131 = getelementptr i32, i32* %r112, i32 4
+store i32 %r129, i32* %r131
+%r132 = lshr i448 %r128, 32
+%r133 = trunc i448 %r132 to i32
+%r135 = getelementptr i32, i32* %r112, i32 5
+store i32 %r133, i32* %r135
+%r136 = lshr i448 %r132, 32
+%r137 = trunc i448 %r136 to i32
+%r139 = getelementptr i32, i32* %r112, i32 6
+store i32 %r137, i32* %r139
+%r140 = lshr i448 %r136, 32
+%r141 = trunc i448 %r140 to i32
+%r143 = getelementptr i32, i32* %r112, i32 7
+store i32 %r141, i32* %r143
+%r144 = lshr i448 %r140, 32
+%r145 = trunc i448 %r144 to i32
+%r147 = getelementptr i32, i32* %r112, i32 8
+store i32 %r145, i32* %r147
+%r148 = lshr i448 %r144, 32
+%r149 = trunc i448 %r148 to i32
+%r151 = getelementptr i32, i32* %r112, i32 9
+store i32 %r149, i32* %r151
+%r152 = lshr i448 %r148, 32
+%r153 = trunc i448 %r152 to i32
+%r155 = getelementptr i32, i32* %r112, i32 10
+store i32 %r153, i32* %r155
+%r156 = lshr i448 %r152, 32
+%r157 = trunc i448 %r156 to i32
+%r159 = getelementptr i32, i32* %r112, i32 11
+store i32 %r157, i32* %r159
+%r160 = lshr i448 %r156, 32
+%r161 = trunc i448 %r160 to i32
+%r163 = getelementptr i32, i32* %r112, i32 12
+store i32 %r161, i32* %r163
+%r164 = lshr i448 %r160, 32
+%r165 = trunc i448 %r164 to i32 ; top word of the accumulator
+%r167 = getelementptr i32, i32* %r112, i32 13 ; i.e. %r1[25]
+store i32 %r165, i32* %r167
+ret void
+}
+define void @mcl_fp_mont13L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; 13 interleaved multiply/reduce rounds over the words of %r3, then one conditional subtraction of the 13-word value at %r4; writes 13 words to %r1.
+{ ; NOTE(review): looks like word-wise Montgomery multiplication with %r4 = modulus p and %r4[-1] = -p^-1 mod 2^32 -- confirm against the caller that sets up %r4.
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; the word stored immediately before %r4[0]
+%r7 = load i32, i32* %r6 ; per-round multiplier used to derive each reduction factor
+%r9 = getelementptr i32, i32* %r3, i32 0
+%r10 = load i32, i32* %r9
+%r11 = call i448 @mulPv416x32(i32* %r2, i32 %r10) ; round 0: row for %r3[0]
+%r12 = zext i448 %r11 to i480 ; accumulator is kept 32 bits wider than a row
+%r13 = trunc i448 %r11 to i32 ; low word of the accumulator
+%r14 = mul i32 %r13, %r7 ; reduction factor q (32-bit wrapping multiply)
+%r15 = call i448 @mulPv416x32(i32* %r4, i32 %r14) ; q times the value at %r4
+%r16 = zext i448 %r15 to i480
+%r17 = add i480 %r12, %r16
+%r18 = lshr i480 %r17, 32 ; drop the low word before the next round
+%r20 = getelementptr i32, i32* %r3, i32 1
+%r21 = load i32, i32* %r20
+%r22 = call i448 @mulPv416x32(i32* %r2, i32 %r21) ; round 1
+%r23 = zext i448 %r22 to i480
+%r24 = add i480 %r18, %r23 ; accumulator += row
+%r25 = trunc i480 %r24 to i32
+%r26 = mul i32 %r25, %r7 ; q for this round
+%r27 = call i448 @mulPv416x32(i32* %r4, i32 %r26)
+%r28 = zext i448 %r27 to i480
+%r29 = add i480 %r24, %r28 ; accumulator += q * (value at %r4)
+%r30 = lshr i480 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2
+%r33 = load i32, i32* %r32
+%r34 = call i448 @mulPv416x32(i32* %r2, i32 %r33) ; round 2
+%r35 = zext i448 %r34 to i480
+%r36 = add i480 %r30, %r35
+%r37 = trunc i480 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i448 @mulPv416x32(i32* %r4, i32 %r38)
+%r40 = zext i448 %r39 to i480
+%r41 = add i480 %r36, %r40
+%r42 = lshr i480 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = call i448 @mulPv416x32(i32* %r2, i32 %r45) ; round 3
+%r47 = zext i448 %r46 to i480
+%r48 = add i480 %r42, %r47
+%r49 = trunc i480 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i448 @mulPv416x32(i32* %r4, i32 %r50)
+%r52 = zext i448 %r51 to i480
+%r53 = add i480 %r48, %r52
+%r54 = lshr i480 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4
+%r57 = load i32, i32* %r56
+%r58 = call i448 @mulPv416x32(i32* %r2, i32 %r57) ; round 4
+%r59 = zext i448 %r58 to i480
+%r60 = add i480 %r54, %r59
+%r61 = trunc i480 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i448 @mulPv416x32(i32* %r4, i32 %r62)
+%r64 = zext i448 %r63 to i480
+%r65 = add i480 %r60, %r64
+%r66 = lshr i480 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5
+%r69 = load i32, i32* %r68
+%r70 = call i448 @mulPv416x32(i32* %r2, i32 %r69) ; round 5
+%r71 = zext i448 %r70 to i480
+%r72 = add i480 %r66, %r71
+%r73 = trunc i480 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i448 @mulPv416x32(i32* %r4, i32 %r74)
+%r76 = zext i448 %r75 to i480
+%r77 = add i480 %r72, %r76
+%r78 = lshr i480 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6
+%r81 = load i32, i32* %r80
+%r82 = call i448 @mulPv416x32(i32* %r2, i32 %r81) ; round 6
+%r83 = zext i448 %r82 to i480
+%r84 = add i480 %r78, %r83
+%r85 = trunc i480 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i448 @mulPv416x32(i32* %r4, i32 %r86)
+%r88 = zext i448 %r87 to i480
+%r89 = add i480 %r84, %r88
+%r90 = lshr i480 %r89, 32
+%r92 = getelementptr i32, i32* %r3, i32 7
+%r93 = load i32, i32* %r92
+%r94 = call i448 @mulPv416x32(i32* %r2, i32 %r93) ; round 7
+%r95 = zext i448 %r94 to i480
+%r96 = add i480 %r90, %r95
+%r97 = trunc i480 %r96 to i32
+%r98 = mul i32 %r97, %r7
+%r99 = call i448 @mulPv416x32(i32* %r4, i32 %r98)
+%r100 = zext i448 %r99 to i480
+%r101 = add i480 %r96, %r100
+%r102 = lshr i480 %r101, 32
+%r104 = getelementptr i32, i32* %r3, i32 8
+%r105 = load i32, i32* %r104
+%r106 = call i448 @mulPv416x32(i32* %r2, i32 %r105) ; round 8
+%r107 = zext i448 %r106 to i480
+%r108 = add i480 %r102, %r107
+%r109 = trunc i480 %r108 to i32
+%r110 = mul i32 %r109, %r7
+%r111 = call i448 @mulPv416x32(i32* %r4, i32 %r110)
+%r112 = zext i448 %r111 to i480
+%r113 = add i480 %r108, %r112
+%r114 = lshr i480 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 9
+%r117 = load i32, i32* %r116
+%r118 = call i448 @mulPv416x32(i32* %r2, i32 %r117) ; round 9
+%r119 = zext i448 %r118 to i480
+%r120 = add i480 %r114, %r119
+%r121 = trunc i480 %r120 to i32
+%r122 = mul i32 %r121, %r7
+%r123 = call i448 @mulPv416x32(i32* %r4, i32 %r122)
+%r124 = zext i448 %r123 to i480
+%r125 = add i480 %r120, %r124
+%r126 = lshr i480 %r125, 32
+%r128 = getelementptr i32, i32* %r3, i32 10
+%r129 = load i32, i32* %r128
+%r130 = call i448 @mulPv416x32(i32* %r2, i32 %r129) ; round 10
+%r131 = zext i448 %r130 to i480
+%r132 = add i480 %r126, %r131
+%r133 = trunc i480 %r132 to i32
+%r134 = mul i32 %r133, %r7
+%r135 = call i448 @mulPv416x32(i32* %r4, i32 %r134)
+%r136 = zext i448 %r135 to i480
+%r137 = add i480 %r132, %r136
+%r138 = lshr i480 %r137, 32
+%r140 = getelementptr i32, i32* %r3, i32 11
+%r141 = load i32, i32* %r140
+%r142 = call i448 @mulPv416x32(i32* %r2, i32 %r141) ; round 11
+%r143 = zext i448 %r142 to i480
+%r144 = add i480 %r138, %r143
+%r145 = trunc i480 %r144 to i32
+%r146 = mul i32 %r145, %r7
+%r147 = call i448 @mulPv416x32(i32* %r4, i32 %r146)
+%r148 = zext i448 %r147 to i480
+%r149 = add i480 %r144, %r148
+%r150 = lshr i480 %r149, 32
+%r152 = getelementptr i32, i32* %r3, i32 12
+%r153 = load i32, i32* %r152
+%r154 = call i448 @mulPv416x32(i32* %r2, i32 %r153) ; round 12 (last)
+%r155 = zext i448 %r154 to i480
+%r156 = add i480 %r150, %r155
+%r157 = trunc i480 %r156 to i32
+%r158 = mul i32 %r157, %r7
+%r159 = call i448 @mulPv416x32(i32* %r4, i32 %r158)
+%r160 = zext i448 %r159 to i480
+%r161 = add i480 %r156, %r160
+%r162 = lshr i480 %r161, 32
+%r163 = trunc i480 %r162 to i448 ; unreduced result after all 13 rounds
+%r164 = load i32, i32* %r4 ; begin assembling %r4[0..12] into one i416, word 0 at bit 0
+%r165 = zext i32 %r164 to i64
+%r167 = getelementptr i32, i32* %r4, i32 1
+%r168 = load i32, i32* %r167
+%r169 = zext i32 %r168 to i64
+%r170 = shl i64 %r169, 32
+%r171 = or i64 %r165, %r170
+%r172 = zext i64 %r171 to i96 ; widen by 32 bits before inserting each further word
+%r174 = getelementptr i32, i32* %r4, i32 2
+%r175 = load i32, i32* %r174
+%r176 = zext i32 %r175 to i96
+%r177 = shl i96 %r176, 64
+%r178 = or i96 %r172, %r177
+%r179 = zext i96 %r178 to i128
+%r181 = getelementptr i32, i32* %r4, i32 3
+%r182 = load i32, i32* %r181
+%r183 = zext i32 %r182 to i128
+%r184 = shl i128 %r183, 96
+%r185 = or i128 %r179, %r184
+%r186 = zext i128 %r185 to i160
+%r188 = getelementptr i32, i32* %r4, i32 4
+%r189 = load i32, i32* %r188
+%r190 = zext i32 %r189 to i160
+%r191 = shl i160 %r190, 128
+%r192 = or i160 %r186, %r191
+%r193 = zext i160 %r192 to i192
+%r195 = getelementptr i32, i32* %r4, i32 5
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i192
+%r198 = shl i192 %r197, 160
+%r199 = or i192 %r193, %r198
+%r200 = zext i192 %r199 to i224
+%r202 = getelementptr i32, i32* %r4, i32 6
+%r203 = load i32, i32* %r202
+%r204 = zext i32 %r203 to i224
+%r205 = shl i224 %r204, 192
+%r206 = or i224 %r200, %r205
+%r207 = zext i224 %r206 to i256
+%r209 = getelementptr i32, i32* %r4, i32 7
+%r210 = load i32, i32* %r209
+%r211 = zext i32 %r210 to i256
+%r212 = shl i256 %r211, 224
+%r213 = or i256 %r207, %r212
+%r214 = zext i256 %r213 to i288
+%r216 = getelementptr i32, i32* %r4, i32 8
+%r217 = load i32, i32* %r216
+%r218 = zext i32 %r217 to i288
+%r219 = shl i288 %r218, 256
+%r220 = or i288 %r214, %r219
+%r221 = zext i288 %r220 to i320
+%r223 = getelementptr i32, i32* %r4, i32 9
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i320
+%r226 = shl i320 %r225, 288
+%r227 = or i320 %r221, %r226
+%r228 = zext i320 %r227 to i352
+%r230 = getelementptr i32, i32* %r4, i32 10
+%r231 = load i32, i32* %r230
+%r232 = zext i32 %r231 to i352
+%r233 = shl i352 %r232, 320
+%r234 = or i352 %r228, %r233
+%r235 = zext i352 %r234 to i384
+%r237 = getelementptr i32, i32* %r4, i32 11
+%r238 = load i32, i32* %r237
+%r239 = zext i32 %r238 to i384
+%r240 = shl i384 %r239, 352
+%r241 = or i384 %r235, %r240
+%r242 = zext i384 %r241 to i416
+%r244 = getelementptr i32, i32* %r4, i32 12
+%r245 = load i32, i32* %r244
+%r246 = zext i32 %r245 to i416
+%r247 = shl i416 %r246, 384
+%r248 = or i416 %r242, %r247 ; the full 13-word value at %r4
+%r249 = zext i416 %r248 to i448
+%r250 = sub i448 %r163, %r249 ; trial subtraction (wraps modulo 2^448 when the result would be negative)
+%r251 = lshr i448 %r250, 416
+%r252 = trunc i448 %r251 to i1 ; bit 416 of the difference serves as the borrow indicator
+%r253 = select i1 %r252, i448 %r163, i448 %r250 ; borrow set: keep the unreduced value; otherwise take the difference
+%r254 = trunc i448 %r253 to i416 ; only the low 13 words are written out
+%r255 = trunc i416 %r254 to i32
+%r257 = getelementptr i32, i32* %r1, i32 0
+store i32 %r255, i32* %r257 ; least-significant word first
+%r258 = lshr i416 %r254, 32 ; trunc/store/shift repeats for each remaining word
+%r259 = trunc i416 %r258 to i32
+%r261 = getelementptr i32, i32* %r1, i32 1
+store i32 %r259, i32* %r261
+%r262 = lshr i416 %r258, 32
+%r263 = trunc i416 %r262 to i32
+%r265 = getelementptr i32, i32* %r1, i32 2
+store i32 %r263, i32* %r265
+%r266 = lshr i416 %r262, 32
+%r267 = trunc i416 %r266 to i32
+%r269 = getelementptr i32, i32* %r1, i32 3
+store i32 %r267, i32* %r269
+%r270 = lshr i416 %r266, 32
+%r271 = trunc i416 %r270 to i32
+%r273 = getelementptr i32, i32* %r1, i32 4
+store i32 %r271, i32* %r273
+%r274 = lshr i416 %r270, 32
+%r275 = trunc i416 %r274 to i32
+%r277 = getelementptr i32, i32* %r1, i32 5
+store i32 %r275, i32* %r277
+%r278 = lshr i416 %r274, 32
+%r279 = trunc i416 %r278 to i32
+%r281 = getelementptr i32, i32* %r1, i32 6
+store i32 %r279, i32* %r281
+%r282 = lshr i416 %r278, 32
+%r283 = trunc i416 %r282 to i32
+%r285 = getelementptr i32, i32* %r1, i32 7
+store i32 %r283, i32* %r285
+%r286 = lshr i416 %r282, 32
+%r287 = trunc i416 %r286 to i32
+%r289 = getelementptr i32, i32* %r1, i32 8
+store i32 %r287, i32* %r289
+%r290 = lshr i416 %r286, 32
+%r291 = trunc i416 %r290 to i32
+%r293 = getelementptr i32, i32* %r1, i32 9
+store i32 %r291, i32* %r293
+%r294 = lshr i416 %r290, 32
+%r295 = trunc i416 %r294 to i32
+%r297 = getelementptr i32, i32* %r1, i32 10
+store i32 %r295, i32* %r297
+%r298 = lshr i416 %r294, 32
+%r299 = trunc i416 %r298 to i32
+%r301 = getelementptr i32, i32* %r1, i32 11
+store i32 %r299, i32* %r301
+%r302 = lshr i416 %r298, 32
+%r303 = trunc i416 %r302 to i32 ; most-significant (13th) word
+%r305 = getelementptr i32, i32* %r1, i32 12
+store i32 %r303, i32* %r305
+ret void
+}
+define void @mcl_fp_montNF13L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; writes 13 words to %r1; reads %r3[0..12], %r4[-1..12]; %r2 is only passed to @mulPv416x32. NOTE(review): name suggests a Montgomery multiply for 13x32-bit limbs - confirm against caller
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; address of the word stored immediately before the %r4 array
+%r7 = load i32, i32* %r6 ; per-round multiplier; presumably -p^-1 mod 2^32 - TODO confirm
+%r8 = load i32, i32* %r3
+%r9 = call i448 @mulPv416x32(i32* %r2, i32 %r8) ; round 0 starts from this product (helper defined elsewhere; presumably 416-bit x 32-bit multiply)
+%r10 = trunc i448 %r9 to i32
+%r11 = mul i32 %r10, %r7 ; q = (low word of accumulator) * %r7, wrapping mod 2^32
+%r12 = call i448 @mulPv416x32(i32* %r4, i32 %r11)
+%r13 = add i448 %r9, %r12
+%r14 = lshr i448 %r13, 32 ; discard the low 32-bit word of the accumulator
+%r16 = getelementptr i32, i32* %r3, i32 1 ; rounds 1..12 repeat the same pattern with %r3[i] added into the accumulator first
+%r17 = load i32, i32* %r16
+%r18 = call i448 @mulPv416x32(i32* %r2, i32 %r17)
+%r19 = add i448 %r14, %r18
+%r20 = trunc i448 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i448 @mulPv416x32(i32* %r4, i32 %r21)
+%r23 = add i448 %r19, %r22
+%r24 = lshr i448 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2
+%r27 = load i32, i32* %r26
+%r28 = call i448 @mulPv416x32(i32* %r2, i32 %r27)
+%r29 = add i448 %r24, %r28
+%r30 = trunc i448 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i448 @mulPv416x32(i32* %r4, i32 %r31)
+%r33 = add i448 %r29, %r32
+%r34 = lshr i448 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3
+%r37 = load i32, i32* %r36
+%r38 = call i448 @mulPv416x32(i32* %r2, i32 %r37)
+%r39 = add i448 %r34, %r38
+%r40 = trunc i448 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i448 @mulPv416x32(i32* %r4, i32 %r41)
+%r43 = add i448 %r39, %r42
+%r44 = lshr i448 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4
+%r47 = load i32, i32* %r46
+%r48 = call i448 @mulPv416x32(i32* %r2, i32 %r47)
+%r49 = add i448 %r44, %r48
+%r50 = trunc i448 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i448 @mulPv416x32(i32* %r4, i32 %r51)
+%r53 = add i448 %r49, %r52
+%r54 = lshr i448 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5
+%r57 = load i32, i32* %r56
+%r58 = call i448 @mulPv416x32(i32* %r2, i32 %r57)
+%r59 = add i448 %r54, %r58
+%r60 = trunc i448 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i448 @mulPv416x32(i32* %r4, i32 %r61)
+%r63 = add i448 %r59, %r62
+%r64 = lshr i448 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6
+%r67 = load i32, i32* %r66
+%r68 = call i448 @mulPv416x32(i32* %r2, i32 %r67)
+%r69 = add i448 %r64, %r68
+%r70 = trunc i448 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i448 @mulPv416x32(i32* %r4, i32 %r71)
+%r73 = add i448 %r69, %r72
+%r74 = lshr i448 %r73, 32
+%r76 = getelementptr i32, i32* %r3, i32 7
+%r77 = load i32, i32* %r76
+%r78 = call i448 @mulPv416x32(i32* %r2, i32 %r77)
+%r79 = add i448 %r74, %r78
+%r80 = trunc i448 %r79 to i32
+%r81 = mul i32 %r80, %r7
+%r82 = call i448 @mulPv416x32(i32* %r4, i32 %r81)
+%r83 = add i448 %r79, %r82
+%r84 = lshr i448 %r83, 32
+%r86 = getelementptr i32, i32* %r3, i32 8
+%r87 = load i32, i32* %r86
+%r88 = call i448 @mulPv416x32(i32* %r2, i32 %r87)
+%r89 = add i448 %r84, %r88
+%r90 = trunc i448 %r89 to i32
+%r91 = mul i32 %r90, %r7
+%r92 = call i448 @mulPv416x32(i32* %r4, i32 %r91)
+%r93 = add i448 %r89, %r92
+%r94 = lshr i448 %r93, 32
+%r96 = getelementptr i32, i32* %r3, i32 9
+%r97 = load i32, i32* %r96
+%r98 = call i448 @mulPv416x32(i32* %r2, i32 %r97)
+%r99 = add i448 %r94, %r98
+%r100 = trunc i448 %r99 to i32
+%r101 = mul i32 %r100, %r7
+%r102 = call i448 @mulPv416x32(i32* %r4, i32 %r101)
+%r103 = add i448 %r99, %r102
+%r104 = lshr i448 %r103, 32
+%r106 = getelementptr i32, i32* %r3, i32 10
+%r107 = load i32, i32* %r106
+%r108 = call i448 @mulPv416x32(i32* %r2, i32 %r107)
+%r109 = add i448 %r104, %r108
+%r110 = trunc i448 %r109 to i32
+%r111 = mul i32 %r110, %r7
+%r112 = call i448 @mulPv416x32(i32* %r4, i32 %r111)
+%r113 = add i448 %r109, %r112
+%r114 = lshr i448 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 11
+%r117 = load i32, i32* %r116
+%r118 = call i448 @mulPv416x32(i32* %r2, i32 %r117)
+%r119 = add i448 %r114, %r118
+%r120 = trunc i448 %r119 to i32
+%r121 = mul i32 %r120, %r7
+%r122 = call i448 @mulPv416x32(i32* %r4, i32 %r121)
+%r123 = add i448 %r119, %r122
+%r124 = lshr i448 %r123, 32
+%r126 = getelementptr i32, i32* %r3, i32 12 ; last of the 13 rounds
+%r127 = load i32, i32* %r126
+%r128 = call i448 @mulPv416x32(i32* %r2, i32 %r127)
+%r129 = add i448 %r124, %r128
+%r130 = trunc i448 %r129 to i32
+%r131 = mul i32 %r130, %r7
+%r132 = call i448 @mulPv416x32(i32* %r4, i32 %r131)
+%r133 = add i448 %r129, %r132
+%r134 = lshr i448 %r133, 32
+%r135 = trunc i448 %r134 to i416 ; keep only the low 416 bits; the top 32 bits of the accumulator are dropped here
+%r136 = load i32, i32* %r4 ; from here to %r220: pack %r4[0..12] into one i416, word 0 least significant
+%r137 = zext i32 %r136 to i64
+%r139 = getelementptr i32, i32* %r4, i32 1
+%r140 = load i32, i32* %r139
+%r141 = zext i32 %r140 to i64
+%r142 = shl i64 %r141, 32
+%r143 = or i64 %r137, %r142
+%r144 = zext i64 %r143 to i96
+%r146 = getelementptr i32, i32* %r4, i32 2
+%r147 = load i32, i32* %r146
+%r148 = zext i32 %r147 to i96
+%r149 = shl i96 %r148, 64
+%r150 = or i96 %r144, %r149
+%r151 = zext i96 %r150 to i128
+%r153 = getelementptr i32, i32* %r4, i32 3
+%r154 = load i32, i32* %r153
+%r155 = zext i32 %r154 to i128
+%r156 = shl i128 %r155, 96
+%r157 = or i128 %r151, %r156
+%r158 = zext i128 %r157 to i160
+%r160 = getelementptr i32, i32* %r4, i32 4
+%r161 = load i32, i32* %r160
+%r162 = zext i32 %r161 to i160
+%r163 = shl i160 %r162, 128
+%r164 = or i160 %r158, %r163
+%r165 = zext i160 %r164 to i192
+%r167 = getelementptr i32, i32* %r4, i32 5
+%r168 = load i32, i32* %r167
+%r169 = zext i32 %r168 to i192
+%r170 = shl i192 %r169, 160
+%r171 = or i192 %r165, %r170
+%r172 = zext i192 %r171 to i224
+%r174 = getelementptr i32, i32* %r4, i32 6
+%r175 = load i32, i32* %r174
+%r176 = zext i32 %r175 to i224
+%r177 = shl i224 %r176, 192
+%r178 = or i224 %r172, %r177
+%r179 = zext i224 %r178 to i256
+%r181 = getelementptr i32, i32* %r4, i32 7
+%r182 = load i32, i32* %r181
+%r183 = zext i32 %r182 to i256
+%r184 = shl i256 %r183, 224
+%r185 = or i256 %r179, %r184
+%r186 = zext i256 %r185 to i288
+%r188 = getelementptr i32, i32* %r4, i32 8
+%r189 = load i32, i32* %r188
+%r190 = zext i32 %r189 to i288
+%r191 = shl i288 %r190, 256
+%r192 = or i288 %r186, %r191
+%r193 = zext i288 %r192 to i320
+%r195 = getelementptr i32, i32* %r4, i32 9
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i320
+%r198 = shl i320 %r197, 288
+%r199 = or i320 %r193, %r198
+%r200 = zext i320 %r199 to i352
+%r202 = getelementptr i32, i32* %r4, i32 10
+%r203 = load i32, i32* %r202
+%r204 = zext i32 %r203 to i352
+%r205 = shl i352 %r204, 320
+%r206 = or i352 %r200, %r205
+%r207 = zext i352 %r206 to i384
+%r209 = getelementptr i32, i32* %r4, i32 11
+%r210 = load i32, i32* %r209
+%r211 = zext i32 %r210 to i384
+%r212 = shl i384 %r211, 352
+%r213 = or i384 %r207, %r212
+%r214 = zext i384 %r213 to i416
+%r216 = getelementptr i32, i32* %r4, i32 12
+%r217 = load i32, i32* %r216
+%r218 = zext i32 %r217 to i416
+%r219 = shl i416 %r218, 384
+%r220 = or i416 %r214, %r219
+%r221 = sub i416 %r135, %r220 ; trial subtraction of the packed %r4 value, wrapping mod 2^416
+%r222 = lshr i416 %r221, 415 ; isolate the top bit (bit 415) of the difference
+%r223 = trunc i416 %r222 to i1
+%r224 = select i1 %r223, i416 %r135, i416 %r221 ; top bit set -> keep the un-subtracted value; NOTE(review): using bit 415 as the sign presumably requires the modulus top bit to be clear - confirm
+%r225 = trunc i416 %r224 to i32 ; from here on: write the selected value to %r1[0..12], least-significant word first
+%r227 = getelementptr i32, i32* %r1, i32 0
+store i32 %r225, i32* %r227
+%r228 = lshr i416 %r224, 32
+%r229 = trunc i416 %r228 to i32
+%r231 = getelementptr i32, i32* %r1, i32 1
+store i32 %r229, i32* %r231
+%r232 = lshr i416 %r228, 32
+%r233 = trunc i416 %r232 to i32
+%r235 = getelementptr i32, i32* %r1, i32 2
+store i32 %r233, i32* %r235
+%r236 = lshr i416 %r232, 32
+%r237 = trunc i416 %r236 to i32
+%r239 = getelementptr i32, i32* %r1, i32 3
+store i32 %r237, i32* %r239
+%r240 = lshr i416 %r236, 32
+%r241 = trunc i416 %r240 to i32
+%r243 = getelementptr i32, i32* %r1, i32 4
+store i32 %r241, i32* %r243
+%r244 = lshr i416 %r240, 32
+%r245 = trunc i416 %r244 to i32
+%r247 = getelementptr i32, i32* %r1, i32 5
+store i32 %r245, i32* %r247
+%r248 = lshr i416 %r244, 32
+%r249 = trunc i416 %r248 to i32
+%r251 = getelementptr i32, i32* %r1, i32 6
+store i32 %r249, i32* %r251
+%r252 = lshr i416 %r248, 32
+%r253 = trunc i416 %r252 to i32
+%r255 = getelementptr i32, i32* %r1, i32 7
+store i32 %r253, i32* %r255
+%r256 = lshr i416 %r252, 32
+%r257 = trunc i416 %r256 to i32
+%r259 = getelementptr i32, i32* %r1, i32 8
+store i32 %r257, i32* %r259
+%r260 = lshr i416 %r256, 32
+%r261 = trunc i416 %r260 to i32
+%r263 = getelementptr i32, i32* %r1, i32 9
+store i32 %r261, i32* %r263
+%r264 = lshr i416 %r260, 32
+%r265 = trunc i416 %r264 to i32
+%r267 = getelementptr i32, i32* %r1, i32 10
+store i32 %r265, i32* %r267
+%r268 = lshr i416 %r264, 32
+%r269 = trunc i416 %r268 to i32
+%r271 = getelementptr i32, i32* %r1, i32 11
+store i32 %r269, i32* %r271
+%r272 = lshr i416 %r268, 32
+%r273 = trunc i416 %r272 to i32
+%r275 = getelementptr i32, i32* %r1, i32 12
+store i32 %r273, i32* %r275
+ret void
+}
+define void @mcl_fp_montRed13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; writes 13 words to %r1; reads %r2[0..25] and %r3[-1..12]. NOTE(review): name suggests Montgomery reduction of a 26-word value modulo the 13-word value at %r3 - confirm against caller
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1 ; address of the word stored immediately before the %r3 array
+%r6 = load i32, i32* %r5 ; per-round multiplier; presumably -p^-1 mod 2^32 - TODO confirm
+%r7 = load i32, i32* %r3 ; from here to %r91: pack %r3[0..12] into one i416, word 0 least significant
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i288
+%r59 = getelementptr i32, i32* %r3, i32 8
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i288
+%r62 = shl i288 %r61, 256
+%r63 = or i288 %r57, %r62
+%r64 = zext i288 %r63 to i320
+%r66 = getelementptr i32, i32* %r3, i32 9
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i320
+%r69 = shl i320 %r68, 288
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i352
+%r73 = getelementptr i32, i32* %r3, i32 10
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i352
+%r76 = shl i352 %r75, 320
+%r77 = or i352 %r71, %r76
+%r78 = zext i352 %r77 to i384
+%r80 = getelementptr i32, i32* %r3, i32 11
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i384
+%r83 = shl i384 %r82, 352
+%r84 = or i384 %r78, %r83
+%r85 = zext i384 %r84 to i416
+%r87 = getelementptr i32, i32* %r3, i32 12
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i416
+%r90 = shl i416 %r89, 384
+%r91 = or i416 %r85, %r90
+%r92 = load i32, i32* %r2 ; from here to %r267: pack %r2[0..25] into one i832, word 0 least significant
+%r93 = zext i32 %r92 to i64
+%r95 = getelementptr i32, i32* %r2, i32 1
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i64
+%r98 = shl i64 %r97, 32
+%r99 = or i64 %r93, %r98
+%r100 = zext i64 %r99 to i96
+%r102 = getelementptr i32, i32* %r2, i32 2
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i96
+%r105 = shl i96 %r104, 64
+%r106 = or i96 %r100, %r105
+%r107 = zext i96 %r106 to i128
+%r109 = getelementptr i32, i32* %r2, i32 3
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i128
+%r112 = shl i128 %r111, 96
+%r113 = or i128 %r107, %r112
+%r114 = zext i128 %r113 to i160
+%r116 = getelementptr i32, i32* %r2, i32 4
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i160
+%r119 = shl i160 %r118, 128
+%r120 = or i160 %r114, %r119
+%r121 = zext i160 %r120 to i192
+%r123 = getelementptr i32, i32* %r2, i32 5
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i192
+%r126 = shl i192 %r125, 160
+%r127 = or i192 %r121, %r126
+%r128 = zext i192 %r127 to i224
+%r130 = getelementptr i32, i32* %r2, i32 6
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i224
+%r133 = shl i224 %r132, 192
+%r134 = or i224 %r128, %r133
+%r135 = zext i224 %r134 to i256
+%r137 = getelementptr i32, i32* %r2, i32 7
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i256
+%r140 = shl i256 %r139, 224
+%r141 = or i256 %r135, %r140
+%r142 = zext i256 %r141 to i288
+%r144 = getelementptr i32, i32* %r2, i32 8
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i288
+%r147 = shl i288 %r146, 256
+%r148 = or i288 %r142, %r147
+%r149 = zext i288 %r148 to i320
+%r151 = getelementptr i32, i32* %r2, i32 9
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i320
+%r154 = shl i320 %r153, 288
+%r155 = or i320 %r149, %r154
+%r156 = zext i320 %r155 to i352
+%r158 = getelementptr i32, i32* %r2, i32 10
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i352
+%r161 = shl i352 %r160, 320
+%r162 = or i352 %r156, %r161
+%r163 = zext i352 %r162 to i384
+%r165 = getelementptr i32, i32* %r2, i32 11
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i384
+%r168 = shl i384 %r167, 352
+%r169 = or i384 %r163, %r168
+%r170 = zext i384 %r169 to i416
+%r172 = getelementptr i32, i32* %r2, i32 12
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i416
+%r175 = shl i416 %r174, 384
+%r176 = or i416 %r170, %r175
+%r177 = zext i416 %r176 to i448
+%r179 = getelementptr i32, i32* %r2, i32 13
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i448
+%r182 = shl i448 %r181, 416
+%r183 = or i448 %r177, %r182
+%r184 = zext i448 %r183 to i480
+%r186 = getelementptr i32, i32* %r2, i32 14
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i480
+%r189 = shl i480 %r188, 448
+%r190 = or i480 %r184, %r189
+%r191 = zext i480 %r190 to i512
+%r193 = getelementptr i32, i32* %r2, i32 15
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i512
+%r196 = shl i512 %r195, 480
+%r197 = or i512 %r191, %r196
+%r198 = zext i512 %r197 to i544
+%r200 = getelementptr i32, i32* %r2, i32 16
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i544
+%r203 = shl i544 %r202, 512
+%r204 = or i544 %r198, %r203
+%r205 = zext i544 %r204 to i576
+%r207 = getelementptr i32, i32* %r2, i32 17
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i576
+%r210 = shl i576 %r209, 544
+%r211 = or i576 %r205, %r210
+%r212 = zext i576 %r211 to i608
+%r214 = getelementptr i32, i32* %r2, i32 18
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i608
+%r217 = shl i608 %r216, 576
+%r218 = or i608 %r212, %r217
+%r219 = zext i608 %r218 to i640
+%r221 = getelementptr i32, i32* %r2, i32 19
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i640
+%r224 = shl i640 %r223, 608
+%r225 = or i640 %r219, %r224
+%r226 = zext i640 %r225 to i672
+%r228 = getelementptr i32, i32* %r2, i32 20
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i672
+%r231 = shl i672 %r230, 640
+%r232 = or i672 %r226, %r231
+%r233 = zext i672 %r232 to i704
+%r235 = getelementptr i32, i32* %r2, i32 21
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i704
+%r238 = shl i704 %r237, 672
+%r239 = or i704 %r233, %r238
+%r240 = zext i704 %r239 to i736
+%r242 = getelementptr i32, i32* %r2, i32 22
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i736
+%r245 = shl i736 %r244, 704
+%r246 = or i736 %r240, %r245
+%r247 = zext i736 %r246 to i768
+%r249 = getelementptr i32, i32* %r2, i32 23
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i768
+%r252 = shl i768 %r251, 736
+%r253 = or i768 %r247, %r252
+%r254 = zext i768 %r253 to i800
+%r256 = getelementptr i32, i32* %r2, i32 24
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i800
+%r259 = shl i800 %r258, 768
+%r260 = or i800 %r254, %r259
+%r261 = zext i800 %r260 to i832
+%r263 = getelementptr i32, i32* %r2, i32 25
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i832
+%r266 = shl i832 %r265, 800
+%r267 = or i832 %r261, %r266
+%r268 = zext i832 %r267 to i864 ; widen by one extra 32-bit word above the 26-word input
+%r269 = trunc i864 %r268 to i32 ; round 1 of 13: low word of the running value
+%r270 = mul i32 %r269, %r6 ; q = low word * %r6, wrapping mod 2^32
+%r271 = call i448 @mulPv416x32(i32* %r3, i32 %r270) ; helper defined elsewhere; presumably the 416-bit value at %r3 times q
+%r272 = zext i448 %r271 to i864
+%r273 = add i864 %r268, %r272
+%r274 = lshr i864 %r273, 32 ; drop the low 32-bit word
+%r275 = trunc i864 %r274 to i832 ; each round narrows the running value by 32 bits
+%r276 = trunc i832 %r275 to i32
+%r277 = mul i32 %r276, %r6
+%r278 = call i448 @mulPv416x32(i32* %r3, i32 %r277)
+%r279 = zext i448 %r278 to i832
+%r280 = add i832 %r275, %r279
+%r281 = lshr i832 %r280, 32
+%r282 = trunc i832 %r281 to i800
+%r283 = trunc i800 %r282 to i32
+%r284 = mul i32 %r283, %r6
+%r285 = call i448 @mulPv416x32(i32* %r3, i32 %r284)
+%r286 = zext i448 %r285 to i800
+%r287 = add i800 %r282, %r286
+%r288 = lshr i800 %r287, 32
+%r289 = trunc i800 %r288 to i768
+%r290 = trunc i768 %r289 to i32
+%r291 = mul i32 %r290, %r6
+%r292 = call i448 @mulPv416x32(i32* %r3, i32 %r291)
+%r293 = zext i448 %r292 to i768
+%r294 = add i768 %r289, %r293
+%r295 = lshr i768 %r294, 32
+%r296 = trunc i768 %r295 to i736
+%r297 = trunc i736 %r296 to i32
+%r298 = mul i32 %r297, %r6
+%r299 = call i448 @mulPv416x32(i32* %r3, i32 %r298)
+%r300 = zext i448 %r299 to i736
+%r301 = add i736 %r296, %r300
+%r302 = lshr i736 %r301, 32
+%r303 = trunc i736 %r302 to i704
+%r304 = trunc i704 %r303 to i32
+%r305 = mul i32 %r304, %r6
+%r306 = call i448 @mulPv416x32(i32* %r3, i32 %r305)
+%r307 = zext i448 %r306 to i704
+%r308 = add i704 %r303, %r307
+%r309 = lshr i704 %r308, 32
+%r310 = trunc i704 %r309 to i672
+%r311 = trunc i672 %r310 to i32
+%r312 = mul i32 %r311, %r6
+%r313 = call i448 @mulPv416x32(i32* %r3, i32 %r312)
+%r314 = zext i448 %r313 to i672
+%r315 = add i672 %r310, %r314
+%r316 = lshr i672 %r315, 32
+%r317 = trunc i672 %r316 to i640
+%r318 = trunc i640 %r317 to i32
+%r319 = mul i32 %r318, %r6
+%r320 = call i448 @mulPv416x32(i32* %r3, i32 %r319)
+%r321 = zext i448 %r320 to i640
+%r322 = add i640 %r317, %r321
+%r323 = lshr i640 %r322, 32
+%r324 = trunc i640 %r323 to i608
+%r325 = trunc i608 %r324 to i32
+%r326 = mul i32 %r325, %r6
+%r327 = call i448 @mulPv416x32(i32* %r3, i32 %r326)
+%r328 = zext i448 %r327 to i608
+%r329 = add i608 %r324, %r328
+%r330 = lshr i608 %r329, 32
+%r331 = trunc i608 %r330 to i576
+%r332 = trunc i576 %r331 to i32
+%r333 = mul i32 %r332, %r6
+%r334 = call i448 @mulPv416x32(i32* %r3, i32 %r333)
+%r335 = zext i448 %r334 to i576
+%r336 = add i576 %r331, %r335
+%r337 = lshr i576 %r336, 32
+%r338 = trunc i576 %r337 to i544
+%r339 = trunc i544 %r338 to i32
+%r340 = mul i32 %r339, %r6
+%r341 = call i448 @mulPv416x32(i32* %r3, i32 %r340)
+%r342 = zext i448 %r341 to i544
+%r343 = add i544 %r338, %r342
+%r344 = lshr i544 %r343, 32
+%r345 = trunc i544 %r344 to i512
+%r346 = trunc i512 %r345 to i32
+%r347 = mul i32 %r346, %r6
+%r348 = call i448 @mulPv416x32(i32* %r3, i32 %r347)
+%r349 = zext i448 %r348 to i512
+%r350 = add i512 %r345, %r349
+%r351 = lshr i512 %r350, 32
+%r352 = trunc i512 %r351 to i480
+%r353 = trunc i480 %r352 to i32 ; round 13 of 13
+%r354 = mul i32 %r353, %r6
+%r355 = call i448 @mulPv416x32(i32* %r3, i32 %r354)
+%r356 = zext i448 %r355 to i480
+%r357 = add i480 %r352, %r356
+%r358 = lshr i480 %r357, 32
+%r359 = trunc i480 %r358 to i448 ; running value after 13 rounds: 416 bits plus 32 bits of headroom
+%r360 = zext i416 %r91 to i448
+%r361 = sub i448 %r359, %r360 ; trial subtraction of the packed %r3 value
+%r362 = lshr i448 %r361, 416 ; bring bit 416 of the difference down to bit 0
+%r363 = trunc i448 %r362 to i1
+%r364 = select i1 %r363, i448 %r359, i448 %r361 ; bit 416 set -> keep the un-subtracted value, otherwise the difference
+%r365 = trunc i448 %r364 to i416
+%r366 = trunc i416 %r365 to i32 ; from here on: write the selected value to %r1[0..12], least-significant word first
+%r368 = getelementptr i32, i32* %r1, i32 0
+store i32 %r366, i32* %r368
+%r369 = lshr i416 %r365, 32
+%r370 = trunc i416 %r369 to i32
+%r372 = getelementptr i32, i32* %r1, i32 1
+store i32 %r370, i32* %r372
+%r373 = lshr i416 %r369, 32
+%r374 = trunc i416 %r373 to i32
+%r376 = getelementptr i32, i32* %r1, i32 2
+store i32 %r374, i32* %r376
+%r377 = lshr i416 %r373, 32
+%r378 = trunc i416 %r377 to i32
+%r380 = getelementptr i32, i32* %r1, i32 3
+store i32 %r378, i32* %r380
+%r381 = lshr i416 %r377, 32
+%r382 = trunc i416 %r381 to i32
+%r384 = getelementptr i32, i32* %r1, i32 4
+store i32 %r382, i32* %r384
+%r385 = lshr i416 %r381, 32
+%r386 = trunc i416 %r385 to i32
+%r388 = getelementptr i32, i32* %r1, i32 5
+store i32 %r386, i32* %r388
+%r389 = lshr i416 %r385, 32
+%r390 = trunc i416 %r389 to i32
+%r392 = getelementptr i32, i32* %r1, i32 6
+store i32 %r390, i32* %r392
+%r393 = lshr i416 %r389, 32
+%r394 = trunc i416 %r393 to i32
+%r396 = getelementptr i32, i32* %r1, i32 7
+store i32 %r394, i32* %r396
+%r397 = lshr i416 %r393, 32
+%r398 = trunc i416 %r397 to i32
+%r400 = getelementptr i32, i32* %r1, i32 8
+store i32 %r398, i32* %r400
+%r401 = lshr i416 %r397, 32
+%r402 = trunc i416 %r401 to i32
+%r404 = getelementptr i32, i32* %r1, i32 9
+store i32 %r402, i32* %r404
+%r405 = lshr i416 %r401, 32
+%r406 = trunc i416 %r405 to i32
+%r408 = getelementptr i32, i32* %r1, i32 10
+store i32 %r406, i32* %r408
+%r409 = lshr i416 %r405, 32
+%r410 = trunc i416 %r409 to i32
+%r412 = getelementptr i32, i32* %r1, i32 11
+store i32 %r410, i32* %r412
+%r413 = lshr i416 %r409, 32
+%r414 = trunc i416 %r413 to i32
+%r416 = getelementptr i32, i32* %r1, i32 12
+store i32 %r414, i32* %r416
+ret void
+}
+define i32 @mcl_fp_addPre13L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; adds the 13-word values at %r3 and %r4, stores the low 13 words of the sum to %r2, and returns the carry out of bit 415 (0 or 1); no modular reduction is done here
+{
+%r5 = load i32, i32* %r3 ; from here to %r89: pack %r3[0..12] into one i416, word 0 least significant
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r3, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448 ; widen so the addition below cannot lose its carry
+%r91 = load i32, i32* %r4 ; from here to %r175: pack %r4[0..12] the same way
+%r92 = zext i32 %r91 to i64
+%r94 = getelementptr i32, i32* %r4, i32 1
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i64
+%r97 = shl i64 %r96, 32
+%r98 = or i64 %r92, %r97
+%r99 = zext i64 %r98 to i96
+%r101 = getelementptr i32, i32* %r4, i32 2
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i96
+%r104 = shl i96 %r103, 64
+%r105 = or i96 %r99, %r104
+%r106 = zext i96 %r105 to i128
+%r108 = getelementptr i32, i32* %r4, i32 3
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i128
+%r111 = shl i128 %r110, 96
+%r112 = or i128 %r106, %r111
+%r113 = zext i128 %r112 to i160
+%r115 = getelementptr i32, i32* %r4, i32 4
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i160
+%r118 = shl i160 %r117, 128
+%r119 = or i160 %r113, %r118
+%r120 = zext i160 %r119 to i192
+%r122 = getelementptr i32, i32* %r4, i32 5
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i192
+%r125 = shl i192 %r124, 160
+%r126 = or i192 %r120, %r125
+%r127 = zext i192 %r126 to i224
+%r129 = getelementptr i32, i32* %r4, i32 6
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i224
+%r132 = shl i224 %r131, 192
+%r133 = or i224 %r127, %r132
+%r134 = zext i224 %r133 to i256
+%r136 = getelementptr i32, i32* %r4, i32 7
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i256
+%r139 = shl i256 %r138, 224
+%r140 = or i256 %r134, %r139
+%r141 = zext i256 %r140 to i288
+%r143 = getelementptr i32, i32* %r4, i32 8
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i288
+%r146 = shl i288 %r145, 256
+%r147 = or i288 %r141, %r146
+%r148 = zext i288 %r147 to i320
+%r150 = getelementptr i32, i32* %r4, i32 9
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i320
+%r153 = shl i320 %r152, 288
+%r154 = or i320 %r148, %r153
+%r155 = zext i320 %r154 to i352
+%r157 = getelementptr i32, i32* %r4, i32 10
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i352
+%r160 = shl i352 %r159, 320
+%r161 = or i352 %r155, %r160
+%r162 = zext i352 %r161 to i384
+%r164 = getelementptr i32, i32* %r4, i32 11
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i384
+%r167 = shl i384 %r166, 352
+%r168 = or i384 %r162, %r167
+%r169 = zext i384 %r168 to i416
+%r171 = getelementptr i32, i32* %r4, i32 12
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i416
+%r174 = shl i416 %r173, 384
+%r175 = or i416 %r169, %r174
+%r176 = zext i416 %r175 to i448
+%r177 = add i448 %r90, %r176 ; full sum; both operands are below 2^416, so it fits in 417 bits
+%r178 = trunc i448 %r177 to i416 ; low 416 bits are what gets stored
+%r179 = trunc i416 %r178 to i32 ; from here to the last store: write %r2[0..12], least-significant word first
+%r181 = getelementptr i32, i32* %r2, i32 0
+store i32 %r179, i32* %r181
+%r182 = lshr i416 %r178, 32
+%r183 = trunc i416 %r182 to i32
+%r185 = getelementptr i32, i32* %r2, i32 1
+store i32 %r183, i32* %r185
+%r186 = lshr i416 %r182, 32
+%r187 = trunc i416 %r186 to i32
+%r189 = getelementptr i32, i32* %r2, i32 2
+store i32 %r187, i32* %r189
+%r190 = lshr i416 %r186, 32
+%r191 = trunc i416 %r190 to i32
+%r193 = getelementptr i32, i32* %r2, i32 3
+store i32 %r191, i32* %r193
+%r194 = lshr i416 %r190, 32
+%r195 = trunc i416 %r194 to i32
+%r197 = getelementptr i32, i32* %r2, i32 4
+store i32 %r195, i32* %r197
+%r198 = lshr i416 %r194, 32
+%r199 = trunc i416 %r198 to i32
+%r201 = getelementptr i32, i32* %r2, i32 5
+store i32 %r199, i32* %r201
+%r202 = lshr i416 %r198, 32
+%r203 = trunc i416 %r202 to i32
+%r205 = getelementptr i32, i32* %r2, i32 6
+store i32 %r203, i32* %r205
+%r206 = lshr i416 %r202, 32
+%r207 = trunc i416 %r206 to i32
+%r209 = getelementptr i32, i32* %r2, i32 7
+store i32 %r207, i32* %r209
+%r210 = lshr i416 %r206, 32
+%r211 = trunc i416 %r210 to i32
+%r213 = getelementptr i32, i32* %r2, i32 8
+store i32 %r211, i32* %r213
+%r214 = lshr i416 %r210, 32
+%r215 = trunc i416 %r214 to i32
+%r217 = getelementptr i32, i32* %r2, i32 9
+store i32 %r215, i32* %r217
+%r218 = lshr i416 %r214, 32
+%r219 = trunc i416 %r218 to i32
+%r221 = getelementptr i32, i32* %r2, i32 10
+store i32 %r219, i32* %r221
+%r222 = lshr i416 %r218, 32
+%r223 = trunc i416 %r222 to i32
+%r225 = getelementptr i32, i32* %r2, i32 11
+store i32 %r223, i32* %r225
+%r226 = lshr i416 %r222, 32
+%r227 = trunc i416 %r226 to i32
+%r229 = getelementptr i32, i32* %r2, i32 12
+store i32 %r227, i32* %r229
+%r230 = lshr i448 %r177, 416 ; bits 416 and up of the sum = carry out
+%r231 = trunc i448 %r230 to i32
+ret i32 %r231
+}
+define i32 @mcl_fp_subPre13L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r3
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r3, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r91 = load i32, i32* %r4
+%r92 = zext i32 %r91 to i64
+%r94 = getelementptr i32, i32* %r4, i32 1
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i64
+%r97 = shl i64 %r96, 32
+%r98 = or i64 %r92, %r97
+%r99 = zext i64 %r98 to i96
+%r101 = getelementptr i32, i32* %r4, i32 2
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i96
+%r104 = shl i96 %r103, 64
+%r105 = or i96 %r99, %r104
+%r106 = zext i96 %r105 to i128
+%r108 = getelementptr i32, i32* %r4, i32 3
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i128
+%r111 = shl i128 %r110, 96
+%r112 = or i128 %r106, %r111
+%r113 = zext i128 %r112 to i160
+%r115 = getelementptr i32, i32* %r4, i32 4
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i160
+%r118 = shl i160 %r117, 128
+%r119 = or i160 %r113, %r118
+%r120 = zext i160 %r119 to i192
+%r122 = getelementptr i32, i32* %r4, i32 5
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i192
+%r125 = shl i192 %r124, 160
+%r126 = or i192 %r120, %r125
+%r127 = zext i192 %r126 to i224
+%r129 = getelementptr i32, i32* %r4, i32 6
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i224
+%r132 = shl i224 %r131, 192
+%r133 = or i224 %r127, %r132
+%r134 = zext i224 %r133 to i256
+%r136 = getelementptr i32, i32* %r4, i32 7
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i256
+%r139 = shl i256 %r138, 224
+%r140 = or i256 %r134, %r139
+%r141 = zext i256 %r140 to i288
+%r143 = getelementptr i32, i32* %r4, i32 8
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i288
+%r146 = shl i288 %r145, 256
+%r147 = or i288 %r141, %r146
+%r148 = zext i288 %r147 to i320
+%r150 = getelementptr i32, i32* %r4, i32 9
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i320
+%r153 = shl i320 %r152, 288
+%r154 = or i320 %r148, %r153
+%r155 = zext i320 %r154 to i352
+%r157 = getelementptr i32, i32* %r4, i32 10
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i352
+%r160 = shl i352 %r159, 320
+%r161 = or i352 %r155, %r160
+%r162 = zext i352 %r161 to i384
+%r164 = getelementptr i32, i32* %r4, i32 11
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i384
+%r167 = shl i384 %r166, 352
+%r168 = or i384 %r162, %r167
+%r169 = zext i384 %r168 to i416
+%r171 = getelementptr i32, i32* %r4, i32 12
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i416
+%r174 = shl i416 %r173, 384
+%r175 = or i416 %r169, %r174
+%r176 = zext i416 %r175 to i448
+%r177 = sub i448 %r90, %r176
+%r178 = trunc i448 %r177 to i416
+%r179 = trunc i416 %r178 to i32
+%r181 = getelementptr i32, i32* %r2, i32 0
+store i32 %r179, i32* %r181
+%r182 = lshr i416 %r178, 32
+%r183 = trunc i416 %r182 to i32
+%r185 = getelementptr i32, i32* %r2, i32 1
+store i32 %r183, i32* %r185
+%r186 = lshr i416 %r182, 32
+%r187 = trunc i416 %r186 to i32
+%r189 = getelementptr i32, i32* %r2, i32 2
+store i32 %r187, i32* %r189
+%r190 = lshr i416 %r186, 32
+%r191 = trunc i416 %r190 to i32
+%r193 = getelementptr i32, i32* %r2, i32 3
+store i32 %r191, i32* %r193
+%r194 = lshr i416 %r190, 32
+%r195 = trunc i416 %r194 to i32
+%r197 = getelementptr i32, i32* %r2, i32 4
+store i32 %r195, i32* %r197
+%r198 = lshr i416 %r194, 32
+%r199 = trunc i416 %r198 to i32
+%r201 = getelementptr i32, i32* %r2, i32 5
+store i32 %r199, i32* %r201
+%r202 = lshr i416 %r198, 32
+%r203 = trunc i416 %r202 to i32
+%r205 = getelementptr i32, i32* %r2, i32 6
+store i32 %r203, i32* %r205
+%r206 = lshr i416 %r202, 32
+%r207 = trunc i416 %r206 to i32
+%r209 = getelementptr i32, i32* %r2, i32 7
+store i32 %r207, i32* %r209
+%r210 = lshr i416 %r206, 32
+%r211 = trunc i416 %r210 to i32
+%r213 = getelementptr i32, i32* %r2, i32 8
+store i32 %r211, i32* %r213
+%r214 = lshr i416 %r210, 32
+%r215 = trunc i416 %r214 to i32
+%r217 = getelementptr i32, i32* %r2, i32 9
+store i32 %r215, i32* %r217
+%r218 = lshr i416 %r214, 32
+%r219 = trunc i416 %r218 to i32
+%r221 = getelementptr i32, i32* %r2, i32 10
+store i32 %r219, i32* %r221
+%r222 = lshr i416 %r218, 32
+%r223 = trunc i416 %r222 to i32
+%r225 = getelementptr i32, i32* %r2, i32 11
+store i32 %r223, i32* %r225
+%r226 = lshr i416 %r222, 32
+%r227 = trunc i416 %r226 to i32
+%r229 = getelementptr i32, i32* %r2, i32 12
+store i32 %r227, i32* %r229
+%r230 = lshr i448 %r177, 416
+%r231 = trunc i448 %r230 to i32
+%r233 = and i32 %r231, 1
+ret i32 %r233
+}
+define void @mcl_fp_shr1_13L(i32* noalias  %r1, i32* noalias  %r2) ; Reads 13 i32 words from %r2, shifts the packed 416-bit value right by one bit, writes 13 i32 words to %r1.
+{
+%r3 = load i32, i32* %r2 ; word 0 supplies bits 0..31 of the packed value
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32 ; word i is placed at bits 32*i .. 32*i+31; the same widen/shift/or pattern repeats for every word
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = zext i192 %r38 to i224
+%r41 = getelementptr i32, i32* %r2, i32 6
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i224
+%r44 = shl i224 %r43, 192
+%r45 = or i224 %r39, %r44
+%r46 = zext i224 %r45 to i256
+%r48 = getelementptr i32, i32* %r2, i32 7
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i256
+%r51 = shl i256 %r50, 224
+%r52 = or i256 %r46, %r51
+%r53 = zext i256 %r52 to i288
+%r55 = getelementptr i32, i32* %r2, i32 8
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i288
+%r58 = shl i288 %r57, 256
+%r59 = or i288 %r53, %r58
+%r60 = zext i288 %r59 to i320
+%r62 = getelementptr i32, i32* %r2, i32 9
+%r63 = load i32, i32* %r62
+%r64 = zext i32 %r63 to i320
+%r65 = shl i320 %r64, 288
+%r66 = or i320 %r60, %r65
+%r67 = zext i320 %r66 to i352
+%r69 = getelementptr i32, i32* %r2, i32 10
+%r70 = load i32, i32* %r69
+%r71 = zext i32 %r70 to i352
+%r72 = shl i352 %r71, 320
+%r73 = or i352 %r67, %r72
+%r74 = zext i352 %r73 to i384
+%r76 = getelementptr i32, i32* %r2, i32 11
+%r77 = load i32, i32* %r76
+%r78 = zext i32 %r77 to i384
+%r79 = shl i384 %r78, 352
+%r80 = or i384 %r74, %r79
+%r81 = zext i384 %r80 to i416
+%r83 = getelementptr i32, i32* %r2, i32 12
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i416
+%r86 = shl i416 %r85, 384
+%r87 = or i416 %r81, %r86 ; %r87 = all 13 input words packed into one i416
+%r88 = lshr i416 %r87, 1 ; logical shift right by one bit: bit 0 is dropped and bit 415 becomes 0
+%r89 = trunc i416 %r88 to i32 ; low 32 bits of the shifted value -> output word 0
+%r91 = getelementptr i32, i32* %r1, i32 0
+store i32 %r89, i32* %r91
+%r92 = lshr i416 %r88, 32 ; discard the word just stored so the next word sits in the low 32 bits; repeated for each output word
+%r93 = trunc i416 %r92 to i32
+%r95 = getelementptr i32, i32* %r1, i32 1
+store i32 %r93, i32* %r95
+%r96 = lshr i416 %r92, 32
+%r97 = trunc i416 %r96 to i32
+%r99 = getelementptr i32, i32* %r1, i32 2
+store i32 %r97, i32* %r99
+%r100 = lshr i416 %r96, 32
+%r101 = trunc i416 %r100 to i32
+%r103 = getelementptr i32, i32* %r1, i32 3
+store i32 %r101, i32* %r103
+%r104 = lshr i416 %r100, 32
+%r105 = trunc i416 %r104 to i32
+%r107 = getelementptr i32, i32* %r1, i32 4
+store i32 %r105, i32* %r107
+%r108 = lshr i416 %r104, 32
+%r109 = trunc i416 %r108 to i32
+%r111 = getelementptr i32, i32* %r1, i32 5
+store i32 %r109, i32* %r111
+%r112 = lshr i416 %r108, 32
+%r113 = trunc i416 %r112 to i32
+%r115 = getelementptr i32, i32* %r1, i32 6
+store i32 %r113, i32* %r115
+%r116 = lshr i416 %r112, 32
+%r117 = trunc i416 %r116 to i32
+%r119 = getelementptr i32, i32* %r1, i32 7
+store i32 %r117, i32* %r119
+%r120 = lshr i416 %r116, 32
+%r121 = trunc i416 %r120 to i32
+%r123 = getelementptr i32, i32* %r1, i32 8
+store i32 %r121, i32* %r123
+%r124 = lshr i416 %r120, 32
+%r125 = trunc i416 %r124 to i32
+%r127 = getelementptr i32, i32* %r1, i32 9
+store i32 %r125, i32* %r127
+%r128 = lshr i416 %r124, 32
+%r129 = trunc i416 %r128 to i32
+%r131 = getelementptr i32, i32* %r1, i32 10
+store i32 %r129, i32* %r131
+%r132 = lshr i416 %r128, 32
+%r133 = trunc i416 %r132 to i32
+%r135 = getelementptr i32, i32* %r1, i32 11
+store i32 %r133, i32* %r135
+%r136 = lshr i416 %r132, 32
+%r137 = trunc i416 %r136 to i32
+%r139 = getelementptr i32, i32* %r1, i32 12
+store i32 %r137, i32* %r139 ; last (most-significant) output word
+ret void
+}
+define void @mcl_fp_add13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Adds the 13-word values at %r2 and %r3, stores the low 416 bits of the sum to %r1, then overwrites %r1 with (sum - value at %r4) when that subtraction does not borrow. NOTE(review): %r4 is presumably the field modulus p - confirm against callers.
+{
+%r5 = load i32, i32* %r2 ; start packing the 13 words at %r2 into an i416 (word i -> bits 32*i .. 32*i+31)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88 ; %r89 = first operand (from %r2) as one i416
+%r90 = load i32, i32* %r3 ; same packing for the 13 words at %r3
+%r91 = zext i32 %r90 to i64
+%r93 = getelementptr i32, i32* %r3, i32 1
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i64
+%r96 = shl i64 %r95, 32
+%r97 = or i64 %r91, %r96
+%r98 = zext i64 %r97 to i96
+%r100 = getelementptr i32, i32* %r3, i32 2
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i96
+%r103 = shl i96 %r102, 64
+%r104 = or i96 %r98, %r103
+%r105 = zext i96 %r104 to i128
+%r107 = getelementptr i32, i32* %r3, i32 3
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i128
+%r110 = shl i128 %r109, 96
+%r111 = or i128 %r105, %r110
+%r112 = zext i128 %r111 to i160
+%r114 = getelementptr i32, i32* %r3, i32 4
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i160
+%r117 = shl i160 %r116, 128
+%r118 = or i160 %r112, %r117
+%r119 = zext i160 %r118 to i192
+%r121 = getelementptr i32, i32* %r3, i32 5
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i192
+%r124 = shl i192 %r123, 160
+%r125 = or i192 %r119, %r124
+%r126 = zext i192 %r125 to i224
+%r128 = getelementptr i32, i32* %r3, i32 6
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i224
+%r131 = shl i224 %r130, 192
+%r132 = or i224 %r126, %r131
+%r133 = zext i224 %r132 to i256
+%r135 = getelementptr i32, i32* %r3, i32 7
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i256
+%r138 = shl i256 %r137, 224
+%r139 = or i256 %r133, %r138
+%r140 = zext i256 %r139 to i288
+%r142 = getelementptr i32, i32* %r3, i32 8
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i288
+%r145 = shl i288 %r144, 256
+%r146 = or i288 %r140, %r145
+%r147 = zext i288 %r146 to i320
+%r149 = getelementptr i32, i32* %r3, i32 9
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i320
+%r152 = shl i320 %r151, 288
+%r153 = or i320 %r147, %r152
+%r154 = zext i320 %r153 to i352
+%r156 = getelementptr i32, i32* %r3, i32 10
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i352
+%r159 = shl i352 %r158, 320
+%r160 = or i352 %r154, %r159
+%r161 = zext i352 %r160 to i384
+%r163 = getelementptr i32, i32* %r3, i32 11
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i384
+%r166 = shl i384 %r165, 352
+%r167 = or i384 %r161, %r166
+%r168 = zext i384 %r167 to i416
+%r170 = getelementptr i32, i32* %r3, i32 12
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i416
+%r173 = shl i416 %r172, 384
+%r174 = or i416 %r168, %r173 ; %r174 = second operand (from %r3) as one i416
+%r175 = zext i416 %r89 to i448 ; widen both operands to i448 so the carry out of bit 415 is kept
+%r176 = zext i416 %r174 to i448
+%r177 = add i448 %r175, %r176 ; full sum including the carry bit
+%r178 = trunc i448 %r177 to i416 ; low 416 bits of the sum
+%r179 = trunc i416 %r178 to i32 ; store the truncated sum to %r1 one 32-bit word at a time, lowest word first
+%r181 = getelementptr i32, i32* %r1, i32 0
+store i32 %r179, i32* %r181
+%r182 = lshr i416 %r178, 32
+%r183 = trunc i416 %r182 to i32
+%r185 = getelementptr i32, i32* %r1, i32 1
+store i32 %r183, i32* %r185
+%r186 = lshr i416 %r182, 32
+%r187 = trunc i416 %r186 to i32
+%r189 = getelementptr i32, i32* %r1, i32 2
+store i32 %r187, i32* %r189
+%r190 = lshr i416 %r186, 32
+%r191 = trunc i416 %r190 to i32
+%r193 = getelementptr i32, i32* %r1, i32 3
+store i32 %r191, i32* %r193
+%r194 = lshr i416 %r190, 32
+%r195 = trunc i416 %r194 to i32
+%r197 = getelementptr i32, i32* %r1, i32 4
+store i32 %r195, i32* %r197
+%r198 = lshr i416 %r194, 32
+%r199 = trunc i416 %r198 to i32
+%r201 = getelementptr i32, i32* %r1, i32 5
+store i32 %r199, i32* %r201
+%r202 = lshr i416 %r198, 32
+%r203 = trunc i416 %r202 to i32
+%r205 = getelementptr i32, i32* %r1, i32 6
+store i32 %r203, i32* %r205
+%r206 = lshr i416 %r202, 32
+%r207 = trunc i416 %r206 to i32
+%r209 = getelementptr i32, i32* %r1, i32 7
+store i32 %r207, i32* %r209
+%r210 = lshr i416 %r206, 32
+%r211 = trunc i416 %r210 to i32
+%r213 = getelementptr i32, i32* %r1, i32 8
+store i32 %r211, i32* %r213
+%r214 = lshr i416 %r210, 32
+%r215 = trunc i416 %r214 to i32
+%r217 = getelementptr i32, i32* %r1, i32 9
+store i32 %r215, i32* %r217
+%r218 = lshr i416 %r214, 32
+%r219 = trunc i416 %r218 to i32
+%r221 = getelementptr i32, i32* %r1, i32 10
+store i32 %r219, i32* %r221
+%r222 = lshr i416 %r218, 32
+%r223 = trunc i416 %r222 to i32
+%r225 = getelementptr i32, i32* %r1, i32 11
+store i32 %r223, i32* %r225
+%r226 = lshr i416 %r222, 32
+%r227 = trunc i416 %r226 to i32
+%r229 = getelementptr i32, i32* %r1, i32 12
+store i32 %r227, i32* %r229
+%r230 = load i32, i32* %r4 ; pack the 13 words at %r4 into an i416, same layout as the operands
+%r231 = zext i32 %r230 to i64
+%r233 = getelementptr i32, i32* %r4, i32 1
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i64
+%r236 = shl i64 %r235, 32
+%r237 = or i64 %r231, %r236
+%r238 = zext i64 %r237 to i96
+%r240 = getelementptr i32, i32* %r4, i32 2
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i96
+%r243 = shl i96 %r242, 64
+%r244 = or i96 %r238, %r243
+%r245 = zext i96 %r244 to i128
+%r247 = getelementptr i32, i32* %r4, i32 3
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i128
+%r250 = shl i128 %r249, 96
+%r251 = or i128 %r245, %r250
+%r252 = zext i128 %r251 to i160
+%r254 = getelementptr i32, i32* %r4, i32 4
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i160
+%r257 = shl i160 %r256, 128
+%r258 = or i160 %r252, %r257
+%r259 = zext i160 %r258 to i192
+%r261 = getelementptr i32, i32* %r4, i32 5
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i192
+%r264 = shl i192 %r263, 160
+%r265 = or i192 %r259, %r264
+%r266 = zext i192 %r265 to i224
+%r268 = getelementptr i32, i32* %r4, i32 6
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i224
+%r271 = shl i224 %r270, 192
+%r272 = or i224 %r266, %r271
+%r273 = zext i224 %r272 to i256
+%r275 = getelementptr i32, i32* %r4, i32 7
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i256
+%r278 = shl i256 %r277, 224
+%r279 = or i256 %r273, %r278
+%r280 = zext i256 %r279 to i288
+%r282 = getelementptr i32, i32* %r4, i32 8
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i288
+%r285 = shl i288 %r284, 256
+%r286 = or i288 %r280, %r285
+%r287 = zext i288 %r286 to i320
+%r289 = getelementptr i32, i32* %r4, i32 9
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i320
+%r292 = shl i320 %r291, 288
+%r293 = or i320 %r287, %r292
+%r294 = zext i320 %r293 to i352
+%r296 = getelementptr i32, i32* %r4, i32 10
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i352
+%r299 = shl i352 %r298, 320
+%r300 = or i352 %r294, %r299
+%r301 = zext i352 %r300 to i384
+%r303 = getelementptr i32, i32* %r4, i32 11
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i384
+%r306 = shl i384 %r305, 352
+%r307 = or i384 %r301, %r306
+%r308 = zext i384 %r307 to i416
+%r310 = getelementptr i32, i32* %r4, i32 12
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i416
+%r313 = shl i416 %r312, 384
+%r314 = or i416 %r308, %r313 ; %r314 = value at %r4 as one i416
+%r315 = zext i416 %r314 to i448
+%r316 = sub i448 %r177, %r315 ; full sum minus the %r4 value, computed in 448 bits
+%r317 = lshr i448 %r316, 416 ; bring bit 416 of the difference down to bit 0
+%r318 = trunc i448 %r317 to i1 ; bit 416 of the difference; it is set when the sum is smaller than the %r4 value (the subtraction wrapped)
+br i1%r318, label %carry, label %nocarry ; bit set -> keep the sum already stored in %r1; bit clear -> overwrite %r1 with the difference
+nocarry: ; subtraction did not borrow: replace %r1 with the low 416 bits of the difference
+%r319 = trunc i448 %r316 to i416
+%r320 = trunc i416 %r319 to i32
+%r322 = getelementptr i32, i32* %r1, i32 0
+store i32 %r320, i32* %r322
+%r323 = lshr i416 %r319, 32
+%r324 = trunc i416 %r323 to i32
+%r326 = getelementptr i32, i32* %r1, i32 1
+store i32 %r324, i32* %r326
+%r327 = lshr i416 %r323, 32
+%r328 = trunc i416 %r327 to i32
+%r330 = getelementptr i32, i32* %r1, i32 2
+store i32 %r328, i32* %r330
+%r331 = lshr i416 %r327, 32
+%r332 = trunc i416 %r331 to i32
+%r334 = getelementptr i32, i32* %r1, i32 3
+store i32 %r332, i32* %r334
+%r335 = lshr i416 %r331, 32
+%r336 = trunc i416 %r335 to i32
+%r338 = getelementptr i32, i32* %r1, i32 4
+store i32 %r336, i32* %r338
+%r339 = lshr i416 %r335, 32
+%r340 = trunc i416 %r339 to i32
+%r342 = getelementptr i32, i32* %r1, i32 5
+store i32 %r340, i32* %r342
+%r343 = lshr i416 %r339, 32
+%r344 = trunc i416 %r343 to i32
+%r346 = getelementptr i32, i32* %r1, i32 6
+store i32 %r344, i32* %r346
+%r347 = lshr i416 %r343, 32
+%r348 = trunc i416 %r347 to i32
+%r350 = getelementptr i32, i32* %r1, i32 7
+store i32 %r348, i32* %r350
+%r351 = lshr i416 %r347, 32
+%r352 = trunc i416 %r351 to i32
+%r354 = getelementptr i32, i32* %r1, i32 8
+store i32 %r352, i32* %r354
+%r355 = lshr i416 %r351, 32
+%r356 = trunc i416 %r355 to i32
+%r358 = getelementptr i32, i32* %r1, i32 9
+store i32 %r356, i32* %r358
+%r359 = lshr i416 %r355, 32
+%r360 = trunc i416 %r359 to i32
+%r362 = getelementptr i32, i32* %r1, i32 10
+store i32 %r360, i32* %r362
+%r363 = lshr i416 %r359, 32
+%r364 = trunc i416 %r363 to i32
+%r366 = getelementptr i32, i32* %r1, i32 11
+store i32 %r364, i32* %r366
+%r367 = lshr i416 %r363, 32
+%r368 = trunc i416 %r367 to i32
+%r370 = getelementptr i32, i32* %r1, i32 12
+store i32 %r368, i32* %r370
+ret void
+carry: ; despite the label name this is the borrow case of the subtraction: %r1 already holds the plain sum, nothing more to write
+ret void
+}
+define void @mcl_fp_addNF13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Branch-free add: sums the 13-word values at %r2 and %r3 in 416 bits, subtracts the value at %r4, and stores the difference to %r1 unless its top bit is set, in which case the plain sum is stored. NOTE(review): %r4 is presumably the field modulus p, and "NF" presumably means p leaves bit 415 free so the 416-bit sum cannot overflow - confirm against callers.
+{
+%r5 = load i32, i32* %r2 ; start packing the 13 words at %r2 into an i416 (word i -> bits 32*i .. 32*i+31)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88 ; %r89 = first operand (from %r2) as one i416
+%r90 = load i32, i32* %r3 ; same packing for the 13 words at %r3
+%r91 = zext i32 %r90 to i64
+%r93 = getelementptr i32, i32* %r3, i32 1
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i64
+%r96 = shl i64 %r95, 32
+%r97 = or i64 %r91, %r96
+%r98 = zext i64 %r97 to i96
+%r100 = getelementptr i32, i32* %r3, i32 2
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i96
+%r103 = shl i96 %r102, 64
+%r104 = or i96 %r98, %r103
+%r105 = zext i96 %r104 to i128
+%r107 = getelementptr i32, i32* %r3, i32 3
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i128
+%r110 = shl i128 %r109, 96
+%r111 = or i128 %r105, %r110
+%r112 = zext i128 %r111 to i160
+%r114 = getelementptr i32, i32* %r3, i32 4
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i160
+%r117 = shl i160 %r116, 128
+%r118 = or i160 %r112, %r117
+%r119 = zext i160 %r118 to i192
+%r121 = getelementptr i32, i32* %r3, i32 5
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i192
+%r124 = shl i192 %r123, 160
+%r125 = or i192 %r119, %r124
+%r126 = zext i192 %r125 to i224
+%r128 = getelementptr i32, i32* %r3, i32 6
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i224
+%r131 = shl i224 %r130, 192
+%r132 = or i224 %r126, %r131
+%r133 = zext i224 %r132 to i256
+%r135 = getelementptr i32, i32* %r3, i32 7
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i256
+%r138 = shl i256 %r137, 224
+%r139 = or i256 %r133, %r138
+%r140 = zext i256 %r139 to i288
+%r142 = getelementptr i32, i32* %r3, i32 8
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i288
+%r145 = shl i288 %r144, 256
+%r146 = or i288 %r140, %r145
+%r147 = zext i288 %r146 to i320
+%r149 = getelementptr i32, i32* %r3, i32 9
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i320
+%r152 = shl i320 %r151, 288
+%r153 = or i320 %r147, %r152
+%r154 = zext i320 %r153 to i352
+%r156 = getelementptr i32, i32* %r3, i32 10
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i352
+%r159 = shl i352 %r158, 320
+%r160 = or i352 %r154, %r159
+%r161 = zext i352 %r160 to i384
+%r163 = getelementptr i32, i32* %r3, i32 11
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i384
+%r166 = shl i384 %r165, 352
+%r167 = or i384 %r161, %r166
+%r168 = zext i384 %r167 to i416
+%r170 = getelementptr i32, i32* %r3, i32 12
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i416
+%r173 = shl i416 %r172, 384
+%r174 = or i416 %r168, %r173 ; %r174 = second operand (from %r3) as one i416
+%r175 = add i416 %r89, %r174 ; 416-bit add with no widening: any carry out of bit 415 is discarded
+%r176 = load i32, i32* %r4 ; pack the 13 words at %r4 into an i416, same layout as the operands
+%r177 = zext i32 %r176 to i64
+%r179 = getelementptr i32, i32* %r4, i32 1
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i64
+%r182 = shl i64 %r181, 32
+%r183 = or i64 %r177, %r182
+%r184 = zext i64 %r183 to i96
+%r186 = getelementptr i32, i32* %r4, i32 2
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i96
+%r189 = shl i96 %r188, 64
+%r190 = or i96 %r184, %r189
+%r191 = zext i96 %r190 to i128
+%r193 = getelementptr i32, i32* %r4, i32 3
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i128
+%r196 = shl i128 %r195, 96
+%r197 = or i128 %r191, %r196
+%r198 = zext i128 %r197 to i160
+%r200 = getelementptr i32, i32* %r4, i32 4
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i160
+%r203 = shl i160 %r202, 128
+%r204 = or i160 %r198, %r203
+%r205 = zext i160 %r204 to i192
+%r207 = getelementptr i32, i32* %r4, i32 5
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i192
+%r210 = shl i192 %r209, 160
+%r211 = or i192 %r205, %r210
+%r212 = zext i192 %r211 to i224
+%r214 = getelementptr i32, i32* %r4, i32 6
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i224
+%r217 = shl i224 %r216, 192
+%r218 = or i224 %r212, %r217
+%r219 = zext i224 %r218 to i256
+%r221 = getelementptr i32, i32* %r4, i32 7
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i256
+%r224 = shl i256 %r223, 224
+%r225 = or i256 %r219, %r224
+%r226 = zext i256 %r225 to i288
+%r228 = getelementptr i32, i32* %r4, i32 8
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i288
+%r231 = shl i288 %r230, 256
+%r232 = or i288 %r226, %r231
+%r233 = zext i288 %r232 to i320
+%r235 = getelementptr i32, i32* %r4, i32 9
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i320
+%r238 = shl i320 %r237, 288
+%r239 = or i320 %r233, %r238
+%r240 = zext i320 %r239 to i352
+%r242 = getelementptr i32, i32* %r4, i32 10
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i352
+%r245 = shl i352 %r244, 320
+%r246 = or i352 %r240, %r245
+%r247 = zext i352 %r246 to i384
+%r249 = getelementptr i32, i32* %r4, i32 11
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i384
+%r252 = shl i384 %r251, 352
+%r253 = or i384 %r247, %r252
+%r254 = zext i384 %r253 to i416
+%r256 = getelementptr i32, i32* %r4, i32 12
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i416
+%r259 = shl i416 %r258, 384
+%r260 = or i416 %r254, %r259 ; %r260 = value at %r4 as one i416
+%r261 = sub i416 %r175, %r260 ; sum minus the %r4 value, wrapping in 416 bits
+%r262 = lshr i416 %r261, 415 ; move the top bit (bit 415) of the difference down to bit 0
+%r263 = trunc i416 %r262 to i1 ; 1 when the top bit of the difference is set
+%r264 = select i1 %r263, i416 %r175, i416 %r261 ; top bit set -> keep the plain sum, otherwise take the difference (no branch)
+%r265 = trunc i416 %r264 to i32 ; store the selected value to %r1 one 32-bit word at a time, lowest word first
+%r267 = getelementptr i32, i32* %r1, i32 0
+store i32 %r265, i32* %r267
+%r268 = lshr i416 %r264, 32
+%r269 = trunc i416 %r268 to i32
+%r271 = getelementptr i32, i32* %r1, i32 1
+store i32 %r269, i32* %r271
+%r272 = lshr i416 %r268, 32
+%r273 = trunc i416 %r272 to i32
+%r275 = getelementptr i32, i32* %r1, i32 2
+store i32 %r273, i32* %r275
+%r276 = lshr i416 %r272, 32
+%r277 = trunc i416 %r276 to i32
+%r279 = getelementptr i32, i32* %r1, i32 3
+store i32 %r277, i32* %r279
+%r280 = lshr i416 %r276, 32
+%r281 = trunc i416 %r280 to i32
+%r283 = getelementptr i32, i32* %r1, i32 4
+store i32 %r281, i32* %r283
+%r284 = lshr i416 %r280, 32
+%r285 = trunc i416 %r284 to i32
+%r287 = getelementptr i32, i32* %r1, i32 5
+store i32 %r285, i32* %r287
+%r288 = lshr i416 %r284, 32
+%r289 = trunc i416 %r288 to i32
+%r291 = getelementptr i32, i32* %r1, i32 6
+store i32 %r289, i32* %r291
+%r292 = lshr i416 %r288, 32
+%r293 = trunc i416 %r292 to i32
+%r295 = getelementptr i32, i32* %r1, i32 7
+store i32 %r293, i32* %r295
+%r296 = lshr i416 %r292, 32
+%r297 = trunc i416 %r296 to i32
+%r299 = getelementptr i32, i32* %r1, i32 8
+store i32 %r297, i32* %r299
+%r300 = lshr i416 %r296, 32
+%r301 = trunc i416 %r300 to i32
+%r303 = getelementptr i32, i32* %r1, i32 9
+store i32 %r301, i32* %r303
+%r304 = lshr i416 %r300, 32
+%r305 = trunc i416 %r304 to i32
+%r307 = getelementptr i32, i32* %r1, i32 10
+store i32 %r305, i32* %r307
+%r308 = lshr i416 %r304, 32
+%r309 = trunc i416 %r308 to i32
+%r311 = getelementptr i32, i32* %r1, i32 11
+store i32 %r309, i32* %r311
+%r312 = lshr i416 %r308, 32
+%r313 = trunc i416 %r312 to i32
+%r315 = getelementptr i32, i32* %r1, i32 12
+store i32 %r313, i32* %r315
+ret void
+}
+define void @mcl_fp_sub13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = load i32, i32* %r3
+%r91 = zext i32 %r90 to i64
+%r93 = getelementptr i32, i32* %r3, i32 1
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i64
+%r96 = shl i64 %r95, 32
+%r97 = or i64 %r91, %r96
+%r98 = zext i64 %r97 to i96
+%r100 = getelementptr i32, i32* %r3, i32 2
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i96
+%r103 = shl i96 %r102, 64
+%r104 = or i96 %r98, %r103
+%r105 = zext i96 %r104 to i128
+%r107 = getelementptr i32, i32* %r3, i32 3
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i128
+%r110 = shl i128 %r109, 96
+%r111 = or i128 %r105, %r110
+%r112 = zext i128 %r111 to i160
+%r114 = getelementptr i32, i32* %r3, i32 4
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i160
+%r117 = shl i160 %r116, 128
+%r118 = or i160 %r112, %r117
+%r119 = zext i160 %r118 to i192
+%r121 = getelementptr i32, i32* %r3, i32 5
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i192
+%r124 = shl i192 %r123, 160
+%r125 = or i192 %r119, %r124
+%r126 = zext i192 %r125 to i224
+%r128 = getelementptr i32, i32* %r3, i32 6
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i224
+%r131 = shl i224 %r130, 192
+%r132 = or i224 %r126, %r131
+%r133 = zext i224 %r132 to i256
+%r135 = getelementptr i32, i32* %r3, i32 7
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i256
+%r138 = shl i256 %r137, 224
+%r139 = or i256 %r133, %r138
+%r140 = zext i256 %r139 to i288
+%r142 = getelementptr i32, i32* %r3, i32 8
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i288
+%r145 = shl i288 %r144, 256
+%r146 = or i288 %r140, %r145
+%r147 = zext i288 %r146 to i320
+%r149 = getelementptr i32, i32* %r3, i32 9
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i320
+%r152 = shl i320 %r151, 288
+%r153 = or i320 %r147, %r152
+%r154 = zext i320 %r153 to i352
+%r156 = getelementptr i32, i32* %r3, i32 10
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i352
+%r159 = shl i352 %r158, 320
+%r160 = or i352 %r154, %r159
+%r161 = zext i352 %r160 to i384
+%r163 = getelementptr i32, i32* %r3, i32 11
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i384
+%r166 = shl i384 %r165, 352
+%r167 = or i384 %r161, %r166
+%r168 = zext i384 %r167 to i416
+%r170 = getelementptr i32, i32* %r3, i32 12
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i416
+%r173 = shl i416 %r172, 384
+%r174 = or i416 %r168, %r173
+%r175 = zext i416 %r89 to i448
+%r176 = zext i416 %r174 to i448
+%r177 = sub i448 %r175, %r176
+%r178 = trunc i448 %r177 to i416
+%r179 = lshr i448 %r177, 416
+%r180 = trunc i448 %r179 to i1
+%r181 = trunc i416 %r178 to i32
+%r183 = getelementptr i32, i32* %r1, i32 0
+store i32 %r181, i32* %r183
+%r184 = lshr i416 %r178, 32
+%r185 = trunc i416 %r184 to i32
+%r187 = getelementptr i32, i32* %r1, i32 1
+store i32 %r185, i32* %r187
+%r188 = lshr i416 %r184, 32
+%r189 = trunc i416 %r188 to i32
+%r191 = getelementptr i32, i32* %r1, i32 2
+store i32 %r189, i32* %r191
+%r192 = lshr i416 %r188, 32
+%r193 = trunc i416 %r192 to i32
+%r195 = getelementptr i32, i32* %r1, i32 3
+store i32 %r193, i32* %r195
+%r196 = lshr i416 %r192, 32
+%r197 = trunc i416 %r196 to i32
+%r199 = getelementptr i32, i32* %r1, i32 4
+store i32 %r197, i32* %r199
+%r200 = lshr i416 %r196, 32
+%r201 = trunc i416 %r200 to i32
+%r203 = getelementptr i32, i32* %r1, i32 5
+store i32 %r201, i32* %r203
+%r204 = lshr i416 %r200, 32
+%r205 = trunc i416 %r204 to i32
+%r207 = getelementptr i32, i32* %r1, i32 6
+store i32 %r205, i32* %r207
+%r208 = lshr i416 %r204, 32
+%r209 = trunc i416 %r208 to i32
+%r211 = getelementptr i32, i32* %r1, i32 7
+store i32 %r209, i32* %r211
+%r212 = lshr i416 %r208, 32
+%r213 = trunc i416 %r212 to i32
+%r215 = getelementptr i32, i32* %r1, i32 8
+store i32 %r213, i32* %r215
+%r216 = lshr i416 %r212, 32
+%r217 = trunc i416 %r216 to i32
+%r219 = getelementptr i32, i32* %r1, i32 9
+store i32 %r217, i32* %r219
+%r220 = lshr i416 %r216, 32
+%r221 = trunc i416 %r220 to i32
+%r223 = getelementptr i32, i32* %r1, i32 10
+store i32 %r221, i32* %r223
+%r224 = lshr i416 %r220, 32
+%r225 = trunc i416 %r224 to i32
+%r227 = getelementptr i32, i32* %r1, i32 11
+store i32 %r225, i32* %r227
+%r228 = lshr i416 %r224, 32
+%r229 = trunc i416 %r228 to i32
+%r231 = getelementptr i32, i32* %r1, i32 12
+store i32 %r229, i32* %r231
+br i1%r180, label %carry, label %nocarry
+nocarry:
+ret void
+carry:
+%r232 = load i32, i32* %r4
+%r233 = zext i32 %r232 to i64
+%r235 = getelementptr i32, i32* %r4, i32 1
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i64
+%r238 = shl i64 %r237, 32
+%r239 = or i64 %r233, %r238
+%r240 = zext i64 %r239 to i96
+%r242 = getelementptr i32, i32* %r4, i32 2
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i96
+%r245 = shl i96 %r244, 64
+%r246 = or i96 %r240, %r245
+%r247 = zext i96 %r246 to i128
+%r249 = getelementptr i32, i32* %r4, i32 3
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i128
+%r252 = shl i128 %r251, 96
+%r253 = or i128 %r247, %r252
+%r254 = zext i128 %r253 to i160
+%r256 = getelementptr i32, i32* %r4, i32 4
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i160
+%r259 = shl i160 %r258, 128
+%r260 = or i160 %r254, %r259
+%r261 = zext i160 %r260 to i192
+%r263 = getelementptr i32, i32* %r4, i32 5
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i192
+%r266 = shl i192 %r265, 160
+%r267 = or i192 %r261, %r266
+%r268 = zext i192 %r267 to i224
+%r270 = getelementptr i32, i32* %r4, i32 6
+%r271 = load i32, i32* %r270
+%r272 = zext i32 %r271 to i224
+%r273 = shl i224 %r272, 192
+%r274 = or i224 %r268, %r273
+%r275 = zext i224 %r274 to i256
+%r277 = getelementptr i32, i32* %r4, i32 7
+%r278 = load i32, i32* %r277
+%r279 = zext i32 %r278 to i256
+%r280 = shl i256 %r279, 224
+%r281 = or i256 %r275, %r280
+%r282 = zext i256 %r281 to i288
+%r284 = getelementptr i32, i32* %r4, i32 8
+%r285 = load i32, i32* %r284
+%r286 = zext i32 %r285 to i288
+%r287 = shl i288 %r286, 256
+%r288 = or i288 %r282, %r287
+%r289 = zext i288 %r288 to i320
+%r291 = getelementptr i32, i32* %r4, i32 9
+%r292 = load i32, i32* %r291
+%r293 = zext i32 %r292 to i320
+%r294 = shl i320 %r293, 288
+%r295 = or i320 %r289, %r294
+%r296 = zext i320 %r295 to i352
+%r298 = getelementptr i32, i32* %r4, i32 10
+%r299 = load i32, i32* %r298
+%r300 = zext i32 %r299 to i352
+%r301 = shl i352 %r300, 320
+%r302 = or i352 %r296, %r301
+%r303 = zext i352 %r302 to i384
+%r305 = getelementptr i32, i32* %r4, i32 11
+%r306 = load i32, i32* %r305
+%r307 = zext i32 %r306 to i384
+%r308 = shl i384 %r307, 352
+%r309 = or i384 %r303, %r308
+%r310 = zext i384 %r309 to i416
+%r312 = getelementptr i32, i32* %r4, i32 12
+%r313 = load i32, i32* %r312
+%r314 = zext i32 %r313 to i416
+%r315 = shl i416 %r314, 384
+%r316 = or i416 %r310, %r315
+%r317 = add i416 %r178, %r316
+%r318 = trunc i416 %r317 to i32
+%r320 = getelementptr i32, i32* %r1, i32 0
+store i32 %r318, i32* %r320
+%r321 = lshr i416 %r317, 32
+%r322 = trunc i416 %r321 to i32
+%r324 = getelementptr i32, i32* %r1, i32 1
+store i32 %r322, i32* %r324
+%r325 = lshr i416 %r321, 32
+%r326 = trunc i416 %r325 to i32
+%r328 = getelementptr i32, i32* %r1, i32 2
+store i32 %r326, i32* %r328
+%r329 = lshr i416 %r325, 32
+%r330 = trunc i416 %r329 to i32
+%r332 = getelementptr i32, i32* %r1, i32 3
+store i32 %r330, i32* %r332
+%r333 = lshr i416 %r329, 32
+%r334 = trunc i416 %r333 to i32
+%r336 = getelementptr i32, i32* %r1, i32 4
+store i32 %r334, i32* %r336
+%r337 = lshr i416 %r333, 32
+%r338 = trunc i416 %r337 to i32
+%r340 = getelementptr i32, i32* %r1, i32 5
+store i32 %r338, i32* %r340
+%r341 = lshr i416 %r337, 32
+%r342 = trunc i416 %r341 to i32
+%r344 = getelementptr i32, i32* %r1, i32 6
+store i32 %r342, i32* %r344
+%r345 = lshr i416 %r341, 32
+%r346 = trunc i416 %r345 to i32
+%r348 = getelementptr i32, i32* %r1, i32 7
+store i32 %r346, i32* %r348
+%r349 = lshr i416 %r345, 32
+%r350 = trunc i416 %r349 to i32
+%r352 = getelementptr i32, i32* %r1, i32 8
+store i32 %r350, i32* %r352
+%r353 = lshr i416 %r349, 32
+%r354 = trunc i416 %r353 to i32
+%r356 = getelementptr i32, i32* %r1, i32 9
+store i32 %r354, i32* %r356
+%r357 = lshr i416 %r353, 32
+%r358 = trunc i416 %r357 to i32
+%r360 = getelementptr i32, i32* %r1, i32 10
+store i32 %r358, i32* %r360
+%r361 = lshr i416 %r357, 32
+%r362 = trunc i416 %r361 to i32
+%r364 = getelementptr i32, i32* %r1, i32 11
+store i32 %r362, i32* %r364
+%r365 = lshr i416 %r361, 32
+%r366 = trunc i416 %r365 to i32
+%r368 = getelementptr i32, i32* %r1, i32 12
+store i32 %r366, i32* %r368
+ret void
+}
+define void @mcl_fp_subNF13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = load i32, i32* %r3
+%r91 = zext i32 %r90 to i64
+%r93 = getelementptr i32, i32* %r3, i32 1
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i64
+%r96 = shl i64 %r95, 32
+%r97 = or i64 %r91, %r96
+%r98 = zext i64 %r97 to i96
+%r100 = getelementptr i32, i32* %r3, i32 2
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i96
+%r103 = shl i96 %r102, 64
+%r104 = or i96 %r98, %r103
+%r105 = zext i96 %r104 to i128
+%r107 = getelementptr i32, i32* %r3, i32 3
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i128
+%r110 = shl i128 %r109, 96
+%r111 = or i128 %r105, %r110
+%r112 = zext i128 %r111 to i160
+%r114 = getelementptr i32, i32* %r3, i32 4
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i160
+%r117 = shl i160 %r116, 128
+%r118 = or i160 %r112, %r117
+%r119 = zext i160 %r118 to i192
+%r121 = getelementptr i32, i32* %r3, i32 5
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i192
+%r124 = shl i192 %r123, 160
+%r125 = or i192 %r119, %r124
+%r126 = zext i192 %r125 to i224
+%r128 = getelementptr i32, i32* %r3, i32 6
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i224
+%r131 = shl i224 %r130, 192
+%r132 = or i224 %r126, %r131
+%r133 = zext i224 %r132 to i256
+%r135 = getelementptr i32, i32* %r3, i32 7
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i256
+%r138 = shl i256 %r137, 224
+%r139 = or i256 %r133, %r138
+%r140 = zext i256 %r139 to i288
+%r142 = getelementptr i32, i32* %r3, i32 8
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i288
+%r145 = shl i288 %r144, 256
+%r146 = or i288 %r140, %r145
+%r147 = zext i288 %r146 to i320
+%r149 = getelementptr i32, i32* %r3, i32 9
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i320
+%r152 = shl i320 %r151, 288
+%r153 = or i320 %r147, %r152
+%r154 = zext i320 %r153 to i352
+%r156 = getelementptr i32, i32* %r3, i32 10
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i352
+%r159 = shl i352 %r158, 320
+%r160 = or i352 %r154, %r159
+%r161 = zext i352 %r160 to i384
+%r163 = getelementptr i32, i32* %r3, i32 11
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i384
+%r166 = shl i384 %r165, 352
+%r167 = or i384 %r161, %r166
+%r168 = zext i384 %r167 to i416
+%r170 = getelementptr i32, i32* %r3, i32 12
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i416
+%r173 = shl i416 %r172, 384
+%r174 = or i416 %r168, %r173
+%r175 = sub i416 %r89, %r174
+%r176 = lshr i416 %r175, 415
+%r177 = trunc i416 %r176 to i1
+%r178 = load i32, i32* %r4
+%r179 = zext i32 %r178 to i64
+%r181 = getelementptr i32, i32* %r4, i32 1
+%r182 = load i32, i32* %r181
+%r183 = zext i32 %r182 to i64
+%r184 = shl i64 %r183, 32
+%r185 = or i64 %r179, %r184
+%r186 = zext i64 %r185 to i96
+%r188 = getelementptr i32, i32* %r4, i32 2
+%r189 = load i32, i32* %r188
+%r190 = zext i32 %r189 to i96
+%r191 = shl i96 %r190, 64
+%r192 = or i96 %r186, %r191
+%r193 = zext i96 %r192 to i128
+%r195 = getelementptr i32, i32* %r4, i32 3
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i128
+%r198 = shl i128 %r197, 96
+%r199 = or i128 %r193, %r198
+%r200 = zext i128 %r199 to i160
+%r202 = getelementptr i32, i32* %r4, i32 4
+%r203 = load i32, i32* %r202
+%r204 = zext i32 %r203 to i160
+%r205 = shl i160 %r204, 128
+%r206 = or i160 %r200, %r205
+%r207 = zext i160 %r206 to i192
+%r209 = getelementptr i32, i32* %r4, i32 5
+%r210 = load i32, i32* %r209
+%r211 = zext i32 %r210 to i192
+%r212 = shl i192 %r211, 160
+%r213 = or i192 %r207, %r212
+%r214 = zext i192 %r213 to i224
+%r216 = getelementptr i32, i32* %r4, i32 6
+%r217 = load i32, i32* %r216
+%r218 = zext i32 %r217 to i224
+%r219 = shl i224 %r218, 192
+%r220 = or i224 %r214, %r219
+%r221 = zext i224 %r220 to i256
+%r223 = getelementptr i32, i32* %r4, i32 7
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i256
+%r226 = shl i256 %r225, 224
+%r227 = or i256 %r221, %r226
+%r228 = zext i256 %r227 to i288
+%r230 = getelementptr i32, i32* %r4, i32 8
+%r231 = load i32, i32* %r230
+%r232 = zext i32 %r231 to i288
+%r233 = shl i288 %r232, 256
+%r234 = or i288 %r228, %r233
+%r235 = zext i288 %r234 to i320
+%r237 = getelementptr i32, i32* %r4, i32 9
+%r238 = load i32, i32* %r237
+%r239 = zext i32 %r238 to i320
+%r240 = shl i320 %r239, 288
+%r241 = or i320 %r235, %r240
+%r242 = zext i320 %r241 to i352
+%r244 = getelementptr i32, i32* %r4, i32 10
+%r245 = load i32, i32* %r244
+%r246 = zext i32 %r245 to i352
+%r247 = shl i352 %r246, 320
+%r248 = or i352 %r242, %r247
+%r249 = zext i352 %r248 to i384
+%r251 = getelementptr i32, i32* %r4, i32 11
+%r252 = load i32, i32* %r251
+%r253 = zext i32 %r252 to i384
+%r254 = shl i384 %r253, 352
+%r255 = or i384 %r249, %r254
+%r256 = zext i384 %r255 to i416
+%r258 = getelementptr i32, i32* %r4, i32 12
+%r259 = load i32, i32* %r258
+%r260 = zext i32 %r259 to i416
+%r261 = shl i416 %r260, 384
+%r262 = or i416 %r256, %r261
+%r264 = select i1 %r177, i416 %r262, i416 0
+%r265 = add i416 %r175, %r264
+%r266 = trunc i416 %r265 to i32
+%r268 = getelementptr i32, i32* %r1, i32 0
+store i32 %r266, i32* %r268
+%r269 = lshr i416 %r265, 32
+%r270 = trunc i416 %r269 to i32
+%r272 = getelementptr i32, i32* %r1, i32 1
+store i32 %r270, i32* %r272
+%r273 = lshr i416 %r269, 32
+%r274 = trunc i416 %r273 to i32
+%r276 = getelementptr i32, i32* %r1, i32 2
+store i32 %r274, i32* %r276
+%r277 = lshr i416 %r273, 32
+%r278 = trunc i416 %r277 to i32
+%r280 = getelementptr i32, i32* %r1, i32 3
+store i32 %r278, i32* %r280
+%r281 = lshr i416 %r277, 32
+%r282 = trunc i416 %r281 to i32
+%r284 = getelementptr i32, i32* %r1, i32 4
+store i32 %r282, i32* %r284
+%r285 = lshr i416 %r281, 32
+%r286 = trunc i416 %r285 to i32
+%r288 = getelementptr i32, i32* %r1, i32 5
+store i32 %r286, i32* %r288
+%r289 = lshr i416 %r285, 32
+%r290 = trunc i416 %r289 to i32
+%r292 = getelementptr i32, i32* %r1, i32 6
+store i32 %r290, i32* %r292
+%r293 = lshr i416 %r289, 32
+%r294 = trunc i416 %r293 to i32
+%r296 = getelementptr i32, i32* %r1, i32 7
+store i32 %r294, i32* %r296
+%r297 = lshr i416 %r293, 32
+%r298 = trunc i416 %r297 to i32
+%r300 = getelementptr i32, i32* %r1, i32 8
+store i32 %r298, i32* %r300
+%r301 = lshr i416 %r297, 32
+%r302 = trunc i416 %r301 to i32
+%r304 = getelementptr i32, i32* %r1, i32 9
+store i32 %r302, i32* %r304
+%r305 = lshr i416 %r301, 32
+%r306 = trunc i416 %r305 to i32
+%r308 = getelementptr i32, i32* %r1, i32 10
+store i32 %r306, i32* %r308
+%r309 = lshr i416 %r305, 32
+%r310 = trunc i416 %r309 to i32
+%r312 = getelementptr i32, i32* %r1, i32 11
+store i32 %r310, i32* %r312
+%r313 = lshr i416 %r309, 32
+%r314 = trunc i416 %r313 to i32
+%r316 = getelementptr i32, i32* %r1, i32 12
+store i32 %r314, i32* %r316
+ret void
+}
+define void @mcl_fpDbl_add13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165
+%r167 = zext i768 %r166 to i800
+%r169 = getelementptr i32, i32* %r2, i32 24
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i800
+%r172 = shl i800 %r171, 768
+%r173 = or i800 %r167, %r172
+%r174 = zext i800 %r173 to i832
+%r176 = getelementptr i32, i32* %r2, i32 25
+%r177 = load i32, i32* %r176
+%r178 = zext i32 %r177 to i832
+%r179 = shl i832 %r178, 800
+%r180 = or i832 %r174, %r179
+%r181 = load i32, i32* %r3
+%r182 = zext i32 %r181 to i64
+%r184 = getelementptr i32, i32* %r3, i32 1
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i64
+%r187 = shl i64 %r186, 32
+%r188 = or i64 %r182, %r187
+%r189 = zext i64 %r188 to i96
+%r191 = getelementptr i32, i32* %r3, i32 2
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i96
+%r194 = shl i96 %r193, 64
+%r195 = or i96 %r189, %r194
+%r196 = zext i96 %r195 to i128
+%r198 = getelementptr i32, i32* %r3, i32 3
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i128
+%r201 = shl i128 %r200, 96
+%r202 = or i128 %r196, %r201
+%r203 = zext i128 %r202 to i160
+%r205 = getelementptr i32, i32* %r3, i32 4
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i160
+%r208 = shl i160 %r207, 128
+%r209 = or i160 %r203, %r208
+%r210 = zext i160 %r209 to i192
+%r212 = getelementptr i32, i32* %r3, i32 5
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i192
+%r215 = shl i192 %r214, 160
+%r216 = or i192 %r210, %r215
+%r217 = zext i192 %r216 to i224
+%r219 = getelementptr i32, i32* %r3, i32 6
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i224
+%r222 = shl i224 %r221, 192
+%r223 = or i224 %r217, %r222
+%r224 = zext i224 %r223 to i256
+%r226 = getelementptr i32, i32* %r3, i32 7
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i256
+%r229 = shl i256 %r228, 224
+%r230 = or i256 %r224, %r229
+%r231 = zext i256 %r230 to i288
+%r233 = getelementptr i32, i32* %r3, i32 8
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i288
+%r236 = shl i288 %r235, 256
+%r237 = or i288 %r231, %r236
+%r238 = zext i288 %r237 to i320
+%r240 = getelementptr i32, i32* %r3, i32 9
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i320
+%r243 = shl i320 %r242, 288
+%r244 = or i320 %r238, %r243
+%r245 = zext i320 %r244 to i352
+%r247 = getelementptr i32, i32* %r3, i32 10
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i352
+%r250 = shl i352 %r249, 320
+%r251 = or i352 %r245, %r250
+%r252 = zext i352 %r251 to i384
+%r254 = getelementptr i32, i32* %r3, i32 11
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i384
+%r257 = shl i384 %r256, 352
+%r258 = or i384 %r252, %r257
+%r259 = zext i384 %r258 to i416
+%r261 = getelementptr i32, i32* %r3, i32 12
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i416
+%r264 = shl i416 %r263, 384
+%r265 = or i416 %r259, %r264
+%r266 = zext i416 %r265 to i448
+%r268 = getelementptr i32, i32* %r3, i32 13
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i448
+%r271 = shl i448 %r270, 416
+%r272 = or i448 %r266, %r271
+%r273 = zext i448 %r272 to i480
+%r275 = getelementptr i32, i32* %r3, i32 14
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i480
+%r278 = shl i480 %r277, 448
+%r279 = or i480 %r273, %r278
+%r280 = zext i480 %r279 to i512
+%r282 = getelementptr i32, i32* %r3, i32 15
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i512
+%r285 = shl i512 %r284, 480
+%r286 = or i512 %r280, %r285
+%r287 = zext i512 %r286 to i544
+%r289 = getelementptr i32, i32* %r3, i32 16
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i544
+%r292 = shl i544 %r291, 512
+%r293 = or i544 %r287, %r292
+%r294 = zext i544 %r293 to i576
+%r296 = getelementptr i32, i32* %r3, i32 17
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i576
+%r299 = shl i576 %r298, 544
+%r300 = or i576 %r294, %r299
+%r301 = zext i576 %r300 to i608
+%r303 = getelementptr i32, i32* %r3, i32 18
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i608
+%r306 = shl i608 %r305, 576
+%r307 = or i608 %r301, %r306
+%r308 = zext i608 %r307 to i640
+%r310 = getelementptr i32, i32* %r3, i32 19
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i640
+%r313 = shl i640 %r312, 608
+%r314 = or i640 %r308, %r313
+%r315 = zext i640 %r314 to i672
+%r317 = getelementptr i32, i32* %r3, i32 20
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i672
+%r320 = shl i672 %r319, 640
+%r321 = or i672 %r315, %r320
+%r322 = zext i672 %r321 to i704
+%r324 = getelementptr i32, i32* %r3, i32 21
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i704
+%r327 = shl i704 %r326, 672
+%r328 = or i704 %r322, %r327
+%r329 = zext i704 %r328 to i736
+%r331 = getelementptr i32, i32* %r3, i32 22
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i736
+%r334 = shl i736 %r333, 704
+%r335 = or i736 %r329, %r334
+%r336 = zext i736 %r335 to i768
+%r338 = getelementptr i32, i32* %r3, i32 23
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i768
+%r341 = shl i768 %r340, 736
+%r342 = or i768 %r336, %r341
+%r343 = zext i768 %r342 to i800
+%r345 = getelementptr i32, i32* %r3, i32 24
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i800
+%r348 = shl i800 %r347, 768
+%r349 = or i800 %r343, %r348
+%r350 = zext i800 %r349 to i832
+%r352 = getelementptr i32, i32* %r3, i32 25
+%r353 = load i32, i32* %r352
+%r354 = zext i32 %r353 to i832
+%r355 = shl i832 %r354, 800
+%r356 = or i832 %r350, %r355
+%r357 = zext i832 %r180 to i864
+%r358 = zext i832 %r356 to i864
+%r359 = add i864 %r357, %r358
+%r360 = trunc i864 %r359 to i416
+%r361 = trunc i416 %r360 to i32
+%r363 = getelementptr i32, i32* %r1, i32 0
+store i32 %r361, i32* %r363
+%r364 = lshr i416 %r360, 32
+%r365 = trunc i416 %r364 to i32
+%r367 = getelementptr i32, i32* %r1, i32 1
+store i32 %r365, i32* %r367
+%r368 = lshr i416 %r364, 32
+%r369 = trunc i416 %r368 to i32
+%r371 = getelementptr i32, i32* %r1, i32 2
+store i32 %r369, i32* %r371
+%r372 = lshr i416 %r368, 32
+%r373 = trunc i416 %r372 to i32
+%r375 = getelementptr i32, i32* %r1, i32 3
+store i32 %r373, i32* %r375
+%r376 = lshr i416 %r372, 32
+%r377 = trunc i416 %r376 to i32
+%r379 = getelementptr i32, i32* %r1, i32 4
+store i32 %r377, i32* %r379
+%r380 = lshr i416 %r376, 32
+%r381 = trunc i416 %r380 to i32
+%r383 = getelementptr i32, i32* %r1, i32 5
+store i32 %r381, i32* %r383
+%r384 = lshr i416 %r380, 32
+%r385 = trunc i416 %r384 to i32
+%r387 = getelementptr i32, i32* %r1, i32 6
+store i32 %r385, i32* %r387
+%r388 = lshr i416 %r384, 32
+%r389 = trunc i416 %r388 to i32
+%r391 = getelementptr i32, i32* %r1, i32 7
+store i32 %r389, i32* %r391
+%r392 = lshr i416 %r388, 32
+%r393 = trunc i416 %r392 to i32
+%r395 = getelementptr i32, i32* %r1, i32 8
+store i32 %r393, i32* %r395
+%r396 = lshr i416 %r392, 32
+%r397 = trunc i416 %r396 to i32
+%r399 = getelementptr i32, i32* %r1, i32 9
+store i32 %r397, i32* %r399
+%r400 = lshr i416 %r396, 32
+%r401 = trunc i416 %r400 to i32
+%r403 = getelementptr i32, i32* %r1, i32 10
+store i32 %r401, i32* %r403
+%r404 = lshr i416 %r400, 32
+%r405 = trunc i416 %r404 to i32
+%r407 = getelementptr i32, i32* %r1, i32 11
+store i32 %r405, i32* %r407
+%r408 = lshr i416 %r404, 32
+%r409 = trunc i416 %r408 to i32
+%r411 = getelementptr i32, i32* %r1, i32 12
+store i32 %r409, i32* %r411
+%r412 = lshr i864 %r359, 416
+%r413 = trunc i864 %r412 to i448
+%r414 = load i32, i32* %r4
+%r415 = zext i32 %r414 to i64
+%r417 = getelementptr i32, i32* %r4, i32 1
+%r418 = load i32, i32* %r417
+%r419 = zext i32 %r418 to i64
+%r420 = shl i64 %r419, 32
+%r421 = or i64 %r415, %r420
+%r422 = zext i64 %r421 to i96
+%r424 = getelementptr i32, i32* %r4, i32 2
+%r425 = load i32, i32* %r424
+%r426 = zext i32 %r425 to i96
+%r427 = shl i96 %r426, 64
+%r428 = or i96 %r422, %r427
+%r429 = zext i96 %r428 to i128
+%r431 = getelementptr i32, i32* %r4, i32 3
+%r432 = load i32, i32* %r431
+%r433 = zext i32 %r432 to i128
+%r434 = shl i128 %r433, 96
+%r435 = or i128 %r429, %r434
+%r436 = zext i128 %r435 to i160
+%r438 = getelementptr i32, i32* %r4, i32 4
+%r439 = load i32, i32* %r438
+%r440 = zext i32 %r439 to i160
+%r441 = shl i160 %r440, 128
+%r442 = or i160 %r436, %r441
+%r443 = zext i160 %r442 to i192
+%r445 = getelementptr i32, i32* %r4, i32 5
+%r446 = load i32, i32* %r445
+%r447 = zext i32 %r446 to i192
+%r448 = shl i192 %r447, 160
+%r449 = or i192 %r443, %r448
+%r450 = zext i192 %r449 to i224
+%r452 = getelementptr i32, i32* %r4, i32 6
+%r453 = load i32, i32* %r452
+%r454 = zext i32 %r453 to i224
+%r455 = shl i224 %r454, 192
+%r456 = or i224 %r450, %r455
+%r457 = zext i224 %r456 to i256
+%r459 = getelementptr i32, i32* %r4, i32 7
+%r460 = load i32, i32* %r459
+%r461 = zext i32 %r460 to i256
+%r462 = shl i256 %r461, 224
+%r463 = or i256 %r457, %r462
+%r464 = zext i256 %r463 to i288
+%r466 = getelementptr i32, i32* %r4, i32 8
+%r467 = load i32, i32* %r466
+%r468 = zext i32 %r467 to i288
+%r469 = shl i288 %r468, 256
+%r470 = or i288 %r464, %r469
+%r471 = zext i288 %r470 to i320
+%r473 = getelementptr i32, i32* %r4, i32 9
+%r474 = load i32, i32* %r473
+%r475 = zext i32 %r474 to i320
+%r476 = shl i320 %r475, 288
+%r477 = or i320 %r471, %r476
+%r478 = zext i320 %r477 to i352
+%r480 = getelementptr i32, i32* %r4, i32 10
+%r481 = load i32, i32* %r480
+%r482 = zext i32 %r481 to i352
+%r483 = shl i352 %r482, 320
+%r484 = or i352 %r478, %r483
+%r485 = zext i352 %r484 to i384
+%r487 = getelementptr i32, i32* %r4, i32 11
+%r488 = load i32, i32* %r487
+%r489 = zext i32 %r488 to i384
+%r490 = shl i384 %r489, 352
+%r491 = or i384 %r485, %r490
+%r492 = zext i384 %r491 to i416
+%r494 = getelementptr i32, i32* %r4, i32 12
+%r495 = load i32, i32* %r494
+%r496 = zext i32 %r495 to i416
+%r497 = shl i416 %r496, 384
+%r498 = or i416 %r492, %r497
+%r499 = zext i416 %r498 to i448
+%r500 = sub i448 %r413, %r499
+%r501 = lshr i448 %r500, 416
+%r502 = trunc i448 %r501 to i1
+%r503 = select i1 %r502, i448 %r413, i448 %r500
+%r504 = trunc i448 %r503 to i416
+%r506 = getelementptr i32, i32* %r1, i32 13
+%r507 = trunc i416 %r504 to i32
+%r509 = getelementptr i32, i32* %r506, i32 0
+store i32 %r507, i32* %r509
+%r510 = lshr i416 %r504, 32
+%r511 = trunc i416 %r510 to i32
+%r513 = getelementptr i32, i32* %r506, i32 1
+store i32 %r511, i32* %r513
+%r514 = lshr i416 %r510, 32
+%r515 = trunc i416 %r514 to i32
+%r517 = getelementptr i32, i32* %r506, i32 2
+store i32 %r515, i32* %r517
+%r518 = lshr i416 %r514, 32
+%r519 = trunc i416 %r518 to i32
+%r521 = getelementptr i32, i32* %r506, i32 3
+store i32 %r519, i32* %r521
+%r522 = lshr i416 %r518, 32
+%r523 = trunc i416 %r522 to i32
+%r525 = getelementptr i32, i32* %r506, i32 4
+store i32 %r523, i32* %r525
+%r526 = lshr i416 %r522, 32
+%r527 = trunc i416 %r526 to i32
+%r529 = getelementptr i32, i32* %r506, i32 5
+store i32 %r527, i32* %r529
+%r530 = lshr i416 %r526, 32
+%r531 = trunc i416 %r530 to i32
+%r533 = getelementptr i32, i32* %r506, i32 6
+store i32 %r531, i32* %r533
+%r534 = lshr i416 %r530, 32
+%r535 = trunc i416 %r534 to i32
+%r537 = getelementptr i32, i32* %r506, i32 7
+store i32 %r535, i32* %r537
+%r538 = lshr i416 %r534, 32
+%r539 = trunc i416 %r538 to i32
+%r541 = getelementptr i32, i32* %r506, i32 8
+store i32 %r539, i32* %r541
+%r542 = lshr i416 %r538, 32
+%r543 = trunc i416 %r542 to i32
+%r545 = getelementptr i32, i32* %r506, i32 9
+store i32 %r543, i32* %r545
+%r546 = lshr i416 %r542, 32
+%r547 = trunc i416 %r546 to i32
+%r549 = getelementptr i32, i32* %r506, i32 10
+store i32 %r547, i32* %r549
+%r550 = lshr i416 %r546, 32
+%r551 = trunc i416 %r550 to i32
+%r553 = getelementptr i32, i32* %r506, i32 11
+store i32 %r551, i32* %r553
+%r554 = lshr i416 %r550, 32
+%r555 = trunc i416 %r554 to i32
+%r557 = getelementptr i32, i32* %r506, i32 12
+store i32 %r555, i32* %r557
+ret void
+}
+define void @mcl_fpDbl_sub13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; r1[0..25] = r2[0..25] - r3[0..25]; on borrow the 13-limb value at r4 is added to the upper 13 limbs
+{ ; operands are arrays of 32-bit limbs, least-significant limb first; r4 is presumably the field modulus - confirm against callers
+%r5 = load i32, i32* %r2 ; pack the 26 limbs at r2 into one wide integer: limb k lands at bit 32*k
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165
+%r167 = zext i768 %r166 to i800
+%r169 = getelementptr i32, i32* %r2, i32 24
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i800
+%r172 = shl i800 %r171, 768
+%r173 = or i800 %r167, %r172
+%r174 = zext i800 %r173 to i832
+%r176 = getelementptr i32, i32* %r2, i32 25
+%r177 = load i32, i32* %r176
+%r178 = zext i32 %r177 to i832
+%r179 = shl i832 %r178, 800
+%r180 = or i832 %r174, %r179 ; %r180 = complete 832-bit minuend read from r2
+%r181 = load i32, i32* %r3 ; pack the 26 limbs at r3 the same way
+%r182 = zext i32 %r181 to i64
+%r184 = getelementptr i32, i32* %r3, i32 1
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i64
+%r187 = shl i64 %r186, 32
+%r188 = or i64 %r182, %r187
+%r189 = zext i64 %r188 to i96
+%r191 = getelementptr i32, i32* %r3, i32 2
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i96
+%r194 = shl i96 %r193, 64
+%r195 = or i96 %r189, %r194
+%r196 = zext i96 %r195 to i128
+%r198 = getelementptr i32, i32* %r3, i32 3
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i128
+%r201 = shl i128 %r200, 96
+%r202 = or i128 %r196, %r201
+%r203 = zext i128 %r202 to i160
+%r205 = getelementptr i32, i32* %r3, i32 4
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i160
+%r208 = shl i160 %r207, 128
+%r209 = or i160 %r203, %r208
+%r210 = zext i160 %r209 to i192
+%r212 = getelementptr i32, i32* %r3, i32 5
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i192
+%r215 = shl i192 %r214, 160
+%r216 = or i192 %r210, %r215
+%r217 = zext i192 %r216 to i224
+%r219 = getelementptr i32, i32* %r3, i32 6
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i224
+%r222 = shl i224 %r221, 192
+%r223 = or i224 %r217, %r222
+%r224 = zext i224 %r223 to i256
+%r226 = getelementptr i32, i32* %r3, i32 7
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i256
+%r229 = shl i256 %r228, 224
+%r230 = or i256 %r224, %r229
+%r231 = zext i256 %r230 to i288
+%r233 = getelementptr i32, i32* %r3, i32 8
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i288
+%r236 = shl i288 %r235, 256
+%r237 = or i288 %r231, %r236
+%r238 = zext i288 %r237 to i320
+%r240 = getelementptr i32, i32* %r3, i32 9
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i320
+%r243 = shl i320 %r242, 288
+%r244 = or i320 %r238, %r243
+%r245 = zext i320 %r244 to i352
+%r247 = getelementptr i32, i32* %r3, i32 10
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i352
+%r250 = shl i352 %r249, 320
+%r251 = or i352 %r245, %r250
+%r252 = zext i352 %r251 to i384
+%r254 = getelementptr i32, i32* %r3, i32 11
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i384
+%r257 = shl i384 %r256, 352
+%r258 = or i384 %r252, %r257
+%r259 = zext i384 %r258 to i416
+%r261 = getelementptr i32, i32* %r3, i32 12
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i416
+%r264 = shl i416 %r263, 384
+%r265 = or i416 %r259, %r264
+%r266 = zext i416 %r265 to i448
+%r268 = getelementptr i32, i32* %r3, i32 13
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i448
+%r271 = shl i448 %r270, 416
+%r272 = or i448 %r266, %r271
+%r273 = zext i448 %r272 to i480
+%r275 = getelementptr i32, i32* %r3, i32 14
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i480
+%r278 = shl i480 %r277, 448
+%r279 = or i480 %r273, %r278
+%r280 = zext i480 %r279 to i512
+%r282 = getelementptr i32, i32* %r3, i32 15
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i512
+%r285 = shl i512 %r284, 480
+%r286 = or i512 %r280, %r285
+%r287 = zext i512 %r286 to i544
+%r289 = getelementptr i32, i32* %r3, i32 16
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i544
+%r292 = shl i544 %r291, 512
+%r293 = or i544 %r287, %r292
+%r294 = zext i544 %r293 to i576
+%r296 = getelementptr i32, i32* %r3, i32 17
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i576
+%r299 = shl i576 %r298, 544
+%r300 = or i576 %r294, %r299
+%r301 = zext i576 %r300 to i608
+%r303 = getelementptr i32, i32* %r3, i32 18
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i608
+%r306 = shl i608 %r305, 576
+%r307 = or i608 %r301, %r306
+%r308 = zext i608 %r307 to i640
+%r310 = getelementptr i32, i32* %r3, i32 19
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i640
+%r313 = shl i640 %r312, 608
+%r314 = or i640 %r308, %r313
+%r315 = zext i640 %r314 to i672
+%r317 = getelementptr i32, i32* %r3, i32 20
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i672
+%r320 = shl i672 %r319, 640
+%r321 = or i672 %r315, %r320
+%r322 = zext i672 %r321 to i704
+%r324 = getelementptr i32, i32* %r3, i32 21
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i704
+%r327 = shl i704 %r326, 672
+%r328 = or i704 %r322, %r327
+%r329 = zext i704 %r328 to i736
+%r331 = getelementptr i32, i32* %r3, i32 22
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i736
+%r334 = shl i736 %r333, 704
+%r335 = or i736 %r329, %r334
+%r336 = zext i736 %r335 to i768
+%r338 = getelementptr i32, i32* %r3, i32 23
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i768
+%r341 = shl i768 %r340, 736
+%r342 = or i768 %r336, %r341
+%r343 = zext i768 %r342 to i800
+%r345 = getelementptr i32, i32* %r3, i32 24
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i800
+%r348 = shl i800 %r347, 768
+%r349 = or i800 %r343, %r348
+%r350 = zext i800 %r349 to i832
+%r352 = getelementptr i32, i32* %r3, i32 25
+%r353 = load i32, i32* %r352
+%r354 = zext i32 %r353 to i832
+%r355 = shl i832 %r354, 800
+%r356 = or i832 %r350, %r355 ; %r356 = complete 832-bit subtrahend read from r3
+%r357 = zext i832 %r180 to i864 ; widen both operands to i864 so a borrow is visible above bit 831
+%r358 = zext i832 %r356 to i864
+%r359 = sub i864 %r357, %r358 ; wrapped difference; bit 832 is set exactly when the r2 value is smaller than the r3 value
+%r360 = trunc i864 %r359 to i416 ; low 13 limbs of the difference
+%r361 = trunc i416 %r360 to i32 ; store them to r1[0..12], shifting right 32 bits per limb
+%r363 = getelementptr i32, i32* %r1, i32 0
+store i32 %r361, i32* %r363
+%r364 = lshr i416 %r360, 32
+%r365 = trunc i416 %r364 to i32
+%r367 = getelementptr i32, i32* %r1, i32 1
+store i32 %r365, i32* %r367
+%r368 = lshr i416 %r364, 32
+%r369 = trunc i416 %r368 to i32
+%r371 = getelementptr i32, i32* %r1, i32 2
+store i32 %r369, i32* %r371
+%r372 = lshr i416 %r368, 32
+%r373 = trunc i416 %r372 to i32
+%r375 = getelementptr i32, i32* %r1, i32 3
+store i32 %r373, i32* %r375
+%r376 = lshr i416 %r372, 32
+%r377 = trunc i416 %r376 to i32
+%r379 = getelementptr i32, i32* %r1, i32 4
+store i32 %r377, i32* %r379
+%r380 = lshr i416 %r376, 32
+%r381 = trunc i416 %r380 to i32
+%r383 = getelementptr i32, i32* %r1, i32 5
+store i32 %r381, i32* %r383
+%r384 = lshr i416 %r380, 32
+%r385 = trunc i416 %r384 to i32
+%r387 = getelementptr i32, i32* %r1, i32 6
+store i32 %r385, i32* %r387
+%r388 = lshr i416 %r384, 32
+%r389 = trunc i416 %r388 to i32
+%r391 = getelementptr i32, i32* %r1, i32 7
+store i32 %r389, i32* %r391
+%r392 = lshr i416 %r388, 32
+%r393 = trunc i416 %r392 to i32
+%r395 = getelementptr i32, i32* %r1, i32 8
+store i32 %r393, i32* %r395
+%r396 = lshr i416 %r392, 32
+%r397 = trunc i416 %r396 to i32
+%r399 = getelementptr i32, i32* %r1, i32 9
+store i32 %r397, i32* %r399
+%r400 = lshr i416 %r396, 32
+%r401 = trunc i416 %r400 to i32
+%r403 = getelementptr i32, i32* %r1, i32 10
+store i32 %r401, i32* %r403
+%r404 = lshr i416 %r400, 32
+%r405 = trunc i416 %r404 to i32
+%r407 = getelementptr i32, i32* %r1, i32 11
+store i32 %r405, i32* %r407
+%r408 = lshr i416 %r404, 32
+%r409 = trunc i416 %r408 to i32
+%r411 = getelementptr i32, i32* %r1, i32 12
+store i32 %r409, i32* %r411
+%r412 = lshr i864 %r359, 416 ; upper half of the difference: bits 416..831
+%r413 = trunc i864 %r412 to i416
+%r414 = lshr i864 %r359, 832 ; borrow flag = bit 832 of the difference
+%r415 = trunc i864 %r414 to i1
+%r416 = load i32, i32* %r4 ; pack the 13 limbs at r4 into an i416
+%r417 = zext i32 %r416 to i64
+%r419 = getelementptr i32, i32* %r4, i32 1
+%r420 = load i32, i32* %r419
+%r421 = zext i32 %r420 to i64
+%r422 = shl i64 %r421, 32
+%r423 = or i64 %r417, %r422
+%r424 = zext i64 %r423 to i96
+%r426 = getelementptr i32, i32* %r4, i32 2
+%r427 = load i32, i32* %r426
+%r428 = zext i32 %r427 to i96
+%r429 = shl i96 %r428, 64
+%r430 = or i96 %r424, %r429
+%r431 = zext i96 %r430 to i128
+%r433 = getelementptr i32, i32* %r4, i32 3
+%r434 = load i32, i32* %r433
+%r435 = zext i32 %r434 to i128
+%r436 = shl i128 %r435, 96
+%r437 = or i128 %r431, %r436
+%r438 = zext i128 %r437 to i160
+%r440 = getelementptr i32, i32* %r4, i32 4
+%r441 = load i32, i32* %r440
+%r442 = zext i32 %r441 to i160
+%r443 = shl i160 %r442, 128
+%r444 = or i160 %r438, %r443
+%r445 = zext i160 %r444 to i192
+%r447 = getelementptr i32, i32* %r4, i32 5
+%r448 = load i32, i32* %r447
+%r449 = zext i32 %r448 to i192
+%r450 = shl i192 %r449, 160
+%r451 = or i192 %r445, %r450
+%r452 = zext i192 %r451 to i224
+%r454 = getelementptr i32, i32* %r4, i32 6
+%r455 = load i32, i32* %r454
+%r456 = zext i32 %r455 to i224
+%r457 = shl i224 %r456, 192
+%r458 = or i224 %r452, %r457
+%r459 = zext i224 %r458 to i256
+%r461 = getelementptr i32, i32* %r4, i32 7
+%r462 = load i32, i32* %r461
+%r463 = zext i32 %r462 to i256
+%r464 = shl i256 %r463, 224
+%r465 = or i256 %r459, %r464
+%r466 = zext i256 %r465 to i288
+%r468 = getelementptr i32, i32* %r4, i32 8
+%r469 = load i32, i32* %r468
+%r470 = zext i32 %r469 to i288
+%r471 = shl i288 %r470, 256
+%r472 = or i288 %r466, %r471
+%r473 = zext i288 %r472 to i320
+%r475 = getelementptr i32, i32* %r4, i32 9
+%r476 = load i32, i32* %r475
+%r477 = zext i32 %r476 to i320
+%r478 = shl i320 %r477, 288
+%r479 = or i320 %r473, %r478
+%r480 = zext i320 %r479 to i352
+%r482 = getelementptr i32, i32* %r4, i32 10
+%r483 = load i32, i32* %r482
+%r484 = zext i32 %r483 to i352
+%r485 = shl i352 %r484, 320
+%r486 = or i352 %r480, %r485
+%r487 = zext i352 %r486 to i384
+%r489 = getelementptr i32, i32* %r4, i32 11
+%r490 = load i32, i32* %r489
+%r491 = zext i32 %r490 to i384
+%r492 = shl i384 %r491, 352
+%r493 = or i384 %r487, %r492
+%r494 = zext i384 %r493 to i416
+%r496 = getelementptr i32, i32* %r4, i32 12
+%r497 = load i32, i32* %r496
+%r498 = zext i32 %r497 to i416
+%r499 = shl i416 %r498, 384
+%r500 = or i416 %r494, %r499
+%r502 = select i1 %r415, i416 %r500, i416 0 ; addend = r4 value when the subtraction borrowed, otherwise 0
+%r503 = add i416 %r413, %r502 ; corrected upper half; the add wraps modulo 2^416
+%r505 = getelementptr i32, i32* %r1, i32 13 ; %r505 = &r1[13]; the corrected half is stored to r1[13..25]
+%r506 = trunc i416 %r503 to i32
+%r508 = getelementptr i32, i32* %r505, i32 0
+store i32 %r506, i32* %r508
+%r509 = lshr i416 %r503, 32
+%r510 = trunc i416 %r509 to i32
+%r512 = getelementptr i32, i32* %r505, i32 1
+store i32 %r510, i32* %r512
+%r513 = lshr i416 %r509, 32
+%r514 = trunc i416 %r513 to i32
+%r516 = getelementptr i32, i32* %r505, i32 2
+store i32 %r514, i32* %r516
+%r517 = lshr i416 %r513, 32
+%r518 = trunc i416 %r517 to i32
+%r520 = getelementptr i32, i32* %r505, i32 3
+store i32 %r518, i32* %r520
+%r521 = lshr i416 %r517, 32
+%r522 = trunc i416 %r521 to i32
+%r524 = getelementptr i32, i32* %r505, i32 4
+store i32 %r522, i32* %r524
+%r525 = lshr i416 %r521, 32
+%r526 = trunc i416 %r525 to i32
+%r528 = getelementptr i32, i32* %r505, i32 5
+store i32 %r526, i32* %r528
+%r529 = lshr i416 %r525, 32
+%r530 = trunc i416 %r529 to i32
+%r532 = getelementptr i32, i32* %r505, i32 6
+store i32 %r530, i32* %r532
+%r533 = lshr i416 %r529, 32
+%r534 = trunc i416 %r533 to i32
+%r536 = getelementptr i32, i32* %r505, i32 7
+store i32 %r534, i32* %r536
+%r537 = lshr i416 %r533, 32
+%r538 = trunc i416 %r537 to i32
+%r540 = getelementptr i32, i32* %r505, i32 8
+store i32 %r538, i32* %r540
+%r541 = lshr i416 %r537, 32
+%r542 = trunc i416 %r541 to i32
+%r544 = getelementptr i32, i32* %r505, i32 9
+store i32 %r542, i32* %r544
+%r545 = lshr i416 %r541, 32
+%r546 = trunc i416 %r545 to i32
+%r548 = getelementptr i32, i32* %r505, i32 10
+store i32 %r546, i32* %r548
+%r549 = lshr i416 %r545, 32
+%r550 = trunc i416 %r549 to i32
+%r552 = getelementptr i32, i32* %r505, i32 11
+store i32 %r550, i32* %r552
+%r553 = lshr i416 %r549, 32
+%r554 = trunc i416 %r553 to i32
+%r556 = getelementptr i32, i32* %r505, i32 12
+store i32 %r554, i32* %r556
+ret void
+}
+define i480 @mulPv448x32(i32* noalias  %r2, i32 %r3) ; combines 14 per-limb results for the limbs at r2 and the 32-bit scalar r3 into one i480: lows + (highs << 32)
+{ ; NOTE(review): presumably mulPos32x32(p, y, i) returns the 64-bit product p[i]*y and extractHigh32 its upper 32 bits - both are defined outside this chunk, confirm
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0) ; for each index 0..13: fetch the 64-bit result, keep its low word (trunc) and high word (extractHigh32)
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
+%r34 = trunc i64 %r33 to i32
+%r35 = call i32 @extractHigh32(i64 %r33)
+%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
+%r38 = trunc i64 %r37 to i32
+%r39 = call i32 @extractHigh32(i64 %r37)
+%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
+%r42 = trunc i64 %r41 to i32
+%r43 = call i32 @extractHigh32(i64 %r41)
+%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
+%r46 = trunc i64 %r45 to i32
+%r47 = call i32 @extractHigh32(i64 %r45)
+%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
+%r50 = trunc i64 %r49 to i32
+%r51 = call i32 @extractHigh32(i64 %r49)
+%r53 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 12)
+%r54 = trunc i64 %r53 to i32
+%r55 = call i32 @extractHigh32(i64 %r53)
+%r57 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 13)
+%r58 = trunc i64 %r57 to i32
+%r59 = call i32 @extractHigh32(i64 %r57)
+%r60 = zext i32 %r6 to i64 ; pack the 14 low words into an i448: word k lands at bit 32*k
+%r61 = zext i32 %r10 to i64
+%r62 = shl i64 %r61, 32
+%r63 = or i64 %r60, %r62
+%r64 = zext i64 %r63 to i96
+%r65 = zext i32 %r14 to i96
+%r66 = shl i96 %r65, 64
+%r67 = or i96 %r64, %r66
+%r68 = zext i96 %r67 to i128
+%r69 = zext i32 %r18 to i128
+%r70 = shl i128 %r69, 96
+%r71 = or i128 %r68, %r70
+%r72 = zext i128 %r71 to i160
+%r73 = zext i32 %r22 to i160
+%r74 = shl i160 %r73, 128
+%r75 = or i160 %r72, %r74
+%r76 = zext i160 %r75 to i192
+%r77 = zext i32 %r26 to i192
+%r78 = shl i192 %r77, 160
+%r79 = or i192 %r76, %r78
+%r80 = zext i192 %r79 to i224
+%r81 = zext i32 %r30 to i224
+%r82 = shl i224 %r81, 192
+%r83 = or i224 %r80, %r82
+%r84 = zext i224 %r83 to i256
+%r85 = zext i32 %r34 to i256
+%r86 = shl i256 %r85, 224
+%r87 = or i256 %r84, %r86
+%r88 = zext i256 %r87 to i288
+%r89 = zext i32 %r38 to i288
+%r90 = shl i288 %r89, 256
+%r91 = or i288 %r88, %r90
+%r92 = zext i288 %r91 to i320
+%r93 = zext i32 %r42 to i320
+%r94 = shl i320 %r93, 288
+%r95 = or i320 %r92, %r94
+%r96 = zext i320 %r95 to i352
+%r97 = zext i32 %r46 to i352
+%r98 = shl i352 %r97, 320
+%r99 = or i352 %r96, %r98
+%r100 = zext i352 %r99 to i384
+%r101 = zext i32 %r50 to i384
+%r102 = shl i384 %r101, 352
+%r103 = or i384 %r100, %r102
+%r104 = zext i384 %r103 to i416
+%r105 = zext i32 %r54 to i416
+%r106 = shl i416 %r105, 384
+%r107 = or i416 %r104, %r106
+%r108 = zext i416 %r107 to i448
+%r109 = zext i32 %r58 to i448
+%r110 = shl i448 %r109, 416
+%r111 = or i448 %r108, %r110 ; %r111 = all 14 low words
+%r112 = zext i32 %r7 to i64 ; pack the 14 high words the same way
+%r113 = zext i32 %r11 to i64
+%r114 = shl i64 %r113, 32
+%r115 = or i64 %r112, %r114
+%r116 = zext i64 %r115 to i96
+%r117 = zext i32 %r15 to i96
+%r118 = shl i96 %r117, 64
+%r119 = or i96 %r116, %r118
+%r120 = zext i96 %r119 to i128
+%r121 = zext i32 %r19 to i128
+%r122 = shl i128 %r121, 96
+%r123 = or i128 %r120, %r122
+%r124 = zext i128 %r123 to i160
+%r125 = zext i32 %r23 to i160
+%r126 = shl i160 %r125, 128
+%r127 = or i160 %r124, %r126
+%r128 = zext i160 %r127 to i192
+%r129 = zext i32 %r27 to i192
+%r130 = shl i192 %r129, 160
+%r131 = or i192 %r128, %r130
+%r132 = zext i192 %r131 to i224
+%r133 = zext i32 %r31 to i224
+%r134 = shl i224 %r133, 192
+%r135 = or i224 %r132, %r134
+%r136 = zext i224 %r135 to i256
+%r137 = zext i32 %r35 to i256
+%r138 = shl i256 %r137, 224
+%r139 = or i256 %r136, %r138
+%r140 = zext i256 %r139 to i288
+%r141 = zext i32 %r39 to i288
+%r142 = shl i288 %r141, 256
+%r143 = or i288 %r140, %r142
+%r144 = zext i288 %r143 to i320
+%r145 = zext i32 %r43 to i320
+%r146 = shl i320 %r145, 288
+%r147 = or i320 %r144, %r146
+%r148 = zext i320 %r147 to i352
+%r149 = zext i32 %r47 to i352
+%r150 = shl i352 %r149, 320
+%r151 = or i352 %r148, %r150
+%r152 = zext i352 %r151 to i384
+%r153 = zext i32 %r51 to i384
+%r154 = shl i384 %r153, 352
+%r155 = or i384 %r152, %r154
+%r156 = zext i384 %r155 to i416
+%r157 = zext i32 %r55 to i416
+%r158 = shl i416 %r157, 384
+%r159 = or i416 %r156, %r158
+%r160 = zext i416 %r159 to i448
+%r161 = zext i32 %r59 to i448
+%r162 = shl i448 %r161, 416
+%r163 = or i448 %r160, %r162 ; %r163 = all 14 high words
+%r164 = zext i448 %r111 to i480 ; widen both halves to the i480 return type
+%r165 = zext i448 %r163 to i480
+%r166 = shl i480 %r165, 32 ; each high word belongs one limb above its low word
+%r167 = add i480 %r164, %r166 ; the add carries between adjacent limbs
+ret i480 %r167
+}
+define void @mcl_fp_mulUnitPre14L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3) ; writes the i480 result of mulPv448x32(r2, r3) to r1[0..14] as 32-bit limbs, least-significant limb first
+{
+%r4 = call i480 @mulPv448x32(i32* %r2, i32 %r3) ; r2 and r3 are forwarded unchanged to the helper
+%r5 = trunc i480 %r4 to i32 ; limb 0 = lowest 32 bits
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i480 %r4, 32 ; each following limb: shift right another 32 bits, truncate, store
+%r9 = trunc i480 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i480 %r8, 32
+%r13 = trunc i480 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i480 %r12, 32
+%r17 = trunc i480 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i480 %r16, 32
+%r21 = trunc i480 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+%r24 = lshr i480 %r20, 32
+%r25 = trunc i480 %r24 to i32
+%r27 = getelementptr i32, i32* %r1, i32 5
+store i32 %r25, i32* %r27
+%r28 = lshr i480 %r24, 32
+%r29 = trunc i480 %r28 to i32
+%r31 = getelementptr i32, i32* %r1, i32 6
+store i32 %r29, i32* %r31
+%r32 = lshr i480 %r28, 32
+%r33 = trunc i480 %r32 to i32
+%r35 = getelementptr i32, i32* %r1, i32 7
+store i32 %r33, i32* %r35
+%r36 = lshr i480 %r32, 32
+%r37 = trunc i480 %r36 to i32
+%r39 = getelementptr i32, i32* %r1, i32 8
+store i32 %r37, i32* %r39
+%r40 = lshr i480 %r36, 32
+%r41 = trunc i480 %r40 to i32
+%r43 = getelementptr i32, i32* %r1, i32 9
+store i32 %r41, i32* %r43
+%r44 = lshr i480 %r40, 32
+%r45 = trunc i480 %r44 to i32
+%r47 = getelementptr i32, i32* %r1, i32 10
+store i32 %r45, i32* %r47
+%r48 = lshr i480 %r44, 32
+%r49 = trunc i480 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 11
+store i32 %r49, i32* %r51
+%r52 = lshr i480 %r48, 32
+%r53 = trunc i480 %r52 to i32
+%r55 = getelementptr i32, i32* %r1, i32 12
+store i32 %r53, i32* %r55
+%r56 = lshr i480 %r52, 32
+%r57 = trunc i480 %r56 to i32
+%r59 = getelementptr i32, i32* %r1, i32 13
+store i32 %r57, i32* %r59
+%r60 = lshr i480 %r56, 32
+%r61 = trunc i480 %r60 to i32 ; top limb = bits 448..479 of the result
+%r63 = getelementptr i32, i32* %r1, i32 14
+store i32 %r61, i32* %r63
+ret void
+}
+define void @mcl_fpDbl_mulPre14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r2, i32 7
+%r7 = getelementptr i32, i32* %r3, i32 7
+%r9 = getelementptr i32, i32* %r1, i32 14
+call void @mcl_fpDbl_mulPre7L(i32* %r1, i32* %r2, i32* %r3)
+call void @mcl_fpDbl_mulPre7L(i32* %r9, i32* %r5, i32* %r7)
+%r10 = load i32, i32* %r5
+%r11 = zext i32 %r10 to i64
+%r13 = getelementptr i32, i32* %r5, i32 1
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i64
+%r16 = shl i64 %r15, 32
+%r17 = or i64 %r11, %r16
+%r18 = zext i64 %r17 to i96
+%r20 = getelementptr i32, i32* %r5, i32 2
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i96
+%r23 = shl i96 %r22, 64
+%r24 = or i96 %r18, %r23
+%r25 = zext i96 %r24 to i128
+%r27 = getelementptr i32, i32* %r5, i32 3
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i128
+%r30 = shl i128 %r29, 96
+%r31 = or i128 %r25, %r30
+%r32 = zext i128 %r31 to i160
+%r34 = getelementptr i32, i32* %r5, i32 4
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i160
+%r37 = shl i160 %r36, 128
+%r38 = or i160 %r32, %r37
+%r39 = zext i160 %r38 to i192
+%r41 = getelementptr i32, i32* %r5, i32 5
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i192
+%r44 = shl i192 %r43, 160
+%r45 = or i192 %r39, %r44
+%r46 = zext i192 %r45 to i224
+%r48 = getelementptr i32, i32* %r5, i32 6
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i224
+%r51 = shl i224 %r50, 192
+%r52 = or i224 %r46, %r51
+%r53 = zext i224 %r52 to i256
+%r54 = load i32, i32* %r2
+%r55 = zext i32 %r54 to i64
+%r57 = getelementptr i32, i32* %r2, i32 1
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i64
+%r60 = shl i64 %r59, 32
+%r61 = or i64 %r55, %r60
+%r62 = zext i64 %r61 to i96
+%r64 = getelementptr i32, i32* %r2, i32 2
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i96
+%r67 = shl i96 %r66, 64
+%r68 = or i96 %r62, %r67
+%r69 = zext i96 %r68 to i128
+%r71 = getelementptr i32, i32* %r2, i32 3
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i128
+%r74 = shl i128 %r73, 96
+%r75 = or i128 %r69, %r74
+%r76 = zext i128 %r75 to i160
+%r78 = getelementptr i32, i32* %r2, i32 4
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i160
+%r81 = shl i160 %r80, 128
+%r82 = or i160 %r76, %r81
+%r83 = zext i160 %r82 to i192
+%r85 = getelementptr i32, i32* %r2, i32 5
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i192
+%r88 = shl i192 %r87, 160
+%r89 = or i192 %r83, %r88
+%r90 = zext i192 %r89 to i224
+%r92 = getelementptr i32, i32* %r2, i32 6
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i224
+%r95 = shl i224 %r94, 192
+%r96 = or i224 %r90, %r95
+%r97 = zext i224 %r96 to i256
+%r98 = load i32, i32* %r7
+%r99 = zext i32 %r98 to i64
+%r101 = getelementptr i32, i32* %r7, i32 1
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i64
+%r104 = shl i64 %r103, 32
+%r105 = or i64 %r99, %r104
+%r106 = zext i64 %r105 to i96
+%r108 = getelementptr i32, i32* %r7, i32 2
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i96
+%r111 = shl i96 %r110, 64
+%r112 = or i96 %r106, %r111
+%r113 = zext i96 %r112 to i128
+%r115 = getelementptr i32, i32* %r7, i32 3
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i128
+%r118 = shl i128 %r117, 96
+%r119 = or i128 %r113, %r118
+%r120 = zext i128 %r119 to i160
+%r122 = getelementptr i32, i32* %r7, i32 4
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i160
+%r125 = shl i160 %r124, 128
+%r126 = or i160 %r120, %r125
+%r127 = zext i160 %r126 to i192
+%r129 = getelementptr i32, i32* %r7, i32 5
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i192
+%r132 = shl i192 %r131, 160
+%r133 = or i192 %r127, %r132
+%r134 = zext i192 %r133 to i224
+%r136 = getelementptr i32, i32* %r7, i32 6
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i224
+%r139 = shl i224 %r138, 192
+%r140 = or i224 %r134, %r139
+%r141 = zext i224 %r140 to i256
+%r142 = load i32, i32* %r3
+%r143 = zext i32 %r142 to i64
+%r145 = getelementptr i32, i32* %r3, i32 1
+%r146 = load i32, i32* %r145
+%r147 = zext i32 %r146 to i64
+%r148 = shl i64 %r147, 32
+%r149 = or i64 %r143, %r148
+%r150 = zext i64 %r149 to i96
+%r152 = getelementptr i32, i32* %r3, i32 2
+%r153 = load i32, i32* %r152
+%r154 = zext i32 %r153 to i96
+%r155 = shl i96 %r154, 64
+%r156 = or i96 %r150, %r155
+%r157 = zext i96 %r156 to i128
+%r159 = getelementptr i32, i32* %r3, i32 3
+%r160 = load i32, i32* %r159
+%r161 = zext i32 %r160 to i128
+%r162 = shl i128 %r161, 96
+%r163 = or i128 %r157, %r162
+%r164 = zext i128 %r163 to i160
+%r166 = getelementptr i32, i32* %r3, i32 4
+%r167 = load i32, i32* %r166
+%r168 = zext i32 %r167 to i160
+%r169 = shl i160 %r168, 128
+%r170 = or i160 %r164, %r169
+%r171 = zext i160 %r170 to i192
+%r173 = getelementptr i32, i32* %r3, i32 5
+%r174 = load i32, i32* %r173
+%r175 = zext i32 %r174 to i192
+%r176 = shl i192 %r175, 160
+%r177 = or i192 %r171, %r176
+%r178 = zext i192 %r177 to i224
+%r180 = getelementptr i32, i32* %r3, i32 6
+%r181 = load i32, i32* %r180
+%r182 = zext i32 %r181 to i224
+%r183 = shl i224 %r182, 192
+%r184 = or i224 %r178, %r183
+%r185 = zext i224 %r184 to i256
+%r186 = add i256 %r53, %r97
+%r187 = add i256 %r141, %r185
+%r189 = alloca i32, i32 14
+%r190 = trunc i256 %r186 to i224
+%r191 = trunc i256 %r187 to i224
+%r192 = lshr i256 %r186, 224
+%r193 = trunc i256 %r192 to i1
+%r194 = lshr i256 %r187, 224
+%r195 = trunc i256 %r194 to i1
+%r196 = and i1 %r193, %r195
+%r198 = select i1 %r193, i224 %r191, i224 0
+%r200 = select i1 %r195, i224 %r190, i224 0
+%r202 = alloca i32, i32 7
+%r204 = alloca i32, i32 7
+%r205 = trunc i224 %r190 to i32
+%r207 = getelementptr i32, i32* %r202, i32 0
+store i32 %r205, i32* %r207
+%r208 = lshr i224 %r190, 32
+%r209 = trunc i224 %r208 to i32
+%r211 = getelementptr i32, i32* %r202, i32 1
+store i32 %r209, i32* %r211
+%r212 = lshr i224 %r208, 32
+%r213 = trunc i224 %r212 to i32
+%r215 = getelementptr i32, i32* %r202, i32 2
+store i32 %r213, i32* %r215
+%r216 = lshr i224 %r212, 32
+%r217 = trunc i224 %r216 to i32
+%r219 = getelementptr i32, i32* %r202, i32 3
+store i32 %r217, i32* %r219
+%r220 = lshr i224 %r216, 32
+%r221 = trunc i224 %r220 to i32
+%r223 = getelementptr i32, i32* %r202, i32 4
+store i32 %r221, i32* %r223
+%r224 = lshr i224 %r220, 32
+%r225 = trunc i224 %r224 to i32
+%r227 = getelementptr i32, i32* %r202, i32 5
+store i32 %r225, i32* %r227
+%r228 = lshr i224 %r224, 32
+%r229 = trunc i224 %r228 to i32
+%r231 = getelementptr i32, i32* %r202, i32 6
+store i32 %r229, i32* %r231
+%r232 = trunc i224 %r191 to i32
+%r234 = getelementptr i32, i32* %r204, i32 0
+store i32 %r232, i32* %r234
+%r235 = lshr i224 %r191, 32
+%r236 = trunc i224 %r235 to i32
+%r238 = getelementptr i32, i32* %r204, i32 1
+store i32 %r236, i32* %r238
+%r239 = lshr i224 %r235, 32
+%r240 = trunc i224 %r239 to i32
+%r242 = getelementptr i32, i32* %r204, i32 2
+store i32 %r240, i32* %r242
+%r243 = lshr i224 %r239, 32
+%r244 = trunc i224 %r243 to i32
+%r246 = getelementptr i32, i32* %r204, i32 3
+store i32 %r244, i32* %r246
+%r247 = lshr i224 %r243, 32
+%r248 = trunc i224 %r247 to i32
+%r250 = getelementptr i32, i32* %r204, i32 4
+store i32 %r248, i32* %r250
+%r251 = lshr i224 %r247, 32
+%r252 = trunc i224 %r251 to i32
+%r254 = getelementptr i32, i32* %r204, i32 5
+store i32 %r252, i32* %r254
+%r255 = lshr i224 %r251, 32
+%r256 = trunc i224 %r255 to i32
+%r258 = getelementptr i32, i32* %r204, i32 6
+store i32 %r256, i32* %r258
+call void @mcl_fpDbl_mulPre7L(i32* %r189, i32* %r202, i32* %r204)
+%r259 = load i32, i32* %r189
+%r260 = zext i32 %r259 to i64
+%r262 = getelementptr i32, i32* %r189, i32 1
+%r263 = load i32, i32* %r262
+%r264 = zext i32 %r263 to i64
+%r265 = shl i64 %r264, 32
+%r266 = or i64 %r260, %r265
+%r267 = zext i64 %r266 to i96
+%r269 = getelementptr i32, i32* %r189, i32 2
+%r270 = load i32, i32* %r269
+%r271 = zext i32 %r270 to i96
+%r272 = shl i96 %r271, 64
+%r273 = or i96 %r267, %r272
+%r274 = zext i96 %r273 to i128
+%r276 = getelementptr i32, i32* %r189, i32 3
+%r277 = load i32, i32* %r276
+%r278 = zext i32 %r277 to i128
+%r279 = shl i128 %r278, 96
+%r280 = or i128 %r274, %r279
+%r281 = zext i128 %r280 to i160
+%r283 = getelementptr i32, i32* %r189, i32 4
+%r284 = load i32, i32* %r283
+%r285 = zext i32 %r284 to i160
+%r286 = shl i160 %r285, 128
+%r287 = or i160 %r281, %r286
+%r288 = zext i160 %r287 to i192
+%r290 = getelementptr i32, i32* %r189, i32 5
+%r291 = load i32, i32* %r290
+%r292 = zext i32 %r291 to i192
+%r293 = shl i192 %r292, 160
+%r294 = or i192 %r288, %r293
+%r295 = zext i192 %r294 to i224
+%r297 = getelementptr i32, i32* %r189, i32 6
+%r298 = load i32, i32* %r297
+%r299 = zext i32 %r298 to i224
+%r300 = shl i224 %r299, 192
+%r301 = or i224 %r295, %r300
+%r302 = zext i224 %r301 to i256
+%r304 = getelementptr i32, i32* %r189, i32 7
+%r305 = load i32, i32* %r304
+%r306 = zext i32 %r305 to i256
+%r307 = shl i256 %r306, 224
+%r308 = or i256 %r302, %r307
+%r309 = zext i256 %r308 to i288
+%r311 = getelementptr i32, i32* %r189, i32 8
+%r312 = load i32, i32* %r311
+%r313 = zext i32 %r312 to i288
+%r314 = shl i288 %r313, 256
+%r315 = or i288 %r309, %r314
+%r316 = zext i288 %r315 to i320
+%r318 = getelementptr i32, i32* %r189, i32 9
+%r319 = load i32, i32* %r318
+%r320 = zext i32 %r319 to i320
+%r321 = shl i320 %r320, 288
+%r322 = or i320 %r316, %r321
+%r323 = zext i320 %r322 to i352
+%r325 = getelementptr i32, i32* %r189, i32 10
+%r326 = load i32, i32* %r325
+%r327 = zext i32 %r326 to i352
+%r328 = shl i352 %r327, 320
+%r329 = or i352 %r323, %r328
+%r330 = zext i352 %r329 to i384
+%r332 = getelementptr i32, i32* %r189, i32 11
+%r333 = load i32, i32* %r332
+%r334 = zext i32 %r333 to i384
+%r335 = shl i384 %r334, 352
+%r336 = or i384 %r330, %r335
+%r337 = zext i384 %r336 to i416
+%r339 = getelementptr i32, i32* %r189, i32 12
+%r340 = load i32, i32* %r339
+%r341 = zext i32 %r340 to i416
+%r342 = shl i416 %r341, 384
+%r343 = or i416 %r337, %r342
+%r344 = zext i416 %r343 to i448
+%r346 = getelementptr i32, i32* %r189, i32 13
+%r347 = load i32, i32* %r346
+%r348 = zext i32 %r347 to i448
+%r349 = shl i448 %r348, 416
+%r350 = or i448 %r344, %r349
+%r351 = zext i448 %r350 to i480
+%r352 = zext i1 %r196 to i480
+%r353 = shl i480 %r352, 448
+%r354 = or i480 %r351, %r353
+%r355 = zext i224 %r198 to i480
+%r356 = zext i224 %r200 to i480
+%r357 = shl i480 %r355, 224
+%r358 = shl i480 %r356, 224
+%r359 = add i480 %r354, %r357
+%r360 = add i480 %r359, %r358
+%r361 = load i32, i32* %r1
+%r362 = zext i32 %r361 to i64
+%r364 = getelementptr i32, i32* %r1, i32 1
+%r365 = load i32, i32* %r364
+%r366 = zext i32 %r365 to i64
+%r367 = shl i64 %r366, 32
+%r368 = or i64 %r362, %r367
+%r369 = zext i64 %r368 to i96
+%r371 = getelementptr i32, i32* %r1, i32 2
+%r372 = load i32, i32* %r371
+%r373 = zext i32 %r372 to i96
+%r374 = shl i96 %r373, 64
+%r375 = or i96 %r369, %r374
+%r376 = zext i96 %r375 to i128
+%r378 = getelementptr i32, i32* %r1, i32 3
+%r379 = load i32, i32* %r378
+%r380 = zext i32 %r379 to i128
+%r381 = shl i128 %r380, 96
+%r382 = or i128 %r376, %r381
+%r383 = zext i128 %r382 to i160
+%r385 = getelementptr i32, i32* %r1, i32 4
+%r386 = load i32, i32* %r385
+%r387 = zext i32 %r386 to i160
+%r388 = shl i160 %r387, 128
+%r389 = or i160 %r383, %r388
+%r390 = zext i160 %r389 to i192
+%r392 = getelementptr i32, i32* %r1, i32 5
+%r393 = load i32, i32* %r392
+%r394 = zext i32 %r393 to i192
+%r395 = shl i192 %r394, 160
+%r396 = or i192 %r390, %r395
+%r397 = zext i192 %r396 to i224
+%r399 = getelementptr i32, i32* %r1, i32 6
+%r400 = load i32, i32* %r399
+%r401 = zext i32 %r400 to i224
+%r402 = shl i224 %r401, 192
+%r403 = or i224 %r397, %r402
+%r404 = zext i224 %r403 to i256
+%r406 = getelementptr i32, i32* %r1, i32 7
+%r407 = load i32, i32* %r406
+%r408 = zext i32 %r407 to i256
+%r409 = shl i256 %r408, 224
+%r410 = or i256 %r404, %r409
+%r411 = zext i256 %r410 to i288
+%r413 = getelementptr i32, i32* %r1, i32 8
+%r414 = load i32, i32* %r413
+%r415 = zext i32 %r414 to i288
+%r416 = shl i288 %r415, 256
+%r417 = or i288 %r411, %r416
+%r418 = zext i288 %r417 to i320
+%r420 = getelementptr i32, i32* %r1, i32 9
+%r421 = load i32, i32* %r420
+%r422 = zext i32 %r421 to i320
+%r423 = shl i320 %r422, 288
+%r424 = or i320 %r418, %r423
+%r425 = zext i320 %r424 to i352
+%r427 = getelementptr i32, i32* %r1, i32 10
+%r428 = load i32, i32* %r427
+%r429 = zext i32 %r428 to i352
+%r430 = shl i352 %r429, 320
+%r431 = or i352 %r425, %r430
+%r432 = zext i352 %r431 to i384
+%r434 = getelementptr i32, i32* %r1, i32 11
+%r435 = load i32, i32* %r434
+%r436 = zext i32 %r435 to i384
+%r437 = shl i384 %r436, 352
+%r438 = or i384 %r432, %r437
+%r439 = zext i384 %r438 to i416
+%r441 = getelementptr i32, i32* %r1, i32 12
+%r442 = load i32, i32* %r441
+%r443 = zext i32 %r442 to i416
+%r444 = shl i416 %r443, 384
+%r445 = or i416 %r439, %r444
+%r446 = zext i416 %r445 to i448
+%r448 = getelementptr i32, i32* %r1, i32 13
+%r449 = load i32, i32* %r448
+%r450 = zext i32 %r449 to i448
+%r451 = shl i448 %r450, 416
+%r452 = or i448 %r446, %r451
+%r453 = zext i448 %r452 to i480
+%r454 = sub i480 %r360, %r453
+%r456 = getelementptr i32, i32* %r1, i32 14
+%r457 = load i32, i32* %r456
+%r458 = zext i32 %r457 to i64
+%r460 = getelementptr i32, i32* %r456, i32 1
+%r461 = load i32, i32* %r460
+%r462 = zext i32 %r461 to i64
+%r463 = shl i64 %r462, 32
+%r464 = or i64 %r458, %r463
+%r465 = zext i64 %r464 to i96
+%r467 = getelementptr i32, i32* %r456, i32 2
+%r468 = load i32, i32* %r467
+%r469 = zext i32 %r468 to i96
+%r470 = shl i96 %r469, 64
+%r471 = or i96 %r465, %r470
+%r472 = zext i96 %r471 to i128
+%r474 = getelementptr i32, i32* %r456, i32 3
+%r475 = load i32, i32* %r474
+%r476 = zext i32 %r475 to i128
+%r477 = shl i128 %r476, 96
+%r478 = or i128 %r472, %r477
+%r479 = zext i128 %r478 to i160
+%r481 = getelementptr i32, i32* %r456, i32 4
+%r482 = load i32, i32* %r481
+%r483 = zext i32 %r482 to i160
+%r484 = shl i160 %r483, 128
+%r485 = or i160 %r479, %r484
+%r486 = zext i160 %r485 to i192
+%r488 = getelementptr i32, i32* %r456, i32 5
+%r489 = load i32, i32* %r488
+%r490 = zext i32 %r489 to i192
+%r491 = shl i192 %r490, 160
+%r492 = or i192 %r486, %r491
+%r493 = zext i192 %r492 to i224
+%r495 = getelementptr i32, i32* %r456, i32 6
+%r496 = load i32, i32* %r495
+%r497 = zext i32 %r496 to i224
+%r498 = shl i224 %r497, 192
+%r499 = or i224 %r493, %r498
+%r500 = zext i224 %r499 to i256
+%r502 = getelementptr i32, i32* %r456, i32 7
+%r503 = load i32, i32* %r502
+%r504 = zext i32 %r503 to i256
+%r505 = shl i256 %r504, 224
+%r506 = or i256 %r500, %r505
+%r507 = zext i256 %r506 to i288
+%r509 = getelementptr i32, i32* %r456, i32 8
+%r510 = load i32, i32* %r509
+%r511 = zext i32 %r510 to i288
+%r512 = shl i288 %r511, 256
+%r513 = or i288 %r507, %r512
+%r514 = zext i288 %r513 to i320
+%r516 = getelementptr i32, i32* %r456, i32 9
+%r517 = load i32, i32* %r516
+%r518 = zext i32 %r517 to i320
+%r519 = shl i320 %r518, 288
+%r520 = or i320 %r514, %r519
+%r521 = zext i320 %r520 to i352
+%r523 = getelementptr i32, i32* %r456, i32 10
+%r524 = load i32, i32* %r523
+%r525 = zext i32 %r524 to i352
+%r526 = shl i352 %r525, 320
+%r527 = or i352 %r521, %r526
+%r528 = zext i352 %r527 to i384
+%r530 = getelementptr i32, i32* %r456, i32 11
+%r531 = load i32, i32* %r530
+%r532 = zext i32 %r531 to i384
+%r533 = shl i384 %r532, 352
+%r534 = or i384 %r528, %r533
+%r535 = zext i384 %r534 to i416
+%r537 = getelementptr i32, i32* %r456, i32 12
+%r538 = load i32, i32* %r537
+%r539 = zext i32 %r538 to i416
+%r540 = shl i416 %r539, 384
+%r541 = or i416 %r535, %r540
+%r542 = zext i416 %r541 to i448
+%r544 = getelementptr i32, i32* %r456, i32 13
+%r545 = load i32, i32* %r544
+%r546 = zext i32 %r545 to i448
+%r547 = shl i448 %r546, 416
+%r548 = or i448 %r542, %r547
+%r549 = zext i448 %r548 to i480
+%r550 = sub i480 %r454, %r549
+%r551 = zext i480 %r550 to i672
+%r553 = getelementptr i32, i32* %r1, i32 7
+%r554 = load i32, i32* %r553
+%r555 = zext i32 %r554 to i64
+%r557 = getelementptr i32, i32* %r553, i32 1
+%r558 = load i32, i32* %r557
+%r559 = zext i32 %r558 to i64
+%r560 = shl i64 %r559, 32
+%r561 = or i64 %r555, %r560
+%r562 = zext i64 %r561 to i96
+%r564 = getelementptr i32, i32* %r553, i32 2
+%r565 = load i32, i32* %r564
+%r566 = zext i32 %r565 to i96
+%r567 = shl i96 %r566, 64
+%r568 = or i96 %r562, %r567
+%r569 = zext i96 %r568 to i128
+%r571 = getelementptr i32, i32* %r553, i32 3
+%r572 = load i32, i32* %r571
+%r573 = zext i32 %r572 to i128
+%r574 = shl i128 %r573, 96
+%r575 = or i128 %r569, %r574
+%r576 = zext i128 %r575 to i160
+%r578 = getelementptr i32, i32* %r553, i32 4
+%r579 = load i32, i32* %r578
+%r580 = zext i32 %r579 to i160
+%r581 = shl i160 %r580, 128
+%r582 = or i160 %r576, %r581
+%r583 = zext i160 %r582 to i192
+%r585 = getelementptr i32, i32* %r553, i32 5
+%r586 = load i32, i32* %r585
+%r587 = zext i32 %r586 to i192
+%r588 = shl i192 %r587, 160
+%r589 = or i192 %r583, %r588
+%r590 = zext i192 %r589 to i224
+%r592 = getelementptr i32, i32* %r553, i32 6
+%r593 = load i32, i32* %r592
+%r594 = zext i32 %r593 to i224
+%r595 = shl i224 %r594, 192
+%r596 = or i224 %r590, %r595
+%r597 = zext i224 %r596 to i256
+%r599 = getelementptr i32, i32* %r553, i32 7
+%r600 = load i32, i32* %r599
+%r601 = zext i32 %r600 to i256
+%r602 = shl i256 %r601, 224
+%r603 = or i256 %r597, %r602
+%r604 = zext i256 %r603 to i288
+%r606 = getelementptr i32, i32* %r553, i32 8
+%r607 = load i32, i32* %r606
+%r608 = zext i32 %r607 to i288
+%r609 = shl i288 %r608, 256
+%r610 = or i288 %r604, %r609
+%r611 = zext i288 %r610 to i320
+%r613 = getelementptr i32, i32* %r553, i32 9
+%r614 = load i32, i32* %r613
+%r615 = zext i32 %r614 to i320
+%r616 = shl i320 %r615, 288
+%r617 = or i320 %r611, %r616
+%r618 = zext i320 %r617 to i352
+%r620 = getelementptr i32, i32* %r553, i32 10
+%r621 = load i32, i32* %r620
+%r622 = zext i32 %r621 to i352
+%r623 = shl i352 %r622, 320
+%r624 = or i352 %r618, %r623
+%r625 = zext i352 %r624 to i384
+%r627 = getelementptr i32, i32* %r553, i32 11
+%r628 = load i32, i32* %r627
+%r629 = zext i32 %r628 to i384
+%r630 = shl i384 %r629, 352
+%r631 = or i384 %r625, %r630
+%r632 = zext i384 %r631 to i416
+%r634 = getelementptr i32, i32* %r553, i32 12
+%r635 = load i32, i32* %r634
+%r636 = zext i32 %r635 to i416
+%r637 = shl i416 %r636, 384
+%r638 = or i416 %r632, %r637
+%r639 = zext i416 %r638 to i448
+%r641 = getelementptr i32, i32* %r553, i32 13
+%r642 = load i32, i32* %r641
+%r643 = zext i32 %r642 to i448
+%r644 = shl i448 %r643, 416
+%r645 = or i448 %r639, %r644
+%r646 = zext i448 %r645 to i480
+%r648 = getelementptr i32, i32* %r553, i32 14
+%r649 = load i32, i32* %r648
+%r650 = zext i32 %r649 to i480
+%r651 = shl i480 %r650, 448
+%r652 = or i480 %r646, %r651
+%r653 = zext i480 %r652 to i512
+%r655 = getelementptr i32, i32* %r553, i32 15
+%r656 = load i32, i32* %r655
+%r657 = zext i32 %r656 to i512
+%r658 = shl i512 %r657, 480
+%r659 = or i512 %r653, %r658
+%r660 = zext i512 %r659 to i544
+%r662 = getelementptr i32, i32* %r553, i32 16
+%r663 = load i32, i32* %r662
+%r664 = zext i32 %r663 to i544
+%r665 = shl i544 %r664, 512
+%r666 = or i544 %r660, %r665
+%r667 = zext i544 %r666 to i576
+%r669 = getelementptr i32, i32* %r553, i32 17
+%r670 = load i32, i32* %r669
+%r671 = zext i32 %r670 to i576
+%r672 = shl i576 %r671, 544
+%r673 = or i576 %r667, %r672
+%r674 = zext i576 %r673 to i608
+%r676 = getelementptr i32, i32* %r553, i32 18
+%r677 = load i32, i32* %r676
+%r678 = zext i32 %r677 to i608
+%r679 = shl i608 %r678, 576
+%r680 = or i608 %r674, %r679
+%r681 = zext i608 %r680 to i640
+%r683 = getelementptr i32, i32* %r553, i32 19
+%r684 = load i32, i32* %r683
+%r685 = zext i32 %r684 to i640
+%r686 = shl i640 %r685, 608
+%r687 = or i640 %r681, %r686
+%r688 = zext i640 %r687 to i672
+%r690 = getelementptr i32, i32* %r553, i32 20
+%r691 = load i32, i32* %r690
+%r692 = zext i32 %r691 to i672
+%r693 = shl i672 %r692, 640
+%r694 = or i672 %r688, %r693
+%r695 = add i672 %r551, %r694
+%r697 = getelementptr i32, i32* %r1, i32 7
+%r698 = trunc i672 %r695 to i32
+%r700 = getelementptr i32, i32* %r697, i32 0
+store i32 %r698, i32* %r700
+%r701 = lshr i672 %r695, 32
+%r702 = trunc i672 %r701 to i32
+%r704 = getelementptr i32, i32* %r697, i32 1
+store i32 %r702, i32* %r704
+%r705 = lshr i672 %r701, 32
+%r706 = trunc i672 %r705 to i32
+%r708 = getelementptr i32, i32* %r697, i32 2
+store i32 %r706, i32* %r708
+%r709 = lshr i672 %r705, 32
+%r710 = trunc i672 %r709 to i32
+%r712 = getelementptr i32, i32* %r697, i32 3
+store i32 %r710, i32* %r712
+%r713 = lshr i672 %r709, 32
+%r714 = trunc i672 %r713 to i32
+%r716 = getelementptr i32, i32* %r697, i32 4
+store i32 %r714, i32* %r716
+%r717 = lshr i672 %r713, 32
+%r718 = trunc i672 %r717 to i32
+%r720 = getelementptr i32, i32* %r697, i32 5
+store i32 %r718, i32* %r720
+%r721 = lshr i672 %r717, 32
+%r722 = trunc i672 %r721 to i32
+%r724 = getelementptr i32, i32* %r697, i32 6
+store i32 %r722, i32* %r724
+%r725 = lshr i672 %r721, 32
+%r726 = trunc i672 %r725 to i32
+%r728 = getelementptr i32, i32* %r697, i32 7
+store i32 %r726, i32* %r728
+%r729 = lshr i672 %r725, 32
+%r730 = trunc i672 %r729 to i32
+%r732 = getelementptr i32, i32* %r697, i32 8
+store i32 %r730, i32* %r732
+%r733 = lshr i672 %r729, 32
+%r734 = trunc i672 %r733 to i32
+%r736 = getelementptr i32, i32* %r697, i32 9
+store i32 %r734, i32* %r736
+%r737 = lshr i672 %r733, 32
+%r738 = trunc i672 %r737 to i32
+%r740 = getelementptr i32, i32* %r697, i32 10
+store i32 %r738, i32* %r740
+%r741 = lshr i672 %r737, 32
+%r742 = trunc i672 %r741 to i32
+%r744 = getelementptr i32, i32* %r697, i32 11
+store i32 %r742, i32* %r744
+%r745 = lshr i672 %r741, 32
+%r746 = trunc i672 %r745 to i32
+%r748 = getelementptr i32, i32* %r697, i32 12
+store i32 %r746, i32* %r748
+%r749 = lshr i672 %r745, 32
+%r750 = trunc i672 %r749 to i32
+%r752 = getelementptr i32, i32* %r697, i32 13
+store i32 %r750, i32* %r752
+%r753 = lshr i672 %r749, 32
+%r754 = trunc i672 %r753 to i32
+%r756 = getelementptr i32, i32* %r697, i32 14
+store i32 %r754, i32* %r756
+%r757 = lshr i672 %r753, 32
+%r758 = trunc i672 %r757 to i32
+%r760 = getelementptr i32, i32* %r697, i32 15
+store i32 %r758, i32* %r760
+%r761 = lshr i672 %r757, 32
+%r762 = trunc i672 %r761 to i32
+%r764 = getelementptr i32, i32* %r697, i32 16
+store i32 %r762, i32* %r764
+%r765 = lshr i672 %r761, 32
+%r766 = trunc i672 %r765 to i32
+%r768 = getelementptr i32, i32* %r697, i32 17
+store i32 %r766, i32* %r768
+%r769 = lshr i672 %r765, 32
+%r770 = trunc i672 %r769 to i32
+%r772 = getelementptr i32, i32* %r697, i32 18
+store i32 %r770, i32* %r772
+%r773 = lshr i672 %r769, 32
+%r774 = trunc i672 %r773 to i32
+%r776 = getelementptr i32, i32* %r697, i32 19
+store i32 %r774, i32* %r776
+%r777 = lshr i672 %r773, 32
+%r778 = trunc i672 %r777 to i32
+%r780 = getelementptr i32, i32* %r697, i32 20
+store i32 %r778, i32* %r780
+ret void
+}
+define void @mcl_fpDbl_sqrPre14L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r4 = getelementptr i32, i32* %r2, i32 7
+%r6 = getelementptr i32, i32* %r2, i32 7
+%r8 = getelementptr i32, i32* %r1, i32 14
+call void @mcl_fpDbl_mulPre7L(i32* %r1, i32* %r2, i32* %r2)
+call void @mcl_fpDbl_mulPre7L(i32* %r8, i32* %r4, i32* %r6)
+%r9 = load i32, i32* %r4
+%r10 = zext i32 %r9 to i64
+%r12 = getelementptr i32, i32* %r4, i32 1
+%r13 = load i32, i32* %r12
+%r14 = zext i32 %r13 to i64
+%r15 = shl i64 %r14, 32
+%r16 = or i64 %r10, %r15
+%r17 = zext i64 %r16 to i96
+%r19 = getelementptr i32, i32* %r4, i32 2
+%r20 = load i32, i32* %r19
+%r21 = zext i32 %r20 to i96
+%r22 = shl i96 %r21, 64
+%r23 = or i96 %r17, %r22
+%r24 = zext i96 %r23 to i128
+%r26 = getelementptr i32, i32* %r4, i32 3
+%r27 = load i32, i32* %r26
+%r28 = zext i32 %r27 to i128
+%r29 = shl i128 %r28, 96
+%r30 = or i128 %r24, %r29
+%r31 = zext i128 %r30 to i160
+%r33 = getelementptr i32, i32* %r4, i32 4
+%r34 = load i32, i32* %r33
+%r35 = zext i32 %r34 to i160
+%r36 = shl i160 %r35, 128
+%r37 = or i160 %r31, %r36
+%r38 = zext i160 %r37 to i192
+%r40 = getelementptr i32, i32* %r4, i32 5
+%r41 = load i32, i32* %r40
+%r42 = zext i32 %r41 to i192
+%r43 = shl i192 %r42, 160
+%r44 = or i192 %r38, %r43
+%r45 = zext i192 %r44 to i224
+%r47 = getelementptr i32, i32* %r4, i32 6
+%r48 = load i32, i32* %r47
+%r49 = zext i32 %r48 to i224
+%r50 = shl i224 %r49, 192
+%r51 = or i224 %r45, %r50
+%r52 = zext i224 %r51 to i256
+%r53 = load i32, i32* %r2
+%r54 = zext i32 %r53 to i64
+%r56 = getelementptr i32, i32* %r2, i32 1
+%r57 = load i32, i32* %r56
+%r58 = zext i32 %r57 to i64
+%r59 = shl i64 %r58, 32
+%r60 = or i64 %r54, %r59
+%r61 = zext i64 %r60 to i96
+%r63 = getelementptr i32, i32* %r2, i32 2
+%r64 = load i32, i32* %r63
+%r65 = zext i32 %r64 to i96
+%r66 = shl i96 %r65, 64
+%r67 = or i96 %r61, %r66
+%r68 = zext i96 %r67 to i128
+%r70 = getelementptr i32, i32* %r2, i32 3
+%r71 = load i32, i32* %r70
+%r72 = zext i32 %r71 to i128
+%r73 = shl i128 %r72, 96
+%r74 = or i128 %r68, %r73
+%r75 = zext i128 %r74 to i160
+%r77 = getelementptr i32, i32* %r2, i32 4
+%r78 = load i32, i32* %r77
+%r79 = zext i32 %r78 to i160
+%r80 = shl i160 %r79, 128
+%r81 = or i160 %r75, %r80
+%r82 = zext i160 %r81 to i192
+%r84 = getelementptr i32, i32* %r2, i32 5
+%r85 = load i32, i32* %r84
+%r86 = zext i32 %r85 to i192
+%r87 = shl i192 %r86, 160
+%r88 = or i192 %r82, %r87
+%r89 = zext i192 %r88 to i224
+%r91 = getelementptr i32, i32* %r2, i32 6
+%r92 = load i32, i32* %r91
+%r93 = zext i32 %r92 to i224
+%r94 = shl i224 %r93, 192
+%r95 = or i224 %r89, %r94
+%r96 = zext i224 %r95 to i256
+%r97 = load i32, i32* %r6
+%r98 = zext i32 %r97 to i64
+%r100 = getelementptr i32, i32* %r6, i32 1
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i64
+%r103 = shl i64 %r102, 32
+%r104 = or i64 %r98, %r103
+%r105 = zext i64 %r104 to i96
+%r107 = getelementptr i32, i32* %r6, i32 2
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i96
+%r110 = shl i96 %r109, 64
+%r111 = or i96 %r105, %r110
+%r112 = zext i96 %r111 to i128
+%r114 = getelementptr i32, i32* %r6, i32 3
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i128
+%r117 = shl i128 %r116, 96
+%r118 = or i128 %r112, %r117
+%r119 = zext i128 %r118 to i160
+%r121 = getelementptr i32, i32* %r6, i32 4
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i160
+%r124 = shl i160 %r123, 128
+%r125 = or i160 %r119, %r124
+%r126 = zext i160 %r125 to i192
+%r128 = getelementptr i32, i32* %r6, i32 5
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i192
+%r131 = shl i192 %r130, 160
+%r132 = or i192 %r126, %r131
+%r133 = zext i192 %r132 to i224
+%r135 = getelementptr i32, i32* %r6, i32 6
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i224
+%r138 = shl i224 %r137, 192
+%r139 = or i224 %r133, %r138
+%r140 = zext i224 %r139 to i256
+%r141 = load i32, i32* %r2
+%r142 = zext i32 %r141 to i64
+%r144 = getelementptr i32, i32* %r2, i32 1
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i64
+%r147 = shl i64 %r146, 32
+%r148 = or i64 %r142, %r147
+%r149 = zext i64 %r148 to i96
+%r151 = getelementptr i32, i32* %r2, i32 2
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i96
+%r154 = shl i96 %r153, 64
+%r155 = or i96 %r149, %r154
+%r156 = zext i96 %r155 to i128
+%r158 = getelementptr i32, i32* %r2, i32 3
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i128
+%r161 = shl i128 %r160, 96
+%r162 = or i128 %r156, %r161
+%r163 = zext i128 %r162 to i160
+%r165 = getelementptr i32, i32* %r2, i32 4
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i160
+%r168 = shl i160 %r167, 128
+%r169 = or i160 %r163, %r168
+%r170 = zext i160 %r169 to i192
+%r172 = getelementptr i32, i32* %r2, i32 5
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i192
+%r175 = shl i192 %r174, 160
+%r176 = or i192 %r170, %r175
+%r177 = zext i192 %r176 to i224
+%r179 = getelementptr i32, i32* %r2, i32 6
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i224
+%r182 = shl i224 %r181, 192
+%r183 = or i224 %r177, %r182
+%r184 = zext i224 %r183 to i256
+%r185 = add i256 %r52, %r96
+%r186 = add i256 %r140, %r184
+%r188 = alloca i32, i32 14
+%r189 = trunc i256 %r185 to i224
+%r190 = trunc i256 %r186 to i224
+%r191 = lshr i256 %r185, 224
+%r192 = trunc i256 %r191 to i1
+%r193 = lshr i256 %r186, 224
+%r194 = trunc i256 %r193 to i1
+%r195 = and i1 %r192, %r194
+%r197 = select i1 %r192, i224 %r190, i224 0
+%r199 = select i1 %r194, i224 %r189, i224 0
+%r201 = alloca i32, i32 7
+%r203 = alloca i32, i32 7
+%r204 = trunc i224 %r189 to i32
+%r206 = getelementptr i32, i32* %r201, i32 0
+store i32 %r204, i32* %r206
+%r207 = lshr i224 %r189, 32
+%r208 = trunc i224 %r207 to i32
+%r210 = getelementptr i32, i32* %r201, i32 1
+store i32 %r208, i32* %r210
+%r211 = lshr i224 %r207, 32
+%r212 = trunc i224 %r211 to i32
+%r214 = getelementptr i32, i32* %r201, i32 2
+store i32 %r212, i32* %r214
+%r215 = lshr i224 %r211, 32
+%r216 = trunc i224 %r215 to i32
+%r218 = getelementptr i32, i32* %r201, i32 3
+store i32 %r216, i32* %r218
+%r219 = lshr i224 %r215, 32
+%r220 = trunc i224 %r219 to i32
+%r222 = getelementptr i32, i32* %r201, i32 4
+store i32 %r220, i32* %r222
+%r223 = lshr i224 %r219, 32
+%r224 = trunc i224 %r223 to i32
+%r226 = getelementptr i32, i32* %r201, i32 5
+store i32 %r224, i32* %r226
+%r227 = lshr i224 %r223, 32
+%r228 = trunc i224 %r227 to i32
+%r230 = getelementptr i32, i32* %r201, i32 6
+store i32 %r228, i32* %r230
+%r231 = trunc i224 %r190 to i32
+%r233 = getelementptr i32, i32* %r203, i32 0
+store i32 %r231, i32* %r233
+%r234 = lshr i224 %r190, 32
+%r235 = trunc i224 %r234 to i32
+%r237 = getelementptr i32, i32* %r203, i32 1
+store i32 %r235, i32* %r237
+%r238 = lshr i224 %r234, 32
+%r239 = trunc i224 %r238 to i32
+%r241 = getelementptr i32, i32* %r203, i32 2
+store i32 %r239, i32* %r241
+%r242 = lshr i224 %r238, 32
+%r243 = trunc i224 %r242 to i32
+%r245 = getelementptr i32, i32* %r203, i32 3
+store i32 %r243, i32* %r245
+%r246 = lshr i224 %r242, 32
+%r247 = trunc i224 %r246 to i32
+%r249 = getelementptr i32, i32* %r203, i32 4
+store i32 %r247, i32* %r249
+%r250 = lshr i224 %r246, 32
+%r251 = trunc i224 %r250 to i32
+%r253 = getelementptr i32, i32* %r203, i32 5
+store i32 %r251, i32* %r253
+%r254 = lshr i224 %r250, 32
+%r255 = trunc i224 %r254 to i32
+%r257 = getelementptr i32, i32* %r203, i32 6
+store i32 %r255, i32* %r257
+call void @mcl_fpDbl_mulPre7L(i32* %r188, i32* %r201, i32* %r203)
+%r258 = load i32, i32* %r188
+%r259 = zext i32 %r258 to i64
+%r261 = getelementptr i32, i32* %r188, i32 1
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i64
+%r264 = shl i64 %r263, 32
+%r265 = or i64 %r259, %r264
+%r266 = zext i64 %r265 to i96
+%r268 = getelementptr i32, i32* %r188, i32 2
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i96
+%r271 = shl i96 %r270, 64
+%r272 = or i96 %r266, %r271
+%r273 = zext i96 %r272 to i128
+%r275 = getelementptr i32, i32* %r188, i32 3
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i128
+%r278 = shl i128 %r277, 96
+%r279 = or i128 %r273, %r278
+%r280 = zext i128 %r279 to i160
+%r282 = getelementptr i32, i32* %r188, i32 4
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i160
+%r285 = shl i160 %r284, 128
+%r286 = or i160 %r280, %r285
+%r287 = zext i160 %r286 to i192
+%r289 = getelementptr i32, i32* %r188, i32 5
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i192
+%r292 = shl i192 %r291, 160
+%r293 = or i192 %r287, %r292
+%r294 = zext i192 %r293 to i224
+%r296 = getelementptr i32, i32* %r188, i32 6
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i224
+%r299 = shl i224 %r298, 192
+%r300 = or i224 %r294, %r299
+%r301 = zext i224 %r300 to i256
+%r303 = getelementptr i32, i32* %r188, i32 7
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i256
+%r306 = shl i256 %r305, 224
+%r307 = or i256 %r301, %r306
+%r308 = zext i256 %r307 to i288
+%r310 = getelementptr i32, i32* %r188, i32 8
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i288
+%r313 = shl i288 %r312, 256
+%r314 = or i288 %r308, %r313
+%r315 = zext i288 %r314 to i320
+%r317 = getelementptr i32, i32* %r188, i32 9
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i320
+%r320 = shl i320 %r319, 288
+%r321 = or i320 %r315, %r320
+%r322 = zext i320 %r321 to i352
+%r324 = getelementptr i32, i32* %r188, i32 10
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i352
+%r327 = shl i352 %r326, 320
+%r328 = or i352 %r322, %r327
+%r329 = zext i352 %r328 to i384
+%r331 = getelementptr i32, i32* %r188, i32 11
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i384
+%r334 = shl i384 %r333, 352
+%r335 = or i384 %r329, %r334
+%r336 = zext i384 %r335 to i416
+%r338 = getelementptr i32, i32* %r188, i32 12
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i416
+%r341 = shl i416 %r340, 384
+%r342 = or i416 %r336, %r341
+%r343 = zext i416 %r342 to i448
+%r345 = getelementptr i32, i32* %r188, i32 13
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i448
+%r348 = shl i448 %r347, 416
+%r349 = or i448 %r343, %r348
+%r350 = zext i448 %r349 to i480
+%r351 = zext i1 %r195 to i480
+%r352 = shl i480 %r351, 448
+%r353 = or i480 %r350, %r352
+%r354 = zext i224 %r197 to i480
+%r355 = zext i224 %r199 to i480
+%r356 = shl i480 %r354, 224
+%r357 = shl i480 %r355, 224
+%r358 = add i480 %r353, %r356
+%r359 = add i480 %r358, %r357
+%r360 = load i32, i32* %r1
+%r361 = zext i32 %r360 to i64
+%r363 = getelementptr i32, i32* %r1, i32 1
+%r364 = load i32, i32* %r363
+%r365 = zext i32 %r364 to i64
+%r366 = shl i64 %r365, 32
+%r367 = or i64 %r361, %r366
+%r368 = zext i64 %r367 to i96
+%r370 = getelementptr i32, i32* %r1, i32 2
+%r371 = load i32, i32* %r370
+%r372 = zext i32 %r371 to i96
+%r373 = shl i96 %r372, 64
+%r374 = or i96 %r368, %r373
+%r375 = zext i96 %r374 to i128
+%r377 = getelementptr i32, i32* %r1, i32 3
+%r378 = load i32, i32* %r377
+%r379 = zext i32 %r378 to i128
+%r380 = shl i128 %r379, 96
+%r381 = or i128 %r375, %r380
+%r382 = zext i128 %r381 to i160
+%r384 = getelementptr i32, i32* %r1, i32 4
+%r385 = load i32, i32* %r384
+%r386 = zext i32 %r385 to i160
+%r387 = shl i160 %r386, 128
+%r388 = or i160 %r382, %r387
+%r389 = zext i160 %r388 to i192
+%r391 = getelementptr i32, i32* %r1, i32 5
+%r392 = load i32, i32* %r391
+%r393 = zext i32 %r392 to i192
+%r394 = shl i192 %r393, 160
+%r395 = or i192 %r389, %r394
+%r396 = zext i192 %r395 to i224
+%r398 = getelementptr i32, i32* %r1, i32 6
+%r399 = load i32, i32* %r398
+%r400 = zext i32 %r399 to i224
+%r401 = shl i224 %r400, 192
+%r402 = or i224 %r396, %r401
+%r403 = zext i224 %r402 to i256
+%r405 = getelementptr i32, i32* %r1, i32 7
+%r406 = load i32, i32* %r405
+%r407 = zext i32 %r406 to i256
+%r408 = shl i256 %r407, 224
+%r409 = or i256 %r403, %r408
+%r410 = zext i256 %r409 to i288
+%r412 = getelementptr i32, i32* %r1, i32 8
+%r413 = load i32, i32* %r412
+%r414 = zext i32 %r413 to i288
+%r415 = shl i288 %r414, 256
+%r416 = or i288 %r410, %r415
+%r417 = zext i288 %r416 to i320
+%r419 = getelementptr i32, i32* %r1, i32 9
+%r420 = load i32, i32* %r419
+%r421 = zext i32 %r420 to i320
+%r422 = shl i320 %r421, 288
+%r423 = or i320 %r417, %r422
+%r424 = zext i320 %r423 to i352
+%r426 = getelementptr i32, i32* %r1, i32 10
+%r427 = load i32, i32* %r426
+%r428 = zext i32 %r427 to i352
+%r429 = shl i352 %r428, 320
+%r430 = or i352 %r424, %r429
+%r431 = zext i352 %r430 to i384
+%r433 = getelementptr i32, i32* %r1, i32 11
+%r434 = load i32, i32* %r433
+%r435 = zext i32 %r434 to i384
+%r436 = shl i384 %r435, 352
+%r437 = or i384 %r431, %r436
+%r438 = zext i384 %r437 to i416
+%r440 = getelementptr i32, i32* %r1, i32 12
+%r441 = load i32, i32* %r440
+%r442 = zext i32 %r441 to i416
+%r443 = shl i416 %r442, 384
+%r444 = or i416 %r438, %r443
+%r445 = zext i416 %r444 to i448
+%r447 = getelementptr i32, i32* %r1, i32 13
+%r448 = load i32, i32* %r447
+%r449 = zext i32 %r448 to i448
+%r450 = shl i448 %r449, 416
+%r451 = or i448 %r445, %r450
+%r452 = zext i448 %r451 to i480
+%r453 = sub i480 %r359, %r452
+%r455 = getelementptr i32, i32* %r1, i32 14
+%r456 = load i32, i32* %r455
+%r457 = zext i32 %r456 to i64
+%r459 = getelementptr i32, i32* %r455, i32 1
+%r460 = load i32, i32* %r459
+%r461 = zext i32 %r460 to i64
+%r462 = shl i64 %r461, 32
+%r463 = or i64 %r457, %r462
+%r464 = zext i64 %r463 to i96
+%r466 = getelementptr i32, i32* %r455, i32 2
+%r467 = load i32, i32* %r466
+%r468 = zext i32 %r467 to i96
+%r469 = shl i96 %r468, 64
+%r470 = or i96 %r464, %r469
+%r471 = zext i96 %r470 to i128
+%r473 = getelementptr i32, i32* %r455, i32 3
+%r474 = load i32, i32* %r473
+%r475 = zext i32 %r474 to i128
+%r476 = shl i128 %r475, 96
+%r477 = or i128 %r471, %r476
+%r478 = zext i128 %r477 to i160
+%r480 = getelementptr i32, i32* %r455, i32 4
+%r481 = load i32, i32* %r480
+%r482 = zext i32 %r481 to i160
+%r483 = shl i160 %r482, 128
+%r484 = or i160 %r478, %r483
+%r485 = zext i160 %r484 to i192
+%r487 = getelementptr i32, i32* %r455, i32 5
+%r488 = load i32, i32* %r487
+%r489 = zext i32 %r488 to i192
+%r490 = shl i192 %r489, 160
+%r491 = or i192 %r485, %r490
+%r492 = zext i192 %r491 to i224
+%r494 = getelementptr i32, i32* %r455, i32 6
+%r495 = load i32, i32* %r494
+%r496 = zext i32 %r495 to i224
+%r497 = shl i224 %r496, 192
+%r498 = or i224 %r492, %r497
+%r499 = zext i224 %r498 to i256
+%r501 = getelementptr i32, i32* %r455, i32 7
+%r502 = load i32, i32* %r501
+%r503 = zext i32 %r502 to i256
+%r504 = shl i256 %r503, 224
+%r505 = or i256 %r499, %r504
+%r506 = zext i256 %r505 to i288
+%r508 = getelementptr i32, i32* %r455, i32 8
+%r509 = load i32, i32* %r508
+%r510 = zext i32 %r509 to i288
+%r511 = shl i288 %r510, 256
+%r512 = or i288 %r506, %r511
+%r513 = zext i288 %r512 to i320
+%r515 = getelementptr i32, i32* %r455, i32 9
+%r516 = load i32, i32* %r515
+%r517 = zext i32 %r516 to i320
+%r518 = shl i320 %r517, 288
+%r519 = or i320 %r513, %r518
+%r520 = zext i320 %r519 to i352
+%r522 = getelementptr i32, i32* %r455, i32 10
+%r523 = load i32, i32* %r522
+%r524 = zext i32 %r523 to i352
+%r525 = shl i352 %r524, 320
+%r526 = or i352 %r520, %r525
+%r527 = zext i352 %r526 to i384
+%r529 = getelementptr i32, i32* %r455, i32 11
+%r530 = load i32, i32* %r529
+%r531 = zext i32 %r530 to i384
+%r532 = shl i384 %r531, 352
+%r533 = or i384 %r527, %r532
+%r534 = zext i384 %r533 to i416
+%r536 = getelementptr i32, i32* %r455, i32 12
+%r537 = load i32, i32* %r536
+%r538 = zext i32 %r537 to i416
+%r539 = shl i416 %r538, 384
+%r540 = or i416 %r534, %r539
+%r541 = zext i416 %r540 to i448
+%r543 = getelementptr i32, i32* %r455, i32 13
+%r544 = load i32, i32* %r543
+%r545 = zext i32 %r544 to i448
+%r546 = shl i448 %r545, 416
+%r547 = or i448 %r541, %r546
+%r548 = zext i448 %r547 to i480
+%r549 = sub i480 %r453, %r548
+%r550 = zext i480 %r549 to i672
+%r552 = getelementptr i32, i32* %r1, i32 7
+%r553 = load i32, i32* %r552
+%r554 = zext i32 %r553 to i64
+%r556 = getelementptr i32, i32* %r552, i32 1
+%r557 = load i32, i32* %r556
+%r558 = zext i32 %r557 to i64
+%r559 = shl i64 %r558, 32
+%r560 = or i64 %r554, %r559
+%r561 = zext i64 %r560 to i96
+%r563 = getelementptr i32, i32* %r552, i32 2
+%r564 = load i32, i32* %r563
+%r565 = zext i32 %r564 to i96
+%r566 = shl i96 %r565, 64
+%r567 = or i96 %r561, %r566
+%r568 = zext i96 %r567 to i128
+%r570 = getelementptr i32, i32* %r552, i32 3
+%r571 = load i32, i32* %r570
+%r572 = zext i32 %r571 to i128
+%r573 = shl i128 %r572, 96
+%r574 = or i128 %r568, %r573
+%r575 = zext i128 %r574 to i160
+%r577 = getelementptr i32, i32* %r552, i32 4
+%r578 = load i32, i32* %r577
+%r579 = zext i32 %r578 to i160
+%r580 = shl i160 %r579, 128
+%r581 = or i160 %r575, %r580
+%r582 = zext i160 %r581 to i192
+%r584 = getelementptr i32, i32* %r552, i32 5
+%r585 = load i32, i32* %r584
+%r586 = zext i32 %r585 to i192
+%r587 = shl i192 %r586, 160
+%r588 = or i192 %r582, %r587
+%r589 = zext i192 %r588 to i224
+%r591 = getelementptr i32, i32* %r552, i32 6
+%r592 = load i32, i32* %r591
+%r593 = zext i32 %r592 to i224
+%r594 = shl i224 %r593, 192
+%r595 = or i224 %r589, %r594
+%r596 = zext i224 %r595 to i256
+%r598 = getelementptr i32, i32* %r552, i32 7
+%r599 = load i32, i32* %r598
+%r600 = zext i32 %r599 to i256
+%r601 = shl i256 %r600, 224
+%r602 = or i256 %r596, %r601
+%r603 = zext i256 %r602 to i288
+%r605 = getelementptr i32, i32* %r552, i32 8
+%r606 = load i32, i32* %r605
+%r607 = zext i32 %r606 to i288
+%r608 = shl i288 %r607, 256
+%r609 = or i288 %r603, %r608
+%r610 = zext i288 %r609 to i320
+%r612 = getelementptr i32, i32* %r552, i32 9
+%r613 = load i32, i32* %r612
+%r614 = zext i32 %r613 to i320
+%r615 = shl i320 %r614, 288
+%r616 = or i320 %r610, %r615
+%r617 = zext i320 %r616 to i352
+%r619 = getelementptr i32, i32* %r552, i32 10
+%r620 = load i32, i32* %r619
+%r621 = zext i32 %r620 to i352
+%r622 = shl i352 %r621, 320
+%r623 = or i352 %r617, %r622
+%r624 = zext i352 %r623 to i384
+%r626 = getelementptr i32, i32* %r552, i32 11
+%r627 = load i32, i32* %r626
+%r628 = zext i32 %r627 to i384
+%r629 = shl i384 %r628, 352
+%r630 = or i384 %r624, %r629
+%r631 = zext i384 %r630 to i416
+%r633 = getelementptr i32, i32* %r552, i32 12
+%r634 = load i32, i32* %r633
+%r635 = zext i32 %r634 to i416
+%r636 = shl i416 %r635, 384
+%r637 = or i416 %r631, %r636
+%r638 = zext i416 %r637 to i448
+%r640 = getelementptr i32, i32* %r552, i32 13
+%r641 = load i32, i32* %r640
+%r642 = zext i32 %r641 to i448
+%r643 = shl i448 %r642, 416
+%r644 = or i448 %r638, %r643
+%r645 = zext i448 %r644 to i480
+%r647 = getelementptr i32, i32* %r552, i32 14
+%r648 = load i32, i32* %r647
+%r649 = zext i32 %r648 to i480
+%r650 = shl i480 %r649, 448
+%r651 = or i480 %r645, %r650
+%r652 = zext i480 %r651 to i512
+%r654 = getelementptr i32, i32* %r552, i32 15
+%r655 = load i32, i32* %r654
+%r656 = zext i32 %r655 to i512
+%r657 = shl i512 %r656, 480
+%r658 = or i512 %r652, %r657
+%r659 = zext i512 %r658 to i544
+%r661 = getelementptr i32, i32* %r552, i32 16
+%r662 = load i32, i32* %r661
+%r663 = zext i32 %r662 to i544
+%r664 = shl i544 %r663, 512
+%r665 = or i544 %r659, %r664
+%r666 = zext i544 %r665 to i576
+%r668 = getelementptr i32, i32* %r552, i32 17
+%r669 = load i32, i32* %r668
+%r670 = zext i32 %r669 to i576
+%r671 = shl i576 %r670, 544
+%r672 = or i576 %r666, %r671
+%r673 = zext i576 %r672 to i608
+%r675 = getelementptr i32, i32* %r552, i32 18
+%r676 = load i32, i32* %r675
+%r677 = zext i32 %r676 to i608
+%r678 = shl i608 %r677, 576
+%r679 = or i608 %r673, %r678
+%r680 = zext i608 %r679 to i640
+%r682 = getelementptr i32, i32* %r552, i32 19
+%r683 = load i32, i32* %r682
+%r684 = zext i32 %r683 to i640
+%r685 = shl i640 %r684, 608
+%r686 = or i640 %r680, %r685
+%r687 = zext i640 %r686 to i672
+%r689 = getelementptr i32, i32* %r552, i32 20
+%r690 = load i32, i32* %r689
+%r691 = zext i32 %r690 to i672
+%r692 = shl i672 %r691, 640
+%r693 = or i672 %r687, %r692
+%r694 = add i672 %r550, %r693
+%r696 = getelementptr i32, i32* %r1, i32 7
+%r697 = trunc i672 %r694 to i32
+%r699 = getelementptr i32, i32* %r696, i32 0
+store i32 %r697, i32* %r699
+%r700 = lshr i672 %r694, 32
+%r701 = trunc i672 %r700 to i32
+%r703 = getelementptr i32, i32* %r696, i32 1
+store i32 %r701, i32* %r703
+%r704 = lshr i672 %r700, 32
+%r705 = trunc i672 %r704 to i32
+%r707 = getelementptr i32, i32* %r696, i32 2
+store i32 %r705, i32* %r707
+%r708 = lshr i672 %r704, 32
+%r709 = trunc i672 %r708 to i32
+%r711 = getelementptr i32, i32* %r696, i32 3
+store i32 %r709, i32* %r711
+%r712 = lshr i672 %r708, 32
+%r713 = trunc i672 %r712 to i32
+%r715 = getelementptr i32, i32* %r696, i32 4
+store i32 %r713, i32* %r715
+%r716 = lshr i672 %r712, 32
+%r717 = trunc i672 %r716 to i32
+%r719 = getelementptr i32, i32* %r696, i32 5
+store i32 %r717, i32* %r719
+%r720 = lshr i672 %r716, 32
+%r721 = trunc i672 %r720 to i32
+%r723 = getelementptr i32, i32* %r696, i32 6
+store i32 %r721, i32* %r723
+%r724 = lshr i672 %r720, 32
+%r725 = trunc i672 %r724 to i32
+%r727 = getelementptr i32, i32* %r696, i32 7
+store i32 %r725, i32* %r727
+%r728 = lshr i672 %r724, 32
+%r729 = trunc i672 %r728 to i32
+%r731 = getelementptr i32, i32* %r696, i32 8
+store i32 %r729, i32* %r731
+%r732 = lshr i672 %r728, 32
+%r733 = trunc i672 %r732 to i32
+%r735 = getelementptr i32, i32* %r696, i32 9
+store i32 %r733, i32* %r735
+%r736 = lshr i672 %r732, 32
+%r737 = trunc i672 %r736 to i32
+%r739 = getelementptr i32, i32* %r696, i32 10
+store i32 %r737, i32* %r739
+%r740 = lshr i672 %r736, 32
+%r741 = trunc i672 %r740 to i32
+%r743 = getelementptr i32, i32* %r696, i32 11
+store i32 %r741, i32* %r743
+%r744 = lshr i672 %r740, 32
+%r745 = trunc i672 %r744 to i32
+%r747 = getelementptr i32, i32* %r696, i32 12
+store i32 %r745, i32* %r747
+%r748 = lshr i672 %r744, 32
+%r749 = trunc i672 %r748 to i32
+%r751 = getelementptr i32, i32* %r696, i32 13
+store i32 %r749, i32* %r751
+%r752 = lshr i672 %r748, 32
+%r753 = trunc i672 %r752 to i32
+%r755 = getelementptr i32, i32* %r696, i32 14
+store i32 %r753, i32* %r755
+%r756 = lshr i672 %r752, 32
+%r757 = trunc i672 %r756 to i32
+%r759 = getelementptr i32, i32* %r696, i32 15
+store i32 %r757, i32* %r759
+%r760 = lshr i672 %r756, 32
+%r761 = trunc i672 %r760 to i32
+%r763 = getelementptr i32, i32* %r696, i32 16
+store i32 %r761, i32* %r763
+%r764 = lshr i672 %r760, 32
+%r765 = trunc i672 %r764 to i32
+%r767 = getelementptr i32, i32* %r696, i32 17
+store i32 %r765, i32* %r767
+%r768 = lshr i672 %r764, 32
+%r769 = trunc i672 %r768 to i32
+%r771 = getelementptr i32, i32* %r696, i32 18
+store i32 %r769, i32* %r771
+%r772 = lshr i672 %r768, 32
+%r773 = trunc i672 %r772 to i32
+%r775 = getelementptr i32, i32* %r696, i32 19
+store i32 %r773, i32* %r775
+%r776 = lshr i672 %r772, 32
+%r777 = trunc i672 %r776 to i32
+%r779 = getelementptr i32, i32* %r696, i32 20
+store i32 %r777, i32* %r779
+ret void
+}
+define void @mcl_fp_mont14L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; Montgomery-style multiply (per the name): 14 rounds, one per 32-bit word of r3; result stored to r1[0..13]
+{ ; r2 and r4 are handed to mulPv448x32 (defined elsewhere); r3 is read at word indices 0..13; r4 is read at indices -1..13
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; address of the word stored just before r4[0]
+%r7 = load i32, i32* %r6 ; per-round multiplier (presumably -p^-1 mod 2^32 for modulus p = r4 -- confirm against caller)
+%r9 = getelementptr i32, i32* %r3, i32 0 ; round 0: r3[0]
+%r10 = load i32, i32* %r9
+%r11 = call i480 @mulPv448x32(i32* %r2, i32 %r10) ; helper result for (r2, r3[0]); presumably the 448x32-bit product -- helper body not visible here
+%r12 = zext i480 %r11 to i512 ; widen so the following add cannot wrap at 480 bits
+%r13 = trunc i480 %r11 to i32 ; low 32-bit word of the running value
+%r14 = mul i32 %r13, %r7 ; q = low word * r7 (mod 2^32)
+%r15 = call i480 @mulPv448x32(i32* %r4, i32 %r14) ; helper result for (r4, q)
+%r16 = zext i480 %r15 to i512
+%r17 = add i512 %r12, %r16 ; running value + helper(r4, q)
+%r18 = lshr i512 %r17, 32 ; shift the accumulator right by one 32-bit word
+%r20 = getelementptr i32, i32* %r3, i32 1 ; round 1: r3[1]
+%r21 = load i32, i32* %r20
+%r22 = call i480 @mulPv448x32(i32* %r2, i32 %r21)
+%r23 = zext i480 %r22 to i512
+%r24 = add i512 %r18, %r23 ; accumulator += helper(r2, r3[1])
+%r25 = trunc i512 %r24 to i32
+%r26 = mul i32 %r25, %r7 ; q for this round
+%r27 = call i480 @mulPv448x32(i32* %r4, i32 %r26)
+%r28 = zext i480 %r27 to i512
+%r29 = add i512 %r24, %r28 ; accumulator += helper(r4, q)
+%r30 = lshr i512 %r29, 32 ; drop the low word; rounds 2..13 repeat this exact pattern
+%r32 = getelementptr i32, i32* %r3, i32 2 ; round 2: r3[2]
+%r33 = load i32, i32* %r32
+%r34 = call i480 @mulPv448x32(i32* %r2, i32 %r33)
+%r35 = zext i480 %r34 to i512
+%r36 = add i512 %r30, %r35
+%r37 = trunc i512 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i480 @mulPv448x32(i32* %r4, i32 %r38)
+%r40 = zext i480 %r39 to i512
+%r41 = add i512 %r36, %r40
+%r42 = lshr i512 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3 ; round 3: r3[3]
+%r45 = load i32, i32* %r44
+%r46 = call i480 @mulPv448x32(i32* %r2, i32 %r45)
+%r47 = zext i480 %r46 to i512
+%r48 = add i512 %r42, %r47
+%r49 = trunc i512 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i480 @mulPv448x32(i32* %r4, i32 %r50)
+%r52 = zext i480 %r51 to i512
+%r53 = add i512 %r48, %r52
+%r54 = lshr i512 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4 ; round 4: r3[4]
+%r57 = load i32, i32* %r56
+%r58 = call i480 @mulPv448x32(i32* %r2, i32 %r57)
+%r59 = zext i480 %r58 to i512
+%r60 = add i512 %r54, %r59
+%r61 = trunc i512 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i480 @mulPv448x32(i32* %r4, i32 %r62)
+%r64 = zext i480 %r63 to i512
+%r65 = add i512 %r60, %r64
+%r66 = lshr i512 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5 ; round 5: r3[5]
+%r69 = load i32, i32* %r68
+%r70 = call i480 @mulPv448x32(i32* %r2, i32 %r69)
+%r71 = zext i480 %r70 to i512
+%r72 = add i512 %r66, %r71
+%r73 = trunc i512 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i480 @mulPv448x32(i32* %r4, i32 %r74)
+%r76 = zext i480 %r75 to i512
+%r77 = add i512 %r72, %r76
+%r78 = lshr i512 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6 ; round 6: r3[6]
+%r81 = load i32, i32* %r80
+%r82 = call i480 @mulPv448x32(i32* %r2, i32 %r81)
+%r83 = zext i480 %r82 to i512
+%r84 = add i512 %r78, %r83
+%r85 = trunc i512 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i480 @mulPv448x32(i32* %r4, i32 %r86)
+%r88 = zext i480 %r87 to i512
+%r89 = add i512 %r84, %r88
+%r90 = lshr i512 %r89, 32
+%r92 = getelementptr i32, i32* %r3, i32 7 ; round 7: r3[7]
+%r93 = load i32, i32* %r92
+%r94 = call i480 @mulPv448x32(i32* %r2, i32 %r93)
+%r95 = zext i480 %r94 to i512
+%r96 = add i512 %r90, %r95
+%r97 = trunc i512 %r96 to i32
+%r98 = mul i32 %r97, %r7
+%r99 = call i480 @mulPv448x32(i32* %r4, i32 %r98)
+%r100 = zext i480 %r99 to i512
+%r101 = add i512 %r96, %r100
+%r102 = lshr i512 %r101, 32
+%r104 = getelementptr i32, i32* %r3, i32 8 ; round 8: r3[8]
+%r105 = load i32, i32* %r104
+%r106 = call i480 @mulPv448x32(i32* %r2, i32 %r105)
+%r107 = zext i480 %r106 to i512
+%r108 = add i512 %r102, %r107
+%r109 = trunc i512 %r108 to i32
+%r110 = mul i32 %r109, %r7
+%r111 = call i480 @mulPv448x32(i32* %r4, i32 %r110)
+%r112 = zext i480 %r111 to i512
+%r113 = add i512 %r108, %r112
+%r114 = lshr i512 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 9 ; round 9: r3[9]
+%r117 = load i32, i32* %r116
+%r118 = call i480 @mulPv448x32(i32* %r2, i32 %r117)
+%r119 = zext i480 %r118 to i512
+%r120 = add i512 %r114, %r119
+%r121 = trunc i512 %r120 to i32
+%r122 = mul i32 %r121, %r7
+%r123 = call i480 @mulPv448x32(i32* %r4, i32 %r122)
+%r124 = zext i480 %r123 to i512
+%r125 = add i512 %r120, %r124
+%r126 = lshr i512 %r125, 32
+%r128 = getelementptr i32, i32* %r3, i32 10 ; round 10: r3[10]
+%r129 = load i32, i32* %r128
+%r130 = call i480 @mulPv448x32(i32* %r2, i32 %r129)
+%r131 = zext i480 %r130 to i512
+%r132 = add i512 %r126, %r131
+%r133 = trunc i512 %r132 to i32
+%r134 = mul i32 %r133, %r7
+%r135 = call i480 @mulPv448x32(i32* %r4, i32 %r134)
+%r136 = zext i480 %r135 to i512
+%r137 = add i512 %r132, %r136
+%r138 = lshr i512 %r137, 32
+%r140 = getelementptr i32, i32* %r3, i32 11 ; round 11: r3[11]
+%r141 = load i32, i32* %r140
+%r142 = call i480 @mulPv448x32(i32* %r2, i32 %r141)
+%r143 = zext i480 %r142 to i512
+%r144 = add i512 %r138, %r143
+%r145 = trunc i512 %r144 to i32
+%r146 = mul i32 %r145, %r7
+%r147 = call i480 @mulPv448x32(i32* %r4, i32 %r146)
+%r148 = zext i480 %r147 to i512
+%r149 = add i512 %r144, %r148
+%r150 = lshr i512 %r149, 32
+%r152 = getelementptr i32, i32* %r3, i32 12 ; round 12: r3[12]
+%r153 = load i32, i32* %r152
+%r154 = call i480 @mulPv448x32(i32* %r2, i32 %r153)
+%r155 = zext i480 %r154 to i512
+%r156 = add i512 %r150, %r155
+%r157 = trunc i512 %r156 to i32
+%r158 = mul i32 %r157, %r7
+%r159 = call i480 @mulPv448x32(i32* %r4, i32 %r158)
+%r160 = zext i480 %r159 to i512
+%r161 = add i512 %r156, %r160
+%r162 = lshr i512 %r161, 32
+%r164 = getelementptr i32, i32* %r3, i32 13 ; round 13 (last): r3[13]
+%r165 = load i32, i32* %r164
+%r166 = call i480 @mulPv448x32(i32* %r2, i32 %r165)
+%r167 = zext i480 %r166 to i512
+%r168 = add i512 %r162, %r167
+%r169 = trunc i512 %r168 to i32
+%r170 = mul i32 %r169, %r7
+%r171 = call i480 @mulPv448x32(i32* %r4, i32 %r170)
+%r172 = zext i480 %r171 to i512
+%r173 = add i512 %r168, %r172
+%r174 = lshr i512 %r173, 32
+%r175 = trunc i512 %r174 to i480 ; candidate result t, kept at 480 bits for the comparison below
+%r176 = load i32, i32* %r4 ; from here: pack r4[0..13] into one i448, least-significant word first
+%r177 = zext i32 %r176 to i64
+%r179 = getelementptr i32, i32* %r4, i32 1
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i64
+%r182 = shl i64 %r181, 32
+%r183 = or i64 %r177, %r182
+%r184 = zext i64 %r183 to i96
+%r186 = getelementptr i32, i32* %r4, i32 2
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i96
+%r189 = shl i96 %r188, 64
+%r190 = or i96 %r184, %r189
+%r191 = zext i96 %r190 to i128
+%r193 = getelementptr i32, i32* %r4, i32 3
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i128
+%r196 = shl i128 %r195, 96
+%r197 = or i128 %r191, %r196
+%r198 = zext i128 %r197 to i160
+%r200 = getelementptr i32, i32* %r4, i32 4
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i160
+%r203 = shl i160 %r202, 128
+%r204 = or i160 %r198, %r203
+%r205 = zext i160 %r204 to i192
+%r207 = getelementptr i32, i32* %r4, i32 5
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i192
+%r210 = shl i192 %r209, 160
+%r211 = or i192 %r205, %r210
+%r212 = zext i192 %r211 to i224
+%r214 = getelementptr i32, i32* %r4, i32 6
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i224
+%r217 = shl i224 %r216, 192
+%r218 = or i224 %r212, %r217
+%r219 = zext i224 %r218 to i256
+%r221 = getelementptr i32, i32* %r4, i32 7
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i256
+%r224 = shl i256 %r223, 224
+%r225 = or i256 %r219, %r224
+%r226 = zext i256 %r225 to i288
+%r228 = getelementptr i32, i32* %r4, i32 8
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i288
+%r231 = shl i288 %r230, 256
+%r232 = or i288 %r226, %r231
+%r233 = zext i288 %r232 to i320
+%r235 = getelementptr i32, i32* %r4, i32 9
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i320
+%r238 = shl i320 %r237, 288
+%r239 = or i320 %r233, %r238
+%r240 = zext i320 %r239 to i352
+%r242 = getelementptr i32, i32* %r4, i32 10
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i352
+%r245 = shl i352 %r244, 320
+%r246 = or i352 %r240, %r245
+%r247 = zext i352 %r246 to i384
+%r249 = getelementptr i32, i32* %r4, i32 11
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i384
+%r252 = shl i384 %r251, 352
+%r253 = or i384 %r247, %r252
+%r254 = zext i384 %r253 to i416
+%r256 = getelementptr i32, i32* %r4, i32 12
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i416
+%r259 = shl i416 %r258, 384
+%r260 = or i416 %r254, %r259
+%r261 = zext i416 %r260 to i448
+%r263 = getelementptr i32, i32* %r4, i32 13
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i448
+%r266 = shl i448 %r265, 416
+%r267 = or i448 %r261, %r266 ; r4[0..13] as a single 448-bit integer
+%r268 = zext i448 %r267 to i480
+%r269 = sub i480 %r175, %r268 ; t - r4, computed in 480 bits
+%r270 = lshr i480 %r269, 448 ; move bit 448 of the difference down to bit 0
+%r271 = trunc i480 %r270 to i1 ; selector bit (set when the subtraction wrapped below zero)
+%r272 = select i1 %r271, i480 %r175, i480 %r269 ; keep t when the bit is set, otherwise use t - r4; branch-free
+%r273 = trunc i480 %r272 to i448 ; final 448-bit result
+%r274 = trunc i448 %r273 to i32 ; from here: store the result to r1[0..13], low word first, shifting right 32 bits per word
+%r276 = getelementptr i32, i32* %r1, i32 0
+store i32 %r274, i32* %r276
+%r277 = lshr i448 %r273, 32
+%r278 = trunc i448 %r277 to i32
+%r280 = getelementptr i32, i32* %r1, i32 1
+store i32 %r278, i32* %r280
+%r281 = lshr i448 %r277, 32
+%r282 = trunc i448 %r281 to i32
+%r284 = getelementptr i32, i32* %r1, i32 2
+store i32 %r282, i32* %r284
+%r285 = lshr i448 %r281, 32
+%r286 = trunc i448 %r285 to i32
+%r288 = getelementptr i32, i32* %r1, i32 3
+store i32 %r286, i32* %r288
+%r289 = lshr i448 %r285, 32
+%r290 = trunc i448 %r289 to i32
+%r292 = getelementptr i32, i32* %r1, i32 4
+store i32 %r290, i32* %r292
+%r293 = lshr i448 %r289, 32
+%r294 = trunc i448 %r293 to i32
+%r296 = getelementptr i32, i32* %r1, i32 5
+store i32 %r294, i32* %r296
+%r297 = lshr i448 %r293, 32
+%r298 = trunc i448 %r297 to i32
+%r300 = getelementptr i32, i32* %r1, i32 6
+store i32 %r298, i32* %r300
+%r301 = lshr i448 %r297, 32
+%r302 = trunc i448 %r301 to i32
+%r304 = getelementptr i32, i32* %r1, i32 7
+store i32 %r302, i32* %r304
+%r305 = lshr i448 %r301, 32
+%r306 = trunc i448 %r305 to i32
+%r308 = getelementptr i32, i32* %r1, i32 8
+store i32 %r306, i32* %r308
+%r309 = lshr i448 %r305, 32
+%r310 = trunc i448 %r309 to i32
+%r312 = getelementptr i32, i32* %r1, i32 9
+store i32 %r310, i32* %r312
+%r313 = lshr i448 %r309, 32
+%r314 = trunc i448 %r313 to i32
+%r316 = getelementptr i32, i32* %r1, i32 10
+store i32 %r314, i32* %r316
+%r317 = lshr i448 %r313, 32
+%r318 = trunc i448 %r317 to i32
+%r320 = getelementptr i32, i32* %r1, i32 11
+store i32 %r318, i32* %r320
+%r321 = lshr i448 %r317, 32
+%r322 = trunc i448 %r321 to i32
+%r324 = getelementptr i32, i32* %r1, i32 12
+store i32 %r322, i32* %r324
+%r325 = lshr i448 %r321, 32
+%r326 = trunc i448 %r325 to i32
+%r328 = getelementptr i32, i32* %r1, i32 13
+store i32 %r326, i32* %r328
+ret void
+}
+define void @mcl_fp_montNF14L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; Montgomery-style multiply, "NF" variant (per the name): 14 rounds over r3[0..13], sums kept in i480, result stored to r1[0..13]
+{ ; r2 and r4 are handed to mulPv448x32 (defined elsewhere); r4 is read at word indices -1..13
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; address of the word stored just before r4[0]
+%r7 = load i32, i32* %r6 ; per-round multiplier (presumably -p^-1 mod 2^32 for modulus p = r4 -- confirm against caller)
+%r8 = load i32, i32* %r3 ; round 0: r3[0]
+%r9 = call i480 @mulPv448x32(i32* %r2, i32 %r8) ; helper result for (r2, r3[0]); presumably the 448x32-bit product -- helper body not visible here
+%r10 = trunc i480 %r9 to i32 ; low 32-bit word of the running value
+%r11 = mul i32 %r10, %r7 ; q = low word * r7 (mod 2^32)
+%r12 = call i480 @mulPv448x32(i32* %r4, i32 %r11) ; helper result for (r4, q)
+%r13 = add i480 %r9, %r12 ; added at 480 bits without widening (assumes no carry out of bit 479 -- TODO confirm the modulus-size precondition)
+%r14 = lshr i480 %r13, 32 ; shift the accumulator right by one 32-bit word
+%r16 = getelementptr i32, i32* %r3, i32 1 ; round 1: r3[1]
+%r17 = load i32, i32* %r16
+%r18 = call i480 @mulPv448x32(i32* %r2, i32 %r17)
+%r19 = add i480 %r14, %r18 ; accumulator += helper(r2, r3[1])
+%r20 = trunc i480 %r19 to i32
+%r21 = mul i32 %r20, %r7 ; q for this round
+%r22 = call i480 @mulPv448x32(i32* %r4, i32 %r21)
+%r23 = add i480 %r19, %r22 ; accumulator += helper(r4, q)
+%r24 = lshr i480 %r23, 32 ; drop the low word; rounds 2..13 repeat this exact pattern
+%r26 = getelementptr i32, i32* %r3, i32 2 ; round 2: r3[2]
+%r27 = load i32, i32* %r26
+%r28 = call i480 @mulPv448x32(i32* %r2, i32 %r27)
+%r29 = add i480 %r24, %r28
+%r30 = trunc i480 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i480 @mulPv448x32(i32* %r4, i32 %r31)
+%r33 = add i480 %r29, %r32
+%r34 = lshr i480 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3 ; round 3: r3[3]
+%r37 = load i32, i32* %r36
+%r38 = call i480 @mulPv448x32(i32* %r2, i32 %r37)
+%r39 = add i480 %r34, %r38
+%r40 = trunc i480 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i480 @mulPv448x32(i32* %r4, i32 %r41)
+%r43 = add i480 %r39, %r42
+%r44 = lshr i480 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4 ; round 4: r3[4]
+%r47 = load i32, i32* %r46
+%r48 = call i480 @mulPv448x32(i32* %r2, i32 %r47)
+%r49 = add i480 %r44, %r48
+%r50 = trunc i480 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i480 @mulPv448x32(i32* %r4, i32 %r51)
+%r53 = add i480 %r49, %r52
+%r54 = lshr i480 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5 ; round 5: r3[5]
+%r57 = load i32, i32* %r56
+%r58 = call i480 @mulPv448x32(i32* %r2, i32 %r57)
+%r59 = add i480 %r54, %r58
+%r60 = trunc i480 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i480 @mulPv448x32(i32* %r4, i32 %r61)
+%r63 = add i480 %r59, %r62
+%r64 = lshr i480 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6 ; round 6: r3[6]
+%r67 = load i32, i32* %r66
+%r68 = call i480 @mulPv448x32(i32* %r2, i32 %r67)
+%r69 = add i480 %r64, %r68
+%r70 = trunc i480 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i480 @mulPv448x32(i32* %r4, i32 %r71)
+%r73 = add i480 %r69, %r72
+%r74 = lshr i480 %r73, 32
+%r76 = getelementptr i32, i32* %r3, i32 7 ; round 7: r3[7]
+%r77 = load i32, i32* %r76
+%r78 = call i480 @mulPv448x32(i32* %r2, i32 %r77)
+%r79 = add i480 %r74, %r78
+%r80 = trunc i480 %r79 to i32
+%r81 = mul i32 %r80, %r7
+%r82 = call i480 @mulPv448x32(i32* %r4, i32 %r81)
+%r83 = add i480 %r79, %r82
+%r84 = lshr i480 %r83, 32
+%r86 = getelementptr i32, i32* %r3, i32 8 ; round 8: r3[8]
+%r87 = load i32, i32* %r86
+%r88 = call i480 @mulPv448x32(i32* %r2, i32 %r87)
+%r89 = add i480 %r84, %r88
+%r90 = trunc i480 %r89 to i32
+%r91 = mul i32 %r90, %r7
+%r92 = call i480 @mulPv448x32(i32* %r4, i32 %r91)
+%r93 = add i480 %r89, %r92
+%r94 = lshr i480 %r93, 32
+%r96 = getelementptr i32, i32* %r3, i32 9 ; round 9: r3[9]
+%r97 = load i32, i32* %r96
+%r98 = call i480 @mulPv448x32(i32* %r2, i32 %r97)
+%r99 = add i480 %r94, %r98
+%r100 = trunc i480 %r99 to i32
+%r101 = mul i32 %r100, %r7
+%r102 = call i480 @mulPv448x32(i32* %r4, i32 %r101)
+%r103 = add i480 %r99, %r102
+%r104 = lshr i480 %r103, 32
+%r106 = getelementptr i32, i32* %r3, i32 10 ; round 10: r3[10]
+%r107 = load i32, i32* %r106
+%r108 = call i480 @mulPv448x32(i32* %r2, i32 %r107)
+%r109 = add i480 %r104, %r108
+%r110 = trunc i480 %r109 to i32
+%r111 = mul i32 %r110, %r7
+%r112 = call i480 @mulPv448x32(i32* %r4, i32 %r111)
+%r113 = add i480 %r109, %r112
+%r114 = lshr i480 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 11 ; round 11: r3[11]
+%r117 = load i32, i32* %r116
+%r118 = call i480 @mulPv448x32(i32* %r2, i32 %r117)
+%r119 = add i480 %r114, %r118
+%r120 = trunc i480 %r119 to i32
+%r121 = mul i32 %r120, %r7
+%r122 = call i480 @mulPv448x32(i32* %r4, i32 %r121)
+%r123 = add i480 %r119, %r122
+%r124 = lshr i480 %r123, 32
+%r126 = getelementptr i32, i32* %r3, i32 12 ; round 12: r3[12]
+%r127 = load i32, i32* %r126
+%r128 = call i480 @mulPv448x32(i32* %r2, i32 %r127)
+%r129 = add i480 %r124, %r128
+%r130 = trunc i480 %r129 to i32
+%r131 = mul i32 %r130, %r7
+%r132 = call i480 @mulPv448x32(i32* %r4, i32 %r131)
+%r133 = add i480 %r129, %r132
+%r134 = lshr i480 %r133, 32
+%r136 = getelementptr i32, i32* %r3, i32 13 ; round 13 (last): r3[13]
+%r137 = load i32, i32* %r136
+%r138 = call i480 @mulPv448x32(i32* %r2, i32 %r137)
+%r139 = add i480 %r134, %r138
+%r140 = trunc i480 %r139 to i32
+%r141 = mul i32 %r140, %r7
+%r142 = call i480 @mulPv448x32(i32* %r4, i32 %r141)
+%r143 = add i480 %r139, %r142
+%r144 = lshr i480 %r143, 32
+%r145 = trunc i480 %r144 to i448 ; candidate result t, narrowed to 448 bits
+%r146 = load i32, i32* %r4 ; from here: pack r4[0..13] into one i448, least-significant word first
+%r147 = zext i32 %r146 to i64
+%r149 = getelementptr i32, i32* %r4, i32 1
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i64
+%r152 = shl i64 %r151, 32
+%r153 = or i64 %r147, %r152
+%r154 = zext i64 %r153 to i96
+%r156 = getelementptr i32, i32* %r4, i32 2
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i96
+%r159 = shl i96 %r158, 64
+%r160 = or i96 %r154, %r159
+%r161 = zext i96 %r160 to i128
+%r163 = getelementptr i32, i32* %r4, i32 3
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i128
+%r166 = shl i128 %r165, 96
+%r167 = or i128 %r161, %r166
+%r168 = zext i128 %r167 to i160
+%r170 = getelementptr i32, i32* %r4, i32 4
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i160
+%r173 = shl i160 %r172, 128
+%r174 = or i160 %r168, %r173
+%r175 = zext i160 %r174 to i192
+%r177 = getelementptr i32, i32* %r4, i32 5
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i192
+%r180 = shl i192 %r179, 160
+%r181 = or i192 %r175, %r180
+%r182 = zext i192 %r181 to i224
+%r184 = getelementptr i32, i32* %r4, i32 6
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i224
+%r187 = shl i224 %r186, 192
+%r188 = or i224 %r182, %r187
+%r189 = zext i224 %r188 to i256
+%r191 = getelementptr i32, i32* %r4, i32 7
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i256
+%r194 = shl i256 %r193, 224
+%r195 = or i256 %r189, %r194
+%r196 = zext i256 %r195 to i288
+%r198 = getelementptr i32, i32* %r4, i32 8
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i288
+%r201 = shl i288 %r200, 256
+%r202 = or i288 %r196, %r201
+%r203 = zext i288 %r202 to i320
+%r205 = getelementptr i32, i32* %r4, i32 9
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i320
+%r208 = shl i320 %r207, 288
+%r209 = or i320 %r203, %r208
+%r210 = zext i320 %r209 to i352
+%r212 = getelementptr i32, i32* %r4, i32 10
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i352
+%r215 = shl i352 %r214, 320
+%r216 = or i352 %r210, %r215
+%r217 = zext i352 %r216 to i384
+%r219 = getelementptr i32, i32* %r4, i32 11
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i384
+%r222 = shl i384 %r221, 352
+%r223 = or i384 %r217, %r222
+%r224 = zext i384 %r223 to i416
+%r226 = getelementptr i32, i32* %r4, i32 12
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i416
+%r229 = shl i416 %r228, 384
+%r230 = or i416 %r224, %r229
+%r231 = zext i416 %r230 to i448
+%r233 = getelementptr i32, i32* %r4, i32 13
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i448
+%r236 = shl i448 %r235, 416
+%r237 = or i448 %r231, %r236 ; r4[0..13] as a single 448-bit integer
+%r238 = sub i448 %r145, %r237 ; t - r4, computed in 448 bits (wraps modulo 2^448)
+%r239 = lshr i448 %r238, 447 ; move the top bit (bit 447) of the difference down to bit 0
+%r240 = trunc i448 %r239 to i1 ; selector: top bit of the difference, read as its sign
+%r241 = select i1 %r240, i448 %r145, i448 %r238 ; keep t when the top bit is set, otherwise use t - r4; branch-free
+%r242 = trunc i448 %r241 to i32 ; from here: store the result to r1[0..13], low word first, shifting right 32 bits per word
+%r244 = getelementptr i32, i32* %r1, i32 0
+store i32 %r242, i32* %r244
+%r245 = lshr i448 %r241, 32
+%r246 = trunc i448 %r245 to i32
+%r248 = getelementptr i32, i32* %r1, i32 1
+store i32 %r246, i32* %r248
+%r249 = lshr i448 %r245, 32
+%r250 = trunc i448 %r249 to i32
+%r252 = getelementptr i32, i32* %r1, i32 2
+store i32 %r250, i32* %r252
+%r253 = lshr i448 %r249, 32
+%r254 = trunc i448 %r253 to i32
+%r256 = getelementptr i32, i32* %r1, i32 3
+store i32 %r254, i32* %r256
+%r257 = lshr i448 %r253, 32
+%r258 = trunc i448 %r257 to i32
+%r260 = getelementptr i32, i32* %r1, i32 4
+store i32 %r258, i32* %r260
+%r261 = lshr i448 %r257, 32
+%r262 = trunc i448 %r261 to i32
+%r264 = getelementptr i32, i32* %r1, i32 5
+store i32 %r262, i32* %r264
+%r265 = lshr i448 %r261, 32
+%r266 = trunc i448 %r265 to i32
+%r268 = getelementptr i32, i32* %r1, i32 6
+store i32 %r266, i32* %r268
+%r269 = lshr i448 %r265, 32
+%r270 = trunc i448 %r269 to i32
+%r272 = getelementptr i32, i32* %r1, i32 7
+store i32 %r270, i32* %r272
+%r273 = lshr i448 %r269, 32
+%r274 = trunc i448 %r273 to i32
+%r276 = getelementptr i32, i32* %r1, i32 8
+store i32 %r274, i32* %r276
+%r277 = lshr i448 %r273, 32
+%r278 = trunc i448 %r277 to i32
+%r280 = getelementptr i32, i32* %r1, i32 9
+store i32 %r278, i32* %r280
+%r281 = lshr i448 %r277, 32
+%r282 = trunc i448 %r281 to i32
+%r284 = getelementptr i32, i32* %r1, i32 10
+store i32 %r282, i32* %r284
+%r285 = lshr i448 %r281, 32
+%r286 = trunc i448 %r285 to i32
+%r288 = getelementptr i32, i32* %r1, i32 11
+store i32 %r286, i32* %r288
+%r289 = lshr i448 %r285, 32
+%r290 = trunc i448 %r289 to i32
+%r292 = getelementptr i32, i32* %r1, i32 12
+store i32 %r290, i32* %r292
+%r293 = lshr i448 %r289, 32
+%r294 = trunc i448 %r293 to i32
+%r296 = getelementptr i32, i32* %r1, i32 13
+store i32 %r294, i32* %r296
+ret void
+}
+define void @mcl_fp_montRed14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i288
+%r59 = getelementptr i32, i32* %r3, i32 8
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i288
+%r62 = shl i288 %r61, 256
+%r63 = or i288 %r57, %r62
+%r64 = zext i288 %r63 to i320
+%r66 = getelementptr i32, i32* %r3, i32 9
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i320
+%r69 = shl i320 %r68, 288
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i352
+%r73 = getelementptr i32, i32* %r3, i32 10
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i352
+%r76 = shl i352 %r75, 320
+%r77 = or i352 %r71, %r76
+%r78 = zext i352 %r77 to i384
+%r80 = getelementptr i32, i32* %r3, i32 11
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i384
+%r83 = shl i384 %r82, 352
+%r84 = or i384 %r78, %r83
+%r85 = zext i384 %r84 to i416
+%r87 = getelementptr i32, i32* %r3, i32 12
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i416
+%r90 = shl i416 %r89, 384
+%r91 = or i416 %r85, %r90
+%r92 = zext i416 %r91 to i448
+%r94 = getelementptr i32, i32* %r3, i32 13
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i448
+%r97 = shl i448 %r96, 416
+%r98 = or i448 %r92, %r97
+%r99 = load i32, i32* %r2
+%r100 = zext i32 %r99 to i64
+%r102 = getelementptr i32, i32* %r2, i32 1
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i64
+%r105 = shl i64 %r104, 32
+%r106 = or i64 %r100, %r105
+%r107 = zext i64 %r106 to i96
+%r109 = getelementptr i32, i32* %r2, i32 2
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i96
+%r112 = shl i96 %r111, 64
+%r113 = or i96 %r107, %r112
+%r114 = zext i96 %r113 to i128
+%r116 = getelementptr i32, i32* %r2, i32 3
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i128
+%r119 = shl i128 %r118, 96
+%r120 = or i128 %r114, %r119
+%r121 = zext i128 %r120 to i160
+%r123 = getelementptr i32, i32* %r2, i32 4
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i160
+%r126 = shl i160 %r125, 128
+%r127 = or i160 %r121, %r126
+%r128 = zext i160 %r127 to i192
+%r130 = getelementptr i32, i32* %r2, i32 5
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i192
+%r133 = shl i192 %r132, 160
+%r134 = or i192 %r128, %r133
+%r135 = zext i192 %r134 to i224
+%r137 = getelementptr i32, i32* %r2, i32 6
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i224
+%r140 = shl i224 %r139, 192
+%r141 = or i224 %r135, %r140
+%r142 = zext i224 %r141 to i256
+%r144 = getelementptr i32, i32* %r2, i32 7
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i256
+%r147 = shl i256 %r146, 224
+%r148 = or i256 %r142, %r147
+%r149 = zext i256 %r148 to i288
+%r151 = getelementptr i32, i32* %r2, i32 8
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i288
+%r154 = shl i288 %r153, 256
+%r155 = or i288 %r149, %r154
+%r156 = zext i288 %r155 to i320
+%r158 = getelementptr i32, i32* %r2, i32 9
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i320
+%r161 = shl i320 %r160, 288
+%r162 = or i320 %r156, %r161
+%r163 = zext i320 %r162 to i352
+%r165 = getelementptr i32, i32* %r2, i32 10
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i352
+%r168 = shl i352 %r167, 320
+%r169 = or i352 %r163, %r168
+%r170 = zext i352 %r169 to i384
+%r172 = getelementptr i32, i32* %r2, i32 11
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i384
+%r175 = shl i384 %r174, 352
+%r176 = or i384 %r170, %r175
+%r177 = zext i384 %r176 to i416
+%r179 = getelementptr i32, i32* %r2, i32 12
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i416
+%r182 = shl i416 %r181, 384
+%r183 = or i416 %r177, %r182
+%r184 = zext i416 %r183 to i448
+%r186 = getelementptr i32, i32* %r2, i32 13
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i448
+%r189 = shl i448 %r188, 416
+%r190 = or i448 %r184, %r189
+%r191 = zext i448 %r190 to i480
+%r193 = getelementptr i32, i32* %r2, i32 14
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i480
+%r196 = shl i480 %r195, 448
+%r197 = or i480 %r191, %r196
+%r198 = zext i480 %r197 to i512
+%r200 = getelementptr i32, i32* %r2, i32 15
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i512
+%r203 = shl i512 %r202, 480
+%r204 = or i512 %r198, %r203
+%r205 = zext i512 %r204 to i544
+%r207 = getelementptr i32, i32* %r2, i32 16
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i544
+%r210 = shl i544 %r209, 512
+%r211 = or i544 %r205, %r210
+%r212 = zext i544 %r211 to i576
+%r214 = getelementptr i32, i32* %r2, i32 17
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i576
+%r217 = shl i576 %r216, 544
+%r218 = or i576 %r212, %r217
+%r219 = zext i576 %r218 to i608
+%r221 = getelementptr i32, i32* %r2, i32 18
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i608
+%r224 = shl i608 %r223, 576
+%r225 = or i608 %r219, %r224
+%r226 = zext i608 %r225 to i640
+%r228 = getelementptr i32, i32* %r2, i32 19
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i640
+%r231 = shl i640 %r230, 608
+%r232 = or i640 %r226, %r231
+%r233 = zext i640 %r232 to i672
+%r235 = getelementptr i32, i32* %r2, i32 20
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i672
+%r238 = shl i672 %r237, 640
+%r239 = or i672 %r233, %r238
+%r240 = zext i672 %r239 to i704
+%r242 = getelementptr i32, i32* %r2, i32 21
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i704
+%r245 = shl i704 %r244, 672
+%r246 = or i704 %r240, %r245
+%r247 = zext i704 %r246 to i736
+%r249 = getelementptr i32, i32* %r2, i32 22
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i736
+%r252 = shl i736 %r251, 704
+%r253 = or i736 %r247, %r252
+%r254 = zext i736 %r253 to i768
+%r256 = getelementptr i32, i32* %r2, i32 23
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i768
+%r259 = shl i768 %r258, 736
+%r260 = or i768 %r254, %r259
+%r261 = zext i768 %r260 to i800
+%r263 = getelementptr i32, i32* %r2, i32 24
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i800
+%r266 = shl i800 %r265, 768
+%r267 = or i800 %r261, %r266
+%r268 = zext i800 %r267 to i832
+%r270 = getelementptr i32, i32* %r2, i32 25
+%r271 = load i32, i32* %r270
+%r272 = zext i32 %r271 to i832
+%r273 = shl i832 %r272, 800
+%r274 = or i832 %r268, %r273
+%r275 = zext i832 %r274 to i864
+%r277 = getelementptr i32, i32* %r2, i32 26
+%r278 = load i32, i32* %r277
+%r279 = zext i32 %r278 to i864
+%r280 = shl i864 %r279, 832
+%r281 = or i864 %r275, %r280
+%r282 = zext i864 %r281 to i896
+%r284 = getelementptr i32, i32* %r2, i32 27
+%r285 = load i32, i32* %r284
+%r286 = zext i32 %r285 to i896
+%r287 = shl i896 %r286, 864
+%r288 = or i896 %r282, %r287
+%r289 = zext i896 %r288 to i928
+%r290 = trunc i928 %r289 to i32
+%r291 = mul i32 %r290, %r6
+%r292 = call i480 @mulPv448x32(i32* %r3, i32 %r291)
+%r293 = zext i480 %r292 to i928
+%r294 = add i928 %r289, %r293
+%r295 = lshr i928 %r294, 32
+%r296 = trunc i928 %r295 to i896
+%r297 = trunc i896 %r296 to i32
+%r298 = mul i32 %r297, %r6
+%r299 = call i480 @mulPv448x32(i32* %r3, i32 %r298)
+%r300 = zext i480 %r299 to i896
+%r301 = add i896 %r296, %r300
+%r302 = lshr i896 %r301, 32
+%r303 = trunc i896 %r302 to i864
+%r304 = trunc i864 %r303 to i32
+%r305 = mul i32 %r304, %r6
+%r306 = call i480 @mulPv448x32(i32* %r3, i32 %r305)
+%r307 = zext i480 %r306 to i864
+%r308 = add i864 %r303, %r307
+%r309 = lshr i864 %r308, 32
+%r310 = trunc i864 %r309 to i832
+%r311 = trunc i832 %r310 to i32
+%r312 = mul i32 %r311, %r6
+%r313 = call i480 @mulPv448x32(i32* %r3, i32 %r312)
+%r314 = zext i480 %r313 to i832
+%r315 = add i832 %r310, %r314
+%r316 = lshr i832 %r315, 32
+%r317 = trunc i832 %r316 to i800
+%r318 = trunc i800 %r317 to i32
+%r319 = mul i32 %r318, %r6
+%r320 = call i480 @mulPv448x32(i32* %r3, i32 %r319)
+%r321 = zext i480 %r320 to i800
+%r322 = add i800 %r317, %r321
+%r323 = lshr i800 %r322, 32
+%r324 = trunc i800 %r323 to i768
+%r325 = trunc i768 %r324 to i32
+%r326 = mul i32 %r325, %r6
+%r327 = call i480 @mulPv448x32(i32* %r3, i32 %r326)
+%r328 = zext i480 %r327 to i768
+%r329 = add i768 %r324, %r328
+%r330 = lshr i768 %r329, 32
+%r331 = trunc i768 %r330 to i736
+%r332 = trunc i736 %r331 to i32
+%r333 = mul i32 %r332, %r6
+%r334 = call i480 @mulPv448x32(i32* %r3, i32 %r333)
+%r335 = zext i480 %r334 to i736
+%r336 = add i736 %r331, %r335
+%r337 = lshr i736 %r336, 32
+%r338 = trunc i736 %r337 to i704
+%r339 = trunc i704 %r338 to i32
+%r340 = mul i32 %r339, %r6
+%r341 = call i480 @mulPv448x32(i32* %r3, i32 %r340)
+%r342 = zext i480 %r341 to i704
+%r343 = add i704 %r338, %r342
+%r344 = lshr i704 %r343, 32
+%r345 = trunc i704 %r344 to i672
+%r346 = trunc i672 %r345 to i32
+%r347 = mul i32 %r346, %r6
+%r348 = call i480 @mulPv448x32(i32* %r3, i32 %r347)
+%r349 = zext i480 %r348 to i672
+%r350 = add i672 %r345, %r349
+%r351 = lshr i672 %r350, 32
+%r352 = trunc i672 %r351 to i640
+%r353 = trunc i640 %r352 to i32
+%r354 = mul i32 %r353, %r6
+%r355 = call i480 @mulPv448x32(i32* %r3, i32 %r354)
+%r356 = zext i480 %r355 to i640
+%r357 = add i640 %r352, %r356
+%r358 = lshr i640 %r357, 32
+%r359 = trunc i640 %r358 to i608
+%r360 = trunc i608 %r359 to i32
+%r361 = mul i32 %r360, %r6
+%r362 = call i480 @mulPv448x32(i32* %r3, i32 %r361)
+%r363 = zext i480 %r362 to i608
+%r364 = add i608 %r359, %r363
+%r365 = lshr i608 %r364, 32
+%r366 = trunc i608 %r365 to i576
+%r367 = trunc i576 %r366 to i32
+%r368 = mul i32 %r367, %r6
+%r369 = call i480 @mulPv448x32(i32* %r3, i32 %r368)
+%r370 = zext i480 %r369 to i576
+%r371 = add i576 %r366, %r370
+%r372 = lshr i576 %r371, 32
+%r373 = trunc i576 %r372 to i544
+%r374 = trunc i544 %r373 to i32
+%r375 = mul i32 %r374, %r6
+%r376 = call i480 @mulPv448x32(i32* %r3, i32 %r375)
+%r377 = zext i480 %r376 to i544
+%r378 = add i544 %r373, %r377
+%r379 = lshr i544 %r378, 32
+%r380 = trunc i544 %r379 to i512
+%r381 = trunc i512 %r380 to i32
+%r382 = mul i32 %r381, %r6
+%r383 = call i480 @mulPv448x32(i32* %r3, i32 %r382)
+%r384 = zext i480 %r383 to i512
+%r385 = add i512 %r380, %r384
+%r386 = lshr i512 %r385, 32
+%r387 = trunc i512 %r386 to i480
+%r388 = zext i448 %r98 to i480
+%r389 = sub i480 %r387, %r388
+%r390 = lshr i480 %r389, 448
+%r391 = trunc i480 %r390 to i1
+%r392 = select i1 %r391, i480 %r387, i480 %r389
+%r393 = trunc i480 %r392 to i448
+%r394 = trunc i448 %r393 to i32
+%r396 = getelementptr i32, i32* %r1, i32 0
+store i32 %r394, i32* %r396
+%r397 = lshr i448 %r393, 32
+%r398 = trunc i448 %r397 to i32
+%r400 = getelementptr i32, i32* %r1, i32 1
+store i32 %r398, i32* %r400
+%r401 = lshr i448 %r397, 32
+%r402 = trunc i448 %r401 to i32
+%r404 = getelementptr i32, i32* %r1, i32 2
+store i32 %r402, i32* %r404
+%r405 = lshr i448 %r401, 32
+%r406 = trunc i448 %r405 to i32
+%r408 = getelementptr i32, i32* %r1, i32 3
+store i32 %r406, i32* %r408
+%r409 = lshr i448 %r405, 32
+%r410 = trunc i448 %r409 to i32
+%r412 = getelementptr i32, i32* %r1, i32 4
+store i32 %r410, i32* %r412
+%r413 = lshr i448 %r409, 32
+%r414 = trunc i448 %r413 to i32
+%r416 = getelementptr i32, i32* %r1, i32 5
+store i32 %r414, i32* %r416
+%r417 = lshr i448 %r413, 32
+%r418 = trunc i448 %r417 to i32
+%r420 = getelementptr i32, i32* %r1, i32 6
+store i32 %r418, i32* %r420
+%r421 = lshr i448 %r417, 32
+%r422 = trunc i448 %r421 to i32
+%r424 = getelementptr i32, i32* %r1, i32 7
+store i32 %r422, i32* %r424
+%r425 = lshr i448 %r421, 32
+%r426 = trunc i448 %r425 to i32
+%r428 = getelementptr i32, i32* %r1, i32 8
+store i32 %r426, i32* %r428
+%r429 = lshr i448 %r425, 32
+%r430 = trunc i448 %r429 to i32
+%r432 = getelementptr i32, i32* %r1, i32 9
+store i32 %r430, i32* %r432
+%r433 = lshr i448 %r429, 32
+%r434 = trunc i448 %r433 to i32
+%r436 = getelementptr i32, i32* %r1, i32 10
+store i32 %r434, i32* %r436
+%r437 = lshr i448 %r433, 32
+%r438 = trunc i448 %r437 to i32
+%r440 = getelementptr i32, i32* %r1, i32 11
+store i32 %r438, i32* %r440
+%r441 = lshr i448 %r437, 32
+%r442 = trunc i448 %r441 to i32
+%r444 = getelementptr i32, i32* %r1, i32 12
+store i32 %r442, i32* %r444
+%r445 = lshr i448 %r441, 32
+%r446 = trunc i448 %r445 to i32
+%r448 = getelementptr i32, i32* %r1, i32 13
+store i32 %r446, i32* %r448
+ret void
+}
+define i32 @mcl_fp_addPre14L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 14-limb (448-bit) addition: stores the low 448 bits of [%r3] + [%r4] to %r2 and returns the carry-out
+{
+%r5 = load i32, i32* %r3 ; pack the 14 i32 limbs at %r3 into one wide integer, limb 0 in the lowest bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r3, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r3, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95 ; %r96 = all 14 limbs of %r3 as a single i448
+%r97 = zext i448 %r96 to i480 ; widen by one limb so the carry out of bit 447 is not lost
+%r98 = load i32, i32* %r4 ; pack the 14 i32 limbs at %r4 the same way
+%r99 = zext i32 %r98 to i64
+%r101 = getelementptr i32, i32* %r4, i32 1
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i64
+%r104 = shl i64 %r103, 32
+%r105 = or i64 %r99, %r104
+%r106 = zext i64 %r105 to i96
+%r108 = getelementptr i32, i32* %r4, i32 2
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i96
+%r111 = shl i96 %r110, 64
+%r112 = or i96 %r106, %r111
+%r113 = zext i96 %r112 to i128
+%r115 = getelementptr i32, i32* %r4, i32 3
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i128
+%r118 = shl i128 %r117, 96
+%r119 = or i128 %r113, %r118
+%r120 = zext i128 %r119 to i160
+%r122 = getelementptr i32, i32* %r4, i32 4
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i160
+%r125 = shl i160 %r124, 128
+%r126 = or i160 %r120, %r125
+%r127 = zext i160 %r126 to i192
+%r129 = getelementptr i32, i32* %r4, i32 5
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i192
+%r132 = shl i192 %r131, 160
+%r133 = or i192 %r127, %r132
+%r134 = zext i192 %r133 to i224
+%r136 = getelementptr i32, i32* %r4, i32 6
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i224
+%r139 = shl i224 %r138, 192
+%r140 = or i224 %r134, %r139
+%r141 = zext i224 %r140 to i256
+%r143 = getelementptr i32, i32* %r4, i32 7
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i256
+%r146 = shl i256 %r145, 224
+%r147 = or i256 %r141, %r146
+%r148 = zext i256 %r147 to i288
+%r150 = getelementptr i32, i32* %r4, i32 8
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i288
+%r153 = shl i288 %r152, 256
+%r154 = or i288 %r148, %r153
+%r155 = zext i288 %r154 to i320
+%r157 = getelementptr i32, i32* %r4, i32 9
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i320
+%r160 = shl i320 %r159, 288
+%r161 = or i320 %r155, %r160
+%r162 = zext i320 %r161 to i352
+%r164 = getelementptr i32, i32* %r4, i32 10
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i352
+%r167 = shl i352 %r166, 320
+%r168 = or i352 %r162, %r167
+%r169 = zext i352 %r168 to i384
+%r171 = getelementptr i32, i32* %r4, i32 11
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i384
+%r174 = shl i384 %r173, 352
+%r175 = or i384 %r169, %r174
+%r176 = zext i384 %r175 to i416
+%r178 = getelementptr i32, i32* %r4, i32 12
+%r179 = load i32, i32* %r178
+%r180 = zext i32 %r179 to i416
+%r181 = shl i416 %r180, 384
+%r182 = or i416 %r176, %r181
+%r183 = zext i416 %r182 to i448
+%r185 = getelementptr i32, i32* %r4, i32 13
+%r186 = load i32, i32* %r185
+%r187 = zext i32 %r186 to i448
+%r188 = shl i448 %r187, 416
+%r189 = or i448 %r183, %r188 ; %r189 = all 14 limbs of %r4 as a single i448
+%r190 = zext i448 %r189 to i480
+%r191 = add i480 %r97, %r190 ; full sum; both operands are below 2^448, so it fits in 480 bits
+%r192 = trunc i480 %r191 to i448 ; low 448 bits are the 14 result limbs
+%r193 = trunc i448 %r192 to i32 ; unpack to %r2: take the low 32 bits, store, shift right by 32, repeat
+%r195 = getelementptr i32, i32* %r2, i32 0
+store i32 %r193, i32* %r195
+%r196 = lshr i448 %r192, 32
+%r197 = trunc i448 %r196 to i32
+%r199 = getelementptr i32, i32* %r2, i32 1
+store i32 %r197, i32* %r199
+%r200 = lshr i448 %r196, 32
+%r201 = trunc i448 %r200 to i32
+%r203 = getelementptr i32, i32* %r2, i32 2
+store i32 %r201, i32* %r203
+%r204 = lshr i448 %r200, 32
+%r205 = trunc i448 %r204 to i32
+%r207 = getelementptr i32, i32* %r2, i32 3
+store i32 %r205, i32* %r207
+%r208 = lshr i448 %r204, 32
+%r209 = trunc i448 %r208 to i32
+%r211 = getelementptr i32, i32* %r2, i32 4
+store i32 %r209, i32* %r211
+%r212 = lshr i448 %r208, 32
+%r213 = trunc i448 %r212 to i32
+%r215 = getelementptr i32, i32* %r2, i32 5
+store i32 %r213, i32* %r215
+%r216 = lshr i448 %r212, 32
+%r217 = trunc i448 %r216 to i32
+%r219 = getelementptr i32, i32* %r2, i32 6
+store i32 %r217, i32* %r219
+%r220 = lshr i448 %r216, 32
+%r221 = trunc i448 %r220 to i32
+%r223 = getelementptr i32, i32* %r2, i32 7
+store i32 %r221, i32* %r223
+%r224 = lshr i448 %r220, 32
+%r225 = trunc i448 %r224 to i32
+%r227 = getelementptr i32, i32* %r2, i32 8
+store i32 %r225, i32* %r227
+%r228 = lshr i448 %r224, 32
+%r229 = trunc i448 %r228 to i32
+%r231 = getelementptr i32, i32* %r2, i32 9
+store i32 %r229, i32* %r231
+%r232 = lshr i448 %r228, 32
+%r233 = trunc i448 %r232 to i32
+%r235 = getelementptr i32, i32* %r2, i32 10
+store i32 %r233, i32* %r235
+%r236 = lshr i448 %r232, 32
+%r237 = trunc i448 %r236 to i32
+%r239 = getelementptr i32, i32* %r2, i32 11
+store i32 %r237, i32* %r239
+%r240 = lshr i448 %r236, 32
+%r241 = trunc i448 %r240 to i32
+%r243 = getelementptr i32, i32* %r2, i32 12
+store i32 %r241, i32* %r243
+%r244 = lshr i448 %r240, 32
+%r245 = trunc i448 %r244 to i32
+%r247 = getelementptr i32, i32* %r2, i32 13
+store i32 %r245, i32* %r247
+%r248 = lshr i480 %r191, 448 ; bits 448 and above of the sum hold the carry-out
+%r249 = trunc i480 %r248 to i32
+ret i32 %r249 ; carry-out, 0 or 1
+}
+define i32 @mcl_fp_subPre14L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 14-limb (448-bit) subtraction: stores the low 448 bits of [%r3] - [%r4] to %r2 and returns the borrow
+{
+%r5 = load i32, i32* %r3 ; pack the 14 i32 limbs at %r3 (the minuend) into one wide integer, limb 0 in the lowest bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r3, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r3, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95 ; %r96 = all 14 limbs of %r3 as a single i448
+%r97 = zext i448 %r96 to i480 ; widen by one limb so a borrow becomes visible above bit 447
+%r98 = load i32, i32* %r4 ; pack the 14 i32 limbs at %r4 (the subtrahend) the same way
+%r99 = zext i32 %r98 to i64
+%r101 = getelementptr i32, i32* %r4, i32 1
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i64
+%r104 = shl i64 %r103, 32
+%r105 = or i64 %r99, %r104
+%r106 = zext i64 %r105 to i96
+%r108 = getelementptr i32, i32* %r4, i32 2
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i96
+%r111 = shl i96 %r110, 64
+%r112 = or i96 %r106, %r111
+%r113 = zext i96 %r112 to i128
+%r115 = getelementptr i32, i32* %r4, i32 3
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i128
+%r118 = shl i128 %r117, 96
+%r119 = or i128 %r113, %r118
+%r120 = zext i128 %r119 to i160
+%r122 = getelementptr i32, i32* %r4, i32 4
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i160
+%r125 = shl i160 %r124, 128
+%r126 = or i160 %r120, %r125
+%r127 = zext i160 %r126 to i192
+%r129 = getelementptr i32, i32* %r4, i32 5
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i192
+%r132 = shl i192 %r131, 160
+%r133 = or i192 %r127, %r132
+%r134 = zext i192 %r133 to i224
+%r136 = getelementptr i32, i32* %r4, i32 6
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i224
+%r139 = shl i224 %r138, 192
+%r140 = or i224 %r134, %r139
+%r141 = zext i224 %r140 to i256
+%r143 = getelementptr i32, i32* %r4, i32 7
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i256
+%r146 = shl i256 %r145, 224
+%r147 = or i256 %r141, %r146
+%r148 = zext i256 %r147 to i288
+%r150 = getelementptr i32, i32* %r4, i32 8
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i288
+%r153 = shl i288 %r152, 256
+%r154 = or i288 %r148, %r153
+%r155 = zext i288 %r154 to i320
+%r157 = getelementptr i32, i32* %r4, i32 9
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i320
+%r160 = shl i320 %r159, 288
+%r161 = or i320 %r155, %r160
+%r162 = zext i320 %r161 to i352
+%r164 = getelementptr i32, i32* %r4, i32 10
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i352
+%r167 = shl i352 %r166, 320
+%r168 = or i352 %r162, %r167
+%r169 = zext i352 %r168 to i384
+%r171 = getelementptr i32, i32* %r4, i32 11
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i384
+%r174 = shl i384 %r173, 352
+%r175 = or i384 %r169, %r174
+%r176 = zext i384 %r175 to i416
+%r178 = getelementptr i32, i32* %r4, i32 12
+%r179 = load i32, i32* %r178
+%r180 = zext i32 %r179 to i416
+%r181 = shl i416 %r180, 384
+%r182 = or i416 %r176, %r181
+%r183 = zext i416 %r182 to i448
+%r185 = getelementptr i32, i32* %r4, i32 13
+%r186 = load i32, i32* %r185
+%r187 = zext i32 %r186 to i448
+%r188 = shl i448 %r187, 416
+%r189 = or i448 %r183, %r188 ; %r189 = all 14 limbs of %r4 as a single i448
+%r190 = zext i448 %r189 to i480
+%r191 = sub i480 %r97, %r190 ; wraps modulo 2^480 when [%r3] < [%r4], setting every bit above 447
+%r192 = trunc i480 %r191 to i448 ; low 448 bits are the 14 result limbs
+%r193 = trunc i448 %r192 to i32 ; unpack to %r2: take the low 32 bits, store, shift right by 32, repeat
+%r195 = getelementptr i32, i32* %r2, i32 0
+store i32 %r193, i32* %r195
+%r196 = lshr i448 %r192, 32
+%r197 = trunc i448 %r196 to i32
+%r199 = getelementptr i32, i32* %r2, i32 1
+store i32 %r197, i32* %r199
+%r200 = lshr i448 %r196, 32
+%r201 = trunc i448 %r200 to i32
+%r203 = getelementptr i32, i32* %r2, i32 2
+store i32 %r201, i32* %r203
+%r204 = lshr i448 %r200, 32
+%r205 = trunc i448 %r204 to i32
+%r207 = getelementptr i32, i32* %r2, i32 3
+store i32 %r205, i32* %r207
+%r208 = lshr i448 %r204, 32
+%r209 = trunc i448 %r208 to i32
+%r211 = getelementptr i32, i32* %r2, i32 4
+store i32 %r209, i32* %r211
+%r212 = lshr i448 %r208, 32
+%r213 = trunc i448 %r212 to i32
+%r215 = getelementptr i32, i32* %r2, i32 5
+store i32 %r213, i32* %r215
+%r216 = lshr i448 %r212, 32
+%r217 = trunc i448 %r216 to i32
+%r219 = getelementptr i32, i32* %r2, i32 6
+store i32 %r217, i32* %r219
+%r220 = lshr i448 %r216, 32
+%r221 = trunc i448 %r220 to i32
+%r223 = getelementptr i32, i32* %r2, i32 7
+store i32 %r221, i32* %r223
+%r224 = lshr i448 %r220, 32
+%r225 = trunc i448 %r224 to i32
+%r227 = getelementptr i32, i32* %r2, i32 8
+store i32 %r225, i32* %r227
+%r228 = lshr i448 %r224, 32
+%r229 = trunc i448 %r228 to i32
+%r231 = getelementptr i32, i32* %r2, i32 9
+store i32 %r229, i32* %r231
+%r232 = lshr i448 %r228, 32
+%r233 = trunc i448 %r232 to i32
+%r235 = getelementptr i32, i32* %r2, i32 10
+store i32 %r233, i32* %r235
+%r236 = lshr i448 %r232, 32
+%r237 = trunc i448 %r236 to i32
+%r239 = getelementptr i32, i32* %r2, i32 11
+store i32 %r237, i32* %r239
+%r240 = lshr i448 %r236, 32
+%r241 = trunc i448 %r240 to i32
+%r243 = getelementptr i32, i32* %r2, i32 12
+store i32 %r241, i32* %r243
+%r244 = lshr i448 %r240, 32
+%r245 = trunc i448 %r244 to i32
+%r247 = getelementptr i32, i32* %r2, i32 13
+store i32 %r245, i32* %r247
+%r248 = lshr i480 %r191, 448 ; top 32 bits: all ones after a borrow, all zeros otherwise
+%r249 = trunc i480 %r248 to i32
+%r251 = and i32 %r249, 1 ; reduce to a single borrow bit
+ret i32 %r251 ; borrow, 0 or 1
+}
+define void @mcl_fp_shr1_14L(i32* noalias  %r1, i32* noalias  %r2) ; 14-limb (448-bit) logical shift right by one bit: reads the limbs at %r2, writes the shifted limbs to %r1
+{
+%r3 = load i32, i32* %r2 ; pack the 14 i32 limbs at %r2 into one wide integer, limb 0 in the lowest bits
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = zext i192 %r38 to i224
+%r41 = getelementptr i32, i32* %r2, i32 6
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i224
+%r44 = shl i224 %r43, 192
+%r45 = or i224 %r39, %r44
+%r46 = zext i224 %r45 to i256
+%r48 = getelementptr i32, i32* %r2, i32 7
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i256
+%r51 = shl i256 %r50, 224
+%r52 = or i256 %r46, %r51
+%r53 = zext i256 %r52 to i288
+%r55 = getelementptr i32, i32* %r2, i32 8
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i288
+%r58 = shl i288 %r57, 256
+%r59 = or i288 %r53, %r58
+%r60 = zext i288 %r59 to i320
+%r62 = getelementptr i32, i32* %r2, i32 9
+%r63 = load i32, i32* %r62
+%r64 = zext i32 %r63 to i320
+%r65 = shl i320 %r64, 288
+%r66 = or i320 %r60, %r65
+%r67 = zext i320 %r66 to i352
+%r69 = getelementptr i32, i32* %r2, i32 10
+%r70 = load i32, i32* %r69
+%r71 = zext i32 %r70 to i352
+%r72 = shl i352 %r71, 320
+%r73 = or i352 %r67, %r72
+%r74 = zext i352 %r73 to i384
+%r76 = getelementptr i32, i32* %r2, i32 11
+%r77 = load i32, i32* %r76
+%r78 = zext i32 %r77 to i384
+%r79 = shl i384 %r78, 352
+%r80 = or i384 %r74, %r79
+%r81 = zext i384 %r80 to i416
+%r83 = getelementptr i32, i32* %r2, i32 12
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i416
+%r86 = shl i416 %r85, 384
+%r87 = or i416 %r81, %r86
+%r88 = zext i416 %r87 to i448
+%r90 = getelementptr i32, i32* %r2, i32 13
+%r91 = load i32, i32* %r90
+%r92 = zext i32 %r91 to i448
+%r93 = shl i448 %r92, 416
+%r94 = or i448 %r88, %r93 ; %r94 = all 14 limbs of %r2 as a single i448
+%r95 = lshr i448 %r94, 1 ; logical shift: bit 0 is dropped and a zero enters at bit 447
+%r96 = trunc i448 %r95 to i32 ; unpack to %r1: take the low 32 bits, store, shift right by 32, repeat
+%r98 = getelementptr i32, i32* %r1, i32 0
+store i32 %r96, i32* %r98
+%r99 = lshr i448 %r95, 32
+%r100 = trunc i448 %r99 to i32
+%r102 = getelementptr i32, i32* %r1, i32 1
+store i32 %r100, i32* %r102
+%r103 = lshr i448 %r99, 32
+%r104 = trunc i448 %r103 to i32
+%r106 = getelementptr i32, i32* %r1, i32 2
+store i32 %r104, i32* %r106
+%r107 = lshr i448 %r103, 32
+%r108 = trunc i448 %r107 to i32
+%r110 = getelementptr i32, i32* %r1, i32 3
+store i32 %r108, i32* %r110
+%r111 = lshr i448 %r107, 32
+%r112 = trunc i448 %r111 to i32
+%r114 = getelementptr i32, i32* %r1, i32 4
+store i32 %r112, i32* %r114
+%r115 = lshr i448 %r111, 32
+%r116 = trunc i448 %r115 to i32
+%r118 = getelementptr i32, i32* %r1, i32 5
+store i32 %r116, i32* %r118
+%r119 = lshr i448 %r115, 32
+%r120 = trunc i448 %r119 to i32
+%r122 = getelementptr i32, i32* %r1, i32 6
+store i32 %r120, i32* %r122
+%r123 = lshr i448 %r119, 32
+%r124 = trunc i448 %r123 to i32
+%r126 = getelementptr i32, i32* %r1, i32 7
+store i32 %r124, i32* %r126
+%r127 = lshr i448 %r123, 32
+%r128 = trunc i448 %r127 to i32
+%r130 = getelementptr i32, i32* %r1, i32 8
+store i32 %r128, i32* %r130
+%r131 = lshr i448 %r127, 32
+%r132 = trunc i448 %r131 to i32
+%r134 = getelementptr i32, i32* %r1, i32 9
+store i32 %r132, i32* %r134
+%r135 = lshr i448 %r131, 32
+%r136 = trunc i448 %r135 to i32
+%r138 = getelementptr i32, i32* %r1, i32 10
+store i32 %r136, i32* %r138
+%r139 = lshr i448 %r135, 32
+%r140 = trunc i448 %r139 to i32
+%r142 = getelementptr i32, i32* %r1, i32 11
+store i32 %r140, i32* %r142
+%r143 = lshr i448 %r139, 32
+%r144 = trunc i448 %r143 to i32
+%r146 = getelementptr i32, i32* %r1, i32 12
+store i32 %r144, i32* %r146
+%r147 = lshr i448 %r143, 32
+%r148 = trunc i448 %r147 to i32
+%r150 = getelementptr i32, i32* %r1, i32 13
+store i32 %r148, i32* %r150
+ret void
+}
+define void @mcl_fp_add14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = load i32, i32* %r3
+%r98 = zext i32 %r97 to i64
+%r100 = getelementptr i32, i32* %r3, i32 1
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i64
+%r103 = shl i64 %r102, 32
+%r104 = or i64 %r98, %r103
+%r105 = zext i64 %r104 to i96
+%r107 = getelementptr i32, i32* %r3, i32 2
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i96
+%r110 = shl i96 %r109, 64
+%r111 = or i96 %r105, %r110
+%r112 = zext i96 %r111 to i128
+%r114 = getelementptr i32, i32* %r3, i32 3
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i128
+%r117 = shl i128 %r116, 96
+%r118 = or i128 %r112, %r117
+%r119 = zext i128 %r118 to i160
+%r121 = getelementptr i32, i32* %r3, i32 4
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i160
+%r124 = shl i160 %r123, 128
+%r125 = or i160 %r119, %r124
+%r126 = zext i160 %r125 to i192
+%r128 = getelementptr i32, i32* %r3, i32 5
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i192
+%r131 = shl i192 %r130, 160
+%r132 = or i192 %r126, %r131
+%r133 = zext i192 %r132 to i224
+%r135 = getelementptr i32, i32* %r3, i32 6
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i224
+%r138 = shl i224 %r137, 192
+%r139 = or i224 %r133, %r138
+%r140 = zext i224 %r139 to i256
+%r142 = getelementptr i32, i32* %r3, i32 7
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i256
+%r145 = shl i256 %r144, 224
+%r146 = or i256 %r140, %r145
+%r147 = zext i256 %r146 to i288
+%r149 = getelementptr i32, i32* %r3, i32 8
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i288
+%r152 = shl i288 %r151, 256
+%r153 = or i288 %r147, %r152
+%r154 = zext i288 %r153 to i320
+%r156 = getelementptr i32, i32* %r3, i32 9
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i320
+%r159 = shl i320 %r158, 288
+%r160 = or i320 %r154, %r159
+%r161 = zext i320 %r160 to i352
+%r163 = getelementptr i32, i32* %r3, i32 10
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i352
+%r166 = shl i352 %r165, 320
+%r167 = or i352 %r161, %r166
+%r168 = zext i352 %r167 to i384
+%r170 = getelementptr i32, i32* %r3, i32 11
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i384
+%r173 = shl i384 %r172, 352
+%r174 = or i384 %r168, %r173
+%r175 = zext i384 %r174 to i416
+%r177 = getelementptr i32, i32* %r3, i32 12
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i416
+%r180 = shl i416 %r179, 384
+%r181 = or i416 %r175, %r180
+%r182 = zext i416 %r181 to i448
+%r184 = getelementptr i32, i32* %r3, i32 13
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i448
+%r187 = shl i448 %r186, 416
+%r188 = or i448 %r182, %r187
+%r189 = zext i448 %r96 to i480
+%r190 = zext i448 %r188 to i480
+%r191 = add i480 %r189, %r190
+%r192 = trunc i480 %r191 to i448
+%r193 = trunc i448 %r192 to i32
+%r195 = getelementptr i32, i32* %r1, i32 0
+store i32 %r193, i32* %r195
+%r196 = lshr i448 %r192, 32
+%r197 = trunc i448 %r196 to i32
+%r199 = getelementptr i32, i32* %r1, i32 1
+store i32 %r197, i32* %r199
+%r200 = lshr i448 %r196, 32
+%r201 = trunc i448 %r200 to i32
+%r203 = getelementptr i32, i32* %r1, i32 2
+store i32 %r201, i32* %r203
+%r204 = lshr i448 %r200, 32
+%r205 = trunc i448 %r204 to i32
+%r207 = getelementptr i32, i32* %r1, i32 3
+store i32 %r205, i32* %r207
+%r208 = lshr i448 %r204, 32
+%r209 = trunc i448 %r208 to i32
+%r211 = getelementptr i32, i32* %r1, i32 4
+store i32 %r209, i32* %r211
+%r212 = lshr i448 %r208, 32
+%r213 = trunc i448 %r212 to i32
+%r215 = getelementptr i32, i32* %r1, i32 5
+store i32 %r213, i32* %r215
+%r216 = lshr i448 %r212, 32
+%r217 = trunc i448 %r216 to i32
+%r219 = getelementptr i32, i32* %r1, i32 6
+store i32 %r217, i32* %r219
+%r220 = lshr i448 %r216, 32
+%r221 = trunc i448 %r220 to i32
+%r223 = getelementptr i32, i32* %r1, i32 7
+store i32 %r221, i32* %r223
+%r224 = lshr i448 %r220, 32
+%r225 = trunc i448 %r224 to i32
+%r227 = getelementptr i32, i32* %r1, i32 8
+store i32 %r225, i32* %r227
+%r228 = lshr i448 %r224, 32
+%r229 = trunc i448 %r228 to i32
+%r231 = getelementptr i32, i32* %r1, i32 9
+store i32 %r229, i32* %r231
+%r232 = lshr i448 %r228, 32
+%r233 = trunc i448 %r232 to i32
+%r235 = getelementptr i32, i32* %r1, i32 10
+store i32 %r233, i32* %r235
+%r236 = lshr i448 %r232, 32
+%r237 = trunc i448 %r236 to i32
+%r239 = getelementptr i32, i32* %r1, i32 11
+store i32 %r237, i32* %r239
+%r240 = lshr i448 %r236, 32
+%r241 = trunc i448 %r240 to i32
+%r243 = getelementptr i32, i32* %r1, i32 12
+store i32 %r241, i32* %r243
+%r244 = lshr i448 %r240, 32
+%r245 = trunc i448 %r244 to i32
+%r247 = getelementptr i32, i32* %r1, i32 13
+store i32 %r245, i32* %r247
+%r248 = load i32, i32* %r4
+%r249 = zext i32 %r248 to i64
+%r251 = getelementptr i32, i32* %r4, i32 1
+%r252 = load i32, i32* %r251
+%r253 = zext i32 %r252 to i64
+%r254 = shl i64 %r253, 32
+%r255 = or i64 %r249, %r254
+%r256 = zext i64 %r255 to i96
+%r258 = getelementptr i32, i32* %r4, i32 2
+%r259 = load i32, i32* %r258
+%r260 = zext i32 %r259 to i96
+%r261 = shl i96 %r260, 64
+%r262 = or i96 %r256, %r261
+%r263 = zext i96 %r262 to i128
+%r265 = getelementptr i32, i32* %r4, i32 3
+%r266 = load i32, i32* %r265
+%r267 = zext i32 %r266 to i128
+%r268 = shl i128 %r267, 96
+%r269 = or i128 %r263, %r268
+%r270 = zext i128 %r269 to i160
+%r272 = getelementptr i32, i32* %r4, i32 4
+%r273 = load i32, i32* %r272
+%r274 = zext i32 %r273 to i160
+%r275 = shl i160 %r274, 128
+%r276 = or i160 %r270, %r275
+%r277 = zext i160 %r276 to i192
+%r279 = getelementptr i32, i32* %r4, i32 5
+%r280 = load i32, i32* %r279
+%r281 = zext i32 %r280 to i192
+%r282 = shl i192 %r281, 160
+%r283 = or i192 %r277, %r282
+%r284 = zext i192 %r283 to i224
+%r286 = getelementptr i32, i32* %r4, i32 6
+%r287 = load i32, i32* %r286
+%r288 = zext i32 %r287 to i224
+%r289 = shl i224 %r288, 192
+%r290 = or i224 %r284, %r289
+%r291 = zext i224 %r290 to i256
+%r293 = getelementptr i32, i32* %r4, i32 7
+%r294 = load i32, i32* %r293
+%r295 = zext i32 %r294 to i256
+%r296 = shl i256 %r295, 224
+%r297 = or i256 %r291, %r296
+%r298 = zext i256 %r297 to i288
+%r300 = getelementptr i32, i32* %r4, i32 8
+%r301 = load i32, i32* %r300
+%r302 = zext i32 %r301 to i288
+%r303 = shl i288 %r302, 256
+%r304 = or i288 %r298, %r303
+%r305 = zext i288 %r304 to i320
+%r307 = getelementptr i32, i32* %r4, i32 9
+%r308 = load i32, i32* %r307
+%r309 = zext i32 %r308 to i320
+%r310 = shl i320 %r309, 288
+%r311 = or i320 %r305, %r310
+%r312 = zext i320 %r311 to i352
+%r314 = getelementptr i32, i32* %r4, i32 10
+%r315 = load i32, i32* %r314
+%r316 = zext i32 %r315 to i352
+%r317 = shl i352 %r316, 320
+%r318 = or i352 %r312, %r317
+%r319 = zext i352 %r318 to i384
+%r321 = getelementptr i32, i32* %r4, i32 11
+%r322 = load i32, i32* %r321
+%r323 = zext i32 %r322 to i384
+%r324 = shl i384 %r323, 352
+%r325 = or i384 %r319, %r324
+%r326 = zext i384 %r325 to i416
+%r328 = getelementptr i32, i32* %r4, i32 12
+%r329 = load i32, i32* %r328
+%r330 = zext i32 %r329 to i416
+%r331 = shl i416 %r330, 384
+%r332 = or i416 %r326, %r331
+%r333 = zext i416 %r332 to i448
+%r335 = getelementptr i32, i32* %r4, i32 13
+%r336 = load i32, i32* %r335
+%r337 = zext i32 %r336 to i448
+%r338 = shl i448 %r337, 416
+%r339 = or i448 %r333, %r338
+%r340 = zext i448 %r339 to i480
+%r341 = sub i480 %r191, %r340
+%r342 = lshr i480 %r341, 448
+%r343 = trunc i480 %r342 to i1
+br i1%r343, label %carry, label %nocarry
+nocarry:
+%r344 = trunc i480 %r341 to i448
+%r345 = trunc i448 %r344 to i32
+%r347 = getelementptr i32, i32* %r1, i32 0
+store i32 %r345, i32* %r347
+%r348 = lshr i448 %r344, 32
+%r349 = trunc i448 %r348 to i32
+%r351 = getelementptr i32, i32* %r1, i32 1
+store i32 %r349, i32* %r351
+%r352 = lshr i448 %r348, 32
+%r353 = trunc i448 %r352 to i32
+%r355 = getelementptr i32, i32* %r1, i32 2
+store i32 %r353, i32* %r355
+%r356 = lshr i448 %r352, 32
+%r357 = trunc i448 %r356 to i32
+%r359 = getelementptr i32, i32* %r1, i32 3
+store i32 %r357, i32* %r359
+%r360 = lshr i448 %r356, 32
+%r361 = trunc i448 %r360 to i32
+%r363 = getelementptr i32, i32* %r1, i32 4
+store i32 %r361, i32* %r363
+%r364 = lshr i448 %r360, 32
+%r365 = trunc i448 %r364 to i32
+%r367 = getelementptr i32, i32* %r1, i32 5
+store i32 %r365, i32* %r367
+%r368 = lshr i448 %r364, 32
+%r369 = trunc i448 %r368 to i32
+%r371 = getelementptr i32, i32* %r1, i32 6
+store i32 %r369, i32* %r371
+%r372 = lshr i448 %r368, 32
+%r373 = trunc i448 %r372 to i32
+%r375 = getelementptr i32, i32* %r1, i32 7
+store i32 %r373, i32* %r375
+%r376 = lshr i448 %r372, 32
+%r377 = trunc i448 %r376 to i32
+%r379 = getelementptr i32, i32* %r1, i32 8
+store i32 %r377, i32* %r379
+%r380 = lshr i448 %r376, 32
+%r381 = trunc i448 %r380 to i32
+%r383 = getelementptr i32, i32* %r1, i32 9
+store i32 %r381, i32* %r383
+%r384 = lshr i448 %r380, 32
+%r385 = trunc i448 %r384 to i32
+%r387 = getelementptr i32, i32* %r1, i32 10
+store i32 %r385, i32* %r387
+%r388 = lshr i448 %r384, 32
+%r389 = trunc i448 %r388 to i32
+%r391 = getelementptr i32, i32* %r1, i32 11
+store i32 %r389, i32* %r391
+%r392 = lshr i448 %r388, 32
+%r393 = trunc i448 %r392 to i32
+%r395 = getelementptr i32, i32* %r1, i32 12
+store i32 %r393, i32* %r395
+%r396 = lshr i448 %r392, 32
+%r397 = trunc i448 %r396 to i32
+%r399 = getelementptr i32, i32* %r1, i32 13
+store i32 %r397, i32* %r399
+ret void
+carry:
+ret void
+}
+define void @mcl_fp_addNF14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; stores to %r1 either (%r2 + %r3) or (%r2 + %r3 - %r4); each operand is 14 i32 limbs (%r4 is presumably the modulus - confirm)
+{
+%r5 = load i32, i32* %r2 ; assemble the 14 limbs at %r2 into one i448 (%r96); limb 0 is the least significant
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = load i32, i32* %r3 ; assemble the 14 limbs at %r3 into one i448 (%r188) the same way
+%r98 = zext i32 %r97 to i64
+%r100 = getelementptr i32, i32* %r3, i32 1
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i64
+%r103 = shl i64 %r102, 32
+%r104 = or i64 %r98, %r103
+%r105 = zext i64 %r104 to i96
+%r107 = getelementptr i32, i32* %r3, i32 2
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i96
+%r110 = shl i96 %r109, 64
+%r111 = or i96 %r105, %r110
+%r112 = zext i96 %r111 to i128
+%r114 = getelementptr i32, i32* %r3, i32 3
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i128
+%r117 = shl i128 %r116, 96
+%r118 = or i128 %r112, %r117
+%r119 = zext i128 %r118 to i160
+%r121 = getelementptr i32, i32* %r3, i32 4
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i160
+%r124 = shl i160 %r123, 128
+%r125 = or i160 %r119, %r124
+%r126 = zext i160 %r125 to i192
+%r128 = getelementptr i32, i32* %r3, i32 5
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i192
+%r131 = shl i192 %r130, 160
+%r132 = or i192 %r126, %r131
+%r133 = zext i192 %r132 to i224
+%r135 = getelementptr i32, i32* %r3, i32 6
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i224
+%r138 = shl i224 %r137, 192
+%r139 = or i224 %r133, %r138
+%r140 = zext i224 %r139 to i256
+%r142 = getelementptr i32, i32* %r3, i32 7
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i256
+%r145 = shl i256 %r144, 224
+%r146 = or i256 %r140, %r145
+%r147 = zext i256 %r146 to i288
+%r149 = getelementptr i32, i32* %r3, i32 8
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i288
+%r152 = shl i288 %r151, 256
+%r153 = or i288 %r147, %r152
+%r154 = zext i288 %r153 to i320
+%r156 = getelementptr i32, i32* %r3, i32 9
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i320
+%r159 = shl i320 %r158, 288
+%r160 = or i320 %r154, %r159
+%r161 = zext i320 %r160 to i352
+%r163 = getelementptr i32, i32* %r3, i32 10
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i352
+%r166 = shl i352 %r165, 320
+%r167 = or i352 %r161, %r166
+%r168 = zext i352 %r167 to i384
+%r170 = getelementptr i32, i32* %r3, i32 11
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i384
+%r173 = shl i384 %r172, 352
+%r174 = or i384 %r168, %r173
+%r175 = zext i384 %r174 to i416
+%r177 = getelementptr i32, i32* %r3, i32 12
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i416
+%r180 = shl i416 %r179, 384
+%r181 = or i416 %r175, %r180
+%r182 = zext i416 %r181 to i448
+%r184 = getelementptr i32, i32* %r3, i32 13
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i448
+%r187 = shl i448 %r186, 416
+%r188 = or i448 %r182, %r187
+%r189 = add i448 %r96, %r188 ; sum at 448 bits with no widening, so any carry out of bit 447 is dropped (presumably cannot occur for valid inputs - TODO confirm)
+%r190 = load i32, i32* %r4 ; assemble the 14 limbs at %r4 into one i448 (%r281)
+%r191 = zext i32 %r190 to i64
+%r193 = getelementptr i32, i32* %r4, i32 1
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i64
+%r196 = shl i64 %r195, 32
+%r197 = or i64 %r191, %r196
+%r198 = zext i64 %r197 to i96
+%r200 = getelementptr i32, i32* %r4, i32 2
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i96
+%r203 = shl i96 %r202, 64
+%r204 = or i96 %r198, %r203
+%r205 = zext i96 %r204 to i128
+%r207 = getelementptr i32, i32* %r4, i32 3
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i128
+%r210 = shl i128 %r209, 96
+%r211 = or i128 %r205, %r210
+%r212 = zext i128 %r211 to i160
+%r214 = getelementptr i32, i32* %r4, i32 4
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i160
+%r217 = shl i160 %r216, 128
+%r218 = or i160 %r212, %r217
+%r219 = zext i160 %r218 to i192
+%r221 = getelementptr i32, i32* %r4, i32 5
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i192
+%r224 = shl i192 %r223, 160
+%r225 = or i192 %r219, %r224
+%r226 = zext i192 %r225 to i224
+%r228 = getelementptr i32, i32* %r4, i32 6
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i224
+%r231 = shl i224 %r230, 192
+%r232 = or i224 %r226, %r231
+%r233 = zext i224 %r232 to i256
+%r235 = getelementptr i32, i32* %r4, i32 7
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i256
+%r238 = shl i256 %r237, 224
+%r239 = or i256 %r233, %r238
+%r240 = zext i256 %r239 to i288
+%r242 = getelementptr i32, i32* %r4, i32 8
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i288
+%r245 = shl i288 %r244, 256
+%r246 = or i288 %r240, %r245
+%r247 = zext i288 %r246 to i320
+%r249 = getelementptr i32, i32* %r4, i32 9
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i320
+%r252 = shl i320 %r251, 288
+%r253 = or i320 %r247, %r252
+%r254 = zext i320 %r253 to i352
+%r256 = getelementptr i32, i32* %r4, i32 10
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i352
+%r259 = shl i352 %r258, 320
+%r260 = or i352 %r254, %r259
+%r261 = zext i352 %r260 to i384
+%r263 = getelementptr i32, i32* %r4, i32 11
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i384
+%r266 = shl i384 %r265, 352
+%r267 = or i384 %r261, %r266
+%r268 = zext i384 %r267 to i416
+%r270 = getelementptr i32, i32* %r4, i32 12
+%r271 = load i32, i32* %r270
+%r272 = zext i32 %r271 to i416
+%r273 = shl i416 %r272, 384
+%r274 = or i416 %r268, %r273
+%r275 = zext i416 %r274 to i448
+%r277 = getelementptr i32, i32* %r4, i32 13
+%r278 = load i32, i32* %r277
+%r279 = zext i32 %r278 to i448
+%r280 = shl i448 %r279, 416
+%r281 = or i448 %r275, %r280
+%r282 = sub i448 %r189, %r281 ; sum minus the %r4 value, wrapping at 448 bits
+%r283 = lshr i448 %r282, 447 ; isolate bit 447, the top bit of the difference
+%r284 = trunc i448 %r283 to i1
+%r285 = select i1 %r284, i448 %r189, i448 %r282 ; top bit set: keep the plain sum; otherwise keep the difference (branch-free choice)
+%r286 = trunc i448 %r285 to i32 ; write the chosen value to %r1 as 14 i32 limbs, least significant first
+%r288 = getelementptr i32, i32* %r1, i32 0
+store i32 %r286, i32* %r288
+%r289 = lshr i448 %r285, 32
+%r290 = trunc i448 %r289 to i32
+%r292 = getelementptr i32, i32* %r1, i32 1
+store i32 %r290, i32* %r292
+%r293 = lshr i448 %r289, 32
+%r294 = trunc i448 %r293 to i32
+%r296 = getelementptr i32, i32* %r1, i32 2
+store i32 %r294, i32* %r296
+%r297 = lshr i448 %r293, 32
+%r298 = trunc i448 %r297 to i32
+%r300 = getelementptr i32, i32* %r1, i32 3
+store i32 %r298, i32* %r300
+%r301 = lshr i448 %r297, 32
+%r302 = trunc i448 %r301 to i32
+%r304 = getelementptr i32, i32* %r1, i32 4
+store i32 %r302, i32* %r304
+%r305 = lshr i448 %r301, 32
+%r306 = trunc i448 %r305 to i32
+%r308 = getelementptr i32, i32* %r1, i32 5
+store i32 %r306, i32* %r308
+%r309 = lshr i448 %r305, 32
+%r310 = trunc i448 %r309 to i32
+%r312 = getelementptr i32, i32* %r1, i32 6
+store i32 %r310, i32* %r312
+%r313 = lshr i448 %r309, 32
+%r314 = trunc i448 %r313 to i32
+%r316 = getelementptr i32, i32* %r1, i32 7
+store i32 %r314, i32* %r316
+%r317 = lshr i448 %r313, 32
+%r318 = trunc i448 %r317 to i32
+%r320 = getelementptr i32, i32* %r1, i32 8
+store i32 %r318, i32* %r320
+%r321 = lshr i448 %r317, 32
+%r322 = trunc i448 %r321 to i32
+%r324 = getelementptr i32, i32* %r1, i32 9
+store i32 %r322, i32* %r324
+%r325 = lshr i448 %r321, 32
+%r326 = trunc i448 %r325 to i32
+%r328 = getelementptr i32, i32* %r1, i32 10
+store i32 %r326, i32* %r328
+%r329 = lshr i448 %r325, 32
+%r330 = trunc i448 %r329 to i32
+%r332 = getelementptr i32, i32* %r1, i32 11
+store i32 %r330, i32* %r332
+%r333 = lshr i448 %r329, 32
+%r334 = trunc i448 %r333 to i32
+%r336 = getelementptr i32, i32* %r1, i32 12
+store i32 %r334, i32* %r336
+%r337 = lshr i448 %r333, 32
+%r338 = trunc i448 %r337 to i32
+%r340 = getelementptr i32, i32* %r1, i32 13
+store i32 %r338, i32* %r340
+ret void
+}
+define void @mcl_fp_sub14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; stores (%r2 - %r3) to %r1, then adds the %r4 value back if the subtraction borrowed; each operand is 14 i32 limbs (%r4 is presumably the modulus - confirm)
+{
+%r5 = load i32, i32* %r2 ; assemble the 14 limbs at %r2 into one i448 (%r96); limb 0 is the least significant
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = load i32, i32* %r3 ; assemble the 14 limbs at %r3 into one i448 (%r188) the same way
+%r98 = zext i32 %r97 to i64
+%r100 = getelementptr i32, i32* %r3, i32 1
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i64
+%r103 = shl i64 %r102, 32
+%r104 = or i64 %r98, %r103
+%r105 = zext i64 %r104 to i96
+%r107 = getelementptr i32, i32* %r3, i32 2
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i96
+%r110 = shl i96 %r109, 64
+%r111 = or i96 %r105, %r110
+%r112 = zext i96 %r111 to i128
+%r114 = getelementptr i32, i32* %r3, i32 3
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i128
+%r117 = shl i128 %r116, 96
+%r118 = or i128 %r112, %r117
+%r119 = zext i128 %r118 to i160
+%r121 = getelementptr i32, i32* %r3, i32 4
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i160
+%r124 = shl i160 %r123, 128
+%r125 = or i160 %r119, %r124
+%r126 = zext i160 %r125 to i192
+%r128 = getelementptr i32, i32* %r3, i32 5
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i192
+%r131 = shl i192 %r130, 160
+%r132 = or i192 %r126, %r131
+%r133 = zext i192 %r132 to i224
+%r135 = getelementptr i32, i32* %r3, i32 6
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i224
+%r138 = shl i224 %r137, 192
+%r139 = or i224 %r133, %r138
+%r140 = zext i224 %r139 to i256
+%r142 = getelementptr i32, i32* %r3, i32 7
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i256
+%r145 = shl i256 %r144, 224
+%r146 = or i256 %r140, %r145
+%r147 = zext i256 %r146 to i288
+%r149 = getelementptr i32, i32* %r3, i32 8
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i288
+%r152 = shl i288 %r151, 256
+%r153 = or i288 %r147, %r152
+%r154 = zext i288 %r153 to i320
+%r156 = getelementptr i32, i32* %r3, i32 9
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i320
+%r159 = shl i320 %r158, 288
+%r160 = or i320 %r154, %r159
+%r161 = zext i320 %r160 to i352
+%r163 = getelementptr i32, i32* %r3, i32 10
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i352
+%r166 = shl i352 %r165, 320
+%r167 = or i352 %r161, %r166
+%r168 = zext i352 %r167 to i384
+%r170 = getelementptr i32, i32* %r3, i32 11
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i384
+%r173 = shl i384 %r172, 352
+%r174 = or i384 %r168, %r173
+%r175 = zext i384 %r174 to i416
+%r177 = getelementptr i32, i32* %r3, i32 12
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i416
+%r180 = shl i416 %r179, 384
+%r181 = or i416 %r175, %r180
+%r182 = zext i416 %r181 to i448
+%r184 = getelementptr i32, i32* %r3, i32 13
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i448
+%r187 = shl i448 %r186, 416
+%r188 = or i448 %r182, %r187
+%r189 = zext i448 %r96 to i480 ; widen both operands to i480 so a borrow shows up in bit 448
+%r190 = zext i448 %r188 to i480
+%r191 = sub i480 %r189, %r190
+%r192 = trunc i480 %r191 to i448 ; low 448 bits: the difference wrapped at 2^448
+%r193 = lshr i480 %r191, 448 ; bit 448 is 1 exactly when the %r3 value exceeds the %r2 value
+%r194 = trunc i480 %r193 to i1
+%r195 = trunc i448 %r192 to i32 ; store the wrapped difference to %r1 unconditionally, least significant limb first
+%r197 = getelementptr i32, i32* %r1, i32 0
+store i32 %r195, i32* %r197
+%r198 = lshr i448 %r192, 32
+%r199 = trunc i448 %r198 to i32
+%r201 = getelementptr i32, i32* %r1, i32 1
+store i32 %r199, i32* %r201
+%r202 = lshr i448 %r198, 32
+%r203 = trunc i448 %r202 to i32
+%r205 = getelementptr i32, i32* %r1, i32 2
+store i32 %r203, i32* %r205
+%r206 = lshr i448 %r202, 32
+%r207 = trunc i448 %r206 to i32
+%r209 = getelementptr i32, i32* %r1, i32 3
+store i32 %r207, i32* %r209
+%r210 = lshr i448 %r206, 32
+%r211 = trunc i448 %r210 to i32
+%r213 = getelementptr i32, i32* %r1, i32 4
+store i32 %r211, i32* %r213
+%r214 = lshr i448 %r210, 32
+%r215 = trunc i448 %r214 to i32
+%r217 = getelementptr i32, i32* %r1, i32 5
+store i32 %r215, i32* %r217
+%r218 = lshr i448 %r214, 32
+%r219 = trunc i448 %r218 to i32
+%r221 = getelementptr i32, i32* %r1, i32 6
+store i32 %r219, i32* %r221
+%r222 = lshr i448 %r218, 32
+%r223 = trunc i448 %r222 to i32
+%r225 = getelementptr i32, i32* %r1, i32 7
+store i32 %r223, i32* %r225
+%r226 = lshr i448 %r222, 32
+%r227 = trunc i448 %r226 to i32
+%r229 = getelementptr i32, i32* %r1, i32 8
+store i32 %r227, i32* %r229
+%r230 = lshr i448 %r226, 32
+%r231 = trunc i448 %r230 to i32
+%r233 = getelementptr i32, i32* %r1, i32 9
+store i32 %r231, i32* %r233
+%r234 = lshr i448 %r230, 32
+%r235 = trunc i448 %r234 to i32
+%r237 = getelementptr i32, i32* %r1, i32 10
+store i32 %r235, i32* %r237
+%r238 = lshr i448 %r234, 32
+%r239 = trunc i448 %r238 to i32
+%r241 = getelementptr i32, i32* %r1, i32 11
+store i32 %r239, i32* %r241
+%r242 = lshr i448 %r238, 32
+%r243 = trunc i448 %r242 to i32
+%r245 = getelementptr i32, i32* %r1, i32 12
+store i32 %r243, i32* %r245
+%r246 = lshr i448 %r242, 32
+%r247 = trunc i448 %r246 to i32
+%r249 = getelementptr i32, i32* %r1, i32 13
+store i32 %r247, i32* %r249
+br i1%r194, label %carry, label %nocarry ; borrow set: go fix up the stored value; otherwise it is already final
+nocarry:
+ret void
+carry:
+%r250 = load i32, i32* %r4 ; borrow path: assemble the 14 limbs at %r4 into one i448 (%r341)
+%r251 = zext i32 %r250 to i64
+%r253 = getelementptr i32, i32* %r4, i32 1
+%r254 = load i32, i32* %r253
+%r255 = zext i32 %r254 to i64
+%r256 = shl i64 %r255, 32
+%r257 = or i64 %r251, %r256
+%r258 = zext i64 %r257 to i96
+%r260 = getelementptr i32, i32* %r4, i32 2
+%r261 = load i32, i32* %r260
+%r262 = zext i32 %r261 to i96
+%r263 = shl i96 %r262, 64
+%r264 = or i96 %r258, %r263
+%r265 = zext i96 %r264 to i128
+%r267 = getelementptr i32, i32* %r4, i32 3
+%r268 = load i32, i32* %r267
+%r269 = zext i32 %r268 to i128
+%r270 = shl i128 %r269, 96
+%r271 = or i128 %r265, %r270
+%r272 = zext i128 %r271 to i160
+%r274 = getelementptr i32, i32* %r4, i32 4
+%r275 = load i32, i32* %r274
+%r276 = zext i32 %r275 to i160
+%r277 = shl i160 %r276, 128
+%r278 = or i160 %r272, %r277
+%r279 = zext i160 %r278 to i192
+%r281 = getelementptr i32, i32* %r4, i32 5
+%r282 = load i32, i32* %r281
+%r283 = zext i32 %r282 to i192
+%r284 = shl i192 %r283, 160
+%r285 = or i192 %r279, %r284
+%r286 = zext i192 %r285 to i224
+%r288 = getelementptr i32, i32* %r4, i32 6
+%r289 = load i32, i32* %r288
+%r290 = zext i32 %r289 to i224
+%r291 = shl i224 %r290, 192
+%r292 = or i224 %r286, %r291
+%r293 = zext i224 %r292 to i256
+%r295 = getelementptr i32, i32* %r4, i32 7
+%r296 = load i32, i32* %r295
+%r297 = zext i32 %r296 to i256
+%r298 = shl i256 %r297, 224
+%r299 = or i256 %r293, %r298
+%r300 = zext i256 %r299 to i288
+%r302 = getelementptr i32, i32* %r4, i32 8
+%r303 = load i32, i32* %r302
+%r304 = zext i32 %r303 to i288
+%r305 = shl i288 %r304, 256
+%r306 = or i288 %r300, %r305
+%r307 = zext i288 %r306 to i320
+%r309 = getelementptr i32, i32* %r4, i32 9
+%r310 = load i32, i32* %r309
+%r311 = zext i32 %r310 to i320
+%r312 = shl i320 %r311, 288
+%r313 = or i320 %r307, %r312
+%r314 = zext i320 %r313 to i352
+%r316 = getelementptr i32, i32* %r4, i32 10
+%r317 = load i32, i32* %r316
+%r318 = zext i32 %r317 to i352
+%r319 = shl i352 %r318, 320
+%r320 = or i352 %r314, %r319
+%r321 = zext i352 %r320 to i384
+%r323 = getelementptr i32, i32* %r4, i32 11
+%r324 = load i32, i32* %r323
+%r325 = zext i32 %r324 to i384
+%r326 = shl i384 %r325, 352
+%r327 = or i384 %r321, %r326
+%r328 = zext i384 %r327 to i416
+%r330 = getelementptr i32, i32* %r4, i32 12
+%r331 = load i32, i32* %r330
+%r332 = zext i32 %r331 to i416
+%r333 = shl i416 %r332, 384
+%r334 = or i416 %r328, %r333
+%r335 = zext i416 %r334 to i448
+%r337 = getelementptr i32, i32* %r4, i32 13
+%r338 = load i32, i32* %r337
+%r339 = zext i32 %r338 to i448
+%r340 = shl i448 %r339, 416
+%r341 = or i448 %r335, %r340
+%r342 = add i448 %r192, %r341 ; add the %r4 value to the wrapped difference; the add itself wraps at 448 bits
+%r343 = trunc i448 %r342 to i32 ; overwrite the limbs stored earlier in %r1 with the corrected value
+%r345 = getelementptr i32, i32* %r1, i32 0
+store i32 %r343, i32* %r345
+%r346 = lshr i448 %r342, 32
+%r347 = trunc i448 %r346 to i32
+%r349 = getelementptr i32, i32* %r1, i32 1
+store i32 %r347, i32* %r349
+%r350 = lshr i448 %r346, 32
+%r351 = trunc i448 %r350 to i32
+%r353 = getelementptr i32, i32* %r1, i32 2
+store i32 %r351, i32* %r353
+%r354 = lshr i448 %r350, 32
+%r355 = trunc i448 %r354 to i32
+%r357 = getelementptr i32, i32* %r1, i32 3
+store i32 %r355, i32* %r357
+%r358 = lshr i448 %r354, 32
+%r359 = trunc i448 %r358 to i32
+%r361 = getelementptr i32, i32* %r1, i32 4
+store i32 %r359, i32* %r361
+%r362 = lshr i448 %r358, 32
+%r363 = trunc i448 %r362 to i32
+%r365 = getelementptr i32, i32* %r1, i32 5
+store i32 %r363, i32* %r365
+%r366 = lshr i448 %r362, 32
+%r367 = trunc i448 %r366 to i32
+%r369 = getelementptr i32, i32* %r1, i32 6
+store i32 %r367, i32* %r369
+%r370 = lshr i448 %r366, 32
+%r371 = trunc i448 %r370 to i32
+%r373 = getelementptr i32, i32* %r1, i32 7
+store i32 %r371, i32* %r373
+%r374 = lshr i448 %r370, 32
+%r375 = trunc i448 %r374 to i32
+%r377 = getelementptr i32, i32* %r1, i32 8
+store i32 %r375, i32* %r377
+%r378 = lshr i448 %r374, 32
+%r379 = trunc i448 %r378 to i32
+%r381 = getelementptr i32, i32* %r1, i32 9
+store i32 %r379, i32* %r381
+%r382 = lshr i448 %r378, 32
+%r383 = trunc i448 %r382 to i32
+%r385 = getelementptr i32, i32* %r1, i32 10
+store i32 %r383, i32* %r385
+%r386 = lshr i448 %r382, 32
+%r387 = trunc i448 %r386 to i32
+%r389 = getelementptr i32, i32* %r1, i32 11
+store i32 %r387, i32* %r389
+%r390 = lshr i448 %r386, 32
+%r391 = trunc i448 %r390 to i32
+%r393 = getelementptr i32, i32* %r1, i32 12
+store i32 %r391, i32* %r393
+%r394 = lshr i448 %r390, 32
+%r395 = trunc i448 %r394 to i32
+%r397 = getelementptr i32, i32* %r1, i32 13
+store i32 %r395, i32* %r397
+ret void
+}
+define void @mcl_fp_subNF14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = load i32, i32* %r3
+%r98 = zext i32 %r97 to i64
+%r100 = getelementptr i32, i32* %r3, i32 1
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i64
+%r103 = shl i64 %r102, 32
+%r104 = or i64 %r98, %r103
+%r105 = zext i64 %r104 to i96
+%r107 = getelementptr i32, i32* %r3, i32 2
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i96
+%r110 = shl i96 %r109, 64
+%r111 = or i96 %r105, %r110
+%r112 = zext i96 %r111 to i128
+%r114 = getelementptr i32, i32* %r3, i32 3
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i128
+%r117 = shl i128 %r116, 96
+%r118 = or i128 %r112, %r117
+%r119 = zext i128 %r118 to i160
+%r121 = getelementptr i32, i32* %r3, i32 4
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i160
+%r124 = shl i160 %r123, 128
+%r125 = or i160 %r119, %r124
+%r126 = zext i160 %r125 to i192
+%r128 = getelementptr i32, i32* %r3, i32 5
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i192
+%r131 = shl i192 %r130, 160
+%r132 = or i192 %r126, %r131
+%r133 = zext i192 %r132 to i224
+%r135 = getelementptr i32, i32* %r3, i32 6
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i224
+%r138 = shl i224 %r137, 192
+%r139 = or i224 %r133, %r138
+%r140 = zext i224 %r139 to i256
+%r142 = getelementptr i32, i32* %r3, i32 7
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i256
+%r145 = shl i256 %r144, 224
+%r146 = or i256 %r140, %r145
+%r147 = zext i256 %r146 to i288
+%r149 = getelementptr i32, i32* %r3, i32 8
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i288
+%r152 = shl i288 %r151, 256
+%r153 = or i288 %r147, %r152
+%r154 = zext i288 %r153 to i320
+%r156 = getelementptr i32, i32* %r3, i32 9
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i320
+%r159 = shl i320 %r158, 288
+%r160 = or i320 %r154, %r159
+%r161 = zext i320 %r160 to i352
+%r163 = getelementptr i32, i32* %r3, i32 10
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i352
+%r166 = shl i352 %r165, 320
+%r167 = or i352 %r161, %r166
+%r168 = zext i352 %r167 to i384
+%r170 = getelementptr i32, i32* %r3, i32 11
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i384
+%r173 = shl i384 %r172, 352
+%r174 = or i384 %r168, %r173
+%r175 = zext i384 %r174 to i416
+%r177 = getelementptr i32, i32* %r3, i32 12
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i416
+%r180 = shl i416 %r179, 384
+%r181 = or i416 %r175, %r180
+%r182 = zext i416 %r181 to i448
+%r184 = getelementptr i32, i32* %r3, i32 13
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i448
+%r187 = shl i448 %r186, 416
+%r188 = or i448 %r182, %r187
+%r189 = sub i448 %r96, %r188
+%r190 = lshr i448 %r189, 447
+%r191 = trunc i448 %r190 to i1
+%r192 = load i32, i32* %r4
+%r193 = zext i32 %r192 to i64
+%r195 = getelementptr i32, i32* %r4, i32 1
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i64
+%r198 = shl i64 %r197, 32
+%r199 = or i64 %r193, %r198
+%r200 = zext i64 %r199 to i96
+%r202 = getelementptr i32, i32* %r4, i32 2
+%r203 = load i32, i32* %r202
+%r204 = zext i32 %r203 to i96
+%r205 = shl i96 %r204, 64
+%r206 = or i96 %r200, %r205
+%r207 = zext i96 %r206 to i128
+%r209 = getelementptr i32, i32* %r4, i32 3
+%r210 = load i32, i32* %r209
+%r211 = zext i32 %r210 to i128
+%r212 = shl i128 %r211, 96
+%r213 = or i128 %r207, %r212
+%r214 = zext i128 %r213 to i160
+%r216 = getelementptr i32, i32* %r4, i32 4
+%r217 = load i32, i32* %r216
+%r218 = zext i32 %r217 to i160
+%r219 = shl i160 %r218, 128
+%r220 = or i160 %r214, %r219
+%r221 = zext i160 %r220 to i192
+%r223 = getelementptr i32, i32* %r4, i32 5
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i192
+%r226 = shl i192 %r225, 160
+%r227 = or i192 %r221, %r226
+%r228 = zext i192 %r227 to i224
+%r230 = getelementptr i32, i32* %r4, i32 6
+%r231 = load i32, i32* %r230
+%r232 = zext i32 %r231 to i224
+%r233 = shl i224 %r232, 192
+%r234 = or i224 %r228, %r233
+%r235 = zext i224 %r234 to i256
+%r237 = getelementptr i32, i32* %r4, i32 7
+%r238 = load i32, i32* %r237
+%r239 = zext i32 %r238 to i256
+%r240 = shl i256 %r239, 224
+%r241 = or i256 %r235, %r240
+%r242 = zext i256 %r241 to i288
+%r244 = getelementptr i32, i32* %r4, i32 8
+%r245 = load i32, i32* %r244
+%r246 = zext i32 %r245 to i288
+%r247 = shl i288 %r246, 256
+%r248 = or i288 %r242, %r247
+%r249 = zext i288 %r248 to i320
+%r251 = getelementptr i32, i32* %r4, i32 9
+%r252 = load i32, i32* %r251
+%r253 = zext i32 %r252 to i320
+%r254 = shl i320 %r253, 288
+%r255 = or i320 %r249, %r254
+%r256 = zext i320 %r255 to i352
+%r258 = getelementptr i32, i32* %r4, i32 10
+%r259 = load i32, i32* %r258
+%r260 = zext i32 %r259 to i352
+%r261 = shl i352 %r260, 320
+%r262 = or i352 %r256, %r261
+%r263 = zext i352 %r262 to i384
+%r265 = getelementptr i32, i32* %r4, i32 11
+%r266 = load i32, i32* %r265
+%r267 = zext i32 %r266 to i384
+%r268 = shl i384 %r267, 352
+%r269 = or i384 %r263, %r268
+%r270 = zext i384 %r269 to i416
+%r272 = getelementptr i32, i32* %r4, i32 12
+%r273 = load i32, i32* %r272
+%r274 = zext i32 %r273 to i416
+%r275 = shl i416 %r274, 384
+%r276 = or i416 %r270, %r275
+%r277 = zext i416 %r276 to i448
+%r279 = getelementptr i32, i32* %r4, i32 13
+%r280 = load i32, i32* %r279
+%r281 = zext i32 %r280 to i448
+%r282 = shl i448 %r281, 416
+%r283 = or i448 %r277, %r282
+%r285 = select i1 %r191, i448 %r283, i448 0
+%r286 = add i448 %r189, %r285
+%r287 = trunc i448 %r286 to i32
+%r289 = getelementptr i32, i32* %r1, i32 0
+store i32 %r287, i32* %r289
+%r290 = lshr i448 %r286, 32
+%r291 = trunc i448 %r290 to i32
+%r293 = getelementptr i32, i32* %r1, i32 1
+store i32 %r291, i32* %r293
+%r294 = lshr i448 %r290, 32
+%r295 = trunc i448 %r294 to i32
+%r297 = getelementptr i32, i32* %r1, i32 2
+store i32 %r295, i32* %r297
+%r298 = lshr i448 %r294, 32
+%r299 = trunc i448 %r298 to i32
+%r301 = getelementptr i32, i32* %r1, i32 3
+store i32 %r299, i32* %r301
+%r302 = lshr i448 %r298, 32
+%r303 = trunc i448 %r302 to i32
+%r305 = getelementptr i32, i32* %r1, i32 4
+store i32 %r303, i32* %r305
+%r306 = lshr i448 %r302, 32
+%r307 = trunc i448 %r306 to i32
+%r309 = getelementptr i32, i32* %r1, i32 5
+store i32 %r307, i32* %r309
+%r310 = lshr i448 %r306, 32
+%r311 = trunc i448 %r310 to i32
+%r313 = getelementptr i32, i32* %r1, i32 6
+store i32 %r311, i32* %r313
+%r314 = lshr i448 %r310, 32
+%r315 = trunc i448 %r314 to i32
+%r317 = getelementptr i32, i32* %r1, i32 7
+store i32 %r315, i32* %r317
+%r318 = lshr i448 %r314, 32
+%r319 = trunc i448 %r318 to i32
+%r321 = getelementptr i32, i32* %r1, i32 8
+store i32 %r319, i32* %r321
+%r322 = lshr i448 %r318, 32
+%r323 = trunc i448 %r322 to i32
+%r325 = getelementptr i32, i32* %r1, i32 9
+store i32 %r323, i32* %r325
+%r326 = lshr i448 %r322, 32
+%r327 = trunc i448 %r326 to i32
+%r329 = getelementptr i32, i32* %r1, i32 10
+store i32 %r327, i32* %r329
+%r330 = lshr i448 %r326, 32
+%r331 = trunc i448 %r330 to i32
+%r333 = getelementptr i32, i32* %r1, i32 11
+store i32 %r331, i32* %r333
+%r334 = lshr i448 %r330, 32
+%r335 = trunc i448 %r334 to i32
+%r337 = getelementptr i32, i32* %r1, i32 12
+store i32 %r335, i32* %r337
+%r338 = lshr i448 %r334, 32
+%r339 = trunc i448 %r338 to i32
+%r341 = getelementptr i32, i32* %r1, i32 13
+store i32 %r339, i32* %r341
+ret void
+}
+define void @mcl_fpDbl_add14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; r1[0..27] = r2[0..27] + r3[0..27]; the upper 14 words are conditionally reduced by r4[0..13]
+{ ; all operands are arrays of 32-bit words, least-significant word first
+%r5 = load i32, i32* %r2 ; --- assemble the 28 words at r2 into one i896 value (%r194) ---
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11 ; word i is placed at bit offset 32*i
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165
+%r167 = zext i768 %r166 to i800
+%r169 = getelementptr i32, i32* %r2, i32 24
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i800
+%r172 = shl i800 %r171, 768
+%r173 = or i800 %r167, %r172
+%r174 = zext i800 %r173 to i832
+%r176 = getelementptr i32, i32* %r2, i32 25
+%r177 = load i32, i32* %r176
+%r178 = zext i32 %r177 to i832
+%r179 = shl i832 %r178, 800
+%r180 = or i832 %r174, %r179
+%r181 = zext i832 %r180 to i864
+%r183 = getelementptr i32, i32* %r2, i32 26
+%r184 = load i32, i32* %r183
+%r185 = zext i32 %r184 to i864
+%r186 = shl i864 %r185, 832
+%r187 = or i864 %r181, %r186
+%r188 = zext i864 %r187 to i896
+%r190 = getelementptr i32, i32* %r2, i32 27
+%r191 = load i32, i32* %r190
+%r192 = zext i32 %r191 to i896
+%r193 = shl i896 %r192, 864
+%r194 = or i896 %r188, %r193 ; %r194 = complete 896-bit first operand
+%r195 = load i32, i32* %r3 ; --- assemble the 28 words at r3 into one i896 value (%r384) ---
+%r196 = zext i32 %r195 to i64
+%r198 = getelementptr i32, i32* %r3, i32 1
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i64
+%r201 = shl i64 %r200, 32
+%r202 = or i64 %r196, %r201
+%r203 = zext i64 %r202 to i96
+%r205 = getelementptr i32, i32* %r3, i32 2
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i96
+%r208 = shl i96 %r207, 64
+%r209 = or i96 %r203, %r208
+%r210 = zext i96 %r209 to i128
+%r212 = getelementptr i32, i32* %r3, i32 3
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i128
+%r215 = shl i128 %r214, 96
+%r216 = or i128 %r210, %r215
+%r217 = zext i128 %r216 to i160
+%r219 = getelementptr i32, i32* %r3, i32 4
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i160
+%r222 = shl i160 %r221, 128
+%r223 = or i160 %r217, %r222
+%r224 = zext i160 %r223 to i192
+%r226 = getelementptr i32, i32* %r3, i32 5
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i192
+%r229 = shl i192 %r228, 160
+%r230 = or i192 %r224, %r229
+%r231 = zext i192 %r230 to i224
+%r233 = getelementptr i32, i32* %r3, i32 6
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i224
+%r236 = shl i224 %r235, 192
+%r237 = or i224 %r231, %r236
+%r238 = zext i224 %r237 to i256
+%r240 = getelementptr i32, i32* %r3, i32 7
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i256
+%r243 = shl i256 %r242, 224
+%r244 = or i256 %r238, %r243
+%r245 = zext i256 %r244 to i288
+%r247 = getelementptr i32, i32* %r3, i32 8
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i288
+%r250 = shl i288 %r249, 256
+%r251 = or i288 %r245, %r250
+%r252 = zext i288 %r251 to i320
+%r254 = getelementptr i32, i32* %r3, i32 9
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i320
+%r257 = shl i320 %r256, 288
+%r258 = or i320 %r252, %r257
+%r259 = zext i320 %r258 to i352
+%r261 = getelementptr i32, i32* %r3, i32 10
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i352
+%r264 = shl i352 %r263, 320
+%r265 = or i352 %r259, %r264
+%r266 = zext i352 %r265 to i384
+%r268 = getelementptr i32, i32* %r3, i32 11
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i384
+%r271 = shl i384 %r270, 352
+%r272 = or i384 %r266, %r271
+%r273 = zext i384 %r272 to i416
+%r275 = getelementptr i32, i32* %r3, i32 12
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i416
+%r278 = shl i416 %r277, 384
+%r279 = or i416 %r273, %r278
+%r280 = zext i416 %r279 to i448
+%r282 = getelementptr i32, i32* %r3, i32 13
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i448
+%r285 = shl i448 %r284, 416
+%r286 = or i448 %r280, %r285
+%r287 = zext i448 %r286 to i480
+%r289 = getelementptr i32, i32* %r3, i32 14
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i480
+%r292 = shl i480 %r291, 448
+%r293 = or i480 %r287, %r292
+%r294 = zext i480 %r293 to i512
+%r296 = getelementptr i32, i32* %r3, i32 15
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i512
+%r299 = shl i512 %r298, 480
+%r300 = or i512 %r294, %r299
+%r301 = zext i512 %r300 to i544
+%r303 = getelementptr i32, i32* %r3, i32 16
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i544
+%r306 = shl i544 %r305, 512
+%r307 = or i544 %r301, %r306
+%r308 = zext i544 %r307 to i576
+%r310 = getelementptr i32, i32* %r3, i32 17
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i576
+%r313 = shl i576 %r312, 544
+%r314 = or i576 %r308, %r313
+%r315 = zext i576 %r314 to i608
+%r317 = getelementptr i32, i32* %r3, i32 18
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i608
+%r320 = shl i608 %r319, 576
+%r321 = or i608 %r315, %r320
+%r322 = zext i608 %r321 to i640
+%r324 = getelementptr i32, i32* %r3, i32 19
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i640
+%r327 = shl i640 %r326, 608
+%r328 = or i640 %r322, %r327
+%r329 = zext i640 %r328 to i672
+%r331 = getelementptr i32, i32* %r3, i32 20
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i672
+%r334 = shl i672 %r333, 640
+%r335 = or i672 %r329, %r334
+%r336 = zext i672 %r335 to i704
+%r338 = getelementptr i32, i32* %r3, i32 21
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i704
+%r341 = shl i704 %r340, 672
+%r342 = or i704 %r336, %r341
+%r343 = zext i704 %r342 to i736
+%r345 = getelementptr i32, i32* %r3, i32 22
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i736
+%r348 = shl i736 %r347, 704
+%r349 = or i736 %r343, %r348
+%r350 = zext i736 %r349 to i768
+%r352 = getelementptr i32, i32* %r3, i32 23
+%r353 = load i32, i32* %r352
+%r354 = zext i32 %r353 to i768
+%r355 = shl i768 %r354, 736
+%r356 = or i768 %r350, %r355
+%r357 = zext i768 %r356 to i800
+%r359 = getelementptr i32, i32* %r3, i32 24
+%r360 = load i32, i32* %r359
+%r361 = zext i32 %r360 to i800
+%r362 = shl i800 %r361, 768
+%r363 = or i800 %r357, %r362
+%r364 = zext i800 %r363 to i832
+%r366 = getelementptr i32, i32* %r3, i32 25
+%r367 = load i32, i32* %r366
+%r368 = zext i32 %r367 to i832
+%r369 = shl i832 %r368, 800
+%r370 = or i832 %r364, %r369
+%r371 = zext i832 %r370 to i864
+%r373 = getelementptr i32, i32* %r3, i32 26
+%r374 = load i32, i32* %r373
+%r375 = zext i32 %r374 to i864
+%r376 = shl i864 %r375, 832
+%r377 = or i864 %r371, %r376
+%r378 = zext i864 %r377 to i896
+%r380 = getelementptr i32, i32* %r3, i32 27
+%r381 = load i32, i32* %r380
+%r382 = zext i32 %r381 to i896
+%r383 = shl i896 %r382, 864
+%r384 = or i896 %r378, %r383 ; %r384 = complete 896-bit second operand
+%r385 = zext i896 %r194 to i928 ; widen both operands to i928 so the addition cannot wrap
+%r386 = zext i896 %r384 to i928
+%r387 = add i928 %r385, %r386 ; %r387 = exact sum, carry-out lands in bit 896
+%r388 = trunc i928 %r387 to i448 ; low 448 bits of the sum are stored unchanged into r1[0..13]
+%r389 = trunc i448 %r388 to i32
+%r391 = getelementptr i32, i32* %r1, i32 0
+store i32 %r389, i32* %r391
+%r392 = lshr i448 %r388, 32
+%r393 = trunc i448 %r392 to i32
+%r395 = getelementptr i32, i32* %r1, i32 1
+store i32 %r393, i32* %r395
+%r396 = lshr i448 %r392, 32
+%r397 = trunc i448 %r396 to i32
+%r399 = getelementptr i32, i32* %r1, i32 2
+store i32 %r397, i32* %r399
+%r400 = lshr i448 %r396, 32
+%r401 = trunc i448 %r400 to i32
+%r403 = getelementptr i32, i32* %r1, i32 3
+store i32 %r401, i32* %r403
+%r404 = lshr i448 %r400, 32
+%r405 = trunc i448 %r404 to i32
+%r407 = getelementptr i32, i32* %r1, i32 4
+store i32 %r405, i32* %r407
+%r408 = lshr i448 %r404, 32
+%r409 = trunc i448 %r408 to i32
+%r411 = getelementptr i32, i32* %r1, i32 5
+store i32 %r409, i32* %r411
+%r412 = lshr i448 %r408, 32
+%r413 = trunc i448 %r412 to i32
+%r415 = getelementptr i32, i32* %r1, i32 6
+store i32 %r413, i32* %r415
+%r416 = lshr i448 %r412, 32
+%r417 = trunc i448 %r416 to i32
+%r419 = getelementptr i32, i32* %r1, i32 7
+store i32 %r417, i32* %r419
+%r420 = lshr i448 %r416, 32
+%r421 = trunc i448 %r420 to i32
+%r423 = getelementptr i32, i32* %r1, i32 8
+store i32 %r421, i32* %r423
+%r424 = lshr i448 %r420, 32
+%r425 = trunc i448 %r424 to i32
+%r427 = getelementptr i32, i32* %r1, i32 9
+store i32 %r425, i32* %r427
+%r428 = lshr i448 %r424, 32
+%r429 = trunc i448 %r428 to i32
+%r431 = getelementptr i32, i32* %r1, i32 10
+store i32 %r429, i32* %r431
+%r432 = lshr i448 %r428, 32
+%r433 = trunc i448 %r432 to i32
+%r435 = getelementptr i32, i32* %r1, i32 11
+store i32 %r433, i32* %r435
+%r436 = lshr i448 %r432, 32
+%r437 = trunc i448 %r436 to i32
+%r439 = getelementptr i32, i32* %r1, i32 12
+store i32 %r437, i32* %r439
+%r440 = lshr i448 %r436, 32
+%r441 = trunc i448 %r440 to i32
+%r443 = getelementptr i32, i32* %r1, i32 13
+store i32 %r441, i32* %r443
+%r444 = lshr i928 %r387, 448 ; --- upper part: bits 448 and above of the sum (at most 449 significant bits) ---
+%r445 = trunc i928 %r444 to i480
+%r446 = load i32, i32* %r4 ; --- assemble the 14 words at r4 into one i448 value (%r537); presumably the field modulus - TODO confirm against callers ---
+%r447 = zext i32 %r446 to i64
+%r449 = getelementptr i32, i32* %r4, i32 1
+%r450 = load i32, i32* %r449
+%r451 = zext i32 %r450 to i64
+%r452 = shl i64 %r451, 32
+%r453 = or i64 %r447, %r452
+%r454 = zext i64 %r453 to i96
+%r456 = getelementptr i32, i32* %r4, i32 2
+%r457 = load i32, i32* %r456
+%r458 = zext i32 %r457 to i96
+%r459 = shl i96 %r458, 64
+%r460 = or i96 %r454, %r459
+%r461 = zext i96 %r460 to i128
+%r463 = getelementptr i32, i32* %r4, i32 3
+%r464 = load i32, i32* %r463
+%r465 = zext i32 %r464 to i128
+%r466 = shl i128 %r465, 96
+%r467 = or i128 %r461, %r466
+%r468 = zext i128 %r467 to i160
+%r470 = getelementptr i32, i32* %r4, i32 4
+%r471 = load i32, i32* %r470
+%r472 = zext i32 %r471 to i160
+%r473 = shl i160 %r472, 128
+%r474 = or i160 %r468, %r473
+%r475 = zext i160 %r474 to i192
+%r477 = getelementptr i32, i32* %r4, i32 5
+%r478 = load i32, i32* %r477
+%r479 = zext i32 %r478 to i192
+%r480 = shl i192 %r479, 160
+%r481 = or i192 %r475, %r480
+%r482 = zext i192 %r481 to i224
+%r484 = getelementptr i32, i32* %r4, i32 6
+%r485 = load i32, i32* %r484
+%r486 = zext i32 %r485 to i224
+%r487 = shl i224 %r486, 192
+%r488 = or i224 %r482, %r487
+%r489 = zext i224 %r488 to i256
+%r491 = getelementptr i32, i32* %r4, i32 7
+%r492 = load i32, i32* %r491
+%r493 = zext i32 %r492 to i256
+%r494 = shl i256 %r493, 224
+%r495 = or i256 %r489, %r494
+%r496 = zext i256 %r495 to i288
+%r498 = getelementptr i32, i32* %r4, i32 8
+%r499 = load i32, i32* %r498
+%r500 = zext i32 %r499 to i288
+%r501 = shl i288 %r500, 256
+%r502 = or i288 %r496, %r501
+%r503 = zext i288 %r502 to i320
+%r505 = getelementptr i32, i32* %r4, i32 9
+%r506 = load i32, i32* %r505
+%r507 = zext i32 %r506 to i320
+%r508 = shl i320 %r507, 288
+%r509 = or i320 %r503, %r508
+%r510 = zext i320 %r509 to i352
+%r512 = getelementptr i32, i32* %r4, i32 10
+%r513 = load i32, i32* %r512
+%r514 = zext i32 %r513 to i352
+%r515 = shl i352 %r514, 320
+%r516 = or i352 %r510, %r515
+%r517 = zext i352 %r516 to i384
+%r519 = getelementptr i32, i32* %r4, i32 11
+%r520 = load i32, i32* %r519
+%r521 = zext i32 %r520 to i384
+%r522 = shl i384 %r521, 352
+%r523 = or i384 %r517, %r522
+%r524 = zext i384 %r523 to i416
+%r526 = getelementptr i32, i32* %r4, i32 12
+%r527 = load i32, i32* %r526
+%r528 = zext i32 %r527 to i416
+%r529 = shl i416 %r528, 384
+%r530 = or i416 %r524, %r529
+%r531 = zext i416 %r530 to i448
+%r533 = getelementptr i32, i32* %r4, i32 13
+%r534 = load i32, i32* %r533
+%r535 = zext i32 %r534 to i448
+%r536 = shl i448 %r535, 416
+%r537 = or i448 %r531, %r536
+%r538 = zext i448 %r537 to i480
+%r539 = sub i480 %r445, %r538 ; trial subtraction: upper part of the sum minus the r4 value
+%r540 = lshr i480 %r539, 448
+%r541 = trunc i480 %r540 to i1 ; %r541 = bit 448 of the difference, used as the "went negative" flag; NOTE(review): assumes inputs are bounded so this bit is set only on borrow - confirm
+%r542 = select i1 %r541, i480 %r445, i480 %r539 ; flag set: keep the unsubtracted upper part; flag clear: keep the difference
+%r543 = trunc i480 %r542 to i448
+%r545 = getelementptr i32, i32* %r1, i32 14 ; the selected 448-bit value is written to r1[14..27]
+%r546 = trunc i448 %r543 to i32
+%r548 = getelementptr i32, i32* %r545, i32 0
+store i32 %r546, i32* %r548
+%r549 = lshr i448 %r543, 32
+%r550 = trunc i448 %r549 to i32
+%r552 = getelementptr i32, i32* %r545, i32 1
+store i32 %r550, i32* %r552
+%r553 = lshr i448 %r549, 32
+%r554 = trunc i448 %r553 to i32
+%r556 = getelementptr i32, i32* %r545, i32 2
+store i32 %r554, i32* %r556
+%r557 = lshr i448 %r553, 32
+%r558 = trunc i448 %r557 to i32
+%r560 = getelementptr i32, i32* %r545, i32 3
+store i32 %r558, i32* %r560
+%r561 = lshr i448 %r557, 32
+%r562 = trunc i448 %r561 to i32
+%r564 = getelementptr i32, i32* %r545, i32 4
+store i32 %r562, i32* %r564
+%r565 = lshr i448 %r561, 32
+%r566 = trunc i448 %r565 to i32
+%r568 = getelementptr i32, i32* %r545, i32 5
+store i32 %r566, i32* %r568
+%r569 = lshr i448 %r565, 32
+%r570 = trunc i448 %r569 to i32
+%r572 = getelementptr i32, i32* %r545, i32 6
+store i32 %r570, i32* %r572
+%r573 = lshr i448 %r569, 32
+%r574 = trunc i448 %r573 to i32
+%r576 = getelementptr i32, i32* %r545, i32 7
+store i32 %r574, i32* %r576
+%r577 = lshr i448 %r573, 32
+%r578 = trunc i448 %r577 to i32
+%r580 = getelementptr i32, i32* %r545, i32 8
+store i32 %r578, i32* %r580
+%r581 = lshr i448 %r577, 32
+%r582 = trunc i448 %r581 to i32
+%r584 = getelementptr i32, i32* %r545, i32 9
+store i32 %r582, i32* %r584
+%r585 = lshr i448 %r581, 32
+%r586 = trunc i448 %r585 to i32
+%r588 = getelementptr i32, i32* %r545, i32 10
+store i32 %r586, i32* %r588
+%r589 = lshr i448 %r585, 32
+%r590 = trunc i448 %r589 to i32
+%r592 = getelementptr i32, i32* %r545, i32 11
+store i32 %r590, i32* %r592
+%r593 = lshr i448 %r589, 32
+%r594 = trunc i448 %r593 to i32
+%r596 = getelementptr i32, i32* %r545, i32 12
+store i32 %r594, i32* %r596
+%r597 = lshr i448 %r593, 32
+%r598 = trunc i448 %r597 to i32
+%r600 = getelementptr i32, i32* %r545, i32 13
+store i32 %r598, i32* %r600
+ret void
+}
+define void @mcl_fpDbl_sub14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165
+%r167 = zext i768 %r166 to i800
+%r169 = getelementptr i32, i32* %r2, i32 24
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i800
+%r172 = shl i800 %r171, 768
+%r173 = or i800 %r167, %r172
+%r174 = zext i800 %r173 to i832
+%r176 = getelementptr i32, i32* %r2, i32 25
+%r177 = load i32, i32* %r176
+%r178 = zext i32 %r177 to i832
+%r179 = shl i832 %r178, 800
+%r180 = or i832 %r174, %r179
+%r181 = zext i832 %r180 to i864
+%r183 = getelementptr i32, i32* %r2, i32 26
+%r184 = load i32, i32* %r183
+%r185 = zext i32 %r184 to i864
+%r186 = shl i864 %r185, 832
+%r187 = or i864 %r181, %r186
+%r188 = zext i864 %r187 to i896
+%r190 = getelementptr i32, i32* %r2, i32 27
+%r191 = load i32, i32* %r190
+%r192 = zext i32 %r191 to i896
+%r193 = shl i896 %r192, 864
+%r194 = or i896 %r188, %r193
+%r195 = load i32, i32* %r3
+%r196 = zext i32 %r195 to i64
+%r198 = getelementptr i32, i32* %r3, i32 1
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i64
+%r201 = shl i64 %r200, 32
+%r202 = or i64 %r196, %r201
+%r203 = zext i64 %r202 to i96
+%r205 = getelementptr i32, i32* %r3, i32 2
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i96
+%r208 = shl i96 %r207, 64
+%r209 = or i96 %r203, %r208
+%r210 = zext i96 %r209 to i128
+%r212 = getelementptr i32, i32* %r3, i32 3
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i128
+%r215 = shl i128 %r214, 96
+%r216 = or i128 %r210, %r215
+%r217 = zext i128 %r216 to i160
+%r219 = getelementptr i32, i32* %r3, i32 4
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i160
+%r222 = shl i160 %r221, 128
+%r223 = or i160 %r217, %r222
+%r224 = zext i160 %r223 to i192
+%r226 = getelementptr i32, i32* %r3, i32 5
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i192
+%r229 = shl i192 %r228, 160
+%r230 = or i192 %r224, %r229
+%r231 = zext i192 %r230 to i224
+%r233 = getelementptr i32, i32* %r3, i32 6
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i224
+%r236 = shl i224 %r235, 192
+%r237 = or i224 %r231, %r236
+%r238 = zext i224 %r237 to i256
+%r240 = getelementptr i32, i32* %r3, i32 7
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i256
+%r243 = shl i256 %r242, 224
+%r244 = or i256 %r238, %r243
+%r245 = zext i256 %r244 to i288
+%r247 = getelementptr i32, i32* %r3, i32 8
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i288
+%r250 = shl i288 %r249, 256
+%r251 = or i288 %r245, %r250
+%r252 = zext i288 %r251 to i320
+%r254 = getelementptr i32, i32* %r3, i32 9
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i320
+%r257 = shl i320 %r256, 288
+%r258 = or i320 %r252, %r257
+%r259 = zext i320 %r258 to i352
+%r261 = getelementptr i32, i32* %r3, i32 10
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i352
+%r264 = shl i352 %r263, 320
+%r265 = or i352 %r259, %r264
+%r266 = zext i352 %r265 to i384
+%r268 = getelementptr i32, i32* %r3, i32 11
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i384
+%r271 = shl i384 %r270, 352
+%r272 = or i384 %r266, %r271
+%r273 = zext i384 %r272 to i416
+%r275 = getelementptr i32, i32* %r3, i32 12
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i416
+%r278 = shl i416 %r277, 384
+%r279 = or i416 %r273, %r278
+%r280 = zext i416 %r279 to i448
+%r282 = getelementptr i32, i32* %r3, i32 13
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i448
+%r285 = shl i448 %r284, 416
+%r286 = or i448 %r280, %r285
+%r287 = zext i448 %r286 to i480
+%r289 = getelementptr i32, i32* %r3, i32 14
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i480
+%r292 = shl i480 %r291, 448
+%r293 = or i480 %r287, %r292
+%r294 = zext i480 %r293 to i512
+%r296 = getelementptr i32, i32* %r3, i32 15
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i512
+%r299 = shl i512 %r298, 480
+%r300 = or i512 %r294, %r299
+%r301 = zext i512 %r300 to i544
+%r303 = getelementptr i32, i32* %r3, i32 16
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i544
+%r306 = shl i544 %r305, 512
+%r307 = or i544 %r301, %r306
+%r308 = zext i544 %r307 to i576
+%r310 = getelementptr i32, i32* %r3, i32 17
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i576
+%r313 = shl i576 %r312, 544
+%r314 = or i576 %r308, %r313
+%r315 = zext i576 %r314 to i608
+%r317 = getelementptr i32, i32* %r3, i32 18
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i608
+%r320 = shl i608 %r319, 576
+%r321 = or i608 %r315, %r320
+%r322 = zext i608 %r321 to i640
+%r324 = getelementptr i32, i32* %r3, i32 19
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i640
+%r327 = shl i640 %r326, 608
+%r328 = or i640 %r322, %r327
+%r329 = zext i640 %r328 to i672
+%r331 = getelementptr i32, i32* %r3, i32 20
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i672
+%r334 = shl i672 %r333, 640
+%r335 = or i672 %r329, %r334
+%r336 = zext i672 %r335 to i704
+%r338 = getelementptr i32, i32* %r3, i32 21
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i704
+%r341 = shl i704 %r340, 672
+%r342 = or i704 %r336, %r341
+%r343 = zext i704 %r342 to i736
+%r345 = getelementptr i32, i32* %r3, i32 22
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i736
+%r348 = shl i736 %r347, 704
+%r349 = or i736 %r343, %r348
+%r350 = zext i736 %r349 to i768
+%r352 = getelementptr i32, i32* %r3, i32 23
+%r353 = load i32, i32* %r352
+%r354 = zext i32 %r353 to i768
+%r355 = shl i768 %r354, 736
+%r356 = or i768 %r350, %r355
+%r357 = zext i768 %r356 to i800
+%r359 = getelementptr i32, i32* %r3, i32 24
+%r360 = load i32, i32* %r359
+%r361 = zext i32 %r360 to i800
+%r362 = shl i800 %r361, 768
+%r363 = or i800 %r357, %r362
+%r364 = zext i800 %r363 to i832
+%r366 = getelementptr i32, i32* %r3, i32 25
+%r367 = load i32, i32* %r366
+%r368 = zext i32 %r367 to i832
+%r369 = shl i832 %r368, 800
+%r370 = or i832 %r364, %r369
+%r371 = zext i832 %r370 to i864
+%r373 = getelementptr i32, i32* %r3, i32 26
+%r374 = load i32, i32* %r373
+%r375 = zext i32 %r374 to i864
+%r376 = shl i864 %r375, 832
+%r377 = or i864 %r371, %r376
+%r378 = zext i864 %r377 to i896
+%r380 = getelementptr i32, i32* %r3, i32 27
+%r381 = load i32, i32* %r380
+%r382 = zext i32 %r381 to i896
+%r383 = shl i896 %r382, 864
+%r384 = or i896 %r378, %r383
+%r385 = zext i896 %r194 to i928
+%r386 = zext i896 %r384 to i928
+%r387 = sub i928 %r385, %r386
+%r388 = trunc i928 %r387 to i448
+%r389 = trunc i448 %r388 to i32
+%r391 = getelementptr i32, i32* %r1, i32 0
+store i32 %r389, i32* %r391
+%r392 = lshr i448 %r388, 32
+%r393 = trunc i448 %r392 to i32
+%r395 = getelementptr i32, i32* %r1, i32 1
+store i32 %r393, i32* %r395
+%r396 = lshr i448 %r392, 32
+%r397 = trunc i448 %r396 to i32
+%r399 = getelementptr i32, i32* %r1, i32 2
+store i32 %r397, i32* %r399
+%r400 = lshr i448 %r396, 32
+%r401 = trunc i448 %r400 to i32
+%r403 = getelementptr i32, i32* %r1, i32 3
+store i32 %r401, i32* %r403
+%r404 = lshr i448 %r400, 32
+%r405 = trunc i448 %r404 to i32
+%r407 = getelementptr i32, i32* %r1, i32 4
+store i32 %r405, i32* %r407
+%r408 = lshr i448 %r404, 32
+%r409 = trunc i448 %r408 to i32
+%r411 = getelementptr i32, i32* %r1, i32 5
+store i32 %r409, i32* %r411
+%r412 = lshr i448 %r408, 32
+%r413 = trunc i448 %r412 to i32
+%r415 = getelementptr i32, i32* %r1, i32 6
+store i32 %r413, i32* %r415
+%r416 = lshr i448 %r412, 32
+%r417 = trunc i448 %r416 to i32
+%r419 = getelementptr i32, i32* %r1, i32 7
+store i32 %r417, i32* %r419
+%r420 = lshr i448 %r416, 32
+%r421 = trunc i448 %r420 to i32
+%r423 = getelementptr i32, i32* %r1, i32 8
+store i32 %r421, i32* %r423
+%r424 = lshr i448 %r420, 32
+%r425 = trunc i448 %r424 to i32
+%r427 = getelementptr i32, i32* %r1, i32 9
+store i32 %r425, i32* %r427
+%r428 = lshr i448 %r424, 32
+%r429 = trunc i448 %r428 to i32
+%r431 = getelementptr i32, i32* %r1, i32 10
+store i32 %r429, i32* %r431
+%r432 = lshr i448 %r428, 32
+%r433 = trunc i448 %r432 to i32
+%r435 = getelementptr i32, i32* %r1, i32 11
+store i32 %r433, i32* %r435
+%r436 = lshr i448 %r432, 32
+%r437 = trunc i448 %r436 to i32
+%r439 = getelementptr i32, i32* %r1, i32 12
+store i32 %r437, i32* %r439
+%r440 = lshr i448 %r436, 32
+%r441 = trunc i448 %r440 to i32
+%r443 = getelementptr i32, i32* %r1, i32 13
+store i32 %r441, i32* %r443
+%r444 = lshr i928 %r387, 448
+%r445 = trunc i928 %r444 to i448
+%r446 = lshr i928 %r387, 896
+%r447 = trunc i928 %r446 to i1
+%r448 = load i32, i32* %r4
+%r449 = zext i32 %r448 to i64
+%r451 = getelementptr i32, i32* %r4, i32 1
+%r452 = load i32, i32* %r451
+%r453 = zext i32 %r452 to i64
+%r454 = shl i64 %r453, 32
+%r455 = or i64 %r449, %r454
+%r456 = zext i64 %r455 to i96
+%r458 = getelementptr i32, i32* %r4, i32 2
+%r459 = load i32, i32* %r458
+%r460 = zext i32 %r459 to i96
+%r461 = shl i96 %r460, 64
+%r462 = or i96 %r456, %r461
+%r463 = zext i96 %r462 to i128
+%r465 = getelementptr i32, i32* %r4, i32 3
+%r466 = load i32, i32* %r465
+%r467 = zext i32 %r466 to i128
+%r468 = shl i128 %r467, 96
+%r469 = or i128 %r463, %r468
+%r470 = zext i128 %r469 to i160
+%r472 = getelementptr i32, i32* %r4, i32 4
+%r473 = load i32, i32* %r472
+%r474 = zext i32 %r473 to i160
+%r475 = shl i160 %r474, 128
+%r476 = or i160 %r470, %r475
+%r477 = zext i160 %r476 to i192
+%r479 = getelementptr i32, i32* %r4, i32 5
+%r480 = load i32, i32* %r479
+%r481 = zext i32 %r480 to i192
+%r482 = shl i192 %r481, 160
+%r483 = or i192 %r477, %r482
+%r484 = zext i192 %r483 to i224
+%r486 = getelementptr i32, i32* %r4, i32 6
+%r487 = load i32, i32* %r486
+%r488 = zext i32 %r487 to i224
+%r489 = shl i224 %r488, 192
+%r490 = or i224 %r484, %r489
+%r491 = zext i224 %r490 to i256
+%r493 = getelementptr i32, i32* %r4, i32 7
+%r494 = load i32, i32* %r493
+%r495 = zext i32 %r494 to i256
+%r496 = shl i256 %r495, 224
+%r497 = or i256 %r491, %r496
+%r498 = zext i256 %r497 to i288
+%r500 = getelementptr i32, i32* %r4, i32 8
+%r501 = load i32, i32* %r500
+%r502 = zext i32 %r501 to i288
+%r503 = shl i288 %r502, 256
+%r504 = or i288 %r498, %r503
+%r505 = zext i288 %r504 to i320
+%r507 = getelementptr i32, i32* %r4, i32 9
+%r508 = load i32, i32* %r507
+%r509 = zext i32 %r508 to i320
+%r510 = shl i320 %r509, 288
+%r511 = or i320 %r505, %r510
+%r512 = zext i320 %r511 to i352
+%r514 = getelementptr i32, i32* %r4, i32 10
+%r515 = load i32, i32* %r514
+%r516 = zext i32 %r515 to i352
+%r517 = shl i352 %r516, 320
+%r518 = or i352 %r512, %r517
+%r519 = zext i352 %r518 to i384
+%r521 = getelementptr i32, i32* %r4, i32 11
+%r522 = load i32, i32* %r521
+%r523 = zext i32 %r522 to i384
+%r524 = shl i384 %r523, 352
+%r525 = or i384 %r519, %r524
+%r526 = zext i384 %r525 to i416
+%r528 = getelementptr i32, i32* %r4, i32 12
+%r529 = load i32, i32* %r528
+%r530 = zext i32 %r529 to i416
+%r531 = shl i416 %r530, 384
+%r532 = or i416 %r526, %r531
+%r533 = zext i416 %r532 to i448
+%r535 = getelementptr i32, i32* %r4, i32 13
+%r536 = load i32, i32* %r535
+%r537 = zext i32 %r536 to i448
+%r538 = shl i448 %r537, 416
+%r539 = or i448 %r533, %r538
+%r541 = select i1 %r447, i448 %r539, i448 0
+%r542 = add i448 %r445, %r541
+%r544 = getelementptr i32, i32* %r1, i32 14
+%r545 = trunc i448 %r542 to i32
+%r547 = getelementptr i32, i32* %r544, i32 0
+store i32 %r545, i32* %r547
+%r548 = lshr i448 %r542, 32
+%r549 = trunc i448 %r548 to i32
+%r551 = getelementptr i32, i32* %r544, i32 1
+store i32 %r549, i32* %r551
+%r552 = lshr i448 %r548, 32
+%r553 = trunc i448 %r552 to i32
+%r555 = getelementptr i32, i32* %r544, i32 2
+store i32 %r553, i32* %r555
+%r556 = lshr i448 %r552, 32
+%r557 = trunc i448 %r556 to i32
+%r559 = getelementptr i32, i32* %r544, i32 3
+store i32 %r557, i32* %r559
+%r560 = lshr i448 %r556, 32
+%r561 = trunc i448 %r560 to i32
+%r563 = getelementptr i32, i32* %r544, i32 4
+store i32 %r561, i32* %r563
+%r564 = lshr i448 %r560, 32
+%r565 = trunc i448 %r564 to i32
+%r567 = getelementptr i32, i32* %r544, i32 5
+store i32 %r565, i32* %r567
+%r568 = lshr i448 %r564, 32
+%r569 = trunc i448 %r568 to i32
+%r571 = getelementptr i32, i32* %r544, i32 6
+store i32 %r569, i32* %r571
+%r572 = lshr i448 %r568, 32
+%r573 = trunc i448 %r572 to i32
+%r575 = getelementptr i32, i32* %r544, i32 7
+store i32 %r573, i32* %r575
+%r576 = lshr i448 %r572, 32
+%r577 = trunc i448 %r576 to i32
+%r579 = getelementptr i32, i32* %r544, i32 8
+store i32 %r577, i32* %r579
+%r580 = lshr i448 %r576, 32
+%r581 = trunc i448 %r580 to i32
+%r583 = getelementptr i32, i32* %r544, i32 9
+store i32 %r581, i32* %r583
+%r584 = lshr i448 %r580, 32
+%r585 = trunc i448 %r584 to i32
+%r587 = getelementptr i32, i32* %r544, i32 10
+store i32 %r585, i32* %r587
+%r588 = lshr i448 %r584, 32
+%r589 = trunc i448 %r588 to i32
+%r591 = getelementptr i32, i32* %r544, i32 11
+store i32 %r589, i32* %r591
+%r592 = lshr i448 %r588, 32
+%r593 = trunc i448 %r592 to i32
+%r595 = getelementptr i32, i32* %r544, i32 12
+store i32 %r593, i32* %r595
+%r596 = lshr i448 %r592, 32
+%r597 = trunc i448 %r596 to i32
+%r599 = getelementptr i32, i32* %r544, i32 13
+store i32 %r597, i32* %r599
+ret void
+}
+define i512 @mulPv480x32(i32* noalias  %r2, i32 %r3) ; combines 15 mulPos32x32 results for (%r2, %r3) into one i512 return value
+{ ; NOTE(review): mulPos32x32 and extractHigh32 are defined outside this chunk; presumably the i64 product of word[i] of %r2 with %r3, and the upper 32 bits of an i64 - confirm
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0) ; one call per index 0..14; each result is split into trunc (low) and extractHigh32 parts
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
+%r34 = trunc i64 %r33 to i32
+%r35 = call i32 @extractHigh32(i64 %r33)
+%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
+%r38 = trunc i64 %r37 to i32
+%r39 = call i32 @extractHigh32(i64 %r37)
+%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
+%r42 = trunc i64 %r41 to i32
+%r43 = call i32 @extractHigh32(i64 %r41)
+%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
+%r46 = trunc i64 %r45 to i32
+%r47 = call i32 @extractHigh32(i64 %r45)
+%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
+%r50 = trunc i64 %r49 to i32
+%r51 = call i32 @extractHigh32(i64 %r49)
+%r53 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 12)
+%r54 = trunc i64 %r53 to i32
+%r55 = call i32 @extractHigh32(i64 %r53)
+%r57 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 13)
+%r58 = trunc i64 %r57 to i32
+%r59 = call i32 @extractHigh32(i64 %r57)
+%r61 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 14)
+%r62 = trunc i64 %r61 to i32
+%r63 = call i32 @extractHigh32(i64 %r61)
+%r64 = zext i32 %r6 to i64 ; pack the 15 low parts into one i480: part i lands at bit offset 32*i
+%r65 = zext i32 %r10 to i64
+%r66 = shl i64 %r65, 32
+%r67 = or i64 %r64, %r66
+%r68 = zext i64 %r67 to i96
+%r69 = zext i32 %r14 to i96
+%r70 = shl i96 %r69, 64
+%r71 = or i96 %r68, %r70
+%r72 = zext i96 %r71 to i128
+%r73 = zext i32 %r18 to i128
+%r74 = shl i128 %r73, 96
+%r75 = or i128 %r72, %r74
+%r76 = zext i128 %r75 to i160
+%r77 = zext i32 %r22 to i160
+%r78 = shl i160 %r77, 128
+%r79 = or i160 %r76, %r78
+%r80 = zext i160 %r79 to i192
+%r81 = zext i32 %r26 to i192
+%r82 = shl i192 %r81, 160
+%r83 = or i192 %r80, %r82
+%r84 = zext i192 %r83 to i224
+%r85 = zext i32 %r30 to i224
+%r86 = shl i224 %r85, 192
+%r87 = or i224 %r84, %r86
+%r88 = zext i224 %r87 to i256
+%r89 = zext i32 %r34 to i256
+%r90 = shl i256 %r89, 224
+%r91 = or i256 %r88, %r90
+%r92 = zext i256 %r91 to i288
+%r93 = zext i32 %r38 to i288
+%r94 = shl i288 %r93, 256
+%r95 = or i288 %r92, %r94
+%r96 = zext i288 %r95 to i320
+%r97 = zext i32 %r42 to i320
+%r98 = shl i320 %r97, 288
+%r99 = or i320 %r96, %r98
+%r100 = zext i320 %r99 to i352
+%r101 = zext i32 %r46 to i352
+%r102 = shl i352 %r101, 320
+%r103 = or i352 %r100, %r102
+%r104 = zext i352 %r103 to i384
+%r105 = zext i32 %r50 to i384
+%r106 = shl i384 %r105, 352
+%r107 = or i384 %r104, %r106
+%r108 = zext i384 %r107 to i416
+%r109 = zext i32 %r54 to i416
+%r110 = shl i416 %r109, 384
+%r111 = or i416 %r108, %r110
+%r112 = zext i416 %r111 to i448
+%r113 = zext i32 %r58 to i448
+%r114 = shl i448 %r113, 416
+%r115 = or i448 %r112, %r114
+%r116 = zext i448 %r115 to i480
+%r117 = zext i32 %r62 to i480
+%r118 = shl i480 %r117, 448
+%r119 = or i480 %r116, %r118
+%r120 = zext i32 %r7 to i64 ; pack the 15 extractHigh32 results into a second i480 using the same layout
+%r121 = zext i32 %r11 to i64
+%r122 = shl i64 %r121, 32
+%r123 = or i64 %r120, %r122
+%r124 = zext i64 %r123 to i96
+%r125 = zext i32 %r15 to i96
+%r126 = shl i96 %r125, 64
+%r127 = or i96 %r124, %r126
+%r128 = zext i96 %r127 to i128
+%r129 = zext i32 %r19 to i128
+%r130 = shl i128 %r129, 96
+%r131 = or i128 %r128, %r130
+%r132 = zext i128 %r131 to i160
+%r133 = zext i32 %r23 to i160
+%r134 = shl i160 %r133, 128
+%r135 = or i160 %r132, %r134
+%r136 = zext i160 %r135 to i192
+%r137 = zext i32 %r27 to i192
+%r138 = shl i192 %r137, 160
+%r139 = or i192 %r136, %r138
+%r140 = zext i192 %r139 to i224
+%r141 = zext i32 %r31 to i224
+%r142 = shl i224 %r141, 192
+%r143 = or i224 %r140, %r142
+%r144 = zext i224 %r143 to i256
+%r145 = zext i32 %r35 to i256
+%r146 = shl i256 %r145, 224
+%r147 = or i256 %r144, %r146
+%r148 = zext i256 %r147 to i288
+%r149 = zext i32 %r39 to i288
+%r150 = shl i288 %r149, 256
+%r151 = or i288 %r148, %r150
+%r152 = zext i288 %r151 to i320
+%r153 = zext i32 %r43 to i320
+%r154 = shl i320 %r153, 288
+%r155 = or i320 %r152, %r154
+%r156 = zext i320 %r155 to i352
+%r157 = zext i32 %r47 to i352
+%r158 = shl i352 %r157, 320
+%r159 = or i352 %r156, %r158
+%r160 = zext i352 %r159 to i384
+%r161 = zext i32 %r51 to i384
+%r162 = shl i384 %r161, 352
+%r163 = or i384 %r160, %r162
+%r164 = zext i384 %r163 to i416
+%r165 = zext i32 %r55 to i416
+%r166 = shl i416 %r165, 384
+%r167 = or i416 %r164, %r166
+%r168 = zext i416 %r167 to i448
+%r169 = zext i32 %r59 to i448
+%r170 = shl i448 %r169, 416
+%r171 = or i448 %r168, %r170
+%r172 = zext i448 %r171 to i480
+%r173 = zext i32 %r63 to i480
+%r174 = shl i480 %r173, 448
+%r175 = or i480 %r172, %r174
+%r176 = zext i480 %r119 to i512 ; widen both packed values so the shifted sum below fits
+%r177 = zext i480 %r175 to i512
+%r178 = shl i512 %r177, 32 ; the high parts sit one 32-bit word above the matching low parts
+%r179 = add i512 %r176, %r178 ; a real add (not or): the two values overlap, so carries propagate here
+ret i512 %r179
+}
+define void @mcl_fp_mulUnitPre15L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3) ; stores mulPv480x32(%r2, %r3) to %r1 as 16 consecutive i32 words
+{
+%r4 = call i512 @mulPv480x32(i32* %r2, i32 %r3) ; the whole computation; everything below only unpacks the i512
+%r5 = trunc i512 %r4 to i32 ; lowest 32 bits go to %r1[0]
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i512 %r4, 32 ; repeated pattern: shift right by one word, truncate, store at the next index
+%r9 = trunc i512 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i512 %r8, 32
+%r13 = trunc i512 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i512 %r12, 32
+%r17 = trunc i512 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i512 %r16, 32
+%r21 = trunc i512 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+%r24 = lshr i512 %r20, 32
+%r25 = trunc i512 %r24 to i32
+%r27 = getelementptr i32, i32* %r1, i32 5
+store i32 %r25, i32* %r27
+%r28 = lshr i512 %r24, 32
+%r29 = trunc i512 %r28 to i32
+%r31 = getelementptr i32, i32* %r1, i32 6
+store i32 %r29, i32* %r31
+%r32 = lshr i512 %r28, 32
+%r33 = trunc i512 %r32 to i32
+%r35 = getelementptr i32, i32* %r1, i32 7
+store i32 %r33, i32* %r35
+%r36 = lshr i512 %r32, 32
+%r37 = trunc i512 %r36 to i32
+%r39 = getelementptr i32, i32* %r1, i32 8
+store i32 %r37, i32* %r39
+%r40 = lshr i512 %r36, 32
+%r41 = trunc i512 %r40 to i32
+%r43 = getelementptr i32, i32* %r1, i32 9
+store i32 %r41, i32* %r43
+%r44 = lshr i512 %r40, 32
+%r45 = trunc i512 %r44 to i32
+%r47 = getelementptr i32, i32* %r1, i32 10
+store i32 %r45, i32* %r47
+%r48 = lshr i512 %r44, 32
+%r49 = trunc i512 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 11
+store i32 %r49, i32* %r51
+%r52 = lshr i512 %r48, 32
+%r53 = trunc i512 %r52 to i32
+%r55 = getelementptr i32, i32* %r1, i32 12
+store i32 %r53, i32* %r55
+%r56 = lshr i512 %r52, 32
+%r57 = trunc i512 %r56 to i32
+%r59 = getelementptr i32, i32* %r1, i32 13
+store i32 %r57, i32* %r59
+%r60 = lshr i512 %r56, 32
+%r61 = trunc i512 %r60 to i32
+%r63 = getelementptr i32, i32* %r1, i32 14
+store i32 %r61, i32* %r63
+%r64 = lshr i512 %r60, 32
+%r65 = trunc i512 %r64 to i32
+%r67 = getelementptr i32, i32* %r1, i32 15 ; index 15 is the last word written, so %r1 must have room for 16 words
+store i32 %r65, i32* %r67
+ret void
+}
+define void @mcl_fpDbl_mulPre15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; writes 30 words to %r1, built row by row from mulPv480x32(%r2, word i of %r3) for i = 0..14
+{
+%r4 = load i32, i32* %r3 ; word 0 of %r3
+%r5 = call i512 @mulPv480x32(i32* %r2, i32 %r4) ; row 0
+%r6 = trunc i512 %r5 to i32
+store i32 %r6, i32* %r1 ; low word of row 0 is output word 0
+%r7 = lshr i512 %r5, 32 ; the remaining bits are carried into the next row
+%r9 = getelementptr i32, i32* %r3, i32 1
+%r10 = load i32, i32* %r9
+%r11 = call i512 @mulPv480x32(i32* %r2, i32 %r10)
+%r12 = add i512 %r7, %r11 ; pattern for rows 1..13: add the row to the carried value, store the low word at %r1[i], shift right by 32
+%r13 = trunc i512 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 1
+store i32 %r13, i32* %r15
+%r16 = lshr i512 %r12, 32
+%r18 = getelementptr i32, i32* %r3, i32 2
+%r19 = load i32, i32* %r18
+%r20 = call i512 @mulPv480x32(i32* %r2, i32 %r19)
+%r21 = add i512 %r16, %r20
+%r22 = trunc i512 %r21 to i32
+%r24 = getelementptr i32, i32* %r1, i32 2
+store i32 %r22, i32* %r24
+%r25 = lshr i512 %r21, 32
+%r27 = getelementptr i32, i32* %r3, i32 3
+%r28 = load i32, i32* %r27
+%r29 = call i512 @mulPv480x32(i32* %r2, i32 %r28)
+%r30 = add i512 %r25, %r29
+%r31 = trunc i512 %r30 to i32
+%r33 = getelementptr i32, i32* %r1, i32 3
+store i32 %r31, i32* %r33
+%r34 = lshr i512 %r30, 32
+%r36 = getelementptr i32, i32* %r3, i32 4
+%r37 = load i32, i32* %r36
+%r38 = call i512 @mulPv480x32(i32* %r2, i32 %r37)
+%r39 = add i512 %r34, %r38
+%r40 = trunc i512 %r39 to i32
+%r42 = getelementptr i32, i32* %r1, i32 4
+store i32 %r40, i32* %r42
+%r43 = lshr i512 %r39, 32
+%r45 = getelementptr i32, i32* %r3, i32 5
+%r46 = load i32, i32* %r45
+%r47 = call i512 @mulPv480x32(i32* %r2, i32 %r46)
+%r48 = add i512 %r43, %r47
+%r49 = trunc i512 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 5
+store i32 %r49, i32* %r51
+%r52 = lshr i512 %r48, 32
+%r54 = getelementptr i32, i32* %r3, i32 6
+%r55 = load i32, i32* %r54
+%r56 = call i512 @mulPv480x32(i32* %r2, i32 %r55)
+%r57 = add i512 %r52, %r56
+%r58 = trunc i512 %r57 to i32
+%r60 = getelementptr i32, i32* %r1, i32 6
+store i32 %r58, i32* %r60
+%r61 = lshr i512 %r57, 32
+%r63 = getelementptr i32, i32* %r3, i32 7
+%r64 = load i32, i32* %r63
+%r65 = call i512 @mulPv480x32(i32* %r2, i32 %r64)
+%r66 = add i512 %r61, %r65
+%r67 = trunc i512 %r66 to i32
+%r69 = getelementptr i32, i32* %r1, i32 7
+store i32 %r67, i32* %r69
+%r70 = lshr i512 %r66, 32
+%r72 = getelementptr i32, i32* %r3, i32 8
+%r73 = load i32, i32* %r72
+%r74 = call i512 @mulPv480x32(i32* %r2, i32 %r73)
+%r75 = add i512 %r70, %r74
+%r76 = trunc i512 %r75 to i32
+%r78 = getelementptr i32, i32* %r1, i32 8
+store i32 %r76, i32* %r78
+%r79 = lshr i512 %r75, 32
+%r81 = getelementptr i32, i32* %r3, i32 9
+%r82 = load i32, i32* %r81
+%r83 = call i512 @mulPv480x32(i32* %r2, i32 %r82)
+%r84 = add i512 %r79, %r83
+%r85 = trunc i512 %r84 to i32
+%r87 = getelementptr i32, i32* %r1, i32 9
+store i32 %r85, i32* %r87
+%r88 = lshr i512 %r84, 32
+%r90 = getelementptr i32, i32* %r3, i32 10
+%r91 = load i32, i32* %r90
+%r92 = call i512 @mulPv480x32(i32* %r2, i32 %r91)
+%r93 = add i512 %r88, %r92
+%r94 = trunc i512 %r93 to i32
+%r96 = getelementptr i32, i32* %r1, i32 10
+store i32 %r94, i32* %r96
+%r97 = lshr i512 %r93, 32
+%r99 = getelementptr i32, i32* %r3, i32 11
+%r100 = load i32, i32* %r99
+%r101 = call i512 @mulPv480x32(i32* %r2, i32 %r100)
+%r102 = add i512 %r97, %r101
+%r103 = trunc i512 %r102 to i32
+%r105 = getelementptr i32, i32* %r1, i32 11
+store i32 %r103, i32* %r105
+%r106 = lshr i512 %r102, 32
+%r108 = getelementptr i32, i32* %r3, i32 12
+%r109 = load i32, i32* %r108
+%r110 = call i512 @mulPv480x32(i32* %r2, i32 %r109)
+%r111 = add i512 %r106, %r110
+%r112 = trunc i512 %r111 to i32
+%r114 = getelementptr i32, i32* %r1, i32 12
+store i32 %r112, i32* %r114
+%r115 = lshr i512 %r111, 32
+%r117 = getelementptr i32, i32* %r3, i32 13
+%r118 = load i32, i32* %r117
+%r119 = call i512 @mulPv480x32(i32* %r2, i32 %r118)
+%r120 = add i512 %r115, %r119
+%r121 = trunc i512 %r120 to i32
+%r123 = getelementptr i32, i32* %r1, i32 13
+store i32 %r121, i32* %r123
+%r124 = lshr i512 %r120, 32
+%r126 = getelementptr i32, i32* %r3, i32 14
+%r127 = load i32, i32* %r126
+%r128 = call i512 @mulPv480x32(i32* %r2, i32 %r127) ; last row (word 14 of %r3)
+%r129 = add i512 %r124, %r128
+%r131 = getelementptr i32, i32* %r1, i32 14 ; after the last row the full i512 accumulator is written out as words 14..29 of %r1
+%r132 = trunc i512 %r129 to i32
+%r134 = getelementptr i32, i32* %r131, i32 0
+store i32 %r132, i32* %r134
+%r135 = lshr i512 %r129, 32
+%r136 = trunc i512 %r135 to i32
+%r138 = getelementptr i32, i32* %r131, i32 1
+store i32 %r136, i32* %r138
+%r139 = lshr i512 %r135, 32
+%r140 = trunc i512 %r139 to i32
+%r142 = getelementptr i32, i32* %r131, i32 2
+store i32 %r140, i32* %r142
+%r143 = lshr i512 %r139, 32
+%r144 = trunc i512 %r143 to i32
+%r146 = getelementptr i32, i32* %r131, i32 3
+store i32 %r144, i32* %r146
+%r147 = lshr i512 %r143, 32
+%r148 = trunc i512 %r147 to i32
+%r150 = getelementptr i32, i32* %r131, i32 4
+store i32 %r148, i32* %r150
+%r151 = lshr i512 %r147, 32
+%r152 = trunc i512 %r151 to i32
+%r154 = getelementptr i32, i32* %r131, i32 5
+store i32 %r152, i32* %r154
+%r155 = lshr i512 %r151, 32
+%r156 = trunc i512 %r155 to i32
+%r158 = getelementptr i32, i32* %r131, i32 6
+store i32 %r156, i32* %r158
+%r159 = lshr i512 %r155, 32
+%r160 = trunc i512 %r159 to i32
+%r162 = getelementptr i32, i32* %r131, i32 7
+store i32 %r160, i32* %r162
+%r163 = lshr i512 %r159, 32
+%r164 = trunc i512 %r163 to i32
+%r166 = getelementptr i32, i32* %r131, i32 8
+store i32 %r164, i32* %r166
+%r167 = lshr i512 %r163, 32
+%r168 = trunc i512 %r167 to i32
+%r170 = getelementptr i32, i32* %r131, i32 9
+store i32 %r168, i32* %r170
+%r171 = lshr i512 %r167, 32
+%r172 = trunc i512 %r171 to i32
+%r174 = getelementptr i32, i32* %r131, i32 10
+store i32 %r172, i32* %r174
+%r175 = lshr i512 %r171, 32
+%r176 = trunc i512 %r175 to i32
+%r178 = getelementptr i32, i32* %r131, i32 11
+store i32 %r176, i32* %r178
+%r179 = lshr i512 %r175, 32
+%r180 = trunc i512 %r179 to i32
+%r182 = getelementptr i32, i32* %r131, i32 12
+store i32 %r180, i32* %r182
+%r183 = lshr i512 %r179, 32
+%r184 = trunc i512 %r183 to i32
+%r186 = getelementptr i32, i32* %r131, i32 13
+store i32 %r184, i32* %r186
+%r187 = lshr i512 %r183, 32
+%r188 = trunc i512 %r187 to i32
+%r190 = getelementptr i32, i32* %r131, i32 14
+store i32 %r188, i32* %r190
+%r191 = lshr i512 %r187, 32
+%r192 = trunc i512 %r191 to i32
+%r194 = getelementptr i32, i32* %r131, i32 15 ; %r131 is %r1+14, so this is output word 29, the last one
+store i32 %r192, i32* %r194
+ret void
+}
+define void @mcl_fpDbl_sqrPre15L(i32* noalias  %r1, i32* noalias  %r2) ; writes 30 words to %r1, built row by row from mulPv480x32(%r2, word i of %r2) for i = 0..14
+{
+%r3 = load i32, i32* %r2 ; word 0 of %r2; the scalar for every row is read from the same buffer that is passed as the multiplicand
+%r4 = call i512 @mulPv480x32(i32* %r2, i32 %r3) ; row 0
+%r5 = trunc i512 %r4 to i32
+store i32 %r5, i32* %r1 ; low word of row 0 is output word 0
+%r6 = lshr i512 %r4, 32 ; the remaining bits are carried into the next row
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = call i512 @mulPv480x32(i32* %r2, i32 %r9)
+%r11 = add i512 %r6, %r10 ; pattern for rows 1..13: add the row to the carried value, store the low word at %r1[i], shift right by 32
+%r12 = trunc i512 %r11 to i32
+%r14 = getelementptr i32, i32* %r1, i32 1
+store i32 %r12, i32* %r14
+%r15 = lshr i512 %r11, 32
+%r17 = getelementptr i32, i32* %r2, i32 2
+%r18 = load i32, i32* %r17
+%r19 = call i512 @mulPv480x32(i32* %r2, i32 %r18)
+%r20 = add i512 %r15, %r19
+%r21 = trunc i512 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 2
+store i32 %r21, i32* %r23
+%r24 = lshr i512 %r20, 32
+%r26 = getelementptr i32, i32* %r2, i32 3
+%r27 = load i32, i32* %r26
+%r28 = call i512 @mulPv480x32(i32* %r2, i32 %r27)
+%r29 = add i512 %r24, %r28
+%r30 = trunc i512 %r29 to i32
+%r32 = getelementptr i32, i32* %r1, i32 3
+store i32 %r30, i32* %r32
+%r33 = lshr i512 %r29, 32
+%r35 = getelementptr i32, i32* %r2, i32 4
+%r36 = load i32, i32* %r35
+%r37 = call i512 @mulPv480x32(i32* %r2, i32 %r36)
+%r38 = add i512 %r33, %r37
+%r39 = trunc i512 %r38 to i32
+%r41 = getelementptr i32, i32* %r1, i32 4
+store i32 %r39, i32* %r41
+%r42 = lshr i512 %r38, 32
+%r44 = getelementptr i32, i32* %r2, i32 5
+%r45 = load i32, i32* %r44
+%r46 = call i512 @mulPv480x32(i32* %r2, i32 %r45)
+%r47 = add i512 %r42, %r46
+%r48 = trunc i512 %r47 to i32
+%r50 = getelementptr i32, i32* %r1, i32 5
+store i32 %r48, i32* %r50
+%r51 = lshr i512 %r47, 32
+%r53 = getelementptr i32, i32* %r2, i32 6
+%r54 = load i32, i32* %r53
+%r55 = call i512 @mulPv480x32(i32* %r2, i32 %r54)
+%r56 = add i512 %r51, %r55
+%r57 = trunc i512 %r56 to i32
+%r59 = getelementptr i32, i32* %r1, i32 6
+store i32 %r57, i32* %r59
+%r60 = lshr i512 %r56, 32
+%r62 = getelementptr i32, i32* %r2, i32 7
+%r63 = load i32, i32* %r62
+%r64 = call i512 @mulPv480x32(i32* %r2, i32 %r63)
+%r65 = add i512 %r60, %r64
+%r66 = trunc i512 %r65 to i32
+%r68 = getelementptr i32, i32* %r1, i32 7
+store i32 %r66, i32* %r68
+%r69 = lshr i512 %r65, 32
+%r71 = getelementptr i32, i32* %r2, i32 8
+%r72 = load i32, i32* %r71
+%r73 = call i512 @mulPv480x32(i32* %r2, i32 %r72)
+%r74 = add i512 %r69, %r73
+%r75 = trunc i512 %r74 to i32
+%r77 = getelementptr i32, i32* %r1, i32 8
+store i32 %r75, i32* %r77
+%r78 = lshr i512 %r74, 32
+%r80 = getelementptr i32, i32* %r2, i32 9
+%r81 = load i32, i32* %r80
+%r82 = call i512 @mulPv480x32(i32* %r2, i32 %r81)
+%r83 = add i512 %r78, %r82
+%r84 = trunc i512 %r83 to i32
+%r86 = getelementptr i32, i32* %r1, i32 9
+store i32 %r84, i32* %r86
+%r87 = lshr i512 %r83, 32
+%r89 = getelementptr i32, i32* %r2, i32 10
+%r90 = load i32, i32* %r89
+%r91 = call i512 @mulPv480x32(i32* %r2, i32 %r90)
+%r92 = add i512 %r87, %r91
+%r93 = trunc i512 %r92 to i32
+%r95 = getelementptr i32, i32* %r1, i32 10
+store i32 %r93, i32* %r95
+%r96 = lshr i512 %r92, 32
+%r98 = getelementptr i32, i32* %r2, i32 11
+%r99 = load i32, i32* %r98
+%r100 = call i512 @mulPv480x32(i32* %r2, i32 %r99)
+%r101 = add i512 %r96, %r100
+%r102 = trunc i512 %r101 to i32
+%r104 = getelementptr i32, i32* %r1, i32 11
+store i32 %r102, i32* %r104
+%r105 = lshr i512 %r101, 32
+%r107 = getelementptr i32, i32* %r2, i32 12
+%r108 = load i32, i32* %r107
+%r109 = call i512 @mulPv480x32(i32* %r2, i32 %r108)
+%r110 = add i512 %r105, %r109
+%r111 = trunc i512 %r110 to i32
+%r113 = getelementptr i32, i32* %r1, i32 12
+store i32 %r111, i32* %r113
+%r114 = lshr i512 %r110, 32
+%r116 = getelementptr i32, i32* %r2, i32 13
+%r117 = load i32, i32* %r116
+%r118 = call i512 @mulPv480x32(i32* %r2, i32 %r117)
+%r119 = add i512 %r114, %r118
+%r120 = trunc i512 %r119 to i32
+%r122 = getelementptr i32, i32* %r1, i32 13
+store i32 %r120, i32* %r122
+%r123 = lshr i512 %r119, 32
+%r125 = getelementptr i32, i32* %r2, i32 14
+%r126 = load i32, i32* %r125
+%r127 = call i512 @mulPv480x32(i32* %r2, i32 %r126) ; last row (word 14 of %r2)
+%r128 = add i512 %r123, %r127
+%r130 = getelementptr i32, i32* %r1, i32 14 ; after the last row the full i512 accumulator is written out as words 14..29 of %r1
+%r131 = trunc i512 %r128 to i32
+%r133 = getelementptr i32, i32* %r130, i32 0
+store i32 %r131, i32* %r133
+%r134 = lshr i512 %r128, 32
+%r135 = trunc i512 %r134 to i32
+%r137 = getelementptr i32, i32* %r130, i32 1
+store i32 %r135, i32* %r137
+%r138 = lshr i512 %r134, 32
+%r139 = trunc i512 %r138 to i32
+%r141 = getelementptr i32, i32* %r130, i32 2
+store i32 %r139, i32* %r141
+%r142 = lshr i512 %r138, 32
+%r143 = trunc i512 %r142 to i32
+%r145 = getelementptr i32, i32* %r130, i32 3
+store i32 %r143, i32* %r145
+%r146 = lshr i512 %r142, 32
+%r147 = trunc i512 %r146 to i32
+%r149 = getelementptr i32, i32* %r130, i32 4
+store i32 %r147, i32* %r149
+%r150 = lshr i512 %r146, 32
+%r151 = trunc i512 %r150 to i32
+%r153 = getelementptr i32, i32* %r130, i32 5
+store i32 %r151, i32* %r153
+%r154 = lshr i512 %r150, 32
+%r155 = trunc i512 %r154 to i32
+%r157 = getelementptr i32, i32* %r130, i32 6
+store i32 %r155, i32* %r157
+%r158 = lshr i512 %r154, 32
+%r159 = trunc i512 %r158 to i32
+%r161 = getelementptr i32, i32* %r130, i32 7
+store i32 %r159, i32* %r161
+%r162 = lshr i512 %r158, 32
+%r163 = trunc i512 %r162 to i32
+%r165 = getelementptr i32, i32* %r130, i32 8
+store i32 %r163, i32* %r165
+%r166 = lshr i512 %r162, 32
+%r167 = trunc i512 %r166 to i32
+%r169 = getelementptr i32, i32* %r130, i32 9
+store i32 %r167, i32* %r169
+%r170 = lshr i512 %r166, 32
+%r171 = trunc i512 %r170 to i32
+%r173 = getelementptr i32, i32* %r130, i32 10
+store i32 %r171, i32* %r173
+%r174 = lshr i512 %r170, 32
+%r175 = trunc i512 %r174 to i32
+%r177 = getelementptr i32, i32* %r130, i32 11
+store i32 %r175, i32* %r177
+%r178 = lshr i512 %r174, 32
+%r179 = trunc i512 %r178 to i32
+%r181 = getelementptr i32, i32* %r130, i32 12
+store i32 %r179, i32* %r181
+%r182 = lshr i512 %r178, 32
+%r183 = trunc i512 %r182 to i32
+%r185 = getelementptr i32, i32* %r130, i32 13
+store i32 %r183, i32* %r185
+%r186 = lshr i512 %r182, 32
+%r187 = trunc i512 %r186 to i32
+%r189 = getelementptr i32, i32* %r130, i32 14
+store i32 %r187, i32* %r189
+%r190 = lshr i512 %r186, 32
+%r191 = trunc i512 %r190 to i32
+%r193 = getelementptr i32, i32* %r130, i32 15 ; %r130 is %r1+14, so this is output word 29, the last one
+store i32 %r191, i32* %r193
+ret void
+}
+define void @mcl_fp_mont15L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6
+%r9 = getelementptr i32, i32* %r3, i32 0
+%r10 = load i32, i32* %r9
+%r11 = call i512 @mulPv480x32(i32* %r2, i32 %r10)
+%r12 = zext i512 %r11 to i544
+%r13 = trunc i512 %r11 to i32
+%r14 = mul i32 %r13, %r7
+%r15 = call i512 @mulPv480x32(i32* %r4, i32 %r14)
+%r16 = zext i512 %r15 to i544
+%r17 = add i544 %r12, %r16
+%r18 = lshr i544 %r17, 32
+%r20 = getelementptr i32, i32* %r3, i32 1
+%r21 = load i32, i32* %r20
+%r22 = call i512 @mulPv480x32(i32* %r2, i32 %r21)
+%r23 = zext i512 %r22 to i544
+%r24 = add i544 %r18, %r23
+%r25 = trunc i544 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i512 @mulPv480x32(i32* %r4, i32 %r26)
+%r28 = zext i512 %r27 to i544
+%r29 = add i544 %r24, %r28
+%r30 = lshr i544 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2
+%r33 = load i32, i32* %r32
+%r34 = call i512 @mulPv480x32(i32* %r2, i32 %r33)
+%r35 = zext i512 %r34 to i544
+%r36 = add i544 %r30, %r35
+%r37 = trunc i544 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i512 @mulPv480x32(i32* %r4, i32 %r38)
+%r40 = zext i512 %r39 to i544
+%r41 = add i544 %r36, %r40
+%r42 = lshr i544 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = call i512 @mulPv480x32(i32* %r2, i32 %r45)
+%r47 = zext i512 %r46 to i544
+%r48 = add i544 %r42, %r47
+%r49 = trunc i544 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i512 @mulPv480x32(i32* %r4, i32 %r50)
+%r52 = zext i512 %r51 to i544
+%r53 = add i544 %r48, %r52
+%r54 = lshr i544 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4
+%r57 = load i32, i32* %r56
+%r58 = call i512 @mulPv480x32(i32* %r2, i32 %r57)
+%r59 = zext i512 %r58 to i544
+%r60 = add i544 %r54, %r59
+%r61 = trunc i544 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i512 @mulPv480x32(i32* %r4, i32 %r62)
+%r64 = zext i512 %r63 to i544
+%r65 = add i544 %r60, %r64
+%r66 = lshr i544 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5
+%r69 = load i32, i32* %r68
+%r70 = call i512 @mulPv480x32(i32* %r2, i32 %r69)
+%r71 = zext i512 %r70 to i544
+%r72 = add i544 %r66, %r71
+%r73 = trunc i544 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i512 @mulPv480x32(i32* %r4, i32 %r74)
+%r76 = zext i512 %r75 to i544
+%r77 = add i544 %r72, %r76
+%r78 = lshr i544 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6
+%r81 = load i32, i32* %r80
+%r82 = call i512 @mulPv480x32(i32* %r2, i32 %r81)
+%r83 = zext i512 %r82 to i544
+%r84 = add i544 %r78, %r83
+%r85 = trunc i544 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i512 @mulPv480x32(i32* %r4, i32 %r86)
+%r88 = zext i512 %r87 to i544
+%r89 = add i544 %r84, %r88
+%r90 = lshr i544 %r89, 32
+%r92 = getelementptr i32, i32* %r3, i32 7
+%r93 = load i32, i32* %r92
+%r94 = call i512 @mulPv480x32(i32* %r2, i32 %r93)
+%r95 = zext i512 %r94 to i544
+%r96 = add i544 %r90, %r95
+%r97 = trunc i544 %r96 to i32
+%r98 = mul i32 %r97, %r7
+%r99 = call i512 @mulPv480x32(i32* %r4, i32 %r98)
+%r100 = zext i512 %r99 to i544
+%r101 = add i544 %r96, %r100
+%r102 = lshr i544 %r101, 32
+%r104 = getelementptr i32, i32* %r3, i32 8
+%r105 = load i32, i32* %r104
+%r106 = call i512 @mulPv480x32(i32* %r2, i32 %r105)
+%r107 = zext i512 %r106 to i544
+%r108 = add i544 %r102, %r107
+%r109 = trunc i544 %r108 to i32
+%r110 = mul i32 %r109, %r7
+%r111 = call i512 @mulPv480x32(i32* %r4, i32 %r110)
+%r112 = zext i512 %r111 to i544
+%r113 = add i544 %r108, %r112
+%r114 = lshr i544 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 9
+%r117 = load i32, i32* %r116
+%r118 = call i512 @mulPv480x32(i32* %r2, i32 %r117)
+%r119 = zext i512 %r118 to i544
+%r120 = add i544 %r114, %r119
+%r121 = trunc i544 %r120 to i32
+%r122 = mul i32 %r121, %r7
+%r123 = call i512 @mulPv480x32(i32* %r4, i32 %r122)
+%r124 = zext i512 %r123 to i544
+%r125 = add i544 %r120, %r124
+%r126 = lshr i544 %r125, 32
+%r128 = getelementptr i32, i32* %r3, i32 10
+%r129 = load i32, i32* %r128
+%r130 = call i512 @mulPv480x32(i32* %r2, i32 %r129)
+%r131 = zext i512 %r130 to i544
+%r132 = add i544 %r126, %r131
+%r133 = trunc i544 %r132 to i32
+%r134 = mul i32 %r133, %r7
+%r135 = call i512 @mulPv480x32(i32* %r4, i32 %r134)
+%r136 = zext i512 %r135 to i544
+%r137 = add i544 %r132, %r136
+%r138 = lshr i544 %r137, 32
+%r140 = getelementptr i32, i32* %r3, i32 11
+%r141 = load i32, i32* %r140
+%r142 = call i512 @mulPv480x32(i32* %r2, i32 %r141)
+%r143 = zext i512 %r142 to i544
+%r144 = add i544 %r138, %r143
+%r145 = trunc i544 %r144 to i32
+%r146 = mul i32 %r145, %r7
+%r147 = call i512 @mulPv480x32(i32* %r4, i32 %r146)
+%r148 = zext i512 %r147 to i544
+%r149 = add i544 %r144, %r148
+%r150 = lshr i544 %r149, 32
+%r152 = getelementptr i32, i32* %r3, i32 12
+%r153 = load i32, i32* %r152
+%r154 = call i512 @mulPv480x32(i32* %r2, i32 %r153)
+%r155 = zext i512 %r154 to i544
+%r156 = add i544 %r150, %r155
+%r157 = trunc i544 %r156 to i32
+%r158 = mul i32 %r157, %r7
+%r159 = call i512 @mulPv480x32(i32* %r4, i32 %r158)
+%r160 = zext i512 %r159 to i544
+%r161 = add i544 %r156, %r160
+%r162 = lshr i544 %r161, 32
+%r164 = getelementptr i32, i32* %r3, i32 13
+%r165 = load i32, i32* %r164
+%r166 = call i512 @mulPv480x32(i32* %r2, i32 %r165)
+%r167 = zext i512 %r166 to i544
+%r168 = add i544 %r162, %r167
+%r169 = trunc i544 %r168 to i32
+%r170 = mul i32 %r169, %r7
+%r171 = call i512 @mulPv480x32(i32* %r4, i32 %r170)
+%r172 = zext i512 %r171 to i544
+%r173 = add i544 %r168, %r172
+%r174 = lshr i544 %r173, 32
+%r176 = getelementptr i32, i32* %r3, i32 14
+%r177 = load i32, i32* %r176
+%r178 = call i512 @mulPv480x32(i32* %r2, i32 %r177)
+%r179 = zext i512 %r178 to i544
+%r180 = add i544 %r174, %r179
+%r181 = trunc i544 %r180 to i32
+%r182 = mul i32 %r181, %r7
+%r183 = call i512 @mulPv480x32(i32* %r4, i32 %r182)
+%r184 = zext i512 %r183 to i544
+%r185 = add i544 %r180, %r184
+%r186 = lshr i544 %r185, 32
+%r187 = trunc i544 %r186 to i512
+%r188 = load i32, i32* %r4
+%r189 = zext i32 %r188 to i64
+%r191 = getelementptr i32, i32* %r4, i32 1
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i64
+%r194 = shl i64 %r193, 32
+%r195 = or i64 %r189, %r194
+%r196 = zext i64 %r195 to i96
+%r198 = getelementptr i32, i32* %r4, i32 2
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i96
+%r201 = shl i96 %r200, 64
+%r202 = or i96 %r196, %r201
+%r203 = zext i96 %r202 to i128
+%r205 = getelementptr i32, i32* %r4, i32 3
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i128
+%r208 = shl i128 %r207, 96
+%r209 = or i128 %r203, %r208
+%r210 = zext i128 %r209 to i160
+%r212 = getelementptr i32, i32* %r4, i32 4
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i160
+%r215 = shl i160 %r214, 128
+%r216 = or i160 %r210, %r215
+%r217 = zext i160 %r216 to i192
+%r219 = getelementptr i32, i32* %r4, i32 5
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i192
+%r222 = shl i192 %r221, 160
+%r223 = or i192 %r217, %r222
+%r224 = zext i192 %r223 to i224
+%r226 = getelementptr i32, i32* %r4, i32 6
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i224
+%r229 = shl i224 %r228, 192
+%r230 = or i224 %r224, %r229
+%r231 = zext i224 %r230 to i256
+%r233 = getelementptr i32, i32* %r4, i32 7
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i256
+%r236 = shl i256 %r235, 224
+%r237 = or i256 %r231, %r236
+%r238 = zext i256 %r237 to i288
+%r240 = getelementptr i32, i32* %r4, i32 8
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i288
+%r243 = shl i288 %r242, 256
+%r244 = or i288 %r238, %r243
+%r245 = zext i288 %r244 to i320
+%r247 = getelementptr i32, i32* %r4, i32 9
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i320
+%r250 = shl i320 %r249, 288
+%r251 = or i320 %r245, %r250
+%r252 = zext i320 %r251 to i352
+%r254 = getelementptr i32, i32* %r4, i32 10
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i352
+%r257 = shl i352 %r256, 320
+%r258 = or i352 %r252, %r257
+%r259 = zext i352 %r258 to i384
+%r261 = getelementptr i32, i32* %r4, i32 11
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i384
+%r264 = shl i384 %r263, 352
+%r265 = or i384 %r259, %r264
+%r266 = zext i384 %r265 to i416
+%r268 = getelementptr i32, i32* %r4, i32 12
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i416
+%r271 = shl i416 %r270, 384
+%r272 = or i416 %r266, %r271
+%r273 = zext i416 %r272 to i448
+%r275 = getelementptr i32, i32* %r4, i32 13
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i448
+%r278 = shl i448 %r277, 416
+%r279 = or i448 %r273, %r278
+%r280 = zext i448 %r279 to i480
+%r282 = getelementptr i32, i32* %r4, i32 14
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i480
+%r285 = shl i480 %r284, 448
+%r286 = or i480 %r280, %r285
+%r287 = zext i480 %r286 to i512
+%r288 = sub i512 %r187, %r287
+%r289 = lshr i512 %r288, 480
+%r290 = trunc i512 %r289 to i1
+%r291 = select i1 %r290, i512 %r187, i512 %r288
+%r292 = trunc i512 %r291 to i480
+%r293 = trunc i480 %r292 to i32
+%r295 = getelementptr i32, i32* %r1, i32 0
+store i32 %r293, i32* %r295
+%r296 = lshr i480 %r292, 32
+%r297 = trunc i480 %r296 to i32
+%r299 = getelementptr i32, i32* %r1, i32 1
+store i32 %r297, i32* %r299
+%r300 = lshr i480 %r296, 32
+%r301 = trunc i480 %r300 to i32
+%r303 = getelementptr i32, i32* %r1, i32 2
+store i32 %r301, i32* %r303
+%r304 = lshr i480 %r300, 32
+%r305 = trunc i480 %r304 to i32
+%r307 = getelementptr i32, i32* %r1, i32 3
+store i32 %r305, i32* %r307
+%r308 = lshr i480 %r304, 32
+%r309 = trunc i480 %r308 to i32
+%r311 = getelementptr i32, i32* %r1, i32 4
+store i32 %r309, i32* %r311
+%r312 = lshr i480 %r308, 32
+%r313 = trunc i480 %r312 to i32
+%r315 = getelementptr i32, i32* %r1, i32 5
+store i32 %r313, i32* %r315
+%r316 = lshr i480 %r312, 32
+%r317 = trunc i480 %r316 to i32
+%r319 = getelementptr i32, i32* %r1, i32 6
+store i32 %r317, i32* %r319
+%r320 = lshr i480 %r316, 32
+%r321 = trunc i480 %r320 to i32
+%r323 = getelementptr i32, i32* %r1, i32 7
+store i32 %r321, i32* %r323
+%r324 = lshr i480 %r320, 32
+%r325 = trunc i480 %r324 to i32
+%r327 = getelementptr i32, i32* %r1, i32 8
+store i32 %r325, i32* %r327
+%r328 = lshr i480 %r324, 32
+%r329 = trunc i480 %r328 to i32
+%r331 = getelementptr i32, i32* %r1, i32 9
+store i32 %r329, i32* %r331
+%r332 = lshr i480 %r328, 32
+%r333 = trunc i480 %r332 to i32
+%r335 = getelementptr i32, i32* %r1, i32 10
+store i32 %r333, i32* %r335
+%r336 = lshr i480 %r332, 32
+%r337 = trunc i480 %r336 to i32
+%r339 = getelementptr i32, i32* %r1, i32 11
+store i32 %r337, i32* %r339
+%r340 = lshr i480 %r336, 32
+%r341 = trunc i480 %r340 to i32
+%r343 = getelementptr i32, i32* %r1, i32 12
+store i32 %r341, i32* %r343
+%r344 = lshr i480 %r340, 32
+%r345 = trunc i480 %r344 to i32
+%r347 = getelementptr i32, i32* %r1, i32 13
+store i32 %r345, i32* %r347
+%r348 = lshr i480 %r344, 32
+%r349 = trunc i480 %r348 to i32
+%r351 = getelementptr i32, i32* %r1, i32 14
+store i32 %r349, i32* %r351
+ret void
+}
+define void @mcl_fp_montNF15L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; Montgomery-style multiply-and-reduce over 15 x 32-bit words (480 bits)
+{ ; %r1 = 15-word output, %r2 = x (passed whole to mulPv480x32), %r3 = y (one word read per round), %r4 = modulus words p
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; address of the word stored just before p[0]
+%r7 = load i32, i32* %r6 ; per-round multiplier; presumably -p^-1 mod 2^32 -- confirm against the caller
+%r8 = load i32, i32* %r3 ; y[0]
+%r9 = call i512 @mulPv480x32(i32* %r2, i32 %r8) ; t = x * y[0]; mulPv480x32 is defined elsewhere (presumably 480-bit by 32-bit product -- confirm)
+%r10 = trunc i512 %r9 to i32 ; low word of t
+%r11 = mul i32 %r10, %r7 ; q = low32(t) * %r7, wrapping at 32 bits
+%r12 = call i512 @mulPv480x32(i32* %r4, i32 %r11) ; p * q
+%r13 = add i512 %r9, %r12 ; t + p*q
+%r14 = lshr i512 %r13, 32 ; discard the low 32 bits before the next round
+%r16 = getelementptr i32, i32* %r3, i32 1 ; round for y[1]: same steps, accumulating onto the shifted t
+%r17 = load i32, i32* %r16
+%r18 = call i512 @mulPv480x32(i32* %r2, i32 %r17)
+%r19 = add i512 %r14, %r18
+%r20 = trunc i512 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i512 @mulPv480x32(i32* %r4, i32 %r21)
+%r23 = add i512 %r19, %r22
+%r24 = lshr i512 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2 ; round for y[2]
+%r27 = load i32, i32* %r26
+%r28 = call i512 @mulPv480x32(i32* %r2, i32 %r27)
+%r29 = add i512 %r24, %r28
+%r30 = trunc i512 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i512 @mulPv480x32(i32* %r4, i32 %r31)
+%r33 = add i512 %r29, %r32
+%r34 = lshr i512 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3 ; round for y[3]
+%r37 = load i32, i32* %r36
+%r38 = call i512 @mulPv480x32(i32* %r2, i32 %r37)
+%r39 = add i512 %r34, %r38
+%r40 = trunc i512 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i512 @mulPv480x32(i32* %r4, i32 %r41)
+%r43 = add i512 %r39, %r42
+%r44 = lshr i512 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4 ; round for y[4]
+%r47 = load i32, i32* %r46
+%r48 = call i512 @mulPv480x32(i32* %r2, i32 %r47)
+%r49 = add i512 %r44, %r48
+%r50 = trunc i512 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i512 @mulPv480x32(i32* %r4, i32 %r51)
+%r53 = add i512 %r49, %r52
+%r54 = lshr i512 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5 ; round for y[5]
+%r57 = load i32, i32* %r56
+%r58 = call i512 @mulPv480x32(i32* %r2, i32 %r57)
+%r59 = add i512 %r54, %r58
+%r60 = trunc i512 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i512 @mulPv480x32(i32* %r4, i32 %r61)
+%r63 = add i512 %r59, %r62
+%r64 = lshr i512 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6 ; round for y[6]
+%r67 = load i32, i32* %r66
+%r68 = call i512 @mulPv480x32(i32* %r2, i32 %r67)
+%r69 = add i512 %r64, %r68
+%r70 = trunc i512 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i512 @mulPv480x32(i32* %r4, i32 %r71)
+%r73 = add i512 %r69, %r72
+%r74 = lshr i512 %r73, 32
+%r76 = getelementptr i32, i32* %r3, i32 7 ; round for y[7]
+%r77 = load i32, i32* %r76
+%r78 = call i512 @mulPv480x32(i32* %r2, i32 %r77)
+%r79 = add i512 %r74, %r78
+%r80 = trunc i512 %r79 to i32
+%r81 = mul i32 %r80, %r7
+%r82 = call i512 @mulPv480x32(i32* %r4, i32 %r81)
+%r83 = add i512 %r79, %r82
+%r84 = lshr i512 %r83, 32
+%r86 = getelementptr i32, i32* %r3, i32 8 ; round for y[8]
+%r87 = load i32, i32* %r86
+%r88 = call i512 @mulPv480x32(i32* %r2, i32 %r87)
+%r89 = add i512 %r84, %r88
+%r90 = trunc i512 %r89 to i32
+%r91 = mul i32 %r90, %r7
+%r92 = call i512 @mulPv480x32(i32* %r4, i32 %r91)
+%r93 = add i512 %r89, %r92
+%r94 = lshr i512 %r93, 32
+%r96 = getelementptr i32, i32* %r3, i32 9 ; round for y[9]
+%r97 = load i32, i32* %r96
+%r98 = call i512 @mulPv480x32(i32* %r2, i32 %r97)
+%r99 = add i512 %r94, %r98
+%r100 = trunc i512 %r99 to i32
+%r101 = mul i32 %r100, %r7
+%r102 = call i512 @mulPv480x32(i32* %r4, i32 %r101)
+%r103 = add i512 %r99, %r102
+%r104 = lshr i512 %r103, 32
+%r106 = getelementptr i32, i32* %r3, i32 10 ; round for y[10]
+%r107 = load i32, i32* %r106
+%r108 = call i512 @mulPv480x32(i32* %r2, i32 %r107)
+%r109 = add i512 %r104, %r108
+%r110 = trunc i512 %r109 to i32
+%r111 = mul i32 %r110, %r7
+%r112 = call i512 @mulPv480x32(i32* %r4, i32 %r111)
+%r113 = add i512 %r109, %r112
+%r114 = lshr i512 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 11 ; round for y[11]
+%r117 = load i32, i32* %r116
+%r118 = call i512 @mulPv480x32(i32* %r2, i32 %r117)
+%r119 = add i512 %r114, %r118
+%r120 = trunc i512 %r119 to i32
+%r121 = mul i32 %r120, %r7
+%r122 = call i512 @mulPv480x32(i32* %r4, i32 %r121)
+%r123 = add i512 %r119, %r122
+%r124 = lshr i512 %r123, 32
+%r126 = getelementptr i32, i32* %r3, i32 12 ; round for y[12]
+%r127 = load i32, i32* %r126
+%r128 = call i512 @mulPv480x32(i32* %r2, i32 %r127)
+%r129 = add i512 %r124, %r128
+%r130 = trunc i512 %r129 to i32
+%r131 = mul i32 %r130, %r7
+%r132 = call i512 @mulPv480x32(i32* %r4, i32 %r131)
+%r133 = add i512 %r129, %r132
+%r134 = lshr i512 %r133, 32
+%r136 = getelementptr i32, i32* %r3, i32 13 ; round for y[13]
+%r137 = load i32, i32* %r136
+%r138 = call i512 @mulPv480x32(i32* %r2, i32 %r137)
+%r139 = add i512 %r134, %r138
+%r140 = trunc i512 %r139 to i32
+%r141 = mul i32 %r140, %r7
+%r142 = call i512 @mulPv480x32(i32* %r4, i32 %r141)
+%r143 = add i512 %r139, %r142
+%r144 = lshr i512 %r143, 32
+%r146 = getelementptr i32, i32* %r3, i32 14 ; round for y[14] (last of 15)
+%r147 = load i32, i32* %r146
+%r148 = call i512 @mulPv480x32(i32* %r2, i32 %r147)
+%r149 = add i512 %r144, %r148
+%r150 = trunc i512 %r149 to i32
+%r151 = mul i32 %r150, %r7
+%r152 = call i512 @mulPv480x32(i32* %r4, i32 %r151)
+%r153 = add i512 %r149, %r152
+%r154 = lshr i512 %r153, 32
+%r155 = trunc i512 %r154 to i480 ; t = low 480 bits of the accumulator, the candidate result
+%r156 = load i32, i32* %r4 ; from here: pack p[0..14] into a single i480, least-significant word first
+%r157 = zext i32 %r156 to i64
+%r159 = getelementptr i32, i32* %r4, i32 1
+%r160 = load i32, i32* %r159
+%r161 = zext i32 %r160 to i64
+%r162 = shl i64 %r161, 32
+%r163 = or i64 %r157, %r162
+%r164 = zext i64 %r163 to i96
+%r166 = getelementptr i32, i32* %r4, i32 2
+%r167 = load i32, i32* %r166
+%r168 = zext i32 %r167 to i96
+%r169 = shl i96 %r168, 64
+%r170 = or i96 %r164, %r169
+%r171 = zext i96 %r170 to i128
+%r173 = getelementptr i32, i32* %r4, i32 3
+%r174 = load i32, i32* %r173
+%r175 = zext i32 %r174 to i128
+%r176 = shl i128 %r175, 96
+%r177 = or i128 %r171, %r176
+%r178 = zext i128 %r177 to i160
+%r180 = getelementptr i32, i32* %r4, i32 4
+%r181 = load i32, i32* %r180
+%r182 = zext i32 %r181 to i160
+%r183 = shl i160 %r182, 128
+%r184 = or i160 %r178, %r183
+%r185 = zext i160 %r184 to i192
+%r187 = getelementptr i32, i32* %r4, i32 5
+%r188 = load i32, i32* %r187
+%r189 = zext i32 %r188 to i192
+%r190 = shl i192 %r189, 160
+%r191 = or i192 %r185, %r190
+%r192 = zext i192 %r191 to i224
+%r194 = getelementptr i32, i32* %r4, i32 6
+%r195 = load i32, i32* %r194
+%r196 = zext i32 %r195 to i224
+%r197 = shl i224 %r196, 192
+%r198 = or i224 %r192, %r197
+%r199 = zext i224 %r198 to i256
+%r201 = getelementptr i32, i32* %r4, i32 7
+%r202 = load i32, i32* %r201
+%r203 = zext i32 %r202 to i256
+%r204 = shl i256 %r203, 224
+%r205 = or i256 %r199, %r204
+%r206 = zext i256 %r205 to i288
+%r208 = getelementptr i32, i32* %r4, i32 8
+%r209 = load i32, i32* %r208
+%r210 = zext i32 %r209 to i288
+%r211 = shl i288 %r210, 256
+%r212 = or i288 %r206, %r211
+%r213 = zext i288 %r212 to i320
+%r215 = getelementptr i32, i32* %r4, i32 9
+%r216 = load i32, i32* %r215
+%r217 = zext i32 %r216 to i320
+%r218 = shl i320 %r217, 288
+%r219 = or i320 %r213, %r218
+%r220 = zext i320 %r219 to i352
+%r222 = getelementptr i32, i32* %r4, i32 10
+%r223 = load i32, i32* %r222
+%r224 = zext i32 %r223 to i352
+%r225 = shl i352 %r224, 320
+%r226 = or i352 %r220, %r225
+%r227 = zext i352 %r226 to i384
+%r229 = getelementptr i32, i32* %r4, i32 11
+%r230 = load i32, i32* %r229
+%r231 = zext i32 %r230 to i384
+%r232 = shl i384 %r231, 352
+%r233 = or i384 %r227, %r232
+%r234 = zext i384 %r233 to i416
+%r236 = getelementptr i32, i32* %r4, i32 12
+%r237 = load i32, i32* %r236
+%r238 = zext i32 %r237 to i416
+%r239 = shl i416 %r238, 384
+%r240 = or i416 %r234, %r239
+%r241 = zext i416 %r240 to i448
+%r243 = getelementptr i32, i32* %r4, i32 13
+%r244 = load i32, i32* %r243
+%r245 = zext i32 %r244 to i448
+%r246 = shl i448 %r245, 416
+%r247 = or i448 %r241, %r246
+%r248 = zext i448 %r247 to i480
+%r250 = getelementptr i32, i32* %r4, i32 14
+%r251 = load i32, i32* %r250
+%r252 = zext i32 %r251 to i480
+%r253 = shl i480 %r252, 448
+%r254 = or i480 %r248, %r253 ; %r254 = p as one 480-bit integer
+%r255 = sub i480 %r155, %r254 ; d = t - p, wrapping at 480 bits
+%r256 = lshr i480 %r255, 479 ; isolate bit 479, the top bit of d
+%r257 = trunc i480 %r256 to i1
+%r258 = select i1 %r257, i480 %r155, i480 %r255 ; top bit set -> keep t, otherwise take t - p (no branch)
+%r259 = trunc i480 %r258 to i32 ; from here: store the result to %r1[0..14], least-significant word first
+%r261 = getelementptr i32, i32* %r1, i32 0
+store i32 %r259, i32* %r261
+%r262 = lshr i480 %r258, 32
+%r263 = trunc i480 %r262 to i32
+%r265 = getelementptr i32, i32* %r1, i32 1
+store i32 %r263, i32* %r265
+%r266 = lshr i480 %r262, 32
+%r267 = trunc i480 %r266 to i32
+%r269 = getelementptr i32, i32* %r1, i32 2
+store i32 %r267, i32* %r269
+%r270 = lshr i480 %r266, 32
+%r271 = trunc i480 %r270 to i32
+%r273 = getelementptr i32, i32* %r1, i32 3
+store i32 %r271, i32* %r273
+%r274 = lshr i480 %r270, 32
+%r275 = trunc i480 %r274 to i32
+%r277 = getelementptr i32, i32* %r1, i32 4
+store i32 %r275, i32* %r277
+%r278 = lshr i480 %r274, 32
+%r279 = trunc i480 %r278 to i32
+%r281 = getelementptr i32, i32* %r1, i32 5
+store i32 %r279, i32* %r281
+%r282 = lshr i480 %r278, 32
+%r283 = trunc i480 %r282 to i32
+%r285 = getelementptr i32, i32* %r1, i32 6
+store i32 %r283, i32* %r285
+%r286 = lshr i480 %r282, 32
+%r287 = trunc i480 %r286 to i32
+%r289 = getelementptr i32, i32* %r1, i32 7
+store i32 %r287, i32* %r289
+%r290 = lshr i480 %r286, 32
+%r291 = trunc i480 %r290 to i32
+%r293 = getelementptr i32, i32* %r1, i32 8
+store i32 %r291, i32* %r293
+%r294 = lshr i480 %r290, 32
+%r295 = trunc i480 %r294 to i32
+%r297 = getelementptr i32, i32* %r1, i32 9
+store i32 %r295, i32* %r297
+%r298 = lshr i480 %r294, 32
+%r299 = trunc i480 %r298 to i32
+%r301 = getelementptr i32, i32* %r1, i32 10
+store i32 %r299, i32* %r301
+%r302 = lshr i480 %r298, 32
+%r303 = trunc i480 %r302 to i32
+%r305 = getelementptr i32, i32* %r1, i32 11
+store i32 %r303, i32* %r305
+%r306 = lshr i480 %r302, 32
+%r307 = trunc i480 %r306 to i32
+%r309 = getelementptr i32, i32* %r1, i32 12
+store i32 %r307, i32* %r309
+%r310 = lshr i480 %r306, 32
+%r311 = trunc i480 %r310 to i32
+%r313 = getelementptr i32, i32* %r1, i32 13
+store i32 %r311, i32* %r313
+%r314 = lshr i480 %r310, 32
+%r315 = trunc i480 %r314 to i32
+%r317 = getelementptr i32, i32* %r1, i32 14
+store i32 %r315, i32* %r317
+ret void
+}
+define void @mcl_fp_montRed15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; Montgomery-style reduction: 30-word input at %r2 -> 15-word result at %r1, modulus words at %r3
+{ ; all three pointers are noalias, so the buffers must not overlap
+%r5 = getelementptr i32, i32* %r3, i32 -1 ; address of the word stored just before p[0]
+%r6 = load i32, i32* %r5 ; per-round multiplier; presumably -p^-1 mod 2^32 -- confirm against the caller
+%r7 = load i32, i32* %r3 ; from here: pack p[0..14] into a single i480, least-significant word first
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i288
+%r59 = getelementptr i32, i32* %r3, i32 8
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i288
+%r62 = shl i288 %r61, 256
+%r63 = or i288 %r57, %r62
+%r64 = zext i288 %r63 to i320
+%r66 = getelementptr i32, i32* %r3, i32 9
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i320
+%r69 = shl i320 %r68, 288
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i352
+%r73 = getelementptr i32, i32* %r3, i32 10
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i352
+%r76 = shl i352 %r75, 320
+%r77 = or i352 %r71, %r76
+%r78 = zext i352 %r77 to i384
+%r80 = getelementptr i32, i32* %r3, i32 11
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i384
+%r83 = shl i384 %r82, 352
+%r84 = or i384 %r78, %r83
+%r85 = zext i384 %r84 to i416
+%r87 = getelementptr i32, i32* %r3, i32 12
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i416
+%r90 = shl i416 %r89, 384
+%r91 = or i416 %r85, %r90
+%r92 = zext i416 %r91 to i448
+%r94 = getelementptr i32, i32* %r3, i32 13
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i448
+%r97 = shl i448 %r96, 416
+%r98 = or i448 %r92, %r97
+%r99 = zext i448 %r98 to i480
+%r101 = getelementptr i32, i32* %r3, i32 14
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i480
+%r104 = shl i480 %r103, 448
+%r105 = or i480 %r99, %r104 ; %r105 = p as one 480-bit integer (used only in the final subtract)
+%r106 = load i32, i32* %r2 ; from here: pack the 30 input words %r2[0..29] into a single i960, least-significant word first
+%r107 = zext i32 %r106 to i64
+%r109 = getelementptr i32, i32* %r2, i32 1
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i64
+%r112 = shl i64 %r111, 32
+%r113 = or i64 %r107, %r112
+%r114 = zext i64 %r113 to i96
+%r116 = getelementptr i32, i32* %r2, i32 2
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i96
+%r119 = shl i96 %r118, 64
+%r120 = or i96 %r114, %r119
+%r121 = zext i96 %r120 to i128
+%r123 = getelementptr i32, i32* %r2, i32 3
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i128
+%r126 = shl i128 %r125, 96
+%r127 = or i128 %r121, %r126
+%r128 = zext i128 %r127 to i160
+%r130 = getelementptr i32, i32* %r2, i32 4
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i160
+%r133 = shl i160 %r132, 128
+%r134 = or i160 %r128, %r133
+%r135 = zext i160 %r134 to i192
+%r137 = getelementptr i32, i32* %r2, i32 5
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i192
+%r140 = shl i192 %r139, 160
+%r141 = or i192 %r135, %r140
+%r142 = zext i192 %r141 to i224
+%r144 = getelementptr i32, i32* %r2, i32 6
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i224
+%r147 = shl i224 %r146, 192
+%r148 = or i224 %r142, %r147
+%r149 = zext i224 %r148 to i256
+%r151 = getelementptr i32, i32* %r2, i32 7
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i256
+%r154 = shl i256 %r153, 224
+%r155 = or i256 %r149, %r154
+%r156 = zext i256 %r155 to i288
+%r158 = getelementptr i32, i32* %r2, i32 8
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i288
+%r161 = shl i288 %r160, 256
+%r162 = or i288 %r156, %r161
+%r163 = zext i288 %r162 to i320
+%r165 = getelementptr i32, i32* %r2, i32 9
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i320
+%r168 = shl i320 %r167, 288
+%r169 = or i320 %r163, %r168
+%r170 = zext i320 %r169 to i352
+%r172 = getelementptr i32, i32* %r2, i32 10
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i352
+%r175 = shl i352 %r174, 320
+%r176 = or i352 %r170, %r175
+%r177 = zext i352 %r176 to i384
+%r179 = getelementptr i32, i32* %r2, i32 11
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i384
+%r182 = shl i384 %r181, 352
+%r183 = or i384 %r177, %r182
+%r184 = zext i384 %r183 to i416
+%r186 = getelementptr i32, i32* %r2, i32 12
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i416
+%r189 = shl i416 %r188, 384
+%r190 = or i416 %r184, %r189
+%r191 = zext i416 %r190 to i448
+%r193 = getelementptr i32, i32* %r2, i32 13
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i448
+%r196 = shl i448 %r195, 416
+%r197 = or i448 %r191, %r196
+%r198 = zext i448 %r197 to i480
+%r200 = getelementptr i32, i32* %r2, i32 14
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i480
+%r203 = shl i480 %r202, 448
+%r204 = or i480 %r198, %r203
+%r205 = zext i480 %r204 to i512
+%r207 = getelementptr i32, i32* %r2, i32 15
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i512
+%r210 = shl i512 %r209, 480
+%r211 = or i512 %r205, %r210
+%r212 = zext i512 %r211 to i544
+%r214 = getelementptr i32, i32* %r2, i32 16
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i544
+%r217 = shl i544 %r216, 512
+%r218 = or i544 %r212, %r217
+%r219 = zext i544 %r218 to i576
+%r221 = getelementptr i32, i32* %r2, i32 17
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i576
+%r224 = shl i576 %r223, 544
+%r225 = or i576 %r219, %r224
+%r226 = zext i576 %r225 to i608
+%r228 = getelementptr i32, i32* %r2, i32 18
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i608
+%r231 = shl i608 %r230, 576
+%r232 = or i608 %r226, %r231
+%r233 = zext i608 %r232 to i640
+%r235 = getelementptr i32, i32* %r2, i32 19
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i640
+%r238 = shl i640 %r237, 608
+%r239 = or i640 %r233, %r238
+%r240 = zext i640 %r239 to i672
+%r242 = getelementptr i32, i32* %r2, i32 20
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i672
+%r245 = shl i672 %r244, 640
+%r246 = or i672 %r240, %r245
+%r247 = zext i672 %r246 to i704
+%r249 = getelementptr i32, i32* %r2, i32 21
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i704
+%r252 = shl i704 %r251, 672
+%r253 = or i704 %r247, %r252
+%r254 = zext i704 %r253 to i736
+%r256 = getelementptr i32, i32* %r2, i32 22
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i736
+%r259 = shl i736 %r258, 704
+%r260 = or i736 %r254, %r259
+%r261 = zext i736 %r260 to i768
+%r263 = getelementptr i32, i32* %r2, i32 23
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i768
+%r266 = shl i768 %r265, 736
+%r267 = or i768 %r261, %r266
+%r268 = zext i768 %r267 to i800
+%r270 = getelementptr i32, i32* %r2, i32 24
+%r271 = load i32, i32* %r270
+%r272 = zext i32 %r271 to i800
+%r273 = shl i800 %r272, 768
+%r274 = or i800 %r268, %r273
+%r275 = zext i800 %r274 to i832
+%r277 = getelementptr i32, i32* %r2, i32 25
+%r278 = load i32, i32* %r277
+%r279 = zext i32 %r278 to i832
+%r280 = shl i832 %r279, 800
+%r281 = or i832 %r275, %r280
+%r282 = zext i832 %r281 to i864
+%r284 = getelementptr i32, i32* %r2, i32 26
+%r285 = load i32, i32* %r284
+%r286 = zext i32 %r285 to i864
+%r287 = shl i864 %r286, 832
+%r288 = or i864 %r282, %r287
+%r289 = zext i864 %r288 to i896
+%r291 = getelementptr i32, i32* %r2, i32 27
+%r292 = load i32, i32* %r291
+%r293 = zext i32 %r292 to i896
+%r294 = shl i896 %r293, 864
+%r295 = or i896 %r289, %r294
+%r296 = zext i896 %r295 to i928
+%r298 = getelementptr i32, i32* %r2, i32 28
+%r299 = load i32, i32* %r298
+%r300 = zext i32 %r299 to i928
+%r301 = shl i928 %r300, 896
+%r302 = or i928 %r296, %r301
+%r303 = zext i928 %r302 to i960
+%r305 = getelementptr i32, i32* %r2, i32 29
+%r306 = load i32, i32* %r305
+%r307 = zext i32 %r306 to i960
+%r308 = shl i960 %r307, 928
+%r309 = or i960 %r303, %r308 ; %r309 = the full 960-bit input
+%r310 = zext i960 %r309 to i992 ; widen by one word so the first addition cannot lose its carry
+%r311 = trunc i992 %r310 to i32 ; round 1 of 15: low word of t
+%r312 = mul i32 %r311, %r6 ; q = low32(t) * %r6, wrapping at 32 bits
+%r313 = call i512 @mulPv480x32(i32* %r3, i32 %r312) ; p * q; mulPv480x32 is defined elsewhere (presumably 480-bit by 32-bit product -- confirm)
+%r314 = zext i512 %r313 to i992
+%r315 = add i992 %r310, %r314 ; t + p*q
+%r316 = lshr i992 %r315, 32 ; discard the low 32 bits
+%r317 = trunc i992 %r316 to i960 ; working width shrinks by 32 bits every round
+%r318 = trunc i960 %r317 to i32 ; round 2
+%r319 = mul i32 %r318, %r6
+%r320 = call i512 @mulPv480x32(i32* %r3, i32 %r319)
+%r321 = zext i512 %r320 to i960
+%r322 = add i960 %r317, %r321
+%r323 = lshr i960 %r322, 32
+%r324 = trunc i960 %r323 to i928
+%r325 = trunc i928 %r324 to i32 ; round 3
+%r326 = mul i32 %r325, %r6
+%r327 = call i512 @mulPv480x32(i32* %r3, i32 %r326)
+%r328 = zext i512 %r327 to i928
+%r329 = add i928 %r324, %r328
+%r330 = lshr i928 %r329, 32
+%r331 = trunc i928 %r330 to i896
+%r332 = trunc i896 %r331 to i32 ; round 4
+%r333 = mul i32 %r332, %r6
+%r334 = call i512 @mulPv480x32(i32* %r3, i32 %r333)
+%r335 = zext i512 %r334 to i896
+%r336 = add i896 %r331, %r335
+%r337 = lshr i896 %r336, 32
+%r338 = trunc i896 %r337 to i864
+%r339 = trunc i864 %r338 to i32 ; round 5
+%r340 = mul i32 %r339, %r6
+%r341 = call i512 @mulPv480x32(i32* %r3, i32 %r340)
+%r342 = zext i512 %r341 to i864
+%r343 = add i864 %r338, %r342
+%r344 = lshr i864 %r343, 32
+%r345 = trunc i864 %r344 to i832
+%r346 = trunc i832 %r345 to i32 ; round 6
+%r347 = mul i32 %r346, %r6
+%r348 = call i512 @mulPv480x32(i32* %r3, i32 %r347)
+%r349 = zext i512 %r348 to i832
+%r350 = add i832 %r345, %r349
+%r351 = lshr i832 %r350, 32
+%r352 = trunc i832 %r351 to i800
+%r353 = trunc i800 %r352 to i32 ; round 7
+%r354 = mul i32 %r353, %r6
+%r355 = call i512 @mulPv480x32(i32* %r3, i32 %r354)
+%r356 = zext i512 %r355 to i800
+%r357 = add i800 %r352, %r356
+%r358 = lshr i800 %r357, 32
+%r359 = trunc i800 %r358 to i768
+%r360 = trunc i768 %r359 to i32 ; round 8
+%r361 = mul i32 %r360, %r6
+%r362 = call i512 @mulPv480x32(i32* %r3, i32 %r361)
+%r363 = zext i512 %r362 to i768
+%r364 = add i768 %r359, %r363
+%r365 = lshr i768 %r364, 32
+%r366 = trunc i768 %r365 to i736
+%r367 = trunc i736 %r366 to i32 ; round 9
+%r368 = mul i32 %r367, %r6
+%r369 = call i512 @mulPv480x32(i32* %r3, i32 %r368)
+%r370 = zext i512 %r369 to i736
+%r371 = add i736 %r366, %r370
+%r372 = lshr i736 %r371, 32
+%r373 = trunc i736 %r372 to i704
+%r374 = trunc i704 %r373 to i32 ; round 10
+%r375 = mul i32 %r374, %r6
+%r376 = call i512 @mulPv480x32(i32* %r3, i32 %r375)
+%r377 = zext i512 %r376 to i704
+%r378 = add i704 %r373, %r377
+%r379 = lshr i704 %r378, 32
+%r380 = trunc i704 %r379 to i672
+%r381 = trunc i672 %r380 to i32 ; round 11
+%r382 = mul i32 %r381, %r6
+%r383 = call i512 @mulPv480x32(i32* %r3, i32 %r382)
+%r384 = zext i512 %r383 to i672
+%r385 = add i672 %r380, %r384
+%r386 = lshr i672 %r385, 32
+%r387 = trunc i672 %r386 to i640
+%r388 = trunc i640 %r387 to i32 ; round 12
+%r389 = mul i32 %r388, %r6
+%r390 = call i512 @mulPv480x32(i32* %r3, i32 %r389)
+%r391 = zext i512 %r390 to i640
+%r392 = add i640 %r387, %r391
+%r393 = lshr i640 %r392, 32
+%r394 = trunc i640 %r393 to i608
+%r395 = trunc i608 %r394 to i32 ; round 13
+%r396 = mul i32 %r395, %r6
+%r397 = call i512 @mulPv480x32(i32* %r3, i32 %r396)
+%r398 = zext i512 %r397 to i608
+%r399 = add i608 %r394, %r398
+%r400 = lshr i608 %r399, 32
+%r401 = trunc i608 %r400 to i576
+%r402 = trunc i576 %r401 to i32 ; round 14
+%r403 = mul i32 %r402, %r6
+%r404 = call i512 @mulPv480x32(i32* %r3, i32 %r403)
+%r405 = zext i512 %r404 to i576
+%r406 = add i576 %r401, %r405
+%r407 = lshr i576 %r406, 32
+%r408 = trunc i576 %r407 to i544
+%r409 = trunc i544 %r408 to i32 ; round 15 (last)
+%r410 = mul i32 %r409, %r6
+%r411 = call i512 @mulPv480x32(i32* %r3, i32 %r410)
+%r412 = zext i512 %r411 to i544
+%r413 = add i544 %r408, %r412
+%r414 = lshr i544 %r413, 32
+%r415 = trunc i544 %r414 to i512 ; t = candidate result, held in 512 bits
+%r416 = zext i480 %r105 to i512
+%r417 = sub i512 %r415, %r416 ; d = t - p, wrapping at 512 bits
+%r418 = lshr i512 %r417, 480 ; bring bit 480 of d down to bit 0
+%r419 = trunc i512 %r418 to i1 ; bit 480 of d, used as the selector below
+%r420 = select i1 %r419, i512 %r415, i512 %r417 ; bit set -> keep t, otherwise take t - p (no branch)
+%r421 = trunc i512 %r420 to i480
+%r422 = trunc i480 %r421 to i32 ; from here: store the result to %r1[0..14], least-significant word first
+%r424 = getelementptr i32, i32* %r1, i32 0
+store i32 %r422, i32* %r424
+%r425 = lshr i480 %r421, 32
+%r426 = trunc i480 %r425 to i32
+%r428 = getelementptr i32, i32* %r1, i32 1
+store i32 %r426, i32* %r428
+%r429 = lshr i480 %r425, 32
+%r430 = trunc i480 %r429 to i32
+%r432 = getelementptr i32, i32* %r1, i32 2
+store i32 %r430, i32* %r432
+%r433 = lshr i480 %r429, 32
+%r434 = trunc i480 %r433 to i32
+%r436 = getelementptr i32, i32* %r1, i32 3
+store i32 %r434, i32* %r436
+%r437 = lshr i480 %r433, 32
+%r438 = trunc i480 %r437 to i32
+%r440 = getelementptr i32, i32* %r1, i32 4
+store i32 %r438, i32* %r440
+%r441 = lshr i480 %r437, 32
+%r442 = trunc i480 %r441 to i32
+%r444 = getelementptr i32, i32* %r1, i32 5
+store i32 %r442, i32* %r444
+%r445 = lshr i480 %r441, 32
+%r446 = trunc i480 %r445 to i32
+%r448 = getelementptr i32, i32* %r1, i32 6
+store i32 %r446, i32* %r448
+%r449 = lshr i480 %r445, 32
+%r450 = trunc i480 %r449 to i32
+%r452 = getelementptr i32, i32* %r1, i32 7
+store i32 %r450, i32* %r452
+%r453 = lshr i480 %r449, 32
+%r454 = trunc i480 %r453 to i32
+%r456 = getelementptr i32, i32* %r1, i32 8
+store i32 %r454, i32* %r456
+%r457 = lshr i480 %r453, 32
+%r458 = trunc i480 %r457 to i32
+%r460 = getelementptr i32, i32* %r1, i32 9
+store i32 %r458, i32* %r460
+%r461 = lshr i480 %r457, 32
+%r462 = trunc i480 %r461 to i32
+%r464 = getelementptr i32, i32* %r1, i32 10
+store i32 %r462, i32* %r464
+%r465 = lshr i480 %r461, 32
+%r466 = trunc i480 %r465 to i32
+%r468 = getelementptr i32, i32* %r1, i32 11
+store i32 %r466, i32* %r468
+%r469 = lshr i480 %r465, 32
+%r470 = trunc i480 %r469 to i32
+%r472 = getelementptr i32, i32* %r1, i32 12
+store i32 %r470, i32* %r472
+%r473 = lshr i480 %r469, 32
+%r474 = trunc i480 %r473 to i32
+%r476 = getelementptr i32, i32* %r1, i32 13
+store i32 %r474, i32* %r476
+%r477 = lshr i480 %r473, 32
+%r478 = trunc i480 %r477 to i32
+%r480 = getelementptr i32, i32* %r1, i32 14
+store i32 %r478, i32* %r480
+ret void
+}
+define i32 @mcl_fp_addPre15L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Adds the two 15-limb (32-bit limbs, 480-bit) integers at %r3 and %r4, stores the low 480 bits of the sum at %r2 and returns the carry-out.
+{
+%r5 = load i32, i32* %r3 ; Assemble the %r3 operand: limb 0 supplies the least-significant 32 bits, each later limb is OR-ed in 32 bits higher.
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r3, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r3, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r3, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512 ; Widen to i512 so the carry out of bit 479 is not lost by the add below.
+%r105 = load i32, i32* %r4 ; Assemble the %r4 operand with the same limb layout.
+%r106 = zext i32 %r105 to i64
+%r108 = getelementptr i32, i32* %r4, i32 1
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i64
+%r111 = shl i64 %r110, 32
+%r112 = or i64 %r106, %r111
+%r113 = zext i64 %r112 to i96
+%r115 = getelementptr i32, i32* %r4, i32 2
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i96
+%r118 = shl i96 %r117, 64
+%r119 = or i96 %r113, %r118
+%r120 = zext i96 %r119 to i128
+%r122 = getelementptr i32, i32* %r4, i32 3
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i128
+%r125 = shl i128 %r124, 96
+%r126 = or i128 %r120, %r125
+%r127 = zext i128 %r126 to i160
+%r129 = getelementptr i32, i32* %r4, i32 4
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i160
+%r132 = shl i160 %r131, 128
+%r133 = or i160 %r127, %r132
+%r134 = zext i160 %r133 to i192
+%r136 = getelementptr i32, i32* %r4, i32 5
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i192
+%r139 = shl i192 %r138, 160
+%r140 = or i192 %r134, %r139
+%r141 = zext i192 %r140 to i224
+%r143 = getelementptr i32, i32* %r4, i32 6
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i224
+%r146 = shl i224 %r145, 192
+%r147 = or i224 %r141, %r146
+%r148 = zext i224 %r147 to i256
+%r150 = getelementptr i32, i32* %r4, i32 7
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i256
+%r153 = shl i256 %r152, 224
+%r154 = or i256 %r148, %r153
+%r155 = zext i256 %r154 to i288
+%r157 = getelementptr i32, i32* %r4, i32 8
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i288
+%r160 = shl i288 %r159, 256
+%r161 = or i288 %r155, %r160
+%r162 = zext i288 %r161 to i320
+%r164 = getelementptr i32, i32* %r4, i32 9
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i320
+%r167 = shl i320 %r166, 288
+%r168 = or i320 %r162, %r167
+%r169 = zext i320 %r168 to i352
+%r171 = getelementptr i32, i32* %r4, i32 10
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i352
+%r174 = shl i352 %r173, 320
+%r175 = or i352 %r169, %r174
+%r176 = zext i352 %r175 to i384
+%r178 = getelementptr i32, i32* %r4, i32 11
+%r179 = load i32, i32* %r178
+%r180 = zext i32 %r179 to i384
+%r181 = shl i384 %r180, 352
+%r182 = or i384 %r176, %r181
+%r183 = zext i384 %r182 to i416
+%r185 = getelementptr i32, i32* %r4, i32 12
+%r186 = load i32, i32* %r185
+%r187 = zext i32 %r186 to i416
+%r188 = shl i416 %r187, 384
+%r189 = or i416 %r183, %r188
+%r190 = zext i416 %r189 to i448
+%r192 = getelementptr i32, i32* %r4, i32 13
+%r193 = load i32, i32* %r192
+%r194 = zext i32 %r193 to i448
+%r195 = shl i448 %r194, 416
+%r196 = or i448 %r190, %r195
+%r197 = zext i448 %r196 to i480
+%r199 = getelementptr i32, i32* %r4, i32 14
+%r200 = load i32, i32* %r199
+%r201 = zext i32 %r200 to i480
+%r202 = shl i480 %r201, 448
+%r203 = or i480 %r197, %r202
+%r204 = zext i480 %r203 to i512
+%r205 = add i512 %r104, %r204 ; Full-width sum; bit 480 holds the carry.
+%r206 = trunc i512 %r205 to i480 ; Low 480 bits are the result limbs.
+%r207 = trunc i480 %r206 to i32 ; Write the result to %r2 one limb at a time, shifting right by 32 bits between limbs.
+%r209 = getelementptr i32, i32* %r2, i32 0
+store i32 %r207, i32* %r209
+%r210 = lshr i480 %r206, 32
+%r211 = trunc i480 %r210 to i32
+%r213 = getelementptr i32, i32* %r2, i32 1
+store i32 %r211, i32* %r213
+%r214 = lshr i480 %r210, 32
+%r215 = trunc i480 %r214 to i32
+%r217 = getelementptr i32, i32* %r2, i32 2
+store i32 %r215, i32* %r217
+%r218 = lshr i480 %r214, 32
+%r219 = trunc i480 %r218 to i32
+%r221 = getelementptr i32, i32* %r2, i32 3
+store i32 %r219, i32* %r221
+%r222 = lshr i480 %r218, 32
+%r223 = trunc i480 %r222 to i32
+%r225 = getelementptr i32, i32* %r2, i32 4
+store i32 %r223, i32* %r225
+%r226 = lshr i480 %r222, 32
+%r227 = trunc i480 %r226 to i32
+%r229 = getelementptr i32, i32* %r2, i32 5
+store i32 %r227, i32* %r229
+%r230 = lshr i480 %r226, 32
+%r231 = trunc i480 %r230 to i32
+%r233 = getelementptr i32, i32* %r2, i32 6
+store i32 %r231, i32* %r233
+%r234 = lshr i480 %r230, 32
+%r235 = trunc i480 %r234 to i32
+%r237 = getelementptr i32, i32* %r2, i32 7
+store i32 %r235, i32* %r237
+%r238 = lshr i480 %r234, 32
+%r239 = trunc i480 %r238 to i32
+%r241 = getelementptr i32, i32* %r2, i32 8
+store i32 %r239, i32* %r241
+%r242 = lshr i480 %r238, 32
+%r243 = trunc i480 %r242 to i32
+%r245 = getelementptr i32, i32* %r2, i32 9
+store i32 %r243, i32* %r245
+%r246 = lshr i480 %r242, 32
+%r247 = trunc i480 %r246 to i32
+%r249 = getelementptr i32, i32* %r2, i32 10
+store i32 %r247, i32* %r249
+%r250 = lshr i480 %r246, 32
+%r251 = trunc i480 %r250 to i32
+%r253 = getelementptr i32, i32* %r2, i32 11
+store i32 %r251, i32* %r253
+%r254 = lshr i480 %r250, 32
+%r255 = trunc i480 %r254 to i32
+%r257 = getelementptr i32, i32* %r2, i32 12
+store i32 %r255, i32* %r257
+%r258 = lshr i480 %r254, 32
+%r259 = trunc i480 %r258 to i32
+%r261 = getelementptr i32, i32* %r2, i32 13
+store i32 %r259, i32* %r261
+%r262 = lshr i480 %r258, 32
+%r263 = trunc i480 %r262 to i32
+%r265 = getelementptr i32, i32* %r2, i32 14
+store i32 %r263, i32* %r265
+%r266 = lshr i512 %r205, 480 ; Bits 480 and above of the sum.
+%r267 = trunc i512 %r266 to i32
+ret i32 %r267 ; Carry-out: 0 or 1, because both addends were zero-extended from i480.
+}
+define i32 @mcl_fp_subPre15L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Subtracts the 15-limb (32-bit limbs, 480-bit) integer at %r4 from the one at %r3, stores the low 480 bits of the difference at %r2 and returns the borrow-out.
+{
+%r5 = load i32, i32* %r3 ; Assemble the minuend from %r3: limb 0 supplies the least-significant 32 bits, each later limb is OR-ed in 32 bits higher.
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r3, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r3, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r3, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512 ; Widen to i512 so a borrow out of bit 479 shows up in the high bits of the difference.
+%r105 = load i32, i32* %r4 ; Assemble the subtrahend from %r4 with the same limb layout.
+%r106 = zext i32 %r105 to i64
+%r108 = getelementptr i32, i32* %r4, i32 1
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i64
+%r111 = shl i64 %r110, 32
+%r112 = or i64 %r106, %r111
+%r113 = zext i64 %r112 to i96
+%r115 = getelementptr i32, i32* %r4, i32 2
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i96
+%r118 = shl i96 %r117, 64
+%r119 = or i96 %r113, %r118
+%r120 = zext i96 %r119 to i128
+%r122 = getelementptr i32, i32* %r4, i32 3
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i128
+%r125 = shl i128 %r124, 96
+%r126 = or i128 %r120, %r125
+%r127 = zext i128 %r126 to i160
+%r129 = getelementptr i32, i32* %r4, i32 4
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i160
+%r132 = shl i160 %r131, 128
+%r133 = or i160 %r127, %r132
+%r134 = zext i160 %r133 to i192
+%r136 = getelementptr i32, i32* %r4, i32 5
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i192
+%r139 = shl i192 %r138, 160
+%r140 = or i192 %r134, %r139
+%r141 = zext i192 %r140 to i224
+%r143 = getelementptr i32, i32* %r4, i32 6
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i224
+%r146 = shl i224 %r145, 192
+%r147 = or i224 %r141, %r146
+%r148 = zext i224 %r147 to i256
+%r150 = getelementptr i32, i32* %r4, i32 7
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i256
+%r153 = shl i256 %r152, 224
+%r154 = or i256 %r148, %r153
+%r155 = zext i256 %r154 to i288
+%r157 = getelementptr i32, i32* %r4, i32 8
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i288
+%r160 = shl i288 %r159, 256
+%r161 = or i288 %r155, %r160
+%r162 = zext i288 %r161 to i320
+%r164 = getelementptr i32, i32* %r4, i32 9
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i320
+%r167 = shl i320 %r166, 288
+%r168 = or i320 %r162, %r167
+%r169 = zext i320 %r168 to i352
+%r171 = getelementptr i32, i32* %r4, i32 10
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i352
+%r174 = shl i352 %r173, 320
+%r175 = or i352 %r169, %r174
+%r176 = zext i352 %r175 to i384
+%r178 = getelementptr i32, i32* %r4, i32 11
+%r179 = load i32, i32* %r178
+%r180 = zext i32 %r179 to i384
+%r181 = shl i384 %r180, 352
+%r182 = or i384 %r176, %r181
+%r183 = zext i384 %r182 to i416
+%r185 = getelementptr i32, i32* %r4, i32 12
+%r186 = load i32, i32* %r185
+%r187 = zext i32 %r186 to i416
+%r188 = shl i416 %r187, 384
+%r189 = or i416 %r183, %r188
+%r190 = zext i416 %r189 to i448
+%r192 = getelementptr i32, i32* %r4, i32 13
+%r193 = load i32, i32* %r192
+%r194 = zext i32 %r193 to i448
+%r195 = shl i448 %r194, 416
+%r196 = or i448 %r190, %r195
+%r197 = zext i448 %r196 to i480
+%r199 = getelementptr i32, i32* %r4, i32 14
+%r200 = load i32, i32* %r199
+%r201 = zext i32 %r200 to i480
+%r202 = shl i480 %r201, 448
+%r203 = or i480 %r197, %r202
+%r204 = zext i480 %r203 to i512
+%r205 = sub i512 %r104, %r204 ; Full-width difference; it wraps modulo 2^512 when the %r3 value is smaller than the %r4 value.
+%r206 = trunc i512 %r205 to i480 ; Low 480 bits are the result limbs.
+%r207 = trunc i480 %r206 to i32 ; Write the result to %r2 one limb at a time, shifting right by 32 bits between limbs.
+%r209 = getelementptr i32, i32* %r2, i32 0
+store i32 %r207, i32* %r209
+%r210 = lshr i480 %r206, 32
+%r211 = trunc i480 %r210 to i32
+%r213 = getelementptr i32, i32* %r2, i32 1
+store i32 %r211, i32* %r213
+%r214 = lshr i480 %r210, 32
+%r215 = trunc i480 %r214 to i32
+%r217 = getelementptr i32, i32* %r2, i32 2
+store i32 %r215, i32* %r217
+%r218 = lshr i480 %r214, 32
+%r219 = trunc i480 %r218 to i32
+%r221 = getelementptr i32, i32* %r2, i32 3
+store i32 %r219, i32* %r221
+%r222 = lshr i480 %r218, 32
+%r223 = trunc i480 %r222 to i32
+%r225 = getelementptr i32, i32* %r2, i32 4
+store i32 %r223, i32* %r225
+%r226 = lshr i480 %r222, 32
+%r227 = trunc i480 %r226 to i32
+%r229 = getelementptr i32, i32* %r2, i32 5
+store i32 %r227, i32* %r229
+%r230 = lshr i480 %r226, 32
+%r231 = trunc i480 %r230 to i32
+%r233 = getelementptr i32, i32* %r2, i32 6
+store i32 %r231, i32* %r233
+%r234 = lshr i480 %r230, 32
+%r235 = trunc i480 %r234 to i32
+%r237 = getelementptr i32, i32* %r2, i32 7
+store i32 %r235, i32* %r237
+%r238 = lshr i480 %r234, 32
+%r239 = trunc i480 %r238 to i32
+%r241 = getelementptr i32, i32* %r2, i32 8
+store i32 %r239, i32* %r241
+%r242 = lshr i480 %r238, 32
+%r243 = trunc i480 %r242 to i32
+%r245 = getelementptr i32, i32* %r2, i32 9
+store i32 %r243, i32* %r245
+%r246 = lshr i480 %r242, 32
+%r247 = trunc i480 %r246 to i32
+%r249 = getelementptr i32, i32* %r2, i32 10
+store i32 %r247, i32* %r249
+%r250 = lshr i480 %r246, 32
+%r251 = trunc i480 %r250 to i32
+%r253 = getelementptr i32, i32* %r2, i32 11
+store i32 %r251, i32* %r253
+%r254 = lshr i480 %r250, 32
+%r255 = trunc i480 %r254 to i32
+%r257 = getelementptr i32, i32* %r2, i32 12
+store i32 %r255, i32* %r257
+%r258 = lshr i480 %r254, 32
+%r259 = trunc i480 %r258 to i32
+%r261 = getelementptr i32, i32* %r2, i32 13
+store i32 %r259, i32* %r261
+%r262 = lshr i480 %r258, 32
+%r263 = trunc i480 %r262 to i32
+%r265 = getelementptr i32, i32* %r2, i32 14
+store i32 %r263, i32* %r265
+%r266 = lshr i512 %r205, 480 ; Bits 480 and above of the difference; all ones when the subtraction wrapped.
+%r267 = trunc i512 %r266 to i32
+%r269 = and i32 %r267, 1 ; Keep only bit 480 so the result is exactly 0 or 1.
+ret i32 %r269 ; Borrow-out: 1 if the %r3 value was smaller than the %r4 value, otherwise 0.
+}
+define void @mcl_fp_shr1_15L(i32* noalias  %r1, i32* noalias  %r2) ; Shifts the 15-limb (32-bit limbs, 480-bit) integer at %r2 right by one bit and stores the 15 result limbs at %r1.
+{
+%r3 = load i32, i32* %r2 ; Assemble the input from %r2: limb 0 supplies the least-significant 32 bits, each later limb is OR-ed in 32 bits higher.
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = zext i192 %r38 to i224
+%r41 = getelementptr i32, i32* %r2, i32 6
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i224
+%r44 = shl i224 %r43, 192
+%r45 = or i224 %r39, %r44
+%r46 = zext i224 %r45 to i256
+%r48 = getelementptr i32, i32* %r2, i32 7
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i256
+%r51 = shl i256 %r50, 224
+%r52 = or i256 %r46, %r51
+%r53 = zext i256 %r52 to i288
+%r55 = getelementptr i32, i32* %r2, i32 8
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i288
+%r58 = shl i288 %r57, 256
+%r59 = or i288 %r53, %r58
+%r60 = zext i288 %r59 to i320
+%r62 = getelementptr i32, i32* %r2, i32 9
+%r63 = load i32, i32* %r62
+%r64 = zext i32 %r63 to i320
+%r65 = shl i320 %r64, 288
+%r66 = or i320 %r60, %r65
+%r67 = zext i320 %r66 to i352
+%r69 = getelementptr i32, i32* %r2, i32 10
+%r70 = load i32, i32* %r69
+%r71 = zext i32 %r70 to i352
+%r72 = shl i352 %r71, 320
+%r73 = or i352 %r67, %r72
+%r74 = zext i352 %r73 to i384
+%r76 = getelementptr i32, i32* %r2, i32 11
+%r77 = load i32, i32* %r76
+%r78 = zext i32 %r77 to i384
+%r79 = shl i384 %r78, 352
+%r80 = or i384 %r74, %r79
+%r81 = zext i384 %r80 to i416
+%r83 = getelementptr i32, i32* %r2, i32 12
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i416
+%r86 = shl i416 %r85, 384
+%r87 = or i416 %r81, %r86
+%r88 = zext i416 %r87 to i448
+%r90 = getelementptr i32, i32* %r2, i32 13
+%r91 = load i32, i32* %r90
+%r92 = zext i32 %r91 to i448
+%r93 = shl i448 %r92, 416
+%r94 = or i448 %r88, %r93
+%r95 = zext i448 %r94 to i480
+%r97 = getelementptr i32, i32* %r2, i32 14
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i480
+%r100 = shl i480 %r99, 448
+%r101 = or i480 %r95, %r100
+%r102 = lshr i480 %r101, 1 ; Logical shift by one bit: the old bit 0 is discarded and a zero enters at bit 479.
+%r103 = trunc i480 %r102 to i32 ; Write the shifted value to %r1 one limb at a time, shifting right by 32 bits between limbs.
+%r105 = getelementptr i32, i32* %r1, i32 0
+store i32 %r103, i32* %r105
+%r106 = lshr i480 %r102, 32
+%r107 = trunc i480 %r106 to i32
+%r109 = getelementptr i32, i32* %r1, i32 1
+store i32 %r107, i32* %r109
+%r110 = lshr i480 %r106, 32
+%r111 = trunc i480 %r110 to i32
+%r113 = getelementptr i32, i32* %r1, i32 2
+store i32 %r111, i32* %r113
+%r114 = lshr i480 %r110, 32
+%r115 = trunc i480 %r114 to i32
+%r117 = getelementptr i32, i32* %r1, i32 3
+store i32 %r115, i32* %r117
+%r118 = lshr i480 %r114, 32
+%r119 = trunc i480 %r118 to i32
+%r121 = getelementptr i32, i32* %r1, i32 4
+store i32 %r119, i32* %r121
+%r122 = lshr i480 %r118, 32
+%r123 = trunc i480 %r122 to i32
+%r125 = getelementptr i32, i32* %r1, i32 5
+store i32 %r123, i32* %r125
+%r126 = lshr i480 %r122, 32
+%r127 = trunc i480 %r126 to i32
+%r129 = getelementptr i32, i32* %r1, i32 6
+store i32 %r127, i32* %r129
+%r130 = lshr i480 %r126, 32
+%r131 = trunc i480 %r130 to i32
+%r133 = getelementptr i32, i32* %r1, i32 7
+store i32 %r131, i32* %r133
+%r134 = lshr i480 %r130, 32
+%r135 = trunc i480 %r134 to i32
+%r137 = getelementptr i32, i32* %r1, i32 8
+store i32 %r135, i32* %r137
+%r138 = lshr i480 %r134, 32
+%r139 = trunc i480 %r138 to i32
+%r141 = getelementptr i32, i32* %r1, i32 9
+store i32 %r139, i32* %r141
+%r142 = lshr i480 %r138, 32
+%r143 = trunc i480 %r142 to i32
+%r145 = getelementptr i32, i32* %r1, i32 10
+store i32 %r143, i32* %r145
+%r146 = lshr i480 %r142, 32
+%r147 = trunc i480 %r146 to i32
+%r149 = getelementptr i32, i32* %r1, i32 11
+store i32 %r147, i32* %r149
+%r150 = lshr i480 %r146, 32
+%r151 = trunc i480 %r150 to i32
+%r153 = getelementptr i32, i32* %r1, i32 12
+store i32 %r151, i32* %r153
+%r154 = lshr i480 %r150, 32
+%r155 = trunc i480 %r154 to i32
+%r157 = getelementptr i32, i32* %r1, i32 13
+store i32 %r155, i32* %r157
+%r158 = lshr i480 %r154, 32
+%r159 = trunc i480 %r158 to i32
+%r161 = getelementptr i32, i32* %r1, i32 14
+store i32 %r159, i32* %r161
+ret void
+}
+define void @mcl_fp_add15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Adds the 15-limb (32-bit limbs, 480-bit) integers at %r2 and %r3 and stores the sum at %r1; if subtracting the 15-limb value at %r4 from that sum does not borrow, %r1 is overwritten with the difference. NOTE(review): %r4 is presumably the field modulus p, making this a modular add - confirm against the caller.
+{
+%r5 = load i32, i32* %r2 ; Assemble the first addend from %r2: limb 0 supplies the least-significant 32 bits, each later limb is OR-ed in 32 bits higher.
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = load i32, i32* %r3 ; Assemble the second addend from %r3 with the same limb layout.
+%r105 = zext i32 %r104 to i64
+%r107 = getelementptr i32, i32* %r3, i32 1
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i64
+%r110 = shl i64 %r109, 32
+%r111 = or i64 %r105, %r110
+%r112 = zext i64 %r111 to i96
+%r114 = getelementptr i32, i32* %r3, i32 2
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i96
+%r117 = shl i96 %r116, 64
+%r118 = or i96 %r112, %r117
+%r119 = zext i96 %r118 to i128
+%r121 = getelementptr i32, i32* %r3, i32 3
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i128
+%r124 = shl i128 %r123, 96
+%r125 = or i128 %r119, %r124
+%r126 = zext i128 %r125 to i160
+%r128 = getelementptr i32, i32* %r3, i32 4
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i160
+%r131 = shl i160 %r130, 128
+%r132 = or i160 %r126, %r131
+%r133 = zext i160 %r132 to i192
+%r135 = getelementptr i32, i32* %r3, i32 5
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i192
+%r138 = shl i192 %r137, 160
+%r139 = or i192 %r133, %r138
+%r140 = zext i192 %r139 to i224
+%r142 = getelementptr i32, i32* %r3, i32 6
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i224
+%r145 = shl i224 %r144, 192
+%r146 = or i224 %r140, %r145
+%r147 = zext i224 %r146 to i256
+%r149 = getelementptr i32, i32* %r3, i32 7
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i256
+%r152 = shl i256 %r151, 224
+%r153 = or i256 %r147, %r152
+%r154 = zext i256 %r153 to i288
+%r156 = getelementptr i32, i32* %r3, i32 8
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i288
+%r159 = shl i288 %r158, 256
+%r160 = or i288 %r154, %r159
+%r161 = zext i288 %r160 to i320
+%r163 = getelementptr i32, i32* %r3, i32 9
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i320
+%r166 = shl i320 %r165, 288
+%r167 = or i320 %r161, %r166
+%r168 = zext i320 %r167 to i352
+%r170 = getelementptr i32, i32* %r3, i32 10
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i352
+%r173 = shl i352 %r172, 320
+%r174 = or i352 %r168, %r173
+%r175 = zext i352 %r174 to i384
+%r177 = getelementptr i32, i32* %r3, i32 11
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i384
+%r180 = shl i384 %r179, 352
+%r181 = or i384 %r175, %r180
+%r182 = zext i384 %r181 to i416
+%r184 = getelementptr i32, i32* %r3, i32 12
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i416
+%r187 = shl i416 %r186, 384
+%r188 = or i416 %r182, %r187
+%r189 = zext i416 %r188 to i448
+%r191 = getelementptr i32, i32* %r3, i32 13
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i448
+%r194 = shl i448 %r193, 416
+%r195 = or i448 %r189, %r194
+%r196 = zext i448 %r195 to i480
+%r198 = getelementptr i32, i32* %r3, i32 14
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i480
+%r201 = shl i480 %r200, 448
+%r202 = or i480 %r196, %r201
+%r203 = zext i480 %r103 to i512 ; Widen both addends to i512 so the carry out of bit 479 is kept in the sum.
+%r204 = zext i480 %r202 to i512
+%r205 = add i512 %r203, %r204 ; Full-width sum, reused below as the minuend.
+%r206 = trunc i512 %r205 to i480
+%r207 = trunc i480 %r206 to i32 ; Store the low 480 bits of the unreduced sum to %r1 first; this is the final result if the 'carry' path is taken.
+%r209 = getelementptr i32, i32* %r1, i32 0
+store i32 %r207, i32* %r209
+%r210 = lshr i480 %r206, 32
+%r211 = trunc i480 %r210 to i32
+%r213 = getelementptr i32, i32* %r1, i32 1
+store i32 %r211, i32* %r213
+%r214 = lshr i480 %r210, 32
+%r215 = trunc i480 %r214 to i32
+%r217 = getelementptr i32, i32* %r1, i32 2
+store i32 %r215, i32* %r217
+%r218 = lshr i480 %r214, 32
+%r219 = trunc i480 %r218 to i32
+%r221 = getelementptr i32, i32* %r1, i32 3
+store i32 %r219, i32* %r221
+%r222 = lshr i480 %r218, 32
+%r223 = trunc i480 %r222 to i32
+%r225 = getelementptr i32, i32* %r1, i32 4
+store i32 %r223, i32* %r225
+%r226 = lshr i480 %r222, 32
+%r227 = trunc i480 %r226 to i32
+%r229 = getelementptr i32, i32* %r1, i32 5
+store i32 %r227, i32* %r229
+%r230 = lshr i480 %r226, 32
+%r231 = trunc i480 %r230 to i32
+%r233 = getelementptr i32, i32* %r1, i32 6
+store i32 %r231, i32* %r233
+%r234 = lshr i480 %r230, 32
+%r235 = trunc i480 %r234 to i32
+%r237 = getelementptr i32, i32* %r1, i32 7
+store i32 %r235, i32* %r237
+%r238 = lshr i480 %r234, 32
+%r239 = trunc i480 %r238 to i32
+%r241 = getelementptr i32, i32* %r1, i32 8
+store i32 %r239, i32* %r241
+%r242 = lshr i480 %r238, 32
+%r243 = trunc i480 %r242 to i32
+%r245 = getelementptr i32, i32* %r1, i32 9
+store i32 %r243, i32* %r245
+%r246 = lshr i480 %r242, 32
+%r247 = trunc i480 %r246 to i32
+%r249 = getelementptr i32, i32* %r1, i32 10
+store i32 %r247, i32* %r249
+%r250 = lshr i480 %r246, 32
+%r251 = trunc i480 %r250 to i32
+%r253 = getelementptr i32, i32* %r1, i32 11
+store i32 %r251, i32* %r253
+%r254 = lshr i480 %r250, 32
+%r255 = trunc i480 %r254 to i32
+%r257 = getelementptr i32, i32* %r1, i32 12
+store i32 %r255, i32* %r257
+%r258 = lshr i480 %r254, 32
+%r259 = trunc i480 %r258 to i32
+%r261 = getelementptr i32, i32* %r1, i32 13
+store i32 %r259, i32* %r261
+%r262 = lshr i480 %r258, 32
+%r263 = trunc i480 %r262 to i32
+%r265 = getelementptr i32, i32* %r1, i32 14
+store i32 %r263, i32* %r265
+%r266 = load i32, i32* %r4 ; Assemble the 15-limb subtrahend from %r4 with the same limb layout.
+%r267 = zext i32 %r266 to i64
+%r269 = getelementptr i32, i32* %r4, i32 1
+%r270 = load i32, i32* %r269
+%r271 = zext i32 %r270 to i64
+%r272 = shl i64 %r271, 32
+%r273 = or i64 %r267, %r272
+%r274 = zext i64 %r273 to i96
+%r276 = getelementptr i32, i32* %r4, i32 2
+%r277 = load i32, i32* %r276
+%r278 = zext i32 %r277 to i96
+%r279 = shl i96 %r278, 64
+%r280 = or i96 %r274, %r279
+%r281 = zext i96 %r280 to i128
+%r283 = getelementptr i32, i32* %r4, i32 3
+%r284 = load i32, i32* %r283
+%r285 = zext i32 %r284 to i128
+%r286 = shl i128 %r285, 96
+%r287 = or i128 %r281, %r286
+%r288 = zext i128 %r287 to i160
+%r290 = getelementptr i32, i32* %r4, i32 4
+%r291 = load i32, i32* %r290
+%r292 = zext i32 %r291 to i160
+%r293 = shl i160 %r292, 128
+%r294 = or i160 %r288, %r293
+%r295 = zext i160 %r294 to i192
+%r297 = getelementptr i32, i32* %r4, i32 5
+%r298 = load i32, i32* %r297
+%r299 = zext i32 %r298 to i192
+%r300 = shl i192 %r299, 160
+%r301 = or i192 %r295, %r300
+%r302 = zext i192 %r301 to i224
+%r304 = getelementptr i32, i32* %r4, i32 6
+%r305 = load i32, i32* %r304
+%r306 = zext i32 %r305 to i224
+%r307 = shl i224 %r306, 192
+%r308 = or i224 %r302, %r307
+%r309 = zext i224 %r308 to i256
+%r311 = getelementptr i32, i32* %r4, i32 7
+%r312 = load i32, i32* %r311
+%r313 = zext i32 %r312 to i256
+%r314 = shl i256 %r313, 224
+%r315 = or i256 %r309, %r314
+%r316 = zext i256 %r315 to i288
+%r318 = getelementptr i32, i32* %r4, i32 8
+%r319 = load i32, i32* %r318
+%r320 = zext i32 %r319 to i288
+%r321 = shl i288 %r320, 256
+%r322 = or i288 %r316, %r321
+%r323 = zext i288 %r322 to i320
+%r325 = getelementptr i32, i32* %r4, i32 9
+%r326 = load i32, i32* %r325
+%r327 = zext i32 %r326 to i320
+%r328 = shl i320 %r327, 288
+%r329 = or i320 %r323, %r328
+%r330 = zext i320 %r329 to i352
+%r332 = getelementptr i32, i32* %r4, i32 10
+%r333 = load i32, i32* %r332
+%r334 = zext i32 %r333 to i352
+%r335 = shl i352 %r334, 320
+%r336 = or i352 %r330, %r335
+%r337 = zext i352 %r336 to i384
+%r339 = getelementptr i32, i32* %r4, i32 11
+%r340 = load i32, i32* %r339
+%r341 = zext i32 %r340 to i384
+%r342 = shl i384 %r341, 352
+%r343 = or i384 %r337, %r342
+%r344 = zext i384 %r343 to i416
+%r346 = getelementptr i32, i32* %r4, i32 12
+%r347 = load i32, i32* %r346
+%r348 = zext i32 %r347 to i416
+%r349 = shl i416 %r348, 384
+%r350 = or i416 %r344, %r349
+%r351 = zext i416 %r350 to i448
+%r353 = getelementptr i32, i32* %r4, i32 13
+%r354 = load i32, i32* %r353
+%r355 = zext i32 %r354 to i448
+%r356 = shl i448 %r355, 416
+%r357 = or i448 %r351, %r356
+%r358 = zext i448 %r357 to i480
+%r360 = getelementptr i32, i32* %r4, i32 14
+%r361 = load i32, i32* %r360
+%r362 = zext i32 %r361 to i480
+%r363 = shl i480 %r362, 448
+%r364 = or i480 %r358, %r363
+%r365 = zext i480 %r364 to i512
+%r366 = sub i512 %r205, %r365 ; Trial subtraction of the %r4 value from the full-width sum; it wraps when the sum is smaller.
+%r367 = lshr i512 %r366, 480
+%r368 = trunc i512 %r367 to i1 ; Bit 480 of the difference, used as the borrow flag.
+br i1%r368, label %carry, label %nocarry ; Flag set: keep the sum already stored in %r1. Flag clear: replace it with the difference.
+nocarry: ; Overwrite %r1 with the low 480 bits of (sum - %r4 value), one limb at a time.
+%r369 = trunc i512 %r366 to i480
+%r370 = trunc i480 %r369 to i32
+%r372 = getelementptr i32, i32* %r1, i32 0
+store i32 %r370, i32* %r372
+%r373 = lshr i480 %r369, 32
+%r374 = trunc i480 %r373 to i32
+%r376 = getelementptr i32, i32* %r1, i32 1
+store i32 %r374, i32* %r376
+%r377 = lshr i480 %r373, 32
+%r378 = trunc i480 %r377 to i32
+%r380 = getelementptr i32, i32* %r1, i32 2
+store i32 %r378, i32* %r380
+%r381 = lshr i480 %r377, 32
+%r382 = trunc i480 %r381 to i32
+%r384 = getelementptr i32, i32* %r1, i32 3
+store i32 %r382, i32* %r384
+%r385 = lshr i480 %r381, 32
+%r386 = trunc i480 %r385 to i32
+%r388 = getelementptr i32, i32* %r1, i32 4
+store i32 %r386, i32* %r388
+%r389 = lshr i480 %r385, 32
+%r390 = trunc i480 %r389 to i32
+%r392 = getelementptr i32, i32* %r1, i32 5
+store i32 %r390, i32* %r392
+%r393 = lshr i480 %r389, 32
+%r394 = trunc i480 %r393 to i32
+%r396 = getelementptr i32, i32* %r1, i32 6
+store i32 %r394, i32* %r396
+%r397 = lshr i480 %r393, 32
+%r398 = trunc i480 %r397 to i32
+%r400 = getelementptr i32, i32* %r1, i32 7
+store i32 %r398, i32* %r400
+%r401 = lshr i480 %r397, 32
+%r402 = trunc i480 %r401 to i32
+%r404 = getelementptr i32, i32* %r1, i32 8
+store i32 %r402, i32* %r404
+%r405 = lshr i480 %r401, 32
+%r406 = trunc i480 %r405 to i32
+%r408 = getelementptr i32, i32* %r1, i32 9
+store i32 %r406, i32* %r408
+%r409 = lshr i480 %r405, 32
+%r410 = trunc i480 %r409 to i32
+%r412 = getelementptr i32, i32* %r1, i32 10
+store i32 %r410, i32* %r412
+%r413 = lshr i480 %r409, 32
+%r414 = trunc i480 %r413 to i32
+%r416 = getelementptr i32, i32* %r1, i32 11
+store i32 %r414, i32* %r416
+%r417 = lshr i480 %r413, 32
+%r418 = trunc i480 %r417 to i32
+%r420 = getelementptr i32, i32* %r1, i32 12
+store i32 %r418, i32* %r420
+%r421 = lshr i480 %r417, 32
+%r422 = trunc i480 %r421 to i32
+%r424 = getelementptr i32, i32* %r1, i32 13
+store i32 %r422, i32* %r424
+%r425 = lshr i480 %r421, 32
+%r426 = trunc i480 %r425 to i32
+%r428 = getelementptr i32, i32* %r1, i32 14
+store i32 %r426, i32* %r428
+ret void
+carry: ; Nothing to do: %r1 already holds the unreduced sum.
+ret void
+}
+define void @mcl_fp_addNF15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Stores to %r1 either (%r2 + %r3) or (%r2 + %r3) - %r4; every operand is read/written as 15 x i32 words.
+{ ; NOTE(review): %r4 is presumably the field modulus, and "NF" presumably means its top bit is clear - confirm against the generator.
+%r5 = load i32, i32* %r2 ; assemble the 15 words at %r2 into one i480, word 0 in the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = load i32, i32* %r3 ; same assembly for the 15 words at %r3
+%r105 = zext i32 %r104 to i64
+%r107 = getelementptr i32, i32* %r3, i32 1
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i64
+%r110 = shl i64 %r109, 32
+%r111 = or i64 %r105, %r110
+%r112 = zext i64 %r111 to i96
+%r114 = getelementptr i32, i32* %r3, i32 2
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i96
+%r117 = shl i96 %r116, 64
+%r118 = or i96 %r112, %r117
+%r119 = zext i96 %r118 to i128
+%r121 = getelementptr i32, i32* %r3, i32 3
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i128
+%r124 = shl i128 %r123, 96
+%r125 = or i128 %r119, %r124
+%r126 = zext i128 %r125 to i160
+%r128 = getelementptr i32, i32* %r3, i32 4
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i160
+%r131 = shl i160 %r130, 128
+%r132 = or i160 %r126, %r131
+%r133 = zext i160 %r132 to i192
+%r135 = getelementptr i32, i32* %r3, i32 5
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i192
+%r138 = shl i192 %r137, 160
+%r139 = or i192 %r133, %r138
+%r140 = zext i192 %r139 to i224
+%r142 = getelementptr i32, i32* %r3, i32 6
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i224
+%r145 = shl i224 %r144, 192
+%r146 = or i224 %r140, %r145
+%r147 = zext i224 %r146 to i256
+%r149 = getelementptr i32, i32* %r3, i32 7
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i256
+%r152 = shl i256 %r151, 224
+%r153 = or i256 %r147, %r152
+%r154 = zext i256 %r153 to i288
+%r156 = getelementptr i32, i32* %r3, i32 8
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i288
+%r159 = shl i288 %r158, 256
+%r160 = or i288 %r154, %r159
+%r161 = zext i288 %r160 to i320
+%r163 = getelementptr i32, i32* %r3, i32 9
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i320
+%r166 = shl i320 %r165, 288
+%r167 = or i320 %r161, %r166
+%r168 = zext i320 %r167 to i352
+%r170 = getelementptr i32, i32* %r3, i32 10
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i352
+%r173 = shl i352 %r172, 320
+%r174 = or i352 %r168, %r173
+%r175 = zext i352 %r174 to i384
+%r177 = getelementptr i32, i32* %r3, i32 11
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i384
+%r180 = shl i384 %r179, 352
+%r181 = or i384 %r175, %r180
+%r182 = zext i384 %r181 to i416
+%r184 = getelementptr i32, i32* %r3, i32 12
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i416
+%r187 = shl i416 %r186, 384
+%r188 = or i416 %r182, %r187
+%r189 = zext i416 %r188 to i448
+%r191 = getelementptr i32, i32* %r3, i32 13
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i448
+%r194 = shl i448 %r193, 416
+%r195 = or i448 %r189, %r194
+%r196 = zext i448 %r195 to i480
+%r198 = getelementptr i32, i32* %r3, i32 14
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i480
+%r201 = shl i480 %r200, 448
+%r202 = or i480 %r196, %r201
+%r203 = add i480 %r103, %r202 ; 480-bit sum of the two operands; any carry out of bit 479 is discarded
+%r204 = load i32, i32* %r4 ; assemble the 15 words at %r4 into an i480 the same way
+%r205 = zext i32 %r204 to i64
+%r207 = getelementptr i32, i32* %r4, i32 1
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i64
+%r210 = shl i64 %r209, 32
+%r211 = or i64 %r205, %r210
+%r212 = zext i64 %r211 to i96
+%r214 = getelementptr i32, i32* %r4, i32 2
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i96
+%r217 = shl i96 %r216, 64
+%r218 = or i96 %r212, %r217
+%r219 = zext i96 %r218 to i128
+%r221 = getelementptr i32, i32* %r4, i32 3
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i128
+%r224 = shl i128 %r223, 96
+%r225 = or i128 %r219, %r224
+%r226 = zext i128 %r225 to i160
+%r228 = getelementptr i32, i32* %r4, i32 4
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i160
+%r231 = shl i160 %r230, 128
+%r232 = or i160 %r226, %r231
+%r233 = zext i160 %r232 to i192
+%r235 = getelementptr i32, i32* %r4, i32 5
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i192
+%r238 = shl i192 %r237, 160
+%r239 = or i192 %r233, %r238
+%r240 = zext i192 %r239 to i224
+%r242 = getelementptr i32, i32* %r4, i32 6
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i224
+%r245 = shl i224 %r244, 192
+%r246 = or i224 %r240, %r245
+%r247 = zext i224 %r246 to i256
+%r249 = getelementptr i32, i32* %r4, i32 7
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i256
+%r252 = shl i256 %r251, 224
+%r253 = or i256 %r247, %r252
+%r254 = zext i256 %r253 to i288
+%r256 = getelementptr i32, i32* %r4, i32 8
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i288
+%r259 = shl i288 %r258, 256
+%r260 = or i288 %r254, %r259
+%r261 = zext i288 %r260 to i320
+%r263 = getelementptr i32, i32* %r4, i32 9
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i320
+%r266 = shl i320 %r265, 288
+%r267 = or i320 %r261, %r266
+%r268 = zext i320 %r267 to i352
+%r270 = getelementptr i32, i32* %r4, i32 10
+%r271 = load i32, i32* %r270
+%r272 = zext i32 %r271 to i352
+%r273 = shl i352 %r272, 320
+%r274 = or i352 %r268, %r273
+%r275 = zext i352 %r274 to i384
+%r277 = getelementptr i32, i32* %r4, i32 11
+%r278 = load i32, i32* %r277
+%r279 = zext i32 %r278 to i384
+%r280 = shl i384 %r279, 352
+%r281 = or i384 %r275, %r280
+%r282 = zext i384 %r281 to i416
+%r284 = getelementptr i32, i32* %r4, i32 12
+%r285 = load i32, i32* %r284
+%r286 = zext i32 %r285 to i416
+%r287 = shl i416 %r286, 384
+%r288 = or i416 %r282, %r287
+%r289 = zext i416 %r288 to i448
+%r291 = getelementptr i32, i32* %r4, i32 13
+%r292 = load i32, i32* %r291
+%r293 = zext i32 %r292 to i448
+%r294 = shl i448 %r293, 416
+%r295 = or i448 %r289, %r294
+%r296 = zext i448 %r295 to i480
+%r298 = getelementptr i32, i32* %r4, i32 14
+%r299 = load i32, i32* %r298
+%r300 = zext i32 %r299 to i480
+%r301 = shl i480 %r300, 448
+%r302 = or i480 %r296, %r301
+%r303 = sub i480 %r203, %r302 ; sum minus the %r4 value, wrapping modulo 2^480
+%r304 = lshr i480 %r303, 479 ; move bit 479 of the difference down to bit 0
+%r305 = trunc i480 %r304 to i1 ; i1 flag = top bit of the difference
+%r306 = select i1 %r305, i480 %r203, i480 %r303 ; flag set -> keep the plain sum, otherwise keep the difference
+%r307 = trunc i480 %r306 to i32 ; write the selected value to %r1 as 15 words, lowest word first
+%r309 = getelementptr i32, i32* %r1, i32 0
+store i32 %r307, i32* %r309
+%r310 = lshr i480 %r306, 32
+%r311 = trunc i480 %r310 to i32
+%r313 = getelementptr i32, i32* %r1, i32 1
+store i32 %r311, i32* %r313
+%r314 = lshr i480 %r310, 32
+%r315 = trunc i480 %r314 to i32
+%r317 = getelementptr i32, i32* %r1, i32 2
+store i32 %r315, i32* %r317
+%r318 = lshr i480 %r314, 32
+%r319 = trunc i480 %r318 to i32
+%r321 = getelementptr i32, i32* %r1, i32 3
+store i32 %r319, i32* %r321
+%r322 = lshr i480 %r318, 32
+%r323 = trunc i480 %r322 to i32
+%r325 = getelementptr i32, i32* %r1, i32 4
+store i32 %r323, i32* %r325
+%r326 = lshr i480 %r322, 32
+%r327 = trunc i480 %r326 to i32
+%r329 = getelementptr i32, i32* %r1, i32 5
+store i32 %r327, i32* %r329
+%r330 = lshr i480 %r326, 32
+%r331 = trunc i480 %r330 to i32
+%r333 = getelementptr i32, i32* %r1, i32 6
+store i32 %r331, i32* %r333
+%r334 = lshr i480 %r330, 32
+%r335 = trunc i480 %r334 to i32
+%r337 = getelementptr i32, i32* %r1, i32 7
+store i32 %r335, i32* %r337
+%r338 = lshr i480 %r334, 32
+%r339 = trunc i480 %r338 to i32
+%r341 = getelementptr i32, i32* %r1, i32 8
+store i32 %r339, i32* %r341
+%r342 = lshr i480 %r338, 32
+%r343 = trunc i480 %r342 to i32
+%r345 = getelementptr i32, i32* %r1, i32 9
+store i32 %r343, i32* %r345
+%r346 = lshr i480 %r342, 32
+%r347 = trunc i480 %r346 to i32
+%r349 = getelementptr i32, i32* %r1, i32 10
+store i32 %r347, i32* %r349
+%r350 = lshr i480 %r346, 32
+%r351 = trunc i480 %r350 to i32
+%r353 = getelementptr i32, i32* %r1, i32 11
+store i32 %r351, i32* %r353
+%r354 = lshr i480 %r350, 32
+%r355 = trunc i480 %r354 to i32
+%r357 = getelementptr i32, i32* %r1, i32 12
+store i32 %r355, i32* %r357
+%r358 = lshr i480 %r354, 32
+%r359 = trunc i480 %r358 to i32
+%r361 = getelementptr i32, i32* %r1, i32 13
+store i32 %r359, i32* %r361
+%r362 = lshr i480 %r358, 32
+%r363 = trunc i480 %r362 to i32
+%r365 = getelementptr i32, i32* %r1, i32 14
+store i32 %r363, i32* %r365
+ret void
+}
+define void @mcl_fp_sub15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Stores (%r2 - %r3) to %r1 as 15 x i32 words, then adds the %r4 value and stores again if the subtraction borrowed.
+{ ; NOTE(review): %r4 is presumably the field modulus - confirm against the generator.
+%r5 = load i32, i32* %r2 ; assemble the 15 words at %r2 into one i480, word 0 in the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = load i32, i32* %r3 ; same assembly for the 15 words at %r3
+%r105 = zext i32 %r104 to i64
+%r107 = getelementptr i32, i32* %r3, i32 1
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i64
+%r110 = shl i64 %r109, 32
+%r111 = or i64 %r105, %r110
+%r112 = zext i64 %r111 to i96
+%r114 = getelementptr i32, i32* %r3, i32 2
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i96
+%r117 = shl i96 %r116, 64
+%r118 = or i96 %r112, %r117
+%r119 = zext i96 %r118 to i128
+%r121 = getelementptr i32, i32* %r3, i32 3
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i128
+%r124 = shl i128 %r123, 96
+%r125 = or i128 %r119, %r124
+%r126 = zext i128 %r125 to i160
+%r128 = getelementptr i32, i32* %r3, i32 4
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i160
+%r131 = shl i160 %r130, 128
+%r132 = or i160 %r126, %r131
+%r133 = zext i160 %r132 to i192
+%r135 = getelementptr i32, i32* %r3, i32 5
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i192
+%r138 = shl i192 %r137, 160
+%r139 = or i192 %r133, %r138
+%r140 = zext i192 %r139 to i224
+%r142 = getelementptr i32, i32* %r3, i32 6
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i224
+%r145 = shl i224 %r144, 192
+%r146 = or i224 %r140, %r145
+%r147 = zext i224 %r146 to i256
+%r149 = getelementptr i32, i32* %r3, i32 7
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i256
+%r152 = shl i256 %r151, 224
+%r153 = or i256 %r147, %r152
+%r154 = zext i256 %r153 to i288
+%r156 = getelementptr i32, i32* %r3, i32 8
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i288
+%r159 = shl i288 %r158, 256
+%r160 = or i288 %r154, %r159
+%r161 = zext i288 %r160 to i320
+%r163 = getelementptr i32, i32* %r3, i32 9
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i320
+%r166 = shl i320 %r165, 288
+%r167 = or i320 %r161, %r166
+%r168 = zext i320 %r167 to i352
+%r170 = getelementptr i32, i32* %r3, i32 10
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i352
+%r173 = shl i352 %r172, 320
+%r174 = or i352 %r168, %r173
+%r175 = zext i352 %r174 to i384
+%r177 = getelementptr i32, i32* %r3, i32 11
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i384
+%r180 = shl i384 %r179, 352
+%r181 = or i384 %r175, %r180
+%r182 = zext i384 %r181 to i416
+%r184 = getelementptr i32, i32* %r3, i32 12
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i416
+%r187 = shl i416 %r186, 384
+%r188 = or i416 %r182, %r187
+%r189 = zext i416 %r188 to i448
+%r191 = getelementptr i32, i32* %r3, i32 13
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i448
+%r194 = shl i448 %r193, 416
+%r195 = or i448 %r189, %r194
+%r196 = zext i448 %r195 to i480
+%r198 = getelementptr i32, i32* %r3, i32 14
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i480
+%r201 = shl i480 %r200, 448
+%r202 = or i480 %r196, %r201
+%r203 = zext i480 %r103 to i512 ; widen both operands to i512 so a borrow shows up above bit 479
+%r204 = zext i480 %r202 to i512
+%r205 = sub i512 %r203, %r204 ; 512-bit difference of the zero-extended operands
+%r206 = trunc i512 %r205 to i480 ; low 480 bits of the difference
+%r207 = lshr i512 %r205, 480 ; bring bit 480 of the wide difference down to bit 0
+%r208 = trunc i512 %r207 to i1 ; i1 borrow flag, set when the %r3 value exceeds the %r2 value
+%r209 = trunc i480 %r206 to i32 ; store the low 480 bits to %r1 unconditionally, lowest word first
+%r211 = getelementptr i32, i32* %r1, i32 0
+store i32 %r209, i32* %r211
+%r212 = lshr i480 %r206, 32
+%r213 = trunc i480 %r212 to i32
+%r215 = getelementptr i32, i32* %r1, i32 1
+store i32 %r213, i32* %r215
+%r216 = lshr i480 %r212, 32
+%r217 = trunc i480 %r216 to i32
+%r219 = getelementptr i32, i32* %r1, i32 2
+store i32 %r217, i32* %r219
+%r220 = lshr i480 %r216, 32
+%r221 = trunc i480 %r220 to i32
+%r223 = getelementptr i32, i32* %r1, i32 3
+store i32 %r221, i32* %r223
+%r224 = lshr i480 %r220, 32
+%r225 = trunc i480 %r224 to i32
+%r227 = getelementptr i32, i32* %r1, i32 4
+store i32 %r225, i32* %r227
+%r228 = lshr i480 %r224, 32
+%r229 = trunc i480 %r228 to i32
+%r231 = getelementptr i32, i32* %r1, i32 5
+store i32 %r229, i32* %r231
+%r232 = lshr i480 %r228, 32
+%r233 = trunc i480 %r232 to i32
+%r235 = getelementptr i32, i32* %r1, i32 6
+store i32 %r233, i32* %r235
+%r236 = lshr i480 %r232, 32
+%r237 = trunc i480 %r236 to i32
+%r239 = getelementptr i32, i32* %r1, i32 7
+store i32 %r237, i32* %r239
+%r240 = lshr i480 %r236, 32
+%r241 = trunc i480 %r240 to i32
+%r243 = getelementptr i32, i32* %r1, i32 8
+store i32 %r241, i32* %r243
+%r244 = lshr i480 %r240, 32
+%r245 = trunc i480 %r244 to i32
+%r247 = getelementptr i32, i32* %r1, i32 9
+store i32 %r245, i32* %r247
+%r248 = lshr i480 %r244, 32
+%r249 = trunc i480 %r248 to i32
+%r251 = getelementptr i32, i32* %r1, i32 10
+store i32 %r249, i32* %r251
+%r252 = lshr i480 %r248, 32
+%r253 = trunc i480 %r252 to i32
+%r255 = getelementptr i32, i32* %r1, i32 11
+store i32 %r253, i32* %r255
+%r256 = lshr i480 %r252, 32
+%r257 = trunc i480 %r256 to i32
+%r259 = getelementptr i32, i32* %r1, i32 12
+store i32 %r257, i32* %r259
+%r260 = lshr i480 %r256, 32
+%r261 = trunc i480 %r260 to i32
+%r263 = getelementptr i32, i32* %r1, i32 13
+store i32 %r261, i32* %r263
+%r264 = lshr i480 %r260, 32
+%r265 = trunc i480 %r264 to i32
+%r267 = getelementptr i32, i32* %r1, i32 14
+store i32 %r265, i32* %r267
+br i1%r208, label %carry, label %nocarry ; borrow set -> carry block, otherwise the stored difference is final
+nocarry: ; no borrow: %r1 already holds the result
+ret void
+carry: ; borrow: add the %r4 value to the difference and overwrite %r1
+%r268 = load i32, i32* %r4 ; assemble the 15 words at %r4 into an i480
+%r269 = zext i32 %r268 to i64
+%r271 = getelementptr i32, i32* %r4, i32 1
+%r272 = load i32, i32* %r271
+%r273 = zext i32 %r272 to i64
+%r274 = shl i64 %r273, 32
+%r275 = or i64 %r269, %r274
+%r276 = zext i64 %r275 to i96
+%r278 = getelementptr i32, i32* %r4, i32 2
+%r279 = load i32, i32* %r278
+%r280 = zext i32 %r279 to i96
+%r281 = shl i96 %r280, 64
+%r282 = or i96 %r276, %r281
+%r283 = zext i96 %r282 to i128
+%r285 = getelementptr i32, i32* %r4, i32 3
+%r286 = load i32, i32* %r285
+%r287 = zext i32 %r286 to i128
+%r288 = shl i128 %r287, 96
+%r289 = or i128 %r283, %r288
+%r290 = zext i128 %r289 to i160
+%r292 = getelementptr i32, i32* %r4, i32 4
+%r293 = load i32, i32* %r292
+%r294 = zext i32 %r293 to i160
+%r295 = shl i160 %r294, 128
+%r296 = or i160 %r290, %r295
+%r297 = zext i160 %r296 to i192
+%r299 = getelementptr i32, i32* %r4, i32 5
+%r300 = load i32, i32* %r299
+%r301 = zext i32 %r300 to i192
+%r302 = shl i192 %r301, 160
+%r303 = or i192 %r297, %r302
+%r304 = zext i192 %r303 to i224
+%r306 = getelementptr i32, i32* %r4, i32 6
+%r307 = load i32, i32* %r306
+%r308 = zext i32 %r307 to i224
+%r309 = shl i224 %r308, 192
+%r310 = or i224 %r304, %r309
+%r311 = zext i224 %r310 to i256
+%r313 = getelementptr i32, i32* %r4, i32 7
+%r314 = load i32, i32* %r313
+%r315 = zext i32 %r314 to i256
+%r316 = shl i256 %r315, 224
+%r317 = or i256 %r311, %r316
+%r318 = zext i256 %r317 to i288
+%r320 = getelementptr i32, i32* %r4, i32 8
+%r321 = load i32, i32* %r320
+%r322 = zext i32 %r321 to i288
+%r323 = shl i288 %r322, 256
+%r324 = or i288 %r318, %r323
+%r325 = zext i288 %r324 to i320
+%r327 = getelementptr i32, i32* %r4, i32 9
+%r328 = load i32, i32* %r327
+%r329 = zext i32 %r328 to i320
+%r330 = shl i320 %r329, 288
+%r331 = or i320 %r325, %r330
+%r332 = zext i320 %r331 to i352
+%r334 = getelementptr i32, i32* %r4, i32 10
+%r335 = load i32, i32* %r334
+%r336 = zext i32 %r335 to i352
+%r337 = shl i352 %r336, 320
+%r338 = or i352 %r332, %r337
+%r339 = zext i352 %r338 to i384
+%r341 = getelementptr i32, i32* %r4, i32 11
+%r342 = load i32, i32* %r341
+%r343 = zext i32 %r342 to i384
+%r344 = shl i384 %r343, 352
+%r345 = or i384 %r339, %r344
+%r346 = zext i384 %r345 to i416
+%r348 = getelementptr i32, i32* %r4, i32 12
+%r349 = load i32, i32* %r348
+%r350 = zext i32 %r349 to i416
+%r351 = shl i416 %r350, 384
+%r352 = or i416 %r346, %r351
+%r353 = zext i416 %r352 to i448
+%r355 = getelementptr i32, i32* %r4, i32 13
+%r356 = load i32, i32* %r355
+%r357 = zext i32 %r356 to i448
+%r358 = shl i448 %r357, 416
+%r359 = or i448 %r353, %r358
+%r360 = zext i448 %r359 to i480
+%r362 = getelementptr i32, i32* %r4, i32 14
+%r363 = load i32, i32* %r362
+%r364 = zext i32 %r363 to i480
+%r365 = shl i480 %r364, 448
+%r366 = or i480 %r360, %r365
+%r367 = add i480 %r206, %r366 ; truncated difference plus the %r4 value, wrapping modulo 2^480
+%r368 = trunc i480 %r367 to i32 ; overwrite %r1 with the corrected value, lowest word first
+%r370 = getelementptr i32, i32* %r1, i32 0
+store i32 %r368, i32* %r370
+%r371 = lshr i480 %r367, 32
+%r372 = trunc i480 %r371 to i32
+%r374 = getelementptr i32, i32* %r1, i32 1
+store i32 %r372, i32* %r374
+%r375 = lshr i480 %r371, 32
+%r376 = trunc i480 %r375 to i32
+%r378 = getelementptr i32, i32* %r1, i32 2
+store i32 %r376, i32* %r378
+%r379 = lshr i480 %r375, 32
+%r380 = trunc i480 %r379 to i32
+%r382 = getelementptr i32, i32* %r1, i32 3
+store i32 %r380, i32* %r382
+%r383 = lshr i480 %r379, 32
+%r384 = trunc i480 %r383 to i32
+%r386 = getelementptr i32, i32* %r1, i32 4
+store i32 %r384, i32* %r386
+%r387 = lshr i480 %r383, 32
+%r388 = trunc i480 %r387 to i32
+%r390 = getelementptr i32, i32* %r1, i32 5
+store i32 %r388, i32* %r390
+%r391 = lshr i480 %r387, 32
+%r392 = trunc i480 %r391 to i32
+%r394 = getelementptr i32, i32* %r1, i32 6
+store i32 %r392, i32* %r394
+%r395 = lshr i480 %r391, 32
+%r396 = trunc i480 %r395 to i32
+%r398 = getelementptr i32, i32* %r1, i32 7
+store i32 %r396, i32* %r398
+%r399 = lshr i480 %r395, 32
+%r400 = trunc i480 %r399 to i32
+%r402 = getelementptr i32, i32* %r1, i32 8
+store i32 %r400, i32* %r402
+%r403 = lshr i480 %r399, 32
+%r404 = trunc i480 %r403 to i32
+%r406 = getelementptr i32, i32* %r1, i32 9
+store i32 %r404, i32* %r406
+%r407 = lshr i480 %r403, 32
+%r408 = trunc i480 %r407 to i32
+%r410 = getelementptr i32, i32* %r1, i32 10
+store i32 %r408, i32* %r410
+%r411 = lshr i480 %r407, 32
+%r412 = trunc i480 %r411 to i32
+%r414 = getelementptr i32, i32* %r1, i32 11
+store i32 %r412, i32* %r414
+%r415 = lshr i480 %r411, 32
+%r416 = trunc i480 %r415 to i32
+%r418 = getelementptr i32, i32* %r1, i32 12
+store i32 %r416, i32* %r418
+%r419 = lshr i480 %r415, 32
+%r420 = trunc i480 %r419 to i32
+%r422 = getelementptr i32, i32* %r1, i32 13
+store i32 %r420, i32* %r422
+%r423 = lshr i480 %r419, 32
+%r424 = trunc i480 %r423 to i32
+%r426 = getelementptr i32, i32* %r1, i32 14
+store i32 %r424, i32* %r426
+ret void
+}
+define void @mcl_fp_subNF15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Stores (%r2 - %r3) to %r1, first adding the %r4 value when bit 479 of the difference is set; operands are 15 x i32 words.
+{ ; NOTE(review): %r4 is presumably the field modulus, and "NF" presumably means its top bit is clear - confirm against the generator.
+%r5 = load i32, i32* %r2 ; assemble the 15 words at %r2 into one i480, word 0 in the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = load i32, i32* %r3 ; same assembly for the 15 words at %r3
+%r105 = zext i32 %r104 to i64
+%r107 = getelementptr i32, i32* %r3, i32 1
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i64
+%r110 = shl i64 %r109, 32
+%r111 = or i64 %r105, %r110
+%r112 = zext i64 %r111 to i96
+%r114 = getelementptr i32, i32* %r3, i32 2
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i96
+%r117 = shl i96 %r116, 64
+%r118 = or i96 %r112, %r117
+%r119 = zext i96 %r118 to i128
+%r121 = getelementptr i32, i32* %r3, i32 3
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i128
+%r124 = shl i128 %r123, 96
+%r125 = or i128 %r119, %r124
+%r126 = zext i128 %r125 to i160
+%r128 = getelementptr i32, i32* %r3, i32 4
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i160
+%r131 = shl i160 %r130, 128
+%r132 = or i160 %r126, %r131
+%r133 = zext i160 %r132 to i192
+%r135 = getelementptr i32, i32* %r3, i32 5
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i192
+%r138 = shl i192 %r137, 160
+%r139 = or i192 %r133, %r138
+%r140 = zext i192 %r139 to i224
+%r142 = getelementptr i32, i32* %r3, i32 6
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i224
+%r145 = shl i224 %r144, 192
+%r146 = or i224 %r140, %r145
+%r147 = zext i224 %r146 to i256
+%r149 = getelementptr i32, i32* %r3, i32 7
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i256
+%r152 = shl i256 %r151, 224
+%r153 = or i256 %r147, %r152
+%r154 = zext i256 %r153 to i288
+%r156 = getelementptr i32, i32* %r3, i32 8
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i288
+%r159 = shl i288 %r158, 256
+%r160 = or i288 %r154, %r159
+%r161 = zext i288 %r160 to i320
+%r163 = getelementptr i32, i32* %r3, i32 9
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i320
+%r166 = shl i320 %r165, 288
+%r167 = or i320 %r161, %r166
+%r168 = zext i320 %r167 to i352
+%r170 = getelementptr i32, i32* %r3, i32 10
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i352
+%r173 = shl i352 %r172, 320
+%r174 = or i352 %r168, %r173
+%r175 = zext i352 %r174 to i384
+%r177 = getelementptr i32, i32* %r3, i32 11
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i384
+%r180 = shl i384 %r179, 352
+%r181 = or i384 %r175, %r180
+%r182 = zext i384 %r181 to i416
+%r184 = getelementptr i32, i32* %r3, i32 12
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i416
+%r187 = shl i416 %r186, 384
+%r188 = or i416 %r182, %r187
+%r189 = zext i416 %r188 to i448
+%r191 = getelementptr i32, i32* %r3, i32 13
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i448
+%r194 = shl i448 %r193, 416
+%r195 = or i448 %r189, %r194
+%r196 = zext i448 %r195 to i480
+%r198 = getelementptr i32, i32* %r3, i32 14
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i480
+%r201 = shl i480 %r200, 448
+%r202 = or i480 %r196, %r201
+%r203 = sub i480 %r103, %r202 ; 480-bit difference, wrapping modulo 2^480
+%r204 = lshr i480 %r203, 479 ; move bit 479 of the difference down to bit 0
+%r205 = trunc i480 %r204 to i1 ; i1 flag = top bit of the difference
+%r206 = load i32, i32* %r4 ; assemble the 15 words at %r4 into an i480 (loaded regardless of the flag)
+%r207 = zext i32 %r206 to i64
+%r209 = getelementptr i32, i32* %r4, i32 1
+%r210 = load i32, i32* %r209
+%r211 = zext i32 %r210 to i64
+%r212 = shl i64 %r211, 32
+%r213 = or i64 %r207, %r212
+%r214 = zext i64 %r213 to i96
+%r216 = getelementptr i32, i32* %r4, i32 2
+%r217 = load i32, i32* %r216
+%r218 = zext i32 %r217 to i96
+%r219 = shl i96 %r218, 64
+%r220 = or i96 %r214, %r219
+%r221 = zext i96 %r220 to i128
+%r223 = getelementptr i32, i32* %r4, i32 3
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i128
+%r226 = shl i128 %r225, 96
+%r227 = or i128 %r221, %r226
+%r228 = zext i128 %r227 to i160
+%r230 = getelementptr i32, i32* %r4, i32 4
+%r231 = load i32, i32* %r230
+%r232 = zext i32 %r231 to i160
+%r233 = shl i160 %r232, 128
+%r234 = or i160 %r228, %r233
+%r235 = zext i160 %r234 to i192
+%r237 = getelementptr i32, i32* %r4, i32 5
+%r238 = load i32, i32* %r237
+%r239 = zext i32 %r238 to i192
+%r240 = shl i192 %r239, 160
+%r241 = or i192 %r235, %r240
+%r242 = zext i192 %r241 to i224
+%r244 = getelementptr i32, i32* %r4, i32 6
+%r245 = load i32, i32* %r244
+%r246 = zext i32 %r245 to i224
+%r247 = shl i224 %r246, 192
+%r248 = or i224 %r242, %r247
+%r249 = zext i224 %r248 to i256
+%r251 = getelementptr i32, i32* %r4, i32 7
+%r252 = load i32, i32* %r251
+%r253 = zext i32 %r252 to i256
+%r254 = shl i256 %r253, 224
+%r255 = or i256 %r249, %r254
+%r256 = zext i256 %r255 to i288
+%r258 = getelementptr i32, i32* %r4, i32 8
+%r259 = load i32, i32* %r258
+%r260 = zext i32 %r259 to i288
+%r261 = shl i288 %r260, 256
+%r262 = or i288 %r256, %r261
+%r263 = zext i288 %r262 to i320
+%r265 = getelementptr i32, i32* %r4, i32 9
+%r266 = load i32, i32* %r265
+%r267 = zext i32 %r266 to i320
+%r268 = shl i320 %r267, 288
+%r269 = or i320 %r263, %r268
+%r270 = zext i320 %r269 to i352
+%r272 = getelementptr i32, i32* %r4, i32 10
+%r273 = load i32, i32* %r272
+%r274 = zext i32 %r273 to i352
+%r275 = shl i352 %r274, 320
+%r276 = or i352 %r270, %r275
+%r277 = zext i352 %r276 to i384
+%r279 = getelementptr i32, i32* %r4, i32 11
+%r280 = load i32, i32* %r279
+%r281 = zext i32 %r280 to i384
+%r282 = shl i384 %r281, 352
+%r283 = or i384 %r277, %r282
+%r284 = zext i384 %r283 to i416
+%r286 = getelementptr i32, i32* %r4, i32 12
+%r287 = load i32, i32* %r286
+%r288 = zext i32 %r287 to i416
+%r289 = shl i416 %r288, 384
+%r290 = or i416 %r284, %r289
+%r291 = zext i416 %r290 to i448
+%r293 = getelementptr i32, i32* %r4, i32 13
+%r294 = load i32, i32* %r293
+%r295 = zext i32 %r294 to i448
+%r296 = shl i448 %r295, 416
+%r297 = or i448 %r291, %r296
+%r298 = zext i448 %r297 to i480
+%r300 = getelementptr i32, i32* %r4, i32 14
+%r301 = load i32, i32* %r300
+%r302 = zext i32 %r301 to i480
+%r303 = shl i480 %r302, 448
+%r304 = or i480 %r298, %r303
+%r306 = select i1 %r205, i480 %r304, i480 0 ; addend is the %r4 value when the flag is set, otherwise 0
+%r307 = add i480 %r203, %r306 ; apply the addend without branching (wraps modulo 2^480)
+%r308 = trunc i480 %r307 to i32 ; write the result to %r1 as 15 words, lowest word first
+%r310 = getelementptr i32, i32* %r1, i32 0
+store i32 %r308, i32* %r310
+%r311 = lshr i480 %r307, 32
+%r312 = trunc i480 %r311 to i32
+%r314 = getelementptr i32, i32* %r1, i32 1
+store i32 %r312, i32* %r314
+%r315 = lshr i480 %r311, 32
+%r316 = trunc i480 %r315 to i32
+%r318 = getelementptr i32, i32* %r1, i32 2
+store i32 %r316, i32* %r318
+%r319 = lshr i480 %r315, 32
+%r320 = trunc i480 %r319 to i32
+%r322 = getelementptr i32, i32* %r1, i32 3
+store i32 %r320, i32* %r322
+%r323 = lshr i480 %r319, 32
+%r324 = trunc i480 %r323 to i32
+%r326 = getelementptr i32, i32* %r1, i32 4
+store i32 %r324, i32* %r326
+%r327 = lshr i480 %r323, 32
+%r328 = trunc i480 %r327 to i32
+%r330 = getelementptr i32, i32* %r1, i32 5
+store i32 %r328, i32* %r330
+%r331 = lshr i480 %r327, 32
+%r332 = trunc i480 %r331 to i32
+%r334 = getelementptr i32, i32* %r1, i32 6
+store i32 %r332, i32* %r334
+%r335 = lshr i480 %r331, 32
+%r336 = trunc i480 %r335 to i32
+%r338 = getelementptr i32, i32* %r1, i32 7
+store i32 %r336, i32* %r338
+%r339 = lshr i480 %r335, 32
+%r340 = trunc i480 %r339 to i32
+%r342 = getelementptr i32, i32* %r1, i32 8
+store i32 %r340, i32* %r342
+%r343 = lshr i480 %r339, 32
+%r344 = trunc i480 %r343 to i32
+%r346 = getelementptr i32, i32* %r1, i32 9
+store i32 %r344, i32* %r346
+%r347 = lshr i480 %r343, 32
+%r348 = trunc i480 %r347 to i32
+%r350 = getelementptr i32, i32* %r1, i32 10
+store i32 %r348, i32* %r350
+%r351 = lshr i480 %r347, 32
+%r352 = trunc i480 %r351 to i32
+%r354 = getelementptr i32, i32* %r1, i32 11
+store i32 %r352, i32* %r354
+%r355 = lshr i480 %r351, 32
+%r356 = trunc i480 %r355 to i32
+%r358 = getelementptr i32, i32* %r1, i32 12
+store i32 %r356, i32* %r358
+%r359 = lshr i480 %r355, 32
+%r360 = trunc i480 %r359 to i32
+%r362 = getelementptr i32, i32* %r1, i32 13
+store i32 %r360, i32* %r362
+%r363 = lshr i480 %r359, 32
+%r364 = trunc i480 %r363 to i32
+%r366 = getelementptr i32, i32* %r1, i32 14
+store i32 %r364, i32* %r366
+ret void
+}
+define void @mcl_fpDbl_add15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; double-width add: %r1[0..29] = %r2[0..29] + %r3[0..29], with the upper 15 limbs conditionally reduced by the 15-limb value at %r4
+{ ; all values are little-endian arrays of 32-bit limbs; %r4 is presumably the field modulus p (15 limbs) - confirm against the caller
+%r5 = load i32, i32* %r2 ; --- pack the 30 limbs of %r2 into one i960: limb i is shifted to bit 32*i and OR-ed in
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165
+%r167 = zext i768 %r166 to i800
+%r169 = getelementptr i32, i32* %r2, i32 24
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i800
+%r172 = shl i800 %r171, 768
+%r173 = or i800 %r167, %r172
+%r174 = zext i800 %r173 to i832
+%r176 = getelementptr i32, i32* %r2, i32 25
+%r177 = load i32, i32* %r176
+%r178 = zext i32 %r177 to i832
+%r179 = shl i832 %r178, 800
+%r180 = or i832 %r174, %r179
+%r181 = zext i832 %r180 to i864
+%r183 = getelementptr i32, i32* %r2, i32 26
+%r184 = load i32, i32* %r183
+%r185 = zext i32 %r184 to i864
+%r186 = shl i864 %r185, 832
+%r187 = or i864 %r181, %r186
+%r188 = zext i864 %r187 to i896
+%r190 = getelementptr i32, i32* %r2, i32 27
+%r191 = load i32, i32* %r190
+%r192 = zext i32 %r191 to i896
+%r193 = shl i896 %r192, 864
+%r194 = or i896 %r188, %r193
+%r195 = zext i896 %r194 to i928
+%r197 = getelementptr i32, i32* %r2, i32 28
+%r198 = load i32, i32* %r197
+%r199 = zext i32 %r198 to i928
+%r200 = shl i928 %r199, 896
+%r201 = or i928 %r195, %r200
+%r202 = zext i928 %r201 to i960
+%r204 = getelementptr i32, i32* %r2, i32 29
+%r205 = load i32, i32* %r204
+%r206 = zext i32 %r205 to i960
+%r207 = shl i960 %r206, 928
+%r208 = or i960 %r202, %r207 ; %r208 = the whole 960-bit operand read from %r2
+%r209 = load i32, i32* %r3 ; --- pack the 30 limbs of %r3 into one i960, same scheme as for %r2
+%r210 = zext i32 %r209 to i64
+%r212 = getelementptr i32, i32* %r3, i32 1
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i64
+%r215 = shl i64 %r214, 32
+%r216 = or i64 %r210, %r215
+%r217 = zext i64 %r216 to i96
+%r219 = getelementptr i32, i32* %r3, i32 2
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i96
+%r222 = shl i96 %r221, 64
+%r223 = or i96 %r217, %r222
+%r224 = zext i96 %r223 to i128
+%r226 = getelementptr i32, i32* %r3, i32 3
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i128
+%r229 = shl i128 %r228, 96
+%r230 = or i128 %r224, %r229
+%r231 = zext i128 %r230 to i160
+%r233 = getelementptr i32, i32* %r3, i32 4
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i160
+%r236 = shl i160 %r235, 128
+%r237 = or i160 %r231, %r236
+%r238 = zext i160 %r237 to i192
+%r240 = getelementptr i32, i32* %r3, i32 5
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i192
+%r243 = shl i192 %r242, 160
+%r244 = or i192 %r238, %r243
+%r245 = zext i192 %r244 to i224
+%r247 = getelementptr i32, i32* %r3, i32 6
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i224
+%r250 = shl i224 %r249, 192
+%r251 = or i224 %r245, %r250
+%r252 = zext i224 %r251 to i256
+%r254 = getelementptr i32, i32* %r3, i32 7
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i256
+%r257 = shl i256 %r256, 224
+%r258 = or i256 %r252, %r257
+%r259 = zext i256 %r258 to i288
+%r261 = getelementptr i32, i32* %r3, i32 8
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i288
+%r264 = shl i288 %r263, 256
+%r265 = or i288 %r259, %r264
+%r266 = zext i288 %r265 to i320
+%r268 = getelementptr i32, i32* %r3, i32 9
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i320
+%r271 = shl i320 %r270, 288
+%r272 = or i320 %r266, %r271
+%r273 = zext i320 %r272 to i352
+%r275 = getelementptr i32, i32* %r3, i32 10
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i352
+%r278 = shl i352 %r277, 320
+%r279 = or i352 %r273, %r278
+%r280 = zext i352 %r279 to i384
+%r282 = getelementptr i32, i32* %r3, i32 11
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i384
+%r285 = shl i384 %r284, 352
+%r286 = or i384 %r280, %r285
+%r287 = zext i384 %r286 to i416
+%r289 = getelementptr i32, i32* %r3, i32 12
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i416
+%r292 = shl i416 %r291, 384
+%r293 = or i416 %r287, %r292
+%r294 = zext i416 %r293 to i448
+%r296 = getelementptr i32, i32* %r3, i32 13
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i448
+%r299 = shl i448 %r298, 416
+%r300 = or i448 %r294, %r299
+%r301 = zext i448 %r300 to i480
+%r303 = getelementptr i32, i32* %r3, i32 14
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i480
+%r306 = shl i480 %r305, 448
+%r307 = or i480 %r301, %r306
+%r308 = zext i480 %r307 to i512
+%r310 = getelementptr i32, i32* %r3, i32 15
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i512
+%r313 = shl i512 %r312, 480
+%r314 = or i512 %r308, %r313
+%r315 = zext i512 %r314 to i544
+%r317 = getelementptr i32, i32* %r3, i32 16
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i544
+%r320 = shl i544 %r319, 512
+%r321 = or i544 %r315, %r320
+%r322 = zext i544 %r321 to i576
+%r324 = getelementptr i32, i32* %r3, i32 17
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i576
+%r327 = shl i576 %r326, 544
+%r328 = or i576 %r322, %r327
+%r329 = zext i576 %r328 to i608
+%r331 = getelementptr i32, i32* %r3, i32 18
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i608
+%r334 = shl i608 %r333, 576
+%r335 = or i608 %r329, %r334
+%r336 = zext i608 %r335 to i640
+%r338 = getelementptr i32, i32* %r3, i32 19
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i640
+%r341 = shl i640 %r340, 608
+%r342 = or i640 %r336, %r341
+%r343 = zext i640 %r342 to i672
+%r345 = getelementptr i32, i32* %r3, i32 20
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i672
+%r348 = shl i672 %r347, 640
+%r349 = or i672 %r343, %r348
+%r350 = zext i672 %r349 to i704
+%r352 = getelementptr i32, i32* %r3, i32 21
+%r353 = load i32, i32* %r352
+%r354 = zext i32 %r353 to i704
+%r355 = shl i704 %r354, 672
+%r356 = or i704 %r350, %r355
+%r357 = zext i704 %r356 to i736
+%r359 = getelementptr i32, i32* %r3, i32 22
+%r360 = load i32, i32* %r359
+%r361 = zext i32 %r360 to i736
+%r362 = shl i736 %r361, 704
+%r363 = or i736 %r357, %r362
+%r364 = zext i736 %r363 to i768
+%r366 = getelementptr i32, i32* %r3, i32 23
+%r367 = load i32, i32* %r366
+%r368 = zext i32 %r367 to i768
+%r369 = shl i768 %r368, 736
+%r370 = or i768 %r364, %r369
+%r371 = zext i768 %r370 to i800
+%r373 = getelementptr i32, i32* %r3, i32 24
+%r374 = load i32, i32* %r373
+%r375 = zext i32 %r374 to i800
+%r376 = shl i800 %r375, 768
+%r377 = or i800 %r371, %r376
+%r378 = zext i800 %r377 to i832
+%r380 = getelementptr i32, i32* %r3, i32 25
+%r381 = load i32, i32* %r380
+%r382 = zext i32 %r381 to i832
+%r383 = shl i832 %r382, 800
+%r384 = or i832 %r378, %r383
+%r385 = zext i832 %r384 to i864
+%r387 = getelementptr i32, i32* %r3, i32 26
+%r388 = load i32, i32* %r387
+%r389 = zext i32 %r388 to i864
+%r390 = shl i864 %r389, 832
+%r391 = or i864 %r385, %r390
+%r392 = zext i864 %r391 to i896
+%r394 = getelementptr i32, i32* %r3, i32 27
+%r395 = load i32, i32* %r394
+%r396 = zext i32 %r395 to i896
+%r397 = shl i896 %r396, 864
+%r398 = or i896 %r392, %r397
+%r399 = zext i896 %r398 to i928
+%r401 = getelementptr i32, i32* %r3, i32 28
+%r402 = load i32, i32* %r401
+%r403 = zext i32 %r402 to i928
+%r404 = shl i928 %r403, 896
+%r405 = or i928 %r399, %r404
+%r406 = zext i928 %r405 to i960
+%r408 = getelementptr i32, i32* %r3, i32 29
+%r409 = load i32, i32* %r408
+%r410 = zext i32 %r409 to i960
+%r411 = shl i960 %r410, 928
+%r412 = or i960 %r406, %r411 ; %r412 = the whole 960-bit operand read from %r3
+%r413 = zext i960 %r208 to i992 ; widen both operands by one limb so the carry out of bit 959 is kept
+%r414 = zext i960 %r412 to i992
+%r415 = add i992 %r413, %r414 ; full sum including carry
+%r416 = trunc i992 %r415 to i480 ; low 480 bits of the sum: written to %r1[0..14] with no reduction
+%r417 = trunc i480 %r416 to i32 ; --- unpack: take the low 32 bits as a limb, then shift right by 32 for the next one
+%r419 = getelementptr i32, i32* %r1, i32 0
+store i32 %r417, i32* %r419
+%r420 = lshr i480 %r416, 32
+%r421 = trunc i480 %r420 to i32
+%r423 = getelementptr i32, i32* %r1, i32 1
+store i32 %r421, i32* %r423
+%r424 = lshr i480 %r420, 32
+%r425 = trunc i480 %r424 to i32
+%r427 = getelementptr i32, i32* %r1, i32 2
+store i32 %r425, i32* %r427
+%r428 = lshr i480 %r424, 32
+%r429 = trunc i480 %r428 to i32
+%r431 = getelementptr i32, i32* %r1, i32 3
+store i32 %r429, i32* %r431
+%r432 = lshr i480 %r428, 32
+%r433 = trunc i480 %r432 to i32
+%r435 = getelementptr i32, i32* %r1, i32 4
+store i32 %r433, i32* %r435
+%r436 = lshr i480 %r432, 32
+%r437 = trunc i480 %r436 to i32
+%r439 = getelementptr i32, i32* %r1, i32 5
+store i32 %r437, i32* %r439
+%r440 = lshr i480 %r436, 32
+%r441 = trunc i480 %r440 to i32
+%r443 = getelementptr i32, i32* %r1, i32 6
+store i32 %r441, i32* %r443
+%r444 = lshr i480 %r440, 32
+%r445 = trunc i480 %r444 to i32
+%r447 = getelementptr i32, i32* %r1, i32 7
+store i32 %r445, i32* %r447
+%r448 = lshr i480 %r444, 32
+%r449 = trunc i480 %r448 to i32
+%r451 = getelementptr i32, i32* %r1, i32 8
+store i32 %r449, i32* %r451
+%r452 = lshr i480 %r448, 32
+%r453 = trunc i480 %r452 to i32
+%r455 = getelementptr i32, i32* %r1, i32 9
+store i32 %r453, i32* %r455
+%r456 = lshr i480 %r452, 32
+%r457 = trunc i480 %r456 to i32
+%r459 = getelementptr i32, i32* %r1, i32 10
+store i32 %r457, i32* %r459
+%r460 = lshr i480 %r456, 32
+%r461 = trunc i480 %r460 to i32
+%r463 = getelementptr i32, i32* %r1, i32 11
+store i32 %r461, i32* %r463
+%r464 = lshr i480 %r460, 32
+%r465 = trunc i480 %r464 to i32
+%r467 = getelementptr i32, i32* %r1, i32 12
+store i32 %r465, i32* %r467
+%r468 = lshr i480 %r464, 32
+%r469 = trunc i480 %r468 to i32
+%r471 = getelementptr i32, i32* %r1, i32 13
+store i32 %r469, i32* %r471
+%r472 = lshr i480 %r468, 32
+%r473 = trunc i480 %r472 to i32
+%r475 = getelementptr i32, i32* %r1, i32 14
+store i32 %r473, i32* %r475
+%r476 = lshr i992 %r415, 480 ; bits 480 and above of the sum: the upper half plus the carry
+%r477 = trunc i992 %r476 to i512 ; lossless: two 960-bit operands sum to < 2^961, so this value is < 2^481
+%r478 = load i32, i32* %r4 ; --- pack the 15 limbs of %r4 into one i480 (loaded only after the low-half stores to %r1 above)
+%r479 = zext i32 %r478 to i64
+%r481 = getelementptr i32, i32* %r4, i32 1
+%r482 = load i32, i32* %r481
+%r483 = zext i32 %r482 to i64
+%r484 = shl i64 %r483, 32
+%r485 = or i64 %r479, %r484
+%r486 = zext i64 %r485 to i96
+%r488 = getelementptr i32, i32* %r4, i32 2
+%r489 = load i32, i32* %r488
+%r490 = zext i32 %r489 to i96
+%r491 = shl i96 %r490, 64
+%r492 = or i96 %r486, %r491
+%r493 = zext i96 %r492 to i128
+%r495 = getelementptr i32, i32* %r4, i32 3
+%r496 = load i32, i32* %r495
+%r497 = zext i32 %r496 to i128
+%r498 = shl i128 %r497, 96
+%r499 = or i128 %r493, %r498
+%r500 = zext i128 %r499 to i160
+%r502 = getelementptr i32, i32* %r4, i32 4
+%r503 = load i32, i32* %r502
+%r504 = zext i32 %r503 to i160
+%r505 = shl i160 %r504, 128
+%r506 = or i160 %r500, %r505
+%r507 = zext i160 %r506 to i192
+%r509 = getelementptr i32, i32* %r4, i32 5
+%r510 = load i32, i32* %r509
+%r511 = zext i32 %r510 to i192
+%r512 = shl i192 %r511, 160
+%r513 = or i192 %r507, %r512
+%r514 = zext i192 %r513 to i224
+%r516 = getelementptr i32, i32* %r4, i32 6
+%r517 = load i32, i32* %r516
+%r518 = zext i32 %r517 to i224
+%r519 = shl i224 %r518, 192
+%r520 = or i224 %r514, %r519
+%r521 = zext i224 %r520 to i256
+%r523 = getelementptr i32, i32* %r4, i32 7
+%r524 = load i32, i32* %r523
+%r525 = zext i32 %r524 to i256
+%r526 = shl i256 %r525, 224
+%r527 = or i256 %r521, %r526
+%r528 = zext i256 %r527 to i288
+%r530 = getelementptr i32, i32* %r4, i32 8
+%r531 = load i32, i32* %r530
+%r532 = zext i32 %r531 to i288
+%r533 = shl i288 %r532, 256
+%r534 = or i288 %r528, %r533
+%r535 = zext i288 %r534 to i320
+%r537 = getelementptr i32, i32* %r4, i32 9
+%r538 = load i32, i32* %r537
+%r539 = zext i32 %r538 to i320
+%r540 = shl i320 %r539, 288
+%r541 = or i320 %r535, %r540
+%r542 = zext i320 %r541 to i352
+%r544 = getelementptr i32, i32* %r4, i32 10
+%r545 = load i32, i32* %r544
+%r546 = zext i32 %r545 to i352
+%r547 = shl i352 %r546, 320
+%r548 = or i352 %r542, %r547
+%r549 = zext i352 %r548 to i384
+%r551 = getelementptr i32, i32* %r4, i32 11
+%r552 = load i32, i32* %r551
+%r553 = zext i32 %r552 to i384
+%r554 = shl i384 %r553, 352
+%r555 = or i384 %r549, %r554
+%r556 = zext i384 %r555 to i416
+%r558 = getelementptr i32, i32* %r4, i32 12
+%r559 = load i32, i32* %r558
+%r560 = zext i32 %r559 to i416
+%r561 = shl i416 %r560, 384
+%r562 = or i416 %r556, %r561
+%r563 = zext i416 %r562 to i448
+%r565 = getelementptr i32, i32* %r4, i32 13
+%r566 = load i32, i32* %r565
+%r567 = zext i32 %r566 to i448
+%r568 = shl i448 %r567, 416
+%r569 = or i448 %r563, %r568
+%r570 = zext i448 %r569 to i480
+%r572 = getelementptr i32, i32* %r4, i32 14
+%r573 = load i32, i32* %r572
+%r574 = zext i32 %r573 to i480
+%r575 = shl i480 %r574, 448
+%r576 = or i480 %r570, %r575
+%r577 = zext i480 %r576 to i512
+%r578 = sub i512 %r477, %r577 ; trial subtraction: (upper half with carry) - (value at %r4), wrapping in 512 bits
+%r579 = lshr i512 %r578, 480 ; bit 480 of the difference is used as the "went negative" flag
+%r580 = trunc i512 %r579 to i1 ; NOTE(review): matches the borrow only if a non-negative difference stays below 2^480 - presumably callers keep the upper half < 2 * (value at %r4); confirm
+%r581 = select i1 %r580, i512 %r477, i512 %r578 ; flag set: keep the unsubtracted upper half; otherwise take the difference
+%r582 = trunc i512 %r581 to i480 ; only the low 480 bits of the selected value are stored
+%r584 = getelementptr i32, i32* %r1, i32 15 ; %r584 = &%r1[15], base of the upper half of the output
+%r585 = trunc i480 %r582 to i32 ; --- unpack the selected value into %r1[15..29], one 32-bit limb per step
+%r587 = getelementptr i32, i32* %r584, i32 0
+store i32 %r585, i32* %r587
+%r588 = lshr i480 %r582, 32
+%r589 = trunc i480 %r588 to i32
+%r591 = getelementptr i32, i32* %r584, i32 1
+store i32 %r589, i32* %r591
+%r592 = lshr i480 %r588, 32
+%r593 = trunc i480 %r592 to i32
+%r595 = getelementptr i32, i32* %r584, i32 2
+store i32 %r593, i32* %r595
+%r596 = lshr i480 %r592, 32
+%r597 = trunc i480 %r596 to i32
+%r599 = getelementptr i32, i32* %r584, i32 3
+store i32 %r597, i32* %r599
+%r600 = lshr i480 %r596, 32
+%r601 = trunc i480 %r600 to i32
+%r603 = getelementptr i32, i32* %r584, i32 4
+store i32 %r601, i32* %r603
+%r604 = lshr i480 %r600, 32
+%r605 = trunc i480 %r604 to i32
+%r607 = getelementptr i32, i32* %r584, i32 5
+store i32 %r605, i32* %r607
+%r608 = lshr i480 %r604, 32
+%r609 = trunc i480 %r608 to i32
+%r611 = getelementptr i32, i32* %r584, i32 6
+store i32 %r609, i32* %r611
+%r612 = lshr i480 %r608, 32
+%r613 = trunc i480 %r612 to i32
+%r615 = getelementptr i32, i32* %r584, i32 7
+store i32 %r613, i32* %r615
+%r616 = lshr i480 %r612, 32
+%r617 = trunc i480 %r616 to i32
+%r619 = getelementptr i32, i32* %r584, i32 8
+store i32 %r617, i32* %r619
+%r620 = lshr i480 %r616, 32
+%r621 = trunc i480 %r620 to i32
+%r623 = getelementptr i32, i32* %r584, i32 9
+store i32 %r621, i32* %r623
+%r624 = lshr i480 %r620, 32
+%r625 = trunc i480 %r624 to i32
+%r627 = getelementptr i32, i32* %r584, i32 10
+store i32 %r625, i32* %r627
+%r628 = lshr i480 %r624, 32
+%r629 = trunc i480 %r628 to i32
+%r631 = getelementptr i32, i32* %r584, i32 11
+store i32 %r629, i32* %r631
+%r632 = lshr i480 %r628, 32
+%r633 = trunc i480 %r632 to i32
+%r635 = getelementptr i32, i32* %r584, i32 12
+store i32 %r633, i32* %r635
+%r636 = lshr i480 %r632, 32
+%r637 = trunc i480 %r636 to i32
+%r639 = getelementptr i32, i32* %r584, i32 13
+store i32 %r637, i32* %r639
+%r640 = lshr i480 %r636, 32
+%r641 = trunc i480 %r640 to i32
+%r643 = getelementptr i32, i32* %r584, i32 14
+store i32 %r641, i32* %r643
+ret void
+}
+define void @mcl_fpDbl_sub15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165
+%r167 = zext i768 %r166 to i800
+%r169 = getelementptr i32, i32* %r2, i32 24
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i800
+%r172 = shl i800 %r171, 768
+%r173 = or i800 %r167, %r172
+%r174 = zext i800 %r173 to i832
+%r176 = getelementptr i32, i32* %r2, i32 25
+%r177 = load i32, i32* %r176
+%r178 = zext i32 %r177 to i832
+%r179 = shl i832 %r178, 800
+%r180 = or i832 %r174, %r179
+%r181 = zext i832 %r180 to i864
+%r183 = getelementptr i32, i32* %r2, i32 26
+%r184 = load i32, i32* %r183
+%r185 = zext i32 %r184 to i864
+%r186 = shl i864 %r185, 832
+%r187 = or i864 %r181, %r186
+%r188 = zext i864 %r187 to i896
+%r190 = getelementptr i32, i32* %r2, i32 27
+%r191 = load i32, i32* %r190
+%r192 = zext i32 %r191 to i896
+%r193 = shl i896 %r192, 864
+%r194 = or i896 %r188, %r193
+%r195 = zext i896 %r194 to i928
+%r197 = getelementptr i32, i32* %r2, i32 28
+%r198 = load i32, i32* %r197
+%r199 = zext i32 %r198 to i928
+%r200 = shl i928 %r199, 896
+%r201 = or i928 %r195, %r200
+%r202 = zext i928 %r201 to i960
+%r204 = getelementptr i32, i32* %r2, i32 29
+%r205 = load i32, i32* %r204
+%r206 = zext i32 %r205 to i960
+%r207 = shl i960 %r206, 928
+%r208 = or i960 %r202, %r207
+%r209 = load i32, i32* %r3
+%r210 = zext i32 %r209 to i64
+%r212 = getelementptr i32, i32* %r3, i32 1
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i64
+%r215 = shl i64 %r214, 32
+%r216 = or i64 %r210, %r215
+%r217 = zext i64 %r216 to i96
+%r219 = getelementptr i32, i32* %r3, i32 2
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i96
+%r222 = shl i96 %r221, 64
+%r223 = or i96 %r217, %r222
+%r224 = zext i96 %r223 to i128
+%r226 = getelementptr i32, i32* %r3, i32 3
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i128
+%r229 = shl i128 %r228, 96
+%r230 = or i128 %r224, %r229
+%r231 = zext i128 %r230 to i160
+%r233 = getelementptr i32, i32* %r3, i32 4
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i160
+%r236 = shl i160 %r235, 128
+%r237 = or i160 %r231, %r236
+%r238 = zext i160 %r237 to i192
+%r240 = getelementptr i32, i32* %r3, i32 5
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i192
+%r243 = shl i192 %r242, 160
+%r244 = or i192 %r238, %r243
+%r245 = zext i192 %r244 to i224
+%r247 = getelementptr i32, i32* %r3, i32 6
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i224
+%r250 = shl i224 %r249, 192
+%r251 = or i224 %r245, %r250
+%r252 = zext i224 %r251 to i256
+%r254 = getelementptr i32, i32* %r3, i32 7
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i256
+%r257 = shl i256 %r256, 224
+%r258 = or i256 %r252, %r257
+%r259 = zext i256 %r258 to i288
+%r261 = getelementptr i32, i32* %r3, i32 8
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i288
+%r264 = shl i288 %r263, 256
+%r265 = or i288 %r259, %r264
+%r266 = zext i288 %r265 to i320
+%r268 = getelementptr i32, i32* %r3, i32 9
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i320
+%r271 = shl i320 %r270, 288
+%r272 = or i320 %r266, %r271
+%r273 = zext i320 %r272 to i352
+%r275 = getelementptr i32, i32* %r3, i32 10
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i352
+%r278 = shl i352 %r277, 320
+%r279 = or i352 %r273, %r278
+%r280 = zext i352 %r279 to i384
+%r282 = getelementptr i32, i32* %r3, i32 11
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i384
+%r285 = shl i384 %r284, 352
+%r286 = or i384 %r280, %r285
+%r287 = zext i384 %r286 to i416
+%r289 = getelementptr i32, i32* %r3, i32 12
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i416
+%r292 = shl i416 %r291, 384
+%r293 = or i416 %r287, %r292
+%r294 = zext i416 %r293 to i448
+%r296 = getelementptr i32, i32* %r3, i32 13
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i448
+%r299 = shl i448 %r298, 416
+%r300 = or i448 %r294, %r299
+%r301 = zext i448 %r300 to i480
+%r303 = getelementptr i32, i32* %r3, i32 14
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i480
+%r306 = shl i480 %r305, 448
+%r307 = or i480 %r301, %r306
+%r308 = zext i480 %r307 to i512
+%r310 = getelementptr i32, i32* %r3, i32 15
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i512
+%r313 = shl i512 %r312, 480
+%r314 = or i512 %r308, %r313
+%r315 = zext i512 %r314 to i544
+%r317 = getelementptr i32, i32* %r3, i32 16
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i544
+%r320 = shl i544 %r319, 512
+%r321 = or i544 %r315, %r320
+%r322 = zext i544 %r321 to i576
+%r324 = getelementptr i32, i32* %r3, i32 17
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i576
+%r327 = shl i576 %r326, 544
+%r328 = or i576 %r322, %r327
+%r329 = zext i576 %r328 to i608
+%r331 = getelementptr i32, i32* %r3, i32 18
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i608
+%r334 = shl i608 %r333, 576
+%r335 = or i608 %r329, %r334
+%r336 = zext i608 %r335 to i640
+%r338 = getelementptr i32, i32* %r3, i32 19
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i640
+%r341 = shl i640 %r340, 608
+%r342 = or i640 %r336, %r341
+%r343 = zext i640 %r342 to i672
+%r345 = getelementptr i32, i32* %r3, i32 20
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i672
+%r348 = shl i672 %r347, 640
+%r349 = or i672 %r343, %r348
+%r350 = zext i672 %r349 to i704
+%r352 = getelementptr i32, i32* %r3, i32 21
+%r353 = load i32, i32* %r352
+%r354 = zext i32 %r353 to i704
+%r355 = shl i704 %r354, 672
+%r356 = or i704 %r350, %r355
+%r357 = zext i704 %r356 to i736
+%r359 = getelementptr i32, i32* %r3, i32 22
+%r360 = load i32, i32* %r359
+%r361 = zext i32 %r360 to i736
+%r362 = shl i736 %r361, 704
+%r363 = or i736 %r357, %r362
+%r364 = zext i736 %r363 to i768
+%r366 = getelementptr i32, i32* %r3, i32 23
+%r367 = load i32, i32* %r366
+%r368 = zext i32 %r367 to i768
+%r369 = shl i768 %r368, 736
+%r370 = or i768 %r364, %r369
+%r371 = zext i768 %r370 to i800
+%r373 = getelementptr i32, i32* %r3, i32 24
+%r374 = load i32, i32* %r373
+%r375 = zext i32 %r374 to i800
+%r376 = shl i800 %r375, 768
+%r377 = or i800 %r371, %r376
+%r378 = zext i800 %r377 to i832
+%r380 = getelementptr i32, i32* %r3, i32 25
+%r381 = load i32, i32* %r380
+%r382 = zext i32 %r381 to i832
+%r383 = shl i832 %r382, 800
+%r384 = or i832 %r378, %r383
+%r385 = zext i832 %r384 to i864
+%r387 = getelementptr i32, i32* %r3, i32 26
+%r388 = load i32, i32* %r387
+%r389 = zext i32 %r388 to i864
+%r390 = shl i864 %r389, 832
+%r391 = or i864 %r385, %r390
+%r392 = zext i864 %r391 to i896
+%r394 = getelementptr i32, i32* %r3, i32 27
+%r395 = load i32, i32* %r394
+%r396 = zext i32 %r395 to i896
+%r397 = shl i896 %r396, 864
+%r398 = or i896 %r392, %r397
+%r399 = zext i896 %r398 to i928
+%r401 = getelementptr i32, i32* %r3, i32 28
+%r402 = load i32, i32* %r401
+%r403 = zext i32 %r402 to i928
+%r404 = shl i928 %r403, 896
+%r405 = or i928 %r399, %r404
+%r406 = zext i928 %r405 to i960
+%r408 = getelementptr i32, i32* %r3, i32 29
+%r409 = load i32, i32* %r408
+%r410 = zext i32 %r409 to i960
+%r411 = shl i960 %r410, 928
+%r412 = or i960 %r406, %r411
+%r413 = zext i960 %r208 to i992
+%r414 = zext i960 %r412 to i992
+%r415 = sub i992 %r413, %r414
+%r416 = trunc i992 %r415 to i480
+%r417 = trunc i480 %r416 to i32
+%r419 = getelementptr i32, i32* %r1, i32 0
+store i32 %r417, i32* %r419
+%r420 = lshr i480 %r416, 32
+%r421 = trunc i480 %r420 to i32
+%r423 = getelementptr i32, i32* %r1, i32 1
+store i32 %r421, i32* %r423
+%r424 = lshr i480 %r420, 32
+%r425 = trunc i480 %r424 to i32
+%r427 = getelementptr i32, i32* %r1, i32 2
+store i32 %r425, i32* %r427
+%r428 = lshr i480 %r424, 32
+%r429 = trunc i480 %r428 to i32
+%r431 = getelementptr i32, i32* %r1, i32 3
+store i32 %r429, i32* %r431
+%r432 = lshr i480 %r428, 32
+%r433 = trunc i480 %r432 to i32
+%r435 = getelementptr i32, i32* %r1, i32 4
+store i32 %r433, i32* %r435
+%r436 = lshr i480 %r432, 32
+%r437 = trunc i480 %r436 to i32
+%r439 = getelementptr i32, i32* %r1, i32 5
+store i32 %r437, i32* %r439
+%r440 = lshr i480 %r436, 32
+%r441 = trunc i480 %r440 to i32
+%r443 = getelementptr i32, i32* %r1, i32 6
+store i32 %r441, i32* %r443
+%r444 = lshr i480 %r440, 32
+%r445 = trunc i480 %r444 to i32
+%r447 = getelementptr i32, i32* %r1, i32 7
+store i32 %r445, i32* %r447
+%r448 = lshr i480 %r444, 32
+%r449 = trunc i480 %r448 to i32
+%r451 = getelementptr i32, i32* %r1, i32 8
+store i32 %r449, i32* %r451
+%r452 = lshr i480 %r448, 32
+%r453 = trunc i480 %r452 to i32
+%r455 = getelementptr i32, i32* %r1, i32 9
+store i32 %r453, i32* %r455
+%r456 = lshr i480 %r452, 32
+%r457 = trunc i480 %r456 to i32
+%r459 = getelementptr i32, i32* %r1, i32 10
+store i32 %r457, i32* %r459
+%r460 = lshr i480 %r456, 32
+%r461 = trunc i480 %r460 to i32
+%r463 = getelementptr i32, i32* %r1, i32 11
+store i32 %r461, i32* %r463
+%r464 = lshr i480 %r460, 32
+%r465 = trunc i480 %r464 to i32
+%r467 = getelementptr i32, i32* %r1, i32 12
+store i32 %r465, i32* %r467
+%r468 = lshr i480 %r464, 32
+%r469 = trunc i480 %r468 to i32
+%r471 = getelementptr i32, i32* %r1, i32 13
+store i32 %r469, i32* %r471
+%r472 = lshr i480 %r468, 32
+%r473 = trunc i480 %r472 to i32
+%r475 = getelementptr i32, i32* %r1, i32 14
+store i32 %r473, i32* %r475
+%r476 = lshr i992 %r415, 480
+%r477 = trunc i992 %r476 to i480
+%r478 = lshr i992 %r415, 960
+%r479 = trunc i992 %r478 to i1
+%r480 = load i32, i32* %r4
+%r481 = zext i32 %r480 to i64
+%r483 = getelementptr i32, i32* %r4, i32 1
+%r484 = load i32, i32* %r483
+%r485 = zext i32 %r484 to i64
+%r486 = shl i64 %r485, 32
+%r487 = or i64 %r481, %r486
+%r488 = zext i64 %r487 to i96
+%r490 = getelementptr i32, i32* %r4, i32 2
+%r491 = load i32, i32* %r490
+%r492 = zext i32 %r491 to i96
+%r493 = shl i96 %r492, 64
+%r494 = or i96 %r488, %r493
+%r495 = zext i96 %r494 to i128
+%r497 = getelementptr i32, i32* %r4, i32 3
+%r498 = load i32, i32* %r497
+%r499 = zext i32 %r498 to i128
+%r500 = shl i128 %r499, 96
+%r501 = or i128 %r495, %r500
+%r502 = zext i128 %r501 to i160
+%r504 = getelementptr i32, i32* %r4, i32 4
+%r505 = load i32, i32* %r504
+%r506 = zext i32 %r505 to i160
+%r507 = shl i160 %r506, 128
+%r508 = or i160 %r502, %r507
+%r509 = zext i160 %r508 to i192
+%r511 = getelementptr i32, i32* %r4, i32 5
+%r512 = load i32, i32* %r511
+%r513 = zext i32 %r512 to i192
+%r514 = shl i192 %r513, 160
+%r515 = or i192 %r509, %r514
+%r516 = zext i192 %r515 to i224
+%r518 = getelementptr i32, i32* %r4, i32 6
+%r519 = load i32, i32* %r518
+%r520 = zext i32 %r519 to i224
+%r521 = shl i224 %r520, 192
+%r522 = or i224 %r516, %r521
+%r523 = zext i224 %r522 to i256
+%r525 = getelementptr i32, i32* %r4, i32 7
+%r526 = load i32, i32* %r525
+%r527 = zext i32 %r526 to i256
+%r528 = shl i256 %r527, 224
+%r529 = or i256 %r523, %r528
+%r530 = zext i256 %r529 to i288
+%r532 = getelementptr i32, i32* %r4, i32 8
+%r533 = load i32, i32* %r532
+%r534 = zext i32 %r533 to i288
+%r535 = shl i288 %r534, 256
+%r536 = or i288 %r530, %r535
+%r537 = zext i288 %r536 to i320
+%r539 = getelementptr i32, i32* %r4, i32 9
+%r540 = load i32, i32* %r539
+%r541 = zext i32 %r540 to i320
+%r542 = shl i320 %r541, 288
+%r543 = or i320 %r537, %r542
+%r544 = zext i320 %r543 to i352
+%r546 = getelementptr i32, i32* %r4, i32 10
+%r547 = load i32, i32* %r546
+%r548 = zext i32 %r547 to i352
+%r549 = shl i352 %r548, 320
+%r550 = or i352 %r544, %r549
+%r551 = zext i352 %r550 to i384
+%r553 = getelementptr i32, i32* %r4, i32 11
+%r554 = load i32, i32* %r553
+%r555 = zext i32 %r554 to i384
+%r556 = shl i384 %r555, 352
+%r557 = or i384 %r551, %r556
+%r558 = zext i384 %r557 to i416
+%r560 = getelementptr i32, i32* %r4, i32 12
+%r561 = load i32, i32* %r560
+%r562 = zext i32 %r561 to i416
+%r563 = shl i416 %r562, 384
+%r564 = or i416 %r558, %r563
+%r565 = zext i416 %r564 to i448
+%r567 = getelementptr i32, i32* %r4, i32 13
+%r568 = load i32, i32* %r567
+%r569 = zext i32 %r568 to i448
+%r570 = shl i448 %r569, 416
+%r571 = or i448 %r565, %r570
+%r572 = zext i448 %r571 to i480
+%r574 = getelementptr i32, i32* %r4, i32 14
+%r575 = load i32, i32* %r574
+%r576 = zext i32 %r575 to i480
+%r577 = shl i480 %r576, 448
+%r578 = or i480 %r572, %r577
+%r580 = select i1 %r479, i480 %r578, i480 0
+%r581 = add i480 %r477, %r580
+%r583 = getelementptr i32, i32* %r1, i32 15
+%r584 = trunc i480 %r581 to i32
+%r586 = getelementptr i32, i32* %r583, i32 0
+store i32 %r584, i32* %r586
+%r587 = lshr i480 %r581, 32
+%r588 = trunc i480 %r587 to i32
+%r590 = getelementptr i32, i32* %r583, i32 1
+store i32 %r588, i32* %r590
+%r591 = lshr i480 %r587, 32
+%r592 = trunc i480 %r591 to i32
+%r594 = getelementptr i32, i32* %r583, i32 2
+store i32 %r592, i32* %r594
+%r595 = lshr i480 %r591, 32
+%r596 = trunc i480 %r595 to i32
+%r598 = getelementptr i32, i32* %r583, i32 3
+store i32 %r596, i32* %r598
+%r599 = lshr i480 %r595, 32
+%r600 = trunc i480 %r599 to i32
+%r602 = getelementptr i32, i32* %r583, i32 4
+store i32 %r600, i32* %r602
+%r603 = lshr i480 %r599, 32
+%r604 = trunc i480 %r603 to i32
+%r606 = getelementptr i32, i32* %r583, i32 5
+store i32 %r604, i32* %r606
+%r607 = lshr i480 %r603, 32
+%r608 = trunc i480 %r607 to i32
+%r610 = getelementptr i32, i32* %r583, i32 6
+store i32 %r608, i32* %r610
+%r611 = lshr i480 %r607, 32
+%r612 = trunc i480 %r611 to i32
+%r614 = getelementptr i32, i32* %r583, i32 7
+store i32 %r612, i32* %r614
+%r615 = lshr i480 %r611, 32
+%r616 = trunc i480 %r615 to i32
+%r618 = getelementptr i32, i32* %r583, i32 8
+store i32 %r616, i32* %r618
+%r619 = lshr i480 %r615, 32
+%r620 = trunc i480 %r619 to i32
+%r622 = getelementptr i32, i32* %r583, i32 9
+store i32 %r620, i32* %r622
+%r623 = lshr i480 %r619, 32
+%r624 = trunc i480 %r623 to i32
+%r626 = getelementptr i32, i32* %r583, i32 10
+store i32 %r624, i32* %r626
+%r627 = lshr i480 %r623, 32
+%r628 = trunc i480 %r627 to i32
+%r630 = getelementptr i32, i32* %r583, i32 11
+store i32 %r628, i32* %r630
+%r631 = lshr i480 %r627, 32
+%r632 = trunc i480 %r631 to i32
+%r634 = getelementptr i32, i32* %r583, i32 12
+store i32 %r632, i32* %r634
+%r635 = lshr i480 %r631, 32
+%r636 = trunc i480 %r635 to i32
+%r638 = getelementptr i32, i32* %r583, i32 13
+store i32 %r636, i32* %r638
+%r639 = lshr i480 %r635, 32
+%r640 = trunc i480 %r639 to i32
+%r642 = getelementptr i32, i32* %r583, i32 14
+store i32 %r640, i32* %r642
+ret void
+}
+define i544 @mulPv512x32(i32* noalias  %r2, i32 %r3) ; returns lo + (hi << 32) as i544, where lo/hi are packed from sixteen @mulPos32x32(%r2, %r3, k) results, k = 0..15
+{ ; NOTE(review): presumably the 16-word (512-bit) integer at %r2 times the 32-bit %r3; @mulPos32x32 and @extractHigh32 are defined outside this function -- confirm
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0) ; one call per index 0..15, each yielding an i64
+%r6 = trunc i64 %r5 to i32 ; low 32 bits of that i64
+%r7 = call i32 @extractHigh32(i64 %r5) ; presumably the high 32 bits of that i64 (helper body not visible here)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
+%r34 = trunc i64 %r33 to i32
+%r35 = call i32 @extractHigh32(i64 %r33)
+%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
+%r38 = trunc i64 %r37 to i32
+%r39 = call i32 @extractHigh32(i64 %r37)
+%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
+%r42 = trunc i64 %r41 to i32
+%r43 = call i32 @extractHigh32(i64 %r41)
+%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
+%r46 = trunc i64 %r45 to i32
+%r47 = call i32 @extractHigh32(i64 %r45)
+%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
+%r50 = trunc i64 %r49 to i32
+%r51 = call i32 @extractHigh32(i64 %r49)
+%r53 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 12)
+%r54 = trunc i64 %r53 to i32
+%r55 = call i32 @extractHigh32(i64 %r53)
+%r57 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 13)
+%r58 = trunc i64 %r57 to i32
+%r59 = call i32 @extractHigh32(i64 %r57)
+%r61 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 14)
+%r62 = trunc i64 %r61 to i32
+%r63 = call i32 @extractHigh32(i64 %r61)
+%r65 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 15)
+%r66 = trunc i64 %r65 to i32
+%r67 = call i32 @extractHigh32(i64 %r65)
+%r68 = zext i32 %r6 to i64 ; pack the truncated low words %r6, %r10, ..., %r66 into one integer: word k lands at bit offset 32*k
+%r69 = zext i32 %r10 to i64
+%r70 = shl i64 %r69, 32
+%r71 = or i64 %r68, %r70
+%r72 = zext i64 %r71 to i96 ; the accumulator is widened by 32 bits before each further word is OR-ed in
+%r73 = zext i32 %r14 to i96
+%r74 = shl i96 %r73, 64
+%r75 = or i96 %r72, %r74
+%r76 = zext i96 %r75 to i128
+%r77 = zext i32 %r18 to i128
+%r78 = shl i128 %r77, 96
+%r79 = or i128 %r76, %r78
+%r80 = zext i128 %r79 to i160
+%r81 = zext i32 %r22 to i160
+%r82 = shl i160 %r81, 128
+%r83 = or i160 %r80, %r82
+%r84 = zext i160 %r83 to i192
+%r85 = zext i32 %r26 to i192
+%r86 = shl i192 %r85, 160
+%r87 = or i192 %r84, %r86
+%r88 = zext i192 %r87 to i224
+%r89 = zext i32 %r30 to i224
+%r90 = shl i224 %r89, 192
+%r91 = or i224 %r88, %r90
+%r92 = zext i224 %r91 to i256
+%r93 = zext i32 %r34 to i256
+%r94 = shl i256 %r93, 224
+%r95 = or i256 %r92, %r94
+%r96 = zext i256 %r95 to i288
+%r97 = zext i32 %r38 to i288
+%r98 = shl i288 %r97, 256
+%r99 = or i288 %r96, %r98
+%r100 = zext i288 %r99 to i320
+%r101 = zext i32 %r42 to i320
+%r102 = shl i320 %r101, 288
+%r103 = or i320 %r100, %r102
+%r104 = zext i320 %r103 to i352
+%r105 = zext i32 %r46 to i352
+%r106 = shl i352 %r105, 320
+%r107 = or i352 %r104, %r106
+%r108 = zext i352 %r107 to i384
+%r109 = zext i32 %r50 to i384
+%r110 = shl i384 %r109, 352
+%r111 = or i384 %r108, %r110
+%r112 = zext i384 %r111 to i416
+%r113 = zext i32 %r54 to i416
+%r114 = shl i416 %r113, 384
+%r115 = or i416 %r112, %r114
+%r116 = zext i416 %r115 to i448
+%r117 = zext i32 %r58 to i448
+%r118 = shl i448 %r117, 416
+%r119 = or i448 %r116, %r118
+%r120 = zext i448 %r119 to i480
+%r121 = zext i32 %r62 to i480
+%r122 = shl i480 %r121, 448
+%r123 = or i480 %r120, %r122
+%r124 = zext i480 %r123 to i512
+%r125 = zext i32 %r66 to i512
+%r126 = shl i512 %r125, 480
+%r127 = or i512 %r124, %r126 ; %r127 = the 512-bit "lo" value (all sixteen truncated words)
+%r128 = zext i32 %r7 to i64 ; same packing for the @extractHigh32 results %r7, %r11, ..., %r67, producing the "hi" value in %r187
+%r129 = zext i32 %r11 to i64
+%r130 = shl i64 %r129, 32
+%r131 = or i64 %r128, %r130
+%r132 = zext i64 %r131 to i96
+%r133 = zext i32 %r15 to i96
+%r134 = shl i96 %r133, 64
+%r135 = or i96 %r132, %r134
+%r136 = zext i96 %r135 to i128
+%r137 = zext i32 %r19 to i128
+%r138 = shl i128 %r137, 96
+%r139 = or i128 %r136, %r138
+%r140 = zext i128 %r139 to i160
+%r141 = zext i32 %r23 to i160
+%r142 = shl i160 %r141, 128
+%r143 = or i160 %r140, %r142
+%r144 = zext i160 %r143 to i192
+%r145 = zext i32 %r27 to i192
+%r146 = shl i192 %r145, 160
+%r147 = or i192 %r144, %r146
+%r148 = zext i192 %r147 to i224
+%r149 = zext i32 %r31 to i224
+%r150 = shl i224 %r149, 192
+%r151 = or i224 %r148, %r150
+%r152 = zext i224 %r151 to i256
+%r153 = zext i32 %r35 to i256
+%r154 = shl i256 %r153, 224
+%r155 = or i256 %r152, %r154
+%r156 = zext i256 %r155 to i288
+%r157 = zext i32 %r39 to i288
+%r158 = shl i288 %r157, 256
+%r159 = or i288 %r156, %r158
+%r160 = zext i288 %r159 to i320
+%r161 = zext i32 %r43 to i320
+%r162 = shl i320 %r161, 288
+%r163 = or i320 %r160, %r162
+%r164 = zext i320 %r163 to i352
+%r165 = zext i32 %r47 to i352
+%r166 = shl i352 %r165, 320
+%r167 = or i352 %r164, %r166
+%r168 = zext i352 %r167 to i384
+%r169 = zext i32 %r51 to i384
+%r170 = shl i384 %r169, 352
+%r171 = or i384 %r168, %r170
+%r172 = zext i384 %r171 to i416
+%r173 = zext i32 %r55 to i416
+%r174 = shl i416 %r173, 384
+%r175 = or i416 %r172, %r174
+%r176 = zext i416 %r175 to i448
+%r177 = zext i32 %r59 to i448
+%r178 = shl i448 %r177, 416
+%r179 = or i448 %r176, %r178
+%r180 = zext i448 %r179 to i480
+%r181 = zext i32 %r63 to i480
+%r182 = shl i480 %r181, 448
+%r183 = or i480 %r180, %r182
+%r184 = zext i480 %r183 to i512
+%r185 = zext i32 %r67 to i512
+%r186 = shl i512 %r185, 480
+%r187 = or i512 %r184, %r186 ; %r187 = the 512-bit "hi" value
+%r188 = zext i512 %r127 to i544 ; widen both halves to i544 so the shifted hi value keeps its top word
+%r189 = zext i512 %r187 to i544
+%r190 = shl i544 %r189, 32 ; each hi word is placed one 32-bit position above the lo word from the same call
+%r191 = add i544 %r188, %r190 ; the addition propagates carries between the overlapping lo and hi words
+ret i544 %r191
+}
+define void @mcl_fp_mulUnitPre16L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3) ; stores the i544 result of @mulPv512x32(%r2, %r3) into %r1 as 17 i32 words, lowest word first
+{
+%r4 = call i544 @mulPv512x32(i32* %r2, i32 %r3) ; %r2 and %r3 are passed through unchanged
+%r5 = trunc i544 %r4 to i32 ; bits 0..31 of the result
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i544 %r4, 32 ; drop the word just stored; the trunc/getelementptr/store/lshr pattern repeats for %r1[1]..%r1[16]
+%r9 = trunc i544 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i544 %r8, 32
+%r13 = trunc i544 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i544 %r12, 32
+%r17 = trunc i544 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i544 %r16, 32
+%r21 = trunc i544 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+%r24 = lshr i544 %r20, 32
+%r25 = trunc i544 %r24 to i32
+%r27 = getelementptr i32, i32* %r1, i32 5
+store i32 %r25, i32* %r27
+%r28 = lshr i544 %r24, 32
+%r29 = trunc i544 %r28 to i32
+%r31 = getelementptr i32, i32* %r1, i32 6
+store i32 %r29, i32* %r31
+%r32 = lshr i544 %r28, 32
+%r33 = trunc i544 %r32 to i32
+%r35 = getelementptr i32, i32* %r1, i32 7
+store i32 %r33, i32* %r35
+%r36 = lshr i544 %r32, 32
+%r37 = trunc i544 %r36 to i32
+%r39 = getelementptr i32, i32* %r1, i32 8
+store i32 %r37, i32* %r39
+%r40 = lshr i544 %r36, 32
+%r41 = trunc i544 %r40 to i32
+%r43 = getelementptr i32, i32* %r1, i32 9
+store i32 %r41, i32* %r43
+%r44 = lshr i544 %r40, 32
+%r45 = trunc i544 %r44 to i32
+%r47 = getelementptr i32, i32* %r1, i32 10
+store i32 %r45, i32* %r47
+%r48 = lshr i544 %r44, 32
+%r49 = trunc i544 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 11
+store i32 %r49, i32* %r51
+%r52 = lshr i544 %r48, 32
+%r53 = trunc i544 %r52 to i32
+%r55 = getelementptr i32, i32* %r1, i32 12
+store i32 %r53, i32* %r55
+%r56 = lshr i544 %r52, 32
+%r57 = trunc i544 %r56 to i32
+%r59 = getelementptr i32, i32* %r1, i32 13
+store i32 %r57, i32* %r59
+%r60 = lshr i544 %r56, 32
+%r61 = trunc i544 %r60 to i32
+%r63 = getelementptr i32, i32* %r1, i32 14
+store i32 %r61, i32* %r63
+%r64 = lshr i544 %r60, 32
+%r65 = trunc i544 %r64 to i32
+%r67 = getelementptr i32, i32* %r1, i32 15
+store i32 %r65, i32* %r67
+%r68 = lshr i544 %r64, 32
+%r69 = trunc i544 %r68 to i32
+%r71 = getelementptr i32, i32* %r1, i32 16
+store i32 %r69, i32* %r71 ; %r1[16] receives bits 512..543, so the destination must hold 17 words (one more than the 16-word input)
+ret void
+}
+define void @mcl_fpDbl_mulPre16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r2, i32 8
+%r7 = getelementptr i32, i32* %r3, i32 8
+%r9 = getelementptr i32, i32* %r1, i32 16
+call void @mcl_fpDbl_mulPre8L(i32* %r1, i32* %r2, i32* %r3)
+call void @mcl_fpDbl_mulPre8L(i32* %r9, i32* %r5, i32* %r7)
+%r10 = load i32, i32* %r5
+%r11 = zext i32 %r10 to i64
+%r13 = getelementptr i32, i32* %r5, i32 1
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i64
+%r16 = shl i64 %r15, 32
+%r17 = or i64 %r11, %r16
+%r18 = zext i64 %r17 to i96
+%r20 = getelementptr i32, i32* %r5, i32 2
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i96
+%r23 = shl i96 %r22, 64
+%r24 = or i96 %r18, %r23
+%r25 = zext i96 %r24 to i128
+%r27 = getelementptr i32, i32* %r5, i32 3
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i128
+%r30 = shl i128 %r29, 96
+%r31 = or i128 %r25, %r30
+%r32 = zext i128 %r31 to i160
+%r34 = getelementptr i32, i32* %r5, i32 4
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i160
+%r37 = shl i160 %r36, 128
+%r38 = or i160 %r32, %r37
+%r39 = zext i160 %r38 to i192
+%r41 = getelementptr i32, i32* %r5, i32 5
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i192
+%r44 = shl i192 %r43, 160
+%r45 = or i192 %r39, %r44
+%r46 = zext i192 %r45 to i224
+%r48 = getelementptr i32, i32* %r5, i32 6
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i224
+%r51 = shl i224 %r50, 192
+%r52 = or i224 %r46, %r51
+%r53 = zext i224 %r52 to i256
+%r55 = getelementptr i32, i32* %r5, i32 7
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i256
+%r58 = shl i256 %r57, 224
+%r59 = or i256 %r53, %r58
+%r60 = zext i256 %r59 to i288
+%r61 = load i32, i32* %r2
+%r62 = zext i32 %r61 to i64
+%r64 = getelementptr i32, i32* %r2, i32 1
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i64
+%r67 = shl i64 %r66, 32
+%r68 = or i64 %r62, %r67
+%r69 = zext i64 %r68 to i96
+%r71 = getelementptr i32, i32* %r2, i32 2
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i96
+%r74 = shl i96 %r73, 64
+%r75 = or i96 %r69, %r74
+%r76 = zext i96 %r75 to i128
+%r78 = getelementptr i32, i32* %r2, i32 3
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i128
+%r81 = shl i128 %r80, 96
+%r82 = or i128 %r76, %r81
+%r83 = zext i128 %r82 to i160
+%r85 = getelementptr i32, i32* %r2, i32 4
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i160
+%r88 = shl i160 %r87, 128
+%r89 = or i160 %r83, %r88
+%r90 = zext i160 %r89 to i192
+%r92 = getelementptr i32, i32* %r2, i32 5
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i192
+%r95 = shl i192 %r94, 160
+%r96 = or i192 %r90, %r95
+%r97 = zext i192 %r96 to i224
+%r99 = getelementptr i32, i32* %r2, i32 6
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i224
+%r102 = shl i224 %r101, 192
+%r103 = or i224 %r97, %r102
+%r104 = zext i224 %r103 to i256
+%r106 = getelementptr i32, i32* %r2, i32 7
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i256
+%r109 = shl i256 %r108, 224
+%r110 = or i256 %r104, %r109
+%r111 = zext i256 %r110 to i288
+%r112 = load i32, i32* %r7
+%r113 = zext i32 %r112 to i64
+%r115 = getelementptr i32, i32* %r7, i32 1
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i64
+%r118 = shl i64 %r117, 32
+%r119 = or i64 %r113, %r118
+%r120 = zext i64 %r119 to i96
+%r122 = getelementptr i32, i32* %r7, i32 2
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i96
+%r125 = shl i96 %r124, 64
+%r126 = or i96 %r120, %r125
+%r127 = zext i96 %r126 to i128
+%r129 = getelementptr i32, i32* %r7, i32 3
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i128
+%r132 = shl i128 %r131, 96
+%r133 = or i128 %r127, %r132
+%r134 = zext i128 %r133 to i160
+%r136 = getelementptr i32, i32* %r7, i32 4
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i160
+%r139 = shl i160 %r138, 128
+%r140 = or i160 %r134, %r139
+%r141 = zext i160 %r140 to i192
+%r143 = getelementptr i32, i32* %r7, i32 5
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i192
+%r146 = shl i192 %r145, 160
+%r147 = or i192 %r141, %r146
+%r148 = zext i192 %r147 to i224
+%r150 = getelementptr i32, i32* %r7, i32 6
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i224
+%r153 = shl i224 %r152, 192
+%r154 = or i224 %r148, %r153
+%r155 = zext i224 %r154 to i256
+%r157 = getelementptr i32, i32* %r7, i32 7
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i256
+%r160 = shl i256 %r159, 224
+%r161 = or i256 %r155, %r160
+%r162 = zext i256 %r161 to i288
+%r163 = load i32, i32* %r3
+%r164 = zext i32 %r163 to i64
+%r166 = getelementptr i32, i32* %r3, i32 1
+%r167 = load i32, i32* %r166
+%r168 = zext i32 %r167 to i64
+%r169 = shl i64 %r168, 32
+%r170 = or i64 %r164, %r169
+%r171 = zext i64 %r170 to i96
+%r173 = getelementptr i32, i32* %r3, i32 2
+%r174 = load i32, i32* %r173
+%r175 = zext i32 %r174 to i96
+%r176 = shl i96 %r175, 64
+%r177 = or i96 %r171, %r176
+%r178 = zext i96 %r177 to i128
+%r180 = getelementptr i32, i32* %r3, i32 3
+%r181 = load i32, i32* %r180
+%r182 = zext i32 %r181 to i128
+%r183 = shl i128 %r182, 96
+%r184 = or i128 %r178, %r183
+%r185 = zext i128 %r184 to i160
+%r187 = getelementptr i32, i32* %r3, i32 4
+%r188 = load i32, i32* %r187
+%r189 = zext i32 %r188 to i160
+%r190 = shl i160 %r189, 128
+%r191 = or i160 %r185, %r190
+%r192 = zext i160 %r191 to i192
+%r194 = getelementptr i32, i32* %r3, i32 5
+%r195 = load i32, i32* %r194
+%r196 = zext i32 %r195 to i192
+%r197 = shl i192 %r196, 160
+%r198 = or i192 %r192, %r197
+%r199 = zext i192 %r198 to i224
+%r201 = getelementptr i32, i32* %r3, i32 6
+%r202 = load i32, i32* %r201
+%r203 = zext i32 %r202 to i224
+%r204 = shl i224 %r203, 192
+%r205 = or i224 %r199, %r204
+%r206 = zext i224 %r205 to i256
+%r208 = getelementptr i32, i32* %r3, i32 7
+%r209 = load i32, i32* %r208
+%r210 = zext i32 %r209 to i256
+%r211 = shl i256 %r210, 224
+%r212 = or i256 %r206, %r211
+%r213 = zext i256 %r212 to i288
+%r214 = add i288 %r60, %r111
+%r215 = add i288 %r162, %r213
+%r217 = alloca i32, i32 16
+%r218 = trunc i288 %r214 to i256
+%r219 = trunc i288 %r215 to i256
+%r220 = lshr i288 %r214, 256
+%r221 = trunc i288 %r220 to i1
+%r222 = lshr i288 %r215, 256
+%r223 = trunc i288 %r222 to i1
+%r224 = and i1 %r221, %r223
+%r226 = select i1 %r221, i256 %r219, i256 0
+%r228 = select i1 %r223, i256 %r218, i256 0
+%r230 = alloca i32, i32 8
+%r232 = alloca i32, i32 8
+%r233 = trunc i256 %r218 to i32
+%r235 = getelementptr i32, i32* %r230, i32 0
+store i32 %r233, i32* %r235
+%r236 = lshr i256 %r218, 32
+%r237 = trunc i256 %r236 to i32
+%r239 = getelementptr i32, i32* %r230, i32 1
+store i32 %r237, i32* %r239
+%r240 = lshr i256 %r236, 32
+%r241 = trunc i256 %r240 to i32
+%r243 = getelementptr i32, i32* %r230, i32 2
+store i32 %r241, i32* %r243
+%r244 = lshr i256 %r240, 32
+%r245 = trunc i256 %r244 to i32
+%r247 = getelementptr i32, i32* %r230, i32 3
+store i32 %r245, i32* %r247
+%r248 = lshr i256 %r244, 32
+%r249 = trunc i256 %r248 to i32
+%r251 = getelementptr i32, i32* %r230, i32 4
+store i32 %r249, i32* %r251
+%r252 = lshr i256 %r248, 32
+%r253 = trunc i256 %r252 to i32
+%r255 = getelementptr i32, i32* %r230, i32 5
+store i32 %r253, i32* %r255
+%r256 = lshr i256 %r252, 32
+%r257 = trunc i256 %r256 to i32
+%r259 = getelementptr i32, i32* %r230, i32 6
+store i32 %r257, i32* %r259
+%r260 = lshr i256 %r256, 32
+%r261 = trunc i256 %r260 to i32
+%r263 = getelementptr i32, i32* %r230, i32 7
+store i32 %r261, i32* %r263
+%r264 = trunc i256 %r219 to i32
+%r266 = getelementptr i32, i32* %r232, i32 0
+store i32 %r264, i32* %r266
+%r267 = lshr i256 %r219, 32
+%r268 = trunc i256 %r267 to i32
+%r270 = getelementptr i32, i32* %r232, i32 1
+store i32 %r268, i32* %r270
+%r271 = lshr i256 %r267, 32
+%r272 = trunc i256 %r271 to i32
+%r274 = getelementptr i32, i32* %r232, i32 2
+store i32 %r272, i32* %r274
+%r275 = lshr i256 %r271, 32
+%r276 = trunc i256 %r275 to i32
+%r278 = getelementptr i32, i32* %r232, i32 3
+store i32 %r276, i32* %r278
+%r279 = lshr i256 %r275, 32
+%r280 = trunc i256 %r279 to i32
+%r282 = getelementptr i32, i32* %r232, i32 4
+store i32 %r280, i32* %r282
+%r283 = lshr i256 %r279, 32
+%r284 = trunc i256 %r283 to i32
+%r286 = getelementptr i32, i32* %r232, i32 5
+store i32 %r284, i32* %r286
+%r287 = lshr i256 %r283, 32
+%r288 = trunc i256 %r287 to i32
+%r290 = getelementptr i32, i32* %r232, i32 6
+store i32 %r288, i32* %r290
+%r291 = lshr i256 %r287, 32
+%r292 = trunc i256 %r291 to i32
+%r294 = getelementptr i32, i32* %r232, i32 7
+store i32 %r292, i32* %r294
+call void @mcl_fpDbl_mulPre8L(i32* %r217, i32* %r230, i32* %r232)
+%r295 = load i32, i32* %r217
+%r296 = zext i32 %r295 to i64
+%r298 = getelementptr i32, i32* %r217, i32 1
+%r299 = load i32, i32* %r298
+%r300 = zext i32 %r299 to i64
+%r301 = shl i64 %r300, 32
+%r302 = or i64 %r296, %r301
+%r303 = zext i64 %r302 to i96
+%r305 = getelementptr i32, i32* %r217, i32 2
+%r306 = load i32, i32* %r305
+%r307 = zext i32 %r306 to i96
+%r308 = shl i96 %r307, 64
+%r309 = or i96 %r303, %r308
+%r310 = zext i96 %r309 to i128
+%r312 = getelementptr i32, i32* %r217, i32 3
+%r313 = load i32, i32* %r312
+%r314 = zext i32 %r313 to i128
+%r315 = shl i128 %r314, 96
+%r316 = or i128 %r310, %r315
+%r317 = zext i128 %r316 to i160
+%r319 = getelementptr i32, i32* %r217, i32 4
+%r320 = load i32, i32* %r319
+%r321 = zext i32 %r320 to i160
+%r322 = shl i160 %r321, 128
+%r323 = or i160 %r317, %r322
+%r324 = zext i160 %r323 to i192
+%r326 = getelementptr i32, i32* %r217, i32 5
+%r327 = load i32, i32* %r326
+%r328 = zext i32 %r327 to i192
+%r329 = shl i192 %r328, 160
+%r330 = or i192 %r324, %r329
+%r331 = zext i192 %r330 to i224
+%r333 = getelementptr i32, i32* %r217, i32 6
+%r334 = load i32, i32* %r333
+%r335 = zext i32 %r334 to i224
+%r336 = shl i224 %r335, 192
+%r337 = or i224 %r331, %r336
+%r338 = zext i224 %r337 to i256
+%r340 = getelementptr i32, i32* %r217, i32 7
+%r341 = load i32, i32* %r340
+%r342 = zext i32 %r341 to i256
+%r343 = shl i256 %r342, 224
+%r344 = or i256 %r338, %r343
+%r345 = zext i256 %r344 to i288
+%r347 = getelementptr i32, i32* %r217, i32 8
+%r348 = load i32, i32* %r347
+%r349 = zext i32 %r348 to i288
+%r350 = shl i288 %r349, 256
+%r351 = or i288 %r345, %r350
+%r352 = zext i288 %r351 to i320
+%r354 = getelementptr i32, i32* %r217, i32 9
+%r355 = load i32, i32* %r354
+%r356 = zext i32 %r355 to i320
+%r357 = shl i320 %r356, 288
+%r358 = or i320 %r352, %r357
+%r359 = zext i320 %r358 to i352
+%r361 = getelementptr i32, i32* %r217, i32 10
+%r362 = load i32, i32* %r361
+%r363 = zext i32 %r362 to i352
+%r364 = shl i352 %r363, 320
+%r365 = or i352 %r359, %r364
+%r366 = zext i352 %r365 to i384
+%r368 = getelementptr i32, i32* %r217, i32 11
+%r369 = load i32, i32* %r368
+%r370 = zext i32 %r369 to i384
+%r371 = shl i384 %r370, 352
+%r372 = or i384 %r366, %r371
+%r373 = zext i384 %r372 to i416
+%r375 = getelementptr i32, i32* %r217, i32 12
+%r376 = load i32, i32* %r375
+%r377 = zext i32 %r376 to i416
+%r378 = shl i416 %r377, 384
+%r379 = or i416 %r373, %r378
+%r380 = zext i416 %r379 to i448
+%r382 = getelementptr i32, i32* %r217, i32 13
+%r383 = load i32, i32* %r382
+%r384 = zext i32 %r383 to i448
+%r385 = shl i448 %r384, 416
+%r386 = or i448 %r380, %r385
+%r387 = zext i448 %r386 to i480
+%r389 = getelementptr i32, i32* %r217, i32 14
+%r390 = load i32, i32* %r389
+%r391 = zext i32 %r390 to i480
+%r392 = shl i480 %r391, 448
+%r393 = or i480 %r387, %r392
+%r394 = zext i480 %r393 to i512
+%r396 = getelementptr i32, i32* %r217, i32 15
+%r397 = load i32, i32* %r396
+%r398 = zext i32 %r397 to i512
+%r399 = shl i512 %r398, 480
+%r400 = or i512 %r394, %r399
+%r401 = zext i512 %r400 to i544
+%r402 = zext i1 %r224 to i544
+%r403 = shl i544 %r402, 512
+%r404 = or i544 %r401, %r403
+%r405 = zext i256 %r226 to i544
+%r406 = zext i256 %r228 to i544
+%r407 = shl i544 %r405, 256
+%r408 = shl i544 %r406, 256
+%r409 = add i544 %r404, %r407
+%r410 = add i544 %r409, %r408
+%r411 = load i32, i32* %r1
+%r412 = zext i32 %r411 to i64
+%r414 = getelementptr i32, i32* %r1, i32 1
+%r415 = load i32, i32* %r414
+%r416 = zext i32 %r415 to i64
+%r417 = shl i64 %r416, 32
+%r418 = or i64 %r412, %r417
+%r419 = zext i64 %r418 to i96
+%r421 = getelementptr i32, i32* %r1, i32 2
+%r422 = load i32, i32* %r421
+%r423 = zext i32 %r422 to i96
+%r424 = shl i96 %r423, 64
+%r425 = or i96 %r419, %r424
+%r426 = zext i96 %r425 to i128
+%r428 = getelementptr i32, i32* %r1, i32 3
+%r429 = load i32, i32* %r428
+%r430 = zext i32 %r429 to i128
+%r431 = shl i128 %r430, 96
+%r432 = or i128 %r426, %r431
+%r433 = zext i128 %r432 to i160
+%r435 = getelementptr i32, i32* %r1, i32 4
+%r436 = load i32, i32* %r435
+%r437 = zext i32 %r436 to i160
+%r438 = shl i160 %r437, 128
+%r439 = or i160 %r433, %r438
+%r440 = zext i160 %r439 to i192
+%r442 = getelementptr i32, i32* %r1, i32 5
+%r443 = load i32, i32* %r442
+%r444 = zext i32 %r443 to i192
+%r445 = shl i192 %r444, 160
+%r446 = or i192 %r440, %r445
+%r447 = zext i192 %r446 to i224
+%r449 = getelementptr i32, i32* %r1, i32 6
+%r450 = load i32, i32* %r449
+%r451 = zext i32 %r450 to i224
+%r452 = shl i224 %r451, 192
+%r453 = or i224 %r447, %r452
+%r454 = zext i224 %r453 to i256
+%r456 = getelementptr i32, i32* %r1, i32 7
+%r457 = load i32, i32* %r456
+%r458 = zext i32 %r457 to i256
+%r459 = shl i256 %r458, 224
+%r460 = or i256 %r454, %r459
+%r461 = zext i256 %r460 to i288
+%r463 = getelementptr i32, i32* %r1, i32 8
+%r464 = load i32, i32* %r463
+%r465 = zext i32 %r464 to i288
+%r466 = shl i288 %r465, 256
+%r467 = or i288 %r461, %r466
+%r468 = zext i288 %r467 to i320
+%r470 = getelementptr i32, i32* %r1, i32 9
+%r471 = load i32, i32* %r470
+%r472 = zext i32 %r471 to i320
+%r473 = shl i320 %r472, 288
+%r474 = or i320 %r468, %r473
+%r475 = zext i320 %r474 to i352
+%r477 = getelementptr i32, i32* %r1, i32 10
+%r478 = load i32, i32* %r477
+%r479 = zext i32 %r478 to i352
+%r480 = shl i352 %r479, 320
+%r481 = or i352 %r475, %r480
+%r482 = zext i352 %r481 to i384
+%r484 = getelementptr i32, i32* %r1, i32 11
+%r485 = load i32, i32* %r484
+%r486 = zext i32 %r485 to i384
+%r487 = shl i384 %r486, 352
+%r488 = or i384 %r482, %r487
+%r489 = zext i384 %r488 to i416
+%r491 = getelementptr i32, i32* %r1, i32 12
+%r492 = load i32, i32* %r491
+%r493 = zext i32 %r492 to i416
+%r494 = shl i416 %r493, 384
+%r495 = or i416 %r489, %r494
+%r496 = zext i416 %r495 to i448
+%r498 = getelementptr i32, i32* %r1, i32 13
+%r499 = load i32, i32* %r498
+%r500 = zext i32 %r499 to i448
+%r501 = shl i448 %r500, 416
+%r502 = or i448 %r496, %r501
+%r503 = zext i448 %r502 to i480
+%r505 = getelementptr i32, i32* %r1, i32 14
+%r506 = load i32, i32* %r505
+%r507 = zext i32 %r506 to i480
+%r508 = shl i480 %r507, 448
+%r509 = or i480 %r503, %r508
+%r510 = zext i480 %r509 to i512
+%r512 = getelementptr i32, i32* %r1, i32 15
+%r513 = load i32, i32* %r512
+%r514 = zext i32 %r513 to i512
+%r515 = shl i512 %r514, 480
+%r516 = or i512 %r510, %r515
+%r517 = zext i512 %r516 to i544
+%r518 = sub i544 %r410, %r517
+%r520 = getelementptr i32, i32* %r1, i32 16
+%r521 = load i32, i32* %r520
+%r522 = zext i32 %r521 to i64
+%r524 = getelementptr i32, i32* %r520, i32 1
+%r525 = load i32, i32* %r524
+%r526 = zext i32 %r525 to i64
+%r527 = shl i64 %r526, 32
+%r528 = or i64 %r522, %r527
+%r529 = zext i64 %r528 to i96
+%r531 = getelementptr i32, i32* %r520, i32 2
+%r532 = load i32, i32* %r531
+%r533 = zext i32 %r532 to i96
+%r534 = shl i96 %r533, 64
+%r535 = or i96 %r529, %r534
+%r536 = zext i96 %r535 to i128
+%r538 = getelementptr i32, i32* %r520, i32 3
+%r539 = load i32, i32* %r538
+%r540 = zext i32 %r539 to i128
+%r541 = shl i128 %r540, 96
+%r542 = or i128 %r536, %r541
+%r543 = zext i128 %r542 to i160
+%r545 = getelementptr i32, i32* %r520, i32 4
+%r546 = load i32, i32* %r545
+%r547 = zext i32 %r546 to i160
+%r548 = shl i160 %r547, 128
+%r549 = or i160 %r543, %r548
+%r550 = zext i160 %r549 to i192
+%r552 = getelementptr i32, i32* %r520, i32 5
+%r553 = load i32, i32* %r552
+%r554 = zext i32 %r553 to i192
+%r555 = shl i192 %r554, 160
+%r556 = or i192 %r550, %r555
+%r557 = zext i192 %r556 to i224
+%r559 = getelementptr i32, i32* %r520, i32 6
+%r560 = load i32, i32* %r559
+%r561 = zext i32 %r560 to i224
+%r562 = shl i224 %r561, 192
+%r563 = or i224 %r557, %r562
+%r564 = zext i224 %r563 to i256
+%r566 = getelementptr i32, i32* %r520, i32 7
+%r567 = load i32, i32* %r566
+%r568 = zext i32 %r567 to i256
+%r569 = shl i256 %r568, 224
+%r570 = or i256 %r564, %r569
+%r571 = zext i256 %r570 to i288
+%r573 = getelementptr i32, i32* %r520, i32 8
+%r574 = load i32, i32* %r573
+%r575 = zext i32 %r574 to i288
+%r576 = shl i288 %r575, 256
+%r577 = or i288 %r571, %r576
+%r578 = zext i288 %r577 to i320
+%r580 = getelementptr i32, i32* %r520, i32 9
+%r581 = load i32, i32* %r580
+%r582 = zext i32 %r581 to i320
+%r583 = shl i320 %r582, 288
+%r584 = or i320 %r578, %r583
+%r585 = zext i320 %r584 to i352
+%r587 = getelementptr i32, i32* %r520, i32 10
+%r588 = load i32, i32* %r587
+%r589 = zext i32 %r588 to i352
+%r590 = shl i352 %r589, 320
+%r591 = or i352 %r585, %r590
+%r592 = zext i352 %r591 to i384
+%r594 = getelementptr i32, i32* %r520, i32 11
+%r595 = load i32, i32* %r594
+%r596 = zext i32 %r595 to i384
+%r597 = shl i384 %r596, 352
+%r598 = or i384 %r592, %r597
+%r599 = zext i384 %r598 to i416
+%r601 = getelementptr i32, i32* %r520, i32 12
+%r602 = load i32, i32* %r601
+%r603 = zext i32 %r602 to i416
+%r604 = shl i416 %r603, 384
+%r605 = or i416 %r599, %r604
+%r606 = zext i416 %r605 to i448
+%r608 = getelementptr i32, i32* %r520, i32 13
+%r609 = load i32, i32* %r608
+%r610 = zext i32 %r609 to i448
+%r611 = shl i448 %r610, 416
+%r612 = or i448 %r606, %r611
+%r613 = zext i448 %r612 to i480
+%r615 = getelementptr i32, i32* %r520, i32 14
+%r616 = load i32, i32* %r615
+%r617 = zext i32 %r616 to i480
+%r618 = shl i480 %r617, 448
+%r619 = or i480 %r613, %r618
+%r620 = zext i480 %r619 to i512
+%r622 = getelementptr i32, i32* %r520, i32 15
+%r623 = load i32, i32* %r622
+%r624 = zext i32 %r623 to i512
+%r625 = shl i512 %r624, 480
+%r626 = or i512 %r620, %r625
+%r627 = zext i512 %r626 to i544
+%r628 = sub i544 %r518, %r627
+%r629 = zext i544 %r628 to i768
+%r631 = getelementptr i32, i32* %r1, i32 8
+%r632 = load i32, i32* %r631
+%r633 = zext i32 %r632 to i64
+%r635 = getelementptr i32, i32* %r631, i32 1
+%r636 = load i32, i32* %r635
+%r637 = zext i32 %r636 to i64
+%r638 = shl i64 %r637, 32
+%r639 = or i64 %r633, %r638
+%r640 = zext i64 %r639 to i96
+%r642 = getelementptr i32, i32* %r631, i32 2
+%r643 = load i32, i32* %r642
+%r644 = zext i32 %r643 to i96
+%r645 = shl i96 %r644, 64
+%r646 = or i96 %r640, %r645
+%r647 = zext i96 %r646 to i128
+%r649 = getelementptr i32, i32* %r631, i32 3
+%r650 = load i32, i32* %r649
+%r651 = zext i32 %r650 to i128
+%r652 = shl i128 %r651, 96
+%r653 = or i128 %r647, %r652
+%r654 = zext i128 %r653 to i160
+%r656 = getelementptr i32, i32* %r631, i32 4
+%r657 = load i32, i32* %r656
+%r658 = zext i32 %r657 to i160
+%r659 = shl i160 %r658, 128
+%r660 = or i160 %r654, %r659
+%r661 = zext i160 %r660 to i192
+%r663 = getelementptr i32, i32* %r631, i32 5
+%r664 = load i32, i32* %r663
+%r665 = zext i32 %r664 to i192
+%r666 = shl i192 %r665, 160
+%r667 = or i192 %r661, %r666
+%r668 = zext i192 %r667 to i224
+%r670 = getelementptr i32, i32* %r631, i32 6
+%r671 = load i32, i32* %r670
+%r672 = zext i32 %r671 to i224
+%r673 = shl i224 %r672, 192
+%r674 = or i224 %r668, %r673
+%r675 = zext i224 %r674 to i256
+%r677 = getelementptr i32, i32* %r631, i32 7
+%r678 = load i32, i32* %r677
+%r679 = zext i32 %r678 to i256
+%r680 = shl i256 %r679, 224
+%r681 = or i256 %r675, %r680
+%r682 = zext i256 %r681 to i288
+%r684 = getelementptr i32, i32* %r631, i32 8
+%r685 = load i32, i32* %r684
+%r686 = zext i32 %r685 to i288
+%r687 = shl i288 %r686, 256
+%r688 = or i288 %r682, %r687
+%r689 = zext i288 %r688 to i320
+%r691 = getelementptr i32, i32* %r631, i32 9
+%r692 = load i32, i32* %r691
+%r693 = zext i32 %r692 to i320
+%r694 = shl i320 %r693, 288
+%r695 = or i320 %r689, %r694
+%r696 = zext i320 %r695 to i352
+%r698 = getelementptr i32, i32* %r631, i32 10
+%r699 = load i32, i32* %r698
+%r700 = zext i32 %r699 to i352
+%r701 = shl i352 %r700, 320
+%r702 = or i352 %r696, %r701
+%r703 = zext i352 %r702 to i384
+%r705 = getelementptr i32, i32* %r631, i32 11
+%r706 = load i32, i32* %r705
+%r707 = zext i32 %r706 to i384
+%r708 = shl i384 %r707, 352
+%r709 = or i384 %r703, %r708
+%r710 = zext i384 %r709 to i416
+%r712 = getelementptr i32, i32* %r631, i32 12
+%r713 = load i32, i32* %r712
+%r714 = zext i32 %r713 to i416
+%r715 = shl i416 %r714, 384
+%r716 = or i416 %r710, %r715
+%r717 = zext i416 %r716 to i448
+%r719 = getelementptr i32, i32* %r631, i32 13
+%r720 = load i32, i32* %r719
+%r721 = zext i32 %r720 to i448
+%r722 = shl i448 %r721, 416
+%r723 = or i448 %r717, %r722
+%r724 = zext i448 %r723 to i480
+%r726 = getelementptr i32, i32* %r631, i32 14
+%r727 = load i32, i32* %r726
+%r728 = zext i32 %r727 to i480
+%r729 = shl i480 %r728, 448
+%r730 = or i480 %r724, %r729
+%r731 = zext i480 %r730 to i512
+%r733 = getelementptr i32, i32* %r631, i32 15
+%r734 = load i32, i32* %r733
+%r735 = zext i32 %r734 to i512
+%r736 = shl i512 %r735, 480
+%r737 = or i512 %r731, %r736
+%r738 = zext i512 %r737 to i544
+%r740 = getelementptr i32, i32* %r631, i32 16
+%r741 = load i32, i32* %r740
+%r742 = zext i32 %r741 to i544
+%r743 = shl i544 %r742, 512
+%r744 = or i544 %r738, %r743
+%r745 = zext i544 %r744 to i576
+%r747 = getelementptr i32, i32* %r631, i32 17
+%r748 = load i32, i32* %r747
+%r749 = zext i32 %r748 to i576
+%r750 = shl i576 %r749, 544
+%r751 = or i576 %r745, %r750
+%r752 = zext i576 %r751 to i608
+%r754 = getelementptr i32, i32* %r631, i32 18
+%r755 = load i32, i32* %r754
+%r756 = zext i32 %r755 to i608
+%r757 = shl i608 %r756, 576
+%r758 = or i608 %r752, %r757
+%r759 = zext i608 %r758 to i640
+%r761 = getelementptr i32, i32* %r631, i32 19
+%r762 = load i32, i32* %r761
+%r763 = zext i32 %r762 to i640
+%r764 = shl i640 %r763, 608
+%r765 = or i640 %r759, %r764
+%r766 = zext i640 %r765 to i672
+%r768 = getelementptr i32, i32* %r631, i32 20
+%r769 = load i32, i32* %r768
+%r770 = zext i32 %r769 to i672
+%r771 = shl i672 %r770, 640
+%r772 = or i672 %r766, %r771
+%r773 = zext i672 %r772 to i704
+%r775 = getelementptr i32, i32* %r631, i32 21
+%r776 = load i32, i32* %r775
+%r777 = zext i32 %r776 to i704
+%r778 = shl i704 %r777, 672
+%r779 = or i704 %r773, %r778
+%r780 = zext i704 %r779 to i736
+%r782 = getelementptr i32, i32* %r631, i32 22
+%r783 = load i32, i32* %r782
+%r784 = zext i32 %r783 to i736
+%r785 = shl i736 %r784, 704
+%r786 = or i736 %r780, %r785
+%r787 = zext i736 %r786 to i768
+%r789 = getelementptr i32, i32* %r631, i32 23
+%r790 = load i32, i32* %r789
+%r791 = zext i32 %r790 to i768
+%r792 = shl i768 %r791, 736
+%r793 = or i768 %r787, %r792
+%r794 = add i768 %r629, %r793
+%r796 = getelementptr i32, i32* %r1, i32 8
+%r797 = trunc i768 %r794 to i32
+%r799 = getelementptr i32, i32* %r796, i32 0
+store i32 %r797, i32* %r799
+%r800 = lshr i768 %r794, 32
+%r801 = trunc i768 %r800 to i32
+%r803 = getelementptr i32, i32* %r796, i32 1
+store i32 %r801, i32* %r803
+%r804 = lshr i768 %r800, 32
+%r805 = trunc i768 %r804 to i32
+%r807 = getelementptr i32, i32* %r796, i32 2
+store i32 %r805, i32* %r807
+%r808 = lshr i768 %r804, 32
+%r809 = trunc i768 %r808 to i32
+%r811 = getelementptr i32, i32* %r796, i32 3
+store i32 %r809, i32* %r811
+%r812 = lshr i768 %r808, 32
+%r813 = trunc i768 %r812 to i32
+%r815 = getelementptr i32, i32* %r796, i32 4
+store i32 %r813, i32* %r815
+%r816 = lshr i768 %r812, 32
+%r817 = trunc i768 %r816 to i32
+%r819 = getelementptr i32, i32* %r796, i32 5
+store i32 %r817, i32* %r819
+%r820 = lshr i768 %r816, 32
+%r821 = trunc i768 %r820 to i32
+%r823 = getelementptr i32, i32* %r796, i32 6
+store i32 %r821, i32* %r823
+%r824 = lshr i768 %r820, 32
+%r825 = trunc i768 %r824 to i32
+%r827 = getelementptr i32, i32* %r796, i32 7
+store i32 %r825, i32* %r827
+%r828 = lshr i768 %r824, 32
+%r829 = trunc i768 %r828 to i32
+%r831 = getelementptr i32, i32* %r796, i32 8
+store i32 %r829, i32* %r831
+%r832 = lshr i768 %r828, 32
+%r833 = trunc i768 %r832 to i32
+%r835 = getelementptr i32, i32* %r796, i32 9
+store i32 %r833, i32* %r835
+%r836 = lshr i768 %r832, 32
+%r837 = trunc i768 %r836 to i32
+%r839 = getelementptr i32, i32* %r796, i32 10
+store i32 %r837, i32* %r839
+%r840 = lshr i768 %r836, 32
+%r841 = trunc i768 %r840 to i32
+%r843 = getelementptr i32, i32* %r796, i32 11
+store i32 %r841, i32* %r843
+%r844 = lshr i768 %r840, 32
+%r845 = trunc i768 %r844 to i32
+%r847 = getelementptr i32, i32* %r796, i32 12
+store i32 %r845, i32* %r847
+%r848 = lshr i768 %r844, 32
+%r849 = trunc i768 %r848 to i32
+%r851 = getelementptr i32, i32* %r796, i32 13
+store i32 %r849, i32* %r851
+%r852 = lshr i768 %r848, 32
+%r853 = trunc i768 %r852 to i32
+%r855 = getelementptr i32, i32* %r796, i32 14
+store i32 %r853, i32* %r855
+%r856 = lshr i768 %r852, 32
+%r857 = trunc i768 %r856 to i32
+%r859 = getelementptr i32, i32* %r796, i32 15
+store i32 %r857, i32* %r859
+%r860 = lshr i768 %r856, 32
+%r861 = trunc i768 %r860 to i32
+%r863 = getelementptr i32, i32* %r796, i32 16
+store i32 %r861, i32* %r863
+%r864 = lshr i768 %r860, 32
+%r865 = trunc i768 %r864 to i32
+%r867 = getelementptr i32, i32* %r796, i32 17
+store i32 %r865, i32* %r867
+%r868 = lshr i768 %r864, 32
+%r869 = trunc i768 %r868 to i32
+%r871 = getelementptr i32, i32* %r796, i32 18
+store i32 %r869, i32* %r871
+%r872 = lshr i768 %r868, 32
+%r873 = trunc i768 %r872 to i32
+%r875 = getelementptr i32, i32* %r796, i32 19
+store i32 %r873, i32* %r875
+%r876 = lshr i768 %r872, 32
+%r877 = trunc i768 %r876 to i32
+%r879 = getelementptr i32, i32* %r796, i32 20
+store i32 %r877, i32* %r879
+%r880 = lshr i768 %r876, 32
+%r881 = trunc i768 %r880 to i32
+%r883 = getelementptr i32, i32* %r796, i32 21
+store i32 %r881, i32* %r883
+%r884 = lshr i768 %r880, 32
+%r885 = trunc i768 %r884 to i32
+%r887 = getelementptr i32, i32* %r796, i32 22
+store i32 %r885, i32* %r887
+%r888 = lshr i768 %r884, 32
+%r889 = trunc i768 %r888 to i32
+%r891 = getelementptr i32, i32* %r796, i32 23
+store i32 %r889, i32* %r891
+ret void
+}
+define void @mcl_fpDbl_sqrPre16L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r4 = getelementptr i32, i32* %r2, i32 8
+%r6 = getelementptr i32, i32* %r2, i32 8
+%r8 = getelementptr i32, i32* %r1, i32 16
+call void @mcl_fpDbl_mulPre8L(i32* %r1, i32* %r2, i32* %r2)
+call void @mcl_fpDbl_mulPre8L(i32* %r8, i32* %r4, i32* %r6)
+%r9 = load i32, i32* %r4
+%r10 = zext i32 %r9 to i64
+%r12 = getelementptr i32, i32* %r4, i32 1
+%r13 = load i32, i32* %r12
+%r14 = zext i32 %r13 to i64
+%r15 = shl i64 %r14, 32
+%r16 = or i64 %r10, %r15
+%r17 = zext i64 %r16 to i96
+%r19 = getelementptr i32, i32* %r4, i32 2
+%r20 = load i32, i32* %r19
+%r21 = zext i32 %r20 to i96
+%r22 = shl i96 %r21, 64
+%r23 = or i96 %r17, %r22
+%r24 = zext i96 %r23 to i128
+%r26 = getelementptr i32, i32* %r4, i32 3
+%r27 = load i32, i32* %r26
+%r28 = zext i32 %r27 to i128
+%r29 = shl i128 %r28, 96
+%r30 = or i128 %r24, %r29
+%r31 = zext i128 %r30 to i160
+%r33 = getelementptr i32, i32* %r4, i32 4
+%r34 = load i32, i32* %r33
+%r35 = zext i32 %r34 to i160
+%r36 = shl i160 %r35, 128
+%r37 = or i160 %r31, %r36
+%r38 = zext i160 %r37 to i192
+%r40 = getelementptr i32, i32* %r4, i32 5
+%r41 = load i32, i32* %r40
+%r42 = zext i32 %r41 to i192
+%r43 = shl i192 %r42, 160
+%r44 = or i192 %r38, %r43
+%r45 = zext i192 %r44 to i224
+%r47 = getelementptr i32, i32* %r4, i32 6
+%r48 = load i32, i32* %r47
+%r49 = zext i32 %r48 to i224
+%r50 = shl i224 %r49, 192
+%r51 = or i224 %r45, %r50
+%r52 = zext i224 %r51 to i256
+%r54 = getelementptr i32, i32* %r4, i32 7
+%r55 = load i32, i32* %r54
+%r56 = zext i32 %r55 to i256
+%r57 = shl i256 %r56, 224
+%r58 = or i256 %r52, %r57
+%r59 = zext i256 %r58 to i288
+%r60 = load i32, i32* %r2
+%r61 = zext i32 %r60 to i64
+%r63 = getelementptr i32, i32* %r2, i32 1
+%r64 = load i32, i32* %r63
+%r65 = zext i32 %r64 to i64
+%r66 = shl i64 %r65, 32
+%r67 = or i64 %r61, %r66
+%r68 = zext i64 %r67 to i96
+%r70 = getelementptr i32, i32* %r2, i32 2
+%r71 = load i32, i32* %r70
+%r72 = zext i32 %r71 to i96
+%r73 = shl i96 %r72, 64
+%r74 = or i96 %r68, %r73
+%r75 = zext i96 %r74 to i128
+%r77 = getelementptr i32, i32* %r2, i32 3
+%r78 = load i32, i32* %r77
+%r79 = zext i32 %r78 to i128
+%r80 = shl i128 %r79, 96
+%r81 = or i128 %r75, %r80
+%r82 = zext i128 %r81 to i160
+%r84 = getelementptr i32, i32* %r2, i32 4
+%r85 = load i32, i32* %r84
+%r86 = zext i32 %r85 to i160
+%r87 = shl i160 %r86, 128
+%r88 = or i160 %r82, %r87
+%r89 = zext i160 %r88 to i192
+%r91 = getelementptr i32, i32* %r2, i32 5
+%r92 = load i32, i32* %r91
+%r93 = zext i32 %r92 to i192
+%r94 = shl i192 %r93, 160
+%r95 = or i192 %r89, %r94
+%r96 = zext i192 %r95 to i224
+%r98 = getelementptr i32, i32* %r2, i32 6
+%r99 = load i32, i32* %r98
+%r100 = zext i32 %r99 to i224
+%r101 = shl i224 %r100, 192
+%r102 = or i224 %r96, %r101
+%r103 = zext i224 %r102 to i256
+%r105 = getelementptr i32, i32* %r2, i32 7
+%r106 = load i32, i32* %r105
+%r107 = zext i32 %r106 to i256
+%r108 = shl i256 %r107, 224
+%r109 = or i256 %r103, %r108
+%r110 = zext i256 %r109 to i288
+%r111 = load i32, i32* %r6
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r6, i32 1
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r6, i32 2
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r6, i32 3
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r6, i32 4
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r6, i32 5
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r6, i32 6
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r6, i32 7
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r162 = load i32, i32* %r2
+%r163 = zext i32 %r162 to i64
+%r165 = getelementptr i32, i32* %r2, i32 1
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i64
+%r168 = shl i64 %r167, 32
+%r169 = or i64 %r163, %r168
+%r170 = zext i64 %r169 to i96
+%r172 = getelementptr i32, i32* %r2, i32 2
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i96
+%r175 = shl i96 %r174, 64
+%r176 = or i96 %r170, %r175
+%r177 = zext i96 %r176 to i128
+%r179 = getelementptr i32, i32* %r2, i32 3
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i128
+%r182 = shl i128 %r181, 96
+%r183 = or i128 %r177, %r182
+%r184 = zext i128 %r183 to i160
+%r186 = getelementptr i32, i32* %r2, i32 4
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i160
+%r189 = shl i160 %r188, 128
+%r190 = or i160 %r184, %r189
+%r191 = zext i160 %r190 to i192
+%r193 = getelementptr i32, i32* %r2, i32 5
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i192
+%r196 = shl i192 %r195, 160
+%r197 = or i192 %r191, %r196
+%r198 = zext i192 %r197 to i224
+%r200 = getelementptr i32, i32* %r2, i32 6
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i224
+%r203 = shl i224 %r202, 192
+%r204 = or i224 %r198, %r203
+%r205 = zext i224 %r204 to i256
+%r207 = getelementptr i32, i32* %r2, i32 7
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i256
+%r210 = shl i256 %r209, 224
+%r211 = or i256 %r205, %r210
+%r212 = zext i256 %r211 to i288
+%r213 = add i288 %r59, %r110
+%r214 = add i288 %r161, %r212
+%r216 = alloca i32, i32 16
+%r217 = trunc i288 %r213 to i256
+%r218 = trunc i288 %r214 to i256
+%r219 = lshr i288 %r213, 256
+%r220 = trunc i288 %r219 to i1
+%r221 = lshr i288 %r214, 256
+%r222 = trunc i288 %r221 to i1
+%r223 = and i1 %r220, %r222
+%r225 = select i1 %r220, i256 %r218, i256 0
+%r227 = select i1 %r222, i256 %r217, i256 0
+%r229 = alloca i32, i32 8
+%r231 = alloca i32, i32 8
+%r232 = trunc i256 %r217 to i32
+%r234 = getelementptr i32, i32* %r229, i32 0
+store i32 %r232, i32* %r234
+%r235 = lshr i256 %r217, 32
+%r236 = trunc i256 %r235 to i32
+%r238 = getelementptr i32, i32* %r229, i32 1
+store i32 %r236, i32* %r238
+%r239 = lshr i256 %r235, 32
+%r240 = trunc i256 %r239 to i32
+%r242 = getelementptr i32, i32* %r229, i32 2
+store i32 %r240, i32* %r242
+%r243 = lshr i256 %r239, 32
+%r244 = trunc i256 %r243 to i32
+%r246 = getelementptr i32, i32* %r229, i32 3
+store i32 %r244, i32* %r246
+%r247 = lshr i256 %r243, 32
+%r248 = trunc i256 %r247 to i32
+%r250 = getelementptr i32, i32* %r229, i32 4
+store i32 %r248, i32* %r250
+%r251 = lshr i256 %r247, 32
+%r252 = trunc i256 %r251 to i32
+%r254 = getelementptr i32, i32* %r229, i32 5
+store i32 %r252, i32* %r254
+%r255 = lshr i256 %r251, 32
+%r256 = trunc i256 %r255 to i32
+%r258 = getelementptr i32, i32* %r229, i32 6
+store i32 %r256, i32* %r258
+%r259 = lshr i256 %r255, 32
+%r260 = trunc i256 %r259 to i32
+%r262 = getelementptr i32, i32* %r229, i32 7
+store i32 %r260, i32* %r262
+%r263 = trunc i256 %r218 to i32
+%r265 = getelementptr i32, i32* %r231, i32 0
+store i32 %r263, i32* %r265
+%r266 = lshr i256 %r218, 32
+%r267 = trunc i256 %r266 to i32
+%r269 = getelementptr i32, i32* %r231, i32 1
+store i32 %r267, i32* %r269
+%r270 = lshr i256 %r266, 32
+%r271 = trunc i256 %r270 to i32
+%r273 = getelementptr i32, i32* %r231, i32 2
+store i32 %r271, i32* %r273
+%r274 = lshr i256 %r270, 32
+%r275 = trunc i256 %r274 to i32
+%r277 = getelementptr i32, i32* %r231, i32 3
+store i32 %r275, i32* %r277
+%r278 = lshr i256 %r274, 32
+%r279 = trunc i256 %r278 to i32
+%r281 = getelementptr i32, i32* %r231, i32 4
+store i32 %r279, i32* %r281
+%r282 = lshr i256 %r278, 32
+%r283 = trunc i256 %r282 to i32
+%r285 = getelementptr i32, i32* %r231, i32 5
+store i32 %r283, i32* %r285
+%r286 = lshr i256 %r282, 32
+%r287 = trunc i256 %r286 to i32
+%r289 = getelementptr i32, i32* %r231, i32 6
+store i32 %r287, i32* %r289
+%r290 = lshr i256 %r286, 32
+%r291 = trunc i256 %r290 to i32
+%r293 = getelementptr i32, i32* %r231, i32 7
+store i32 %r291, i32* %r293
+call void @mcl_fpDbl_mulPre8L(i32* %r216, i32* %r229, i32* %r231)
+%r294 = load i32, i32* %r216
+%r295 = zext i32 %r294 to i64
+%r297 = getelementptr i32, i32* %r216, i32 1
+%r298 = load i32, i32* %r297
+%r299 = zext i32 %r298 to i64
+%r300 = shl i64 %r299, 32
+%r301 = or i64 %r295, %r300
+%r302 = zext i64 %r301 to i96
+%r304 = getelementptr i32, i32* %r216, i32 2
+%r305 = load i32, i32* %r304
+%r306 = zext i32 %r305 to i96
+%r307 = shl i96 %r306, 64
+%r308 = or i96 %r302, %r307
+%r309 = zext i96 %r308 to i128
+%r311 = getelementptr i32, i32* %r216, i32 3
+%r312 = load i32, i32* %r311
+%r313 = zext i32 %r312 to i128
+%r314 = shl i128 %r313, 96
+%r315 = or i128 %r309, %r314
+%r316 = zext i128 %r315 to i160
+%r318 = getelementptr i32, i32* %r216, i32 4
+%r319 = load i32, i32* %r318
+%r320 = zext i32 %r319 to i160
+%r321 = shl i160 %r320, 128
+%r322 = or i160 %r316, %r321
+%r323 = zext i160 %r322 to i192
+%r325 = getelementptr i32, i32* %r216, i32 5
+%r326 = load i32, i32* %r325
+%r327 = zext i32 %r326 to i192
+%r328 = shl i192 %r327, 160
+%r329 = or i192 %r323, %r328
+%r330 = zext i192 %r329 to i224
+%r332 = getelementptr i32, i32* %r216, i32 6
+%r333 = load i32, i32* %r332
+%r334 = zext i32 %r333 to i224
+%r335 = shl i224 %r334, 192
+%r336 = or i224 %r330, %r335
+%r337 = zext i224 %r336 to i256
+%r339 = getelementptr i32, i32* %r216, i32 7
+%r340 = load i32, i32* %r339
+%r341 = zext i32 %r340 to i256
+%r342 = shl i256 %r341, 224
+%r343 = or i256 %r337, %r342
+%r344 = zext i256 %r343 to i288
+%r346 = getelementptr i32, i32* %r216, i32 8
+%r347 = load i32, i32* %r346
+%r348 = zext i32 %r347 to i288
+%r349 = shl i288 %r348, 256
+%r350 = or i288 %r344, %r349
+%r351 = zext i288 %r350 to i320
+%r353 = getelementptr i32, i32* %r216, i32 9
+%r354 = load i32, i32* %r353
+%r355 = zext i32 %r354 to i320
+%r356 = shl i320 %r355, 288
+%r357 = or i320 %r351, %r356
+%r358 = zext i320 %r357 to i352
+%r360 = getelementptr i32, i32* %r216, i32 10
+%r361 = load i32, i32* %r360
+%r362 = zext i32 %r361 to i352
+%r363 = shl i352 %r362, 320
+%r364 = or i352 %r358, %r363
+%r365 = zext i352 %r364 to i384
+%r367 = getelementptr i32, i32* %r216, i32 11
+%r368 = load i32, i32* %r367
+%r369 = zext i32 %r368 to i384
+%r370 = shl i384 %r369, 352
+%r371 = or i384 %r365, %r370
+%r372 = zext i384 %r371 to i416
+%r374 = getelementptr i32, i32* %r216, i32 12
+%r375 = load i32, i32* %r374
+%r376 = zext i32 %r375 to i416
+%r377 = shl i416 %r376, 384
+%r378 = or i416 %r372, %r377
+%r379 = zext i416 %r378 to i448
+%r381 = getelementptr i32, i32* %r216, i32 13
+%r382 = load i32, i32* %r381
+%r383 = zext i32 %r382 to i448
+%r384 = shl i448 %r383, 416
+%r385 = or i448 %r379, %r384
+%r386 = zext i448 %r385 to i480
+%r388 = getelementptr i32, i32* %r216, i32 14
+%r389 = load i32, i32* %r388
+%r390 = zext i32 %r389 to i480
+%r391 = shl i480 %r390, 448
+%r392 = or i480 %r386, %r391
+%r393 = zext i480 %r392 to i512
+%r395 = getelementptr i32, i32* %r216, i32 15
+%r396 = load i32, i32* %r395
+%r397 = zext i32 %r396 to i512
+%r398 = shl i512 %r397, 480
+%r399 = or i512 %r393, %r398
+%r400 = zext i512 %r399 to i544
+%r401 = zext i1 %r223 to i544
+%r402 = shl i544 %r401, 512
+%r403 = or i544 %r400, %r402
+%r404 = zext i256 %r225 to i544
+%r405 = zext i256 %r227 to i544
+%r406 = shl i544 %r404, 256
+%r407 = shl i544 %r405, 256
+%r408 = add i544 %r403, %r406
+%r409 = add i544 %r408, %r407
+%r410 = load i32, i32* %r1
+%r411 = zext i32 %r410 to i64
+%r413 = getelementptr i32, i32* %r1, i32 1
+%r414 = load i32, i32* %r413
+%r415 = zext i32 %r414 to i64
+%r416 = shl i64 %r415, 32
+%r417 = or i64 %r411, %r416
+%r418 = zext i64 %r417 to i96
+%r420 = getelementptr i32, i32* %r1, i32 2
+%r421 = load i32, i32* %r420
+%r422 = zext i32 %r421 to i96
+%r423 = shl i96 %r422, 64
+%r424 = or i96 %r418, %r423
+%r425 = zext i96 %r424 to i128
+%r427 = getelementptr i32, i32* %r1, i32 3
+%r428 = load i32, i32* %r427
+%r429 = zext i32 %r428 to i128
+%r430 = shl i128 %r429, 96
+%r431 = or i128 %r425, %r430
+%r432 = zext i128 %r431 to i160
+%r434 = getelementptr i32, i32* %r1, i32 4
+%r435 = load i32, i32* %r434
+%r436 = zext i32 %r435 to i160
+%r437 = shl i160 %r436, 128
+%r438 = or i160 %r432, %r437
+%r439 = zext i160 %r438 to i192
+%r441 = getelementptr i32, i32* %r1, i32 5
+%r442 = load i32, i32* %r441
+%r443 = zext i32 %r442 to i192
+%r444 = shl i192 %r443, 160
+%r445 = or i192 %r439, %r444
+%r446 = zext i192 %r445 to i224
+%r448 = getelementptr i32, i32* %r1, i32 6
+%r449 = load i32, i32* %r448
+%r450 = zext i32 %r449 to i224
+%r451 = shl i224 %r450, 192
+%r452 = or i224 %r446, %r451
+%r453 = zext i224 %r452 to i256
+%r455 = getelementptr i32, i32* %r1, i32 7
+%r456 = load i32, i32* %r455
+%r457 = zext i32 %r456 to i256
+%r458 = shl i256 %r457, 224
+%r459 = or i256 %r453, %r458
+%r460 = zext i256 %r459 to i288
+%r462 = getelementptr i32, i32* %r1, i32 8
+%r463 = load i32, i32* %r462
+%r464 = zext i32 %r463 to i288
+%r465 = shl i288 %r464, 256
+%r466 = or i288 %r460, %r465
+%r467 = zext i288 %r466 to i320
+%r469 = getelementptr i32, i32* %r1, i32 9
+%r470 = load i32, i32* %r469
+%r471 = zext i32 %r470 to i320
+%r472 = shl i320 %r471, 288
+%r473 = or i320 %r467, %r472
+%r474 = zext i320 %r473 to i352
+%r476 = getelementptr i32, i32* %r1, i32 10
+%r477 = load i32, i32* %r476
+%r478 = zext i32 %r477 to i352
+%r479 = shl i352 %r478, 320
+%r480 = or i352 %r474, %r479
+%r481 = zext i352 %r480 to i384
+%r483 = getelementptr i32, i32* %r1, i32 11
+%r484 = load i32, i32* %r483
+%r485 = zext i32 %r484 to i384
+%r486 = shl i384 %r485, 352
+%r487 = or i384 %r481, %r486
+%r488 = zext i384 %r487 to i416
+%r490 = getelementptr i32, i32* %r1, i32 12
+%r491 = load i32, i32* %r490
+%r492 = zext i32 %r491 to i416
+%r493 = shl i416 %r492, 384
+%r494 = or i416 %r488, %r493
+%r495 = zext i416 %r494 to i448
+%r497 = getelementptr i32, i32* %r1, i32 13
+%r498 = load i32, i32* %r497
+%r499 = zext i32 %r498 to i448
+%r500 = shl i448 %r499, 416
+%r501 = or i448 %r495, %r500
+%r502 = zext i448 %r501 to i480
+%r504 = getelementptr i32, i32* %r1, i32 14
+%r505 = load i32, i32* %r504
+%r506 = zext i32 %r505 to i480
+%r507 = shl i480 %r506, 448
+%r508 = or i480 %r502, %r507
+%r509 = zext i480 %r508 to i512
+%r511 = getelementptr i32, i32* %r1, i32 15
+%r512 = load i32, i32* %r511
+%r513 = zext i32 %r512 to i512
+%r514 = shl i512 %r513, 480
+%r515 = or i512 %r509, %r514
+%r516 = zext i512 %r515 to i544
+%r517 = sub i544 %r409, %r516
+%r519 = getelementptr i32, i32* %r1, i32 16
+%r520 = load i32, i32* %r519
+%r521 = zext i32 %r520 to i64
+%r523 = getelementptr i32, i32* %r519, i32 1
+%r524 = load i32, i32* %r523
+%r525 = zext i32 %r524 to i64
+%r526 = shl i64 %r525, 32
+%r527 = or i64 %r521, %r526
+%r528 = zext i64 %r527 to i96
+%r530 = getelementptr i32, i32* %r519, i32 2
+%r531 = load i32, i32* %r530
+%r532 = zext i32 %r531 to i96
+%r533 = shl i96 %r532, 64
+%r534 = or i96 %r528, %r533
+%r535 = zext i96 %r534 to i128
+%r537 = getelementptr i32, i32* %r519, i32 3
+%r538 = load i32, i32* %r537
+%r539 = zext i32 %r538 to i128
+%r540 = shl i128 %r539, 96
+%r541 = or i128 %r535, %r540
+%r542 = zext i128 %r541 to i160
+%r544 = getelementptr i32, i32* %r519, i32 4
+%r545 = load i32, i32* %r544
+%r546 = zext i32 %r545 to i160
+%r547 = shl i160 %r546, 128
+%r548 = or i160 %r542, %r547
+%r549 = zext i160 %r548 to i192
+%r551 = getelementptr i32, i32* %r519, i32 5
+%r552 = load i32, i32* %r551
+%r553 = zext i32 %r552 to i192
+%r554 = shl i192 %r553, 160
+%r555 = or i192 %r549, %r554
+%r556 = zext i192 %r555 to i224
+%r558 = getelementptr i32, i32* %r519, i32 6
+%r559 = load i32, i32* %r558
+%r560 = zext i32 %r559 to i224
+%r561 = shl i224 %r560, 192
+%r562 = or i224 %r556, %r561
+%r563 = zext i224 %r562 to i256
+%r565 = getelementptr i32, i32* %r519, i32 7
+%r566 = load i32, i32* %r565
+%r567 = zext i32 %r566 to i256
+%r568 = shl i256 %r567, 224
+%r569 = or i256 %r563, %r568
+%r570 = zext i256 %r569 to i288
+%r572 = getelementptr i32, i32* %r519, i32 8
+%r573 = load i32, i32* %r572
+%r574 = zext i32 %r573 to i288
+%r575 = shl i288 %r574, 256
+%r576 = or i288 %r570, %r575
+%r577 = zext i288 %r576 to i320
+%r579 = getelementptr i32, i32* %r519, i32 9
+%r580 = load i32, i32* %r579
+%r581 = zext i32 %r580 to i320
+%r582 = shl i320 %r581, 288
+%r583 = or i320 %r577, %r582
+%r584 = zext i320 %r583 to i352
+%r586 = getelementptr i32, i32* %r519, i32 10
+%r587 = load i32, i32* %r586
+%r588 = zext i32 %r587 to i352
+%r589 = shl i352 %r588, 320
+%r590 = or i352 %r584, %r589
+%r591 = zext i352 %r590 to i384
+%r593 = getelementptr i32, i32* %r519, i32 11
+%r594 = load i32, i32* %r593
+%r595 = zext i32 %r594 to i384
+%r596 = shl i384 %r595, 352
+%r597 = or i384 %r591, %r596
+%r598 = zext i384 %r597 to i416
+%r600 = getelementptr i32, i32* %r519, i32 12
+%r601 = load i32, i32* %r600
+%r602 = zext i32 %r601 to i416
+%r603 = shl i416 %r602, 384
+%r604 = or i416 %r598, %r603
+%r605 = zext i416 %r604 to i448
+%r607 = getelementptr i32, i32* %r519, i32 13
+%r608 = load i32, i32* %r607
+%r609 = zext i32 %r608 to i448
+%r610 = shl i448 %r609, 416
+%r611 = or i448 %r605, %r610
+%r612 = zext i448 %r611 to i480
+%r614 = getelementptr i32, i32* %r519, i32 14
+%r615 = load i32, i32* %r614
+%r616 = zext i32 %r615 to i480
+%r617 = shl i480 %r616, 448
+%r618 = or i480 %r612, %r617
+%r619 = zext i480 %r618 to i512
+%r621 = getelementptr i32, i32* %r519, i32 15
+%r622 = load i32, i32* %r621
+%r623 = zext i32 %r622 to i512
+%r624 = shl i512 %r623, 480
+%r625 = or i512 %r619, %r624
+%r626 = zext i512 %r625 to i544
+%r627 = sub i544 %r517, %r626
+%r628 = zext i544 %r627 to i768
+%r630 = getelementptr i32, i32* %r1, i32 8
+%r631 = load i32, i32* %r630
+%r632 = zext i32 %r631 to i64
+%r634 = getelementptr i32, i32* %r630, i32 1
+%r635 = load i32, i32* %r634
+%r636 = zext i32 %r635 to i64
+%r637 = shl i64 %r636, 32
+%r638 = or i64 %r632, %r637
+%r639 = zext i64 %r638 to i96
+%r641 = getelementptr i32, i32* %r630, i32 2
+%r642 = load i32, i32* %r641
+%r643 = zext i32 %r642 to i96
+%r644 = shl i96 %r643, 64
+%r645 = or i96 %r639, %r644
+%r646 = zext i96 %r645 to i128
+%r648 = getelementptr i32, i32* %r630, i32 3
+%r649 = load i32, i32* %r648
+%r650 = zext i32 %r649 to i128
+%r651 = shl i128 %r650, 96
+%r652 = or i128 %r646, %r651
+%r653 = zext i128 %r652 to i160
+%r655 = getelementptr i32, i32* %r630, i32 4
+%r656 = load i32, i32* %r655
+%r657 = zext i32 %r656 to i160
+%r658 = shl i160 %r657, 128
+%r659 = or i160 %r653, %r658
+%r660 = zext i160 %r659 to i192
+%r662 = getelementptr i32, i32* %r630, i32 5
+%r663 = load i32, i32* %r662
+%r664 = zext i32 %r663 to i192
+%r665 = shl i192 %r664, 160
+%r666 = or i192 %r660, %r665
+%r667 = zext i192 %r666 to i224
+%r669 = getelementptr i32, i32* %r630, i32 6
+%r670 = load i32, i32* %r669
+%r671 = zext i32 %r670 to i224
+%r672 = shl i224 %r671, 192
+%r673 = or i224 %r667, %r672
+%r674 = zext i224 %r673 to i256
+%r676 = getelementptr i32, i32* %r630, i32 7
+%r677 = load i32, i32* %r676
+%r678 = zext i32 %r677 to i256
+%r679 = shl i256 %r678, 224
+%r680 = or i256 %r674, %r679
+%r681 = zext i256 %r680 to i288
+%r683 = getelementptr i32, i32* %r630, i32 8
+%r684 = load i32, i32* %r683
+%r685 = zext i32 %r684 to i288
+%r686 = shl i288 %r685, 256
+%r687 = or i288 %r681, %r686
+%r688 = zext i288 %r687 to i320
+%r690 = getelementptr i32, i32* %r630, i32 9
+%r691 = load i32, i32* %r690
+%r692 = zext i32 %r691 to i320
+%r693 = shl i320 %r692, 288
+%r694 = or i320 %r688, %r693
+%r695 = zext i320 %r694 to i352
+%r697 = getelementptr i32, i32* %r630, i32 10
+%r698 = load i32, i32* %r697
+%r699 = zext i32 %r698 to i352
+%r700 = shl i352 %r699, 320
+%r701 = or i352 %r695, %r700
+%r702 = zext i352 %r701 to i384
+%r704 = getelementptr i32, i32* %r630, i32 11
+%r705 = load i32, i32* %r704
+%r706 = zext i32 %r705 to i384
+%r707 = shl i384 %r706, 352
+%r708 = or i384 %r702, %r707
+%r709 = zext i384 %r708 to i416
+%r711 = getelementptr i32, i32* %r630, i32 12
+%r712 = load i32, i32* %r711
+%r713 = zext i32 %r712 to i416
+%r714 = shl i416 %r713, 384
+%r715 = or i416 %r709, %r714
+%r716 = zext i416 %r715 to i448
+%r718 = getelementptr i32, i32* %r630, i32 13
+%r719 = load i32, i32* %r718
+%r720 = zext i32 %r719 to i448
+%r721 = shl i448 %r720, 416
+%r722 = or i448 %r716, %r721
+%r723 = zext i448 %r722 to i480
+%r725 = getelementptr i32, i32* %r630, i32 14
+%r726 = load i32, i32* %r725
+%r727 = zext i32 %r726 to i480
+%r728 = shl i480 %r727, 448
+%r729 = or i480 %r723, %r728
+%r730 = zext i480 %r729 to i512
+%r732 = getelementptr i32, i32* %r630, i32 15
+%r733 = load i32, i32* %r732
+%r734 = zext i32 %r733 to i512
+%r735 = shl i512 %r734, 480
+%r736 = or i512 %r730, %r735
+%r737 = zext i512 %r736 to i544
+%r739 = getelementptr i32, i32* %r630, i32 16
+%r740 = load i32, i32* %r739
+%r741 = zext i32 %r740 to i544
+%r742 = shl i544 %r741, 512
+%r743 = or i544 %r737, %r742
+%r744 = zext i544 %r743 to i576
+%r746 = getelementptr i32, i32* %r630, i32 17
+%r747 = load i32, i32* %r746
+%r748 = zext i32 %r747 to i576
+%r749 = shl i576 %r748, 544
+%r750 = or i576 %r744, %r749
+%r751 = zext i576 %r750 to i608
+%r753 = getelementptr i32, i32* %r630, i32 18
+%r754 = load i32, i32* %r753
+%r755 = zext i32 %r754 to i608
+%r756 = shl i608 %r755, 576
+%r757 = or i608 %r751, %r756
+%r758 = zext i608 %r757 to i640
+%r760 = getelementptr i32, i32* %r630, i32 19
+%r761 = load i32, i32* %r760
+%r762 = zext i32 %r761 to i640
+%r763 = shl i640 %r762, 608
+%r764 = or i640 %r758, %r763
+%r765 = zext i640 %r764 to i672
+%r767 = getelementptr i32, i32* %r630, i32 20
+%r768 = load i32, i32* %r767
+%r769 = zext i32 %r768 to i672
+%r770 = shl i672 %r769, 640
+%r771 = or i672 %r765, %r770
+%r772 = zext i672 %r771 to i704
+%r774 = getelementptr i32, i32* %r630, i32 21
+%r775 = load i32, i32* %r774
+%r776 = zext i32 %r775 to i704
+%r777 = shl i704 %r776, 672
+%r778 = or i704 %r772, %r777
+%r779 = zext i704 %r778 to i736
+%r781 = getelementptr i32, i32* %r630, i32 22
+%r782 = load i32, i32* %r781
+%r783 = zext i32 %r782 to i736
+%r784 = shl i736 %r783, 704
+%r785 = or i736 %r779, %r784
+%r786 = zext i736 %r785 to i768
+%r788 = getelementptr i32, i32* %r630, i32 23
+%r789 = load i32, i32* %r788
+%r790 = zext i32 %r789 to i768
+%r791 = shl i768 %r790, 736
+%r792 = or i768 %r786, %r791
+%r793 = add i768 %r628, %r792
+%r795 = getelementptr i32, i32* %r1, i32 8
+%r796 = trunc i768 %r793 to i32
+%r798 = getelementptr i32, i32* %r795, i32 0
+store i32 %r796, i32* %r798
+%r799 = lshr i768 %r793, 32
+%r800 = trunc i768 %r799 to i32
+%r802 = getelementptr i32, i32* %r795, i32 1
+store i32 %r800, i32* %r802
+%r803 = lshr i768 %r799, 32
+%r804 = trunc i768 %r803 to i32
+%r806 = getelementptr i32, i32* %r795, i32 2
+store i32 %r804, i32* %r806
+%r807 = lshr i768 %r803, 32
+%r808 = trunc i768 %r807 to i32
+%r810 = getelementptr i32, i32* %r795, i32 3
+store i32 %r808, i32* %r810
+%r811 = lshr i768 %r807, 32
+%r812 = trunc i768 %r811 to i32
+%r814 = getelementptr i32, i32* %r795, i32 4
+store i32 %r812, i32* %r814
+%r815 = lshr i768 %r811, 32
+%r816 = trunc i768 %r815 to i32
+%r818 = getelementptr i32, i32* %r795, i32 5
+store i32 %r816, i32* %r818
+%r819 = lshr i768 %r815, 32
+%r820 = trunc i768 %r819 to i32
+%r822 = getelementptr i32, i32* %r795, i32 6
+store i32 %r820, i32* %r822
+%r823 = lshr i768 %r819, 32
+%r824 = trunc i768 %r823 to i32
+%r826 = getelementptr i32, i32* %r795, i32 7
+store i32 %r824, i32* %r826
+%r827 = lshr i768 %r823, 32
+%r828 = trunc i768 %r827 to i32
+%r830 = getelementptr i32, i32* %r795, i32 8
+store i32 %r828, i32* %r830
+%r831 = lshr i768 %r827, 32
+%r832 = trunc i768 %r831 to i32
+%r834 = getelementptr i32, i32* %r795, i32 9
+store i32 %r832, i32* %r834
+%r835 = lshr i768 %r831, 32
+%r836 = trunc i768 %r835 to i32
+%r838 = getelementptr i32, i32* %r795, i32 10
+store i32 %r836, i32* %r838
+%r839 = lshr i768 %r835, 32
+%r840 = trunc i768 %r839 to i32
+%r842 = getelementptr i32, i32* %r795, i32 11
+store i32 %r840, i32* %r842
+%r843 = lshr i768 %r839, 32
+%r844 = trunc i768 %r843 to i32
+%r846 = getelementptr i32, i32* %r795, i32 12
+store i32 %r844, i32* %r846
+%r847 = lshr i768 %r843, 32
+%r848 = trunc i768 %r847 to i32
+%r850 = getelementptr i32, i32* %r795, i32 13
+store i32 %r848, i32* %r850
+%r851 = lshr i768 %r847, 32
+%r852 = trunc i768 %r851 to i32
+%r854 = getelementptr i32, i32* %r795, i32 14
+store i32 %r852, i32* %r854
+%r855 = lshr i768 %r851, 32
+%r856 = trunc i768 %r855 to i32
+%r858 = getelementptr i32, i32* %r795, i32 15
+store i32 %r856, i32* %r858
+%r859 = lshr i768 %r855, 32
+%r860 = trunc i768 %r859 to i32
+%r862 = getelementptr i32, i32* %r795, i32 16
+store i32 %r860, i32* %r862
+%r863 = lshr i768 %r859, 32
+%r864 = trunc i768 %r863 to i32
+%r866 = getelementptr i32, i32* %r795, i32 17
+store i32 %r864, i32* %r866
+%r867 = lshr i768 %r863, 32
+%r868 = trunc i768 %r867 to i32
+%r870 = getelementptr i32, i32* %r795, i32 18
+store i32 %r868, i32* %r870
+%r871 = lshr i768 %r867, 32
+%r872 = trunc i768 %r871 to i32
+%r874 = getelementptr i32, i32* %r795, i32 19
+store i32 %r872, i32* %r874
+%r875 = lshr i768 %r871, 32
+%r876 = trunc i768 %r875 to i32
+%r878 = getelementptr i32, i32* %r795, i32 20
+store i32 %r876, i32* %r878
+%r879 = lshr i768 %r875, 32
+%r880 = trunc i768 %r879 to i32
+%r882 = getelementptr i32, i32* %r795, i32 21
+store i32 %r880, i32* %r882
+%r883 = lshr i768 %r879, 32
+%r884 = trunc i768 %r883 to i32
+%r886 = getelementptr i32, i32* %r795, i32 22
+store i32 %r884, i32* %r886
+%r887 = lshr i768 %r883, 32
+%r888 = trunc i768 %r887 to i32
+%r890 = getelementptr i32, i32* %r795, i32 23
+store i32 %r888, i32* %r890
+ret void
+}
+define void @mcl_fp_mont16L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)  ; stores 16 i32 words to %r1 computed from %r2, %r3 and %r4 (name suggests 16-limb Montgomery multiplication -- TODO confirm)
+{  ; 16 rounds, each: acc += %r2 * %r3[i]; q = low32(acc) * %r7; acc += %r4 * q; acc >>= 32; then one conditional subtraction of %r4
+%r6 = getelementptr i32, i32* %r4, i32 -1  ; address of the i32 word stored immediately before %r4[0]
+%r7 = load i32, i32* %r6  ; per-round multiplier (presumably the Montgomery constant for the modulus at %r4 -- confirm against caller)
+%r9 = getelementptr i32, i32* %r3, i32 0  ; round 0: %r3[0]
+%r10 = load i32, i32* %r9
+%r11 = call i544 @mulPv512x32(i32* %r2, i32 %r10)  ; helper defined outside this view; presumably 512-bit vector times 32-bit word
+%r12 = zext i544 %r11 to i576  ; widened to i576 so the add of two 544-bit values below cannot wrap
+%r13 = trunc i544 %r11 to i32  ; low 32 bits of the accumulator
+%r14 = mul i32 %r13, %r7  ; q = low word * %r7 (i32 multiply, wraps mod 2^32)
+%r15 = call i544 @mulPv512x32(i32* %r4, i32 %r14)  ; %r4 * q
+%r16 = zext i544 %r15 to i576
+%r17 = add i576 %r12, %r16
+%r18 = lshr i576 %r17, 32  ; shift the accumulator right by one 32-bit word
+%r20 = getelementptr i32, i32* %r3, i32 1  ; round 1: %r3[1]
+%r21 = load i32, i32* %r20
+%r22 = call i544 @mulPv512x32(i32* %r2, i32 %r21)
+%r23 = zext i544 %r22 to i576
+%r24 = add i576 %r18, %r23
+%r25 = trunc i576 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i544 @mulPv512x32(i32* %r4, i32 %r26)
+%r28 = zext i544 %r27 to i576
+%r29 = add i576 %r24, %r28
+%r30 = lshr i576 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2  ; round 2: %r3[2]
+%r33 = load i32, i32* %r32
+%r34 = call i544 @mulPv512x32(i32* %r2, i32 %r33)
+%r35 = zext i544 %r34 to i576
+%r36 = add i576 %r30, %r35
+%r37 = trunc i576 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i544 @mulPv512x32(i32* %r4, i32 %r38)
+%r40 = zext i544 %r39 to i576
+%r41 = add i576 %r36, %r40
+%r42 = lshr i576 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3  ; round 3: %r3[3]
+%r45 = load i32, i32* %r44
+%r46 = call i544 @mulPv512x32(i32* %r2, i32 %r45)
+%r47 = zext i544 %r46 to i576
+%r48 = add i576 %r42, %r47
+%r49 = trunc i576 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i544 @mulPv512x32(i32* %r4, i32 %r50)
+%r52 = zext i544 %r51 to i576
+%r53 = add i576 %r48, %r52
+%r54 = lshr i576 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4  ; round 4: %r3[4]
+%r57 = load i32, i32* %r56
+%r58 = call i544 @mulPv512x32(i32* %r2, i32 %r57)
+%r59 = zext i544 %r58 to i576
+%r60 = add i576 %r54, %r59
+%r61 = trunc i576 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i544 @mulPv512x32(i32* %r4, i32 %r62)
+%r64 = zext i544 %r63 to i576
+%r65 = add i576 %r60, %r64
+%r66 = lshr i576 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5  ; round 5: %r3[5]
+%r69 = load i32, i32* %r68
+%r70 = call i544 @mulPv512x32(i32* %r2, i32 %r69)
+%r71 = zext i544 %r70 to i576
+%r72 = add i576 %r66, %r71
+%r73 = trunc i576 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i544 @mulPv512x32(i32* %r4, i32 %r74)
+%r76 = zext i544 %r75 to i576
+%r77 = add i576 %r72, %r76
+%r78 = lshr i576 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6  ; round 6: %r3[6]
+%r81 = load i32, i32* %r80
+%r82 = call i544 @mulPv512x32(i32* %r2, i32 %r81)
+%r83 = zext i544 %r82 to i576
+%r84 = add i576 %r78, %r83
+%r85 = trunc i576 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i544 @mulPv512x32(i32* %r4, i32 %r86)
+%r88 = zext i544 %r87 to i576
+%r89 = add i576 %r84, %r88
+%r90 = lshr i576 %r89, 32
+%r92 = getelementptr i32, i32* %r3, i32 7  ; round 7: %r3[7]
+%r93 = load i32, i32* %r92
+%r94 = call i544 @mulPv512x32(i32* %r2, i32 %r93)
+%r95 = zext i544 %r94 to i576
+%r96 = add i576 %r90, %r95
+%r97 = trunc i576 %r96 to i32
+%r98 = mul i32 %r97, %r7
+%r99 = call i544 @mulPv512x32(i32* %r4, i32 %r98)
+%r100 = zext i544 %r99 to i576
+%r101 = add i576 %r96, %r100
+%r102 = lshr i576 %r101, 32
+%r104 = getelementptr i32, i32* %r3, i32 8  ; round 8: %r3[8]
+%r105 = load i32, i32* %r104
+%r106 = call i544 @mulPv512x32(i32* %r2, i32 %r105)
+%r107 = zext i544 %r106 to i576
+%r108 = add i576 %r102, %r107
+%r109 = trunc i576 %r108 to i32
+%r110 = mul i32 %r109, %r7
+%r111 = call i544 @mulPv512x32(i32* %r4, i32 %r110)
+%r112 = zext i544 %r111 to i576
+%r113 = add i576 %r108, %r112
+%r114 = lshr i576 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 9  ; round 9: %r3[9]
+%r117 = load i32, i32* %r116
+%r118 = call i544 @mulPv512x32(i32* %r2, i32 %r117)
+%r119 = zext i544 %r118 to i576
+%r120 = add i576 %r114, %r119
+%r121 = trunc i576 %r120 to i32
+%r122 = mul i32 %r121, %r7
+%r123 = call i544 @mulPv512x32(i32* %r4, i32 %r122)
+%r124 = zext i544 %r123 to i576
+%r125 = add i576 %r120, %r124
+%r126 = lshr i576 %r125, 32
+%r128 = getelementptr i32, i32* %r3, i32 10  ; round 10: %r3[10]
+%r129 = load i32, i32* %r128
+%r130 = call i544 @mulPv512x32(i32* %r2, i32 %r129)
+%r131 = zext i544 %r130 to i576
+%r132 = add i576 %r126, %r131
+%r133 = trunc i576 %r132 to i32
+%r134 = mul i32 %r133, %r7
+%r135 = call i544 @mulPv512x32(i32* %r4, i32 %r134)
+%r136 = zext i544 %r135 to i576
+%r137 = add i576 %r132, %r136
+%r138 = lshr i576 %r137, 32
+%r140 = getelementptr i32, i32* %r3, i32 11  ; round 11: %r3[11]
+%r141 = load i32, i32* %r140
+%r142 = call i544 @mulPv512x32(i32* %r2, i32 %r141)
+%r143 = zext i544 %r142 to i576
+%r144 = add i576 %r138, %r143
+%r145 = trunc i576 %r144 to i32
+%r146 = mul i32 %r145, %r7
+%r147 = call i544 @mulPv512x32(i32* %r4, i32 %r146)
+%r148 = zext i544 %r147 to i576
+%r149 = add i576 %r144, %r148
+%r150 = lshr i576 %r149, 32
+%r152 = getelementptr i32, i32* %r3, i32 12  ; round 12: %r3[12]
+%r153 = load i32, i32* %r152
+%r154 = call i544 @mulPv512x32(i32* %r2, i32 %r153)
+%r155 = zext i544 %r154 to i576
+%r156 = add i576 %r150, %r155
+%r157 = trunc i576 %r156 to i32
+%r158 = mul i32 %r157, %r7
+%r159 = call i544 @mulPv512x32(i32* %r4, i32 %r158)
+%r160 = zext i544 %r159 to i576
+%r161 = add i576 %r156, %r160
+%r162 = lshr i576 %r161, 32
+%r164 = getelementptr i32, i32* %r3, i32 13  ; round 13: %r3[13]
+%r165 = load i32, i32* %r164
+%r166 = call i544 @mulPv512x32(i32* %r2, i32 %r165)
+%r167 = zext i544 %r166 to i576
+%r168 = add i576 %r162, %r167
+%r169 = trunc i576 %r168 to i32
+%r170 = mul i32 %r169, %r7
+%r171 = call i544 @mulPv512x32(i32* %r4, i32 %r170)
+%r172 = zext i544 %r171 to i576
+%r173 = add i576 %r168, %r172
+%r174 = lshr i576 %r173, 32
+%r176 = getelementptr i32, i32* %r3, i32 14  ; round 14: %r3[14]
+%r177 = load i32, i32* %r176
+%r178 = call i544 @mulPv512x32(i32* %r2, i32 %r177)
+%r179 = zext i544 %r178 to i576
+%r180 = add i576 %r174, %r179
+%r181 = trunc i576 %r180 to i32
+%r182 = mul i32 %r181, %r7
+%r183 = call i544 @mulPv512x32(i32* %r4, i32 %r182)
+%r184 = zext i544 %r183 to i576
+%r185 = add i576 %r180, %r184
+%r186 = lshr i576 %r185, 32
+%r188 = getelementptr i32, i32* %r3, i32 15  ; round 15 (last): %r3[15]
+%r189 = load i32, i32* %r188
+%r190 = call i544 @mulPv512x32(i32* %r2, i32 %r189)
+%r191 = zext i544 %r190 to i576
+%r192 = add i576 %r186, %r191
+%r193 = trunc i576 %r192 to i32
+%r194 = mul i32 %r193, %r7
+%r195 = call i544 @mulPv512x32(i32* %r4, i32 %r194)
+%r196 = zext i544 %r195 to i576
+%r197 = add i576 %r192, %r196
+%r198 = lshr i576 %r197, 32
+%r199 = trunc i576 %r198 to i544  ; final accumulator, kept as 544 bits
+%r200 = load i32, i32* %r4  ; from here to %r305: pack %r4[0..15] into one i512, word 0 in the least-significant bits
+%r201 = zext i32 %r200 to i64
+%r203 = getelementptr i32, i32* %r4, i32 1
+%r204 = load i32, i32* %r203
+%r205 = zext i32 %r204 to i64
+%r206 = shl i64 %r205, 32
+%r207 = or i64 %r201, %r206
+%r208 = zext i64 %r207 to i96
+%r210 = getelementptr i32, i32* %r4, i32 2
+%r211 = load i32, i32* %r210
+%r212 = zext i32 %r211 to i96
+%r213 = shl i96 %r212, 64
+%r214 = or i96 %r208, %r213
+%r215 = zext i96 %r214 to i128
+%r217 = getelementptr i32, i32* %r4, i32 3
+%r218 = load i32, i32* %r217
+%r219 = zext i32 %r218 to i128
+%r220 = shl i128 %r219, 96
+%r221 = or i128 %r215, %r220
+%r222 = zext i128 %r221 to i160
+%r224 = getelementptr i32, i32* %r4, i32 4
+%r225 = load i32, i32* %r224
+%r226 = zext i32 %r225 to i160
+%r227 = shl i160 %r226, 128
+%r228 = or i160 %r222, %r227
+%r229 = zext i160 %r228 to i192
+%r231 = getelementptr i32, i32* %r4, i32 5
+%r232 = load i32, i32* %r231
+%r233 = zext i32 %r232 to i192
+%r234 = shl i192 %r233, 160
+%r235 = or i192 %r229, %r234
+%r236 = zext i192 %r235 to i224
+%r238 = getelementptr i32, i32* %r4, i32 6
+%r239 = load i32, i32* %r238
+%r240 = zext i32 %r239 to i224
+%r241 = shl i224 %r240, 192
+%r242 = or i224 %r236, %r241
+%r243 = zext i224 %r242 to i256
+%r245 = getelementptr i32, i32* %r4, i32 7
+%r246 = load i32, i32* %r245
+%r247 = zext i32 %r246 to i256
+%r248 = shl i256 %r247, 224
+%r249 = or i256 %r243, %r248
+%r250 = zext i256 %r249 to i288
+%r252 = getelementptr i32, i32* %r4, i32 8
+%r253 = load i32, i32* %r252
+%r254 = zext i32 %r253 to i288
+%r255 = shl i288 %r254, 256
+%r256 = or i288 %r250, %r255
+%r257 = zext i288 %r256 to i320
+%r259 = getelementptr i32, i32* %r4, i32 9
+%r260 = load i32, i32* %r259
+%r261 = zext i32 %r260 to i320
+%r262 = shl i320 %r261, 288
+%r263 = or i320 %r257, %r262
+%r264 = zext i320 %r263 to i352
+%r266 = getelementptr i32, i32* %r4, i32 10
+%r267 = load i32, i32* %r266
+%r268 = zext i32 %r267 to i352
+%r269 = shl i352 %r268, 320
+%r270 = or i352 %r264, %r269
+%r271 = zext i352 %r270 to i384
+%r273 = getelementptr i32, i32* %r4, i32 11
+%r274 = load i32, i32* %r273
+%r275 = zext i32 %r274 to i384
+%r276 = shl i384 %r275, 352
+%r277 = or i384 %r271, %r276
+%r278 = zext i384 %r277 to i416
+%r280 = getelementptr i32, i32* %r4, i32 12
+%r281 = load i32, i32* %r280
+%r282 = zext i32 %r281 to i416
+%r283 = shl i416 %r282, 384
+%r284 = or i416 %r278, %r283
+%r285 = zext i416 %r284 to i448
+%r287 = getelementptr i32, i32* %r4, i32 13
+%r288 = load i32, i32* %r287
+%r289 = zext i32 %r288 to i448
+%r290 = shl i448 %r289, 416
+%r291 = or i448 %r285, %r290
+%r292 = zext i448 %r291 to i480
+%r294 = getelementptr i32, i32* %r4, i32 14
+%r295 = load i32, i32* %r294
+%r296 = zext i32 %r295 to i480
+%r297 = shl i480 %r296, 448
+%r298 = or i480 %r292, %r297
+%r299 = zext i480 %r298 to i512
+%r301 = getelementptr i32, i32* %r4, i32 15
+%r302 = load i32, i32* %r301
+%r303 = zext i32 %r302 to i512
+%r304 = shl i512 %r303, 480
+%r305 = or i512 %r299, %r304
+%r306 = zext i512 %r305 to i544
+%r307 = sub i544 %r199, %r306  ; accumulator minus the 512-bit value at %r4, in 544-bit arithmetic
+%r308 = lshr i544 %r307, 512
+%r309 = trunc i544 %r308 to i1  ; bit 512 of the difference (presumably the borrow flag, i.e. accumulator < %r4 -- confirm)
+%r310 = select i1 %r309, i544 %r199, i544 %r307  ; bit set: keep the unsubtracted accumulator; clear: take the difference
+%r311 = trunc i544 %r310 to i512
+%r312 = trunc i512 %r311 to i32  ; from here on: write the 512-bit result to %r1[0..15], least-significant word first
+%r314 = getelementptr i32, i32* %r1, i32 0
+store i32 %r312, i32* %r314
+%r315 = lshr i512 %r311, 32
+%r316 = trunc i512 %r315 to i32
+%r318 = getelementptr i32, i32* %r1, i32 1
+store i32 %r316, i32* %r318
+%r319 = lshr i512 %r315, 32
+%r320 = trunc i512 %r319 to i32
+%r322 = getelementptr i32, i32* %r1, i32 2
+store i32 %r320, i32* %r322
+%r323 = lshr i512 %r319, 32
+%r324 = trunc i512 %r323 to i32
+%r326 = getelementptr i32, i32* %r1, i32 3
+store i32 %r324, i32* %r326
+%r327 = lshr i512 %r323, 32
+%r328 = trunc i512 %r327 to i32
+%r330 = getelementptr i32, i32* %r1, i32 4
+store i32 %r328, i32* %r330
+%r331 = lshr i512 %r327, 32
+%r332 = trunc i512 %r331 to i32
+%r334 = getelementptr i32, i32* %r1, i32 5
+store i32 %r332, i32* %r334
+%r335 = lshr i512 %r331, 32
+%r336 = trunc i512 %r335 to i32
+%r338 = getelementptr i32, i32* %r1, i32 6
+store i32 %r336, i32* %r338
+%r339 = lshr i512 %r335, 32
+%r340 = trunc i512 %r339 to i32
+%r342 = getelementptr i32, i32* %r1, i32 7
+store i32 %r340, i32* %r342
+%r343 = lshr i512 %r339, 32
+%r344 = trunc i512 %r343 to i32
+%r346 = getelementptr i32, i32* %r1, i32 8
+store i32 %r344, i32* %r346
+%r347 = lshr i512 %r343, 32
+%r348 = trunc i512 %r347 to i32
+%r350 = getelementptr i32, i32* %r1, i32 9
+store i32 %r348, i32* %r350
+%r351 = lshr i512 %r347, 32
+%r352 = trunc i512 %r351 to i32
+%r354 = getelementptr i32, i32* %r1, i32 10
+store i32 %r352, i32* %r354
+%r355 = lshr i512 %r351, 32
+%r356 = trunc i512 %r355 to i32
+%r358 = getelementptr i32, i32* %r1, i32 11
+store i32 %r356, i32* %r358
+%r359 = lshr i512 %r355, 32
+%r360 = trunc i512 %r359 to i32
+%r362 = getelementptr i32, i32* %r1, i32 12
+store i32 %r360, i32* %r362
+%r363 = lshr i512 %r359, 32
+%r364 = trunc i512 %r363 to i32
+%r366 = getelementptr i32, i32* %r1, i32 13
+store i32 %r364, i32* %r366
+%r367 = lshr i512 %r363, 32
+%r368 = trunc i512 %r367 to i32
+%r370 = getelementptr i32, i32* %r1, i32 14
+store i32 %r368, i32* %r370
+%r371 = lshr i512 %r367, 32
+%r372 = trunc i512 %r371 to i32
+%r374 = getelementptr i32, i32* %r1, i32 15
+store i32 %r372, i32* %r374
+ret void
+}
+define void @mcl_fp_montNF16L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)  ; stores 16 i32 words to %r1 computed from %r2, %r3 and %r4 (name suggests a 16-limb Montgomery multiplication variant -- TODO confirm what "NF" denotes)
+{  ; 16 rounds, each: acc += %r2 * %r3[i]; q = low32(acc) * %r7; acc += %r4 * q; acc >>= 32; all sums are formed directly in i544
+%r6 = getelementptr i32, i32* %r4, i32 -1  ; address of the i32 word stored immediately before %r4[0]
+%r7 = load i32, i32* %r6  ; per-round multiplier (presumably the Montgomery constant for the modulus at %r4 -- confirm against caller)
+%r8 = load i32, i32* %r3  ; round 0: %r3[0]
+%r9 = call i544 @mulPv512x32(i32* %r2, i32 %r8)  ; helper defined outside this view; presumably 512-bit vector times 32-bit word
+%r10 = trunc i544 %r9 to i32  ; low 32 bits of the accumulator
+%r11 = mul i32 %r10, %r7  ; q = low word * %r7 (i32 multiply, wraps mod 2^32)
+%r12 = call i544 @mulPv512x32(i32* %r4, i32 %r11)  ; %r4 * q
+%r13 = add i544 %r9, %r12  ; NOTE(review): no widening before this add, so it relies on the sum fitting in 544 bits -- presumably guaranteed by a restriction on the modulus; confirm
+%r14 = lshr i544 %r13, 32  ; shift the accumulator right by one 32-bit word
+%r16 = getelementptr i32, i32* %r3, i32 1  ; round 1: %r3[1]
+%r17 = load i32, i32* %r16
+%r18 = call i544 @mulPv512x32(i32* %r2, i32 %r17)
+%r19 = add i544 %r14, %r18
+%r20 = trunc i544 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i544 @mulPv512x32(i32* %r4, i32 %r21)
+%r23 = add i544 %r19, %r22
+%r24 = lshr i544 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2  ; round 2: %r3[2]
+%r27 = load i32, i32* %r26
+%r28 = call i544 @mulPv512x32(i32* %r2, i32 %r27)
+%r29 = add i544 %r24, %r28
+%r30 = trunc i544 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i544 @mulPv512x32(i32* %r4, i32 %r31)
+%r33 = add i544 %r29, %r32
+%r34 = lshr i544 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3  ; round 3: %r3[3]
+%r37 = load i32, i32* %r36
+%r38 = call i544 @mulPv512x32(i32* %r2, i32 %r37)
+%r39 = add i544 %r34, %r38
+%r40 = trunc i544 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i544 @mulPv512x32(i32* %r4, i32 %r41)
+%r43 = add i544 %r39, %r42
+%r44 = lshr i544 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4  ; round 4: %r3[4]
+%r47 = load i32, i32* %r46
+%r48 = call i544 @mulPv512x32(i32* %r2, i32 %r47)
+%r49 = add i544 %r44, %r48
+%r50 = trunc i544 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i544 @mulPv512x32(i32* %r4, i32 %r51)
+%r53 = add i544 %r49, %r52
+%r54 = lshr i544 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5  ; round 5: %r3[5]
+%r57 = load i32, i32* %r56
+%r58 = call i544 @mulPv512x32(i32* %r2, i32 %r57)
+%r59 = add i544 %r54, %r58
+%r60 = trunc i544 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i544 @mulPv512x32(i32* %r4, i32 %r61)
+%r63 = add i544 %r59, %r62
+%r64 = lshr i544 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6  ; round 6: %r3[6]
+%r67 = load i32, i32* %r66
+%r68 = call i544 @mulPv512x32(i32* %r2, i32 %r67)
+%r69 = add i544 %r64, %r68
+%r70 = trunc i544 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i544 @mulPv512x32(i32* %r4, i32 %r71)
+%r73 = add i544 %r69, %r72
+%r74 = lshr i544 %r73, 32
+%r76 = getelementptr i32, i32* %r3, i32 7  ; round 7: %r3[7]
+%r77 = load i32, i32* %r76
+%r78 = call i544 @mulPv512x32(i32* %r2, i32 %r77)
+%r79 = add i544 %r74, %r78
+%r80 = trunc i544 %r79 to i32
+%r81 = mul i32 %r80, %r7
+%r82 = call i544 @mulPv512x32(i32* %r4, i32 %r81)
+%r83 = add i544 %r79, %r82
+%r84 = lshr i544 %r83, 32
+%r86 = getelementptr i32, i32* %r3, i32 8  ; round 8: %r3[8]
+%r87 = load i32, i32* %r86
+%r88 = call i544 @mulPv512x32(i32* %r2, i32 %r87)
+%r89 = add i544 %r84, %r88
+%r90 = trunc i544 %r89 to i32
+%r91 = mul i32 %r90, %r7
+%r92 = call i544 @mulPv512x32(i32* %r4, i32 %r91)
+%r93 = add i544 %r89, %r92
+%r94 = lshr i544 %r93, 32
+%r96 = getelementptr i32, i32* %r3, i32 9  ; round 9: %r3[9]
+%r97 = load i32, i32* %r96
+%r98 = call i544 @mulPv512x32(i32* %r2, i32 %r97)
+%r99 = add i544 %r94, %r98
+%r100 = trunc i544 %r99 to i32
+%r101 = mul i32 %r100, %r7
+%r102 = call i544 @mulPv512x32(i32* %r4, i32 %r101)
+%r103 = add i544 %r99, %r102
+%r104 = lshr i544 %r103, 32
+%r106 = getelementptr i32, i32* %r3, i32 10  ; round 10: %r3[10]
+%r107 = load i32, i32* %r106
+%r108 = call i544 @mulPv512x32(i32* %r2, i32 %r107)
+%r109 = add i544 %r104, %r108
+%r110 = trunc i544 %r109 to i32
+%r111 = mul i32 %r110, %r7
+%r112 = call i544 @mulPv512x32(i32* %r4, i32 %r111)
+%r113 = add i544 %r109, %r112
+%r114 = lshr i544 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 11  ; round 11: %r3[11]
+%r117 = load i32, i32* %r116
+%r118 = call i544 @mulPv512x32(i32* %r2, i32 %r117)
+%r119 = add i544 %r114, %r118
+%r120 = trunc i544 %r119 to i32
+%r121 = mul i32 %r120, %r7
+%r122 = call i544 @mulPv512x32(i32* %r4, i32 %r121)
+%r123 = add i544 %r119, %r122
+%r124 = lshr i544 %r123, 32
+%r126 = getelementptr i32, i32* %r3, i32 12  ; round 12: %r3[12]
+%r127 = load i32, i32* %r126
+%r128 = call i544 @mulPv512x32(i32* %r2, i32 %r127)
+%r129 = add i544 %r124, %r128
+%r130 = trunc i544 %r129 to i32
+%r131 = mul i32 %r130, %r7
+%r132 = call i544 @mulPv512x32(i32* %r4, i32 %r131)
+%r133 = add i544 %r129, %r132
+%r134 = lshr i544 %r133, 32
+%r136 = getelementptr i32, i32* %r3, i32 13  ; round 13: %r3[13]
+%r137 = load i32, i32* %r136
+%r138 = call i544 @mulPv512x32(i32* %r2, i32 %r137)
+%r139 = add i544 %r134, %r138
+%r140 = trunc i544 %r139 to i32
+%r141 = mul i32 %r140, %r7
+%r142 = call i544 @mulPv512x32(i32* %r4, i32 %r141)
+%r143 = add i544 %r139, %r142
+%r144 = lshr i544 %r143, 32
+%r146 = getelementptr i32, i32* %r3, i32 14  ; round 14: %r3[14]
+%r147 = load i32, i32* %r146
+%r148 = call i544 @mulPv512x32(i32* %r2, i32 %r147)
+%r149 = add i544 %r144, %r148
+%r150 = trunc i544 %r149 to i32
+%r151 = mul i32 %r150, %r7
+%r152 = call i544 @mulPv512x32(i32* %r4, i32 %r151)
+%r153 = add i544 %r149, %r152
+%r154 = lshr i544 %r153, 32
+%r156 = getelementptr i32, i32* %r3, i32 15  ; round 15 (last): %r3[15]
+%r157 = load i32, i32* %r156
+%r158 = call i544 @mulPv512x32(i32* %r2, i32 %r157)
+%r159 = add i544 %r154, %r158
+%r160 = trunc i544 %r159 to i32
+%r161 = mul i32 %r160, %r7
+%r162 = call i544 @mulPv512x32(i32* %r4, i32 %r161)
+%r163 = add i544 %r159, %r162
+%r164 = lshr i544 %r163, 32
+%r165 = trunc i544 %r164 to i512  ; final accumulator, truncated to 512 bits
+%r166 = load i32, i32* %r4  ; from here to %r271: pack %r4[0..15] into one i512, word 0 in the least-significant bits
+%r167 = zext i32 %r166 to i64
+%r169 = getelementptr i32, i32* %r4, i32 1
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i64
+%r172 = shl i64 %r171, 32
+%r173 = or i64 %r167, %r172
+%r174 = zext i64 %r173 to i96
+%r176 = getelementptr i32, i32* %r4, i32 2
+%r177 = load i32, i32* %r176
+%r178 = zext i32 %r177 to i96
+%r179 = shl i96 %r178, 64
+%r180 = or i96 %r174, %r179
+%r181 = zext i96 %r180 to i128
+%r183 = getelementptr i32, i32* %r4, i32 3
+%r184 = load i32, i32* %r183
+%r185 = zext i32 %r184 to i128
+%r186 = shl i128 %r185, 96
+%r187 = or i128 %r181, %r186
+%r188 = zext i128 %r187 to i160
+%r190 = getelementptr i32, i32* %r4, i32 4
+%r191 = load i32, i32* %r190
+%r192 = zext i32 %r191 to i160
+%r193 = shl i160 %r192, 128
+%r194 = or i160 %r188, %r193
+%r195 = zext i160 %r194 to i192
+%r197 = getelementptr i32, i32* %r4, i32 5
+%r198 = load i32, i32* %r197
+%r199 = zext i32 %r198 to i192
+%r200 = shl i192 %r199, 160
+%r201 = or i192 %r195, %r200
+%r202 = zext i192 %r201 to i224
+%r204 = getelementptr i32, i32* %r4, i32 6
+%r205 = load i32, i32* %r204
+%r206 = zext i32 %r205 to i224
+%r207 = shl i224 %r206, 192
+%r208 = or i224 %r202, %r207
+%r209 = zext i224 %r208 to i256
+%r211 = getelementptr i32, i32* %r4, i32 7
+%r212 = load i32, i32* %r211
+%r213 = zext i32 %r212 to i256
+%r214 = shl i256 %r213, 224
+%r215 = or i256 %r209, %r214
+%r216 = zext i256 %r215 to i288
+%r218 = getelementptr i32, i32* %r4, i32 8
+%r219 = load i32, i32* %r218
+%r220 = zext i32 %r219 to i288
+%r221 = shl i288 %r220, 256
+%r222 = or i288 %r216, %r221
+%r223 = zext i288 %r222 to i320
+%r225 = getelementptr i32, i32* %r4, i32 9
+%r226 = load i32, i32* %r225
+%r227 = zext i32 %r226 to i320
+%r228 = shl i320 %r227, 288
+%r229 = or i320 %r223, %r228
+%r230 = zext i320 %r229 to i352
+%r232 = getelementptr i32, i32* %r4, i32 10
+%r233 = load i32, i32* %r232
+%r234 = zext i32 %r233 to i352
+%r235 = shl i352 %r234, 320
+%r236 = or i352 %r230, %r235
+%r237 = zext i352 %r236 to i384
+%r239 = getelementptr i32, i32* %r4, i32 11
+%r240 = load i32, i32* %r239
+%r241 = zext i32 %r240 to i384
+%r242 = shl i384 %r241, 352
+%r243 = or i384 %r237, %r242
+%r244 = zext i384 %r243 to i416
+%r246 = getelementptr i32, i32* %r4, i32 12
+%r247 = load i32, i32* %r246
+%r248 = zext i32 %r247 to i416
+%r249 = shl i416 %r248, 384
+%r250 = or i416 %r244, %r249
+%r251 = zext i416 %r250 to i448
+%r253 = getelementptr i32, i32* %r4, i32 13
+%r254 = load i32, i32* %r253
+%r255 = zext i32 %r254 to i448
+%r256 = shl i448 %r255, 416
+%r257 = or i448 %r251, %r256
+%r258 = zext i448 %r257 to i480
+%r260 = getelementptr i32, i32* %r4, i32 14
+%r261 = load i32, i32* %r260
+%r262 = zext i32 %r261 to i480
+%r263 = shl i480 %r262, 448
+%r264 = or i480 %r258, %r263
+%r265 = zext i480 %r264 to i512
+%r267 = getelementptr i32, i32* %r4, i32 15
+%r268 = load i32, i32* %r267
+%r269 = zext i32 %r268 to i512
+%r270 = shl i512 %r269, 480
+%r271 = or i512 %r265, %r270
+%r272 = sub i512 %r165, %r271  ; accumulator minus the 512-bit value at %r4, in 512-bit arithmetic
+%r273 = lshr i512 %r272, 511
+%r274 = trunc i512 %r273 to i1  ; top bit (bit 511) of the difference, used as a sign test (presumably valid because the modulus leaves the top bit free -- confirm)
+%r275 = select i1 %r274, i512 %r165, i512 %r272  ; bit set: keep the unsubtracted accumulator; clear: take the difference
+%r276 = trunc i512 %r275 to i32  ; from here on: write the 512-bit result to %r1[0..15], least-significant word first
+%r278 = getelementptr i32, i32* %r1, i32 0
+store i32 %r276, i32* %r278
+%r279 = lshr i512 %r275, 32
+%r280 = trunc i512 %r279 to i32
+%r282 = getelementptr i32, i32* %r1, i32 1
+store i32 %r280, i32* %r282
+%r283 = lshr i512 %r279, 32
+%r284 = trunc i512 %r283 to i32
+%r286 = getelementptr i32, i32* %r1, i32 2
+store i32 %r284, i32* %r286
+%r287 = lshr i512 %r283, 32
+%r288 = trunc i512 %r287 to i32
+%r290 = getelementptr i32, i32* %r1, i32 3
+store i32 %r288, i32* %r290
+%r291 = lshr i512 %r287, 32
+%r292 = trunc i512 %r291 to i32
+%r294 = getelementptr i32, i32* %r1, i32 4
+store i32 %r292, i32* %r294
+%r295 = lshr i512 %r291, 32
+%r296 = trunc i512 %r295 to i32
+%r298 = getelementptr i32, i32* %r1, i32 5
+store i32 %r296, i32* %r298
+%r299 = lshr i512 %r295, 32
+%r300 = trunc i512 %r299 to i32
+%r302 = getelementptr i32, i32* %r1, i32 6
+store i32 %r300, i32* %r302
+%r303 = lshr i512 %r299, 32
+%r304 = trunc i512 %r303 to i32
+%r306 = getelementptr i32, i32* %r1, i32 7
+store i32 %r304, i32* %r306
+%r307 = lshr i512 %r303, 32
+%r308 = trunc i512 %r307 to i32
+%r310 = getelementptr i32, i32* %r1, i32 8
+store i32 %r308, i32* %r310
+%r311 = lshr i512 %r307, 32
+%r312 = trunc i512 %r311 to i32
+%r314 = getelementptr i32, i32* %r1, i32 9
+store i32 %r312, i32* %r314
+%r315 = lshr i512 %r311, 32
+%r316 = trunc i512 %r315 to i32
+%r318 = getelementptr i32, i32* %r1, i32 10
+store i32 %r316, i32* %r318
+%r319 = lshr i512 %r315, 32
+%r320 = trunc i512 %r319 to i32
+%r322 = getelementptr i32, i32* %r1, i32 11
+store i32 %r320, i32* %r322
+%r323 = lshr i512 %r319, 32
+%r324 = trunc i512 %r323 to i32
+%r326 = getelementptr i32, i32* %r1, i32 12
+store i32 %r324, i32* %r326
+%r327 = lshr i512 %r323, 32
+%r328 = trunc i512 %r327 to i32
+%r330 = getelementptr i32, i32* %r1, i32 13
+store i32 %r328, i32* %r330
+%r331 = lshr i512 %r327, 32
+%r332 = trunc i512 %r331 to i32
+%r334 = getelementptr i32, i32* %r1, i32 14
+store i32 %r332, i32* %r334
+%r335 = lshr i512 %r331, 32
+%r336 = trunc i512 %r335 to i32
+%r338 = getelementptr i32, i32* %r1, i32 15
+store i32 %r336, i32* %r338
+ret void
+}
+define void @mcl_fp_montRed16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i288
+%r59 = getelementptr i32, i32* %r3, i32 8
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i288
+%r62 = shl i288 %r61, 256
+%r63 = or i288 %r57, %r62
+%r64 = zext i288 %r63 to i320
+%r66 = getelementptr i32, i32* %r3, i32 9
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i320
+%r69 = shl i320 %r68, 288
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i352
+%r73 = getelementptr i32, i32* %r3, i32 10
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i352
+%r76 = shl i352 %r75, 320
+%r77 = or i352 %r71, %r76
+%r78 = zext i352 %r77 to i384
+%r80 = getelementptr i32, i32* %r3, i32 11
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i384
+%r83 = shl i384 %r82, 352
+%r84 = or i384 %r78, %r83
+%r85 = zext i384 %r84 to i416
+%r87 = getelementptr i32, i32* %r3, i32 12
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i416
+%r90 = shl i416 %r89, 384
+%r91 = or i416 %r85, %r90
+%r92 = zext i416 %r91 to i448
+%r94 = getelementptr i32, i32* %r3, i32 13
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i448
+%r97 = shl i448 %r96, 416
+%r98 = or i448 %r92, %r97
+%r99 = zext i448 %r98 to i480
+%r101 = getelementptr i32, i32* %r3, i32 14
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i480
+%r104 = shl i480 %r103, 448
+%r105 = or i480 %r99, %r104
+%r106 = zext i480 %r105 to i512
+%r108 = getelementptr i32, i32* %r3, i32 15
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i512
+%r111 = shl i512 %r110, 480
+%r112 = or i512 %r106, %r111
+%r113 = load i32, i32* %r2
+%r114 = zext i32 %r113 to i64
+%r116 = getelementptr i32, i32* %r2, i32 1
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i64
+%r119 = shl i64 %r118, 32
+%r120 = or i64 %r114, %r119
+%r121 = zext i64 %r120 to i96
+%r123 = getelementptr i32, i32* %r2, i32 2
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i96
+%r126 = shl i96 %r125, 64
+%r127 = or i96 %r121, %r126
+%r128 = zext i96 %r127 to i128
+%r130 = getelementptr i32, i32* %r2, i32 3
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i128
+%r133 = shl i128 %r132, 96
+%r134 = or i128 %r128, %r133
+%r135 = zext i128 %r134 to i160
+%r137 = getelementptr i32, i32* %r2, i32 4
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i160
+%r140 = shl i160 %r139, 128
+%r141 = or i160 %r135, %r140
+%r142 = zext i160 %r141 to i192
+%r144 = getelementptr i32, i32* %r2, i32 5
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i192
+%r147 = shl i192 %r146, 160
+%r148 = or i192 %r142, %r147
+%r149 = zext i192 %r148 to i224
+%r151 = getelementptr i32, i32* %r2, i32 6
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i224
+%r154 = shl i224 %r153, 192
+%r155 = or i224 %r149, %r154
+%r156 = zext i224 %r155 to i256
+%r158 = getelementptr i32, i32* %r2, i32 7
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i256
+%r161 = shl i256 %r160, 224
+%r162 = or i256 %r156, %r161
+%r163 = zext i256 %r162 to i288
+%r165 = getelementptr i32, i32* %r2, i32 8
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i288
+%r168 = shl i288 %r167, 256
+%r169 = or i288 %r163, %r168
+%r170 = zext i288 %r169 to i320
+%r172 = getelementptr i32, i32* %r2, i32 9
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i320
+%r175 = shl i320 %r174, 288
+%r176 = or i320 %r170, %r175
+%r177 = zext i320 %r176 to i352
+%r179 = getelementptr i32, i32* %r2, i32 10
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i352
+%r182 = shl i352 %r181, 320
+%r183 = or i352 %r177, %r182
+%r184 = zext i352 %r183 to i384
+%r186 = getelementptr i32, i32* %r2, i32 11
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i384
+%r189 = shl i384 %r188, 352
+%r190 = or i384 %r184, %r189
+%r191 = zext i384 %r190 to i416
+%r193 = getelementptr i32, i32* %r2, i32 12
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i416
+%r196 = shl i416 %r195, 384
+%r197 = or i416 %r191, %r196
+%r198 = zext i416 %r197 to i448
+%r200 = getelementptr i32, i32* %r2, i32 13
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i448
+%r203 = shl i448 %r202, 416
+%r204 = or i448 %r198, %r203
+%r205 = zext i448 %r204 to i480
+%r207 = getelementptr i32, i32* %r2, i32 14
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i480
+%r210 = shl i480 %r209, 448
+%r211 = or i480 %r205, %r210
+%r212 = zext i480 %r211 to i512
+%r214 = getelementptr i32, i32* %r2, i32 15
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i512
+%r217 = shl i512 %r216, 480
+%r218 = or i512 %r212, %r217
+%r219 = zext i512 %r218 to i544
+%r221 = getelementptr i32, i32* %r2, i32 16
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i544
+%r224 = shl i544 %r223, 512
+%r225 = or i544 %r219, %r224
+%r226 = zext i544 %r225 to i576
+%r228 = getelementptr i32, i32* %r2, i32 17
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i576
+%r231 = shl i576 %r230, 544
+%r232 = or i576 %r226, %r231
+%r233 = zext i576 %r232 to i608
+%r235 = getelementptr i32, i32* %r2, i32 18
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i608
+%r238 = shl i608 %r237, 576
+%r239 = or i608 %r233, %r238
+%r240 = zext i608 %r239 to i640
+%r242 = getelementptr i32, i32* %r2, i32 19
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i640
+%r245 = shl i640 %r244, 608
+%r246 = or i640 %r240, %r245
+%r247 = zext i640 %r246 to i672
+%r249 = getelementptr i32, i32* %r2, i32 20
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i672
+%r252 = shl i672 %r251, 640
+%r253 = or i672 %r247, %r252
+%r254 = zext i672 %r253 to i704
+%r256 = getelementptr i32, i32* %r2, i32 21
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i704
+%r259 = shl i704 %r258, 672
+%r260 = or i704 %r254, %r259
+%r261 = zext i704 %r260 to i736
+%r263 = getelementptr i32, i32* %r2, i32 22
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i736
+%r266 = shl i736 %r265, 704
+%r267 = or i736 %r261, %r266
+%r268 = zext i736 %r267 to i768
+%r270 = getelementptr i32, i32* %r2, i32 23
+%r271 = load i32, i32* %r270
+%r272 = zext i32 %r271 to i768
+%r273 = shl i768 %r272, 736
+%r274 = or i768 %r268, %r273
+%r275 = zext i768 %r274 to i800
+%r277 = getelementptr i32, i32* %r2, i32 24
+%r278 = load i32, i32* %r277
+%r279 = zext i32 %r278 to i800
+%r280 = shl i800 %r279, 768
+%r281 = or i800 %r275, %r280
+%r282 = zext i800 %r281 to i832
+%r284 = getelementptr i32, i32* %r2, i32 25
+%r285 = load i32, i32* %r284
+%r286 = zext i32 %r285 to i832
+%r287 = shl i832 %r286, 800
+%r288 = or i832 %r282, %r287
+%r289 = zext i832 %r288 to i864
+%r291 = getelementptr i32, i32* %r2, i32 26
+%r292 = load i32, i32* %r291
+%r293 = zext i32 %r292 to i864
+%r294 = shl i864 %r293, 832
+%r295 = or i864 %r289, %r294
+%r296 = zext i864 %r295 to i896
+%r298 = getelementptr i32, i32* %r2, i32 27
+%r299 = load i32, i32* %r298
+%r300 = zext i32 %r299 to i896
+%r301 = shl i896 %r300, 864
+%r302 = or i896 %r296, %r301
+%r303 = zext i896 %r302 to i928
+%r305 = getelementptr i32, i32* %r2, i32 28
+%r306 = load i32, i32* %r305
+%r307 = zext i32 %r306 to i928
+%r308 = shl i928 %r307, 896
+%r309 = or i928 %r303, %r308
+%r310 = zext i928 %r309 to i960
+%r312 = getelementptr i32, i32* %r2, i32 29
+%r313 = load i32, i32* %r312
+%r314 = zext i32 %r313 to i960
+%r315 = shl i960 %r314, 928
+%r316 = or i960 %r310, %r315
+%r317 = zext i960 %r316 to i992
+%r319 = getelementptr i32, i32* %r2, i32 30
+%r320 = load i32, i32* %r319
+%r321 = zext i32 %r320 to i992
+%r322 = shl i992 %r321, 960
+%r323 = or i992 %r317, %r322
+%r324 = zext i992 %r323 to i1024
+%r326 = getelementptr i32, i32* %r2, i32 31
+%r327 = load i32, i32* %r326
+%r328 = zext i32 %r327 to i1024
+%r329 = shl i1024 %r328, 992
+%r330 = or i1024 %r324, %r329
+%r331 = zext i1024 %r330 to i1056
+%r332 = trunc i1056 %r331 to i32
+%r333 = mul i32 %r332, %r6
+%r334 = call i544 @mulPv512x32(i32* %r3, i32 %r333)
+%r335 = zext i544 %r334 to i1056
+%r336 = add i1056 %r331, %r335
+%r337 = lshr i1056 %r336, 32
+%r338 = trunc i1056 %r337 to i1024
+%r339 = trunc i1024 %r338 to i32
+%r340 = mul i32 %r339, %r6
+%r341 = call i544 @mulPv512x32(i32* %r3, i32 %r340)
+%r342 = zext i544 %r341 to i1024
+%r343 = add i1024 %r338, %r342
+%r344 = lshr i1024 %r343, 32
+%r345 = trunc i1024 %r344 to i992
+%r346 = trunc i992 %r345 to i32
+%r347 = mul i32 %r346, %r6
+%r348 = call i544 @mulPv512x32(i32* %r3, i32 %r347)
+%r349 = zext i544 %r348 to i992
+%r350 = add i992 %r345, %r349
+%r351 = lshr i992 %r350, 32
+%r352 = trunc i992 %r351 to i960
+%r353 = trunc i960 %r352 to i32
+%r354 = mul i32 %r353, %r6
+%r355 = call i544 @mulPv512x32(i32* %r3, i32 %r354)
+%r356 = zext i544 %r355 to i960
+%r357 = add i960 %r352, %r356
+%r358 = lshr i960 %r357, 32
+%r359 = trunc i960 %r358 to i928
+%r360 = trunc i928 %r359 to i32
+%r361 = mul i32 %r360, %r6
+%r362 = call i544 @mulPv512x32(i32* %r3, i32 %r361)
+%r363 = zext i544 %r362 to i928
+%r364 = add i928 %r359, %r363
+%r365 = lshr i928 %r364, 32
+%r366 = trunc i928 %r365 to i896
+%r367 = trunc i896 %r366 to i32
+%r368 = mul i32 %r367, %r6
+%r369 = call i544 @mulPv512x32(i32* %r3, i32 %r368)
+%r370 = zext i544 %r369 to i896
+%r371 = add i896 %r366, %r370
+%r372 = lshr i896 %r371, 32
+%r373 = trunc i896 %r372 to i864
+%r374 = trunc i864 %r373 to i32
+%r375 = mul i32 %r374, %r6
+%r376 = call i544 @mulPv512x32(i32* %r3, i32 %r375)
+%r377 = zext i544 %r376 to i864
+%r378 = add i864 %r373, %r377
+%r379 = lshr i864 %r378, 32
+%r380 = trunc i864 %r379 to i832
+%r381 = trunc i832 %r380 to i32
+%r382 = mul i32 %r381, %r6
+%r383 = call i544 @mulPv512x32(i32* %r3, i32 %r382)
+%r384 = zext i544 %r383 to i832
+%r385 = add i832 %r380, %r384
+%r386 = lshr i832 %r385, 32
+%r387 = trunc i832 %r386 to i800
+%r388 = trunc i800 %r387 to i32
+%r389 = mul i32 %r388, %r6
+%r390 = call i544 @mulPv512x32(i32* %r3, i32 %r389)
+%r391 = zext i544 %r390 to i800
+%r392 = add i800 %r387, %r391
+%r393 = lshr i800 %r392, 32
+%r394 = trunc i800 %r393 to i768
+%r395 = trunc i768 %r394 to i32
+%r396 = mul i32 %r395, %r6
+%r397 = call i544 @mulPv512x32(i32* %r3, i32 %r396)
+%r398 = zext i544 %r397 to i768
+%r399 = add i768 %r394, %r398
+%r400 = lshr i768 %r399, 32
+%r401 = trunc i768 %r400 to i736
+%r402 = trunc i736 %r401 to i32
+%r403 = mul i32 %r402, %r6
+%r404 = call i544 @mulPv512x32(i32* %r3, i32 %r403)
+%r405 = zext i544 %r404 to i736
+%r406 = add i736 %r401, %r405
+%r407 = lshr i736 %r406, 32
+%r408 = trunc i736 %r407 to i704
+%r409 = trunc i704 %r408 to i32
+%r410 = mul i32 %r409, %r6
+%r411 = call i544 @mulPv512x32(i32* %r3, i32 %r410)
+%r412 = zext i544 %r411 to i704
+%r413 = add i704 %r408, %r412
+%r414 = lshr i704 %r413, 32
+%r415 = trunc i704 %r414 to i672
+%r416 = trunc i672 %r415 to i32
+%r417 = mul i32 %r416, %r6
+%r418 = call i544 @mulPv512x32(i32* %r3, i32 %r417)
+%r419 = zext i544 %r418 to i672
+%r420 = add i672 %r415, %r419
+%r421 = lshr i672 %r420, 32
+%r422 = trunc i672 %r421 to i640
+%r423 = trunc i640 %r422 to i32
+%r424 = mul i32 %r423, %r6
+%r425 = call i544 @mulPv512x32(i32* %r3, i32 %r424)
+%r426 = zext i544 %r425 to i640
+%r427 = add i640 %r422, %r426
+%r428 = lshr i640 %r427, 32
+%r429 = trunc i640 %r428 to i608
+%r430 = trunc i608 %r429 to i32
+%r431 = mul i32 %r430, %r6
+%r432 = call i544 @mulPv512x32(i32* %r3, i32 %r431)
+%r433 = zext i544 %r432 to i608
+%r434 = add i608 %r429, %r433
+%r435 = lshr i608 %r434, 32
+%r436 = trunc i608 %r435 to i576
+%r437 = trunc i576 %r436 to i32
+%r438 = mul i32 %r437, %r6
+%r439 = call i544 @mulPv512x32(i32* %r3, i32 %r438)
+%r440 = zext i544 %r439 to i576
+%r441 = add i576 %r436, %r440
+%r442 = lshr i576 %r441, 32
+%r443 = trunc i576 %r442 to i544
+%r444 = zext i512 %r112 to i544
+%r445 = sub i544 %r443, %r444
+%r446 = lshr i544 %r445, 512
+%r447 = trunc i544 %r446 to i1
+%r448 = select i1 %r447, i544 %r443, i544 %r445
+%r449 = trunc i544 %r448 to i512
+%r450 = trunc i512 %r449 to i32
+%r452 = getelementptr i32, i32* %r1, i32 0
+store i32 %r450, i32* %r452
+%r453 = lshr i512 %r449, 32
+%r454 = trunc i512 %r453 to i32
+%r456 = getelementptr i32, i32* %r1, i32 1
+store i32 %r454, i32* %r456
+%r457 = lshr i512 %r453, 32
+%r458 = trunc i512 %r457 to i32
+%r460 = getelementptr i32, i32* %r1, i32 2
+store i32 %r458, i32* %r460
+%r461 = lshr i512 %r457, 32
+%r462 = trunc i512 %r461 to i32
+%r464 = getelementptr i32, i32* %r1, i32 3
+store i32 %r462, i32* %r464
+%r465 = lshr i512 %r461, 32
+%r466 = trunc i512 %r465 to i32
+%r468 = getelementptr i32, i32* %r1, i32 4
+store i32 %r466, i32* %r468
+%r469 = lshr i512 %r465, 32
+%r470 = trunc i512 %r469 to i32
+%r472 = getelementptr i32, i32* %r1, i32 5
+store i32 %r470, i32* %r472
+%r473 = lshr i512 %r469, 32
+%r474 = trunc i512 %r473 to i32
+%r476 = getelementptr i32, i32* %r1, i32 6
+store i32 %r474, i32* %r476
+%r477 = lshr i512 %r473, 32
+%r478 = trunc i512 %r477 to i32
+%r480 = getelementptr i32, i32* %r1, i32 7
+store i32 %r478, i32* %r480
+%r481 = lshr i512 %r477, 32
+%r482 = trunc i512 %r481 to i32
+%r484 = getelementptr i32, i32* %r1, i32 8
+store i32 %r482, i32* %r484
+%r485 = lshr i512 %r481, 32
+%r486 = trunc i512 %r485 to i32
+%r488 = getelementptr i32, i32* %r1, i32 9
+store i32 %r486, i32* %r488
+%r489 = lshr i512 %r485, 32
+%r490 = trunc i512 %r489 to i32
+%r492 = getelementptr i32, i32* %r1, i32 10
+store i32 %r490, i32* %r492
+%r493 = lshr i512 %r489, 32
+%r494 = trunc i512 %r493 to i32
+%r496 = getelementptr i32, i32* %r1, i32 11
+store i32 %r494, i32* %r496
+%r497 = lshr i512 %r493, 32
+%r498 = trunc i512 %r497 to i32
+%r500 = getelementptr i32, i32* %r1, i32 12
+store i32 %r498, i32* %r500
+%r501 = lshr i512 %r497, 32
+%r502 = trunc i512 %r501 to i32
+%r504 = getelementptr i32, i32* %r1, i32 13
+store i32 %r502, i32* %r504
+%r505 = lshr i512 %r501, 32
+%r506 = trunc i512 %r505 to i32
+%r508 = getelementptr i32, i32* %r1, i32 14
+store i32 %r506, i32* %r508
+%r509 = lshr i512 %r505, 32
+%r510 = trunc i512 %r509 to i32
+%r512 = getelementptr i32, i32* %r1, i32 15
+store i32 %r510, i32* %r512
+ret void
+}
+define i32 @mcl_fp_addPre16L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Adds the two 16-word (512-bit) integers at %r3 and %r4, stores the low 512 bits of the sum to %r2 and returns the carry-out.
+{ ; No modular reduction is performed here: the routine is a plain multi-word add.
+%r5 = load i32, i32* %r3 ; word 0 of the first operand (bits 0..31)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11 ; words 0..1 packed; every later word k is widened by 32 bits and OR-ed in at bit 32*k
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r3, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r3, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r3, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r3, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109 ; all 16 words of %r3 packed into a single i512
+%r111 = zext i512 %r110 to i544 ; widen by 32 bits so the carry out of bit 511 is not lost
+%r112 = load i32, i32* %r4 ; word 0 of the second operand; %r4 is packed exactly like %r3 above
+%r113 = zext i32 %r112 to i64
+%r115 = getelementptr i32, i32* %r4, i32 1
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i64
+%r118 = shl i64 %r117, 32
+%r119 = or i64 %r113, %r118
+%r120 = zext i64 %r119 to i96
+%r122 = getelementptr i32, i32* %r4, i32 2
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i96
+%r125 = shl i96 %r124, 64
+%r126 = or i96 %r120, %r125
+%r127 = zext i96 %r126 to i128
+%r129 = getelementptr i32, i32* %r4, i32 3
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i128
+%r132 = shl i128 %r131, 96
+%r133 = or i128 %r127, %r132
+%r134 = zext i128 %r133 to i160
+%r136 = getelementptr i32, i32* %r4, i32 4
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i160
+%r139 = shl i160 %r138, 128
+%r140 = or i160 %r134, %r139
+%r141 = zext i160 %r140 to i192
+%r143 = getelementptr i32, i32* %r4, i32 5
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i192
+%r146 = shl i192 %r145, 160
+%r147 = or i192 %r141, %r146
+%r148 = zext i192 %r147 to i224
+%r150 = getelementptr i32, i32* %r4, i32 6
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i224
+%r153 = shl i224 %r152, 192
+%r154 = or i224 %r148, %r153
+%r155 = zext i224 %r154 to i256
+%r157 = getelementptr i32, i32* %r4, i32 7
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i256
+%r160 = shl i256 %r159, 224
+%r161 = or i256 %r155, %r160
+%r162 = zext i256 %r161 to i288
+%r164 = getelementptr i32, i32* %r4, i32 8
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i288
+%r167 = shl i288 %r166, 256
+%r168 = or i288 %r162, %r167
+%r169 = zext i288 %r168 to i320
+%r171 = getelementptr i32, i32* %r4, i32 9
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i320
+%r174 = shl i320 %r173, 288
+%r175 = or i320 %r169, %r174
+%r176 = zext i320 %r175 to i352
+%r178 = getelementptr i32, i32* %r4, i32 10
+%r179 = load i32, i32* %r178
+%r180 = zext i32 %r179 to i352
+%r181 = shl i352 %r180, 320
+%r182 = or i352 %r176, %r181
+%r183 = zext i352 %r182 to i384
+%r185 = getelementptr i32, i32* %r4, i32 11
+%r186 = load i32, i32* %r185
+%r187 = zext i32 %r186 to i384
+%r188 = shl i384 %r187, 352
+%r189 = or i384 %r183, %r188
+%r190 = zext i384 %r189 to i416
+%r192 = getelementptr i32, i32* %r4, i32 12
+%r193 = load i32, i32* %r192
+%r194 = zext i32 %r193 to i416
+%r195 = shl i416 %r194, 384
+%r196 = or i416 %r190, %r195
+%r197 = zext i416 %r196 to i448
+%r199 = getelementptr i32, i32* %r4, i32 13
+%r200 = load i32, i32* %r199
+%r201 = zext i32 %r200 to i448
+%r202 = shl i448 %r201, 416
+%r203 = or i448 %r197, %r202
+%r204 = zext i448 %r203 to i480
+%r206 = getelementptr i32, i32* %r4, i32 14
+%r207 = load i32, i32* %r206
+%r208 = zext i32 %r207 to i480
+%r209 = shl i480 %r208, 448
+%r210 = or i480 %r204, %r209
+%r211 = zext i480 %r210 to i512
+%r213 = getelementptr i32, i32* %r4, i32 15
+%r214 = load i32, i32* %r213
+%r215 = zext i32 %r214 to i512
+%r216 = shl i512 %r215, 480
+%r217 = or i512 %r211, %r216 ; all 16 words of %r4 packed into a single i512
+%r218 = zext i512 %r217 to i544
+%r219 = add i544 %r111, %r218 ; 544-bit sum of the two zero-extended operands; bit 512 is the carry
+%r220 = trunc i544 %r219 to i512 ; low 512 bits of the sum, written to %r2 one 32-bit word at a time below
+%r221 = trunc i512 %r220 to i32
+%r223 = getelementptr i32, i32* %r2, i32 0
+store i32 %r221, i32* %r223
+%r224 = lshr i512 %r220, 32 ; shift the next word down into the low 32 bits; repeated for every output word
+%r225 = trunc i512 %r224 to i32
+%r227 = getelementptr i32, i32* %r2, i32 1
+store i32 %r225, i32* %r227
+%r228 = lshr i512 %r224, 32
+%r229 = trunc i512 %r228 to i32
+%r231 = getelementptr i32, i32* %r2, i32 2
+store i32 %r229, i32* %r231
+%r232 = lshr i512 %r228, 32
+%r233 = trunc i512 %r232 to i32
+%r235 = getelementptr i32, i32* %r2, i32 3
+store i32 %r233, i32* %r235
+%r236 = lshr i512 %r232, 32
+%r237 = trunc i512 %r236 to i32
+%r239 = getelementptr i32, i32* %r2, i32 4
+store i32 %r237, i32* %r239
+%r240 = lshr i512 %r236, 32
+%r241 = trunc i512 %r240 to i32
+%r243 = getelementptr i32, i32* %r2, i32 5
+store i32 %r241, i32* %r243
+%r244 = lshr i512 %r240, 32
+%r245 = trunc i512 %r244 to i32
+%r247 = getelementptr i32, i32* %r2, i32 6
+store i32 %r245, i32* %r247
+%r248 = lshr i512 %r244, 32
+%r249 = trunc i512 %r248 to i32
+%r251 = getelementptr i32, i32* %r2, i32 7
+store i32 %r249, i32* %r251
+%r252 = lshr i512 %r248, 32
+%r253 = trunc i512 %r252 to i32
+%r255 = getelementptr i32, i32* %r2, i32 8
+store i32 %r253, i32* %r255
+%r256 = lshr i512 %r252, 32
+%r257 = trunc i512 %r256 to i32
+%r259 = getelementptr i32, i32* %r2, i32 9
+store i32 %r257, i32* %r259
+%r260 = lshr i512 %r256, 32
+%r261 = trunc i512 %r260 to i32
+%r263 = getelementptr i32, i32* %r2, i32 10
+store i32 %r261, i32* %r263
+%r264 = lshr i512 %r260, 32
+%r265 = trunc i512 %r264 to i32
+%r267 = getelementptr i32, i32* %r2, i32 11
+store i32 %r265, i32* %r267
+%r268 = lshr i512 %r264, 32
+%r269 = trunc i512 %r268 to i32
+%r271 = getelementptr i32, i32* %r2, i32 12
+store i32 %r269, i32* %r271
+%r272 = lshr i512 %r268, 32
+%r273 = trunc i512 %r272 to i32
+%r275 = getelementptr i32, i32* %r2, i32 13
+store i32 %r273, i32* %r275
+%r276 = lshr i512 %r272, 32
+%r277 = trunc i512 %r276 to i32
+%r279 = getelementptr i32, i32* %r2, i32 14
+store i32 %r277, i32* %r279
+%r280 = lshr i512 %r276, 32
+%r281 = trunc i512 %r280 to i32
+%r283 = getelementptr i32, i32* %r2, i32 15
+store i32 %r281, i32* %r283
+%r284 = lshr i544 %r219, 512 ; isolate bits 512..543 of the sum
+%r285 = trunc i544 %r284 to i32
+ret i32 %r285 ; carry-out: 0 or 1, since both operands are below 2^512
+}
+define i32 @mcl_fp_subPre16L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; Subtracts the 16-word (512-bit) integer at %r4 from the one at %r3, stores the low 512 bits of the difference to %r2 and returns the borrow.
+{ ; No modular reduction is performed here: the routine is a plain multi-word subtract.
+%r5 = load i32, i32* %r3 ; word 0 of the minuend (bits 0..31)
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11 ; words 0..1 packed; every later word k is widened by 32 bits and OR-ed in at bit 32*k
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r3, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r3, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r3, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r3, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109 ; all 16 words of %r3 packed into a single i512
+%r111 = zext i512 %r110 to i544 ; widen by 32 bits so a borrow out of bit 511 stays visible
+%r112 = load i32, i32* %r4 ; word 0 of the subtrahend; %r4 is packed exactly like %r3 above
+%r113 = zext i32 %r112 to i64
+%r115 = getelementptr i32, i32* %r4, i32 1
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i64
+%r118 = shl i64 %r117, 32
+%r119 = or i64 %r113, %r118
+%r120 = zext i64 %r119 to i96
+%r122 = getelementptr i32, i32* %r4, i32 2
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i96
+%r125 = shl i96 %r124, 64
+%r126 = or i96 %r120, %r125
+%r127 = zext i96 %r126 to i128
+%r129 = getelementptr i32, i32* %r4, i32 3
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i128
+%r132 = shl i128 %r131, 96
+%r133 = or i128 %r127, %r132
+%r134 = zext i128 %r133 to i160
+%r136 = getelementptr i32, i32* %r4, i32 4
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i160
+%r139 = shl i160 %r138, 128
+%r140 = or i160 %r134, %r139
+%r141 = zext i160 %r140 to i192
+%r143 = getelementptr i32, i32* %r4, i32 5
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i192
+%r146 = shl i192 %r145, 160
+%r147 = or i192 %r141, %r146
+%r148 = zext i192 %r147 to i224
+%r150 = getelementptr i32, i32* %r4, i32 6
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i224
+%r153 = shl i224 %r152, 192
+%r154 = or i224 %r148, %r153
+%r155 = zext i224 %r154 to i256
+%r157 = getelementptr i32, i32* %r4, i32 7
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i256
+%r160 = shl i256 %r159, 224
+%r161 = or i256 %r155, %r160
+%r162 = zext i256 %r161 to i288
+%r164 = getelementptr i32, i32* %r4, i32 8
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i288
+%r167 = shl i288 %r166, 256
+%r168 = or i288 %r162, %r167
+%r169 = zext i288 %r168 to i320
+%r171 = getelementptr i32, i32* %r4, i32 9
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i320
+%r174 = shl i320 %r173, 288
+%r175 = or i320 %r169, %r174
+%r176 = zext i320 %r175 to i352
+%r178 = getelementptr i32, i32* %r4, i32 10
+%r179 = load i32, i32* %r178
+%r180 = zext i32 %r179 to i352
+%r181 = shl i352 %r180, 320
+%r182 = or i352 %r176, %r181
+%r183 = zext i352 %r182 to i384
+%r185 = getelementptr i32, i32* %r4, i32 11
+%r186 = load i32, i32* %r185
+%r187 = zext i32 %r186 to i384
+%r188 = shl i384 %r187, 352
+%r189 = or i384 %r183, %r188
+%r190 = zext i384 %r189 to i416
+%r192 = getelementptr i32, i32* %r4, i32 12
+%r193 = load i32, i32* %r192
+%r194 = zext i32 %r193 to i416
+%r195 = shl i416 %r194, 384
+%r196 = or i416 %r190, %r195
+%r197 = zext i416 %r196 to i448
+%r199 = getelementptr i32, i32* %r4, i32 13
+%r200 = load i32, i32* %r199
+%r201 = zext i32 %r200 to i448
+%r202 = shl i448 %r201, 416
+%r203 = or i448 %r197, %r202
+%r204 = zext i448 %r203 to i480
+%r206 = getelementptr i32, i32* %r4, i32 14
+%r207 = load i32, i32* %r206
+%r208 = zext i32 %r207 to i480
+%r209 = shl i480 %r208, 448
+%r210 = or i480 %r204, %r209
+%r211 = zext i480 %r210 to i512
+%r213 = getelementptr i32, i32* %r4, i32 15
+%r214 = load i32, i32* %r213
+%r215 = zext i32 %r214 to i512
+%r216 = shl i512 %r215, 480
+%r217 = or i512 %r211, %r216 ; all 16 words of %r4 packed into a single i512
+%r218 = zext i512 %r217 to i544
+%r219 = sub i544 %r111, %r218 ; 544-bit difference; when %r3 < %r4 it wraps modulo 2^544, which sets bits 512..543
+%r220 = trunc i544 %r219 to i512 ; low 512 bits of the difference, written to %r2 one 32-bit word at a time below
+%r221 = trunc i512 %r220 to i32
+%r223 = getelementptr i32, i32* %r2, i32 0
+store i32 %r221, i32* %r223
+%r224 = lshr i512 %r220, 32 ; shift the next word down into the low 32 bits; repeated for every output word
+%r225 = trunc i512 %r224 to i32
+%r227 = getelementptr i32, i32* %r2, i32 1
+store i32 %r225, i32* %r227
+%r228 = lshr i512 %r224, 32
+%r229 = trunc i512 %r228 to i32
+%r231 = getelementptr i32, i32* %r2, i32 2
+store i32 %r229, i32* %r231
+%r232 = lshr i512 %r228, 32
+%r233 = trunc i512 %r232 to i32
+%r235 = getelementptr i32, i32* %r2, i32 3
+store i32 %r233, i32* %r235
+%r236 = lshr i512 %r232, 32
+%r237 = trunc i512 %r236 to i32
+%r239 = getelementptr i32, i32* %r2, i32 4
+store i32 %r237, i32* %r239
+%r240 = lshr i512 %r236, 32
+%r241 = trunc i512 %r240 to i32
+%r243 = getelementptr i32, i32* %r2, i32 5
+store i32 %r241, i32* %r243
+%r244 = lshr i512 %r240, 32
+%r245 = trunc i512 %r244 to i32
+%r247 = getelementptr i32, i32* %r2, i32 6
+store i32 %r245, i32* %r247
+%r248 = lshr i512 %r244, 32
+%r249 = trunc i512 %r248 to i32
+%r251 = getelementptr i32, i32* %r2, i32 7
+store i32 %r249, i32* %r251
+%r252 = lshr i512 %r248, 32
+%r253 = trunc i512 %r252 to i32
+%r255 = getelementptr i32, i32* %r2, i32 8
+store i32 %r253, i32* %r255
+%r256 = lshr i512 %r252, 32
+%r257 = trunc i512 %r256 to i32
+%r259 = getelementptr i32, i32* %r2, i32 9
+store i32 %r257, i32* %r259
+%r260 = lshr i512 %r256, 32
+%r261 = trunc i512 %r260 to i32
+%r263 = getelementptr i32, i32* %r2, i32 10
+store i32 %r261, i32* %r263
+%r264 = lshr i512 %r260, 32
+%r265 = trunc i512 %r264 to i32
+%r267 = getelementptr i32, i32* %r2, i32 11
+store i32 %r265, i32* %r267
+%r268 = lshr i512 %r264, 32
+%r269 = trunc i512 %r268 to i32
+%r271 = getelementptr i32, i32* %r2, i32 12
+store i32 %r269, i32* %r271
+%r272 = lshr i512 %r268, 32
+%r273 = trunc i512 %r272 to i32
+%r275 = getelementptr i32, i32* %r2, i32 13
+store i32 %r273, i32* %r275
+%r276 = lshr i512 %r272, 32
+%r277 = trunc i512 %r276 to i32
+%r279 = getelementptr i32, i32* %r2, i32 14
+store i32 %r277, i32* %r279
+%r280 = lshr i512 %r276, 32
+%r281 = trunc i512 %r280 to i32
+%r283 = getelementptr i32, i32* %r2, i32 15
+store i32 %r281, i32* %r283
+%r284 = lshr i544 %r219, 512 ; isolate bits 512..543 of the difference (all ones after a wrap)
+%r285 = trunc i544 %r284 to i32
+%r287 = and i32 %r285, 1 ; keep only bit 512 so the result is exactly 0 or 1
+ret i32 %r287 ; borrow: 1 if the value at %r3 was smaller than the value at %r4, else 0
+}
+define void @mcl_fp_shr1_16L(i32* noalias  %r1, i32* noalias  %r2) ; Shifts the 16-word (512-bit) integer at %r2 right by one bit and stores the result to the 16 words at %r1.
+{
+%r3 = load i32, i32* %r2 ; word 0 of the input (bits 0..31)
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9 ; words 0..1 packed; every later word k is widened by 32 bits and OR-ed in at bit 32*k
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = zext i192 %r38 to i224
+%r41 = getelementptr i32, i32* %r2, i32 6
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i224
+%r44 = shl i224 %r43, 192
+%r45 = or i224 %r39, %r44
+%r46 = zext i224 %r45 to i256
+%r48 = getelementptr i32, i32* %r2, i32 7
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i256
+%r51 = shl i256 %r50, 224
+%r52 = or i256 %r46, %r51
+%r53 = zext i256 %r52 to i288
+%r55 = getelementptr i32, i32* %r2, i32 8
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i288
+%r58 = shl i288 %r57, 256
+%r59 = or i288 %r53, %r58
+%r60 = zext i288 %r59 to i320
+%r62 = getelementptr i32, i32* %r2, i32 9
+%r63 = load i32, i32* %r62
+%r64 = zext i32 %r63 to i320
+%r65 = shl i320 %r64, 288
+%r66 = or i320 %r60, %r65
+%r67 = zext i320 %r66 to i352
+%r69 = getelementptr i32, i32* %r2, i32 10
+%r70 = load i32, i32* %r69
+%r71 = zext i32 %r70 to i352
+%r72 = shl i352 %r71, 320
+%r73 = or i352 %r67, %r72
+%r74 = zext i352 %r73 to i384
+%r76 = getelementptr i32, i32* %r2, i32 11
+%r77 = load i32, i32* %r76
+%r78 = zext i32 %r77 to i384
+%r79 = shl i384 %r78, 352
+%r80 = or i384 %r74, %r79
+%r81 = zext i384 %r80 to i416
+%r83 = getelementptr i32, i32* %r2, i32 12
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i416
+%r86 = shl i416 %r85, 384
+%r87 = or i416 %r81, %r86
+%r88 = zext i416 %r87 to i448
+%r90 = getelementptr i32, i32* %r2, i32 13
+%r91 = load i32, i32* %r90
+%r92 = zext i32 %r91 to i448
+%r93 = shl i448 %r92, 416
+%r94 = or i448 %r88, %r93
+%r95 = zext i448 %r94 to i480
+%r97 = getelementptr i32, i32* %r2, i32 14
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i480
+%r100 = shl i480 %r99, 448
+%r101 = or i480 %r95, %r100
+%r102 = zext i480 %r101 to i512
+%r104 = getelementptr i32, i32* %r2, i32 15
+%r105 = load i32, i32* %r104
+%r106 = zext i32 %r105 to i512
+%r107 = shl i512 %r106, 480
+%r108 = or i512 %r102, %r107 ; all 16 words of %r2 packed into a single i512
+%r109 = lshr i512 %r108, 1 ; logical shift right by one: bit 0 is dropped and bit 511 becomes 0
+%r110 = trunc i512 %r109 to i32 ; the shifted value is written to %r1 one 32-bit word at a time from here on
+%r112 = getelementptr i32, i32* %r1, i32 0
+store i32 %r110, i32* %r112
+%r113 = lshr i512 %r109, 32 ; shift the next word down into the low 32 bits; repeated for every output word
+%r114 = trunc i512 %r113 to i32
+%r116 = getelementptr i32, i32* %r1, i32 1
+store i32 %r114, i32* %r116
+%r117 = lshr i512 %r113, 32
+%r118 = trunc i512 %r117 to i32
+%r120 = getelementptr i32, i32* %r1, i32 2
+store i32 %r118, i32* %r120
+%r121 = lshr i512 %r117, 32
+%r122 = trunc i512 %r121 to i32
+%r124 = getelementptr i32, i32* %r1, i32 3
+store i32 %r122, i32* %r124
+%r125 = lshr i512 %r121, 32
+%r126 = trunc i512 %r125 to i32
+%r128 = getelementptr i32, i32* %r1, i32 4
+store i32 %r126, i32* %r128
+%r129 = lshr i512 %r125, 32
+%r130 = trunc i512 %r129 to i32
+%r132 = getelementptr i32, i32* %r1, i32 5
+store i32 %r130, i32* %r132
+%r133 = lshr i512 %r129, 32
+%r134 = trunc i512 %r133 to i32
+%r136 = getelementptr i32, i32* %r1, i32 6
+store i32 %r134, i32* %r136
+%r137 = lshr i512 %r133, 32
+%r138 = trunc i512 %r137 to i32
+%r140 = getelementptr i32, i32* %r1, i32 7
+store i32 %r138, i32* %r140
+%r141 = lshr i512 %r137, 32
+%r142 = trunc i512 %r141 to i32
+%r144 = getelementptr i32, i32* %r1, i32 8
+store i32 %r142, i32* %r144
+%r145 = lshr i512 %r141, 32
+%r146 = trunc i512 %r145 to i32
+%r148 = getelementptr i32, i32* %r1, i32 9
+store i32 %r146, i32* %r148
+%r149 = lshr i512 %r145, 32
+%r150 = trunc i512 %r149 to i32
+%r152 = getelementptr i32, i32* %r1, i32 10
+store i32 %r150, i32* %r152
+%r153 = lshr i512 %r149, 32
+%r154 = trunc i512 %r153 to i32
+%r156 = getelementptr i32, i32* %r1, i32 11
+store i32 %r154, i32* %r156
+%r157 = lshr i512 %r153, 32
+%r158 = trunc i512 %r157 to i32
+%r160 = getelementptr i32, i32* %r1, i32 12
+store i32 %r158, i32* %r160
+%r161 = lshr i512 %r157, 32
+%r162 = trunc i512 %r161 to i32
+%r164 = getelementptr i32, i32* %r1, i32 13
+store i32 %r162, i32* %r164
+%r165 = lshr i512 %r161, 32
+%r166 = trunc i512 %r165 to i32
+%r168 = getelementptr i32, i32* %r1, i32 14
+store i32 %r166, i32* %r168
+%r169 = lshr i512 %r165, 32
+%r170 = trunc i512 %r169 to i32
+%r172 = getelementptr i32, i32* %r1, i32 15
+store i32 %r170, i32* %r172
+ret void
+}
+define void @mcl_fp_add16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = load i32, i32* %r3
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r3, i32 1
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r3, i32 2
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r3, i32 3
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r3, i32 4
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r3, i32 5
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r3, i32 6
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r3, i32 7
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r163 = getelementptr i32, i32* %r3, i32 8
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i288
+%r166 = shl i288 %r165, 256
+%r167 = or i288 %r161, %r166
+%r168 = zext i288 %r167 to i320
+%r170 = getelementptr i32, i32* %r3, i32 9
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i320
+%r173 = shl i320 %r172, 288
+%r174 = or i320 %r168, %r173
+%r175 = zext i320 %r174 to i352
+%r177 = getelementptr i32, i32* %r3, i32 10
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i352
+%r180 = shl i352 %r179, 320
+%r181 = or i352 %r175, %r180
+%r182 = zext i352 %r181 to i384
+%r184 = getelementptr i32, i32* %r3, i32 11
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i384
+%r187 = shl i384 %r186, 352
+%r188 = or i384 %r182, %r187
+%r189 = zext i384 %r188 to i416
+%r191 = getelementptr i32, i32* %r3, i32 12
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i416
+%r194 = shl i416 %r193, 384
+%r195 = or i416 %r189, %r194
+%r196 = zext i416 %r195 to i448
+%r198 = getelementptr i32, i32* %r3, i32 13
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i448
+%r201 = shl i448 %r200, 416
+%r202 = or i448 %r196, %r201
+%r203 = zext i448 %r202 to i480
+%r205 = getelementptr i32, i32* %r3, i32 14
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i480
+%r208 = shl i480 %r207, 448
+%r209 = or i480 %r203, %r208
+%r210 = zext i480 %r209 to i512
+%r212 = getelementptr i32, i32* %r3, i32 15
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i512
+%r215 = shl i512 %r214, 480
+%r216 = or i512 %r210, %r215
+%r217 = zext i512 %r110 to i544
+%r218 = zext i512 %r216 to i544
+%r219 = add i544 %r217, %r218
+%r220 = trunc i544 %r219 to i512
+%r221 = trunc i512 %r220 to i32
+%r223 = getelementptr i32, i32* %r1, i32 0
+store i32 %r221, i32* %r223
+%r224 = lshr i512 %r220, 32
+%r225 = trunc i512 %r224 to i32
+%r227 = getelementptr i32, i32* %r1, i32 1
+store i32 %r225, i32* %r227
+%r228 = lshr i512 %r224, 32
+%r229 = trunc i512 %r228 to i32
+%r231 = getelementptr i32, i32* %r1, i32 2
+store i32 %r229, i32* %r231
+%r232 = lshr i512 %r228, 32
+%r233 = trunc i512 %r232 to i32
+%r235 = getelementptr i32, i32* %r1, i32 3
+store i32 %r233, i32* %r235
+%r236 = lshr i512 %r232, 32
+%r237 = trunc i512 %r236 to i32
+%r239 = getelementptr i32, i32* %r1, i32 4
+store i32 %r237, i32* %r239
+%r240 = lshr i512 %r236, 32
+%r241 = trunc i512 %r240 to i32
+%r243 = getelementptr i32, i32* %r1, i32 5
+store i32 %r241, i32* %r243
+%r244 = lshr i512 %r240, 32
+%r245 = trunc i512 %r244 to i32
+%r247 = getelementptr i32, i32* %r1, i32 6
+store i32 %r245, i32* %r247
+%r248 = lshr i512 %r244, 32
+%r249 = trunc i512 %r248 to i32
+%r251 = getelementptr i32, i32* %r1, i32 7
+store i32 %r249, i32* %r251
+%r252 = lshr i512 %r248, 32
+%r253 = trunc i512 %r252 to i32
+%r255 = getelementptr i32, i32* %r1, i32 8
+store i32 %r253, i32* %r255
+%r256 = lshr i512 %r252, 32
+%r257 = trunc i512 %r256 to i32
+%r259 = getelementptr i32, i32* %r1, i32 9
+store i32 %r257, i32* %r259
+%r260 = lshr i512 %r256, 32
+%r261 = trunc i512 %r260 to i32
+%r263 = getelementptr i32, i32* %r1, i32 10
+store i32 %r261, i32* %r263
+%r264 = lshr i512 %r260, 32
+%r265 = trunc i512 %r264 to i32
+%r267 = getelementptr i32, i32* %r1, i32 11
+store i32 %r265, i32* %r267
+%r268 = lshr i512 %r264, 32
+%r269 = trunc i512 %r268 to i32
+%r271 = getelementptr i32, i32* %r1, i32 12
+store i32 %r269, i32* %r271
+%r272 = lshr i512 %r268, 32
+%r273 = trunc i512 %r272 to i32
+%r275 = getelementptr i32, i32* %r1, i32 13
+store i32 %r273, i32* %r275
+%r276 = lshr i512 %r272, 32
+%r277 = trunc i512 %r276 to i32
+%r279 = getelementptr i32, i32* %r1, i32 14
+store i32 %r277, i32* %r279
+%r280 = lshr i512 %r276, 32
+%r281 = trunc i512 %r280 to i32
+%r283 = getelementptr i32, i32* %r1, i32 15
+store i32 %r281, i32* %r283
+%r284 = load i32, i32* %r4
+%r285 = zext i32 %r284 to i64
+%r287 = getelementptr i32, i32* %r4, i32 1
+%r288 = load i32, i32* %r287
+%r289 = zext i32 %r288 to i64
+%r290 = shl i64 %r289, 32
+%r291 = or i64 %r285, %r290
+%r292 = zext i64 %r291 to i96
+%r294 = getelementptr i32, i32* %r4, i32 2
+%r295 = load i32, i32* %r294
+%r296 = zext i32 %r295 to i96
+%r297 = shl i96 %r296, 64
+%r298 = or i96 %r292, %r297
+%r299 = zext i96 %r298 to i128
+%r301 = getelementptr i32, i32* %r4, i32 3
+%r302 = load i32, i32* %r301
+%r303 = zext i32 %r302 to i128
+%r304 = shl i128 %r303, 96
+%r305 = or i128 %r299, %r304
+%r306 = zext i128 %r305 to i160
+%r308 = getelementptr i32, i32* %r4, i32 4
+%r309 = load i32, i32* %r308
+%r310 = zext i32 %r309 to i160
+%r311 = shl i160 %r310, 128
+%r312 = or i160 %r306, %r311
+%r313 = zext i160 %r312 to i192
+%r315 = getelementptr i32, i32* %r4, i32 5
+%r316 = load i32, i32* %r315
+%r317 = zext i32 %r316 to i192
+%r318 = shl i192 %r317, 160
+%r319 = or i192 %r313, %r318
+%r320 = zext i192 %r319 to i224
+%r322 = getelementptr i32, i32* %r4, i32 6
+%r323 = load i32, i32* %r322
+%r324 = zext i32 %r323 to i224
+%r325 = shl i224 %r324, 192
+%r326 = or i224 %r320, %r325
+%r327 = zext i224 %r326 to i256
+%r329 = getelementptr i32, i32* %r4, i32 7
+%r330 = load i32, i32* %r329
+%r331 = zext i32 %r330 to i256
+%r332 = shl i256 %r331, 224
+%r333 = or i256 %r327, %r332
+%r334 = zext i256 %r333 to i288
+%r336 = getelementptr i32, i32* %r4, i32 8
+%r337 = load i32, i32* %r336
+%r338 = zext i32 %r337 to i288
+%r339 = shl i288 %r338, 256
+%r340 = or i288 %r334, %r339
+%r341 = zext i288 %r340 to i320
+%r343 = getelementptr i32, i32* %r4, i32 9
+%r344 = load i32, i32* %r343
+%r345 = zext i32 %r344 to i320
+%r346 = shl i320 %r345, 288
+%r347 = or i320 %r341, %r346
+%r348 = zext i320 %r347 to i352
+%r350 = getelementptr i32, i32* %r4, i32 10
+%r351 = load i32, i32* %r350
+%r352 = zext i32 %r351 to i352
+%r353 = shl i352 %r352, 320
+%r354 = or i352 %r348, %r353
+%r355 = zext i352 %r354 to i384
+%r357 = getelementptr i32, i32* %r4, i32 11
+%r358 = load i32, i32* %r357
+%r359 = zext i32 %r358 to i384
+%r360 = shl i384 %r359, 352
+%r361 = or i384 %r355, %r360
+%r362 = zext i384 %r361 to i416
+%r364 = getelementptr i32, i32* %r4, i32 12
+%r365 = load i32, i32* %r364
+%r366 = zext i32 %r365 to i416
+%r367 = shl i416 %r366, 384
+%r368 = or i416 %r362, %r367
+%r369 = zext i416 %r368 to i448
+%r371 = getelementptr i32, i32* %r4, i32 13
+%r372 = load i32, i32* %r371
+%r373 = zext i32 %r372 to i448
+%r374 = shl i448 %r373, 416
+%r375 = or i448 %r369, %r374
+%r376 = zext i448 %r375 to i480
+%r378 = getelementptr i32, i32* %r4, i32 14
+%r379 = load i32, i32* %r378
+%r380 = zext i32 %r379 to i480
+%r381 = shl i480 %r380, 448
+%r382 = or i480 %r376, %r381
+%r383 = zext i480 %r382 to i512
+%r385 = getelementptr i32, i32* %r4, i32 15
+%r386 = load i32, i32* %r385
+%r387 = zext i32 %r386 to i512
+%r388 = shl i512 %r387, 480
+%r389 = or i512 %r383, %r388
+%r390 = zext i512 %r389 to i544
+%r391 = sub i544 %r219, %r390
+%r392 = lshr i544 %r391, 512
+%r393 = trunc i544 %r392 to i1
+br i1%r393, label %carry, label %nocarry
+nocarry:
+%r394 = trunc i544 %r391 to i512
+%r395 = trunc i512 %r394 to i32
+%r397 = getelementptr i32, i32* %r1, i32 0
+store i32 %r395, i32* %r397
+%r398 = lshr i512 %r394, 32
+%r399 = trunc i512 %r398 to i32
+%r401 = getelementptr i32, i32* %r1, i32 1
+store i32 %r399, i32* %r401
+%r402 = lshr i512 %r398, 32
+%r403 = trunc i512 %r402 to i32
+%r405 = getelementptr i32, i32* %r1, i32 2
+store i32 %r403, i32* %r405
+%r406 = lshr i512 %r402, 32
+%r407 = trunc i512 %r406 to i32
+%r409 = getelementptr i32, i32* %r1, i32 3
+store i32 %r407, i32* %r409
+%r410 = lshr i512 %r406, 32
+%r411 = trunc i512 %r410 to i32
+%r413 = getelementptr i32, i32* %r1, i32 4
+store i32 %r411, i32* %r413
+%r414 = lshr i512 %r410, 32
+%r415 = trunc i512 %r414 to i32
+%r417 = getelementptr i32, i32* %r1, i32 5
+store i32 %r415, i32* %r417
+%r418 = lshr i512 %r414, 32
+%r419 = trunc i512 %r418 to i32
+%r421 = getelementptr i32, i32* %r1, i32 6
+store i32 %r419, i32* %r421
+%r422 = lshr i512 %r418, 32
+%r423 = trunc i512 %r422 to i32
+%r425 = getelementptr i32, i32* %r1, i32 7
+store i32 %r423, i32* %r425
+%r426 = lshr i512 %r422, 32
+%r427 = trunc i512 %r426 to i32
+%r429 = getelementptr i32, i32* %r1, i32 8
+store i32 %r427, i32* %r429
+%r430 = lshr i512 %r426, 32
+%r431 = trunc i512 %r430 to i32
+%r433 = getelementptr i32, i32* %r1, i32 9
+store i32 %r431, i32* %r433
+%r434 = lshr i512 %r430, 32
+%r435 = trunc i512 %r434 to i32
+%r437 = getelementptr i32, i32* %r1, i32 10
+store i32 %r435, i32* %r437
+%r438 = lshr i512 %r434, 32
+%r439 = trunc i512 %r438 to i32
+%r441 = getelementptr i32, i32* %r1, i32 11
+store i32 %r439, i32* %r441
+%r442 = lshr i512 %r438, 32
+%r443 = trunc i512 %r442 to i32
+%r445 = getelementptr i32, i32* %r1, i32 12
+store i32 %r443, i32* %r445
+%r446 = lshr i512 %r442, 32
+%r447 = trunc i512 %r446 to i32
+%r449 = getelementptr i32, i32* %r1, i32 13
+store i32 %r447, i32* %r449
+%r450 = lshr i512 %r446, 32
+%r451 = trunc i512 %r450 to i32
+%r453 = getelementptr i32, i32* %r1, i32 14
+store i32 %r451, i32* %r453
+%r454 = lshr i512 %r450, 32
+%r455 = trunc i512 %r454 to i32
+%r457 = getelementptr i32, i32* %r1, i32 15
+store i32 %r455, i32* %r457
+ret void
+carry:
+ret void
+}
+define void @mcl_fp_addNF16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; reads 16 i32 words from each of %r2, %r3, %r4 and stores 16 i32 words to %r1
+{
+%r5 = load i32, i32* %r2 ; start packing words 0..15 at %r2 into one i512; word 0 becomes the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109 ; %r110 = the full 512-bit value read from %r2
+%r111 = load i32, i32* %r3 ; same packing, now for the 16 words at %r3
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r3, i32 1
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r3, i32 2
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r3, i32 3
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r3, i32 4
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r3, i32 5
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r3, i32 6
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r3, i32 7
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r163 = getelementptr i32, i32* %r3, i32 8
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i288
+%r166 = shl i288 %r165, 256
+%r167 = or i288 %r161, %r166
+%r168 = zext i288 %r167 to i320
+%r170 = getelementptr i32, i32* %r3, i32 9
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i320
+%r173 = shl i320 %r172, 288
+%r174 = or i320 %r168, %r173
+%r175 = zext i320 %r174 to i352
+%r177 = getelementptr i32, i32* %r3, i32 10
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i352
+%r180 = shl i352 %r179, 320
+%r181 = or i352 %r175, %r180
+%r182 = zext i352 %r181 to i384
+%r184 = getelementptr i32, i32* %r3, i32 11
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i384
+%r187 = shl i384 %r186, 352
+%r188 = or i384 %r182, %r187
+%r189 = zext i384 %r188 to i416
+%r191 = getelementptr i32, i32* %r3, i32 12
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i416
+%r194 = shl i416 %r193, 384
+%r195 = or i416 %r189, %r194
+%r196 = zext i416 %r195 to i448
+%r198 = getelementptr i32, i32* %r3, i32 13
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i448
+%r201 = shl i448 %r200, 416
+%r202 = or i448 %r196, %r201
+%r203 = zext i448 %r202 to i480
+%r205 = getelementptr i32, i32* %r3, i32 14
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i480
+%r208 = shl i480 %r207, 448
+%r209 = or i480 %r203, %r208
+%r210 = zext i480 %r209 to i512
+%r212 = getelementptr i32, i32* %r3, i32 15
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i512
+%r215 = shl i512 %r214, 480
+%r216 = or i512 %r210, %r215 ; %r216 = the full 512-bit value read from %r3
+%r217 = add i512 %r110, %r216 ; sum of the two inputs at i512 width; no wider carry bit is kept, so it wraps modulo 2^512
+%r218 = load i32, i32* %r4 ; same packing, now for the 16 words at %r4 (NOTE(review): presumably the field modulus, going by the function name - confirm against caller)
+%r219 = zext i32 %r218 to i64
+%r221 = getelementptr i32, i32* %r4, i32 1
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i64
+%r224 = shl i64 %r223, 32
+%r225 = or i64 %r219, %r224
+%r226 = zext i64 %r225 to i96
+%r228 = getelementptr i32, i32* %r4, i32 2
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i96
+%r231 = shl i96 %r230, 64
+%r232 = or i96 %r226, %r231
+%r233 = zext i96 %r232 to i128
+%r235 = getelementptr i32, i32* %r4, i32 3
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i128
+%r238 = shl i128 %r237, 96
+%r239 = or i128 %r233, %r238
+%r240 = zext i128 %r239 to i160
+%r242 = getelementptr i32, i32* %r4, i32 4
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i160
+%r245 = shl i160 %r244, 128
+%r246 = or i160 %r240, %r245
+%r247 = zext i160 %r246 to i192
+%r249 = getelementptr i32, i32* %r4, i32 5
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i192
+%r252 = shl i192 %r251, 160
+%r253 = or i192 %r247, %r252
+%r254 = zext i192 %r253 to i224
+%r256 = getelementptr i32, i32* %r4, i32 6
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i224
+%r259 = shl i224 %r258, 192
+%r260 = or i224 %r254, %r259
+%r261 = zext i224 %r260 to i256
+%r263 = getelementptr i32, i32* %r4, i32 7
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i256
+%r266 = shl i256 %r265, 224
+%r267 = or i256 %r261, %r266
+%r268 = zext i256 %r267 to i288
+%r270 = getelementptr i32, i32* %r4, i32 8
+%r271 = load i32, i32* %r270
+%r272 = zext i32 %r271 to i288
+%r273 = shl i288 %r272, 256
+%r274 = or i288 %r268, %r273
+%r275 = zext i288 %r274 to i320
+%r277 = getelementptr i32, i32* %r4, i32 9
+%r278 = load i32, i32* %r277
+%r279 = zext i32 %r278 to i320
+%r280 = shl i320 %r279, 288
+%r281 = or i320 %r275, %r280
+%r282 = zext i320 %r281 to i352
+%r284 = getelementptr i32, i32* %r4, i32 10
+%r285 = load i32, i32* %r284
+%r286 = zext i32 %r285 to i352
+%r287 = shl i352 %r286, 320
+%r288 = or i352 %r282, %r287
+%r289 = zext i352 %r288 to i384
+%r291 = getelementptr i32, i32* %r4, i32 11
+%r292 = load i32, i32* %r291
+%r293 = zext i32 %r292 to i384
+%r294 = shl i384 %r293, 352
+%r295 = or i384 %r289, %r294
+%r296 = zext i384 %r295 to i416
+%r298 = getelementptr i32, i32* %r4, i32 12
+%r299 = load i32, i32* %r298
+%r300 = zext i32 %r299 to i416
+%r301 = shl i416 %r300, 384
+%r302 = or i416 %r296, %r301
+%r303 = zext i416 %r302 to i448
+%r305 = getelementptr i32, i32* %r4, i32 13
+%r306 = load i32, i32* %r305
+%r307 = zext i32 %r306 to i448
+%r308 = shl i448 %r307, 416
+%r309 = or i448 %r303, %r308
+%r310 = zext i448 %r309 to i480
+%r312 = getelementptr i32, i32* %r4, i32 14
+%r313 = load i32, i32* %r312
+%r314 = zext i32 %r313 to i480
+%r315 = shl i480 %r314, 448
+%r316 = or i480 %r310, %r315
+%r317 = zext i480 %r316 to i512
+%r319 = getelementptr i32, i32* %r4, i32 15
+%r320 = load i32, i32* %r319
+%r321 = zext i32 %r320 to i512
+%r322 = shl i512 %r321, 480
+%r323 = or i512 %r317, %r322 ; %r323 = the full 512-bit value read from %r4
+%r324 = sub i512 %r217, %r323 ; candidate result: sum minus the %r4 value, still at i512 width
+%r325 = lshr i512 %r324, 511 ; move bit 511 (the top bit of the difference) down to bit 0
+%r326 = trunc i512 %r325 to i1 ; %r326 = 1 when the difference has its top bit set
+%r327 = select i1 %r326, i512 %r217, i512 %r324 ; top bit set -> keep the unsubtracted sum, otherwise keep the difference; branch-free choice
+%r328 = trunc i512 %r327 to i32 ; low 32 bits of the chosen value become output word 0
+%r330 = getelementptr i32, i32* %r1, i32 0
+store i32 %r328, i32* %r330
+%r331 = lshr i512 %r327, 32 ; shift the next word into the low 32 bits; this shift/trunc/store pattern repeats for words 1..15
+%r332 = trunc i512 %r331 to i32
+%r334 = getelementptr i32, i32* %r1, i32 1
+store i32 %r332, i32* %r334
+%r335 = lshr i512 %r331, 32
+%r336 = trunc i512 %r335 to i32
+%r338 = getelementptr i32, i32* %r1, i32 2
+store i32 %r336, i32* %r338
+%r339 = lshr i512 %r335, 32
+%r340 = trunc i512 %r339 to i32
+%r342 = getelementptr i32, i32* %r1, i32 3
+store i32 %r340, i32* %r342
+%r343 = lshr i512 %r339, 32
+%r344 = trunc i512 %r343 to i32
+%r346 = getelementptr i32, i32* %r1, i32 4
+store i32 %r344, i32* %r346
+%r347 = lshr i512 %r343, 32
+%r348 = trunc i512 %r347 to i32
+%r350 = getelementptr i32, i32* %r1, i32 5
+store i32 %r348, i32* %r350
+%r351 = lshr i512 %r347, 32
+%r352 = trunc i512 %r351 to i32
+%r354 = getelementptr i32, i32* %r1, i32 6
+store i32 %r352, i32* %r354
+%r355 = lshr i512 %r351, 32
+%r356 = trunc i512 %r355 to i32
+%r358 = getelementptr i32, i32* %r1, i32 7
+store i32 %r356, i32* %r358
+%r359 = lshr i512 %r355, 32
+%r360 = trunc i512 %r359 to i32
+%r362 = getelementptr i32, i32* %r1, i32 8
+store i32 %r360, i32* %r362
+%r363 = lshr i512 %r359, 32
+%r364 = trunc i512 %r363 to i32
+%r366 = getelementptr i32, i32* %r1, i32 9
+store i32 %r364, i32* %r366
+%r367 = lshr i512 %r363, 32
+%r368 = trunc i512 %r367 to i32
+%r370 = getelementptr i32, i32* %r1, i32 10
+store i32 %r368, i32* %r370
+%r371 = lshr i512 %r367, 32
+%r372 = trunc i512 %r371 to i32
+%r374 = getelementptr i32, i32* %r1, i32 11
+store i32 %r372, i32* %r374
+%r375 = lshr i512 %r371, 32
+%r376 = trunc i512 %r375 to i32
+%r378 = getelementptr i32, i32* %r1, i32 12
+store i32 %r376, i32* %r378
+%r379 = lshr i512 %r375, 32
+%r380 = trunc i512 %r379 to i32
+%r382 = getelementptr i32, i32* %r1, i32 13
+store i32 %r380, i32* %r382
+%r383 = lshr i512 %r379, 32
+%r384 = trunc i512 %r383 to i32
+%r386 = getelementptr i32, i32* %r1, i32 14
+store i32 %r384, i32* %r386
+%r387 = lshr i512 %r383, 32
+%r388 = trunc i512 %r387 to i32
+%r390 = getelementptr i32, i32* %r1, i32 15
+store i32 %r388, i32* %r390
+ret void
+}
+define void @mcl_fp_sub16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = load i32, i32* %r3
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r3, i32 1
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r3, i32 2
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r3, i32 3
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r3, i32 4
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r3, i32 5
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r3, i32 6
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r3, i32 7
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r163 = getelementptr i32, i32* %r3, i32 8
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i288
+%r166 = shl i288 %r165, 256
+%r167 = or i288 %r161, %r166
+%r168 = zext i288 %r167 to i320
+%r170 = getelementptr i32, i32* %r3, i32 9
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i320
+%r173 = shl i320 %r172, 288
+%r174 = or i320 %r168, %r173
+%r175 = zext i320 %r174 to i352
+%r177 = getelementptr i32, i32* %r3, i32 10
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i352
+%r180 = shl i352 %r179, 320
+%r181 = or i352 %r175, %r180
+%r182 = zext i352 %r181 to i384
+%r184 = getelementptr i32, i32* %r3, i32 11
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i384
+%r187 = shl i384 %r186, 352
+%r188 = or i384 %r182, %r187
+%r189 = zext i384 %r188 to i416
+%r191 = getelementptr i32, i32* %r3, i32 12
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i416
+%r194 = shl i416 %r193, 384
+%r195 = or i416 %r189, %r194
+%r196 = zext i416 %r195 to i448
+%r198 = getelementptr i32, i32* %r3, i32 13
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i448
+%r201 = shl i448 %r200, 416
+%r202 = or i448 %r196, %r201
+%r203 = zext i448 %r202 to i480
+%r205 = getelementptr i32, i32* %r3, i32 14
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i480
+%r208 = shl i480 %r207, 448
+%r209 = or i480 %r203, %r208
+%r210 = zext i480 %r209 to i512
+%r212 = getelementptr i32, i32* %r3, i32 15
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i512
+%r215 = shl i512 %r214, 480
+%r216 = or i512 %r210, %r215
+%r217 = zext i512 %r110 to i544
+%r218 = zext i512 %r216 to i544
+%r219 = sub i544 %r217, %r218
+%r220 = trunc i544 %r219 to i512
+%r221 = lshr i544 %r219, 512
+%r222 = trunc i544 %r221 to i1
+%r223 = trunc i512 %r220 to i32
+%r225 = getelementptr i32, i32* %r1, i32 0
+store i32 %r223, i32* %r225
+%r226 = lshr i512 %r220, 32
+%r227 = trunc i512 %r226 to i32
+%r229 = getelementptr i32, i32* %r1, i32 1
+store i32 %r227, i32* %r229
+%r230 = lshr i512 %r226, 32
+%r231 = trunc i512 %r230 to i32
+%r233 = getelementptr i32, i32* %r1, i32 2
+store i32 %r231, i32* %r233
+%r234 = lshr i512 %r230, 32
+%r235 = trunc i512 %r234 to i32
+%r237 = getelementptr i32, i32* %r1, i32 3
+store i32 %r235, i32* %r237
+%r238 = lshr i512 %r234, 32
+%r239 = trunc i512 %r238 to i32
+%r241 = getelementptr i32, i32* %r1, i32 4
+store i32 %r239, i32* %r241
+%r242 = lshr i512 %r238, 32
+%r243 = trunc i512 %r242 to i32
+%r245 = getelementptr i32, i32* %r1, i32 5
+store i32 %r243, i32* %r245
+%r246 = lshr i512 %r242, 32
+%r247 = trunc i512 %r246 to i32
+%r249 = getelementptr i32, i32* %r1, i32 6
+store i32 %r247, i32* %r249
+%r250 = lshr i512 %r246, 32
+%r251 = trunc i512 %r250 to i32
+%r253 = getelementptr i32, i32* %r1, i32 7
+store i32 %r251, i32* %r253
+%r254 = lshr i512 %r250, 32
+%r255 = trunc i512 %r254 to i32
+%r257 = getelementptr i32, i32* %r1, i32 8
+store i32 %r255, i32* %r257
+%r258 = lshr i512 %r254, 32
+%r259 = trunc i512 %r258 to i32
+%r261 = getelementptr i32, i32* %r1, i32 9
+store i32 %r259, i32* %r261
+%r262 = lshr i512 %r258, 32
+%r263 = trunc i512 %r262 to i32
+%r265 = getelementptr i32, i32* %r1, i32 10
+store i32 %r263, i32* %r265
+%r266 = lshr i512 %r262, 32
+%r267 = trunc i512 %r266 to i32
+%r269 = getelementptr i32, i32* %r1, i32 11
+store i32 %r267, i32* %r269
+%r270 = lshr i512 %r266, 32
+%r271 = trunc i512 %r270 to i32
+%r273 = getelementptr i32, i32* %r1, i32 12
+store i32 %r271, i32* %r273
+%r274 = lshr i512 %r270, 32
+%r275 = trunc i512 %r274 to i32
+%r277 = getelementptr i32, i32* %r1, i32 13
+store i32 %r275, i32* %r277
+%r278 = lshr i512 %r274, 32
+%r279 = trunc i512 %r278 to i32
+%r281 = getelementptr i32, i32* %r1, i32 14
+store i32 %r279, i32* %r281
+%r282 = lshr i512 %r278, 32
+%r283 = trunc i512 %r282 to i32
+%r285 = getelementptr i32, i32* %r1, i32 15
+store i32 %r283, i32* %r285
+br i1%r222, label %carry, label %nocarry
+nocarry:
+ret void
+carry:
+%r286 = load i32, i32* %r4
+%r287 = zext i32 %r286 to i64
+%r289 = getelementptr i32, i32* %r4, i32 1
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i64
+%r292 = shl i64 %r291, 32
+%r293 = or i64 %r287, %r292
+%r294 = zext i64 %r293 to i96
+%r296 = getelementptr i32, i32* %r4, i32 2
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i96
+%r299 = shl i96 %r298, 64
+%r300 = or i96 %r294, %r299
+%r301 = zext i96 %r300 to i128
+%r303 = getelementptr i32, i32* %r4, i32 3
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i128
+%r306 = shl i128 %r305, 96
+%r307 = or i128 %r301, %r306
+%r308 = zext i128 %r307 to i160
+%r310 = getelementptr i32, i32* %r4, i32 4
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i160
+%r313 = shl i160 %r312, 128
+%r314 = or i160 %r308, %r313
+%r315 = zext i160 %r314 to i192
+%r317 = getelementptr i32, i32* %r4, i32 5
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i192
+%r320 = shl i192 %r319, 160
+%r321 = or i192 %r315, %r320
+%r322 = zext i192 %r321 to i224
+%r324 = getelementptr i32, i32* %r4, i32 6
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i224
+%r327 = shl i224 %r326, 192
+%r328 = or i224 %r322, %r327
+%r329 = zext i224 %r328 to i256
+%r331 = getelementptr i32, i32* %r4, i32 7
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i256
+%r334 = shl i256 %r333, 224
+%r335 = or i256 %r329, %r334
+%r336 = zext i256 %r335 to i288
+%r338 = getelementptr i32, i32* %r4, i32 8
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i288
+%r341 = shl i288 %r340, 256
+%r342 = or i288 %r336, %r341
+%r343 = zext i288 %r342 to i320
+%r345 = getelementptr i32, i32* %r4, i32 9
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i320
+%r348 = shl i320 %r347, 288
+%r349 = or i320 %r343, %r348
+%r350 = zext i320 %r349 to i352
+%r352 = getelementptr i32, i32* %r4, i32 10
+%r353 = load i32, i32* %r352
+%r354 = zext i32 %r353 to i352
+%r355 = shl i352 %r354, 320
+%r356 = or i352 %r350, %r355
+%r357 = zext i352 %r356 to i384
+%r359 = getelementptr i32, i32* %r4, i32 11
+%r360 = load i32, i32* %r359
+%r361 = zext i32 %r360 to i384
+%r362 = shl i384 %r361, 352
+%r363 = or i384 %r357, %r362
+%r364 = zext i384 %r363 to i416
+%r366 = getelementptr i32, i32* %r4, i32 12
+%r367 = load i32, i32* %r366
+%r368 = zext i32 %r367 to i416
+%r369 = shl i416 %r368, 384
+%r370 = or i416 %r364, %r369
+%r371 = zext i416 %r370 to i448
+%r373 = getelementptr i32, i32* %r4, i32 13
+%r374 = load i32, i32* %r373
+%r375 = zext i32 %r374 to i448
+%r376 = shl i448 %r375, 416
+%r377 = or i448 %r371, %r376
+%r378 = zext i448 %r377 to i480
+%r380 = getelementptr i32, i32* %r4, i32 14
+%r381 = load i32, i32* %r380
+%r382 = zext i32 %r381 to i480
+%r383 = shl i480 %r382, 448
+%r384 = or i480 %r378, %r383
+%r385 = zext i480 %r384 to i512
+%r387 = getelementptr i32, i32* %r4, i32 15
+%r388 = load i32, i32* %r387
+%r389 = zext i32 %r388 to i512
+%r390 = shl i512 %r389, 480
+%r391 = or i512 %r385, %r390
+%r392 = add i512 %r220, %r391
+%r393 = trunc i512 %r392 to i32
+%r395 = getelementptr i32, i32* %r1, i32 0
+store i32 %r393, i32* %r395
+%r396 = lshr i512 %r392, 32
+%r397 = trunc i512 %r396 to i32
+%r399 = getelementptr i32, i32* %r1, i32 1
+store i32 %r397, i32* %r399
+%r400 = lshr i512 %r396, 32
+%r401 = trunc i512 %r400 to i32
+%r403 = getelementptr i32, i32* %r1, i32 2
+store i32 %r401, i32* %r403
+%r404 = lshr i512 %r400, 32
+%r405 = trunc i512 %r404 to i32
+%r407 = getelementptr i32, i32* %r1, i32 3
+store i32 %r405, i32* %r407
+%r408 = lshr i512 %r404, 32
+%r409 = trunc i512 %r408 to i32
+%r411 = getelementptr i32, i32* %r1, i32 4
+store i32 %r409, i32* %r411
+%r412 = lshr i512 %r408, 32
+%r413 = trunc i512 %r412 to i32
+%r415 = getelementptr i32, i32* %r1, i32 5
+store i32 %r413, i32* %r415
+%r416 = lshr i512 %r412, 32
+%r417 = trunc i512 %r416 to i32
+%r419 = getelementptr i32, i32* %r1, i32 6
+store i32 %r417, i32* %r419
+%r420 = lshr i512 %r416, 32
+%r421 = trunc i512 %r420 to i32
+%r423 = getelementptr i32, i32* %r1, i32 7
+store i32 %r421, i32* %r423
+%r424 = lshr i512 %r420, 32
+%r425 = trunc i512 %r424 to i32
+%r427 = getelementptr i32, i32* %r1, i32 8
+store i32 %r425, i32* %r427
+%r428 = lshr i512 %r424, 32
+%r429 = trunc i512 %r428 to i32
+%r431 = getelementptr i32, i32* %r1, i32 9
+store i32 %r429, i32* %r431
+%r432 = lshr i512 %r428, 32
+%r433 = trunc i512 %r432 to i32
+%r435 = getelementptr i32, i32* %r1, i32 10
+store i32 %r433, i32* %r435
+%r436 = lshr i512 %r432, 32
+%r437 = trunc i512 %r436 to i32
+%r439 = getelementptr i32, i32* %r1, i32 11
+store i32 %r437, i32* %r439
+%r440 = lshr i512 %r436, 32
+%r441 = trunc i512 %r440 to i32
+%r443 = getelementptr i32, i32* %r1, i32 12
+store i32 %r441, i32* %r443
+%r444 = lshr i512 %r440, 32
+%r445 = trunc i512 %r444 to i32
+%r447 = getelementptr i32, i32* %r1, i32 13
+store i32 %r445, i32* %r447
+%r448 = lshr i512 %r444, 32
+%r449 = trunc i512 %r448 to i32
+%r451 = getelementptr i32, i32* %r1, i32 14
+store i32 %r449, i32* %r451
+%r452 = lshr i512 %r448, 32
+%r453 = trunc i512 %r452 to i32
+%r455 = getelementptr i32, i32* %r1, i32 15
+store i32 %r453, i32* %r455
+ret void
+}
+define void @mcl_fp_subNF16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r1[0..15] = (%r2 - %r3) + (bit 511 of that difference set ? %r4 : 0); every operand is 16 x i32 limbs, least-significant limb first
+{
+%r5 = load i32, i32* %r2 ; --- pack %r2[0..15] into one i512: limb i is OR-ed in at bit 32*i, widening by 32 bits per step ---
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109 ; %r110 = the full 512-bit value read from %r2 (minuend)
+%r111 = load i32, i32* %r3 ; --- pack %r3[0..15] into one i512 the same way ---
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r3, i32 1
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r3, i32 2
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r3, i32 3
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r3, i32 4
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r3, i32 5
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r3, i32 6
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r3, i32 7
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r163 = getelementptr i32, i32* %r3, i32 8
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i288
+%r166 = shl i288 %r165, 256
+%r167 = or i288 %r161, %r166
+%r168 = zext i288 %r167 to i320
+%r170 = getelementptr i32, i32* %r3, i32 9
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i320
+%r173 = shl i320 %r172, 288
+%r174 = or i320 %r168, %r173
+%r175 = zext i320 %r174 to i352
+%r177 = getelementptr i32, i32* %r3, i32 10
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i352
+%r180 = shl i352 %r179, 320
+%r181 = or i352 %r175, %r180
+%r182 = zext i352 %r181 to i384
+%r184 = getelementptr i32, i32* %r3, i32 11
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i384
+%r187 = shl i384 %r186, 352
+%r188 = or i384 %r182, %r187
+%r189 = zext i384 %r188 to i416
+%r191 = getelementptr i32, i32* %r3, i32 12
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i416
+%r194 = shl i416 %r193, 384
+%r195 = or i416 %r189, %r194
+%r196 = zext i416 %r195 to i448
+%r198 = getelementptr i32, i32* %r3, i32 13
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i448
+%r201 = shl i448 %r200, 416
+%r202 = or i448 %r196, %r201
+%r203 = zext i448 %r202 to i480
+%r205 = getelementptr i32, i32* %r3, i32 14
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i480
+%r208 = shl i480 %r207, 448
+%r209 = or i480 %r203, %r208
+%r210 = zext i480 %r209 to i512
+%r212 = getelementptr i32, i32* %r3, i32 15
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i512
+%r215 = shl i512 %r214, 480
+%r216 = or i512 %r210, %r215 ; %r216 = the full 512-bit value read from %r3 (subtrahend)
+%r217 = sub i512 %r110, %r216 ; raw difference; wraps modulo 2^512 when %r216 > %r110
+%r218 = lshr i512 %r217, 511 ; bring bit 511 of the difference down to bit 0
+%r219 = trunc i512 %r218 to i1 ; %r219 = top bit of the difference; presumably doubles as the borrow flag because inputs stay below 2^511 (the "NF" variant) - TODO confirm against the generator
+%r220 = load i32, i32* %r4 ; --- pack %r4[0..15] into one i512 (presumably the field modulus p - confirm against caller) ---
+%r221 = zext i32 %r220 to i64
+%r223 = getelementptr i32, i32* %r4, i32 1
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i64
+%r226 = shl i64 %r225, 32
+%r227 = or i64 %r221, %r226
+%r228 = zext i64 %r227 to i96
+%r230 = getelementptr i32, i32* %r4, i32 2
+%r231 = load i32, i32* %r230
+%r232 = zext i32 %r231 to i96
+%r233 = shl i96 %r232, 64
+%r234 = or i96 %r228, %r233
+%r235 = zext i96 %r234 to i128
+%r237 = getelementptr i32, i32* %r4, i32 3
+%r238 = load i32, i32* %r237
+%r239 = zext i32 %r238 to i128
+%r240 = shl i128 %r239, 96
+%r241 = or i128 %r235, %r240
+%r242 = zext i128 %r241 to i160
+%r244 = getelementptr i32, i32* %r4, i32 4
+%r245 = load i32, i32* %r244
+%r246 = zext i32 %r245 to i160
+%r247 = shl i160 %r246, 128
+%r248 = or i160 %r242, %r247
+%r249 = zext i160 %r248 to i192
+%r251 = getelementptr i32, i32* %r4, i32 5
+%r252 = load i32, i32* %r251
+%r253 = zext i32 %r252 to i192
+%r254 = shl i192 %r253, 160
+%r255 = or i192 %r249, %r254
+%r256 = zext i192 %r255 to i224
+%r258 = getelementptr i32, i32* %r4, i32 6
+%r259 = load i32, i32* %r258
+%r260 = zext i32 %r259 to i224
+%r261 = shl i224 %r260, 192
+%r262 = or i224 %r256, %r261
+%r263 = zext i224 %r262 to i256
+%r265 = getelementptr i32, i32* %r4, i32 7
+%r266 = load i32, i32* %r265
+%r267 = zext i32 %r266 to i256
+%r268 = shl i256 %r267, 224
+%r269 = or i256 %r263, %r268
+%r270 = zext i256 %r269 to i288
+%r272 = getelementptr i32, i32* %r4, i32 8
+%r273 = load i32, i32* %r272
+%r274 = zext i32 %r273 to i288
+%r275 = shl i288 %r274, 256
+%r276 = or i288 %r270, %r275
+%r277 = zext i288 %r276 to i320
+%r279 = getelementptr i32, i32* %r4, i32 9
+%r280 = load i32, i32* %r279
+%r281 = zext i32 %r280 to i320
+%r282 = shl i320 %r281, 288
+%r283 = or i320 %r277, %r282
+%r284 = zext i320 %r283 to i352
+%r286 = getelementptr i32, i32* %r4, i32 10
+%r287 = load i32, i32* %r286
+%r288 = zext i32 %r287 to i352
+%r289 = shl i352 %r288, 320
+%r290 = or i352 %r284, %r289
+%r291 = zext i352 %r290 to i384
+%r293 = getelementptr i32, i32* %r4, i32 11
+%r294 = load i32, i32* %r293
+%r295 = zext i32 %r294 to i384
+%r296 = shl i384 %r295, 352
+%r297 = or i384 %r291, %r296
+%r298 = zext i384 %r297 to i416
+%r300 = getelementptr i32, i32* %r4, i32 12
+%r301 = load i32, i32* %r300
+%r302 = zext i32 %r301 to i416
+%r303 = shl i416 %r302, 384
+%r304 = or i416 %r298, %r303
+%r305 = zext i416 %r304 to i448
+%r307 = getelementptr i32, i32* %r4, i32 13
+%r308 = load i32, i32* %r307
+%r309 = zext i32 %r308 to i448
+%r310 = shl i448 %r309, 416
+%r311 = or i448 %r305, %r310
+%r312 = zext i448 %r311 to i480
+%r314 = getelementptr i32, i32* %r4, i32 14
+%r315 = load i32, i32* %r314
+%r316 = zext i32 %r315 to i480
+%r317 = shl i480 %r316, 448
+%r318 = or i480 %r312, %r317
+%r319 = zext i480 %r318 to i512
+%r321 = getelementptr i32, i32* %r4, i32 15
+%r322 = load i32, i32* %r321
+%r323 = zext i32 %r322 to i512
+%r324 = shl i512 %r323, 480
+%r325 = or i512 %r319, %r324 ; %r325 = the full 512-bit value read from %r4
+%r327 = select i1 %r219, i512 %r325, i512 0 ; addend = %r4's value when the top bit is set, otherwise 0 (a select, not a branch)
+%r328 = add i512 %r217, %r327 ; corrected result: difference plus the conditional addend
+%r329 = trunc i512 %r328 to i32 ; --- unpack %r328 into %r1[0..15]: store the low 32 bits, shift right by 32, repeat ---
+%r331 = getelementptr i32, i32* %r1, i32 0
+store i32 %r329, i32* %r331
+%r332 = lshr i512 %r328, 32
+%r333 = trunc i512 %r332 to i32
+%r335 = getelementptr i32, i32* %r1, i32 1
+store i32 %r333, i32* %r335
+%r336 = lshr i512 %r332, 32
+%r337 = trunc i512 %r336 to i32
+%r339 = getelementptr i32, i32* %r1, i32 2
+store i32 %r337, i32* %r339
+%r340 = lshr i512 %r336, 32
+%r341 = trunc i512 %r340 to i32
+%r343 = getelementptr i32, i32* %r1, i32 3
+store i32 %r341, i32* %r343
+%r344 = lshr i512 %r340, 32
+%r345 = trunc i512 %r344 to i32
+%r347 = getelementptr i32, i32* %r1, i32 4
+store i32 %r345, i32* %r347
+%r348 = lshr i512 %r344, 32
+%r349 = trunc i512 %r348 to i32
+%r351 = getelementptr i32, i32* %r1, i32 5
+store i32 %r349, i32* %r351
+%r352 = lshr i512 %r348, 32
+%r353 = trunc i512 %r352 to i32
+%r355 = getelementptr i32, i32* %r1, i32 6
+store i32 %r353, i32* %r355
+%r356 = lshr i512 %r352, 32
+%r357 = trunc i512 %r356 to i32
+%r359 = getelementptr i32, i32* %r1, i32 7
+store i32 %r357, i32* %r359
+%r360 = lshr i512 %r356, 32
+%r361 = trunc i512 %r360 to i32
+%r363 = getelementptr i32, i32* %r1, i32 8
+store i32 %r361, i32* %r363
+%r364 = lshr i512 %r360, 32
+%r365 = trunc i512 %r364 to i32
+%r367 = getelementptr i32, i32* %r1, i32 9
+store i32 %r365, i32* %r367
+%r368 = lshr i512 %r364, 32
+%r369 = trunc i512 %r368 to i32
+%r371 = getelementptr i32, i32* %r1, i32 10
+store i32 %r369, i32* %r371
+%r372 = lshr i512 %r368, 32
+%r373 = trunc i512 %r372 to i32
+%r375 = getelementptr i32, i32* %r1, i32 11
+store i32 %r373, i32* %r375
+%r376 = lshr i512 %r372, 32
+%r377 = trunc i512 %r376 to i32
+%r379 = getelementptr i32, i32* %r1, i32 12
+store i32 %r377, i32* %r379
+%r380 = lshr i512 %r376, 32
+%r381 = trunc i512 %r380 to i32
+%r383 = getelementptr i32, i32* %r1, i32 13
+store i32 %r381, i32* %r383
+%r384 = lshr i512 %r380, 32
+%r385 = trunc i512 %r384 to i32
+%r387 = getelementptr i32, i32* %r1, i32 14
+store i32 %r385, i32* %r387
+%r388 = lshr i512 %r384, 32
+%r389 = trunc i512 %r388 to i32
+%r391 = getelementptr i32, i32* %r1, i32 15
+store i32 %r389, i32* %r391 ; most-significant limb of the result
+ret void
+}
+define void @mcl_fpDbl_add16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165
+%r167 = zext i768 %r166 to i800
+%r169 = getelementptr i32, i32* %r2, i32 24
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i800
+%r172 = shl i800 %r171, 768
+%r173 = or i800 %r167, %r172
+%r174 = zext i800 %r173 to i832
+%r176 = getelementptr i32, i32* %r2, i32 25
+%r177 = load i32, i32* %r176
+%r178 = zext i32 %r177 to i832
+%r179 = shl i832 %r178, 800
+%r180 = or i832 %r174, %r179
+%r181 = zext i832 %r180 to i864
+%r183 = getelementptr i32, i32* %r2, i32 26
+%r184 = load i32, i32* %r183
+%r185 = zext i32 %r184 to i864
+%r186 = shl i864 %r185, 832
+%r187 = or i864 %r181, %r186
+%r188 = zext i864 %r187 to i896
+%r190 = getelementptr i32, i32* %r2, i32 27
+%r191 = load i32, i32* %r190
+%r192 = zext i32 %r191 to i896
+%r193 = shl i896 %r192, 864
+%r194 = or i896 %r188, %r193
+%r195 = zext i896 %r194 to i928
+%r197 = getelementptr i32, i32* %r2, i32 28
+%r198 = load i32, i32* %r197
+%r199 = zext i32 %r198 to i928
+%r200 = shl i928 %r199, 896
+%r201 = or i928 %r195, %r200
+%r202 = zext i928 %r201 to i960
+%r204 = getelementptr i32, i32* %r2, i32 29
+%r205 = load i32, i32* %r204
+%r206 = zext i32 %r205 to i960
+%r207 = shl i960 %r206, 928
+%r208 = or i960 %r202, %r207
+%r209 = zext i960 %r208 to i992
+%r211 = getelementptr i32, i32* %r2, i32 30
+%r212 = load i32, i32* %r211
+%r213 = zext i32 %r212 to i992
+%r214 = shl i992 %r213, 960
+%r215 = or i992 %r209, %r214
+%r216 = zext i992 %r215 to i1024
+%r218 = getelementptr i32, i32* %r2, i32 31
+%r219 = load i32, i32* %r218
+%r220 = zext i32 %r219 to i1024
+%r221 = shl i1024 %r220, 992
+%r222 = or i1024 %r216, %r221
+%r223 = load i32, i32* %r3
+%r224 = zext i32 %r223 to i64
+%r226 = getelementptr i32, i32* %r3, i32 1
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i64
+%r229 = shl i64 %r228, 32
+%r230 = or i64 %r224, %r229
+%r231 = zext i64 %r230 to i96
+%r233 = getelementptr i32, i32* %r3, i32 2
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i96
+%r236 = shl i96 %r235, 64
+%r237 = or i96 %r231, %r236
+%r238 = zext i96 %r237 to i128
+%r240 = getelementptr i32, i32* %r3, i32 3
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i128
+%r243 = shl i128 %r242, 96
+%r244 = or i128 %r238, %r243
+%r245 = zext i128 %r244 to i160
+%r247 = getelementptr i32, i32* %r3, i32 4
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i160
+%r250 = shl i160 %r249, 128
+%r251 = or i160 %r245, %r250
+%r252 = zext i160 %r251 to i192
+%r254 = getelementptr i32, i32* %r3, i32 5
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i192
+%r257 = shl i192 %r256, 160
+%r258 = or i192 %r252, %r257
+%r259 = zext i192 %r258 to i224
+%r261 = getelementptr i32, i32* %r3, i32 6
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i224
+%r264 = shl i224 %r263, 192
+%r265 = or i224 %r259, %r264
+%r266 = zext i224 %r265 to i256
+%r268 = getelementptr i32, i32* %r3, i32 7
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i256
+%r271 = shl i256 %r270, 224
+%r272 = or i256 %r266, %r271
+%r273 = zext i256 %r272 to i288
+%r275 = getelementptr i32, i32* %r3, i32 8
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i288
+%r278 = shl i288 %r277, 256
+%r279 = or i288 %r273, %r278
+%r280 = zext i288 %r279 to i320
+%r282 = getelementptr i32, i32* %r3, i32 9
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i320
+%r285 = shl i320 %r284, 288
+%r286 = or i320 %r280, %r285
+%r287 = zext i320 %r286 to i352
+%r289 = getelementptr i32, i32* %r3, i32 10
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i352
+%r292 = shl i352 %r291, 320
+%r293 = or i352 %r287, %r292
+%r294 = zext i352 %r293 to i384
+%r296 = getelementptr i32, i32* %r3, i32 11
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i384
+%r299 = shl i384 %r298, 352
+%r300 = or i384 %r294, %r299
+%r301 = zext i384 %r300 to i416
+%r303 = getelementptr i32, i32* %r3, i32 12
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i416
+%r306 = shl i416 %r305, 384
+%r307 = or i416 %r301, %r306
+%r308 = zext i416 %r307 to i448
+%r310 = getelementptr i32, i32* %r3, i32 13
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i448
+%r313 = shl i448 %r312, 416
+%r314 = or i448 %r308, %r313
+%r315 = zext i448 %r314 to i480
+%r317 = getelementptr i32, i32* %r3, i32 14
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i480
+%r320 = shl i480 %r319, 448
+%r321 = or i480 %r315, %r320
+%r322 = zext i480 %r321 to i512
+%r324 = getelementptr i32, i32* %r3, i32 15
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i512
+%r327 = shl i512 %r326, 480
+%r328 = or i512 %r322, %r327
+%r329 = zext i512 %r328 to i544
+%r331 = getelementptr i32, i32* %r3, i32 16
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i544
+%r334 = shl i544 %r333, 512
+%r335 = or i544 %r329, %r334
+%r336 = zext i544 %r335 to i576
+%r338 = getelementptr i32, i32* %r3, i32 17
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i576
+%r341 = shl i576 %r340, 544
+%r342 = or i576 %r336, %r341
+%r343 = zext i576 %r342 to i608
+%r345 = getelementptr i32, i32* %r3, i32 18
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i608
+%r348 = shl i608 %r347, 576
+%r349 = or i608 %r343, %r348
+%r350 = zext i608 %r349 to i640
+%r352 = getelementptr i32, i32* %r3, i32 19
+%r353 = load i32, i32* %r352
+%r354 = zext i32 %r353 to i640
+%r355 = shl i640 %r354, 608
+%r356 = or i640 %r350, %r355
+%r357 = zext i640 %r356 to i672
+%r359 = getelementptr i32, i32* %r3, i32 20
+%r360 = load i32, i32* %r359
+%r361 = zext i32 %r360 to i672
+%r362 = shl i672 %r361, 640
+%r363 = or i672 %r357, %r362
+%r364 = zext i672 %r363 to i704
+%r366 = getelementptr i32, i32* %r3, i32 21
+%r367 = load i32, i32* %r366
+%r368 = zext i32 %r367 to i704
+%r369 = shl i704 %r368, 672
+%r370 = or i704 %r364, %r369
+%r371 = zext i704 %r370 to i736
+%r373 = getelementptr i32, i32* %r3, i32 22
+%r374 = load i32, i32* %r373
+%r375 = zext i32 %r374 to i736
+%r376 = shl i736 %r375, 704
+%r377 = or i736 %r371, %r376
+%r378 = zext i736 %r377 to i768
+%r380 = getelementptr i32, i32* %r3, i32 23
+%r381 = load i32, i32* %r380
+%r382 = zext i32 %r381 to i768
+%r383 = shl i768 %r382, 736
+%r384 = or i768 %r378, %r383
+%r385 = zext i768 %r384 to i800
+%r387 = getelementptr i32, i32* %r3, i32 24
+%r388 = load i32, i32* %r387
+%r389 = zext i32 %r388 to i800
+%r390 = shl i800 %r389, 768
+%r391 = or i800 %r385, %r390
+%r392 = zext i800 %r391 to i832
+%r394 = getelementptr i32, i32* %r3, i32 25
+%r395 = load i32, i32* %r394
+%r396 = zext i32 %r395 to i832
+%r397 = shl i832 %r396, 800
+%r398 = or i832 %r392, %r397
+%r399 = zext i832 %r398 to i864
+%r401 = getelementptr i32, i32* %r3, i32 26
+%r402 = load i32, i32* %r401
+%r403 = zext i32 %r402 to i864
+%r404 = shl i864 %r403, 832
+%r405 = or i864 %r399, %r404
+%r406 = zext i864 %r405 to i896
+%r408 = getelementptr i32, i32* %r3, i32 27
+%r409 = load i32, i32* %r408
+%r410 = zext i32 %r409 to i896
+%r411 = shl i896 %r410, 864
+%r412 = or i896 %r406, %r411
+%r413 = zext i896 %r412 to i928
+%r415 = getelementptr i32, i32* %r3, i32 28
+%r416 = load i32, i32* %r415
+%r417 = zext i32 %r416 to i928
+%r418 = shl i928 %r417, 896
+%r419 = or i928 %r413, %r418
+%r420 = zext i928 %r419 to i960
+%r422 = getelementptr i32, i32* %r3, i32 29
+%r423 = load i32, i32* %r422
+%r424 = zext i32 %r423 to i960
+%r425 = shl i960 %r424, 928
+%r426 = or i960 %r420, %r425
+%r427 = zext i960 %r426 to i992
+%r429 = getelementptr i32, i32* %r3, i32 30
+%r430 = load i32, i32* %r429
+%r431 = zext i32 %r430 to i992
+%r432 = shl i992 %r431, 960
+%r433 = or i992 %r427, %r432
+%r434 = zext i992 %r433 to i1024
+%r436 = getelementptr i32, i32* %r3, i32 31
+%r437 = load i32, i32* %r436
+%r438 = zext i32 %r437 to i1024
+%r439 = shl i1024 %r438, 992
+%r440 = or i1024 %r434, %r439
+%r441 = zext i1024 %r222 to i1056
+%r442 = zext i1024 %r440 to i1056
+%r443 = add i1056 %r441, %r442
+%r444 = trunc i1056 %r443 to i512
+%r445 = trunc i512 %r444 to i32
+%r447 = getelementptr i32, i32* %r1, i32 0
+store i32 %r445, i32* %r447
+%r448 = lshr i512 %r444, 32
+%r449 = trunc i512 %r448 to i32
+%r451 = getelementptr i32, i32* %r1, i32 1
+store i32 %r449, i32* %r451
+%r452 = lshr i512 %r448, 32
+%r453 = trunc i512 %r452 to i32
+%r455 = getelementptr i32, i32* %r1, i32 2
+store i32 %r453, i32* %r455
+%r456 = lshr i512 %r452, 32
+%r457 = trunc i512 %r456 to i32
+%r459 = getelementptr i32, i32* %r1, i32 3
+store i32 %r457, i32* %r459
+%r460 = lshr i512 %r456, 32
+%r461 = trunc i512 %r460 to i32
+%r463 = getelementptr i32, i32* %r1, i32 4
+store i32 %r461, i32* %r463
+%r464 = lshr i512 %r460, 32
+%r465 = trunc i512 %r464 to i32
+%r467 = getelementptr i32, i32* %r1, i32 5
+store i32 %r465, i32* %r467
+%r468 = lshr i512 %r464, 32
+%r469 = trunc i512 %r468 to i32
+%r471 = getelementptr i32, i32* %r1, i32 6
+store i32 %r469, i32* %r471
+%r472 = lshr i512 %r468, 32
+%r473 = trunc i512 %r472 to i32
+%r475 = getelementptr i32, i32* %r1, i32 7
+store i32 %r473, i32* %r475
+%r476 = lshr i512 %r472, 32
+%r477 = trunc i512 %r476 to i32
+%r479 = getelementptr i32, i32* %r1, i32 8
+store i32 %r477, i32* %r479
+%r480 = lshr i512 %r476, 32
+%r481 = trunc i512 %r480 to i32
+%r483 = getelementptr i32, i32* %r1, i32 9
+store i32 %r481, i32* %r483
+%r484 = lshr i512 %r480, 32
+%r485 = trunc i512 %r484 to i32
+%r487 = getelementptr i32, i32* %r1, i32 10
+store i32 %r485, i32* %r487
+%r488 = lshr i512 %r484, 32
+%r489 = trunc i512 %r488 to i32
+%r491 = getelementptr i32, i32* %r1, i32 11
+store i32 %r489, i32* %r491
+%r492 = lshr i512 %r488, 32
+%r493 = trunc i512 %r492 to i32
+%r495 = getelementptr i32, i32* %r1, i32 12
+store i32 %r493, i32* %r495
+%r496 = lshr i512 %r492, 32
+%r497 = trunc i512 %r496 to i32
+%r499 = getelementptr i32, i32* %r1, i32 13
+store i32 %r497, i32* %r499
+%r500 = lshr i512 %r496, 32
+%r501 = trunc i512 %r500 to i32
+%r503 = getelementptr i32, i32* %r1, i32 14
+store i32 %r501, i32* %r503
+%r504 = lshr i512 %r500, 32
+%r505 = trunc i512 %r504 to i32
+%r507 = getelementptr i32, i32* %r1, i32 15
+store i32 %r505, i32* %r507
+%r508 = lshr i1056 %r443, 512
+%r509 = trunc i1056 %r508 to i544
+%r510 = load i32, i32* %r4
+%r511 = zext i32 %r510 to i64
+%r513 = getelementptr i32, i32* %r4, i32 1
+%r514 = load i32, i32* %r513
+%r515 = zext i32 %r514 to i64
+%r516 = shl i64 %r515, 32
+%r517 = or i64 %r511, %r516
+%r518 = zext i64 %r517 to i96
+%r520 = getelementptr i32, i32* %r4, i32 2
+%r521 = load i32, i32* %r520
+%r522 = zext i32 %r521 to i96
+%r523 = shl i96 %r522, 64
+%r524 = or i96 %r518, %r523
+%r525 = zext i96 %r524 to i128
+%r527 = getelementptr i32, i32* %r4, i32 3
+%r528 = load i32, i32* %r527
+%r529 = zext i32 %r528 to i128
+%r530 = shl i128 %r529, 96
+%r531 = or i128 %r525, %r530
+%r532 = zext i128 %r531 to i160
+%r534 = getelementptr i32, i32* %r4, i32 4
+%r535 = load i32, i32* %r534
+%r536 = zext i32 %r535 to i160
+%r537 = shl i160 %r536, 128
+%r538 = or i160 %r532, %r537
+%r539 = zext i160 %r538 to i192
+%r541 = getelementptr i32, i32* %r4, i32 5
+%r542 = load i32, i32* %r541
+%r543 = zext i32 %r542 to i192
+%r544 = shl i192 %r543, 160
+%r545 = or i192 %r539, %r544
+%r546 = zext i192 %r545 to i224
+%r548 = getelementptr i32, i32* %r4, i32 6
+%r549 = load i32, i32* %r548
+%r550 = zext i32 %r549 to i224
+%r551 = shl i224 %r550, 192
+%r552 = or i224 %r546, %r551
+%r553 = zext i224 %r552 to i256
+%r555 = getelementptr i32, i32* %r4, i32 7
+%r556 = load i32, i32* %r555
+%r557 = zext i32 %r556 to i256
+%r558 = shl i256 %r557, 224
+%r559 = or i256 %r553, %r558
+%r560 = zext i256 %r559 to i288
+%r562 = getelementptr i32, i32* %r4, i32 8
+%r563 = load i32, i32* %r562
+%r564 = zext i32 %r563 to i288
+%r565 = shl i288 %r564, 256
+%r566 = or i288 %r560, %r565
+%r567 = zext i288 %r566 to i320
+%r569 = getelementptr i32, i32* %r4, i32 9
+%r570 = load i32, i32* %r569
+%r571 = zext i32 %r570 to i320
+%r572 = shl i320 %r571, 288
+%r573 = or i320 %r567, %r572
+%r574 = zext i320 %r573 to i352
+%r576 = getelementptr i32, i32* %r4, i32 10
+%r577 = load i32, i32* %r576
+%r578 = zext i32 %r577 to i352
+%r579 = shl i352 %r578, 320
+%r580 = or i352 %r574, %r579
+%r581 = zext i352 %r580 to i384
+%r583 = getelementptr i32, i32* %r4, i32 11
+%r584 = load i32, i32* %r583
+%r585 = zext i32 %r584 to i384
+%r586 = shl i384 %r585, 352
+%r587 = or i384 %r581, %r586
+%r588 = zext i384 %r587 to i416
+%r590 = getelementptr i32, i32* %r4, i32 12
+%r591 = load i32, i32* %r590
+%r592 = zext i32 %r591 to i416
+%r593 = shl i416 %r592, 384
+%r594 = or i416 %r588, %r593
+%r595 = zext i416 %r594 to i448
+%r597 = getelementptr i32, i32* %r4, i32 13
+%r598 = load i32, i32* %r597
+%r599 = zext i32 %r598 to i448
+%r600 = shl i448 %r599, 416
+%r601 = or i448 %r595, %r600
+%r602 = zext i448 %r601 to i480
+%r604 = getelementptr i32, i32* %r4, i32 14
+%r605 = load i32, i32* %r604
+%r606 = zext i32 %r605 to i480
+%r607 = shl i480 %r606, 448
+%r608 = or i480 %r602, %r607
+%r609 = zext i480 %r608 to i512
+%r611 = getelementptr i32, i32* %r4, i32 15
+%r612 = load i32, i32* %r611
+%r613 = zext i32 %r612 to i512
+%r614 = shl i512 %r613, 480
+%r615 = or i512 %r609, %r614
+%r616 = zext i512 %r615 to i544
+%r617 = sub i544 %r509, %r616
+%r618 = lshr i544 %r617, 512
+%r619 = trunc i544 %r618 to i1
+%r620 = select i1 %r619, i544 %r509, i544 %r617
+%r621 = trunc i544 %r620 to i512
+%r623 = getelementptr i32, i32* %r1, i32 16
+%r624 = trunc i512 %r621 to i32
+%r626 = getelementptr i32, i32* %r623, i32 0
+store i32 %r624, i32* %r626
+%r627 = lshr i512 %r621, 32
+%r628 = trunc i512 %r627 to i32
+%r630 = getelementptr i32, i32* %r623, i32 1
+store i32 %r628, i32* %r630
+%r631 = lshr i512 %r627, 32
+%r632 = trunc i512 %r631 to i32
+%r634 = getelementptr i32, i32* %r623, i32 2
+store i32 %r632, i32* %r634
+%r635 = lshr i512 %r631, 32
+%r636 = trunc i512 %r635 to i32
+%r638 = getelementptr i32, i32* %r623, i32 3
+store i32 %r636, i32* %r638
+%r639 = lshr i512 %r635, 32
+%r640 = trunc i512 %r639 to i32
+%r642 = getelementptr i32, i32* %r623, i32 4
+store i32 %r640, i32* %r642
+%r643 = lshr i512 %r639, 32
+%r644 = trunc i512 %r643 to i32
+%r646 = getelementptr i32, i32* %r623, i32 5
+store i32 %r644, i32* %r646
+%r647 = lshr i512 %r643, 32
+%r648 = trunc i512 %r647 to i32
+%r650 = getelementptr i32, i32* %r623, i32 6
+store i32 %r648, i32* %r650
+%r651 = lshr i512 %r647, 32
+%r652 = trunc i512 %r651 to i32
+%r654 = getelementptr i32, i32* %r623, i32 7
+store i32 %r652, i32* %r654
+%r655 = lshr i512 %r651, 32
+%r656 = trunc i512 %r655 to i32
+%r658 = getelementptr i32, i32* %r623, i32 8
+store i32 %r656, i32* %r658
+%r659 = lshr i512 %r655, 32
+%r660 = trunc i512 %r659 to i32
+%r662 = getelementptr i32, i32* %r623, i32 9
+store i32 %r660, i32* %r662
+%r663 = lshr i512 %r659, 32
+%r664 = trunc i512 %r663 to i32
+%r666 = getelementptr i32, i32* %r623, i32 10
+store i32 %r664, i32* %r666
+%r667 = lshr i512 %r663, 32
+%r668 = trunc i512 %r667 to i32
+%r670 = getelementptr i32, i32* %r623, i32 11
+store i32 %r668, i32* %r670
+%r671 = lshr i512 %r667, 32
+%r672 = trunc i512 %r671 to i32
+%r674 = getelementptr i32, i32* %r623, i32 12
+store i32 %r672, i32* %r674
+%r675 = lshr i512 %r671, 32
+%r676 = trunc i512 %r675 to i32
+%r678 = getelementptr i32, i32* %r623, i32 13
+store i32 %r676, i32* %r678
+%r679 = lshr i512 %r675, 32
+%r680 = trunc i512 %r679 to i32
+%r682 = getelementptr i32, i32* %r623, i32 14
+store i32 %r680, i32* %r682
+%r683 = lshr i512 %r679, 32
+%r684 = trunc i512 %r683 to i32
+%r686 = getelementptr i32, i32* %r623, i32 15
+store i32 %r684, i32* %r686
+ret void
+}
+define void @mcl_fpDbl_sub16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r1[0..31] = %r2[0..31] - %r3[0..31] as 1024-bit integers; on borrow the 512-bit value at %r4[0..15] is added to the upper 16 words
+{
+%r5 = load i32, i32* %r2 ; --- assemble the 32 words at %r2 into one i1024, word i placed at bit 32*i
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165
+%r167 = zext i768 %r166 to i800
+%r169 = getelementptr i32, i32* %r2, i32 24
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i800
+%r172 = shl i800 %r171, 768
+%r173 = or i800 %r167, %r172
+%r174 = zext i800 %r173 to i832
+%r176 = getelementptr i32, i32* %r2, i32 25
+%r177 = load i32, i32* %r176
+%r178 = zext i32 %r177 to i832
+%r179 = shl i832 %r178, 800
+%r180 = or i832 %r174, %r179
+%r181 = zext i832 %r180 to i864
+%r183 = getelementptr i32, i32* %r2, i32 26
+%r184 = load i32, i32* %r183
+%r185 = zext i32 %r184 to i864
+%r186 = shl i864 %r185, 832
+%r187 = or i864 %r181, %r186
+%r188 = zext i864 %r187 to i896
+%r190 = getelementptr i32, i32* %r2, i32 27
+%r191 = load i32, i32* %r190
+%r192 = zext i32 %r191 to i896
+%r193 = shl i896 %r192, 864
+%r194 = or i896 %r188, %r193
+%r195 = zext i896 %r194 to i928
+%r197 = getelementptr i32, i32* %r2, i32 28
+%r198 = load i32, i32* %r197
+%r199 = zext i32 %r198 to i928
+%r200 = shl i928 %r199, 896
+%r201 = or i928 %r195, %r200
+%r202 = zext i928 %r201 to i960
+%r204 = getelementptr i32, i32* %r2, i32 29
+%r205 = load i32, i32* %r204
+%r206 = zext i32 %r205 to i960
+%r207 = shl i960 %r206, 928
+%r208 = or i960 %r202, %r207
+%r209 = zext i960 %r208 to i992
+%r211 = getelementptr i32, i32* %r2, i32 30
+%r212 = load i32, i32* %r211
+%r213 = zext i32 %r212 to i992
+%r214 = shl i992 %r213, 960
+%r215 = or i992 %r209, %r214
+%r216 = zext i992 %r215 to i1024
+%r218 = getelementptr i32, i32* %r2, i32 31
+%r219 = load i32, i32* %r218
+%r220 = zext i32 %r219 to i1024
+%r221 = shl i1024 %r220, 992
+%r222 = or i1024 %r216, %r221 ; %r222 = complete 1024-bit value read from %r2 (the minuend)
+%r223 = load i32, i32* %r3 ; --- assemble the 32 words at %r3 into one i1024 the same way
+%r224 = zext i32 %r223 to i64
+%r226 = getelementptr i32, i32* %r3, i32 1
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i64
+%r229 = shl i64 %r228, 32
+%r230 = or i64 %r224, %r229
+%r231 = zext i64 %r230 to i96
+%r233 = getelementptr i32, i32* %r3, i32 2
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i96
+%r236 = shl i96 %r235, 64
+%r237 = or i96 %r231, %r236
+%r238 = zext i96 %r237 to i128
+%r240 = getelementptr i32, i32* %r3, i32 3
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i128
+%r243 = shl i128 %r242, 96
+%r244 = or i128 %r238, %r243
+%r245 = zext i128 %r244 to i160
+%r247 = getelementptr i32, i32* %r3, i32 4
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i160
+%r250 = shl i160 %r249, 128
+%r251 = or i160 %r245, %r250
+%r252 = zext i160 %r251 to i192
+%r254 = getelementptr i32, i32* %r3, i32 5
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i192
+%r257 = shl i192 %r256, 160
+%r258 = or i192 %r252, %r257
+%r259 = zext i192 %r258 to i224
+%r261 = getelementptr i32, i32* %r3, i32 6
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i224
+%r264 = shl i224 %r263, 192
+%r265 = or i224 %r259, %r264
+%r266 = zext i224 %r265 to i256
+%r268 = getelementptr i32, i32* %r3, i32 7
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i256
+%r271 = shl i256 %r270, 224
+%r272 = or i256 %r266, %r271
+%r273 = zext i256 %r272 to i288
+%r275 = getelementptr i32, i32* %r3, i32 8
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i288
+%r278 = shl i288 %r277, 256
+%r279 = or i288 %r273, %r278
+%r280 = zext i288 %r279 to i320
+%r282 = getelementptr i32, i32* %r3, i32 9
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i320
+%r285 = shl i320 %r284, 288
+%r286 = or i320 %r280, %r285
+%r287 = zext i320 %r286 to i352
+%r289 = getelementptr i32, i32* %r3, i32 10
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i352
+%r292 = shl i352 %r291, 320
+%r293 = or i352 %r287, %r292
+%r294 = zext i352 %r293 to i384
+%r296 = getelementptr i32, i32* %r3, i32 11
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i384
+%r299 = shl i384 %r298, 352
+%r300 = or i384 %r294, %r299
+%r301 = zext i384 %r300 to i416
+%r303 = getelementptr i32, i32* %r3, i32 12
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i416
+%r306 = shl i416 %r305, 384
+%r307 = or i416 %r301, %r306
+%r308 = zext i416 %r307 to i448
+%r310 = getelementptr i32, i32* %r3, i32 13
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i448
+%r313 = shl i448 %r312, 416
+%r314 = or i448 %r308, %r313
+%r315 = zext i448 %r314 to i480
+%r317 = getelementptr i32, i32* %r3, i32 14
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i480
+%r320 = shl i480 %r319, 448
+%r321 = or i480 %r315, %r320
+%r322 = zext i480 %r321 to i512
+%r324 = getelementptr i32, i32* %r3, i32 15
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i512
+%r327 = shl i512 %r326, 480
+%r328 = or i512 %r322, %r327
+%r329 = zext i512 %r328 to i544
+%r331 = getelementptr i32, i32* %r3, i32 16
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i544
+%r334 = shl i544 %r333, 512
+%r335 = or i544 %r329, %r334
+%r336 = zext i544 %r335 to i576
+%r338 = getelementptr i32, i32* %r3, i32 17
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i576
+%r341 = shl i576 %r340, 544
+%r342 = or i576 %r336, %r341
+%r343 = zext i576 %r342 to i608
+%r345 = getelementptr i32, i32* %r3, i32 18
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i608
+%r348 = shl i608 %r347, 576
+%r349 = or i608 %r343, %r348
+%r350 = zext i608 %r349 to i640
+%r352 = getelementptr i32, i32* %r3, i32 19
+%r353 = load i32, i32* %r352
+%r354 = zext i32 %r353 to i640
+%r355 = shl i640 %r354, 608
+%r356 = or i640 %r350, %r355
+%r357 = zext i640 %r356 to i672
+%r359 = getelementptr i32, i32* %r3, i32 20
+%r360 = load i32, i32* %r359
+%r361 = zext i32 %r360 to i672
+%r362 = shl i672 %r361, 640
+%r363 = or i672 %r357, %r362
+%r364 = zext i672 %r363 to i704
+%r366 = getelementptr i32, i32* %r3, i32 21
+%r367 = load i32, i32* %r366
+%r368 = zext i32 %r367 to i704
+%r369 = shl i704 %r368, 672
+%r370 = or i704 %r364, %r369
+%r371 = zext i704 %r370 to i736
+%r373 = getelementptr i32, i32* %r3, i32 22
+%r374 = load i32, i32* %r373
+%r375 = zext i32 %r374 to i736
+%r376 = shl i736 %r375, 704
+%r377 = or i736 %r371, %r376
+%r378 = zext i736 %r377 to i768
+%r380 = getelementptr i32, i32* %r3, i32 23
+%r381 = load i32, i32* %r380
+%r382 = zext i32 %r381 to i768
+%r383 = shl i768 %r382, 736
+%r384 = or i768 %r378, %r383
+%r385 = zext i768 %r384 to i800
+%r387 = getelementptr i32, i32* %r3, i32 24
+%r388 = load i32, i32* %r387
+%r389 = zext i32 %r388 to i800
+%r390 = shl i800 %r389, 768
+%r391 = or i800 %r385, %r390
+%r392 = zext i800 %r391 to i832
+%r394 = getelementptr i32, i32* %r3, i32 25
+%r395 = load i32, i32* %r394
+%r396 = zext i32 %r395 to i832
+%r397 = shl i832 %r396, 800
+%r398 = or i832 %r392, %r397
+%r399 = zext i832 %r398 to i864
+%r401 = getelementptr i32, i32* %r3, i32 26
+%r402 = load i32, i32* %r401
+%r403 = zext i32 %r402 to i864
+%r404 = shl i864 %r403, 832
+%r405 = or i864 %r399, %r404
+%r406 = zext i864 %r405 to i896
+%r408 = getelementptr i32, i32* %r3, i32 27
+%r409 = load i32, i32* %r408
+%r410 = zext i32 %r409 to i896
+%r411 = shl i896 %r410, 864
+%r412 = or i896 %r406, %r411
+%r413 = zext i896 %r412 to i928
+%r415 = getelementptr i32, i32* %r3, i32 28
+%r416 = load i32, i32* %r415
+%r417 = zext i32 %r416 to i928
+%r418 = shl i928 %r417, 896
+%r419 = or i928 %r413, %r418
+%r420 = zext i928 %r419 to i960
+%r422 = getelementptr i32, i32* %r3, i32 29
+%r423 = load i32, i32* %r422
+%r424 = zext i32 %r423 to i960
+%r425 = shl i960 %r424, 928
+%r426 = or i960 %r420, %r425
+%r427 = zext i960 %r426 to i992
+%r429 = getelementptr i32, i32* %r3, i32 30
+%r430 = load i32, i32* %r429
+%r431 = zext i32 %r430 to i992
+%r432 = shl i992 %r431, 960
+%r433 = or i992 %r427, %r432
+%r434 = zext i992 %r433 to i1024
+%r436 = getelementptr i32, i32* %r3, i32 31
+%r437 = load i32, i32* %r436
+%r438 = zext i32 %r437 to i1024
+%r439 = shl i1024 %r438, 992
+%r440 = or i1024 %r434, %r439 ; %r440 = complete 1024-bit value read from %r3 (the subtrahend)
+%r441 = zext i1024 %r222 to i1056 ; widen both operands to 1056 bits so a borrow shows up in bit 1024
+%r442 = zext i1024 %r440 to i1056
+%r443 = sub i1056 %r441, %r442 ; %r443 = x - y; wraps modulo 2^1056 when x < y
+%r444 = trunc i1056 %r443 to i512 ; low 512 bits of the difference, stored unmodified to %r1[0..15]
+%r445 = trunc i512 %r444 to i32
+%r447 = getelementptr i32, i32* %r1, i32 0
+store i32 %r445, i32* %r447
+%r448 = lshr i512 %r444, 32
+%r449 = trunc i512 %r448 to i32
+%r451 = getelementptr i32, i32* %r1, i32 1
+store i32 %r449, i32* %r451
+%r452 = lshr i512 %r448, 32
+%r453 = trunc i512 %r452 to i32
+%r455 = getelementptr i32, i32* %r1, i32 2
+store i32 %r453, i32* %r455
+%r456 = lshr i512 %r452, 32
+%r457 = trunc i512 %r456 to i32
+%r459 = getelementptr i32, i32* %r1, i32 3
+store i32 %r457, i32* %r459
+%r460 = lshr i512 %r456, 32
+%r461 = trunc i512 %r460 to i32
+%r463 = getelementptr i32, i32* %r1, i32 4
+store i32 %r461, i32* %r463
+%r464 = lshr i512 %r460, 32
+%r465 = trunc i512 %r464 to i32
+%r467 = getelementptr i32, i32* %r1, i32 5
+store i32 %r465, i32* %r467
+%r468 = lshr i512 %r464, 32
+%r469 = trunc i512 %r468 to i32
+%r471 = getelementptr i32, i32* %r1, i32 6
+store i32 %r469, i32* %r471
+%r472 = lshr i512 %r468, 32
+%r473 = trunc i512 %r472 to i32
+%r475 = getelementptr i32, i32* %r1, i32 7
+store i32 %r473, i32* %r475
+%r476 = lshr i512 %r472, 32
+%r477 = trunc i512 %r476 to i32
+%r479 = getelementptr i32, i32* %r1, i32 8
+store i32 %r477, i32* %r479
+%r480 = lshr i512 %r476, 32
+%r481 = trunc i512 %r480 to i32
+%r483 = getelementptr i32, i32* %r1, i32 9
+store i32 %r481, i32* %r483
+%r484 = lshr i512 %r480, 32
+%r485 = trunc i512 %r484 to i32
+%r487 = getelementptr i32, i32* %r1, i32 10
+store i32 %r485, i32* %r487
+%r488 = lshr i512 %r484, 32
+%r489 = trunc i512 %r488 to i32
+%r491 = getelementptr i32, i32* %r1, i32 11
+store i32 %r489, i32* %r491
+%r492 = lshr i512 %r488, 32
+%r493 = trunc i512 %r492 to i32
+%r495 = getelementptr i32, i32* %r1, i32 12
+store i32 %r493, i32* %r495
+%r496 = lshr i512 %r492, 32
+%r497 = trunc i512 %r496 to i32
+%r499 = getelementptr i32, i32* %r1, i32 13
+store i32 %r497, i32* %r499
+%r500 = lshr i512 %r496, 32
+%r501 = trunc i512 %r500 to i32
+%r503 = getelementptr i32, i32* %r1, i32 14
+store i32 %r501, i32* %r503
+%r504 = lshr i512 %r500, 32
+%r505 = trunc i512 %r504 to i32
+%r507 = getelementptr i32, i32* %r1, i32 15
+store i32 %r505, i32* %r507
+%r508 = lshr i1056 %r443, 512
+%r509 = trunc i1056 %r508 to i512 ; %r509 = bits 512..1023 of the difference (upper half before correction)
+%r510 = lshr i1056 %r443, 1024
+%r511 = trunc i1056 %r510 to i1 ; %r511 = bit 1024 of the difference: set exactly when the subtraction borrowed
+%r512 = load i32, i32* %r4 ; --- assemble the 16 words at %r4 into one i512 (presumably the field modulus p; confirm with callers)
+%r513 = zext i32 %r512 to i64
+%r515 = getelementptr i32, i32* %r4, i32 1
+%r516 = load i32, i32* %r515
+%r517 = zext i32 %r516 to i64
+%r518 = shl i64 %r517, 32
+%r519 = or i64 %r513, %r518
+%r520 = zext i64 %r519 to i96
+%r522 = getelementptr i32, i32* %r4, i32 2
+%r523 = load i32, i32* %r522
+%r524 = zext i32 %r523 to i96
+%r525 = shl i96 %r524, 64
+%r526 = or i96 %r520, %r525
+%r527 = zext i96 %r526 to i128
+%r529 = getelementptr i32, i32* %r4, i32 3
+%r530 = load i32, i32* %r529
+%r531 = zext i32 %r530 to i128
+%r532 = shl i128 %r531, 96
+%r533 = or i128 %r527, %r532
+%r534 = zext i128 %r533 to i160
+%r536 = getelementptr i32, i32* %r4, i32 4
+%r537 = load i32, i32* %r536
+%r538 = zext i32 %r537 to i160
+%r539 = shl i160 %r538, 128
+%r540 = or i160 %r534, %r539
+%r541 = zext i160 %r540 to i192
+%r543 = getelementptr i32, i32* %r4, i32 5
+%r544 = load i32, i32* %r543
+%r545 = zext i32 %r544 to i192
+%r546 = shl i192 %r545, 160
+%r547 = or i192 %r541, %r546
+%r548 = zext i192 %r547 to i224
+%r550 = getelementptr i32, i32* %r4, i32 6
+%r551 = load i32, i32* %r550
+%r552 = zext i32 %r551 to i224
+%r553 = shl i224 %r552, 192
+%r554 = or i224 %r548, %r553
+%r555 = zext i224 %r554 to i256
+%r557 = getelementptr i32, i32* %r4, i32 7
+%r558 = load i32, i32* %r557
+%r559 = zext i32 %r558 to i256
+%r560 = shl i256 %r559, 224
+%r561 = or i256 %r555, %r560
+%r562 = zext i256 %r561 to i288
+%r564 = getelementptr i32, i32* %r4, i32 8
+%r565 = load i32, i32* %r564
+%r566 = zext i32 %r565 to i288
+%r567 = shl i288 %r566, 256
+%r568 = or i288 %r562, %r567
+%r569 = zext i288 %r568 to i320
+%r571 = getelementptr i32, i32* %r4, i32 9
+%r572 = load i32, i32* %r571
+%r573 = zext i32 %r572 to i320
+%r574 = shl i320 %r573, 288
+%r575 = or i320 %r569, %r574
+%r576 = zext i320 %r575 to i352
+%r578 = getelementptr i32, i32* %r4, i32 10
+%r579 = load i32, i32* %r578
+%r580 = zext i32 %r579 to i352
+%r581 = shl i352 %r580, 320
+%r582 = or i352 %r576, %r581
+%r583 = zext i352 %r582 to i384
+%r585 = getelementptr i32, i32* %r4, i32 11
+%r586 = load i32, i32* %r585
+%r587 = zext i32 %r586 to i384
+%r588 = shl i384 %r587, 352
+%r589 = or i384 %r583, %r588
+%r590 = zext i384 %r589 to i416
+%r592 = getelementptr i32, i32* %r4, i32 12
+%r593 = load i32, i32* %r592
+%r594 = zext i32 %r593 to i416
+%r595 = shl i416 %r594, 384
+%r596 = or i416 %r590, %r595
+%r597 = zext i416 %r596 to i448
+%r599 = getelementptr i32, i32* %r4, i32 13
+%r600 = load i32, i32* %r599
+%r601 = zext i32 %r600 to i448
+%r602 = shl i448 %r601, 416
+%r603 = or i448 %r597, %r602
+%r604 = zext i448 %r603 to i480
+%r606 = getelementptr i32, i32* %r4, i32 14
+%r607 = load i32, i32* %r606
+%r608 = zext i32 %r607 to i480
+%r609 = shl i480 %r608, 448
+%r610 = or i480 %r604, %r609
+%r611 = zext i480 %r610 to i512
+%r613 = getelementptr i32, i32* %r4, i32 15
+%r614 = load i32, i32* %r613
+%r615 = zext i32 %r614 to i512
+%r616 = shl i512 %r615, 480
+%r617 = or i512 %r611, %r616 ; %r617 = complete 512-bit value read from %r4
+%r619 = select i1 %r511, i512 %r617, i512 0 ; addend: the value from %r4 if the subtraction borrowed, otherwise 0
+%r620 = add i512 %r509, %r619 ; corrected upper half; the add is i512, so any carry out is discarded
+%r622 = getelementptr i32, i32* %r1, i32 16 ; %r622 = &%r1[16], base address for the upper 16 output words
+%r623 = trunc i512 %r620 to i32 ; store %r620 one 32-bit word at a time to %r1[16..31]
+%r625 = getelementptr i32, i32* %r622, i32 0
+store i32 %r623, i32* %r625
+%r626 = lshr i512 %r620, 32
+%r627 = trunc i512 %r626 to i32
+%r629 = getelementptr i32, i32* %r622, i32 1
+store i32 %r627, i32* %r629
+%r630 = lshr i512 %r626, 32
+%r631 = trunc i512 %r630 to i32
+%r633 = getelementptr i32, i32* %r622, i32 2
+store i32 %r631, i32* %r633
+%r634 = lshr i512 %r630, 32
+%r635 = trunc i512 %r634 to i32
+%r637 = getelementptr i32, i32* %r622, i32 3
+store i32 %r635, i32* %r637
+%r638 = lshr i512 %r634, 32
+%r639 = trunc i512 %r638 to i32
+%r641 = getelementptr i32, i32* %r622, i32 4
+store i32 %r639, i32* %r641
+%r642 = lshr i512 %r638, 32
+%r643 = trunc i512 %r642 to i32
+%r645 = getelementptr i32, i32* %r622, i32 5
+store i32 %r643, i32* %r645
+%r646 = lshr i512 %r642, 32
+%r647 = trunc i512 %r646 to i32
+%r649 = getelementptr i32, i32* %r622, i32 6
+store i32 %r647, i32* %r649
+%r650 = lshr i512 %r646, 32
+%r651 = trunc i512 %r650 to i32
+%r653 = getelementptr i32, i32* %r622, i32 7
+store i32 %r651, i32* %r653
+%r654 = lshr i512 %r650, 32
+%r655 = trunc i512 %r654 to i32
+%r657 = getelementptr i32, i32* %r622, i32 8
+store i32 %r655, i32* %r657
+%r658 = lshr i512 %r654, 32
+%r659 = trunc i512 %r658 to i32
+%r661 = getelementptr i32, i32* %r622, i32 9
+store i32 %r659, i32* %r661
+%r662 = lshr i512 %r658, 32
+%r663 = trunc i512 %r662 to i32
+%r665 = getelementptr i32, i32* %r622, i32 10
+store i32 %r663, i32* %r665
+%r666 = lshr i512 %r662, 32
+%r667 = trunc i512 %r666 to i32
+%r669 = getelementptr i32, i32* %r622, i32 11
+store i32 %r667, i32* %r669
+%r670 = lshr i512 %r666, 32
+%r671 = trunc i512 %r670 to i32
+%r673 = getelementptr i32, i32* %r622, i32 12
+store i32 %r671, i32* %r673
+%r674 = lshr i512 %r670, 32
+%r675 = trunc i512 %r674 to i32
+%r677 = getelementptr i32, i32* %r622, i32 13
+store i32 %r675, i32* %r677
+%r678 = lshr i512 %r674, 32
+%r679 = trunc i512 %r678 to i32
+%r681 = getelementptr i32, i32* %r622, i32 14
+store i32 %r679, i32* %r681
+%r682 = lshr i512 %r678, 32
+%r683 = trunc i512 %r682 to i32
+%r685 = getelementptr i32, i32* %r622, i32 15
+store i32 %r683, i32* %r685
+ret void
+}
+define i576 @mulPv544x32(i32* noalias  %r2, i32 %r3) ; returns an i576 combined from 17 calls mulPos32x32(%r2, %r3, i), i = 0..16 (presumably the product of the 17-word number at %r2 and %r3; mulPos32x32 is defined elsewhere, confirm)
+{
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0) ; i64 partial result for index 0; the same three-instruction pattern repeats for indices 1..16
+%r6 = trunc i64 %r5 to i32 ; low 32 bits of the partial result
+%r7 = call i32 @extractHigh32(i64 %r5) ; presumably its high 32 bits (extractHigh32 is defined elsewhere, confirm)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
+%r34 = trunc i64 %r33 to i32
+%r35 = call i32 @extractHigh32(i64 %r33)
+%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
+%r38 = trunc i64 %r37 to i32
+%r39 = call i32 @extractHigh32(i64 %r37)
+%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
+%r42 = trunc i64 %r41 to i32
+%r43 = call i32 @extractHigh32(i64 %r41)
+%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
+%r46 = trunc i64 %r45 to i32
+%r47 = call i32 @extractHigh32(i64 %r45)
+%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
+%r50 = trunc i64 %r49 to i32
+%r51 = call i32 @extractHigh32(i64 %r49)
+%r53 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 12)
+%r54 = trunc i64 %r53 to i32
+%r55 = call i32 @extractHigh32(i64 %r53)
+%r57 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 13)
+%r58 = trunc i64 %r57 to i32
+%r59 = call i32 @extractHigh32(i64 %r57)
+%r61 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 14)
+%r62 = trunc i64 %r61 to i32
+%r63 = call i32 @extractHigh32(i64 %r61)
+%r65 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 15)
+%r66 = trunc i64 %r65 to i32
+%r67 = call i32 @extractHigh32(i64 %r65)
+%r69 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 16)
+%r70 = trunc i64 %r69 to i32
+%r71 = call i32 @extractHigh32(i64 %r69)
+%r72 = zext i32 %r6 to i64 ; --- pack the 17 truncated low words into one i544, word i placed at bit 32*i
+%r73 = zext i32 %r10 to i64
+%r74 = shl i64 %r73, 32
+%r75 = or i64 %r72, %r74
+%r76 = zext i64 %r75 to i96
+%r77 = zext i32 %r14 to i96
+%r78 = shl i96 %r77, 64
+%r79 = or i96 %r76, %r78
+%r80 = zext i96 %r79 to i128
+%r81 = zext i32 %r18 to i128
+%r82 = shl i128 %r81, 96
+%r83 = or i128 %r80, %r82
+%r84 = zext i128 %r83 to i160
+%r85 = zext i32 %r22 to i160
+%r86 = shl i160 %r85, 128
+%r87 = or i160 %r84, %r86
+%r88 = zext i160 %r87 to i192
+%r89 = zext i32 %r26 to i192
+%r90 = shl i192 %r89, 160
+%r91 = or i192 %r88, %r90
+%r92 = zext i192 %r91 to i224
+%r93 = zext i32 %r30 to i224
+%r94 = shl i224 %r93, 192
+%r95 = or i224 %r92, %r94
+%r96 = zext i224 %r95 to i256
+%r97 = zext i32 %r34 to i256
+%r98 = shl i256 %r97, 224
+%r99 = or i256 %r96, %r98
+%r100 = zext i256 %r99 to i288
+%r101 = zext i32 %r38 to i288
+%r102 = shl i288 %r101, 256
+%r103 = or i288 %r100, %r102
+%r104 = zext i288 %r103 to i320
+%r105 = zext i32 %r42 to i320
+%r106 = shl i320 %r105, 288
+%r107 = or i320 %r104, %r106
+%r108 = zext i320 %r107 to i352
+%r109 = zext i32 %r46 to i352
+%r110 = shl i352 %r109, 320
+%r111 = or i352 %r108, %r110
+%r112 = zext i352 %r111 to i384
+%r113 = zext i32 %r50 to i384
+%r114 = shl i384 %r113, 352
+%r115 = or i384 %r112, %r114
+%r116 = zext i384 %r115 to i416
+%r117 = zext i32 %r54 to i416
+%r118 = shl i416 %r117, 384
+%r119 = or i416 %r116, %r118
+%r120 = zext i416 %r119 to i448
+%r121 = zext i32 %r58 to i448
+%r122 = shl i448 %r121, 416
+%r123 = or i448 %r120, %r122
+%r124 = zext i448 %r123 to i480
+%r125 = zext i32 %r62 to i480
+%r126 = shl i480 %r125, 448
+%r127 = or i480 %r124, %r126
+%r128 = zext i480 %r127 to i512
+%r129 = zext i32 %r66 to i512
+%r130 = shl i512 %r129, 480
+%r131 = or i512 %r128, %r130
+%r132 = zext i512 %r131 to i544
+%r133 = zext i32 %r70 to i544
+%r134 = shl i544 %r133, 512
+%r135 = or i544 %r132, %r134 ; %r135 = all 17 low words concatenated
+%r136 = zext i32 %r7 to i64 ; --- pack the 17 extractHigh32 results into a second i544 the same way
+%r137 = zext i32 %r11 to i64
+%r138 = shl i64 %r137, 32
+%r139 = or i64 %r136, %r138
+%r140 = zext i64 %r139 to i96
+%r141 = zext i32 %r15 to i96
+%r142 = shl i96 %r141, 64
+%r143 = or i96 %r140, %r142
+%r144 = zext i96 %r143 to i128
+%r145 = zext i32 %r19 to i128
+%r146 = shl i128 %r145, 96
+%r147 = or i128 %r144, %r146
+%r148 = zext i128 %r147 to i160
+%r149 = zext i32 %r23 to i160
+%r150 = shl i160 %r149, 128
+%r151 = or i160 %r148, %r150
+%r152 = zext i160 %r151 to i192
+%r153 = zext i32 %r27 to i192
+%r154 = shl i192 %r153, 160
+%r155 = or i192 %r152, %r154
+%r156 = zext i192 %r155 to i224
+%r157 = zext i32 %r31 to i224
+%r158 = shl i224 %r157, 192
+%r159 = or i224 %r156, %r158
+%r160 = zext i224 %r159 to i256
+%r161 = zext i32 %r35 to i256
+%r162 = shl i256 %r161, 224
+%r163 = or i256 %r160, %r162
+%r164 = zext i256 %r163 to i288
+%r165 = zext i32 %r39 to i288
+%r166 = shl i288 %r165, 256
+%r167 = or i288 %r164, %r166
+%r168 = zext i288 %r167 to i320
+%r169 = zext i32 %r43 to i320
+%r170 = shl i320 %r169, 288
+%r171 = or i320 %r168, %r170
+%r172 = zext i320 %r171 to i352
+%r173 = zext i32 %r47 to i352
+%r174 = shl i352 %r173, 320
+%r175 = or i352 %r172, %r174
+%r176 = zext i352 %r175 to i384
+%r177 = zext i32 %r51 to i384
+%r178 = shl i384 %r177, 352
+%r179 = or i384 %r176, %r178
+%r180 = zext i384 %r179 to i416
+%r181 = zext i32 %r55 to i416
+%r182 = shl i416 %r181, 384
+%r183 = or i416 %r180, %r182
+%r184 = zext i416 %r183 to i448
+%r185 = zext i32 %r59 to i448
+%r186 = shl i448 %r185, 416
+%r187 = or i448 %r184, %r186
+%r188 = zext i448 %r187 to i480
+%r189 = zext i32 %r63 to i480
+%r190 = shl i480 %r189, 448
+%r191 = or i480 %r188, %r190
+%r192 = zext i480 %r191 to i512
+%r193 = zext i32 %r67 to i512
+%r194 = shl i512 %r193, 480
+%r195 = or i512 %r192, %r194
+%r196 = zext i512 %r195 to i544
+%r197 = zext i32 %r71 to i544
+%r198 = shl i544 %r197, 512
+%r199 = or i544 %r196, %r198 ; %r199 = all 17 high words concatenated
+%r200 = zext i544 %r135 to i576 ; widen both to the 576-bit result width
+%r201 = zext i544 %r199 to i576
+%r202 = shl i576 %r201, 32 ; move each high word one 32-bit position above its matching low word
+%r203 = add i576 %r200, %r202 ; lows + (highs << 32); the wide add propagates carries between words
+ret i576 %r203
+}
+define void @mcl_fp_mulUnitPre17L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3) ; writes the i576 returned by mulPv544x32(%r2, %r3) to %r1[0..17] as 18 32-bit words, least significant word first
+{
+%r4 = call i576 @mulPv544x32(i32* %r2, i32 %r3) ; %r4 = 576-bit value to be stored
+%r5 = trunc i576 %r4 to i32 ; word 0 = bits 0..31
+%r7 = getelementptr i32, i32* %r1, i32 0
+store i32 %r5, i32* %r7
+%r8 = lshr i576 %r4, 32 ; shift the next word down to bit 0; this shift/trunc/store pattern repeats for every remaining word
+%r9 = trunc i576 %r8 to i32
+%r11 = getelementptr i32, i32* %r1, i32 1
+store i32 %r9, i32* %r11
+%r12 = lshr i576 %r8, 32
+%r13 = trunc i576 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 2
+store i32 %r13, i32* %r15
+%r16 = lshr i576 %r12, 32
+%r17 = trunc i576 %r16 to i32
+%r19 = getelementptr i32, i32* %r1, i32 3
+store i32 %r17, i32* %r19
+%r20 = lshr i576 %r16, 32
+%r21 = trunc i576 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 4
+store i32 %r21, i32* %r23
+%r24 = lshr i576 %r20, 32
+%r25 = trunc i576 %r24 to i32
+%r27 = getelementptr i32, i32* %r1, i32 5
+store i32 %r25, i32* %r27
+%r28 = lshr i576 %r24, 32
+%r29 = trunc i576 %r28 to i32
+%r31 = getelementptr i32, i32* %r1, i32 6
+store i32 %r29, i32* %r31
+%r32 = lshr i576 %r28, 32
+%r33 = trunc i576 %r32 to i32
+%r35 = getelementptr i32, i32* %r1, i32 7
+store i32 %r33, i32* %r35
+%r36 = lshr i576 %r32, 32
+%r37 = trunc i576 %r36 to i32
+%r39 = getelementptr i32, i32* %r1, i32 8
+store i32 %r37, i32* %r39
+%r40 = lshr i576 %r36, 32
+%r41 = trunc i576 %r40 to i32
+%r43 = getelementptr i32, i32* %r1, i32 9
+store i32 %r41, i32* %r43
+%r44 = lshr i576 %r40, 32
+%r45 = trunc i576 %r44 to i32
+%r47 = getelementptr i32, i32* %r1, i32 10
+store i32 %r45, i32* %r47
+%r48 = lshr i576 %r44, 32
+%r49 = trunc i576 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 11
+store i32 %r49, i32* %r51
+%r52 = lshr i576 %r48, 32
+%r53 = trunc i576 %r52 to i32
+%r55 = getelementptr i32, i32* %r1, i32 12
+store i32 %r53, i32* %r55
+%r56 = lshr i576 %r52, 32
+%r57 = trunc i576 %r56 to i32
+%r59 = getelementptr i32, i32* %r1, i32 13
+store i32 %r57, i32* %r59
+%r60 = lshr i576 %r56, 32
+%r61 = trunc i576 %r60 to i32
+%r63 = getelementptr i32, i32* %r1, i32 14
+store i32 %r61, i32* %r63
+%r64 = lshr i576 %r60, 32
+%r65 = trunc i576 %r64 to i32
+%r67 = getelementptr i32, i32* %r1, i32 15
+store i32 %r65, i32* %r67
+%r68 = lshr i576 %r64, 32
+%r69 = trunc i576 %r68 to i32
+%r71 = getelementptr i32, i32* %r1, i32 16
+store i32 %r69, i32* %r71
+%r72 = lshr i576 %r68, 32
+%r73 = trunc i576 %r72 to i32 ; last word = bits 544..575
+%r75 = getelementptr i32, i32* %r1, i32 17
+store i32 %r73, i32* %r75
+ret void
+}
+define void @mcl_fpDbl_mulPre17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; writes 34 words to %r1 from the 17-word inputs %r2 and %r3, one word of %r3 per step, no reduction (NOTE(review): assumes mulPv544x32 returns the 17-word-by-1-word product; it is defined outside this view)
+{
+%r4 = load i32, i32* %r3 ; word 0 of %r3
+%r5 = call i576 @mulPv544x32(i32* %r2, i32 %r4) ; first partial result: helper applied to %r2 and that word
+%r6 = trunc i576 %r5 to i32
+store i32 %r6, i32* %r1 ; low 32 bits are final: output word 0
+%r7 = lshr i576 %r5, 32 ; drop the stored word; the rest carries into the next step
+%r9 = getelementptr i32, i32* %r3, i32 1
+%r10 = load i32, i32* %r9
+%r11 = call i576 @mulPv544x32(i32* %r2, i32 %r10)
+%r12 = add i576 %r7, %r11 ; carry from the previous step + partial result for word 1
+%r13 = trunc i576 %r12 to i32
+%r15 = getelementptr i32, i32* %r1, i32 1
+store i32 %r13, i32* %r15
+%r16 = lshr i576 %r12, 32
+%r18 = getelementptr i32, i32* %r3, i32 2 ; the same load/call/add/store/shift step repeats for words 2..15 of %r3
+%r19 = load i32, i32* %r18
+%r20 = call i576 @mulPv544x32(i32* %r2, i32 %r19)
+%r21 = add i576 %r16, %r20
+%r22 = trunc i576 %r21 to i32
+%r24 = getelementptr i32, i32* %r1, i32 2
+store i32 %r22, i32* %r24
+%r25 = lshr i576 %r21, 32
+%r27 = getelementptr i32, i32* %r3, i32 3
+%r28 = load i32, i32* %r27
+%r29 = call i576 @mulPv544x32(i32* %r2, i32 %r28)
+%r30 = add i576 %r25, %r29
+%r31 = trunc i576 %r30 to i32
+%r33 = getelementptr i32, i32* %r1, i32 3
+store i32 %r31, i32* %r33
+%r34 = lshr i576 %r30, 32
+%r36 = getelementptr i32, i32* %r3, i32 4
+%r37 = load i32, i32* %r36
+%r38 = call i576 @mulPv544x32(i32* %r2, i32 %r37)
+%r39 = add i576 %r34, %r38
+%r40 = trunc i576 %r39 to i32
+%r42 = getelementptr i32, i32* %r1, i32 4
+store i32 %r40, i32* %r42
+%r43 = lshr i576 %r39, 32
+%r45 = getelementptr i32, i32* %r3, i32 5
+%r46 = load i32, i32* %r45
+%r47 = call i576 @mulPv544x32(i32* %r2, i32 %r46)
+%r48 = add i576 %r43, %r47
+%r49 = trunc i576 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 5
+store i32 %r49, i32* %r51
+%r52 = lshr i576 %r48, 32
+%r54 = getelementptr i32, i32* %r3, i32 6
+%r55 = load i32, i32* %r54
+%r56 = call i576 @mulPv544x32(i32* %r2, i32 %r55)
+%r57 = add i576 %r52, %r56
+%r58 = trunc i576 %r57 to i32
+%r60 = getelementptr i32, i32* %r1, i32 6
+store i32 %r58, i32* %r60
+%r61 = lshr i576 %r57, 32
+%r63 = getelementptr i32, i32* %r3, i32 7
+%r64 = load i32, i32* %r63
+%r65 = call i576 @mulPv544x32(i32* %r2, i32 %r64)
+%r66 = add i576 %r61, %r65
+%r67 = trunc i576 %r66 to i32
+%r69 = getelementptr i32, i32* %r1, i32 7
+store i32 %r67, i32* %r69
+%r70 = lshr i576 %r66, 32
+%r72 = getelementptr i32, i32* %r3, i32 8
+%r73 = load i32, i32* %r72
+%r74 = call i576 @mulPv544x32(i32* %r2, i32 %r73)
+%r75 = add i576 %r70, %r74
+%r76 = trunc i576 %r75 to i32
+%r78 = getelementptr i32, i32* %r1, i32 8
+store i32 %r76, i32* %r78
+%r79 = lshr i576 %r75, 32
+%r81 = getelementptr i32, i32* %r3, i32 9
+%r82 = load i32, i32* %r81
+%r83 = call i576 @mulPv544x32(i32* %r2, i32 %r82)
+%r84 = add i576 %r79, %r83
+%r85 = trunc i576 %r84 to i32
+%r87 = getelementptr i32, i32* %r1, i32 9
+store i32 %r85, i32* %r87
+%r88 = lshr i576 %r84, 32
+%r90 = getelementptr i32, i32* %r3, i32 10
+%r91 = load i32, i32* %r90
+%r92 = call i576 @mulPv544x32(i32* %r2, i32 %r91)
+%r93 = add i576 %r88, %r92
+%r94 = trunc i576 %r93 to i32
+%r96 = getelementptr i32, i32* %r1, i32 10
+store i32 %r94, i32* %r96
+%r97 = lshr i576 %r93, 32
+%r99 = getelementptr i32, i32* %r3, i32 11
+%r100 = load i32, i32* %r99
+%r101 = call i576 @mulPv544x32(i32* %r2, i32 %r100)
+%r102 = add i576 %r97, %r101
+%r103 = trunc i576 %r102 to i32
+%r105 = getelementptr i32, i32* %r1, i32 11
+store i32 %r103, i32* %r105
+%r106 = lshr i576 %r102, 32
+%r108 = getelementptr i32, i32* %r3, i32 12
+%r109 = load i32, i32* %r108
+%r110 = call i576 @mulPv544x32(i32* %r2, i32 %r109)
+%r111 = add i576 %r106, %r110
+%r112 = trunc i576 %r111 to i32
+%r114 = getelementptr i32, i32* %r1, i32 12
+store i32 %r112, i32* %r114
+%r115 = lshr i576 %r111, 32
+%r117 = getelementptr i32, i32* %r3, i32 13
+%r118 = load i32, i32* %r117
+%r119 = call i576 @mulPv544x32(i32* %r2, i32 %r118)
+%r120 = add i576 %r115, %r119
+%r121 = trunc i576 %r120 to i32
+%r123 = getelementptr i32, i32* %r1, i32 13
+store i32 %r121, i32* %r123
+%r124 = lshr i576 %r120, 32
+%r126 = getelementptr i32, i32* %r3, i32 14
+%r127 = load i32, i32* %r126
+%r128 = call i576 @mulPv544x32(i32* %r2, i32 %r127)
+%r129 = add i576 %r124, %r128
+%r130 = trunc i576 %r129 to i32
+%r132 = getelementptr i32, i32* %r1, i32 14
+store i32 %r130, i32* %r132
+%r133 = lshr i576 %r129, 32
+%r135 = getelementptr i32, i32* %r3, i32 15
+%r136 = load i32, i32* %r135
+%r137 = call i576 @mulPv544x32(i32* %r2, i32 %r136)
+%r138 = add i576 %r133, %r137
+%r139 = trunc i576 %r138 to i32
+%r141 = getelementptr i32, i32* %r1, i32 15
+store i32 %r139, i32* %r141
+%r142 = lshr i576 %r138, 32
+%r144 = getelementptr i32, i32* %r3, i32 16 ; last word of %r3
+%r145 = load i32, i32* %r144
+%r146 = call i576 @mulPv544x32(i32* %r2, i32 %r145)
+%r147 = add i576 %r142, %r146 ; final accumulator: all 18 of its words are output below
+%r149 = getelementptr i32, i32* %r1, i32 16 ; base for the tail: output words 16..33
+%r150 = trunc i576 %r147 to i32
+%r152 = getelementptr i32, i32* %r149, i32 0
+store i32 %r150, i32* %r152
+%r153 = lshr i576 %r147, 32 ; peel the accumulator one 32-bit word at a time, low word first
+%r154 = trunc i576 %r153 to i32
+%r156 = getelementptr i32, i32* %r149, i32 1
+store i32 %r154, i32* %r156
+%r157 = lshr i576 %r153, 32
+%r158 = trunc i576 %r157 to i32
+%r160 = getelementptr i32, i32* %r149, i32 2
+store i32 %r158, i32* %r160
+%r161 = lshr i576 %r157, 32
+%r162 = trunc i576 %r161 to i32
+%r164 = getelementptr i32, i32* %r149, i32 3
+store i32 %r162, i32* %r164
+%r165 = lshr i576 %r161, 32
+%r166 = trunc i576 %r165 to i32
+%r168 = getelementptr i32, i32* %r149, i32 4
+store i32 %r166, i32* %r168
+%r169 = lshr i576 %r165, 32
+%r170 = trunc i576 %r169 to i32
+%r172 = getelementptr i32, i32* %r149, i32 5
+store i32 %r170, i32* %r172
+%r173 = lshr i576 %r169, 32
+%r174 = trunc i576 %r173 to i32
+%r176 = getelementptr i32, i32* %r149, i32 6
+store i32 %r174, i32* %r176
+%r177 = lshr i576 %r173, 32
+%r178 = trunc i576 %r177 to i32
+%r180 = getelementptr i32, i32* %r149, i32 7
+store i32 %r178, i32* %r180
+%r181 = lshr i576 %r177, 32
+%r182 = trunc i576 %r181 to i32
+%r184 = getelementptr i32, i32* %r149, i32 8
+store i32 %r182, i32* %r184
+%r185 = lshr i576 %r181, 32
+%r186 = trunc i576 %r185 to i32
+%r188 = getelementptr i32, i32* %r149, i32 9
+store i32 %r186, i32* %r188
+%r189 = lshr i576 %r185, 32
+%r190 = trunc i576 %r189 to i32
+%r192 = getelementptr i32, i32* %r149, i32 10
+store i32 %r190, i32* %r192
+%r193 = lshr i576 %r189, 32
+%r194 = trunc i576 %r193 to i32
+%r196 = getelementptr i32, i32* %r149, i32 11
+store i32 %r194, i32* %r196
+%r197 = lshr i576 %r193, 32
+%r198 = trunc i576 %r197 to i32
+%r200 = getelementptr i32, i32* %r149, i32 12
+store i32 %r198, i32* %r200
+%r201 = lshr i576 %r197, 32
+%r202 = trunc i576 %r201 to i32
+%r204 = getelementptr i32, i32* %r149, i32 13
+store i32 %r202, i32* %r204
+%r205 = lshr i576 %r201, 32
+%r206 = trunc i576 %r205 to i32
+%r208 = getelementptr i32, i32* %r149, i32 14
+store i32 %r206, i32* %r208
+%r209 = lshr i576 %r205, 32
+%r210 = trunc i576 %r209 to i32
+%r212 = getelementptr i32, i32* %r149, i32 15
+store i32 %r210, i32* %r212
+%r213 = lshr i576 %r209, 32
+%r214 = trunc i576 %r213 to i32
+%r216 = getelementptr i32, i32* %r149, i32 16
+store i32 %r214, i32* %r216
+%r217 = lshr i576 %r213, 32
+%r218 = trunc i576 %r217 to i32
+%r220 = getelementptr i32, i32* %r149, i32 17
+store i32 %r218, i32* %r220 ; output word 33, the top word
+ret void
+}
+define void @mcl_fpDbl_sqrPre17L(i32* noalias  %r1, i32* noalias  %r2) ; writes 34 words to %r1 from the 17-word input %r2 used as both operands, one word per step, no reduction (NOTE(review): assumes mulPv544x32 returns the 17-word-by-1-word product; it is defined outside this view)
+{
+%r3 = load i32, i32* %r2 ; word 0 of %r2
+%r4 = call i576 @mulPv544x32(i32* %r2, i32 %r3) ; first partial result: helper applied to %r2 and its own word 0
+%r5 = trunc i576 %r4 to i32
+store i32 %r5, i32* %r1 ; low 32 bits are final: output word 0
+%r6 = lshr i576 %r4, 32 ; drop the stored word; the rest carries into the next step
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = call i576 @mulPv544x32(i32* %r2, i32 %r9)
+%r11 = add i576 %r6, %r10 ; carry from the previous step + partial result for word 1
+%r12 = trunc i576 %r11 to i32
+%r14 = getelementptr i32, i32* %r1, i32 1
+store i32 %r12, i32* %r14
+%r15 = lshr i576 %r11, 32
+%r17 = getelementptr i32, i32* %r2, i32 2 ; the same load/call/add/store/shift step repeats for words 2..15 of %r2
+%r18 = load i32, i32* %r17
+%r19 = call i576 @mulPv544x32(i32* %r2, i32 %r18)
+%r20 = add i576 %r15, %r19
+%r21 = trunc i576 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 2
+store i32 %r21, i32* %r23
+%r24 = lshr i576 %r20, 32
+%r26 = getelementptr i32, i32* %r2, i32 3
+%r27 = load i32, i32* %r26
+%r28 = call i576 @mulPv544x32(i32* %r2, i32 %r27)
+%r29 = add i576 %r24, %r28
+%r30 = trunc i576 %r29 to i32
+%r32 = getelementptr i32, i32* %r1, i32 3
+store i32 %r30, i32* %r32
+%r33 = lshr i576 %r29, 32
+%r35 = getelementptr i32, i32* %r2, i32 4
+%r36 = load i32, i32* %r35
+%r37 = call i576 @mulPv544x32(i32* %r2, i32 %r36)
+%r38 = add i576 %r33, %r37
+%r39 = trunc i576 %r38 to i32
+%r41 = getelementptr i32, i32* %r1, i32 4
+store i32 %r39, i32* %r41
+%r42 = lshr i576 %r38, 32
+%r44 = getelementptr i32, i32* %r2, i32 5
+%r45 = load i32, i32* %r44
+%r46 = call i576 @mulPv544x32(i32* %r2, i32 %r45)
+%r47 = add i576 %r42, %r46
+%r48 = trunc i576 %r47 to i32
+%r50 = getelementptr i32, i32* %r1, i32 5
+store i32 %r48, i32* %r50
+%r51 = lshr i576 %r47, 32
+%r53 = getelementptr i32, i32* %r2, i32 6
+%r54 = load i32, i32* %r53
+%r55 = call i576 @mulPv544x32(i32* %r2, i32 %r54)
+%r56 = add i576 %r51, %r55
+%r57 = trunc i576 %r56 to i32
+%r59 = getelementptr i32, i32* %r1, i32 6
+store i32 %r57, i32* %r59
+%r60 = lshr i576 %r56, 32
+%r62 = getelementptr i32, i32* %r2, i32 7
+%r63 = load i32, i32* %r62
+%r64 = call i576 @mulPv544x32(i32* %r2, i32 %r63)
+%r65 = add i576 %r60, %r64
+%r66 = trunc i576 %r65 to i32
+%r68 = getelementptr i32, i32* %r1, i32 7
+store i32 %r66, i32* %r68
+%r69 = lshr i576 %r65, 32
+%r71 = getelementptr i32, i32* %r2, i32 8
+%r72 = load i32, i32* %r71
+%r73 = call i576 @mulPv544x32(i32* %r2, i32 %r72)
+%r74 = add i576 %r69, %r73
+%r75 = trunc i576 %r74 to i32
+%r77 = getelementptr i32, i32* %r1, i32 8
+store i32 %r75, i32* %r77
+%r78 = lshr i576 %r74, 32
+%r80 = getelementptr i32, i32* %r2, i32 9
+%r81 = load i32, i32* %r80
+%r82 = call i576 @mulPv544x32(i32* %r2, i32 %r81)
+%r83 = add i576 %r78, %r82
+%r84 = trunc i576 %r83 to i32
+%r86 = getelementptr i32, i32* %r1, i32 9
+store i32 %r84, i32* %r86
+%r87 = lshr i576 %r83, 32
+%r89 = getelementptr i32, i32* %r2, i32 10
+%r90 = load i32, i32* %r89
+%r91 = call i576 @mulPv544x32(i32* %r2, i32 %r90)
+%r92 = add i576 %r87, %r91
+%r93 = trunc i576 %r92 to i32
+%r95 = getelementptr i32, i32* %r1, i32 10
+store i32 %r93, i32* %r95
+%r96 = lshr i576 %r92, 32
+%r98 = getelementptr i32, i32* %r2, i32 11
+%r99 = load i32, i32* %r98
+%r100 = call i576 @mulPv544x32(i32* %r2, i32 %r99)
+%r101 = add i576 %r96, %r100
+%r102 = trunc i576 %r101 to i32
+%r104 = getelementptr i32, i32* %r1, i32 11
+store i32 %r102, i32* %r104
+%r105 = lshr i576 %r101, 32
+%r107 = getelementptr i32, i32* %r2, i32 12
+%r108 = load i32, i32* %r107
+%r109 = call i576 @mulPv544x32(i32* %r2, i32 %r108)
+%r110 = add i576 %r105, %r109
+%r111 = trunc i576 %r110 to i32
+%r113 = getelementptr i32, i32* %r1, i32 12
+store i32 %r111, i32* %r113
+%r114 = lshr i576 %r110, 32
+%r116 = getelementptr i32, i32* %r2, i32 13
+%r117 = load i32, i32* %r116
+%r118 = call i576 @mulPv544x32(i32* %r2, i32 %r117)
+%r119 = add i576 %r114, %r118
+%r120 = trunc i576 %r119 to i32
+%r122 = getelementptr i32, i32* %r1, i32 13
+store i32 %r120, i32* %r122
+%r123 = lshr i576 %r119, 32
+%r125 = getelementptr i32, i32* %r2, i32 14
+%r126 = load i32, i32* %r125
+%r127 = call i576 @mulPv544x32(i32* %r2, i32 %r126)
+%r128 = add i576 %r123, %r127
+%r129 = trunc i576 %r128 to i32
+%r131 = getelementptr i32, i32* %r1, i32 14
+store i32 %r129, i32* %r131
+%r132 = lshr i576 %r128, 32
+%r134 = getelementptr i32, i32* %r2, i32 15
+%r135 = load i32, i32* %r134
+%r136 = call i576 @mulPv544x32(i32* %r2, i32 %r135)
+%r137 = add i576 %r132, %r136
+%r138 = trunc i576 %r137 to i32
+%r140 = getelementptr i32, i32* %r1, i32 15
+store i32 %r138, i32* %r140
+%r141 = lshr i576 %r137, 32
+%r143 = getelementptr i32, i32* %r2, i32 16 ; last word of %r2
+%r144 = load i32, i32* %r143
+%r145 = call i576 @mulPv544x32(i32* %r2, i32 %r144)
+%r146 = add i576 %r141, %r145 ; final accumulator: all 18 of its words are output below
+%r148 = getelementptr i32, i32* %r1, i32 16 ; base for the tail: output words 16..33
+%r149 = trunc i576 %r146 to i32
+%r151 = getelementptr i32, i32* %r148, i32 0
+store i32 %r149, i32* %r151
+%r152 = lshr i576 %r146, 32 ; peel the accumulator one 32-bit word at a time, low word first
+%r153 = trunc i576 %r152 to i32
+%r155 = getelementptr i32, i32* %r148, i32 1
+store i32 %r153, i32* %r155
+%r156 = lshr i576 %r152, 32
+%r157 = trunc i576 %r156 to i32
+%r159 = getelementptr i32, i32* %r148, i32 2
+store i32 %r157, i32* %r159
+%r160 = lshr i576 %r156, 32
+%r161 = trunc i576 %r160 to i32
+%r163 = getelementptr i32, i32* %r148, i32 3
+store i32 %r161, i32* %r163
+%r164 = lshr i576 %r160, 32
+%r165 = trunc i576 %r164 to i32
+%r167 = getelementptr i32, i32* %r148, i32 4
+store i32 %r165, i32* %r167
+%r168 = lshr i576 %r164, 32
+%r169 = trunc i576 %r168 to i32
+%r171 = getelementptr i32, i32* %r148, i32 5
+store i32 %r169, i32* %r171
+%r172 = lshr i576 %r168, 32
+%r173 = trunc i576 %r172 to i32
+%r175 = getelementptr i32, i32* %r148, i32 6
+store i32 %r173, i32* %r175
+%r176 = lshr i576 %r172, 32
+%r177 = trunc i576 %r176 to i32
+%r179 = getelementptr i32, i32* %r148, i32 7
+store i32 %r177, i32* %r179
+%r180 = lshr i576 %r176, 32
+%r181 = trunc i576 %r180 to i32
+%r183 = getelementptr i32, i32* %r148, i32 8
+store i32 %r181, i32* %r183
+%r184 = lshr i576 %r180, 32
+%r185 = trunc i576 %r184 to i32
+%r187 = getelementptr i32, i32* %r148, i32 9
+store i32 %r185, i32* %r187
+%r188 = lshr i576 %r184, 32
+%r189 = trunc i576 %r188 to i32
+%r191 = getelementptr i32, i32* %r148, i32 10
+store i32 %r189, i32* %r191
+%r192 = lshr i576 %r188, 32
+%r193 = trunc i576 %r192 to i32
+%r195 = getelementptr i32, i32* %r148, i32 11
+store i32 %r193, i32* %r195
+%r196 = lshr i576 %r192, 32
+%r197 = trunc i576 %r196 to i32
+%r199 = getelementptr i32, i32* %r148, i32 12
+store i32 %r197, i32* %r199
+%r200 = lshr i576 %r196, 32
+%r201 = trunc i576 %r200 to i32
+%r203 = getelementptr i32, i32* %r148, i32 13
+store i32 %r201, i32* %r203
+%r204 = lshr i576 %r200, 32
+%r205 = trunc i576 %r204 to i32
+%r207 = getelementptr i32, i32* %r148, i32 14
+store i32 %r205, i32* %r207
+%r208 = lshr i576 %r204, 32
+%r209 = trunc i576 %r208 to i32
+%r211 = getelementptr i32, i32* %r148, i32 15
+store i32 %r209, i32* %r211
+%r212 = lshr i576 %r208, 32
+%r213 = trunc i576 %r212 to i32
+%r215 = getelementptr i32, i32* %r148, i32 16
+store i32 %r213, i32* %r215
+%r216 = lshr i576 %r212, 32
+%r217 = trunc i576 %r216 to i32
+%r219 = getelementptr i32, i32* %r148, i32 17
+store i32 %r217, i32* %r219 ; output word 33, the top word
+ret void
+}
+define void @mcl_fp_mont17L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; writes 17 words to %r1: interleaved multiply-and-reduce of %r2 by %r3 against the 17-word value at %r4, then one conditional subtraction (NOTE(review): presumably Montgomery multiplication with %r4 the modulus p -- confirm against the generator)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1 ; the word stored immediately before %r4[0]
+%r7 = load i32, i32* %r6 ; per-round multiplier (NOTE(review): presumably -p^-1 mod 2^32 -- confirm with the caller that fills this slot)
+%r9 = getelementptr i32, i32* %r3, i32 0
+%r10 = load i32, i32* %r9
+%r11 = call i576 @mulPv544x32(i32* %r2, i32 %r10) ; helper is defined outside this view; assumed to return %r2 times the given word
+%r12 = zext i576 %r11 to i608 ; the running sum is kept one word wider (i608) than a helper result
+%r13 = trunc i576 %r11 to i32
+%r14 = mul i32 %r13, %r7 ; q = low word of the running sum * %r7, wrapping at 32 bits
+%r15 = call i576 @mulPv544x32(i32* %r4, i32 %r14) ; helper applied to the %r4 words and q
+%r16 = zext i576 %r15 to i608
+%r17 = add i608 %r12, %r16
+%r18 = lshr i608 %r17, 32 ; discard the low word; end of round 0
+%r20 = getelementptr i32, i32* %r3, i32 1 ; rounds 1..16 repeat: add the %r2 term for the next word of %r3, derive q, add the %r4 term, shift right 32
+%r21 = load i32, i32* %r20
+%r22 = call i576 @mulPv544x32(i32* %r2, i32 %r21)
+%r23 = zext i576 %r22 to i608
+%r24 = add i608 %r18, %r23
+%r25 = trunc i608 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i576 @mulPv544x32(i32* %r4, i32 %r26)
+%r28 = zext i576 %r27 to i608
+%r29 = add i608 %r24, %r28
+%r30 = lshr i608 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2
+%r33 = load i32, i32* %r32
+%r34 = call i576 @mulPv544x32(i32* %r2, i32 %r33)
+%r35 = zext i576 %r34 to i608
+%r36 = add i608 %r30, %r35
+%r37 = trunc i608 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i576 @mulPv544x32(i32* %r4, i32 %r38)
+%r40 = zext i576 %r39 to i608
+%r41 = add i608 %r36, %r40
+%r42 = lshr i608 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = call i576 @mulPv544x32(i32* %r2, i32 %r45)
+%r47 = zext i576 %r46 to i608
+%r48 = add i608 %r42, %r47
+%r49 = trunc i608 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i576 @mulPv544x32(i32* %r4, i32 %r50)
+%r52 = zext i576 %r51 to i608
+%r53 = add i608 %r48, %r52
+%r54 = lshr i608 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4
+%r57 = load i32, i32* %r56
+%r58 = call i576 @mulPv544x32(i32* %r2, i32 %r57)
+%r59 = zext i576 %r58 to i608
+%r60 = add i608 %r54, %r59
+%r61 = trunc i608 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i576 @mulPv544x32(i32* %r4, i32 %r62)
+%r64 = zext i576 %r63 to i608
+%r65 = add i608 %r60, %r64
+%r66 = lshr i608 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5
+%r69 = load i32, i32* %r68
+%r70 = call i576 @mulPv544x32(i32* %r2, i32 %r69)
+%r71 = zext i576 %r70 to i608
+%r72 = add i608 %r66, %r71
+%r73 = trunc i608 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i576 @mulPv544x32(i32* %r4, i32 %r74)
+%r76 = zext i576 %r75 to i608
+%r77 = add i608 %r72, %r76
+%r78 = lshr i608 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6
+%r81 = load i32, i32* %r80
+%r82 = call i576 @mulPv544x32(i32* %r2, i32 %r81)
+%r83 = zext i576 %r82 to i608
+%r84 = add i608 %r78, %r83
+%r85 = trunc i608 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i576 @mulPv544x32(i32* %r4, i32 %r86)
+%r88 = zext i576 %r87 to i608
+%r89 = add i608 %r84, %r88
+%r90 = lshr i608 %r89, 32
+%r92 = getelementptr i32, i32* %r3, i32 7
+%r93 = load i32, i32* %r92
+%r94 = call i576 @mulPv544x32(i32* %r2, i32 %r93)
+%r95 = zext i576 %r94 to i608
+%r96 = add i608 %r90, %r95
+%r97 = trunc i608 %r96 to i32
+%r98 = mul i32 %r97, %r7
+%r99 = call i576 @mulPv544x32(i32* %r4, i32 %r98)
+%r100 = zext i576 %r99 to i608
+%r101 = add i608 %r96, %r100
+%r102 = lshr i608 %r101, 32
+%r104 = getelementptr i32, i32* %r3, i32 8
+%r105 = load i32, i32* %r104
+%r106 = call i576 @mulPv544x32(i32* %r2, i32 %r105)
+%r107 = zext i576 %r106 to i608
+%r108 = add i608 %r102, %r107
+%r109 = trunc i608 %r108 to i32
+%r110 = mul i32 %r109, %r7
+%r111 = call i576 @mulPv544x32(i32* %r4, i32 %r110)
+%r112 = zext i576 %r111 to i608
+%r113 = add i608 %r108, %r112
+%r114 = lshr i608 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 9
+%r117 = load i32, i32* %r116
+%r118 = call i576 @mulPv544x32(i32* %r2, i32 %r117)
+%r119 = zext i576 %r118 to i608
+%r120 = add i608 %r114, %r119
+%r121 = trunc i608 %r120 to i32
+%r122 = mul i32 %r121, %r7
+%r123 = call i576 @mulPv544x32(i32* %r4, i32 %r122)
+%r124 = zext i576 %r123 to i608
+%r125 = add i608 %r120, %r124
+%r126 = lshr i608 %r125, 32
+%r128 = getelementptr i32, i32* %r3, i32 10
+%r129 = load i32, i32* %r128
+%r130 = call i576 @mulPv544x32(i32* %r2, i32 %r129)
+%r131 = zext i576 %r130 to i608
+%r132 = add i608 %r126, %r131
+%r133 = trunc i608 %r132 to i32
+%r134 = mul i32 %r133, %r7
+%r135 = call i576 @mulPv544x32(i32* %r4, i32 %r134)
+%r136 = zext i576 %r135 to i608
+%r137 = add i608 %r132, %r136
+%r138 = lshr i608 %r137, 32
+%r140 = getelementptr i32, i32* %r3, i32 11
+%r141 = load i32, i32* %r140
+%r142 = call i576 @mulPv544x32(i32* %r2, i32 %r141)
+%r143 = zext i576 %r142 to i608
+%r144 = add i608 %r138, %r143
+%r145 = trunc i608 %r144 to i32
+%r146 = mul i32 %r145, %r7
+%r147 = call i576 @mulPv544x32(i32* %r4, i32 %r146)
+%r148 = zext i576 %r147 to i608
+%r149 = add i608 %r144, %r148
+%r150 = lshr i608 %r149, 32
+%r152 = getelementptr i32, i32* %r3, i32 12
+%r153 = load i32, i32* %r152
+%r154 = call i576 @mulPv544x32(i32* %r2, i32 %r153)
+%r155 = zext i576 %r154 to i608
+%r156 = add i608 %r150, %r155
+%r157 = trunc i608 %r156 to i32
+%r158 = mul i32 %r157, %r7
+%r159 = call i576 @mulPv544x32(i32* %r4, i32 %r158)
+%r160 = zext i576 %r159 to i608
+%r161 = add i608 %r156, %r160
+%r162 = lshr i608 %r161, 32
+%r164 = getelementptr i32, i32* %r3, i32 13
+%r165 = load i32, i32* %r164
+%r166 = call i576 @mulPv544x32(i32* %r2, i32 %r165)
+%r167 = zext i576 %r166 to i608
+%r168 = add i608 %r162, %r167
+%r169 = trunc i608 %r168 to i32
+%r170 = mul i32 %r169, %r7
+%r171 = call i576 @mulPv544x32(i32* %r4, i32 %r170)
+%r172 = zext i576 %r171 to i608
+%r173 = add i608 %r168, %r172
+%r174 = lshr i608 %r173, 32
+%r176 = getelementptr i32, i32* %r3, i32 14
+%r177 = load i32, i32* %r176
+%r178 = call i576 @mulPv544x32(i32* %r2, i32 %r177)
+%r179 = zext i576 %r178 to i608
+%r180 = add i608 %r174, %r179
+%r181 = trunc i608 %r180 to i32
+%r182 = mul i32 %r181, %r7
+%r183 = call i576 @mulPv544x32(i32* %r4, i32 %r182)
+%r184 = zext i576 %r183 to i608
+%r185 = add i608 %r180, %r184
+%r186 = lshr i608 %r185, 32
+%r188 = getelementptr i32, i32* %r3, i32 15
+%r189 = load i32, i32* %r188
+%r190 = call i576 @mulPv544x32(i32* %r2, i32 %r189)
+%r191 = zext i576 %r190 to i608
+%r192 = add i608 %r186, %r191
+%r193 = trunc i608 %r192 to i32
+%r194 = mul i32 %r193, %r7
+%r195 = call i576 @mulPv544x32(i32* %r4, i32 %r194)
+%r196 = zext i576 %r195 to i608
+%r197 = add i608 %r192, %r196
+%r198 = lshr i608 %r197, 32
+%r200 = getelementptr i32, i32* %r3, i32 16 ; last word of %r3: final round
+%r201 = load i32, i32* %r200
+%r202 = call i576 @mulPv544x32(i32* %r2, i32 %r201)
+%r203 = zext i576 %r202 to i608
+%r204 = add i608 %r198, %r203
+%r205 = trunc i608 %r204 to i32
+%r206 = mul i32 %r205, %r7
+%r207 = call i576 @mulPv544x32(i32* %r4, i32 %r206)
+%r208 = zext i576 %r207 to i608
+%r209 = add i608 %r204, %r208
+%r210 = lshr i608 %r209, 32
+%r211 = trunc i608 %r210 to i576 ; unreduced result, narrowed to 576 bits for the comparison below
+%r212 = load i32, i32* %r4 ; from here to %r324: assemble %r4[0..16] into one i544, least-significant word first
+%r213 = zext i32 %r212 to i64
+%r215 = getelementptr i32, i32* %r4, i32 1
+%r216 = load i32, i32* %r215
+%r217 = zext i32 %r216 to i64
+%r218 = shl i64 %r217, 32
+%r219 = or i64 %r213, %r218
+%r220 = zext i64 %r219 to i96 ; each step widens by 32 bits and ORs the next word in at the top
+%r222 = getelementptr i32, i32* %r4, i32 2
+%r223 = load i32, i32* %r222
+%r224 = zext i32 %r223 to i96
+%r225 = shl i96 %r224, 64
+%r226 = or i96 %r220, %r225
+%r227 = zext i96 %r226 to i128
+%r229 = getelementptr i32, i32* %r4, i32 3
+%r230 = load i32, i32* %r229
+%r231 = zext i32 %r230 to i128
+%r232 = shl i128 %r231, 96
+%r233 = or i128 %r227, %r232
+%r234 = zext i128 %r233 to i160
+%r236 = getelementptr i32, i32* %r4, i32 4
+%r237 = load i32, i32* %r236
+%r238 = zext i32 %r237 to i160
+%r239 = shl i160 %r238, 128
+%r240 = or i160 %r234, %r239
+%r241 = zext i160 %r240 to i192
+%r243 = getelementptr i32, i32* %r4, i32 5
+%r244 = load i32, i32* %r243
+%r245 = zext i32 %r244 to i192
+%r246 = shl i192 %r245, 160
+%r247 = or i192 %r241, %r246
+%r248 = zext i192 %r247 to i224
+%r250 = getelementptr i32, i32* %r4, i32 6
+%r251 = load i32, i32* %r250
+%r252 = zext i32 %r251 to i224
+%r253 = shl i224 %r252, 192
+%r254 = or i224 %r248, %r253
+%r255 = zext i224 %r254 to i256
+%r257 = getelementptr i32, i32* %r4, i32 7
+%r258 = load i32, i32* %r257
+%r259 = zext i32 %r258 to i256
+%r260 = shl i256 %r259, 224
+%r261 = or i256 %r255, %r260
+%r262 = zext i256 %r261 to i288
+%r264 = getelementptr i32, i32* %r4, i32 8
+%r265 = load i32, i32* %r264
+%r266 = zext i32 %r265 to i288
+%r267 = shl i288 %r266, 256
+%r268 = or i288 %r262, %r267
+%r269 = zext i288 %r268 to i320
+%r271 = getelementptr i32, i32* %r4, i32 9
+%r272 = load i32, i32* %r271
+%r273 = zext i32 %r272 to i320
+%r274 = shl i320 %r273, 288
+%r275 = or i320 %r269, %r274
+%r276 = zext i320 %r275 to i352
+%r278 = getelementptr i32, i32* %r4, i32 10
+%r279 = load i32, i32* %r278
+%r280 = zext i32 %r279 to i352
+%r281 = shl i352 %r280, 320
+%r282 = or i352 %r276, %r281
+%r283 = zext i352 %r282 to i384
+%r285 = getelementptr i32, i32* %r4, i32 11
+%r286 = load i32, i32* %r285
+%r287 = zext i32 %r286 to i384
+%r288 = shl i384 %r287, 352
+%r289 = or i384 %r283, %r288
+%r290 = zext i384 %r289 to i416
+%r292 = getelementptr i32, i32* %r4, i32 12
+%r293 = load i32, i32* %r292
+%r294 = zext i32 %r293 to i416
+%r295 = shl i416 %r294, 384
+%r296 = or i416 %r290, %r295
+%r297 = zext i416 %r296 to i448
+%r299 = getelementptr i32, i32* %r4, i32 13
+%r300 = load i32, i32* %r299
+%r301 = zext i32 %r300 to i448
+%r302 = shl i448 %r301, 416
+%r303 = or i448 %r297, %r302
+%r304 = zext i448 %r303 to i480
+%r306 = getelementptr i32, i32* %r4, i32 14
+%r307 = load i32, i32* %r306
+%r308 = zext i32 %r307 to i480
+%r309 = shl i480 %r308, 448
+%r310 = or i480 %r304, %r309
+%r311 = zext i480 %r310 to i512
+%r313 = getelementptr i32, i32* %r4, i32 15
+%r314 = load i32, i32* %r313
+%r315 = zext i32 %r314 to i512
+%r316 = shl i512 %r315, 480
+%r317 = or i512 %r311, %r316
+%r318 = zext i512 %r317 to i544
+%r320 = getelementptr i32, i32* %r4, i32 16
+%r321 = load i32, i32* %r320
+%r322 = zext i32 %r321 to i544
+%r323 = shl i544 %r322, 512
+%r324 = or i544 %r318, %r323 ; all 17 words of %r4 as a single i544
+%r325 = zext i544 %r324 to i576
+%r326 = sub i576 %r211, %r325 ; candidate = unreduced result minus the %r4 value
+%r327 = lshr i576 %r326, 544
+%r328 = trunc i576 %r327 to i1 ; bit 544 of the difference is used as the borrow flag
+%r329 = select i1 %r328, i576 %r211, i576 %r326 ; flag set: keep the unreduced result; clear: take the difference
+%r330 = trunc i576 %r329 to i544
+%r331 = trunc i544 %r330 to i32 ; from here on: store the selected value to %r1[0..16], low word first
+%r333 = getelementptr i32, i32* %r1, i32 0
+store i32 %r331, i32* %r333
+%r334 = lshr i544 %r330, 32
+%r335 = trunc i544 %r334 to i32
+%r337 = getelementptr i32, i32* %r1, i32 1
+store i32 %r335, i32* %r337
+%r338 = lshr i544 %r334, 32
+%r339 = trunc i544 %r338 to i32
+%r341 = getelementptr i32, i32* %r1, i32 2
+store i32 %r339, i32* %r341
+%r342 = lshr i544 %r338, 32
+%r343 = trunc i544 %r342 to i32
+%r345 = getelementptr i32, i32* %r1, i32 3
+store i32 %r343, i32* %r345
+%r346 = lshr i544 %r342, 32
+%r347 = trunc i544 %r346 to i32
+%r349 = getelementptr i32, i32* %r1, i32 4
+store i32 %r347, i32* %r349
+%r350 = lshr i544 %r346, 32
+%r351 = trunc i544 %r350 to i32
+%r353 = getelementptr i32, i32* %r1, i32 5
+store i32 %r351, i32* %r353
+%r354 = lshr i544 %r350, 32
+%r355 = trunc i544 %r354 to i32
+%r357 = getelementptr i32, i32* %r1, i32 6
+store i32 %r355, i32* %r357
+%r358 = lshr i544 %r354, 32
+%r359 = trunc i544 %r358 to i32
+%r361 = getelementptr i32, i32* %r1, i32 7
+store i32 %r359, i32* %r361
+%r362 = lshr i544 %r358, 32
+%r363 = trunc i544 %r362 to i32
+%r365 = getelementptr i32, i32* %r1, i32 8
+store i32 %r363, i32* %r365
+%r366 = lshr i544 %r362, 32
+%r367 = trunc i544 %r366 to i32
+%r369 = getelementptr i32, i32* %r1, i32 9
+store i32 %r367, i32* %r369
+%r370 = lshr i544 %r366, 32
+%r371 = trunc i544 %r370 to i32
+%r373 = getelementptr i32, i32* %r1, i32 10
+store i32 %r371, i32* %r373
+%r374 = lshr i544 %r370, 32
+%r375 = trunc i544 %r374 to i32
+%r377 = getelementptr i32, i32* %r1, i32 11
+store i32 %r375, i32* %r377
+%r378 = lshr i544 %r374, 32
+%r379 = trunc i544 %r378 to i32
+%r381 = getelementptr i32, i32* %r1, i32 12
+store i32 %r379, i32* %r381
+%r382 = lshr i544 %r378, 32
+%r383 = trunc i544 %r382 to i32
+%r385 = getelementptr i32, i32* %r1, i32 13
+store i32 %r383, i32* %r385
+%r386 = lshr i544 %r382, 32
+%r387 = trunc i544 %r386 to i32
+%r389 = getelementptr i32, i32* %r1, i32 14
+store i32 %r387, i32* %r389
+%r390 = lshr i544 %r386, 32
+%r391 = trunc i544 %r390 to i32
+%r393 = getelementptr i32, i32* %r1, i32 15
+store i32 %r391, i32* %r393
+%r394 = lshr i544 %r390, 32
+%r395 = trunc i544 %r394 to i32
+%r397 = getelementptr i32, i32* %r1, i32 16
+store i32 %r395, i32* %r397 ; result word 16, the top word
+ret void
+}
+define void @mcl_fp_montNF17L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6
+%r8 = load i32, i32* %r3
+%r9 = call i576 @mulPv544x32(i32* %r2, i32 %r8)
+%r10 = trunc i576 %r9 to i32
+%r11 = mul i32 %r10, %r7
+%r12 = call i576 @mulPv544x32(i32* %r4, i32 %r11)
+%r13 = add i576 %r9, %r12
+%r14 = lshr i576 %r13, 32
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = call i576 @mulPv544x32(i32* %r2, i32 %r17)
+%r19 = add i576 %r14, %r18
+%r20 = trunc i576 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i576 @mulPv544x32(i32* %r4, i32 %r21)
+%r23 = add i576 %r19, %r22
+%r24 = lshr i576 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2
+%r27 = load i32, i32* %r26
+%r28 = call i576 @mulPv544x32(i32* %r2, i32 %r27)
+%r29 = add i576 %r24, %r28
+%r30 = trunc i576 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i576 @mulPv544x32(i32* %r4, i32 %r31)
+%r33 = add i576 %r29, %r32
+%r34 = lshr i576 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3
+%r37 = load i32, i32* %r36
+%r38 = call i576 @mulPv544x32(i32* %r2, i32 %r37)
+%r39 = add i576 %r34, %r38
+%r40 = trunc i576 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i576 @mulPv544x32(i32* %r4, i32 %r41)
+%r43 = add i576 %r39, %r42
+%r44 = lshr i576 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4
+%r47 = load i32, i32* %r46
+%r48 = call i576 @mulPv544x32(i32* %r2, i32 %r47)
+%r49 = add i576 %r44, %r48
+%r50 = trunc i576 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i576 @mulPv544x32(i32* %r4, i32 %r51)
+%r53 = add i576 %r49, %r52
+%r54 = lshr i576 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5
+%r57 = load i32, i32* %r56
+%r58 = call i576 @mulPv544x32(i32* %r2, i32 %r57)
+%r59 = add i576 %r54, %r58
+%r60 = trunc i576 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i576 @mulPv544x32(i32* %r4, i32 %r61)
+%r63 = add i576 %r59, %r62
+%r64 = lshr i576 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6
+%r67 = load i32, i32* %r66
+%r68 = call i576 @mulPv544x32(i32* %r2, i32 %r67)
+%r69 = add i576 %r64, %r68
+%r70 = trunc i576 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i576 @mulPv544x32(i32* %r4, i32 %r71)
+%r73 = add i576 %r69, %r72
+%r74 = lshr i576 %r73, 32
+%r76 = getelementptr i32, i32* %r3, i32 7
+%r77 = load i32, i32* %r76
+%r78 = call i576 @mulPv544x32(i32* %r2, i32 %r77)
+%r79 = add i576 %r74, %r78
+%r80 = trunc i576 %r79 to i32
+%r81 = mul i32 %r80, %r7
+%r82 = call i576 @mulPv544x32(i32* %r4, i32 %r81)
+%r83 = add i576 %r79, %r82
+%r84 = lshr i576 %r83, 32
+%r86 = getelementptr i32, i32* %r3, i32 8
+%r87 = load i32, i32* %r86
+%r88 = call i576 @mulPv544x32(i32* %r2, i32 %r87)
+%r89 = add i576 %r84, %r88
+%r90 = trunc i576 %r89 to i32
+%r91 = mul i32 %r90, %r7
+%r92 = call i576 @mulPv544x32(i32* %r4, i32 %r91)
+%r93 = add i576 %r89, %r92
+%r94 = lshr i576 %r93, 32
+%r96 = getelementptr i32, i32* %r3, i32 9
+%r97 = load i32, i32* %r96
+%r98 = call i576 @mulPv544x32(i32* %r2, i32 %r97)
+%r99 = add i576 %r94, %r98
+%r100 = trunc i576 %r99 to i32
+%r101 = mul i32 %r100, %r7
+%r102 = call i576 @mulPv544x32(i32* %r4, i32 %r101)
+%r103 = add i576 %r99, %r102
+%r104 = lshr i576 %r103, 32
+%r106 = getelementptr i32, i32* %r3, i32 10
+%r107 = load i32, i32* %r106
+%r108 = call i576 @mulPv544x32(i32* %r2, i32 %r107)
+%r109 = add i576 %r104, %r108
+%r110 = trunc i576 %r109 to i32
+%r111 = mul i32 %r110, %r7
+%r112 = call i576 @mulPv544x32(i32* %r4, i32 %r111)
+%r113 = add i576 %r109, %r112
+%r114 = lshr i576 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 11
+%r117 = load i32, i32* %r116
+%r118 = call i576 @mulPv544x32(i32* %r2, i32 %r117)
+%r119 = add i576 %r114, %r118
+%r120 = trunc i576 %r119 to i32
+%r121 = mul i32 %r120, %r7
+%r122 = call i576 @mulPv544x32(i32* %r4, i32 %r121)
+%r123 = add i576 %r119, %r122
+%r124 = lshr i576 %r123, 32
+%r126 = getelementptr i32, i32* %r3, i32 12
+%r127 = load i32, i32* %r126
+%r128 = call i576 @mulPv544x32(i32* %r2, i32 %r127)
+%r129 = add i576 %r124, %r128
+%r130 = trunc i576 %r129 to i32
+%r131 = mul i32 %r130, %r7
+%r132 = call i576 @mulPv544x32(i32* %r4, i32 %r131)
+%r133 = add i576 %r129, %r132
+%r134 = lshr i576 %r133, 32
+%r136 = getelementptr i32, i32* %r3, i32 13
+%r137 = load i32, i32* %r136
+%r138 = call i576 @mulPv544x32(i32* %r2, i32 %r137)
+%r139 = add i576 %r134, %r138
+%r140 = trunc i576 %r139 to i32
+%r141 = mul i32 %r140, %r7
+%r142 = call i576 @mulPv544x32(i32* %r4, i32 %r141)
+%r143 = add i576 %r139, %r142
+%r144 = lshr i576 %r143, 32
+%r146 = getelementptr i32, i32* %r3, i32 14
+%r147 = load i32, i32* %r146
+%r148 = call i576 @mulPv544x32(i32* %r2, i32 %r147)
+%r149 = add i576 %r144, %r148
+%r150 = trunc i576 %r149 to i32
+%r151 = mul i32 %r150, %r7
+%r152 = call i576 @mulPv544x32(i32* %r4, i32 %r151)
+%r153 = add i576 %r149, %r152
+%r154 = lshr i576 %r153, 32
+%r156 = getelementptr i32, i32* %r3, i32 15
+%r157 = load i32, i32* %r156
+%r158 = call i576 @mulPv544x32(i32* %r2, i32 %r157)
+%r159 = add i576 %r154, %r158
+%r160 = trunc i576 %r159 to i32
+%r161 = mul i32 %r160, %r7
+%r162 = call i576 @mulPv544x32(i32* %r4, i32 %r161)
+%r163 = add i576 %r159, %r162
+%r164 = lshr i576 %r163, 32
+%r166 = getelementptr i32, i32* %r3, i32 16
+%r167 = load i32, i32* %r166
+%r168 = call i576 @mulPv544x32(i32* %r2, i32 %r167)
+%r169 = add i576 %r164, %r168
+%r170 = trunc i576 %r169 to i32
+%r171 = mul i32 %r170, %r7
+%r172 = call i576 @mulPv544x32(i32* %r4, i32 %r171)
+%r173 = add i576 %r169, %r172
+%r174 = lshr i576 %r173, 32
+%r175 = trunc i576 %r174 to i544
+%r176 = load i32, i32* %r4
+%r177 = zext i32 %r176 to i64
+%r179 = getelementptr i32, i32* %r4, i32 1
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i64
+%r182 = shl i64 %r181, 32
+%r183 = or i64 %r177, %r182
+%r184 = zext i64 %r183 to i96
+%r186 = getelementptr i32, i32* %r4, i32 2
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i96
+%r189 = shl i96 %r188, 64
+%r190 = or i96 %r184, %r189
+%r191 = zext i96 %r190 to i128
+%r193 = getelementptr i32, i32* %r4, i32 3
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i128
+%r196 = shl i128 %r195, 96
+%r197 = or i128 %r191, %r196
+%r198 = zext i128 %r197 to i160
+%r200 = getelementptr i32, i32* %r4, i32 4
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i160
+%r203 = shl i160 %r202, 128
+%r204 = or i160 %r198, %r203
+%r205 = zext i160 %r204 to i192
+%r207 = getelementptr i32, i32* %r4, i32 5
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i192
+%r210 = shl i192 %r209, 160
+%r211 = or i192 %r205, %r210
+%r212 = zext i192 %r211 to i224
+%r214 = getelementptr i32, i32* %r4, i32 6
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i224
+%r217 = shl i224 %r216, 192
+%r218 = or i224 %r212, %r217
+%r219 = zext i224 %r218 to i256
+%r221 = getelementptr i32, i32* %r4, i32 7
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i256
+%r224 = shl i256 %r223, 224
+%r225 = or i256 %r219, %r224
+%r226 = zext i256 %r225 to i288
+%r228 = getelementptr i32, i32* %r4, i32 8
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i288
+%r231 = shl i288 %r230, 256
+%r232 = or i288 %r226, %r231
+%r233 = zext i288 %r232 to i320
+%r235 = getelementptr i32, i32* %r4, i32 9
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i320
+%r238 = shl i320 %r237, 288
+%r239 = or i320 %r233, %r238
+%r240 = zext i320 %r239 to i352
+%r242 = getelementptr i32, i32* %r4, i32 10
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i352
+%r245 = shl i352 %r244, 320
+%r246 = or i352 %r240, %r245
+%r247 = zext i352 %r246 to i384
+%r249 = getelementptr i32, i32* %r4, i32 11
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i384
+%r252 = shl i384 %r251, 352
+%r253 = or i384 %r247, %r252
+%r254 = zext i384 %r253 to i416
+%r256 = getelementptr i32, i32* %r4, i32 12
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i416
+%r259 = shl i416 %r258, 384
+%r260 = or i416 %r254, %r259
+%r261 = zext i416 %r260 to i448
+%r263 = getelementptr i32, i32* %r4, i32 13
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i448
+%r266 = shl i448 %r265, 416
+%r267 = or i448 %r261, %r266
+%r268 = zext i448 %r267 to i480
+%r270 = getelementptr i32, i32* %r4, i32 14
+%r271 = load i32, i32* %r270
+%r272 = zext i32 %r271 to i480
+%r273 = shl i480 %r272, 448
+%r274 = or i480 %r268, %r273
+%r275 = zext i480 %r274 to i512
+%r277 = getelementptr i32, i32* %r4, i32 15
+%r278 = load i32, i32* %r277
+%r279 = zext i32 %r278 to i512
+%r280 = shl i512 %r279, 480
+%r281 = or i512 %r275, %r280
+%r282 = zext i512 %r281 to i544
+%r284 = getelementptr i32, i32* %r4, i32 16
+%r285 = load i32, i32* %r284
+%r286 = zext i32 %r285 to i544
+%r287 = shl i544 %r286, 512
+%r288 = or i544 %r282, %r287
+%r289 = sub i544 %r175, %r288
+%r290 = lshr i544 %r289, 543
+%r291 = trunc i544 %r290 to i1
+%r292 = select i1 %r291, i544 %r175, i544 %r289
+%r293 = trunc i544 %r292 to i32
+%r295 = getelementptr i32, i32* %r1, i32 0
+store i32 %r293, i32* %r295
+%r296 = lshr i544 %r292, 32
+%r297 = trunc i544 %r296 to i32
+%r299 = getelementptr i32, i32* %r1, i32 1
+store i32 %r297, i32* %r299
+%r300 = lshr i544 %r296, 32
+%r301 = trunc i544 %r300 to i32
+%r303 = getelementptr i32, i32* %r1, i32 2
+store i32 %r301, i32* %r303
+%r304 = lshr i544 %r300, 32
+%r305 = trunc i544 %r304 to i32
+%r307 = getelementptr i32, i32* %r1, i32 3
+store i32 %r305, i32* %r307
+%r308 = lshr i544 %r304, 32
+%r309 = trunc i544 %r308 to i32
+%r311 = getelementptr i32, i32* %r1, i32 4
+store i32 %r309, i32* %r311
+%r312 = lshr i544 %r308, 32
+%r313 = trunc i544 %r312 to i32
+%r315 = getelementptr i32, i32* %r1, i32 5
+store i32 %r313, i32* %r315
+%r316 = lshr i544 %r312, 32
+%r317 = trunc i544 %r316 to i32
+%r319 = getelementptr i32, i32* %r1, i32 6
+store i32 %r317, i32* %r319
+%r320 = lshr i544 %r316, 32
+%r321 = trunc i544 %r320 to i32
+%r323 = getelementptr i32, i32* %r1, i32 7
+store i32 %r321, i32* %r323
+%r324 = lshr i544 %r320, 32
+%r325 = trunc i544 %r324 to i32
+%r327 = getelementptr i32, i32* %r1, i32 8
+store i32 %r325, i32* %r327
+%r328 = lshr i544 %r324, 32
+%r329 = trunc i544 %r328 to i32
+%r331 = getelementptr i32, i32* %r1, i32 9
+store i32 %r329, i32* %r331
+%r332 = lshr i544 %r328, 32
+%r333 = trunc i544 %r332 to i32
+%r335 = getelementptr i32, i32* %r1, i32 10
+store i32 %r333, i32* %r335
+%r336 = lshr i544 %r332, 32
+%r337 = trunc i544 %r336 to i32
+%r339 = getelementptr i32, i32* %r1, i32 11
+store i32 %r337, i32* %r339
+%r340 = lshr i544 %r336, 32
+%r341 = trunc i544 %r340 to i32
+%r343 = getelementptr i32, i32* %r1, i32 12
+store i32 %r341, i32* %r343
+%r344 = lshr i544 %r340, 32
+%r345 = trunc i544 %r344 to i32
+%r347 = getelementptr i32, i32* %r1, i32 13
+store i32 %r345, i32* %r347
+%r348 = lshr i544 %r344, 32
+%r349 = trunc i544 %r348 to i32
+%r351 = getelementptr i32, i32* %r1, i32 14
+store i32 %r349, i32* %r351
+%r352 = lshr i544 %r348, 32
+%r353 = trunc i544 %r352 to i32
+%r355 = getelementptr i32, i32* %r1, i32 15
+store i32 %r353, i32* %r355
+%r356 = lshr i544 %r352, 32
+%r357 = trunc i544 %r356 to i32
+%r359 = getelementptr i32, i32* %r1, i32 16
+store i32 %r357, i32* %r359
+ret void
+}
+define void @mcl_fp_montRed17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; word-by-word reduction: 34 words at %r2, 17-word modulus at %r3, 17-word result stored to %r1
+{ ; outline: load multiplier and operands, run 17 multiply/add/shift rounds, one conditional subtract, store
+%r5 = getelementptr i32, i32* %r3, i32 -1 ; address of the 32-bit word just before the modulus words
+%r6 = load i32, i32* %r5 ; %r6 = per-round multiplier (presumably -1/p mod 2^32 -- TODO confirm against the caller)
+%r7 = load i32, i32* %r3 ; pack %r3[0..16] into an i544, word 0 least significant
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i288
+%r59 = getelementptr i32, i32* %r3, i32 8
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i288
+%r62 = shl i288 %r61, 256
+%r63 = or i288 %r57, %r62
+%r64 = zext i288 %r63 to i320
+%r66 = getelementptr i32, i32* %r3, i32 9
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i320
+%r69 = shl i320 %r68, 288
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i352
+%r73 = getelementptr i32, i32* %r3, i32 10
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i352
+%r76 = shl i352 %r75, 320
+%r77 = or i352 %r71, %r76
+%r78 = zext i352 %r77 to i384
+%r80 = getelementptr i32, i32* %r3, i32 11
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i384
+%r83 = shl i384 %r82, 352
+%r84 = or i384 %r78, %r83
+%r85 = zext i384 %r84 to i416
+%r87 = getelementptr i32, i32* %r3, i32 12
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i416
+%r90 = shl i416 %r89, 384
+%r91 = or i416 %r85, %r90
+%r92 = zext i416 %r91 to i448
+%r94 = getelementptr i32, i32* %r3, i32 13
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i448
+%r97 = shl i448 %r96, 416
+%r98 = or i448 %r92, %r97
+%r99 = zext i448 %r98 to i480
+%r101 = getelementptr i32, i32* %r3, i32 14
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i480
+%r104 = shl i480 %r103, 448
+%r105 = or i480 %r99, %r104
+%r106 = zext i480 %r105 to i512
+%r108 = getelementptr i32, i32* %r3, i32 15
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i512
+%r111 = shl i512 %r110, 480
+%r112 = or i512 %r106, %r111
+%r113 = zext i512 %r112 to i544
+%r115 = getelementptr i32, i32* %r3, i32 16
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i544
+%r118 = shl i544 %r117, 512
+%r119 = or i544 %r113, %r118 ; %r119 = 544-bit value of the 17 modulus words; only used by the final subtract
+%r120 = load i32, i32* %r2 ; pack %r2[0..33] into an i1088, word 0 least significant
+%r121 = zext i32 %r120 to i64
+%r123 = getelementptr i32, i32* %r2, i32 1
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i64
+%r126 = shl i64 %r125, 32
+%r127 = or i64 %r121, %r126
+%r128 = zext i64 %r127 to i96
+%r130 = getelementptr i32, i32* %r2, i32 2
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i96
+%r133 = shl i96 %r132, 64
+%r134 = or i96 %r128, %r133
+%r135 = zext i96 %r134 to i128
+%r137 = getelementptr i32, i32* %r2, i32 3
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i128
+%r140 = shl i128 %r139, 96
+%r141 = or i128 %r135, %r140
+%r142 = zext i128 %r141 to i160
+%r144 = getelementptr i32, i32* %r2, i32 4
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i160
+%r147 = shl i160 %r146, 128
+%r148 = or i160 %r142, %r147
+%r149 = zext i160 %r148 to i192
+%r151 = getelementptr i32, i32* %r2, i32 5
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i192
+%r154 = shl i192 %r153, 160
+%r155 = or i192 %r149, %r154
+%r156 = zext i192 %r155 to i224
+%r158 = getelementptr i32, i32* %r2, i32 6
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i224
+%r161 = shl i224 %r160, 192
+%r162 = or i224 %r156, %r161
+%r163 = zext i224 %r162 to i256
+%r165 = getelementptr i32, i32* %r2, i32 7
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i256
+%r168 = shl i256 %r167, 224
+%r169 = or i256 %r163, %r168
+%r170 = zext i256 %r169 to i288
+%r172 = getelementptr i32, i32* %r2, i32 8
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i288
+%r175 = shl i288 %r174, 256
+%r176 = or i288 %r170, %r175
+%r177 = zext i288 %r176 to i320
+%r179 = getelementptr i32, i32* %r2, i32 9
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i320
+%r182 = shl i320 %r181, 288
+%r183 = or i320 %r177, %r182
+%r184 = zext i320 %r183 to i352
+%r186 = getelementptr i32, i32* %r2, i32 10
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i352
+%r189 = shl i352 %r188, 320
+%r190 = or i352 %r184, %r189
+%r191 = zext i352 %r190 to i384
+%r193 = getelementptr i32, i32* %r2, i32 11
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i384
+%r196 = shl i384 %r195, 352
+%r197 = or i384 %r191, %r196
+%r198 = zext i384 %r197 to i416
+%r200 = getelementptr i32, i32* %r2, i32 12
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i416
+%r203 = shl i416 %r202, 384
+%r204 = or i416 %r198, %r203
+%r205 = zext i416 %r204 to i448
+%r207 = getelementptr i32, i32* %r2, i32 13
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i448
+%r210 = shl i448 %r209, 416
+%r211 = or i448 %r205, %r210
+%r212 = zext i448 %r211 to i480
+%r214 = getelementptr i32, i32* %r2, i32 14
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i480
+%r217 = shl i480 %r216, 448
+%r218 = or i480 %r212, %r217
+%r219 = zext i480 %r218 to i512
+%r221 = getelementptr i32, i32* %r2, i32 15
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i512
+%r224 = shl i512 %r223, 480
+%r225 = or i512 %r219, %r224
+%r226 = zext i512 %r225 to i544
+%r228 = getelementptr i32, i32* %r2, i32 16
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i544
+%r231 = shl i544 %r230, 512
+%r232 = or i544 %r226, %r231
+%r233 = zext i544 %r232 to i576
+%r235 = getelementptr i32, i32* %r2, i32 17
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i576
+%r238 = shl i576 %r237, 544
+%r239 = or i576 %r233, %r238
+%r240 = zext i576 %r239 to i608
+%r242 = getelementptr i32, i32* %r2, i32 18
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i608
+%r245 = shl i608 %r244, 576
+%r246 = or i608 %r240, %r245
+%r247 = zext i608 %r246 to i640
+%r249 = getelementptr i32, i32* %r2, i32 19
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i640
+%r252 = shl i640 %r251, 608
+%r253 = or i640 %r247, %r252
+%r254 = zext i640 %r253 to i672
+%r256 = getelementptr i32, i32* %r2, i32 20
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i672
+%r259 = shl i672 %r258, 640
+%r260 = or i672 %r254, %r259
+%r261 = zext i672 %r260 to i704
+%r263 = getelementptr i32, i32* %r2, i32 21
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i704
+%r266 = shl i704 %r265, 672
+%r267 = or i704 %r261, %r266
+%r268 = zext i704 %r267 to i736
+%r270 = getelementptr i32, i32* %r2, i32 22
+%r271 = load i32, i32* %r270
+%r272 = zext i32 %r271 to i736
+%r273 = shl i736 %r272, 704
+%r274 = or i736 %r268, %r273
+%r275 = zext i736 %r274 to i768
+%r277 = getelementptr i32, i32* %r2, i32 23
+%r278 = load i32, i32* %r277
+%r279 = zext i32 %r278 to i768
+%r280 = shl i768 %r279, 736
+%r281 = or i768 %r275, %r280
+%r282 = zext i768 %r281 to i800
+%r284 = getelementptr i32, i32* %r2, i32 24
+%r285 = load i32, i32* %r284
+%r286 = zext i32 %r285 to i800
+%r287 = shl i800 %r286, 768
+%r288 = or i800 %r282, %r287
+%r289 = zext i800 %r288 to i832
+%r291 = getelementptr i32, i32* %r2, i32 25
+%r292 = load i32, i32* %r291
+%r293 = zext i32 %r292 to i832
+%r294 = shl i832 %r293, 800
+%r295 = or i832 %r289, %r294
+%r296 = zext i832 %r295 to i864
+%r298 = getelementptr i32, i32* %r2, i32 26
+%r299 = load i32, i32* %r298
+%r300 = zext i32 %r299 to i864
+%r301 = shl i864 %r300, 832
+%r302 = or i864 %r296, %r301
+%r303 = zext i864 %r302 to i896
+%r305 = getelementptr i32, i32* %r2, i32 27
+%r306 = load i32, i32* %r305
+%r307 = zext i32 %r306 to i896
+%r308 = shl i896 %r307, 864
+%r309 = or i896 %r303, %r308
+%r310 = zext i896 %r309 to i928
+%r312 = getelementptr i32, i32* %r2, i32 28
+%r313 = load i32, i32* %r312
+%r314 = zext i32 %r313 to i928
+%r315 = shl i928 %r314, 896
+%r316 = or i928 %r310, %r315
+%r317 = zext i928 %r316 to i960
+%r319 = getelementptr i32, i32* %r2, i32 29
+%r320 = load i32, i32* %r319
+%r321 = zext i32 %r320 to i960
+%r322 = shl i960 %r321, 928
+%r323 = or i960 %r317, %r322
+%r324 = zext i960 %r323 to i992
+%r326 = getelementptr i32, i32* %r2, i32 30
+%r327 = load i32, i32* %r326
+%r328 = zext i32 %r327 to i992
+%r329 = shl i992 %r328, 960
+%r330 = or i992 %r324, %r329
+%r331 = zext i992 %r330 to i1024
+%r333 = getelementptr i32, i32* %r2, i32 31
+%r334 = load i32, i32* %r333
+%r335 = zext i32 %r334 to i1024
+%r336 = shl i1024 %r335, 992
+%r337 = or i1024 %r331, %r336
+%r338 = zext i1024 %r337 to i1056
+%r340 = getelementptr i32, i32* %r2, i32 32
+%r341 = load i32, i32* %r340
+%r342 = zext i32 %r341 to i1056
+%r343 = shl i1056 %r342, 1024
+%r344 = or i1056 %r338, %r343
+%r345 = zext i1056 %r344 to i1088
+%r347 = getelementptr i32, i32* %r2, i32 33
+%r348 = load i32, i32* %r347
+%r349 = zext i32 %r348 to i1088
+%r350 = shl i1088 %r349, 1056
+%r351 = or i1088 %r345, %r350 ; %r351 = 1088-bit value of the 34 input words
+%r352 = zext i1088 %r351 to i1120 ; one extra 32-bit word of headroom for the additions below
+%r353 = trunc i1120 %r352 to i32 ; round 1 of 17: low 32 bits of the accumulator
+%r354 = mul i32 %r353, %r6 ; q = low word * %r6, wrapping at 32 bits
+%r355 = call i576 @mulPv544x32(i32* %r3, i32 %r354) ; helper defined elsewhere; presumably (17 words at %r3) * q as an i576 -- confirm
+%r356 = zext i576 %r355 to i1120
+%r357 = add i1120 %r352, %r356 ; accumulator += helper result
+%r358 = lshr i1120 %r357, 32 ; drop the low 32 bits
+%r359 = trunc i1120 %r358 to i1088 ; accumulator type narrows by one word every round
+%r360 = trunc i1088 %r359 to i32 ; round 2 (same five steps as round 1)
+%r361 = mul i32 %r360, %r6
+%r362 = call i576 @mulPv544x32(i32* %r3, i32 %r361)
+%r363 = zext i576 %r362 to i1088
+%r364 = add i1088 %r359, %r363
+%r365 = lshr i1088 %r364, 32
+%r366 = trunc i1088 %r365 to i1056
+%r367 = trunc i1056 %r366 to i32 ; round 3
+%r368 = mul i32 %r367, %r6
+%r369 = call i576 @mulPv544x32(i32* %r3, i32 %r368)
+%r370 = zext i576 %r369 to i1056
+%r371 = add i1056 %r366, %r370
+%r372 = lshr i1056 %r371, 32
+%r373 = trunc i1056 %r372 to i1024
+%r374 = trunc i1024 %r373 to i32 ; round 4
+%r375 = mul i32 %r374, %r6
+%r376 = call i576 @mulPv544x32(i32* %r3, i32 %r375)
+%r377 = zext i576 %r376 to i1024
+%r378 = add i1024 %r373, %r377
+%r379 = lshr i1024 %r378, 32
+%r380 = trunc i1024 %r379 to i992
+%r381 = trunc i992 %r380 to i32 ; round 5
+%r382 = mul i32 %r381, %r6
+%r383 = call i576 @mulPv544x32(i32* %r3, i32 %r382)
+%r384 = zext i576 %r383 to i992
+%r385 = add i992 %r380, %r384
+%r386 = lshr i992 %r385, 32
+%r387 = trunc i992 %r386 to i960
+%r388 = trunc i960 %r387 to i32 ; round 6
+%r389 = mul i32 %r388, %r6
+%r390 = call i576 @mulPv544x32(i32* %r3, i32 %r389)
+%r391 = zext i576 %r390 to i960
+%r392 = add i960 %r387, %r391
+%r393 = lshr i960 %r392, 32
+%r394 = trunc i960 %r393 to i928
+%r395 = trunc i928 %r394 to i32 ; round 7
+%r396 = mul i32 %r395, %r6
+%r397 = call i576 @mulPv544x32(i32* %r3, i32 %r396)
+%r398 = zext i576 %r397 to i928
+%r399 = add i928 %r394, %r398
+%r400 = lshr i928 %r399, 32
+%r401 = trunc i928 %r400 to i896
+%r402 = trunc i896 %r401 to i32 ; round 8
+%r403 = mul i32 %r402, %r6
+%r404 = call i576 @mulPv544x32(i32* %r3, i32 %r403)
+%r405 = zext i576 %r404 to i896
+%r406 = add i896 %r401, %r405
+%r407 = lshr i896 %r406, 32
+%r408 = trunc i896 %r407 to i864
+%r409 = trunc i864 %r408 to i32 ; round 9
+%r410 = mul i32 %r409, %r6
+%r411 = call i576 @mulPv544x32(i32* %r3, i32 %r410)
+%r412 = zext i576 %r411 to i864
+%r413 = add i864 %r408, %r412
+%r414 = lshr i864 %r413, 32
+%r415 = trunc i864 %r414 to i832
+%r416 = trunc i832 %r415 to i32 ; round 10
+%r417 = mul i32 %r416, %r6
+%r418 = call i576 @mulPv544x32(i32* %r3, i32 %r417)
+%r419 = zext i576 %r418 to i832
+%r420 = add i832 %r415, %r419
+%r421 = lshr i832 %r420, 32
+%r422 = trunc i832 %r421 to i800
+%r423 = trunc i800 %r422 to i32 ; round 11
+%r424 = mul i32 %r423, %r6
+%r425 = call i576 @mulPv544x32(i32* %r3, i32 %r424)
+%r426 = zext i576 %r425 to i800
+%r427 = add i800 %r422, %r426
+%r428 = lshr i800 %r427, 32
+%r429 = trunc i800 %r428 to i768
+%r430 = trunc i768 %r429 to i32 ; round 12
+%r431 = mul i32 %r430, %r6
+%r432 = call i576 @mulPv544x32(i32* %r3, i32 %r431)
+%r433 = zext i576 %r432 to i768
+%r434 = add i768 %r429, %r433
+%r435 = lshr i768 %r434, 32
+%r436 = trunc i768 %r435 to i736
+%r437 = trunc i736 %r436 to i32 ; round 13
+%r438 = mul i32 %r437, %r6
+%r439 = call i576 @mulPv544x32(i32* %r3, i32 %r438)
+%r440 = zext i576 %r439 to i736
+%r441 = add i736 %r436, %r440
+%r442 = lshr i736 %r441, 32
+%r443 = trunc i736 %r442 to i704
+%r444 = trunc i704 %r443 to i32 ; round 14
+%r445 = mul i32 %r444, %r6
+%r446 = call i576 @mulPv544x32(i32* %r3, i32 %r445)
+%r447 = zext i576 %r446 to i704
+%r448 = add i704 %r443, %r447
+%r449 = lshr i704 %r448, 32
+%r450 = trunc i704 %r449 to i672
+%r451 = trunc i672 %r450 to i32 ; round 15
+%r452 = mul i32 %r451, %r6
+%r453 = call i576 @mulPv544x32(i32* %r3, i32 %r452)
+%r454 = zext i576 %r453 to i672
+%r455 = add i672 %r450, %r454
+%r456 = lshr i672 %r455, 32
+%r457 = trunc i672 %r456 to i640
+%r458 = trunc i640 %r457 to i32 ; round 16
+%r459 = mul i32 %r458, %r6
+%r460 = call i576 @mulPv544x32(i32* %r3, i32 %r459)
+%r461 = zext i576 %r460 to i640
+%r462 = add i640 %r457, %r461
+%r463 = lshr i640 %r462, 32
+%r464 = trunc i640 %r463 to i608
+%r465 = trunc i608 %r464 to i32 ; round 17 (last)
+%r466 = mul i32 %r465, %r6
+%r467 = call i576 @mulPv544x32(i32* %r3, i32 %r466)
+%r468 = zext i576 %r467 to i608
+%r469 = add i608 %r464, %r468
+%r470 = lshr i608 %r469, 32
+%r471 = trunc i608 %r470 to i576 ; %r471 = 576-bit candidate result after all 17 rounds
+%r472 = zext i544 %r119 to i576 ; widen the modulus to the candidate's width
+%r473 = sub i576 %r471, %r472 ; candidate - modulus, wrapping at 576 bits
+%r474 = lshr i576 %r473, 544 ; move bit 544 of the difference down to bit 0
+%r475 = trunc i576 %r474 to i1 ; bit 544 of the difference, used as the borrow flag
+%r476 = select i1 %r475, i576 %r471, i576 %r473 ; flag set -> keep the unsubtracted candidate, else keep candidate - modulus
+%r477 = trunc i576 %r476 to i544 ; only the low 544 bits are stored
+%r478 = trunc i544 %r477 to i32 ; store the result to %r1[0..16], least-significant word first
+%r480 = getelementptr i32, i32* %r1, i32 0
+store i32 %r478, i32* %r480
+%r481 = lshr i544 %r477, 32
+%r482 = trunc i544 %r481 to i32
+%r484 = getelementptr i32, i32* %r1, i32 1
+store i32 %r482, i32* %r484
+%r485 = lshr i544 %r481, 32
+%r486 = trunc i544 %r485 to i32
+%r488 = getelementptr i32, i32* %r1, i32 2
+store i32 %r486, i32* %r488
+%r489 = lshr i544 %r485, 32
+%r490 = trunc i544 %r489 to i32
+%r492 = getelementptr i32, i32* %r1, i32 3
+store i32 %r490, i32* %r492
+%r493 = lshr i544 %r489, 32
+%r494 = trunc i544 %r493 to i32
+%r496 = getelementptr i32, i32* %r1, i32 4
+store i32 %r494, i32* %r496
+%r497 = lshr i544 %r493, 32
+%r498 = trunc i544 %r497 to i32
+%r500 = getelementptr i32, i32* %r1, i32 5
+store i32 %r498, i32* %r500
+%r501 = lshr i544 %r497, 32
+%r502 = trunc i544 %r501 to i32
+%r504 = getelementptr i32, i32* %r1, i32 6
+store i32 %r502, i32* %r504
+%r505 = lshr i544 %r501, 32
+%r506 = trunc i544 %r505 to i32
+%r508 = getelementptr i32, i32* %r1, i32 7
+store i32 %r506, i32* %r508
+%r509 = lshr i544 %r505, 32
+%r510 = trunc i544 %r509 to i32
+%r512 = getelementptr i32, i32* %r1, i32 8
+store i32 %r510, i32* %r512
+%r513 = lshr i544 %r509, 32
+%r514 = trunc i544 %r513 to i32
+%r516 = getelementptr i32, i32* %r1, i32 9
+store i32 %r514, i32* %r516
+%r517 = lshr i544 %r513, 32
+%r518 = trunc i544 %r517 to i32
+%r520 = getelementptr i32, i32* %r1, i32 10
+store i32 %r518, i32* %r520
+%r521 = lshr i544 %r517, 32
+%r522 = trunc i544 %r521 to i32
+%r524 = getelementptr i32, i32* %r1, i32 11
+store i32 %r522, i32* %r524
+%r525 = lshr i544 %r521, 32
+%r526 = trunc i544 %r525 to i32
+%r528 = getelementptr i32, i32* %r1, i32 12
+store i32 %r526, i32* %r528
+%r529 = lshr i544 %r525, 32
+%r530 = trunc i544 %r529 to i32
+%r532 = getelementptr i32, i32* %r1, i32 13
+store i32 %r530, i32* %r532
+%r533 = lshr i544 %r529, 32
+%r534 = trunc i544 %r533 to i32
+%r536 = getelementptr i32, i32* %r1, i32 14
+store i32 %r534, i32* %r536
+%r537 = lshr i544 %r533, 32
+%r538 = trunc i544 %r537 to i32
+%r540 = getelementptr i32, i32* %r1, i32 15
+store i32 %r538, i32* %r540
+%r541 = lshr i544 %r537, 32
+%r542 = trunc i544 %r541 to i32
+%r544 = getelementptr i32, i32* %r1, i32 16
+store i32 %r542, i32* %r544
+ret void
+}
+define i32 @mcl_fp_addPre17L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; 17-word add with no modular reduction: %r2[0..16] = low 544 bits of (%r3 + %r4); returns the carry-out
+{ ; outline: pack both operands into wide integers, add once, unpack the sum, return the carry
+%r5 = load i32, i32* %r3 ; pack %r3[0..16] into an i544, word 0 least significant
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r3, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r3, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r3, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r3, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r3, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576 ; first operand widened by one word so the carry is not lost
+%r119 = load i32, i32* %r4 ; pack %r4[0..16] the same way
+%r120 = zext i32 %r119 to i64
+%r122 = getelementptr i32, i32* %r4, i32 1
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i64
+%r125 = shl i64 %r124, 32
+%r126 = or i64 %r120, %r125
+%r127 = zext i64 %r126 to i96
+%r129 = getelementptr i32, i32* %r4, i32 2
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i96
+%r132 = shl i96 %r131, 64
+%r133 = or i96 %r127, %r132
+%r134 = zext i96 %r133 to i128
+%r136 = getelementptr i32, i32* %r4, i32 3
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i128
+%r139 = shl i128 %r138, 96
+%r140 = or i128 %r134, %r139
+%r141 = zext i128 %r140 to i160
+%r143 = getelementptr i32, i32* %r4, i32 4
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i160
+%r146 = shl i160 %r145, 128
+%r147 = or i160 %r141, %r146
+%r148 = zext i160 %r147 to i192
+%r150 = getelementptr i32, i32* %r4, i32 5
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i192
+%r153 = shl i192 %r152, 160
+%r154 = or i192 %r148, %r153
+%r155 = zext i192 %r154 to i224
+%r157 = getelementptr i32, i32* %r4, i32 6
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i224
+%r160 = shl i224 %r159, 192
+%r161 = or i224 %r155, %r160
+%r162 = zext i224 %r161 to i256
+%r164 = getelementptr i32, i32* %r4, i32 7
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i256
+%r167 = shl i256 %r166, 224
+%r168 = or i256 %r162, %r167
+%r169 = zext i256 %r168 to i288
+%r171 = getelementptr i32, i32* %r4, i32 8
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i288
+%r174 = shl i288 %r173, 256
+%r175 = or i288 %r169, %r174
+%r176 = zext i288 %r175 to i320
+%r178 = getelementptr i32, i32* %r4, i32 9
+%r179 = load i32, i32* %r178
+%r180 = zext i32 %r179 to i320
+%r181 = shl i320 %r180, 288
+%r182 = or i320 %r176, %r181
+%r183 = zext i320 %r182 to i352
+%r185 = getelementptr i32, i32* %r4, i32 10
+%r186 = load i32, i32* %r185
+%r187 = zext i32 %r186 to i352
+%r188 = shl i352 %r187, 320
+%r189 = or i352 %r183, %r188
+%r190 = zext i352 %r189 to i384
+%r192 = getelementptr i32, i32* %r4, i32 11
+%r193 = load i32, i32* %r192
+%r194 = zext i32 %r193 to i384
+%r195 = shl i384 %r194, 352
+%r196 = or i384 %r190, %r195
+%r197 = zext i384 %r196 to i416
+%r199 = getelementptr i32, i32* %r4, i32 12
+%r200 = load i32, i32* %r199
+%r201 = zext i32 %r200 to i416
+%r202 = shl i416 %r201, 384
+%r203 = or i416 %r197, %r202
+%r204 = zext i416 %r203 to i448
+%r206 = getelementptr i32, i32* %r4, i32 13
+%r207 = load i32, i32* %r206
+%r208 = zext i32 %r207 to i448
+%r209 = shl i448 %r208, 416
+%r210 = or i448 %r204, %r209
+%r211 = zext i448 %r210 to i480
+%r213 = getelementptr i32, i32* %r4, i32 14
+%r214 = load i32, i32* %r213
+%r215 = zext i32 %r214 to i480
+%r216 = shl i480 %r215, 448
+%r217 = or i480 %r211, %r216
+%r218 = zext i480 %r217 to i512
+%r220 = getelementptr i32, i32* %r4, i32 15
+%r221 = load i32, i32* %r220
+%r222 = zext i32 %r221 to i512
+%r223 = shl i512 %r222, 480
+%r224 = or i512 %r218, %r223
+%r225 = zext i512 %r224 to i544
+%r227 = getelementptr i32, i32* %r4, i32 16
+%r228 = load i32, i32* %r227
+%r229 = zext i32 %r228 to i544
+%r230 = shl i544 %r229, 512
+%r231 = or i544 %r225, %r230
+%r232 = zext i544 %r231 to i576 ; second operand widened to i576
+%r233 = add i576 %r118, %r232 ; full sum, carry lands in bit 544
+%r234 = trunc i576 %r233 to i544 ; low 544 bits are the stored result
+%r235 = trunc i544 %r234 to i32 ; write the result to %r2[0..16], least-significant word first
+%r237 = getelementptr i32, i32* %r2, i32 0
+store i32 %r235, i32* %r237
+%r238 = lshr i544 %r234, 32
+%r239 = trunc i544 %r238 to i32
+%r241 = getelementptr i32, i32* %r2, i32 1
+store i32 %r239, i32* %r241
+%r242 = lshr i544 %r238, 32
+%r243 = trunc i544 %r242 to i32
+%r245 = getelementptr i32, i32* %r2, i32 2
+store i32 %r243, i32* %r245
+%r246 = lshr i544 %r242, 32
+%r247 = trunc i544 %r246 to i32
+%r249 = getelementptr i32, i32* %r2, i32 3
+store i32 %r247, i32* %r249
+%r250 = lshr i544 %r246, 32
+%r251 = trunc i544 %r250 to i32
+%r253 = getelementptr i32, i32* %r2, i32 4
+store i32 %r251, i32* %r253
+%r254 = lshr i544 %r250, 32
+%r255 = trunc i544 %r254 to i32
+%r257 = getelementptr i32, i32* %r2, i32 5
+store i32 %r255, i32* %r257
+%r258 = lshr i544 %r254, 32
+%r259 = trunc i544 %r258 to i32
+%r261 = getelementptr i32, i32* %r2, i32 6
+store i32 %r259, i32* %r261
+%r262 = lshr i544 %r258, 32
+%r263 = trunc i544 %r262 to i32
+%r265 = getelementptr i32, i32* %r2, i32 7
+store i32 %r263, i32* %r265
+%r266 = lshr i544 %r262, 32
+%r267 = trunc i544 %r266 to i32
+%r269 = getelementptr i32, i32* %r2, i32 8
+store i32 %r267, i32* %r269
+%r270 = lshr i544 %r266, 32
+%r271 = trunc i544 %r270 to i32
+%r273 = getelementptr i32, i32* %r2, i32 9
+store i32 %r271, i32* %r273
+%r274 = lshr i544 %r270, 32
+%r275 = trunc i544 %r274 to i32
+%r277 = getelementptr i32, i32* %r2, i32 10
+store i32 %r275, i32* %r277
+%r278 = lshr i544 %r274, 32
+%r279 = trunc i544 %r278 to i32
+%r281 = getelementptr i32, i32* %r2, i32 11
+store i32 %r279, i32* %r281
+%r282 = lshr i544 %r278, 32
+%r283 = trunc i544 %r282 to i32
+%r285 = getelementptr i32, i32* %r2, i32 12
+store i32 %r283, i32* %r285
+%r286 = lshr i544 %r282, 32
+%r287 = trunc i544 %r286 to i32
+%r289 = getelementptr i32, i32* %r2, i32 13
+store i32 %r287, i32* %r289
+%r290 = lshr i544 %r286, 32
+%r291 = trunc i544 %r290 to i32
+%r293 = getelementptr i32, i32* %r2, i32 14
+store i32 %r291, i32* %r293
+%r294 = lshr i544 %r290, 32
+%r295 = trunc i544 %r294 to i32
+%r297 = getelementptr i32, i32* %r2, i32 15
+store i32 %r295, i32* %r297
+%r298 = lshr i544 %r294, 32
+%r299 = trunc i544 %r298 to i32
+%r301 = getelementptr i32, i32* %r2, i32 16
+store i32 %r299, i32* %r301
+%r302 = lshr i576 %r233, 544 ; bits 544 and above of the sum
+%r303 = trunc i576 %r302 to i32 ; carry-out as i32: 0 or 1, since both operands are below 2^544
+ret i32 %r303
+}
+define i32 @mcl_fp_subPre17L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r3
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r3, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r3, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r3, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r3, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r3, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r119 = load i32, i32* %r4
+%r120 = zext i32 %r119 to i64
+%r122 = getelementptr i32, i32* %r4, i32 1
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i64
+%r125 = shl i64 %r124, 32
+%r126 = or i64 %r120, %r125
+%r127 = zext i64 %r126 to i96
+%r129 = getelementptr i32, i32* %r4, i32 2
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i96
+%r132 = shl i96 %r131, 64
+%r133 = or i96 %r127, %r132
+%r134 = zext i96 %r133 to i128
+%r136 = getelementptr i32, i32* %r4, i32 3
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i128
+%r139 = shl i128 %r138, 96
+%r140 = or i128 %r134, %r139
+%r141 = zext i128 %r140 to i160
+%r143 = getelementptr i32, i32* %r4, i32 4
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i160
+%r146 = shl i160 %r145, 128
+%r147 = or i160 %r141, %r146
+%r148 = zext i160 %r147 to i192
+%r150 = getelementptr i32, i32* %r4, i32 5
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i192
+%r153 = shl i192 %r152, 160
+%r154 = or i192 %r148, %r153
+%r155 = zext i192 %r154 to i224
+%r157 = getelementptr i32, i32* %r4, i32 6
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i224
+%r160 = shl i224 %r159, 192
+%r161 = or i224 %r155, %r160
+%r162 = zext i224 %r161 to i256
+%r164 = getelementptr i32, i32* %r4, i32 7
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i256
+%r167 = shl i256 %r166, 224
+%r168 = or i256 %r162, %r167
+%r169 = zext i256 %r168 to i288
+%r171 = getelementptr i32, i32* %r4, i32 8
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i288
+%r174 = shl i288 %r173, 256
+%r175 = or i288 %r169, %r174
+%r176 = zext i288 %r175 to i320
+%r178 = getelementptr i32, i32* %r4, i32 9
+%r179 = load i32, i32* %r178
+%r180 = zext i32 %r179 to i320
+%r181 = shl i320 %r180, 288
+%r182 = or i320 %r176, %r181
+%r183 = zext i320 %r182 to i352
+%r185 = getelementptr i32, i32* %r4, i32 10
+%r186 = load i32, i32* %r185
+%r187 = zext i32 %r186 to i352
+%r188 = shl i352 %r187, 320
+%r189 = or i352 %r183, %r188
+%r190 = zext i352 %r189 to i384
+%r192 = getelementptr i32, i32* %r4, i32 11
+%r193 = load i32, i32* %r192
+%r194 = zext i32 %r193 to i384
+%r195 = shl i384 %r194, 352
+%r196 = or i384 %r190, %r195
+%r197 = zext i384 %r196 to i416
+%r199 = getelementptr i32, i32* %r4, i32 12
+%r200 = load i32, i32* %r199
+%r201 = zext i32 %r200 to i416
+%r202 = shl i416 %r201, 384
+%r203 = or i416 %r197, %r202
+%r204 = zext i416 %r203 to i448
+%r206 = getelementptr i32, i32* %r4, i32 13
+%r207 = load i32, i32* %r206
+%r208 = zext i32 %r207 to i448
+%r209 = shl i448 %r208, 416
+%r210 = or i448 %r204, %r209
+%r211 = zext i448 %r210 to i480
+%r213 = getelementptr i32, i32* %r4, i32 14
+%r214 = load i32, i32* %r213
+%r215 = zext i32 %r214 to i480
+%r216 = shl i480 %r215, 448
+%r217 = or i480 %r211, %r216
+%r218 = zext i480 %r217 to i512
+%r220 = getelementptr i32, i32* %r4, i32 15
+%r221 = load i32, i32* %r220
+%r222 = zext i32 %r221 to i512
+%r223 = shl i512 %r222, 480
+%r224 = or i512 %r218, %r223
+%r225 = zext i512 %r224 to i544
+%r227 = getelementptr i32, i32* %r4, i32 16
+%r228 = load i32, i32* %r227
+%r229 = zext i32 %r228 to i544
+%r230 = shl i544 %r229, 512
+%r231 = or i544 %r225, %r230
+%r232 = zext i544 %r231 to i576
+%r233 = sub i576 %r118, %r232
+%r234 = trunc i576 %r233 to i544
+%r235 = trunc i544 %r234 to i32
+%r237 = getelementptr i32, i32* %r2, i32 0
+store i32 %r235, i32* %r237
+%r238 = lshr i544 %r234, 32
+%r239 = trunc i544 %r238 to i32
+%r241 = getelementptr i32, i32* %r2, i32 1
+store i32 %r239, i32* %r241
+%r242 = lshr i544 %r238, 32
+%r243 = trunc i544 %r242 to i32
+%r245 = getelementptr i32, i32* %r2, i32 2
+store i32 %r243, i32* %r245
+%r246 = lshr i544 %r242, 32
+%r247 = trunc i544 %r246 to i32
+%r249 = getelementptr i32, i32* %r2, i32 3
+store i32 %r247, i32* %r249
+%r250 = lshr i544 %r246, 32
+%r251 = trunc i544 %r250 to i32
+%r253 = getelementptr i32, i32* %r2, i32 4
+store i32 %r251, i32* %r253
+%r254 = lshr i544 %r250, 32
+%r255 = trunc i544 %r254 to i32
+%r257 = getelementptr i32, i32* %r2, i32 5
+store i32 %r255, i32* %r257
+%r258 = lshr i544 %r254, 32
+%r259 = trunc i544 %r258 to i32
+%r261 = getelementptr i32, i32* %r2, i32 6
+store i32 %r259, i32* %r261
+%r262 = lshr i544 %r258, 32
+%r263 = trunc i544 %r262 to i32
+%r265 = getelementptr i32, i32* %r2, i32 7
+store i32 %r263, i32* %r265
+%r266 = lshr i544 %r262, 32
+%r267 = trunc i544 %r266 to i32
+%r269 = getelementptr i32, i32* %r2, i32 8
+store i32 %r267, i32* %r269
+%r270 = lshr i544 %r266, 32
+%r271 = trunc i544 %r270 to i32
+%r273 = getelementptr i32, i32* %r2, i32 9
+store i32 %r271, i32* %r273
+%r274 = lshr i544 %r270, 32
+%r275 = trunc i544 %r274 to i32
+%r277 = getelementptr i32, i32* %r2, i32 10
+store i32 %r275, i32* %r277
+%r278 = lshr i544 %r274, 32
+%r279 = trunc i544 %r278 to i32
+%r281 = getelementptr i32, i32* %r2, i32 11
+store i32 %r279, i32* %r281
+%r282 = lshr i544 %r278, 32
+%r283 = trunc i544 %r282 to i32
+%r285 = getelementptr i32, i32* %r2, i32 12
+store i32 %r283, i32* %r285
+%r286 = lshr i544 %r282, 32
+%r287 = trunc i544 %r286 to i32
+%r289 = getelementptr i32, i32* %r2, i32 13
+store i32 %r287, i32* %r289
+%r290 = lshr i544 %r286, 32
+%r291 = trunc i544 %r290 to i32
+%r293 = getelementptr i32, i32* %r2, i32 14
+store i32 %r291, i32* %r293
+%r294 = lshr i544 %r290, 32
+%r295 = trunc i544 %r294 to i32
+%r297 = getelementptr i32, i32* %r2, i32 15
+store i32 %r295, i32* %r297
+%r298 = lshr i544 %r294, 32
+%r299 = trunc i544 %r298 to i32
+%r301 = getelementptr i32, i32* %r2, i32 16
+store i32 %r299, i32* %r301
+%r302 = lshr i576 %r233, 544
+%r303 = trunc i576 %r302 to i32
+%r305 = and i32 %r303, 1
+ret i32 %r305
+}
+define void @mcl_fp_shr1_17L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r3 = load i32, i32* %r2
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = zext i192 %r38 to i224
+%r41 = getelementptr i32, i32* %r2, i32 6
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i224
+%r44 = shl i224 %r43, 192
+%r45 = or i224 %r39, %r44
+%r46 = zext i224 %r45 to i256
+%r48 = getelementptr i32, i32* %r2, i32 7
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i256
+%r51 = shl i256 %r50, 224
+%r52 = or i256 %r46, %r51
+%r53 = zext i256 %r52 to i288
+%r55 = getelementptr i32, i32* %r2, i32 8
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i288
+%r58 = shl i288 %r57, 256
+%r59 = or i288 %r53, %r58
+%r60 = zext i288 %r59 to i320
+%r62 = getelementptr i32, i32* %r2, i32 9
+%r63 = load i32, i32* %r62
+%r64 = zext i32 %r63 to i320
+%r65 = shl i320 %r64, 288
+%r66 = or i320 %r60, %r65
+%r67 = zext i320 %r66 to i352
+%r69 = getelementptr i32, i32* %r2, i32 10
+%r70 = load i32, i32* %r69
+%r71 = zext i32 %r70 to i352
+%r72 = shl i352 %r71, 320
+%r73 = or i352 %r67, %r72
+%r74 = zext i352 %r73 to i384
+%r76 = getelementptr i32, i32* %r2, i32 11
+%r77 = load i32, i32* %r76
+%r78 = zext i32 %r77 to i384
+%r79 = shl i384 %r78, 352
+%r80 = or i384 %r74, %r79
+%r81 = zext i384 %r80 to i416
+%r83 = getelementptr i32, i32* %r2, i32 12
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i416
+%r86 = shl i416 %r85, 384
+%r87 = or i416 %r81, %r86
+%r88 = zext i416 %r87 to i448
+%r90 = getelementptr i32, i32* %r2, i32 13
+%r91 = load i32, i32* %r90
+%r92 = zext i32 %r91 to i448
+%r93 = shl i448 %r92, 416
+%r94 = or i448 %r88, %r93
+%r95 = zext i448 %r94 to i480
+%r97 = getelementptr i32, i32* %r2, i32 14
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i480
+%r100 = shl i480 %r99, 448
+%r101 = or i480 %r95, %r100
+%r102 = zext i480 %r101 to i512
+%r104 = getelementptr i32, i32* %r2, i32 15
+%r105 = load i32, i32* %r104
+%r106 = zext i32 %r105 to i512
+%r107 = shl i512 %r106, 480
+%r108 = or i512 %r102, %r107
+%r109 = zext i512 %r108 to i544
+%r111 = getelementptr i32, i32* %r2, i32 16
+%r112 = load i32, i32* %r111
+%r113 = zext i32 %r112 to i544
+%r114 = shl i544 %r113, 512
+%r115 = or i544 %r109, %r114
+%r116 = lshr i544 %r115, 1
+%r117 = trunc i544 %r116 to i32
+%r119 = getelementptr i32, i32* %r1, i32 0
+store i32 %r117, i32* %r119
+%r120 = lshr i544 %r116, 32
+%r121 = trunc i544 %r120 to i32
+%r123 = getelementptr i32, i32* %r1, i32 1
+store i32 %r121, i32* %r123
+%r124 = lshr i544 %r120, 32
+%r125 = trunc i544 %r124 to i32
+%r127 = getelementptr i32, i32* %r1, i32 2
+store i32 %r125, i32* %r127
+%r128 = lshr i544 %r124, 32
+%r129 = trunc i544 %r128 to i32
+%r131 = getelementptr i32, i32* %r1, i32 3
+store i32 %r129, i32* %r131
+%r132 = lshr i544 %r128, 32
+%r133 = trunc i544 %r132 to i32
+%r135 = getelementptr i32, i32* %r1, i32 4
+store i32 %r133, i32* %r135
+%r136 = lshr i544 %r132, 32
+%r137 = trunc i544 %r136 to i32
+%r139 = getelementptr i32, i32* %r1, i32 5
+store i32 %r137, i32* %r139
+%r140 = lshr i544 %r136, 32
+%r141 = trunc i544 %r140 to i32
+%r143 = getelementptr i32, i32* %r1, i32 6
+store i32 %r141, i32* %r143
+%r144 = lshr i544 %r140, 32
+%r145 = trunc i544 %r144 to i32
+%r147 = getelementptr i32, i32* %r1, i32 7
+store i32 %r145, i32* %r147
+%r148 = lshr i544 %r144, 32
+%r149 = trunc i544 %r148 to i32
+%r151 = getelementptr i32, i32* %r1, i32 8
+store i32 %r149, i32* %r151
+%r152 = lshr i544 %r148, 32
+%r153 = trunc i544 %r152 to i32
+%r155 = getelementptr i32, i32* %r1, i32 9
+store i32 %r153, i32* %r155
+%r156 = lshr i544 %r152, 32
+%r157 = trunc i544 %r156 to i32
+%r159 = getelementptr i32, i32* %r1, i32 10
+store i32 %r157, i32* %r159
+%r160 = lshr i544 %r156, 32
+%r161 = trunc i544 %r160 to i32
+%r163 = getelementptr i32, i32* %r1, i32 11
+store i32 %r161, i32* %r163
+%r164 = lshr i544 %r160, 32
+%r165 = trunc i544 %r164 to i32
+%r167 = getelementptr i32, i32* %r1, i32 12
+store i32 %r165, i32* %r167
+%r168 = lshr i544 %r164, 32
+%r169 = trunc i544 %r168 to i32
+%r171 = getelementptr i32, i32* %r1, i32 13
+store i32 %r169, i32* %r171
+%r172 = lshr i544 %r168, 32
+%r173 = trunc i544 %r172 to i32
+%r175 = getelementptr i32, i32* %r1, i32 14
+store i32 %r173, i32* %r175
+%r176 = lshr i544 %r172, 32
+%r177 = trunc i544 %r176 to i32
+%r179 = getelementptr i32, i32* %r1, i32 15
+store i32 %r177, i32* %r179
+%r180 = lshr i544 %r176, 32
+%r181 = trunc i544 %r180 to i32
+%r183 = getelementptr i32, i32* %r1, i32 16
+store i32 %r181, i32* %r183
+ret void
+}
+define void @mcl_fp_add17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = load i32, i32* %r3
+%r119 = zext i32 %r118 to i64
+%r121 = getelementptr i32, i32* %r3, i32 1
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i64
+%r124 = shl i64 %r123, 32
+%r125 = or i64 %r119, %r124
+%r126 = zext i64 %r125 to i96
+%r128 = getelementptr i32, i32* %r3, i32 2
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i96
+%r131 = shl i96 %r130, 64
+%r132 = or i96 %r126, %r131
+%r133 = zext i96 %r132 to i128
+%r135 = getelementptr i32, i32* %r3, i32 3
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i128
+%r138 = shl i128 %r137, 96
+%r139 = or i128 %r133, %r138
+%r140 = zext i128 %r139 to i160
+%r142 = getelementptr i32, i32* %r3, i32 4
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i160
+%r145 = shl i160 %r144, 128
+%r146 = or i160 %r140, %r145
+%r147 = zext i160 %r146 to i192
+%r149 = getelementptr i32, i32* %r3, i32 5
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i192
+%r152 = shl i192 %r151, 160
+%r153 = or i192 %r147, %r152
+%r154 = zext i192 %r153 to i224
+%r156 = getelementptr i32, i32* %r3, i32 6
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i224
+%r159 = shl i224 %r158, 192
+%r160 = or i224 %r154, %r159
+%r161 = zext i224 %r160 to i256
+%r163 = getelementptr i32, i32* %r3, i32 7
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i256
+%r166 = shl i256 %r165, 224
+%r167 = or i256 %r161, %r166
+%r168 = zext i256 %r167 to i288
+%r170 = getelementptr i32, i32* %r3, i32 8
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i288
+%r173 = shl i288 %r172, 256
+%r174 = or i288 %r168, %r173
+%r175 = zext i288 %r174 to i320
+%r177 = getelementptr i32, i32* %r3, i32 9
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i320
+%r180 = shl i320 %r179, 288
+%r181 = or i320 %r175, %r180
+%r182 = zext i320 %r181 to i352
+%r184 = getelementptr i32, i32* %r3, i32 10
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i352
+%r187 = shl i352 %r186, 320
+%r188 = or i352 %r182, %r187
+%r189 = zext i352 %r188 to i384
+%r191 = getelementptr i32, i32* %r3, i32 11
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i384
+%r194 = shl i384 %r193, 352
+%r195 = or i384 %r189, %r194
+%r196 = zext i384 %r195 to i416
+%r198 = getelementptr i32, i32* %r3, i32 12
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i416
+%r201 = shl i416 %r200, 384
+%r202 = or i416 %r196, %r201
+%r203 = zext i416 %r202 to i448
+%r205 = getelementptr i32, i32* %r3, i32 13
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i448
+%r208 = shl i448 %r207, 416
+%r209 = or i448 %r203, %r208
+%r210 = zext i448 %r209 to i480
+%r212 = getelementptr i32, i32* %r3, i32 14
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i480
+%r215 = shl i480 %r214, 448
+%r216 = or i480 %r210, %r215
+%r217 = zext i480 %r216 to i512
+%r219 = getelementptr i32, i32* %r3, i32 15
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i512
+%r222 = shl i512 %r221, 480
+%r223 = or i512 %r217, %r222
+%r224 = zext i512 %r223 to i544
+%r226 = getelementptr i32, i32* %r3, i32 16
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i544
+%r229 = shl i544 %r228, 512
+%r230 = or i544 %r224, %r229
+%r231 = zext i544 %r117 to i576
+%r232 = zext i544 %r230 to i576
+%r233 = add i576 %r231, %r232
+%r234 = trunc i576 %r233 to i544
+%r235 = trunc i544 %r234 to i32
+%r237 = getelementptr i32, i32* %r1, i32 0
+store i32 %r235, i32* %r237
+%r238 = lshr i544 %r234, 32
+%r239 = trunc i544 %r238 to i32
+%r241 = getelementptr i32, i32* %r1, i32 1
+store i32 %r239, i32* %r241
+%r242 = lshr i544 %r238, 32
+%r243 = trunc i544 %r242 to i32
+%r245 = getelementptr i32, i32* %r1, i32 2
+store i32 %r243, i32* %r245
+%r246 = lshr i544 %r242, 32
+%r247 = trunc i544 %r246 to i32
+%r249 = getelementptr i32, i32* %r1, i32 3
+store i32 %r247, i32* %r249
+%r250 = lshr i544 %r246, 32
+%r251 = trunc i544 %r250 to i32
+%r253 = getelementptr i32, i32* %r1, i32 4
+store i32 %r251, i32* %r253
+%r254 = lshr i544 %r250, 32
+%r255 = trunc i544 %r254 to i32
+%r257 = getelementptr i32, i32* %r1, i32 5
+store i32 %r255, i32* %r257
+%r258 = lshr i544 %r254, 32
+%r259 = trunc i544 %r258 to i32
+%r261 = getelementptr i32, i32* %r1, i32 6
+store i32 %r259, i32* %r261
+%r262 = lshr i544 %r258, 32
+%r263 = trunc i544 %r262 to i32
+%r265 = getelementptr i32, i32* %r1, i32 7
+store i32 %r263, i32* %r265
+%r266 = lshr i544 %r262, 32
+%r267 = trunc i544 %r266 to i32
+%r269 = getelementptr i32, i32* %r1, i32 8
+store i32 %r267, i32* %r269
+%r270 = lshr i544 %r266, 32
+%r271 = trunc i544 %r270 to i32
+%r273 = getelementptr i32, i32* %r1, i32 9
+store i32 %r271, i32* %r273
+%r274 = lshr i544 %r270, 32
+%r275 = trunc i544 %r274 to i32
+%r277 = getelementptr i32, i32* %r1, i32 10
+store i32 %r275, i32* %r277
+%r278 = lshr i544 %r274, 32
+%r279 = trunc i544 %r278 to i32
+%r281 = getelementptr i32, i32* %r1, i32 11
+store i32 %r279, i32* %r281
+%r282 = lshr i544 %r278, 32
+%r283 = trunc i544 %r282 to i32
+%r285 = getelementptr i32, i32* %r1, i32 12
+store i32 %r283, i32* %r285
+%r286 = lshr i544 %r282, 32
+%r287 = trunc i544 %r286 to i32
+%r289 = getelementptr i32, i32* %r1, i32 13
+store i32 %r287, i32* %r289
+%r290 = lshr i544 %r286, 32
+%r291 = trunc i544 %r290 to i32
+%r293 = getelementptr i32, i32* %r1, i32 14
+store i32 %r291, i32* %r293
+%r294 = lshr i544 %r290, 32
+%r295 = trunc i544 %r294 to i32
+%r297 = getelementptr i32, i32* %r1, i32 15
+store i32 %r295, i32* %r297
+%r298 = lshr i544 %r294, 32
+%r299 = trunc i544 %r298 to i32
+%r301 = getelementptr i32, i32* %r1, i32 16
+store i32 %r299, i32* %r301
+%r302 = load i32, i32* %r4
+%r303 = zext i32 %r302 to i64
+%r305 = getelementptr i32, i32* %r4, i32 1
+%r306 = load i32, i32* %r305
+%r307 = zext i32 %r306 to i64
+%r308 = shl i64 %r307, 32
+%r309 = or i64 %r303, %r308
+%r310 = zext i64 %r309 to i96
+%r312 = getelementptr i32, i32* %r4, i32 2
+%r313 = load i32, i32* %r312
+%r314 = zext i32 %r313 to i96
+%r315 = shl i96 %r314, 64
+%r316 = or i96 %r310, %r315
+%r317 = zext i96 %r316 to i128
+%r319 = getelementptr i32, i32* %r4, i32 3
+%r320 = load i32, i32* %r319
+%r321 = zext i32 %r320 to i128
+%r322 = shl i128 %r321, 96
+%r323 = or i128 %r317, %r322
+%r324 = zext i128 %r323 to i160
+%r326 = getelementptr i32, i32* %r4, i32 4
+%r327 = load i32, i32* %r326
+%r328 = zext i32 %r327 to i160
+%r329 = shl i160 %r328, 128
+%r330 = or i160 %r324, %r329
+%r331 = zext i160 %r330 to i192
+%r333 = getelementptr i32, i32* %r4, i32 5
+%r334 = load i32, i32* %r333
+%r335 = zext i32 %r334 to i192
+%r336 = shl i192 %r335, 160
+%r337 = or i192 %r331, %r336
+%r338 = zext i192 %r337 to i224
+%r340 = getelementptr i32, i32* %r4, i32 6
+%r341 = load i32, i32* %r340
+%r342 = zext i32 %r341 to i224
+%r343 = shl i224 %r342, 192
+%r344 = or i224 %r338, %r343
+%r345 = zext i224 %r344 to i256
+%r347 = getelementptr i32, i32* %r4, i32 7
+%r348 = load i32, i32* %r347
+%r349 = zext i32 %r348 to i256
+%r350 = shl i256 %r349, 224
+%r351 = or i256 %r345, %r350
+%r352 = zext i256 %r351 to i288
+%r354 = getelementptr i32, i32* %r4, i32 8
+%r355 = load i32, i32* %r354
+%r356 = zext i32 %r355 to i288
+%r357 = shl i288 %r356, 256
+%r358 = or i288 %r352, %r357
+%r359 = zext i288 %r358 to i320
+%r361 = getelementptr i32, i32* %r4, i32 9
+%r362 = load i32, i32* %r361
+%r363 = zext i32 %r362 to i320
+%r364 = shl i320 %r363, 288
+%r365 = or i320 %r359, %r364
+%r366 = zext i320 %r365 to i352
+%r368 = getelementptr i32, i32* %r4, i32 10
+%r369 = load i32, i32* %r368
+%r370 = zext i32 %r369 to i352
+%r371 = shl i352 %r370, 320
+%r372 = or i352 %r366, %r371
+%r373 = zext i352 %r372 to i384
+%r375 = getelementptr i32, i32* %r4, i32 11
+%r376 = load i32, i32* %r375
+%r377 = zext i32 %r376 to i384
+%r378 = shl i384 %r377, 352
+%r379 = or i384 %r373, %r378
+%r380 = zext i384 %r379 to i416
+%r382 = getelementptr i32, i32* %r4, i32 12
+%r383 = load i32, i32* %r382
+%r384 = zext i32 %r383 to i416
+%r385 = shl i416 %r384, 384
+%r386 = or i416 %r380, %r385
+%r387 = zext i416 %r386 to i448
+%r389 = getelementptr i32, i32* %r4, i32 13
+%r390 = load i32, i32* %r389
+%r391 = zext i32 %r390 to i448
+%r392 = shl i448 %r391, 416
+%r393 = or i448 %r387, %r392
+%r394 = zext i448 %r393 to i480
+%r396 = getelementptr i32, i32* %r4, i32 14
+%r397 = load i32, i32* %r396
+%r398 = zext i32 %r397 to i480
+%r399 = shl i480 %r398, 448
+%r400 = or i480 %r394, %r399
+%r401 = zext i480 %r400 to i512
+%r403 = getelementptr i32, i32* %r4, i32 15
+%r404 = load i32, i32* %r403
+%r405 = zext i32 %r404 to i512
+%r406 = shl i512 %r405, 480
+%r407 = or i512 %r401, %r406
+%r408 = zext i512 %r407 to i544
+%r410 = getelementptr i32, i32* %r4, i32 16
+%r411 = load i32, i32* %r410
+%r412 = zext i32 %r411 to i544
+%r413 = shl i544 %r412, 512
+%r414 = or i544 %r408, %r413
+%r415 = zext i544 %r414 to i576
+%r416 = sub i576 %r233, %r415
+%r417 = lshr i576 %r416, 544
+%r418 = trunc i576 %r417 to i1
+br i1%r418, label %carry, label %nocarry
+nocarry:
+%r419 = trunc i576 %r416 to i544
+%r420 = trunc i544 %r419 to i32
+%r422 = getelementptr i32, i32* %r1, i32 0
+store i32 %r420, i32* %r422
+%r423 = lshr i544 %r419, 32
+%r424 = trunc i544 %r423 to i32
+%r426 = getelementptr i32, i32* %r1, i32 1
+store i32 %r424, i32* %r426
+%r427 = lshr i544 %r423, 32
+%r428 = trunc i544 %r427 to i32
+%r430 = getelementptr i32, i32* %r1, i32 2
+store i32 %r428, i32* %r430
+%r431 = lshr i544 %r427, 32
+%r432 = trunc i544 %r431 to i32
+%r434 = getelementptr i32, i32* %r1, i32 3
+store i32 %r432, i32* %r434
+%r435 = lshr i544 %r431, 32
+%r436 = trunc i544 %r435 to i32
+%r438 = getelementptr i32, i32* %r1, i32 4
+store i32 %r436, i32* %r438
+%r439 = lshr i544 %r435, 32
+%r440 = trunc i544 %r439 to i32
+%r442 = getelementptr i32, i32* %r1, i32 5
+store i32 %r440, i32* %r442
+%r443 = lshr i544 %r439, 32
+%r444 = trunc i544 %r443 to i32
+%r446 = getelementptr i32, i32* %r1, i32 6
+store i32 %r444, i32* %r446
+%r447 = lshr i544 %r443, 32
+%r448 = trunc i544 %r447 to i32
+%r450 = getelementptr i32, i32* %r1, i32 7
+store i32 %r448, i32* %r450
+%r451 = lshr i544 %r447, 32
+%r452 = trunc i544 %r451 to i32
+%r454 = getelementptr i32, i32* %r1, i32 8
+store i32 %r452, i32* %r454
+%r455 = lshr i544 %r451, 32
+%r456 = trunc i544 %r455 to i32
+%r458 = getelementptr i32, i32* %r1, i32 9
+store i32 %r456, i32* %r458
+%r459 = lshr i544 %r455, 32
+%r460 = trunc i544 %r459 to i32
+%r462 = getelementptr i32, i32* %r1, i32 10
+store i32 %r460, i32* %r462
+%r463 = lshr i544 %r459, 32
+%r464 = trunc i544 %r463 to i32
+%r466 = getelementptr i32, i32* %r1, i32 11
+store i32 %r464, i32* %r466
+%r467 = lshr i544 %r463, 32
+%r468 = trunc i544 %r467 to i32
+%r470 = getelementptr i32, i32* %r1, i32 12
+store i32 %r468, i32* %r470
+%r471 = lshr i544 %r467, 32
+%r472 = trunc i544 %r471 to i32
+%r474 = getelementptr i32, i32* %r1, i32 13
+store i32 %r472, i32* %r474
+%r475 = lshr i544 %r471, 32
+%r476 = trunc i544 %r475 to i32
+%r478 = getelementptr i32, i32* %r1, i32 14
+store i32 %r476, i32* %r478
+%r479 = lshr i544 %r475, 32
+%r480 = trunc i544 %r479 to i32
+%r482 = getelementptr i32, i32* %r1, i32 15
+store i32 %r480, i32* %r482
+%r483 = lshr i544 %r479, 32
+%r484 = trunc i544 %r483 to i32
+%r486 = getelementptr i32, i32* %r1, i32 16
+store i32 %r484, i32* %r486
+ret void
+carry:
+ret void
+}
+define void @mcl_fp_addNF17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = load i32, i32* %r3
+%r119 = zext i32 %r118 to i64
+%r121 = getelementptr i32, i32* %r3, i32 1
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i64
+%r124 = shl i64 %r123, 32
+%r125 = or i64 %r119, %r124
+%r126 = zext i64 %r125 to i96
+%r128 = getelementptr i32, i32* %r3, i32 2
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i96
+%r131 = shl i96 %r130, 64
+%r132 = or i96 %r126, %r131
+%r133 = zext i96 %r132 to i128
+%r135 = getelementptr i32, i32* %r3, i32 3
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i128
+%r138 = shl i128 %r137, 96
+%r139 = or i128 %r133, %r138
+%r140 = zext i128 %r139 to i160
+%r142 = getelementptr i32, i32* %r3, i32 4
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i160
+%r145 = shl i160 %r144, 128
+%r146 = or i160 %r140, %r145
+%r147 = zext i160 %r146 to i192
+%r149 = getelementptr i32, i32* %r3, i32 5
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i192
+%r152 = shl i192 %r151, 160
+%r153 = or i192 %r147, %r152
+%r154 = zext i192 %r153 to i224
+%r156 = getelementptr i32, i32* %r3, i32 6
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i224
+%r159 = shl i224 %r158, 192
+%r160 = or i224 %r154, %r159
+%r161 = zext i224 %r160 to i256
+%r163 = getelementptr i32, i32* %r3, i32 7
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i256
+%r166 = shl i256 %r165, 224
+%r167 = or i256 %r161, %r166
+%r168 = zext i256 %r167 to i288
+%r170 = getelementptr i32, i32* %r3, i32 8
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i288
+%r173 = shl i288 %r172, 256
+%r174 = or i288 %r168, %r173
+%r175 = zext i288 %r174 to i320
+%r177 = getelementptr i32, i32* %r3, i32 9
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i320
+%r180 = shl i320 %r179, 288
+%r181 = or i320 %r175, %r180
+%r182 = zext i320 %r181 to i352
+%r184 = getelementptr i32, i32* %r3, i32 10
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i352
+%r187 = shl i352 %r186, 320
+%r188 = or i352 %r182, %r187
+%r189 = zext i352 %r188 to i384
+%r191 = getelementptr i32, i32* %r3, i32 11
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i384
+%r194 = shl i384 %r193, 352
+%r195 = or i384 %r189, %r194
+%r196 = zext i384 %r195 to i416
+%r198 = getelementptr i32, i32* %r3, i32 12
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i416
+%r201 = shl i416 %r200, 384
+%r202 = or i416 %r196, %r201
+%r203 = zext i416 %r202 to i448
+%r205 = getelementptr i32, i32* %r3, i32 13
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i448
+%r208 = shl i448 %r207, 416
+%r209 = or i448 %r203, %r208
+%r210 = zext i448 %r209 to i480
+%r212 = getelementptr i32, i32* %r3, i32 14
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i480
+%r215 = shl i480 %r214, 448
+%r216 = or i480 %r210, %r215
+%r217 = zext i480 %r216 to i512
+%r219 = getelementptr i32, i32* %r3, i32 15
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i512
+%r222 = shl i512 %r221, 480
+%r223 = or i512 %r217, %r222
+%r224 = zext i512 %r223 to i544
+%r226 = getelementptr i32, i32* %r3, i32 16
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i544
+%r229 = shl i544 %r228, 512
+%r230 = or i544 %r224, %r229
+%r231 = add i544 %r117, %r230
+%r232 = load i32, i32* %r4
+%r233 = zext i32 %r232 to i64
+%r235 = getelementptr i32, i32* %r4, i32 1
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i64
+%r238 = shl i64 %r237, 32
+%r239 = or i64 %r233, %r238
+%r240 = zext i64 %r239 to i96
+%r242 = getelementptr i32, i32* %r4, i32 2
+%r243 = load i32, i32* %r242
+%r244 = zext i32 %r243 to i96
+%r245 = shl i96 %r244, 64
+%r246 = or i96 %r240, %r245
+%r247 = zext i96 %r246 to i128
+%r249 = getelementptr i32, i32* %r4, i32 3
+%r250 = load i32, i32* %r249
+%r251 = zext i32 %r250 to i128
+%r252 = shl i128 %r251, 96
+%r253 = or i128 %r247, %r252
+%r254 = zext i128 %r253 to i160
+%r256 = getelementptr i32, i32* %r4, i32 4
+%r257 = load i32, i32* %r256
+%r258 = zext i32 %r257 to i160
+%r259 = shl i160 %r258, 128
+%r260 = or i160 %r254, %r259
+%r261 = zext i160 %r260 to i192
+%r263 = getelementptr i32, i32* %r4, i32 5
+%r264 = load i32, i32* %r263
+%r265 = zext i32 %r264 to i192
+%r266 = shl i192 %r265, 160
+%r267 = or i192 %r261, %r266
+%r268 = zext i192 %r267 to i224
+%r270 = getelementptr i32, i32* %r4, i32 6
+%r271 = load i32, i32* %r270
+%r272 = zext i32 %r271 to i224
+%r273 = shl i224 %r272, 192
+%r274 = or i224 %r268, %r273
+%r275 = zext i224 %r274 to i256
+%r277 = getelementptr i32, i32* %r4, i32 7
+%r278 = load i32, i32* %r277
+%r279 = zext i32 %r278 to i256
+%r280 = shl i256 %r279, 224
+%r281 = or i256 %r275, %r280
+%r282 = zext i256 %r281 to i288
+%r284 = getelementptr i32, i32* %r4, i32 8
+%r285 = load i32, i32* %r284
+%r286 = zext i32 %r285 to i288
+%r287 = shl i288 %r286, 256
+%r288 = or i288 %r282, %r287
+%r289 = zext i288 %r288 to i320
+%r291 = getelementptr i32, i32* %r4, i32 9
+%r292 = load i32, i32* %r291
+%r293 = zext i32 %r292 to i320
+%r294 = shl i320 %r293, 288
+%r295 = or i320 %r289, %r294
+%r296 = zext i320 %r295 to i352
+%r298 = getelementptr i32, i32* %r4, i32 10
+%r299 = load i32, i32* %r298
+%r300 = zext i32 %r299 to i352
+%r301 = shl i352 %r300, 320
+%r302 = or i352 %r296, %r301
+%r303 = zext i352 %r302 to i384
+%r305 = getelementptr i32, i32* %r4, i32 11
+%r306 = load i32, i32* %r305
+%r307 = zext i32 %r306 to i384
+%r308 = shl i384 %r307, 352
+%r309 = or i384 %r303, %r308
+%r310 = zext i384 %r309 to i416
+%r312 = getelementptr i32, i32* %r4, i32 12
+%r313 = load i32, i32* %r312
+%r314 = zext i32 %r313 to i416
+%r315 = shl i416 %r314, 384
+%r316 = or i416 %r310, %r315
+%r317 = zext i416 %r316 to i448
+%r319 = getelementptr i32, i32* %r4, i32 13
+%r320 = load i32, i32* %r319
+%r321 = zext i32 %r320 to i448
+%r322 = shl i448 %r321, 416
+%r323 = or i448 %r317, %r322
+%r324 = zext i448 %r323 to i480
+%r326 = getelementptr i32, i32* %r4, i32 14
+%r327 = load i32, i32* %r326
+%r328 = zext i32 %r327 to i480
+%r329 = shl i480 %r328, 448
+%r330 = or i480 %r324, %r329
+%r331 = zext i480 %r330 to i512
+%r333 = getelementptr i32, i32* %r4, i32 15
+%r334 = load i32, i32* %r333
+%r335 = zext i32 %r334 to i512
+%r336 = shl i512 %r335, 480
+%r337 = or i512 %r331, %r336
+%r338 = zext i512 %r337 to i544
+%r340 = getelementptr i32, i32* %r4, i32 16
+%r341 = load i32, i32* %r340
+%r342 = zext i32 %r341 to i544
+%r343 = shl i544 %r342, 512
+%r344 = or i544 %r338, %r343
+%r345 = sub i544 %r231, %r344
+%r346 = lshr i544 %r345, 543
+%r347 = trunc i544 %r346 to i1
+%r348 = select i1 %r347, i544 %r231, i544 %r345
+%r349 = trunc i544 %r348 to i32
+%r351 = getelementptr i32, i32* %r1, i32 0
+store i32 %r349, i32* %r351
+%r352 = lshr i544 %r348, 32
+%r353 = trunc i544 %r352 to i32
+%r355 = getelementptr i32, i32* %r1, i32 1
+store i32 %r353, i32* %r355
+%r356 = lshr i544 %r352, 32
+%r357 = trunc i544 %r356 to i32
+%r359 = getelementptr i32, i32* %r1, i32 2
+store i32 %r357, i32* %r359
+%r360 = lshr i544 %r356, 32
+%r361 = trunc i544 %r360 to i32
+%r363 = getelementptr i32, i32* %r1, i32 3
+store i32 %r361, i32* %r363
+%r364 = lshr i544 %r360, 32
+%r365 = trunc i544 %r364 to i32
+%r367 = getelementptr i32, i32* %r1, i32 4
+store i32 %r365, i32* %r367
+%r368 = lshr i544 %r364, 32
+%r369 = trunc i544 %r368 to i32
+%r371 = getelementptr i32, i32* %r1, i32 5
+store i32 %r369, i32* %r371
+%r372 = lshr i544 %r368, 32
+%r373 = trunc i544 %r372 to i32
+%r375 = getelementptr i32, i32* %r1, i32 6
+store i32 %r373, i32* %r375
+%r376 = lshr i544 %r372, 32
+%r377 = trunc i544 %r376 to i32
+%r379 = getelementptr i32, i32* %r1, i32 7
+store i32 %r377, i32* %r379
+%r380 = lshr i544 %r376, 32
+%r381 = trunc i544 %r380 to i32
+%r383 = getelementptr i32, i32* %r1, i32 8
+store i32 %r381, i32* %r383
+%r384 = lshr i544 %r380, 32
+%r385 = trunc i544 %r384 to i32
+%r387 = getelementptr i32, i32* %r1, i32 9
+store i32 %r385, i32* %r387
+%r388 = lshr i544 %r384, 32
+%r389 = trunc i544 %r388 to i32
+%r391 = getelementptr i32, i32* %r1, i32 10
+store i32 %r389, i32* %r391
+%r392 = lshr i544 %r388, 32
+%r393 = trunc i544 %r392 to i32
+%r395 = getelementptr i32, i32* %r1, i32 11
+store i32 %r393, i32* %r395
+%r396 = lshr i544 %r392, 32
+%r397 = trunc i544 %r396 to i32
+%r399 = getelementptr i32, i32* %r1, i32 12
+store i32 %r397, i32* %r399
+%r400 = lshr i544 %r396, 32
+%r401 = trunc i544 %r400 to i32
+%r403 = getelementptr i32, i32* %r1, i32 13
+store i32 %r401, i32* %r403
+%r404 = lshr i544 %r400, 32
+%r405 = trunc i544 %r404 to i32
+%r407 = getelementptr i32, i32* %r1, i32 14
+store i32 %r405, i32* %r407
+%r408 = lshr i544 %r404, 32
+%r409 = trunc i544 %r408 to i32
+%r411 = getelementptr i32, i32* %r1, i32 15
+store i32 %r409, i32* %r411
+%r412 = lshr i544 %r408, 32
+%r413 = trunc i544 %r412 to i32
+%r415 = getelementptr i32, i32* %r1, i32 16
+store i32 %r413, i32* %r415
+ret void
+}
+define void @mcl_fp_sub17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r1[0..16] = %r2 - %r3 over 17 x i32 limbs; if the subtraction borrows, %r4 is added and %r1 rewritten. NOTE(review): %r4 is presumably the field modulus - confirm against the generator
+{
+%r5 = load i32, i32* %r2 ; limb 0 of %r2; the chain below ORs limbs 0..16 into one i544, limb i at bit 32*i
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116 ; %r117 = all 17 limbs of %r2 as a single i544
+%r118 = load i32, i32* %r3 ; same limb-assembly chain, now for %r3
+%r119 = zext i32 %r118 to i64
+%r121 = getelementptr i32, i32* %r3, i32 1
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i64
+%r124 = shl i64 %r123, 32
+%r125 = or i64 %r119, %r124
+%r126 = zext i64 %r125 to i96
+%r128 = getelementptr i32, i32* %r3, i32 2
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i96
+%r131 = shl i96 %r130, 64
+%r132 = or i96 %r126, %r131
+%r133 = zext i96 %r132 to i128
+%r135 = getelementptr i32, i32* %r3, i32 3
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i128
+%r138 = shl i128 %r137, 96
+%r139 = or i128 %r133, %r138
+%r140 = zext i128 %r139 to i160
+%r142 = getelementptr i32, i32* %r3, i32 4
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i160
+%r145 = shl i160 %r144, 128
+%r146 = or i160 %r140, %r145
+%r147 = zext i160 %r146 to i192
+%r149 = getelementptr i32, i32* %r3, i32 5
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i192
+%r152 = shl i192 %r151, 160
+%r153 = or i192 %r147, %r152
+%r154 = zext i192 %r153 to i224
+%r156 = getelementptr i32, i32* %r3, i32 6
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i224
+%r159 = shl i224 %r158, 192
+%r160 = or i224 %r154, %r159
+%r161 = zext i224 %r160 to i256
+%r163 = getelementptr i32, i32* %r3, i32 7
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i256
+%r166 = shl i256 %r165, 224
+%r167 = or i256 %r161, %r166
+%r168 = zext i256 %r167 to i288
+%r170 = getelementptr i32, i32* %r3, i32 8
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i288
+%r173 = shl i288 %r172, 256
+%r174 = or i288 %r168, %r173
+%r175 = zext i288 %r174 to i320
+%r177 = getelementptr i32, i32* %r3, i32 9
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i320
+%r180 = shl i320 %r179, 288
+%r181 = or i320 %r175, %r180
+%r182 = zext i320 %r181 to i352
+%r184 = getelementptr i32, i32* %r3, i32 10
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i352
+%r187 = shl i352 %r186, 320
+%r188 = or i352 %r182, %r187
+%r189 = zext i352 %r188 to i384
+%r191 = getelementptr i32, i32* %r3, i32 11
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i384
+%r194 = shl i384 %r193, 352
+%r195 = or i384 %r189, %r194
+%r196 = zext i384 %r195 to i416
+%r198 = getelementptr i32, i32* %r3, i32 12
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i416
+%r201 = shl i416 %r200, 384
+%r202 = or i416 %r196, %r201
+%r203 = zext i416 %r202 to i448
+%r205 = getelementptr i32, i32* %r3, i32 13
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i448
+%r208 = shl i448 %r207, 416
+%r209 = or i448 %r203, %r208
+%r210 = zext i448 %r209 to i480
+%r212 = getelementptr i32, i32* %r3, i32 14
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i480
+%r215 = shl i480 %r214, 448
+%r216 = or i480 %r210, %r215
+%r217 = zext i480 %r216 to i512
+%r219 = getelementptr i32, i32* %r3, i32 15
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i512
+%r222 = shl i512 %r221, 480
+%r223 = or i512 %r217, %r222
+%r224 = zext i512 %r223 to i544
+%r226 = getelementptr i32, i32* %r3, i32 16
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i544
+%r229 = shl i544 %r228, 512
+%r230 = or i544 %r224, %r229 ; %r230 = all 17 limbs of %r3 as a single i544
+%r231 = zext i544 %r117 to i576 ; widen both operands by 32 bits so a borrow shows up above bit 543
+%r232 = zext i544 %r230 to i576
+%r233 = sub i576 %r231, %r232
+%r234 = trunc i576 %r233 to i544 ; %r234 = low 544 bits of the difference (the value that gets stored)
+%r235 = lshr i576 %r233, 544
+%r236 = trunc i576 %r235 to i1 ; %r236 = bit 544 of the widened difference: 1 exactly when %r2 < %r3 (borrow)
+%r237 = trunc i544 %r234 to i32 ; store the raw difference to %r1 limb by limb, shifting right 32 bits each step
+%r239 = getelementptr i32, i32* %r1, i32 0
+store i32 %r237, i32* %r239
+%r240 = lshr i544 %r234, 32
+%r241 = trunc i544 %r240 to i32
+%r243 = getelementptr i32, i32* %r1, i32 1
+store i32 %r241, i32* %r243
+%r244 = lshr i544 %r240, 32
+%r245 = trunc i544 %r244 to i32
+%r247 = getelementptr i32, i32* %r1, i32 2
+store i32 %r245, i32* %r247
+%r248 = lshr i544 %r244, 32
+%r249 = trunc i544 %r248 to i32
+%r251 = getelementptr i32, i32* %r1, i32 3
+store i32 %r249, i32* %r251
+%r252 = lshr i544 %r248, 32
+%r253 = trunc i544 %r252 to i32
+%r255 = getelementptr i32, i32* %r1, i32 4
+store i32 %r253, i32* %r255
+%r256 = lshr i544 %r252, 32
+%r257 = trunc i544 %r256 to i32
+%r259 = getelementptr i32, i32* %r1, i32 5
+store i32 %r257, i32* %r259
+%r260 = lshr i544 %r256, 32
+%r261 = trunc i544 %r260 to i32
+%r263 = getelementptr i32, i32* %r1, i32 6
+store i32 %r261, i32* %r263
+%r264 = lshr i544 %r260, 32
+%r265 = trunc i544 %r264 to i32
+%r267 = getelementptr i32, i32* %r1, i32 7
+store i32 %r265, i32* %r267
+%r268 = lshr i544 %r264, 32
+%r269 = trunc i544 %r268 to i32
+%r271 = getelementptr i32, i32* %r1, i32 8
+store i32 %r269, i32* %r271
+%r272 = lshr i544 %r268, 32
+%r273 = trunc i544 %r272 to i32
+%r275 = getelementptr i32, i32* %r1, i32 9
+store i32 %r273, i32* %r275
+%r276 = lshr i544 %r272, 32
+%r277 = trunc i544 %r276 to i32
+%r279 = getelementptr i32, i32* %r1, i32 10
+store i32 %r277, i32* %r279
+%r280 = lshr i544 %r276, 32
+%r281 = trunc i544 %r280 to i32
+%r283 = getelementptr i32, i32* %r1, i32 11
+store i32 %r281, i32* %r283
+%r284 = lshr i544 %r280, 32
+%r285 = trunc i544 %r284 to i32
+%r287 = getelementptr i32, i32* %r1, i32 12
+store i32 %r285, i32* %r287
+%r288 = lshr i544 %r284, 32
+%r289 = trunc i544 %r288 to i32
+%r291 = getelementptr i32, i32* %r1, i32 13
+store i32 %r289, i32* %r291
+%r292 = lshr i544 %r288, 32
+%r293 = trunc i544 %r292 to i32
+%r295 = getelementptr i32, i32* %r1, i32 14
+store i32 %r293, i32* %r295
+%r296 = lshr i544 %r292, 32
+%r297 = trunc i544 %r296 to i32
+%r299 = getelementptr i32, i32* %r1, i32 15
+store i32 %r297, i32* %r299
+%r300 = lshr i544 %r296, 32
+%r301 = trunc i544 %r300 to i32
+%r303 = getelementptr i32, i32* %r1, i32 16
+store i32 %r301, i32* %r303
+br i1%r236, label %carry, label %nocarry ; borrow -> carry block adds %r4 and rewrites %r1; otherwise the stored difference is final
+nocarry:
+ret void
+carry:
+%r304 = load i32, i32* %r4 ; assemble the 17 limbs of %r4 into an i544, same chain as above
+%r305 = zext i32 %r304 to i64
+%r307 = getelementptr i32, i32* %r4, i32 1
+%r308 = load i32, i32* %r307
+%r309 = zext i32 %r308 to i64
+%r310 = shl i64 %r309, 32
+%r311 = or i64 %r305, %r310
+%r312 = zext i64 %r311 to i96
+%r314 = getelementptr i32, i32* %r4, i32 2
+%r315 = load i32, i32* %r314
+%r316 = zext i32 %r315 to i96
+%r317 = shl i96 %r316, 64
+%r318 = or i96 %r312, %r317
+%r319 = zext i96 %r318 to i128
+%r321 = getelementptr i32, i32* %r4, i32 3
+%r322 = load i32, i32* %r321
+%r323 = zext i32 %r322 to i128
+%r324 = shl i128 %r323, 96
+%r325 = or i128 %r319, %r324
+%r326 = zext i128 %r325 to i160
+%r328 = getelementptr i32, i32* %r4, i32 4
+%r329 = load i32, i32* %r328
+%r330 = zext i32 %r329 to i160
+%r331 = shl i160 %r330, 128
+%r332 = or i160 %r326, %r331
+%r333 = zext i160 %r332 to i192
+%r335 = getelementptr i32, i32* %r4, i32 5
+%r336 = load i32, i32* %r335
+%r337 = zext i32 %r336 to i192
+%r338 = shl i192 %r337, 160
+%r339 = or i192 %r333, %r338
+%r340 = zext i192 %r339 to i224
+%r342 = getelementptr i32, i32* %r4, i32 6
+%r343 = load i32, i32* %r342
+%r344 = zext i32 %r343 to i224
+%r345 = shl i224 %r344, 192
+%r346 = or i224 %r340, %r345
+%r347 = zext i224 %r346 to i256
+%r349 = getelementptr i32, i32* %r4, i32 7
+%r350 = load i32, i32* %r349
+%r351 = zext i32 %r350 to i256
+%r352 = shl i256 %r351, 224
+%r353 = or i256 %r347, %r352
+%r354 = zext i256 %r353 to i288
+%r356 = getelementptr i32, i32* %r4, i32 8
+%r357 = load i32, i32* %r356
+%r358 = zext i32 %r357 to i288
+%r359 = shl i288 %r358, 256
+%r360 = or i288 %r354, %r359
+%r361 = zext i288 %r360 to i320
+%r363 = getelementptr i32, i32* %r4, i32 9
+%r364 = load i32, i32* %r363
+%r365 = zext i32 %r364 to i320
+%r366 = shl i320 %r365, 288
+%r367 = or i320 %r361, %r366
+%r368 = zext i320 %r367 to i352
+%r370 = getelementptr i32, i32* %r4, i32 10
+%r371 = load i32, i32* %r370
+%r372 = zext i32 %r371 to i352
+%r373 = shl i352 %r372, 320
+%r374 = or i352 %r368, %r373
+%r375 = zext i352 %r374 to i384
+%r377 = getelementptr i32, i32* %r4, i32 11
+%r378 = load i32, i32* %r377
+%r379 = zext i32 %r378 to i384
+%r380 = shl i384 %r379, 352
+%r381 = or i384 %r375, %r380
+%r382 = zext i384 %r381 to i416
+%r384 = getelementptr i32, i32* %r4, i32 12
+%r385 = load i32, i32* %r384
+%r386 = zext i32 %r385 to i416
+%r387 = shl i416 %r386, 384
+%r388 = or i416 %r382, %r387
+%r389 = zext i416 %r388 to i448
+%r391 = getelementptr i32, i32* %r4, i32 13
+%r392 = load i32, i32* %r391
+%r393 = zext i32 %r392 to i448
+%r394 = shl i448 %r393, 416
+%r395 = or i448 %r389, %r394
+%r396 = zext i448 %r395 to i480
+%r398 = getelementptr i32, i32* %r4, i32 14
+%r399 = load i32, i32* %r398
+%r400 = zext i32 %r399 to i480
+%r401 = shl i480 %r400, 448
+%r402 = or i480 %r396, %r401
+%r403 = zext i480 %r402 to i512
+%r405 = getelementptr i32, i32* %r4, i32 15
+%r406 = load i32, i32* %r405
+%r407 = zext i32 %r406 to i512
+%r408 = shl i512 %r407, 480
+%r409 = or i512 %r403, %r408
+%r410 = zext i512 %r409 to i544
+%r412 = getelementptr i32, i32* %r4, i32 16
+%r413 = load i32, i32* %r412
+%r414 = zext i32 %r413 to i544
+%r415 = shl i544 %r414, 512
+%r416 = or i544 %r410, %r415 ; %r416 = all 17 limbs of %r4 as a single i544
+%r417 = add i544 %r234, %r416 ; corrected result: difference + %r4, wrapping modulo 2^544
+%r418 = trunc i544 %r417 to i32 ; overwrite %r1 with the corrected value, limb by limb
+%r420 = getelementptr i32, i32* %r1, i32 0
+store i32 %r418, i32* %r420
+%r421 = lshr i544 %r417, 32
+%r422 = trunc i544 %r421 to i32
+%r424 = getelementptr i32, i32* %r1, i32 1
+store i32 %r422, i32* %r424
+%r425 = lshr i544 %r421, 32
+%r426 = trunc i544 %r425 to i32
+%r428 = getelementptr i32, i32* %r1, i32 2
+store i32 %r426, i32* %r428
+%r429 = lshr i544 %r425, 32
+%r430 = trunc i544 %r429 to i32
+%r432 = getelementptr i32, i32* %r1, i32 3
+store i32 %r430, i32* %r432
+%r433 = lshr i544 %r429, 32
+%r434 = trunc i544 %r433 to i32
+%r436 = getelementptr i32, i32* %r1, i32 4
+store i32 %r434, i32* %r436
+%r437 = lshr i544 %r433, 32
+%r438 = trunc i544 %r437 to i32
+%r440 = getelementptr i32, i32* %r1, i32 5
+store i32 %r438, i32* %r440
+%r441 = lshr i544 %r437, 32
+%r442 = trunc i544 %r441 to i32
+%r444 = getelementptr i32, i32* %r1, i32 6
+store i32 %r442, i32* %r444
+%r445 = lshr i544 %r441, 32
+%r446 = trunc i544 %r445 to i32
+%r448 = getelementptr i32, i32* %r1, i32 7
+store i32 %r446, i32* %r448
+%r449 = lshr i544 %r445, 32
+%r450 = trunc i544 %r449 to i32
+%r452 = getelementptr i32, i32* %r1, i32 8
+store i32 %r450, i32* %r452
+%r453 = lshr i544 %r449, 32
+%r454 = trunc i544 %r453 to i32
+%r456 = getelementptr i32, i32* %r1, i32 9
+store i32 %r454, i32* %r456
+%r457 = lshr i544 %r453, 32
+%r458 = trunc i544 %r457 to i32
+%r460 = getelementptr i32, i32* %r1, i32 10
+store i32 %r458, i32* %r460
+%r461 = lshr i544 %r457, 32
+%r462 = trunc i544 %r461 to i32
+%r464 = getelementptr i32, i32* %r1, i32 11
+store i32 %r462, i32* %r464
+%r465 = lshr i544 %r461, 32
+%r466 = trunc i544 %r465 to i32
+%r468 = getelementptr i32, i32* %r1, i32 12
+store i32 %r466, i32* %r468
+%r469 = lshr i544 %r465, 32
+%r470 = trunc i544 %r469 to i32
+%r472 = getelementptr i32, i32* %r1, i32 13
+store i32 %r470, i32* %r472
+%r473 = lshr i544 %r469, 32
+%r474 = trunc i544 %r473 to i32
+%r476 = getelementptr i32, i32* %r1, i32 14
+store i32 %r474, i32* %r476
+%r477 = lshr i544 %r473, 32
+%r478 = trunc i544 %r477 to i32
+%r480 = getelementptr i32, i32* %r1, i32 15
+store i32 %r478, i32* %r480
+%r481 = lshr i544 %r477, 32
+%r482 = trunc i544 %r481 to i32
+%r484 = getelementptr i32, i32* %r1, i32 16
+store i32 %r482, i32* %r484
+ret void
+}
+define void @mcl_fp_subNF17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; branch-free: %r1[0..16] = (%r2 - %r3) + (%r4 if bit 543 of the difference is set, else 0), over 17 x i32 limbs. NOTE(review): %r4 is presumably the field modulus with its top bit clear - confirm against the generator
+{
+%r5 = load i32, i32* %r2 ; limb 0 of %r2; the chain below ORs limbs 0..16 into one i544, limb i at bit 32*i
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116 ; %r117 = all 17 limbs of %r2 as a single i544
+%r118 = load i32, i32* %r3 ; same limb-assembly chain, now for %r3
+%r119 = zext i32 %r118 to i64
+%r121 = getelementptr i32, i32* %r3, i32 1
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i64
+%r124 = shl i64 %r123, 32
+%r125 = or i64 %r119, %r124
+%r126 = zext i64 %r125 to i96
+%r128 = getelementptr i32, i32* %r3, i32 2
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i96
+%r131 = shl i96 %r130, 64
+%r132 = or i96 %r126, %r131
+%r133 = zext i96 %r132 to i128
+%r135 = getelementptr i32, i32* %r3, i32 3
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i128
+%r138 = shl i128 %r137, 96
+%r139 = or i128 %r133, %r138
+%r140 = zext i128 %r139 to i160
+%r142 = getelementptr i32, i32* %r3, i32 4
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i160
+%r145 = shl i160 %r144, 128
+%r146 = or i160 %r140, %r145
+%r147 = zext i160 %r146 to i192
+%r149 = getelementptr i32, i32* %r3, i32 5
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i192
+%r152 = shl i192 %r151, 160
+%r153 = or i192 %r147, %r152
+%r154 = zext i192 %r153 to i224
+%r156 = getelementptr i32, i32* %r3, i32 6
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i224
+%r159 = shl i224 %r158, 192
+%r160 = or i224 %r154, %r159
+%r161 = zext i224 %r160 to i256
+%r163 = getelementptr i32, i32* %r3, i32 7
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i256
+%r166 = shl i256 %r165, 224
+%r167 = or i256 %r161, %r166
+%r168 = zext i256 %r167 to i288
+%r170 = getelementptr i32, i32* %r3, i32 8
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i288
+%r173 = shl i288 %r172, 256
+%r174 = or i288 %r168, %r173
+%r175 = zext i288 %r174 to i320
+%r177 = getelementptr i32, i32* %r3, i32 9
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i320
+%r180 = shl i320 %r179, 288
+%r181 = or i320 %r175, %r180
+%r182 = zext i320 %r181 to i352
+%r184 = getelementptr i32, i32* %r3, i32 10
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i352
+%r187 = shl i352 %r186, 320
+%r188 = or i352 %r182, %r187
+%r189 = zext i352 %r188 to i384
+%r191 = getelementptr i32, i32* %r3, i32 11
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i384
+%r194 = shl i384 %r193, 352
+%r195 = or i384 %r189, %r194
+%r196 = zext i384 %r195 to i416
+%r198 = getelementptr i32, i32* %r3, i32 12
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i416
+%r201 = shl i416 %r200, 384
+%r202 = or i416 %r196, %r201
+%r203 = zext i416 %r202 to i448
+%r205 = getelementptr i32, i32* %r3, i32 13
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i448
+%r208 = shl i448 %r207, 416
+%r209 = or i448 %r203, %r208
+%r210 = zext i448 %r209 to i480
+%r212 = getelementptr i32, i32* %r3, i32 14
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i480
+%r215 = shl i480 %r214, 448
+%r216 = or i480 %r210, %r215
+%r217 = zext i480 %r216 to i512
+%r219 = getelementptr i32, i32* %r3, i32 15
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i512
+%r222 = shl i512 %r221, 480
+%r223 = or i512 %r217, %r222
+%r224 = zext i512 %r223 to i544
+%r226 = getelementptr i32, i32* %r3, i32 16
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i544
+%r229 = shl i544 %r228, 512
+%r230 = or i544 %r224, %r229 ; %r230 = all 17 limbs of %r3 as a single i544
+%r231 = sub i544 %r117, %r230 ; difference computed at 544 bits, wrapping modulo 2^544 (no widening here)
+%r232 = lshr i544 %r231, 543
+%r233 = trunc i544 %r232 to i1 ; %r233 = bit 543 (most significant bit) of the difference, used as the "negative" flag
+%r234 = load i32, i32* %r4 ; assemble the 17 limbs of %r4 into an i544 (loaded unconditionally)
+%r235 = zext i32 %r234 to i64
+%r237 = getelementptr i32, i32* %r4, i32 1
+%r238 = load i32, i32* %r237
+%r239 = zext i32 %r238 to i64
+%r240 = shl i64 %r239, 32
+%r241 = or i64 %r235, %r240
+%r242 = zext i64 %r241 to i96
+%r244 = getelementptr i32, i32* %r4, i32 2
+%r245 = load i32, i32* %r244
+%r246 = zext i32 %r245 to i96
+%r247 = shl i96 %r246, 64
+%r248 = or i96 %r242, %r247
+%r249 = zext i96 %r248 to i128
+%r251 = getelementptr i32, i32* %r4, i32 3
+%r252 = load i32, i32* %r251
+%r253 = zext i32 %r252 to i128
+%r254 = shl i128 %r253, 96
+%r255 = or i128 %r249, %r254
+%r256 = zext i128 %r255 to i160
+%r258 = getelementptr i32, i32* %r4, i32 4
+%r259 = load i32, i32* %r258
+%r260 = zext i32 %r259 to i160
+%r261 = shl i160 %r260, 128
+%r262 = or i160 %r256, %r261
+%r263 = zext i160 %r262 to i192
+%r265 = getelementptr i32, i32* %r4, i32 5
+%r266 = load i32, i32* %r265
+%r267 = zext i32 %r266 to i192
+%r268 = shl i192 %r267, 160
+%r269 = or i192 %r263, %r268
+%r270 = zext i192 %r269 to i224
+%r272 = getelementptr i32, i32* %r4, i32 6
+%r273 = load i32, i32* %r272
+%r274 = zext i32 %r273 to i224
+%r275 = shl i224 %r274, 192
+%r276 = or i224 %r270, %r275
+%r277 = zext i224 %r276 to i256
+%r279 = getelementptr i32, i32* %r4, i32 7
+%r280 = load i32, i32* %r279
+%r281 = zext i32 %r280 to i256
+%r282 = shl i256 %r281, 224
+%r283 = or i256 %r277, %r282
+%r284 = zext i256 %r283 to i288
+%r286 = getelementptr i32, i32* %r4, i32 8
+%r287 = load i32, i32* %r286
+%r288 = zext i32 %r287 to i288
+%r289 = shl i288 %r288, 256
+%r290 = or i288 %r284, %r289
+%r291 = zext i288 %r290 to i320
+%r293 = getelementptr i32, i32* %r4, i32 9
+%r294 = load i32, i32* %r293
+%r295 = zext i32 %r294 to i320
+%r296 = shl i320 %r295, 288
+%r297 = or i320 %r291, %r296
+%r298 = zext i320 %r297 to i352
+%r300 = getelementptr i32, i32* %r4, i32 10
+%r301 = load i32, i32* %r300
+%r302 = zext i32 %r301 to i352
+%r303 = shl i352 %r302, 320
+%r304 = or i352 %r298, %r303
+%r305 = zext i352 %r304 to i384
+%r307 = getelementptr i32, i32* %r4, i32 11
+%r308 = load i32, i32* %r307
+%r309 = zext i32 %r308 to i384
+%r310 = shl i384 %r309, 352
+%r311 = or i384 %r305, %r310
+%r312 = zext i384 %r311 to i416
+%r314 = getelementptr i32, i32* %r4, i32 12
+%r315 = load i32, i32* %r314
+%r316 = zext i32 %r315 to i416
+%r317 = shl i416 %r316, 384
+%r318 = or i416 %r312, %r317
+%r319 = zext i416 %r318 to i448
+%r321 = getelementptr i32, i32* %r4, i32 13
+%r322 = load i32, i32* %r321
+%r323 = zext i32 %r322 to i448
+%r324 = shl i448 %r323, 416
+%r325 = or i448 %r319, %r324
+%r326 = zext i448 %r325 to i480
+%r328 = getelementptr i32, i32* %r4, i32 14
+%r329 = load i32, i32* %r328
+%r330 = zext i32 %r329 to i480
+%r331 = shl i480 %r330, 448
+%r332 = or i480 %r326, %r331
+%r333 = zext i480 %r332 to i512
+%r335 = getelementptr i32, i32* %r4, i32 15
+%r336 = load i32, i32* %r335
+%r337 = zext i32 %r336 to i512
+%r338 = shl i512 %r337, 480
+%r339 = or i512 %r333, %r338
+%r340 = zext i512 %r339 to i544
+%r342 = getelementptr i32, i32* %r4, i32 16
+%r343 = load i32, i32* %r342
+%r344 = zext i32 %r343 to i544
+%r345 = shl i544 %r344, 512
+%r346 = or i544 %r340, %r345 ; %r346 = all 17 limbs of %r4 as a single i544
+%r348 = select i1 %r233, i544 %r346, i544 0 ; addend = the %r4 value when the top bit of the difference is set, otherwise 0
+%r349 = add i544 %r231, %r348 ; final result, wrapping modulo 2^544
+%r350 = trunc i544 %r349 to i32 ; store the result to %r1 limb by limb, shifting right 32 bits each step
+%r352 = getelementptr i32, i32* %r1, i32 0
+store i32 %r350, i32* %r352
+%r353 = lshr i544 %r349, 32
+%r354 = trunc i544 %r353 to i32
+%r356 = getelementptr i32, i32* %r1, i32 1
+store i32 %r354, i32* %r356
+%r357 = lshr i544 %r353, 32
+%r358 = trunc i544 %r357 to i32
+%r360 = getelementptr i32, i32* %r1, i32 2
+store i32 %r358, i32* %r360
+%r361 = lshr i544 %r357, 32
+%r362 = trunc i544 %r361 to i32
+%r364 = getelementptr i32, i32* %r1, i32 3
+store i32 %r362, i32* %r364
+%r365 = lshr i544 %r361, 32
+%r366 = trunc i544 %r365 to i32
+%r368 = getelementptr i32, i32* %r1, i32 4
+store i32 %r366, i32* %r368
+%r369 = lshr i544 %r365, 32
+%r370 = trunc i544 %r369 to i32
+%r372 = getelementptr i32, i32* %r1, i32 5
+store i32 %r370, i32* %r372
+%r373 = lshr i544 %r369, 32
+%r374 = trunc i544 %r373 to i32
+%r376 = getelementptr i32, i32* %r1, i32 6
+store i32 %r374, i32* %r376
+%r377 = lshr i544 %r373, 32
+%r378 = trunc i544 %r377 to i32
+%r380 = getelementptr i32, i32* %r1, i32 7
+store i32 %r378, i32* %r380
+%r381 = lshr i544 %r377, 32
+%r382 = trunc i544 %r381 to i32
+%r384 = getelementptr i32, i32* %r1, i32 8
+store i32 %r382, i32* %r384
+%r385 = lshr i544 %r381, 32
+%r386 = trunc i544 %r385 to i32
+%r388 = getelementptr i32, i32* %r1, i32 9
+store i32 %r386, i32* %r388
+%r389 = lshr i544 %r385, 32
+%r390 = trunc i544 %r389 to i32
+%r392 = getelementptr i32, i32* %r1, i32 10
+store i32 %r390, i32* %r392
+%r393 = lshr i544 %r389, 32
+%r394 = trunc i544 %r393 to i32
+%r396 = getelementptr i32, i32* %r1, i32 11
+store i32 %r394, i32* %r396
+%r397 = lshr i544 %r393, 32
+%r398 = trunc i544 %r397 to i32
+%r400 = getelementptr i32, i32* %r1, i32 12
+store i32 %r398, i32* %r400
+%r401 = lshr i544 %r397, 32
+%r402 = trunc i544 %r401 to i32
+%r404 = getelementptr i32, i32* %r1, i32 13
+store i32 %r402, i32* %r404
+%r405 = lshr i544 %r401, 32
+%r406 = trunc i544 %r405 to i32
+%r408 = getelementptr i32, i32* %r1, i32 14
+store i32 %r406, i32* %r408
+%r409 = lshr i544 %r405, 32
+%r410 = trunc i544 %r409 to i32
+%r412 = getelementptr i32, i32* %r1, i32 15
+store i32 %r410, i32* %r412
+%r413 = lshr i544 %r409, 32
+%r414 = trunc i544 %r413 to i32
+%r416 = getelementptr i32, i32* %r1, i32 16
+store i32 %r414, i32* %r416
+ret void
+}
+define void @mcl_fpDbl_add17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; r1[0..33] = r2[0..33] + r3[0..33]; upper 17 words get r4[0..16] subtracted when that does not borrow
+{ ; every operand is an array of 32-bit words, least-significant word first; r4 is presumably the field modulus - confirm against caller
+%r5 = load i32, i32* %r2 ; --- assemble r2[0..33] into a single i1088 value ---
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1 ; pattern for word i: load, zero-extend, shift left by 32*i, OR into the accumulator
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96 ; accumulator grows by 32 bits before each new word is merged
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165
+%r167 = zext i768 %r166 to i800
+%r169 = getelementptr i32, i32* %r2, i32 24
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i800
+%r172 = shl i800 %r171, 768
+%r173 = or i800 %r167, %r172
+%r174 = zext i800 %r173 to i832
+%r176 = getelementptr i32, i32* %r2, i32 25
+%r177 = load i32, i32* %r176
+%r178 = zext i32 %r177 to i832
+%r179 = shl i832 %r178, 800
+%r180 = or i832 %r174, %r179
+%r181 = zext i832 %r180 to i864
+%r183 = getelementptr i32, i32* %r2, i32 26
+%r184 = load i32, i32* %r183
+%r185 = zext i32 %r184 to i864
+%r186 = shl i864 %r185, 832
+%r187 = or i864 %r181, %r186
+%r188 = zext i864 %r187 to i896
+%r190 = getelementptr i32, i32* %r2, i32 27
+%r191 = load i32, i32* %r190
+%r192 = zext i32 %r191 to i896
+%r193 = shl i896 %r192, 864
+%r194 = or i896 %r188, %r193
+%r195 = zext i896 %r194 to i928
+%r197 = getelementptr i32, i32* %r2, i32 28
+%r198 = load i32, i32* %r197
+%r199 = zext i32 %r198 to i928
+%r200 = shl i928 %r199, 896
+%r201 = or i928 %r195, %r200
+%r202 = zext i928 %r201 to i960
+%r204 = getelementptr i32, i32* %r2, i32 29
+%r205 = load i32, i32* %r204
+%r206 = zext i32 %r205 to i960
+%r207 = shl i960 %r206, 928
+%r208 = or i960 %r202, %r207
+%r209 = zext i960 %r208 to i992
+%r211 = getelementptr i32, i32* %r2, i32 30
+%r212 = load i32, i32* %r211
+%r213 = zext i32 %r212 to i992
+%r214 = shl i992 %r213, 960
+%r215 = or i992 %r209, %r214
+%r216 = zext i992 %r215 to i1024
+%r218 = getelementptr i32, i32* %r2, i32 31
+%r219 = load i32, i32* %r218
+%r220 = zext i32 %r219 to i1024
+%r221 = shl i1024 %r220, 992
+%r222 = or i1024 %r216, %r221
+%r223 = zext i1024 %r222 to i1056
+%r225 = getelementptr i32, i32* %r2, i32 32
+%r226 = load i32, i32* %r225
+%r227 = zext i32 %r226 to i1056
+%r228 = shl i1056 %r227, 1024
+%r229 = or i1056 %r223, %r228
+%r230 = zext i1056 %r229 to i1088
+%r232 = getelementptr i32, i32* %r2, i32 33
+%r233 = load i32, i32* %r232
+%r234 = zext i32 %r233 to i1088
+%r235 = shl i1088 %r234, 1056
+%r236 = or i1088 %r230, %r235 ; %r236 = all 34 words of r2 as one 1088-bit integer
+%r237 = load i32, i32* %r3 ; --- assemble r3[0..33] into a single i1088 value, same pattern as for r2 ---
+%r238 = zext i32 %r237 to i64
+%r240 = getelementptr i32, i32* %r3, i32 1
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i64
+%r243 = shl i64 %r242, 32
+%r244 = or i64 %r238, %r243
+%r245 = zext i64 %r244 to i96
+%r247 = getelementptr i32, i32* %r3, i32 2
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i96
+%r250 = shl i96 %r249, 64
+%r251 = or i96 %r245, %r250
+%r252 = zext i96 %r251 to i128
+%r254 = getelementptr i32, i32* %r3, i32 3
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i128
+%r257 = shl i128 %r256, 96
+%r258 = or i128 %r252, %r257
+%r259 = zext i128 %r258 to i160
+%r261 = getelementptr i32, i32* %r3, i32 4
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i160
+%r264 = shl i160 %r263, 128
+%r265 = or i160 %r259, %r264
+%r266 = zext i160 %r265 to i192
+%r268 = getelementptr i32, i32* %r3, i32 5
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i192
+%r271 = shl i192 %r270, 160
+%r272 = or i192 %r266, %r271
+%r273 = zext i192 %r272 to i224
+%r275 = getelementptr i32, i32* %r3, i32 6
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i224
+%r278 = shl i224 %r277, 192
+%r279 = or i224 %r273, %r278
+%r280 = zext i224 %r279 to i256
+%r282 = getelementptr i32, i32* %r3, i32 7
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i256
+%r285 = shl i256 %r284, 224
+%r286 = or i256 %r280, %r285
+%r287 = zext i256 %r286 to i288
+%r289 = getelementptr i32, i32* %r3, i32 8
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i288
+%r292 = shl i288 %r291, 256
+%r293 = or i288 %r287, %r292
+%r294 = zext i288 %r293 to i320
+%r296 = getelementptr i32, i32* %r3, i32 9
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i320
+%r299 = shl i320 %r298, 288
+%r300 = or i320 %r294, %r299
+%r301 = zext i320 %r300 to i352
+%r303 = getelementptr i32, i32* %r3, i32 10
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i352
+%r306 = shl i352 %r305, 320
+%r307 = or i352 %r301, %r306
+%r308 = zext i352 %r307 to i384
+%r310 = getelementptr i32, i32* %r3, i32 11
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i384
+%r313 = shl i384 %r312, 352
+%r314 = or i384 %r308, %r313
+%r315 = zext i384 %r314 to i416
+%r317 = getelementptr i32, i32* %r3, i32 12
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i416
+%r320 = shl i416 %r319, 384
+%r321 = or i416 %r315, %r320
+%r322 = zext i416 %r321 to i448
+%r324 = getelementptr i32, i32* %r3, i32 13
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i448
+%r327 = shl i448 %r326, 416
+%r328 = or i448 %r322, %r327
+%r329 = zext i448 %r328 to i480
+%r331 = getelementptr i32, i32* %r3, i32 14
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i480
+%r334 = shl i480 %r333, 448
+%r335 = or i480 %r329, %r334
+%r336 = zext i480 %r335 to i512
+%r338 = getelementptr i32, i32* %r3, i32 15
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i512
+%r341 = shl i512 %r340, 480
+%r342 = or i512 %r336, %r341
+%r343 = zext i512 %r342 to i544
+%r345 = getelementptr i32, i32* %r3, i32 16
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i544
+%r348 = shl i544 %r347, 512
+%r349 = or i544 %r343, %r348
+%r350 = zext i544 %r349 to i576
+%r352 = getelementptr i32, i32* %r3, i32 17
+%r353 = load i32, i32* %r352
+%r354 = zext i32 %r353 to i576
+%r355 = shl i576 %r354, 544
+%r356 = or i576 %r350, %r355
+%r357 = zext i576 %r356 to i608
+%r359 = getelementptr i32, i32* %r3, i32 18
+%r360 = load i32, i32* %r359
+%r361 = zext i32 %r360 to i608
+%r362 = shl i608 %r361, 576
+%r363 = or i608 %r357, %r362
+%r364 = zext i608 %r363 to i640
+%r366 = getelementptr i32, i32* %r3, i32 19
+%r367 = load i32, i32* %r366
+%r368 = zext i32 %r367 to i640
+%r369 = shl i640 %r368, 608
+%r370 = or i640 %r364, %r369
+%r371 = zext i640 %r370 to i672
+%r373 = getelementptr i32, i32* %r3, i32 20
+%r374 = load i32, i32* %r373
+%r375 = zext i32 %r374 to i672
+%r376 = shl i672 %r375, 640
+%r377 = or i672 %r371, %r376
+%r378 = zext i672 %r377 to i704
+%r380 = getelementptr i32, i32* %r3, i32 21
+%r381 = load i32, i32* %r380
+%r382 = zext i32 %r381 to i704
+%r383 = shl i704 %r382, 672
+%r384 = or i704 %r378, %r383
+%r385 = zext i704 %r384 to i736
+%r387 = getelementptr i32, i32* %r3, i32 22
+%r388 = load i32, i32* %r387
+%r389 = zext i32 %r388 to i736
+%r390 = shl i736 %r389, 704
+%r391 = or i736 %r385, %r390
+%r392 = zext i736 %r391 to i768
+%r394 = getelementptr i32, i32* %r3, i32 23
+%r395 = load i32, i32* %r394
+%r396 = zext i32 %r395 to i768
+%r397 = shl i768 %r396, 736
+%r398 = or i768 %r392, %r397
+%r399 = zext i768 %r398 to i800
+%r401 = getelementptr i32, i32* %r3, i32 24
+%r402 = load i32, i32* %r401
+%r403 = zext i32 %r402 to i800
+%r404 = shl i800 %r403, 768
+%r405 = or i800 %r399, %r404
+%r406 = zext i800 %r405 to i832
+%r408 = getelementptr i32, i32* %r3, i32 25
+%r409 = load i32, i32* %r408
+%r410 = zext i32 %r409 to i832
+%r411 = shl i832 %r410, 800
+%r412 = or i832 %r406, %r411
+%r413 = zext i832 %r412 to i864
+%r415 = getelementptr i32, i32* %r3, i32 26
+%r416 = load i32, i32* %r415
+%r417 = zext i32 %r416 to i864
+%r418 = shl i864 %r417, 832
+%r419 = or i864 %r413, %r418
+%r420 = zext i864 %r419 to i896
+%r422 = getelementptr i32, i32* %r3, i32 27
+%r423 = load i32, i32* %r422
+%r424 = zext i32 %r423 to i896
+%r425 = shl i896 %r424, 864
+%r426 = or i896 %r420, %r425
+%r427 = zext i896 %r426 to i928
+%r429 = getelementptr i32, i32* %r3, i32 28
+%r430 = load i32, i32* %r429
+%r431 = zext i32 %r430 to i928
+%r432 = shl i928 %r431, 896
+%r433 = or i928 %r427, %r432
+%r434 = zext i928 %r433 to i960
+%r436 = getelementptr i32, i32* %r3, i32 29
+%r437 = load i32, i32* %r436
+%r438 = zext i32 %r437 to i960
+%r439 = shl i960 %r438, 928
+%r440 = or i960 %r434, %r439
+%r441 = zext i960 %r440 to i992
+%r443 = getelementptr i32, i32* %r3, i32 30
+%r444 = load i32, i32* %r443
+%r445 = zext i32 %r444 to i992
+%r446 = shl i992 %r445, 960
+%r447 = or i992 %r441, %r446
+%r448 = zext i992 %r447 to i1024
+%r450 = getelementptr i32, i32* %r3, i32 31
+%r451 = load i32, i32* %r450
+%r452 = zext i32 %r451 to i1024
+%r453 = shl i1024 %r452, 992
+%r454 = or i1024 %r448, %r453
+%r455 = zext i1024 %r454 to i1056
+%r457 = getelementptr i32, i32* %r3, i32 32
+%r458 = load i32, i32* %r457
+%r459 = zext i32 %r458 to i1056
+%r460 = shl i1056 %r459, 1024
+%r461 = or i1056 %r455, %r460
+%r462 = zext i1056 %r461 to i1088
+%r464 = getelementptr i32, i32* %r3, i32 33
+%r465 = load i32, i32* %r464
+%r466 = zext i32 %r465 to i1088
+%r467 = shl i1088 %r466, 1056
+%r468 = or i1088 %r462, %r467 ; %r468 = all 34 words of r3 as one 1088-bit integer
+%r469 = zext i1088 %r236 to i1120 ; widen both operands by 32 bits so the carry out of bit 1087 is kept
+%r470 = zext i1088 %r468 to i1120
+%r471 = add i1120 %r469, %r470 ; %r471 = full sum r2 + r3, carry included
+%r472 = trunc i1120 %r471 to i544 ; low 544 bits of the sum; stored as-is, no subtraction applied
+%r473 = trunc i544 %r472 to i32 ; --- store the low half to r1[0..16], one 32-bit word per step ---
+%r475 = getelementptr i32, i32* %r1, i32 0
+store i32 %r473, i32* %r475
+%r476 = lshr i544 %r472, 32 ; shift the next word down into the low 32 bits
+%r477 = trunc i544 %r476 to i32
+%r479 = getelementptr i32, i32* %r1, i32 1
+store i32 %r477, i32* %r479
+%r480 = lshr i544 %r476, 32
+%r481 = trunc i544 %r480 to i32
+%r483 = getelementptr i32, i32* %r1, i32 2
+store i32 %r481, i32* %r483
+%r484 = lshr i544 %r480, 32
+%r485 = trunc i544 %r484 to i32
+%r487 = getelementptr i32, i32* %r1, i32 3
+store i32 %r485, i32* %r487
+%r488 = lshr i544 %r484, 32
+%r489 = trunc i544 %r488 to i32
+%r491 = getelementptr i32, i32* %r1, i32 4
+store i32 %r489, i32* %r491
+%r492 = lshr i544 %r488, 32
+%r493 = trunc i544 %r492 to i32
+%r495 = getelementptr i32, i32* %r1, i32 5
+store i32 %r493, i32* %r495
+%r496 = lshr i544 %r492, 32
+%r497 = trunc i544 %r496 to i32
+%r499 = getelementptr i32, i32* %r1, i32 6
+store i32 %r497, i32* %r499
+%r500 = lshr i544 %r496, 32
+%r501 = trunc i544 %r500 to i32
+%r503 = getelementptr i32, i32* %r1, i32 7
+store i32 %r501, i32* %r503
+%r504 = lshr i544 %r500, 32
+%r505 = trunc i544 %r504 to i32
+%r507 = getelementptr i32, i32* %r1, i32 8
+store i32 %r505, i32* %r507
+%r508 = lshr i544 %r504, 32
+%r509 = trunc i544 %r508 to i32
+%r511 = getelementptr i32, i32* %r1, i32 9
+store i32 %r509, i32* %r511
+%r512 = lshr i544 %r508, 32
+%r513 = trunc i544 %r512 to i32
+%r515 = getelementptr i32, i32* %r1, i32 10
+store i32 %r513, i32* %r515
+%r516 = lshr i544 %r512, 32
+%r517 = trunc i544 %r516 to i32
+%r519 = getelementptr i32, i32* %r1, i32 11
+store i32 %r517, i32* %r519
+%r520 = lshr i544 %r516, 32
+%r521 = trunc i544 %r520 to i32
+%r523 = getelementptr i32, i32* %r1, i32 12
+store i32 %r521, i32* %r523
+%r524 = lshr i544 %r520, 32
+%r525 = trunc i544 %r524 to i32
+%r527 = getelementptr i32, i32* %r1, i32 13
+store i32 %r525, i32* %r527
+%r528 = lshr i544 %r524, 32
+%r529 = trunc i544 %r528 to i32
+%r531 = getelementptr i32, i32* %r1, i32 14
+store i32 %r529, i32* %r531
+%r532 = lshr i544 %r528, 32
+%r533 = trunc i544 %r532 to i32
+%r535 = getelementptr i32, i32* %r1, i32 15
+store i32 %r533, i32* %r535
+%r536 = lshr i544 %r532, 32
+%r537 = trunc i544 %r536 to i32
+%r539 = getelementptr i32, i32* %r1, i32 16
+store i32 %r537, i32* %r539
+%r540 = lshr i1120 %r471, 544 ; upper part of the sum: bits 544 and above
+%r541 = trunc i1120 %r540 to i576 ; kept at 576 bits: 544 data bits plus the carry word
+%r542 = load i32, i32* %r4 ; --- assemble r4[0..16] into a single i544 value, same pattern as above ---
+%r543 = zext i32 %r542 to i64
+%r545 = getelementptr i32, i32* %r4, i32 1
+%r546 = load i32, i32* %r545
+%r547 = zext i32 %r546 to i64
+%r548 = shl i64 %r547, 32
+%r549 = or i64 %r543, %r548
+%r550 = zext i64 %r549 to i96
+%r552 = getelementptr i32, i32* %r4, i32 2
+%r553 = load i32, i32* %r552
+%r554 = zext i32 %r553 to i96
+%r555 = shl i96 %r554, 64
+%r556 = or i96 %r550, %r555
+%r557 = zext i96 %r556 to i128
+%r559 = getelementptr i32, i32* %r4, i32 3
+%r560 = load i32, i32* %r559
+%r561 = zext i32 %r560 to i128
+%r562 = shl i128 %r561, 96
+%r563 = or i128 %r557, %r562
+%r564 = zext i128 %r563 to i160
+%r566 = getelementptr i32, i32* %r4, i32 4
+%r567 = load i32, i32* %r566
+%r568 = zext i32 %r567 to i160
+%r569 = shl i160 %r568, 128
+%r570 = or i160 %r564, %r569
+%r571 = zext i160 %r570 to i192
+%r573 = getelementptr i32, i32* %r4, i32 5
+%r574 = load i32, i32* %r573
+%r575 = zext i32 %r574 to i192
+%r576 = shl i192 %r575, 160
+%r577 = or i192 %r571, %r576
+%r578 = zext i192 %r577 to i224
+%r580 = getelementptr i32, i32* %r4, i32 6
+%r581 = load i32, i32* %r580
+%r582 = zext i32 %r581 to i224
+%r583 = shl i224 %r582, 192
+%r584 = or i224 %r578, %r583
+%r585 = zext i224 %r584 to i256
+%r587 = getelementptr i32, i32* %r4, i32 7
+%r588 = load i32, i32* %r587
+%r589 = zext i32 %r588 to i256
+%r590 = shl i256 %r589, 224
+%r591 = or i256 %r585, %r590
+%r592 = zext i256 %r591 to i288
+%r594 = getelementptr i32, i32* %r4, i32 8
+%r595 = load i32, i32* %r594
+%r596 = zext i32 %r595 to i288
+%r597 = shl i288 %r596, 256
+%r598 = or i288 %r592, %r597
+%r599 = zext i288 %r598 to i320
+%r601 = getelementptr i32, i32* %r4, i32 9
+%r602 = load i32, i32* %r601
+%r603 = zext i32 %r602 to i320
+%r604 = shl i320 %r603, 288
+%r605 = or i320 %r599, %r604
+%r606 = zext i320 %r605 to i352
+%r608 = getelementptr i32, i32* %r4, i32 10
+%r609 = load i32, i32* %r608
+%r610 = zext i32 %r609 to i352
+%r611 = shl i352 %r610, 320
+%r612 = or i352 %r606, %r611
+%r613 = zext i352 %r612 to i384
+%r615 = getelementptr i32, i32* %r4, i32 11
+%r616 = load i32, i32* %r615
+%r617 = zext i32 %r616 to i384
+%r618 = shl i384 %r617, 352
+%r619 = or i384 %r613, %r618
+%r620 = zext i384 %r619 to i416
+%r622 = getelementptr i32, i32* %r4, i32 12
+%r623 = load i32, i32* %r622
+%r624 = zext i32 %r623 to i416
+%r625 = shl i416 %r624, 384
+%r626 = or i416 %r620, %r625
+%r627 = zext i416 %r626 to i448
+%r629 = getelementptr i32, i32* %r4, i32 13
+%r630 = load i32, i32* %r629
+%r631 = zext i32 %r630 to i448
+%r632 = shl i448 %r631, 416
+%r633 = or i448 %r627, %r632
+%r634 = zext i448 %r633 to i480
+%r636 = getelementptr i32, i32* %r4, i32 14
+%r637 = load i32, i32* %r636
+%r638 = zext i32 %r637 to i480
+%r639 = shl i480 %r638, 448
+%r640 = or i480 %r634, %r639
+%r641 = zext i480 %r640 to i512
+%r643 = getelementptr i32, i32* %r4, i32 15
+%r644 = load i32, i32* %r643
+%r645 = zext i32 %r644 to i512
+%r646 = shl i512 %r645, 480
+%r647 = or i512 %r641, %r646
+%r648 = zext i512 %r647 to i544
+%r650 = getelementptr i32, i32* %r4, i32 16
+%r651 = load i32, i32* %r650
+%r652 = zext i32 %r651 to i544
+%r653 = shl i544 %r652, 512
+%r654 = or i544 %r648, %r653 ; %r654 = all 17 words of r4 as one 544-bit integer
+%r655 = zext i544 %r654 to i576 ; widen to the same width as the upper half of the sum
+%r656 = sub i576 %r541, %r655 ; trial subtraction: upper half of the sum minus r4
+%r657 = lshr i576 %r656, 544 ; bring bit 544 of the difference down to bit 0
+%r658 = trunc i576 %r657 to i1 ; bit 544 is used as the borrow flag; NOTE(review): only reliable if the upper half is below r4 + 2^544 - confirm input range with caller
+%r659 = select i1 %r658, i576 %r541, i576 %r656 ; flag set -> keep the unsubtracted upper half, otherwise use the difference
+%r660 = trunc i576 %r659 to i544 ; drop the top word; 17 words remain to be stored
+%r662 = getelementptr i32, i32* %r1, i32 17 ; %r662 = &r1[17], base of the upper output half
+%r663 = trunc i544 %r660 to i32 ; --- store the selected value to r1[17..33], one 32-bit word per step ---
+%r665 = getelementptr i32, i32* %r662, i32 0
+store i32 %r663, i32* %r665
+%r666 = lshr i544 %r660, 32
+%r667 = trunc i544 %r666 to i32
+%r669 = getelementptr i32, i32* %r662, i32 1
+store i32 %r667, i32* %r669
+%r670 = lshr i544 %r666, 32
+%r671 = trunc i544 %r670 to i32
+%r673 = getelementptr i32, i32* %r662, i32 2
+store i32 %r671, i32* %r673
+%r674 = lshr i544 %r670, 32
+%r675 = trunc i544 %r674 to i32
+%r677 = getelementptr i32, i32* %r662, i32 3
+store i32 %r675, i32* %r677
+%r678 = lshr i544 %r674, 32
+%r679 = trunc i544 %r678 to i32
+%r681 = getelementptr i32, i32* %r662, i32 4
+store i32 %r679, i32* %r681
+%r682 = lshr i544 %r678, 32
+%r683 = trunc i544 %r682 to i32
+%r685 = getelementptr i32, i32* %r662, i32 5
+store i32 %r683, i32* %r685
+%r686 = lshr i544 %r682, 32
+%r687 = trunc i544 %r686 to i32
+%r689 = getelementptr i32, i32* %r662, i32 6
+store i32 %r687, i32* %r689
+%r690 = lshr i544 %r686, 32
+%r691 = trunc i544 %r690 to i32
+%r693 = getelementptr i32, i32* %r662, i32 7
+store i32 %r691, i32* %r693
+%r694 = lshr i544 %r690, 32
+%r695 = trunc i544 %r694 to i32
+%r697 = getelementptr i32, i32* %r662, i32 8
+store i32 %r695, i32* %r697
+%r698 = lshr i544 %r694, 32
+%r699 = trunc i544 %r698 to i32
+%r701 = getelementptr i32, i32* %r662, i32 9
+store i32 %r699, i32* %r701
+%r702 = lshr i544 %r698, 32
+%r703 = trunc i544 %r702 to i32
+%r705 = getelementptr i32, i32* %r662, i32 10
+store i32 %r703, i32* %r705
+%r706 = lshr i544 %r702, 32
+%r707 = trunc i544 %r706 to i32
+%r709 = getelementptr i32, i32* %r662, i32 11
+store i32 %r707, i32* %r709
+%r710 = lshr i544 %r706, 32
+%r711 = trunc i544 %r710 to i32
+%r713 = getelementptr i32, i32* %r662, i32 12
+store i32 %r711, i32* %r713
+%r714 = lshr i544 %r710, 32
+%r715 = trunc i544 %r714 to i32
+%r717 = getelementptr i32, i32* %r662, i32 13
+store i32 %r715, i32* %r717
+%r718 = lshr i544 %r714, 32
+%r719 = trunc i544 %r718 to i32
+%r721 = getelementptr i32, i32* %r662, i32 14
+store i32 %r719, i32* %r721
+%r722 = lshr i544 %r718, 32
+%r723 = trunc i544 %r722 to i32
+%r725 = getelementptr i32, i32* %r662, i32 15
+store i32 %r723, i32* %r725
+%r726 = lshr i544 %r722, 32
+%r727 = trunc i544 %r726 to i32
+%r729 = getelementptr i32, i32* %r662, i32 16
+store i32 %r727, i32* %r729
+ret void ; r1[0..33] has been written; nothing is returned
+}
+define void @mcl_fpDbl_sub17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165
+%r167 = zext i768 %r166 to i800
+%r169 = getelementptr i32, i32* %r2, i32 24
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i800
+%r172 = shl i800 %r171, 768
+%r173 = or i800 %r167, %r172
+%r174 = zext i800 %r173 to i832
+%r176 = getelementptr i32, i32* %r2, i32 25
+%r177 = load i32, i32* %r176
+%r178 = zext i32 %r177 to i832
+%r179 = shl i832 %r178, 800
+%r180 = or i832 %r174, %r179
+%r181 = zext i832 %r180 to i864
+%r183 = getelementptr i32, i32* %r2, i32 26
+%r184 = load i32, i32* %r183
+%r185 = zext i32 %r184 to i864
+%r186 = shl i864 %r185, 832
+%r187 = or i864 %r181, %r186
+%r188 = zext i864 %r187 to i896
+%r190 = getelementptr i32, i32* %r2, i32 27
+%r191 = load i32, i32* %r190
+%r192 = zext i32 %r191 to i896
+%r193 = shl i896 %r192, 864
+%r194 = or i896 %r188, %r193
+%r195 = zext i896 %r194 to i928
+%r197 = getelementptr i32, i32* %r2, i32 28
+%r198 = load i32, i32* %r197
+%r199 = zext i32 %r198 to i928
+%r200 = shl i928 %r199, 896
+%r201 = or i928 %r195, %r200
+%r202 = zext i928 %r201 to i960
+%r204 = getelementptr i32, i32* %r2, i32 29
+%r205 = load i32, i32* %r204
+%r206 = zext i32 %r205 to i960
+%r207 = shl i960 %r206, 928
+%r208 = or i960 %r202, %r207
+%r209 = zext i960 %r208 to i992
+%r211 = getelementptr i32, i32* %r2, i32 30
+%r212 = load i32, i32* %r211
+%r213 = zext i32 %r212 to i992
+%r214 = shl i992 %r213, 960
+%r215 = or i992 %r209, %r214
+%r216 = zext i992 %r215 to i1024
+%r218 = getelementptr i32, i32* %r2, i32 31
+%r219 = load i32, i32* %r218
+%r220 = zext i32 %r219 to i1024
+%r221 = shl i1024 %r220, 992
+%r222 = or i1024 %r216, %r221
+%r223 = zext i1024 %r222 to i1056
+%r225 = getelementptr i32, i32* %r2, i32 32
+%r226 = load i32, i32* %r225
+%r227 = zext i32 %r226 to i1056
+%r228 = shl i1056 %r227, 1024
+%r229 = or i1056 %r223, %r228
+%r230 = zext i1056 %r229 to i1088
+%r232 = getelementptr i32, i32* %r2, i32 33
+%r233 = load i32, i32* %r232
+%r234 = zext i32 %r233 to i1088
+%r235 = shl i1088 %r234, 1056
+%r236 = or i1088 %r230, %r235
+%r237 = load i32, i32* %r3
+%r238 = zext i32 %r237 to i64
+%r240 = getelementptr i32, i32* %r3, i32 1
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i64
+%r243 = shl i64 %r242, 32
+%r244 = or i64 %r238, %r243
+%r245 = zext i64 %r244 to i96
+%r247 = getelementptr i32, i32* %r3, i32 2
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i96
+%r250 = shl i96 %r249, 64
+%r251 = or i96 %r245, %r250
+%r252 = zext i96 %r251 to i128
+%r254 = getelementptr i32, i32* %r3, i32 3
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i128
+%r257 = shl i128 %r256, 96
+%r258 = or i128 %r252, %r257
+%r259 = zext i128 %r258 to i160
+%r261 = getelementptr i32, i32* %r3, i32 4
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i160
+%r264 = shl i160 %r263, 128
+%r265 = or i160 %r259, %r264
+%r266 = zext i160 %r265 to i192
+%r268 = getelementptr i32, i32* %r3, i32 5
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i192
+%r271 = shl i192 %r270, 160
+%r272 = or i192 %r266, %r271
+%r273 = zext i192 %r272 to i224
+%r275 = getelementptr i32, i32* %r3, i32 6
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i224
+%r278 = shl i224 %r277, 192
+%r279 = or i224 %r273, %r278
+%r280 = zext i224 %r279 to i256
+%r282 = getelementptr i32, i32* %r3, i32 7
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i256
+%r285 = shl i256 %r284, 224
+%r286 = or i256 %r280, %r285
+%r287 = zext i256 %r286 to i288
+%r289 = getelementptr i32, i32* %r3, i32 8
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i288
+%r292 = shl i288 %r291, 256
+%r293 = or i288 %r287, %r292
+%r294 = zext i288 %r293 to i320
+%r296 = getelementptr i32, i32* %r3, i32 9
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i320
+%r299 = shl i320 %r298, 288
+%r300 = or i320 %r294, %r299
+%r301 = zext i320 %r300 to i352
+%r303 = getelementptr i32, i32* %r3, i32 10
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i352
+%r306 = shl i352 %r305, 320
+%r307 = or i352 %r301, %r306
+%r308 = zext i352 %r307 to i384
+%r310 = getelementptr i32, i32* %r3, i32 11
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i384
+%r313 = shl i384 %r312, 352
+%r314 = or i384 %r308, %r313
+%r315 = zext i384 %r314 to i416
+%r317 = getelementptr i32, i32* %r3, i32 12
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i416
+%r320 = shl i416 %r319, 384
+%r321 = or i416 %r315, %r320
+%r322 = zext i416 %r321 to i448
+%r324 = getelementptr i32, i32* %r3, i32 13
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i448
+%r327 = shl i448 %r326, 416
+%r328 = or i448 %r322, %r327
+%r329 = zext i448 %r328 to i480
+%r331 = getelementptr i32, i32* %r3, i32 14
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i480
+%r334 = shl i480 %r333, 448
+%r335 = or i480 %r329, %r334
+%r336 = zext i480 %r335 to i512
+%r338 = getelementptr i32, i32* %r3, i32 15
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i512
+%r341 = shl i512 %r340, 480
+%r342 = or i512 %r336, %r341
+%r343 = zext i512 %r342 to i544
+%r345 = getelementptr i32, i32* %r3, i32 16
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i544
+%r348 = shl i544 %r347, 512
+%r349 = or i544 %r343, %r348
+%r350 = zext i544 %r349 to i576
+%r352 = getelementptr i32, i32* %r3, i32 17
+%r353 = load i32, i32* %r352
+%r354 = zext i32 %r353 to i576
+%r355 = shl i576 %r354, 544
+%r356 = or i576 %r350, %r355
+%r357 = zext i576 %r356 to i608
+%r359 = getelementptr i32, i32* %r3, i32 18
+%r360 = load i32, i32* %r359
+%r361 = zext i32 %r360 to i608
+%r362 = shl i608 %r361, 576
+%r363 = or i608 %r357, %r362
+%r364 = zext i608 %r363 to i640
+%r366 = getelementptr i32, i32* %r3, i32 19
+%r367 = load i32, i32* %r366
+%r368 = zext i32 %r367 to i640
+%r369 = shl i640 %r368, 608
+%r370 = or i640 %r364, %r369
+%r371 = zext i640 %r370 to i672
+%r373 = getelementptr i32, i32* %r3, i32 20
+%r374 = load i32, i32* %r373
+%r375 = zext i32 %r374 to i672
+%r376 = shl i672 %r375, 640
+%r377 = or i672 %r371, %r376
+%r378 = zext i672 %r377 to i704
+%r380 = getelementptr i32, i32* %r3, i32 21
+%r381 = load i32, i32* %r380
+%r382 = zext i32 %r381 to i704
+%r383 = shl i704 %r382, 672
+%r384 = or i704 %r378, %r383
+%r385 = zext i704 %r384 to i736
+%r387 = getelementptr i32, i32* %r3, i32 22
+%r388 = load i32, i32* %r387
+%r389 = zext i32 %r388 to i736
+%r390 = shl i736 %r389, 704
+%r391 = or i736 %r385, %r390
+%r392 = zext i736 %r391 to i768
+%r394 = getelementptr i32, i32* %r3, i32 23
+%r395 = load i32, i32* %r394
+%r396 = zext i32 %r395 to i768
+%r397 = shl i768 %r396, 736
+%r398 = or i768 %r392, %r397
+%r399 = zext i768 %r398 to i800
+%r401 = getelementptr i32, i32* %r3, i32 24
+%r402 = load i32, i32* %r401
+%r403 = zext i32 %r402 to i800
+%r404 = shl i800 %r403, 768
+%r405 = or i800 %r399, %r404
+%r406 = zext i800 %r405 to i832
+%r408 = getelementptr i32, i32* %r3, i32 25
+%r409 = load i32, i32* %r408
+%r410 = zext i32 %r409 to i832
+%r411 = shl i832 %r410, 800
+%r412 = or i832 %r406, %r411
+%r413 = zext i832 %r412 to i864
+%r415 = getelementptr i32, i32* %r3, i32 26
+%r416 = load i32, i32* %r415
+%r417 = zext i32 %r416 to i864
+%r418 = shl i864 %r417, 832
+%r419 = or i864 %r413, %r418
+%r420 = zext i864 %r419 to i896
+%r422 = getelementptr i32, i32* %r3, i32 27
+%r423 = load i32, i32* %r422
+%r424 = zext i32 %r423 to i896
+%r425 = shl i896 %r424, 864
+%r426 = or i896 %r420, %r425
+%r427 = zext i896 %r426 to i928
+%r429 = getelementptr i32, i32* %r3, i32 28
+%r430 = load i32, i32* %r429
+%r431 = zext i32 %r430 to i928
+%r432 = shl i928 %r431, 896
+%r433 = or i928 %r427, %r432
+%r434 = zext i928 %r433 to i960
+%r436 = getelementptr i32, i32* %r3, i32 29
+%r437 = load i32, i32* %r436
+%r438 = zext i32 %r437 to i960
+%r439 = shl i960 %r438, 928
+%r440 = or i960 %r434, %r439
+%r441 = zext i960 %r440 to i992
+%r443 = getelementptr i32, i32* %r3, i32 30
+%r444 = load i32, i32* %r443
+%r445 = zext i32 %r444 to i992
+%r446 = shl i992 %r445, 960
+%r447 = or i992 %r441, %r446
+%r448 = zext i992 %r447 to i1024
+%r450 = getelementptr i32, i32* %r3, i32 31
+%r451 = load i32, i32* %r450
+%r452 = zext i32 %r451 to i1024
+%r453 = shl i1024 %r452, 992
+%r454 = or i1024 %r448, %r453
+%r455 = zext i1024 %r454 to i1056
+%r457 = getelementptr i32, i32* %r3, i32 32
+%r458 = load i32, i32* %r457
+%r459 = zext i32 %r458 to i1056
+%r460 = shl i1056 %r459, 1024
+%r461 = or i1056 %r455, %r460
+%r462 = zext i1056 %r461 to i1088
+%r464 = getelementptr i32, i32* %r3, i32 33
+%r465 = load i32, i32* %r464
+%r466 = zext i32 %r465 to i1088
+%r467 = shl i1088 %r466, 1056
+%r468 = or i1088 %r462, %r467
+%r469 = zext i1088 %r236 to i1120
+%r470 = zext i1088 %r468 to i1120
+%r471 = sub i1120 %r469, %r470
+%r472 = trunc i1120 %r471 to i544
+%r473 = trunc i544 %r472 to i32
+%r475 = getelementptr i32, i32* %r1, i32 0
+store i32 %r473, i32* %r475
+%r476 = lshr i544 %r472, 32
+%r477 = trunc i544 %r476 to i32
+%r479 = getelementptr i32, i32* %r1, i32 1
+store i32 %r477, i32* %r479
+%r480 = lshr i544 %r476, 32
+%r481 = trunc i544 %r480 to i32
+%r483 = getelementptr i32, i32* %r1, i32 2
+store i32 %r481, i32* %r483
+%r484 = lshr i544 %r480, 32
+%r485 = trunc i544 %r484 to i32
+%r487 = getelementptr i32, i32* %r1, i32 3
+store i32 %r485, i32* %r487
+%r488 = lshr i544 %r484, 32
+%r489 = trunc i544 %r488 to i32
+%r491 = getelementptr i32, i32* %r1, i32 4
+store i32 %r489, i32* %r491
+%r492 = lshr i544 %r488, 32
+%r493 = trunc i544 %r492 to i32
+%r495 = getelementptr i32, i32* %r1, i32 5
+store i32 %r493, i32* %r495
+%r496 = lshr i544 %r492, 32
+%r497 = trunc i544 %r496 to i32
+%r499 = getelementptr i32, i32* %r1, i32 6
+store i32 %r497, i32* %r499
+%r500 = lshr i544 %r496, 32
+%r501 = trunc i544 %r500 to i32
+%r503 = getelementptr i32, i32* %r1, i32 7
+store i32 %r501, i32* %r503
+%r504 = lshr i544 %r500, 32
+%r505 = trunc i544 %r504 to i32
+%r507 = getelementptr i32, i32* %r1, i32 8
+store i32 %r505, i32* %r507
+%r508 = lshr i544 %r504, 32
+%r509 = trunc i544 %r508 to i32
+%r511 = getelementptr i32, i32* %r1, i32 9
+store i32 %r509, i32* %r511
+%r512 = lshr i544 %r508, 32
+%r513 = trunc i544 %r512 to i32
+%r515 = getelementptr i32, i32* %r1, i32 10
+store i32 %r513, i32* %r515
+%r516 = lshr i544 %r512, 32
+%r517 = trunc i544 %r516 to i32
+%r519 = getelementptr i32, i32* %r1, i32 11
+store i32 %r517, i32* %r519
+%r520 = lshr i544 %r516, 32
+%r521 = trunc i544 %r520 to i32
+%r523 = getelementptr i32, i32* %r1, i32 12
+store i32 %r521, i32* %r523
+%r524 = lshr i544 %r520, 32
+%r525 = trunc i544 %r524 to i32
+%r527 = getelementptr i32, i32* %r1, i32 13
+store i32 %r525, i32* %r527
+%r528 = lshr i544 %r524, 32
+%r529 = trunc i544 %r528 to i32
+%r531 = getelementptr i32, i32* %r1, i32 14
+store i32 %r529, i32* %r531
+%r532 = lshr i544 %r528, 32
+%r533 = trunc i544 %r532 to i32
+%r535 = getelementptr i32, i32* %r1, i32 15
+store i32 %r533, i32* %r535
+%r536 = lshr i544 %r532, 32
+%r537 = trunc i544 %r536 to i32
+%r539 = getelementptr i32, i32* %r1, i32 16
+store i32 %r537, i32* %r539
+%r540 = lshr i1120 %r471, 544
+%r541 = trunc i1120 %r540 to i544
+%r542 = lshr i1120 %r471, 1088
+%r543 = trunc i1120 %r542 to i1
+%r544 = load i32, i32* %r4
+%r545 = zext i32 %r544 to i64
+%r547 = getelementptr i32, i32* %r4, i32 1
+%r548 = load i32, i32* %r547
+%r549 = zext i32 %r548 to i64
+%r550 = shl i64 %r549, 32
+%r551 = or i64 %r545, %r550
+%r552 = zext i64 %r551 to i96
+%r554 = getelementptr i32, i32* %r4, i32 2
+%r555 = load i32, i32* %r554
+%r556 = zext i32 %r555 to i96
+%r557 = shl i96 %r556, 64
+%r558 = or i96 %r552, %r557
+%r559 = zext i96 %r558 to i128
+%r561 = getelementptr i32, i32* %r4, i32 3
+%r562 = load i32, i32* %r561
+%r563 = zext i32 %r562 to i128
+%r564 = shl i128 %r563, 96
+%r565 = or i128 %r559, %r564
+%r566 = zext i128 %r565 to i160
+%r568 = getelementptr i32, i32* %r4, i32 4
+%r569 = load i32, i32* %r568
+%r570 = zext i32 %r569 to i160
+%r571 = shl i160 %r570, 128
+%r572 = or i160 %r566, %r571
+%r573 = zext i160 %r572 to i192
+%r575 = getelementptr i32, i32* %r4, i32 5
+%r576 = load i32, i32* %r575
+%r577 = zext i32 %r576 to i192
+%r578 = shl i192 %r577, 160
+%r579 = or i192 %r573, %r578
+%r580 = zext i192 %r579 to i224
+%r582 = getelementptr i32, i32* %r4, i32 6
+%r583 = load i32, i32* %r582
+%r584 = zext i32 %r583 to i224
+%r585 = shl i224 %r584, 192
+%r586 = or i224 %r580, %r585
+%r587 = zext i224 %r586 to i256
+%r589 = getelementptr i32, i32* %r4, i32 7
+%r590 = load i32, i32* %r589
+%r591 = zext i32 %r590 to i256
+%r592 = shl i256 %r591, 224
+%r593 = or i256 %r587, %r592
+%r594 = zext i256 %r593 to i288
+%r596 = getelementptr i32, i32* %r4, i32 8
+%r597 = load i32, i32* %r596
+%r598 = zext i32 %r597 to i288
+%r599 = shl i288 %r598, 256
+%r600 = or i288 %r594, %r599
+%r601 = zext i288 %r600 to i320
+%r603 = getelementptr i32, i32* %r4, i32 9
+%r604 = load i32, i32* %r603
+%r605 = zext i32 %r604 to i320
+%r606 = shl i320 %r605, 288
+%r607 = or i320 %r601, %r606
+%r608 = zext i320 %r607 to i352
+%r610 = getelementptr i32, i32* %r4, i32 10
+%r611 = load i32, i32* %r610
+%r612 = zext i32 %r611 to i352
+%r613 = shl i352 %r612, 320
+%r614 = or i352 %r608, %r613
+%r615 = zext i352 %r614 to i384
+%r617 = getelementptr i32, i32* %r4, i32 11
+%r618 = load i32, i32* %r617
+%r619 = zext i32 %r618 to i384
+%r620 = shl i384 %r619, 352
+%r621 = or i384 %r615, %r620
+%r622 = zext i384 %r621 to i416
+%r624 = getelementptr i32, i32* %r4, i32 12
+%r625 = load i32, i32* %r624
+%r626 = zext i32 %r625 to i416
+%r627 = shl i416 %r626, 384
+%r628 = or i416 %r622, %r627
+%r629 = zext i416 %r628 to i448
+%r631 = getelementptr i32, i32* %r4, i32 13
+%r632 = load i32, i32* %r631
+%r633 = zext i32 %r632 to i448
+%r634 = shl i448 %r633, 416
+%r635 = or i448 %r629, %r634
+%r636 = zext i448 %r635 to i480
+%r638 = getelementptr i32, i32* %r4, i32 14
+%r639 = load i32, i32* %r638
+%r640 = zext i32 %r639 to i480
+%r641 = shl i480 %r640, 448
+%r642 = or i480 %r636, %r641
+%r643 = zext i480 %r642 to i512
+%r645 = getelementptr i32, i32* %r4, i32 15
+%r646 = load i32, i32* %r645
+%r647 = zext i32 %r646 to i512
+%r648 = shl i512 %r647, 480
+%r649 = or i512 %r643, %r648
+%r650 = zext i512 %r649 to i544
+%r652 = getelementptr i32, i32* %r4, i32 16
+%r653 = load i32, i32* %r652
+%r654 = zext i32 %r653 to i544
+%r655 = shl i544 %r654, 512
+%r656 = or i544 %r650, %r655
+%r658 = select i1 %r543, i544 %r656, i544 0
+%r659 = add i544 %r541, %r658
+%r661 = getelementptr i32, i32* %r1, i32 17
+%r662 = trunc i544 %r659 to i32
+%r664 = getelementptr i32, i32* %r661, i32 0
+store i32 %r662, i32* %r664
+%r665 = lshr i544 %r659, 32
+%r666 = trunc i544 %r665 to i32
+%r668 = getelementptr i32, i32* %r661, i32 1
+store i32 %r666, i32* %r668
+%r669 = lshr i544 %r665, 32
+%r670 = trunc i544 %r669 to i32
+%r672 = getelementptr i32, i32* %r661, i32 2
+store i32 %r670, i32* %r672
+%r673 = lshr i544 %r669, 32
+%r674 = trunc i544 %r673 to i32
+%r676 = getelementptr i32, i32* %r661, i32 3
+store i32 %r674, i32* %r676
+%r677 = lshr i544 %r673, 32
+%r678 = trunc i544 %r677 to i32
+%r680 = getelementptr i32, i32* %r661, i32 4
+store i32 %r678, i32* %r680
+%r681 = lshr i544 %r677, 32
+%r682 = trunc i544 %r681 to i32
+%r684 = getelementptr i32, i32* %r661, i32 5
+store i32 %r682, i32* %r684
+%r685 = lshr i544 %r681, 32
+%r686 = trunc i544 %r685 to i32
+%r688 = getelementptr i32, i32* %r661, i32 6
+store i32 %r686, i32* %r688
+%r689 = lshr i544 %r685, 32
+%r690 = trunc i544 %r689 to i32
+%r692 = getelementptr i32, i32* %r661, i32 7
+store i32 %r690, i32* %r692
+%r693 = lshr i544 %r689, 32
+%r694 = trunc i544 %r693 to i32
+%r696 = getelementptr i32, i32* %r661, i32 8
+store i32 %r694, i32* %r696
+%r697 = lshr i544 %r693, 32
+%r698 = trunc i544 %r697 to i32
+%r700 = getelementptr i32, i32* %r661, i32 9
+store i32 %r698, i32* %r700
+%r701 = lshr i544 %r697, 32
+%r702 = trunc i544 %r701 to i32
+%r704 = getelementptr i32, i32* %r661, i32 10
+store i32 %r702, i32* %r704
+%r705 = lshr i544 %r701, 32
+%r706 = trunc i544 %r705 to i32
+%r708 = getelementptr i32, i32* %r661, i32 11
+store i32 %r706, i32* %r708
+%r709 = lshr i544 %r705, 32
+%r710 = trunc i544 %r709 to i32
+%r712 = getelementptr i32, i32* %r661, i32 12
+store i32 %r710, i32* %r712
+%r713 = lshr i544 %r709, 32
+%r714 = trunc i544 %r713 to i32
+%r716 = getelementptr i32, i32* %r661, i32 13
+store i32 %r714, i32* %r716
+%r717 = lshr i544 %r713, 32
+%r718 = trunc i544 %r717 to i32
+%r720 = getelementptr i32, i32* %r661, i32 14
+store i32 %r718, i32* %r720
+%r721 = lshr i544 %r717, 32
+%r722 = trunc i544 %r721 to i32
+%r724 = getelementptr i32, i32* %r661, i32 15
+store i32 %r722, i32* %r724
+%r725 = lshr i544 %r721, 32
+%r726 = trunc i544 %r725 to i32
+%r728 = getelementptr i32, i32* %r661, i32 16
+store i32 %r726, i32* %r728
+ret void
+}
diff --git a/src/base64.ll b/src/base64.ll
new file mode 100644
index 00000000..e64ee12a
--- /dev/null
+++ b/src/base64.ll
@@ -0,0 +1,15383 @@
+define private i128 @mul64x64L(i64 %r2, i64 %r3) ; returns the full 128-bit unsigned product %r2 * %r3
+{
+%r4 = zext i64 %r2 to i128 ; widen both operands first so the multiply below cannot wrap
+%r5 = zext i64 %r3 to i128
+%r6 = mul i128 %r4, %r5
+ret i128 %r6
+}
+define private i64 @extractHigh64(i128 %r2) ; returns bits 64..127 of %r2
+{
+%r3 = lshr i128 %r2, 64 ; move the upper half down into the low 64 bits
+%r4 = trunc i128 %r3 to i64
+ret i64 %r4
+}
+define private i128 @mulPos64x64(i64* noalias  %r2, i64 %r3, i64 %r4) ; returns the 128-bit product %r2[%r4] * %r3
+{
+%r5 = getelementptr i64, i64* %r2, i64 %r4 ; address of limb number %r4
+%r6 = load i64, i64* %r5
+%r7 = call i128 @mul64x64L(i64 %r6, i64 %r3)
+ret i128 %r7
+}
+define i192 @makeNIST_P192L() ; returns the 192-bit constant 2^192 - 2^64 - 1, built from three 64-bit limbs
+{
+%r8 = sub i64 0, 1 ; limb 0 = 0xFFFFFFFFFFFFFFFF
+%r9 = sub i64 0, 2 ; limb 1 = 0xFFFFFFFFFFFFFFFE
+%r10 = sub i64 0, 1 ; limb 2 = 0xFFFFFFFFFFFFFFFF
+%r11 = zext i64 %r8 to i192
+%r12 = zext i64 %r9 to i192
+%r13 = zext i64 %r10 to i192
+%r14 = shl i192 %r12, 64 ; place limb 1 at bits 64..127
+%r15 = shl i192 %r13, 128 ; place limb 2 at bits 128..191
+%r16 = add i192 %r11, %r14 ; the limbs occupy disjoint bit ranges, so add acts as concatenation
+%r17 = add i192 %r16, %r15
+ret i192 %r17
+}
+define void @mcl_fpDbl_mod_NIST_P192L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; folds the 6-limb value at %r2 with respect to the constant from makeNIST_P192L and stores 3 limbs to %r1; %r3 is never read
+{
+%r4 = load i64, i64* %r2 ; --- gather limbs 0..2 into L (low 192 bits) ---
+%r5 = zext i64 %r4 to i128
+%r7 = getelementptr i64, i64* %r2, i32 1
+%r8 = load i64, i64* %r7
+%r9 = zext i64 %r8 to i128
+%r10 = shl i128 %r9, 64
+%r11 = or i128 %r5, %r10
+%r12 = zext i128 %r11 to i192
+%r14 = getelementptr i64, i64* %r2, i32 2
+%r15 = load i64, i64* %r14
+%r16 = zext i64 %r15 to i192
+%r17 = shl i192 %r16, 128
+%r18 = or i192 %r12, %r17
+%r19 = zext i192 %r18 to i256 ; L widened to 256 bits so the sums below keep their carries
+%r21 = getelementptr i64, i64* %r2, i32 3 ; --- gather limbs 3..5 into H (high 192 bits) ---
+%r22 = load i64, i64* %r21
+%r23 = zext i64 %r22 to i128
+%r25 = getelementptr i64, i64* %r21, i32 1
+%r26 = load i64, i64* %r25
+%r27 = zext i64 %r26 to i128
+%r28 = shl i128 %r27, 64
+%r29 = or i128 %r23, %r28
+%r30 = zext i128 %r29 to i192
+%r32 = getelementptr i64, i64* %r21, i32 2
+%r33 = load i64, i64* %r32
+%r34 = zext i64 %r33 to i192
+%r35 = shl i192 %r34, 128
+%r36 = or i192 %r30, %r35
+%r37 = zext i192 %r36 to i256 ; H = (H2, H1, H0)
+%r38 = shl i192 %r36, 64 ; 192-bit shift drops H2, leaving (H1, H0, 0)
+%r39 = zext i192 %r38 to i256
+%r40 = lshr i192 %r36, 128
+%r41 = trunc i192 %r40 to i64 ; H2, the top limb of H
+%r42 = zext i64 %r41 to i256
+%r43 = or i256 %r39, %r42 ; (H1, H0, H2): H rotated left by one limb
+%r44 = shl i256 %r42, 64 ; (0, H2, 0)
+%r45 = add i256 %r19, %r37 ; L + H
+%r46 = add i256 %r45, %r43
+%r47 = add i256 %r46, %r44 ; L + H + (H1,H0,H2) + (0,H2,0); this is congruent to the input because 2^192 = 2^64 + 1 modulo 2^192 - 2^64 - 1
+%r48 = lshr i256 %r47, 192
+%r49 = trunc i256 %r48 to i64 ; overflow above bit 191 from the sums
+%r50 = zext i64 %r49 to i256
+%r51 = shl i256 %r50, 64
+%r52 = or i256 %r50, %r51 ; overflow * (2^64 + 1), written as an OR of the two copies
+%r53 = trunc i256 %r47 to i192
+%r54 = zext i192 %r53 to i256
+%r55 = add i256 %r54, %r52 ; fold the overflow back into the low 192 bits
+%r56 = call i192 @makeNIST_P192L()
+%r57 = zext i192 %r56 to i256
+%r58 = sub i256 %r55, %r57 ; trial subtraction of the modulus constant
+%r59 = lshr i256 %r58, 192
+%r60 = trunc i256 %r59 to i1 ; bit 192 of the difference, used as the borrow flag
+%r61 = select i1 %r60, i256 %r55, i256 %r58 ; keep the unsubtracted value when the flag is set
+%r62 = trunc i256 %r61 to i192
+%r63 = trunc i192 %r62 to i64 ; --- store the 3 result limbs, least significant first ---
+%r65 = getelementptr i64, i64* %r1, i32 0
+store i64 %r63, i64* %r65
+%r66 = lshr i192 %r62, 64
+%r67 = trunc i192 %r66 to i64
+%r69 = getelementptr i64, i64* %r1, i32 1
+store i64 %r67, i64* %r69
+%r70 = lshr i192 %r66, 64
+%r71 = trunc i192 %r70 to i64
+%r73 = getelementptr i64, i64* %r1, i32 2
+store i64 %r71, i64* %r73
+ret void
+}
+define void @mcl_fp_sqr_NIST_P192L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; squares the value at %r2 and reduces it into %r1; %r3 is never read
+{
+%r5 = alloca i64, i32 6 ; 6-limb stack buffer for the unreduced square
+call void @mcl_fpDbl_sqrPre3L(i64* %r5, i64* %r2)
+call void @mcl_fpDbl_mod_NIST_P192L(i64* %r1, i64* %r5, i64* %r5) ; third argument is ignored by the callee; the buffer is passed only to fill the slot
+ret void
+}
+define void @mcl_fp_mulNIST_P192L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; multiplies the values at %r2 and %r3 and reduces the product into %r1; %r4 is never read
+{
+%r6 = alloca i64, i32 6 ; 6-limb stack buffer for the unreduced product
+call void @mcl_fpDbl_mulPre3L(i64* %r6, i64* %r2, i64* %r3)
+call void @mcl_fpDbl_mod_NIST_P192L(i64* %r1, i64* %r6, i64* %r6) ; third argument is ignored by the callee; the buffer is passed only to fill the slot
+ret void
+}
+define void @mcl_fpDbl_mod_NIST_P521L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; folds the 17-limb value at %r2 at bit 521 (reduction modulo 2^521 - 1) and stores 9 limbs to %r1; %r3 is never read
+{
+%r4 = load i64, i64* %r2 ; --- gather limbs 0..16 into one 1088-bit integer ---
+%r5 = zext i64 %r4 to i128
+%r7 = getelementptr i64, i64* %r2, i32 1
+%r8 = load i64, i64* %r7
+%r9 = zext i64 %r8 to i128
+%r10 = shl i128 %r9, 64
+%r11 = or i128 %r5, %r10
+%r12 = zext i128 %r11 to i192
+%r14 = getelementptr i64, i64* %r2, i32 2
+%r15 = load i64, i64* %r14
+%r16 = zext i64 %r15 to i192
+%r17 = shl i192 %r16, 128
+%r18 = or i192 %r12, %r17
+%r19 = zext i192 %r18 to i256
+%r21 = getelementptr i64, i64* %r2, i32 3
+%r22 = load i64, i64* %r21
+%r23 = zext i64 %r22 to i256
+%r24 = shl i256 %r23, 192
+%r25 = or i256 %r19, %r24
+%r26 = zext i256 %r25 to i320
+%r28 = getelementptr i64, i64* %r2, i32 4
+%r29 = load i64, i64* %r28
+%r30 = zext i64 %r29 to i320
+%r31 = shl i320 %r30, 256
+%r32 = or i320 %r26, %r31
+%r33 = zext i320 %r32 to i384
+%r35 = getelementptr i64, i64* %r2, i32 5
+%r36 = load i64, i64* %r35
+%r37 = zext i64 %r36 to i384
+%r38 = shl i384 %r37, 320
+%r39 = or i384 %r33, %r38
+%r40 = zext i384 %r39 to i448
+%r42 = getelementptr i64, i64* %r2, i32 6
+%r43 = load i64, i64* %r42
+%r44 = zext i64 %r43 to i448
+%r45 = shl i448 %r44, 384
+%r46 = or i448 %r40, %r45
+%r47 = zext i448 %r46 to i512
+%r49 = getelementptr i64, i64* %r2, i32 7
+%r50 = load i64, i64* %r49
+%r51 = zext i64 %r50 to i512
+%r52 = shl i512 %r51, 448
+%r53 = or i512 %r47, %r52
+%r54 = zext i512 %r53 to i576
+%r56 = getelementptr i64, i64* %r2, i32 8
+%r57 = load i64, i64* %r56
+%r58 = zext i64 %r57 to i576
+%r59 = shl i576 %r58, 512
+%r60 = or i576 %r54, %r59
+%r61 = zext i576 %r60 to i640
+%r63 = getelementptr i64, i64* %r2, i32 9
+%r64 = load i64, i64* %r63
+%r65 = zext i64 %r64 to i640
+%r66 = shl i640 %r65, 576
+%r67 = or i640 %r61, %r66
+%r68 = zext i640 %r67 to i704
+%r70 = getelementptr i64, i64* %r2, i32 10
+%r71 = load i64, i64* %r70
+%r72 = zext i64 %r71 to i704
+%r73 = shl i704 %r72, 640
+%r74 = or i704 %r68, %r73
+%r75 = zext i704 %r74 to i768
+%r77 = getelementptr i64, i64* %r2, i32 11
+%r78 = load i64, i64* %r77
+%r79 = zext i64 %r78 to i768
+%r80 = shl i768 %r79, 704
+%r81 = or i768 %r75, %r80
+%r82 = zext i768 %r81 to i832
+%r84 = getelementptr i64, i64* %r2, i32 12
+%r85 = load i64, i64* %r84
+%r86 = zext i64 %r85 to i832
+%r87 = shl i832 %r86, 768
+%r88 = or i832 %r82, %r87
+%r89 = zext i832 %r88 to i896
+%r91 = getelementptr i64, i64* %r2, i32 13
+%r92 = load i64, i64* %r91
+%r93 = zext i64 %r92 to i896
+%r94 = shl i896 %r93, 832
+%r95 = or i896 %r89, %r94
+%r96 = zext i896 %r95 to i960
+%r98 = getelementptr i64, i64* %r2, i32 14
+%r99 = load i64, i64* %r98
+%r100 = zext i64 %r99 to i960
+%r101 = shl i960 %r100, 896
+%r102 = or i960 %r96, %r101
+%r103 = zext i960 %r102 to i1024
+%r105 = getelementptr i64, i64* %r2, i32 15
+%r106 = load i64, i64* %r105
+%r107 = zext i64 %r106 to i1024
+%r108 = shl i1024 %r107, 960
+%r109 = or i1024 %r103, %r108
+%r110 = zext i1024 %r109 to i1088
+%r112 = getelementptr i64, i64* %r2, i32 16
+%r113 = load i64, i64* %r112
+%r114 = zext i64 %r113 to i1088
+%r115 = shl i1088 %r114, 1024
+%r116 = or i1088 %r110, %r115 ; complete 1088-bit input
+%r117 = trunc i1088 %r116 to i521 ; low 521 bits
+%r118 = zext i521 %r117 to i576
+%r119 = lshr i1088 %r116, 521 ; everything above bit 520
+%r120 = trunc i1088 %r119 to i576
+%r121 = add i576 %r118, %r120 ; low part + high part, valid because 2^521 = 1 modulo 2^521 - 1
+%r122 = lshr i576 %r121, 521
+%r124 = and i576 %r122, 1 ; carry out of bit 520 from the fold
+%r125 = add i576 %r121, %r124 ; wrap that carry around into bit 0
+%r126 = trunc i576 %r125 to i521 ; discard bit 521 and above
+%r127 = zext i521 %r126 to i576 ; folded result as 9 limbs (top limb holds 9 bits)
+%r128 = lshr i576 %r127, 512 ; --- test whether all 521 result bits are 1 ---
+%r129 = trunc i576 %r128 to i64
+%r131 = or i64 %r129, -512 ; top limb has only 9 value bits; force its upper 55 bits to 1 so it can join the all-ones test
+%r132 = lshr i576 %r127, 0
+%r133 = trunc i576 %r132 to i64
+%r134 = and i64 %r131, %r133 ; AND every limb together; stays all-ones only if each limb is all-ones
+%r135 = lshr i576 %r127, 64
+%r136 = trunc i576 %r135 to i64
+%r137 = and i64 %r134, %r136
+%r138 = lshr i576 %r127, 128
+%r139 = trunc i576 %r138 to i64
+%r140 = and i64 %r137, %r139
+%r141 = lshr i576 %r127, 192
+%r142 = trunc i576 %r141 to i64
+%r143 = and i64 %r140, %r142
+%r144 = lshr i576 %r127, 256
+%r145 = trunc i576 %r144 to i64
+%r146 = and i64 %r143, %r145
+%r147 = lshr i576 %r127, 320
+%r148 = trunc i576 %r147 to i64
+%r149 = and i64 %r146, %r148
+%r150 = lshr i576 %r127, 384
+%r151 = trunc i576 %r150 to i64
+%r152 = and i64 %r149, %r151
+%r153 = lshr i576 %r127, 448
+%r154 = trunc i576 %r153 to i64
+%r155 = and i64 %r152, %r154
+%r157 = icmp eq i64 %r155, -1 ; true when the folded value equals 2^521 - 1
+br i1%r157, label %zero, label %nonzero
+zero: ; folded value is 2^521 - 1, which is congruent to 0: write nine zero limbs
+store i64 0, i64* %r1
+%r161 = getelementptr i64, i64* %r1, i32 1
+store i64 0, i64* %r161
+%r164 = getelementptr i64, i64* %r1, i32 2
+store i64 0, i64* %r164
+%r167 = getelementptr i64, i64* %r1, i32 3
+store i64 0, i64* %r167
+%r170 = getelementptr i64, i64* %r1, i32 4
+store i64 0, i64* %r170
+%r173 = getelementptr i64, i64* %r1, i32 5
+store i64 0, i64* %r173
+%r176 = getelementptr i64, i64* %r1, i32 6
+store i64 0, i64* %r176
+%r179 = getelementptr i64, i64* %r1, i32 7
+store i64 0, i64* %r179
+%r182 = getelementptr i64, i64* %r1, i32 8
+store i64 0, i64* %r182
+ret void
+nonzero: ; otherwise write the folded value as nine limbs, least significant first
+%r183 = trunc i576 %r127 to i64
+%r185 = getelementptr i64, i64* %r1, i32 0
+store i64 %r183, i64* %r185
+%r186 = lshr i576 %r127, 64
+%r187 = trunc i576 %r186 to i64
+%r189 = getelementptr i64, i64* %r1, i32 1
+store i64 %r187, i64* %r189
+%r190 = lshr i576 %r186, 64
+%r191 = trunc i576 %r190 to i64
+%r193 = getelementptr i64, i64* %r1, i32 2
+store i64 %r191, i64* %r193
+%r194 = lshr i576 %r190, 64
+%r195 = trunc i576 %r194 to i64
+%r197 = getelementptr i64, i64* %r1, i32 3
+store i64 %r195, i64* %r197
+%r198 = lshr i576 %r194, 64
+%r199 = trunc i576 %r198 to i64
+%r201 = getelementptr i64, i64* %r1, i32 4
+store i64 %r199, i64* %r201
+%r202 = lshr i576 %r198, 64
+%r203 = trunc i576 %r202 to i64
+%r205 = getelementptr i64, i64* %r1, i32 5
+store i64 %r203, i64* %r205
+%r206 = lshr i576 %r202, 64
+%r207 = trunc i576 %r206 to i64
+%r209 = getelementptr i64, i64* %r1, i32 6
+store i64 %r207, i64* %r209
+%r210 = lshr i576 %r206, 64
+%r211 = trunc i576 %r210 to i64
+%r213 = getelementptr i64, i64* %r1, i32 7
+store i64 %r211, i64* %r213
+%r214 = lshr i576 %r210, 64
+%r215 = trunc i576 %r214 to i64
+%r217 = getelementptr i64, i64* %r1, i32 8
+store i64 %r215, i64* %r217
+ret void
+}
+define i128 @mulPv64x64(i64* noalias  %r2, i64 %r3) ; returns the 128-bit product %r2[0] * %r3
+{
+%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0)
+%r6 = trunc i128 %r5 to i64 ; low half of the product
+%r7 = call i64 @extractHigh64(i128 %r5) ; high half of the product
+%r8 = zext i64 %r6 to i128
+%r9 = zext i64 %r7 to i128
+%r10 = shl i128 %r9, 64
+%r11 = add i128 %r8, %r10 ; low + (high << 64): reassembles the same value as %r5
+ret i128 %r11
+}
+define void @mcl_fp_mulUnitPre1L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3) ; stores the 2-limb product %r2[0] * %r3 to %r1[0..1]
+{
+%r4 = call i128 @mulPv64x64(i64* %r2, i64 %r3)
+%r5 = trunc i128 %r4 to i64 ; low limb
+%r7 = getelementptr i64, i64* %r1, i32 0
+store i64 %r5, i64* %r7
+%r8 = lshr i128 %r4, 64
+%r9 = trunc i128 %r8 to i64 ; high limb
+%r11 = getelementptr i64, i64* %r1, i32 1
+store i64 %r9, i64* %r11
+ret void
+}
+define void @mcl_fpDbl_mulPre1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; stores the 2-limb product %r2[0] * %r3[0] to %r1[0..1], with no modular reduction
+{
+%r4 = load i64, i64* %r2
+%r5 = load i64, i64* %r3
+%r6 = zext i64 %r4 to i128 ; widen so the multiply keeps all 128 bits
+%r7 = zext i64 %r5 to i128
+%r8 = mul i128 %r6, %r7
+%r9 = trunc i128 %r8 to i64 ; low limb
+%r11 = getelementptr i64, i64* %r1, i32 0
+store i64 %r9, i64* %r11
+%r12 = lshr i128 %r8, 64
+%r13 = trunc i128 %r12 to i64 ; high limb
+%r15 = getelementptr i64, i64* %r1, i32 1
+store i64 %r13, i64* %r15
+ret void
+}
+define void @mcl_fpDbl_sqrPre1L(i64* noalias  %r1, i64* noalias  %r2) ; stores the 2-limb square of %r2[0] to %r1[0..1], with no modular reduction
+{
+%r3 = load i64, i64* %r2
+%r4 = load i64, i64* %r2 ; same limb loaded a second time as the other multiplicand
+%r5 = zext i64 %r3 to i128
+%r6 = zext i64 %r4 to i128
+%r7 = mul i128 %r5, %r6
+%r8 = trunc i128 %r7 to i64 ; low limb
+%r10 = getelementptr i64, i64* %r1, i32 0
+store i64 %r8, i64* %r10
+%r11 = lshr i128 %r7, 64
+%r12 = trunc i128 %r11 to i64 ; high limb
+%r14 = getelementptr i64, i64* %r1, i32 1
+store i64 %r12, i64* %r14
+ret void
+}
+define void @mcl_fp_mont1L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; one-limb multiply-and-reduce of %r2[0] * %r3[0] against the limb at %r4, result stored to %r1[0]
+{
+%r6 = getelementptr i64, i64* %r4, i32 -1 ; reads the word stored immediately before %r4
+%r7 = load i64, i64* %r6 ; NOTE(review): presumably the precomputed Montgomery factor for the modulus at %r4 - confirm against the caller
+%r9 = getelementptr i64, i64* %r3, i32 0
+%r10 = load i64, i64* %r9
+%r11 = call i128 @mulPv64x64(i64* %r2, i64 %r10) ; t = %r2[0] * %r3[0]
+%r12 = zext i128 %r11 to i192
+%r13 = trunc i128 %r11 to i64
+%r14 = mul i64 %r13, %r7 ; q = low64(t) * factor, wrapping to 64 bits
+%r15 = call i128 @mulPv64x64(i64* %r4, i64 %r14) ; q * %r4[0]
+%r16 = zext i128 %r15 to i192
+%r17 = add i192 %r12, %r16 ; t + q * %r4[0], in 192 bits so the carry is kept
+%r18 = lshr i192 %r17, 64 ; drop the low limb
+%r19 = trunc i192 %r18 to i128
+%r20 = load i64, i64* %r4
+%r21 = zext i64 %r20 to i128
+%r22 = sub i128 %r19, %r21 ; trial subtraction of %r4[0]
+%r23 = lshr i128 %r22, 64
+%r24 = trunc i128 %r23 to i1 ; bit 64 of the difference, used as the borrow flag
+%r25 = select i1 %r24, i128 %r19, i128 %r22 ; keep the unsubtracted value when the flag is set
+%r26 = trunc i128 %r25 to i64
+store i64 %r26, i64* %r1
+ret void
+}
+define void @mcl_fp_montNF1L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; one-limb multiply-and-reduce like mcl_fp_mont1L, but all arithmetic after the product stays within 128/64 bits
+{
+%r6 = getelementptr i64, i64* %r4, i32 -1 ; reads the word stored immediately before %r4
+%r7 = load i64, i64* %r6 ; NOTE(review): presumably the precomputed Montgomery factor for the modulus at %r4 - confirm against the caller
+%r8 = load i64, i64* %r3
+%r9 = call i128 @mulPv64x64(i64* %r2, i64 %r8) ; t = %r2[0] * %r3[0]
+%r10 = trunc i128 %r9 to i64
+%r11 = mul i64 %r10, %r7 ; q = low64(t) * factor, wrapping to 64 bits
+%r12 = call i128 @mulPv64x64(i64* %r4, i64 %r11) ; q * %r4[0]
+%r13 = add i128 %r9, %r12 ; 128-bit add; NOTE(review): no extra carry bit is kept, so this presumably relies on the modulus leaving headroom - confirm
+%r14 = lshr i128 %r13, 64 ; drop the low limb
+%r15 = trunc i128 %r14 to i64
+%r16 = load i64, i64* %r4
+%r17 = sub i64 %r15, %r16 ; trial subtraction of %r4[0]
+%r18 = lshr i64 %r17, 63
+%r19 = trunc i64 %r18 to i1 ; bit 63 (sign bit) of the difference decides the selection
+%r20 = select i1 %r19, i64 %r15, i64 %r17 ; keep the unsubtracted value when the sign bit is set
+store i64 %r20, i64* %r1
+ret void
+}
+define void @mcl_fp_montRed1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; reduces the 2-limb value at %r2 against the limb at %r3 and stores one limb to %r1[0]
+{
+%r5 = getelementptr i64, i64* %r3, i32 -1 ; reads the word stored immediately before %r3
+%r6 = load i64, i64* %r5 ; NOTE(review): presumably the precomputed Montgomery factor for the modulus at %r3 - confirm against the caller
+%r7 = load i64, i64* %r3 ; %r3[0], subtracted at the end
+%r8 = load i64, i64* %r2 ; --- gather the 2 input limbs into one 128-bit value ---
+%r9 = zext i64 %r8 to i128
+%r11 = getelementptr i64, i64* %r2, i32 1
+%r12 = load i64, i64* %r11
+%r13 = zext i64 %r12 to i128
+%r14 = shl i128 %r13, 64
+%r15 = or i128 %r9, %r14
+%r16 = zext i128 %r15 to i192 ; widened so the add below keeps its carry
+%r17 = trunc i192 %r16 to i64
+%r18 = mul i64 %r17, %r6 ; q = low64(input) * factor, wrapping to 64 bits
+%r19 = call i128 @mulPv64x64(i64* %r3, i64 %r18) ; q * %r3[0]
+%r20 = zext i128 %r19 to i192
+%r21 = add i192 %r16, %r20
+%r22 = lshr i192 %r21, 64 ; drop the low limb
+%r23 = trunc i192 %r22 to i128
+%r24 = zext i64 %r7 to i128
+%r25 = sub i128 %r23, %r24 ; trial subtraction of %r3[0]
+%r26 = lshr i128 %r25, 64
+%r27 = trunc i128 %r26 to i1 ; bit 64 of the difference, used as the borrow flag
+%r28 = select i1 %r27, i128 %r23, i128 %r25 ; keep the unsubtracted value when the flag is set
+%r29 = trunc i128 %r28 to i64
+store i64 %r29, i64* %r1
+ret void
+}
+define i64 @mcl_fp_addPre1L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; stores the low 64 bits of %r3[0] + %r4[0] to %r2[0] and returns the carry-out
+{
+%r5 = load i64, i64* %r3
+%r6 = zext i64 %r5 to i128
+%r7 = load i64, i64* %r4
+%r8 = zext i64 %r7 to i128
+%r9 = add i128 %r6, %r8 ; 128-bit add so the carry lands in bit 64
+%r10 = trunc i128 %r9 to i64
+store i64 %r10, i64* %r2
+%r11 = lshr i128 %r9, 64
+%r12 = trunc i128 %r11 to i64 ; carry: 0 or 1
+ret i64 %r12
+}
+define i64 @mcl_fp_subPre1L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; stores the low 64 bits of %r3[0] - %r4[0] to %r2[0] and returns the borrow
+{
+%r5 = load i64, i64* %r3
+%r6 = zext i64 %r5 to i128
+%r7 = load i64, i64* %r4
+%r8 = zext i64 %r7 to i128
+%r9 = sub i128 %r6, %r8 ; 128-bit subtract; a borrow sets every bit above bit 63
+%r10 = trunc i128 %r9 to i64
+store i64 %r10, i64* %r2
+%r11 = lshr i128 %r9, 64
+%r12 = trunc i128 %r11 to i64
+%r14 = and i64 %r12, 1 ; reduce the all-ones upper half to a single borrow bit
+ret i64 %r14
+}
+define void @mcl_fp_shr1_1L(i64* noalias  %r1, i64* noalias  %r2) ; stores %r2[0] logically shifted right by one bit to %r1[0]
+{
+%r3 = load i64, i64* %r2
+%r4 = lshr i64 %r3, 1 ; logical shift: the vacated top bit becomes 0
+store i64 %r4, i64* %r1
+ret void
+}
+define void @mcl_fp_add1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; stores %r2[0] + %r3[0] to %r1[0], replaced by the sum minus %r4[0] when that subtraction does not borrow
+{
+%r5 = load i64, i64* %r2
+%r6 = load i64, i64* %r3
+%r7 = zext i64 %r5 to i128
+%r8 = zext i64 %r6 to i128
+%r9 = add i128 %r7, %r8 ; 128-bit add keeps the carry
+%r10 = trunc i128 %r9 to i64
+store i64 %r10, i64* %r1 ; store the plain sum first; it is overwritten below if the subtraction succeeds
+%r11 = load i64, i64* %r4
+%r12 = zext i64 %r11 to i128
+%r13 = sub i128 %r9, %r12 ; trial subtraction of %r4[0]
+%r14 = lshr i128 %r13, 64
+%r15 = trunc i128 %r14 to i1 ; bit 64 of the difference, used as the borrow flag
+br i1%r15, label %carry, label %nocarry
+nocarry: ; no borrow: the subtracted value is the result
+%r16 = trunc i128 %r13 to i64
+store i64 %r16, i64* %r1
+ret void
+carry: ; borrow: keep the sum already stored
+ret void
+}
+define void @mcl_fp_addNF1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; branch-free add of %r2[0] and %r3[0] with a conditional subtraction of %r4[0], done entirely in 64 bits
+{
+%r5 = load i64, i64* %r2
+%r6 = load i64, i64* %r3
+%r7 = add i64 %r5, %r6 ; 64-bit add; NOTE(review): no carry is kept, so this presumably relies on the modulus leaving the top bit free - confirm
+%r8 = load i64, i64* %r4
+%r9 = sub i64 %r7, %r8 ; trial subtraction of %r4[0]
+%r10 = lshr i64 %r9, 63
+%r11 = trunc i64 %r10 to i1 ; bit 63 (sign bit) of the difference decides the selection
+%r12 = select i1 %r11, i64 %r7, i64 %r9 ; keep the plain sum when the sign bit is set
+store i64 %r12, i64* %r1
+ret void
+}
+define void @mcl_fp_sub1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; stores %r2[0] - %r3[0] to %r1[0], adding %r4[0] back when the subtraction borrows
+{
+%r5 = load i64, i64* %r2
+%r6 = load i64, i64* %r3
+%r7 = zext i64 %r5 to i128
+%r8 = zext i64 %r6 to i128
+%r9 = sub i128 %r7, %r8 ; 128-bit subtract so a borrow shows up in bit 64
+%r10 = trunc i128 %r9 to i64
+%r11 = lshr i128 %r9, 64
+%r12 = trunc i128 %r11 to i1 ; borrow flag
+store i64 %r10, i64* %r1 ; store the raw difference first; it is overwritten below on borrow
+br i1%r12, label %carry, label %nocarry
+nocarry: ; no borrow: the raw difference is the result
+ret void
+carry: ; borrow: add %r4[0] back, wrapping to 64 bits
+%r13 = load i64, i64* %r4
+%r14 = add i64 %r10, %r13
+store i64 %r14, i64* %r1
+ret void
+}
+define void @mcl_fp_subNF1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; branch-free subtract of %r3[0] from %r2[0] with a conditional add of %r4[0], done entirely in 64 bits
+{
+%r5 = load i64, i64* %r2
+%r6 = load i64, i64* %r3
+%r7 = sub i64 %r5, %r6 ; 64-bit subtract, wrapping
+%r8 = lshr i64 %r7, 63
+%r9 = trunc i64 %r8 to i1 ; bit 63 (sign bit) of the difference decides the correction
+%r10 = load i64, i64* %r4
+%r12 = select i1 %r9, i64 %r10, i64 0 ; add %r4[0] when the sign bit is set, otherwise add 0
+%r13 = add i64 %r7, %r12
+store i64 %r13, i64* %r1
+ret void
+}
+define void @mcl_fpDbl_add1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; adds the 2-limb values at %r2 and %r3; low limb is stored as is, upper part gets a conditional subtraction of %r4[0]
+{
+%r5 = load i64, i64* %r2 ; --- gather the 2 limbs of the first operand ---
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = load i64, i64* %r3 ; --- gather the 2 limbs of the second operand ---
+%r14 = zext i64 %r13 to i128
+%r16 = getelementptr i64, i64* %r3, i32 1
+%r17 = load i64, i64* %r16
+%r18 = zext i64 %r17 to i128
+%r19 = shl i128 %r18, 64
+%r20 = or i128 %r14, %r19
+%r21 = zext i128 %r12 to i192 ; widen so the add keeps its carry
+%r22 = zext i128 %r20 to i192
+%r23 = add i192 %r21, %r22
+%r24 = trunc i192 %r23 to i64
+store i64 %r24, i64* %r1 ; low limb of the sum is written without reduction
+%r25 = lshr i192 %r23, 64
+%r26 = trunc i192 %r25 to i128 ; upper part of the sum, including the carry
+%r27 = load i64, i64* %r4
+%r28 = zext i64 %r27 to i128
+%r29 = sub i128 %r26, %r28 ; trial subtraction of %r4[0] from the upper part
+%r30 = lshr i128 %r29, 64
+%r31 = trunc i128 %r30 to i1 ; bit 64 of the difference, used as the borrow flag
+%r32 = select i1 %r31, i128 %r26, i128 %r29 ; keep the unsubtracted value when the flag is set
+%r33 = trunc i128 %r32 to i64
+%r35 = getelementptr i64, i64* %r1, i32 1
+store i64 %r33, i64* %r35
+ret void
+}
+define void @mcl_fpDbl_sub1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; subtracts the 2-limb value at %r3 from the one at %r2; low limb is stored as is, %r4[0] is added to the upper limb on borrow
+{
+%r5 = load i64, i64* %r2 ; --- gather the 2 limbs of the minuend ---
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = load i64, i64* %r3 ; --- gather the 2 limbs of the subtrahend ---
+%r14 = zext i64 %r13 to i128
+%r16 = getelementptr i64, i64* %r3, i32 1
+%r17 = load i64, i64* %r16
+%r18 = zext i64 %r17 to i128
+%r19 = shl i128 %r18, 64
+%r20 = or i128 %r14, %r19
+%r21 = zext i128 %r12 to i192 ; widen so a borrow shows up in bit 128
+%r22 = zext i128 %r20 to i192
+%r23 = sub i192 %r21, %r22
+%r24 = trunc i192 %r23 to i64
+store i64 %r24, i64* %r1 ; low limb of the difference is written without correction
+%r25 = lshr i192 %r23, 64
+%r26 = trunc i192 %r25 to i64 ; upper limb of the difference
+%r27 = lshr i192 %r23, 128
+%r28 = trunc i192 %r27 to i1 ; borrow flag (bit 128)
+%r29 = load i64, i64* %r4
+%r31 = select i1 %r28, i64 %r29, i64 0 ; add %r4[0] on borrow, otherwise add 0
+%r32 = add i64 %r26, %r31
+%r34 = getelementptr i64, i64* %r1, i32 1
+store i64 %r32, i64* %r34
+ret void
+}
+define i192 @mulPv128x64(i64* noalias  %r2, i64 %r3) ; returns the 192-bit product of the 2-limb value at %r2 and the scalar %r3
+{
+%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0) ; partial product p0 = %r2[0] * %r3
+%r6 = trunc i128 %r5 to i64 ; lo(p0)
+%r7 = call i64 @extractHigh64(i128 %r5) ; hi(p0)
+%r9 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 1) ; partial product p1 = %r2[1] * %r3
+%r10 = trunc i128 %r9 to i64 ; lo(p1)
+%r11 = call i64 @extractHigh64(i128 %r9) ; hi(p1)
+%r12 = zext i64 %r6 to i128
+%r13 = zext i64 %r10 to i128
+%r14 = shl i128 %r13, 64
+%r15 = or i128 %r12, %r14 ; vector of low halves: (lo(p1), lo(p0))
+%r16 = zext i64 %r7 to i128
+%r17 = zext i64 %r11 to i128
+%r18 = shl i128 %r17, 64
+%r19 = or i128 %r16, %r18 ; vector of high halves: (hi(p1), hi(p0))
+%r20 = zext i128 %r15 to i192
+%r21 = zext i128 %r19 to i192
+%r22 = shl i192 %r21, 64 ; high halves belong one limb higher than their low halves
+%r23 = add i192 %r20, %r22 ; single wide add resolves all carries between the partial products
+ret i192 %r23
+}
+define void @mcl_fp_mulUnitPre2L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3) ; stores the 3-limb product of the 2-limb value at %r2 and the scalar %r3 to %r1[0..2]
+{
+%r4 = call i192 @mulPv128x64(i64* %r2, i64 %r3)
+%r5 = trunc i192 %r4 to i64 ; limb 0
+%r7 = getelementptr i64, i64* %r1, i32 0
+store i64 %r5, i64* %r7
+%r8 = lshr i192 %r4, 64
+%r9 = trunc i192 %r8 to i64 ; limb 1
+%r11 = getelementptr i64, i64* %r1, i32 1
+store i64 %r9, i64* %r11
+%r12 = lshr i192 %r8, 64
+%r13 = trunc i192 %r12 to i64 ; limb 2
+%r15 = getelementptr i64, i64* %r1, i32 2
+store i64 %r13, i64* %r15
+ret void
+}
+define void @mcl_fpDbl_mulPre2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; stores the 4-limb product of the 2-limb values at %r2 and %r3 to %r1[0..3], with no modular reduction
+{
+%r4 = load i64, i64* %r3
+%r5 = call i192 @mulPv128x64(i64* %r2, i64 %r4) ; first row: %r2 * %r3[0]
+%r6 = trunc i192 %r5 to i64
+store i64 %r6, i64* %r1 ; limb 0 of the product is final after the first row
+%r7 = lshr i192 %r5, 64 ; remaining upper part of the first row, aligned with the second row
+%r9 = getelementptr i64, i64* %r3, i32 1
+%r10 = load i64, i64* %r9
+%r11 = call i192 @mulPv128x64(i64* %r2, i64 %r10) ; second row: %r2 * %r3[1]
+%r12 = add i192 %r7, %r11 ; accumulate; holds product limbs 1..3
+%r14 = getelementptr i64, i64* %r1, i32 1 ; destination base for limbs 1..3
+%r15 = trunc i192 %r12 to i64
+%r17 = getelementptr i64, i64* %r14, i32 0
+store i64 %r15, i64* %r17
+%r18 = lshr i192 %r12, 64
+%r19 = trunc i192 %r18 to i64
+%r21 = getelementptr i64, i64* %r14, i32 1
+store i64 %r19, i64* %r21
+%r22 = lshr i192 %r18, 64
+%r23 = trunc i192 %r22 to i64
+%r25 = getelementptr i64, i64* %r14, i32 2
+store i64 %r23, i64* %r25
+ret void
+}
+define void @mcl_fpDbl_sqrPre2L(i64* noalias  %r1, i64* noalias  %r2) ; same row-by-row scheme as a 2x2-limb product, with %r2 used as both operands; four limbs written to %r1[0..3]
+{
+%r3 = load i64, i64* %r2
+%r4 = call i192 @mulPv128x64(i64* %r2, i64 %r3) ; first row: %r2 value combined with r2[0]
+%r5 = trunc i192 %r4 to i64
+store i64 %r5, i64* %r1 ; lowest result limb
+%r6 = lshr i192 %r4, 64
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = call i192 @mulPv128x64(i64* %r2, i64 %r9) ; second row: %r2 value combined with r2[1]
+%r11 = add i192 %r6, %r10 ; accumulate on top of the shifted first row
+%r13 = getelementptr i64, i64* %r1, i32 1 ; the remaining three limbs go to r1[1..3]
+%r14 = trunc i192 %r11 to i64
+%r16 = getelementptr i64, i64* %r13, i32 0
+store i64 %r14, i64* %r16
+%r17 = lshr i192 %r11, 64
+%r18 = trunc i192 %r17 to i64
+%r20 = getelementptr i64, i64* %r13, i32 1
+store i64 %r18, i64* %r20
+%r21 = lshr i192 %r17, 64
+%r22 = trunc i192 %r21 to i64
+%r24 = getelementptr i64, i64* %r13, i32 2
+store i64 %r22, i64* %r24
+ret void
+}
+define void @mcl_fp_mont2L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; 2-limb multiply interleaved with per-limb reduction by the value at %r4 (Montgomery-style, judging by the name); result to %r1[0..1]
+{
+%r6 = getelementptr i64, i64* %r4, i32 -1
+%r7 = load i64, i64* %r6 ; word stored immediately before %r4; presumably the precomputed reduction constant -- confirm with caller
+%r9 = getelementptr i64, i64* %r3, i32 0
+%r10 = load i64, i64* %r9
+%r11 = call i192 @mulPv128x64(i64* %r2, i64 %r10) ; round 0: %r2 value combined with r3[0]
+%r12 = zext i192 %r11 to i256 ; widened so the additions below cannot wrap
+%r13 = trunc i192 %r11 to i64
+%r14 = mul i64 %r13, %r7 ; per-round factor: low limb times the constant, modulo 2^64
+%r15 = call i192 @mulPv128x64(i64* %r4, i64 %r14)
+%r16 = zext i192 %r15 to i256
+%r17 = add i256 %r12, %r16
+%r18 = lshr i256 %r17, 64 ; drop the low limb of the accumulator
+%r20 = getelementptr i64, i64* %r3, i32 1
+%r21 = load i64, i64* %r20
+%r22 = call i192 @mulPv128x64(i64* %r2, i64 %r21) ; round 1: %r2 value combined with r3[1]
+%r23 = zext i192 %r22 to i256
+%r24 = add i256 %r18, %r23
+%r25 = trunc i256 %r24 to i64
+%r26 = mul i64 %r25, %r7
+%r27 = call i192 @mulPv128x64(i64* %r4, i64 %r26)
+%r28 = zext i192 %r27 to i256
+%r29 = add i256 %r24, %r28
+%r30 = lshr i256 %r29, 64
+%r31 = trunc i256 %r30 to i192 ; candidate result, kept one limb wider than the output
+%r32 = load i64, i64* %r4
+%r33 = zext i64 %r32 to i128
+%r35 = getelementptr i64, i64* %r4, i32 1
+%r36 = load i64, i64* %r35
+%r37 = zext i64 %r36 to i128
+%r38 = shl i128 %r37, 64
+%r39 = or i128 %r33, %r38 ; 128-bit value held at r4[0..1]
+%r40 = zext i128 %r39 to i192
+%r41 = sub i192 %r31, %r40 ; trial subtraction of the %r4 value
+%r42 = lshr i192 %r41, 128
+%r43 = trunc i192 %r42 to i1 ; bit 128 of the difference, used as the borrow flag
+%r44 = select i1 %r43, i192 %r31, i192 %r41 ; borrow set -> keep the unsubtracted value
+%r45 = trunc i192 %r44 to i128
+%r46 = trunc i128 %r45 to i64
+%r48 = getelementptr i64, i64* %r1, i32 0
+store i64 %r46, i64* %r48
+%r49 = lshr i128 %r45, 64
+%r50 = trunc i128 %r49 to i64
+%r52 = getelementptr i64, i64* %r1, i32 1
+store i64 %r50, i64* %r52
+ret void
+}
+define void @mcl_fp_montNF2L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; same round structure as mcl_fp_mont2L but accumulates in i192 without widening and decides the final subtraction from bit 127
+{
+%r6 = getelementptr i64, i64* %r4, i32 -1
+%r7 = load i64, i64* %r6 ; word stored immediately before %r4; presumably the precomputed reduction constant -- confirm with caller
+%r8 = load i64, i64* %r3
+%r9 = call i192 @mulPv128x64(i64* %r2, i64 %r8) ; round 0: %r2 value combined with r3[0]
+%r10 = trunc i192 %r9 to i64
+%r11 = mul i64 %r10, %r7 ; per-round factor, modulo 2^64
+%r12 = call i192 @mulPv128x64(i64* %r4, i64 %r11)
+%r13 = add i192 %r9, %r12 ; NOTE(review): no extra headroom bit here; presumably relies on the %r4 value leaving its top bit clear -- confirm
+%r14 = lshr i192 %r13, 64
+%r16 = getelementptr i64, i64* %r3, i32 1
+%r17 = load i64, i64* %r16
+%r18 = call i192 @mulPv128x64(i64* %r2, i64 %r17) ; round 1: %r2 value combined with r3[1]
+%r19 = add i192 %r14, %r18
+%r20 = trunc i192 %r19 to i64
+%r21 = mul i64 %r20, %r7
+%r22 = call i192 @mulPv128x64(i64* %r4, i64 %r21)
+%r23 = add i192 %r19, %r22
+%r24 = lshr i192 %r23, 64
+%r25 = trunc i192 %r24 to i128 ; candidate result, exactly two limbs wide
+%r26 = load i64, i64* %r4
+%r27 = zext i64 %r26 to i128
+%r29 = getelementptr i64, i64* %r4, i32 1
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i128
+%r32 = shl i128 %r31, 64
+%r33 = or i128 %r27, %r32 ; 128-bit value held at r4[0..1]
+%r34 = sub i128 %r25, %r33 ; trial subtraction of the %r4 value
+%r35 = lshr i128 %r34, 127
+%r36 = trunc i128 %r35 to i1 ; top (sign) bit of the 128-bit difference
+%r37 = select i1 %r36, i128 %r25, i128 %r34 ; sign bit set -> keep the unsubtracted value
+%r38 = trunc i128 %r37 to i64
+%r40 = getelementptr i64, i64* %r1, i32 0
+store i64 %r38, i64* %r40
+%r41 = lshr i128 %r37, 64
+%r42 = trunc i128 %r41 to i64
+%r44 = getelementptr i64, i64* %r1, i32 1
+store i64 %r42, i64* %r44
+ret void
+}
+define void @mcl_fp_montRed2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; reduces the 4-limb value at %r2 by the 2-limb value at %r3 in two rounds; 2-limb result to %r1
+{
+%r5 = getelementptr i64, i64* %r3, i32 -1
+%r6 = load i64, i64* %r5 ; word stored immediately before %r3; presumably the precomputed reduction constant -- confirm with caller
+%r7 = load i64, i64* %r3
+%r8 = zext i64 %r7 to i128
+%r10 = getelementptr i64, i64* %r3, i32 1
+%r11 = load i64, i64* %r10
+%r12 = zext i64 %r11 to i128
+%r13 = shl i128 %r12, 64
+%r14 = or i128 %r8, %r13 ; %r14 = 128-bit value held at r3[0..1], used for the final subtraction
+%r15 = load i64, i64* %r2
+%r16 = zext i64 %r15 to i128
+%r18 = getelementptr i64, i64* %r2, i32 1
+%r19 = load i64, i64* %r18
+%r20 = zext i64 %r19 to i128
+%r21 = shl i128 %r20, 64
+%r22 = or i128 %r16, %r21
+%r23 = zext i128 %r22 to i192
+%r25 = getelementptr i64, i64* %r2, i32 2
+%r26 = load i64, i64* %r25
+%r27 = zext i64 %r26 to i192
+%r28 = shl i192 %r27, 128
+%r29 = or i192 %r23, %r28
+%r30 = zext i192 %r29 to i256
+%r32 = getelementptr i64, i64* %r2, i32 3
+%r33 = load i64, i64* %r32
+%r34 = zext i64 %r33 to i256
+%r35 = shl i256 %r34, 192
+%r36 = or i256 %r30, %r35 ; %r36 = 256-bit input assembled from r2[0..3]
+%r37 = zext i256 %r36 to i320 ; one spare limb of headroom for the additions below
+%r38 = trunc i320 %r37 to i64
+%r39 = mul i64 %r38, %r6 ; round 0 factor: low limb times the constant, modulo 2^64
+%r40 = call i192 @mulPv128x64(i64* %r3, i64 %r39)
+%r41 = zext i192 %r40 to i320
+%r42 = add i320 %r37, %r41
+%r43 = lshr i320 %r42, 64 ; drop the low limb of the accumulator
+%r44 = trunc i320 %r43 to i256
+%r45 = trunc i256 %r44 to i64
+%r46 = mul i64 %r45, %r6 ; round 1 factor
+%r47 = call i192 @mulPv128x64(i64* %r3, i64 %r46)
+%r48 = zext i192 %r47 to i256
+%r49 = add i256 %r44, %r48
+%r50 = lshr i256 %r49, 64
+%r51 = trunc i256 %r50 to i192 ; candidate result, kept one limb wider than the output
+%r52 = zext i128 %r14 to i192
+%r53 = sub i192 %r51, %r52 ; trial subtraction of the %r3 value
+%r54 = lshr i192 %r53, 128
+%r55 = trunc i192 %r54 to i1 ; bit 128 of the difference, used as the borrow flag
+%r56 = select i1 %r55, i192 %r51, i192 %r53 ; borrow set -> keep the unsubtracted value
+%r57 = trunc i192 %r56 to i128
+%r58 = trunc i128 %r57 to i64
+%r60 = getelementptr i64, i64* %r1, i32 0
+store i64 %r58, i64* %r60
+%r61 = lshr i128 %r57, 64
+%r62 = trunc i128 %r61 to i64
+%r64 = getelementptr i64, i64* %r1, i32 1
+store i64 %r62, i64* %r64
+ret void
+}
+define i64 @mcl_fp_addPre2L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; adds the 2-limb values at %r3 and %r4, stores the low 128 bits to %r2, returns the carry-out
+{
+%r5 = load i64, i64* %r3
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11 ; 128-bit value from r3[0..1]
+%r13 = zext i128 %r12 to i192
+%r14 = load i64, i64* %r4
+%r15 = zext i64 %r14 to i128
+%r17 = getelementptr i64, i64* %r4, i32 1
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i128
+%r20 = shl i128 %r19, 64
+%r21 = or i128 %r15, %r20 ; 128-bit value from r4[0..1]
+%r22 = zext i128 %r21 to i192
+%r23 = add i192 %r13, %r22 ; done in i192 so the carry lands in bit 128
+%r24 = trunc i192 %r23 to i128
+%r25 = trunc i128 %r24 to i64
+%r27 = getelementptr i64, i64* %r2, i32 0
+store i64 %r25, i64* %r27
+%r28 = lshr i128 %r24, 64
+%r29 = trunc i128 %r28 to i64
+%r31 = getelementptr i64, i64* %r2, i32 1
+store i64 %r29, i64* %r31
+%r32 = lshr i192 %r23, 128
+%r33 = trunc i192 %r32 to i64 ; bits 128 and up of the sum: 0 or 1
+ret i64 %r33
+}
+define i64 @mcl_fp_subPre2L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; subtracts the 2-limb value at %r4 from the one at %r3, stores the low 128 bits to %r2, returns the borrow (0 or 1)
+{
+%r5 = load i64, i64* %r3
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11 ; 128-bit value from r3[0..1]
+%r13 = zext i128 %r12 to i192
+%r14 = load i64, i64* %r4
+%r15 = zext i64 %r14 to i128
+%r17 = getelementptr i64, i64* %r4, i32 1
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i128
+%r20 = shl i128 %r19, 64
+%r21 = or i128 %r15, %r20 ; 128-bit value from r4[0..1]
+%r22 = zext i128 %r21 to i192
+%r23 = sub i192 %r13, %r22 ; done in i192; a borrow sets every bit from 128 upward
+%r24 = trunc i192 %r23 to i128
+%r25 = trunc i128 %r24 to i64
+%r27 = getelementptr i64, i64* %r2, i32 0
+store i64 %r25, i64* %r27
+%r28 = lshr i128 %r24, 64
+%r29 = trunc i128 %r28 to i64
+%r31 = getelementptr i64, i64* %r2, i32 1
+store i64 %r29, i64* %r31
+%r32 = lshr i192 %r23, 128
+%r33 = trunc i192 %r32 to i64
+%r35 = and i64 %r33, 1 ; keep only bit 128 so the return value is 0 or 1
+ret i64 %r35
+}
+define void @mcl_fp_shr1_2L(i64* noalias  %r1, i64* noalias  %r2) ; logical right shift by one bit of the 2-limb value at %r2; result to %r1[0..1]
+{
+%r3 = load i64, i64* %r2
+%r4 = zext i64 %r3 to i128
+%r6 = getelementptr i64, i64* %r2, i32 1
+%r7 = load i64, i64* %r6
+%r8 = zext i64 %r7 to i128
+%r9 = shl i128 %r8, 64
+%r10 = or i128 %r4, %r9 ; 128-bit value from r2[0..1]
+%r11 = lshr i128 %r10, 1 ; bit 0 is discarded, bit 127 becomes 0
+%r12 = trunc i128 %r11 to i64
+%r14 = getelementptr i64, i64* %r1, i32 0
+store i64 %r12, i64* %r14
+%r15 = lshr i128 %r11, 64
+%r16 = trunc i128 %r15 to i64
+%r18 = getelementptr i64, i64* %r1, i32 1
+store i64 %r16, i64* %r18
+ret void
+}
+define void @mcl_fp_add2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; %r1 = %r2 + %r3 (2 limbs each), then replaced by sum - %r4 when that subtraction does not borrow
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11 ; 128-bit value from r2[0..1]
+%r13 = load i64, i64* %r3
+%r14 = zext i64 %r13 to i128
+%r16 = getelementptr i64, i64* %r3, i32 1
+%r17 = load i64, i64* %r16
+%r18 = zext i64 %r17 to i128
+%r19 = shl i128 %r18, 64
+%r20 = or i128 %r14, %r19 ; 128-bit value from r3[0..1]
+%r21 = zext i128 %r12 to i192
+%r22 = zext i128 %r20 to i192
+%r23 = add i192 %r21, %r22 ; full sum including the carry bit
+%r24 = trunc i192 %r23 to i128
+%r25 = trunc i128 %r24 to i64
+%r27 = getelementptr i64, i64* %r1, i32 0
+store i64 %r25, i64* %r27 ; the unreduced sum is stored first; it stays if the branch below goes to carry
+%r28 = lshr i128 %r24, 64
+%r29 = trunc i128 %r28 to i64
+%r31 = getelementptr i64, i64* %r1, i32 1
+store i64 %r29, i64* %r31
+%r32 = load i64, i64* %r4
+%r33 = zext i64 %r32 to i128
+%r35 = getelementptr i64, i64* %r4, i32 1
+%r36 = load i64, i64* %r35
+%r37 = zext i64 %r36 to i128
+%r38 = shl i128 %r37, 64
+%r39 = or i128 %r33, %r38 ; 128-bit value from r4[0..1]
+%r40 = zext i128 %r39 to i192
+%r41 = sub i192 %r23, %r40 ; trial subtraction of the %r4 value from the full sum
+%r42 = lshr i192 %r41, 128
+%r43 = trunc i192 %r42 to i1 ; bit 128 of the difference, used as the borrow flag
+br i1%r43, label %carry, label %nocarry
+nocarry: ; no borrow: overwrite %r1 with the reduced value
+%r44 = trunc i192 %r41 to i128
+%r45 = trunc i128 %r44 to i64
+%r47 = getelementptr i64, i64* %r1, i32 0
+store i64 %r45, i64* %r47
+%r48 = lshr i128 %r44, 64
+%r49 = trunc i128 %r48 to i64
+%r51 = getelementptr i64, i64* %r1, i32 1
+store i64 %r49, i64* %r51
+ret void
+carry: ; borrow: the sum already stored in %r1 is the result
+ret void
+}
+define void @mcl_fp_addNF2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; branch-free %r1 = %r2 + %r3 with a conditional subtraction of %r4, all in 128-bit arithmetic
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11 ; 128-bit value from r2[0..1]
+%r13 = load i64, i64* %r3
+%r14 = zext i64 %r13 to i128
+%r16 = getelementptr i64, i64* %r3, i32 1
+%r17 = load i64, i64* %r16
+%r18 = zext i64 %r17 to i128
+%r19 = shl i128 %r18, 64
+%r20 = or i128 %r14, %r19 ; 128-bit value from r3[0..1]
+%r21 = add i128 %r12, %r20 ; NOTE(review): not widened; presumably the operands are small enough that this cannot wrap -- confirm
+%r22 = load i64, i64* %r4
+%r23 = zext i64 %r22 to i128
+%r25 = getelementptr i64, i64* %r4, i32 1
+%r26 = load i64, i64* %r25
+%r27 = zext i64 %r26 to i128
+%r28 = shl i128 %r27, 64
+%r29 = or i128 %r23, %r28 ; 128-bit value from r4[0..1]
+%r30 = sub i128 %r21, %r29 ; trial subtraction of the %r4 value
+%r31 = lshr i128 %r30, 127
+%r32 = trunc i128 %r31 to i1 ; top (sign) bit of the difference
+%r33 = select i1 %r32, i128 %r21, i128 %r30 ; sign bit set -> keep the plain sum
+%r34 = trunc i128 %r33 to i64
+%r36 = getelementptr i64, i64* %r1, i32 0
+store i64 %r34, i64* %r36
+%r37 = lshr i128 %r33, 64
+%r38 = trunc i128 %r37 to i64
+%r40 = getelementptr i64, i64* %r1, i32 1
+store i64 %r38, i64* %r40
+ret void
+}
+define void @mcl_fp_sub2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; %r1 = %r2 - %r3 (2 limbs each); when the subtraction borrows, the %r4 value is added back
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11 ; 128-bit value from r2[0..1]
+%r13 = load i64, i64* %r3
+%r14 = zext i64 %r13 to i128
+%r16 = getelementptr i64, i64* %r3, i32 1
+%r17 = load i64, i64* %r16
+%r18 = zext i64 %r17 to i128
+%r19 = shl i128 %r18, 64
+%r20 = or i128 %r14, %r19 ; 128-bit value from r3[0..1]
+%r21 = zext i128 %r12 to i192
+%r22 = zext i128 %r20 to i192
+%r23 = sub i192 %r21, %r22 ; done in i192 so a borrow shows up in bit 128
+%r24 = trunc i192 %r23 to i128 ; low 128 bits of the difference
+%r25 = lshr i192 %r23, 128
+%r26 = trunc i192 %r25 to i1 ; borrow flag
+%r27 = trunc i128 %r24 to i64
+%r29 = getelementptr i64, i64* %r1, i32 0
+store i64 %r27, i64* %r29 ; raw difference is stored first; it stays if there was no borrow
+%r30 = lshr i128 %r24, 64
+%r31 = trunc i128 %r30 to i64
+%r33 = getelementptr i64, i64* %r1, i32 1
+store i64 %r31, i64* %r33
+br i1%r26, label %carry, label %nocarry
+nocarry: ; no borrow: the stored difference is the result
+ret void
+carry: ; borrow: add the %r4 value (wrapping at 128 bits) and store again
+%r34 = load i64, i64* %r4
+%r35 = zext i64 %r34 to i128
+%r37 = getelementptr i64, i64* %r4, i32 1
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i128
+%r40 = shl i128 %r39, 64
+%r41 = or i128 %r35, %r40 ; 128-bit value from r4[0..1]
+%r42 = add i128 %r24, %r41
+%r43 = trunc i128 %r42 to i64
+%r45 = getelementptr i64, i64* %r1, i32 0
+store i64 %r43, i64* %r45
+%r46 = lshr i128 %r42, 64
+%r47 = trunc i128 %r46 to i64
+%r49 = getelementptr i64, i64* %r1, i32 1
+store i64 %r47, i64* %r49
+ret void
+}
+define void @mcl_fp_subNF2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; branch-free %r1 = %r2 - %r3, adding the %r4 value when the 128-bit difference has its top bit set
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11 ; 128-bit value from r2[0..1]
+%r13 = load i64, i64* %r3
+%r14 = zext i64 %r13 to i128
+%r16 = getelementptr i64, i64* %r3, i32 1
+%r17 = load i64, i64* %r16
+%r18 = zext i64 %r17 to i128
+%r19 = shl i128 %r18, 64
+%r20 = or i128 %r14, %r19 ; 128-bit value from r3[0..1]
+%r21 = sub i128 %r12, %r20 ; wrapping 128-bit difference
+%r22 = lshr i128 %r21, 127
+%r23 = trunc i128 %r22 to i1 ; top (sign) bit of the difference; NOTE(review): presumably valid as a "negative" test only when inputs keep bit 127 clear -- confirm
+%r24 = load i64, i64* %r4
+%r25 = zext i64 %r24 to i128
+%r27 = getelementptr i64, i64* %r4, i32 1
+%r28 = load i64, i64* %r27
+%r29 = zext i64 %r28 to i128
+%r30 = shl i128 %r29, 64
+%r31 = or i128 %r25, %r30 ; 128-bit value from r4[0..1]
+%r33 = select i1 %r23, i128 %r31, i128 0 ; addend is the %r4 value when the sign bit is set, otherwise zero
+%r34 = add i128 %r21, %r33
+%r35 = trunc i128 %r34 to i64
+%r37 = getelementptr i64, i64* %r1, i32 0
+store i64 %r35, i64* %r37
+%r38 = lshr i128 %r34, 64
+%r39 = trunc i128 %r38 to i64
+%r41 = getelementptr i64, i64* %r1, i32 1
+store i64 %r39, i64* %r41
+ret void
+}
+define void @mcl_fpDbl_add2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; adds the 4-limb values at %r2 and %r3; low two limbs stored as-is, upper part conditionally reduced by the 2-limb value at %r4
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25 ; %r26 = 256-bit value from r2[0..3]
+%r27 = load i64, i64* %r3
+%r28 = zext i64 %r27 to i128
+%r30 = getelementptr i64, i64* %r3, i32 1
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i128
+%r33 = shl i128 %r32, 64
+%r34 = or i128 %r28, %r33
+%r35 = zext i128 %r34 to i192
+%r37 = getelementptr i64, i64* %r3, i32 2
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i192
+%r40 = shl i192 %r39, 128
+%r41 = or i192 %r35, %r40
+%r42 = zext i192 %r41 to i256
+%r44 = getelementptr i64, i64* %r3, i32 3
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i256
+%r47 = shl i256 %r46, 192
+%r48 = or i256 %r42, %r47 ; %r48 = 256-bit value from r3[0..3]
+%r49 = zext i256 %r26 to i320
+%r50 = zext i256 %r48 to i320
+%r51 = add i320 %r49, %r50 ; full sum including the carry bit
+%r52 = trunc i320 %r51 to i128
+%r53 = trunc i128 %r52 to i64
+%r55 = getelementptr i64, i64* %r1, i32 0
+store i64 %r53, i64* %r55 ; low 128 bits of the sum are written without reduction
+%r56 = lshr i128 %r52, 64
+%r57 = trunc i128 %r56 to i64
+%r59 = getelementptr i64, i64* %r1, i32 1
+store i64 %r57, i64* %r59
+%r60 = lshr i320 %r51, 128
+%r61 = trunc i320 %r60 to i192 ; upper part of the sum (bits 128 and up), carry included
+%r62 = load i64, i64* %r4
+%r63 = zext i64 %r62 to i128
+%r65 = getelementptr i64, i64* %r4, i32 1
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i128
+%r68 = shl i128 %r67, 64
+%r69 = or i128 %r63, %r68 ; 128-bit value from r4[0..1]
+%r70 = zext i128 %r69 to i192
+%r71 = sub i192 %r61, %r70 ; trial subtraction of the %r4 value from the upper part
+%r72 = lshr i192 %r71, 128
+%r73 = trunc i192 %r72 to i1 ; bit 128 of the difference, used as the borrow flag
+%r74 = select i1 %r73, i192 %r61, i192 %r71 ; borrow set -> keep the unsubtracted upper part
+%r75 = trunc i192 %r74 to i128
+%r77 = getelementptr i64, i64* %r1, i32 2 ; upper two result limbs go to r1[2..3]
+%r78 = trunc i128 %r75 to i64
+%r80 = getelementptr i64, i64* %r77, i32 0
+store i64 %r78, i64* %r80
+%r81 = lshr i128 %r75, 64
+%r82 = trunc i128 %r81 to i64
+%r84 = getelementptr i64, i64* %r77, i32 1
+store i64 %r82, i64* %r84
+ret void
+}
+define void @mcl_fpDbl_sub2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; subtracts the 4-limb value at %r3 from the one at %r2; on borrow the 2-limb value at %r4 is added to the upper half
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25 ; %r26 = 256-bit value from r2[0..3]
+%r27 = load i64, i64* %r3
+%r28 = zext i64 %r27 to i128
+%r30 = getelementptr i64, i64* %r3, i32 1
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i128
+%r33 = shl i128 %r32, 64
+%r34 = or i128 %r28, %r33
+%r35 = zext i128 %r34 to i192
+%r37 = getelementptr i64, i64* %r3, i32 2
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i192
+%r40 = shl i192 %r39, 128
+%r41 = or i192 %r35, %r40
+%r42 = zext i192 %r41 to i256
+%r44 = getelementptr i64, i64* %r3, i32 3
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i256
+%r47 = shl i256 %r46, 192
+%r48 = or i256 %r42, %r47 ; %r48 = 256-bit value from r3[0..3]
+%r49 = zext i256 %r26 to i320
+%r50 = zext i256 %r48 to i320
+%r51 = sub i320 %r49, %r50 ; done in i320 so a borrow shows up in bit 256
+%r52 = trunc i320 %r51 to i128
+%r53 = trunc i128 %r52 to i64
+%r55 = getelementptr i64, i64* %r1, i32 0
+store i64 %r53, i64* %r55 ; low 128 bits of the difference are written without correction
+%r56 = lshr i128 %r52, 64
+%r57 = trunc i128 %r56 to i64
+%r59 = getelementptr i64, i64* %r1, i32 1
+store i64 %r57, i64* %r59
+%r60 = lshr i320 %r51, 128
+%r61 = trunc i320 %r60 to i128 ; upper half of the difference (bits 128..255)
+%r62 = lshr i320 %r51, 256
+%r63 = trunc i320 %r62 to i1 ; borrow flag (bit 256)
+%r64 = load i64, i64* %r4
+%r65 = zext i64 %r64 to i128
+%r67 = getelementptr i64, i64* %r4, i32 1
+%r68 = load i64, i64* %r67
+%r69 = zext i64 %r68 to i128
+%r70 = shl i128 %r69, 64
+%r71 = or i128 %r65, %r70 ; 128-bit value from r4[0..1]
+%r73 = select i1 %r63, i128 %r71, i128 0 ; addend is the %r4 value on borrow, otherwise zero
+%r74 = add i128 %r61, %r73
+%r76 = getelementptr i64, i64* %r1, i32 2 ; upper two result limbs go to r1[2..3]
+%r77 = trunc i128 %r74 to i64
+%r79 = getelementptr i64, i64* %r76, i32 0
+store i64 %r77, i64* %r79
+%r80 = lshr i128 %r74, 64
+%r81 = trunc i128 %r80 to i64
+%r83 = getelementptr i64, i64* %r76, i32 1
+store i64 %r81, i64* %r83
+ret void
+}
+define i256 @mulPv192x64(i64* noalias  %r2, i64 %r3) ; combines three mulPos64x64 results (indices 0..2 of %r2, scalar %r3) into one i256
+{
+%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0) ; helper defined outside this view; presumably the 128-bit product r2[0] * r3 -- confirm
+%r6 = trunc i128 %r5 to i64 ; low 64 bits of the index-0 result
+%r7 = call i64 @extractHigh64(i128 %r5) ; helper defined outside this view; presumably the upper 64 bits -- confirm
+%r9 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 1)
+%r10 = trunc i128 %r9 to i64
+%r11 = call i64 @extractHigh64(i128 %r9)
+%r13 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 2)
+%r14 = trunc i128 %r13 to i64
+%r15 = call i64 @extractHigh64(i128 %r13)
+%r16 = zext i64 %r6 to i128
+%r17 = zext i64 %r10 to i128
+%r18 = shl i128 %r17, 64
+%r19 = or i128 %r16, %r18
+%r20 = zext i128 %r19 to i192
+%r21 = zext i64 %r14 to i192
+%r22 = shl i192 %r21, 128
+%r23 = or i192 %r20, %r22 ; %r23 = the three low halves packed as a 192-bit value
+%r24 = zext i64 %r7 to i128
+%r25 = zext i64 %r11 to i128
+%r26 = shl i128 %r25, 64
+%r27 = or i128 %r24, %r26
+%r28 = zext i128 %r27 to i192
+%r29 = zext i64 %r15 to i192
+%r30 = shl i192 %r29, 128
+%r31 = or i192 %r28, %r30 ; %r31 = the three extractHigh64 results packed as a 192-bit value
+%r32 = zext i192 %r23 to i256
+%r33 = zext i192 %r31 to i256
+%r34 = shl i256 %r33, 64 ; the high-half vector is weighted one limb (64 bits) higher
+%r35 = add i256 %r32, %r34 ; add (not or) so carries between overlapping limbs propagate
+ret i256 %r35
+}
+define void @mcl_fp_mulUnitPre3L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3) ; writes the i256 result of mulPv192x64(%r2, %r3) to %r1[0..3]
+{
+%r4 = call i256 @mulPv192x64(i64* %r2, i64 %r3)
+%r5 = trunc i256 %r4 to i64 ; bits 0..63
+%r7 = getelementptr i64, i64* %r1, i32 0
+store i64 %r5, i64* %r7
+%r8 = lshr i256 %r4, 64
+%r9 = trunc i256 %r8 to i64 ; bits 64..127
+%r11 = getelementptr i64, i64* %r1, i32 1
+store i64 %r9, i64* %r11
+%r12 = lshr i256 %r8, 64
+%r13 = trunc i256 %r12 to i64 ; bits 128..191
+%r15 = getelementptr i64, i64* %r1, i32 2
+store i64 %r13, i64* %r15
+%r16 = lshr i256 %r12, 64
+%r17 = trunc i256 %r16 to i64 ; bits 192..255
+%r19 = getelementptr i64, i64* %r1, i32 3
+store i64 %r17, i64* %r19
+ret void
+}
+define void @mcl_fpDbl_mulPre3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; row-by-row product of the 3-limb values at %r2 and %r3; six limbs written to %r1[0..5]
+{
+%r4 = load i64, i64* %r3
+%r5 = call i256 @mulPv192x64(i64* %r2, i64 %r4) ; row 0: %r2 value combined with r3[0]
+%r6 = trunc i256 %r5 to i64
+store i64 %r6, i64* %r1 ; result limb 0 is final after row 0
+%r7 = lshr i256 %r5, 64
+%r9 = getelementptr i64, i64* %r3, i32 1
+%r10 = load i64, i64* %r9
+%r11 = call i256 @mulPv192x64(i64* %r2, i64 %r10) ; row 1: %r2 value combined with r3[1]
+%r12 = add i256 %r7, %r11
+%r13 = trunc i256 %r12 to i64
+%r15 = getelementptr i64, i64* %r1, i32 1
+store i64 %r13, i64* %r15 ; result limb 1 is final after row 1
+%r16 = lshr i256 %r12, 64
+%r18 = getelementptr i64, i64* %r3, i32 2
+%r19 = load i64, i64* %r18
+%r20 = call i256 @mulPv192x64(i64* %r2, i64 %r19) ; row 2: %r2 value combined with r3[2]
+%r21 = add i256 %r16, %r20
+%r23 = getelementptr i64, i64* %r1, i32 2 ; the remaining four limbs go to r1[2..5]
+%r24 = trunc i256 %r21 to i64
+%r26 = getelementptr i64, i64* %r23, i32 0
+store i64 %r24, i64* %r26
+%r27 = lshr i256 %r21, 64
+%r28 = trunc i256 %r27 to i64
+%r30 = getelementptr i64, i64* %r23, i32 1
+store i64 %r28, i64* %r30
+%r31 = lshr i256 %r27, 64
+%r32 = trunc i256 %r31 to i64
+%r34 = getelementptr i64, i64* %r23, i32 2
+store i64 %r32, i64* %r34
+%r35 = lshr i256 %r31, 64
+%r36 = trunc i256 %r35 to i64
+%r38 = getelementptr i64, i64* %r23, i32 3
+store i64 %r36, i64* %r38
+ret void
+}
+define void @mcl_fpDbl_sqrPre3L(i64* noalias  %r1, i64* noalias  %r2) ; same row-by-row scheme as a 3x3-limb product, with %r2 used as both operands; six limbs written to %r1[0..5]
+{
+%r3 = load i64, i64* %r2
+%r4 = call i256 @mulPv192x64(i64* %r2, i64 %r3) ; row 0: %r2 value combined with r2[0]
+%r5 = trunc i256 %r4 to i64
+store i64 %r5, i64* %r1 ; result limb 0
+%r6 = lshr i256 %r4, 64
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = call i256 @mulPv192x64(i64* %r2, i64 %r9) ; row 1: %r2 value combined with r2[1]
+%r11 = add i256 %r6, %r10
+%r12 = trunc i256 %r11 to i64
+%r14 = getelementptr i64, i64* %r1, i32 1
+store i64 %r12, i64* %r14 ; result limb 1
+%r15 = lshr i256 %r11, 64
+%r17 = getelementptr i64, i64* %r2, i32 2
+%r18 = load i64, i64* %r17
+%r19 = call i256 @mulPv192x64(i64* %r2, i64 %r18) ; row 2: %r2 value combined with r2[2]
+%r20 = add i256 %r15, %r19
+%r22 = getelementptr i64, i64* %r1, i32 2 ; the remaining four limbs go to r1[2..5]
+%r23 = trunc i256 %r20 to i64
+%r25 = getelementptr i64, i64* %r22, i32 0
+store i64 %r23, i64* %r25
+%r26 = lshr i256 %r20, 64
+%r27 = trunc i256 %r26 to i64
+%r29 = getelementptr i64, i64* %r22, i32 1
+store i64 %r27, i64* %r29
+%r30 = lshr i256 %r26, 64
+%r31 = trunc i256 %r30 to i64
+%r33 = getelementptr i64, i64* %r22, i32 2
+store i64 %r31, i64* %r33
+%r34 = lshr i256 %r30, 64
+%r35 = trunc i256 %r34 to i64
+%r37 = getelementptr i64, i64* %r22, i32 3
+store i64 %r35, i64* %r37
+ret void
+}
+define void @mcl_fp_mont3L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; 3-limb multiply interleaved with per-limb reduction by the value at %r4 (Montgomery-style, judging by the name); result to %r1[0..2]
+{
+%r6 = getelementptr i64, i64* %r4, i32 -1
+%r7 = load i64, i64* %r6 ; word stored immediately before %r4; presumably the precomputed reduction constant -- confirm with caller
+%r9 = getelementptr i64, i64* %r3, i32 0
+%r10 = load i64, i64* %r9
+%r11 = call i256 @mulPv192x64(i64* %r2, i64 %r10) ; round 0: %r2 value combined with r3[0]
+%r12 = zext i256 %r11 to i320 ; widened so the additions below cannot wrap
+%r13 = trunc i256 %r11 to i64
+%r14 = mul i64 %r13, %r7 ; per-round factor: low limb times the constant, modulo 2^64
+%r15 = call i256 @mulPv192x64(i64* %r4, i64 %r14)
+%r16 = zext i256 %r15 to i320
+%r17 = add i320 %r12, %r16
+%r18 = lshr i320 %r17, 64 ; drop the low limb of the accumulator
+%r20 = getelementptr i64, i64* %r3, i32 1
+%r21 = load i64, i64* %r20
+%r22 = call i256 @mulPv192x64(i64* %r2, i64 %r21) ; round 1: %r2 value combined with r3[1]
+%r23 = zext i256 %r22 to i320
+%r24 = add i320 %r18, %r23
+%r25 = trunc i320 %r24 to i64
+%r26 = mul i64 %r25, %r7
+%r27 = call i256 @mulPv192x64(i64* %r4, i64 %r26)
+%r28 = zext i256 %r27 to i320
+%r29 = add i320 %r24, %r28
+%r30 = lshr i320 %r29, 64
+%r32 = getelementptr i64, i64* %r3, i32 2
+%r33 = load i64, i64* %r32
+%r34 = call i256 @mulPv192x64(i64* %r2, i64 %r33) ; round 2: %r2 value combined with r3[2]
+%r35 = zext i256 %r34 to i320
+%r36 = add i320 %r30, %r35
+%r37 = trunc i320 %r36 to i64
+%r38 = mul i64 %r37, %r7
+%r39 = call i256 @mulPv192x64(i64* %r4, i64 %r38)
+%r40 = zext i256 %r39 to i320
+%r41 = add i320 %r36, %r40
+%r42 = lshr i320 %r41, 64
+%r43 = trunc i320 %r42 to i256 ; candidate result, kept one limb wider than the output
+%r44 = load i64, i64* %r4
+%r45 = zext i64 %r44 to i128
+%r47 = getelementptr i64, i64* %r4, i32 1
+%r48 = load i64, i64* %r47
+%r49 = zext i64 %r48 to i128
+%r50 = shl i128 %r49, 64
+%r51 = or i128 %r45, %r50
+%r52 = zext i128 %r51 to i192
+%r54 = getelementptr i64, i64* %r4, i32 2
+%r55 = load i64, i64* %r54
+%r56 = zext i64 %r55 to i192
+%r57 = shl i192 %r56, 128
+%r58 = or i192 %r52, %r57 ; 192-bit value held at r4[0..2]
+%r59 = zext i192 %r58 to i256
+%r60 = sub i256 %r43, %r59 ; trial subtraction of the %r4 value
+%r61 = lshr i256 %r60, 192
+%r62 = trunc i256 %r61 to i1 ; bit 192 of the difference, used as the borrow flag
+%r63 = select i1 %r62, i256 %r43, i256 %r60 ; borrow set -> keep the unsubtracted value
+%r64 = trunc i256 %r63 to i192
+%r65 = trunc i192 %r64 to i64
+%r67 = getelementptr i64, i64* %r1, i32 0
+store i64 %r65, i64* %r67
+%r68 = lshr i192 %r64, 64
+%r69 = trunc i192 %r68 to i64
+%r71 = getelementptr i64, i64* %r1, i32 1
+store i64 %r69, i64* %r71
+%r72 = lshr i192 %r68, 64
+%r73 = trunc i192 %r72 to i64
+%r75 = getelementptr i64, i64* %r1, i32 2
+store i64 %r73, i64* %r75
+ret void
+}
+define void @mcl_fp_montNF3L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; same round structure as mcl_fp_mont3L but accumulates in i256 without widening and decides the final subtraction from bit 191
+{
+%r6 = getelementptr i64, i64* %r4, i32 -1
+%r7 = load i64, i64* %r6 ; word stored immediately before %r4; presumably the precomputed reduction constant -- confirm with caller
+%r8 = load i64, i64* %r3
+%r9 = call i256 @mulPv192x64(i64* %r2, i64 %r8) ; round 0: %r2 value combined with r3[0]
+%r10 = trunc i256 %r9 to i64
+%r11 = mul i64 %r10, %r7 ; per-round factor, modulo 2^64
+%r12 = call i256 @mulPv192x64(i64* %r4, i64 %r11)
+%r13 = add i256 %r9, %r12 ; NOTE(review): no extra headroom bit here; presumably relies on the %r4 value leaving its top bit clear -- confirm
+%r14 = lshr i256 %r13, 64
+%r16 = getelementptr i64, i64* %r3, i32 1
+%r17 = load i64, i64* %r16
+%r18 = call i256 @mulPv192x64(i64* %r2, i64 %r17) ; round 1: %r2 value combined with r3[1]
+%r19 = add i256 %r14, %r18
+%r20 = trunc i256 %r19 to i64
+%r21 = mul i64 %r20, %r7
+%r22 = call i256 @mulPv192x64(i64* %r4, i64 %r21)
+%r23 = add i256 %r19, %r22
+%r24 = lshr i256 %r23, 64
+%r26 = getelementptr i64, i64* %r3, i32 2
+%r27 = load i64, i64* %r26
+%r28 = call i256 @mulPv192x64(i64* %r2, i64 %r27) ; round 2: %r2 value combined with r3[2]
+%r29 = add i256 %r24, %r28
+%r30 = trunc i256 %r29 to i64
+%r31 = mul i64 %r30, %r7
+%r32 = call i256 @mulPv192x64(i64* %r4, i64 %r31)
+%r33 = add i256 %r29, %r32
+%r34 = lshr i256 %r33, 64
+%r35 = trunc i256 %r34 to i192 ; candidate result, exactly three limbs wide
+%r36 = load i64, i64* %r4
+%r37 = zext i64 %r36 to i128
+%r39 = getelementptr i64, i64* %r4, i32 1
+%r40 = load i64, i64* %r39
+%r41 = zext i64 %r40 to i128
+%r42 = shl i128 %r41, 64
+%r43 = or i128 %r37, %r42
+%r44 = zext i128 %r43 to i192
+%r46 = getelementptr i64, i64* %r4, i32 2
+%r47 = load i64, i64* %r46
+%r48 = zext i64 %r47 to i192
+%r49 = shl i192 %r48, 128
+%r50 = or i192 %r44, %r49 ; 192-bit value held at r4[0..2]
+%r51 = sub i192 %r35, %r50 ; trial subtraction of the %r4 value
+%r52 = lshr i192 %r51, 191
+%r53 = trunc i192 %r52 to i1 ; top (sign) bit of the 192-bit difference
+%r54 = select i1 %r53, i192 %r35, i192 %r51 ; sign bit set -> keep the unsubtracted value
+%r55 = trunc i192 %r54 to i64
+%r57 = getelementptr i64, i64* %r1, i32 0
+store i64 %r55, i64* %r57
+%r58 = lshr i192 %r54, 64
+%r59 = trunc i192 %r58 to i64
+%r61 = getelementptr i64, i64* %r1, i32 1
+store i64 %r59, i64* %r61
+%r62 = lshr i192 %r58, 64
+%r63 = trunc i192 %r62 to i64
+%r65 = getelementptr i64, i64* %r1, i32 2
+store i64 %r63, i64* %r65
+ret void
+}
+define void @mcl_fp_montRed3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; reduces the 6-limb value at %r2 by the 3-limb value at %r3 in three rounds; 3-limb result to %r1
+{
+%r5 = getelementptr i64, i64* %r3, i32 -1
+%r6 = load i64, i64* %r5 ; word stored immediately before %r3; presumably the precomputed reduction constant -- confirm with caller
+%r7 = load i64, i64* %r3
+%r8 = zext i64 %r7 to i128
+%r10 = getelementptr i64, i64* %r3, i32 1
+%r11 = load i64, i64* %r10
+%r12 = zext i64 %r11 to i128
+%r13 = shl i128 %r12, 64
+%r14 = or i128 %r8, %r13
+%r15 = zext i128 %r14 to i192
+%r17 = getelementptr i64, i64* %r3, i32 2
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i192
+%r20 = shl i192 %r19, 128
+%r21 = or i192 %r15, %r20 ; %r21 = 192-bit value held at r3[0..2], used for the final subtraction
+%r22 = load i64, i64* %r2
+%r23 = zext i64 %r22 to i128
+%r25 = getelementptr i64, i64* %r2, i32 1
+%r26 = load i64, i64* %r25
+%r27 = zext i64 %r26 to i128
+%r28 = shl i128 %r27, 64
+%r29 = or i128 %r23, %r28
+%r30 = zext i128 %r29 to i192
+%r32 = getelementptr i64, i64* %r2, i32 2
+%r33 = load i64, i64* %r32
+%r34 = zext i64 %r33 to i192
+%r35 = shl i192 %r34, 128
+%r36 = or i192 %r30, %r35
+%r37 = zext i192 %r36 to i256
+%r39 = getelementptr i64, i64* %r2, i32 3
+%r40 = load i64, i64* %r39
+%r41 = zext i64 %r40 to i256
+%r42 = shl i256 %r41, 192
+%r43 = or i256 %r37, %r42
+%r44 = zext i256 %r43 to i320
+%r46 = getelementptr i64, i64* %r2, i32 4
+%r47 = load i64, i64* %r46
+%r48 = zext i64 %r47 to i320
+%r49 = shl i320 %r48, 256
+%r50 = or i320 %r44, %r49
+%r51 = zext i320 %r50 to i384
+%r53 = getelementptr i64, i64* %r2, i32 5
+%r54 = load i64, i64* %r53
+%r55 = zext i64 %r54 to i384
+%r56 = shl i384 %r55, 320
+%r57 = or i384 %r51, %r56 ; %r57 = 384-bit input assembled from r2[0..5]
+%r58 = zext i384 %r57 to i448 ; one spare limb of headroom for the additions below
+%r59 = trunc i448 %r58 to i64
+%r60 = mul i64 %r59, %r6 ; round 0 factor: low limb times the constant, modulo 2^64
+%r61 = call i256 @mulPv192x64(i64* %r3, i64 %r60)
+%r62 = zext i256 %r61 to i448
+%r63 = add i448 %r58, %r62
+%r64 = lshr i448 %r63, 64 ; drop the low limb of the accumulator
+%r65 = trunc i448 %r64 to i384
+%r66 = trunc i384 %r65 to i64
+%r67 = mul i64 %r66, %r6 ; round 1 factor
+%r68 = call i256 @mulPv192x64(i64* %r3, i64 %r67)
+%r69 = zext i256 %r68 to i384
+%r70 = add i384 %r65, %r69
+%r71 = lshr i384 %r70, 64
+%r72 = trunc i384 %r71 to i320
+%r73 = trunc i320 %r72 to i64
+%r74 = mul i64 %r73, %r6 ; round 2 factor
+%r75 = call i256 @mulPv192x64(i64* %r3, i64 %r74)
+%r76 = zext i256 %r75 to i320
+%r77 = add i320 %r72, %r76
+%r78 = lshr i320 %r77, 64
+%r79 = trunc i320 %r78 to i256 ; candidate result, kept one limb wider than the output
+%r80 = zext i192 %r21 to i256
+%r81 = sub i256 %r79, %r80 ; trial subtraction of the %r3 value
+%r82 = lshr i256 %r81, 192
+%r83 = trunc i256 %r82 to i1 ; bit 192 of the difference, used as the borrow flag
+%r84 = select i1 %r83, i256 %r79, i256 %r81 ; borrow set -> keep the unsubtracted value
+%r85 = trunc i256 %r84 to i192
+%r86 = trunc i192 %r85 to i64
+%r88 = getelementptr i64, i64* %r1, i32 0
+store i64 %r86, i64* %r88
+%r89 = lshr i192 %r85, 64
+%r90 = trunc i192 %r89 to i64
+%r92 = getelementptr i64, i64* %r1, i32 1
+store i64 %r90, i64* %r92
+%r93 = lshr i192 %r89, 64
+%r94 = trunc i192 %r93 to i64
+%r96 = getelementptr i64, i64* %r1, i32 2
+store i64 %r94, i64* %r96
+ret void
+}
+define i64 @mcl_fp_addPre3L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; adds the 3-limb values at %r3 and %r4, stores the low 192 bits to %r2, returns the carry-out
+{
+%r5 = load i64, i64* %r3
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18 ; 192-bit value from r3[0..2]
+%r20 = zext i192 %r19 to i256
+%r21 = load i64, i64* %r4
+%r22 = zext i64 %r21 to i128
+%r24 = getelementptr i64, i64* %r4, i32 1
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i128
+%r27 = shl i128 %r26, 64
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i192
+%r31 = getelementptr i64, i64* %r4, i32 2
+%r32 = load i64, i64* %r31
+%r33 = zext i64 %r32 to i192
+%r34 = shl i192 %r33, 128
+%r35 = or i192 %r29, %r34 ; 192-bit value from r4[0..2]
+%r36 = zext i192 %r35 to i256
+%r37 = add i256 %r20, %r36 ; done in i256 so the carry lands in bit 192
+%r38 = trunc i256 %r37 to i192
+%r39 = trunc i192 %r38 to i64
+%r41 = getelementptr i64, i64* %r2, i32 0
+store i64 %r39, i64* %r41
+%r42 = lshr i192 %r38, 64
+%r43 = trunc i192 %r42 to i64
+%r45 = getelementptr i64, i64* %r2, i32 1
+store i64 %r43, i64* %r45
+%r46 = lshr i192 %r42, 64
+%r47 = trunc i192 %r46 to i64
+%r49 = getelementptr i64, i64* %r2, i32 2
+store i64 %r47, i64* %r49
+%r50 = lshr i256 %r37, 192
+%r51 = trunc i256 %r50 to i64 ; bits 192 and up of the sum: 0 or 1
+ret i64 %r51
+}
+define i64 @mcl_fp_subPre3L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; subtracts the 3-limb value at %r4 from the one at %r3, stores the low 192 bits to %r2, returns the borrow (0 or 1)
+{
+%r5 = load i64, i64* %r3
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18 ; 192-bit value from r3[0..2]
+%r20 = zext i192 %r19 to i256
+%r21 = load i64, i64* %r4
+%r22 = zext i64 %r21 to i128
+%r24 = getelementptr i64, i64* %r4, i32 1
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i128
+%r27 = shl i128 %r26, 64
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i192
+%r31 = getelementptr i64, i64* %r4, i32 2
+%r32 = load i64, i64* %r31
+%r33 = zext i64 %r32 to i192
+%r34 = shl i192 %r33, 128
+%r35 = or i192 %r29, %r34 ; 192-bit value from r4[0..2]
+%r36 = zext i192 %r35 to i256
+%r37 = sub i256 %r20, %r36 ; done in i256; a borrow sets every bit from 192 upward
+%r38 = trunc i256 %r37 to i192
+%r39 = trunc i192 %r38 to i64
+%r41 = getelementptr i64, i64* %r2, i32 0
+store i64 %r39, i64* %r41
+%r42 = lshr i192 %r38, 64
+%r43 = trunc i192 %r42 to i64
+%r45 = getelementptr i64, i64* %r2, i32 1
+store i64 %r43, i64* %r45
+%r46 = lshr i192 %r42, 64
+%r47 = trunc i192 %r46 to i64
+%r49 = getelementptr i64, i64* %r2, i32 2
+store i64 %r47, i64* %r49
+%r50 = lshr i256 %r37, 192
+%r51 = trunc i256 %r50 to i64
+%r53 = and i64 %r51, 1 ; keep only bit 192 so the return value is 0 or 1
+ret i64 %r53
+}
+define void @mcl_fp_shr1_3L(i64* noalias  %r1, i64* noalias  %r2) ; logical right shift by one bit of a 3-limb value: %r1[0..2] = %r2[0..2] >> 1
+{ ; limbs are 64-bit, limb 0 holding the least significant bits
+%r3 = load i64, i64* %r2 ; --- pack %r2[0..2] into one 192-bit value (%r17)
+%r4 = zext i64 %r3 to i128
+%r6 = getelementptr i64, i64* %r2, i32 1
+%r7 = load i64, i64* %r6
+%r8 = zext i64 %r7 to i128
+%r9 = shl i128 %r8, 64
+%r10 = or i128 %r4, %r9
+%r11 = zext i128 %r10 to i192
+%r13 = getelementptr i64, i64* %r2, i32 2
+%r14 = load i64, i64* %r13
+%r15 = zext i64 %r14 to i192
+%r16 = shl i192 %r15, 128
+%r17 = or i192 %r11, %r16
+%r18 = lshr i192 %r17, 1 ; zero is shifted into the top bit; the lowest bit is discarded
+%r19 = trunc i192 %r18 to i64 ; --- unpack the shifted value into %r1[0..2]
+%r21 = getelementptr i64, i64* %r1, i32 0
+store i64 %r19, i64* %r21
+%r22 = lshr i192 %r18, 64
+%r23 = trunc i192 %r22 to i64
+%r25 = getelementptr i64, i64* %r1, i32 1
+store i64 %r23, i64* %r25
+%r26 = lshr i192 %r22, 64
+%r27 = trunc i192 %r26 to i64
+%r29 = getelementptr i64, i64* %r1, i32 2
+store i64 %r27, i64* %r29
+ret void
+}
+define void @mcl_fp_add3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; 3-limb add with one conditional subtraction: %r1 = %r2 + %r3, minus %r4 when the sum is >= %r4
+{ ; %r4 is presumably the field modulus p (confirm against callers); limb 0 is least significant
+%r5 = load i64, i64* %r2 ; --- pack %r2[0..2] into a 192-bit value (%r19)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = load i64, i64* %r3 ; --- pack %r3[0..2] into a 192-bit value (%r34)
+%r21 = zext i64 %r20 to i128
+%r23 = getelementptr i64, i64* %r3, i32 1
+%r24 = load i64, i64* %r23
+%r25 = zext i64 %r24 to i128
+%r26 = shl i128 %r25, 64
+%r27 = or i128 %r21, %r26
+%r28 = zext i128 %r27 to i192
+%r30 = getelementptr i64, i64* %r3, i32 2
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i192
+%r33 = shl i192 %r32, 128
+%r34 = or i192 %r28, %r33
+%r35 = zext i192 %r19 to i256 ; widen both operands so the carry out of bit 191 is kept
+%r36 = zext i192 %r34 to i256
+%r37 = add i256 %r35, %r36
+%r38 = trunc i256 %r37 to i192
+%r39 = trunc i192 %r38 to i64 ; --- store the low 192 bits of the raw sum to %r1 first
+%r41 = getelementptr i64, i64* %r1, i32 0
+store i64 %r39, i64* %r41
+%r42 = lshr i192 %r38, 64
+%r43 = trunc i192 %r42 to i64
+%r45 = getelementptr i64, i64* %r1, i32 1
+store i64 %r43, i64* %r45
+%r46 = lshr i192 %r42, 64
+%r47 = trunc i192 %r46 to i64
+%r49 = getelementptr i64, i64* %r1, i32 2
+store i64 %r47, i64* %r49
+%r50 = load i64, i64* %r4 ; --- pack %r4[0..2] into a 192-bit value (%r64)
+%r51 = zext i64 %r50 to i128
+%r53 = getelementptr i64, i64* %r4, i32 1
+%r54 = load i64, i64* %r53
+%r55 = zext i64 %r54 to i128
+%r56 = shl i128 %r55, 64
+%r57 = or i128 %r51, %r56
+%r58 = zext i128 %r57 to i192
+%r60 = getelementptr i64, i64* %r4, i32 2
+%r61 = load i64, i64* %r60
+%r62 = zext i64 %r61 to i192
+%r63 = shl i192 %r62, 128
+%r64 = or i192 %r58, %r63
+%r65 = zext i192 %r64 to i256
+%r66 = sub i256 %r37, %r65 ; trial subtraction of %r4 from the full 256-bit sum
+%r67 = lshr i256 %r66, 192
+%r68 = trunc i256 %r67 to i1 ; bit 192 of the difference
+br i1%r68, label %carry, label %nocarry ; bit set -> keep the sum already stored
+nocarry: ; bit clear: overwrite %r1 with the low 192 bits of the difference
+%r69 = trunc i256 %r66 to i192
+%r70 = trunc i192 %r69 to i64
+%r72 = getelementptr i64, i64* %r1, i32 0
+store i64 %r70, i64* %r72
+%r73 = lshr i192 %r69, 64
+%r74 = trunc i192 %r73 to i64
+%r76 = getelementptr i64, i64* %r1, i32 1
+store i64 %r74, i64* %r76
+%r77 = lshr i192 %r73, 64
+%r78 = trunc i192 %r77 to i64
+%r80 = getelementptr i64, i64* %r1, i32 2
+store i64 %r78, i64* %r80
+ret void
+carry: ; nothing more to do: %r1 already holds the unreduced sum
+ret void
+}
+define void @mcl_fp_addNF3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; branch-free 3-limb add with conditional subtraction of %r4, done entirely in 192 bits
+{ ; no widening is used, so a carry out of bit 191 would be lost; presumably callers guarantee the operands leave the top bit free - confirm
+%r5 = load i64, i64* %r2 ; --- pack %r2[0..2] into a 192-bit value (%r19)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = load i64, i64* %r3 ; --- pack %r3[0..2] into a 192-bit value (%r34)
+%r21 = zext i64 %r20 to i128
+%r23 = getelementptr i64, i64* %r3, i32 1
+%r24 = load i64, i64* %r23
+%r25 = zext i64 %r24 to i128
+%r26 = shl i128 %r25, 64
+%r27 = or i128 %r21, %r26
+%r28 = zext i128 %r27 to i192
+%r30 = getelementptr i64, i64* %r3, i32 2
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i192
+%r33 = shl i192 %r32, 128
+%r34 = or i192 %r28, %r33
+%r35 = add i192 %r19, %r34 ; raw sum, modulo 2^192
+%r36 = load i64, i64* %r4 ; --- pack %r4[0..2] into a 192-bit value (%r50)
+%r37 = zext i64 %r36 to i128
+%r39 = getelementptr i64, i64* %r4, i32 1
+%r40 = load i64, i64* %r39
+%r41 = zext i64 %r40 to i128
+%r42 = shl i128 %r41, 64
+%r43 = or i128 %r37, %r42
+%r44 = zext i128 %r43 to i192
+%r46 = getelementptr i64, i64* %r4, i32 2
+%r47 = load i64, i64* %r46
+%r48 = zext i64 %r47 to i192
+%r49 = shl i192 %r48, 128
+%r50 = or i192 %r44, %r49
+%r51 = sub i192 %r35, %r50 ; trial subtraction of %r4
+%r52 = lshr i192 %r51, 191
+%r53 = trunc i192 %r52 to i1 ; top (sign) bit of the difference
+%r54 = select i1 %r53, i192 %r35, i192 %r51 ; sign bit set -> keep the raw sum, otherwise take the difference
+%r55 = trunc i192 %r54 to i64 ; --- unpack the selected value into %r1[0..2]
+%r57 = getelementptr i64, i64* %r1, i32 0
+store i64 %r55, i64* %r57
+%r58 = lshr i192 %r54, 64
+%r59 = trunc i192 %r58 to i64
+%r61 = getelementptr i64, i64* %r1, i32 1
+store i64 %r59, i64* %r61
+%r62 = lshr i192 %r58, 64
+%r63 = trunc i192 %r62 to i64
+%r65 = getelementptr i64, i64* %r1, i32 2
+store i64 %r63, i64* %r65
+ret void
+}
+define void @mcl_fp_sub3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; 3-limb subtract with conditional add-back: %r1 = %r2 - %r3, plus %r4 when the subtraction borrows
+{ ; %r4 is presumably the field modulus p (confirm against callers); limb 0 is least significant
+%r5 = load i64, i64* %r2 ; --- pack %r2[0..2] into a 192-bit value (%r19)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = load i64, i64* %r3 ; --- pack %r3[0..2] into a 192-bit value (%r34)
+%r21 = zext i64 %r20 to i128
+%r23 = getelementptr i64, i64* %r3, i32 1
+%r24 = load i64, i64* %r23
+%r25 = zext i64 %r24 to i128
+%r26 = shl i128 %r25, 64
+%r27 = or i128 %r21, %r26
+%r28 = zext i128 %r27 to i192
+%r30 = getelementptr i64, i64* %r3, i32 2
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i192
+%r33 = shl i192 %r32, 128
+%r34 = or i192 %r28, %r33
+%r35 = zext i192 %r19 to i256 ; widen so a borrow shows up in bit 192
+%r36 = zext i192 %r34 to i256
+%r37 = sub i256 %r35, %r36
+%r38 = trunc i256 %r37 to i192 ; low 192 bits of the difference
+%r39 = lshr i256 %r37, 192
+%r40 = trunc i256 %r39 to i1 ; bit 192: set when %r2 < %r3
+%r41 = trunc i192 %r38 to i64 ; --- store the raw difference to %r1 first
+%r43 = getelementptr i64, i64* %r1, i32 0
+store i64 %r41, i64* %r43
+%r44 = lshr i192 %r38, 64
+%r45 = trunc i192 %r44 to i64
+%r47 = getelementptr i64, i64* %r1, i32 1
+store i64 %r45, i64* %r47
+%r48 = lshr i192 %r44, 64
+%r49 = trunc i192 %r48 to i64
+%r51 = getelementptr i64, i64* %r1, i32 2
+store i64 %r49, i64* %r51
+br i1%r40, label %carry, label %nocarry
+nocarry: ; no borrow: the stored difference is final
+ret void
+carry: ; borrow: add %r4 back and overwrite %r1
+%r52 = load i64, i64* %r4 ; --- pack %r4[0..2] into a 192-bit value (%r66)
+%r53 = zext i64 %r52 to i128
+%r55 = getelementptr i64, i64* %r4, i32 1
+%r56 = load i64, i64* %r55
+%r57 = zext i64 %r56 to i128
+%r58 = shl i128 %r57, 64
+%r59 = or i128 %r53, %r58
+%r60 = zext i128 %r59 to i192
+%r62 = getelementptr i64, i64* %r4, i32 2
+%r63 = load i64, i64* %r62
+%r64 = zext i64 %r63 to i192
+%r65 = shl i192 %r64, 128
+%r66 = or i192 %r60, %r65
+%r67 = add i192 %r38, %r66 ; 192-bit add; the carry out is intentionally dropped
+%r68 = trunc i192 %r67 to i64
+%r70 = getelementptr i64, i64* %r1, i32 0
+store i64 %r68, i64* %r70
+%r71 = lshr i192 %r67, 64
+%r72 = trunc i192 %r71 to i64
+%r74 = getelementptr i64, i64* %r1, i32 1
+store i64 %r72, i64* %r74
+%r75 = lshr i192 %r71, 64
+%r76 = trunc i192 %r75 to i64
+%r78 = getelementptr i64, i64* %r1, i32 2
+store i64 %r76, i64* %r78
+ret void
+}
+define void @mcl_fp_subNF3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; branch-free 3-limb subtract with conditional add-back of %r4, done entirely in 192 bits
+{ ; the sign bit (bit 191) of the difference stands in for the borrow; presumably valid only when operands leave the top bit free - confirm
+%r5 = load i64, i64* %r2 ; --- pack %r2[0..2] into a 192-bit value (%r19)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = load i64, i64* %r3 ; --- pack %r3[0..2] into a 192-bit value (%r34)
+%r21 = zext i64 %r20 to i128
+%r23 = getelementptr i64, i64* %r3, i32 1
+%r24 = load i64, i64* %r23
+%r25 = zext i64 %r24 to i128
+%r26 = shl i128 %r25, 64
+%r27 = or i128 %r21, %r26
+%r28 = zext i128 %r27 to i192
+%r30 = getelementptr i64, i64* %r3, i32 2
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i192
+%r33 = shl i192 %r32, 128
+%r34 = or i192 %r28, %r33
+%r35 = sub i192 %r19, %r34 ; raw difference, modulo 2^192
+%r36 = lshr i192 %r35, 191
+%r37 = trunc i192 %r36 to i1 ; top (sign) bit of the difference
+%r38 = load i64, i64* %r4 ; --- pack %r4[0..2] into a 192-bit value (%r52)
+%r39 = zext i64 %r38 to i128
+%r41 = getelementptr i64, i64* %r4, i32 1
+%r42 = load i64, i64* %r41
+%r43 = zext i64 %r42 to i128
+%r44 = shl i128 %r43, 64
+%r45 = or i128 %r39, %r44
+%r46 = zext i128 %r45 to i192
+%r48 = getelementptr i64, i64* %r4, i32 2
+%r49 = load i64, i64* %r48
+%r50 = zext i64 %r49 to i192
+%r51 = shl i192 %r50, 128
+%r52 = or i192 %r46, %r51
+%r54 = select i1 %r37, i192 %r52, i192 0 ; addend is %r4 when the sign bit is set, otherwise zero
+%r55 = add i192 %r35, %r54
+%r56 = trunc i192 %r55 to i64 ; --- unpack the result into %r1[0..2]
+%r58 = getelementptr i64, i64* %r1, i32 0
+store i64 %r56, i64* %r58
+%r59 = lshr i192 %r55, 64
+%r60 = trunc i192 %r59 to i64
+%r62 = getelementptr i64, i64* %r1, i32 1
+store i64 %r60, i64* %r62
+%r63 = lshr i192 %r59, 64
+%r64 = trunc i192 %r63 to i64
+%r66 = getelementptr i64, i64* %r1, i32 2
+store i64 %r64, i64* %r66
+ret void
+}
+define void @mcl_fpDbl_add3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; double-width add: %r1[0..5] = %r2[0..5] + %r3[0..5], with the upper half conditionally reduced by the 3-limb value at %r4
+{ ; %r2 and %r3 are 6-limb (384-bit) values, %r4 is 3 limbs (presumably the modulus p - confirm); limb 0 is least significant
+%r5 = load i64, i64* %r2 ; --- pack %r2[0..5] into a 384-bit value (%r40)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = load i64, i64* %r3 ; --- pack %r3[0..5] into a 384-bit value (%r76)
+%r42 = zext i64 %r41 to i128
+%r44 = getelementptr i64, i64* %r3, i32 1
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i128
+%r47 = shl i128 %r46, 64
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r48 to i192
+%r51 = getelementptr i64, i64* %r3, i32 2
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r49, %r54
+%r56 = zext i192 %r55 to i256
+%r58 = getelementptr i64, i64* %r3, i32 3
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i256
+%r61 = shl i256 %r60, 192
+%r62 = or i256 %r56, %r61
+%r63 = zext i256 %r62 to i320
+%r65 = getelementptr i64, i64* %r3, i32 4
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i320
+%r68 = shl i320 %r67, 256
+%r69 = or i320 %r63, %r68
+%r70 = zext i320 %r69 to i384
+%r72 = getelementptr i64, i64* %r3, i32 5
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i384
+%r75 = shl i384 %r74, 320
+%r76 = or i384 %r70, %r75
+%r77 = zext i384 %r40 to i448 ; widen so the carry out of bit 383 is kept
+%r78 = zext i384 %r76 to i448
+%r79 = add i448 %r77, %r78
+%r80 = trunc i448 %r79 to i192 ; low half of the sum is stored unchanged
+%r81 = trunc i192 %r80 to i64 ; --- store the low three limbs to %r1[0..2]
+%r83 = getelementptr i64, i64* %r1, i32 0
+store i64 %r81, i64* %r83
+%r84 = lshr i192 %r80, 64
+%r85 = trunc i192 %r84 to i64
+%r87 = getelementptr i64, i64* %r1, i32 1
+store i64 %r85, i64* %r87
+%r88 = lshr i192 %r84, 64
+%r89 = trunc i192 %r88 to i64
+%r91 = getelementptr i64, i64* %r1, i32 2
+store i64 %r89, i64* %r91
+%r92 = lshr i448 %r79, 192
+%r93 = trunc i448 %r92 to i256 ; upper half of the sum including its carry bit (bit 192 of this value)
+%r94 = load i64, i64* %r4 ; --- pack %r4[0..2] into a 192-bit value (%r108)
+%r95 = zext i64 %r94 to i128
+%r97 = getelementptr i64, i64* %r4, i32 1
+%r98 = load i64, i64* %r97
+%r99 = zext i64 %r98 to i128
+%r100 = shl i128 %r99, 64
+%r101 = or i128 %r95, %r100
+%r102 = zext i128 %r101 to i192
+%r104 = getelementptr i64, i64* %r4, i32 2
+%r105 = load i64, i64* %r104
+%r106 = zext i64 %r105 to i192
+%r107 = shl i192 %r106, 128
+%r108 = or i192 %r102, %r107
+%r109 = zext i192 %r108 to i256
+%r110 = sub i256 %r93, %r109 ; trial subtraction of %r4 from the upper half
+%r111 = lshr i256 %r110, 192
+%r112 = trunc i256 %r111 to i1 ; bit 192 of the difference
+%r113 = select i1 %r112, i256 %r93, i256 %r110 ; bit set -> keep the unreduced upper half, otherwise take the difference
+%r114 = trunc i256 %r113 to i192
+%r116 = getelementptr i64, i64* %r1, i32 3 ; --- store the selected upper half to %r1[3..5]
+%r117 = trunc i192 %r114 to i64
+%r119 = getelementptr i64, i64* %r116, i32 0
+store i64 %r117, i64* %r119
+%r120 = lshr i192 %r114, 64
+%r121 = trunc i192 %r120 to i64
+%r123 = getelementptr i64, i64* %r116, i32 1
+store i64 %r121, i64* %r123
+%r124 = lshr i192 %r120, 64
+%r125 = trunc i192 %r124 to i64
+%r127 = getelementptr i64, i64* %r116, i32 2
+store i64 %r125, i64* %r127
+ret void
+}
+define void @mcl_fpDbl_sub3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; double-width subtract: %r1[0..5] = %r2[0..5] - %r3[0..5], adding the 3-limb value at %r4 to the upper half on borrow
+{ ; %r2 and %r3 are 6-limb (384-bit) values, %r4 is 3 limbs (presumably the modulus p - confirm); limb 0 is least significant
+%r5 = load i64, i64* %r2 ; --- pack %r2[0..5] into a 384-bit value (%r40)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = load i64, i64* %r3 ; --- pack %r3[0..5] into a 384-bit value (%r76)
+%r42 = zext i64 %r41 to i128
+%r44 = getelementptr i64, i64* %r3, i32 1
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i128
+%r47 = shl i128 %r46, 64
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r48 to i192
+%r51 = getelementptr i64, i64* %r3, i32 2
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r49, %r54
+%r56 = zext i192 %r55 to i256
+%r58 = getelementptr i64, i64* %r3, i32 3
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i256
+%r61 = shl i256 %r60, 192
+%r62 = or i256 %r56, %r61
+%r63 = zext i256 %r62 to i320
+%r65 = getelementptr i64, i64* %r3, i32 4
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i320
+%r68 = shl i320 %r67, 256
+%r69 = or i320 %r63, %r68
+%r70 = zext i320 %r69 to i384
+%r72 = getelementptr i64, i64* %r3, i32 5
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i384
+%r75 = shl i384 %r74, 320
+%r76 = or i384 %r70, %r75
+%r77 = zext i384 %r40 to i448 ; widen so a borrow shows up in bit 384
+%r78 = zext i384 %r76 to i448
+%r79 = sub i448 %r77, %r78
+%r80 = trunc i448 %r79 to i192 ; low half of the difference is stored unchanged
+%r81 = trunc i192 %r80 to i64 ; --- store the low three limbs to %r1[0..2]
+%r83 = getelementptr i64, i64* %r1, i32 0
+store i64 %r81, i64* %r83
+%r84 = lshr i192 %r80, 64
+%r85 = trunc i192 %r84 to i64
+%r87 = getelementptr i64, i64* %r1, i32 1
+store i64 %r85, i64* %r87
+%r88 = lshr i192 %r84, 64
+%r89 = trunc i192 %r88 to i64
+%r91 = getelementptr i64, i64* %r1, i32 2
+store i64 %r89, i64* %r91
+%r92 = lshr i448 %r79, 192
+%r93 = trunc i448 %r92 to i192 ; upper half of the difference (bits 192..383)
+%r94 = lshr i448 %r79, 384
+%r95 = trunc i448 %r94 to i1 ; bit 384: set when %r2 < %r3
+%r96 = load i64, i64* %r4 ; --- pack %r4[0..2] into a 192-bit value (%r110)
+%r97 = zext i64 %r96 to i128
+%r99 = getelementptr i64, i64* %r4, i32 1
+%r100 = load i64, i64* %r99
+%r101 = zext i64 %r100 to i128
+%r102 = shl i128 %r101, 64
+%r103 = or i128 %r97, %r102
+%r104 = zext i128 %r103 to i192
+%r106 = getelementptr i64, i64* %r4, i32 2
+%r107 = load i64, i64* %r106
+%r108 = zext i64 %r107 to i192
+%r109 = shl i192 %r108, 128
+%r110 = or i192 %r104, %r109
+%r112 = select i1 %r95, i192 %r110, i192 0 ; addend is %r4 on borrow, otherwise zero
+%r113 = add i192 %r93, %r112
+%r115 = getelementptr i64, i64* %r1, i32 3 ; --- store the corrected upper half to %r1[3..5]
+%r116 = trunc i192 %r113 to i64
+%r118 = getelementptr i64, i64* %r115, i32 0
+store i64 %r116, i64* %r118
+%r119 = lshr i192 %r113, 64
+%r120 = trunc i192 %r119 to i64
+%r122 = getelementptr i64, i64* %r115, i32 1
+store i64 %r120, i64* %r122
+%r123 = lshr i192 %r119, 64
+%r124 = trunc i192 %r123 to i64
+%r126 = getelementptr i64, i64* %r115, i32 2
+store i64 %r124, i64* %r126
+ret void
+}
+define i320 @mulPv256x64(i64* noalias  %r2, i64 %r3) ; combines four mulPos64x64 results for indices 0..3 into one 320-bit value; presumably the product of the 4-limb value at %r2 and the 64-bit scalar %r3 - confirm against mulPos64x64
+{ ; mulPos64x64 and extractHigh64 are defined outside this view; the comments below assume they yield a 128-bit partial product and its upper 64 bits
+%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0) ; --- partial result for index 0, split into low (%r6) and high (%r7) words
+%r6 = trunc i128 %r5 to i64
+%r7 = call i64 @extractHigh64(i128 %r5)
+%r9 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 1) ; --- index 1: low %r10, high %r11
+%r10 = trunc i128 %r9 to i64
+%r11 = call i64 @extractHigh64(i128 %r9)
+%r13 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 2) ; --- index 2: low %r14, high %r15
+%r14 = trunc i128 %r13 to i64
+%r15 = call i64 @extractHigh64(i128 %r13)
+%r17 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 3) ; --- index 3: low %r18, high %r19
+%r18 = trunc i128 %r17 to i64
+%r19 = call i64 @extractHigh64(i128 %r17)
+%r20 = zext i64 %r6 to i128 ; --- pack the four low words into a 256-bit value (%r31)
+%r21 = zext i64 %r10 to i128
+%r22 = shl i128 %r21, 64
+%r23 = or i128 %r20, %r22
+%r24 = zext i128 %r23 to i192
+%r25 = zext i64 %r14 to i192
+%r26 = shl i192 %r25, 128
+%r27 = or i192 %r24, %r26
+%r28 = zext i192 %r27 to i256
+%r29 = zext i64 %r18 to i256
+%r30 = shl i256 %r29, 192
+%r31 = or i256 %r28, %r30
+%r32 = zext i64 %r7 to i128 ; --- pack the four high words into a 256-bit value (%r43)
+%r33 = zext i64 %r11 to i128
+%r34 = shl i128 %r33, 64
+%r35 = or i128 %r32, %r34
+%r36 = zext i128 %r35 to i192
+%r37 = zext i64 %r15 to i192
+%r38 = shl i192 %r37, 128
+%r39 = or i192 %r36, %r38
+%r40 = zext i192 %r39 to i256
+%r41 = zext i64 %r19 to i256
+%r42 = shl i256 %r41, 192
+%r43 = or i256 %r40, %r42
+%r44 = zext i256 %r31 to i320
+%r45 = zext i256 %r43 to i320
+%r46 = shl i320 %r45, 64 ; high words sit one limb above their matching low words
+%r47 = add i320 %r44, %r46 ; a single wide add propagates all inter-limb carries
+ret i320 %r47
+}
+define void @mcl_fp_mulUnitPre4L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3) ; writes the 320-bit result of mulPv256x64(%r2, %r3) to %r1 as five 64-bit limbs, with no reduction
+{ ; %r1 must have room for 5 limbs; limb 0 receives the least significant bits
+%r4 = call i320 @mulPv256x64(i64* %r2, i64 %r3)
+%r5 = trunc i320 %r4 to i64 ; --- peel off one limb at a time, shifting right by 64 between stores
+%r7 = getelementptr i64, i64* %r1, i32 0
+store i64 %r5, i64* %r7
+%r8 = lshr i320 %r4, 64
+%r9 = trunc i320 %r8 to i64
+%r11 = getelementptr i64, i64* %r1, i32 1
+store i64 %r9, i64* %r11
+%r12 = lshr i320 %r8, 64
+%r13 = trunc i320 %r12 to i64
+%r15 = getelementptr i64, i64* %r1, i32 2
+store i64 %r13, i64* %r15
+%r16 = lshr i320 %r12, 64
+%r17 = trunc i320 %r16 to i64
+%r19 = getelementptr i64, i64* %r1, i32 3
+store i64 %r17, i64* %r19
+%r20 = lshr i320 %r16, 64
+%r21 = trunc i320 %r20 to i64
+%r23 = getelementptr i64, i64* %r1, i32 4
+store i64 %r21, i64* %r23
+ret void
+}
+define void @mcl_fpDbl_mulPre4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; row-by-row multiply without reduction: accumulates mulPv256x64(%r2, %r3[i]) for i = 0..3 and writes 8 limbs to %r1
+{ ; %r2 and %r3 are read as 4 limbs each; %r1 receives 8 limbs, limb 0 least significant
+%r4 = load i64, i64* %r3 ; --- row 0: %r2 times %r3[0]
+%r5 = call i320 @mulPv256x64(i64* %r2, i64 %r4)
+%r6 = trunc i320 %r5 to i64
+store i64 %r6, i64* %r1 ; the lowest limb is final after each row
+%r7 = lshr i320 %r5, 64 ; drop the stored limb; the rest is the running accumulator
+%r9 = getelementptr i64, i64* %r3, i32 1 ; --- row 1: add %r2 times %r3[1]
+%r10 = load i64, i64* %r9
+%r11 = call i320 @mulPv256x64(i64* %r2, i64 %r10)
+%r12 = add i320 %r7, %r11
+%r13 = trunc i320 %r12 to i64
+%r15 = getelementptr i64, i64* %r1, i32 1
+store i64 %r13, i64* %r15
+%r16 = lshr i320 %r12, 64
+%r18 = getelementptr i64, i64* %r3, i32 2 ; --- row 2: add %r2 times %r3[2]
+%r19 = load i64, i64* %r18
+%r20 = call i320 @mulPv256x64(i64* %r2, i64 %r19)
+%r21 = add i320 %r16, %r20
+%r22 = trunc i320 %r21 to i64
+%r24 = getelementptr i64, i64* %r1, i32 2
+store i64 %r22, i64* %r24
+%r25 = lshr i320 %r21, 64
+%r27 = getelementptr i64, i64* %r3, i32 3 ; --- row 3: add %r2 times %r3[3]
+%r28 = load i64, i64* %r27
+%r29 = call i320 @mulPv256x64(i64* %r2, i64 %r28)
+%r30 = add i320 %r25, %r29
+%r32 = getelementptr i64, i64* %r1, i32 3 ; --- flush all five limbs of the final accumulator to %r1[3..7]
+%r33 = trunc i320 %r30 to i64
+%r35 = getelementptr i64, i64* %r32, i32 0
+store i64 %r33, i64* %r35
+%r36 = lshr i320 %r30, 64
+%r37 = trunc i320 %r36 to i64
+%r39 = getelementptr i64, i64* %r32, i32 1
+store i64 %r37, i64* %r39
+%r40 = lshr i320 %r36, 64
+%r41 = trunc i320 %r40 to i64
+%r43 = getelementptr i64, i64* %r32, i32 2
+store i64 %r41, i64* %r43
+%r44 = lshr i320 %r40, 64
+%r45 = trunc i320 %r44 to i64
+%r47 = getelementptr i64, i64* %r32, i32 3
+store i64 %r45, i64* %r47
+%r48 = lshr i320 %r44, 64
+%r49 = trunc i320 %r48 to i64
+%r51 = getelementptr i64, i64* %r32, i32 4
+store i64 %r49, i64* %r51
+ret void
+}
+define void @mcl_fpDbl_sqrPre4L(i64* noalias  %r1, i64* noalias  %r2) ; squaring without reduction: same row-by-row scheme as a general multiply, with %r2 used as both operands; writes 8 limbs to %r1
+{ ; %r2 is read as 4 limbs; no squaring-specific shortcut is taken, all four rows are computed in full
+%r3 = load i64, i64* %r2 ; --- row 0: %r2 times %r2[0]
+%r4 = call i320 @mulPv256x64(i64* %r2, i64 %r3)
+%r5 = trunc i320 %r4 to i64
+store i64 %r5, i64* %r1 ; the lowest limb is final after each row
+%r6 = lshr i320 %r4, 64 ; drop the stored limb; the rest is the running accumulator
+%r8 = getelementptr i64, i64* %r2, i32 1 ; --- row 1: add %r2 times %r2[1]
+%r9 = load i64, i64* %r8
+%r10 = call i320 @mulPv256x64(i64* %r2, i64 %r9)
+%r11 = add i320 %r6, %r10
+%r12 = trunc i320 %r11 to i64
+%r14 = getelementptr i64, i64* %r1, i32 1
+store i64 %r12, i64* %r14
+%r15 = lshr i320 %r11, 64
+%r17 = getelementptr i64, i64* %r2, i32 2 ; --- row 2: add %r2 times %r2[2]
+%r18 = load i64, i64* %r17
+%r19 = call i320 @mulPv256x64(i64* %r2, i64 %r18)
+%r20 = add i320 %r15, %r19
+%r21 = trunc i320 %r20 to i64
+%r23 = getelementptr i64, i64* %r1, i32 2
+store i64 %r21, i64* %r23
+%r24 = lshr i320 %r20, 64
+%r26 = getelementptr i64, i64* %r2, i32 3 ; --- row 3: add %r2 times %r2[3]
+%r27 = load i64, i64* %r26
+%r28 = call i320 @mulPv256x64(i64* %r2, i64 %r27)
+%r29 = add i320 %r24, %r28
+%r31 = getelementptr i64, i64* %r1, i32 3 ; --- flush all five limbs of the final accumulator to %r1[3..7]
+%r32 = trunc i320 %r29 to i64
+%r34 = getelementptr i64, i64* %r31, i32 0
+store i64 %r32, i64* %r34
+%r35 = lshr i320 %r29, 64
+%r36 = trunc i320 %r35 to i64
+%r38 = getelementptr i64, i64* %r31, i32 1
+store i64 %r36, i64* %r38
+%r39 = lshr i320 %r35, 64
+%r40 = trunc i320 %r39 to i64
+%r42 = getelementptr i64, i64* %r31, i32 2
+store i64 %r40, i64* %r42
+%r43 = lshr i320 %r39, 64
+%r44 = trunc i320 %r43 to i64
+%r46 = getelementptr i64, i64* %r31, i32 3
+store i64 %r44, i64* %r46
+%r47 = lshr i320 %r43, 64
+%r48 = trunc i320 %r47 to i64
+%r50 = getelementptr i64, i64* %r31, i32 4
+store i64 %r48, i64* %r50
+ret void
+}
+define void @mcl_fp_mont4L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; 4-limb interleaved multiply-and-reduce (Montgomery-style): %r1 = reduce(%r2 * %r3) with respect to the 4-limb value at %r4
+{ ; pointers are not noalias here; %r4 presumably points at the modulus p with a precomputed per-modulus constant stored at %r4[-1] - confirm
+%r6 = getelementptr i64, i64* %r4, i32 -1
+%r7 = load i64, i64* %r6 ; word just before %r4[0]; multiplied with the low accumulator limb each round to get the quotient word
+%r9 = getelementptr i64, i64* %r3, i32 0 ; --- round 0: accumulator = %r2 * %r3[0]
+%r10 = load i64, i64* %r9
+%r11 = call i320 @mulPv256x64(i64* %r2, i64 %r10)
+%r12 = zext i320 %r11 to i384 ; 384-bit accumulator leaves headroom for the carries of both adds
+%r13 = trunc i320 %r11 to i64
+%r14 = mul i64 %r13, %r7 ; quotient word q = low limb * %r7 (mod 2^64)
+%r15 = call i320 @mulPv256x64(i64* %r4, i64 %r14)
+%r16 = zext i320 %r15 to i384
+%r17 = add i384 %r12, %r16 ; add q * %r4; presumably this zeroes the low limb - depends on %r7
+%r18 = lshr i384 %r17, 64 ; discard the low limb
+%r20 = getelementptr i64, i64* %r3, i32 1 ; --- round 1: add %r2 * %r3[1], then reduce one limb
+%r21 = load i64, i64* %r20
+%r22 = call i320 @mulPv256x64(i64* %r2, i64 %r21)
+%r23 = zext i320 %r22 to i384
+%r24 = add i384 %r18, %r23
+%r25 = trunc i384 %r24 to i64
+%r26 = mul i64 %r25, %r7
+%r27 = call i320 @mulPv256x64(i64* %r4, i64 %r26)
+%r28 = zext i320 %r27 to i384
+%r29 = add i384 %r24, %r28
+%r30 = lshr i384 %r29, 64
+%r32 = getelementptr i64, i64* %r3, i32 2 ; --- round 2: add %r2 * %r3[2], then reduce one limb
+%r33 = load i64, i64* %r32
+%r34 = call i320 @mulPv256x64(i64* %r2, i64 %r33)
+%r35 = zext i320 %r34 to i384
+%r36 = add i384 %r30, %r35
+%r37 = trunc i384 %r36 to i64
+%r38 = mul i64 %r37, %r7
+%r39 = call i320 @mulPv256x64(i64* %r4, i64 %r38)
+%r40 = zext i320 %r39 to i384
+%r41 = add i384 %r36, %r40
+%r42 = lshr i384 %r41, 64
+%r44 = getelementptr i64, i64* %r3, i32 3 ; --- round 3: add %r2 * %r3[3], then reduce one limb
+%r45 = load i64, i64* %r44
+%r46 = call i320 @mulPv256x64(i64* %r2, i64 %r45)
+%r47 = zext i320 %r46 to i384
+%r48 = add i384 %r42, %r47
+%r49 = trunc i384 %r48 to i64
+%r50 = mul i64 %r49, %r7
+%r51 = call i320 @mulPv256x64(i64* %r4, i64 %r50)
+%r52 = zext i320 %r51 to i384
+%r53 = add i384 %r48, %r52
+%r54 = lshr i384 %r53, 64
+%r55 = trunc i384 %r54 to i320 ; candidate result, kept in 320 bits so it may exceed 256 bits
+%r56 = load i64, i64* %r4 ; --- pack %r4[0..3] into a 256-bit value (%r77)
+%r57 = zext i64 %r56 to i128
+%r59 = getelementptr i64, i64* %r4, i32 1
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i128
+%r62 = shl i128 %r61, 64
+%r63 = or i128 %r57, %r62
+%r64 = zext i128 %r63 to i192
+%r66 = getelementptr i64, i64* %r4, i32 2
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i192
+%r69 = shl i192 %r68, 128
+%r70 = or i192 %r64, %r69
+%r71 = zext i192 %r70 to i256
+%r73 = getelementptr i64, i64* %r4, i32 3
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i256
+%r76 = shl i256 %r75, 192
+%r77 = or i256 %r71, %r76
+%r78 = zext i256 %r77 to i320
+%r79 = sub i320 %r55, %r78 ; trial subtraction of %r4
+%r80 = lshr i320 %r79, 256
+%r81 = trunc i320 %r80 to i1 ; bit 256 of the difference
+%r82 = select i1 %r81, i320 %r55, i320 %r79 ; bit set -> keep the candidate, otherwise take the difference
+%r83 = trunc i320 %r82 to i256
+%r84 = trunc i256 %r83 to i64 ; --- unpack the result into %r1[0..3]
+%r86 = getelementptr i64, i64* %r1, i32 0
+store i64 %r84, i64* %r86
+%r87 = lshr i256 %r83, 64
+%r88 = trunc i256 %r87 to i64
+%r90 = getelementptr i64, i64* %r1, i32 1
+store i64 %r88, i64* %r90
+%r91 = lshr i256 %r87, 64
+%r92 = trunc i256 %r91 to i64
+%r94 = getelementptr i64, i64* %r1, i32 2
+store i64 %r92, i64* %r94
+%r95 = lshr i256 %r91, 64
+%r96 = trunc i256 %r95 to i64
+%r98 = getelementptr i64, i64* %r1, i32 3
+store i64 %r96, i64* %r98
+ret void
+}
+define void @mcl_fp_montNF4L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; 4-limb interleaved multiply-and-reduce like the non-NF variant, but with a 320-bit accumulator and a sign-bit test at the end
+{ ; no extra headroom is kept, so carries out of bit 319 would be lost; presumably valid only when the value at %r4 leaves the top bit free - confirm
+%r6 = getelementptr i64, i64* %r4, i32 -1
+%r7 = load i64, i64* %r6 ; word just before %r4[0]; multiplied with the low accumulator limb each round to get the quotient word
+%r8 = load i64, i64* %r3 ; --- round 0: accumulator = %r2 * %r3[0]
+%r9 = call i320 @mulPv256x64(i64* %r2, i64 %r8)
+%r10 = trunc i320 %r9 to i64
+%r11 = mul i64 %r10, %r7 ; quotient word q = low limb * %r7 (mod 2^64)
+%r12 = call i320 @mulPv256x64(i64* %r4, i64 %r11)
+%r13 = add i320 %r9, %r12 ; add q * %r4 in 320 bits
+%r14 = lshr i320 %r13, 64 ; discard the low limb
+%r16 = getelementptr i64, i64* %r3, i32 1 ; --- round 1: add %r2 * %r3[1], then reduce one limb
+%r17 = load i64, i64* %r16
+%r18 = call i320 @mulPv256x64(i64* %r2, i64 %r17)
+%r19 = add i320 %r14, %r18
+%r20 = trunc i320 %r19 to i64
+%r21 = mul i64 %r20, %r7
+%r22 = call i320 @mulPv256x64(i64* %r4, i64 %r21)
+%r23 = add i320 %r19, %r22
+%r24 = lshr i320 %r23, 64
+%r26 = getelementptr i64, i64* %r3, i32 2 ; --- round 2: add %r2 * %r3[2], then reduce one limb
+%r27 = load i64, i64* %r26
+%r28 = call i320 @mulPv256x64(i64* %r2, i64 %r27)
+%r29 = add i320 %r24, %r28
+%r30 = trunc i320 %r29 to i64
+%r31 = mul i64 %r30, %r7
+%r32 = call i320 @mulPv256x64(i64* %r4, i64 %r31)
+%r33 = add i320 %r29, %r32
+%r34 = lshr i320 %r33, 64
+%r36 = getelementptr i64, i64* %r3, i32 3 ; --- round 3: add %r2 * %r3[3], then reduce one limb
+%r37 = load i64, i64* %r36
+%r38 = call i320 @mulPv256x64(i64* %r2, i64 %r37)
+%r39 = add i320 %r34, %r38
+%r40 = trunc i320 %r39 to i64
+%r41 = mul i64 %r40, %r7
+%r42 = call i320 @mulPv256x64(i64* %r4, i64 %r41)
+%r43 = add i320 %r39, %r42
+%r44 = lshr i320 %r43, 64
+%r45 = trunc i320 %r44 to i256 ; candidate result, truncated to 256 bits
+%r46 = load i64, i64* %r4 ; --- pack %r4[0..3] into a 256-bit value (%r67)
+%r47 = zext i64 %r46 to i128
+%r49 = getelementptr i64, i64* %r4, i32 1
+%r50 = load i64, i64* %r49
+%r51 = zext i64 %r50 to i128
+%r52 = shl i128 %r51, 64
+%r53 = or i128 %r47, %r52
+%r54 = zext i128 %r53 to i192
+%r56 = getelementptr i64, i64* %r4, i32 2
+%r57 = load i64, i64* %r56
+%r58 = zext i64 %r57 to i192
+%r59 = shl i192 %r58, 128
+%r60 = or i192 %r54, %r59
+%r61 = zext i192 %r60 to i256
+%r63 = getelementptr i64, i64* %r4, i32 3
+%r64 = load i64, i64* %r63
+%r65 = zext i64 %r64 to i256
+%r66 = shl i256 %r65, 192
+%r67 = or i256 %r61, %r66
+%r68 = sub i256 %r45, %r67 ; trial subtraction of %r4 in 256 bits
+%r69 = lshr i256 %r68, 255
+%r70 = trunc i256 %r69 to i1 ; top (sign) bit of the difference
+%r71 = select i1 %r70, i256 %r45, i256 %r68 ; sign bit set -> keep the candidate, otherwise take the difference
+%r72 = trunc i256 %r71 to i64 ; --- unpack the result into %r1[0..3]
+%r74 = getelementptr i64, i64* %r1, i32 0
+store i64 %r72, i64* %r74
+%r75 = lshr i256 %r71, 64
+%r76 = trunc i256 %r75 to i64
+%r78 = getelementptr i64, i64* %r1, i32 1
+store i64 %r76, i64* %r78
+%r79 = lshr i256 %r75, 64
+%r80 = trunc i256 %r79 to i64
+%r82 = getelementptr i64, i64* %r1, i32 2
+store i64 %r80, i64* %r82
+%r83 = lshr i256 %r79, 64
+%r84 = trunc i256 %r83 to i64
+%r86 = getelementptr i64, i64* %r1, i32 3
+store i64 %r84, i64* %r86
+ret void
+}
+define void @mcl_fp_montRed4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; reduces the 8-limb value at %r2 to 4 limbs in %r1 using four limb-wise reduction rounds against the 4-limb value at %r3
+{ ; %r3 presumably points at the modulus p with a precomputed per-modulus constant stored at %r3[-1] - confirm; limb 0 is least significant
+%r5 = getelementptr i64, i64* %r3, i32 -1
+%r6 = load i64, i64* %r5 ; word just before %r3[0]; multiplied with the low limb each round to get the quotient word
+%r7 = load i64, i64* %r3 ; --- pack %r3[0..3] into a 256-bit value (%r28), used for the final subtraction
+%r8 = zext i64 %r7 to i128
+%r10 = getelementptr i64, i64* %r3, i32 1
+%r11 = load i64, i64* %r10
+%r12 = zext i64 %r11 to i128
+%r13 = shl i128 %r12, 64
+%r14 = or i128 %r8, %r13
+%r15 = zext i128 %r14 to i192
+%r17 = getelementptr i64, i64* %r3, i32 2
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i192
+%r20 = shl i192 %r19, 128
+%r21 = or i192 %r15, %r20
+%r22 = zext i192 %r21 to i256
+%r24 = getelementptr i64, i64* %r3, i32 3
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i256
+%r27 = shl i256 %r26, 192
+%r28 = or i256 %r22, %r27
+%r29 = load i64, i64* %r2 ; --- pack %r2[0..7] into a 512-bit value (%r78)
+%r30 = zext i64 %r29 to i128
+%r32 = getelementptr i64, i64* %r2, i32 1
+%r33 = load i64, i64* %r32
+%r34 = zext i64 %r33 to i128
+%r35 = shl i128 %r34, 64
+%r36 = or i128 %r30, %r35
+%r37 = zext i128 %r36 to i192
+%r39 = getelementptr i64, i64* %r2, i32 2
+%r40 = load i64, i64* %r39
+%r41 = zext i64 %r40 to i192
+%r42 = shl i192 %r41, 128
+%r43 = or i192 %r37, %r42
+%r44 = zext i192 %r43 to i256
+%r46 = getelementptr i64, i64* %r2, i32 3
+%r47 = load i64, i64* %r46
+%r48 = zext i64 %r47 to i256
+%r49 = shl i256 %r48, 192
+%r50 = or i256 %r44, %r49
+%r51 = zext i256 %r50 to i320
+%r53 = getelementptr i64, i64* %r2, i32 4
+%r54 = load i64, i64* %r53
+%r55 = zext i64 %r54 to i320
+%r56 = shl i320 %r55, 256
+%r57 = or i320 %r51, %r56
+%r58 = zext i320 %r57 to i384
+%r60 = getelementptr i64, i64* %r2, i32 5
+%r61 = load i64, i64* %r60
+%r62 = zext i64 %r61 to i384
+%r63 = shl i384 %r62, 320
+%r64 = or i384 %r58, %r63
+%r65 = zext i384 %r64 to i448
+%r67 = getelementptr i64, i64* %r2, i32 6
+%r68 = load i64, i64* %r67
+%r69 = zext i64 %r68 to i448
+%r70 = shl i448 %r69, 384
+%r71 = or i448 %r65, %r70
+%r72 = zext i448 %r71 to i512
+%r74 = getelementptr i64, i64* %r2, i32 7
+%r75 = load i64, i64* %r74
+%r76 = zext i64 %r75 to i512
+%r77 = shl i512 %r76, 448
+%r78 = or i512 %r72, %r77
+%r79 = zext i512 %r78 to i576 ; one extra limb of headroom for the carry of the first add
+%r80 = trunc i576 %r79 to i64 ; --- round 0: q = low limb * %r6, add q * %r3, drop the low limb
+%r81 = mul i64 %r80, %r6
+%r82 = call i320 @mulPv256x64(i64* %r3, i64 %r81)
+%r83 = zext i320 %r82 to i576
+%r84 = add i576 %r79, %r83
+%r85 = lshr i576 %r84, 64
+%r86 = trunc i576 %r85 to i512 ; working width shrinks by 64 bits each round
+%r87 = trunc i512 %r86 to i64 ; --- round 1
+%r88 = mul i64 %r87, %r6
+%r89 = call i320 @mulPv256x64(i64* %r3, i64 %r88)
+%r90 = zext i320 %r89 to i512
+%r91 = add i512 %r86, %r90
+%r92 = lshr i512 %r91, 64
+%r93 = trunc i512 %r92 to i448
+%r94 = trunc i448 %r93 to i64 ; --- round 2
+%r95 = mul i64 %r94, %r6
+%r96 = call i320 @mulPv256x64(i64* %r3, i64 %r95)
+%r97 = zext i320 %r96 to i448
+%r98 = add i448 %r93, %r97
+%r99 = lshr i448 %r98, 64
+%r100 = trunc i448 %r99 to i384
+%r101 = trunc i384 %r100 to i64 ; --- round 3
+%r102 = mul i64 %r101, %r6
+%r103 = call i320 @mulPv256x64(i64* %r3, i64 %r102)
+%r104 = zext i320 %r103 to i384
+%r105 = add i384 %r100, %r104
+%r106 = lshr i384 %r105, 64
+%r107 = trunc i384 %r106 to i320 ; candidate result, kept in 320 bits so it may exceed 256 bits
+%r108 = zext i256 %r28 to i320
+%r109 = sub i320 %r107, %r108 ; trial subtraction of %r3
+%r110 = lshr i320 %r109, 256
+%r111 = trunc i320 %r110 to i1 ; bit 256 of the difference
+%r112 = select i1 %r111, i320 %r107, i320 %r109 ; bit set -> keep the candidate, otherwise take the difference
+%r113 = trunc i320 %r112 to i256
+%r114 = trunc i256 %r113 to i64 ; --- unpack the result into %r1[0..3]
+%r116 = getelementptr i64, i64* %r1, i32 0
+store i64 %r114, i64* %r116
+%r117 = lshr i256 %r113, 64
+%r118 = trunc i256 %r117 to i64
+%r120 = getelementptr i64, i64* %r1, i32 1
+store i64 %r118, i64* %r120
+%r121 = lshr i256 %r117, 64
+%r122 = trunc i256 %r121 to i64
+%r124 = getelementptr i64, i64* %r1, i32 2
+store i64 %r122, i64* %r124
+%r125 = lshr i256 %r121, 64
+%r126 = trunc i256 %r125 to i64
+%r128 = getelementptr i64, i64* %r1, i32 3
+store i64 %r126, i64* %r128
+ret void
+}
+define i64 @mcl_fp_addPre4L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; 4-limb addition with no reduction: %r2[0..3] = %r3[0..3] + %r4[0..3]; returns the carry out (0 or 1)
+{ ; operands are arrays of 64-bit limbs, limb 0 holding the least significant bits
+%r5 = load i64, i64* %r3 ; --- pack %r3[0..3] into a 256-bit value (%r26)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320 ; widen so the carry out of bit 255 is kept
+%r28 = load i64, i64* %r4 ; --- pack %r4[0..3] into a 256-bit value (%r49)
+%r29 = zext i64 %r28 to i128
+%r31 = getelementptr i64, i64* %r4, i32 1
+%r32 = load i64, i64* %r31
+%r33 = zext i64 %r32 to i128
+%r34 = shl i128 %r33, 64
+%r35 = or i128 %r29, %r34
+%r36 = zext i128 %r35 to i192
+%r38 = getelementptr i64, i64* %r4, i32 2
+%r39 = load i64, i64* %r38
+%r40 = zext i64 %r39 to i192
+%r41 = shl i192 %r40, 128
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i256
+%r45 = getelementptr i64, i64* %r4, i32 3
+%r46 = load i64, i64* %r45
+%r47 = zext i64 %r46 to i256
+%r48 = shl i256 %r47, 192
+%r49 = or i256 %r43, %r48
+%r50 = zext i256 %r49 to i320
+%r51 = add i320 %r27, %r50
+%r52 = trunc i320 %r51 to i256 ; low 256 bits are the result limbs
+%r53 = trunc i256 %r52 to i64 ; --- unpack the result into %r2[0..3]
+%r55 = getelementptr i64, i64* %r2, i32 0
+store i64 %r53, i64* %r55
+%r56 = lshr i256 %r52, 64
+%r57 = trunc i256 %r56 to i64
+%r59 = getelementptr i64, i64* %r2, i32 1
+store i64 %r57, i64* %r59
+%r60 = lshr i256 %r56, 64
+%r61 = trunc i256 %r60 to i64
+%r63 = getelementptr i64, i64* %r2, i32 2
+store i64 %r61, i64* %r63
+%r64 = lshr i256 %r60, 64
+%r65 = trunc i256 %r64 to i64
+%r67 = getelementptr i64, i64* %r2, i32 3
+store i64 %r65, i64* %r67
+%r68 = lshr i320 %r51, 256 ; bits 256 and above of the sum: the carry out
+%r69 = trunc i320 %r68 to i64
+ret i64 %r69
+}
+define i64 @mcl_fp_subPre4L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; writes the low 256 bits of [%r3] - [%r4] (four 64-bit limbs each) to %r2 and returns the borrow as 0 or 1
+{
+%r5 = load i64, i64* %r3 ; limb 0 of the minuend ends up in the least significant 64 bits
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320 ; widen by one limb so a borrow shows up above bit 255
+%r28 = load i64, i64* %r4 ; subtrahend is packed the same way as the minuend
+%r29 = zext i64 %r28 to i128
+%r31 = getelementptr i64, i64* %r4, i32 1
+%r32 = load i64, i64* %r31
+%r33 = zext i64 %r32 to i128
+%r34 = shl i128 %r33, 64
+%r35 = or i128 %r29, %r34
+%r36 = zext i128 %r35 to i192
+%r38 = getelementptr i64, i64* %r4, i32 2
+%r39 = load i64, i64* %r38
+%r40 = zext i64 %r39 to i192
+%r41 = shl i192 %r40, 128
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i256
+%r45 = getelementptr i64, i64* %r4, i32 3
+%r46 = load i64, i64* %r45
+%r47 = zext i64 %r46 to i256
+%r48 = shl i256 %r47, 192
+%r49 = or i256 %r43, %r48
+%r50 = zext i256 %r49 to i320
+%r51 = sub i320 %r27, %r50 ; wraps modulo 2^320 on underflow, which sets every bit from 256 upwards
+%r52 = trunc i320 %r51 to i256
+%r53 = trunc i256 %r52 to i64 ; peel off one 64-bit limb at a time, least significant first
+%r55 = getelementptr i64, i64* %r2, i32 0
+store i64 %r53, i64* %r55
+%r56 = lshr i256 %r52, 64
+%r57 = trunc i256 %r56 to i64
+%r59 = getelementptr i64, i64* %r2, i32 1
+store i64 %r57, i64* %r59
+%r60 = lshr i256 %r56, 64
+%r61 = trunc i256 %r60 to i64
+%r63 = getelementptr i64, i64* %r2, i32 2
+store i64 %r61, i64* %r63
+%r64 = lshr i256 %r60, 64
+%r65 = trunc i256 %r64 to i64
+%r67 = getelementptr i64, i64* %r2, i32 3
+store i64 %r65, i64* %r67
+%r68 = lshr i320 %r51, 256
+%r69 = trunc i320 %r68 to i64 ; all ones when the subtraction underflowed, zero otherwise
+%r71 = and i64 %r69, 1 ; reduce the all-ones pattern to a single borrow bit
+ret i64 %r71
+}
+define void @mcl_fp_shr1_4L(i64* noalias  %r1, i64* noalias  %r2) ; stores the four-limb value at %r2 shifted right by one bit into %r1
+{
+%r3 = load i64, i64* %r2 ; limb 0 ends up in the least significant 64 bits of the packed value
+%r4 = zext i64 %r3 to i128
+%r6 = getelementptr i64, i64* %r2, i32 1
+%r7 = load i64, i64* %r6
+%r8 = zext i64 %r7 to i128
+%r9 = shl i128 %r8, 64
+%r10 = or i128 %r4, %r9
+%r11 = zext i128 %r10 to i192
+%r13 = getelementptr i64, i64* %r2, i32 2
+%r14 = load i64, i64* %r13
+%r15 = zext i64 %r14 to i192
+%r16 = shl i192 %r15, 128
+%r17 = or i192 %r11, %r16
+%r18 = zext i192 %r17 to i256
+%r20 = getelementptr i64, i64* %r2, i32 3
+%r21 = load i64, i64* %r20
+%r22 = zext i64 %r21 to i256
+%r23 = shl i256 %r22, 192
+%r24 = or i256 %r18, %r23
+%r25 = lshr i256 %r24, 1 ; logical shift: the top bit becomes zero and bits cross limb boundaries
+%r26 = trunc i256 %r25 to i64 ; peel off one 64-bit limb at a time, least significant first
+%r28 = getelementptr i64, i64* %r1, i32 0
+store i64 %r26, i64* %r28
+%r29 = lshr i256 %r25, 64
+%r30 = trunc i256 %r29 to i64
+%r32 = getelementptr i64, i64* %r1, i32 1
+store i64 %r30, i64* %r32
+%r33 = lshr i256 %r29, 64
+%r34 = trunc i256 %r33 to i64
+%r36 = getelementptr i64, i64* %r1, i32 2
+store i64 %r34, i64* %r36
+%r37 = lshr i256 %r33, 64
+%r38 = trunc i256 %r37 to i64
+%r40 = getelementptr i64, i64* %r1, i32 3
+store i64 %r38, i64* %r40
+ret void
+}
+define void @mcl_fp_add4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; %r1 = [%r2] + [%r3], replaced by the sum minus [%r4] when that subtraction does not borrow (%r4 is presumably the modulus - confirm against callers)
+{
+%r5 = load i64, i64* %r2 ; limb 0 of the first operand ends up in the least significant 64 bits
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = load i64, i64* %r3 ; second operand is packed the same way
+%r28 = zext i64 %r27 to i128
+%r30 = getelementptr i64, i64* %r3, i32 1
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i128
+%r33 = shl i128 %r32, 64
+%r34 = or i128 %r28, %r33
+%r35 = zext i128 %r34 to i192
+%r37 = getelementptr i64, i64* %r3, i32 2
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i192
+%r40 = shl i192 %r39, 128
+%r41 = or i192 %r35, %r40
+%r42 = zext i192 %r41 to i256
+%r44 = getelementptr i64, i64* %r3, i32 3
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i256
+%r47 = shl i256 %r46, 192
+%r48 = or i256 %r42, %r47
+%r49 = zext i256 %r26 to i320 ; widen by one limb so the carry of the addition is kept
+%r50 = zext i256 %r48 to i320
+%r51 = add i320 %r49, %r50
+%r52 = trunc i320 %r51 to i256
+%r53 = trunc i256 %r52 to i64 ; the unreduced sum is stored first; it is the final result on the carry path
+%r55 = getelementptr i64, i64* %r1, i32 0
+store i64 %r53, i64* %r55
+%r56 = lshr i256 %r52, 64
+%r57 = trunc i256 %r56 to i64
+%r59 = getelementptr i64, i64* %r1, i32 1
+store i64 %r57, i64* %r59
+%r60 = lshr i256 %r56, 64
+%r61 = trunc i256 %r60 to i64
+%r63 = getelementptr i64, i64* %r1, i32 2
+store i64 %r61, i64* %r63
+%r64 = lshr i256 %r60, 64
+%r65 = trunc i256 %r64 to i64
+%r67 = getelementptr i64, i64* %r1, i32 3
+store i64 %r65, i64* %r67
+%r68 = load i64, i64* %r4 ; pack the four limbs at %r4
+%r69 = zext i64 %r68 to i128
+%r71 = getelementptr i64, i64* %r4, i32 1
+%r72 = load i64, i64* %r71
+%r73 = zext i64 %r72 to i128
+%r74 = shl i128 %r73, 64
+%r75 = or i128 %r69, %r74
+%r76 = zext i128 %r75 to i192
+%r78 = getelementptr i64, i64* %r4, i32 2
+%r79 = load i64, i64* %r78
+%r80 = zext i64 %r79 to i192
+%r81 = shl i192 %r80, 128
+%r82 = or i192 %r76, %r81
+%r83 = zext i192 %r82 to i256
+%r85 = getelementptr i64, i64* %r4, i32 3
+%r86 = load i64, i64* %r85
+%r87 = zext i64 %r86 to i256
+%r88 = shl i256 %r87, 192
+%r89 = or i256 %r83, %r88
+%r90 = zext i256 %r89 to i320
+%r91 = sub i320 %r51, %r90 ; subtract from the full 320-bit sum, carry bit included
+%r92 = lshr i320 %r91, 256
+%r93 = trunc i320 %r92 to i1 ; bit 256 of the difference: set when the subtraction borrowed
+br i1%r93, label %carry, label %nocarry
+nocarry: ; no borrow: overwrite %r1 with the difference
+%r94 = trunc i320 %r91 to i256
+%r95 = trunc i256 %r94 to i64
+%r97 = getelementptr i64, i64* %r1, i32 0
+store i64 %r95, i64* %r97
+%r98 = lshr i256 %r94, 64
+%r99 = trunc i256 %r98 to i64
+%r101 = getelementptr i64, i64* %r1, i32 1
+store i64 %r99, i64* %r101
+%r102 = lshr i256 %r98, 64
+%r103 = trunc i256 %r102 to i64
+%r105 = getelementptr i64, i64* %r1, i32 2
+store i64 %r103, i64* %r105
+%r106 = lshr i256 %r102, 64
+%r107 = trunc i256 %r106 to i64
+%r109 = getelementptr i64, i64* %r1, i32 3
+store i64 %r107, i64* %r109
+ret void
+carry: ; borrow: keep the sum that was already stored to %r1
+ret void
+}
+define void @mcl_fp_addNF4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; branch-free variant: %r1 = [%r2] + [%r3], or that sum minus [%r4] when the difference has bit 255 clear (%r4 is presumably the modulus - confirm against callers)
+{
+%r5 = load i64, i64* %r2 ; limb 0 of the first operand ends up in the least significant 64 bits
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = load i64, i64* %r3 ; second operand is packed the same way
+%r28 = zext i64 %r27 to i128
+%r30 = getelementptr i64, i64* %r3, i32 1
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i128
+%r33 = shl i128 %r32, 64
+%r34 = or i128 %r28, %r33
+%r35 = zext i128 %r34 to i192
+%r37 = getelementptr i64, i64* %r3, i32 2
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i192
+%r40 = shl i192 %r39, 128
+%r41 = or i192 %r35, %r40
+%r42 = zext i192 %r41 to i256
+%r44 = getelementptr i64, i64* %r3, i32 3
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i256
+%r47 = shl i256 %r46, 192
+%r48 = or i256 %r42, %r47
+%r49 = add i256 %r26, %r48 ; no extra carry limb here; NOTE(review): presumably callers guarantee the sum fits in 256 bits - confirm
+%r50 = load i64, i64* %r4 ; pack the four limbs at %r4
+%r51 = zext i64 %r50 to i128
+%r53 = getelementptr i64, i64* %r4, i32 1
+%r54 = load i64, i64* %r53
+%r55 = zext i64 %r54 to i128
+%r56 = shl i128 %r55, 64
+%r57 = or i128 %r51, %r56
+%r58 = zext i128 %r57 to i192
+%r60 = getelementptr i64, i64* %r4, i32 2
+%r61 = load i64, i64* %r60
+%r62 = zext i64 %r61 to i192
+%r63 = shl i192 %r62, 128
+%r64 = or i192 %r58, %r63
+%r65 = zext i192 %r64 to i256
+%r67 = getelementptr i64, i64* %r4, i32 3
+%r68 = load i64, i64* %r67
+%r69 = zext i64 %r68 to i256
+%r70 = shl i256 %r69, 192
+%r71 = or i256 %r65, %r70
+%r72 = sub i256 %r49, %r71
+%r73 = lshr i256 %r72, 255
+%r74 = trunc i256 %r73 to i1 ; top bit of the 256-bit difference, used as its sign
+%r75 = select i1 %r74, i256 %r49, i256 %r72 ; keep the plain sum when the difference is negative, otherwise the difference
+%r76 = trunc i256 %r75 to i64 ; peel off one 64-bit limb at a time, least significant first
+%r78 = getelementptr i64, i64* %r1, i32 0
+store i64 %r76, i64* %r78
+%r79 = lshr i256 %r75, 64
+%r80 = trunc i256 %r79 to i64
+%r82 = getelementptr i64, i64* %r1, i32 1
+store i64 %r80, i64* %r82
+%r83 = lshr i256 %r79, 64
+%r84 = trunc i256 %r83 to i64
+%r86 = getelementptr i64, i64* %r1, i32 2
+store i64 %r84, i64* %r86
+%r87 = lshr i256 %r83, 64
+%r88 = trunc i256 %r87 to i64
+%r90 = getelementptr i64, i64* %r1, i32 3
+store i64 %r88, i64* %r90
+ret void
+}
+define void @mcl_fp_sub4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; %r1 = [%r2] - [%r3], with [%r4] added back when the subtraction borrows (%r4 is presumably the modulus - confirm against callers)
+{
+%r5 = load i64, i64* %r2 ; limb 0 of the minuend ends up in the least significant 64 bits
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = load i64, i64* %r3 ; subtrahend is packed the same way
+%r28 = zext i64 %r27 to i128
+%r30 = getelementptr i64, i64* %r3, i32 1
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i128
+%r33 = shl i128 %r32, 64
+%r34 = or i128 %r28, %r33
+%r35 = zext i128 %r34 to i192
+%r37 = getelementptr i64, i64* %r3, i32 2
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i192
+%r40 = shl i192 %r39, 128
+%r41 = or i192 %r35, %r40
+%r42 = zext i192 %r41 to i256
+%r44 = getelementptr i64, i64* %r3, i32 3
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i256
+%r47 = shl i256 %r46, 192
+%r48 = or i256 %r42, %r47
+%r49 = zext i256 %r26 to i320 ; widen by one limb so a borrow shows up in bit 256
+%r50 = zext i256 %r48 to i320
+%r51 = sub i320 %r49, %r50
+%r52 = trunc i320 %r51 to i256
+%r53 = lshr i320 %r51, 256
+%r54 = trunc i320 %r53 to i1 ; borrow flag: bit 256 of the widened difference
+%r55 = trunc i256 %r52 to i64 ; the raw difference is stored first; it is the final result when there is no borrow
+%r57 = getelementptr i64, i64* %r1, i32 0
+store i64 %r55, i64* %r57
+%r58 = lshr i256 %r52, 64
+%r59 = trunc i256 %r58 to i64
+%r61 = getelementptr i64, i64* %r1, i32 1
+store i64 %r59, i64* %r61
+%r62 = lshr i256 %r58, 64
+%r63 = trunc i256 %r62 to i64
+%r65 = getelementptr i64, i64* %r1, i32 2
+store i64 %r63, i64* %r65
+%r66 = lshr i256 %r62, 64
+%r67 = trunc i256 %r66 to i64
+%r69 = getelementptr i64, i64* %r1, i32 3
+store i64 %r67, i64* %r69
+br i1%r54, label %carry, label %nocarry
+nocarry: ; no borrow: the difference already stored to %r1 stands
+ret void
+carry: ; borrow: add the four-limb value at %r4 and overwrite %r1
+%r70 = load i64, i64* %r4
+%r71 = zext i64 %r70 to i128
+%r73 = getelementptr i64, i64* %r4, i32 1
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i128
+%r76 = shl i128 %r75, 64
+%r77 = or i128 %r71, %r76
+%r78 = zext i128 %r77 to i192
+%r80 = getelementptr i64, i64* %r4, i32 2
+%r81 = load i64, i64* %r80
+%r82 = zext i64 %r81 to i192
+%r83 = shl i192 %r82, 128
+%r84 = or i192 %r78, %r83
+%r85 = zext i192 %r84 to i256
+%r87 = getelementptr i64, i64* %r4, i32 3
+%r88 = load i64, i64* %r87
+%r89 = zext i64 %r88 to i256
+%r90 = shl i256 %r89, 192
+%r91 = or i256 %r85, %r90
+%r92 = add i256 %r52, %r91 ; 256-bit add, so the result wraps modulo 2^256
+%r93 = trunc i256 %r92 to i64
+%r95 = getelementptr i64, i64* %r1, i32 0
+store i64 %r93, i64* %r95
+%r96 = lshr i256 %r92, 64
+%r97 = trunc i256 %r96 to i64
+%r99 = getelementptr i64, i64* %r1, i32 1
+store i64 %r97, i64* %r99
+%r100 = lshr i256 %r96, 64
+%r101 = trunc i256 %r100 to i64
+%r103 = getelementptr i64, i64* %r1, i32 2
+store i64 %r101, i64* %r103
+%r104 = lshr i256 %r100, 64
+%r105 = trunc i256 %r104 to i64
+%r107 = getelementptr i64, i64* %r1, i32 3
+store i64 %r105, i64* %r107
+ret void
+}
+define void @mcl_fp_subNF4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; branch-free variant: %r1 = [%r2] - [%r3], plus [%r4] when the difference has bit 255 set (%r4 is presumably the modulus - confirm against callers)
+{
+%r5 = load i64, i64* %r2 ; limb 0 of the minuend ends up in the least significant 64 bits
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = load i64, i64* %r3 ; subtrahend is packed the same way
+%r28 = zext i64 %r27 to i128
+%r30 = getelementptr i64, i64* %r3, i32 1
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i128
+%r33 = shl i128 %r32, 64
+%r34 = or i128 %r28, %r33
+%r35 = zext i128 %r34 to i192
+%r37 = getelementptr i64, i64* %r3, i32 2
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i192
+%r40 = shl i192 %r39, 128
+%r41 = or i192 %r35, %r40
+%r42 = zext i192 %r41 to i256
+%r44 = getelementptr i64, i64* %r3, i32 3
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i256
+%r47 = shl i256 %r46, 192
+%r48 = or i256 %r42, %r47
+%r49 = sub i256 %r26, %r48 ; no extra limb here; NOTE(review): presumably both operands stay below 2^255 so bit 255 can act as the sign - confirm
+%r50 = lshr i256 %r49, 255
+%r51 = trunc i256 %r50 to i1 ; top bit of the 256-bit difference, used as its sign
+%r52 = load i64, i64* %r4 ; pack the four limbs at %r4
+%r53 = zext i64 %r52 to i128
+%r55 = getelementptr i64, i64* %r4, i32 1
+%r56 = load i64, i64* %r55
+%r57 = zext i64 %r56 to i128
+%r58 = shl i128 %r57, 64
+%r59 = or i128 %r53, %r58
+%r60 = zext i128 %r59 to i192
+%r62 = getelementptr i64, i64* %r4, i32 2
+%r63 = load i64, i64* %r62
+%r64 = zext i64 %r63 to i192
+%r65 = shl i192 %r64, 128
+%r66 = or i192 %r60, %r65
+%r67 = zext i192 %r66 to i256
+%r69 = getelementptr i64, i64* %r4, i32 3
+%r70 = load i64, i64* %r69
+%r71 = zext i64 %r70 to i256
+%r72 = shl i256 %r71, 192
+%r73 = or i256 %r67, %r72
+%r75 = select i1 %r51, i256 %r73, i256 0 ; correction term: the %r4 value when the difference is negative, else zero
+%r76 = add i256 %r49, %r75
+%r77 = trunc i256 %r76 to i64 ; peel off one 64-bit limb at a time, least significant first
+%r79 = getelementptr i64, i64* %r1, i32 0
+store i64 %r77, i64* %r79
+%r80 = lshr i256 %r76, 64
+%r81 = trunc i256 %r80 to i64
+%r83 = getelementptr i64, i64* %r1, i32 1
+store i64 %r81, i64* %r83
+%r84 = lshr i256 %r80, 64
+%r85 = trunc i256 %r84 to i64
+%r87 = getelementptr i64, i64* %r1, i32 2
+store i64 %r85, i64* %r87
+%r88 = lshr i256 %r84, 64
+%r89 = trunc i256 %r88 to i64
+%r91 = getelementptr i64, i64* %r1, i32 3
+store i64 %r89, i64* %r91
+ret void
+}
+define void @mcl_fpDbl_add4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; adds the eight-limb values at %r2 and %r3 into %r1; the upper four limbs are reduced by the four-limb value at %r4 when that does not borrow (%r4 is presumably the modulus - confirm against callers)
+{
+%r5 = load i64, i64* %r2 ; pack the eight limbs at %r2 into a 512-bit value, limb 0 least significant
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = load i64, i64* %r3 ; pack the eight limbs at %r3 the same way
+%r56 = zext i64 %r55 to i128
+%r58 = getelementptr i64, i64* %r3, i32 1
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i128
+%r61 = shl i128 %r60, 64
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i192
+%r65 = getelementptr i64, i64* %r3, i32 2
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i192
+%r68 = shl i192 %r67, 128
+%r69 = or i192 %r63, %r68
+%r70 = zext i192 %r69 to i256
+%r72 = getelementptr i64, i64* %r3, i32 3
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i256
+%r75 = shl i256 %r74, 192
+%r76 = or i256 %r70, %r75
+%r77 = zext i256 %r76 to i320
+%r79 = getelementptr i64, i64* %r3, i32 4
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i320
+%r82 = shl i320 %r81, 256
+%r83 = or i320 %r77, %r82
+%r84 = zext i320 %r83 to i384
+%r86 = getelementptr i64, i64* %r3, i32 5
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i384
+%r89 = shl i384 %r88, 320
+%r90 = or i384 %r84, %r89
+%r91 = zext i384 %r90 to i448
+%r93 = getelementptr i64, i64* %r3, i32 6
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i448
+%r96 = shl i448 %r95, 384
+%r97 = or i448 %r91, %r96
+%r98 = zext i448 %r97 to i512
+%r100 = getelementptr i64, i64* %r3, i32 7
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i512
+%r103 = shl i512 %r102, 448
+%r104 = or i512 %r98, %r103
+%r105 = zext i512 %r54 to i576 ; widen by one limb so the carry of the 512-bit addition is kept
+%r106 = zext i512 %r104 to i576
+%r107 = add i576 %r105, %r106
+%r108 = trunc i576 %r107 to i256 ; the low four limbs of the sum are stored to %r1[0..3] unchanged
+%r109 = trunc i256 %r108 to i64
+%r111 = getelementptr i64, i64* %r1, i32 0
+store i64 %r109, i64* %r111
+%r112 = lshr i256 %r108, 64
+%r113 = trunc i256 %r112 to i64
+%r115 = getelementptr i64, i64* %r1, i32 1
+store i64 %r113, i64* %r115
+%r116 = lshr i256 %r112, 64
+%r117 = trunc i256 %r116 to i64
+%r119 = getelementptr i64, i64* %r1, i32 2
+store i64 %r117, i64* %r119
+%r120 = lshr i256 %r116, 64
+%r121 = trunc i256 %r120 to i64
+%r123 = getelementptr i64, i64* %r1, i32 3
+store i64 %r121, i64* %r123
+%r124 = lshr i576 %r107, 256
+%r125 = trunc i576 %r124 to i320 ; upper half of the sum including its carry bit
+%r126 = load i64, i64* %r4 ; pack the four limbs at %r4
+%r127 = zext i64 %r126 to i128
+%r129 = getelementptr i64, i64* %r4, i32 1
+%r130 = load i64, i64* %r129
+%r131 = zext i64 %r130 to i128
+%r132 = shl i128 %r131, 64
+%r133 = or i128 %r127, %r132
+%r134 = zext i128 %r133 to i192
+%r136 = getelementptr i64, i64* %r4, i32 2
+%r137 = load i64, i64* %r136
+%r138 = zext i64 %r137 to i192
+%r139 = shl i192 %r138, 128
+%r140 = or i192 %r134, %r139
+%r141 = zext i192 %r140 to i256
+%r143 = getelementptr i64, i64* %r4, i32 3
+%r144 = load i64, i64* %r143
+%r145 = zext i64 %r144 to i256
+%r146 = shl i256 %r145, 192
+%r147 = or i256 %r141, %r146
+%r148 = zext i256 %r147 to i320
+%r149 = sub i320 %r125, %r148
+%r150 = lshr i320 %r149, 256
+%r151 = trunc i320 %r150 to i1 ; bit 256 of the difference: set when the subtraction borrowed
+%r152 = select i1 %r151, i320 %r125, i320 %r149 ; on borrow keep the unreduced upper half, otherwise the reduced one
+%r153 = trunc i320 %r152 to i256
+%r155 = getelementptr i64, i64* %r1, i32 4 ; the selected value goes to %r1[4..7]
+%r156 = trunc i256 %r153 to i64
+%r158 = getelementptr i64, i64* %r155, i32 0
+store i64 %r156, i64* %r158
+%r159 = lshr i256 %r153, 64
+%r160 = trunc i256 %r159 to i64
+%r162 = getelementptr i64, i64* %r155, i32 1
+store i64 %r160, i64* %r162
+%r163 = lshr i256 %r159, 64
+%r164 = trunc i256 %r163 to i64
+%r166 = getelementptr i64, i64* %r155, i32 2
+store i64 %r164, i64* %r166
+%r167 = lshr i256 %r163, 64
+%r168 = trunc i256 %r167 to i64
+%r170 = getelementptr i64, i64* %r155, i32 3
+store i64 %r168, i64* %r170
+ret void
+}
+define void @mcl_fpDbl_sub4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; subtracts the eight-limb value at %r3 from the one at %r2 into %r1; on borrow the four-limb value at %r4 is added to the upper four limbs (%r4 is presumably the modulus - confirm against callers)
+{
+%r5 = load i64, i64* %r2 ; pack the eight limbs at %r2 into a 512-bit value, limb 0 least significant
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = load i64, i64* %r3 ; pack the eight limbs at %r3 the same way
+%r56 = zext i64 %r55 to i128
+%r58 = getelementptr i64, i64* %r3, i32 1
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i128
+%r61 = shl i128 %r60, 64
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i192
+%r65 = getelementptr i64, i64* %r3, i32 2
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i192
+%r68 = shl i192 %r67, 128
+%r69 = or i192 %r63, %r68
+%r70 = zext i192 %r69 to i256
+%r72 = getelementptr i64, i64* %r3, i32 3
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i256
+%r75 = shl i256 %r74, 192
+%r76 = or i256 %r70, %r75
+%r77 = zext i256 %r76 to i320
+%r79 = getelementptr i64, i64* %r3, i32 4
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i320
+%r82 = shl i320 %r81, 256
+%r83 = or i320 %r77, %r82
+%r84 = zext i320 %r83 to i384
+%r86 = getelementptr i64, i64* %r3, i32 5
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i384
+%r89 = shl i384 %r88, 320
+%r90 = or i384 %r84, %r89
+%r91 = zext i384 %r90 to i448
+%r93 = getelementptr i64, i64* %r3, i32 6
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i448
+%r96 = shl i448 %r95, 384
+%r97 = or i448 %r91, %r96
+%r98 = zext i448 %r97 to i512
+%r100 = getelementptr i64, i64* %r3, i32 7
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i512
+%r103 = shl i512 %r102, 448
+%r104 = or i512 %r98, %r103
+%r105 = zext i512 %r54 to i576 ; widen by one limb so a borrow shows up in bit 512
+%r106 = zext i512 %r104 to i576
+%r107 = sub i576 %r105, %r106
+%r108 = trunc i576 %r107 to i256 ; the low four limbs of the difference are stored to %r1[0..3] unchanged
+%r109 = trunc i256 %r108 to i64
+%r111 = getelementptr i64, i64* %r1, i32 0
+store i64 %r109, i64* %r111
+%r112 = lshr i256 %r108, 64
+%r113 = trunc i256 %r112 to i64
+%r115 = getelementptr i64, i64* %r1, i32 1
+store i64 %r113, i64* %r115
+%r116 = lshr i256 %r112, 64
+%r117 = trunc i256 %r116 to i64
+%r119 = getelementptr i64, i64* %r1, i32 2
+store i64 %r117, i64* %r119
+%r120 = lshr i256 %r116, 64
+%r121 = trunc i256 %r120 to i64
+%r123 = getelementptr i64, i64* %r1, i32 3
+store i64 %r121, i64* %r123
+%r124 = lshr i576 %r107, 256
+%r125 = trunc i576 %r124 to i256 ; bits 256..511 of the difference, i.e. its upper four limbs
+%r126 = lshr i576 %r107, 512
+%r127 = trunc i576 %r126 to i1 ; bit 512: set when the 512-bit subtraction borrowed
+%r128 = load i64, i64* %r4 ; pack the four limbs at %r4
+%r129 = zext i64 %r128 to i128
+%r131 = getelementptr i64, i64* %r4, i32 1
+%r132 = load i64, i64* %r131
+%r133 = zext i64 %r132 to i128
+%r134 = shl i128 %r133, 64
+%r135 = or i128 %r129, %r134
+%r136 = zext i128 %r135 to i192
+%r138 = getelementptr i64, i64* %r4, i32 2
+%r139 = load i64, i64* %r138
+%r140 = zext i64 %r139 to i192
+%r141 = shl i192 %r140, 128
+%r142 = or i192 %r136, %r141
+%r143 = zext i192 %r142 to i256
+%r145 = getelementptr i64, i64* %r4, i32 3
+%r146 = load i64, i64* %r145
+%r147 = zext i64 %r146 to i256
+%r148 = shl i256 %r147, 192
+%r149 = or i256 %r143, %r148
+%r151 = select i1 %r127, i256 %r149, i256 0 ; correction term: the %r4 value on borrow, else zero
+%r152 = add i256 %r125, %r151
+%r154 = getelementptr i64, i64* %r1, i32 4 ; the corrected upper half goes to %r1[4..7]
+%r155 = trunc i256 %r152 to i64
+%r157 = getelementptr i64, i64* %r154, i32 0
+store i64 %r155, i64* %r157
+%r158 = lshr i256 %r152, 64
+%r159 = trunc i256 %r158 to i64
+%r161 = getelementptr i64, i64* %r154, i32 1
+store i64 %r159, i64* %r161
+%r162 = lshr i256 %r158, 64
+%r163 = trunc i256 %r162 to i64
+%r165 = getelementptr i64, i64* %r154, i32 2
+store i64 %r163, i64* %r165
+%r166 = lshr i256 %r162, 64
+%r167 = trunc i256 %r166 to i64
+%r169 = getelementptr i64, i64* %r154, i32 3
+store i64 %r167, i64* %r169
+ret void
+}
+define i384 @mulPv320x64(i64* noalias  %r2, i64 %r3) ; combines five mulPos64x64 results for indices 0..4 into one i384; presumably the product of the five-limb value at %r2 and %r3 (mulPos64x64 is defined outside this block - confirm)
+{
+%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0) ; NOTE(review): presumably the 128-bit product of limb 0 at %r2 and %r3 - confirm against mulPos64x64
+%r6 = trunc i128 %r5 to i64 ; low 64 bits of the partial result
+%r7 = call i64 @extractHigh64(i128 %r5) ; presumably the high 64 bits of the partial result (helper defined elsewhere)
+%r9 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 1)
+%r10 = trunc i128 %r9 to i64
+%r11 = call i64 @extractHigh64(i128 %r9)
+%r13 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 2)
+%r14 = trunc i128 %r13 to i64
+%r15 = call i64 @extractHigh64(i128 %r13)
+%r17 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 3)
+%r18 = trunc i128 %r17 to i64
+%r19 = call i64 @extractHigh64(i128 %r17)
+%r21 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 4)
+%r22 = trunc i128 %r21 to i64
+%r23 = call i64 @extractHigh64(i128 %r21)
+%r24 = zext i64 %r6 to i128 ; pack the five low halves into one 320-bit value, index 0 least significant
+%r25 = zext i64 %r10 to i128
+%r26 = shl i128 %r25, 64
+%r27 = or i128 %r24, %r26
+%r28 = zext i128 %r27 to i192
+%r29 = zext i64 %r14 to i192
+%r30 = shl i192 %r29, 128
+%r31 = or i192 %r28, %r30
+%r32 = zext i192 %r31 to i256
+%r33 = zext i64 %r18 to i256
+%r34 = shl i256 %r33, 192
+%r35 = or i256 %r32, %r34
+%r36 = zext i256 %r35 to i320
+%r37 = zext i64 %r22 to i320
+%r38 = shl i320 %r37, 256
+%r39 = or i320 %r36, %r38
+%r40 = zext i64 %r7 to i128 ; pack the five high halves the same way
+%r41 = zext i64 %r11 to i128
+%r42 = shl i128 %r41, 64
+%r43 = or i128 %r40, %r42
+%r44 = zext i128 %r43 to i192
+%r45 = zext i64 %r15 to i192
+%r46 = shl i192 %r45, 128
+%r47 = or i192 %r44, %r46
+%r48 = zext i192 %r47 to i256
+%r49 = zext i64 %r19 to i256
+%r50 = shl i256 %r49, 192
+%r51 = or i256 %r48, %r50
+%r52 = zext i256 %r51 to i320
+%r53 = zext i64 %r23 to i320
+%r54 = shl i320 %r53, 256
+%r55 = or i320 %r52, %r54
+%r56 = zext i320 %r39 to i384
+%r57 = zext i320 %r55 to i384
+%r58 = shl i384 %r57, 64 ; each high half belongs one limb above its matching low half
+%r59 = add i384 %r56, %r58 ; a single wide add propagates all carries between the two packed values
+ret i384 %r59
+}
+define void @mcl_fp_mulUnitPre5L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3) ; stores the i384 result of mulPv320x64(%r2, %r3) to %r1 as six 64-bit limbs
+{
+%r4 = call i384 @mulPv320x64(i64* %r2, i64 %r3)
+%r5 = trunc i384 %r4 to i64 ; peel off one 64-bit limb at a time, least significant first
+%r7 = getelementptr i64, i64* %r1, i32 0
+store i64 %r5, i64* %r7
+%r8 = lshr i384 %r4, 64
+%r9 = trunc i384 %r8 to i64
+%r11 = getelementptr i64, i64* %r1, i32 1
+store i64 %r9, i64* %r11
+%r12 = lshr i384 %r8, 64
+%r13 = trunc i384 %r12 to i64
+%r15 = getelementptr i64, i64* %r1, i32 2
+store i64 %r13, i64* %r15
+%r16 = lshr i384 %r12, 64
+%r17 = trunc i384 %r16 to i64
+%r19 = getelementptr i64, i64* %r1, i32 3
+store i64 %r17, i64* %r19
+%r20 = lshr i384 %r16, 64
+%r21 = trunc i384 %r20 to i64
+%r23 = getelementptr i64, i64* %r1, i32 4
+store i64 %r21, i64* %r23
+%r24 = lshr i384 %r20, 64
+%r25 = trunc i384 %r24 to i64
+%r27 = getelementptr i64, i64* %r1, i32 5 ; sixth limb: the output is one limb longer than the five-limb input
+store i64 %r25, i64* %r27
+ret void
+}
+define void @mcl_fpDbl_mulPre5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; writes ten limbs to %r1 by accumulating mulPv320x64(%r2, limb) for each of the five limbs at %r3; presumably the full product of the two five-limb values
+{
+%r4 = load i64, i64* %r3
+%r5 = call i384 @mulPv320x64(i64* %r2, i64 %r4) ; partial result for limb 0 of %r3
+%r6 = trunc i384 %r5 to i64
+store i64 %r6, i64* %r1 ; the lowest limb of the accumulator is final after each step
+%r7 = lshr i384 %r5, 64 ; drop the stored limb so the next partial result lines up one limb higher
+%r9 = getelementptr i64, i64* %r3, i32 1
+%r10 = load i64, i64* %r9
+%r11 = call i384 @mulPv320x64(i64* %r2, i64 %r10)
+%r12 = add i384 %r7, %r11
+%r13 = trunc i384 %r12 to i64
+%r15 = getelementptr i64, i64* %r1, i32 1
+store i64 %r13, i64* %r15
+%r16 = lshr i384 %r12, 64
+%r18 = getelementptr i64, i64* %r3, i32 2
+%r19 = load i64, i64* %r18
+%r20 = call i384 @mulPv320x64(i64* %r2, i64 %r19)
+%r21 = add i384 %r16, %r20
+%r22 = trunc i384 %r21 to i64
+%r24 = getelementptr i64, i64* %r1, i32 2
+store i64 %r22, i64* %r24
+%r25 = lshr i384 %r21, 64
+%r27 = getelementptr i64, i64* %r3, i32 3
+%r28 = load i64, i64* %r27
+%r29 = call i384 @mulPv320x64(i64* %r2, i64 %r28)
+%r30 = add i384 %r25, %r29
+%r31 = trunc i384 %r30 to i64
+%r33 = getelementptr i64, i64* %r1, i32 3
+store i64 %r31, i64* %r33
+%r34 = lshr i384 %r30, 64
+%r36 = getelementptr i64, i64* %r3, i32 4
+%r37 = load i64, i64* %r36
+%r38 = call i384 @mulPv320x64(i64* %r2, i64 %r37)
+%r39 = add i384 %r34, %r38 ; final accumulator: all six of its limbs are written out below
+%r41 = getelementptr i64, i64* %r1, i32 4 ; remaining limbs go to %r1[4..9]
+%r42 = trunc i384 %r39 to i64
+%r44 = getelementptr i64, i64* %r41, i32 0
+store i64 %r42, i64* %r44
+%r45 = lshr i384 %r39, 64
+%r46 = trunc i384 %r45 to i64
+%r48 = getelementptr i64, i64* %r41, i32 1
+store i64 %r46, i64* %r48
+%r49 = lshr i384 %r45, 64
+%r50 = trunc i384 %r49 to i64
+%r52 = getelementptr i64, i64* %r41, i32 2
+store i64 %r50, i64* %r52
+%r53 = lshr i384 %r49, 64
+%r54 = trunc i384 %r53 to i64
+%r56 = getelementptr i64, i64* %r41, i32 3
+store i64 %r54, i64* %r56
+%r57 = lshr i384 %r53, 64
+%r58 = trunc i384 %r57 to i64
+%r60 = getelementptr i64, i64* %r41, i32 4
+store i64 %r58, i64* %r60
+%r61 = lshr i384 %r57, 64
+%r62 = trunc i384 %r61 to i64
+%r64 = getelementptr i64, i64* %r41, i32 5
+store i64 %r62, i64* %r64
+ret void
+}
+define void @mcl_fpDbl_sqrPre5L(i64* noalias  %r1, i64* noalias  %r2) ; writes ten limbs to %r1 by accumulating mulPv320x64(%r2, limb) for each of the five limbs at %r2 itself; presumably the square of the five-limb value
+{
+%r3 = load i64, i64* %r2
+%r4 = call i384 @mulPv320x64(i64* %r2, i64 %r3) ; partial result for limb 0; %r2 supplies both the vector and the scalar
+%r5 = trunc i384 %r4 to i64
+store i64 %r5, i64* %r1 ; the lowest limb of the accumulator is final after each step
+%r6 = lshr i384 %r4, 64 ; drop the stored limb so the next partial result lines up one limb higher
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = call i384 @mulPv320x64(i64* %r2, i64 %r9)
+%r11 = add i384 %r6, %r10
+%r12 = trunc i384 %r11 to i64
+%r14 = getelementptr i64, i64* %r1, i32 1
+store i64 %r12, i64* %r14
+%r15 = lshr i384 %r11, 64
+%r17 = getelementptr i64, i64* %r2, i32 2
+%r18 = load i64, i64* %r17
+%r19 = call i384 @mulPv320x64(i64* %r2, i64 %r18)
+%r20 = add i384 %r15, %r19
+%r21 = trunc i384 %r20 to i64
+%r23 = getelementptr i64, i64* %r1, i32 2
+store i64 %r21, i64* %r23
+%r24 = lshr i384 %r20, 64
+%r26 = getelementptr i64, i64* %r2, i32 3
+%r27 = load i64, i64* %r26
+%r28 = call i384 @mulPv320x64(i64* %r2, i64 %r27)
+%r29 = add i384 %r24, %r28
+%r30 = trunc i384 %r29 to i64
+%r32 = getelementptr i64, i64* %r1, i32 3
+store i64 %r30, i64* %r32
+%r33 = lshr i384 %r29, 64
+%r35 = getelementptr i64, i64* %r2, i32 4
+%r36 = load i64, i64* %r35
+%r37 = call i384 @mulPv320x64(i64* %r2, i64 %r36)
+%r38 = add i384 %r33, %r37 ; final accumulator: all six of its limbs are written out below
+%r40 = getelementptr i64, i64* %r1, i32 4 ; remaining limbs go to %r1[4..9]
+%r41 = trunc i384 %r38 to i64
+%r43 = getelementptr i64, i64* %r40, i32 0
+store i64 %r41, i64* %r43
+%r44 = lshr i384 %r38, 64
+%r45 = trunc i384 %r44 to i64
+%r47 = getelementptr i64, i64* %r40, i32 1
+store i64 %r45, i64* %r47
+%r48 = lshr i384 %r44, 64
+%r49 = trunc i384 %r48 to i64
+%r51 = getelementptr i64, i64* %r40, i32 2
+store i64 %r49, i64* %r51
+%r52 = lshr i384 %r48, 64
+%r53 = trunc i384 %r52 to i64
+%r55 = getelementptr i64, i64* %r40, i32 3
+store i64 %r53, i64* %r55
+%r56 = lshr i384 %r52, 64
+%r57 = trunc i384 %r56 to i64
+%r59 = getelementptr i64, i64* %r40, i32 4
+store i64 %r57, i64* %r59
+%r60 = lshr i384 %r56, 64
+%r61 = trunc i384 %r60 to i64
+%r63 = getelementptr i64, i64* %r40, i32 5
+store i64 %r61, i64* %r63
+ret void
+}
+define void @mcl_fp_mont5L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
+{
+%r6 = getelementptr i64, i64* %r4, i32 -1
+%r7 = load i64, i64* %r6
+%r9 = getelementptr i64, i64* %r3, i32 0
+%r10 = load i64, i64* %r9
+%r11 = call i384 @mulPv320x64(i64* %r2, i64 %r10)
+%r12 = zext i384 %r11 to i448
+%r13 = trunc i384 %r11 to i64
+%r14 = mul i64 %r13, %r7
+%r15 = call i384 @mulPv320x64(i64* %r4, i64 %r14)
+%r16 = zext i384 %r15 to i448
+%r17 = add i448 %r12, %r16
+%r18 = lshr i448 %r17, 64
+%r20 = getelementptr i64, i64* %r3, i32 1
+%r21 = load i64, i64* %r20
+%r22 = call i384 @mulPv320x64(i64* %r2, i64 %r21)
+%r23 = zext i384 %r22 to i448
+%r24 = add i448 %r18, %r23
+%r25 = trunc i448 %r24 to i64
+%r26 = mul i64 %r25, %r7
+%r27 = call i384 @mulPv320x64(i64* %r4, i64 %r26)
+%r28 = zext i384 %r27 to i448
+%r29 = add i448 %r24, %r28
+%r30 = lshr i448 %r29, 64
+%r32 = getelementptr i64, i64* %r3, i32 2
+%r33 = load i64, i64* %r32
+%r34 = call i384 @mulPv320x64(i64* %r2, i64 %r33)
+%r35 = zext i384 %r34 to i448
+%r36 = add i448 %r30, %r35
+%r37 = trunc i448 %r36 to i64
+%r38 = mul i64 %r37, %r7
+%r39 = call i384 @mulPv320x64(i64* %r4, i64 %r38)
+%r40 = zext i384 %r39 to i448
+%r41 = add i448 %r36, %r40
+%r42 = lshr i448 %r41, 64
+%r44 = getelementptr i64, i64* %r3, i32 3
+%r45 = load i64, i64* %r44
+%r46 = call i384 @mulPv320x64(i64* %r2, i64 %r45)
+%r47 = zext i384 %r46 to i448
+%r48 = add i448 %r42, %r47
+%r49 = trunc i448 %r48 to i64
+%r50 = mul i64 %r49, %r7
+%r51 = call i384 @mulPv320x64(i64* %r4, i64 %r50)
+%r52 = zext i384 %r51 to i448
+%r53 = add i448 %r48, %r52
+%r54 = lshr i448 %r53, 64
+%r56 = getelementptr i64, i64* %r3, i32 4
+%r57 = load i64, i64* %r56
+%r58 = call i384 @mulPv320x64(i64* %r2, i64 %r57)
+%r59 = zext i384 %r58 to i448
+%r60 = add i448 %r54, %r59
+%r61 = trunc i448 %r60 to i64
+%r62 = mul i64 %r61, %r7
+%r63 = call i384 @mulPv320x64(i64* %r4, i64 %r62)
+%r64 = zext i384 %r63 to i448
+%r65 = add i448 %r60, %r64
+%r66 = lshr i448 %r65, 64
+%r67 = trunc i448 %r66 to i384
+%r68 = load i64, i64* %r4
+%r69 = zext i64 %r68 to i128
+%r71 = getelementptr i64, i64* %r4, i32 1
+%r72 = load i64, i64* %r71
+%r73 = zext i64 %r72 to i128
+%r74 = shl i128 %r73, 64
+%r75 = or i128 %r69, %r74
+%r76 = zext i128 %r75 to i192
+%r78 = getelementptr i64, i64* %r4, i32 2
+%r79 = load i64, i64* %r78
+%r80 = zext i64 %r79 to i192
+%r81 = shl i192 %r80, 128
+%r82 = or i192 %r76, %r81
+%r83 = zext i192 %r82 to i256
+%r85 = getelementptr i64, i64* %r4, i32 3
+%r86 = load i64, i64* %r85
+%r87 = zext i64 %r86 to i256
+%r88 = shl i256 %r87, 192
+%r89 = or i256 %r83, %r88
+%r90 = zext i256 %r89 to i320
+%r92 = getelementptr i64, i64* %r4, i32 4
+%r93 = load i64, i64* %r92
+%r94 = zext i64 %r93 to i320
+%r95 = shl i320 %r94, 256
+%r96 = or i320 %r90, %r95
+%r97 = zext i320 %r96 to i384
+%r98 = sub i384 %r67, %r97
+%r99 = lshr i384 %r98, 320
+%r100 = trunc i384 %r99 to i1
+%r101 = select i1 %r100, i384 %r67, i384 %r98
+%r102 = trunc i384 %r101 to i320
+%r103 = trunc i320 %r102 to i64
+%r105 = getelementptr i64, i64* %r1, i32 0
+store i64 %r103, i64* %r105
+%r106 = lshr i320 %r102, 64
+%r107 = trunc i320 %r106 to i64
+%r109 = getelementptr i64, i64* %r1, i32 1
+store i64 %r107, i64* %r109
+%r110 = lshr i320 %r106, 64
+%r111 = trunc i320 %r110 to i64
+%r113 = getelementptr i64, i64* %r1, i32 2
+store i64 %r111, i64* %r113
+%r114 = lshr i320 %r110, 64
+%r115 = trunc i320 %r114 to i64
+%r117 = getelementptr i64, i64* %r1, i32 3
+store i64 %r115, i64* %r117
+%r118 = lshr i320 %r114, 64
+%r119 = trunc i320 %r118 to i64
+%r121 = getelementptr i64, i64* %r1, i32 4
+store i64 %r119, i64* %r121
+ret void
+}
+define void @mcl_fp_montNF5L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
+{
+%r6 = getelementptr i64, i64* %r4, i32 -1
+%r7 = load i64, i64* %r6
+%r8 = load i64, i64* %r3
+%r9 = call i384 @mulPv320x64(i64* %r2, i64 %r8)
+%r10 = trunc i384 %r9 to i64
+%r11 = mul i64 %r10, %r7
+%r12 = call i384 @mulPv320x64(i64* %r4, i64 %r11)
+%r13 = add i384 %r9, %r12
+%r14 = lshr i384 %r13, 64
+%r16 = getelementptr i64, i64* %r3, i32 1
+%r17 = load i64, i64* %r16
+%r18 = call i384 @mulPv320x64(i64* %r2, i64 %r17)
+%r19 = add i384 %r14, %r18
+%r20 = trunc i384 %r19 to i64
+%r21 = mul i64 %r20, %r7
+%r22 = call i384 @mulPv320x64(i64* %r4, i64 %r21)
+%r23 = add i384 %r19, %r22
+%r24 = lshr i384 %r23, 64
+%r26 = getelementptr i64, i64* %r3, i32 2
+%r27 = load i64, i64* %r26
+%r28 = call i384 @mulPv320x64(i64* %r2, i64 %r27)
+%r29 = add i384 %r24, %r28
+%r30 = trunc i384 %r29 to i64
+%r31 = mul i64 %r30, %r7
+%r32 = call i384 @mulPv320x64(i64* %r4, i64 %r31)
+%r33 = add i384 %r29, %r32
+%r34 = lshr i384 %r33, 64
+%r36 = getelementptr i64, i64* %r3, i32 3
+%r37 = load i64, i64* %r36
+%r38 = call i384 @mulPv320x64(i64* %r2, i64 %r37)
+%r39 = add i384 %r34, %r38
+%r40 = trunc i384 %r39 to i64
+%r41 = mul i64 %r40, %r7
+%r42 = call i384 @mulPv320x64(i64* %r4, i64 %r41)
+%r43 = add i384 %r39, %r42
+%r44 = lshr i384 %r43, 64
+%r46 = getelementptr i64, i64* %r3, i32 4
+%r47 = load i64, i64* %r46
+%r48 = call i384 @mulPv320x64(i64* %r2, i64 %r47)
+%r49 = add i384 %r44, %r48
+%r50 = trunc i384 %r49 to i64
+%r51 = mul i64 %r50, %r7
+%r52 = call i384 @mulPv320x64(i64* %r4, i64 %r51)
+%r53 = add i384 %r49, %r52
+%r54 = lshr i384 %r53, 64
+%r55 = trunc i384 %r54 to i320
+%r56 = load i64, i64* %r4
+%r57 = zext i64 %r56 to i128
+%r59 = getelementptr i64, i64* %r4, i32 1
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i128
+%r62 = shl i128 %r61, 64
+%r63 = or i128 %r57, %r62
+%r64 = zext i128 %r63 to i192
+%r66 = getelementptr i64, i64* %r4, i32 2
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i192
+%r69 = shl i192 %r68, 128
+%r70 = or i192 %r64, %r69
+%r71 = zext i192 %r70 to i256
+%r73 = getelementptr i64, i64* %r4, i32 3
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i256
+%r76 = shl i256 %r75, 192
+%r77 = or i256 %r71, %r76
+%r78 = zext i256 %r77 to i320
+%r80 = getelementptr i64, i64* %r4, i32 4
+%r81 = load i64, i64* %r80
+%r82 = zext i64 %r81 to i320
+%r83 = shl i320 %r82, 256
+%r84 = or i320 %r78, %r83
+%r85 = sub i320 %r55, %r84
+%r86 = lshr i320 %r85, 319
+%r87 = trunc i320 %r86 to i1
+%r88 = select i1 %r87, i320 %r55, i320 %r85
+%r89 = trunc i320 %r88 to i64
+%r91 = getelementptr i64, i64* %r1, i32 0
+store i64 %r89, i64* %r91
+%r92 = lshr i320 %r88, 64
+%r93 = trunc i320 %r92 to i64
+%r95 = getelementptr i64, i64* %r1, i32 1
+store i64 %r93, i64* %r95
+%r96 = lshr i320 %r92, 64
+%r97 = trunc i320 %r96 to i64
+%r99 = getelementptr i64, i64* %r1, i32 2
+store i64 %r97, i64* %r99
+%r100 = lshr i320 %r96, 64
+%r101 = trunc i320 %r100 to i64
+%r103 = getelementptr i64, i64* %r1, i32 3
+store i64 %r101, i64* %r103
+%r104 = lshr i320 %r100, 64
+%r105 = trunc i320 %r104 to i64
+%r107 = getelementptr i64, i64* %r1, i32 4
+store i64 %r105, i64* %r107
+ret void
+}
+define void @mcl_fp_montRed5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+{
+%r5 = getelementptr i64, i64* %r3, i32 -1
+%r6 = load i64, i64* %r5
+%r7 = load i64, i64* %r3
+%r8 = zext i64 %r7 to i128
+%r10 = getelementptr i64, i64* %r3, i32 1
+%r11 = load i64, i64* %r10
+%r12 = zext i64 %r11 to i128
+%r13 = shl i128 %r12, 64
+%r14 = or i128 %r8, %r13
+%r15 = zext i128 %r14 to i192
+%r17 = getelementptr i64, i64* %r3, i32 2
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i192
+%r20 = shl i192 %r19, 128
+%r21 = or i192 %r15, %r20
+%r22 = zext i192 %r21 to i256
+%r24 = getelementptr i64, i64* %r3, i32 3
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i256
+%r27 = shl i256 %r26, 192
+%r28 = or i256 %r22, %r27
+%r29 = zext i256 %r28 to i320
+%r31 = getelementptr i64, i64* %r3, i32 4
+%r32 = load i64, i64* %r31
+%r33 = zext i64 %r32 to i320
+%r34 = shl i320 %r33, 256
+%r35 = or i320 %r29, %r34
+%r36 = load i64, i64* %r2
+%r37 = zext i64 %r36 to i128
+%r39 = getelementptr i64, i64* %r2, i32 1
+%r40 = load i64, i64* %r39
+%r41 = zext i64 %r40 to i128
+%r42 = shl i128 %r41, 64
+%r43 = or i128 %r37, %r42
+%r44 = zext i128 %r43 to i192
+%r46 = getelementptr i64, i64* %r2, i32 2
+%r47 = load i64, i64* %r46
+%r48 = zext i64 %r47 to i192
+%r49 = shl i192 %r48, 128
+%r50 = or i192 %r44, %r49
+%r51 = zext i192 %r50 to i256
+%r53 = getelementptr i64, i64* %r2, i32 3
+%r54 = load i64, i64* %r53
+%r55 = zext i64 %r54 to i256
+%r56 = shl i256 %r55, 192
+%r57 = or i256 %r51, %r56
+%r58 = zext i256 %r57 to i320
+%r60 = getelementptr i64, i64* %r2, i32 4
+%r61 = load i64, i64* %r60
+%r62 = zext i64 %r61 to i320
+%r63 = shl i320 %r62, 256
+%r64 = or i320 %r58, %r63
+%r65 = zext i320 %r64 to i384
+%r67 = getelementptr i64, i64* %r2, i32 5
+%r68 = load i64, i64* %r67
+%r69 = zext i64 %r68 to i384
+%r70 = shl i384 %r69, 320
+%r71 = or i384 %r65, %r70
+%r72 = zext i384 %r71 to i448
+%r74 = getelementptr i64, i64* %r2, i32 6
+%r75 = load i64, i64* %r74
+%r76 = zext i64 %r75 to i448
+%r77 = shl i448 %r76, 384
+%r78 = or i448 %r72, %r77
+%r79 = zext i448 %r78 to i512
+%r81 = getelementptr i64, i64* %r2, i32 7
+%r82 = load i64, i64* %r81
+%r83 = zext i64 %r82 to i512
+%r84 = shl i512 %r83, 448
+%r85 = or i512 %r79, %r84
+%r86 = zext i512 %r85 to i576
+%r88 = getelementptr i64, i64* %r2, i32 8
+%r89 = load i64, i64* %r88
+%r90 = zext i64 %r89 to i576
+%r91 = shl i576 %r90, 512
+%r92 = or i576 %r86, %r91
+%r93 = zext i576 %r92 to i640
+%r95 = getelementptr i64, i64* %r2, i32 9
+%r96 = load i64, i64* %r95
+%r97 = zext i64 %r96 to i640
+%r98 = shl i640 %r97, 576
+%r99 = or i640 %r93, %r98
+%r100 = zext i640 %r99 to i704
+%r101 = trunc i704 %r100 to i64
+%r102 = mul i64 %r101, %r6
+%r103 = call i384 @mulPv320x64(i64* %r3, i64 %r102)
+%r104 = zext i384 %r103 to i704
+%r105 = add i704 %r100, %r104
+%r106 = lshr i704 %r105, 64
+%r107 = trunc i704 %r106 to i640
+%r108 = trunc i640 %r107 to i64
+%r109 = mul i64 %r108, %r6
+%r110 = call i384 @mulPv320x64(i64* %r3, i64 %r109)
+%r111 = zext i384 %r110 to i640
+%r112 = add i640 %r107, %r111
+%r113 = lshr i640 %r112, 64
+%r114 = trunc i640 %r113 to i576
+%r115 = trunc i576 %r114 to i64
+%r116 = mul i64 %r115, %r6
+%r117 = call i384 @mulPv320x64(i64* %r3, i64 %r116)
+%r118 = zext i384 %r117 to i576
+%r119 = add i576 %r114, %r118
+%r120 = lshr i576 %r119, 64
+%r121 = trunc i576 %r120 to i512
+%r122 = trunc i512 %r121 to i64
+%r123 = mul i64 %r122, %r6
+%r124 = call i384 @mulPv320x64(i64* %r3, i64 %r123)
+%r125 = zext i384 %r124 to i512
+%r126 = add i512 %r121, %r125
+%r127 = lshr i512 %r126, 64
+%r128 = trunc i512 %r127 to i448
+%r129 = trunc i448 %r128 to i64
+%r130 = mul i64 %r129, %r6
+%r131 = call i384 @mulPv320x64(i64* %r3, i64 %r130)
+%r132 = zext i384 %r131 to i448
+%r133 = add i448 %r128, %r132
+%r134 = lshr i448 %r133, 64
+%r135 = trunc i448 %r134 to i384
+%r136 = zext i320 %r35 to i384
+%r137 = sub i384 %r135, %r136
+%r138 = lshr i384 %r137, 320
+%r139 = trunc i384 %r138 to i1
+%r140 = select i1 %r139, i384 %r135, i384 %r137
+%r141 = trunc i384 %r140 to i320
+%r142 = trunc i320 %r141 to i64
+%r144 = getelementptr i64, i64* %r1, i32 0
+store i64 %r142, i64* %r144
+%r145 = lshr i320 %r141, 64
+%r146 = trunc i320 %r145 to i64
+%r148 = getelementptr i64, i64* %r1, i32 1
+store i64 %r146, i64* %r148
+%r149 = lshr i320 %r145, 64
+%r150 = trunc i320 %r149 to i64
+%r152 = getelementptr i64, i64* %r1, i32 2
+store i64 %r150, i64* %r152
+%r153 = lshr i320 %r149, 64
+%r154 = trunc i320 %r153 to i64
+%r156 = getelementptr i64, i64* %r1, i32 3
+store i64 %r154, i64* %r156
+%r157 = lshr i320 %r153, 64
+%r158 = trunc i320 %r157 to i64
+%r160 = getelementptr i64, i64* %r1, i32 4
+store i64 %r158, i64* %r160
+ret void
+}
+define i64 @mcl_fp_addPre5L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+{
+%r5 = load i64, i64* %r3
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r3, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r35 = load i64, i64* %r4
+%r36 = zext i64 %r35 to i128
+%r38 = getelementptr i64, i64* %r4, i32 1
+%r39 = load i64, i64* %r38
+%r40 = zext i64 %r39 to i128
+%r41 = shl i128 %r40, 64
+%r42 = or i128 %r36, %r41
+%r43 = zext i128 %r42 to i192
+%r45 = getelementptr i64, i64* %r4, i32 2
+%r46 = load i64, i64* %r45
+%r47 = zext i64 %r46 to i192
+%r48 = shl i192 %r47, 128
+%r49 = or i192 %r43, %r48
+%r50 = zext i192 %r49 to i256
+%r52 = getelementptr i64, i64* %r4, i32 3
+%r53 = load i64, i64* %r52
+%r54 = zext i64 %r53 to i256
+%r55 = shl i256 %r54, 192
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i320
+%r59 = getelementptr i64, i64* %r4, i32 4
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i320
+%r62 = shl i320 %r61, 256
+%r63 = or i320 %r57, %r62
+%r64 = zext i320 %r63 to i384
+%r65 = add i384 %r34, %r64
+%r66 = trunc i384 %r65 to i320
+%r67 = trunc i320 %r66 to i64
+%r69 = getelementptr i64, i64* %r2, i32 0
+store i64 %r67, i64* %r69
+%r70 = lshr i320 %r66, 64
+%r71 = trunc i320 %r70 to i64
+%r73 = getelementptr i64, i64* %r2, i32 1
+store i64 %r71, i64* %r73
+%r74 = lshr i320 %r70, 64
+%r75 = trunc i320 %r74 to i64
+%r77 = getelementptr i64, i64* %r2, i32 2
+store i64 %r75, i64* %r77
+%r78 = lshr i320 %r74, 64
+%r79 = trunc i320 %r78 to i64
+%r81 = getelementptr i64, i64* %r2, i32 3
+store i64 %r79, i64* %r81
+%r82 = lshr i320 %r78, 64
+%r83 = trunc i320 %r82 to i64
+%r85 = getelementptr i64, i64* %r2, i32 4
+store i64 %r83, i64* %r85
+%r86 = lshr i384 %r65, 320
+%r87 = trunc i384 %r86 to i64
+ret i64 %r87
+}
+define i64 @mcl_fp_subPre5L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+{
+%r5 = load i64, i64* %r3
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r3, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r35 = load i64, i64* %r4
+%r36 = zext i64 %r35 to i128
+%r38 = getelementptr i64, i64* %r4, i32 1
+%r39 = load i64, i64* %r38
+%r40 = zext i64 %r39 to i128
+%r41 = shl i128 %r40, 64
+%r42 = or i128 %r36, %r41
+%r43 = zext i128 %r42 to i192
+%r45 = getelementptr i64, i64* %r4, i32 2
+%r46 = load i64, i64* %r45
+%r47 = zext i64 %r46 to i192
+%r48 = shl i192 %r47, 128
+%r49 = or i192 %r43, %r48
+%r50 = zext i192 %r49 to i256
+%r52 = getelementptr i64, i64* %r4, i32 3
+%r53 = load i64, i64* %r52
+%r54 = zext i64 %r53 to i256
+%r55 = shl i256 %r54, 192
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i320
+%r59 = getelementptr i64, i64* %r4, i32 4
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i320
+%r62 = shl i320 %r61, 256
+%r63 = or i320 %r57, %r62
+%r64 = zext i320 %r63 to i384
+%r65 = sub i384 %r34, %r64
+%r66 = trunc i384 %r65 to i320
+%r67 = trunc i320 %r66 to i64
+%r69 = getelementptr i64, i64* %r2, i32 0
+store i64 %r67, i64* %r69
+%r70 = lshr i320 %r66, 64
+%r71 = trunc i320 %r70 to i64
+%r73 = getelementptr i64, i64* %r2, i32 1
+store i64 %r71, i64* %r73
+%r74 = lshr i320 %r70, 64
+%r75 = trunc i320 %r74 to i64
+%r77 = getelementptr i64, i64* %r2, i32 2
+store i64 %r75, i64* %r77
+%r78 = lshr i320 %r74, 64
+%r79 = trunc i320 %r78 to i64
+%r81 = getelementptr i64, i64* %r2, i32 3
+store i64 %r79, i64* %r81
+%r82 = lshr i320 %r78, 64
+%r83 = trunc i320 %r82 to i64
+%r85 = getelementptr i64, i64* %r2, i32 4
+store i64 %r83, i64* %r85
+%r86 = lshr i384 %r65, 320
+%r87 = trunc i384 %r86 to i64
+%r89 = and i64 %r87, 1
+ret i64 %r89
+}
+define void @mcl_fp_shr1_5L(i64* noalias  %r1, i64* noalias  %r2)
+{
+%r3 = load i64, i64* %r2
+%r4 = zext i64 %r3 to i128
+%r6 = getelementptr i64, i64* %r2, i32 1
+%r7 = load i64, i64* %r6
+%r8 = zext i64 %r7 to i128
+%r9 = shl i128 %r8, 64
+%r10 = or i128 %r4, %r9
+%r11 = zext i128 %r10 to i192
+%r13 = getelementptr i64, i64* %r2, i32 2
+%r14 = load i64, i64* %r13
+%r15 = zext i64 %r14 to i192
+%r16 = shl i192 %r15, 128
+%r17 = or i192 %r11, %r16
+%r18 = zext i192 %r17 to i256
+%r20 = getelementptr i64, i64* %r2, i32 3
+%r21 = load i64, i64* %r20
+%r22 = zext i64 %r21 to i256
+%r23 = shl i256 %r22, 192
+%r24 = or i256 %r18, %r23
+%r25 = zext i256 %r24 to i320
+%r27 = getelementptr i64, i64* %r2, i32 4
+%r28 = load i64, i64* %r27
+%r29 = zext i64 %r28 to i320
+%r30 = shl i320 %r29, 256
+%r31 = or i320 %r25, %r30
+%r32 = lshr i320 %r31, 1
+%r33 = trunc i320 %r32 to i64
+%r35 = getelementptr i64, i64* %r1, i32 0
+store i64 %r33, i64* %r35
+%r36 = lshr i320 %r32, 64
+%r37 = trunc i320 %r36 to i64
+%r39 = getelementptr i64, i64* %r1, i32 1
+store i64 %r37, i64* %r39
+%r40 = lshr i320 %r36, 64
+%r41 = trunc i320 %r40 to i64
+%r43 = getelementptr i64, i64* %r1, i32 2
+store i64 %r41, i64* %r43
+%r44 = lshr i320 %r40, 64
+%r45 = trunc i320 %r44 to i64
+%r47 = getelementptr i64, i64* %r1, i32 3
+store i64 %r45, i64* %r47
+%r48 = lshr i320 %r44, 64
+%r49 = trunc i320 %r48 to i64
+%r51 = getelementptr i64, i64* %r1, i32 4
+store i64 %r49, i64* %r51
+ret void
+}
+define void @mcl_fp_add5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = load i64, i64* %r3
+%r35 = zext i64 %r34 to i128
+%r37 = getelementptr i64, i64* %r3, i32 1
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i128
+%r40 = shl i128 %r39, 64
+%r41 = or i128 %r35, %r40
+%r42 = zext i128 %r41 to i192
+%r44 = getelementptr i64, i64* %r3, i32 2
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i192
+%r47 = shl i192 %r46, 128
+%r48 = or i192 %r42, %r47
+%r49 = zext i192 %r48 to i256
+%r51 = getelementptr i64, i64* %r3, i32 3
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i256
+%r54 = shl i256 %r53, 192
+%r55 = or i256 %r49, %r54
+%r56 = zext i256 %r55 to i320
+%r58 = getelementptr i64, i64* %r3, i32 4
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i320
+%r61 = shl i320 %r60, 256
+%r62 = or i320 %r56, %r61
+%r63 = zext i320 %r33 to i384
+%r64 = zext i320 %r62 to i384
+%r65 = add i384 %r63, %r64
+%r66 = trunc i384 %r65 to i320
+%r67 = trunc i320 %r66 to i64
+%r69 = getelementptr i64, i64* %r1, i32 0
+store i64 %r67, i64* %r69
+%r70 = lshr i320 %r66, 64
+%r71 = trunc i320 %r70 to i64
+%r73 = getelementptr i64, i64* %r1, i32 1
+store i64 %r71, i64* %r73
+%r74 = lshr i320 %r70, 64
+%r75 = trunc i320 %r74 to i64
+%r77 = getelementptr i64, i64* %r1, i32 2
+store i64 %r75, i64* %r77
+%r78 = lshr i320 %r74, 64
+%r79 = trunc i320 %r78 to i64
+%r81 = getelementptr i64, i64* %r1, i32 3
+store i64 %r79, i64* %r81
+%r82 = lshr i320 %r78, 64
+%r83 = trunc i320 %r82 to i64
+%r85 = getelementptr i64, i64* %r1, i32 4
+store i64 %r83, i64* %r85
+%r86 = load i64, i64* %r4
+%r87 = zext i64 %r86 to i128
+%r89 = getelementptr i64, i64* %r4, i32 1
+%r90 = load i64, i64* %r89
+%r91 = zext i64 %r90 to i128
+%r92 = shl i128 %r91, 64
+%r93 = or i128 %r87, %r92
+%r94 = zext i128 %r93 to i192
+%r96 = getelementptr i64, i64* %r4, i32 2
+%r97 = load i64, i64* %r96
+%r98 = zext i64 %r97 to i192
+%r99 = shl i192 %r98, 128
+%r100 = or i192 %r94, %r99
+%r101 = zext i192 %r100 to i256
+%r103 = getelementptr i64, i64* %r4, i32 3
+%r104 = load i64, i64* %r103
+%r105 = zext i64 %r104 to i256
+%r106 = shl i256 %r105, 192
+%r107 = or i256 %r101, %r106
+%r108 = zext i256 %r107 to i320
+%r110 = getelementptr i64, i64* %r4, i32 4
+%r111 = load i64, i64* %r110
+%r112 = zext i64 %r111 to i320
+%r113 = shl i320 %r112, 256
+%r114 = or i320 %r108, %r113
+%r115 = zext i320 %r114 to i384
+%r116 = sub i384 %r65, %r115
+%r117 = lshr i384 %r116, 320
+%r118 = trunc i384 %r117 to i1
+br i1%r118, label %carry, label %nocarry
+nocarry:
+%r119 = trunc i384 %r116 to i320
+%r120 = trunc i320 %r119 to i64
+%r122 = getelementptr i64, i64* %r1, i32 0
+store i64 %r120, i64* %r122
+%r123 = lshr i320 %r119, 64
+%r124 = trunc i320 %r123 to i64
+%r126 = getelementptr i64, i64* %r1, i32 1
+store i64 %r124, i64* %r126
+%r127 = lshr i320 %r123, 64
+%r128 = trunc i320 %r127 to i64
+%r130 = getelementptr i64, i64* %r1, i32 2
+store i64 %r128, i64* %r130
+%r131 = lshr i320 %r127, 64
+%r132 = trunc i320 %r131 to i64
+%r134 = getelementptr i64, i64* %r1, i32 3
+store i64 %r132, i64* %r134
+%r135 = lshr i320 %r131, 64
+%r136 = trunc i320 %r135 to i64
+%r138 = getelementptr i64, i64* %r1, i32 4
+store i64 %r136, i64* %r138
+ret void
+carry:
+ret void
+}
+define void @mcl_fp_addNF5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = load i64, i64* %r3
+%r35 = zext i64 %r34 to i128
+%r37 = getelementptr i64, i64* %r3, i32 1
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i128
+%r40 = shl i128 %r39, 64
+%r41 = or i128 %r35, %r40
+%r42 = zext i128 %r41 to i192
+%r44 = getelementptr i64, i64* %r3, i32 2
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i192
+%r47 = shl i192 %r46, 128
+%r48 = or i192 %r42, %r47
+%r49 = zext i192 %r48 to i256
+%r51 = getelementptr i64, i64* %r3, i32 3
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i256
+%r54 = shl i256 %r53, 192
+%r55 = or i256 %r49, %r54
+%r56 = zext i256 %r55 to i320
+%r58 = getelementptr i64, i64* %r3, i32 4
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i320
+%r61 = shl i320 %r60, 256
+%r62 = or i320 %r56, %r61
+%r63 = add i320 %r33, %r62
+%r64 = load i64, i64* %r4
+%r65 = zext i64 %r64 to i128
+%r67 = getelementptr i64, i64* %r4, i32 1
+%r68 = load i64, i64* %r67
+%r69 = zext i64 %r68 to i128
+%r70 = shl i128 %r69, 64
+%r71 = or i128 %r65, %r70
+%r72 = zext i128 %r71 to i192
+%r74 = getelementptr i64, i64* %r4, i32 2
+%r75 = load i64, i64* %r74
+%r76 = zext i64 %r75 to i192
+%r77 = shl i192 %r76, 128
+%r78 = or i192 %r72, %r77
+%r79 = zext i192 %r78 to i256
+%r81 = getelementptr i64, i64* %r4, i32 3
+%r82 = load i64, i64* %r81
+%r83 = zext i64 %r82 to i256
+%r84 = shl i256 %r83, 192
+%r85 = or i256 %r79, %r84
+%r86 = zext i256 %r85 to i320
+%r88 = getelementptr i64, i64* %r4, i32 4
+%r89 = load i64, i64* %r88
+%r90 = zext i64 %r89 to i320
+%r91 = shl i320 %r90, 256
+%r92 = or i320 %r86, %r91
+%r93 = sub i320 %r63, %r92
+%r94 = lshr i320 %r93, 319
+%r95 = trunc i320 %r94 to i1
+%r96 = select i1 %r95, i320 %r63, i320 %r93
+%r97 = trunc i320 %r96 to i64
+%r99 = getelementptr i64, i64* %r1, i32 0
+store i64 %r97, i64* %r99
+%r100 = lshr i320 %r96, 64
+%r101 = trunc i320 %r100 to i64
+%r103 = getelementptr i64, i64* %r1, i32 1
+store i64 %r101, i64* %r103
+%r104 = lshr i320 %r100, 64
+%r105 = trunc i320 %r104 to i64
+%r107 = getelementptr i64, i64* %r1, i32 2
+store i64 %r105, i64* %r107
+%r108 = lshr i320 %r104, 64
+%r109 = trunc i320 %r108 to i64
+%r111 = getelementptr i64, i64* %r1, i32 3
+store i64 %r109, i64* %r111
+%r112 = lshr i320 %r108, 64
+%r113 = trunc i320 %r112 to i64
+%r115 = getelementptr i64, i64* %r1, i32 4
+store i64 %r113, i64* %r115
+ret void
+}
+define void @mcl_fp_sub5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = load i64, i64* %r3
+%r35 = zext i64 %r34 to i128
+%r37 = getelementptr i64, i64* %r3, i32 1
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i128
+%r40 = shl i128 %r39, 64
+%r41 = or i128 %r35, %r40
+%r42 = zext i128 %r41 to i192
+%r44 = getelementptr i64, i64* %r3, i32 2
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i192
+%r47 = shl i192 %r46, 128
+%r48 = or i192 %r42, %r47
+%r49 = zext i192 %r48 to i256
+%r51 = getelementptr i64, i64* %r3, i32 3
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i256
+%r54 = shl i256 %r53, 192
+%r55 = or i256 %r49, %r54
+%r56 = zext i256 %r55 to i320
+%r58 = getelementptr i64, i64* %r3, i32 4
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i320
+%r61 = shl i320 %r60, 256
+%r62 = or i320 %r56, %r61
+%r63 = zext i320 %r33 to i384
+%r64 = zext i320 %r62 to i384
+%r65 = sub i384 %r63, %r64
+%r66 = trunc i384 %r65 to i320
+%r67 = lshr i384 %r65, 320
+%r68 = trunc i384 %r67 to i1
+%r69 = trunc i320 %r66 to i64
+%r71 = getelementptr i64, i64* %r1, i32 0
+store i64 %r69, i64* %r71
+%r72 = lshr i320 %r66, 64
+%r73 = trunc i320 %r72 to i64
+%r75 = getelementptr i64, i64* %r1, i32 1
+store i64 %r73, i64* %r75
+%r76 = lshr i320 %r72, 64
+%r77 = trunc i320 %r76 to i64
+%r79 = getelementptr i64, i64* %r1, i32 2
+store i64 %r77, i64* %r79
+%r80 = lshr i320 %r76, 64
+%r81 = trunc i320 %r80 to i64
+%r83 = getelementptr i64, i64* %r1, i32 3
+store i64 %r81, i64* %r83
+%r84 = lshr i320 %r80, 64
+%r85 = trunc i320 %r84 to i64
+%r87 = getelementptr i64, i64* %r1, i32 4
+store i64 %r85, i64* %r87
+br i1%r68, label %carry, label %nocarry
+nocarry:
+ret void
+carry:
+%r88 = load i64, i64* %r4
+%r89 = zext i64 %r88 to i128
+%r91 = getelementptr i64, i64* %r4, i32 1
+%r92 = load i64, i64* %r91
+%r93 = zext i64 %r92 to i128
+%r94 = shl i128 %r93, 64
+%r95 = or i128 %r89, %r94
+%r96 = zext i128 %r95 to i192
+%r98 = getelementptr i64, i64* %r4, i32 2
+%r99 = load i64, i64* %r98
+%r100 = zext i64 %r99 to i192
+%r101 = shl i192 %r100, 128
+%r102 = or i192 %r96, %r101
+%r103 = zext i192 %r102 to i256
+%r105 = getelementptr i64, i64* %r4, i32 3
+%r106 = load i64, i64* %r105
+%r107 = zext i64 %r106 to i256
+%r108 = shl i256 %r107, 192
+%r109 = or i256 %r103, %r108
+%r110 = zext i256 %r109 to i320
+%r112 = getelementptr i64, i64* %r4, i32 4
+%r113 = load i64, i64* %r112
+%r114 = zext i64 %r113 to i320
+%r115 = shl i320 %r114, 256
+%r116 = or i320 %r110, %r115
+%r117 = add i320 %r66, %r116
+%r118 = trunc i320 %r117 to i64
+%r120 = getelementptr i64, i64* %r1, i32 0
+store i64 %r118, i64* %r120
+%r121 = lshr i320 %r117, 64
+%r122 = trunc i320 %r121 to i64
+%r124 = getelementptr i64, i64* %r1, i32 1
+store i64 %r122, i64* %r124
+%r125 = lshr i320 %r121, 64
+%r126 = trunc i320 %r125 to i64
+%r128 = getelementptr i64, i64* %r1, i32 2
+store i64 %r126, i64* %r128
+%r129 = lshr i320 %r125, 64
+%r130 = trunc i320 %r129 to i64
+%r132 = getelementptr i64, i64* %r1, i32 3
+store i64 %r130, i64* %r132
+%r133 = lshr i320 %r129, 64
+%r134 = trunc i320 %r133 to i64
+%r136 = getelementptr i64, i64* %r1, i32 4
+store i64 %r134, i64* %r136
+ret void
+}
+define void @mcl_fp_subNF5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = load i64, i64* %r3
+%r35 = zext i64 %r34 to i128
+%r37 = getelementptr i64, i64* %r3, i32 1
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i128
+%r40 = shl i128 %r39, 64
+%r41 = or i128 %r35, %r40
+%r42 = zext i128 %r41 to i192
+%r44 = getelementptr i64, i64* %r3, i32 2
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i192
+%r47 = shl i192 %r46, 128
+%r48 = or i192 %r42, %r47
+%r49 = zext i192 %r48 to i256
+%r51 = getelementptr i64, i64* %r3, i32 3
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i256
+%r54 = shl i256 %r53, 192
+%r55 = or i256 %r49, %r54
+%r56 = zext i256 %r55 to i320
+%r58 = getelementptr i64, i64* %r3, i32 4
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i320
+%r61 = shl i320 %r60, 256
+%r62 = or i320 %r56, %r61
+%r63 = sub i320 %r33, %r62
+%r64 = lshr i320 %r63, 319
+%r65 = trunc i320 %r64 to i1
+%r66 = load i64, i64* %r4
+%r67 = zext i64 %r66 to i128
+%r69 = getelementptr i64, i64* %r4, i32 1
+%r70 = load i64, i64* %r69
+%r71 = zext i64 %r70 to i128
+%r72 = shl i128 %r71, 64
+%r73 = or i128 %r67, %r72
+%r74 = zext i128 %r73 to i192
+%r76 = getelementptr i64, i64* %r4, i32 2
+%r77 = load i64, i64* %r76
+%r78 = zext i64 %r77 to i192
+%r79 = shl i192 %r78, 128
+%r80 = or i192 %r74, %r79
+%r81 = zext i192 %r80 to i256
+%r83 = getelementptr i64, i64* %r4, i32 3
+%r84 = load i64, i64* %r83
+%r85 = zext i64 %r84 to i256
+%r86 = shl i256 %r85, 192
+%r87 = or i256 %r81, %r86
+%r88 = zext i256 %r87 to i320
+%r90 = getelementptr i64, i64* %r4, i32 4
+%r91 = load i64, i64* %r90
+%r92 = zext i64 %r91 to i320
+%r93 = shl i320 %r92, 256
+%r94 = or i320 %r88, %r93
+%r96 = select i1 %r65, i320 %r94, i320 0
+%r97 = add i320 %r63, %r96
+%r98 = trunc i320 %r97 to i64
+%r100 = getelementptr i64, i64* %r1, i32 0
+store i64 %r98, i64* %r100
+%r101 = lshr i320 %r97, 64
+%r102 = trunc i320 %r101 to i64
+%r104 = getelementptr i64, i64* %r1, i32 1
+store i64 %r102, i64* %r104
+%r105 = lshr i320 %r101, 64
+%r106 = trunc i320 %r105 to i64
+%r108 = getelementptr i64, i64* %r1, i32 2
+store i64 %r106, i64* %r108
+%r109 = lshr i320 %r105, 64
+%r110 = trunc i320 %r109 to i64
+%r112 = getelementptr i64, i64* %r1, i32 3
+store i64 %r110, i64* %r112
+%r113 = lshr i320 %r109, 64
+%r114 = trunc i320 %r113 to i64
+%r116 = getelementptr i64, i64* %r1, i32 4
+store i64 %r114, i64* %r116
+ret void
+}
+define void @mcl_fpDbl_add5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; %r1[0..9] = %r2[0..9] + %r3[0..9]; low 5 limbs stored as-is, upper part conditionally reduced by the 5-limb value at %r4 (presumably the modulus p - confirm against caller)
+{
+%r5 = load i64, i64* %r2 ; pack %r2[0..9] into a single i640 (%r68); limb i is placed at bit 64*i
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = zext i576 %r61 to i640
+%r64 = getelementptr i64, i64* %r2, i32 9
+%r65 = load i64, i64* %r64
+%r66 = zext i64 %r65 to i640
+%r67 = shl i640 %r66, 576
+%r68 = or i640 %r62, %r67 ; %r68 = first operand, all 10 limbs
+%r69 = load i64, i64* %r3 ; pack %r3[0..9] into an i640 (%r132) the same way
+%r70 = zext i64 %r69 to i128
+%r72 = getelementptr i64, i64* %r3, i32 1
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i128
+%r75 = shl i128 %r74, 64
+%r76 = or i128 %r70, %r75
+%r77 = zext i128 %r76 to i192
+%r79 = getelementptr i64, i64* %r3, i32 2
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i192
+%r82 = shl i192 %r81, 128
+%r83 = or i192 %r77, %r82
+%r84 = zext i192 %r83 to i256
+%r86 = getelementptr i64, i64* %r3, i32 3
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i256
+%r89 = shl i256 %r88, 192
+%r90 = or i256 %r84, %r89
+%r91 = zext i256 %r90 to i320
+%r93 = getelementptr i64, i64* %r3, i32 4
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i320
+%r96 = shl i320 %r95, 256
+%r97 = or i320 %r91, %r96
+%r98 = zext i320 %r97 to i384
+%r100 = getelementptr i64, i64* %r3, i32 5
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i384
+%r103 = shl i384 %r102, 320
+%r104 = or i384 %r98, %r103
+%r105 = zext i384 %r104 to i448
+%r107 = getelementptr i64, i64* %r3, i32 6
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i448
+%r110 = shl i448 %r109, 384
+%r111 = or i448 %r105, %r110
+%r112 = zext i448 %r111 to i512
+%r114 = getelementptr i64, i64* %r3, i32 7
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i512
+%r117 = shl i512 %r116, 448
+%r118 = or i512 %r112, %r117
+%r119 = zext i512 %r118 to i576
+%r121 = getelementptr i64, i64* %r3, i32 8
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i576
+%r124 = shl i576 %r123, 512
+%r125 = or i576 %r119, %r124
+%r126 = zext i576 %r125 to i640
+%r128 = getelementptr i64, i64* %r3, i32 9
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i640
+%r131 = shl i640 %r130, 576
+%r132 = or i640 %r126, %r131 ; %r132 = second operand, all 10 limbs
+%r133 = zext i640 %r68 to i704 ; widen both operands by one limb so the carry out of bit 639 is not lost
+%r134 = zext i640 %r132 to i704
+%r135 = add i704 %r133, %r134 ; full-width sum including carry
+%r136 = trunc i704 %r135 to i320 ; low 320 bits of the sum go to %r1[0..4] without any reduction
+%r137 = trunc i320 %r136 to i64
+%r139 = getelementptr i64, i64* %r1, i32 0
+store i64 %r137, i64* %r139
+%r140 = lshr i320 %r136, 64
+%r141 = trunc i320 %r140 to i64
+%r143 = getelementptr i64, i64* %r1, i32 1
+store i64 %r141, i64* %r143
+%r144 = lshr i320 %r140, 64
+%r145 = trunc i320 %r144 to i64
+%r147 = getelementptr i64, i64* %r1, i32 2
+store i64 %r145, i64* %r147
+%r148 = lshr i320 %r144, 64
+%r149 = trunc i320 %r148 to i64
+%r151 = getelementptr i64, i64* %r1, i32 3
+store i64 %r149, i64* %r151
+%r152 = lshr i320 %r148, 64
+%r153 = trunc i320 %r152 to i64
+%r155 = getelementptr i64, i64* %r1, i32 4
+store i64 %r153, i64* %r155
+%r156 = lshr i704 %r135, 320
+%r157 = trunc i704 %r156 to i384 ; %r157 = upper part of the sum (bits 320 and up, carry included)
+%r158 = load i64, i64* %r4 ; pack %r4[0..4] into an i320 (%r186)
+%r159 = zext i64 %r158 to i128
+%r161 = getelementptr i64, i64* %r4, i32 1
+%r162 = load i64, i64* %r161
+%r163 = zext i64 %r162 to i128
+%r164 = shl i128 %r163, 64
+%r165 = or i128 %r159, %r164
+%r166 = zext i128 %r165 to i192
+%r168 = getelementptr i64, i64* %r4, i32 2
+%r169 = load i64, i64* %r168
+%r170 = zext i64 %r169 to i192
+%r171 = shl i192 %r170, 128
+%r172 = or i192 %r166, %r171
+%r173 = zext i192 %r172 to i256
+%r175 = getelementptr i64, i64* %r4, i32 3
+%r176 = load i64, i64* %r175
+%r177 = zext i64 %r176 to i256
+%r178 = shl i256 %r177, 192
+%r179 = or i256 %r173, %r178
+%r180 = zext i256 %r179 to i320
+%r182 = getelementptr i64, i64* %r4, i32 4
+%r183 = load i64, i64* %r182
+%r184 = zext i64 %r183 to i320
+%r185 = shl i320 %r184, 256
+%r186 = or i320 %r180, %r185
+%r187 = zext i320 %r186 to i384
+%r188 = sub i384 %r157, %r187 ; trial subtraction: upper part minus the %r4 value
+%r189 = lshr i384 %r188, 320 ; bit 320 of the difference is tested as the borrow indicator (NOTE(review): relies on the difference fitting in 320 bits when no borrow occurs - presumably guaranteed by the callers' input range)
+%r190 = trunc i384 %r189 to i1
+%r191 = select i1 %r190, i384 %r157, i384 %r188 ; borrow -> keep the unsubtracted upper part, otherwise take the difference
+%r192 = trunc i384 %r191 to i320
+%r194 = getelementptr i64, i64* %r1, i32 5 ; selected value is written to %r1[5..9]
+%r195 = trunc i320 %r192 to i64
+%r197 = getelementptr i64, i64* %r194, i32 0
+store i64 %r195, i64* %r197
+%r198 = lshr i320 %r192, 64
+%r199 = trunc i320 %r198 to i64
+%r201 = getelementptr i64, i64* %r194, i32 1
+store i64 %r199, i64* %r201
+%r202 = lshr i320 %r198, 64
+%r203 = trunc i320 %r202 to i64
+%r205 = getelementptr i64, i64* %r194, i32 2
+store i64 %r203, i64* %r205
+%r206 = lshr i320 %r202, 64
+%r207 = trunc i320 %r206 to i64
+%r209 = getelementptr i64, i64* %r194, i32 3
+store i64 %r207, i64* %r209
+%r210 = lshr i320 %r206, 64
+%r211 = trunc i320 %r210 to i64
+%r213 = getelementptr i64, i64* %r194, i32 4
+store i64 %r211, i64* %r213
+ret void
+}
+define void @mcl_fpDbl_sub5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; %r1[0..9] = %r2[0..9] - %r3[0..9]; on borrow the 5-limb value at %r4 (presumably the modulus p - confirm against caller) is added back into the upper 5 limbs
+{
+%r5 = load i64, i64* %r2 ; pack %r2[0..9] into a single i640 (%r68); limb i is placed at bit 64*i
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = zext i576 %r61 to i640
+%r64 = getelementptr i64, i64* %r2, i32 9
+%r65 = load i64, i64* %r64
+%r66 = zext i64 %r65 to i640
+%r67 = shl i640 %r66, 576
+%r68 = or i640 %r62, %r67 ; %r68 = minuend, all 10 limbs
+%r69 = load i64, i64* %r3 ; pack %r3[0..9] into an i640 (%r132) the same way
+%r70 = zext i64 %r69 to i128
+%r72 = getelementptr i64, i64* %r3, i32 1
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i128
+%r75 = shl i128 %r74, 64
+%r76 = or i128 %r70, %r75
+%r77 = zext i128 %r76 to i192
+%r79 = getelementptr i64, i64* %r3, i32 2
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i192
+%r82 = shl i192 %r81, 128
+%r83 = or i192 %r77, %r82
+%r84 = zext i192 %r83 to i256
+%r86 = getelementptr i64, i64* %r3, i32 3
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i256
+%r89 = shl i256 %r88, 192
+%r90 = or i256 %r84, %r89
+%r91 = zext i256 %r90 to i320
+%r93 = getelementptr i64, i64* %r3, i32 4
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i320
+%r96 = shl i320 %r95, 256
+%r97 = or i320 %r91, %r96
+%r98 = zext i320 %r97 to i384
+%r100 = getelementptr i64, i64* %r3, i32 5
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i384
+%r103 = shl i384 %r102, 320
+%r104 = or i384 %r98, %r103
+%r105 = zext i384 %r104 to i448
+%r107 = getelementptr i64, i64* %r3, i32 6
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i448
+%r110 = shl i448 %r109, 384
+%r111 = or i448 %r105, %r110
+%r112 = zext i448 %r111 to i512
+%r114 = getelementptr i64, i64* %r3, i32 7
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i512
+%r117 = shl i512 %r116, 448
+%r118 = or i512 %r112, %r117
+%r119 = zext i512 %r118 to i576
+%r121 = getelementptr i64, i64* %r3, i32 8
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i576
+%r124 = shl i576 %r123, 512
+%r125 = or i576 %r119, %r124
+%r126 = zext i576 %r125 to i640
+%r128 = getelementptr i64, i64* %r3, i32 9
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i640
+%r131 = shl i640 %r130, 576
+%r132 = or i640 %r126, %r131 ; %r132 = subtrahend, all 10 limbs
+%r133 = zext i640 %r68 to i704 ; widen both operands by one limb so a borrow shows up above bit 639
+%r134 = zext i640 %r132 to i704
+%r135 = sub i704 %r133, %r134 ; wrapping difference; bits 640 and up are all ones exactly when %r68 < %r132
+%r136 = trunc i704 %r135 to i320 ; low 320 bits of the difference go to %r1[0..4] unchanged
+%r137 = trunc i320 %r136 to i64
+%r139 = getelementptr i64, i64* %r1, i32 0
+store i64 %r137, i64* %r139
+%r140 = lshr i320 %r136, 64
+%r141 = trunc i320 %r140 to i64
+%r143 = getelementptr i64, i64* %r1, i32 1
+store i64 %r141, i64* %r143
+%r144 = lshr i320 %r140, 64
+%r145 = trunc i320 %r144 to i64
+%r147 = getelementptr i64, i64* %r1, i32 2
+store i64 %r145, i64* %r147
+%r148 = lshr i320 %r144, 64
+%r149 = trunc i320 %r148 to i64
+%r151 = getelementptr i64, i64* %r1, i32 3
+store i64 %r149, i64* %r151
+%r152 = lshr i320 %r148, 64
+%r153 = trunc i320 %r152 to i64
+%r155 = getelementptr i64, i64* %r1, i32 4
+store i64 %r153, i64* %r155
+%r156 = lshr i704 %r135, 320
+%r157 = trunc i704 %r156 to i320 ; %r157 = bits 320..639 of the difference (upper 5 limbs)
+%r158 = lshr i704 %r135, 640
+%r159 = trunc i704 %r158 to i1 ; %r159 = bit 640 of the difference = borrow flag
+%r160 = load i64, i64* %r4 ; pack %r4[0..4] into an i320 (%r188)
+%r161 = zext i64 %r160 to i128
+%r163 = getelementptr i64, i64* %r4, i32 1
+%r164 = load i64, i64* %r163
+%r165 = zext i64 %r164 to i128
+%r166 = shl i128 %r165, 64
+%r167 = or i128 %r161, %r166
+%r168 = zext i128 %r167 to i192
+%r170 = getelementptr i64, i64* %r4, i32 2
+%r171 = load i64, i64* %r170
+%r172 = zext i64 %r171 to i192
+%r173 = shl i192 %r172, 128
+%r174 = or i192 %r168, %r173
+%r175 = zext i192 %r174 to i256
+%r177 = getelementptr i64, i64* %r4, i32 3
+%r178 = load i64, i64* %r177
+%r179 = zext i64 %r178 to i256
+%r180 = shl i256 %r179, 192
+%r181 = or i256 %r175, %r180
+%r182 = zext i256 %r181 to i320
+%r184 = getelementptr i64, i64* %r4, i32 4
+%r185 = load i64, i64* %r184
+%r186 = zext i64 %r185 to i320
+%r187 = shl i320 %r186, 256
+%r188 = or i320 %r182, %r187
+%r190 = select i1 %r159, i320 %r188, i320 0 ; branch-free correction term: the %r4 value on borrow, else 0
+%r191 = add i320 %r157, %r190 ; add the correction to the upper half (wraps modulo 2^320)
+%r193 = getelementptr i64, i64* %r1, i32 5 ; corrected upper half is written to %r1[5..9]
+%r194 = trunc i320 %r191 to i64
+%r196 = getelementptr i64, i64* %r193, i32 0
+store i64 %r194, i64* %r196
+%r197 = lshr i320 %r191, 64
+%r198 = trunc i320 %r197 to i64
+%r200 = getelementptr i64, i64* %r193, i32 1
+store i64 %r198, i64* %r200
+%r201 = lshr i320 %r197, 64
+%r202 = trunc i320 %r201 to i64
+%r204 = getelementptr i64, i64* %r193, i32 2
+store i64 %r202, i64* %r204
+%r205 = lshr i320 %r201, 64
+%r206 = trunc i320 %r205 to i64
+%r208 = getelementptr i64, i64* %r193, i32 3
+store i64 %r206, i64* %r208
+%r209 = lshr i320 %r205, 64
+%r210 = trunc i320 %r209 to i64
+%r212 = getelementptr i64, i64* %r193, i32 4
+store i64 %r210, i64* %r212
+ret void
+}
+define i448 @mulPv384x64(i64* noalias  %r2, i64 %r3) ; combines six per-limb results for %r2[0..5] and the scalar %r3 into one i448 (presumably the full 384x64-bit product - depends on @mulPos64x64, defined elsewhere)
+{
+%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0) ; NOTE(review): presumably returns the 128-bit product %r2[i] * %r3 for index i (last argument) - confirm against its definition
+%r6 = trunc i128 %r5 to i64 ; low 64 bits of partial result 0
+%r7 = call i64 @extractHigh64(i128 %r5) ; presumably the upper 64 bits of the i128 - confirm against its definition
+%r9 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 1)
+%r10 = trunc i128 %r9 to i64
+%r11 = call i64 @extractHigh64(i128 %r9)
+%r13 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 2)
+%r14 = trunc i128 %r13 to i64
+%r15 = call i64 @extractHigh64(i128 %r13)
+%r17 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 3)
+%r18 = trunc i128 %r17 to i64
+%r19 = call i64 @extractHigh64(i128 %r17)
+%r21 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 4)
+%r22 = trunc i128 %r21 to i64
+%r23 = call i64 @extractHigh64(i128 %r21)
+%r25 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 5)
+%r26 = trunc i128 %r25 to i64
+%r27 = call i64 @extractHigh64(i128 %r25)
+%r28 = zext i64 %r6 to i128 ; pack the six low halves into an i384 (%r47), half i at bit 64*i
+%r29 = zext i64 %r10 to i128
+%r30 = shl i128 %r29, 64
+%r31 = or i128 %r28, %r30
+%r32 = zext i128 %r31 to i192
+%r33 = zext i64 %r14 to i192
+%r34 = shl i192 %r33, 128
+%r35 = or i192 %r32, %r34
+%r36 = zext i192 %r35 to i256
+%r37 = zext i64 %r18 to i256
+%r38 = shl i256 %r37, 192
+%r39 = or i256 %r36, %r38
+%r40 = zext i256 %r39 to i320
+%r41 = zext i64 %r22 to i320
+%r42 = shl i320 %r41, 256
+%r43 = or i320 %r40, %r42
+%r44 = zext i320 %r43 to i384
+%r45 = zext i64 %r26 to i384
+%r46 = shl i384 %r45, 320
+%r47 = or i384 %r44, %r46
+%r48 = zext i64 %r7 to i128 ; pack the six high halves into an i384 (%r67) in the same layout
+%r49 = zext i64 %r11 to i128
+%r50 = shl i128 %r49, 64
+%r51 = or i128 %r48, %r50
+%r52 = zext i128 %r51 to i192
+%r53 = zext i64 %r15 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r52, %r54
+%r56 = zext i192 %r55 to i256
+%r57 = zext i64 %r19 to i256
+%r58 = shl i256 %r57, 192
+%r59 = or i256 %r56, %r58
+%r60 = zext i256 %r59 to i320
+%r61 = zext i64 %r23 to i320
+%r62 = shl i320 %r61, 256
+%r63 = or i320 %r60, %r62
+%r64 = zext i320 %r63 to i384
+%r65 = zext i64 %r27 to i384
+%r66 = shl i384 %r65, 320
+%r67 = or i384 %r64, %r66
+%r68 = zext i384 %r47 to i448
+%r69 = zext i384 %r67 to i448
+%r70 = shl i448 %r69, 64 ; high halves sit one limb above their matching low halves
+%r71 = add i448 %r68, %r70 ; a single wide add resolves all inter-limb carries
+ret i448 %r71
+}
+define void @mcl_fp_mulUnitPre6L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3) ; writes the i448 returned by mulPv384x64(%r2, %r3) to %r1[0..6] as seven 64-bit limbs, lowest limb first; no reduction is performed
+{
+%r4 = call i448 @mulPv384x64(i64* %r2, i64 %r3)
+%r5 = trunc i448 %r4 to i64 ; peel off one limb at a time: truncate, store, shift right by 64
+%r7 = getelementptr i64, i64* %r1, i32 0
+store i64 %r5, i64* %r7
+%r8 = lshr i448 %r4, 64
+%r9 = trunc i448 %r8 to i64
+%r11 = getelementptr i64, i64* %r1, i32 1
+store i64 %r9, i64* %r11
+%r12 = lshr i448 %r8, 64
+%r13 = trunc i448 %r12 to i64
+%r15 = getelementptr i64, i64* %r1, i32 2
+store i64 %r13, i64* %r15
+%r16 = lshr i448 %r12, 64
+%r17 = trunc i448 %r16 to i64
+%r19 = getelementptr i64, i64* %r1, i32 3
+store i64 %r17, i64* %r19
+%r20 = lshr i448 %r16, 64
+%r21 = trunc i448 %r20 to i64
+%r23 = getelementptr i64, i64* %r1, i32 4
+store i64 %r21, i64* %r23
+%r24 = lshr i448 %r20, 64
+%r25 = trunc i448 %r24 to i64
+%r27 = getelementptr i64, i64* %r1, i32 5
+store i64 %r25, i64* %r27
+%r28 = lshr i448 %r24, 64
+%r29 = trunc i448 %r28 to i64
+%r31 = getelementptr i64, i64* %r1, i32 6 ; seventh (top) limb of the i448 result
+store i64 %r29, i64* %r31
+ret void
+}
+define void @mcl_fpDbl_mulPre6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; row-by-row multiply without reduction: accumulates mulPv384x64(%r2, %r3[i]) for i = 0..5 and writes 12 limbs to %r1[0..11]
+{
+%r4 = load i64, i64* %r3 ; row 0: multiplier limb %r3[0]
+%r5 = call i448 @mulPv384x64(i64* %r2, i64 %r4)
+%r6 = trunc i448 %r5 to i64
+store i64 %r6, i64* %r1 ; lowest limb of the running value is final; store it to %r1[0]
+%r7 = lshr i448 %r5, 64 ; drop the stored limb so the next row lines up one limb higher
+%r9 = getelementptr i64, i64* %r3, i32 1 ; row 1: multiplier limb %r3[1]
+%r10 = load i64, i64* %r9
+%r11 = call i448 @mulPv384x64(i64* %r2, i64 %r10)
+%r12 = add i448 %r7, %r11 ; running value = previous remainder + this row
+%r13 = trunc i448 %r12 to i64
+%r15 = getelementptr i64, i64* %r1, i32 1
+store i64 %r13, i64* %r15
+%r16 = lshr i448 %r12, 64
+%r18 = getelementptr i64, i64* %r3, i32 2 ; row 2
+%r19 = load i64, i64* %r18
+%r20 = call i448 @mulPv384x64(i64* %r2, i64 %r19)
+%r21 = add i448 %r16, %r20
+%r22 = trunc i448 %r21 to i64
+%r24 = getelementptr i64, i64* %r1, i32 2
+store i64 %r22, i64* %r24
+%r25 = lshr i448 %r21, 64
+%r27 = getelementptr i64, i64* %r3, i32 3 ; row 3
+%r28 = load i64, i64* %r27
+%r29 = call i448 @mulPv384x64(i64* %r2, i64 %r28)
+%r30 = add i448 %r25, %r29
+%r31 = trunc i448 %r30 to i64
+%r33 = getelementptr i64, i64* %r1, i32 3
+store i64 %r31, i64* %r33
+%r34 = lshr i448 %r30, 64
+%r36 = getelementptr i64, i64* %r3, i32 4 ; row 4
+%r37 = load i64, i64* %r36
+%r38 = call i448 @mulPv384x64(i64* %r2, i64 %r37)
+%r39 = add i448 %r34, %r38
+%r40 = trunc i448 %r39 to i64
+%r42 = getelementptr i64, i64* %r1, i32 4
+store i64 %r40, i64* %r42
+%r43 = lshr i448 %r39, 64
+%r45 = getelementptr i64, i64* %r3, i32 5 ; row 5 (last)
+%r46 = load i64, i64* %r45
+%r47 = call i448 @mulPv384x64(i64* %r2, i64 %r46)
+%r48 = add i448 %r43, %r47
+%r50 = getelementptr i64, i64* %r1, i32 5 ; after the last row, all 7 limbs of the running value go to %r1[5..11]
+%r51 = trunc i448 %r48 to i64
+%r53 = getelementptr i64, i64* %r50, i32 0
+store i64 %r51, i64* %r53
+%r54 = lshr i448 %r48, 64
+%r55 = trunc i448 %r54 to i64
+%r57 = getelementptr i64, i64* %r50, i32 1
+store i64 %r55, i64* %r57
+%r58 = lshr i448 %r54, 64
+%r59 = trunc i448 %r58 to i64
+%r61 = getelementptr i64, i64* %r50, i32 2
+store i64 %r59, i64* %r61
+%r62 = lshr i448 %r58, 64
+%r63 = trunc i448 %r62 to i64
+%r65 = getelementptr i64, i64* %r50, i32 3
+store i64 %r63, i64* %r65
+%r66 = lshr i448 %r62, 64
+%r67 = trunc i448 %r66 to i64
+%r69 = getelementptr i64, i64* %r50, i32 4
+store i64 %r67, i64* %r69
+%r70 = lshr i448 %r66, 64
+%r71 = trunc i448 %r70 to i64
+%r73 = getelementptr i64, i64* %r50, i32 5
+store i64 %r71, i64* %r73
+%r74 = lshr i448 %r70, 64
+%r75 = trunc i448 %r74 to i64
+%r77 = getelementptr i64, i64* %r50, i32 6 ; i.e. %r1[11], the top limb of the 12-limb result
+store i64 %r75, i64* %r77
+ret void
+}
+define void @mcl_fpDbl_sqrPre6L(i64* noalias  %r1, i64* noalias  %r2) ; squaring without reduction: same row-by-row scheme as a general multiply, with %r2 used as both operands; writes 12 limbs to %r1[0..11]
+{
+%r3 = load i64, i64* %r2 ; row 0: multiplier limb %r2[0]
+%r4 = call i448 @mulPv384x64(i64* %r2, i64 %r3)
+%r5 = trunc i448 %r4 to i64
+store i64 %r5, i64* %r1 ; lowest limb of the running value is final; store it to %r1[0]
+%r6 = lshr i448 %r4, 64 ; drop the stored limb so the next row lines up one limb higher
+%r8 = getelementptr i64, i64* %r2, i32 1 ; row 1: multiplier limb %r2[1]
+%r9 = load i64, i64* %r8
+%r10 = call i448 @mulPv384x64(i64* %r2, i64 %r9)
+%r11 = add i448 %r6, %r10 ; running value = previous remainder + this row
+%r12 = trunc i448 %r11 to i64
+%r14 = getelementptr i64, i64* %r1, i32 1
+store i64 %r12, i64* %r14
+%r15 = lshr i448 %r11, 64
+%r17 = getelementptr i64, i64* %r2, i32 2 ; row 2
+%r18 = load i64, i64* %r17
+%r19 = call i448 @mulPv384x64(i64* %r2, i64 %r18)
+%r20 = add i448 %r15, %r19
+%r21 = trunc i448 %r20 to i64
+%r23 = getelementptr i64, i64* %r1, i32 2
+store i64 %r21, i64* %r23
+%r24 = lshr i448 %r20, 64
+%r26 = getelementptr i64, i64* %r2, i32 3 ; row 3
+%r27 = load i64, i64* %r26
+%r28 = call i448 @mulPv384x64(i64* %r2, i64 %r27)
+%r29 = add i448 %r24, %r28
+%r30 = trunc i448 %r29 to i64
+%r32 = getelementptr i64, i64* %r1, i32 3
+store i64 %r30, i64* %r32
+%r33 = lshr i448 %r29, 64
+%r35 = getelementptr i64, i64* %r2, i32 4 ; row 4
+%r36 = load i64, i64* %r35
+%r37 = call i448 @mulPv384x64(i64* %r2, i64 %r36)
+%r38 = add i448 %r33, %r37
+%r39 = trunc i448 %r38 to i64
+%r41 = getelementptr i64, i64* %r1, i32 4
+store i64 %r39, i64* %r41
+%r42 = lshr i448 %r38, 64
+%r44 = getelementptr i64, i64* %r2, i32 5 ; row 5 (last)
+%r45 = load i64, i64* %r44
+%r46 = call i448 @mulPv384x64(i64* %r2, i64 %r45)
+%r47 = add i448 %r42, %r46
+%r49 = getelementptr i64, i64* %r1, i32 5 ; after the last row, all 7 limbs of the running value go to %r1[5..11]
+%r50 = trunc i448 %r47 to i64
+%r52 = getelementptr i64, i64* %r49, i32 0
+store i64 %r50, i64* %r52
+%r53 = lshr i448 %r47, 64
+%r54 = trunc i448 %r53 to i64
+%r56 = getelementptr i64, i64* %r49, i32 1
+store i64 %r54, i64* %r56
+%r57 = lshr i448 %r53, 64
+%r58 = trunc i448 %r57 to i64
+%r60 = getelementptr i64, i64* %r49, i32 2
+store i64 %r58, i64* %r60
+%r61 = lshr i448 %r57, 64
+%r62 = trunc i448 %r61 to i64
+%r64 = getelementptr i64, i64* %r49, i32 3
+store i64 %r62, i64* %r64
+%r65 = lshr i448 %r61, 64
+%r66 = trunc i448 %r65 to i64
+%r68 = getelementptr i64, i64* %r49, i32 4
+store i64 %r66, i64* %r68
+%r69 = lshr i448 %r65, 64
+%r70 = trunc i448 %r69 to i64
+%r72 = getelementptr i64, i64* %r49, i32 5
+store i64 %r70, i64* %r72
+%r73 = lshr i448 %r69, 64
+%r74 = trunc i448 %r73 to i64
+%r76 = getelementptr i64, i64* %r49, i32 6 ; i.e. %r1[11], the top limb of the 12-limb result
+store i64 %r74, i64* %r76
+ret void
+}
+define void @mcl_fp_mont6L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; 6-limb interleaved multiply-and-reduce of %r2 by %r3 against the 6-limb value at %r4; the structure matches word-by-word Montgomery multiplication with %r4 as the modulus (confirm against caller); result is written to %r1[0..5]
+{
+%r6 = getelementptr i64, i64* %r4, i32 -1 ; reads the word stored immediately BEFORE %r4[0]
+%r7 = load i64, i64* %r6 ; NOTE(review): presumably the per-modulus Montgomery constant (-p^-1 mod 2^64) - confirm the caller's memory layout
+%r9 = getelementptr i64, i64* %r3, i32 0 ; round 0: multiplier limb %r3[0]
+%r10 = load i64, i64* %r9
+%r11 = call i448 @mulPv384x64(i64* %r2, i64 %r10)
+%r12 = zext i448 %r11 to i512 ; accumulator is kept in i512, one limb wider than a single i448 row
+%r13 = trunc i448 %r11 to i64
+%r14 = mul i64 %r13, %r7 ; q = (low limb of accumulator) * %r7, wrapping at 64 bits
+%r15 = call i448 @mulPv384x64(i64* %r4, i64 %r14) ; q times the %r4 value
+%r16 = zext i448 %r15 to i512
+%r17 = add i512 %r12, %r16
+%r18 = lshr i512 %r17, 64 ; shift the accumulator down by one limb
+%r20 = getelementptr i64, i64* %r3, i32 1 ; round 1: multiplier limb %r3[1]
+%r21 = load i64, i64* %r20
+%r22 = call i448 @mulPv384x64(i64* %r2, i64 %r21)
+%r23 = zext i448 %r22 to i512
+%r24 = add i512 %r18, %r23
+%r25 = trunc i512 %r24 to i64
+%r26 = mul i64 %r25, %r7
+%r27 = call i448 @mulPv384x64(i64* %r4, i64 %r26)
+%r28 = zext i448 %r27 to i512
+%r29 = add i512 %r24, %r28
+%r30 = lshr i512 %r29, 64
+%r32 = getelementptr i64, i64* %r3, i32 2 ; round 2
+%r33 = load i64, i64* %r32
+%r34 = call i448 @mulPv384x64(i64* %r2, i64 %r33)
+%r35 = zext i448 %r34 to i512
+%r36 = add i512 %r30, %r35
+%r37 = trunc i512 %r36 to i64
+%r38 = mul i64 %r37, %r7
+%r39 = call i448 @mulPv384x64(i64* %r4, i64 %r38)
+%r40 = zext i448 %r39 to i512
+%r41 = add i512 %r36, %r40
+%r42 = lshr i512 %r41, 64
+%r44 = getelementptr i64, i64* %r3, i32 3 ; round 3
+%r45 = load i64, i64* %r44
+%r46 = call i448 @mulPv384x64(i64* %r2, i64 %r45)
+%r47 = zext i448 %r46 to i512
+%r48 = add i512 %r42, %r47
+%r49 = trunc i512 %r48 to i64
+%r50 = mul i64 %r49, %r7
+%r51 = call i448 @mulPv384x64(i64* %r4, i64 %r50)
+%r52 = zext i448 %r51 to i512
+%r53 = add i512 %r48, %r52
+%r54 = lshr i512 %r53, 64
+%r56 = getelementptr i64, i64* %r3, i32 4 ; round 4
+%r57 = load i64, i64* %r56
+%r58 = call i448 @mulPv384x64(i64* %r2, i64 %r57)
+%r59 = zext i448 %r58 to i512
+%r60 = add i512 %r54, %r59
+%r61 = trunc i512 %r60 to i64
+%r62 = mul i64 %r61, %r7
+%r63 = call i448 @mulPv384x64(i64* %r4, i64 %r62)
+%r64 = zext i448 %r63 to i512
+%r65 = add i512 %r60, %r64
+%r66 = lshr i512 %r65, 64
+%r68 = getelementptr i64, i64* %r3, i32 5 ; round 5 (last)
+%r69 = load i64, i64* %r68
+%r70 = call i448 @mulPv384x64(i64* %r2, i64 %r69)
+%r71 = zext i448 %r70 to i512
+%r72 = add i512 %r66, %r71
+%r73 = trunc i512 %r72 to i64
+%r74 = mul i64 %r73, %r7
+%r75 = call i448 @mulPv384x64(i64* %r4, i64 %r74)
+%r76 = zext i448 %r75 to i512
+%r77 = add i512 %r72, %r76
+%r78 = lshr i512 %r77, 64
+%r79 = trunc i512 %r78 to i448 ; %r79 = accumulator after six rounds, before the final correction
+%r80 = load i64, i64* %r4 ; pack %r4[0..5] into an i384 (%r115); limb i is placed at bit 64*i
+%r81 = zext i64 %r80 to i128
+%r83 = getelementptr i64, i64* %r4, i32 1
+%r84 = load i64, i64* %r83
+%r85 = zext i64 %r84 to i128
+%r86 = shl i128 %r85, 64
+%r87 = or i128 %r81, %r86
+%r88 = zext i128 %r87 to i192
+%r90 = getelementptr i64, i64* %r4, i32 2
+%r91 = load i64, i64* %r90
+%r92 = zext i64 %r91 to i192
+%r93 = shl i192 %r92, 128
+%r94 = or i192 %r88, %r93
+%r95 = zext i192 %r94 to i256
+%r97 = getelementptr i64, i64* %r4, i32 3
+%r98 = load i64, i64* %r97
+%r99 = zext i64 %r98 to i256
+%r100 = shl i256 %r99, 192
+%r101 = or i256 %r95, %r100
+%r102 = zext i256 %r101 to i320
+%r104 = getelementptr i64, i64* %r4, i32 4
+%r105 = load i64, i64* %r104
+%r106 = zext i64 %r105 to i320
+%r107 = shl i320 %r106, 256
+%r108 = or i320 %r102, %r107
+%r109 = zext i320 %r108 to i384
+%r111 = getelementptr i64, i64* %r4, i32 5
+%r112 = load i64, i64* %r111
+%r113 = zext i64 %r112 to i384
+%r114 = shl i384 %r113, 320
+%r115 = or i384 %r109, %r114
+%r116 = zext i384 %r115 to i448
+%r117 = sub i448 %r79, %r116 ; trial subtraction: accumulator minus the %r4 value
+%r118 = lshr i448 %r117, 384 ; bit 384 of the difference is tested as the borrow indicator
+%r119 = trunc i448 %r118 to i1
+%r120 = select i1 %r119, i448 %r79, i448 %r117 ; borrow -> keep the unsubtracted accumulator, otherwise take the difference (branch-free)
+%r121 = trunc i448 %r120 to i384
+%r122 = trunc i384 %r121 to i64 ; write the selected value to %r1[0..5], lowest limb first
+%r124 = getelementptr i64, i64* %r1, i32 0
+store i64 %r122, i64* %r124
+%r125 = lshr i384 %r121, 64
+%r126 = trunc i384 %r125 to i64
+%r128 = getelementptr i64, i64* %r1, i32 1
+store i64 %r126, i64* %r128
+%r129 = lshr i384 %r125, 64
+%r130 = trunc i384 %r129 to i64
+%r132 = getelementptr i64, i64* %r1, i32 2
+store i64 %r130, i64* %r132
+%r133 = lshr i384 %r129, 64
+%r134 = trunc i384 %r133 to i64
+%r136 = getelementptr i64, i64* %r1, i32 3
+store i64 %r134, i64* %r136
+%r137 = lshr i384 %r133, 64
+%r138 = trunc i384 %r137 to i64
+%r140 = getelementptr i64, i64* %r1, i32 4
+store i64 %r138, i64* %r140
+%r141 = lshr i384 %r137, 64
+%r142 = trunc i384 %r141 to i64
+%r144 = getelementptr i64, i64* %r1, i32 5
+store i64 %r142, i64* %r144
+ret void
+}
+define void @mcl_fp_montNF6L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; same six-round interleaved multiply-and-reduce shape as a word-by-word Montgomery multiplication, but the accumulator stays in i448 and the final correction tests the sign bit of an i384 difference; result is written to %r1[0..5]
+{
+%r6 = getelementptr i64, i64* %r4, i32 -1 ; reads the word stored immediately BEFORE %r4[0]
+%r7 = load i64, i64* %r6 ; NOTE(review): presumably the per-modulus Montgomery constant (-p^-1 mod 2^64) - confirm the caller's memory layout
+%r8 = load i64, i64* %r3 ; round 0: multiplier limb %r3[0]
+%r9 = call i448 @mulPv384x64(i64* %r2, i64 %r8)
+%r10 = trunc i448 %r9 to i64
+%r11 = mul i64 %r10, %r7 ; q = (low limb of accumulator) * %r7, wrapping at 64 bits
+%r12 = call i448 @mulPv384x64(i64* %r4, i64 %r11) ; q times the %r4 value
+%r13 = add i448 %r9, %r12 ; NOTE(review): no spare limb here, so this add must not overflow i448 - presumably guaranteed by a modulus whose top bit is clear; confirm
+%r14 = lshr i448 %r13, 64 ; shift the accumulator down by one limb
+%r16 = getelementptr i64, i64* %r3, i32 1 ; round 1: multiplier limb %r3[1]
+%r17 = load i64, i64* %r16
+%r18 = call i448 @mulPv384x64(i64* %r2, i64 %r17)
+%r19 = add i448 %r14, %r18
+%r20 = trunc i448 %r19 to i64
+%r21 = mul i64 %r20, %r7
+%r22 = call i448 @mulPv384x64(i64* %r4, i64 %r21)
+%r23 = add i448 %r19, %r22
+%r24 = lshr i448 %r23, 64
+%r26 = getelementptr i64, i64* %r3, i32 2 ; round 2
+%r27 = load i64, i64* %r26
+%r28 = call i448 @mulPv384x64(i64* %r2, i64 %r27)
+%r29 = add i448 %r24, %r28
+%r30 = trunc i448 %r29 to i64
+%r31 = mul i64 %r30, %r7
+%r32 = call i448 @mulPv384x64(i64* %r4, i64 %r31)
+%r33 = add i448 %r29, %r32
+%r34 = lshr i448 %r33, 64
+%r36 = getelementptr i64, i64* %r3, i32 3 ; round 3
+%r37 = load i64, i64* %r36
+%r38 = call i448 @mulPv384x64(i64* %r2, i64 %r37)
+%r39 = add i448 %r34, %r38
+%r40 = trunc i448 %r39 to i64
+%r41 = mul i64 %r40, %r7
+%r42 = call i448 @mulPv384x64(i64* %r4, i64 %r41)
+%r43 = add i448 %r39, %r42
+%r44 = lshr i448 %r43, 64
+%r46 = getelementptr i64, i64* %r3, i32 4 ; round 4
+%r47 = load i64, i64* %r46
+%r48 = call i448 @mulPv384x64(i64* %r2, i64 %r47)
+%r49 = add i448 %r44, %r48
+%r50 = trunc i448 %r49 to i64
+%r51 = mul i64 %r50, %r7
+%r52 = call i448 @mulPv384x64(i64* %r4, i64 %r51)
+%r53 = add i448 %r49, %r52
+%r54 = lshr i448 %r53, 64
+%r56 = getelementptr i64, i64* %r3, i32 5 ; round 5 (last)
+%r57 = load i64, i64* %r56
+%r58 = call i448 @mulPv384x64(i64* %r2, i64 %r57)
+%r59 = add i448 %r54, %r58
+%r60 = trunc i448 %r59 to i64
+%r61 = mul i64 %r60, %r7
+%r62 = call i448 @mulPv384x64(i64* %r4, i64 %r61)
+%r63 = add i448 %r59, %r62
+%r64 = lshr i448 %r63, 64
+%r65 = trunc i448 %r64 to i384 ; %r65 = accumulator after six rounds, narrowed to 6 limbs
+%r66 = load i64, i64* %r4 ; pack %r4[0..5] into an i384 (%r101); limb i is placed at bit 64*i
+%r67 = zext i64 %r66 to i128
+%r69 = getelementptr i64, i64* %r4, i32 1
+%r70 = load i64, i64* %r69
+%r71 = zext i64 %r70 to i128
+%r72 = shl i128 %r71, 64
+%r73 = or i128 %r67, %r72
+%r74 = zext i128 %r73 to i192
+%r76 = getelementptr i64, i64* %r4, i32 2
+%r77 = load i64, i64* %r76
+%r78 = zext i64 %r77 to i192
+%r79 = shl i192 %r78, 128
+%r80 = or i192 %r74, %r79
+%r81 = zext i192 %r80 to i256
+%r83 = getelementptr i64, i64* %r4, i32 3
+%r84 = load i64, i64* %r83
+%r85 = zext i64 %r84 to i256
+%r86 = shl i256 %r85, 192
+%r87 = or i256 %r81, %r86
+%r88 = zext i256 %r87 to i320
+%r90 = getelementptr i64, i64* %r4, i32 4
+%r91 = load i64, i64* %r90
+%r92 = zext i64 %r91 to i320
+%r93 = shl i320 %r92, 256
+%r94 = or i320 %r88, %r93
+%r95 = zext i320 %r94 to i384
+%r97 = getelementptr i64, i64* %r4, i32 5
+%r98 = load i64, i64* %r97
+%r99 = zext i64 %r98 to i384
+%r100 = shl i384 %r99, 320
+%r101 = or i384 %r95, %r100
+%r102 = sub i384 %r65, %r101 ; trial subtraction: accumulator minus the %r4 value, in 384 bits
+%r103 = lshr i384 %r102, 383 ; top (sign) bit of the difference is tested as the "went negative" indicator
+%r104 = trunc i384 %r103 to i1
+%r105 = select i1 %r104, i384 %r65, i384 %r102 ; negative -> keep the unsubtracted accumulator, otherwise take the difference (branch-free)
+%r106 = trunc i384 %r105 to i64 ; write the selected value to %r1[0..5], lowest limb first
+%r108 = getelementptr i64, i64* %r1, i32 0
+store i64 %r106, i64* %r108
+%r109 = lshr i384 %r105, 64
+%r110 = trunc i384 %r109 to i64
+%r112 = getelementptr i64, i64* %r1, i32 1
+store i64 %r110, i64* %r112
+%r113 = lshr i384 %r109, 64
+%r114 = trunc i384 %r113 to i64
+%r116 = getelementptr i64, i64* %r1, i32 2
+store i64 %r114, i64* %r116
+%r117 = lshr i384 %r113, 64
+%r118 = trunc i384 %r117 to i64
+%r120 = getelementptr i64, i64* %r1, i32 3
+store i64 %r118, i64* %r120
+%r121 = lshr i384 %r117, 64
+%r122 = trunc i384 %r121 to i64
+%r124 = getelementptr i64, i64* %r1, i32 4
+store i64 %r122, i64* %r124
+%r125 = lshr i384 %r121, 64
+%r126 = trunc i384 %r125 to i64
+%r128 = getelementptr i64, i64* %r1, i32 5
+store i64 %r126, i64* %r128
+ret void
+}
+define void @mcl_fp_montRed6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+{
+%r5 = getelementptr i64, i64* %r3, i32 -1
+%r6 = load i64, i64* %r5
+%r7 = load i64, i64* %r3
+%r8 = zext i64 %r7 to i128
+%r10 = getelementptr i64, i64* %r3, i32 1
+%r11 = load i64, i64* %r10
+%r12 = zext i64 %r11 to i128
+%r13 = shl i128 %r12, 64
+%r14 = or i128 %r8, %r13
+%r15 = zext i128 %r14 to i192
+%r17 = getelementptr i64, i64* %r3, i32 2
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i192
+%r20 = shl i192 %r19, 128
+%r21 = or i192 %r15, %r20
+%r22 = zext i192 %r21 to i256
+%r24 = getelementptr i64, i64* %r3, i32 3
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i256
+%r27 = shl i256 %r26, 192
+%r28 = or i256 %r22, %r27
+%r29 = zext i256 %r28 to i320
+%r31 = getelementptr i64, i64* %r3, i32 4
+%r32 = load i64, i64* %r31
+%r33 = zext i64 %r32 to i320
+%r34 = shl i320 %r33, 256
+%r35 = or i320 %r29, %r34
+%r36 = zext i320 %r35 to i384
+%r38 = getelementptr i64, i64* %r3, i32 5
+%r39 = load i64, i64* %r38
+%r40 = zext i64 %r39 to i384
+%r41 = shl i384 %r40, 320
+%r42 = or i384 %r36, %r41
+%r43 = load i64, i64* %r2
+%r44 = zext i64 %r43 to i128
+%r46 = getelementptr i64, i64* %r2, i32 1
+%r47 = load i64, i64* %r46
+%r48 = zext i64 %r47 to i128
+%r49 = shl i128 %r48, 64
+%r50 = or i128 %r44, %r49
+%r51 = zext i128 %r50 to i192
+%r53 = getelementptr i64, i64* %r2, i32 2
+%r54 = load i64, i64* %r53
+%r55 = zext i64 %r54 to i192
+%r56 = shl i192 %r55, 128
+%r57 = or i192 %r51, %r56
+%r58 = zext i192 %r57 to i256
+%r60 = getelementptr i64, i64* %r2, i32 3
+%r61 = load i64, i64* %r60
+%r62 = zext i64 %r61 to i256
+%r63 = shl i256 %r62, 192
+%r64 = or i256 %r58, %r63
+%r65 = zext i256 %r64 to i320
+%r67 = getelementptr i64, i64* %r2, i32 4
+%r68 = load i64, i64* %r67
+%r69 = zext i64 %r68 to i320
+%r70 = shl i320 %r69, 256
+%r71 = or i320 %r65, %r70
+%r72 = zext i320 %r71 to i384
+%r74 = getelementptr i64, i64* %r2, i32 5
+%r75 = load i64, i64* %r74
+%r76 = zext i64 %r75 to i384
+%r77 = shl i384 %r76, 320
+%r78 = or i384 %r72, %r77
+%r79 = zext i384 %r78 to i448
+%r81 = getelementptr i64, i64* %r2, i32 6
+%r82 = load i64, i64* %r81
+%r83 = zext i64 %r82 to i448
+%r84 = shl i448 %r83, 384
+%r85 = or i448 %r79, %r84
+%r86 = zext i448 %r85 to i512
+%r88 = getelementptr i64, i64* %r2, i32 7
+%r89 = load i64, i64* %r88
+%r90 = zext i64 %r89 to i512
+%r91 = shl i512 %r90, 448
+%r92 = or i512 %r86, %r91
+%r93 = zext i512 %r92 to i576
+%r95 = getelementptr i64, i64* %r2, i32 8
+%r96 = load i64, i64* %r95
+%r97 = zext i64 %r96 to i576
+%r98 = shl i576 %r97, 512
+%r99 = or i576 %r93, %r98
+%r100 = zext i576 %r99 to i640
+%r102 = getelementptr i64, i64* %r2, i32 9
+%r103 = load i64, i64* %r102
+%r104 = zext i64 %r103 to i640
+%r105 = shl i640 %r104, 576
+%r106 = or i640 %r100, %r105
+%r107 = zext i640 %r106 to i704
+%r109 = getelementptr i64, i64* %r2, i32 10
+%r110 = load i64, i64* %r109
+%r111 = zext i64 %r110 to i704
+%r112 = shl i704 %r111, 640
+%r113 = or i704 %r107, %r112
+%r114 = zext i704 %r113 to i768
+%r116 = getelementptr i64, i64* %r2, i32 11
+%r117 = load i64, i64* %r116
+%r118 = zext i64 %r117 to i768
+%r119 = shl i768 %r118, 704
+%r120 = or i768 %r114, %r119
+%r121 = zext i768 %r120 to i832
+%r122 = trunc i832 %r121 to i64
+%r123 = mul i64 %r122, %r6
+%r124 = call i448 @mulPv384x64(i64* %r3, i64 %r123)
+%r125 = zext i448 %r124 to i832
+%r126 = add i832 %r121, %r125
+%r127 = lshr i832 %r126, 64
+%r128 = trunc i832 %r127 to i768
+%r129 = trunc i768 %r128 to i64
+%r130 = mul i64 %r129, %r6
+%r131 = call i448 @mulPv384x64(i64* %r3, i64 %r130)
+%r132 = zext i448 %r131 to i768
+%r133 = add i768 %r128, %r132
+%r134 = lshr i768 %r133, 64
+%r135 = trunc i768 %r134 to i704
+%r136 = trunc i704 %r135 to i64
+%r137 = mul i64 %r136, %r6
+%r138 = call i448 @mulPv384x64(i64* %r3, i64 %r137)
+%r139 = zext i448 %r138 to i704
+%r140 = add i704 %r135, %r139
+%r141 = lshr i704 %r140, 64
+%r142 = trunc i704 %r141 to i640
+%r143 = trunc i640 %r142 to i64
+%r144 = mul i64 %r143, %r6
+%r145 = call i448 @mulPv384x64(i64* %r3, i64 %r144)
+%r146 = zext i448 %r145 to i640
+%r147 = add i640 %r142, %r146
+%r148 = lshr i640 %r147, 64
+%r149 = trunc i640 %r148 to i576
+%r150 = trunc i576 %r149 to i64
+%r151 = mul i64 %r150, %r6
+%r152 = call i448 @mulPv384x64(i64* %r3, i64 %r151)
+%r153 = zext i448 %r152 to i576
+%r154 = add i576 %r149, %r153
+%r155 = lshr i576 %r154, 64
+%r156 = trunc i576 %r155 to i512
+%r157 = trunc i512 %r156 to i64
+%r158 = mul i64 %r157, %r6
+%r159 = call i448 @mulPv384x64(i64* %r3, i64 %r158)
+%r160 = zext i448 %r159 to i512
+%r161 = add i512 %r156, %r160
+%r162 = lshr i512 %r161, 64
+%r163 = trunc i512 %r162 to i448
+%r164 = zext i384 %r42 to i448
+%r165 = sub i448 %r163, %r164
+%r166 = lshr i448 %r165, 384
+%r167 = trunc i448 %r166 to i1
+%r168 = select i1 %r167, i448 %r163, i448 %r165
+%r169 = trunc i448 %r168 to i384
+%r170 = trunc i384 %r169 to i64
+%r172 = getelementptr i64, i64* %r1, i32 0
+store i64 %r170, i64* %r172
+%r173 = lshr i384 %r169, 64
+%r174 = trunc i384 %r173 to i64
+%r176 = getelementptr i64, i64* %r1, i32 1
+store i64 %r174, i64* %r176
+%r177 = lshr i384 %r173, 64
+%r178 = trunc i384 %r177 to i64
+%r180 = getelementptr i64, i64* %r1, i32 2
+store i64 %r178, i64* %r180
+%r181 = lshr i384 %r177, 64
+%r182 = trunc i384 %r181 to i64
+%r184 = getelementptr i64, i64* %r1, i32 3
+store i64 %r182, i64* %r184
+%r185 = lshr i384 %r181, 64
+%r186 = trunc i384 %r185 to i64
+%r188 = getelementptr i64, i64* %r1, i32 4
+store i64 %r186, i64* %r188
+%r189 = lshr i384 %r185, 64
+%r190 = trunc i384 %r189 to i64
+%r192 = getelementptr i64, i64* %r1, i32 5
+store i64 %r190, i64* %r192
+ret void
+}
+define i64 @mcl_fp_addPre6L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; r2[0..5] = r3[0..5] + r4[0..5] as 384-bit integers; returns the carry out of bit 383
+{
+%r5 = load i64, i64* %r3 ; start packing the six 64-bit words at r3 into one integer (word i lands at bit 64*i)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r3, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r3, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448 ; first operand, widened by one word so the add below keeps its carry
+%r42 = load i64, i64* %r4 ; pack the six words at r4 the same way
+%r43 = zext i64 %r42 to i128
+%r45 = getelementptr i64, i64* %r4, i32 1
+%r46 = load i64, i64* %r45
+%r47 = zext i64 %r46 to i128
+%r48 = shl i128 %r47, 64
+%r49 = or i128 %r43, %r48
+%r50 = zext i128 %r49 to i192
+%r52 = getelementptr i64, i64* %r4, i32 2
+%r53 = load i64, i64* %r52
+%r54 = zext i64 %r53 to i192
+%r55 = shl i192 %r54, 128
+%r56 = or i192 %r50, %r55
+%r57 = zext i192 %r56 to i256
+%r59 = getelementptr i64, i64* %r4, i32 3
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i256
+%r62 = shl i256 %r61, 192
+%r63 = or i256 %r57, %r62
+%r64 = zext i256 %r63 to i320
+%r66 = getelementptr i64, i64* %r4, i32 4
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i320
+%r69 = shl i320 %r68, 256
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i384
+%r73 = getelementptr i64, i64* %r4, i32 5
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i384
+%r76 = shl i384 %r75, 320
+%r77 = or i384 %r71, %r76
+%r78 = zext i384 %r77 to i448 ; second operand, widened to i448
+%r79 = add i448 %r41, %r78 ; 448-bit sum; bit 384 is the carry
+%r80 = trunc i448 %r79 to i384 ; low 384 bits, written to r2 one word at a time below
+%r81 = trunc i384 %r80 to i64
+%r83 = getelementptr i64, i64* %r2, i32 0
+store i64 %r81, i64* %r83
+%r84 = lshr i384 %r80, 64
+%r85 = trunc i384 %r84 to i64
+%r87 = getelementptr i64, i64* %r2, i32 1
+store i64 %r85, i64* %r87
+%r88 = lshr i384 %r84, 64
+%r89 = trunc i384 %r88 to i64
+%r91 = getelementptr i64, i64* %r2, i32 2
+store i64 %r89, i64* %r91
+%r92 = lshr i384 %r88, 64
+%r93 = trunc i384 %r92 to i64
+%r95 = getelementptr i64, i64* %r2, i32 3
+store i64 %r93, i64* %r95
+%r96 = lshr i384 %r92, 64
+%r97 = trunc i384 %r96 to i64
+%r99 = getelementptr i64, i64* %r2, i32 4
+store i64 %r97, i64* %r99
+%r100 = lshr i384 %r96, 64
+%r101 = trunc i384 %r100 to i64
+%r103 = getelementptr i64, i64* %r2, i32 5
+store i64 %r101, i64* %r103
+%r104 = lshr i448 %r79, 384 ; move the carry down to bit 0
+%r105 = trunc i448 %r104 to i64
+ret i64 %r105
+}
+define i64 @mcl_fp_subPre6L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; r2[0..5] = r3[0..5] - r4[0..5] as 384-bit integers (wrapping); returns 1 if the subtraction borrowed, else 0
+{
+%r5 = load i64, i64* %r3 ; start packing the six 64-bit words at r3 into one integer (word i lands at bit 64*i)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r3, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r3, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448 ; minuend, widened by one word so the borrow is observable
+%r42 = load i64, i64* %r4 ; pack the six words at r4 the same way
+%r43 = zext i64 %r42 to i128
+%r45 = getelementptr i64, i64* %r4, i32 1
+%r46 = load i64, i64* %r45
+%r47 = zext i64 %r46 to i128
+%r48 = shl i128 %r47, 64
+%r49 = or i128 %r43, %r48
+%r50 = zext i128 %r49 to i192
+%r52 = getelementptr i64, i64* %r4, i32 2
+%r53 = load i64, i64* %r52
+%r54 = zext i64 %r53 to i192
+%r55 = shl i192 %r54, 128
+%r56 = or i192 %r50, %r55
+%r57 = zext i192 %r56 to i256
+%r59 = getelementptr i64, i64* %r4, i32 3
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i256
+%r62 = shl i256 %r61, 192
+%r63 = or i256 %r57, %r62
+%r64 = zext i256 %r63 to i320
+%r66 = getelementptr i64, i64* %r4, i32 4
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i320
+%r69 = shl i320 %r68, 256
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i384
+%r73 = getelementptr i64, i64* %r4, i32 5
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i384
+%r76 = shl i384 %r75, 320
+%r77 = or i384 %r71, %r76
+%r78 = zext i384 %r77 to i448 ; subtrahend, widened to i448
+%r79 = sub i448 %r41, %r78 ; 448-bit difference; bits 384 and above are all ones when a borrow occurred
+%r80 = trunc i448 %r79 to i384 ; low 384 bits, written to r2 one word at a time below
+%r81 = trunc i384 %r80 to i64
+%r83 = getelementptr i64, i64* %r2, i32 0
+store i64 %r81, i64* %r83
+%r84 = lshr i384 %r80, 64
+%r85 = trunc i384 %r84 to i64
+%r87 = getelementptr i64, i64* %r2, i32 1
+store i64 %r85, i64* %r87
+%r88 = lshr i384 %r84, 64
+%r89 = trunc i384 %r88 to i64
+%r91 = getelementptr i64, i64* %r2, i32 2
+store i64 %r89, i64* %r91
+%r92 = lshr i384 %r88, 64
+%r93 = trunc i384 %r92 to i64
+%r95 = getelementptr i64, i64* %r2, i32 3
+store i64 %r93, i64* %r95
+%r96 = lshr i384 %r92, 64
+%r97 = trunc i384 %r96 to i64
+%r99 = getelementptr i64, i64* %r2, i32 4
+store i64 %r97, i64* %r99
+%r100 = lshr i384 %r96, 64
+%r101 = trunc i384 %r100 to i64
+%r103 = getelementptr i64, i64* %r2, i32 5
+store i64 %r101, i64* %r103
+%r104 = lshr i448 %r79, 384 ; bring the top word (borrow indicator) down to bit 0
+%r105 = trunc i448 %r104 to i64
+%r107 = and i64 %r105, 1 ; reduce the all-ones borrow word to a single 0/1 flag
+ret i64 %r107
+}
+define void @mcl_fp_shr1_6L(i64* noalias  %r1, i64* noalias  %r2) ; r1[0..5] = r2[0..5] >> 1, treating the six words as one 384-bit unsigned integer
+{
+%r3 = load i64, i64* %r2 ; start packing the six 64-bit words at r2 into one integer (word i lands at bit 64*i)
+%r4 = zext i64 %r3 to i128
+%r6 = getelementptr i64, i64* %r2, i32 1
+%r7 = load i64, i64* %r6
+%r8 = zext i64 %r7 to i128
+%r9 = shl i128 %r8, 64
+%r10 = or i128 %r4, %r9
+%r11 = zext i128 %r10 to i192
+%r13 = getelementptr i64, i64* %r2, i32 2
+%r14 = load i64, i64* %r13
+%r15 = zext i64 %r14 to i192
+%r16 = shl i192 %r15, 128
+%r17 = or i192 %r11, %r16
+%r18 = zext i192 %r17 to i256
+%r20 = getelementptr i64, i64* %r2, i32 3
+%r21 = load i64, i64* %r20
+%r22 = zext i64 %r21 to i256
+%r23 = shl i256 %r22, 192
+%r24 = or i256 %r18, %r23
+%r25 = zext i256 %r24 to i320
+%r27 = getelementptr i64, i64* %r2, i32 4
+%r28 = load i64, i64* %r27
+%r29 = zext i64 %r28 to i320
+%r30 = shl i320 %r29, 256
+%r31 = or i320 %r25, %r30
+%r32 = zext i320 %r31 to i384
+%r34 = getelementptr i64, i64* %r2, i32 5
+%r35 = load i64, i64* %r34
+%r36 = zext i64 %r35 to i384
+%r37 = shl i384 %r36, 320
+%r38 = or i384 %r32, %r37
+%r39 = lshr i384 %r38, 1 ; logical shift: bit 0 is discarded and a zero enters at bit 383
+%r40 = trunc i384 %r39 to i64 ; unpack the shifted value into r1, lowest word first
+%r42 = getelementptr i64, i64* %r1, i32 0
+store i64 %r40, i64* %r42
+%r43 = lshr i384 %r39, 64
+%r44 = trunc i384 %r43 to i64
+%r46 = getelementptr i64, i64* %r1, i32 1
+store i64 %r44, i64* %r46
+%r47 = lshr i384 %r43, 64
+%r48 = trunc i384 %r47 to i64
+%r50 = getelementptr i64, i64* %r1, i32 2
+store i64 %r48, i64* %r50
+%r51 = lshr i384 %r47, 64
+%r52 = trunc i384 %r51 to i64
+%r54 = getelementptr i64, i64* %r1, i32 3
+store i64 %r52, i64* %r54
+%r55 = lshr i384 %r51, 64
+%r56 = trunc i384 %r55 to i64
+%r58 = getelementptr i64, i64* %r1, i32 4
+store i64 %r56, i64* %r58
+%r59 = lshr i384 %r55, 64
+%r60 = trunc i384 %r59 to i64
+%r62 = getelementptr i64, i64* %r1, i32 5
+store i64 %r60, i64* %r62
+ret void
+}
+define void @mcl_fp_add6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; r1 = r2 + r3, replaced by (r2 + r3) - r4 when that subtraction does not borrow; r4 is presumably the field modulus (confirm against the generator)
+{
+%r5 = load i64, i64* %r2 ; start packing the six 64-bit words at r2 into one integer (word i lands at bit 64*i)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = load i64, i64* %r3 ; pack the six words at r3 the same way
+%r42 = zext i64 %r41 to i128
+%r44 = getelementptr i64, i64* %r3, i32 1
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i128
+%r47 = shl i128 %r46, 64
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r48 to i192
+%r51 = getelementptr i64, i64* %r3, i32 2
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r49, %r54
+%r56 = zext i192 %r55 to i256
+%r58 = getelementptr i64, i64* %r3, i32 3
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i256
+%r61 = shl i256 %r60, 192
+%r62 = or i256 %r56, %r61
+%r63 = zext i256 %r62 to i320
+%r65 = getelementptr i64, i64* %r3, i32 4
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i320
+%r68 = shl i320 %r67, 256
+%r69 = or i320 %r63, %r68
+%r70 = zext i320 %r69 to i384
+%r72 = getelementptr i64, i64* %r3, i32 5
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i384
+%r75 = shl i384 %r74, 320
+%r76 = or i384 %r70, %r75
+%r77 = zext i384 %r40 to i448 ; widen both operands by one word so the sum cannot wrap
+%r78 = zext i384 %r76 to i448
+%r79 = add i448 %r77, %r78 ; full 448-bit sum, reused by the subtraction further down
+%r80 = trunc i448 %r79 to i384 ; low 384 bits of the sum are stored to r1 unconditionally first
+%r81 = trunc i384 %r80 to i64
+%r83 = getelementptr i64, i64* %r1, i32 0
+store i64 %r81, i64* %r83
+%r84 = lshr i384 %r80, 64
+%r85 = trunc i384 %r84 to i64
+%r87 = getelementptr i64, i64* %r1, i32 1
+store i64 %r85, i64* %r87
+%r88 = lshr i384 %r84, 64
+%r89 = trunc i384 %r88 to i64
+%r91 = getelementptr i64, i64* %r1, i32 2
+store i64 %r89, i64* %r91
+%r92 = lshr i384 %r88, 64
+%r93 = trunc i384 %r92 to i64
+%r95 = getelementptr i64, i64* %r1, i32 3
+store i64 %r93, i64* %r95
+%r96 = lshr i384 %r92, 64
+%r97 = trunc i384 %r96 to i64
+%r99 = getelementptr i64, i64* %r1, i32 4
+store i64 %r97, i64* %r99
+%r100 = lshr i384 %r96, 64
+%r101 = trunc i384 %r100 to i64
+%r103 = getelementptr i64, i64* %r1, i32 5
+store i64 %r101, i64* %r103
+%r104 = load i64, i64* %r4 ; pack the six words at r4
+%r105 = zext i64 %r104 to i128
+%r107 = getelementptr i64, i64* %r4, i32 1
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i128
+%r110 = shl i128 %r109, 64
+%r111 = or i128 %r105, %r110
+%r112 = zext i128 %r111 to i192
+%r114 = getelementptr i64, i64* %r4, i32 2
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i192
+%r117 = shl i192 %r116, 128
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i256
+%r121 = getelementptr i64, i64* %r4, i32 3
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i256
+%r124 = shl i256 %r123, 192
+%r125 = or i256 %r119, %r124
+%r126 = zext i256 %r125 to i320
+%r128 = getelementptr i64, i64* %r4, i32 4
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i320
+%r131 = shl i320 %r130, 256
+%r132 = or i320 %r126, %r131
+%r133 = zext i320 %r132 to i384
+%r135 = getelementptr i64, i64* %r4, i32 5
+%r136 = load i64, i64* %r135
+%r137 = zext i64 %r136 to i384
+%r138 = shl i384 %r137, 320
+%r139 = or i384 %r133, %r138
+%r140 = zext i384 %r139 to i448
+%r141 = sub i448 %r79, %r140 ; sum - r4 at 448 bits
+%r142 = lshr i448 %r141, 384 ; isolate bit 384 and above of the difference
+%r143 = trunc i448 %r142 to i1 ; 1 when the subtraction borrowed, i.e. the sum was smaller than the r4 value
+br i1%r143, label %carry, label %nocarry
+nocarry:
+%r144 = trunc i448 %r141 to i384 ; no borrow: overwrite r1 with the difference
+%r145 = trunc i384 %r144 to i64
+%r147 = getelementptr i64, i64* %r1, i32 0
+store i64 %r145, i64* %r147
+%r148 = lshr i384 %r144, 64
+%r149 = trunc i384 %r148 to i64
+%r151 = getelementptr i64, i64* %r1, i32 1
+store i64 %r149, i64* %r151
+%r152 = lshr i384 %r148, 64
+%r153 = trunc i384 %r152 to i64
+%r155 = getelementptr i64, i64* %r1, i32 2
+store i64 %r153, i64* %r155
+%r156 = lshr i384 %r152, 64
+%r157 = trunc i384 %r156 to i64
+%r159 = getelementptr i64, i64* %r1, i32 3
+store i64 %r157, i64* %r159
+%r160 = lshr i384 %r156, 64
+%r161 = trunc i384 %r160 to i64
+%r163 = getelementptr i64, i64* %r1, i32 4
+store i64 %r161, i64* %r163
+%r164 = lshr i384 %r160, 64
+%r165 = trunc i384 %r164 to i64
+%r167 = getelementptr i64, i64* %r1, i32 5
+store i64 %r165, i64* %r167
+ret void
+carry:
+ret void ; borrow: the plain sum already stored in r1 is the result
+}
+define void @mcl_fp_addNF6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; branch-free add: r1 = (r2 + r3) - r4 unless that difference has bit 383 set, in which case r1 = r2 + r3; r4 is presumably the field modulus (confirm against the generator)
+{
+%r5 = load i64, i64* %r2 ; start packing the six 64-bit words at r2 into one integer (word i lands at bit 64*i)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = load i64, i64* %r3 ; pack the six words at r3 the same way
+%r42 = zext i64 %r41 to i128
+%r44 = getelementptr i64, i64* %r3, i32 1
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i128
+%r47 = shl i128 %r46, 64
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r48 to i192
+%r51 = getelementptr i64, i64* %r3, i32 2
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r49, %r54
+%r56 = zext i192 %r55 to i256
+%r58 = getelementptr i64, i64* %r3, i32 3
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i256
+%r61 = shl i256 %r60, 192
+%r62 = or i256 %r56, %r61
+%r63 = zext i256 %r62 to i320
+%r65 = getelementptr i64, i64* %r3, i32 4
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i320
+%r68 = shl i320 %r67, 256
+%r69 = or i320 %r63, %r68
+%r70 = zext i320 %r69 to i384
+%r72 = getelementptr i64, i64* %r3, i32 5
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i384
+%r75 = shl i384 %r74, 320
+%r76 = or i384 %r70, %r75
+%r77 = add i384 %r40, %r76 ; sum kept at 384 bits: no extra carry word, so a carry out of bit 383 would be lost
+%r78 = load i64, i64* %r4 ; pack the six words at r4
+%r79 = zext i64 %r78 to i128
+%r81 = getelementptr i64, i64* %r4, i32 1
+%r82 = load i64, i64* %r81
+%r83 = zext i64 %r82 to i128
+%r84 = shl i128 %r83, 64
+%r85 = or i128 %r79, %r84
+%r86 = zext i128 %r85 to i192
+%r88 = getelementptr i64, i64* %r4, i32 2
+%r89 = load i64, i64* %r88
+%r90 = zext i64 %r89 to i192
+%r91 = shl i192 %r90, 128
+%r92 = or i192 %r86, %r91
+%r93 = zext i192 %r92 to i256
+%r95 = getelementptr i64, i64* %r4, i32 3
+%r96 = load i64, i64* %r95
+%r97 = zext i64 %r96 to i256
+%r98 = shl i256 %r97, 192
+%r99 = or i256 %r93, %r98
+%r100 = zext i256 %r99 to i320
+%r102 = getelementptr i64, i64* %r4, i32 4
+%r103 = load i64, i64* %r102
+%r104 = zext i64 %r103 to i320
+%r105 = shl i320 %r104, 256
+%r106 = or i320 %r100, %r105
+%r107 = zext i320 %r106 to i384
+%r109 = getelementptr i64, i64* %r4, i32 5
+%r110 = load i64, i64* %r109
+%r111 = zext i64 %r110 to i384
+%r112 = shl i384 %r111, 320
+%r113 = or i384 %r107, %r112
+%r114 = sub i384 %r77, %r113 ; sum - r4, wrapping at 384 bits
+%r115 = lshr i384 %r114, 383 ; top bit of the difference, used as its sign
+%r116 = trunc i384 %r115 to i1
+%r117 = select i1 %r116, i384 %r77, i384 %r114 ; top bit set: keep the plain sum, otherwise take the difference
+%r118 = trunc i384 %r117 to i64 ; unpack the selected value into r1, lowest word first
+%r120 = getelementptr i64, i64* %r1, i32 0
+store i64 %r118, i64* %r120
+%r121 = lshr i384 %r117, 64
+%r122 = trunc i384 %r121 to i64
+%r124 = getelementptr i64, i64* %r1, i32 1
+store i64 %r122, i64* %r124
+%r125 = lshr i384 %r121, 64
+%r126 = trunc i384 %r125 to i64
+%r128 = getelementptr i64, i64* %r1, i32 2
+store i64 %r126, i64* %r128
+%r129 = lshr i384 %r125, 64
+%r130 = trunc i384 %r129 to i64
+%r132 = getelementptr i64, i64* %r1, i32 3
+store i64 %r130, i64* %r132
+%r133 = lshr i384 %r129, 64
+%r134 = trunc i384 %r133 to i64
+%r136 = getelementptr i64, i64* %r1, i32 4
+store i64 %r134, i64* %r136
+%r137 = lshr i384 %r133, 64
+%r138 = trunc i384 %r137 to i64
+%r140 = getelementptr i64, i64* %r1, i32 5
+store i64 %r138, i64* %r140
+ret void
+}
+define void @mcl_fp_sub6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; r1 = r2 - r3, with r4 added back when the subtraction borrows; r4 is presumably the field modulus (confirm against the generator)
+{
+%r5 = load i64, i64* %r2 ; start packing the six 64-bit words at r2 into one integer (word i lands at bit 64*i)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = load i64, i64* %r3 ; pack the six words at r3 the same way
+%r42 = zext i64 %r41 to i128
+%r44 = getelementptr i64, i64* %r3, i32 1
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i128
+%r47 = shl i128 %r46, 64
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r48 to i192
+%r51 = getelementptr i64, i64* %r3, i32 2
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r49, %r54
+%r56 = zext i192 %r55 to i256
+%r58 = getelementptr i64, i64* %r3, i32 3
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i256
+%r61 = shl i256 %r60, 192
+%r62 = or i256 %r56, %r61
+%r63 = zext i256 %r62 to i320
+%r65 = getelementptr i64, i64* %r3, i32 4
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i320
+%r68 = shl i320 %r67, 256
+%r69 = or i320 %r63, %r68
+%r70 = zext i320 %r69 to i384
+%r72 = getelementptr i64, i64* %r3, i32 5
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i384
+%r75 = shl i384 %r74, 320
+%r76 = or i384 %r70, %r75
+%r77 = zext i384 %r40 to i448 ; widen both operands by one word so the borrow is observable
+%r78 = zext i384 %r76 to i448
+%r79 = sub i448 %r77, %r78 ; 448-bit difference
+%r80 = trunc i448 %r79 to i384 ; low 384 bits of the difference, stored to r1 unconditionally first
+%r81 = lshr i448 %r79, 384
+%r82 = trunc i448 %r81 to i1 ; bit 384 of the difference: 1 when the subtraction borrowed
+%r83 = trunc i384 %r80 to i64
+%r85 = getelementptr i64, i64* %r1, i32 0
+store i64 %r83, i64* %r85
+%r86 = lshr i384 %r80, 64
+%r87 = trunc i384 %r86 to i64
+%r89 = getelementptr i64, i64* %r1, i32 1
+store i64 %r87, i64* %r89
+%r90 = lshr i384 %r86, 64
+%r91 = trunc i384 %r90 to i64
+%r93 = getelementptr i64, i64* %r1, i32 2
+store i64 %r91, i64* %r93
+%r94 = lshr i384 %r90, 64
+%r95 = trunc i384 %r94 to i64
+%r97 = getelementptr i64, i64* %r1, i32 3
+store i64 %r95, i64* %r97
+%r98 = lshr i384 %r94, 64
+%r99 = trunc i384 %r98 to i64
+%r101 = getelementptr i64, i64* %r1, i32 4
+store i64 %r99, i64* %r101
+%r102 = lshr i384 %r98, 64
+%r103 = trunc i384 %r102 to i64
+%r105 = getelementptr i64, i64* %r1, i32 5
+store i64 %r103, i64* %r105
+br i1%r82, label %carry, label %nocarry
+nocarry:
+ret void ; no borrow: the difference already stored in r1 is the result
+carry:
+%r106 = load i64, i64* %r4 ; borrow path: pack the six words at r4
+%r107 = zext i64 %r106 to i128
+%r109 = getelementptr i64, i64* %r4, i32 1
+%r110 = load i64, i64* %r109
+%r111 = zext i64 %r110 to i128
+%r112 = shl i128 %r111, 64
+%r113 = or i128 %r107, %r112
+%r114 = zext i128 %r113 to i192
+%r116 = getelementptr i64, i64* %r4, i32 2
+%r117 = load i64, i64* %r116
+%r118 = zext i64 %r117 to i192
+%r119 = shl i192 %r118, 128
+%r120 = or i192 %r114, %r119
+%r121 = zext i192 %r120 to i256
+%r123 = getelementptr i64, i64* %r4, i32 3
+%r124 = load i64, i64* %r123
+%r125 = zext i64 %r124 to i256
+%r126 = shl i256 %r125, 192
+%r127 = or i256 %r121, %r126
+%r128 = zext i256 %r127 to i320
+%r130 = getelementptr i64, i64* %r4, i32 4
+%r131 = load i64, i64* %r130
+%r132 = zext i64 %r131 to i320
+%r133 = shl i320 %r132, 256
+%r134 = or i320 %r128, %r133
+%r135 = zext i320 %r134 to i384
+%r137 = getelementptr i64, i64* %r4, i32 5
+%r138 = load i64, i64* %r137
+%r139 = zext i64 %r138 to i384
+%r140 = shl i384 %r139, 320
+%r141 = or i384 %r135, %r140
+%r142 = add i384 %r80, %r141 ; add r4 back to the wrapped difference (384-bit wrapping add)
+%r143 = trunc i384 %r142 to i64 ; overwrite r1 with the corrected value, lowest word first
+%r145 = getelementptr i64, i64* %r1, i32 0
+store i64 %r143, i64* %r145
+%r146 = lshr i384 %r142, 64
+%r147 = trunc i384 %r146 to i64
+%r149 = getelementptr i64, i64* %r1, i32 1
+store i64 %r147, i64* %r149
+%r150 = lshr i384 %r146, 64
+%r151 = trunc i384 %r150 to i64
+%r153 = getelementptr i64, i64* %r1, i32 2
+store i64 %r151, i64* %r153
+%r154 = lshr i384 %r150, 64
+%r155 = trunc i384 %r154 to i64
+%r157 = getelementptr i64, i64* %r1, i32 3
+store i64 %r155, i64* %r157
+%r158 = lshr i384 %r154, 64
+%r159 = trunc i384 %r158 to i64
+%r161 = getelementptr i64, i64* %r1, i32 4
+store i64 %r159, i64* %r161
+%r162 = lshr i384 %r158, 64
+%r163 = trunc i384 %r162 to i64
+%r165 = getelementptr i64, i64* %r1, i32 5
+store i64 %r163, i64* %r165
+ret void
+}
+define void @mcl_fp_subNF6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; branch-free subtract: r1 = (r2 - r3) + (r4 if bit 383 of the difference is set, else 0); r4 is presumably the field modulus (confirm against the generator)
+{
+%r5 = load i64, i64* %r2 ; start packing the six 64-bit words at r2 into one integer (word i lands at bit 64*i)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = load i64, i64* %r3 ; pack the six words at r3 the same way
+%r42 = zext i64 %r41 to i128
+%r44 = getelementptr i64, i64* %r3, i32 1
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i128
+%r47 = shl i128 %r46, 64
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r48 to i192
+%r51 = getelementptr i64, i64* %r3, i32 2
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r49, %r54
+%r56 = zext i192 %r55 to i256
+%r58 = getelementptr i64, i64* %r3, i32 3
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i256
+%r61 = shl i256 %r60, 192
+%r62 = or i256 %r56, %r61
+%r63 = zext i256 %r62 to i320
+%r65 = getelementptr i64, i64* %r3, i32 4
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i320
+%r68 = shl i320 %r67, 256
+%r69 = or i320 %r63, %r68
+%r70 = zext i320 %r69 to i384
+%r72 = getelementptr i64, i64* %r3, i32 5
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i384
+%r75 = shl i384 %r74, 320
+%r76 = or i384 %r70, %r75
+%r77 = sub i384 %r40, %r76 ; difference, wrapping at 384 bits (no extra borrow word is kept)
+%r78 = lshr i384 %r77, 383 ; top bit of the difference, used as its sign
+%r79 = trunc i384 %r78 to i1
+%r80 = load i64, i64* %r4 ; pack the six words at r4
+%r81 = zext i64 %r80 to i128
+%r83 = getelementptr i64, i64* %r4, i32 1
+%r84 = load i64, i64* %r83
+%r85 = zext i64 %r84 to i128
+%r86 = shl i128 %r85, 64
+%r87 = or i128 %r81, %r86
+%r88 = zext i128 %r87 to i192
+%r90 = getelementptr i64, i64* %r4, i32 2
+%r91 = load i64, i64* %r90
+%r92 = zext i64 %r91 to i192
+%r93 = shl i192 %r92, 128
+%r94 = or i192 %r88, %r93
+%r95 = zext i192 %r94 to i256
+%r97 = getelementptr i64, i64* %r4, i32 3
+%r98 = load i64, i64* %r97
+%r99 = zext i64 %r98 to i256
+%r100 = shl i256 %r99, 192
+%r101 = or i256 %r95, %r100
+%r102 = zext i256 %r101 to i320
+%r104 = getelementptr i64, i64* %r4, i32 4
+%r105 = load i64, i64* %r104
+%r106 = zext i64 %r105 to i320
+%r107 = shl i320 %r106, 256
+%r108 = or i320 %r102, %r107
+%r109 = zext i320 %r108 to i384
+%r111 = getelementptr i64, i64* %r4, i32 5
+%r112 = load i64, i64* %r111
+%r113 = zext i64 %r112 to i384
+%r114 = shl i384 %r113, 320
+%r115 = or i384 %r109, %r114
+%r117 = select i1 %r79, i384 %r115, i384 0 ; correction term: the r4 value when the top bit is set, otherwise zero
+%r118 = add i384 %r77, %r117 ; apply the correction (384-bit wrapping add)
+%r119 = trunc i384 %r118 to i64 ; unpack the result into r1, lowest word first
+%r121 = getelementptr i64, i64* %r1, i32 0
+store i64 %r119, i64* %r121
+%r122 = lshr i384 %r118, 64
+%r123 = trunc i384 %r122 to i64
+%r125 = getelementptr i64, i64* %r1, i32 1
+store i64 %r123, i64* %r125
+%r126 = lshr i384 %r122, 64
+%r127 = trunc i384 %r126 to i64
+%r129 = getelementptr i64, i64* %r1, i32 2
+store i64 %r127, i64* %r129
+%r130 = lshr i384 %r126, 64
+%r131 = trunc i384 %r130 to i64
+%r133 = getelementptr i64, i64* %r1, i32 3
+store i64 %r131, i64* %r133
+%r134 = lshr i384 %r130, 64
+%r135 = trunc i384 %r134 to i64
+%r137 = getelementptr i64, i64* %r1, i32 4
+store i64 %r135, i64* %r137
+%r138 = lshr i384 %r134, 64
+%r139 = trunc i384 %r138 to i64
+%r141 = getelementptr i64, i64* %r1, i32 5
+store i64 %r139, i64* %r141
+ret void
+}
+define void @mcl_fpDbl_add6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = zext i576 %r61 to i640
+%r64 = getelementptr i64, i64* %r2, i32 9
+%r65 = load i64, i64* %r64
+%r66 = zext i64 %r65 to i640
+%r67 = shl i640 %r66, 576
+%r68 = or i640 %r62, %r67
+%r69 = zext i640 %r68 to i704
+%r71 = getelementptr i64, i64* %r2, i32 10
+%r72 = load i64, i64* %r71
+%r73 = zext i64 %r72 to i704
+%r74 = shl i704 %r73, 640
+%r75 = or i704 %r69, %r74
+%r76 = zext i704 %r75 to i768
+%r78 = getelementptr i64, i64* %r2, i32 11
+%r79 = load i64, i64* %r78
+%r80 = zext i64 %r79 to i768
+%r81 = shl i768 %r80, 704
+%r82 = or i768 %r76, %r81
+%r83 = load i64, i64* %r3
+%r84 = zext i64 %r83 to i128
+%r86 = getelementptr i64, i64* %r3, i32 1
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i128
+%r89 = shl i128 %r88, 64
+%r90 = or i128 %r84, %r89
+%r91 = zext i128 %r90 to i192
+%r93 = getelementptr i64, i64* %r3, i32 2
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i192
+%r96 = shl i192 %r95, 128
+%r97 = or i192 %r91, %r96
+%r98 = zext i192 %r97 to i256
+%r100 = getelementptr i64, i64* %r3, i32 3
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i256
+%r103 = shl i256 %r102, 192
+%r104 = or i256 %r98, %r103
+%r105 = zext i256 %r104 to i320
+%r107 = getelementptr i64, i64* %r3, i32 4
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i320
+%r110 = shl i320 %r109, 256
+%r111 = or i320 %r105, %r110
+%r112 = zext i320 %r111 to i384
+%r114 = getelementptr i64, i64* %r3, i32 5
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i384
+%r117 = shl i384 %r116, 320
+%r118 = or i384 %r112, %r117
+%r119 = zext i384 %r118 to i448
+%r121 = getelementptr i64, i64* %r3, i32 6
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i448
+%r124 = shl i448 %r123, 384
+%r125 = or i448 %r119, %r124
+%r126 = zext i448 %r125 to i512
+%r128 = getelementptr i64, i64* %r3, i32 7
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i512
+%r131 = shl i512 %r130, 448
+%r132 = or i512 %r126, %r131
+%r133 = zext i512 %r132 to i576
+%r135 = getelementptr i64, i64* %r3, i32 8
+%r136 = load i64, i64* %r135
+%r137 = zext i64 %r136 to i576
+%r138 = shl i576 %r137, 512
+%r139 = or i576 %r133, %r138
+%r140 = zext i576 %r139 to i640
+%r142 = getelementptr i64, i64* %r3, i32 9
+%r143 = load i64, i64* %r142
+%r144 = zext i64 %r143 to i640
+%r145 = shl i640 %r144, 576
+%r146 = or i640 %r140, %r145
+%r147 = zext i640 %r146 to i704
+%r149 = getelementptr i64, i64* %r3, i32 10
+%r150 = load i64, i64* %r149
+%r151 = zext i64 %r150 to i704
+%r152 = shl i704 %r151, 640
+%r153 = or i704 %r147, %r152
+%r154 = zext i704 %r153 to i768
+%r156 = getelementptr i64, i64* %r3, i32 11
+%r157 = load i64, i64* %r156
+%r158 = zext i64 %r157 to i768
+%r159 = shl i768 %r158, 704
+%r160 = or i768 %r154, %r159
+%r161 = zext i768 %r82 to i832
+%r162 = zext i768 %r160 to i832
+%r163 = add i832 %r161, %r162
+%r164 = trunc i832 %r163 to i384
+%r165 = trunc i384 %r164 to i64
+%r167 = getelementptr i64, i64* %r1, i32 0
+store i64 %r165, i64* %r167
+%r168 = lshr i384 %r164, 64
+%r169 = trunc i384 %r168 to i64
+%r171 = getelementptr i64, i64* %r1, i32 1
+store i64 %r169, i64* %r171
+%r172 = lshr i384 %r168, 64
+%r173 = trunc i384 %r172 to i64
+%r175 = getelementptr i64, i64* %r1, i32 2
+store i64 %r173, i64* %r175
+%r176 = lshr i384 %r172, 64
+%r177 = trunc i384 %r176 to i64
+%r179 = getelementptr i64, i64* %r1, i32 3
+store i64 %r177, i64* %r179
+%r180 = lshr i384 %r176, 64
+%r181 = trunc i384 %r180 to i64
+%r183 = getelementptr i64, i64* %r1, i32 4
+store i64 %r181, i64* %r183
+%r184 = lshr i384 %r180, 64
+%r185 = trunc i384 %r184 to i64
+%r187 = getelementptr i64, i64* %r1, i32 5
+store i64 %r185, i64* %r187
+%r188 = lshr i832 %r163, 384
+%r189 = trunc i832 %r188 to i448
+%r190 = load i64, i64* %r4
+%r191 = zext i64 %r190 to i128
+%r193 = getelementptr i64, i64* %r4, i32 1
+%r194 = load i64, i64* %r193
+%r195 = zext i64 %r194 to i128
+%r196 = shl i128 %r195, 64
+%r197 = or i128 %r191, %r196
+%r198 = zext i128 %r197 to i192
+%r200 = getelementptr i64, i64* %r4, i32 2
+%r201 = load i64, i64* %r200
+%r202 = zext i64 %r201 to i192
+%r203 = shl i192 %r202, 128
+%r204 = or i192 %r198, %r203
+%r205 = zext i192 %r204 to i256
+%r207 = getelementptr i64, i64* %r4, i32 3
+%r208 = load i64, i64* %r207
+%r209 = zext i64 %r208 to i256
+%r210 = shl i256 %r209, 192
+%r211 = or i256 %r205, %r210
+%r212 = zext i256 %r211 to i320
+%r214 = getelementptr i64, i64* %r4, i32 4
+%r215 = load i64, i64* %r214
+%r216 = zext i64 %r215 to i320
+%r217 = shl i320 %r216, 256
+%r218 = or i320 %r212, %r217
+%r219 = zext i320 %r218 to i384
+%r221 = getelementptr i64, i64* %r4, i32 5
+%r222 = load i64, i64* %r221
+%r223 = zext i64 %r222 to i384
+%r224 = shl i384 %r223, 320
+%r225 = or i384 %r219, %r224
+%r226 = zext i384 %r225 to i448
+%r227 = sub i448 %r189, %r226
+%r228 = lshr i448 %r227, 384
+%r229 = trunc i448 %r228 to i1
+%r230 = select i1 %r229, i448 %r189, i448 %r227
+%r231 = trunc i448 %r230 to i384
+%r233 = getelementptr i64, i64* %r1, i32 6
+%r234 = trunc i384 %r231 to i64
+%r236 = getelementptr i64, i64* %r233, i32 0
+store i64 %r234, i64* %r236
+%r237 = lshr i384 %r231, 64
+%r238 = trunc i384 %r237 to i64
+%r240 = getelementptr i64, i64* %r233, i32 1
+store i64 %r238, i64* %r240
+%r241 = lshr i384 %r237, 64
+%r242 = trunc i384 %r241 to i64
+%r244 = getelementptr i64, i64* %r233, i32 2
+store i64 %r242, i64* %r244
+%r245 = lshr i384 %r241, 64
+%r246 = trunc i384 %r245 to i64
+%r248 = getelementptr i64, i64* %r233, i32 3
+store i64 %r246, i64* %r248
+%r249 = lshr i384 %r245, 64
+%r250 = trunc i384 %r249 to i64
+%r252 = getelementptr i64, i64* %r233, i32 4
+store i64 %r250, i64* %r252
+%r253 = lshr i384 %r249, 64
+%r254 = trunc i384 %r253 to i64
+%r256 = getelementptr i64, i64* %r233, i32 5
+store i64 %r254, i64* %r256
+ret void
+}
+define void @mcl_fpDbl_sub6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; 12-limb subtraction %r1 = %r2 - %r3; when the subtraction borrows, the 6-limb value at %r4 is added to the upper 6 limbs.
+{ ; %r1: 12-limb output. %r2, %r3: 12-limb inputs, limb 0 least significant. %r4: 6-limb value, presumably the field modulus (TODO confirm against the caller).
+%r5 = load i64, i64* %r2 ; ---- pack %r2[0..11] into a single i768 (%r82); limb i lands at bit 64*i ----
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = zext i576 %r61 to i640
+%r64 = getelementptr i64, i64* %r2, i32 9
+%r65 = load i64, i64* %r64
+%r66 = zext i64 %r65 to i640
+%r67 = shl i640 %r66, 576
+%r68 = or i640 %r62, %r67
+%r69 = zext i640 %r68 to i704
+%r71 = getelementptr i64, i64* %r2, i32 10
+%r72 = load i64, i64* %r71
+%r73 = zext i64 %r72 to i704
+%r74 = shl i704 %r73, 640
+%r75 = or i704 %r69, %r74
+%r76 = zext i704 %r75 to i768
+%r78 = getelementptr i64, i64* %r2, i32 11
+%r79 = load i64, i64* %r78
+%r80 = zext i64 %r79 to i768
+%r81 = shl i768 %r80, 704
+%r82 = or i768 %r76, %r81
+%r83 = load i64, i64* %r3 ; ---- pack %r3[0..11] into a single i768 (%r160), same layout ----
+%r84 = zext i64 %r83 to i128
+%r86 = getelementptr i64, i64* %r3, i32 1
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i128
+%r89 = shl i128 %r88, 64
+%r90 = or i128 %r84, %r89
+%r91 = zext i128 %r90 to i192
+%r93 = getelementptr i64, i64* %r3, i32 2
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i192
+%r96 = shl i192 %r95, 128
+%r97 = or i192 %r91, %r96
+%r98 = zext i192 %r97 to i256
+%r100 = getelementptr i64, i64* %r3, i32 3
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i256
+%r103 = shl i256 %r102, 192
+%r104 = or i256 %r98, %r103
+%r105 = zext i256 %r104 to i320
+%r107 = getelementptr i64, i64* %r3, i32 4
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i320
+%r110 = shl i320 %r109, 256
+%r111 = or i320 %r105, %r110
+%r112 = zext i320 %r111 to i384
+%r114 = getelementptr i64, i64* %r3, i32 5
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i384
+%r117 = shl i384 %r116, 320
+%r118 = or i384 %r112, %r117
+%r119 = zext i384 %r118 to i448
+%r121 = getelementptr i64, i64* %r3, i32 6
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i448
+%r124 = shl i448 %r123, 384
+%r125 = or i448 %r119, %r124
+%r126 = zext i448 %r125 to i512
+%r128 = getelementptr i64, i64* %r3, i32 7
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i512
+%r131 = shl i512 %r130, 448
+%r132 = or i512 %r126, %r131
+%r133 = zext i512 %r132 to i576
+%r135 = getelementptr i64, i64* %r3, i32 8
+%r136 = load i64, i64* %r135
+%r137 = zext i64 %r136 to i576
+%r138 = shl i576 %r137, 512
+%r139 = or i576 %r133, %r138
+%r140 = zext i576 %r139 to i640
+%r142 = getelementptr i64, i64* %r3, i32 9
+%r143 = load i64, i64* %r142
+%r144 = zext i64 %r143 to i640
+%r145 = shl i640 %r144, 576
+%r146 = or i640 %r140, %r145
+%r147 = zext i640 %r146 to i704
+%r149 = getelementptr i64, i64* %r3, i32 10
+%r150 = load i64, i64* %r149
+%r151 = zext i64 %r150 to i704
+%r152 = shl i704 %r151, 640
+%r153 = or i704 %r147, %r152
+%r154 = zext i704 %r153 to i768
+%r156 = getelementptr i64, i64* %r3, i32 11
+%r157 = load i64, i64* %r156
+%r158 = zext i64 %r157 to i768
+%r159 = shl i768 %r158, 704
+%r160 = or i768 %r154, %r159
+%r161 = zext i768 %r82 to i832 ; widen both operands by one limb so the borrow shows up in bit 768
+%r162 = zext i768 %r160 to i832
+%r163 = sub i832 %r161, %r162 ; full-width difference %r2 - %r3 (wraps modulo 2^832 when %r2 < %r3)
+%r164 = trunc i832 %r163 to i384 ; ---- low 384 bits go to %r1[0..5] unchanged ----
+%r165 = trunc i384 %r164 to i64
+%r167 = getelementptr i64, i64* %r1, i32 0
+store i64 %r165, i64* %r167
+%r168 = lshr i384 %r164, 64
+%r169 = trunc i384 %r168 to i64
+%r171 = getelementptr i64, i64* %r1, i32 1
+store i64 %r169, i64* %r171
+%r172 = lshr i384 %r168, 64
+%r173 = trunc i384 %r172 to i64
+%r175 = getelementptr i64, i64* %r1, i32 2
+store i64 %r173, i64* %r175
+%r176 = lshr i384 %r172, 64
+%r177 = trunc i384 %r176 to i64
+%r179 = getelementptr i64, i64* %r1, i32 3
+store i64 %r177, i64* %r179
+%r180 = lshr i384 %r176, 64
+%r181 = trunc i384 %r180 to i64
+%r183 = getelementptr i64, i64* %r1, i32 4
+store i64 %r181, i64* %r183
+%r184 = lshr i384 %r180, 64
+%r185 = trunc i384 %r184 to i64
+%r187 = getelementptr i64, i64* %r1, i32 5
+store i64 %r185, i64* %r187
+%r188 = lshr i832 %r163, 384
+%r189 = trunc i832 %r188 to i384 ; bits 384..767 of the difference: the upper half before correction
+%r190 = lshr i832 %r163, 768
+%r191 = trunc i832 %r190 to i1 ; bit 768 of the difference: 1 exactly when the subtraction borrowed
+%r192 = load i64, i64* %r4 ; ---- pack %r4[0..5] into an i384 (%r227) ----
+%r193 = zext i64 %r192 to i128
+%r195 = getelementptr i64, i64* %r4, i32 1
+%r196 = load i64, i64* %r195
+%r197 = zext i64 %r196 to i128
+%r198 = shl i128 %r197, 64
+%r199 = or i128 %r193, %r198
+%r200 = zext i128 %r199 to i192
+%r202 = getelementptr i64, i64* %r4, i32 2
+%r203 = load i64, i64* %r202
+%r204 = zext i64 %r203 to i192
+%r205 = shl i192 %r204, 128
+%r206 = or i192 %r200, %r205
+%r207 = zext i192 %r206 to i256
+%r209 = getelementptr i64, i64* %r4, i32 3
+%r210 = load i64, i64* %r209
+%r211 = zext i64 %r210 to i256
+%r212 = shl i256 %r211, 192
+%r213 = or i256 %r207, %r212
+%r214 = zext i256 %r213 to i320
+%r216 = getelementptr i64, i64* %r4, i32 4
+%r217 = load i64, i64* %r216
+%r218 = zext i64 %r217 to i320
+%r219 = shl i320 %r218, 256
+%r220 = or i320 %r214, %r219
+%r221 = zext i320 %r220 to i384
+%r223 = getelementptr i64, i64* %r4, i32 5
+%r224 = load i64, i64* %r223
+%r225 = zext i64 %r224 to i384
+%r226 = shl i384 %r225, 320
+%r227 = or i384 %r221, %r226
+%r229 = select i1 %r191, i384 %r227, i384 0 ; branch-free correction term: the %r4 value on borrow, otherwise 0
+%r230 = add i384 %r189, %r229 ; corrected upper half (wraps modulo 2^384)
+%r232 = getelementptr i64, i64* %r1, i32 6 ; ---- store the corrected upper half to %r1[6..11] ----
+%r233 = trunc i384 %r230 to i64
+%r235 = getelementptr i64, i64* %r232, i32 0
+store i64 %r233, i64* %r235
+%r236 = lshr i384 %r230, 64
+%r237 = trunc i384 %r236 to i64
+%r239 = getelementptr i64, i64* %r232, i32 1
+store i64 %r237, i64* %r239
+%r240 = lshr i384 %r236, 64
+%r241 = trunc i384 %r240 to i64
+%r243 = getelementptr i64, i64* %r232, i32 2
+store i64 %r241, i64* %r243
+%r244 = lshr i384 %r240, 64
+%r245 = trunc i384 %r244 to i64
+%r247 = getelementptr i64, i64* %r232, i32 3
+store i64 %r245, i64* %r247
+%r248 = lshr i384 %r244, 64
+%r249 = trunc i384 %r248 to i64
+%r251 = getelementptr i64, i64* %r232, i32 4
+store i64 %r249, i64* %r251
+%r252 = lshr i384 %r248, 64
+%r253 = trunc i384 %r252 to i64
+%r255 = getelementptr i64, i64* %r232, i32 5
+store i64 %r253, i64* %r255
+ret void
+}
+define i512 @mulPv448x64(i64* noalias  %r2, i64 %r3) ; Combines seven 128-bit results of @mulPos64x64(%r2, %r3, i), i = 0..6, into one i512; presumably the product of the 7-limb value at %r2 and the 64-bit scalar %r3.
+{ ; NOTE(review): @mulPos64x64 and @extractHigh64 are defined outside this chunk; assumed to return %r2[i] * %r3 as i128 and the upper 64 bits of an i128 respectively - confirm.
+%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0) ; ---- per-index partial results, each split into a low word (trunc) and a high word (@extractHigh64) ----
+%r6 = trunc i128 %r5 to i64
+%r7 = call i64 @extractHigh64(i128 %r5)
+%r9 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 1)
+%r10 = trunc i128 %r9 to i64
+%r11 = call i64 @extractHigh64(i128 %r9)
+%r13 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 2)
+%r14 = trunc i128 %r13 to i64
+%r15 = call i64 @extractHigh64(i128 %r13)
+%r17 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 3)
+%r18 = trunc i128 %r17 to i64
+%r19 = call i64 @extractHigh64(i128 %r17)
+%r21 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 4)
+%r22 = trunc i128 %r21 to i64
+%r23 = call i64 @extractHigh64(i128 %r21)
+%r25 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 5)
+%r26 = trunc i128 %r25 to i64
+%r27 = call i64 @extractHigh64(i128 %r25)
+%r29 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 6)
+%r30 = trunc i128 %r29 to i64
+%r31 = call i64 @extractHigh64(i128 %r29)
+%r32 = zext i64 %r6 to i128 ; ---- concatenate the seven low words into an i448 (%r55), word i at bit 64*i ----
+%r33 = zext i64 %r10 to i128
+%r34 = shl i128 %r33, 64
+%r35 = or i128 %r32, %r34
+%r36 = zext i128 %r35 to i192
+%r37 = zext i64 %r14 to i192
+%r38 = shl i192 %r37, 128
+%r39 = or i192 %r36, %r38
+%r40 = zext i192 %r39 to i256
+%r41 = zext i64 %r18 to i256
+%r42 = shl i256 %r41, 192
+%r43 = or i256 %r40, %r42
+%r44 = zext i256 %r43 to i320
+%r45 = zext i64 %r22 to i320
+%r46 = shl i320 %r45, 256
+%r47 = or i320 %r44, %r46
+%r48 = zext i320 %r47 to i384
+%r49 = zext i64 %r26 to i384
+%r50 = shl i384 %r49, 320
+%r51 = or i384 %r48, %r50
+%r52 = zext i384 %r51 to i448
+%r53 = zext i64 %r30 to i448
+%r54 = shl i448 %r53, 384
+%r55 = or i448 %r52, %r54
+%r56 = zext i64 %r7 to i128 ; ---- concatenate the seven high words into an i448 (%r79), same layout ----
+%r57 = zext i64 %r11 to i128
+%r58 = shl i128 %r57, 64
+%r59 = or i128 %r56, %r58
+%r60 = zext i128 %r59 to i192
+%r61 = zext i64 %r15 to i192
+%r62 = shl i192 %r61, 128
+%r63 = or i192 %r60, %r62
+%r64 = zext i192 %r63 to i256
+%r65 = zext i64 %r19 to i256
+%r66 = shl i256 %r65, 192
+%r67 = or i256 %r64, %r66
+%r68 = zext i256 %r67 to i320
+%r69 = zext i64 %r23 to i320
+%r70 = shl i320 %r69, 256
+%r71 = or i320 %r68, %r70
+%r72 = zext i320 %r71 to i384
+%r73 = zext i64 %r27 to i384
+%r74 = shl i384 %r73, 320
+%r75 = or i384 %r72, %r74
+%r76 = zext i384 %r75 to i448
+%r77 = zext i64 %r31 to i448
+%r78 = shl i448 %r77, 384
+%r79 = or i448 %r76, %r78
+%r80 = zext i448 %r55 to i512
+%r81 = zext i448 %r79 to i512
+%r82 = shl i512 %r81, 64 ; high words sit one limb above their matching low words
+%r83 = add i512 %r80, %r82 ; a single wide add resolves every inter-limb carry at once
+ret i512 %r83
+}
+define void @mcl_fp_mulUnitPre7L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3) ; Writes the i512 returned by @mulPv448x64(%r2, %r3) to %r1 as eight 64-bit limbs, least-significant limb first.
+{ ; %r1: 8-limb output. %r2: pointer forwarded to @mulPv448x64 (which indexes limbs 0..6). %r3: 64-bit scalar operand.
+%r4 = call i512 @mulPv448x64(i64* %r2, i64 %r3)
+%r5 = trunc i512 %r4 to i64 ; ---- peel off 64 bits per step: store the low limb, then shift right by 64 ----
+%r7 = getelementptr i64, i64* %r1, i32 0
+store i64 %r5, i64* %r7
+%r8 = lshr i512 %r4, 64
+%r9 = trunc i512 %r8 to i64
+%r11 = getelementptr i64, i64* %r1, i32 1
+store i64 %r9, i64* %r11
+%r12 = lshr i512 %r8, 64
+%r13 = trunc i512 %r12 to i64
+%r15 = getelementptr i64, i64* %r1, i32 2
+store i64 %r13, i64* %r15
+%r16 = lshr i512 %r12, 64
+%r17 = trunc i512 %r16 to i64
+%r19 = getelementptr i64, i64* %r1, i32 3
+store i64 %r17, i64* %r19
+%r20 = lshr i512 %r16, 64
+%r21 = trunc i512 %r20 to i64
+%r23 = getelementptr i64, i64* %r1, i32 4
+store i64 %r21, i64* %r23
+%r24 = lshr i512 %r20, 64
+%r25 = trunc i512 %r24 to i64
+%r27 = getelementptr i64, i64* %r1, i32 5
+store i64 %r25, i64* %r27
+%r28 = lshr i512 %r24, 64
+%r29 = trunc i512 %r28 to i64
+%r31 = getelementptr i64, i64* %r1, i32 6
+store i64 %r29, i64* %r31
+%r32 = lshr i512 %r28, 64
+%r33 = trunc i512 %r32 to i64
+%r35 = getelementptr i64, i64* %r1, i32 7
+store i64 %r33, i64* %r35
+ret void
+}
+define void @mcl_fpDbl_mulPre7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; Row-by-row multiplication: accumulates @mulPv448x64(%r2, %r3[i]) for i = 0..6 and writes 14 limbs to %r1; no modular reduction is performed here.
+{ ; %r1: 14-limb output. %r2: operand forwarded whole to @mulPv448x64. %r3: 7-limb operand consumed one limb per row, least-significant limb first.
+%r4 = load i64, i64* %r3 ; ---- row 0: the low limb of the row result is final, so store it to %r1[0] ----
+%r5 = call i512 @mulPv448x64(i64* %r2, i64 %r4)
+%r6 = trunc i512 %r5 to i64
+store i64 %r6, i64* %r1
+%r7 = lshr i512 %r5, 64 ; drop the stored limb; the remainder carries into the next row
+%r9 = getelementptr i64, i64* %r3, i32 1 ; ---- row 1: carry + next row result, store low limb to %r1[1] ----
+%r10 = load i64, i64* %r9
+%r11 = call i512 @mulPv448x64(i64* %r2, i64 %r10)
+%r12 = add i512 %r7, %r11
+%r13 = trunc i512 %r12 to i64
+%r15 = getelementptr i64, i64* %r1, i32 1
+store i64 %r13, i64* %r15
+%r16 = lshr i512 %r12, 64
+%r18 = getelementptr i64, i64* %r3, i32 2 ; ---- row 2 ----
+%r19 = load i64, i64* %r18
+%r20 = call i512 @mulPv448x64(i64* %r2, i64 %r19)
+%r21 = add i512 %r16, %r20
+%r22 = trunc i512 %r21 to i64
+%r24 = getelementptr i64, i64* %r1, i32 2
+store i64 %r22, i64* %r24
+%r25 = lshr i512 %r21, 64
+%r27 = getelementptr i64, i64* %r3, i32 3 ; ---- row 3 ----
+%r28 = load i64, i64* %r27
+%r29 = call i512 @mulPv448x64(i64* %r2, i64 %r28)
+%r30 = add i512 %r25, %r29
+%r31 = trunc i512 %r30 to i64
+%r33 = getelementptr i64, i64* %r1, i32 3
+store i64 %r31, i64* %r33
+%r34 = lshr i512 %r30, 64
+%r36 = getelementptr i64, i64* %r3, i32 4 ; ---- row 4 ----
+%r37 = load i64, i64* %r36
+%r38 = call i512 @mulPv448x64(i64* %r2, i64 %r37)
+%r39 = add i512 %r34, %r38
+%r40 = trunc i512 %r39 to i64
+%r42 = getelementptr i64, i64* %r1, i32 4
+store i64 %r40, i64* %r42
+%r43 = lshr i512 %r39, 64
+%r45 = getelementptr i64, i64* %r3, i32 5 ; ---- row 5 ----
+%r46 = load i64, i64* %r45
+%r47 = call i512 @mulPv448x64(i64* %r2, i64 %r46)
+%r48 = add i512 %r43, %r47
+%r49 = trunc i512 %r48 to i64
+%r51 = getelementptr i64, i64* %r1, i32 5
+store i64 %r49, i64* %r51
+%r52 = lshr i512 %r48, 64
+%r54 = getelementptr i64, i64* %r3, i32 6 ; ---- row 6 (last): the whole 512-bit accumulator is final ----
+%r55 = load i64, i64* %r54
+%r56 = call i512 @mulPv448x64(i64* %r2, i64 %r55)
+%r57 = add i512 %r52, %r56
+%r59 = getelementptr i64, i64* %r1, i32 6 ; store all eight limbs of the accumulator to %r1[6..13]
+%r60 = trunc i512 %r57 to i64
+%r62 = getelementptr i64, i64* %r59, i32 0
+store i64 %r60, i64* %r62
+%r63 = lshr i512 %r57, 64
+%r64 = trunc i512 %r63 to i64
+%r66 = getelementptr i64, i64* %r59, i32 1
+store i64 %r64, i64* %r66
+%r67 = lshr i512 %r63, 64
+%r68 = trunc i512 %r67 to i64
+%r70 = getelementptr i64, i64* %r59, i32 2
+store i64 %r68, i64* %r70
+%r71 = lshr i512 %r67, 64
+%r72 = trunc i512 %r71 to i64
+%r74 = getelementptr i64, i64* %r59, i32 3
+store i64 %r72, i64* %r74
+%r75 = lshr i512 %r71, 64
+%r76 = trunc i512 %r75 to i64
+%r78 = getelementptr i64, i64* %r59, i32 4
+store i64 %r76, i64* %r78
+%r79 = lshr i512 %r75, 64
+%r80 = trunc i512 %r79 to i64
+%r82 = getelementptr i64, i64* %r59, i32 5
+store i64 %r80, i64* %r82
+%r83 = lshr i512 %r79, 64
+%r84 = trunc i512 %r83 to i64
+%r86 = getelementptr i64, i64* %r59, i32 6
+store i64 %r84, i64* %r86
+%r87 = lshr i512 %r83, 64
+%r88 = trunc i512 %r87 to i64
+%r90 = getelementptr i64, i64* %r59, i32 7
+store i64 %r88, i64* %r90
+ret void
+}
+define void @mcl_fpDbl_sqrPre7L(i64* noalias  %r1, i64* noalias  %r2) ; Squaring by the same row-by-row scheme as a general multiply: accumulates @mulPv448x64(%r2, %r2[i]) for i = 0..6 and writes 14 limbs to %r1.
+{ ; %r1: 14-limb output. %r2: 7-limb operand used both as the vector and as the source of per-row scalars; no squaring-specific shortcut is taken.
+%r3 = load i64, i64* %r2 ; ---- row 0: the low limb of the row result is final, so store it to %r1[0] ----
+%r4 = call i512 @mulPv448x64(i64* %r2, i64 %r3)
+%r5 = trunc i512 %r4 to i64
+store i64 %r5, i64* %r1
+%r6 = lshr i512 %r4, 64 ; drop the stored limb; the remainder carries into the next row
+%r8 = getelementptr i64, i64* %r2, i32 1 ; ---- row 1: carry + next row result, store low limb to %r1[1] ----
+%r9 = load i64, i64* %r8
+%r10 = call i512 @mulPv448x64(i64* %r2, i64 %r9)
+%r11 = add i512 %r6, %r10
+%r12 = trunc i512 %r11 to i64
+%r14 = getelementptr i64, i64* %r1, i32 1
+store i64 %r12, i64* %r14
+%r15 = lshr i512 %r11, 64
+%r17 = getelementptr i64, i64* %r2, i32 2 ; ---- row 2 ----
+%r18 = load i64, i64* %r17
+%r19 = call i512 @mulPv448x64(i64* %r2, i64 %r18)
+%r20 = add i512 %r15, %r19
+%r21 = trunc i512 %r20 to i64
+%r23 = getelementptr i64, i64* %r1, i32 2
+store i64 %r21, i64* %r23
+%r24 = lshr i512 %r20, 64
+%r26 = getelementptr i64, i64* %r2, i32 3 ; ---- row 3 ----
+%r27 = load i64, i64* %r26
+%r28 = call i512 @mulPv448x64(i64* %r2, i64 %r27)
+%r29 = add i512 %r24, %r28
+%r30 = trunc i512 %r29 to i64
+%r32 = getelementptr i64, i64* %r1, i32 3
+store i64 %r30, i64* %r32
+%r33 = lshr i512 %r29, 64
+%r35 = getelementptr i64, i64* %r2, i32 4 ; ---- row 4 ----
+%r36 = load i64, i64* %r35
+%r37 = call i512 @mulPv448x64(i64* %r2, i64 %r36)
+%r38 = add i512 %r33, %r37
+%r39 = trunc i512 %r38 to i64
+%r41 = getelementptr i64, i64* %r1, i32 4
+store i64 %r39, i64* %r41
+%r42 = lshr i512 %r38, 64
+%r44 = getelementptr i64, i64* %r2, i32 5 ; ---- row 5 ----
+%r45 = load i64, i64* %r44
+%r46 = call i512 @mulPv448x64(i64* %r2, i64 %r45)
+%r47 = add i512 %r42, %r46
+%r48 = trunc i512 %r47 to i64
+%r50 = getelementptr i64, i64* %r1, i32 5
+store i64 %r48, i64* %r50
+%r51 = lshr i512 %r47, 64
+%r53 = getelementptr i64, i64* %r2, i32 6 ; ---- row 6 (last): the whole 512-bit accumulator is final ----
+%r54 = load i64, i64* %r53
+%r55 = call i512 @mulPv448x64(i64* %r2, i64 %r54)
+%r56 = add i512 %r51, %r55
+%r58 = getelementptr i64, i64* %r1, i32 6 ; store all eight limbs of the accumulator to %r1[6..13]
+%r59 = trunc i512 %r56 to i64
+%r61 = getelementptr i64, i64* %r58, i32 0
+store i64 %r59, i64* %r61
+%r62 = lshr i512 %r56, 64
+%r63 = trunc i512 %r62 to i64
+%r65 = getelementptr i64, i64* %r58, i32 1
+store i64 %r63, i64* %r65
+%r66 = lshr i512 %r62, 64
+%r67 = trunc i512 %r66 to i64
+%r69 = getelementptr i64, i64* %r58, i32 2
+store i64 %r67, i64* %r69
+%r70 = lshr i512 %r66, 64
+%r71 = trunc i512 %r70 to i64
+%r73 = getelementptr i64, i64* %r58, i32 3
+store i64 %r71, i64* %r73
+%r74 = lshr i512 %r70, 64
+%r75 = trunc i512 %r74 to i64
+%r77 = getelementptr i64, i64* %r58, i32 4
+store i64 %r75, i64* %r77
+%r78 = lshr i512 %r74, 64
+%r79 = trunc i512 %r78 to i64
+%r81 = getelementptr i64, i64* %r58, i32 5
+store i64 %r79, i64* %r81
+%r82 = lshr i512 %r78, 64
+%r83 = trunc i512 %r82 to i64
+%r85 = getelementptr i64, i64* %r58, i32 6
+store i64 %r83, i64* %r85
+%r86 = lshr i512 %r82, 64
+%r87 = trunc i512 %r86 to i64
+%r89 = getelementptr i64, i64* %r58, i32 7
+store i64 %r87, i64* %r89
+ret void
+}
+define void @mcl_fp_mont7L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; 7-limb interleaved multiply-and-reduce (Montgomery multiplication, judging by the name and the per-limb pattern): result of combining %r2 and %r3 with respect to %r4 is written to %r1[0..6].
+{ ; %r1: 7-limb output. %r2, %r3: operands. %r4: 7-limb value, presumably the modulus p; the word at %r4[-1] is read as the per-round multiplier, presumably -p^-1 mod 2^64 (TODO confirm against the caller).
+%r6 = getelementptr i64, i64* %r4, i32 -1 ; the reduction constant is stored one word BEFORE the array %r4 points to
+%r7 = load i64, i64* %r6
+%r9 = getelementptr i64, i64* %r3, i32 0 ; ---- round 0: t = mulPv(%r2, %r3[0]) ----
+%r10 = load i64, i64* %r9
+%r11 = call i512 @mulPv448x64(i64* %r2, i64 %r10)
+%r12 = zext i512 %r11 to i576 ; the accumulator is kept 576 bits wide so the two additions per round have headroom
+%r13 = trunc i512 %r11 to i64
+%r14 = mul i64 %r13, %r7 ; q = low64(t) * constant (mod 2^64)
+%r15 = call i512 @mulPv448x64(i64* %r4, i64 %r14)
+%r16 = zext i512 %r15 to i576
+%r17 = add i576 %r12, %r16 ; t += mulPv(%r4, q); presumably this zeroes the low limb of t (relies on the constant's assumed meaning)
+%r18 = lshr i576 %r17, 64 ; discard the low limb
+%r20 = getelementptr i64, i64* %r3, i32 1 ; ---- round 1: t += mulPv(%r2, %r3[1]); q; t += mulPv(%r4, q); t >>= 64 ----
+%r21 = load i64, i64* %r20
+%r22 = call i512 @mulPv448x64(i64* %r2, i64 %r21)
+%r23 = zext i512 %r22 to i576
+%r24 = add i576 %r18, %r23
+%r25 = trunc i576 %r24 to i64
+%r26 = mul i64 %r25, %r7
+%r27 = call i512 @mulPv448x64(i64* %r4, i64 %r26)
+%r28 = zext i512 %r27 to i576
+%r29 = add i576 %r24, %r28
+%r30 = lshr i576 %r29, 64
+%r32 = getelementptr i64, i64* %r3, i32 2 ; ---- round 2 ----
+%r33 = load i64, i64* %r32
+%r34 = call i512 @mulPv448x64(i64* %r2, i64 %r33)
+%r35 = zext i512 %r34 to i576
+%r36 = add i576 %r30, %r35
+%r37 = trunc i576 %r36 to i64
+%r38 = mul i64 %r37, %r7
+%r39 = call i512 @mulPv448x64(i64* %r4, i64 %r38)
+%r40 = zext i512 %r39 to i576
+%r41 = add i576 %r36, %r40
+%r42 = lshr i576 %r41, 64
+%r44 = getelementptr i64, i64* %r3, i32 3 ; ---- round 3 ----
+%r45 = load i64, i64* %r44
+%r46 = call i512 @mulPv448x64(i64* %r2, i64 %r45)
+%r47 = zext i512 %r46 to i576
+%r48 = add i576 %r42, %r47
+%r49 = trunc i576 %r48 to i64
+%r50 = mul i64 %r49, %r7
+%r51 = call i512 @mulPv448x64(i64* %r4, i64 %r50)
+%r52 = zext i512 %r51 to i576
+%r53 = add i576 %r48, %r52
+%r54 = lshr i576 %r53, 64
+%r56 = getelementptr i64, i64* %r3, i32 4 ; ---- round 4 ----
+%r57 = load i64, i64* %r56
+%r58 = call i512 @mulPv448x64(i64* %r2, i64 %r57)
+%r59 = zext i512 %r58 to i576
+%r60 = add i576 %r54, %r59
+%r61 = trunc i576 %r60 to i64
+%r62 = mul i64 %r61, %r7
+%r63 = call i512 @mulPv448x64(i64* %r4, i64 %r62)
+%r64 = zext i512 %r63 to i576
+%r65 = add i576 %r60, %r64
+%r66 = lshr i576 %r65, 64
+%r68 = getelementptr i64, i64* %r3, i32 5 ; ---- round 5 ----
+%r69 = load i64, i64* %r68
+%r70 = call i512 @mulPv448x64(i64* %r2, i64 %r69)
+%r71 = zext i512 %r70 to i576
+%r72 = add i576 %r66, %r71
+%r73 = trunc i576 %r72 to i64
+%r74 = mul i64 %r73, %r7
+%r75 = call i512 @mulPv448x64(i64* %r4, i64 %r74)
+%r76 = zext i512 %r75 to i576
+%r77 = add i576 %r72, %r76
+%r78 = lshr i576 %r77, 64
+%r80 = getelementptr i64, i64* %r3, i32 6 ; ---- round 6 (last) ----
+%r81 = load i64, i64* %r80
+%r82 = call i512 @mulPv448x64(i64* %r2, i64 %r81)
+%r83 = zext i512 %r82 to i576
+%r84 = add i576 %r78, %r83
+%r85 = trunc i576 %r84 to i64
+%r86 = mul i64 %r85, %r7
+%r87 = call i512 @mulPv448x64(i64* %r4, i64 %r86)
+%r88 = zext i512 %r87 to i576
+%r89 = add i576 %r84, %r88
+%r90 = lshr i576 %r89, 64
+%r91 = trunc i576 %r90 to i512 ; candidate result, kept one limb wider than 448 bits for the borrow test below
+%r92 = load i64, i64* %r4 ; ---- pack %r4[0..6] into an i448 (%r134), limb i at bit 64*i ----
+%r93 = zext i64 %r92 to i128
+%r95 = getelementptr i64, i64* %r4, i32 1
+%r96 = load i64, i64* %r95
+%r97 = zext i64 %r96 to i128
+%r98 = shl i128 %r97, 64
+%r99 = or i128 %r93, %r98
+%r100 = zext i128 %r99 to i192
+%r102 = getelementptr i64, i64* %r4, i32 2
+%r103 = load i64, i64* %r102
+%r104 = zext i64 %r103 to i192
+%r105 = shl i192 %r104, 128
+%r106 = or i192 %r100, %r105
+%r107 = zext i192 %r106 to i256
+%r109 = getelementptr i64, i64* %r4, i32 3
+%r110 = load i64, i64* %r109
+%r111 = zext i64 %r110 to i256
+%r112 = shl i256 %r111, 192
+%r113 = or i256 %r107, %r112
+%r114 = zext i256 %r113 to i320
+%r116 = getelementptr i64, i64* %r4, i32 4
+%r117 = load i64, i64* %r116
+%r118 = zext i64 %r117 to i320
+%r119 = shl i320 %r118, 256
+%r120 = or i320 %r114, %r119
+%r121 = zext i320 %r120 to i384
+%r123 = getelementptr i64, i64* %r4, i32 5
+%r124 = load i64, i64* %r123
+%r125 = zext i64 %r124 to i384
+%r126 = shl i384 %r125, 320
+%r127 = or i384 %r121, %r126
+%r128 = zext i384 %r127 to i448
+%r130 = getelementptr i64, i64* %r4, i32 6
+%r131 = load i64, i64* %r130
+%r132 = zext i64 %r131 to i448
+%r133 = shl i448 %r132, 384
+%r134 = or i448 %r128, %r133
+%r135 = zext i448 %r134 to i512
+%r136 = sub i512 %r91, %r135 ; trial subtraction of the %r4 value from the candidate
+%r137 = lshr i512 %r136, 448
+%r138 = trunc i512 %r137 to i1 ; bit 448 of the difference is used as the "subtraction went negative" flag
+%r139 = select i1 %r138, i512 %r91, i512 %r136 ; branch-free pick: keep the candidate if the flag is set, else take the difference
+%r140 = trunc i512 %r139 to i448
+%r141 = trunc i448 %r140 to i64 ; ---- store the selected 448-bit value to %r1[0..6] ----
+%r143 = getelementptr i64, i64* %r1, i32 0
+store i64 %r141, i64* %r143
+%r144 = lshr i448 %r140, 64
+%r145 = trunc i448 %r144 to i64
+%r147 = getelementptr i64, i64* %r1, i32 1
+store i64 %r145, i64* %r147
+%r148 = lshr i448 %r144, 64
+%r149 = trunc i448 %r148 to i64
+%r151 = getelementptr i64, i64* %r1, i32 2
+store i64 %r149, i64* %r151
+%r152 = lshr i448 %r148, 64
+%r153 = trunc i448 %r152 to i64
+%r155 = getelementptr i64, i64* %r1, i32 3
+store i64 %r153, i64* %r155
+%r156 = lshr i448 %r152, 64
+%r157 = trunc i448 %r156 to i64
+%r159 = getelementptr i64, i64* %r1, i32 4
+store i64 %r157, i64* %r159
+%r160 = lshr i448 %r156, 64
+%r161 = trunc i448 %r160 to i64
+%r163 = getelementptr i64, i64* %r1, i32 5
+store i64 %r161, i64* %r163
+%r164 = lshr i448 %r160, 64
+%r165 = trunc i448 %r164 to i64
+%r167 = getelementptr i64, i64* %r1, i32 6
+store i64 %r165, i64* %r167
+ret void
+}
+define void @mcl_fp_montNF7L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; Variant of the 7-limb interleaved multiply-and-reduce that keeps the accumulator at 512 bits (no extra carry limb) and decides the final correction from the sign bit of a 448-bit difference.
+{ ; %r1: 7-limb output. %r2, %r3: operands. %r4: 7-limb value, presumably the modulus p, with the per-round multiplier read from %r4[-1]. NOTE(review): the i512 adds have no headroom limb, so this presumably requires the top bit of p to be clear - confirm against the caller.
+%r6 = getelementptr i64, i64* %r4, i32 -1 ; the reduction constant is stored one word BEFORE the array %r4 points to
+%r7 = load i64, i64* %r6
+%r8 = load i64, i64* %r3 ; ---- round 0: t = mulPv(%r2, %r3[0]) ----
+%r9 = call i512 @mulPv448x64(i64* %r2, i64 %r8)
+%r10 = trunc i512 %r9 to i64
+%r11 = mul i64 %r10, %r7 ; q = low64(t) * constant (mod 2^64)
+%r12 = call i512 @mulPv448x64(i64* %r4, i64 %r11)
+%r13 = add i512 %r9, %r12 ; t += mulPv(%r4, q), performed directly in i512
+%r14 = lshr i512 %r13, 64 ; discard the low limb
+%r16 = getelementptr i64, i64* %r3, i32 1 ; ---- round 1: t += mulPv(%r2, %r3[1]); q; t += mulPv(%r4, q); t >>= 64 ----
+%r17 = load i64, i64* %r16
+%r18 = call i512 @mulPv448x64(i64* %r2, i64 %r17)
+%r19 = add i512 %r14, %r18
+%r20 = trunc i512 %r19 to i64
+%r21 = mul i64 %r20, %r7
+%r22 = call i512 @mulPv448x64(i64* %r4, i64 %r21)
+%r23 = add i512 %r19, %r22
+%r24 = lshr i512 %r23, 64
+%r26 = getelementptr i64, i64* %r3, i32 2 ; ---- round 2 ----
+%r27 = load i64, i64* %r26
+%r28 = call i512 @mulPv448x64(i64* %r2, i64 %r27)
+%r29 = add i512 %r24, %r28
+%r30 = trunc i512 %r29 to i64
+%r31 = mul i64 %r30, %r7
+%r32 = call i512 @mulPv448x64(i64* %r4, i64 %r31)
+%r33 = add i512 %r29, %r32
+%r34 = lshr i512 %r33, 64
+%r36 = getelementptr i64, i64* %r3, i32 3 ; ---- round 3 ----
+%r37 = load i64, i64* %r36
+%r38 = call i512 @mulPv448x64(i64* %r2, i64 %r37)
+%r39 = add i512 %r34, %r38
+%r40 = trunc i512 %r39 to i64
+%r41 = mul i64 %r40, %r7
+%r42 = call i512 @mulPv448x64(i64* %r4, i64 %r41)
+%r43 = add i512 %r39, %r42
+%r44 = lshr i512 %r43, 64
+%r46 = getelementptr i64, i64* %r3, i32 4 ; ---- round 4 ----
+%r47 = load i64, i64* %r46
+%r48 = call i512 @mulPv448x64(i64* %r2, i64 %r47)
+%r49 = add i512 %r44, %r48
+%r50 = trunc i512 %r49 to i64
+%r51 = mul i64 %r50, %r7
+%r52 = call i512 @mulPv448x64(i64* %r4, i64 %r51)
+%r53 = add i512 %r49, %r52
+%r54 = lshr i512 %r53, 64
+%r56 = getelementptr i64, i64* %r3, i32 5 ; ---- round 5 ----
+%r57 = load i64, i64* %r56
+%r58 = call i512 @mulPv448x64(i64* %r2, i64 %r57)
+%r59 = add i512 %r54, %r58
+%r60 = trunc i512 %r59 to i64
+%r61 = mul i64 %r60, %r7
+%r62 = call i512 @mulPv448x64(i64* %r4, i64 %r61)
+%r63 = add i512 %r59, %r62
+%r64 = lshr i512 %r63, 64
+%r66 = getelementptr i64, i64* %r3, i32 6 ; ---- round 6 (last) ----
+%r67 = load i64, i64* %r66
+%r68 = call i512 @mulPv448x64(i64* %r2, i64 %r67)
+%r69 = add i512 %r64, %r68
+%r70 = trunc i512 %r69 to i64
+%r71 = mul i64 %r70, %r7
+%r72 = call i512 @mulPv448x64(i64* %r4, i64 %r71)
+%r73 = add i512 %r69, %r72
+%r74 = lshr i512 %r73, 64
+%r75 = trunc i512 %r74 to i448 ; candidate result, exactly 7 limbs wide
+%r76 = load i64, i64* %r4 ; ---- pack %r4[0..6] into an i448 (%r118), limb i at bit 64*i ----
+%r77 = zext i64 %r76 to i128
+%r79 = getelementptr i64, i64* %r4, i32 1
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i128
+%r82 = shl i128 %r81, 64
+%r83 = or i128 %r77, %r82
+%r84 = zext i128 %r83 to i192
+%r86 = getelementptr i64, i64* %r4, i32 2
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i192
+%r89 = shl i192 %r88, 128
+%r90 = or i192 %r84, %r89
+%r91 = zext i192 %r90 to i256
+%r93 = getelementptr i64, i64* %r4, i32 3
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i256
+%r96 = shl i256 %r95, 192
+%r97 = or i256 %r91, %r96
+%r98 = zext i256 %r97 to i320
+%r100 = getelementptr i64, i64* %r4, i32 4
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i320
+%r103 = shl i320 %r102, 256
+%r104 = or i320 %r98, %r103
+%r105 = zext i320 %r104 to i384
+%r107 = getelementptr i64, i64* %r4, i32 5
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i384
+%r110 = shl i384 %r109, 320
+%r111 = or i384 %r105, %r110
+%r112 = zext i384 %r111 to i448
+%r114 = getelementptr i64, i64* %r4, i32 6
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i448
+%r117 = shl i448 %r116, 384
+%r118 = or i448 %r112, %r117
+%r119 = sub i448 %r75, %r118 ; trial subtraction of the %r4 value, done at 448 bits (no extra limb)
+%r120 = lshr i448 %r119, 447
+%r121 = trunc i448 %r120 to i1 ; top bit (bit 447) of the difference is used as the sign / "went negative" flag
+%r122 = select i1 %r121, i448 %r75, i448 %r119 ; branch-free pick: keep the candidate if the flag is set, else take the difference
+%r123 = trunc i448 %r122 to i64 ; ---- store the selected 448-bit value to %r1[0..6] ----
+%r125 = getelementptr i64, i64* %r1, i32 0
+store i64 %r123, i64* %r125
+%r126 = lshr i448 %r122, 64
+%r127 = trunc i448 %r126 to i64
+%r129 = getelementptr i64, i64* %r1, i32 1
+store i64 %r127, i64* %r129
+%r130 = lshr i448 %r126, 64
+%r131 = trunc i448 %r130 to i64
+%r133 = getelementptr i64, i64* %r1, i32 2
+store i64 %r131, i64* %r133
+%r134 = lshr i448 %r130, 64
+%r135 = trunc i448 %r134 to i64
+%r137 = getelementptr i64, i64* %r1, i32 3
+store i64 %r135, i64* %r137
+%r138 = lshr i448 %r134, 64
+%r139 = trunc i448 %r138 to i64
+%r141 = getelementptr i64, i64* %r1, i32 4
+store i64 %r139, i64* %r141
+%r142 = lshr i448 %r138, 64
+%r143 = trunc i448 %r142 to i64
+%r145 = getelementptr i64, i64* %r1, i32 5
+store i64 %r143, i64* %r145
+%r146 = lshr i448 %r142, 64
+%r147 = trunc i448 %r146 to i64
+%r149 = getelementptr i64, i64* %r1, i32 6
+store i64 %r147, i64* %r149
+ret void
+}
+define void @mcl_fp_montRed7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+{
+%r5 = getelementptr i64, i64* %r3, i32 -1
+%r6 = load i64, i64* %r5
+%r7 = load i64, i64* %r3
+%r8 = zext i64 %r7 to i128
+%r10 = getelementptr i64, i64* %r3, i32 1
+%r11 = load i64, i64* %r10
+%r12 = zext i64 %r11 to i128
+%r13 = shl i128 %r12, 64
+%r14 = or i128 %r8, %r13
+%r15 = zext i128 %r14 to i192
+%r17 = getelementptr i64, i64* %r3, i32 2
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i192
+%r20 = shl i192 %r19, 128
+%r21 = or i192 %r15, %r20
+%r22 = zext i192 %r21 to i256
+%r24 = getelementptr i64, i64* %r3, i32 3
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i256
+%r27 = shl i256 %r26, 192
+%r28 = or i256 %r22, %r27
+%r29 = zext i256 %r28 to i320
+%r31 = getelementptr i64, i64* %r3, i32 4
+%r32 = load i64, i64* %r31
+%r33 = zext i64 %r32 to i320
+%r34 = shl i320 %r33, 256
+%r35 = or i320 %r29, %r34
+%r36 = zext i320 %r35 to i384
+%r38 = getelementptr i64, i64* %r3, i32 5
+%r39 = load i64, i64* %r38
+%r40 = zext i64 %r39 to i384
+%r41 = shl i384 %r40, 320
+%r42 = or i384 %r36, %r41
+%r43 = zext i384 %r42 to i448
+%r45 = getelementptr i64, i64* %r3, i32 6
+%r46 = load i64, i64* %r45
+%r47 = zext i64 %r46 to i448
+%r48 = shl i448 %r47, 384
+%r49 = or i448 %r43, %r48
+%r50 = load i64, i64* %r2
+%r51 = zext i64 %r50 to i128
+%r53 = getelementptr i64, i64* %r2, i32 1
+%r54 = load i64, i64* %r53
+%r55 = zext i64 %r54 to i128
+%r56 = shl i128 %r55, 64
+%r57 = or i128 %r51, %r56
+%r58 = zext i128 %r57 to i192
+%r60 = getelementptr i64, i64* %r2, i32 2
+%r61 = load i64, i64* %r60
+%r62 = zext i64 %r61 to i192
+%r63 = shl i192 %r62, 128
+%r64 = or i192 %r58, %r63
+%r65 = zext i192 %r64 to i256
+%r67 = getelementptr i64, i64* %r2, i32 3
+%r68 = load i64, i64* %r67
+%r69 = zext i64 %r68 to i256
+%r70 = shl i256 %r69, 192
+%r71 = or i256 %r65, %r70
+%r72 = zext i256 %r71 to i320
+%r74 = getelementptr i64, i64* %r2, i32 4
+%r75 = load i64, i64* %r74
+%r76 = zext i64 %r75 to i320
+%r77 = shl i320 %r76, 256
+%r78 = or i320 %r72, %r77
+%r79 = zext i320 %r78 to i384
+%r81 = getelementptr i64, i64* %r2, i32 5
+%r82 = load i64, i64* %r81
+%r83 = zext i64 %r82 to i384
+%r84 = shl i384 %r83, 320
+%r85 = or i384 %r79, %r84
+%r86 = zext i384 %r85 to i448
+%r88 = getelementptr i64, i64* %r2, i32 6
+%r89 = load i64, i64* %r88
+%r90 = zext i64 %r89 to i448
+%r91 = shl i448 %r90, 384
+%r92 = or i448 %r86, %r91
+%r93 = zext i448 %r92 to i512
+%r95 = getelementptr i64, i64* %r2, i32 7
+%r96 = load i64, i64* %r95
+%r97 = zext i64 %r96 to i512
+%r98 = shl i512 %r97, 448
+%r99 = or i512 %r93, %r98
+%r100 = zext i512 %r99 to i576
+%r102 = getelementptr i64, i64* %r2, i32 8
+%r103 = load i64, i64* %r102
+%r104 = zext i64 %r103 to i576
+%r105 = shl i576 %r104, 512
+%r106 = or i576 %r100, %r105
+%r107 = zext i576 %r106 to i640
+%r109 = getelementptr i64, i64* %r2, i32 9
+%r110 = load i64, i64* %r109
+%r111 = zext i64 %r110 to i640
+%r112 = shl i640 %r111, 576
+%r113 = or i640 %r107, %r112
+%r114 = zext i640 %r113 to i704
+%r116 = getelementptr i64, i64* %r2, i32 10
+%r117 = load i64, i64* %r116
+%r118 = zext i64 %r117 to i704
+%r119 = shl i704 %r118, 640
+%r120 = or i704 %r114, %r119
+%r121 = zext i704 %r120 to i768
+%r123 = getelementptr i64, i64* %r2, i32 11
+%r124 = load i64, i64* %r123
+%r125 = zext i64 %r124 to i768
+%r126 = shl i768 %r125, 704
+%r127 = or i768 %r121, %r126
+%r128 = zext i768 %r127 to i832
+%r130 = getelementptr i64, i64* %r2, i32 12
+%r131 = load i64, i64* %r130
+%r132 = zext i64 %r131 to i832
+%r133 = shl i832 %r132, 768
+%r134 = or i832 %r128, %r133
+%r135 = zext i832 %r134 to i896
+%r137 = getelementptr i64, i64* %r2, i32 13
+%r138 = load i64, i64* %r137
+%r139 = zext i64 %r138 to i896
+%r140 = shl i896 %r139, 832
+%r141 = or i896 %r135, %r140
+%r142 = zext i896 %r141 to i960
+%r143 = trunc i960 %r142 to i64
+%r144 = mul i64 %r143, %r6
+%r145 = call i512 @mulPv448x64(i64* %r3, i64 %r144)
+%r146 = zext i512 %r145 to i960
+%r147 = add i960 %r142, %r146
+%r148 = lshr i960 %r147, 64
+%r149 = trunc i960 %r148 to i896
+%r150 = trunc i896 %r149 to i64
+%r151 = mul i64 %r150, %r6
+%r152 = call i512 @mulPv448x64(i64* %r3, i64 %r151)
+%r153 = zext i512 %r152 to i896
+%r154 = add i896 %r149, %r153
+%r155 = lshr i896 %r154, 64
+%r156 = trunc i896 %r155 to i832
+%r157 = trunc i832 %r156 to i64
+%r158 = mul i64 %r157, %r6
+%r159 = call i512 @mulPv448x64(i64* %r3, i64 %r158)
+%r160 = zext i512 %r159 to i832
+%r161 = add i832 %r156, %r160
+%r162 = lshr i832 %r161, 64
+%r163 = trunc i832 %r162 to i768
+%r164 = trunc i768 %r163 to i64
+%r165 = mul i64 %r164, %r6
+%r166 = call i512 @mulPv448x64(i64* %r3, i64 %r165)
+%r167 = zext i512 %r166 to i768
+%r168 = add i768 %r163, %r167
+%r169 = lshr i768 %r168, 64
+%r170 = trunc i768 %r169 to i704
+%r171 = trunc i704 %r170 to i64
+%r172 = mul i64 %r171, %r6
+%r173 = call i512 @mulPv448x64(i64* %r3, i64 %r172)
+%r174 = zext i512 %r173 to i704
+%r175 = add i704 %r170, %r174
+%r176 = lshr i704 %r175, 64
+%r177 = trunc i704 %r176 to i640
+%r178 = trunc i640 %r177 to i64
+%r179 = mul i64 %r178, %r6
+%r180 = call i512 @mulPv448x64(i64* %r3, i64 %r179)
+%r181 = zext i512 %r180 to i640
+%r182 = add i640 %r177, %r181
+%r183 = lshr i640 %r182, 64
+%r184 = trunc i640 %r183 to i576
+%r185 = trunc i576 %r184 to i64
+%r186 = mul i64 %r185, %r6
+%r187 = call i512 @mulPv448x64(i64* %r3, i64 %r186)
+%r188 = zext i512 %r187 to i576
+%r189 = add i576 %r184, %r188
+%r190 = lshr i576 %r189, 64
+%r191 = trunc i576 %r190 to i512
+%r192 = zext i448 %r49 to i512
+%r193 = sub i512 %r191, %r192
+%r194 = lshr i512 %r193, 448
+%r195 = trunc i512 %r194 to i1
+%r196 = select i1 %r195, i512 %r191, i512 %r193
+%r197 = trunc i512 %r196 to i448
+%r198 = trunc i448 %r197 to i64
+%r200 = getelementptr i64, i64* %r1, i32 0
+store i64 %r198, i64* %r200
+%r201 = lshr i448 %r197, 64
+%r202 = trunc i448 %r201 to i64
+%r204 = getelementptr i64, i64* %r1, i32 1
+store i64 %r202, i64* %r204
+%r205 = lshr i448 %r201, 64
+%r206 = trunc i448 %r205 to i64
+%r208 = getelementptr i64, i64* %r1, i32 2
+store i64 %r206, i64* %r208
+%r209 = lshr i448 %r205, 64
+%r210 = trunc i448 %r209 to i64
+%r212 = getelementptr i64, i64* %r1, i32 3
+store i64 %r210, i64* %r212
+%r213 = lshr i448 %r209, 64
+%r214 = trunc i448 %r213 to i64
+%r216 = getelementptr i64, i64* %r1, i32 4
+store i64 %r214, i64* %r216
+%r217 = lshr i448 %r213, 64
+%r218 = trunc i448 %r217 to i64
+%r220 = getelementptr i64, i64* %r1, i32 5
+store i64 %r218, i64* %r220
+%r221 = lshr i448 %r217, 64
+%r222 = trunc i448 %r221 to i64
+%r224 = getelementptr i64, i64* %r1, i32 6
+store i64 %r222, i64* %r224
+ret void
+}
+define i64 @mcl_fp_addPre7L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; 7-word add with no reduction: stores the low 448 bits of [%r3] + [%r4] to %r2 and returns the carry-out
+{
+%r5 = load i64, i64* %r3 ; start assembling the 7 words at %r3 into one wide integer; word 0 is least significant
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64 ; word i is placed at bit offset 64*i
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r3, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r3, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r3, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46 ; %r47 = all 7 words of [%r3] as one i448
+%r48 = zext i448 %r47 to i512 ; widen so the carry-out lands in bit 448
+%r49 = load i64, i64* %r4 ; same assembly for the 7 words at %r4
+%r50 = zext i64 %r49 to i128
+%r52 = getelementptr i64, i64* %r4, i32 1
+%r53 = load i64, i64* %r52
+%r54 = zext i64 %r53 to i128
+%r55 = shl i128 %r54, 64
+%r56 = or i128 %r50, %r55
+%r57 = zext i128 %r56 to i192
+%r59 = getelementptr i64, i64* %r4, i32 2
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i192
+%r62 = shl i192 %r61, 128
+%r63 = or i192 %r57, %r62
+%r64 = zext i192 %r63 to i256
+%r66 = getelementptr i64, i64* %r4, i32 3
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i256
+%r69 = shl i256 %r68, 192
+%r70 = or i256 %r64, %r69
+%r71 = zext i256 %r70 to i320
+%r73 = getelementptr i64, i64* %r4, i32 4
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i320
+%r76 = shl i320 %r75, 256
+%r77 = or i320 %r71, %r76
+%r78 = zext i320 %r77 to i384
+%r80 = getelementptr i64, i64* %r4, i32 5
+%r81 = load i64, i64* %r80
+%r82 = zext i64 %r81 to i384
+%r83 = shl i384 %r82, 320
+%r84 = or i384 %r78, %r83
+%r85 = zext i384 %r84 to i448
+%r87 = getelementptr i64, i64* %r4, i32 6
+%r88 = load i64, i64* %r87
+%r89 = zext i64 %r88 to i448
+%r90 = shl i448 %r89, 384
+%r91 = or i448 %r85, %r90 ; %r91 = all 7 words of [%r4] as one i448
+%r92 = zext i448 %r91 to i512
+%r93 = add i512 %r48, %r92 ; full sum, carry included
+%r94 = trunc i512 %r93 to i448 ; low 448 bits are what gets written out
+%r95 = trunc i448 %r94 to i64 ; peel off 64 bits at a time, lowest word first
+%r97 = getelementptr i64, i64* %r2, i32 0
+store i64 %r95, i64* %r97
+%r98 = lshr i448 %r94, 64
+%r99 = trunc i448 %r98 to i64
+%r101 = getelementptr i64, i64* %r2, i32 1
+store i64 %r99, i64* %r101
+%r102 = lshr i448 %r98, 64
+%r103 = trunc i448 %r102 to i64
+%r105 = getelementptr i64, i64* %r2, i32 2
+store i64 %r103, i64* %r105
+%r106 = lshr i448 %r102, 64
+%r107 = trunc i448 %r106 to i64
+%r109 = getelementptr i64, i64* %r2, i32 3
+store i64 %r107, i64* %r109
+%r110 = lshr i448 %r106, 64
+%r111 = trunc i448 %r110 to i64
+%r113 = getelementptr i64, i64* %r2, i32 4
+store i64 %r111, i64* %r113
+%r114 = lshr i448 %r110, 64
+%r115 = trunc i448 %r114 to i64
+%r117 = getelementptr i64, i64* %r2, i32 5
+store i64 %r115, i64* %r117
+%r118 = lshr i448 %r114, 64
+%r119 = trunc i448 %r118 to i64
+%r121 = getelementptr i64, i64* %r2, i32 6
+store i64 %r119, i64* %r121 ; word 6, the most significant output word
+%r122 = lshr i512 %r93, 448 ; bring the carry bit down to bit 0
+%r123 = trunc i512 %r122 to i64
+ret i64 %r123 ; 1 if the sum did not fit in 448 bits, otherwise 0
+}
+define i64 @mcl_fp_subPre7L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; 7-word subtract with no reduction: stores the low 448 bits of [%r3] - [%r4] to %r2 and returns the borrow
+{
+%r5 = load i64, i64* %r3 ; start assembling the 7 words at %r3 (minuend); word 0 is least significant
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64 ; word i is placed at bit offset 64*i
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r3, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r3, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r3, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46 ; %r47 = all 7 words of [%r3] as one i448
+%r48 = zext i448 %r47 to i512 ; widen so a wrap-around shows up in bit 448
+%r49 = load i64, i64* %r4 ; same assembly for the 7 words at %r4 (subtrahend)
+%r50 = zext i64 %r49 to i128
+%r52 = getelementptr i64, i64* %r4, i32 1
+%r53 = load i64, i64* %r52
+%r54 = zext i64 %r53 to i128
+%r55 = shl i128 %r54, 64
+%r56 = or i128 %r50, %r55
+%r57 = zext i128 %r56 to i192
+%r59 = getelementptr i64, i64* %r4, i32 2
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i192
+%r62 = shl i192 %r61, 128
+%r63 = or i192 %r57, %r62
+%r64 = zext i192 %r63 to i256
+%r66 = getelementptr i64, i64* %r4, i32 3
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i256
+%r69 = shl i256 %r68, 192
+%r70 = or i256 %r64, %r69
+%r71 = zext i256 %r70 to i320
+%r73 = getelementptr i64, i64* %r4, i32 4
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i320
+%r76 = shl i320 %r75, 256
+%r77 = or i320 %r71, %r76
+%r78 = zext i320 %r77 to i384
+%r80 = getelementptr i64, i64* %r4, i32 5
+%r81 = load i64, i64* %r80
+%r82 = zext i64 %r81 to i384
+%r83 = shl i384 %r82, 320
+%r84 = or i384 %r78, %r83
+%r85 = zext i384 %r84 to i448
+%r87 = getelementptr i64, i64* %r4, i32 6
+%r88 = load i64, i64* %r87
+%r89 = zext i64 %r88 to i448
+%r90 = shl i448 %r89, 384
+%r91 = or i448 %r85, %r90 ; %r91 = all 7 words of [%r4] as one i448
+%r92 = zext i448 %r91 to i512
+%r93 = sub i512 %r48, %r92 ; wraps modulo 2^512 when [%r3] < [%r4], which sets every bit from 448 upward
+%r94 = trunc i512 %r93 to i448 ; low 448 bits are what gets written out
+%r95 = trunc i448 %r94 to i64 ; peel off 64 bits at a time, lowest word first
+%r97 = getelementptr i64, i64* %r2, i32 0
+store i64 %r95, i64* %r97
+%r98 = lshr i448 %r94, 64
+%r99 = trunc i448 %r98 to i64
+%r101 = getelementptr i64, i64* %r2, i32 1
+store i64 %r99, i64* %r101
+%r102 = lshr i448 %r98, 64
+%r103 = trunc i448 %r102 to i64
+%r105 = getelementptr i64, i64* %r2, i32 2
+store i64 %r103, i64* %r105
+%r106 = lshr i448 %r102, 64
+%r107 = trunc i448 %r106 to i64
+%r109 = getelementptr i64, i64* %r2, i32 3
+store i64 %r107, i64* %r109
+%r110 = lshr i448 %r106, 64
+%r111 = trunc i448 %r110 to i64
+%r113 = getelementptr i64, i64* %r2, i32 4
+store i64 %r111, i64* %r113
+%r114 = lshr i448 %r110, 64
+%r115 = trunc i448 %r114 to i64
+%r117 = getelementptr i64, i64* %r2, i32 5
+store i64 %r115, i64* %r117
+%r118 = lshr i448 %r114, 64
+%r119 = trunc i448 %r118 to i64
+%r121 = getelementptr i64, i64* %r2, i32 6
+store i64 %r119, i64* %r121 ; word 6, the most significant output word
+%r122 = lshr i512 %r93, 448 ; upper 64 bits: all ones after a wrap, all zeros otherwise
+%r123 = trunc i512 %r122 to i64
+%r125 = and i64 %r123, 1 ; keep only bit 0 so the result is exactly 0 or 1
+ret i64 %r125 ; 1 if a borrow occurred, otherwise 0
+}
+define void @mcl_fp_shr1_7L(i64* noalias  %r1, i64* noalias  %r2) ; logical right shift by one bit of the 7-word integer at %r2, result written to %r1
+{
+%r3 = load i64, i64* %r2 ; start assembling the 7 words at %r2 into one i448; word 0 is least significant
+%r4 = zext i64 %r3 to i128
+%r6 = getelementptr i64, i64* %r2, i32 1
+%r7 = load i64, i64* %r6
+%r8 = zext i64 %r7 to i128
+%r9 = shl i128 %r8, 64 ; word i is placed at bit offset 64*i
+%r10 = or i128 %r4, %r9
+%r11 = zext i128 %r10 to i192
+%r13 = getelementptr i64, i64* %r2, i32 2
+%r14 = load i64, i64* %r13
+%r15 = zext i64 %r14 to i192
+%r16 = shl i192 %r15, 128
+%r17 = or i192 %r11, %r16
+%r18 = zext i192 %r17 to i256
+%r20 = getelementptr i64, i64* %r2, i32 3
+%r21 = load i64, i64* %r20
+%r22 = zext i64 %r21 to i256
+%r23 = shl i256 %r22, 192
+%r24 = or i256 %r18, %r23
+%r25 = zext i256 %r24 to i320
+%r27 = getelementptr i64, i64* %r2, i32 4
+%r28 = load i64, i64* %r27
+%r29 = zext i64 %r28 to i320
+%r30 = shl i320 %r29, 256
+%r31 = or i320 %r25, %r30
+%r32 = zext i320 %r31 to i384
+%r34 = getelementptr i64, i64* %r2, i32 5
+%r35 = load i64, i64* %r34
+%r36 = zext i64 %r35 to i384
+%r37 = shl i384 %r36, 320
+%r38 = or i384 %r32, %r37
+%r39 = zext i384 %r38 to i448
+%r41 = getelementptr i64, i64* %r2, i32 6
+%r42 = load i64, i64* %r41
+%r43 = zext i64 %r42 to i448
+%r44 = shl i448 %r43, 384
+%r45 = or i448 %r39, %r44 ; %r45 = all 7 input words as one i448
+%r46 = lshr i448 %r45, 1 ; the actual operation: bit 0 is dropped, a zero enters at bit 447
+%r47 = trunc i448 %r46 to i64 ; peel off 64 bits at a time, lowest word first
+%r49 = getelementptr i64, i64* %r1, i32 0
+store i64 %r47, i64* %r49
+%r50 = lshr i448 %r46, 64
+%r51 = trunc i448 %r50 to i64
+%r53 = getelementptr i64, i64* %r1, i32 1
+store i64 %r51, i64* %r53
+%r54 = lshr i448 %r50, 64
+%r55 = trunc i448 %r54 to i64
+%r57 = getelementptr i64, i64* %r1, i32 2
+store i64 %r55, i64* %r57
+%r58 = lshr i448 %r54, 64
+%r59 = trunc i448 %r58 to i64
+%r61 = getelementptr i64, i64* %r1, i32 3
+store i64 %r59, i64* %r61
+%r62 = lshr i448 %r58, 64
+%r63 = trunc i448 %r62 to i64
+%r65 = getelementptr i64, i64* %r1, i32 4
+store i64 %r63, i64* %r65
+%r66 = lshr i448 %r62, 64
+%r67 = trunc i448 %r66 to i64
+%r69 = getelementptr i64, i64* %r1, i32 5
+store i64 %r67, i64* %r69
+%r70 = lshr i448 %r66, 64
+%r71 = trunc i448 %r70 to i64
+%r73 = getelementptr i64, i64* %r1, i32 6
+store i64 %r71, i64* %r73 ; word 6, the most significant output word
+ret void
+}
+define void @mcl_fp_add7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; 7-word add with one conditional subtraction of [%r4]: %r1 = [%r2] + [%r3], minus [%r4] when that does not borrow (presumably %r4 is the field modulus - confirm against callers)
+{
+%r5 = load i64, i64* %r2 ; start assembling the 7 words at %r2 into one i448; word 0 is least significant
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64 ; word i is placed at bit offset 64*i
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46 ; %r47 = all 7 words of [%r2] as one i448
+%r48 = load i64, i64* %r3 ; same assembly for the 7 words at %r3
+%r49 = zext i64 %r48 to i128
+%r51 = getelementptr i64, i64* %r3, i32 1
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i128
+%r54 = shl i128 %r53, 64
+%r55 = or i128 %r49, %r54
+%r56 = zext i128 %r55 to i192
+%r58 = getelementptr i64, i64* %r3, i32 2
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i192
+%r61 = shl i192 %r60, 128
+%r62 = or i192 %r56, %r61
+%r63 = zext i192 %r62 to i256
+%r65 = getelementptr i64, i64* %r3, i32 3
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i256
+%r68 = shl i256 %r67, 192
+%r69 = or i256 %r63, %r68
+%r70 = zext i256 %r69 to i320
+%r72 = getelementptr i64, i64* %r3, i32 4
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i320
+%r75 = shl i320 %r74, 256
+%r76 = or i320 %r70, %r75
+%r77 = zext i320 %r76 to i384
+%r79 = getelementptr i64, i64* %r3, i32 5
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i384
+%r82 = shl i384 %r81, 320
+%r83 = or i384 %r77, %r82
+%r84 = zext i384 %r83 to i448
+%r86 = getelementptr i64, i64* %r3, i32 6
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i448
+%r89 = shl i448 %r88, 384
+%r90 = or i448 %r84, %r89 ; %r90 = all 7 words of [%r3] as one i448
+%r91 = zext i448 %r47 to i512 ; widen both operands so the sum keeps its carry bit
+%r92 = zext i448 %r90 to i512
+%r93 = add i512 %r91, %r92 ; unreduced sum
+%r94 = trunc i512 %r93 to i448
+%r95 = trunc i448 %r94 to i64 ; first pass: write the low 448 bits of the unreduced sum to %r1
+%r97 = getelementptr i64, i64* %r1, i32 0
+store i64 %r95, i64* %r97
+%r98 = lshr i448 %r94, 64
+%r99 = trunc i448 %r98 to i64
+%r101 = getelementptr i64, i64* %r1, i32 1
+store i64 %r99, i64* %r101
+%r102 = lshr i448 %r98, 64
+%r103 = trunc i448 %r102 to i64
+%r105 = getelementptr i64, i64* %r1, i32 2
+store i64 %r103, i64* %r105
+%r106 = lshr i448 %r102, 64
+%r107 = trunc i448 %r106 to i64
+%r109 = getelementptr i64, i64* %r1, i32 3
+store i64 %r107, i64* %r109
+%r110 = lshr i448 %r106, 64
+%r111 = trunc i448 %r110 to i64
+%r113 = getelementptr i64, i64* %r1, i32 4
+store i64 %r111, i64* %r113
+%r114 = lshr i448 %r110, 64
+%r115 = trunc i448 %r114 to i64
+%r117 = getelementptr i64, i64* %r1, i32 5
+store i64 %r115, i64* %r117
+%r118 = lshr i448 %r114, 64
+%r119 = trunc i448 %r118 to i64
+%r121 = getelementptr i64, i64* %r1, i32 6
+store i64 %r119, i64* %r121
+%r122 = load i64, i64* %r4 ; now assemble the 7 words at %r4
+%r123 = zext i64 %r122 to i128
+%r125 = getelementptr i64, i64* %r4, i32 1
+%r126 = load i64, i64* %r125
+%r127 = zext i64 %r126 to i128
+%r128 = shl i128 %r127, 64
+%r129 = or i128 %r123, %r128
+%r130 = zext i128 %r129 to i192
+%r132 = getelementptr i64, i64* %r4, i32 2
+%r133 = load i64, i64* %r132
+%r134 = zext i64 %r133 to i192
+%r135 = shl i192 %r134, 128
+%r136 = or i192 %r130, %r135
+%r137 = zext i192 %r136 to i256
+%r139 = getelementptr i64, i64* %r4, i32 3
+%r140 = load i64, i64* %r139
+%r141 = zext i64 %r140 to i256
+%r142 = shl i256 %r141, 192
+%r143 = or i256 %r137, %r142
+%r144 = zext i256 %r143 to i320
+%r146 = getelementptr i64, i64* %r4, i32 4
+%r147 = load i64, i64* %r146
+%r148 = zext i64 %r147 to i320
+%r149 = shl i320 %r148, 256
+%r150 = or i320 %r144, %r149
+%r151 = zext i320 %r150 to i384
+%r153 = getelementptr i64, i64* %r4, i32 5
+%r154 = load i64, i64* %r153
+%r155 = zext i64 %r154 to i384
+%r156 = shl i384 %r155, 320
+%r157 = or i384 %r151, %r156
+%r158 = zext i384 %r157 to i448
+%r160 = getelementptr i64, i64* %r4, i32 6
+%r161 = load i64, i64* %r160
+%r162 = zext i64 %r161 to i448
+%r163 = shl i448 %r162, 384
+%r164 = or i448 %r158, %r163 ; %r164 = all 7 words of [%r4] as one i448
+%r165 = zext i448 %r164 to i512
+%r166 = sub i512 %r93, %r165 ; trial subtraction: sum - [%r4], carried out on the full 512-bit sum
+%r167 = lshr i512 %r166, 448
+%r168 = trunc i512 %r167 to i1 ; bit 448 of the difference is used as the borrow flag
+br i1%r168, label %carry, label %nocarry
+nocarry: ; flag clear: the difference replaces the sum already stored in %r1
+%r169 = trunc i512 %r166 to i448
+%r170 = trunc i448 %r169 to i64
+%r172 = getelementptr i64, i64* %r1, i32 0
+store i64 %r170, i64* %r172
+%r173 = lshr i448 %r169, 64
+%r174 = trunc i448 %r173 to i64
+%r176 = getelementptr i64, i64* %r1, i32 1
+store i64 %r174, i64* %r176
+%r177 = lshr i448 %r173, 64
+%r178 = trunc i448 %r177 to i64
+%r180 = getelementptr i64, i64* %r1, i32 2
+store i64 %r178, i64* %r180
+%r181 = lshr i448 %r177, 64
+%r182 = trunc i448 %r181 to i64
+%r184 = getelementptr i64, i64* %r1, i32 3
+store i64 %r182, i64* %r184
+%r185 = lshr i448 %r181, 64
+%r186 = trunc i448 %r185 to i64
+%r188 = getelementptr i64, i64* %r1, i32 4
+store i64 %r186, i64* %r188
+%r189 = lshr i448 %r185, 64
+%r190 = trunc i448 %r189 to i64
+%r192 = getelementptr i64, i64* %r1, i32 5
+store i64 %r190, i64* %r192
+%r193 = lshr i448 %r189, 64
+%r194 = trunc i448 %r193 to i64
+%r196 = getelementptr i64, i64* %r1, i32 6
+store i64 %r194, i64* %r196
+ret void
+carry: ; flag set: keep the unreduced sum that the first pass already stored in %r1
+ret void
+}
+define void @mcl_fp_addNF7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; branch-free 7-word add with conditional subtraction of [%r4], done entirely in 448 bits (assumes bit 447 of the inputs is unused so the sum cannot overflow - TODO confirm)
+{
+%r5 = load i64, i64* %r2 ; start assembling the 7 words at %r2 into one i448; word 0 is least significant
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64 ; word i is placed at bit offset 64*i
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46 ; %r47 = all 7 words of [%r2] as one i448
+%r48 = load i64, i64* %r3 ; same assembly for the 7 words at %r3
+%r49 = zext i64 %r48 to i128
+%r51 = getelementptr i64, i64* %r3, i32 1
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i128
+%r54 = shl i128 %r53, 64
+%r55 = or i128 %r49, %r54
+%r56 = zext i128 %r55 to i192
+%r58 = getelementptr i64, i64* %r3, i32 2
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i192
+%r61 = shl i192 %r60, 128
+%r62 = or i192 %r56, %r61
+%r63 = zext i192 %r62 to i256
+%r65 = getelementptr i64, i64* %r3, i32 3
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i256
+%r68 = shl i256 %r67, 192
+%r69 = or i256 %r63, %r68
+%r70 = zext i256 %r69 to i320
+%r72 = getelementptr i64, i64* %r3, i32 4
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i320
+%r75 = shl i320 %r74, 256
+%r76 = or i320 %r70, %r75
+%r77 = zext i320 %r76 to i384
+%r79 = getelementptr i64, i64* %r3, i32 5
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i384
+%r82 = shl i384 %r81, 320
+%r83 = or i384 %r77, %r82
+%r84 = zext i384 %r83 to i448
+%r86 = getelementptr i64, i64* %r3, i32 6
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i448
+%r89 = shl i448 %r88, 384
+%r90 = or i448 %r84, %r89 ; %r90 = all 7 words of [%r3] as one i448
+%r91 = add i448 %r47, %r90 ; sum kept at 448 bits: no widening, so any carry out of bit 447 is lost
+%r92 = load i64, i64* %r4 ; now assemble the 7 words at %r4
+%r93 = zext i64 %r92 to i128
+%r95 = getelementptr i64, i64* %r4, i32 1
+%r96 = load i64, i64* %r95
+%r97 = zext i64 %r96 to i128
+%r98 = shl i128 %r97, 64
+%r99 = or i128 %r93, %r98
+%r100 = zext i128 %r99 to i192
+%r102 = getelementptr i64, i64* %r4, i32 2
+%r103 = load i64, i64* %r102
+%r104 = zext i64 %r103 to i192
+%r105 = shl i192 %r104, 128
+%r106 = or i192 %r100, %r105
+%r107 = zext i192 %r106 to i256
+%r109 = getelementptr i64, i64* %r4, i32 3
+%r110 = load i64, i64* %r109
+%r111 = zext i64 %r110 to i256
+%r112 = shl i256 %r111, 192
+%r113 = or i256 %r107, %r112
+%r114 = zext i256 %r113 to i320
+%r116 = getelementptr i64, i64* %r4, i32 4
+%r117 = load i64, i64* %r116
+%r118 = zext i64 %r117 to i320
+%r119 = shl i320 %r118, 256
+%r120 = or i320 %r114, %r119
+%r121 = zext i320 %r120 to i384
+%r123 = getelementptr i64, i64* %r4, i32 5
+%r124 = load i64, i64* %r123
+%r125 = zext i64 %r124 to i384
+%r126 = shl i384 %r125, 320
+%r127 = or i384 %r121, %r126
+%r128 = zext i384 %r127 to i448
+%r130 = getelementptr i64, i64* %r4, i32 6
+%r131 = load i64, i64* %r130
+%r132 = zext i64 %r131 to i448
+%r133 = shl i448 %r132, 384
+%r134 = or i448 %r128, %r133 ; %r134 = all 7 words of [%r4] as one i448
+%r135 = sub i448 %r91, %r134 ; trial subtraction: sum - [%r4], modulo 2^448
+%r136 = lshr i448 %r135, 447
+%r137 = trunc i448 %r136 to i1 ; top bit (447) of the difference, treated as its sign
+%r138 = select i1 %r137, i448 %r91, i448 %r135 ; sign set: keep the plain sum; sign clear: take the difference
+%r139 = trunc i448 %r138 to i64 ; peel off 64 bits at a time, lowest word first
+%r141 = getelementptr i64, i64* %r1, i32 0
+store i64 %r139, i64* %r141
+%r142 = lshr i448 %r138, 64
+%r143 = trunc i448 %r142 to i64
+%r145 = getelementptr i64, i64* %r1, i32 1
+store i64 %r143, i64* %r145
+%r146 = lshr i448 %r142, 64
+%r147 = trunc i448 %r146 to i64
+%r149 = getelementptr i64, i64* %r1, i32 2
+store i64 %r147, i64* %r149
+%r150 = lshr i448 %r146, 64
+%r151 = trunc i448 %r150 to i64
+%r153 = getelementptr i64, i64* %r1, i32 3
+store i64 %r151, i64* %r153
+%r154 = lshr i448 %r150, 64
+%r155 = trunc i448 %r154 to i64
+%r157 = getelementptr i64, i64* %r1, i32 4
+store i64 %r155, i64* %r157
+%r158 = lshr i448 %r154, 64
+%r159 = trunc i448 %r158 to i64
+%r161 = getelementptr i64, i64* %r1, i32 5
+store i64 %r159, i64* %r161
+%r162 = lshr i448 %r158, 64
+%r163 = trunc i448 %r162 to i64
+%r165 = getelementptr i64, i64* %r1, i32 6
+store i64 %r163, i64* %r165 ; word 6, the most significant output word
+ret void
+}
+define void @mcl_fp_sub7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; 7-word subtract with conditional add-back: %r1 = [%r2] - [%r3], plus [%r4] when the subtraction borrowed (presumably %r4 is the field modulus - confirm against callers)
+{
+%r5 = load i64, i64* %r2 ; start assembling the 7 words at %r2 (minuend) into one i448; word 0 is least significant
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64 ; word i is placed at bit offset 64*i
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46 ; %r47 = all 7 words of [%r2] as one i448
+%r48 = load i64, i64* %r3 ; same assembly for the 7 words at %r3 (subtrahend)
+%r49 = zext i64 %r48 to i128
+%r51 = getelementptr i64, i64* %r3, i32 1
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i128
+%r54 = shl i128 %r53, 64
+%r55 = or i128 %r49, %r54
+%r56 = zext i128 %r55 to i192
+%r58 = getelementptr i64, i64* %r3, i32 2
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i192
+%r61 = shl i192 %r60, 128
+%r62 = or i192 %r56, %r61
+%r63 = zext i192 %r62 to i256
+%r65 = getelementptr i64, i64* %r3, i32 3
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i256
+%r68 = shl i256 %r67, 192
+%r69 = or i256 %r63, %r68
+%r70 = zext i256 %r69 to i320
+%r72 = getelementptr i64, i64* %r3, i32 4
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i320
+%r75 = shl i320 %r74, 256
+%r76 = or i320 %r70, %r75
+%r77 = zext i320 %r76 to i384
+%r79 = getelementptr i64, i64* %r3, i32 5
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i384
+%r82 = shl i384 %r81, 320
+%r83 = or i384 %r77, %r82
+%r84 = zext i384 %r83 to i448
+%r86 = getelementptr i64, i64* %r3, i32 6
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i448
+%r89 = shl i448 %r88, 384
+%r90 = or i448 %r84, %r89 ; %r90 = all 7 words of [%r3] as one i448
+%r91 = zext i448 %r47 to i512 ; widen both operands so a wrap-around shows up in bit 448
+%r92 = zext i448 %r90 to i512
+%r93 = sub i512 %r91, %r92 ; wraps modulo 2^512 when [%r2] < [%r3], which sets every bit from 448 upward
+%r94 = trunc i512 %r93 to i448 ; low 448 bits of the raw difference
+%r95 = lshr i512 %r93, 448
+%r96 = trunc i512 %r95 to i1 ; borrow flag = bit 448 of the difference
+%r97 = trunc i448 %r94 to i64 ; first pass: write the raw difference to %r1 regardless of the borrow
+%r99 = getelementptr i64, i64* %r1, i32 0
+store i64 %r97, i64* %r99
+%r100 = lshr i448 %r94, 64
+%r101 = trunc i448 %r100 to i64
+%r103 = getelementptr i64, i64* %r1, i32 1
+store i64 %r101, i64* %r103
+%r104 = lshr i448 %r100, 64
+%r105 = trunc i448 %r104 to i64
+%r107 = getelementptr i64, i64* %r1, i32 2
+store i64 %r105, i64* %r107
+%r108 = lshr i448 %r104, 64
+%r109 = trunc i448 %r108 to i64
+%r111 = getelementptr i64, i64* %r1, i32 3
+store i64 %r109, i64* %r111
+%r112 = lshr i448 %r108, 64
+%r113 = trunc i448 %r112 to i64
+%r115 = getelementptr i64, i64* %r1, i32 4
+store i64 %r113, i64* %r115
+%r116 = lshr i448 %r112, 64
+%r117 = trunc i448 %r116 to i64
+%r119 = getelementptr i64, i64* %r1, i32 5
+store i64 %r117, i64* %r119
+%r120 = lshr i448 %r116, 64
+%r121 = trunc i448 %r120 to i64
+%r123 = getelementptr i64, i64* %r1, i32 6
+store i64 %r121, i64* %r123
+br i1%r96, label %carry, label %nocarry
+nocarry: ; no borrow: the difference already stored in %r1 is the result
+ret void
+carry: ; borrow: add [%r4] to the wrapped difference and store again
+%r124 = load i64, i64* %r4 ; assemble the 7 words at %r4 into one i448
+%r125 = zext i64 %r124 to i128
+%r127 = getelementptr i64, i64* %r4, i32 1
+%r128 = load i64, i64* %r127
+%r129 = zext i64 %r128 to i128
+%r130 = shl i128 %r129, 64
+%r131 = or i128 %r125, %r130
+%r132 = zext i128 %r131 to i192
+%r134 = getelementptr i64, i64* %r4, i32 2
+%r135 = load i64, i64* %r134
+%r136 = zext i64 %r135 to i192
+%r137 = shl i192 %r136, 128
+%r138 = or i192 %r132, %r137
+%r139 = zext i192 %r138 to i256
+%r141 = getelementptr i64, i64* %r4, i32 3
+%r142 = load i64, i64* %r141
+%r143 = zext i64 %r142 to i256
+%r144 = shl i256 %r143, 192
+%r145 = or i256 %r139, %r144
+%r146 = zext i256 %r145 to i320
+%r148 = getelementptr i64, i64* %r4, i32 4
+%r149 = load i64, i64* %r148
+%r150 = zext i64 %r149 to i320
+%r151 = shl i320 %r150, 256
+%r152 = or i320 %r146, %r151
+%r153 = zext i320 %r152 to i384
+%r155 = getelementptr i64, i64* %r4, i32 5
+%r156 = load i64, i64* %r155
+%r157 = zext i64 %r156 to i384
+%r158 = shl i384 %r157, 320
+%r159 = or i384 %r153, %r158
+%r160 = zext i384 %r159 to i448
+%r162 = getelementptr i64, i64* %r4, i32 6
+%r163 = load i64, i64* %r162
+%r164 = zext i64 %r163 to i448
+%r165 = shl i448 %r164, 384
+%r166 = or i448 %r160, %r165 ; %r166 = all 7 words of [%r4] as one i448
+%r167 = add i448 %r94, %r166 ; 448-bit add, so the result is taken modulo 2^448
+%r168 = trunc i448 %r167 to i64 ; second pass: overwrite %r1 with the corrected value, lowest word first
+%r170 = getelementptr i64, i64* %r1, i32 0
+store i64 %r168, i64* %r170
+%r171 = lshr i448 %r167, 64
+%r172 = trunc i448 %r171 to i64
+%r174 = getelementptr i64, i64* %r1, i32 1
+store i64 %r172, i64* %r174
+%r175 = lshr i448 %r171, 64
+%r176 = trunc i448 %r175 to i64
+%r178 = getelementptr i64, i64* %r1, i32 2
+store i64 %r176, i64* %r178
+%r179 = lshr i448 %r175, 64
+%r180 = trunc i448 %r179 to i64
+%r182 = getelementptr i64, i64* %r1, i32 3
+store i64 %r180, i64* %r182
+%r183 = lshr i448 %r179, 64
+%r184 = trunc i448 %r183 to i64
+%r186 = getelementptr i64, i64* %r1, i32 4
+store i64 %r184, i64* %r186
+%r187 = lshr i448 %r183, 64
+%r188 = trunc i448 %r187 to i64
+%r190 = getelementptr i64, i64* %r1, i32 5
+store i64 %r188, i64* %r190
+%r191 = lshr i448 %r187, 64
+%r192 = trunc i448 %r191 to i64
+%r194 = getelementptr i64, i64* %r1, i32 6
+store i64 %r192, i64* %r194
+ret void
+}
+define void @mcl_fp_subNF7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; branch-free 7-word subtract with conditional add-back of [%r4], done entirely in 448 bits (assumes bit 447 of the inputs is unused so it can serve as a sign bit - TODO confirm)
+{
+%r5 = load i64, i64* %r2 ; start assembling the 7 words at %r2 (minuend) into one i448; word 0 is least significant
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64 ; word i is placed at bit offset 64*i
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46 ; %r47 = all 7 words of [%r2] as one i448
+%r48 = load i64, i64* %r3 ; same assembly for the 7 words at %r3 (subtrahend)
+%r49 = zext i64 %r48 to i128
+%r51 = getelementptr i64, i64* %r3, i32 1
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i128
+%r54 = shl i128 %r53, 64
+%r55 = or i128 %r49, %r54
+%r56 = zext i128 %r55 to i192
+%r58 = getelementptr i64, i64* %r3, i32 2
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i192
+%r61 = shl i192 %r60, 128
+%r62 = or i192 %r56, %r61
+%r63 = zext i192 %r62 to i256
+%r65 = getelementptr i64, i64* %r3, i32 3
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i256
+%r68 = shl i256 %r67, 192
+%r69 = or i256 %r63, %r68
+%r70 = zext i256 %r69 to i320
+%r72 = getelementptr i64, i64* %r3, i32 4
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i320
+%r75 = shl i320 %r74, 256
+%r76 = or i320 %r70, %r75
+%r77 = zext i320 %r76 to i384
+%r79 = getelementptr i64, i64* %r3, i32 5
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i384
+%r82 = shl i384 %r81, 320
+%r83 = or i384 %r77, %r82
+%r84 = zext i384 %r83 to i448
+%r86 = getelementptr i64, i64* %r3, i32 6
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i448
+%r89 = shl i448 %r88, 384
+%r90 = or i448 %r84, %r89 ; %r90 = all 7 words of [%r3] as one i448
+%r91 = sub i448 %r47, %r90 ; difference kept at 448 bits: no widening, result is modulo 2^448
+%r92 = lshr i448 %r91, 447
+%r93 = trunc i448 %r92 to i1 ; top bit (447) of the difference, treated as its sign
+%r94 = load i64, i64* %r4 ; now assemble the 7 words at %r4 (loaded unconditionally)
+%r95 = zext i64 %r94 to i128
+%r97 = getelementptr i64, i64* %r4, i32 1
+%r98 = load i64, i64* %r97
+%r99 = zext i64 %r98 to i128
+%r100 = shl i128 %r99, 64
+%r101 = or i128 %r95, %r100
+%r102 = zext i128 %r101 to i192
+%r104 = getelementptr i64, i64* %r4, i32 2
+%r105 = load i64, i64* %r104
+%r106 = zext i64 %r105 to i192
+%r107 = shl i192 %r106, 128
+%r108 = or i192 %r102, %r107
+%r109 = zext i192 %r108 to i256
+%r111 = getelementptr i64, i64* %r4, i32 3
+%r112 = load i64, i64* %r111
+%r113 = zext i64 %r112 to i256
+%r114 = shl i256 %r113, 192
+%r115 = or i256 %r109, %r114
+%r116 = zext i256 %r115 to i320
+%r118 = getelementptr i64, i64* %r4, i32 4
+%r119 = load i64, i64* %r118
+%r120 = zext i64 %r119 to i320
+%r121 = shl i320 %r120, 256
+%r122 = or i320 %r116, %r121
+%r123 = zext i320 %r122 to i384
+%r125 = getelementptr i64, i64* %r4, i32 5
+%r126 = load i64, i64* %r125
+%r127 = zext i64 %r126 to i384
+%r128 = shl i384 %r127, 320
+%r129 = or i384 %r123, %r128
+%r130 = zext i384 %r129 to i448
+%r132 = getelementptr i64, i64* %r4, i32 6
+%r133 = load i64, i64* %r132
+%r134 = zext i64 %r133 to i448
+%r135 = shl i448 %r134, 384
+%r136 = or i448 %r130, %r135 ; %r136 = all 7 words of [%r4] as one i448
+%r138 = select i1 %r93, i448 %r136, i448 0 ; addend is [%r4] when the sign bit is set, otherwise zero
+%r139 = add i448 %r91, %r138 ; add-back without a branch
+%r140 = trunc i448 %r139 to i64 ; peel off 64 bits at a time, lowest word first
+%r142 = getelementptr i64, i64* %r1, i32 0
+store i64 %r140, i64* %r142
+%r143 = lshr i448 %r139, 64
+%r144 = trunc i448 %r143 to i64
+%r146 = getelementptr i64, i64* %r1, i32 1
+store i64 %r144, i64* %r146
+%r147 = lshr i448 %r143, 64
+%r148 = trunc i448 %r147 to i64
+%r150 = getelementptr i64, i64* %r1, i32 2
+store i64 %r148, i64* %r150
+%r151 = lshr i448 %r147, 64
+%r152 = trunc i448 %r151 to i64
+%r154 = getelementptr i64, i64* %r1, i32 3
+store i64 %r152, i64* %r154
+%r155 = lshr i448 %r151, 64
+%r156 = trunc i448 %r155 to i64
+%r158 = getelementptr i64, i64* %r1, i32 4
+store i64 %r156, i64* %r158
+%r159 = lshr i448 %r155, 64
+%r160 = trunc i448 %r159 to i64
+%r162 = getelementptr i64, i64* %r1, i32 5
+store i64 %r160, i64* %r162
+%r163 = lshr i448 %r159, 64
+%r164 = trunc i448 %r163 to i64
+%r166 = getelementptr i64, i64* %r1, i32 6
+store i64 %r164, i64* %r166 ; word 6, the most significant output word
+ret void
+}
+define void @mcl_fpDbl_add7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; adds the 14-limb values at %r2 and %r3; low 7 limbs go to %r1[0..6] as-is, the upper part is conditionally reduced by the 7-limb value at %r4 and goes to %r1[7..13]
+{
+%r5 = load i64, i64* %r2 ; assemble x: 14 i64 limbs of %r2, limb i placed at bit 64*i
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = zext i576 %r61 to i640
+%r64 = getelementptr i64, i64* %r2, i32 9
+%r65 = load i64, i64* %r64
+%r66 = zext i64 %r65 to i640
+%r67 = shl i640 %r66, 576
+%r68 = or i640 %r62, %r67
+%r69 = zext i640 %r68 to i704
+%r71 = getelementptr i64, i64* %r2, i32 10
+%r72 = load i64, i64* %r71
+%r73 = zext i64 %r72 to i704
+%r74 = shl i704 %r73, 640
+%r75 = or i704 %r69, %r74
+%r76 = zext i704 %r75 to i768
+%r78 = getelementptr i64, i64* %r2, i32 11
+%r79 = load i64, i64* %r78
+%r80 = zext i64 %r79 to i768
+%r81 = shl i768 %r80, 704
+%r82 = or i768 %r76, %r81
+%r83 = zext i768 %r82 to i832
+%r85 = getelementptr i64, i64* %r2, i32 12
+%r86 = load i64, i64* %r85
+%r87 = zext i64 %r86 to i832
+%r88 = shl i832 %r87, 768
+%r89 = or i832 %r83, %r88
+%r90 = zext i832 %r89 to i896
+%r92 = getelementptr i64, i64* %r2, i32 13
+%r93 = load i64, i64* %r92
+%r94 = zext i64 %r93 to i896
+%r95 = shl i896 %r94, 832
+%r96 = or i896 %r90, %r95 ; %r96 = complete 896-bit x
+%r97 = load i64, i64* %r3 ; assemble y: 14 i64 limbs of %r3, same layout as x
+%r98 = zext i64 %r97 to i128
+%r100 = getelementptr i64, i64* %r3, i32 1
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i128
+%r103 = shl i128 %r102, 64
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i192
+%r107 = getelementptr i64, i64* %r3, i32 2
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i192
+%r110 = shl i192 %r109, 128
+%r111 = or i192 %r105, %r110
+%r112 = zext i192 %r111 to i256
+%r114 = getelementptr i64, i64* %r3, i32 3
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i256
+%r117 = shl i256 %r116, 192
+%r118 = or i256 %r112, %r117
+%r119 = zext i256 %r118 to i320
+%r121 = getelementptr i64, i64* %r3, i32 4
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i320
+%r124 = shl i320 %r123, 256
+%r125 = or i320 %r119, %r124
+%r126 = zext i320 %r125 to i384
+%r128 = getelementptr i64, i64* %r3, i32 5
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i384
+%r131 = shl i384 %r130, 320
+%r132 = or i384 %r126, %r131
+%r133 = zext i384 %r132 to i448
+%r135 = getelementptr i64, i64* %r3, i32 6
+%r136 = load i64, i64* %r135
+%r137 = zext i64 %r136 to i448
+%r138 = shl i448 %r137, 384
+%r139 = or i448 %r133, %r138
+%r140 = zext i448 %r139 to i512
+%r142 = getelementptr i64, i64* %r3, i32 7
+%r143 = load i64, i64* %r142
+%r144 = zext i64 %r143 to i512
+%r145 = shl i512 %r144, 448
+%r146 = or i512 %r140, %r145
+%r147 = zext i512 %r146 to i576
+%r149 = getelementptr i64, i64* %r3, i32 8
+%r150 = load i64, i64* %r149
+%r151 = zext i64 %r150 to i576
+%r152 = shl i576 %r151, 512
+%r153 = or i576 %r147, %r152
+%r154 = zext i576 %r153 to i640
+%r156 = getelementptr i64, i64* %r3, i32 9
+%r157 = load i64, i64* %r156
+%r158 = zext i64 %r157 to i640
+%r159 = shl i640 %r158, 576
+%r160 = or i640 %r154, %r159
+%r161 = zext i640 %r160 to i704
+%r163 = getelementptr i64, i64* %r3, i32 10
+%r164 = load i64, i64* %r163
+%r165 = zext i64 %r164 to i704
+%r166 = shl i704 %r165, 640
+%r167 = or i704 %r161, %r166
+%r168 = zext i704 %r167 to i768
+%r170 = getelementptr i64, i64* %r3, i32 11
+%r171 = load i64, i64* %r170
+%r172 = zext i64 %r171 to i768
+%r173 = shl i768 %r172, 704
+%r174 = or i768 %r168, %r173
+%r175 = zext i768 %r174 to i832
+%r177 = getelementptr i64, i64* %r3, i32 12
+%r178 = load i64, i64* %r177
+%r179 = zext i64 %r178 to i832
+%r180 = shl i832 %r179, 768
+%r181 = or i832 %r175, %r180
+%r182 = zext i832 %r181 to i896
+%r184 = getelementptr i64, i64* %r3, i32 13
+%r185 = load i64, i64* %r184
+%r186 = zext i64 %r185 to i896
+%r187 = shl i896 %r186, 832
+%r188 = or i896 %r182, %r187 ; %r188 = complete 896-bit y
+%r189 = zext i896 %r96 to i960 ; widen both operands to i960 so the carry out of bit 895 is not lost
+%r190 = zext i896 %r188 to i960
+%r191 = add i960 %r189, %r190 ; %r191 = x + y
+%r192 = trunc i960 %r191 to i448 ; low 448 bits of the sum, written unmodified to %r1[0..6]
+%r193 = trunc i448 %r192 to i64
+%r195 = getelementptr i64, i64* %r1, i32 0
+store i64 %r193, i64* %r195
+%r196 = lshr i448 %r192, 64
+%r197 = trunc i448 %r196 to i64
+%r199 = getelementptr i64, i64* %r1, i32 1
+store i64 %r197, i64* %r199
+%r200 = lshr i448 %r196, 64
+%r201 = trunc i448 %r200 to i64
+%r203 = getelementptr i64, i64* %r1, i32 2
+store i64 %r201, i64* %r203
+%r204 = lshr i448 %r200, 64
+%r205 = trunc i448 %r204 to i64
+%r207 = getelementptr i64, i64* %r1, i32 3
+store i64 %r205, i64* %r207
+%r208 = lshr i448 %r204, 64
+%r209 = trunc i448 %r208 to i64
+%r211 = getelementptr i64, i64* %r1, i32 4
+store i64 %r209, i64* %r211
+%r212 = lshr i448 %r208, 64
+%r213 = trunc i448 %r212 to i64
+%r215 = getelementptr i64, i64* %r1, i32 5
+store i64 %r213, i64* %r215
+%r216 = lshr i448 %r212, 64
+%r217 = trunc i448 %r216 to i64
+%r219 = getelementptr i64, i64* %r1, i32 6
+store i64 %r217, i64* %r219
+%r220 = lshr i960 %r191, 448 ; upper part of the sum (bits 448 and above)
+%r221 = trunc i960 %r220 to i512 ; kept 512 bits wide: 448 data bits plus the carry
+%r222 = load i64, i64* %r4 ; assemble the 7-limb value at %r4 into i448 (presumably the field modulus p - confirm against caller)
+%r223 = zext i64 %r222 to i128
+%r225 = getelementptr i64, i64* %r4, i32 1
+%r226 = load i64, i64* %r225
+%r227 = zext i64 %r226 to i128
+%r228 = shl i128 %r227, 64
+%r229 = or i128 %r223, %r228
+%r230 = zext i128 %r229 to i192
+%r232 = getelementptr i64, i64* %r4, i32 2
+%r233 = load i64, i64* %r232
+%r234 = zext i64 %r233 to i192
+%r235 = shl i192 %r234, 128
+%r236 = or i192 %r230, %r235
+%r237 = zext i192 %r236 to i256
+%r239 = getelementptr i64, i64* %r4, i32 3
+%r240 = load i64, i64* %r239
+%r241 = zext i64 %r240 to i256
+%r242 = shl i256 %r241, 192
+%r243 = or i256 %r237, %r242
+%r244 = zext i256 %r243 to i320
+%r246 = getelementptr i64, i64* %r4, i32 4
+%r247 = load i64, i64* %r246
+%r248 = zext i64 %r247 to i320
+%r249 = shl i320 %r248, 256
+%r250 = or i320 %r244, %r249
+%r251 = zext i320 %r250 to i384
+%r253 = getelementptr i64, i64* %r4, i32 5
+%r254 = load i64, i64* %r253
+%r255 = zext i64 %r254 to i384
+%r256 = shl i384 %r255, 320
+%r257 = or i384 %r251, %r256
+%r258 = zext i384 %r257 to i448
+%r260 = getelementptr i64, i64* %r4, i32 6
+%r261 = load i64, i64* %r260
+%r262 = zext i64 %r261 to i448
+%r263 = shl i448 %r262, 384
+%r264 = or i448 %r258, %r263
+%r265 = zext i448 %r264 to i512
+%r266 = sub i512 %r221, %r265 ; trial subtraction: upper part minus the %r4 value
+%r267 = lshr i512 %r266, 448 ; isolate bit 448 of the difference
+%r268 = trunc i512 %r267 to i1 ; %r268 is used as the "subtraction borrowed" flag
+%r269 = select i1 %r268, i512 %r221, i512 %r266 ; flag set -> keep the unsubtracted upper part, otherwise take the difference
+%r270 = trunc i512 %r269 to i448 ; selected 448 bits are stored to %r1[7..13]
+%r272 = getelementptr i64, i64* %r1, i32 7
+%r273 = trunc i448 %r270 to i64
+%r275 = getelementptr i64, i64* %r272, i32 0
+store i64 %r273, i64* %r275
+%r276 = lshr i448 %r270, 64
+%r277 = trunc i448 %r276 to i64
+%r279 = getelementptr i64, i64* %r272, i32 1
+store i64 %r277, i64* %r279
+%r280 = lshr i448 %r276, 64
+%r281 = trunc i448 %r280 to i64
+%r283 = getelementptr i64, i64* %r272, i32 2
+store i64 %r281, i64* %r283
+%r284 = lshr i448 %r280, 64
+%r285 = trunc i448 %r284 to i64
+%r287 = getelementptr i64, i64* %r272, i32 3
+store i64 %r285, i64* %r287
+%r288 = lshr i448 %r284, 64
+%r289 = trunc i448 %r288 to i64
+%r291 = getelementptr i64, i64* %r272, i32 4
+store i64 %r289, i64* %r291
+%r292 = lshr i448 %r288, 64
+%r293 = trunc i448 %r292 to i64
+%r295 = getelementptr i64, i64* %r272, i32 5
+store i64 %r293, i64* %r295
+%r296 = lshr i448 %r292, 64
+%r297 = trunc i448 %r296 to i64
+%r299 = getelementptr i64, i64* %r272, i32 6
+store i64 %r297, i64* %r299
+ret void
+}
+define void @mcl_fpDbl_sub7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; subtracts the 14-limb value at %r3 from the one at %r2; low 7 limbs go to %r1[0..6] as-is, and on borrow the 7-limb value at %r4 is added to the upper 7 limbs before they go to %r1[7..13]
+{
+%r5 = load i64, i64* %r2 ; assemble x: 14 i64 limbs of %r2, limb i placed at bit 64*i
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = zext i576 %r61 to i640
+%r64 = getelementptr i64, i64* %r2, i32 9
+%r65 = load i64, i64* %r64
+%r66 = zext i64 %r65 to i640
+%r67 = shl i640 %r66, 576
+%r68 = or i640 %r62, %r67
+%r69 = zext i640 %r68 to i704
+%r71 = getelementptr i64, i64* %r2, i32 10
+%r72 = load i64, i64* %r71
+%r73 = zext i64 %r72 to i704
+%r74 = shl i704 %r73, 640
+%r75 = or i704 %r69, %r74
+%r76 = zext i704 %r75 to i768
+%r78 = getelementptr i64, i64* %r2, i32 11
+%r79 = load i64, i64* %r78
+%r80 = zext i64 %r79 to i768
+%r81 = shl i768 %r80, 704
+%r82 = or i768 %r76, %r81
+%r83 = zext i768 %r82 to i832
+%r85 = getelementptr i64, i64* %r2, i32 12
+%r86 = load i64, i64* %r85
+%r87 = zext i64 %r86 to i832
+%r88 = shl i832 %r87, 768
+%r89 = or i832 %r83, %r88
+%r90 = zext i832 %r89 to i896
+%r92 = getelementptr i64, i64* %r2, i32 13
+%r93 = load i64, i64* %r92
+%r94 = zext i64 %r93 to i896
+%r95 = shl i896 %r94, 832
+%r96 = or i896 %r90, %r95 ; %r96 = complete 896-bit x
+%r97 = load i64, i64* %r3 ; assemble y: 14 i64 limbs of %r3, same layout as x
+%r98 = zext i64 %r97 to i128
+%r100 = getelementptr i64, i64* %r3, i32 1
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i128
+%r103 = shl i128 %r102, 64
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i192
+%r107 = getelementptr i64, i64* %r3, i32 2
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i192
+%r110 = shl i192 %r109, 128
+%r111 = or i192 %r105, %r110
+%r112 = zext i192 %r111 to i256
+%r114 = getelementptr i64, i64* %r3, i32 3
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i256
+%r117 = shl i256 %r116, 192
+%r118 = or i256 %r112, %r117
+%r119 = zext i256 %r118 to i320
+%r121 = getelementptr i64, i64* %r3, i32 4
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i320
+%r124 = shl i320 %r123, 256
+%r125 = or i320 %r119, %r124
+%r126 = zext i320 %r125 to i384
+%r128 = getelementptr i64, i64* %r3, i32 5
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i384
+%r131 = shl i384 %r130, 320
+%r132 = or i384 %r126, %r131
+%r133 = zext i384 %r132 to i448
+%r135 = getelementptr i64, i64* %r3, i32 6
+%r136 = load i64, i64* %r135
+%r137 = zext i64 %r136 to i448
+%r138 = shl i448 %r137, 384
+%r139 = or i448 %r133, %r138
+%r140 = zext i448 %r139 to i512
+%r142 = getelementptr i64, i64* %r3, i32 7
+%r143 = load i64, i64* %r142
+%r144 = zext i64 %r143 to i512
+%r145 = shl i512 %r144, 448
+%r146 = or i512 %r140, %r145
+%r147 = zext i512 %r146 to i576
+%r149 = getelementptr i64, i64* %r3, i32 8
+%r150 = load i64, i64* %r149
+%r151 = zext i64 %r150 to i576
+%r152 = shl i576 %r151, 512
+%r153 = or i576 %r147, %r152
+%r154 = zext i576 %r153 to i640
+%r156 = getelementptr i64, i64* %r3, i32 9
+%r157 = load i64, i64* %r156
+%r158 = zext i64 %r157 to i640
+%r159 = shl i640 %r158, 576
+%r160 = or i640 %r154, %r159
+%r161 = zext i640 %r160 to i704
+%r163 = getelementptr i64, i64* %r3, i32 10
+%r164 = load i64, i64* %r163
+%r165 = zext i64 %r164 to i704
+%r166 = shl i704 %r165, 640
+%r167 = or i704 %r161, %r166
+%r168 = zext i704 %r167 to i768
+%r170 = getelementptr i64, i64* %r3, i32 11
+%r171 = load i64, i64* %r170
+%r172 = zext i64 %r171 to i768
+%r173 = shl i768 %r172, 704
+%r174 = or i768 %r168, %r173
+%r175 = zext i768 %r174 to i832
+%r177 = getelementptr i64, i64* %r3, i32 12
+%r178 = load i64, i64* %r177
+%r179 = zext i64 %r178 to i832
+%r180 = shl i832 %r179, 768
+%r181 = or i832 %r175, %r180
+%r182 = zext i832 %r181 to i896
+%r184 = getelementptr i64, i64* %r3, i32 13
+%r185 = load i64, i64* %r184
+%r186 = zext i64 %r185 to i896
+%r187 = shl i896 %r186, 832
+%r188 = or i896 %r182, %r187 ; %r188 = complete 896-bit y
+%r189 = zext i896 %r96 to i960 ; widen both operands to i960 so a borrow shows up above bit 895
+%r190 = zext i896 %r188 to i960
+%r191 = sub i960 %r189, %r190 ; %r191 = x - y (wraps modulo 2^960 when x < y)
+%r192 = trunc i960 %r191 to i448 ; low 448 bits of the difference, written unmodified to %r1[0..6]
+%r193 = trunc i448 %r192 to i64
+%r195 = getelementptr i64, i64* %r1, i32 0
+store i64 %r193, i64* %r195
+%r196 = lshr i448 %r192, 64
+%r197 = trunc i448 %r196 to i64
+%r199 = getelementptr i64, i64* %r1, i32 1
+store i64 %r197, i64* %r199
+%r200 = lshr i448 %r196, 64
+%r201 = trunc i448 %r200 to i64
+%r203 = getelementptr i64, i64* %r1, i32 2
+store i64 %r201, i64* %r203
+%r204 = lshr i448 %r200, 64
+%r205 = trunc i448 %r204 to i64
+%r207 = getelementptr i64, i64* %r1, i32 3
+store i64 %r205, i64* %r207
+%r208 = lshr i448 %r204, 64
+%r209 = trunc i448 %r208 to i64
+%r211 = getelementptr i64, i64* %r1, i32 4
+store i64 %r209, i64* %r211
+%r212 = lshr i448 %r208, 64
+%r213 = trunc i448 %r212 to i64
+%r215 = getelementptr i64, i64* %r1, i32 5
+store i64 %r213, i64* %r215
+%r216 = lshr i448 %r212, 64
+%r217 = trunc i448 %r216 to i64
+%r219 = getelementptr i64, i64* %r1, i32 6
+store i64 %r217, i64* %r219
+%r220 = lshr i960 %r191, 448
+%r221 = trunc i960 %r220 to i448 ; %r221 = bits 448..895 of the difference (upper half)
+%r222 = lshr i960 %r191, 896
+%r223 = trunc i960 %r222 to i1 ; %r223 = bit 896 of the difference, i.e. set when x < y
+%r224 = load i64, i64* %r4 ; assemble the 7-limb value at %r4 into i448 (presumably the field modulus p - confirm against caller)
+%r225 = zext i64 %r224 to i128
+%r227 = getelementptr i64, i64* %r4, i32 1
+%r228 = load i64, i64* %r227
+%r229 = zext i64 %r228 to i128
+%r230 = shl i128 %r229, 64
+%r231 = or i128 %r225, %r230
+%r232 = zext i128 %r231 to i192
+%r234 = getelementptr i64, i64* %r4, i32 2
+%r235 = load i64, i64* %r234
+%r236 = zext i64 %r235 to i192
+%r237 = shl i192 %r236, 128
+%r238 = or i192 %r232, %r237
+%r239 = zext i192 %r238 to i256
+%r241 = getelementptr i64, i64* %r4, i32 3
+%r242 = load i64, i64* %r241
+%r243 = zext i64 %r242 to i256
+%r244 = shl i256 %r243, 192
+%r245 = or i256 %r239, %r244
+%r246 = zext i256 %r245 to i320
+%r248 = getelementptr i64, i64* %r4, i32 4
+%r249 = load i64, i64* %r248
+%r250 = zext i64 %r249 to i320
+%r251 = shl i320 %r250, 256
+%r252 = or i320 %r246, %r251
+%r253 = zext i320 %r252 to i384
+%r255 = getelementptr i64, i64* %r4, i32 5
+%r256 = load i64, i64* %r255
+%r257 = zext i64 %r256 to i384
+%r258 = shl i384 %r257, 320
+%r259 = or i384 %r253, %r258
+%r260 = zext i384 %r259 to i448
+%r262 = getelementptr i64, i64* %r4, i32 6
+%r263 = load i64, i64* %r262
+%r264 = zext i64 %r263 to i448
+%r265 = shl i448 %r264, 384
+%r266 = or i448 %r260, %r265
+%r268 = select i1 %r223, i448 %r266, i448 0 ; correction term: the %r4 value on borrow, otherwise zero (branch-free select)
+%r269 = add i448 %r221, %r268 ; corrected upper half (wraps modulo 2^448), stored to %r1[7..13]
+%r271 = getelementptr i64, i64* %r1, i32 7
+%r272 = trunc i448 %r269 to i64
+%r274 = getelementptr i64, i64* %r271, i32 0
+store i64 %r272, i64* %r274
+%r275 = lshr i448 %r269, 64
+%r276 = trunc i448 %r275 to i64
+%r278 = getelementptr i64, i64* %r271, i32 1
+store i64 %r276, i64* %r278
+%r279 = lshr i448 %r275, 64
+%r280 = trunc i448 %r279 to i64
+%r282 = getelementptr i64, i64* %r271, i32 2
+store i64 %r280, i64* %r282
+%r283 = lshr i448 %r279, 64
+%r284 = trunc i448 %r283 to i64
+%r286 = getelementptr i64, i64* %r271, i32 3
+store i64 %r284, i64* %r286
+%r287 = lshr i448 %r283, 64
+%r288 = trunc i448 %r287 to i64
+%r290 = getelementptr i64, i64* %r271, i32 4
+store i64 %r288, i64* %r290
+%r291 = lshr i448 %r287, 64
+%r292 = trunc i448 %r291 to i64
+%r294 = getelementptr i64, i64* %r271, i32 5
+store i64 %r292, i64* %r294
+%r295 = lshr i448 %r291, 64
+%r296 = trunc i448 %r295 to i64
+%r298 = getelementptr i64, i64* %r271, i32 6
+store i64 %r296, i64* %r298
+ret void
+}
+define i576 @mulPv512x64(i64* noalias  %r2, i64 %r3) ; returns an i576 combined from eight @mulPos64x64(%r2, %r3, i) results, i = 0..7 (presumably the product of the 8-limb value at %r2 and %r3 - helper is defined outside this chunk, confirm)
+{
+%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0) ; per index i: one i128 helper result, split into low 64 bits (trunc) and high 64 bits (@extractHigh64)
+%r6 = trunc i128 %r5 to i64
+%r7 = call i64 @extractHigh64(i128 %r5)
+%r9 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 1)
+%r10 = trunc i128 %r9 to i64
+%r11 = call i64 @extractHigh64(i128 %r9)
+%r13 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 2)
+%r14 = trunc i128 %r13 to i64
+%r15 = call i64 @extractHigh64(i128 %r13)
+%r17 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 3)
+%r18 = trunc i128 %r17 to i64
+%r19 = call i64 @extractHigh64(i128 %r17)
+%r21 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 4)
+%r22 = trunc i128 %r21 to i64
+%r23 = call i64 @extractHigh64(i128 %r21)
+%r25 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 5)
+%r26 = trunc i128 %r25 to i64
+%r27 = call i64 @extractHigh64(i128 %r25)
+%r29 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 6)
+%r30 = trunc i128 %r29 to i64
+%r31 = call i64 @extractHigh64(i128 %r29)
+%r33 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 7)
+%r34 = trunc i128 %r33 to i64
+%r35 = call i64 @extractHigh64(i128 %r33)
+%r36 = zext i64 %r6 to i128 ; pack the eight low halves into one i512, half i placed at bit 64*i
+%r37 = zext i64 %r10 to i128
+%r38 = shl i128 %r37, 64
+%r39 = or i128 %r36, %r38
+%r40 = zext i128 %r39 to i192
+%r41 = zext i64 %r14 to i192
+%r42 = shl i192 %r41, 128
+%r43 = or i192 %r40, %r42
+%r44 = zext i192 %r43 to i256
+%r45 = zext i64 %r18 to i256
+%r46 = shl i256 %r45, 192
+%r47 = or i256 %r44, %r46
+%r48 = zext i256 %r47 to i320
+%r49 = zext i64 %r22 to i320
+%r50 = shl i320 %r49, 256
+%r51 = or i320 %r48, %r50
+%r52 = zext i320 %r51 to i384
+%r53 = zext i64 %r26 to i384
+%r54 = shl i384 %r53, 320
+%r55 = or i384 %r52, %r54
+%r56 = zext i384 %r55 to i448
+%r57 = zext i64 %r30 to i448
+%r58 = shl i448 %r57, 384
+%r59 = or i448 %r56, %r58
+%r60 = zext i448 %r59 to i512
+%r61 = zext i64 %r34 to i512
+%r62 = shl i512 %r61, 448
+%r63 = or i512 %r60, %r62 ; %r63 = all eight low halves
+%r64 = zext i64 %r7 to i128 ; pack the eight high halves into a second i512 with the same layout
+%r65 = zext i64 %r11 to i128
+%r66 = shl i128 %r65, 64
+%r67 = or i128 %r64, %r66
+%r68 = zext i128 %r67 to i192
+%r69 = zext i64 %r15 to i192
+%r70 = shl i192 %r69, 128
+%r71 = or i192 %r68, %r70
+%r72 = zext i192 %r71 to i256
+%r73 = zext i64 %r19 to i256
+%r74 = shl i256 %r73, 192
+%r75 = or i256 %r72, %r74
+%r76 = zext i256 %r75 to i320
+%r77 = zext i64 %r23 to i320
+%r78 = shl i320 %r77, 256
+%r79 = or i320 %r76, %r78
+%r80 = zext i320 %r79 to i384
+%r81 = zext i64 %r27 to i384
+%r82 = shl i384 %r81, 320
+%r83 = or i384 %r80, %r82
+%r84 = zext i384 %r83 to i448
+%r85 = zext i64 %r31 to i448
+%r86 = shl i448 %r85, 384
+%r87 = or i448 %r84, %r86
+%r88 = zext i448 %r87 to i512
+%r89 = zext i64 %r35 to i512
+%r90 = shl i512 %r89, 448
+%r91 = or i512 %r88, %r90 ; %r91 = all eight high halves
+%r92 = zext i512 %r63 to i576
+%r93 = zext i512 %r91 to i576
+%r94 = shl i576 %r93, 64 ; each high half belongs one limb above its low half
+%r95 = add i576 %r92, %r94 ; single wide add merges both vectors and propagates carries
+ret i576 %r95
+}
+define void @mcl_fp_mulUnitPre8L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3) ; stores the i576 result of @mulPv512x64(%r2, %r3) to %r1 as nine i64 limbs, least-significant limb first
+{
+%r4 = call i576 @mulPv512x64(i64* %r2, i64 %r3)
+%r5 = trunc i576 %r4 to i64 ; limb 0 = low 64 bits; each later limb is taken after shifting right by another 64
+%r7 = getelementptr i64, i64* %r1, i32 0
+store i64 %r5, i64* %r7
+%r8 = lshr i576 %r4, 64
+%r9 = trunc i576 %r8 to i64
+%r11 = getelementptr i64, i64* %r1, i32 1
+store i64 %r9, i64* %r11
+%r12 = lshr i576 %r8, 64
+%r13 = trunc i576 %r12 to i64
+%r15 = getelementptr i64, i64* %r1, i32 2
+store i64 %r13, i64* %r15
+%r16 = lshr i576 %r12, 64
+%r17 = trunc i576 %r16 to i64
+%r19 = getelementptr i64, i64* %r1, i32 3
+store i64 %r17, i64* %r19
+%r20 = lshr i576 %r16, 64
+%r21 = trunc i576 %r20 to i64
+%r23 = getelementptr i64, i64* %r1, i32 4
+store i64 %r21, i64* %r23
+%r24 = lshr i576 %r20, 64
+%r25 = trunc i576 %r24 to i64
+%r27 = getelementptr i64, i64* %r1, i32 5
+store i64 %r25, i64* %r27
+%r28 = lshr i576 %r24, 64
+%r29 = trunc i576 %r28 to i64
+%r31 = getelementptr i64, i64* %r1, i32 6
+store i64 %r29, i64* %r31
+%r32 = lshr i576 %r28, 64
+%r33 = trunc i576 %r32 to i64
+%r35 = getelementptr i64, i64* %r1, i32 7
+store i64 %r33, i64* %r35
+%r36 = lshr i576 %r32, 64
+%r37 = trunc i576 %r36 to i64
+%r39 = getelementptr i64, i64* %r1, i32 8 ; ninth limb: %r1 must have room for 9 i64 values
+store i64 %r37, i64* %r39
+ret void
+}
+define void @mcl_fpDbl_mulPre8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+{
+%r5 = getelementptr i64, i64* %r2, i32 4
+%r7 = getelementptr i64, i64* %r3, i32 4
+%r9 = getelementptr i64, i64* %r1, i32 8
+call void @mcl_fpDbl_mulPre4L(i64* %r1, i64* %r2, i64* %r3)
+call void @mcl_fpDbl_mulPre4L(i64* %r9, i64* %r5, i64* %r7)
+%r10 = load i64, i64* %r5
+%r11 = zext i64 %r10 to i128
+%r13 = getelementptr i64, i64* %r5, i32 1
+%r14 = load i64, i64* %r13
+%r15 = zext i64 %r14 to i128
+%r16 = shl i128 %r15, 64
+%r17 = or i128 %r11, %r16
+%r18 = zext i128 %r17 to i192
+%r20 = getelementptr i64, i64* %r5, i32 2
+%r21 = load i64, i64* %r20
+%r22 = zext i64 %r21 to i192
+%r23 = shl i192 %r22, 128
+%r24 = or i192 %r18, %r23
+%r25 = zext i192 %r24 to i256
+%r27 = getelementptr i64, i64* %r5, i32 3
+%r28 = load i64, i64* %r27
+%r29 = zext i64 %r28 to i256
+%r30 = shl i256 %r29, 192
+%r31 = or i256 %r25, %r30
+%r32 = zext i256 %r31 to i320
+%r33 = load i64, i64* %r2
+%r34 = zext i64 %r33 to i128
+%r36 = getelementptr i64, i64* %r2, i32 1
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i128
+%r39 = shl i128 %r38, 64
+%r40 = or i128 %r34, %r39
+%r41 = zext i128 %r40 to i192
+%r43 = getelementptr i64, i64* %r2, i32 2
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i192
+%r46 = shl i192 %r45, 128
+%r47 = or i192 %r41, %r46
+%r48 = zext i192 %r47 to i256
+%r50 = getelementptr i64, i64* %r2, i32 3
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i256
+%r53 = shl i256 %r52, 192
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i320
+%r56 = load i64, i64* %r7
+%r57 = zext i64 %r56 to i128
+%r59 = getelementptr i64, i64* %r7, i32 1
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i128
+%r62 = shl i128 %r61, 64
+%r63 = or i128 %r57, %r62
+%r64 = zext i128 %r63 to i192
+%r66 = getelementptr i64, i64* %r7, i32 2
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i192
+%r69 = shl i192 %r68, 128
+%r70 = or i192 %r64, %r69
+%r71 = zext i192 %r70 to i256
+%r73 = getelementptr i64, i64* %r7, i32 3
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i256
+%r76 = shl i256 %r75, 192
+%r77 = or i256 %r71, %r76
+%r78 = zext i256 %r77 to i320
+%r79 = load i64, i64* %r3
+%r80 = zext i64 %r79 to i128
+%r82 = getelementptr i64, i64* %r3, i32 1
+%r83 = load i64, i64* %r82
+%r84 = zext i64 %r83 to i128
+%r85 = shl i128 %r84, 64
+%r86 = or i128 %r80, %r85
+%r87 = zext i128 %r86 to i192
+%r89 = getelementptr i64, i64* %r3, i32 2
+%r90 = load i64, i64* %r89
+%r91 = zext i64 %r90 to i192
+%r92 = shl i192 %r91, 128
+%r93 = or i192 %r87, %r92
+%r94 = zext i192 %r93 to i256
+%r96 = getelementptr i64, i64* %r3, i32 3
+%r97 = load i64, i64* %r96
+%r98 = zext i64 %r97 to i256
+%r99 = shl i256 %r98, 192
+%r100 = or i256 %r94, %r99
+%r101 = zext i256 %r100 to i320
+%r102 = add i320 %r32, %r55
+%r103 = add i320 %r78, %r101
+%r105 = alloca i64, i32 8
+%r106 = trunc i320 %r102 to i256
+%r107 = trunc i320 %r103 to i256
+%r108 = lshr i320 %r102, 256
+%r109 = trunc i320 %r108 to i1
+%r110 = lshr i320 %r103, 256
+%r111 = trunc i320 %r110 to i1
+%r112 = and i1 %r109, %r111
+%r114 = select i1 %r109, i256 %r107, i256 0
+%r116 = select i1 %r111, i256 %r106, i256 0
+%r118 = alloca i64, i32 4
+%r120 = alloca i64, i32 4
+%r121 = trunc i256 %r106 to i64
+%r123 = getelementptr i64, i64* %r118, i32 0
+store i64 %r121, i64* %r123
+%r124 = lshr i256 %r106, 64
+%r125 = trunc i256 %r124 to i64
+%r127 = getelementptr i64, i64* %r118, i32 1
+store i64 %r125, i64* %r127
+%r128 = lshr i256 %r124, 64
+%r129 = trunc i256 %r128 to i64
+%r131 = getelementptr i64, i64* %r118, i32 2
+store i64 %r129, i64* %r131
+%r132 = lshr i256 %r128, 64
+%r133 = trunc i256 %r132 to i64
+%r135 = getelementptr i64, i64* %r118, i32 3
+store i64 %r133, i64* %r135
+%r136 = trunc i256 %r107 to i64
+%r138 = getelementptr i64, i64* %r120, i32 0
+store i64 %r136, i64* %r138
+%r139 = lshr i256 %r107, 64
+%r140 = trunc i256 %r139 to i64
+%r142 = getelementptr i64, i64* %r120, i32 1
+store i64 %r140, i64* %r142
+%r143 = lshr i256 %r139, 64
+%r144 = trunc i256 %r143 to i64
+%r146 = getelementptr i64, i64* %r120, i32 2
+store i64 %r144, i64* %r146
+%r147 = lshr i256 %r143, 64
+%r148 = trunc i256 %r147 to i64
+%r150 = getelementptr i64, i64* %r120, i32 3
+store i64 %r148, i64* %r150
+call void @mcl_fpDbl_mulPre4L(i64* %r105, i64* %r118, i64* %r120)
+%r151 = load i64, i64* %r105
+%r152 = zext i64 %r151 to i128
+%r154 = getelementptr i64, i64* %r105, i32 1
+%r155 = load i64, i64* %r154
+%r156 = zext i64 %r155 to i128
+%r157 = shl i128 %r156, 64
+%r158 = or i128 %r152, %r157
+%r159 = zext i128 %r158 to i192
+%r161 = getelementptr i64, i64* %r105, i32 2
+%r162 = load i64, i64* %r161
+%r163 = zext i64 %r162 to i192
+%r164 = shl i192 %r163, 128
+%r165 = or i192 %r159, %r164
+%r166 = zext i192 %r165 to i256
+%r168 = getelementptr i64, i64* %r105, i32 3
+%r169 = load i64, i64* %r168
+%r170 = zext i64 %r169 to i256
+%r171 = shl i256 %r170, 192
+%r172 = or i256 %r166, %r171
+%r173 = zext i256 %r172 to i320
+%r175 = getelementptr i64, i64* %r105, i32 4
+%r176 = load i64, i64* %r175
+%r177 = zext i64 %r176 to i320
+%r178 = shl i320 %r177, 256
+%r179 = or i320 %r173, %r178
+%r180 = zext i320 %r179 to i384
+%r182 = getelementptr i64, i64* %r105, i32 5
+%r183 = load i64, i64* %r182
+%r184 = zext i64 %r183 to i384
+%r185 = shl i384 %r184, 320
+%r186 = or i384 %r180, %r185
+%r187 = zext i384 %r186 to i448
+%r189 = getelementptr i64, i64* %r105, i32 6
+%r190 = load i64, i64* %r189
+%r191 = zext i64 %r190 to i448
+%r192 = shl i448 %r191, 384
+%r193 = or i448 %r187, %r192
+%r194 = zext i448 %r193 to i512
+%r196 = getelementptr i64, i64* %r105, i32 7
+%r197 = load i64, i64* %r196
+%r198 = zext i64 %r197 to i512
+%r199 = shl i512 %r198, 448
+%r200 = or i512 %r194, %r199
+%r201 = zext i512 %r200 to i576
+%r202 = zext i1 %r112 to i576
+%r203 = shl i576 %r202, 512
+%r204 = or i576 %r201, %r203
+%r205 = zext i256 %r114 to i576
+%r206 = zext i256 %r116 to i576
+%r207 = shl i576 %r205, 256
+%r208 = shl i576 %r206, 256
+%r209 = add i576 %r204, %r207
+%r210 = add i576 %r209, %r208
+%r211 = load i64, i64* %r1
+%r212 = zext i64 %r211 to i128
+%r214 = getelementptr i64, i64* %r1, i32 1
+%r215 = load i64, i64* %r214
+%r216 = zext i64 %r215 to i128
+%r217 = shl i128 %r216, 64
+%r218 = or i128 %r212, %r217
+%r219 = zext i128 %r218 to i192
+%r221 = getelementptr i64, i64* %r1, i32 2
+%r222 = load i64, i64* %r221
+%r223 = zext i64 %r222 to i192
+%r224 = shl i192 %r223, 128
+%r225 = or i192 %r219, %r224
+%r226 = zext i192 %r225 to i256
+%r228 = getelementptr i64, i64* %r1, i32 3
+%r229 = load i64, i64* %r228
+%r230 = zext i64 %r229 to i256
+%r231 = shl i256 %r230, 192
+%r232 = or i256 %r226, %r231
+%r233 = zext i256 %r232 to i320
+%r235 = getelementptr i64, i64* %r1, i32 4
+%r236 = load i64, i64* %r235
+%r237 = zext i64 %r236 to i320
+%r238 = shl i320 %r237, 256
+%r239 = or i320 %r233, %r238
+%r240 = zext i320 %r239 to i384
+%r242 = getelementptr i64, i64* %r1, i32 5
+%r243 = load i64, i64* %r242
+%r244 = zext i64 %r243 to i384
+%r245 = shl i384 %r244, 320
+%r246 = or i384 %r240, %r245
+%r247 = zext i384 %r246 to i448
+%r249 = getelementptr i64, i64* %r1, i32 6
+%r250 = load i64, i64* %r249
+%r251 = zext i64 %r250 to i448
+%r252 = shl i448 %r251, 384
+%r253 = or i448 %r247, %r252
+%r254 = zext i448 %r253 to i512
+%r256 = getelementptr i64, i64* %r1, i32 7
+%r257 = load i64, i64* %r256
+%r258 = zext i64 %r257 to i512
+%r259 = shl i512 %r258, 448
+%r260 = or i512 %r254, %r259
+%r261 = zext i512 %r260 to i576
+%r262 = sub i576 %r210, %r261
+%r264 = getelementptr i64, i64* %r1, i32 8
+%r265 = load i64, i64* %r264
+%r266 = zext i64 %r265 to i128
+%r268 = getelementptr i64, i64* %r264, i32 1
+%r269 = load i64, i64* %r268
+%r270 = zext i64 %r269 to i128
+%r271 = shl i128 %r270, 64
+%r272 = or i128 %r266, %r271
+%r273 = zext i128 %r272 to i192
+%r275 = getelementptr i64, i64* %r264, i32 2
+%r276 = load i64, i64* %r275
+%r277 = zext i64 %r276 to i192
+%r278 = shl i192 %r277, 128
+%r279 = or i192 %r273, %r278
+%r280 = zext i192 %r279 to i256
+%r282 = getelementptr i64, i64* %r264, i32 3
+%r283 = load i64, i64* %r282
+%r284 = zext i64 %r283 to i256
+%r285 = shl i256 %r284, 192
+%r286 = or i256 %r280, %r285
+%r287 = zext i256 %r286 to i320
+%r289 = getelementptr i64, i64* %r264, i32 4
+%r290 = load i64, i64* %r289
+%r291 = zext i64 %r290 to i320
+%r292 = shl i320 %r291, 256
+%r293 = or i320 %r287, %r292
+%r294 = zext i320 %r293 to i384
+%r296 = getelementptr i64, i64* %r264, i32 5
+%r297 = load i64, i64* %r296
+%r298 = zext i64 %r297 to i384
+%r299 = shl i384 %r298, 320
+%r300 = or i384 %r294, %r299
+%r301 = zext i384 %r300 to i448
+%r303 = getelementptr i64, i64* %r264, i32 6
+%r304 = load i64, i64* %r303
+%r305 = zext i64 %r304 to i448
+%r306 = shl i448 %r305, 384
+%r307 = or i448 %r301, %r306
+%r308 = zext i448 %r307 to i512
+%r310 = getelementptr i64, i64* %r264, i32 7
+%r311 = load i64, i64* %r310
+%r312 = zext i64 %r311 to i512
+%r313 = shl i512 %r312, 448
+%r314 = or i512 %r308, %r313
+%r315 = zext i512 %r314 to i576
+%r316 = sub i576 %r262, %r315
+%r317 = zext i576 %r316 to i768
+%r319 = getelementptr i64, i64* %r1, i32 4
+%r320 = load i64, i64* %r319
+%r321 = zext i64 %r320 to i128
+%r323 = getelementptr i64, i64* %r319, i32 1
+%r324 = load i64, i64* %r323
+%r325 = zext i64 %r324 to i128
+%r326 = shl i128 %r325, 64
+%r327 = or i128 %r321, %r326
+%r328 = zext i128 %r327 to i192
+%r330 = getelementptr i64, i64* %r319, i32 2
+%r331 = load i64, i64* %r330
+%r332 = zext i64 %r331 to i192
+%r333 = shl i192 %r332, 128
+%r334 = or i192 %r328, %r333
+%r335 = zext i192 %r334 to i256
+%r337 = getelementptr i64, i64* %r319, i32 3
+%r338 = load i64, i64* %r337
+%r339 = zext i64 %r338 to i256
+%r340 = shl i256 %r339, 192
+%r341 = or i256 %r335, %r340
+%r342 = zext i256 %r341 to i320
+%r344 = getelementptr i64, i64* %r319, i32 4
+%r345 = load i64, i64* %r344
+%r346 = zext i64 %r345 to i320
+%r347 = shl i320 %r346, 256
+%r348 = or i320 %r342, %r347
+%r349 = zext i320 %r348 to i384
+%r351 = getelementptr i64, i64* %r319, i32 5
+%r352 = load i64, i64* %r351
+%r353 = zext i64 %r352 to i384
+%r354 = shl i384 %r353, 320
+%r355 = or i384 %r349, %r354
+%r356 = zext i384 %r355 to i448
+%r358 = getelementptr i64, i64* %r319, i32 6
+%r359 = load i64, i64* %r358
+%r360 = zext i64 %r359 to i448
+%r361 = shl i448 %r360, 384
+%r362 = or i448 %r356, %r361
+%r363 = zext i448 %r362 to i512
+%r365 = getelementptr i64, i64* %r319, i32 7
+%r366 = load i64, i64* %r365
+%r367 = zext i64 %r366 to i512
+%r368 = shl i512 %r367, 448
+%r369 = or i512 %r363, %r368
+%r370 = zext i512 %r369 to i576
+%r372 = getelementptr i64, i64* %r319, i32 8
+%r373 = load i64, i64* %r372
+%r374 = zext i64 %r373 to i576
+%r375 = shl i576 %r374, 512
+%r376 = or i576 %r370, %r375
+%r377 = zext i576 %r376 to i640
+%r379 = getelementptr i64, i64* %r319, i32 9
+%r380 = load i64, i64* %r379
+%r381 = zext i64 %r380 to i640
+%r382 = shl i640 %r381, 576
+%r383 = or i640 %r377, %r382
+%r384 = zext i640 %r383 to i704
+%r386 = getelementptr i64, i64* %r319, i32 10
+%r387 = load i64, i64* %r386
+%r388 = zext i64 %r387 to i704
+%r389 = shl i704 %r388, 640
+%r390 = or i704 %r384, %r389
+%r391 = zext i704 %r390 to i768
+%r393 = getelementptr i64, i64* %r319, i32 11
+%r394 = load i64, i64* %r393
+%r395 = zext i64 %r394 to i768
+%r396 = shl i768 %r395, 704
+%r397 = or i768 %r391, %r396
+%r398 = add i768 %r317, %r397
+%r400 = getelementptr i64, i64* %r1, i32 4
+%r401 = trunc i768 %r398 to i64
+%r403 = getelementptr i64, i64* %r400, i32 0
+store i64 %r401, i64* %r403
+%r404 = lshr i768 %r398, 64
+%r405 = trunc i768 %r404 to i64
+%r407 = getelementptr i64, i64* %r400, i32 1
+store i64 %r405, i64* %r407
+%r408 = lshr i768 %r404, 64
+%r409 = trunc i768 %r408 to i64
+%r411 = getelementptr i64, i64* %r400, i32 2
+store i64 %r409, i64* %r411
+%r412 = lshr i768 %r408, 64
+%r413 = trunc i768 %r412 to i64
+%r415 = getelementptr i64, i64* %r400, i32 3
+store i64 %r413, i64* %r415
+%r416 = lshr i768 %r412, 64
+%r417 = trunc i768 %r416 to i64
+%r419 = getelementptr i64, i64* %r400, i32 4
+store i64 %r417, i64* %r419
+%r420 = lshr i768 %r416, 64
+%r421 = trunc i768 %r420 to i64
+%r423 = getelementptr i64, i64* %r400, i32 5
+store i64 %r421, i64* %r423
+%r424 = lshr i768 %r420, 64
+%r425 = trunc i768 %r424 to i64
+%r427 = getelementptr i64, i64* %r400, i32 6
+store i64 %r425, i64* %r427
+%r428 = lshr i768 %r424, 64
+%r429 = trunc i768 %r428 to i64
+%r431 = getelementptr i64, i64* %r400, i32 7
+store i64 %r429, i64* %r431
+%r432 = lshr i768 %r428, 64
+%r433 = trunc i768 %r432 to i64
+%r435 = getelementptr i64, i64* %r400, i32 8
+store i64 %r433, i64* %r435
+%r436 = lshr i768 %r432, 64
+%r437 = trunc i768 %r436 to i64
+%r439 = getelementptr i64, i64* %r400, i32 9
+store i64 %r437, i64* %r439
+%r440 = lshr i768 %r436, 64
+%r441 = trunc i768 %r440 to i64
+%r443 = getelementptr i64, i64* %r400, i32 10
+store i64 %r441, i64* %r443
+%r444 = lshr i768 %r440, 64
+%r445 = trunc i768 %r444 to i64
+%r447 = getelementptr i64, i64* %r400, i32 11
+store i64 %r445, i64* %r447
+ret void
+}
+define void @mcl_fpDbl_sqrPre8L(i64* noalias  %r1, i64* noalias  %r2) ; Karatsuba-shaped squaring: reads 8 little-endian i64 limbs at %r2; result occupies %r1[0..15] (assumes each @mcl_fpDbl_mulPre4L call fills 8 limbs -- callee not visible here)
+{
+%r4 = getelementptr i64, i64* %r2, i32 4 ; &%r2[4]: upper four input limbs
+%r6 = getelementptr i64, i64* %r2, i32 4 ; same address as %r4 (both factors are the same number)
+%r8 = getelementptr i64, i64* %r1, i32 8 ; &%r1[8]: upper half of the output
+call void @mcl_fpDbl_mulPre4L(i64* %r1, i64* %r2, i64* %r2) ; low half times itself, destination %r1
+call void @mcl_fpDbl_mulPre4L(i64* %r8, i64* %r4, i64* %r6) ; high half times itself, destination %r1[8..]
+%r9 = load i64, i64* %r4 ; start packing %r2[4..7] into one i256 (complete in %r30)
+%r10 = zext i64 %r9 to i128
+%r12 = getelementptr i64, i64* %r4, i32 1
+%r13 = load i64, i64* %r12
+%r14 = zext i64 %r13 to i128
+%r15 = shl i128 %r14, 64
+%r16 = or i128 %r10, %r15
+%r17 = zext i128 %r16 to i192
+%r19 = getelementptr i64, i64* %r4, i32 2
+%r20 = load i64, i64* %r19
+%r21 = zext i64 %r20 to i192
+%r22 = shl i192 %r21, 128
+%r23 = or i192 %r17, %r22
+%r24 = zext i192 %r23 to i256
+%r26 = getelementptr i64, i64* %r4, i32 3
+%r27 = load i64, i64* %r26
+%r28 = zext i64 %r27 to i256
+%r29 = shl i256 %r28, 192
+%r30 = or i256 %r24, %r29
+%r31 = zext i256 %r30 to i320 ; widened so the addition below keeps its carry bit
+%r32 = load i64, i64* %r2 ; start packing %r2[0..3] into one i256 (complete in %r53)
+%r33 = zext i64 %r32 to i128
+%r35 = getelementptr i64, i64* %r2, i32 1
+%r36 = load i64, i64* %r35
+%r37 = zext i64 %r36 to i128
+%r38 = shl i128 %r37, 64
+%r39 = or i128 %r33, %r38
+%r40 = zext i128 %r39 to i192
+%r42 = getelementptr i64, i64* %r2, i32 2
+%r43 = load i64, i64* %r42
+%r44 = zext i64 %r43 to i192
+%r45 = shl i192 %r44, 128
+%r46 = or i192 %r40, %r45
+%r47 = zext i192 %r46 to i256
+%r49 = getelementptr i64, i64* %r2, i32 3
+%r50 = load i64, i64* %r49
+%r51 = zext i64 %r50 to i256
+%r52 = shl i256 %r51, 192
+%r53 = or i256 %r47, %r52
+%r54 = zext i256 %r53 to i320
+%r55 = load i64, i64* %r6 ; pack the upper four limbs a second time, via %r6 (complete in %r76)
+%r56 = zext i64 %r55 to i128
+%r58 = getelementptr i64, i64* %r6, i32 1
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i128
+%r61 = shl i128 %r60, 64
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i192
+%r65 = getelementptr i64, i64* %r6, i32 2
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i192
+%r68 = shl i192 %r67, 128
+%r69 = or i192 %r63, %r68
+%r70 = zext i192 %r69 to i256
+%r72 = getelementptr i64, i64* %r6, i32 3
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i256
+%r75 = shl i256 %r74, 192
+%r76 = or i256 %r70, %r75
+%r77 = zext i256 %r76 to i320
+%r78 = load i64, i64* %r2 ; pack the lower four limbs a second time (complete in %r99)
+%r79 = zext i64 %r78 to i128
+%r81 = getelementptr i64, i64* %r2, i32 1
+%r82 = load i64, i64* %r81
+%r83 = zext i64 %r82 to i128
+%r84 = shl i128 %r83, 64
+%r85 = or i128 %r79, %r84
+%r86 = zext i128 %r85 to i192
+%r88 = getelementptr i64, i64* %r2, i32 2
+%r89 = load i64, i64* %r88
+%r90 = zext i64 %r89 to i192
+%r91 = shl i192 %r90, 128
+%r92 = or i192 %r86, %r91
+%r93 = zext i192 %r92 to i256
+%r95 = getelementptr i64, i64* %r2, i32 3
+%r96 = load i64, i64* %r95
+%r97 = zext i64 %r96 to i256
+%r98 = shl i256 %r97, 192
+%r99 = or i256 %r93, %r98
+%r100 = zext i256 %r99 to i320
+%r101 = add i320 %r31, %r54 ; first factor of the middle product: high half + low half (up to 257 bits)
+%r102 = add i320 %r77, %r100 ; second factor: the same sum, recomputed
+%r104 = alloca i64, i32 8 ; 8-limb scratch that receives the product of the truncated sums
+%r105 = trunc i320 %r101 to i256 ; low 256 bits of the first sum
+%r106 = trunc i320 %r102 to i256 ; low 256 bits of the second sum
+%r107 = lshr i320 %r101, 256
+%r108 = trunc i320 %r107 to i1 ; carry (bit 256) of the first sum
+%r109 = lshr i320 %r102, 256
+%r110 = trunc i320 %r109 to i1 ; carry (bit 256) of the second sum
+%r111 = and i1 %r108, %r110 ; set when both sums carried
+%r113 = select i1 %r108, i256 %r106, i256 0 ; cross term: second sum's low bits if the first sum carried, else 0
+%r115 = select i1 %r110, i256 %r105, i256 0 ; cross term: first sum's low bits if the second sum carried, else 0
+%r117 = alloca i64, i32 4 ; 4-limb buffer for %r105
+%r119 = alloca i64, i32 4 ; 4-limb buffer for %r106
+%r120 = trunc i256 %r105 to i64 ; spill %r105 limb by limb into %r117[0..3]
+%r122 = getelementptr i64, i64* %r117, i32 0
+store i64 %r120, i64* %r122
+%r123 = lshr i256 %r105, 64
+%r124 = trunc i256 %r123 to i64
+%r126 = getelementptr i64, i64* %r117, i32 1
+store i64 %r124, i64* %r126
+%r127 = lshr i256 %r123, 64
+%r128 = trunc i256 %r127 to i64
+%r130 = getelementptr i64, i64* %r117, i32 2
+store i64 %r128, i64* %r130
+%r131 = lshr i256 %r127, 64
+%r132 = trunc i256 %r131 to i64
+%r134 = getelementptr i64, i64* %r117, i32 3
+store i64 %r132, i64* %r134
+%r135 = trunc i256 %r106 to i64 ; spill %r106 limb by limb into %r119[0..3]
+%r137 = getelementptr i64, i64* %r119, i32 0
+store i64 %r135, i64* %r137
+%r138 = lshr i256 %r106, 64
+%r139 = trunc i256 %r138 to i64
+%r141 = getelementptr i64, i64* %r119, i32 1
+store i64 %r139, i64* %r141
+%r142 = lshr i256 %r138, 64
+%r143 = trunc i256 %r142 to i64
+%r145 = getelementptr i64, i64* %r119, i32 2
+store i64 %r143, i64* %r145
+%r146 = lshr i256 %r142, 64
+%r147 = trunc i256 %r146 to i64
+%r149 = getelementptr i64, i64* %r119, i32 3
+store i64 %r147, i64* %r149
+call void @mcl_fpDbl_mulPre4L(i64* %r104, i64* %r117, i64* %r119) ; product of the two truncated sums, destination %r104
+%r150 = load i64, i64* %r104 ; reload %r104[0..7] as one i512 (complete in %r199)
+%r151 = zext i64 %r150 to i128
+%r153 = getelementptr i64, i64* %r104, i32 1
+%r154 = load i64, i64* %r153
+%r155 = zext i64 %r154 to i128
+%r156 = shl i128 %r155, 64
+%r157 = or i128 %r151, %r156
+%r158 = zext i128 %r157 to i192
+%r160 = getelementptr i64, i64* %r104, i32 2
+%r161 = load i64, i64* %r160
+%r162 = zext i64 %r161 to i192
+%r163 = shl i192 %r162, 128
+%r164 = or i192 %r158, %r163
+%r165 = zext i192 %r164 to i256
+%r167 = getelementptr i64, i64* %r104, i32 3
+%r168 = load i64, i64* %r167
+%r169 = zext i64 %r168 to i256
+%r170 = shl i256 %r169, 192
+%r171 = or i256 %r165, %r170
+%r172 = zext i256 %r171 to i320
+%r174 = getelementptr i64, i64* %r104, i32 4
+%r175 = load i64, i64* %r174
+%r176 = zext i64 %r175 to i320
+%r177 = shl i320 %r176, 256
+%r178 = or i320 %r172, %r177
+%r179 = zext i320 %r178 to i384
+%r181 = getelementptr i64, i64* %r104, i32 5
+%r182 = load i64, i64* %r181
+%r183 = zext i64 %r182 to i384
+%r184 = shl i384 %r183, 320
+%r185 = or i384 %r179, %r184
+%r186 = zext i384 %r185 to i448
+%r188 = getelementptr i64, i64* %r104, i32 6
+%r189 = load i64, i64* %r188
+%r190 = zext i64 %r189 to i448
+%r191 = shl i448 %r190, 384
+%r192 = or i448 %r186, %r191
+%r193 = zext i448 %r192 to i512
+%r195 = getelementptr i64, i64* %r104, i32 7
+%r196 = load i64, i64* %r195
+%r197 = zext i64 %r196 to i512
+%r198 = shl i512 %r197, 448
+%r199 = or i512 %r193, %r198
+%r200 = zext i512 %r199 to i576
+%r201 = zext i1 %r111 to i576
+%r202 = shl i576 %r201, 512
+%r203 = or i576 %r200, %r202 ; set bit 512 (always clear in %r200) when both sums carried
+%r204 = zext i256 %r113 to i576
+%r205 = zext i256 %r115 to i576
+%r206 = shl i576 %r204, 256 ; cross terms enter at bit 256
+%r207 = shl i576 %r205, 256
+%r208 = add i576 %r203, %r206
+%r209 = add i576 %r208, %r207 ; algebraically the product of the two full (257-bit) sums
+%r210 = load i64, i64* %r1 ; pack %r1[0..7] (destination of the first call) into one i512 (complete in %r259)
+%r211 = zext i64 %r210 to i128
+%r213 = getelementptr i64, i64* %r1, i32 1
+%r214 = load i64, i64* %r213
+%r215 = zext i64 %r214 to i128
+%r216 = shl i128 %r215, 64
+%r217 = or i128 %r211, %r216
+%r218 = zext i128 %r217 to i192
+%r220 = getelementptr i64, i64* %r1, i32 2
+%r221 = load i64, i64* %r220
+%r222 = zext i64 %r221 to i192
+%r223 = shl i192 %r222, 128
+%r224 = or i192 %r218, %r223
+%r225 = zext i192 %r224 to i256
+%r227 = getelementptr i64, i64* %r1, i32 3
+%r228 = load i64, i64* %r227
+%r229 = zext i64 %r228 to i256
+%r230 = shl i256 %r229, 192
+%r231 = or i256 %r225, %r230
+%r232 = zext i256 %r231 to i320
+%r234 = getelementptr i64, i64* %r1, i32 4
+%r235 = load i64, i64* %r234
+%r236 = zext i64 %r235 to i320
+%r237 = shl i320 %r236, 256
+%r238 = or i320 %r232, %r237
+%r239 = zext i320 %r238 to i384
+%r241 = getelementptr i64, i64* %r1, i32 5
+%r242 = load i64, i64* %r241
+%r243 = zext i64 %r242 to i384
+%r244 = shl i384 %r243, 320
+%r245 = or i384 %r239, %r244
+%r246 = zext i384 %r245 to i448
+%r248 = getelementptr i64, i64* %r1, i32 6
+%r249 = load i64, i64* %r248
+%r250 = zext i64 %r249 to i448
+%r251 = shl i448 %r250, 384
+%r252 = or i448 %r246, %r251
+%r253 = zext i448 %r252 to i512
+%r255 = getelementptr i64, i64* %r1, i32 7
+%r256 = load i64, i64* %r255
+%r257 = zext i64 %r256 to i512
+%r258 = shl i512 %r257, 448
+%r259 = or i512 %r253, %r258
+%r260 = zext i512 %r259 to i576
+%r261 = sub i576 %r209, %r260 ; remove the low-half square
+%r263 = getelementptr i64, i64* %r1, i32 8 ; pack %r1[8..15] (destination of the second call) into one i512 (complete in %r313)
+%r264 = load i64, i64* %r263
+%r265 = zext i64 %r264 to i128
+%r267 = getelementptr i64, i64* %r263, i32 1
+%r268 = load i64, i64* %r267
+%r269 = zext i64 %r268 to i128
+%r270 = shl i128 %r269, 64
+%r271 = or i128 %r265, %r270
+%r272 = zext i128 %r271 to i192
+%r274 = getelementptr i64, i64* %r263, i32 2
+%r275 = load i64, i64* %r274
+%r276 = zext i64 %r275 to i192
+%r277 = shl i192 %r276, 128
+%r278 = or i192 %r272, %r277
+%r279 = zext i192 %r278 to i256
+%r281 = getelementptr i64, i64* %r263, i32 3
+%r282 = load i64, i64* %r281
+%r283 = zext i64 %r282 to i256
+%r284 = shl i256 %r283, 192
+%r285 = or i256 %r279, %r284
+%r286 = zext i256 %r285 to i320
+%r288 = getelementptr i64, i64* %r263, i32 4
+%r289 = load i64, i64* %r288
+%r290 = zext i64 %r289 to i320
+%r291 = shl i320 %r290, 256
+%r292 = or i320 %r286, %r291
+%r293 = zext i320 %r292 to i384
+%r295 = getelementptr i64, i64* %r263, i32 5
+%r296 = load i64, i64* %r295
+%r297 = zext i64 %r296 to i384
+%r298 = shl i384 %r297, 320
+%r299 = or i384 %r293, %r298
+%r300 = zext i384 %r299 to i448
+%r302 = getelementptr i64, i64* %r263, i32 6
+%r303 = load i64, i64* %r302
+%r304 = zext i64 %r303 to i448
+%r305 = shl i448 %r304, 384
+%r306 = or i448 %r300, %r305
+%r307 = zext i448 %r306 to i512
+%r309 = getelementptr i64, i64* %r263, i32 7
+%r310 = load i64, i64* %r309
+%r311 = zext i64 %r310 to i512
+%r312 = shl i512 %r311, 448
+%r313 = or i512 %r307, %r312
+%r314 = zext i512 %r313 to i576
+%r315 = sub i576 %r261, %r314 ; remove the high-half square; what remains is the middle term (algebraically 2 * high * low)
+%r316 = zext i576 %r315 to i768
+%r318 = getelementptr i64, i64* %r1, i32 4 ; pack %r1[4..15] into one i768 (complete in %r396)
+%r319 = load i64, i64* %r318
+%r320 = zext i64 %r319 to i128
+%r322 = getelementptr i64, i64* %r318, i32 1
+%r323 = load i64, i64* %r322
+%r324 = zext i64 %r323 to i128
+%r325 = shl i128 %r324, 64
+%r326 = or i128 %r320, %r325
+%r327 = zext i128 %r326 to i192
+%r329 = getelementptr i64, i64* %r318, i32 2
+%r330 = load i64, i64* %r329
+%r331 = zext i64 %r330 to i192
+%r332 = shl i192 %r331, 128
+%r333 = or i192 %r327, %r332
+%r334 = zext i192 %r333 to i256
+%r336 = getelementptr i64, i64* %r318, i32 3
+%r337 = load i64, i64* %r336
+%r338 = zext i64 %r337 to i256
+%r339 = shl i256 %r338, 192
+%r340 = or i256 %r334, %r339
+%r341 = zext i256 %r340 to i320
+%r343 = getelementptr i64, i64* %r318, i32 4
+%r344 = load i64, i64* %r343
+%r345 = zext i64 %r344 to i320
+%r346 = shl i320 %r345, 256
+%r347 = or i320 %r341, %r346
+%r348 = zext i320 %r347 to i384
+%r350 = getelementptr i64, i64* %r318, i32 5
+%r351 = load i64, i64* %r350
+%r352 = zext i64 %r351 to i384
+%r353 = shl i384 %r352, 320
+%r354 = or i384 %r348, %r353
+%r355 = zext i384 %r354 to i448
+%r357 = getelementptr i64, i64* %r318, i32 6
+%r358 = load i64, i64* %r357
+%r359 = zext i64 %r358 to i448
+%r360 = shl i448 %r359, 384
+%r361 = or i448 %r355, %r360
+%r362 = zext i448 %r361 to i512
+%r364 = getelementptr i64, i64* %r318, i32 7
+%r365 = load i64, i64* %r364
+%r366 = zext i64 %r365 to i512
+%r367 = shl i512 %r366, 448
+%r368 = or i512 %r362, %r367
+%r369 = zext i512 %r368 to i576
+%r371 = getelementptr i64, i64* %r318, i32 8
+%r372 = load i64, i64* %r371
+%r373 = zext i64 %r372 to i576
+%r374 = shl i576 %r373, 512
+%r375 = or i576 %r369, %r374
+%r376 = zext i576 %r375 to i640
+%r378 = getelementptr i64, i64* %r318, i32 9
+%r379 = load i64, i64* %r378
+%r380 = zext i64 %r379 to i640
+%r381 = shl i640 %r380, 576
+%r382 = or i640 %r376, %r381
+%r383 = zext i640 %r382 to i704
+%r385 = getelementptr i64, i64* %r318, i32 10
+%r386 = load i64, i64* %r385
+%r387 = zext i64 %r386 to i704
+%r388 = shl i704 %r387, 640
+%r389 = or i704 %r383, %r388
+%r390 = zext i704 %r389 to i768
+%r392 = getelementptr i64, i64* %r318, i32 11
+%r393 = load i64, i64* %r392
+%r394 = zext i64 %r393 to i768
+%r395 = shl i768 %r394, 704
+%r396 = or i768 %r390, %r395
+%r397 = add i768 %r316, %r396 ; fold the middle term in at limb offset 4 (bit 256)
+%r399 = getelementptr i64, i64* %r1, i32 4 ; write the 12-limb sum back to %r1[4..15], least significant limb first
+%r400 = trunc i768 %r397 to i64
+%r402 = getelementptr i64, i64* %r399, i32 0
+store i64 %r400, i64* %r402
+%r403 = lshr i768 %r397, 64
+%r404 = trunc i768 %r403 to i64
+%r406 = getelementptr i64, i64* %r399, i32 1
+store i64 %r404, i64* %r406
+%r407 = lshr i768 %r403, 64
+%r408 = trunc i768 %r407 to i64
+%r410 = getelementptr i64, i64* %r399, i32 2
+store i64 %r408, i64* %r410
+%r411 = lshr i768 %r407, 64
+%r412 = trunc i768 %r411 to i64
+%r414 = getelementptr i64, i64* %r399, i32 3
+store i64 %r412, i64* %r414
+%r415 = lshr i768 %r411, 64
+%r416 = trunc i768 %r415 to i64
+%r418 = getelementptr i64, i64* %r399, i32 4
+store i64 %r416, i64* %r418
+%r419 = lshr i768 %r415, 64
+%r420 = trunc i768 %r419 to i64
+%r422 = getelementptr i64, i64* %r399, i32 5
+store i64 %r420, i64* %r422
+%r423 = lshr i768 %r419, 64
+%r424 = trunc i768 %r423 to i64
+%r426 = getelementptr i64, i64* %r399, i32 6
+store i64 %r424, i64* %r426
+%r427 = lshr i768 %r423, 64
+%r428 = trunc i768 %r427 to i64
+%r430 = getelementptr i64, i64* %r399, i32 7
+store i64 %r428, i64* %r430
+%r431 = lshr i768 %r427, 64
+%r432 = trunc i768 %r431 to i64
+%r434 = getelementptr i64, i64* %r399, i32 8
+store i64 %r432, i64* %r434
+%r435 = lshr i768 %r431, 64
+%r436 = trunc i768 %r435 to i64
+%r438 = getelementptr i64, i64* %r399, i32 9
+store i64 %r436, i64* %r438
+%r439 = lshr i768 %r435, 64
+%r440 = trunc i768 %r439 to i64
+%r442 = getelementptr i64, i64* %r399, i32 10
+store i64 %r440, i64* %r442
+%r443 = lshr i768 %r439, 64
+%r444 = trunc i768 %r443 to i64
+%r446 = getelementptr i64, i64* %r399, i32 11
+store i64 %r444, i64* %r446
+ret void
+}
+define void @mcl_fp_mont8L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; word-by-word Montgomery-style multiply over 8 i64 limbs: %r1 = output, %r2 and %r3 = operands, %r4 = 8-limb value that is added in multiples and conditionally subtracted at the end (presumably the modulus -- confirm with caller)
+{
+%r6 = getelementptr i64, i64* %r4, i32 -1 ; the word stored immediately before the %r4 array
+%r7 = load i64, i64* %r6 ; presumably the Montgomery constant (-p^-1 mod 2^64) -- TODO confirm against the caller's layout
+%r9 = getelementptr i64, i64* %r3, i32 0 ; round 0: limb %r3[0]
+%r10 = load i64, i64* %r9
+%r11 = call i576 @mulPv512x64(i64* %r2, i64 %r10) ; helper defined outside this chunk; by name and types, the 8 limbs at %r2 times one i64
+%r12 = zext i576 %r11 to i640 ; widened to i640 so the additions below keep their carry
+%r13 = trunc i576 %r11 to i64
+%r14 = mul i64 %r13, %r7 ; per-round factor: low limb of the accumulator times %r7, mod 2^64
+%r15 = call i576 @mulPv512x64(i64* %r4, i64 %r14) ; the %r4 value times that factor
+%r16 = zext i576 %r15 to i640
+%r17 = add i640 %r12, %r16
+%r18 = lshr i640 %r17, 64 ; drop the low limb (zero by construction if %r7 is the usual Montgomery constant)
+%r20 = getelementptr i64, i64* %r3, i32 1 ; round 1: same steps with %r3[1], accumulating onto %r18
+%r21 = load i64, i64* %r20
+%r22 = call i576 @mulPv512x64(i64* %r2, i64 %r21)
+%r23 = zext i576 %r22 to i640
+%r24 = add i640 %r18, %r23
+%r25 = trunc i640 %r24 to i64
+%r26 = mul i64 %r25, %r7
+%r27 = call i576 @mulPv512x64(i64* %r4, i64 %r26)
+%r28 = zext i576 %r27 to i640
+%r29 = add i640 %r24, %r28
+%r30 = lshr i640 %r29, 64
+%r32 = getelementptr i64, i64* %r3, i32 2 ; round 2: %r3[2]
+%r33 = load i64, i64* %r32
+%r34 = call i576 @mulPv512x64(i64* %r2, i64 %r33)
+%r35 = zext i576 %r34 to i640
+%r36 = add i640 %r30, %r35
+%r37 = trunc i640 %r36 to i64
+%r38 = mul i64 %r37, %r7
+%r39 = call i576 @mulPv512x64(i64* %r4, i64 %r38)
+%r40 = zext i576 %r39 to i640
+%r41 = add i640 %r36, %r40
+%r42 = lshr i640 %r41, 64
+%r44 = getelementptr i64, i64* %r3, i32 3 ; round 3: %r3[3]
+%r45 = load i64, i64* %r44
+%r46 = call i576 @mulPv512x64(i64* %r2, i64 %r45)
+%r47 = zext i576 %r46 to i640
+%r48 = add i640 %r42, %r47
+%r49 = trunc i640 %r48 to i64
+%r50 = mul i64 %r49, %r7
+%r51 = call i576 @mulPv512x64(i64* %r4, i64 %r50)
+%r52 = zext i576 %r51 to i640
+%r53 = add i640 %r48, %r52
+%r54 = lshr i640 %r53, 64
+%r56 = getelementptr i64, i64* %r3, i32 4 ; round 4: %r3[4]
+%r57 = load i64, i64* %r56
+%r58 = call i576 @mulPv512x64(i64* %r2, i64 %r57)
+%r59 = zext i576 %r58 to i640
+%r60 = add i640 %r54, %r59
+%r61 = trunc i640 %r60 to i64
+%r62 = mul i64 %r61, %r7
+%r63 = call i576 @mulPv512x64(i64* %r4, i64 %r62)
+%r64 = zext i576 %r63 to i640
+%r65 = add i640 %r60, %r64
+%r66 = lshr i640 %r65, 64
+%r68 = getelementptr i64, i64* %r3, i32 5 ; round 5: %r3[5]
+%r69 = load i64, i64* %r68
+%r70 = call i576 @mulPv512x64(i64* %r2, i64 %r69)
+%r71 = zext i576 %r70 to i640
+%r72 = add i640 %r66, %r71
+%r73 = trunc i640 %r72 to i64
+%r74 = mul i64 %r73, %r7
+%r75 = call i576 @mulPv512x64(i64* %r4, i64 %r74)
+%r76 = zext i576 %r75 to i640
+%r77 = add i640 %r72, %r76
+%r78 = lshr i640 %r77, 64
+%r80 = getelementptr i64, i64* %r3, i32 6 ; round 6: %r3[6]
+%r81 = load i64, i64* %r80
+%r82 = call i576 @mulPv512x64(i64* %r2, i64 %r81)
+%r83 = zext i576 %r82 to i640
+%r84 = add i640 %r78, %r83
+%r85 = trunc i640 %r84 to i64
+%r86 = mul i64 %r85, %r7
+%r87 = call i576 @mulPv512x64(i64* %r4, i64 %r86)
+%r88 = zext i576 %r87 to i640
+%r89 = add i640 %r84, %r88
+%r90 = lshr i640 %r89, 64
+%r92 = getelementptr i64, i64* %r3, i32 7 ; round 7 (last): %r3[7]
+%r93 = load i64, i64* %r92
+%r94 = call i576 @mulPv512x64(i64* %r2, i64 %r93)
+%r95 = zext i576 %r94 to i640
+%r96 = add i640 %r90, %r95
+%r97 = trunc i640 %r96 to i64
+%r98 = mul i64 %r97, %r7
+%r99 = call i576 @mulPv512x64(i64* %r4, i64 %r98)
+%r100 = zext i576 %r99 to i640
+%r101 = add i640 %r96, %r100
+%r102 = lshr i640 %r101, 64
+%r103 = trunc i640 %r102 to i576 ; accumulator after all eight rounds, kept one limb wider than the result
+%r104 = load i64, i64* %r4 ; pack %r4[0..7] into one i512 (complete in %r153)
+%r105 = zext i64 %r104 to i128
+%r107 = getelementptr i64, i64* %r4, i32 1
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i128
+%r110 = shl i128 %r109, 64
+%r111 = or i128 %r105, %r110
+%r112 = zext i128 %r111 to i192
+%r114 = getelementptr i64, i64* %r4, i32 2
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i192
+%r117 = shl i192 %r116, 128
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i256
+%r121 = getelementptr i64, i64* %r4, i32 3
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i256
+%r124 = shl i256 %r123, 192
+%r125 = or i256 %r119, %r124
+%r126 = zext i256 %r125 to i320
+%r128 = getelementptr i64, i64* %r4, i32 4
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i320
+%r131 = shl i320 %r130, 256
+%r132 = or i320 %r126, %r131
+%r133 = zext i320 %r132 to i384
+%r135 = getelementptr i64, i64* %r4, i32 5
+%r136 = load i64, i64* %r135
+%r137 = zext i64 %r136 to i384
+%r138 = shl i384 %r137, 320
+%r139 = or i384 %r133, %r138
+%r140 = zext i384 %r139 to i448
+%r142 = getelementptr i64, i64* %r4, i32 6
+%r143 = load i64, i64* %r142
+%r144 = zext i64 %r143 to i448
+%r145 = shl i448 %r144, 384
+%r146 = or i448 %r140, %r145
+%r147 = zext i448 %r146 to i512
+%r149 = getelementptr i64, i64* %r4, i32 7
+%r150 = load i64, i64* %r149
+%r151 = zext i64 %r150 to i512
+%r152 = shl i512 %r151, 448
+%r153 = or i512 %r147, %r152
+%r154 = zext i512 %r153 to i576
+%r155 = sub i576 %r103, %r154 ; trial subtraction of the %r4 value
+%r156 = lshr i576 %r155, 512
+%r157 = trunc i576 %r156 to i1 ; bit 512 of the difference, used as the "subtraction wrapped" flag
+%r158 = select i1 %r157, i576 %r103, i576 %r155 ; branch-free choice: keep %r103 if the flag is set, else the difference
+%r159 = trunc i576 %r158 to i512
+%r160 = trunc i512 %r159 to i64 ; store the 8 result limbs to %r1[0..7], least significant first
+%r162 = getelementptr i64, i64* %r1, i32 0
+store i64 %r160, i64* %r162
+%r163 = lshr i512 %r159, 64
+%r164 = trunc i512 %r163 to i64
+%r166 = getelementptr i64, i64* %r1, i32 1
+store i64 %r164, i64* %r166
+%r167 = lshr i512 %r163, 64
+%r168 = trunc i512 %r167 to i64
+%r170 = getelementptr i64, i64* %r1, i32 2
+store i64 %r168, i64* %r170
+%r171 = lshr i512 %r167, 64
+%r172 = trunc i512 %r171 to i64
+%r174 = getelementptr i64, i64* %r1, i32 3
+store i64 %r172, i64* %r174
+%r175 = lshr i512 %r171, 64
+%r176 = trunc i512 %r175 to i64
+%r178 = getelementptr i64, i64* %r1, i32 4
+store i64 %r176, i64* %r178
+%r179 = lshr i512 %r175, 64
+%r180 = trunc i512 %r179 to i64
+%r182 = getelementptr i64, i64* %r1, i32 5
+store i64 %r180, i64* %r182
+%r183 = lshr i512 %r179, 64
+%r184 = trunc i512 %r183 to i64
+%r186 = getelementptr i64, i64* %r1, i32 6
+store i64 %r184, i64* %r186
+%r187 = lshr i512 %r183, 64
+%r188 = trunc i512 %r187 to i64
+%r190 = getelementptr i64, i64* %r1, i32 7
+store i64 %r188, i64* %r190
+ret void
+}
+define void @mcl_fp_montNF8L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; same round structure as a word-by-word Montgomery multiply over 8 i64 limbs, but all sums stay in i576 and the final test uses bit 511; %r1 = output, %r2/%r3 = operands, %r4 = 8-limb value (presumably the modulus -- confirm with caller)
+{
+%r6 = getelementptr i64, i64* %r4, i32 -1 ; the word stored immediately before the %r4 array
+%r7 = load i64, i64* %r6 ; presumably the Montgomery constant (-p^-1 mod 2^64) -- TODO confirm against the caller's layout
+%r8 = load i64, i64* %r3 ; round 0: limb %r3[0]
+%r9 = call i576 @mulPv512x64(i64* %r2, i64 %r8) ; helper defined outside this chunk; by name and types, the 8 limbs at %r2 times one i64
+%r10 = trunc i576 %r9 to i64
+%r11 = mul i64 %r10, %r7 ; per-round factor: low limb of the accumulator times %r7, mod 2^64
+%r12 = call i576 @mulPv512x64(i64* %r4, i64 %r11) ; the %r4 value times that factor
+%r13 = add i576 %r9, %r12 ; NOTE(review): added in i576 with no widening, so the sum is assumed to fit in 576 bits -- presumably requires the top bit of the %r4 value to be clear; confirm
+%r14 = lshr i576 %r13, 64 ; drop the low limb (zero by construction if %r7 is the usual Montgomery constant)
+%r16 = getelementptr i64, i64* %r3, i32 1 ; round 1: same steps with %r3[1], accumulating onto %r14
+%r17 = load i64, i64* %r16
+%r18 = call i576 @mulPv512x64(i64* %r2, i64 %r17)
+%r19 = add i576 %r14, %r18
+%r20 = trunc i576 %r19 to i64
+%r21 = mul i64 %r20, %r7
+%r22 = call i576 @mulPv512x64(i64* %r4, i64 %r21)
+%r23 = add i576 %r19, %r22
+%r24 = lshr i576 %r23, 64
+%r26 = getelementptr i64, i64* %r3, i32 2 ; round 2: %r3[2]
+%r27 = load i64, i64* %r26
+%r28 = call i576 @mulPv512x64(i64* %r2, i64 %r27)
+%r29 = add i576 %r24, %r28
+%r30 = trunc i576 %r29 to i64
+%r31 = mul i64 %r30, %r7
+%r32 = call i576 @mulPv512x64(i64* %r4, i64 %r31)
+%r33 = add i576 %r29, %r32
+%r34 = lshr i576 %r33, 64
+%r36 = getelementptr i64, i64* %r3, i32 3 ; round 3: %r3[3]
+%r37 = load i64, i64* %r36
+%r38 = call i576 @mulPv512x64(i64* %r2, i64 %r37)
+%r39 = add i576 %r34, %r38
+%r40 = trunc i576 %r39 to i64
+%r41 = mul i64 %r40, %r7
+%r42 = call i576 @mulPv512x64(i64* %r4, i64 %r41)
+%r43 = add i576 %r39, %r42
+%r44 = lshr i576 %r43, 64
+%r46 = getelementptr i64, i64* %r3, i32 4 ; round 4: %r3[4]
+%r47 = load i64, i64* %r46
+%r48 = call i576 @mulPv512x64(i64* %r2, i64 %r47)
+%r49 = add i576 %r44, %r48
+%r50 = trunc i576 %r49 to i64
+%r51 = mul i64 %r50, %r7
+%r52 = call i576 @mulPv512x64(i64* %r4, i64 %r51)
+%r53 = add i576 %r49, %r52
+%r54 = lshr i576 %r53, 64
+%r56 = getelementptr i64, i64* %r3, i32 5 ; round 5: %r3[5]
+%r57 = load i64, i64* %r56
+%r58 = call i576 @mulPv512x64(i64* %r2, i64 %r57)
+%r59 = add i576 %r54, %r58
+%r60 = trunc i576 %r59 to i64
+%r61 = mul i64 %r60, %r7
+%r62 = call i576 @mulPv512x64(i64* %r4, i64 %r61)
+%r63 = add i576 %r59, %r62
+%r64 = lshr i576 %r63, 64
+%r66 = getelementptr i64, i64* %r3, i32 6 ; round 6: %r3[6]
+%r67 = load i64, i64* %r66
+%r68 = call i576 @mulPv512x64(i64* %r2, i64 %r67)
+%r69 = add i576 %r64, %r68
+%r70 = trunc i576 %r69 to i64
+%r71 = mul i64 %r70, %r7
+%r72 = call i576 @mulPv512x64(i64* %r4, i64 %r71)
+%r73 = add i576 %r69, %r72
+%r74 = lshr i576 %r73, 64
+%r76 = getelementptr i64, i64* %r3, i32 7 ; round 7 (last): %r3[7]
+%r77 = load i64, i64* %r76
+%r78 = call i576 @mulPv512x64(i64* %r2, i64 %r77)
+%r79 = add i576 %r74, %r78
+%r80 = trunc i576 %r79 to i64
+%r81 = mul i64 %r80, %r7
+%r82 = call i576 @mulPv512x64(i64* %r4, i64 %r81)
+%r83 = add i576 %r79, %r82
+%r84 = lshr i576 %r83, 64
+%r85 = trunc i576 %r84 to i512 ; accumulator after all eight rounds, narrowed to 8 limbs
+%r86 = load i64, i64* %r4 ; pack %r4[0..7] into one i512 (complete in %r135)
+%r87 = zext i64 %r86 to i128
+%r89 = getelementptr i64, i64* %r4, i32 1
+%r90 = load i64, i64* %r89
+%r91 = zext i64 %r90 to i128
+%r92 = shl i128 %r91, 64
+%r93 = or i128 %r87, %r92
+%r94 = zext i128 %r93 to i192
+%r96 = getelementptr i64, i64* %r4, i32 2
+%r97 = load i64, i64* %r96
+%r98 = zext i64 %r97 to i192
+%r99 = shl i192 %r98, 128
+%r100 = or i192 %r94, %r99
+%r101 = zext i192 %r100 to i256
+%r103 = getelementptr i64, i64* %r4, i32 3
+%r104 = load i64, i64* %r103
+%r105 = zext i64 %r104 to i256
+%r106 = shl i256 %r105, 192
+%r107 = or i256 %r101, %r106
+%r108 = zext i256 %r107 to i320
+%r110 = getelementptr i64, i64* %r4, i32 4
+%r111 = load i64, i64* %r110
+%r112 = zext i64 %r111 to i320
+%r113 = shl i320 %r112, 256
+%r114 = or i320 %r108, %r113
+%r115 = zext i320 %r114 to i384
+%r117 = getelementptr i64, i64* %r4, i32 5
+%r118 = load i64, i64* %r117
+%r119 = zext i64 %r118 to i384
+%r120 = shl i384 %r119, 320
+%r121 = or i384 %r115, %r120
+%r122 = zext i384 %r121 to i448
+%r124 = getelementptr i64, i64* %r4, i32 6
+%r125 = load i64, i64* %r124
+%r126 = zext i64 %r125 to i448
+%r127 = shl i448 %r126, 384
+%r128 = or i448 %r122, %r127
+%r129 = zext i448 %r128 to i512
+%r131 = getelementptr i64, i64* %r4, i32 7
+%r132 = load i64, i64* %r131
+%r133 = zext i64 %r132 to i512
+%r134 = shl i512 %r133, 448
+%r135 = or i512 %r129, %r134
+%r136 = sub i512 %r85, %r135 ; trial subtraction of the %r4 value, in 512 bits
+%r137 = lshr i512 %r136, 511
+%r138 = trunc i512 %r137 to i1 ; top bit (bit 511) of the difference, used as its sign
+%r139 = select i1 %r138, i512 %r85, i512 %r136 ; branch-free choice: keep %r85 if the difference looks negative, else the difference
+%r140 = trunc i512 %r139 to i64 ; store the 8 result limbs to %r1[0..7], least significant first
+%r142 = getelementptr i64, i64* %r1, i32 0
+store i64 %r140, i64* %r142
+%r143 = lshr i512 %r139, 64
+%r144 = trunc i512 %r143 to i64
+%r146 = getelementptr i64, i64* %r1, i32 1
+store i64 %r144, i64* %r146
+%r147 = lshr i512 %r143, 64
+%r148 = trunc i512 %r147 to i64
+%r150 = getelementptr i64, i64* %r1, i32 2
+store i64 %r148, i64* %r150
+%r151 = lshr i512 %r147, 64
+%r152 = trunc i512 %r151 to i64
+%r154 = getelementptr i64, i64* %r1, i32 3
+store i64 %r152, i64* %r154
+%r155 = lshr i512 %r151, 64
+%r156 = trunc i512 %r155 to i64
+%r158 = getelementptr i64, i64* %r1, i32 4
+store i64 %r156, i64* %r158
+%r159 = lshr i512 %r155, 64
+%r160 = trunc i512 %r159 to i64
+%r162 = getelementptr i64, i64* %r1, i32 5
+store i64 %r160, i64* %r162
+%r163 = lshr i512 %r159, 64
+%r164 = trunc i512 %r163 to i64
+%r166 = getelementptr i64, i64* %r1, i32 6
+store i64 %r164, i64* %r166
+%r167 = lshr i512 %r163, 64
+%r168 = trunc i512 %r167 to i64
+%r170 = getelementptr i64, i64* %r1, i32 7
+store i64 %r168, i64* %r170
+ret void
+}
+define void @mcl_fp_montRed8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+{
+%r5 = getelementptr i64, i64* %r3, i32 -1
+%r6 = load i64, i64* %r5
+%r7 = load i64, i64* %r3
+%r8 = zext i64 %r7 to i128
+%r10 = getelementptr i64, i64* %r3, i32 1
+%r11 = load i64, i64* %r10
+%r12 = zext i64 %r11 to i128
+%r13 = shl i128 %r12, 64
+%r14 = or i128 %r8, %r13
+%r15 = zext i128 %r14 to i192
+%r17 = getelementptr i64, i64* %r3, i32 2
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i192
+%r20 = shl i192 %r19, 128
+%r21 = or i192 %r15, %r20
+%r22 = zext i192 %r21 to i256
+%r24 = getelementptr i64, i64* %r3, i32 3
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i256
+%r27 = shl i256 %r26, 192
+%r28 = or i256 %r22, %r27
+%r29 = zext i256 %r28 to i320
+%r31 = getelementptr i64, i64* %r3, i32 4
+%r32 = load i64, i64* %r31
+%r33 = zext i64 %r32 to i320
+%r34 = shl i320 %r33, 256
+%r35 = or i320 %r29, %r34
+%r36 = zext i320 %r35 to i384
+%r38 = getelementptr i64, i64* %r3, i32 5
+%r39 = load i64, i64* %r38
+%r40 = zext i64 %r39 to i384
+%r41 = shl i384 %r40, 320
+%r42 = or i384 %r36, %r41
+%r43 = zext i384 %r42 to i448
+%r45 = getelementptr i64, i64* %r3, i32 6
+%r46 = load i64, i64* %r45
+%r47 = zext i64 %r46 to i448
+%r48 = shl i448 %r47, 384
+%r49 = or i448 %r43, %r48
+%r50 = zext i448 %r49 to i512
+%r52 = getelementptr i64, i64* %r3, i32 7
+%r53 = load i64, i64* %r52
+%r54 = zext i64 %r53 to i512
+%r55 = shl i512 %r54, 448
+%r56 = or i512 %r50, %r55
+%r57 = load i64, i64* %r2
+%r58 = zext i64 %r57 to i128
+%r60 = getelementptr i64, i64* %r2, i32 1
+%r61 = load i64, i64* %r60
+%r62 = zext i64 %r61 to i128
+%r63 = shl i128 %r62, 64
+%r64 = or i128 %r58, %r63
+%r65 = zext i128 %r64 to i192
+%r67 = getelementptr i64, i64* %r2, i32 2
+%r68 = load i64, i64* %r67
+%r69 = zext i64 %r68 to i192
+%r70 = shl i192 %r69, 128
+%r71 = or i192 %r65, %r70
+%r72 = zext i192 %r71 to i256
+%r74 = getelementptr i64, i64* %r2, i32 3
+%r75 = load i64, i64* %r74
+%r76 = zext i64 %r75 to i256
+%r77 = shl i256 %r76, 192
+%r78 = or i256 %r72, %r77
+%r79 = zext i256 %r78 to i320
+%r81 = getelementptr i64, i64* %r2, i32 4
+%r82 = load i64, i64* %r81
+%r83 = zext i64 %r82 to i320
+%r84 = shl i320 %r83, 256
+%r85 = or i320 %r79, %r84
+%r86 = zext i320 %r85 to i384
+%r88 = getelementptr i64, i64* %r2, i32 5
+%r89 = load i64, i64* %r88
+%r90 = zext i64 %r89 to i384
+%r91 = shl i384 %r90, 320
+%r92 = or i384 %r86, %r91
+%r93 = zext i384 %r92 to i448
+%r95 = getelementptr i64, i64* %r2, i32 6
+%r96 = load i64, i64* %r95
+%r97 = zext i64 %r96 to i448
+%r98 = shl i448 %r97, 384
+%r99 = or i448 %r93, %r98
+%r100 = zext i448 %r99 to i512
+%r102 = getelementptr i64, i64* %r2, i32 7
+%r103 = load i64, i64* %r102
+%r104 = zext i64 %r103 to i512
+%r105 = shl i512 %r104, 448
+%r106 = or i512 %r100, %r105
+%r107 = zext i512 %r106 to i576
+%r109 = getelementptr i64, i64* %r2, i32 8
+%r110 = load i64, i64* %r109
+%r111 = zext i64 %r110 to i576
+%r112 = shl i576 %r111, 512
+%r113 = or i576 %r107, %r112
+%r114 = zext i576 %r113 to i640
+%r116 = getelementptr i64, i64* %r2, i32 9
+%r117 = load i64, i64* %r116
+%r118 = zext i64 %r117 to i640
+%r119 = shl i640 %r118, 576
+%r120 = or i640 %r114, %r119
+%r121 = zext i640 %r120 to i704
+%r123 = getelementptr i64, i64* %r2, i32 10
+%r124 = load i64, i64* %r123
+%r125 = zext i64 %r124 to i704
+%r126 = shl i704 %r125, 640
+%r127 = or i704 %r121, %r126
+%r128 = zext i704 %r127 to i768
+%r130 = getelementptr i64, i64* %r2, i32 11
+%r131 = load i64, i64* %r130
+%r132 = zext i64 %r131 to i768
+%r133 = shl i768 %r132, 704
+%r134 = or i768 %r128, %r133
+%r135 = zext i768 %r134 to i832
+%r137 = getelementptr i64, i64* %r2, i32 12
+%r138 = load i64, i64* %r137
+%r139 = zext i64 %r138 to i832
+%r140 = shl i832 %r139, 768
+%r141 = or i832 %r135, %r140
+%r142 = zext i832 %r141 to i896
+%r144 = getelementptr i64, i64* %r2, i32 13
+%r145 = load i64, i64* %r144
+%r146 = zext i64 %r145 to i896
+%r147 = shl i896 %r146, 832
+%r148 = or i896 %r142, %r147
+%r149 = zext i896 %r148 to i960
+%r151 = getelementptr i64, i64* %r2, i32 14
+%r152 = load i64, i64* %r151
+%r153 = zext i64 %r152 to i960
+%r154 = shl i960 %r153, 896
+%r155 = or i960 %r149, %r154
+%r156 = zext i960 %r155 to i1024
+%r158 = getelementptr i64, i64* %r2, i32 15
+%r159 = load i64, i64* %r158
+%r160 = zext i64 %r159 to i1024
+%r161 = shl i1024 %r160, 960
+%r162 = or i1024 %r156, %r161
+%r163 = zext i1024 %r162 to i1088
+%r164 = trunc i1088 %r163 to i64
+%r165 = mul i64 %r164, %r6
+%r166 = call i576 @mulPv512x64(i64* %r3, i64 %r165)
+%r167 = zext i576 %r166 to i1088
+%r168 = add i1088 %r163, %r167
+%r169 = lshr i1088 %r168, 64
+%r170 = trunc i1088 %r169 to i1024
+%r171 = trunc i1024 %r170 to i64
+%r172 = mul i64 %r171, %r6
+%r173 = call i576 @mulPv512x64(i64* %r3, i64 %r172)
+%r174 = zext i576 %r173 to i1024
+%r175 = add i1024 %r170, %r174
+%r176 = lshr i1024 %r175, 64
+%r177 = trunc i1024 %r176 to i960
+%r178 = trunc i960 %r177 to i64
+%r179 = mul i64 %r178, %r6
+%r180 = call i576 @mulPv512x64(i64* %r3, i64 %r179)
+%r181 = zext i576 %r180 to i960
+%r182 = add i960 %r177, %r181
+%r183 = lshr i960 %r182, 64
+%r184 = trunc i960 %r183 to i896
+%r185 = trunc i896 %r184 to i64
+%r186 = mul i64 %r185, %r6
+%r187 = call i576 @mulPv512x64(i64* %r3, i64 %r186)
+%r188 = zext i576 %r187 to i896
+%r189 = add i896 %r184, %r188
+%r190 = lshr i896 %r189, 64
+%r191 = trunc i896 %r190 to i832
+%r192 = trunc i832 %r191 to i64
+%r193 = mul i64 %r192, %r6
+%r194 = call i576 @mulPv512x64(i64* %r3, i64 %r193)
+%r195 = zext i576 %r194 to i832
+%r196 = add i832 %r191, %r195
+%r197 = lshr i832 %r196, 64
+%r198 = trunc i832 %r197 to i768
+%r199 = trunc i768 %r198 to i64
+%r200 = mul i64 %r199, %r6
+%r201 = call i576 @mulPv512x64(i64* %r3, i64 %r200)
+%r202 = zext i576 %r201 to i768
+%r203 = add i768 %r198, %r202
+%r204 = lshr i768 %r203, 64
+%r205 = trunc i768 %r204 to i704
+%r206 = trunc i704 %r205 to i64
+%r207 = mul i64 %r206, %r6
+%r208 = call i576 @mulPv512x64(i64* %r3, i64 %r207)
+%r209 = zext i576 %r208 to i704
+%r210 = add i704 %r205, %r209
+%r211 = lshr i704 %r210, 64
+%r212 = trunc i704 %r211 to i640
+%r213 = trunc i640 %r212 to i64
+%r214 = mul i64 %r213, %r6
+%r215 = call i576 @mulPv512x64(i64* %r3, i64 %r214)
+%r216 = zext i576 %r215 to i640
+%r217 = add i640 %r212, %r216
+%r218 = lshr i640 %r217, 64
+%r219 = trunc i640 %r218 to i576
+%r220 = zext i512 %r56 to i576
+%r221 = sub i576 %r219, %r220
+%r222 = lshr i576 %r221, 512
+%r223 = trunc i576 %r222 to i1
+%r224 = select i1 %r223, i576 %r219, i576 %r221
+%r225 = trunc i576 %r224 to i512
+%r226 = trunc i512 %r225 to i64
+%r228 = getelementptr i64, i64* %r1, i32 0
+store i64 %r226, i64* %r228
+%r229 = lshr i512 %r225, 64
+%r230 = trunc i512 %r229 to i64
+%r232 = getelementptr i64, i64* %r1, i32 1
+store i64 %r230, i64* %r232
+%r233 = lshr i512 %r229, 64
+%r234 = trunc i512 %r233 to i64
+%r236 = getelementptr i64, i64* %r1, i32 2
+store i64 %r234, i64* %r236
+%r237 = lshr i512 %r233, 64
+%r238 = trunc i512 %r237 to i64
+%r240 = getelementptr i64, i64* %r1, i32 3
+store i64 %r238, i64* %r240
+%r241 = lshr i512 %r237, 64
+%r242 = trunc i512 %r241 to i64
+%r244 = getelementptr i64, i64* %r1, i32 4
+store i64 %r242, i64* %r244
+%r245 = lshr i512 %r241, 64
+%r246 = trunc i512 %r245 to i64
+%r248 = getelementptr i64, i64* %r1, i32 5
+store i64 %r246, i64* %r248
+%r249 = lshr i512 %r245, 64
+%r250 = trunc i512 %r249 to i64
+%r252 = getelementptr i64, i64* %r1, i32 6
+store i64 %r250, i64* %r252
+%r253 = lshr i512 %r249, 64
+%r254 = trunc i512 %r253 to i64
+%r256 = getelementptr i64, i64* %r1, i32 7
+store i64 %r254, i64* %r256
+ret void
+}
+define i64 @mcl_fp_addPre8L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; 8-limb add with no reduction: stores the low 512 bits of [%r3] + [%r4] to %r2 and returns the carry
+{ ; each operand is read as eight i64 limbs; limb i is shifted left by 64*i, so limb 0 is the least significant
+%r5 = load i64, i64* %r3 ; --- assemble the first operand from %r3 ---
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r3, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r3, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r3, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r3, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576 ; widen by one limb so the carry out of bit 511 is kept
+%r56 = load i64, i64* %r4 ; --- assemble the second operand from %r4 ---
+%r57 = zext i64 %r56 to i128
+%r59 = getelementptr i64, i64* %r4, i32 1
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i128
+%r62 = shl i128 %r61, 64
+%r63 = or i128 %r57, %r62
+%r64 = zext i128 %r63 to i192
+%r66 = getelementptr i64, i64* %r4, i32 2
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i192
+%r69 = shl i192 %r68, 128
+%r70 = or i192 %r64, %r69
+%r71 = zext i192 %r70 to i256
+%r73 = getelementptr i64, i64* %r4, i32 3
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i256
+%r76 = shl i256 %r75, 192
+%r77 = or i256 %r71, %r76
+%r78 = zext i256 %r77 to i320
+%r80 = getelementptr i64, i64* %r4, i32 4
+%r81 = load i64, i64* %r80
+%r82 = zext i64 %r81 to i320
+%r83 = shl i320 %r82, 256
+%r84 = or i320 %r78, %r83
+%r85 = zext i320 %r84 to i384
+%r87 = getelementptr i64, i64* %r4, i32 5
+%r88 = load i64, i64* %r87
+%r89 = zext i64 %r88 to i384
+%r90 = shl i384 %r89, 320
+%r91 = or i384 %r85, %r90
+%r92 = zext i384 %r91 to i448
+%r94 = getelementptr i64, i64* %r4, i32 6
+%r95 = load i64, i64* %r94
+%r96 = zext i64 %r95 to i448
+%r97 = shl i448 %r96, 384
+%r98 = or i448 %r92, %r97
+%r99 = zext i448 %r98 to i512
+%r101 = getelementptr i64, i64* %r4, i32 7
+%r102 = load i64, i64* %r101
+%r103 = zext i64 %r102 to i512
+%r104 = shl i512 %r103, 448
+%r105 = or i512 %r99, %r104
+%r106 = zext i512 %r105 to i576
+%r107 = add i576 %r55, %r106 ; 576-bit sum; bit 512 holds the carry
+%r108 = trunc i576 %r107 to i512 ; low 512 bits are written to %r2 below, one 64-bit limb at a time
+%r109 = trunc i512 %r108 to i64
+%r111 = getelementptr i64, i64* %r2, i32 0
+store i64 %r109, i64* %r111
+%r112 = lshr i512 %r108, 64
+%r113 = trunc i512 %r112 to i64
+%r115 = getelementptr i64, i64* %r2, i32 1
+store i64 %r113, i64* %r115
+%r116 = lshr i512 %r112, 64
+%r117 = trunc i512 %r116 to i64
+%r119 = getelementptr i64, i64* %r2, i32 2
+store i64 %r117, i64* %r119
+%r120 = lshr i512 %r116, 64
+%r121 = trunc i512 %r120 to i64
+%r123 = getelementptr i64, i64* %r2, i32 3
+store i64 %r121, i64* %r123
+%r124 = lshr i512 %r120, 64
+%r125 = trunc i512 %r124 to i64
+%r127 = getelementptr i64, i64* %r2, i32 4
+store i64 %r125, i64* %r127
+%r128 = lshr i512 %r124, 64
+%r129 = trunc i512 %r128 to i64
+%r131 = getelementptr i64, i64* %r2, i32 5
+store i64 %r129, i64* %r131
+%r132 = lshr i512 %r128, 64
+%r133 = trunc i512 %r132 to i64
+%r135 = getelementptr i64, i64* %r2, i32 6
+store i64 %r133, i64* %r135
+%r136 = lshr i512 %r132, 64
+%r137 = trunc i512 %r136 to i64
+%r139 = getelementptr i64, i64* %r2, i32 7
+store i64 %r137, i64* %r139
+%r140 = lshr i576 %r107, 512 ; move the carry down to bit 0
+%r141 = trunc i576 %r140 to i64
+ret i64 %r141 ; 0 or 1, because both zero-extended inputs are below 2^512
+}
+define i64 @mcl_fp_subPre8L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; 8-limb subtract with no reduction: stores the low 512 bits of [%r3] - [%r4] to %r2 and returns the borrow
+{ ; each operand is read as eight i64 limbs; limb i is shifted left by 64*i, so limb 0 is the least significant
+%r5 = load i64, i64* %r3 ; --- assemble the minuend from %r3 ---
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r3, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r3, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r3, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r3, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576 ; widen by one limb so a borrow shows up above bit 511
+%r56 = load i64, i64* %r4 ; --- assemble the subtrahend from %r4 ---
+%r57 = zext i64 %r56 to i128
+%r59 = getelementptr i64, i64* %r4, i32 1
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i128
+%r62 = shl i128 %r61, 64
+%r63 = or i128 %r57, %r62
+%r64 = zext i128 %r63 to i192
+%r66 = getelementptr i64, i64* %r4, i32 2
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i192
+%r69 = shl i192 %r68, 128
+%r70 = or i192 %r64, %r69
+%r71 = zext i192 %r70 to i256
+%r73 = getelementptr i64, i64* %r4, i32 3
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i256
+%r76 = shl i256 %r75, 192
+%r77 = or i256 %r71, %r76
+%r78 = zext i256 %r77 to i320
+%r80 = getelementptr i64, i64* %r4, i32 4
+%r81 = load i64, i64* %r80
+%r82 = zext i64 %r81 to i320
+%r83 = shl i320 %r82, 256
+%r84 = or i320 %r78, %r83
+%r85 = zext i320 %r84 to i384
+%r87 = getelementptr i64, i64* %r4, i32 5
+%r88 = load i64, i64* %r87
+%r89 = zext i64 %r88 to i384
+%r90 = shl i384 %r89, 320
+%r91 = or i384 %r85, %r90
+%r92 = zext i384 %r91 to i448
+%r94 = getelementptr i64, i64* %r4, i32 6
+%r95 = load i64, i64* %r94
+%r96 = zext i64 %r95 to i448
+%r97 = shl i448 %r96, 384
+%r98 = or i448 %r92, %r97
+%r99 = zext i448 %r98 to i512
+%r101 = getelementptr i64, i64* %r4, i32 7
+%r102 = load i64, i64* %r101
+%r103 = zext i64 %r102 to i512
+%r104 = shl i512 %r103, 448
+%r105 = or i512 %r99, %r104
+%r106 = zext i512 %r105 to i576
+%r107 = sub i576 %r55, %r106 ; 576-bit difference; wraps (bits 512..575 all set) when [%r3] < [%r4]
+%r108 = trunc i576 %r107 to i512 ; low 512 bits are written to %r2 below, one 64-bit limb at a time
+%r109 = trunc i512 %r108 to i64
+%r111 = getelementptr i64, i64* %r2, i32 0
+store i64 %r109, i64* %r111
+%r112 = lshr i512 %r108, 64
+%r113 = trunc i512 %r112 to i64
+%r115 = getelementptr i64, i64* %r2, i32 1
+store i64 %r113, i64* %r115
+%r116 = lshr i512 %r112, 64
+%r117 = trunc i512 %r116 to i64
+%r119 = getelementptr i64, i64* %r2, i32 2
+store i64 %r117, i64* %r119
+%r120 = lshr i512 %r116, 64
+%r121 = trunc i512 %r120 to i64
+%r123 = getelementptr i64, i64* %r2, i32 3
+store i64 %r121, i64* %r123
+%r124 = lshr i512 %r120, 64
+%r125 = trunc i512 %r124 to i64
+%r127 = getelementptr i64, i64* %r2, i32 4
+store i64 %r125, i64* %r127
+%r128 = lshr i512 %r124, 64
+%r129 = trunc i512 %r128 to i64
+%r131 = getelementptr i64, i64* %r2, i32 5
+store i64 %r129, i64* %r131
+%r132 = lshr i512 %r128, 64
+%r133 = trunc i512 %r132 to i64
+%r135 = getelementptr i64, i64* %r2, i32 6
+store i64 %r133, i64* %r135
+%r136 = lshr i512 %r132, 64
+%r137 = trunc i512 %r136 to i64
+%r139 = getelementptr i64, i64* %r2, i32 7
+store i64 %r137, i64* %r139
+%r140 = lshr i576 %r107, 512 ; bring the bits above 511 down to bit 0
+%r141 = trunc i576 %r140 to i64 ; all ones after a wrap, zero otherwise
+%r143 = and i64 %r141, 1 ; keep only bit 0 so the result is 0 or 1
+ret i64 %r143 ; 1 when [%r3] < [%r4], else 0
+}
+define void @mcl_fp_shr1_8L(i64* noalias  %r1, i64* noalias  %r2) ; logical right shift by one bit of the 8-limb value at %r2, result written to %r1
+{ ; the input is read as eight i64 limbs; limb i is shifted left by 64*i, so limb 0 is the least significant
+%r3 = load i64, i64* %r2 ; --- assemble the 512-bit input from %r2 ---
+%r4 = zext i64 %r3 to i128
+%r6 = getelementptr i64, i64* %r2, i32 1
+%r7 = load i64, i64* %r6
+%r8 = zext i64 %r7 to i128
+%r9 = shl i128 %r8, 64
+%r10 = or i128 %r4, %r9
+%r11 = zext i128 %r10 to i192
+%r13 = getelementptr i64, i64* %r2, i32 2
+%r14 = load i64, i64* %r13
+%r15 = zext i64 %r14 to i192
+%r16 = shl i192 %r15, 128
+%r17 = or i192 %r11, %r16
+%r18 = zext i192 %r17 to i256
+%r20 = getelementptr i64, i64* %r2, i32 3
+%r21 = load i64, i64* %r20
+%r22 = zext i64 %r21 to i256
+%r23 = shl i256 %r22, 192
+%r24 = or i256 %r18, %r23
+%r25 = zext i256 %r24 to i320
+%r27 = getelementptr i64, i64* %r2, i32 4
+%r28 = load i64, i64* %r27
+%r29 = zext i64 %r28 to i320
+%r30 = shl i320 %r29, 256
+%r31 = or i320 %r25, %r30
+%r32 = zext i320 %r31 to i384
+%r34 = getelementptr i64, i64* %r2, i32 5
+%r35 = load i64, i64* %r34
+%r36 = zext i64 %r35 to i384
+%r37 = shl i384 %r36, 320
+%r38 = or i384 %r32, %r37
+%r39 = zext i384 %r38 to i448
+%r41 = getelementptr i64, i64* %r2, i32 6
+%r42 = load i64, i64* %r41
+%r43 = zext i64 %r42 to i448
+%r44 = shl i448 %r43, 384
+%r45 = or i448 %r39, %r44
+%r46 = zext i448 %r45 to i512
+%r48 = getelementptr i64, i64* %r2, i32 7
+%r49 = load i64, i64* %r48
+%r50 = zext i64 %r49 to i512
+%r51 = shl i512 %r50, 448
+%r52 = or i512 %r46, %r51
+%r53 = lshr i512 %r52, 1 ; the shift itself: bit 0 is discarded and bit 511 of the result becomes zero
+%r54 = trunc i512 %r53 to i64 ; --- write the shifted value to %r1, one 64-bit limb at a time ---
+%r56 = getelementptr i64, i64* %r1, i32 0
+store i64 %r54, i64* %r56
+%r57 = lshr i512 %r53, 64
+%r58 = trunc i512 %r57 to i64
+%r60 = getelementptr i64, i64* %r1, i32 1
+store i64 %r58, i64* %r60
+%r61 = lshr i512 %r57, 64
+%r62 = trunc i512 %r61 to i64
+%r64 = getelementptr i64, i64* %r1, i32 2
+store i64 %r62, i64* %r64
+%r65 = lshr i512 %r61, 64
+%r66 = trunc i512 %r65 to i64
+%r68 = getelementptr i64, i64* %r1, i32 3
+store i64 %r66, i64* %r68
+%r69 = lshr i512 %r65, 64
+%r70 = trunc i512 %r69 to i64
+%r72 = getelementptr i64, i64* %r1, i32 4
+store i64 %r70, i64* %r72
+%r73 = lshr i512 %r69, 64
+%r74 = trunc i512 %r73 to i64
+%r76 = getelementptr i64, i64* %r1, i32 5
+store i64 %r74, i64* %r76
+%r77 = lshr i512 %r73, 64
+%r78 = trunc i512 %r77 to i64
+%r80 = getelementptr i64, i64* %r1, i32 6
+store i64 %r78, i64* %r80
+%r81 = lshr i512 %r77, 64
+%r82 = trunc i512 %r81 to i64
+%r84 = getelementptr i64, i64* %r1, i32 7
+store i64 %r82, i64* %r84
+ret void
+}
+define void @mcl_fp_add8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; %r1 = [%r2] + [%r3], replaced by that sum minus [%r4] when the bit-512 test below is clear
+{ ; operands are eight i64 limbs each, limb 0 least significant; NOTE(review): %r4 is presumably the field modulus - confirm against callers
+%r5 = load i64, i64* %r2 ; --- assemble the first addend from %r2 ---
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = load i64, i64* %r3 ; --- assemble the second addend from %r3 ---
+%r56 = zext i64 %r55 to i128
+%r58 = getelementptr i64, i64* %r3, i32 1
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i128
+%r61 = shl i128 %r60, 64
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i192
+%r65 = getelementptr i64, i64* %r3, i32 2
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i192
+%r68 = shl i192 %r67, 128
+%r69 = or i192 %r63, %r68
+%r70 = zext i192 %r69 to i256
+%r72 = getelementptr i64, i64* %r3, i32 3
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i256
+%r75 = shl i256 %r74, 192
+%r76 = or i256 %r70, %r75
+%r77 = zext i256 %r76 to i320
+%r79 = getelementptr i64, i64* %r3, i32 4
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i320
+%r82 = shl i320 %r81, 256
+%r83 = or i320 %r77, %r82
+%r84 = zext i320 %r83 to i384
+%r86 = getelementptr i64, i64* %r3, i32 5
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i384
+%r89 = shl i384 %r88, 320
+%r90 = or i384 %r84, %r89
+%r91 = zext i384 %r90 to i448
+%r93 = getelementptr i64, i64* %r3, i32 6
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i448
+%r96 = shl i448 %r95, 384
+%r97 = or i448 %r91, %r96
+%r98 = zext i448 %r97 to i512
+%r100 = getelementptr i64, i64* %r3, i32 7
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i512
+%r103 = shl i512 %r102, 448
+%r104 = or i512 %r98, %r103
+%r105 = zext i512 %r54 to i576 ; widen both addends by one limb so the carry of the sum is kept
+%r106 = zext i512 %r104 to i576
+%r107 = add i576 %r105, %r106 ; full 576-bit sum
+%r108 = trunc i576 %r107 to i512 ; the low 512 bits of the raw sum are stored to %r1 first, unconditionally
+%r109 = trunc i512 %r108 to i64
+%r111 = getelementptr i64, i64* %r1, i32 0
+store i64 %r109, i64* %r111
+%r112 = lshr i512 %r108, 64
+%r113 = trunc i512 %r112 to i64
+%r115 = getelementptr i64, i64* %r1, i32 1
+store i64 %r113, i64* %r115
+%r116 = lshr i512 %r112, 64
+%r117 = trunc i512 %r116 to i64
+%r119 = getelementptr i64, i64* %r1, i32 2
+store i64 %r117, i64* %r119
+%r120 = lshr i512 %r116, 64
+%r121 = trunc i512 %r120 to i64
+%r123 = getelementptr i64, i64* %r1, i32 3
+store i64 %r121, i64* %r123
+%r124 = lshr i512 %r120, 64
+%r125 = trunc i512 %r124 to i64
+%r127 = getelementptr i64, i64* %r1, i32 4
+store i64 %r125, i64* %r127
+%r128 = lshr i512 %r124, 64
+%r129 = trunc i512 %r128 to i64
+%r131 = getelementptr i64, i64* %r1, i32 5
+store i64 %r129, i64* %r131
+%r132 = lshr i512 %r128, 64
+%r133 = trunc i512 %r132 to i64
+%r135 = getelementptr i64, i64* %r1, i32 6
+store i64 %r133, i64* %r135
+%r136 = lshr i512 %r132, 64
+%r137 = trunc i512 %r136 to i64
+%r139 = getelementptr i64, i64* %r1, i32 7
+store i64 %r137, i64* %r139
+%r140 = load i64, i64* %r4 ; --- assemble the third operand from %r4 ---
+%r141 = zext i64 %r140 to i128
+%r143 = getelementptr i64, i64* %r4, i32 1
+%r144 = load i64, i64* %r143
+%r145 = zext i64 %r144 to i128
+%r146 = shl i128 %r145, 64
+%r147 = or i128 %r141, %r146
+%r148 = zext i128 %r147 to i192
+%r150 = getelementptr i64, i64* %r4, i32 2
+%r151 = load i64, i64* %r150
+%r152 = zext i64 %r151 to i192
+%r153 = shl i192 %r152, 128
+%r154 = or i192 %r148, %r153
+%r155 = zext i192 %r154 to i256
+%r157 = getelementptr i64, i64* %r4, i32 3
+%r158 = load i64, i64* %r157
+%r159 = zext i64 %r158 to i256
+%r160 = shl i256 %r159, 192
+%r161 = or i256 %r155, %r160
+%r162 = zext i256 %r161 to i320
+%r164 = getelementptr i64, i64* %r4, i32 4
+%r165 = load i64, i64* %r164
+%r166 = zext i64 %r165 to i320
+%r167 = shl i320 %r166, 256
+%r168 = or i320 %r162, %r167
+%r169 = zext i320 %r168 to i384
+%r171 = getelementptr i64, i64* %r4, i32 5
+%r172 = load i64, i64* %r171
+%r173 = zext i64 %r172 to i384
+%r174 = shl i384 %r173, 320
+%r175 = or i384 %r169, %r174
+%r176 = zext i384 %r175 to i448
+%r178 = getelementptr i64, i64* %r4, i32 6
+%r179 = load i64, i64* %r178
+%r180 = zext i64 %r179 to i448
+%r181 = shl i448 %r180, 384
+%r182 = or i448 %r176, %r181
+%r183 = zext i448 %r182 to i512
+%r185 = getelementptr i64, i64* %r4, i32 7
+%r186 = load i64, i64* %r185
+%r187 = zext i64 %r186 to i512
+%r188 = shl i512 %r187, 448
+%r189 = or i512 %r183, %r188
+%r190 = zext i512 %r189 to i576
+%r191 = sub i576 %r107, %r190 ; sum - [%r4] in 576 bits
+%r192 = lshr i576 %r191, 512 ; isolate bit 512 of the difference
+%r193 = trunc i576 %r192 to i1 ; NOTE(review): this bit is a true borrow only if the sum is below 2*[%r4], i.e. both inputs were already below [%r4] - confirm
+br i1%r193, label %carry, label %nocarry
+nocarry: ; bit clear: overwrite %r1 with the low 512 bits of sum - [%r4]
+%r194 = trunc i576 %r191 to i512
+%r195 = trunc i512 %r194 to i64
+%r197 = getelementptr i64, i64* %r1, i32 0
+store i64 %r195, i64* %r197
+%r198 = lshr i512 %r194, 64
+%r199 = trunc i512 %r198 to i64
+%r201 = getelementptr i64, i64* %r1, i32 1
+store i64 %r199, i64* %r201
+%r202 = lshr i512 %r198, 64
+%r203 = trunc i512 %r202 to i64
+%r205 = getelementptr i64, i64* %r1, i32 2
+store i64 %r203, i64* %r205
+%r206 = lshr i512 %r202, 64
+%r207 = trunc i512 %r206 to i64
+%r209 = getelementptr i64, i64* %r1, i32 3
+store i64 %r207, i64* %r209
+%r210 = lshr i512 %r206, 64
+%r211 = trunc i512 %r210 to i64
+%r213 = getelementptr i64, i64* %r1, i32 4
+store i64 %r211, i64* %r213
+%r214 = lshr i512 %r210, 64
+%r215 = trunc i512 %r214 to i64
+%r217 = getelementptr i64, i64* %r1, i32 5
+store i64 %r215, i64* %r217
+%r218 = lshr i512 %r214, 64
+%r219 = trunc i512 %r218 to i64
+%r221 = getelementptr i64, i64* %r1, i32 6
+store i64 %r219, i64* %r221
+%r222 = lshr i512 %r218, 64
+%r223 = trunc i512 %r222 to i64
+%r225 = getelementptr i64, i64* %r1, i32 7
+store i64 %r223, i64* %r225
+ret void
+carry: ; bit set: the raw sum stored earlier stays in %r1
+ret void
+}
+define void @mcl_fp_addNF8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; branch-free add: %r1 = [%r2] + [%r3], or that sum minus [%r4], chosen by the top bit of the difference
+{ ; operands are eight i64 limbs each, limb 0 least significant; NOTE(review): %r4 is presumably the field modulus - confirm against callers
+%r5 = load i64, i64* %r2 ; --- assemble the first addend from %r2 ---
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = load i64, i64* %r3 ; --- assemble the second addend from %r3 ---
+%r56 = zext i64 %r55 to i128
+%r58 = getelementptr i64, i64* %r3, i32 1
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i128
+%r61 = shl i128 %r60, 64
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i192
+%r65 = getelementptr i64, i64* %r3, i32 2
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i192
+%r68 = shl i192 %r67, 128
+%r69 = or i192 %r63, %r68
+%r70 = zext i192 %r69 to i256
+%r72 = getelementptr i64, i64* %r3, i32 3
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i256
+%r75 = shl i256 %r74, 192
+%r76 = or i256 %r70, %r75
+%r77 = zext i256 %r76 to i320
+%r79 = getelementptr i64, i64* %r3, i32 4
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i320
+%r82 = shl i320 %r81, 256
+%r83 = or i320 %r77, %r82
+%r84 = zext i320 %r83 to i384
+%r86 = getelementptr i64, i64* %r3, i32 5
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i384
+%r89 = shl i384 %r88, 320
+%r90 = or i384 %r84, %r89
+%r91 = zext i384 %r90 to i448
+%r93 = getelementptr i64, i64* %r3, i32 6
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i448
+%r96 = shl i448 %r95, 384
+%r97 = or i448 %r91, %r96
+%r98 = zext i448 %r97 to i512
+%r100 = getelementptr i64, i64* %r3, i32 7
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i512
+%r103 = shl i512 %r102, 448
+%r104 = or i512 %r98, %r103
+%r105 = add i512 %r54, %r104 ; sum kept at 512 bits with no widening, so a carry out of bit 511 would be dropped
+%r106 = load i64, i64* %r4 ; --- assemble the third operand from %r4 ---
+%r107 = zext i64 %r106 to i128
+%r109 = getelementptr i64, i64* %r4, i32 1
+%r110 = load i64, i64* %r109
+%r111 = zext i64 %r110 to i128
+%r112 = shl i128 %r111, 64
+%r113 = or i128 %r107, %r112
+%r114 = zext i128 %r113 to i192
+%r116 = getelementptr i64, i64* %r4, i32 2
+%r117 = load i64, i64* %r116
+%r118 = zext i64 %r117 to i192
+%r119 = shl i192 %r118, 128
+%r120 = or i192 %r114, %r119
+%r121 = zext i192 %r120 to i256
+%r123 = getelementptr i64, i64* %r4, i32 3
+%r124 = load i64, i64* %r123
+%r125 = zext i64 %r124 to i256
+%r126 = shl i256 %r125, 192
+%r127 = or i256 %r121, %r126
+%r128 = zext i256 %r127 to i320
+%r130 = getelementptr i64, i64* %r4, i32 4
+%r131 = load i64, i64* %r130
+%r132 = zext i64 %r131 to i320
+%r133 = shl i320 %r132, 256
+%r134 = or i320 %r128, %r133
+%r135 = zext i320 %r134 to i384
+%r137 = getelementptr i64, i64* %r4, i32 5
+%r138 = load i64, i64* %r137
+%r139 = zext i64 %r138 to i384
+%r140 = shl i384 %r139, 320
+%r141 = or i384 %r135, %r140
+%r142 = zext i384 %r141 to i448
+%r144 = getelementptr i64, i64* %r4, i32 6
+%r145 = load i64, i64* %r144
+%r146 = zext i64 %r145 to i448
+%r147 = shl i448 %r146, 384
+%r148 = or i448 %r142, %r147
+%r149 = zext i448 %r148 to i512
+%r151 = getelementptr i64, i64* %r4, i32 7
+%r152 = load i64, i64* %r151
+%r153 = zext i64 %r152 to i512
+%r154 = shl i512 %r153, 448
+%r155 = or i512 %r149, %r154
+%r156 = sub i512 %r105, %r155 ; sum - [%r4], also at 512 bits
+%r157 = lshr i512 %r156, 511 ; isolate bit 511, the top bit of the difference
+%r158 = trunc i512 %r157 to i1 ; NOTE(review): using bit 511 as a sign is sound only if the sum and [%r4] stay below 2^511 - presumably what "NF" implies; confirm
+%r159 = select i1 %r158, i512 %r105, i512 %r156 ; top bit set -> keep the plain sum, clear -> keep the difference
+%r160 = trunc i512 %r159 to i64 ; --- write the selected value to %r1, one 64-bit limb at a time ---
+%r162 = getelementptr i64, i64* %r1, i32 0
+store i64 %r160, i64* %r162
+%r163 = lshr i512 %r159, 64
+%r164 = trunc i512 %r163 to i64
+%r166 = getelementptr i64, i64* %r1, i32 1
+store i64 %r164, i64* %r166
+%r167 = lshr i512 %r163, 64
+%r168 = trunc i512 %r167 to i64
+%r170 = getelementptr i64, i64* %r1, i32 2
+store i64 %r168, i64* %r170
+%r171 = lshr i512 %r167, 64
+%r172 = trunc i512 %r171 to i64
+%r174 = getelementptr i64, i64* %r1, i32 3
+store i64 %r172, i64* %r174
+%r175 = lshr i512 %r171, 64
+%r176 = trunc i512 %r175 to i64
+%r178 = getelementptr i64, i64* %r1, i32 4
+store i64 %r176, i64* %r178
+%r179 = lshr i512 %r175, 64
+%r180 = trunc i512 %r179 to i64
+%r182 = getelementptr i64, i64* %r1, i32 5
+store i64 %r180, i64* %r182
+%r183 = lshr i512 %r179, 64
+%r184 = trunc i512 %r183 to i64
+%r186 = getelementptr i64, i64* %r1, i32 6
+store i64 %r184, i64* %r186
+%r187 = lshr i512 %r183, 64
+%r188 = trunc i512 %r187 to i64
+%r190 = getelementptr i64, i64* %r1, i32 7
+store i64 %r188, i64* %r190
+ret void
+}
+define void @mcl_fp_sub8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; %r1 = [%r2] - [%r3], with [%r4] added back when the subtraction borrows
+{ ; operands are eight i64 limbs each, limb 0 least significant; NOTE(review): %r4 is presumably the field modulus - confirm against callers
+%r5 = load i64, i64* %r2 ; --- assemble the minuend from %r2 ---
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = load i64, i64* %r3 ; --- assemble the subtrahend from %r3 ---
+%r56 = zext i64 %r55 to i128
+%r58 = getelementptr i64, i64* %r3, i32 1
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i128
+%r61 = shl i128 %r60, 64
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i192
+%r65 = getelementptr i64, i64* %r3, i32 2
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i192
+%r68 = shl i192 %r67, 128
+%r69 = or i192 %r63, %r68
+%r70 = zext i192 %r69 to i256
+%r72 = getelementptr i64, i64* %r3, i32 3
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i256
+%r75 = shl i256 %r74, 192
+%r76 = or i256 %r70, %r75
+%r77 = zext i256 %r76 to i320
+%r79 = getelementptr i64, i64* %r3, i32 4
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i320
+%r82 = shl i320 %r81, 256
+%r83 = or i320 %r77, %r82
+%r84 = zext i320 %r83 to i384
+%r86 = getelementptr i64, i64* %r3, i32 5
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i384
+%r89 = shl i384 %r88, 320
+%r90 = or i384 %r84, %r89
+%r91 = zext i384 %r90 to i448
+%r93 = getelementptr i64, i64* %r3, i32 6
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i448
+%r96 = shl i448 %r95, 384
+%r97 = or i448 %r91, %r96
+%r98 = zext i448 %r97 to i512
+%r100 = getelementptr i64, i64* %r3, i32 7
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i512
+%r103 = shl i512 %r102, 448
+%r104 = or i512 %r98, %r103
+%r105 = zext i512 %r54 to i576 ; widen both operands by one limb so a borrow shows up above bit 511
+%r106 = zext i512 %r104 to i576
+%r107 = sub i576 %r105, %r106 ; 576-bit difference; wraps when [%r2] < [%r3]
+%r108 = trunc i576 %r107 to i512 ; low 512 bits of the raw difference
+%r109 = lshr i576 %r107, 512 ; isolate bit 512
+%r110 = trunc i576 %r109 to i1 ; borrow flag: 1 exactly when [%r2] < [%r3]
+%r111 = trunc i512 %r108 to i64 ; --- store the raw difference to %r1 first, unconditionally ---
+%r113 = getelementptr i64, i64* %r1, i32 0
+store i64 %r111, i64* %r113
+%r114 = lshr i512 %r108, 64
+%r115 = trunc i512 %r114 to i64
+%r117 = getelementptr i64, i64* %r1, i32 1
+store i64 %r115, i64* %r117
+%r118 = lshr i512 %r114, 64
+%r119 = trunc i512 %r118 to i64
+%r121 = getelementptr i64, i64* %r1, i32 2
+store i64 %r119, i64* %r121
+%r122 = lshr i512 %r118, 64
+%r123 = trunc i512 %r122 to i64
+%r125 = getelementptr i64, i64* %r1, i32 3
+store i64 %r123, i64* %r125
+%r126 = lshr i512 %r122, 64
+%r127 = trunc i512 %r126 to i64
+%r129 = getelementptr i64, i64* %r1, i32 4
+store i64 %r127, i64* %r129
+%r130 = lshr i512 %r126, 64
+%r131 = trunc i512 %r130 to i64
+%r133 = getelementptr i64, i64* %r1, i32 5
+store i64 %r131, i64* %r133
+%r134 = lshr i512 %r130, 64
+%r135 = trunc i512 %r134 to i64
+%r137 = getelementptr i64, i64* %r1, i32 6
+store i64 %r135, i64* %r137
+%r138 = lshr i512 %r134, 64
+%r139 = trunc i512 %r138 to i64
+%r141 = getelementptr i64, i64* %r1, i32 7
+store i64 %r139, i64* %r141
+br i1%r110, label %carry, label %nocarry
+nocarry: ; no borrow: the difference already stored in %r1 is the result
+ret void
+carry: ; borrow: read [%r4], add it to the wrapped difference and overwrite %r1
+%r142 = load i64, i64* %r4
+%r143 = zext i64 %r142 to i128
+%r145 = getelementptr i64, i64* %r4, i32 1
+%r146 = load i64, i64* %r145
+%r147 = zext i64 %r146 to i128
+%r148 = shl i128 %r147, 64
+%r149 = or i128 %r143, %r148
+%r150 = zext i128 %r149 to i192
+%r152 = getelementptr i64, i64* %r4, i32 2
+%r153 = load i64, i64* %r152
+%r154 = zext i64 %r153 to i192
+%r155 = shl i192 %r154, 128
+%r156 = or i192 %r150, %r155
+%r157 = zext i192 %r156 to i256
+%r159 = getelementptr i64, i64* %r4, i32 3
+%r160 = load i64, i64* %r159
+%r161 = zext i64 %r160 to i256
+%r162 = shl i256 %r161, 192
+%r163 = or i256 %r157, %r162
+%r164 = zext i256 %r163 to i320
+%r166 = getelementptr i64, i64* %r4, i32 4
+%r167 = load i64, i64* %r166
+%r168 = zext i64 %r167 to i320
+%r169 = shl i320 %r168, 256
+%r170 = or i320 %r164, %r169
+%r171 = zext i320 %r170 to i384
+%r173 = getelementptr i64, i64* %r4, i32 5
+%r174 = load i64, i64* %r173
+%r175 = zext i64 %r174 to i384
+%r176 = shl i384 %r175, 320
+%r177 = or i384 %r171, %r176
+%r178 = zext i384 %r177 to i448
+%r180 = getelementptr i64, i64* %r4, i32 6
+%r181 = load i64, i64* %r180
+%r182 = zext i64 %r181 to i448
+%r183 = shl i448 %r182, 384
+%r184 = or i448 %r178, %r183
+%r185 = zext i448 %r184 to i512
+%r187 = getelementptr i64, i64* %r4, i32 7
+%r188 = load i64, i64* %r187
+%r189 = zext i64 %r188 to i512
+%r190 = shl i512 %r189, 448
+%r191 = or i512 %r185, %r190
+%r192 = add i512 %r108, %r191 ; 512-bit add; the carry out of bit 511 is intentionally not kept
+%r193 = trunc i512 %r192 to i64 ; --- write the corrected value to %r1, one 64-bit limb at a time ---
+%r195 = getelementptr i64, i64* %r1, i32 0
+store i64 %r193, i64* %r195
+%r196 = lshr i512 %r192, 64
+%r197 = trunc i512 %r196 to i64
+%r199 = getelementptr i64, i64* %r1, i32 1
+store i64 %r197, i64* %r199
+%r200 = lshr i512 %r196, 64
+%r201 = trunc i512 %r200 to i64
+%r203 = getelementptr i64, i64* %r1, i32 2
+store i64 %r201, i64* %r203
+%r204 = lshr i512 %r200, 64
+%r205 = trunc i512 %r204 to i64
+%r207 = getelementptr i64, i64* %r1, i32 3
+store i64 %r205, i64* %r207
+%r208 = lshr i512 %r204, 64
+%r209 = trunc i512 %r208 to i64
+%r211 = getelementptr i64, i64* %r1, i32 4
+store i64 %r209, i64* %r211
+%r212 = lshr i512 %r208, 64
+%r213 = trunc i512 %r212 to i64
+%r215 = getelementptr i64, i64* %r1, i32 5
+store i64 %r213, i64* %r215
+%r216 = lshr i512 %r212, 64
+%r217 = trunc i512 %r216 to i64
+%r219 = getelementptr i64, i64* %r1, i32 6
+store i64 %r217, i64* %r219
+%r220 = lshr i512 %r216, 64
+%r221 = trunc i512 %r220 to i64
+%r223 = getelementptr i64, i64* %r1, i32 7
+store i64 %r221, i64* %r223
+ret void
+}
+define void @mcl_fp_subNF8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; 8-limb subtract: stores (x - y), plus the value at %r4 when the 512-bit difference has its top bit set
+{ ; %r1 = output (8 x i64), %r2 = x, %r3 = y, %r4 = conditional addend (presumably the field modulus p -- confirm against callers)
+%r5 = load i64, i64* %r2 ; pack the 8 i64 limbs at %r2 into one i512 (%r54); limb i occupies bits [64*i, 64*i+63]
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53 ; %r54 = x as a single i512
+%r55 = load i64, i64* %r3 ; same limb packing for the 8 limbs at %r3 (%r104)
+%r56 = zext i64 %r55 to i128
+%r58 = getelementptr i64, i64* %r3, i32 1
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i128
+%r61 = shl i128 %r60, 64
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i192
+%r65 = getelementptr i64, i64* %r3, i32 2
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i192
+%r68 = shl i192 %r67, 128
+%r69 = or i192 %r63, %r68
+%r70 = zext i192 %r69 to i256
+%r72 = getelementptr i64, i64* %r3, i32 3
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i256
+%r75 = shl i256 %r74, 192
+%r76 = or i256 %r70, %r75
+%r77 = zext i256 %r76 to i320
+%r79 = getelementptr i64, i64* %r3, i32 4
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i320
+%r82 = shl i320 %r81, 256
+%r83 = or i320 %r77, %r82
+%r84 = zext i320 %r83 to i384
+%r86 = getelementptr i64, i64* %r3, i32 5
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i384
+%r89 = shl i384 %r88, 320
+%r90 = or i384 %r84, %r89
+%r91 = zext i384 %r90 to i448
+%r93 = getelementptr i64, i64* %r3, i32 6
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i448
+%r96 = shl i448 %r95, 384
+%r97 = or i448 %r91, %r96
+%r98 = zext i448 %r97 to i512
+%r100 = getelementptr i64, i64* %r3, i32 7
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i512
+%r103 = shl i512 %r102, 448
+%r104 = or i512 %r98, %r103 ; %r104 = y as a single i512
+%r105 = sub i512 %r54, %r104 ; d = x - y, wrapping modulo 2^512
+%r106 = lshr i512 %r105, 511 ; move bit 511 of d down to bit 0
+%r107 = trunc i512 %r106 to i1 ; top (sign) bit of d; NOTE(review): this equals the borrow only if both inputs are below 2^511 -- confirm the caller guarantees that
+%r108 = load i64, i64* %r4 ; pack the 8 limbs at %r4 into an i512 (%r157)
+%r109 = zext i64 %r108 to i128
+%r111 = getelementptr i64, i64* %r4, i32 1
+%r112 = load i64, i64* %r111
+%r113 = zext i64 %r112 to i128
+%r114 = shl i128 %r113, 64
+%r115 = or i128 %r109, %r114
+%r116 = zext i128 %r115 to i192
+%r118 = getelementptr i64, i64* %r4, i32 2
+%r119 = load i64, i64* %r118
+%r120 = zext i64 %r119 to i192
+%r121 = shl i192 %r120, 128
+%r122 = or i192 %r116, %r121
+%r123 = zext i192 %r122 to i256
+%r125 = getelementptr i64, i64* %r4, i32 3
+%r126 = load i64, i64* %r125
+%r127 = zext i64 %r126 to i256
+%r128 = shl i256 %r127, 192
+%r129 = or i256 %r123, %r128
+%r130 = zext i256 %r129 to i320
+%r132 = getelementptr i64, i64* %r4, i32 4
+%r133 = load i64, i64* %r132
+%r134 = zext i64 %r133 to i320
+%r135 = shl i320 %r134, 256
+%r136 = or i320 %r130, %r135
+%r137 = zext i320 %r136 to i384
+%r139 = getelementptr i64, i64* %r4, i32 5
+%r140 = load i64, i64* %r139
+%r141 = zext i64 %r140 to i384
+%r142 = shl i384 %r141, 320
+%r143 = or i384 %r137, %r142
+%r144 = zext i384 %r143 to i448
+%r146 = getelementptr i64, i64* %r4, i32 6
+%r147 = load i64, i64* %r146
+%r148 = zext i64 %r147 to i448
+%r149 = shl i448 %r148, 384
+%r150 = or i448 %r144, %r149
+%r151 = zext i448 %r150 to i512
+%r153 = getelementptr i64, i64* %r4, i32 7
+%r154 = load i64, i64* %r153
+%r155 = zext i64 %r154 to i512
+%r156 = shl i512 %r155, 448
+%r157 = or i512 %r151, %r156 ; %r157 = value at %r4 as a single i512
+%r159 = select i1 %r107, i512 %r157, i512 0 ; addend = value at %r4 when the sign bit of d is set, otherwise 0 (select, so no branch)
+%r160 = add i512 %r105, %r159 ; result = d + addend, wrapping modulo 2^512
+%r161 = trunc i512 %r160 to i64 ; write the result to %r1 one 64-bit limb at a time, lowest limb first
+%r163 = getelementptr i64, i64* %r1, i32 0
+store i64 %r161, i64* %r163
+%r164 = lshr i512 %r160, 64 ; each step shifts the remaining value right by one limb
+%r165 = trunc i512 %r164 to i64
+%r167 = getelementptr i64, i64* %r1, i32 1
+store i64 %r165, i64* %r167
+%r168 = lshr i512 %r164, 64
+%r169 = trunc i512 %r168 to i64
+%r171 = getelementptr i64, i64* %r1, i32 2
+store i64 %r169, i64* %r171
+%r172 = lshr i512 %r168, 64
+%r173 = trunc i512 %r172 to i64
+%r175 = getelementptr i64, i64* %r1, i32 3
+store i64 %r173, i64* %r175
+%r176 = lshr i512 %r172, 64
+%r177 = trunc i512 %r176 to i64
+%r179 = getelementptr i64, i64* %r1, i32 4
+store i64 %r177, i64* %r179
+%r180 = lshr i512 %r176, 64
+%r181 = trunc i512 %r180 to i64
+%r183 = getelementptr i64, i64* %r1, i32 5
+store i64 %r181, i64* %r183
+%r184 = lshr i512 %r180, 64
+%r185 = trunc i512 %r184 to i64
+%r187 = getelementptr i64, i64* %r1, i32 6
+store i64 %r185, i64* %r187
+%r188 = lshr i512 %r184, 64
+%r189 = trunc i512 %r188 to i64
+%r191 = getelementptr i64, i64* %r1, i32 7
+store i64 %r189, i64* %r191
+ret void
+}
+define void @mcl_fpDbl_add8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; adds two 16-limb values; low 8 limbs of the sum are stored as-is, the high part is stored after a conditional subtraction of the 8-limb value at %r4
+{ ; %r1 = output (16 x i64), %r2 = x (16 limbs), %r3 = y (16 limbs), %r4 = 8-limb subtrahend (presumably the field modulus p -- confirm against callers)
+%r5 = load i64, i64* %r2 ; pack the 16 i64 limbs at %r2 into one i1024 (%r110); limb i occupies bits [64*i, 64*i+63]
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = zext i576 %r61 to i640
+%r64 = getelementptr i64, i64* %r2, i32 9
+%r65 = load i64, i64* %r64
+%r66 = zext i64 %r65 to i640
+%r67 = shl i640 %r66, 576
+%r68 = or i640 %r62, %r67
+%r69 = zext i640 %r68 to i704
+%r71 = getelementptr i64, i64* %r2, i32 10
+%r72 = load i64, i64* %r71
+%r73 = zext i64 %r72 to i704
+%r74 = shl i704 %r73, 640
+%r75 = or i704 %r69, %r74
+%r76 = zext i704 %r75 to i768
+%r78 = getelementptr i64, i64* %r2, i32 11
+%r79 = load i64, i64* %r78
+%r80 = zext i64 %r79 to i768
+%r81 = shl i768 %r80, 704
+%r82 = or i768 %r76, %r81
+%r83 = zext i768 %r82 to i832
+%r85 = getelementptr i64, i64* %r2, i32 12
+%r86 = load i64, i64* %r85
+%r87 = zext i64 %r86 to i832
+%r88 = shl i832 %r87, 768
+%r89 = or i832 %r83, %r88
+%r90 = zext i832 %r89 to i896
+%r92 = getelementptr i64, i64* %r2, i32 13
+%r93 = load i64, i64* %r92
+%r94 = zext i64 %r93 to i896
+%r95 = shl i896 %r94, 832
+%r96 = or i896 %r90, %r95
+%r97 = zext i896 %r96 to i960
+%r99 = getelementptr i64, i64* %r2, i32 14
+%r100 = load i64, i64* %r99
+%r101 = zext i64 %r100 to i960
+%r102 = shl i960 %r101, 896
+%r103 = or i960 %r97, %r102
+%r104 = zext i960 %r103 to i1024
+%r106 = getelementptr i64, i64* %r2, i32 15
+%r107 = load i64, i64* %r106
+%r108 = zext i64 %r107 to i1024
+%r109 = shl i1024 %r108, 960
+%r110 = or i1024 %r104, %r109 ; %r110 = x as a single i1024
+%r111 = load i64, i64* %r3 ; same limb packing for the 16 limbs at %r3 (%r216)
+%r112 = zext i64 %r111 to i128
+%r114 = getelementptr i64, i64* %r3, i32 1
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i128
+%r117 = shl i128 %r116, 64
+%r118 = or i128 %r112, %r117
+%r119 = zext i128 %r118 to i192
+%r121 = getelementptr i64, i64* %r3, i32 2
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i192
+%r124 = shl i192 %r123, 128
+%r125 = or i192 %r119, %r124
+%r126 = zext i192 %r125 to i256
+%r128 = getelementptr i64, i64* %r3, i32 3
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i256
+%r131 = shl i256 %r130, 192
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i320
+%r135 = getelementptr i64, i64* %r3, i32 4
+%r136 = load i64, i64* %r135
+%r137 = zext i64 %r136 to i320
+%r138 = shl i320 %r137, 256
+%r139 = or i320 %r133, %r138
+%r140 = zext i320 %r139 to i384
+%r142 = getelementptr i64, i64* %r3, i32 5
+%r143 = load i64, i64* %r142
+%r144 = zext i64 %r143 to i384
+%r145 = shl i384 %r144, 320
+%r146 = or i384 %r140, %r145
+%r147 = zext i384 %r146 to i448
+%r149 = getelementptr i64, i64* %r3, i32 6
+%r150 = load i64, i64* %r149
+%r151 = zext i64 %r150 to i448
+%r152 = shl i448 %r151, 384
+%r153 = or i448 %r147, %r152
+%r154 = zext i448 %r153 to i512
+%r156 = getelementptr i64, i64* %r3, i32 7
+%r157 = load i64, i64* %r156
+%r158 = zext i64 %r157 to i512
+%r159 = shl i512 %r158, 448
+%r160 = or i512 %r154, %r159
+%r161 = zext i512 %r160 to i576
+%r163 = getelementptr i64, i64* %r3, i32 8
+%r164 = load i64, i64* %r163
+%r165 = zext i64 %r164 to i576
+%r166 = shl i576 %r165, 512
+%r167 = or i576 %r161, %r166
+%r168 = zext i576 %r167 to i640
+%r170 = getelementptr i64, i64* %r3, i32 9
+%r171 = load i64, i64* %r170
+%r172 = zext i64 %r171 to i640
+%r173 = shl i640 %r172, 576
+%r174 = or i640 %r168, %r173
+%r175 = zext i640 %r174 to i704
+%r177 = getelementptr i64, i64* %r3, i32 10
+%r178 = load i64, i64* %r177
+%r179 = zext i64 %r178 to i704
+%r180 = shl i704 %r179, 640
+%r181 = or i704 %r175, %r180
+%r182 = zext i704 %r181 to i768
+%r184 = getelementptr i64, i64* %r3, i32 11
+%r185 = load i64, i64* %r184
+%r186 = zext i64 %r185 to i768
+%r187 = shl i768 %r186, 704
+%r188 = or i768 %r182, %r187
+%r189 = zext i768 %r188 to i832
+%r191 = getelementptr i64, i64* %r3, i32 12
+%r192 = load i64, i64* %r191
+%r193 = zext i64 %r192 to i832
+%r194 = shl i832 %r193, 768
+%r195 = or i832 %r189, %r194
+%r196 = zext i832 %r195 to i896
+%r198 = getelementptr i64, i64* %r3, i32 13
+%r199 = load i64, i64* %r198
+%r200 = zext i64 %r199 to i896
+%r201 = shl i896 %r200, 832
+%r202 = or i896 %r196, %r201
+%r203 = zext i896 %r202 to i960
+%r205 = getelementptr i64, i64* %r3, i32 14
+%r206 = load i64, i64* %r205
+%r207 = zext i64 %r206 to i960
+%r208 = shl i960 %r207, 896
+%r209 = or i960 %r203, %r208
+%r210 = zext i960 %r209 to i1024
+%r212 = getelementptr i64, i64* %r3, i32 15
+%r213 = load i64, i64* %r212
+%r214 = zext i64 %r213 to i1024
+%r215 = shl i1024 %r214, 960
+%r216 = or i1024 %r210, %r215 ; %r216 = y as a single i1024
+%r217 = zext i1024 %r110 to i1088 ; widen both operands by one limb so the carry out of bit 1023 is not lost
+%r218 = zext i1024 %r216 to i1088
+%r219 = add i1088 %r217, %r218 ; s = x + y, exact (carry ends up in bit 1024)
+%r220 = trunc i1088 %r219 to i512 ; low 512 bits of s
+%r221 = trunc i512 %r220 to i64 ; store the low half to %r1[0..7] unchanged, lowest limb first
+%r223 = getelementptr i64, i64* %r1, i32 0
+store i64 %r221, i64* %r223
+%r224 = lshr i512 %r220, 64
+%r225 = trunc i512 %r224 to i64
+%r227 = getelementptr i64, i64* %r1, i32 1
+store i64 %r225, i64* %r227
+%r228 = lshr i512 %r224, 64
+%r229 = trunc i512 %r228 to i64
+%r231 = getelementptr i64, i64* %r1, i32 2
+store i64 %r229, i64* %r231
+%r232 = lshr i512 %r228, 64
+%r233 = trunc i512 %r232 to i64
+%r235 = getelementptr i64, i64* %r1, i32 3
+store i64 %r233, i64* %r235
+%r236 = lshr i512 %r232, 64
+%r237 = trunc i512 %r236 to i64
+%r239 = getelementptr i64, i64* %r1, i32 4
+store i64 %r237, i64* %r239
+%r240 = lshr i512 %r236, 64
+%r241 = trunc i512 %r240 to i64
+%r243 = getelementptr i64, i64* %r1, i32 5
+store i64 %r241, i64* %r243
+%r244 = lshr i512 %r240, 64
+%r245 = trunc i512 %r244 to i64
+%r247 = getelementptr i64, i64* %r1, i32 6
+store i64 %r245, i64* %r247
+%r248 = lshr i512 %r244, 64
+%r249 = trunc i512 %r248 to i64
+%r251 = getelementptr i64, i64* %r1, i32 7
+store i64 %r249, i64* %r251
+%r252 = lshr i1088 %r219, 512 ; drop the low half of s
+%r253 = trunc i1088 %r252 to i576 ; hi = bits 512..1087 of s: the high 512 bits plus the carry (now bit 512 of hi)
+%r254 = load i64, i64* %r4 ; pack the 8 limbs at %r4 into an i512 (%r303)
+%r255 = zext i64 %r254 to i128
+%r257 = getelementptr i64, i64* %r4, i32 1
+%r258 = load i64, i64* %r257
+%r259 = zext i64 %r258 to i128
+%r260 = shl i128 %r259, 64
+%r261 = or i128 %r255, %r260
+%r262 = zext i128 %r261 to i192
+%r264 = getelementptr i64, i64* %r4, i32 2
+%r265 = load i64, i64* %r264
+%r266 = zext i64 %r265 to i192
+%r267 = shl i192 %r266, 128
+%r268 = or i192 %r262, %r267
+%r269 = zext i192 %r268 to i256
+%r271 = getelementptr i64, i64* %r4, i32 3
+%r272 = load i64, i64* %r271
+%r273 = zext i64 %r272 to i256
+%r274 = shl i256 %r273, 192
+%r275 = or i256 %r269, %r274
+%r276 = zext i256 %r275 to i320
+%r278 = getelementptr i64, i64* %r4, i32 4
+%r279 = load i64, i64* %r278
+%r280 = zext i64 %r279 to i320
+%r281 = shl i320 %r280, 256
+%r282 = or i320 %r276, %r281
+%r283 = zext i320 %r282 to i384
+%r285 = getelementptr i64, i64* %r4, i32 5
+%r286 = load i64, i64* %r285
+%r287 = zext i64 %r286 to i384
+%r288 = shl i384 %r287, 320
+%r289 = or i384 %r283, %r288
+%r290 = zext i384 %r289 to i448
+%r292 = getelementptr i64, i64* %r4, i32 6
+%r293 = load i64, i64* %r292
+%r294 = zext i64 %r293 to i448
+%r295 = shl i448 %r294, 384
+%r296 = or i448 %r290, %r295
+%r297 = zext i448 %r296 to i512
+%r299 = getelementptr i64, i64* %r4, i32 7
+%r300 = load i64, i64* %r299
+%r301 = zext i64 %r300 to i512
+%r302 = shl i512 %r301, 448
+%r303 = or i512 %r297, %r302 ; %r303 = value at %r4 as a single i512
+%r304 = zext i512 %r303 to i576 ; widen to match hi
+%r305 = sub i576 %r253, %r304 ; t = hi - value at %r4, wrapping modulo 2^576
+%r306 = lshr i576 %r305, 512 ; move bit 512 of t down to bit 0
+%r307 = trunc i576 %r306 to i1 ; bit 512 of t; NOTE(review): this is the borrow flag provided hi < 2 * (value at %r4) -- confirm the input range
+%r308 = select i1 %r307, i576 %r253, i576 %r305 ; keep hi when the flag is set, otherwise use the subtracted value t (select, so no branch)
+%r309 = trunc i576 %r308 to i512 ; only the low 512 bits of the chosen value are stored
+%r311 = getelementptr i64, i64* %r1, i32 8 ; the high half of the output starts at limb 8
+%r312 = trunc i512 %r309 to i64 ; store to %r1[8..15], lowest limb first
+%r314 = getelementptr i64, i64* %r311, i32 0
+store i64 %r312, i64* %r314
+%r315 = lshr i512 %r309, 64
+%r316 = trunc i512 %r315 to i64
+%r318 = getelementptr i64, i64* %r311, i32 1
+store i64 %r316, i64* %r318
+%r319 = lshr i512 %r315, 64
+%r320 = trunc i512 %r319 to i64
+%r322 = getelementptr i64, i64* %r311, i32 2
+store i64 %r320, i64* %r322
+%r323 = lshr i512 %r319, 64
+%r324 = trunc i512 %r323 to i64
+%r326 = getelementptr i64, i64* %r311, i32 3
+store i64 %r324, i64* %r326
+%r327 = lshr i512 %r323, 64
+%r328 = trunc i512 %r327 to i64
+%r330 = getelementptr i64, i64* %r311, i32 4
+store i64 %r328, i64* %r330
+%r331 = lshr i512 %r327, 64
+%r332 = trunc i512 %r331 to i64
+%r334 = getelementptr i64, i64* %r311, i32 5
+store i64 %r332, i64* %r334
+%r335 = lshr i512 %r331, 64
+%r336 = trunc i512 %r335 to i64
+%r338 = getelementptr i64, i64* %r311, i32 6
+store i64 %r336, i64* %r338
+%r339 = lshr i512 %r335, 64
+%r340 = trunc i512 %r339 to i64
+%r342 = getelementptr i64, i64* %r311, i32 7
+store i64 %r340, i64* %r342
+ret void
+}
+define void @mcl_fpDbl_sub8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; subtracts two 16-limb values; low 8 limbs of the difference are stored as-is, the high 8 limbs get the 8-limb value at %r4 added back when the subtraction borrowed
+{ ; %r1 = output (16 x i64), %r2 = x (16 limbs), %r3 = y (16 limbs), %r4 = 8-limb addend (presumably the field modulus p -- confirm against callers)
+%r5 = load i64, i64* %r2 ; pack the 16 i64 limbs at %r2 into one i1024 (%r110); limb i occupies bits [64*i, 64*i+63]
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = zext i576 %r61 to i640
+%r64 = getelementptr i64, i64* %r2, i32 9
+%r65 = load i64, i64* %r64
+%r66 = zext i64 %r65 to i640
+%r67 = shl i640 %r66, 576
+%r68 = or i640 %r62, %r67
+%r69 = zext i640 %r68 to i704
+%r71 = getelementptr i64, i64* %r2, i32 10
+%r72 = load i64, i64* %r71
+%r73 = zext i64 %r72 to i704
+%r74 = shl i704 %r73, 640
+%r75 = or i704 %r69, %r74
+%r76 = zext i704 %r75 to i768
+%r78 = getelementptr i64, i64* %r2, i32 11
+%r79 = load i64, i64* %r78
+%r80 = zext i64 %r79 to i768
+%r81 = shl i768 %r80, 704
+%r82 = or i768 %r76, %r81
+%r83 = zext i768 %r82 to i832
+%r85 = getelementptr i64, i64* %r2, i32 12
+%r86 = load i64, i64* %r85
+%r87 = zext i64 %r86 to i832
+%r88 = shl i832 %r87, 768
+%r89 = or i832 %r83, %r88
+%r90 = zext i832 %r89 to i896
+%r92 = getelementptr i64, i64* %r2, i32 13
+%r93 = load i64, i64* %r92
+%r94 = zext i64 %r93 to i896
+%r95 = shl i896 %r94, 832
+%r96 = or i896 %r90, %r95
+%r97 = zext i896 %r96 to i960
+%r99 = getelementptr i64, i64* %r2, i32 14
+%r100 = load i64, i64* %r99
+%r101 = zext i64 %r100 to i960
+%r102 = shl i960 %r101, 896
+%r103 = or i960 %r97, %r102
+%r104 = zext i960 %r103 to i1024
+%r106 = getelementptr i64, i64* %r2, i32 15
+%r107 = load i64, i64* %r106
+%r108 = zext i64 %r107 to i1024
+%r109 = shl i1024 %r108, 960
+%r110 = or i1024 %r104, %r109 ; %r110 = x as a single i1024
+%r111 = load i64, i64* %r3 ; same limb packing for the 16 limbs at %r3 (%r216)
+%r112 = zext i64 %r111 to i128
+%r114 = getelementptr i64, i64* %r3, i32 1
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i128
+%r117 = shl i128 %r116, 64
+%r118 = or i128 %r112, %r117
+%r119 = zext i128 %r118 to i192
+%r121 = getelementptr i64, i64* %r3, i32 2
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i192
+%r124 = shl i192 %r123, 128
+%r125 = or i192 %r119, %r124
+%r126 = zext i192 %r125 to i256
+%r128 = getelementptr i64, i64* %r3, i32 3
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i256
+%r131 = shl i256 %r130, 192
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i320
+%r135 = getelementptr i64, i64* %r3, i32 4
+%r136 = load i64, i64* %r135
+%r137 = zext i64 %r136 to i320
+%r138 = shl i320 %r137, 256
+%r139 = or i320 %r133, %r138
+%r140 = zext i320 %r139 to i384
+%r142 = getelementptr i64, i64* %r3, i32 5
+%r143 = load i64, i64* %r142
+%r144 = zext i64 %r143 to i384
+%r145 = shl i384 %r144, 320
+%r146 = or i384 %r140, %r145
+%r147 = zext i384 %r146 to i448
+%r149 = getelementptr i64, i64* %r3, i32 6
+%r150 = load i64, i64* %r149
+%r151 = zext i64 %r150 to i448
+%r152 = shl i448 %r151, 384
+%r153 = or i448 %r147, %r152
+%r154 = zext i448 %r153 to i512
+%r156 = getelementptr i64, i64* %r3, i32 7
+%r157 = load i64, i64* %r156
+%r158 = zext i64 %r157 to i512
+%r159 = shl i512 %r158, 448
+%r160 = or i512 %r154, %r159
+%r161 = zext i512 %r160 to i576
+%r163 = getelementptr i64, i64* %r3, i32 8
+%r164 = load i64, i64* %r163
+%r165 = zext i64 %r164 to i576
+%r166 = shl i576 %r165, 512
+%r167 = or i576 %r161, %r166
+%r168 = zext i576 %r167 to i640
+%r170 = getelementptr i64, i64* %r3, i32 9
+%r171 = load i64, i64* %r170
+%r172 = zext i64 %r171 to i640
+%r173 = shl i640 %r172, 576
+%r174 = or i640 %r168, %r173
+%r175 = zext i640 %r174 to i704
+%r177 = getelementptr i64, i64* %r3, i32 10
+%r178 = load i64, i64* %r177
+%r179 = zext i64 %r178 to i704
+%r180 = shl i704 %r179, 640
+%r181 = or i704 %r175, %r180
+%r182 = zext i704 %r181 to i768
+%r184 = getelementptr i64, i64* %r3, i32 11
+%r185 = load i64, i64* %r184
+%r186 = zext i64 %r185 to i768
+%r187 = shl i768 %r186, 704
+%r188 = or i768 %r182, %r187
+%r189 = zext i768 %r188 to i832
+%r191 = getelementptr i64, i64* %r3, i32 12
+%r192 = load i64, i64* %r191
+%r193 = zext i64 %r192 to i832
+%r194 = shl i832 %r193, 768
+%r195 = or i832 %r189, %r194
+%r196 = zext i832 %r195 to i896
+%r198 = getelementptr i64, i64* %r3, i32 13
+%r199 = load i64, i64* %r198
+%r200 = zext i64 %r199 to i896
+%r201 = shl i896 %r200, 832
+%r202 = or i896 %r196, %r201
+%r203 = zext i896 %r202 to i960
+%r205 = getelementptr i64, i64* %r3, i32 14
+%r206 = load i64, i64* %r205
+%r207 = zext i64 %r206 to i960
+%r208 = shl i960 %r207, 896
+%r209 = or i960 %r203, %r208
+%r210 = zext i960 %r209 to i1024
+%r212 = getelementptr i64, i64* %r3, i32 15
+%r213 = load i64, i64* %r212
+%r214 = zext i64 %r213 to i1024
+%r215 = shl i1024 %r214, 960
+%r216 = or i1024 %r210, %r215 ; %r216 = y as a single i1024
+%r217 = zext i1024 %r110 to i1088 ; widen both operands by one limb so the borrow shows up above bit 1023
+%r218 = zext i1024 %r216 to i1088
+%r219 = sub i1088 %r217, %r218 ; d = x - y modulo 2^1088; bits 1024 and up are all set exactly when x < y
+%r220 = trunc i1088 %r219 to i512 ; low 512 bits of d
+%r221 = trunc i512 %r220 to i64 ; store the low half to %r1[0..7] unchanged, lowest limb first
+%r223 = getelementptr i64, i64* %r1, i32 0
+store i64 %r221, i64* %r223
+%r224 = lshr i512 %r220, 64
+%r225 = trunc i512 %r224 to i64
+%r227 = getelementptr i64, i64* %r1, i32 1
+store i64 %r225, i64* %r227
+%r228 = lshr i512 %r224, 64
+%r229 = trunc i512 %r228 to i64
+%r231 = getelementptr i64, i64* %r1, i32 2
+store i64 %r229, i64* %r231
+%r232 = lshr i512 %r228, 64
+%r233 = trunc i512 %r232 to i64
+%r235 = getelementptr i64, i64* %r1, i32 3
+store i64 %r233, i64* %r235
+%r236 = lshr i512 %r232, 64
+%r237 = trunc i512 %r236 to i64
+%r239 = getelementptr i64, i64* %r1, i32 4
+store i64 %r237, i64* %r239
+%r240 = lshr i512 %r236, 64
+%r241 = trunc i512 %r240 to i64
+%r243 = getelementptr i64, i64* %r1, i32 5
+store i64 %r241, i64* %r243
+%r244 = lshr i512 %r240, 64
+%r245 = trunc i512 %r244 to i64
+%r247 = getelementptr i64, i64* %r1, i32 6
+store i64 %r245, i64* %r247
+%r248 = lshr i512 %r244, 64
+%r249 = trunc i512 %r248 to i64
+%r251 = getelementptr i64, i64* %r1, i32 7
+store i64 %r249, i64* %r251
+%r252 = lshr i1088 %r219, 512 ; drop the low half of d
+%r253 = trunc i1088 %r252 to i512 ; hi = bits 512..1023 of d
+%r254 = lshr i1088 %r219, 1024 ; move bit 1024 of d down to bit 0
+%r255 = trunc i1088 %r254 to i1 ; borrow flag: 1 when x < y
+%r256 = load i64, i64* %r4 ; pack the 8 limbs at %r4 into an i512 (%r305)
+%r257 = zext i64 %r256 to i128
+%r259 = getelementptr i64, i64* %r4, i32 1
+%r260 = load i64, i64* %r259
+%r261 = zext i64 %r260 to i128
+%r262 = shl i128 %r261, 64
+%r263 = or i128 %r257, %r262
+%r264 = zext i128 %r263 to i192
+%r266 = getelementptr i64, i64* %r4, i32 2
+%r267 = load i64, i64* %r266
+%r268 = zext i64 %r267 to i192
+%r269 = shl i192 %r268, 128
+%r270 = or i192 %r264, %r269
+%r271 = zext i192 %r270 to i256
+%r273 = getelementptr i64, i64* %r4, i32 3
+%r274 = load i64, i64* %r273
+%r275 = zext i64 %r274 to i256
+%r276 = shl i256 %r275, 192
+%r277 = or i256 %r271, %r276
+%r278 = zext i256 %r277 to i320
+%r280 = getelementptr i64, i64* %r4, i32 4
+%r281 = load i64, i64* %r280
+%r282 = zext i64 %r281 to i320
+%r283 = shl i320 %r282, 256
+%r284 = or i320 %r278, %r283
+%r285 = zext i320 %r284 to i384
+%r287 = getelementptr i64, i64* %r4, i32 5
+%r288 = load i64, i64* %r287
+%r289 = zext i64 %r288 to i384
+%r290 = shl i384 %r289, 320
+%r291 = or i384 %r285, %r290
+%r292 = zext i384 %r291 to i448
+%r294 = getelementptr i64, i64* %r4, i32 6
+%r295 = load i64, i64* %r294
+%r296 = zext i64 %r295 to i448
+%r297 = shl i448 %r296, 384
+%r298 = or i448 %r292, %r297
+%r299 = zext i448 %r298 to i512
+%r301 = getelementptr i64, i64* %r4, i32 7
+%r302 = load i64, i64* %r301
+%r303 = zext i64 %r302 to i512
+%r304 = shl i512 %r303, 448
+%r305 = or i512 %r299, %r304 ; %r305 = value at %r4 as a single i512
+%r307 = select i1 %r255, i512 %r305, i512 0 ; addend = value at %r4 when the subtraction borrowed, otherwise 0 (select, so no branch)
+%r308 = add i512 %r253, %r307 ; corrected high half = hi + addend, wrapping modulo 2^512
+%r310 = getelementptr i64, i64* %r1, i32 8 ; the high half of the output starts at limb 8
+%r311 = trunc i512 %r308 to i64 ; store to %r1[8..15], lowest limb first
+%r313 = getelementptr i64, i64* %r310, i32 0
+store i64 %r311, i64* %r313
+%r314 = lshr i512 %r308, 64
+%r315 = trunc i512 %r314 to i64
+%r317 = getelementptr i64, i64* %r310, i32 1
+store i64 %r315, i64* %r317
+%r318 = lshr i512 %r314, 64
+%r319 = trunc i512 %r318 to i64
+%r321 = getelementptr i64, i64* %r310, i32 2
+store i64 %r319, i64* %r321
+%r322 = lshr i512 %r318, 64
+%r323 = trunc i512 %r322 to i64
+%r325 = getelementptr i64, i64* %r310, i32 3
+store i64 %r323, i64* %r325
+%r326 = lshr i512 %r322, 64
+%r327 = trunc i512 %r326 to i64
+%r329 = getelementptr i64, i64* %r310, i32 4
+store i64 %r327, i64* %r329
+%r330 = lshr i512 %r326, 64
+%r331 = trunc i512 %r330 to i64
+%r333 = getelementptr i64, i64* %r310, i32 5
+store i64 %r331, i64* %r333
+%r334 = lshr i512 %r330, 64
+%r335 = trunc i512 %r334 to i64
+%r337 = getelementptr i64, i64* %r310, i32 6
+store i64 %r335, i64* %r337
+%r338 = lshr i512 %r334, 64
+%r339 = trunc i512 %r338 to i64
+%r341 = getelementptr i64, i64* %r310, i32 7
+store i64 %r339, i64* %r341
+ret void
+}
+define i640 @mulPv576x64(i64* noalias  %r2, i64 %r3) ; combines nine mulPos64x64(%r2, %r3, i) results, i = 0..8, into one i640 (presumably the 9-limb value at %r2 times the 64-bit scalar %r3 -- confirm against mulPos64x64)
+{ ; %r2 = pointer handed to every helper call, %r3 = 64-bit scalar handed to every helper call
+%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0) ; helper defined outside this view; presumably the 128-bit product of limb 0 at %r2 and %r3 (confirm)
+%r6 = trunc i128 %r5 to i64 ; low 64 bits of the helper result
+%r7 = call i64 @extractHigh64(i128 %r5) ; helper defined outside this view; by its name, the upper 64 bits of the i128 (confirm)
+%r9 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 1) ; same three steps for index 1 through 8
+%r10 = trunc i128 %r9 to i64
+%r11 = call i64 @extractHigh64(i128 %r9)
+%r13 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 2)
+%r14 = trunc i128 %r13 to i64
+%r15 = call i64 @extractHigh64(i128 %r13)
+%r17 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 3)
+%r18 = trunc i128 %r17 to i64
+%r19 = call i64 @extractHigh64(i128 %r17)
+%r21 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 4)
+%r22 = trunc i128 %r21 to i64
+%r23 = call i64 @extractHigh64(i128 %r21)
+%r25 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 5)
+%r26 = trunc i128 %r25 to i64
+%r27 = call i64 @extractHigh64(i128 %r25)
+%r29 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 6)
+%r30 = trunc i128 %r29 to i64
+%r31 = call i64 @extractHigh64(i128 %r29)
+%r33 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 7)
+%r34 = trunc i128 %r33 to i64
+%r35 = call i64 @extractHigh64(i128 %r33)
+%r37 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 8)
+%r38 = trunc i128 %r37 to i64
+%r39 = call i64 @extractHigh64(i128 %r37)
+%r40 = zext i64 %r6 to i128 ; concatenate the nine low halves into one i576 (%r71); index i occupies bits [64*i, 64*i+63]
+%r41 = zext i64 %r10 to i128
+%r42 = shl i128 %r41, 64
+%r43 = or i128 %r40, %r42
+%r44 = zext i128 %r43 to i192
+%r45 = zext i64 %r14 to i192
+%r46 = shl i192 %r45, 128
+%r47 = or i192 %r44, %r46
+%r48 = zext i192 %r47 to i256
+%r49 = zext i64 %r18 to i256
+%r50 = shl i256 %r49, 192
+%r51 = or i256 %r48, %r50
+%r52 = zext i256 %r51 to i320
+%r53 = zext i64 %r22 to i320
+%r54 = shl i320 %r53, 256
+%r55 = or i320 %r52, %r54
+%r56 = zext i320 %r55 to i384
+%r57 = zext i64 %r26 to i384
+%r58 = shl i384 %r57, 320
+%r59 = or i384 %r56, %r58
+%r60 = zext i384 %r59 to i448
+%r61 = zext i64 %r30 to i448
+%r62 = shl i448 %r61, 384
+%r63 = or i448 %r60, %r62
+%r64 = zext i448 %r63 to i512
+%r65 = zext i64 %r34 to i512
+%r66 = shl i512 %r65, 448
+%r67 = or i512 %r64, %r66
+%r68 = zext i512 %r67 to i576
+%r69 = zext i64 %r38 to i576
+%r70 = shl i576 %r69, 512
+%r71 = or i576 %r68, %r70 ; %r71 = all nine low halves as one i576
+%r72 = zext i64 %r7 to i128 ; concatenate the nine high halves the same way (%r103)
+%r73 = zext i64 %r11 to i128
+%r74 = shl i128 %r73, 64
+%r75 = or i128 %r72, %r74
+%r76 = zext i128 %r75 to i192
+%r77 = zext i64 %r15 to i192
+%r78 = shl i192 %r77, 128
+%r79 = or i192 %r76, %r78
+%r80 = zext i192 %r79 to i256
+%r81 = zext i64 %r19 to i256
+%r82 = shl i256 %r81, 192
+%r83 = or i256 %r80, %r82
+%r84 = zext i256 %r83 to i320
+%r85 = zext i64 %r23 to i320
+%r86 = shl i320 %r85, 256
+%r87 = or i320 %r84, %r86
+%r88 = zext i320 %r87 to i384
+%r89 = zext i64 %r27 to i384
+%r90 = shl i384 %r89, 320
+%r91 = or i384 %r88, %r90
+%r92 = zext i384 %r91 to i448
+%r93 = zext i64 %r31 to i448
+%r94 = shl i448 %r93, 384
+%r95 = or i448 %r92, %r94
+%r96 = zext i448 %r95 to i512
+%r97 = zext i64 %r35 to i512
+%r98 = shl i512 %r97, 448
+%r99 = or i512 %r96, %r98
+%r100 = zext i512 %r99 to i576
+%r101 = zext i64 %r39 to i576
+%r102 = shl i576 %r101, 512
+%r103 = or i576 %r100, %r102 ; %r103 = all nine high halves as one i576
+%r104 = zext i576 %r71 to i640 ; widen both to the 640-bit return type
+%r105 = zext i576 %r103 to i640
+%r106 = shl i640 %r105, 64 ; each high half belongs one limb above its matching low half
+%r107 = add i640 %r104, %r106 ; one wide add merges the two rows and propagates all carries
+ret i640 %r107
+}
+define void @mcl_fp_mulUnitPre9L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3) ; calls mulPv576x64(%r2, %r3) and writes the i640 result to %r1 as 10 i64 limbs, lowest first
+{ ; %r1 = output (10 x i64), %r2 and %r3 are passed through unchanged to mulPv576x64
+%r4 = call i640 @mulPv576x64(i64* %r2, i64 %r3) ; the whole 640-bit value to be stored
+%r5 = trunc i640 %r4 to i64 ; limb 0 = low 64 bits
+%r7 = getelementptr i64, i64* %r1, i32 0
+store i64 %r5, i64* %r7
+%r8 = lshr i640 %r4, 64 ; each step shifts the remaining value right by one limb and stores its low 64 bits
+%r9 = trunc i640 %r8 to i64
+%r11 = getelementptr i64, i64* %r1, i32 1
+store i64 %r9, i64* %r11
+%r12 = lshr i640 %r8, 64
+%r13 = trunc i640 %r12 to i64
+%r15 = getelementptr i64, i64* %r1, i32 2
+store i64 %r13, i64* %r15
+%r16 = lshr i640 %r12, 64
+%r17 = trunc i640 %r16 to i64
+%r19 = getelementptr i64, i64* %r1, i32 3
+store i64 %r17, i64* %r19
+%r20 = lshr i640 %r16, 64
+%r21 = trunc i640 %r20 to i64
+%r23 = getelementptr i64, i64* %r1, i32 4
+store i64 %r21, i64* %r23
+%r24 = lshr i640 %r20, 64
+%r25 = trunc i640 %r24 to i64
+%r27 = getelementptr i64, i64* %r1, i32 5
+store i64 %r25, i64* %r27
+%r28 = lshr i640 %r24, 64
+%r29 = trunc i640 %r28 to i64
+%r31 = getelementptr i64, i64* %r1, i32 6
+store i64 %r29, i64* %r31
+%r32 = lshr i640 %r28, 64
+%r33 = trunc i640 %r32 to i64
+%r35 = getelementptr i64, i64* %r1, i32 7
+store i64 %r33, i64* %r35
+%r36 = lshr i640 %r32, 64
+%r37 = trunc i640 %r36 to i64
+%r39 = getelementptr i64, i64* %r1, i32 8
+store i64 %r37, i64* %r39
+%r40 = lshr i640 %r36, 64
+%r41 = trunc i640 %r40 to i64 ; limb 9 = the top 64 bits of the i640
+%r43 = getelementptr i64, i64* %r1, i32 9
+store i64 %r41, i64* %r43
+ret void
+}
+define void @mcl_fpDbl_mulPre9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; For i = 0..8: adds mulPv576x64(%r2, %r3[i]) into a running i640 value and emits one word per round; 18 words in total are stored to %r1.
+{ ; NOTE(review): mulPv576x64 is defined outside this chunk; presumably it returns the 640-bit product of the nine words at its pointer and the i64 argument - confirm.
+%r4 = load i64, i64* %r3
+%r5 = call i640 @mulPv576x64(i64* %r2, i64 %r4) ; round 0: partial result for %r3[0]
+%r6 = trunc i640 %r5 to i64
+store i64 %r6, i64* %r1 ; %r1[0] = low word of the round-0 value
+%r7 = lshr i640 %r5, 64 ; the remaining upper bits are carried into the next round
+%r9 = getelementptr i64, i64* %r3, i32 1
+%r10 = load i64, i64* %r9
+%r11 = call i640 @mulPv576x64(i64* %r2, i64 %r10) ; round 1
+%r12 = add i640 %r7, %r11
+%r13 = trunc i640 %r12 to i64
+%r15 = getelementptr i64, i64* %r1, i32 1
+store i64 %r13, i64* %r15
+%r16 = lshr i640 %r12, 64
+%r18 = getelementptr i64, i64* %r3, i32 2
+%r19 = load i64, i64* %r18
+%r20 = call i640 @mulPv576x64(i64* %r2, i64 %r19) ; round 2
+%r21 = add i640 %r16, %r20
+%r22 = trunc i640 %r21 to i64
+%r24 = getelementptr i64, i64* %r1, i32 2
+store i64 %r22, i64* %r24
+%r25 = lshr i640 %r21, 64
+%r27 = getelementptr i64, i64* %r3, i32 3
+%r28 = load i64, i64* %r27
+%r29 = call i640 @mulPv576x64(i64* %r2, i64 %r28) ; round 3
+%r30 = add i640 %r25, %r29
+%r31 = trunc i640 %r30 to i64
+%r33 = getelementptr i64, i64* %r1, i32 3
+store i64 %r31, i64* %r33
+%r34 = lshr i640 %r30, 64
+%r36 = getelementptr i64, i64* %r3, i32 4
+%r37 = load i64, i64* %r36
+%r38 = call i640 @mulPv576x64(i64* %r2, i64 %r37) ; round 4
+%r39 = add i640 %r34, %r38
+%r40 = trunc i640 %r39 to i64
+%r42 = getelementptr i64, i64* %r1, i32 4
+store i64 %r40, i64* %r42
+%r43 = lshr i640 %r39, 64
+%r45 = getelementptr i64, i64* %r3, i32 5
+%r46 = load i64, i64* %r45
+%r47 = call i640 @mulPv576x64(i64* %r2, i64 %r46) ; round 5
+%r48 = add i640 %r43, %r47
+%r49 = trunc i640 %r48 to i64
+%r51 = getelementptr i64, i64* %r1, i32 5
+store i64 %r49, i64* %r51
+%r52 = lshr i640 %r48, 64
+%r54 = getelementptr i64, i64* %r3, i32 6
+%r55 = load i64, i64* %r54
+%r56 = call i640 @mulPv576x64(i64* %r2, i64 %r55) ; round 6
+%r57 = add i640 %r52, %r56
+%r58 = trunc i640 %r57 to i64
+%r60 = getelementptr i64, i64* %r1, i32 6
+store i64 %r58, i64* %r60
+%r61 = lshr i640 %r57, 64
+%r63 = getelementptr i64, i64* %r3, i32 7
+%r64 = load i64, i64* %r63
+%r65 = call i640 @mulPv576x64(i64* %r2, i64 %r64) ; round 7
+%r66 = add i640 %r61, %r65
+%r67 = trunc i640 %r66 to i64
+%r69 = getelementptr i64, i64* %r1, i32 7
+store i64 %r67, i64* %r69
+%r70 = lshr i640 %r66, 64
+%r72 = getelementptr i64, i64* %r3, i32 8
+%r73 = load i64, i64* %r72
+%r74 = call i640 @mulPv576x64(i64* %r2, i64 %r73) ; round 8 (last)
+%r75 = add i640 %r70, %r74
+%r77 = getelementptr i64, i64* %r1, i32 8 ; base of the upper part of the output: %r77[k] is %r1[8 + k]
+%r78 = trunc i640 %r75 to i64 ; from here on all ten words of the final accumulator are written out, lowest word first
+%r80 = getelementptr i64, i64* %r77, i32 0
+store i64 %r78, i64* %r80
+%r81 = lshr i640 %r75, 64
+%r82 = trunc i640 %r81 to i64
+%r84 = getelementptr i64, i64* %r77, i32 1
+store i64 %r82, i64* %r84
+%r85 = lshr i640 %r81, 64
+%r86 = trunc i640 %r85 to i64
+%r88 = getelementptr i64, i64* %r77, i32 2
+store i64 %r86, i64* %r88
+%r89 = lshr i640 %r85, 64
+%r90 = trunc i640 %r89 to i64
+%r92 = getelementptr i64, i64* %r77, i32 3
+store i64 %r90, i64* %r92
+%r93 = lshr i640 %r89, 64
+%r94 = trunc i640 %r93 to i64
+%r96 = getelementptr i64, i64* %r77, i32 4
+store i64 %r94, i64* %r96
+%r97 = lshr i640 %r93, 64
+%r98 = trunc i640 %r97 to i64
+%r100 = getelementptr i64, i64* %r77, i32 5
+store i64 %r98, i64* %r100
+%r101 = lshr i640 %r97, 64
+%r102 = trunc i640 %r101 to i64
+%r104 = getelementptr i64, i64* %r77, i32 6
+store i64 %r102, i64* %r104
+%r105 = lshr i640 %r101, 64
+%r106 = trunc i640 %r105 to i64
+%r108 = getelementptr i64, i64* %r77, i32 7
+store i64 %r106, i64* %r108
+%r109 = lshr i640 %r105, 64
+%r110 = trunc i640 %r109 to i64
+%r112 = getelementptr i64, i64* %r77, i32 8
+store i64 %r110, i64* %r112
+%r113 = lshr i640 %r109, 64
+%r114 = trunc i640 %r113 to i64
+%r116 = getelementptr i64, i64* %r77, i32 9 ; last output word, i.e. %r1[17]
+store i64 %r114, i64* %r116
+ret void
+}
+define void @mcl_fpDbl_sqrPre9L(i64* noalias  %r1, i64* noalias  %r2) ; Same round structure as a two-operand multiply with %r2 used for both inputs: for i = 0..8 adds mulPv576x64(%r2, %r2[i]) into a running i640 value; 18 words are stored to %r1.
+{ ; NOTE(review): mulPv576x64 is defined outside this chunk; presumably it returns the 640-bit product of the nine words at its pointer and the i64 argument - confirm.
+%r3 = load i64, i64* %r2
+%r4 = call i640 @mulPv576x64(i64* %r2, i64 %r3) ; round 0: partial result for %r2[0]
+%r5 = trunc i640 %r4 to i64
+store i64 %r5, i64* %r1 ; %r1[0] = low word of the round-0 value
+%r6 = lshr i640 %r4, 64 ; the remaining upper bits are carried into the next round
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = call i640 @mulPv576x64(i64* %r2, i64 %r9) ; round 1
+%r11 = add i640 %r6, %r10
+%r12 = trunc i640 %r11 to i64
+%r14 = getelementptr i64, i64* %r1, i32 1
+store i64 %r12, i64* %r14
+%r15 = lshr i640 %r11, 64
+%r17 = getelementptr i64, i64* %r2, i32 2
+%r18 = load i64, i64* %r17
+%r19 = call i640 @mulPv576x64(i64* %r2, i64 %r18) ; round 2
+%r20 = add i640 %r15, %r19
+%r21 = trunc i640 %r20 to i64
+%r23 = getelementptr i64, i64* %r1, i32 2
+store i64 %r21, i64* %r23
+%r24 = lshr i640 %r20, 64
+%r26 = getelementptr i64, i64* %r2, i32 3
+%r27 = load i64, i64* %r26
+%r28 = call i640 @mulPv576x64(i64* %r2, i64 %r27) ; round 3
+%r29 = add i640 %r24, %r28
+%r30 = trunc i640 %r29 to i64
+%r32 = getelementptr i64, i64* %r1, i32 3
+store i64 %r30, i64* %r32
+%r33 = lshr i640 %r29, 64
+%r35 = getelementptr i64, i64* %r2, i32 4
+%r36 = load i64, i64* %r35
+%r37 = call i640 @mulPv576x64(i64* %r2, i64 %r36) ; round 4
+%r38 = add i640 %r33, %r37
+%r39 = trunc i640 %r38 to i64
+%r41 = getelementptr i64, i64* %r1, i32 4
+store i64 %r39, i64* %r41
+%r42 = lshr i640 %r38, 64
+%r44 = getelementptr i64, i64* %r2, i32 5
+%r45 = load i64, i64* %r44
+%r46 = call i640 @mulPv576x64(i64* %r2, i64 %r45) ; round 5
+%r47 = add i640 %r42, %r46
+%r48 = trunc i640 %r47 to i64
+%r50 = getelementptr i64, i64* %r1, i32 5
+store i64 %r48, i64* %r50
+%r51 = lshr i640 %r47, 64
+%r53 = getelementptr i64, i64* %r2, i32 6
+%r54 = load i64, i64* %r53
+%r55 = call i640 @mulPv576x64(i64* %r2, i64 %r54) ; round 6
+%r56 = add i640 %r51, %r55
+%r57 = trunc i640 %r56 to i64
+%r59 = getelementptr i64, i64* %r1, i32 6
+store i64 %r57, i64* %r59
+%r60 = lshr i640 %r56, 64
+%r62 = getelementptr i64, i64* %r2, i32 7
+%r63 = load i64, i64* %r62
+%r64 = call i640 @mulPv576x64(i64* %r2, i64 %r63) ; round 7
+%r65 = add i640 %r60, %r64
+%r66 = trunc i640 %r65 to i64
+%r68 = getelementptr i64, i64* %r1, i32 7
+store i64 %r66, i64* %r68
+%r69 = lshr i640 %r65, 64
+%r71 = getelementptr i64, i64* %r2, i32 8
+%r72 = load i64, i64* %r71
+%r73 = call i640 @mulPv576x64(i64* %r2, i64 %r72) ; round 8 (last)
+%r74 = add i640 %r69, %r73
+%r76 = getelementptr i64, i64* %r1, i32 8 ; base of the upper part of the output: %r76[k] is %r1[8 + k]
+%r77 = trunc i640 %r74 to i64 ; from here on all ten words of the final accumulator are written out, lowest word first
+%r79 = getelementptr i64, i64* %r76, i32 0
+store i64 %r77, i64* %r79
+%r80 = lshr i640 %r74, 64
+%r81 = trunc i640 %r80 to i64
+%r83 = getelementptr i64, i64* %r76, i32 1
+store i64 %r81, i64* %r83
+%r84 = lshr i640 %r80, 64
+%r85 = trunc i640 %r84 to i64
+%r87 = getelementptr i64, i64* %r76, i32 2
+store i64 %r85, i64* %r87
+%r88 = lshr i640 %r84, 64
+%r89 = trunc i640 %r88 to i64
+%r91 = getelementptr i64, i64* %r76, i32 3
+store i64 %r89, i64* %r91
+%r92 = lshr i640 %r88, 64
+%r93 = trunc i640 %r92 to i64
+%r95 = getelementptr i64, i64* %r76, i32 4
+store i64 %r93, i64* %r95
+%r96 = lshr i640 %r92, 64
+%r97 = trunc i640 %r96 to i64
+%r99 = getelementptr i64, i64* %r76, i32 5
+store i64 %r97, i64* %r99
+%r100 = lshr i640 %r96, 64
+%r101 = trunc i640 %r100 to i64
+%r103 = getelementptr i64, i64* %r76, i32 6
+store i64 %r101, i64* %r103
+%r104 = lshr i640 %r100, 64
+%r105 = trunc i640 %r104 to i64
+%r107 = getelementptr i64, i64* %r76, i32 7
+store i64 %r105, i64* %r107
+%r108 = lshr i640 %r104, 64
+%r109 = trunc i640 %r108 to i64
+%r111 = getelementptr i64, i64* %r76, i32 8
+store i64 %r109, i64* %r111
+%r112 = lshr i640 %r108, 64
+%r113 = trunc i640 %r112 to i64
+%r115 = getelementptr i64, i64* %r76, i32 9 ; last output word, i.e. %r1[17]
+store i64 %r113, i64* %r115
+ret void
+}
+define void @mcl_fp_mont9L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; Nine rounds, one per word %r3[i]: accumulate mulPv576x64(%r2, %r3[i]), add mulPv576x64(%r4, q) for a per-round word q, shift right by 64. Ends with one conditional subtraction of the value at %r4[0..8]; 9 words are stored to %r1.
+{ ; NOTE(review): the round shape matches word-serial Montgomery multiplication with modulus words at %r4; mulPv576x64 is defined outside this chunk - confirm its contract.
+%r6 = getelementptr i64, i64* %r4, i32 -1 ; address of the word immediately before %r4[0]
+%r7 = load i64, i64* %r6 ; NOTE(review): presumably the precomputed Montgomery constant (-p^-1 mod 2^64) kept just before the modulus - confirm against the caller
+%r9 = getelementptr i64, i64* %r3, i32 0
+%r10 = load i64, i64* %r9
+%r11 = call i640 @mulPv576x64(i64* %r2, i64 %r10) ; round 0
+%r12 = zext i640 %r11 to i704 ; the accumulator is kept in i704, one word wider than the i640 partial results
+%r13 = trunc i640 %r11 to i64
+%r14 = mul i64 %r13, %r7 ; q = low word of the accumulator times %r7, modulo 2^64
+%r15 = call i640 @mulPv576x64(i64* %r4, i64 %r14)
+%r16 = zext i640 %r15 to i704
+%r17 = add i704 %r12, %r16
+%r18 = lshr i704 %r17, 64 ; shift the low word out before the next round
+%r20 = getelementptr i64, i64* %r3, i32 1
+%r21 = load i64, i64* %r20
+%r22 = call i640 @mulPv576x64(i64* %r2, i64 %r21) ; round 1
+%r23 = zext i640 %r22 to i704
+%r24 = add i704 %r18, %r23
+%r25 = trunc i704 %r24 to i64
+%r26 = mul i64 %r25, %r7
+%r27 = call i640 @mulPv576x64(i64* %r4, i64 %r26)
+%r28 = zext i640 %r27 to i704
+%r29 = add i704 %r24, %r28
+%r30 = lshr i704 %r29, 64
+%r32 = getelementptr i64, i64* %r3, i32 2
+%r33 = load i64, i64* %r32
+%r34 = call i640 @mulPv576x64(i64* %r2, i64 %r33) ; round 2
+%r35 = zext i640 %r34 to i704
+%r36 = add i704 %r30, %r35
+%r37 = trunc i704 %r36 to i64
+%r38 = mul i64 %r37, %r7
+%r39 = call i640 @mulPv576x64(i64* %r4, i64 %r38)
+%r40 = zext i640 %r39 to i704
+%r41 = add i704 %r36, %r40
+%r42 = lshr i704 %r41, 64
+%r44 = getelementptr i64, i64* %r3, i32 3
+%r45 = load i64, i64* %r44
+%r46 = call i640 @mulPv576x64(i64* %r2, i64 %r45) ; round 3
+%r47 = zext i640 %r46 to i704
+%r48 = add i704 %r42, %r47
+%r49 = trunc i704 %r48 to i64
+%r50 = mul i64 %r49, %r7
+%r51 = call i640 @mulPv576x64(i64* %r4, i64 %r50)
+%r52 = zext i640 %r51 to i704
+%r53 = add i704 %r48, %r52
+%r54 = lshr i704 %r53, 64
+%r56 = getelementptr i64, i64* %r3, i32 4
+%r57 = load i64, i64* %r56
+%r58 = call i640 @mulPv576x64(i64* %r2, i64 %r57) ; round 4
+%r59 = zext i640 %r58 to i704
+%r60 = add i704 %r54, %r59
+%r61 = trunc i704 %r60 to i64
+%r62 = mul i64 %r61, %r7
+%r63 = call i640 @mulPv576x64(i64* %r4, i64 %r62)
+%r64 = zext i640 %r63 to i704
+%r65 = add i704 %r60, %r64
+%r66 = lshr i704 %r65, 64
+%r68 = getelementptr i64, i64* %r3, i32 5
+%r69 = load i64, i64* %r68
+%r70 = call i640 @mulPv576x64(i64* %r2, i64 %r69) ; round 5
+%r71 = zext i640 %r70 to i704
+%r72 = add i704 %r66, %r71
+%r73 = trunc i704 %r72 to i64
+%r74 = mul i64 %r73, %r7
+%r75 = call i640 @mulPv576x64(i64* %r4, i64 %r74)
+%r76 = zext i640 %r75 to i704
+%r77 = add i704 %r72, %r76
+%r78 = lshr i704 %r77, 64
+%r80 = getelementptr i64, i64* %r3, i32 6
+%r81 = load i64, i64* %r80
+%r82 = call i640 @mulPv576x64(i64* %r2, i64 %r81) ; round 6
+%r83 = zext i640 %r82 to i704
+%r84 = add i704 %r78, %r83
+%r85 = trunc i704 %r84 to i64
+%r86 = mul i64 %r85, %r7
+%r87 = call i640 @mulPv576x64(i64* %r4, i64 %r86)
+%r88 = zext i640 %r87 to i704
+%r89 = add i704 %r84, %r88
+%r90 = lshr i704 %r89, 64
+%r92 = getelementptr i64, i64* %r3, i32 7
+%r93 = load i64, i64* %r92
+%r94 = call i640 @mulPv576x64(i64* %r2, i64 %r93) ; round 7
+%r95 = zext i640 %r94 to i704
+%r96 = add i704 %r90, %r95
+%r97 = trunc i704 %r96 to i64
+%r98 = mul i64 %r97, %r7
+%r99 = call i640 @mulPv576x64(i64* %r4, i64 %r98)
+%r100 = zext i640 %r99 to i704
+%r101 = add i704 %r96, %r100
+%r102 = lshr i704 %r101, 64
+%r104 = getelementptr i64, i64* %r3, i32 8
+%r105 = load i64, i64* %r104
+%r106 = call i640 @mulPv576x64(i64* %r2, i64 %r105) ; round 8 (last)
+%r107 = zext i640 %r106 to i704
+%r108 = add i704 %r102, %r107
+%r109 = trunc i704 %r108 to i64
+%r110 = mul i64 %r109, %r7
+%r111 = call i640 @mulPv576x64(i64* %r4, i64 %r110)
+%r112 = zext i640 %r111 to i704
+%r113 = add i704 %r108, %r112
+%r114 = lshr i704 %r113, 64
+%r115 = trunc i704 %r114 to i640 ; value before the final conditional subtraction
+%r116 = load i64, i64* %r4 ; from here to %r172: pack %r4[0..8] into a single i576, least-significant word first
+%r117 = zext i64 %r116 to i128
+%r119 = getelementptr i64, i64* %r4, i32 1
+%r120 = load i64, i64* %r119
+%r121 = zext i64 %r120 to i128
+%r122 = shl i128 %r121, 64
+%r123 = or i128 %r117, %r122
+%r124 = zext i128 %r123 to i192
+%r126 = getelementptr i64, i64* %r4, i32 2
+%r127 = load i64, i64* %r126
+%r128 = zext i64 %r127 to i192
+%r129 = shl i192 %r128, 128
+%r130 = or i192 %r124, %r129
+%r131 = zext i192 %r130 to i256
+%r133 = getelementptr i64, i64* %r4, i32 3
+%r134 = load i64, i64* %r133
+%r135 = zext i64 %r134 to i256
+%r136 = shl i256 %r135, 192
+%r137 = or i256 %r131, %r136
+%r138 = zext i256 %r137 to i320
+%r140 = getelementptr i64, i64* %r4, i32 4
+%r141 = load i64, i64* %r140
+%r142 = zext i64 %r141 to i320
+%r143 = shl i320 %r142, 256
+%r144 = or i320 %r138, %r143
+%r145 = zext i320 %r144 to i384
+%r147 = getelementptr i64, i64* %r4, i32 5
+%r148 = load i64, i64* %r147
+%r149 = zext i64 %r148 to i384
+%r150 = shl i384 %r149, 320
+%r151 = or i384 %r145, %r150
+%r152 = zext i384 %r151 to i448
+%r154 = getelementptr i64, i64* %r4, i32 6
+%r155 = load i64, i64* %r154
+%r156 = zext i64 %r155 to i448
+%r157 = shl i448 %r156, 384
+%r158 = or i448 %r152, %r157
+%r159 = zext i448 %r158 to i512
+%r161 = getelementptr i64, i64* %r4, i32 7
+%r162 = load i64, i64* %r161
+%r163 = zext i64 %r162 to i512
+%r164 = shl i512 %r163, 448
+%r165 = or i512 %r159, %r164
+%r166 = zext i512 %r165 to i576
+%r168 = getelementptr i64, i64* %r4, i32 8
+%r169 = load i64, i64* %r168
+%r170 = zext i64 %r169 to i576
+%r171 = shl i576 %r170, 512
+%r172 = or i576 %r166, %r171
+%r173 = zext i576 %r172 to i640
+%r174 = sub i640 %r115, %r173 ; trial subtraction of the packed %r4 value
+%r175 = lshr i640 %r174, 576
+%r176 = trunc i640 %r175 to i1 ; bit 576 of the difference; NOTE(review): presumably acts as the borrow flag, assuming %r115 is below twice the modulus - confirm
+%r177 = select i1 %r176, i640 %r115, i640 %r174 ; keep the unsubtracted value when that bit is set, otherwise the difference
+%r178 = trunc i640 %r177 to i576
+%r179 = trunc i576 %r178 to i64 ; store the low nine words to %r1[0..8]
+%r181 = getelementptr i64, i64* %r1, i32 0
+store i64 %r179, i64* %r181
+%r182 = lshr i576 %r178, 64
+%r183 = trunc i576 %r182 to i64
+%r185 = getelementptr i64, i64* %r1, i32 1
+store i64 %r183, i64* %r185
+%r186 = lshr i576 %r182, 64
+%r187 = trunc i576 %r186 to i64
+%r189 = getelementptr i64, i64* %r1, i32 2
+store i64 %r187, i64* %r189
+%r190 = lshr i576 %r186, 64
+%r191 = trunc i576 %r190 to i64
+%r193 = getelementptr i64, i64* %r1, i32 3
+store i64 %r191, i64* %r193
+%r194 = lshr i576 %r190, 64
+%r195 = trunc i576 %r194 to i64
+%r197 = getelementptr i64, i64* %r1, i32 4
+store i64 %r195, i64* %r197
+%r198 = lshr i576 %r194, 64
+%r199 = trunc i576 %r198 to i64
+%r201 = getelementptr i64, i64* %r1, i32 5
+store i64 %r199, i64* %r201
+%r202 = lshr i576 %r198, 64
+%r203 = trunc i576 %r202 to i64
+%r205 = getelementptr i64, i64* %r1, i32 6
+store i64 %r203, i64* %r205
+%r206 = lshr i576 %r202, 64
+%r207 = trunc i576 %r206 to i64
+%r209 = getelementptr i64, i64* %r1, i32 7
+store i64 %r207, i64* %r209
+%r210 = lshr i576 %r206, 64
+%r211 = trunc i576 %r210 to i64
+%r213 = getelementptr i64, i64* %r1, i32 8
+store i64 %r211, i64* %r213
+ret void
+}
+define void @mcl_fp_montNF9L(i64* %r1, i64* %r2, i64* %r3, i64* %r4) ; Same nine-round multiply/reduce shape as a word-serial Montgomery multiply, but the accumulator stays in i640 and the final trial subtraction is done in i576; 9 words are stored to %r1.
+{ ; NOTE(review): the narrower accumulator presumably relies on the modulus at %r4 not using its top bit ("NF" likely meaning not-full) - confirm; mulPv576x64 is defined outside this chunk.
+%r6 = getelementptr i64, i64* %r4, i32 -1 ; address of the word immediately before %r4[0]
+%r7 = load i64, i64* %r6 ; NOTE(review): presumably the precomputed Montgomery constant (-p^-1 mod 2^64) - confirm against the caller
+%r8 = load i64, i64* %r3
+%r9 = call i640 @mulPv576x64(i64* %r2, i64 %r8) ; round 0
+%r10 = trunc i640 %r9 to i64
+%r11 = mul i64 %r10, %r7 ; q = low word of the accumulator times %r7, modulo 2^64
+%r12 = call i640 @mulPv576x64(i64* %r4, i64 %r11)
+%r13 = add i640 %r9, %r12
+%r14 = lshr i640 %r13, 64 ; shift the low word out before the next round
+%r16 = getelementptr i64, i64* %r3, i32 1
+%r17 = load i64, i64* %r16
+%r18 = call i640 @mulPv576x64(i64* %r2, i64 %r17) ; round 1
+%r19 = add i640 %r14, %r18
+%r20 = trunc i640 %r19 to i64
+%r21 = mul i64 %r20, %r7
+%r22 = call i640 @mulPv576x64(i64* %r4, i64 %r21)
+%r23 = add i640 %r19, %r22
+%r24 = lshr i640 %r23, 64
+%r26 = getelementptr i64, i64* %r3, i32 2
+%r27 = load i64, i64* %r26
+%r28 = call i640 @mulPv576x64(i64* %r2, i64 %r27) ; round 2
+%r29 = add i640 %r24, %r28
+%r30 = trunc i640 %r29 to i64
+%r31 = mul i64 %r30, %r7
+%r32 = call i640 @mulPv576x64(i64* %r4, i64 %r31)
+%r33 = add i640 %r29, %r32
+%r34 = lshr i640 %r33, 64
+%r36 = getelementptr i64, i64* %r3, i32 3
+%r37 = load i64, i64* %r36
+%r38 = call i640 @mulPv576x64(i64* %r2, i64 %r37) ; round 3
+%r39 = add i640 %r34, %r38
+%r40 = trunc i640 %r39 to i64
+%r41 = mul i64 %r40, %r7
+%r42 = call i640 @mulPv576x64(i64* %r4, i64 %r41)
+%r43 = add i640 %r39, %r42
+%r44 = lshr i640 %r43, 64
+%r46 = getelementptr i64, i64* %r3, i32 4
+%r47 = load i64, i64* %r46
+%r48 = call i640 @mulPv576x64(i64* %r2, i64 %r47) ; round 4
+%r49 = add i640 %r44, %r48
+%r50 = trunc i640 %r49 to i64
+%r51 = mul i64 %r50, %r7
+%r52 = call i640 @mulPv576x64(i64* %r4, i64 %r51)
+%r53 = add i640 %r49, %r52
+%r54 = lshr i640 %r53, 64
+%r56 = getelementptr i64, i64* %r3, i32 5
+%r57 = load i64, i64* %r56
+%r58 = call i640 @mulPv576x64(i64* %r2, i64 %r57) ; round 5
+%r59 = add i640 %r54, %r58
+%r60 = trunc i640 %r59 to i64
+%r61 = mul i64 %r60, %r7
+%r62 = call i640 @mulPv576x64(i64* %r4, i64 %r61)
+%r63 = add i640 %r59, %r62
+%r64 = lshr i640 %r63, 64
+%r66 = getelementptr i64, i64* %r3, i32 6
+%r67 = load i64, i64* %r66
+%r68 = call i640 @mulPv576x64(i64* %r2, i64 %r67) ; round 6
+%r69 = add i640 %r64, %r68
+%r70 = trunc i640 %r69 to i64
+%r71 = mul i64 %r70, %r7
+%r72 = call i640 @mulPv576x64(i64* %r4, i64 %r71)
+%r73 = add i640 %r69, %r72
+%r74 = lshr i640 %r73, 64
+%r76 = getelementptr i64, i64* %r3, i32 7
+%r77 = load i64, i64* %r76
+%r78 = call i640 @mulPv576x64(i64* %r2, i64 %r77) ; round 7
+%r79 = add i640 %r74, %r78
+%r80 = trunc i640 %r79 to i64
+%r81 = mul i64 %r80, %r7
+%r82 = call i640 @mulPv576x64(i64* %r4, i64 %r81)
+%r83 = add i640 %r79, %r82
+%r84 = lshr i640 %r83, 64
+%r86 = getelementptr i64, i64* %r3, i32 8
+%r87 = load i64, i64* %r86
+%r88 = call i640 @mulPv576x64(i64* %r2, i64 %r87) ; round 8 (last)
+%r89 = add i640 %r84, %r88
+%r90 = trunc i640 %r89 to i64
+%r91 = mul i64 %r90, %r7
+%r92 = call i640 @mulPv576x64(i64* %r4, i64 %r91)
+%r93 = add i640 %r89, %r92
+%r94 = lshr i640 %r93, 64
+%r95 = trunc i640 %r94 to i576 ; value before the final conditional subtraction
+%r96 = load i64, i64* %r4 ; from here to %r152: pack %r4[0..8] into a single i576, least-significant word first
+%r97 = zext i64 %r96 to i128
+%r99 = getelementptr i64, i64* %r4, i32 1
+%r100 = load i64, i64* %r99
+%r101 = zext i64 %r100 to i128
+%r102 = shl i128 %r101, 64
+%r103 = or i128 %r97, %r102
+%r104 = zext i128 %r103 to i192
+%r106 = getelementptr i64, i64* %r4, i32 2
+%r107 = load i64, i64* %r106
+%r108 = zext i64 %r107 to i192
+%r109 = shl i192 %r108, 128
+%r110 = or i192 %r104, %r109
+%r111 = zext i192 %r110 to i256
+%r113 = getelementptr i64, i64* %r4, i32 3
+%r114 = load i64, i64* %r113
+%r115 = zext i64 %r114 to i256
+%r116 = shl i256 %r115, 192
+%r117 = or i256 %r111, %r116
+%r118 = zext i256 %r117 to i320
+%r120 = getelementptr i64, i64* %r4, i32 4
+%r121 = load i64, i64* %r120
+%r122 = zext i64 %r121 to i320
+%r123 = shl i320 %r122, 256
+%r124 = or i320 %r118, %r123
+%r125 = zext i320 %r124 to i384
+%r127 = getelementptr i64, i64* %r4, i32 5
+%r128 = load i64, i64* %r127
+%r129 = zext i64 %r128 to i384
+%r130 = shl i384 %r129, 320
+%r131 = or i384 %r125, %r130
+%r132 = zext i384 %r131 to i448
+%r134 = getelementptr i64, i64* %r4, i32 6
+%r135 = load i64, i64* %r134
+%r136 = zext i64 %r135 to i448
+%r137 = shl i448 %r136, 384
+%r138 = or i448 %r132, %r137
+%r139 = zext i448 %r138 to i512
+%r141 = getelementptr i64, i64* %r4, i32 7
+%r142 = load i64, i64* %r141
+%r143 = zext i64 %r142 to i512
+%r144 = shl i512 %r143, 448
+%r145 = or i512 %r139, %r144
+%r146 = zext i512 %r145 to i576
+%r148 = getelementptr i64, i64* %r4, i32 8
+%r149 = load i64, i64* %r148
+%r150 = zext i64 %r149 to i576
+%r151 = shl i576 %r150, 512
+%r152 = or i576 %r146, %r151
+%r153 = sub i576 %r95, %r152 ; trial subtraction of the packed %r4 value, same width as the operands
+%r154 = lshr i576 %r153, 575
+%r155 = trunc i576 %r154 to i1 ; top bit (bit 575) of the difference, i.e. its sign when read as a signed value
+%r156 = select i1 %r155, i576 %r95, i576 %r153 ; keep the unsubtracted value when that bit is set, otherwise the difference
+%r157 = trunc i576 %r156 to i64 ; store the nine words to %r1[0..8]
+%r159 = getelementptr i64, i64* %r1, i32 0
+store i64 %r157, i64* %r159
+%r160 = lshr i576 %r156, 64
+%r161 = trunc i576 %r160 to i64
+%r163 = getelementptr i64, i64* %r1, i32 1
+store i64 %r161, i64* %r163
+%r164 = lshr i576 %r160, 64
+%r165 = trunc i576 %r164 to i64
+%r167 = getelementptr i64, i64* %r1, i32 2
+store i64 %r165, i64* %r167
+%r168 = lshr i576 %r164, 64
+%r169 = trunc i576 %r168 to i64
+%r171 = getelementptr i64, i64* %r1, i32 3
+store i64 %r169, i64* %r171
+%r172 = lshr i576 %r168, 64
+%r173 = trunc i576 %r172 to i64
+%r175 = getelementptr i64, i64* %r1, i32 4
+store i64 %r173, i64* %r175
+%r176 = lshr i576 %r172, 64
+%r177 = trunc i576 %r176 to i64
+%r179 = getelementptr i64, i64* %r1, i32 5
+store i64 %r177, i64* %r179
+%r180 = lshr i576 %r176, 64
+%r181 = trunc i576 %r180 to i64
+%r183 = getelementptr i64, i64* %r1, i32 6
+store i64 %r181, i64* %r183
+%r184 = lshr i576 %r180, 64
+%r185 = trunc i576 %r184 to i64
+%r187 = getelementptr i64, i64* %r1, i32 7
+store i64 %r185, i64* %r187
+%r188 = lshr i576 %r184, 64
+%r189 = trunc i576 %r188 to i64
+%r191 = getelementptr i64, i64* %r1, i32 8
+store i64 %r189, i64* %r191
+ret void
+}
+define void @mcl_fp_montRed9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3) ; Reads 18 words from %r2 and 9 words from %r3, runs nine reduce rounds (add mulPv576x64(%r3, q), shift right by 64), then one conditional subtraction of the %r3 value; 9 words are stored to %r1.
+{ ; NOTE(review): the round shape matches word-serial Montgomery reduction of a double-width input with modulus words at %r3; mulPv576x64 is defined outside this chunk - confirm its contract.
+%r5 = getelementptr i64, i64* %r3, i32 -1 ; address of the word immediately before %r3[0]
+%r6 = load i64, i64* %r5 ; NOTE(review): presumably the precomputed Montgomery constant (-p^-1 mod 2^64) - confirm against the caller
+%r7 = load i64, i64* %r3 ; from here to %r63: pack %r3[0..8] into a single i576, least-significant word first
+%r8 = zext i64 %r7 to i128
+%r10 = getelementptr i64, i64* %r3, i32 1
+%r11 = load i64, i64* %r10
+%r12 = zext i64 %r11 to i128
+%r13 = shl i128 %r12, 64
+%r14 = or i128 %r8, %r13
+%r15 = zext i128 %r14 to i192
+%r17 = getelementptr i64, i64* %r3, i32 2
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i192
+%r20 = shl i192 %r19, 128
+%r21 = or i192 %r15, %r20
+%r22 = zext i192 %r21 to i256
+%r24 = getelementptr i64, i64* %r3, i32 3
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i256
+%r27 = shl i256 %r26, 192
+%r28 = or i256 %r22, %r27
+%r29 = zext i256 %r28 to i320
+%r31 = getelementptr i64, i64* %r3, i32 4
+%r32 = load i64, i64* %r31
+%r33 = zext i64 %r32 to i320
+%r34 = shl i320 %r33, 256
+%r35 = or i320 %r29, %r34
+%r36 = zext i320 %r35 to i384
+%r38 = getelementptr i64, i64* %r3, i32 5
+%r39 = load i64, i64* %r38
+%r40 = zext i64 %r39 to i384
+%r41 = shl i384 %r40, 320
+%r42 = or i384 %r36, %r41
+%r43 = zext i384 %r42 to i448
+%r45 = getelementptr i64, i64* %r3, i32 6
+%r46 = load i64, i64* %r45
+%r47 = zext i64 %r46 to i448
+%r48 = shl i448 %r47, 384
+%r49 = or i448 %r43, %r48
+%r50 = zext i448 %r49 to i512
+%r52 = getelementptr i64, i64* %r3, i32 7
+%r53 = load i64, i64* %r52
+%r54 = zext i64 %r53 to i512
+%r55 = shl i512 %r54, 448
+%r56 = or i512 %r50, %r55
+%r57 = zext i512 %r56 to i576
+%r59 = getelementptr i64, i64* %r3, i32 8
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i576
+%r62 = shl i576 %r61, 512
+%r63 = or i576 %r57, %r62
+%r64 = load i64, i64* %r2 ; from here to %r183: pack %r2[0..17] into a single i1152, least-significant word first
+%r65 = zext i64 %r64 to i128
+%r67 = getelementptr i64, i64* %r2, i32 1
+%r68 = load i64, i64* %r67
+%r69 = zext i64 %r68 to i128
+%r70 = shl i128 %r69, 64
+%r71 = or i128 %r65, %r70
+%r72 = zext i128 %r71 to i192
+%r74 = getelementptr i64, i64* %r2, i32 2
+%r75 = load i64, i64* %r74
+%r76 = zext i64 %r75 to i192
+%r77 = shl i192 %r76, 128
+%r78 = or i192 %r72, %r77
+%r79 = zext i192 %r78 to i256
+%r81 = getelementptr i64, i64* %r2, i32 3
+%r82 = load i64, i64* %r81
+%r83 = zext i64 %r82 to i256
+%r84 = shl i256 %r83, 192
+%r85 = or i256 %r79, %r84
+%r86 = zext i256 %r85 to i320
+%r88 = getelementptr i64, i64* %r2, i32 4
+%r89 = load i64, i64* %r88
+%r90 = zext i64 %r89 to i320
+%r91 = shl i320 %r90, 256
+%r92 = or i320 %r86, %r91
+%r93 = zext i320 %r92 to i384
+%r95 = getelementptr i64, i64* %r2, i32 5
+%r96 = load i64, i64* %r95
+%r97 = zext i64 %r96 to i384
+%r98 = shl i384 %r97, 320
+%r99 = or i384 %r93, %r98
+%r100 = zext i384 %r99 to i448
+%r102 = getelementptr i64, i64* %r2, i32 6
+%r103 = load i64, i64* %r102
+%r104 = zext i64 %r103 to i448
+%r105 = shl i448 %r104, 384
+%r106 = or i448 %r100, %r105
+%r107 = zext i448 %r106 to i512
+%r109 = getelementptr i64, i64* %r2, i32 7
+%r110 = load i64, i64* %r109
+%r111 = zext i64 %r110 to i512
+%r112 = shl i512 %r111, 448
+%r113 = or i512 %r107, %r112
+%r114 = zext i512 %r113 to i576
+%r116 = getelementptr i64, i64* %r2, i32 8
+%r117 = load i64, i64* %r116
+%r118 = zext i64 %r117 to i576
+%r119 = shl i576 %r118, 512
+%r120 = or i576 %r114, %r119
+%r121 = zext i576 %r120 to i640
+%r123 = getelementptr i64, i64* %r2, i32 9
+%r124 = load i64, i64* %r123
+%r125 = zext i64 %r124 to i640
+%r126 = shl i640 %r125, 576
+%r127 = or i640 %r121, %r126
+%r128 = zext i640 %r127 to i704
+%r130 = getelementptr i64, i64* %r2, i32 10
+%r131 = load i64, i64* %r130
+%r132 = zext i64 %r131 to i704
+%r133 = shl i704 %r132, 640
+%r134 = or i704 %r128, %r133
+%r135 = zext i704 %r134 to i768
+%r137 = getelementptr i64, i64* %r2, i32 11
+%r138 = load i64, i64* %r137
+%r139 = zext i64 %r138 to i768
+%r140 = shl i768 %r139, 704
+%r141 = or i768 %r135, %r140
+%r142 = zext i768 %r141 to i832
+%r144 = getelementptr i64, i64* %r2, i32 12
+%r145 = load i64, i64* %r144
+%r146 = zext i64 %r145 to i832
+%r147 = shl i832 %r146, 768
+%r148 = or i832 %r142, %r147
+%r149 = zext i832 %r148 to i896
+%r151 = getelementptr i64, i64* %r2, i32 13
+%r152 = load i64, i64* %r151
+%r153 = zext i64 %r152 to i896
+%r154 = shl i896 %r153, 832
+%r155 = or i896 %r149, %r154
+%r156 = zext i896 %r155 to i960
+%r158 = getelementptr i64, i64* %r2, i32 14
+%r159 = load i64, i64* %r158
+%r160 = zext i64 %r159 to i960
+%r161 = shl i960 %r160, 896
+%r162 = or i960 %r156, %r161
+%r163 = zext i960 %r162 to i1024
+%r165 = getelementptr i64, i64* %r2, i32 15
+%r166 = load i64, i64* %r165
+%r167 = zext i64 %r166 to i1024
+%r168 = shl i1024 %r167, 960
+%r169 = or i1024 %r163, %r168
+%r170 = zext i1024 %r169 to i1088
+%r172 = getelementptr i64, i64* %r2, i32 16
+%r173 = load i64, i64* %r172
+%r174 = zext i64 %r173 to i1088
+%r175 = shl i1088 %r174, 1024
+%r176 = or i1088 %r170, %r175
+%r177 = zext i1088 %r176 to i1152
+%r179 = getelementptr i64, i64* %r2, i32 17
+%r180 = load i64, i64* %r179
+%r181 = zext i64 %r180 to i1152
+%r182 = shl i1152 %r181, 1088
+%r183 = or i1152 %r177, %r182
+%r184 = zext i1152 %r183 to i1216 ; one extra word of headroom for the first addition
+%r185 = trunc i1216 %r184 to i64
+%r186 = mul i64 %r185, %r6 ; round 1 of 9: q = low word times %r6, modulo 2^64
+%r187 = call i640 @mulPv576x64(i64* %r3, i64 %r186)
+%r188 = zext i640 %r187 to i1216
+%r189 = add i1216 %r184, %r188
+%r190 = lshr i1216 %r189, 64 ; shift the low word out
+%r191 = trunc i1216 %r190 to i1152 ; the working width shrinks by one word after every round
+%r192 = trunc i1152 %r191 to i64
+%r193 = mul i64 %r192, %r6 ; round 2
+%r194 = call i640 @mulPv576x64(i64* %r3, i64 %r193)
+%r195 = zext i640 %r194 to i1152
+%r196 = add i1152 %r191, %r195
+%r197 = lshr i1152 %r196, 64
+%r198 = trunc i1152 %r197 to i1088
+%r199 = trunc i1088 %r198 to i64
+%r200 = mul i64 %r199, %r6 ; round 3
+%r201 = call i640 @mulPv576x64(i64* %r3, i64 %r200)
+%r202 = zext i640 %r201 to i1088
+%r203 = add i1088 %r198, %r202
+%r204 = lshr i1088 %r203, 64
+%r205 = trunc i1088 %r204 to i1024
+%r206 = trunc i1024 %r205 to i64
+%r207 = mul i64 %r206, %r6 ; round 4
+%r208 = call i640 @mulPv576x64(i64* %r3, i64 %r207)
+%r209 = zext i640 %r208 to i1024
+%r210 = add i1024 %r205, %r209
+%r211 = lshr i1024 %r210, 64
+%r212 = trunc i1024 %r211 to i960
+%r213 = trunc i960 %r212 to i64
+%r214 = mul i64 %r213, %r6 ; round 5
+%r215 = call i640 @mulPv576x64(i64* %r3, i64 %r214)
+%r216 = zext i640 %r215 to i960
+%r217 = add i960 %r212, %r216
+%r218 = lshr i960 %r217, 64
+%r219 = trunc i960 %r218 to i896
+%r220 = trunc i896 %r219 to i64
+%r221 = mul i64 %r220, %r6 ; round 6
+%r222 = call i640 @mulPv576x64(i64* %r3, i64 %r221)
+%r223 = zext i640 %r222 to i896
+%r224 = add i896 %r219, %r223
+%r225 = lshr i896 %r224, 64
+%r226 = trunc i896 %r225 to i832
+%r227 = trunc i832 %r226 to i64
+%r228 = mul i64 %r227, %r6 ; round 7
+%r229 = call i640 @mulPv576x64(i64* %r3, i64 %r228)
+%r230 = zext i640 %r229 to i832
+%r231 = add i832 %r226, %r230
+%r232 = lshr i832 %r231, 64
+%r233 = trunc i832 %r232 to i768
+%r234 = trunc i768 %r233 to i64
+%r235 = mul i64 %r234, %r6 ; round 8
+%r236 = call i640 @mulPv576x64(i64* %r3, i64 %r235)
+%r237 = zext i640 %r236 to i768
+%r238 = add i768 %r233, %r237
+%r239 = lshr i768 %r238, 64
+%r240 = trunc i768 %r239 to i704
+%r241 = trunc i704 %r240 to i64
+%r242 = mul i64 %r241, %r6 ; round 9 (last)
+%r243 = call i640 @mulPv576x64(i64* %r3, i64 %r242)
+%r244 = zext i640 %r243 to i704
+%r245 = add i704 %r240, %r244
+%r246 = lshr i704 %r245, 64
+%r247 = trunc i704 %r246 to i640 ; value before the final conditional subtraction
+%r248 = zext i576 %r63 to i640
+%r249 = sub i640 %r247, %r248 ; trial subtraction of the packed %r3 value
+%r250 = lshr i640 %r249, 576
+%r251 = trunc i640 %r250 to i1 ; bit 576 of the difference; NOTE(review): presumably acts as the borrow flag, assuming %r247 is below twice the modulus - confirm
+%r252 = select i1 %r251, i640 %r247, i640 %r249 ; keep the unsubtracted value when that bit is set, otherwise the difference
+%r253 = trunc i640 %r252 to i576
+%r254 = trunc i576 %r253 to i64 ; store the low nine words to %r1[0..8]
+%r256 = getelementptr i64, i64* %r1, i32 0
+store i64 %r254, i64* %r256
+%r257 = lshr i576 %r253, 64
+%r258 = trunc i576 %r257 to i64
+%r260 = getelementptr i64, i64* %r1, i32 1
+store i64 %r258, i64* %r260
+%r261 = lshr i576 %r257, 64
+%r262 = trunc i576 %r261 to i64
+%r264 = getelementptr i64, i64* %r1, i32 2
+store i64 %r262, i64* %r264
+%r265 = lshr i576 %r261, 64
+%r266 = trunc i576 %r265 to i64
+%r268 = getelementptr i64, i64* %r1, i32 3
+store i64 %r266, i64* %r268
+%r269 = lshr i576 %r265, 64
+%r270 = trunc i576 %r269 to i64
+%r272 = getelementptr i64, i64* %r1, i32 4
+store i64 %r270, i64* %r272
+%r273 = lshr i576 %r269, 64
+%r274 = trunc i576 %r273 to i64
+%r276 = getelementptr i64, i64* %r1, i32 5
+store i64 %r274, i64* %r276
+%r277 = lshr i576 %r273, 64
+%r278 = trunc i576 %r277 to i64
+%r280 = getelementptr i64, i64* %r1, i32 6
+store i64 %r278, i64* %r280
+%r281 = lshr i576 %r277, 64
+%r282 = trunc i576 %r281 to i64
+%r284 = getelementptr i64, i64* %r1, i32 7
+store i64 %r282, i64* %r284
+%r285 = lshr i576 %r281, 64
+%r286 = trunc i576 %r285 to i64
+%r288 = getelementptr i64, i64* %r1, i32 8
+store i64 %r286, i64* %r288
+ret void
+}
+define i64 @mcl_fp_addPre9L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; Adds the 9-word values at %r3 and %r4 with no reduction step, stores the low 9 words of the sum to %r2 and returns bit 576 of the sum (the carry out, 0 or 1).
+{
+%r5 = load i64, i64* %r3 ; from here to %r61: pack %r3[0..8] into a single i576, least-significant word first
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r3, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r3, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r3, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r3, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r3, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = zext i576 %r61 to i640 ; widened by one word so the carry out of bit 575 is preserved
+%r63 = load i64, i64* %r4 ; from here to %r119: pack %r4[0..8] the same way
+%r64 = zext i64 %r63 to i128
+%r66 = getelementptr i64, i64* %r4, i32 1
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i128
+%r69 = shl i128 %r68, 64
+%r70 = or i128 %r64, %r69
+%r71 = zext i128 %r70 to i192
+%r73 = getelementptr i64, i64* %r4, i32 2
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i192
+%r76 = shl i192 %r75, 128
+%r77 = or i192 %r71, %r76
+%r78 = zext i192 %r77 to i256
+%r80 = getelementptr i64, i64* %r4, i32 3
+%r81 = load i64, i64* %r80
+%r82 = zext i64 %r81 to i256
+%r83 = shl i256 %r82, 192
+%r84 = or i256 %r78, %r83
+%r85 = zext i256 %r84 to i320
+%r87 = getelementptr i64, i64* %r4, i32 4
+%r88 = load i64, i64* %r87
+%r89 = zext i64 %r88 to i320
+%r90 = shl i320 %r89, 256
+%r91 = or i320 %r85, %r90
+%r92 = zext i320 %r91 to i384
+%r94 = getelementptr i64, i64* %r4, i32 5
+%r95 = load i64, i64* %r94
+%r96 = zext i64 %r95 to i384
+%r97 = shl i384 %r96, 320
+%r98 = or i384 %r92, %r97
+%r99 = zext i384 %r98 to i448
+%r101 = getelementptr i64, i64* %r4, i32 6
+%r102 = load i64, i64* %r101
+%r103 = zext i64 %r102 to i448
+%r104 = shl i448 %r103, 384
+%r105 = or i448 %r99, %r104
+%r106 = zext i448 %r105 to i512
+%r108 = getelementptr i64, i64* %r4, i32 7
+%r109 = load i64, i64* %r108
+%r110 = zext i64 %r109 to i512
+%r111 = shl i512 %r110, 448
+%r112 = or i512 %r106, %r111
+%r113 = zext i512 %r112 to i576
+%r115 = getelementptr i64, i64* %r4, i32 8
+%r116 = load i64, i64* %r115
+%r117 = zext i64 %r116 to i576
+%r118 = shl i576 %r117, 512
+%r119 = or i576 %r113, %r118
+%r120 = zext i576 %r119 to i640
+%r121 = add i640 %r62, %r120 ; full sum including the carry bit
+%r122 = trunc i640 %r121 to i576 ; low 576 bits are what gets stored
+%r123 = trunc i576 %r122 to i64
+%r125 = getelementptr i64, i64* %r2, i32 0
+store i64 %r123, i64* %r125
+%r126 = lshr i576 %r122, 64
+%r127 = trunc i576 %r126 to i64
+%r129 = getelementptr i64, i64* %r2, i32 1
+store i64 %r127, i64* %r129
+%r130 = lshr i576 %r126, 64
+%r131 = trunc i576 %r130 to i64
+%r133 = getelementptr i64, i64* %r2, i32 2
+store i64 %r131, i64* %r133
+%r134 = lshr i576 %r130, 64
+%r135 = trunc i576 %r134 to i64
+%r137 = getelementptr i64, i64* %r2, i32 3
+store i64 %r135, i64* %r137
+%r138 = lshr i576 %r134, 64
+%r139 = trunc i576 %r138 to i64
+%r141 = getelementptr i64, i64* %r2, i32 4
+store i64 %r139, i64* %r141
+%r142 = lshr i576 %r138, 64
+%r143 = trunc i576 %r142 to i64
+%r145 = getelementptr i64, i64* %r2, i32 5
+store i64 %r143, i64* %r145
+%r146 = lshr i576 %r142, 64
+%r147 = trunc i576 %r146 to i64
+%r149 = getelementptr i64, i64* %r2, i32 6
+store i64 %r147, i64* %r149
+%r150 = lshr i576 %r146, 64
+%r151 = trunc i576 %r150 to i64
+%r153 = getelementptr i64, i64* %r2, i32 7
+store i64 %r151, i64* %r153
+%r154 = lshr i576 %r150, 64
+%r155 = trunc i576 %r154 to i64
+%r157 = getelementptr i64, i64* %r2, i32 8
+store i64 %r155, i64* %r157
+%r158 = lshr i640 %r121, 576 ; isolate the carry out of the 576-bit addition
+%r159 = trunc i640 %r158 to i64
+ret i64 %r159
+}
+define i64 @mcl_fp_subPre9L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; [%r2] = [%r3] - [%r4] on 9 x 64-bit limbs; returns the borrow (0 or 1)
+{
+%r5 = load i64, i64* %r3 ; build the minuend from [%r3]: limb i is zero-extended and OR-ed in at bit 64*i (limb 0 is least significant)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r3, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r3, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r3, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r3, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r3, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = zext i576 %r61 to i640 ; widen to 640 bits so a borrow shows up above bit 575
+%r63 = load i64, i64* %r4 ; build the subtrahend from [%r4] the same way
+%r64 = zext i64 %r63 to i128
+%r66 = getelementptr i64, i64* %r4, i32 1
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i128
+%r69 = shl i128 %r68, 64
+%r70 = or i128 %r64, %r69
+%r71 = zext i128 %r70 to i192
+%r73 = getelementptr i64, i64* %r4, i32 2
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i192
+%r76 = shl i192 %r75, 128
+%r77 = or i192 %r71, %r76
+%r78 = zext i192 %r77 to i256
+%r80 = getelementptr i64, i64* %r4, i32 3
+%r81 = load i64, i64* %r80
+%r82 = zext i64 %r81 to i256
+%r83 = shl i256 %r82, 192
+%r84 = or i256 %r78, %r83
+%r85 = zext i256 %r84 to i320
+%r87 = getelementptr i64, i64* %r4, i32 4
+%r88 = load i64, i64* %r87
+%r89 = zext i64 %r88 to i320
+%r90 = shl i320 %r89, 256
+%r91 = or i320 %r85, %r90
+%r92 = zext i320 %r91 to i384
+%r94 = getelementptr i64, i64* %r4, i32 5
+%r95 = load i64, i64* %r94
+%r96 = zext i64 %r95 to i384
+%r97 = shl i384 %r96, 320
+%r98 = or i384 %r92, %r97
+%r99 = zext i384 %r98 to i448
+%r101 = getelementptr i64, i64* %r4, i32 6
+%r102 = load i64, i64* %r101
+%r103 = zext i64 %r102 to i448
+%r104 = shl i448 %r103, 384
+%r105 = or i448 %r99, %r104
+%r106 = zext i448 %r105 to i512
+%r108 = getelementptr i64, i64* %r4, i32 7
+%r109 = load i64, i64* %r108
+%r110 = zext i64 %r109 to i512
+%r111 = shl i512 %r110, 448
+%r112 = or i512 %r106, %r111
+%r113 = zext i512 %r112 to i576
+%r115 = getelementptr i64, i64* %r4, i32 8
+%r116 = load i64, i64* %r115
+%r117 = zext i64 %r116 to i576
+%r118 = shl i576 %r117, 512
+%r119 = or i576 %r113, %r118
+%r120 = zext i576 %r119 to i640
+%r121 = sub i640 %r62, %r120 ; 640-bit difference [%r3] - [%r4]
+%r122 = trunc i640 %r121 to i576 ; the low 576 bits are the nine result limbs
+%r123 = trunc i576 %r122 to i64 ; store to [%r2] least significant limb first, shifting right by 64 between stores
+%r125 = getelementptr i64, i64* %r2, i32 0
+store i64 %r123, i64* %r125
+%r126 = lshr i576 %r122, 64
+%r127 = trunc i576 %r126 to i64
+%r129 = getelementptr i64, i64* %r2, i32 1
+store i64 %r127, i64* %r129
+%r130 = lshr i576 %r126, 64
+%r131 = trunc i576 %r130 to i64
+%r133 = getelementptr i64, i64* %r2, i32 2
+store i64 %r131, i64* %r133
+%r134 = lshr i576 %r130, 64
+%r135 = trunc i576 %r134 to i64
+%r137 = getelementptr i64, i64* %r2, i32 3
+store i64 %r135, i64* %r137
+%r138 = lshr i576 %r134, 64
+%r139 = trunc i576 %r138 to i64
+%r141 = getelementptr i64, i64* %r2, i32 4
+store i64 %r139, i64* %r141
+%r142 = lshr i576 %r138, 64
+%r143 = trunc i576 %r142 to i64
+%r145 = getelementptr i64, i64* %r2, i32 5
+store i64 %r143, i64* %r145
+%r146 = lshr i576 %r142, 64
+%r147 = trunc i576 %r146 to i64
+%r149 = getelementptr i64, i64* %r2, i32 6
+store i64 %r147, i64* %r149
+%r150 = lshr i576 %r146, 64
+%r151 = trunc i576 %r150 to i64
+%r153 = getelementptr i64, i64* %r2, i32 7
+store i64 %r151, i64* %r153
+%r154 = lshr i576 %r150, 64
+%r155 = trunc i576 %r154 to i64
+%r157 = getelementptr i64, i64* %r2, i32 8
+store i64 %r155, i64* %r157
+%r158 = lshr i640 %r121, 576 ; bits 576..639 of the difference: all ones if the subtraction wrapped, otherwise zero
+%r159 = trunc i640 %r158 to i64
+%r161 = and i64 %r159, 1 ; keep a single bit so the return value is exactly 0 or 1
+ret i64 %r161
+}
+define void @mcl_fp_shr1_9L(i64* noalias  %r1, i64* noalias  %r2) ; [%r1] = [%r2] >> 1 (logical) on a 576-bit value held as 9 x 64-bit limbs
+{
+%r3 = load i64, i64* %r2 ; build the 576-bit input from [%r2]: limb i is OR-ed in at bit 64*i (limb 0 is least significant)
+%r4 = zext i64 %r3 to i128
+%r6 = getelementptr i64, i64* %r2, i32 1
+%r7 = load i64, i64* %r6
+%r8 = zext i64 %r7 to i128
+%r9 = shl i128 %r8, 64
+%r10 = or i128 %r4, %r9
+%r11 = zext i128 %r10 to i192
+%r13 = getelementptr i64, i64* %r2, i32 2
+%r14 = load i64, i64* %r13
+%r15 = zext i64 %r14 to i192
+%r16 = shl i192 %r15, 128
+%r17 = or i192 %r11, %r16
+%r18 = zext i192 %r17 to i256
+%r20 = getelementptr i64, i64* %r2, i32 3
+%r21 = load i64, i64* %r20
+%r22 = zext i64 %r21 to i256
+%r23 = shl i256 %r22, 192
+%r24 = or i256 %r18, %r23
+%r25 = zext i256 %r24 to i320
+%r27 = getelementptr i64, i64* %r2, i32 4
+%r28 = load i64, i64* %r27
+%r29 = zext i64 %r28 to i320
+%r30 = shl i320 %r29, 256
+%r31 = or i320 %r25, %r30
+%r32 = zext i320 %r31 to i384
+%r34 = getelementptr i64, i64* %r2, i32 5
+%r35 = load i64, i64* %r34
+%r36 = zext i64 %r35 to i384
+%r37 = shl i384 %r36, 320
+%r38 = or i384 %r32, %r37
+%r39 = zext i384 %r38 to i448
+%r41 = getelementptr i64, i64* %r2, i32 6
+%r42 = load i64, i64* %r41
+%r43 = zext i64 %r42 to i448
+%r44 = shl i448 %r43, 384
+%r45 = or i448 %r39, %r44
+%r46 = zext i448 %r45 to i512
+%r48 = getelementptr i64, i64* %r2, i32 7
+%r49 = load i64, i64* %r48
+%r50 = zext i64 %r49 to i512
+%r51 = shl i512 %r50, 448
+%r52 = or i512 %r46, %r51
+%r53 = zext i512 %r52 to i576
+%r55 = getelementptr i64, i64* %r2, i32 8
+%r56 = load i64, i64* %r55
+%r57 = zext i64 %r56 to i576
+%r58 = shl i576 %r57, 512
+%r59 = or i576 %r53, %r58
+%r60 = lshr i576 %r59, 1 ; shift the whole 576-bit value right by one bit; a zero enters at bit 575
+%r61 = trunc i576 %r60 to i64 ; store to [%r1] least significant limb first, shifting right by 64 between stores
+%r63 = getelementptr i64, i64* %r1, i32 0
+store i64 %r61, i64* %r63
+%r64 = lshr i576 %r60, 64
+%r65 = trunc i576 %r64 to i64
+%r67 = getelementptr i64, i64* %r1, i32 1
+store i64 %r65, i64* %r67
+%r68 = lshr i576 %r64, 64
+%r69 = trunc i576 %r68 to i64
+%r71 = getelementptr i64, i64* %r1, i32 2
+store i64 %r69, i64* %r71
+%r72 = lshr i576 %r68, 64
+%r73 = trunc i576 %r72 to i64
+%r75 = getelementptr i64, i64* %r1, i32 3
+store i64 %r73, i64* %r75
+%r76 = lshr i576 %r72, 64
+%r77 = trunc i576 %r76 to i64
+%r79 = getelementptr i64, i64* %r1, i32 4
+store i64 %r77, i64* %r79
+%r80 = lshr i576 %r76, 64
+%r81 = trunc i576 %r80 to i64
+%r83 = getelementptr i64, i64* %r1, i32 5
+store i64 %r81, i64* %r83
+%r84 = lshr i576 %r80, 64
+%r85 = trunc i576 %r84 to i64
+%r87 = getelementptr i64, i64* %r1, i32 6
+store i64 %r85, i64* %r87
+%r88 = lshr i576 %r84, 64
+%r89 = trunc i576 %r88 to i64
+%r91 = getelementptr i64, i64* %r1, i32 7
+store i64 %r89, i64* %r91
+%r92 = lshr i576 %r88, 64
+%r93 = trunc i576 %r92 to i64
+%r95 = getelementptr i64, i64* %r1, i32 8
+store i64 %r93, i64* %r95
+ret void
+}
+define void @mcl_fp_add9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; [%r1] = [%r2] + [%r3], followed by one conditional subtraction of [%r4]; all operands are 9 x 64-bit limbs
+{
+%r5 = load i64, i64* %r2 ; build the first addend from [%r2]: limb i is OR-ed in at bit 64*i (limb 0 is least significant)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = load i64, i64* %r3 ; build the second addend from [%r3] the same way
+%r63 = zext i64 %r62 to i128
+%r65 = getelementptr i64, i64* %r3, i32 1
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i128
+%r68 = shl i128 %r67, 64
+%r69 = or i128 %r63, %r68
+%r70 = zext i128 %r69 to i192
+%r72 = getelementptr i64, i64* %r3, i32 2
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i192
+%r75 = shl i192 %r74, 128
+%r76 = or i192 %r70, %r75
+%r77 = zext i192 %r76 to i256
+%r79 = getelementptr i64, i64* %r3, i32 3
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i256
+%r82 = shl i256 %r81, 192
+%r83 = or i256 %r77, %r82
+%r84 = zext i256 %r83 to i320
+%r86 = getelementptr i64, i64* %r3, i32 4
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i320
+%r89 = shl i320 %r88, 256
+%r90 = or i320 %r84, %r89
+%r91 = zext i320 %r90 to i384
+%r93 = getelementptr i64, i64* %r3, i32 5
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i384
+%r96 = shl i384 %r95, 320
+%r97 = or i384 %r91, %r96
+%r98 = zext i384 %r97 to i448
+%r100 = getelementptr i64, i64* %r3, i32 6
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i448
+%r103 = shl i448 %r102, 384
+%r104 = or i448 %r98, %r103
+%r105 = zext i448 %r104 to i512
+%r107 = getelementptr i64, i64* %r3, i32 7
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i512
+%r110 = shl i512 %r109, 448
+%r111 = or i512 %r105, %r110
+%r112 = zext i512 %r111 to i576
+%r114 = getelementptr i64, i64* %r3, i32 8
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i576
+%r117 = shl i576 %r116, 512
+%r118 = or i576 %r112, %r117
+%r119 = zext i576 %r61 to i640 ; widen both addends to 640 bits so the carry out of bit 575 is kept
+%r120 = zext i576 %r118 to i640
+%r121 = add i640 %r119, %r120 ; full-width sum
+%r122 = trunc i640 %r121 to i576 ; low 576 bits of the sum, written to [%r1] unconditionally below
+%r123 = trunc i576 %r122 to i64
+%r125 = getelementptr i64, i64* %r1, i32 0
+store i64 %r123, i64* %r125
+%r126 = lshr i576 %r122, 64
+%r127 = trunc i576 %r126 to i64
+%r129 = getelementptr i64, i64* %r1, i32 1
+store i64 %r127, i64* %r129
+%r130 = lshr i576 %r126, 64
+%r131 = trunc i576 %r130 to i64
+%r133 = getelementptr i64, i64* %r1, i32 2
+store i64 %r131, i64* %r133
+%r134 = lshr i576 %r130, 64
+%r135 = trunc i576 %r134 to i64
+%r137 = getelementptr i64, i64* %r1, i32 3
+store i64 %r135, i64* %r137
+%r138 = lshr i576 %r134, 64
+%r139 = trunc i576 %r138 to i64
+%r141 = getelementptr i64, i64* %r1, i32 4
+store i64 %r139, i64* %r141
+%r142 = lshr i576 %r138, 64
+%r143 = trunc i576 %r142 to i64
+%r145 = getelementptr i64, i64* %r1, i32 5
+store i64 %r143, i64* %r145
+%r146 = lshr i576 %r142, 64
+%r147 = trunc i576 %r146 to i64
+%r149 = getelementptr i64, i64* %r1, i32 6
+store i64 %r147, i64* %r149
+%r150 = lshr i576 %r146, 64
+%r151 = trunc i576 %r150 to i64
+%r153 = getelementptr i64, i64* %r1, i32 7
+store i64 %r151, i64* %r153
+%r154 = lshr i576 %r150, 64
+%r155 = trunc i576 %r154 to i64
+%r157 = getelementptr i64, i64* %r1, i32 8
+store i64 %r155, i64* %r157
+%r158 = load i64, i64* %r4 ; build the 576-bit value at [%r4] (NOTE(review): presumably the field modulus -- confirm with the caller)
+%r159 = zext i64 %r158 to i128
+%r161 = getelementptr i64, i64* %r4, i32 1
+%r162 = load i64, i64* %r161
+%r163 = zext i64 %r162 to i128
+%r164 = shl i128 %r163, 64
+%r165 = or i128 %r159, %r164
+%r166 = zext i128 %r165 to i192
+%r168 = getelementptr i64, i64* %r4, i32 2
+%r169 = load i64, i64* %r168
+%r170 = zext i64 %r169 to i192
+%r171 = shl i192 %r170, 128
+%r172 = or i192 %r166, %r171
+%r173 = zext i192 %r172 to i256
+%r175 = getelementptr i64, i64* %r4, i32 3
+%r176 = load i64, i64* %r175
+%r177 = zext i64 %r176 to i256
+%r178 = shl i256 %r177, 192
+%r179 = or i256 %r173, %r178
+%r180 = zext i256 %r179 to i320
+%r182 = getelementptr i64, i64* %r4, i32 4
+%r183 = load i64, i64* %r182
+%r184 = zext i64 %r183 to i320
+%r185 = shl i320 %r184, 256
+%r186 = or i320 %r180, %r185
+%r187 = zext i320 %r186 to i384
+%r189 = getelementptr i64, i64* %r4, i32 5
+%r190 = load i64, i64* %r189
+%r191 = zext i64 %r190 to i384
+%r192 = shl i384 %r191, 320
+%r193 = or i384 %r187, %r192
+%r194 = zext i384 %r193 to i448
+%r196 = getelementptr i64, i64* %r4, i32 6
+%r197 = load i64, i64* %r196
+%r198 = zext i64 %r197 to i448
+%r199 = shl i448 %r198, 384
+%r200 = or i448 %r194, %r199
+%r201 = zext i448 %r200 to i512
+%r203 = getelementptr i64, i64* %r4, i32 7
+%r204 = load i64, i64* %r203
+%r205 = zext i64 %r204 to i512
+%r206 = shl i512 %r205, 448
+%r207 = or i512 %r201, %r206
+%r208 = zext i512 %r207 to i576
+%r210 = getelementptr i64, i64* %r4, i32 8
+%r211 = load i64, i64* %r210
+%r212 = zext i64 %r211 to i576
+%r213 = shl i576 %r212, 512
+%r214 = or i576 %r208, %r213
+%r215 = zext i576 %r214 to i640
+%r216 = sub i640 %r121, %r215 ; 640-bit difference: sum - [%r4]
+%r217 = lshr i640 %r216, 576 ; bit 576 of the difference is the flag that picks the branch below
+%r218 = trunc i640 %r217 to i1
+br i1%r218, label %carry, label %nocarry ; flag set -> keep the plain sum already in [%r1]; flag clear -> store the difference instead
+nocarry: ; overwrite [%r1] with the low 576 bits of sum - [%r4]
+%r219 = trunc i640 %r216 to i576
+%r220 = trunc i576 %r219 to i64
+%r222 = getelementptr i64, i64* %r1, i32 0
+store i64 %r220, i64* %r222
+%r223 = lshr i576 %r219, 64
+%r224 = trunc i576 %r223 to i64
+%r226 = getelementptr i64, i64* %r1, i32 1
+store i64 %r224, i64* %r226
+%r227 = lshr i576 %r223, 64
+%r228 = trunc i576 %r227 to i64
+%r230 = getelementptr i64, i64* %r1, i32 2
+store i64 %r228, i64* %r230
+%r231 = lshr i576 %r227, 64
+%r232 = trunc i576 %r231 to i64
+%r234 = getelementptr i64, i64* %r1, i32 3
+store i64 %r232, i64* %r234
+%r235 = lshr i576 %r231, 64
+%r236 = trunc i576 %r235 to i64
+%r238 = getelementptr i64, i64* %r1, i32 4
+store i64 %r236, i64* %r238
+%r239 = lshr i576 %r235, 64
+%r240 = trunc i576 %r239 to i64
+%r242 = getelementptr i64, i64* %r1, i32 5
+store i64 %r240, i64* %r242
+%r243 = lshr i576 %r239, 64
+%r244 = trunc i576 %r243 to i64
+%r246 = getelementptr i64, i64* %r1, i32 6
+store i64 %r244, i64* %r246
+%r247 = lshr i576 %r243, 64
+%r248 = trunc i576 %r247 to i64
+%r250 = getelementptr i64, i64* %r1, i32 7
+store i64 %r248, i64* %r250
+%r251 = lshr i576 %r247, 64
+%r252 = trunc i576 %r251 to i64
+%r254 = getelementptr i64, i64* %r1, i32 8
+store i64 %r252, i64* %r254
+ret void
+carry: ; nothing more to write: [%r1] already holds the sum
+ret void
+}
+define void @mcl_fp_addNF9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; [%r1] = [%r2] + [%r3], or that sum minus [%r4], picked by a select on the top bit of the difference; 9 x 64-bit limbs, no branches
+{
+%r5 = load i64, i64* %r2 ; build the first addend from [%r2]: limb i is OR-ed in at bit 64*i (limb 0 is least significant)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = load i64, i64* %r3 ; build the second addend from [%r3] the same way
+%r63 = zext i64 %r62 to i128
+%r65 = getelementptr i64, i64* %r3, i32 1
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i128
+%r68 = shl i128 %r67, 64
+%r69 = or i128 %r63, %r68
+%r70 = zext i128 %r69 to i192
+%r72 = getelementptr i64, i64* %r3, i32 2
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i192
+%r75 = shl i192 %r74, 128
+%r76 = or i192 %r70, %r75
+%r77 = zext i192 %r76 to i256
+%r79 = getelementptr i64, i64* %r3, i32 3
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i256
+%r82 = shl i256 %r81, 192
+%r83 = or i256 %r77, %r82
+%r84 = zext i256 %r83 to i320
+%r86 = getelementptr i64, i64* %r3, i32 4
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i320
+%r89 = shl i320 %r88, 256
+%r90 = or i320 %r84, %r89
+%r91 = zext i320 %r90 to i384
+%r93 = getelementptr i64, i64* %r3, i32 5
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i384
+%r96 = shl i384 %r95, 320
+%r97 = or i384 %r91, %r96
+%r98 = zext i384 %r97 to i448
+%r100 = getelementptr i64, i64* %r3, i32 6
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i448
+%r103 = shl i448 %r102, 384
+%r104 = or i448 %r98, %r103
+%r105 = zext i448 %r104 to i512
+%r107 = getelementptr i64, i64* %r3, i32 7
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i512
+%r110 = shl i512 %r109, 448
+%r111 = or i512 %r105, %r110
+%r112 = zext i512 %r111 to i576
+%r114 = getelementptr i64, i64* %r3, i32 8
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i576
+%r117 = shl i576 %r116, 512
+%r118 = or i576 %r112, %r117
+%r119 = add i576 %r61, %r118 ; 576-bit add with no widening: a carry out of bit 575 is dropped (NOTE(review): presumably callers keep the top bit of the inputs clear -- confirm)
+%r120 = load i64, i64* %r4 ; build the 576-bit value at [%r4] (NOTE(review): presumably the field modulus -- confirm with the caller)
+%r121 = zext i64 %r120 to i128
+%r123 = getelementptr i64, i64* %r4, i32 1
+%r124 = load i64, i64* %r123
+%r125 = zext i64 %r124 to i128
+%r126 = shl i128 %r125, 64
+%r127 = or i128 %r121, %r126
+%r128 = zext i128 %r127 to i192
+%r130 = getelementptr i64, i64* %r4, i32 2
+%r131 = load i64, i64* %r130
+%r132 = zext i64 %r131 to i192
+%r133 = shl i192 %r132, 128
+%r134 = or i192 %r128, %r133
+%r135 = zext i192 %r134 to i256
+%r137 = getelementptr i64, i64* %r4, i32 3
+%r138 = load i64, i64* %r137
+%r139 = zext i64 %r138 to i256
+%r140 = shl i256 %r139, 192
+%r141 = or i256 %r135, %r140
+%r142 = zext i256 %r141 to i320
+%r144 = getelementptr i64, i64* %r4, i32 4
+%r145 = load i64, i64* %r144
+%r146 = zext i64 %r145 to i320
+%r147 = shl i320 %r146, 256
+%r148 = or i320 %r142, %r147
+%r149 = zext i320 %r148 to i384
+%r151 = getelementptr i64, i64* %r4, i32 5
+%r152 = load i64, i64* %r151
+%r153 = zext i64 %r152 to i384
+%r154 = shl i384 %r153, 320
+%r155 = or i384 %r149, %r154
+%r156 = zext i384 %r155 to i448
+%r158 = getelementptr i64, i64* %r4, i32 6
+%r159 = load i64, i64* %r158
+%r160 = zext i64 %r159 to i448
+%r161 = shl i448 %r160, 384
+%r162 = or i448 %r156, %r161
+%r163 = zext i448 %r162 to i512
+%r165 = getelementptr i64, i64* %r4, i32 7
+%r166 = load i64, i64* %r165
+%r167 = zext i64 %r166 to i512
+%r168 = shl i512 %r167, 448
+%r169 = or i512 %r163, %r168
+%r170 = zext i512 %r169 to i576
+%r172 = getelementptr i64, i64* %r4, i32 8
+%r173 = load i64, i64* %r172
+%r174 = zext i64 %r173 to i576
+%r175 = shl i576 %r174, 512
+%r176 = or i576 %r170, %r175
+%r177 = sub i576 %r119, %r176 ; sum - [%r4], wrapping modulo 2^576
+%r178 = lshr i576 %r177, 575 ; isolate bit 575, the top bit of the difference
+%r179 = trunc i576 %r178 to i1
+%r180 = select i1 %r179, i576 %r119, i576 %r177 ; top bit set -> keep the sum; top bit clear -> use the difference
+%r181 = trunc i576 %r180 to i64 ; store to [%r1] least significant limb first, shifting right by 64 between stores
+%r183 = getelementptr i64, i64* %r1, i32 0
+store i64 %r181, i64* %r183
+%r184 = lshr i576 %r180, 64
+%r185 = trunc i576 %r184 to i64
+%r187 = getelementptr i64, i64* %r1, i32 1
+store i64 %r185, i64* %r187
+%r188 = lshr i576 %r184, 64
+%r189 = trunc i576 %r188 to i64
+%r191 = getelementptr i64, i64* %r1, i32 2
+store i64 %r189, i64* %r191
+%r192 = lshr i576 %r188, 64
+%r193 = trunc i576 %r192 to i64
+%r195 = getelementptr i64, i64* %r1, i32 3
+store i64 %r193, i64* %r195
+%r196 = lshr i576 %r192, 64
+%r197 = trunc i576 %r196 to i64
+%r199 = getelementptr i64, i64* %r1, i32 4
+store i64 %r197, i64* %r199
+%r200 = lshr i576 %r196, 64
+%r201 = trunc i576 %r200 to i64
+%r203 = getelementptr i64, i64* %r1, i32 5
+store i64 %r201, i64* %r203
+%r204 = lshr i576 %r200, 64
+%r205 = trunc i576 %r204 to i64
+%r207 = getelementptr i64, i64* %r1, i32 6
+store i64 %r205, i64* %r207
+%r208 = lshr i576 %r204, 64
+%r209 = trunc i576 %r208 to i64
+%r211 = getelementptr i64, i64* %r1, i32 7
+store i64 %r209, i64* %r211
+%r212 = lshr i576 %r208, 64
+%r213 = trunc i576 %r212 to i64
+%r215 = getelementptr i64, i64* %r1, i32 8
+store i64 %r213, i64* %r215
+ret void
+}
+define void @mcl_fp_sub9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4) ; [%r1] = [%r2] - [%r3]; if that borrows, [%r4] is added back; all operands are 9 x 64-bit limbs
+{
+%r5 = load i64, i64* %r2 ; build the minuend from [%r2]: limb i is OR-ed in at bit 64*i (limb 0 is least significant)
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = load i64, i64* %r3 ; build the subtrahend from [%r3] the same way
+%r63 = zext i64 %r62 to i128
+%r65 = getelementptr i64, i64* %r3, i32 1
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i128
+%r68 = shl i128 %r67, 64
+%r69 = or i128 %r63, %r68
+%r70 = zext i128 %r69 to i192
+%r72 = getelementptr i64, i64* %r3, i32 2
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i192
+%r75 = shl i192 %r74, 128
+%r76 = or i192 %r70, %r75
+%r77 = zext i192 %r76 to i256
+%r79 = getelementptr i64, i64* %r3, i32 3
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i256
+%r82 = shl i256 %r81, 192
+%r83 = or i256 %r77, %r82
+%r84 = zext i256 %r83 to i320
+%r86 = getelementptr i64, i64* %r3, i32 4
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i320
+%r89 = shl i320 %r88, 256
+%r90 = or i320 %r84, %r89
+%r91 = zext i320 %r90 to i384
+%r93 = getelementptr i64, i64* %r3, i32 5
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i384
+%r96 = shl i384 %r95, 320
+%r97 = or i384 %r91, %r96
+%r98 = zext i384 %r97 to i448
+%r100 = getelementptr i64, i64* %r3, i32 6
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i448
+%r103 = shl i448 %r102, 384
+%r104 = or i448 %r98, %r103
+%r105 = zext i448 %r104 to i512
+%r107 = getelementptr i64, i64* %r3, i32 7
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i512
+%r110 = shl i512 %r109, 448
+%r111 = or i512 %r105, %r110
+%r112 = zext i512 %r111 to i576
+%r114 = getelementptr i64, i64* %r3, i32 8
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i576
+%r117 = shl i576 %r116, 512
+%r118 = or i576 %r112, %r117
+%r119 = zext i576 %r61 to i640 ; widen both operands to 640 bits so a borrow shows up above bit 575
+%r120 = zext i576 %r118 to i640
+%r121 = sub i640 %r119, %r120 ; 640-bit difference [%r2] - [%r3]
+%r122 = trunc i640 %r121 to i576 ; low 576 bits of the difference, written to [%r1] unconditionally below
+%r123 = lshr i640 %r121, 576 ; bits 576..639: all ones if the subtraction wrapped, otherwise zero
+%r124 = trunc i640 %r123 to i1 ; bit 576 alone, used as the borrow flag
+%r125 = trunc i576 %r122 to i64
+%r127 = getelementptr i64, i64* %r1, i32 0
+store i64 %r125, i64* %r127
+%r128 = lshr i576 %r122, 64
+%r129 = trunc i576 %r128 to i64
+%r131 = getelementptr i64, i64* %r1, i32 1
+store i64 %r129, i64* %r131
+%r132 = lshr i576 %r128, 64
+%r133 = trunc i576 %r132 to i64
+%r135 = getelementptr i64, i64* %r1, i32 2
+store i64 %r133, i64* %r135
+%r136 = lshr i576 %r132, 64
+%r137 = trunc i576 %r136 to i64
+%r139 = getelementptr i64, i64* %r1, i32 3
+store i64 %r137, i64* %r139
+%r140 = lshr i576 %r136, 64
+%r141 = trunc i576 %r140 to i64
+%r143 = getelementptr i64, i64* %r1, i32 4
+store i64 %r141, i64* %r143
+%r144 = lshr i576 %r140, 64
+%r145 = trunc i576 %r144 to i64
+%r147 = getelementptr i64, i64* %r1, i32 5
+store i64 %r145, i64* %r147
+%r148 = lshr i576 %r144, 64
+%r149 = trunc i576 %r148 to i64
+%r151 = getelementptr i64, i64* %r1, i32 6
+store i64 %r149, i64* %r151
+%r152 = lshr i576 %r148, 64
+%r153 = trunc i576 %r152 to i64
+%r155 = getelementptr i64, i64* %r1, i32 7
+store i64 %r153, i64* %r155
+%r156 = lshr i576 %r152, 64
+%r157 = trunc i576 %r156 to i64
+%r159 = getelementptr i64, i64* %r1, i32 8
+store i64 %r157, i64* %r159
+br i1%r124, label %carry, label %nocarry ; borrow -> add [%r4] back in the carry block; no borrow -> [%r1] is already final
+nocarry:
+ret void
+carry:
+%r160 = load i64, i64* %r4 ; build the 576-bit value at [%r4] (NOTE(review): presumably the field modulus -- confirm with the caller)
+%r161 = zext i64 %r160 to i128
+%r163 = getelementptr i64, i64* %r4, i32 1
+%r164 = load i64, i64* %r163
+%r165 = zext i64 %r164 to i128
+%r166 = shl i128 %r165, 64
+%r167 = or i128 %r161, %r166
+%r168 = zext i128 %r167 to i192
+%r170 = getelementptr i64, i64* %r4, i32 2
+%r171 = load i64, i64* %r170
+%r172 = zext i64 %r171 to i192
+%r173 = shl i192 %r172, 128
+%r174 = or i192 %r168, %r173
+%r175 = zext i192 %r174 to i256
+%r177 = getelementptr i64, i64* %r4, i32 3
+%r178 = load i64, i64* %r177
+%r179 = zext i64 %r178 to i256
+%r180 = shl i256 %r179, 192
+%r181 = or i256 %r175, %r180
+%r182 = zext i256 %r181 to i320
+%r184 = getelementptr i64, i64* %r4, i32 4
+%r185 = load i64, i64* %r184
+%r186 = zext i64 %r185 to i320
+%r187 = shl i320 %r186, 256
+%r188 = or i320 %r182, %r187
+%r189 = zext i320 %r188 to i384
+%r191 = getelementptr i64, i64* %r4, i32 5
+%r192 = load i64, i64* %r191
+%r193 = zext i64 %r192 to i384
+%r194 = shl i384 %r193, 320
+%r195 = or i384 %r189, %r194
+%r196 = zext i384 %r195 to i448
+%r198 = getelementptr i64, i64* %r4, i32 6
+%r199 = load i64, i64* %r198
+%r200 = zext i64 %r199 to i448
+%r201 = shl i448 %r200, 384
+%r202 = or i448 %r196, %r201
+%r203 = zext i448 %r202 to i512
+%r205 = getelementptr i64, i64* %r4, i32 7
+%r206 = load i64, i64* %r205
+%r207 = zext i64 %r206 to i512
+%r208 = shl i512 %r207, 448
+%r209 = or i512 %r203, %r208
+%r210 = zext i512 %r209 to i576
+%r212 = getelementptr i64, i64* %r4, i32 8
+%r213 = load i64, i64* %r212
+%r214 = zext i64 %r213 to i576
+%r215 = shl i576 %r214, 512
+%r216 = or i576 %r210, %r215
+%r217 = add i576 %r122, %r216 ; add it to the wrapped difference, modulo 2^576
+%r218 = trunc i576 %r217 to i64 ; overwrite [%r1] with the corrected limbs, least significant first
+%r220 = getelementptr i64, i64* %r1, i32 0
+store i64 %r218, i64* %r220
+%r221 = lshr i576 %r217, 64
+%r222 = trunc i576 %r221 to i64
+%r224 = getelementptr i64, i64* %r1, i32 1
+store i64 %r222, i64* %r224
+%r225 = lshr i576 %r221, 64
+%r226 = trunc i576 %r225 to i64
+%r228 = getelementptr i64, i64* %r1, i32 2
+store i64 %r226, i64* %r228
+%r229 = lshr i576 %r225, 64
+%r230 = trunc i576 %r229 to i64
+%r232 = getelementptr i64, i64* %r1, i32 3
+store i64 %r230, i64* %r232
+%r233 = lshr i576 %r229, 64
+%r234 = trunc i576 %r233 to i64
+%r236 = getelementptr i64, i64* %r1, i32 4
+store i64 %r234, i64* %r236
+%r237 = lshr i576 %r233, 64
+%r238 = trunc i576 %r237 to i64
+%r240 = getelementptr i64, i64* %r1, i32 5
+store i64 %r238, i64* %r240
+%r241 = lshr i576 %r237, 64
+%r242 = trunc i576 %r241 to i64
+%r244 = getelementptr i64, i64* %r1, i32 6
+store i64 %r242, i64* %r244
+%r245 = lshr i576 %r241, 64
+%r246 = trunc i576 %r245 to i64
+%r248 = getelementptr i64, i64* %r1, i32 7
+store i64 %r246, i64* %r248
+%r249 = lshr i576 %r245, 64
+%r250 = trunc i576 %r249 to i64
+%r252 = getelementptr i64, i64* %r1, i32 8
+store i64 %r250, i64* %r252
+ret void
+}
+define void @mcl_fp_subNF9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = load i64, i64* %r3
+%r63 = zext i64 %r62 to i128
+%r65 = getelementptr i64, i64* %r3, i32 1
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i128
+%r68 = shl i128 %r67, 64
+%r69 = or i128 %r63, %r68
+%r70 = zext i128 %r69 to i192
+%r72 = getelementptr i64, i64* %r3, i32 2
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i192
+%r75 = shl i192 %r74, 128
+%r76 = or i192 %r70, %r75
+%r77 = zext i192 %r76 to i256
+%r79 = getelementptr i64, i64* %r3, i32 3
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i256
+%r82 = shl i256 %r81, 192
+%r83 = or i256 %r77, %r82
+%r84 = zext i256 %r83 to i320
+%r86 = getelementptr i64, i64* %r3, i32 4
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i320
+%r89 = shl i320 %r88, 256
+%r90 = or i320 %r84, %r89
+%r91 = zext i320 %r90 to i384
+%r93 = getelementptr i64, i64* %r3, i32 5
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i384
+%r96 = shl i384 %r95, 320
+%r97 = or i384 %r91, %r96
+%r98 = zext i384 %r97 to i448
+%r100 = getelementptr i64, i64* %r3, i32 6
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i448
+%r103 = shl i448 %r102, 384
+%r104 = or i448 %r98, %r103
+%r105 = zext i448 %r104 to i512
+%r107 = getelementptr i64, i64* %r3, i32 7
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i512
+%r110 = shl i512 %r109, 448
+%r111 = or i512 %r105, %r110
+%r112 = zext i512 %r111 to i576
+%r114 = getelementptr i64, i64* %r3, i32 8
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i576
+%r117 = shl i576 %r116, 512
+%r118 = or i576 %r112, %r117
+%r119 = sub i576 %r61, %r118
+%r120 = lshr i576 %r119, 575
+%r121 = trunc i576 %r120 to i1
+%r122 = load i64, i64* %r4
+%r123 = zext i64 %r122 to i128
+%r125 = getelementptr i64, i64* %r4, i32 1
+%r126 = load i64, i64* %r125
+%r127 = zext i64 %r126 to i128
+%r128 = shl i128 %r127, 64
+%r129 = or i128 %r123, %r128
+%r130 = zext i128 %r129 to i192
+%r132 = getelementptr i64, i64* %r4, i32 2
+%r133 = load i64, i64* %r132
+%r134 = zext i64 %r133 to i192
+%r135 = shl i192 %r134, 128
+%r136 = or i192 %r130, %r135
+%r137 = zext i192 %r136 to i256
+%r139 = getelementptr i64, i64* %r4, i32 3
+%r140 = load i64, i64* %r139
+%r141 = zext i64 %r140 to i256
+%r142 = shl i256 %r141, 192
+%r143 = or i256 %r137, %r142
+%r144 = zext i256 %r143 to i320
+%r146 = getelementptr i64, i64* %r4, i32 4
+%r147 = load i64, i64* %r146
+%r148 = zext i64 %r147 to i320
+%r149 = shl i320 %r148, 256
+%r150 = or i320 %r144, %r149
+%r151 = zext i320 %r150 to i384
+%r153 = getelementptr i64, i64* %r4, i32 5
+%r154 = load i64, i64* %r153
+%r155 = zext i64 %r154 to i384
+%r156 = shl i384 %r155, 320
+%r157 = or i384 %r151, %r156
+%r158 = zext i384 %r157 to i448
+%r160 = getelementptr i64, i64* %r4, i32 6
+%r161 = load i64, i64* %r160
+%r162 = zext i64 %r161 to i448
+%r163 = shl i448 %r162, 384
+%r164 = or i448 %r158, %r163
+%r165 = zext i448 %r164 to i512
+%r167 = getelementptr i64, i64* %r4, i32 7
+%r168 = load i64, i64* %r167
+%r169 = zext i64 %r168 to i512
+%r170 = shl i512 %r169, 448
+%r171 = or i512 %r165, %r170
+%r172 = zext i512 %r171 to i576
+%r174 = getelementptr i64, i64* %r4, i32 8
+%r175 = load i64, i64* %r174
+%r176 = zext i64 %r175 to i576
+%r177 = shl i576 %r176, 512
+%r178 = or i576 %r172, %r177
+%r180 = select i1 %r121, i576 %r178, i576 0
+%r181 = add i576 %r119, %r180
+%r182 = trunc i576 %r181 to i64
+%r184 = getelementptr i64, i64* %r1, i32 0
+store i64 %r182, i64* %r184
+%r185 = lshr i576 %r181, 64
+%r186 = trunc i576 %r185 to i64
+%r188 = getelementptr i64, i64* %r1, i32 1
+store i64 %r186, i64* %r188
+%r189 = lshr i576 %r185, 64
+%r190 = trunc i576 %r189 to i64
+%r192 = getelementptr i64, i64* %r1, i32 2
+store i64 %r190, i64* %r192
+%r193 = lshr i576 %r189, 64
+%r194 = trunc i576 %r193 to i64
+%r196 = getelementptr i64, i64* %r1, i32 3
+store i64 %r194, i64* %r196
+%r197 = lshr i576 %r193, 64
+%r198 = trunc i576 %r197 to i64
+%r200 = getelementptr i64, i64* %r1, i32 4
+store i64 %r198, i64* %r200
+%r201 = lshr i576 %r197, 64
+%r202 = trunc i576 %r201 to i64
+%r204 = getelementptr i64, i64* %r1, i32 5
+store i64 %r202, i64* %r204
+%r205 = lshr i576 %r201, 64
+%r206 = trunc i576 %r205 to i64
+%r208 = getelementptr i64, i64* %r1, i32 6
+store i64 %r206, i64* %r208
+%r209 = lshr i576 %r205, 64
+%r210 = trunc i576 %r209 to i64
+%r212 = getelementptr i64, i64* %r1, i32 7
+store i64 %r210, i64* %r212
+%r213 = lshr i576 %r209, 64
+%r214 = trunc i576 %r213 to i64
+%r216 = getelementptr i64, i64* %r1, i32 8
+store i64 %r214, i64* %r216
+ret void
+}
+define void @mcl_fpDbl_add9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = zext i576 %r61 to i640
+%r64 = getelementptr i64, i64* %r2, i32 9
+%r65 = load i64, i64* %r64
+%r66 = zext i64 %r65 to i640
+%r67 = shl i640 %r66, 576
+%r68 = or i640 %r62, %r67
+%r69 = zext i640 %r68 to i704
+%r71 = getelementptr i64, i64* %r2, i32 10
+%r72 = load i64, i64* %r71
+%r73 = zext i64 %r72 to i704
+%r74 = shl i704 %r73, 640
+%r75 = or i704 %r69, %r74
+%r76 = zext i704 %r75 to i768
+%r78 = getelementptr i64, i64* %r2, i32 11
+%r79 = load i64, i64* %r78
+%r80 = zext i64 %r79 to i768
+%r81 = shl i768 %r80, 704
+%r82 = or i768 %r76, %r81
+%r83 = zext i768 %r82 to i832
+%r85 = getelementptr i64, i64* %r2, i32 12
+%r86 = load i64, i64* %r85
+%r87 = zext i64 %r86 to i832
+%r88 = shl i832 %r87, 768
+%r89 = or i832 %r83, %r88
+%r90 = zext i832 %r89 to i896
+%r92 = getelementptr i64, i64* %r2, i32 13
+%r93 = load i64, i64* %r92
+%r94 = zext i64 %r93 to i896
+%r95 = shl i896 %r94, 832
+%r96 = or i896 %r90, %r95
+%r97 = zext i896 %r96 to i960
+%r99 = getelementptr i64, i64* %r2, i32 14
+%r100 = load i64, i64* %r99
+%r101 = zext i64 %r100 to i960
+%r102 = shl i960 %r101, 896
+%r103 = or i960 %r97, %r102
+%r104 = zext i960 %r103 to i1024
+%r106 = getelementptr i64, i64* %r2, i32 15
+%r107 = load i64, i64* %r106
+%r108 = zext i64 %r107 to i1024
+%r109 = shl i1024 %r108, 960
+%r110 = or i1024 %r104, %r109
+%r111 = zext i1024 %r110 to i1088
+%r113 = getelementptr i64, i64* %r2, i32 16
+%r114 = load i64, i64* %r113
+%r115 = zext i64 %r114 to i1088
+%r116 = shl i1088 %r115, 1024
+%r117 = or i1088 %r111, %r116
+%r118 = zext i1088 %r117 to i1152
+%r120 = getelementptr i64, i64* %r2, i32 17
+%r121 = load i64, i64* %r120
+%r122 = zext i64 %r121 to i1152
+%r123 = shl i1152 %r122, 1088
+%r124 = or i1152 %r118, %r123
+%r125 = load i64, i64* %r3
+%r126 = zext i64 %r125 to i128
+%r128 = getelementptr i64, i64* %r3, i32 1
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i128
+%r131 = shl i128 %r130, 64
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i192
+%r135 = getelementptr i64, i64* %r3, i32 2
+%r136 = load i64, i64* %r135
+%r137 = zext i64 %r136 to i192
+%r138 = shl i192 %r137, 128
+%r139 = or i192 %r133, %r138
+%r140 = zext i192 %r139 to i256
+%r142 = getelementptr i64, i64* %r3, i32 3
+%r143 = load i64, i64* %r142
+%r144 = zext i64 %r143 to i256
+%r145 = shl i256 %r144, 192
+%r146 = or i256 %r140, %r145
+%r147 = zext i256 %r146 to i320
+%r149 = getelementptr i64, i64* %r3, i32 4
+%r150 = load i64, i64* %r149
+%r151 = zext i64 %r150 to i320
+%r152 = shl i320 %r151, 256
+%r153 = or i320 %r147, %r152
+%r154 = zext i320 %r153 to i384
+%r156 = getelementptr i64, i64* %r3, i32 5
+%r157 = load i64, i64* %r156
+%r158 = zext i64 %r157 to i384
+%r159 = shl i384 %r158, 320
+%r160 = or i384 %r154, %r159
+%r161 = zext i384 %r160 to i448
+%r163 = getelementptr i64, i64* %r3, i32 6
+%r164 = load i64, i64* %r163
+%r165 = zext i64 %r164 to i448
+%r166 = shl i448 %r165, 384
+%r167 = or i448 %r161, %r166
+%r168 = zext i448 %r167 to i512
+%r170 = getelementptr i64, i64* %r3, i32 7
+%r171 = load i64, i64* %r170
+%r172 = zext i64 %r171 to i512
+%r173 = shl i512 %r172, 448
+%r174 = or i512 %r168, %r173
+%r175 = zext i512 %r174 to i576
+%r177 = getelementptr i64, i64* %r3, i32 8
+%r178 = load i64, i64* %r177
+%r179 = zext i64 %r178 to i576
+%r180 = shl i576 %r179, 512
+%r181 = or i576 %r175, %r180
+%r182 = zext i576 %r181 to i640
+%r184 = getelementptr i64, i64* %r3, i32 9
+%r185 = load i64, i64* %r184
+%r186 = zext i64 %r185 to i640
+%r187 = shl i640 %r186, 576
+%r188 = or i640 %r182, %r187
+%r189 = zext i640 %r188 to i704
+%r191 = getelementptr i64, i64* %r3, i32 10
+%r192 = load i64, i64* %r191
+%r193 = zext i64 %r192 to i704
+%r194 = shl i704 %r193, 640
+%r195 = or i704 %r189, %r194
+%r196 = zext i704 %r195 to i768
+%r198 = getelementptr i64, i64* %r3, i32 11
+%r199 = load i64, i64* %r198
+%r200 = zext i64 %r199 to i768
+%r201 = shl i768 %r200, 704
+%r202 = or i768 %r196, %r201
+%r203 = zext i768 %r202 to i832
+%r205 = getelementptr i64, i64* %r3, i32 12
+%r206 = load i64, i64* %r205
+%r207 = zext i64 %r206 to i832
+%r208 = shl i832 %r207, 768
+%r209 = or i832 %r203, %r208
+%r210 = zext i832 %r209 to i896
+%r212 = getelementptr i64, i64* %r3, i32 13
+%r213 = load i64, i64* %r212
+%r214 = zext i64 %r213 to i896
+%r215 = shl i896 %r214, 832
+%r216 = or i896 %r210, %r215
+%r217 = zext i896 %r216 to i960
+%r219 = getelementptr i64, i64* %r3, i32 14
+%r220 = load i64, i64* %r219
+%r221 = zext i64 %r220 to i960
+%r222 = shl i960 %r221, 896
+%r223 = or i960 %r217, %r222
+%r224 = zext i960 %r223 to i1024
+%r226 = getelementptr i64, i64* %r3, i32 15
+%r227 = load i64, i64* %r226
+%r228 = zext i64 %r227 to i1024
+%r229 = shl i1024 %r228, 960
+%r230 = or i1024 %r224, %r229
+%r231 = zext i1024 %r230 to i1088
+%r233 = getelementptr i64, i64* %r3, i32 16
+%r234 = load i64, i64* %r233
+%r235 = zext i64 %r234 to i1088
+%r236 = shl i1088 %r235, 1024
+%r237 = or i1088 %r231, %r236
+%r238 = zext i1088 %r237 to i1152
+%r240 = getelementptr i64, i64* %r3, i32 17
+%r241 = load i64, i64* %r240
+%r242 = zext i64 %r241 to i1152
+%r243 = shl i1152 %r242, 1088
+%r244 = or i1152 %r238, %r243
+%r245 = zext i1152 %r124 to i1216
+%r246 = zext i1152 %r244 to i1216
+%r247 = add i1216 %r245, %r246
+%r248 = trunc i1216 %r247 to i576
+%r249 = trunc i576 %r248 to i64
+%r251 = getelementptr i64, i64* %r1, i32 0
+store i64 %r249, i64* %r251
+%r252 = lshr i576 %r248, 64
+%r253 = trunc i576 %r252 to i64
+%r255 = getelementptr i64, i64* %r1, i32 1
+store i64 %r253, i64* %r255
+%r256 = lshr i576 %r252, 64
+%r257 = trunc i576 %r256 to i64
+%r259 = getelementptr i64, i64* %r1, i32 2
+store i64 %r257, i64* %r259
+%r260 = lshr i576 %r256, 64
+%r261 = trunc i576 %r260 to i64
+%r263 = getelementptr i64, i64* %r1, i32 3
+store i64 %r261, i64* %r263
+%r264 = lshr i576 %r260, 64
+%r265 = trunc i576 %r264 to i64
+%r267 = getelementptr i64, i64* %r1, i32 4
+store i64 %r265, i64* %r267
+%r268 = lshr i576 %r264, 64
+%r269 = trunc i576 %r268 to i64
+%r271 = getelementptr i64, i64* %r1, i32 5
+store i64 %r269, i64* %r271
+%r272 = lshr i576 %r268, 64
+%r273 = trunc i576 %r272 to i64
+%r275 = getelementptr i64, i64* %r1, i32 6
+store i64 %r273, i64* %r275
+%r276 = lshr i576 %r272, 64
+%r277 = trunc i576 %r276 to i64
+%r279 = getelementptr i64, i64* %r1, i32 7
+store i64 %r277, i64* %r279
+%r280 = lshr i576 %r276, 64
+%r281 = trunc i576 %r280 to i64
+%r283 = getelementptr i64, i64* %r1, i32 8
+store i64 %r281, i64* %r283
+%r284 = lshr i1216 %r247, 576
+%r285 = trunc i1216 %r284 to i640
+%r286 = load i64, i64* %r4
+%r287 = zext i64 %r286 to i128
+%r289 = getelementptr i64, i64* %r4, i32 1
+%r290 = load i64, i64* %r289
+%r291 = zext i64 %r290 to i128
+%r292 = shl i128 %r291, 64
+%r293 = or i128 %r287, %r292
+%r294 = zext i128 %r293 to i192
+%r296 = getelementptr i64, i64* %r4, i32 2
+%r297 = load i64, i64* %r296
+%r298 = zext i64 %r297 to i192
+%r299 = shl i192 %r298, 128
+%r300 = or i192 %r294, %r299
+%r301 = zext i192 %r300 to i256
+%r303 = getelementptr i64, i64* %r4, i32 3
+%r304 = load i64, i64* %r303
+%r305 = zext i64 %r304 to i256
+%r306 = shl i256 %r305, 192
+%r307 = or i256 %r301, %r306
+%r308 = zext i256 %r307 to i320
+%r310 = getelementptr i64, i64* %r4, i32 4
+%r311 = load i64, i64* %r310
+%r312 = zext i64 %r311 to i320
+%r313 = shl i320 %r312, 256
+%r314 = or i320 %r308, %r313
+%r315 = zext i320 %r314 to i384
+%r317 = getelementptr i64, i64* %r4, i32 5
+%r318 = load i64, i64* %r317
+%r319 = zext i64 %r318 to i384
+%r320 = shl i384 %r319, 320
+%r321 = or i384 %r315, %r320
+%r322 = zext i384 %r321 to i448
+%r324 = getelementptr i64, i64* %r4, i32 6
+%r325 = load i64, i64* %r324
+%r326 = zext i64 %r325 to i448
+%r327 = shl i448 %r326, 384
+%r328 = or i448 %r322, %r327
+%r329 = zext i448 %r328 to i512
+%r331 = getelementptr i64, i64* %r4, i32 7
+%r332 = load i64, i64* %r331
+%r333 = zext i64 %r332 to i512
+%r334 = shl i512 %r333, 448
+%r335 = or i512 %r329, %r334
+%r336 = zext i512 %r335 to i576
+%r338 = getelementptr i64, i64* %r4, i32 8
+%r339 = load i64, i64* %r338
+%r340 = zext i64 %r339 to i576
+%r341 = shl i576 %r340, 512
+%r342 = or i576 %r336, %r341
+%r343 = zext i576 %r342 to i640
+%r344 = sub i640 %r285, %r343
+%r345 = lshr i640 %r344, 576
+%r346 = trunc i640 %r345 to i1
+%r347 = select i1 %r346, i640 %r285, i640 %r344
+%r348 = trunc i640 %r347 to i576
+%r350 = getelementptr i64, i64* %r1, i32 9
+%r351 = trunc i576 %r348 to i64
+%r353 = getelementptr i64, i64* %r350, i32 0
+store i64 %r351, i64* %r353
+%r354 = lshr i576 %r348, 64
+%r355 = trunc i576 %r354 to i64
+%r357 = getelementptr i64, i64* %r350, i32 1
+store i64 %r355, i64* %r357
+%r358 = lshr i576 %r354, 64
+%r359 = trunc i576 %r358 to i64
+%r361 = getelementptr i64, i64* %r350, i32 2
+store i64 %r359, i64* %r361
+%r362 = lshr i576 %r358, 64
+%r363 = trunc i576 %r362 to i64
+%r365 = getelementptr i64, i64* %r350, i32 3
+store i64 %r363, i64* %r365
+%r366 = lshr i576 %r362, 64
+%r367 = trunc i576 %r366 to i64
+%r369 = getelementptr i64, i64* %r350, i32 4
+store i64 %r367, i64* %r369
+%r370 = lshr i576 %r366, 64
+%r371 = trunc i576 %r370 to i64
+%r373 = getelementptr i64, i64* %r350, i32 5
+store i64 %r371, i64* %r373
+%r374 = lshr i576 %r370, 64
+%r375 = trunc i576 %r374 to i64
+%r377 = getelementptr i64, i64* %r350, i32 6
+store i64 %r375, i64* %r377
+%r378 = lshr i576 %r374, 64
+%r379 = trunc i576 %r378 to i64
+%r381 = getelementptr i64, i64* %r350, i32 7
+store i64 %r379, i64* %r381
+%r382 = lshr i576 %r378, 64
+%r383 = trunc i576 %r382 to i64
+%r385 = getelementptr i64, i64* %r350, i32 8
+store i64 %r383, i64* %r385
+ret void
+}
+define void @mcl_fpDbl_sub9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = zext i512 %r54 to i576
+%r57 = getelementptr i64, i64* %r2, i32 8
+%r58 = load i64, i64* %r57
+%r59 = zext i64 %r58 to i576
+%r60 = shl i576 %r59, 512
+%r61 = or i576 %r55, %r60
+%r62 = zext i576 %r61 to i640
+%r64 = getelementptr i64, i64* %r2, i32 9
+%r65 = load i64, i64* %r64
+%r66 = zext i64 %r65 to i640
+%r67 = shl i640 %r66, 576
+%r68 = or i640 %r62, %r67
+%r69 = zext i640 %r68 to i704
+%r71 = getelementptr i64, i64* %r2, i32 10
+%r72 = load i64, i64* %r71
+%r73 = zext i64 %r72 to i704
+%r74 = shl i704 %r73, 640
+%r75 = or i704 %r69, %r74
+%r76 = zext i704 %r75 to i768
+%r78 = getelementptr i64, i64* %r2, i32 11
+%r79 = load i64, i64* %r78
+%r80 = zext i64 %r79 to i768
+%r81 = shl i768 %r80, 704
+%r82 = or i768 %r76, %r81
+%r83 = zext i768 %r82 to i832
+%r85 = getelementptr i64, i64* %r2, i32 12
+%r86 = load i64, i64* %r85
+%r87 = zext i64 %r86 to i832
+%r88 = shl i832 %r87, 768
+%r89 = or i832 %r83, %r88
+%r90 = zext i832 %r89 to i896
+%r92 = getelementptr i64, i64* %r2, i32 13
+%r93 = load i64, i64* %r92
+%r94 = zext i64 %r93 to i896
+%r95 = shl i896 %r94, 832
+%r96 = or i896 %r90, %r95
+%r97 = zext i896 %r96 to i960
+%r99 = getelementptr i64, i64* %r2, i32 14
+%r100 = load i64, i64* %r99
+%r101 = zext i64 %r100 to i960
+%r102 = shl i960 %r101, 896
+%r103 = or i960 %r97, %r102
+%r104 = zext i960 %r103 to i1024
+%r106 = getelementptr i64, i64* %r2, i32 15
+%r107 = load i64, i64* %r106
+%r108 = zext i64 %r107 to i1024
+%r109 = shl i1024 %r108, 960
+%r110 = or i1024 %r104, %r109
+%r111 = zext i1024 %r110 to i1088
+%r113 = getelementptr i64, i64* %r2, i32 16
+%r114 = load i64, i64* %r113
+%r115 = zext i64 %r114 to i1088
+%r116 = shl i1088 %r115, 1024
+%r117 = or i1088 %r111, %r116
+%r118 = zext i1088 %r117 to i1152
+%r120 = getelementptr i64, i64* %r2, i32 17
+%r121 = load i64, i64* %r120
+%r122 = zext i64 %r121 to i1152
+%r123 = shl i1152 %r122, 1088
+%r124 = or i1152 %r118, %r123
+%r125 = load i64, i64* %r3
+%r126 = zext i64 %r125 to i128
+%r128 = getelementptr i64, i64* %r3, i32 1
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i128
+%r131 = shl i128 %r130, 64
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i192
+%r135 = getelementptr i64, i64* %r3, i32 2
+%r136 = load i64, i64* %r135
+%r137 = zext i64 %r136 to i192
+%r138 = shl i192 %r137, 128
+%r139 = or i192 %r133, %r138
+%r140 = zext i192 %r139 to i256
+%r142 = getelementptr i64, i64* %r3, i32 3
+%r143 = load i64, i64* %r142
+%r144 = zext i64 %r143 to i256
+%r145 = shl i256 %r144, 192
+%r146 = or i256 %r140, %r145
+%r147 = zext i256 %r146 to i320
+%r149 = getelementptr i64, i64* %r3, i32 4
+%r150 = load i64, i64* %r149
+%r151 = zext i64 %r150 to i320
+%r152 = shl i320 %r151, 256
+%r153 = or i320 %r147, %r152
+%r154 = zext i320 %r153 to i384
+%r156 = getelementptr i64, i64* %r3, i32 5
+%r157 = load i64, i64* %r156
+%r158 = zext i64 %r157 to i384
+%r159 = shl i384 %r158, 320
+%r160 = or i384 %r154, %r159
+%r161 = zext i384 %r160 to i448
+%r163 = getelementptr i64, i64* %r3, i32 6
+%r164 = load i64, i64* %r163
+%r165 = zext i64 %r164 to i448
+%r166 = shl i448 %r165, 384
+%r167 = or i448 %r161, %r166
+%r168 = zext i448 %r167 to i512
+%r170 = getelementptr i64, i64* %r3, i32 7
+%r171 = load i64, i64* %r170
+%r172 = zext i64 %r171 to i512
+%r173 = shl i512 %r172, 448
+%r174 = or i512 %r168, %r173
+%r175 = zext i512 %r174 to i576
+%r177 = getelementptr i64, i64* %r3, i32 8
+%r178 = load i64, i64* %r177
+%r179 = zext i64 %r178 to i576
+%r180 = shl i576 %r179, 512
+%r181 = or i576 %r175, %r180
+%r182 = zext i576 %r181 to i640
+%r184 = getelementptr i64, i64* %r3, i32 9
+%r185 = load i64, i64* %r184
+%r186 = zext i64 %r185 to i640
+%r187 = shl i640 %r186, 576
+%r188 = or i640 %r182, %r187
+%r189 = zext i640 %r188 to i704
+%r191 = getelementptr i64, i64* %r3, i32 10
+%r192 = load i64, i64* %r191
+%r193 = zext i64 %r192 to i704
+%r194 = shl i704 %r193, 640
+%r195 = or i704 %r189, %r194
+%r196 = zext i704 %r195 to i768
+%r198 = getelementptr i64, i64* %r3, i32 11
+%r199 = load i64, i64* %r198
+%r200 = zext i64 %r199 to i768
+%r201 = shl i768 %r200, 704
+%r202 = or i768 %r196, %r201
+%r203 = zext i768 %r202 to i832
+%r205 = getelementptr i64, i64* %r3, i32 12
+%r206 = load i64, i64* %r205
+%r207 = zext i64 %r206 to i832
+%r208 = shl i832 %r207, 768
+%r209 = or i832 %r203, %r208
+%r210 = zext i832 %r209 to i896
+%r212 = getelementptr i64, i64* %r3, i32 13
+%r213 = load i64, i64* %r212
+%r214 = zext i64 %r213 to i896
+%r215 = shl i896 %r214, 832
+%r216 = or i896 %r210, %r215
+%r217 = zext i896 %r216 to i960
+%r219 = getelementptr i64, i64* %r3, i32 14
+%r220 = load i64, i64* %r219
+%r221 = zext i64 %r220 to i960
+%r222 = shl i960 %r221, 896
+%r223 = or i960 %r217, %r222
+%r224 = zext i960 %r223 to i1024
+%r226 = getelementptr i64, i64* %r3, i32 15
+%r227 = load i64, i64* %r226
+%r228 = zext i64 %r227 to i1024
+%r229 = shl i1024 %r228, 960
+%r230 = or i1024 %r224, %r229
+%r231 = zext i1024 %r230 to i1088
+%r233 = getelementptr i64, i64* %r3, i32 16
+%r234 = load i64, i64* %r233
+%r235 = zext i64 %r234 to i1088
+%r236 = shl i1088 %r235, 1024
+%r237 = or i1088 %r231, %r236
+%r238 = zext i1088 %r237 to i1152
+%r240 = getelementptr i64, i64* %r3, i32 17
+%r241 = load i64, i64* %r240
+%r242 = zext i64 %r241 to i1152
+%r243 = shl i1152 %r242, 1088
+%r244 = or i1152 %r238, %r243
+%r245 = zext i1152 %r124 to i1216
+%r246 = zext i1152 %r244 to i1216
+%r247 = sub i1216 %r245, %r246
+%r248 = trunc i1216 %r247 to i576
+%r249 = trunc i576 %r248 to i64
+%r251 = getelementptr i64, i64* %r1, i32 0
+store i64 %r249, i64* %r251
+%r252 = lshr i576 %r248, 64
+%r253 = trunc i576 %r252 to i64
+%r255 = getelementptr i64, i64* %r1, i32 1
+store i64 %r253, i64* %r255
+%r256 = lshr i576 %r252, 64
+%r257 = trunc i576 %r256 to i64
+%r259 = getelementptr i64, i64* %r1, i32 2
+store i64 %r257, i64* %r259
+%r260 = lshr i576 %r256, 64
+%r261 = trunc i576 %r260 to i64
+%r263 = getelementptr i64, i64* %r1, i32 3
+store i64 %r261, i64* %r263
+%r264 = lshr i576 %r260, 64
+%r265 = trunc i576 %r264 to i64
+%r267 = getelementptr i64, i64* %r1, i32 4
+store i64 %r265, i64* %r267
+%r268 = lshr i576 %r264, 64
+%r269 = trunc i576 %r268 to i64
+%r271 = getelementptr i64, i64* %r1, i32 5
+store i64 %r269, i64* %r271
+%r272 = lshr i576 %r268, 64
+%r273 = trunc i576 %r272 to i64
+%r275 = getelementptr i64, i64* %r1, i32 6
+store i64 %r273, i64* %r275
+%r276 = lshr i576 %r272, 64
+%r277 = trunc i576 %r276 to i64
+%r279 = getelementptr i64, i64* %r1, i32 7
+store i64 %r277, i64* %r279
+%r280 = lshr i576 %r276, 64
+%r281 = trunc i576 %r280 to i64
+%r283 = getelementptr i64, i64* %r1, i32 8
+store i64 %r281, i64* %r283
+%r284 = lshr i1216 %r247, 576
+%r285 = trunc i1216 %r284 to i576
+%r286 = lshr i1216 %r247, 1152
+%r287 = trunc i1216 %r286 to i1
+%r288 = load i64, i64* %r4
+%r289 = zext i64 %r288 to i128
+%r291 = getelementptr i64, i64* %r4, i32 1
+%r292 = load i64, i64* %r291
+%r293 = zext i64 %r292 to i128
+%r294 = shl i128 %r293, 64
+%r295 = or i128 %r289, %r294
+%r296 = zext i128 %r295 to i192
+%r298 = getelementptr i64, i64* %r4, i32 2
+%r299 = load i64, i64* %r298
+%r300 = zext i64 %r299 to i192
+%r301 = shl i192 %r300, 128
+%r302 = or i192 %r296, %r301
+%r303 = zext i192 %r302 to i256
+%r305 = getelementptr i64, i64* %r4, i32 3
+%r306 = load i64, i64* %r305
+%r307 = zext i64 %r306 to i256
+%r308 = shl i256 %r307, 192
+%r309 = or i256 %r303, %r308
+%r310 = zext i256 %r309 to i320
+%r312 = getelementptr i64, i64* %r4, i32 4
+%r313 = load i64, i64* %r312
+%r314 = zext i64 %r313 to i320
+%r315 = shl i320 %r314, 256
+%r316 = or i320 %r310, %r315
+%r317 = zext i320 %r316 to i384
+%r319 = getelementptr i64, i64* %r4, i32 5
+%r320 = load i64, i64* %r319
+%r321 = zext i64 %r320 to i384
+%r322 = shl i384 %r321, 320
+%r323 = or i384 %r317, %r322
+%r324 = zext i384 %r323 to i448
+%r326 = getelementptr i64, i64* %r4, i32 6
+%r327 = load i64, i64* %r326
+%r328 = zext i64 %r327 to i448
+%r329 = shl i448 %r328, 384
+%r330 = or i448 %r324, %r329
+%r331 = zext i448 %r330 to i512
+%r333 = getelementptr i64, i64* %r4, i32 7
+%r334 = load i64, i64* %r333
+%r335 = zext i64 %r334 to i512
+%r336 = shl i512 %r335, 448
+%r337 = or i512 %r331, %r336
+%r338 = zext i512 %r337 to i576
+%r340 = getelementptr i64, i64* %r4, i32 8
+%r341 = load i64, i64* %r340
+%r342 = zext i64 %r341 to i576
+%r343 = shl i576 %r342, 512
+%r344 = or i576 %r338, %r343
+%r346 = select i1 %r287, i576 %r344, i576 0
+%r347 = add i576 %r285, %r346
+%r349 = getelementptr i64, i64* %r1, i32 9
+%r350 = trunc i576 %r347 to i64
+%r352 = getelementptr i64, i64* %r349, i32 0
+store i64 %r350, i64* %r352
+%r353 = lshr i576 %r347, 64
+%r354 = trunc i576 %r353 to i64
+%r356 = getelementptr i64, i64* %r349, i32 1
+store i64 %r354, i64* %r356
+%r357 = lshr i576 %r353, 64
+%r358 = trunc i576 %r357 to i64
+%r360 = getelementptr i64, i64* %r349, i32 2
+store i64 %r358, i64* %r360
+%r361 = lshr i576 %r357, 64
+%r362 = trunc i576 %r361 to i64
+%r364 = getelementptr i64, i64* %r349, i32 3
+store i64 %r362, i64* %r364
+%r365 = lshr i576 %r361, 64
+%r366 = trunc i576 %r365 to i64
+%r368 = getelementptr i64, i64* %r349, i32 4
+store i64 %r366, i64* %r368
+%r369 = lshr i576 %r365, 64
+%r370 = trunc i576 %r369 to i64
+%r372 = getelementptr i64, i64* %r349, i32 5
+store i64 %r370, i64* %r372
+%r373 = lshr i576 %r369, 64
+%r374 = trunc i576 %r373 to i64
+%r376 = getelementptr i64, i64* %r349, i32 6
+store i64 %r374, i64* %r376
+%r377 = lshr i576 %r373, 64
+%r378 = trunc i576 %r377 to i64
+%r380 = getelementptr i64, i64* %r349, i32 7
+store i64 %r378, i64* %r380
+%r381 = lshr i576 %r377, 64
+%r382 = trunc i576 %r381 to i64
+%r384 = getelementptr i64, i64* %r349, i32 8
+store i64 %r382, i64* %r384
+ret void
+}

From 5fd1dc64ef2ef04014bfadcb3c2ad0c54edf794b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 24 Nov 2020 10:46:58 +0900
Subject: [PATCH 352/553] [java] mk files for android

---
 ffi/java/android/jni/Android.mk     | 39 +++++++++++++++++++++++++++++
 ffi/java/android/jni/Application.mk |  4 +++
 2 files changed, 43 insertions(+)
 create mode 100644 ffi/java/android/jni/Android.mk
 create mode 100644 ffi/java/android/jni/Application.mk

diff --git a/ffi/java/android/jni/Android.mk b/ffi/java/android/jni/Android.mk
new file mode 100644
index 00000000..8e326984
--- /dev/null
+++ b/ffi/java/android/jni/Android.mk
@@ -0,0 +1,39 @@
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_CPP_EXTENSION := .cpp .ll .cxx
+LOCAL_MODULE := mcljava
+
+LOCAL_MCL_DIR := $(LOCAL_PATH)/../../../../
+
+ifeq ($(TARGET_ARCH_ABI),x86_64)
+  MY_BIT := 64
+endif
+ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
+  MY_BIT := 64
+endif
+ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
+  MY_BIT := 32
+endif
+ifeq ($(TARGET_ARCH_ABI),x86)
+  MY_BIT := 32
+endif
+ifeq ($(MY_BIT),64)
+  MY_BASE_LL := $(LOCAL_MCL_DIR)/src/base64.ll
+  LOCAL_CPPFLAGS += -DMCL_SIZEOF_UNIT=8
+endif
+ifeq ($(MY_BIT),32)
+  MY_BASE_LL := $(LOCAL_MCL_DIR)/src/base32.ll
+  LOCAL_CPPFLAGS += -DMCL_SIZEOF_UNIT=4
+endif
+LOCAL_SRC_FILES := $(LOCAL_MCL_DIR)/ffi/java/mcl_wrap.cxx $(LOCAL_MCL_DIR)/src/bn_c384_256.cpp $(LOCAL_MCL_DIR)/src/fp.cpp $(MY_BASE_LL)
+LOCAL_C_INCLUDES := $(LOCAL_MCL_DIR)/include $(LOCAL_MCL_DIR)/src $(LOCAL_MCL_DIR)/ffi/java
+LOCAL_CPPFLAGS += -DMCL_DONT_USE_XBYAK
+LOCAL_CPPFLAGS += -O3 -DNDEBUG -fPIC -DMCL_DONT_USE_OPENSSL -DMCL_LLVM_BMI2=0 -DMCL_USE_LLVM=1 -DMCL_USE_VINT -DMCL_VINT_FIXED_BUFFER -DMCL_MAX_BIT_SIZE=384
+LOCAL_CPPFLAGS += -fno-threadsafe-statics
+#LOCAL_CPPFLAGS+=-fno-exceptions -fno-rtti -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -std=c++03
+
+#LOCAL_CPPFLAGS += -DBLS_ETH
+#LOCAL_LDLIBS := -llog #-Wl,--no-warn-shared-textrel
+#include $(BUILD_STATIC_LIBRARY)
+include $(BUILD_SHARED_LIBRARY)
diff --git a/ffi/java/android/jni/Application.mk b/ffi/java/android/jni/Application.mk
new file mode 100644
index 00000000..939b15db
--- /dev/null
+++ b/ffi/java/android/jni/Application.mk
@@ -0,0 +1,4 @@
+APP_ABI := arm64-v8a armeabi-v7a x86_64
+APP_PLATFORM := android-19
+APP_STL := c++_static
+APP_CPPFLAGS := -fexceptions

From 48253e792fac324be555674bd6da5a7eb58c44d7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 30 Nov 2020 09:59:54 +0900
Subject: [PATCH 353/553] [she] add ZKP of dec(c)=0 for CipherTextGT

---
 misc/she/memo.txt | 58 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/misc/she/memo.txt b/misc/she/memo.txt
index 68ebe85e..5ee17097 100644
--- a/misc/she/memo.txt
+++ b/misc/she/memo.txt
@@ -28,4 +28,60 @@ pi = (d, h)
 
 Verifier
 Bi := d Pi - h Ai
-verify h = Hash(P2, A2, A2, B1, B2)
+verify h = Hash(P2, A1, A2, B1, B2)
+-----------------------------------------------------------------------------
+CipherTextGT
+P ; generator of GT
+x1, x2 ; secret key
+(P0, P1, P2, P3) := (P, x1 P, x2 P, x1 x2 P) ; public information
+
+CipherText c = (A0, A1, A2, A3)
+dec(c) = 0 <=> A0 = x2 A1 + x1 A2 - x1 x2 A3 ; (*)
+
+construction of ZKP for dec(c) = 0, i.e., show (*)
+Prover:
+b1, b2, b3 ; random value
+Bi := bi P for i = 1, 2, 3
+X := b1 A2 + b2 A1 - b3 A3
+h := Hash(P0, ..., P3, A0, ..., A3, B1, B2, B3, X)
+d1 := b1 + h x1
+d2 := b2 + h x2
+d3 := b3 + h x1 x2
+pi := (d1, d2, d3, h)
+
+Verifier:
+(pi, {Pi}, {Ai}) given
+Bi' := di P - h Pi for i = 1, 2, 3
+X' := d1 A2 + d2 A1 - d3 A3 - h A0
+verify Hash({Pi}, {Ai}, {Bi'}, X') = h
+
+Completeness
+
+B1' = d1 P - h P1 = (b1 + h x1) P - h x1 P = b1 P = B1
+B2' = d2 P - h P2 = (b2 + h x2) P - h x2 P = b2 P = B2
+B3' = d3 P - h P3 = (b3 + h x1 x2) P - h x1 x2 P = b3 P = B3
+X' = (b1 + h x1) A2 + (b2 + h x2) A1 - (b3 + h x1 x2)A3 - h A0
+= b1 A2 + b2 A1 - b3 A3 + h (x1 A2 + x2 A1 - x1 x2 A3 - A0) = b1 A2 + b2 A1 - b3 A3 = X ; by (*)
+OK
+
+Soundness
+{Ai}, pi=(d1, d2, d3, h) ; given
+compute Bi', X' as above
+Suppose Hash({Pi}, {Ai}, {Bi'}, X') = h
+
+define
+b1 := d1 - h x1
+b2 := d2 - h x2
+b3 := d3 - h x1 x2
+where x1, x2 are unknown
+d1, d2, d3 are free parameters, so b1, b2, b3 are also free.
+
+B1' = d1 P - h P1 = b1 P
+B2' = b2 P
+B3' = b3 P
+
+Y := x1 A2 + x2 A1 - x1 x2 A3 - A0 ; unknown, but it is fixed
+X' = b1 A2 + b2 A1 - b3 A3 + h Y
+
+Hash({Pi}, {Ai}, b1 P, b2 P, b3 P, b1 A2 + b2 A1 - b3 A3 + h Y) = h
+To find {b1, b2, b3, h} satisfying this equation, Y must be 0.

From d0da379eb27fc0bd2e176d2e755f2c1e986a2365 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 2 Dec 2020 15:20:05 +0900
Subject: [PATCH 354/553] [she] refactor makeHash

---
 include/mcl/she.hpp | 118 +++++++++++++++++---------------------------
 1 file changed, 45 insertions(+), 73 deletions(-)

diff --git a/include/mcl/she.hpp b/include/mcl/she.hpp
index ddbe86ee..54ec13c6 100644
--- a/include/mcl/she.hpp
+++ b/include/mcl/she.hpp
@@ -26,6 +26,7 @@
 #include <mcl/window_method.hpp>
 #include <cybozu/endian.hpp>
 #include <cybozu/serializer.hpp>
+#include <cybozu/sha2.hpp>
 #include <mcl/ecparam.hpp>
 
 namespace mcl { namespace she {
@@ -324,63 +325,26 @@ int log(const G& P, const G& xP)
 	}
 	throw cybozu::Exception("she:log:not found");
 }
-// 5
-template<class F, class T0, class T1, class T2, class T3, class T4>
-void makeHash(F& h, char *buf, const size_t bufSize, const T0 *t0, const T1 *t1, const T2 *t2, const T3 *t3, const T4 *t4)
-{
-	cybozu::MemoryOutputStream os(buf, bufSize);
-	t0->save(os);
-	t1->save(os);
-	t2->save(os);
-	t3->save(os);
-	t4->save(os);
-	h.setHashOf(buf, os.getPos());
-}
-// 6
-template<class F, class T0, class T1, class T2, class T3, class T4, class T5>
-void makeHash(F& h, char *buf, const size_t bufSize, const T0 *t0, const T1 *t1, const T2 *t2, const T3 *t3, const T4 *t4, const T5 *t5)
-{
-	cybozu::MemoryOutputStream os(buf, bufSize);
-	t0->save(os);
-	t1->save(os);
-	t2->save(os);
-	t3->save(os);
-	t4->save(os);
-	t5->save(os);
-	h.setHashOf(buf, os.getPos());
-}
-// 8
-template<class F, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7>
-void makeHash(F& h, char *buf, const size_t bufSize, const T0 *t0, const T1 *t1, const T2 *t2, const T3 *t3, const T4 *t4, const T5 *t5, const T6 *t6, const T7 *t7)
-{
-	cybozu::MemoryOutputStream os(buf, bufSize);
-	t0->save(os);
-	t1->save(os);
-	t2->save(os);
-	t3->save(os);
-	t4->save(os);
-	t5->save(os);
-	t6->save(os);
-	t7->save(os);
-	h.setHashOf(buf, os.getPos());
-}
-// 10
-template<class F, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
-void makeHash(F& h, char *buf, const size_t bufSize, const T0 *t0, const T1 *t1, const T2 *t2, const T3 *t3, const T4 *t4, const T5 *t5, const T6 *t6, const T7 *t7, const T8 *t8, const T9 *t9)
-{
-	cybozu::MemoryOutputStream os(buf, bufSize);
-	t0->save(os);
-	t1->save(os);
-	t2->save(os);
-	t3->save(os);
-	t4->save(os);
-	t5->save(os);
-	t6->save(os);
-	t7->save(os);
-	t8->save(os);
-	t9->save(os);
-	h.setHashOf(buf, os.getPos());
-}
+
+struct Hash {
+	cybozu::Sha256 h_;
+	template<class T>
+	Hash& operator<<(const T& t)
+	{
+		char buf[sizeof(T)];
+		cybozu::MemoryOutputStream os(buf, sizeof(buf));
+		t.save(os);
+		h_.update(buf, os.getPos());
+		return *this;
+	}
+	template<class F>
+	void get(F& x)
+	{
+		uint8_t md[32];
+		h_.digest(md, sizeof(md), 0, 0);
+		x.setArrayMask(md, sizeof(md));
+	}
+};
 
 } // mcl::she::local
 
@@ -862,8 +826,9 @@ struct SHET {
 			G1::mul(B2, P2, b);
 			Fr& d = zkp.d_[0];
 			Fr& h = zkp.d_[1];
-			char buf[sizeof(G1) * 5];
-			local::makeHash(h, buf, sizeof(buf), &P2, &A1, &A2, &B1, &B2);
+			local::Hash hash;
+			hash << P2 << A1 << A2 << B1 << B2;
+			hash.get(h);
 			Fr::mul(d, h, x_);
 			d += b;
 			return m;
@@ -987,9 +952,10 @@ struct SHET {
 		r.setRand();
 		Pmul.mul(static_cast<I&>(R[0][m]), r); // R[0][m] = r P
 		xPmul.mul(R[1][m], r); // R[1][m] = r xP
-		char buf[sizeof(G) * 2];
 		Fr c;
-		local::makeHash(c, buf, sizeof(buf), &S, &T, &R[0][0], &R[0][1], &R[1][0], &R[1][1]);
+		local::Hash hash;
+		hash << S << T << R[0][0] << R[0][1] << R[1][0] << R[1][1];
+		hash.get(c);
 		d[m] = c - d[1-m];
 		s[m] = r + d[m] * encRand;
 	}
@@ -1019,9 +985,10 @@ struct SHET {
 		G::sub(T2, S, P);
 		G::mul(T2, T2, d[1]);
 		G::sub(R[1][1], T1, T2);
-		char buf[sizeof(G) * 2];
 		Fr c;
-		local::makeHash(c, buf, sizeof(buf), &S, &T, &R[0][0], &R[0][1], &R[1][0], &R[1][1]);
+		local::Hash hash;
+		hash << S << T << R[0][0] << R[0][1] << R[1][0] << R[1][1];
+		hash.get(c);
 		return c == d[0] + d[1];
 	}
 	/*
@@ -1043,12 +1010,13 @@ struct SHET {
 		G2 R3, R4;
 		ElGamalEnc(R1, R2, rm, Pmul, xPmul, &rp);
 		ElGamalEnc(R3, R4, rm, Qmul, yQmul, &rs);
-		char buf[sizeof(G1) * 4 + sizeof(G2) * 4];
 		Fr& c = zkp.d_[0];
 		Fr& sp = zkp.d_[1];
 		Fr& ss = zkp.d_[2];
 		Fr& sm = zkp.d_[3];
-		local::makeHash(c, buf, sizeof(buf), &S1, &T1, &S2, &T2, &R1, &R2, &R3, &R4);
+		local::Hash hash;
+		hash << S1 << T1 << S2 << T2 << R1 << R2 << R3 << R4;
+		hash.get(c);
 		Fr::mul(sp, c, p);
 		sp += rp;
 		Fr::mul(ss, c, s);
@@ -1075,9 +1043,10 @@ struct SHET {
 		R3 -= X2;
 		G2::mul(X2, T2, c);
 		R4 -= X2;
-		char buf[sizeof(G1) * 4 + sizeof(G2) * 4];
 		Fr c2;
-		local::makeHash(c2, buf, sizeof(buf), &S1, &T1, &S2, &T2, &R1, &R2, &R3, &R4);
+		local::Hash hash;
+		hash << S1 << T1 << S2 << T2 << R1 << R2 << R3 << R4;
+		hash.get(c2);
 		return c == c2;
 	}
 	/*
@@ -1121,9 +1090,10 @@ struct SHET {
 		G2 R5, R6;
 		ElGamalEnc(R4, R3, rm, Pmul, xPmul, &rp);
 		ElGamalEnc(R6, R5, rm, Qmul, yQmul, &rs);
-		char buf[sizeof(Fp) * 12];
 		Fr c;
-		local::makeHash(c, buf, sizeof(buf), &S1, &T1, &R1[0], &R1[1], &R2[0], &R2[1], &R3, &R4, &R5, &R6);
+		local::Hash hash;
+		hash << S1 << T1 << R1[0] << R1[1] << R2[0] << R2[1] << R3 << R4 << R5 << R6;
+		hash.get(c);
 		Fr::sub(d[m], c, d[1-m]);
 		Fr::mul(spm[m], d[m], p);
 		spm[m] += rpm;
@@ -1170,9 +1140,10 @@ struct SHET {
 		R5 -= X2;
 		G2::mul(X2, S2, c);
 		R6 -= X2;
-		char buf[sizeof(Fp) * 12];
 		Fr c2;
-		local::makeHash(c2, buf, sizeof(buf), &S1, &T1, &R1[0], &R1[1], &R2[0], &R2[1], &R3, &R4, &R5, &R6);
+		local::Hash hash;
+		hash << S1 << T1 << R1[0] << R1[1] << R2[0] << R2[1] << R3 << R4 << R5 << R6;
+		hash.get(c2);
 		return c == c2;
 	}
 	/*
@@ -1355,9 +1326,10 @@ struct SHET {
 			B1 -= T;
 			G1::mul(T, A2, h);
 			B2 -= T;
-			char buf[sizeof(G1) * 5];
 			Fr h2;
-			local::makeHash(h2, buf, sizeof(buf), &P2, &A1, &A2, &B1, &B2);
+			local::Hash hash;
+			hash << P2 << A1 << A2 << B1 << B2;
+			hash.get(h2);
 			return h == h2;
 		}
 		bool verify(const CipherTextG2& c, const ZkpBin& zkp) const

From b0abe614ae838470d000b6ccc5025803401d4053 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 14 Dec 2020 16:26:00 +0900
Subject: [PATCH 355/553] support m1 mac

---
 Makefile  |  6 +++++-
 common.mk | 21 +++++++++++----------
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/Makefile b/Makefile
index b60492d6..10fc5686 100644
--- a/Makefile
+++ b/Makefile
@@ -91,6 +91,10 @@ ifneq ($(CPU),)
   ASM_SRC=$(ASM_SRC_PATH_NAME).s
 endif
 ASM_OBJ=$(OBJ_DIR)/$(CPU).o
+ifeq ($(OS),mac-m1)
+  ASM_SRC=src/base64.ll
+  ASM_OBJ=$(OBJ_DIR)/base64.o
+endif
 BN256_OBJ=$(OBJ_DIR)/bn_c256.o
 BN384_OBJ=$(OBJ_DIR)/bn_c384.o
 BN384_256_OBJ=$(OBJ_DIR)/bn_c384_256.o
@@ -400,7 +404,7 @@ update_cybozulib:
 	cp -a $(addprefix ../cybozulib/,$(wildcard include/cybozu/*.hpp)) include/cybozu/
 
 clean:
-	$(RM) $(LIB_DIR)/*.a $(LIB_DIR)/*.$(LIB_SUF) $(OBJ_DIR)/*.o $(OBJ_DIR)/*.obj $(OBJ_DIR)/*.d $(EXE_DIR)/*.exe $(GEN_EXE) $(ASM_OBJ) $(LIB_OBJ) $(BN256_OBJ) $(BN384_OBJ) $(BN512_OBJ) $(FUNC_LIST) src/*.ll lib/*.a src/static_code.asm src/dump_code
+	$(RM) $(LIB_DIR)/*.a $(LIB_DIR)/*.$(LIB_SUF) $(OBJ_DIR)/*.o $(OBJ_DIR)/*.obj $(OBJ_DIR)/*.d $(EXE_DIR)/*.exe $(GEN_EXE) $(ASM_OBJ) $(LIB_OBJ) $(BN256_OBJ) $(BN384_OBJ) $(BN512_OBJ) $(FUNC_LIST) lib/*.a src/static_code.asm src/dump_code
 
 ALL_SRC=$(SRC_SRC) $(TEST_SRC) $(SAMPLE_SRC)
 DEPEND_FILE=$(addprefix $(OBJ_DIR)/, $(addsuffix .d,$(basename $(ALL_SRC))))
diff --git a/common.mk b/common.mk
index 29cd6505..707aa35f 100644
--- a/common.mk
+++ b/common.mk
@@ -1,5 +1,6 @@
 GCC_VER=$(shell $(PRE)$(CC) -dumpversion)
 UNAME_S=$(shell uname -s)
+ARCH?=$(shell uname -m)
 NASM_ELF_OPT=-felf64
 ifeq ($(UNAME_S),Linux)
   OS=Linux
@@ -13,8 +14,11 @@ ifeq ($(findstring CYGWIN,$(UNAME_S)),CYGWIN)
   OS=cygwin
 endif
 ifeq ($(UNAME_S),Darwin)
-  OS=mac
-  ARCH=x86_64
+  ifeq ($(ARCH),x86_64)
+    OS=mac
+  else
+    OS=mac-m1
+  endif
   LIB_SUF=dylib
   OPENSSL_DIR?=/usr/local/opt/openssl
   CFLAGS+=-I$(OPENSSL_DIR)/include
@@ -39,7 +43,6 @@ ifeq ($(UNAME_S),FreeBSD)
   LDFLAGS+=-L/usr/local/lib
 endif
 
-ARCH?=$(shell uname -m)
 ifneq ($(findstring $(ARCH),x86_64/amd64),)
   CPU=x86-64
   INTEL=1
@@ -63,11 +66,12 @@ ifneq ($(findstring $(ARCH),armv7l/armv6l),)
   BIT=32
   #LOW_ASM_SRC=src/asm/low_arm.s
 endif
-ifeq ($(ARCH),aarch64)
+#ifeq ($(ARCH),aarch64)
+ifneq ($(findstring $(ARCH),aarch64/arm64),)
   CPU=aarch64
   BIT=64
 endif
-ifeq ($(findstring $(OS),mac/mingw64/openbsd),)
+ifeq ($(findstring $(OS),mac/mac-m1/mingw64/openbsd),)
   LDFLAGS+=-lrt
 endif
 
@@ -111,11 +115,8 @@ CFLAGS+=$(CFLAGS_OPT_USER)
 endif
 CFLAGS+=$(CFLAGS_USER)
 MCL_USE_GMP?=1
-ifeq ($(OS),mac)
-  ifeq ($(shell sw_vers -productVersion),10.15)
-    # workaround because of GMP does not run well on Catalina
-    MCL_USE_GMP=0
-  endif
+ifneq ($(findstring $(OS),mac/mac-m1),) # disable GMP only when OS is mac or mac-m1
+  MCL_USE_GMP=0
+endif
 MCL_USE_OPENSSL?=0
 ifeq ($(MCL_USE_GMP),0)

From 250f7bddbc0a18dc4170b649588884a3ba725e9a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 14 Dec 2020 16:28:32 +0900
Subject: [PATCH 356/553] v1.28

---
 include/mcl/op.hpp | 2 +-
 readme.md          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 72e40a32..3f7bd233 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x127; /* 0xABC = A.BC */
+static const int version = 0x128; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index c3f175dd..523f50c6 100644
--- a/readme.md
+++ b/readme.md
@@ -328,6 +328,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
+- 2020/Dec/14 v1.28 support M1 mac
 - 2020/Jun/07 v1.22 remove old hash-to-curve functions
 - 2020/Jun/04 v1.21 mapToG1 and hashAndMapToG1 are compatible to irtf/eip-2537
 - 2020/May/13 v1.09 support draft-irtf-cfrg-hash-to-curve-07

From 54cf1ca2bcb53c0cc2b2492c8f7380455851b619 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 14 Dec 2020 17:50:57 +0900
Subject: [PATCH 357/553] reduce benchmark time

---
 test/ec_test.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/ec_test.cpp b/test/ec_test.cpp
index f5447140..2e3615cd 100644
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -600,9 +600,9 @@ void mulVec(const mcl::EcParam& para)
 		naiveMulVec(Q1, xVec, yVec, n);
 		Ec::mulVec(Q2, xVec, yVec, n);
 		CYBOZU_TEST_EQUAL(Q1, Q2);
-#ifndef NDEBUG
+#ifdef NDEBUG
 		printf("n=%zd\n", n);
-		const int C = 400;
+		const int C = 50;
 		CYBOZU_BENCH_C("naive ", C, naiveMulVec, Q1, xVec, yVec, n);
 		CYBOZU_BENCH_C("mulVec", C, Ec::mulVec, Q1, xVec, yVec, n);
 #endif

From 4407af6e330e0d90728576c23973d4c8e4b4992b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 14 Dec 2020 20:33:44 +0900
Subject: [PATCH 358/553] update doc

---
 readme.md | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/readme.md b/readme.md
index 523f50c6..7e645e7a 100644
--- a/readme.md
+++ b/readme.md
@@ -10,26 +10,18 @@ mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
+- support M1 mac
 - dst for mapToG1 has changed to `BLS_SIG_BLS12381G1_XMD:SHA-256_SSWU_RO_POP_`.
 - `mclBn_eth*` functions are removed.
 - `mcl::bn::mapToG1(G1& out, const Fp& v)` supports `BLS12_MAP_FP_TO_G1` in [EIP 2537](https://eips.ethereum.org/EIPS/eip-2537).
 - `mcl::bn::hashAndMapToG1(G1& out, const void *msg, size_t msgSize)` supports ([hash-to-curve-09 BLS12381G1_XMD:SHA-256_SSWU_RO_](https://www.ietf.org/id/draft-irtf-cfrg-hash-to-curve-09.html#name-bls12381g1_xmdsha-256_sswu_))
 - `MCL_MAP_TO_MODE_HASH_TO_CURVE_07` is added for [hash-to-curve-draft-07](https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/07/).
-  - The older version will be removed in the future.
-- change DST of hash-to-curve for `MCL_MAP_TO_MODE_HASH_TO_CURVE_06`.
-- add new hash-to-curve function of [hash-to-curve-draft-06](https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/06/) at March 2020.
-  - call `setETHmode(MCL_MAP_TO_MODE_HASH_TO_CURVE_06);`
-  - The older `MAP_TO_MODE` will be removed after the draft is fixed.
-- add new hash functions corresponding to python-impl of [algorand/bls_sig_ref](https://github.com/algorand/bls_sigs_ref).
-  - `mclBn_ethMsgToFp2`(resp. `Hp2`)
-  - `mclBn_ethFp2ToG2`(resp. `opt_swu2_map`)
-  - `mclBn_ethMsgToG2`(resp. `map2curve_osswu2`)
 
 # Support architecture
 
 - x86-64 Windows + Visual Studio
 - x86, x86-64 Linux + gcc/clang
-- x86-64 macOS
+- x86-64, M1 macOS
 - ARM / ARM64 Linux
 - WebAssembly
 - Android

From 27f51cb987bf840421564afe2d9b637f6a125fab Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 18 Dec 2020 10:21:31 +0900
Subject: [PATCH 359/553] update zkp for dec(c) = 0

---
 misc/she/memo.txt | 52 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/misc/she/memo.txt b/misc/she/memo.txt
index 5ee17097..8c40b78b 100644
--- a/misc/she/memo.txt
+++ b/misc/she/memo.txt
@@ -31,18 +31,46 @@ Bi := d Pi - h Ai
 verify h = Hash(P2, A1, A2, B1, B2)
 -----------------------------------------------------------------------------
 CipherTextGT
-P ; generator of GT
+P ; generator of GT, GT=<P>
 x1, x2 ; secrect key
 (P0, P1, P2, P3) := (P, x1 P, x2 P, x1 x2 P) ; public information
 
 CipherText c = (A0, A1, A2, A3)
 dec(c) = 0 <=> A0 = x2 A1 + x1 A2 - x1 x2 A3 ; (*)
 
-construction of ZKP for dec(c) = 0, i.e., show (*)
+F(a1, a2, a3) := a2 A1 + a1 A2 - a3 A3
+
+dec(c) = 0 <=> A0 = F(x1, x2, x1 x2)
+
+Sigma-protocol for dec(c) = 0, i.e., show (*)
+
+Prover:
+  b1, b2, b3 ; rand
+  Bi := bi P (i = 1, 2, 3)
+  X := F(b1, b2, b3)
+  send (B1, B2, B3, X) to Verifier
+
+Verifier:
+  takes h randomly and send to Prover
+
+Prover:
+  d1 := b1 + h x1
+  d2 := b2 + h x2
+  d3 := b3 + h x1 x2
+  send (d1, d2, d3) to Verifier
+
+Verifier:
+  verify
+    di P = Bi + h Pi (i = 1, 2, 3)
+    X = F(d1, d2, d3) - h A0
+    and accept it
+
+Fiat-Shamir transform:
+
 Prover:
 b1, b2, b3 ; random value
-Bi := bi P for i = 1, 2, 3
-X := b1 A2 + b2 A1 - b3 A3
+Bi := bi P (i = 1, 2, 3)
+X := F(b1, b2, b3)
 h := Hash(P0, ..., P3, A0, ..., A3, B1, B2, B3, X)
 d1 := b1 + h x1
 d2 := b2 + h x2
@@ -52,7 +80,7 @@ pi := (d1, d2, d3, h)
 Verifier:
 (pi, {Pi}, {Ai}) given
 Bi' := di P - h Pi for i = 1, 2, 3
-X' := d1 A2 + d2 A1 - d3 A3 - h A0
+X' := F(d1, d2, d3) - h A0
 verify Hash({Pi}, {Ai}, {Bi'}, X') = h
 
 Completeness
@@ -60,8 +88,10 @@ Completeness
 B1' = d1 P - h P1 = (b1 + h x1) P - h x1 P = b1 P = B1
 B2' = d2 P - h P2 = (b2 + h x2) P - h x2 P = b2 P = B2
 B3' = d3 P - h P3 = (b3 + h x1 x2) P - h x1 x2 P = B3
-X' = (b1 + h x1) A2 + (b2 + h x2) A1 - (b3 + h x1 x2)A3 - h A0
-= b1 A2 + b2 A1 - b3 A3 + h (x1 A2 + x2 A1 - x1 x2 A3 - A0) = b1 A2 + b2 A1 - b3 A3 = X
+X' = F(b1 + h x1, b2 + h x2, b3 + h x1 x2) - h A0
+ = F(b1, b2, b3) + h F(x1, x2, x1 x2) - h A0
+ = F(b1, b2, b3) + h (F(x1, x2, x1 x2) - A0)
+ = F(b1, b2, b3) = X
 OK
 
 Soundness
@@ -80,8 +110,10 @@ B1' = d1 P - h P1 = b1 P
 B2' = b2 P
 B3' = b3 P
 
-Y := x1 A2 + x2 A1 - x1 x2 A3 - A0 ; unknown, but it is fixed
-X' = b1 A2 + b2 A1 - b3 A3 + h Y
+Y := F(x1, x2, x1 x2) - A0; unknown, but it is fixed
+X' = F(d1, d2, d3) - h A0 = F(b1 + h x1, b2 + h x2, b3 + h x1 x2) - h A0
+ = F(b1, b2, b3) + h(F(x1, x2, x1 x2) - A0)
+ = F(b1, b2, b3) + h Y
 
-Hash({Pi}, {Ai}, b1 P, b2 P, b3 P, b1 A2 + b2 A1 - b3 A3 + h Y) = h
+Hash({Pi}, {Ai}, b1 P, b2 P, b3 P, F(b1, b2, b3) + h Y) = h
 To found {b1, b2, b3, h} to hold this equation, Y must be 0.

From 58836c5ddc6391ac943cbf8d8e8ea3cb016e70cb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 18 Dec 2020 15:17:36 +0900
Subject: [PATCH 360/553] [she] add decWithZkpDec for CipherGT

---
 include/mcl/she.hpp | 108 +++++++++++++++++++++++++++++++++++++++++++-
 test/she_test.cpp   |  23 +++++++++-
 2 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/include/mcl/she.hpp b/include/mcl/she.hpp
index 54ec13c6..4b7fe0f2 100644
--- a/include/mcl/she.hpp
+++ b/include/mcl/she.hpp
@@ -370,6 +370,25 @@ struct SHET {
 	static bool useDecG1ViaGT_;
 	static bool useDecG2ViaGT_;
 	static bool isG1only_;
+	/*
+		auxiliary for ZkpDecGT
+		@note GT is multiplicative group though treating GT as additive group in comment
+	*/
+	struct AuxiliaryForZkpDecGT {
+		GT P[4]; // [R = e(P, Q), xR, yR, xyR]
+
+		// dst = v[1] a[0] + v[0] a[1] - v[2] a[2]
+		void f(GT& dst, const GT *v, const Fr *a) const
+		{
+			GT t;
+			GT::pow(dst, v[0], a[1]);
+			GT::pow(t, v[1], a[0]);
+			dst *= t;
+			GT::pow(t, v[2], a[2]);
+			GT::unitaryInv(t, t);
+			dst *= t;
+		}
+	};
 private:
 	template<class G>
 	class CipherTextAT : public fp::Serializable<CipherTextAT<G> > {
@@ -566,6 +585,7 @@ struct SHET {
 	struct ZkpEqTag; // d_[] = { c, sp, ss, sm }
 	struct ZkpBinEqTag; // d_[] = { d0, d1, sp0, sp1, ss, sp, sm }
 	struct ZkpDecTag; // d_[] = { c, h }
+	struct ZkpDecGTTag; // d_[] = { d1, d2, d3, h }
 public:
 	/*
 		Zkp for m = 0 or 1
@@ -580,9 +600,13 @@ struct SHET {
 	*/
 	typedef ZkpT<ZkpBinEqTag, 7> ZkpBinEq;
 	/*
-		Zkp for Dec(c) = m
+		Zkp for Dec(c) = m for c in G1
 	*/
 	typedef ZkpT<ZkpDecTag, 2> ZkpDec;
+	/*
+		Zkp for Dec(c) = m for c in GT
+	*/
+	typedef ZkpT<ZkpDecGTTag, 4> ZkpDecGT;
 
 	typedef CipherTextAT<G1> CipherTextG1;
 	typedef CipherTextAT<G2> CipherTextG2;
@@ -833,6 +857,42 @@ struct SHET {
 			d += b;
 			return m;
 		}
+		// @note GT is multiplicative group though treating GT as additive group in comment
+		int64_t decWithZkpDec(bool *pok, ZkpDecGT& zkp, const CipherTextGT& c, const AuxiliaryForZkpDecGT& aux) const
+		{
+			int64_t m = dec(c, pok);
+			if (!*pok) return 0;
+			// A = c - Enc(m; 0, 0, 0) = c - (m R, 0, 0, 0)
+			GT A[4];
+			GT t;
+			GT::pow(t, aux.P[0], m); // m R
+			GT::unitaryInv(t, t);
+			GT::mul(A[0], c.g_[0], t);
+			A[1] = c.g_[1];
+			A[2] = c.g_[2];
+			A[3] = c.g_[3];
+			// dec(A) = 0
+
+			Fr b[3];
+			GT B[3], X;
+			for (int i = 0; i < 3; i++) {
+				b[i].setByCSPRNG();
+				GT::pow(B[i], aux.P[0], b[i]);
+			}
+			aux.f(X, A + 1, b);
+			local::Hash hash;
+			hash << aux.P[1] << aux.P[2] << aux.P[3] << A[0] << A[1] << A[2] << A[3] << B[0] << B[1] << B[2] << X;
+			Fr *d = &zkp.d_[0];
+			Fr &h = zkp.d_[3];
+			hash.get(h);
+			Fr::mul(d[0], h, x_); // h x
+			Fr::mul(d[1], h, y_); // h y
+			Fr::mul(d[2], d[1], x_); // h xy
+			for (int i = 0; i < 3; i++) {
+				d[i] += b[i];
+			}
+			return m;
+		}
 		int64_t decWithZkpDec(ZkpDec& zkp, const CipherTextG1& c, const PublicKey& pub) const
 		{
 			bool b;
@@ -840,6 +900,13 @@ struct SHET {
 			if (!b) throw cybozu::Exception("she:SecretKey:decWithZkpDec");
 			return ret;
 		}
+		int64_t decWithZkpDec(ZkpDecGT& zkp, const CipherTextGT& c, const AuxiliaryForZkpDecGT& aux) const
+		{
+			bool b;
+			int64_t ret = decWithZkpDec(&b, zkp, c, aux);
+			if (!b) throw cybozu::Exception("she:SecretKey:decWithZkpDec");
+			return ret;
+		}
 		template<class InputStream>
 		void load(bool *pb, InputStream& is, int ioMode = IoSerialize)
 		{
@@ -1284,6 +1351,13 @@ struct SHET {
 			ElGamalEnc(c.S_, c.T_, m, QhashTbl_.getWM(), yQmul);
 		}
 public:
+		void getAuxiliaryForZkpDecGT(AuxiliaryForZkpDecGT& aux) const
+		{
+			aux.P[0] = ePQ_;
+			pairing(aux.P[1], xP_, Q_);
+			pairing(aux.P[2], P_, yQ_);
+			pairing(aux.P[3], xP_, yQ_);
+		}
 		void encWithZkpBin(CipherTextG1& c, ZkpBin& zkp, int m) const
 		{
 			Fr encRand;
@@ -1332,6 +1406,36 @@ struct SHET {
 			hash.get(h2);
 			return h == h2;
 		}
+		bool verify(const CipherTextGT& c, int64_t m, const ZkpDecGT& zkp, const AuxiliaryForZkpDecGT& aux) const
+		{
+			const Fr *d = &zkp.d_[0];
+			const Fr &h = zkp.d_[3];
+
+			GT A[4];
+			GT t;
+			GT::pow(t, aux.P[0], m); // m R
+			GT::unitaryInv(t, t);
+			GT::mul(A[0], c.g_[0], t);
+			A[1] = c.g_[1];
+			A[2] = c.g_[2];
+			A[3] = c.g_[3];
+			GT B[3], X;
+			for (int i = 0; i < 3; i++) {
+				GT::pow(B[i], aux.P[0], d[i]);
+				GT::pow(t, aux.P[i+1], h);
+				GT::unitaryInv(t, t);
+				B[i] *= t;
+			}
+			aux.f(X, A + 1, zkp.d_);
+			GT::pow(t, A[0], h);
+			GT::unitaryInv(t, t);
+			X *= t;
+			local::Hash hash;
+			hash << aux.P[1] << aux.P[2] << aux.P[3] << A[0] << A[1] << A[2] << A[3] << B[0] << B[1] << B[2] << X;
+			Fr h2;
+			hash.get(h2);
+			return h == h2;
+		}
 		bool verify(const CipherTextG2& c, const ZkpBin& zkp) const
 		{
 			const MulG<G2> yQmul(yQ_);
@@ -1941,6 +2045,8 @@ typedef SHE::ZkpBin ZkpBin;
 typedef SHE::ZkpEq ZkpEq;
 typedef SHE::ZkpBinEq ZkpBinEq;
 typedef SHE::ZkpDec ZkpDec;
+typedef SHE::AuxiliaryForZkpDecGT AuxiliaryForZkpDecGT;
+typedef SHE::ZkpDecGT ZkpDecGT;
 
 inline void init(const mcl::CurveParam& cp = mcl::BN254, size_t hashSize = 1024, size_t tryNum = local::defaultTryNum)
 {
diff --git a/test/she_test.cpp b/test/she_test.cpp
index f7095ec4..9165a36f 100644
--- a/test/she_test.cpp
+++ b/test/she_test.cpp
@@ -331,7 +331,7 @@ CYBOZU_TEST_AUTO(ZkpBinEq)
 	ZkpBinEqTest(sec, ppub);
 }
 
-CYBOZU_TEST_AUTO(ZkpDec)
+CYBOZU_TEST_AUTO(ZkpDecG1)
 {
 	const SecretKey& sec = g_sec;
 	PublicKey pub;
@@ -350,6 +350,27 @@ CYBOZU_TEST_AUTO(ZkpDec)
 	CYBOZU_TEST_ASSERT(!pub.verify(c, m, zkp));
 }
 
+CYBOZU_TEST_AUTO(ZkpDecGT)
+{
+	const SecretKey& sec = g_sec;
+	PublicKey pub;
+	sec.getPublicKey(pub);
+	AuxiliaryForZkpDecGT aux;
+	pub.getAuxiliaryForZkpDecGT(aux);
+	CipherTextGT c;
+	int m = 123;
+	pub.enc(c, m);
+	ZkpDecGT zkp;
+	CYBOZU_TEST_EQUAL(sec.decWithZkpDec(zkp, c, aux), m);
+	CYBOZU_TEST_ASSERT(pub.verify(c, m, zkp, aux));
+	CYBOZU_TEST_ASSERT(!pub.verify(c, m + 1, zkp, aux));
+	CipherTextGT c2;
+	pub.enc(c2, m);
+	CYBOZU_TEST_ASSERT(!pub.verify(c2, m, zkp, aux));
+	zkp.d_[0] += 1;
+	CYBOZU_TEST_ASSERT(!pub.verify(c, m, zkp, aux));
+}
+
 CYBOZU_TEST_AUTO(add_sub_mul)
 {
 	const SecretKey& sec = g_sec;

From 3374bec58bc1eebb8bfc1b745e88367dc0a76e97 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 18 Dec 2020 15:38:51 +0900
Subject: [PATCH 361/553] [she] fix size of sheZkpDec

---
 include/mcl/she.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/she.h b/include/mcl/she.h
index 84a25a72..8a17150e 100644
--- a/include/mcl/she.h
+++ b/include/mcl/she.h
@@ -76,7 +76,7 @@ typedef struct {
 } sheZkpBinEq;
 
 typedef struct {
-	mclBnFr d[7];
+	mclBnFr d[2];
 } sheZkpDec;
 /*
 	initialize this library

From 0d123843dfc0b5224566d55e90d02545620300d5 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 18 Dec 2020 16:03:54 +0900
Subject: [PATCH 362/553] [she] move PublicKey::verify to Aux::verify

---
 include/mcl/she.hpp | 113 ++++++++++++++++++++++----------------------
 test/she_test.cpp   |   8 ++--
 2 files changed, 61 insertions(+), 60 deletions(-)

diff --git a/include/mcl/she.hpp b/include/mcl/she.hpp
index 4b7fe0f2..fd9dba8a 100644
--- a/include/mcl/she.hpp
+++ b/include/mcl/she.hpp
@@ -370,25 +370,6 @@ struct SHET {
 	static bool useDecG1ViaGT_;
 	static bool useDecG2ViaGT_;
 	static bool isG1only_;
-	/*
-		auxiliary for ZkpDecGT
-		@note GT is multiplicative group though treating GT as additive group in comment
-	*/
-	struct AuxiliaryForZkpDecGT {
-		GT P[4]; // [R = e(P, Q), xR, yR, xyR]
-
-		// dst = v[1] a[0] + v[0] a[1] - v[2] a[2]
-		void f(GT& dst, const GT *v, const Fr *a) const
-		{
-			GT t;
-			GT::pow(dst, v[0], a[1]);
-			GT::pow(t, v[1], a[0]);
-			dst *= t;
-			GT::pow(t, v[2], a[2]);
-			GT::unitaryInv(t, t);
-			dst *= t;
-		}
-	};
 private:
 	template<class G>
 	class CipherTextAT : public fp::Serializable<CipherTextAT<G> > {
@@ -610,6 +591,55 @@ struct SHET {
 
 	typedef CipherTextAT<G1> CipherTextG1;
 	typedef CipherTextAT<G2> CipherTextG2;
+	/*
+		auxiliary for ZkpDecGT
+		@note GT is multiplicative group though treating GT as additive group in comment
+	*/
+	struct AuxiliaryForZkpDecGT {
+		GT R_[4]; // [R = e(P, Q), xR, yR, xyR]
+
+		// dst = v[1] a[0] + v[0] a[1] - v[2] a[2]
+		void f(GT& dst, const GT *v, const Fr *a) const
+		{
+			GT t;
+			GT::pow(dst, v[0], a[1]);
+			GT::pow(t, v[1], a[0]);
+			dst *= t;
+			GT::pow(t, v[2], a[2]);
+			GT::unitaryInv(t, t);
+			dst *= t;
+		}
+		bool verify(const CipherTextGT& c, int64_t m, const ZkpDecGT& zkp) const
+		{
+			const Fr *d = &zkp.d_[0];
+			const Fr &h = zkp.d_[3];
+
+			GT A[4];
+			GT t;
+			GT::pow(t, R_[0], m); // m R
+			GT::unitaryInv(t, t);
+			GT::mul(A[0], c.g_[0], t);
+			A[1] = c.g_[1];
+			A[2] = c.g_[2];
+			A[3] = c.g_[3];
+			GT B[3], X;
+			for (int i = 0; i < 3; i++) {
+				GT::pow(B[i], R_[0], d[i]);
+				GT::pow(t, R_[i+1], h);
+				GT::unitaryInv(t, t);
+				B[i] *= t;
+			}
+			f(X, A + 1, zkp.d_);
+			GT::pow(t, A[0], h);
+			GT::unitaryInv(t, t);
+			X *= t;
+			local::Hash hash;
+			hash << R_[1] << R_[2] << R_[3] << A[0] << A[1] << A[2] << A[3] << B[0] << B[1] << B[2] << X;
+			Fr h2;
+			hash.get(h2);
+			return h == h2;
+		}
+	};
 
 	static void init(const mcl::CurveParam& cp = mcl::BN254, size_t hashSize = 1024, size_t tryNum = local::defaultTryNum)
 	{
@@ -865,7 +895,7 @@ struct SHET {
 			// A = c - Enc(m; 0, 0, 0) = c - (m R, 0, 0, 0)
 			GT A[4];
 			GT t;
-			GT::pow(t, aux.P[0], m); // m R
+			GT::pow(t, aux.R_[0], m); // m R
 			GT::unitaryInv(t, t);
 			GT::mul(A[0], c.g_[0], t);
 			A[1] = c.g_[1];
@@ -877,11 +907,11 @@ struct SHET {
 			GT B[3], X;
 			for (int i = 0; i < 3; i++) {
 				b[i].setByCSPRNG();
-				GT::pow(B[i], aux.P[0], b[i]);
+				GT::pow(B[i], aux.R_[0], b[i]);
 			}
 			aux.f(X, A + 1, b);
 			local::Hash hash;
-			hash << aux.P[1] << aux.P[2] << aux.P[3] << A[0] << A[1] << A[2] << A[3] << B[0] << B[1] << B[2] << X;
+			hash << aux.R_[1] << aux.R_[2] << aux.R_[3] << A[0] << A[1] << A[2] << A[3] << B[0] << B[1] << B[2] << X;
 			Fr *d = &zkp.d_[0];
 			Fr &h = zkp.d_[3];
 			hash.get(h);
@@ -1353,10 +1383,10 @@ struct SHET {
 public:
 		void getAuxiliaryForZkpDecGT(AuxiliaryForZkpDecGT& aux) const
 		{
-			aux.P[0] = ePQ_;
-			pairing(aux.P[1], xP_, Q_);
-			pairing(aux.P[2], P_, yQ_);
-			pairing(aux.P[3], xP_, yQ_);
+			aux.R_[0] = ePQ_;
+			pairing(aux.R_[1], xP_, Q_);
+			pairing(aux.R_[2], P_, yQ_);
+			pairing(aux.R_[3], xP_, yQ_);
 		}
 		void encWithZkpBin(CipherTextG1& c, ZkpBin& zkp, int m) const
 		{
@@ -1406,36 +1436,6 @@ struct SHET {
 			hash.get(h2);
 			return h == h2;
 		}
-		bool verify(const CipherTextGT& c, int64_t m, const ZkpDecGT& zkp, const AuxiliaryForZkpDecGT& aux) const
-		{
-			const Fr *d = &zkp.d_[0];
-			const Fr &h = zkp.d_[3];
-
-			GT A[4];
-			GT t;
-			GT::pow(t, aux.P[0], m); // m R
-			GT::unitaryInv(t, t);
-			GT::mul(A[0], c.g_[0], t);
-			A[1] = c.g_[1];
-			A[2] = c.g_[2];
-			A[3] = c.g_[3];
-			GT B[3], X;
-			for (int i = 0; i < 3; i++) {
-				GT::pow(B[i], aux.P[0], d[i]);
-				GT::pow(t, aux.P[i+1], h);
-				GT::unitaryInv(t, t);
-				B[i] *= t;
-			}
-			aux.f(X, A + 1, zkp.d_);
-			GT::pow(t, A[0], h);
-			GT::unitaryInv(t, t);
-			X *= t;
-			local::Hash hash;
-			hash << aux.P[1] << aux.P[2] << aux.P[3] << A[0] << A[1] << A[2] << A[3] << B[0] << B[1] << B[2] << X;
-			Fr h2;
-			hash.get(h2);
-			return h == h2;
-		}
 		bool verify(const CipherTextG2& c, const ZkpBin& zkp) const
 		{
 			const MulG<G2> yQmul(yQ_);
@@ -1748,6 +1748,7 @@ struct SHET {
 		friend class PublicKey;
 		friend class PrecomputedPublicKey;
 		friend class CipherTextA;
+		friend struct AuxiliaryForZkpDecGT;
 		template<class T>
 		friend struct PublicKeyMethod;
 	public:
diff --git a/test/she_test.cpp b/test/she_test.cpp
index 9165a36f..9ef51c75 100644
--- a/test/she_test.cpp
+++ b/test/she_test.cpp
@@ -362,13 +362,13 @@ CYBOZU_TEST_AUTO(ZkpDecGT)
 	pub.enc(c, m);
 	ZkpDecGT zkp;
 	CYBOZU_TEST_EQUAL(sec.decWithZkpDec(zkp, c, aux), m);
-	CYBOZU_TEST_ASSERT(pub.verify(c, m, zkp, aux));
-	CYBOZU_TEST_ASSERT(!pub.verify(c, m + 1, zkp, aux));
+	CYBOZU_TEST_ASSERT(aux.verify(c, m, zkp));
+	CYBOZU_TEST_ASSERT(!aux.verify(c, m + 1, zkp));
 	CipherTextGT c2;
 	pub.enc(c2, m);
-	CYBOZU_TEST_ASSERT(!pub.verify(c2, m, zkp, aux));
+	CYBOZU_TEST_ASSERT(!aux.verify(c2, m, zkp));
 	zkp.d_[0] += 1;
-	CYBOZU_TEST_ASSERT(!pub.verify(c, m, zkp, aux));
+	CYBOZU_TEST_ASSERT(!aux.verify(c, m, zkp));
 }
 
 CYBOZU_TEST_AUTO(add_sub_mul)

From 5cd31b666d3dae62204ef36f2b33fc509d4edf2f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 18 Dec 2020 16:40:24 +0900
Subject: [PATCH 363/553] [she] add test of ZkpDecGT

---
 include/mcl/she.h   | 14 ++++++++++++++
 src/she_c_impl.hpp  | 33 +++++++++++++++++++++++++++++++++
 test/she_c_test.hpp | 28 ++++++++++++++++++++++++++--
 3 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/include/mcl/she.h b/include/mcl/she.h
index 8a17150e..c15fb0b1 100644
--- a/include/mcl/she.h
+++ b/include/mcl/she.h
@@ -78,6 +78,14 @@ typedef struct {
 typedef struct {
 	mclBnFr d[2];
 } sheZkpDec;
+
+typedef struct {
+	mclBnGT d[4];
+} sheAuxiliaryForZkpDecGT;
+
+typedef struct {
+	mclBnFr d[4];
+} sheZkpDecGT;
 /*
 	initialize this library
 	call this once before using the other functions
@@ -101,6 +109,7 @@ MCLSHE_DLL_API mclSize sheZkpBinSerialize(void *buf, mclSize maxBufSize, const s
 MCLSHE_DLL_API mclSize sheZkpEqSerialize(void *buf, mclSize maxBufSize, const sheZkpEq *zkp);
 MCLSHE_DLL_API mclSize sheZkpBinEqSerialize(void *buf, mclSize maxBufSize, const sheZkpBinEq *zkp);
 MCLSHE_DLL_API mclSize sheZkpDecSerialize(void *buf, mclSize maxBufSize, const sheZkpDec *zkp);
+MCLSHE_DLL_API mclSize sheZkpDecGTSerialize(void *buf, mclSize maxBufSize, const sheZkpDecGT *zkp);
 
 // return read byte size if sucess else 0
 MCLSHE_DLL_API mclSize sheSecretKeyDeserialize(sheSecretKey* sec, const void *buf, mclSize bufSize);
@@ -112,6 +121,7 @@ MCLSHE_DLL_API mclSize sheZkpBinDeserialize(sheZkpBin* zkp, const void *buf, mcl
 MCLSHE_DLL_API mclSize sheZkpEqDeserialize(sheZkpEq* zkp, const void *buf, mclSize bufSize);
 MCLSHE_DLL_API mclSize sheZkpBinEqDeserialize(sheZkpBinEq* zkp, const void *buf, mclSize bufSize);
 MCLSHE_DLL_API mclSize sheZkpDecDeserialize(sheZkpDec* zkp, const void *buf, mclSize bufSize);
+MCLSHE_DLL_API mclSize sheZkpDecGTDeserialize(sheZkpDecGT* zkp, const void *buf, mclSize bufSize);
 
 /*
 	set secretKey if system has /dev/urandom or CryptGenRandom
@@ -121,6 +131,8 @@ MCLSHE_DLL_API int sheSecretKeySetByCSPRNG(sheSecretKey *sec);
 
 MCLSHE_DLL_API void sheGetPublicKey(shePublicKey *pub, const sheSecretKey *sec);
 
+MCLSHE_DLL_API void sheGetAuxiliaryForZkpDecGT(sheAuxiliaryForZkpDecGT *aux, const shePublicKey *pub);
+
 /*
 	make table to decode DLP
 	return 0 if success
@@ -203,6 +215,7 @@ MCLSHE_DLL_API int shePrecomputedPublicKeyEncWithZkpEq(sheCipherTextG1 *c1, sheC
 	return 0 if success
 */
 MCLSHE_DLL_API int sheDecWithZkpDecG1(mclInt *m, sheZkpDec *zkp, const sheSecretKey *sec, const sheCipherTextG1 *c, const shePublicKey *pub);
+MCLSHE_DLL_API int sheDecWithZkpDecGT(mclInt *m, sheZkpDecGT *zkp, const sheSecretKey *sec, const sheCipherTextGT *c, const sheAuxiliaryForZkpDecGT *aux);
 
 /*
 	decode c and set m
@@ -224,6 +237,7 @@ MCLSHE_DLL_API int shePrecomputedPublicKeyVerifyZkpBinG2(const shePrecomputedPub
 MCLSHE_DLL_API int shePrecomputedPublicKeyVerifyZkpEq(const shePrecomputedPublicKey *ppub, const sheCipherTextG1 *c1, const sheCipherTextG2 *c2, const sheZkpEq *zkp);
 MCLSHE_DLL_API int shePrecomputedPublicKeyVerifyZkpBinEq(const shePrecomputedPublicKey *ppub, const sheCipherTextG1 *c1, const sheCipherTextG2 *c2, const sheZkpBinEq *zkp);
 MCLSHE_DLL_API int sheVerifyZkpDecG1(const shePublicKey *pub, const sheCipherTextG1 *c1, mclInt m, const sheZkpDec *zkp);
+MCLSHE_DLL_API int sheVerifyZkpDecGT(const sheAuxiliaryForZkpDecGT *aux, const sheCipherTextGT *ct, mclInt m, const sheZkpDecGT *zkp);
 /*
 	decode c via GT and set m
 	return 0 if success
diff --git a/src/she_c_impl.hpp b/src/she_c_impl.hpp
index 6fcb2d38..28781631 100644
--- a/src/she_c_impl.hpp
+++ b/src/she_c_impl.hpp
@@ -44,6 +44,12 @@ static const ZkpBinEq *cast(const sheZkpBinEq *p) { return reinterpret_cast<cons
 static ZkpDec *cast(sheZkpDec *p) { return reinterpret_cast<ZkpDec*>(p); }
 static const ZkpDec *cast(const sheZkpDec *p) { return reinterpret_cast<const ZkpDec*>(p); }
 
+static AuxiliaryForZkpDecGT *cast(sheAuxiliaryForZkpDecGT *p) { return reinterpret_cast<AuxiliaryForZkpDecGT*>(p); }
+static const AuxiliaryForZkpDecGT *cast(const sheAuxiliaryForZkpDecGT *p) { return reinterpret_cast<const AuxiliaryForZkpDecGT*>(p); }
+
+static ZkpDecGT *cast(sheZkpDecGT *p) { return reinterpret_cast<ZkpDecGT*>(p); }
+static const ZkpDecGT *cast(const sheZkpDecGT *p) { return reinterpret_cast<const ZkpDecGT*>(p); }
+
 int sheInit(int curve, int compiledTimeVar)
 	try
 {
@@ -124,6 +130,11 @@ mclSize sheZkpDecSerialize(void *buf, mclSize maxBufSize, const sheZkpDec *zkp)
 	return (mclSize)cast(zkp)->serialize(buf, maxBufSize);
 }
 
+mclSize sheZkpDecGTSerialize(void *buf, mclSize maxBufSize, const sheZkpDecGT *zkp) // name matches the declaration in include/mcl/she.h
+{
+	return (mclSize)cast(zkp)->serialize(buf, maxBufSize);
+}
+
 mclSize sheSecretKeyDeserialize(sheSecretKey* sec, const void *buf, mclSize bufSize)
 {
 	return (mclSize)cast(sec)->deserialize(buf, bufSize);
@@ -169,6 +180,11 @@ mclSize sheZkpDecDeserialize(sheZkpDec* zkp, const void *buf, mclSize bufSize)
 	return (mclSize)cast(zkp)->deserialize(buf, bufSize);
 }
 
+mclSize sheZkpDecGTDeserialize(sheZkpDecGT* zkp, const void *buf, mclSize bufSize)
+{
+	return (mclSize)cast(zkp)->deserialize(buf, bufSize);
+}
+
 int sheSecretKeySetByCSPRNG(sheSecretKey *sec)
 {
 	cast(sec)->setByCSPRNG();
@@ -180,6 +196,11 @@ void sheGetPublicKey(shePublicKey *pub, const sheSecretKey *sec)
 	cast(sec)->getPublicKey(*cast(pub));
 }
 
+void sheGetAuxiliaryForZkpDecGT(sheAuxiliaryForZkpDecGT *aux, const shePublicKey *pub)
+{
+	cast(pub)->getAuxiliaryForZkpDecGT(*cast(aux));
+}
+
 static int wrapSetRangeForDLP(void f(size_t), mclSize hashSize)
 	try
 {
@@ -788,8 +809,20 @@ int sheDecWithZkpDecG1(mclInt *m, sheZkpDec *zkp, const sheSecretKey *sec, const
 	return b ? 0 : -1;
 }
 
+int sheDecWithZkpDecGT(mclInt *m, sheZkpDecGT *zkp, const sheSecretKey *sec, const sheCipherTextGT *c, const sheAuxiliaryForZkpDecGT *aux)
+{
+	bool b;
+	*m = cast(sec)->decWithZkpDec(&b, *cast(zkp), *cast(c), *cast(aux));
+	return b ? 0 : -1;
+}
+
 int sheVerifyZkpDecG1(const shePublicKey *pub, const sheCipherTextG1 *c1, mclInt m, const sheZkpDec *zkp)
 {
 	return cast(pub)->verify(*cast(c1), m, *cast(zkp));
 }
 
+int sheVerifyZkpDecGT(const sheAuxiliaryForZkpDecGT *aux, const sheCipherTextGT *ct, mclInt m, const sheZkpDecGT *zkp)
+{
+	return cast(aux)->verify(*cast(ct), m, *cast(zkp));
+}
+
diff --git a/test/she_c_test.hpp b/test/she_c_test.hpp
index b00b6a2c..9f489426 100644
--- a/test/she_c_test.hpp
+++ b/test/she_c_test.hpp
@@ -442,7 +442,7 @@ void ZkpEqTest(const sheSecretKey *sec, const PK *pub, encWithZkpFunc encWithZkp
 	}
 }
 
-CYBOZU_TEST_AUTO(ZkpEq)
+CYBOZU_TEST_AUTO(ZkpDecG1)
 {
 	sheSecretKey sec;
 	sheSecretKeySetByCSPRNG(&sec);
@@ -464,7 +464,31 @@ CYBOZU_TEST_AUTO(ZkpEq)
 	CYBOZU_TEST_EQUAL(sheVerifyZkpDecG1(&pub, &c1, m, &zkp), 0);
 }
 
-CYBOZU_TEST_AUTO(ZkpDec)
+CYBOZU_TEST_AUTO(ZkpDecGT)
+{
+	sheSecretKey sec;
+	sheSecretKeySetByCSPRNG(&sec);
+	shePublicKey pub;
+	sheGetPublicKey(&pub, &sec);
+	sheAuxiliaryForZkpDecGT aux;
+	sheGetAuxiliaryForZkpDecGT(&aux, &pub);
+	int m = 123;
+	sheCipherTextGT c1;
+	sheEncGT(&c1, &pub, m);
+	sheZkpDecGT zkp;
+	int64_t dec;
+	CYBOZU_TEST_EQUAL(sheDecWithZkpDecGT(&dec, &zkp, &sec, &c1, &aux), 0);
+	CYBOZU_TEST_EQUAL(m, dec);
+	CYBOZU_TEST_EQUAL(sheVerifyZkpDecGT(&aux, &c1, m, &zkp), 1);
+	CYBOZU_TEST_EQUAL(sheVerifyZkpDecGT(&aux, &c1, m + 1, &zkp), 0);
+	sheCipherTextGT c2;
+	sheEncGT(&c2, &pub, m);
+	CYBOZU_TEST_EQUAL(sheVerifyZkpDecGT(&aux, &c2, m, &zkp), 0);
+	zkp.d[0].d[0]++;
+	CYBOZU_TEST_EQUAL(sheVerifyZkpDecGT(&aux, &c1, m, &zkp), 0);
+}
+
+CYBOZU_TEST_AUTO(ZkpEq)
 {
 	sheSecretKey sec;
 	sheSecretKeySetByCSPRNG(&sec);

From 8ecd8cf99fc28468a7001c6fefee7a8977284c7e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 19 Dec 2020 11:30:20 +0900
Subject: [PATCH 364/553] avoid computing in MCL_DUMP_JIT

---
 include/mcl/fp.hpp   | 3 +++
 src/fp_generator.hpp | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 6c5b0b05..b7075d47 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -134,6 +134,9 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 	{
 		assert(maxBitSize <= MCL_MAX_BIT_SIZE);
 		*pb = op_.init(p, maxBitSize, xi_a, mode);
+#ifdef MCL_DUMP_JIT
+		return;
+#endif
 		if (!*pb) return;
 		{ // set oneRep
 			FpT& one = *reinterpret_cast<FpT*>(op_.oneRep);
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 8002a9a0..892a8f58 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -312,12 +312,16 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		useAdx_ = cpu.has(Xbyak::util::Cpu::tADX);
 #endif
 		reset(); // reset jit code for reuse
+#ifndef MCL_DUMP_JIT
 		setProtectModeRW(); // read/write memory
+#endif
 		init_inner(op);
 		// ToDo : recover op if false
 		if (Xbyak::GetError()) return false;
 //		printf("code size=%d\n", (int)getSize());
+#ifndef MCL_DUMP_JIT
 		setProtectModeRE(); // set read/exec memory
+#endif
 		return true;
 	}
 private:

From 700751ed0d25f35afdfeefde5fa7531221bd74b6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 20 Dec 2020 10:13:42 +0900
Subject: [PATCH 365/553] v1.29

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 3f7bd233..c3c22fa7 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x128; /* 0xABC = A.BC */
+static const int version = 0x129; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From 5fd72f06cf064e282abf907550d58845a6c87623 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 25 Dec 2020 10:19:16 +0900
Subject: [PATCH 366/553] [she] add desc of decWithZkpDec

---
 misc/she/she-api-ja.md | 12 +++++++++++-
 misc/she/she-api.md    | 12 +++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/misc/she/she-api-ja.md b/misc/she/she-api-ja.md
index 93dd8f7d..a6ba43c5 100644
--- a/misc/she/she-api-ja.md
+++ b/misc/she/she-api-ja.md
@@ -275,7 +275,9 @@ PrecomputedPublicKeyはPublicKeyの高速版
 * ZkpBinEq 暗号文encG1(m1), encG2(m2)についてm1 = m2 = 0または1であることを検証できる
 
 ### API
-PK = PublicKey or PrecomputedPublicKey
+- SK = SecretKey
+- PK = PublicKey or PrecomputedPublicKey
+- AUX = AuxiliaryForZkpDecGT
 
 * `void PK::encWithZkpBin(CipherTextG1& c, Zkp& zkp, int m) const`(C++)
 * `void PK::encWithZkpBin(CipherTextG2& c, Zkp& zkp, int m) const`(C++)
@@ -290,6 +292,14 @@ PK = PublicKey or PrecomputedPublicKey
 * `[CipherTextG1, CipherTextG2, ZkpEqBin] PK::encWithZkpBinEq(m)`(JS)
     * m(=0 or 1)を暗号化して暗号文c1, c2とゼロ知識証明zkpをセットする(または[c1, c2, zkp]を返す)
     * mが0でも1でもなければ例外
+* `SK::decWithZkpDec(ZkpDec& zkp, const CipherTextG1& c, const PublicKey& pub) const`(C++)
+* `[m, ZkpDecG1] SK::decWithZkpDec(c, pub)`(JS)
+  * CipherTextG1暗号文`c`を復号して`m`と`zkp`を返す. `zkp`は`dec(c) = m`の証明
+  * `pub`は計算コストを減らすために利用する
+* `SK::decWithZkpDec(ZkpDecGT& zkp, const CipherTextGT& c, const AuxiliaryForZkpDecGT& aux) const`(C++)
+* `[m, ZkpDecGT] SK::decWithZkpDecGT(c, aux)`(JS)
+  * CipherTextGT暗号文`c`を復号して`m`と`zkp`を返す. `zkp`は`dec(c) = m`の証明
+  * `aux = pub.getAuxiliaryForZkpDecGT()`. auxは計算コストを減らすために利用する
 
 ## グローバル関数
 
diff --git a/misc/she/she-api.md b/misc/she/she-api.md
index fd2e0867..37071bec 100644
--- a/misc/she/she-api.md
+++ b/misc/she/she-api.md
@@ -281,7 +281,9 @@ PK means PublicKey or PrecomputedPublicKey
 * ZkpBinEq ; verify whether `m1 = m2 = 0` or `1` for ciphertexts `encG1(m1)` and `encG2(m2)`
 
 ### API
-PK = PublicKey or PrecomputedPublicKey
+- SK = SecretKey
+- PK = PublicKey or PrecomputedPublicKey
+- AUX = AuxiliaryForZkpDecGT
 
 * `void PK::encWithZkpBin(CipherTextG1& c, Zkp& zkp, int m) const`(C++)
 * `void PK::encWithZkpBin(CipherTextG2& c, Zkp& zkp, int m) const`(C++)
@@ -296,6 +298,14 @@ PK = PublicKey or PrecomputedPublicKey
 * `[CipherTextG1, CipherTextG2, ZkpEqBin] PK::encWithZkpBinEq(m)`(JS)
     * encrypt `m`(=0 or 1) and set ciphertexts `c1`, `c2` and zero-knowledge proof `zkp`(or returns [c1, c2, zkp])
     * throw exception if m != 0 and m != 1
+* `SK::decWithZkpDec(ZkpDec& zkp, const CipherTextG1& c, const PublicKey& pub) const`(C++)
+* `[m, ZkpDecG1] SK::decWithZkpDec(c, pub)`(JS)
+  * decrypt CipherTextG1 `c` and get `m` and zkp, which proves that `dec(c) = m`.
+  * `pub` is used for reducing some computation.
+* `SK::decWithZkpDec(ZkpDecGT& zkp, const CipherTextGT& c, const AuxiliaryForZkpDecGT& aux) const`(C++)
+* `[m, ZkpDecGT] SK::decWithZkpDecGT(c, aux)`(JS)
+  * decrypt CipherTextGT `c` and get `m` and zkp, which proves that `dec(c) = m`.
+  * `aux = pub.getAuxiliaryForZkpDecGT()`, which is used for reducing some computation.
 
 ## Global functions
 

From 51292308d78478e5f2c96272980fddb363ea53f9 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 27 Dec 2020 11:12:08 +0900
Subject: [PATCH 367/553] remove entry mcl-wasm in Makefile

---
 Makefile | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/Makefile b/Makefile
index 10fc5686..d22ae9aa 100644
--- a/Makefile
+++ b/Makefile
@@ -366,17 +366,9 @@ endif
 ../she-wasm/she_c384.js: src/she_c384.cpp $(SHE_C_DEP)
 	emcc -o $@ src/fp.cpp src/she_c384.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=384 -s TOTAL_MEMORY=67108864 -s DISABLE_EXCEPTION_CATCHING=1
 
-../mcl-wasm/mcl_c384_256.js: src/bn_c384_256.cpp $(MCL_C_DEP)
-	emcc -o $@ src/fp.cpp src/bn_c384_256.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=384 -DMCL_USE_WEB_CRYPTO_API -s DISABLE_EXCEPTION_CATCHING=1 -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -fno-exceptions -MD -MP -MF obj/mcl_c384_256.d -s SINGLE_FILE=1
-
 ../ecdsa-wasm/ecdsa_c.js: src/ecdsa_c.cpp src/fp.cpp include/mcl/ecdsa.hpp include/mcl/ecdsa.h Makefile
 	emcc -o $@ src/fp.cpp src/ecdsa_c.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=256 -DMCL_USE_WEB_CRYPTO_API -s DISABLE_EXCEPTION_CATCHING=1 -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -fno-exceptions
 
-mcl-wasm:
-	$(MAKE) ../mcl-wasm/mcl_c384_256.js
-#	$(MAKE) ../mcl-wasm/mcl_c.js
-#	$(MAKE) ../mcl-wasm/mcl_c512.js
-
 she-wasm:
 	$(MAKE) ../she-wasm/she_c.js
 	$(MAKE) ../she-wasm/she_c384.js
@@ -418,7 +410,7 @@ install: lib/libmcl.a lib/libmcl.$(LIB_SUF)
 	$(MKDIR) $(PREFIX)/lib
 	cp -a lib/libmcl.a lib/libmcl.$(LIB_SUF) $(PREFIX)/lib/
 
-.PHONY: test mcl-wasm she-wasm bin/emu
+.PHONY: test she-wasm bin/emu
 
 # don't remove these files automatically
 .SECONDARY: $(addprefix $(OBJ_DIR)/, $(ALL_SRC:.cpp=.o))

From 081db6d2294587813b2576591ce68e7617d835a1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 27 Dec 2020 15:24:17 +0900
Subject: [PATCH 368/553] move she-wasm entry in Makefile to she-wasm

---
 Makefile | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/Makefile b/Makefile
index d22ae9aa..16004870 100644
--- a/Makefile
+++ b/Makefile
@@ -360,19 +360,10 @@ ifeq ($(MCL_USE_LLVM),2)
   EMCC_OPT+=src/base64m.ll -DMCL_USE_LLVM
   SHE_C_DEP+=src/base64m.ll
 endif
-../she-wasm/she_c.js: src/she_c256.cpp $(SHE_C_DEP)
-	emcc -o $@ src/fp.cpp src/she_c256.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=256 -s TOTAL_MEMORY=67108864 -s DISABLE_EXCEPTION_CATCHING=1
-
-../she-wasm/she_c384.js: src/she_c384.cpp $(SHE_C_DEP)
-	emcc -o $@ src/fp.cpp src/she_c384.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=384 -s TOTAL_MEMORY=67108864 -s DISABLE_EXCEPTION_CATCHING=1
 
 ../ecdsa-wasm/ecdsa_c.js: src/ecdsa_c.cpp src/fp.cpp include/mcl/ecdsa.hpp include/mcl/ecdsa.h Makefile
 	emcc -o $@ src/fp.cpp src/ecdsa_c.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=256 -DMCL_USE_WEB_CRYPTO_API -s DISABLE_EXCEPTION_CATCHING=1 -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -fno-exceptions
 
-she-wasm:
-	$(MAKE) ../she-wasm/she_c.js
-	$(MAKE) ../she-wasm/she_c384.js
-
 ecdsa-wasm:
 	$(MAKE) ../ecdsa-wasm/ecdsa_c.js
 

From 5ea94c2f5127c963b44b23b8611d5e770937b225 Mon Sep 17 00:00:00 2001
From: Johannes Gallmann <gallmann@ubique.ch>
Date: Tue, 12 Jan 2021 16:34:28 +0100
Subject: [PATCH 369/553] Add Fr.setLittleEndianMod to JNI

---
 ffi/java/MclTest.java               |  2 ++
 ffi/java/com/herumi/mcl/Fr.java     |  4 ++++
 ffi/java/com/herumi/mcl/MclJNI.java |  1 +
 ffi/java/mcl_impl.hpp               | 10 ++++++++++
 ffi/java/mcl_wrap.cxx               | 31 +++++++++++++++++++++++++++++
 5 files changed, 48 insertions(+)

diff --git a/ffi/java/MclTest.java b/ffi/java/MclTest.java
index 3dd6dc73..1bbc3cef 100644
--- a/ffi/java/MclTest.java
+++ b/ffi/java/MclTest.java
@@ -51,6 +51,8 @@ public static void testCurve(int curveType, String name) {
 				Fr t = new Fr();
 				t.deserialize(b);
 				assertBool("serialize", x.equals(t));
+				t.setLittleEndianMod(b);
+				assertBool("setLittleEndianMod", x.equals(t));
 			}
 			G1 P = new G1();
 			System.out.println("P=" + P);
diff --git a/ffi/java/com/herumi/mcl/Fr.java b/ffi/java/com/herumi/mcl/Fr.java
index 8ed95dfa..b9926526 100644
--- a/ffi/java/com/herumi/mcl/Fr.java
+++ b/ffi/java/com/herumi/mcl/Fr.java
@@ -96,6 +96,10 @@ public void deserialize(byte[] cbuf) {
     MclJNI.Fr_deserialize(swigCPtr, this, cbuf);
   }
 
+  public void setLittleEndianMod(byte[] cbuf) {
+    MclJNI.Fr_setLittleEndianMod(swigCPtr, this, cbuf);
+  }
+
   public byte[] serialize() { return MclJNI.Fr_serialize(swigCPtr, this); }
 
 }
diff --git a/ffi/java/com/herumi/mcl/MclJNI.java b/ffi/java/com/herumi/mcl/MclJNI.java
index 24e34cf0..05345c27 100644
--- a/ffi/java/com/herumi/mcl/MclJNI.java
+++ b/ffi/java/com/herumi/mcl/MclJNI.java
@@ -33,6 +33,7 @@ public class MclJNI {
   public final static native String Fr_toString__SWIG_0(long jarg1, Fr jarg1_, int jarg2);
   public final static native String Fr_toString__SWIG_1(long jarg1, Fr jarg1_);
   public final static native void Fr_deserialize(long jarg1, Fr jarg1_, byte[] jarg2);
+  public final static native void Fr_setLittleEndianMod(long jarg1, Fr jarg1_, byte[] jarg2);
   public final static native byte[] Fr_serialize(long jarg1, Fr jarg1_);
   public final static native void delete_Fr(long jarg1);
   public final static native void neg__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_);
diff --git a/ffi/java/mcl_impl.hpp b/ffi/java/mcl_impl.hpp
index 9bd1ef62..23191140 100644
--- a/ffi/java/mcl_impl.hpp
+++ b/ffi/java/mcl_impl.hpp
@@ -28,6 +28,12 @@ void deserializeT(T& x, const char *cbuf, size_t bufSize)
 	}
 }
 
+template<class T>
+void setLittleEndianModT(T& x, const char *cbuf, size_t bufSize)
+{
+	x.setLittleEndianMod(cbuf, bufSize);
+}
+
 template<class T>
 void serializeT(std::string& out, const T& x)
 {
@@ -88,6 +94,10 @@ class Fr {
 	{
 		deserializeT(self_, cbuf, bufSize);
 	}
+	void setLittleEndianMod(const char *cbuf, size_t bufSize) throw(std::exception)
+	{
+		setLittleEndianModT(self_, cbuf, bufSize);
+	}
 	void serialize(std::string& out) const throw(std::exception)
 	{
 		serializeT(out, self_);
diff --git a/ffi/java/mcl_wrap.cxx b/ffi/java/mcl_wrap.cxx
index 1caec48f..802c722b 100644
--- a/ffi/java/mcl_wrap.cxx
+++ b/ffi/java/mcl_wrap.cxx
@@ -789,6 +789,37 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1deserialize(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setLittleEndianMod(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+  Fr *arg1 = (Fr *) 0 ;
+  char *arg2 = (char *) 0 ;
+  size_t arg3 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1; 
+  {
+    if (jarg2) {
+      arg2 = (char *) jenv->GetByteArrayElements(jarg2, 0);
+      arg3 = (size_t) jenv->GetArrayLength(jarg2);
+    } else {
+      arg2 = 0;
+      arg3 = 0;
+    }
+  }
+  try {
+    (arg1)->setLittleEndianMod((char const *)arg2,arg3);
+  } catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  {
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+  }
+  
+}
+
+
 SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_Fr_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jbyteArray jresult = 0 ;
   Fr *arg1 = (Fr *) 0 ;

From 1b08a14e8ec3c7e9d9e5b56a82173e9ae691257f Mon Sep 17 00:00:00 2001
From: Johannes Gallmann <gallmann@ubique.ch>
Date: Tue, 12 Jan 2021 16:35:06 +0100
Subject: [PATCH 370/553] Add G1.isValidOrder() to JNI

---
 ffi/java/com/herumi/mcl/G1.java     |  4 ++++
 ffi/java/com/herumi/mcl/MclJNI.java |  1 +
 ffi/java/mcl_impl.hpp               |  1 +
 ffi/java/mcl_wrap.cxx               | 13 +++++++++++++
 4 files changed, 19 insertions(+)

diff --git a/ffi/java/com/herumi/mcl/G1.java b/ffi/java/com/herumi/mcl/G1.java
index d46e3f14..ebfb851d 100644
--- a/ffi/java/com/herumi/mcl/G1.java
+++ b/ffi/java/com/herumi/mcl/G1.java
@@ -56,6 +56,10 @@ public boolean isZero() {
     return MclJNI.G1_isZero(swigCPtr, this);
   }
 
+  public boolean isValidOrder() {
+    return MclJNI.G1_isValidOrder(swigCPtr, this);
+  }
+
   public void set(Fp x, Fp y) {
     MclJNI.G1_set(swigCPtr, this, Fp.getCPtr(x), x, Fp.getCPtr(y), y);
   }
diff --git a/ffi/java/com/herumi/mcl/MclJNI.java b/ffi/java/com/herumi/mcl/MclJNI.java
index 05345c27..9724511b 100644
--- a/ffi/java/com/herumi/mcl/MclJNI.java
+++ b/ffi/java/com/herumi/mcl/MclJNI.java
@@ -69,6 +69,7 @@ public class MclJNI {
   public final static native long new_G1__SWIG_2(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_);
   public final static native boolean G1_equals(long jarg1, G1 jarg1_, long jarg2, G1 jarg2_);
   public final static native boolean G1_isZero(long jarg1, G1 jarg1_);
+  public final static native boolean G1_isValidOrder(long jarg1, G1 jarg1_);
   public final static native void G1_set(long jarg1, G1 jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
   public final static native void G1_clear(long jarg1, G1 jarg1_);
   public final static native void G1_setStr__SWIG_0(long jarg1, G1 jarg1_, String jarg2, int jarg3);
diff --git a/ffi/java/mcl_impl.hpp b/ffi/java/mcl_impl.hpp
index 23191140..108ecac5 100644
--- a/ffi/java/mcl_impl.hpp
+++ b/ffi/java/mcl_impl.hpp
@@ -222,6 +222,7 @@ class G1 {
 		: self_(x.self_, y.self_) { }
 	bool equals(const G1& rhs) const { return self_ == rhs.self_; }
 	bool isZero() const { return self_.isZero(); }
+	bool isValidOrder() const { return self_.isValidOrder(); }
 	void set(const Fp& x, const Fp& y) throw(std::exception)
 	{
 		self_.set(x.self_, y.self_);
diff --git a/ffi/java/mcl_wrap.cxx b/ffi/java/mcl_wrap.cxx
index 802c722b..5b350558 100644
--- a/ffi/java/mcl_wrap.cxx
+++ b/ffi/java/mcl_wrap.cxx
@@ -1592,6 +1592,19 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1isZero(JNIEnv *jenv,
   return jresult;
 }
 
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1isValidOrder(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+    jboolean jresult = 0 ;
+    G1 *arg1 = (G1 *) 0 ;
+    bool result;
+
+    (void)jenv;
+    (void)jcls;
+    (void)jarg1_;
+    arg1 = *(G1 **)&jarg1;
+    result = (bool)((G1 const *)arg1)->isValidOrder();
+    jresult = (jboolean)result;
+    return jresult;
+}
 
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G1 *arg1 = (G1 *) 0 ;

From 7fb2ee5bb77fb7635210067fed303517c01293ff Mon Sep 17 00:00:00 2001
From: Johannes Gallmann <gallmann@ubique.ch>
Date: Wed, 13 Jan 2021 10:40:31 +0100
Subject: [PATCH 371/553] Add Fr.setHash() to JNI

---
 ffi/java/MclTest.java               |  5 +++++
 ffi/java/com/herumi/mcl/Fr.java     |  4 ++++
 ffi/java/com/herumi/mcl/MclJNI.java |  1 +
 ffi/java/mcl_impl.hpp               | 10 ++++++++++
 ffi/java/mcl_wrap.cxx               | 31 +++++++++++++++++++++++++++++
 5 files changed, 51 insertions(+)

diff --git a/ffi/java/MclTest.java b/ffi/java/MclTest.java
index 1bbc3cef..e250d977 100644
--- a/ffi/java/MclTest.java
+++ b/ffi/java/MclTest.java
@@ -53,6 +53,11 @@ public static void testCurve(int curveType, String name) {
 				assertBool("serialize", x.equals(t));
 				t.setLittleEndianMod(b);
 				assertBool("setLittleEndianMod", x.equals(t));
+				t.setHashOf(b);
+				assertBool("setHashOf", !x.equals(t));
+				Fr u = new Fr();
+				u.setHashOf(new byte[]{1,2,3});
+				assertBool("setHashOf - different", !u.equals(t));
 			}
 			G1 P = new G1();
 			System.out.println("P=" + P);
diff --git a/ffi/java/com/herumi/mcl/Fr.java b/ffi/java/com/herumi/mcl/Fr.java
index b9926526..3b5f1c34 100644
--- a/ffi/java/com/herumi/mcl/Fr.java
+++ b/ffi/java/com/herumi/mcl/Fr.java
@@ -100,6 +100,10 @@ public void setLittleEndianMod(byte[] cbuf) {
     MclJNI.Fr_setLittleEndianMod(swigCPtr, this, cbuf);
   }
 
+  public void setHashOf(byte[] cbuf) {
+    MclJNI.Fr_setHashOf(swigCPtr, this, cbuf);
+  }
+
   public byte[] serialize() { return MclJNI.Fr_serialize(swigCPtr, this); }
 
 }
diff --git a/ffi/java/com/herumi/mcl/MclJNI.java b/ffi/java/com/herumi/mcl/MclJNI.java
index 9724511b..4ba54c29 100644
--- a/ffi/java/com/herumi/mcl/MclJNI.java
+++ b/ffi/java/com/herumi/mcl/MclJNI.java
@@ -34,6 +34,7 @@ public class MclJNI {
   public final static native String Fr_toString__SWIG_1(long jarg1, Fr jarg1_);
   public final static native void Fr_deserialize(long jarg1, Fr jarg1_, byte[] jarg2);
   public final static native void Fr_setLittleEndianMod(long jarg1, Fr jarg1_, byte[] jarg2);
+  public final static native void Fr_setHashOf(long jarg1, Fr jarg1_, byte[] jarg2);
   public final static native byte[] Fr_serialize(long jarg1, Fr jarg1_);
   public final static native void delete_Fr(long jarg1);
   public final static native void neg__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_);
diff --git a/ffi/java/mcl_impl.hpp b/ffi/java/mcl_impl.hpp
index 108ecac5..b78faf71 100644
--- a/ffi/java/mcl_impl.hpp
+++ b/ffi/java/mcl_impl.hpp
@@ -34,6 +34,12 @@ void setLittleEndianModT(T& x, const char *cbuf, size_t bufSize)
 	x.setLittleEndianMod(cbuf, bufSize);
 }
 
+template<class T>
+void setHashOfT(T& x, const char *cbuf, size_t bufSize)
+{
+	x.setHashOf(cbuf, bufSize);
+}
+
 template<class T>
 void serializeT(std::string& out, const T& x)
 {
@@ -98,6 +104,10 @@ class Fr {
 	{
 		setLittleEndianModT(self_, cbuf, bufSize);
 	}
+	void setHashOf(const char *cbuf, size_t bufSize) throw(std::exception)
+	{
+		setHashOfT(self_, cbuf, bufSize);
+	}
 	void serialize(std::string& out) const throw(std::exception)
 	{
 		serializeT(out, self_);
diff --git a/ffi/java/mcl_wrap.cxx b/ffi/java/mcl_wrap.cxx
index 5b350558..1cca3601 100644
--- a/ffi/java/mcl_wrap.cxx
+++ b/ffi/java/mcl_wrap.cxx
@@ -820,6 +820,37 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setLittleEndianMod(JNIEnv
 }
 
 
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setHashOf(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jbyteArray jarg2) {
+  Fr *arg1 = (Fr *) 0 ;
+  char *arg2 = (char *) 0 ;
+  size_t arg3 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1; 
+  {
+    if (jarg2) {
+      arg2 = (char *) jenv->GetByteArrayElements(jarg2, 0);
+      arg3 = (size_t) jenv->GetArrayLength(jarg2);
+    } else {
+      arg2 = 0;
+      arg3 = 0;
+    }
+  }
+  try {
+    (arg1)->setHashOf((char const *)arg2,arg3);
+  } catch(std::exception &_e) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, (&_e)->what());
+    return ;
+  }
+  {
+    if (jarg2) jenv->ReleaseByteArrayElements(jarg2, (jbyte *)arg2, 0);
+  }
+  
+}
+
+
 SWIGEXPORT jbyteArray JNICALL Java_com_herumi_mcl_MclJNI_Fr_1serialize(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
   jbyteArray jresult = 0 ;
   Fr *arg1 = (Fr *) 0 ;

From df853a52c27397cb033891d44e44de7f620580b7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 15 Jan 2021 10:21:19 +0900
Subject: [PATCH 372/553] add sample of Fr::sub

---
 sample/pairing_c.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sample/pairing_c.c b/sample/pairing_c.c
index b174dbe8..c7576de1 100644
--- a/sample/pairing_c.c
+++ b/sample/pairing_c.c
@@ -24,6 +24,9 @@ int main()
 	mclBnFr_mul(&ab, &a, &b);
 	mclBnFr_getStr(buf, sizeof(buf), &ab, 10);
 	printf("%s x %s = %s\n", aStr, bStr, buf);
+	mclBnFr_sub(&a, &a, &b);
+	mclBnFr_getStr(buf, sizeof(buf), &a, 10);
+	printf("%s - %s = %s\n", aStr, bStr, buf);
 
 	ASSERT(!mclBnG1_hashAndMapTo(&P, "this", 4));
 	ASSERT(!mclBnG2_hashAndMapTo(&Q, "that", 4));

From 19128897a83cf2085f8d2daa8849b234cb8d2213 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 15 Jan 2021 18:14:27 +0900
Subject: [PATCH 373/553] sample/pairing.exe shows values

---
 sample/pairing.cpp | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/sample/pairing.cpp b/sample/pairing.cpp
index 51ebdc1d..d0e331aa 100644
--- a/sample/pairing.cpp
+++ b/sample/pairing.cpp
@@ -4,16 +4,18 @@ using namespace mcl::bn;
 
 void minimum_sample(const G1& P, const G2& Q)
 {
-	const mpz_class a = 123;
-	const mpz_class b = 456;
+	const Fr a = 123;
+	const Fr b = 456;
 	Fp12 e1, e2;
 	pairing(e1, P, Q);
 	G2 aQ;
 	G1 bP;
+	printf("a - b = %s\n", (a - b).getStr(16).c_str());
 	G2::mul(aQ, Q, a);
 	G1::mul(bP, P, b);
 	pairing(e2, bP, aQ);
 	Fp12::pow(e1, e1, a * b);
+	printf("pairing = %s\n", e1.getStr(16).c_str());
 	printf("%s\n", e1 == e2 ? "ok" : "ng");
 }
 
@@ -38,13 +40,21 @@ void precomputed(const G1& P, const G2& Q)
 	printf("%s\n", e1 == e2 ? "ok" : "ng");
 }
 
-int main()
+int main(int argc, char *[])
 {
-	initPairing(mcl::BLS12_381);
+	if (argc == 1) {
+		initPairing(mcl::BLS12_381);
+		puts("BLS12_381");
+	} else {
+		initPairing(mcl::BN254);//, mcl::fp::FP_GMP);
+		puts("BN254");
+	}
 	G1 P;
 	G2 Q;
 	hashAndMapToG1(P, "abc", 3);
 	hashAndMapToG2(Q, "abc", 3);
+	printf("P = %s\n", P.serializeToHexStr().c_str());
+	printf("Q = %s\n", Q.serializeToHexStr().c_str());
 
 	minimum_sample(P, Q);
 	miller_and_finel_exp(P, Q);

From 8befb75daa075ddbdbff814e4aaacd8caa172a7d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 15 Jan 2021 18:19:02 +0900
Subject: [PATCH 374/553] sample/pairing.exe uses Fr::setHashOf

---
 sample/pairing.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sample/pairing.cpp b/sample/pairing.cpp
index d0e331aa..b651219a 100644
--- a/sample/pairing.cpp
+++ b/sample/pairing.cpp
@@ -4,12 +4,14 @@ using namespace mcl::bn;
 
 void minimum_sample(const G1& P, const G2& Q)
 {
-	const Fr a = 123;
+	Fr a;
 	const Fr b = 456;
 	Fp12 e1, e2;
 	pairing(e1, P, Q);
 	G2 aQ;
 	G1 bP;
+	a.setHashOf("abc");
+	printf("a = %s\n", a.getStr(16).c_str());
 	printf("a - b = %s\n", (a - b).getStr(16).c_str());
 	G2::mul(aQ, Q, a);
 	G1::mul(bP, P, b);

From ee1eef38c82df079172502f850a2623d08233bf4 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 21 Jan 2021 16:41:47 +0900
Subject: [PATCH 375/553] remove one adox in Fp::mul

---
 src/fp_generator.hpp | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 892a8f58..9ccc986d 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1429,22 +1429,22 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	/*
 		c[n+2] = c[n+1] + px[n] * rdx
-		use rax
+		use rax, t0
 	*/
-	void mulAdd(const Pack& c, int n, const RegExp& px)
+	void mulAdd(const Pack& c, int n, const RegExp& px, const Reg64& t0)
 	{
 		const Reg64& a = rax;
-		xor_(a, a);
-		for (int i = 0; i < n; i++) {
-			mulx(c[n + 1], a, ptr [px + i * 8]);
+		xor_(c[n + 1], c[n + 1]); // c[n + 1] = 0
+		for (int i = 0; i < n - 1; i++) {
+			mulx(t0, a, ptr [px + i * 8]);
 			adox(c[i], a);
-			adcx(c[i + 1], c[n + 1]);
+			adcx(c[i + 1], t0);
 		}
-		mov(a, 0);
-		mov(c[n + 1], a);
-		adox(c[n], a);
-		adcx(c[n + 1], a);
-		adox(c[n + 1], a);
+		mulx(t0, a, ptr [px + (n - 1) * 8]);
+		adox(c[n - 1], a);
+		adox(t0, c[n + 1]); // carry o
+		adcx(c[n], t0);
+		adc(c[n + 1], 0);
 	}
 	/*
 		input
@@ -1481,18 +1481,17 @@ struct FpGenerator : Xbyak::CodeGenerator {
 				}
 				std::swap(pt0, pt1);
 			}
-			mov(c[n], 0);
-			adc(c[n], *pt0);
+			adc(*pt0, 0);
+			mov(c[n], *pt0);
 		} else {
 			// c[7..0] = c[6..0] + px[5..0] * rdx
-			mulAdd(c, 6, px);
+			mulAdd(c, 6, px, t1);
 		}
-		mov(a, rp_);
-		mul(c[0]); // q = a
-		mov(d, a);
-		lea(t1, ptr[rip+pL_]);
+		mov(d, rp_);
+		imul(d, c[0]); // q = d
+		lea(t0, ptr[rip+pL_]);
 		// c += p * q
-		mulAdd(c, 6, t1);
+		mulAdd(c, 6, t0, t1);
 	}
 	/*
 		input (z, x, y) = (p0, p1, p2)

From c50b06141adf7f4c306f3772e6d813e38135169f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 21 Jan 2021 17:33:57 +0900
Subject: [PATCH 376/553] adcx -> adc

---
 src/fp_generator.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 9ccc986d..4b9c1207 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1772,9 +1772,9 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			if (i == pd.size() - 1) break;
 			adcx(pd[i + 1], hi);
 		}
-		mov(d, 0);
-		adcx(hi, d);
-		adox(hi, d);
+		mov(a, 0);
+		adox(hi, a);
+		adc(hi, a);
 	}
 	/*
 		input : z[n], p[n-1], rdx(implicit)

From 758b1df7c4cae9424d239319f6c05b216ae80c56 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 21 Jan 2021 17:37:44 +0900
Subject: [PATCH 377/553] tweak a loop

---
 src/fp_generator.hpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 4b9c1207..a5edef14 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1435,13 +1435,12 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	{
 		const Reg64& a = rax;
 		xor_(c[n + 1], c[n + 1]); // c[n + 1] = 0
-		for (int i = 0; i < n - 1; i++) {
+		for (int i = 0; i < n; i++) {
 			mulx(t0, a, ptr [px + i * 8]);
 			adox(c[i], a);
+			if (i == n - 1) break;
 			adcx(c[i + 1], t0);
 		}
-		mulx(t0, a, ptr [px + (n - 1) * 8]);
-		adox(c[n - 1], a);
 		adox(t0, c[n + 1]); // carry o
 		adcx(c[n], t0);
 		adc(c[n + 1], 0);

From 106082b9ac5969c6451fc9c225301927dfec1a93 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 22 Jan 2021 17:56:26 +0900
Subject: [PATCH 378/553] clear top bit

---
 src/fp_generator.hpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index a5edef14..89926c71 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1520,16 +1520,22 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	L(fp_mulL);
 		mov(rdx, ptr [py + 0 * 8]);
 		montgomery6_1(Pack(t7, t6, t5, t4, t3, t2, t1, t0), px, t8, t9, true);
+xor_(t7, t7);
 		mov(rdx, ptr [py + 1 * 8]);
 		montgomery6_1(Pack(t0, t7, t6, t5, t4, t3, t2, t1), px, t8, t9, false);
+xor_(t0, t0);
 		mov(rdx, ptr [py + 2 * 8]);
 		montgomery6_1(Pack(t1, t0, t7, t6, t5, t4, t3, t2), px, t8, t9, false);
+xor_(t1, t1);
 		mov(rdx, ptr [py + 3 * 8]);
 		montgomery6_1(Pack(t2, t1, t0, t7, t6, t5, t4, t3), px, t8, t9, false);
+xor_(t2, t2);
 		mov(rdx, ptr [py + 4 * 8]);
 		montgomery6_1(Pack(t3, t2, t1, t0, t7, t6, t5, t4), px, t8, t9, false);
+xor_(t3, t3);
 		mov(rdx, ptr [py + 5 * 8]);
 		montgomery6_1(Pack(t4, t3, t2, t1, t0, t7, t6, t5), px, t8, t9, false);
+xor_(t4, t4);
 		// [t4:t3:t2:t1:t0:t7:t6]
 		const Pack z = Pack(t3, t2, t1, t0, t7, t6);
 		const Pack keep = Pack(rdx, rax, px, py, t8, t9);

From 0731c980b9f9af36156962937c51340733707b3c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 24 Jan 2021 10:28:13 +0900
Subject: [PATCH 379/553] remove unnecessary add

---
 src/fp_generator.hpp | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 89926c71..cd9bac32 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1428,11 +1428,12 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		ret();
 	}
 	/*
-		c[n+2] = c[n+1] + px[n] * rdx
+		c[n..0] = c[n..0] + px[n-1..0] * rdx
 		use rax, t0
 	*/
 	void mulAdd(const Pack& c, int n, const RegExp& px, const Reg64& t0)
 	{
+		assert(!isFullBit_);
 		const Reg64& a = rax;
 		xor_(c[n + 1], c[n + 1]); // c[n + 1] = 0
 		for (int i = 0; i < n; i++) {
@@ -1441,17 +1442,16 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			if (i == n - 1) break;
 			adcx(c[i + 1], t0);
 		}
-		adox(t0, c[n + 1]); // carry o
-		adcx(c[n], t0);
-		adc(c[n + 1], 0);
+		adox(c[n], t0);
+		adc(c[n], 0);
 	}
 	/*
 		input
-		c[6..0]
+		c[5..0]
 		rdx = yi
 		use rax, rdx
 		output
-		c[7..1]
+		c[6..1]
 
 		if first:
 		  c = x[5..0] * rdx
@@ -1463,6 +1463,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void montgomery6_1(const Pack& c, const RegExp& px, const Reg64& t0, const Reg64& t1, bool isFirst)
 	{
+		assert(!isFullBit_);
 		const int n = 6;
 		const Reg64& a = rax;
 		const Reg64& d = rdx;
@@ -1483,13 +1484,13 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			adc(*pt0, 0);
 			mov(c[n], *pt0);
 		} else {
-			// c[7..0] = c[6..0] + px[5..0] * rdx
+			// c[5..0] = c[5..0] + px[5..0] * rdx because of not fuill bit
 			mulAdd(c, 6, px, t1);
 		}
 		mov(d, rp_);
 		imul(d, c[0]); // q = d
 		lea(t0, ptr[rip+pL_]);
-		// c += p * q
+		// c[5..0] += p * q because of not fuill bit
 		mulAdd(c, 6, t0, t1);
 	}
 	/*
@@ -1520,22 +1521,16 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	L(fp_mulL);
 		mov(rdx, ptr [py + 0 * 8]);
 		montgomery6_1(Pack(t7, t6, t5, t4, t3, t2, t1, t0), px, t8, t9, true);
-xor_(t7, t7);
 		mov(rdx, ptr [py + 1 * 8]);
 		montgomery6_1(Pack(t0, t7, t6, t5, t4, t3, t2, t1), px, t8, t9, false);
-xor_(t0, t0);
 		mov(rdx, ptr [py + 2 * 8]);
 		montgomery6_1(Pack(t1, t0, t7, t6, t5, t4, t3, t2), px, t8, t9, false);
-xor_(t1, t1);
 		mov(rdx, ptr [py + 3 * 8]);
 		montgomery6_1(Pack(t2, t1, t0, t7, t6, t5, t4, t3), px, t8, t9, false);
-xor_(t2, t2);
 		mov(rdx, ptr [py + 4 * 8]);
 		montgomery6_1(Pack(t3, t2, t1, t0, t7, t6, t5, t4), px, t8, t9, false);
-xor_(t3, t3);
 		mov(rdx, ptr [py + 5 * 8]);
 		montgomery6_1(Pack(t4, t3, t2, t1, t0, t7, t6, t5), px, t8, t9, false);
-xor_(t4, t4);
 		// [t4:t3:t2:t1:t0:t7:t6]
 		const Pack z = Pack(t3, t2, t1, t0, t7, t6);
 		const Pack keep = Pack(rdx, rax, px, py, t8, t9);

From addd488d3d7c698d6de6559be4e4a85261962594 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 24 Jan 2021 17:12:47 +0900
Subject: [PATCH 380/553] tweak mulAdd

---
 src/fp_generator.hpp | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index cd9bac32..e854440b 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1428,20 +1428,22 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		ret();
 	}
 	/*
-		c[n..0] = c[n..0] + px[n-1..0] * rdx
+		c[n..0] = c[n-1..0] + px[n-1..0] * rdx if is_cn_zero = true
+		c[n..0] = c[n..0] + px[n-1..0] * rdx if is_cn_zero = false
 		use rax, t0
 	*/
-	void mulAdd(const Pack& c, int n, const RegExp& px, const Reg64& t0)
+	void mulAdd(const Pack& c, int n, const RegExp& px, const Reg64& t0, bool is_cn_zero)
 	{
 		assert(!isFullBit_);
 		const Reg64& a = rax;
-		xor_(c[n + 1], c[n + 1]); // c[n + 1] = 0
+		xor_(a, a);
 		for (int i = 0; i < n; i++) {
 			mulx(t0, a, ptr [px + i * 8]);
 			adox(c[i], a);
 			if (i == n - 1) break;
 			adcx(c[i + 1], t0);
 		}
+		if (is_cn_zero) mov(c[n], 0);
 		adox(c[n], t0);
 		adc(c[n], 0);
 	}
@@ -1484,14 +1486,14 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			adc(*pt0, 0);
 			mov(c[n], *pt0);
 		} else {
-			// c[5..0] = c[5..0] + px[5..0] * rdx because of not fuill bit
-			mulAdd(c, 6, px, t1);
+			// c[6..0] = c[5..0] + px[5..0] * rdx because of not fuill bit
+			mulAdd(c, 6, px, t1, true);
 		}
 		mov(d, rp_);
 		imul(d, c[0]); // q = d
 		lea(t0, ptr[rip+pL_]);
-		// c[5..0] += p * q because of not fuill bit
-		mulAdd(c, 6, t0, t1);
+		// c[6..0] += p * q because of not fuill bit
+		mulAdd(c, 6, t0, t1, false);
 	}
 	/*
 		input (z, x, y) = (p0, p1, p2)
@@ -1520,18 +1522,18 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& t9 = sf.t[9];
 	L(fp_mulL);
 		mov(rdx, ptr [py + 0 * 8]);
-		montgomery6_1(Pack(t7, t6, t5, t4, t3, t2, t1, t0), px, t8, t9, true);
+		montgomery6_1(Pack(t6, t5, t4, t3, t2, t1, t0), px, t8, t9, true);
 		mov(rdx, ptr [py + 1 * 8]);
-		montgomery6_1(Pack(t0, t7, t6, t5, t4, t3, t2, t1), px, t8, t9, false);
+		montgomery6_1(Pack(t7, t6, t5, t4, t3, t2, t1), px, t8, t9, false);
 		mov(rdx, ptr [py + 2 * 8]);
-		montgomery6_1(Pack(t1, t0, t7, t6, t5, t4, t3, t2), px, t8, t9, false);
+		montgomery6_1(Pack(t0, t7, t6, t5, t4, t3, t2), px, t8, t9, false);
 		mov(rdx, ptr [py + 3 * 8]);
-		montgomery6_1(Pack(t2, t1, t0, t7, t6, t5, t4, t3), px, t8, t9, false);
+		montgomery6_1(Pack(t1, t0, t7, t6, t5, t4, t3), px, t8, t9, false);
 		mov(rdx, ptr [py + 4 * 8]);
-		montgomery6_1(Pack(t3, t2, t1, t0, t7, t6, t5, t4), px, t8, t9, false);
+		montgomery6_1(Pack(t2, t1, t0, t7, t6, t5, t4), px, t8, t9, false);
 		mov(rdx, ptr [py + 5 * 8]);
-		montgomery6_1(Pack(t4, t3, t2, t1, t0, t7, t6, t5), px, t8, t9, false);
-		// [t4:t3:t2:t1:t0:t7:t6]
+		montgomery6_1(Pack(t3, t2, t1, t0, t7, t6, t5), px, t8, t9, false);
+		// [t3:t2:t1:t0:t7:t6]
 		const Pack z = Pack(t3, t2, t1, t0, t7, t6);
 		const Pack keep = Pack(rdx, rax, px, py, t8, t9);
 		mov_rr(keep, z);

From 6030ded9e7922e0d594531178673b4a269a84d6c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 24 Jan 2021 17:19:30 +0900
Subject: [PATCH 381/553] reorder registers

---
 src/fp_generator.hpp | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index e854440b..91a38c7f 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1436,14 +1436,17 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	{
 		assert(!isFullBit_);
 		const Reg64& a = rax;
-		xor_(a, a);
+		if (is_cn_zero) {
+			xor_(c[n], c[n]);
+		} else {
+			xor_(a, a);
+		}
 		for (int i = 0; i < n; i++) {
 			mulx(t0, a, ptr [px + i * 8]);
 			adox(c[i], a);
 			if (i == n - 1) break;
 			adcx(c[i + 1], t0);
 		}
-		if (is_cn_zero) mov(c[n], 0);
 		adox(c[n], t0);
 		adc(c[n], 0);
 	}
@@ -1524,17 +1527,17 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(rdx, ptr [py + 0 * 8]);
 		montgomery6_1(Pack(t6, t5, t4, t3, t2, t1, t0), px, t8, t9, true);
 		mov(rdx, ptr [py + 1 * 8]);
-		montgomery6_1(Pack(t7, t6, t5, t4, t3, t2, t1), px, t8, t9, false);
+		montgomery6_1(Pack(t0, t6, t5, t4, t3, t2, t1), px, t8, t9, false);
 		mov(rdx, ptr [py + 2 * 8]);
-		montgomery6_1(Pack(t0, t7, t6, t5, t4, t3, t2), px, t8, t9, false);
+		montgomery6_1(Pack(t1, t0, t6, t5, t4, t3, t2), px, t8, t9, false);
 		mov(rdx, ptr [py + 3 * 8]);
-		montgomery6_1(Pack(t1, t0, t7, t6, t5, t4, t3), px, t8, t9, false);
+		montgomery6_1(Pack(t2, t1, t0, t6, t5, t4, t3), px, t8, t9, false);
 		mov(rdx, ptr [py + 4 * 8]);
-		montgomery6_1(Pack(t2, t1, t0, t7, t6, t5, t4), px, t8, t9, false);
+		montgomery6_1(Pack(t3, t2, t1, t0, t6, t5, t4), px, t8, t9, false);
 		mov(rdx, ptr [py + 5 * 8]);
-		montgomery6_1(Pack(t3, t2, t1, t0, t7, t6, t5), px, t8, t9, false);
-		// [t3:t2:t1:t0:t7:t6]
-		const Pack z = Pack(t3, t2, t1, t0, t7, t6);
+		montgomery6_1(Pack(t4, t3, t2, t1, t0, t6, t5), px, t8, t9, false);
+		// [t4:t3:t2:t1:t0:t6]
+		const Pack z = Pack(t4, t3, t2, t1, t0, t6);
 		const Pack keep = Pack(rdx, rax, px, py, t8, t9);
 		mov_rr(keep, z);
 		lea(t5, ptr[rip+pL_]);

From 340389abfd16e1a78f86bc37faec8b6d9f43bd1e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 24 Jan 2021 17:30:59 +0900
Subject: [PATCH 382/553] keep rip+pL_

---
 src/fp_generator.hpp | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 91a38c7f..2cc2b1a2 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1466,7 +1466,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		c += p * q
 		c >>= 64
 	*/
-	void montgomery6_1(const Pack& c, const RegExp& px, const Reg64& t0, const Reg64& t1, bool isFirst)
+	void montgomery6_1(const Pack& c, const RegExp& px, const RegExp& pp, const Reg64& t0, const Reg64& t1, bool isFirst)
 	{
 		assert(!isFullBit_);
 		const int n = 6;
@@ -1494,9 +1494,8 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		}
 		mov(d, rp_);
 		imul(d, c[0]); // q = d
-		lea(t0, ptr[rip+pL_]);
 		// c[6..0] += p * q because of not fuill bit
-		mulAdd(c, 6, t0, t1, false);
+		mulAdd(c, 6, pp, t1, false);
 	}
 	/*
 		input (z, x, y) = (p0, p1, p2)
@@ -1522,26 +1521,26 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& t6 = sf.t[6];
 		const Reg64& t7 = sf.t[7];
 		const Reg64& t8 = sf.t[8];
-		const Reg64& t9 = sf.t[9];
+		const Reg64& pp = sf.t[9];
 	L(fp_mulL);
+		lea(pp, ptr[rip+pL_]);
 		mov(rdx, ptr [py + 0 * 8]);
-		montgomery6_1(Pack(t6, t5, t4, t3, t2, t1, t0), px, t8, t9, true);
+		montgomery6_1(Pack(t6, t5, t4, t3, t2, t1, t0), px, pp, t7, t8, true);
 		mov(rdx, ptr [py + 1 * 8]);
-		montgomery6_1(Pack(t0, t6, t5, t4, t3, t2, t1), px, t8, t9, false);
+		montgomery6_1(Pack(t0, t6, t5, t4, t3, t2, t1), px, pp, t7, t8, false);
 		mov(rdx, ptr [py + 2 * 8]);
-		montgomery6_1(Pack(t1, t0, t6, t5, t4, t3, t2), px, t8, t9, false);
+		montgomery6_1(Pack(t1, t0, t6, t5, t4, t3, t2), px, pp, t7, t8, false);
 		mov(rdx, ptr [py + 3 * 8]);
-		montgomery6_1(Pack(t2, t1, t0, t6, t5, t4, t3), px, t8, t9, false);
+		montgomery6_1(Pack(t2, t1, t0, t6, t5, t4, t3), px, pp, t7, t8, false);
 		mov(rdx, ptr [py + 4 * 8]);
-		montgomery6_1(Pack(t3, t2, t1, t0, t6, t5, t4), px, t8, t9, false);
+		montgomery6_1(Pack(t3, t2, t1, t0, t6, t5, t4), px, pp, t7, t8, false);
 		mov(rdx, ptr [py + 5 * 8]);
-		montgomery6_1(Pack(t4, t3, t2, t1, t0, t6, t5), px, t8, t9, false);
-		// [t4:t3:t2:t1:t0:t6]
+		montgomery6_1(Pack(t4, t3, t2, t1, t0, t6, t5), px, pp, t7, t8, false);
+
 		const Pack z = Pack(t4, t3, t2, t1, t0, t6);
-		const Pack keep = Pack(rdx, rax, px, py, t8, t9);
+		const Pack keep = Pack(rdx, rax, px, py, t7, t8);
 		mov_rr(keep, z);
-		lea(t5, ptr[rip+pL_]);
-		sub_rm(z, t5);
+		sub_rm(z, pp);
 		cmovc_rr(z, keep);
 		store_mr(pz, z);
 		ret();

From 184cada5d064d5cccf8bc7d890652bd02ea7026b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 24 Jan 2021 17:40:59 +0900
Subject: [PATCH 383/553] tweak montgomery6_1

---
 src/fp_generator.hpp | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 2cc2b1a2..f09c2653 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1473,21 +1473,17 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& a = rax;
 		const Reg64& d = rdx;
 		if (isFirst) {
-			const Reg64 *pt0 = &a;
-			const Reg64 *pt1 = &t0;
 			// c[6..0] = px[5..0] * rdx
-			mulx(*pt0, c[0], ptr [px + 0 * 8]);
+			mulx(c[1], c[0], ptr [px + 0 * 8]);
 			for (int i = 1; i < n; i++) {
-				mulx(*pt1, c[i], ptr[px + i * 8]);
+				mulx(c[i + 1], a, ptr[px + i * 8]);
 				if (i == 1) {
-					add(c[i], *pt0);
+					add(c[i], a);
 				} else {
-					adc(c[i], *pt0);
+					adc(c[i], a);
 				}
-				std::swap(pt0, pt1);
 			}
-			adc(*pt0, 0);
-			mov(c[n], *pt0);
+			adc(c[n], 0);
 		} else {
 			// c[6..0] = c[5..0] + px[5..0] * rdx because of not fuill bit
 			mulAdd(c, 6, px, t1, true);

From 70e8b3f992b1d121b6122c855067795ed7f480bb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 26 Jan 2021 16:42:33 +0900
Subject: [PATCH 384/553] refactor montgomery6_1

---
 src/fp_generator.hpp | 125 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 101 insertions(+), 24 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index f09c2653..fd5d59b7 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1450,6 +1450,27 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		adox(c[n], t0);
 		adc(c[n], 0);
 	}
+	/*
+		(h, c[n..0]) = c[n..0] + px[n-1..0] * rdx + (cc << n)
+		h = 0 or 1
+		use rax, t0
+	*/
+	void mulAdd2(const Reg64& h, const Pack& c, int n, const RegExp& px, const Reg64& t0, const Reg64 *cc = 0, bool updateCarry = true)
+	{
+		assert(!isFullBit_);
+		const Reg64& a = rax;
+		xor_(h, h); // h = 0
+		for (int i = 0; i < n; i++) {
+			mulx(t0, a, ptr [px + i * 8]);
+			adox(c[i], a);
+			if (i == n - 1) break;
+			adcx(c[i + 1], t0);
+		}
+		adox(t0, h); // no carry
+		if (cc) adox(t0, *cc); // no carry
+		adcx(c[n], t0);
+		if (updateCarry) adc(h, h);
+	}
 	/*
 		input
 		c[5..0]
@@ -1466,30 +1487,20 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		c += p * q
 		c >>= 64
 	*/
-	void montgomery6_1(const Pack& c, const RegExp& px, const RegExp& pp, const Reg64& t0, const Reg64& t1, bool isFirst)
+	void montgomery6_1(const Pack& c, const RegExp& px, const RegExp& pp, const Reg64& t1, bool isFirst)
 	{
 		assert(!isFullBit_);
 		const int n = 6;
-		const Reg64& a = rax;
 		const Reg64& d = rdx;
 		if (isFirst) {
 			// c[6..0] = px[5..0] * rdx
-			mulx(c[1], c[0], ptr [px + 0 * 8]);
-			for (int i = 1; i < n; i++) {
-				mulx(c[i + 1], a, ptr[px + i * 8]);
-				if (i == 1) {
-					add(c[i], a);
-				} else {
-					adc(c[i], a);
-				}
-			}
-			adc(c[n], 0);
+			mulPack1(c, n, px);
 		} else {
 			// c[6..0] = c[5..0] + px[5..0] * rdx because of not fuill bit
 			mulAdd(c, 6, px, t1, true);
 		}
 		mov(d, rp_);
-		imul(d, c[0]); // q = d
+		imul(d, c[0]); // d = q = uint64_t(d * c[0])
 		// c[6..0] += p * q because of not fuill bit
 		mulAdd(c, 6, pp, t1, false);
 	}
@@ -1521,17 +1532,17 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	L(fp_mulL);
 		lea(pp, ptr[rip+pL_]);
 		mov(rdx, ptr [py + 0 * 8]);
-		montgomery6_1(Pack(t6, t5, t4, t3, t2, t1, t0), px, pp, t7, t8, true);
+		montgomery6_1(Pack(t6, t5, t4, t3, t2, t1, t0), px, pp, t8, true);
 		mov(rdx, ptr [py + 1 * 8]);
-		montgomery6_1(Pack(t0, t6, t5, t4, t3, t2, t1), px, pp, t7, t8, false);
+		montgomery6_1(Pack(t0, t6, t5, t4, t3, t2, t1), px, pp, t8, false);
 		mov(rdx, ptr [py + 2 * 8]);
-		montgomery6_1(Pack(t1, t0, t6, t5, t4, t3, t2), px, pp, t7, t8, false);
+		montgomery6_1(Pack(t1, t0, t6, t5, t4, t3, t2), px, pp, t8, false);
 		mov(rdx, ptr [py + 3 * 8]);
-		montgomery6_1(Pack(t2, t1, t0, t6, t5, t4, t3), px, pp, t7, t8, false);
+		montgomery6_1(Pack(t2, t1, t0, t6, t5, t4, t3), px, pp, t8, false);
 		mov(rdx, ptr [py + 4 * 8]);
-		montgomery6_1(Pack(t3, t2, t1, t0, t6, t5, t4), px, pp, t7, t8, false);
+		montgomery6_1(Pack(t3, t2, t1, t0, t6, t5, t4), px, pp, t8, false);
 		mov(rdx, ptr [py + 5 * 8]);
-		montgomery6_1(Pack(t4, t3, t2, t1, t0, t6, t5), px, pp, t7, t8, false);
+		montgomery6_1(Pack(t4, t3, t2, t1, t0, t6, t5), px, pp, t8, false);
 
 		const Pack z = Pack(t4, t3, t2, t1, t0, t6);
 		const Pack keep = Pack(rdx, rax, px, py, t7, t8);
@@ -1736,6 +1747,23 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		adc(d, 0);
 		store_mr(py + 8 * 2, Pack(d, t7, t6, t2));
 	}
+	/*
+		c[n..0] = px[n-1..0] * rdx
+		use rax
+	*/
+	void mulPack1(const Pack& c, int n, const RegExp& px)
+	{
+		mulx(c[1], c[0], ptr [px + 0 * 8]);
+		for (int i = 1; i < n; i++) {
+			mulx(c[i + 1], rax, ptr[px + i * 8]);
+			if (i == 1) {
+				add(c[i], rax);
+			} else {
+				adc(c[i], rax);
+			}
+		}
+		adc(c[n], 0);
+	}
 	/*
 		[pd:pz[0]] <- py[n-1..0] * px[0]
 	*/
@@ -2301,17 +2329,65 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& t7 = t[7];
 		const Reg64& t8 = t[8];
 		const Reg64& t9 = t[9];
-		const Reg64& t10 = t[10];
 
 		const Reg64& a = rax;
 		const Reg64& d = rdx;
+#if 0
+		const Reg64& pp = t[10];
+		lea(pp, ptr[rip + pL_]);
+
+		mov(a, ptr[xy + 0 * 8]);
+		mov(d, rp_);
+		imul(d, a); // q
+		load_rm(Pack(t6, t5, t4, t3, t2, t1, t0), xy);
+		mulAdd2(t9, Pack(t6, t5, t4, t3, t2, t1, t0), 6, pp, t8);
+		// t9 : carry, [t6:t5:t4:t3:t2:t1:t0] += p * q
+
+		mov(a, ptr[xy + 1 * 8]);
+		mov(d, rp_);
+		imul(d, a);
+		mov(t0, ptr[xy + 7 * 8]);
+		mulAdd2(t7, Pack(t0, t6, t5, t4, t3, t2, t1), 6, pp, t8, &t9);
+
+		mov(a, ptr[xy + 2 * 8]);
+		mov(d, rp_);
+		imul(d, a);
+		mov(t1, ptr[xy + 8 * 8]);
+		mulAdd2(t9, Pack(t1, t7, t6, t5, t4, t3, t2), 6, pp, t8, &t7);
+
+		mov(a, ptr[xy + 3 * 8]);
+		mov(d, rp_);
+		imul(d, a);
+		mov(t2, ptr[xy + 9 * 8]);
+		mulAdd2(t7, Pack(t2, t1, t7, t6, t5, t4, t3), 6, pp, t8, &t9);
+
+		mov(a, ptr[xy + 4 * 8]);
+		mov(d, rp_);
+		imul(d, a);
+		mov(t3, ptr[xy + 10 * 8]);
+		mulAdd2(t9, Pack(t3, t2, t1, t7, t6, t5, t4), 6, pp, t8, &t7);
+
+		mov(a, ptr[xy + 5 * 8]);
+		mov(d, rp_);
+		imul(d, a);
+		mov(t4, ptr[xy + 11 * 8]);
+		mulAdd2(t7, Pack(t4, t3, t2, t1, t7, t6, t5), 6, pp, t8, &t9, false);
+
+		// z = [t4:t3:t2:t1:t7:t6]
+		Pack zp = Pack(t4, t3, t2, t1, t7, t6);
+		Pack keep = Pack(t0, xy, rax, rdx, t5, t8);
+		mov_rr(keep, zp);
+		sub_rm(zp, pp); // z -= p
+		cmovc_rr(zp, keep);
+		store_mr(z, zp);
+#else
+		const Reg64& t10 = t[10];
 		vmovq(xm0, z);
-		mov(z, ptr [xy + 0 * 8]);
-		mov(a, rp_);
-		mul(z);
+		mov(a, ptr [xy + 0 * 8]);
+		mov(d, rp_);
+		imul(d, a); // q
 		lea(t0, ptr [rip + pL_]);
 		load_rm(Pack(t7, t6, t5, t4, t3, t2, t1), xy);
-		mov(d, a); // q
 		mulPackAddShr(Pack(t7, t6, t5, t4, t3, t2, t1), t0, t10);
 		load_rm(Pack(t1, t0, t10, t9, t8), xy + 7 * 8);
 		adc(t8, rax);
@@ -2373,6 +2449,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		cmovc_rr(zp, keep);
 		vmovq(z, xm0);
 		store_mr(z, zp);
+#endif
 	}
 	void2u gen_fpDbl_sqrPre()
 	{

From f71698f60fe8712a0ed6197f078c229ee685f9cc Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 26 Jan 2021 17:23:13 +0900
Subject: [PATCH 385/553] optimize FpDbl::mod

---
 src/fp_generator.hpp | 112 +++++++------------------------------------
 1 file changed, 17 insertions(+), 95 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index fd5d59b7..3227d931 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -2330,126 +2330,48 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& t8 = t[8];
 		const Reg64& t9 = t[9];
 
-		const Reg64& a = rax;
 		const Reg64& d = rdx;
-#if 0
 		const Reg64& pp = t[10];
 		lea(pp, ptr[rip + pL_]);
 
-		mov(a, ptr[xy + 0 * 8]);
-		mov(d, rp_);
-		imul(d, a); // q
 		load_rm(Pack(t6, t5, t4, t3, t2, t1, t0), xy);
-		mulAdd2(t9, Pack(t6, t5, t4, t3, t2, t1, t0), 6, pp, t8);
-		// t9 : carry, [t6:t5:t4:t3:t2:t1:t0] += p * q
+		mov(d, rp_);
+		imul(d, t0); // q
+		mulAdd2(t7, Pack(t6, t5, t4, t3, t2, t1, t0), 6, pp, t8);
+		// t7 : carry, [t6:t5:t4:t3:t2:t1:t0] += p * q
 
-		mov(a, ptr[xy + 1 * 8]);
 		mov(d, rp_);
-		imul(d, a);
+		imul(d, t1);
 		mov(t0, ptr[xy + 7 * 8]);
-		mulAdd2(t7, Pack(t0, t6, t5, t4, t3, t2, t1), 6, pp, t8, &t9);
+		mulAdd2(t9, Pack(t0, t6, t5, t4, t3, t2, t1), 6, pp, t8, &t7);
 
-		mov(a, ptr[xy + 2 * 8]);
 		mov(d, rp_);
-		imul(d, a);
+		imul(d, t2);
 		mov(t1, ptr[xy + 8 * 8]);
-		mulAdd2(t9, Pack(t1, t7, t6, t5, t4, t3, t2), 6, pp, t8, &t7);
+		mulAdd2(t7, Pack(t1, t0, t6, t5, t4, t3, t2), 6, pp, t8, &t9);
 
-		mov(a, ptr[xy + 3 * 8]);
 		mov(d, rp_);
-		imul(d, a);
+		imul(d, t3);
 		mov(t2, ptr[xy + 9 * 8]);
-		mulAdd2(t7, Pack(t2, t1, t7, t6, t5, t4, t3), 6, pp, t8, &t9);
+		mulAdd2(t9, Pack(t2, t1, t0, t6, t5, t4, t3), 6, pp, t8, &t7);
 
-		mov(a, ptr[xy + 4 * 8]);
 		mov(d, rp_);
-		imul(d, a);
+		imul(d, t4);
 		mov(t3, ptr[xy + 10 * 8]);
-		mulAdd2(t9, Pack(t3, t2, t1, t7, t6, t5, t4), 6, pp, t8, &t7);
+		mulAdd2(t7, Pack(t3, t2, t1, t0, t6, t5, t4), 6, pp, t8, &t9);
 
-		mov(a, ptr[xy + 5 * 8]);
 		mov(d, rp_);
-		imul(d, a);
+		imul(d, t5);
 		mov(t4, ptr[xy + 11 * 8]);
-		mulAdd2(t7, Pack(t4, t3, t2, t1, t7, t6, t5), 6, pp, t8, &t9, false);
+		mulAdd2(t9, Pack(t4, t3, t2, t1, t0, t6, t5), 6, pp, t8, &t7, false);
 
-		// z = [t4:t3:t2:t1:t7:t6]
-		Pack zp = Pack(t4, t3, t2, t1, t7, t6);
-		Pack keep = Pack(t0, xy, rax, rdx, t5, t8);
+		// z = [t4:t3:t2:t1:t0:t6]
+		Pack zp = Pack(t4, t3, t2, t1, t0, t6);
+		Pack keep = Pack(t5, xy, rax, rdx, t7, t8);
 		mov_rr(keep, zp);
 		sub_rm(zp, pp); // z -= p
 		cmovc_rr(zp, keep);
 		store_mr(z, zp);
-#else
-		const Reg64& t10 = t[10];
-		vmovq(xm0, z);
-		mov(a, ptr [xy + 0 * 8]);
-		mov(d, rp_);
-		imul(d, a); // q
-		lea(t0, ptr [rip + pL_]);
-		load_rm(Pack(t7, t6, t5, t4, t3, t2, t1), xy);
-		mulPackAddShr(Pack(t7, t6, t5, t4, t3, t2, t1), t0, t10);
-		load_rm(Pack(t1, t0, t10, t9, t8), xy + 7 * 8);
-		adc(t8, rax);
-		adc(t9, rax);
-		adc(t10, rax);
-		adc(t0, rax);
-		adc(t1, rax);
-		// z = [t1:t0:t10:t9:t8:t7:t6:t5:t4:t3:t2]
-		mov(a, rp_);
-		mul(t2);
-		vmovq(xm1, t0); // save
-		lea(t0, ptr [rip + pL_]);
-		mov(d, a);
-		vmovq(xm2, t10);
-		mulPackAddShr(Pack(t8, t7, t6, t5, t4, t3, t2), t0, t10);
-		vmovq(t10, xm2);
-		adc(t9, rax);
-		adc(t10, rax);
-		vmovq(t0, xm1); // load
-		adc(t0, rax);
-		adc(t1, rax);
-		// z = [t1:t0:t10:t9:t8:t7:t6:t5:t4:t3]
-		mov(a, rp_);
-		mul(t3);
-		lea(t2, ptr [rip + pL_]);
-		mov(d, a);
-		vmovq(xm2, t10);
-		mulPackAddShr(Pack(t9, t8, t7, t6, t5, t4, t3), t2, t10);
-		vmovq(t10, xm2);
-		adc(t10, rax);
-		adc(t0, rax);
-		adc(t1, rax);
-		// z = [t1:t0:t10:t9:t8:t7:t6:t5:t4]
-		mov(a, rp_);
-		mul(t4);
-		lea(t2, ptr [rip + pL_]);
-		mov(d, a);
-		mulPackAddShr(Pack(t10, t9, t8, t7, t6, t5, t4), t2, t3);
-		adc(t0, rax);
-		adc(t1, rax);
-		// z = [t1:t0:t10:t9:t8:t7:t6:t5]
-		mov(a, rp_);
-		mul(t5);
-		lea(t2, ptr [rip + pL_]);
-		mov(d, a);
-		mulPackAddShr(Pack(t0, t10, t9, t8, t7, t6, t5), t2, t3);
-		adc(t1, a);
-		// z = [t1:t0:t10:t9:t8:t7:t6]
-		mov(a, rp_);
-		mul(t6);
-		lea(t2, ptr [rip + pL_]);
-		mov(d, a);
-		mulPackAddShr(Pack(t1, t0, t10, t9, t8, t7, t6), t2, t3, true);
-		// z = [t1:t0:t10:t9:t8:t7]
-		Pack zp = Pack(t1, t0, t10, t9, t8, t7);
-		Pack keep = Pack(z, xy, rax, rdx, t3, t6);
-		mov_rr(keep, zp);
-		sub_rm(zp, t2); // z -= p
-		cmovc_rr(zp, keep);
-		vmovq(z, xm0);
-		store_mr(z, zp);
-#endif
 	}
 	void2u gen_fpDbl_sqrPre()
 	{

From 6fcddcf9448788de38d719973a99107c21d642bd Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 28 Jan 2021 11:52:13 +0900
Subject: [PATCH 386/553] v1.30

---
 include/mcl/op.hpp | 2 +-
 readme.md          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index c3c22fa7..816c553b 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x129; /* 0xABC = A.BC */
+static const int version = 0x130; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index 7e645e7a..e71c0b8d 100644
--- a/readme.md
+++ b/readme.md
@@ -320,6 +320,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
+- 2021/Jan/28 v1.30 a little optimization of Fp operations
 - 2020/Nov/14 v1.28 support M1 mac
 - 2020/Jun/07 v1.22 remove old hash-to-curve functions
 - 2020/Jun/04 v1.21 mapToG1 and hashAndMapToG1 are compatible to irtf/eip-2537

From 8b1bd275b7bbbbb1e40ba4acf7a079bb112e9289 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 28 Jan 2021 17:06:01 +0900
Subject: [PATCH 387/553] fix : call setOrder in init for isValidOrder

---
 include/mcl/bn.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 072efab5..f2a3885e 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -944,6 +944,8 @@ struct Param {
 		GLV1::initForBN(z, isBLS12, cp.curveType);
 		GLV2T<Fr>::init(z, isBLS12);
 		basePoint.clear();
+		G1::setOrder(r);
+		G2::setOrder(r);
 		*pb = true;
 	}
 	void initG1only(bool *pb, const mcl::EcParam& para)
@@ -2166,6 +2168,8 @@ inline void init(bool *pb, const mcl::CurveParam& cp = mcl::BN254, fp::Mode mode
 	Fp12::setPowArrayGLV(local::powArrayGLV2, local::powVecNGLV2);
 	G1::setCompressedExpression();
 	G2::setCompressedExpression();
+	verifyOrderG1(false);
+	verifyOrderG2(false);
 	*pb = true;
 }
 

From c1bcf317a15868ee4a2192c8ad50e387253e1e64 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 28 Jan 2021 17:06:48 +0900
Subject: [PATCH 388/553] v1.31

---
 include/mcl/op.hpp | 2 +-
 readme.md          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 816c553b..29ca9f85 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x130; /* 0xABC = A.BC */
+static const int version = 0x131; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index e71c0b8d..7a455409 100644
--- a/readme.md
+++ b/readme.md
@@ -320,6 +320,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
+- 2021/Jan/28 v1.31 fix : call setOrder in init for isValidOrder
 - 2021/Jan/28 v1.30 a little optimization of Fp operations
 - 2020/Nov/14 v1.28 support M1 mac
 - 2020/Jun/07 v1.22 remove old hash-to-curve functions

From 8d64a0cfed1c88c19d38f5ca220c6cdee390b268 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 2 Feb 2021 09:40:31 +0900
Subject: [PATCH 389/553] test of wasm

---
 src/fp.cpp        | 18 +++++++++++++
 src/low_funct.hpp | 67 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+)
 create mode 100644 src/low_funct.hpp

diff --git a/src/fp.cpp b/src/fp.cpp
index eb8a7de8..3a70db55 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -3,6 +3,10 @@
 #include <cybozu/sha2.hpp>
 #include <cybozu/endian.hpp>
 #include <mcl/conversion.hpp>
+#if defined(__EMSCRIPTEN__) && MCL_SIZEOF_UNIT == 4
+#define FOR_WASM
+#include "low_funct.hpp"
+#endif
 
 #if defined(MCL_STATIC_CODE) || defined(MCL_USE_XBYAK) || (defined(MCL_USE_LLVM) && (CYBOZU_HOST == CYBOZU_HOST_INTEL))
 
@@ -268,6 +272,20 @@ void setOp2(Op& op)
 	} else {
 		op.fp_add = Add<N, false, Tag>::f;
 		op.fp_sub = Sub<N, false, Tag>::f;
+#ifdef FOR_WASM
+		switch (N) {
+		case 8:
+			op.fp_add = mcl::addModT<8>;
+			op.fp_sub = mcl::subModT<8>;
+			break;
+		case 12:
+			op.fp_add = mcl::addModT<12>;
+			op.fp_sub = mcl::subModT<12>;
+			break;
+		default:
+			break;
+		}
+#endif
 	}
 	if (op.isMont) {
 		if (op.isFullBit) {
diff --git a/src/low_funct.hpp b/src/low_funct.hpp
new file mode 100644
index 00000000..a9751ddb
--- /dev/null
+++ b/src/low_funct.hpp
@@ -0,0 +1,67 @@
+#pragma once
+/**
+	@file
+	@author MITSUNARI Shigeo(@herumi)
+	@license modified new BSD license
+	http://opensource.org/licenses/BSD-3-Clause
+*/
+#include <stdint.h>
+#include <stdlib.h>
+
+// for 32bit not full version
+
+namespace mcl {
+
+template<size_t N>
+void copyT(uint32_t *y, const uint32_t *x)
+{
+	for (size_t i = 0; i < N; i++) {
+		y[i] = x[i];
+	}
+}
+
+template<size_t N>
+void addT(uint32_t *z, const uint32_t *x, const uint32_t *y)
+{
+	bool c = false;
+	for (size_t i = 0; i < N; i++) {
+		uint64_t v = uint64_t(x[i]) + y[i] + c;
+		z[i] = uint32_t(v);
+		c = (v >> 32) != 0;
+	}
+}
+
+template<size_t N>
+bool subT(uint32_t *z, const uint32_t *x, const uint32_t *y)
+{
+	bool c = false;
+	for (size_t i = 0; i < N; i++) {
+		uint64_t v = uint64_t(x[i]) - y[i] - c;
+		z[i] = uint32_t(v);
+		c = (v >> 32) != 0;
+	}
+	return c;
+}
+
+template<size_t N>
+void addModT(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t *p)
+{
+	uint32_t t[N];
+	addT<N>(z, x, y);
+	bool c = subT<N>(t, z, p);
+	if (!c) {
+		copyT<N>(z, t);
+	}
+}
+
+template<size_t N>
+void subModT(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t *p)
+{
+	bool c = subT<N>(z, x, y);
+	if (c) {
+		addT<N>(z, z, p);
+	}
+}
+
+} // mcl
+

From a31bde314fc4124e5a3b791f73a12f301c0d13e1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 2 Feb 2021 10:53:18 +0900
Subject: [PATCH 390/553] add mulT

---
 misc/low_test.cpp | 42 ++++++++++++++++++++++++++++++++++++++
 src/low_funct.hpp | 51 ++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 88 insertions(+), 5 deletions(-)
 create mode 100644 misc/low_test.cpp

diff --git a/misc/low_test.cpp b/misc/low_test.cpp
new file mode 100644
index 00000000..c5bef6c0
--- /dev/null
+++ b/misc/low_test.cpp
@@ -0,0 +1,42 @@
+#include "../src/low_funct.hpp"
+
+#define MCL_VINT_FIXED_BUFFER
+#define MCL_SIZEOF_UNIT 4
+#define MCL_MAX_BIT_SIZE 384
+#include <mcl/vint.hpp>
+#include <cybozu/test.hpp>
+#include <cybozu/xorshift.hpp>
+
+void mul3(uint32_t z[6], const uint32_t x[3], uint32_t y[3])
+{
+	return mcl::mulT<3>(z, x, y);
+}
+
+template<class RG>
+void setRand(uint32_t *x, size_t n, RG& rg)
+{
+	for (size_t i = 0; i < n; i++) {
+		x[i] = rg.get32();
+	}
+}
+
+CYBOZU_TEST_AUTO(mul3)
+{
+	cybozu::XorShift rg;
+	uint32_t x[3];
+	uint32_t y[3];
+	uint32_t z[6];
+	for (size_t i = 0; i < 1000; i++) {
+		setRand(x, 3, rg);
+		setRand(y, 3, rg);
+		mcl::Vint vx, vy;
+		vx.setArray(x, 3);
+		vy.setArray(y, 3);
+		printf("vx=%s\n", vx.getStr(16).c_str());
+		printf("vy=%s\n", vy.getStr(16).c_str());
+		vx *= vy;
+		printf("xy=%s\n", vx.getStr(16).c_str());
+		mul3(z, x, y);
+		CYBOZU_TEST_EQUAL_ARRAY(z, vx.getUnit(), 6);
+	}
+}
diff --git a/src/low_funct.hpp b/src/low_funct.hpp
index a9751ddb..af9dd794 100644
--- a/src/low_funct.hpp
+++ b/src/low_funct.hpp
@@ -7,13 +7,14 @@
 */
 #include <stdint.h>
 #include <stdlib.h>
+#include <assert.h>
 
-// for 32bit not full version
+// only for 32bit not full bit prime version
 
 namespace mcl {
 
 template<size_t N>
-void copyT(uint32_t *y, const uint32_t *x)
+void copyT(uint32_t y[N], const uint32_t x[N])
 {
 	for (size_t i = 0; i < N; i++) {
 		y[i] = x[i];
@@ -21,7 +22,7 @@ void copyT(uint32_t *y, const uint32_t *x)
 }
 
 template<size_t N>
-void addT(uint32_t *z, const uint32_t *x, const uint32_t *y)
+void addT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N])
 {
 	bool c = false;
 	for (size_t i = 0; i < N; i++) {
@@ -29,10 +30,11 @@ void addT(uint32_t *z, const uint32_t *x, const uint32_t *y)
 		z[i] = uint32_t(v);
 		c = (v >> 32) != 0;
 	}
+	assert(!c);
 }
 
 template<size_t N>
-bool subT(uint32_t *z, const uint32_t *x, const uint32_t *y)
+bool subT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N])
 {
 	bool c = false;
 	for (size_t i = 0; i < N; i++) {
@@ -43,8 +45,47 @@ bool subT(uint32_t *z, const uint32_t *x, const uint32_t *y)
 	return c;
 }
 
+// [return:z[N]] = x[N] * y
 template<size_t N>
-void addModT(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t *p)
+uint32_t mulUnitT(uint32_t z[N], const uint32_t x[N], uint32_t y)
+{
+	uint32_t H = 0;
+	for (size_t i = 0; i < N; i++) {
+		uint64_t v = uint64_t(x[i]) * y;
+		v += H;
+		z[i] = uint32_t(v);
+		H = uint32_t(v >> 32);
+	}
+	return H;
+}
+
+// [return:z[N]] = z[N] + x[N] * z
+template<size_t N>
+uint32_t addMulUnitT(uint32_t z[N], const uint32_t x[N], uint32_t y)
+{
+	uint32_t H = 0;
+	for (size_t i = 0; i < N; i++) {
+		uint64_t v = uint64_t(x[i]) * y;
+		v += H;
+		v += z[i];
+		z[i] = uint32_t(v);
+		H = uint32_t(v >> 32);
+	}
+	return H;
+}
+
+// z[N * 2] = x[N] * y[N]
+template<size_t N>
+void mulT(uint32_t z[N * 2], const uint32_t x[N], const uint32_t y[N])
+{
+	z[N] = mulUnitT<N>(z, x, y[0]);
+	for (size_t i = 1; i < N; i++) {
+		z[N + i] = addMulUnitT<N>(&z[i], x, y[i]);
+	}
+}
+
+template<size_t N>
+void addModT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint32_t p[N])
 {
 	uint32_t t[N];
 	addT<N>(z, x, y);

From a3dd0f8a55403899c4d651ba94a675bc6b810fd0 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 2 Feb 2021 15:28:51 +0900
Subject: [PATCH 391/553] add karatsuba

---
 misc/low_test.cpp | 51 +++++++++++++++++++++++++---------------
 src/low_funct.hpp | 59 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 89 insertions(+), 21 deletions(-)

diff --git a/misc/low_test.cpp b/misc/low_test.cpp
index c5bef6c0..a0e09055 100644
--- a/misc/low_test.cpp
+++ b/misc/low_test.cpp
@@ -1,16 +1,14 @@
+#include <stdio.h>
 #include "../src/low_funct.hpp"
 
+#define MCL_USE_VINT
 #define MCL_VINT_FIXED_BUFFER
 #define MCL_SIZEOF_UNIT 4
-#define MCL_MAX_BIT_SIZE 384
+#define MCL_MAX_BIT_SIZE 768
 #include <mcl/vint.hpp>
 #include <cybozu/test.hpp>
 #include <cybozu/xorshift.hpp>
-
-void mul3(uint32_t z[6], const uint32_t x[3], uint32_t y[3])
-{
-	return mcl::mulT<3>(z, x, y);
-}
+#include <cybozu/benchmark.hpp>
 
 template<class RG>
 void setRand(uint32_t *x, size_t n, RG& rg)
@@ -20,23 +18,38 @@ void setRand(uint32_t *x, size_t n, RG& rg)
 	}
 }
 
-CYBOZU_TEST_AUTO(mul3)
+/*
+g++ -Ofast -DNDEBUG -Wall -Wextra -m32 -I ./include/ misc/low_test.cpp
+Core i7-8700
+         mulT  karatsuba
+N =  6, 182clk   225clk
+N =  8, 300clk   350clk
+N = 12, 594clk   730clk
+*/
+CYBOZU_TEST_AUTO(mulT)
 {
 	cybozu::XorShift rg;
-	uint32_t x[3];
-	uint32_t y[3];
-	uint32_t z[6];
+	const size_t N = 12;
+	uint32_t x[N];
+	uint32_t y[N];
+	uint32_t z[N * 2];
 	for (size_t i = 0; i < 1000; i++) {
-		setRand(x, 3, rg);
-		setRand(y, 3, rg);
+		setRand(x, N, rg);
+		setRand(y, N, rg);
+		// remove MSB
+		x[N - 1] &= 0x7fffffff;
+		y[N - 1] &= 0x7fffffff;
 		mcl::Vint vx, vy;
-		vx.setArray(x, 3);
-		vy.setArray(y, 3);
-		printf("vx=%s\n", vx.getStr(16).c_str());
-		printf("vy=%s\n", vy.getStr(16).c_str());
+		vx.setArray(x, N);
+		vy.setArray(y, N);
 		vx *= vy;
-		printf("xy=%s\n", vx.getStr(16).c_str());
-		mul3(z, x, y);
-		CYBOZU_TEST_EQUAL_ARRAY(z, vx.getUnit(), 6);
+		mcl::mulT<N>(z, x, y);
+		CYBOZU_TEST_EQUAL_ARRAY(z, vx.getUnit(), N * 2);
+		memset(z, 0, sizeof(z));
+		mcl::karatsubaT<N>(z, x, y);
+		CYBOZU_TEST_EQUAL_ARRAY(z, vx.getUnit(), N * 2);
 	}
+	CYBOZU_BENCH_C("mulT", 10000, mcl::mulT<N>, z, x, y);
+	CYBOZU_BENCH_C("kara", 10000, mcl::karatsubaT<N>, z, x, y);
 }
+
diff --git a/src/low_funct.hpp b/src/low_funct.hpp
index af9dd794..919c574a 100644
--- a/src/low_funct.hpp
+++ b/src/low_funct.hpp
@@ -21,8 +21,24 @@ void copyT(uint32_t y[N], const uint32_t x[N])
 	}
 }
 
+// [return:y[N]] += x
 template<size_t N>
-void addT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N])
+inline bool addUnitT(uint32_t y[N], uint32_t x)
+{
+	uint64_t v = uint64_t(y[0]) + x;
+	y[0] = uint32_t(v);
+	bool c = (v >> 32) != 0;
+	if (!c) return false;
+	for (size_t i = 1; i < N; i++) {
+		v = uint64_t(y[i]) + 1;
+		y[i] = uint32_t(v);
+		if ((v >> 32) == 0) return false;
+	}
+	return true;
+}
+
+template<size_t N>
+bool addT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N])
 {
 	bool c = false;
 	for (size_t i = 0; i < N; i++) {
@@ -30,7 +46,7 @@ void addT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N])
 		z[i] = uint32_t(v);
 		c = (v >> 32) != 0;
 	}
-	assert(!c);
+	return c;
 }
 
 template<size_t N>
@@ -84,6 +100,45 @@ void mulT(uint32_t z[N * 2], const uint32_t x[N], const uint32_t y[N])
 	}
 }
 
+/*
+	z[N * 2] = x[N] * y[N]
+	H = N/2
+	W = 1 << (H * 32)
+	x = aW + b, y = cW + d
+	assume a < W/2, c < W/2
+	(aW + b)(cW + d) = acW^2 + (ad + bc)W + bd
+	ad + bc = (a + b)(c + d) - ac - bd < (1 << (N * 32))
+*/
+template<size_t N>
+void karatsubaT(uint32_t z[N * 2], const uint32_t x[N], const uint32_t y[N])
+{
+	assert((N % 2) == 0);
+	assert((x[N - 1] & 0x80000000) == 0);
+	assert((y[N - 1] & 0x80000000) == 0);
+	const size_t H = N / 2;
+	mulT<H>(z, x, y); // bd
+	mulT<H>(z + N, x + H, y + H); // ac
+	uint32_t a_b[H];
+	uint32_t c_d[H];
+	bool c1 = addT<H>(a_b, x, x + H); // a + b
+	bool c2 = addT<H>(c_d, y, y + H); // c + d
+	uint32_t tmp[N];
+	mulT<H>(tmp, a_b, c_d);
+	if (c1) {
+		addT<H>(tmp + H, tmp + H, c_d);
+	}
+	if (c2) {
+		addT<H>(tmp + H, tmp + H, a_b);
+	}
+	// c:tmp[N] = (a + b)(c + d)
+	subT<N>(tmp, tmp, z);
+	subT<N>(tmp, tmp, z + N);
+	// c:tmp[N] = ad + bc
+	if (addT<N>(z + H, z + H, tmp)) {
+		addUnitT<H>(z + N + H, 1);
+	}
+}
+
 template<size_t N>
 void addModT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint32_t p[N])
 {

From 32b1f2f7e7116e447853bd7d234a5d5ba8a6a404 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 2 Feb 2021 15:44:14 +0900
Subject: [PATCH 392/553] test N = 8, 12

---
 misc/low_test.cpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/misc/low_test.cpp b/misc/low_test.cpp
index a0e09055..71639026 100644
--- a/misc/low_test.cpp
+++ b/misc/low_test.cpp
@@ -26,10 +26,11 @@ N =  6, 182clk   225clk
 N =  8, 300clk   350clk
 N = 12, 594clk   730clk
 */
-CYBOZU_TEST_AUTO(mulT)
+template<size_t N>
+void mulTest()
 {
+	printf("N=%zd (%zdbit)\n", N, N * 32);
 	cybozu::XorShift rg;
-	const size_t N = 12;
 	uint32_t x[N];
 	uint32_t y[N];
 	uint32_t z[N * 2];
@@ -53,3 +54,8 @@ CYBOZU_TEST_AUTO(mulT)
 	CYBOZU_BENCH_C("kara", 10000, mcl::karatsubaT<N>, z, x, y);
 }
 
+CYBOZU_TEST_AUTO(mulT)
+{
+	mulTest<8>();
+	mulTest<12>();
+}

From 7dae2ec939adb09968f07ea967eb0615db511ee7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 2 Feb 2021 18:20:02 +0900
Subject: [PATCH 393/553] add montT

---
 misc/low_test.cpp | 90 +++++++++++++++++++++++++++++++++++++++++++++++
 src/low_funct.hpp | 26 +++++++++++++-
 2 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/misc/low_test.cpp b/misc/low_test.cpp
index 71639026..62f84afb 100644
--- a/misc/low_test.cpp
+++ b/misc/low_test.cpp
@@ -1,4 +1,14 @@
 #include <stdio.h>
+#include <stdint.h>
+
+void dump(const char *msg, const uint32_t *x, size_t n)
+{
+	printf("%s", msg);
+	for (size_t i = 0; i < n; i++) {
+		printf("%08x", x[n - 1 - i]);
+	}
+	printf("\n");
+}
 #include "../src/low_funct.hpp"
 
 #define MCL_USE_VINT
@@ -9,6 +19,7 @@
 #include <cybozu/test.hpp>
 #include <cybozu/xorshift.hpp>
 #include <cybozu/benchmark.hpp>
+#include <mcl/util.hpp>
 
 template<class RG>
 void setRand(uint32_t *x, size_t n, RG& rg)
@@ -59,3 +70,82 @@ CYBOZU_TEST_AUTO(mulT)
 	mulTest<8>();
 	mulTest<12>();
 }
+
+struct Montgomery {
+	mcl::Vint p_;
+	mcl::Vint R_; // (1 << (pn_ * 64)) % p
+	mcl::Vint RR_; // (R * R) % p
+	uint32_t rp_; // rp * p = -1 mod M = 1 << 64
+	size_t pn_;
+	Montgomery() {}
+	explicit Montgomery(const mcl::Vint& p)
+	{
+		p_ = p;
+		rp_ = mcl::fp::getMontgomeryCoeff(p.getUnit()[0]);
+		pn_ = p.getUnitSize();
+		R_ = 1;
+		R_ = (R_ << (pn_ * 64)) % p_;
+		RR_ = (R_ * R_) % p_;
+	}
+
+	void toMont(mcl::Vint& x) const { mul(x, x, RR_); }
+	void fromMont(mcl::Vint& x) const { mul(x, x, 1); }
+
+	void mul(mcl::Vint& z, const mcl::Vint& x, const mcl::Vint& y) const
+	{
+		const size_t ySize = y.getUnitSize();
+		mcl::Vint c = x * y.getUnit()[0];
+		uint32_t q = c.getUnit()[0] * rp_;
+		c += p_ * q;
+		c >>= sizeof(uint32_t) * 8;
+		for (size_t i = 1; i < pn_; i++) {
+			if (i < ySize) {
+				c += x * y.getUnit()[i];
+			}
+			uint32_t q = c.getUnit()[0] * rp_;
+			c += p_ * q;
+			c >>= sizeof(uint32_t) * 8;
+		}
+		if (c >= p_) {
+			c -= p_;
+		}
+		z = c;
+	}
+};
+
+template<size_t N>
+void montTest(const char *pStr)
+{
+	mcl::Vint vp;
+	vp.setStr(pStr);
+	Montgomery mont(vp);
+
+	cybozu::XorShift rg;
+	uint32_t x[N];
+	uint32_t y[N];
+	uint32_t z[N];
+	uint32_t _p[N + 1];
+	uint32_t *const p = _p + 1;
+	vp.getArray(p, N);
+	p[-1] = mont.rp_;
+
+	for (size_t i = 0; i < 1000; i++) {
+		setRand(x, N, rg);
+		setRand(y, N, rg);
+		// remove MSB
+		x[N - 1] &= 0x7fffffff;
+		y[N - 1] &= 0x7fffffff;
+		mcl::Vint vx, vy, vz;
+		vx.setArray(x, N);
+		vy.setArray(y, N);
+		mont.mul(vz, vx, vy);
+		mcl::montT<N>(z, x, y, p);
+		CYBOZU_TEST_EQUAL_ARRAY(z, vz.getUnit(), N);
+	}
+}
+
+CYBOZU_TEST_AUTO(mont)
+{
+	const char *pStr = "0x2523648240000001ba344d80000000086121000000000013a700000000000013";
+	montTest<8>(pStr);
+}
diff --git a/src/low_funct.hpp b/src/low_funct.hpp
index 919c574a..23e58856 100644
--- a/src/low_funct.hpp
+++ b/src/low_funct.hpp
@@ -4,12 +4,13 @@
 	@author MITSUNARI Shigeo(@herumi)
 	@license modified new BSD license
 	http://opensource.org/licenses/BSD-3-Clause
+	@note for only 32bit not full bit prime version
+	assert((p[N - 1] & 0x80000000) == 0);
 */
 #include <stdint.h>
 #include <stdlib.h>
 #include <assert.h>
 
-// only for 32bit not full bit prime version
 
 namespace mcl {
 
@@ -159,5 +160,28 @@ void subModT(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t *
 	}
 }
 
+/*
+	z[N] = Montgomery(x[N], y[N], p[N])
+	@remark : assume p[-1] = rp
+*/
+template<size_t N>
+void montT(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t *p)
+{
+	const uint32_t rp = p[-1];
+	assert((p[N - 1] & 0x80000000) == 0);
+	uint32_t buf[N * 2];
+	buf[N] = mulUnitT<N>(buf, x, y[0]);
+	uint32_t q = buf[0] * rp;
+	buf[N] += addMulUnitT<N>(buf, p, q);
+	for (size_t i = 1; i < N; i++) {
+		buf[N + i] = addMulUnitT<N>(buf + i, x, y[i]);
+		uint32_t q = buf[i] * rp;
+		buf[N + i] += addMulUnitT<N>(buf + i, p, q);
+	}
+	if (subT<N>(z, buf + N, p)) {
+		copyT<N>(z, buf + N);
+	}
+}
+
 } // mcl
 

From e61e56abdf4ed7b0d7bcfd86da06d14bf72c7beb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 3 Feb 2021 11:17:48 +0900
Subject: [PATCH 394/553] add test of mont12

---
 misc/Makefile     | 6 ++++++
 misc/low_test.cpp | 7 +++++--
 2 files changed, 11 insertions(+), 2 deletions(-)
 create mode 100644 misc/Makefile

diff --git a/misc/Makefile b/misc/Makefile
new file mode 100644
index 00000000..1a727f3d
--- /dev/null
+++ b/misc/Makefile
@@ -0,0 +1,6 @@
+all: low_test
+
+CFLAGS=-I ../include/ -m32 -Ofast -Wall -Wextra -DNDEBUG
+
+low_test: low_test.cpp
+	$(CXX) -o low_test low_test.cpp $(CFLAGS)
diff --git a/misc/low_test.cpp b/misc/low_test.cpp
index 62f84afb..856be617 100644
--- a/misc/low_test.cpp
+++ b/misc/low_test.cpp
@@ -142,10 +142,13 @@ void montTest(const char *pStr)
 		mcl::montT<N>(z, x, y, p);
 		CYBOZU_TEST_EQUAL_ARRAY(z, vz.getUnit(), N);
 	}
+	CYBOZU_BENCH_C("montT", 10000, mcl::montT<N>, z, x, y, p);
 }
 
 CYBOZU_TEST_AUTO(mont)
 {
-	const char *pStr = "0x2523648240000001ba344d80000000086121000000000013a700000000000013";
-	montTest<8>(pStr);
+	const char *pBN254 = "0x2523648240000001ba344d80000000086121000000000013a700000000000013";
+	montTest<8>(pBN254);
+	const char *pBLS12_381 = "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab";
+	montTest<12>(pBLS12_381);
 }

From f2f39faf99db59ac1a10bc9f87c42ad5e5b854ac Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 3 Feb 2021 12:06:23 +0900
Subject: [PATCH 395/553] fix comment

---
 src/low_funct.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/low_funct.hpp b/src/low_funct.hpp
index 23e58856..5db871bd 100644
--- a/src/low_funct.hpp
+++ b/src/low_funct.hpp
@@ -76,7 +76,7 @@ uint32_t mulUnitT(uint32_t z[N], const uint32_t x[N], uint32_t y)
 	return H;
 }
 
-// [return:z[N]] = z[N] + x[N] * z
+// [return:z[N]] = z[N] + x[N] * y
 template<size_t N>
 uint32_t addMulUnitT(uint32_t z[N], const uint32_t x[N], uint32_t y)
 {

From a7728b22865189e4d107b94dab6c7499f4e4aeba Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 3 Feb 2021 12:18:21 +0900
Subject: [PATCH 396/553] add sqrT

---
 misc/low_test.cpp | 33 ++++++++++++++++++++++++++++++++
 src/low_funct.hpp | 48 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)

diff --git a/misc/low_test.cpp b/misc/low_test.cpp
index 856be617..a77ed36a 100644
--- a/misc/low_test.cpp
+++ b/misc/low_test.cpp
@@ -69,6 +69,39 @@ CYBOZU_TEST_AUTO(mulT)
 {
 	mulTest<8>();
 	mulTest<12>();
+	mulTest<16>();
+}
+
+template<size_t N>
+void sqrTest()
+{
+	printf("N=%zd (%zdbit)\n", N, N * 32);
+	cybozu::XorShift rg;
+	uint32_t x[N];
+	uint32_t y[N * 2];
+	for (size_t i = 0; i < 1000; i++) {
+		setRand(x, N, rg);
+		// remove MSB
+		x[N - 1] &= 0x7fffffff;
+		mcl::Vint vx;
+		vx.setArray(x, N);
+		vx *= vx;
+		mcl::sqrT<N>(y, x);
+		CYBOZU_TEST_EQUAL_ARRAY(y, vx.getUnit(), N * 2);
+#if 0
+		memset(z, 0, sizeof(z));
+		mcl::karatsubaT<N>(z, x, y);
+		CYBOZU_TEST_EQUAL_ARRAY(z, vx.getUnit(), N * 2);
+#endif
+	}
+	CYBOZU_BENCH_C("sqrT", 10000, mcl::sqrT<N>, y, x);
+}
+
+CYBOZU_TEST_AUTO(sqrT)
+{
+	sqrTest<8>();
+	sqrTest<12>();
+	sqrTest<16>();
 }
 
 struct Montgomery {
diff --git a/src/low_funct.hpp b/src/low_funct.hpp
index 5db871bd..f25559c3 100644
--- a/src/low_funct.hpp
+++ b/src/low_funct.hpp
@@ -101,6 +101,54 @@ void mulT(uint32_t z[N * 2], const uint32_t x[N], const uint32_t y[N])
 	}
 }
 
+template<size_t N>
+uint32_t mulUnitWithTblT(uint32_t z[N], const uint64_t *tbl_j)
+{
+	uint32_t H = 0;
+	for (size_t i = 0; i < N; i++) {
+		uint64_t v = tbl_j[i];
+		v += H;
+		z[i] = uint32_t(v);
+		H = uint32_t(v >> 32);
+	}
+	return H;
+}
+
+template<size_t N>
+uint32_t addMulUnitWithTblT(uint32_t z[N], const uint64_t *tbl_j)
+{
+	uint32_t H = 0;
+	for (size_t i = 0; i < N; i++) {
+		uint64_t v = tbl_j[i];
+		v += H;
+		v += z[i];
+		z[i] = uint32_t(v);
+		H = uint32_t(v >> 32);
+	}
+	return H;
+}
+
+
+// y[N * 2] = x[N] * x[N]
+template<size_t N>
+void sqrT(uint32_t y[N * 2], const uint32_t x[N])
+{
+	uint64_t tbl[N * N]; // x[i]x[j]
+	for (size_t i = 0; i < N; i++) {
+		uint64_t xi = x[i];
+		tbl[i * N + i] = xi * xi;
+		for (size_t j = i + 1; j < N; j++) {
+			uint64_t v = xi * x[j];
+			tbl[i * N + j] = v;
+			tbl[j * N + i] = v;
+		}
+	}
+	y[N] = mulUnitWithTblT<N>(y, tbl);
+	for (size_t i = 1; i < N; i++) {
+		y[N + i] = addMulUnitWithTblT<N>(&y[i], tbl + N * i);
+	}
+}
+
 /*
 	z[N * 2] = x[N] * y[N]
 	H = N/2

From a3293c2c85582e4874e5e8599948711babfafdea Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 3 Feb 2021 17:06:29 +0900
Subject: [PATCH 397/553] add sqrT

---
 misc/Makefile     |  2 +-
 misc/low_test.cpp |  7 ------
 src/low_funct.hpp | 57 ++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/misc/Makefile b/misc/Makefile
index 1a727f3d..25a7c272 100644
--- a/misc/Makefile
+++ b/misc/Makefile
@@ -2,5 +2,5 @@ all: low_test
 
 CFLAGS=-I ../include/ -m32 -Ofast -Wall -Wextra -DNDEBUG
 
-low_test: low_test.cpp
+low_test: low_test.cpp ../src/low_funct.hpp
 	$(CXX) -o low_test low_test.cpp $(CFLAGS)
diff --git a/misc/low_test.cpp b/misc/low_test.cpp
index a77ed36a..540e3d91 100644
--- a/misc/low_test.cpp
+++ b/misc/low_test.cpp
@@ -69,7 +69,6 @@ CYBOZU_TEST_AUTO(mulT)
 {
 	mulTest<8>();
 	mulTest<12>();
-	mulTest<16>();
 }
 
 template<size_t N>
@@ -88,11 +87,6 @@ void sqrTest()
 		vx *= vx;
 		mcl::sqrT<N>(y, x);
 		CYBOZU_TEST_EQUAL_ARRAY(y, vx.getUnit(), N * 2);
-#if 0
-		memset(z, 0, sizeof(z));
-		mcl::karatsubaT<N>(z, x, y);
-		CYBOZU_TEST_EQUAL_ARRAY(z, vx.getUnit(), N * 2);
-#endif
 	}
 	CYBOZU_BENCH_C("sqrT", 10000, mcl::sqrT<N>, y, x);
 }
@@ -101,7 +95,6 @@ CYBOZU_TEST_AUTO(sqrT)
 {
 	sqrTest<8>();
 	sqrTest<12>();
-	sqrTest<16>();
 }
 
 struct Montgomery {
diff --git a/src/low_funct.hpp b/src/low_funct.hpp
index f25559c3..2f3a2a15 100644
--- a/src/low_funct.hpp
+++ b/src/low_funct.hpp
@@ -22,6 +22,23 @@ void copyT(uint32_t y[N], const uint32_t x[N])
 	}
 }
 
+template<size_t N>
+uint32_t shlT(uint32_t y[N], const uint32_t x[N], size_t bit)
+{
+	assert(0 < bit && bit < 32);
+	assert((N % 2) == 0);
+	size_t rBit = sizeof(uint32_t) * 8 - bit;
+	uint32_t keep = x[N - 1];
+	uint32_t prev = keep;
+	for (size_t i = N - 1; i > 0; i--) {
+		uint32_t t = x[i - 1];
+		y[i] = (prev << bit) | (t >> rBit);
+		prev = t;
+	}
+	y[0] = prev << bit;
+	return keep >> rBit;
+}
+
 // [return:y[N]] += x
 template<size_t N>
 inline bool addUnitT(uint32_t y[N], uint32_t x)
@@ -101,6 +118,8 @@ void mulT(uint32_t z[N * 2], const uint32_t x[N], const uint32_t y[N])
 	}
 }
 
+#if 0
+// slower than mulT
 template<size_t N>
 uint32_t mulUnitWithTblT(uint32_t z[N], const uint64_t *tbl_j)
 {
@@ -128,7 +147,6 @@ uint32_t addMulUnitWithTblT(uint32_t z[N], const uint64_t *tbl_j)
 	return H;
 }
 
-
 // y[N * 2] = x[N] * x[N]
 template<size_t N>
 void sqrT(uint32_t y[N * 2], const uint32_t x[N])
@@ -148,6 +166,7 @@ void sqrT(uint32_t y[N * 2], const uint32_t x[N])
 		y[N + i] = addMulUnitWithTblT<N>(&y[i], tbl + N * i);
 	}
 }
+#endif
 
 /*
 	z[N * 2] = x[N] * y[N]
@@ -157,6 +176,7 @@ void sqrT(uint32_t y[N * 2], const uint32_t x[N])
 	assume a < W/2, c < W/2
 	(aW + b)(cW + d) = acW^2 + (ad + bc)W + bd
 	ad + bc = (a + b)(c + d) - ac - bd < (1 << (N * 32))
+	slower than mulT on Core i7 with -m32 for N <= 12
 */
 template<size_t N>
 void karatsubaT(uint32_t z[N * 2], const uint32_t x[N], const uint32_t y[N])
@@ -165,8 +185,6 @@ void karatsubaT(uint32_t z[N * 2], const uint32_t x[N], const uint32_t y[N])
 	assert((x[N - 1] & 0x80000000) == 0);
 	assert((y[N - 1] & 0x80000000) == 0);
 	const size_t H = N / 2;
-	mulT<H>(z, x, y); // bd
-	mulT<H>(z + N, x + H, y + H); // ac
 	uint32_t a_b[H];
 	uint32_t c_d[H];
 	bool c1 = addT<H>(a_b, x, x + H); // a + b
@@ -179,6 +197,8 @@ void karatsubaT(uint32_t z[N * 2], const uint32_t x[N], const uint32_t y[N])
 	if (c2) {
 		addT<H>(tmp + H, tmp + H, a_b);
 	}
+	mulT<H>(z, x, y); // bd
+	mulT<H>(z + N, x + H, y + H); // ac
 	// c:tmp[N] = (a + b)(c + d)
 	subT<N>(tmp, tmp, z);
 	subT<N>(tmp, tmp, z + N);
@@ -188,6 +208,37 @@ void karatsubaT(uint32_t z[N * 2], const uint32_t x[N], const uint32_t y[N])
 	}
 }
 
+/*
+	y[N * 2] = x[N] * x[N]
+	(aW + b)^2 = a^2 W + b^2 + 2abW
+	(a+b)^2 - a^2 - b^2
+*/
+template<size_t N>
+void sqrT(uint32_t y[N * 2], const uint32_t x[N])
+{
+	assert((N % 2) == 0);
+	assert((x[N - 1] & 0x80000000) == 0);
+	const size_t H = N / 2;
+	uint32_t a_b[H];
+	bool c = addT<H>(a_b, x, x + H); // a + b
+	uint32_t tmp[N];
+	mulT<H>(tmp, a_b, a_b);
+	if (c) {
+//		addT<H>(a_b, a_b, a_b);
+		shlT<H>(a_b, a_b, 1);
+		addT<H>(tmp + H, tmp + H, a_b);
+	}
+	mulT<H>(y, x, x); // b^2
+	mulT<H>(y + N, x + H, x + H); // a^2
+	// tmp[N] = (a + b)^2
+	subT<N>(tmp, tmp, y);
+	subT<N>(tmp, tmp, y + N);
+	// tmp[N] = 2ab
+	if (addT<N>(y + H, y + H, tmp)) {
+		addUnitT<H>(y + N + H, 1);
+	}
+}
+
 template<size_t N>
 void addModT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint32_t p[N])
 {

From c346f747d7768c5f6497e888d858de003b9963c0 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 4 Feb 2021 15:09:45 +0900
Subject: [PATCH 398/553] add modT

---
 misc/low_test.cpp | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 src/low_funct.hpp | 44 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/misc/low_test.cpp b/misc/low_test.cpp
index 540e3d91..547e27c5 100644
--- a/misc/low_test.cpp
+++ b/misc/low_test.cpp
@@ -137,6 +137,19 @@ struct Montgomery {
 		}
 		z = c;
 	}
+	void mod(mcl::Vint& z, const mcl::Vint& xy) const
+	{
+		z = xy;
+		for (size_t i = 0; i < pn_; i++) {
+			uint32_t q = z.getUnit()[0] * rp_;
+			mcl::Vint t = q;
+			z += p_ * t;
+			z >>= 32;
+		}
+		if (z >= p_) {
+			z -= p_;
+		}
+	}
 };
 
 template<size_t N>
@@ -171,10 +184,44 @@ void montTest(const char *pStr)
 	CYBOZU_BENCH_C("montT", 10000, mcl::montT<N>, z, x, y, p);
 }
 
+template<size_t N>
+void modTest(const char *pStr)
+{
+	mcl::Vint vp;
+	vp.setStr(pStr);
+	Montgomery mont(vp);
+
+	cybozu::XorShift rg;
+	uint32_t xy[N * 2];
+	uint32_t z[N];
+	uint32_t _p[N + 1];
+	uint32_t *const p = _p + 1;
+	vp.getArray(p, N);
+	p[-1] = mont.rp_;
+
+	for (size_t i = 0; i < 1000; i++) {
+		setRand(xy, N * 2, rg);
+		// remove MSB
+		xy[N * 2 - 1] &= 0x7fffffff;
+		mcl::Vint vxy, vz;
+		vxy.setArray(xy, N * 2);
+		mont.mod(vz, vxy);
+		mcl::modT<N>(z, xy, p);
+		CYBOZU_TEST_EQUAL_ARRAY(z, vz.getUnit(), N);
+	}
+	CYBOZU_BENCH_C("modT", 10000, mcl::modT<N>, z, xy, p);
+}
+
 CYBOZU_TEST_AUTO(mont)
 {
 	const char *pBN254 = "0x2523648240000001ba344d80000000086121000000000013a700000000000013";
+	puts("BN254");
 	montTest<8>(pBN254);
+	modTest<8>(pBN254);
+
 	const char *pBLS12_381 = "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab";
+	puts("BLS12");
 	montTest<12>(pBLS12_381);
+	modTest<12>(pBLS12_381);
 }
+
diff --git a/src/low_funct.hpp b/src/low_funct.hpp
index 2f3a2a15..104d6fc0 100644
--- a/src/low_funct.hpp
+++ b/src/low_funct.hpp
@@ -251,7 +251,7 @@ void addModT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint
 }
 
 template<size_t N>
-void subModT(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t *p)
+void subModT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint32_t p[N])
 {
 	bool c = subT<N>(z, x, y);
 	if (c) {
@@ -264,7 +264,7 @@ void subModT(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t *
 	@remark : assume p[-1] = rp
 */
 template<size_t N>
-void montT(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t *p)
+void montT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint32_t p[N])
 {
 	const uint32_t rp = p[-1];
 	assert((p[N - 1] & 0x80000000) == 0);
@@ -282,5 +282,45 @@ void montT(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t *p)
 	}
 }
 
+// [return:z[N+1]] = z[N+1] + x[N] * y + (cc << (N * 32))
+template<size_t N>
+bool addMulUnit2T(uint32_t z[N + 1], const uint32_t x[N], uint32_t y, const bool *cc = 0)
+{
+	uint32_t H = 0;
+	for (size_t i = 0; i < N; i++) {
+		uint64_t v = uint64_t(x[i]) * y;
+		v += H;
+		v += z[i];
+		z[i] = uint32_t(v);
+		H = uint32_t(v >> 32);
+	}
+	if (cc) H += *cc;
+	uint64_t v = uint64_t(z[N]);
+	v += H;
+	z[N] = uint32_t(v);
+	return (v >> 32) != 0;
+}
+
+/*
+	z[N] = Montgomery reduction(y[N], xy[N], p[N])
+	@remark : assume p[-1] = rp
+*/
+template<size_t N>
+void modT(uint32_t y[N], const uint32_t xy[N * 2], const uint32_t p[N])
+{
+	const uint32_t rp = p[-1];
+	assert((p[N - 1] & 0x80000000) == 0);
+	uint32_t buf[N * 2];
+	copyT<N * 2>(buf, xy);
+	bool c = 0;
+	for (size_t i = 0; i < N; i++) {
+		uint32_t q = buf[i] * rp;
+		c = addMulUnit2T<N>(buf + i, p, q, &c);
+	}
+	if (subT<N>(y, buf + N, p)) {
+		copyT<N>(y, buf + N);
+	}
+}
+
 } // mcl
 

From dc8964db5ab4326329354a7deccd60198c61118d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 4 Feb 2021 17:18:36 +0900
Subject: [PATCH 399/553] add sqrMont

---
 misc/low_test.cpp | 15 ++++++++++-----
 src/fp.cpp        | 40 ++++++++++++++++++++++++++--------------
 src/low_funct.hpp | 18 +++++++++++++++++-
 3 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/misc/low_test.cpp b/misc/low_test.cpp
index 547e27c5..91af412a 100644
--- a/misc/low_test.cpp
+++ b/misc/low_test.cpp
@@ -153,7 +153,7 @@ struct Montgomery {
 };
 
 template<size_t N>
-void montTest(const char *pStr)
+void mulMontTest(const char *pStr)
 {
 	mcl::Vint vp;
 	vp.setStr(pStr);
@@ -178,10 +178,15 @@ void montTest(const char *pStr)
 		vx.setArray(x, N);
 		vy.setArray(y, N);
 		mont.mul(vz, vx, vy);
-		mcl::montT<N>(z, x, y, p);
+		mcl::mulMontT<N>(z, x, y, p);
+		CYBOZU_TEST_EQUAL_ARRAY(z, vz.getUnit(), N);
+
+		mont.mul(vz, vx, vx);
+		mcl::sqrMontT<N>(z, x, p);
 		CYBOZU_TEST_EQUAL_ARRAY(z, vz.getUnit(), N);
 	}
-	CYBOZU_BENCH_C("montT", 10000, mcl::montT<N>, z, x, y, p);
+	CYBOZU_BENCH_C("mulMontT", 10000, mcl::mulMontT<N>, x, x, y, p);
+	CYBOZU_BENCH_C("sqrMontT", 10000, mcl::sqrMontT<N>, x, x, p);
 }
 
 template<size_t N>
@@ -216,12 +221,12 @@ CYBOZU_TEST_AUTO(mont)
 {
 	const char *pBN254 = "0x2523648240000001ba344d80000000086121000000000013a700000000000013";
 	puts("BN254");
-	montTest<8>(pBN254);
+	mulMontTest<8>(pBN254);
 	modTest<8>(pBN254);
 
 	const char *pBLS12_381 = "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab";
 	puts("BLS12");
-	montTest<12>(pBLS12_381);
+	mulMontTest<12>(pBLS12_381);
 	modTest<12>(pBLS12_381);
 }
 
diff --git a/src/fp.cpp b/src/fp.cpp
index 3a70db55..484ad432 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -272,20 +272,6 @@ void setOp2(Op& op)
 	} else {
 		op.fp_add = Add<N, false, Tag>::f;
 		op.fp_sub = Sub<N, false, Tag>::f;
-#ifdef FOR_WASM
-		switch (N) {
-		case 8:
-			op.fp_add = mcl::addModT<8>;
-			op.fp_sub = mcl::subModT<8>;
-			break;
-		case 12:
-			op.fp_add = mcl::addModT<12>;
-			op.fp_sub = mcl::subModT<12>;
-			break;
-		default:
-			break;
-		}
-#endif
 	}
 	if (op.isMont) {
 		if (op.isFullBit) {
@@ -425,6 +411,25 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
 	return true;
 }
 
+#ifdef FOR_WASM
+template<size_t N>
+void setWasmOp(Op& op)
+{
+	if (!(op.isMont && !op.isFullBit)) return;
+EM_ASM({console.log($0)}, N);
+//	op.fp_addPre = mcl::addT<N>;
+//	op.fp_subPre = mcl::subT<N>;
+//	op.fpDbl_addPre = mcl::addT<N * 2>;
+//	op.fpDbl_subPre = mcl::subT<N * 2>;
+	op.fp_add = mcl::addModT<N>;
+	op.fp_sub = mcl::subModT<N>;
+	op.fp_mul = mcl::mulMontT<N>;
+	op.fp_sqr = mcl::sqrMontT<N>;
+	op.fpDbl_mulPre = mulT<N>;
+	op.fpDbl_mod = modT<N>;
+}
+#endif
+
 bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size_t mclMaxBitSize)
 {
 	if (mclMaxBitSize != MCL_MAX_BIT_SIZE) return false;
@@ -565,6 +570,13 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
 	default:
 		return false;
 	}
+#ifdef FOR_WASM
+	if (N == 8) {
+		setWasmOp<8>(*this);
+	} else if (N == 12) {
+		setWasmOp<12>(*this);
+	}
+#endif
 #ifdef MCL_USE_LLVM
 	if (primeMode == PM_NIST_P192) {
 		fp_mul = &mcl_fp_mulNIST_P192L;
diff --git a/src/low_funct.hpp b/src/low_funct.hpp
index 104d6fc0..082c2df5 100644
--- a/src/low_funct.hpp
+++ b/src/low_funct.hpp
@@ -264,7 +264,7 @@ void subModT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint
 	@remark : assume p[-1] = rp
 */
 template<size_t N>
-void montT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint32_t p[N])
+void mulMontT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint32_t p[N])
 {
 	const uint32_t rp = p[-1];
 	assert((p[N - 1] & 0x80000000) == 0);
@@ -322,5 +322,21 @@ void modT(uint32_t y[N], const uint32_t xy[N * 2], const uint32_t p[N])
 	}
 }
 
+/*
+	z[N] = Montgomery(x[N], y[N], p[N])
+	@remark : assume p[-1] = rp
+*/
+template<size_t N>
+void sqrMontT(uint32_t y[N], const uint32_t x[N], const uint32_t p[N])
+{
+#if 1
+	mulMontT<N>(y, x, x, p);
+#else
+	uint32_t xx[N * 2];
+	sqrT<N>(xx, x);
+	modT<N>(y, xx, p);
+#endif
+}
+
 } // mcl
 

From 215106e9e6365fbc3ccd98ea89fc583510843885 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 5 Feb 2021 17:00:47 +0900
Subject: [PATCH 400/553] replace bool to uint32_t

---
 misc/low_test.cpp | 14 ++++++++------
 src/low_funct.hpp | 40 ++++++++++++++++++++--------------------
 2 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/misc/low_test.cpp b/misc/low_test.cpp
index 91af412a..06418f9a 100644
--- a/misc/low_test.cpp
+++ b/misc/low_test.cpp
@@ -21,6 +21,8 @@ void dump(const char *msg, const uint32_t *x, size_t n)
 #include <cybozu/benchmark.hpp>
 #include <mcl/util.hpp>
 
+const int C = 10000;
+
 template<class RG>
 void setRand(uint32_t *x, size_t n, RG& rg)
 {
@@ -61,8 +63,8 @@ void mulTest()
 		mcl::karatsubaT<N>(z, x, y);
 		CYBOZU_TEST_EQUAL_ARRAY(z, vx.getUnit(), N * 2);
 	}
-	CYBOZU_BENCH_C("mulT", 10000, mcl::mulT<N>, z, x, y);
-	CYBOZU_BENCH_C("kara", 10000, mcl::karatsubaT<N>, z, x, y);
+	CYBOZU_BENCH_C("mulT", C, mcl::mulT<N>, z, x, y);
+	CYBOZU_BENCH_C("kara", C, mcl::karatsubaT<N>, z, x, y);
 }
 
 CYBOZU_TEST_AUTO(mulT)
@@ -88,7 +90,7 @@ void sqrTest()
 		mcl::sqrT<N>(y, x);
 		CYBOZU_TEST_EQUAL_ARRAY(y, vx.getUnit(), N * 2);
 	}
-	CYBOZU_BENCH_C("sqrT", 10000, mcl::sqrT<N>, y, x);
+	CYBOZU_BENCH_C("sqrT", C, mcl::sqrT<N>, y, x);
 }
 
 CYBOZU_TEST_AUTO(sqrT)
@@ -185,8 +187,8 @@ void mulMontTest(const char *pStr)
 		mcl::sqrMontT<N>(z, x, p);
 		CYBOZU_TEST_EQUAL_ARRAY(z, vz.getUnit(), N);
 	}
-	CYBOZU_BENCH_C("mulMontT", 10000, mcl::mulMontT<N>, x, x, y, p);
-	CYBOZU_BENCH_C("sqrMontT", 10000, mcl::sqrMontT<N>, x, x, p);
+	CYBOZU_BENCH_C("mulMontT", C, mcl::mulMontT<N>, x, x, y, p);
+	CYBOZU_BENCH_C("sqrMontT", C, mcl::sqrMontT<N>, x, x, p);
 }
 
 template<size_t N>
@@ -214,7 +216,7 @@ void modTest(const char *pStr)
 		mcl::modT<N>(z, xy, p);
 		CYBOZU_TEST_EQUAL_ARRAY(z, vz.getUnit(), N);
 	}
-	CYBOZU_BENCH_C("modT", 10000, mcl::modT<N>, z, xy, p);
+	CYBOZU_BENCH_C("modT", C, mcl::modT<N>, z, xy, p);
 }
 
 CYBOZU_TEST_AUTO(mont)
diff --git a/src/low_funct.hpp b/src/low_funct.hpp
index 082c2df5..885b16ad 100644
--- a/src/low_funct.hpp
+++ b/src/low_funct.hpp
@@ -41,40 +41,40 @@ uint32_t shlT(uint32_t y[N], const uint32_t x[N], size_t bit)
 
 // [return:y[N]] += x
 template<size_t N>
-inline bool addUnitT(uint32_t y[N], uint32_t x)
+inline uint32_t addUnitT(uint32_t y[N], uint32_t x)
 {
 	uint64_t v = uint64_t(y[0]) + x;
 	y[0] = uint32_t(v);
-	bool c = (v >> 32) != 0;
-	if (!c) return false;
+	uint32_t c = v >> 32;
+	if (c == 0) return 0;
 	for (size_t i = 1; i < N; i++) {
 		v = uint64_t(y[i]) + 1;
 		y[i] = uint32_t(v);
-		if ((v >> 32) == 0) return false;
+		if ((v >> 32) == 0) return 0;
 	}
-	return true;
+	return 1;
 }
 
 template<size_t N>
-bool addT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N])
+uint32_t addT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N])
 {
-	bool c = false;
+	uint32_t c = 0;
 	for (size_t i = 0; i < N; i++) {
 		uint64_t v = uint64_t(x[i]) + y[i] + c;
 		z[i] = uint32_t(v);
-		c = (v >> 32) != 0;
+		c = uint32_t(v >> 32);
 	}
 	return c;
 }
 
 template<size_t N>
-bool subT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N])
+uint32_t subT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N])
 {
-	bool c = false;
+	uint32_t c = 0;
 	for (size_t i = 0; i < N; i++) {
 		uint64_t v = uint64_t(x[i]) - y[i] - c;
 		z[i] = uint32_t(v);
-		c = (v >> 32) != 0;
+		c = uint32_t(v >> 63);
 	}
 	return c;
 }
@@ -187,8 +187,8 @@ void karatsubaT(uint32_t z[N * 2], const uint32_t x[N], const uint32_t y[N])
 	const size_t H = N / 2;
 	uint32_t a_b[H];
 	uint32_t c_d[H];
-	bool c1 = addT<H>(a_b, x, x + H); // a + b
-	bool c2 = addT<H>(c_d, y, y + H); // c + d
+	uint32_t c1 = addT<H>(a_b, x, x + H); // a + b
+	uint32_t c2 = addT<H>(c_d, y, y + H); // c + d
 	uint32_t tmp[N];
 	mulT<H>(tmp, a_b, c_d);
 	if (c1) {
@@ -220,11 +220,10 @@ void sqrT(uint32_t y[N * 2], const uint32_t x[N])
 	assert((x[N - 1] & 0x80000000) == 0);
 	const size_t H = N / 2;
 	uint32_t a_b[H];
-	bool c = addT<H>(a_b, x, x + H); // a + b
+	uint32_t c = addT<H>(a_b, x, x + H); // a + b
 	uint32_t tmp[N];
 	mulT<H>(tmp, a_b, a_b);
 	if (c) {
-//		addT<H>(a_b, a_b, a_b);
 		shlT<H>(a_b, a_b, 1);
 		addT<H>(tmp + H, tmp + H, a_b);
 	}
@@ -244,7 +243,7 @@ void addModT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint
 {
 	uint32_t t[N];
 	addT<N>(z, x, y);
-	bool c = subT<N>(t, z, p);
+	uint32_t c = subT<N>(t, z, p);
 	if (!c) {
 		copyT<N>(z, t);
 	}
@@ -253,7 +252,7 @@ void addModT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint
 template<size_t N>
 void subModT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint32_t p[N])
 {
-	bool c = subT<N>(z, x, y);
+	uint32_t c = subT<N>(z, x, y);
 	if (c) {
 		addT<N>(z, z, p);
 	}
@@ -284,7 +283,7 @@ void mulMontT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uin
 
 // [return:z[N+1]] = z[N+1] + x[N] * y + (cc << (N * 32))
 template<size_t N>
-bool addMulUnit2T(uint32_t z[N + 1], const uint32_t x[N], uint32_t y, const bool *cc = 0)
+uint32_t addMulUnit2T(uint32_t z[N + 1], const uint32_t x[N], uint32_t y, const uint32_t *cc = 0)
 {
 	uint32_t H = 0;
 	for (size_t i = 0; i < N; i++) {
@@ -298,7 +297,7 @@ bool addMulUnit2T(uint32_t z[N + 1], const uint32_t x[N], uint32_t y, const bool
 	uint64_t v = uint64_t(z[N]);
 	v += H;
 	z[N] = uint32_t(v);
-	return (v >> 32) != 0;
+	return uint32_t(v >> 32);
 }
 
 /*
@@ -312,7 +311,7 @@ void modT(uint32_t y[N], const uint32_t xy[N * 2], const uint32_t p[N])
 	assert((p[N - 1] & 0x80000000) == 0);
 	uint32_t buf[N * 2];
 	copyT<N * 2>(buf, xy);
-	bool c = 0;
+	uint32_t c = 0;
 	for (size_t i = 0; i < N; i++) {
 		uint32_t q = buf[i] * rp;
 		c = addMulUnit2T<N>(buf + i, p, q, &c);
@@ -332,6 +331,7 @@ void sqrMontT(uint32_t y[N], const uint32_t x[N], const uint32_t p[N])
 #if 1
 	mulMontT<N>(y, x, x, p);
 #else
+	// slower
 	uint32_t xx[N * 2];
 	sqrT<N>(xx, x);
 	modT<N>(y, xx, p);

From 769ca108f9ee6640621a28af65eab87e705cdef1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 5 Feb 2021 17:07:39 +0900
Subject: [PATCH 401/553] disable log

---
 src/fp.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index 484ad432..5d03bf1e 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -416,7 +416,7 @@ template<size_t N>
 void setWasmOp(Op& op)
 {
 	if (!(op.isMont && !op.isFullBit)) return;
-EM_ASM({console.log($0)}, N);
+//EM_ASM({console.log($0)}, N);
 //	op.fp_addPre = mcl::addT<N>;
 //	op.fp_subPre = mcl::subT<N>;
 //	op.fpDbl_addPre = mcl::addT<N * 2>;

From c0be1e91cec8077335a41ca17237958eb4c05a0f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 7 Feb 2021 12:12:50 +0900
Subject: [PATCH 402/553] rename low_funct.hpp to low_func_wasm.hpp

---
 misc/Makefile                            |  2 +-
 misc/low_test.cpp                        |  2 +-
 src/fp.cpp                               | 10 +++++-----
 src/{low_funct.hpp => low_func_wasm.hpp} |  0
 4 files changed, 7 insertions(+), 7 deletions(-)
 rename src/{low_funct.hpp => low_func_wasm.hpp} (100%)

diff --git a/misc/Makefile b/misc/Makefile
index 25a7c272..c9dec203 100644
--- a/misc/Makefile
+++ b/misc/Makefile
@@ -2,5 +2,5 @@ all: low_test
 
 CFLAGS=-I ../include/ -m32 -Ofast -Wall -Wextra -DNDEBUG
 
-low_test: low_test.cpp ../src/low_funct.hpp
+low_test: low_test.cpp ../src/low_func_wasm.hpp
 	$(CXX) -o low_test low_test.cpp $(CFLAGS)
diff --git a/misc/low_test.cpp b/misc/low_test.cpp
index 91af412a..6d3e9b4e 100644
--- a/misc/low_test.cpp
+++ b/misc/low_test.cpp
@@ -9,7 +9,7 @@ void dump(const char *msg, const uint32_t *x, size_t n)
 	}
 	printf("\n");
 }
-#include "../src/low_funct.hpp"
+#include "../src/low_func_wasm.hpp"
 
 #define MCL_USE_VINT
 #define MCL_VINT_FIXED_BUFFER
diff --git a/src/fp.cpp b/src/fp.cpp
index 484ad432..cd3266ed 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -4,8 +4,8 @@
 #include <cybozu/endian.hpp>
 #include <mcl/conversion.hpp>
 #if defined(__EMSCRIPTEN__) && MCL_SIZEOF_UNIT == 4
-#define FOR_WASM
-#include "low_funct.hpp"
+#define USE_WASM
+#include "low_func_wasm.hpp"
 #endif
 
 #if defined(MCL_STATIC_CODE) || defined(MCL_USE_XBYAK) || (defined(MCL_USE_LLVM) && (CYBOZU_HOST == CYBOZU_HOST_INTEL))
@@ -411,12 +411,12 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
 	return true;
 }
 
-#ifdef FOR_WASM
+#ifdef USE_WASM
 template<size_t N>
 void setWasmOp(Op& op)
 {
 	if (!(op.isMont && !op.isFullBit)) return;
-EM_ASM({console.log($0)}, N);
+//EM_ASM({console.log($0)}, N);
 //	op.fp_addPre = mcl::addT<N>;
 //	op.fp_subPre = mcl::subT<N>;
 //	op.fpDbl_addPre = mcl::addT<N * 2>;
@@ -570,7 +570,7 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
 	default:
 		return false;
 	}
-#ifdef FOR_WASM
+#ifdef USE_WASM
 	if (N == 8) {
 		setWasmOp<8>(*this);
 	} else if (N == 12) {
diff --git a/src/low_funct.hpp b/src/low_func_wasm.hpp
similarity index 100%
rename from src/low_funct.hpp
rename to src/low_func_wasm.hpp

From 54155986b4b64856739ea544add15701d9f20855 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 7 Feb 2021 12:13:08 +0900
Subject: [PATCH 403/553] replace bool with uint32_t

---
 misc/low_test.cpp     | 14 ++++++++------
 src/low_func_wasm.hpp | 40 ++++++++++++++++++++--------------------
 2 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/misc/low_test.cpp b/misc/low_test.cpp
index 6d3e9b4e..735a7d6f 100644
--- a/misc/low_test.cpp
+++ b/misc/low_test.cpp
@@ -21,6 +21,8 @@ void dump(const char *msg, const uint32_t *x, size_t n)
 #include <cybozu/benchmark.hpp>
 #include <mcl/util.hpp>
 
+const int C = 10000;
+
 template<class RG>
 void setRand(uint32_t *x, size_t n, RG& rg)
 {
@@ -61,8 +63,8 @@ void mulTest()
 		mcl::karatsubaT<N>(z, x, y);
 		CYBOZU_TEST_EQUAL_ARRAY(z, vx.getUnit(), N * 2);
 	}
-	CYBOZU_BENCH_C("mulT", 10000, mcl::mulT<N>, z, x, y);
-	CYBOZU_BENCH_C("kara", 10000, mcl::karatsubaT<N>, z, x, y);
+	CYBOZU_BENCH_C("mulT", C, mcl::mulT<N>, z, x, y);
+	CYBOZU_BENCH_C("kara", C, mcl::karatsubaT<N>, z, x, y);
 }
 
 CYBOZU_TEST_AUTO(mulT)
@@ -88,7 +90,7 @@ void sqrTest()
 		mcl::sqrT<N>(y, x);
 		CYBOZU_TEST_EQUAL_ARRAY(y, vx.getUnit(), N * 2);
 	}
-	CYBOZU_BENCH_C("sqrT", 10000, mcl::sqrT<N>, y, x);
+	CYBOZU_BENCH_C("sqrT", C, mcl::sqrT<N>, y, x);
 }
 
 CYBOZU_TEST_AUTO(sqrT)
@@ -185,8 +187,8 @@ void mulMontTest(const char *pStr)
 		mcl::sqrMontT<N>(z, x, p);
 		CYBOZU_TEST_EQUAL_ARRAY(z, vz.getUnit(), N);
 	}
-	CYBOZU_BENCH_C("mulMontT", 10000, mcl::mulMontT<N>, x, x, y, p);
-	CYBOZU_BENCH_C("sqrMontT", 10000, mcl::sqrMontT<N>, x, x, p);
+	CYBOZU_BENCH_C("mulMontT", C, mcl::mulMontT<N>, x, x, y, p);
+	CYBOZU_BENCH_C("sqrMontT", C, mcl::sqrMontT<N>, x, x, p);
 }
 
 template<size_t N>
@@ -214,7 +216,7 @@ void modTest(const char *pStr)
 		mcl::modT<N>(z, xy, p);
 		CYBOZU_TEST_EQUAL_ARRAY(z, vz.getUnit(), N);
 	}
-	CYBOZU_BENCH_C("modT", 10000, mcl::modT<N>, z, xy, p);
+	CYBOZU_BENCH_C("modT", C, mcl::modT<N>, z, xy, p);
 }
 
 CYBOZU_TEST_AUTO(mont)
diff --git a/src/low_func_wasm.hpp b/src/low_func_wasm.hpp
index 082c2df5..885b16ad 100644
--- a/src/low_func_wasm.hpp
+++ b/src/low_func_wasm.hpp
@@ -41,40 +41,40 @@ uint32_t shlT(uint32_t y[N], const uint32_t x[N], size_t bit)
 
 // [return:y[N]] += x
 template<size_t N>
-inline bool addUnitT(uint32_t y[N], uint32_t x)
+inline uint32_t addUnitT(uint32_t y[N], uint32_t x)
 {
 	uint64_t v = uint64_t(y[0]) + x;
 	y[0] = uint32_t(v);
-	bool c = (v >> 32) != 0;
-	if (!c) return false;
+	uint32_t c = v >> 32;
+	if (c == 0) return 0;
 	for (size_t i = 1; i < N; i++) {
 		v = uint64_t(y[i]) + 1;
 		y[i] = uint32_t(v);
-		if ((v >> 32) == 0) return false;
+		if ((v >> 32) == 0) return 0;
 	}
-	return true;
+	return 1;
 }
 
 template<size_t N>
-bool addT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N])
+uint32_t addT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N])
 {
-	bool c = false;
+	uint32_t c = 0;
 	for (size_t i = 0; i < N; i++) {
 		uint64_t v = uint64_t(x[i]) + y[i] + c;
 		z[i] = uint32_t(v);
-		c = (v >> 32) != 0;
+		c = uint32_t(v >> 32);
 	}
 	return c;
 }
 
 template<size_t N>
-bool subT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N])
+uint32_t subT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N])
 {
-	bool c = false;
+	uint32_t c = 0;
 	for (size_t i = 0; i < N; i++) {
 		uint64_t v = uint64_t(x[i]) - y[i] - c;
 		z[i] = uint32_t(v);
-		c = (v >> 32) != 0;
+		c = uint32_t(v >> 63);
 	}
 	return c;
 }
@@ -187,8 +187,8 @@ void karatsubaT(uint32_t z[N * 2], const uint32_t x[N], const uint32_t y[N])
 	const size_t H = N / 2;
 	uint32_t a_b[H];
 	uint32_t c_d[H];
-	bool c1 = addT<H>(a_b, x, x + H); // a + b
-	bool c2 = addT<H>(c_d, y, y + H); // c + d
+	uint32_t c1 = addT<H>(a_b, x, x + H); // a + b
+	uint32_t c2 = addT<H>(c_d, y, y + H); // c + d
 	uint32_t tmp[N];
 	mulT<H>(tmp, a_b, c_d);
 	if (c1) {
@@ -220,11 +220,10 @@ void sqrT(uint32_t y[N * 2], const uint32_t x[N])
 	assert((x[N - 1] & 0x80000000) == 0);
 	const size_t H = N / 2;
 	uint32_t a_b[H];
-	bool c = addT<H>(a_b, x, x + H); // a + b
+	uint32_t c = addT<H>(a_b, x, x + H); // a + b
 	uint32_t tmp[N];
 	mulT<H>(tmp, a_b, a_b);
 	if (c) {
-//		addT<H>(a_b, a_b, a_b);
 		shlT<H>(a_b, a_b, 1);
 		addT<H>(tmp + H, tmp + H, a_b);
 	}
@@ -244,7 +243,7 @@ void addModT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint
 {
 	uint32_t t[N];
 	addT<N>(z, x, y);
-	bool c = subT<N>(t, z, p);
+	uint32_t c = subT<N>(t, z, p);
 	if (!c) {
 		copyT<N>(z, t);
 	}
@@ -253,7 +252,7 @@ void addModT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint
 template<size_t N>
 void subModT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uint32_t p[N])
 {
-	bool c = subT<N>(z, x, y);
+	uint32_t c = subT<N>(z, x, y);
 	if (c) {
 		addT<N>(z, z, p);
 	}
@@ -284,7 +283,7 @@ void mulMontT(uint32_t z[N], const uint32_t x[N], const uint32_t y[N], const uin
 
 // [return:z[N+1]] = z[N+1] + x[N] * y + (cc << (N * 32))
 template<size_t N>
-bool addMulUnit2T(uint32_t z[N + 1], const uint32_t x[N], uint32_t y, const bool *cc = 0)
+uint32_t addMulUnit2T(uint32_t z[N + 1], const uint32_t x[N], uint32_t y, const uint32_t *cc = 0)
 {
 	uint32_t H = 0;
 	for (size_t i = 0; i < N; i++) {
@@ -298,7 +297,7 @@ bool addMulUnit2T(uint32_t z[N + 1], const uint32_t x[N], uint32_t y, const bool
 	uint64_t v = uint64_t(z[N]);
 	v += H;
 	z[N] = uint32_t(v);
-	return (v >> 32) != 0;
+	return uint32_t(v >> 32);
 }
 
 /*
@@ -312,7 +311,7 @@ void modT(uint32_t y[N], const uint32_t xy[N * 2], const uint32_t p[N])
 	assert((p[N - 1] & 0x80000000) == 0);
 	uint32_t buf[N * 2];
 	copyT<N * 2>(buf, xy);
-	bool c = 0;
+	uint32_t c = 0;
 	for (size_t i = 0; i < N; i++) {
 		uint32_t q = buf[i] * rp;
 		c = addMulUnit2T<N>(buf + i, p, q, &c);
@@ -332,6 +331,7 @@ void sqrMontT(uint32_t y[N], const uint32_t x[N], const uint32_t p[N])
 #if 1
 	mulMontT<N>(y, x, x, p);
 #else
+	// slower
 	uint32_t xx[N * 2];
 	sqrT<N>(xx, x);
 	modT<N>(y, xx, p);

From c0d65655eaa853c7f43955d91d81b7d1d0ada182 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 7 Feb 2021 12:56:34 +0900
Subject: [PATCH 404/553] remove unused code

---
 src/low_func_wasm.hpp | 50 -------------------------------------------
 1 file changed, 50 deletions(-)

diff --git a/src/low_func_wasm.hpp b/src/low_func_wasm.hpp
index 885b16ad..352d4469 100644
--- a/src/low_func_wasm.hpp
+++ b/src/low_func_wasm.hpp
@@ -118,56 +118,6 @@ void mulT(uint32_t z[N * 2], const uint32_t x[N], const uint32_t y[N])
 	}
 }
 
-#if 0
-// slower than mulT
-template<size_t N>
-uint32_t mulUnitWithTblT(uint32_t z[N], const uint64_t *tbl_j)
-{
-	uint32_t H = 0;
-	for (size_t i = 0; i < N; i++) {
-		uint64_t v = tbl_j[i];
-		v += H;
-		z[i] = uint32_t(v);
-		H = uint32_t(v >> 32);
-	}
-	return H;
-}
-
-template<size_t N>
-uint32_t addMulUnitWithTblT(uint32_t z[N], const uint64_t *tbl_j)
-{
-	uint32_t H = 0;
-	for (size_t i = 0; i < N; i++) {
-		uint64_t v = tbl_j[i];
-		v += H;
-		v += z[i];
-		z[i] = uint32_t(v);
-		H = uint32_t(v >> 32);
-	}
-	return H;
-}
-
-// y[N * 2] = x[N] * x[N]
-template<size_t N>
-void sqrT(uint32_t y[N * 2], const uint32_t x[N])
-{
-	uint64_t tbl[N * N]; // x[i]x[j]
-	for (size_t i = 0; i < N; i++) {
-		uint64_t xi = x[i];
-		tbl[i * N + i] = xi * xi;
-		for (size_t j = i + 1; j < N; j++) {
-			uint64_t v = xi * x[j];
-			tbl[i * N + j] = v;
-			tbl[j * N + i] = v;
-		}
-	}
-	y[N] = mulUnitWithTblT<N>(y, tbl);
-	for (size_t i = 1; i < N; i++) {
-		y[N + i] = addMulUnitWithTblT<N>(&y[i], tbl + N * i);
-	}
-}
-#endif
-
 /*
 	z[N * 2] = x[N] * y[N]
 	H = N/2

From 2378fd27434810e2aadac4bec704959e1d2e4ce0 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 7 Feb 2021 14:56:24 +0900
Subject: [PATCH 405/553] a little optimization of portable mulUnit

---
 include/mcl/vint.hpp | 40 ++++++++++++----------------------------
 1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/include/mcl/vint.hpp b/include/mcl/vint.hpp
index 13c4483a..3bed5278 100644
--- a/include/mcl/vint.hpp
+++ b/include/mcl/vint.hpp
@@ -90,35 +90,19 @@ inline uint32_t mulUnit(uint32_t *pH, uint32_t x, uint32_t y)
 inline uint64_t mulUnit(uint64_t *pH, uint64_t x, uint64_t y)
 {
 #ifdef MCL_VINT_64BIT_PORTABLE
-	uint32_t a = uint32_t(x >> 32);
-	uint32_t b = uint32_t(x);
-	uint32_t c = uint32_t(y >> 32);
-	uint32_t d = uint32_t(y);
-
-	uint64_t ad = uint64_t(d) * a;
-	uint64_t bd = uint64_t(d) * b;
-	uint64_t L = uint32_t(bd);
-	ad += bd >> 32; // [ad:L]
-
-	uint64_t ac = uint64_t(c) * a;
-	uint64_t bc = uint64_t(c) * b;
-	uint64_t H = uint32_t(bc);
-	ac += bc >> 32; // [ac:H]
-	/*
-		  adL
-		 acH
-	*/
-	uint64_t t = (ac << 32) | H;
-	ac >>= 32;
-	H = t + ad;
-	if (H < t) {
-		ac++;
-	}
-	/*
-		ac:H:L
-	*/
+	const uint64_t mask = 0xffffffff;
+	uint64_t v = (x & mask) * (y & mask);
+	uint64_t L = uint32_t(v);
+	uint64_t H = v >> 32;
+	uint64_t ad = (x & mask) * uint32_t(y >> 32);
+	uint64_t bc = uint32_t(x >> 32) * (y & mask);
+	H += uint32_t(ad);
+	H += uint32_t(bc);
 	L |= H << 32;
-	H = (ac << 32) | uint32_t(H >> 32);
+	H >>= 32;
+	H += ad >> 32;
+	H += bc >> 32;
+	H += (x >> 32) * (y >> 32);
 	*pH = H;
 	return L;
 #elif defined(_WIN64) && !defined(__INTEL_COMPILER)

From 64be787592b97467312b70ccc03f259d9b192c96 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 8 Feb 2021 14:20:18 +0900
Subject: [PATCH 406/553] reduce one wrapper of mulPre for wasm

---
 include/mcl/fp_tower.hpp | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 48020380..b39f7b31 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -113,24 +113,25 @@ class FpDblT : public fp::Serializable<FpDblT<Fp> > {
 	static void (*mod)(Fp& z, const FpDblT& xy);
 	static void (*addPre)(FpDblT& z, const FpDblT& x, const FpDblT& y);
 	static void (*subPre)(FpDblT& z, const FpDblT& x, const FpDblT& y);
+	static void (*mulPre)(FpDblT& xy, const Fp& x, const Fp& y);
 	static void addC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); }
 	static void subC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); }
 	static void modC(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); }
 	static void addPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); }
 	static void subPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
+	static void mulPreC(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
 #else
 	static void add(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); }
 	static void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); }
 	static void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); }
 	static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); }
 	static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
+	static void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
 #endif
-	static void mulPreC(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
 	static void sqrPreC(FpDblT& xx, const Fp& x) { Fp::op_.fpDbl_sqrPre(xx.v_, x.v_); }
 	/*
 		mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy)
 	*/
-	static void (*mulPre)(FpDblT& xy, const Fp& x, const Fp& y);
 	static void (*sqrPre)(FpDblT& xx, const Fp& x);
 	static void mulUnit(FpDblT& z, const FpDblT& x, Unit y)
 	{
@@ -151,12 +152,9 @@ class FpDblT : public fp::Serializable<FpDblT<Fp> > {
 		if (addPre == 0) addPre = addPreC;
 		subPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subPre);
 		if (subPre == 0) subPre = subPreC;
+		mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPreA_);
+		if (mulPre == 0) mulPre = mulPreC;
 #endif
-		if (op.fpDbl_mulPreA_) {
-			mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPreA_);
-		} else {
-			mulPre = mulPreC;
-		}
 		if (op.fpDbl_sqrPreA_) {
 			sqrPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&)>(op.fpDbl_sqrPreA_);
 		} else {
@@ -173,8 +171,8 @@ template<class Fp> void (*FpDblT<Fp>::sub)(FpDblT&, const FpDblT&, const FpDblT&
 template<class Fp> void (*FpDblT<Fp>::mod)(Fp&, const FpDblT&);
 template<class Fp> void (*FpDblT<Fp>::addPre)(FpDblT&, const FpDblT&, const FpDblT&);
 template<class Fp> void (*FpDblT<Fp>::subPre)(FpDblT&, const FpDblT&, const FpDblT&);
-#endif
 template<class Fp> void (*FpDblT<Fp>::mulPre)(FpDblT&, const Fp&, const Fp&);
+#endif
 template<class Fp> void (*FpDblT<Fp>::sqrPre)(FpDblT&, const Fp&);
 
 template<class Fp> struct Fp12T;

From ebbb5cf6cc96b89ff76eb996d2a43d3e2d5a3668 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 8 Feb 2021 14:47:48 +0900
Subject: [PATCH 407/553] remove fpDbl_sqrPreA_

---
 include/mcl/fp_tower.hpp | 7 +------
 include/mcl/op.hpp       | 2 --
 src/fp_generator.hpp     | 4 ++--
 src/fp_static_code.hpp   | 2 +-
 4 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index b39f7b31..65aedd77 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -128,7 +128,6 @@ class FpDblT : public fp::Serializable<FpDblT<Fp> > {
 	static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
 	static void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
 #endif
-	static void sqrPreC(FpDblT& xx, const Fp& x) { Fp::op_.fpDbl_sqrPre(xx.v_, x.v_); }
 	/*
 		mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy)
 	*/
@@ -155,11 +154,7 @@ class FpDblT : public fp::Serializable<FpDblT<Fp> > {
 		mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPreA_);
 		if (mulPre == 0) mulPre = mulPreC;
 #endif
-		if (op.fpDbl_sqrPreA_) {
-			sqrPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&)>(op.fpDbl_sqrPreA_);
-		} else {
-			sqrPre = sqrPreC;
-		}
+		sqrPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&)>(op.fpDbl_sqrPre);
 	}
 	void operator+=(const FpDblT& x) { add(*this, *this, x); }
 	void operator-=(const FpDblT& x) { sub(*this, *this, x); }
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 29ca9f85..b8c1dbee 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -222,7 +222,6 @@ struct Op {
 	void3u fpDbl_addA_;
 	void3u fpDbl_subA_;
 	void3u fpDbl_mulPreA_;
-	void2u fpDbl_sqrPreA_;
 	void2u fpDbl_modA_;
 	void3u fp2Dbl_mulPreA_;
 	void2u fp2Dbl_sqrPreA_;
@@ -309,7 +308,6 @@ struct Op {
 		fpDbl_addA_ = 0;
 		fpDbl_subA_ = 0;
 		fpDbl_mulPreA_ = 0;
-		fpDbl_sqrPreA_ = 0;
 		fpDbl_modA_ = 0;
 		fp2Dbl_mulPreA_ = 0;
 		fp2Dbl_sqrPreA_ = 0;
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 3227d931..5ab7f9c3 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -425,8 +425,8 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPreA_, getCurr());
 
 		align(16);
-		op.fpDbl_sqrPreA_ = gen_fpDbl_sqrPre();
-		setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPreA_, getCurr());
+		op.fpDbl_sqrPre = gen_fpDbl_sqrPre();
+		setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPre, getCurr());
 
 		align(16);
 		op.fp2_addA_ = gen_fp2_add();
diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp
index 09d4d01d..65844105 100644
--- a/src/fp_static_code.hpp
+++ b/src/fp_static_code.hpp
@@ -66,7 +66,7 @@ void setStaticCode(mcl::fp::Op& op)
 		op.fpDbl_addPre = mclx_FpDbl_addPre;
 		op.fpDbl_subPre = mclx_FpDbl_subPre;
 		op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre;
-		op.fpDbl_sqrPreA_ = mclx_FpDbl_sqrPre;
+		op.fpDbl_sqrPre = mclx_FpDbl_sqrPre;
 		op.fp2_addA_ = mclx_Fp2_add;
 		op.fp2_subA_ = mclx_Fp2_sub;
 		op.fp2_negA_ = mclx_Fp2_neg;

From 52a9f4d2135782a43fc2bf64a880ef97232e8f27 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 8 Feb 2021 15:28:31 +0900
Subject: [PATCH 408/553] remove mulPreC

---
 include/mcl/fp_tower.hpp |  9 +++------
 include/mcl/op.hpp       |  2 --
 src/fp_generator.hpp     | 28 +++++++++++++---------------
 src/fp_static_code.hpp   |  2 +-
 4 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 65aedd77..730a044c 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -113,24 +113,22 @@ class FpDblT : public fp::Serializable<FpDblT<Fp> > {
 	static void (*mod)(Fp& z, const FpDblT& xy);
 	static void (*addPre)(FpDblT& z, const FpDblT& x, const FpDblT& y);
 	static void (*subPre)(FpDblT& z, const FpDblT& x, const FpDblT& y);
-	static void (*mulPre)(FpDblT& xy, const Fp& x, const Fp& y);
 	static void addC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); }
 	static void subC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); }
 	static void modC(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); }
 	static void addPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); }
 	static void subPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
-	static void mulPreC(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
 #else
 	static void add(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); }
 	static void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); }
 	static void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); }
 	static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); }
 	static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
-	static void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
 #endif
 	/*
 		mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy)
 	*/
+	static void (*mulPre)(FpDblT& xy, const Fp& x, const Fp& y);
 	static void (*sqrPre)(FpDblT& xx, const Fp& x);
 	static void mulUnit(FpDblT& z, const FpDblT& x, Unit y)
 	{
@@ -151,9 +149,8 @@ class FpDblT : public fp::Serializable<FpDblT<Fp> > {
 		if (addPre == 0) addPre = addPreC;
 		subPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subPre);
 		if (subPre == 0) subPre = subPreC;
-		mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPreA_);
-		if (mulPre == 0) mulPre = mulPreC;
 #endif
+		mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPre);
 		sqrPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&)>(op.fpDbl_sqrPre);
 	}
 	void operator+=(const FpDblT& x) { add(*this, *this, x); }
@@ -166,8 +163,8 @@ template<class Fp> void (*FpDblT<Fp>::sub)(FpDblT&, const FpDblT&, const FpDblT&
 template<class Fp> void (*FpDblT<Fp>::mod)(Fp&, const FpDblT&);
 template<class Fp> void (*FpDblT<Fp>::addPre)(FpDblT&, const FpDblT&, const FpDblT&);
 template<class Fp> void (*FpDblT<Fp>::subPre)(FpDblT&, const FpDblT&, const FpDblT&);
-template<class Fp> void (*FpDblT<Fp>::mulPre)(FpDblT&, const Fp&, const Fp&);
 #endif
+template<class Fp> void (*FpDblT<Fp>::mulPre)(FpDblT&, const Fp&, const Fp&);
 template<class Fp> void (*FpDblT<Fp>::sqrPre)(FpDblT&, const Fp&);
 
 template<class Fp> struct Fp12T;
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index b8c1dbee..e3d78d80 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -221,7 +221,6 @@ struct Op {
 	void2u fp2_sqrA_;
 	void3u fpDbl_addA_;
 	void3u fpDbl_subA_;
-	void3u fpDbl_mulPreA_;
 	void2u fpDbl_modA_;
 	void3u fp2Dbl_mulPreA_;
 	void2u fp2Dbl_sqrPreA_;
@@ -307,7 +306,6 @@ struct Op {
 		fp2_sqrA_ = 0;
 		fpDbl_addA_ = 0;
 		fpDbl_subA_ = 0;
-		fpDbl_mulPreA_ = 0;
 		fpDbl_modA_ = 0;
 		fp2Dbl_mulPreA_ = 0;
 		fp2Dbl_sqrPreA_ = 0;
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 5ab7f9c3..ef38b638 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -421,11 +421,11 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		setFuncInfo(prof_, suf, "Dbl_subPre", op.fpDbl_subPre, getCurr());
 
 		align(16);
-		op.fpDbl_mulPreA_ = gen_fpDbl_mulPre();
-		setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPreA_, getCurr());
+		gen_fpDbl_mulPre(op.fpDbl_mulPre);
+		setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPre, getCurr());
 
 		align(16);
-		op.fpDbl_sqrPre = gen_fpDbl_sqrPre();
+		gen_fpDbl_sqrPre(op.fpDbl_sqrPre);
 		setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPre, getCurr());
 
 		align(16);
@@ -2373,36 +2373,35 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		cmovc_rr(zp, keep);
 		store_mr(z, zp);
 	}
-	void2u gen_fpDbl_sqrPre()
+	void gen_fpDbl_sqrPre(void2u& f)
 	{
 		void2u func = getCurr<void2u>();
 		if (pn_ == 2 && useMulx_) {
 			StackFrame sf(this, 2, 7 | UseRDX);
 			sqrPre2(sf.p[0], sf.p[1], sf.t);
-			return func;
+			f = func;
 		}
 		if (pn_ == 3) {
 			StackFrame sf(this, 3, 10 | UseRDX);
 			Pack t = sf.t;
 			t.append(sf.p[2]);
 			sqrPre3(sf.p[0], sf.p[1], t);
-			return func;
+			f = func;
 		}
 		if (pn_ == 4 && useMulx_) {
 			StackFrame sf(this, 3, 10 | UseRDX);
 			Pack t = sf.t;
 			t.append(sf.p[2]);
 			sqrPre4(sf.p[0], sf.p[1], t);
-			return func;
+			f = func;
 		}
 		if (pn_ == 6 && useMulx_ && useAdx_) {
 			StackFrame sf(this, 3, 10 | UseRDX, 6 * 8);
 			Pack t = sf.t;
 			t.append(sf.p[2]);
 			sqrPre6(sf.p[0], sf.p[1], t);
-			return func;
+			f = func;
 		}
-		return 0;
 #if 0
 #ifdef XBYAK64_WIN
 		mov(r8, rdx);
@@ -2413,18 +2412,18 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		return func;
 #endif
 	}
-	void3u gen_fpDbl_mulPre()
+	void gen_fpDbl_mulPre(void3u& f)
 	{
 		void3u func = getCurr<void3u>();
 		if (pn_ == 2 && useMulx_) {
 			StackFrame sf(this, 3, 5 | UseRDX);
 			mulPre2(sf.p[0], sf.p[1], sf.p[2], sf.t);
-			return func;
+			f = func;
 		}
 		if (pn_ == 3) {
 			StackFrame sf(this, 3, 10 | UseRDX);
 			mulPre3(sf.p[0], sf.p[1], sf.p[2], sf.t);
-			return func;
+			f = func;
 		}
 		if (pn_ == 4) {
 			/*
@@ -2437,7 +2436,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		L(mulPreL); // called only from asm code
 			mulPre4(gp0, gp1, gp2, sf.t);
 			ret();
-			return func;
+			f = func;
 		}
 		if (pn_ == 6 && useAdx_) {
 			StackFrame sf(this, 3, 10 | UseRDX, 0, false);
@@ -2446,9 +2445,8 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		L(mulPreL); // called only from asm code
 			mulPre6(sf.t);
 			ret();
-			return func;
+			f = func;
 		}
-		return 0;
 	}
 	static inline void debug_put_inner(const uint64_t *ptr, int n)
 	{
diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp
index 65844105..7421f0ac 100644
--- a/src/fp_static_code.hpp
+++ b/src/fp_static_code.hpp
@@ -65,7 +65,7 @@ void setStaticCode(mcl::fp::Op& op)
 		op.fpDbl_subA_ = mclx_FpDbl_sub;
 		op.fpDbl_addPre = mclx_FpDbl_addPre;
 		op.fpDbl_subPre = mclx_FpDbl_subPre;
-		op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre;
+		op.fpDbl_mulPre = mclx_FpDbl_mulPre;
 		op.fpDbl_sqrPre = mclx_FpDbl_sqrPre;
 		op.fp2_addA_ = mclx_Fp2_add;
 		op.fp2_subA_ = mclx_Fp2_sub;

From 6afa976ad2beb81a852bf39419235da440c0dc6c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 8 Feb 2021 16:21:59 +0900
Subject: [PATCH 409/553] rename local template function

---
 include/mcl/fp_tower.hpp | 6 +++---
 src/fp.cpp               | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 730a044c..26d96e19 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -680,9 +680,9 @@ struct Fp2DblT {
 			mulPre = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2&, const Fp2&)>(op.fp2Dbl_mulPreA_);
 		} else {
 			if (op.isFullBit) {
-				mulPre = fp2Dbl_mulPreW<true>;
+				mulPre = fp2Dbl_mulPreTW<true>;
 			} else {
-				mulPre = fp2Dbl_mulPreW<false>;
+				mulPre = fp2Dbl_mulPreTW<false>;
 			}
 		}
 		if (op.fp2Dbl_sqrPreA_) {
@@ -700,7 +700,7 @@ struct Fp2DblT {
 		@note mod of NIST_P192 is fast
 	*/
 	template<bool isFullBit>
-	static void fp2Dbl_mulPreW(Fp2DblT& z, const Fp2& x, const Fp2& y)
+	static void fp2Dbl_mulPreTW(Fp2DblT& z, const Fp2& x, const Fp2& y)
 	{
 		const Fp& a = x.a;
 		const Fp& b = x.b;
diff --git a/src/fp.cpp b/src/fp.cpp
index cd3266ed..05345807 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -426,6 +426,7 @@ void setWasmOp(Op& op)
 	op.fp_mul = mcl::mulMontT<N>;
 	op.fp_sqr = mcl::sqrMontT<N>;
 	op.fpDbl_mulPre = mulT<N>;
+//	op.fpDbl_sqrPre = sqrT<N>;
 	op.fpDbl_mod = modT<N>;
 }
 #endif

From 3be1f32788b9c13234fe6acc67a2b9fe807c7fc0 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 8 Feb 2021 16:28:42 +0900
Subject: [PATCH 410/553] v1.32

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index e3d78d80..f39766d4 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x131; /* 0xABC = A.BC */
+static const int version = 0x132; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From b4abccd9a5581de28afbf3831b15e104b69c82b7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 9 Feb 2021 04:52:05 +0900
Subject: [PATCH 411/553] fix : disable bmi2 for old cpus

---
 src/fp.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fp.cpp b/src/fp.cpp
index 05345807..dd2a15b6 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -394,6 +394,7 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
 	return true;
 #endif
 #elif defined(MCL_STATIC_CODE)
+	if (mode != FP_XBYAK) return true;
 	fp::setStaticCode(op);
 	bool enableInv = true;
 #endif // MCL_USE_XBYAK

From c08437c973004cf64895da197eb7076d44354aff Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 9 Feb 2021 04:52:42 +0900
Subject: [PATCH 412/553] v1.33

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index f39766d4..be7adb4e 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x132; /* 0xABC = A.BC */
+static const int version = 0x133; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From 9a3add8fe6646f64927a2f46800fffb13ffb05bc Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 9 Feb 2021 16:13:46 +0900
Subject: [PATCH 413/553] use adcx instead of adc

---
 src/fp_generator.hpp | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index ef38b638..aaaaa4a8 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1774,12 +1774,20 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(d, ptr [px]);
 		mulx(pd[0], a, ptr [py + 8 * 0]);
 		mov(ptr [pz + 8 * 0], a);
-		for (size_t i = 1; i < pd.size(); i++) {
-			mulx(pd[i], a, ptr [py + 8 * i]);
-			if (i == 1) {
-				add(pd[i - 1], a);
-			} else {
-				adc(pd[i - 1], a);
+		if (useAdx_) {
+			xor_(a, a);
+			for (size_t i = 1; i < pd.size(); i++) {
+				mulx(pd[i], a, ptr [py + 8 * i]);
+				adcx(pd[i - 1], a);
+			}
+		} else {
+			for (size_t i = 1; i < pd.size(); i++) {
+				mulx(pd[i], a, ptr [py + 8 * i]);
+				if (i == 1) {
+					add(pd[i - 1], a);
+				} else {
+					adc(pd[i - 1], a);
+				}
 			}
 		}
 		adc(pd[pd.size() - 1], 0);
@@ -3783,9 +3791,9 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		if (pn_ == 4) {
 			gen_raw_fp_sub((RegExp)d0 + pn_ * 8, (RegExp)d0 + pn_ * 8, (RegExp)d2 + pn_ * 8, Pack(gt0, gt1, gt2, gt3, gt4, gt5, gt6, gt7), true);
 		} else {
-			lea(gp0, ptr[d0]);
-			lea(gp2, ptr[d2]);
-			gen_raw_fp_sub6(gp0, gp0, gp2, pn_ * 8, sf.t.sub(0, 6), true);
+			lea(gp0, ptr[(RegExp)d0 + pn_ * 8]);
+			lea(gp2, ptr[(RegExp)d2 + pn_ * 8]);
+			gen_raw_fp_sub6(gp0, gp0, gp2, 0, sf.t.sub(0, 6), true);
 		}
 
 		mov(gp0, ptr [z]);

From 28c8a0b489df0b8468a42efbe6c6a3f30bc96b08 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 10 Feb 2021 17:38:00 +0900
Subject: [PATCH 414/553] field extension assumes non-fullbit

---
 include/mcl/fp_tower.hpp | 1 +
 test/fp_tower_test.cpp   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 26d96e19..4c25e879 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -675,6 +675,7 @@ struct Fp2DblT {
 	void operator-=(const Fp2DblT& x) { sub(*this, *this, x); }
 	static void init()
  	{
+		assert(!Fp::getOp().isFullBit);
 		const mcl::fp::Op& op = Fp::getOp();
 		if (op.fp2Dbl_mulPreA_) {
 			mulPre = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2&, const Fp2&)>(op.fp2Dbl_mulPreA_);
diff --git a/test/fp_tower_test.cpp b/test/fp_tower_test.cpp
index 45763762..b5e77db9 100644
--- a/test/fp_tower_test.cpp
+++ b/test/fp_tower_test.cpp
@@ -401,6 +401,7 @@ void test(const char *p, mcl::fp::Mode mode)
 	const int xi_a = 1;
 	Fp::init(xi_a, p, mode);
 	printf("mode=%s\n", mcl::fp::ModeToStr(mode));
+	if (Fp::getOp().isFullBit) return;
 	Fp2::init();
 	printf("bitSize=%d\n", (int)Fp::getBitSize());
 #if 0

From bcf5961f707b1cdd445970383e250936d798e444 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 11 Feb 2021 08:36:54 +0900
Subject: [PATCH 415/553] add mul2

---
 include/mcl/fp.hpp |  6 ++++++
 include/mcl/op.hpp |  4 ++++
 src/fp.cpp         | 24 ++++++++++++++++++++++++
 test/bench.hpp     |  2 ++
 test/fp_test.cpp   | 14 ++++++++++++++
 5 files changed, 50 insertions(+)

diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index b7075d47..d49b6bef 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -163,6 +163,8 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 		if (mul == 0) mul = mulC;
 		sqr = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_sqrA_);
 		if (sqr == 0) sqr = sqrC;
+		mul2 = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_mul2A_);
+		if (mul2 == 0) mul2 = mul2C;
 #endif
 		*pb = true;
 	}
@@ -495,12 +497,15 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 	static inline void mulC(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); }
 	static void (*sqr)(FpT& y, const FpT& x);
 	static inline void sqrC(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); }
+	static void (*mul2)(FpT& y, const FpT& x);
+	static inline void mul2C(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); }
 #else
 	static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); }
 	static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); }
 	static inline void neg(FpT& y, const FpT& x) { op_.fp_neg(y.v_, x.v_, op_.p); }
 	static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); }
 	static inline void sqr(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); }
+	static inline void mul2(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); }
 #endif
 	static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); }
 	static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); }
@@ -740,6 +745,7 @@ template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::sub)(FpT& z,
 template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::neg)(FpT& y, const FpT& x);
 template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul)(FpT& z, const FpT& x, const FpT& y);
 template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::sqr)(FpT& y, const FpT& x);
+template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul2)(FpT& y, const FpT& x);
 #endif
 
 } // mcl
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index be7adb4e..8753f762 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -214,6 +214,7 @@ struct Op {
 	void2u fp_negA_;
 	void3u fp_mulA_;
 	void2u fp_sqrA_;
+	void2u fp_mul2A_;
 	void3u fp2_addA_;
 	void3u fp2_subA_;
 	void2u fp2_negA_;
@@ -231,6 +232,7 @@ struct Op {
 	void1u fp_clear;
 	void2u fp_copy;
 	void2u fp_shr1;
+	void3u fp_mul2;
 	void3u fp_neg;
 	void4u fp_add;
 	void4u fp_sub;
@@ -299,6 +301,7 @@ struct Op {
 		fp_negA_ = 0;
 		fp_mulA_ = 0;
 		fp_sqrA_ = 0;
+		fp_mul2A_ = 0;
 		fp2_addA_ = 0;
 		fp2_subA_ = 0;
 		fp2_negA_ = 0;
@@ -316,6 +319,7 @@ struct Op {
 		fp_clear = 0;
 		fp_copy = 0;
 		fp_shr1 = 0;
+		fp_mul2 = 0;
 		fp_neg = 0;
 		fp_add = 0;
 		fp_sub = 0;
diff --git a/src/fp.cpp b/src/fp.cpp
index dd2a15b6..fbf0ab59 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -261,6 +261,28 @@ struct SetFpDbl<N, true> {
 	}
 };
 
+// assume !isFullBit
+template<size_t N>
+void Mul2(Unit *y, const Unit *x, const Unit *p)
+{
+	const size_t bit = 1;
+	const size_t rBit = sizeof(Unit) * 8 - bit;
+	Unit prev = x[N - 1];
+	for (size_t i = N - 1; i > 0; i--) {
+		Unit t = x[i - 1];
+		y[i] = (prev << bit) | (t >> rBit);
+		prev = t;
+	}
+	y[0] = prev << bit;
+	for (size_t i = 0; i < N; i++) {
+		Unit a = y[N - 1 - i];
+		Unit b = p[N - 1 - i];
+		if (a < b) return;
+		if (a > b) break;
+	}
+	SubPre<N, Gtag>::f(y, y, p);
+}
+
 template<size_t N, class Tag, bool enableFpDbl, bool gmpIsFasterThanLLVM>
 void setOp2(Op& op)
 {
@@ -269,9 +291,11 @@ void setOp2(Op& op)
 	if (op.isFullBit) {
 		op.fp_add = Add<N, true, Tag>::f;
 		op.fp_sub = Sub<N, true, Tag>::f;
+		op.fp_mul2 = 0; // not supported
 	} else {
 		op.fp_add = Add<N, false, Tag>::f;
 		op.fp_sub = Sub<N, false, Tag>::f;
+		op.fp_mul2 = Mul2<N>;
 	}
 	if (op.isMont) {
 		if (op.isFullBit) {
diff --git a/test/bench.hpp b/test/bench.hpp
index 09af59e2..094d0827 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -102,6 +102,8 @@ void testBench(const G1& P, const G2& Q)
 #endif
 	CYBOZU_BENCH_C("Fp::add       ", C3, Fp::add, x, x, y);
 	CYBOZU_BENCH_C("Fp::sub       ", C3, Fp::sub, x, x, y);
+	CYBOZU_BENCH_C("Fp::add 2     ", C3, Fp::add, x, x, x);
+	CYBOZU_BENCH_C("Fp::mul2      ", C3, Fp::mul2, x, x);
 	CYBOZU_BENCH_C("Fp::neg       ", C3, Fp::neg, x, x);
 	CYBOZU_BENCH_C("Fp::mul       ", C3, Fp::mul, x, x, y);
 	CYBOZU_BENCH_C("Fp::sqr       ", C3, Fp::sqr, x, x);
diff --git a/test/fp_test.cpp b/test/fp_test.cpp
index 70fef8a8..f2af6a23 100644
--- a/test/fp_test.cpp
+++ b/test/fp_test.cpp
@@ -919,6 +919,19 @@ CYBOZU_TEST_AUTO(mod_NIST_P521)
 }
 #endif
 
+void mul2Test()
+{
+	if (Fp::getOp().isFullBit) return;
+	const int x0 = 1234567;
+	Fp x = x0;
+	mpz_class mx = x0;
+	for (size_t i = 0; i < 100; i++) {
+		Fp::mul2(x, x);
+		mx = (mx * 2) % Fp::getOp().mp;
+		CYBOZU_TEST_EQUAL(mx, x.getMpz());
+	}
+}
+
 void sub(mcl::fp::Mode mode)
 {
 	printf("mode=%s\n", mcl::fp::ModeToStr(mode));
@@ -962,6 +975,7 @@ void sub(mcl::fp::Mode mode)
 		const char *pStr = tbl[i];
 		printf("prime=%s\n", pStr);
 		Fp::init(pStr, mode);
+		mul2Test();
 		cstrTest();
 		setStrTest();
 		streamTest();

From 40343a4a693025049328ef56630500e26ea37c31 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 11 Feb 2021 19:49:37 +0900
Subject: [PATCH 416/553] add mul2 to fp_generator and fp_static_code

---
 include/mcl/op.hpp     |  4 ++--
 src/fp_generator.hpp   | 31 +++++++++++++++++++++++++++++++
 src/fp_static_code.hpp |  4 ++++
 test/bench.hpp         |  2 ++
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 8753f762..ea5c379d 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -232,12 +232,12 @@ struct Op {
 	void1u fp_clear;
 	void2u fp_copy;
 	void2u fp_shr1;
-	void3u fp_mul2;
 	void3u fp_neg;
 	void4u fp_add;
 	void4u fp_sub;
 	void4u fp_mul;
 	void3u fp_sqr;
+	void3u fp_mul2;
 	void2uOp fp_invOp;
 	void2uIu fp_mulUnit; // fpN1_mod + fp_mulUnitPre
 
@@ -319,12 +319,12 @@ struct Op {
 		fp_clear = 0;
 		fp_copy = 0;
 		fp_shr1 = 0;
-		fp_mul2 = 0;
 		fp_neg = 0;
 		fp_add = 0;
 		fp_sub = 0;
 		fp_mul = 0;
 		fp_sqr = 0;
+		fp_mul2 = 0;
 		fp_invOp = 0;
 		fp_mulUnit = 0;
 
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index aaaaa4a8..684ecb39 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -379,6 +379,10 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		op.fp_shr1 = gen_shr1();
 		setFuncInfo(prof_, suf, "_shr1", op.fp_shr1, getCurr());
 
+		align(16);
+		op.fp_mul2A_ = gen_mul2();
+		setFuncInfo(prof_, suf, "_mul2", op.fp_mul2A_, getCurr());
+
 		align(16);
 		op.fp_negA_ = gen_fp_neg();
 		setFuncInfo(prof_, suf, "_neg", op.fp_negA_, getCurr());
@@ -915,6 +919,33 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(ptr [pz + (pn_ - 1) * 8], *t0);
 		return func;
 	}
+	void2u gen_mul2()
+	{
+		if (isFullBit_) return 0;
+		if (!(pn_ == 4 || pn_ == 6)) return 0;
+		void2u func = getCurr<void2u>();
+		const int n = pn_ * 2 - 2;
+		StackFrame sf(this, 2, n);
+		Pack x = sf.t.sub(0, pn_);
+		load_rm(x, sf.p[1]);
+#if 0
+		add_rr(x, x);
+#else
+		for (int i = pn_ - 1; i > 0; i--) {
+			shld(x[i], x[i - 1], 1);
+		}
+		shl(x[0], 1);
+#endif
+		Pack t = sf.t.sub(pn_, n - pn_);
+		t.append(sf.p[1]);
+		t.append(rax); // destroy last
+		mov_rr(t, x);
+		lea(rax, ptr[rip + pL_]);
+		sub_rm(t, rax);
+		cmovc_rr(t, x);
+		store_mr(sf.p[0], t);
+		return func;
+	}
 	void3u gen_mul()
 	{
 		void3u func = getCurr<void3u>();
diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp
index 7421f0ac..705e46e4 100644
--- a/src/fp_static_code.hpp
+++ b/src/fp_static_code.hpp
@@ -23,6 +23,7 @@ void mclx_Fp_neg(Unit*, const Unit*);
 void mclx_FpDbl_mod(Unit*, const Unit*);
 void mclx_Fp_mul(Unit*, const Unit*, const Unit*);
 void mclx_Fp_sqr(Unit*, const Unit*);
+void mclx_Fp_mul2(Unit*, const Unit*);
 void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
 void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
 int mclx_Fp_preInv(Unit*, const Unit*);
@@ -45,6 +46,7 @@ void mclx_Fr_shr1(Unit*, const Unit*);
 void mclx_Fr_neg(Unit*, const Unit*);
 void mclx_Fr_mul(Unit*, const Unit*, const Unit*);
 void mclx_Fr_sqr(Unit*, const Unit*);
+void mclx_Fr_mul2(Unit*, const Unit*);
 int mclx_Fr_preInv(Unit*, const Unit*);
 } // extern "C"
 
@@ -61,6 +63,7 @@ void setStaticCode(mcl::fp::Op& op)
 		op.fpDbl_modA_ = mclx_FpDbl_mod;
 		op.fp_mulA_ = mclx_Fp_mul;
 		op.fp_sqrA_ = mclx_Fp_sqr;
+		op.fp_mul2A_ = mclx_Fp_mul2;
 		op.fpDbl_addA_ = mclx_FpDbl_add;
 		op.fpDbl_subA_ = mclx_FpDbl_sub;
 		op.fpDbl_addPre = mclx_FpDbl_addPre;
@@ -85,6 +88,7 @@ void setStaticCode(mcl::fp::Op& op)
 		op.fp_negA_ = mclx_Fr_neg;
 		op.fp_mulA_ = mclx_Fr_mul;
 		op.fp_sqrA_ = mclx_Fr_sqr;
+		op.fp_mul2A_ = mclx_Fr_mul2;
 		op.fp_preInv = mclx_Fr_preInv;
 	}
 	op.fp_mul = fp::func_ptr_cast<void4u>(op.fp_mulA_);
diff --git a/test/bench.hpp b/test/bench.hpp
index 094d0827..7359181f 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -116,6 +116,8 @@ void testBench(const G1& P, const G2& Q)
 		CYBOZU_BENCH_C("Fr::add       ", C3, Fr::add, a, a, b);
 		CYBOZU_BENCH_C("Fr::sub       ", C3, Fr::sub, a, a, b);
 		CYBOZU_BENCH_C("Fr::neg       ", C3, Fr::neg, a, a);
+		CYBOZU_BENCH_C("Fr::add 2     ", C3, Fr::add, a, a, b);
+		CYBOZU_BENCH_C("Fr::mul2      ", C3, Fr::mul2, a, a);
 		CYBOZU_BENCH_C("Fr::mul       ", C3, Fr::mul, a, a, b);
 		CYBOZU_BENCH_C("Fr::sqr       ", C3, Fr::sqr, a, a);
 		CYBOZU_BENCH_C("Fr::inv       ", C3, invAdd, a, a, b);

From 630abea0c12468012782e726bbde345749da99db Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 11 Feb 2021 20:26:49 +0900
Subject: [PATCH 417/553] add Fp2::mul2

---
 include/mcl/fp_tower.hpp | 10 +++++++
 include/mcl/op.hpp       |  2 ++
 src/fp_generator.hpp     | 57 +++++++++++++++++++++++++++++-----------
 src/fp_static_code.hpp   |  2 ++
 test/bench.hpp           |  1 +
 test/common_test.hpp     | 24 +++++++++++++++++
 6 files changed, 81 insertions(+), 15 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 4c25e879..d85e636d 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -226,12 +226,14 @@ class Fp2T : public fp::Serializable<Fp2T<_Fp>,
 	static void (*neg)(Fp2T& y, const Fp2T& x);
 	static void (*mul)(Fp2T& z, const Fp2T& x, const Fp2T& y);
 	static void (*sqr)(Fp2T& y, const Fp2T& x);
+	static void (*mul2)(Fp2T& y, const Fp2T& x);
 #else
 	static void add(Fp2T& z, const Fp2T& x, const Fp2T& y) { addC(z, x, y); }
 	static void sub(Fp2T& z, const Fp2T& x, const Fp2T& y) { subC(z, x, y); }
 	static void neg(Fp2T& y, const Fp2T& x) { negC(y, x); }
 	static void mul(Fp2T& z, const Fp2T& x, const Fp2T& y) { mulC(z, x, y); }
 	static void sqr(Fp2T& y, const Fp2T& x) { sqrC(y, x); }
+	static void mul2(Fp2T& y, const Fp2T& x) { mul2C(y, x); }
 #endif
 	static void (*mul_xi)(Fp2T& y, const Fp2T& x);
 	static void addPre(Fp2T& z, const Fp2T& x, const Fp2T& y) { Fp::addPre(z.a, x.a, y.a); Fp::addPre(z.b, x.b, y.b); }
@@ -386,6 +388,8 @@ class Fp2T : public fp::Serializable<Fp2T<_Fp>,
 		if (mul == 0) mul = mulC;
 		sqr = fp::func_ptr_cast<void (*)(Fp2T& y, const Fp2T& x)>(op.fp2_sqrA_);
 		if (sqr == 0) sqr = sqrC;
+		mul2 = fp::func_ptr_cast<void (*)(Fp2T& y, const Fp2T& x)>(op.fp2_mul2A_);
+		if (mul2 == 0) mul2 = mul2C;
 		mul_xi = fp::func_ptr_cast<void (*)(Fp2T&, const Fp2T&)>(op.fp2_mul_xiA_);
 #endif
 		op.fp2_inv = fp2_invW;
@@ -483,6 +487,11 @@ class Fp2T : public fp::Serializable<Fp2T<_Fp>,
 		Fp::neg(y.a, x.a);
 		Fp::neg(y.b, x.b);
 	}
+	static void mul2C(Fp2T& y, const Fp2T& x)
+	{
+		Fp::mul2(y.a, x.a);
+		Fp::mul2(y.b, x.b);
+	}
 #if 0
 	/*
 		x = a + bi, y = c + di, i^2 = -1
@@ -607,6 +616,7 @@ template<class Fp_> void (*Fp2T<Fp_>::sub)(Fp2T& z, const Fp2T& x, const Fp2T& y
 template<class Fp_> void (*Fp2T<Fp_>::neg)(Fp2T& y, const Fp2T& x);
 template<class Fp_> void (*Fp2T<Fp_>::mul)(Fp2T& z, const Fp2T& x, const Fp2T& y);
 template<class Fp_> void (*Fp2T<Fp_>::sqr)(Fp2T& y, const Fp2T& x);
+template<class Fp_> void (*Fp2T<Fp_>::mul2)(Fp2T& y, const Fp2T& x);
 #endif
 template<class Fp_> void (*Fp2T<Fp_>::mul_xi)(Fp2T& y, const Fp2T& x);
 
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index ea5c379d..fc617982 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -220,6 +220,7 @@ struct Op {
 	void2u fp2_negA_;
 	void3u fp2_mulA_;
 	void2u fp2_sqrA_;
+	void2u fp2_mul2A_;
 	void3u fpDbl_addA_;
 	void3u fpDbl_subA_;
 	void2u fpDbl_modA_;
@@ -307,6 +308,7 @@ struct Op {
 		fp2_negA_ = 0;
 		fp2_mulA_ = 0;
 		fp2_sqrA_ = 0;
+		fp2_mul2A_ = 0;
 		fpDbl_addA_ = 0;
 		fpDbl_subA_ = 0;
 		fpDbl_modA_ = 0;
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 684ecb39..64e6ebe0 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -444,6 +444,10 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		op.fp2_negA_ = gen_fp2_neg();
 		setFuncInfo(prof_, suf, "2_neg", op.fp2_negA_, getCurr());
 
+		align(16);
+		op.fp2_mul2A_ = gen_fp2_mul2();
+		setFuncInfo(prof_, suf, "2_mul2", op.fp2_mul2A_, getCurr());
+
 		op.fp2_mulNF = 0;
 		align(16);
 		op.fp2Dbl_mulPreA_ = gen_fp2Dbl_mulPre();
@@ -919,33 +923,56 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(ptr [pz + (pn_ - 1) * 8], *t0);
 		return func;
 	}
+	// x = x << 1
+	void shl1(const Pack& x)
+	{
+		for (int i = x.size() - 1; i > 0; i--) {
+			shld(x[i], x[i - 1], 1);
+		}
+		shl(x[0], 1);
+	}
+	/*
+		y = (x >= p[]) x - p[] : x
+	*/
+	void sub_mod(const Pack& y, const Pack& x, const RegExp& p)
+	{
+		mov_rr(y, x);
+		sub_rm(y, p);
+		cmovc_rr(y, x);
+	}
 	void2u gen_mul2()
 	{
-		if (isFullBit_) return 0;
-		if (!(pn_ == 4 || pn_ == 6)) return 0;
+		if (isFullBit_ || pn_ > 6) return 0;
 		void2u func = getCurr<void2u>();
-		const int n = pn_ * 2 - 2;
+		const int n = pn_ * 2 - 1;
 		StackFrame sf(this, 2, n);
 		Pack x = sf.t.sub(0, pn_);
 		load_rm(x, sf.p[1]);
-#if 0
-		add_rr(x, x);
-#else
-		for (int i = pn_ - 1; i > 0; i--) {
-			shld(x[i], x[i - 1], 1);
-		}
-		shl(x[0], 1);
-#endif
+		shl1(x);
 		Pack t = sf.t.sub(pn_, n - pn_);
 		t.append(sf.p[1]);
-		t.append(rax); // destroy last
-		mov_rr(t, x);
 		lea(rax, ptr[rip + pL_]);
-		sub_rm(t, rax);
-		cmovc_rr(t, x);
+		sub_mod(t, x, rax);
 		store_mr(sf.p[0], t);
 		return func;
 	}
+	void2u gen_fp2_mul2()
+	{
+		if (isFullBit_ || pn_ > 6) return 0;
+		void2u func = getCurr<void2u>();
+		const int n = pn_ * 2;
+		StackFrame sf(this, 2, n);
+		Pack x = sf.t.sub(0, pn_);
+		Pack t = sf.t.sub(pn_, pn_);
+		lea(rax, ptr[rip + pL_]);
+		for (int i = 0; i < 2; i++) {
+			load_rm(x, sf.p[1] + FpByte_ * i);
+			shl1(x);
+			sub_mod(t, x, rax);
+			store_mr(sf.p[0] + FpByte_ * i, t);
+		}
+		return func;
+	}
 	void3u gen_mul()
 	{
 		void3u func = getCurr<void3u>();
diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp
index 705e46e4..d3965260 100644
--- a/src/fp_static_code.hpp
+++ b/src/fp_static_code.hpp
@@ -36,6 +36,7 @@ void mclx_Fp2_sub(Unit*, const Unit*, const Unit*);
 void mclx_Fp2_neg(Unit*, const Unit*);
 void mclx_Fp2_mul(Unit*, const Unit*, const Unit*);
 void mclx_Fp2_sqr(Unit*, const Unit*);
+void mclx_Fp2_mul2(Unit*, const Unit*);
 void mclx_Fp2_mul_xi(Unit*, const Unit*);
 
 Unit mclx_Fr_addPre(Unit*, const Unit*, const Unit*);
@@ -76,6 +77,7 @@ void setStaticCode(mcl::fp::Op& op)
 		op.fp2_mulNF = 0;
 		op.fp2_mulA_ = mclx_Fp2_mul;
 		op.fp2_sqrA_ = mclx_Fp2_sqr;
+		op.fp2_mul2A_ = mclx_Fp2_mul2;
 		op.fp2_mul_xiA_ = mclx_Fp2_mul_xi;
 		op.fp_preInv = mclx_Fp_preInv;
 	} else {
diff --git a/test/bench.hpp b/test/bench.hpp
index 7359181f..660aa133 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -135,6 +135,7 @@ void testBench(const G1& P, const G2& Q)
 	CYBOZU_BENCH_C("Fp2::add      ", C3, Fp2::add, xx, xx, yy);
 	CYBOZU_BENCH_C("Fp2::sub      ", C3, Fp2::sub, xx, xx, yy);
 	CYBOZU_BENCH_C("Fp2::neg      ", C3, Fp2::neg, xx, xx);
+	CYBOZU_BENCH_C("Fp2::mul2     ", C3, Fp2::mul2, xx, xx);
 	CYBOZU_BENCH_C("Fp2::mul      ", C3, Fp2::mul, xx, xx, yy);
 	CYBOZU_BENCH_C("Fp2::mul_xi   ", C3, Fp2::mul_xi, xx, xx);
 	CYBOZU_BENCH_C("Fp2::sqr      ", C3, Fp2::sqr, xx, xx);
diff --git a/test/common_test.hpp b/test/common_test.hpp
index 54d3beda..b35b9bc8 100644
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@@ -103,8 +103,32 @@ void testMulCT(const G& P)
 	}
 }
 
+void testMul2()
+{
+	puts("testMul2");
+	cybozu::XorShift rg;
+	Fp x1, x2;
+	x1.setByCSPRNG(rg);
+	x2 = x1;
+	for (int i = 0; i < 100; i++) {
+		Fp::mul2(x1, x1);
+		x2 += x2;
+		CYBOZU_TEST_EQUAL(x1, x2);
+	}
+	Fp2 y1;
+	y1.a = x1;
+	y1.b = -x1;
+	Fp2 y2 = y1;
+	for (int i = 0; i < 100; i++) {
+		Fp2::mul2(y1, y1);
+		y2 += y2;
+		CYBOZU_TEST_EQUAL(y1, y2);
+	}
+}
+
 void testCommon(const G1& P, const G2& Q)
 {
+	testMul2();
 	puts("G1");
 	testMulVec(P);
 	puts("G2");

From 744176ab8e1dd6d6836c52b44336bc709a54fd4a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 11 Feb 2021 20:56:52 +0900
Subject: [PATCH 418/553] use mul2

---
 include/mcl/bn.hpp       | 48 ++++++++++++++++++++++++++--------------
 include/mcl/fp_tower.hpp |  3 ++-
 2 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index f2a3885e..031c552b 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -154,7 +154,8 @@ struct Compress {
 		assert(&nume != &denomi);
 
 		if (g2_.isZero()) {
-			Fp2::add(nume, g4_, g4_);
+//			Fp2::add(nume, g4_, g4_);
+			Fp2::mul2(nume, g4_);
 			nume *= g5_;
 			denomi = g3_;
 		} else {
@@ -163,7 +164,8 @@ struct Compress {
 			Fp2::mul_xi(denomi, nume);
 			Fp2::sqr(nume, g4_);
 			Fp2::sub(t, nume, g3_);
-			t += t;
+//			t += t;
+			Fp2::mul2(t, t);
 			t += nume;
 			Fp2::add(nume, denomi, t);
 			Fp2::divBy4(nume, nume);
@@ -180,7 +182,8 @@ struct Compress {
 		Fp2::sqr(t0, g1_);
 		Fp2::mul(t1, g3_, g4_);
 		t0 -= t1;
-		t0 += t0;
+//		t0 += t0;
+		Fp2::mul2(t0, t0);
 		t0 -= t1;
 		Fp2::mul(t1, g2_, g5_);
 		t0 += t1;
@@ -219,23 +222,27 @@ struct Compress {
 		Fp2Dbl::sqrPre(T2, z.g2_);
 		Fp2::mul_xi(t1, t0);
 		z.g2_ += t1;
-		z.g2_ += z.g2_;
+//		z.g2_ += z.g2_;
+		Fp2::mul2(z.g2_, z.g2_);
 		z.g2_ += t1;
 		Fp2::sub(t1, t2, z.g3_);
-		t1 += t1;
+//		t1 += t1;
+		Fp2::mul2(t1, t1);
 		Fp2Dbl::sqrPre(T1, z.g3_);
 		Fp2::add(z.g3_, t1, t2);
 		Fp2Dbl::mul_xi(T0, T1);
 		T0 += T2;
 		Fp2Dbl::mod(t0, T0);
 		Fp2::sub(z.g4_, t0, z.g4_);
-		z.g4_ += z.g4_;
+//		z.g4_ += z.g4_;
+		Fp2::mul2(z.g4_, z.g4_);
 		z.g4_ += t0;
 		Fp2Dbl::addPre(T2, T2, T1);
 		T3 -= T2;
 		Fp2Dbl::mod(t0, T3);
 		z.g5_ += t0;
-		z.g5_ += z.g5_;
+//		z.g5_ += z.g5_;
+		Fp2::mul2(z.g5_, z.g5_);
 		z.g5_ += t0;
 	}
 	static void square_n(Compress& z, int n)
@@ -1077,26 +1084,32 @@ inline void fasterSqr(Fp12& y, const Fp12& x)
 	Fp2 t0, t1;
 	sqrFp4(t0, t1, x0, x1);
 	Fp2::sub(y0, t0, x0);
-	y0 += y0;
+//	y0 += y0;
+	Fp2::mul2(y0, y0);
 	y0 += t0;
 	Fp2::add(y1, t1, x1);
-	y1 += y1;
+//	y1 += y1;
+	Fp2::mul2(y1, y1);
 	y1 += t1;
 	Fp2 t2, t3;
 	sqrFp4(t0, t1, x2, x3);
 	sqrFp4(t2, t3, x4, x5);
 	Fp2::sub(y4, t0, x4);
-	y4 += y4;
+//	y4 += y4;
+	Fp2::mul2(y4, y4);
 	y4 += t0;
 	Fp2::add(y5, t1, x5);
-	y5 += y5;
+//	y5 += y5;
+	Fp2::mul2(y5, y5);
 	y5 += t1;
 	Fp2::mul_xi(t0, t3);
 	Fp2::add(y2, t0, x2);
-	y2 += y2;
+//	y2 += y2;
+	Fp2::mul2(y2, y2);
 	y2 += t0;
 	Fp2::sub(y3, t2, x3);
-	y3 += y3;
+//	y3 += y3;
+	Fp2::mul2(y3, y3);
 	y3 += t2;
 #endif
 }
@@ -1174,13 +1187,15 @@ inline void dblLineWithoutP(Fp6& l, G2& Q)
 	Fp2::sqr(t0, Q.z);
 	Fp2::mul(t4, Q.x, Q.y);
 	Fp2::sqr(t1, Q.y);
-	Fp2::add(t3, t0, t0);
+//	Fp2::add(t3, t0, t0);
+	Fp2::mul2(t3, t0);
 	Fp2::divBy2(t4, t4);
 	Fp2::add(t5, t0, t1);
 	t0 += t3;
 	mul_twist_b(t2, t0);
 	Fp2::sqr(t0, Q.x);
-	Fp2::add(t3, t2, t2);
+//	Fp2::add(t3, t2, t2);
+	Fp2::mul2(t3, t2);
 	t3 += t2;
 	Fp2::sub(Q.x, t1, t3);
 	t3 += t1;
@@ -1598,7 +1613,8 @@ inline void expHardPartBN(Fp12& y, const Fp12& x)
 inline void makeAdjP(G1& adjP, const G1& P)
 {
 	Fp x2;
-	Fp::add(x2, P.x, P.x);
+//	Fp::add(x2, P.x, P.x);
+	Fp::mul2(x2, P.x);
 	Fp::add(adjP.x, x2, P.x);
 	Fp::neg(adjP.y, P.y);
 	// adjP.z.clear(); // not used
diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index d85e636d..a17a10b3 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -541,7 +541,8 @@ class Fp2T : public fp::Serializable<Fp2T<_Fp>,
 		const Fp& b = x.b;
 #if 1 // faster than using FpDbl
 		Fp t1, t2, t3;
-		Fp::add(t1, b, b); // 2b
+//		Fp::add(t1, b, b); // 2b
+		Fp::mul2(t1, b);
 		t1 *= a; // 2ab
 		Fp::add(t2, a, b); // a + b
 		Fp::sub(t3, a, b); // a - b

From 4c56c8e5922e157fd0c52f56b1ec59fc544ce4ed Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 12 Feb 2021 11:37:47 +0900
Subject: [PATCH 419/553] remove unnecessary comments

---
 include/mcl/bn.hpp | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 031c552b..59f2407f 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -154,7 +154,6 @@ struct Compress {
 		assert(&nume != &denomi);
 
 		if (g2_.isZero()) {
-//			Fp2::add(nume, g4_, g4_);
 			Fp2::mul2(nume, g4_);
 			nume *= g5_;
 			denomi = g3_;
@@ -164,7 +163,6 @@ struct Compress {
 			Fp2::mul_xi(denomi, nume);
 			Fp2::sqr(nume, g4_);
 			Fp2::sub(t, nume, g3_);
-//			t += t;
 			Fp2::mul2(t, t);
 			t += nume;
 			Fp2::add(nume, denomi, t);
@@ -182,7 +180,6 @@ struct Compress {
 		Fp2::sqr(t0, g1_);
 		Fp2::mul(t1, g3_, g4_);
 		t0 -= t1;
-//		t0 += t0;
 		Fp2::mul2(t0, t0);
 		t0 -= t1;
 		Fp2::mul(t1, g2_, g5_);
@@ -222,11 +219,9 @@ struct Compress {
 		Fp2Dbl::sqrPre(T2, z.g2_);
 		Fp2::mul_xi(t1, t0);
 		z.g2_ += t1;
-//		z.g2_ += z.g2_;
 		Fp2::mul2(z.g2_, z.g2_);
 		z.g2_ += t1;
 		Fp2::sub(t1, t2, z.g3_);
-//		t1 += t1;
 		Fp2::mul2(t1, t1);
 		Fp2Dbl::sqrPre(T1, z.g3_);
 		Fp2::add(z.g3_, t1, t2);
@@ -234,14 +229,12 @@ struct Compress {
 		T0 += T2;
 		Fp2Dbl::mod(t0, T0);
 		Fp2::sub(z.g4_, t0, z.g4_);
-//		z.g4_ += z.g4_;
 		Fp2::mul2(z.g4_, z.g4_);
 		z.g4_ += t0;
 		Fp2Dbl::addPre(T2, T2, T1);
 		T3 -= T2;
 		Fp2Dbl::mod(t0, T3);
 		z.g5_ += t0;
-//		z.g5_ += z.g5_;
 		Fp2::mul2(z.g5_, z.g5_);
 		z.g5_ += t0;
 	}
@@ -1084,31 +1077,25 @@ inline void fasterSqr(Fp12& y, const Fp12& x)
 	Fp2 t0, t1;
 	sqrFp4(t0, t1, x0, x1);
 	Fp2::sub(y0, t0, x0);
-//	y0 += y0;
 	Fp2::mul2(y0, y0);
 	y0 += t0;
 	Fp2::add(y1, t1, x1);
-//	y1 += y1;
 	Fp2::mul2(y1, y1);
 	y1 += t1;
 	Fp2 t2, t3;
 	sqrFp4(t0, t1, x2, x3);
 	sqrFp4(t2, t3, x4, x5);
 	Fp2::sub(y4, t0, x4);
-//	y4 += y4;
 	Fp2::mul2(y4, y4);
 	y4 += t0;
 	Fp2::add(y5, t1, x5);
-//	y5 += y5;
 	Fp2::mul2(y5, y5);
 	y5 += t1;
 	Fp2::mul_xi(t0, t3);
 	Fp2::add(y2, t0, x2);
-//	y2 += y2;
 	Fp2::mul2(y2, y2);
 	y2 += t0;
 	Fp2::sub(y3, t2, x3);
-//	y3 += y3;
 	Fp2::mul2(y3, y3);
 	y3 += t2;
 #endif
@@ -1187,14 +1174,12 @@ inline void dblLineWithoutP(Fp6& l, G2& Q)
 	Fp2::sqr(t0, Q.z);
 	Fp2::mul(t4, Q.x, Q.y);
 	Fp2::sqr(t1, Q.y);
-//	Fp2::add(t3, t0, t0);
 	Fp2::mul2(t3, t0);
 	Fp2::divBy2(t4, t4);
 	Fp2::add(t5, t0, t1);
 	t0 += t3;
 	mul_twist_b(t2, t0);
 	Fp2::sqr(t0, Q.x);
-//	Fp2::add(t3, t2, t2);
 	Fp2::mul2(t3, t2);
 	t3 += t2;
 	Fp2::sub(Q.x, t1, t3);
@@ -1613,7 +1598,6 @@ inline void expHardPartBN(Fp12& y, const Fp12& x)
 inline void makeAdjP(G1& adjP, const G1& P)
 {
 	Fp x2;
-//	Fp::add(x2, P.x, P.x);
 	Fp::mul2(x2, P.x);
 	Fp::add(adjP.x, x2, P.x);
 	Fp::neg(adjP.y, P.y);

From 46fc61dc75bcb2c3c1b2a977cc6d0d7a3d31a102 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 12 Feb 2021 11:56:58 +0900
Subject: [PATCH 420/553] enable mul2Test for isFullBit

---
 src/fp.cpp           | 28 ++++++++++++++++------------
 src/fp_generator.hpp | 37 ++++++++++++++++++++++++++-----------
 test/fp_test.cpp     |  1 -
 3 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index fbf0ab59..ee71e5c6 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -261,26 +261,30 @@ struct SetFpDbl<N, true> {
 	}
 };
 
-// assume !isFullBit
-template<size_t N>
+template<size_t N, bool isFullBit>
 void Mul2(Unit *y, const Unit *x, const Unit *p)
 {
 	const size_t bit = 1;
 	const size_t rBit = sizeof(Unit) * 8 - bit;
+	Unit tmp[N];
 	Unit prev = x[N - 1];
+	Unit H = isFullBit ? (x[N - 1] >> rBit) : 0;
 	for (size_t i = N - 1; i > 0; i--) {
 		Unit t = x[i - 1];
-		y[i] = (prev << bit) | (t >> rBit);
+		tmp[i] = (prev << bit) | (t >> rBit);
 		prev = t;
 	}
-	y[0] = prev << bit;
-	for (size_t i = 0; i < N; i++) {
-		Unit a = y[N - 1 - i];
-		Unit b = p[N - 1 - i];
-		if (a < b) return;
-		if (a > b) break;
+	tmp[0] = prev << bit;
+	bool c;
+	if (isFullBit) {
+		H -= SubPre<N, Gtag>::f(y, tmp, p);
+		c = H >> rBit;
+	} else {
+		c = SubPre<N, Gtag>::f(y, tmp, p);
+	}
+	if (c) {
+		copyC<N>(y, tmp);
 	}
-	SubPre<N, Gtag>::f(y, y, p);
 }
 
 template<size_t N, class Tag, bool enableFpDbl, bool gmpIsFasterThanLLVM>
@@ -291,11 +295,11 @@ void setOp2(Op& op)
 	if (op.isFullBit) {
 		op.fp_add = Add<N, true, Tag>::f;
 		op.fp_sub = Sub<N, true, Tag>::f;
-		op.fp_mul2 = 0; // not supported
+		op.fp_mul2 = Mul2<N, true>;
 	} else {
 		op.fp_add = Add<N, false, Tag>::f;
 		op.fp_sub = Sub<N, false, Tag>::f;
-		op.fp_mul2 = Mul2<N>;
+		op.fp_mul2 = Mul2<N, false>;
 	}
 	if (op.isMont) {
 		if (op.isFullBit) {
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 64e6ebe0..f5e34cf5 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -924,9 +924,15 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		return func;
 	}
 	// x = x << 1
-	void shl1(const Pack& x)
+	// H = top bit of x
+	void shl1(const Pack& x, const Reg64 *H = 0)
 	{
-		for (int i = x.size() - 1; i > 0; i--) {
+		const int n = (int)x.size();
+		if (H) {
+			mov(*H, x[n - 1]);
+			shr(*H, 63);
+		}
+		for (int i = n - 1; i > 0; i--) {
 			shld(x[i], x[i - 1], 1);
 		}
 		shl(x[0], 1);
@@ -934,25 +940,34 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	/*
 		y = (x >= p[]) x - p[] : x
 	*/
-	void sub_mod(const Pack& y, const Pack& x, const RegExp& p)
+	void sub_p_mod(const Pack& y, const Pack& x, const RegExp& p, const Reg64 *H = 0)
 	{
 		mov_rr(y, x);
 		sub_rm(y, p);
+		if (H) {
+			sbb(*H, 0);
+		}
 		cmovc_rr(y, x);
 	}
 	void2u gen_mul2()
 	{
-		if (isFullBit_ || pn_ > 6) return 0;
+		if (pn_ > 6) return 0;
 		void2u func = getCurr<void2u>();
-		const int n = pn_ * 2 - 1;
-		StackFrame sf(this, 2, n);
+		int n = pn_ * 2 - 1;
+		StackFrame sf(this, 2, n + (isFullBit_ ? 1 : 0));
 		Pack x = sf.t.sub(0, pn_);
-		load_rm(x, sf.p[1]);
-		shl1(x);
 		Pack t = sf.t.sub(pn_, n - pn_);
 		t.append(sf.p[1]);
 		lea(rax, ptr[rip + pL_]);
-		sub_mod(t, x, rax);
+		load_rm(x, sf.p[1]);
+		if (isFullBit_) {
+			const Reg64& H = sf.t[n];
+			shl1(x, &H);
+			sub_p_mod(t, x, rax, &H);
+		} else {
+			shl1(x);
+			sub_p_mod(t, x, rax);
+		}
 		store_mr(sf.p[0], t);
 		return func;
 	}
@@ -960,7 +975,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	{
 		if (isFullBit_ || pn_ > 6) return 0;
 		void2u func = getCurr<void2u>();
-		const int n = pn_ * 2;
+		int n = pn_ * 2;
 		StackFrame sf(this, 2, n);
 		Pack x = sf.t.sub(0, pn_);
 		Pack t = sf.t.sub(pn_, pn_);
@@ -968,7 +983,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		for (int i = 0; i < 2; i++) {
 			load_rm(x, sf.p[1] + FpByte_ * i);
 			shl1(x);
-			sub_mod(t, x, rax);
+			sub_p_mod(t, x, rax);
 			store_mr(sf.p[0] + FpByte_ * i, t);
 		}
 		return func;
diff --git a/test/fp_test.cpp b/test/fp_test.cpp
index f2af6a23..878bdb9d 100644
--- a/test/fp_test.cpp
+++ b/test/fp_test.cpp
@@ -921,7 +921,6 @@ CYBOZU_TEST_AUTO(mod_NIST_P521)
 
 void mul2Test()
 {
-	if (Fp::getOp().isFullBit) return;
 	const int x0 = 1234567;
 	Fp x = x0;
 	mpz_class mx = x0;

From d960faad6fee052db7ea43d7664e9ebb79a26d82 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 12 Feb 2021 15:07:54 +0900
Subject: [PATCH 421/553] [go] strict check of the length in Deserialize

---
 ffi/go/mcl/mcl.go | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/ffi/go/mcl/mcl.go b/ffi/go/mcl/mcl.go
index 97f0c7d7..06fd756c 100644
--- a/ffi/go/mcl/mcl.go
+++ b/ffi/go/mcl/mcl.go
@@ -170,8 +170,8 @@ func (x *Fr) SetString(s string, base int) error {
 // Deserialize --
 func (x *Fr) Deserialize(buf []byte) error {
 	// #nosec
-	err := C.mclBnFr_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
-	if err == 0 {
+	n := C.mclBnFr_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if n == 0 || int(n) != len(buf) {
 		return fmt.Errorf("err mclBnFr_deserialize %x", buf)
 	}
 	return nil
@@ -340,8 +340,8 @@ func (x *Fp) SetString(s string, base int) error {
 // Deserialize --
 func (x *Fp) Deserialize(buf []byte) error {
 	// #nosec
-	err := C.mclBnFp_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
-	if err == 0 {
+	n := C.mclBnFp_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if n == 0 || int(n) != len(buf) {
 		return fmt.Errorf("err mclBnFp_deserialize %x", buf)
 	}
 	return nil
@@ -493,8 +493,8 @@ func (x *Fp2) Clear() {
 // Deserialize --
 func (x *Fp2) Deserialize(buf []byte) error {
 	// #nosec
-	err := C.mclBnFp2_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
-	if err == 0 {
+	n := C.mclBnFp2_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if n == 0 || int(n) != len(buf) {
 		return fmt.Errorf("err mclBnFp2_deserialize %x", buf)
 	}
 	return nil
@@ -599,8 +599,8 @@ func (x *G1) SetString(s string, base int) error {
 // Deserialize --
 func (x *G1) Deserialize(buf []byte) error {
 	// #nosec
-	err := C.mclBnG1_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
-	if err == 0 {
+	n := C.mclBnG1_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if n == 0 || int(n) != len(buf) {
 		return fmt.Errorf("err mclBnG1_deserialize %x", buf)
 	}
 	return nil
@@ -796,8 +796,8 @@ func (x *G2) SetString(s string, base int) error {
 // Deserialize --
 func (x *G2) Deserialize(buf []byte) error {
 	// #nosec
-	err := C.mclBnG2_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
-	if err == 0 {
+	n := C.mclBnG2_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if n == 0 || int(n) != len(buf) {
 		return fmt.Errorf("err mclBnG2_deserialize %x", buf)
 	}
 	return nil
@@ -976,8 +976,8 @@ func (x *GT) SetString(s string, base int) error {
 // Deserialize --
 func (x *GT) Deserialize(buf []byte) error {
 	// #nosec
-	err := C.mclBnGT_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
-	if err == 0 {
+	n := C.mclBnGT_deserialize(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if n == 0 || int(n) != len(buf) {
 		return fmt.Errorf("err mclBnGT_deserialize %x", buf)
 	}
 	return nil

From 6612374e3bc93abb33acc8be2347175433a7024e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 13 Feb 2021 17:40:33 +0900
Subject: [PATCH 422/553] [go] define const for SetMapToMode

---
 ffi/go/mcl/mcl.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ffi/go/mcl/mcl.go b/ffi/go/mcl/mcl.go
index 06fd756c..887d08e5 100644
--- a/ffi/go/mcl/mcl.go
+++ b/ffi/go/mcl/mcl.go
@@ -31,6 +31,9 @@ const IO_EC_AFFINE = C.MCLBN_IO_EC_AFFINE
 // IO_EC_PROJ --
 const IO_EC_PROJ = C.MCLBN_IO_EC_PROJ
 
+// IRTF -- for SetMapToMode
+const IRTF = 5 /* MCL_MAP_TO_MODE_HASH_TO_CURVE_07 */
+
 // GetFrUnitSize --
 func GetFrUnitSize() int {
 	return int(C.MCLBN_FR_UNIT_SIZE)

From d33f8c6856db908f81498de776a483e86b2ce9bb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 13 Feb 2021 17:41:13 +0900
Subject: [PATCH 423/553] [go] add SetBigEndianMod

---
 ffi/go/mcl/mcl.go | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/ffi/go/mcl/mcl.go b/ffi/go/mcl/mcl.go
index 887d08e5..1dd9b845 100644
--- a/ffi/go/mcl/mcl.go
+++ b/ffi/go/mcl/mcl.go
@@ -200,6 +200,16 @@ func (x *Fr) SetLittleEndianMod(buf []byte) error {
 	return nil
 }
 
+// SetBigEndianMod --
+func (x *Fr) SetBigEndianMod(buf []byte) error {
+	// #nosec
+	err := C.mclBnFr_setBigEndianMod(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if err != 0 {
+		return fmt.Errorf("err mclBnFr_setBigEndianMod %x", err)
+	}
+	return nil
+}
+
 // IsEqual --
 func (x *Fr) IsEqual(rhs *Fr) bool {
 	return C.mclBnFr_isEqual(x.getPointer(), rhs.getPointer()) == 1
@@ -370,6 +380,16 @@ func (x *Fp) SetLittleEndianMod(buf []byte) error {
 	return nil
 }
 
+// SetBigEndianMod --
+func (x *Fp) SetBigEndianMod(buf []byte) error {
+	// #nosec
+	err := C.mclBnFp_setBigEndianMod(x.getPointer(), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
+	if err != 0 {
+		return fmt.Errorf("err mclBnFp_setBigEndianMod %x", err)
+	}
+	return nil
+}
+
 // IsEqual --
 func (x *Fp) IsEqual(rhs *Fp) bool {
 	return C.mclBnFp_isEqual(x.getPointer(), rhs.getPointer()) == 1

From b06f7f69567ca4eec6ea582b98dc54a030f7d014 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 13 Feb 2021 17:41:42 +0900
Subject: [PATCH 424/553] [go] add testMapToG1

---
 ffi/go/mcl/mcl_test.go | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/ffi/go/mcl/mcl_test.go b/ffi/go/mcl/mcl_test.go
index e128648f..a182acac 100644
--- a/ffi/go/mcl/mcl_test.go
+++ b/ffi/go/mcl/mcl_test.go
@@ -2,6 +2,7 @@ package mcl
 
 import "testing"
 import "fmt"
+import "encoding/hex"
 
 func testBadPointOfG2(t *testing.T) {
 	var Q G2
@@ -261,6 +262,29 @@ func testETHserialize(t *testing.T) {
 	fmt.Printf("AAA x=%s\n", x.GetString(16))
 }
 
+func testMapToG1(t *testing.T) { // checks MapToG1 in IRTF mode against one fixed Fp -> G1 vector
+	SetMapToMode(IRTF) // switch the map-to mode before mapping
+	fpHex := "0000000000000000000000000000000014406e5bfb9209256a3820879a29ac2f62d6aca82324bf3ae2aa7d3c54792043bd8c791fccdb080c1a52dc68b8b69350" // input field element, hex-encoded
+	g1Hex := "ad7721bcdb7ce1047557776eb2659a444166dc6dd55c7ca6e240e21ae9aa18f529f04ac31d861b54faf3307692545db7" // expected point, hex of the bytes fed to G1.Deserialize
+	fpBin, _ := hex.DecodeString(fpHex) // decode error discarded; fpHex is a fixed literal
+	var x Fp
+	x.SetBigEndianMod(fpBin) // NOTE(review): the returned error is ignored here
+	var P1 G1
+	err := MapToG1(&P1, &x)
+	if err != nil {
+		t.Fatal("MapToG1")
+	}
+	g1Str, _ := hex.DecodeString(g1Hex) // decode error discarded; g1Hex is a fixed literal
+	var P2 G1
+	err = P2.Deserialize(g1Str)
+	if err != nil {
+		t.Fatal("G1.Deserialize")
+	}
+	if !P1.IsEqual(&P2) { // the mapped point must equal the deserialized expected point
+		t.Fatal("bad MapToG1")
+	}
+}
+
 func TestMclMain(t *testing.T) {
 	t.Logf("GetMaxOpUnitSize() = %d\n", GetMaxOpUnitSize())
 	t.Log("CurveFp254BNb")
@@ -273,5 +297,6 @@ func TestMclMain(t *testing.T) {
 		t.Log("BLS12_381")
 		testMcl(t, BLS12_381)
 		testETHserialize(t)
+		testMapToG1(t)
 	}
 }

From 3f65ce22acf0789ea265dbbf0cec179b99464e6a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 14 Feb 2021 16:45:48 +0900
Subject: [PATCH 425/553] move ecdsa code to ecdsa-wasm

---
 Makefile              |  19 +---
 include/mcl/ecdsa.h   | 105 ------------------
 include/mcl/ecdsa.hpp | 242 ------------------------------------------
 src/ecdsa_c.cpp       | 110 -------------------
 test/ecdsa_c_test.cpp |  51 ---------
 test/ecdsa_test.cpp   |  69 ------------
 6 files changed, 2 insertions(+), 594 deletions(-)
 delete mode 100644 include/mcl/ecdsa.h
 delete mode 100644 include/mcl/ecdsa.hpp
 delete mode 100644 src/ecdsa_c.cpp
 delete mode 100644 test/ecdsa_c_test.cpp
 delete mode 100644 test/ecdsa_test.cpp

diff --git a/Makefile b/Makefile
index 16004870..aef44c29 100644
--- a/Makefile
+++ b/Makefile
@@ -3,13 +3,12 @@ LIB_DIR=lib
 OBJ_DIR=obj
 EXE_DIR=bin
 SRC_SRC=fp.cpp bn_c256.cpp bn_c384.cpp bn_c384_256.cpp bn_c512.cpp she_c256.cpp
-TEST_SRC=fp_test.cpp ec_test.cpp fp_util_test.cpp window_method_test.cpp elgamal_test.cpp fp_tower_test.cpp gmp_test.cpp bn_test.cpp bn384_test.cpp glv_test.cpp paillier_test.cpp she_test.cpp vint_test.cpp bn512_test.cpp ecdsa_test.cpp conversion_test.cpp
+TEST_SRC=fp_test.cpp ec_test.cpp fp_util_test.cpp window_method_test.cpp elgamal_test.cpp fp_tower_test.cpp gmp_test.cpp bn_test.cpp bn384_test.cpp glv_test.cpp paillier_test.cpp she_test.cpp vint_test.cpp bn512_test.cpp conversion_test.cpp
 TEST_SRC+=bn_c256_test.cpp bn_c384_test.cpp bn_c384_256_test.cpp bn_c512_test.cpp
 TEST_SRC+=she_c256_test.cpp she_c384_test.cpp she_c384_256_test.cpp
 TEST_SRC+=aggregate_sig_test.cpp array_test.cpp
 TEST_SRC+=bls12_test.cpp
 TEST_SRC+=mapto_wb19_test.cpp
-TEST_SRC+=ecdsa_c_test.cpp
 TEST_SRC+=modp_test.cpp
 LIB_OBJ=$(OBJ_DIR)/fp.o
 ifeq ($(MCL_STATIC_CODE),1)
@@ -66,9 +65,8 @@ SHE384_LIB=$(LIB_DIR)/libmclshe384.a
 SHE384_SLIB=$(LIB_DIR)/lib$(SHE384_SNAME).$(LIB_SUF)
 SHE384_256_LIB=$(LIB_DIR)/libmclshe384_256.a
 SHE384_256_SLIB=$(LIB_DIR)/lib$(SHE384_256_SNAME).$(LIB_SUF)
-ECDSA_LIB=$(LIB_DIR)/libmclecdsa.a
 SHE_LIB_ALL=$(SHE256_LIB) $(SHE256_SLIB) $(SHE384_LIB) $(SHE384_SLIB) $(SHE384_256_LIB) $(SHE384_256_SLIB)
-all: $(MCL_LIB) $(MCL_SLIB) $(BN256_LIB) $(BN256_SLIB) $(BN384_LIB) $(BN384_SLIB) $(BN384_256_LIB) $(BN384_256_SLIB) $(BN512_LIB) $(BN512_SLIB) $(SHE_LIB_ALL) $(ECDSA_LIB)
+all: $(MCL_LIB) $(MCL_SLIB) $(BN256_LIB) $(BN256_SLIB) $(BN384_LIB) $(BN384_SLIB) $(BN384_256_LIB) $(BN384_256_SLIB) $(BN512_LIB) $(BN512_SLIB) $(SHE_LIB_ALL)
 
 #LLVM_VER=-3.8
 LLVM_LLC=llc$(LLVM_VER)
@@ -102,7 +100,6 @@ BN512_OBJ=$(OBJ_DIR)/bn_c512.o
 SHE256_OBJ=$(OBJ_DIR)/she_c256.o
 SHE384_OBJ=$(OBJ_DIR)/she_c384.o
 SHE384_256_OBJ=$(OBJ_DIR)/she_c384_256.o
-ECDSA_OBJ=$(OBJ_DIR)/ecdsa_c.o
 FUNC_LIST=src/func.list
 ifeq ($(findstring $(OS),mingw64/cygwin),)
   MCL_USE_LLVM?=1
@@ -191,9 +188,6 @@ $(SHE384_SLIB): $(SHE384_OBJ) $(MCL_LIB)
 $(SHE384_256_SLIB): $(SHE384_256_OBJ) $(MCL_LIB)
 	$(PRE)$(CXX) -o $@ $(SHE384_256_OBJ) $(MCL_LIB) -shared $(LDFLAGS) $(SHE384_256_SLIB_LDFLAGS)
 
-$(ECDSA_LIB): $(ECDSA_OBJ)
-	$(AR) $@ $(ECDSA_OBJ)
-
 $(BN256_SLIB): $(BN256_OBJ) $(MCL_SLIB)
 	$(PRE)$(CXX) -o $@ $(BN256_OBJ) -shared $(LDFLAGS) $(BN256_SLIB_LDFLAGS)
 
@@ -329,9 +323,6 @@ $(EXE_DIR)/she_c384_test.exe: $(OBJ_DIR)/she_c384_test.o $(SHE384_LIB) $(MCL_LIB
 $(EXE_DIR)/she_c384_256_test.exe: $(OBJ_DIR)/she_c384_256_test.o $(SHE384_256_LIB) $(MCL_LIB)
 	$(PRE)$(CXX) $< -o $@ $(SHE384_256_LIB) $(MCL_LIB) $(LDFLAGS)
 
-$(EXE_DIR)/ecdsa_c_test.exe: $(OBJ_DIR)/ecdsa_c_test.o $(ECDSA_LIB) $(MCL_LIB) src/ecdsa_c.cpp include/mcl/ecdsa.hpp include/mcl/ecdsa.h
-	$(PRE)$(CXX) $< -o $@ $(ECDSA_LIB) $(MCL_LIB) $(LDFLAGS)
-
 $(OBJ_DIR)/modp_test.o: test/modp_test.cpp
 	$(PRE)$(CXX) -c $< -o $@ -MMD -MP -MF $(@:.o=.d) -DMCL_USE_VINT -DMCL_MAX_BIT_SIZE=384 -DMCL_VINT_64BIT_PORTABLE -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -I./include -O2 $(CFLAGS_WARN)
 
@@ -361,12 +352,6 @@ ifeq ($(MCL_USE_LLVM),2)
   SHE_C_DEP+=src/base64m.ll
 endif
 
-../ecdsa-wasm/ecdsa_c.js: src/ecdsa_c.cpp src/fp.cpp include/mcl/ecdsa.hpp include/mcl/ecdsa.h Makefile
-	emcc -o $@ src/fp.cpp src/ecdsa_c.cpp $(EMCC_OPT) -DMCL_MAX_BIT_SIZE=256 -DMCL_USE_WEB_CRYPTO_API -s DISABLE_EXCEPTION_CATCHING=1 -DCYBOZU_DONT_USE_EXCEPTION -DCYBOZU_DONT_USE_STRING -fno-exceptions
-
-ecdsa-wasm:
-	$(MAKE) ../ecdsa-wasm/ecdsa_c.js
-
 # test
 bin/emu:
 	$(CXX) -g -o $@ src/fp.cpp src/bn_c256.cpp test/bn_c256_test.cpp -DMCL_DONT_USE_XBYAK -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_64BIT_PORTABLE -DMCL_VINT_FIXED_BUFFER -DMCL_MAX_BIT_SIZE=256 -I./include
diff --git a/include/mcl/ecdsa.h b/include/mcl/ecdsa.h
deleted file mode 100644
index daeb6be5..00000000
--- a/include/mcl/ecdsa.h
+++ /dev/null
@@ -1,105 +0,0 @@
-#pragma once
-/**
-	@file
-	@brief C interface of ECDSA
-	@author MITSUNARI Shigeo(@herumi)
-	@license modified new BSD license
-	http://opensource.org/licenses/BSD-3-Clause
-*/
-#include <stdint.h> // for uint64_t, uint8_t
-#include <stdlib.h> // for size_t
-
-#if defined(_MSC_VER)
-	#ifdef ECDSA_DLL_EXPORT
-		#define ECDSA_DLL_API __declspec(dllexport)
-	#else
-		#define ECDSA_DLL_API __declspec(dllimport)
-		#ifndef ECDSA_NO_AUTOLINK
-			#pragma comment(lib, "mclecdsa.lib")
-		#endif
-	#endif
-#elif defined(__EMSCRIPTEN__)
-	#define ECDSA_DLL_API __attribute__((used))
-#else
-	#define ECDSA_DLL_API
-#endif
-
-#ifndef mclSize
-	#ifdef __EMSCRIPTEN__
-		// avoid 64-bit integer
-		#define mclSize unsigned int
-		#define mclInt int
-	#else
-		// use #define for cgo
-		#define mclSize size_t
-		#define mclInt int64_t
-	#endif
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef ECDSA_NOT_DEFINE_STRUCT
-
-typedef struct ecdsaSecretKey ecdsaSecretKey;
-typedef struct ecdsaPublicKey ecdsaPublicKey;
-typedef struct ecdsaSignature ecdsaSignature;
-
-#else
-
-typedef struct {
-	uint64_t d[4];
-} ecdsaSecretKey;
-
-typedef struct {
-	uint64_t d[4 * 3];
-} ecdsaPublicKey;
-
-typedef struct {
-	uint64_t d[4 * 2];
-} ecdsaSignature;
-
-#endif
-
-struct ecdsaPrecomputedPublicKey;
-
-/*
-	init library
-	return 0 if success
-	@note not threadsafe
-*/
-ECDSA_DLL_API int ecdsaInit(void);
-
-// return written byte size if success else 0
-ECDSA_DLL_API mclSize ecdsaSecretKeySerialize(void *buf, mclSize maxBufSize, const ecdsaSecretKey *sec);
-ECDSA_DLL_API mclSize ecdsaPublicKeySerialize(void *buf, mclSize maxBufSize, const ecdsaPublicKey *pub);
-ECDSA_DLL_API mclSize ecdsaSignatureSerialize(void *buf, mclSize maxBufSize, const ecdsaSignature *sig);
-
-// return read byte size if sucess else 0
-ECDSA_DLL_API mclSize ecdsaSecretKeyDeserialize(ecdsaSecretKey* sec, const void *buf, mclSize bufSize);
-ECDSA_DLL_API mclSize ecdsaPublicKeyDeserialize(ecdsaPublicKey* pub, const void *buf, mclSize bufSize);
-ECDSA_DLL_API mclSize ecdsaSignatureDeserialize(ecdsaSignature* sig, const void *buf, mclSize bufSize);
-
-//	return 0 if success
-ECDSA_DLL_API int ecdsaSecretKeySetByCSPRNG(ecdsaSecretKey *sec);
-
-ECDSA_DLL_API void ecdsaGetPublicKey(ecdsaPublicKey *pub, const ecdsaSecretKey *sec);
-
-ECDSA_DLL_API void ecdsaSign(ecdsaSignature *sig, const ecdsaSecretKey *sec, const void *m, mclSize size);
-
-// return 1 if valid
-ECDSA_DLL_API int ecdsaVerify(const ecdsaSignature *sig, const ecdsaPublicKey *pub, const void *m, mclSize size);
-ECDSA_DLL_API int ecdsaVerifyPrecomputed(const ecdsaSignature *sig, const ecdsaPrecomputedPublicKey *pub, const void *m, mclSize size);
-
-// return nonzero if success
-ECDSA_DLL_API ecdsaPrecomputedPublicKey *ecdsaPrecomputedPublicKeyCreate();
-// call this function to avoid memory leak
-ECDSA_DLL_API void ecdsaPrecomputedPublicKeyDestroy(ecdsaPrecomputedPublicKey *ppub);
-// return 0 if success
-ECDSA_DLL_API int ecdsaPrecomputedPublicKeyInit(ecdsaPrecomputedPublicKey *ppub, const ecdsaPublicKey *pub);
-
-#ifdef __cplusplus
-}
-#endif
-
diff --git a/include/mcl/ecdsa.hpp b/include/mcl/ecdsa.hpp
deleted file mode 100644
index c92000ad..00000000
--- a/include/mcl/ecdsa.hpp
+++ /dev/null
@@ -1,242 +0,0 @@
-#pragma once
-/**
-	@file
-	@brief ECDSA
-	@author MITSUNARI Shigeo(@herumi)
-	@license modified new BSD license
-	http://opensource.org/licenses/BSD-3-Clause
-*/
-#include <mcl/fp.hpp>
-#include <mcl/ec.hpp>
-#include <mcl/ecparam.hpp>
-#include <mcl/window_method.hpp>
-
-namespace mcl { namespace ecdsa {
-
-namespace local {
-
-#ifndef MCLSHE_WIN_SIZE
-	#define MCLSHE_WIN_SIZE 10
-#endif
-static const size_t winSize = MCLSHE_WIN_SIZE;
-
-struct FpTag;
-struct ZnTag;
-
-} // mcl::ecdsa::local
-
-typedef mcl::FpT<local::FpTag, 256> Fp;
-typedef mcl::FpT<local::ZnTag, 256> Zn;
-typedef mcl::EcT<Fp> Ec;
-
-namespace local {
-
-struct Param {
-	Ec P;
-	mcl::fp::WindowMethod<Ec> Pbase;
-	size_t bitSize;
-};
-
-inline Param& getParam()
-{
-	static Param p;
-	return p;
-}
-
-inline void be32toZn(Zn& x, const mcl::fp::Unit *buf)
-{
-	const size_t n = 32;
-	const unsigned char *p = (const unsigned char*)buf;
-	unsigned char be[n];
-	for (size_t i = 0; i < n; i++) {
-		be[i] = p[n - 1 - i];
-	}
-	x.setArrayMaskMod(be, n);
-}
-
-/*
-	y = x mod n
-*/
-inline void FpToZn(Zn& y, const Fp& x)
-{
-	fp::Block b;
-	x.getBlock(b);
-	y.setArrayMaskMod(b.p, b.n);
-}
-
-inline void setHashOf(Zn& x, const void *msg, size_t msgSize)
-{
-	mcl::fp::Unit xBuf[256 / 8 / sizeof(mcl::fp::Unit)];
-	uint32_t hashSize = mcl::fp::sha256(xBuf, sizeof(xBuf), msg, (uint32_t)msgSize);
-	assert(hashSize == sizeof(xBuf));
-	(void)hashSize;
-	be32toZn(x, xBuf);
-}
-
-} // mcl::ecdsa::local
-
-const local::Param& param = local::getParam();
-
-inline void init(bool *pb)
-{
-	local::Param& p = local::getParam();
-	mcl::initCurve<Ec, Zn>(pb, MCL_SECP256K1, &p.P);
-	if (!*pb) return;
-	p.bitSize = 256;
-	p.Pbase.init(pb, p.P, p.bitSize, local::winSize);
-}
-
-#ifndef CYBOZU_DONT_USE_EXCEPTION
-inline void init()
-{
-	bool b;
-	init(&b);
-	if (!b) throw cybozu::Exception("ecdsa:init");
-}
-#endif
-
-typedef Zn SecretKey;
-typedef Ec PublicKey;
-
-struct PrecomputedPublicKey {
-	mcl::fp::WindowMethod<Ec> pubBase_;
-	void init(bool *pb, const PublicKey& pub)
-	{
-		pubBase_.init(pb, pub, param.bitSize, local::winSize);
-	}
-#ifndef CYBOZU_DONT_USE_EXCEPTION
-	void init(const PublicKey& pub)
-	{
-		bool b;
-		init(&b, pub);
-		if (!b) throw cybozu::Exception("ecdsa:PrecomputedPublicKey:init");
-	}
-#endif
-};
-
-inline void getPublicKey(PublicKey& pub, const SecretKey& sec)
-{
-	Ec::mul(pub, param.P, sec);
-	pub.normalize();
-}
-
-struct Signature : public mcl::fp::Serializable<Signature> {
-	Zn r, s;
-	template<class InputStream>
-	void load(bool *pb, InputStream& is, int ioMode = IoSerialize)
-	{
-		r.load(pb, is, ioMode); if (!*pb) return;
-		s.load(pb, is, ioMode);
-	}
-	template<class OutputStream>
-	void save(bool *pb, OutputStream& os, int ioMode = IoSerialize) const
-	{
-		const char sep = *fp::getIoSeparator(ioMode);
-		r.save(pb, os, ioMode); if (!*pb) return;
-		if (sep) {
-			cybozu::writeChar(pb, os, sep);
-			if (!*pb) return;
-		}
-		s.save(pb, os, ioMode);
-	}
-#ifndef CYBOZU_DONT_USE_EXCEPTION
-	template<class InputStream>
-	void load(InputStream& is, int ioMode = IoSerialize)
-	{
-		bool b;
-		load(&b, is, ioMode);
-		if (!b) throw cybozu::Exception("ecdsa:Signature:load");
-	}
-	template<class OutputStream>
-	void save(OutputStream& os, int ioMode = IoSerialize) const
-	{
-		bool b;
-		save(&b, os, ioMode);
-		if (!b) throw cybozu::Exception("ecdsa:Signature:save");
-	}
-#endif
-#ifndef CYBOZU_DONT_USE_STRING
-	friend std::istream& operator>>(std::istream& is, Signature& self)
-	{
-		self.load(is, fp::detectIoMode(Ec::getIoMode(), is));
-		return is;
-	}
-	friend std::ostream& operator<<(std::ostream& os, const Signature& self)
-	{
-		self.save(os, fp::detectIoMode(Ec::getIoMode(), os));
-		return os;
-	}
-#endif
-};
-
-inline void sign(Signature& sig, const SecretKey& sec, const void *msg, size_t msgSize)
-{
-	Zn& r = sig.r;
-	Zn& s = sig.s;
-	Zn z, k;
-	local::setHashOf(z, msg, msgSize);
-	Ec Q;
-	for (;;) {
-		k.setByCSPRNG();
-		param.Pbase.mul(Q, k);
-		if (Q.isZero()) continue;
-		Q.normalize();
-		local::FpToZn(r, Q.x);
-		if (r.isZero()) continue;
-		Zn::mul(s, r, sec);
-		s += z;
-		if (s.isZero()) continue;
-		s /= k;
-		return;
-	}
-}
-
-namespace local {
-
-inline void mulDispatch(Ec& Q, const PublicKey& pub, const Zn& y)
-{
-	Ec::mul(Q, pub, y);
-}
-
-inline void mulDispatch(Ec& Q, const PrecomputedPublicKey& ppub, const Zn& y)
-{
-	ppub.pubBase_.mul(Q, y);
-}
-
-template<class Pub>
-inline bool verify(const Signature& sig, const Pub& pub, const void *msg, size_t msgSize)
-{
-	const Zn& r = sig.r;
-	const Zn& s = sig.s;
-	if (r.isZero() || s.isZero()) return false;
-	Zn z, w, u1, u2;
-	local::setHashOf(z, msg, msgSize);
-	Zn::inv(w, s);
-	Zn::mul(u1, z, w);
-	Zn::mul(u2, r, w);
-	Ec Q1, Q2;
-	param.Pbase.mul(Q1, u1);
-//	Ec::mul(Q2, pub, u2);
-	local::mulDispatch(Q2, pub, u2);
-	Q1 += Q2;
-	if (Q1.isZero()) return false;
-	Q1.normalize();
-	Zn x;
-	local::FpToZn(x, Q1.x);
-	return r == x;
-}
-
-} // mcl::ecdsa::local
-
-inline bool verify(const Signature& sig, const PublicKey& pub, const void *msg, size_t msgSize)
-{
-	return local::verify(sig, pub, msg, msgSize);
-}
-
-inline bool verify(const Signature& sig, const PrecomputedPublicKey& ppub, const void *msg, size_t msgSize)
-{
-	return local::verify(sig, ppub, msg, msgSize);
-}
-
-} } // mcl::ecdsa
-
diff --git a/src/ecdsa_c.cpp b/src/ecdsa_c.cpp
deleted file mode 100644
index f2222a22..00000000
--- a/src/ecdsa_c.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-#define ECDSA_DLL_EXPORT
-#include <mcl/ecdsa.h>
-#include <mcl/ecdsa.hpp>
-#include <new>
-
-using namespace mcl::ecdsa;
-
-static SecretKey *cast(ecdsaSecretKey *p) { return reinterpret_cast<SecretKey*>(p); }
-static const SecretKey *cast(const ecdsaSecretKey *p) { return reinterpret_cast<const SecretKey*>(p); }
-
-static PublicKey *cast(ecdsaPublicKey *p) { return reinterpret_cast<PublicKey*>(p); }
-static const PublicKey *cast(const ecdsaPublicKey *p) { return reinterpret_cast<const PublicKey*>(p); }
-
-static Signature *cast(ecdsaSignature *p) { return reinterpret_cast<Signature*>(p); }
-static const Signature *cast(const ecdsaSignature *p) { return reinterpret_cast<const Signature*>(p); }
-
-static PrecomputedPublicKey *cast(ecdsaPrecomputedPublicKey *p) { return reinterpret_cast<PrecomputedPublicKey*>(p); }
-static const PrecomputedPublicKey *cast(const ecdsaPrecomputedPublicKey *p) { return reinterpret_cast<const PrecomputedPublicKey*>(p); }
-
-#ifdef __EMSCRIPTEN__
-// use these functions forcibly
-extern "C" ECDSA_DLL_API void *ecdsaMalloc(size_t n)
-{
-	return malloc(n);
-}
-extern "C" ECDSA_DLL_API void ecdsaFree(void *p)
-{
-	free(p);
-}
-#endif
-
-int ecdsaInit(void)
-{
-	bool b;
-	init(&b);
-	return b ? 0 : -1;
-}
-
-mclSize ecdsaSecretKeySerialize(void *buf, mclSize maxBufSize, const ecdsaSecretKey *sec)
-{
-	return (mclSize)cast(sec)->serialize(buf, maxBufSize);
-}
-mclSize ecdsaPublicKeySerialize(void *buf, mclSize maxBufSize, const ecdsaPublicKey *pub)
-{
-	return (mclSize)cast(pub)->serialize(buf, maxBufSize);
-}
-mclSize ecdsaSignatureSerialize(void *buf, mclSize maxBufSize, const ecdsaSignature *sig)
-{
-	return (mclSize)cast(sig)->serialize(buf, maxBufSize);
-}
-
-mclSize ecdsaSecretKeyDeserialize(ecdsaSecretKey* sec, const void *buf, mclSize bufSize)
-{
-	return (mclSize)cast(sec)->deserialize(buf, bufSize);
-}
-mclSize ecdsaPublicKeyDeserialize(ecdsaPublicKey* pub, const void *buf, mclSize bufSize)
-{
-	return (mclSize)cast(pub)->deserialize(buf, bufSize);
-}
-mclSize ecdsaSignatureDeserialize(ecdsaSignature* sig, const void *buf, mclSize bufSize)
-{
-	return (mclSize)cast(sig)->deserialize(buf, bufSize);
-}
-
-//	return 0 if success
-int ecdsaSecretKeySetByCSPRNG(ecdsaSecretKey *sec)
-{
-	cast(sec)->setByCSPRNG();
-	return 0;
-}
-
-void ecdsaGetPublicKey(ecdsaPublicKey *pub, const ecdsaSecretKey *sec)
-{
-	getPublicKey(*cast(pub), *cast(sec));
-}
-
-void ecdsaSign(ecdsaSignature *sig, const ecdsaSecretKey *sec, const void *m, mclSize size)
-{
-	sign(*cast(sig), *cast(sec), m, size);
-}
-
-int ecdsaVerify(const ecdsaSignature *sig, const ecdsaPublicKey *pub, const void *m, mclSize size)
-{
-	return verify(*cast(sig), *cast(pub), m, size);
-}
-int ecdsaVerifyPrecomputed(const ecdsaSignature *sig, const ecdsaPrecomputedPublicKey *ppub, const void *m, mclSize size)
-{
-	return verify(*cast(sig), *cast(ppub), m, size);
-}
-
-ecdsaPrecomputedPublicKey *ecdsaPrecomputedPublicKeyCreate()
-{
-	PrecomputedPublicKey *ppub = (PrecomputedPublicKey*)malloc(sizeof(PrecomputedPublicKey));
-	if (ppub == 0) return 0;
-	new(ppub) PrecomputedPublicKey();
-	return reinterpret_cast<ecdsaPrecomputedPublicKey*>(ppub);
-}
-
-void ecdsaPrecomputedPublicKeyDestroy(ecdsaPrecomputedPublicKey *ppub)
-{
-	cast(ppub)->~PrecomputedPublicKey();
-	free(ppub);
-}
-
-int ecdsaPrecomputedPublicKeyInit(ecdsaPrecomputedPublicKey *ppub, const ecdsaPublicKey *pub)
-{
-	bool b;
-	cast(ppub)->init(&b, *cast(pub));
-	return b ? 0 : -1;
-}
diff --git a/test/ecdsa_c_test.cpp b/test/ecdsa_c_test.cpp
deleted file mode 100644
index e0af3818..00000000
--- a/test/ecdsa_c_test.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-#include <mcl/ecdsa.h>
-#include <cybozu/test.hpp>
-#include <string.h>
-
-template<class T, class Serializer, class Deserializer>
-void serializeTest(const T& x, const Serializer& serialize, const Deserializer& deserialize)
-{
-	char buf[128];
-	size_t n = serialize(buf, sizeof(buf), &x);
-	CYBOZU_TEST_ASSERT(n > 0);
-	T y;
-	size_t m = deserialize(&y, buf, n);
-	CYBOZU_TEST_EQUAL(m, n);
-	CYBOZU_TEST_ASSERT(memcmp(&x, &y, n) == 0);
-}
-
-CYBOZU_TEST_AUTO(ecdsa)
-{
-	int ret;
-	ret = ecdsaInit();
-	CYBOZU_TEST_EQUAL(ret, 0);
-	ecdsaSecretKey sec;
-	ecdsaPublicKey pub;
-	ecdsaPrecomputedPublicKey *ppub;
-	ecdsaSignature sig;
-	const char *msg = "hello";
-	mclSize msgSize = strlen(msg);
-
-	ret = ecdsaSecretKeySetByCSPRNG(&sec);
-	CYBOZU_TEST_EQUAL(ret, 0);
-	serializeTest(sec, ecdsaSecretKeySerialize, ecdsaSecretKeyDeserialize);
-
-	ecdsaGetPublicKey(&pub, &sec);
-	serializeTest(pub, ecdsaPublicKeySerialize, ecdsaPublicKeyDeserialize);
-	ecdsaSign(&sig, &sec, msg, msgSize);
-	serializeTest(sig, ecdsaSignatureSerialize, ecdsaSignatureDeserialize);
-	CYBOZU_TEST_ASSERT(ecdsaVerify(&sig, &pub, msg, msgSize));
-
-	ppub = ecdsaPrecomputedPublicKeyCreate();
-	CYBOZU_TEST_ASSERT(ppub);
-	ret = ecdsaPrecomputedPublicKeyInit(ppub, &pub);
-	CYBOZU_TEST_EQUAL(ret, 0);
-
-	CYBOZU_TEST_ASSERT(ecdsaVerifyPrecomputed(&sig, ppub, msg, msgSize));
-
-	sig.d[0]++;
-	CYBOZU_TEST_ASSERT(!ecdsaVerify(&sig, &pub, msg, msgSize));
-	CYBOZU_TEST_ASSERT(!ecdsaVerifyPrecomputed(&sig, ppub, msg, msgSize));
-
-	ecdsaPrecomputedPublicKeyDestroy(ppub);
-}
diff --git a/test/ecdsa_test.cpp b/test/ecdsa_test.cpp
deleted file mode 100644
index 80de88a2..00000000
--- a/test/ecdsa_test.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-#define PUT(x) std::cout << #x "=" << (x) << std::endl;
-#include <stdlib.h>
-#include <stdio.h>
-void put(const void *buf, size_t bufSize)
-{
-	const unsigned char* p = (const unsigned char*)buf;
-	for (size_t i = 0; i < bufSize; i++) {
-		printf("%02x", p[i]);
-	}
-	printf("\n");
-}
-#include <mcl/ecdsa.hpp>
-#include <cybozu/test.hpp>
-#include <cybozu/benchmark.hpp>
-
-using namespace mcl::ecdsa;
-
-CYBOZU_TEST_AUTO(ecdsa)
-{
-	init();
-	SecretKey sec;
-	PublicKey pub;
-	sec.setByCSPRNG();
-	getPublicKey(pub, sec);
-	Signature sig;
-	const std::string msg = "hello";
-	sign(sig, sec, msg.c_str(), msg.size());
-	CYBOZU_TEST_ASSERT(verify(sig, pub, msg.c_str(), msg.size()));
-	sig.s += 1;
-	CYBOZU_TEST_ASSERT(!verify(sig, pub, msg.c_str(), msg.size()));
-}
-
-CYBOZU_TEST_AUTO(value)
-{
-	const std::string msg = "hello";
-	const char *secStr  = "83ecb3984a4f9ff03e84d5f9c0d7f888a81833643047acc58eb6431e01d9bac8";
-	const char *pubxStr = "653bd02ba1367e5d4cd695b6f857d1cd90d4d8d42bc155d85377b7d2d0ed2e71";
-	const char *pubyStr = "04e8f5da403ab78decec1f19e2396739ea544e2b14159beb5091b30b418b813a";
-	const char *sigStr = "a598a8030da6d86c6bc7f2f5144ea549d28211ea58faa70ebf4c1e665c1fe9b5de5d79a2ba44e311d04fdca263639283965780bce9169822be9cc81756e95a24";
-
-	SecretKey sec;
-	sec.setStr(secStr, 16);
-	CYBOZU_TEST_EQUAL(sec.getStr(16), secStr);
-	PublicKey pub;
-	getPublicKey(pub, sec);
-	pub.normalize();
-	Ec t(Fp(pubxStr, 16), Fp(pubyStr, 16));
-	CYBOZU_TEST_EQUAL(pub, t);
-	Signature sig;
-	sig.r.setStr(std::string(sigStr, 64), 16);
-	sig.s.setStr(std::string(sigStr + 64, 64), 16);
-	PUT(sig);
-	CYBOZU_TEST_ASSERT(verify(sig, pub, msg.c_str(), msg.size()));
-}
-
-CYBOZU_TEST_AUTO(bench)
-{
-	const std::string msg = "hello";
-	SecretKey sec;
-	PublicKey pub;
-	PrecomputedPublicKey ppub;
-	sec.setByCSPRNG();
-	getPublicKey(pub, sec);
-	ppub.init(pub);
-	Signature sig;
-	CYBOZU_BENCH_C("sign", 1000, sign, sig, sec, msg.c_str(), msg.size());
-	CYBOZU_BENCH_C("pub.verify ", 1000, verify, sig, pub, msg.c_str(), msg.size());
-	CYBOZU_BENCH_C("ppub.verify", 1000, verify, sig, ppub, msg.c_str(), msg.size());
-}

From 4b5869ffad043094d1b88f3d3d8054e9e6f5f435 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Feb 2021 17:18:18 +0900
Subject: [PATCH 426/553] refactor Fp6::mul

---
 include/mcl/fp_tower.hpp | 61 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 54 insertions(+), 7 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index a17a10b3..3ff0e1a6 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -135,6 +135,14 @@ class FpDblT : public fp::Serializable<FpDblT<Fp> > {
 		if (mulSmallUnit(z, x, y)) return;
 		assert(0); // not supported y
 	}
+	static void sub_p_if_possible(FpDblT& y, const FpDblT& x) // writes only the units of y from index N on; NOTE(review): the lower N units are not copied from x, so callers presumably pass y == x
+	{
+		const size_t N = Fp::op_.N;
+		const Unit *xv = &x.v_[N]; // upper part of x, starting at unit N
+		Unit *yv = &y.v_[N]; // upper part of y, starting at unit N
+		static const Unit zero[Fp::maxSize] = {}; // all-zero addend
+		Fp::op_.fp_add(yv, xv, zero, Fp::op_.p); // yv = xv + 0 through fp_add with modulus p; presumably subtracts p when xv >= p -- confirm fp_add semantics
+	}
 	static void init()
 	{
 		const mcl::fp::Op& op = Fp::getOp();
@@ -684,6 +692,11 @@ struct Fp2DblT {
 #endif
 	void operator+=(const Fp2DblT& x) { add(*this, *this, x); }
 	void operator-=(const Fp2DblT& x) { sub(*this, *this, x); }
+	static void sub_p_if_possible(Fp2DblT& y, const Fp2DblT& x) // applies FpDbl::sub_p_if_possible to each of the two components
+	{
+		FpDbl::sub_p_if_possible(y.a, x.a); // a component
+		FpDbl::sub_p_if_possible(y.b, x.b); // b component
+	}
 	static void init()
  	{
 		assert(!Fp::getOp().isFullBit);
@@ -986,6 +999,39 @@ struct Fp6DblT {
 		const Fp2& d = y.a;
 		const Fp2& e = y.b;
 		const Fp2& f = y.c;
+#if 1
+		Fp2Dbl& ZA = z.a;
+		Fp2Dbl& ZB = z.b;
+		Fp2Dbl& ZC = z.c;
+		Fp2 t1, t2;
+		Fp2Dbl BE, CF, AD;
+		Fp2::addPre(t1, b, c);
+		Fp2::addPre(t2, e, f);
+		Fp2Dbl::mulPre(ZA, t1, t2);
+		Fp2::addPre(t1, a, b);
+		Fp2::addPre(t2, e, d);
+		Fp2Dbl::mulPre(ZB, t1, t2);
+		Fp2::addPre(t1, a, c);
+		Fp2::addPre(t2, d, f);
+		Fp2Dbl::mulPre(ZC, t1, t2);
+		Fp2Dbl::mulPre(BE, b, e);
+		Fp2Dbl::mulPre(CF, c, f);
+		Fp2Dbl::mulPre(AD, a, d);
+		Fp2Dbl::sub(ZA, ZA, BE);
+		Fp2Dbl::sub(ZA, ZA, CF);
+//		Fp2Dbl::sub_p_if_possible(ZA, ZA);
+		Fp2Dbl::sub(ZB, ZB, AD);
+		Fp2Dbl::sub(ZB, ZB, BE);
+//		Fp2Dbl::sub_p_if_possible(ZB, ZB);
+		Fp2Dbl::sub(ZC, ZC, AD);
+		Fp2Dbl::sub(ZC, ZC, CF);
+//		Fp2Dbl::sub_p_if_possible(ZC, ZC);
+		Fp2Dbl::mul_xi(ZA, ZA);
+		Fp2Dbl::add(ZA, ZA, AD);
+		Fp2Dbl::mul_xi(CF, CF);
+		Fp2Dbl::add(ZB, ZB, CF);
+		Fp2Dbl::add(ZC, ZC, BE);
+#else
 		Fp2Dbl& za = z.a;
 		Fp2Dbl& zb = z.b;
 		Fp2Dbl& zc = z.c;
@@ -994,7 +1040,7 @@ struct Fp6DblT {
 		Fp2Dbl::mulPre(BE, b, e);
 		Fp2Dbl::mulPre(zb, c, f);
 
-		Fp2 t1, t2, t3, t4;
+		Fp2 t1, t2;
 		Fp2::add(t1, b, c);
 		Fp2::add(t2, e, f);
 		Fp2Dbl T1;
@@ -1003,16 +1049,16 @@ struct Fp6DblT {
 		Fp2Dbl::sub(T1, T1, zb);
 		Fp2Dbl::mul_xi(T1, T1);
 
-		Fp2::add(t2, a, b);
-		Fp2::add(t3, e, d);
+		Fp2::add(t1, a, b);
+		Fp2::add(t2, e, d);
 		Fp2Dbl T2;
-		Fp2Dbl::mulPre(T2, t2, t3);
+		Fp2Dbl::mulPre(T2, t1, t2);
 		Fp2Dbl::sub(T2, T2, za);
 		Fp2Dbl::sub(T2, T2, BE);
 
-		Fp2::add(t3, a, c);
-		Fp2::add(t4, d, f);
-		Fp2Dbl::mulPre(zc, t3, t4);
+		Fp2::add(t1, a, c);
+		Fp2::add(t2, d, f);
+		Fp2Dbl::mulPre(zc, t1, t2);
 		Fp2Dbl::sub(zc, zc, za);
 		Fp2Dbl::sub(zc, zc, zb);
 
@@ -1020,6 +1066,7 @@ struct Fp6DblT {
 		Fp2Dbl::mul_xi(zb, zb);
 		Fp2Dbl::add(zb, zb, T2);
 		Fp2Dbl::add(zc, zc, BE);
+#endif
 //clk.end();
 	}
 	static void mod(Fp6& y, const Fp6Dbl& x)

From 91a2ecf3587bb9aceb0259cce7c1c142cd8e39cd Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Feb 2021 17:49:34 +0900
Subject: [PATCH 427/553] add testABCD

---
 include/mcl/fp_tower.hpp |  2 +-
 test/common_test.hpp     | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 3ff0e1a6..31a54a44 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -999,7 +999,7 @@ struct Fp6DblT {
 		const Fp2& d = y.a;
 		const Fp2& e = y.b;
 		const Fp2& f = y.c;
-#if 1
+#if 0
 		Fp2Dbl& ZA = z.a;
 		Fp2Dbl& ZB = z.b;
 		Fp2Dbl& ZC = z.c;
diff --git a/test/common_test.hpp b/test/common_test.hpp
index b35b9bc8..54083d63 100644
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@@ -126,8 +126,37 @@ void testMul2()
 	}
 }
 
+void testABCDsub(const Fp2& a, const Fp2& b, const Fp2& c, const Fp2& d)
+{
+	Fp2 t1, t2;
+	Fp2::add(t1, a, b);
+	Fp2::add(t2, c, d);
+	Fp2Dbl T1, AC, BD;
+	Fp2Dbl::mulPre(T1, t1, t2);
+	Fp2Dbl::mulPre(AC, a, c);
+	Fp2Dbl::mulPre(BD, b, d);
+	Fp2Dbl::sub(T1, T1, AC);
+	Fp2Dbl::sub(T1, T1, BD);
+	Fp2Dbl::mod(t1, T1);
+	CYBOZU_TEST_EQUAL(t1, a * d + b * c);
+}
+
+void testABCD()
+{
+	puts("testMisc1");
+	// (a + b)(c + d) - ac - bd = ad + bc
+	Fp2 a, b, c, d;
+	a.a = -1;
+	a.b = -1;
+	b = a;
+	c = a;
+	d = a;
+	testABCDsub(a, b, c, d);
+}
+
 void testCommon(const G1& P, const G2& Q)
 {
+	testABCD();
 	testMul2();
 	puts("G1");
 	testMulVec(P);

From 10183458d3d4ff81d52c4f96bf3ef0ae6da1de87 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 16 Feb 2021 11:25:29 +0900
Subject: [PATCH 428/553] add cmake option MCL_STATIC_LIB

---
 CMakeLists.txt | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c4eb3e89..df7e4f11 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,6 +11,11 @@ option(
 	"max bit size for Fp"
 	384
 )
+option(
+	MCL_STATIC_LIB
+	"build static library"
+	OFF # default OFF: the mclbn${bit} targets are added as SHARED unless this option is enabled
+)
 if(MSVC)
 	option(
 		MCL_DOWNLOAD_SOURCE
@@ -320,7 +325,11 @@ endif()
 
 # mclbnXXX
 foreach(bit IN ITEMS 256 384 384_256)
-	add_library(mclbn${bit} SHARED src/bn_c${bit}.cpp)
+	if (MCL_STATIC_LIB) # pick the library type for mclbn${bit}; the source file is the same in both branches
+		add_library(mclbn${bit} STATIC src/bn_c${bit}.cpp)
+	else()
+		add_library(mclbn${bit} SHARED src/bn_c${bit}.cpp)
+	endif()
 	add_library(mcl::mclbn${bit} ALIAS mclbn${bit})
 	set_target_properties(mclbn${bit} PROPERTIES
 		CXX_STANDARD 11

From f9d7d4005961a7d9d69ed1abf3755f995b04df8c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 16 Feb 2021 14:34:28 +0900
Subject: [PATCH 429/553] fp_tower assumes p < W/4

---
 include/mcl/fp_tower.hpp | 13 ++++++++++++-
 test/fp_tower_test.cpp   |  6 ++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 31a54a44..f2faebe7 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -379,11 +379,16 @@ class Fp2T : public fp::Serializable<Fp2T<_Fp>,
 	}
 
 	static uint32_t get_xi_a() { return Fp::getOp().xi_a; }
-	static void init()
+	static void init(bool *pb)
 	{
 //		assert(Fp::maxSize <= 256);
 		mcl::fp::Op& op = Fp::op_;
 		assert(op.xi_a);
+		// assume p < W/4 where W = 1 << (N * sizeof(Unit) * 8)
+		if ((op.p[op.N - 1] >> (sizeof(fp::Unit) * 8 - 2)) != 0) {
+			*pb = false;
+			return;
+		}
 		mul_xi = 0;
 #ifdef MCL_XBYAK_DIRECT_CALL
 		add = fp::func_ptr_cast<void (*)(Fp2T& z, const Fp2T& x, const Fp2T& y)>(op.fp2_addA_);
@@ -440,6 +445,12 @@ class Fp2T : public fp::Serializable<Fp2T<_Fp>,
 		}
 	}
 #ifndef CYBOZU_DONT_USE_EXCEPTION
+	static void init() // throwing wrapper: runs init(bool*) and raises cybozu::Exception when it reports failure
+	{
+		bool b; // left uninitialized here; NOTE(review): relies on init(bool*) storing a value on the success path too -- confirm
+		init(&b);
+		if (!b) throw cybozu::Exception("Fp2::init");
+	}
 	template<class InputStream>
 	void load(InputStream& is, int ioMode = IoSerialize)
 	{
diff --git a/test/fp_tower_test.cpp b/test/fp_tower_test.cpp
index b5e77db9..5dd3a70e 100644
--- a/test/fp_tower_test.cpp
+++ b/test/fp_tower_test.cpp
@@ -400,9 +400,11 @@ void test(const char *p, mcl::fp::Mode mode)
 {
 	const int xi_a = 1;
 	Fp::init(xi_a, p, mode);
-	printf("mode=%s\n", mcl::fp::ModeToStr(mode));
 	if (Fp::getOp().isFullBit) return;
-	Fp2::init();
+	bool b;
+	Fp2::init(&b);
+	if (!b) return;
+	printf("mode=%s\n", mcl::fp::ModeToStr(mode));
 	printf("bitSize=%d\n", (int)Fp::getBitSize());
 #if 0
 	if (Fp::getBitSize() > 256) {

From 3423988fa04433b6e9993279c6f942d821821b88 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 16 Feb 2021 14:48:27 +0900
Subject: [PATCH 430/553] use no-throw-version

---
 include/mcl/bn.hpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 59f2407f..0c514de4 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -889,7 +889,8 @@ struct Param {
 		*pb = true;
 		return;
 #endif
-		Fp2::init();
+		Fp2::init(pb);
+		if (!*pb) return;
 		const Fp2 xi(cp.xi_a, 1);
 		g2 = Fp2::get_gTbl()[0];
 		g3 = Fp2::get_gTbl()[3];

From 6b5a64c3ec280c5cc724f7a61795e91296feb869 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 16 Feb 2021 15:20:39 +0900
Subject: [PATCH 431/553] a little optimize Fp6::mul

---
 include/mcl/fp_tower.hpp | 23 +++++++++++++----------
 test/common_test.hpp     | 32 +++++++++++++++++++++++---------
 2 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index f2faebe7..1f3ec7e0 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -978,6 +978,7 @@ template<class Fp>
 struct Fp6DblT {
 	typedef Fp2T<Fp> Fp2;
 	typedef Fp6T<Fp> Fp6;
+	typedef FpDblT<Fp> FpDbl;
 	typedef Fp2DblT<Fp> Fp2Dbl;
 	typedef Fp6DblT<Fp> Fp6Dbl;
 	typedef fp::Unit Unit;
@@ -994,6 +995,11 @@ struct Fp6DblT {
 		Fp2Dbl::sub(z.b, x.b, y.b);
 		Fp2Dbl::sub(z.c, x.c, y.c);
 	}
+	static void sub2(Fp2Dbl& y, const Fp2Dbl& x) // in-place y -= x, with a different subtraction routine per component
+	{
+		FpDbl::sub(y.a, y.a, x.a); // a component goes through FpDbl::sub
+		FpDbl::subPre(y.b, y.b, x.b); // b component goes through FpDbl::subPre instead; presumably it cannot underflow at the call sites -- confirm
+	}
 	/*
 		x = a + bv + cv^2, y = d + ev + fv^2, v^3 = xi
 		xy = (ad + (bf + ce)xi) + ((ae + bd) + cf xi)v + ((af + cd) + be)v^2
@@ -1010,7 +1016,7 @@ struct Fp6DblT {
 		const Fp2& d = y.a;
 		const Fp2& e = y.b;
 		const Fp2& f = y.c;
-#if 0
+#if 1
 		Fp2Dbl& ZA = z.a;
 		Fp2Dbl& ZB = z.b;
 		Fp2Dbl& ZC = z.c;
@@ -1028,15 +1034,12 @@ struct Fp6DblT {
 		Fp2Dbl::mulPre(BE, b, e);
 		Fp2Dbl::mulPre(CF, c, f);
 		Fp2Dbl::mulPre(AD, a, d);
-		Fp2Dbl::sub(ZA, ZA, BE);
-		Fp2Dbl::sub(ZA, ZA, CF);
-//		Fp2Dbl::sub_p_if_possible(ZA, ZA);
-		Fp2Dbl::sub(ZB, ZB, AD);
-		Fp2Dbl::sub(ZB, ZB, BE);
-//		Fp2Dbl::sub_p_if_possible(ZB, ZB);
-		Fp2Dbl::sub(ZC, ZC, AD);
-		Fp2Dbl::sub(ZC, ZC, CF);
-//		Fp2Dbl::sub_p_if_possible(ZC, ZC);
+		sub2(ZA, BE);
+		sub2(ZA, CF);
+		sub2(ZB, AD);
+		sub2(ZB, BE);
+		sub2(ZC, AD);
+		sub2(ZC, CF);
 		Fp2Dbl::mul_xi(ZA, ZA);
 		Fp2Dbl::add(ZA, ZA, AD);
 		Fp2Dbl::mul_xi(CF, CF);
diff --git a/test/common_test.hpp b/test/common_test.hpp
index 54083d63..ba990f1f 100644
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@@ -129,14 +129,21 @@ void testMul2()
 void testABCDsub(const Fp2& a, const Fp2& b, const Fp2& c, const Fp2& d)
 {
 	Fp2 t1, t2;
-	Fp2::add(t1, a, b);
-	Fp2::add(t2, c, d);
+	Fp2::addPre(t1, a, b);
+	Fp2::addPre(t2, c, d);
 	Fp2Dbl T1, AC, BD;
 	Fp2Dbl::mulPre(T1, t1, t2);
 	Fp2Dbl::mulPre(AC, a, c);
 	Fp2Dbl::mulPre(BD, b, d);
+#if 0
 	Fp2Dbl::sub(T1, T1, AC);
 	Fp2Dbl::sub(T1, T1, BD);
+#else
+	FpDbl::sub(T1.a, T1.a, AC.a);
+	FpDbl::subPre(T1.b, T1.b, AC.b);
+	FpDbl::sub(T1.a, T1.a, BD.a);
+	FpDbl::subPre(T1.b, T1.b, BD.b);
+#endif
 	Fp2Dbl::mod(t1, T1);
 	CYBOZU_TEST_EQUAL(t1, a * d + b * c);
 }
@@ -145,13 +152,20 @@ void testABCD()
 {
 	puts("testMisc1");
 	// (a + b)(c + d) - ac - bd = ad + bc
-	Fp2 a, b, c, d;
-	a.a = -1;
-	a.b = -1;
-	b = a;
-	c = a;
-	d = a;
-	testABCDsub(a, b, c, d);
+	Fp2 a[4];
+	a[0].a = -1;
+	a[0].b = -1;
+	a[1] = a[0];
+	a[2] = a[0];
+	a[3] = a[0];
+	testABCDsub(a[0], a[1], a[2], a[3]);
+	for (int i = 0; i < 100; i++) {
+		for (int j = 0; j < 4; j++) {
+			a[j].a.setByCSPRNG();
+			a[j].b.setByCSPRNG();
+		}
+		testABCDsub(a[0], a[1], a[2], a[3]);
+	}
 }
 
 void testCommon(const G1& P, const G2& Q)

From ae5b3a733010bddda8074c8fc8f49f19ee2f6647 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 16 Feb 2021 16:29:53 +0900
Subject: [PATCH 432/553] _

---
 include/mcl/bn.hpp       |  1 +
 include/mcl/fp_tower.hpp | 57 +++++++++-------------------------------
 test/common_test.hpp     | 11 ++------
 3 files changed, 16 insertions(+), 53 deletions(-)

diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp
index 0c514de4..c8255aa6 100644
--- a/include/mcl/bn.hpp
+++ b/include/mcl/bn.hpp
@@ -57,6 +57,7 @@ typedef Fp12 GT;
 
 typedef mcl::FpDblT<Fp> FpDbl;
 typedef mcl::Fp2DblT<Fp> Fp2Dbl;
+typedef mcl::Fp6DblT<Fp> Fp6Dbl;
 
 inline void Frobenius(Fp2& y, const Fp2& x)
 {
diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 1f3ec7e0..55a2bbe5 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -995,7 +995,11 @@ struct Fp6DblT {
 		Fp2Dbl::sub(z.b, x.b, y.b);
 		Fp2Dbl::sub(z.c, x.c, y.c);
 	}
-	static void sub2(Fp2Dbl& y, const Fp2Dbl& x)
+	/*
+		imaginary part of Fp2Dbl::mul uses only add,
+		so it does not require mod.
+	*/
+	static void specialSub(Fp2Dbl& y, const Fp2Dbl& x)
 	{
 		FpDbl::sub(y.a, y.a, x.a);
 		FpDbl::subPre(y.b, y.b, x.b);
@@ -1006,6 +1010,8 @@ struct Fp6DblT {
 		bf + ce = (b + c)(e + f) - be - cf
 		ae + bd = (a + b)(e + d) - ad - be
 		af + cd = (a + c)(d + f) - ad - cf
+		assum p < W/4 where W = 1 << (sizeof(Unit) * 8 * N)
+		then (b + c)(e + f) < 4p^2 < pW
 	*/
 	static void mulPre(Fp6DblT& z, const Fp6& x, const Fp6& y)
 	{
@@ -1016,7 +1022,6 @@ struct Fp6DblT {
 		const Fp2& d = y.a;
 		const Fp2& e = y.b;
 		const Fp2& f = y.c;
-#if 1
 		Fp2Dbl& ZA = z.a;
 		Fp2Dbl& ZB = z.b;
 		Fp2Dbl& ZC = z.c;
@@ -1034,53 +1039,17 @@ struct Fp6DblT {
 		Fp2Dbl::mulPre(BE, b, e);
 		Fp2Dbl::mulPre(CF, c, f);
 		Fp2Dbl::mulPre(AD, a, d);
-		sub2(ZA, BE);
-		sub2(ZA, CF);
-		sub2(ZB, AD);
-		sub2(ZB, BE);
-		sub2(ZC, AD);
-		sub2(ZC, CF);
+		specialSub(ZA, BE);
+		specialSub(ZA, CF);
+		specialSub(ZB, AD);
+		specialSub(ZB, BE);
+		specialSub(ZC, AD);
+		specialSub(ZC, CF);
 		Fp2Dbl::mul_xi(ZA, ZA);
 		Fp2Dbl::add(ZA, ZA, AD);
 		Fp2Dbl::mul_xi(CF, CF);
 		Fp2Dbl::add(ZB, ZB, CF);
 		Fp2Dbl::add(ZC, ZC, BE);
-#else
-		Fp2Dbl& za = z.a;
-		Fp2Dbl& zb = z.b;
-		Fp2Dbl& zc = z.c;
-		Fp2Dbl BE;
-		Fp2Dbl::mulPre(za, a, d);
-		Fp2Dbl::mulPre(BE, b, e);
-		Fp2Dbl::mulPre(zb, c, f);
-
-		Fp2 t1, t2;
-		Fp2::add(t1, b, c);
-		Fp2::add(t2, e, f);
-		Fp2Dbl T1;
-		Fp2Dbl::mulPre(T1, t1, t2);
-		Fp2Dbl::sub(T1, T1, BE);
-		Fp2Dbl::sub(T1, T1, zb);
-		Fp2Dbl::mul_xi(T1, T1);
-
-		Fp2::add(t1, a, b);
-		Fp2::add(t2, e, d);
-		Fp2Dbl T2;
-		Fp2Dbl::mulPre(T2, t1, t2);
-		Fp2Dbl::sub(T2, T2, za);
-		Fp2Dbl::sub(T2, T2, BE);
-
-		Fp2::add(t1, a, c);
-		Fp2::add(t2, d, f);
-		Fp2Dbl::mulPre(zc, t1, t2);
-		Fp2Dbl::sub(zc, zc, za);
-		Fp2Dbl::sub(zc, zc, zb);
-
-		Fp2Dbl::add(za, za, T1);
-		Fp2Dbl::mul_xi(zb, zb);
-		Fp2Dbl::add(zb, zb, T2);
-		Fp2Dbl::add(zc, zc, BE);
-#endif
 //clk.end();
 	}
 	static void mod(Fp6& y, const Fp6Dbl& x)
diff --git a/test/common_test.hpp b/test/common_test.hpp
index ba990f1f..afe23eed 100644
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@@ -135,15 +135,8 @@ void testABCDsub(const Fp2& a, const Fp2& b, const Fp2& c, const Fp2& d)
 	Fp2Dbl::mulPre(T1, t1, t2);
 	Fp2Dbl::mulPre(AC, a, c);
 	Fp2Dbl::mulPre(BD, b, d);
-#if 0
-	Fp2Dbl::sub(T1, T1, AC);
-	Fp2Dbl::sub(T1, T1, BD);
-#else
-	FpDbl::sub(T1.a, T1.a, AC.a);
-	FpDbl::subPre(T1.b, T1.b, AC.b);
-	FpDbl::sub(T1.a, T1.a, BD.a);
-	FpDbl::subPre(T1.b, T1.b, BD.b);
-#endif
+	Fp6Dbl::specialSub(T1, AC);
+	Fp6Dbl::specialSub(T1, BD);
 	Fp2Dbl::mod(t1, T1);
 	CYBOZU_TEST_EQUAL(t1, a * d + b * c);
 }

From ed74c3bfdd0f4fb63f040b83c819b2395342a2b6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 16 Feb 2021 16:30:12 +0900
Subject: [PATCH 433/553] use mul2 in Fp6::mul

---
 include/mcl/fp_tower.hpp | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 55a2bbe5..c6987b33 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -915,9 +915,9 @@ struct Fp6T : public fp::Serializable<Fp6T<_Fp>,
 	{
 		Fp2 t1, t2, t3;
 		Fp2::mul(t1, x.a, x.b);
-		t1 += t1; // 2ab
+		Fp2::mul2(t1, t1); // 2ab
 		Fp2::mul(t2, x.b, x.c);
-		t2 += t2; // 2bc
+		Fp2::mul2(t2, t2); // 2bc
 		Fp2::sqr(t3, x.c); // c^2
 		Fp2::add(y.c, x.a, x.c); // a + c, destroy y.c
 		y.c += x.b; // a + b + c
@@ -1170,7 +1170,6 @@ struct Fp12T : public fp::Serializable<Fp12T<Fp>,
 		Fp6 t1, t2;
 		Fp6::add(t1, a, b);
 		Fp6::add(t2, c, d);
-#if 1
 		Fp6Dbl T, AC, BD;
 		Fp6Dbl::mulPre(AC, a, c);
 		Fp6Dbl::mulPre(BD, b, d);
@@ -1180,15 +1179,6 @@ struct Fp12T : public fp::Serializable<Fp12T<Fp>,
 		Fp6Dbl::sub(T, T, AC);
 		Fp6Dbl::sub(T, T, BD);
 		Fp6Dbl::mod(z.b, T);
-#else
-		Fp6 ac, bd;
-		t1 *= t2; // (a + b)(c + d)
-		Fp6::mul(ac, a, c);
-		Fp6::mul(bd, b, d);
-		mulVadd(z.a, bd, ac);
-		t1 -= ac;
-		Fp6::sub(z.b, t1, bd);
-#endif
 	}
 	/*
 		x = a + bw, w^2 = v

From 9f1df6d61c07f6ba4c8a57e621120ee8a05b561a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 16 Feb 2021 16:40:39 +0900
Subject: [PATCH 434/553] use mul2

---
 include/mcl/fp_tower.hpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index c6987b33..f03fd0d7 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -560,7 +560,6 @@ class Fp2T : public fp::Serializable<Fp2T<_Fp>,
 		const Fp& b = x.b;
 #if 1 // faster than using FpDbl
 		Fp t1, t2, t3;
-//		Fp::add(t1, b, b); // 2b
 		Fp::mul2(t1, b);
 		t1 *= a; // 2ab
 		Fp::add(t2, a, b); // a + b
@@ -905,6 +904,12 @@ struct Fp6T : public fp::Serializable<Fp6T<_Fp>,
 		Fp2::neg(y.b, x.b);
 		Fp2::neg(y.c, x.c);
 	}
+	static void mul2(Fp6T& y, const Fp6T& x)
+	{
+		Fp2::mul2(y.a, x.a);
+		Fp2::mul2(y.b, x.b);
+		Fp2::mul2(y.c, x.c);
+	}
 	/*
 		x = a + bv + cv^2, v^3 = xi
 		x^2 = (a^2 + 2bc xi) + (c^2 xi + 2ab)v + (b^2 + 2ac)v^2
@@ -1194,7 +1199,8 @@ struct Fp12T : public fp::Serializable<Fp12T<Fp>,
 		mulVadd(t1, b, a); // bv + a
 		t0 *= t1; // (a + b)(bv + a)
 		Fp6::mul(t1, a, b); // ab
-		Fp6::add(y.b, t1, t1); // 2ab
+//		Fp6::add(y.b, t1, t1); // 2ab
+		Fp6::mul2(y.b, t1); // 2ab
 		mulVadd(y.a, t1, t1); // abv + ab
 		Fp6::sub(y.a, t0, y.a);
 	}

From 0c2babc0c426f9181a70457627d29cddf2769649 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 16 Feb 2021 16:46:19 +0900
Subject: [PATCH 435/553] remove unused code

---
 include/mcl/fp_tower.hpp | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index f03fd0d7..493f6b8b 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -135,14 +135,6 @@ class FpDblT : public fp::Serializable<FpDblT<Fp> > {
 		if (mulSmallUnit(z, x, y)) return;
 		assert(0); // not supported y
 	}
-	static void sub_p_if_possible(FpDblT& y, const FpDblT& x)
-	{
-		const size_t N = Fp::op_.N;
-		const Unit *xv = &x.v_[N];
-		Unit *yv = &y.v_[N];
-		static const Unit zero[Fp::maxSize] = {};
-		Fp::op_.fp_add(yv, xv, zero, Fp::op_.p);
-	}
 	static void init()
 	{
 		const mcl::fp::Op& op = Fp::getOp();
@@ -702,11 +694,6 @@ struct Fp2DblT {
 #endif
 	void operator+=(const Fp2DblT& x) { add(*this, *this, x); }
 	void operator-=(const Fp2DblT& x) { sub(*this, *this, x); }
-	static void sub_p_if_possible(Fp2DblT& y, const Fp2DblT& x)
-	{
-		FpDbl::sub_p_if_possible(y.a, x.a);
-		FpDbl::sub_p_if_possible(y.b, x.b);
-	}
 	static void init()
  	{
 		assert(!Fp::getOp().isFullBit);
@@ -1199,7 +1186,6 @@ struct Fp12T : public fp::Serializable<Fp12T<Fp>,
 		mulVadd(t1, b, a); // bv + a
 		t0 *= t1; // (a + b)(bv + a)
 		Fp6::mul(t1, a, b); // ab
-//		Fp6::add(y.b, t1, t1); // 2ab
 		Fp6::mul2(y.b, t1); // 2ab
 		mulVadd(y.a, t1, t1); // abv + ab
 		Fp6::sub(y.a, t0, y.a);

From 988b1c6b4196aa446eccedc65c06eadd7c67c29f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 16 Feb 2021 16:57:54 +0900
Subject: [PATCH 436/553] v1.34

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index fc617982..7e314ae3 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x133; /* 0xABC = A.BC */
+static const int version = 0x134; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From b46aa28d8edf74786e182431b468b54e20005e07 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 17 Feb 2021 11:29:29 +0900
Subject: [PATCH 437/553] a little optimize Fp6::sqr

---
 include/mcl/fp_tower.hpp | 42 +++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 493f6b8b..c311aaee 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -905,24 +905,30 @@ struct Fp6T : public fp::Serializable<Fp6T<_Fp>,
 	*/
 	static void sqr(Fp6T& y, const Fp6T& x)
 	{
-		Fp2 t1, t2, t3;
-		Fp2::mul(t1, x.a, x.b);
-		Fp2::mul2(t1, t1); // 2ab
-		Fp2::mul(t2, x.b, x.c);
-		Fp2::mul2(t2, t2); // 2bc
-		Fp2::sqr(t3, x.c); // c^2
-		Fp2::add(y.c, x.a, x.c); // a + c, destroy y.c
-		y.c += x.b; // a + b + c
-		Fp2::sqr(y.b, y.c); // (a + b + c)^2, destroy y.b
-		y.b -= t2; // (a + b + c)^2 - 2bc
-		Fp2::mul_xi(t2, t2); // 2bc xi
-		Fp2::sqr(y.a, x.a); // a^2, destroy y.a
-		y.b -= y.a; // (a + b + c)^2 - 2bc - a^2
-		y.a += t2; // a^2 + 2bc xi
-		Fp2::sub(y.c, y.b, t3); // (a + b + c)^2 - 2bc - a^2 - c^2
-		Fp2::mul_xi(y.b, t3); // c^2 xi
-		y.b += t1; // c^2 xi + 2ab
-		y.c -= t1; // b^2 + 2ac
+		const Fp2& a = x.a;
+		const Fp2& b = x.b;
+		const Fp2& c = x.c;
+		Fp2 t;
+		Fp2Dbl BC2, AB2, AA, CC, T;
+		Fp2::mul2(t, b);
+		Fp2Dbl::mulPre(BC2, t, c); // 2bc
+		Fp2Dbl::mulPre(AB2, t, a); // 2ab
+		Fp2Dbl::sqrPre(AA, a);
+		Fp2Dbl::sqrPre(CC, c);
+		Fp2::add(t, a, b);
+		Fp2::add(t, t, c);
+		Fp2Dbl::sqrPre(T, t); // (a + b + c)^2
+		Fp2Dbl::sub(T, T, AA);
+		Fp2Dbl::sub(T, T, BC2);
+		Fp2Dbl::sub(T, T, CC);
+		Fp2Dbl::sub(T, T, AB2);
+		Fp2Dbl::mod(y.c, T);
+		Fp2Dbl::mul_xi(BC2, BC2);
+		Fp2Dbl::add(AA, AA, BC2);
+		Fp2Dbl::mod(y.a, AA);
+		Fp2Dbl::mul_xi(CC, CC);
+		Fp2Dbl::add(CC, CC, AB2);
+		Fp2Dbl::mod(y.b, CC);
 	}
 	static inline void mul(Fp6T& z, const Fp6T& x, const Fp6T& y);
 	/*

From 371399aa14ea35b85c3758d6f229e25de9f18469 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 17 Feb 2021 18:22:11 +0900
Subject: [PATCH 438/553] optimize Fp2Dbl::mul_xi

---
 include/mcl/fp_tower.hpp | 43 ++++++++++++++++++----------
 include/mcl/op.hpp       |  2 ++
 src/fp_generator.hpp     | 60 ++++++++++++++++++++++++++++++++++++++++
 src/fp_static_code.hpp   |  2 ++
 test/common_test.hpp     | 23 +++++++++++++++
 5 files changed, 116 insertions(+), 14 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index c311aaee..f79cba74 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -16,6 +16,7 @@ class FpDblT : public fp::Serializable<FpDblT<Fp> > {
 	Unit v_[Fp::maxSize * 2];
 public:
 	static size_t getUnitSize() { return Fp::op_.N * 2; }
+	const fp::Unit *getUnit() const { return v_; }
 	void dump() const
 	{
 		const size_t n = getUnitSize();
@@ -662,25 +663,26 @@ struct Fp2DblT {
 		FpDbl::neg(y.a, x.a);
 		FpDbl::neg(y.b, x.b);
 	}
-	static void mul_xi(Fp2DblT& y, const Fp2DblT& x)
+	static void mul_xi_1C(Fp2DblT& y, const Fp2DblT& x)
+	{
+		FpDbl t;
+		FpDbl::add(t, x.a, x.b);
+		FpDbl::sub(y.a, x.a, x.b);
+		y.b = t;
+	}
+	static void mul_xi_genericC(Fp2DblT& y, const Fp2DblT& x)
 	{
 		const uint32_t xi_a = Fp2::get_xi_a();
-		if (xi_a == 1) {
-			FpDbl t;
-			FpDbl::add(t, x.a, x.b);
-			FpDbl::sub(y.a, x.a, x.b);
-			y.b = t;
-		} else {
-			FpDbl t;
-			FpDbl::mulUnit(t, x.a, xi_a);
-			FpDbl::sub(t, t, x.b);
-			FpDbl::mulUnit(y.b, x.b, xi_a);
-			FpDbl::add(y.b, y.b, x.a);
-			y.a = t;
-		}
+		FpDbl t;
+		FpDbl::mulUnit(t, x.a, xi_a);
+		FpDbl::sub(t, t, x.b);
+		FpDbl::mulUnit(y.b, x.b, xi_a);
+		FpDbl::add(y.b, y.b, x.a);
+		y.a = t;
 	}
 	static void (*mulPre)(Fp2DblT&, const Fp2&, const Fp2&);
 	static void (*sqrPre)(Fp2DblT&, const Fp2&);
+	static void (*mul_xi)(Fp2DblT&, const Fp2DblT&);
 	static void mod(Fp2& y, const Fp2DblT& x)
 	{
 		FpDbl::mod(y.a, x.a);
@@ -716,6 +718,18 @@ struct Fp2DblT {
 				sqrPre = fp2Dbl_sqrPreW<false>;
 			}
 		}
+		const uint32_t xi_a = Fp2::get_xi_a();
+		switch (xi_a) {
+		case 1:
+			mul_xi = mul_xi_1C;
+			if (op.fp2Dbl_mul_xiA_) {
+				mul_xi = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2DblT&)>(op.fp2Dbl_mul_xiA_);
+			}
+			break;
+		default:
+			mul_xi = mul_xi_genericC;
+			break;
+		}
 	}
 	/*
 		Fp2Dbl::mulPre by FpDblT
@@ -770,6 +784,7 @@ struct Fp2DblT {
 
 template<class Fp> void (*Fp2DblT<Fp>::mulPre)(Fp2DblT&, const Fp2T<Fp>&, const Fp2T<Fp>&);
 template<class Fp> void (*Fp2DblT<Fp>::sqrPre)(Fp2DblT&, const Fp2T<Fp>&);
+template<class Fp> void (*Fp2DblT<Fp>::mul_xi)(Fp2DblT<Fp>&, const Fp2DblT<Fp>&);
 
 template<class Fp> Fp2T<Fp> Fp2T<Fp>::g[Fp2T<Fp>::gN];
 template<class Fp> Fp2T<Fp> Fp2T<Fp>::g2[Fp2T<Fp>::gN];
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 7e314ae3..76cf7f0b 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -226,6 +226,7 @@ struct Op {
 	void2u fpDbl_modA_;
 	void3u fp2Dbl_mulPreA_;
 	void2u fp2Dbl_sqrPreA_;
+	void2u fp2Dbl_mul_xiA_;
 	size_t maxN;
 	size_t N;
 	size_t bitSize;
@@ -314,6 +315,7 @@ struct Op {
 		fpDbl_modA_ = 0;
 		fp2Dbl_mulPreA_ = 0;
 		fp2Dbl_sqrPreA_ = 0;
+		fp2Dbl_mul_xiA_ = 0;
 		maxN = 0;
 		N = 0;
 		bitSize = 0;
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index f5e34cf5..cfbb53f8 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -457,6 +457,10 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		op.fp2Dbl_sqrPreA_ = gen_fp2Dbl_sqrPre();
 		if (op.fp2Dbl_sqrPreA_) setFuncInfo(prof_, suf, "2Dbl_sqrPre", op.fp2Dbl_sqrPreA_, getCurr());
 
+		align(16);
+		op.fp2Dbl_mul_xiA_ = gen_fp2Dbl_mul_xi();
+		if (op.fp2Dbl_mul_xiA_) setFuncInfo(prof_, suf, "2Dbl_mul_xi", op.fp2Dbl_mul_xiA_, getCurr());
+
 		align(16);
 		op.fp2_mulA_ = gen_fp2_mul();
 		setFuncInfo(prof_, suf, "2_mul", op.fp2_mulA_, getCurr());
@@ -3173,6 +3177,13 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			}
 		}
 	}
+	// y[i] &= t
+	void andPack(const Pack& y, const Reg64& t)
+	{
+		for (int i = 0; i < (int)y.size(); i++) {
+			and_(y[i], t);
+		}
+	}
 	/*
 		[rdx:x:t0] <- py[1:0] * x
 		destroy x, t
@@ -3647,6 +3658,55 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		call(mulPreL);
 		return func;
 	}
+	void2u gen_fp2Dbl_mul_xi()
+	{
+		if (isFullBit_) return 0;
+		if (op_->xi_a != 1) return 0;
+		void2u func = getCurr<void2u>();
+		// y = (x.a - x.b, x.a + x.b)
+		StackFrame sf(this, 2, pn_ * 2, FpByte_ * 2);
+		Pack t1 = sf.t.sub(0, pn_);
+		Pack t2 = sf.t.sub(pn_, pn_);
+		const RegExp& ya = sf.p[0];
+		const RegExp& yb = sf.p[0] + FpByte_ * 2;
+		const RegExp& xa = sf.p[1];
+		const RegExp& xb = sf.p[1] + FpByte_ * 2;
+		// [rsp] = x.a + x.b
+		for (int i = 0; i < pn_ * 2; i++) {
+			mov(rax, ptr[xa + i * 8]);
+			if (i == 0) {
+				add(rax, ptr[xb + i * 8]);
+			} else {
+				adc(rax, ptr[xb + i * 8]);
+			}
+			mov(ptr[rsp + i * 8], rax);
+		}
+		// low : x.a =  x.a - x.b
+		load_rm(t1, xa);
+		sub_rm(t1, xb);
+		store_mr(ya, t1);
+		// high : x.a = (x.a - x.b) % p
+		load_rm(t1, xa + FpByte_);
+		sub_rm(t1, xb + FpByte_, true);
+		lea(rax, ptr[rip + pL_]);
+		load_rm(t2, rax); // t2 = p
+		sbb(rax, rax);
+		andPack(t2, rax);
+		add_rr(t1, t2); // mod p
+		store_mr(ya + FpByte_, t1);
+
+		// low : y.b = [rsp]
+		for (int i = 0; i < pn_; i++) {
+			mov(rax, ptr[rsp + i * 8]);
+			mov(ptr[yb + i * 8], rax);
+		}
+		// high : y.b = (x.a + x.b) % p
+		load_rm(t1, rsp + FpByte_);
+		lea(rax, ptr[rip + pL_]);
+		sub_p_mod(t2, t1, rax);
+		store_mr(yb + FpByte_, t2);
+		return func;
+	}
 	void gen_fp2_add4()
 	{
 		assert(!isFullBit_);
diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp
index d3965260..ef27f1bc 100644
--- a/src/fp_static_code.hpp
+++ b/src/fp_static_code.hpp
@@ -38,6 +38,7 @@ void mclx_Fp2_mul(Unit*, const Unit*, const Unit*);
 void mclx_Fp2_sqr(Unit*, const Unit*);
 void mclx_Fp2_mul2(Unit*, const Unit*);
 void mclx_Fp2_mul_xi(Unit*, const Unit*);
+void mclx_Fp2Dbl_mul_xi(Unit*, const Unit*);
 
 Unit mclx_Fr_addPre(Unit*, const Unit*, const Unit*);
 Unit mclx_Fr_subPre(Unit*, const Unit*, const Unit*);
@@ -79,6 +80,7 @@ void setStaticCode(mcl::fp::Op& op)
 		op.fp2_sqrA_ = mclx_Fp2_sqr;
 		op.fp2_mul2A_ = mclx_Fp2_mul2;
 		op.fp2_mul_xiA_ = mclx_Fp2_mul_xi;
+		op.fp2Dbl_mul_xiA_ = mclx_Fp2Dbl_mul_xi;
 		op.fp_preInv = mclx_Fp_preInv;
 	} else {
 		// Fr, sizeof(Fr) = 32
diff --git a/test/common_test.hpp b/test/common_test.hpp
index afe23eed..0781ebb0 100644
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@@ -161,8 +161,31 @@ void testABCD()
 	}
 }
 
+void testFp2Dbl_mul_xi1()
+{
+	if (Fp2::get_xi_a() != 1) return;
+	puts("testFp2Dbl_mul_xi1");
+	cybozu::XorShift rg;
+	for (int i = 0; i < 100; i++) {
+		Fp a1, a2;
+		a1.setByCSPRNG(rg);
+		a2.setByCSPRNG(rg);
+		Fp2Dbl x;
+		FpDbl::mulPre(x.a, a1, a2);
+		a1.setByCSPRNG(rg);
+		a2.setByCSPRNG(rg);
+		FpDbl::mulPre(x.b, a1, a2);
+		Fp2Dbl ok;
+		Fp2Dbl::mul_xi_1C(x, x);
+		Fp2Dbl::mul_xi(x, x);
+		CYBOZU_TEST_EQUAL_ARRAY(ok.a.getUnit(), x.a.getUnit(), ok.a.getUnitSize());
+		CYBOZU_TEST_EQUAL_ARRAY(ok.b.getUnit(), x.b.getUnit(), ok.b.getUnitSize());
+	}
+}
+
 void testCommon(const G1& P, const G2& Q)
 {
+	testFp2Dbl_mul_xi1();
 	testABCD();
 	testMul2();
 	puts("G1");

From 8bd785dab516f285f6c33decb50dcc9d1b466487 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 17 Feb 2021 20:57:21 +0900
Subject: [PATCH 439/553] fix test of Fp2Dbl::mul_xi

---
 test/common_test.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/common_test.hpp b/test/common_test.hpp
index 0781ebb0..40f67c1f 100644
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@@ -176,7 +176,7 @@ void testFp2Dbl_mul_xi1()
 		a2.setByCSPRNG(rg);
 		FpDbl::mulPre(x.b, a1, a2);
 		Fp2Dbl ok;
-		Fp2Dbl::mul_xi_1C(x, x);
+		Fp2Dbl::mul_xi_1C(ok, x);
 		Fp2Dbl::mul_xi(x, x);
 		CYBOZU_TEST_EQUAL_ARRAY(ok.a.getUnit(), x.a.getUnit(), ok.a.getUnitSize());
 		CYBOZU_TEST_EQUAL_ARRAY(ok.b.getUnit(), x.b.getUnit(), ok.b.getUnitSize());

From a7f2f656f898d10f97f40d4d13fd3dbbffbe07db Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 17 Feb 2021 20:58:03 +0900
Subject: [PATCH 440/553] fix generic fp2Dbl_mul_xi

---
 src/fp_generator.hpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index cfbb53f8..400c11db 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -3662,6 +3662,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	{
 		if (isFullBit_) return 0;
 		if (op_->xi_a != 1) return 0;
+		if (pn_ > 6) return 0;
 		void2u func = getCurr<void2u>();
 		// y = (x.a - x.b, x.a + x.b)
 		StackFrame sf(this, 2, pn_ * 2, FpByte_ * 2);
@@ -3686,14 +3687,14 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		sub_rm(t1, xb);
 		store_mr(ya, t1);
 		// high : x.a = (x.a - x.b) % p
-		load_rm(t1, xa + FpByte_);
-		sub_rm(t1, xb + FpByte_, true);
+		load_rm(t1, xa + pn_ * 8);
+		sub_rm(t1, xb + pn_ * 8, true);
 		lea(rax, ptr[rip + pL_]);
 		load_rm(t2, rax); // t2 = p
 		sbb(rax, rax);
 		andPack(t2, rax);
 		add_rr(t1, t2); // mod p
-		store_mr(ya + FpByte_, t1);
+		store_mr(ya + pn_ * 8, t1);
 
 		// low : y.b = [rsp]
 		for (int i = 0; i < pn_; i++) {
@@ -3701,10 +3702,10 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			mov(ptr[yb + i * 8], rax);
 		}
 		// high : y.b = (x.a + x.b) % p
-		load_rm(t1, rsp + FpByte_);
+		load_rm(t1, rsp + pn_ * 8);
 		lea(rax, ptr[rip + pL_]);
 		sub_p_mod(t2, t1, rax);
-		store_mr(yb + FpByte_, t2);
+		store_mr(yb + pn_ * 8, t2);
 		return func;
 	}
 	void gen_fp2_add4()

From d2b2ad8144498b2bb62463e5a651ea82ed9d583f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 17 Feb 2021 20:58:40 +0900
Subject: [PATCH 441/553] use llvm-add instead of C for Mul2

---
 src/fp.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/fp.cpp b/src/fp.cpp
index ee71e5c6..3d7eed31 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -264,6 +264,9 @@ struct SetFpDbl<N, true> {
 template<size_t N, bool isFullBit>
 void Mul2(Unit *y, const Unit *x, const Unit *p)
 {
+#ifdef MCL_USE_LLVM
+	Add<N, isFullBit, Ltag>::f(y, x, x, p);
+#else
 	const size_t bit = 1;
 	const size_t rBit = sizeof(Unit) * 8 - bit;
 	Unit tmp[N];
@@ -285,6 +288,7 @@ void Mul2(Unit *y, const Unit *x, const Unit *p)
 	if (c) {
 		copyC<N>(y, tmp);
 	}
+#endif
 }
 
 template<size_t N, class Tag, bool enableFpDbl, bool gmpIsFasterThanLLVM>

From b19156bf5fdc62317a4e6cd92f6961d2272f6ab1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 18 Feb 2021 09:09:55 +0900
Subject: [PATCH 442/553] disable bench in debug mode

---
 test/bench.hpp         | 12 ++++++++++++
 test/bls12_test.cpp    | 12 ++++++++++++
 test/fp_tower_test.cpp |  4 ++++
 test/paillier_test.cpp |  4 ++++
 test/she_test.cpp      |  5 ++++-
 5 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/test/bench.hpp b/test/bench.hpp
index 660aa133..69aff500 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -3,6 +3,10 @@
 void benchAddDblG1()
 {
 	puts("benchAddDblG1");
+#ifndef NDEBUG
+	puts("skip in debug");
+	return;
+#endif
 	const int C = 100000;
 	G1 P1, P2, P3;
 	hashAndMapToG1(P1, "a");
@@ -29,6 +33,10 @@ void benchAddDblG1()
 void benchAddDblG2()
 {
 	puts("benchAddDblG2");
+#ifndef NDEBUG
+	puts("skip in debug");
+	return;
+#endif
 	const int C = 100000;
 	G2 P1, P2, P3;
 	hashAndMapToG2(P1, "a");
@@ -61,6 +69,10 @@ void invAdd(T& out, const T& x, const T& y)
 
 void testBench(const G1& P, const G2& Q)
 {
+#ifndef NDEBUG
+	puts("testBench skip in debug");
+	return;
+#endif
 	G1 Pa;
 	G2 Qa;
 	Fp12 e1, e2;
diff --git a/test/bls12_test.cpp b/test/bls12_test.cpp
index ec4204ca..e6c691a1 100644
--- a/test/bls12_test.cpp
+++ b/test/bls12_test.cpp
@@ -431,6 +431,10 @@ const char *e1Str =
 	finalExp(e2, e0);
 //	finalExpC(e2, e0);
 	CYBOZU_TEST_EQUAL(e1, e2);
+#ifndef NDEBUG
+	puts("skip bench of finalExp in debug");
+	return;
+#endif
 	CYBOZU_BENCH_C("finalExp", 100, finalExp, e2, e0);
 }
 
@@ -685,6 +689,10 @@ void testCurve(const mcl::CurveParam& cp)
 }
 CYBOZU_TEST_AUTO(multi)
 {
+#ifndef NDEBUG
+	puts("skip multi in debug");
+	return;
+#endif
 	G1 P;
 	G2 Q;
 	int i;
@@ -722,6 +730,10 @@ CYBOZU_TEST_AUTO(deserialize)
 	size_t n2 = Q.serialize(buf2, sizeof(buf2));
 	CYBOZU_TEST_ASSERT(n2 > 0);
 	CYBOZU_TEST_EQUAL(Q.deserialize(buf2, n2), n2);
+#ifndef NDEBUG
+	puts("skip bench in debug");
+	return;
+#endif
 	for (int i = 0; i < 2; i++) {
 		bool doVerify = i == 0;
 		printf("verifyOrder(%d)\n", doVerify);
diff --git a/test/fp_tower_test.cpp b/test/fp_tower_test.cpp
index 5dd3a70e..3a456189 100644
--- a/test/fp_tower_test.cpp
+++ b/test/fp_tower_test.cpp
@@ -378,6 +378,10 @@ void testIo()
 void benchFp2()
 {
 	puts(__FUNCTION__);
+#ifndef NDEBUG
+	puts("skip bench in debug");
+	return;
+#endif
 	Fp2 x, y;
 	x.a.setStr("4");
 	x.b.setStr("464652165165");
diff --git a/test/paillier_test.cpp b/test/paillier_test.cpp
index 31d2b26f..7bb6e99e 100644
--- a/test/paillier_test.cpp
+++ b/test/paillier_test.cpp
@@ -5,7 +5,11 @@ CYBOZU_TEST_AUTO(paillier)
 {
 	using namespace mcl::paillier;
 	SecretKey sec;
+#ifndef NDEBUG
+	sec.init(512);
+#else
 	sec.init(2048);
+#endif
 	PublicKey pub;
 	sec.getPublicKey(pub);
 	mpz_class m1("12342340928409"), m2("23049820498204");
diff --git a/test/she_test.cpp b/test/she_test.cpp
index 9ef51c75..f49f29f9 100644
--- a/test/she_test.cpp
+++ b/test/she_test.cpp
@@ -43,6 +43,9 @@ double clk2msec(const cybozu::CpuClock& clk, int n)
 
 CYBOZU_TEST_AUTO(bench2)
 {
+#ifndef NDEBUG
+	puts("skip bench2 in debug");
+#endif
 	puts("msec");
 	setTryNum(1 << 16);
 	useDecG1ViaGT(true);
@@ -571,7 +574,7 @@ CYBOZU_TEST_AUTO(io)
 	}
 }
 
-#ifndef PAPER
+#if !defined(PAPER) && defined(NDEBUG)
 CYBOZU_TEST_AUTO(bench)
 {
 	const SecretKey& sec = g_sec;

From b226409e4aee68ea3090c78b1f88ae1b2a7a3a70 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 18 Feb 2021 10:33:33 +0900
Subject: [PATCH 443/553] prepare for fp2Dbl_{mul,sqr}Pre

---
 src/fp_static_code.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp
index ef27f1bc..bb7ab4b6 100644
--- a/src/fp_static_code.hpp
+++ b/src/fp_static_code.hpp
@@ -38,6 +38,8 @@ void mclx_Fp2_mul(Unit*, const Unit*, const Unit*);
 void mclx_Fp2_sqr(Unit*, const Unit*);
 void mclx_Fp2_mul2(Unit*, const Unit*);
 void mclx_Fp2_mul_xi(Unit*, const Unit*);
+void mclx_Fp2Dbl_mulPre(Unit*, const Unit*, const Unit*);
+void mclx_Fp2Dbl_sqrPre(Unit*, const Unit*);
 void mclx_Fp2Dbl_mul_xi(Unit*, const Unit*);
 
 Unit mclx_Fr_addPre(Unit*, const Unit*, const Unit*);
@@ -80,6 +82,8 @@ void setStaticCode(mcl::fp::Op& op)
 		op.fp2_sqrA_ = mclx_Fp2_sqr;
 		op.fp2_mul2A_ = mclx_Fp2_mul2;
 		op.fp2_mul_xiA_ = mclx_Fp2_mul_xi;
+		op.fp2Dbl_mulPreA_ = 0;//mclx_Fp2Dbl_mulPre;
+		op.fp2Dbl_sqrPreA_ = 0;//mclx_Fp2Dbl_sqrPre;
 		op.fp2Dbl_mul_xiA_ = mclx_Fp2Dbl_mul_xi;
 		op.fp_preInv = mclx_Fp_preInv;
 	} else {

From 7c042156d9cfc279439f2e11863893ab040de238 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 18 Feb 2021 10:46:32 +0900
Subject: [PATCH 444/553] add -fPIC to cmake

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index df7e4f11..04c85897 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -154,7 +154,7 @@ if(MSVC)
 else()
 	# Set compiler flags for warnings
 	set(MCL_COMPILE_OPTIONS -Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align
-		-Wwrite-strings -Wfloat-equal -Wpointer-arith -march=native)
+		-Wwrite-strings -Wfloat-equal -Wpointer-arith -DNDEBUG -O3 -fPIC)
 
 	target_compile_options(mcl PRIVATE ${MCL_COMPILE_OPTIONS})
 	target_compile_options(mcl_st PRIVATE ${MCL_COMPILE_OPTIONS})

From d3d15040639577229df82b1c1f8a8490a6c3b544 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 18 Feb 2021 16:30:32 +0900
Subject: [PATCH 445/553] change build status to travis-ci.com

---
 readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readme.md b/readme.md
index 7a455409..f557f75e 100644
--- a/readme.md
+++ b/readme.md
@@ -1,4 +1,4 @@
-[![Build Status](https://travis-ci.org/herumi/mcl.png)](https://travis-ci.org/herumi/mcl)
+[![Build Status](https://api.travis-ci.com/herumi/mcl.svg?branch=master)](https://travis-ci.com/github/herumi/mcl)
 
 # mcl
 

From 765ca7b89e48f219ea6a9020ca35422ea7287007 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 19 Feb 2021 14:53:26 +0900
Subject: [PATCH 446/553] under refactoring fp_add

---
 src/fp_generator.hpp | 86 ++++++++++++++++++++++++++++++++------------
 1 file changed, 63 insertions(+), 23 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 400c11db..b461d53a 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -252,6 +252,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	Label mulPreL;
 	Label fpDbl_modL;
 	Label fp_mulL;
+	Label fp_addL;
 	const uint64_t *p_;
 	uint64_t rp_;
 	int pn_;
@@ -490,12 +491,13 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void gen_raw_add(const RegExp& pz, const RegExp& px, const RegExp& py, const Reg64& t, int n)
 	{
-		mov(t, ptr [px]);
-		add(t, ptr [py]);
-		mov(ptr [pz], t);
-		for (int i = 1; i < n; i++) {
+		for (int i = 0; i < n; i++) {
 			mov(t, ptr [px + i * 8]);
-			adc(t, ptr [py + i * 8]);
+			if (i == 0) {
+				add(t, ptr [py + i * 8]);
+			} else {
+				adc(t, ptr [py + i * 8]);
+			}
 			mov(ptr [pz + i * 8], t);
 		}
 	}
@@ -659,19 +661,12 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64 *fullReg = isFullBit_ ? &t[pn_ * 2] : 0;
 		load_rm(p0, px);
 		add_rm(p0, py, withCarry);
-		mov_rr(p1, p0);
 		if (isFullBit_) {
 			mov(*fullReg, 0);
 			adc(*fullReg, 0);
 		}
 		lea(rax, ptr[rip+pL_]);
-		sub_rm(p1, rax);
-		if (fullReg) {
-			sbb(*fullReg, 0);
-		}
-		for (size_t i = 0; i < p1.size(); i++) {
-			cmovc(p1[i], p0[i]);
-		}
+		sub_p_mod(p1, p0, rax, fullReg);
 		store_mr(pz, p1);
 	}
 	/*
@@ -738,8 +733,36 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	L(exit);
 		store_mr(pz, t1);
 	}
+	void gen_raw_fp_add6_2(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry = false, const Reg64 *H = 0)
+	{
+		const Pack& t1 = t.sub(0, pn_);
+		const Pack& t2 = t.sub(pn_, pn_);
+		load_rm(t1, px);
+		add_rm(t1, py, withCarry);
+		if (H) {
+			mov(*H, 0);
+			adc(*H, 0);
+		}
+		sub_p_mod(t2, t1, rip + pL_, H);
+		store_mr(pz, t2);
+	}
 	void gen_fp_add6()
 	{
+#if 1
+		const int n = pn_ * 2 - 2;
+		StackFrame sf(this, 3, n | UseRDX, 0, false);
+		call(fp_addL);
+		sf.close();
+		const Reg64& pz = sf.p[0];
+		const Reg64& px = sf.p[1];
+		const Reg64& py = sf.p[2];
+		Pack t = sf.t;
+		t.append(rdx);
+		t.append(rax);
+	L(fp_addL);
+		gen_raw_fp_add6_2(pz, px, py, t);
+		ret();
+#else
 		/*
 			cmov is faster than jmp
 		*/
@@ -752,10 +775,33 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		t2.append(rax);
 		t2.append(px); // destory after used
 		gen_raw_fp_add6(pz, px, py, t1, t2, false);
+#endif
 	}
 	void3u gen_fp_add()
 	{
+		if (!(pn_ < 6 || (pn_ == 6 && !isFullBit_))) return 0;
 		void3u func = getCurr<void3u>();
+#if 1
+		int n = pn_ * 2 - 2;
+		if (isFullBit_) {
+			n++;
+		}
+		StackFrame sf(this, 3, n | UseRDX, 0, false);
+		call(fp_addL);
+		sf.close();
+
+		const Reg64& pz = sf.p[0];
+		const Reg64& px = sf.p[1];
+		const Reg64& py = sf.p[2];
+		Pack t = sf.t;
+		t.append(rdx);
+		t.append(rax);
+		const Reg64 *H = isFullBit_ ? &t[t.size() - 1] : 0;
+	L(fp_addL);
+		gen_raw_fp_add6_2(pz, px, py, t, false, H);
+		ret();
+		return func;
+#else
 		if (pn_ <= 4) {
 			gen_fp_add_le4();
 			return func;
@@ -799,6 +845,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 #endif
 		outLocalLabel();
 		return func;
+#endif
 	}
 	void3u gen_fpDbl_add()
 	{
@@ -944,7 +991,8 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	/*
 		y = (x >= p[]) x - p[] : x
 	*/
-	void sub_p_mod(const Pack& y, const Pack& x, const RegExp& p, const Reg64 *H = 0)
+	template<class ADDR>
+	void sub_p_mod(const Pack& y, const Pack& x, const ADDR& p, const Reg64 *H = 0)
 	{
 		mov_rr(y, x);
 		sub_rm(y, p);
@@ -3673,15 +3721,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const RegExp& xa = sf.p[1];
 		const RegExp& xb = sf.p[1] + FpByte_ * 2;
 		// [rsp] = x.a + x.b
-		for (int i = 0; i < pn_ * 2; i++) {
-			mov(rax, ptr[xa + i * 8]);
-			if (i == 0) {
-				add(rax, ptr[xb + i * 8]);
-			} else {
-				adc(rax, ptr[xb + i * 8]);
-			}
-			mov(ptr[rsp + i * 8], rax);
-		}
+		gen_raw_add(rsp, xa, xb, rax, pn_ * 2);
 		// low : x.a =  x.a - x.b
 		load_rm(t1, xa);
 		sub_rm(t1, xb);

From acd4ef5aadf2461218a22b1e65057948bd64ee75 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 19 Feb 2021 17:08:48 +0900
Subject: [PATCH 447/553] under refactoring fp_add

---
 src/fp_generator.hpp | 66 +++++++++++++++++++++++++++++++-------------
 1 file changed, 47 insertions(+), 19 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index b461d53a..5d4c55fc 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -252,7 +252,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	Label mulPreL;
 	Label fpDbl_modL;
 	Label fp_mulL;
-	Label fp_addL;
 	const uint64_t *p_;
 	uint64_t rp_;
 	int pn_;
@@ -733,7 +732,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	L(exit);
 		store_mr(pz, t1);
 	}
-	void gen_raw_fp_add6_2(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry = false, const Reg64 *H = 0)
+	void gen_raw_fp_add_2(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry = false, const Reg64 *H = 0)
 	{
 		const Pack& t1 = t.sub(0, pn_);
 		const Pack& t2 = t.sub(pn_, pn_);
@@ -750,18 +749,14 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	{
 #if 1
 		const int n = pn_ * 2 - 2;
-		StackFrame sf(this, 3, n | UseRDX, 0, false);
-		call(fp_addL);
-		sf.close();
+		StackFrame sf(this, 3, n | UseRDX);
 		const Reg64& pz = sf.p[0];
 		const Reg64& px = sf.p[1];
 		const Reg64& py = sf.p[2];
 		Pack t = sf.t;
 		t.append(rdx);
 		t.append(rax);
-	L(fp_addL);
-		gen_raw_fp_add6_2(pz, px, py, t);
-		ret();
+		gen_raw_fp_add_2(pz, px, py, t);
 #else
 		/*
 			cmov is faster than jmp
@@ -782,24 +777,19 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		if (!(pn_ < 6 || (pn_ == 6 && !isFullBit_))) return 0;
 		void3u func = getCurr<void3u>();
 #if 1
-		int n = pn_ * 2 - 2;
+		int n = pn_ * 2 - 1;
 		if (isFullBit_) {
 			n++;
 		}
-		StackFrame sf(this, 3, n | UseRDX, 0, false);
-		call(fp_addL);
-		sf.close();
+		StackFrame sf(this, 3, n);
 
 		const Reg64& pz = sf.p[0];
 		const Reg64& px = sf.p[1];
 		const Reg64& py = sf.p[2];
 		Pack t = sf.t;
-		t.append(rdx);
 		t.append(rax);
-		const Reg64 *H = isFullBit_ ? &t[t.size() - 1] : 0;
-	L(fp_addL);
-		gen_raw_fp_add6_2(pz, px, py, t, false, H);
-		ret();
+		const Reg64 *H = isFullBit_ ? &rax : 0;
+		gen_raw_fp_add_2(pz, px, py, t, false, H);
 		return func;
 #else
 		if (pn_ <= 4) {
@@ -849,6 +839,24 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void3u gen_fpDbl_add()
 	{
+#if 1
+		if (!(pn_ < 6 || (pn_ == 6 && !isFullBit_))) return 0;
+		void3u func = getCurr<void3u>();
+		int n = pn_ * 2 - 1;
+		if (isFullBit_) {
+			n++;
+		}
+		StackFrame sf(this, 3, n);
+		const Reg64& pz = sf.p[0];
+		const Reg64& px = sf.p[1];
+		const Reg64& py = sf.p[2];
+		Pack t = sf.t;
+		t.append(rax);
+		const Reg64 *H = isFullBit_ ? &rax : 0;
+		gen_raw_add(pz, px, py, rax, pn_);
+		gen_raw_fp_add_2(pz + 8 * pn_, px + 8 * pn_, py + 8 * pn_, t, true, H);
+		return func;
+#else
 		void3u func = getCurr<void3u>();
 		if (pn_ <= 4) {
 			int tn = pn_ * 2 + (isFullBit_ ? 1 : 0);
@@ -873,6 +881,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			return func;
 		}
 		return 0;
+#endif
 	}
 	void3u gen_fpDbl_sub()
 	{
@@ -3226,7 +3235,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		}
 	}
 	// y[i] &= t
-	void andPack(const Pack& y, const Reg64& t)
+	void and_pr(const Pack& y, const Reg64& t)
 	{
 		for (int i = 0; i < (int)y.size(); i++) {
 			and_(y[i], t);
@@ -3732,7 +3741,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		lea(rax, ptr[rip + pL_]);
 		load_rm(t2, rax); // t2 = p
 		sbb(rax, rax);
-		andPack(t2, rax);
+		and_pr(t2, rax);
 		add_rr(t1, t2); // mod p
 		store_mr(ya + pn_ * 8, t1);
 
@@ -3784,6 +3793,24 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void3u gen_fp2_add()
 	{
+#if 1
+		if (!(pn_ < 6 || (pn_ == 6 && !isFullBit_))) return 0;
+		void3u func = getCurr<void3u>();
+		int n = pn_ * 2 - 1;
+		if (isFullBit_) {
+			n++;
+		}
+		StackFrame sf(this, 3, n);
+		const Reg64& pz = sf.p[0];
+		const Reg64& px = sf.p[1];
+		const Reg64& py = sf.p[2];
+		Pack t = sf.t;
+		t.append(rax);
+		const Reg64 *H = isFullBit_ ? &rax : 0;
+		gen_raw_fp_add_2(pz, px, py, t, false, H);
+		gen_raw_fp_add_2(pz + FpByte_, px + FpByte_, py + FpByte_, t, false, H);
+		return func;
+#else
 		void3u func = getCurr<void3u>();
 		if (pn_ == 4 && !isFullBit_) {
 			gen_fp2_add4();
@@ -3794,6 +3821,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			return func;
 		}
 		return 0;
+#endif
 	}
 	void3u gen_fp2_sub()
 	{

From d59dd09a344661779eb2dd407c6a9dcf2c03bc76 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 19 Feb 2021 17:12:10 +0900
Subject: [PATCH 448/553] remove unused code

---
 src/fp_generator.hpp | 136 -------------------------------------------
 1 file changed, 136 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 5d4c55fc..79c8768a 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -745,38 +745,10 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		sub_p_mod(t2, t1, rip + pL_, H);
 		store_mr(pz, t2);
 	}
-	void gen_fp_add6()
-	{
-#if 1
-		const int n = pn_ * 2 - 2;
-		StackFrame sf(this, 3, n | UseRDX);
-		const Reg64& pz = sf.p[0];
-		const Reg64& px = sf.p[1];
-		const Reg64& py = sf.p[2];
-		Pack t = sf.t;
-		t.append(rdx);
-		t.append(rax);
-		gen_raw_fp_add_2(pz, px, py, t);
-#else
-		/*
-			cmov is faster than jmp
-		*/
-		StackFrame sf(this, 3, 10);
-		const Reg64& pz = sf.p[0];
-		const Reg64& px = sf.p[1];
-		const Reg64& py = sf.p[2];
-		Pack t1 = sf.t.sub(0, 6);
-		Pack t2 = sf.t.sub(6);
-		t2.append(rax);
-		t2.append(px); // destory after used
-		gen_raw_fp_add6(pz, px, py, t1, t2, false);
-#endif
-	}
 	void3u gen_fp_add()
 	{
 		if (!(pn_ < 6 || (pn_ == 6 && !isFullBit_))) return 0;
 		void3u func = getCurr<void3u>();
-#if 1
 		int n = pn_ * 2 - 1;
 		if (isFullBit_) {
 			n++;
@@ -791,55 +763,9 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64 *H = isFullBit_ ? &rax : 0;
 		gen_raw_fp_add_2(pz, px, py, t, false, H);
 		return func;
-#else
-		if (pn_ <= 4) {
-			gen_fp_add_le4();
-			return func;
-		}
-		if (pn_ == 6) {
-			gen_fp_add6();
-			return func;
-		}
-		StackFrame sf(this, 3, 0, pn_ * 8);
-		const Reg64& pz = sf.p[0];
-		const Reg64& px = sf.p[1];
-		const Reg64& py = sf.p[2];
-		const Xbyak::CodeGenerator::LabelType jmpMode = pn_ < 5 ? T_AUTO : T_NEAR;
-
-		inLocalLabel();
-		gen_raw_add(pz, px, py, rax, pn_);
-		lea(px, ptr[rip+pL_]);
-		if (isFullBit_) {
-			jc(".over", jmpMode);
-		}
-#ifdef MCL_USE_JMP
-		for (int i = 0; i < pn_; i++) {
-			mov(py, ptr [pz + (pn_ - 1 - i) * 8]); // destroy py
-			cmp(py, ptr [px + (pn_ - 1 - i) * 8]);
-			jc(".exit", jmpMode);
-			jnz(".over", jmpMode);
-		}
-		L(".over");
-			gen_raw_sub(pz, pz, px, rax, pn_);
-		L(".exit");
-#else
-		gen_raw_sub(rsp, pz, px, rax, pn_);
-		jc(".exit", jmpMode);
-		gen_mov(pz, rsp, rax, pn_);
-		if (isFullBit_) {
-			jmp(".exit", jmpMode);
-			L(".over");
-			gen_raw_sub(pz, pz, px, rax, pn_);
-		}
-		L(".exit");
-#endif
-		outLocalLabel();
-		return func;
-#endif
 	}
 	void3u gen_fpDbl_add()
 	{
-#if 1
 		if (!(pn_ < 6 || (pn_ == 6 && !isFullBit_))) return 0;
 		void3u func = getCurr<void3u>();
 		int n = pn_ * 2 - 1;
@@ -856,32 +782,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		gen_raw_add(pz, px, py, rax, pn_);
 		gen_raw_fp_add_2(pz + 8 * pn_, px + 8 * pn_, py + 8 * pn_, t, true, H);
 		return func;
-#else
-		void3u func = getCurr<void3u>();
-		if (pn_ <= 4) {
-			int tn = pn_ * 2 + (isFullBit_ ? 1 : 0);
-			StackFrame sf(this, 3, tn);
-			const Reg64& pz = sf.p[0];
-			const Reg64& px = sf.p[1];
-			const Reg64& py = sf.p[2];
-			gen_raw_add(pz, px, py, rax, pn_);
-			gen_raw_fp_add(pz + 8 * pn_, px + 8 * pn_, py + 8 * pn_, sf.t, true);
-			return func;
-		} else if (pn_ == 6 && !isFullBit_) {
-			StackFrame sf(this, 3, 10);
-			const Reg64& pz = sf.p[0];
-			const Reg64& px = sf.p[1];
-			const Reg64& py = sf.p[2];
-			gen_raw_add(pz, px, py, rax, pn_);
-			Pack t1 = sf.t.sub(0, 6);
-			Pack t2 = sf.t.sub(6);
-			t2.append(rax);
-			t2.append(py);
-			gen_raw_fp_add6(pz + pn_ * 8, px + pn_ * 8, py + pn_ * 8, t1, t2, true);
-			return func;
-		}
-		return 0;
-#endif
 	}
 	void3u gen_fpDbl_sub()
 	{
@@ -3757,29 +3657,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		store_mr(yb + pn_ * 8, t2);
 		return func;
 	}
-	void gen_fp2_add4()
-	{
-		assert(!isFullBit_);
-		StackFrame sf(this, 3, 8);
-		gen_raw_fp_add(sf.p[0], sf.p[1], sf.p[2], sf.t, false);
-		gen_raw_fp_add(sf.p[0] + FpByte_, sf.p[1] + FpByte_, sf.p[2] + FpByte_, sf.t, false);
-	}
-	void gen_fp2_add6()
-	{
-		assert(!isFullBit_);
-		StackFrame sf(this, 3, 10);
-		const Reg64& pz = sf.p[0];
-		const Reg64& px = sf.p[1];
-		const Reg64& py = sf.p[2];
-		Pack t1 = sf.t.sub(0, 6);
-		Pack t2 = sf.t.sub(6);
-		t2.append(rax);
-		t2.append(px); // destory after used
-		vmovq(xm0, px);
-		gen_raw_fp_add6(pz, px, py, t1, t2, false);
-		vmovq(px, xm0);
-		gen_raw_fp_add6(pz + FpByte_, px + FpByte_, py + FpByte_, t1, t2, false);
-	}
 	void gen_fp2_sub6()
 	{
 		StackFrame sf(this, 3, 5);
@@ -3793,7 +3670,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void3u gen_fp2_add()
 	{
-#if 1
 		if (!(pn_ < 6 || (pn_ == 6 && !isFullBit_))) return 0;
 		void3u func = getCurr<void3u>();
 		int n = pn_ * 2 - 1;
@@ -3810,18 +3686,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		gen_raw_fp_add_2(pz, px, py, t, false, H);
 		gen_raw_fp_add_2(pz + FpByte_, px + FpByte_, py + FpByte_, t, false, H);
 		return func;
-#else
-		void3u func = getCurr<void3u>();
-		if (pn_ == 4 && !isFullBit_) {
-			gen_fp2_add4();
-			return func;
-		}
-		if (pn_ == 6 && !isFullBit_) {
-			gen_fp2_add6();
-			return func;
-		}
-		return 0;
-#endif
 	}
 	void3u gen_fp2_sub()
 	{

From aa6b6d0e15d07ebfb2be58777c9a84ea8abfc611 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 19 Feb 2021 17:56:45 +0900
Subject: [PATCH 449/553] refactor fp_add

---
 src/fp_generator.hpp | 112 ++++++-------------------------------------
 1 file changed, 15 insertions(+), 97 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 79c8768a..3f727e82 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -649,25 +649,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			mov(ptr [pz + i * 8], t);
 		}
 	}
-	/*
-		pz[] = px[] + py[] mod p[]
-		use rax, t
-	*/
-	void gen_raw_fp_add(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry)
-	{
-		const Pack& p0 = t.sub(0, pn_);
-		const Pack& p1 = t.sub(pn_, pn_);
-		const Reg64 *fullReg = isFullBit_ ? &t[pn_ * 2] : 0;
-		load_rm(p0, px);
-		add_rm(p0, py, withCarry);
-		if (isFullBit_) {
-			mov(*fullReg, 0);
-			adc(*fullReg, 0);
-		}
-		lea(rax, ptr[rip+pL_]);
-		sub_p_mod(p1, p0, rax, fullReg);
-		store_mr(pz, p1);
-	}
 	/*
 		pz[] = px[] - py[] mod p[]
 		use rax, t
@@ -687,16 +668,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		add_rr(p0, p1);
 		store_mr(pz, p0);
 	}
-	void gen_fp_add_le4()
-	{
-		assert(pn_ <= 4);
-		const int tn = pn_ * 2 + (isFullBit_ ? 1 : 0);
-		StackFrame sf(this, 3, tn);
-		const Reg64& pz = sf.p[0];
-		const Reg64& px = sf.p[1];
-		const Reg64& py = sf.p[2];
-		gen_raw_fp_add(pz, px, py, sf.t, false);
-	}
 	void gen_fp_sub_le4()
 	{
 		assert(pn_ <= 4);
@@ -707,32 +678,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& py = sf.p[2];
 		gen_raw_fp_sub(pz, px, py, sf.t, false);
 	}
-	/*
-		add(pz, px, py);
-		size of t1, t2 == 6
-		destroy t0, t1
-	*/
-	void gen_raw_fp_add6(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t1, const Pack& t2, bool withCarry)
-	{
-		load_rm(t1, px);
-		add_rm(t1, py, withCarry);
-		Label exit;
-		if (isFullBit_) {
-			jnc("@f");
-			lea(t2[0], ptr[rip+pL_]); // t2[0] is not used
-			sub_rm(t1, t2[0]);
-			jmp(exit);
-		L("@@");
-		}
-		mov_rr(t2, t1);
-		sub_rm(t2, rip + pL_);
-		for (int i = 0; i < 6; i++) {
-			cmovnc(t1[i], t2[i]);
-		}
-	L(exit);
-		store_mr(pz, t1);
-	}
-	void gen_raw_fp_add_2(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry = false, const Reg64 *H = 0)
+	void gen_raw_fp_add(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry = false, const Reg64 *H = 0)
 	{
 		const Pack& t1 = t.sub(0, pn_);
 		const Pack& t2 = t.sub(pn_, pn_);
@@ -761,7 +707,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		Pack t = sf.t;
 		t.append(rax);
 		const Reg64 *H = isFullBit_ ? &rax : 0;
-		gen_raw_fp_add_2(pz, px, py, t, false, H);
+		gen_raw_fp_add(pz, px, py, t, false, H);
 		return func;
 	}
 	void3u gen_fpDbl_add()
@@ -780,7 +726,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		t.append(rax);
 		const Reg64 *H = isFullBit_ ? &rax : 0;
 		gen_raw_add(pz, px, py, rax, pn_);
-		gen_raw_fp_add_2(pz + 8 * pn_, px + 8 * pn_, py + 8 * pn_, t, true, H);
+		gen_raw_fp_add(pz + 8 * pn_, px + 8 * pn_, py + 8 * pn_, t, true, H);
 		return func;
 	}
 	void3u gen_fpDbl_sub()
@@ -2584,7 +2530,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			g_adc(z[i], x[i], t);
 		}
 	}
-	void add_m_m(const RegExp& mz, const RegExp& mx, const Reg64& t, int n)
+	void add_mm(const RegExp& mz, const RegExp& mx, const Reg64& t, int n)
 	{
 		for (int i = 0; i < n; i++) {
 			mov(t, ptr [mx + i * 8]);
@@ -2843,7 +2789,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		sub_m_mp_m(t3, t2, rr, t);
 		jnc("@f");
 		// pr[] += p[]
-		add_m_m(t3, t2, t, pn_);
+		add_mm(t3, t2, t, pn_);
 	L("@@");
 		outLocalLabel();
 	}
@@ -3683,8 +3629,8 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		Pack t = sf.t;
 		t.append(rax);
 		const Reg64 *H = isFullBit_ ? &rax : 0;
-		gen_raw_fp_add_2(pz, px, py, t, false, H);
-		gen_raw_fp_add_2(pz + FpByte_, px + FpByte_, py + FpByte_, t, false, H);
+		gen_raw_fp_add(pz, px, py, t, false, H);
+		gen_raw_fp_add(pz + FpByte_, px + FpByte_, py + FpByte_, t, false, H);
 		return func;
 	}
 	void3u gen_fp2_sub()
@@ -3876,6 +3822,8 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	{
 		if (isFullBit_) return 0;
 		if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0;
+		bool nocarry = (p_[pn_ - 1] >> 62) == 0;
+		if (!nocarry) return 0;
 		void2u func = getCurr<void2u>();
 
 		const RegExp y = rsp + 0 * 8;
@@ -3883,39 +3831,23 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Ext1 t1(FpByte_, rsp, 2 * 8);
 		const Ext1 t2(FpByte_, rsp, t1.next);
 		const Ext1 t3(FpByte_, rsp, t2.next);
-		bool nocarry = (p_[pn_ - 1] >> 62) == 0;
 		StackFrame sf(this, 3, 10 | UseRDX, t3.next);
 		mov(ptr [y], gp0);
 		mov(ptr [x], gp1);
 		// t1 = b + b
 		lea(gp0, ptr [t1]);
-		if (nocarry) {
-			for (int i = 0; i < pn_; i++) {
-				mov(rax, ptr [gp1 + FpByte_ + i * 8]);
-				if (i == 0) {
-					add(rax, rax);
-				} else {
-					adc(rax, rax);
-				}
-				mov(ptr [gp0 + i * 8], rax);
-			}
-		} else {
-			if (pn_ == 4) {
-				gen_raw_fp_add(gp0, gp1 + FpByte_, gp1 + FpByte_, sf.t, false);
-			} else {
-				assert(pn_ == 6);
-				Pack t = sf.t.sub(6, 4);
-				t.append(rax);
-				t.append(rdx);
-				gen_raw_fp_add6(gp0, gp1 + FpByte_, gp1 + FpByte_, sf.t.sub(0, 6), t, false);
-			}
+		{
+			Pack t = sf.t.sub(0, pn_);
+			load_rm(t, gp1 + FpByte_);
+			shl1(t);
+			store_mr(gp0, t);
 		}
 		// t1 = 2ab
 		mov(gp1, gp0);
 		mov(gp2, ptr [x]);
 		call(fp_mulL);
 
-		if (nocarry) {
+		{
 			Pack t = sf.t;
 			t.append(rdx);
 			t.append(gp1);
@@ -3939,20 +3871,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			add_rm(a, rax);
 			sub_rr(a, b);
 			store_mr(t3, a);
-		} else {
-			mov(gp0, ptr [x]);
-			if (pn_ == 4) {
-				gen_raw_fp_add(t2, gp0, gp0 + FpByte_, sf.t, false);
-				gen_raw_fp_sub(t3, gp0, gp0 + FpByte_, sf.t, false);
-			} else {
-				assert(pn_ == 6);
-				Pack p1 = sf.t.sub(0, 6);
-				Pack p2 = sf.t.sub(6, 4);
-				p2.append(rax);
-				p2.append(rdx);
-				gen_raw_fp_add6(t2, gp0, gp0 + FpByte_, p1, p2, false);
-				gen_raw_fp_sub6(t3, gp0, gp0 + FpByte_, 0, p1, false);
-			}
 		}
 
 		mov(gp0, ptr [y]);

From 1fbb857884ed55d900495fe76aa6e9dffced7695 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 20 Feb 2021 18:03:49 +0900
Subject: [PATCH 450/553] fp_sub without jmp

---
 src/fp_generator.hpp | 54 ++++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 29 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 3f727e82..3a4981d6 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -755,6 +755,21 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		}
 		return 0;
 	}
+	void gen_raw_fp_sub_2(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry)
+	{
+		Pack t1 = t.sub(0, pn_);
+		Pack t2 = t.sub(pn_, pn_);
+		load_rm(t1, px);
+		sub_rm(t1, py, withCarry);
+		push(t1[0]);
+		lea(t1[0], ptr[rip + pL_]);
+		load_rm(t2, t1[0]);
+		sbb(t1[0], t1[0]);
+		and_pr(t2, t1[0]);
+		pop(t1[0]);
+		add_rr(t1, t2);
+		store_mr(pz, t1);
+	}
 	void gen_raw_fp_sub6(const RegExp& pz, const RegExp& px, const RegExp& py, int offset, const Pack& t, bool withCarry)
 	{
 		load_rm(t, px + offset);
@@ -767,39 +782,22 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	L("@@");
 		store_mr(pz + offset, t);
 	}
-	void gen_fp_sub6()
-	{
-		StackFrame sf(this, 3, 4);
-		const Reg64& pz = sf.p[0];
-		const Reg64& px = sf.p[1];
-		const Reg64& py = sf.p[2];
-		Pack t = sf.t;
-		t.append(rax);
-		t.append(px); // |t| = 6
-		gen_raw_fp_sub6(pz, px, py, 0, t, false);
-	}
 	void3u gen_fp_sub()
 	{
+		if (pn_ > 6) return 0;
 		void3u func = getCurr<void3u>();
-		if (pn_ <= 4) {
-			gen_fp_sub_le4();
-			return func;
-		}
-		if (pn_ == 6) {
-			gen_fp_sub6();
-			return func;
-		}
-		StackFrame sf(this, 3);
+		/*
+			micro-benchmark of jmp is faster than and-mask
+			but it's slower for pairings
+		*/
+		int n = pn_ * 2 - 1;
+		StackFrame sf(this, 3, n);
 		const Reg64& pz = sf.p[0];
 		const Reg64& px = sf.p[1];
 		const Reg64& py = sf.p[2];
-		const Xbyak::CodeGenerator::LabelType jmpMode = pn_ < 5 ? T_AUTO : T_NEAR;
-		Label exit;
-		gen_raw_sub(pz, px, py, rax, pn_);
-		jnc(exit, jmpMode);
-		lea(px, ptr[rip+pL_]);
-		gen_raw_add(pz, pz, px, rax, pn_);
-	L(exit);
+		Pack t = sf.t;
+		t.append(rax);
+		gen_raw_fp_sub_2(pz, px, py, t, false);
 		return func;
 	}
 	void2u gen_fp_neg()
@@ -3513,8 +3511,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		// almost same for pn_ == 6
 		if (pn_ != 4) return 0;
 		void2u func = getCurr<void2u>();
-		// almost same for pn_ == 6
-		if (pn_ != 4) return 0;
 		const RegExp y = rsp + 0 * 8;
 		const RegExp x = rsp + 1 * 8;
 		const Ext1 t1(FpByte_, rsp, 2 * 8);

From 6ff8b167273d716809535e078fcfec633a18d6cd Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 20 Feb 2021 20:48:31 +0900
Subject: [PATCH 451/553] refactor Fp2::sub

---
 src/fp_generator.hpp | 38 +++++++++++---------------------------
 1 file changed, 11 insertions(+), 27 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 3a4981d6..3861028b 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -3599,17 +3599,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		store_mr(yb + pn_ * 8, t2);
 		return func;
 	}
-	void gen_fp2_sub6()
-	{
-		StackFrame sf(this, 3, 5);
-		const Reg64& pz = sf.p[0];
-		const Reg64& px = sf.p[1];
-		const Reg64& py = sf.p[2];
-		Pack t = sf.t;
-		t.append(rax);
-		gen_raw_fp_sub6(pz, px, py, 0, t, false);
-		gen_raw_fp_sub6(pz, px, py, FpByte_, t, false);
-	}
 	void3u gen_fp2_add()
 	{
 		if (!(pn_ < 6 || (pn_ == 6 && !isFullBit_))) return 0;
@@ -3631,23 +3620,18 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void3u gen_fp2_sub()
 	{
+		if (pn_ > 6) return 0;
 		void3u func = getCurr<void3u>();
-		if (pn_ == 4 && !isFullBit_) {
-			gen_fp2_sub4();
-			return func;
-		}
-		if (pn_ == 6 && !isFullBit_) {
-			gen_fp2_sub6();
-			return func;
-		}
-		return 0;
-	}
-	void gen_fp2_sub4()
-	{
-		assert(!isFullBit_);
-		StackFrame sf(this, 3, 8);
-		gen_raw_fp_sub(sf.p[0], sf.p[1], sf.p[2], sf.t, false);
-		gen_raw_fp_sub(sf.p[0] + FpByte_, sf.p[1] + FpByte_, sf.p[2] + FpByte_, sf.t, false);
+		int n = pn_ * 2 - 1;
+		StackFrame sf(this, 3, n);
+		const Reg64& pz = sf.p[0];
+		const Reg64& px = sf.p[1];
+		const Reg64& py = sf.p[2];
+		Pack t = sf.t;
+		t.append(rax);
+		gen_raw_fp_sub_2(pz, px, py, t, false);
+		gen_raw_fp_sub_2(pz + FpByte_, px + FpByte_, py + FpByte_, t, false);
+		return func;
 	}
 	/*
 		for only xi_a = 1

From 022d37f762045b5378772e180789ccf2c1e31b93 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 21 Feb 2021 11:06:06 +0900
Subject: [PATCH 452/553] refactor Fp2Dbl::sub

---
 src/fp_generator.hpp | 43 +++++++++++--------------------------------
 1 file changed, 11 insertions(+), 32 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 3861028b..7dd58d4a 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -668,16 +668,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		add_rr(p0, p1);
 		store_mr(pz, p0);
 	}
-	void gen_fp_sub_le4()
-	{
-		assert(pn_ <= 4);
-		const int tn = pn_ * 2;
-		StackFrame sf(this, 3, tn);
-		const Reg64& pz = sf.p[0];
-		const Reg64& px = sf.p[1];
-		const Reg64& py = sf.p[2];
-		gen_raw_fp_sub(pz, px, py, sf.t, false);
-	}
 	void gen_raw_fp_add(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry = false, const Reg64 *H = 0)
 	{
 		const Pack& t1 = t.sub(0, pn_);
@@ -731,29 +721,18 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void3u gen_fpDbl_sub()
 	{
+		if (pn_ > 6) return 0;
 		void3u func = getCurr<void3u>();
-		if (pn_ <= 4) {
-			int tn = pn_ * 2;
-			StackFrame sf(this, 3, tn);
-			const Reg64& pz = sf.p[0];
-			const Reg64& px = sf.p[1];
-			const Reg64& py = sf.p[2];
-			gen_raw_sub(pz, px, py, rax, pn_);
-			gen_raw_fp_sub(pz + 8 * pn_, px + 8 * pn_, py + 8 * pn_, sf.t, true);
-			return func;
-		} else if (pn_ == 6) {
-			StackFrame sf(this, 3, 4);
-			const Reg64& pz = sf.p[0];
-			const Reg64& px = sf.p[1];
-			const Reg64& py = sf.p[2];
-			gen_raw_sub(pz, px, py, rax, pn_);
-			Pack t = sf.t;
-			t.append(rax);
-			t.append(px);
-			gen_raw_fp_sub6(pz, px, py, pn_ * 8, t, true);
-			return func;
-		}
-		return 0;
+		int n = pn_ * 2 - 1;
+		StackFrame sf(this, 3, n);
+		const Reg64& pz = sf.p[0];
+		const Reg64& px = sf.p[1];
+		const Reg64& py = sf.p[2];
+		Pack t = sf.t;
+		t.append(rax);
+		gen_raw_sub(pz, px, py, rax, pn_);
+		gen_raw_fp_sub_2(pz + pn_ * 8, px + pn_ * 8, py + pn_ * 8, t, true);
+		return func;
 	}
 	void gen_raw_fp_sub_2(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry)
 	{

From b566c3a6e73e0dcbc0a466a6098e633a0ccff383 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 21 Feb 2021 13:49:56 +0900
Subject: [PATCH 453/553] refactor Fp2Dbl::mul_xi

---
 src/fp_generator.hpp | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 7dd58d4a..7ffde595 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -642,7 +642,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	/*
 		pz[] = px[]
 	*/
-	void gen_mov(const RegExp& pz, const RegExp& px, const Reg64& t, int n)
+	void mov_mm(const RegExp& pz, const RegExp& px, const Reg64& t, int n)
 	{
 		for (int i = 0; i < n; i++) {
 			mov(t, ptr [px + i * 8]);
@@ -3553,24 +3553,11 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		// [rsp] = x.a + x.b
 		gen_raw_add(rsp, xa, xb, rax, pn_ * 2);
 		// low : x.a =  x.a - x.b
-		load_rm(t1, xa);
-		sub_rm(t1, xb);
-		store_mr(ya, t1);
-		// high : x.a = (x.a - x.b) % p
-		load_rm(t1, xa + pn_ * 8);
-		sub_rm(t1, xb + pn_ * 8, true);
-		lea(rax, ptr[rip + pL_]);
-		load_rm(t2, rax); // t2 = p
-		sbb(rax, rax);
-		and_pr(t2, rax);
-		add_rr(t1, t2); // mod p
-		store_mr(ya + pn_ * 8, t1);
+		gen_raw_sub(ya, xa, xb, rax, pn_);
+		gen_raw_fp_sub_2(ya + pn_ * 8, xa + pn_ * 8, xb + pn_ * 8, sf.t, true);
 
 		// low : y.b = [rsp]
-		for (int i = 0; i < pn_; i++) {
-			mov(rax, ptr[rsp + i * 8]);
-			mov(ptr[yb + i * 8], rax);
-		}
+		mov_mm(yb, rsp, rax, pn_);
 		// high : y.b = (x.a + x.b) % p
 		load_rm(t1, rsp + pn_ * 8);
 		lea(rax, ptr[rip + pL_]);

From fd26ac6644df0c043d1752e77e1650aacfa45ecf Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 21 Feb 2021 14:28:09 +0900
Subject: [PATCH 454/553] reduce memory access in Fp2::mul

---
 src/fp_generator.hpp | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 7ffde595..f95aff74 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -734,6 +734,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		gen_raw_fp_sub_2(pz + pn_ * 8, px + pn_ * 8, py + pn_ * 8, t, true);
 		return func;
 	}
+	// require t.size() >= pn_ * 2
 	void gen_raw_fp_sub_2(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry)
 	{
 		Pack t1 = t.sub(0, pn_);
@@ -3742,16 +3743,25 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			call(mulPreL);
 		}
 
-		gen_raw_sub(d1, d1, d0, rax, pn_ * 2);
-		gen_raw_sub(d1, d1, d2, rax, pn_ * 2);
+		{
+			Pack t = sf.t;
+			if (pn_ == 4) {
+				t = t.sub(0, pn_ * 2);
+			} else if (pn_ == 6) {
+				t.append(gp0);
+				t.append(gp2);
+			}
+			assert(t.size() == pn_ * 2);
 
-		gen_raw_sub(d0, d0, d2, rax, pn_);
-		if (pn_ == 4) {
-			gen_raw_fp_sub((RegExp)d0 + pn_ * 8, (RegExp)d0 + pn_ * 8, (RegExp)d2 + pn_ * 8, Pack(gt0, gt1, gt2, gt3, gt4, gt5, gt6, gt7), true);
-		} else {
-			lea(gp0, ptr[(RegExp)d0 + pn_ * 8]);
-			lea(gp2, ptr[(RegExp)d2 + pn_ * 8]);
-			gen_raw_fp_sub6(gp0, gp0, gp2, 0, sf.t.sub(0, 6), true);
+			load_rm(t, (RegExp)d1);
+			sub_rm(t, (RegExp)d0); // d1 -= d0
+			sub_rm(t, (RegExp)d2); // d1 -= d2
+			store_mr((RegExp)d1, t);
+
+			gen_raw_sub(d0, d0, d2, rax, pn_);
+			const RegExp& d0H = (RegExp)d0 + pn_ * 8;
+			const RegExp& d2H = (RegExp)d2 + pn_ * 8;
+			gen_raw_fp_sub_2(d0H, d0H, d2H, t, true);
 		}
 
 		mov(gp0, ptr [z]);

From 194057fd02eb06149385ca4bd0ea4892df36888f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 21 Feb 2021 14:33:36 +0900
Subject: [PATCH 455/553] use mov_mm

---
 src/fp_generator.hpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index f95aff74..98f60895 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -3834,10 +3834,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		lea(gp2, ptr [t3]);
 		call(fp_mulL);
 		mov(gp0, ptr [y]);
-		for (int i = 0; i < pn_; i++) {
-			mov(rax, ptr [(RegExp)t1 + i * 8]);
-			mov(ptr [gp0 + FpByte_ + i * 8], rax);
-		}
+		mov_mm(gp0 + FpByte_, t1, rax, pn_);
 		return func;
 	}
 };

From bf703d617ae240eac841ef03d12e19c6502d006c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 22 Feb 2021 16:10:43 +0900
Subject: [PATCH 456/553] enable Fp2Dbl_mulPre

---
 src/fp_generator.hpp   | 77 ++++++++++++++++++++++++------------------
 src/fp_static_code.hpp |  2 +-
 2 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 98f60895..35ca26d1 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -3423,10 +3423,9 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	void3u gen_fp2Dbl_mulPre()
 	{
 		if (isFullBit_) return 0;
-//		if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0;
-		// almost same for pn_ == 6
-		if (pn_ != 4) return 0;
+		if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0;
 		void3u func = getCurr<void3u>();
+		bool embedded = pn_ == 4;
 
 		const RegExp z = rsp + 0 * 8;
 		const RegExp x = rsp + 1 * 8;
@@ -3436,51 +3435,63 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Ext1 d2(FpByte_ * 2, rsp, t.next);
 		const int SS = d2.next;
 		StackFrame sf(this, 3, 10 | UseRDX, SS);
-		mov(ptr [z], gp0);
-		mov(ptr [x], gp1);
-		mov(ptr [y], gp2);
+		mov(ptr[z], gp0);
+		mov(ptr[x], gp1);
+		mov(ptr[y], gp2);
 		// s = a + b
 		gen_raw_add(s, gp1, gp1 + FpByte_, rax, pn_);
 		// t = c + d
 		gen_raw_add(t, gp2, gp2 + FpByte_, rax, pn_);
 		// d1 = (a + b)(c + d)
-		mov(gp0, ptr [z]);
-		add(gp0, FpByte_ * 2); // d1
-		lea(gp1, ptr [s]);
-		lea(gp2, ptr [t]);
-		call(mulPreL);
-		// d0 = a c
+		lea(gp0, ptr [gp0 + FpByte_ * 2]);
+		if (embedded) {
+			mulPre4(gp0, s, t, sf.t);
+		} else {
+			lea(gp1, ptr [s]);
+			lea(gp2, ptr [t]);
+			call(mulPreL);
+		}
+		// d0 = z.a = a c
 		mov(gp0, ptr [z]);
 		mov(gp1, ptr [x]);
 		mov(gp2, ptr [y]);
-		call(mulPreL);
-
-		// d2 = b d
-		lea(gp0, ptr [d2]);
+		if (embedded) {
+			mulPre4(gp0, gp1, gp2, sf.t);
+		} else {
+			call(mulPreL);
+		}
+		// d2 = z.b = b d
 		mov(gp1, ptr [x]);
 		add(gp1, FpByte_);
 		mov(gp2, ptr [y]);
 		add(gp2, FpByte_);
-		call(mulPreL);
+		if (embedded) {
+			mulPre4(d2, gp1, gp2, sf.t);
+		} else {
+			lea(gp0, ptr [d2]);
+			call(mulPreL);
+		}
 
-		mov(gp0, ptr [z]);
-		add(gp0, FpByte_ * 2); // d1
-		mov(gp1, gp0);
-		mov(gp2, ptr [z]);
-		gen_raw_sub(gp0, gp1, gp2, rax, pn_ * 2);
-		lea(gp2, ptr [d2]);
-		gen_raw_sub(gp0, gp1, gp2, rax, pn_ * 2);
+		{
+			Pack t = sf.t;
+			if (pn_ == 4) {
+				t = t.sub(0, pn_ * 2);
+			} else if (pn_ == 6) {
+				t.append(gp1);
+				t.append(gp2);
+			}
+			assert(t.size() == pn_ * 2);
 
-		mov(gp0, ptr [z]);
-		mov(gp1, gp0);
-		lea(gp2, ptr [d2]);
+			mov(gp0, ptr [z]);
+			load_rm(t, gp0 + FpByte_ * 2);
+			sub_rm(t, gp0); // d1 -= d0
+			sub_rm(t, (RegExp)d2); // d1 -= d2
+			store_mr(gp0 + FpByte_ * 2, t);
 
-		gen_raw_sub(gp0, gp1, gp2, rax, pn_);
-		if (pn_ == 4) {
-			gen_raw_fp_sub(gp0 + pn_ * 8, gp1 + pn_ * 8, gp2 + pn_ * 8, Pack(gt0, gt1, gt2, gt3, gt4, gt5, gt6, gt7), true);
-		} else {
-			assert(pn_ == 6);
-			gen_raw_fp_sub6(gp0, gp1, gp2, pn_ * 8, sf.t.sub(0, 6), true);
+			gen_raw_sub(gp0, gp0, d2, rax, pn_);
+			const RegExp& d0H = gp0 + pn_ * 8;
+			const RegExp& d2H = (RegExp)d2 + pn_ * 8;
+			gen_raw_fp_sub_2(d0H, d0H, d2H, t, true);
 		}
 		return func;
 	}
diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp
index bb7ab4b6..e2545620 100644
--- a/src/fp_static_code.hpp
+++ b/src/fp_static_code.hpp
@@ -82,7 +82,7 @@ void setStaticCode(mcl::fp::Op& op)
 		op.fp2_sqrA_ = mclx_Fp2_sqr;
 		op.fp2_mul2A_ = mclx_Fp2_mul2;
 		op.fp2_mul_xiA_ = mclx_Fp2_mul_xi;
-		op.fp2Dbl_mulPreA_ = 0;//mclx_Fp2Dbl_mulPre;
+		op.fp2Dbl_mulPreA_ = mclx_Fp2Dbl_mulPre;
 		op.fp2Dbl_sqrPreA_ = 0;//mclx_Fp2Dbl_sqrPre;
 		op.fp2Dbl_mul_xiA_ = mclx_Fp2Dbl_mul_xi;
 		op.fp_preInv = mclx_Fp_preInv;

From fcc57b16fad7c8b416b7efdd0c3055f4e746bd36 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 22 Feb 2021 16:11:06 +0900
Subject: [PATCH 457/553] unify fp2_mul and fp2Dbl_mulPre

---
 src/fp_generator.hpp | 95 ++++++++++----------------------------------
 1 file changed, 20 insertions(+), 75 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 35ca26d1..cdba5259 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -252,6 +252,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	Label mulPreL;
 	Label fpDbl_modL;
 	Label fp_mulL;
+	Label fp2Dbl_mulPreL;
 	const uint64_t *p_;
 	uint64_t rp_;
 	int pn_;
@@ -3427,6 +3428,11 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		void3u func = getCurr<void3u>();
 		bool embedded = pn_ == 4;
 
+		StackFrame sf(this, 3, 10 | UseRDX, 0, false);
+		call(fp2Dbl_mulPreL);
+		sf.close();
+
+	L(fp2Dbl_mulPreL);
 		const RegExp z = rsp + 0 * 8;
 		const RegExp x = rsp + 1 * 8;
 		const RegExp y = rsp + 2 * 8;
@@ -3434,7 +3440,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Ext1 t(FpByte_, rsp, s.next);
 		const Ext1 d2(FpByte_ * 2, rsp, t.next);
 		const int SS = d2.next;
-		StackFrame sf(this, 3, 10 | UseRDX, SS);
+		sub(rsp, SS);
 		mov(ptr[z], gp0);
 		mov(ptr[x], gp1);
 		mov(ptr[y], gp2);
@@ -3493,6 +3499,8 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			const RegExp& d2H = (RegExp)d2 + pn_ * 8;
 			gen_raw_fp_sub_2(d0H, d0H, d2H, t, true);
 		}
+		add(rsp, SS);
+		ret();
 		return func;
 	}
 	void2u gen_fp2Dbl_sqrPre()
@@ -3705,83 +3713,20 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		if (isFullBit_) return 0;
 		if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0;
 		void3u func = getCurr<void3u>();
-		bool embedded = pn_ == 4;
-
-		const RegExp z = rsp + 0 * 8;
-		const RegExp x = rsp + 1 * 8;
-		const RegExp y = rsp + 2 * 8;
-		const Ext1 s(FpByte_, rsp, 3 * 8);
-		const Ext1 t(FpByte_, rsp, s.next);
-		const Ext1 d0(FpByte_ * 2, rsp, t.next);
-		const Ext1 d1(FpByte_ * 2, rsp, d0.next);
-		const Ext1 d2(FpByte_ * 2, rsp, d1.next);
-		const int SS = d2.next;
-		StackFrame sf(this, 3, 10 | UseRDX, SS);
-		mov(ptr[z], gp0);
-		mov(ptr[x], gp1);
-		mov(ptr[y], gp2);
-		// s = a + b
-		gen_raw_add(s, gp1, gp1 + FpByte_, rax, pn_);
-		// t = c + d
-		gen_raw_add(t, gp2, gp2 + FpByte_, rax, pn_);
-		// d1 = (a + b)(c + d)
-		if (embedded) {
-			mulPre4(d1, s, t, sf.t);
-		} else {
-			lea(gp0, ptr [d1]);
-			lea(gp1, ptr [s]);
-			lea(gp2, ptr [t]);
-			call(mulPreL);
-		}
-		// d0 = a c
-		mov(gp1, ptr [x]);
-		mov(gp2, ptr [y]);
-		if (embedded) {
-			mulPre4(d0, gp1, gp2, sf.t);
-		} else {
-			lea(gp0, ptr [d0]);
-			call(mulPreL);
-		}
-		// d2 = b d
-		mov(gp1, ptr [x]);
-		add(gp1, FpByte_);
-		mov(gp2, ptr [y]);
-		add(gp2, FpByte_);
-		if (embedded) {
-			mulPre4(d2, gp1, gp2, sf.t);
-		} else {
-			lea(gp0, ptr [d2]);
-			call(mulPreL);
-		}
-
-		{
-			Pack t = sf.t;
-			if (pn_ == 4) {
-				t = t.sub(0, pn_ * 2);
-			} else if (pn_ == 6) {
-				t.append(gp0);
-				t.append(gp2);
-			}
-			assert(t.size() == pn_ * 2);
-
-			load_rm(t, (RegExp)d1);
-			sub_rm(t, (RegExp)d0); // d1 -= d0
-			sub_rm(t, (RegExp)d2); // d1 -= d2
-			store_mr((RegExp)d1, t);
-
-			gen_raw_sub(d0, d0, d2, rax, pn_);
-			const RegExp& d0H = (RegExp)d0 + pn_ * 8;
-			const RegExp& d2H = (RegExp)d2 + pn_ * 8;
-			gen_raw_fp_sub_2(d0H, d0H, d2H, t, true);
-		}
-
-		mov(gp0, ptr [z]);
-		lea(gp1, ptr[d0]);
+		int stackSize = 8 + FpByte_ * 4;
+		StackFrame sf(this, 3, 10 | UseRDX, stackSize);
+		const RegExp d = rsp + 8;
+		mov(ptr[rsp], gp0);
+		lea(gp0, ptr [d]);
+		// d <- x * y
+		call(fp2Dbl_mulPreL);
+		mov(gp0, ptr [rsp]);
+		lea(gp1, ptr [d]);
 		call(fpDbl_modL);
 
-		mov(gp0, ptr [z]);
+		mov(gp0, ptr [rsp]);
 		add(gp0, FpByte_);
-		lea(gp1, ptr[d1]);
+		lea(gp1, ptr[d + FpByte_ * 2]);
 		call(fpDbl_modL);
 		return func;
 	}

From 5dc683fea30d2217659516fdfb4f73b0684f9d19 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Feb 2021 09:35:31 +0900
Subject: [PATCH 458/553] enable Fp2Dbl::sqrPre

---
 src/fp_generator.hpp   | 13 +++----------
 src/fp_static_code.hpp |  2 +-
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index cdba5259..181e1032 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -3506,9 +3506,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	void2u gen_fp2Dbl_sqrPre()
 	{
 		if (isFullBit_) return 0;
-//		if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0;
-		// almost same for pn_ == 6
-		if (pn_ != 4) return 0;
+		if (pn_ > 6) return 0;
 		void2u func = getCurr<void2u>();
 		const RegExp y = rsp + 0 * 8;
 		const RegExp x = rsp + 1 * 8;
@@ -3520,7 +3518,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(ptr [x], gp1);
 		Pack t = sf.t;
 		if (pn_ == 6) {
-			t.append(rax);
+			t.append(gp2);
 			t.append(rdx);
 		}
 		const Pack a = t.sub(0, pn_);
@@ -3544,12 +3542,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(gp2, ptr [x]);
 		call(mulPreL);
 		mov(gp0, ptr [x]);
-		if (pn_ == 4) {
-			gen_raw_fp_sub(t1, gp0, gp0 + FpByte_, sf.t, false);
-		} else {
-			assert(pn_ == 6);
-			gen_raw_fp_sub6(t1, gp0, gp0, FpByte_, a, false);
-		}
+		gen_raw_fp_sub_2(t1, gp0, gp0 + FpByte_, t, false);
 		mov(gp0, ptr [y]);
 		lea(gp1, ptr [t1]);
 		lea(gp2, ptr [t2]);
diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp
index e2545620..15002b11 100644
--- a/src/fp_static_code.hpp
+++ b/src/fp_static_code.hpp
@@ -83,7 +83,7 @@ void setStaticCode(mcl::fp::Op& op)
 		op.fp2_mul2A_ = mclx_Fp2_mul2;
 		op.fp2_mul_xiA_ = mclx_Fp2_mul_xi;
 		op.fp2Dbl_mulPreA_ = mclx_Fp2Dbl_mulPre;
-		op.fp2Dbl_sqrPreA_ = 0;//mclx_Fp2Dbl_sqrPre;
+		op.fp2Dbl_sqrPreA_ = mclx_Fp2Dbl_sqrPre;
 		op.fp2Dbl_mul_xiA_ = mclx_Fp2Dbl_mul_xi;
 		op.fp_preInv = mclx_Fp_preInv;
 	} else {

From 7813e6a608f2d5e18df80313edb506c5d76f0e1a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Feb 2021 09:44:29 +0900
Subject: [PATCH 459/553] remove old fp_sub

---
 src/fp_generator.hpp | 47 ++++++++------------------------------------
 1 file changed, 8 insertions(+), 39 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 181e1032..f3d14c00 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -650,25 +650,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			mov(ptr [pz + i * 8], t);
 		}
 	}
-	/*
-		pz[] = px[] - py[] mod p[]
-		use rax, t
-	*/
-	void gen_raw_fp_sub(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry)
-	{
-		const Pack& p0 = t.sub(0, pn_);
-		const Pack& p1 = t.sub(pn_, pn_);
-		load_rm(p0, px);
-		sub_rm(p0, py, withCarry);
-		lea(rax, ptr[rip+pL_]);
-		load_rm(p1, rax);
-		sbb(rax, rax); // rax = (x > y) ? 0 : -1
-		for (size_t i = 0; i < p1.size(); i++) {
-			and_(p1[i], rax);
-		}
-		add_rr(p0, p1);
-		store_mr(pz, p0);
-	}
 	void gen_raw_fp_add(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry = false, const Reg64 *H = 0)
 	{
 		const Pack& t1 = t.sub(0, pn_);
@@ -732,11 +713,11 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		Pack t = sf.t;
 		t.append(rax);
 		gen_raw_sub(pz, px, py, rax, pn_);
-		gen_raw_fp_sub_2(pz + pn_ * 8, px + pn_ * 8, py + pn_ * 8, t, true);
+		gen_raw_fp_sub(pz + pn_ * 8, px + pn_ * 8, py + pn_ * 8, t, true);
 		return func;
 	}
 	// require t.size() >= pn_ * 2
-	void gen_raw_fp_sub_2(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry)
+	void gen_raw_fp_sub(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry)
 	{
 		Pack t1 = t.sub(0, pn_);
 		Pack t2 = t.sub(pn_, pn_);
@@ -751,18 +732,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		add_rr(t1, t2);
 		store_mr(pz, t1);
 	}
-	void gen_raw_fp_sub6(const RegExp& pz, const RegExp& px, const RegExp& py, int offset, const Pack& t, bool withCarry)
-	{
-		load_rm(t, px + offset);
-		sub_rm(t, py + offset, withCarry);
-		/*
-			jmp is faster than and-mask without jmp
-		*/
-		jnc("@f");
-		add_rm(t, rip + pL_);
-	L("@@");
-		store_mr(pz + offset, t);
-	}
 	void3u gen_fp_sub()
 	{
 		if (pn_ > 6) return 0;
@@ -778,7 +747,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& py = sf.p[2];
 		Pack t = sf.t;
 		t.append(rax);
-		gen_raw_fp_sub_2(pz, px, py, t, false);
+		gen_raw_fp_sub(pz, px, py, t, false);
 		return func;
 	}
 	void2u gen_fp_neg()
@@ -3497,7 +3466,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			gen_raw_sub(gp0, gp0, d2, rax, pn_);
 			const RegExp& d0H = gp0 + pn_ * 8;
 			const RegExp& d2H = (RegExp)d2 + pn_ * 8;
-			gen_raw_fp_sub_2(d0H, d0H, d2H, t, true);
+			gen_raw_fp_sub(d0H, d0H, d2H, t, true);
 		}
 		add(rsp, SS);
 		ret();
@@ -3542,7 +3511,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(gp2, ptr [x]);
 		call(mulPreL);
 		mov(gp0, ptr [x]);
-		gen_raw_fp_sub_2(t1, gp0, gp0 + FpByte_, t, false);
+		gen_raw_fp_sub(t1, gp0, gp0 + FpByte_, t, false);
 		mov(gp0, ptr [y]);
 		lea(gp1, ptr [t1]);
 		lea(gp2, ptr [t2]);
@@ -3567,7 +3536,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		gen_raw_add(rsp, xa, xb, rax, pn_ * 2);
 		// low : x.a =  x.a - x.b
 		gen_raw_sub(ya, xa, xb, rax, pn_);
-		gen_raw_fp_sub_2(ya + pn_ * 8, xa + pn_ * 8, xb + pn_ * 8, sf.t, true);
+		gen_raw_fp_sub(ya + pn_ * 8, xa + pn_ * 8, xb + pn_ * 8, sf.t, true);
 
 		// low : y.b = [rsp]
 		mov_mm(yb, rsp, rax, pn_);
@@ -3608,8 +3577,8 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& py = sf.p[2];
 		Pack t = sf.t;
 		t.append(rax);
-		gen_raw_fp_sub_2(pz, px, py, t, false);
-		gen_raw_fp_sub_2(pz + FpByte_, px + FpByte_, py + FpByte_, t, false);
+		gen_raw_fp_sub(pz, px, py, t, false);
+		gen_raw_fp_sub(pz + FpByte_, px + FpByte_, py + FpByte_, t, false);
 		return func;
 	}
 	/*

From f248a916916e6e93178dfd777d0520a08d0779fc Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Feb 2021 10:04:35 +0900
Subject: [PATCH 460/553] tweak Fp2Dbl::subSpecial

---
 include/mcl/fp_tower.hpp | 75 ++++++++++++++--------------------------
 test/common_test.hpp     |  4 +--
 2 files changed, 28 insertions(+), 51 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index f79cba74..455538c4 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -658,6 +658,15 @@ struct Fp2DblT {
 		FpDbl::subPre(z.a, x.a, y.a);
 		FpDbl::subPre(z.b, x.b, y.b);
 	}
+	/*
+		imaginary part of Fp2Dbl::mul uses only add,
+		so it does not require mod.
+	*/
+	static void subSpecial(Fp2DblT& y, const Fp2DblT& x)
+	{
+		FpDbl::sub(y.a, y.a, x.a);
+		FpDbl::subPre(y.b, y.b, x.b);
+	}
 	static void neg(Fp2DblT& y, const Fp2DblT& x)
 	{
 		FpDbl::neg(y.a, x.a);
@@ -703,20 +712,12 @@ struct Fp2DblT {
 		if (op.fp2Dbl_mulPreA_) {
 			mulPre = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2&, const Fp2&)>(op.fp2Dbl_mulPreA_);
 		} else {
-			if (op.isFullBit) {
-				mulPre = fp2Dbl_mulPreTW<true>;
-			} else {
-				mulPre = fp2Dbl_mulPreTW<false>;
-			}
+			mulPre = fp2Dbl_mulPreW;
 		}
 		if (op.fp2Dbl_sqrPreA_) {
 			sqrPre = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2&)>(op.fp2Dbl_sqrPreA_);
 		} else {
-			if (op.isFullBit) {
-				sqrPre = fp2Dbl_sqrPreW<true>;
-			} else {
-				sqrPre = fp2Dbl_sqrPreW<false>;
-			}
+			sqrPre = fp2Dbl_sqrPreW;
 		}
 		const uint32_t xi_a = Fp2::get_xi_a();
 		switch (xi_a) {
@@ -735,9 +736,9 @@ struct Fp2DblT {
 		Fp2Dbl::mulPre by FpDblT
 		@note mod of NIST_P192 is fast
 	*/
-	template<bool isFullBit>
-	static void fp2Dbl_mulPreTW(Fp2DblT& z, const Fp2& x, const Fp2& y)
+	static void fp2Dbl_mulPreW(Fp2DblT& z, const Fp2& x, const Fp2& y)
 	{
+		assert(!Fp::getOp().isFullBit);
 		const Fp& a = x.a;
 		const Fp& b = x.b;
 		const Fp& c = y.a;
@@ -746,36 +747,21 @@ struct Fp2DblT {
 		FpDbl& d1 = z.b;
 		FpDbl d2;
 		Fp s, t;
-		if (isFullBit) {
-			Fp::add(s, a, b);
-			Fp::add(t, c, d);
-		} else {
-			Fp::addPre(s, a, b);
-			Fp::addPre(t, c, d);
-		}
+		Fp::addPre(s, a, b);
+		Fp::addPre(t, c, d);
 		FpDbl::mulPre(d1, s, t); // (a + b)(c + d)
 		FpDbl::mulPre(d0, a, c);
 		FpDbl::mulPre(d2, b, d);
-		if (isFullBit) {
-			FpDbl::sub(d1, d1, d0); // (a + b)(c + d) - ac
-			FpDbl::sub(d1, d1, d2); // (a + b)(c + d) - ac - bd
-		} else {
-			FpDbl::subPre(d1, d1, d0);
-			FpDbl::subPre(d1, d1, d2);
-		}
+		FpDbl::subPre(d1, d1, d0);
+		FpDbl::subPre(d1, d1, d2);
 		FpDbl::sub(d0, d0, d2); // ac - bd
 	}
-	template<bool isFullBit>
 	static void fp2Dbl_sqrPreW(Fp2DblT& y, const Fp2& x)
 	{
+		assert(!Fp::getOp().isFullBit);
 		Fp t1, t2;
-		if (isFullBit) {
-			Fp::add(t1, x.b, x.b); // 2b
-			Fp::add(t2, x.a, x.b); // a + b
-		} else {
-			Fp::addPre(t1, x.b, x.b); // 2b
-			Fp::addPre(t2, x.a, x.b); // a + b
-		}
+		Fp::addPre(t1, x.b, x.b); // 2b
+		Fp::addPre(t2, x.a, x.b); // a + b
 		FpDbl::mulPre(y.b, t1, x.a); // 2ab
 		Fp::sub(t1, x.a, x.b); // a - b
 		FpDbl::mulPre(y.a, t1, t2); // (a + b)(a - b)
@@ -1008,15 +994,6 @@ struct Fp6DblT {
 		Fp2Dbl::sub(z.b, x.b, y.b);
 		Fp2Dbl::sub(z.c, x.c, y.c);
 	}
-	/*
-		imaginary part of Fp2Dbl::mul uses only add,
-		so it does not require mod.
-	*/
-	static void specialSub(Fp2Dbl& y, const Fp2Dbl& x)
-	{
-		FpDbl::sub(y.a, y.a, x.a);
-		FpDbl::subPre(y.b, y.b, x.b);
-	}
 	/*
 		x = a + bv + cv^2, y = d + ev + fv^2, v^3 = xi
 		xy = (ad + (bf + ce)xi) + ((ae + bd) + cf xi)v + ((af + cd) + be)v^2
@@ -1052,12 +1029,12 @@ struct Fp6DblT {
 		Fp2Dbl::mulPre(BE, b, e);
 		Fp2Dbl::mulPre(CF, c, f);
 		Fp2Dbl::mulPre(AD, a, d);
-		specialSub(ZA, BE);
-		specialSub(ZA, CF);
-		specialSub(ZB, AD);
-		specialSub(ZB, BE);
-		specialSub(ZC, AD);
-		specialSub(ZC, CF);
+		Fp2Dbl::subSpecial(ZA, BE);
+		Fp2Dbl::subSpecial(ZA, CF);
+		Fp2Dbl::subSpecial(ZB, AD);
+		Fp2Dbl::subSpecial(ZB, BE);
+		Fp2Dbl::subSpecial(ZC, AD);
+		Fp2Dbl::subSpecial(ZC, CF);
 		Fp2Dbl::mul_xi(ZA, ZA);
 		Fp2Dbl::add(ZA, ZA, AD);
 		Fp2Dbl::mul_xi(CF, CF);
diff --git a/test/common_test.hpp b/test/common_test.hpp
index 40f67c1f..338e7d3a 100644
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@@ -135,8 +135,8 @@ void testABCDsub(const Fp2& a, const Fp2& b, const Fp2& c, const Fp2& d)
 	Fp2Dbl::mulPre(T1, t1, t2);
 	Fp2Dbl::mulPre(AC, a, c);
 	Fp2Dbl::mulPre(BD, b, d);
-	Fp6Dbl::specialSub(T1, AC);
-	Fp6Dbl::specialSub(T1, BD);
+	Fp2Dbl::subSpecial(T1, AC);
+	Fp2Dbl::subSpecial(T1, BD);
 	Fp2Dbl::mod(t1, T1);
 	CYBOZU_TEST_EQUAL(t1, a * d + b * c);
 }

From 847e97664ca23236b6f05b41aa506a984a1ef889 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Feb 2021 10:36:25 +0900
Subject: [PATCH 461/553] remove unused code

---
 include/mcl/fp_tower.hpp | 37 +++----------------------------------
 src/fp_generator.hpp     |  2 +-
 2 files changed, 4 insertions(+), 35 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 455538c4..800bf556 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -504,38 +504,6 @@ class Fp2T : public fp::Serializable<Fp2T<_Fp>,
 		Fp::mul2(y.a, x.a);
 		Fp::mul2(y.b, x.b);
 	}
-#if 0
-	/*
-		x = a + bi, y = c + di, i^2 = -1
-		z = xy = (a + bi)(c + di) = (ac - bd) + (ad + bc)i
-		ad+bc = (a + b)(c + d) - ac - bd
-		# of mod = 3
-	*/
-	static void fp2_mulW(Unit *z, const Unit *x, const Unit *y)
-	{
-		const Fp *px = reinterpret_cast<const Fp*>(x);
-		const Fp *py = reinterpret_cast<const Fp*>(y);
-		const Fp& a = px[0];
-		const Fp& b = px[1];
-		const Fp& c = py[0];
-		const Fp& d = py[1];
-		Fp *pz = reinterpret_cast<Fp*>(z);
-		Fp t1, t2, ac, bd;
-		Fp::add(t1, a, b);
-		Fp::add(t2, c, d);
-		t1 *= t2; // (a + b)(c + d)
-		Fp::mul(ac, a, c);
-		Fp::mul(bd, b, d);
-		Fp::sub(pz[0], ac, bd); // ac - bd
-		Fp::sub(pz[1], t1, ac);
-		pz[1] -= bd;
-	}
-	static void fp2_mulNFW(Fp2T& z, const Fp2T& x, const Fp2T& y)
-	{
-		const fp::Op& op = Fp::op_;
-		op.fp2_mulNF((Unit*)&z, (const Unit*)&x, (const Unit*)&y, op.p);
-	}
-#endif
 	static void mulC(Fp2T& z, const Fp2T& x, const Fp2T& y)
 	{
 		Fp2Dbl d;
@@ -941,6 +909,8 @@ struct Fp6T : public fp::Serializable<Fp6T<_Fp>,
 	*/
 	static void inv(Fp6T& y, const Fp6T& x)
 	{
+// 8.5Kclk
+//clk.begin();
 		const Fp2& a = x.a;
 		const Fp2& b = x.b;
 		const Fp2& c = x.c;
@@ -970,6 +940,7 @@ struct Fp6T : public fp::Serializable<Fp6T<_Fp>,
 		Fp2::mul(y.a, p.a, q);
 		Fp2::mul(y.b, p.b, q);
 		Fp2::mul(y.c, p.c, q);
+//clk.end();
 	}
 };
 
@@ -1005,7 +976,6 @@ struct Fp6DblT {
 	*/
 	static void mulPre(Fp6DblT& z, const Fp6& x, const Fp6& y)
 	{
-//clk.begin();
 		const Fp2& a = x.a;
 		const Fp2& b = x.b;
 		const Fp2& c = x.c;
@@ -1040,7 +1010,6 @@ struct Fp6DblT {
 		Fp2Dbl::mul_xi(CF, CF);
 		Fp2Dbl::add(ZB, ZB, CF);
 		Fp2Dbl::add(ZC, ZC, BE);
-//clk.end();
 	}
 	static void mod(Fp6& y, const Fp6Dbl& x)
 	{
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index f3d14c00..2cf52298 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -3455,7 +3455,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 				t.append(gp1);
 				t.append(gp2);
 			}
-			assert(t.size() == pn_ * 2);
+			assert((int)t.size() == pn_ * 2);
 
 			mov(gp0, ptr [z]);
 			load_rm(t, gp0 + FpByte_ * 2);

From e261605998381b2f45a97d159b8aa252f9eb158a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Feb 2021 11:09:06 +0900
Subject: [PATCH 462/553] check it later

---
 src/fp_generator.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 2cf52298..1bb535b2 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -3475,7 +3475,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	void2u gen_fp2Dbl_sqrPre()
 	{
 		if (isFullBit_) return 0;
-		if (pn_ > 6) return 0;
+		if (pn_ != 4 && pn_ != 6) return 0;
 		void2u func = getCurr<void2u>();
 		const RegExp y = rsp + 0 * 8;
 		const RegExp x = rsp + 1 * 8;

From 35a39d27e270d1eb681da176fe4f0bbc72a03c56 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Feb 2021 11:47:39 +0900
Subject: [PATCH 463/553] v1.35

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 76cf7f0b..096c613c 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x134; /* 0xABC = A.BC */
+static const int version = 0x135; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From 0d01560a1492eeb0a8c99c9ee15413edd9abe365 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 24 Feb 2021 15:25:25 +0900
Subject: [PATCH 464/553] slightly optimize Fp6::inv

---
 include/mcl/fp_tower.hpp | 48 ++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 800bf556..5abcf35b 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -909,38 +909,42 @@ struct Fp6T : public fp::Serializable<Fp6T<_Fp>,
 	*/
 	static void inv(Fp6T& y, const Fp6T& x)
 	{
-// 8.5Kclk
-//clk.begin();
 		const Fp2& a = x.a;
 		const Fp2& b = x.b;
 		const Fp2& c = x.c;
-		Fp2 aa, bb, cc, ab, bc, ac;
-		Fp2::sqr(aa, a);
-		Fp2::sqr(bb, b);
-		Fp2::sqr(cc, c);
-		Fp2::mul(ab, a, b);
-		Fp2::mul(bc, b, c);
-		Fp2::mul(ac, c, a);
+		Fp2Dbl aa, bb, cc, ab, bc, ac;
+		Fp2Dbl::sqrPre(aa, a);
+		Fp2Dbl::sqrPre(bb, b);
+		Fp2Dbl::sqrPre(cc, c);
+		Fp2Dbl::mulPre(ab, a, b);
+		Fp2Dbl::mulPre(bc, b, c);
+		Fp2Dbl::mulPre(ac, c, a);
 
 		Fp6T p;
-		Fp2::mul_xi(p.a, bc);
-		Fp2::sub(p.a, aa, p.a); // a^2 - bc xi
-		Fp2::mul_xi(p.b, cc);
-		p.b -= ab; // c^2 xi - ab
-		Fp2::sub(p.c, bb, ac); // b^2 - ac
-		Fp2 q, t;
-		Fp2::mul(q, p.b, c);
-		Fp2::mul(t, p.c, b);
-		q += t;
-		Fp2::mul_xi(q, q);
-		Fp2::mul(t, p.a, a);
-		q += t;
+		Fp2Dbl T;
+		Fp2Dbl::mul_xi(T, bc);
+		Fp2Dbl::sub(T, aa, T); // a^2 - bc xi
+		Fp2Dbl::mod(p.a, T);
+		Fp2Dbl::mul_xi(T, cc);
+		Fp2Dbl::sub(T, T, ab); // c^2 xi - ab
+		Fp2Dbl::mod(p.b, T);
+		Fp2Dbl::sub(T, bb, ac); // b^2 - ac
+		Fp2Dbl::mod(p.c, T);
+
+		Fp2Dbl T2;
+		Fp2Dbl::mulPre(T, p.b, c);
+		Fp2Dbl::mulPre(T2, p.c, b);
+		Fp2Dbl::add(T, T, T2);
+		Fp2Dbl::mul_xi(T, T);
+		Fp2Dbl::mulPre(T2, p.a, a);
+		Fp2Dbl::addPre(T, T, T2);
+		Fp2 q;
+		Fp2Dbl::mod(q, T);
 		Fp2::inv(q, q);
 
 		Fp2::mul(y.a, p.a, q);
 		Fp2::mul(y.b, p.b, q);
 		Fp2::mul(y.c, p.c, q);
-//clk.end();
 	}
 };
 

From 5d703a98a80e81dc7de797c219b18819ee56a708 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 24 Feb 2021 17:05:54 +0900
Subject: [PATCH 465/553] update Fp12::inv

---
 include/mcl/fp_tower.hpp | 85 +++++++++++++++++++++-------------------
 1 file changed, 45 insertions(+), 40 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 5abcf35b..417db266 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -866,38 +866,11 @@ struct Fp6T : public fp::Serializable<Fp6T<_Fp>,
 		Fp2::mul2(y.b, x.b);
 		Fp2::mul2(y.c, x.c);
 	}
-	/*
-		x = a + bv + cv^2, v^3 = xi
-		x^2 = (a^2 + 2bc xi) + (c^2 xi + 2ab)v + (b^2 + 2ac)v^2
-
-		b^2 + 2ac = (a + b + c)^2 - a^2 - 2bc - c^2 - 2ab
-	*/
 	static void sqr(Fp6T& y, const Fp6T& x)
 	{
-		const Fp2& a = x.a;
-		const Fp2& b = x.b;
-		const Fp2& c = x.c;
-		Fp2 t;
-		Fp2Dbl BC2, AB2, AA, CC, T;
-		Fp2::mul2(t, b);
-		Fp2Dbl::mulPre(BC2, t, c); // 2bc
-		Fp2Dbl::mulPre(AB2, t, a); // 2ab
-		Fp2Dbl::sqrPre(AA, a);
-		Fp2Dbl::sqrPre(CC, c);
-		Fp2::add(t, a, b);
-		Fp2::add(t, t, c);
-		Fp2Dbl::sqrPre(T, t); // (a + b + c)^2
-		Fp2Dbl::sub(T, T, AA);
-		Fp2Dbl::sub(T, T, BC2);
-		Fp2Dbl::sub(T, T, CC);
-		Fp2Dbl::sub(T, T, AB2);
-		Fp2Dbl::mod(y.c, T);
-		Fp2Dbl::mul_xi(BC2, BC2);
-		Fp2Dbl::add(AA, AA, BC2);
-		Fp2Dbl::mod(y.a, AA);
-		Fp2Dbl::mul_xi(CC, CC);
-		Fp2Dbl::add(CC, CC, AB2);
-		Fp2Dbl::mod(y.b, CC);
+		Fp6Dbl XX;
+		Fp6Dbl::sqrPre(XX, x);
+		Fp6Dbl::mod(y, XX);
 	}
 	static inline void mul(Fp6T& z, const Fp6T& x, const Fp6T& y);
 	/*
@@ -1015,6 +988,36 @@ struct Fp6DblT {
 		Fp2Dbl::add(ZB, ZB, CF);
 		Fp2Dbl::add(ZC, ZC, BE);
 	}
+	/*
+		x = a + bv + cv^2, v^3 = xi
+		x^2 = (a^2 + 2bc xi) + (c^2 xi + 2ab)v + (b^2 + 2ac)v^2
+
+		b^2 + 2ac = (a + b + c)^2 - a^2 - 2bc - c^2 - 2ab
+	*/
+	static void sqrPre(Fp6DblT& y, const Fp6& x)
+	{
+		const Fp2& a = x.a;
+		const Fp2& b = x.b;
+		const Fp2& c = x.c;
+		Fp2 t;
+		Fp2Dbl BC2, AB2, AA, CC, T;
+		Fp2::mul2(t, b);
+		Fp2Dbl::mulPre(BC2, t, c); // 2bc
+		Fp2Dbl::mulPre(AB2, t, a); // 2ab
+		Fp2Dbl::sqrPre(AA, a);
+		Fp2Dbl::sqrPre(CC, c);
+		Fp2::add(t, a, b);
+		Fp2::add(t, t, c);
+		Fp2Dbl::sqrPre(T, t); // (a + b + c)^2
+		Fp2Dbl::sub(T, T, AA);
+		Fp2Dbl::sub(T, T, BC2);
+		Fp2Dbl::sub(T, T, CC);
+		Fp2Dbl::sub(y.c, T, AB2);
+		Fp2Dbl::mul_xi(BC2, BC2);
+		Fp2Dbl::add(y.a, AA, BC2);
+		Fp2Dbl::mul_xi(CC, CC);
+		Fp2Dbl::add(y.b, CC, AB2);
+	}
 	static void mod(Fp6& y, const Fp6Dbl& x)
 	{
 		Fp2Dbl::mod(y.a, x.a);
@@ -1169,16 +1172,18 @@ struct Fp12T : public fp::Serializable<Fp12T<Fp>,
 	{
 		const Fp6& a = x.a;
 		const Fp6& b = x.b;
-		Fp6 t0, t1;
-		Fp6::sqr(t0, a);
-		Fp6::sqr(t1, b);
-		Fp2::mul_xi(t1.c, t1.c);
-		t0.a -= t1.c;
-		t0.b -= t1.a;
-		t0.c -= t1.b; // t0 = a^2 - b^2v
-		Fp6::inv(t0, t0);
-		Fp6::mul(y.a, x.a, t0);
-		Fp6::mul(y.b, x.b, t0);
+		Fp6Dbl AA, BB;
+		Fp6Dbl::sqrPre(AA, a);
+		Fp6Dbl::sqrPre(BB, b);
+		Fp2Dbl::mul_xi(BB.c, BB.c);
+		Fp2Dbl::sub(AA.a, AA.a, BB.c);
+		Fp2Dbl::sub(AA.b, AA.b, BB.a);
+		Fp2Dbl::sub(AA.c, AA.c, BB.b); // a^2 - b^2 v
+		Fp6 t;
+		Fp6Dbl::mod(t, AA);
+		Fp6::inv(t, t);
+		Fp6::mul(y.a, x.a, t);
+		Fp6::mul(y.b, x.b, t);
 		Fp6::neg(y.b, y.b);
 	}
 	/*

From 285b44546ae46eaf136037e01c259ff0d4eafa67 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 24 Feb 2021 17:09:03 +0900
Subject: [PATCH 466/553] move Fp6::mul

---
 include/mcl/fp_tower.hpp | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 417db266..5f2fa596 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -872,7 +872,12 @@ struct Fp6T : public fp::Serializable<Fp6T<_Fp>,
 		Fp6Dbl::sqrPre(XX, x);
 		Fp6Dbl::mod(y, XX);
 	}
-	static inline void mul(Fp6T& z, const Fp6T& x, const Fp6T& y);
+	static inline void mul(Fp6T& z, const Fp6T& x, const Fp6T& y)
+	{
+		Fp6Dbl XY;
+		Fp6Dbl::mulPre(XY, x, y);
+		Fp6Dbl::mod(z, XY);
+	}
 	/*
 		x = a + bv + cv^2, v^3 = xi
 		y = 1/x = p/q where
@@ -1026,14 +1031,6 @@ struct Fp6DblT {
 	}
 };
 
-template<class Fp>
-inline void Fp6T<Fp>::mul(Fp6T<Fp>& z, const Fp6T<Fp>& x, const Fp6T<Fp>& y)
-{
-	Fp6DblT<Fp> Z;
-	Fp6DblT<Fp>::mulPre(Z, x, y);
-	Fp6DblT<Fp>::mod(z, Z);
-}
-
 /*
 	Fp12T = Fp6[w] / (w^2 - v)
 	x = a + b w

From bc4ed309e301d63fe80d277d472f23789b0d5b2f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 25 Feb 2021 10:47:42 +0900
Subject: [PATCH 467/553] fast Fp4::sqrPre

---
 src/fp_generator.hpp   | 74 ++++++++++++++++++++++++++++++++++++++++--
 test/fp_tower_test.cpp |  4 +++
 2 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 1bb535b2..3c6c6134 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1936,12 +1936,83 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mul2x2(px, py, t4, t3, t2, t1, t0);
 		store_mr(pz, Pack(t3, t2, t1, t0));
 	}
+	/*
+		(3, 3)(2, 2)(1, 1)(0, 0)
+		   t5 t4 t3 t2 t1 t0
+		   (3, 2)(2, 1)(1, 0)x2
+		      (3, 1)(2, 0)x2
+		         (3, 0)x2
+	*/
+	void sqrPre4NF(const Reg64& py, const Reg64& px, const Pack& t)
+	{
+		const Reg64& t0 = t[0];
+		const Reg64& t1 = t[1];
+		const Reg64& t2 = t[2];
+		const Reg64& t3 = t[3];
+		const Reg64& t4 = t[4];
+		const Reg64& t5 = t[5];
+		const Reg64& x0 = t[6];
+		const Reg64& x1 = t[7];
+		const Reg64& x2 = t[8];
+		const Reg64& x3 = t[9];
+		const Reg64& H = t[10];
+
+		load_rm(Pack(x3, x2, x1, x0), px);
+		mov(rdx, x0);
+		mulx(t3, t2, x3); // (3, 0)
+		mulx(rax, t1, x2); // (2, 0)
+		add(t2, rax);
+		mov(rdx, x1);
+		mulx(t4, rax, x3); // (3, 1)
+		adc(t3, rax);
+		adc(t4, 0); // [t4:t3:t2:t1]
+		mulx(rax, t0, x0); // (1, 0)
+		add(t1, rax);
+		mulx(rdx, rax, x2); // (2, 1)
+		adc(t2, rax);
+		adc(t3, rdx);
+		mov(rdx, x3);
+		mulx(t5, rax, x2); // (3, 2)
+		adc(t4, rax);
+		adc(t5, 0);
+
+		shl1(Pack(t5, t4, t3, t2, t1, t0), &H);
+		mov(rdx, x0);
+		mulx(rdx, rax, rdx);
+		mov(ptr[py + 8 * 0], rax);
+		add(rdx, t0);
+		mov(ptr[py + 8 * 1], rdx);
+		mov(rdx, x1);
+		mulx(rdx, rax, rdx);
+		adc(rax, t1);
+		mov(ptr[py + 8 * 2], rax);
+		adc(rdx, t2);
+		mov(ptr[py + 8 * 3], rdx);
+		mov(rdx, x2);
+		mulx(rdx, rax, rdx);
+		adc(rax, t3);
+		mov(ptr[py + 8 * 4], rax);
+		adc(rdx, t4);
+		mov(ptr[py + 8 * 5], rdx);
+		mov(rdx, x3);
+		mulx(rdx, rax, rdx);
+		adc(rax, t5);
+		mov(ptr[py + 8 * 6], rax);
+		adc(rdx, H);
+		mov(ptr[py + 8 * 7], rdx);
+	}
 	/*
 		py[7..0] = px[3..0] ^ 2
 		use xmm0
 	*/
-	void sqrPre4(const RegExp& py, const RegExp& px, const Pack& t)
+	void sqrPre4(const Reg64& py, const Reg64& px, const Pack& t)
 	{
+#if 1
+		if (useMulx_ && useAdx_) {
+			sqrPre4NF(py, px, t);
+			return;
+		}
+#endif
 		const Reg64& t0 = t[0];
 		const Reg64& t1 = t[1];
 		const Reg64& t2 = t[2];
@@ -2250,7 +2321,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	/*
 		@input (z, xy)
 		z[5..0] <- montgomery reduction(x[11..0])
-		use xm0, xm1, xm2
 	*/
 	void gen_fpDbl_mod6(const Reg64& z, const Reg64& xy, const Pack& t)
 	{
diff --git a/test/fp_tower_test.cpp b/test/fp_tower_test.cpp
index 3a456189..d9ca03bb 100644
--- a/test/fp_tower_test.cpp
+++ b/test/fp_tower_test.cpp
@@ -453,11 +453,15 @@ void testAll()
 		"0x0000000000000001000000000000000000000000000000000000000000000085", // min prime
 		"0x2523648240000001ba344d80000000086121000000000013a700000000000013",
 		"0x7523648240000001ba344d80000000086121000000000013a700000000000017",
+		// max prime less than 2**256/4
+		"0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0b",
 		"0x800000000000000000000000000000000000000000000000000000000000005f",
 		"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff43", // max prime
 #if MCL_MAX_BIT_SIZE >= 384
 		// N = 6
 		"0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab",
+		// max prime less than 2**384/4
+		"0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff97",
 		"0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff",
 #endif
 #if MCL_MAX_BIT_SIZE >= 768

From 6f5801b104bbc8b4b1fd6b606bd429bc7add0df6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 25 Feb 2021 13:31:06 +0900
Subject: [PATCH 468/553] clean

---
 src/fp_generator.hpp | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 3c6c6134..63498f0b 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1383,20 +1383,20 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		h = 0 or 1
 		use rax, t0
 	*/
-	void mulAdd2(const Reg64& h, const Pack& c, int n, const RegExp& px, const Reg64& t0, const Reg64 *cc = 0, bool updateCarry = true)
+	void mulAdd2(const Reg64& h, const Pack& c, const RegExp& px, const Reg64& t0, const Reg64 *cc = 0, bool updateCarry = true)
 	{
 		assert(!isFullBit_);
 		const Reg64& a = rax;
 		xor_(h, h); // h = 0
-		for (int i = 0; i < n; i++) {
+		for (int i = 0; i < pn_; i++) {
 			mulx(t0, a, ptr [px + i * 8]);
 			adox(c[i], a);
-			if (i == n - 1) break;
+			if (i == pn_ - 1) break;
 			adcx(c[i + 1], t0);
 		}
 		adox(t0, h); // no carry
 		if (cc) adox(t0, *cc); // no carry
-		adcx(c[n], t0);
+		adcx(c[pn_], t0);
 		if (updateCarry) adc(h, h);
 	}
 	/*
@@ -2343,33 +2343,33 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		load_rm(Pack(t6, t5, t4, t3, t2, t1, t0), xy);
 		mov(d, rp_);
 		imul(d, t0); // q
-		mulAdd2(t7, Pack(t6, t5, t4, t3, t2, t1, t0), 6, pp, t8);
+		mulAdd2(t7, Pack(t6, t5, t4, t3, t2, t1, t0), pp, t8);
 		// t7 : carry, [t6:t5:t4:t3:t2:t1:t0] += p * q
 
 		mov(d, rp_);
 		imul(d, t1);
 		mov(t0, ptr[xy + 7 * 8]);
-		mulAdd2(t9, Pack(t0, t6, t5, t4, t3, t2, t1), 6, pp, t8, &t7);
+		mulAdd2(t9, Pack(t0, t6, t5, t4, t3, t2, t1), pp, t8, &t7);
 
 		mov(d, rp_);
 		imul(d, t2);
 		mov(t1, ptr[xy + 8 * 8]);
-		mulAdd2(t7, Pack(t1, t0, t6, t5, t4, t3, t2), 6, pp, t8, &t9);
+		mulAdd2(t7, Pack(t1, t0, t6, t5, t4, t3, t2), pp, t8, &t9);
 
 		mov(d, rp_);
 		imul(d, t3);
 		mov(t2, ptr[xy + 9 * 8]);
-		mulAdd2(t9, Pack(t2, t1, t0, t6, t5, t4, t3), 6, pp, t8, &t7);
+		mulAdd2(t9, Pack(t2, t1, t0, t6, t5, t4, t3), pp, t8, &t7);
 
 		mov(d, rp_);
 		imul(d, t4);
 		mov(t3, ptr[xy + 10 * 8]);
-		mulAdd2(t7, Pack(t3, t2, t1, t0, t6, t5, t4), 6, pp, t8, &t9);
+		mulAdd2(t7, Pack(t3, t2, t1, t0, t6, t5, t4), pp, t8, &t9);
 
 		mov(d, rp_);
 		imul(d, t5);
 		mov(t4, ptr[xy + 11 * 8]);
-		mulAdd2(t9, Pack(t4, t3, t2, t1, t0, t6, t5), 6, pp, t8, &t7, false);
+		mulAdd2(t9, Pack(t4, t3, t2, t1, t0, t6, t5), pp, t8, &t7, false);
 
 		// z = [t4:t3:t2:t1:t0:t6]
 		Pack zp = Pack(t4, t3, t2, t1, t0, t6);

From 66c2852761013432e62d7b272e9401d00e0cd386 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 25 Feb 2021 14:05:55 +0900
Subject: [PATCH 469/553] fast 256-bit Fp::mod

---
 src/fp_generator.hpp | 56 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 53 insertions(+), 3 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 63498f0b..c8398731 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1039,14 +1039,62 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		cmovc_rr(Pack(t9, t8, t4), Pack(t2, t1, t10));
 		store_mr(z, Pack(t9, t8, t4));
 	}
+	void gen_fpDbl_mod4NF(const Reg64& z, const Reg64& xy, const Pack& t) // z[3..0] <- montgomery reduction(xy[7..0]); variant taken by gen_fpDbl_mod4 when !isFullBit_
+	{
+		assert(!isFullBit_);
+		const Reg64& t0 = t[0];
+		const Reg64& t1 = t[1];
+		const Reg64& t2 = t[2];
+		const Reg64& t3 = t[3];
+		const Reg64& t4 = t[4];
+		const Reg64& t5 = t[5];
+		const Reg64& t6 = t[6];
+		const Reg64& t7 = t[7];
+		// t[0..8] are used below; rdx (d) holds the per-round factor q
+		const Reg64& d = rdx;
+		const Reg64& pp = t[8];
+		lea(pp, ptr[rip + pL_]); // pp = address of p (pL_ is the label of p)
+		// round 1 of 4: q = low 64 bits of rp_ * t0, then add p * q
+		load_rm(Pack(t4, t3, t2, t1, t0), xy); // presumably t0..t4 = xy[0..4] -- confirm against load_rm
+		mov(d, rp_);
+		imul(d, t0); // q
+		mulAdd2(t5, Pack(t4, t3, t2, t1, t0), pp, t6);
+		// t5 : carry, [t4:t3:t2:t1:t0] += p * q
+		// round 2: t0 is no longer needed and is reloaded with xy[5]; t5 (carry of round 1) is passed in
+		mov(d, rp_);
+		imul(d, t1);
+		mov(t0, ptr[xy + 5 * 8]);
+		mulAdd2(t7, Pack(t0, t4, t3, t2, t1), pp, t6, &t5);
+		// round 3: t1 is reloaded with xy[6]; t7 (carry of round 2) is passed in
+		mov(d, rp_);
+		imul(d, t2);
+		mov(t1, ptr[xy + 6 * 8]);
+		mulAdd2(t5, Pack(t1, t0, t4, t3, t2), pp, t6, &t7);
+		// round 4: t2 is reloaded with xy[7]; the trailing false is mulAdd2's last flag (presumably "do not update the carry word" -- confirm)
+		mov(d, rp_);
+		imul(d, t3);
+		mov(t2, ptr[xy + 7 * 8]);
+		mulAdd2(t7, Pack(t2, t1, t0, t4, t3), pp, t6, &t5, false);
+		// final step: keep a copy of the result, subtract p, and take the copy back if the subtraction borrowed
+		Pack zp = Pack(t2, t1, t0, t4);
+		Pack keep = Pack(t7, t6, t5, t3);
+		mov_rr(keep, zp);
+		sub_rm(zp, pp); // z -= p
+		cmovc_rr(zp, keep);
+		store_mr(z, zp);
+	}
 	/*
 		@input (z, xy)
 		z[3..0] <- montgomery reduction(x[7..0])
 		@note destroy rax, rdx, t0, ..., t10, xm0, xm1
 		xm2 if isFullBit_
 	*/
-	void gen_fpDbl_mod4(const Reg64& z, const Reg64& xy, const Pack& t, const Reg64& t10)
+	void gen_fpDbl_mod4(const Reg64& z, const Reg64& xy, const Pack& t)
 	{
+		if (!isFullBit_) {
+			gen_fpDbl_mod4NF(z, xy, t);
+			return;
+		}
 		const Reg64& t0 = t[0];
 		const Reg64& t1 = t[1];
 		const Reg64& t2 = t[2];
@@ -1057,6 +1105,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& t7 = t[7];
 		const Reg64& t8 = t[8];
 		const Reg64& t9 = t[9];
+		const Reg64& t10 = t[10];
 
 		const Reg64& a = rax;
 		const Reg64& d = rdx;
@@ -1183,7 +1232,9 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			call(fpDbl_modL);
 			sf.close();
 		L(fpDbl_modL);
-			gen_fpDbl_mod4(gp0, gp1, sf.t, gp2);
+			Pack t = sf.t;
+			t.append(gp2);
+			gen_fpDbl_mod4(gp0, gp1, t);
 			ret();
 			return func;
 		}
@@ -2371,7 +2422,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(t4, ptr[xy + 11 * 8]);
 		mulAdd2(t9, Pack(t4, t3, t2, t1, t0, t6, t5), pp, t8, &t7, false);
 
-		// z = [t4:t3:t2:t1:t0:t6]
 		Pack zp = Pack(t4, t3, t2, t1, t0, t6);
 		Pack keep = Pack(t5, xy, rax, rdx, t7, t8);
 		mov_rr(keep, zp);

From 35db3f90b3be29edbc0e3b4abc0d683be062c558 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 25 Feb 2021 14:15:52 +0900
Subject: [PATCH 470/553] use fqrPre + mod for N = 4

---
 src/fp_generator.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index c8398731..6a592442 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1267,7 +1267,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			return func;
 		}
 		if (pn_ == 4 && useMulx_) {
-#if 1
+#if 0
 			// sqr(y, x) = mul(y, x, x)
 #ifdef XBYAK64_WIN
 			mov(r8, rdx);
@@ -1275,7 +1275,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			mov(rdx, rsi);
 #endif
 			jmp((const void*)op_->fp_mulA_);
-#else // (sqrPre + mod) is slower than mul
+#else // (sqrPre + mod) is faster than mul
 			StackFrame sf(this, 3, 10 | UseRDX, 8 * 8);
 			Pack t = sf.t;
 			t.append(sf.p[2]);

From a314e6a54b2ae919d4447b374e4a82d4af1b76da Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 25 Feb 2021 15:29:25 +0900
Subject: [PATCH 471/553] remove unused code

---
 src/fp_generator.hpp | 44 --------------------------------------------
 1 file changed, 44 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 6a592442..546ff3dc 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -881,12 +881,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 #endif
 			return func;
 		}
-#if 0
-		if (pn_ <= 9) {
-			gen_montMulN(p_, rp_, pn_);
-			return func;
-		}
-#endif
 		return 0;
 	}
 	/*
@@ -1308,44 +1302,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		}
 		return 0;
 	}
-	/*
-		input (pz[], px[], py[])
-		z[] <- montgomery(x[], y[])
-	*/
-	void gen_montMulN(const uint64_t *p, uint64_t pp, int n)
-	{
-		assert(1 <= pn_ && pn_ <= 9);
-		const int regNum = useMulx_ ? 4 : 3 + (std::min)(n - 1, 7);
-		const int stackSize = (n * 3 + (isFullBit_ ? 2 : 1)) * 8;
-		StackFrame sf(this, 3, regNum | UseRDX, stackSize);
-		const Reg64& pz = sf.p[0];
-		const Reg64& px = sf.p[1];
-		const Reg64& py = sf.p[2];
-		const Reg64& y = sf.t[0];
-		const Reg64& pAddr = sf.t[1];
-		const Reg64& t = sf.t[2];
-		Pack remain = sf.t.sub(3);
-		size_t rspPos = 0;
-
-		MixPack pw1(remain, rspPos, n - 1);
-		const RegExp pw2 = rsp + rspPos; // pw2[0..n-1]
-		const RegExp pc = pw2 + n * 8; // pc[0..n+1]
-		mov(pAddr, (size_t)p);
-
-		for (int i = 0; i < n; i++) {
-			mov(y, ptr [py + i * 8]);
-			montgomeryN_1(pp, n, pc, px, y, pAddr, t, pw1, pw2, i == 0);
-		}
-		// pz[] = pc[] - p[]
-		gen_raw_sub(pz, pc, pAddr, t, n);
-		if (isFullBit_) sbb(qword[pc + n * 8], 0);
-		jnc("@f");
-		for (int i = 0; i < n; i++) {
-			mov(t, ptr [pc + i * 8]);
-			mov(ptr [pz + i * 8], t);
-		}
-	L("@@");
-	}
 	/*
 		input (z, x, y) = (p0, p1, p2)
 		z[0..3] <- montgomery(x[0..3], y[0..3])

From 95f2d4afffcf1e545da9fbbf1bd7af1748d3bf67 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Feb 2021 11:55:37 +0900
Subject: [PATCH 472/553] check undefined label

---
 src/fp_generator.hpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 546ff3dc..9c9ac86d 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -319,6 +319,13 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		init_inner(op);
 		// ToDo : recover op if false
 		if (Xbyak::GetError()) return false;
+#ifndef NDEBUG
+		if (hasUndefinedLabel()) {
+			fprintf(stderr, "fp_generator has bugs.\n");
+			exit(1);
+			return false;
+		}
+#endif
 //		printf("code size=%d\n", (int)getSize());
 #ifndef MCL_DUMP_JIT
 		setProtectModeRE(); // set read/exec memory

From 5cad43bb102f8734b936f472777ec331b7468a7b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Feb 2021 11:56:11 +0900
Subject: [PATCH 473/553] make mulPre, sqrPre for mul/sqr

---
 src/fp_generator.hpp | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 9c9ac86d..6538d6a0 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -415,7 +415,17 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			gen_preInv();
 			setFuncInfo(prof_, suf, "_preInv", op.fp_preInv, getCurr());
 		}
+
+		// call from Fp::mul and Fp::sqr
+		align(16);
+		gen_fpDbl_mulPre(op.fpDbl_mulPre);
+		setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPre, getCurr());
+
+		align(16);
+		gen_fpDbl_sqrPre(op.fpDbl_sqrPre);
+		setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPre, getCurr());
 		if (op.xi_a == 0) return; // Fp2 is not used
+
 		align(16);
 		op.fpDbl_addA_ = gen_fpDbl_add();
 		setFuncInfo(prof_, suf, "Dbl_add", op.fpDbl_addA_, getCurr());
@@ -432,14 +442,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		op.fpDbl_subPre = gen_addSubPre(false, pn_ * 2);
 		setFuncInfo(prof_, suf, "Dbl_subPre", op.fpDbl_subPre, getCurr());
 
-		align(16);
-		gen_fpDbl_mulPre(op.fpDbl_mulPre);
-		setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPre, getCurr());
-
-		align(16);
-		gen_fpDbl_sqrPre(op.fpDbl_sqrPre);
-		setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPre, getCurr());
-
 		align(16);
 		op.fp2_addA_ = gen_fp2_add();
 		setFuncInfo(prof_, suf, "2_add", op.fp2_addA_, getCurr());

From b7a47dc519ec36fcd4286959a6f9703cf25a84bf Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Feb 2021 11:56:30 +0900
Subject: [PATCH 474/553] try mulPre + mod but it is slower

---
 src/fp_generator.hpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 6538d6a0..eaa706cf 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1321,6 +1321,19 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		StackFrame sf(this, 3, 10 | UseRDX, 0, false);
 		call(fp_mulL);
 		sf.close();
+#if 0 // slower than mont
+	L(fp_mulL);
+		int stackSize = 8 * 8 /* xy */ + 8;
+		sub(rsp, stackSize);
+		mov(ptr[rsp], gp0); // save z
+		lea(gp0, ptr[rsp + 8]);
+		call(mulPreL); // stack <- x * y
+		mov(gp0, ptr[rsp]);
+		lea(gp1, ptr[rsp + 8]);
+		call(fpDbl_modL); // z <- stack
+		add(rsp, stackSize);
+		ret();
+#else
 		const Reg64& p0 = sf.p[0];
 		const Reg64& p1 = sf.p[1];
 		const Reg64& p2 = sf.p[2];
@@ -1370,6 +1383,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		vmovq(p0, xm0); // load p0
 		store_mr(p0, Pack(t3, t2, t1, t0));
 		ret();
+#endif
 	}
 	/*
 		c[n..0] = c[n-1..0] + px[n-1..0] * rdx if is_cn_zero = true
@@ -2453,6 +2467,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			*/
 			StackFrame sf(this, 3, 10 | UseRDX, 0, false);
 			mulPre4(gp0, gp1, gp2, sf.t);
+//			call(mulPreL);
 			sf.close(); // make epilog
 		L(mulPreL); // called only from asm code
 			mulPre4(gp0, gp1, gp2, sf.t);

From e1fc81a5512761bdc19b10b870783d13f63a806c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Feb 2021 17:25:58 +0900
Subject: [PATCH 475/553] fast Fp::sqrPre

---
 src/fp_generator.hpp | 138 +++++++++++++++++++++++++++++++++++--------
 test/bench.hpp       |   1 -
 2 files changed, 112 insertions(+), 27 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index eaa706cf..7dbb05bf 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -2091,38 +2091,124 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		store_mr(py + 2 * 8, Pack(d, t8, t10, t9, t3, t2));
 	}
 	/*
-		py[11..0] = px[5..0] ^ 2
-		use rax, rdx, stack[6 * 8]
+		(5, 5)(4, 4)(3, 3)(2, 2)(1, 1)(0, 0)
+		   t9 t8 t7 t6 t5 t4 t3 t2 t1 t0
+		   (5, 4)(4, 3)(3, 2)(2, 1)(1, 0)
+		      (5, 3)(4, 2)(3, 1)(2, 0)
+		         (5, 2)(4, 1)(3, 0)
+		            (5, 1)(4, 0)
+		               (5, 0)
 	*/
 	void sqrPre6(const RegExp& py, const RegExp& px, const Pack& t)
 	{
 		const Reg64& t0 = t[0];
 		const Reg64& t1 = t[1];
 		const Reg64& t2 = t[2];
-		/*
-			(aN + b)^2 = a^2 N^2 + 2ab N + b^2
-		*/
-		sqrPre3(py, px, t); // [py] <- b^2
-		sqrPre3(py + 6 * 8, px + 3 * 8, t); // [py + 6 * 8] <- a^2
-		mulPre3(rsp, px, px + 3 * 8, t); // ab
-		Pack ab = t.sub(0, 6);
-		load_rm(ab, rsp);
-		xor_(rax, rax);
-		for (int i = 0; i < 6; i++) {
-			if (i == 0) {
-				add(ab[i], ab[i]);
-			} else {
-				adc(ab[i], ab[i]);
-			}
-		}
-		adc(rax, rax);
-		add_rm(ab, py + 3 * 8);
-		store_mr(py + 3 * 8, ab);
-		load_rm(Pack(t2, t1, t0), py + 9 * 8);
-		adc(t0, rax);
-		adc(t1, 0);
-		adc(t2, 0);
-		store_mr(py + 9 * 8, Pack(t2, t1, t0));
+		const Reg64& t3 = t[3];
+		const Reg64& t4 = t[4];
+		const Reg64& t5 = t[5];
+		const Reg64& t6 = t[6];
+		const Reg64& t7 = t[7];
+		const Reg64& t8 = t[8];
+		const Reg64& t9 = t[9];
+		const Reg64& H = t[10]; // scratch for high halves; after shl1 it is added into the top word
+		// step 1: [t9:...:t0] = sum of the cross products (i, j) = px[i] * px[j], i > j (words 1..10 of the result)
+		mov(rdx, ptr[px + 8 * 0]);
+		mulx(t5, t4, ptr[px + 8 * 5]); // [t5:t4] = (5, 0)
+		mulx(rax, t3, ptr[px + 8 * 4]); // (4, 0)
+		add(t4, rax);
+		mov(rdx, ptr[px + 8 * 1]);
+		mulx(t6, rax, ptr[px + 8 * 5]); // (5, 1)
+		adc(t5, rax);
+		adc(t6, 0); // [t6:t5:t4:t3]
+		mov(rdx, ptr[px + 8 * 0]);
+		mulx(rax, t2, ptr[px + 8 * 3]); // (3, 0)
+		add(t3, rax);
+		mov(rdx, ptr[px + 8 * 1]);
+		mulx(H, rax, ptr[px + 8 * 4]); // (4, 1)
+		adc(t4, rax);
+		adc(t5, H);
+		mov(rdx, ptr[px + 8 * 2]);
+		mulx(t7, rax, ptr[px + 8 * 5]); // (5, 2)
+		adc(t6, rax);
+		adc(t7, 0); // [t7:...:t2]
+
+		mov(rdx, ptr[px + 8 * 0]);
+		mulx(H, t1, ptr[px + 8 * 2]); // (2, 0)
+		add(t2, H); // start of a new carry chain: add, like the other rows
+		mov(rdx, ptr[px + 8 * 1]);
+		mulx(H, rax, ptr[px + 8 * 3]); // (3, 1)
+		adc(t3, rax);
+		adc(t4, H);
+		mov(rdx, ptr[px + 8 * 2]);
+		mulx(H, rax, ptr[px + 8 * 4]); // (4, 2)
+		adc(t5, rax);
+		adc(t6, H);
+		mov(rdx, ptr[px + 8 * 3]);
+		mulx(t8, rax, ptr[px + 8 * 5]); // (5, 3)
+		adc(t7, rax);
+		adc(t8, 0); // [t8:...:t1]
+		mov(rdx, ptr[px + 8 * 0]);
+		mulx(H, t0, ptr[px + 8 * 1]); // (1, 0)
+		add(t1, H);
+		mov(rdx, ptr[px + 8 * 1]);
+		mulx(H, rax, ptr[px + 8 * 2]); // (2, 1)
+		adc(t2, rax);
+		adc(t3, H);
+		mov(rdx, ptr[px + 8 * 2]);
+		mulx(H, rax, ptr[px + 8 * 3]); // (3, 2)
+		adc(t4, rax);
+		adc(t5, H);
+		mov(rdx, ptr[px + 8 * 3]);
+		mulx(H, rax, ptr[px + 8 * 4]); // (4, 3)
+		adc(t6, rax);
+		adc(t7, H);
+		mov(rdx, ptr[px + 8 * 4]);
+		mulx(t9, rax, ptr[px + 8 * 5]); // (5, 4)
+		adc(t8, rax);
+		adc(t9, 0); // [t9...:t0]
+		shl1(Pack(t9, t8, t7, t6, t5, t4, t3, t2, t1, t0), &H); // presumably doubles the cross sum with the shifted-out bit in H -- confirm against shl1
+		// step 2: add the squares (i, i) in one carry chain and store each word of py as soon as it is final
+		mov(rdx, ptr[px + 8 * 0]);
+		mulx(rdx, rax, rdx);
+		mov(ptr[py + 8 * 0], rax);
+		add(t0, rdx);
+		mov(ptr[py + 8 * 1], t0);
+
+		mov(rdx, ptr[px + 8 * 1]);
+		mulx(rdx, rax, rdx);
+		adc(t1, rax);
+		mov(ptr[py + 8 * 2], t1);
+		adc(t2, rdx);
+		mov(ptr[py + 8 * 3], t2);
+
+		mov(rdx, ptr[px + 8 * 2]);
+		mulx(rdx, rax, rdx);
+		adc(t3, rax);
+		mov(ptr[py + 8 * 4], t3);
+		adc(t4, rdx); // high half of (2, 2): must be the full 64-bit rdx
+		mov(ptr[py + 8 * 5], t4);
+
+		mov(rdx, ptr[px + 8 * 3]);
+		mulx(rdx, rax, rdx);
+		adc(t5, rax);
+		mov(ptr[py + 8 * 6], t5);
+		adc(t6, rdx);
+		mov(ptr[py + 8 * 7], t6);
+
+		mov(rdx, ptr[px + 8 * 4]);
+		mulx(rdx, rax, rdx);
+		adc(t7, rax);
+		mov(ptr[py + 8 * 8], t7);
+		adc(t8, rdx);
+		mov(ptr[py + 8 * 9], t8);
+
+		mov(rdx, ptr[px + 8 * 5]);
+		mulx(rdx, rax, rdx);
+		adc(t9, rax);
+		mov(ptr[py + 8 * 10], t9);
+		adc(rdx, H);
+		mov(ptr[py + 8 * 11], rdx);
 	}
 	/*
 		pz[7..0] <- px[3..0] * py[3..0]
diff --git a/test/bench.hpp b/test/bench.hpp
index 69aff500..9a28db7e 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -168,7 +168,6 @@ void testBench(const G1& P, const G2& Q)
 	CYBOZU_BENCH_C("GT::sqr       ", C2, GT::sqr, e1, e1);
 	CYBOZU_BENCH_C("GT::inv       ", C2, GT::inv, e1, e1);
 #endif
-	CYBOZU_BENCH_C("FpDbl::mulPre ", C3, FpDbl::mulPre, d0, x, y);
 	CYBOZU_BENCH_C("pairing       ", 3000, pairing, e1, P, Q);
 	CYBOZU_BENCH_C("millerLoop    ", 3000, millerLoop, e1, P, Q);
 	CYBOZU_BENCH_C("finalExp      ", 3000, finalExp, e1, e1);

From 70e9032ac0df781d14a18ddae2bed85907aad912 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Feb 2021 18:13:58 +0900
Subject: [PATCH 476/553] define fp_sqrPreL

---
 src/fp_generator.hpp | 224 ++++++++++++++++++++++++-------------------
 1 file changed, 123 insertions(+), 101 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 7dbb05bf..ecd022c6 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -249,7 +249,8 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	const mcl::fp::Op *op_;
 	Label pL_; // pointer to p
 	// the following labels assume sf(this, 3, 10 | UseRDX)
-	Label mulPreL;
+	Label fp_mulPreL;
+	Label fp_sqrPreL;
 	Label fpDbl_modL;
 	Label fp_mulL;
 	Label fp2Dbl_mulPreL;
@@ -397,17 +398,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		align(16);
 		op.fpDbl_modA_ = gen_fpDbl_mod(op);
 		setFuncInfo(prof_, suf, "Dbl_mod", op.fpDbl_modA_, getCurr());
-		align(16);
-		op.fp_mulA_ = gen_mul();
-		setFuncInfo(prof_, suf, "_mul", op.fp_mulA_, getCurr());
-
-		if (op.fp_mulA_) {
-			op.fp_mul = fp::func_ptr_cast<void4u>(op.fp_mulA_); // used in toMont/fromMont
-		}
-
-		align(16);
-		op.fp_sqrA_ = gen_sqr();
-		setFuncInfo(prof_, suf, "_sqr", op.fp_sqrA_, getCurr());
 
 		if (op.primeMode != PM_NIST_P192 && op.N <= 6) { // support general op.N but not fast for op.N > 4
 			align(16);
@@ -424,6 +414,17 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		align(16);
 		gen_fpDbl_sqrPre(op.fpDbl_sqrPre);
 		setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPre, getCurr());
+		align(16);
+		op.fp_mulA_ = gen_mul();
+		setFuncInfo(prof_, suf, "_mul", op.fp_mulA_, getCurr());
+
+		if (op.fp_mulA_) {
+			op.fp_mul = fp::func_ptr_cast<void4u>(op.fp_mulA_); // used in toMont/fromMont
+		}
+
+		align(16);
+		op.fp_sqrA_ = gen_sqr();
+		setFuncInfo(prof_, suf, "_sqr", op.fp_sqrA_, getCurr());
 		if (op.xi_a == 0) return; // Fp2 is not used
 
 		align(16);
@@ -874,7 +875,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			// a little faster
 			gen_montMul6();
 #else
-			if (mulPreL.getAddress() == 0 || fpDbl_modL.getAddress() == 0) return 0;
+			if (fp_mulPreL.getAddress() == 0 || fpDbl_modL.getAddress() == 0) return 0;
 			StackFrame sf(this, 3, 10 | UseRDX, 12 * 8);
 			/*
 				use xm3
@@ -883,7 +884,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			*/
 			vmovq(xm3, gp0);
 			mov(gp0, rsp);
-			call(mulPreL); // gp0, x, y
+			call(fp_mulPreL); // gp0, x, y
 			vmovq(gp0, xm3);
 			mov(gp1, rsp);
 			call(fpDbl_modL);
@@ -1290,24 +1291,29 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			return func;
 		}
 		if (pn_ == 6 && !isFullBit_ && useMulx_ && useAdx_) {
-			if (fpDbl_modL.getAddress() == 0) return 0;
-			StackFrame sf(this, 3, 10 | UseRDX, (12 + 6) * 8);
-			/*
-				use xm3
-				rsp
-				[6 * 8, (12 + 6) * 8) ; sqrPre(x, x)
-				[0..6 * 8) ; stack for sqrPre6
-			*/
-			vmovq(xm3, gp0);
+#if 1
+			StackFrame sf(this, 3, 10 | UseRDX);
 			Pack t = sf.t;
 			t.append(sf.p[2]);
-			// sqrPre6 uses 6 * 8 bytes stack
-			sqrPre6(rsp + 6 * 8, sf.p[1], t);
-			mov(gp0, ptr[rsp + (12 + 6) * 8]);
-			vmovq(gp0, xm3);
-			lea(gp1, ptr[rsp + 6 * 8]);
+			int stackSize = 12 * 8 + 8;
+			sub(rsp, stackSize);
+			mov(ptr[rsp], gp0);
+			lea(gp0, ptr[rsp + 8]);
+			call(fp_sqrPreL);
+			mov(gp0, ptr[rsp]);
+			lea(gp1, ptr[rsp + 8]);
 			call(fpDbl_modL);
+			add(rsp, stackSize);
 			return func;
+#else
+			StackFrame sf(this, 3, 10 | UseRDX, 12 * 8);
+			Pack t = sf.t;
+			t.append(sf.p[2]);
+			sqrPre6(rsp, sf.p[1], t);
+			lea(gp1, ptr[rsp]);
+			call(fpDbl_modL);
+			return func;
+#endif
 		}
 		return 0;
 	}
@@ -1327,7 +1333,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		sub(rsp, stackSize);
 		mov(ptr[rsp], gp0); // save z
 		lea(gp0, ptr[rsp + 8]);
-		call(mulPreL); // stack <- x * y
+		call(fp_mulPreL); // stack <- x * y
 		mov(gp0, ptr[rsp]);
 		lea(gp1, ptr[rsp + 8]);
 		call(fpDbl_modL); // z <- stack
@@ -2496,78 +2502,94 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void gen_fpDbl_sqrPre(void2u& f)
 	{
+		if (!(useMulx_ && useAdx_)) return;
 		void2u func = getCurr<void2u>();
-		if (pn_ == 2 && useMulx_) {
-			StackFrame sf(this, 2, 7 | UseRDX);
-			sqrPre2(sf.p[0], sf.p[1], sf.t);
-			f = func;
-		}
-		if (pn_ == 3) {
-			StackFrame sf(this, 3, 10 | UseRDX);
-			Pack t = sf.t;
-			t.append(sf.p[2]);
-			sqrPre3(sf.p[0], sf.p[1], t);
-			f = func;
-		}
-		if (pn_ == 4 && useMulx_) {
-			StackFrame sf(this, 3, 10 | UseRDX);
-			Pack t = sf.t;
-			t.append(sf.p[2]);
-			sqrPre4(sf.p[0], sf.p[1], t);
-			f = func;
-		}
-		if (pn_ == 6 && useMulx_ && useAdx_) {
-			StackFrame sf(this, 3, 10 | UseRDX, 6 * 8);
-			Pack t = sf.t;
-			t.append(sf.p[2]);
-			sqrPre6(sf.p[0], sf.p[1], t);
-			f = func;
+		switch (pn_) {
+		case 2:
+			{
+				StackFrame sf(this, 2, 7 | UseRDX);
+				sqrPre2(sf.p[0], sf.p[1], sf.t);
+				f = func;
+			}
+			break;
+		case 3:
+			{
+				StackFrame sf(this, 3, 10 | UseRDX);
+				Pack t = sf.t;
+				t.append(sf.p[2]);
+				sqrPre3(sf.p[0], sf.p[1], t);
+				f = func;
+			}
+			break;
+		case 4:
+			{
+				StackFrame sf(this, 3, 10 | UseRDX);
+				Pack t = sf.t;
+				t.append(sf.p[2]);
+				sqrPre4(sf.p[0], sf.p[1], t);
+				f = func;
+			}
+			break;
+		case 6:
+			{
+				StackFrame sf(this, 3, 10 | UseRDX);
+				call(fp_sqrPreL);
+				sf.close();
+			L(fp_sqrPreL);
+				Pack t = sf.t;
+				t.append(sf.p[2]);
+				sqrPre6(sf.p[0], sf.p[1], t);
+				ret();
+				f = func;
+			}
+			break;
 		}
-#if 0
-#ifdef XBYAK64_WIN
-		mov(r8, rdx);
-#else
-		mov(rdx, rsi);
-#endif
-		jmp((void*)op.fpDbl_mulPreA_);
-		return func;
-#endif
 	}
 	void gen_fpDbl_mulPre(void3u& f)
 	{
+		if (!(useMulx_ && useAdx_)) return;
 		void3u func = getCurr<void3u>();
-		if (pn_ == 2 && useMulx_) {
-			StackFrame sf(this, 3, 5 | UseRDX);
-			mulPre2(sf.p[0], sf.p[1], sf.p[2], sf.t);
-			f = func;
-		}
-		if (pn_ == 3) {
-			StackFrame sf(this, 3, 10 | UseRDX);
-			mulPre3(sf.p[0], sf.p[1], sf.p[2], sf.t);
-			f = func;
-		}
-		if (pn_ == 4) {
-			/*
-				fpDbl_mulPre is available as C function
-				this function calls mulPreL directly.
-			*/
-			StackFrame sf(this, 3, 10 | UseRDX, 0, false);
-			mulPre4(gp0, gp1, gp2, sf.t);
-//			call(mulPreL);
-			sf.close(); // make epilog
-		L(mulPreL); // called only from asm code
-			mulPre4(gp0, gp1, gp2, sf.t);
-			ret();
-			f = func;
-		}
-		if (pn_ == 6 && useAdx_) {
-			StackFrame sf(this, 3, 10 | UseRDX, 0, false);
-			call(mulPreL);
-			sf.close(); // make epilog
-		L(mulPreL); // called only from asm code
-			mulPre6(sf.t);
-			ret();
-			f = func;
+		switch (pn_) {
+		case 2:
+			{
+				StackFrame sf(this, 3, 5 | UseRDX);
+				mulPre2(sf.p[0], sf.p[1], sf.p[2], sf.t);
+				f = func;
+			}
+			break;
+		case 3:
+			{
+				StackFrame sf(this, 3, 10 | UseRDX);
+				mulPre3(sf.p[0], sf.p[1], sf.p[2], sf.t);
+				f = func;
+			}
+			break;
+		case 4:
+			{
+				/*
+					fpDbl_mulPre is available as C function
+					this function calls fp_mulPreL directly.
+				*/
+				StackFrame sf(this, 3, 10 | UseRDX, 0, false);
+				mulPre4(gp0, gp1, gp2, sf.t);
+	//			call(fp_mulPreL);
+				sf.close(); // make epilog
+			L(fp_mulPreL); // called only from asm code
+				mulPre4(gp0, gp1, gp2, sf.t);
+				ret();
+				f = func;
+			}
+			break;
+		case 6:
+			{
+				StackFrame sf(this, 3, 10 | UseRDX, 0, false);
+				call(fp_mulPreL);
+				sf.close(); // make epilog
+			L(fp_mulPreL); // called only from asm code
+				mulPre6(sf.t);
+				ret();
+				f = func;
+			}
 		}
 	}
 	static inline void debug_put_inner(const uint64_t *ptr, int n)
@@ -3610,7 +3632,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		} else {
 			lea(gp1, ptr [s]);
 			lea(gp2, ptr [t]);
-			call(mulPreL);
+			call(fp_mulPreL);
 		}
 		// d0 = z.a = a c
 		mov(gp0, ptr [z]);
@@ -3619,7 +3641,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		if (embedded) {
 			mulPre4(gp0, gp1, gp2, sf.t);
 		} else {
-			call(mulPreL);
+			call(fp_mulPreL);
 		}
 		// d2 = z.b = b d
 		mov(gp1, ptr [x]);
@@ -3630,7 +3652,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			mulPre4(d2, gp1, gp2, sf.t);
 		} else {
 			lea(gp0, ptr [d2]);
-			call(mulPreL);
+			call(fp_mulPreL);
 		}
 
 		{
@@ -3667,7 +3689,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const RegExp x = rsp + 1 * 8;
 		const Ext1 t1(FpByte_, rsp, 2 * 8);
 		const Ext1 t2(FpByte_, rsp, t1.next);
-		// use mulPreL then use 3
+		// use fp_mulPreL then use 3
 		StackFrame sf(this, 3 /* not 2 */, 10 | UseRDX, t2.next);
 		mov(ptr [y], gp0);
 		mov(ptr [x], gp1);
@@ -3695,13 +3717,13 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		add(gp0, FpByte_ * 2);
 		lea(gp1, ptr [t1]);
 		mov(gp2, ptr [x]);
-		call(mulPreL);
+		call(fp_mulPreL);
 		mov(gp0, ptr [x]);
 		gen_raw_fp_sub(t1, gp0, gp0 + FpByte_, t, false);
 		mov(gp0, ptr [y]);
 		lea(gp1, ptr [t1]);
 		lea(gp2, ptr [t2]);
-		call(mulPreL);
+		call(fp_mulPreL);
 		return func;
 	}
 	void2u gen_fp2Dbl_mul_xi()

From 00ee0e81510f8e9a589f1efb22c21cbe09c82d2f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Feb 2021 18:34:42 +0900
Subject: [PATCH 477/553] skip bench2 in debug

---
 test/she_test.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/she_test.cpp b/test/she_test.cpp
index f49f29f9..1b6feec2 100644
--- a/test/she_test.cpp
+++ b/test/she_test.cpp
@@ -45,6 +45,7 @@ CYBOZU_TEST_AUTO(bench2)
 {
 #ifndef NDEBUG
 	puts("skip bench2 in debug");
+	return;
 #endif
 	puts("msec");
 	setTryNum(1 << 16);

From 4fb3fec3db24c1b180d78b6fe4af2b483b8c1a80 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Feb 2021 18:41:18 +0900
Subject: [PATCH 478/553] v1.36

---
 include/mcl/op.hpp | 2 +-
 readme.md          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 096c613c..13fd0c52 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x135; /* 0xABC = A.BC */
+static const int version = 0x136; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index f557f75e..a1345843 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,7 @@ mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
+- improve performance
 - support M1 mac
 - dst for mapToG1 has changed to `BLS_SIG_BLS12381G1_XMD:SHA-256_SSWU_RO_POP_`.
 - `mclBn_eth*` functions are removed.

From 952638edc3af420dc5aa24be6d318e42eab2814d Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 27 Feb 2021 13:36:05 +0900
Subject: [PATCH 479/553] use Fp::mul2 for ec

---
 include/mcl/ec.hpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp
index d8a0fc18..13bfe5f1 100644
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -301,7 +301,7 @@ void dblJacobi(E& R, const E& P)
 	xy += xy; // 4xy^2
 	switch (E::specialA_) {
 	case Zero:
-		F::add(t, x2, x2);
+		F::mul2(t, x2);
 		x2 += t;
 		break;
 	case Minus3:
@@ -312,7 +312,7 @@ void dblJacobi(E& R, const E& P)
 			F::sqr(t, t);
 			x2 -= t;
 		}
-		F::add(t, x2, x2);
+		F::mul2(t, x2);
 		x2 += t;
 		break;
 	case GenericA:
@@ -325,7 +325,7 @@ void dblJacobi(E& R, const E& P)
 			t *= E::a_;
 		}
 		t += x2;
-		x2 += x2;
+		F::mul2(x2, x2);
 		x2 += t;
 		break;
 	}
@@ -337,12 +337,12 @@ void dblJacobi(E& R, const E& P)
 	} else {
 		F::mul(R.z, P.y, P.z);
 	}
-	R.z += R.z;
+	F::mul2(R.z, R.z);
 	F::sub(R.y, xy, R.x);
 	R.y *= x2;
-	y2 += y2;
-	y2 += y2;
-	y2 += y2;
+	F::mul2(y2, y2);
+	F::mul2(y2, y2);
+	F::mul2(y2, y2);
 	R.y -= y2;
 }
 

From 050a5770950c94660cf7d686f2aca4ed2c5ac091 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 28 Feb 2021 18:00:51 +0900
Subject: [PATCH 480/553] look prime behavior

---
 misc/snark-p.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 misc/snark-p.py

diff --git a/misc/snark-p.py b/misc/snark-p.py
new file mode 100644
index 00000000..8168f3bd
--- /dev/null
+++ b/misc/snark-p.py
@@ -0,0 +1,13 @@
+p=21888242871839275222246405745257275088696311157297823662689037894645226208583
+
+print("over 253 bit")
+for i in range (10):
+	print(i, (p * i) >> 253)
+
+def maxarg(x):
+	return x // p
+
+print("maxarg")
+for i in range(16):
+	print(i, maxarg(i << 253))
+

From 62b67cf838b6ecbccf61a2f358120d9ae2f32097 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 1 Mar 2021 20:39:57 +0900
Subject: [PATCH 481/553] add mulSmall

---
 include/mcl/fp.hpp       | 10 +++++
 include/mcl/gmp_util.hpp | 79 ++++++++++++++++++++++++++++++++++++++++
 include/mcl/op.hpp       |  1 +
 test/bench.hpp           |  4 ++
 test/common_test.hpp     | 16 ++++++++
 5 files changed, 110 insertions(+)

diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index d49b6bef..f53c7a65 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -509,6 +509,16 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 #endif
 	static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); }
 	static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); }
+	static inline void mulSmall(FpT& z, const FpT& x, const uint32_t y) // z = x * y for a small y (y <= SmallModp::maxMulN)
+	{
+		assert(y <= op_.smallModp.maxMulN);
+		Unit xy[maxSize + 1]; // room for maxSize + 1 Units
+		op_.fp_mulUnitPre(xy, x.v_, y); // presumably xy = x * y without reduction -- confirm against fp_mulUnitPre
+		int v = op_.smallModp.approxMul(xy); // table lookup on the top bits of xy: estimate of xy / p
+		const Unit *pv = op_.smallModp.getPmul(v); // precomputed p * v
+		op_.fp_subPre(z.v_, xy, pv); // z = xy - p * v; NOTE(review): looks like the extra top Unit of xy is not read -- confirm
+		op_.fp_sub(z.v_, z.v_, op_.p, op_.p); // presumably the final correction into [0, p) -- confirm against fp_sub
+	}
 	static inline void mulUnit(FpT& z, const FpT& x, const Unit y)
 	{
 		if (mulSmallUnit(z, x, y)) return;
diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp
index ed0880ba..9309bcd3 100644
--- a/include/mcl/gmp_util.hpp
+++ b/include/mcl/gmp_util.hpp
@@ -942,6 +942,85 @@ class SquareRoot {
 #endif
 };
 
+/*
+	x mod p for a small value x < (pMulTblN * p).
+*/
+struct SmallModp {
+	typedef mcl::fp::Unit Unit;
+	static const size_t unitBitSize = sizeof(Unit) * 8;
+	static const size_t maxTblSize = (MCL_MAX_BIT_SIZE + unitBitSize - 1) / unitBitSize + 1;
+	static const size_t maxMulN = 9;
+	static const size_t pMulTblN = maxMulN + 1;
+	int N_; // number of Units needed for p
+	int shiftL_; // unitBitSize - shiftR_
+	int shiftR_; // (pBitSize - 1) % unitBitSize
+	int maxIdx_; // getTop(p * maxMulN): largest valid index of idxTbl_
+	// pMulTbl_[i] = p * i (full multi-Unit value, filled by init())
+	Unit pMulTbl_[pMulTblN][maxTblSize];
+	// idxTbl_[x] = (x << (pBitSize - 1)) / p
+	int8_t idxTbl_[pMulTblN * 2];
+	// every member is zero until init() is called
+	SmallModp()
+		: N_(0)
+		, shiftL_(0)
+		, shiftR_(0)
+		, maxIdx_(0)
+		, pMulTbl_()
+		, idxTbl_()
+	{
+	}
+	// return idxTbl_[getTop(x)], an estimate of x / p that never exceeds it
+	int approxMul(const Unit *x) const
+	{
+		int top = getTop(x);
+		assert(top <= maxIdx_);
+		return idxTbl_[top];
+	}
+	const Unit *getPmul(size_t v) const // return p * v as a Unit array
+	{
+		assert(v < pMulTblN);
+		return pMulTbl_[v];
+	}
+	int getTop(const Unit *x) const // return x >> (pBitSize - 1) as int; x must have at least N_ + 1 Units
+	{
+		return int(shiftR_ == 0 ? x[N_ - 1] : ((x[N_ - 1] >> shiftR_) | (x[N_] << shiftL_))); // shiftL_ == unitBitSize when shiftR_ == 0, and shifting by the full width is undefined
+	}
+	int cvtInt(const mpz_class& x) const // convert x (at most one Unit) to int
+	{
+		assert(mcl::gmp::getUnitSize(x) <= 1);
+		if (x == 0) {
+			return 0;
+		} else {
+			return int(mcl::gmp::getUnit(x)[0]);
+		}
+	}
+	void init(const mpz_class& p) // build pMulTbl_ and idxTbl_ for the prime p
+	{
+		size_t pBitSize = mcl::gmp::getBitSize(p);
+		N_ = (pBitSize + unitBitSize - 1) / unitBitSize;
+		shiftR_ = (pBitSize - 1) % unitBitSize;
+		shiftL_ = unitBitSize - shiftR_;
+		mpz_class t = 0;
+		for (size_t i = 0; i < pMulTblN; i++) {
+			bool b;
+			mcl::gmp::getArray(&b, pMulTbl_[i], maxTblSize, t); // t == p * i here
+			assert(b);
+			(void)b;
+			if (i == pMulTblN - 1) {
+				maxIdx_ = getTop(pMulTbl_[i]);
+				assert(maxIdx_ < CYBOZU_NUM_OF_ARRAY(idxTbl_));
+				break;
+			}
+			t += p;
+		}
+
+		for (int i = 0; i <= maxIdx_; i++) {
+			idxTbl_[i] = cvtInt((mpz_class(i) << (pBitSize - 1)) / p);
+		}
+	}
+};
+
+
 /*
 	Barrett Reduction
 	for non GMP version
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 13fd0c52..2dd358cc 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -191,6 +191,7 @@ struct Op {
 	uint32_t pmod4;
 	mcl::SquareRoot sq;
 	mcl::Modp modp;
+	mcl::SmallModp smallModp;
 	Unit half[maxUnitSize]; // (p + 1) / 2
 	Unit oneRep[maxUnitSize]; // 1(=inv R if Montgomery)
 	/*
diff --git a/test/bench.hpp b/test/bench.hpp
index 9a28db7e..8378674e 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -116,6 +116,10 @@ void testBench(const G1& P, const G2& Q)
 	CYBOZU_BENCH_C("Fp::sub       ", C3, Fp::sub, x, x, y);
 	CYBOZU_BENCH_C("Fp::add 2     ", C3, Fp::add, x, x, x);
 	CYBOZU_BENCH_C("Fp::mul2      ", C3, Fp::mul2, x, x);
+	CYBOZU_BENCH_C("Fp::mulSmall8 ", C3, Fp::mulSmall, x, x, 8);
+	CYBOZU_BENCH_C("Fp::mulUnit8  ", C3, Fp::mulUnit, x, x, 8);
+	CYBOZU_BENCH_C("Fp::mulSmall9 ", C3, Fp::mulSmall, x, x, 9);
+	CYBOZU_BENCH_C("Fp::mulUnit9  ", C3, Fp::mulUnit, x, x, 9);
 	CYBOZU_BENCH_C("Fp::neg       ", C3, Fp::neg, x, x);
 	CYBOZU_BENCH_C("Fp::mul       ", C3, Fp::mul, x, x, y);
 	CYBOZU_BENCH_C("Fp::sqr       ", C3, Fp::sqr, x, x);
diff --git a/test/common_test.hpp b/test/common_test.hpp
index 338e7d3a..74a745c2 100644
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@@ -183,8 +183,24 @@ void testFp2Dbl_mul_xi1()
 	}
 }
 
+void testMulSmall()
+{
+	puts("testMulSmall");
+	cybozu::XorShift rg;
+	for (int y = 0; y < 10; y++) {
+		for (int i = 0; i < 40; i++) {
+			Fp x, z1, z2;
+			x.setByCSPRNG(rg);
+			Fp::mulSmall(z1, x, y);
+			z2 = x * y;
+			CYBOZU_TEST_EQUAL(z1, z2);
+		}
+	}
+}
+
 void testCommon(const G1& P, const G2& Q)
 {
+	testMulSmall();
 	testFp2Dbl_mul_xi1();
 	testABCD();
 	testMul2();

From 2d55524eb44996653f640b478170897462cd1452 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 2 Mar 2021 10:43:52 +0900
Subject: [PATCH 482/553] init smallModp

---
 src/fp.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fp.cpp b/src/fp.cpp
index 3d7eed31..9f3c47c2 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -639,6 +639,7 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
 		if (!b) return false;
 	}
 	modp.init(mp);
+	smallModp.init(mp);
 	return fp::initForMont(*this, p, mode);
 }
 

From f52743fdbb487adb62e75a9e501d113fb2171009 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 2 Mar 2021 11:02:52 +0900
Subject: [PATCH 483/553] add Fp::mul9

---
 include/mcl/fp.hpp | 6 ++++++
 include/mcl/op.hpp | 2 ++
 test/bench.hpp     | 2 +-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index f53c7a65..b85b6914 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -165,6 +165,8 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 		if (sqr == 0) sqr = sqrC;
 		mul2 = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_mul2A_);
 		if (mul2 == 0) mul2 = mul2C;
+		mul9 = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_mul9A_);
+		if (mul9 == 0) mul9 = mul9C;
 #endif
 		*pb = true;
 	}
@@ -499,6 +501,8 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 	static inline void sqrC(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); }
 	static void (*mul2)(FpT& y, const FpT& x);
 	static inline void mul2C(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); }
+	static void (*mul9)(FpT& y, const FpT& x);
+	static inline void mul9C(FpT& y, const FpT& x) { mulSmall(y, x, 9); }
 #else
 	static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); }
 	static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); }
@@ -506,6 +510,7 @@ class FpT : public fp::Serializable<FpT<tag, maxBitSize>,
 	static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); }
 	static inline void sqr(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); }
 	static inline void mul2(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); }
+	static inline void mul9(FpT& y, const FpT& x) { mulSmall(y, x, 9); }
 #endif
 	static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); }
 	static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); }
@@ -756,6 +761,7 @@ template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::neg)(FpT& y,
 template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul)(FpT& z, const FpT& x, const FpT& y);
 template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::sqr)(FpT& y, const FpT& x);
 template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul2)(FpT& y, const FpT& x);
+template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul9)(FpT& y, const FpT& x);
 #endif
 
 } // mcl
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 2dd358cc..4fa89416 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -216,6 +216,7 @@ struct Op {
 	void3u fp_mulA_;
 	void2u fp_sqrA_;
 	void2u fp_mul2A_;
+	void2u fp_mul9A_;
 	void3u fp2_addA_;
 	void3u fp2_subA_;
 	void2u fp2_negA_;
@@ -305,6 +306,7 @@ struct Op {
 		fp_mulA_ = 0;
 		fp_sqrA_ = 0;
 		fp_mul2A_ = 0;
+		fp_mul9A_ = 0;
 		fp2_addA_ = 0;
 		fp2_subA_ = 0;
 		fp2_negA_ = 0;
diff --git a/test/bench.hpp b/test/bench.hpp
index 8378674e..11ced311 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -118,7 +118,7 @@ void testBench(const G1& P, const G2& Q)
 	CYBOZU_BENCH_C("Fp::mul2      ", C3, Fp::mul2, x, x);
 	CYBOZU_BENCH_C("Fp::mulSmall8 ", C3, Fp::mulSmall, x, x, 8);
 	CYBOZU_BENCH_C("Fp::mulUnit8  ", C3, Fp::mulUnit, x, x, 8);
-	CYBOZU_BENCH_C("Fp::mulSmall9 ", C3, Fp::mulSmall, x, x, 9);
+	CYBOZU_BENCH_C("Fp::mul9      ", C3, Fp::mul9, x, x);
 	CYBOZU_BENCH_C("Fp::mulUnit9  ", C3, Fp::mulUnit, x, x, 9);
 	CYBOZU_BENCH_C("Fp::neg       ", C3, Fp::neg, x, x);
 	CYBOZU_BENCH_C("Fp::mul       ", C3, Fp::mul, x, x, y);

From ef3643f5bc9c9fd5893a0b98d5f0c03b624e3acd Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 3 Mar 2021 16:47:13 +0900
Subject: [PATCH 484/553] use github action

---
 .github/workflows/main.yml | 13 +++++++++++++
 .travis.yml                | 17 -----------------
 readme.md                  |  2 +-
 3 files changed, 14 insertions(+), 18 deletions(-)
 create mode 100644 .github/workflows/main.yml
 delete mode 100644 .travis.yml

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 00000000..80ea7214
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,13 @@
+name: test
+on: [push]
+
+jobs:
+  build:
+    name: test
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - run: make test_ci DEBUG=1 -j3
+    - run: make clean
+    - run: make test_ci DEBUG=1 -j3 CXX=clang++
+
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 73a97e6a..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,17 +0,0 @@
-sudo: true
-dist: trusty
-language: cpp
-compiler:
-  - gcc
-  - clang
-addons:
-  apt:
-    packages:
-      - libgmp-dev
-script:
-  - make test_ci DEBUG=1 -j3
-  - make clean
-  - make test_ci CFLAGS_USER=-DMCL_DONT_USE_XBYAK -j3
-  - make clean
-  - make test_go
- 
diff --git a/readme.md b/readme.md
index a1345843..5c9f7999 100644
--- a/readme.md
+++ b/readme.md
@@ -1,4 +1,4 @@
-[![Build Status](https://api.travis-ci.com/herumi/mcl.svg?branch=master)](https://travis-ci.com/github/herumi/mcl)
+[![Build Status](https://github.com/herumi/mcl/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/mcl/actions/workflows/main.yml)
 
 # mcl
 

From dbd58cc75761ec9f7b9348ef1b595e31887af177 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 3 Mar 2021 16:49:12 +0900
Subject: [PATCH 485/553] add test_go

---
 .github/workflows/main.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 80ea7214..575892f0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -10,4 +10,5 @@ jobs:
     - run: make test_ci DEBUG=1 -j3
     - run: make clean
     - run: make test_ci DEBUG=1 -j3 CXX=clang++
-
+    - run: make clean
+    - run: make test_go

From 8752e278edbff6e984f7d789cc5cb8e98d955168 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 3 Mar 2021 16:59:56 +0900
Subject: [PATCH 486/553] remove warning of gmp_util

---
 include/mcl/gmp_util.hpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp
index 9309bcd3..c5e9700a 100644
--- a/include/mcl/gmp_util.hpp
+++ b/include/mcl/gmp_util.hpp
@@ -952,13 +952,13 @@ struct SmallModp {
 	static const size_t maxMulN = 9;
 	static const size_t pMulTblN = maxMulN + 1;
 	int N_;
-	int shiftL_;
-	int shiftR_;
-	int maxIdx_;
+	uint32_t shiftL_;
+	uint32_t shiftR_;
+	uint32_t maxIdx_;
 	// pMulTbl_[i] = (p * i) >> (pBitSize_ - 1)
 	Unit pMulTbl_[pMulTblN][maxTblSize];
 	// idxTbl_[x] = (x << (pBitSize_ - 1)) / p
-	int8_t idxTbl_[pMulTblN * 2];
+	uint8_t idxTbl_[pMulTblN * 2];
 	// return x >> (pBitSize_ - 1)
 	SmallModp()
 		: N_(0)
@@ -970,9 +970,9 @@ struct SmallModp {
 	{
 	}
 	// return argmax { i : x > i * p }
-	int approxMul(const Unit *x) const
+	uint32_t approxMul(const Unit *x) const
 	{
-		int top = getTop(x);
+		uint32_t top = getTop(x);
 		assert(top <= maxIdx_);
 		return idxTbl_[top];
 	}
@@ -981,17 +981,17 @@ struct SmallModp {
 		assert(v < pMulTblN);
 		return pMulTbl_[v];
 	}
-	int getTop(const Unit *x) const
+	uint32_t getTop(const Unit *x) const
 	{
 		return (x[N_ - 1] >> shiftR_) | (x[N_] << shiftL_);
 	}
-	int cvtInt(const mpz_class& x) const
+	uint32_t cvtInt(const mpz_class& x) const
 	{
 		assert(mcl::gmp::getUnitSize(x) <= 1);
 		if (x == 0) {
 			return 0;
 		} else {
-			return int(mcl::gmp::getUnit(x)[0]);
+			return uint32_t(mcl::gmp::getUnit(x)[0]);
 		}
 	}
 	void init(const mpz_class& p)
@@ -1014,8 +1014,8 @@ struct SmallModp {
 			t += p;
 		}
 
-		for (int i = 0; i <= maxIdx_; i++) {
-			idxTbl_[i] = cvtInt((mpz_class(i) << (pBitSize - 1)) / p);
+		for (uint32_t i = 0; i <= maxIdx_; i++) {
+			idxTbl_[i] = cvtInt((mpz_class(int(i)) << (pBitSize - 1)) / p);
 		}
 	}
 };

From 98fc193f5f229a8dd11a333b5cf1e30c75592fd9 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 3 Mar 2021 18:22:59 +0900
Subject: [PATCH 487/553] use -O3 instead of -Ofast

---
 common.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common.mk b/common.mk
index 707aa35f..c42d1ca6 100644
--- a/common.mk
+++ b/common.mk
@@ -91,7 +91,7 @@ else
     CFLAGS_OPT+=-O3
   else
     ifeq ($(shell expr $(GCC_VER) \> 4.6.0),1)
-      CFLAGS_OPT+=-Ofast
+      CFLAGS_OPT+=-O3
     else
       CFLAGS_OPT+=-O3
     endif

From 1317ddc6a7dd39febaa919a06f10ff07ce7edcb1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 8 Mar 2021 09:52:34 +0900
Subject: [PATCH 488/553] [java] add test to main.yml

---
 .github/workflows/main.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 575892f0..1aa177a6 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -12,3 +12,5 @@ jobs:
     - run: make test_ci DEBUG=1 -j3 CXX=clang++
     - run: make clean
     - run: make test_go
+    - run: sudo apt install openjdk-8-jdk
+    - run: make -C ffi/java test JAVA_INC=-I/usr/lib/jvm/java-8-openjdk-amd64/include

From fd4b408e521451dd266fa797c526100b9f58cfb0 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 8 Mar 2021 10:19:15 +0900
Subject: [PATCH 489/553] [ci] show dmesg if error

---
 .github/workflows/main.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 1aa177a6..201202a0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -7,7 +7,8 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
-    - run: make test_ci DEBUG=1 -j3
+    - run: lscpu
+    - run: make test_ci DEBUG=1 -j3 || dmesg | tail
     - run: make clean
     - run: make test_ci DEBUG=1 -j3 CXX=clang++
     - run: make clean

From 922a935e266de6d6e7581cf199055d6c79fc57b7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 8 Mar 2021 16:57:07 +0900
Subject: [PATCH 490/553] fix crash on haswell

---
 src/fp_generator.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index ecd022c6..45097f42 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1095,7 +1095,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void gen_fpDbl_mod4(const Reg64& z, const Reg64& xy, const Pack& t)
 	{
-		if (!isFullBit_) {
+		if (!isFullBit_ && useMulx_ && useAdx_) {
 			gen_fpDbl_mod4NF(z, xy, t);
 			return;
 		}
@@ -2547,7 +2547,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void gen_fpDbl_mulPre(void3u& f)
 	{
-		if (!(useMulx_ && useAdx_)) return;
+		if (!useMulx_ || (pn_ == 6 && !useAdx_)) return;
 		void3u func = getCurr<void3u>();
 		switch (pn_) {
 		case 2:

From f52f4668ac334345d85488e012530e17a3f94522 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 8 Mar 2021 17:04:24 +0900
Subject: [PATCH 491/553] remove warnings

---
 include/mcl/gmp_util.hpp |  4 ++--
 misc/snark-p.py          |  5 +++++
 src/fp_generator.hpp     | 20 ++++++++++----------
 test/bench.hpp           | 22 ++++++++++++----------
 4 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp
index c5e9700a..f3fcfa39 100644
--- a/include/mcl/gmp_util.hpp
+++ b/include/mcl/gmp_util.hpp
@@ -951,7 +951,7 @@ struct SmallModp {
 	static const size_t maxTblSize = (MCL_MAX_BIT_SIZE + unitBitSize - 1) / unitBitSize + 1;
 	static const size_t maxMulN = 9;
 	static const size_t pMulTblN = maxMulN + 1;
-	int N_;
+	uint32_t N_;
 	uint32_t shiftL_;
 	uint32_t shiftR_;
 	uint32_t maxIdx_;
@@ -997,7 +997,7 @@ struct SmallModp {
 	void init(const mpz_class& p)
 	{
 		size_t pBitSize = mcl::gmp::getBitSize(p);
-		N_ = (pBitSize + unitBitSize - 1) / unitBitSize;
+		N_ = uint32_t((pBitSize + unitBitSize - 1) / unitBitSize);
 		shiftR_ = (pBitSize - 1) % unitBitSize;
 		shiftL_ = unitBitSize - shiftR_;
 		mpz_class t = 0;
diff --git a/misc/snark-p.py b/misc/snark-p.py
index 8168f3bd..cbb7f5a3 100644
--- a/misc/snark-p.py
+++ b/misc/snark-p.py
@@ -11,3 +11,8 @@ def maxarg(x):
 for i in range(16):
 	print(i, maxarg(i << 253))
 
+
+x=0x2c130429c1d4802eb8703197d038ebd5109f96aee333bd027963094f5bb33ad
+
+y = x * 9
+print(hex(y))
diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 45097f42..d4209809 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -3656,25 +3656,25 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		}
 
 		{
-			Pack t = sf.t;
+			Pack t2 = sf.t;
 			if (pn_ == 4) {
-				t = t.sub(0, pn_ * 2);
+				t2 = t2.sub(0, pn_ * 2);
 			} else if (pn_ == 6) {
-				t.append(gp1);
-				t.append(gp2);
+				t2.append(gp1);
+				t2.append(gp2);
 			}
-			assert((int)t.size() == pn_ * 2);
+			assert((int)t2.size() == pn_ * 2);
 
 			mov(gp0, ptr [z]);
-			load_rm(t, gp0 + FpByte_ * 2);
-			sub_rm(t, gp0); // d1 -= d0
-			sub_rm(t, (RegExp)d2); // d1 -= d2
-			store_mr(gp0 + FpByte_ * 2, t);
+			load_rm(t2, gp0 + FpByte_ * 2);
+			sub_rm(t2, gp0); // d1 -= d0
+			sub_rm(t2, (RegExp)d2); // d1 -= d2
+			store_mr(gp0 + FpByte_ * 2, t2);
 
 			gen_raw_sub(gp0, gp0, d2, rax, pn_);
 			const RegExp& d0H = gp0 + pn_ * 8;
 			const RegExp& d2H = (RegExp)d2 + pn_ * 8;
-			gen_raw_fp_sub(d0H, d0H, d2H, t, true);
+			gen_raw_fp_sub(d0H, d0H, d2H, t2, true);
 		}
 		add(rsp, SS);
 		ret();
diff --git a/test/bench.hpp b/test/bench.hpp
index 11ced311..d407bc75 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -85,16 +85,18 @@ void testBench(const G1& P, const G2& Q)
 	const int C3 = 100000;
 #if 1
 	const int C2 = 3000;
-	mpz_class a = x.getMpz();
-	CYBOZU_BENCH_C("G1::mulCT     ", C, G1::mulCT, Pa, P, a);
-	CYBOZU_BENCH_C("G1::mul       ", C, G1::mul, Pa, Pa, a);
-	CYBOZU_BENCH_C("G1::add       ", C, G1::add, Pa, Pa, P);
-	CYBOZU_BENCH_C("G1::dbl       ", C, G1::dbl, Pa, Pa);
-	CYBOZU_BENCH_C("G2::mulCT     ", C, G2::mulCT, Qa, Q, a);
-	CYBOZU_BENCH_C("G2::mul       ", C, G2::mul, Qa, Qa, a);
-	CYBOZU_BENCH_C("G2::add       ", C, G2::add, Qa, Qa, Q);
-	CYBOZU_BENCH_C("G2::dbl       ", C, G2::dbl, Qa, Qa);
-	CYBOZU_BENCH_C("GT::pow       ", C, GT::pow, e1, e1, a);
+	{
+		mpz_class a = x.getMpz();
+		CYBOZU_BENCH_C("G1::mulCT     ", C, G1::mulCT, Pa, P, a);
+		CYBOZU_BENCH_C("G1::mul       ", C, G1::mul, Pa, Pa, a);
+		CYBOZU_BENCH_C("G1::add       ", C, G1::add, Pa, Pa, P);
+		CYBOZU_BENCH_C("G1::dbl       ", C, G1::dbl, Pa, Pa);
+		CYBOZU_BENCH_C("G2::mulCT     ", C, G2::mulCT, Qa, Q, a);
+		CYBOZU_BENCH_C("G2::mul       ", C, G2::mul, Qa, Qa, a);
+		CYBOZU_BENCH_C("G2::add       ", C, G2::add, Qa, Qa, Q);
+		CYBOZU_BENCH_C("G2::dbl       ", C, G2::dbl, Qa, Qa);
+		CYBOZU_BENCH_C("GT::pow       ", C, GT::pow, e1, e1, a);
+	}
 //	CYBOZU_BENCH_C("GT::powGLV    ", C, BN::param.glv2.pow, e1, e1, a);
 	G1 PP;
 	G2 QQ;

From 01c81c60ca97161dc718832e6336cb3f04f7f261 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 8 Mar 2021 17:08:32 +0900
Subject: [PATCH 492/553] ci test on haswell

---
 .github/workflows/main.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 201202a0..5c2aecc0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -15,3 +15,6 @@ jobs:
     - run: make test_go
     - run: sudo apt install openjdk-8-jdk
     - run: make -C ffi/java test JAVA_INC=-I/usr/lib/jvm/java-8-openjdk-amd64/include
+    - run: wget https://software.intel.com/content/dam/develop/external/us/en/documents/downloads/sde-external-8.63.0-2021-01-18-lin.tar.bz2
+    - run: bzip2 -dc sde-external-8.63.0-2021-01-18-lin.tar.bz2 | tar xvf -
+    - run: sde-external-8.63.0-2021-01-18-lin/sde64 -hsw -- bin/bn_test.exe

From 7d0087df69fa4a2db402f903f990a7013f81a038 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 8 Mar 2021 17:24:27 +0900
Subject: [PATCH 493/553] fix main.yml

---
 .github/workflows/main.yml | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 5c2aecc0..29ef88e1 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -8,13 +8,15 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - run: lscpu
-    - run: make test_ci DEBUG=1 -j3 || dmesg | tail
+    - run: wget https://software.intel.com/content/dam/develop/external/us/en/documents/downloads/sde-external-8.63.0-2021-01-18-lin.tar.bz2
+    - run: bzip2 -dc sde-external-8.63.0-2021-01-18-lin.tar.bz2 | tar xvf -
+    - run: make bin/bn_test.exe -j4
+    - run: sde-external-8.63.0-2021-01-18-lin/sde64 -hsw -- bin/bn_test.exe
+    - run: make clean
+    - run: make test_ci DEBUG=1 -j4 || dmesg | tail
     - run: make clean
-    - run: make test_ci DEBUG=1 -j3 CXX=clang++
+    - run: make test_ci DEBUG=1 -j4 CXX=clang++ || dmesg | tail
     - run: make clean
     - run: make test_go
     - run: sudo apt install openjdk-8-jdk
     - run: make -C ffi/java test JAVA_INC=-I/usr/lib/jvm/java-8-openjdk-amd64/include
-    - run: wget https://software.intel.com/content/dam/develop/external/us/en/documents/downloads/sde-external-8.63.0-2021-01-18-lin.tar.bz2
-    - run: bzip2 -dc sde-external-8.63.0-2021-01-18-lin.tar.bz2 | tar xvf -
-    - run: sde-external-8.63.0-2021-01-18-lin/sde64 -hsw -- bin/bn_test.exe

From b28f6088116b0166ddfd042934973b81a573b2d1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 8 Mar 2021 17:30:34 +0900
Subject: [PATCH 494/553] ci test of bn_test with DEBUG=1

---
 .github/workflows/main.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 29ef88e1..b7c5964f 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -10,9 +10,8 @@ jobs:
     - run: lscpu
     - run: wget https://software.intel.com/content/dam/develop/external/us/en/documents/downloads/sde-external-8.63.0-2021-01-18-lin.tar.bz2
     - run: bzip2 -dc sde-external-8.63.0-2021-01-18-lin.tar.bz2 | tar xvf -
-    - run: make bin/bn_test.exe -j4
+    - run: make bin/bn_test.exe DEBUG=1 -j4
     - run: sde-external-8.63.0-2021-01-18-lin/sde64 -hsw -- bin/bn_test.exe
-    - run: make clean
     - run: make test_ci DEBUG=1 -j4 || dmesg | tail
     - run: make clean
     - run: make test_ci DEBUG=1 -j4 CXX=clang++ || dmesg | tail

From b91dbed2e0f997fdcdfd3e2531a1e19364f89411 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 8 Mar 2021 17:45:52 +0900
Subject: [PATCH 495/553] disable ci test by sde

---
 .github/workflows/main.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index b7c5964f..74cafefd 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -8,10 +8,10 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - run: lscpu
-    - run: wget https://software.intel.com/content/dam/develop/external/us/en/documents/downloads/sde-external-8.63.0-2021-01-18-lin.tar.bz2
-    - run: bzip2 -dc sde-external-8.63.0-2021-01-18-lin.tar.bz2 | tar xvf -
-    - run: make bin/bn_test.exe DEBUG=1 -j4
-    - run: sde-external-8.63.0-2021-01-18-lin/sde64 -hsw -- bin/bn_test.exe
+#    - run: wget https://software.intel.com/content/dam/develop/external/us/en/documents/downloads/sde-external-8.63.0-2021-01-18-lin.tar.bz2
+#    - run: bzip2 -dc sde-external-8.63.0-2021-01-18-lin.tar.bz2 | tar xvf -
+#    - run: make bin/bn_test.exe DEBUG=1 -j4
+#    - run: sde-external-8.63.0-2021-01-18-lin/sde64 -hsw -- bin/bn_test.exe
     - run: make test_ci DEBUG=1 -j4 || dmesg | tail
     - run: make clean
     - run: make test_ci DEBUG=1 -j4 CXX=clang++ || dmesg | tail

From 48349a18bc7409145f0228b0c464269f0f812221 Mon Sep 17 00:00:00 2001
From: 0xflotus <0xflotus@gmail.com>
Date: Tue, 9 Mar 2021 16:20:25 +0100
Subject: [PATCH 496/553] fix: small errors

---
 misc/she/she-api.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/misc/she/she-api.md b/misc/she/she-api.md
index 37071bec..bf7772a6 100644
--- a/misc/she/she-api.md
+++ b/misc/she/she-api.md
@@ -1,6 +1,6 @@
 # she ; Two-level homomorphic encryption library for browser/Node.js by WebAssembly
 
-# Abstruct
+# Abstract
 she is a somewhat(two-level) homomorphic encryption library,
 which is based on pairings.
 This library supports polynomially many homomorphic additions and
@@ -31,7 +31,7 @@ Sum_i Enc(x_i) Enc(y_i) = Enc(Sum_i x_i y_i).
 * decrypt a ciphertext with a secret key
 
 ## Homomorphic operations
-* homomorphic addtion/substraction over ciphertexts of the same ciphertext class
+* homomorphic addition/subtraction over ciphertexts of the same ciphertext class
 * homomprphic multiplication over ciphertext of CipherTextG1 and CipherTextG2
     * The class of the result is CipherTextGT.
 
@@ -39,7 +39,7 @@ Sum_i Enc(x_i) Enc(y_i) = Enc(Sum_i x_i y_i).
 * This library requires to solve a small DLP to decrypt a ciphertext.
 * The decryption timing is O(m/s), where s is the size of table to solve DLP, and m is the size fo a plaintext.
 * call `setRangeForDLP(s)` to set the table size.
-    * The maximun `m/s` is set by `setTryNum(tryNum)`.
+    * The maximum `m/s` is set by `setTryNum(tryNum)`.
 
 ## Zero-knowledge proof class
 * A zero-knowledge proof is simultaneously created when encrypting a plaintext `m`.
@@ -66,7 +66,7 @@ and read `she.js`.
 
 ## A sample for JS
 
-```
+```js
 // initialize a library
 she.init().then(() => {
   const sec = new she.SecretKey()
@@ -103,7 +103,7 @@ she.init().then(() => {
 
 # A sample for C++
 How to build the library, see [mcl](https://github.com/herumi/mcl/#installation-requirements).
-```
+```c++
 #include <mcl/she.hpp>
 int main()
     try

From 4eb4b2bd36699889a3c5a3b5af7e856d359de9a2 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 10 Mar 2021 16:32:45 +0900
Subject: [PATCH 497/553] [python] add makefile

---
 ffi/python/Makefile  | 9 +++++++++
 ffi/python/readme.md | 7 +++++++
 ffi/python/she.py    | 2 ++
 3 files changed, 18 insertions(+)
 create mode 100644 ffi/python/Makefile
 create mode 100644 ffi/python/readme.md

diff --git a/ffi/python/Makefile b/ffi/python/Makefile
new file mode 100644
index 00000000..1b4846f9
--- /dev/null
+++ b/ffi/python/Makefile
@@ -0,0 +1,9 @@
+include ../../common.mk
+
+SHE384_256_SLIB=libmclshe384_256.$(LIB_SUF)
+
+she_test: ../../lib/$(SHE384_256_SLIB)
+	env LD_LIBRARY_PATH=../../lib python3 she.py
+
+../../lib/$(SHE384_256_SLIB):
+	make -C ../../ lib/$(SHE384_256_SLIB)
diff --git a/ffi/python/readme.md b/ffi/python/readme.md
new file mode 100644
index 00000000..48479d14
--- /dev/null
+++ b/ffi/python/readme.md
@@ -0,0 +1,7 @@
+# sample for Python
+
+## SHE
+
+```
+make test_she
+```
diff --git a/ffi/python/she.py b/ffi/python/she.py
index 4234a676..538515fb 100644
--- a/ffi/python/she.py
+++ b/ffi/python/she.py
@@ -1,6 +1,7 @@
 import os
 import platform
 from ctypes import *
+#from ctypes.util import find_library
 
 BN254 = 0
 BLS12_381 = 5
@@ -34,6 +35,7 @@ def init(curveType=BN254):
 		libName = 'mclshe384_256.dll'
 	else:
 		raise RuntimeError("not support yet", name)
+#	lib = cdll.LoadLibrary(find_library(libName))
 	lib = cdll.LoadLibrary(libName)
 	ret = lib.sheInit(curveType, MCLBN_COMPILED_TIME_VAR)
 	if ret != 0:

From 317934bdfb885265f9de1a9b829ecc2e016d250a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 10 Mar 2021 16:39:02 +0900
Subject: [PATCH 498/553] fix test

---
 ffi/python/Makefile  | 2 +-
 ffi/python/readme.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ffi/python/Makefile b/ffi/python/Makefile
index 1b4846f9..9d46068c 100644
--- a/ffi/python/Makefile
+++ b/ffi/python/Makefile
@@ -3,7 +3,7 @@ include ../../common.mk
 SHE384_256_SLIB=libmclshe384_256.$(LIB_SUF)
 
 she_test: ../../lib/$(SHE384_256_SLIB)
-	env LD_LIBRARY_PATH=../../lib python3 she.py
+	cd ../../lib && python3 ../ffi/python/she.py
 
 ../../lib/$(SHE384_256_SLIB):
 	make -C ../../ lib/$(SHE384_256_SLIB)
diff --git a/ffi/python/readme.md b/ffi/python/readme.md
index 48479d14..7f102bd3 100644
--- a/ffi/python/readme.md
+++ b/ffi/python/readme.md
@@ -3,5 +3,5 @@
 ## SHE
 
 ```
-make test_she
+make she_test
 ```

From e6cc4025e1908bf75fcd469f3dbd95d6db289fc3 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 10 Mar 2021 16:43:09 +0900
Subject: [PATCH 499/553] [python] LD_LIBRARY_PATH is necessary for linux

---
 ffi/python/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ffi/python/Makefile b/ffi/python/Makefile
index 9d46068c..85d47973 100644
--- a/ffi/python/Makefile
+++ b/ffi/python/Makefile
@@ -3,7 +3,7 @@ include ../../common.mk
 SHE384_256_SLIB=libmclshe384_256.$(LIB_SUF)
 
 she_test: ../../lib/$(SHE384_256_SLIB)
-	cd ../../lib && python3 ../ffi/python/she.py
+	cd ../../lib && env LD_LIBRARY_PATH=./ python3 ../ffi/python/she.py
 
 ../../lib/$(SHE384_256_SLIB):
 	make -C ../../ lib/$(SHE384_256_SLIB)

From f4afd32e2f9842afcf45d88c5f7caddccc86cc75 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Thu, 11 Mar 2021 09:18:58 +0900
Subject: [PATCH 500/553] v1.37

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 4fa89416..8c973a23 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x136; /* 0xABC = A.BC */
+static const int version = 0x137; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From a06bfd3b8f39b967071e8758ca76ae31268ab81c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 12 Mar 2021 11:32:34 +0900
Subject: [PATCH 501/553] [java] add JAVA_INC_DIR

---
 ffi/java/Makefile                            | 10 +++++---
 ffi/java/com/herumi/mcl/CipherText.java      |  2 +-
 ffi/java/com/herumi/mcl/Elgamal.java         |  2 +-
 ffi/java/com/herumi/mcl/ElgamalJNI.java      |  2 +-
 ffi/java/com/herumi/mcl/PrivateKey.java      |  2 +-
 ffi/java/com/herumi/mcl/PublicKey.java       |  2 +-
 ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java |  2 +-
 ffi/java/elgamal_wrap.cxx                    | 12 ++++++----
 ffi/java/mcl_wrap.cxx                        | 24 +++++++++++---------
 9 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/ffi/java/Makefile b/ffi/java/Makefile
index bb581dbc..eba3e417 100644
--- a/ffi/java/Makefile
+++ b/ffi/java/Makefile
@@ -1,14 +1,18 @@
 TOP_DIR=../..
 include $(TOP_DIR)/common.mk
 ifeq ($(UNAME_S),Darwin)
-  JAVA_INC?=-I/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/JavaVM.framework/Headers/
+  JAVA_INC_DIR?=/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/JavaVM.framework/Headers/
+  JAVA_INC?=-I$(JAVA_INC_DIR)
+  CFLAGS+=$(JAVA_INC)/darwin
 else
-  JAVA_INC?=-I/usr/lib/jvm/default-java/include
+  JAVA_INC_DIR?=/usr/lib/jvm/default-java/include
+  JAVA_INC?=-I$(JAVA_INC_DIR)
 #JAVA_INC=-I/usr/lib/jvm/java-7-openjdk-amd64/include
+  CFLAGS+=$(JAVA_INC)/linux
   CFLAGS+=-z noexecstack
   LDFLAGS+=-lrt
 endif
-CFLAGS+=$(JAVA_INC) $(JAVA_INC)/linux -I $(TOP_DIR)/include -I $(TOP_DIR)/../xbyak -I $(TOP_DIR)/../cybozulib/include -Wno-strict-aliasing
+CFLAGS+=$(JAVA_INC) -I $(TOP_DIR)/include -Wno-strict-aliasing
 MCL_LIB=$(TOP_DIR)/lib/libmcl.a
 
 PACKAGE_NAME=com.herumi.mcl
diff --git a/ffi/java/com/herumi/mcl/CipherText.java b/ffi/java/com/herumi/mcl/CipherText.java
index 87175bbb..ccb38373 100644
--- a/ffi/java/com/herumi/mcl/CipherText.java
+++ b/ffi/java/com/herumi/mcl/CipherText.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
diff --git a/ffi/java/com/herumi/mcl/Elgamal.java b/ffi/java/com/herumi/mcl/Elgamal.java
index 8249c842..f1b4c054 100644
--- a/ffi/java/com/herumi/mcl/Elgamal.java
+++ b/ffi/java/com/herumi/mcl/Elgamal.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
diff --git a/ffi/java/com/herumi/mcl/ElgamalJNI.java b/ffi/java/com/herumi/mcl/ElgamalJNI.java
index 67f0f220..3306f67f 100644
--- a/ffi/java/com/herumi/mcl/ElgamalJNI.java
+++ b/ffi/java/com/herumi/mcl/ElgamalJNI.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
diff --git a/ffi/java/com/herumi/mcl/PrivateKey.java b/ffi/java/com/herumi/mcl/PrivateKey.java
index 96603e52..7d9ac1ec 100644
--- a/ffi/java/com/herumi/mcl/PrivateKey.java
+++ b/ffi/java/com/herumi/mcl/PrivateKey.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
diff --git a/ffi/java/com/herumi/mcl/PublicKey.java b/ffi/java/com/herumi/mcl/PublicKey.java
index 8da13c0c..5bfb9efb 100644
--- a/ffi/java/com/herumi/mcl/PublicKey.java
+++ b/ffi/java/com/herumi/mcl/PublicKey.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
diff --git a/ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java b/ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java
index d49f742e..17942991 100644
--- a/ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java
+++ b/ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * Do not make changes to this file unless you know what you are doing--modify
  * the SWIG interface file instead.
diff --git a/ffi/java/elgamal_wrap.cxx b/ffi/java/elgamal_wrap.cxx
index 15e29952..9935eeb3 100644
--- a/ffi/java/elgamal_wrap.cxx
+++ b/ffi/java/elgamal_wrap.cxx
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 4.0.0
+ * Version 4.0.2
  *
  * This file is not intended to be easily readable and contains a number of
  * coding conventions designed to improve portability and efficiency. Do not make
@@ -182,15 +182,16 @@ template <typename T> T SwigValueInit() {
 
 /* Support for throwing Java exceptions */
 typedef enum {
-  SWIG_JavaOutOfMemoryError = 1, 
-  SWIG_JavaIOException, 
-  SWIG_JavaRuntimeException, 
+  SWIG_JavaOutOfMemoryError = 1,
+  SWIG_JavaIOException,
+  SWIG_JavaRuntimeException,
   SWIG_JavaIndexOutOfBoundsException,
   SWIG_JavaArithmeticException,
   SWIG_JavaIllegalArgumentException,
   SWIG_JavaNullPointerException,
   SWIG_JavaDirectorPureVirtual,
-  SWIG_JavaUnknownError
+  SWIG_JavaUnknownError,
+  SWIG_JavaIllegalStateException,
 } SWIG_JavaExceptionCodes;
 
 typedef struct {
@@ -211,6 +212,7 @@ static void SWIGUNUSED SWIG_JavaThrowException(JNIEnv *jenv, SWIG_JavaExceptionC
     { SWIG_JavaNullPointerException, "java/lang/NullPointerException" },
     { SWIG_JavaDirectorPureVirtual, "java/lang/RuntimeException" },
     { SWIG_JavaUnknownError,  "java/lang/UnknownError" },
+    { SWIG_JavaIllegalStateException, "java/lang/IllegalStateException" },
     { (SWIG_JavaExceptionCodes)0,  "java/lang/UnknownError" }
   };
   const SWIG_JavaExceptions_t *except_ptr = java_exceptions;
diff --git a/ffi/java/mcl_wrap.cxx b/ffi/java/mcl_wrap.cxx
index 1cca3601..3c2d95f1 100644
--- a/ffi/java/mcl_wrap.cxx
+++ b/ffi/java/mcl_wrap.cxx
@@ -1623,20 +1623,22 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1isZero(JNIEnv *jenv,
   return jresult;
 }
 
+
 SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1isValidOrder(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
-    jboolean jresult = 0 ;
-    G1 *arg1 = (G1 *) 0 ;
-    bool result;
-
-    (void)jenv;
-    (void)jcls;
-    (void)jarg1_;
-    arg1 = *(G1 **)&jarg1;
-    result = (bool)((G1 const *)arg1)->isValidOrder();
-    jresult = (jboolean)result;
-    return jresult;
+  jboolean jresult = 0 ;
+  G1 *arg1 = (G1 *) 0 ;
+  bool result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(G1 **)&jarg1; 
+  result = (bool)((G1 const *)arg1)->isValidOrder();
+  jresult = (jboolean)result; 
+  return jresult;
 }
 
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   G1 *arg1 = (G1 *) 0 ;
   Fp *arg2 = 0 ;

From f5fbc3e343abde028cddf0da0ef11f4f3d7df3d7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 12 Mar 2021 15:23:39 +0900
Subject: [PATCH 502/553] split montRed to w/wo NF

---
 src/fp.cpp            |  3 +-
 src/gen.cpp           | 67 +++++++++++++++++++++++++++++++------------
 src/low_func.hpp      | 14 ++++-----
 src/low_func_llvm.hpp |  6 ++--
 src/proto.hpp         |  1 +
 5 files changed, 60 insertions(+), 31 deletions(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index 9f3c47c2..5be1bcbd 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -309,11 +309,12 @@ void setOp2(Op& op)
 		if (op.isFullBit) {
 			op.fp_mul = Mont<N, true, Tag>::f;
 			op.fp_sqr = SqrMont<N, true, Tag>::f;
+			op.fpDbl_mod = MontRed<N, true, Tag>::f;
 		} else {
 			op.fp_mul = Mont<N, false, Tag>::f;
 			op.fp_sqr = SqrMont<N, false, Tag>::f;
+			op.fpDbl_mod = MontRed<N, false, Tag>::f;
 		}
-		op.fpDbl_mod = MontRed<N, Tag>::f;
 	} else {
 		op.fp_mul = Mul<N, Tag>::f;
 		op.fp_sqr = Sqr<N, Tag>::f;
diff --git a/src/gen.cpp b/src/gen.cpp
index ca8af98c..e2f4015d 100644
--- a/src/gen.cpp
+++ b/src/gen.cpp
@@ -883,37 +883,65 @@ struct Code : public mcl::Generator {
 		ret(Void);
 		endFunc();
 	}
-	void gen_mcl_fp_montRed()
+	void gen_mcl_fp_montRed(bool isFullBit = true)
 	{
-		const int bu = bit + unit;
-		const int b2 = bit * 2;
-		const int b2u = b2 + unit;
 		resetGlobalIdx();
 		Operand pz(IntPtr, unit);
 		Operand pxy(IntPtr, unit);
 		Operand pp(IntPtr, unit);
-		std::string name = "mcl_fp_montRed" + cybozu::itoa(N) + "L" + suf;
+		std::string name = "mcl_fp_montRed";
+		if (!isFullBit) {
+			name += "NF";
+		}
+		name += cybozu::itoa(N) + "L" + suf;
 		mcl_fp_montRedM[N] = Function(name, Void, pz, pxy, pp);
 		verifyAndSetPrivate(mcl_fp_montRedM[N]);
 		beginFunc(mcl_fp_montRedM[N]);
 		Operand rp = load(getelementptr(pp, -1));
 		Operand p = loadN(pp, N);
-		Operand xy = loadN(pxy, N * 2);
-		Operand t = zext(xy, b2 + unit);
+		const int bu = bit + unit;
+		const int bu2 = bit + unit * 2;
+		Operand t = loadN(pxy, N);
+		Operand H;
 		for (uint32_t i = 0; i < N; i++) {
-			Operand z = trunc(t, unit);
-			Operand q = mul(z, rp);
+			Operand q;
+			if (N == 1) {
+				q = mul(t, rp);
+			} else {
+				q = mul(trunc(t, unit), rp);
+			}
 			Operand pq = call(mulPvM[bit], pp, q);
-			pq = zext(pq, b2u - unit * i);
-			z = add(t, pq);
-			z = lshr(z, unit);
-			t = trunc(z, b2 - unit * i);
+			if (i > 0) {
+				H = zext(H, bu);
+				H = shl(H, bit);
+				pq = add(pq, H);
+			}
+			t = zext(t, bu);
+			Operand e = load(getelementptr(pxy, N + i));
+			e = zext(e, bu);
+			e = shl(e, bit);
+			t = _or(t, e);
+			t = zext(t, bu2);
+			pq = zext(pq, bu2);
+			t = add(t, pq);
+			t = lshr(t, unit);
+			H = lshr(t, bit);
+			H = trunc(H, bit);
+			t = trunc(t, bit);
+		}
+		Operand z;
+		if (isFullBit) {
+			p = zext(p, bu);
+			t = zext(t, bu);
+			Operand vc = sub(t, p);
+			Operand c = trunc(lshr(vc, bit), 1);
+			z = select(c, t, vc);
+			z = trunc(z, bit);
+		} else {
+			Operand vc = sub(t, p);
+			Operand c = trunc(lshr(vc, bit - 1), 1);
+			z = select(c, t, vc);
 		}
-		p = zext(p, bu);
-		Operand vc = sub(t, p);
-		Operand c = trunc(lshr(vc, bit), 1);
-		Operand z = select(c, t, vc);
-		z = trunc(z, bit);
 		storeN(z, pz);
 		ret(Void);
 		endFunc();
@@ -941,7 +969,8 @@ struct Code : public mcl::Generator {
 		gen_mcl_fpDbl_sqrPre();
 		gen_mcl_fp_mont(true);
 		gen_mcl_fp_mont(false);
-		gen_mcl_fp_montRed();
+		gen_mcl_fp_montRed(true);
+		gen_mcl_fp_montRed(false);
 	}
 	void setBit(uint32_t bit)
 	{
diff --git a/src/low_func.hpp b/src/low_func.hpp
index 9192e51d..77c3805b 100644
--- a/src/low_func.hpp
+++ b/src/low_func.hpp
@@ -510,7 +510,7 @@ const void4u DblSub<N, Tag>::f = DblSub<N, Tag>::func;
 	z[N] <- montRed(xy[N * 2], p[N])
 	REMARK : assume p[-1] = rp
 */
-template<size_t N, class Tag = Gtag>
+template<size_t N, bool isFullBit, class Tag = Gtag>
 struct MontRed {
 	static inline void func(Unit *z, const Unit *xy, const Unit *p)
 	{
@@ -546,8 +546,8 @@ struct MontRed {
 	static const void3u f;
 };
 
-template<size_t N, class Tag>
-const void3u MontRed<N, Tag>::f = MontRed<N, Tag>::func;
+template<size_t N, bool isFullBit, class Tag>
+const void3u MontRed<N, isFullBit, Tag>::f = MontRed<N, isFullBit, Tag>::func;
 
 /*
 	z[N] <- Montgomery(x[N], y[N], p[N])
@@ -560,7 +560,7 @@ struct Mont {
 #if MCL_MAX_BIT_SIZE == 1024 || MCL_SIZEOF_UNIT == 4 // check speed
 		Unit xy[N * 2];
 		MulPre<N, Tag>::f(xy, x, y);
-		MontRed<N, Tag>::f(z, xy, p);
+		MontRed<N, isFullBit, Tag>::f(z, xy, p);
 #else
 		const Unit rp = p[-1];
 		if (isFullBit) {
@@ -644,7 +644,7 @@ struct SqrMont {
 #if MCL_MAX_BIT_SIZE == 1024 || MCL_SIZEOF_UNIT == 4 // check speed
 		Unit xx[N * 2];
 		SqrPre<N, Tag>::f(xx, x);
-		MontRed<N, Tag>::f(y, xx, p);
+		MontRed<N, isFullBit, Tag>::f(y, xx, p);
 #else
 		Mont<N, isFullBit, Tag>::f(y, x, x, p);
 #endif
@@ -702,9 +702,9 @@ struct Fp2MulNF {
 		MulPre<N, Tag>::f(d2, b, d);
 		SubPre<N * 2, Tag>::f(d0, d0, d1);
 		SubPre<N * 2, Tag>::f(d0, d0, d2);
-		MontRed<N, Tag>::f(z + N, d0, p);
+		MontRed<N, false, Tag>::f(z + N, d0, p);
 		DblSub<N, Tag>::f(d1, d1, d2, p);
-		MontRed<N, Tag>::f(z, d1, p);
+		MontRed<N, false, Tag>::f(z, d1, p);
 	}
 	static const void4u f;
 };
diff --git a/src/low_func_llvm.hpp b/src/low_func_llvm.hpp
index a9e8a98c..43d875a8 100644
--- a/src/low_func_llvm.hpp
+++ b/src/low_func_llvm.hpp
@@ -37,7 +37,8 @@ template<>const void4u Sub<n, true, tag>::f = &mcl_fp_sub ## n ## suf; \
 template<>const void4u Sub<n, false, tag>::f = &mcl_fp_subNF ## n ## suf; \
 template<>const void4u Mont<n, true, tag>::f = &mcl_fp_mont ## n ## suf; \
 template<>const void4u Mont<n, false, tag>::f = &mcl_fp_montNF ## n ## suf; \
-template<>const void3u MontRed<n, tag>::f = &mcl_fp_montRed ## n ## suf; \
+template<>const void3u MontRed<n, true, tag>::f = &mcl_fp_montRed ## n ## suf; \
+template<>const void3u MontRed<n, false, tag>::f = &mcl_fp_montRedNF ## n ## suf; \
 template<>const void4u DblAdd<n, tag>::f = &mcl_fpDbl_add ## n ## suf; \
 template<>const void4u DblSub<n, tag>::f = &mcl_fpDbl_sub ## n ## suf; \
 
@@ -81,9 +82,6 @@ MCL_DEF_LLVM_FUNC(15)
 #if MCL_SIZEOF_UNIT == 4
 MCL_DEF_LLVM_FUNC(16)
 #else
-/// QQQ : check speed
-template<>const void3u MontRed<16, Ltag>::f = &mcl_fp_montRed16L;
-template<>const void3u MontRed<16, LBMI2tag>::f = &mcl_fp_montRed16Lbmi2;
 #endif
 #endif
 #if MCL_MAX_UNIT_SIZE >= 17
diff --git a/src/proto.hpp b/src/proto.hpp
index 97c33119..c9f78a52 100644
--- a/src/proto.hpp
+++ b/src/proto.hpp
@@ -22,6 +22,7 @@ void mcl_fpDbl_sqrPre ## n ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x); \
 void mcl_fp_mont ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
 void mcl_fp_montNF ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
 void mcl_fp_montRed ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* p); \
+void mcl_fp_montRedNF ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* p); \
 void mcl_fpDbl_add ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
 void mcl_fpDbl_sub ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p);
 

From 6d38eeea08d5ebe9ded4b05498cc22b2ae7eeedc Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 12 Mar 2021 15:42:47 +0900
Subject: [PATCH 503/553] disable montRedNF after updating asm

---
 src/low_func_llvm.hpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/low_func_llvm.hpp b/src/low_func_llvm.hpp
index 43d875a8..117288f9 100644
--- a/src/low_func_llvm.hpp
+++ b/src/low_func_llvm.hpp
@@ -25,6 +25,7 @@ template<>const void3u MulPreCore<n, tag>::f = &mcl_fpDbl_mulPre ## n ## suf; \
 template<>const void2u SqrPreCore<n, tag>::f = &mcl_fpDbl_sqrPre ## n ## suf;
 #endif
 
+// QQQ : set mcl_fp_montRedNF after updating asm
 #define MCL_DEF_LLVM_FUNC2(n, tag, suf) \
 template<>const u3u AddPre<n, tag>::f = &mcl_fp_addPre ## n ## suf; \
 template<>const u3u SubPre<n, tag>::f = &mcl_fp_subPre ## n ## suf; \
@@ -38,7 +39,7 @@ template<>const void4u Sub<n, false, tag>::f = &mcl_fp_subNF ## n ## suf; \
 template<>const void4u Mont<n, true, tag>::f = &mcl_fp_mont ## n ## suf; \
 template<>const void4u Mont<n, false, tag>::f = &mcl_fp_montNF ## n ## suf; \
 template<>const void3u MontRed<n, true, tag>::f = &mcl_fp_montRed ## n ## suf; \
-template<>const void3u MontRed<n, false, tag>::f = &mcl_fp_montRedNF ## n ## suf; \
+template<>const void3u MontRed<n, false, tag>::f = &mcl_fp_montRed ## n ## suf; \
 template<>const void4u DblAdd<n, tag>::f = &mcl_fpDbl_add ## n ## suf; \
 template<>const void4u DblSub<n, tag>::f = &mcl_fpDbl_sub ## n ## suf; \
 

From 9b5da4023eae3fdde9bf0eda9e4aeda95cc46f71 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 12 Mar 2021 17:49:32 +0900
Subject: [PATCH 504/553] disable test_go in main.yml

---
 .github/workflows/main.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 74cafefd..7c03fb47 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -15,7 +15,7 @@ jobs:
     - run: make test_ci DEBUG=1 -j4 || dmesg | tail
     - run: make clean
     - run: make test_ci DEBUG=1 -j4 CXX=clang++ || dmesg | tail
-    - run: make clean
-    - run: make test_go
+#    - run: make clean
+#    - run: make test_go
     - run: sudo apt install openjdk-8-jdk
     - run: make -C ffi/java test JAVA_INC=-I/usr/lib/jvm/java-8-openjdk-amd64/include

From ac580a19abc6e303221c32f1d71afcbc0bd0454c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 12 Mar 2021 17:52:31 +0900
Subject: [PATCH 505/553] [java] specify libmcl.a to build

---
 ffi/java/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ffi/java/Makefile b/ffi/java/Makefile
index eba3e417..269e6394 100644
--- a/ffi/java/Makefile
+++ b/ffi/java/Makefile
@@ -32,7 +32,7 @@ mcl_wrap.cxx: mcl.i mcl_impl.hpp
 	swig -java -package $(PACKAGE_NAME) -outdir $(PACKAGE_DIR) -c++ -Wall mcl.i
 
 $(MCL_LIB):
-	make -C $(TOP_DIR)
+	make -C $(TOP_DIR) lib/libmcl.a
 
 $(ELGAMAL_LIB): elgamal_wrap.cxx $(MCL_LIB)
 	$(PRE)$(CXX) $< -o $@ $(CFLAGS) $(MCL_LIB) $(LDFLAGS) -shared

From 6bdaec05670da3b192cd4298d09fb5bda4c1cb94 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 12 Mar 2021 17:57:22 +0900
Subject: [PATCH 506/553] [java] disable NIST_P521 test

---
 ffi/java/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ffi/java/Makefile b/ffi/java/Makefile
index 269e6394..5b00f49c 100644
--- a/ffi/java/Makefile
+++ b/ffi/java/Makefile
@@ -55,7 +55,7 @@ test_elgamal: ElgamalTest.class $(ELGAMAL_LIB)
 	$(JAVA_EXE) ElgamalTest -e NIST_P256 -h sha256
 	$(JAVA_EXE) ElgamalTest -e secp256k1 -h sha256
 	$(JAVA_EXE) ElgamalTest -e NIST_P384 -h sha384
-	$(JAVA_EXE) ElgamalTest -e NIST_P521 -h sha512
+#	$(JAVA_EXE) ElgamalTest -e NIST_P521 -h sha512
 
 test_mcl: MclTest.class $(MCLJAVA_LIB)
 	$(JAVA_EXE) MclTest

From ec661dcd66601ceef9de2102c318cc8aefecae7f Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 12 Mar 2021 17:57:45 +0900
Subject: [PATCH 507/553] set MCL_MAX_BIT_SIZE=512 and so disable test of
 NIST_P521

---
 include/mcl/gmp_util.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp
index f3fcfa39..b77c0224 100644
--- a/include/mcl/gmp_util.hpp
+++ b/include/mcl/gmp_util.hpp
@@ -28,7 +28,7 @@
 	#define MCL_USE_VINT
 #endif
 #ifndef MCL_MAX_BIT_SIZE
-	#define MCL_MAX_BIT_SIZE 521
+	#define MCL_MAX_BIT_SIZE 512
 #endif
 #ifdef MCL_USE_VINT
 #include <mcl/vint.hpp>

From 63d2707745ecec68ddb82301507b41997473327b Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 12 Mar 2021 18:01:20 +0900
Subject: [PATCH 508/553] note for MCL_MAX_BIT_SIZE

---
 readme.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/readme.md b/readme.md
index 5c9f7999..b88f9c77 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,7 @@ mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
+- set default `MCL_MAX_BIT_SIZE=512`, so `NIST_P521` is no longer supported by default.
 - improve performance
 - support M1 mac
 - dst for mapToG1 has changed to `BLS_SIG_BLS12381G1_XMD:SHA-256_SSWU_RO_POP_`.

From 78392e25a1adb8281847f0e3bf3388e4b63795d0 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 12 Mar 2021 18:55:37 +0900
Subject: [PATCH 509/553] [java] add make clean before test

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 7c03fb47..871a9b01 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -15,7 +15,7 @@ jobs:
     - run: make test_ci DEBUG=1 -j4 || dmesg | tail
     - run: make clean
     - run: make test_ci DEBUG=1 -j4 CXX=clang++ || dmesg | tail
-#    - run: make clean
+    - run: make clean
 #    - run: make test_go
     - run: sudo apt install openjdk-8-jdk
     - run: make -C ffi/java test JAVA_INC=-I/usr/lib/jvm/java-8-openjdk-amd64/include

From 1335112ef3203306cf1e62a18bbe87e582575534 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 13 Mar 2021 18:15:04 +0900
Subject: [PATCH 510/553] bench of llvm

---
 Makefile           |  6 +++
 test/llvm_test.cpp | 99 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+)
 create mode 100644 test/llvm_test.cpp

diff --git a/Makefile b/Makefile
index aef44c29..d7cdc769 100644
--- a/Makefile
+++ b/Makefile
@@ -358,6 +358,12 @@ bin/emu:
 bin/pairing_c_min.exe: sample/pairing_c.c include/mcl/vint.hpp src/fp.cpp include/mcl/bn.hpp
 	$(CXX) -std=c++03 -O3 -g -fno-threadsafe-statics -fno-exceptions -fno-rtti -o $@ sample/pairing_c.c src/fp.cpp src/bn_c384_256.cpp -I./include -DXBYAK_NO_EXCEPTION -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -DMCL_MAX_BIT_SIZE=384 -DMCL_VINT_64BIT_PORTABLE -DCYBOZU_DONT_USE_STRING -DCYBOZU_DONT_USE_EXCEPTION -DNDEBUG # -DMCL_DONT_USE_CSPRNG
 
+bin/llvm_test64.exe: test/llvm_test.cpp src/base64.ll
+	clang++$(LLVM_VER) -o $@ -Ofast -DNDEBUG -Wall -Wextra -I ./include test/llvm_test.cpp src/base64.ll
+
+bin/llvm_test32.exe: test/llvm_test.cpp src/base32.ll
+	clang++$(LLVM_VER) -o $@ -Ofast -DNDEBUG -Wall -Wextra -I ./include test/llvm_test.cpp src/base32.ll -m32
+
 make_tbl:
 	$(MAKE) ../bls/src/qcoeff-bn254.hpp
 
diff --git a/test/llvm_test.cpp b/test/llvm_test.cpp
new file mode 100644
index 00000000..6c8733a8
--- /dev/null
+++ b/test/llvm_test.cpp
@@ -0,0 +1,99 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <cybozu/inttype.hpp>
+#include <cybozu/benchmark.hpp>
+#include <cybozu/xorshift.hpp>
+
+typedef size_t Unit;
+
+template<size_t N>
+void mulPre(Unit*, const Unit*, const Unit*);
+
+template<size_t N>
+void sqrPre(Unit*, const Unit*);
+
+template<size_t N>
+void mod(Unit*, const Unit*, const Unit *);
+
+template<size_t N>
+void mont(Unit*, const Unit*, const Unit*, const Unit *);
+
+#define MCL_FP_DEF_FUNC_SUB(n, suf) \
+extern "C" { \
+void mcl_fp_add ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
+void mcl_fp_addNF ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
+void mcl_fp_sub ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
+void mcl_fp_subNF ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
+void mcl_fp_shr1_ ## n ## suf(Unit*y, const Unit* x); \
+Unit mcl_fp_addPre ## n ## suf(Unit* z, const Unit* x, const Unit* y); \
+Unit mcl_fp_subPre ## n ## suf(Unit* z, const Unit* x, const Unit* y); \
+void mcl_fp_mulUnitPre ## n ## suf(Unit* z, const Unit* x, Unit y); \
+void mcl_fpDbl_mulPre ## n ## suf(Unit* z, const Unit* x, const Unit* y); \
+void mcl_fpDbl_sqrPre ## n ## suf(Unit* y, const Unit* x); \
+void mcl_fp_mont ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
+void mcl_fp_montNF ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
+void mcl_fp_montRed ## n ## suf(Unit* z, const Unit* xy, const Unit* p); \
+void mcl_fp_montRedNF ## n ## suf(Unit* z, const Unit* xy, const Unit* p); \
+void mcl_fpDbl_add ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
+void mcl_fpDbl_sub ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
+} \
+template<>void mulPre<n>(Unit *z, const Unit *x, const Unit *y) { mcl_fpDbl_mulPre ## n ## suf(z, x, y); } \
+template<>void sqrPre<n>(Unit *z, const Unit *x) { mcl_fpDbl_sqrPre ## n ## suf(z, x); } \
+template<>void mod<n>(Unit *z, const Unit *x, const Unit *p) { mcl_fp_montRedNF ## n ## suf(z, x, p); } \
+template<>void mont<n>(Unit *z, const Unit *x, const Unit *y, const Unit *p) { mcl_fp_montNF ## n ## suf(z, x, y, p); }
+
+MCL_FP_DEF_FUNC_SUB(4, L)
+MCL_FP_DEF_FUNC_SUB(5, L)
+MCL_FP_DEF_FUNC_SUB(6, L)
+MCL_FP_DEF_FUNC_SUB(7, L)
+MCL_FP_DEF_FUNC_SUB(8, L)
+#if CYBOZU_OS_BIT == 32
+MCL_FP_DEF_FUNC_SUB(12)
+MCL_FP_DEF_FUNC_SUB(16)
+#endif
+
+
+template<class RG, class T>
+void setRand(T *x, size_t n, RG& rg)
+{
+	for (size_t i = 0; i < n; i++) {
+		if (sizeof(T) == 4) {
+			x[i] = rg.get32();
+		} else {
+			x[i] = rg.get64();
+		}
+	}
+}
+
+template<size_t N>
+void bench(Unit *x, Unit *y, const Unit *p)
+{
+	printf("N=%zd\n", N);
+	Unit xx[N * 2], yy[N * 2];
+	const int C = 1000;
+	CYBOZU_BENCH_C("mulPre", C, mulPre<N>, xx, x, y);
+	CYBOZU_BENCH_C("sqrPre", C, sqrPre<N>, yy, x);
+	CYBOZU_BENCH_C("mod   ", C, mod<N>, yy, xx, p);
+	CYBOZU_BENCH_C("mont  ", C, mont<N>, yy, x, y, p);
+}
+
+int main()
+{
+	printf("sizeof(Unit)=%zd\n", sizeof(Unit));
+	const size_t maxN = 16;
+	Unit x[maxN], y[maxN], p[maxN + 1];
+	cybozu::XorShift rg;
+	setRand(x, maxN, rg);
+	setRand(y, maxN, rg);
+	setRand(p, maxN + 1, rg);
+	bench<4>(x, y, p + 1);
+	bench<5>(x, y, p + 1);
+	bench<6>(x, y, p + 1);
+	bench<7>(x, y, p + 1);
+	bench<8>(x, y, p + 1);
+#if CYBOZU_OS_BIT == 32
+	bench<12>(x, y, p + 1);
+	bench<16>(x, y, p + 1);
+#endif
+}
+

From a9eef7fe4993d6c7ab2c97090eeaf60fedfca8a4 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 13 Mar 2021 18:17:55 +0900
Subject: [PATCH 511/553] _

---
 test/llvm_test.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/llvm_test.cpp b/test/llvm_test.cpp
index 6c8733a8..18bf7573 100644
--- a/test/llvm_test.cpp
+++ b/test/llvm_test.cpp
@@ -48,8 +48,8 @@ MCL_FP_DEF_FUNC_SUB(6, L)
 MCL_FP_DEF_FUNC_SUB(7, L)
 MCL_FP_DEF_FUNC_SUB(8, L)
 #if CYBOZU_OS_BIT == 32
-MCL_FP_DEF_FUNC_SUB(12)
-MCL_FP_DEF_FUNC_SUB(16)
+MCL_FP_DEF_FUNC_SUB(12, L)
+MCL_FP_DEF_FUNC_SUB(16, L)
 #endif
 
 

From 570396efd88d6334c24f611b00eaffeb18b9040c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 13 Mar 2021 18:30:07 +0900
Subject: [PATCH 512/553] use textbook mul for N = 8

---
 src/gen.cpp        | 3 ++-
 test/llvm_test.cpp | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gen.cpp b/src/gen.cpp
index e2f4015d..029a673e 100644
--- a/src/gen.cpp
+++ b/src/gen.cpp
@@ -715,11 +715,12 @@ struct Code : public mcl::Generator {
 			Operand z = mul(x, y);
 			storeN(z, pz);
 			ret(Void);
-		} else if (N >= 8 && (N % 2) == 0) {
+		} else if (N > 8 && (N % 2) == 0) {
 			/*
 				W = 1 << half
 				(aW + b)(cW + d) = acW^2 + (ad + bc)W + bd
 				ad + bc = (a + b)(c + d) - ac - bd
+				@note Karatsuba is slower for N = 8
 			*/
 			const int H = N / 2;
 			const int half = bit / 2;
diff --git a/test/llvm_test.cpp b/test/llvm_test.cpp
index 18bf7573..ab0d8216 100644
--- a/test/llvm_test.cpp
+++ b/test/llvm_test.cpp
@@ -70,7 +70,7 @@ void bench(Unit *x, Unit *y, const Unit *p)
 {
 	printf("N=%zd\n", N);
 	Unit xx[N * 2], yy[N * 2];
-	const int C = 1000;
+	const int C = 10000;
 	CYBOZU_BENCH_C("mulPre", C, mulPre<N>, xx, x, y);
 	CYBOZU_BENCH_C("sqrPre", C, sqrPre<N>, yy, x);
 	CYBOZU_BENCH_C("mod   ", C, mod<N>, yy, xx, p);

From 9325c347831e8ddcd4b0ad5b0accd1ec9ea36459 Mon Sep 17 00:00:00 2001
From: Valdas Rakutis <valdas@rakutis.lt>
Date: Sun, 14 Mar 2021 17:32:20 +0200
Subject: [PATCH 513/553] [cs] add missing functions, some arithmetic operator
 overloads, static factory

---
 ffi/cs/mcl/mcl.cs | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index 8a7b6fea..133186de 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -19,6 +19,7 @@ public class MCL {
         [DllImport(dllName)] public static extern int mclBn_init(int curve, int compiledTimeVar);
         [DllImport(dllName)] public static extern void mclBn_setETHserialization(int enable);
         [DllImport(dllName)] public static extern int mclBn_setMapToMode(int mode);
+        [DllImport(dllName)] public static extern int mclBn_FrEvaluatePolynomial(ref Fr z, [In] Fr[] poly, long bufSize, in Fr y);
         [DllImport(dllName)] public static extern void mclBnFr_clear(ref Fr x);
         [DllImport(dllName)] public static extern void mclBnFr_setInt(ref Fr y, int x);
         [DllImport(dllName)] public static extern int mclBnFr_setStr(ref Fr x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
@@ -72,6 +73,7 @@ public class MCL {
         [DllImport(dllName)] public static extern void mclBnG1_add(ref G1 z, in G1 x, in G1 y);
         [DllImport(dllName)] public static extern void mclBnG1_sub(ref G1 z, in G1 x, in G1 y);
         [DllImport(dllName)] public static extern void mclBnG1_mul(ref G1 z, in G1 x, in Fr y);
+        [DllImport(dllName)] public static extern void mclBnG1_mulVec(ref G1 x, [In]G1[] vec1, [In]Fr[] vec2, long bufSize);
 
         [DllImport(dllName)] public static extern void mclBnG2_clear(ref G2 x);
         [DllImport(dllName)] public static extern int mclBnG2_setStr(ref G2 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
@@ -291,6 +293,14 @@ struct U128 {
         [StructLayout(LayoutKind.Sequential)]
         public struct Fr {
             private U128 v0, v1;
+            public static Fr One()
+            {
+                var fr = new Fr();
+                fr.SetInt(1);
+
+                return fr;
+            }
+            public static Fr Zero() => new Fr();
             public void Clear()
             {
                 mclBnFr_clear(ref this);
@@ -433,6 +443,13 @@ public void Div(in Fr x, in Fr y)
         [StructLayout(LayoutKind.Sequential)]
         public struct Fp {
             private U128 v0, v1, v2;
+            public static Fp One()
+            {
+                var fp = new Fp();
+                fp.SetInt(1);
+                return fp;
+            }
+            public static Fp Zero() => new Fp();
             public void Clear()
             {
                 mclBnFp_clear(ref this);
@@ -649,6 +666,30 @@ public void Mul(in G1 x, in Fr y)
             {
                 MCL.Mul(ref this, x, y);
             }
+            public static G1 operator -(in G1 x)
+            {
+                var result = new G1();
+                result.Neg(x);
+                return result;
+            }
+            public static G1 operator +(in G1 left, in G1 right)
+            {
+                var result = new G1();
+                result.Add(left, right);
+                return result;
+            }
+            public static G1 operator -(in G1 left, in G1 right)
+            {
+                var result = new G1();
+                result.Sub(left, right);
+                return result;
+            }
+            public static G1 operator *(in G1 left, in Fr right)
+            {
+                var result = new G1();
+                result.Mul(left, right);
+                return result;
+            }
         }
         [StructLayout(LayoutKind.Sequential)]
         public struct G2 {

From 33643b9a6d1690f9c8081804fcbd74da16a64538 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sun, 14 Mar 2021 10:11:27 +0900
Subject: [PATCH 514/553] [llvm] add pack and split

---
 src/gen.cpp | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/gen.cpp b/src/gen.cpp
index 029a673e..e0f736c2 100644
--- a/src/gen.cpp
+++ b/src/gen.cpp
@@ -884,6 +884,24 @@ struct Code : public mcl::Generator {
 		ret(Void);
 		endFunc();
 	}
+	// return [H:L]
+	Operand pack(Operand H, Operand L)
+	{
+		int size = H.bit + L.bit;
+		H = zext(H, size);
+		H = shl(H, L.bit);
+		L = zext(L, size);
+		H = _or(H, L);
+		return H;
+	}
+	// split x to [ret:L] s.t. size of L = sizeL
+	Operand split(Operand *L, const Operand& x, int sizeL)
+	{
+		Operand ret = lshr(x, sizeL);
+		ret = trunc(ret, ret.bit - sizeL);
+		*L = trunc(x, sizeL);
+		return ret;
+	}
 	void gen_mcl_fp_montRed(bool isFullBit = true)
 	{
 		resetGlobalIdx();
@@ -917,18 +935,14 @@ struct Code : public mcl::Generator {
 				H = shl(H, bit);
 				pq = add(pq, H);
 			}
-			t = zext(t, bu);
-			Operand e = load(getelementptr(pxy, N + i));
-			e = zext(e, bu);
-			e = shl(e, bit);
-			t = _or(t, e);
+			Operand next = load(getelementptr(pxy, N + i));
+			t = pack(next, t);
 			t = zext(t, bu2);
 			pq = zext(pq, bu2);
 			t = add(t, pq);
 			t = lshr(t, unit);
-			H = lshr(t, bit);
-			H = trunc(H, bit);
-			t = trunc(t, bit);
+			t = trunc(t, bu);
+			H = split(&t, t, bit);
 		}
 		Operand z;
 		if (isFullBit) {

From 2e39b86af5ca9167968227b215e54773199ad575 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 11:56:51 +0900
Subject: [PATCH 515/553] remove tests for small p

---
 test/fp_generator_test.cpp |  4 ++++
 test/fp_test.cpp           | 28 +++++++++++++---------------
 test/fp_tower_test.cpp     | 10 ----------
 test/glv_test.cpp          |  6 ++++++
 test/mont_fp_test.cpp      | 10 ----------
 5 files changed, 23 insertions(+), 35 deletions(-)

diff --git a/test/fp_generator_test.cpp b/test/fp_generator_test.cpp
index 39cfa27f..960d2952 100644
--- a/test/fp_generator_test.cpp
+++ b/test/fp_generator_test.cpp
@@ -15,10 +15,14 @@ typedef mcl::FpT<> Fp;
 const int MAX_N = 4;
 
 const char *primeTable[] = {
+#if 0
 	"0x7fffffffffffffffffffffffffffffff", // 127bit(not full)
 	"0xffffffffffffffffffffffffffffff61", // 128bit(full)
+#endif
+	"0x7fffffffffffffffffffffffffffffffffffffffffffffed", // 191bit(not full)
 	"0xfffffffffffffffffffffffffffffffffffffffeffffee37", // 192bit(full)
 	"0x2523648240000001ba344d80000000086121000000000013a700000000000013", // 254bit(not full)
+	"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff43", // 256bit(full)
 };
 
 void strToArray(uint64_t *p, size_t n, const char *pStr)
diff --git a/test/fp_test.cpp b/test/fp_test.cpp
index 878bdb9d..41ce7a04 100644
--- a/test/fp_test.cpp
+++ b/test/fp_test.cpp
@@ -426,6 +426,7 @@ void powTest()
 	CYBOZU_TEST_EQUAL(z, 1);
 	Fp::pow(z, x, Fp::getOp().mp);
 	CYBOZU_TEST_EQUAL(z, x);
+#if 0
 	typedef mcl::FpT<tag2, 128> Fp_other;
 	Fp_other::init("1009");
 	x = 5;
@@ -436,6 +437,7 @@ void powTest()
 	x = 5;
 	Fp::pow(x, x, n);
 	CYBOZU_TEST_EQUAL(x, 125);
+#endif
 }
 
 void mulUnitTest()
@@ -487,6 +489,7 @@ void powGmp()
 
 struct TagAnother;
 
+#if 0
 void anotherFpTest(mcl::fp::Mode mode)
 {
 	typedef mcl::FpT<TagAnother, 128> G;
@@ -496,6 +499,7 @@ void anotherFpTest(mcl::fp::Mode mode)
 	a *= b;
 	CYBOZU_TEST_EQUAL(a, 1);
 }
+#endif
 
 void setArrayTest1()
 {
@@ -508,6 +512,7 @@ void setArrayTest1()
 	CYBOZU_TEST_EQUAL(x, Fp("0x3400000012"));
 }
 
+#if 0
 void setArrayTest2(mcl::fp::Mode mode)
 {
 	Fp::init("0x10000000000001234567a5", mode);
@@ -529,6 +534,7 @@ void setArrayTest2(mcl::fp::Mode mode)
 	uint32_t large[3] = { 0x234567a5, 0x00000001, 0x00100000};
 	CYBOZU_TEST_EXCEPTION(x.setArray(large, 3), cybozu::Exception);
 }
+#endif
 
 void setArrayMaskTest1()
 {
@@ -541,6 +547,7 @@ void setArrayMaskTest1()
 	CYBOZU_TEST_EQUAL(x, Fp("0x3400000012"));
 }
 
+#if 0
 void setArrayMaskTest2(mcl::fp::Mode mode)
 {
 	Fp::init("0x10000000000001234567a5", mode);
@@ -560,6 +567,7 @@ void setArrayMaskTest2(mcl::fp::Mode mode)
 		CYBOZU_TEST_EQUAL(x, Fp(tbl[i].expected));
 	}
 }
+#endif
 
 void setArrayModTest()
 {
@@ -602,13 +610,13 @@ void setArrayModTest()
 
 CYBOZU_TEST_AUTO(set64bit)
 {
-	Fp::init("0x1000000000000000000f");
+	Fp::init("3138550867693340381917894711603833208051177722232017256453");
 	const struct {
 		const char *p;
 		int64_t i;
 	} tbl[] = {
 		{ "0x1234567812345678", int64_t(0x1234567812345678ull) },
-		{ "0xfffedcba987edcba997", -int64_t(0x1234567812345678ull) },
+		{ "-5", -5 },
 	};
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
 		Fp x(tbl[i].p);
@@ -935,17 +943,7 @@ void sub(mcl::fp::Mode mode)
 {
 	printf("mode=%s\n", mcl::fp::ModeToStr(mode));
 	const char *tbl[] = {
-		// N = 2
-		"0x0000000000000001000000000000000d",
-		"0x7fffffffffffffffffffffffffffffff",
-		"0x8000000000000000000000000000001d",
-		"0xffffffffffffffffffffffffffffff61",
-
 		// N = 3
-		"0x000000000000000100000000000000000000000000000033", // min prime
-		"0x00000000fffffffffffffffffffffffffffffffeffffac73",
-		"0x0000000100000000000000000001b8fa16dfab9aca16b6b3",
-		"0x000000010000000000000000000000000000000000000007",
 		"0x30000000000000000000000000000000000000000000002b",
 		"0x70000000000000000000000000000000000000000000001f",
 		"0x800000000000000000000000000000000000000000000005",
@@ -1001,9 +999,9 @@ void sub(mcl::fp::Mode mode)
 		serializeTest();
 		modpTest();
 	}
-	anotherFpTest(mode);
-	setArrayTest2(mode);
-	setArrayMaskTest2(mode);
+//	anotherFpTest(mode);
+//	setArrayTest2(mode);
+//	setArrayMaskTest2(mode);
 }
 
 std::string g_mode;
diff --git a/test/fp_tower_test.cpp b/test/fp_tower_test.cpp
index d9ca03bb..39ed4f1f 100644
--- a/test/fp_tower_test.cpp
+++ b/test/fp_tower_test.cpp
@@ -430,17 +430,7 @@ void test(const char *p, mcl::fp::Mode mode)
 void testAll()
 {
 	const char *tbl[] = {
-		// N = 2
-		"0x0000000000000001000000000000000d",
-		"0x7fffffffffffffffffffffffffffffff",
-		"0x8000000000000000000000000000001d",
-		"0xffffffffffffffffffffffffffffff61",
-
 		// N = 3
-		"0x000000000000000100000000000000000000000000000033", // min prime
-		"0x00000000fffffffffffffffffffffffffffffffeffffac73",
-		"0x0000000100000000000000000001b8fa16dfab9aca16b6b3",
-		"0x000000010000000000000000000000000000000000000007",
 		"0x30000000000000000000000000000000000000000000002b",
 		"0x70000000000000000000000000000000000000000000001f",
 		"0x800000000000000000000000000000000000000000000005",
diff --git a/test/glv_test.cpp b/test/glv_test.cpp
index 59bdcdd2..8c9fea0d 100644
--- a/test/glv_test.cpp
+++ b/test/glv_test.cpp
@@ -153,10 +153,13 @@ void testGLV1()
 		GLV1::mul(P2, P0, ss, true);
 		CYBOZU_TEST_EQUAL(P1, P2);
 	}
+#ifndef NDEBUG
+	puts("skip testGLV1 in debug");
 	Fr s;
 	mapToG1(P0, 123);
 	CYBOZU_BENCH_C("Ec::mul", 100, P1 = P0; s.setRand(rg); G1::mulGeneric, P2, P1, s.getMpz());
 	CYBOZU_BENCH_C("Ec::glv", 100, P1 = P0; s.setRand(rg); GLV1::mul, P2, P1, s.getMpz());
+#endif
 }
 
 /*
@@ -188,10 +191,13 @@ void testGLV2()
 		GLV2::mul(Q2, Q0, n);
 		CYBOZU_TEST_EQUAL(Q1, Q2);
 	}
+#ifndef NDEBUG
+	puts("skip testGLV2 in debug");
 	Fr s;
 	mapToG2(Q0, 123);
 	CYBOZU_BENCH_C("G2::mul", 1000, Q2 = Q0; s.setRand(rg); G2::mulGeneric, Q2, Q1, s.getMpz());
 	CYBOZU_BENCH_C("G2::glv", 1000, Q1 = Q0; s.setRand(rg); GLV2::mul, Q2, Q1, s.getMpz());
+#endif
 }
 
 void testGT()
diff --git a/test/mont_fp_test.cpp b/test/mont_fp_test.cpp
index e41e77a5..32b79faa 100644
--- a/test/mont_fp_test.cpp
+++ b/test/mont_fp_test.cpp
@@ -244,17 +244,7 @@ CYBOZU_TEST_AUTO(test)
 	Test test;
 	const char *tbl[] = {
 #if 1
-		// N = 2
-		"0x0000000000000001000000000000000d",
-		"0x7fffffffffffffffffffffffffffffff",
-		"0x8000000000000000000000000000001d",
-		"0xffffffffffffffffffffffffffffff61",
-
 		// N = 3
-		"0x000000000000000100000000000000000000000000000033", // min prime
-		"0x00000000fffffffffffffffffffffffffffffffeffffac73",
-		"0x0000000100000000000000000001b8fa16dfab9aca16b6b3",
-		"0x000000010000000000000000000000000000000000000007",
 		"0x30000000000000000000000000000000000000000000002b",
 		"0x70000000000000000000000000000000000000000000001f",
 		"0x800000000000000000000000000000000000000000000005",

From 414204f508000ac3f294e3836f23c16e68601fe8 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 12:28:39 +0900
Subject: [PATCH 516/553] reduce supporting bitSize

---
 src/fp.cpp            | 40 ++++++++++------------------------------
 src/gen.cpp           | 20 +++++++++++++++++++-
 src/low_func_llvm.hpp | 38 +++++++++++++-------------------------
 src/proto.hpp         | 37 ++++++++++++++-----------------------
 4 files changed, 56 insertions(+), 79 deletions(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index 5be1bcbd..d8425a0b 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -569,38 +569,16 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
 	}
 #endif
 	switch (N) {
-	case 1:  setOp<1>(*this, mode); break;
-	case 2:  setOp<2>(*this, mode); break;
-	case 3:  setOp<3>(*this, mode); break;
-	case 4:  setOp<4>(*this, mode); break; // 256 if 64-bit
-#if MCL_MAX_UNIT_SIZE >= 6
-	case 5:  setOp<5>(*this, mode); break;
-	case 6:  setOp<6>(*this, mode); break;
+	case 192/CYBOZU_OS_BIT:  setOp<192/CYBOZU_OS_BIT>(*this, mode); break;
+#if CYBOZU_OS_BIT == 32
+	case 224/CYBOZU_OS_BIT:  setOp<224/CYBOZU_OS_BIT>(*this, mode); break;
 #endif
-#if MCL_MAX_UNIT_SIZE >= 8
-	case 7:  setOp<7>(*this, mode); break;
-	case 8:  setOp<8>(*this, mode); break;
+	case 256/CYBOZU_OS_BIT:  setOp<256/CYBOZU_OS_BIT>(*this, mode); break;
+#if MCL_MAX_BIT_SIZE >= 384
+	case 384/CYBOZU_OS_BIT:  setOp<384/CYBOZU_OS_BIT>(*this, mode); break;
 #endif
-#if MCL_MAX_UNIT_SIZE >= 9
-	case 9:  setOp<9>(*this, mode); break; // 521 if 64-bit
-#endif
-#if MCL_MAX_UNIT_SIZE >= 10
-	case 10: setOp<10>(*this, mode); break;
-#endif
-#if MCL_MAX_UNIT_SIZE >= 12
-	case 11: setOp<11>(*this, mode); break;
-	case 12: setOp<12>(*this, mode); break; // 768 if 64-bit
-#endif
-#if MCL_MAX_UNIT_SIZE >= 14
-	case 13: setOp<13>(*this, mode); break;
-	case 14: setOp<14>(*this, mode); break;
-#endif
-#if MCL_MAX_UNIT_SIZE >= 16
-	case 15: setOp<15>(*this, mode); break;
-	case 16: setOp<16>(*this, mode); break; // 1024 if 64-bit
-#endif
-#if MCL_MAX_UNIT_SIZE >= 17
-	case 17: setOp<17>(*this, mode); break; // 521 if 32-bit
+#if MCL_MAX_BIT_SIZE >= 512
+	case 512/CYBOZU_OS_BIT:  setOp<512/CYBOZU_OS_BIT>(*this, mode); break;
 #endif
 	default:
 		return false;
@@ -618,10 +596,12 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
 		fp_sqr = &mcl_fp_sqr_NIST_P192L;
 		fpDbl_mod = &mcl_fpDbl_mod_NIST_P192L;
 	}
+#if MCL_MAX_BIT_SIZE >= 521
 	if (primeMode == PM_NIST_P521) {
 		fpDbl_mod = &mcl_fpDbl_mod_NIST_P521L;
 	}
 #endif
+#endif
 #if defined(MCL_USE_VINT) && MCL_SIZEOF_UNIT == 8
 	if (primeMode == PM_SECP256K1) {
 		fp_mul = &mcl::vint::mcl_fp_mul_SECP256K1;
diff --git a/src/gen.cpp b/src/gen.cpp
index e0f736c2..1bae3675 100644
--- a/src/gen.cpp
+++ b/src/gen.cpp
@@ -669,7 +669,7 @@ struct Code : public mcl::Generator {
 		Operand z(Int, bu);
 		Operand px(IntPtr, unit);
 		Operand y(Int, unit);
-		std::string name = "mulPv" + cybozu::itoa(bit) + "x" + cybozu::itoa(unit);
+		std::string name = "mulPv" + cybozu::itoa(bit) + "x" + cybozu::itoa(unit) + suf;
 		mulPvM[bit] = Function(name, z, px, y);
 		// workaround at https://github.com/herumi/mcl/pull/82
 //		mulPvM[bit].setPrivate();
@@ -1006,6 +1006,23 @@ struct Code : public mcl::Generator {
 		gen_mulUU();
 #else
 		gen_once();
+#if 1
+		int bitTbl[] = {
+			192,
+			224,
+			256,
+			384,
+			512
+		};
+		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(bitTbl); i++) {
+			uint32_t bit = bitTbl[i];
+			if (unit == 64 && bit == 224) continue;
+			setBit(bit);
+			gen_mul();
+			gen_all();
+			gen_addsub();
+		}
+#else
 		uint32_t end = ((maxBitSize + unit - 1) / unit);
 		for (uint32_t n = 1; n <= end; n++) {
 			setBit(n * unit);
@@ -1013,6 +1030,7 @@ struct Code : public mcl::Generator {
 			gen_all();
 			gen_addsub();
 		}
+#endif
 		if (unit == 64 && maxBitSize == 768) {
 			for (uint32_t i = maxBitSize + unit * 2; i <= maxBitSize * 2; i += unit * 2) {
 				setBit(i);
diff --git a/src/low_func_llvm.hpp b/src/low_func_llvm.hpp
index 117288f9..c305ed16 100644
--- a/src/low_func_llvm.hpp
+++ b/src/low_func_llvm.hpp
@@ -52,41 +52,29 @@ template<>const void4u DblSub<n, tag>::f = &mcl_fpDbl_sub ## n ## suf; \
 	MCL_DEF_LLVM_FUNC2(n, Ltag, L)
 #endif
 
-MCL_DEF_LLVM_FUNC(1)
-MCL_DEF_LLVM_FUNC(2)
-MCL_DEF_LLVM_FUNC(3)
-MCL_DEF_LLVM_FUNC(4)
-#if MCL_MAX_UNIT_SIZE >= 6
-MCL_DEF_LLVM_FUNC(5)
+#if CYBOZU_OS_BIT == 32
+
 MCL_DEF_LLVM_FUNC(6)
-#endif
-#if MCL_MAX_UNIT_SIZE >= 8
 MCL_DEF_LLVM_FUNC(7)
 MCL_DEF_LLVM_FUNC(8)
-#endif
-#if MCL_MAX_UNIT_SIZE >= 9
-MCL_DEF_LLVM_FUNC(9)
-#endif
-#if MCL_MAX_UNIT_SIZE >= 10
-MCL_DEF_LLVM_FUNC(10)
-#endif
 #if MCL_MAX_UNIT_SIZE >= 12
-MCL_DEF_LLVM_FUNC(11)
 MCL_DEF_LLVM_FUNC(12)
 #endif
-#if MCL_MAX_UNIT_SIZE >= 14
-MCL_DEF_LLVM_FUNC(13)
-MCL_DEF_LLVM_FUNC(14)
-#endif
 #if MCL_MAX_UNIT_SIZE >= 16
-MCL_DEF_LLVM_FUNC(15)
-#if MCL_SIZEOF_UNIT == 4
 MCL_DEF_LLVM_FUNC(16)
-#else
 #endif
+
+#else // 64
+
+MCL_DEF_LLVM_FUNC(3)
+MCL_DEF_LLVM_FUNC(4)
+#if MCL_MAX_UNIT_SIZE >= 6
+MCL_DEF_LLVM_FUNC(6)
 #endif
-#if MCL_MAX_UNIT_SIZE >= 17
-MCL_DEF_LLVM_FUNC(17)
+#if MCL_MAX_UNIT_SIZE >= 8
+MCL_DEF_LLVM_FUNC(8)
+#endif
+
 #endif
 
 } } // mcl::fp
diff --git a/src/proto.hpp b/src/proto.hpp
index c9f78a52..70588f74 100644
--- a/src/proto.hpp
+++ b/src/proto.hpp
@@ -38,38 +38,29 @@ void mcl_fpDbl_mod_NIST_P521 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, c
 
 extern "C" {
 
-MCL_FP_DEF_FUNC(1)
-MCL_FP_DEF_FUNC(2)
-MCL_FP_DEF_FUNC(3)
-MCL_FP_DEF_FUNC(4)
-#if MCL_MAX_UNIT_SIZE >= 6
-MCL_FP_DEF_FUNC(5)
+#if CYBOZU_OS_BIT == 32
+
 MCL_FP_DEF_FUNC(6)
-#endif
-#if MCL_MAX_UNIT_SIZE >= 8
 MCL_FP_DEF_FUNC(7)
 MCL_FP_DEF_FUNC(8)
-#endif
-#if MCL_MAX_UNIT_SIZE >= 9
-MCL_FP_DEF_FUNC(9)
-#endif
-#if MCL_MAX_UNIT_SIZE >= 10
-MCL_FP_DEF_FUNC(10)
-#endif
 #if MCL_MAX_UNIT_SIZE >= 12
-MCL_FP_DEF_FUNC(11)
 MCL_FP_DEF_FUNC(12)
 #endif
-#if MCL_MAX_UNIT_SIZE >= 14
-MCL_FP_DEF_FUNC(13)
-MCL_FP_DEF_FUNC(14)
-#endif
 #if MCL_MAX_UNIT_SIZE >= 16
-MCL_FP_DEF_FUNC(15)
 MCL_FP_DEF_FUNC(16)
 #endif
-#if MCL_MAX_UNIT_SIZE >= 17
-MCL_FP_DEF_FUNC(17)
+
+#else // 64
+
+MCL_FP_DEF_FUNC(3)
+MCL_FP_DEF_FUNC(4)
+#if MCL_MAX_UNIT_SIZE >= 6
+MCL_FP_DEF_FUNC(6)
+#endif
+#if MCL_MAX_UNIT_SIZE >= 8
+MCL_FP_DEF_FUNC(8)
+#endif
+
 #endif
 
 MCL_FP_DEF_FUNC_SPECIAL(L)

From b965c2c143ba1266f8027d7109bd8fe311a91812 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 12:30:45 +0900
Subject: [PATCH 517/553] reduced base.ll

---
 src/base32.ll | 68204 ++++++++++++------------------------------------
 src/base64.ll | 19538 +++++---------
 2 files changed, 23262 insertions(+), 64480 deletions(-)

diff --git a/src/base32.ll b/src/base32.ll
index 1cfbbe8e..fb4ff4e2 100644
--- a/src/base32.ll
+++ b/src/base32.ll
@@ -124,29 +124,29 @@ define void @mcl_fpDbl_mod_NIST_P192L(i32* noalias  %r1, i32* noalias  %r2, i32*
 %r102 = trunc i256 %r101 to i1
 %r103 = select i1 %r102, i256 %r97, i256 %r100
 %r104 = trunc i256 %r103 to i192
-%r105 = trunc i192 %r104 to i32
-%r107 = getelementptr i32, i32* %r1, i32 0
-store i32 %r105, i32* %r107
+%r106 = getelementptr i32, i32* %r1, i32 0
+%r107 = trunc i192 %r104 to i32
+store i32 %r107, i32* %r106
 %r108 = lshr i192 %r104, 32
-%r109 = trunc i192 %r108 to i32
-%r111 = getelementptr i32, i32* %r1, i32 1
-store i32 %r109, i32* %r111
+%r110 = getelementptr i32, i32* %r1, i32 1
+%r111 = trunc i192 %r108 to i32
+store i32 %r111, i32* %r110
 %r112 = lshr i192 %r108, 32
-%r113 = trunc i192 %r112 to i32
-%r115 = getelementptr i32, i32* %r1, i32 2
-store i32 %r113, i32* %r115
+%r114 = getelementptr i32, i32* %r1, i32 2
+%r115 = trunc i192 %r112 to i32
+store i32 %r115, i32* %r114
 %r116 = lshr i192 %r112, 32
-%r117 = trunc i192 %r116 to i32
-%r119 = getelementptr i32, i32* %r1, i32 3
-store i32 %r117, i32* %r119
+%r118 = getelementptr i32, i32* %r1, i32 3
+%r119 = trunc i192 %r116 to i32
+store i32 %r119, i32* %r118
 %r120 = lshr i192 %r116, 32
-%r121 = trunc i192 %r120 to i32
-%r123 = getelementptr i32, i32* %r1, i32 4
-store i32 %r121, i32* %r123
+%r122 = getelementptr i32, i32* %r1, i32 4
+%r123 = trunc i192 %r120 to i32
+store i32 %r123, i32* %r122
 %r124 = lshr i192 %r120, 32
-%r125 = trunc i192 %r124 to i32
-%r127 = getelementptr i32, i32* %r1, i32 5
-store i32 %r125, i32* %r127
+%r126 = getelementptr i32, i32* %r1, i32 5
+%r127 = trunc i192 %r124 to i32
+store i32 %r127, i32* %r126
 ret void
 }
 define void @mcl_fp_sqr_NIST_P192L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
@@ -457,550 +457,569 @@ store i32 0, i32* %r339
 store i32 0, i32* %r342
 ret void
 nonzero:
-%r343 = trunc i544 %r239 to i32
-%r345 = getelementptr i32, i32* %r1, i32 0
-store i32 %r343, i32* %r345
+%r344 = getelementptr i32, i32* %r1, i32 0
+%r345 = trunc i544 %r239 to i32
+store i32 %r345, i32* %r344
 %r346 = lshr i544 %r239, 32
-%r347 = trunc i544 %r346 to i32
-%r349 = getelementptr i32, i32* %r1, i32 1
-store i32 %r347, i32* %r349
+%r348 = getelementptr i32, i32* %r1, i32 1
+%r349 = trunc i544 %r346 to i32
+store i32 %r349, i32* %r348
 %r350 = lshr i544 %r346, 32
-%r351 = trunc i544 %r350 to i32
-%r353 = getelementptr i32, i32* %r1, i32 2
-store i32 %r351, i32* %r353
+%r352 = getelementptr i32, i32* %r1, i32 2
+%r353 = trunc i544 %r350 to i32
+store i32 %r353, i32* %r352
 %r354 = lshr i544 %r350, 32
-%r355 = trunc i544 %r354 to i32
-%r357 = getelementptr i32, i32* %r1, i32 3
-store i32 %r355, i32* %r357
+%r356 = getelementptr i32, i32* %r1, i32 3
+%r357 = trunc i544 %r354 to i32
+store i32 %r357, i32* %r356
 %r358 = lshr i544 %r354, 32
-%r359 = trunc i544 %r358 to i32
-%r361 = getelementptr i32, i32* %r1, i32 4
-store i32 %r359, i32* %r361
+%r360 = getelementptr i32, i32* %r1, i32 4
+%r361 = trunc i544 %r358 to i32
+store i32 %r361, i32* %r360
 %r362 = lshr i544 %r358, 32
-%r363 = trunc i544 %r362 to i32
-%r365 = getelementptr i32, i32* %r1, i32 5
-store i32 %r363, i32* %r365
+%r364 = getelementptr i32, i32* %r1, i32 5
+%r365 = trunc i544 %r362 to i32
+store i32 %r365, i32* %r364
 %r366 = lshr i544 %r362, 32
-%r367 = trunc i544 %r366 to i32
-%r369 = getelementptr i32, i32* %r1, i32 6
-store i32 %r367, i32* %r369
+%r368 = getelementptr i32, i32* %r1, i32 6
+%r369 = trunc i544 %r366 to i32
+store i32 %r369, i32* %r368
 %r370 = lshr i544 %r366, 32
-%r371 = trunc i544 %r370 to i32
-%r373 = getelementptr i32, i32* %r1, i32 7
-store i32 %r371, i32* %r373
+%r372 = getelementptr i32, i32* %r1, i32 7
+%r373 = trunc i544 %r370 to i32
+store i32 %r373, i32* %r372
 %r374 = lshr i544 %r370, 32
-%r375 = trunc i544 %r374 to i32
-%r377 = getelementptr i32, i32* %r1, i32 8
-store i32 %r375, i32* %r377
+%r376 = getelementptr i32, i32* %r1, i32 8
+%r377 = trunc i544 %r374 to i32
+store i32 %r377, i32* %r376
 %r378 = lshr i544 %r374, 32
-%r379 = trunc i544 %r378 to i32
-%r381 = getelementptr i32, i32* %r1, i32 9
-store i32 %r379, i32* %r381
+%r380 = getelementptr i32, i32* %r1, i32 9
+%r381 = trunc i544 %r378 to i32
+store i32 %r381, i32* %r380
 %r382 = lshr i544 %r378, 32
-%r383 = trunc i544 %r382 to i32
-%r385 = getelementptr i32, i32* %r1, i32 10
-store i32 %r383, i32* %r385
+%r384 = getelementptr i32, i32* %r1, i32 10
+%r385 = trunc i544 %r382 to i32
+store i32 %r385, i32* %r384
 %r386 = lshr i544 %r382, 32
-%r387 = trunc i544 %r386 to i32
-%r389 = getelementptr i32, i32* %r1, i32 11
-store i32 %r387, i32* %r389
+%r388 = getelementptr i32, i32* %r1, i32 11
+%r389 = trunc i544 %r386 to i32
+store i32 %r389, i32* %r388
 %r390 = lshr i544 %r386, 32
-%r391 = trunc i544 %r390 to i32
-%r393 = getelementptr i32, i32* %r1, i32 12
-store i32 %r391, i32* %r393
+%r392 = getelementptr i32, i32* %r1, i32 12
+%r393 = trunc i544 %r390 to i32
+store i32 %r393, i32* %r392
 %r394 = lshr i544 %r390, 32
-%r395 = trunc i544 %r394 to i32
-%r397 = getelementptr i32, i32* %r1, i32 13
-store i32 %r395, i32* %r397
+%r396 = getelementptr i32, i32* %r1, i32 13
+%r397 = trunc i544 %r394 to i32
+store i32 %r397, i32* %r396
 %r398 = lshr i544 %r394, 32
-%r399 = trunc i544 %r398 to i32
-%r401 = getelementptr i32, i32* %r1, i32 14
-store i32 %r399, i32* %r401
+%r400 = getelementptr i32, i32* %r1, i32 14
+%r401 = trunc i544 %r398 to i32
+store i32 %r401, i32* %r400
 %r402 = lshr i544 %r398, 32
-%r403 = trunc i544 %r402 to i32
-%r405 = getelementptr i32, i32* %r1, i32 15
-store i32 %r403, i32* %r405
+%r404 = getelementptr i32, i32* %r1, i32 15
+%r405 = trunc i544 %r402 to i32
+store i32 %r405, i32* %r404
 %r406 = lshr i544 %r402, 32
-%r407 = trunc i544 %r406 to i32
-%r409 = getelementptr i32, i32* %r1, i32 16
-store i32 %r407, i32* %r409
+%r408 = getelementptr i32, i32* %r1, i32 16
+%r409 = trunc i544 %r406 to i32
+store i32 %r409, i32* %r408
 ret void
 }
-define i64 @mulPv32x32(i32* noalias  %r2, i32 %r3)
+define i224 @mulPv192x32(i32* noalias  %r2, i32 %r3)
 {
 %r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
 %r6 = trunc i64 %r5 to i32
 %r7 = call i32 @extractHigh32(i64 %r5)
-%r8 = zext i32 %r6 to i64
-%r9 = zext i32 %r7 to i64
-%r10 = shl i64 %r9, 32
-%r11 = add i64 %r8, %r10
-ret i64 %r11
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r28 = zext i32 %r6 to i64
+%r29 = zext i32 %r10 to i64
+%r30 = shl i64 %r29, 32
+%r31 = or i64 %r28, %r30
+%r32 = zext i64 %r31 to i96
+%r33 = zext i32 %r14 to i96
+%r34 = shl i96 %r33, 64
+%r35 = or i96 %r32, %r34
+%r36 = zext i96 %r35 to i128
+%r37 = zext i32 %r18 to i128
+%r38 = shl i128 %r37, 96
+%r39 = or i128 %r36, %r38
+%r40 = zext i128 %r39 to i160
+%r41 = zext i32 %r22 to i160
+%r42 = shl i160 %r41, 128
+%r43 = or i160 %r40, %r42
+%r44 = zext i160 %r43 to i192
+%r45 = zext i32 %r26 to i192
+%r46 = shl i192 %r45, 160
+%r47 = or i192 %r44, %r46
+%r48 = zext i32 %r7 to i64
+%r49 = zext i32 %r11 to i64
+%r50 = shl i64 %r49, 32
+%r51 = or i64 %r48, %r50
+%r52 = zext i64 %r51 to i96
+%r53 = zext i32 %r15 to i96
+%r54 = shl i96 %r53, 64
+%r55 = or i96 %r52, %r54
+%r56 = zext i96 %r55 to i128
+%r57 = zext i32 %r19 to i128
+%r58 = shl i128 %r57, 96
+%r59 = or i128 %r56, %r58
+%r60 = zext i128 %r59 to i160
+%r61 = zext i32 %r23 to i160
+%r62 = shl i160 %r61, 128
+%r63 = or i160 %r60, %r62
+%r64 = zext i160 %r63 to i192
+%r65 = zext i32 %r27 to i192
+%r66 = shl i192 %r65, 160
+%r67 = or i192 %r64, %r66
+%r68 = zext i192 %r47 to i224
+%r69 = zext i192 %r67 to i224
+%r70 = shl i224 %r69, 32
+%r71 = add i224 %r68, %r70
+ret i224 %r71
 }
-define void @mcl_fp_mulUnitPre1L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
+define void @mcl_fp_mulUnitPre6L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
 {
-%r4 = call i64 @mulPv32x32(i32* %r2, i32 %r3)
-%r5 = trunc i64 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i64 %r4, 32
-%r9 = trunc i64 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
+%r4 = call i224 @mulPv192x32(i32* %r2, i32 %r3)
+%r6 = getelementptr i32, i32* %r1, i32 0
+%r7 = trunc i224 %r4 to i32
+store i32 %r7, i32* %r6
+%r8 = lshr i224 %r4, 32
+%r10 = getelementptr i32, i32* %r1, i32 1
+%r11 = trunc i224 %r8 to i32
+store i32 %r11, i32* %r10
+%r12 = lshr i224 %r8, 32
+%r14 = getelementptr i32, i32* %r1, i32 2
+%r15 = trunc i224 %r12 to i32
+store i32 %r15, i32* %r14
+%r16 = lshr i224 %r12, 32
+%r18 = getelementptr i32, i32* %r1, i32 3
+%r19 = trunc i224 %r16 to i32
+store i32 %r19, i32* %r18
+%r20 = lshr i224 %r16, 32
+%r22 = getelementptr i32, i32* %r1, i32 4
+%r23 = trunc i224 %r20 to i32
+store i32 %r23, i32* %r22
+%r24 = lshr i224 %r20, 32
+%r26 = getelementptr i32, i32* %r1, i32 5
+%r27 = trunc i224 %r24 to i32
+store i32 %r27, i32* %r26
+%r28 = lshr i224 %r24, 32
+%r30 = getelementptr i32, i32* %r1, i32 6
+%r31 = trunc i224 %r28 to i32
+store i32 %r31, i32* %r30
 ret void
 }
-define void @mcl_fpDbl_mulPre1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+define void @mcl_fpDbl_mulPre6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
 {
-%r4 = load i32, i32* %r2
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r4 to i64
-%r7 = zext i32 %r5 to i64
-%r8 = mul i64 %r6, %r7
-%r9 = trunc i64 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 0
-store i32 %r9, i32* %r11
-%r12 = lshr i64 %r8, 32
-%r13 = trunc i64 %r12 to i32
+%r4 = load i32, i32* %r3
+%r5 = call i224 @mulPv192x32(i32* %r2, i32 %r4)
+%r6 = trunc i224 %r5 to i32
+store i32 %r6, i32* %r1
+%r7 = lshr i224 %r5, 32
+%r9 = getelementptr i32, i32* %r3, i32 1
+%r10 = load i32, i32* %r9
+%r11 = call i224 @mulPv192x32(i32* %r2, i32 %r10)
+%r12 = add i224 %r7, %r11
+%r13 = trunc i224 %r12 to i32
 %r15 = getelementptr i32, i32* %r1, i32 1
 store i32 %r13, i32* %r15
+%r16 = lshr i224 %r12, 32
+%r18 = getelementptr i32, i32* %r3, i32 2
+%r19 = load i32, i32* %r18
+%r20 = call i224 @mulPv192x32(i32* %r2, i32 %r19)
+%r21 = add i224 %r16, %r20
+%r22 = trunc i224 %r21 to i32
+%r24 = getelementptr i32, i32* %r1, i32 2
+store i32 %r22, i32* %r24
+%r25 = lshr i224 %r21, 32
+%r27 = getelementptr i32, i32* %r3, i32 3
+%r28 = load i32, i32* %r27
+%r29 = call i224 @mulPv192x32(i32* %r2, i32 %r28)
+%r30 = add i224 %r25, %r29
+%r31 = trunc i224 %r30 to i32
+%r33 = getelementptr i32, i32* %r1, i32 3
+store i32 %r31, i32* %r33
+%r34 = lshr i224 %r30, 32
+%r36 = getelementptr i32, i32* %r3, i32 4
+%r37 = load i32, i32* %r36
+%r38 = call i224 @mulPv192x32(i32* %r2, i32 %r37)
+%r39 = add i224 %r34, %r38
+%r40 = trunc i224 %r39 to i32
+%r42 = getelementptr i32, i32* %r1, i32 4
+store i32 %r40, i32* %r42
+%r43 = lshr i224 %r39, 32
+%r45 = getelementptr i32, i32* %r3, i32 5
+%r46 = load i32, i32* %r45
+%r47 = call i224 @mulPv192x32(i32* %r2, i32 %r46)
+%r48 = add i224 %r43, %r47
+%r50 = getelementptr i32, i32* %r1, i32 5
+%r52 = getelementptr i32, i32* %r50, i32 0
+%r53 = trunc i224 %r48 to i32
+store i32 %r53, i32* %r52
+%r54 = lshr i224 %r48, 32
+%r56 = getelementptr i32, i32* %r50, i32 1
+%r57 = trunc i224 %r54 to i32
+store i32 %r57, i32* %r56
+%r58 = lshr i224 %r54, 32
+%r60 = getelementptr i32, i32* %r50, i32 2
+%r61 = trunc i224 %r58 to i32
+store i32 %r61, i32* %r60
+%r62 = lshr i224 %r58, 32
+%r64 = getelementptr i32, i32* %r50, i32 3
+%r65 = trunc i224 %r62 to i32
+store i32 %r65, i32* %r64
+%r66 = lshr i224 %r62, 32
+%r68 = getelementptr i32, i32* %r50, i32 4
+%r69 = trunc i224 %r66 to i32
+store i32 %r69, i32* %r68
+%r70 = lshr i224 %r66, 32
+%r72 = getelementptr i32, i32* %r50, i32 5
+%r73 = trunc i224 %r70 to i32
+store i32 %r73, i32* %r72
+%r74 = lshr i224 %r70, 32
+%r76 = getelementptr i32, i32* %r50, i32 6
+%r77 = trunc i224 %r74 to i32
+store i32 %r77, i32* %r76
 ret void
 }
-define void @mcl_fpDbl_sqrPre1L(i32* noalias  %r1, i32* noalias  %r2)
+define void @mcl_fpDbl_sqrPre6L(i32* noalias  %r1, i32* noalias  %r2)
 {
 %r3 = load i32, i32* %r2
-%r4 = load i32, i32* %r2
-%r5 = zext i32 %r3 to i64
-%r6 = zext i32 %r4 to i64
-%r7 = mul i64 %r5, %r6
-%r8 = trunc i64 %r7 to i32
-%r10 = getelementptr i32, i32* %r1, i32 0
-store i32 %r8, i32* %r10
-%r11 = lshr i64 %r7, 32
-%r12 = trunc i64 %r11 to i32
+%r4 = call i224 @mulPv192x32(i32* %r2, i32 %r3)
+%r5 = trunc i224 %r4 to i32
+store i32 %r5, i32* %r1
+%r6 = lshr i224 %r4, 32
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = call i224 @mulPv192x32(i32* %r2, i32 %r9)
+%r11 = add i224 %r6, %r10
+%r12 = trunc i224 %r11 to i32
 %r14 = getelementptr i32, i32* %r1, i32 1
 store i32 %r12, i32* %r14
+%r15 = lshr i224 %r11, 32
+%r17 = getelementptr i32, i32* %r2, i32 2
+%r18 = load i32, i32* %r17
+%r19 = call i224 @mulPv192x32(i32* %r2, i32 %r18)
+%r20 = add i224 %r15, %r19
+%r21 = trunc i224 %r20 to i32
+%r23 = getelementptr i32, i32* %r1, i32 2
+store i32 %r21, i32* %r23
+%r24 = lshr i224 %r20, 32
+%r26 = getelementptr i32, i32* %r2, i32 3
+%r27 = load i32, i32* %r26
+%r28 = call i224 @mulPv192x32(i32* %r2, i32 %r27)
+%r29 = add i224 %r24, %r28
+%r30 = trunc i224 %r29 to i32
+%r32 = getelementptr i32, i32* %r1, i32 3
+store i32 %r30, i32* %r32
+%r33 = lshr i224 %r29, 32
+%r35 = getelementptr i32, i32* %r2, i32 4
+%r36 = load i32, i32* %r35
+%r37 = call i224 @mulPv192x32(i32* %r2, i32 %r36)
+%r38 = add i224 %r33, %r37
+%r39 = trunc i224 %r38 to i32
+%r41 = getelementptr i32, i32* %r1, i32 4
+store i32 %r39, i32* %r41
+%r42 = lshr i224 %r38, 32
+%r44 = getelementptr i32, i32* %r2, i32 5
+%r45 = load i32, i32* %r44
+%r46 = call i224 @mulPv192x32(i32* %r2, i32 %r45)
+%r47 = add i224 %r42, %r46
+%r49 = getelementptr i32, i32* %r1, i32 5
+%r51 = getelementptr i32, i32* %r49, i32 0
+%r52 = trunc i224 %r47 to i32
+store i32 %r52, i32* %r51
+%r53 = lshr i224 %r47, 32
+%r55 = getelementptr i32, i32* %r49, i32 1
+%r56 = trunc i224 %r53 to i32
+store i32 %r56, i32* %r55
+%r57 = lshr i224 %r53, 32
+%r59 = getelementptr i32, i32* %r49, i32 2
+%r60 = trunc i224 %r57 to i32
+store i32 %r60, i32* %r59
+%r61 = lshr i224 %r57, 32
+%r63 = getelementptr i32, i32* %r49, i32 3
+%r64 = trunc i224 %r61 to i32
+store i32 %r64, i32* %r63
+%r65 = lshr i224 %r61, 32
+%r67 = getelementptr i32, i32* %r49, i32 4
+%r68 = trunc i224 %r65 to i32
+store i32 %r68, i32* %r67
+%r69 = lshr i224 %r65, 32
+%r71 = getelementptr i32, i32* %r49, i32 5
+%r72 = trunc i224 %r69 to i32
+store i32 %r72, i32* %r71
+%r73 = lshr i224 %r69, 32
+%r75 = getelementptr i32, i32* %r49, i32 6
+%r76 = trunc i224 %r73 to i32
+store i32 %r76, i32* %r75
 ret void
 }
-define void @mcl_fp_mont1L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+define void @mcl_fp_mont6L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
 {
 %r6 = getelementptr i32, i32* %r4, i32 -1
 %r7 = load i32, i32* %r6
 %r9 = getelementptr i32, i32* %r3, i32 0
 %r10 = load i32, i32* %r9
-%r11 = call i64 @mulPv32x32(i32* %r2, i32 %r10)
-%r12 = zext i64 %r11 to i96
-%r13 = trunc i64 %r11 to i32
+%r11 = call i224 @mulPv192x32(i32* %r2, i32 %r10)
+%r12 = zext i224 %r11 to i256
+%r13 = trunc i224 %r11 to i32
 %r14 = mul i32 %r13, %r7
-%r15 = call i64 @mulPv32x32(i32* %r4, i32 %r14)
-%r16 = zext i64 %r15 to i96
-%r17 = add i96 %r12, %r16
-%r18 = lshr i96 %r17, 32
-%r19 = trunc i96 %r18 to i64
-%r20 = load i32, i32* %r4
-%r21 = zext i32 %r20 to i64
-%r22 = sub i64 %r19, %r21
-%r23 = lshr i64 %r22, 32
-%r24 = trunc i64 %r23 to i1
-%r25 = select i1 %r24, i64 %r19, i64 %r22
-%r26 = trunc i64 %r25 to i32
-store i32 %r26, i32* %r1
-ret void
-}
-define void @mcl_fp_montNF1L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i64 @mulPv32x32(i32* %r2, i32 %r8)
-%r10 = trunc i64 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i64 @mulPv32x32(i32* %r4, i32 %r11)
-%r13 = add i64 %r9, %r12
-%r14 = lshr i64 %r13, 32
-%r15 = trunc i64 %r14 to i32
-%r16 = load i32, i32* %r4
-%r17 = sub i32 %r15, %r16
-%r18 = lshr i32 %r17, 31
-%r19 = trunc i32 %r18 to i1
-%r20 = select i1 %r19, i32 %r15, i32 %r17
-store i32 %r20, i32* %r1
-ret void
-}
-define void @mcl_fp_montRed1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = load i32, i32* %r2
-%r9 = zext i32 %r8 to i64
-%r11 = getelementptr i32, i32* %r2, i32 1
-%r12 = load i32, i32* %r11
-%r13 = zext i32 %r12 to i64
-%r14 = shl i64 %r13, 32
-%r15 = or i64 %r9, %r14
-%r16 = zext i64 %r15 to i96
-%r17 = trunc i96 %r16 to i32
-%r18 = mul i32 %r17, %r6
-%r19 = call i64 @mulPv32x32(i32* %r3, i32 %r18)
-%r20 = zext i64 %r19 to i96
-%r21 = add i96 %r16, %r20
-%r22 = lshr i96 %r21, 32
-%r23 = trunc i96 %r22 to i64
-%r24 = zext i32 %r7 to i64
-%r25 = sub i64 %r23, %r24
-%r26 = lshr i64 %r25, 32
-%r27 = trunc i64 %r26 to i1
-%r28 = select i1 %r27, i64 %r23, i64 %r25
-%r29 = trunc i64 %r28 to i32
-store i32 %r29, i32* %r1
-ret void
-}
-define i32 @mcl_fp_addPre1L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r7 = load i32, i32* %r4
-%r8 = zext i32 %r7 to i64
-%r9 = add i64 %r6, %r8
-%r10 = trunc i64 %r9 to i32
-store i32 %r10, i32* %r2
-%r11 = lshr i64 %r9, 32
-%r12 = trunc i64 %r11 to i32
-ret i32 %r12
-}
-define i32 @mcl_fp_subPre1L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r7 = load i32, i32* %r4
-%r8 = zext i32 %r7 to i64
-%r9 = sub i64 %r6, %r8
-%r10 = trunc i64 %r9 to i32
-store i32 %r10, i32* %r2
-%r11 = lshr i64 %r9, 32
-%r12 = trunc i64 %r11 to i32
-%r14 = and i32 %r12, 1
-ret i32 %r14
-}
-define void @mcl_fp_shr1_1L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = lshr i32 %r3, 1
-store i32 %r4, i32* %r1
-ret void
-}
-define void @mcl_fp_add1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = load i32, i32* %r3
-%r7 = zext i32 %r5 to i64
-%r8 = zext i32 %r6 to i64
-%r9 = add i64 %r7, %r8
-%r10 = trunc i64 %r9 to i32
-store i32 %r10, i32* %r1
-%r11 = load i32, i32* %r4
-%r12 = zext i32 %r11 to i64
-%r13 = sub i64 %r9, %r12
-%r14 = lshr i64 %r13, 32
-%r15 = trunc i64 %r14 to i1
-br i1%r15, label %carry, label %nocarry
-nocarry:
-%r16 = trunc i64 %r13 to i32
-store i32 %r16, i32* %r1
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = load i32, i32* %r3
-%r7 = add i32 %r5, %r6
-%r8 = load i32, i32* %r4
-%r9 = sub i32 %r7, %r8
-%r10 = lshr i32 %r9, 31
-%r11 = trunc i32 %r10 to i1
-%r12 = select i1 %r11, i32 %r7, i32 %r9
-store i32 %r12, i32* %r1
-ret void
-}
-define void @mcl_fp_sub1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = load i32, i32* %r3
-%r7 = zext i32 %r5 to i64
-%r8 = zext i32 %r6 to i64
-%r9 = sub i64 %r7, %r8
-%r10 = trunc i64 %r9 to i32
-%r11 = lshr i64 %r9, 32
-%r12 = trunc i64 %r11 to i1
-store i32 %r10, i32* %r1
-br i1%r12, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r13 = load i32, i32* %r4
-%r14 = add i32 %r10, %r13
-store i32 %r14, i32* %r1
-ret void
-}
-define void @mcl_fp_subNF1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = load i32, i32* %r3
-%r7 = sub i32 %r5, %r6
-%r8 = lshr i32 %r7, 31
-%r9 = trunc i32 %r8 to i1
-%r10 = load i32, i32* %r4
-%r12 = select i1 %r9, i32 %r10, i32 0
-%r13 = add i32 %r7, %r12
-store i32 %r13, i32* %r1
-ret void
-}
-define void @mcl_fpDbl_add1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = load i32, i32* %r3
-%r14 = zext i32 %r13 to i64
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = zext i32 %r17 to i64
-%r19 = shl i64 %r18, 32
-%r20 = or i64 %r14, %r19
-%r21 = zext i64 %r12 to i96
-%r22 = zext i64 %r20 to i96
-%r23 = add i96 %r21, %r22
-%r24 = trunc i96 %r23 to i32
-store i32 %r24, i32* %r1
-%r25 = lshr i96 %r23, 32
-%r26 = trunc i96 %r25 to i64
-%r27 = load i32, i32* %r4
-%r28 = zext i32 %r27 to i64
-%r29 = sub i64 %r26, %r28
-%r30 = lshr i64 %r29, 32
-%r31 = trunc i64 %r30 to i1
-%r32 = select i1 %r31, i64 %r26, i64 %r29
-%r33 = trunc i64 %r32 to i32
-%r35 = getelementptr i32, i32* %r1, i32 1
-store i32 %r33, i32* %r35
-ret void
-}
-define void @mcl_fpDbl_sub1L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = load i32, i32* %r3
-%r14 = zext i32 %r13 to i64
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = zext i32 %r17 to i64
-%r19 = shl i64 %r18, 32
-%r20 = or i64 %r14, %r19
-%r21 = zext i64 %r12 to i96
-%r22 = zext i64 %r20 to i96
-%r23 = sub i96 %r21, %r22
-%r24 = trunc i96 %r23 to i32
-store i32 %r24, i32* %r1
-%r25 = lshr i96 %r23, 32
-%r26 = trunc i96 %r25 to i32
-%r27 = lshr i96 %r23, 64
-%r28 = trunc i96 %r27 to i1
-%r29 = load i32, i32* %r4
-%r31 = select i1 %r28, i32 %r29, i32 0
-%r32 = add i32 %r26, %r31
-%r34 = getelementptr i32, i32* %r1, i32 1
-store i32 %r32, i32* %r34
-ret void
-}
-define i96 @mulPv64x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r12 = zext i32 %r6 to i64
-%r13 = zext i32 %r10 to i64
-%r14 = shl i64 %r13, 32
-%r15 = or i64 %r12, %r14
-%r16 = zext i32 %r7 to i64
-%r17 = zext i32 %r11 to i64
-%r18 = shl i64 %r17, 32
-%r19 = or i64 %r16, %r18
-%r20 = zext i64 %r15 to i96
-%r21 = zext i64 %r19 to i96
-%r22 = shl i96 %r21, 32
-%r23 = add i96 %r20, %r22
-ret i96 %r23
-}
-define void @mcl_fp_mulUnitPre2L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i96 @mulPv64x32(i32* %r2, i32 %r3)
-%r5 = trunc i96 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i96 %r4, 32
-%r9 = trunc i96 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i96 %r8, 32
-%r13 = trunc i96 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-ret void
-}
-define void @mcl_fpDbl_mulPre2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r4 = load i32, i32* %r3
-%r5 = call i96 @mulPv64x32(i32* %r2, i32 %r4)
-%r6 = trunc i96 %r5 to i32
-store i32 %r6, i32* %r1
-%r7 = lshr i96 %r5, 32
-%r9 = getelementptr i32, i32* %r3, i32 1
-%r10 = load i32, i32* %r9
-%r11 = call i96 @mulPv64x32(i32* %r2, i32 %r10)
-%r12 = add i96 %r7, %r11
-%r14 = getelementptr i32, i32* %r1, i32 1
-%r15 = trunc i96 %r12 to i32
-%r17 = getelementptr i32, i32* %r14, i32 0
-store i32 %r15, i32* %r17
-%r18 = lshr i96 %r12, 32
-%r19 = trunc i96 %r18 to i32
-%r21 = getelementptr i32, i32* %r14, i32 1
-store i32 %r19, i32* %r21
-%r22 = lshr i96 %r18, 32
-%r23 = trunc i96 %r22 to i32
-%r25 = getelementptr i32, i32* %r14, i32 2
-store i32 %r23, i32* %r25
-ret void
-}
-define void @mcl_fpDbl_sqrPre2L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = call i96 @mulPv64x32(i32* %r2, i32 %r3)
-%r5 = trunc i96 %r4 to i32
-store i32 %r5, i32* %r1
-%r6 = lshr i96 %r4, 32
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = call i96 @mulPv64x32(i32* %r2, i32 %r9)
-%r11 = add i96 %r6, %r10
-%r13 = getelementptr i32, i32* %r1, i32 1
-%r14 = trunc i96 %r11 to i32
-%r16 = getelementptr i32, i32* %r13, i32 0
-store i32 %r14, i32* %r16
-%r17 = lshr i96 %r11, 32
-%r18 = trunc i96 %r17 to i32
-%r20 = getelementptr i32, i32* %r13, i32 1
-store i32 %r18, i32* %r20
-%r21 = lshr i96 %r17, 32
-%r22 = trunc i96 %r21 to i32
-%r24 = getelementptr i32, i32* %r13, i32 2
-store i32 %r22, i32* %r24
-ret void
-}
-define void @mcl_fp_mont2L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i96 @mulPv64x32(i32* %r2, i32 %r10)
-%r12 = zext i96 %r11 to i128
-%r13 = trunc i96 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i96 @mulPv64x32(i32* %r4, i32 %r14)
-%r16 = zext i96 %r15 to i128
-%r17 = add i128 %r12, %r16
-%r18 = lshr i128 %r17, 32
+%r15 = call i224 @mulPv192x32(i32* %r4, i32 %r14)
+%r16 = zext i224 %r15 to i256
+%r17 = add i256 %r12, %r16
+%r18 = lshr i256 %r17, 32
 %r20 = getelementptr i32, i32* %r3, i32 1
 %r21 = load i32, i32* %r20
-%r22 = call i96 @mulPv64x32(i32* %r2, i32 %r21)
-%r23 = zext i96 %r22 to i128
-%r24 = add i128 %r18, %r23
-%r25 = trunc i128 %r24 to i32
+%r22 = call i224 @mulPv192x32(i32* %r2, i32 %r21)
+%r23 = zext i224 %r22 to i256
+%r24 = add i256 %r18, %r23
+%r25 = trunc i256 %r24 to i32
 %r26 = mul i32 %r25, %r7
-%r27 = call i96 @mulPv64x32(i32* %r4, i32 %r26)
-%r28 = zext i96 %r27 to i128
-%r29 = add i128 %r24, %r28
-%r30 = lshr i128 %r29, 32
-%r31 = trunc i128 %r30 to i96
-%r32 = load i32, i32* %r4
-%r33 = zext i32 %r32 to i64
-%r35 = getelementptr i32, i32* %r4, i32 1
-%r36 = load i32, i32* %r35
-%r37 = zext i32 %r36 to i64
-%r38 = shl i64 %r37, 32
-%r39 = or i64 %r33, %r38
-%r40 = zext i64 %r39 to i96
-%r41 = sub i96 %r31, %r40
-%r42 = lshr i96 %r41, 64
-%r43 = trunc i96 %r42 to i1
-%r44 = select i1 %r43, i96 %r31, i96 %r41
-%r45 = trunc i96 %r44 to i64
-%r46 = trunc i64 %r45 to i32
-%r48 = getelementptr i32, i32* %r1, i32 0
-store i32 %r46, i32* %r48
-%r49 = lshr i64 %r45, 32
-%r50 = trunc i64 %r49 to i32
-%r52 = getelementptr i32, i32* %r1, i32 1
-store i32 %r50, i32* %r52
+%r27 = call i224 @mulPv192x32(i32* %r4, i32 %r26)
+%r28 = zext i224 %r27 to i256
+%r29 = add i256 %r24, %r28
+%r30 = lshr i256 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2
+%r33 = load i32, i32* %r32
+%r34 = call i224 @mulPv192x32(i32* %r2, i32 %r33)
+%r35 = zext i224 %r34 to i256
+%r36 = add i256 %r30, %r35
+%r37 = trunc i256 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i224 @mulPv192x32(i32* %r4, i32 %r38)
+%r40 = zext i224 %r39 to i256
+%r41 = add i256 %r36, %r40
+%r42 = lshr i256 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = call i224 @mulPv192x32(i32* %r2, i32 %r45)
+%r47 = zext i224 %r46 to i256
+%r48 = add i256 %r42, %r47
+%r49 = trunc i256 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i224 @mulPv192x32(i32* %r4, i32 %r50)
+%r52 = zext i224 %r51 to i256
+%r53 = add i256 %r48, %r52
+%r54 = lshr i256 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4
+%r57 = load i32, i32* %r56
+%r58 = call i224 @mulPv192x32(i32* %r2, i32 %r57)
+%r59 = zext i224 %r58 to i256
+%r60 = add i256 %r54, %r59
+%r61 = trunc i256 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i224 @mulPv192x32(i32* %r4, i32 %r62)
+%r64 = zext i224 %r63 to i256
+%r65 = add i256 %r60, %r64
+%r66 = lshr i256 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5
+%r69 = load i32, i32* %r68
+%r70 = call i224 @mulPv192x32(i32* %r2, i32 %r69)
+%r71 = zext i224 %r70 to i256
+%r72 = add i256 %r66, %r71
+%r73 = trunc i256 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i224 @mulPv192x32(i32* %r4, i32 %r74)
+%r76 = zext i224 %r75 to i256
+%r77 = add i256 %r72, %r76
+%r78 = lshr i256 %r77, 32
+%r79 = trunc i256 %r78 to i224
+%r80 = load i32, i32* %r4
+%r81 = zext i32 %r80 to i64
+%r83 = getelementptr i32, i32* %r4, i32 1
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i64
+%r86 = shl i64 %r85, 32
+%r87 = or i64 %r81, %r86
+%r88 = zext i64 %r87 to i96
+%r90 = getelementptr i32, i32* %r4, i32 2
+%r91 = load i32, i32* %r90
+%r92 = zext i32 %r91 to i96
+%r93 = shl i96 %r92, 64
+%r94 = or i96 %r88, %r93
+%r95 = zext i96 %r94 to i128
+%r97 = getelementptr i32, i32* %r4, i32 3
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i128
+%r100 = shl i128 %r99, 96
+%r101 = or i128 %r95, %r100
+%r102 = zext i128 %r101 to i160
+%r104 = getelementptr i32, i32* %r4, i32 4
+%r105 = load i32, i32* %r104
+%r106 = zext i32 %r105 to i160
+%r107 = shl i160 %r106, 128
+%r108 = or i160 %r102, %r107
+%r109 = zext i160 %r108 to i192
+%r111 = getelementptr i32, i32* %r4, i32 5
+%r112 = load i32, i32* %r111
+%r113 = zext i32 %r112 to i192
+%r114 = shl i192 %r113, 160
+%r115 = or i192 %r109, %r114
+%r116 = zext i192 %r115 to i224
+%r117 = sub i224 %r79, %r116
+%r118 = lshr i224 %r117, 192
+%r119 = trunc i224 %r118 to i1
+%r120 = select i1 %r119, i224 %r79, i224 %r117
+%r121 = trunc i224 %r120 to i192
+%r123 = getelementptr i32, i32* %r1, i32 0
+%r124 = trunc i192 %r121 to i32
+store i32 %r124, i32* %r123
+%r125 = lshr i192 %r121, 32
+%r127 = getelementptr i32, i32* %r1, i32 1
+%r128 = trunc i192 %r125 to i32
+store i32 %r128, i32* %r127
+%r129 = lshr i192 %r125, 32
+%r131 = getelementptr i32, i32* %r1, i32 2
+%r132 = trunc i192 %r129 to i32
+store i32 %r132, i32* %r131
+%r133 = lshr i192 %r129, 32
+%r135 = getelementptr i32, i32* %r1, i32 3
+%r136 = trunc i192 %r133 to i32
+store i32 %r136, i32* %r135
+%r137 = lshr i192 %r133, 32
+%r139 = getelementptr i32, i32* %r1, i32 4
+%r140 = trunc i192 %r137 to i32
+store i32 %r140, i32* %r139
+%r141 = lshr i192 %r137, 32
+%r143 = getelementptr i32, i32* %r1, i32 5
+%r144 = trunc i192 %r141 to i32
+store i32 %r144, i32* %r143
 ret void
 }
-define void @mcl_fp_montNF2L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+define void @mcl_fp_montNF6L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
 {
 %r6 = getelementptr i32, i32* %r4, i32 -1
 %r7 = load i32, i32* %r6
 %r8 = load i32, i32* %r3
-%r9 = call i96 @mulPv64x32(i32* %r2, i32 %r8)
-%r10 = trunc i96 %r9 to i32
+%r9 = call i224 @mulPv192x32(i32* %r2, i32 %r8)
+%r10 = trunc i224 %r9 to i32
 %r11 = mul i32 %r10, %r7
-%r12 = call i96 @mulPv64x32(i32* %r4, i32 %r11)
-%r13 = add i96 %r9, %r12
-%r14 = lshr i96 %r13, 32
+%r12 = call i224 @mulPv192x32(i32* %r4, i32 %r11)
+%r13 = add i224 %r9, %r12
+%r14 = lshr i224 %r13, 32
 %r16 = getelementptr i32, i32* %r3, i32 1
 %r17 = load i32, i32* %r16
-%r18 = call i96 @mulPv64x32(i32* %r2, i32 %r17)
-%r19 = add i96 %r14, %r18
-%r20 = trunc i96 %r19 to i32
+%r18 = call i224 @mulPv192x32(i32* %r2, i32 %r17)
+%r19 = add i224 %r14, %r18
+%r20 = trunc i224 %r19 to i32
 %r21 = mul i32 %r20, %r7
-%r22 = call i96 @mulPv64x32(i32* %r4, i32 %r21)
-%r23 = add i96 %r19, %r22
-%r24 = lshr i96 %r23, 32
-%r25 = trunc i96 %r24 to i64
-%r26 = load i32, i32* %r4
-%r27 = zext i32 %r26 to i64
-%r29 = getelementptr i32, i32* %r4, i32 1
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i64
-%r32 = shl i64 %r31, 32
-%r33 = or i64 %r27, %r32
-%r34 = sub i64 %r25, %r33
-%r35 = lshr i64 %r34, 63
-%r36 = trunc i64 %r35 to i1
-%r37 = select i1 %r36, i64 %r25, i64 %r34
-%r38 = trunc i64 %r37 to i32
-%r40 = getelementptr i32, i32* %r1, i32 0
-store i32 %r38, i32* %r40
-%r41 = lshr i64 %r37, 32
-%r42 = trunc i64 %r41 to i32
-%r44 = getelementptr i32, i32* %r1, i32 1
-store i32 %r42, i32* %r44
+%r22 = call i224 @mulPv192x32(i32* %r4, i32 %r21)
+%r23 = add i224 %r19, %r22
+%r24 = lshr i224 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2
+%r27 = load i32, i32* %r26
+%r28 = call i224 @mulPv192x32(i32* %r2, i32 %r27)
+%r29 = add i224 %r24, %r28
+%r30 = trunc i224 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i224 @mulPv192x32(i32* %r4, i32 %r31)
+%r33 = add i224 %r29, %r32
+%r34 = lshr i224 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3
+%r37 = load i32, i32* %r36
+%r38 = call i224 @mulPv192x32(i32* %r2, i32 %r37)
+%r39 = add i224 %r34, %r38
+%r40 = trunc i224 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i224 @mulPv192x32(i32* %r4, i32 %r41)
+%r43 = add i224 %r39, %r42
+%r44 = lshr i224 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4
+%r47 = load i32, i32* %r46
+%r48 = call i224 @mulPv192x32(i32* %r2, i32 %r47)
+%r49 = add i224 %r44, %r48
+%r50 = trunc i224 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i224 @mulPv192x32(i32* %r4, i32 %r51)
+%r53 = add i224 %r49, %r52
+%r54 = lshr i224 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5
+%r57 = load i32, i32* %r56
+%r58 = call i224 @mulPv192x32(i32* %r2, i32 %r57)
+%r59 = add i224 %r54, %r58
+%r60 = trunc i224 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i224 @mulPv192x32(i32* %r4, i32 %r61)
+%r63 = add i224 %r59, %r62
+%r64 = lshr i224 %r63, 32
+%r65 = trunc i224 %r64 to i192
+%r66 = load i32, i32* %r4
+%r67 = zext i32 %r66 to i64
+%r69 = getelementptr i32, i32* %r4, i32 1
+%r70 = load i32, i32* %r69
+%r71 = zext i32 %r70 to i64
+%r72 = shl i64 %r71, 32
+%r73 = or i64 %r67, %r72
+%r74 = zext i64 %r73 to i96
+%r76 = getelementptr i32, i32* %r4, i32 2
+%r77 = load i32, i32* %r76
+%r78 = zext i32 %r77 to i96
+%r79 = shl i96 %r78, 64
+%r80 = or i96 %r74, %r79
+%r81 = zext i96 %r80 to i128
+%r83 = getelementptr i32, i32* %r4, i32 3
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i128
+%r86 = shl i128 %r85, 96
+%r87 = or i128 %r81, %r86
+%r88 = zext i128 %r87 to i160
+%r90 = getelementptr i32, i32* %r4, i32 4
+%r91 = load i32, i32* %r90
+%r92 = zext i32 %r91 to i160
+%r93 = shl i160 %r92, 128
+%r94 = or i160 %r88, %r93
+%r95 = zext i160 %r94 to i192
+%r97 = getelementptr i32, i32* %r4, i32 5
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i192
+%r100 = shl i192 %r99, 160
+%r101 = or i192 %r95, %r100
+%r102 = sub i192 %r65, %r101
+%r103 = lshr i192 %r102, 191
+%r104 = trunc i192 %r103 to i1
+%r105 = select i1 %r104, i192 %r65, i192 %r102
+%r107 = getelementptr i32, i32* %r1, i32 0
+%r108 = trunc i192 %r105 to i32
+store i32 %r108, i32* %r107
+%r109 = lshr i192 %r105, 32
+%r111 = getelementptr i32, i32* %r1, i32 1
+%r112 = trunc i192 %r109 to i32
+store i32 %r112, i32* %r111
+%r113 = lshr i192 %r109, 32
+%r115 = getelementptr i32, i32* %r1, i32 2
+%r116 = trunc i192 %r113 to i32
+store i32 %r116, i32* %r115
+%r117 = lshr i192 %r113, 32
+%r119 = getelementptr i32, i32* %r1, i32 3
+%r120 = trunc i192 %r117 to i32
+store i32 %r120, i32* %r119
+%r121 = lshr i192 %r117, 32
+%r123 = getelementptr i32, i32* %r1, i32 4
+%r124 = trunc i192 %r121 to i32
+store i32 %r124, i32* %r123
+%r125 = lshr i192 %r121, 32
+%r127 = getelementptr i32, i32* %r1, i32 5
+%r128 = trunc i192 %r125 to i32
+store i32 %r128, i32* %r127
 ret void
 }
-define void @mcl_fp_montRed2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+define void @mcl_fp_montRed6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
 {
 %r5 = getelementptr i32, i32* %r3, i32 -1
 %r6 = load i32, i32* %r5
@@ -1011,57 +1030,424 @@ define void @mcl_fp_montRed2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r12 = zext i32 %r11 to i64
 %r13 = shl i64 %r12, 32
 %r14 = or i64 %r8, %r13
-%r15 = load i32, i32* %r2
-%r16 = zext i32 %r15 to i64
-%r18 = getelementptr i32, i32* %r2, i32 1
-%r19 = load i32, i32* %r18
-%r20 = zext i32 %r19 to i64
-%r21 = shl i64 %r20, 32
-%r22 = or i64 %r16, %r21
-%r23 = zext i64 %r22 to i96
-%r25 = getelementptr i32, i32* %r2, i32 2
-%r26 = load i32, i32* %r25
-%r27 = zext i32 %r26 to i96
-%r28 = shl i96 %r27, 64
-%r29 = or i96 %r23, %r28
-%r30 = zext i96 %r29 to i128
-%r32 = getelementptr i32, i32* %r2, i32 3
-%r33 = load i32, i32* %r32
-%r34 = zext i32 %r33 to i128
-%r35 = shl i128 %r34, 96
-%r36 = or i128 %r30, %r35
-%r37 = zext i128 %r36 to i160
-%r38 = trunc i160 %r37 to i32
-%r39 = mul i32 %r38, %r6
-%r40 = call i96 @mulPv64x32(i32* %r3, i32 %r39)
-%r41 = zext i96 %r40 to i160
-%r42 = add i160 %r37, %r41
-%r43 = lshr i160 %r42, 32
-%r44 = trunc i160 %r43 to i128
-%r45 = trunc i128 %r44 to i32
-%r46 = mul i32 %r45, %r6
-%r47 = call i96 @mulPv64x32(i32* %r3, i32 %r46)
-%r48 = zext i96 %r47 to i128
-%r49 = add i128 %r44, %r48
-%r50 = lshr i128 %r49, 32
-%r51 = trunc i128 %r50 to i96
-%r52 = zext i64 %r14 to i96
-%r53 = sub i96 %r51, %r52
-%r54 = lshr i96 %r53, 64
-%r55 = trunc i96 %r54 to i1
-%r56 = select i1 %r55, i96 %r51, i96 %r53
-%r57 = trunc i96 %r56 to i64
-%r58 = trunc i64 %r57 to i32
-%r60 = getelementptr i32, i32* %r1, i32 0
-store i32 %r58, i32* %r60
-%r61 = lshr i64 %r57, 32
-%r62 = trunc i64 %r61 to i32
-%r64 = getelementptr i32, i32* %r1, i32 1
-store i32 %r62, i32* %r64
-ret void
-}
-define i32 @mcl_fp_addPre2L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = load i32, i32* %r2
+%r44 = zext i32 %r43 to i64
+%r46 = getelementptr i32, i32* %r2, i32 1
+%r47 = load i32, i32* %r46
+%r48 = zext i32 %r47 to i64
+%r49 = shl i64 %r48, 32
+%r50 = or i64 %r44, %r49
+%r51 = zext i64 %r50 to i96
+%r53 = getelementptr i32, i32* %r2, i32 2
+%r54 = load i32, i32* %r53
+%r55 = zext i32 %r54 to i96
+%r56 = shl i96 %r55, 64
+%r57 = or i96 %r51, %r56
+%r58 = zext i96 %r57 to i128
+%r60 = getelementptr i32, i32* %r2, i32 3
+%r61 = load i32, i32* %r60
+%r62 = zext i32 %r61 to i128
+%r63 = shl i128 %r62, 96
+%r64 = or i128 %r58, %r63
+%r65 = zext i128 %r64 to i160
+%r67 = getelementptr i32, i32* %r2, i32 4
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i160
+%r70 = shl i160 %r69, 128
+%r71 = or i160 %r65, %r70
+%r72 = zext i160 %r71 to i192
+%r74 = getelementptr i32, i32* %r2, i32 5
+%r75 = load i32, i32* %r74
+%r76 = zext i32 %r75 to i192
+%r77 = shl i192 %r76, 160
+%r78 = or i192 %r72, %r77
+%r79 = trunc i192 %r78 to i32
+%r80 = mul i32 %r79, %r6
+%r81 = call i224 @mulPv192x32(i32* %r3, i32 %r80)
+%r83 = getelementptr i32, i32* %r2, i32 6
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i224
+%r86 = shl i224 %r85, 192
+%r87 = zext i192 %r78 to i224
+%r88 = or i224 %r86, %r87
+%r89 = zext i224 %r88 to i256
+%r90 = zext i224 %r81 to i256
+%r91 = add i256 %r89, %r90
+%r92 = lshr i256 %r91, 32
+%r93 = trunc i256 %r92 to i224
+%r94 = lshr i224 %r93, 192
+%r95 = trunc i224 %r94 to i32
+%r96 = trunc i224 %r93 to i192
+%r97 = trunc i192 %r96 to i32
+%r98 = mul i32 %r97, %r6
+%r99 = call i224 @mulPv192x32(i32* %r3, i32 %r98)
+%r100 = zext i32 %r95 to i224
+%r101 = shl i224 %r100, 192
+%r102 = add i224 %r99, %r101
+%r104 = getelementptr i32, i32* %r2, i32 7
+%r105 = load i32, i32* %r104
+%r106 = zext i32 %r105 to i224
+%r107 = shl i224 %r106, 192
+%r108 = zext i192 %r96 to i224
+%r109 = or i224 %r107, %r108
+%r110 = zext i224 %r109 to i256
+%r111 = zext i224 %r102 to i256
+%r112 = add i256 %r110, %r111
+%r113 = lshr i256 %r112, 32
+%r114 = trunc i256 %r113 to i224
+%r115 = lshr i224 %r114, 192
+%r116 = trunc i224 %r115 to i32
+%r117 = trunc i224 %r114 to i192
+%r118 = trunc i192 %r117 to i32
+%r119 = mul i32 %r118, %r6
+%r120 = call i224 @mulPv192x32(i32* %r3, i32 %r119)
+%r121 = zext i32 %r116 to i224
+%r122 = shl i224 %r121, 192
+%r123 = add i224 %r120, %r122
+%r125 = getelementptr i32, i32* %r2, i32 8
+%r126 = load i32, i32* %r125
+%r127 = zext i32 %r126 to i224
+%r128 = shl i224 %r127, 192
+%r129 = zext i192 %r117 to i224
+%r130 = or i224 %r128, %r129
+%r131 = zext i224 %r130 to i256
+%r132 = zext i224 %r123 to i256
+%r133 = add i256 %r131, %r132
+%r134 = lshr i256 %r133, 32
+%r135 = trunc i256 %r134 to i224
+%r136 = lshr i224 %r135, 192
+%r137 = trunc i224 %r136 to i32
+%r138 = trunc i224 %r135 to i192
+%r139 = trunc i192 %r138 to i32
+%r140 = mul i32 %r139, %r6
+%r141 = call i224 @mulPv192x32(i32* %r3, i32 %r140)
+%r142 = zext i32 %r137 to i224
+%r143 = shl i224 %r142, 192
+%r144 = add i224 %r141, %r143
+%r146 = getelementptr i32, i32* %r2, i32 9
+%r147 = load i32, i32* %r146
+%r148 = zext i32 %r147 to i224
+%r149 = shl i224 %r148, 192
+%r150 = zext i192 %r138 to i224
+%r151 = or i224 %r149, %r150
+%r152 = zext i224 %r151 to i256
+%r153 = zext i224 %r144 to i256
+%r154 = add i256 %r152, %r153
+%r155 = lshr i256 %r154, 32
+%r156 = trunc i256 %r155 to i224
+%r157 = lshr i224 %r156, 192
+%r158 = trunc i224 %r157 to i32
+%r159 = trunc i224 %r156 to i192
+%r160 = trunc i192 %r159 to i32
+%r161 = mul i32 %r160, %r6
+%r162 = call i224 @mulPv192x32(i32* %r3, i32 %r161)
+%r163 = zext i32 %r158 to i224
+%r164 = shl i224 %r163, 192
+%r165 = add i224 %r162, %r164
+%r167 = getelementptr i32, i32* %r2, i32 10
+%r168 = load i32, i32* %r167
+%r169 = zext i32 %r168 to i224
+%r170 = shl i224 %r169, 192
+%r171 = zext i192 %r159 to i224
+%r172 = or i224 %r170, %r171
+%r173 = zext i224 %r172 to i256
+%r174 = zext i224 %r165 to i256
+%r175 = add i256 %r173, %r174
+%r176 = lshr i256 %r175, 32
+%r177 = trunc i256 %r176 to i224
+%r178 = lshr i224 %r177, 192
+%r179 = trunc i224 %r178 to i32
+%r180 = trunc i224 %r177 to i192
+%r181 = trunc i192 %r180 to i32
+%r182 = mul i32 %r181, %r6
+%r183 = call i224 @mulPv192x32(i32* %r3, i32 %r182)
+%r184 = zext i32 %r179 to i224
+%r185 = shl i224 %r184, 192
+%r186 = add i224 %r183, %r185
+%r188 = getelementptr i32, i32* %r2, i32 11
+%r189 = load i32, i32* %r188
+%r190 = zext i32 %r189 to i224
+%r191 = shl i224 %r190, 192
+%r192 = zext i192 %r180 to i224
+%r193 = or i224 %r191, %r192
+%r194 = zext i224 %r193 to i256
+%r195 = zext i224 %r186 to i256
+%r196 = add i256 %r194, %r195
+%r197 = lshr i256 %r196, 32
+%r198 = trunc i256 %r197 to i224
+%r199 = lshr i224 %r198, 192
+%r200 = trunc i224 %r199 to i32
+%r201 = trunc i224 %r198 to i192
+%r202 = zext i192 %r42 to i224
+%r203 = zext i192 %r201 to i224
+%r204 = sub i224 %r203, %r202
+%r205 = lshr i224 %r204, 192
+%r206 = trunc i224 %r205 to i1
+%r207 = select i1 %r206, i224 %r203, i224 %r204
+%r208 = trunc i224 %r207 to i192
+%r210 = getelementptr i32, i32* %r1, i32 0
+%r211 = trunc i192 %r208 to i32
+store i32 %r211, i32* %r210
+%r212 = lshr i192 %r208, 32
+%r214 = getelementptr i32, i32* %r1, i32 1
+%r215 = trunc i192 %r212 to i32
+store i32 %r215, i32* %r214
+%r216 = lshr i192 %r212, 32
+%r218 = getelementptr i32, i32* %r1, i32 2
+%r219 = trunc i192 %r216 to i32
+store i32 %r219, i32* %r218
+%r220 = lshr i192 %r216, 32
+%r222 = getelementptr i32, i32* %r1, i32 3
+%r223 = trunc i192 %r220 to i32
+store i32 %r223, i32* %r222
+%r224 = lshr i192 %r220, 32
+%r226 = getelementptr i32, i32* %r1, i32 4
+%r227 = trunc i192 %r224 to i32
+store i32 %r227, i32* %r226
+%r228 = lshr i192 %r224, 32
+%r230 = getelementptr i32, i32* %r1, i32 5
+%r231 = trunc i192 %r228 to i32
+store i32 %r231, i32* %r230
+ret void
+}
+define void @mcl_fp_montRedNF6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = load i32, i32* %r2
+%r44 = zext i32 %r43 to i64
+%r46 = getelementptr i32, i32* %r2, i32 1
+%r47 = load i32, i32* %r46
+%r48 = zext i32 %r47 to i64
+%r49 = shl i64 %r48, 32
+%r50 = or i64 %r44, %r49
+%r51 = zext i64 %r50 to i96
+%r53 = getelementptr i32, i32* %r2, i32 2
+%r54 = load i32, i32* %r53
+%r55 = zext i32 %r54 to i96
+%r56 = shl i96 %r55, 64
+%r57 = or i96 %r51, %r56
+%r58 = zext i96 %r57 to i128
+%r60 = getelementptr i32, i32* %r2, i32 3
+%r61 = load i32, i32* %r60
+%r62 = zext i32 %r61 to i128
+%r63 = shl i128 %r62, 96
+%r64 = or i128 %r58, %r63
+%r65 = zext i128 %r64 to i160
+%r67 = getelementptr i32, i32* %r2, i32 4
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i160
+%r70 = shl i160 %r69, 128
+%r71 = or i160 %r65, %r70
+%r72 = zext i160 %r71 to i192
+%r74 = getelementptr i32, i32* %r2, i32 5
+%r75 = load i32, i32* %r74
+%r76 = zext i32 %r75 to i192
+%r77 = shl i192 %r76, 160
+%r78 = or i192 %r72, %r77
+%r79 = trunc i192 %r78 to i32
+%r80 = mul i32 %r79, %r6
+%r81 = call i224 @mulPv192x32(i32* %r3, i32 %r80)
+%r83 = getelementptr i32, i32* %r2, i32 6
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i224
+%r86 = shl i224 %r85, 192
+%r87 = zext i192 %r78 to i224
+%r88 = or i224 %r86, %r87
+%r89 = zext i224 %r88 to i256
+%r90 = zext i224 %r81 to i256
+%r91 = add i256 %r89, %r90
+%r92 = lshr i256 %r91, 32
+%r93 = trunc i256 %r92 to i224
+%r94 = lshr i224 %r93, 192
+%r95 = trunc i224 %r94 to i32
+%r96 = trunc i224 %r93 to i192
+%r97 = trunc i192 %r96 to i32
+%r98 = mul i32 %r97, %r6
+%r99 = call i224 @mulPv192x32(i32* %r3, i32 %r98)
+%r100 = zext i32 %r95 to i224
+%r101 = shl i224 %r100, 192
+%r102 = add i224 %r99, %r101
+%r104 = getelementptr i32, i32* %r2, i32 7
+%r105 = load i32, i32* %r104
+%r106 = zext i32 %r105 to i224
+%r107 = shl i224 %r106, 192
+%r108 = zext i192 %r96 to i224
+%r109 = or i224 %r107, %r108
+%r110 = zext i224 %r109 to i256
+%r111 = zext i224 %r102 to i256
+%r112 = add i256 %r110, %r111
+%r113 = lshr i256 %r112, 32
+%r114 = trunc i256 %r113 to i224
+%r115 = lshr i224 %r114, 192
+%r116 = trunc i224 %r115 to i32
+%r117 = trunc i224 %r114 to i192
+%r118 = trunc i192 %r117 to i32
+%r119 = mul i32 %r118, %r6
+%r120 = call i224 @mulPv192x32(i32* %r3, i32 %r119)
+%r121 = zext i32 %r116 to i224
+%r122 = shl i224 %r121, 192
+%r123 = add i224 %r120, %r122
+%r125 = getelementptr i32, i32* %r2, i32 8
+%r126 = load i32, i32* %r125
+%r127 = zext i32 %r126 to i224
+%r128 = shl i224 %r127, 192
+%r129 = zext i192 %r117 to i224
+%r130 = or i224 %r128, %r129
+%r131 = zext i224 %r130 to i256
+%r132 = zext i224 %r123 to i256
+%r133 = add i256 %r131, %r132
+%r134 = lshr i256 %r133, 32
+%r135 = trunc i256 %r134 to i224
+%r136 = lshr i224 %r135, 192
+%r137 = trunc i224 %r136 to i32
+%r138 = trunc i224 %r135 to i192
+%r139 = trunc i192 %r138 to i32
+%r140 = mul i32 %r139, %r6
+%r141 = call i224 @mulPv192x32(i32* %r3, i32 %r140)
+%r142 = zext i32 %r137 to i224
+%r143 = shl i224 %r142, 192
+%r144 = add i224 %r141, %r143
+%r146 = getelementptr i32, i32* %r2, i32 9
+%r147 = load i32, i32* %r146
+%r148 = zext i32 %r147 to i224
+%r149 = shl i224 %r148, 192
+%r150 = zext i192 %r138 to i224
+%r151 = or i224 %r149, %r150
+%r152 = zext i224 %r151 to i256
+%r153 = zext i224 %r144 to i256
+%r154 = add i256 %r152, %r153
+%r155 = lshr i256 %r154, 32
+%r156 = trunc i256 %r155 to i224
+%r157 = lshr i224 %r156, 192
+%r158 = trunc i224 %r157 to i32
+%r159 = trunc i224 %r156 to i192
+%r160 = trunc i192 %r159 to i32
+%r161 = mul i32 %r160, %r6
+%r162 = call i224 @mulPv192x32(i32* %r3, i32 %r161)
+%r163 = zext i32 %r158 to i224
+%r164 = shl i224 %r163, 192
+%r165 = add i224 %r162, %r164
+%r167 = getelementptr i32, i32* %r2, i32 10
+%r168 = load i32, i32* %r167
+%r169 = zext i32 %r168 to i224
+%r170 = shl i224 %r169, 192
+%r171 = zext i192 %r159 to i224
+%r172 = or i224 %r170, %r171
+%r173 = zext i224 %r172 to i256
+%r174 = zext i224 %r165 to i256
+%r175 = add i256 %r173, %r174
+%r176 = lshr i256 %r175, 32
+%r177 = trunc i256 %r176 to i224
+%r178 = lshr i224 %r177, 192
+%r179 = trunc i224 %r178 to i32
+%r180 = trunc i224 %r177 to i192
+%r181 = trunc i192 %r180 to i32
+%r182 = mul i32 %r181, %r6
+%r183 = call i224 @mulPv192x32(i32* %r3, i32 %r182)
+%r184 = zext i32 %r179 to i224
+%r185 = shl i224 %r184, 192
+%r186 = add i224 %r183, %r185
+%r188 = getelementptr i32, i32* %r2, i32 11
+%r189 = load i32, i32* %r188
+%r190 = zext i32 %r189 to i224
+%r191 = shl i224 %r190, 192
+%r192 = zext i192 %r180 to i224
+%r193 = or i224 %r191, %r192
+%r194 = zext i224 %r193 to i256
+%r195 = zext i224 %r186 to i256
+%r196 = add i256 %r194, %r195
+%r197 = lshr i256 %r196, 32
+%r198 = trunc i256 %r197 to i224
+%r199 = lshr i224 %r198, 192
+%r200 = trunc i224 %r199 to i32
+%r201 = trunc i224 %r198 to i192
+%r202 = sub i192 %r201, %r42
+%r203 = lshr i192 %r202, 191
+%r204 = trunc i192 %r203 to i1
+%r205 = select i1 %r204, i192 %r201, i192 %r202
+%r207 = getelementptr i32, i32* %r1, i32 0
+%r208 = trunc i192 %r205 to i32
+store i32 %r208, i32* %r207
+%r209 = lshr i192 %r205, 32
+%r211 = getelementptr i32, i32* %r1, i32 1
+%r212 = trunc i192 %r209 to i32
+store i32 %r212, i32* %r211
+%r213 = lshr i192 %r209, 32
+%r215 = getelementptr i32, i32* %r1, i32 2
+%r216 = trunc i192 %r213 to i32
+store i32 %r216, i32* %r215
+%r217 = lshr i192 %r213, 32
+%r219 = getelementptr i32, i32* %r1, i32 3
+%r220 = trunc i192 %r217 to i32
+store i32 %r220, i32* %r219
+%r221 = lshr i192 %r217, 32
+%r223 = getelementptr i32, i32* %r1, i32 4
+%r224 = trunc i192 %r221 to i32
+store i32 %r224, i32* %r223
+%r225 = lshr i192 %r221, 32
+%r227 = getelementptr i32, i32* %r1, i32 5
+%r228 = trunc i192 %r225 to i32
+store i32 %r228, i32* %r227
+ret void
+}
+define i32 @mcl_fp_addPre6L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
 %r5 = load i32, i32* %r3
 %r6 = zext i32 %r5 to i64
 %r8 = getelementptr i32, i32* %r3, i32 1
@@ -1070,28 +1456,92 @@ define i32 @mcl_fp_addPre2L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias
 %r11 = shl i64 %r10, 32
 %r12 = or i64 %r6, %r11
 %r13 = zext i64 %r12 to i96
-%r14 = load i32, i32* %r4
-%r15 = zext i32 %r14 to i64
-%r17 = getelementptr i32, i32* %r4, i32 1
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i64
-%r20 = shl i64 %r19, 32
-%r21 = or i64 %r15, %r20
-%r22 = zext i64 %r21 to i96
-%r23 = add i96 %r13, %r22
-%r24 = trunc i96 %r23 to i64
-%r25 = trunc i64 %r24 to i32
-%r27 = getelementptr i32, i32* %r2, i32 0
-store i32 %r25, i32* %r27
-%r28 = lshr i64 %r24, 32
-%r29 = trunc i64 %r28 to i32
-%r31 = getelementptr i32, i32* %r2, i32 1
-store i32 %r29, i32* %r31
-%r32 = lshr i96 %r23, 64
-%r33 = trunc i96 %r32 to i32
-ret i32 %r33
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r42 = load i32, i32* %r4
+%r43 = zext i32 %r42 to i64
+%r45 = getelementptr i32, i32* %r4, i32 1
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i64
+%r48 = shl i64 %r47, 32
+%r49 = or i64 %r43, %r48
+%r50 = zext i64 %r49 to i96
+%r52 = getelementptr i32, i32* %r4, i32 2
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i96
+%r55 = shl i96 %r54, 64
+%r56 = or i96 %r50, %r55
+%r57 = zext i96 %r56 to i128
+%r59 = getelementptr i32, i32* %r4, i32 3
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i128
+%r62 = shl i128 %r61, 96
+%r63 = or i128 %r57, %r62
+%r64 = zext i128 %r63 to i160
+%r66 = getelementptr i32, i32* %r4, i32 4
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i160
+%r69 = shl i160 %r68, 128
+%r70 = or i160 %r64, %r69
+%r71 = zext i160 %r70 to i192
+%r73 = getelementptr i32, i32* %r4, i32 5
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i192
+%r76 = shl i192 %r75, 160
+%r77 = or i192 %r71, %r76
+%r78 = zext i192 %r77 to i224
+%r79 = add i224 %r41, %r78
+%r80 = trunc i224 %r79 to i192
+%r82 = getelementptr i32, i32* %r2, i32 0
+%r83 = trunc i192 %r80 to i32
+store i32 %r83, i32* %r82
+%r84 = lshr i192 %r80, 32
+%r86 = getelementptr i32, i32* %r2, i32 1
+%r87 = trunc i192 %r84 to i32
+store i32 %r87, i32* %r86
+%r88 = lshr i192 %r84, 32
+%r90 = getelementptr i32, i32* %r2, i32 2
+%r91 = trunc i192 %r88 to i32
+store i32 %r91, i32* %r90
+%r92 = lshr i192 %r88, 32
+%r94 = getelementptr i32, i32* %r2, i32 3
+%r95 = trunc i192 %r92 to i32
+store i32 %r95, i32* %r94
+%r96 = lshr i192 %r92, 32
+%r98 = getelementptr i32, i32* %r2, i32 4
+%r99 = trunc i192 %r96 to i32
+store i32 %r99, i32* %r98
+%r100 = lshr i192 %r96, 32
+%r102 = getelementptr i32, i32* %r2, i32 5
+%r103 = trunc i192 %r100 to i32
+store i32 %r103, i32* %r102
+%r104 = lshr i224 %r79, 192
+%r105 = trunc i224 %r104 to i32
+ret i32 %r105
 }
-define i32 @mcl_fp_subPre2L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define i32 @mcl_fp_subPre6L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r3
 %r6 = zext i32 %r5 to i64
@@ -1101,300 +1551,152 @@ define i32 @mcl_fp_subPre2L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias
 %r11 = shl i64 %r10, 32
 %r12 = or i64 %r6, %r11
 %r13 = zext i64 %r12 to i96
-%r14 = load i32, i32* %r4
-%r15 = zext i32 %r14 to i64
-%r17 = getelementptr i32, i32* %r4, i32 1
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i64
-%r20 = shl i64 %r19, 32
-%r21 = or i64 %r15, %r20
-%r22 = zext i64 %r21 to i96
-%r23 = sub i96 %r13, %r22
-%r24 = trunc i96 %r23 to i64
-%r25 = trunc i64 %r24 to i32
-%r27 = getelementptr i32, i32* %r2, i32 0
-store i32 %r25, i32* %r27
-%r28 = lshr i64 %r24, 32
-%r29 = trunc i64 %r28 to i32
-%r31 = getelementptr i32, i32* %r2, i32 1
-store i32 %r29, i32* %r31
-%r32 = lshr i96 %r23, 64
-%r33 = trunc i96 %r32 to i32
-%r35 = and i32 %r33, 1
-ret i32 %r35
-}
-define void @mcl_fp_shr1_2L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = lshr i64 %r10, 1
-%r12 = trunc i64 %r11 to i32
-%r14 = getelementptr i32, i32* %r1, i32 0
-store i32 %r12, i32* %r14
-%r15 = lshr i64 %r11, 32
-%r16 = trunc i64 %r15 to i32
-%r18 = getelementptr i32, i32* %r1, i32 1
-store i32 %r16, i32* %r18
-ret void
-}
-define void @mcl_fp_add2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = load i32, i32* %r3
-%r14 = zext i32 %r13 to i64
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = zext i32 %r17 to i64
-%r19 = shl i64 %r18, 32
-%r20 = or i64 %r14, %r19
-%r21 = zext i64 %r12 to i96
-%r22 = zext i64 %r20 to i96
-%r23 = add i96 %r21, %r22
-%r24 = trunc i96 %r23 to i64
-%r25 = trunc i64 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 0
-store i32 %r25, i32* %r27
-%r28 = lshr i64 %r24, 32
-%r29 = trunc i64 %r28 to i32
-%r31 = getelementptr i32, i32* %r1, i32 1
-store i32 %r29, i32* %r31
-%r32 = load i32, i32* %r4
-%r33 = zext i32 %r32 to i64
-%r35 = getelementptr i32, i32* %r4, i32 1
-%r36 = load i32, i32* %r35
-%r37 = zext i32 %r36 to i64
-%r38 = shl i64 %r37, 32
-%r39 = or i64 %r33, %r38
-%r40 = zext i64 %r39 to i96
-%r41 = sub i96 %r23, %r40
-%r42 = lshr i96 %r41, 64
-%r43 = trunc i96 %r42 to i1
-br i1%r43, label %carry, label %nocarry
-nocarry:
-%r44 = trunc i96 %r41 to i64
-%r45 = trunc i64 %r44 to i32
-%r47 = getelementptr i32, i32* %r1, i32 0
-store i32 %r45, i32* %r47
-%r48 = lshr i64 %r44, 32
-%r49 = trunc i64 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 1
-store i32 %r49, i32* %r51
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = load i32, i32* %r3
-%r14 = zext i32 %r13 to i64
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = zext i32 %r17 to i64
-%r19 = shl i64 %r18, 32
-%r20 = or i64 %r14, %r19
-%r21 = add i64 %r12, %r20
-%r22 = load i32, i32* %r4
-%r23 = zext i32 %r22 to i64
-%r25 = getelementptr i32, i32* %r4, i32 1
-%r26 = load i32, i32* %r25
-%r27 = zext i32 %r26 to i64
-%r28 = shl i64 %r27, 32
-%r29 = or i64 %r23, %r28
-%r30 = sub i64 %r21, %r29
-%r31 = lshr i64 %r30, 63
-%r32 = trunc i64 %r31 to i1
-%r33 = select i1 %r32, i64 %r21, i64 %r30
-%r34 = trunc i64 %r33 to i32
-%r36 = getelementptr i32, i32* %r1, i32 0
-store i32 %r34, i32* %r36
-%r37 = lshr i64 %r33, 32
-%r38 = trunc i64 %r37 to i32
-%r40 = getelementptr i32, i32* %r1, i32 1
-store i32 %r38, i32* %r40
-ret void
-}
-define void @mcl_fp_sub2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = load i32, i32* %r3
-%r14 = zext i32 %r13 to i64
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = zext i32 %r17 to i64
-%r19 = shl i64 %r18, 32
-%r20 = or i64 %r14, %r19
-%r21 = zext i64 %r12 to i96
-%r22 = zext i64 %r20 to i96
-%r23 = sub i96 %r21, %r22
-%r24 = trunc i96 %r23 to i64
-%r25 = lshr i96 %r23, 64
-%r26 = trunc i96 %r25 to i1
-%r27 = trunc i64 %r24 to i32
-%r29 = getelementptr i32, i32* %r1, i32 0
-store i32 %r27, i32* %r29
-%r30 = lshr i64 %r24, 32
-%r31 = trunc i64 %r30 to i32
-%r33 = getelementptr i32, i32* %r1, i32 1
-store i32 %r31, i32* %r33
-br i1%r26, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r34 = load i32, i32* %r4
-%r35 = zext i32 %r34 to i64
-%r37 = getelementptr i32, i32* %r4, i32 1
-%r38 = load i32, i32* %r37
-%r39 = zext i32 %r38 to i64
-%r40 = shl i64 %r39, 32
-%r41 = or i64 %r35, %r40
-%r42 = add i64 %r24, %r41
-%r43 = trunc i64 %r42 to i32
-%r45 = getelementptr i32, i32* %r1, i32 0
-store i32 %r43, i32* %r45
-%r46 = lshr i64 %r42, 32
-%r47 = trunc i64 %r46 to i32
-%r49 = getelementptr i32, i32* %r1, i32 1
-store i32 %r47, i32* %r49
-ret void
-}
-define void @mcl_fp_subNF2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = load i32, i32* %r3
-%r14 = zext i32 %r13 to i64
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = zext i32 %r17 to i64
-%r19 = shl i64 %r18, 32
-%r20 = or i64 %r14, %r19
-%r21 = sub i64 %r12, %r20
-%r22 = lshr i64 %r21, 63
-%r23 = trunc i64 %r22 to i1
-%r24 = load i32, i32* %r4
-%r25 = zext i32 %r24 to i64
-%r27 = getelementptr i32, i32* %r4, i32 1
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i64
-%r30 = shl i64 %r29, 32
-%r31 = or i64 %r25, %r30
-%r33 = select i1 %r23, i64 %r31, i64 0
-%r34 = add i64 %r21, %r33
-%r35 = trunc i64 %r34 to i32
-%r37 = getelementptr i32, i32* %r1, i32 0
-store i32 %r35, i32* %r37
-%r38 = lshr i64 %r34, 32
-%r39 = trunc i64 %r38 to i32
-%r41 = getelementptr i32, i32* %r1, i32 1
-store i32 %r39, i32* %r41
-ret void
-}
-define void @mcl_fpDbl_add2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
+%r15 = getelementptr i32, i32* %r3, i32 2
 %r16 = load i32, i32* %r15
 %r17 = zext i32 %r16 to i96
 %r18 = shl i96 %r17, 64
 %r19 = or i96 %r13, %r18
 %r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
+%r22 = getelementptr i32, i32* %r3, i32 3
 %r23 = load i32, i32* %r22
 %r24 = zext i32 %r23 to i128
 %r25 = shl i128 %r24, 96
 %r26 = or i128 %r20, %r25
-%r27 = load i32, i32* %r3
-%r28 = zext i32 %r27 to i64
-%r30 = getelementptr i32, i32* %r3, i32 1
-%r31 = load i32, i32* %r30
-%r32 = zext i32 %r31 to i64
-%r33 = shl i64 %r32, 32
-%r34 = or i64 %r28, %r33
-%r35 = zext i64 %r34 to i96
-%r37 = getelementptr i32, i32* %r3, i32 2
-%r38 = load i32, i32* %r37
-%r39 = zext i32 %r38 to i96
-%r40 = shl i96 %r39, 64
-%r41 = or i96 %r35, %r40
-%r42 = zext i96 %r41 to i128
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i128
-%r47 = shl i128 %r46, 96
-%r48 = or i128 %r42, %r47
-%r49 = zext i128 %r26 to i160
-%r50 = zext i128 %r48 to i160
-%r51 = add i160 %r49, %r50
-%r52 = trunc i160 %r51 to i64
-%r53 = trunc i64 %r52 to i32
-%r55 = getelementptr i32, i32* %r1, i32 0
-store i32 %r53, i32* %r55
-%r56 = lshr i64 %r52, 32
-%r57 = trunc i64 %r56 to i32
-%r59 = getelementptr i32, i32* %r1, i32 1
-store i32 %r57, i32* %r59
-%r60 = lshr i160 %r51, 64
-%r61 = trunc i160 %r60 to i96
-%r62 = load i32, i32* %r4
-%r63 = zext i32 %r62 to i64
-%r65 = getelementptr i32, i32* %r4, i32 1
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i64
-%r68 = shl i64 %r67, 32
-%r69 = or i64 %r63, %r68
-%r70 = zext i64 %r69 to i96
-%r71 = sub i96 %r61, %r70
-%r72 = lshr i96 %r71, 64
-%r73 = trunc i96 %r72 to i1
-%r74 = select i1 %r73, i96 %r61, i96 %r71
-%r75 = trunc i96 %r74 to i64
-%r77 = getelementptr i32, i32* %r1, i32 2
-%r78 = trunc i64 %r75 to i32
-%r80 = getelementptr i32, i32* %r77, i32 0
-store i32 %r78, i32* %r80
-%r81 = lshr i64 %r75, 32
-%r82 = trunc i64 %r81 to i32
-%r84 = getelementptr i32, i32* %r77, i32 1
-store i32 %r82, i32* %r84
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r42 = load i32, i32* %r4
+%r43 = zext i32 %r42 to i64
+%r45 = getelementptr i32, i32* %r4, i32 1
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i64
+%r48 = shl i64 %r47, 32
+%r49 = or i64 %r43, %r48
+%r50 = zext i64 %r49 to i96
+%r52 = getelementptr i32, i32* %r4, i32 2
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i96
+%r55 = shl i96 %r54, 64
+%r56 = or i96 %r50, %r55
+%r57 = zext i96 %r56 to i128
+%r59 = getelementptr i32, i32* %r4, i32 3
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i128
+%r62 = shl i128 %r61, 96
+%r63 = or i128 %r57, %r62
+%r64 = zext i128 %r63 to i160
+%r66 = getelementptr i32, i32* %r4, i32 4
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i160
+%r69 = shl i160 %r68, 128
+%r70 = or i160 %r64, %r69
+%r71 = zext i160 %r70 to i192
+%r73 = getelementptr i32, i32* %r4, i32 5
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i192
+%r76 = shl i192 %r75, 160
+%r77 = or i192 %r71, %r76
+%r78 = zext i192 %r77 to i224
+%r79 = sub i224 %r41, %r78
+%r80 = trunc i224 %r79 to i192
+%r82 = getelementptr i32, i32* %r2, i32 0
+%r83 = trunc i192 %r80 to i32
+store i32 %r83, i32* %r82
+%r84 = lshr i192 %r80, 32
+%r86 = getelementptr i32, i32* %r2, i32 1
+%r87 = trunc i192 %r84 to i32
+store i32 %r87, i32* %r86
+%r88 = lshr i192 %r84, 32
+%r90 = getelementptr i32, i32* %r2, i32 2
+%r91 = trunc i192 %r88 to i32
+store i32 %r91, i32* %r90
+%r92 = lshr i192 %r88, 32
+%r94 = getelementptr i32, i32* %r2, i32 3
+%r95 = trunc i192 %r92 to i32
+store i32 %r95, i32* %r94
+%r96 = lshr i192 %r92, 32
+%r98 = getelementptr i32, i32* %r2, i32 4
+%r99 = trunc i192 %r96 to i32
+store i32 %r99, i32* %r98
+%r100 = lshr i192 %r96, 32
+%r102 = getelementptr i32, i32* %r2, i32 5
+%r103 = trunc i192 %r100 to i32
+store i32 %r103, i32* %r102
+%r105 = lshr i224 %r79, 192
+%r106 = trunc i224 %r105 to i32
+%r107 = and i32 %r106, 1
+ret i32 %r107
+}
+define void @mcl_fp_shr1_6L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r3 = load i32, i32* %r2
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = lshr i192 %r38, 1
+%r41 = getelementptr i32, i32* %r1, i32 0
+%r42 = trunc i192 %r39 to i32
+store i32 %r42, i32* %r41
+%r43 = lshr i192 %r39, 32
+%r45 = getelementptr i32, i32* %r1, i32 1
+%r46 = trunc i192 %r43 to i32
+store i32 %r46, i32* %r45
+%r47 = lshr i192 %r43, 32
+%r49 = getelementptr i32, i32* %r1, i32 2
+%r50 = trunc i192 %r47 to i32
+store i32 %r50, i32* %r49
+%r51 = lshr i192 %r47, 32
+%r53 = getelementptr i32, i32* %r1, i32 3
+%r54 = trunc i192 %r51 to i32
+store i32 %r54, i32* %r53
+%r55 = lshr i192 %r51, 32
+%r57 = getelementptr i32, i32* %r1, i32 4
+%r58 = trunc i192 %r55 to i32
+store i32 %r58, i32* %r57
+%r59 = lshr i192 %r55, 32
+%r61 = getelementptr i32, i32* %r1, i32 5
+%r62 = trunc i192 %r59 to i32
+store i32 %r62, i32* %r61
 ret void
 }
-define void @mcl_fpDbl_sub2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_add6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -1415,836 +1717,21 @@ define void @mcl_fpDbl_sub2L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r24 = zext i32 %r23 to i128
 %r25 = shl i128 %r24, 96
 %r26 = or i128 %r20, %r25
-%r27 = load i32, i32* %r3
-%r28 = zext i32 %r27 to i64
-%r30 = getelementptr i32, i32* %r3, i32 1
-%r31 = load i32, i32* %r30
-%r32 = zext i32 %r31 to i64
-%r33 = shl i64 %r32, 32
-%r34 = or i64 %r28, %r33
-%r35 = zext i64 %r34 to i96
-%r37 = getelementptr i32, i32* %r3, i32 2
-%r38 = load i32, i32* %r37
-%r39 = zext i32 %r38 to i96
-%r40 = shl i96 %r39, 64
-%r41 = or i96 %r35, %r40
-%r42 = zext i96 %r41 to i128
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i128
-%r47 = shl i128 %r46, 96
-%r48 = or i128 %r42, %r47
-%r49 = zext i128 %r26 to i160
-%r50 = zext i128 %r48 to i160
-%r51 = sub i160 %r49, %r50
-%r52 = trunc i160 %r51 to i64
-%r53 = trunc i64 %r52 to i32
-%r55 = getelementptr i32, i32* %r1, i32 0
-store i32 %r53, i32* %r55
-%r56 = lshr i64 %r52, 32
-%r57 = trunc i64 %r56 to i32
-%r59 = getelementptr i32, i32* %r1, i32 1
-store i32 %r57, i32* %r59
-%r60 = lshr i160 %r51, 64
-%r61 = trunc i160 %r60 to i64
-%r62 = lshr i160 %r51, 128
-%r63 = trunc i160 %r62 to i1
-%r64 = load i32, i32* %r4
-%r65 = zext i32 %r64 to i64
-%r67 = getelementptr i32, i32* %r4, i32 1
-%r68 = load i32, i32* %r67
-%r69 = zext i32 %r68 to i64
-%r70 = shl i64 %r69, 32
-%r71 = or i64 %r65, %r70
-%r73 = select i1 %r63, i64 %r71, i64 0
-%r74 = add i64 %r61, %r73
-%r76 = getelementptr i32, i32* %r1, i32 2
-%r77 = trunc i64 %r74 to i32
-%r79 = getelementptr i32, i32* %r76, i32 0
-store i32 %r77, i32* %r79
-%r80 = lshr i64 %r74, 32
-%r81 = trunc i64 %r80 to i32
-%r83 = getelementptr i32, i32* %r76, i32 1
-store i32 %r81, i32* %r83
-ret void
-}
-define i128 @mulPv96x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r16 = zext i32 %r6 to i64
-%r17 = zext i32 %r10 to i64
-%r18 = shl i64 %r17, 32
-%r19 = or i64 %r16, %r18
-%r20 = zext i64 %r19 to i96
-%r21 = zext i32 %r14 to i96
-%r22 = shl i96 %r21, 64
-%r23 = or i96 %r20, %r22
-%r24 = zext i32 %r7 to i64
-%r25 = zext i32 %r11 to i64
-%r26 = shl i64 %r25, 32
-%r27 = or i64 %r24, %r26
-%r28 = zext i64 %r27 to i96
-%r29 = zext i32 %r15 to i96
-%r30 = shl i96 %r29, 64
-%r31 = or i96 %r28, %r30
-%r32 = zext i96 %r23 to i128
-%r33 = zext i96 %r31 to i128
-%r34 = shl i128 %r33, 32
-%r35 = add i128 %r32, %r34
-ret i128 %r35
-}
-define void @mcl_fp_mulUnitPre3L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i128 @mulPv96x32(i32* %r2, i32 %r3)
-%r5 = trunc i128 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i128 %r4, 32
-%r9 = trunc i128 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i128 %r8, 32
-%r13 = trunc i128 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i128 %r12, 32
-%r17 = trunc i128 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-ret void
-}
-define void @mcl_fpDbl_mulPre3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r4 = load i32, i32* %r3
-%r5 = call i128 @mulPv96x32(i32* %r2, i32 %r4)
-%r6 = trunc i128 %r5 to i32
-store i32 %r6, i32* %r1
-%r7 = lshr i128 %r5, 32
-%r9 = getelementptr i32, i32* %r3, i32 1
-%r10 = load i32, i32* %r9
-%r11 = call i128 @mulPv96x32(i32* %r2, i32 %r10)
-%r12 = add i128 %r7, %r11
-%r13 = trunc i128 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 1
-store i32 %r13, i32* %r15
-%r16 = lshr i128 %r12, 32
-%r18 = getelementptr i32, i32* %r3, i32 2
-%r19 = load i32, i32* %r18
-%r20 = call i128 @mulPv96x32(i32* %r2, i32 %r19)
-%r21 = add i128 %r16, %r20
-%r23 = getelementptr i32, i32* %r1, i32 2
-%r24 = trunc i128 %r21 to i32
-%r26 = getelementptr i32, i32* %r23, i32 0
-store i32 %r24, i32* %r26
-%r27 = lshr i128 %r21, 32
-%r28 = trunc i128 %r27 to i32
-%r30 = getelementptr i32, i32* %r23, i32 1
-store i32 %r28, i32* %r30
-%r31 = lshr i128 %r27, 32
-%r32 = trunc i128 %r31 to i32
-%r34 = getelementptr i32, i32* %r23, i32 2
-store i32 %r32, i32* %r34
-%r35 = lshr i128 %r31, 32
-%r36 = trunc i128 %r35 to i32
-%r38 = getelementptr i32, i32* %r23, i32 3
-store i32 %r36, i32* %r38
-ret void
-}
-define void @mcl_fpDbl_sqrPre3L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = call i128 @mulPv96x32(i32* %r2, i32 %r3)
-%r5 = trunc i128 %r4 to i32
-store i32 %r5, i32* %r1
-%r6 = lshr i128 %r4, 32
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = call i128 @mulPv96x32(i32* %r2, i32 %r9)
-%r11 = add i128 %r6, %r10
-%r12 = trunc i128 %r11 to i32
-%r14 = getelementptr i32, i32* %r1, i32 1
-store i32 %r12, i32* %r14
-%r15 = lshr i128 %r11, 32
-%r17 = getelementptr i32, i32* %r2, i32 2
-%r18 = load i32, i32* %r17
-%r19 = call i128 @mulPv96x32(i32* %r2, i32 %r18)
-%r20 = add i128 %r15, %r19
-%r22 = getelementptr i32, i32* %r1, i32 2
-%r23 = trunc i128 %r20 to i32
-%r25 = getelementptr i32, i32* %r22, i32 0
-store i32 %r23, i32* %r25
-%r26 = lshr i128 %r20, 32
-%r27 = trunc i128 %r26 to i32
-%r29 = getelementptr i32, i32* %r22, i32 1
-store i32 %r27, i32* %r29
-%r30 = lshr i128 %r26, 32
-%r31 = trunc i128 %r30 to i32
-%r33 = getelementptr i32, i32* %r22, i32 2
-store i32 %r31, i32* %r33
-%r34 = lshr i128 %r30, 32
-%r35 = trunc i128 %r34 to i32
-%r37 = getelementptr i32, i32* %r22, i32 3
-store i32 %r35, i32* %r37
-ret void
-}
-define void @mcl_fp_mont3L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i128 @mulPv96x32(i32* %r2, i32 %r10)
-%r12 = zext i128 %r11 to i160
-%r13 = trunc i128 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i128 @mulPv96x32(i32* %r4, i32 %r14)
-%r16 = zext i128 %r15 to i160
-%r17 = add i160 %r12, %r16
-%r18 = lshr i160 %r17, 32
-%r20 = getelementptr i32, i32* %r3, i32 1
-%r21 = load i32, i32* %r20
-%r22 = call i128 @mulPv96x32(i32* %r2, i32 %r21)
-%r23 = zext i128 %r22 to i160
-%r24 = add i160 %r18, %r23
-%r25 = trunc i160 %r24 to i32
-%r26 = mul i32 %r25, %r7
-%r27 = call i128 @mulPv96x32(i32* %r4, i32 %r26)
-%r28 = zext i128 %r27 to i160
-%r29 = add i160 %r24, %r28
-%r30 = lshr i160 %r29, 32
-%r32 = getelementptr i32, i32* %r3, i32 2
-%r33 = load i32, i32* %r32
-%r34 = call i128 @mulPv96x32(i32* %r2, i32 %r33)
-%r35 = zext i128 %r34 to i160
-%r36 = add i160 %r30, %r35
-%r37 = trunc i160 %r36 to i32
-%r38 = mul i32 %r37, %r7
-%r39 = call i128 @mulPv96x32(i32* %r4, i32 %r38)
-%r40 = zext i128 %r39 to i160
-%r41 = add i160 %r36, %r40
-%r42 = lshr i160 %r41, 32
-%r43 = trunc i160 %r42 to i128
-%r44 = load i32, i32* %r4
-%r45 = zext i32 %r44 to i64
-%r47 = getelementptr i32, i32* %r4, i32 1
-%r48 = load i32, i32* %r47
-%r49 = zext i32 %r48 to i64
-%r50 = shl i64 %r49, 32
-%r51 = or i64 %r45, %r50
-%r52 = zext i64 %r51 to i96
-%r54 = getelementptr i32, i32* %r4, i32 2
-%r55 = load i32, i32* %r54
-%r56 = zext i32 %r55 to i96
-%r57 = shl i96 %r56, 64
-%r58 = or i96 %r52, %r57
-%r59 = zext i96 %r58 to i128
-%r60 = sub i128 %r43, %r59
-%r61 = lshr i128 %r60, 96
-%r62 = trunc i128 %r61 to i1
-%r63 = select i1 %r62, i128 %r43, i128 %r60
-%r64 = trunc i128 %r63 to i96
-%r65 = trunc i96 %r64 to i32
-%r67 = getelementptr i32, i32* %r1, i32 0
-store i32 %r65, i32* %r67
-%r68 = lshr i96 %r64, 32
-%r69 = trunc i96 %r68 to i32
-%r71 = getelementptr i32, i32* %r1, i32 1
-store i32 %r69, i32* %r71
-%r72 = lshr i96 %r68, 32
-%r73 = trunc i96 %r72 to i32
-%r75 = getelementptr i32, i32* %r1, i32 2
-store i32 %r73, i32* %r75
-ret void
-}
-define void @mcl_fp_montNF3L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i128 @mulPv96x32(i32* %r2, i32 %r8)
-%r10 = trunc i128 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i128 @mulPv96x32(i32* %r4, i32 %r11)
-%r13 = add i128 %r9, %r12
-%r14 = lshr i128 %r13, 32
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = call i128 @mulPv96x32(i32* %r2, i32 %r17)
-%r19 = add i128 %r14, %r18
-%r20 = trunc i128 %r19 to i32
-%r21 = mul i32 %r20, %r7
-%r22 = call i128 @mulPv96x32(i32* %r4, i32 %r21)
-%r23 = add i128 %r19, %r22
-%r24 = lshr i128 %r23, 32
-%r26 = getelementptr i32, i32* %r3, i32 2
-%r27 = load i32, i32* %r26
-%r28 = call i128 @mulPv96x32(i32* %r2, i32 %r27)
-%r29 = add i128 %r24, %r28
-%r30 = trunc i128 %r29 to i32
-%r31 = mul i32 %r30, %r7
-%r32 = call i128 @mulPv96x32(i32* %r4, i32 %r31)
-%r33 = add i128 %r29, %r32
-%r34 = lshr i128 %r33, 32
-%r35 = trunc i128 %r34 to i96
-%r36 = load i32, i32* %r4
-%r37 = zext i32 %r36 to i64
-%r39 = getelementptr i32, i32* %r4, i32 1
-%r40 = load i32, i32* %r39
-%r41 = zext i32 %r40 to i64
-%r42 = shl i64 %r41, 32
-%r43 = or i64 %r37, %r42
-%r44 = zext i64 %r43 to i96
-%r46 = getelementptr i32, i32* %r4, i32 2
-%r47 = load i32, i32* %r46
-%r48 = zext i32 %r47 to i96
-%r49 = shl i96 %r48, 64
-%r50 = or i96 %r44, %r49
-%r51 = sub i96 %r35, %r50
-%r52 = lshr i96 %r51, 95
-%r53 = trunc i96 %r52 to i1
-%r54 = select i1 %r53, i96 %r35, i96 %r51
-%r55 = trunc i96 %r54 to i32
-%r57 = getelementptr i32, i32* %r1, i32 0
-store i32 %r55, i32* %r57
-%r58 = lshr i96 %r54, 32
-%r59 = trunc i96 %r58 to i32
-%r61 = getelementptr i32, i32* %r1, i32 1
-store i32 %r59, i32* %r61
-%r62 = lshr i96 %r58, 32
-%r63 = trunc i96 %r62 to i32
-%r65 = getelementptr i32, i32* %r1, i32 2
-store i32 %r63, i32* %r65
-ret void
-}
-define void @mcl_fp_montRed3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = load i32, i32* %r2
-%r23 = zext i32 %r22 to i64
-%r25 = getelementptr i32, i32* %r2, i32 1
-%r26 = load i32, i32* %r25
-%r27 = zext i32 %r26 to i64
-%r28 = shl i64 %r27, 32
-%r29 = or i64 %r23, %r28
-%r30 = zext i64 %r29 to i96
-%r32 = getelementptr i32, i32* %r2, i32 2
-%r33 = load i32, i32* %r32
-%r34 = zext i32 %r33 to i96
-%r35 = shl i96 %r34, 64
-%r36 = or i96 %r30, %r35
-%r37 = zext i96 %r36 to i128
-%r39 = getelementptr i32, i32* %r2, i32 3
-%r40 = load i32, i32* %r39
-%r41 = zext i32 %r40 to i128
-%r42 = shl i128 %r41, 96
-%r43 = or i128 %r37, %r42
-%r44 = zext i128 %r43 to i160
-%r46 = getelementptr i32, i32* %r2, i32 4
-%r47 = load i32, i32* %r46
-%r48 = zext i32 %r47 to i160
-%r49 = shl i160 %r48, 128
-%r50 = or i160 %r44, %r49
-%r51 = zext i160 %r50 to i192
-%r53 = getelementptr i32, i32* %r2, i32 5
-%r54 = load i32, i32* %r53
-%r55 = zext i32 %r54 to i192
-%r56 = shl i192 %r55, 160
-%r57 = or i192 %r51, %r56
-%r58 = zext i192 %r57 to i224
-%r59 = trunc i224 %r58 to i32
-%r60 = mul i32 %r59, %r6
-%r61 = call i128 @mulPv96x32(i32* %r3, i32 %r60)
-%r62 = zext i128 %r61 to i224
-%r63 = add i224 %r58, %r62
-%r64 = lshr i224 %r63, 32
-%r65 = trunc i224 %r64 to i192
-%r66 = trunc i192 %r65 to i32
-%r67 = mul i32 %r66, %r6
-%r68 = call i128 @mulPv96x32(i32* %r3, i32 %r67)
-%r69 = zext i128 %r68 to i192
-%r70 = add i192 %r65, %r69
-%r71 = lshr i192 %r70, 32
-%r72 = trunc i192 %r71 to i160
-%r73 = trunc i160 %r72 to i32
-%r74 = mul i32 %r73, %r6
-%r75 = call i128 @mulPv96x32(i32* %r3, i32 %r74)
-%r76 = zext i128 %r75 to i160
-%r77 = add i160 %r72, %r76
-%r78 = lshr i160 %r77, 32
-%r79 = trunc i160 %r78 to i128
-%r80 = zext i96 %r21 to i128
-%r81 = sub i128 %r79, %r80
-%r82 = lshr i128 %r81, 96
-%r83 = trunc i128 %r82 to i1
-%r84 = select i1 %r83, i128 %r79, i128 %r81
-%r85 = trunc i128 %r84 to i96
-%r86 = trunc i96 %r85 to i32
-%r88 = getelementptr i32, i32* %r1, i32 0
-store i32 %r86, i32* %r88
-%r89 = lshr i96 %r85, 32
-%r90 = trunc i96 %r89 to i32
-%r92 = getelementptr i32, i32* %r1, i32 1
-store i32 %r90, i32* %r92
-%r93 = lshr i96 %r89, 32
-%r94 = trunc i96 %r93 to i32
-%r96 = getelementptr i32, i32* %r1, i32 2
-store i32 %r94, i32* %r96
-ret void
-}
-define i32 @mcl_fp_addPre3L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r21 = load i32, i32* %r4
-%r22 = zext i32 %r21 to i64
-%r24 = getelementptr i32, i32* %r4, i32 1
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i64
-%r27 = shl i64 %r26, 32
-%r28 = or i64 %r22, %r27
-%r29 = zext i64 %r28 to i96
-%r31 = getelementptr i32, i32* %r4, i32 2
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i96
-%r34 = shl i96 %r33, 64
-%r35 = or i96 %r29, %r34
-%r36 = zext i96 %r35 to i128
-%r37 = add i128 %r20, %r36
-%r38 = trunc i128 %r37 to i96
-%r39 = trunc i96 %r38 to i32
-%r41 = getelementptr i32, i32* %r2, i32 0
-store i32 %r39, i32* %r41
-%r42 = lshr i96 %r38, 32
-%r43 = trunc i96 %r42 to i32
-%r45 = getelementptr i32, i32* %r2, i32 1
-store i32 %r43, i32* %r45
-%r46 = lshr i96 %r42, 32
-%r47 = trunc i96 %r46 to i32
-%r49 = getelementptr i32, i32* %r2, i32 2
-store i32 %r47, i32* %r49
-%r50 = lshr i128 %r37, 96
-%r51 = trunc i128 %r50 to i32
-ret i32 %r51
-}
-define i32 @mcl_fp_subPre3L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r21 = load i32, i32* %r4
-%r22 = zext i32 %r21 to i64
-%r24 = getelementptr i32, i32* %r4, i32 1
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i64
-%r27 = shl i64 %r26, 32
-%r28 = or i64 %r22, %r27
-%r29 = zext i64 %r28 to i96
-%r31 = getelementptr i32, i32* %r4, i32 2
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i96
-%r34 = shl i96 %r33, 64
-%r35 = or i96 %r29, %r34
-%r36 = zext i96 %r35 to i128
-%r37 = sub i128 %r20, %r36
-%r38 = trunc i128 %r37 to i96
-%r39 = trunc i96 %r38 to i32
-%r41 = getelementptr i32, i32* %r2, i32 0
-store i32 %r39, i32* %r41
-%r42 = lshr i96 %r38, 32
-%r43 = trunc i96 %r42 to i32
-%r45 = getelementptr i32, i32* %r2, i32 1
-store i32 %r43, i32* %r45
-%r46 = lshr i96 %r42, 32
-%r47 = trunc i96 %r46 to i32
-%r49 = getelementptr i32, i32* %r2, i32 2
-store i32 %r47, i32* %r49
-%r50 = lshr i128 %r37, 96
-%r51 = trunc i128 %r50 to i32
-%r53 = and i32 %r51, 1
-ret i32 %r53
-}
-define void @mcl_fp_shr1_3L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = zext i64 %r10 to i96
-%r13 = getelementptr i32, i32* %r2, i32 2
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i96
-%r16 = shl i96 %r15, 64
-%r17 = or i96 %r11, %r16
-%r18 = lshr i96 %r17, 1
-%r19 = trunc i96 %r18 to i32
-%r21 = getelementptr i32, i32* %r1, i32 0
-store i32 %r19, i32* %r21
-%r22 = lshr i96 %r18, 32
-%r23 = trunc i96 %r22 to i32
-%r25 = getelementptr i32, i32* %r1, i32 1
-store i32 %r23, i32* %r25
-%r26 = lshr i96 %r22, 32
-%r27 = trunc i96 %r26 to i32
-%r29 = getelementptr i32, i32* %r1, i32 2
-store i32 %r27, i32* %r29
-ret void
-}
-define void @mcl_fp_add3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = load i32, i32* %r3
-%r21 = zext i32 %r20 to i64
-%r23 = getelementptr i32, i32* %r3, i32 1
-%r24 = load i32, i32* %r23
-%r25 = zext i32 %r24 to i64
-%r26 = shl i64 %r25, 32
-%r27 = or i64 %r21, %r26
-%r28 = zext i64 %r27 to i96
-%r30 = getelementptr i32, i32* %r3, i32 2
-%r31 = load i32, i32* %r30
-%r32 = zext i32 %r31 to i96
-%r33 = shl i96 %r32, 64
-%r34 = or i96 %r28, %r33
-%r35 = zext i96 %r19 to i128
-%r36 = zext i96 %r34 to i128
-%r37 = add i128 %r35, %r36
-%r38 = trunc i128 %r37 to i96
-%r39 = trunc i96 %r38 to i32
-%r41 = getelementptr i32, i32* %r1, i32 0
-store i32 %r39, i32* %r41
-%r42 = lshr i96 %r38, 32
-%r43 = trunc i96 %r42 to i32
-%r45 = getelementptr i32, i32* %r1, i32 1
-store i32 %r43, i32* %r45
-%r46 = lshr i96 %r42, 32
-%r47 = trunc i96 %r46 to i32
-%r49 = getelementptr i32, i32* %r1, i32 2
-store i32 %r47, i32* %r49
-%r50 = load i32, i32* %r4
-%r51 = zext i32 %r50 to i64
-%r53 = getelementptr i32, i32* %r4, i32 1
-%r54 = load i32, i32* %r53
-%r55 = zext i32 %r54 to i64
-%r56 = shl i64 %r55, 32
-%r57 = or i64 %r51, %r56
-%r58 = zext i64 %r57 to i96
-%r60 = getelementptr i32, i32* %r4, i32 2
-%r61 = load i32, i32* %r60
-%r62 = zext i32 %r61 to i96
-%r63 = shl i96 %r62, 64
-%r64 = or i96 %r58, %r63
-%r65 = zext i96 %r64 to i128
-%r66 = sub i128 %r37, %r65
-%r67 = lshr i128 %r66, 96
-%r68 = trunc i128 %r67 to i1
-br i1%r68, label %carry, label %nocarry
-nocarry:
-%r69 = trunc i128 %r66 to i96
-%r70 = trunc i96 %r69 to i32
-%r72 = getelementptr i32, i32* %r1, i32 0
-store i32 %r70, i32* %r72
-%r73 = lshr i96 %r69, 32
-%r74 = trunc i96 %r73 to i32
-%r76 = getelementptr i32, i32* %r1, i32 1
-store i32 %r74, i32* %r76
-%r77 = lshr i96 %r73, 32
-%r78 = trunc i96 %r77 to i32
-%r80 = getelementptr i32, i32* %r1, i32 2
-store i32 %r78, i32* %r80
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = load i32, i32* %r3
-%r21 = zext i32 %r20 to i64
-%r23 = getelementptr i32, i32* %r3, i32 1
-%r24 = load i32, i32* %r23
-%r25 = zext i32 %r24 to i64
-%r26 = shl i64 %r25, 32
-%r27 = or i64 %r21, %r26
-%r28 = zext i64 %r27 to i96
-%r30 = getelementptr i32, i32* %r3, i32 2
-%r31 = load i32, i32* %r30
-%r32 = zext i32 %r31 to i96
-%r33 = shl i96 %r32, 64
-%r34 = or i96 %r28, %r33
-%r35 = add i96 %r19, %r34
-%r36 = load i32, i32* %r4
-%r37 = zext i32 %r36 to i64
-%r39 = getelementptr i32, i32* %r4, i32 1
-%r40 = load i32, i32* %r39
-%r41 = zext i32 %r40 to i64
-%r42 = shl i64 %r41, 32
-%r43 = or i64 %r37, %r42
-%r44 = zext i64 %r43 to i96
-%r46 = getelementptr i32, i32* %r4, i32 2
-%r47 = load i32, i32* %r46
-%r48 = zext i32 %r47 to i96
-%r49 = shl i96 %r48, 64
-%r50 = or i96 %r44, %r49
-%r51 = sub i96 %r35, %r50
-%r52 = lshr i96 %r51, 95
-%r53 = trunc i96 %r52 to i1
-%r54 = select i1 %r53, i96 %r35, i96 %r51
-%r55 = trunc i96 %r54 to i32
-%r57 = getelementptr i32, i32* %r1, i32 0
-store i32 %r55, i32* %r57
-%r58 = lshr i96 %r54, 32
-%r59 = trunc i96 %r58 to i32
-%r61 = getelementptr i32, i32* %r1, i32 1
-store i32 %r59, i32* %r61
-%r62 = lshr i96 %r58, 32
-%r63 = trunc i96 %r62 to i32
-%r65 = getelementptr i32, i32* %r1, i32 2
-store i32 %r63, i32* %r65
-ret void
-}
-define void @mcl_fp_sub3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = load i32, i32* %r3
-%r21 = zext i32 %r20 to i64
-%r23 = getelementptr i32, i32* %r3, i32 1
-%r24 = load i32, i32* %r23
-%r25 = zext i32 %r24 to i64
-%r26 = shl i64 %r25, 32
-%r27 = or i64 %r21, %r26
-%r28 = zext i64 %r27 to i96
-%r30 = getelementptr i32, i32* %r3, i32 2
-%r31 = load i32, i32* %r30
-%r32 = zext i32 %r31 to i96
-%r33 = shl i96 %r32, 64
-%r34 = or i96 %r28, %r33
-%r35 = zext i96 %r19 to i128
-%r36 = zext i96 %r34 to i128
-%r37 = sub i128 %r35, %r36
-%r38 = trunc i128 %r37 to i96
-%r39 = lshr i128 %r37, 96
-%r40 = trunc i128 %r39 to i1
-%r41 = trunc i96 %r38 to i32
-%r43 = getelementptr i32, i32* %r1, i32 0
-store i32 %r41, i32* %r43
-%r44 = lshr i96 %r38, 32
-%r45 = trunc i96 %r44 to i32
-%r47 = getelementptr i32, i32* %r1, i32 1
-store i32 %r45, i32* %r47
-%r48 = lshr i96 %r44, 32
-%r49 = trunc i96 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 2
-store i32 %r49, i32* %r51
-br i1%r40, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r52 = load i32, i32* %r4
-%r53 = zext i32 %r52 to i64
-%r55 = getelementptr i32, i32* %r4, i32 1
-%r56 = load i32, i32* %r55
-%r57 = zext i32 %r56 to i64
-%r58 = shl i64 %r57, 32
-%r59 = or i64 %r53, %r58
-%r60 = zext i64 %r59 to i96
-%r62 = getelementptr i32, i32* %r4, i32 2
-%r63 = load i32, i32* %r62
-%r64 = zext i32 %r63 to i96
-%r65 = shl i96 %r64, 64
-%r66 = or i96 %r60, %r65
-%r67 = add i96 %r38, %r66
-%r68 = trunc i96 %r67 to i32
-%r70 = getelementptr i32, i32* %r1, i32 0
-store i32 %r68, i32* %r70
-%r71 = lshr i96 %r67, 32
-%r72 = trunc i96 %r71 to i32
-%r74 = getelementptr i32, i32* %r1, i32 1
-store i32 %r72, i32* %r74
-%r75 = lshr i96 %r71, 32
-%r76 = trunc i96 %r75 to i32
-%r78 = getelementptr i32, i32* %r1, i32 2
-store i32 %r76, i32* %r78
-ret void
-}
-define void @mcl_fp_subNF3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = load i32, i32* %r3
-%r21 = zext i32 %r20 to i64
-%r23 = getelementptr i32, i32* %r3, i32 1
-%r24 = load i32, i32* %r23
-%r25 = zext i32 %r24 to i64
-%r26 = shl i64 %r25, 32
-%r27 = or i64 %r21, %r26
-%r28 = zext i64 %r27 to i96
-%r30 = getelementptr i32, i32* %r3, i32 2
-%r31 = load i32, i32* %r30
-%r32 = zext i32 %r31 to i96
-%r33 = shl i96 %r32, 64
-%r34 = or i96 %r28, %r33
-%r35 = sub i96 %r19, %r34
-%r36 = lshr i96 %r35, 95
-%r37 = trunc i96 %r36 to i1
-%r38 = load i32, i32* %r4
-%r39 = zext i32 %r38 to i64
-%r41 = getelementptr i32, i32* %r4, i32 1
-%r42 = load i32, i32* %r41
-%r43 = zext i32 %r42 to i64
-%r44 = shl i64 %r43, 32
-%r45 = or i64 %r39, %r44
-%r46 = zext i64 %r45 to i96
-%r48 = getelementptr i32, i32* %r4, i32 2
-%r49 = load i32, i32* %r48
-%r50 = zext i32 %r49 to i96
-%r51 = shl i96 %r50, 64
-%r52 = or i96 %r46, %r51
-%r54 = select i1 %r37, i96 %r52, i96 0
-%r55 = add i96 %r35, %r54
-%r56 = trunc i96 %r55 to i32
-%r58 = getelementptr i32, i32* %r1, i32 0
-store i32 %r56, i32* %r58
-%r59 = lshr i96 %r55, 32
-%r60 = trunc i96 %r59 to i32
-%r62 = getelementptr i32, i32* %r1, i32 1
-store i32 %r60, i32* %r62
-%r63 = lshr i96 %r59, 32
-%r64 = trunc i96 %r63 to i32
-%r66 = getelementptr i32, i32* %r1, i32 2
-store i32 %r64, i32* %r66
-ret void
-}
-define void @mcl_fpDbl_add3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = load i32, i32* %r3
-%r42 = zext i32 %r41 to i64
-%r44 = getelementptr i32, i32* %r3, i32 1
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = load i32, i32* %r3
+%r42 = zext i32 %r41 to i64
+%r44 = getelementptr i32, i32* %r3, i32 1
 %r45 = load i32, i32* %r44
 %r46 = zext i32 %r45 to i64
 %r47 = shl i64 %r46, 32
@@ -2276,54 +1763,96 @@ define void @mcl_fpDbl_add3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r77 = zext i192 %r40 to i224
 %r78 = zext i192 %r76 to i224
 %r79 = add i224 %r77, %r78
-%r80 = trunc i224 %r79 to i96
-%r81 = trunc i96 %r80 to i32
-%r83 = getelementptr i32, i32* %r1, i32 0
-store i32 %r81, i32* %r83
-%r84 = lshr i96 %r80, 32
-%r85 = trunc i96 %r84 to i32
-%r87 = getelementptr i32, i32* %r1, i32 1
-store i32 %r85, i32* %r87
-%r88 = lshr i96 %r84, 32
-%r89 = trunc i96 %r88 to i32
-%r91 = getelementptr i32, i32* %r1, i32 2
-store i32 %r89, i32* %r91
-%r92 = lshr i224 %r79, 96
-%r93 = trunc i224 %r92 to i128
-%r94 = load i32, i32* %r4
-%r95 = zext i32 %r94 to i64
-%r97 = getelementptr i32, i32* %r4, i32 1
-%r98 = load i32, i32* %r97
-%r99 = zext i32 %r98 to i64
-%r100 = shl i64 %r99, 32
-%r101 = or i64 %r95, %r100
-%r102 = zext i64 %r101 to i96
-%r104 = getelementptr i32, i32* %r4, i32 2
-%r105 = load i32, i32* %r104
-%r106 = zext i32 %r105 to i96
-%r107 = shl i96 %r106, 64
-%r108 = or i96 %r102, %r107
-%r109 = zext i96 %r108 to i128
-%r110 = sub i128 %r93, %r109
-%r111 = lshr i128 %r110, 96
-%r112 = trunc i128 %r111 to i1
-%r113 = select i1 %r112, i128 %r93, i128 %r110
-%r114 = trunc i128 %r113 to i96
-%r116 = getelementptr i32, i32* %r1, i32 3
-%r117 = trunc i96 %r114 to i32
-%r119 = getelementptr i32, i32* %r116, i32 0
-store i32 %r117, i32* %r119
-%r120 = lshr i96 %r114, 32
-%r121 = trunc i96 %r120 to i32
-%r123 = getelementptr i32, i32* %r116, i32 1
-store i32 %r121, i32* %r123
-%r124 = lshr i96 %r120, 32
-%r125 = trunc i96 %r124 to i32
-%r127 = getelementptr i32, i32* %r116, i32 2
-store i32 %r125, i32* %r127
+%r80 = trunc i224 %r79 to i192
+%r82 = getelementptr i32, i32* %r1, i32 0
+%r83 = trunc i192 %r80 to i32
+store i32 %r83, i32* %r82
+%r84 = lshr i192 %r80, 32
+%r86 = getelementptr i32, i32* %r1, i32 1
+%r87 = trunc i192 %r84 to i32
+store i32 %r87, i32* %r86
+%r88 = lshr i192 %r84, 32
+%r90 = getelementptr i32, i32* %r1, i32 2
+%r91 = trunc i192 %r88 to i32
+store i32 %r91, i32* %r90
+%r92 = lshr i192 %r88, 32
+%r94 = getelementptr i32, i32* %r1, i32 3
+%r95 = trunc i192 %r92 to i32
+store i32 %r95, i32* %r94
+%r96 = lshr i192 %r92, 32
+%r98 = getelementptr i32, i32* %r1, i32 4
+%r99 = trunc i192 %r96 to i32
+store i32 %r99, i32* %r98
+%r100 = lshr i192 %r96, 32
+%r102 = getelementptr i32, i32* %r1, i32 5
+%r103 = trunc i192 %r100 to i32
+store i32 %r103, i32* %r102
+%r104 = load i32, i32* %r4
+%r105 = zext i32 %r104 to i64
+%r107 = getelementptr i32, i32* %r4, i32 1
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i64
+%r110 = shl i64 %r109, 32
+%r111 = or i64 %r105, %r110
+%r112 = zext i64 %r111 to i96
+%r114 = getelementptr i32, i32* %r4, i32 2
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i96
+%r117 = shl i96 %r116, 64
+%r118 = or i96 %r112, %r117
+%r119 = zext i96 %r118 to i128
+%r121 = getelementptr i32, i32* %r4, i32 3
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i128
+%r124 = shl i128 %r123, 96
+%r125 = or i128 %r119, %r124
+%r126 = zext i128 %r125 to i160
+%r128 = getelementptr i32, i32* %r4, i32 4
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i160
+%r131 = shl i160 %r130, 128
+%r132 = or i160 %r126, %r131
+%r133 = zext i160 %r132 to i192
+%r135 = getelementptr i32, i32* %r4, i32 5
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i192
+%r138 = shl i192 %r137, 160
+%r139 = or i192 %r133, %r138
+%r140 = zext i192 %r139 to i224
+%r141 = sub i224 %r79, %r140
+%r142 = lshr i224 %r141, 192
+%r143 = trunc i224 %r142 to i1
+br i1%r143, label %carry, label %nocarry
+nocarry:
+%r144 = trunc i224 %r141 to i192
+%r146 = getelementptr i32, i32* %r1, i32 0
+%r147 = trunc i192 %r144 to i32
+store i32 %r147, i32* %r146
+%r148 = lshr i192 %r144, 32
+%r150 = getelementptr i32, i32* %r1, i32 1
+%r151 = trunc i192 %r148 to i32
+store i32 %r151, i32* %r150
+%r152 = lshr i192 %r148, 32
+%r154 = getelementptr i32, i32* %r1, i32 2
+%r155 = trunc i192 %r152 to i32
+store i32 %r155, i32* %r154
+%r156 = lshr i192 %r152, 32
+%r158 = getelementptr i32, i32* %r1, i32 3
+%r159 = trunc i192 %r156 to i32
+store i32 %r159, i32* %r158
+%r160 = lshr i192 %r156, 32
+%r162 = getelementptr i32, i32* %r1, i32 4
+%r163 = trunc i192 %r160 to i32
+store i32 %r163, i32* %r162
+%r164 = lshr i192 %r160, 32
+%r166 = getelementptr i32, i32* %r1, i32 5
+%r167 = trunc i192 %r164 to i32
+store i32 %r167, i32* %r166
+ret void
+carry:
 ret void
 }
-define void @mcl_fpDbl_sub3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_addNF6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -2387,1046 +1916,351 @@ define void @mcl_fpDbl_sub3L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r74 = zext i32 %r73 to i192
 %r75 = shl i192 %r74, 160
 %r76 = or i192 %r70, %r75
-%r77 = zext i192 %r40 to i224
-%r78 = zext i192 %r76 to i224
-%r79 = sub i224 %r77, %r78
-%r80 = trunc i224 %r79 to i96
-%r81 = trunc i96 %r80 to i32
-%r83 = getelementptr i32, i32* %r1, i32 0
-store i32 %r81, i32* %r83
-%r84 = lshr i96 %r80, 32
-%r85 = trunc i96 %r84 to i32
-%r87 = getelementptr i32, i32* %r1, i32 1
-store i32 %r85, i32* %r87
-%r88 = lshr i96 %r84, 32
-%r89 = trunc i96 %r88 to i32
-%r91 = getelementptr i32, i32* %r1, i32 2
-store i32 %r89, i32* %r91
-%r92 = lshr i224 %r79, 96
-%r93 = trunc i224 %r92 to i96
-%r94 = lshr i224 %r79, 192
-%r95 = trunc i224 %r94 to i1
-%r96 = load i32, i32* %r4
-%r97 = zext i32 %r96 to i64
-%r99 = getelementptr i32, i32* %r4, i32 1
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i64
-%r102 = shl i64 %r101, 32
-%r103 = or i64 %r97, %r102
-%r104 = zext i64 %r103 to i96
-%r106 = getelementptr i32, i32* %r4, i32 2
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i96
-%r109 = shl i96 %r108, 64
-%r110 = or i96 %r104, %r109
-%r112 = select i1 %r95, i96 %r110, i96 0
-%r113 = add i96 %r93, %r112
-%r115 = getelementptr i32, i32* %r1, i32 3
-%r116 = trunc i96 %r113 to i32
-%r118 = getelementptr i32, i32* %r115, i32 0
-store i32 %r116, i32* %r118
-%r119 = lshr i96 %r113, 32
-%r120 = trunc i96 %r119 to i32
-%r122 = getelementptr i32, i32* %r115, i32 1
-store i32 %r120, i32* %r122
-%r123 = lshr i96 %r119, 32
-%r124 = trunc i96 %r123 to i32
-%r126 = getelementptr i32, i32* %r115, i32 2
-store i32 %r124, i32* %r126
-ret void
-}
-define i160 @mulPv128x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
-%r18 = trunc i64 %r17 to i32
-%r19 = call i32 @extractHigh32(i64 %r17)
-%r20 = zext i32 %r6 to i64
-%r21 = zext i32 %r10 to i64
-%r22 = shl i64 %r21, 32
-%r23 = or i64 %r20, %r22
-%r24 = zext i64 %r23 to i96
-%r25 = zext i32 %r14 to i96
-%r26 = shl i96 %r25, 64
-%r27 = or i96 %r24, %r26
-%r28 = zext i96 %r27 to i128
-%r29 = zext i32 %r18 to i128
-%r30 = shl i128 %r29, 96
-%r31 = or i128 %r28, %r30
-%r32 = zext i32 %r7 to i64
-%r33 = zext i32 %r11 to i64
-%r34 = shl i64 %r33, 32
-%r35 = or i64 %r32, %r34
-%r36 = zext i64 %r35 to i96
-%r37 = zext i32 %r15 to i96
-%r38 = shl i96 %r37, 64
-%r39 = or i96 %r36, %r38
-%r40 = zext i96 %r39 to i128
-%r41 = zext i32 %r19 to i128
-%r42 = shl i128 %r41, 96
-%r43 = or i128 %r40, %r42
-%r44 = zext i128 %r31 to i160
-%r45 = zext i128 %r43 to i160
-%r46 = shl i160 %r45, 32
-%r47 = add i160 %r44, %r46
-ret i160 %r47
-}
-define void @mcl_fp_mulUnitPre4L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i160 @mulPv128x32(i32* %r2, i32 %r3)
-%r5 = trunc i160 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i160 %r4, 32
-%r9 = trunc i160 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i160 %r8, 32
-%r13 = trunc i160 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i160 %r12, 32
-%r17 = trunc i160 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i160 %r16, 32
-%r21 = trunc i160 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-ret void
-}
-define void @mcl_fpDbl_mulPre4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r4 = load i32, i32* %r3
-%r5 = call i160 @mulPv128x32(i32* %r2, i32 %r4)
-%r6 = trunc i160 %r5 to i32
-store i32 %r6, i32* %r1
-%r7 = lshr i160 %r5, 32
-%r9 = getelementptr i32, i32* %r3, i32 1
-%r10 = load i32, i32* %r9
-%r11 = call i160 @mulPv128x32(i32* %r2, i32 %r10)
-%r12 = add i160 %r7, %r11
-%r13 = trunc i160 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 1
-store i32 %r13, i32* %r15
-%r16 = lshr i160 %r12, 32
-%r18 = getelementptr i32, i32* %r3, i32 2
-%r19 = load i32, i32* %r18
-%r20 = call i160 @mulPv128x32(i32* %r2, i32 %r19)
-%r21 = add i160 %r16, %r20
-%r22 = trunc i160 %r21 to i32
-%r24 = getelementptr i32, i32* %r1, i32 2
-store i32 %r22, i32* %r24
-%r25 = lshr i160 %r21, 32
-%r27 = getelementptr i32, i32* %r3, i32 3
-%r28 = load i32, i32* %r27
-%r29 = call i160 @mulPv128x32(i32* %r2, i32 %r28)
-%r30 = add i160 %r25, %r29
-%r32 = getelementptr i32, i32* %r1, i32 3
-%r33 = trunc i160 %r30 to i32
-%r35 = getelementptr i32, i32* %r32, i32 0
-store i32 %r33, i32* %r35
-%r36 = lshr i160 %r30, 32
-%r37 = trunc i160 %r36 to i32
-%r39 = getelementptr i32, i32* %r32, i32 1
-store i32 %r37, i32* %r39
-%r40 = lshr i160 %r36, 32
-%r41 = trunc i160 %r40 to i32
-%r43 = getelementptr i32, i32* %r32, i32 2
-store i32 %r41, i32* %r43
-%r44 = lshr i160 %r40, 32
-%r45 = trunc i160 %r44 to i32
-%r47 = getelementptr i32, i32* %r32, i32 3
-store i32 %r45, i32* %r47
-%r48 = lshr i160 %r44, 32
-%r49 = trunc i160 %r48 to i32
-%r51 = getelementptr i32, i32* %r32, i32 4
-store i32 %r49, i32* %r51
+%r77 = add i192 %r40, %r76
+%r78 = load i32, i32* %r4
+%r79 = zext i32 %r78 to i64
+%r81 = getelementptr i32, i32* %r4, i32 1
+%r82 = load i32, i32* %r81
+%r83 = zext i32 %r82 to i64
+%r84 = shl i64 %r83, 32
+%r85 = or i64 %r79, %r84
+%r86 = zext i64 %r85 to i96
+%r88 = getelementptr i32, i32* %r4, i32 2
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i96
+%r91 = shl i96 %r90, 64
+%r92 = or i96 %r86, %r91
+%r93 = zext i96 %r92 to i128
+%r95 = getelementptr i32, i32* %r4, i32 3
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i128
+%r98 = shl i128 %r97, 96
+%r99 = or i128 %r93, %r98
+%r100 = zext i128 %r99 to i160
+%r102 = getelementptr i32, i32* %r4, i32 4
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i160
+%r105 = shl i160 %r104, 128
+%r106 = or i160 %r100, %r105
+%r107 = zext i160 %r106 to i192
+%r109 = getelementptr i32, i32* %r4, i32 5
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i192
+%r112 = shl i192 %r111, 160
+%r113 = or i192 %r107, %r112
+%r114 = sub i192 %r77, %r113
+%r115 = lshr i192 %r114, 191
+%r116 = trunc i192 %r115 to i1
+%r117 = select i1 %r116, i192 %r77, i192 %r114
+%r119 = getelementptr i32, i32* %r1, i32 0
+%r120 = trunc i192 %r117 to i32
+store i32 %r120, i32* %r119
+%r121 = lshr i192 %r117, 32
+%r123 = getelementptr i32, i32* %r1, i32 1
+%r124 = trunc i192 %r121 to i32
+store i32 %r124, i32* %r123
+%r125 = lshr i192 %r121, 32
+%r127 = getelementptr i32, i32* %r1, i32 2
+%r128 = trunc i192 %r125 to i32
+store i32 %r128, i32* %r127
+%r129 = lshr i192 %r125, 32
+%r131 = getelementptr i32, i32* %r1, i32 3
+%r132 = trunc i192 %r129 to i32
+store i32 %r132, i32* %r131
+%r133 = lshr i192 %r129, 32
+%r135 = getelementptr i32, i32* %r1, i32 4
+%r136 = trunc i192 %r133 to i32
+store i32 %r136, i32* %r135
+%r137 = lshr i192 %r133, 32
+%r139 = getelementptr i32, i32* %r1, i32 5
+%r140 = trunc i192 %r137 to i32
+store i32 %r140, i32* %r139
 ret void
 }
-define void @mcl_fpDbl_sqrPre4L(i32* noalias  %r1, i32* noalias  %r2)
+define void @mcl_fp_sub6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
-%r3 = load i32, i32* %r2
-%r4 = call i160 @mulPv128x32(i32* %r2, i32 %r3)
-%r5 = trunc i160 %r4 to i32
-store i32 %r5, i32* %r1
-%r6 = lshr i160 %r4, 32
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
 %r8 = getelementptr i32, i32* %r2, i32 1
 %r9 = load i32, i32* %r8
-%r10 = call i160 @mulPv128x32(i32* %r2, i32 %r9)
-%r11 = add i160 %r6, %r10
-%r12 = trunc i160 %r11 to i32
-%r14 = getelementptr i32, i32* %r1, i32 1
-store i32 %r12, i32* %r14
-%r15 = lshr i160 %r11, 32
-%r17 = getelementptr i32, i32* %r2, i32 2
-%r18 = load i32, i32* %r17
-%r19 = call i160 @mulPv128x32(i32* %r2, i32 %r18)
-%r20 = add i160 %r15, %r19
-%r21 = trunc i160 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 2
-store i32 %r21, i32* %r23
-%r24 = lshr i160 %r20, 32
-%r26 = getelementptr i32, i32* %r2, i32 3
-%r27 = load i32, i32* %r26
-%r28 = call i160 @mulPv128x32(i32* %r2, i32 %r27)
-%r29 = add i160 %r24, %r28
-%r31 = getelementptr i32, i32* %r1, i32 3
-%r32 = trunc i160 %r29 to i32
-%r34 = getelementptr i32, i32* %r31, i32 0
-store i32 %r32, i32* %r34
-%r35 = lshr i160 %r29, 32
-%r36 = trunc i160 %r35 to i32
-%r38 = getelementptr i32, i32* %r31, i32 1
-store i32 %r36, i32* %r38
-%r39 = lshr i160 %r35, 32
-%r40 = trunc i160 %r39 to i32
-%r42 = getelementptr i32, i32* %r31, i32 2
-store i32 %r40, i32* %r42
-%r43 = lshr i160 %r39, 32
-%r44 = trunc i160 %r43 to i32
-%r46 = getelementptr i32, i32* %r31, i32 3
-store i32 %r44, i32* %r46
-%r47 = lshr i160 %r43, 32
-%r48 = trunc i160 %r47 to i32
-%r50 = getelementptr i32, i32* %r31, i32 4
-store i32 %r48, i32* %r50
-ret void
-}
-define void @mcl_fp_mont4L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i160 @mulPv128x32(i32* %r2, i32 %r10)
-%r12 = zext i160 %r11 to i192
-%r13 = trunc i160 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i160 @mulPv128x32(i32* %r4, i32 %r14)
-%r16 = zext i160 %r15 to i192
-%r17 = add i192 %r12, %r16
-%r18 = lshr i192 %r17, 32
-%r20 = getelementptr i32, i32* %r3, i32 1
-%r21 = load i32, i32* %r20
-%r22 = call i160 @mulPv128x32(i32* %r2, i32 %r21)
-%r23 = zext i160 %r22 to i192
-%r24 = add i192 %r18, %r23
-%r25 = trunc i192 %r24 to i32
-%r26 = mul i32 %r25, %r7
-%r27 = call i160 @mulPv128x32(i32* %r4, i32 %r26)
-%r28 = zext i160 %r27 to i192
-%r29 = add i192 %r24, %r28
-%r30 = lshr i192 %r29, 32
-%r32 = getelementptr i32, i32* %r3, i32 2
-%r33 = load i32, i32* %r32
-%r34 = call i160 @mulPv128x32(i32* %r2, i32 %r33)
-%r35 = zext i160 %r34 to i192
-%r36 = add i192 %r30, %r35
-%r37 = trunc i192 %r36 to i32
-%r38 = mul i32 %r37, %r7
-%r39 = call i160 @mulPv128x32(i32* %r4, i32 %r38)
-%r40 = zext i160 %r39 to i192
-%r41 = add i192 %r36, %r40
-%r42 = lshr i192 %r41, 32
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = call i160 @mulPv128x32(i32* %r2, i32 %r45)
-%r47 = zext i160 %r46 to i192
-%r48 = add i192 %r42, %r47
-%r49 = trunc i192 %r48 to i32
-%r50 = mul i32 %r49, %r7
-%r51 = call i160 @mulPv128x32(i32* %r4, i32 %r50)
-%r52 = zext i160 %r51 to i192
-%r53 = add i192 %r48, %r52
-%r54 = lshr i192 %r53, 32
-%r55 = trunc i192 %r54 to i160
-%r56 = load i32, i32* %r4
-%r57 = zext i32 %r56 to i64
-%r59 = getelementptr i32, i32* %r4, i32 1
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i64
-%r62 = shl i64 %r61, 32
-%r63 = or i64 %r57, %r62
-%r64 = zext i64 %r63 to i96
-%r66 = getelementptr i32, i32* %r4, i32 2
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i96
-%r69 = shl i96 %r68, 64
-%r70 = or i96 %r64, %r69
-%r71 = zext i96 %r70 to i128
-%r73 = getelementptr i32, i32* %r4, i32 3
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i128
-%r76 = shl i128 %r75, 96
-%r77 = or i128 %r71, %r76
-%r78 = zext i128 %r77 to i160
-%r79 = sub i160 %r55, %r78
-%r80 = lshr i160 %r79, 128
-%r81 = trunc i160 %r80 to i1
-%r82 = select i1 %r81, i160 %r55, i160 %r79
-%r83 = trunc i160 %r82 to i128
-%r84 = trunc i128 %r83 to i32
-%r86 = getelementptr i32, i32* %r1, i32 0
-store i32 %r84, i32* %r86
-%r87 = lshr i128 %r83, 32
-%r88 = trunc i128 %r87 to i32
-%r90 = getelementptr i32, i32* %r1, i32 1
-store i32 %r88, i32* %r90
-%r91 = lshr i128 %r87, 32
-%r92 = trunc i128 %r91 to i32
-%r94 = getelementptr i32, i32* %r1, i32 2
-store i32 %r92, i32* %r94
-%r95 = lshr i128 %r91, 32
-%r96 = trunc i128 %r95 to i32
-%r98 = getelementptr i32, i32* %r1, i32 3
-store i32 %r96, i32* %r98
-ret void
-}
-define void @mcl_fp_montNF4L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i160 @mulPv128x32(i32* %r2, i32 %r8)
-%r10 = trunc i160 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i160 @mulPv128x32(i32* %r4, i32 %r11)
-%r13 = add i160 %r9, %r12
-%r14 = lshr i160 %r13, 32
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = call i160 @mulPv128x32(i32* %r2, i32 %r17)
-%r19 = add i160 %r14, %r18
-%r20 = trunc i160 %r19 to i32
-%r21 = mul i32 %r20, %r7
-%r22 = call i160 @mulPv128x32(i32* %r4, i32 %r21)
-%r23 = add i160 %r19, %r22
-%r24 = lshr i160 %r23, 32
-%r26 = getelementptr i32, i32* %r3, i32 2
-%r27 = load i32, i32* %r26
-%r28 = call i160 @mulPv128x32(i32* %r2, i32 %r27)
-%r29 = add i160 %r24, %r28
-%r30 = trunc i160 %r29 to i32
-%r31 = mul i32 %r30, %r7
-%r32 = call i160 @mulPv128x32(i32* %r4, i32 %r31)
-%r33 = add i160 %r29, %r32
-%r34 = lshr i160 %r33, 32
-%r36 = getelementptr i32, i32* %r3, i32 3
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
 %r37 = load i32, i32* %r36
-%r38 = call i160 @mulPv128x32(i32* %r2, i32 %r37)
-%r39 = add i160 %r34, %r38
-%r40 = trunc i160 %r39 to i32
-%r41 = mul i32 %r40, %r7
-%r42 = call i160 @mulPv128x32(i32* %r4, i32 %r41)
-%r43 = add i160 %r39, %r42
-%r44 = lshr i160 %r43, 32
-%r45 = trunc i160 %r44 to i128
-%r46 = load i32, i32* %r4
-%r47 = zext i32 %r46 to i64
-%r49 = getelementptr i32, i32* %r4, i32 1
-%r50 = load i32, i32* %r49
-%r51 = zext i32 %r50 to i64
-%r52 = shl i64 %r51, 32
-%r53 = or i64 %r47, %r52
-%r54 = zext i64 %r53 to i96
-%r56 = getelementptr i32, i32* %r4, i32 2
-%r57 = load i32, i32* %r56
-%r58 = zext i32 %r57 to i96
-%r59 = shl i96 %r58, 64
-%r60 = or i96 %r54, %r59
-%r61 = zext i96 %r60 to i128
-%r63 = getelementptr i32, i32* %r4, i32 3
-%r64 = load i32, i32* %r63
-%r65 = zext i32 %r64 to i128
-%r66 = shl i128 %r65, 96
-%r67 = or i128 %r61, %r66
-%r68 = sub i128 %r45, %r67
-%r69 = lshr i128 %r68, 127
-%r70 = trunc i128 %r69 to i1
-%r71 = select i1 %r70, i128 %r45, i128 %r68
-%r72 = trunc i128 %r71 to i32
-%r74 = getelementptr i32, i32* %r1, i32 0
-store i32 %r72, i32* %r74
-%r75 = lshr i128 %r71, 32
-%r76 = trunc i128 %r75 to i32
-%r78 = getelementptr i32, i32* %r1, i32 1
-store i32 %r76, i32* %r78
-%r79 = lshr i128 %r75, 32
-%r80 = trunc i128 %r79 to i32
-%r82 = getelementptr i32, i32* %r1, i32 2
-store i32 %r80, i32* %r82
-%r83 = lshr i128 %r79, 32
-%r84 = trunc i128 %r83 to i32
-%r86 = getelementptr i32, i32* %r1, i32 3
-store i32 %r84, i32* %r86
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = load i32, i32* %r3
+%r42 = zext i32 %r41 to i64
+%r44 = getelementptr i32, i32* %r3, i32 1
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i64
+%r47 = shl i64 %r46, 32
+%r48 = or i64 %r42, %r47
+%r49 = zext i64 %r48 to i96
+%r51 = getelementptr i32, i32* %r3, i32 2
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i96
+%r54 = shl i96 %r53, 64
+%r55 = or i96 %r49, %r54
+%r56 = zext i96 %r55 to i128
+%r58 = getelementptr i32, i32* %r3, i32 3
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i128
+%r61 = shl i128 %r60, 96
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i160
+%r65 = getelementptr i32, i32* %r3, i32 4
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i160
+%r68 = shl i160 %r67, 128
+%r69 = or i160 %r63, %r68
+%r70 = zext i160 %r69 to i192
+%r72 = getelementptr i32, i32* %r3, i32 5
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i192
+%r75 = shl i192 %r74, 160
+%r76 = or i192 %r70, %r75
+%r77 = zext i192 %r40 to i224
+%r78 = zext i192 %r76 to i224
+%r79 = sub i224 %r77, %r78
+%r80 = trunc i224 %r79 to i192
+%r81 = lshr i224 %r79, 192
+%r82 = trunc i224 %r81 to i1
+%r84 = getelementptr i32, i32* %r1, i32 0
+%r85 = trunc i192 %r80 to i32
+store i32 %r85, i32* %r84
+%r86 = lshr i192 %r80, 32
+%r88 = getelementptr i32, i32* %r1, i32 1
+%r89 = trunc i192 %r86 to i32
+store i32 %r89, i32* %r88
+%r90 = lshr i192 %r86, 32
+%r92 = getelementptr i32, i32* %r1, i32 2
+%r93 = trunc i192 %r90 to i32
+store i32 %r93, i32* %r92
+%r94 = lshr i192 %r90, 32
+%r96 = getelementptr i32, i32* %r1, i32 3
+%r97 = trunc i192 %r94 to i32
+store i32 %r97, i32* %r96
+%r98 = lshr i192 %r94, 32
+%r100 = getelementptr i32, i32* %r1, i32 4
+%r101 = trunc i192 %r98 to i32
+store i32 %r101, i32* %r100
+%r102 = lshr i192 %r98, 32
+%r104 = getelementptr i32, i32* %r1, i32 5
+%r105 = trunc i192 %r102 to i32
+store i32 %r105, i32* %r104
+br i1%r82, label %carry, label %nocarry
+nocarry:
 ret void
-}
-define void @mcl_fp_montRed4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = zext i96 %r21 to i128
-%r24 = getelementptr i32, i32* %r3, i32 3
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i128
-%r27 = shl i128 %r26, 96
-%r28 = or i128 %r22, %r27
-%r29 = load i32, i32* %r2
-%r30 = zext i32 %r29 to i64
-%r32 = getelementptr i32, i32* %r2, i32 1
-%r33 = load i32, i32* %r32
-%r34 = zext i32 %r33 to i64
-%r35 = shl i64 %r34, 32
-%r36 = or i64 %r30, %r35
-%r37 = zext i64 %r36 to i96
-%r39 = getelementptr i32, i32* %r2, i32 2
-%r40 = load i32, i32* %r39
-%r41 = zext i32 %r40 to i96
-%r42 = shl i96 %r41, 64
-%r43 = or i96 %r37, %r42
-%r44 = zext i96 %r43 to i128
-%r46 = getelementptr i32, i32* %r2, i32 3
-%r47 = load i32, i32* %r46
-%r48 = zext i32 %r47 to i128
-%r49 = shl i128 %r48, 96
-%r50 = or i128 %r44, %r49
-%r51 = zext i128 %r50 to i160
-%r53 = getelementptr i32, i32* %r2, i32 4
-%r54 = load i32, i32* %r53
-%r55 = zext i32 %r54 to i160
-%r56 = shl i160 %r55, 128
-%r57 = or i160 %r51, %r56
-%r58 = zext i160 %r57 to i192
-%r60 = getelementptr i32, i32* %r2, i32 5
-%r61 = load i32, i32* %r60
-%r62 = zext i32 %r61 to i192
-%r63 = shl i192 %r62, 160
-%r64 = or i192 %r58, %r63
-%r65 = zext i192 %r64 to i224
-%r67 = getelementptr i32, i32* %r2, i32 6
-%r68 = load i32, i32* %r67
-%r69 = zext i32 %r68 to i224
-%r70 = shl i224 %r69, 192
-%r71 = or i224 %r65, %r70
-%r72 = zext i224 %r71 to i256
-%r74 = getelementptr i32, i32* %r2, i32 7
-%r75 = load i32, i32* %r74
-%r76 = zext i32 %r75 to i256
-%r77 = shl i256 %r76, 224
-%r78 = or i256 %r72, %r77
-%r79 = zext i256 %r78 to i288
-%r80 = trunc i288 %r79 to i32
-%r81 = mul i32 %r80, %r6
-%r82 = call i160 @mulPv128x32(i32* %r3, i32 %r81)
-%r83 = zext i160 %r82 to i288
-%r84 = add i288 %r79, %r83
-%r85 = lshr i288 %r84, 32
-%r86 = trunc i288 %r85 to i256
-%r87 = trunc i256 %r86 to i32
-%r88 = mul i32 %r87, %r6
-%r89 = call i160 @mulPv128x32(i32* %r3, i32 %r88)
-%r90 = zext i160 %r89 to i256
-%r91 = add i256 %r86, %r90
-%r92 = lshr i256 %r91, 32
-%r93 = trunc i256 %r92 to i224
-%r94 = trunc i224 %r93 to i32
-%r95 = mul i32 %r94, %r6
-%r96 = call i160 @mulPv128x32(i32* %r3, i32 %r95)
-%r97 = zext i160 %r96 to i224
-%r98 = add i224 %r93, %r97
-%r99 = lshr i224 %r98, 32
-%r100 = trunc i224 %r99 to i192
-%r101 = trunc i192 %r100 to i32
-%r102 = mul i32 %r101, %r6
-%r103 = call i160 @mulPv128x32(i32* %r3, i32 %r102)
-%r104 = zext i160 %r103 to i192
-%r105 = add i192 %r100, %r104
-%r106 = lshr i192 %r105, 32
-%r107 = trunc i192 %r106 to i160
-%r108 = zext i128 %r28 to i160
-%r109 = sub i160 %r107, %r108
-%r110 = lshr i160 %r109, 128
-%r111 = trunc i160 %r110 to i1
-%r112 = select i1 %r111, i160 %r107, i160 %r109
-%r113 = trunc i160 %r112 to i128
-%r114 = trunc i128 %r113 to i32
-%r116 = getelementptr i32, i32* %r1, i32 0
-store i32 %r114, i32* %r116
-%r117 = lshr i128 %r113, 32
-%r118 = trunc i128 %r117 to i32
-%r120 = getelementptr i32, i32* %r1, i32 1
-store i32 %r118, i32* %r120
-%r121 = lshr i128 %r117, 32
-%r122 = trunc i128 %r121 to i32
-%r124 = getelementptr i32, i32* %r1, i32 2
-store i32 %r122, i32* %r124
-%r125 = lshr i128 %r121, 32
-%r126 = trunc i128 %r125 to i32
-%r128 = getelementptr i32, i32* %r1, i32 3
-store i32 %r126, i32* %r128
+carry:
+%r106 = load i32, i32* %r4
+%r107 = zext i32 %r106 to i64
+%r109 = getelementptr i32, i32* %r4, i32 1
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i64
+%r112 = shl i64 %r111, 32
+%r113 = or i64 %r107, %r112
+%r114 = zext i64 %r113 to i96
+%r116 = getelementptr i32, i32* %r4, i32 2
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i96
+%r119 = shl i96 %r118, 64
+%r120 = or i96 %r114, %r119
+%r121 = zext i96 %r120 to i128
+%r123 = getelementptr i32, i32* %r4, i32 3
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i128
+%r126 = shl i128 %r125, 96
+%r127 = or i128 %r121, %r126
+%r128 = zext i128 %r127 to i160
+%r130 = getelementptr i32, i32* %r4, i32 4
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i160
+%r133 = shl i160 %r132, 128
+%r134 = or i160 %r128, %r133
+%r135 = zext i160 %r134 to i192
+%r137 = getelementptr i32, i32* %r4, i32 5
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i192
+%r140 = shl i192 %r139, 160
+%r141 = or i192 %r135, %r140
+%r142 = add i192 %r80, %r141
+%r144 = getelementptr i32, i32* %r1, i32 0
+%r145 = trunc i192 %r142 to i32
+store i32 %r145, i32* %r144
+%r146 = lshr i192 %r142, 32
+%r148 = getelementptr i32, i32* %r1, i32 1
+%r149 = trunc i192 %r146 to i32
+store i32 %r149, i32* %r148
+%r150 = lshr i192 %r146, 32
+%r152 = getelementptr i32, i32* %r1, i32 2
+%r153 = trunc i192 %r150 to i32
+store i32 %r153, i32* %r152
+%r154 = lshr i192 %r150, 32
+%r156 = getelementptr i32, i32* %r1, i32 3
+%r157 = trunc i192 %r154 to i32
+store i32 %r157, i32* %r156
+%r158 = lshr i192 %r154, 32
+%r160 = getelementptr i32, i32* %r1, i32 4
+%r161 = trunc i192 %r158 to i32
+store i32 %r161, i32* %r160
+%r162 = lshr i192 %r158, 32
+%r164 = getelementptr i32, i32* %r1, i32 5
+%r165 = trunc i192 %r162 to i32
+store i32 %r165, i32* %r164
 ret void
 }
-define i32 @mcl_fp_addPre4L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_subNF6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
-%r5 = load i32, i32* %r3
+%r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
+%r8 = getelementptr i32, i32* %r2, i32 1
 %r9 = load i32, i32* %r8
 %r10 = zext i32 %r9 to i64
 %r11 = shl i64 %r10, 32
 %r12 = or i64 %r6, %r11
 %r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
+%r15 = getelementptr i32, i32* %r2, i32 2
 %r16 = load i32, i32* %r15
 %r17 = zext i32 %r16 to i96
 %r18 = shl i96 %r17, 64
 %r19 = or i96 %r13, %r18
 %r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
+%r22 = getelementptr i32, i32* %r2, i32 3
 %r23 = load i32, i32* %r22
 %r24 = zext i32 %r23 to i128
 %r25 = shl i128 %r24, 96
 %r26 = or i128 %r20, %r25
 %r27 = zext i128 %r26 to i160
-%r28 = load i32, i32* %r4
-%r29 = zext i32 %r28 to i64
-%r31 = getelementptr i32, i32* %r4, i32 1
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i64
-%r34 = shl i64 %r33, 32
-%r35 = or i64 %r29, %r34
-%r36 = zext i64 %r35 to i96
-%r38 = getelementptr i32, i32* %r4, i32 2
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i96
-%r41 = shl i96 %r40, 64
-%r42 = or i96 %r36, %r41
-%r43 = zext i96 %r42 to i128
-%r45 = getelementptr i32, i32* %r4, i32 3
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i128
-%r48 = shl i128 %r47, 96
-%r49 = or i128 %r43, %r48
-%r50 = zext i128 %r49 to i160
-%r51 = add i160 %r27, %r50
-%r52 = trunc i160 %r51 to i128
-%r53 = trunc i128 %r52 to i32
-%r55 = getelementptr i32, i32* %r2, i32 0
-store i32 %r53, i32* %r55
-%r56 = lshr i128 %r52, 32
-%r57 = trunc i128 %r56 to i32
-%r59 = getelementptr i32, i32* %r2, i32 1
-store i32 %r57, i32* %r59
-%r60 = lshr i128 %r56, 32
-%r61 = trunc i128 %r60 to i32
-%r63 = getelementptr i32, i32* %r2, i32 2
-store i32 %r61, i32* %r63
-%r64 = lshr i128 %r60, 32
-%r65 = trunc i128 %r64 to i32
-%r67 = getelementptr i32, i32* %r2, i32 3
-store i32 %r65, i32* %r67
-%r68 = lshr i160 %r51, 128
-%r69 = trunc i160 %r68 to i32
-ret i32 %r69
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = load i32, i32* %r3
+%r42 = zext i32 %r41 to i64
+%r44 = getelementptr i32, i32* %r3, i32 1
+%r45 = load i32, i32* %r44
+%r46 = zext i32 %r45 to i64
+%r47 = shl i64 %r46, 32
+%r48 = or i64 %r42, %r47
+%r49 = zext i64 %r48 to i96
+%r51 = getelementptr i32, i32* %r3, i32 2
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i96
+%r54 = shl i96 %r53, 64
+%r55 = or i96 %r49, %r54
+%r56 = zext i96 %r55 to i128
+%r58 = getelementptr i32, i32* %r3, i32 3
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i128
+%r61 = shl i128 %r60, 96
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i160
+%r65 = getelementptr i32, i32* %r3, i32 4
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i160
+%r68 = shl i160 %r67, 128
+%r69 = or i160 %r63, %r68
+%r70 = zext i160 %r69 to i192
+%r72 = getelementptr i32, i32* %r3, i32 5
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i192
+%r75 = shl i192 %r74, 160
+%r76 = or i192 %r70, %r75
+%r77 = sub i192 %r40, %r76
+%r78 = lshr i192 %r77, 191
+%r79 = trunc i192 %r78 to i1
+%r80 = load i32, i32* %r4
+%r81 = zext i32 %r80 to i64
+%r83 = getelementptr i32, i32* %r4, i32 1
+%r84 = load i32, i32* %r83
+%r85 = zext i32 %r84 to i64
+%r86 = shl i64 %r85, 32
+%r87 = or i64 %r81, %r86
+%r88 = zext i64 %r87 to i96
+%r90 = getelementptr i32, i32* %r4, i32 2
+%r91 = load i32, i32* %r90
+%r92 = zext i32 %r91 to i96
+%r93 = shl i96 %r92, 64
+%r94 = or i96 %r88, %r93
+%r95 = zext i96 %r94 to i128
+%r97 = getelementptr i32, i32* %r4, i32 3
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i128
+%r100 = shl i128 %r99, 96
+%r101 = or i128 %r95, %r100
+%r102 = zext i128 %r101 to i160
+%r104 = getelementptr i32, i32* %r4, i32 4
+%r105 = load i32, i32* %r104
+%r106 = zext i32 %r105 to i160
+%r107 = shl i160 %r106, 128
+%r108 = or i160 %r102, %r107
+%r109 = zext i160 %r108 to i192
+%r111 = getelementptr i32, i32* %r4, i32 5
+%r112 = load i32, i32* %r111
+%r113 = zext i32 %r112 to i192
+%r114 = shl i192 %r113, 160
+%r115 = or i192 %r109, %r114
+%r117 = select i1 %r79, i192 %r115, i192 0
+%r118 = add i192 %r77, %r117
+%r120 = getelementptr i32, i32* %r1, i32 0
+%r121 = trunc i192 %r118 to i32
+store i32 %r121, i32* %r120
+%r122 = lshr i192 %r118, 32
+%r124 = getelementptr i32, i32* %r1, i32 1
+%r125 = trunc i192 %r122 to i32
+store i32 %r125, i32* %r124
+%r126 = lshr i192 %r122, 32
+%r128 = getelementptr i32, i32* %r1, i32 2
+%r129 = trunc i192 %r126 to i32
+store i32 %r129, i32* %r128
+%r130 = lshr i192 %r126, 32
+%r132 = getelementptr i32, i32* %r1, i32 3
+%r133 = trunc i192 %r130 to i32
+store i32 %r133, i32* %r132
+%r134 = lshr i192 %r130, 32
+%r136 = getelementptr i32, i32* %r1, i32 4
+%r137 = trunc i192 %r134 to i32
+store i32 %r137, i32* %r136
+%r138 = lshr i192 %r134, 32
+%r140 = getelementptr i32, i32* %r1, i32 5
+%r141 = trunc i192 %r138 to i32
+store i32 %r141, i32* %r140
+ret void
 }
-define i32 @mcl_fp_subPre4L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fpDbl_add6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
-%r5 = load i32, i32* %r3
+%r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r28 = load i32, i32* %r4
-%r29 = zext i32 %r28 to i64
-%r31 = getelementptr i32, i32* %r4, i32 1
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i64
-%r34 = shl i64 %r33, 32
-%r35 = or i64 %r29, %r34
-%r36 = zext i64 %r35 to i96
-%r38 = getelementptr i32, i32* %r4, i32 2
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i96
-%r41 = shl i96 %r40, 64
-%r42 = or i96 %r36, %r41
-%r43 = zext i96 %r42 to i128
-%r45 = getelementptr i32, i32* %r4, i32 3
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i128
-%r48 = shl i128 %r47, 96
-%r49 = or i128 %r43, %r48
-%r50 = zext i128 %r49 to i160
-%r51 = sub i160 %r27, %r50
-%r52 = trunc i160 %r51 to i128
-%r53 = trunc i128 %r52 to i32
-%r55 = getelementptr i32, i32* %r2, i32 0
-store i32 %r53, i32* %r55
-%r56 = lshr i128 %r52, 32
-%r57 = trunc i128 %r56 to i32
-%r59 = getelementptr i32, i32* %r2, i32 1
-store i32 %r57, i32* %r59
-%r60 = lshr i128 %r56, 32
-%r61 = trunc i128 %r60 to i32
-%r63 = getelementptr i32, i32* %r2, i32 2
-store i32 %r61, i32* %r63
-%r64 = lshr i128 %r60, 32
-%r65 = trunc i128 %r64 to i32
-%r67 = getelementptr i32, i32* %r2, i32 3
-store i32 %r65, i32* %r67
-%r68 = lshr i160 %r51, 128
-%r69 = trunc i160 %r68 to i32
-%r71 = and i32 %r69, 1
-ret i32 %r71
-}
-define void @mcl_fp_shr1_4L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = zext i64 %r10 to i96
-%r13 = getelementptr i32, i32* %r2, i32 2
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i96
-%r16 = shl i96 %r15, 64
-%r17 = or i96 %r11, %r16
-%r18 = zext i96 %r17 to i128
-%r20 = getelementptr i32, i32* %r2, i32 3
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i128
-%r23 = shl i128 %r22, 96
-%r24 = or i128 %r18, %r23
-%r25 = lshr i128 %r24, 1
-%r26 = trunc i128 %r25 to i32
-%r28 = getelementptr i32, i32* %r1, i32 0
-store i32 %r26, i32* %r28
-%r29 = lshr i128 %r25, 32
-%r30 = trunc i128 %r29 to i32
-%r32 = getelementptr i32, i32* %r1, i32 1
-store i32 %r30, i32* %r32
-%r33 = lshr i128 %r29, 32
-%r34 = trunc i128 %r33 to i32
-%r36 = getelementptr i32, i32* %r1, i32 2
-store i32 %r34, i32* %r36
-%r37 = lshr i128 %r33, 32
-%r38 = trunc i128 %r37 to i32
-%r40 = getelementptr i32, i32* %r1, i32 3
-store i32 %r38, i32* %r40
-ret void
-}
-define void @mcl_fp_add4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = load i32, i32* %r3
-%r28 = zext i32 %r27 to i64
-%r30 = getelementptr i32, i32* %r3, i32 1
-%r31 = load i32, i32* %r30
-%r32 = zext i32 %r31 to i64
-%r33 = shl i64 %r32, 32
-%r34 = or i64 %r28, %r33
-%r35 = zext i64 %r34 to i96
-%r37 = getelementptr i32, i32* %r3, i32 2
-%r38 = load i32, i32* %r37
-%r39 = zext i32 %r38 to i96
-%r40 = shl i96 %r39, 64
-%r41 = or i96 %r35, %r40
-%r42 = zext i96 %r41 to i128
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i128
-%r47 = shl i128 %r46, 96
-%r48 = or i128 %r42, %r47
-%r49 = zext i128 %r26 to i160
-%r50 = zext i128 %r48 to i160
-%r51 = add i160 %r49, %r50
-%r52 = trunc i160 %r51 to i128
-%r53 = trunc i128 %r52 to i32
-%r55 = getelementptr i32, i32* %r1, i32 0
-store i32 %r53, i32* %r55
-%r56 = lshr i128 %r52, 32
-%r57 = trunc i128 %r56 to i32
-%r59 = getelementptr i32, i32* %r1, i32 1
-store i32 %r57, i32* %r59
-%r60 = lshr i128 %r56, 32
-%r61 = trunc i128 %r60 to i32
-%r63 = getelementptr i32, i32* %r1, i32 2
-store i32 %r61, i32* %r63
-%r64 = lshr i128 %r60, 32
-%r65 = trunc i128 %r64 to i32
-%r67 = getelementptr i32, i32* %r1, i32 3
-store i32 %r65, i32* %r67
-%r68 = load i32, i32* %r4
-%r69 = zext i32 %r68 to i64
-%r71 = getelementptr i32, i32* %r4, i32 1
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i64
-%r74 = shl i64 %r73, 32
-%r75 = or i64 %r69, %r74
-%r76 = zext i64 %r75 to i96
-%r78 = getelementptr i32, i32* %r4, i32 2
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i96
-%r81 = shl i96 %r80, 64
-%r82 = or i96 %r76, %r81
-%r83 = zext i96 %r82 to i128
-%r85 = getelementptr i32, i32* %r4, i32 3
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i128
-%r88 = shl i128 %r87, 96
-%r89 = or i128 %r83, %r88
-%r90 = zext i128 %r89 to i160
-%r91 = sub i160 %r51, %r90
-%r92 = lshr i160 %r91, 128
-%r93 = trunc i160 %r92 to i1
-br i1%r93, label %carry, label %nocarry
-nocarry:
-%r94 = trunc i160 %r91 to i128
-%r95 = trunc i128 %r94 to i32
-%r97 = getelementptr i32, i32* %r1, i32 0
-store i32 %r95, i32* %r97
-%r98 = lshr i128 %r94, 32
-%r99 = trunc i128 %r98 to i32
-%r101 = getelementptr i32, i32* %r1, i32 1
-store i32 %r99, i32* %r101
-%r102 = lshr i128 %r98, 32
-%r103 = trunc i128 %r102 to i32
-%r105 = getelementptr i32, i32* %r1, i32 2
-store i32 %r103, i32* %r105
-%r106 = lshr i128 %r102, 32
-%r107 = trunc i128 %r106 to i32
-%r109 = getelementptr i32, i32* %r1, i32 3
-store i32 %r107, i32* %r109
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = load i32, i32* %r3
-%r28 = zext i32 %r27 to i64
-%r30 = getelementptr i32, i32* %r3, i32 1
-%r31 = load i32, i32* %r30
-%r32 = zext i32 %r31 to i64
-%r33 = shl i64 %r32, 32
-%r34 = or i64 %r28, %r33
-%r35 = zext i64 %r34 to i96
-%r37 = getelementptr i32, i32* %r3, i32 2
-%r38 = load i32, i32* %r37
-%r39 = zext i32 %r38 to i96
-%r40 = shl i96 %r39, 64
-%r41 = or i96 %r35, %r40
-%r42 = zext i96 %r41 to i128
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i128
-%r47 = shl i128 %r46, 96
-%r48 = or i128 %r42, %r47
-%r49 = add i128 %r26, %r48
-%r50 = load i32, i32* %r4
-%r51 = zext i32 %r50 to i64
-%r53 = getelementptr i32, i32* %r4, i32 1
-%r54 = load i32, i32* %r53
-%r55 = zext i32 %r54 to i64
-%r56 = shl i64 %r55, 32
-%r57 = or i64 %r51, %r56
-%r58 = zext i64 %r57 to i96
-%r60 = getelementptr i32, i32* %r4, i32 2
-%r61 = load i32, i32* %r60
-%r62 = zext i32 %r61 to i96
-%r63 = shl i96 %r62, 64
-%r64 = or i96 %r58, %r63
-%r65 = zext i96 %r64 to i128
-%r67 = getelementptr i32, i32* %r4, i32 3
-%r68 = load i32, i32* %r67
-%r69 = zext i32 %r68 to i128
-%r70 = shl i128 %r69, 96
-%r71 = or i128 %r65, %r70
-%r72 = sub i128 %r49, %r71
-%r73 = lshr i128 %r72, 127
-%r74 = trunc i128 %r73 to i1
-%r75 = select i1 %r74, i128 %r49, i128 %r72
-%r76 = trunc i128 %r75 to i32
-%r78 = getelementptr i32, i32* %r1, i32 0
-store i32 %r76, i32* %r78
-%r79 = lshr i128 %r75, 32
-%r80 = trunc i128 %r79 to i32
-%r82 = getelementptr i32, i32* %r1, i32 1
-store i32 %r80, i32* %r82
-%r83 = lshr i128 %r79, 32
-%r84 = trunc i128 %r83 to i32
-%r86 = getelementptr i32, i32* %r1, i32 2
-store i32 %r84, i32* %r86
-%r87 = lshr i128 %r83, 32
-%r88 = trunc i128 %r87 to i32
-%r90 = getelementptr i32, i32* %r1, i32 3
-store i32 %r88, i32* %r90
-ret void
-}
-define void @mcl_fp_sub4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = load i32, i32* %r3
-%r28 = zext i32 %r27 to i64
-%r30 = getelementptr i32, i32* %r3, i32 1
-%r31 = load i32, i32* %r30
-%r32 = zext i32 %r31 to i64
-%r33 = shl i64 %r32, 32
-%r34 = or i64 %r28, %r33
-%r35 = zext i64 %r34 to i96
-%r37 = getelementptr i32, i32* %r3, i32 2
-%r38 = load i32, i32* %r37
-%r39 = zext i32 %r38 to i96
-%r40 = shl i96 %r39, 64
-%r41 = or i96 %r35, %r40
-%r42 = zext i96 %r41 to i128
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i128
-%r47 = shl i128 %r46, 96
-%r48 = or i128 %r42, %r47
-%r49 = zext i128 %r26 to i160
-%r50 = zext i128 %r48 to i160
-%r51 = sub i160 %r49, %r50
-%r52 = trunc i160 %r51 to i128
-%r53 = lshr i160 %r51, 128
-%r54 = trunc i160 %r53 to i1
-%r55 = trunc i128 %r52 to i32
-%r57 = getelementptr i32, i32* %r1, i32 0
-store i32 %r55, i32* %r57
-%r58 = lshr i128 %r52, 32
-%r59 = trunc i128 %r58 to i32
-%r61 = getelementptr i32, i32* %r1, i32 1
-store i32 %r59, i32* %r61
-%r62 = lshr i128 %r58, 32
-%r63 = trunc i128 %r62 to i32
-%r65 = getelementptr i32, i32* %r1, i32 2
-store i32 %r63, i32* %r65
-%r66 = lshr i128 %r62, 32
-%r67 = trunc i128 %r66 to i32
-%r69 = getelementptr i32, i32* %r1, i32 3
-store i32 %r67, i32* %r69
-br i1%r54, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r70 = load i32, i32* %r4
-%r71 = zext i32 %r70 to i64
-%r73 = getelementptr i32, i32* %r4, i32 1
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i64
-%r76 = shl i64 %r75, 32
-%r77 = or i64 %r71, %r76
-%r78 = zext i64 %r77 to i96
-%r80 = getelementptr i32, i32* %r4, i32 2
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i96
-%r83 = shl i96 %r82, 64
-%r84 = or i96 %r78, %r83
-%r85 = zext i96 %r84 to i128
-%r87 = getelementptr i32, i32* %r4, i32 3
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i128
-%r90 = shl i128 %r89, 96
-%r91 = or i128 %r85, %r90
-%r92 = add i128 %r52, %r91
-%r93 = trunc i128 %r92 to i32
-%r95 = getelementptr i32, i32* %r1, i32 0
-store i32 %r93, i32* %r95
-%r96 = lshr i128 %r92, 32
-%r97 = trunc i128 %r96 to i32
-%r99 = getelementptr i32, i32* %r1, i32 1
-store i32 %r97, i32* %r99
-%r100 = lshr i128 %r96, 32
-%r101 = trunc i128 %r100 to i32
-%r103 = getelementptr i32, i32* %r1, i32 2
-store i32 %r101, i32* %r103
-%r104 = lshr i128 %r100, 32
-%r105 = trunc i128 %r104 to i32
-%r107 = getelementptr i32, i32* %r1, i32 3
-store i32 %r105, i32* %r107
-ret void
-}
-define void @mcl_fp_subNF4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = load i32, i32* %r3
-%r28 = zext i32 %r27 to i64
-%r30 = getelementptr i32, i32* %r3, i32 1
-%r31 = load i32, i32* %r30
-%r32 = zext i32 %r31 to i64
-%r33 = shl i64 %r32, 32
-%r34 = or i64 %r28, %r33
-%r35 = zext i64 %r34 to i96
-%r37 = getelementptr i32, i32* %r3, i32 2
-%r38 = load i32, i32* %r37
-%r39 = zext i32 %r38 to i96
-%r40 = shl i96 %r39, 64
-%r41 = or i96 %r35, %r40
-%r42 = zext i96 %r41 to i128
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i128
-%r47 = shl i128 %r46, 96
-%r48 = or i128 %r42, %r47
-%r49 = sub i128 %r26, %r48
-%r50 = lshr i128 %r49, 127
-%r51 = trunc i128 %r50 to i1
-%r52 = load i32, i32* %r4
-%r53 = zext i32 %r52 to i64
-%r55 = getelementptr i32, i32* %r4, i32 1
-%r56 = load i32, i32* %r55
-%r57 = zext i32 %r56 to i64
-%r58 = shl i64 %r57, 32
-%r59 = or i64 %r53, %r58
-%r60 = zext i64 %r59 to i96
-%r62 = getelementptr i32, i32* %r4, i32 2
-%r63 = load i32, i32* %r62
-%r64 = zext i32 %r63 to i96
-%r65 = shl i96 %r64, 64
-%r66 = or i96 %r60, %r65
-%r67 = zext i96 %r66 to i128
-%r69 = getelementptr i32, i32* %r4, i32 3
-%r70 = load i32, i32* %r69
-%r71 = zext i32 %r70 to i128
-%r72 = shl i128 %r71, 96
-%r73 = or i128 %r67, %r72
-%r75 = select i1 %r51, i128 %r73, i128 0
-%r76 = add i128 %r49, %r75
-%r77 = trunc i128 %r76 to i32
-%r79 = getelementptr i32, i32* %r1, i32 0
-store i32 %r77, i32* %r79
-%r80 = lshr i128 %r76, 32
-%r81 = trunc i128 %r80 to i32
-%r83 = getelementptr i32, i32* %r1, i32 1
-store i32 %r81, i32* %r83
-%r84 = lshr i128 %r80, 32
-%r85 = trunc i128 %r84 to i32
-%r87 = getelementptr i32, i32* %r1, i32 2
-store i32 %r85, i32* %r87
-%r88 = lshr i128 %r84, 32
-%r89 = trunc i128 %r88 to i32
-%r91 = getelementptr i32, i32* %r1, i32 3
-store i32 %r89, i32* %r91
-ret void
-}
-define void @mcl_fpDbl_add4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
+%r8 = getelementptr i32, i32* %r2, i32 1
 %r9 = load i32, i32* %r8
 %r10 = zext i32 %r9 to i64
 %r11 = shl i64 %r10, 32
@@ -3467,114 +2301,190 @@ define void @mcl_fpDbl_add4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r52 = zext i32 %r51 to i256
 %r53 = shl i256 %r52, 224
 %r54 = or i256 %r48, %r53
-%r55 = load i32, i32* %r3
-%r56 = zext i32 %r55 to i64
-%r58 = getelementptr i32, i32* %r3, i32 1
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i64
-%r61 = shl i64 %r60, 32
-%r62 = or i64 %r56, %r61
-%r63 = zext i64 %r62 to i96
-%r65 = getelementptr i32, i32* %r3, i32 2
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i96
-%r68 = shl i96 %r67, 64
-%r69 = or i96 %r63, %r68
-%r70 = zext i96 %r69 to i128
-%r72 = getelementptr i32, i32* %r3, i32 3
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i128
-%r75 = shl i128 %r74, 96
-%r76 = or i128 %r70, %r75
-%r77 = zext i128 %r76 to i160
-%r79 = getelementptr i32, i32* %r3, i32 4
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i160
-%r82 = shl i160 %r81, 128
-%r83 = or i160 %r77, %r82
-%r84 = zext i160 %r83 to i192
-%r86 = getelementptr i32, i32* %r3, i32 5
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = load i32, i32* %r3
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r3, i32 1
 %r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i192
-%r89 = shl i192 %r88, 160
-%r90 = or i192 %r84, %r89
-%r91 = zext i192 %r90 to i224
-%r93 = getelementptr i32, i32* %r3, i32 6
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r3, i32 2
 %r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i224
-%r96 = shl i224 %r95, 192
-%r97 = or i224 %r91, %r96
-%r98 = zext i224 %r97 to i256
-%r100 = getelementptr i32, i32* %r3, i32 7
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r3, i32 3
 %r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i256
-%r103 = shl i256 %r102, 224
-%r104 = or i256 %r98, %r103
-%r105 = zext i256 %r54 to i288
-%r106 = zext i256 %r104 to i288
-%r107 = add i288 %r105, %r106
-%r108 = trunc i288 %r107 to i128
-%r109 = trunc i128 %r108 to i32
-%r111 = getelementptr i32, i32* %r1, i32 0
-store i32 %r109, i32* %r111
-%r112 = lshr i128 %r108, 32
-%r113 = trunc i128 %r112 to i32
-%r115 = getelementptr i32, i32* %r1, i32 1
-store i32 %r113, i32* %r115
-%r116 = lshr i128 %r112, 32
-%r117 = trunc i128 %r116 to i32
-%r119 = getelementptr i32, i32* %r1, i32 2
-store i32 %r117, i32* %r119
-%r120 = lshr i128 %r116, 32
-%r121 = trunc i128 %r120 to i32
-%r123 = getelementptr i32, i32* %r1, i32 3
-store i32 %r121, i32* %r123
-%r124 = lshr i288 %r107, 128
-%r125 = trunc i288 %r124 to i160
-%r126 = load i32, i32* %r4
-%r127 = zext i32 %r126 to i64
-%r129 = getelementptr i32, i32* %r4, i32 1
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i64
-%r132 = shl i64 %r131, 32
-%r133 = or i64 %r127, %r132
-%r134 = zext i64 %r133 to i96
-%r136 = getelementptr i32, i32* %r4, i32 2
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i96
-%r139 = shl i96 %r138, 64
-%r140 = or i96 %r134, %r139
-%r141 = zext i96 %r140 to i128
-%r143 = getelementptr i32, i32* %r4, i32 3
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i128
-%r146 = shl i128 %r145, 96
-%r147 = or i128 %r141, %r146
-%r148 = zext i128 %r147 to i160
-%r149 = sub i160 %r125, %r148
-%r150 = lshr i160 %r149, 128
-%r151 = trunc i160 %r150 to i1
-%r152 = select i1 %r151, i160 %r125, i160 %r149
-%r153 = trunc i160 %r152 to i128
-%r155 = getelementptr i32, i32* %r1, i32 4
-%r156 = trunc i128 %r153 to i32
-%r158 = getelementptr i32, i32* %r155, i32 0
-store i32 %r156, i32* %r158
-%r159 = lshr i128 %r153, 32
-%r160 = trunc i128 %r159 to i32
-%r162 = getelementptr i32, i32* %r155, i32 1
-store i32 %r160, i32* %r162
-%r163 = lshr i128 %r159, 32
-%r164 = trunc i128 %r163 to i32
-%r166 = getelementptr i32, i32* %r155, i32 2
-store i32 %r164, i32* %r166
-%r167 = lshr i128 %r163, 32
-%r168 = trunc i128 %r167 to i32
-%r170 = getelementptr i32, i32* %r155, i32 3
-store i32 %r168, i32* %r170
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r3, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r3, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r121 = getelementptr i32, i32* %r3, i32 6
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i224
+%r124 = shl i224 %r123, 192
+%r125 = or i224 %r119, %r124
+%r126 = zext i224 %r125 to i256
+%r128 = getelementptr i32, i32* %r3, i32 7
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i256
+%r131 = shl i256 %r130, 224
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i288
+%r135 = getelementptr i32, i32* %r3, i32 8
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i288
+%r138 = shl i288 %r137, 256
+%r139 = or i288 %r133, %r138
+%r140 = zext i288 %r139 to i320
+%r142 = getelementptr i32, i32* %r3, i32 9
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i320
+%r145 = shl i320 %r144, 288
+%r146 = or i320 %r140, %r145
+%r147 = zext i320 %r146 to i352
+%r149 = getelementptr i32, i32* %r3, i32 10
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i352
+%r152 = shl i352 %r151, 320
+%r153 = or i352 %r147, %r152
+%r154 = zext i352 %r153 to i384
+%r156 = getelementptr i32, i32* %r3, i32 11
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i384
+%r159 = shl i384 %r158, 352
+%r160 = or i384 %r154, %r159
+%r161 = zext i384 %r82 to i416
+%r162 = zext i384 %r160 to i416
+%r163 = add i416 %r161, %r162
+%r164 = trunc i416 %r163 to i192
+%r166 = getelementptr i32, i32* %r1, i32 0
+%r167 = trunc i192 %r164 to i32
+store i32 %r167, i32* %r166
+%r168 = lshr i192 %r164, 32
+%r170 = getelementptr i32, i32* %r1, i32 1
+%r171 = trunc i192 %r168 to i32
+store i32 %r171, i32* %r170
+%r172 = lshr i192 %r168, 32
+%r174 = getelementptr i32, i32* %r1, i32 2
+%r175 = trunc i192 %r172 to i32
+store i32 %r175, i32* %r174
+%r176 = lshr i192 %r172, 32
+%r178 = getelementptr i32, i32* %r1, i32 3
+%r179 = trunc i192 %r176 to i32
+store i32 %r179, i32* %r178
+%r180 = lshr i192 %r176, 32
+%r182 = getelementptr i32, i32* %r1, i32 4
+%r183 = trunc i192 %r180 to i32
+store i32 %r183, i32* %r182
+%r184 = lshr i192 %r180, 32
+%r186 = getelementptr i32, i32* %r1, i32 5
+%r187 = trunc i192 %r184 to i32
+store i32 %r187, i32* %r186
+%r188 = lshr i416 %r163, 192
+%r189 = trunc i416 %r188 to i224
+%r190 = load i32, i32* %r4
+%r191 = zext i32 %r190 to i64
+%r193 = getelementptr i32, i32* %r4, i32 1
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i64
+%r196 = shl i64 %r195, 32
+%r197 = or i64 %r191, %r196
+%r198 = zext i64 %r197 to i96
+%r200 = getelementptr i32, i32* %r4, i32 2
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i96
+%r203 = shl i96 %r202, 64
+%r204 = or i96 %r198, %r203
+%r205 = zext i96 %r204 to i128
+%r207 = getelementptr i32, i32* %r4, i32 3
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i128
+%r210 = shl i128 %r209, 96
+%r211 = or i128 %r205, %r210
+%r212 = zext i128 %r211 to i160
+%r214 = getelementptr i32, i32* %r4, i32 4
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i160
+%r217 = shl i160 %r216, 128
+%r218 = or i160 %r212, %r217
+%r219 = zext i160 %r218 to i192
+%r221 = getelementptr i32, i32* %r4, i32 5
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i192
+%r224 = shl i192 %r223, 160
+%r225 = or i192 %r219, %r224
+%r226 = zext i192 %r225 to i224
+%r227 = sub i224 %r189, %r226
+%r228 = lshr i224 %r227, 192
+%r229 = trunc i224 %r228 to i1
+%r230 = select i1 %r229, i224 %r189, i224 %r227
+%r231 = trunc i224 %r230 to i192
+%r233 = getelementptr i32, i32* %r1, i32 6
+%r235 = getelementptr i32, i32* %r233, i32 0
+%r236 = trunc i192 %r231 to i32
+store i32 %r236, i32* %r235
+%r237 = lshr i192 %r231, 32
+%r239 = getelementptr i32, i32* %r233, i32 1
+%r240 = trunc i192 %r237 to i32
+store i32 %r240, i32* %r239
+%r241 = lshr i192 %r237, 32
+%r243 = getelementptr i32, i32* %r233, i32 2
+%r244 = trunc i192 %r241 to i32
+store i32 %r244, i32* %r243
+%r245 = lshr i192 %r241, 32
+%r247 = getelementptr i32, i32* %r233, i32 3
+%r248 = trunc i192 %r245 to i32
+store i32 %r248, i32* %r247
+%r249 = lshr i192 %r245, 32
+%r251 = getelementptr i32, i32* %r233, i32 4
+%r252 = trunc i192 %r249 to i32
+store i32 %r252, i32* %r251
+%r253 = lshr i192 %r249, 32
+%r255 = getelementptr i32, i32* %r233, i32 5
+%r256 = trunc i192 %r253 to i32
+store i32 %r256, i32* %r255
 ret void
 }
-define void @mcl_fpDbl_sub4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fpDbl_sub6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -3619,1535 +2529,1551 @@ define void @mcl_fpDbl_sub4L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r52 = zext i32 %r51 to i256
 %r53 = shl i256 %r52, 224
 %r54 = or i256 %r48, %r53
-%r55 = load i32, i32* %r3
-%r56 = zext i32 %r55 to i64
-%r58 = getelementptr i32, i32* %r3, i32 1
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i64
-%r61 = shl i64 %r60, 32
-%r62 = or i64 %r56, %r61
-%r63 = zext i64 %r62 to i96
-%r65 = getelementptr i32, i32* %r3, i32 2
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i96
-%r68 = shl i96 %r67, 64
-%r69 = or i96 %r63, %r68
-%r70 = zext i96 %r69 to i128
-%r72 = getelementptr i32, i32* %r3, i32 3
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i128
-%r75 = shl i128 %r74, 96
-%r76 = or i128 %r70, %r75
-%r77 = zext i128 %r76 to i160
-%r79 = getelementptr i32, i32* %r3, i32 4
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i160
-%r82 = shl i160 %r81, 128
-%r83 = or i160 %r77, %r82
-%r84 = zext i160 %r83 to i192
-%r86 = getelementptr i32, i32* %r3, i32 5
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = load i32, i32* %r3
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r3, i32 1
 %r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i192
-%r89 = shl i192 %r88, 160
-%r90 = or i192 %r84, %r89
-%r91 = zext i192 %r90 to i224
-%r93 = getelementptr i32, i32* %r3, i32 6
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r3, i32 2
 %r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i224
-%r96 = shl i224 %r95, 192
-%r97 = or i224 %r91, %r96
-%r98 = zext i224 %r97 to i256
-%r100 = getelementptr i32, i32* %r3, i32 7
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r3, i32 3
 %r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i256
-%r103 = shl i256 %r102, 224
-%r104 = or i256 %r98, %r103
-%r105 = zext i256 %r54 to i288
-%r106 = zext i256 %r104 to i288
-%r107 = sub i288 %r105, %r106
-%r108 = trunc i288 %r107 to i128
-%r109 = trunc i128 %r108 to i32
-%r111 = getelementptr i32, i32* %r1, i32 0
-store i32 %r109, i32* %r111
-%r112 = lshr i128 %r108, 32
-%r113 = trunc i128 %r112 to i32
-%r115 = getelementptr i32, i32* %r1, i32 1
-store i32 %r113, i32* %r115
-%r116 = lshr i128 %r112, 32
-%r117 = trunc i128 %r116 to i32
-%r119 = getelementptr i32, i32* %r1, i32 2
-store i32 %r117, i32* %r119
-%r120 = lshr i128 %r116, 32
-%r121 = trunc i128 %r120 to i32
-%r123 = getelementptr i32, i32* %r1, i32 3
-store i32 %r121, i32* %r123
-%r124 = lshr i288 %r107, 128
-%r125 = trunc i288 %r124 to i128
-%r126 = lshr i288 %r107, 256
-%r127 = trunc i288 %r126 to i1
-%r128 = load i32, i32* %r4
-%r129 = zext i32 %r128 to i64
-%r131 = getelementptr i32, i32* %r4, i32 1
-%r132 = load i32, i32* %r131
-%r133 = zext i32 %r132 to i64
-%r134 = shl i64 %r133, 32
-%r135 = or i64 %r129, %r134
-%r136 = zext i64 %r135 to i96
-%r138 = getelementptr i32, i32* %r4, i32 2
-%r139 = load i32, i32* %r138
-%r140 = zext i32 %r139 to i96
-%r141 = shl i96 %r140, 64
-%r142 = or i96 %r136, %r141
-%r143 = zext i96 %r142 to i128
-%r145 = getelementptr i32, i32* %r4, i32 3
-%r146 = load i32, i32* %r145
-%r147 = zext i32 %r146 to i128
-%r148 = shl i128 %r147, 96
-%r149 = or i128 %r143, %r148
-%r151 = select i1 %r127, i128 %r149, i128 0
-%r152 = add i128 %r125, %r151
-%r154 = getelementptr i32, i32* %r1, i32 4
-%r155 = trunc i128 %r152 to i32
-%r157 = getelementptr i32, i32* %r154, i32 0
-store i32 %r155, i32* %r157
-%r158 = lshr i128 %r152, 32
-%r159 = trunc i128 %r158 to i32
-%r161 = getelementptr i32, i32* %r154, i32 1
-store i32 %r159, i32* %r161
-%r162 = lshr i128 %r158, 32
-%r163 = trunc i128 %r162 to i32
-%r165 = getelementptr i32, i32* %r154, i32 2
-store i32 %r163, i32* %r165
-%r166 = lshr i128 %r162, 32
-%r167 = trunc i128 %r166 to i32
-%r169 = getelementptr i32, i32* %r154, i32 3
-store i32 %r167, i32* %r169
-ret void
-}
-define i192 @mulPv160x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
-%r18 = trunc i64 %r17 to i32
-%r19 = call i32 @extractHigh32(i64 %r17)
-%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
-%r22 = trunc i64 %r21 to i32
-%r23 = call i32 @extractHigh32(i64 %r21)
-%r24 = zext i32 %r6 to i64
-%r25 = zext i32 %r10 to i64
-%r26 = shl i64 %r25, 32
-%r27 = or i64 %r24, %r26
-%r28 = zext i64 %r27 to i96
-%r29 = zext i32 %r14 to i96
-%r30 = shl i96 %r29, 64
-%r31 = or i96 %r28, %r30
-%r32 = zext i96 %r31 to i128
-%r33 = zext i32 %r18 to i128
-%r34 = shl i128 %r33, 96
-%r35 = or i128 %r32, %r34
-%r36 = zext i128 %r35 to i160
-%r37 = zext i32 %r22 to i160
-%r38 = shl i160 %r37, 128
-%r39 = or i160 %r36, %r38
-%r40 = zext i32 %r7 to i64
-%r41 = zext i32 %r11 to i64
-%r42 = shl i64 %r41, 32
-%r43 = or i64 %r40, %r42
-%r44 = zext i64 %r43 to i96
-%r45 = zext i32 %r15 to i96
-%r46 = shl i96 %r45, 64
-%r47 = or i96 %r44, %r46
-%r48 = zext i96 %r47 to i128
-%r49 = zext i32 %r19 to i128
-%r50 = shl i128 %r49, 96
-%r51 = or i128 %r48, %r50
-%r52 = zext i128 %r51 to i160
-%r53 = zext i32 %r23 to i160
-%r54 = shl i160 %r53, 128
-%r55 = or i160 %r52, %r54
-%r56 = zext i160 %r39 to i192
-%r57 = zext i160 %r55 to i192
-%r58 = shl i192 %r57, 32
-%r59 = add i192 %r56, %r58
-ret i192 %r59
-}
-define void @mcl_fp_mulUnitPre5L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i192 @mulPv160x32(i32* %r2, i32 %r3)
-%r5 = trunc i192 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i192 %r4, 32
-%r9 = trunc i192 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i192 %r8, 32
-%r13 = trunc i192 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i192 %r12, 32
-%r17 = trunc i192 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i192 %r16, 32
-%r21 = trunc i192 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-%r24 = lshr i192 %r20, 32
-%r25 = trunc i192 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 5
-store i32 %r25, i32* %r27
-ret void
-}
-define void @mcl_fpDbl_mulPre5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r4 = load i32, i32* %r3
-%r5 = call i192 @mulPv160x32(i32* %r2, i32 %r4)
-%r6 = trunc i192 %r5 to i32
-store i32 %r6, i32* %r1
-%r7 = lshr i192 %r5, 32
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r3, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r3, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r121 = getelementptr i32, i32* %r3, i32 6
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i224
+%r124 = shl i224 %r123, 192
+%r125 = or i224 %r119, %r124
+%r126 = zext i224 %r125 to i256
+%r128 = getelementptr i32, i32* %r3, i32 7
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i256
+%r131 = shl i256 %r130, 224
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i288
+%r135 = getelementptr i32, i32* %r3, i32 8
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i288
+%r138 = shl i288 %r137, 256
+%r139 = or i288 %r133, %r138
+%r140 = zext i288 %r139 to i320
+%r142 = getelementptr i32, i32* %r3, i32 9
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i320
+%r145 = shl i320 %r144, 288
+%r146 = or i320 %r140, %r145
+%r147 = zext i320 %r146 to i352
+%r149 = getelementptr i32, i32* %r3, i32 10
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i352
+%r152 = shl i352 %r151, 320
+%r153 = or i352 %r147, %r152
+%r154 = zext i352 %r153 to i384
+%r156 = getelementptr i32, i32* %r3, i32 11
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i384
+%r159 = shl i384 %r158, 352
+%r160 = or i384 %r154, %r159
+%r161 = zext i384 %r82 to i416
+%r162 = zext i384 %r160 to i416
+%r163 = sub i416 %r161, %r162
+%r164 = trunc i416 %r163 to i192
+%r166 = getelementptr i32, i32* %r1, i32 0
+%r167 = trunc i192 %r164 to i32
+store i32 %r167, i32* %r166
+%r168 = lshr i192 %r164, 32
+%r170 = getelementptr i32, i32* %r1, i32 1
+%r171 = trunc i192 %r168 to i32
+store i32 %r171, i32* %r170
+%r172 = lshr i192 %r168, 32
+%r174 = getelementptr i32, i32* %r1, i32 2
+%r175 = trunc i192 %r172 to i32
+store i32 %r175, i32* %r174
+%r176 = lshr i192 %r172, 32
+%r178 = getelementptr i32, i32* %r1, i32 3
+%r179 = trunc i192 %r176 to i32
+store i32 %r179, i32* %r178
+%r180 = lshr i192 %r176, 32
+%r182 = getelementptr i32, i32* %r1, i32 4
+%r183 = trunc i192 %r180 to i32
+store i32 %r183, i32* %r182
+%r184 = lshr i192 %r180, 32
+%r186 = getelementptr i32, i32* %r1, i32 5
+%r187 = trunc i192 %r184 to i32
+store i32 %r187, i32* %r186
+%r188 = lshr i416 %r163, 192
+%r189 = trunc i416 %r188 to i192
+%r190 = lshr i416 %r163, 384
+%r191 = trunc i416 %r190 to i1
+%r192 = load i32, i32* %r4
+%r193 = zext i32 %r192 to i64
+%r195 = getelementptr i32, i32* %r4, i32 1
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i64
+%r198 = shl i64 %r197, 32
+%r199 = or i64 %r193, %r198
+%r200 = zext i64 %r199 to i96
+%r202 = getelementptr i32, i32* %r4, i32 2
+%r203 = load i32, i32* %r202
+%r204 = zext i32 %r203 to i96
+%r205 = shl i96 %r204, 64
+%r206 = or i96 %r200, %r205
+%r207 = zext i96 %r206 to i128
+%r209 = getelementptr i32, i32* %r4, i32 3
+%r210 = load i32, i32* %r209
+%r211 = zext i32 %r210 to i128
+%r212 = shl i128 %r211, 96
+%r213 = or i128 %r207, %r212
+%r214 = zext i128 %r213 to i160
+%r216 = getelementptr i32, i32* %r4, i32 4
+%r217 = load i32, i32* %r216
+%r218 = zext i32 %r217 to i160
+%r219 = shl i160 %r218, 128
+%r220 = or i160 %r214, %r219
+%r221 = zext i160 %r220 to i192
+%r223 = getelementptr i32, i32* %r4, i32 5
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i192
+%r226 = shl i192 %r225, 160
+%r227 = or i192 %r221, %r226
+%r229 = select i1 %r191, i192 %r227, i192 0
+%r230 = add i192 %r189, %r229
+%r232 = getelementptr i32, i32* %r1, i32 6
+%r234 = getelementptr i32, i32* %r232, i32 0
+%r235 = trunc i192 %r230 to i32
+store i32 %r235, i32* %r234
+%r236 = lshr i192 %r230, 32
+%r238 = getelementptr i32, i32* %r232, i32 1
+%r239 = trunc i192 %r236 to i32
+store i32 %r239, i32* %r238
+%r240 = lshr i192 %r236, 32
+%r242 = getelementptr i32, i32* %r232, i32 2
+%r243 = trunc i192 %r240 to i32
+store i32 %r243, i32* %r242
+%r244 = lshr i192 %r240, 32
+%r246 = getelementptr i32, i32* %r232, i32 3
+%r247 = trunc i192 %r244 to i32
+store i32 %r247, i32* %r246
+%r248 = lshr i192 %r244, 32
+%r250 = getelementptr i32, i32* %r232, i32 4
+%r251 = trunc i192 %r248 to i32
+store i32 %r251, i32* %r250
+%r252 = lshr i192 %r248, 32
+%r254 = getelementptr i32, i32* %r232, i32 5
+%r255 = trunc i192 %r252 to i32
+store i32 %r255, i32* %r254
+ret void
+}
+define i256 @mulPv224x32(i32* noalias  %r2, i32 %r3)
+{
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r32 = zext i32 %r6 to i64
+%r33 = zext i32 %r10 to i64
+%r34 = shl i64 %r33, 32
+%r35 = or i64 %r32, %r34
+%r36 = zext i64 %r35 to i96
+%r37 = zext i32 %r14 to i96
+%r38 = shl i96 %r37, 64
+%r39 = or i96 %r36, %r38
+%r40 = zext i96 %r39 to i128
+%r41 = zext i32 %r18 to i128
+%r42 = shl i128 %r41, 96
+%r43 = or i128 %r40, %r42
+%r44 = zext i128 %r43 to i160
+%r45 = zext i32 %r22 to i160
+%r46 = shl i160 %r45, 128
+%r47 = or i160 %r44, %r46
+%r48 = zext i160 %r47 to i192
+%r49 = zext i32 %r26 to i192
+%r50 = shl i192 %r49, 160
+%r51 = or i192 %r48, %r50
+%r52 = zext i192 %r51 to i224
+%r53 = zext i32 %r30 to i224
+%r54 = shl i224 %r53, 192
+%r55 = or i224 %r52, %r54
+%r56 = zext i32 %r7 to i64
+%r57 = zext i32 %r11 to i64
+%r58 = shl i64 %r57, 32
+%r59 = or i64 %r56, %r58
+%r60 = zext i64 %r59 to i96
+%r61 = zext i32 %r15 to i96
+%r62 = shl i96 %r61, 64
+%r63 = or i96 %r60, %r62
+%r64 = zext i96 %r63 to i128
+%r65 = zext i32 %r19 to i128
+%r66 = shl i128 %r65, 96
+%r67 = or i128 %r64, %r66
+%r68 = zext i128 %r67 to i160
+%r69 = zext i32 %r23 to i160
+%r70 = shl i160 %r69, 128
+%r71 = or i160 %r68, %r70
+%r72 = zext i160 %r71 to i192
+%r73 = zext i32 %r27 to i192
+%r74 = shl i192 %r73, 160
+%r75 = or i192 %r72, %r74
+%r76 = zext i192 %r75 to i224
+%r77 = zext i32 %r31 to i224
+%r78 = shl i224 %r77, 192
+%r79 = or i224 %r76, %r78
+%r80 = zext i224 %r55 to i256
+%r81 = zext i224 %r79 to i256
+%r82 = shl i256 %r81, 32
+%r83 = add i256 %r80, %r82
+ret i256 %r83
+}
+define void @mcl_fp_mulUnitPre7L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
+{
+%r4 = call i256 @mulPv224x32(i32* %r2, i32 %r3)
+%r6 = getelementptr i32, i32* %r1, i32 0
+%r7 = trunc i256 %r4 to i32
+store i32 %r7, i32* %r6
+%r8 = lshr i256 %r4, 32
+%r10 = getelementptr i32, i32* %r1, i32 1
+%r11 = trunc i256 %r8 to i32
+store i32 %r11, i32* %r10
+%r12 = lshr i256 %r8, 32
+%r14 = getelementptr i32, i32* %r1, i32 2
+%r15 = trunc i256 %r12 to i32
+store i32 %r15, i32* %r14
+%r16 = lshr i256 %r12, 32
+%r18 = getelementptr i32, i32* %r1, i32 3
+%r19 = trunc i256 %r16 to i32
+store i32 %r19, i32* %r18
+%r20 = lshr i256 %r16, 32
+%r22 = getelementptr i32, i32* %r1, i32 4
+%r23 = trunc i256 %r20 to i32
+store i32 %r23, i32* %r22
+%r24 = lshr i256 %r20, 32
+%r26 = getelementptr i32, i32* %r1, i32 5
+%r27 = trunc i256 %r24 to i32
+store i32 %r27, i32* %r26
+%r28 = lshr i256 %r24, 32
+%r30 = getelementptr i32, i32* %r1, i32 6
+%r31 = trunc i256 %r28 to i32
+store i32 %r31, i32* %r30
+%r32 = lshr i256 %r28, 32
+%r34 = getelementptr i32, i32* %r1, i32 7
+%r35 = trunc i256 %r32 to i32
+store i32 %r35, i32* %r34
+ret void
+}
+define void @mcl_fpDbl_mulPre7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r4 = load i32, i32* %r3
+%r5 = call i256 @mulPv224x32(i32* %r2, i32 %r4)
+%r6 = trunc i256 %r5 to i32
+store i32 %r6, i32* %r1
+%r7 = lshr i256 %r5, 32
 %r9 = getelementptr i32, i32* %r3, i32 1
 %r10 = load i32, i32* %r9
-%r11 = call i192 @mulPv160x32(i32* %r2, i32 %r10)
-%r12 = add i192 %r7, %r11
-%r13 = trunc i192 %r12 to i32
+%r11 = call i256 @mulPv224x32(i32* %r2, i32 %r10)
+%r12 = add i256 %r7, %r11
+%r13 = trunc i256 %r12 to i32
 %r15 = getelementptr i32, i32* %r1, i32 1
 store i32 %r13, i32* %r15
-%r16 = lshr i192 %r12, 32
+%r16 = lshr i256 %r12, 32
 %r18 = getelementptr i32, i32* %r3, i32 2
 %r19 = load i32, i32* %r18
-%r20 = call i192 @mulPv160x32(i32* %r2, i32 %r19)
-%r21 = add i192 %r16, %r20
-%r22 = trunc i192 %r21 to i32
+%r20 = call i256 @mulPv224x32(i32* %r2, i32 %r19)
+%r21 = add i256 %r16, %r20
+%r22 = trunc i256 %r21 to i32
 %r24 = getelementptr i32, i32* %r1, i32 2
 store i32 %r22, i32* %r24
-%r25 = lshr i192 %r21, 32
+%r25 = lshr i256 %r21, 32
 %r27 = getelementptr i32, i32* %r3, i32 3
 %r28 = load i32, i32* %r27
-%r29 = call i192 @mulPv160x32(i32* %r2, i32 %r28)
-%r30 = add i192 %r25, %r29
-%r31 = trunc i192 %r30 to i32
+%r29 = call i256 @mulPv224x32(i32* %r2, i32 %r28)
+%r30 = add i256 %r25, %r29
+%r31 = trunc i256 %r30 to i32
 %r33 = getelementptr i32, i32* %r1, i32 3
 store i32 %r31, i32* %r33
-%r34 = lshr i192 %r30, 32
+%r34 = lshr i256 %r30, 32
 %r36 = getelementptr i32, i32* %r3, i32 4
 %r37 = load i32, i32* %r36
-%r38 = call i192 @mulPv160x32(i32* %r2, i32 %r37)
-%r39 = add i192 %r34, %r38
-%r41 = getelementptr i32, i32* %r1, i32 4
-%r42 = trunc i192 %r39 to i32
-%r44 = getelementptr i32, i32* %r41, i32 0
-store i32 %r42, i32* %r44
-%r45 = lshr i192 %r39, 32
-%r46 = trunc i192 %r45 to i32
-%r48 = getelementptr i32, i32* %r41, i32 1
-store i32 %r46, i32* %r48
-%r49 = lshr i192 %r45, 32
-%r50 = trunc i192 %r49 to i32
-%r52 = getelementptr i32, i32* %r41, i32 2
-store i32 %r50, i32* %r52
-%r53 = lshr i192 %r49, 32
-%r54 = trunc i192 %r53 to i32
-%r56 = getelementptr i32, i32* %r41, i32 3
-store i32 %r54, i32* %r56
-%r57 = lshr i192 %r53, 32
-%r58 = trunc i192 %r57 to i32
-%r60 = getelementptr i32, i32* %r41, i32 4
-store i32 %r58, i32* %r60
-%r61 = lshr i192 %r57, 32
-%r62 = trunc i192 %r61 to i32
-%r64 = getelementptr i32, i32* %r41, i32 5
-store i32 %r62, i32* %r64
+%r38 = call i256 @mulPv224x32(i32* %r2, i32 %r37)
+%r39 = add i256 %r34, %r38
+%r40 = trunc i256 %r39 to i32
+%r42 = getelementptr i32, i32* %r1, i32 4
+store i32 %r40, i32* %r42
+%r43 = lshr i256 %r39, 32
+%r45 = getelementptr i32, i32* %r3, i32 5
+%r46 = load i32, i32* %r45
+%r47 = call i256 @mulPv224x32(i32* %r2, i32 %r46)
+%r48 = add i256 %r43, %r47
+%r49 = trunc i256 %r48 to i32
+%r51 = getelementptr i32, i32* %r1, i32 5
+store i32 %r49, i32* %r51
+%r52 = lshr i256 %r48, 32
+%r54 = getelementptr i32, i32* %r3, i32 6
+%r55 = load i32, i32* %r54
+%r56 = call i256 @mulPv224x32(i32* %r2, i32 %r55)
+%r57 = add i256 %r52, %r56
+%r59 = getelementptr i32, i32* %r1, i32 6
+%r61 = getelementptr i32, i32* %r59, i32 0
+%r62 = trunc i256 %r57 to i32
+store i32 %r62, i32* %r61
+%r63 = lshr i256 %r57, 32
+%r65 = getelementptr i32, i32* %r59, i32 1
+%r66 = trunc i256 %r63 to i32
+store i32 %r66, i32* %r65
+%r67 = lshr i256 %r63, 32
+%r69 = getelementptr i32, i32* %r59, i32 2
+%r70 = trunc i256 %r67 to i32
+store i32 %r70, i32* %r69
+%r71 = lshr i256 %r67, 32
+%r73 = getelementptr i32, i32* %r59, i32 3
+%r74 = trunc i256 %r71 to i32
+store i32 %r74, i32* %r73
+%r75 = lshr i256 %r71, 32
+%r77 = getelementptr i32, i32* %r59, i32 4
+%r78 = trunc i256 %r75 to i32
+store i32 %r78, i32* %r77
+%r79 = lshr i256 %r75, 32
+%r81 = getelementptr i32, i32* %r59, i32 5
+%r82 = trunc i256 %r79 to i32
+store i32 %r82, i32* %r81
+%r83 = lshr i256 %r79, 32
+%r85 = getelementptr i32, i32* %r59, i32 6
+%r86 = trunc i256 %r83 to i32
+store i32 %r86, i32* %r85
+%r87 = lshr i256 %r83, 32
+%r89 = getelementptr i32, i32* %r59, i32 7
+%r90 = trunc i256 %r87 to i32
+store i32 %r90, i32* %r89
 ret void
 }
-define void @mcl_fpDbl_sqrPre5L(i32* noalias  %r1, i32* noalias  %r2)
+define void @mcl_fpDbl_sqrPre7L(i32* noalias  %r1, i32* noalias  %r2)
 {
 %r3 = load i32, i32* %r2
-%r4 = call i192 @mulPv160x32(i32* %r2, i32 %r3)
-%r5 = trunc i192 %r4 to i32
+%r4 = call i256 @mulPv224x32(i32* %r2, i32 %r3)
+%r5 = trunc i256 %r4 to i32
 store i32 %r5, i32* %r1
-%r6 = lshr i192 %r4, 32
+%r6 = lshr i256 %r4, 32
 %r8 = getelementptr i32, i32* %r2, i32 1
 %r9 = load i32, i32* %r8
-%r10 = call i192 @mulPv160x32(i32* %r2, i32 %r9)
-%r11 = add i192 %r6, %r10
-%r12 = trunc i192 %r11 to i32
+%r10 = call i256 @mulPv224x32(i32* %r2, i32 %r9)
+%r11 = add i256 %r6, %r10
+%r12 = trunc i256 %r11 to i32
 %r14 = getelementptr i32, i32* %r1, i32 1
 store i32 %r12, i32* %r14
-%r15 = lshr i192 %r11, 32
+%r15 = lshr i256 %r11, 32
 %r17 = getelementptr i32, i32* %r2, i32 2
 %r18 = load i32, i32* %r17
-%r19 = call i192 @mulPv160x32(i32* %r2, i32 %r18)
-%r20 = add i192 %r15, %r19
-%r21 = trunc i192 %r20 to i32
+%r19 = call i256 @mulPv224x32(i32* %r2, i32 %r18)
+%r20 = add i256 %r15, %r19
+%r21 = trunc i256 %r20 to i32
 %r23 = getelementptr i32, i32* %r1, i32 2
 store i32 %r21, i32* %r23
-%r24 = lshr i192 %r20, 32
+%r24 = lshr i256 %r20, 32
 %r26 = getelementptr i32, i32* %r2, i32 3
 %r27 = load i32, i32* %r26
-%r28 = call i192 @mulPv160x32(i32* %r2, i32 %r27)
-%r29 = add i192 %r24, %r28
-%r30 = trunc i192 %r29 to i32
-%r32 = getelementptr i32, i32* %r1, i32 3
-store i32 %r30, i32* %r32
-%r33 = lshr i192 %r29, 32
+%r28 = call i256 @mulPv224x32(i32* %r2, i32 %r27)
+%r29 = add i256 %r24, %r28
+%r30 = trunc i256 %r29 to i32
+%r32 = getelementptr i32, i32* %r1, i32 3
+store i32 %r30, i32* %r32
+%r33 = lshr i256 %r29, 32
 %r35 = getelementptr i32, i32* %r2, i32 4
 %r36 = load i32, i32* %r35
-%r37 = call i192 @mulPv160x32(i32* %r2, i32 %r36)
-%r38 = add i192 %r33, %r37
-%r40 = getelementptr i32, i32* %r1, i32 4
-%r41 = trunc i192 %r38 to i32
-%r43 = getelementptr i32, i32* %r40, i32 0
-store i32 %r41, i32* %r43
-%r44 = lshr i192 %r38, 32
-%r45 = trunc i192 %r44 to i32
-%r47 = getelementptr i32, i32* %r40, i32 1
-store i32 %r45, i32* %r47
-%r48 = lshr i192 %r44, 32
-%r49 = trunc i192 %r48 to i32
-%r51 = getelementptr i32, i32* %r40, i32 2
-store i32 %r49, i32* %r51
-%r52 = lshr i192 %r48, 32
-%r53 = trunc i192 %r52 to i32
-%r55 = getelementptr i32, i32* %r40, i32 3
-store i32 %r53, i32* %r55
-%r56 = lshr i192 %r52, 32
-%r57 = trunc i192 %r56 to i32
-%r59 = getelementptr i32, i32* %r40, i32 4
-store i32 %r57, i32* %r59
-%r60 = lshr i192 %r56, 32
-%r61 = trunc i192 %r60 to i32
-%r63 = getelementptr i32, i32* %r40, i32 5
-store i32 %r61, i32* %r63
+%r37 = call i256 @mulPv224x32(i32* %r2, i32 %r36)
+%r38 = add i256 %r33, %r37
+%r39 = trunc i256 %r38 to i32
+%r41 = getelementptr i32, i32* %r1, i32 4
+store i32 %r39, i32* %r41
+%r42 = lshr i256 %r38, 32
+%r44 = getelementptr i32, i32* %r2, i32 5
+%r45 = load i32, i32* %r44
+%r46 = call i256 @mulPv224x32(i32* %r2, i32 %r45)
+%r47 = add i256 %r42, %r46
+%r48 = trunc i256 %r47 to i32
+%r50 = getelementptr i32, i32* %r1, i32 5
+store i32 %r48, i32* %r50
+%r51 = lshr i256 %r47, 32
+%r53 = getelementptr i32, i32* %r2, i32 6
+%r54 = load i32, i32* %r53
+%r55 = call i256 @mulPv224x32(i32* %r2, i32 %r54)
+%r56 = add i256 %r51, %r55
+%r58 = getelementptr i32, i32* %r1, i32 6
+%r60 = getelementptr i32, i32* %r58, i32 0
+%r61 = trunc i256 %r56 to i32
+store i32 %r61, i32* %r60
+%r62 = lshr i256 %r56, 32
+%r64 = getelementptr i32, i32* %r58, i32 1
+%r65 = trunc i256 %r62 to i32
+store i32 %r65, i32* %r64
+%r66 = lshr i256 %r62, 32
+%r68 = getelementptr i32, i32* %r58, i32 2
+%r69 = trunc i256 %r66 to i32
+store i32 %r69, i32* %r68
+%r70 = lshr i256 %r66, 32
+%r72 = getelementptr i32, i32* %r58, i32 3
+%r73 = trunc i256 %r70 to i32
+store i32 %r73, i32* %r72
+%r74 = lshr i256 %r70, 32
+%r76 = getelementptr i32, i32* %r58, i32 4
+%r77 = trunc i256 %r74 to i32
+store i32 %r77, i32* %r76
+%r78 = lshr i256 %r74, 32
+%r80 = getelementptr i32, i32* %r58, i32 5
+%r81 = trunc i256 %r78 to i32
+store i32 %r81, i32* %r80
+%r82 = lshr i256 %r78, 32
+%r84 = getelementptr i32, i32* %r58, i32 6
+%r85 = trunc i256 %r82 to i32
+store i32 %r85, i32* %r84
+%r86 = lshr i256 %r82, 32
+%r88 = getelementptr i32, i32* %r58, i32 7
+%r89 = trunc i256 %r86 to i32
+store i32 %r89, i32* %r88
 ret void
 }
-define void @mcl_fp_mont5L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+define void @mcl_fp_mont7L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
 {
 %r6 = getelementptr i32, i32* %r4, i32 -1
 %r7 = load i32, i32* %r6
 %r9 = getelementptr i32, i32* %r3, i32 0
 %r10 = load i32, i32* %r9
-%r11 = call i192 @mulPv160x32(i32* %r2, i32 %r10)
-%r12 = zext i192 %r11 to i224
-%r13 = trunc i192 %r11 to i32
+%r11 = call i256 @mulPv224x32(i32* %r2, i32 %r10)
+%r12 = zext i256 %r11 to i288
+%r13 = trunc i256 %r11 to i32
 %r14 = mul i32 %r13, %r7
-%r15 = call i192 @mulPv160x32(i32* %r4, i32 %r14)
-%r16 = zext i192 %r15 to i224
-%r17 = add i224 %r12, %r16
-%r18 = lshr i224 %r17, 32
+%r15 = call i256 @mulPv224x32(i32* %r4, i32 %r14)
+%r16 = zext i256 %r15 to i288
+%r17 = add i288 %r12, %r16
+%r18 = lshr i288 %r17, 32
 %r20 = getelementptr i32, i32* %r3, i32 1
 %r21 = load i32, i32* %r20
-%r22 = call i192 @mulPv160x32(i32* %r2, i32 %r21)
-%r23 = zext i192 %r22 to i224
-%r24 = add i224 %r18, %r23
-%r25 = trunc i224 %r24 to i32
+%r22 = call i256 @mulPv224x32(i32* %r2, i32 %r21)
+%r23 = zext i256 %r22 to i288
+%r24 = add i288 %r18, %r23
+%r25 = trunc i288 %r24 to i32
 %r26 = mul i32 %r25, %r7
-%r27 = call i192 @mulPv160x32(i32* %r4, i32 %r26)
-%r28 = zext i192 %r27 to i224
-%r29 = add i224 %r24, %r28
-%r30 = lshr i224 %r29, 32
+%r27 = call i256 @mulPv224x32(i32* %r4, i32 %r26)
+%r28 = zext i256 %r27 to i288
+%r29 = add i288 %r24, %r28
+%r30 = lshr i288 %r29, 32
 %r32 = getelementptr i32, i32* %r3, i32 2
 %r33 = load i32, i32* %r32
-%r34 = call i192 @mulPv160x32(i32* %r2, i32 %r33)
-%r35 = zext i192 %r34 to i224
-%r36 = add i224 %r30, %r35
-%r37 = trunc i224 %r36 to i32
+%r34 = call i256 @mulPv224x32(i32* %r2, i32 %r33)
+%r35 = zext i256 %r34 to i288
+%r36 = add i288 %r30, %r35
+%r37 = trunc i288 %r36 to i32
 %r38 = mul i32 %r37, %r7
-%r39 = call i192 @mulPv160x32(i32* %r4, i32 %r38)
-%r40 = zext i192 %r39 to i224
-%r41 = add i224 %r36, %r40
-%r42 = lshr i224 %r41, 32
+%r39 = call i256 @mulPv224x32(i32* %r4, i32 %r38)
+%r40 = zext i256 %r39 to i288
+%r41 = add i288 %r36, %r40
+%r42 = lshr i288 %r41, 32
 %r44 = getelementptr i32, i32* %r3, i32 3
 %r45 = load i32, i32* %r44
-%r46 = call i192 @mulPv160x32(i32* %r2, i32 %r45)
-%r47 = zext i192 %r46 to i224
-%r48 = add i224 %r42, %r47
-%r49 = trunc i224 %r48 to i32
+%r46 = call i256 @mulPv224x32(i32* %r2, i32 %r45)
+%r47 = zext i256 %r46 to i288
+%r48 = add i288 %r42, %r47
+%r49 = trunc i288 %r48 to i32
 %r50 = mul i32 %r49, %r7
-%r51 = call i192 @mulPv160x32(i32* %r4, i32 %r50)
-%r52 = zext i192 %r51 to i224
-%r53 = add i224 %r48, %r52
-%r54 = lshr i224 %r53, 32
+%r51 = call i256 @mulPv224x32(i32* %r4, i32 %r50)
+%r52 = zext i256 %r51 to i288
+%r53 = add i288 %r48, %r52
+%r54 = lshr i288 %r53, 32
 %r56 = getelementptr i32, i32* %r3, i32 4
 %r57 = load i32, i32* %r56
-%r58 = call i192 @mulPv160x32(i32* %r2, i32 %r57)
-%r59 = zext i192 %r58 to i224
-%r60 = add i224 %r54, %r59
-%r61 = trunc i224 %r60 to i32
+%r58 = call i256 @mulPv224x32(i32* %r2, i32 %r57)
+%r59 = zext i256 %r58 to i288
+%r60 = add i288 %r54, %r59
+%r61 = trunc i288 %r60 to i32
 %r62 = mul i32 %r61, %r7
-%r63 = call i192 @mulPv160x32(i32* %r4, i32 %r62)
-%r64 = zext i192 %r63 to i224
-%r65 = add i224 %r60, %r64
-%r66 = lshr i224 %r65, 32
-%r67 = trunc i224 %r66 to i192
-%r68 = load i32, i32* %r4
-%r69 = zext i32 %r68 to i64
-%r71 = getelementptr i32, i32* %r4, i32 1
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i64
-%r74 = shl i64 %r73, 32
-%r75 = or i64 %r69, %r74
-%r76 = zext i64 %r75 to i96
-%r78 = getelementptr i32, i32* %r4, i32 2
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i96
-%r81 = shl i96 %r80, 64
-%r82 = or i96 %r76, %r81
-%r83 = zext i96 %r82 to i128
-%r85 = getelementptr i32, i32* %r4, i32 3
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i128
-%r88 = shl i128 %r87, 96
-%r89 = or i128 %r83, %r88
-%r90 = zext i128 %r89 to i160
-%r92 = getelementptr i32, i32* %r4, i32 4
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i160
-%r95 = shl i160 %r94, 128
-%r96 = or i160 %r90, %r95
-%r97 = zext i160 %r96 to i192
-%r98 = sub i192 %r67, %r97
-%r99 = lshr i192 %r98, 160
-%r100 = trunc i192 %r99 to i1
-%r101 = select i1 %r100, i192 %r67, i192 %r98
-%r102 = trunc i192 %r101 to i160
-%r103 = trunc i160 %r102 to i32
-%r105 = getelementptr i32, i32* %r1, i32 0
-store i32 %r103, i32* %r105
-%r106 = lshr i160 %r102, 32
-%r107 = trunc i160 %r106 to i32
-%r109 = getelementptr i32, i32* %r1, i32 1
-store i32 %r107, i32* %r109
-%r110 = lshr i160 %r106, 32
-%r111 = trunc i160 %r110 to i32
-%r113 = getelementptr i32, i32* %r1, i32 2
-store i32 %r111, i32* %r113
-%r114 = lshr i160 %r110, 32
-%r115 = trunc i160 %r114 to i32
-%r117 = getelementptr i32, i32* %r1, i32 3
-store i32 %r115, i32* %r117
-%r118 = lshr i160 %r114, 32
-%r119 = trunc i160 %r118 to i32
-%r121 = getelementptr i32, i32* %r1, i32 4
-store i32 %r119, i32* %r121
+%r63 = call i256 @mulPv224x32(i32* %r4, i32 %r62)
+%r64 = zext i256 %r63 to i288
+%r65 = add i288 %r60, %r64
+%r66 = lshr i288 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5
+%r69 = load i32, i32* %r68
+%r70 = call i256 @mulPv224x32(i32* %r2, i32 %r69)
+%r71 = zext i256 %r70 to i288
+%r72 = add i288 %r66, %r71
+%r73 = trunc i288 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i256 @mulPv224x32(i32* %r4, i32 %r74)
+%r76 = zext i256 %r75 to i288
+%r77 = add i288 %r72, %r76
+%r78 = lshr i288 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6
+%r81 = load i32, i32* %r80
+%r82 = call i256 @mulPv224x32(i32* %r2, i32 %r81)
+%r83 = zext i256 %r82 to i288
+%r84 = add i288 %r78, %r83
+%r85 = trunc i288 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i256 @mulPv224x32(i32* %r4, i32 %r86)
+%r88 = zext i256 %r87 to i288
+%r89 = add i288 %r84, %r88
+%r90 = lshr i288 %r89, 32
+%r91 = trunc i288 %r90 to i256
+%r92 = load i32, i32* %r4
+%r93 = zext i32 %r92 to i64
+%r95 = getelementptr i32, i32* %r4, i32 1
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i64
+%r98 = shl i64 %r97, 32
+%r99 = or i64 %r93, %r98
+%r100 = zext i64 %r99 to i96
+%r102 = getelementptr i32, i32* %r4, i32 2
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i96
+%r105 = shl i96 %r104, 64
+%r106 = or i96 %r100, %r105
+%r107 = zext i96 %r106 to i128
+%r109 = getelementptr i32, i32* %r4, i32 3
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i128
+%r112 = shl i128 %r111, 96
+%r113 = or i128 %r107, %r112
+%r114 = zext i128 %r113 to i160
+%r116 = getelementptr i32, i32* %r4, i32 4
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i160
+%r119 = shl i160 %r118, 128
+%r120 = or i160 %r114, %r119
+%r121 = zext i160 %r120 to i192
+%r123 = getelementptr i32, i32* %r4, i32 5
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i192
+%r126 = shl i192 %r125, 160
+%r127 = or i192 %r121, %r126
+%r128 = zext i192 %r127 to i224
+%r130 = getelementptr i32, i32* %r4, i32 6
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i224
+%r133 = shl i224 %r132, 192
+%r134 = or i224 %r128, %r133
+%r135 = zext i224 %r134 to i256
+%r136 = sub i256 %r91, %r135
+%r137 = lshr i256 %r136, 224
+%r138 = trunc i256 %r137 to i1
+%r139 = select i1 %r138, i256 %r91, i256 %r136
+%r140 = trunc i256 %r139 to i224
+%r142 = getelementptr i32, i32* %r1, i32 0
+%r143 = trunc i224 %r140 to i32
+store i32 %r143, i32* %r142
+%r144 = lshr i224 %r140, 32
+%r146 = getelementptr i32, i32* %r1, i32 1
+%r147 = trunc i224 %r144 to i32
+store i32 %r147, i32* %r146
+%r148 = lshr i224 %r144, 32
+%r150 = getelementptr i32, i32* %r1, i32 2
+%r151 = trunc i224 %r148 to i32
+store i32 %r151, i32* %r150
+%r152 = lshr i224 %r148, 32
+%r154 = getelementptr i32, i32* %r1, i32 3
+%r155 = trunc i224 %r152 to i32
+store i32 %r155, i32* %r154
+%r156 = lshr i224 %r152, 32
+%r158 = getelementptr i32, i32* %r1, i32 4
+%r159 = trunc i224 %r156 to i32
+store i32 %r159, i32* %r158
+%r160 = lshr i224 %r156, 32
+%r162 = getelementptr i32, i32* %r1, i32 5
+%r163 = trunc i224 %r160 to i32
+store i32 %r163, i32* %r162
+%r164 = lshr i224 %r160, 32
+%r166 = getelementptr i32, i32* %r1, i32 6
+%r167 = trunc i224 %r164 to i32
+store i32 %r167, i32* %r166
 ret void
 }
-define void @mcl_fp_montNF5L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+define void @mcl_fp_montNF7L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
 {
 %r6 = getelementptr i32, i32* %r4, i32 -1
 %r7 = load i32, i32* %r6
 %r8 = load i32, i32* %r3
-%r9 = call i192 @mulPv160x32(i32* %r2, i32 %r8)
-%r10 = trunc i192 %r9 to i32
+%r9 = call i256 @mulPv224x32(i32* %r2, i32 %r8)
+%r10 = trunc i256 %r9 to i32
 %r11 = mul i32 %r10, %r7
-%r12 = call i192 @mulPv160x32(i32* %r4, i32 %r11)
-%r13 = add i192 %r9, %r12
-%r14 = lshr i192 %r13, 32
+%r12 = call i256 @mulPv224x32(i32* %r4, i32 %r11)
+%r13 = add i256 %r9, %r12
+%r14 = lshr i256 %r13, 32
 %r16 = getelementptr i32, i32* %r3, i32 1
 %r17 = load i32, i32* %r16
-%r18 = call i192 @mulPv160x32(i32* %r2, i32 %r17)
-%r19 = add i192 %r14, %r18
-%r20 = trunc i192 %r19 to i32
+%r18 = call i256 @mulPv224x32(i32* %r2, i32 %r17)
+%r19 = add i256 %r14, %r18
+%r20 = trunc i256 %r19 to i32
 %r21 = mul i32 %r20, %r7
-%r22 = call i192 @mulPv160x32(i32* %r4, i32 %r21)
-%r23 = add i192 %r19, %r22
-%r24 = lshr i192 %r23, 32
+%r22 = call i256 @mulPv224x32(i32* %r4, i32 %r21)
+%r23 = add i256 %r19, %r22
+%r24 = lshr i256 %r23, 32
 %r26 = getelementptr i32, i32* %r3, i32 2
 %r27 = load i32, i32* %r26
-%r28 = call i192 @mulPv160x32(i32* %r2, i32 %r27)
-%r29 = add i192 %r24, %r28
-%r30 = trunc i192 %r29 to i32
+%r28 = call i256 @mulPv224x32(i32* %r2, i32 %r27)
+%r29 = add i256 %r24, %r28
+%r30 = trunc i256 %r29 to i32
 %r31 = mul i32 %r30, %r7
-%r32 = call i192 @mulPv160x32(i32* %r4, i32 %r31)
-%r33 = add i192 %r29, %r32
-%r34 = lshr i192 %r33, 32
+%r32 = call i256 @mulPv224x32(i32* %r4, i32 %r31)
+%r33 = add i256 %r29, %r32
+%r34 = lshr i256 %r33, 32
 %r36 = getelementptr i32, i32* %r3, i32 3
 %r37 = load i32, i32* %r36
-%r38 = call i192 @mulPv160x32(i32* %r2, i32 %r37)
-%r39 = add i192 %r34, %r38
-%r40 = trunc i192 %r39 to i32
+%r38 = call i256 @mulPv224x32(i32* %r2, i32 %r37)
+%r39 = add i256 %r34, %r38
+%r40 = trunc i256 %r39 to i32
 %r41 = mul i32 %r40, %r7
-%r42 = call i192 @mulPv160x32(i32* %r4, i32 %r41)
-%r43 = add i192 %r39, %r42
-%r44 = lshr i192 %r43, 32
+%r42 = call i256 @mulPv224x32(i32* %r4, i32 %r41)
+%r43 = add i256 %r39, %r42
+%r44 = lshr i256 %r43, 32
 %r46 = getelementptr i32, i32* %r3, i32 4
 %r47 = load i32, i32* %r46
-%r48 = call i192 @mulPv160x32(i32* %r2, i32 %r47)
-%r49 = add i192 %r44, %r48
-%r50 = trunc i192 %r49 to i32
+%r48 = call i256 @mulPv224x32(i32* %r2, i32 %r47)
+%r49 = add i256 %r44, %r48
+%r50 = trunc i256 %r49 to i32
 %r51 = mul i32 %r50, %r7
-%r52 = call i192 @mulPv160x32(i32* %r4, i32 %r51)
-%r53 = add i192 %r49, %r52
-%r54 = lshr i192 %r53, 32
-%r55 = trunc i192 %r54 to i160
-%r56 = load i32, i32* %r4
-%r57 = zext i32 %r56 to i64
-%r59 = getelementptr i32, i32* %r4, i32 1
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i64
-%r62 = shl i64 %r61, 32
-%r63 = or i64 %r57, %r62
-%r64 = zext i64 %r63 to i96
-%r66 = getelementptr i32, i32* %r4, i32 2
+%r52 = call i256 @mulPv224x32(i32* %r4, i32 %r51)
+%r53 = add i256 %r49, %r52
+%r54 = lshr i256 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5
+%r57 = load i32, i32* %r56
+%r58 = call i256 @mulPv224x32(i32* %r2, i32 %r57)
+%r59 = add i256 %r54, %r58
+%r60 = trunc i256 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i256 @mulPv224x32(i32* %r4, i32 %r61)
+%r63 = add i256 %r59, %r62
+%r64 = lshr i256 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6
 %r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i96
-%r69 = shl i96 %r68, 64
-%r70 = or i96 %r64, %r69
-%r71 = zext i96 %r70 to i128
-%r73 = getelementptr i32, i32* %r4, i32 3
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i128
-%r76 = shl i128 %r75, 96
-%r77 = or i128 %r71, %r76
-%r78 = zext i128 %r77 to i160
-%r80 = getelementptr i32, i32* %r4, i32 4
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i160
-%r83 = shl i160 %r82, 128
-%r84 = or i160 %r78, %r83
-%r85 = sub i160 %r55, %r84
-%r86 = lshr i160 %r85, 159
-%r87 = trunc i160 %r86 to i1
-%r88 = select i1 %r87, i160 %r55, i160 %r85
-%r89 = trunc i160 %r88 to i32
-%r91 = getelementptr i32, i32* %r1, i32 0
-store i32 %r89, i32* %r91
-%r92 = lshr i160 %r88, 32
-%r93 = trunc i160 %r92 to i32
-%r95 = getelementptr i32, i32* %r1, i32 1
-store i32 %r93, i32* %r95
-%r96 = lshr i160 %r92, 32
-%r97 = trunc i160 %r96 to i32
-%r99 = getelementptr i32, i32* %r1, i32 2
-store i32 %r97, i32* %r99
-%r100 = lshr i160 %r96, 32
-%r101 = trunc i160 %r100 to i32
-%r103 = getelementptr i32, i32* %r1, i32 3
-store i32 %r101, i32* %r103
-%r104 = lshr i160 %r100, 32
-%r105 = trunc i160 %r104 to i32
-%r107 = getelementptr i32, i32* %r1, i32 4
-store i32 %r105, i32* %r107
-ret void
-}
-define void @mcl_fp_montRed5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = zext i96 %r21 to i128
-%r24 = getelementptr i32, i32* %r3, i32 3
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i128
-%r27 = shl i128 %r26, 96
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i160
-%r31 = getelementptr i32, i32* %r3, i32 4
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i160
-%r34 = shl i160 %r33, 128
-%r35 = or i160 %r29, %r34
-%r36 = load i32, i32* %r2
-%r37 = zext i32 %r36 to i64
-%r39 = getelementptr i32, i32* %r2, i32 1
-%r40 = load i32, i32* %r39
-%r41 = zext i32 %r40 to i64
-%r42 = shl i64 %r41, 32
-%r43 = or i64 %r37, %r42
-%r44 = zext i64 %r43 to i96
-%r46 = getelementptr i32, i32* %r2, i32 2
-%r47 = load i32, i32* %r46
-%r48 = zext i32 %r47 to i96
-%r49 = shl i96 %r48, 64
-%r50 = or i96 %r44, %r49
-%r51 = zext i96 %r50 to i128
-%r53 = getelementptr i32, i32* %r2, i32 3
+%r68 = call i256 @mulPv224x32(i32* %r2, i32 %r67)
+%r69 = add i256 %r64, %r68
+%r70 = trunc i256 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i256 @mulPv224x32(i32* %r4, i32 %r71)
+%r73 = add i256 %r69, %r72
+%r74 = lshr i256 %r73, 32
+%r75 = trunc i256 %r74 to i224
+%r76 = load i32, i32* %r4
+%r77 = zext i32 %r76 to i64
+%r79 = getelementptr i32, i32* %r4, i32 1
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i64
+%r82 = shl i64 %r81, 32
+%r83 = or i64 %r77, %r82
+%r84 = zext i64 %r83 to i96
+%r86 = getelementptr i32, i32* %r4, i32 2
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i96
+%r89 = shl i96 %r88, 64
+%r90 = or i96 %r84, %r89
+%r91 = zext i96 %r90 to i128
+%r93 = getelementptr i32, i32* %r4, i32 3
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i128
+%r96 = shl i128 %r95, 96
+%r97 = or i128 %r91, %r96
+%r98 = zext i128 %r97 to i160
+%r100 = getelementptr i32, i32* %r4, i32 4
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i160
+%r103 = shl i160 %r102, 128
+%r104 = or i160 %r98, %r103
+%r105 = zext i160 %r104 to i192
+%r107 = getelementptr i32, i32* %r4, i32 5
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i192
+%r110 = shl i192 %r109, 160
+%r111 = or i192 %r105, %r110
+%r112 = zext i192 %r111 to i224
+%r114 = getelementptr i32, i32* %r4, i32 6
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i224
+%r117 = shl i224 %r116, 192
+%r118 = or i224 %r112, %r117
+%r119 = sub i224 %r75, %r118
+%r120 = lshr i224 %r119, 223
+%r121 = trunc i224 %r120 to i1
+%r122 = select i1 %r121, i224 %r75, i224 %r119
+%r124 = getelementptr i32, i32* %r1, i32 0
+%r125 = trunc i224 %r122 to i32
+store i32 %r125, i32* %r124
+%r126 = lshr i224 %r122, 32
+%r128 = getelementptr i32, i32* %r1, i32 1
+%r129 = trunc i224 %r126 to i32
+store i32 %r129, i32* %r128
+%r130 = lshr i224 %r126, 32
+%r132 = getelementptr i32, i32* %r1, i32 2
+%r133 = trunc i224 %r130 to i32
+store i32 %r133, i32* %r132
+%r134 = lshr i224 %r130, 32
+%r136 = getelementptr i32, i32* %r1, i32 3
+%r137 = trunc i224 %r134 to i32
+store i32 %r137, i32* %r136
+%r138 = lshr i224 %r134, 32
+%r140 = getelementptr i32, i32* %r1, i32 4
+%r141 = trunc i224 %r138 to i32
+store i32 %r141, i32* %r140
+%r142 = lshr i224 %r138, 32
+%r144 = getelementptr i32, i32* %r1, i32 5
+%r145 = trunc i224 %r142 to i32
+store i32 %r145, i32* %r144
+%r146 = lshr i224 %r142, 32
+%r148 = getelementptr i32, i32* %r1, i32 6
+%r149 = trunc i224 %r146 to i32
+store i32 %r149, i32* %r148
+ret void
+}
+define void @mcl_fp_montRed7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = load i32, i32* %r2
+%r51 = zext i32 %r50 to i64
+%r53 = getelementptr i32, i32* %r2, i32 1
 %r54 = load i32, i32* %r53
-%r55 = zext i32 %r54 to i128
-%r56 = shl i128 %r55, 96
-%r57 = or i128 %r51, %r56
-%r58 = zext i128 %r57 to i160
-%r60 = getelementptr i32, i32* %r2, i32 4
+%r55 = zext i32 %r54 to i64
+%r56 = shl i64 %r55, 32
+%r57 = or i64 %r51, %r56
+%r58 = zext i64 %r57 to i96
+%r60 = getelementptr i32, i32* %r2, i32 2
 %r61 = load i32, i32* %r60
-%r62 = zext i32 %r61 to i160
-%r63 = shl i160 %r62, 128
-%r64 = or i160 %r58, %r63
-%r65 = zext i160 %r64 to i192
-%r67 = getelementptr i32, i32* %r2, i32 5
+%r62 = zext i32 %r61 to i96
+%r63 = shl i96 %r62, 64
+%r64 = or i96 %r58, %r63
+%r65 = zext i96 %r64 to i128
+%r67 = getelementptr i32, i32* %r2, i32 3
 %r68 = load i32, i32* %r67
-%r69 = zext i32 %r68 to i192
-%r70 = shl i192 %r69, 160
-%r71 = or i192 %r65, %r70
-%r72 = zext i192 %r71 to i224
-%r74 = getelementptr i32, i32* %r2, i32 6
+%r69 = zext i32 %r68 to i128
+%r70 = shl i128 %r69, 96
+%r71 = or i128 %r65, %r70
+%r72 = zext i128 %r71 to i160
+%r74 = getelementptr i32, i32* %r2, i32 4
 %r75 = load i32, i32* %r74
-%r76 = zext i32 %r75 to i224
-%r77 = shl i224 %r76, 192
-%r78 = or i224 %r72, %r77
-%r79 = zext i224 %r78 to i256
-%r81 = getelementptr i32, i32* %r2, i32 7
+%r76 = zext i32 %r75 to i160
+%r77 = shl i160 %r76, 128
+%r78 = or i160 %r72, %r77
+%r79 = zext i160 %r78 to i192
+%r81 = getelementptr i32, i32* %r2, i32 5
 %r82 = load i32, i32* %r81
-%r83 = zext i32 %r82 to i256
-%r84 = shl i256 %r83, 224
-%r85 = or i256 %r79, %r84
-%r86 = zext i256 %r85 to i288
-%r88 = getelementptr i32, i32* %r2, i32 8
+%r83 = zext i32 %r82 to i192
+%r84 = shl i192 %r83, 160
+%r85 = or i192 %r79, %r84
+%r86 = zext i192 %r85 to i224
+%r88 = getelementptr i32, i32* %r2, i32 6
 %r89 = load i32, i32* %r88
-%r90 = zext i32 %r89 to i288
-%r91 = shl i288 %r90, 256
-%r92 = or i288 %r86, %r91
-%r93 = zext i288 %r92 to i320
-%r95 = getelementptr i32, i32* %r2, i32 9
-%r96 = load i32, i32* %r95
-%r97 = zext i32 %r96 to i320
-%r98 = shl i320 %r97, 288
-%r99 = or i320 %r93, %r98
-%r100 = zext i320 %r99 to i352
-%r101 = trunc i352 %r100 to i32
-%r102 = mul i32 %r101, %r6
-%r103 = call i192 @mulPv160x32(i32* %r3, i32 %r102)
-%r104 = zext i192 %r103 to i352
-%r105 = add i352 %r100, %r104
-%r106 = lshr i352 %r105, 32
-%r107 = trunc i352 %r106 to i320
-%r108 = trunc i320 %r107 to i32
-%r109 = mul i32 %r108, %r6
-%r110 = call i192 @mulPv160x32(i32* %r3, i32 %r109)
-%r111 = zext i192 %r110 to i320
-%r112 = add i320 %r107, %r111
-%r113 = lshr i320 %r112, 32
-%r114 = trunc i320 %r113 to i288
-%r115 = trunc i288 %r114 to i32
-%r116 = mul i32 %r115, %r6
-%r117 = call i192 @mulPv160x32(i32* %r3, i32 %r116)
-%r118 = zext i192 %r117 to i288
-%r119 = add i288 %r114, %r118
-%r120 = lshr i288 %r119, 32
-%r121 = trunc i288 %r120 to i256
-%r122 = trunc i256 %r121 to i32
-%r123 = mul i32 %r122, %r6
-%r124 = call i192 @mulPv160x32(i32* %r3, i32 %r123)
-%r125 = zext i192 %r124 to i256
-%r126 = add i256 %r121, %r125
-%r127 = lshr i256 %r126, 32
-%r128 = trunc i256 %r127 to i224
-%r129 = trunc i224 %r128 to i32
-%r130 = mul i32 %r129, %r6
-%r131 = call i192 @mulPv160x32(i32* %r3, i32 %r130)
-%r132 = zext i192 %r131 to i224
-%r133 = add i224 %r128, %r132
-%r134 = lshr i224 %r133, 32
-%r135 = trunc i224 %r134 to i192
-%r136 = zext i160 %r35 to i192
-%r137 = sub i192 %r135, %r136
-%r138 = lshr i192 %r137, 160
-%r139 = trunc i192 %r138 to i1
-%r140 = select i1 %r139, i192 %r135, i192 %r137
-%r141 = trunc i192 %r140 to i160
-%r142 = trunc i160 %r141 to i32
-%r144 = getelementptr i32, i32* %r1, i32 0
-store i32 %r142, i32* %r144
-%r145 = lshr i160 %r141, 32
-%r146 = trunc i160 %r145 to i32
-%r148 = getelementptr i32, i32* %r1, i32 1
-store i32 %r146, i32* %r148
-%r149 = lshr i160 %r145, 32
-%r150 = trunc i160 %r149 to i32
-%r152 = getelementptr i32, i32* %r1, i32 2
-store i32 %r150, i32* %r152
-%r153 = lshr i160 %r149, 32
-%r154 = trunc i160 %r153 to i32
-%r156 = getelementptr i32, i32* %r1, i32 3
-store i32 %r154, i32* %r156
-%r157 = lshr i160 %r153, 32
-%r158 = trunc i160 %r157 to i32
-%r160 = getelementptr i32, i32* %r1, i32 4
-store i32 %r158, i32* %r160
+%r90 = zext i32 %r89 to i224
+%r91 = shl i224 %r90, 192
+%r92 = or i224 %r86, %r91
+%r93 = trunc i224 %r92 to i32
+%r94 = mul i32 %r93, %r6
+%r95 = call i256 @mulPv224x32(i32* %r3, i32 %r94)
+%r97 = getelementptr i32, i32* %r2, i32 7
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i256
+%r100 = shl i256 %r99, 224
+%r101 = zext i224 %r92 to i256
+%r102 = or i256 %r100, %r101
+%r103 = zext i256 %r102 to i288
+%r104 = zext i256 %r95 to i288
+%r105 = add i288 %r103, %r104
+%r106 = lshr i288 %r105, 32
+%r107 = trunc i288 %r106 to i256
+%r108 = lshr i256 %r107, 224
+%r109 = trunc i256 %r108 to i32
+%r110 = trunc i256 %r107 to i224
+%r111 = trunc i224 %r110 to i32
+%r112 = mul i32 %r111, %r6
+%r113 = call i256 @mulPv224x32(i32* %r3, i32 %r112)
+%r114 = zext i32 %r109 to i256
+%r115 = shl i256 %r114, 224
+%r116 = add i256 %r113, %r115
+%r118 = getelementptr i32, i32* %r2, i32 8
+%r119 = load i32, i32* %r118
+%r120 = zext i32 %r119 to i256
+%r121 = shl i256 %r120, 224
+%r122 = zext i224 %r110 to i256
+%r123 = or i256 %r121, %r122
+%r124 = zext i256 %r123 to i288
+%r125 = zext i256 %r116 to i288
+%r126 = add i288 %r124, %r125
+%r127 = lshr i288 %r126, 32
+%r128 = trunc i288 %r127 to i256
+%r129 = lshr i256 %r128, 224
+%r130 = trunc i256 %r129 to i32
+%r131 = trunc i256 %r128 to i224
+%r132 = trunc i224 %r131 to i32
+%r133 = mul i32 %r132, %r6
+%r134 = call i256 @mulPv224x32(i32* %r3, i32 %r133)
+%r135 = zext i32 %r130 to i256
+%r136 = shl i256 %r135, 224
+%r137 = add i256 %r134, %r136
+%r139 = getelementptr i32, i32* %r2, i32 9
+%r140 = load i32, i32* %r139
+%r141 = zext i32 %r140 to i256
+%r142 = shl i256 %r141, 224
+%r143 = zext i224 %r131 to i256
+%r144 = or i256 %r142, %r143
+%r145 = zext i256 %r144 to i288
+%r146 = zext i256 %r137 to i288
+%r147 = add i288 %r145, %r146
+%r148 = lshr i288 %r147, 32
+%r149 = trunc i288 %r148 to i256
+%r150 = lshr i256 %r149, 224
+%r151 = trunc i256 %r150 to i32
+%r152 = trunc i256 %r149 to i224
+%r153 = trunc i224 %r152 to i32
+%r154 = mul i32 %r153, %r6
+%r155 = call i256 @mulPv224x32(i32* %r3, i32 %r154)
+%r156 = zext i32 %r151 to i256
+%r157 = shl i256 %r156, 224
+%r158 = add i256 %r155, %r157
+%r160 = getelementptr i32, i32* %r2, i32 10
+%r161 = load i32, i32* %r160
+%r162 = zext i32 %r161 to i256
+%r163 = shl i256 %r162, 224
+%r164 = zext i224 %r152 to i256
+%r165 = or i256 %r163, %r164
+%r166 = zext i256 %r165 to i288
+%r167 = zext i256 %r158 to i288
+%r168 = add i288 %r166, %r167
+%r169 = lshr i288 %r168, 32
+%r170 = trunc i288 %r169 to i256
+%r171 = lshr i256 %r170, 224
+%r172 = trunc i256 %r171 to i32
+%r173 = trunc i256 %r170 to i224
+%r174 = trunc i224 %r173 to i32
+%r175 = mul i32 %r174, %r6
+%r176 = call i256 @mulPv224x32(i32* %r3, i32 %r175)
+%r177 = zext i32 %r172 to i256
+%r178 = shl i256 %r177, 224
+%r179 = add i256 %r176, %r178
+%r181 = getelementptr i32, i32* %r2, i32 11
+%r182 = load i32, i32* %r181
+%r183 = zext i32 %r182 to i256
+%r184 = shl i256 %r183, 224
+%r185 = zext i224 %r173 to i256
+%r186 = or i256 %r184, %r185
+%r187 = zext i256 %r186 to i288
+%r188 = zext i256 %r179 to i288
+%r189 = add i288 %r187, %r188
+%r190 = lshr i288 %r189, 32
+%r191 = trunc i288 %r190 to i256
+%r192 = lshr i256 %r191, 224
+%r193 = trunc i256 %r192 to i32
+%r194 = trunc i256 %r191 to i224
+%r195 = trunc i224 %r194 to i32
+%r196 = mul i32 %r195, %r6
+%r197 = call i256 @mulPv224x32(i32* %r3, i32 %r196)
+%r198 = zext i32 %r193 to i256
+%r199 = shl i256 %r198, 224
+%r200 = add i256 %r197, %r199
+%r202 = getelementptr i32, i32* %r2, i32 12
+%r203 = load i32, i32* %r202
+%r204 = zext i32 %r203 to i256
+%r205 = shl i256 %r204, 224
+%r206 = zext i224 %r194 to i256
+%r207 = or i256 %r205, %r206
+%r208 = zext i256 %r207 to i288
+%r209 = zext i256 %r200 to i288
+%r210 = add i288 %r208, %r209
+%r211 = lshr i288 %r210, 32
+%r212 = trunc i288 %r211 to i256
+%r213 = lshr i256 %r212, 224
+%r214 = trunc i256 %r213 to i32
+%r215 = trunc i256 %r212 to i224
+%r216 = trunc i224 %r215 to i32
+%r217 = mul i32 %r216, %r6
+%r218 = call i256 @mulPv224x32(i32* %r3, i32 %r217)
+%r219 = zext i32 %r214 to i256
+%r220 = shl i256 %r219, 224
+%r221 = add i256 %r218, %r220
+%r223 = getelementptr i32, i32* %r2, i32 13
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i256
+%r226 = shl i256 %r225, 224
+%r227 = zext i224 %r215 to i256
+%r228 = or i256 %r226, %r227
+%r229 = zext i256 %r228 to i288
+%r230 = zext i256 %r221 to i288
+%r231 = add i288 %r229, %r230
+%r232 = lshr i288 %r231, 32
+%r233 = trunc i288 %r232 to i256
+%r234 = lshr i256 %r233, 224
+%r235 = trunc i256 %r234 to i32
+%r236 = trunc i256 %r233 to i224
+%r237 = zext i224 %r49 to i256
+%r238 = zext i224 %r236 to i256
+%r239 = sub i256 %r238, %r237
+%r240 = lshr i256 %r239, 224
+%r241 = trunc i256 %r240 to i1
+%r242 = select i1 %r241, i256 %r238, i256 %r239
+%r243 = trunc i256 %r242 to i224
+%r245 = getelementptr i32, i32* %r1, i32 0
+%r246 = trunc i224 %r243 to i32
+store i32 %r246, i32* %r245
+%r247 = lshr i224 %r243, 32
+%r249 = getelementptr i32, i32* %r1, i32 1
+%r250 = trunc i224 %r247 to i32
+store i32 %r250, i32* %r249
+%r251 = lshr i224 %r247, 32
+%r253 = getelementptr i32, i32* %r1, i32 2
+%r254 = trunc i224 %r251 to i32
+store i32 %r254, i32* %r253
+%r255 = lshr i224 %r251, 32
+%r257 = getelementptr i32, i32* %r1, i32 3
+%r258 = trunc i224 %r255 to i32
+store i32 %r258, i32* %r257
+%r259 = lshr i224 %r255, 32
+%r261 = getelementptr i32, i32* %r1, i32 4
+%r262 = trunc i224 %r259 to i32
+store i32 %r262, i32* %r261
+%r263 = lshr i224 %r259, 32
+%r265 = getelementptr i32, i32* %r1, i32 5
+%r266 = trunc i224 %r263 to i32
+store i32 %r266, i32* %r265
+%r267 = lshr i224 %r263, 32
+%r269 = getelementptr i32, i32* %r1, i32 6
+%r270 = trunc i224 %r267 to i32
+store i32 %r270, i32* %r269
 ret void
 }
-define i32 @mcl_fp_addPre5L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_montRedNF7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
 {
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r35 = load i32, i32* %r4
-%r36 = zext i32 %r35 to i64
-%r38 = getelementptr i32, i32* %r4, i32 1
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
 %r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i64
-%r41 = shl i64 %r40, 32
-%r42 = or i64 %r36, %r41
-%r43 = zext i64 %r42 to i96
-%r45 = getelementptr i32, i32* %r4, i32 2
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
 %r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i96
-%r48 = shl i96 %r47, 64
-%r49 = or i96 %r43, %r48
-%r50 = zext i96 %r49 to i128
-%r52 = getelementptr i32, i32* %r4, i32 3
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i128
-%r55 = shl i128 %r54, 96
-%r56 = or i128 %r50, %r55
-%r57 = zext i128 %r56 to i160
-%r59 = getelementptr i32, i32* %r4, i32 4
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i160
-%r62 = shl i160 %r61, 128
-%r63 = or i160 %r57, %r62
-%r64 = zext i160 %r63 to i192
-%r65 = add i192 %r34, %r64
-%r66 = trunc i192 %r65 to i160
-%r67 = trunc i160 %r66 to i32
-%r69 = getelementptr i32, i32* %r2, i32 0
-store i32 %r67, i32* %r69
-%r70 = lshr i160 %r66, 32
-%r71 = trunc i160 %r70 to i32
-%r73 = getelementptr i32, i32* %r2, i32 1
-store i32 %r71, i32* %r73
-%r74 = lshr i160 %r70, 32
-%r75 = trunc i160 %r74 to i32
-%r77 = getelementptr i32, i32* %r2, i32 2
-store i32 %r75, i32* %r77
-%r78 = lshr i160 %r74, 32
-%r79 = trunc i160 %r78 to i32
-%r81 = getelementptr i32, i32* %r2, i32 3
-store i32 %r79, i32* %r81
-%r82 = lshr i160 %r78, 32
-%r83 = trunc i160 %r82 to i32
-%r85 = getelementptr i32, i32* %r2, i32 4
-store i32 %r83, i32* %r85
-%r86 = lshr i192 %r65, 160
-%r87 = trunc i192 %r86 to i32
-ret i32 %r87
-}
-define i32 @mcl_fp_subPre5L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r35 = load i32, i32* %r4
-%r36 = zext i32 %r35 to i64
-%r38 = getelementptr i32, i32* %r4, i32 1
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i64
-%r41 = shl i64 %r40, 32
-%r42 = or i64 %r36, %r41
-%r43 = zext i64 %r42 to i96
-%r45 = getelementptr i32, i32* %r4, i32 2
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i96
-%r48 = shl i96 %r47, 64
-%r49 = or i96 %r43, %r48
-%r50 = zext i96 %r49 to i128
-%r52 = getelementptr i32, i32* %r4, i32 3
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i128
-%r55 = shl i128 %r54, 96
-%r56 = or i128 %r50, %r55
-%r57 = zext i128 %r56 to i160
-%r59 = getelementptr i32, i32* %r4, i32 4
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i160
-%r62 = shl i160 %r61, 128
-%r63 = or i160 %r57, %r62
-%r64 = zext i160 %r63 to i192
-%r65 = sub i192 %r34, %r64
-%r66 = trunc i192 %r65 to i160
-%r67 = trunc i160 %r66 to i32
-%r69 = getelementptr i32, i32* %r2, i32 0
-store i32 %r67, i32* %r69
-%r70 = lshr i160 %r66, 32
-%r71 = trunc i160 %r70 to i32
-%r73 = getelementptr i32, i32* %r2, i32 1
-store i32 %r71, i32* %r73
-%r74 = lshr i160 %r70, 32
-%r75 = trunc i160 %r74 to i32
-%r77 = getelementptr i32, i32* %r2, i32 2
-store i32 %r75, i32* %r77
-%r78 = lshr i160 %r74, 32
-%r79 = trunc i160 %r78 to i32
-%r81 = getelementptr i32, i32* %r2, i32 3
-store i32 %r79, i32* %r81
-%r82 = lshr i160 %r78, 32
-%r83 = trunc i160 %r82 to i32
-%r85 = getelementptr i32, i32* %r2, i32 4
-store i32 %r83, i32* %r85
-%r86 = lshr i192 %r65, 160
-%r87 = trunc i192 %r86 to i32
-%r89 = and i32 %r87, 1
-ret i32 %r89
-}
-define void @mcl_fp_shr1_5L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = zext i64 %r10 to i96
-%r13 = getelementptr i32, i32* %r2, i32 2
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i96
-%r16 = shl i96 %r15, 64
-%r17 = or i96 %r11, %r16
-%r18 = zext i96 %r17 to i128
-%r20 = getelementptr i32, i32* %r2, i32 3
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i128
-%r23 = shl i128 %r22, 96
-%r24 = or i128 %r18, %r23
-%r25 = zext i128 %r24 to i160
-%r27 = getelementptr i32, i32* %r2, i32 4
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i160
-%r30 = shl i160 %r29, 128
-%r31 = or i160 %r25, %r30
-%r32 = lshr i160 %r31, 1
-%r33 = trunc i160 %r32 to i32
-%r35 = getelementptr i32, i32* %r1, i32 0
-store i32 %r33, i32* %r35
-%r36 = lshr i160 %r32, 32
-%r37 = trunc i160 %r36 to i32
-%r39 = getelementptr i32, i32* %r1, i32 1
-store i32 %r37, i32* %r39
-%r40 = lshr i160 %r36, 32
-%r41 = trunc i160 %r40 to i32
-%r43 = getelementptr i32, i32* %r1, i32 2
-store i32 %r41, i32* %r43
-%r44 = lshr i160 %r40, 32
-%r45 = trunc i160 %r44 to i32
-%r47 = getelementptr i32, i32* %r1, i32 3
-store i32 %r45, i32* %r47
-%r48 = lshr i160 %r44, 32
-%r49 = trunc i160 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 4
-store i32 %r49, i32* %r51
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = load i32, i32* %r2
+%r51 = zext i32 %r50 to i64
+%r53 = getelementptr i32, i32* %r2, i32 1
+%r54 = load i32, i32* %r53
+%r55 = zext i32 %r54 to i64
+%r56 = shl i64 %r55, 32
+%r57 = or i64 %r51, %r56
+%r58 = zext i64 %r57 to i96
+%r60 = getelementptr i32, i32* %r2, i32 2
+%r61 = load i32, i32* %r60
+%r62 = zext i32 %r61 to i96
+%r63 = shl i96 %r62, 64
+%r64 = or i96 %r58, %r63
+%r65 = zext i96 %r64 to i128
+%r67 = getelementptr i32, i32* %r2, i32 3
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i128
+%r70 = shl i128 %r69, 96
+%r71 = or i128 %r65, %r70
+%r72 = zext i128 %r71 to i160
+%r74 = getelementptr i32, i32* %r2, i32 4
+%r75 = load i32, i32* %r74
+%r76 = zext i32 %r75 to i160
+%r77 = shl i160 %r76, 128
+%r78 = or i160 %r72, %r77
+%r79 = zext i160 %r78 to i192
+%r81 = getelementptr i32, i32* %r2, i32 5
+%r82 = load i32, i32* %r81
+%r83 = zext i32 %r82 to i192
+%r84 = shl i192 %r83, 160
+%r85 = or i192 %r79, %r84
+%r86 = zext i192 %r85 to i224
+%r88 = getelementptr i32, i32* %r2, i32 6
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i224
+%r91 = shl i224 %r90, 192
+%r92 = or i224 %r86, %r91
+%r93 = trunc i224 %r92 to i32
+%r94 = mul i32 %r93, %r6
+%r95 = call i256 @mulPv224x32(i32* %r3, i32 %r94)
+%r97 = getelementptr i32, i32* %r2, i32 7
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i256
+%r100 = shl i256 %r99, 224
+%r101 = zext i224 %r92 to i256
+%r102 = or i256 %r100, %r101
+%r103 = zext i256 %r102 to i288
+%r104 = zext i256 %r95 to i288
+%r105 = add i288 %r103, %r104
+%r106 = lshr i288 %r105, 32
+%r107 = trunc i288 %r106 to i256
+%r108 = lshr i256 %r107, 224
+%r109 = trunc i256 %r108 to i32
+%r110 = trunc i256 %r107 to i224
+%r111 = trunc i224 %r110 to i32
+%r112 = mul i32 %r111, %r6
+%r113 = call i256 @mulPv224x32(i32* %r3, i32 %r112)
+%r114 = zext i32 %r109 to i256
+%r115 = shl i256 %r114, 224
+%r116 = add i256 %r113, %r115
+%r118 = getelementptr i32, i32* %r2, i32 8
+%r119 = load i32, i32* %r118
+%r120 = zext i32 %r119 to i256
+%r121 = shl i256 %r120, 224
+%r122 = zext i224 %r110 to i256
+%r123 = or i256 %r121, %r122
+%r124 = zext i256 %r123 to i288
+%r125 = zext i256 %r116 to i288
+%r126 = add i288 %r124, %r125
+%r127 = lshr i288 %r126, 32
+%r128 = trunc i288 %r127 to i256
+%r129 = lshr i256 %r128, 224
+%r130 = trunc i256 %r129 to i32
+%r131 = trunc i256 %r128 to i224
+%r132 = trunc i224 %r131 to i32
+%r133 = mul i32 %r132, %r6
+%r134 = call i256 @mulPv224x32(i32* %r3, i32 %r133)
+%r135 = zext i32 %r130 to i256
+%r136 = shl i256 %r135, 224
+%r137 = add i256 %r134, %r136
+%r139 = getelementptr i32, i32* %r2, i32 9
+%r140 = load i32, i32* %r139
+%r141 = zext i32 %r140 to i256
+%r142 = shl i256 %r141, 224
+%r143 = zext i224 %r131 to i256
+%r144 = or i256 %r142, %r143
+%r145 = zext i256 %r144 to i288
+%r146 = zext i256 %r137 to i288
+%r147 = add i288 %r145, %r146
+%r148 = lshr i288 %r147, 32
+%r149 = trunc i288 %r148 to i256
+%r150 = lshr i256 %r149, 224
+%r151 = trunc i256 %r150 to i32
+%r152 = trunc i256 %r149 to i224
+%r153 = trunc i224 %r152 to i32
+%r154 = mul i32 %r153, %r6
+%r155 = call i256 @mulPv224x32(i32* %r3, i32 %r154)
+%r156 = zext i32 %r151 to i256
+%r157 = shl i256 %r156, 224
+%r158 = add i256 %r155, %r157
+%r160 = getelementptr i32, i32* %r2, i32 10
+%r161 = load i32, i32* %r160
+%r162 = zext i32 %r161 to i256
+%r163 = shl i256 %r162, 224
+%r164 = zext i224 %r152 to i256
+%r165 = or i256 %r163, %r164
+%r166 = zext i256 %r165 to i288
+%r167 = zext i256 %r158 to i288
+%r168 = add i288 %r166, %r167
+%r169 = lshr i288 %r168, 32
+%r170 = trunc i288 %r169 to i256
+%r171 = lshr i256 %r170, 224
+%r172 = trunc i256 %r171 to i32
+%r173 = trunc i256 %r170 to i224
+%r174 = trunc i224 %r173 to i32
+%r175 = mul i32 %r174, %r6
+%r176 = call i256 @mulPv224x32(i32* %r3, i32 %r175)
+%r177 = zext i32 %r172 to i256
+%r178 = shl i256 %r177, 224
+%r179 = add i256 %r176, %r178
+%r181 = getelementptr i32, i32* %r2, i32 11
+%r182 = load i32, i32* %r181
+%r183 = zext i32 %r182 to i256
+%r184 = shl i256 %r183, 224
+%r185 = zext i224 %r173 to i256
+%r186 = or i256 %r184, %r185
+%r187 = zext i256 %r186 to i288
+%r188 = zext i256 %r179 to i288
+%r189 = add i288 %r187, %r188
+%r190 = lshr i288 %r189, 32
+%r191 = trunc i288 %r190 to i256
+%r192 = lshr i256 %r191, 224
+%r193 = trunc i256 %r192 to i32
+%r194 = trunc i256 %r191 to i224
+%r195 = trunc i224 %r194 to i32
+%r196 = mul i32 %r195, %r6
+%r197 = call i256 @mulPv224x32(i32* %r3, i32 %r196)
+%r198 = zext i32 %r193 to i256
+%r199 = shl i256 %r198, 224
+%r200 = add i256 %r197, %r199
+%r202 = getelementptr i32, i32* %r2, i32 12
+%r203 = load i32, i32* %r202
+%r204 = zext i32 %r203 to i256
+%r205 = shl i256 %r204, 224
+%r206 = zext i224 %r194 to i256
+%r207 = or i256 %r205, %r206
+%r208 = zext i256 %r207 to i288
+%r209 = zext i256 %r200 to i288
+%r210 = add i288 %r208, %r209
+%r211 = lshr i288 %r210, 32
+%r212 = trunc i288 %r211 to i256
+%r213 = lshr i256 %r212, 224
+%r214 = trunc i256 %r213 to i32
+%r215 = trunc i256 %r212 to i224
+%r216 = trunc i224 %r215 to i32
+%r217 = mul i32 %r216, %r6
+%r218 = call i256 @mulPv224x32(i32* %r3, i32 %r217)
+%r219 = zext i32 %r214 to i256
+%r220 = shl i256 %r219, 224
+%r221 = add i256 %r218, %r220
+%r223 = getelementptr i32, i32* %r2, i32 13
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i256
+%r226 = shl i256 %r225, 224
+%r227 = zext i224 %r215 to i256
+%r228 = or i256 %r226, %r227
+%r229 = zext i256 %r228 to i288
+%r230 = zext i256 %r221 to i288
+%r231 = add i288 %r229, %r230
+%r232 = lshr i288 %r231, 32
+%r233 = trunc i288 %r232 to i256
+%r234 = lshr i256 %r233, 224
+%r235 = trunc i256 %r234 to i32
+%r236 = trunc i256 %r233 to i224
+%r237 = sub i224 %r236, %r49
+%r238 = lshr i224 %r237, 223
+%r239 = trunc i224 %r238 to i1
+%r240 = select i1 %r239, i224 %r236, i224 %r237
+%r242 = getelementptr i32, i32* %r1, i32 0
+%r243 = trunc i224 %r240 to i32
+store i32 %r243, i32* %r242
+%r244 = lshr i224 %r240, 32
+%r246 = getelementptr i32, i32* %r1, i32 1
+%r247 = trunc i224 %r244 to i32
+store i32 %r247, i32* %r246
+%r248 = lshr i224 %r244, 32
+%r250 = getelementptr i32, i32* %r1, i32 2
+%r251 = trunc i224 %r248 to i32
+store i32 %r251, i32* %r250
+%r252 = lshr i224 %r248, 32
+%r254 = getelementptr i32, i32* %r1, i32 3
+%r255 = trunc i224 %r252 to i32
+store i32 %r255, i32* %r254
+%r256 = lshr i224 %r252, 32
+%r258 = getelementptr i32, i32* %r1, i32 4
+%r259 = trunc i224 %r256 to i32
+store i32 %r259, i32* %r258
+%r260 = lshr i224 %r256, 32
+%r262 = getelementptr i32, i32* %r1, i32 5
+%r263 = trunc i224 %r260 to i32
+store i32 %r263, i32* %r262
+%r264 = lshr i224 %r260, 32
+%r266 = getelementptr i32, i32* %r1, i32 6
+%r267 = trunc i224 %r264 to i32
+store i32 %r267, i32* %r266
 ret void
 }
-define void @mcl_fp_add5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define i32 @mcl_fp_addPre7L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
-%r5 = load i32, i32* %r2
+%r5 = load i32, i32* %r3
 %r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
+%r8 = getelementptr i32, i32* %r3, i32 1
 %r9 = load i32, i32* %r8
 %r10 = zext i32 %r9 to i64
 %r11 = shl i64 %r10, 32
 %r12 = or i64 %r6, %r11
 %r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
+%r15 = getelementptr i32, i32* %r3, i32 2
 %r16 = load i32, i32* %r15
 %r17 = zext i32 %r16 to i96
 %r18 = shl i96 %r17, 64
 %r19 = or i96 %r13, %r18
 %r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
+%r22 = getelementptr i32, i32* %r3, i32 3
 %r23 = load i32, i32* %r22
 %r24 = zext i32 %r23 to i128
 %r25 = shl i128 %r24, 96
 %r26 = or i128 %r20, %r25
 %r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
+%r29 = getelementptr i32, i32* %r3, i32 4
 %r30 = load i32, i32* %r29
 %r31 = zext i32 %r30 to i160
 %r32 = shl i160 %r31, 128
 %r33 = or i160 %r27, %r32
-%r34 = load i32, i32* %r3
-%r35 = zext i32 %r34 to i64
-%r37 = getelementptr i32, i32* %r3, i32 1
-%r38 = load i32, i32* %r37
-%r39 = zext i32 %r38 to i64
-%r40 = shl i64 %r39, 32
-%r41 = or i64 %r35, %r40
-%r42 = zext i64 %r41 to i96
-%r44 = getelementptr i32, i32* %r3, i32 2
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i96
-%r47 = shl i96 %r46, 64
-%r48 = or i96 %r42, %r47
-%r49 = zext i96 %r48 to i128
-%r51 = getelementptr i32, i32* %r3, i32 3
-%r52 = load i32, i32* %r51
-%r53 = zext i32 %r52 to i128
-%r54 = shl i128 %r53, 96
-%r55 = or i128 %r49, %r54
-%r56 = zext i128 %r55 to i160
-%r58 = getelementptr i32, i32* %r3, i32 4
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i160
-%r61 = shl i160 %r60, 128
-%r62 = or i160 %r56, %r61
-%r63 = zext i160 %r33 to i192
-%r64 = zext i160 %r62 to i192
-%r65 = add i192 %r63, %r64
-%r66 = trunc i192 %r65 to i160
-%r67 = trunc i160 %r66 to i32
-%r69 = getelementptr i32, i32* %r1, i32 0
-store i32 %r67, i32* %r69
-%r70 = lshr i160 %r66, 32
-%r71 = trunc i160 %r70 to i32
-%r73 = getelementptr i32, i32* %r1, i32 1
-store i32 %r71, i32* %r73
-%r74 = lshr i160 %r70, 32
-%r75 = trunc i160 %r74 to i32
-%r77 = getelementptr i32, i32* %r1, i32 2
-store i32 %r75, i32* %r77
-%r78 = lshr i160 %r74, 32
-%r79 = trunc i160 %r78 to i32
-%r81 = getelementptr i32, i32* %r1, i32 3
-store i32 %r79, i32* %r81
-%r82 = lshr i160 %r78, 32
-%r83 = trunc i160 %r82 to i32
-%r85 = getelementptr i32, i32* %r1, i32 4
-store i32 %r83, i32* %r85
-%r86 = load i32, i32* %r4
-%r87 = zext i32 %r86 to i64
-%r89 = getelementptr i32, i32* %r4, i32 1
-%r90 = load i32, i32* %r89
-%r91 = zext i32 %r90 to i64
-%r92 = shl i64 %r91, 32
-%r93 = or i64 %r87, %r92
-%r94 = zext i64 %r93 to i96
-%r96 = getelementptr i32, i32* %r4, i32 2
-%r97 = load i32, i32* %r96
-%r98 = zext i32 %r97 to i96
-%r99 = shl i96 %r98, 64
-%r100 = or i96 %r94, %r99
-%r101 = zext i96 %r100 to i128
-%r103 = getelementptr i32, i32* %r4, i32 3
-%r104 = load i32, i32* %r103
-%r105 = zext i32 %r104 to i128
-%r106 = shl i128 %r105, 96
-%r107 = or i128 %r101, %r106
-%r108 = zext i128 %r107 to i160
-%r110 = getelementptr i32, i32* %r4, i32 4
-%r111 = load i32, i32* %r110
-%r112 = zext i32 %r111 to i160
-%r113 = shl i160 %r112, 128
-%r114 = or i160 %r108, %r113
-%r115 = zext i160 %r114 to i192
-%r116 = sub i192 %r65, %r115
-%r117 = lshr i192 %r116, 160
-%r118 = trunc i192 %r117 to i1
-br i1%r118, label %carry, label %nocarry
-nocarry:
-%r119 = trunc i192 %r116 to i160
-%r120 = trunc i160 %r119 to i32
-%r122 = getelementptr i32, i32* %r1, i32 0
-store i32 %r120, i32* %r122
-%r123 = lshr i160 %r119, 32
-%r124 = trunc i160 %r123 to i32
-%r126 = getelementptr i32, i32* %r1, i32 1
-store i32 %r124, i32* %r126
-%r127 = lshr i160 %r123, 32
-%r128 = trunc i160 %r127 to i32
-%r130 = getelementptr i32, i32* %r1, i32 2
-store i32 %r128, i32* %r130
-%r131 = lshr i160 %r127, 32
-%r132 = trunc i160 %r131 to i32
-%r134 = getelementptr i32, i32* %r1, i32 3
-store i32 %r132, i32* %r134
-%r135 = lshr i160 %r131, 32
-%r136 = trunc i160 %r135 to i32
-%r138 = getelementptr i32, i32* %r1, i32 4
-store i32 %r136, i32* %r138
-ret void
-carry:
-ret void
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r49 = load i32, i32* %r4
+%r50 = zext i32 %r49 to i64
+%r52 = getelementptr i32, i32* %r4, i32 1
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i64
+%r55 = shl i64 %r54, 32
+%r56 = or i64 %r50, %r55
+%r57 = zext i64 %r56 to i96
+%r59 = getelementptr i32, i32* %r4, i32 2
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i96
+%r62 = shl i96 %r61, 64
+%r63 = or i96 %r57, %r62
+%r64 = zext i96 %r63 to i128
+%r66 = getelementptr i32, i32* %r4, i32 3
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i128
+%r69 = shl i128 %r68, 96
+%r70 = or i128 %r64, %r69
+%r71 = zext i128 %r70 to i160
+%r73 = getelementptr i32, i32* %r4, i32 4
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i160
+%r76 = shl i160 %r75, 128
+%r77 = or i160 %r71, %r76
+%r78 = zext i160 %r77 to i192
+%r80 = getelementptr i32, i32* %r4, i32 5
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i192
+%r83 = shl i192 %r82, 160
+%r84 = or i192 %r78, %r83
+%r85 = zext i192 %r84 to i224
+%r87 = getelementptr i32, i32* %r4, i32 6
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i224
+%r90 = shl i224 %r89, 192
+%r91 = or i224 %r85, %r90
+%r92 = zext i224 %r91 to i256
+%r93 = add i256 %r48, %r92
+%r94 = trunc i256 %r93 to i224
+%r96 = getelementptr i32, i32* %r2, i32 0
+%r97 = trunc i224 %r94 to i32
+store i32 %r97, i32* %r96
+%r98 = lshr i224 %r94, 32
+%r100 = getelementptr i32, i32* %r2, i32 1
+%r101 = trunc i224 %r98 to i32
+store i32 %r101, i32* %r100
+%r102 = lshr i224 %r98, 32
+%r104 = getelementptr i32, i32* %r2, i32 2
+%r105 = trunc i224 %r102 to i32
+store i32 %r105, i32* %r104
+%r106 = lshr i224 %r102, 32
+%r108 = getelementptr i32, i32* %r2, i32 3
+%r109 = trunc i224 %r106 to i32
+store i32 %r109, i32* %r108
+%r110 = lshr i224 %r106, 32
+%r112 = getelementptr i32, i32* %r2, i32 4
+%r113 = trunc i224 %r110 to i32
+store i32 %r113, i32* %r112
+%r114 = lshr i224 %r110, 32
+%r116 = getelementptr i32, i32* %r2, i32 5
+%r117 = trunc i224 %r114 to i32
+store i32 %r117, i32* %r116
+%r118 = lshr i224 %r114, 32
+%r120 = getelementptr i32, i32* %r2, i32 6
+%r121 = trunc i224 %r118 to i32
+store i32 %r121, i32* %r120
+%r122 = lshr i256 %r93, 224
+%r123 = trunc i256 %r122 to i32
+ret i32 %r123
 }
-define void @mcl_fp_addNF5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define i32 @mcl_fp_subPre7L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
-%r5 = load i32, i32* %r2
+%r5 = load i32, i32* %r3
 %r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
+%r8 = getelementptr i32, i32* %r3, i32 1
 %r9 = load i32, i32* %r8
 %r10 = zext i32 %r9 to i64
 %r11 = shl i64 %r10, 32
 %r12 = or i64 %r6, %r11
 %r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
+%r15 = getelementptr i32, i32* %r3, i32 2
 %r16 = load i32, i32* %r15
 %r17 = zext i32 %r16 to i96
 %r18 = shl i96 %r17, 64
 %r19 = or i96 %r13, %r18
 %r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
+%r22 = getelementptr i32, i32* %r3, i32 3
 %r23 = load i32, i32* %r22
 %r24 = zext i32 %r23 to i128
 %r25 = shl i128 %r24, 96
 %r26 = or i128 %r20, %r25
 %r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = load i32, i32* %r3
-%r35 = zext i32 %r34 to i64
-%r37 = getelementptr i32, i32* %r3, i32 1
-%r38 = load i32, i32* %r37
-%r39 = zext i32 %r38 to i64
-%r40 = shl i64 %r39, 32
-%r41 = or i64 %r35, %r40
-%r42 = zext i64 %r41 to i96
-%r44 = getelementptr i32, i32* %r3, i32 2
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i96
-%r47 = shl i96 %r46, 64
-%r48 = or i96 %r42, %r47
-%r49 = zext i96 %r48 to i128
-%r51 = getelementptr i32, i32* %r3, i32 3
-%r52 = load i32, i32* %r51
-%r53 = zext i32 %r52 to i128
-%r54 = shl i128 %r53, 96
-%r55 = or i128 %r49, %r54
-%r56 = zext i128 %r55 to i160
-%r58 = getelementptr i32, i32* %r3, i32 4
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i160
-%r61 = shl i160 %r60, 128
-%r62 = or i160 %r56, %r61
-%r63 = add i160 %r33, %r62
-%r64 = load i32, i32* %r4
-%r65 = zext i32 %r64 to i64
-%r67 = getelementptr i32, i32* %r4, i32 1
-%r68 = load i32, i32* %r67
-%r69 = zext i32 %r68 to i64
-%r70 = shl i64 %r69, 32
-%r71 = or i64 %r65, %r70
-%r72 = zext i64 %r71 to i96
-%r74 = getelementptr i32, i32* %r4, i32 2
-%r75 = load i32, i32* %r74
-%r76 = zext i32 %r75 to i96
-%r77 = shl i96 %r76, 64
-%r78 = or i96 %r72, %r77
-%r79 = zext i96 %r78 to i128
-%r81 = getelementptr i32, i32* %r4, i32 3
-%r82 = load i32, i32* %r81
-%r83 = zext i32 %r82 to i128
-%r84 = shl i128 %r83, 96
-%r85 = or i128 %r79, %r84
-%r86 = zext i128 %r85 to i160
-%r88 = getelementptr i32, i32* %r4, i32 4
-%r89 = load i32, i32* %r88
-%r90 = zext i32 %r89 to i160
-%r91 = shl i160 %r90, 128
-%r92 = or i160 %r86, %r91
-%r93 = sub i160 %r63, %r92
-%r94 = lshr i160 %r93, 159
-%r95 = trunc i160 %r94 to i1
-%r96 = select i1 %r95, i160 %r63, i160 %r93
-%r97 = trunc i160 %r96 to i32
-%r99 = getelementptr i32, i32* %r1, i32 0
-store i32 %r97, i32* %r99
-%r100 = lshr i160 %r96, 32
-%r101 = trunc i160 %r100 to i32
-%r103 = getelementptr i32, i32* %r1, i32 1
-store i32 %r101, i32* %r103
-%r104 = lshr i160 %r100, 32
-%r105 = trunc i160 %r104 to i32
-%r107 = getelementptr i32, i32* %r1, i32 2
-store i32 %r105, i32* %r107
-%r108 = lshr i160 %r104, 32
-%r109 = trunc i160 %r108 to i32
-%r111 = getelementptr i32, i32* %r1, i32 3
-store i32 %r109, i32* %r111
-%r112 = lshr i160 %r108, 32
-%r113 = trunc i160 %r112 to i32
-%r115 = getelementptr i32, i32* %r1, i32 4
-store i32 %r113, i32* %r115
-ret void
-}
-define void @mcl_fp_sub5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = load i32, i32* %r3
-%r35 = zext i32 %r34 to i64
-%r37 = getelementptr i32, i32* %r3, i32 1
-%r38 = load i32, i32* %r37
-%r39 = zext i32 %r38 to i64
-%r40 = shl i64 %r39, 32
-%r41 = or i64 %r35, %r40
-%r42 = zext i64 %r41 to i96
-%r44 = getelementptr i32, i32* %r3, i32 2
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i96
-%r47 = shl i96 %r46, 64
-%r48 = or i96 %r42, %r47
-%r49 = zext i96 %r48 to i128
-%r51 = getelementptr i32, i32* %r3, i32 3
-%r52 = load i32, i32* %r51
-%r53 = zext i32 %r52 to i128
-%r54 = shl i128 %r53, 96
-%r55 = or i128 %r49, %r54
-%r56 = zext i128 %r55 to i160
-%r58 = getelementptr i32, i32* %r3, i32 4
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i160
-%r61 = shl i160 %r60, 128
-%r62 = or i160 %r56, %r61
-%r63 = zext i160 %r33 to i192
-%r64 = zext i160 %r62 to i192
-%r65 = sub i192 %r63, %r64
-%r66 = trunc i192 %r65 to i160
-%r67 = lshr i192 %r65, 160
-%r68 = trunc i192 %r67 to i1
-%r69 = trunc i160 %r66 to i32
-%r71 = getelementptr i32, i32* %r1, i32 0
-store i32 %r69, i32* %r71
-%r72 = lshr i160 %r66, 32
-%r73 = trunc i160 %r72 to i32
-%r75 = getelementptr i32, i32* %r1, i32 1
-store i32 %r73, i32* %r75
-%r76 = lshr i160 %r72, 32
-%r77 = trunc i160 %r76 to i32
-%r79 = getelementptr i32, i32* %r1, i32 2
-store i32 %r77, i32* %r79
-%r80 = lshr i160 %r76, 32
-%r81 = trunc i160 %r80 to i32
-%r83 = getelementptr i32, i32* %r1, i32 3
-store i32 %r81, i32* %r83
-%r84 = lshr i160 %r80, 32
-%r85 = trunc i160 %r84 to i32
-%r87 = getelementptr i32, i32* %r1, i32 4
-store i32 %r85, i32* %r87
-br i1%r68, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r88 = load i32, i32* %r4
-%r89 = zext i32 %r88 to i64
-%r91 = getelementptr i32, i32* %r4, i32 1
-%r92 = load i32, i32* %r91
-%r93 = zext i32 %r92 to i64
-%r94 = shl i64 %r93, 32
-%r95 = or i64 %r89, %r94
-%r96 = zext i64 %r95 to i96
-%r98 = getelementptr i32, i32* %r4, i32 2
-%r99 = load i32, i32* %r98
-%r100 = zext i32 %r99 to i96
-%r101 = shl i96 %r100, 64
-%r102 = or i96 %r96, %r101
-%r103 = zext i96 %r102 to i128
-%r105 = getelementptr i32, i32* %r4, i32 3
-%r106 = load i32, i32* %r105
-%r107 = zext i32 %r106 to i128
-%r108 = shl i128 %r107, 96
-%r109 = or i128 %r103, %r108
-%r110 = zext i128 %r109 to i160
-%r112 = getelementptr i32, i32* %r4, i32 4
-%r113 = load i32, i32* %r112
-%r114 = zext i32 %r113 to i160
-%r115 = shl i160 %r114, 128
-%r116 = or i160 %r110, %r115
-%r117 = add i160 %r66, %r116
-%r118 = trunc i160 %r117 to i32
-%r120 = getelementptr i32, i32* %r1, i32 0
-store i32 %r118, i32* %r120
-%r121 = lshr i160 %r117, 32
-%r122 = trunc i160 %r121 to i32
-%r124 = getelementptr i32, i32* %r1, i32 1
-store i32 %r122, i32* %r124
-%r125 = lshr i160 %r121, 32
-%r126 = trunc i160 %r125 to i32
-%r128 = getelementptr i32, i32* %r1, i32 2
-store i32 %r126, i32* %r128
-%r129 = lshr i160 %r125, 32
-%r130 = trunc i160 %r129 to i32
-%r132 = getelementptr i32, i32* %r1, i32 3
-store i32 %r130, i32* %r132
-%r133 = lshr i160 %r129, 32
-%r134 = trunc i160 %r133 to i32
-%r136 = getelementptr i32, i32* %r1, i32 4
-store i32 %r134, i32* %r136
-ret void
-}
-define void @mcl_fp_subNF5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = load i32, i32* %r3
-%r35 = zext i32 %r34 to i64
-%r37 = getelementptr i32, i32* %r3, i32 1
-%r38 = load i32, i32* %r37
-%r39 = zext i32 %r38 to i64
-%r40 = shl i64 %r39, 32
-%r41 = or i64 %r35, %r40
-%r42 = zext i64 %r41 to i96
-%r44 = getelementptr i32, i32* %r3, i32 2
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i96
-%r47 = shl i96 %r46, 64
-%r48 = or i96 %r42, %r47
-%r49 = zext i96 %r48 to i128
-%r51 = getelementptr i32, i32* %r3, i32 3
-%r52 = load i32, i32* %r51
-%r53 = zext i32 %r52 to i128
-%r54 = shl i128 %r53, 96
-%r55 = or i128 %r49, %r54
-%r56 = zext i128 %r55 to i160
-%r58 = getelementptr i32, i32* %r3, i32 4
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i160
-%r61 = shl i160 %r60, 128
-%r62 = or i160 %r56, %r61
-%r63 = sub i160 %r33, %r62
-%r64 = lshr i160 %r63, 159
-%r65 = trunc i160 %r64 to i1
-%r66 = load i32, i32* %r4
-%r67 = zext i32 %r66 to i64
-%r69 = getelementptr i32, i32* %r4, i32 1
-%r70 = load i32, i32* %r69
-%r71 = zext i32 %r70 to i64
-%r72 = shl i64 %r71, 32
-%r73 = or i64 %r67, %r72
-%r74 = zext i64 %r73 to i96
-%r76 = getelementptr i32, i32* %r4, i32 2
-%r77 = load i32, i32* %r76
-%r78 = zext i32 %r77 to i96
-%r79 = shl i96 %r78, 64
-%r80 = or i96 %r74, %r79
-%r81 = zext i96 %r80 to i128
-%r83 = getelementptr i32, i32* %r4, i32 3
-%r84 = load i32, i32* %r83
-%r85 = zext i32 %r84 to i128
-%r86 = shl i128 %r85, 96
-%r87 = or i128 %r81, %r86
-%r88 = zext i128 %r87 to i160
-%r90 = getelementptr i32, i32* %r4, i32 4
-%r91 = load i32, i32* %r90
-%r92 = zext i32 %r91 to i160
-%r93 = shl i160 %r92, 128
-%r94 = or i160 %r88, %r93
-%r96 = select i1 %r65, i160 %r94, i160 0
-%r97 = add i160 %r63, %r96
-%r98 = trunc i160 %r97 to i32
-%r100 = getelementptr i32, i32* %r1, i32 0
-store i32 %r98, i32* %r100
-%r101 = lshr i160 %r97, 32
-%r102 = trunc i160 %r101 to i32
-%r104 = getelementptr i32, i32* %r1, i32 1
-store i32 %r102, i32* %r104
-%r105 = lshr i160 %r101, 32
-%r106 = trunc i160 %r105 to i32
-%r108 = getelementptr i32, i32* %r1, i32 2
-store i32 %r106, i32* %r108
-%r109 = lshr i160 %r105, 32
-%r110 = trunc i160 %r109 to i32
-%r112 = getelementptr i32, i32* %r1, i32 3
-store i32 %r110, i32* %r112
-%r113 = lshr i160 %r109, 32
-%r114 = trunc i160 %r113 to i32
-%r116 = getelementptr i32, i32* %r1, i32 4
-store i32 %r114, i32* %r116
-ret void
-}
-define void @mcl_fpDbl_add5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
+%r29 = getelementptr i32, i32* %r3, i32 4
 %r30 = load i32, i32* %r29
 %r31 = zext i32 %r30 to i160
 %r32 = shl i160 %r31, 128
 %r33 = or i160 %r27, %r32
 %r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
+%r36 = getelementptr i32, i32* %r3, i32 5
 %r37 = load i32, i32* %r36
 %r38 = zext i32 %r37 to i192
 %r39 = shl i192 %r38, 160
 %r40 = or i192 %r34, %r39
 %r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
+%r43 = getelementptr i32, i32* %r3, i32 6
 %r44 = load i32, i32* %r43
 %r45 = zext i32 %r44 to i224
 %r46 = shl i224 %r45, 192
 %r47 = or i224 %r41, %r46
 %r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = load i32, i32* %r3
-%r70 = zext i32 %r69 to i64
-%r72 = getelementptr i32, i32* %r3, i32 1
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i64
-%r75 = shl i64 %r74, 32
-%r76 = or i64 %r70, %r75
-%r77 = zext i64 %r76 to i96
-%r79 = getelementptr i32, i32* %r3, i32 2
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i96
-%r82 = shl i96 %r81, 64
-%r83 = or i96 %r77, %r82
-%r84 = zext i96 %r83 to i128
-%r86 = getelementptr i32, i32* %r3, i32 3
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i128
-%r89 = shl i128 %r88, 96
-%r90 = or i128 %r84, %r89
-%r91 = zext i128 %r90 to i160
-%r93 = getelementptr i32, i32* %r3, i32 4
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i160
-%r96 = shl i160 %r95, 128
-%r97 = or i160 %r91, %r96
-%r98 = zext i160 %r97 to i192
-%r100 = getelementptr i32, i32* %r3, i32 5
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i192
-%r103 = shl i192 %r102, 160
-%r104 = or i192 %r98, %r103
-%r105 = zext i192 %r104 to i224
-%r107 = getelementptr i32, i32* %r3, i32 6
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i224
-%r110 = shl i224 %r109, 192
-%r111 = or i224 %r105, %r110
-%r112 = zext i224 %r111 to i256
-%r114 = getelementptr i32, i32* %r3, i32 7
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i256
-%r117 = shl i256 %r116, 224
-%r118 = or i256 %r112, %r117
-%r119 = zext i256 %r118 to i288
-%r121 = getelementptr i32, i32* %r3, i32 8
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i288
-%r124 = shl i288 %r123, 256
-%r125 = or i288 %r119, %r124
-%r126 = zext i288 %r125 to i320
-%r128 = getelementptr i32, i32* %r3, i32 9
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i320
-%r131 = shl i320 %r130, 288
-%r132 = or i320 %r126, %r131
-%r133 = zext i320 %r68 to i352
-%r134 = zext i320 %r132 to i352
-%r135 = add i352 %r133, %r134
-%r136 = trunc i352 %r135 to i160
-%r137 = trunc i160 %r136 to i32
-%r139 = getelementptr i32, i32* %r1, i32 0
-store i32 %r137, i32* %r139
-%r140 = lshr i160 %r136, 32
-%r141 = trunc i160 %r140 to i32
-%r143 = getelementptr i32, i32* %r1, i32 1
-store i32 %r141, i32* %r143
-%r144 = lshr i160 %r140, 32
-%r145 = trunc i160 %r144 to i32
-%r147 = getelementptr i32, i32* %r1, i32 2
-store i32 %r145, i32* %r147
-%r148 = lshr i160 %r144, 32
-%r149 = trunc i160 %r148 to i32
-%r151 = getelementptr i32, i32* %r1, i32 3
-store i32 %r149, i32* %r151
-%r152 = lshr i160 %r148, 32
-%r153 = trunc i160 %r152 to i32
-%r155 = getelementptr i32, i32* %r1, i32 4
-store i32 %r153, i32* %r155
-%r156 = lshr i352 %r135, 160
-%r157 = trunc i352 %r156 to i192
-%r158 = load i32, i32* %r4
-%r159 = zext i32 %r158 to i64
-%r161 = getelementptr i32, i32* %r4, i32 1
-%r162 = load i32, i32* %r161
-%r163 = zext i32 %r162 to i64
-%r164 = shl i64 %r163, 32
-%r165 = or i64 %r159, %r164
-%r166 = zext i64 %r165 to i96
-%r168 = getelementptr i32, i32* %r4, i32 2
-%r169 = load i32, i32* %r168
-%r170 = zext i32 %r169 to i96
-%r171 = shl i96 %r170, 64
-%r172 = or i96 %r166, %r171
-%r173 = zext i96 %r172 to i128
-%r175 = getelementptr i32, i32* %r4, i32 3
-%r176 = load i32, i32* %r175
-%r177 = zext i32 %r176 to i128
-%r178 = shl i128 %r177, 96
-%r179 = or i128 %r173, %r178
-%r180 = zext i128 %r179 to i160
-%r182 = getelementptr i32, i32* %r4, i32 4
-%r183 = load i32, i32* %r182
-%r184 = zext i32 %r183 to i160
-%r185 = shl i160 %r184, 128
-%r186 = or i160 %r180, %r185
-%r187 = zext i160 %r186 to i192
-%r188 = sub i192 %r157, %r187
-%r189 = lshr i192 %r188, 160
-%r190 = trunc i192 %r189 to i1
-%r191 = select i1 %r190, i192 %r157, i192 %r188
-%r192 = trunc i192 %r191 to i160
-%r194 = getelementptr i32, i32* %r1, i32 5
-%r195 = trunc i160 %r192 to i32
-%r197 = getelementptr i32, i32* %r194, i32 0
-store i32 %r195, i32* %r197
-%r198 = lshr i160 %r192, 32
-%r199 = trunc i160 %r198 to i32
-%r201 = getelementptr i32, i32* %r194, i32 1
-store i32 %r199, i32* %r201
-%r202 = lshr i160 %r198, 32
-%r203 = trunc i160 %r202 to i32
-%r205 = getelementptr i32, i32* %r194, i32 2
-store i32 %r203, i32* %r205
-%r206 = lshr i160 %r202, 32
-%r207 = trunc i160 %r206 to i32
-%r209 = getelementptr i32, i32* %r194, i32 3
-store i32 %r207, i32* %r209
-%r210 = lshr i160 %r206, 32
-%r211 = trunc i160 %r210 to i32
-%r213 = getelementptr i32, i32* %r194, i32 4
-store i32 %r211, i32* %r213
+%r49 = load i32, i32* %r4
+%r50 = zext i32 %r49 to i64
+%r52 = getelementptr i32, i32* %r4, i32 1
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i64
+%r55 = shl i64 %r54, 32
+%r56 = or i64 %r50, %r55
+%r57 = zext i64 %r56 to i96
+%r59 = getelementptr i32, i32* %r4, i32 2
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i96
+%r62 = shl i96 %r61, 64
+%r63 = or i96 %r57, %r62
+%r64 = zext i96 %r63 to i128
+%r66 = getelementptr i32, i32* %r4, i32 3
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i128
+%r69 = shl i128 %r68, 96
+%r70 = or i128 %r64, %r69
+%r71 = zext i128 %r70 to i160
+%r73 = getelementptr i32, i32* %r4, i32 4
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i160
+%r76 = shl i160 %r75, 128
+%r77 = or i160 %r71, %r76
+%r78 = zext i160 %r77 to i192
+%r80 = getelementptr i32, i32* %r4, i32 5
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i192
+%r83 = shl i192 %r82, 160
+%r84 = or i192 %r78, %r83
+%r85 = zext i192 %r84 to i224
+%r87 = getelementptr i32, i32* %r4, i32 6
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i224
+%r90 = shl i224 %r89, 192
+%r91 = or i224 %r85, %r90
+%r92 = zext i224 %r91 to i256
+%r93 = sub i256 %r48, %r92
+%r94 = trunc i256 %r93 to i224
+%r96 = getelementptr i32, i32* %r2, i32 0
+%r97 = trunc i224 %r94 to i32
+store i32 %r97, i32* %r96
+%r98 = lshr i224 %r94, 32
+%r100 = getelementptr i32, i32* %r2, i32 1
+%r101 = trunc i224 %r98 to i32
+store i32 %r101, i32* %r100
+%r102 = lshr i224 %r98, 32
+%r104 = getelementptr i32, i32* %r2, i32 2
+%r105 = trunc i224 %r102 to i32
+store i32 %r105, i32* %r104
+%r106 = lshr i224 %r102, 32
+%r108 = getelementptr i32, i32* %r2, i32 3
+%r109 = trunc i224 %r106 to i32
+store i32 %r109, i32* %r108
+%r110 = lshr i224 %r106, 32
+%r112 = getelementptr i32, i32* %r2, i32 4
+%r113 = trunc i224 %r110 to i32
+store i32 %r113, i32* %r112
+%r114 = lshr i224 %r110, 32
+%r116 = getelementptr i32, i32* %r2, i32 5
+%r117 = trunc i224 %r114 to i32
+store i32 %r117, i32* %r116
+%r118 = lshr i224 %r114, 32
+%r120 = getelementptr i32, i32* %r2, i32 6
+%r121 = trunc i224 %r118 to i32
+store i32 %r121, i32* %r120
+%r123 = lshr i256 %r93, 224
+%r124 = trunc i256 %r123 to i32
+%r125 = and i32 %r124, 1
+ret i32 %r125
+}
+define void @mcl_fp_shr1_7L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r3 = load i32, i32* %r2
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = zext i192 %r38 to i224
+%r41 = getelementptr i32, i32* %r2, i32 6
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i224
+%r44 = shl i224 %r43, 192
+%r45 = or i224 %r39, %r44
+%r46 = lshr i224 %r45, 1
+%r48 = getelementptr i32, i32* %r1, i32 0
+%r49 = trunc i224 %r46 to i32
+store i32 %r49, i32* %r48
+%r50 = lshr i224 %r46, 32
+%r52 = getelementptr i32, i32* %r1, i32 1
+%r53 = trunc i224 %r50 to i32
+store i32 %r53, i32* %r52
+%r54 = lshr i224 %r50, 32
+%r56 = getelementptr i32, i32* %r1, i32 2
+%r57 = trunc i224 %r54 to i32
+store i32 %r57, i32* %r56
+%r58 = lshr i224 %r54, 32
+%r60 = getelementptr i32, i32* %r1, i32 3
+%r61 = trunc i224 %r58 to i32
+store i32 %r61, i32* %r60
+%r62 = lshr i224 %r58, 32
+%r64 = getelementptr i32, i32* %r1, i32 4
+%r65 = trunc i224 %r62 to i32
+store i32 %r65, i32* %r64
+%r66 = lshr i224 %r62, 32
+%r68 = getelementptr i32, i32* %r1, i32 5
+%r69 = trunc i224 %r66 to i32
+store i32 %r69, i32* %r68
+%r70 = lshr i224 %r66, 32
+%r72 = getelementptr i32, i32* %r1, i32 6
+%r73 = trunc i224 %r70 to i32
+store i32 %r73, i32* %r72
 ret void
 }
-define void @mcl_fpDbl_sub5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_add7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -5186,1075 +4112,477 @@ define void @mcl_fpDbl_sub5L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r45 = zext i32 %r44 to i224
 %r46 = shl i224 %r45, 192
 %r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = load i32, i32* %r3
-%r70 = zext i32 %r69 to i64
-%r72 = getelementptr i32, i32* %r3, i32 1
+%r48 = load i32, i32* %r3
+%r49 = zext i32 %r48 to i64
+%r51 = getelementptr i32, i32* %r3, i32 1
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i64
+%r54 = shl i64 %r53, 32
+%r55 = or i64 %r49, %r54
+%r56 = zext i64 %r55 to i96
+%r58 = getelementptr i32, i32* %r3, i32 2
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i96
+%r61 = shl i96 %r60, 64
+%r62 = or i96 %r56, %r61
+%r63 = zext i96 %r62 to i128
+%r65 = getelementptr i32, i32* %r3, i32 3
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i128
+%r68 = shl i128 %r67, 96
+%r69 = or i128 %r63, %r68
+%r70 = zext i128 %r69 to i160
+%r72 = getelementptr i32, i32* %r3, i32 4
 %r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i64
-%r75 = shl i64 %r74, 32
-%r76 = or i64 %r70, %r75
-%r77 = zext i64 %r76 to i96
-%r79 = getelementptr i32, i32* %r3, i32 2
+%r74 = zext i32 %r73 to i160
+%r75 = shl i160 %r74, 128
+%r76 = or i160 %r70, %r75
+%r77 = zext i160 %r76 to i192
+%r79 = getelementptr i32, i32* %r3, i32 5
 %r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i96
-%r82 = shl i96 %r81, 64
-%r83 = or i96 %r77, %r82
-%r84 = zext i96 %r83 to i128
-%r86 = getelementptr i32, i32* %r3, i32 3
+%r81 = zext i32 %r80 to i192
+%r82 = shl i192 %r81, 160
+%r83 = or i192 %r77, %r82
+%r84 = zext i192 %r83 to i224
+%r86 = getelementptr i32, i32* %r3, i32 6
 %r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i128
-%r89 = shl i128 %r88, 96
-%r90 = or i128 %r84, %r89
-%r91 = zext i128 %r90 to i160
-%r93 = getelementptr i32, i32* %r3, i32 4
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i160
-%r96 = shl i160 %r95, 128
-%r97 = or i160 %r91, %r96
-%r98 = zext i160 %r97 to i192
-%r100 = getelementptr i32, i32* %r3, i32 5
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i192
-%r103 = shl i192 %r102, 160
-%r104 = or i192 %r98, %r103
-%r105 = zext i192 %r104 to i224
-%r107 = getelementptr i32, i32* %r3, i32 6
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i224
-%r110 = shl i224 %r109, 192
-%r111 = or i224 %r105, %r110
-%r112 = zext i224 %r111 to i256
-%r114 = getelementptr i32, i32* %r3, i32 7
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i256
-%r117 = shl i256 %r116, 224
-%r118 = or i256 %r112, %r117
-%r119 = zext i256 %r118 to i288
-%r121 = getelementptr i32, i32* %r3, i32 8
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i288
-%r124 = shl i288 %r123, 256
-%r125 = or i288 %r119, %r124
-%r126 = zext i288 %r125 to i320
-%r128 = getelementptr i32, i32* %r3, i32 9
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i320
-%r131 = shl i320 %r130, 288
-%r132 = or i320 %r126, %r131
-%r133 = zext i320 %r68 to i352
-%r134 = zext i320 %r132 to i352
-%r135 = sub i352 %r133, %r134
-%r136 = trunc i352 %r135 to i160
-%r137 = trunc i160 %r136 to i32
-%r139 = getelementptr i32, i32* %r1, i32 0
-store i32 %r137, i32* %r139
-%r140 = lshr i160 %r136, 32
-%r141 = trunc i160 %r140 to i32
-%r143 = getelementptr i32, i32* %r1, i32 1
-store i32 %r141, i32* %r143
-%r144 = lshr i160 %r140, 32
-%r145 = trunc i160 %r144 to i32
-%r147 = getelementptr i32, i32* %r1, i32 2
-store i32 %r145, i32* %r147
-%r148 = lshr i160 %r144, 32
-%r149 = trunc i160 %r148 to i32
-%r151 = getelementptr i32, i32* %r1, i32 3
-store i32 %r149, i32* %r151
-%r152 = lshr i160 %r148, 32
-%r153 = trunc i160 %r152 to i32
-%r155 = getelementptr i32, i32* %r1, i32 4
-store i32 %r153, i32* %r155
-%r156 = lshr i352 %r135, 160
-%r157 = trunc i352 %r156 to i160
-%r158 = lshr i352 %r135, 320
-%r159 = trunc i352 %r158 to i1
-%r160 = load i32, i32* %r4
-%r161 = zext i32 %r160 to i64
-%r163 = getelementptr i32, i32* %r4, i32 1
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i64
-%r166 = shl i64 %r165, 32
-%r167 = or i64 %r161, %r166
-%r168 = zext i64 %r167 to i96
-%r170 = getelementptr i32, i32* %r4, i32 2
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i96
-%r173 = shl i96 %r172, 64
-%r174 = or i96 %r168, %r173
-%r175 = zext i96 %r174 to i128
-%r177 = getelementptr i32, i32* %r4, i32 3
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i128
-%r180 = shl i128 %r179, 96
-%r181 = or i128 %r175, %r180
-%r182 = zext i128 %r181 to i160
-%r184 = getelementptr i32, i32* %r4, i32 4
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i160
-%r187 = shl i160 %r186, 128
-%r188 = or i160 %r182, %r187
-%r190 = select i1 %r159, i160 %r188, i160 0
-%r191 = add i160 %r157, %r190
-%r193 = getelementptr i32, i32* %r1, i32 5
-%r194 = trunc i160 %r191 to i32
-%r196 = getelementptr i32, i32* %r193, i32 0
-store i32 %r194, i32* %r196
-%r197 = lshr i160 %r191, 32
-%r198 = trunc i160 %r197 to i32
-%r200 = getelementptr i32, i32* %r193, i32 1
-store i32 %r198, i32* %r200
-%r201 = lshr i160 %r197, 32
-%r202 = trunc i160 %r201 to i32
-%r204 = getelementptr i32, i32* %r193, i32 2
-store i32 %r202, i32* %r204
-%r205 = lshr i160 %r201, 32
-%r206 = trunc i160 %r205 to i32
-%r208 = getelementptr i32, i32* %r193, i32 3
-store i32 %r206, i32* %r208
-%r209 = lshr i160 %r205, 32
-%r210 = trunc i160 %r209 to i32
-%r212 = getelementptr i32, i32* %r193, i32 4
-store i32 %r210, i32* %r212
-ret void
-}
-define i224 @mulPv192x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
-%r18 = trunc i64 %r17 to i32
-%r19 = call i32 @extractHigh32(i64 %r17)
-%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
-%r22 = trunc i64 %r21 to i32
-%r23 = call i32 @extractHigh32(i64 %r21)
-%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
-%r26 = trunc i64 %r25 to i32
-%r27 = call i32 @extractHigh32(i64 %r25)
-%r28 = zext i32 %r6 to i64
-%r29 = zext i32 %r10 to i64
-%r30 = shl i64 %r29, 32
-%r31 = or i64 %r28, %r30
-%r32 = zext i64 %r31 to i96
-%r33 = zext i32 %r14 to i96
-%r34 = shl i96 %r33, 64
-%r35 = or i96 %r32, %r34
-%r36 = zext i96 %r35 to i128
-%r37 = zext i32 %r18 to i128
-%r38 = shl i128 %r37, 96
-%r39 = or i128 %r36, %r38
-%r40 = zext i128 %r39 to i160
-%r41 = zext i32 %r22 to i160
-%r42 = shl i160 %r41, 128
-%r43 = or i160 %r40, %r42
-%r44 = zext i160 %r43 to i192
-%r45 = zext i32 %r26 to i192
-%r46 = shl i192 %r45, 160
-%r47 = or i192 %r44, %r46
-%r48 = zext i32 %r7 to i64
-%r49 = zext i32 %r11 to i64
-%r50 = shl i64 %r49, 32
-%r51 = or i64 %r48, %r50
-%r52 = zext i64 %r51 to i96
-%r53 = zext i32 %r15 to i96
-%r54 = shl i96 %r53, 64
-%r55 = or i96 %r52, %r54
-%r56 = zext i96 %r55 to i128
-%r57 = zext i32 %r19 to i128
-%r58 = shl i128 %r57, 96
-%r59 = or i128 %r56, %r58
-%r60 = zext i128 %r59 to i160
-%r61 = zext i32 %r23 to i160
-%r62 = shl i160 %r61, 128
-%r63 = or i160 %r60, %r62
-%r64 = zext i160 %r63 to i192
-%r65 = zext i32 %r27 to i192
-%r66 = shl i192 %r65, 160
-%r67 = or i192 %r64, %r66
-%r68 = zext i192 %r47 to i224
-%r69 = zext i192 %r67 to i224
-%r70 = shl i224 %r69, 32
-%r71 = add i224 %r68, %r70
-ret i224 %r71
-}
-define void @mcl_fp_mulUnitPre6L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i224 @mulPv192x32(i32* %r2, i32 %r3)
-%r5 = trunc i224 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i224 %r4, 32
-%r9 = trunc i224 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i224 %r8, 32
-%r13 = trunc i224 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i224 %r12, 32
-%r17 = trunc i224 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i224 %r16, 32
-%r21 = trunc i224 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-%r24 = lshr i224 %r20, 32
-%r25 = trunc i224 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 5
-store i32 %r25, i32* %r27
-%r28 = lshr i224 %r24, 32
-%r29 = trunc i224 %r28 to i32
-%r31 = getelementptr i32, i32* %r1, i32 6
-store i32 %r29, i32* %r31
+%r88 = zext i32 %r87 to i224
+%r89 = shl i224 %r88, 192
+%r90 = or i224 %r84, %r89
+%r91 = zext i224 %r47 to i256
+%r92 = zext i224 %r90 to i256
+%r93 = add i256 %r91, %r92
+%r94 = trunc i256 %r93 to i224
+%r96 = getelementptr i32, i32* %r1, i32 0
+%r97 = trunc i224 %r94 to i32
+store i32 %r97, i32* %r96
+%r98 = lshr i224 %r94, 32
+%r100 = getelementptr i32, i32* %r1, i32 1
+%r101 = trunc i224 %r98 to i32
+store i32 %r101, i32* %r100
+%r102 = lshr i224 %r98, 32
+%r104 = getelementptr i32, i32* %r1, i32 2
+%r105 = trunc i224 %r102 to i32
+store i32 %r105, i32* %r104
+%r106 = lshr i224 %r102, 32
+%r108 = getelementptr i32, i32* %r1, i32 3
+%r109 = trunc i224 %r106 to i32
+store i32 %r109, i32* %r108
+%r110 = lshr i224 %r106, 32
+%r112 = getelementptr i32, i32* %r1, i32 4
+%r113 = trunc i224 %r110 to i32
+store i32 %r113, i32* %r112
+%r114 = lshr i224 %r110, 32
+%r116 = getelementptr i32, i32* %r1, i32 5
+%r117 = trunc i224 %r114 to i32
+store i32 %r117, i32* %r116
+%r118 = lshr i224 %r114, 32
+%r120 = getelementptr i32, i32* %r1, i32 6
+%r121 = trunc i224 %r118 to i32
+store i32 %r121, i32* %r120
+%r122 = load i32, i32* %r4
+%r123 = zext i32 %r122 to i64
+%r125 = getelementptr i32, i32* %r4, i32 1
+%r126 = load i32, i32* %r125
+%r127 = zext i32 %r126 to i64
+%r128 = shl i64 %r127, 32
+%r129 = or i64 %r123, %r128
+%r130 = zext i64 %r129 to i96
+%r132 = getelementptr i32, i32* %r4, i32 2
+%r133 = load i32, i32* %r132
+%r134 = zext i32 %r133 to i96
+%r135 = shl i96 %r134, 64
+%r136 = or i96 %r130, %r135
+%r137 = zext i96 %r136 to i128
+%r139 = getelementptr i32, i32* %r4, i32 3
+%r140 = load i32, i32* %r139
+%r141 = zext i32 %r140 to i128
+%r142 = shl i128 %r141, 96
+%r143 = or i128 %r137, %r142
+%r144 = zext i128 %r143 to i160
+%r146 = getelementptr i32, i32* %r4, i32 4
+%r147 = load i32, i32* %r146
+%r148 = zext i32 %r147 to i160
+%r149 = shl i160 %r148, 128
+%r150 = or i160 %r144, %r149
+%r151 = zext i160 %r150 to i192
+%r153 = getelementptr i32, i32* %r4, i32 5
+%r154 = load i32, i32* %r153
+%r155 = zext i32 %r154 to i192
+%r156 = shl i192 %r155, 160
+%r157 = or i192 %r151, %r156
+%r158 = zext i192 %r157 to i224
+%r160 = getelementptr i32, i32* %r4, i32 6
+%r161 = load i32, i32* %r160
+%r162 = zext i32 %r161 to i224
+%r163 = shl i224 %r162, 192
+%r164 = or i224 %r158, %r163
+%r165 = zext i224 %r164 to i256
+%r166 = sub i256 %r93, %r165
+%r167 = lshr i256 %r166, 224
+%r168 = trunc i256 %r167 to i1
+br i1%r168, label %carry, label %nocarry
+nocarry:
+%r169 = trunc i256 %r166 to i224
+%r171 = getelementptr i32, i32* %r1, i32 0
+%r172 = trunc i224 %r169 to i32
+store i32 %r172, i32* %r171
+%r173 = lshr i224 %r169, 32
+%r175 = getelementptr i32, i32* %r1, i32 1
+%r176 = trunc i224 %r173 to i32
+store i32 %r176, i32* %r175
+%r177 = lshr i224 %r173, 32
+%r179 = getelementptr i32, i32* %r1, i32 2
+%r180 = trunc i224 %r177 to i32
+store i32 %r180, i32* %r179
+%r181 = lshr i224 %r177, 32
+%r183 = getelementptr i32, i32* %r1, i32 3
+%r184 = trunc i224 %r181 to i32
+store i32 %r184, i32* %r183
+%r185 = lshr i224 %r181, 32
+%r187 = getelementptr i32, i32* %r1, i32 4
+%r188 = trunc i224 %r185 to i32
+store i32 %r188, i32* %r187
+%r189 = lshr i224 %r185, 32
+%r191 = getelementptr i32, i32* %r1, i32 5
+%r192 = trunc i224 %r189 to i32
+store i32 %r192, i32* %r191
+%r193 = lshr i224 %r189, 32
+%r195 = getelementptr i32, i32* %r1, i32 6
+%r196 = trunc i224 %r193 to i32
+store i32 %r196, i32* %r195
 ret void
-}
-define void @mcl_fpDbl_mulPre6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r4 = load i32, i32* %r3
-%r5 = call i224 @mulPv192x32(i32* %r2, i32 %r4)
-%r6 = trunc i224 %r5 to i32
-store i32 %r6, i32* %r1
-%r7 = lshr i224 %r5, 32
-%r9 = getelementptr i32, i32* %r3, i32 1
-%r10 = load i32, i32* %r9
-%r11 = call i224 @mulPv192x32(i32* %r2, i32 %r10)
-%r12 = add i224 %r7, %r11
-%r13 = trunc i224 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 1
-store i32 %r13, i32* %r15
-%r16 = lshr i224 %r12, 32
-%r18 = getelementptr i32, i32* %r3, i32 2
-%r19 = load i32, i32* %r18
-%r20 = call i224 @mulPv192x32(i32* %r2, i32 %r19)
-%r21 = add i224 %r16, %r20
-%r22 = trunc i224 %r21 to i32
-%r24 = getelementptr i32, i32* %r1, i32 2
-store i32 %r22, i32* %r24
-%r25 = lshr i224 %r21, 32
-%r27 = getelementptr i32, i32* %r3, i32 3
-%r28 = load i32, i32* %r27
-%r29 = call i224 @mulPv192x32(i32* %r2, i32 %r28)
-%r30 = add i224 %r25, %r29
-%r31 = trunc i224 %r30 to i32
-%r33 = getelementptr i32, i32* %r1, i32 3
-store i32 %r31, i32* %r33
-%r34 = lshr i224 %r30, 32
-%r36 = getelementptr i32, i32* %r3, i32 4
-%r37 = load i32, i32* %r36
-%r38 = call i224 @mulPv192x32(i32* %r2, i32 %r37)
-%r39 = add i224 %r34, %r38
-%r40 = trunc i224 %r39 to i32
-%r42 = getelementptr i32, i32* %r1, i32 4
-store i32 %r40, i32* %r42
-%r43 = lshr i224 %r39, 32
-%r45 = getelementptr i32, i32* %r3, i32 5
-%r46 = load i32, i32* %r45
-%r47 = call i224 @mulPv192x32(i32* %r2, i32 %r46)
-%r48 = add i224 %r43, %r47
-%r50 = getelementptr i32, i32* %r1, i32 5
-%r51 = trunc i224 %r48 to i32
-%r53 = getelementptr i32, i32* %r50, i32 0
-store i32 %r51, i32* %r53
-%r54 = lshr i224 %r48, 32
-%r55 = trunc i224 %r54 to i32
-%r57 = getelementptr i32, i32* %r50, i32 1
-store i32 %r55, i32* %r57
-%r58 = lshr i224 %r54, 32
-%r59 = trunc i224 %r58 to i32
-%r61 = getelementptr i32, i32* %r50, i32 2
-store i32 %r59, i32* %r61
-%r62 = lshr i224 %r58, 32
-%r63 = trunc i224 %r62 to i32
-%r65 = getelementptr i32, i32* %r50, i32 3
-store i32 %r63, i32* %r65
-%r66 = lshr i224 %r62, 32
-%r67 = trunc i224 %r66 to i32
-%r69 = getelementptr i32, i32* %r50, i32 4
-store i32 %r67, i32* %r69
-%r70 = lshr i224 %r66, 32
-%r71 = trunc i224 %r70 to i32
-%r73 = getelementptr i32, i32* %r50, i32 5
-store i32 %r71, i32* %r73
-%r74 = lshr i224 %r70, 32
-%r75 = trunc i224 %r74 to i32
-%r77 = getelementptr i32, i32* %r50, i32 6
-store i32 %r75, i32* %r77
+carry:
 ret void
 }
-define void @mcl_fpDbl_sqrPre6L(i32* noalias  %r1, i32* noalias  %r2)
+define void @mcl_fp_addNF7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
-%r3 = load i32, i32* %r2
-%r4 = call i224 @mulPv192x32(i32* %r2, i32 %r3)
-%r5 = trunc i224 %r4 to i32
-store i32 %r5, i32* %r1
-%r6 = lshr i224 %r4, 32
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
 %r8 = getelementptr i32, i32* %r2, i32 1
 %r9 = load i32, i32* %r8
-%r10 = call i224 @mulPv192x32(i32* %r2, i32 %r9)
-%r11 = add i224 %r6, %r10
-%r12 = trunc i224 %r11 to i32
-%r14 = getelementptr i32, i32* %r1, i32 1
-store i32 %r12, i32* %r14
-%r15 = lshr i224 %r11, 32
-%r17 = getelementptr i32, i32* %r2, i32 2
-%r18 = load i32, i32* %r17
-%r19 = call i224 @mulPv192x32(i32* %r2, i32 %r18)
-%r20 = add i224 %r15, %r19
-%r21 = trunc i224 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 2
-store i32 %r21, i32* %r23
-%r24 = lshr i224 %r20, 32
-%r26 = getelementptr i32, i32* %r2, i32 3
-%r27 = load i32, i32* %r26
-%r28 = call i224 @mulPv192x32(i32* %r2, i32 %r27)
-%r29 = add i224 %r24, %r28
-%r30 = trunc i224 %r29 to i32
-%r32 = getelementptr i32, i32* %r1, i32 3
-store i32 %r30, i32* %r32
-%r33 = lshr i224 %r29, 32
-%r35 = getelementptr i32, i32* %r2, i32 4
-%r36 = load i32, i32* %r35
-%r37 = call i224 @mulPv192x32(i32* %r2, i32 %r36)
-%r38 = add i224 %r33, %r37
-%r39 = trunc i224 %r38 to i32
-%r41 = getelementptr i32, i32* %r1, i32 4
-store i32 %r39, i32* %r41
-%r42 = lshr i224 %r38, 32
-%r44 = getelementptr i32, i32* %r2, i32 5
-%r45 = load i32, i32* %r44
-%r46 = call i224 @mulPv192x32(i32* %r2, i32 %r45)
-%r47 = add i224 %r42, %r46
-%r49 = getelementptr i32, i32* %r1, i32 5
-%r50 = trunc i224 %r47 to i32
-%r52 = getelementptr i32, i32* %r49, i32 0
-store i32 %r50, i32* %r52
-%r53 = lshr i224 %r47, 32
-%r54 = trunc i224 %r53 to i32
-%r56 = getelementptr i32, i32* %r49, i32 1
-store i32 %r54, i32* %r56
-%r57 = lshr i224 %r53, 32
-%r58 = trunc i224 %r57 to i32
-%r60 = getelementptr i32, i32* %r49, i32 2
-store i32 %r58, i32* %r60
-%r61 = lshr i224 %r57, 32
-%r62 = trunc i224 %r61 to i32
-%r64 = getelementptr i32, i32* %r49, i32 3
-store i32 %r62, i32* %r64
-%r65 = lshr i224 %r61, 32
-%r66 = trunc i224 %r65 to i32
-%r68 = getelementptr i32, i32* %r49, i32 4
-store i32 %r66, i32* %r68
-%r69 = lshr i224 %r65, 32
-%r70 = trunc i224 %r69 to i32
-%r72 = getelementptr i32, i32* %r49, i32 5
-store i32 %r70, i32* %r72
-%r73 = lshr i224 %r69, 32
-%r74 = trunc i224 %r73 to i32
-%r76 = getelementptr i32, i32* %r49, i32 6
-store i32 %r74, i32* %r76
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = load i32, i32* %r3
+%r49 = zext i32 %r48 to i64
+%r51 = getelementptr i32, i32* %r3, i32 1
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i64
+%r54 = shl i64 %r53, 32
+%r55 = or i64 %r49, %r54
+%r56 = zext i64 %r55 to i96
+%r58 = getelementptr i32, i32* %r3, i32 2
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i96
+%r61 = shl i96 %r60, 64
+%r62 = or i96 %r56, %r61
+%r63 = zext i96 %r62 to i128
+%r65 = getelementptr i32, i32* %r3, i32 3
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i128
+%r68 = shl i128 %r67, 96
+%r69 = or i128 %r63, %r68
+%r70 = zext i128 %r69 to i160
+%r72 = getelementptr i32, i32* %r3, i32 4
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i160
+%r75 = shl i160 %r74, 128
+%r76 = or i160 %r70, %r75
+%r77 = zext i160 %r76 to i192
+%r79 = getelementptr i32, i32* %r3, i32 5
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i192
+%r82 = shl i192 %r81, 160
+%r83 = or i192 %r77, %r82
+%r84 = zext i192 %r83 to i224
+%r86 = getelementptr i32, i32* %r3, i32 6
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i224
+%r89 = shl i224 %r88, 192
+%r90 = or i224 %r84, %r89
+%r91 = add i224 %r47, %r90
+%r92 = load i32, i32* %r4
+%r93 = zext i32 %r92 to i64
+%r95 = getelementptr i32, i32* %r4, i32 1
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i64
+%r98 = shl i64 %r97, 32
+%r99 = or i64 %r93, %r98
+%r100 = zext i64 %r99 to i96
+%r102 = getelementptr i32, i32* %r4, i32 2
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i96
+%r105 = shl i96 %r104, 64
+%r106 = or i96 %r100, %r105
+%r107 = zext i96 %r106 to i128
+%r109 = getelementptr i32, i32* %r4, i32 3
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i128
+%r112 = shl i128 %r111, 96
+%r113 = or i128 %r107, %r112
+%r114 = zext i128 %r113 to i160
+%r116 = getelementptr i32, i32* %r4, i32 4
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i160
+%r119 = shl i160 %r118, 128
+%r120 = or i160 %r114, %r119
+%r121 = zext i160 %r120 to i192
+%r123 = getelementptr i32, i32* %r4, i32 5
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i192
+%r126 = shl i192 %r125, 160
+%r127 = or i192 %r121, %r126
+%r128 = zext i192 %r127 to i224
+%r130 = getelementptr i32, i32* %r4, i32 6
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i224
+%r133 = shl i224 %r132, 192
+%r134 = or i224 %r128, %r133
+%r135 = sub i224 %r91, %r134
+%r136 = lshr i224 %r135, 223
+%r137 = trunc i224 %r136 to i1
+%r138 = select i1 %r137, i224 %r91, i224 %r135
+%r140 = getelementptr i32, i32* %r1, i32 0
+%r141 = trunc i224 %r138 to i32
+store i32 %r141, i32* %r140
+%r142 = lshr i224 %r138, 32
+%r144 = getelementptr i32, i32* %r1, i32 1
+%r145 = trunc i224 %r142 to i32
+store i32 %r145, i32* %r144
+%r146 = lshr i224 %r142, 32
+%r148 = getelementptr i32, i32* %r1, i32 2
+%r149 = trunc i224 %r146 to i32
+store i32 %r149, i32* %r148
+%r150 = lshr i224 %r146, 32
+%r152 = getelementptr i32, i32* %r1, i32 3
+%r153 = trunc i224 %r150 to i32
+store i32 %r153, i32* %r152
+%r154 = lshr i224 %r150, 32
+%r156 = getelementptr i32, i32* %r1, i32 4
+%r157 = trunc i224 %r154 to i32
+store i32 %r157, i32* %r156
+%r158 = lshr i224 %r154, 32
+%r160 = getelementptr i32, i32* %r1, i32 5
+%r161 = trunc i224 %r158 to i32
+store i32 %r161, i32* %r160
+%r162 = lshr i224 %r158, 32
+%r164 = getelementptr i32, i32* %r1, i32 6
+%r165 = trunc i224 %r162 to i32
+store i32 %r165, i32* %r164
 ret void
 }
-define void @mcl_fp_mont6L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+define void @mcl_fp_sub7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i224 @mulPv192x32(i32* %r2, i32 %r10)
-%r12 = zext i224 %r11 to i256
-%r13 = trunc i224 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i224 @mulPv192x32(i32* %r4, i32 %r14)
-%r16 = zext i224 %r15 to i256
-%r17 = add i256 %r12, %r16
-%r18 = lshr i256 %r17, 32
-%r20 = getelementptr i32, i32* %r3, i32 1
-%r21 = load i32, i32* %r20
-%r22 = call i224 @mulPv192x32(i32* %r2, i32 %r21)
-%r23 = zext i224 %r22 to i256
-%r24 = add i256 %r18, %r23
-%r25 = trunc i256 %r24 to i32
-%r26 = mul i32 %r25, %r7
-%r27 = call i224 @mulPv192x32(i32* %r4, i32 %r26)
-%r28 = zext i224 %r27 to i256
-%r29 = add i256 %r24, %r28
-%r30 = lshr i256 %r29, 32
-%r32 = getelementptr i32, i32* %r3, i32 2
-%r33 = load i32, i32* %r32
-%r34 = call i224 @mulPv192x32(i32* %r2, i32 %r33)
-%r35 = zext i224 %r34 to i256
-%r36 = add i256 %r30, %r35
-%r37 = trunc i256 %r36 to i32
-%r38 = mul i32 %r37, %r7
-%r39 = call i224 @mulPv192x32(i32* %r4, i32 %r38)
-%r40 = zext i224 %r39 to i256
-%r41 = add i256 %r36, %r40
-%r42 = lshr i256 %r41, 32
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = call i224 @mulPv192x32(i32* %r2, i32 %r45)
-%r47 = zext i224 %r46 to i256
-%r48 = add i256 %r42, %r47
-%r49 = trunc i256 %r48 to i32
-%r50 = mul i32 %r49, %r7
-%r51 = call i224 @mulPv192x32(i32* %r4, i32 %r50)
-%r52 = zext i224 %r51 to i256
-%r53 = add i256 %r48, %r52
-%r54 = lshr i256 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 4
-%r57 = load i32, i32* %r56
-%r58 = call i224 @mulPv192x32(i32* %r2, i32 %r57)
-%r59 = zext i224 %r58 to i256
-%r60 = add i256 %r54, %r59
-%r61 = trunc i256 %r60 to i32
-%r62 = mul i32 %r61, %r7
-%r63 = call i224 @mulPv192x32(i32* %r4, i32 %r62)
-%r64 = zext i224 %r63 to i256
-%r65 = add i256 %r60, %r64
-%r66 = lshr i256 %r65, 32
-%r68 = getelementptr i32, i32* %r3, i32 5
-%r69 = load i32, i32* %r68
-%r70 = call i224 @mulPv192x32(i32* %r2, i32 %r69)
-%r71 = zext i224 %r70 to i256
-%r72 = add i256 %r66, %r71
-%r73 = trunc i256 %r72 to i32
-%r74 = mul i32 %r73, %r7
-%r75 = call i224 @mulPv192x32(i32* %r4, i32 %r74)
-%r76 = zext i224 %r75 to i256
-%r77 = add i256 %r72, %r76
-%r78 = lshr i256 %r77, 32
-%r79 = trunc i256 %r78 to i224
-%r80 = load i32, i32* %r4
-%r81 = zext i32 %r80 to i64
-%r83 = getelementptr i32, i32* %r4, i32 1
-%r84 = load i32, i32* %r83
-%r85 = zext i32 %r84 to i64
-%r86 = shl i64 %r85, 32
-%r87 = or i64 %r81, %r86
-%r88 = zext i64 %r87 to i96
-%r90 = getelementptr i32, i32* %r4, i32 2
-%r91 = load i32, i32* %r90
-%r92 = zext i32 %r91 to i96
-%r93 = shl i96 %r92, 64
-%r94 = or i96 %r88, %r93
-%r95 = zext i96 %r94 to i128
-%r97 = getelementptr i32, i32* %r4, i32 3
-%r98 = load i32, i32* %r97
-%r99 = zext i32 %r98 to i128
-%r100 = shl i128 %r99, 96
-%r101 = or i128 %r95, %r100
-%r102 = zext i128 %r101 to i160
-%r104 = getelementptr i32, i32* %r4, i32 4
-%r105 = load i32, i32* %r104
-%r106 = zext i32 %r105 to i160
-%r107 = shl i160 %r106, 128
-%r108 = or i160 %r102, %r107
-%r109 = zext i160 %r108 to i192
-%r111 = getelementptr i32, i32* %r4, i32 5
-%r112 = load i32, i32* %r111
-%r113 = zext i32 %r112 to i192
-%r114 = shl i192 %r113, 160
-%r115 = or i192 %r109, %r114
-%r116 = zext i192 %r115 to i224
-%r117 = sub i224 %r79, %r116
-%r118 = lshr i224 %r117, 192
-%r119 = trunc i224 %r118 to i1
-%r120 = select i1 %r119, i224 %r79, i224 %r117
-%r121 = trunc i224 %r120 to i192
-%r122 = trunc i192 %r121 to i32
-%r124 = getelementptr i32, i32* %r1, i32 0
-store i32 %r122, i32* %r124
-%r125 = lshr i192 %r121, 32
-%r126 = trunc i192 %r125 to i32
-%r128 = getelementptr i32, i32* %r1, i32 1
-store i32 %r126, i32* %r128
-%r129 = lshr i192 %r125, 32
-%r130 = trunc i192 %r129 to i32
-%r132 = getelementptr i32, i32* %r1, i32 2
-store i32 %r130, i32* %r132
-%r133 = lshr i192 %r129, 32
-%r134 = trunc i192 %r133 to i32
-%r136 = getelementptr i32, i32* %r1, i32 3
-store i32 %r134, i32* %r136
-%r137 = lshr i192 %r133, 32
-%r138 = trunc i192 %r137 to i32
-%r140 = getelementptr i32, i32* %r1, i32 4
-store i32 %r138, i32* %r140
-%r141 = lshr i192 %r137, 32
-%r142 = trunc i192 %r141 to i32
-%r144 = getelementptr i32, i32* %r1, i32 5
-store i32 %r142, i32* %r144
-ret void
-}
-define void @mcl_fp_montNF6L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i224 @mulPv192x32(i32* %r2, i32 %r8)
-%r10 = trunc i224 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i224 @mulPv192x32(i32* %r4, i32 %r11)
-%r13 = add i224 %r9, %r12
-%r14 = lshr i224 %r13, 32
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = call i224 @mulPv192x32(i32* %r2, i32 %r17)
-%r19 = add i224 %r14, %r18
-%r20 = trunc i224 %r19 to i32
-%r21 = mul i32 %r20, %r7
-%r22 = call i224 @mulPv192x32(i32* %r4, i32 %r21)
-%r23 = add i224 %r19, %r22
-%r24 = lshr i224 %r23, 32
-%r26 = getelementptr i32, i32* %r3, i32 2
-%r27 = load i32, i32* %r26
-%r28 = call i224 @mulPv192x32(i32* %r2, i32 %r27)
-%r29 = add i224 %r24, %r28
-%r30 = trunc i224 %r29 to i32
-%r31 = mul i32 %r30, %r7
-%r32 = call i224 @mulPv192x32(i32* %r4, i32 %r31)
-%r33 = add i224 %r29, %r32
-%r34 = lshr i224 %r33, 32
-%r36 = getelementptr i32, i32* %r3, i32 3
-%r37 = load i32, i32* %r36
-%r38 = call i224 @mulPv192x32(i32* %r2, i32 %r37)
-%r39 = add i224 %r34, %r38
-%r40 = trunc i224 %r39 to i32
-%r41 = mul i32 %r40, %r7
-%r42 = call i224 @mulPv192x32(i32* %r4, i32 %r41)
-%r43 = add i224 %r39, %r42
-%r44 = lshr i224 %r43, 32
-%r46 = getelementptr i32, i32* %r3, i32 4
-%r47 = load i32, i32* %r46
-%r48 = call i224 @mulPv192x32(i32* %r2, i32 %r47)
-%r49 = add i224 %r44, %r48
-%r50 = trunc i224 %r49 to i32
-%r51 = mul i32 %r50, %r7
-%r52 = call i224 @mulPv192x32(i32* %r4, i32 %r51)
-%r53 = add i224 %r49, %r52
-%r54 = lshr i224 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 5
-%r57 = load i32, i32* %r56
-%r58 = call i224 @mulPv192x32(i32* %r2, i32 %r57)
-%r59 = add i224 %r54, %r58
-%r60 = trunc i224 %r59 to i32
-%r61 = mul i32 %r60, %r7
-%r62 = call i224 @mulPv192x32(i32* %r4, i32 %r61)
-%r63 = add i224 %r59, %r62
-%r64 = lshr i224 %r63, 32
-%r65 = trunc i224 %r64 to i192
-%r66 = load i32, i32* %r4
-%r67 = zext i32 %r66 to i64
-%r69 = getelementptr i32, i32* %r4, i32 1
-%r70 = load i32, i32* %r69
-%r71 = zext i32 %r70 to i64
-%r72 = shl i64 %r71, 32
-%r73 = or i64 %r67, %r72
-%r74 = zext i64 %r73 to i96
-%r76 = getelementptr i32, i32* %r4, i32 2
-%r77 = load i32, i32* %r76
-%r78 = zext i32 %r77 to i96
-%r79 = shl i96 %r78, 64
-%r80 = or i96 %r74, %r79
-%r81 = zext i96 %r80 to i128
-%r83 = getelementptr i32, i32* %r4, i32 3
-%r84 = load i32, i32* %r83
-%r85 = zext i32 %r84 to i128
-%r86 = shl i128 %r85, 96
-%r87 = or i128 %r81, %r86
-%r88 = zext i128 %r87 to i160
-%r90 = getelementptr i32, i32* %r4, i32 4
-%r91 = load i32, i32* %r90
-%r92 = zext i32 %r91 to i160
-%r93 = shl i160 %r92, 128
-%r94 = or i160 %r88, %r93
-%r95 = zext i160 %r94 to i192
-%r97 = getelementptr i32, i32* %r4, i32 5
-%r98 = load i32, i32* %r97
-%r99 = zext i32 %r98 to i192
-%r100 = shl i192 %r99, 160
-%r101 = or i192 %r95, %r100
-%r102 = sub i192 %r65, %r101
-%r103 = lshr i192 %r102, 191
-%r104 = trunc i192 %r103 to i1
-%r105 = select i1 %r104, i192 %r65, i192 %r102
-%r106 = trunc i192 %r105 to i32
-%r108 = getelementptr i32, i32* %r1, i32 0
-store i32 %r106, i32* %r108
-%r109 = lshr i192 %r105, 32
-%r110 = trunc i192 %r109 to i32
-%r112 = getelementptr i32, i32* %r1, i32 1
-store i32 %r110, i32* %r112
-%r113 = lshr i192 %r109, 32
-%r114 = trunc i192 %r113 to i32
-%r116 = getelementptr i32, i32* %r1, i32 2
-store i32 %r114, i32* %r116
-%r117 = lshr i192 %r113, 32
-%r118 = trunc i192 %r117 to i32
-%r120 = getelementptr i32, i32* %r1, i32 3
-store i32 %r118, i32* %r120
-%r121 = lshr i192 %r117, 32
-%r122 = trunc i192 %r121 to i32
-%r124 = getelementptr i32, i32* %r1, i32 4
-store i32 %r122, i32* %r124
-%r125 = lshr i192 %r121, 32
-%r126 = trunc i192 %r125 to i32
-%r128 = getelementptr i32, i32* %r1, i32 5
-store i32 %r126, i32* %r128
-ret void
-}
-define void @mcl_fp_montRed6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = zext i96 %r21 to i128
-%r24 = getelementptr i32, i32* %r3, i32 3
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i128
-%r27 = shl i128 %r26, 96
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i160
-%r31 = getelementptr i32, i32* %r3, i32 4
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i160
-%r34 = shl i160 %r33, 128
-%r35 = or i160 %r29, %r34
-%r36 = zext i160 %r35 to i192
-%r38 = getelementptr i32, i32* %r3, i32 5
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i192
-%r41 = shl i192 %r40, 160
-%r42 = or i192 %r36, %r41
-%r43 = load i32, i32* %r2
-%r44 = zext i32 %r43 to i64
-%r46 = getelementptr i32, i32* %r2, i32 1
-%r47 = load i32, i32* %r46
-%r48 = zext i32 %r47 to i64
-%r49 = shl i64 %r48, 32
-%r50 = or i64 %r44, %r49
-%r51 = zext i64 %r50 to i96
-%r53 = getelementptr i32, i32* %r2, i32 2
-%r54 = load i32, i32* %r53
-%r55 = zext i32 %r54 to i96
-%r56 = shl i96 %r55, 64
-%r57 = or i96 %r51, %r56
-%r58 = zext i96 %r57 to i128
-%r60 = getelementptr i32, i32* %r2, i32 3
-%r61 = load i32, i32* %r60
-%r62 = zext i32 %r61 to i128
-%r63 = shl i128 %r62, 96
-%r64 = or i128 %r58, %r63
-%r65 = zext i128 %r64 to i160
-%r67 = getelementptr i32, i32* %r2, i32 4
-%r68 = load i32, i32* %r67
-%r69 = zext i32 %r68 to i160
-%r70 = shl i160 %r69, 128
-%r71 = or i160 %r65, %r70
-%r72 = zext i160 %r71 to i192
-%r74 = getelementptr i32, i32* %r2, i32 5
-%r75 = load i32, i32* %r74
-%r76 = zext i32 %r75 to i192
-%r77 = shl i192 %r76, 160
-%r78 = or i192 %r72, %r77
-%r79 = zext i192 %r78 to i224
-%r81 = getelementptr i32, i32* %r2, i32 6
-%r82 = load i32, i32* %r81
-%r83 = zext i32 %r82 to i224
-%r84 = shl i224 %r83, 192
-%r85 = or i224 %r79, %r84
-%r86 = zext i224 %r85 to i256
-%r88 = getelementptr i32, i32* %r2, i32 7
-%r89 = load i32, i32* %r88
-%r90 = zext i32 %r89 to i256
-%r91 = shl i256 %r90, 224
-%r92 = or i256 %r86, %r91
-%r93 = zext i256 %r92 to i288
-%r95 = getelementptr i32, i32* %r2, i32 8
-%r96 = load i32, i32* %r95
-%r97 = zext i32 %r96 to i288
-%r98 = shl i288 %r97, 256
-%r99 = or i288 %r93, %r98
-%r100 = zext i288 %r99 to i320
-%r102 = getelementptr i32, i32* %r2, i32 9
-%r103 = load i32, i32* %r102
-%r104 = zext i32 %r103 to i320
-%r105 = shl i320 %r104, 288
-%r106 = or i320 %r100, %r105
-%r107 = zext i320 %r106 to i352
-%r109 = getelementptr i32, i32* %r2, i32 10
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i352
-%r112 = shl i352 %r111, 320
-%r113 = or i352 %r107, %r112
-%r114 = zext i352 %r113 to i384
-%r116 = getelementptr i32, i32* %r2, i32 11
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i384
-%r119 = shl i384 %r118, 352
-%r120 = or i384 %r114, %r119
-%r121 = zext i384 %r120 to i416
-%r122 = trunc i416 %r121 to i32
-%r123 = mul i32 %r122, %r6
-%r124 = call i224 @mulPv192x32(i32* %r3, i32 %r123)
-%r125 = zext i224 %r124 to i416
-%r126 = add i416 %r121, %r125
-%r127 = lshr i416 %r126, 32
-%r128 = trunc i416 %r127 to i384
-%r129 = trunc i384 %r128 to i32
-%r130 = mul i32 %r129, %r6
-%r131 = call i224 @mulPv192x32(i32* %r3, i32 %r130)
-%r132 = zext i224 %r131 to i384
-%r133 = add i384 %r128, %r132
-%r134 = lshr i384 %r133, 32
-%r135 = trunc i384 %r134 to i352
-%r136 = trunc i352 %r135 to i32
-%r137 = mul i32 %r136, %r6
-%r138 = call i224 @mulPv192x32(i32* %r3, i32 %r137)
-%r139 = zext i224 %r138 to i352
-%r140 = add i352 %r135, %r139
-%r141 = lshr i352 %r140, 32
-%r142 = trunc i352 %r141 to i320
-%r143 = trunc i320 %r142 to i32
-%r144 = mul i32 %r143, %r6
-%r145 = call i224 @mulPv192x32(i32* %r3, i32 %r144)
-%r146 = zext i224 %r145 to i320
-%r147 = add i320 %r142, %r146
-%r148 = lshr i320 %r147, 32
-%r149 = trunc i320 %r148 to i288
-%r150 = trunc i288 %r149 to i32
-%r151 = mul i32 %r150, %r6
-%r152 = call i224 @mulPv192x32(i32* %r3, i32 %r151)
-%r153 = zext i224 %r152 to i288
-%r154 = add i288 %r149, %r153
-%r155 = lshr i288 %r154, 32
-%r156 = trunc i288 %r155 to i256
-%r157 = trunc i256 %r156 to i32
-%r158 = mul i32 %r157, %r6
-%r159 = call i224 @mulPv192x32(i32* %r3, i32 %r158)
-%r160 = zext i224 %r159 to i256
-%r161 = add i256 %r156, %r160
-%r162 = lshr i256 %r161, 32
-%r163 = trunc i256 %r162 to i224
-%r164 = zext i192 %r42 to i224
-%r165 = sub i224 %r163, %r164
-%r166 = lshr i224 %r165, 192
-%r167 = trunc i224 %r166 to i1
-%r168 = select i1 %r167, i224 %r163, i224 %r165
-%r169 = trunc i224 %r168 to i192
-%r170 = trunc i192 %r169 to i32
-%r172 = getelementptr i32, i32* %r1, i32 0
-store i32 %r170, i32* %r172
-%r173 = lshr i192 %r169, 32
-%r174 = trunc i192 %r173 to i32
-%r176 = getelementptr i32, i32* %r1, i32 1
-store i32 %r174, i32* %r176
-%r177 = lshr i192 %r173, 32
-%r178 = trunc i192 %r177 to i32
-%r180 = getelementptr i32, i32* %r1, i32 2
-store i32 %r178, i32* %r180
-%r181 = lshr i192 %r177, 32
-%r182 = trunc i192 %r181 to i32
-%r184 = getelementptr i32, i32* %r1, i32 3
-store i32 %r182, i32* %r184
-%r185 = lshr i192 %r181, 32
-%r186 = trunc i192 %r185 to i32
-%r188 = getelementptr i32, i32* %r1, i32 4
-store i32 %r186, i32* %r188
-%r189 = lshr i192 %r185, 32
-%r190 = trunc i192 %r189 to i32
-%r192 = getelementptr i32, i32* %r1, i32 5
-store i32 %r190, i32* %r192
-ret void
-}
-define i32 @mcl_fp_addPre6L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
+%r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
+%r8 = getelementptr i32, i32* %r2, i32 1
 %r9 = load i32, i32* %r8
 %r10 = zext i32 %r9 to i64
 %r11 = shl i64 %r10, 32
 %r12 = or i64 %r6, %r11
 %r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
+%r15 = getelementptr i32, i32* %r2, i32 2
 %r16 = load i32, i32* %r15
 %r17 = zext i32 %r16 to i96
 %r18 = shl i96 %r17, 64
 %r19 = or i96 %r13, %r18
 %r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
+%r22 = getelementptr i32, i32* %r2, i32 3
 %r23 = load i32, i32* %r22
 %r24 = zext i32 %r23 to i128
 %r25 = shl i128 %r24, 96
 %r26 = or i128 %r20, %r25
 %r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
+%r29 = getelementptr i32, i32* %r2, i32 4
 %r30 = load i32, i32* %r29
 %r31 = zext i32 %r30 to i160
 %r32 = shl i160 %r31, 128
 %r33 = or i160 %r27, %r32
 %r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
+%r36 = getelementptr i32, i32* %r2, i32 5
 %r37 = load i32, i32* %r36
 %r38 = zext i32 %r37 to i192
 %r39 = shl i192 %r38, 160
 %r40 = or i192 %r34, %r39
 %r41 = zext i192 %r40 to i224
-%r42 = load i32, i32* %r4
-%r43 = zext i32 %r42 to i64
-%r45 = getelementptr i32, i32* %r4, i32 1
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i64
-%r48 = shl i64 %r47, 32
-%r49 = or i64 %r43, %r48
-%r50 = zext i64 %r49 to i96
-%r52 = getelementptr i32, i32* %r4, i32 2
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i96
-%r55 = shl i96 %r54, 64
-%r56 = or i96 %r50, %r55
-%r57 = zext i96 %r56 to i128
-%r59 = getelementptr i32, i32* %r4, i32 3
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i128
-%r62 = shl i128 %r61, 96
-%r63 = or i128 %r57, %r62
-%r64 = zext i128 %r63 to i160
-%r66 = getelementptr i32, i32* %r4, i32 4
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i160
-%r69 = shl i160 %r68, 128
-%r70 = or i160 %r64, %r69
-%r71 = zext i160 %r70 to i192
-%r73 = getelementptr i32, i32* %r4, i32 5
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i192
-%r76 = shl i192 %r75, 160
-%r77 = or i192 %r71, %r76
-%r78 = zext i192 %r77 to i224
-%r79 = add i224 %r41, %r78
-%r80 = trunc i224 %r79 to i192
-%r81 = trunc i192 %r80 to i32
-%r83 = getelementptr i32, i32* %r2, i32 0
-store i32 %r81, i32* %r83
-%r84 = lshr i192 %r80, 32
-%r85 = trunc i192 %r84 to i32
-%r87 = getelementptr i32, i32* %r2, i32 1
-store i32 %r85, i32* %r87
-%r88 = lshr i192 %r84, 32
-%r89 = trunc i192 %r88 to i32
-%r91 = getelementptr i32, i32* %r2, i32 2
-store i32 %r89, i32* %r91
-%r92 = lshr i192 %r88, 32
-%r93 = trunc i192 %r92 to i32
-%r95 = getelementptr i32, i32* %r2, i32 3
-store i32 %r93, i32* %r95
-%r96 = lshr i192 %r92, 32
-%r97 = trunc i192 %r96 to i32
-%r99 = getelementptr i32, i32* %r2, i32 4
-store i32 %r97, i32* %r99
-%r100 = lshr i192 %r96, 32
-%r101 = trunc i192 %r100 to i32
-%r103 = getelementptr i32, i32* %r2, i32 5
-store i32 %r101, i32* %r103
-%r104 = lshr i224 %r79, 192
-%r105 = trunc i224 %r104 to i32
-ret i32 %r105
-}
-define i32 @mcl_fp_subPre6L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r42 = load i32, i32* %r4
-%r43 = zext i32 %r42 to i64
-%r45 = getelementptr i32, i32* %r4, i32 1
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i64
-%r48 = shl i64 %r47, 32
-%r49 = or i64 %r43, %r48
-%r50 = zext i64 %r49 to i96
-%r52 = getelementptr i32, i32* %r4, i32 2
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i96
-%r55 = shl i96 %r54, 64
-%r56 = or i96 %r50, %r55
-%r57 = zext i96 %r56 to i128
-%r59 = getelementptr i32, i32* %r4, i32 3
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i128
-%r62 = shl i128 %r61, 96
-%r63 = or i128 %r57, %r62
-%r64 = zext i128 %r63 to i160
-%r66 = getelementptr i32, i32* %r4, i32 4
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i160
-%r69 = shl i160 %r68, 128
-%r70 = or i160 %r64, %r69
-%r71 = zext i160 %r70 to i192
-%r73 = getelementptr i32, i32* %r4, i32 5
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i192
-%r76 = shl i192 %r75, 160
-%r77 = or i192 %r71, %r76
-%r78 = zext i192 %r77 to i224
-%r79 = sub i224 %r41, %r78
-%r80 = trunc i224 %r79 to i192
-%r81 = trunc i192 %r80 to i32
-%r83 = getelementptr i32, i32* %r2, i32 0
-store i32 %r81, i32* %r83
-%r84 = lshr i192 %r80, 32
-%r85 = trunc i192 %r84 to i32
-%r87 = getelementptr i32, i32* %r2, i32 1
-store i32 %r85, i32* %r87
-%r88 = lshr i192 %r84, 32
-%r89 = trunc i192 %r88 to i32
-%r91 = getelementptr i32, i32* %r2, i32 2
-store i32 %r89, i32* %r91
-%r92 = lshr i192 %r88, 32
-%r93 = trunc i192 %r92 to i32
-%r95 = getelementptr i32, i32* %r2, i32 3
-store i32 %r93, i32* %r95
-%r96 = lshr i192 %r92, 32
-%r97 = trunc i192 %r96 to i32
-%r99 = getelementptr i32, i32* %r2, i32 4
-store i32 %r97, i32* %r99
-%r100 = lshr i192 %r96, 32
-%r101 = trunc i192 %r100 to i32
-%r103 = getelementptr i32, i32* %r2, i32 5
-store i32 %r101, i32* %r103
-%r104 = lshr i224 %r79, 192
-%r105 = trunc i224 %r104 to i32
-%r107 = and i32 %r105, 1
-ret i32 %r107
-}
-define void @mcl_fp_shr1_6L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = zext i64 %r10 to i96
-%r13 = getelementptr i32, i32* %r2, i32 2
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i96
-%r16 = shl i96 %r15, 64
-%r17 = or i96 %r11, %r16
-%r18 = zext i96 %r17 to i128
-%r20 = getelementptr i32, i32* %r2, i32 3
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i128
-%r23 = shl i128 %r22, 96
-%r24 = or i128 %r18, %r23
-%r25 = zext i128 %r24 to i160
-%r27 = getelementptr i32, i32* %r2, i32 4
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i160
-%r30 = shl i160 %r29, 128
-%r31 = or i160 %r25, %r30
-%r32 = zext i160 %r31 to i192
-%r34 = getelementptr i32, i32* %r2, i32 5
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i192
-%r37 = shl i192 %r36, 160
-%r38 = or i192 %r32, %r37
-%r39 = lshr i192 %r38, 1
-%r40 = trunc i192 %r39 to i32
-%r42 = getelementptr i32, i32* %r1, i32 0
-store i32 %r40, i32* %r42
-%r43 = lshr i192 %r39, 32
-%r44 = trunc i192 %r43 to i32
-%r46 = getelementptr i32, i32* %r1, i32 1
-store i32 %r44, i32* %r46
-%r47 = lshr i192 %r43, 32
-%r48 = trunc i192 %r47 to i32
-%r50 = getelementptr i32, i32* %r1, i32 2
-store i32 %r48, i32* %r50
-%r51 = lshr i192 %r47, 32
-%r52 = trunc i192 %r51 to i32
-%r54 = getelementptr i32, i32* %r1, i32 3
-store i32 %r52, i32* %r54
-%r55 = lshr i192 %r51, 32
-%r56 = trunc i192 %r55 to i32
-%r58 = getelementptr i32, i32* %r1, i32 4
-store i32 %r56, i32* %r58
-%r59 = lshr i192 %r55, 32
-%r60 = trunc i192 %r59 to i32
-%r62 = getelementptr i32, i32* %r1, i32 5
-store i32 %r60, i32* %r62
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = load i32, i32* %r3
+%r49 = zext i32 %r48 to i64
+%r51 = getelementptr i32, i32* %r3, i32 1
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i64
+%r54 = shl i64 %r53, 32
+%r55 = or i64 %r49, %r54
+%r56 = zext i64 %r55 to i96
+%r58 = getelementptr i32, i32* %r3, i32 2
+%r59 = load i32, i32* %r58
+%r60 = zext i32 %r59 to i96
+%r61 = shl i96 %r60, 64
+%r62 = or i96 %r56, %r61
+%r63 = zext i96 %r62 to i128
+%r65 = getelementptr i32, i32* %r3, i32 3
+%r66 = load i32, i32* %r65
+%r67 = zext i32 %r66 to i128
+%r68 = shl i128 %r67, 96
+%r69 = or i128 %r63, %r68
+%r70 = zext i128 %r69 to i160
+%r72 = getelementptr i32, i32* %r3, i32 4
+%r73 = load i32, i32* %r72
+%r74 = zext i32 %r73 to i160
+%r75 = shl i160 %r74, 128
+%r76 = or i160 %r70, %r75
+%r77 = zext i160 %r76 to i192
+%r79 = getelementptr i32, i32* %r3, i32 5
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i192
+%r82 = shl i192 %r81, 160
+%r83 = or i192 %r77, %r82
+%r84 = zext i192 %r83 to i224
+%r86 = getelementptr i32, i32* %r3, i32 6
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i224
+%r89 = shl i224 %r88, 192
+%r90 = or i224 %r84, %r89
+%r91 = zext i224 %r47 to i256
+%r92 = zext i224 %r90 to i256
+%r93 = sub i256 %r91, %r92
+%r94 = trunc i256 %r93 to i224
+%r95 = lshr i256 %r93, 224
+%r96 = trunc i256 %r95 to i1
+%r98 = getelementptr i32, i32* %r1, i32 0
+%r99 = trunc i224 %r94 to i32
+store i32 %r99, i32* %r98
+%r100 = lshr i224 %r94, 32
+%r102 = getelementptr i32, i32* %r1, i32 1
+%r103 = trunc i224 %r100 to i32
+store i32 %r103, i32* %r102
+%r104 = lshr i224 %r100, 32
+%r106 = getelementptr i32, i32* %r1, i32 2
+%r107 = trunc i224 %r104 to i32
+store i32 %r107, i32* %r106
+%r108 = lshr i224 %r104, 32
+%r110 = getelementptr i32, i32* %r1, i32 3
+%r111 = trunc i224 %r108 to i32
+store i32 %r111, i32* %r110
+%r112 = lshr i224 %r108, 32
+%r114 = getelementptr i32, i32* %r1, i32 4
+%r115 = trunc i224 %r112 to i32
+store i32 %r115, i32* %r114
+%r116 = lshr i224 %r112, 32
+%r118 = getelementptr i32, i32* %r1, i32 5
+%r119 = trunc i224 %r116 to i32
+store i32 %r119, i32* %r118
+%r120 = lshr i224 %r116, 32
+%r122 = getelementptr i32, i32* %r1, i32 6
+%r123 = trunc i224 %r120 to i32
+store i32 %r123, i32* %r122
+br i1%r96, label %carry, label %nocarry
+nocarry:
+ret void
+carry:
+%r124 = load i32, i32* %r4
+%r125 = zext i32 %r124 to i64
+%r127 = getelementptr i32, i32* %r4, i32 1
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i64
+%r130 = shl i64 %r129, 32
+%r131 = or i64 %r125, %r130
+%r132 = zext i64 %r131 to i96
+%r134 = getelementptr i32, i32* %r4, i32 2
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i96
+%r137 = shl i96 %r136, 64
+%r138 = or i96 %r132, %r137
+%r139 = zext i96 %r138 to i128
+%r141 = getelementptr i32, i32* %r4, i32 3
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i128
+%r144 = shl i128 %r143, 96
+%r145 = or i128 %r139, %r144
+%r146 = zext i128 %r145 to i160
+%r148 = getelementptr i32, i32* %r4, i32 4
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i160
+%r151 = shl i160 %r150, 128
+%r152 = or i160 %r146, %r151
+%r153 = zext i160 %r152 to i192
+%r155 = getelementptr i32, i32* %r4, i32 5
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i192
+%r158 = shl i192 %r157, 160
+%r159 = or i192 %r153, %r158
+%r160 = zext i192 %r159 to i224
+%r162 = getelementptr i32, i32* %r4, i32 6
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i224
+%r165 = shl i224 %r164, 192
+%r166 = or i224 %r160, %r165
+%r167 = add i224 %r94, %r166
+%r169 = getelementptr i32, i32* %r1, i32 0
+%r170 = trunc i224 %r167 to i32
+store i32 %r170, i32* %r169
+%r171 = lshr i224 %r167, 32
+%r173 = getelementptr i32, i32* %r1, i32 1
+%r174 = trunc i224 %r171 to i32
+store i32 %r174, i32* %r173
+%r175 = lshr i224 %r171, 32
+%r177 = getelementptr i32, i32* %r1, i32 2
+%r178 = trunc i224 %r175 to i32
+store i32 %r178, i32* %r177
+%r179 = lshr i224 %r175, 32
+%r181 = getelementptr i32, i32* %r1, i32 3
+%r182 = trunc i224 %r179 to i32
+store i32 %r182, i32* %r181
+%r183 = lshr i224 %r179, 32
+%r185 = getelementptr i32, i32* %r1, i32 4
+%r186 = trunc i224 %r183 to i32
+store i32 %r186, i32* %r185
+%r187 = lshr i224 %r183, 32
+%r189 = getelementptr i32, i32* %r1, i32 5
+%r190 = trunc i224 %r187 to i32
+store i32 %r190, i32* %r189
+%r191 = lshr i224 %r187, 32
+%r193 = getelementptr i32, i32* %r1, i32 6
+%r194 = trunc i224 %r191 to i32
+store i32 %r194, i32* %r193
 ret void
 }
-define void @mcl_fp_add6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_subNF7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -6287,130 +4615,121 @@ define void @mcl_fp_add6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r
 %r38 = zext i32 %r37 to i192
 %r39 = shl i192 %r38, 160
 %r40 = or i192 %r34, %r39
-%r41 = load i32, i32* %r3
-%r42 = zext i32 %r41 to i64
-%r44 = getelementptr i32, i32* %r3, i32 1
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i64
-%r47 = shl i64 %r46, 32
-%r48 = or i64 %r42, %r47
-%r49 = zext i64 %r48 to i96
-%r51 = getelementptr i32, i32* %r3, i32 2
-%r52 = load i32, i32* %r51
-%r53 = zext i32 %r52 to i96
-%r54 = shl i96 %r53, 64
-%r55 = or i96 %r49, %r54
-%r56 = zext i96 %r55 to i128
-%r58 = getelementptr i32, i32* %r3, i32 3
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = load i32, i32* %r3
+%r49 = zext i32 %r48 to i64
+%r51 = getelementptr i32, i32* %r3, i32 1
+%r52 = load i32, i32* %r51
+%r53 = zext i32 %r52 to i64
+%r54 = shl i64 %r53, 32
+%r55 = or i64 %r49, %r54
+%r56 = zext i64 %r55 to i96
+%r58 = getelementptr i32, i32* %r3, i32 2
 %r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i128
-%r61 = shl i128 %r60, 96
-%r62 = or i128 %r56, %r61
-%r63 = zext i128 %r62 to i160
-%r65 = getelementptr i32, i32* %r3, i32 4
+%r60 = zext i32 %r59 to i96
+%r61 = shl i96 %r60, 64
+%r62 = or i96 %r56, %r61
+%r63 = zext i96 %r62 to i128
+%r65 = getelementptr i32, i32* %r3, i32 3
 %r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i160
-%r68 = shl i160 %r67, 128
-%r69 = or i160 %r63, %r68
-%r70 = zext i160 %r69 to i192
-%r72 = getelementptr i32, i32* %r3, i32 5
+%r67 = zext i32 %r66 to i128
+%r68 = shl i128 %r67, 96
+%r69 = or i128 %r63, %r68
+%r70 = zext i128 %r69 to i160
+%r72 = getelementptr i32, i32* %r3, i32 4
 %r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i192
-%r75 = shl i192 %r74, 160
-%r76 = or i192 %r70, %r75
-%r77 = zext i192 %r40 to i224
-%r78 = zext i192 %r76 to i224
-%r79 = add i224 %r77, %r78
-%r80 = trunc i224 %r79 to i192
-%r81 = trunc i192 %r80 to i32
-%r83 = getelementptr i32, i32* %r1, i32 0
-store i32 %r81, i32* %r83
-%r84 = lshr i192 %r80, 32
-%r85 = trunc i192 %r84 to i32
-%r87 = getelementptr i32, i32* %r1, i32 1
-store i32 %r85, i32* %r87
-%r88 = lshr i192 %r84, 32
-%r89 = trunc i192 %r88 to i32
-%r91 = getelementptr i32, i32* %r1, i32 2
-store i32 %r89, i32* %r91
-%r92 = lshr i192 %r88, 32
-%r93 = trunc i192 %r92 to i32
-%r95 = getelementptr i32, i32* %r1, i32 3
-store i32 %r93, i32* %r95
-%r96 = lshr i192 %r92, 32
-%r97 = trunc i192 %r96 to i32
-%r99 = getelementptr i32, i32* %r1, i32 4
-store i32 %r97, i32* %r99
-%r100 = lshr i192 %r96, 32
-%r101 = trunc i192 %r100 to i32
-%r103 = getelementptr i32, i32* %r1, i32 5
-store i32 %r101, i32* %r103
-%r104 = load i32, i32* %r4
-%r105 = zext i32 %r104 to i64
-%r107 = getelementptr i32, i32* %r4, i32 1
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i64
-%r110 = shl i64 %r109, 32
-%r111 = or i64 %r105, %r110
-%r112 = zext i64 %r111 to i96
-%r114 = getelementptr i32, i32* %r4, i32 2
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i96
-%r117 = shl i96 %r116, 64
-%r118 = or i96 %r112, %r117
-%r119 = zext i96 %r118 to i128
-%r121 = getelementptr i32, i32* %r4, i32 3
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i128
-%r124 = shl i128 %r123, 96
-%r125 = or i128 %r119, %r124
-%r126 = zext i128 %r125 to i160
-%r128 = getelementptr i32, i32* %r4, i32 4
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i160
-%r131 = shl i160 %r130, 128
-%r132 = or i160 %r126, %r131
-%r133 = zext i160 %r132 to i192
-%r135 = getelementptr i32, i32* %r4, i32 5
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i192
-%r138 = shl i192 %r137, 160
-%r139 = or i192 %r133, %r138
-%r140 = zext i192 %r139 to i224
-%r141 = sub i224 %r79, %r140
-%r142 = lshr i224 %r141, 192
-%r143 = trunc i224 %r142 to i1
-br i1%r143, label %carry, label %nocarry
-nocarry:
-%r144 = trunc i224 %r141 to i192
-%r145 = trunc i192 %r144 to i32
-%r147 = getelementptr i32, i32* %r1, i32 0
-store i32 %r145, i32* %r147
-%r148 = lshr i192 %r144, 32
-%r149 = trunc i192 %r148 to i32
-%r151 = getelementptr i32, i32* %r1, i32 1
-store i32 %r149, i32* %r151
-%r152 = lshr i192 %r148, 32
-%r153 = trunc i192 %r152 to i32
-%r155 = getelementptr i32, i32* %r1, i32 2
-store i32 %r153, i32* %r155
-%r156 = lshr i192 %r152, 32
-%r157 = trunc i192 %r156 to i32
-%r159 = getelementptr i32, i32* %r1, i32 3
-store i32 %r157, i32* %r159
-%r160 = lshr i192 %r156, 32
-%r161 = trunc i192 %r160 to i32
-%r163 = getelementptr i32, i32* %r1, i32 4
-store i32 %r161, i32* %r163
-%r164 = lshr i192 %r160, 32
-%r165 = trunc i192 %r164 to i32
-%r167 = getelementptr i32, i32* %r1, i32 5
-store i32 %r165, i32* %r167
-ret void
-carry:
+%r74 = zext i32 %r73 to i160
+%r75 = shl i160 %r74, 128
+%r76 = or i160 %r70, %r75
+%r77 = zext i160 %r76 to i192
+%r79 = getelementptr i32, i32* %r3, i32 5
+%r80 = load i32, i32* %r79
+%r81 = zext i32 %r80 to i192
+%r82 = shl i192 %r81, 160
+%r83 = or i192 %r77, %r82
+%r84 = zext i192 %r83 to i224
+%r86 = getelementptr i32, i32* %r3, i32 6
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i224
+%r89 = shl i224 %r88, 192
+%r90 = or i224 %r84, %r89
+%r91 = sub i224 %r47, %r90
+%r92 = lshr i224 %r91, 223
+%r93 = trunc i224 %r92 to i1
+%r94 = load i32, i32* %r4
+%r95 = zext i32 %r94 to i64
+%r97 = getelementptr i32, i32* %r4, i32 1
+%r98 = load i32, i32* %r97
+%r99 = zext i32 %r98 to i64
+%r100 = shl i64 %r99, 32
+%r101 = or i64 %r95, %r100
+%r102 = zext i64 %r101 to i96
+%r104 = getelementptr i32, i32* %r4, i32 2
+%r105 = load i32, i32* %r104
+%r106 = zext i32 %r105 to i96
+%r107 = shl i96 %r106, 64
+%r108 = or i96 %r102, %r107
+%r109 = zext i96 %r108 to i128
+%r111 = getelementptr i32, i32* %r4, i32 3
+%r112 = load i32, i32* %r111
+%r113 = zext i32 %r112 to i128
+%r114 = shl i128 %r113, 96
+%r115 = or i128 %r109, %r114
+%r116 = zext i128 %r115 to i160
+%r118 = getelementptr i32, i32* %r4, i32 4
+%r119 = load i32, i32* %r118
+%r120 = zext i32 %r119 to i160
+%r121 = shl i160 %r120, 128
+%r122 = or i160 %r116, %r121
+%r123 = zext i160 %r122 to i192
+%r125 = getelementptr i32, i32* %r4, i32 5
+%r126 = load i32, i32* %r125
+%r127 = zext i32 %r126 to i192
+%r128 = shl i192 %r127, 160
+%r129 = or i192 %r123, %r128
+%r130 = zext i192 %r129 to i224
+%r132 = getelementptr i32, i32* %r4, i32 6
+%r133 = load i32, i32* %r132
+%r134 = zext i32 %r133 to i224
+%r135 = shl i224 %r134, 192
+%r136 = or i224 %r130, %r135
+%r138 = select i1 %r93, i224 %r136, i224 0
+%r139 = add i224 %r91, %r138
+%r141 = getelementptr i32, i32* %r1, i32 0
+%r142 = trunc i224 %r139 to i32
+store i32 %r142, i32* %r141
+%r143 = lshr i224 %r139, 32
+%r145 = getelementptr i32, i32* %r1, i32 1
+%r146 = trunc i224 %r143 to i32
+store i32 %r146, i32* %r145
+%r147 = lshr i224 %r143, 32
+%r149 = getelementptr i32, i32* %r1, i32 2
+%r150 = trunc i224 %r147 to i32
+store i32 %r150, i32* %r149
+%r151 = lshr i224 %r147, 32
+%r153 = getelementptr i32, i32* %r1, i32 3
+%r154 = trunc i224 %r151 to i32
+store i32 %r154, i32* %r153
+%r155 = lshr i224 %r151, 32
+%r157 = getelementptr i32, i32* %r1, i32 4
+%r158 = trunc i224 %r155 to i32
+store i32 %r158, i32* %r157
+%r159 = lshr i224 %r155, 32
+%r161 = getelementptr i32, i32* %r1, i32 5
+%r162 = trunc i224 %r159 to i32
+store i32 %r162, i32* %r161
+%r163 = lshr i224 %r159, 32
+%r165 = getelementptr i32, i32* %r1, i32 6
+%r166 = trunc i224 %r163 to i32
+store i32 %r166, i32* %r165
 ret void
 }
-define void @mcl_fp_addNF6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fpDbl_add7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -6443,378 +4762,240 @@ define void @mcl_fp_addNF6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r38 = zext i32 %r37 to i192
 %r39 = shl i192 %r38, 160
 %r40 = or i192 %r34, %r39
-%r41 = load i32, i32* %r3
-%r42 = zext i32 %r41 to i64
-%r44 = getelementptr i32, i32* %r3, i32 1
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i64
-%r47 = shl i64 %r46, 32
-%r48 = or i64 %r42, %r47
-%r49 = zext i64 %r48 to i96
-%r51 = getelementptr i32, i32* %r3, i32 2
-%r52 = load i32, i32* %r51
-%r53 = zext i32 %r52 to i96
-%r54 = shl i96 %r53, 64
-%r55 = or i96 %r49, %r54
-%r56 = zext i96 %r55 to i128
-%r58 = getelementptr i32, i32* %r3, i32 3
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i128
-%r61 = shl i128 %r60, 96
-%r62 = or i128 %r56, %r61
-%r63 = zext i128 %r62 to i160
-%r65 = getelementptr i32, i32* %r3, i32 4
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i160
-%r68 = shl i160 %r67, 128
-%r69 = or i160 %r63, %r68
-%r70 = zext i160 %r69 to i192
-%r72 = getelementptr i32, i32* %r3, i32 5
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i192
-%r75 = shl i192 %r74, 160
-%r76 = or i192 %r70, %r75
-%r77 = add i192 %r40, %r76
-%r78 = load i32, i32* %r4
-%r79 = zext i32 %r78 to i64
-%r81 = getelementptr i32, i32* %r4, i32 1
-%r82 = load i32, i32* %r81
-%r83 = zext i32 %r82 to i64
-%r84 = shl i64 %r83, 32
-%r85 = or i64 %r79, %r84
-%r86 = zext i64 %r85 to i96
-%r88 = getelementptr i32, i32* %r4, i32 2
-%r89 = load i32, i32* %r88
-%r90 = zext i32 %r89 to i96
-%r91 = shl i96 %r90, 64
-%r92 = or i96 %r86, %r91
-%r93 = zext i96 %r92 to i128
-%r95 = getelementptr i32, i32* %r4, i32 3
-%r96 = load i32, i32* %r95
-%r97 = zext i32 %r96 to i128
-%r98 = shl i128 %r97, 96
-%r99 = or i128 %r93, %r98
-%r100 = zext i128 %r99 to i160
-%r102 = getelementptr i32, i32* %r4, i32 4
-%r103 = load i32, i32* %r102
-%r104 = zext i32 %r103 to i160
-%r105 = shl i160 %r104, 128
-%r106 = or i160 %r100, %r105
-%r107 = zext i160 %r106 to i192
-%r109 = getelementptr i32, i32* %r4, i32 5
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i192
-%r112 = shl i192 %r111, 160
-%r113 = or i192 %r107, %r112
-%r114 = sub i192 %r77, %r113
-%r115 = lshr i192 %r114, 191
-%r116 = trunc i192 %r115 to i1
-%r117 = select i1 %r116, i192 %r77, i192 %r114
-%r118 = trunc i192 %r117 to i32
-%r120 = getelementptr i32, i32* %r1, i32 0
-store i32 %r118, i32* %r120
-%r121 = lshr i192 %r117, 32
-%r122 = trunc i192 %r121 to i32
-%r124 = getelementptr i32, i32* %r1, i32 1
-store i32 %r122, i32* %r124
-%r125 = lshr i192 %r121, 32
-%r126 = trunc i192 %r125 to i32
-%r128 = getelementptr i32, i32* %r1, i32 2
-store i32 %r126, i32* %r128
-%r129 = lshr i192 %r125, 32
-%r130 = trunc i192 %r129 to i32
-%r132 = getelementptr i32, i32* %r1, i32 3
-store i32 %r130, i32* %r132
-%r133 = lshr i192 %r129, 32
-%r134 = trunc i192 %r133 to i32
-%r136 = getelementptr i32, i32* %r1, i32 4
-store i32 %r134, i32* %r136
-%r137 = lshr i192 %r133, 32
-%r138 = trunc i192 %r137 to i32
-%r140 = getelementptr i32, i32* %r1, i32 5
-store i32 %r138, i32* %r140
-ret void
-}
-define void @mcl_fp_sub6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = load i32, i32* %r3
-%r42 = zext i32 %r41 to i64
-%r44 = getelementptr i32, i32* %r3, i32 1
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i64
-%r47 = shl i64 %r46, 32
-%r48 = or i64 %r42, %r47
-%r49 = zext i64 %r48 to i96
-%r51 = getelementptr i32, i32* %r3, i32 2
-%r52 = load i32, i32* %r51
-%r53 = zext i32 %r52 to i96
-%r54 = shl i96 %r53, 64
-%r55 = or i96 %r49, %r54
-%r56 = zext i96 %r55 to i128
-%r58 = getelementptr i32, i32* %r3, i32 3
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i128
-%r61 = shl i128 %r60, 96
-%r62 = or i128 %r56, %r61
-%r63 = zext i128 %r62 to i160
-%r65 = getelementptr i32, i32* %r3, i32 4
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i160
-%r68 = shl i160 %r67, 128
-%r69 = or i160 %r63, %r68
-%r70 = zext i160 %r69 to i192
-%r72 = getelementptr i32, i32* %r3, i32 5
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i192
-%r75 = shl i192 %r74, 160
-%r76 = or i192 %r70, %r75
-%r77 = zext i192 %r40 to i224
-%r78 = zext i192 %r76 to i224
-%r79 = sub i224 %r77, %r78
-%r80 = trunc i224 %r79 to i192
-%r81 = lshr i224 %r79, 192
-%r82 = trunc i224 %r81 to i1
-%r83 = trunc i192 %r80 to i32
-%r85 = getelementptr i32, i32* %r1, i32 0
-store i32 %r83, i32* %r85
-%r86 = lshr i192 %r80, 32
-%r87 = trunc i192 %r86 to i32
-%r89 = getelementptr i32, i32* %r1, i32 1
-store i32 %r87, i32* %r89
-%r90 = lshr i192 %r86, 32
-%r91 = trunc i192 %r90 to i32
-%r93 = getelementptr i32, i32* %r1, i32 2
-store i32 %r91, i32* %r93
-%r94 = lshr i192 %r90, 32
-%r95 = trunc i192 %r94 to i32
-%r97 = getelementptr i32, i32* %r1, i32 3
-store i32 %r95, i32* %r97
-%r98 = lshr i192 %r94, 32
-%r99 = trunc i192 %r98 to i32
-%r101 = getelementptr i32, i32* %r1, i32 4
-store i32 %r99, i32* %r101
-%r102 = lshr i192 %r98, 32
-%r103 = trunc i192 %r102 to i32
-%r105 = getelementptr i32, i32* %r1, i32 5
-store i32 %r103, i32* %r105
-br i1%r82, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r106 = load i32, i32* %r4
-%r107 = zext i32 %r106 to i64
-%r109 = getelementptr i32, i32* %r4, i32 1
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i64
-%r112 = shl i64 %r111, 32
-%r113 = or i64 %r107, %r112
-%r114 = zext i64 %r113 to i96
-%r116 = getelementptr i32, i32* %r4, i32 2
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i96
-%r119 = shl i96 %r118, 64
-%r120 = or i96 %r114, %r119
-%r121 = zext i96 %r120 to i128
-%r123 = getelementptr i32, i32* %r4, i32 3
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i128
-%r126 = shl i128 %r125, 96
-%r127 = or i128 %r121, %r126
-%r128 = zext i128 %r127 to i160
-%r130 = getelementptr i32, i32* %r4, i32 4
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i160
-%r133 = shl i160 %r132, 128
-%r134 = or i160 %r128, %r133
-%r135 = zext i160 %r134 to i192
-%r137 = getelementptr i32, i32* %r4, i32 5
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i192
-%r140 = shl i192 %r139, 160
-%r141 = or i192 %r135, %r140
-%r142 = add i192 %r80, %r141
-%r143 = trunc i192 %r142 to i32
-%r145 = getelementptr i32, i32* %r1, i32 0
-store i32 %r143, i32* %r145
-%r146 = lshr i192 %r142, 32
-%r147 = trunc i192 %r146 to i32
-%r149 = getelementptr i32, i32* %r1, i32 1
-store i32 %r147, i32* %r149
-%r150 = lshr i192 %r146, 32
-%r151 = trunc i192 %r150 to i32
-%r153 = getelementptr i32, i32* %r1, i32 2
-store i32 %r151, i32* %r153
-%r154 = lshr i192 %r150, 32
-%r155 = trunc i192 %r154 to i32
-%r157 = getelementptr i32, i32* %r1, i32 3
-store i32 %r155, i32* %r157
-%r158 = lshr i192 %r154, 32
-%r159 = trunc i192 %r158 to i32
-%r161 = getelementptr i32, i32* %r1, i32 4
-store i32 %r159, i32* %r161
-%r162 = lshr i192 %r158, 32
-%r163 = trunc i192 %r162 to i32
-%r165 = getelementptr i32, i32* %r1, i32 5
-store i32 %r163, i32* %r165
-ret void
-}
-define void @mcl_fp_subNF6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = load i32, i32* %r3
-%r42 = zext i32 %r41 to i64
-%r44 = getelementptr i32, i32* %r3, i32 1
-%r45 = load i32, i32* %r44
-%r46 = zext i32 %r45 to i64
-%r47 = shl i64 %r46, 32
-%r48 = or i64 %r42, %r47
-%r49 = zext i64 %r48 to i96
-%r51 = getelementptr i32, i32* %r3, i32 2
-%r52 = load i32, i32* %r51
-%r53 = zext i32 %r52 to i96
-%r54 = shl i96 %r53, 64
-%r55 = or i96 %r49, %r54
-%r56 = zext i96 %r55 to i128
-%r58 = getelementptr i32, i32* %r3, i32 3
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i128
-%r61 = shl i128 %r60, 96
-%r62 = or i128 %r56, %r61
-%r63 = zext i128 %r62 to i160
-%r65 = getelementptr i32, i32* %r3, i32 4
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i160
-%r68 = shl i160 %r67, 128
-%r69 = or i160 %r63, %r68
-%r70 = zext i160 %r69 to i192
-%r72 = getelementptr i32, i32* %r3, i32 5
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i192
-%r75 = shl i192 %r74, 160
-%r76 = or i192 %r70, %r75
-%r77 = sub i192 %r40, %r76
-%r78 = lshr i192 %r77, 191
-%r79 = trunc i192 %r78 to i1
-%r80 = load i32, i32* %r4
-%r81 = zext i32 %r80 to i64
-%r83 = getelementptr i32, i32* %r4, i32 1
-%r84 = load i32, i32* %r83
-%r85 = zext i32 %r84 to i64
-%r86 = shl i64 %r85, 32
-%r87 = or i64 %r81, %r86
-%r88 = zext i64 %r87 to i96
-%r90 = getelementptr i32, i32* %r4, i32 2
-%r91 = load i32, i32* %r90
-%r92 = zext i32 %r91 to i96
-%r93 = shl i96 %r92, 64
-%r94 = or i96 %r88, %r93
-%r95 = zext i96 %r94 to i128
-%r97 = getelementptr i32, i32* %r4, i32 3
-%r98 = load i32, i32* %r97
-%r99 = zext i32 %r98 to i128
-%r100 = shl i128 %r99, 96
-%r101 = or i128 %r95, %r100
-%r102 = zext i128 %r101 to i160
-%r104 = getelementptr i32, i32* %r4, i32 4
-%r105 = load i32, i32* %r104
-%r106 = zext i32 %r105 to i160
-%r107 = shl i160 %r106, 128
-%r108 = or i160 %r102, %r107
-%r109 = zext i160 %r108 to i192
-%r111 = getelementptr i32, i32* %r4, i32 5
-%r112 = load i32, i32* %r111
-%r113 = zext i32 %r112 to i192
-%r114 = shl i192 %r113, 160
-%r115 = or i192 %r109, %r114
-%r117 = select i1 %r79, i192 %r115, i192 0
-%r118 = add i192 %r77, %r117
-%r119 = trunc i192 %r118 to i32
-%r121 = getelementptr i32, i32* %r1, i32 0
-store i32 %r119, i32* %r121
-%r122 = lshr i192 %r118, 32
-%r123 = trunc i192 %r122 to i32
-%r125 = getelementptr i32, i32* %r1, i32 1
-store i32 %r123, i32* %r125
-%r126 = lshr i192 %r122, 32
-%r127 = trunc i192 %r126 to i32
-%r129 = getelementptr i32, i32* %r1, i32 2
-store i32 %r127, i32* %r129
-%r130 = lshr i192 %r126, 32
-%r131 = trunc i192 %r130 to i32
-%r133 = getelementptr i32, i32* %r1, i32 3
-store i32 %r131, i32* %r133
-%r134 = lshr i192 %r130, 32
-%r135 = trunc i192 %r134 to i32
-%r137 = getelementptr i32, i32* %r1, i32 4
-store i32 %r135, i32* %r137
-%r138 = lshr i192 %r134, 32
-%r139 = trunc i192 %r138 to i32
-%r141 = getelementptr i32, i32* %r1, i32 5
-store i32 %r139, i32* %r141
-ret void
-}
-define void @mcl_fpDbl_add6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = load i32, i32* %r3
+%r98 = zext i32 %r97 to i64
+%r100 = getelementptr i32, i32* %r3, i32 1
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i64
+%r103 = shl i64 %r102, 32
+%r104 = or i64 %r98, %r103
+%r105 = zext i64 %r104 to i96
+%r107 = getelementptr i32, i32* %r3, i32 2
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i96
+%r110 = shl i96 %r109, 64
+%r111 = or i96 %r105, %r110
+%r112 = zext i96 %r111 to i128
+%r114 = getelementptr i32, i32* %r3, i32 3
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i128
+%r117 = shl i128 %r116, 96
+%r118 = or i128 %r112, %r117
+%r119 = zext i128 %r118 to i160
+%r121 = getelementptr i32, i32* %r3, i32 4
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i160
+%r124 = shl i160 %r123, 128
+%r125 = or i160 %r119, %r124
+%r126 = zext i160 %r125 to i192
+%r128 = getelementptr i32, i32* %r3, i32 5
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i192
+%r131 = shl i192 %r130, 160
+%r132 = or i192 %r126, %r131
+%r133 = zext i192 %r132 to i224
+%r135 = getelementptr i32, i32* %r3, i32 6
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i224
+%r138 = shl i224 %r137, 192
+%r139 = or i224 %r133, %r138
+%r140 = zext i224 %r139 to i256
+%r142 = getelementptr i32, i32* %r3, i32 7
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i256
+%r145 = shl i256 %r144, 224
+%r146 = or i256 %r140, %r145
+%r147 = zext i256 %r146 to i288
+%r149 = getelementptr i32, i32* %r3, i32 8
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i288
+%r152 = shl i288 %r151, 256
+%r153 = or i288 %r147, %r152
+%r154 = zext i288 %r153 to i320
+%r156 = getelementptr i32, i32* %r3, i32 9
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i320
+%r159 = shl i320 %r158, 288
+%r160 = or i320 %r154, %r159
+%r161 = zext i320 %r160 to i352
+%r163 = getelementptr i32, i32* %r3, i32 10
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i352
+%r166 = shl i352 %r165, 320
+%r167 = or i352 %r161, %r166
+%r168 = zext i352 %r167 to i384
+%r170 = getelementptr i32, i32* %r3, i32 11
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i384
+%r173 = shl i384 %r172, 352
+%r174 = or i384 %r168, %r173
+%r175 = zext i384 %r174 to i416
+%r177 = getelementptr i32, i32* %r3, i32 12
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i416
+%r180 = shl i416 %r179, 384
+%r181 = or i416 %r175, %r180
+%r182 = zext i416 %r181 to i448
+%r184 = getelementptr i32, i32* %r3, i32 13
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i448
+%r187 = shl i448 %r186, 416
+%r188 = or i448 %r182, %r187
+%r189 = zext i448 %r96 to i480
+%r190 = zext i448 %r188 to i480
+%r191 = add i480 %r189, %r190
+%r192 = trunc i480 %r191 to i224
+%r194 = getelementptr i32, i32* %r1, i32 0
+%r195 = trunc i224 %r192 to i32
+store i32 %r195, i32* %r194
+%r196 = lshr i224 %r192, 32
+%r198 = getelementptr i32, i32* %r1, i32 1
+%r199 = trunc i224 %r196 to i32
+store i32 %r199, i32* %r198
+%r200 = lshr i224 %r196, 32
+%r202 = getelementptr i32, i32* %r1, i32 2
+%r203 = trunc i224 %r200 to i32
+store i32 %r203, i32* %r202
+%r204 = lshr i224 %r200, 32
+%r206 = getelementptr i32, i32* %r1, i32 3
+%r207 = trunc i224 %r204 to i32
+store i32 %r207, i32* %r206
+%r208 = lshr i224 %r204, 32
+%r210 = getelementptr i32, i32* %r1, i32 4
+%r211 = trunc i224 %r208 to i32
+store i32 %r211, i32* %r210
+%r212 = lshr i224 %r208, 32
+%r214 = getelementptr i32, i32* %r1, i32 5
+%r215 = trunc i224 %r212 to i32
+store i32 %r215, i32* %r214
+%r216 = lshr i224 %r212, 32
+%r218 = getelementptr i32, i32* %r1, i32 6
+%r219 = trunc i224 %r216 to i32
+store i32 %r219, i32* %r218
+%r220 = lshr i480 %r191, 224
+%r221 = trunc i480 %r220 to i256
+%r222 = load i32, i32* %r4
+%r223 = zext i32 %r222 to i64
+%r225 = getelementptr i32, i32* %r4, i32 1
+%r226 = load i32, i32* %r225
+%r227 = zext i32 %r226 to i64
+%r228 = shl i64 %r227, 32
+%r229 = or i64 %r223, %r228
+%r230 = zext i64 %r229 to i96
+%r232 = getelementptr i32, i32* %r4, i32 2
+%r233 = load i32, i32* %r232
+%r234 = zext i32 %r233 to i96
+%r235 = shl i96 %r234, 64
+%r236 = or i96 %r230, %r235
+%r237 = zext i96 %r236 to i128
+%r239 = getelementptr i32, i32* %r4, i32 3
+%r240 = load i32, i32* %r239
+%r241 = zext i32 %r240 to i128
+%r242 = shl i128 %r241, 96
+%r243 = or i128 %r237, %r242
+%r244 = zext i128 %r243 to i160
+%r246 = getelementptr i32, i32* %r4, i32 4
+%r247 = load i32, i32* %r246
+%r248 = zext i32 %r247 to i160
+%r249 = shl i160 %r248, 128
+%r250 = or i160 %r244, %r249
+%r251 = zext i160 %r250 to i192
+%r253 = getelementptr i32, i32* %r4, i32 5
+%r254 = load i32, i32* %r253
+%r255 = zext i32 %r254 to i192
+%r256 = shl i192 %r255, 160
+%r257 = or i192 %r251, %r256
+%r258 = zext i192 %r257 to i224
+%r260 = getelementptr i32, i32* %r4, i32 6
+%r261 = load i32, i32* %r260
+%r262 = zext i32 %r261 to i224
+%r263 = shl i224 %r262, 192
+%r264 = or i224 %r258, %r263
+%r265 = zext i224 %r264 to i256
+%r266 = sub i256 %r221, %r265
+%r267 = lshr i256 %r266, 224
+%r268 = trunc i256 %r267 to i1
+%r269 = select i1 %r268, i256 %r221, i256 %r266
+%r270 = trunc i256 %r269 to i224
+%r272 = getelementptr i32, i32* %r1, i32 7
+%r274 = getelementptr i32, i32* %r272, i32 0
+%r275 = trunc i224 %r270 to i32
+store i32 %r275, i32* %r274
+%r276 = lshr i224 %r270, 32
+%r278 = getelementptr i32, i32* %r272, i32 1
+%r279 = trunc i224 %r276 to i32
+store i32 %r279, i32* %r278
+%r280 = lshr i224 %r276, 32
+%r282 = getelementptr i32, i32* %r272, i32 2
+%r283 = trunc i224 %r280 to i32
+store i32 %r283, i32* %r282
+%r284 = lshr i224 %r280, 32
+%r286 = getelementptr i32, i32* %r272, i32 3
+%r287 = trunc i224 %r284 to i32
+store i32 %r287, i32* %r286
+%r288 = lshr i224 %r284, 32
+%r290 = getelementptr i32, i32* %r272, i32 4
+%r291 = trunc i224 %r288 to i32
+store i32 %r291, i32* %r290
+%r292 = lshr i224 %r288, 32
+%r294 = getelementptr i32, i32* %r272, i32 5
+%r295 = trunc i224 %r292 to i32
+store i32 %r295, i32* %r294
+%r296 = lshr i224 %r292, 32
+%r298 = getelementptr i32, i32* %r272, i32 6
+%r299 = trunc i224 %r296 to i32
+store i32 %r299, i32* %r298
+ret void
+}
+define void @mcl_fpDbl_sub7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -6883,392 +5064,202 @@ define void @mcl_fpDbl_add6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r80 = zext i32 %r79 to i384
 %r81 = shl i384 %r80, 352
 %r82 = or i384 %r76, %r81
-%r83 = load i32, i32* %r3
-%r84 = zext i32 %r83 to i64
-%r86 = getelementptr i32, i32* %r3, i32 1
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i64
-%r89 = shl i64 %r88, 32
-%r90 = or i64 %r84, %r89
-%r91 = zext i64 %r90 to i96
-%r93 = getelementptr i32, i32* %r3, i32 2
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i96
-%r96 = shl i96 %r95, 64
-%r97 = or i96 %r91, %r96
-%r98 = zext i96 %r97 to i128
-%r100 = getelementptr i32, i32* %r3, i32 3
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = load i32, i32* %r3
+%r98 = zext i32 %r97 to i64
+%r100 = getelementptr i32, i32* %r3, i32 1
 %r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i128
-%r103 = shl i128 %r102, 96
-%r104 = or i128 %r98, %r103
-%r105 = zext i128 %r104 to i160
-%r107 = getelementptr i32, i32* %r3, i32 4
+%r102 = zext i32 %r101 to i64
+%r103 = shl i64 %r102, 32
+%r104 = or i64 %r98, %r103
+%r105 = zext i64 %r104 to i96
+%r107 = getelementptr i32, i32* %r3, i32 2
 %r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i160
-%r110 = shl i160 %r109, 128
-%r111 = or i160 %r105, %r110
-%r112 = zext i160 %r111 to i192
-%r114 = getelementptr i32, i32* %r3, i32 5
+%r109 = zext i32 %r108 to i96
+%r110 = shl i96 %r109, 64
+%r111 = or i96 %r105, %r110
+%r112 = zext i96 %r111 to i128
+%r114 = getelementptr i32, i32* %r3, i32 3
 %r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i192
-%r117 = shl i192 %r116, 160
-%r118 = or i192 %r112, %r117
-%r119 = zext i192 %r118 to i224
-%r121 = getelementptr i32, i32* %r3, i32 6
+%r116 = zext i32 %r115 to i128
+%r117 = shl i128 %r116, 96
+%r118 = or i128 %r112, %r117
+%r119 = zext i128 %r118 to i160
+%r121 = getelementptr i32, i32* %r3, i32 4
 %r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i224
-%r124 = shl i224 %r123, 192
-%r125 = or i224 %r119, %r124
-%r126 = zext i224 %r125 to i256
-%r128 = getelementptr i32, i32* %r3, i32 7
+%r123 = zext i32 %r122 to i160
+%r124 = shl i160 %r123, 128
+%r125 = or i160 %r119, %r124
+%r126 = zext i160 %r125 to i192
+%r128 = getelementptr i32, i32* %r3, i32 5
 %r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i256
-%r131 = shl i256 %r130, 224
-%r132 = or i256 %r126, %r131
-%r133 = zext i256 %r132 to i288
-%r135 = getelementptr i32, i32* %r3, i32 8
+%r130 = zext i32 %r129 to i192
+%r131 = shl i192 %r130, 160
+%r132 = or i192 %r126, %r131
+%r133 = zext i192 %r132 to i224
+%r135 = getelementptr i32, i32* %r3, i32 6
 %r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i288
-%r138 = shl i288 %r137, 256
-%r139 = or i288 %r133, %r138
-%r140 = zext i288 %r139 to i320
-%r142 = getelementptr i32, i32* %r3, i32 9
+%r137 = zext i32 %r136 to i224
+%r138 = shl i224 %r137, 192
+%r139 = or i224 %r133, %r138
+%r140 = zext i224 %r139 to i256
+%r142 = getelementptr i32, i32* %r3, i32 7
 %r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i320
-%r145 = shl i320 %r144, 288
-%r146 = or i320 %r140, %r145
-%r147 = zext i320 %r146 to i352
-%r149 = getelementptr i32, i32* %r3, i32 10
+%r144 = zext i32 %r143 to i256
+%r145 = shl i256 %r144, 224
+%r146 = or i256 %r140, %r145
+%r147 = zext i256 %r146 to i288
+%r149 = getelementptr i32, i32* %r3, i32 8
 %r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i352
-%r152 = shl i352 %r151, 320
-%r153 = or i352 %r147, %r152
-%r154 = zext i352 %r153 to i384
-%r156 = getelementptr i32, i32* %r3, i32 11
+%r151 = zext i32 %r150 to i288
+%r152 = shl i288 %r151, 256
+%r153 = or i288 %r147, %r152
+%r154 = zext i288 %r153 to i320
+%r156 = getelementptr i32, i32* %r3, i32 9
 %r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i384
-%r159 = shl i384 %r158, 352
-%r160 = or i384 %r154, %r159
-%r161 = zext i384 %r82 to i416
-%r162 = zext i384 %r160 to i416
-%r163 = add i416 %r161, %r162
-%r164 = trunc i416 %r163 to i192
-%r165 = trunc i192 %r164 to i32
-%r167 = getelementptr i32, i32* %r1, i32 0
-store i32 %r165, i32* %r167
-%r168 = lshr i192 %r164, 32
-%r169 = trunc i192 %r168 to i32
-%r171 = getelementptr i32, i32* %r1, i32 1
-store i32 %r169, i32* %r171
-%r172 = lshr i192 %r168, 32
-%r173 = trunc i192 %r172 to i32
-%r175 = getelementptr i32, i32* %r1, i32 2
-store i32 %r173, i32* %r175
-%r176 = lshr i192 %r172, 32
-%r177 = trunc i192 %r176 to i32
-%r179 = getelementptr i32, i32* %r1, i32 3
-store i32 %r177, i32* %r179
-%r180 = lshr i192 %r176, 32
-%r181 = trunc i192 %r180 to i32
-%r183 = getelementptr i32, i32* %r1, i32 4
-store i32 %r181, i32* %r183
-%r184 = lshr i192 %r180, 32
-%r185 = trunc i192 %r184 to i32
-%r187 = getelementptr i32, i32* %r1, i32 5
-store i32 %r185, i32* %r187
-%r188 = lshr i416 %r163, 192
-%r189 = trunc i416 %r188 to i224
-%r190 = load i32, i32* %r4
-%r191 = zext i32 %r190 to i64
-%r193 = getelementptr i32, i32* %r4, i32 1
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i64
-%r196 = shl i64 %r195, 32
-%r197 = or i64 %r191, %r196
-%r198 = zext i64 %r197 to i96
-%r200 = getelementptr i32, i32* %r4, i32 2
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i96
-%r203 = shl i96 %r202, 64
-%r204 = or i96 %r198, %r203
-%r205 = zext i96 %r204 to i128
-%r207 = getelementptr i32, i32* %r4, i32 3
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i128
-%r210 = shl i128 %r209, 96
-%r211 = or i128 %r205, %r210
-%r212 = zext i128 %r211 to i160
-%r214 = getelementptr i32, i32* %r4, i32 4
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i160
-%r217 = shl i160 %r216, 128
-%r218 = or i160 %r212, %r217
-%r219 = zext i160 %r218 to i192
-%r221 = getelementptr i32, i32* %r4, i32 5
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i192
-%r224 = shl i192 %r223, 160
-%r225 = or i192 %r219, %r224
-%r226 = zext i192 %r225 to i224
-%r227 = sub i224 %r189, %r226
-%r228 = lshr i224 %r227, 192
-%r229 = trunc i224 %r228 to i1
-%r230 = select i1 %r229, i224 %r189, i224 %r227
-%r231 = trunc i224 %r230 to i192
-%r233 = getelementptr i32, i32* %r1, i32 6
-%r234 = trunc i192 %r231 to i32
-%r236 = getelementptr i32, i32* %r233, i32 0
-store i32 %r234, i32* %r236
-%r237 = lshr i192 %r231, 32
-%r238 = trunc i192 %r237 to i32
-%r240 = getelementptr i32, i32* %r233, i32 1
-store i32 %r238, i32* %r240
-%r241 = lshr i192 %r237, 32
-%r242 = trunc i192 %r241 to i32
-%r244 = getelementptr i32, i32* %r233, i32 2
-store i32 %r242, i32* %r244
-%r245 = lshr i192 %r241, 32
-%r246 = trunc i192 %r245 to i32
-%r248 = getelementptr i32, i32* %r233, i32 3
-store i32 %r246, i32* %r248
-%r249 = lshr i192 %r245, 32
-%r250 = trunc i192 %r249 to i32
-%r252 = getelementptr i32, i32* %r233, i32 4
-store i32 %r250, i32* %r252
-%r253 = lshr i192 %r249, 32
-%r254 = trunc i192 %r253 to i32
-%r256 = getelementptr i32, i32* %r233, i32 5
-store i32 %r254, i32* %r256
+%r158 = zext i32 %r157 to i320
+%r159 = shl i320 %r158, 288
+%r160 = or i320 %r154, %r159
+%r161 = zext i320 %r160 to i352
+%r163 = getelementptr i32, i32* %r3, i32 10
+%r164 = load i32, i32* %r163
+%r165 = zext i32 %r164 to i352
+%r166 = shl i352 %r165, 320
+%r167 = or i352 %r161, %r166
+%r168 = zext i352 %r167 to i384
+%r170 = getelementptr i32, i32* %r3, i32 11
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i384
+%r173 = shl i384 %r172, 352
+%r174 = or i384 %r168, %r173
+%r175 = zext i384 %r174 to i416
+%r177 = getelementptr i32, i32* %r3, i32 12
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i416
+%r180 = shl i416 %r179, 384
+%r181 = or i416 %r175, %r180
+%r182 = zext i416 %r181 to i448
+%r184 = getelementptr i32, i32* %r3, i32 13
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i448
+%r187 = shl i448 %r186, 416
+%r188 = or i448 %r182, %r187
+%r189 = zext i448 %r96 to i480
+%r190 = zext i448 %r188 to i480
+%r191 = sub i480 %r189, %r190
+%r192 = trunc i480 %r191 to i224
+%r194 = getelementptr i32, i32* %r1, i32 0
+%r195 = trunc i224 %r192 to i32
+store i32 %r195, i32* %r194
+%r196 = lshr i224 %r192, 32
+%r198 = getelementptr i32, i32* %r1, i32 1
+%r199 = trunc i224 %r196 to i32
+store i32 %r199, i32* %r198
+%r200 = lshr i224 %r196, 32
+%r202 = getelementptr i32, i32* %r1, i32 2
+%r203 = trunc i224 %r200 to i32
+store i32 %r203, i32* %r202
+%r204 = lshr i224 %r200, 32
+%r206 = getelementptr i32, i32* %r1, i32 3
+%r207 = trunc i224 %r204 to i32
+store i32 %r207, i32* %r206
+%r208 = lshr i224 %r204, 32
+%r210 = getelementptr i32, i32* %r1, i32 4
+%r211 = trunc i224 %r208 to i32
+store i32 %r211, i32* %r210
+%r212 = lshr i224 %r208, 32
+%r214 = getelementptr i32, i32* %r1, i32 5
+%r215 = trunc i224 %r212 to i32
+store i32 %r215, i32* %r214
+%r216 = lshr i224 %r212, 32
+%r218 = getelementptr i32, i32* %r1, i32 6
+%r219 = trunc i224 %r216 to i32
+store i32 %r219, i32* %r218
+%r220 = lshr i480 %r191, 224
+%r221 = trunc i480 %r220 to i224
+%r222 = lshr i480 %r191, 448
+%r223 = trunc i480 %r222 to i1
+%r224 = load i32, i32* %r4
+%r225 = zext i32 %r224 to i64
+%r227 = getelementptr i32, i32* %r4, i32 1
+%r228 = load i32, i32* %r227
+%r229 = zext i32 %r228 to i64
+%r230 = shl i64 %r229, 32
+%r231 = or i64 %r225, %r230
+%r232 = zext i64 %r231 to i96
+%r234 = getelementptr i32, i32* %r4, i32 2
+%r235 = load i32, i32* %r234
+%r236 = zext i32 %r235 to i96
+%r237 = shl i96 %r236, 64
+%r238 = or i96 %r232, %r237
+%r239 = zext i96 %r238 to i128
+%r241 = getelementptr i32, i32* %r4, i32 3
+%r242 = load i32, i32* %r241
+%r243 = zext i32 %r242 to i128
+%r244 = shl i128 %r243, 96
+%r245 = or i128 %r239, %r244
+%r246 = zext i128 %r245 to i160
+%r248 = getelementptr i32, i32* %r4, i32 4
+%r249 = load i32, i32* %r248
+%r250 = zext i32 %r249 to i160
+%r251 = shl i160 %r250, 128
+%r252 = or i160 %r246, %r251
+%r253 = zext i160 %r252 to i192
+%r255 = getelementptr i32, i32* %r4, i32 5
+%r256 = load i32, i32* %r255
+%r257 = zext i32 %r256 to i192
+%r258 = shl i192 %r257, 160
+%r259 = or i192 %r253, %r258
+%r260 = zext i192 %r259 to i224
+%r262 = getelementptr i32, i32* %r4, i32 6
+%r263 = load i32, i32* %r262
+%r264 = zext i32 %r263 to i224
+%r265 = shl i224 %r264, 192
+%r266 = or i224 %r260, %r265
+%r268 = select i1 %r223, i224 %r266, i224 0
+%r269 = add i224 %r221, %r268
+%r271 = getelementptr i32, i32* %r1, i32 7
+%r273 = getelementptr i32, i32* %r271, i32 0
+%r274 = trunc i224 %r269 to i32
+store i32 %r274, i32* %r273
+%r275 = lshr i224 %r269, 32
+%r277 = getelementptr i32, i32* %r271, i32 1
+%r278 = trunc i224 %r275 to i32
+store i32 %r278, i32* %r277
+%r279 = lshr i224 %r275, 32
+%r281 = getelementptr i32, i32* %r271, i32 2
+%r282 = trunc i224 %r279 to i32
+store i32 %r282, i32* %r281
+%r283 = lshr i224 %r279, 32
+%r285 = getelementptr i32, i32* %r271, i32 3
+%r286 = trunc i224 %r283 to i32
+store i32 %r286, i32* %r285
+%r287 = lshr i224 %r283, 32
+%r289 = getelementptr i32, i32* %r271, i32 4
+%r290 = trunc i224 %r287 to i32
+store i32 %r290, i32* %r289
+%r291 = lshr i224 %r287, 32
+%r293 = getelementptr i32, i32* %r271, i32 5
+%r294 = trunc i224 %r291 to i32
+store i32 %r294, i32* %r293
+%r295 = lshr i224 %r291, 32
+%r297 = getelementptr i32, i32* %r271, i32 6
+%r298 = trunc i224 %r295 to i32
+store i32 %r298, i32* %r297
 ret void
 }
-define void @mcl_fpDbl_sub6L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = load i32, i32* %r3
-%r84 = zext i32 %r83 to i64
-%r86 = getelementptr i32, i32* %r3, i32 1
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i64
-%r89 = shl i64 %r88, 32
-%r90 = or i64 %r84, %r89
-%r91 = zext i64 %r90 to i96
-%r93 = getelementptr i32, i32* %r3, i32 2
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i96
-%r96 = shl i96 %r95, 64
-%r97 = or i96 %r91, %r96
-%r98 = zext i96 %r97 to i128
-%r100 = getelementptr i32, i32* %r3, i32 3
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i128
-%r103 = shl i128 %r102, 96
-%r104 = or i128 %r98, %r103
-%r105 = zext i128 %r104 to i160
-%r107 = getelementptr i32, i32* %r3, i32 4
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i160
-%r110 = shl i160 %r109, 128
-%r111 = or i160 %r105, %r110
-%r112 = zext i160 %r111 to i192
-%r114 = getelementptr i32, i32* %r3, i32 5
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i192
-%r117 = shl i192 %r116, 160
-%r118 = or i192 %r112, %r117
-%r119 = zext i192 %r118 to i224
-%r121 = getelementptr i32, i32* %r3, i32 6
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i224
-%r124 = shl i224 %r123, 192
-%r125 = or i224 %r119, %r124
-%r126 = zext i224 %r125 to i256
-%r128 = getelementptr i32, i32* %r3, i32 7
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i256
-%r131 = shl i256 %r130, 224
-%r132 = or i256 %r126, %r131
-%r133 = zext i256 %r132 to i288
-%r135 = getelementptr i32, i32* %r3, i32 8
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i288
-%r138 = shl i288 %r137, 256
-%r139 = or i288 %r133, %r138
-%r140 = zext i288 %r139 to i320
-%r142 = getelementptr i32, i32* %r3, i32 9
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i320
-%r145 = shl i320 %r144, 288
-%r146 = or i320 %r140, %r145
-%r147 = zext i320 %r146 to i352
-%r149 = getelementptr i32, i32* %r3, i32 10
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i352
-%r152 = shl i352 %r151, 320
-%r153 = or i352 %r147, %r152
-%r154 = zext i352 %r153 to i384
-%r156 = getelementptr i32, i32* %r3, i32 11
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i384
-%r159 = shl i384 %r158, 352
-%r160 = or i384 %r154, %r159
-%r161 = zext i384 %r82 to i416
-%r162 = zext i384 %r160 to i416
-%r163 = sub i416 %r161, %r162
-%r164 = trunc i416 %r163 to i192
-%r165 = trunc i192 %r164 to i32
-%r167 = getelementptr i32, i32* %r1, i32 0
-store i32 %r165, i32* %r167
-%r168 = lshr i192 %r164, 32
-%r169 = trunc i192 %r168 to i32
-%r171 = getelementptr i32, i32* %r1, i32 1
-store i32 %r169, i32* %r171
-%r172 = lshr i192 %r168, 32
-%r173 = trunc i192 %r172 to i32
-%r175 = getelementptr i32, i32* %r1, i32 2
-store i32 %r173, i32* %r175
-%r176 = lshr i192 %r172, 32
-%r177 = trunc i192 %r176 to i32
-%r179 = getelementptr i32, i32* %r1, i32 3
-store i32 %r177, i32* %r179
-%r180 = lshr i192 %r176, 32
-%r181 = trunc i192 %r180 to i32
-%r183 = getelementptr i32, i32* %r1, i32 4
-store i32 %r181, i32* %r183
-%r184 = lshr i192 %r180, 32
-%r185 = trunc i192 %r184 to i32
-%r187 = getelementptr i32, i32* %r1, i32 5
-store i32 %r185, i32* %r187
-%r188 = lshr i416 %r163, 192
-%r189 = trunc i416 %r188 to i192
-%r190 = lshr i416 %r163, 384
-%r191 = trunc i416 %r190 to i1
-%r192 = load i32, i32* %r4
-%r193 = zext i32 %r192 to i64
-%r195 = getelementptr i32, i32* %r4, i32 1
-%r196 = load i32, i32* %r195
-%r197 = zext i32 %r196 to i64
-%r198 = shl i64 %r197, 32
-%r199 = or i64 %r193, %r198
-%r200 = zext i64 %r199 to i96
-%r202 = getelementptr i32, i32* %r4, i32 2
-%r203 = load i32, i32* %r202
-%r204 = zext i32 %r203 to i96
-%r205 = shl i96 %r204, 64
-%r206 = or i96 %r200, %r205
-%r207 = zext i96 %r206 to i128
-%r209 = getelementptr i32, i32* %r4, i32 3
-%r210 = load i32, i32* %r209
-%r211 = zext i32 %r210 to i128
-%r212 = shl i128 %r211, 96
-%r213 = or i128 %r207, %r212
-%r214 = zext i128 %r213 to i160
-%r216 = getelementptr i32, i32* %r4, i32 4
-%r217 = load i32, i32* %r216
-%r218 = zext i32 %r217 to i160
-%r219 = shl i160 %r218, 128
-%r220 = or i160 %r214, %r219
-%r221 = zext i160 %r220 to i192
-%r223 = getelementptr i32, i32* %r4, i32 5
-%r224 = load i32, i32* %r223
-%r225 = zext i32 %r224 to i192
-%r226 = shl i192 %r225, 160
-%r227 = or i192 %r221, %r226
-%r229 = select i1 %r191, i192 %r227, i192 0
-%r230 = add i192 %r189, %r229
-%r232 = getelementptr i32, i32* %r1, i32 6
-%r233 = trunc i192 %r230 to i32
-%r235 = getelementptr i32, i32* %r232, i32 0
-store i32 %r233, i32* %r235
-%r236 = lshr i192 %r230, 32
-%r237 = trunc i192 %r236 to i32
-%r239 = getelementptr i32, i32* %r232, i32 1
-store i32 %r237, i32* %r239
-%r240 = lshr i192 %r236, 32
-%r241 = trunc i192 %r240 to i32
-%r243 = getelementptr i32, i32* %r232, i32 2
-store i32 %r241, i32* %r243
-%r244 = lshr i192 %r240, 32
-%r245 = trunc i192 %r244 to i32
-%r247 = getelementptr i32, i32* %r232, i32 3
-store i32 %r245, i32* %r247
-%r248 = lshr i192 %r244, 32
-%r249 = trunc i192 %r248 to i32
-%r251 = getelementptr i32, i32* %r232, i32 4
-store i32 %r249, i32* %r251
-%r252 = lshr i192 %r248, 32
-%r253 = trunc i192 %r252 to i32
-%r255 = getelementptr i32, i32* %r232, i32 5
-store i32 %r253, i32* %r255
-ret void
-}
-define i256 @mulPv224x32(i32* noalias  %r2, i32 %r3)
+define i288 @mulPv256x32(i32* noalias  %r2, i32 %r3)
 {
 %r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
 %r6 = trunc i64 %r5 to i32
@@ -7291,872 +5282,1206 @@ define i256 @mulPv224x32(i32* noalias  %r2, i32 %r3)
 %r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
 %r30 = trunc i64 %r29 to i32
 %r31 = call i32 @extractHigh32(i64 %r29)
-%r32 = zext i32 %r6 to i64
-%r33 = zext i32 %r10 to i64
-%r34 = shl i64 %r33, 32
-%r35 = or i64 %r32, %r34
-%r36 = zext i64 %r35 to i96
-%r37 = zext i32 %r14 to i96
-%r38 = shl i96 %r37, 64
-%r39 = or i96 %r36, %r38
-%r40 = zext i96 %r39 to i128
-%r41 = zext i32 %r18 to i128
-%r42 = shl i128 %r41, 96
-%r43 = or i128 %r40, %r42
-%r44 = zext i128 %r43 to i160
-%r45 = zext i32 %r22 to i160
-%r46 = shl i160 %r45, 128
-%r47 = or i160 %r44, %r46
-%r48 = zext i160 %r47 to i192
-%r49 = zext i32 %r26 to i192
-%r50 = shl i192 %r49, 160
-%r51 = or i192 %r48, %r50
-%r52 = zext i192 %r51 to i224
-%r53 = zext i32 %r30 to i224
-%r54 = shl i224 %r53, 192
-%r55 = or i224 %r52, %r54
-%r56 = zext i32 %r7 to i64
-%r57 = zext i32 %r11 to i64
-%r58 = shl i64 %r57, 32
-%r59 = or i64 %r56, %r58
-%r60 = zext i64 %r59 to i96
-%r61 = zext i32 %r15 to i96
-%r62 = shl i96 %r61, 64
-%r63 = or i96 %r60, %r62
-%r64 = zext i96 %r63 to i128
-%r65 = zext i32 %r19 to i128
-%r66 = shl i128 %r65, 96
-%r67 = or i128 %r64, %r66
-%r68 = zext i128 %r67 to i160
-%r69 = zext i32 %r23 to i160
-%r70 = shl i160 %r69, 128
-%r71 = or i160 %r68, %r70
-%r72 = zext i160 %r71 to i192
-%r73 = zext i32 %r27 to i192
-%r74 = shl i192 %r73, 160
-%r75 = or i192 %r72, %r74
-%r76 = zext i192 %r75 to i224
-%r77 = zext i32 %r31 to i224
-%r78 = shl i224 %r77, 192
-%r79 = or i224 %r76, %r78
-%r80 = zext i224 %r55 to i256
-%r81 = zext i224 %r79 to i256
-%r82 = shl i256 %r81, 32
-%r83 = add i256 %r80, %r82
-ret i256 %r83
+%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
+%r34 = trunc i64 %r33 to i32
+%r35 = call i32 @extractHigh32(i64 %r33)
+%r36 = zext i32 %r6 to i64
+%r37 = zext i32 %r10 to i64
+%r38 = shl i64 %r37, 32
+%r39 = or i64 %r36, %r38
+%r40 = zext i64 %r39 to i96
+%r41 = zext i32 %r14 to i96
+%r42 = shl i96 %r41, 64
+%r43 = or i96 %r40, %r42
+%r44 = zext i96 %r43 to i128
+%r45 = zext i32 %r18 to i128
+%r46 = shl i128 %r45, 96
+%r47 = or i128 %r44, %r46
+%r48 = zext i128 %r47 to i160
+%r49 = zext i32 %r22 to i160
+%r50 = shl i160 %r49, 128
+%r51 = or i160 %r48, %r50
+%r52 = zext i160 %r51 to i192
+%r53 = zext i32 %r26 to i192
+%r54 = shl i192 %r53, 160
+%r55 = or i192 %r52, %r54
+%r56 = zext i192 %r55 to i224
+%r57 = zext i32 %r30 to i224
+%r58 = shl i224 %r57, 192
+%r59 = or i224 %r56, %r58
+%r60 = zext i224 %r59 to i256
+%r61 = zext i32 %r34 to i256
+%r62 = shl i256 %r61, 224
+%r63 = or i256 %r60, %r62
+%r64 = zext i32 %r7 to i64
+%r65 = zext i32 %r11 to i64
+%r66 = shl i64 %r65, 32
+%r67 = or i64 %r64, %r66
+%r68 = zext i64 %r67 to i96
+%r69 = zext i32 %r15 to i96
+%r70 = shl i96 %r69, 64
+%r71 = or i96 %r68, %r70
+%r72 = zext i96 %r71 to i128
+%r73 = zext i32 %r19 to i128
+%r74 = shl i128 %r73, 96
+%r75 = or i128 %r72, %r74
+%r76 = zext i128 %r75 to i160
+%r77 = zext i32 %r23 to i160
+%r78 = shl i160 %r77, 128
+%r79 = or i160 %r76, %r78
+%r80 = zext i160 %r79 to i192
+%r81 = zext i32 %r27 to i192
+%r82 = shl i192 %r81, 160
+%r83 = or i192 %r80, %r82
+%r84 = zext i192 %r83 to i224
+%r85 = zext i32 %r31 to i224
+%r86 = shl i224 %r85, 192
+%r87 = or i224 %r84, %r86
+%r88 = zext i224 %r87 to i256
+%r89 = zext i32 %r35 to i256
+%r90 = shl i256 %r89, 224
+%r91 = or i256 %r88, %r90
+%r92 = zext i256 %r63 to i288
+%r93 = zext i256 %r91 to i288
+%r94 = shl i288 %r93, 32
+%r95 = add i288 %r92, %r94
+ret i288 %r95
 }
-define void @mcl_fp_mulUnitPre7L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
+define void @mcl_fp_mulUnitPre8L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
 {
-%r4 = call i256 @mulPv224x32(i32* %r2, i32 %r3)
-%r5 = trunc i256 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i256 %r4, 32
-%r9 = trunc i256 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i256 %r8, 32
-%r13 = trunc i256 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i256 %r12, 32
-%r17 = trunc i256 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i256 %r16, 32
-%r21 = trunc i256 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-%r24 = lshr i256 %r20, 32
-%r25 = trunc i256 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 5
-store i32 %r25, i32* %r27
-%r28 = lshr i256 %r24, 32
-%r29 = trunc i256 %r28 to i32
-%r31 = getelementptr i32, i32* %r1, i32 6
-store i32 %r29, i32* %r31
-%r32 = lshr i256 %r28, 32
-%r33 = trunc i256 %r32 to i32
-%r35 = getelementptr i32, i32* %r1, i32 7
-store i32 %r33, i32* %r35
+%r4 = call i288 @mulPv256x32(i32* %r2, i32 %r3)
+%r6 = getelementptr i32, i32* %r1, i32 0
+%r7 = trunc i288 %r4 to i32
+store i32 %r7, i32* %r6
+%r8 = lshr i288 %r4, 32
+%r10 = getelementptr i32, i32* %r1, i32 1
+%r11 = trunc i288 %r8 to i32
+store i32 %r11, i32* %r10
+%r12 = lshr i288 %r8, 32
+%r14 = getelementptr i32, i32* %r1, i32 2
+%r15 = trunc i288 %r12 to i32
+store i32 %r15, i32* %r14
+%r16 = lshr i288 %r12, 32
+%r18 = getelementptr i32, i32* %r1, i32 3
+%r19 = trunc i288 %r16 to i32
+store i32 %r19, i32* %r18
+%r20 = lshr i288 %r16, 32
+%r22 = getelementptr i32, i32* %r1, i32 4
+%r23 = trunc i288 %r20 to i32
+store i32 %r23, i32* %r22
+%r24 = lshr i288 %r20, 32
+%r26 = getelementptr i32, i32* %r1, i32 5
+%r27 = trunc i288 %r24 to i32
+store i32 %r27, i32* %r26
+%r28 = lshr i288 %r24, 32
+%r30 = getelementptr i32, i32* %r1, i32 6
+%r31 = trunc i288 %r28 to i32
+store i32 %r31, i32* %r30
+%r32 = lshr i288 %r28, 32
+%r34 = getelementptr i32, i32* %r1, i32 7
+%r35 = trunc i288 %r32 to i32
+store i32 %r35, i32* %r34
+%r36 = lshr i288 %r32, 32
+%r38 = getelementptr i32, i32* %r1, i32 8
+%r39 = trunc i288 %r36 to i32
+store i32 %r39, i32* %r38
 ret void
 }
-define void @mcl_fpDbl_mulPre7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+define void @mcl_fpDbl_mulPre8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
 {
 %r4 = load i32, i32* %r3
-%r5 = call i256 @mulPv224x32(i32* %r2, i32 %r4)
-%r6 = trunc i256 %r5 to i32
+%r5 = call i288 @mulPv256x32(i32* %r2, i32 %r4)
+%r6 = trunc i288 %r5 to i32
 store i32 %r6, i32* %r1
-%r7 = lshr i256 %r5, 32
+%r7 = lshr i288 %r5, 32
 %r9 = getelementptr i32, i32* %r3, i32 1
 %r10 = load i32, i32* %r9
-%r11 = call i256 @mulPv224x32(i32* %r2, i32 %r10)
-%r12 = add i256 %r7, %r11
-%r13 = trunc i256 %r12 to i32
+%r11 = call i288 @mulPv256x32(i32* %r2, i32 %r10)
+%r12 = add i288 %r7, %r11
+%r13 = trunc i288 %r12 to i32
 %r15 = getelementptr i32, i32* %r1, i32 1
 store i32 %r13, i32* %r15
-%r16 = lshr i256 %r12, 32
+%r16 = lshr i288 %r12, 32
 %r18 = getelementptr i32, i32* %r3, i32 2
 %r19 = load i32, i32* %r18
-%r20 = call i256 @mulPv224x32(i32* %r2, i32 %r19)
-%r21 = add i256 %r16, %r20
-%r22 = trunc i256 %r21 to i32
+%r20 = call i288 @mulPv256x32(i32* %r2, i32 %r19)
+%r21 = add i288 %r16, %r20
+%r22 = trunc i288 %r21 to i32
 %r24 = getelementptr i32, i32* %r1, i32 2
 store i32 %r22, i32* %r24
-%r25 = lshr i256 %r21, 32
+%r25 = lshr i288 %r21, 32
 %r27 = getelementptr i32, i32* %r3, i32 3
 %r28 = load i32, i32* %r27
-%r29 = call i256 @mulPv224x32(i32* %r2, i32 %r28)
-%r30 = add i256 %r25, %r29
-%r31 = trunc i256 %r30 to i32
+%r29 = call i288 @mulPv256x32(i32* %r2, i32 %r28)
+%r30 = add i288 %r25, %r29
+%r31 = trunc i288 %r30 to i32
 %r33 = getelementptr i32, i32* %r1, i32 3
 store i32 %r31, i32* %r33
-%r34 = lshr i256 %r30, 32
+%r34 = lshr i288 %r30, 32
 %r36 = getelementptr i32, i32* %r3, i32 4
 %r37 = load i32, i32* %r36
-%r38 = call i256 @mulPv224x32(i32* %r2, i32 %r37)
-%r39 = add i256 %r34, %r38
-%r40 = trunc i256 %r39 to i32
+%r38 = call i288 @mulPv256x32(i32* %r2, i32 %r37)
+%r39 = add i288 %r34, %r38
+%r40 = trunc i288 %r39 to i32
 %r42 = getelementptr i32, i32* %r1, i32 4
 store i32 %r40, i32* %r42
-%r43 = lshr i256 %r39, 32
+%r43 = lshr i288 %r39, 32
 %r45 = getelementptr i32, i32* %r3, i32 5
 %r46 = load i32, i32* %r45
-%r47 = call i256 @mulPv224x32(i32* %r2, i32 %r46)
-%r48 = add i256 %r43, %r47
-%r49 = trunc i256 %r48 to i32
+%r47 = call i288 @mulPv256x32(i32* %r2, i32 %r46)
+%r48 = add i288 %r43, %r47
+%r49 = trunc i288 %r48 to i32
 %r51 = getelementptr i32, i32* %r1, i32 5
 store i32 %r49, i32* %r51
-%r52 = lshr i256 %r48, 32
+%r52 = lshr i288 %r48, 32
 %r54 = getelementptr i32, i32* %r3, i32 6
 %r55 = load i32, i32* %r54
-%r56 = call i256 @mulPv224x32(i32* %r2, i32 %r55)
-%r57 = add i256 %r52, %r56
-%r59 = getelementptr i32, i32* %r1, i32 6
-%r60 = trunc i256 %r57 to i32
-%r62 = getelementptr i32, i32* %r59, i32 0
-store i32 %r60, i32* %r62
-%r63 = lshr i256 %r57, 32
-%r64 = trunc i256 %r63 to i32
-%r66 = getelementptr i32, i32* %r59, i32 1
-store i32 %r64, i32* %r66
-%r67 = lshr i256 %r63, 32
-%r68 = trunc i256 %r67 to i32
-%r70 = getelementptr i32, i32* %r59, i32 2
-store i32 %r68, i32* %r70
-%r71 = lshr i256 %r67, 32
-%r72 = trunc i256 %r71 to i32
-%r74 = getelementptr i32, i32* %r59, i32 3
-store i32 %r72, i32* %r74
-%r75 = lshr i256 %r71, 32
-%r76 = trunc i256 %r75 to i32
-%r78 = getelementptr i32, i32* %r59, i32 4
-store i32 %r76, i32* %r78
-%r79 = lshr i256 %r75, 32
-%r80 = trunc i256 %r79 to i32
-%r82 = getelementptr i32, i32* %r59, i32 5
-store i32 %r80, i32* %r82
-%r83 = lshr i256 %r79, 32
-%r84 = trunc i256 %r83 to i32
-%r86 = getelementptr i32, i32* %r59, i32 6
-store i32 %r84, i32* %r86
-%r87 = lshr i256 %r83, 32
-%r88 = trunc i256 %r87 to i32
-%r90 = getelementptr i32, i32* %r59, i32 7
-store i32 %r88, i32* %r90
+%r56 = call i288 @mulPv256x32(i32* %r2, i32 %r55)
+%r57 = add i288 %r52, %r56
+%r58 = trunc i288 %r57 to i32
+%r60 = getelementptr i32, i32* %r1, i32 6
+store i32 %r58, i32* %r60
+%r61 = lshr i288 %r57, 32
+%r63 = getelementptr i32, i32* %r3, i32 7
+%r64 = load i32, i32* %r63
+%r65 = call i288 @mulPv256x32(i32* %r2, i32 %r64)
+%r66 = add i288 %r61, %r65
+%r68 = getelementptr i32, i32* %r1, i32 7
+%r70 = getelementptr i32, i32* %r68, i32 0
+%r71 = trunc i288 %r66 to i32
+store i32 %r71, i32* %r70
+%r72 = lshr i288 %r66, 32
+%r74 = getelementptr i32, i32* %r68, i32 1
+%r75 = trunc i288 %r72 to i32
+store i32 %r75, i32* %r74
+%r76 = lshr i288 %r72, 32
+%r78 = getelementptr i32, i32* %r68, i32 2
+%r79 = trunc i288 %r76 to i32
+store i32 %r79, i32* %r78
+%r80 = lshr i288 %r76, 32
+%r82 = getelementptr i32, i32* %r68, i32 3
+%r83 = trunc i288 %r80 to i32
+store i32 %r83, i32* %r82
+%r84 = lshr i288 %r80, 32
+%r86 = getelementptr i32, i32* %r68, i32 4
+%r87 = trunc i288 %r84 to i32
+store i32 %r87, i32* %r86
+%r88 = lshr i288 %r84, 32
+%r90 = getelementptr i32, i32* %r68, i32 5
+%r91 = trunc i288 %r88 to i32
+store i32 %r91, i32* %r90
+%r92 = lshr i288 %r88, 32
+%r94 = getelementptr i32, i32* %r68, i32 6
+%r95 = trunc i288 %r92 to i32
+store i32 %r95, i32* %r94
+%r96 = lshr i288 %r92, 32
+%r98 = getelementptr i32, i32* %r68, i32 7
+%r99 = trunc i288 %r96 to i32
+store i32 %r99, i32* %r98
+%r100 = lshr i288 %r96, 32
+%r102 = getelementptr i32, i32* %r68, i32 8
+%r103 = trunc i288 %r100 to i32
+store i32 %r103, i32* %r102
 ret void
 }
-define void @mcl_fpDbl_sqrPre7L(i32* noalias  %r1, i32* noalias  %r2)
+define void @mcl_fpDbl_sqrPre8L(i32* noalias  %r1, i32* noalias  %r2)
 {
 %r3 = load i32, i32* %r2
-%r4 = call i256 @mulPv224x32(i32* %r2, i32 %r3)
-%r5 = trunc i256 %r4 to i32
+%r4 = call i288 @mulPv256x32(i32* %r2, i32 %r3)
+%r5 = trunc i288 %r4 to i32
 store i32 %r5, i32* %r1
-%r6 = lshr i256 %r4, 32
+%r6 = lshr i288 %r4, 32
 %r8 = getelementptr i32, i32* %r2, i32 1
 %r9 = load i32, i32* %r8
-%r10 = call i256 @mulPv224x32(i32* %r2, i32 %r9)
-%r11 = add i256 %r6, %r10
-%r12 = trunc i256 %r11 to i32
+%r10 = call i288 @mulPv256x32(i32* %r2, i32 %r9)
+%r11 = add i288 %r6, %r10
+%r12 = trunc i288 %r11 to i32
 %r14 = getelementptr i32, i32* %r1, i32 1
 store i32 %r12, i32* %r14
-%r15 = lshr i256 %r11, 32
+%r15 = lshr i288 %r11, 32
 %r17 = getelementptr i32, i32* %r2, i32 2
 %r18 = load i32, i32* %r17
-%r19 = call i256 @mulPv224x32(i32* %r2, i32 %r18)
-%r20 = add i256 %r15, %r19
-%r21 = trunc i256 %r20 to i32
+%r19 = call i288 @mulPv256x32(i32* %r2, i32 %r18)
+%r20 = add i288 %r15, %r19
+%r21 = trunc i288 %r20 to i32
 %r23 = getelementptr i32, i32* %r1, i32 2
 store i32 %r21, i32* %r23
-%r24 = lshr i256 %r20, 32
+%r24 = lshr i288 %r20, 32
 %r26 = getelementptr i32, i32* %r2, i32 3
 %r27 = load i32, i32* %r26
-%r28 = call i256 @mulPv224x32(i32* %r2, i32 %r27)
-%r29 = add i256 %r24, %r28
-%r30 = trunc i256 %r29 to i32
+%r28 = call i288 @mulPv256x32(i32* %r2, i32 %r27)
+%r29 = add i288 %r24, %r28
+%r30 = trunc i288 %r29 to i32
 %r32 = getelementptr i32, i32* %r1, i32 3
 store i32 %r30, i32* %r32
-%r33 = lshr i256 %r29, 32
+%r33 = lshr i288 %r29, 32
 %r35 = getelementptr i32, i32* %r2, i32 4
 %r36 = load i32, i32* %r35
-%r37 = call i256 @mulPv224x32(i32* %r2, i32 %r36)
-%r38 = add i256 %r33, %r37
-%r39 = trunc i256 %r38 to i32
+%r37 = call i288 @mulPv256x32(i32* %r2, i32 %r36)
+%r38 = add i288 %r33, %r37
+%r39 = trunc i288 %r38 to i32
 %r41 = getelementptr i32, i32* %r1, i32 4
 store i32 %r39, i32* %r41
-%r42 = lshr i256 %r38, 32
+%r42 = lshr i288 %r38, 32
 %r44 = getelementptr i32, i32* %r2, i32 5
 %r45 = load i32, i32* %r44
-%r46 = call i256 @mulPv224x32(i32* %r2, i32 %r45)
-%r47 = add i256 %r42, %r46
-%r48 = trunc i256 %r47 to i32
+%r46 = call i288 @mulPv256x32(i32* %r2, i32 %r45)
+%r47 = add i288 %r42, %r46
+%r48 = trunc i288 %r47 to i32
 %r50 = getelementptr i32, i32* %r1, i32 5
 store i32 %r48, i32* %r50
-%r51 = lshr i256 %r47, 32
+%r51 = lshr i288 %r47, 32
 %r53 = getelementptr i32, i32* %r2, i32 6
 %r54 = load i32, i32* %r53
-%r55 = call i256 @mulPv224x32(i32* %r2, i32 %r54)
-%r56 = add i256 %r51, %r55
-%r58 = getelementptr i32, i32* %r1, i32 6
-%r59 = trunc i256 %r56 to i32
-%r61 = getelementptr i32, i32* %r58, i32 0
-store i32 %r59, i32* %r61
-%r62 = lshr i256 %r56, 32
-%r63 = trunc i256 %r62 to i32
-%r65 = getelementptr i32, i32* %r58, i32 1
-store i32 %r63, i32* %r65
-%r66 = lshr i256 %r62, 32
-%r67 = trunc i256 %r66 to i32
-%r69 = getelementptr i32, i32* %r58, i32 2
-store i32 %r67, i32* %r69
-%r70 = lshr i256 %r66, 32
-%r71 = trunc i256 %r70 to i32
-%r73 = getelementptr i32, i32* %r58, i32 3
-store i32 %r71, i32* %r73
-%r74 = lshr i256 %r70, 32
-%r75 = trunc i256 %r74 to i32
-%r77 = getelementptr i32, i32* %r58, i32 4
-store i32 %r75, i32* %r77
-%r78 = lshr i256 %r74, 32
-%r79 = trunc i256 %r78 to i32
-%r81 = getelementptr i32, i32* %r58, i32 5
-store i32 %r79, i32* %r81
-%r82 = lshr i256 %r78, 32
-%r83 = trunc i256 %r82 to i32
-%r85 = getelementptr i32, i32* %r58, i32 6
-store i32 %r83, i32* %r85
-%r86 = lshr i256 %r82, 32
-%r87 = trunc i256 %r86 to i32
-%r89 = getelementptr i32, i32* %r58, i32 7
-store i32 %r87, i32* %r89
+%r55 = call i288 @mulPv256x32(i32* %r2, i32 %r54)
+%r56 = add i288 %r51, %r55
+%r57 = trunc i288 %r56 to i32
+%r59 = getelementptr i32, i32* %r1, i32 6
+store i32 %r57, i32* %r59
+%r60 = lshr i288 %r56, 32
+%r62 = getelementptr i32, i32* %r2, i32 7
+%r63 = load i32, i32* %r62
+%r64 = call i288 @mulPv256x32(i32* %r2, i32 %r63)
+%r65 = add i288 %r60, %r64
+%r67 = getelementptr i32, i32* %r1, i32 7
+%r69 = getelementptr i32, i32* %r67, i32 0
+%r70 = trunc i288 %r65 to i32
+store i32 %r70, i32* %r69
+%r71 = lshr i288 %r65, 32
+%r73 = getelementptr i32, i32* %r67, i32 1
+%r74 = trunc i288 %r71 to i32
+store i32 %r74, i32* %r73
+%r75 = lshr i288 %r71, 32
+%r77 = getelementptr i32, i32* %r67, i32 2
+%r78 = trunc i288 %r75 to i32
+store i32 %r78, i32* %r77
+%r79 = lshr i288 %r75, 32
+%r81 = getelementptr i32, i32* %r67, i32 3
+%r82 = trunc i288 %r79 to i32
+store i32 %r82, i32* %r81
+%r83 = lshr i288 %r79, 32
+%r85 = getelementptr i32, i32* %r67, i32 4
+%r86 = trunc i288 %r83 to i32
+store i32 %r86, i32* %r85
+%r87 = lshr i288 %r83, 32
+%r89 = getelementptr i32, i32* %r67, i32 5
+%r90 = trunc i288 %r87 to i32
+store i32 %r90, i32* %r89
+%r91 = lshr i288 %r87, 32
+%r93 = getelementptr i32, i32* %r67, i32 6
+%r94 = trunc i288 %r91 to i32
+store i32 %r94, i32* %r93
+%r95 = lshr i288 %r91, 32
+%r97 = getelementptr i32, i32* %r67, i32 7
+%r98 = trunc i288 %r95 to i32
+store i32 %r98, i32* %r97
+%r99 = lshr i288 %r95, 32
+%r101 = getelementptr i32, i32* %r67, i32 8
+%r102 = trunc i288 %r99 to i32
+store i32 %r102, i32* %r101
 ret void
 }
-define void @mcl_fp_mont7L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+define void @mcl_fp_mont8L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
 {
 %r6 = getelementptr i32, i32* %r4, i32 -1
 %r7 = load i32, i32* %r6
 %r9 = getelementptr i32, i32* %r3, i32 0
 %r10 = load i32, i32* %r9
-%r11 = call i256 @mulPv224x32(i32* %r2, i32 %r10)
-%r12 = zext i256 %r11 to i288
-%r13 = trunc i256 %r11 to i32
+%r11 = call i288 @mulPv256x32(i32* %r2, i32 %r10)
+%r12 = zext i288 %r11 to i320
+%r13 = trunc i288 %r11 to i32
 %r14 = mul i32 %r13, %r7
-%r15 = call i256 @mulPv224x32(i32* %r4, i32 %r14)
-%r16 = zext i256 %r15 to i288
-%r17 = add i288 %r12, %r16
-%r18 = lshr i288 %r17, 32
+%r15 = call i288 @mulPv256x32(i32* %r4, i32 %r14)
+%r16 = zext i288 %r15 to i320
+%r17 = add i320 %r12, %r16
+%r18 = lshr i320 %r17, 32
 %r20 = getelementptr i32, i32* %r3, i32 1
 %r21 = load i32, i32* %r20
-%r22 = call i256 @mulPv224x32(i32* %r2, i32 %r21)
-%r23 = zext i256 %r22 to i288
-%r24 = add i288 %r18, %r23
-%r25 = trunc i288 %r24 to i32
+%r22 = call i288 @mulPv256x32(i32* %r2, i32 %r21)
+%r23 = zext i288 %r22 to i320
+%r24 = add i320 %r18, %r23
+%r25 = trunc i320 %r24 to i32
 %r26 = mul i32 %r25, %r7
-%r27 = call i256 @mulPv224x32(i32* %r4, i32 %r26)
-%r28 = zext i256 %r27 to i288
-%r29 = add i288 %r24, %r28
-%r30 = lshr i288 %r29, 32
+%r27 = call i288 @mulPv256x32(i32* %r4, i32 %r26)
+%r28 = zext i288 %r27 to i320
+%r29 = add i320 %r24, %r28
+%r30 = lshr i320 %r29, 32
 %r32 = getelementptr i32, i32* %r3, i32 2
 %r33 = load i32, i32* %r32
-%r34 = call i256 @mulPv224x32(i32* %r2, i32 %r33)
-%r35 = zext i256 %r34 to i288
-%r36 = add i288 %r30, %r35
-%r37 = trunc i288 %r36 to i32
+%r34 = call i288 @mulPv256x32(i32* %r2, i32 %r33)
+%r35 = zext i288 %r34 to i320
+%r36 = add i320 %r30, %r35
+%r37 = trunc i320 %r36 to i32
 %r38 = mul i32 %r37, %r7
-%r39 = call i256 @mulPv224x32(i32* %r4, i32 %r38)
-%r40 = zext i256 %r39 to i288
-%r41 = add i288 %r36, %r40
-%r42 = lshr i288 %r41, 32
+%r39 = call i288 @mulPv256x32(i32* %r4, i32 %r38)
+%r40 = zext i288 %r39 to i320
+%r41 = add i320 %r36, %r40
+%r42 = lshr i320 %r41, 32
 %r44 = getelementptr i32, i32* %r3, i32 3
 %r45 = load i32, i32* %r44
-%r46 = call i256 @mulPv224x32(i32* %r2, i32 %r45)
-%r47 = zext i256 %r46 to i288
-%r48 = add i288 %r42, %r47
-%r49 = trunc i288 %r48 to i32
+%r46 = call i288 @mulPv256x32(i32* %r2, i32 %r45)
+%r47 = zext i288 %r46 to i320
+%r48 = add i320 %r42, %r47
+%r49 = trunc i320 %r48 to i32
 %r50 = mul i32 %r49, %r7
-%r51 = call i256 @mulPv224x32(i32* %r4, i32 %r50)
-%r52 = zext i256 %r51 to i288
-%r53 = add i288 %r48, %r52
-%r54 = lshr i288 %r53, 32
+%r51 = call i288 @mulPv256x32(i32* %r4, i32 %r50)
+%r52 = zext i288 %r51 to i320
+%r53 = add i320 %r48, %r52
+%r54 = lshr i320 %r53, 32
 %r56 = getelementptr i32, i32* %r3, i32 4
 %r57 = load i32, i32* %r56
-%r58 = call i256 @mulPv224x32(i32* %r2, i32 %r57)
-%r59 = zext i256 %r58 to i288
-%r60 = add i288 %r54, %r59
-%r61 = trunc i288 %r60 to i32
+%r58 = call i288 @mulPv256x32(i32* %r2, i32 %r57)
+%r59 = zext i288 %r58 to i320
+%r60 = add i320 %r54, %r59
+%r61 = trunc i320 %r60 to i32
 %r62 = mul i32 %r61, %r7
-%r63 = call i256 @mulPv224x32(i32* %r4, i32 %r62)
-%r64 = zext i256 %r63 to i288
-%r65 = add i288 %r60, %r64
-%r66 = lshr i288 %r65, 32
+%r63 = call i288 @mulPv256x32(i32* %r4, i32 %r62)
+%r64 = zext i288 %r63 to i320
+%r65 = add i320 %r60, %r64
+%r66 = lshr i320 %r65, 32
 %r68 = getelementptr i32, i32* %r3, i32 5
 %r69 = load i32, i32* %r68
-%r70 = call i256 @mulPv224x32(i32* %r2, i32 %r69)
-%r71 = zext i256 %r70 to i288
-%r72 = add i288 %r66, %r71
-%r73 = trunc i288 %r72 to i32
+%r70 = call i288 @mulPv256x32(i32* %r2, i32 %r69)
+%r71 = zext i288 %r70 to i320
+%r72 = add i320 %r66, %r71
+%r73 = trunc i320 %r72 to i32
 %r74 = mul i32 %r73, %r7
-%r75 = call i256 @mulPv224x32(i32* %r4, i32 %r74)
-%r76 = zext i256 %r75 to i288
-%r77 = add i288 %r72, %r76
-%r78 = lshr i288 %r77, 32
+%r75 = call i288 @mulPv256x32(i32* %r4, i32 %r74)
+%r76 = zext i288 %r75 to i320
+%r77 = add i320 %r72, %r76
+%r78 = lshr i320 %r77, 32
 %r80 = getelementptr i32, i32* %r3, i32 6
 %r81 = load i32, i32* %r80
-%r82 = call i256 @mulPv224x32(i32* %r2, i32 %r81)
-%r83 = zext i256 %r82 to i288
-%r84 = add i288 %r78, %r83
-%r85 = trunc i288 %r84 to i32
+%r82 = call i288 @mulPv256x32(i32* %r2, i32 %r81)
+%r83 = zext i288 %r82 to i320
+%r84 = add i320 %r78, %r83
+%r85 = trunc i320 %r84 to i32
 %r86 = mul i32 %r85, %r7
-%r87 = call i256 @mulPv224x32(i32* %r4, i32 %r86)
-%r88 = zext i256 %r87 to i288
-%r89 = add i288 %r84, %r88
-%r90 = lshr i288 %r89, 32
-%r91 = trunc i288 %r90 to i256
-%r92 = load i32, i32* %r4
-%r93 = zext i32 %r92 to i64
-%r95 = getelementptr i32, i32* %r4, i32 1
-%r96 = load i32, i32* %r95
-%r97 = zext i32 %r96 to i64
-%r98 = shl i64 %r97, 32
-%r99 = or i64 %r93, %r98
-%r100 = zext i64 %r99 to i96
-%r102 = getelementptr i32, i32* %r4, i32 2
-%r103 = load i32, i32* %r102
-%r104 = zext i32 %r103 to i96
-%r105 = shl i96 %r104, 64
-%r106 = or i96 %r100, %r105
-%r107 = zext i96 %r106 to i128
-%r109 = getelementptr i32, i32* %r4, i32 3
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i128
-%r112 = shl i128 %r111, 96
-%r113 = or i128 %r107, %r112
-%r114 = zext i128 %r113 to i160
-%r116 = getelementptr i32, i32* %r4, i32 4
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i160
-%r119 = shl i160 %r118, 128
-%r120 = or i160 %r114, %r119
-%r121 = zext i160 %r120 to i192
-%r123 = getelementptr i32, i32* %r4, i32 5
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i192
-%r126 = shl i192 %r125, 160
-%r127 = or i192 %r121, %r126
-%r128 = zext i192 %r127 to i224
-%r130 = getelementptr i32, i32* %r4, i32 6
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i224
-%r133 = shl i224 %r132, 192
-%r134 = or i224 %r128, %r133
-%r135 = zext i224 %r134 to i256
-%r136 = sub i256 %r91, %r135
-%r137 = lshr i256 %r136, 224
-%r138 = trunc i256 %r137 to i1
-%r139 = select i1 %r138, i256 %r91, i256 %r136
-%r140 = trunc i256 %r139 to i224
-%r141 = trunc i224 %r140 to i32
-%r143 = getelementptr i32, i32* %r1, i32 0
-store i32 %r141, i32* %r143
-%r144 = lshr i224 %r140, 32
-%r145 = trunc i224 %r144 to i32
-%r147 = getelementptr i32, i32* %r1, i32 1
-store i32 %r145, i32* %r147
-%r148 = lshr i224 %r144, 32
-%r149 = trunc i224 %r148 to i32
-%r151 = getelementptr i32, i32* %r1, i32 2
-store i32 %r149, i32* %r151
-%r152 = lshr i224 %r148, 32
-%r153 = trunc i224 %r152 to i32
-%r155 = getelementptr i32, i32* %r1, i32 3
-store i32 %r153, i32* %r155
-%r156 = lshr i224 %r152, 32
-%r157 = trunc i224 %r156 to i32
-%r159 = getelementptr i32, i32* %r1, i32 4
-store i32 %r157, i32* %r159
-%r160 = lshr i224 %r156, 32
-%r161 = trunc i224 %r160 to i32
-%r163 = getelementptr i32, i32* %r1, i32 5
-store i32 %r161, i32* %r163
-%r164 = lshr i224 %r160, 32
-%r165 = trunc i224 %r164 to i32
-%r167 = getelementptr i32, i32* %r1, i32 6
-store i32 %r165, i32* %r167
-ret void
-}
-define void @mcl_fp_montNF7L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i256 @mulPv224x32(i32* %r2, i32 %r8)
-%r10 = trunc i256 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i256 @mulPv224x32(i32* %r4, i32 %r11)
-%r13 = add i256 %r9, %r12
-%r14 = lshr i256 %r13, 32
+%r87 = call i288 @mulPv256x32(i32* %r4, i32 %r86)
+%r88 = zext i288 %r87 to i320
+%r89 = add i320 %r84, %r88
+%r90 = lshr i320 %r89, 32
+%r92 = getelementptr i32, i32* %r3, i32 7
+%r93 = load i32, i32* %r92
+%r94 = call i288 @mulPv256x32(i32* %r2, i32 %r93)
+%r95 = zext i288 %r94 to i320
+%r96 = add i320 %r90, %r95
+%r97 = trunc i320 %r96 to i32
+%r98 = mul i32 %r97, %r7
+%r99 = call i288 @mulPv256x32(i32* %r4, i32 %r98)
+%r100 = zext i288 %r99 to i320
+%r101 = add i320 %r96, %r100
+%r102 = lshr i320 %r101, 32
+%r103 = trunc i320 %r102 to i288
+%r104 = load i32, i32* %r4
+%r105 = zext i32 %r104 to i64
+%r107 = getelementptr i32, i32* %r4, i32 1
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i64
+%r110 = shl i64 %r109, 32
+%r111 = or i64 %r105, %r110
+%r112 = zext i64 %r111 to i96
+%r114 = getelementptr i32, i32* %r4, i32 2
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i96
+%r117 = shl i96 %r116, 64
+%r118 = or i96 %r112, %r117
+%r119 = zext i96 %r118 to i128
+%r121 = getelementptr i32, i32* %r4, i32 3
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i128
+%r124 = shl i128 %r123, 96
+%r125 = or i128 %r119, %r124
+%r126 = zext i128 %r125 to i160
+%r128 = getelementptr i32, i32* %r4, i32 4
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i160
+%r131 = shl i160 %r130, 128
+%r132 = or i160 %r126, %r131
+%r133 = zext i160 %r132 to i192
+%r135 = getelementptr i32, i32* %r4, i32 5
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i192
+%r138 = shl i192 %r137, 160
+%r139 = or i192 %r133, %r138
+%r140 = zext i192 %r139 to i224
+%r142 = getelementptr i32, i32* %r4, i32 6
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i224
+%r145 = shl i224 %r144, 192
+%r146 = or i224 %r140, %r145
+%r147 = zext i224 %r146 to i256
+%r149 = getelementptr i32, i32* %r4, i32 7
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i256
+%r152 = shl i256 %r151, 224
+%r153 = or i256 %r147, %r152
+%r154 = zext i256 %r153 to i288
+%r155 = sub i288 %r103, %r154
+%r156 = lshr i288 %r155, 256
+%r157 = trunc i288 %r156 to i1
+%r158 = select i1 %r157, i288 %r103, i288 %r155
+%r159 = trunc i288 %r158 to i256
+%r161 = getelementptr i32, i32* %r1, i32 0
+%r162 = trunc i256 %r159 to i32
+store i32 %r162, i32* %r161
+%r163 = lshr i256 %r159, 32
+%r165 = getelementptr i32, i32* %r1, i32 1
+%r166 = trunc i256 %r163 to i32
+store i32 %r166, i32* %r165
+%r167 = lshr i256 %r163, 32
+%r169 = getelementptr i32, i32* %r1, i32 2
+%r170 = trunc i256 %r167 to i32
+store i32 %r170, i32* %r169
+%r171 = lshr i256 %r167, 32
+%r173 = getelementptr i32, i32* %r1, i32 3
+%r174 = trunc i256 %r171 to i32
+store i32 %r174, i32* %r173
+%r175 = lshr i256 %r171, 32
+%r177 = getelementptr i32, i32* %r1, i32 4
+%r178 = trunc i256 %r175 to i32
+store i32 %r178, i32* %r177
+%r179 = lshr i256 %r175, 32
+%r181 = getelementptr i32, i32* %r1, i32 5
+%r182 = trunc i256 %r179 to i32
+store i32 %r182, i32* %r181
+%r183 = lshr i256 %r179, 32
+%r185 = getelementptr i32, i32* %r1, i32 6
+%r186 = trunc i256 %r183 to i32
+store i32 %r186, i32* %r185
+%r187 = lshr i256 %r183, 32
+%r189 = getelementptr i32, i32* %r1, i32 7
+%r190 = trunc i256 %r187 to i32
+store i32 %r190, i32* %r189
+ret void
+}
+define void @mcl_fp_montNF8L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6
+%r8 = load i32, i32* %r3
+%r9 = call i288 @mulPv256x32(i32* %r2, i32 %r8)
+%r10 = trunc i288 %r9 to i32
+%r11 = mul i32 %r10, %r7
+%r12 = call i288 @mulPv256x32(i32* %r4, i32 %r11)
+%r13 = add i288 %r9, %r12
+%r14 = lshr i288 %r13, 32
 %r16 = getelementptr i32, i32* %r3, i32 1
 %r17 = load i32, i32* %r16
-%r18 = call i256 @mulPv224x32(i32* %r2, i32 %r17)
-%r19 = add i256 %r14, %r18
-%r20 = trunc i256 %r19 to i32
+%r18 = call i288 @mulPv256x32(i32* %r2, i32 %r17)
+%r19 = add i288 %r14, %r18
+%r20 = trunc i288 %r19 to i32
 %r21 = mul i32 %r20, %r7
-%r22 = call i256 @mulPv224x32(i32* %r4, i32 %r21)
-%r23 = add i256 %r19, %r22
-%r24 = lshr i256 %r23, 32
+%r22 = call i288 @mulPv256x32(i32* %r4, i32 %r21)
+%r23 = add i288 %r19, %r22
+%r24 = lshr i288 %r23, 32
 %r26 = getelementptr i32, i32* %r3, i32 2
 %r27 = load i32, i32* %r26
-%r28 = call i256 @mulPv224x32(i32* %r2, i32 %r27)
-%r29 = add i256 %r24, %r28
-%r30 = trunc i256 %r29 to i32
+%r28 = call i288 @mulPv256x32(i32* %r2, i32 %r27)
+%r29 = add i288 %r24, %r28
+%r30 = trunc i288 %r29 to i32
 %r31 = mul i32 %r30, %r7
-%r32 = call i256 @mulPv224x32(i32* %r4, i32 %r31)
-%r33 = add i256 %r29, %r32
-%r34 = lshr i256 %r33, 32
+%r32 = call i288 @mulPv256x32(i32* %r4, i32 %r31)
+%r33 = add i288 %r29, %r32
+%r34 = lshr i288 %r33, 32
 %r36 = getelementptr i32, i32* %r3, i32 3
 %r37 = load i32, i32* %r36
-%r38 = call i256 @mulPv224x32(i32* %r2, i32 %r37)
-%r39 = add i256 %r34, %r38
-%r40 = trunc i256 %r39 to i32
+%r38 = call i288 @mulPv256x32(i32* %r2, i32 %r37)
+%r39 = add i288 %r34, %r38
+%r40 = trunc i288 %r39 to i32
 %r41 = mul i32 %r40, %r7
-%r42 = call i256 @mulPv224x32(i32* %r4, i32 %r41)
-%r43 = add i256 %r39, %r42
-%r44 = lshr i256 %r43, 32
+%r42 = call i288 @mulPv256x32(i32* %r4, i32 %r41)
+%r43 = add i288 %r39, %r42
+%r44 = lshr i288 %r43, 32
 %r46 = getelementptr i32, i32* %r3, i32 4
 %r47 = load i32, i32* %r46
-%r48 = call i256 @mulPv224x32(i32* %r2, i32 %r47)
-%r49 = add i256 %r44, %r48
-%r50 = trunc i256 %r49 to i32
+%r48 = call i288 @mulPv256x32(i32* %r2, i32 %r47)
+%r49 = add i288 %r44, %r48
+%r50 = trunc i288 %r49 to i32
 %r51 = mul i32 %r50, %r7
-%r52 = call i256 @mulPv224x32(i32* %r4, i32 %r51)
-%r53 = add i256 %r49, %r52
-%r54 = lshr i256 %r53, 32
+%r52 = call i288 @mulPv256x32(i32* %r4, i32 %r51)
+%r53 = add i288 %r49, %r52
+%r54 = lshr i288 %r53, 32
 %r56 = getelementptr i32, i32* %r3, i32 5
 %r57 = load i32, i32* %r56
-%r58 = call i256 @mulPv224x32(i32* %r2, i32 %r57)
-%r59 = add i256 %r54, %r58
-%r60 = trunc i256 %r59 to i32
+%r58 = call i288 @mulPv256x32(i32* %r2, i32 %r57)
+%r59 = add i288 %r54, %r58
+%r60 = trunc i288 %r59 to i32
 %r61 = mul i32 %r60, %r7
-%r62 = call i256 @mulPv224x32(i32* %r4, i32 %r61)
-%r63 = add i256 %r59, %r62
-%r64 = lshr i256 %r63, 32
+%r62 = call i288 @mulPv256x32(i32* %r4, i32 %r61)
+%r63 = add i288 %r59, %r62
+%r64 = lshr i288 %r63, 32
 %r66 = getelementptr i32, i32* %r3, i32 6
 %r67 = load i32, i32* %r66
-%r68 = call i256 @mulPv224x32(i32* %r2, i32 %r67)
-%r69 = add i256 %r64, %r68
-%r70 = trunc i256 %r69 to i32
+%r68 = call i288 @mulPv256x32(i32* %r2, i32 %r67)
+%r69 = add i288 %r64, %r68
+%r70 = trunc i288 %r69 to i32
 %r71 = mul i32 %r70, %r7
-%r72 = call i256 @mulPv224x32(i32* %r4, i32 %r71)
-%r73 = add i256 %r69, %r72
-%r74 = lshr i256 %r73, 32
-%r75 = trunc i256 %r74 to i224
-%r76 = load i32, i32* %r4
-%r77 = zext i32 %r76 to i64
-%r79 = getelementptr i32, i32* %r4, i32 1
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i64
-%r82 = shl i64 %r81, 32
-%r83 = or i64 %r77, %r82
-%r84 = zext i64 %r83 to i96
-%r86 = getelementptr i32, i32* %r4, i32 2
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i96
-%r89 = shl i96 %r88, 64
-%r90 = or i96 %r84, %r89
-%r91 = zext i96 %r90 to i128
-%r93 = getelementptr i32, i32* %r4, i32 3
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i128
-%r96 = shl i128 %r95, 96
-%r97 = or i128 %r91, %r96
-%r98 = zext i128 %r97 to i160
-%r100 = getelementptr i32, i32* %r4, i32 4
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i160
-%r103 = shl i160 %r102, 128
-%r104 = or i160 %r98, %r103
-%r105 = zext i160 %r104 to i192
-%r107 = getelementptr i32, i32* %r4, i32 5
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i192
-%r110 = shl i192 %r109, 160
-%r111 = or i192 %r105, %r110
-%r112 = zext i192 %r111 to i224
-%r114 = getelementptr i32, i32* %r4, i32 6
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i224
-%r117 = shl i224 %r116, 192
-%r118 = or i224 %r112, %r117
-%r119 = sub i224 %r75, %r118
-%r120 = lshr i224 %r119, 223
-%r121 = trunc i224 %r120 to i1
-%r122 = select i1 %r121, i224 %r75, i224 %r119
-%r123 = trunc i224 %r122 to i32
-%r125 = getelementptr i32, i32* %r1, i32 0
-store i32 %r123, i32* %r125
-%r126 = lshr i224 %r122, 32
-%r127 = trunc i224 %r126 to i32
-%r129 = getelementptr i32, i32* %r1, i32 1
-store i32 %r127, i32* %r129
-%r130 = lshr i224 %r126, 32
-%r131 = trunc i224 %r130 to i32
-%r133 = getelementptr i32, i32* %r1, i32 2
-store i32 %r131, i32* %r133
-%r134 = lshr i224 %r130, 32
-%r135 = trunc i224 %r134 to i32
-%r137 = getelementptr i32, i32* %r1, i32 3
-store i32 %r135, i32* %r137
-%r138 = lshr i224 %r134, 32
-%r139 = trunc i224 %r138 to i32
-%r141 = getelementptr i32, i32* %r1, i32 4
-store i32 %r139, i32* %r141
-%r142 = lshr i224 %r138, 32
-%r143 = trunc i224 %r142 to i32
-%r145 = getelementptr i32, i32* %r1, i32 5
-store i32 %r143, i32* %r145
-%r146 = lshr i224 %r142, 32
-%r147 = trunc i224 %r146 to i32
-%r149 = getelementptr i32, i32* %r1, i32 6
-store i32 %r147, i32* %r149
-ret void
-}
-define void @mcl_fp_montRed7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = zext i96 %r21 to i128
-%r24 = getelementptr i32, i32* %r3, i32 3
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i128
-%r27 = shl i128 %r26, 96
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i160
-%r31 = getelementptr i32, i32* %r3, i32 4
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i160
-%r34 = shl i160 %r33, 128
-%r35 = or i160 %r29, %r34
-%r36 = zext i160 %r35 to i192
-%r38 = getelementptr i32, i32* %r3, i32 5
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i192
-%r41 = shl i192 %r40, 160
-%r42 = or i192 %r36, %r41
-%r43 = zext i192 %r42 to i224
-%r45 = getelementptr i32, i32* %r3, i32 6
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i224
-%r48 = shl i224 %r47, 192
-%r49 = or i224 %r43, %r48
-%r50 = load i32, i32* %r2
-%r51 = zext i32 %r50 to i64
-%r53 = getelementptr i32, i32* %r2, i32 1
-%r54 = load i32, i32* %r53
-%r55 = zext i32 %r54 to i64
-%r56 = shl i64 %r55, 32
-%r57 = or i64 %r51, %r56
-%r58 = zext i64 %r57 to i96
-%r60 = getelementptr i32, i32* %r2, i32 2
+%r72 = call i288 @mulPv256x32(i32* %r4, i32 %r71)
+%r73 = add i288 %r69, %r72
+%r74 = lshr i288 %r73, 32
+%r76 = getelementptr i32, i32* %r3, i32 7
+%r77 = load i32, i32* %r76
+%r78 = call i288 @mulPv256x32(i32* %r2, i32 %r77)
+%r79 = add i288 %r74, %r78
+%r80 = trunc i288 %r79 to i32
+%r81 = mul i32 %r80, %r7
+%r82 = call i288 @mulPv256x32(i32* %r4, i32 %r81)
+%r83 = add i288 %r79, %r82
+%r84 = lshr i288 %r83, 32
+%r85 = trunc i288 %r84 to i256
+%r86 = load i32, i32* %r4
+%r87 = zext i32 %r86 to i64
+%r89 = getelementptr i32, i32* %r4, i32 1
+%r90 = load i32, i32* %r89
+%r91 = zext i32 %r90 to i64
+%r92 = shl i64 %r91, 32
+%r93 = or i64 %r87, %r92
+%r94 = zext i64 %r93 to i96
+%r96 = getelementptr i32, i32* %r4, i32 2
+%r97 = load i32, i32* %r96
+%r98 = zext i32 %r97 to i96
+%r99 = shl i96 %r98, 64
+%r100 = or i96 %r94, %r99
+%r101 = zext i96 %r100 to i128
+%r103 = getelementptr i32, i32* %r4, i32 3
+%r104 = load i32, i32* %r103
+%r105 = zext i32 %r104 to i128
+%r106 = shl i128 %r105, 96
+%r107 = or i128 %r101, %r106
+%r108 = zext i128 %r107 to i160
+%r110 = getelementptr i32, i32* %r4, i32 4
+%r111 = load i32, i32* %r110
+%r112 = zext i32 %r111 to i160
+%r113 = shl i160 %r112, 128
+%r114 = or i160 %r108, %r113
+%r115 = zext i160 %r114 to i192
+%r117 = getelementptr i32, i32* %r4, i32 5
+%r118 = load i32, i32* %r117
+%r119 = zext i32 %r118 to i192
+%r120 = shl i192 %r119, 160
+%r121 = or i192 %r115, %r120
+%r122 = zext i192 %r121 to i224
+%r124 = getelementptr i32, i32* %r4, i32 6
+%r125 = load i32, i32* %r124
+%r126 = zext i32 %r125 to i224
+%r127 = shl i224 %r126, 192
+%r128 = or i224 %r122, %r127
+%r129 = zext i224 %r128 to i256
+%r131 = getelementptr i32, i32* %r4, i32 7
+%r132 = load i32, i32* %r131
+%r133 = zext i32 %r132 to i256
+%r134 = shl i256 %r133, 224
+%r135 = or i256 %r129, %r134
+%r136 = sub i256 %r85, %r135
+%r137 = lshr i256 %r136, 255
+%r138 = trunc i256 %r137 to i1
+%r139 = select i1 %r138, i256 %r85, i256 %r136
+%r141 = getelementptr i32, i32* %r1, i32 0
+%r142 = trunc i256 %r139 to i32
+store i32 %r142, i32* %r141
+%r143 = lshr i256 %r139, 32
+%r145 = getelementptr i32, i32* %r1, i32 1
+%r146 = trunc i256 %r143 to i32
+store i32 %r146, i32* %r145
+%r147 = lshr i256 %r143, 32
+%r149 = getelementptr i32, i32* %r1, i32 2
+%r150 = trunc i256 %r147 to i32
+store i32 %r150, i32* %r149
+%r151 = lshr i256 %r147, 32
+%r153 = getelementptr i32, i32* %r1, i32 3
+%r154 = trunc i256 %r151 to i32
+store i32 %r154, i32* %r153
+%r155 = lshr i256 %r151, 32
+%r157 = getelementptr i32, i32* %r1, i32 4
+%r158 = trunc i256 %r155 to i32
+store i32 %r158, i32* %r157
+%r159 = lshr i256 %r155, 32
+%r161 = getelementptr i32, i32* %r1, i32 5
+%r162 = trunc i256 %r159 to i32
+store i32 %r162, i32* %r161
+%r163 = lshr i256 %r159, 32
+%r165 = getelementptr i32, i32* %r1, i32 6
+%r166 = trunc i256 %r163 to i32
+store i32 %r166, i32* %r165
+%r167 = lshr i256 %r163, 32
+%r169 = getelementptr i32, i32* %r1, i32 7
+%r170 = trunc i256 %r167 to i32
+store i32 %r170, i32* %r169
+ret void
+}
+define void @mcl_fp_montRed8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = load i32, i32* %r2
+%r58 = zext i32 %r57 to i64
+%r60 = getelementptr i32, i32* %r2, i32 1
 %r61 = load i32, i32* %r60
-%r62 = zext i32 %r61 to i96
-%r63 = shl i96 %r62, 64
-%r64 = or i96 %r58, %r63
-%r65 = zext i96 %r64 to i128
-%r67 = getelementptr i32, i32* %r2, i32 3
+%r62 = zext i32 %r61 to i64
+%r63 = shl i64 %r62, 32
+%r64 = or i64 %r58, %r63
+%r65 = zext i64 %r64 to i96
+%r67 = getelementptr i32, i32* %r2, i32 2
 %r68 = load i32, i32* %r67
-%r69 = zext i32 %r68 to i128
-%r70 = shl i128 %r69, 96
-%r71 = or i128 %r65, %r70
-%r72 = zext i128 %r71 to i160
-%r74 = getelementptr i32, i32* %r2, i32 4
+%r69 = zext i32 %r68 to i96
+%r70 = shl i96 %r69, 64
+%r71 = or i96 %r65, %r70
+%r72 = zext i96 %r71 to i128
+%r74 = getelementptr i32, i32* %r2, i32 3
 %r75 = load i32, i32* %r74
-%r76 = zext i32 %r75 to i160
-%r77 = shl i160 %r76, 128
-%r78 = or i160 %r72, %r77
-%r79 = zext i160 %r78 to i192
-%r81 = getelementptr i32, i32* %r2, i32 5
+%r76 = zext i32 %r75 to i128
+%r77 = shl i128 %r76, 96
+%r78 = or i128 %r72, %r77
+%r79 = zext i128 %r78 to i160
+%r81 = getelementptr i32, i32* %r2, i32 4
 %r82 = load i32, i32* %r81
-%r83 = zext i32 %r82 to i192
-%r84 = shl i192 %r83, 160
-%r85 = or i192 %r79, %r84
-%r86 = zext i192 %r85 to i224
-%r88 = getelementptr i32, i32* %r2, i32 6
+%r83 = zext i32 %r82 to i160
+%r84 = shl i160 %r83, 128
+%r85 = or i160 %r79, %r84
+%r86 = zext i160 %r85 to i192
+%r88 = getelementptr i32, i32* %r2, i32 5
 %r89 = load i32, i32* %r88
-%r90 = zext i32 %r89 to i224
-%r91 = shl i224 %r90, 192
-%r92 = or i224 %r86, %r91
-%r93 = zext i224 %r92 to i256
-%r95 = getelementptr i32, i32* %r2, i32 7
+%r90 = zext i32 %r89 to i192
+%r91 = shl i192 %r90, 160
+%r92 = or i192 %r86, %r91
+%r93 = zext i192 %r92 to i224
+%r95 = getelementptr i32, i32* %r2, i32 6
 %r96 = load i32, i32* %r95
-%r97 = zext i32 %r96 to i256
-%r98 = shl i256 %r97, 224
-%r99 = or i256 %r93, %r98
-%r100 = zext i256 %r99 to i288
-%r102 = getelementptr i32, i32* %r2, i32 8
+%r97 = zext i32 %r96 to i224
+%r98 = shl i224 %r97, 192
+%r99 = or i224 %r93, %r98
+%r100 = zext i224 %r99 to i256
+%r102 = getelementptr i32, i32* %r2, i32 7
 %r103 = load i32, i32* %r102
-%r104 = zext i32 %r103 to i288
-%r105 = shl i288 %r104, 256
-%r106 = or i288 %r100, %r105
-%r107 = zext i288 %r106 to i320
-%r109 = getelementptr i32, i32* %r2, i32 9
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i320
-%r112 = shl i320 %r111, 288
-%r113 = or i320 %r107, %r112
-%r114 = zext i320 %r113 to i352
-%r116 = getelementptr i32, i32* %r2, i32 10
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i352
-%r119 = shl i352 %r118, 320
-%r120 = or i352 %r114, %r119
-%r121 = zext i352 %r120 to i384
-%r123 = getelementptr i32, i32* %r2, i32 11
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i384
-%r126 = shl i384 %r125, 352
-%r127 = or i384 %r121, %r126
-%r128 = zext i384 %r127 to i416
-%r130 = getelementptr i32, i32* %r2, i32 12
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i416
-%r133 = shl i416 %r132, 384
-%r134 = or i416 %r128, %r133
-%r135 = zext i416 %r134 to i448
-%r137 = getelementptr i32, i32* %r2, i32 13
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i448
-%r140 = shl i448 %r139, 416
-%r141 = or i448 %r135, %r140
-%r142 = zext i448 %r141 to i480
-%r143 = trunc i480 %r142 to i32
-%r144 = mul i32 %r143, %r6
-%r145 = call i256 @mulPv224x32(i32* %r3, i32 %r144)
-%r146 = zext i256 %r145 to i480
-%r147 = add i480 %r142, %r146
-%r148 = lshr i480 %r147, 32
-%r149 = trunc i480 %r148 to i448
-%r150 = trunc i448 %r149 to i32
-%r151 = mul i32 %r150, %r6
-%r152 = call i256 @mulPv224x32(i32* %r3, i32 %r151)
-%r153 = zext i256 %r152 to i448
-%r154 = add i448 %r149, %r153
-%r155 = lshr i448 %r154, 32
-%r156 = trunc i448 %r155 to i416
-%r157 = trunc i416 %r156 to i32
-%r158 = mul i32 %r157, %r6
-%r159 = call i256 @mulPv224x32(i32* %r3, i32 %r158)
-%r160 = zext i256 %r159 to i416
-%r161 = add i416 %r156, %r160
-%r162 = lshr i416 %r161, 32
-%r163 = trunc i416 %r162 to i384
-%r164 = trunc i384 %r163 to i32
-%r165 = mul i32 %r164, %r6
-%r166 = call i256 @mulPv224x32(i32* %r3, i32 %r165)
-%r167 = zext i256 %r166 to i384
-%r168 = add i384 %r163, %r167
-%r169 = lshr i384 %r168, 32
-%r170 = trunc i384 %r169 to i352
-%r171 = trunc i352 %r170 to i32
-%r172 = mul i32 %r171, %r6
-%r173 = call i256 @mulPv224x32(i32* %r3, i32 %r172)
-%r174 = zext i256 %r173 to i352
-%r175 = add i352 %r170, %r174
-%r176 = lshr i352 %r175, 32
-%r177 = trunc i352 %r176 to i320
-%r178 = trunc i320 %r177 to i32
-%r179 = mul i32 %r178, %r6
-%r180 = call i256 @mulPv224x32(i32* %r3, i32 %r179)
-%r181 = zext i256 %r180 to i320
-%r182 = add i320 %r177, %r181
+%r104 = zext i32 %r103 to i256
+%r105 = shl i256 %r104, 224
+%r106 = or i256 %r100, %r105
+%r107 = trunc i256 %r106 to i32
+%r108 = mul i32 %r107, %r6
+%r109 = call i288 @mulPv256x32(i32* %r3, i32 %r108)
+%r111 = getelementptr i32, i32* %r2, i32 8
+%r112 = load i32, i32* %r111
+%r113 = zext i32 %r112 to i288
+%r114 = shl i288 %r113, 256
+%r115 = zext i256 %r106 to i288
+%r116 = or i288 %r114, %r115
+%r117 = zext i288 %r116 to i320
+%r118 = zext i288 %r109 to i320
+%r119 = add i320 %r117, %r118
+%r120 = lshr i320 %r119, 32
+%r121 = trunc i320 %r120 to i288
+%r122 = lshr i288 %r121, 256
+%r123 = trunc i288 %r122 to i32
+%r124 = trunc i288 %r121 to i256
+%r125 = trunc i256 %r124 to i32
+%r126 = mul i32 %r125, %r6
+%r127 = call i288 @mulPv256x32(i32* %r3, i32 %r126)
+%r128 = zext i32 %r123 to i288
+%r129 = shl i288 %r128, 256
+%r130 = add i288 %r127, %r129
+%r132 = getelementptr i32, i32* %r2, i32 9
+%r133 = load i32, i32* %r132
+%r134 = zext i32 %r133 to i288
+%r135 = shl i288 %r134, 256
+%r136 = zext i256 %r124 to i288
+%r137 = or i288 %r135, %r136
+%r138 = zext i288 %r137 to i320
+%r139 = zext i288 %r130 to i320
+%r140 = add i320 %r138, %r139
+%r141 = lshr i320 %r140, 32
+%r142 = trunc i320 %r141 to i288
+%r143 = lshr i288 %r142, 256
+%r144 = trunc i288 %r143 to i32
+%r145 = trunc i288 %r142 to i256
+%r146 = trunc i256 %r145 to i32
+%r147 = mul i32 %r146, %r6
+%r148 = call i288 @mulPv256x32(i32* %r3, i32 %r147)
+%r149 = zext i32 %r144 to i288
+%r150 = shl i288 %r149, 256
+%r151 = add i288 %r148, %r150
+%r153 = getelementptr i32, i32* %r2, i32 10
+%r154 = load i32, i32* %r153
+%r155 = zext i32 %r154 to i288
+%r156 = shl i288 %r155, 256
+%r157 = zext i256 %r145 to i288
+%r158 = or i288 %r156, %r157
+%r159 = zext i288 %r158 to i320
+%r160 = zext i288 %r151 to i320
+%r161 = add i320 %r159, %r160
+%r162 = lshr i320 %r161, 32
+%r163 = trunc i320 %r162 to i288
+%r164 = lshr i288 %r163, 256
+%r165 = trunc i288 %r164 to i32
+%r166 = trunc i288 %r163 to i256
+%r167 = trunc i256 %r166 to i32
+%r168 = mul i32 %r167, %r6
+%r169 = call i288 @mulPv256x32(i32* %r3, i32 %r168)
+%r170 = zext i32 %r165 to i288
+%r171 = shl i288 %r170, 256
+%r172 = add i288 %r169, %r171
+%r174 = getelementptr i32, i32* %r2, i32 11
+%r175 = load i32, i32* %r174
+%r176 = zext i32 %r175 to i288
+%r177 = shl i288 %r176, 256
+%r178 = zext i256 %r166 to i288
+%r179 = or i288 %r177, %r178
+%r180 = zext i288 %r179 to i320
+%r181 = zext i288 %r172 to i320
+%r182 = add i320 %r180, %r181
 %r183 = lshr i320 %r182, 32
 %r184 = trunc i320 %r183 to i288
-%r185 = trunc i288 %r184 to i32
-%r186 = mul i32 %r185, %r6
-%r187 = call i256 @mulPv224x32(i32* %r3, i32 %r186)
-%r188 = zext i256 %r187 to i288
-%r189 = add i288 %r184, %r188
-%r190 = lshr i288 %r189, 32
-%r191 = trunc i288 %r190 to i256
-%r192 = zext i224 %r49 to i256
-%r193 = sub i256 %r191, %r192
-%r194 = lshr i256 %r193, 224
-%r195 = trunc i256 %r194 to i1
-%r196 = select i1 %r195, i256 %r191, i256 %r193
-%r197 = trunc i256 %r196 to i224
-%r198 = trunc i224 %r197 to i32
-%r200 = getelementptr i32, i32* %r1, i32 0
-store i32 %r198, i32* %r200
-%r201 = lshr i224 %r197, 32
-%r202 = trunc i224 %r201 to i32
-%r204 = getelementptr i32, i32* %r1, i32 1
-store i32 %r202, i32* %r204
-%r205 = lshr i224 %r201, 32
-%r206 = trunc i224 %r205 to i32
-%r208 = getelementptr i32, i32* %r1, i32 2
-store i32 %r206, i32* %r208
-%r209 = lshr i224 %r205, 32
-%r210 = trunc i224 %r209 to i32
-%r212 = getelementptr i32, i32* %r1, i32 3
-store i32 %r210, i32* %r212
-%r213 = lshr i224 %r209, 32
-%r214 = trunc i224 %r213 to i32
-%r216 = getelementptr i32, i32* %r1, i32 4
-store i32 %r214, i32* %r216
-%r217 = lshr i224 %r213, 32
-%r218 = trunc i224 %r217 to i32
-%r220 = getelementptr i32, i32* %r1, i32 5
-store i32 %r218, i32* %r220
-%r221 = lshr i224 %r217, 32
-%r222 = trunc i224 %r221 to i32
-%r224 = getelementptr i32, i32* %r1, i32 6
-store i32 %r222, i32* %r224
-ret void
-}
-define i32 @mcl_fp_addPre7L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+%r185 = lshr i288 %r184, 256
+%r186 = trunc i288 %r185 to i32
+%r187 = trunc i288 %r184 to i256
+%r188 = trunc i256 %r187 to i32
+%r189 = mul i32 %r188, %r6
+%r190 = call i288 @mulPv256x32(i32* %r3, i32 %r189)
+%r191 = zext i32 %r186 to i288
+%r192 = shl i288 %r191, 256
+%r193 = add i288 %r190, %r192
+%r195 = getelementptr i32, i32* %r2, i32 12
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i288
+%r198 = shl i288 %r197, 256
+%r199 = zext i256 %r187 to i288
+%r200 = or i288 %r198, %r199
+%r201 = zext i288 %r200 to i320
+%r202 = zext i288 %r193 to i320
+%r203 = add i320 %r201, %r202
+%r204 = lshr i320 %r203, 32
+%r205 = trunc i320 %r204 to i288
+%r206 = lshr i288 %r205, 256
+%r207 = trunc i288 %r206 to i32
+%r208 = trunc i288 %r205 to i256
+%r209 = trunc i256 %r208 to i32
+%r210 = mul i32 %r209, %r6
+%r211 = call i288 @mulPv256x32(i32* %r3, i32 %r210)
+%r212 = zext i32 %r207 to i288
+%r213 = shl i288 %r212, 256
+%r214 = add i288 %r211, %r213
+%r216 = getelementptr i32, i32* %r2, i32 13
+%r217 = load i32, i32* %r216
+%r218 = zext i32 %r217 to i288
+%r219 = shl i288 %r218, 256
+%r220 = zext i256 %r208 to i288
+%r221 = or i288 %r219, %r220
+%r222 = zext i288 %r221 to i320
+%r223 = zext i288 %r214 to i320
+%r224 = add i320 %r222, %r223
+%r225 = lshr i320 %r224, 32
+%r226 = trunc i320 %r225 to i288
+%r227 = lshr i288 %r226, 256
+%r228 = trunc i288 %r227 to i32
+%r229 = trunc i288 %r226 to i256
+%r230 = trunc i256 %r229 to i32
+%r231 = mul i32 %r230, %r6
+%r232 = call i288 @mulPv256x32(i32* %r3, i32 %r231)
+%r233 = zext i32 %r228 to i288
+%r234 = shl i288 %r233, 256
+%r235 = add i288 %r232, %r234
+%r237 = getelementptr i32, i32* %r2, i32 14
+%r238 = load i32, i32* %r237
+%r239 = zext i32 %r238 to i288
+%r240 = shl i288 %r239, 256
+%r241 = zext i256 %r229 to i288
+%r242 = or i288 %r240, %r241
+%r243 = zext i288 %r242 to i320
+%r244 = zext i288 %r235 to i320
+%r245 = add i320 %r243, %r244
+%r246 = lshr i320 %r245, 32
+%r247 = trunc i320 %r246 to i288
+%r248 = lshr i288 %r247, 256
+%r249 = trunc i288 %r248 to i32
+%r250 = trunc i288 %r247 to i256
+%r251 = trunc i256 %r250 to i32
+%r252 = mul i32 %r251, %r6
+%r253 = call i288 @mulPv256x32(i32* %r3, i32 %r252)
+%r254 = zext i32 %r249 to i288
+%r255 = shl i288 %r254, 256
+%r256 = add i288 %r253, %r255
+%r258 = getelementptr i32, i32* %r2, i32 15
+%r259 = load i32, i32* %r258
+%r260 = zext i32 %r259 to i288
+%r261 = shl i288 %r260, 256
+%r262 = zext i256 %r250 to i288
+%r263 = or i288 %r261, %r262
+%r264 = zext i288 %r263 to i320
+%r265 = zext i288 %r256 to i320
+%r266 = add i320 %r264, %r265
+%r267 = lshr i320 %r266, 32
+%r268 = trunc i320 %r267 to i288
+%r269 = lshr i288 %r268, 256
+%r270 = trunc i288 %r269 to i32
+%r271 = trunc i288 %r268 to i256
+%r272 = zext i256 %r56 to i288
+%r273 = zext i256 %r271 to i288
+%r274 = sub i288 %r273, %r272
+%r275 = lshr i288 %r274, 256
+%r276 = trunc i288 %r275 to i1
+%r277 = select i1 %r276, i288 %r273, i288 %r274
+%r278 = trunc i288 %r277 to i256
+%r280 = getelementptr i32, i32* %r1, i32 0
+%r281 = trunc i256 %r278 to i32
+store i32 %r281, i32* %r280
+%r282 = lshr i256 %r278, 32
+%r284 = getelementptr i32, i32* %r1, i32 1
+%r285 = trunc i256 %r282 to i32
+store i32 %r285, i32* %r284
+%r286 = lshr i256 %r282, 32
+%r288 = getelementptr i32, i32* %r1, i32 2
+%r289 = trunc i256 %r286 to i32
+store i32 %r289, i32* %r288
+%r290 = lshr i256 %r286, 32
+%r292 = getelementptr i32, i32* %r1, i32 3
+%r293 = trunc i256 %r290 to i32
+store i32 %r293, i32* %r292
+%r294 = lshr i256 %r290, 32
+%r296 = getelementptr i32, i32* %r1, i32 4
+%r297 = trunc i256 %r294 to i32
+store i32 %r297, i32* %r296
+%r298 = lshr i256 %r294, 32
+%r300 = getelementptr i32, i32* %r1, i32 5
+%r301 = trunc i256 %r298 to i32
+store i32 %r301, i32* %r300
+%r302 = lshr i256 %r298, 32
+%r304 = getelementptr i32, i32* %r1, i32 6
+%r305 = trunc i256 %r302 to i32
+store i32 %r305, i32* %r304
+%r306 = lshr i256 %r302, 32
+%r308 = getelementptr i32, i32* %r1, i32 7
+%r309 = trunc i256 %r306 to i32
+store i32 %r309, i32* %r308
+ret void
+}
+define void @mcl_fp_montRedNF8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
 {
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r49 = load i32, i32* %r4
-%r50 = zext i32 %r49 to i64
-%r52 = getelementptr i32, i32* %r4, i32 1
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
 %r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i64
-%r55 = shl i64 %r54, 32
-%r56 = or i64 %r50, %r55
-%r57 = zext i64 %r56 to i96
-%r59 = getelementptr i32, i32* %r4, i32 2
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i96
-%r62 = shl i96 %r61, 64
-%r63 = or i96 %r57, %r62
-%r64 = zext i96 %r63 to i128
-%r66 = getelementptr i32, i32* %r4, i32 3
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i128
-%r69 = shl i128 %r68, 96
-%r70 = or i128 %r64, %r69
-%r71 = zext i128 %r70 to i160
-%r73 = getelementptr i32, i32* %r4, i32 4
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i160
-%r76 = shl i160 %r75, 128
-%r77 = or i160 %r71, %r76
-%r78 = zext i160 %r77 to i192
-%r80 = getelementptr i32, i32* %r4, i32 5
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i192
-%r83 = shl i192 %r82, 160
-%r84 = or i192 %r78, %r83
-%r85 = zext i192 %r84 to i224
-%r87 = getelementptr i32, i32* %r4, i32 6
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i224
-%r90 = shl i224 %r89, 192
-%r91 = or i224 %r85, %r90
-%r92 = zext i224 %r91 to i256
-%r93 = add i256 %r48, %r92
-%r94 = trunc i256 %r93 to i224
-%r95 = trunc i224 %r94 to i32
-%r97 = getelementptr i32, i32* %r2, i32 0
-store i32 %r95, i32* %r97
-%r98 = lshr i224 %r94, 32
-%r99 = trunc i224 %r98 to i32
-%r101 = getelementptr i32, i32* %r2, i32 1
-store i32 %r99, i32* %r101
-%r102 = lshr i224 %r98, 32
-%r103 = trunc i224 %r102 to i32
-%r105 = getelementptr i32, i32* %r2, i32 2
-store i32 %r103, i32* %r105
-%r106 = lshr i224 %r102, 32
-%r107 = trunc i224 %r106 to i32
-%r109 = getelementptr i32, i32* %r2, i32 3
-store i32 %r107, i32* %r109
-%r110 = lshr i224 %r106, 32
-%r111 = trunc i224 %r110 to i32
-%r113 = getelementptr i32, i32* %r2, i32 4
-store i32 %r111, i32* %r113
-%r114 = lshr i224 %r110, 32
-%r115 = trunc i224 %r114 to i32
-%r117 = getelementptr i32, i32* %r2, i32 5
-store i32 %r115, i32* %r117
-%r118 = lshr i224 %r114, 32
-%r119 = trunc i224 %r118 to i32
-%r121 = getelementptr i32, i32* %r2, i32 6
-store i32 %r119, i32* %r121
-%r122 = lshr i256 %r93, 224
-%r123 = trunc i256 %r122 to i32
-ret i32 %r123
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = load i32, i32* %r2
+%r58 = zext i32 %r57 to i64
+%r60 = getelementptr i32, i32* %r2, i32 1
+%r61 = load i32, i32* %r60
+%r62 = zext i32 %r61 to i64
+%r63 = shl i64 %r62, 32
+%r64 = or i64 %r58, %r63
+%r65 = zext i64 %r64 to i96
+%r67 = getelementptr i32, i32* %r2, i32 2
+%r68 = load i32, i32* %r67
+%r69 = zext i32 %r68 to i96
+%r70 = shl i96 %r69, 64
+%r71 = or i96 %r65, %r70
+%r72 = zext i96 %r71 to i128
+%r74 = getelementptr i32, i32* %r2, i32 3
+%r75 = load i32, i32* %r74
+%r76 = zext i32 %r75 to i128
+%r77 = shl i128 %r76, 96
+%r78 = or i128 %r72, %r77
+%r79 = zext i128 %r78 to i160
+%r81 = getelementptr i32, i32* %r2, i32 4
+%r82 = load i32, i32* %r81
+%r83 = zext i32 %r82 to i160
+%r84 = shl i160 %r83, 128
+%r85 = or i160 %r79, %r84
+%r86 = zext i160 %r85 to i192
+%r88 = getelementptr i32, i32* %r2, i32 5
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i192
+%r91 = shl i192 %r90, 160
+%r92 = or i192 %r86, %r91
+%r93 = zext i192 %r92 to i224
+%r95 = getelementptr i32, i32* %r2, i32 6
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i224
+%r98 = shl i224 %r97, 192
+%r99 = or i224 %r93, %r98
+%r100 = zext i224 %r99 to i256
+%r102 = getelementptr i32, i32* %r2, i32 7
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i256
+%r105 = shl i256 %r104, 224
+%r106 = or i256 %r100, %r105
+%r107 = trunc i256 %r106 to i32
+%r108 = mul i32 %r107, %r6
+%r109 = call i288 @mulPv256x32(i32* %r3, i32 %r108)
+%r111 = getelementptr i32, i32* %r2, i32 8
+%r112 = load i32, i32* %r111
+%r113 = zext i32 %r112 to i288
+%r114 = shl i288 %r113, 256
+%r115 = zext i256 %r106 to i288
+%r116 = or i288 %r114, %r115
+%r117 = zext i288 %r116 to i320
+%r118 = zext i288 %r109 to i320
+%r119 = add i320 %r117, %r118
+%r120 = lshr i320 %r119, 32
+%r121 = trunc i320 %r120 to i288
+%r122 = lshr i288 %r121, 256
+%r123 = trunc i288 %r122 to i32
+%r124 = trunc i288 %r121 to i256
+%r125 = trunc i256 %r124 to i32
+%r126 = mul i32 %r125, %r6
+%r127 = call i288 @mulPv256x32(i32* %r3, i32 %r126)
+%r128 = zext i32 %r123 to i288
+%r129 = shl i288 %r128, 256
+%r130 = add i288 %r127, %r129
+%r132 = getelementptr i32, i32* %r2, i32 9
+%r133 = load i32, i32* %r132
+%r134 = zext i32 %r133 to i288
+%r135 = shl i288 %r134, 256
+%r136 = zext i256 %r124 to i288
+%r137 = or i288 %r135, %r136
+%r138 = zext i288 %r137 to i320
+%r139 = zext i288 %r130 to i320
+%r140 = add i320 %r138, %r139
+%r141 = lshr i320 %r140, 32
+%r142 = trunc i320 %r141 to i288
+%r143 = lshr i288 %r142, 256
+%r144 = trunc i288 %r143 to i32
+%r145 = trunc i288 %r142 to i256
+%r146 = trunc i256 %r145 to i32
+%r147 = mul i32 %r146, %r6
+%r148 = call i288 @mulPv256x32(i32* %r3, i32 %r147)
+%r149 = zext i32 %r144 to i288
+%r150 = shl i288 %r149, 256
+%r151 = add i288 %r148, %r150
+%r153 = getelementptr i32, i32* %r2, i32 10
+%r154 = load i32, i32* %r153
+%r155 = zext i32 %r154 to i288
+%r156 = shl i288 %r155, 256
+%r157 = zext i256 %r145 to i288
+%r158 = or i288 %r156, %r157
+%r159 = zext i288 %r158 to i320
+%r160 = zext i288 %r151 to i320
+%r161 = add i320 %r159, %r160
+%r162 = lshr i320 %r161, 32
+%r163 = trunc i320 %r162 to i288
+%r164 = lshr i288 %r163, 256
+%r165 = trunc i288 %r164 to i32
+%r166 = trunc i288 %r163 to i256
+%r167 = trunc i256 %r166 to i32
+%r168 = mul i32 %r167, %r6
+%r169 = call i288 @mulPv256x32(i32* %r3, i32 %r168)
+%r170 = zext i32 %r165 to i288
+%r171 = shl i288 %r170, 256
+%r172 = add i288 %r169, %r171
+%r174 = getelementptr i32, i32* %r2, i32 11
+%r175 = load i32, i32* %r174
+%r176 = zext i32 %r175 to i288
+%r177 = shl i288 %r176, 256
+%r178 = zext i256 %r166 to i288
+%r179 = or i288 %r177, %r178
+%r180 = zext i288 %r179 to i320
+%r181 = zext i288 %r172 to i320
+%r182 = add i320 %r180, %r181
+%r183 = lshr i320 %r182, 32
+%r184 = trunc i320 %r183 to i288
+%r185 = lshr i288 %r184, 256
+%r186 = trunc i288 %r185 to i32
+%r187 = trunc i288 %r184 to i256
+%r188 = trunc i256 %r187 to i32
+%r189 = mul i32 %r188, %r6
+%r190 = call i288 @mulPv256x32(i32* %r3, i32 %r189)
+%r191 = zext i32 %r186 to i288
+%r192 = shl i288 %r191, 256
+%r193 = add i288 %r190, %r192
+%r195 = getelementptr i32, i32* %r2, i32 12
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i288
+%r198 = shl i288 %r197, 256
+%r199 = zext i256 %r187 to i288
+%r200 = or i288 %r198, %r199
+%r201 = zext i288 %r200 to i320
+%r202 = zext i288 %r193 to i320
+%r203 = add i320 %r201, %r202
+%r204 = lshr i320 %r203, 32
+%r205 = trunc i320 %r204 to i288
+%r206 = lshr i288 %r205, 256
+%r207 = trunc i288 %r206 to i32
+%r208 = trunc i288 %r205 to i256
+%r209 = trunc i256 %r208 to i32
+%r210 = mul i32 %r209, %r6
+%r211 = call i288 @mulPv256x32(i32* %r3, i32 %r210)
+%r212 = zext i32 %r207 to i288
+%r213 = shl i288 %r212, 256
+%r214 = add i288 %r211, %r213
+%r216 = getelementptr i32, i32* %r2, i32 13
+%r217 = load i32, i32* %r216
+%r218 = zext i32 %r217 to i288
+%r219 = shl i288 %r218, 256
+%r220 = zext i256 %r208 to i288
+%r221 = or i288 %r219, %r220
+%r222 = zext i288 %r221 to i320
+%r223 = zext i288 %r214 to i320
+%r224 = add i320 %r222, %r223
+%r225 = lshr i320 %r224, 32
+%r226 = trunc i320 %r225 to i288
+%r227 = lshr i288 %r226, 256
+%r228 = trunc i288 %r227 to i32
+%r229 = trunc i288 %r226 to i256
+%r230 = trunc i256 %r229 to i32
+%r231 = mul i32 %r230, %r6
+%r232 = call i288 @mulPv256x32(i32* %r3, i32 %r231)
+%r233 = zext i32 %r228 to i288
+%r234 = shl i288 %r233, 256
+%r235 = add i288 %r232, %r234
+%r237 = getelementptr i32, i32* %r2, i32 14
+%r238 = load i32, i32* %r237
+%r239 = zext i32 %r238 to i288
+%r240 = shl i288 %r239, 256
+%r241 = zext i256 %r229 to i288
+%r242 = or i288 %r240, %r241
+%r243 = zext i288 %r242 to i320
+%r244 = zext i288 %r235 to i320
+%r245 = add i320 %r243, %r244
+%r246 = lshr i320 %r245, 32
+%r247 = trunc i320 %r246 to i288
+%r248 = lshr i288 %r247, 256
+%r249 = trunc i288 %r248 to i32
+%r250 = trunc i288 %r247 to i256
+%r251 = trunc i256 %r250 to i32
+%r252 = mul i32 %r251, %r6
+%r253 = call i288 @mulPv256x32(i32* %r3, i32 %r252)
+%r254 = zext i32 %r249 to i288
+%r255 = shl i288 %r254, 256
+%r256 = add i288 %r253, %r255
+%r258 = getelementptr i32, i32* %r2, i32 15
+%r259 = load i32, i32* %r258
+%r260 = zext i32 %r259 to i288
+%r261 = shl i288 %r260, 256
+%r262 = zext i256 %r250 to i288
+%r263 = or i288 %r261, %r262
+%r264 = zext i288 %r263 to i320
+%r265 = zext i288 %r256 to i320
+%r266 = add i320 %r264, %r265
+%r267 = lshr i320 %r266, 32
+%r268 = trunc i320 %r267 to i288
+%r269 = lshr i288 %r268, 256
+%r270 = trunc i288 %r269 to i32
+%r271 = trunc i288 %r268 to i256
+%r272 = sub i256 %r271, %r56
+%r273 = lshr i256 %r272, 255
+%r274 = trunc i256 %r273 to i1
+%r275 = select i1 %r274, i256 %r271, i256 %r272
+%r277 = getelementptr i32, i32* %r1, i32 0
+%r278 = trunc i256 %r275 to i32
+store i32 %r278, i32* %r277
+%r279 = lshr i256 %r275, 32
+%r281 = getelementptr i32, i32* %r1, i32 1
+%r282 = trunc i256 %r279 to i32
+store i32 %r282, i32* %r281
+%r283 = lshr i256 %r279, 32
+%r285 = getelementptr i32, i32* %r1, i32 2
+%r286 = trunc i256 %r283 to i32
+store i32 %r286, i32* %r285
+%r287 = lshr i256 %r283, 32
+%r289 = getelementptr i32, i32* %r1, i32 3
+%r290 = trunc i256 %r287 to i32
+store i32 %r290, i32* %r289
+%r291 = lshr i256 %r287, 32
+%r293 = getelementptr i32, i32* %r1, i32 4
+%r294 = trunc i256 %r291 to i32
+store i32 %r294, i32* %r293
+%r295 = lshr i256 %r291, 32
+%r297 = getelementptr i32, i32* %r1, i32 5
+%r298 = trunc i256 %r295 to i32
+store i32 %r298, i32* %r297
+%r299 = lshr i256 %r295, 32
+%r301 = getelementptr i32, i32* %r1, i32 6
+%r302 = trunc i256 %r299 to i32
+store i32 %r302, i32* %r301
+%r303 = lshr i256 %r299, 32
+%r305 = getelementptr i32, i32* %r1, i32 7
+%r306 = trunc i256 %r303 to i32
+store i32 %r306, i32* %r305
+ret void
 }
-define i32 @mcl_fp_subPre7L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define i32 @mcl_fp_addPre8L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r3
 %r6 = zext i32 %r5 to i64
@@ -8196,79 +6521,222 @@ define i32 @mcl_fp_subPre7L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias
 %r46 = shl i224 %r45, 192
 %r47 = or i224 %r41, %r46
 %r48 = zext i224 %r47 to i256
-%r49 = load i32, i32* %r4
-%r50 = zext i32 %r49 to i64
-%r52 = getelementptr i32, i32* %r4, i32 1
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i64
-%r55 = shl i64 %r54, 32
-%r56 = or i64 %r50, %r55
-%r57 = zext i64 %r56 to i96
-%r59 = getelementptr i32, i32* %r4, i32 2
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r56 = load i32, i32* %r4
+%r57 = zext i32 %r56 to i64
+%r59 = getelementptr i32, i32* %r4, i32 1
 %r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i96
-%r62 = shl i96 %r61, 64
-%r63 = or i96 %r57, %r62
-%r64 = zext i96 %r63 to i128
-%r66 = getelementptr i32, i32* %r4, i32 3
+%r61 = zext i32 %r60 to i64
+%r62 = shl i64 %r61, 32
+%r63 = or i64 %r57, %r62
+%r64 = zext i64 %r63 to i96
+%r66 = getelementptr i32, i32* %r4, i32 2
 %r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i128
-%r69 = shl i128 %r68, 96
-%r70 = or i128 %r64, %r69
-%r71 = zext i128 %r70 to i160
-%r73 = getelementptr i32, i32* %r4, i32 4
+%r68 = zext i32 %r67 to i96
+%r69 = shl i96 %r68, 64
+%r70 = or i96 %r64, %r69
+%r71 = zext i96 %r70 to i128
+%r73 = getelementptr i32, i32* %r4, i32 3
 %r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i160
-%r76 = shl i160 %r75, 128
-%r77 = or i160 %r71, %r76
-%r78 = zext i160 %r77 to i192
-%r80 = getelementptr i32, i32* %r4, i32 5
+%r75 = zext i32 %r74 to i128
+%r76 = shl i128 %r75, 96
+%r77 = or i128 %r71, %r76
+%r78 = zext i128 %r77 to i160
+%r80 = getelementptr i32, i32* %r4, i32 4
 %r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i192
-%r83 = shl i192 %r82, 160
-%r84 = or i192 %r78, %r83
-%r85 = zext i192 %r84 to i224
-%r87 = getelementptr i32, i32* %r4, i32 6
+%r82 = zext i32 %r81 to i160
+%r83 = shl i160 %r82, 128
+%r84 = or i160 %r78, %r83
+%r85 = zext i160 %r84 to i192
+%r87 = getelementptr i32, i32* %r4, i32 5
 %r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i224
-%r90 = shl i224 %r89, 192
-%r91 = or i224 %r85, %r90
-%r92 = zext i224 %r91 to i256
-%r93 = sub i256 %r48, %r92
-%r94 = trunc i256 %r93 to i224
-%r95 = trunc i224 %r94 to i32
-%r97 = getelementptr i32, i32* %r2, i32 0
-store i32 %r95, i32* %r97
-%r98 = lshr i224 %r94, 32
-%r99 = trunc i224 %r98 to i32
-%r101 = getelementptr i32, i32* %r2, i32 1
-store i32 %r99, i32* %r101
-%r102 = lshr i224 %r98, 32
-%r103 = trunc i224 %r102 to i32
-%r105 = getelementptr i32, i32* %r2, i32 2
-store i32 %r103, i32* %r105
-%r106 = lshr i224 %r102, 32
-%r107 = trunc i224 %r106 to i32
-%r109 = getelementptr i32, i32* %r2, i32 3
-store i32 %r107, i32* %r109
-%r110 = lshr i224 %r106, 32
-%r111 = trunc i224 %r110 to i32
-%r113 = getelementptr i32, i32* %r2, i32 4
-store i32 %r111, i32* %r113
-%r114 = lshr i224 %r110, 32
-%r115 = trunc i224 %r114 to i32
-%r117 = getelementptr i32, i32* %r2, i32 5
-store i32 %r115, i32* %r117
-%r118 = lshr i224 %r114, 32
-%r119 = trunc i224 %r118 to i32
-%r121 = getelementptr i32, i32* %r2, i32 6
-store i32 %r119, i32* %r121
-%r122 = lshr i256 %r93, 224
-%r123 = trunc i256 %r122 to i32
-%r125 = and i32 %r123, 1
-ret i32 %r125
+%r89 = zext i32 %r88 to i192
+%r90 = shl i192 %r89, 160
+%r91 = or i192 %r85, %r90
+%r92 = zext i192 %r91 to i224
+%r94 = getelementptr i32, i32* %r4, i32 6
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i224
+%r97 = shl i224 %r96, 192
+%r98 = or i224 %r92, %r97
+%r99 = zext i224 %r98 to i256
+%r101 = getelementptr i32, i32* %r4, i32 7
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i256
+%r104 = shl i256 %r103, 224
+%r105 = or i256 %r99, %r104
+%r106 = zext i256 %r105 to i288
+%r107 = add i288 %r55, %r106
+%r108 = trunc i288 %r107 to i256
+%r110 = getelementptr i32, i32* %r2, i32 0
+%r111 = trunc i256 %r108 to i32
+store i32 %r111, i32* %r110
+%r112 = lshr i256 %r108, 32
+%r114 = getelementptr i32, i32* %r2, i32 1
+%r115 = trunc i256 %r112 to i32
+store i32 %r115, i32* %r114
+%r116 = lshr i256 %r112, 32
+%r118 = getelementptr i32, i32* %r2, i32 2
+%r119 = trunc i256 %r116 to i32
+store i32 %r119, i32* %r118
+%r120 = lshr i256 %r116, 32
+%r122 = getelementptr i32, i32* %r2, i32 3
+%r123 = trunc i256 %r120 to i32
+store i32 %r123, i32* %r122
+%r124 = lshr i256 %r120, 32
+%r126 = getelementptr i32, i32* %r2, i32 4
+%r127 = trunc i256 %r124 to i32
+store i32 %r127, i32* %r126
+%r128 = lshr i256 %r124, 32
+%r130 = getelementptr i32, i32* %r2, i32 5
+%r131 = trunc i256 %r128 to i32
+store i32 %r131, i32* %r130
+%r132 = lshr i256 %r128, 32
+%r134 = getelementptr i32, i32* %r2, i32 6
+%r135 = trunc i256 %r132 to i32
+store i32 %r135, i32* %r134
+%r136 = lshr i256 %r132, 32
+%r138 = getelementptr i32, i32* %r2, i32 7
+%r139 = trunc i256 %r136 to i32
+store i32 %r139, i32* %r138
+%r140 = lshr i288 %r107, 256
+%r141 = trunc i288 %r140 to i32
+ret i32 %r141
 }
-define void @mcl_fp_shr1_7L(i32* noalias  %r1, i32* noalias  %r2)
+define i32 @mcl_fp_subPre8L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r3
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r56 = load i32, i32* %r4
+%r57 = zext i32 %r56 to i64
+%r59 = getelementptr i32, i32* %r4, i32 1
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i64
+%r62 = shl i64 %r61, 32
+%r63 = or i64 %r57, %r62
+%r64 = zext i64 %r63 to i96
+%r66 = getelementptr i32, i32* %r4, i32 2
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i96
+%r69 = shl i96 %r68, 64
+%r70 = or i96 %r64, %r69
+%r71 = zext i96 %r70 to i128
+%r73 = getelementptr i32, i32* %r4, i32 3
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i128
+%r76 = shl i128 %r75, 96
+%r77 = or i128 %r71, %r76
+%r78 = zext i128 %r77 to i160
+%r80 = getelementptr i32, i32* %r4, i32 4
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i160
+%r83 = shl i160 %r82, 128
+%r84 = or i160 %r78, %r83
+%r85 = zext i160 %r84 to i192
+%r87 = getelementptr i32, i32* %r4, i32 5
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i192
+%r90 = shl i192 %r89, 160
+%r91 = or i192 %r85, %r90
+%r92 = zext i192 %r91 to i224
+%r94 = getelementptr i32, i32* %r4, i32 6
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i224
+%r97 = shl i224 %r96, 192
+%r98 = or i224 %r92, %r97
+%r99 = zext i224 %r98 to i256
+%r101 = getelementptr i32, i32* %r4, i32 7
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i256
+%r104 = shl i256 %r103, 224
+%r105 = or i256 %r99, %r104
+%r106 = zext i256 %r105 to i288
+%r107 = sub i288 %r55, %r106
+%r108 = trunc i288 %r107 to i256
+%r110 = getelementptr i32, i32* %r2, i32 0
+%r111 = trunc i256 %r108 to i32
+store i32 %r111, i32* %r110
+%r112 = lshr i256 %r108, 32
+%r114 = getelementptr i32, i32* %r2, i32 1
+%r115 = trunc i256 %r112 to i32
+store i32 %r115, i32* %r114
+%r116 = lshr i256 %r112, 32
+%r118 = getelementptr i32, i32* %r2, i32 2
+%r119 = trunc i256 %r116 to i32
+store i32 %r119, i32* %r118
+%r120 = lshr i256 %r116, 32
+%r122 = getelementptr i32, i32* %r2, i32 3
+%r123 = trunc i256 %r120 to i32
+store i32 %r123, i32* %r122
+%r124 = lshr i256 %r120, 32
+%r126 = getelementptr i32, i32* %r2, i32 4
+%r127 = trunc i256 %r124 to i32
+store i32 %r127, i32* %r126
+%r128 = lshr i256 %r124, 32
+%r130 = getelementptr i32, i32* %r2, i32 5
+%r131 = trunc i256 %r128 to i32
+store i32 %r131, i32* %r130
+%r132 = lshr i256 %r128, 32
+%r134 = getelementptr i32, i32* %r2, i32 6
+%r135 = trunc i256 %r132 to i32
+store i32 %r135, i32* %r134
+%r136 = lshr i256 %r132, 32
+%r138 = getelementptr i32, i32* %r2, i32 7
+%r139 = trunc i256 %r136 to i32
+store i32 %r139, i32* %r138
+%r141 = lshr i288 %r107, 256
+%r142 = trunc i288 %r141 to i32
+%r143 = and i32 %r142, 1
+ret i32 %r143
+}
+define void @mcl_fp_shr1_8L(i32* noalias  %r1, i32* noalias  %r2)
 {
 %r3 = load i32, i32* %r2
 %r4 = zext i32 %r3 to i64
@@ -8307,37 +6775,47 @@ define void @mcl_fp_shr1_7L(i32* noalias  %r1, i32* noalias  %r2)
 %r43 = zext i32 %r42 to i224
 %r44 = shl i224 %r43, 192
 %r45 = or i224 %r39, %r44
-%r46 = lshr i224 %r45, 1
-%r47 = trunc i224 %r46 to i32
-%r49 = getelementptr i32, i32* %r1, i32 0
-store i32 %r47, i32* %r49
-%r50 = lshr i224 %r46, 32
-%r51 = trunc i224 %r50 to i32
-%r53 = getelementptr i32, i32* %r1, i32 1
-store i32 %r51, i32* %r53
-%r54 = lshr i224 %r50, 32
-%r55 = trunc i224 %r54 to i32
-%r57 = getelementptr i32, i32* %r1, i32 2
-store i32 %r55, i32* %r57
-%r58 = lshr i224 %r54, 32
-%r59 = trunc i224 %r58 to i32
-%r61 = getelementptr i32, i32* %r1, i32 3
-store i32 %r59, i32* %r61
-%r62 = lshr i224 %r58, 32
-%r63 = trunc i224 %r62 to i32
-%r65 = getelementptr i32, i32* %r1, i32 4
-store i32 %r63, i32* %r65
-%r66 = lshr i224 %r62, 32
-%r67 = trunc i224 %r66 to i32
-%r69 = getelementptr i32, i32* %r1, i32 5
-store i32 %r67, i32* %r69
-%r70 = lshr i224 %r66, 32
-%r71 = trunc i224 %r70 to i32
-%r73 = getelementptr i32, i32* %r1, i32 6
-store i32 %r71, i32* %r73
+%r46 = zext i224 %r45 to i256
+%r48 = getelementptr i32, i32* %r2, i32 7
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i256
+%r51 = shl i256 %r50, 224
+%r52 = or i256 %r46, %r51
+%r53 = lshr i256 %r52, 1
+%r55 = getelementptr i32, i32* %r1, i32 0
+%r56 = trunc i256 %r53 to i32
+store i32 %r56, i32* %r55
+%r57 = lshr i256 %r53, 32
+%r59 = getelementptr i32, i32* %r1, i32 1
+%r60 = trunc i256 %r57 to i32
+store i32 %r60, i32* %r59
+%r61 = lshr i256 %r57, 32
+%r63 = getelementptr i32, i32* %r1, i32 2
+%r64 = trunc i256 %r61 to i32
+store i32 %r64, i32* %r63
+%r65 = lshr i256 %r61, 32
+%r67 = getelementptr i32, i32* %r1, i32 3
+%r68 = trunc i256 %r65 to i32
+store i32 %r68, i32* %r67
+%r69 = lshr i256 %r65, 32
+%r71 = getelementptr i32, i32* %r1, i32 4
+%r72 = trunc i256 %r69 to i32
+store i32 %r72, i32* %r71
+%r73 = lshr i256 %r69, 32
+%r75 = getelementptr i32, i32* %r1, i32 5
+%r76 = trunc i256 %r73 to i32
+store i32 %r76, i32* %r75
+%r77 = lshr i256 %r73, 32
+%r79 = getelementptr i32, i32* %r1, i32 6
+%r80 = trunc i256 %r77 to i32
+store i32 %r80, i32* %r79
+%r81 = lshr i256 %r77, 32
+%r83 = getelementptr i32, i32* %r1, i32 7
+%r84 = trunc i256 %r81 to i32
+store i32 %r84, i32* %r83
 ret void
 }
-define void @mcl_fp_add7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_add8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -8376,150 +6854,176 @@ define void @mcl_fp_add7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r
 %r45 = zext i32 %r44 to i224
 %r46 = shl i224 %r45, 192
 %r47 = or i224 %r41, %r46
-%r48 = load i32, i32* %r3
-%r49 = zext i32 %r48 to i64
-%r51 = getelementptr i32, i32* %r3, i32 1
-%r52 = load i32, i32* %r51
-%r53 = zext i32 %r52 to i64
-%r54 = shl i64 %r53, 32
-%r55 = or i64 %r49, %r54
-%r56 = zext i64 %r55 to i96
-%r58 = getelementptr i32, i32* %r3, i32 2
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = load i32, i32* %r3
+%r56 = zext i32 %r55 to i64
+%r58 = getelementptr i32, i32* %r3, i32 1
 %r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i96
-%r61 = shl i96 %r60, 64
-%r62 = or i96 %r56, %r61
-%r63 = zext i96 %r62 to i128
-%r65 = getelementptr i32, i32* %r3, i32 3
+%r60 = zext i32 %r59 to i64
+%r61 = shl i64 %r60, 32
+%r62 = or i64 %r56, %r61
+%r63 = zext i64 %r62 to i96
+%r65 = getelementptr i32, i32* %r3, i32 2
 %r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i128
-%r68 = shl i128 %r67, 96
-%r69 = or i128 %r63, %r68
-%r70 = zext i128 %r69 to i160
-%r72 = getelementptr i32, i32* %r3, i32 4
+%r67 = zext i32 %r66 to i96
+%r68 = shl i96 %r67, 64
+%r69 = or i96 %r63, %r68
+%r70 = zext i96 %r69 to i128
+%r72 = getelementptr i32, i32* %r3, i32 3
 %r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i160
-%r75 = shl i160 %r74, 128
-%r76 = or i160 %r70, %r75
-%r77 = zext i160 %r76 to i192
-%r79 = getelementptr i32, i32* %r3, i32 5
+%r74 = zext i32 %r73 to i128
+%r75 = shl i128 %r74, 96
+%r76 = or i128 %r70, %r75
+%r77 = zext i128 %r76 to i160
+%r79 = getelementptr i32, i32* %r3, i32 4
 %r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i192
-%r82 = shl i192 %r81, 160
-%r83 = or i192 %r77, %r82
-%r84 = zext i192 %r83 to i224
-%r86 = getelementptr i32, i32* %r3, i32 6
+%r81 = zext i32 %r80 to i160
+%r82 = shl i160 %r81, 128
+%r83 = or i160 %r77, %r82
+%r84 = zext i160 %r83 to i192
+%r86 = getelementptr i32, i32* %r3, i32 5
 %r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i224
-%r89 = shl i224 %r88, 192
-%r90 = or i224 %r84, %r89
-%r91 = zext i224 %r47 to i256
-%r92 = zext i224 %r90 to i256
-%r93 = add i256 %r91, %r92
-%r94 = trunc i256 %r93 to i224
-%r95 = trunc i224 %r94 to i32
-%r97 = getelementptr i32, i32* %r1, i32 0
-store i32 %r95, i32* %r97
-%r98 = lshr i224 %r94, 32
-%r99 = trunc i224 %r98 to i32
-%r101 = getelementptr i32, i32* %r1, i32 1
-store i32 %r99, i32* %r101
-%r102 = lshr i224 %r98, 32
-%r103 = trunc i224 %r102 to i32
-%r105 = getelementptr i32, i32* %r1, i32 2
-store i32 %r103, i32* %r105
-%r106 = lshr i224 %r102, 32
-%r107 = trunc i224 %r106 to i32
-%r109 = getelementptr i32, i32* %r1, i32 3
-store i32 %r107, i32* %r109
-%r110 = lshr i224 %r106, 32
-%r111 = trunc i224 %r110 to i32
-%r113 = getelementptr i32, i32* %r1, i32 4
-store i32 %r111, i32* %r113
-%r114 = lshr i224 %r110, 32
-%r115 = trunc i224 %r114 to i32
-%r117 = getelementptr i32, i32* %r1, i32 5
-store i32 %r115, i32* %r117
-%r118 = lshr i224 %r114, 32
-%r119 = trunc i224 %r118 to i32
-%r121 = getelementptr i32, i32* %r1, i32 6
-store i32 %r119, i32* %r121
-%r122 = load i32, i32* %r4
-%r123 = zext i32 %r122 to i64
-%r125 = getelementptr i32, i32* %r4, i32 1
-%r126 = load i32, i32* %r125
-%r127 = zext i32 %r126 to i64
-%r128 = shl i64 %r127, 32
-%r129 = or i64 %r123, %r128
-%r130 = zext i64 %r129 to i96
-%r132 = getelementptr i32, i32* %r4, i32 2
-%r133 = load i32, i32* %r132
-%r134 = zext i32 %r133 to i96
-%r135 = shl i96 %r134, 64
-%r136 = or i96 %r130, %r135
-%r137 = zext i96 %r136 to i128
-%r139 = getelementptr i32, i32* %r4, i32 3
-%r140 = load i32, i32* %r139
-%r141 = zext i32 %r140 to i128
-%r142 = shl i128 %r141, 96
-%r143 = or i128 %r137, %r142
-%r144 = zext i128 %r143 to i160
-%r146 = getelementptr i32, i32* %r4, i32 4
-%r147 = load i32, i32* %r146
-%r148 = zext i32 %r147 to i160
-%r149 = shl i160 %r148, 128
-%r150 = or i160 %r144, %r149
-%r151 = zext i160 %r150 to i192
-%r153 = getelementptr i32, i32* %r4, i32 5
-%r154 = load i32, i32* %r153
-%r155 = zext i32 %r154 to i192
-%r156 = shl i192 %r155, 160
-%r157 = or i192 %r151, %r156
-%r158 = zext i192 %r157 to i224
-%r160 = getelementptr i32, i32* %r4, i32 6
-%r161 = load i32, i32* %r160
-%r162 = zext i32 %r161 to i224
-%r163 = shl i224 %r162, 192
-%r164 = or i224 %r158, %r163
-%r165 = zext i224 %r164 to i256
-%r166 = sub i256 %r93, %r165
-%r167 = lshr i256 %r166, 224
-%r168 = trunc i256 %r167 to i1
-br i1%r168, label %carry, label %nocarry
+%r88 = zext i32 %r87 to i192
+%r89 = shl i192 %r88, 160
+%r90 = or i192 %r84, %r89
+%r91 = zext i192 %r90 to i224
+%r93 = getelementptr i32, i32* %r3, i32 6
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i224
+%r96 = shl i224 %r95, 192
+%r97 = or i224 %r91, %r96
+%r98 = zext i224 %r97 to i256
+%r100 = getelementptr i32, i32* %r3, i32 7
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i256
+%r103 = shl i256 %r102, 224
+%r104 = or i256 %r98, %r103
+%r105 = zext i256 %r54 to i288
+%r106 = zext i256 %r104 to i288
+%r107 = add i288 %r105, %r106
+%r108 = trunc i288 %r107 to i256
+%r110 = getelementptr i32, i32* %r1, i32 0
+%r111 = trunc i256 %r108 to i32
+store i32 %r111, i32* %r110
+%r112 = lshr i256 %r108, 32
+%r114 = getelementptr i32, i32* %r1, i32 1
+%r115 = trunc i256 %r112 to i32
+store i32 %r115, i32* %r114
+%r116 = lshr i256 %r112, 32
+%r118 = getelementptr i32, i32* %r1, i32 2
+%r119 = trunc i256 %r116 to i32
+store i32 %r119, i32* %r118
+%r120 = lshr i256 %r116, 32
+%r122 = getelementptr i32, i32* %r1, i32 3
+%r123 = trunc i256 %r120 to i32
+store i32 %r123, i32* %r122
+%r124 = lshr i256 %r120, 32
+%r126 = getelementptr i32, i32* %r1, i32 4
+%r127 = trunc i256 %r124 to i32
+store i32 %r127, i32* %r126
+%r128 = lshr i256 %r124, 32
+%r130 = getelementptr i32, i32* %r1, i32 5
+%r131 = trunc i256 %r128 to i32
+store i32 %r131, i32* %r130
+%r132 = lshr i256 %r128, 32
+%r134 = getelementptr i32, i32* %r1, i32 6
+%r135 = trunc i256 %r132 to i32
+store i32 %r135, i32* %r134
+%r136 = lshr i256 %r132, 32
+%r138 = getelementptr i32, i32* %r1, i32 7
+%r139 = trunc i256 %r136 to i32
+store i32 %r139, i32* %r138
+%r140 = load i32, i32* %r4
+%r141 = zext i32 %r140 to i64
+%r143 = getelementptr i32, i32* %r4, i32 1
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i64
+%r146 = shl i64 %r145, 32
+%r147 = or i64 %r141, %r146
+%r148 = zext i64 %r147 to i96
+%r150 = getelementptr i32, i32* %r4, i32 2
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i96
+%r153 = shl i96 %r152, 64
+%r154 = or i96 %r148, %r153
+%r155 = zext i96 %r154 to i128
+%r157 = getelementptr i32, i32* %r4, i32 3
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i128
+%r160 = shl i128 %r159, 96
+%r161 = or i128 %r155, %r160
+%r162 = zext i128 %r161 to i160
+%r164 = getelementptr i32, i32* %r4, i32 4
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i160
+%r167 = shl i160 %r166, 128
+%r168 = or i160 %r162, %r167
+%r169 = zext i160 %r168 to i192
+%r171 = getelementptr i32, i32* %r4, i32 5
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i192
+%r174 = shl i192 %r173, 160
+%r175 = or i192 %r169, %r174
+%r176 = zext i192 %r175 to i224
+%r178 = getelementptr i32, i32* %r4, i32 6
+%r179 = load i32, i32* %r178
+%r180 = zext i32 %r179 to i224
+%r181 = shl i224 %r180, 192
+%r182 = or i224 %r176, %r181
+%r183 = zext i224 %r182 to i256
+%r185 = getelementptr i32, i32* %r4, i32 7
+%r186 = load i32, i32* %r185
+%r187 = zext i32 %r186 to i256
+%r188 = shl i256 %r187, 224
+%r189 = or i256 %r183, %r188
+%r190 = zext i256 %r189 to i288
+%r191 = sub i288 %r107, %r190
+%r192 = lshr i288 %r191, 256
+%r193 = trunc i288 %r192 to i1
+br i1%r193, label %carry, label %nocarry
 nocarry:
-%r169 = trunc i256 %r166 to i224
-%r170 = trunc i224 %r169 to i32
-%r172 = getelementptr i32, i32* %r1, i32 0
-store i32 %r170, i32* %r172
-%r173 = lshr i224 %r169, 32
-%r174 = trunc i224 %r173 to i32
-%r176 = getelementptr i32, i32* %r1, i32 1
-store i32 %r174, i32* %r176
-%r177 = lshr i224 %r173, 32
-%r178 = trunc i224 %r177 to i32
-%r180 = getelementptr i32, i32* %r1, i32 2
-store i32 %r178, i32* %r180
-%r181 = lshr i224 %r177, 32
-%r182 = trunc i224 %r181 to i32
-%r184 = getelementptr i32, i32* %r1, i32 3
-store i32 %r182, i32* %r184
-%r185 = lshr i224 %r181, 32
-%r186 = trunc i224 %r185 to i32
-%r188 = getelementptr i32, i32* %r1, i32 4
-store i32 %r186, i32* %r188
-%r189 = lshr i224 %r185, 32
-%r190 = trunc i224 %r189 to i32
-%r192 = getelementptr i32, i32* %r1, i32 5
-store i32 %r190, i32* %r192
-%r193 = lshr i224 %r189, 32
-%r194 = trunc i224 %r193 to i32
-%r196 = getelementptr i32, i32* %r1, i32 6
-store i32 %r194, i32* %r196
+%r194 = trunc i288 %r191 to i256
+%r196 = getelementptr i32, i32* %r1, i32 0
+%r197 = trunc i256 %r194 to i32
+store i32 %r197, i32* %r196
+%r198 = lshr i256 %r194, 32
+%r200 = getelementptr i32, i32* %r1, i32 1
+%r201 = trunc i256 %r198 to i32
+store i32 %r201, i32* %r200
+%r202 = lshr i256 %r198, 32
+%r204 = getelementptr i32, i32* %r1, i32 2
+%r205 = trunc i256 %r202 to i32
+store i32 %r205, i32* %r204
+%r206 = lshr i256 %r202, 32
+%r208 = getelementptr i32, i32* %r1, i32 3
+%r209 = trunc i256 %r206 to i32
+store i32 %r209, i32* %r208
+%r210 = lshr i256 %r206, 32
+%r212 = getelementptr i32, i32* %r1, i32 4
+%r213 = trunc i256 %r210 to i32
+store i32 %r213, i32* %r212
+%r214 = lshr i256 %r210, 32
+%r216 = getelementptr i32, i32* %r1, i32 5
+%r217 = trunc i256 %r214 to i32
+store i32 %r217, i32* %r216
+%r218 = lshr i256 %r214, 32
+%r220 = getelementptr i32, i32* %r1, i32 6
+%r221 = trunc i256 %r218 to i32
+store i32 %r221, i32* %r220
+%r222 = lshr i256 %r218, 32
+%r224 = getelementptr i32, i32* %r1, i32 7
+%r225 = trunc i256 %r222 to i32
+store i32 %r225, i32* %r224
 ret void
 carry:
 ret void
 }
-define void @mcl_fp_addNF7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_addNF8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -8558,115 +7062,137 @@ define void @mcl_fp_addNF7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r45 = zext i32 %r44 to i224
 %r46 = shl i224 %r45, 192
 %r47 = or i224 %r41, %r46
-%r48 = load i32, i32* %r3
-%r49 = zext i32 %r48 to i64
-%r51 = getelementptr i32, i32* %r3, i32 1
-%r52 = load i32, i32* %r51
-%r53 = zext i32 %r52 to i64
-%r54 = shl i64 %r53, 32
-%r55 = or i64 %r49, %r54
-%r56 = zext i64 %r55 to i96
-%r58 = getelementptr i32, i32* %r3, i32 2
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = load i32, i32* %r3
+%r56 = zext i32 %r55 to i64
+%r58 = getelementptr i32, i32* %r3, i32 1
 %r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i96
-%r61 = shl i96 %r60, 64
-%r62 = or i96 %r56, %r61
-%r63 = zext i96 %r62 to i128
-%r65 = getelementptr i32, i32* %r3, i32 3
+%r60 = zext i32 %r59 to i64
+%r61 = shl i64 %r60, 32
+%r62 = or i64 %r56, %r61
+%r63 = zext i64 %r62 to i96
+%r65 = getelementptr i32, i32* %r3, i32 2
 %r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i128
-%r68 = shl i128 %r67, 96
-%r69 = or i128 %r63, %r68
-%r70 = zext i128 %r69 to i160
-%r72 = getelementptr i32, i32* %r3, i32 4
+%r67 = zext i32 %r66 to i96
+%r68 = shl i96 %r67, 64
+%r69 = or i96 %r63, %r68
+%r70 = zext i96 %r69 to i128
+%r72 = getelementptr i32, i32* %r3, i32 3
 %r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i160
-%r75 = shl i160 %r74, 128
-%r76 = or i160 %r70, %r75
-%r77 = zext i160 %r76 to i192
-%r79 = getelementptr i32, i32* %r3, i32 5
+%r74 = zext i32 %r73 to i128
+%r75 = shl i128 %r74, 96
+%r76 = or i128 %r70, %r75
+%r77 = zext i128 %r76 to i160
+%r79 = getelementptr i32, i32* %r3, i32 4
 %r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i192
-%r82 = shl i192 %r81, 160
-%r83 = or i192 %r77, %r82
-%r84 = zext i192 %r83 to i224
-%r86 = getelementptr i32, i32* %r3, i32 6
+%r81 = zext i32 %r80 to i160
+%r82 = shl i160 %r81, 128
+%r83 = or i160 %r77, %r82
+%r84 = zext i160 %r83 to i192
+%r86 = getelementptr i32, i32* %r3, i32 5
 %r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i224
-%r89 = shl i224 %r88, 192
-%r90 = or i224 %r84, %r89
-%r91 = add i224 %r47, %r90
-%r92 = load i32, i32* %r4
-%r93 = zext i32 %r92 to i64
-%r95 = getelementptr i32, i32* %r4, i32 1
-%r96 = load i32, i32* %r95
-%r97 = zext i32 %r96 to i64
-%r98 = shl i64 %r97, 32
-%r99 = or i64 %r93, %r98
-%r100 = zext i64 %r99 to i96
-%r102 = getelementptr i32, i32* %r4, i32 2
-%r103 = load i32, i32* %r102
-%r104 = zext i32 %r103 to i96
-%r105 = shl i96 %r104, 64
-%r106 = or i96 %r100, %r105
-%r107 = zext i96 %r106 to i128
-%r109 = getelementptr i32, i32* %r4, i32 3
+%r88 = zext i32 %r87 to i192
+%r89 = shl i192 %r88, 160
+%r90 = or i192 %r84, %r89
+%r91 = zext i192 %r90 to i224
+%r93 = getelementptr i32, i32* %r3, i32 6
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i224
+%r96 = shl i224 %r95, 192
+%r97 = or i224 %r91, %r96
+%r98 = zext i224 %r97 to i256
+%r100 = getelementptr i32, i32* %r3, i32 7
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i256
+%r103 = shl i256 %r102, 224
+%r104 = or i256 %r98, %r103
+%r105 = add i256 %r54, %r104
+%r106 = load i32, i32* %r4
+%r107 = zext i32 %r106 to i64
+%r109 = getelementptr i32, i32* %r4, i32 1
 %r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i128
-%r112 = shl i128 %r111, 96
-%r113 = or i128 %r107, %r112
-%r114 = zext i128 %r113 to i160
-%r116 = getelementptr i32, i32* %r4, i32 4
+%r111 = zext i32 %r110 to i64
+%r112 = shl i64 %r111, 32
+%r113 = or i64 %r107, %r112
+%r114 = zext i64 %r113 to i96
+%r116 = getelementptr i32, i32* %r4, i32 2
 %r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i160
-%r119 = shl i160 %r118, 128
-%r120 = or i160 %r114, %r119
-%r121 = zext i160 %r120 to i192
-%r123 = getelementptr i32, i32* %r4, i32 5
+%r118 = zext i32 %r117 to i96
+%r119 = shl i96 %r118, 64
+%r120 = or i96 %r114, %r119
+%r121 = zext i96 %r120 to i128
+%r123 = getelementptr i32, i32* %r4, i32 3
 %r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i192
-%r126 = shl i192 %r125, 160
-%r127 = or i192 %r121, %r126
-%r128 = zext i192 %r127 to i224
-%r130 = getelementptr i32, i32* %r4, i32 6
+%r125 = zext i32 %r124 to i128
+%r126 = shl i128 %r125, 96
+%r127 = or i128 %r121, %r126
+%r128 = zext i128 %r127 to i160
+%r130 = getelementptr i32, i32* %r4, i32 4
 %r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i224
-%r133 = shl i224 %r132, 192
-%r134 = or i224 %r128, %r133
-%r135 = sub i224 %r91, %r134
-%r136 = lshr i224 %r135, 223
-%r137 = trunc i224 %r136 to i1
-%r138 = select i1 %r137, i224 %r91, i224 %r135
-%r139 = trunc i224 %r138 to i32
-%r141 = getelementptr i32, i32* %r1, i32 0
-store i32 %r139, i32* %r141
-%r142 = lshr i224 %r138, 32
-%r143 = trunc i224 %r142 to i32
-%r145 = getelementptr i32, i32* %r1, i32 1
-store i32 %r143, i32* %r145
-%r146 = lshr i224 %r142, 32
-%r147 = trunc i224 %r146 to i32
-%r149 = getelementptr i32, i32* %r1, i32 2
-store i32 %r147, i32* %r149
-%r150 = lshr i224 %r146, 32
-%r151 = trunc i224 %r150 to i32
-%r153 = getelementptr i32, i32* %r1, i32 3
-store i32 %r151, i32* %r153
-%r154 = lshr i224 %r150, 32
-%r155 = trunc i224 %r154 to i32
-%r157 = getelementptr i32, i32* %r1, i32 4
-store i32 %r155, i32* %r157
-%r158 = lshr i224 %r154, 32
-%r159 = trunc i224 %r158 to i32
-%r161 = getelementptr i32, i32* %r1, i32 5
-store i32 %r159, i32* %r161
-%r162 = lshr i224 %r158, 32
-%r163 = trunc i224 %r162 to i32
-%r165 = getelementptr i32, i32* %r1, i32 6
-store i32 %r163, i32* %r165
+%r132 = zext i32 %r131 to i160
+%r133 = shl i160 %r132, 128
+%r134 = or i160 %r128, %r133
+%r135 = zext i160 %r134 to i192
+%r137 = getelementptr i32, i32* %r4, i32 5
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i192
+%r140 = shl i192 %r139, 160
+%r141 = or i192 %r135, %r140
+%r142 = zext i192 %r141 to i224
+%r144 = getelementptr i32, i32* %r4, i32 6
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i224
+%r147 = shl i224 %r146, 192
+%r148 = or i224 %r142, %r147
+%r149 = zext i224 %r148 to i256
+%r151 = getelementptr i32, i32* %r4, i32 7
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i256
+%r154 = shl i256 %r153, 224
+%r155 = or i256 %r149, %r154
+%r156 = sub i256 %r105, %r155
+%r157 = lshr i256 %r156, 255
+%r158 = trunc i256 %r157 to i1
+%r159 = select i1 %r158, i256 %r105, i256 %r156
+%r161 = getelementptr i32, i32* %r1, i32 0
+%r162 = trunc i256 %r159 to i32
+store i32 %r162, i32* %r161
+%r163 = lshr i256 %r159, 32
+%r165 = getelementptr i32, i32* %r1, i32 1
+%r166 = trunc i256 %r163 to i32
+store i32 %r166, i32* %r165
+%r167 = lshr i256 %r163, 32
+%r169 = getelementptr i32, i32* %r1, i32 2
+%r170 = trunc i256 %r167 to i32
+store i32 %r170, i32* %r169
+%r171 = lshr i256 %r167, 32
+%r173 = getelementptr i32, i32* %r1, i32 3
+%r174 = trunc i256 %r171 to i32
+store i32 %r174, i32* %r173
+%r175 = lshr i256 %r171, 32
+%r177 = getelementptr i32, i32* %r1, i32 4
+%r178 = trunc i256 %r175 to i32
+store i32 %r178, i32* %r177
+%r179 = lshr i256 %r175, 32
+%r181 = getelementptr i32, i32* %r1, i32 5
+%r182 = trunc i256 %r179 to i32
+store i32 %r182, i32* %r181
+%r183 = lshr i256 %r179, 32
+%r185 = getelementptr i32, i32* %r1, i32 6
+%r186 = trunc i256 %r183 to i32
+store i32 %r186, i32* %r185
+%r187 = lshr i256 %r183, 32
+%r189 = getelementptr i32, i32* %r1, i32 7
+%r190 = trunc i256 %r187 to i32
+store i32 %r190, i32* %r189
 ret void
 }
-define void @mcl_fp_sub7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_sub8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -8705,148 +7231,174 @@ define void @mcl_fp_sub7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r
 %r45 = zext i32 %r44 to i224
 %r46 = shl i224 %r45, 192
 %r47 = or i224 %r41, %r46
-%r48 = load i32, i32* %r3
-%r49 = zext i32 %r48 to i64
-%r51 = getelementptr i32, i32* %r3, i32 1
-%r52 = load i32, i32* %r51
-%r53 = zext i32 %r52 to i64
-%r54 = shl i64 %r53, 32
-%r55 = or i64 %r49, %r54
-%r56 = zext i64 %r55 to i96
-%r58 = getelementptr i32, i32* %r3, i32 2
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = load i32, i32* %r3
+%r56 = zext i32 %r55 to i64
+%r58 = getelementptr i32, i32* %r3, i32 1
 %r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i96
-%r61 = shl i96 %r60, 64
-%r62 = or i96 %r56, %r61
-%r63 = zext i96 %r62 to i128
-%r65 = getelementptr i32, i32* %r3, i32 3
+%r60 = zext i32 %r59 to i64
+%r61 = shl i64 %r60, 32
+%r62 = or i64 %r56, %r61
+%r63 = zext i64 %r62 to i96
+%r65 = getelementptr i32, i32* %r3, i32 2
 %r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i128
-%r68 = shl i128 %r67, 96
-%r69 = or i128 %r63, %r68
-%r70 = zext i128 %r69 to i160
-%r72 = getelementptr i32, i32* %r3, i32 4
+%r67 = zext i32 %r66 to i96
+%r68 = shl i96 %r67, 64
+%r69 = or i96 %r63, %r68
+%r70 = zext i96 %r69 to i128
+%r72 = getelementptr i32, i32* %r3, i32 3
 %r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i160
-%r75 = shl i160 %r74, 128
-%r76 = or i160 %r70, %r75
-%r77 = zext i160 %r76 to i192
-%r79 = getelementptr i32, i32* %r3, i32 5
+%r74 = zext i32 %r73 to i128
+%r75 = shl i128 %r74, 96
+%r76 = or i128 %r70, %r75
+%r77 = zext i128 %r76 to i160
+%r79 = getelementptr i32, i32* %r3, i32 4
 %r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i192
-%r82 = shl i192 %r81, 160
-%r83 = or i192 %r77, %r82
-%r84 = zext i192 %r83 to i224
-%r86 = getelementptr i32, i32* %r3, i32 6
+%r81 = zext i32 %r80 to i160
+%r82 = shl i160 %r81, 128
+%r83 = or i160 %r77, %r82
+%r84 = zext i160 %r83 to i192
+%r86 = getelementptr i32, i32* %r3, i32 5
 %r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i224
-%r89 = shl i224 %r88, 192
-%r90 = or i224 %r84, %r89
-%r91 = zext i224 %r47 to i256
-%r92 = zext i224 %r90 to i256
-%r93 = sub i256 %r91, %r92
-%r94 = trunc i256 %r93 to i224
-%r95 = lshr i256 %r93, 224
-%r96 = trunc i256 %r95 to i1
-%r97 = trunc i224 %r94 to i32
-%r99 = getelementptr i32, i32* %r1, i32 0
-store i32 %r97, i32* %r99
-%r100 = lshr i224 %r94, 32
-%r101 = trunc i224 %r100 to i32
-%r103 = getelementptr i32, i32* %r1, i32 1
-store i32 %r101, i32* %r103
-%r104 = lshr i224 %r100, 32
-%r105 = trunc i224 %r104 to i32
-%r107 = getelementptr i32, i32* %r1, i32 2
-store i32 %r105, i32* %r107
-%r108 = lshr i224 %r104, 32
-%r109 = trunc i224 %r108 to i32
-%r111 = getelementptr i32, i32* %r1, i32 3
-store i32 %r109, i32* %r111
-%r112 = lshr i224 %r108, 32
-%r113 = trunc i224 %r112 to i32
-%r115 = getelementptr i32, i32* %r1, i32 4
-store i32 %r113, i32* %r115
-%r116 = lshr i224 %r112, 32
-%r117 = trunc i224 %r116 to i32
-%r119 = getelementptr i32, i32* %r1, i32 5
-store i32 %r117, i32* %r119
-%r120 = lshr i224 %r116, 32
-%r121 = trunc i224 %r120 to i32
-%r123 = getelementptr i32, i32* %r1, i32 6
-store i32 %r121, i32* %r123
-br i1%r96, label %carry, label %nocarry
+%r88 = zext i32 %r87 to i192
+%r89 = shl i192 %r88, 160
+%r90 = or i192 %r84, %r89
+%r91 = zext i192 %r90 to i224
+%r93 = getelementptr i32, i32* %r3, i32 6
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i224
+%r96 = shl i224 %r95, 192
+%r97 = or i224 %r91, %r96
+%r98 = zext i224 %r97 to i256
+%r100 = getelementptr i32, i32* %r3, i32 7
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i256
+%r103 = shl i256 %r102, 224
+%r104 = or i256 %r98, %r103
+%r105 = zext i256 %r54 to i288
+%r106 = zext i256 %r104 to i288
+%r107 = sub i288 %r105, %r106
+%r108 = trunc i288 %r107 to i256
+%r109 = lshr i288 %r107, 256
+%r110 = trunc i288 %r109 to i1
+%r112 = getelementptr i32, i32* %r1, i32 0
+%r113 = trunc i256 %r108 to i32
+store i32 %r113, i32* %r112
+%r114 = lshr i256 %r108, 32
+%r116 = getelementptr i32, i32* %r1, i32 1
+%r117 = trunc i256 %r114 to i32
+store i32 %r117, i32* %r116
+%r118 = lshr i256 %r114, 32
+%r120 = getelementptr i32, i32* %r1, i32 2
+%r121 = trunc i256 %r118 to i32
+store i32 %r121, i32* %r120
+%r122 = lshr i256 %r118, 32
+%r124 = getelementptr i32, i32* %r1, i32 3
+%r125 = trunc i256 %r122 to i32
+store i32 %r125, i32* %r124
+%r126 = lshr i256 %r122, 32
+%r128 = getelementptr i32, i32* %r1, i32 4
+%r129 = trunc i256 %r126 to i32
+store i32 %r129, i32* %r128
+%r130 = lshr i256 %r126, 32
+%r132 = getelementptr i32, i32* %r1, i32 5
+%r133 = trunc i256 %r130 to i32
+store i32 %r133, i32* %r132
+%r134 = lshr i256 %r130, 32
+%r136 = getelementptr i32, i32* %r1, i32 6
+%r137 = trunc i256 %r134 to i32
+store i32 %r137, i32* %r136
+%r138 = lshr i256 %r134, 32
+%r140 = getelementptr i32, i32* %r1, i32 7
+%r141 = trunc i256 %r138 to i32
+store i32 %r141, i32* %r140
+br i1%r110, label %carry, label %nocarry
 nocarry:
 ret void
 carry:
-%r124 = load i32, i32* %r4
-%r125 = zext i32 %r124 to i64
-%r127 = getelementptr i32, i32* %r4, i32 1
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i64
-%r130 = shl i64 %r129, 32
-%r131 = or i64 %r125, %r130
-%r132 = zext i64 %r131 to i96
-%r134 = getelementptr i32, i32* %r4, i32 2
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i96
-%r137 = shl i96 %r136, 64
-%r138 = or i96 %r132, %r137
-%r139 = zext i96 %r138 to i128
-%r141 = getelementptr i32, i32* %r4, i32 3
-%r142 = load i32, i32* %r141
-%r143 = zext i32 %r142 to i128
-%r144 = shl i128 %r143, 96
-%r145 = or i128 %r139, %r144
-%r146 = zext i128 %r145 to i160
-%r148 = getelementptr i32, i32* %r4, i32 4
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i160
-%r151 = shl i160 %r150, 128
-%r152 = or i160 %r146, %r151
-%r153 = zext i160 %r152 to i192
-%r155 = getelementptr i32, i32* %r4, i32 5
-%r156 = load i32, i32* %r155
-%r157 = zext i32 %r156 to i192
-%r158 = shl i192 %r157, 160
-%r159 = or i192 %r153, %r158
-%r160 = zext i192 %r159 to i224
-%r162 = getelementptr i32, i32* %r4, i32 6
-%r163 = load i32, i32* %r162
-%r164 = zext i32 %r163 to i224
-%r165 = shl i224 %r164, 192
-%r166 = or i224 %r160, %r165
-%r167 = add i224 %r94, %r166
-%r168 = trunc i224 %r167 to i32
-%r170 = getelementptr i32, i32* %r1, i32 0
-store i32 %r168, i32* %r170
-%r171 = lshr i224 %r167, 32
-%r172 = trunc i224 %r171 to i32
-%r174 = getelementptr i32, i32* %r1, i32 1
-store i32 %r172, i32* %r174
-%r175 = lshr i224 %r171, 32
-%r176 = trunc i224 %r175 to i32
-%r178 = getelementptr i32, i32* %r1, i32 2
-store i32 %r176, i32* %r178
-%r179 = lshr i224 %r175, 32
-%r180 = trunc i224 %r179 to i32
-%r182 = getelementptr i32, i32* %r1, i32 3
-store i32 %r180, i32* %r182
-%r183 = lshr i224 %r179, 32
-%r184 = trunc i224 %r183 to i32
-%r186 = getelementptr i32, i32* %r1, i32 4
-store i32 %r184, i32* %r186
-%r187 = lshr i224 %r183, 32
-%r188 = trunc i224 %r187 to i32
-%r190 = getelementptr i32, i32* %r1, i32 5
-store i32 %r188, i32* %r190
-%r191 = lshr i224 %r187, 32
-%r192 = trunc i224 %r191 to i32
-%r194 = getelementptr i32, i32* %r1, i32 6
-store i32 %r192, i32* %r194
+%r142 = load i32, i32* %r4
+%r143 = zext i32 %r142 to i64
+%r145 = getelementptr i32, i32* %r4, i32 1
+%r146 = load i32, i32* %r145
+%r147 = zext i32 %r146 to i64
+%r148 = shl i64 %r147, 32
+%r149 = or i64 %r143, %r148
+%r150 = zext i64 %r149 to i96
+%r152 = getelementptr i32, i32* %r4, i32 2
+%r153 = load i32, i32* %r152
+%r154 = zext i32 %r153 to i96
+%r155 = shl i96 %r154, 64
+%r156 = or i96 %r150, %r155
+%r157 = zext i96 %r156 to i128
+%r159 = getelementptr i32, i32* %r4, i32 3
+%r160 = load i32, i32* %r159
+%r161 = zext i32 %r160 to i128
+%r162 = shl i128 %r161, 96
+%r163 = or i128 %r157, %r162
+%r164 = zext i128 %r163 to i160
+%r166 = getelementptr i32, i32* %r4, i32 4
+%r167 = load i32, i32* %r166
+%r168 = zext i32 %r167 to i160
+%r169 = shl i160 %r168, 128
+%r170 = or i160 %r164, %r169
+%r171 = zext i160 %r170 to i192
+%r173 = getelementptr i32, i32* %r4, i32 5
+%r174 = load i32, i32* %r173
+%r175 = zext i32 %r174 to i192
+%r176 = shl i192 %r175, 160
+%r177 = or i192 %r171, %r176
+%r178 = zext i192 %r177 to i224
+%r180 = getelementptr i32, i32* %r4, i32 6
+%r181 = load i32, i32* %r180
+%r182 = zext i32 %r181 to i224
+%r183 = shl i224 %r182, 192
+%r184 = or i224 %r178, %r183
+%r185 = zext i224 %r184 to i256
+%r187 = getelementptr i32, i32* %r4, i32 7
+%r188 = load i32, i32* %r187
+%r189 = zext i32 %r188 to i256
+%r190 = shl i256 %r189, 224
+%r191 = or i256 %r185, %r190
+%r192 = add i256 %r108, %r191
+%r194 = getelementptr i32, i32* %r1, i32 0
+%r195 = trunc i256 %r192 to i32
+store i32 %r195, i32* %r194
+%r196 = lshr i256 %r192, 32
+%r198 = getelementptr i32, i32* %r1, i32 1
+%r199 = trunc i256 %r196 to i32
+store i32 %r199, i32* %r198
+%r200 = lshr i256 %r196, 32
+%r202 = getelementptr i32, i32* %r1, i32 2
+%r203 = trunc i256 %r200 to i32
+store i32 %r203, i32* %r202
+%r204 = lshr i256 %r200, 32
+%r206 = getelementptr i32, i32* %r1, i32 3
+%r207 = trunc i256 %r204 to i32
+store i32 %r207, i32* %r206
+%r208 = lshr i256 %r204, 32
+%r210 = getelementptr i32, i32* %r1, i32 4
+%r211 = trunc i256 %r208 to i32
+store i32 %r211, i32* %r210
+%r212 = lshr i256 %r208, 32
+%r214 = getelementptr i32, i32* %r1, i32 5
+%r215 = trunc i256 %r212 to i32
+store i32 %r215, i32* %r214
+%r216 = lshr i256 %r212, 32
+%r218 = getelementptr i32, i32* %r1, i32 6
+%r219 = trunc i256 %r216 to i32
+store i32 %r219, i32* %r218
+%r220 = lshr i256 %r216, 32
+%r222 = getelementptr i32, i32* %r1, i32 7
+%r223 = trunc i256 %r220 to i32
+store i32 %r223, i32* %r222
 ret void
 }
-define void @mcl_fp_subNF7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_subNF8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -8885,115 +7437,137 @@ define void @mcl_fp_subNF7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r45 = zext i32 %r44 to i224
 %r46 = shl i224 %r45, 192
 %r47 = or i224 %r41, %r46
-%r48 = load i32, i32* %r3
-%r49 = zext i32 %r48 to i64
-%r51 = getelementptr i32, i32* %r3, i32 1
-%r52 = load i32, i32* %r51
-%r53 = zext i32 %r52 to i64
-%r54 = shl i64 %r53, 32
-%r55 = or i64 %r49, %r54
-%r56 = zext i64 %r55 to i96
-%r58 = getelementptr i32, i32* %r3, i32 2
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = load i32, i32* %r3
+%r56 = zext i32 %r55 to i64
+%r58 = getelementptr i32, i32* %r3, i32 1
 %r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i96
-%r61 = shl i96 %r60, 64
-%r62 = or i96 %r56, %r61
-%r63 = zext i96 %r62 to i128
-%r65 = getelementptr i32, i32* %r3, i32 3
+%r60 = zext i32 %r59 to i64
+%r61 = shl i64 %r60, 32
+%r62 = or i64 %r56, %r61
+%r63 = zext i64 %r62 to i96
+%r65 = getelementptr i32, i32* %r3, i32 2
 %r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i128
-%r68 = shl i128 %r67, 96
-%r69 = or i128 %r63, %r68
-%r70 = zext i128 %r69 to i160
-%r72 = getelementptr i32, i32* %r3, i32 4
+%r67 = zext i32 %r66 to i96
+%r68 = shl i96 %r67, 64
+%r69 = or i96 %r63, %r68
+%r70 = zext i96 %r69 to i128
+%r72 = getelementptr i32, i32* %r3, i32 3
 %r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i160
-%r75 = shl i160 %r74, 128
-%r76 = or i160 %r70, %r75
-%r77 = zext i160 %r76 to i192
-%r79 = getelementptr i32, i32* %r3, i32 5
+%r74 = zext i32 %r73 to i128
+%r75 = shl i128 %r74, 96
+%r76 = or i128 %r70, %r75
+%r77 = zext i128 %r76 to i160
+%r79 = getelementptr i32, i32* %r3, i32 4
 %r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i192
-%r82 = shl i192 %r81, 160
-%r83 = or i192 %r77, %r82
-%r84 = zext i192 %r83 to i224
-%r86 = getelementptr i32, i32* %r3, i32 6
+%r81 = zext i32 %r80 to i160
+%r82 = shl i160 %r81, 128
+%r83 = or i160 %r77, %r82
+%r84 = zext i160 %r83 to i192
+%r86 = getelementptr i32, i32* %r3, i32 5
 %r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i224
-%r89 = shl i224 %r88, 192
-%r90 = or i224 %r84, %r89
-%r91 = sub i224 %r47, %r90
-%r92 = lshr i224 %r91, 223
-%r93 = trunc i224 %r92 to i1
-%r94 = load i32, i32* %r4
-%r95 = zext i32 %r94 to i64
-%r97 = getelementptr i32, i32* %r4, i32 1
-%r98 = load i32, i32* %r97
-%r99 = zext i32 %r98 to i64
-%r100 = shl i64 %r99, 32
-%r101 = or i64 %r95, %r100
-%r102 = zext i64 %r101 to i96
-%r104 = getelementptr i32, i32* %r4, i32 2
-%r105 = load i32, i32* %r104
-%r106 = zext i32 %r105 to i96
-%r107 = shl i96 %r106, 64
-%r108 = or i96 %r102, %r107
-%r109 = zext i96 %r108 to i128
-%r111 = getelementptr i32, i32* %r4, i32 3
+%r88 = zext i32 %r87 to i192
+%r89 = shl i192 %r88, 160
+%r90 = or i192 %r84, %r89
+%r91 = zext i192 %r90 to i224
+%r93 = getelementptr i32, i32* %r3, i32 6
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i224
+%r96 = shl i224 %r95, 192
+%r97 = or i224 %r91, %r96
+%r98 = zext i224 %r97 to i256
+%r100 = getelementptr i32, i32* %r3, i32 7
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i256
+%r103 = shl i256 %r102, 224
+%r104 = or i256 %r98, %r103
+%r105 = sub i256 %r54, %r104
+%r106 = lshr i256 %r105, 255
+%r107 = trunc i256 %r106 to i1
+%r108 = load i32, i32* %r4
+%r109 = zext i32 %r108 to i64
+%r111 = getelementptr i32, i32* %r4, i32 1
 %r112 = load i32, i32* %r111
-%r113 = zext i32 %r112 to i128
-%r114 = shl i128 %r113, 96
-%r115 = or i128 %r109, %r114
-%r116 = zext i128 %r115 to i160
-%r118 = getelementptr i32, i32* %r4, i32 4
+%r113 = zext i32 %r112 to i64
+%r114 = shl i64 %r113, 32
+%r115 = or i64 %r109, %r114
+%r116 = zext i64 %r115 to i96
+%r118 = getelementptr i32, i32* %r4, i32 2
 %r119 = load i32, i32* %r118
-%r120 = zext i32 %r119 to i160
-%r121 = shl i160 %r120, 128
-%r122 = or i160 %r116, %r121
-%r123 = zext i160 %r122 to i192
-%r125 = getelementptr i32, i32* %r4, i32 5
+%r120 = zext i32 %r119 to i96
+%r121 = shl i96 %r120, 64
+%r122 = or i96 %r116, %r121
+%r123 = zext i96 %r122 to i128
+%r125 = getelementptr i32, i32* %r4, i32 3
 %r126 = load i32, i32* %r125
-%r127 = zext i32 %r126 to i192
-%r128 = shl i192 %r127, 160
-%r129 = or i192 %r123, %r128
-%r130 = zext i192 %r129 to i224
-%r132 = getelementptr i32, i32* %r4, i32 6
+%r127 = zext i32 %r126 to i128
+%r128 = shl i128 %r127, 96
+%r129 = or i128 %r123, %r128
+%r130 = zext i128 %r129 to i160
+%r132 = getelementptr i32, i32* %r4, i32 4
 %r133 = load i32, i32* %r132
-%r134 = zext i32 %r133 to i224
-%r135 = shl i224 %r134, 192
-%r136 = or i224 %r130, %r135
-%r138 = select i1 %r93, i224 %r136, i224 0
-%r139 = add i224 %r91, %r138
-%r140 = trunc i224 %r139 to i32
-%r142 = getelementptr i32, i32* %r1, i32 0
-store i32 %r140, i32* %r142
-%r143 = lshr i224 %r139, 32
-%r144 = trunc i224 %r143 to i32
-%r146 = getelementptr i32, i32* %r1, i32 1
-store i32 %r144, i32* %r146
-%r147 = lshr i224 %r143, 32
-%r148 = trunc i224 %r147 to i32
-%r150 = getelementptr i32, i32* %r1, i32 2
-store i32 %r148, i32* %r150
-%r151 = lshr i224 %r147, 32
-%r152 = trunc i224 %r151 to i32
-%r154 = getelementptr i32, i32* %r1, i32 3
-store i32 %r152, i32* %r154
-%r155 = lshr i224 %r151, 32
-%r156 = trunc i224 %r155 to i32
-%r158 = getelementptr i32, i32* %r1, i32 4
-store i32 %r156, i32* %r158
-%r159 = lshr i224 %r155, 32
-%r160 = trunc i224 %r159 to i32
-%r162 = getelementptr i32, i32* %r1, i32 5
-store i32 %r160, i32* %r162
-%r163 = lshr i224 %r159, 32
-%r164 = trunc i224 %r163 to i32
-%r166 = getelementptr i32, i32* %r1, i32 6
-store i32 %r164, i32* %r166
+%r134 = zext i32 %r133 to i160
+%r135 = shl i160 %r134, 128
+%r136 = or i160 %r130, %r135
+%r137 = zext i160 %r136 to i192
+%r139 = getelementptr i32, i32* %r4, i32 5
+%r140 = load i32, i32* %r139
+%r141 = zext i32 %r140 to i192
+%r142 = shl i192 %r141, 160
+%r143 = or i192 %r137, %r142
+%r144 = zext i192 %r143 to i224
+%r146 = getelementptr i32, i32* %r4, i32 6
+%r147 = load i32, i32* %r146
+%r148 = zext i32 %r147 to i224
+%r149 = shl i224 %r148, 192
+%r150 = or i224 %r144, %r149
+%r151 = zext i224 %r150 to i256
+%r153 = getelementptr i32, i32* %r4, i32 7
+%r154 = load i32, i32* %r153
+%r155 = zext i32 %r154 to i256
+%r156 = shl i256 %r155, 224
+%r157 = or i256 %r151, %r156
+%r159 = select i1 %r107, i256 %r157, i256 0
+%r160 = add i256 %r105, %r159
+%r162 = getelementptr i32, i32* %r1, i32 0
+%r163 = trunc i256 %r160 to i32
+store i32 %r163, i32* %r162
+%r164 = lshr i256 %r160, 32
+%r166 = getelementptr i32, i32* %r1, i32 1
+%r167 = trunc i256 %r164 to i32
+store i32 %r167, i32* %r166
+%r168 = lshr i256 %r164, 32
+%r170 = getelementptr i32, i32* %r1, i32 2
+%r171 = trunc i256 %r168 to i32
+store i32 %r171, i32* %r170
+%r172 = lshr i256 %r168, 32
+%r174 = getelementptr i32, i32* %r1, i32 3
+%r175 = trunc i256 %r172 to i32
+store i32 %r175, i32* %r174
+%r176 = lshr i256 %r172, 32
+%r178 = getelementptr i32, i32* %r1, i32 4
+%r179 = trunc i256 %r176 to i32
+store i32 %r179, i32* %r178
+%r180 = lshr i256 %r176, 32
+%r182 = getelementptr i32, i32* %r1, i32 5
+%r183 = trunc i256 %r180 to i32
+store i32 %r183, i32* %r182
+%r184 = lshr i256 %r180, 32
+%r186 = getelementptr i32, i32* %r1, i32 6
+%r187 = trunc i256 %r184 to i32
+store i32 %r187, i32* %r186
+%r188 = lshr i256 %r184, 32
+%r190 = getelementptr i32, i32* %r1, i32 7
+%r191 = trunc i256 %r188 to i32
+store i32 %r191, i32* %r190
 ret void
 }
-define void @mcl_fpDbl_add7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fpDbl_add8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -9074,192 +7648,230 @@ define void @mcl_fpDbl_add7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r94 = zext i32 %r93 to i448
 %r95 = shl i448 %r94, 416
 %r96 = or i448 %r90, %r95
-%r97 = load i32, i32* %r3
-%r98 = zext i32 %r97 to i64
-%r100 = getelementptr i32, i32* %r3, i32 1
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i64
-%r103 = shl i64 %r102, 32
-%r104 = or i64 %r98, %r103
-%r105 = zext i64 %r104 to i96
-%r107 = getelementptr i32, i32* %r3, i32 2
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i96
-%r110 = shl i96 %r109, 64
-%r111 = or i96 %r105, %r110
-%r112 = zext i96 %r111 to i128
-%r114 = getelementptr i32, i32* %r3, i32 3
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = load i32, i32* %r3
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r3, i32 1
 %r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i128
-%r117 = shl i128 %r116, 96
-%r118 = or i128 %r112, %r117
-%r119 = zext i128 %r118 to i160
-%r121 = getelementptr i32, i32* %r3, i32 4
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r3, i32 2
 %r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i160
-%r124 = shl i160 %r123, 128
-%r125 = or i160 %r119, %r124
-%r126 = zext i160 %r125 to i192
-%r128 = getelementptr i32, i32* %r3, i32 5
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r3, i32 3
 %r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i192
-%r131 = shl i192 %r130, 160
-%r132 = or i192 %r126, %r131
-%r133 = zext i192 %r132 to i224
-%r135 = getelementptr i32, i32* %r3, i32 6
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r3, i32 4
 %r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i224
-%r138 = shl i224 %r137, 192
-%r139 = or i224 %r133, %r138
-%r140 = zext i224 %r139 to i256
-%r142 = getelementptr i32, i32* %r3, i32 7
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r3, i32 5
 %r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i256
-%r145 = shl i256 %r144, 224
-%r146 = or i256 %r140, %r145
-%r147 = zext i256 %r146 to i288
-%r149 = getelementptr i32, i32* %r3, i32 8
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r3, i32 6
 %r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i288
-%r152 = shl i288 %r151, 256
-%r153 = or i288 %r147, %r152
-%r154 = zext i288 %r153 to i320
-%r156 = getelementptr i32, i32* %r3, i32 9
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r3, i32 7
 %r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i320
-%r159 = shl i320 %r158, 288
-%r160 = or i320 %r154, %r159
-%r161 = zext i320 %r160 to i352
-%r163 = getelementptr i32, i32* %r3, i32 10
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r163 = getelementptr i32, i32* %r3, i32 8
 %r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i352
-%r166 = shl i352 %r165, 320
-%r167 = or i352 %r161, %r166
-%r168 = zext i352 %r167 to i384
-%r170 = getelementptr i32, i32* %r3, i32 11
+%r165 = zext i32 %r164 to i288
+%r166 = shl i288 %r165, 256
+%r167 = or i288 %r161, %r166
+%r168 = zext i288 %r167 to i320
+%r170 = getelementptr i32, i32* %r3, i32 9
 %r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i384
-%r173 = shl i384 %r172, 352
-%r174 = or i384 %r168, %r173
-%r175 = zext i384 %r174 to i416
-%r177 = getelementptr i32, i32* %r3, i32 12
+%r172 = zext i32 %r171 to i320
+%r173 = shl i320 %r172, 288
+%r174 = or i320 %r168, %r173
+%r175 = zext i320 %r174 to i352
+%r177 = getelementptr i32, i32* %r3, i32 10
 %r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i416
-%r180 = shl i416 %r179, 384
-%r181 = or i416 %r175, %r180
-%r182 = zext i416 %r181 to i448
-%r184 = getelementptr i32, i32* %r3, i32 13
+%r179 = zext i32 %r178 to i352
+%r180 = shl i352 %r179, 320
+%r181 = or i352 %r175, %r180
+%r182 = zext i352 %r181 to i384
+%r184 = getelementptr i32, i32* %r3, i32 11
 %r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i448
-%r187 = shl i448 %r186, 416
-%r188 = or i448 %r182, %r187
-%r189 = zext i448 %r96 to i480
-%r190 = zext i448 %r188 to i480
-%r191 = add i480 %r189, %r190
-%r192 = trunc i480 %r191 to i224
-%r193 = trunc i224 %r192 to i32
-%r195 = getelementptr i32, i32* %r1, i32 0
-store i32 %r193, i32* %r195
-%r196 = lshr i224 %r192, 32
-%r197 = trunc i224 %r196 to i32
-%r199 = getelementptr i32, i32* %r1, i32 1
-store i32 %r197, i32* %r199
-%r200 = lshr i224 %r196, 32
-%r201 = trunc i224 %r200 to i32
-%r203 = getelementptr i32, i32* %r1, i32 2
-store i32 %r201, i32* %r203
-%r204 = lshr i224 %r200, 32
-%r205 = trunc i224 %r204 to i32
-%r207 = getelementptr i32, i32* %r1, i32 3
-store i32 %r205, i32* %r207
-%r208 = lshr i224 %r204, 32
-%r209 = trunc i224 %r208 to i32
-%r211 = getelementptr i32, i32* %r1, i32 4
-store i32 %r209, i32* %r211
-%r212 = lshr i224 %r208, 32
-%r213 = trunc i224 %r212 to i32
-%r215 = getelementptr i32, i32* %r1, i32 5
-store i32 %r213, i32* %r215
-%r216 = lshr i224 %r212, 32
-%r217 = trunc i224 %r216 to i32
-%r219 = getelementptr i32, i32* %r1, i32 6
-store i32 %r217, i32* %r219
-%r220 = lshr i480 %r191, 224
-%r221 = trunc i480 %r220 to i256
-%r222 = load i32, i32* %r4
-%r223 = zext i32 %r222 to i64
-%r225 = getelementptr i32, i32* %r4, i32 1
-%r226 = load i32, i32* %r225
-%r227 = zext i32 %r226 to i64
-%r228 = shl i64 %r227, 32
-%r229 = or i64 %r223, %r228
-%r230 = zext i64 %r229 to i96
-%r232 = getelementptr i32, i32* %r4, i32 2
-%r233 = load i32, i32* %r232
-%r234 = zext i32 %r233 to i96
-%r235 = shl i96 %r234, 64
-%r236 = or i96 %r230, %r235
-%r237 = zext i96 %r236 to i128
-%r239 = getelementptr i32, i32* %r4, i32 3
-%r240 = load i32, i32* %r239
-%r241 = zext i32 %r240 to i128
-%r242 = shl i128 %r241, 96
-%r243 = or i128 %r237, %r242
-%r244 = zext i128 %r243 to i160
-%r246 = getelementptr i32, i32* %r4, i32 4
-%r247 = load i32, i32* %r246
-%r248 = zext i32 %r247 to i160
-%r249 = shl i160 %r248, 128
-%r250 = or i160 %r244, %r249
-%r251 = zext i160 %r250 to i192
-%r253 = getelementptr i32, i32* %r4, i32 5
-%r254 = load i32, i32* %r253
-%r255 = zext i32 %r254 to i192
-%r256 = shl i192 %r255, 160
-%r257 = or i192 %r251, %r256
-%r258 = zext i192 %r257 to i224
-%r260 = getelementptr i32, i32* %r4, i32 6
-%r261 = load i32, i32* %r260
-%r262 = zext i32 %r261 to i224
-%r263 = shl i224 %r262, 192
-%r264 = or i224 %r258, %r263
-%r265 = zext i224 %r264 to i256
-%r266 = sub i256 %r221, %r265
-%r267 = lshr i256 %r266, 224
-%r268 = trunc i256 %r267 to i1
-%r269 = select i1 %r268, i256 %r221, i256 %r266
-%r270 = trunc i256 %r269 to i224
-%r272 = getelementptr i32, i32* %r1, i32 7
-%r273 = trunc i224 %r270 to i32
-%r275 = getelementptr i32, i32* %r272, i32 0
-store i32 %r273, i32* %r275
-%r276 = lshr i224 %r270, 32
-%r277 = trunc i224 %r276 to i32
-%r279 = getelementptr i32, i32* %r272, i32 1
-store i32 %r277, i32* %r279
-%r280 = lshr i224 %r276, 32
-%r281 = trunc i224 %r280 to i32
-%r283 = getelementptr i32, i32* %r272, i32 2
-store i32 %r281, i32* %r283
-%r284 = lshr i224 %r280, 32
-%r285 = trunc i224 %r284 to i32
-%r287 = getelementptr i32, i32* %r272, i32 3
-store i32 %r285, i32* %r287
-%r288 = lshr i224 %r284, 32
-%r289 = trunc i224 %r288 to i32
-%r291 = getelementptr i32, i32* %r272, i32 4
-store i32 %r289, i32* %r291
-%r292 = lshr i224 %r288, 32
-%r293 = trunc i224 %r292 to i32
-%r295 = getelementptr i32, i32* %r272, i32 5
-store i32 %r293, i32* %r295
-%r296 = lshr i224 %r292, 32
-%r297 = trunc i224 %r296 to i32
-%r299 = getelementptr i32, i32* %r272, i32 6
-store i32 %r297, i32* %r299
+%r186 = zext i32 %r185 to i384
+%r187 = shl i384 %r186, 352
+%r188 = or i384 %r182, %r187
+%r189 = zext i384 %r188 to i416
+%r191 = getelementptr i32, i32* %r3, i32 12
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i416
+%r194 = shl i416 %r193, 384
+%r195 = or i416 %r189, %r194
+%r196 = zext i416 %r195 to i448
+%r198 = getelementptr i32, i32* %r3, i32 13
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i448
+%r201 = shl i448 %r200, 416
+%r202 = or i448 %r196, %r201
+%r203 = zext i448 %r202 to i480
+%r205 = getelementptr i32, i32* %r3, i32 14
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i480
+%r208 = shl i480 %r207, 448
+%r209 = or i480 %r203, %r208
+%r210 = zext i480 %r209 to i512
+%r212 = getelementptr i32, i32* %r3, i32 15
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i512
+%r215 = shl i512 %r214, 480
+%r216 = or i512 %r210, %r215
+%r217 = zext i512 %r110 to i544
+%r218 = zext i512 %r216 to i544
+%r219 = add i544 %r217, %r218
+%r220 = trunc i544 %r219 to i256
+%r222 = getelementptr i32, i32* %r1, i32 0
+%r223 = trunc i256 %r220 to i32
+store i32 %r223, i32* %r222
+%r224 = lshr i256 %r220, 32
+%r226 = getelementptr i32, i32* %r1, i32 1
+%r227 = trunc i256 %r224 to i32
+store i32 %r227, i32* %r226
+%r228 = lshr i256 %r224, 32
+%r230 = getelementptr i32, i32* %r1, i32 2
+%r231 = trunc i256 %r228 to i32
+store i32 %r231, i32* %r230
+%r232 = lshr i256 %r228, 32
+%r234 = getelementptr i32, i32* %r1, i32 3
+%r235 = trunc i256 %r232 to i32
+store i32 %r235, i32* %r234
+%r236 = lshr i256 %r232, 32
+%r238 = getelementptr i32, i32* %r1, i32 4
+%r239 = trunc i256 %r236 to i32
+store i32 %r239, i32* %r238
+%r240 = lshr i256 %r236, 32
+%r242 = getelementptr i32, i32* %r1, i32 5
+%r243 = trunc i256 %r240 to i32
+store i32 %r243, i32* %r242
+%r244 = lshr i256 %r240, 32
+%r246 = getelementptr i32, i32* %r1, i32 6
+%r247 = trunc i256 %r244 to i32
+store i32 %r247, i32* %r246
+%r248 = lshr i256 %r244, 32
+%r250 = getelementptr i32, i32* %r1, i32 7
+%r251 = trunc i256 %r248 to i32
+store i32 %r251, i32* %r250
+%r252 = lshr i544 %r219, 256
+%r253 = trunc i544 %r252 to i288
+%r254 = load i32, i32* %r4
+%r255 = zext i32 %r254 to i64
+%r257 = getelementptr i32, i32* %r4, i32 1
+%r258 = load i32, i32* %r257
+%r259 = zext i32 %r258 to i64
+%r260 = shl i64 %r259, 32
+%r261 = or i64 %r255, %r260
+%r262 = zext i64 %r261 to i96
+%r264 = getelementptr i32, i32* %r4, i32 2
+%r265 = load i32, i32* %r264
+%r266 = zext i32 %r265 to i96
+%r267 = shl i96 %r266, 64
+%r268 = or i96 %r262, %r267
+%r269 = zext i96 %r268 to i128
+%r271 = getelementptr i32, i32* %r4, i32 3
+%r272 = load i32, i32* %r271
+%r273 = zext i32 %r272 to i128
+%r274 = shl i128 %r273, 96
+%r275 = or i128 %r269, %r274
+%r276 = zext i128 %r275 to i160
+%r278 = getelementptr i32, i32* %r4, i32 4
+%r279 = load i32, i32* %r278
+%r280 = zext i32 %r279 to i160
+%r281 = shl i160 %r280, 128
+%r282 = or i160 %r276, %r281
+%r283 = zext i160 %r282 to i192
+%r285 = getelementptr i32, i32* %r4, i32 5
+%r286 = load i32, i32* %r285
+%r287 = zext i32 %r286 to i192
+%r288 = shl i192 %r287, 160
+%r289 = or i192 %r283, %r288
+%r290 = zext i192 %r289 to i224
+%r292 = getelementptr i32, i32* %r4, i32 6
+%r293 = load i32, i32* %r292
+%r294 = zext i32 %r293 to i224
+%r295 = shl i224 %r294, 192
+%r296 = or i224 %r290, %r295
+%r297 = zext i224 %r296 to i256
+%r299 = getelementptr i32, i32* %r4, i32 7
+%r300 = load i32, i32* %r299
+%r301 = zext i32 %r300 to i256
+%r302 = shl i256 %r301, 224
+%r303 = or i256 %r297, %r302
+%r304 = zext i256 %r303 to i288
+%r305 = sub i288 %r253, %r304
+%r306 = lshr i288 %r305, 256
+%r307 = trunc i288 %r306 to i1
+%r308 = select i1 %r307, i288 %r253, i288 %r305
+%r309 = trunc i288 %r308 to i256
+%r311 = getelementptr i32, i32* %r1, i32 8
+%r313 = getelementptr i32, i32* %r311, i32 0
+%r314 = trunc i256 %r309 to i32
+store i32 %r314, i32* %r313
+%r315 = lshr i256 %r309, 32
+%r317 = getelementptr i32, i32* %r311, i32 1
+%r318 = trunc i256 %r315 to i32
+store i32 %r318, i32* %r317
+%r319 = lshr i256 %r315, 32
+%r321 = getelementptr i32, i32* %r311, i32 2
+%r322 = trunc i256 %r319 to i32
+store i32 %r322, i32* %r321
+%r323 = lshr i256 %r319, 32
+%r325 = getelementptr i32, i32* %r311, i32 3
+%r326 = trunc i256 %r323 to i32
+store i32 %r326, i32* %r325
+%r327 = lshr i256 %r323, 32
+%r329 = getelementptr i32, i32* %r311, i32 4
+%r330 = trunc i256 %r327 to i32
+store i32 %r330, i32* %r329
+%r331 = lshr i256 %r327, 32
+%r333 = getelementptr i32, i32* %r311, i32 5
+%r334 = trunc i256 %r331 to i32
+store i32 %r334, i32* %r333
+%r335 = lshr i256 %r331, 32
+%r337 = getelementptr i32, i32* %r311, i32 6
+%r338 = trunc i256 %r335 to i32
+store i32 %r338, i32* %r337
+%r339 = lshr i256 %r335, 32
+%r341 = getelementptr i32, i32* %r311, i32 7
+%r342 = trunc i256 %r339 to i32
+store i32 %r342, i32* %r341
 ret void
 }
-define void @mcl_fpDbl_sub7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fpDbl_sub8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -9340,38839 +7952,5042 @@ define void @mcl_fpDbl_sub7L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r94 = zext i32 %r93 to i448
 %r95 = shl i448 %r94, 416
 %r96 = or i448 %r90, %r95
-%r97 = load i32, i32* %r3
-%r98 = zext i32 %r97 to i64
-%r100 = getelementptr i32, i32* %r3, i32 1
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i64
-%r103 = shl i64 %r102, 32
-%r104 = or i64 %r98, %r103
-%r105 = zext i64 %r104 to i96
-%r107 = getelementptr i32, i32* %r3, i32 2
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i96
-%r110 = shl i96 %r109, 64
-%r111 = or i96 %r105, %r110
-%r112 = zext i96 %r111 to i128
-%r114 = getelementptr i32, i32* %r3, i32 3
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i128
-%r117 = shl i128 %r116, 96
-%r118 = or i128 %r112, %r117
-%r119 = zext i128 %r118 to i160
-%r121 = getelementptr i32, i32* %r3, i32 4
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i160
-%r124 = shl i160 %r123, 128
-%r125 = or i160 %r119, %r124
-%r126 = zext i160 %r125 to i192
-%r128 = getelementptr i32, i32* %r3, i32 5
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = load i32, i32* %r3
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r3, i32 1
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r3, i32 2
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r3, i32 3
 %r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i192
-%r131 = shl i192 %r130, 160
-%r132 = or i192 %r126, %r131
-%r133 = zext i192 %r132 to i224
-%r135 = getelementptr i32, i32* %r3, i32 6
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r3, i32 4
 %r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i224
-%r138 = shl i224 %r137, 192
-%r139 = or i224 %r133, %r138
-%r140 = zext i224 %r139 to i256
-%r142 = getelementptr i32, i32* %r3, i32 7
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r3, i32 5
 %r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i256
-%r145 = shl i256 %r144, 224
-%r146 = or i256 %r140, %r145
-%r147 = zext i256 %r146 to i288
-%r149 = getelementptr i32, i32* %r3, i32 8
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r3, i32 6
 %r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i288
-%r152 = shl i288 %r151, 256
-%r153 = or i288 %r147, %r152
-%r154 = zext i288 %r153 to i320
-%r156 = getelementptr i32, i32* %r3, i32 9
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r3, i32 7
 %r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i320
-%r159 = shl i320 %r158, 288
-%r160 = or i320 %r154, %r159
-%r161 = zext i320 %r160 to i352
-%r163 = getelementptr i32, i32* %r3, i32 10
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r163 = getelementptr i32, i32* %r3, i32 8
 %r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i352
-%r166 = shl i352 %r165, 320
-%r167 = or i352 %r161, %r166
-%r168 = zext i352 %r167 to i384
-%r170 = getelementptr i32, i32* %r3, i32 11
+%r165 = zext i32 %r164 to i288
+%r166 = shl i288 %r165, 256
+%r167 = or i288 %r161, %r166
+%r168 = zext i288 %r167 to i320
+%r170 = getelementptr i32, i32* %r3, i32 9
 %r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i384
-%r173 = shl i384 %r172, 352
-%r174 = or i384 %r168, %r173
-%r175 = zext i384 %r174 to i416
-%r177 = getelementptr i32, i32* %r3, i32 12
+%r172 = zext i32 %r171 to i320
+%r173 = shl i320 %r172, 288
+%r174 = or i320 %r168, %r173
+%r175 = zext i320 %r174 to i352
+%r177 = getelementptr i32, i32* %r3, i32 10
 %r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i416
-%r180 = shl i416 %r179, 384
-%r181 = or i416 %r175, %r180
-%r182 = zext i416 %r181 to i448
-%r184 = getelementptr i32, i32* %r3, i32 13
+%r179 = zext i32 %r178 to i352
+%r180 = shl i352 %r179, 320
+%r181 = or i352 %r175, %r180
+%r182 = zext i352 %r181 to i384
+%r184 = getelementptr i32, i32* %r3, i32 11
 %r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i448
-%r187 = shl i448 %r186, 416
-%r188 = or i448 %r182, %r187
-%r189 = zext i448 %r96 to i480
-%r190 = zext i448 %r188 to i480
-%r191 = sub i480 %r189, %r190
-%r192 = trunc i480 %r191 to i224
-%r193 = trunc i224 %r192 to i32
-%r195 = getelementptr i32, i32* %r1, i32 0
-store i32 %r193, i32* %r195
-%r196 = lshr i224 %r192, 32
-%r197 = trunc i224 %r196 to i32
-%r199 = getelementptr i32, i32* %r1, i32 1
-store i32 %r197, i32* %r199
-%r200 = lshr i224 %r196, 32
-%r201 = trunc i224 %r200 to i32
-%r203 = getelementptr i32, i32* %r1, i32 2
-store i32 %r201, i32* %r203
-%r204 = lshr i224 %r200, 32
-%r205 = trunc i224 %r204 to i32
-%r207 = getelementptr i32, i32* %r1, i32 3
-store i32 %r205, i32* %r207
-%r208 = lshr i224 %r204, 32
-%r209 = trunc i224 %r208 to i32
-%r211 = getelementptr i32, i32* %r1, i32 4
-store i32 %r209, i32* %r211
-%r212 = lshr i224 %r208, 32
-%r213 = trunc i224 %r212 to i32
-%r215 = getelementptr i32, i32* %r1, i32 5
-store i32 %r213, i32* %r215
-%r216 = lshr i224 %r212, 32
-%r217 = trunc i224 %r216 to i32
-%r219 = getelementptr i32, i32* %r1, i32 6
-store i32 %r217, i32* %r219
-%r220 = lshr i480 %r191, 224
-%r221 = trunc i480 %r220 to i224
-%r222 = lshr i480 %r191, 448
-%r223 = trunc i480 %r222 to i1
-%r224 = load i32, i32* %r4
-%r225 = zext i32 %r224 to i64
-%r227 = getelementptr i32, i32* %r4, i32 1
-%r228 = load i32, i32* %r227
-%r229 = zext i32 %r228 to i64
-%r230 = shl i64 %r229, 32
-%r231 = or i64 %r225, %r230
-%r232 = zext i64 %r231 to i96
-%r234 = getelementptr i32, i32* %r4, i32 2
-%r235 = load i32, i32* %r234
-%r236 = zext i32 %r235 to i96
-%r237 = shl i96 %r236, 64
-%r238 = or i96 %r232, %r237
-%r239 = zext i96 %r238 to i128
-%r241 = getelementptr i32, i32* %r4, i32 3
-%r242 = load i32, i32* %r241
-%r243 = zext i32 %r242 to i128
-%r244 = shl i128 %r243, 96
-%r245 = or i128 %r239, %r244
-%r246 = zext i128 %r245 to i160
-%r248 = getelementptr i32, i32* %r4, i32 4
-%r249 = load i32, i32* %r248
-%r250 = zext i32 %r249 to i160
-%r251 = shl i160 %r250, 128
-%r252 = or i160 %r246, %r251
-%r253 = zext i160 %r252 to i192
-%r255 = getelementptr i32, i32* %r4, i32 5
-%r256 = load i32, i32* %r255
-%r257 = zext i32 %r256 to i192
-%r258 = shl i192 %r257, 160
-%r259 = or i192 %r253, %r258
-%r260 = zext i192 %r259 to i224
-%r262 = getelementptr i32, i32* %r4, i32 6
-%r263 = load i32, i32* %r262
-%r264 = zext i32 %r263 to i224
-%r265 = shl i224 %r264, 192
-%r266 = or i224 %r260, %r265
-%r268 = select i1 %r223, i224 %r266, i224 0
-%r269 = add i224 %r221, %r268
-%r271 = getelementptr i32, i32* %r1, i32 7
-%r272 = trunc i224 %r269 to i32
-%r274 = getelementptr i32, i32* %r271, i32 0
-store i32 %r272, i32* %r274
-%r275 = lshr i224 %r269, 32
-%r276 = trunc i224 %r275 to i32
-%r278 = getelementptr i32, i32* %r271, i32 1
-store i32 %r276, i32* %r278
-%r279 = lshr i224 %r275, 32
-%r280 = trunc i224 %r279 to i32
-%r282 = getelementptr i32, i32* %r271, i32 2
-store i32 %r280, i32* %r282
-%r283 = lshr i224 %r279, 32
-%r284 = trunc i224 %r283 to i32
-%r286 = getelementptr i32, i32* %r271, i32 3
-store i32 %r284, i32* %r286
-%r287 = lshr i224 %r283, 32
-%r288 = trunc i224 %r287 to i32
-%r290 = getelementptr i32, i32* %r271, i32 4
-store i32 %r288, i32* %r290
-%r291 = lshr i224 %r287, 32
-%r292 = trunc i224 %r291 to i32
-%r294 = getelementptr i32, i32* %r271, i32 5
-store i32 %r292, i32* %r294
-%r295 = lshr i224 %r291, 32
-%r296 = trunc i224 %r295 to i32
-%r298 = getelementptr i32, i32* %r271, i32 6
-store i32 %r296, i32* %r298
-ret void
-}
-define i288 @mulPv256x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
-%r18 = trunc i64 %r17 to i32
-%r19 = call i32 @extractHigh32(i64 %r17)
-%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
-%r22 = trunc i64 %r21 to i32
-%r23 = call i32 @extractHigh32(i64 %r21)
-%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
-%r26 = trunc i64 %r25 to i32
-%r27 = call i32 @extractHigh32(i64 %r25)
-%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
-%r30 = trunc i64 %r29 to i32
-%r31 = call i32 @extractHigh32(i64 %r29)
-%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
-%r34 = trunc i64 %r33 to i32
-%r35 = call i32 @extractHigh32(i64 %r33)
-%r36 = zext i32 %r6 to i64
-%r37 = zext i32 %r10 to i64
-%r38 = shl i64 %r37, 32
-%r39 = or i64 %r36, %r38
-%r40 = zext i64 %r39 to i96
-%r41 = zext i32 %r14 to i96
-%r42 = shl i96 %r41, 64
-%r43 = or i96 %r40, %r42
-%r44 = zext i96 %r43 to i128
-%r45 = zext i32 %r18 to i128
-%r46 = shl i128 %r45, 96
-%r47 = or i128 %r44, %r46
-%r48 = zext i128 %r47 to i160
-%r49 = zext i32 %r22 to i160
-%r50 = shl i160 %r49, 128
-%r51 = or i160 %r48, %r50
-%r52 = zext i160 %r51 to i192
-%r53 = zext i32 %r26 to i192
-%r54 = shl i192 %r53, 160
-%r55 = or i192 %r52, %r54
-%r56 = zext i192 %r55 to i224
-%r57 = zext i32 %r30 to i224
-%r58 = shl i224 %r57, 192
-%r59 = or i224 %r56, %r58
-%r60 = zext i224 %r59 to i256
-%r61 = zext i32 %r34 to i256
-%r62 = shl i256 %r61, 224
-%r63 = or i256 %r60, %r62
-%r64 = zext i32 %r7 to i64
-%r65 = zext i32 %r11 to i64
-%r66 = shl i64 %r65, 32
-%r67 = or i64 %r64, %r66
-%r68 = zext i64 %r67 to i96
-%r69 = zext i32 %r15 to i96
-%r70 = shl i96 %r69, 64
-%r71 = or i96 %r68, %r70
-%r72 = zext i96 %r71 to i128
-%r73 = zext i32 %r19 to i128
-%r74 = shl i128 %r73, 96
-%r75 = or i128 %r72, %r74
-%r76 = zext i128 %r75 to i160
-%r77 = zext i32 %r23 to i160
-%r78 = shl i160 %r77, 128
-%r79 = or i160 %r76, %r78
-%r80 = zext i160 %r79 to i192
-%r81 = zext i32 %r27 to i192
-%r82 = shl i192 %r81, 160
-%r83 = or i192 %r80, %r82
-%r84 = zext i192 %r83 to i224
-%r85 = zext i32 %r31 to i224
-%r86 = shl i224 %r85, 192
-%r87 = or i224 %r84, %r86
-%r88 = zext i224 %r87 to i256
-%r89 = zext i32 %r35 to i256
-%r90 = shl i256 %r89, 224
-%r91 = or i256 %r88, %r90
-%r92 = zext i256 %r63 to i288
-%r93 = zext i256 %r91 to i288
-%r94 = shl i288 %r93, 32
-%r95 = add i288 %r92, %r94
-ret i288 %r95
-}
-define void @mcl_fp_mulUnitPre8L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i288 @mulPv256x32(i32* %r2, i32 %r3)
-%r5 = trunc i288 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i288 %r4, 32
-%r9 = trunc i288 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i288 %r8, 32
-%r13 = trunc i288 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i288 %r12, 32
-%r17 = trunc i288 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i288 %r16, 32
-%r21 = trunc i288 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-%r24 = lshr i288 %r20, 32
-%r25 = trunc i288 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 5
-store i32 %r25, i32* %r27
-%r28 = lshr i288 %r24, 32
-%r29 = trunc i288 %r28 to i32
-%r31 = getelementptr i32, i32* %r1, i32 6
-store i32 %r29, i32* %r31
-%r32 = lshr i288 %r28, 32
-%r33 = trunc i288 %r32 to i32
-%r35 = getelementptr i32, i32* %r1, i32 7
-store i32 %r33, i32* %r35
-%r36 = lshr i288 %r32, 32
-%r37 = trunc i288 %r36 to i32
-%r39 = getelementptr i32, i32* %r1, i32 8
-store i32 %r37, i32* %r39
-ret void
-}
-define void @mcl_fpDbl_mulPre8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r2, i32 4
-%r7 = getelementptr i32, i32* %r3, i32 4
-%r9 = getelementptr i32, i32* %r1, i32 8
-call void @mcl_fpDbl_mulPre4L(i32* %r1, i32* %r2, i32* %r3)
-call void @mcl_fpDbl_mulPre4L(i32* %r9, i32* %r5, i32* %r7)
-%r10 = load i32, i32* %r5
-%r11 = zext i32 %r10 to i64
-%r13 = getelementptr i32, i32* %r5, i32 1
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i64
-%r16 = shl i64 %r15, 32
-%r17 = or i64 %r11, %r16
-%r18 = zext i64 %r17 to i96
-%r20 = getelementptr i32, i32* %r5, i32 2
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i96
-%r23 = shl i96 %r22, 64
-%r24 = or i96 %r18, %r23
-%r25 = zext i96 %r24 to i128
-%r27 = getelementptr i32, i32* %r5, i32 3
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i128
-%r30 = shl i128 %r29, 96
-%r31 = or i128 %r25, %r30
-%r32 = zext i128 %r31 to i160
-%r33 = load i32, i32* %r2
-%r34 = zext i32 %r33 to i64
-%r36 = getelementptr i32, i32* %r2, i32 1
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i64
-%r39 = shl i64 %r38, 32
-%r40 = or i64 %r34, %r39
-%r41 = zext i64 %r40 to i96
-%r43 = getelementptr i32, i32* %r2, i32 2
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i96
-%r46 = shl i96 %r45, 64
-%r47 = or i96 %r41, %r46
-%r48 = zext i96 %r47 to i128
-%r50 = getelementptr i32, i32* %r2, i32 3
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i128
-%r53 = shl i128 %r52, 96
-%r54 = or i128 %r48, %r53
-%r55 = zext i128 %r54 to i160
-%r56 = load i32, i32* %r7
-%r57 = zext i32 %r56 to i64
-%r59 = getelementptr i32, i32* %r7, i32 1
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i64
-%r62 = shl i64 %r61, 32
-%r63 = or i64 %r57, %r62
-%r64 = zext i64 %r63 to i96
-%r66 = getelementptr i32, i32* %r7, i32 2
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i96
-%r69 = shl i96 %r68, 64
-%r70 = or i96 %r64, %r69
-%r71 = zext i96 %r70 to i128
-%r73 = getelementptr i32, i32* %r7, i32 3
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i128
-%r76 = shl i128 %r75, 96
-%r77 = or i128 %r71, %r76
-%r78 = zext i128 %r77 to i160
-%r79 = load i32, i32* %r3
-%r80 = zext i32 %r79 to i64
-%r82 = getelementptr i32, i32* %r3, i32 1
-%r83 = load i32, i32* %r82
-%r84 = zext i32 %r83 to i64
-%r85 = shl i64 %r84, 32
-%r86 = or i64 %r80, %r85
-%r87 = zext i64 %r86 to i96
-%r89 = getelementptr i32, i32* %r3, i32 2
-%r90 = load i32, i32* %r89
-%r91 = zext i32 %r90 to i96
-%r92 = shl i96 %r91, 64
-%r93 = or i96 %r87, %r92
-%r94 = zext i96 %r93 to i128
-%r96 = getelementptr i32, i32* %r3, i32 3
-%r97 = load i32, i32* %r96
-%r98 = zext i32 %r97 to i128
-%r99 = shl i128 %r98, 96
-%r100 = or i128 %r94, %r99
-%r101 = zext i128 %r100 to i160
-%r102 = add i160 %r32, %r55
-%r103 = add i160 %r78, %r101
-%r105 = alloca i32, i32 8
-%r106 = trunc i160 %r102 to i128
-%r107 = trunc i160 %r103 to i128
-%r108 = lshr i160 %r102, 128
-%r109 = trunc i160 %r108 to i1
-%r110 = lshr i160 %r103, 128
-%r111 = trunc i160 %r110 to i1
-%r112 = and i1 %r109, %r111
-%r114 = select i1 %r109, i128 %r107, i128 0
-%r116 = select i1 %r111, i128 %r106, i128 0
-%r118 = alloca i32, i32 4
-%r120 = alloca i32, i32 4
-%r121 = trunc i128 %r106 to i32
-%r123 = getelementptr i32, i32* %r118, i32 0
-store i32 %r121, i32* %r123
-%r124 = lshr i128 %r106, 32
-%r125 = trunc i128 %r124 to i32
-%r127 = getelementptr i32, i32* %r118, i32 1
-store i32 %r125, i32* %r127
-%r128 = lshr i128 %r124, 32
-%r129 = trunc i128 %r128 to i32
-%r131 = getelementptr i32, i32* %r118, i32 2
-store i32 %r129, i32* %r131
-%r132 = lshr i128 %r128, 32
-%r133 = trunc i128 %r132 to i32
-%r135 = getelementptr i32, i32* %r118, i32 3
-store i32 %r133, i32* %r135
-%r136 = trunc i128 %r107 to i32
-%r138 = getelementptr i32, i32* %r120, i32 0
-store i32 %r136, i32* %r138
-%r139 = lshr i128 %r107, 32
-%r140 = trunc i128 %r139 to i32
-%r142 = getelementptr i32, i32* %r120, i32 1
-store i32 %r140, i32* %r142
-%r143 = lshr i128 %r139, 32
-%r144 = trunc i128 %r143 to i32
-%r146 = getelementptr i32, i32* %r120, i32 2
-store i32 %r144, i32* %r146
-%r147 = lshr i128 %r143, 32
-%r148 = trunc i128 %r147 to i32
-%r150 = getelementptr i32, i32* %r120, i32 3
-store i32 %r148, i32* %r150
-call void @mcl_fpDbl_mulPre4L(i32* %r105, i32* %r118, i32* %r120)
-%r151 = load i32, i32* %r105
-%r152 = zext i32 %r151 to i64
-%r154 = getelementptr i32, i32* %r105, i32 1
-%r155 = load i32, i32* %r154
-%r156 = zext i32 %r155 to i64
-%r157 = shl i64 %r156, 32
-%r158 = or i64 %r152, %r157
-%r159 = zext i64 %r158 to i96
-%r161 = getelementptr i32, i32* %r105, i32 2
-%r162 = load i32, i32* %r161
-%r163 = zext i32 %r162 to i96
-%r164 = shl i96 %r163, 64
-%r165 = or i96 %r159, %r164
-%r166 = zext i96 %r165 to i128
-%r168 = getelementptr i32, i32* %r105, i32 3
-%r169 = load i32, i32* %r168
-%r170 = zext i32 %r169 to i128
-%r171 = shl i128 %r170, 96
-%r172 = or i128 %r166, %r171
-%r173 = zext i128 %r172 to i160
-%r175 = getelementptr i32, i32* %r105, i32 4
-%r176 = load i32, i32* %r175
-%r177 = zext i32 %r176 to i160
-%r178 = shl i160 %r177, 128
-%r179 = or i160 %r173, %r178
-%r180 = zext i160 %r179 to i192
-%r182 = getelementptr i32, i32* %r105, i32 5
-%r183 = load i32, i32* %r182
-%r184 = zext i32 %r183 to i192
-%r185 = shl i192 %r184, 160
-%r186 = or i192 %r180, %r185
-%r187 = zext i192 %r186 to i224
-%r189 = getelementptr i32, i32* %r105, i32 6
-%r190 = load i32, i32* %r189
-%r191 = zext i32 %r190 to i224
-%r192 = shl i224 %r191, 192
-%r193 = or i224 %r187, %r192
-%r194 = zext i224 %r193 to i256
-%r196 = getelementptr i32, i32* %r105, i32 7
-%r197 = load i32, i32* %r196
-%r198 = zext i32 %r197 to i256
-%r199 = shl i256 %r198, 224
-%r200 = or i256 %r194, %r199
-%r201 = zext i256 %r200 to i288
-%r202 = zext i1 %r112 to i288
-%r203 = shl i288 %r202, 256
-%r204 = or i288 %r201, %r203
-%r205 = zext i128 %r114 to i288
-%r206 = zext i128 %r116 to i288
-%r207 = shl i288 %r205, 128
-%r208 = shl i288 %r206, 128
-%r209 = add i288 %r204, %r207
-%r210 = add i288 %r209, %r208
-%r211 = load i32, i32* %r1
-%r212 = zext i32 %r211 to i64
-%r214 = getelementptr i32, i32* %r1, i32 1
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i64
-%r217 = shl i64 %r216, 32
-%r218 = or i64 %r212, %r217
-%r219 = zext i64 %r218 to i96
-%r221 = getelementptr i32, i32* %r1, i32 2
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i96
-%r224 = shl i96 %r223, 64
-%r225 = or i96 %r219, %r224
-%r226 = zext i96 %r225 to i128
-%r228 = getelementptr i32, i32* %r1, i32 3
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i128
-%r231 = shl i128 %r230, 96
-%r232 = or i128 %r226, %r231
-%r233 = zext i128 %r232 to i160
-%r235 = getelementptr i32, i32* %r1, i32 4
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i160
-%r238 = shl i160 %r237, 128
-%r239 = or i160 %r233, %r238
-%r240 = zext i160 %r239 to i192
+%r186 = zext i32 %r185 to i384
+%r187 = shl i384 %r186, 352
+%r188 = or i384 %r182, %r187
+%r189 = zext i384 %r188 to i416
+%r191 = getelementptr i32, i32* %r3, i32 12
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i416
+%r194 = shl i416 %r193, 384
+%r195 = or i416 %r189, %r194
+%r196 = zext i416 %r195 to i448
+%r198 = getelementptr i32, i32* %r3, i32 13
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i448
+%r201 = shl i448 %r200, 416
+%r202 = or i448 %r196, %r201
+%r203 = zext i448 %r202 to i480
+%r205 = getelementptr i32, i32* %r3, i32 14
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i480
+%r208 = shl i480 %r207, 448
+%r209 = or i480 %r203, %r208
+%r210 = zext i480 %r209 to i512
+%r212 = getelementptr i32, i32* %r3, i32 15
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i512
+%r215 = shl i512 %r214, 480
+%r216 = or i512 %r210, %r215
+%r217 = zext i512 %r110 to i544
+%r218 = zext i512 %r216 to i544
+%r219 = sub i544 %r217, %r218
+%r220 = trunc i544 %r219 to i256
+%r222 = getelementptr i32, i32* %r1, i32 0
+%r223 = trunc i256 %r220 to i32
+store i32 %r223, i32* %r222
+%r224 = lshr i256 %r220, 32
+%r226 = getelementptr i32, i32* %r1, i32 1
+%r227 = trunc i256 %r224 to i32
+store i32 %r227, i32* %r226
+%r228 = lshr i256 %r224, 32
+%r230 = getelementptr i32, i32* %r1, i32 2
+%r231 = trunc i256 %r228 to i32
+store i32 %r231, i32* %r230
+%r232 = lshr i256 %r228, 32
+%r234 = getelementptr i32, i32* %r1, i32 3
+%r235 = trunc i256 %r232 to i32
+store i32 %r235, i32* %r234
+%r236 = lshr i256 %r232, 32
+%r238 = getelementptr i32, i32* %r1, i32 4
+%r239 = trunc i256 %r236 to i32
+store i32 %r239, i32* %r238
+%r240 = lshr i256 %r236, 32
 %r242 = getelementptr i32, i32* %r1, i32 5
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i192
-%r245 = shl i192 %r244, 160
-%r246 = or i192 %r240, %r245
-%r247 = zext i192 %r246 to i224
-%r249 = getelementptr i32, i32* %r1, i32 6
-%r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i224
-%r252 = shl i224 %r251, 192
-%r253 = or i224 %r247, %r252
-%r254 = zext i224 %r253 to i256
-%r256 = getelementptr i32, i32* %r1, i32 7
-%r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i256
-%r259 = shl i256 %r258, 224
-%r260 = or i256 %r254, %r259
-%r261 = zext i256 %r260 to i288
-%r262 = sub i288 %r210, %r261
-%r264 = getelementptr i32, i32* %r1, i32 8
-%r265 = load i32, i32* %r264
-%r266 = zext i32 %r265 to i64
-%r268 = getelementptr i32, i32* %r264, i32 1
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i64
-%r271 = shl i64 %r270, 32
-%r272 = or i64 %r266, %r271
-%r273 = zext i64 %r272 to i96
-%r275 = getelementptr i32, i32* %r264, i32 2
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i96
-%r278 = shl i96 %r277, 64
-%r279 = or i96 %r273, %r278
-%r280 = zext i96 %r279 to i128
-%r282 = getelementptr i32, i32* %r264, i32 3
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i128
-%r285 = shl i128 %r284, 96
-%r286 = or i128 %r280, %r285
-%r287 = zext i128 %r286 to i160
-%r289 = getelementptr i32, i32* %r264, i32 4
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i160
-%r292 = shl i160 %r291, 128
-%r293 = or i160 %r287, %r292
-%r294 = zext i160 %r293 to i192
-%r296 = getelementptr i32, i32* %r264, i32 5
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i192
-%r299 = shl i192 %r298, 160
-%r300 = or i192 %r294, %r299
-%r301 = zext i192 %r300 to i224
-%r303 = getelementptr i32, i32* %r264, i32 6
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i224
-%r306 = shl i224 %r305, 192
-%r307 = or i224 %r301, %r306
-%r308 = zext i224 %r307 to i256
-%r310 = getelementptr i32, i32* %r264, i32 7
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i256
-%r313 = shl i256 %r312, 224
-%r314 = or i256 %r308, %r313
-%r315 = zext i256 %r314 to i288
-%r316 = sub i288 %r262, %r315
-%r317 = zext i288 %r316 to i384
-%r319 = getelementptr i32, i32* %r1, i32 4
-%r320 = load i32, i32* %r319
-%r321 = zext i32 %r320 to i64
-%r323 = getelementptr i32, i32* %r319, i32 1
-%r324 = load i32, i32* %r323
-%r325 = zext i32 %r324 to i64
-%r326 = shl i64 %r325, 32
-%r327 = or i64 %r321, %r326
-%r328 = zext i64 %r327 to i96
-%r330 = getelementptr i32, i32* %r319, i32 2
-%r331 = load i32, i32* %r330
-%r332 = zext i32 %r331 to i96
-%r333 = shl i96 %r332, 64
-%r334 = or i96 %r328, %r333
-%r335 = zext i96 %r334 to i128
-%r337 = getelementptr i32, i32* %r319, i32 3
-%r338 = load i32, i32* %r337
-%r339 = zext i32 %r338 to i128
-%r340 = shl i128 %r339, 96
-%r341 = or i128 %r335, %r340
-%r342 = zext i128 %r341 to i160
-%r344 = getelementptr i32, i32* %r319, i32 4
-%r345 = load i32, i32* %r344
-%r346 = zext i32 %r345 to i160
-%r347 = shl i160 %r346, 128
-%r348 = or i160 %r342, %r347
-%r349 = zext i160 %r348 to i192
-%r351 = getelementptr i32, i32* %r319, i32 5
-%r352 = load i32, i32* %r351
-%r353 = zext i32 %r352 to i192
-%r354 = shl i192 %r353, 160
-%r355 = or i192 %r349, %r354
-%r356 = zext i192 %r355 to i224
-%r358 = getelementptr i32, i32* %r319, i32 6
-%r359 = load i32, i32* %r358
-%r360 = zext i32 %r359 to i224
-%r361 = shl i224 %r360, 192
-%r362 = or i224 %r356, %r361
-%r363 = zext i224 %r362 to i256
-%r365 = getelementptr i32, i32* %r319, i32 7
-%r366 = load i32, i32* %r365
-%r367 = zext i32 %r366 to i256
-%r368 = shl i256 %r367, 224
-%r369 = or i256 %r363, %r368
-%r370 = zext i256 %r369 to i288
-%r372 = getelementptr i32, i32* %r319, i32 8
-%r373 = load i32, i32* %r372
-%r374 = zext i32 %r373 to i288
-%r375 = shl i288 %r374, 256
-%r376 = or i288 %r370, %r375
-%r377 = zext i288 %r376 to i320
-%r379 = getelementptr i32, i32* %r319, i32 9
-%r380 = load i32, i32* %r379
-%r381 = zext i32 %r380 to i320
-%r382 = shl i320 %r381, 288
-%r383 = or i320 %r377, %r382
-%r384 = zext i320 %r383 to i352
-%r386 = getelementptr i32, i32* %r319, i32 10
-%r387 = load i32, i32* %r386
-%r388 = zext i32 %r387 to i352
-%r389 = shl i352 %r388, 320
-%r390 = or i352 %r384, %r389
-%r391 = zext i352 %r390 to i384
-%r393 = getelementptr i32, i32* %r319, i32 11
-%r394 = load i32, i32* %r393
-%r395 = zext i32 %r394 to i384
-%r396 = shl i384 %r395, 352
-%r397 = or i384 %r391, %r396
-%r398 = add i384 %r317, %r397
-%r400 = getelementptr i32, i32* %r1, i32 4
-%r401 = trunc i384 %r398 to i32
-%r403 = getelementptr i32, i32* %r400, i32 0
-store i32 %r401, i32* %r403
-%r404 = lshr i384 %r398, 32
-%r405 = trunc i384 %r404 to i32
-%r407 = getelementptr i32, i32* %r400, i32 1
-store i32 %r405, i32* %r407
-%r408 = lshr i384 %r404, 32
-%r409 = trunc i384 %r408 to i32
-%r411 = getelementptr i32, i32* %r400, i32 2
-store i32 %r409, i32* %r411
-%r412 = lshr i384 %r408, 32
-%r413 = trunc i384 %r412 to i32
-%r415 = getelementptr i32, i32* %r400, i32 3
-store i32 %r413, i32* %r415
-%r416 = lshr i384 %r412, 32
-%r417 = trunc i384 %r416 to i32
-%r419 = getelementptr i32, i32* %r400, i32 4
-store i32 %r417, i32* %r419
-%r420 = lshr i384 %r416, 32
-%r421 = trunc i384 %r420 to i32
-%r423 = getelementptr i32, i32* %r400, i32 5
-store i32 %r421, i32* %r423
-%r424 = lshr i384 %r420, 32
-%r425 = trunc i384 %r424 to i32
-%r427 = getelementptr i32, i32* %r400, i32 6
-store i32 %r425, i32* %r427
-%r428 = lshr i384 %r424, 32
-%r429 = trunc i384 %r428 to i32
-%r431 = getelementptr i32, i32* %r400, i32 7
-store i32 %r429, i32* %r431
-%r432 = lshr i384 %r428, 32
-%r433 = trunc i384 %r432 to i32
-%r435 = getelementptr i32, i32* %r400, i32 8
-store i32 %r433, i32* %r435
-%r436 = lshr i384 %r432, 32
-%r437 = trunc i384 %r436 to i32
-%r439 = getelementptr i32, i32* %r400, i32 9
-store i32 %r437, i32* %r439
-%r440 = lshr i384 %r436, 32
-%r441 = trunc i384 %r440 to i32
-%r443 = getelementptr i32, i32* %r400, i32 10
-store i32 %r441, i32* %r443
-%r444 = lshr i384 %r440, 32
-%r445 = trunc i384 %r444 to i32
-%r447 = getelementptr i32, i32* %r400, i32 11
-store i32 %r445, i32* %r447
+%r243 = trunc i256 %r240 to i32
+store i32 %r243, i32* %r242
+%r244 = lshr i256 %r240, 32
+%r246 = getelementptr i32, i32* %r1, i32 6
+%r247 = trunc i256 %r244 to i32
+store i32 %r247, i32* %r246
+%r248 = lshr i256 %r244, 32
+%r250 = getelementptr i32, i32* %r1, i32 7
+%r251 = trunc i256 %r248 to i32
+store i32 %r251, i32* %r250
+%r252 = lshr i544 %r219, 256
+%r253 = trunc i544 %r252 to i256
+%r254 = lshr i544 %r219, 512
+%r255 = trunc i544 %r254 to i1
+%r256 = load i32, i32* %r4
+%r257 = zext i32 %r256 to i64
+%r259 = getelementptr i32, i32* %r4, i32 1
+%r260 = load i32, i32* %r259
+%r261 = zext i32 %r260 to i64
+%r262 = shl i64 %r261, 32
+%r263 = or i64 %r257, %r262
+%r264 = zext i64 %r263 to i96
+%r266 = getelementptr i32, i32* %r4, i32 2
+%r267 = load i32, i32* %r266
+%r268 = zext i32 %r267 to i96
+%r269 = shl i96 %r268, 64
+%r270 = or i96 %r264, %r269
+%r271 = zext i96 %r270 to i128
+%r273 = getelementptr i32, i32* %r4, i32 3
+%r274 = load i32, i32* %r273
+%r275 = zext i32 %r274 to i128
+%r276 = shl i128 %r275, 96
+%r277 = or i128 %r271, %r276
+%r278 = zext i128 %r277 to i160
+%r280 = getelementptr i32, i32* %r4, i32 4
+%r281 = load i32, i32* %r280
+%r282 = zext i32 %r281 to i160
+%r283 = shl i160 %r282, 128
+%r284 = or i160 %r278, %r283
+%r285 = zext i160 %r284 to i192
+%r287 = getelementptr i32, i32* %r4, i32 5
+%r288 = load i32, i32* %r287
+%r289 = zext i32 %r288 to i192
+%r290 = shl i192 %r289, 160
+%r291 = or i192 %r285, %r290
+%r292 = zext i192 %r291 to i224
+%r294 = getelementptr i32, i32* %r4, i32 6
+%r295 = load i32, i32* %r294
+%r296 = zext i32 %r295 to i224
+%r297 = shl i224 %r296, 192
+%r298 = or i224 %r292, %r297
+%r299 = zext i224 %r298 to i256
+%r301 = getelementptr i32, i32* %r4, i32 7
+%r302 = load i32, i32* %r301
+%r303 = zext i32 %r302 to i256
+%r304 = shl i256 %r303, 224
+%r305 = or i256 %r299, %r304
+%r307 = select i1 %r255, i256 %r305, i256 0
+%r308 = add i256 %r253, %r307
+%r310 = getelementptr i32, i32* %r1, i32 8
+%r312 = getelementptr i32, i32* %r310, i32 0
+%r313 = trunc i256 %r308 to i32
+store i32 %r313, i32* %r312
+%r314 = lshr i256 %r308, 32
+%r316 = getelementptr i32, i32* %r310, i32 1
+%r317 = trunc i256 %r314 to i32
+store i32 %r317, i32* %r316
+%r318 = lshr i256 %r314, 32
+%r320 = getelementptr i32, i32* %r310, i32 2
+%r321 = trunc i256 %r318 to i32
+store i32 %r321, i32* %r320
+%r322 = lshr i256 %r318, 32
+%r324 = getelementptr i32, i32* %r310, i32 3
+%r325 = trunc i256 %r322 to i32
+store i32 %r325, i32* %r324
+%r326 = lshr i256 %r322, 32
+%r328 = getelementptr i32, i32* %r310, i32 4
+%r329 = trunc i256 %r326 to i32
+store i32 %r329, i32* %r328
+%r330 = lshr i256 %r326, 32
+%r332 = getelementptr i32, i32* %r310, i32 5
+%r333 = trunc i256 %r330 to i32
+store i32 %r333, i32* %r332
+%r334 = lshr i256 %r330, 32
+%r336 = getelementptr i32, i32* %r310, i32 6
+%r337 = trunc i256 %r334 to i32
+store i32 %r337, i32* %r336
+%r338 = lshr i256 %r334, 32
+%r340 = getelementptr i32, i32* %r310, i32 7
+%r341 = trunc i256 %r338 to i32
+store i32 %r341, i32* %r340
 ret void
 }
-define void @mcl_fpDbl_sqrPre8L(i32* noalias  %r1, i32* noalias  %r2)
+define i416 @mulPv384x32(i32* noalias  %r2, i32 %r3)
 {
-%r4 = getelementptr i32, i32* %r2, i32 4
-%r6 = getelementptr i32, i32* %r2, i32 4
-%r8 = getelementptr i32, i32* %r1, i32 8
-call void @mcl_fpDbl_mulPre4L(i32* %r1, i32* %r2, i32* %r2)
-call void @mcl_fpDbl_mulPre4L(i32* %r8, i32* %r4, i32* %r6)
-%r9 = load i32, i32* %r4
-%r10 = zext i32 %r9 to i64
-%r12 = getelementptr i32, i32* %r4, i32 1
-%r13 = load i32, i32* %r12
-%r14 = zext i32 %r13 to i64
-%r15 = shl i64 %r14, 32
-%r16 = or i64 %r10, %r15
-%r17 = zext i64 %r16 to i96
-%r19 = getelementptr i32, i32* %r4, i32 2
-%r20 = load i32, i32* %r19
-%r21 = zext i32 %r20 to i96
-%r22 = shl i96 %r21, 64
-%r23 = or i96 %r17, %r22
-%r24 = zext i96 %r23 to i128
-%r26 = getelementptr i32, i32* %r4, i32 3
-%r27 = load i32, i32* %r26
-%r28 = zext i32 %r27 to i128
-%r29 = shl i128 %r28, 96
-%r30 = or i128 %r24, %r29
-%r31 = zext i128 %r30 to i160
-%r32 = load i32, i32* %r2
-%r33 = zext i32 %r32 to i64
-%r35 = getelementptr i32, i32* %r2, i32 1
-%r36 = load i32, i32* %r35
-%r37 = zext i32 %r36 to i64
-%r38 = shl i64 %r37, 32
-%r39 = or i64 %r33, %r38
-%r40 = zext i64 %r39 to i96
-%r42 = getelementptr i32, i32* %r2, i32 2
-%r43 = load i32, i32* %r42
-%r44 = zext i32 %r43 to i96
-%r45 = shl i96 %r44, 64
-%r46 = or i96 %r40, %r45
-%r47 = zext i96 %r46 to i128
-%r49 = getelementptr i32, i32* %r2, i32 3
-%r50 = load i32, i32* %r49
-%r51 = zext i32 %r50 to i128
-%r52 = shl i128 %r51, 96
-%r53 = or i128 %r47, %r52
-%r54 = zext i128 %r53 to i160
-%r55 = load i32, i32* %r6
-%r56 = zext i32 %r55 to i64
-%r58 = getelementptr i32, i32* %r6, i32 1
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i64
-%r61 = shl i64 %r60, 32
-%r62 = or i64 %r56, %r61
-%r63 = zext i64 %r62 to i96
-%r65 = getelementptr i32, i32* %r6, i32 2
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i96
-%r68 = shl i96 %r67, 64
-%r69 = or i96 %r63, %r68
-%r70 = zext i96 %r69 to i128
-%r72 = getelementptr i32, i32* %r6, i32 3
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i128
-%r75 = shl i128 %r74, 96
-%r76 = or i128 %r70, %r75
-%r77 = zext i128 %r76 to i160
-%r78 = load i32, i32* %r2
-%r79 = zext i32 %r78 to i64
-%r81 = getelementptr i32, i32* %r2, i32 1
-%r82 = load i32, i32* %r81
-%r83 = zext i32 %r82 to i64
-%r84 = shl i64 %r83, 32
-%r85 = or i64 %r79, %r84
-%r86 = zext i64 %r85 to i96
-%r88 = getelementptr i32, i32* %r2, i32 2
-%r89 = load i32, i32* %r88
-%r90 = zext i32 %r89 to i96
-%r91 = shl i96 %r90, 64
-%r92 = or i96 %r86, %r91
-%r93 = zext i96 %r92 to i128
-%r95 = getelementptr i32, i32* %r2, i32 3
-%r96 = load i32, i32* %r95
-%r97 = zext i32 %r96 to i128
-%r98 = shl i128 %r97, 96
-%r99 = or i128 %r93, %r98
-%r100 = zext i128 %r99 to i160
-%r101 = add i160 %r31, %r54
-%r102 = add i160 %r77, %r100
-%r104 = alloca i32, i32 8
-%r105 = trunc i160 %r101 to i128
-%r106 = trunc i160 %r102 to i128
-%r107 = lshr i160 %r101, 128
-%r108 = trunc i160 %r107 to i1
-%r109 = lshr i160 %r102, 128
-%r110 = trunc i160 %r109 to i1
-%r111 = and i1 %r108, %r110
-%r113 = select i1 %r108, i128 %r106, i128 0
-%r115 = select i1 %r110, i128 %r105, i128 0
-%r117 = alloca i32, i32 4
-%r119 = alloca i32, i32 4
-%r120 = trunc i128 %r105 to i32
-%r122 = getelementptr i32, i32* %r117, i32 0
-store i32 %r120, i32* %r122
-%r123 = lshr i128 %r105, 32
-%r124 = trunc i128 %r123 to i32
-%r126 = getelementptr i32, i32* %r117, i32 1
-store i32 %r124, i32* %r126
-%r127 = lshr i128 %r123, 32
-%r128 = trunc i128 %r127 to i32
-%r130 = getelementptr i32, i32* %r117, i32 2
-store i32 %r128, i32* %r130
-%r131 = lshr i128 %r127, 32
-%r132 = trunc i128 %r131 to i32
-%r134 = getelementptr i32, i32* %r117, i32 3
-store i32 %r132, i32* %r134
-%r135 = trunc i128 %r106 to i32
-%r137 = getelementptr i32, i32* %r119, i32 0
-store i32 %r135, i32* %r137
-%r138 = lshr i128 %r106, 32
-%r139 = trunc i128 %r138 to i32
-%r141 = getelementptr i32, i32* %r119, i32 1
-store i32 %r139, i32* %r141
-%r142 = lshr i128 %r138, 32
-%r143 = trunc i128 %r142 to i32
-%r145 = getelementptr i32, i32* %r119, i32 2
-store i32 %r143, i32* %r145
-%r146 = lshr i128 %r142, 32
-%r147 = trunc i128 %r146 to i32
-%r149 = getelementptr i32, i32* %r119, i32 3
-store i32 %r147, i32* %r149
-call void @mcl_fpDbl_mulPre4L(i32* %r104, i32* %r117, i32* %r119)
-%r150 = load i32, i32* %r104
-%r151 = zext i32 %r150 to i64
-%r153 = getelementptr i32, i32* %r104, i32 1
-%r154 = load i32, i32* %r153
-%r155 = zext i32 %r154 to i64
-%r156 = shl i64 %r155, 32
-%r157 = or i64 %r151, %r156
-%r158 = zext i64 %r157 to i96
-%r160 = getelementptr i32, i32* %r104, i32 2
-%r161 = load i32, i32* %r160
-%r162 = zext i32 %r161 to i96
-%r163 = shl i96 %r162, 64
-%r164 = or i96 %r158, %r163
-%r165 = zext i96 %r164 to i128
-%r167 = getelementptr i32, i32* %r104, i32 3
-%r168 = load i32, i32* %r167
-%r169 = zext i32 %r168 to i128
-%r170 = shl i128 %r169, 96
-%r171 = or i128 %r165, %r170
-%r172 = zext i128 %r171 to i160
-%r174 = getelementptr i32, i32* %r104, i32 4
-%r175 = load i32, i32* %r174
-%r176 = zext i32 %r175 to i160
-%r177 = shl i160 %r176, 128
-%r178 = or i160 %r172, %r177
-%r179 = zext i160 %r178 to i192
-%r181 = getelementptr i32, i32* %r104, i32 5
-%r182 = load i32, i32* %r181
-%r183 = zext i32 %r182 to i192
-%r184 = shl i192 %r183, 160
-%r185 = or i192 %r179, %r184
-%r186 = zext i192 %r185 to i224
-%r188 = getelementptr i32, i32* %r104, i32 6
-%r189 = load i32, i32* %r188
-%r190 = zext i32 %r189 to i224
-%r191 = shl i224 %r190, 192
-%r192 = or i224 %r186, %r191
-%r193 = zext i224 %r192 to i256
-%r195 = getelementptr i32, i32* %r104, i32 7
-%r196 = load i32, i32* %r195
-%r197 = zext i32 %r196 to i256
-%r198 = shl i256 %r197, 224
-%r199 = or i256 %r193, %r198
-%r200 = zext i256 %r199 to i288
-%r201 = zext i1 %r111 to i288
-%r202 = shl i288 %r201, 256
-%r203 = or i288 %r200, %r202
-%r204 = zext i128 %r113 to i288
-%r205 = zext i128 %r115 to i288
-%r206 = shl i288 %r204, 128
-%r207 = shl i288 %r205, 128
-%r208 = add i288 %r203, %r206
-%r209 = add i288 %r208, %r207
-%r210 = load i32, i32* %r1
-%r211 = zext i32 %r210 to i64
-%r213 = getelementptr i32, i32* %r1, i32 1
-%r214 = load i32, i32* %r213
-%r215 = zext i32 %r214 to i64
-%r216 = shl i64 %r215, 32
-%r217 = or i64 %r211, %r216
-%r218 = zext i64 %r217 to i96
-%r220 = getelementptr i32, i32* %r1, i32 2
-%r221 = load i32, i32* %r220
-%r222 = zext i32 %r221 to i96
-%r223 = shl i96 %r222, 64
-%r224 = or i96 %r218, %r223
-%r225 = zext i96 %r224 to i128
-%r227 = getelementptr i32, i32* %r1, i32 3
-%r228 = load i32, i32* %r227
-%r229 = zext i32 %r228 to i128
-%r230 = shl i128 %r229, 96
-%r231 = or i128 %r225, %r230
-%r232 = zext i128 %r231 to i160
-%r234 = getelementptr i32, i32* %r1, i32 4
-%r235 = load i32, i32* %r234
-%r236 = zext i32 %r235 to i160
-%r237 = shl i160 %r236, 128
-%r238 = or i160 %r232, %r237
-%r239 = zext i160 %r238 to i192
-%r241 = getelementptr i32, i32* %r1, i32 5
-%r242 = load i32, i32* %r241
-%r243 = zext i32 %r242 to i192
-%r244 = shl i192 %r243, 160
-%r245 = or i192 %r239, %r244
-%r246 = zext i192 %r245 to i224
-%r248 = getelementptr i32, i32* %r1, i32 6
-%r249 = load i32, i32* %r248
-%r250 = zext i32 %r249 to i224
-%r251 = shl i224 %r250, 192
-%r252 = or i224 %r246, %r251
-%r253 = zext i224 %r252 to i256
-%r255 = getelementptr i32, i32* %r1, i32 7
-%r256 = load i32, i32* %r255
-%r257 = zext i32 %r256 to i256
-%r258 = shl i256 %r257, 224
-%r259 = or i256 %r253, %r258
-%r260 = zext i256 %r259 to i288
-%r261 = sub i288 %r209, %r260
-%r263 = getelementptr i32, i32* %r1, i32 8
-%r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i64
-%r267 = getelementptr i32, i32* %r263, i32 1
-%r268 = load i32, i32* %r267
-%r269 = zext i32 %r268 to i64
-%r270 = shl i64 %r269, 32
-%r271 = or i64 %r265, %r270
-%r272 = zext i64 %r271 to i96
-%r274 = getelementptr i32, i32* %r263, i32 2
-%r275 = load i32, i32* %r274
-%r276 = zext i32 %r275 to i96
-%r277 = shl i96 %r276, 64
-%r278 = or i96 %r272, %r277
-%r279 = zext i96 %r278 to i128
-%r281 = getelementptr i32, i32* %r263, i32 3
-%r282 = load i32, i32* %r281
-%r283 = zext i32 %r282 to i128
-%r284 = shl i128 %r283, 96
-%r285 = or i128 %r279, %r284
-%r286 = zext i128 %r285 to i160
-%r288 = getelementptr i32, i32* %r263, i32 4
-%r289 = load i32, i32* %r288
-%r290 = zext i32 %r289 to i160
-%r291 = shl i160 %r290, 128
-%r292 = or i160 %r286, %r291
-%r293 = zext i160 %r292 to i192
-%r295 = getelementptr i32, i32* %r263, i32 5
-%r296 = load i32, i32* %r295
-%r297 = zext i32 %r296 to i192
-%r298 = shl i192 %r297, 160
-%r299 = or i192 %r293, %r298
-%r300 = zext i192 %r299 to i224
-%r302 = getelementptr i32, i32* %r263, i32 6
-%r303 = load i32, i32* %r302
-%r304 = zext i32 %r303 to i224
-%r305 = shl i224 %r304, 192
-%r306 = or i224 %r300, %r305
-%r307 = zext i224 %r306 to i256
-%r309 = getelementptr i32, i32* %r263, i32 7
-%r310 = load i32, i32* %r309
-%r311 = zext i32 %r310 to i256
-%r312 = shl i256 %r311, 224
-%r313 = or i256 %r307, %r312
-%r314 = zext i256 %r313 to i288
-%r315 = sub i288 %r261, %r314
-%r316 = zext i288 %r315 to i384
-%r318 = getelementptr i32, i32* %r1, i32 4
-%r319 = load i32, i32* %r318
-%r320 = zext i32 %r319 to i64
-%r322 = getelementptr i32, i32* %r318, i32 1
-%r323 = load i32, i32* %r322
-%r324 = zext i32 %r323 to i64
-%r325 = shl i64 %r324, 32
-%r326 = or i64 %r320, %r325
-%r327 = zext i64 %r326 to i96
-%r329 = getelementptr i32, i32* %r318, i32 2
-%r330 = load i32, i32* %r329
-%r331 = zext i32 %r330 to i96
-%r332 = shl i96 %r331, 64
-%r333 = or i96 %r327, %r332
-%r334 = zext i96 %r333 to i128
-%r336 = getelementptr i32, i32* %r318, i32 3
-%r337 = load i32, i32* %r336
-%r338 = zext i32 %r337 to i128
-%r339 = shl i128 %r338, 96
-%r340 = or i128 %r334, %r339
-%r341 = zext i128 %r340 to i160
-%r343 = getelementptr i32, i32* %r318, i32 4
-%r344 = load i32, i32* %r343
-%r345 = zext i32 %r344 to i160
-%r346 = shl i160 %r345, 128
-%r347 = or i160 %r341, %r346
-%r348 = zext i160 %r347 to i192
-%r350 = getelementptr i32, i32* %r318, i32 5
-%r351 = load i32, i32* %r350
-%r352 = zext i32 %r351 to i192
-%r353 = shl i192 %r352, 160
-%r354 = or i192 %r348, %r353
-%r355 = zext i192 %r354 to i224
-%r357 = getelementptr i32, i32* %r318, i32 6
-%r358 = load i32, i32* %r357
-%r359 = zext i32 %r358 to i224
-%r360 = shl i224 %r359, 192
-%r361 = or i224 %r355, %r360
-%r362 = zext i224 %r361 to i256
-%r364 = getelementptr i32, i32* %r318, i32 7
-%r365 = load i32, i32* %r364
-%r366 = zext i32 %r365 to i256
-%r367 = shl i256 %r366, 224
-%r368 = or i256 %r362, %r367
-%r369 = zext i256 %r368 to i288
-%r371 = getelementptr i32, i32* %r318, i32 8
-%r372 = load i32, i32* %r371
-%r373 = zext i32 %r372 to i288
-%r374 = shl i288 %r373, 256
-%r375 = or i288 %r369, %r374
-%r376 = zext i288 %r375 to i320
-%r378 = getelementptr i32, i32* %r318, i32 9
-%r379 = load i32, i32* %r378
-%r380 = zext i32 %r379 to i320
-%r381 = shl i320 %r380, 288
-%r382 = or i320 %r376, %r381
-%r383 = zext i320 %r382 to i352
-%r385 = getelementptr i32, i32* %r318, i32 10
-%r386 = load i32, i32* %r385
-%r387 = zext i32 %r386 to i352
-%r388 = shl i352 %r387, 320
-%r389 = or i352 %r383, %r388
-%r390 = zext i352 %r389 to i384
-%r392 = getelementptr i32, i32* %r318, i32 11
-%r393 = load i32, i32* %r392
-%r394 = zext i32 %r393 to i384
-%r395 = shl i384 %r394, 352
-%r396 = or i384 %r390, %r395
-%r397 = add i384 %r316, %r396
-%r399 = getelementptr i32, i32* %r1, i32 4
-%r400 = trunc i384 %r397 to i32
-%r402 = getelementptr i32, i32* %r399, i32 0
-store i32 %r400, i32* %r402
-%r403 = lshr i384 %r397, 32
-%r404 = trunc i384 %r403 to i32
-%r406 = getelementptr i32, i32* %r399, i32 1
-store i32 %r404, i32* %r406
-%r407 = lshr i384 %r403, 32
-%r408 = trunc i384 %r407 to i32
-%r410 = getelementptr i32, i32* %r399, i32 2
-store i32 %r408, i32* %r410
-%r411 = lshr i384 %r407, 32
-%r412 = trunc i384 %r411 to i32
-%r414 = getelementptr i32, i32* %r399, i32 3
-store i32 %r412, i32* %r414
-%r415 = lshr i384 %r411, 32
-%r416 = trunc i384 %r415 to i32
-%r418 = getelementptr i32, i32* %r399, i32 4
-store i32 %r416, i32* %r418
-%r419 = lshr i384 %r415, 32
-%r420 = trunc i384 %r419 to i32
-%r422 = getelementptr i32, i32* %r399, i32 5
-store i32 %r420, i32* %r422
-%r423 = lshr i384 %r419, 32
-%r424 = trunc i384 %r423 to i32
-%r426 = getelementptr i32, i32* %r399, i32 6
-store i32 %r424, i32* %r426
-%r427 = lshr i384 %r423, 32
-%r428 = trunc i384 %r427 to i32
-%r430 = getelementptr i32, i32* %r399, i32 7
-store i32 %r428, i32* %r430
-%r431 = lshr i384 %r427, 32
-%r432 = trunc i384 %r431 to i32
-%r434 = getelementptr i32, i32* %r399, i32 8
-store i32 %r432, i32* %r434
-%r435 = lshr i384 %r431, 32
-%r436 = trunc i384 %r435 to i32
-%r438 = getelementptr i32, i32* %r399, i32 9
-store i32 %r436, i32* %r438
-%r439 = lshr i384 %r435, 32
-%r440 = trunc i384 %r439 to i32
-%r442 = getelementptr i32, i32* %r399, i32 10
-store i32 %r440, i32* %r442
-%r443 = lshr i384 %r439, 32
-%r444 = trunc i384 %r443 to i32
-%r446 = getelementptr i32, i32* %r399, i32 11
-store i32 %r444, i32* %r446
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
+%r34 = trunc i64 %r33 to i32
+%r35 = call i32 @extractHigh32(i64 %r33)
+%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
+%r38 = trunc i64 %r37 to i32
+%r39 = call i32 @extractHigh32(i64 %r37)
+%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
+%r42 = trunc i64 %r41 to i32
+%r43 = call i32 @extractHigh32(i64 %r41)
+%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
+%r46 = trunc i64 %r45 to i32
+%r47 = call i32 @extractHigh32(i64 %r45)
+%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
+%r50 = trunc i64 %r49 to i32
+%r51 = call i32 @extractHigh32(i64 %r49)
+%r52 = zext i32 %r6 to i64
+%r53 = zext i32 %r10 to i64
+%r54 = shl i64 %r53, 32
+%r55 = or i64 %r52, %r54
+%r56 = zext i64 %r55 to i96
+%r57 = zext i32 %r14 to i96
+%r58 = shl i96 %r57, 64
+%r59 = or i96 %r56, %r58
+%r60 = zext i96 %r59 to i128
+%r61 = zext i32 %r18 to i128
+%r62 = shl i128 %r61, 96
+%r63 = or i128 %r60, %r62
+%r64 = zext i128 %r63 to i160
+%r65 = zext i32 %r22 to i160
+%r66 = shl i160 %r65, 128
+%r67 = or i160 %r64, %r66
+%r68 = zext i160 %r67 to i192
+%r69 = zext i32 %r26 to i192
+%r70 = shl i192 %r69, 160
+%r71 = or i192 %r68, %r70
+%r72 = zext i192 %r71 to i224
+%r73 = zext i32 %r30 to i224
+%r74 = shl i224 %r73, 192
+%r75 = or i224 %r72, %r74
+%r76 = zext i224 %r75 to i256
+%r77 = zext i32 %r34 to i256
+%r78 = shl i256 %r77, 224
+%r79 = or i256 %r76, %r78
+%r80 = zext i256 %r79 to i288
+%r81 = zext i32 %r38 to i288
+%r82 = shl i288 %r81, 256
+%r83 = or i288 %r80, %r82
+%r84 = zext i288 %r83 to i320
+%r85 = zext i32 %r42 to i320
+%r86 = shl i320 %r85, 288
+%r87 = or i320 %r84, %r86
+%r88 = zext i320 %r87 to i352
+%r89 = zext i32 %r46 to i352
+%r90 = shl i352 %r89, 320
+%r91 = or i352 %r88, %r90
+%r92 = zext i352 %r91 to i384
+%r93 = zext i32 %r50 to i384
+%r94 = shl i384 %r93, 352
+%r95 = or i384 %r92, %r94
+%r96 = zext i32 %r7 to i64
+%r97 = zext i32 %r11 to i64
+%r98 = shl i64 %r97, 32
+%r99 = or i64 %r96, %r98
+%r100 = zext i64 %r99 to i96
+%r101 = zext i32 %r15 to i96
+%r102 = shl i96 %r101, 64
+%r103 = or i96 %r100, %r102
+%r104 = zext i96 %r103 to i128
+%r105 = zext i32 %r19 to i128
+%r106 = shl i128 %r105, 96
+%r107 = or i128 %r104, %r106
+%r108 = zext i128 %r107 to i160
+%r109 = zext i32 %r23 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r108, %r110
+%r112 = zext i160 %r111 to i192
+%r113 = zext i32 %r27 to i192
+%r114 = shl i192 %r113, 160
+%r115 = or i192 %r112, %r114
+%r116 = zext i192 %r115 to i224
+%r117 = zext i32 %r31 to i224
+%r118 = shl i224 %r117, 192
+%r119 = or i224 %r116, %r118
+%r120 = zext i224 %r119 to i256
+%r121 = zext i32 %r35 to i256
+%r122 = shl i256 %r121, 224
+%r123 = or i256 %r120, %r122
+%r124 = zext i256 %r123 to i288
+%r125 = zext i32 %r39 to i288
+%r126 = shl i288 %r125, 256
+%r127 = or i288 %r124, %r126
+%r128 = zext i288 %r127 to i320
+%r129 = zext i32 %r43 to i320
+%r130 = shl i320 %r129, 288
+%r131 = or i320 %r128, %r130
+%r132 = zext i320 %r131 to i352
+%r133 = zext i32 %r47 to i352
+%r134 = shl i352 %r133, 320
+%r135 = or i352 %r132, %r134
+%r136 = zext i352 %r135 to i384
+%r137 = zext i32 %r51 to i384
+%r138 = shl i384 %r137, 352
+%r139 = or i384 %r136, %r138
+%r140 = zext i384 %r95 to i416
+%r141 = zext i384 %r139 to i416
+%r142 = shl i416 %r141, 32
+%r143 = add i416 %r140, %r142
+ret i416 %r143
+}
+define void @mcl_fp_mulUnitPre12L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
+{
+%r4 = call i416 @mulPv384x32(i32* %r2, i32 %r3)
+%r6 = getelementptr i32, i32* %r1, i32 0
+%r7 = trunc i416 %r4 to i32
+store i32 %r7, i32* %r6
+%r8 = lshr i416 %r4, 32
+%r10 = getelementptr i32, i32* %r1, i32 1
+%r11 = trunc i416 %r8 to i32
+store i32 %r11, i32* %r10
+%r12 = lshr i416 %r8, 32
+%r14 = getelementptr i32, i32* %r1, i32 2
+%r15 = trunc i416 %r12 to i32
+store i32 %r15, i32* %r14
+%r16 = lshr i416 %r12, 32
+%r18 = getelementptr i32, i32* %r1, i32 3
+%r19 = trunc i416 %r16 to i32
+store i32 %r19, i32* %r18
+%r20 = lshr i416 %r16, 32
+%r22 = getelementptr i32, i32* %r1, i32 4
+%r23 = trunc i416 %r20 to i32
+store i32 %r23, i32* %r22
+%r24 = lshr i416 %r20, 32
+%r26 = getelementptr i32, i32* %r1, i32 5
+%r27 = trunc i416 %r24 to i32
+store i32 %r27, i32* %r26
+%r28 = lshr i416 %r24, 32
+%r30 = getelementptr i32, i32* %r1, i32 6
+%r31 = trunc i416 %r28 to i32
+store i32 %r31, i32* %r30
+%r32 = lshr i416 %r28, 32
+%r34 = getelementptr i32, i32* %r1, i32 7
+%r35 = trunc i416 %r32 to i32
+store i32 %r35, i32* %r34
+%r36 = lshr i416 %r32, 32
+%r38 = getelementptr i32, i32* %r1, i32 8
+%r39 = trunc i416 %r36 to i32
+store i32 %r39, i32* %r38
+%r40 = lshr i416 %r36, 32
+%r42 = getelementptr i32, i32* %r1, i32 9
+%r43 = trunc i416 %r40 to i32
+store i32 %r43, i32* %r42
+%r44 = lshr i416 %r40, 32
+%r46 = getelementptr i32, i32* %r1, i32 10
+%r47 = trunc i416 %r44 to i32
+store i32 %r47, i32* %r46
+%r48 = lshr i416 %r44, 32
+%r50 = getelementptr i32, i32* %r1, i32 11
+%r51 = trunc i416 %r48 to i32
+store i32 %r51, i32* %r50
+%r52 = lshr i416 %r48, 32
+%r54 = getelementptr i32, i32* %r1, i32 12
+%r55 = trunc i416 %r52 to i32
+store i32 %r55, i32* %r54
 ret void
 }
-define void @mcl_fp_mont8L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+define void @mcl_fpDbl_mulPre12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
 {
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i288 @mulPv256x32(i32* %r2, i32 %r10)
-%r12 = zext i288 %r11 to i320
-%r13 = trunc i288 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i288 @mulPv256x32(i32* %r4, i32 %r14)
-%r16 = zext i288 %r15 to i320
-%r17 = add i320 %r12, %r16
-%r18 = lshr i320 %r17, 32
-%r20 = getelementptr i32, i32* %r3, i32 1
+%r5 = getelementptr i32, i32* %r2, i32 6
+%r7 = getelementptr i32, i32* %r3, i32 6
+%r9 = getelementptr i32, i32* %r1, i32 12
+call void @mcl_fpDbl_mulPre6L(i32* %r1, i32* %r2, i32* %r3)
+call void @mcl_fpDbl_mulPre6L(i32* %r9, i32* %r5, i32* %r7)
+%r10 = load i32, i32* %r5
+%r11 = zext i32 %r10 to i64
+%r13 = getelementptr i32, i32* %r5, i32 1
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i64
+%r16 = shl i64 %r15, 32
+%r17 = or i64 %r11, %r16
+%r18 = zext i64 %r17 to i96
+%r20 = getelementptr i32, i32* %r5, i32 2
 %r21 = load i32, i32* %r20
-%r22 = call i288 @mulPv256x32(i32* %r2, i32 %r21)
-%r23 = zext i288 %r22 to i320
-%r24 = add i320 %r18, %r23
-%r25 = trunc i320 %r24 to i32
-%r26 = mul i32 %r25, %r7
-%r27 = call i288 @mulPv256x32(i32* %r4, i32 %r26)
-%r28 = zext i288 %r27 to i320
-%r29 = add i320 %r24, %r28
-%r30 = lshr i320 %r29, 32
-%r32 = getelementptr i32, i32* %r3, i32 2
-%r33 = load i32, i32* %r32
-%r34 = call i288 @mulPv256x32(i32* %r2, i32 %r33)
-%r35 = zext i288 %r34 to i320
-%r36 = add i320 %r30, %r35
-%r37 = trunc i320 %r36 to i32
-%r38 = mul i32 %r37, %r7
-%r39 = call i288 @mulPv256x32(i32* %r4, i32 %r38)
-%r40 = zext i288 %r39 to i320
-%r41 = add i320 %r36, %r40
-%r42 = lshr i320 %r41, 32
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = call i288 @mulPv256x32(i32* %r2, i32 %r45)
-%r47 = zext i288 %r46 to i320
-%r48 = add i320 %r42, %r47
-%r49 = trunc i320 %r48 to i32
-%r50 = mul i32 %r49, %r7
-%r51 = call i288 @mulPv256x32(i32* %r4, i32 %r50)
-%r52 = zext i288 %r51 to i320
-%r53 = add i320 %r48, %r52
-%r54 = lshr i320 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 4
-%r57 = load i32, i32* %r56
-%r58 = call i288 @mulPv256x32(i32* %r2, i32 %r57)
-%r59 = zext i288 %r58 to i320
-%r60 = add i320 %r54, %r59
-%r61 = trunc i320 %r60 to i32
-%r62 = mul i32 %r61, %r7
-%r63 = call i288 @mulPv256x32(i32* %r4, i32 %r62)
-%r64 = zext i288 %r63 to i320
-%r65 = add i320 %r60, %r64
-%r66 = lshr i320 %r65, 32
-%r68 = getelementptr i32, i32* %r3, i32 5
-%r69 = load i32, i32* %r68
-%r70 = call i288 @mulPv256x32(i32* %r2, i32 %r69)
-%r71 = zext i288 %r70 to i320
-%r72 = add i320 %r66, %r71
-%r73 = trunc i320 %r72 to i32
-%r74 = mul i32 %r73, %r7
-%r75 = call i288 @mulPv256x32(i32* %r4, i32 %r74)
-%r76 = zext i288 %r75 to i320
-%r77 = add i320 %r72, %r76
-%r78 = lshr i320 %r77, 32
-%r80 = getelementptr i32, i32* %r3, i32 6
-%r81 = load i32, i32* %r80
-%r82 = call i288 @mulPv256x32(i32* %r2, i32 %r81)
-%r83 = zext i288 %r82 to i320
-%r84 = add i320 %r78, %r83
-%r85 = trunc i320 %r84 to i32
-%r86 = mul i32 %r85, %r7
-%r87 = call i288 @mulPv256x32(i32* %r4, i32 %r86)
-%r88 = zext i288 %r87 to i320
-%r89 = add i320 %r84, %r88
-%r90 = lshr i320 %r89, 32
-%r92 = getelementptr i32, i32* %r3, i32 7
-%r93 = load i32, i32* %r92
-%r94 = call i288 @mulPv256x32(i32* %r2, i32 %r93)
-%r95 = zext i288 %r94 to i320
-%r96 = add i320 %r90, %r95
-%r97 = trunc i320 %r96 to i32
-%r98 = mul i32 %r97, %r7
-%r99 = call i288 @mulPv256x32(i32* %r4, i32 %r98)
-%r100 = zext i288 %r99 to i320
-%r101 = add i320 %r96, %r100
-%r102 = lshr i320 %r101, 32
-%r103 = trunc i320 %r102 to i288
-%r104 = load i32, i32* %r4
-%r105 = zext i32 %r104 to i64
-%r107 = getelementptr i32, i32* %r4, i32 1
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i64
-%r110 = shl i64 %r109, 32
-%r111 = or i64 %r105, %r110
-%r112 = zext i64 %r111 to i96
-%r114 = getelementptr i32, i32* %r4, i32 2
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i96
-%r117 = shl i96 %r116, 64
-%r118 = or i96 %r112, %r117
-%r119 = zext i96 %r118 to i128
-%r121 = getelementptr i32, i32* %r4, i32 3
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i128
-%r124 = shl i128 %r123, 96
-%r125 = or i128 %r119, %r124
-%r126 = zext i128 %r125 to i160
-%r128 = getelementptr i32, i32* %r4, i32 4
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i160
-%r131 = shl i160 %r130, 128
-%r132 = or i160 %r126, %r131
-%r133 = zext i160 %r132 to i192
-%r135 = getelementptr i32, i32* %r4, i32 5
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i192
-%r138 = shl i192 %r137, 160
-%r139 = or i192 %r133, %r138
-%r140 = zext i192 %r139 to i224
-%r142 = getelementptr i32, i32* %r4, i32 6
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i224
-%r145 = shl i224 %r144, 192
-%r146 = or i224 %r140, %r145
-%r147 = zext i224 %r146 to i256
-%r149 = getelementptr i32, i32* %r4, i32 7
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i256
-%r152 = shl i256 %r151, 224
-%r153 = or i256 %r147, %r152
-%r154 = zext i256 %r153 to i288
-%r155 = sub i288 %r103, %r154
-%r156 = lshr i288 %r155, 256
-%r157 = trunc i288 %r156 to i1
-%r158 = select i1 %r157, i288 %r103, i288 %r155
-%r159 = trunc i288 %r158 to i256
-%r160 = trunc i256 %r159 to i32
-%r162 = getelementptr i32, i32* %r1, i32 0
-store i32 %r160, i32* %r162
-%r163 = lshr i256 %r159, 32
-%r164 = trunc i256 %r163 to i32
-%r166 = getelementptr i32, i32* %r1, i32 1
-store i32 %r164, i32* %r166
-%r167 = lshr i256 %r163, 32
-%r168 = trunc i256 %r167 to i32
-%r170 = getelementptr i32, i32* %r1, i32 2
-store i32 %r168, i32* %r170
-%r171 = lshr i256 %r167, 32
-%r172 = trunc i256 %r171 to i32
-%r174 = getelementptr i32, i32* %r1, i32 3
-store i32 %r172, i32* %r174
-%r175 = lshr i256 %r171, 32
-%r176 = trunc i256 %r175 to i32
-%r178 = getelementptr i32, i32* %r1, i32 4
-store i32 %r176, i32* %r178
-%r179 = lshr i256 %r175, 32
-%r180 = trunc i256 %r179 to i32
-%r182 = getelementptr i32, i32* %r1, i32 5
-store i32 %r180, i32* %r182
-%r183 = lshr i256 %r179, 32
-%r184 = trunc i256 %r183 to i32
-%r186 = getelementptr i32, i32* %r1, i32 6
-store i32 %r184, i32* %r186
-%r187 = lshr i256 %r183, 32
-%r188 = trunc i256 %r187 to i32
-%r190 = getelementptr i32, i32* %r1, i32 7
-store i32 %r188, i32* %r190
-ret void
-}
-define void @mcl_fp_montNF8L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i288 @mulPv256x32(i32* %r2, i32 %r8)
-%r10 = trunc i288 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i288 @mulPv256x32(i32* %r4, i32 %r11)
-%r13 = add i288 %r9, %r12
-%r14 = lshr i288 %r13, 32
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = call i288 @mulPv256x32(i32* %r2, i32 %r17)
-%r19 = add i288 %r14, %r18
-%r20 = trunc i288 %r19 to i32
-%r21 = mul i32 %r20, %r7
-%r22 = call i288 @mulPv256x32(i32* %r4, i32 %r21)
-%r23 = add i288 %r19, %r22
-%r24 = lshr i288 %r23, 32
-%r26 = getelementptr i32, i32* %r3, i32 2
-%r27 = load i32, i32* %r26
-%r28 = call i288 @mulPv256x32(i32* %r2, i32 %r27)
-%r29 = add i288 %r24, %r28
-%r30 = trunc i288 %r29 to i32
-%r31 = mul i32 %r30, %r7
-%r32 = call i288 @mulPv256x32(i32* %r4, i32 %r31)
-%r33 = add i288 %r29, %r32
-%r34 = lshr i288 %r33, 32
-%r36 = getelementptr i32, i32* %r3, i32 3
-%r37 = load i32, i32* %r36
-%r38 = call i288 @mulPv256x32(i32* %r2, i32 %r37)
-%r39 = add i288 %r34, %r38
-%r40 = trunc i288 %r39 to i32
-%r41 = mul i32 %r40, %r7
-%r42 = call i288 @mulPv256x32(i32* %r4, i32 %r41)
-%r43 = add i288 %r39, %r42
-%r44 = lshr i288 %r43, 32
-%r46 = getelementptr i32, i32* %r3, i32 4
-%r47 = load i32, i32* %r46
-%r48 = call i288 @mulPv256x32(i32* %r2, i32 %r47)
-%r49 = add i288 %r44, %r48
-%r50 = trunc i288 %r49 to i32
-%r51 = mul i32 %r50, %r7
-%r52 = call i288 @mulPv256x32(i32* %r4, i32 %r51)
-%r53 = add i288 %r49, %r52
-%r54 = lshr i288 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 5
-%r57 = load i32, i32* %r56
-%r58 = call i288 @mulPv256x32(i32* %r2, i32 %r57)
-%r59 = add i288 %r54, %r58
-%r60 = trunc i288 %r59 to i32
-%r61 = mul i32 %r60, %r7
-%r62 = call i288 @mulPv256x32(i32* %r4, i32 %r61)
-%r63 = add i288 %r59, %r62
-%r64 = lshr i288 %r63, 32
-%r66 = getelementptr i32, i32* %r3, i32 6
-%r67 = load i32, i32* %r66
-%r68 = call i288 @mulPv256x32(i32* %r2, i32 %r67)
-%r69 = add i288 %r64, %r68
-%r70 = trunc i288 %r69 to i32
-%r71 = mul i32 %r70, %r7
-%r72 = call i288 @mulPv256x32(i32* %r4, i32 %r71)
-%r73 = add i288 %r69, %r72
-%r74 = lshr i288 %r73, 32
-%r76 = getelementptr i32, i32* %r3, i32 7
-%r77 = load i32, i32* %r76
-%r78 = call i288 @mulPv256x32(i32* %r2, i32 %r77)
-%r79 = add i288 %r74, %r78
-%r80 = trunc i288 %r79 to i32
-%r81 = mul i32 %r80, %r7
-%r82 = call i288 @mulPv256x32(i32* %r4, i32 %r81)
-%r83 = add i288 %r79, %r82
-%r84 = lshr i288 %r83, 32
-%r85 = trunc i288 %r84 to i256
-%r86 = load i32, i32* %r4
-%r87 = zext i32 %r86 to i64
-%r89 = getelementptr i32, i32* %r4, i32 1
-%r90 = load i32, i32* %r89
-%r91 = zext i32 %r90 to i64
-%r92 = shl i64 %r91, 32
-%r93 = or i64 %r87, %r92
-%r94 = zext i64 %r93 to i96
-%r96 = getelementptr i32, i32* %r4, i32 2
-%r97 = load i32, i32* %r96
-%r98 = zext i32 %r97 to i96
-%r99 = shl i96 %r98, 64
-%r100 = or i96 %r94, %r99
-%r101 = zext i96 %r100 to i128
-%r103 = getelementptr i32, i32* %r4, i32 3
-%r104 = load i32, i32* %r103
-%r105 = zext i32 %r104 to i128
-%r106 = shl i128 %r105, 96
-%r107 = or i128 %r101, %r106
-%r108 = zext i128 %r107 to i160
-%r110 = getelementptr i32, i32* %r4, i32 4
-%r111 = load i32, i32* %r110
-%r112 = zext i32 %r111 to i160
-%r113 = shl i160 %r112, 128
-%r114 = or i160 %r108, %r113
-%r115 = zext i160 %r114 to i192
-%r117 = getelementptr i32, i32* %r4, i32 5
-%r118 = load i32, i32* %r117
-%r119 = zext i32 %r118 to i192
-%r120 = shl i192 %r119, 160
-%r121 = or i192 %r115, %r120
-%r122 = zext i192 %r121 to i224
-%r124 = getelementptr i32, i32* %r4, i32 6
-%r125 = load i32, i32* %r124
-%r126 = zext i32 %r125 to i224
-%r127 = shl i224 %r126, 192
-%r128 = or i224 %r122, %r127
-%r129 = zext i224 %r128 to i256
-%r131 = getelementptr i32, i32* %r4, i32 7
-%r132 = load i32, i32* %r131
-%r133 = zext i32 %r132 to i256
-%r134 = shl i256 %r133, 224
-%r135 = or i256 %r129, %r134
-%r136 = sub i256 %r85, %r135
-%r137 = lshr i256 %r136, 255
-%r138 = trunc i256 %r137 to i1
-%r139 = select i1 %r138, i256 %r85, i256 %r136
-%r140 = trunc i256 %r139 to i32
-%r142 = getelementptr i32, i32* %r1, i32 0
-store i32 %r140, i32* %r142
-%r143 = lshr i256 %r139, 32
-%r144 = trunc i256 %r143 to i32
-%r146 = getelementptr i32, i32* %r1, i32 1
-store i32 %r144, i32* %r146
-%r147 = lshr i256 %r143, 32
-%r148 = trunc i256 %r147 to i32
-%r150 = getelementptr i32, i32* %r1, i32 2
-store i32 %r148, i32* %r150
-%r151 = lshr i256 %r147, 32
-%r152 = trunc i256 %r151 to i32
-%r154 = getelementptr i32, i32* %r1, i32 3
-store i32 %r152, i32* %r154
-%r155 = lshr i256 %r151, 32
-%r156 = trunc i256 %r155 to i32
-%r158 = getelementptr i32, i32* %r1, i32 4
-store i32 %r156, i32* %r158
-%r159 = lshr i256 %r155, 32
-%r160 = trunc i256 %r159 to i32
-%r162 = getelementptr i32, i32* %r1, i32 5
-store i32 %r160, i32* %r162
-%r163 = lshr i256 %r159, 32
-%r164 = trunc i256 %r163 to i32
-%r166 = getelementptr i32, i32* %r1, i32 6
-store i32 %r164, i32* %r166
-%r167 = lshr i256 %r163, 32
-%r168 = trunc i256 %r167 to i32
-%r170 = getelementptr i32, i32* %r1, i32 7
-store i32 %r168, i32* %r170
-ret void
-}
-define void @mcl_fp_montRed8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = zext i96 %r21 to i128
-%r24 = getelementptr i32, i32* %r3, i32 3
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i128
-%r27 = shl i128 %r26, 96
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i160
-%r31 = getelementptr i32, i32* %r3, i32 4
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i160
-%r34 = shl i160 %r33, 128
-%r35 = or i160 %r29, %r34
-%r36 = zext i160 %r35 to i192
-%r38 = getelementptr i32, i32* %r3, i32 5
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i192
-%r41 = shl i192 %r40, 160
-%r42 = or i192 %r36, %r41
-%r43 = zext i192 %r42 to i224
-%r45 = getelementptr i32, i32* %r3, i32 6
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i224
-%r48 = shl i224 %r47, 192
-%r49 = or i224 %r43, %r48
-%r50 = zext i224 %r49 to i256
-%r52 = getelementptr i32, i32* %r3, i32 7
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i256
-%r55 = shl i256 %r54, 224
-%r56 = or i256 %r50, %r55
-%r57 = load i32, i32* %r2
-%r58 = zext i32 %r57 to i64
-%r60 = getelementptr i32, i32* %r2, i32 1
-%r61 = load i32, i32* %r60
-%r62 = zext i32 %r61 to i64
-%r63 = shl i64 %r62, 32
-%r64 = or i64 %r58, %r63
-%r65 = zext i64 %r64 to i96
-%r67 = getelementptr i32, i32* %r2, i32 2
-%r68 = load i32, i32* %r67
-%r69 = zext i32 %r68 to i96
-%r70 = shl i96 %r69, 64
-%r71 = or i96 %r65, %r70
-%r72 = zext i96 %r71 to i128
-%r74 = getelementptr i32, i32* %r2, i32 3
-%r75 = load i32, i32* %r74
-%r76 = zext i32 %r75 to i128
-%r77 = shl i128 %r76, 96
-%r78 = or i128 %r72, %r77
-%r79 = zext i128 %r78 to i160
-%r81 = getelementptr i32, i32* %r2, i32 4
-%r82 = load i32, i32* %r81
-%r83 = zext i32 %r82 to i160
-%r84 = shl i160 %r83, 128
-%r85 = or i160 %r79, %r84
-%r86 = zext i160 %r85 to i192
-%r88 = getelementptr i32, i32* %r2, i32 5
-%r89 = load i32, i32* %r88
-%r90 = zext i32 %r89 to i192
-%r91 = shl i192 %r90, 160
-%r92 = or i192 %r86, %r91
-%r93 = zext i192 %r92 to i224
-%r95 = getelementptr i32, i32* %r2, i32 6
-%r96 = load i32, i32* %r95
-%r97 = zext i32 %r96 to i224
-%r98 = shl i224 %r97, 192
-%r99 = or i224 %r93, %r98
-%r100 = zext i224 %r99 to i256
-%r102 = getelementptr i32, i32* %r2, i32 7
-%r103 = load i32, i32* %r102
-%r104 = zext i32 %r103 to i256
-%r105 = shl i256 %r104, 224
-%r106 = or i256 %r100, %r105
-%r107 = zext i256 %r106 to i288
-%r109 = getelementptr i32, i32* %r2, i32 8
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i288
-%r112 = shl i288 %r111, 256
-%r113 = or i288 %r107, %r112
-%r114 = zext i288 %r113 to i320
-%r116 = getelementptr i32, i32* %r2, i32 9
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i320
-%r119 = shl i320 %r118, 288
-%r120 = or i320 %r114, %r119
-%r121 = zext i320 %r120 to i352
-%r123 = getelementptr i32, i32* %r2, i32 10
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i352
-%r126 = shl i352 %r125, 320
-%r127 = or i352 %r121, %r126
-%r128 = zext i352 %r127 to i384
-%r130 = getelementptr i32, i32* %r2, i32 11
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i384
-%r133 = shl i384 %r132, 352
-%r134 = or i384 %r128, %r133
-%r135 = zext i384 %r134 to i416
-%r137 = getelementptr i32, i32* %r2, i32 12
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i416
-%r140 = shl i416 %r139, 384
-%r141 = or i416 %r135, %r140
-%r142 = zext i416 %r141 to i448
-%r144 = getelementptr i32, i32* %r2, i32 13
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i448
-%r147 = shl i448 %r146, 416
-%r148 = or i448 %r142, %r147
-%r149 = zext i448 %r148 to i480
-%r151 = getelementptr i32, i32* %r2, i32 14
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i480
-%r154 = shl i480 %r153, 448
-%r155 = or i480 %r149, %r154
-%r156 = zext i480 %r155 to i512
-%r158 = getelementptr i32, i32* %r2, i32 15
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i512
-%r161 = shl i512 %r160, 480
-%r162 = or i512 %r156, %r161
-%r163 = zext i512 %r162 to i544
-%r164 = trunc i544 %r163 to i32
-%r165 = mul i32 %r164, %r6
-%r166 = call i288 @mulPv256x32(i32* %r3, i32 %r165)
-%r167 = zext i288 %r166 to i544
-%r168 = add i544 %r163, %r167
-%r169 = lshr i544 %r168, 32
-%r170 = trunc i544 %r169 to i512
-%r171 = trunc i512 %r170 to i32
-%r172 = mul i32 %r171, %r6
-%r173 = call i288 @mulPv256x32(i32* %r3, i32 %r172)
-%r174 = zext i288 %r173 to i512
-%r175 = add i512 %r170, %r174
-%r176 = lshr i512 %r175, 32
-%r177 = trunc i512 %r176 to i480
-%r178 = trunc i480 %r177 to i32
-%r179 = mul i32 %r178, %r6
-%r180 = call i288 @mulPv256x32(i32* %r3, i32 %r179)
-%r181 = zext i288 %r180 to i480
-%r182 = add i480 %r177, %r181
-%r183 = lshr i480 %r182, 32
-%r184 = trunc i480 %r183 to i448
-%r185 = trunc i448 %r184 to i32
-%r186 = mul i32 %r185, %r6
-%r187 = call i288 @mulPv256x32(i32* %r3, i32 %r186)
-%r188 = zext i288 %r187 to i448
-%r189 = add i448 %r184, %r188
-%r190 = lshr i448 %r189, 32
-%r191 = trunc i448 %r190 to i416
-%r192 = trunc i416 %r191 to i32
-%r193 = mul i32 %r192, %r6
-%r194 = call i288 @mulPv256x32(i32* %r3, i32 %r193)
-%r195 = zext i288 %r194 to i416
-%r196 = add i416 %r191, %r195
-%r197 = lshr i416 %r196, 32
-%r198 = trunc i416 %r197 to i384
-%r199 = trunc i384 %r198 to i32
-%r200 = mul i32 %r199, %r6
-%r201 = call i288 @mulPv256x32(i32* %r3, i32 %r200)
-%r202 = zext i288 %r201 to i384
-%r203 = add i384 %r198, %r202
-%r204 = lshr i384 %r203, 32
-%r205 = trunc i384 %r204 to i352
-%r206 = trunc i352 %r205 to i32
-%r207 = mul i32 %r206, %r6
-%r208 = call i288 @mulPv256x32(i32* %r3, i32 %r207)
-%r209 = zext i288 %r208 to i352
-%r210 = add i352 %r205, %r209
-%r211 = lshr i352 %r210, 32
-%r212 = trunc i352 %r211 to i320
-%r213 = trunc i320 %r212 to i32
-%r214 = mul i32 %r213, %r6
-%r215 = call i288 @mulPv256x32(i32* %r3, i32 %r214)
-%r216 = zext i288 %r215 to i320
-%r217 = add i320 %r212, %r216
-%r218 = lshr i320 %r217, 32
-%r219 = trunc i320 %r218 to i288
-%r220 = zext i256 %r56 to i288
-%r221 = sub i288 %r219, %r220
-%r222 = lshr i288 %r221, 256
-%r223 = trunc i288 %r222 to i1
-%r224 = select i1 %r223, i288 %r219, i288 %r221
-%r225 = trunc i288 %r224 to i256
-%r226 = trunc i256 %r225 to i32
-%r228 = getelementptr i32, i32* %r1, i32 0
-store i32 %r226, i32* %r228
-%r229 = lshr i256 %r225, 32
-%r230 = trunc i256 %r229 to i32
-%r232 = getelementptr i32, i32* %r1, i32 1
-store i32 %r230, i32* %r232
-%r233 = lshr i256 %r229, 32
-%r234 = trunc i256 %r233 to i32
-%r236 = getelementptr i32, i32* %r1, i32 2
-store i32 %r234, i32* %r236
-%r237 = lshr i256 %r233, 32
-%r238 = trunc i256 %r237 to i32
-%r240 = getelementptr i32, i32* %r1, i32 3
-store i32 %r238, i32* %r240
-%r241 = lshr i256 %r237, 32
-%r242 = trunc i256 %r241 to i32
-%r244 = getelementptr i32, i32* %r1, i32 4
-store i32 %r242, i32* %r244
-%r245 = lshr i256 %r241, 32
-%r246 = trunc i256 %r245 to i32
-%r248 = getelementptr i32, i32* %r1, i32 5
-store i32 %r246, i32* %r248
-%r249 = lshr i256 %r245, 32
-%r250 = trunc i256 %r249 to i32
-%r252 = getelementptr i32, i32* %r1, i32 6
-store i32 %r250, i32* %r252
-%r253 = lshr i256 %r249, 32
-%r254 = trunc i256 %r253 to i32
-%r256 = getelementptr i32, i32* %r1, i32 7
-store i32 %r254, i32* %r256
-ret void
-}
-define i32 @mcl_fp_addPre8L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r56 = load i32, i32* %r4
-%r57 = zext i32 %r56 to i64
-%r59 = getelementptr i32, i32* %r4, i32 1
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i64
-%r62 = shl i64 %r61, 32
-%r63 = or i64 %r57, %r62
-%r64 = zext i64 %r63 to i96
-%r66 = getelementptr i32, i32* %r4, i32 2
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i96
-%r69 = shl i96 %r68, 64
-%r70 = or i96 %r64, %r69
-%r71 = zext i96 %r70 to i128
-%r73 = getelementptr i32, i32* %r4, i32 3
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i128
-%r76 = shl i128 %r75, 96
-%r77 = or i128 %r71, %r76
-%r78 = zext i128 %r77 to i160
-%r80 = getelementptr i32, i32* %r4, i32 4
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i160
-%r83 = shl i160 %r82, 128
-%r84 = or i160 %r78, %r83
-%r85 = zext i160 %r84 to i192
-%r87 = getelementptr i32, i32* %r4, i32 5
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i192
-%r90 = shl i192 %r89, 160
-%r91 = or i192 %r85, %r90
-%r92 = zext i192 %r91 to i224
-%r94 = getelementptr i32, i32* %r4, i32 6
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i224
-%r97 = shl i224 %r96, 192
-%r98 = or i224 %r92, %r97
-%r99 = zext i224 %r98 to i256
-%r101 = getelementptr i32, i32* %r4, i32 7
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i256
-%r104 = shl i256 %r103, 224
-%r105 = or i256 %r99, %r104
-%r106 = zext i256 %r105 to i288
-%r107 = add i288 %r55, %r106
-%r108 = trunc i288 %r107 to i256
-%r109 = trunc i256 %r108 to i32
-%r111 = getelementptr i32, i32* %r2, i32 0
-store i32 %r109, i32* %r111
-%r112 = lshr i256 %r108, 32
-%r113 = trunc i256 %r112 to i32
-%r115 = getelementptr i32, i32* %r2, i32 1
-store i32 %r113, i32* %r115
-%r116 = lshr i256 %r112, 32
-%r117 = trunc i256 %r116 to i32
-%r119 = getelementptr i32, i32* %r2, i32 2
-store i32 %r117, i32* %r119
-%r120 = lshr i256 %r116, 32
-%r121 = trunc i256 %r120 to i32
-%r123 = getelementptr i32, i32* %r2, i32 3
-store i32 %r121, i32* %r123
-%r124 = lshr i256 %r120, 32
-%r125 = trunc i256 %r124 to i32
-%r127 = getelementptr i32, i32* %r2, i32 4
-store i32 %r125, i32* %r127
-%r128 = lshr i256 %r124, 32
-%r129 = trunc i256 %r128 to i32
-%r131 = getelementptr i32, i32* %r2, i32 5
-store i32 %r129, i32* %r131
-%r132 = lshr i256 %r128, 32
-%r133 = trunc i256 %r132 to i32
-%r135 = getelementptr i32, i32* %r2, i32 6
-store i32 %r133, i32* %r135
-%r136 = lshr i256 %r132, 32
-%r137 = trunc i256 %r136 to i32
-%r139 = getelementptr i32, i32* %r2, i32 7
-store i32 %r137, i32* %r139
-%r140 = lshr i288 %r107, 256
-%r141 = trunc i288 %r140 to i32
-ret i32 %r141
-}
-define i32 @mcl_fp_subPre8L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r56 = load i32, i32* %r4
-%r57 = zext i32 %r56 to i64
-%r59 = getelementptr i32, i32* %r4, i32 1
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i64
-%r62 = shl i64 %r61, 32
-%r63 = or i64 %r57, %r62
-%r64 = zext i64 %r63 to i96
-%r66 = getelementptr i32, i32* %r4, i32 2
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i96
-%r69 = shl i96 %r68, 64
-%r70 = or i96 %r64, %r69
-%r71 = zext i96 %r70 to i128
-%r73 = getelementptr i32, i32* %r4, i32 3
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i128
-%r76 = shl i128 %r75, 96
-%r77 = or i128 %r71, %r76
-%r78 = zext i128 %r77 to i160
-%r80 = getelementptr i32, i32* %r4, i32 4
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i160
-%r83 = shl i160 %r82, 128
-%r84 = or i160 %r78, %r83
-%r85 = zext i160 %r84 to i192
-%r87 = getelementptr i32, i32* %r4, i32 5
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i192
-%r90 = shl i192 %r89, 160
-%r91 = or i192 %r85, %r90
-%r92 = zext i192 %r91 to i224
-%r94 = getelementptr i32, i32* %r4, i32 6
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i224
-%r97 = shl i224 %r96, 192
-%r98 = or i224 %r92, %r97
-%r99 = zext i224 %r98 to i256
-%r101 = getelementptr i32, i32* %r4, i32 7
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i256
-%r104 = shl i256 %r103, 224
-%r105 = or i256 %r99, %r104
-%r106 = zext i256 %r105 to i288
-%r107 = sub i288 %r55, %r106
-%r108 = trunc i288 %r107 to i256
-%r109 = trunc i256 %r108 to i32
-%r111 = getelementptr i32, i32* %r2, i32 0
-store i32 %r109, i32* %r111
-%r112 = lshr i256 %r108, 32
-%r113 = trunc i256 %r112 to i32
-%r115 = getelementptr i32, i32* %r2, i32 1
-store i32 %r113, i32* %r115
-%r116 = lshr i256 %r112, 32
-%r117 = trunc i256 %r116 to i32
-%r119 = getelementptr i32, i32* %r2, i32 2
-store i32 %r117, i32* %r119
-%r120 = lshr i256 %r116, 32
-%r121 = trunc i256 %r120 to i32
-%r123 = getelementptr i32, i32* %r2, i32 3
-store i32 %r121, i32* %r123
-%r124 = lshr i256 %r120, 32
-%r125 = trunc i256 %r124 to i32
-%r127 = getelementptr i32, i32* %r2, i32 4
-store i32 %r125, i32* %r127
-%r128 = lshr i256 %r124, 32
-%r129 = trunc i256 %r128 to i32
-%r131 = getelementptr i32, i32* %r2, i32 5
-store i32 %r129, i32* %r131
-%r132 = lshr i256 %r128, 32
-%r133 = trunc i256 %r132 to i32
-%r135 = getelementptr i32, i32* %r2, i32 6
-store i32 %r133, i32* %r135
-%r136 = lshr i256 %r132, 32
-%r137 = trunc i256 %r136 to i32
-%r139 = getelementptr i32, i32* %r2, i32 7
-store i32 %r137, i32* %r139
-%r140 = lshr i288 %r107, 256
-%r141 = trunc i288 %r140 to i32
-%r143 = and i32 %r141, 1
-ret i32 %r143
-}
-define void @mcl_fp_shr1_8L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = zext i64 %r10 to i96
-%r13 = getelementptr i32, i32* %r2, i32 2
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i96
-%r16 = shl i96 %r15, 64
-%r17 = or i96 %r11, %r16
-%r18 = zext i96 %r17 to i128
-%r20 = getelementptr i32, i32* %r2, i32 3
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i128
-%r23 = shl i128 %r22, 96
-%r24 = or i128 %r18, %r23
-%r25 = zext i128 %r24 to i160
-%r27 = getelementptr i32, i32* %r2, i32 4
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i160
-%r30 = shl i160 %r29, 128
-%r31 = or i160 %r25, %r30
-%r32 = zext i160 %r31 to i192
-%r34 = getelementptr i32, i32* %r2, i32 5
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i192
-%r37 = shl i192 %r36, 160
-%r38 = or i192 %r32, %r37
-%r39 = zext i192 %r38 to i224
-%r41 = getelementptr i32, i32* %r2, i32 6
-%r42 = load i32, i32* %r41
-%r43 = zext i32 %r42 to i224
-%r44 = shl i224 %r43, 192
-%r45 = or i224 %r39, %r44
-%r46 = zext i224 %r45 to i256
-%r48 = getelementptr i32, i32* %r2, i32 7
-%r49 = load i32, i32* %r48
-%r50 = zext i32 %r49 to i256
-%r51 = shl i256 %r50, 224
-%r52 = or i256 %r46, %r51
-%r53 = lshr i256 %r52, 1
-%r54 = trunc i256 %r53 to i32
-%r56 = getelementptr i32, i32* %r1, i32 0
-store i32 %r54, i32* %r56
-%r57 = lshr i256 %r53, 32
-%r58 = trunc i256 %r57 to i32
-%r60 = getelementptr i32, i32* %r1, i32 1
-store i32 %r58, i32* %r60
-%r61 = lshr i256 %r57, 32
-%r62 = trunc i256 %r61 to i32
-%r64 = getelementptr i32, i32* %r1, i32 2
-store i32 %r62, i32* %r64
-%r65 = lshr i256 %r61, 32
-%r66 = trunc i256 %r65 to i32
-%r68 = getelementptr i32, i32* %r1, i32 3
-store i32 %r66, i32* %r68
-%r69 = lshr i256 %r65, 32
-%r70 = trunc i256 %r69 to i32
-%r72 = getelementptr i32, i32* %r1, i32 4
-store i32 %r70, i32* %r72
-%r73 = lshr i256 %r69, 32
-%r74 = trunc i256 %r73 to i32
-%r76 = getelementptr i32, i32* %r1, i32 5
-store i32 %r74, i32* %r76
-%r77 = lshr i256 %r73, 32
-%r78 = trunc i256 %r77 to i32
-%r80 = getelementptr i32, i32* %r1, i32 6
-store i32 %r78, i32* %r80
-%r81 = lshr i256 %r77, 32
-%r82 = trunc i256 %r81 to i32
-%r84 = getelementptr i32, i32* %r1, i32 7
-store i32 %r82, i32* %r84
-ret void
-}
-define void @mcl_fp_add8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = load i32, i32* %r3
-%r56 = zext i32 %r55 to i64
-%r58 = getelementptr i32, i32* %r3, i32 1
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i64
-%r61 = shl i64 %r60, 32
-%r62 = or i64 %r56, %r61
-%r63 = zext i64 %r62 to i96
-%r65 = getelementptr i32, i32* %r3, i32 2
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i96
-%r68 = shl i96 %r67, 64
-%r69 = or i96 %r63, %r68
-%r70 = zext i96 %r69 to i128
-%r72 = getelementptr i32, i32* %r3, i32 3
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i128
-%r75 = shl i128 %r74, 96
-%r76 = or i128 %r70, %r75
-%r77 = zext i128 %r76 to i160
-%r79 = getelementptr i32, i32* %r3, i32 4
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i160
-%r82 = shl i160 %r81, 128
-%r83 = or i160 %r77, %r82
-%r84 = zext i160 %r83 to i192
-%r86 = getelementptr i32, i32* %r3, i32 5
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i192
-%r89 = shl i192 %r88, 160
-%r90 = or i192 %r84, %r89
-%r91 = zext i192 %r90 to i224
-%r93 = getelementptr i32, i32* %r3, i32 6
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i224
-%r96 = shl i224 %r95, 192
-%r97 = or i224 %r91, %r96
-%r98 = zext i224 %r97 to i256
-%r100 = getelementptr i32, i32* %r3, i32 7
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i256
-%r103 = shl i256 %r102, 224
-%r104 = or i256 %r98, %r103
-%r105 = zext i256 %r54 to i288
-%r106 = zext i256 %r104 to i288
-%r107 = add i288 %r105, %r106
-%r108 = trunc i288 %r107 to i256
-%r109 = trunc i256 %r108 to i32
-%r111 = getelementptr i32, i32* %r1, i32 0
-store i32 %r109, i32* %r111
-%r112 = lshr i256 %r108, 32
-%r113 = trunc i256 %r112 to i32
-%r115 = getelementptr i32, i32* %r1, i32 1
-store i32 %r113, i32* %r115
-%r116 = lshr i256 %r112, 32
-%r117 = trunc i256 %r116 to i32
-%r119 = getelementptr i32, i32* %r1, i32 2
-store i32 %r117, i32* %r119
-%r120 = lshr i256 %r116, 32
-%r121 = trunc i256 %r120 to i32
-%r123 = getelementptr i32, i32* %r1, i32 3
-store i32 %r121, i32* %r123
-%r124 = lshr i256 %r120, 32
-%r125 = trunc i256 %r124 to i32
-%r127 = getelementptr i32, i32* %r1, i32 4
-store i32 %r125, i32* %r127
-%r128 = lshr i256 %r124, 32
-%r129 = trunc i256 %r128 to i32
-%r131 = getelementptr i32, i32* %r1, i32 5
-store i32 %r129, i32* %r131
-%r132 = lshr i256 %r128, 32
-%r133 = trunc i256 %r132 to i32
-%r135 = getelementptr i32, i32* %r1, i32 6
-store i32 %r133, i32* %r135
-%r136 = lshr i256 %r132, 32
-%r137 = trunc i256 %r136 to i32
-%r139 = getelementptr i32, i32* %r1, i32 7
-store i32 %r137, i32* %r139
-%r140 = load i32, i32* %r4
-%r141 = zext i32 %r140 to i64
-%r143 = getelementptr i32, i32* %r4, i32 1
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i64
-%r146 = shl i64 %r145, 32
-%r147 = or i64 %r141, %r146
-%r148 = zext i64 %r147 to i96
-%r150 = getelementptr i32, i32* %r4, i32 2
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i96
-%r153 = shl i96 %r152, 64
-%r154 = or i96 %r148, %r153
-%r155 = zext i96 %r154 to i128
-%r157 = getelementptr i32, i32* %r4, i32 3
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i128
-%r160 = shl i128 %r159, 96
-%r161 = or i128 %r155, %r160
-%r162 = zext i128 %r161 to i160
-%r164 = getelementptr i32, i32* %r4, i32 4
-%r165 = load i32, i32* %r164
-%r166 = zext i32 %r165 to i160
-%r167 = shl i160 %r166, 128
-%r168 = or i160 %r162, %r167
-%r169 = zext i160 %r168 to i192
-%r171 = getelementptr i32, i32* %r4, i32 5
-%r172 = load i32, i32* %r171
-%r173 = zext i32 %r172 to i192
-%r174 = shl i192 %r173, 160
-%r175 = or i192 %r169, %r174
-%r176 = zext i192 %r175 to i224
-%r178 = getelementptr i32, i32* %r4, i32 6
-%r179 = load i32, i32* %r178
-%r180 = zext i32 %r179 to i224
-%r181 = shl i224 %r180, 192
-%r182 = or i224 %r176, %r181
-%r183 = zext i224 %r182 to i256
-%r185 = getelementptr i32, i32* %r4, i32 7
-%r186 = load i32, i32* %r185
-%r187 = zext i32 %r186 to i256
-%r188 = shl i256 %r187, 224
-%r189 = or i256 %r183, %r188
-%r190 = zext i256 %r189 to i288
-%r191 = sub i288 %r107, %r190
-%r192 = lshr i288 %r191, 256
-%r193 = trunc i288 %r192 to i1
-br i1%r193, label %carry, label %nocarry
-nocarry:
-%r194 = trunc i288 %r191 to i256
-%r195 = trunc i256 %r194 to i32
-%r197 = getelementptr i32, i32* %r1, i32 0
-store i32 %r195, i32* %r197
-%r198 = lshr i256 %r194, 32
-%r199 = trunc i256 %r198 to i32
-%r201 = getelementptr i32, i32* %r1, i32 1
-store i32 %r199, i32* %r201
-%r202 = lshr i256 %r198, 32
-%r203 = trunc i256 %r202 to i32
-%r205 = getelementptr i32, i32* %r1, i32 2
-store i32 %r203, i32* %r205
-%r206 = lshr i256 %r202, 32
-%r207 = trunc i256 %r206 to i32
-%r209 = getelementptr i32, i32* %r1, i32 3
-store i32 %r207, i32* %r209
-%r210 = lshr i256 %r206, 32
-%r211 = trunc i256 %r210 to i32
-%r213 = getelementptr i32, i32* %r1, i32 4
-store i32 %r211, i32* %r213
-%r214 = lshr i256 %r210, 32
-%r215 = trunc i256 %r214 to i32
-%r217 = getelementptr i32, i32* %r1, i32 5
-store i32 %r215, i32* %r217
-%r218 = lshr i256 %r214, 32
-%r219 = trunc i256 %r218 to i32
-%r221 = getelementptr i32, i32* %r1, i32 6
-store i32 %r219, i32* %r221
-%r222 = lshr i256 %r218, 32
-%r223 = trunc i256 %r222 to i32
-%r225 = getelementptr i32, i32* %r1, i32 7
-store i32 %r223, i32* %r225
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = load i32, i32* %r3
-%r56 = zext i32 %r55 to i64
-%r58 = getelementptr i32, i32* %r3, i32 1
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i64
-%r61 = shl i64 %r60, 32
-%r62 = or i64 %r56, %r61
-%r63 = zext i64 %r62 to i96
-%r65 = getelementptr i32, i32* %r3, i32 2
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i96
-%r68 = shl i96 %r67, 64
-%r69 = or i96 %r63, %r68
-%r70 = zext i96 %r69 to i128
-%r72 = getelementptr i32, i32* %r3, i32 3
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i128
-%r75 = shl i128 %r74, 96
-%r76 = or i128 %r70, %r75
-%r77 = zext i128 %r76 to i160
-%r79 = getelementptr i32, i32* %r3, i32 4
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i160
-%r82 = shl i160 %r81, 128
-%r83 = or i160 %r77, %r82
-%r84 = zext i160 %r83 to i192
-%r86 = getelementptr i32, i32* %r3, i32 5
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i192
-%r89 = shl i192 %r88, 160
-%r90 = or i192 %r84, %r89
-%r91 = zext i192 %r90 to i224
-%r93 = getelementptr i32, i32* %r3, i32 6
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i224
-%r96 = shl i224 %r95, 192
-%r97 = or i224 %r91, %r96
-%r98 = zext i224 %r97 to i256
-%r100 = getelementptr i32, i32* %r3, i32 7
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i256
-%r103 = shl i256 %r102, 224
-%r104 = or i256 %r98, %r103
-%r105 = add i256 %r54, %r104
-%r106 = load i32, i32* %r4
-%r107 = zext i32 %r106 to i64
-%r109 = getelementptr i32, i32* %r4, i32 1
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i64
-%r112 = shl i64 %r111, 32
-%r113 = or i64 %r107, %r112
-%r114 = zext i64 %r113 to i96
-%r116 = getelementptr i32, i32* %r4, i32 2
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i96
-%r119 = shl i96 %r118, 64
-%r120 = or i96 %r114, %r119
-%r121 = zext i96 %r120 to i128
-%r123 = getelementptr i32, i32* %r4, i32 3
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i128
-%r126 = shl i128 %r125, 96
-%r127 = or i128 %r121, %r126
-%r128 = zext i128 %r127 to i160
-%r130 = getelementptr i32, i32* %r4, i32 4
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i160
-%r133 = shl i160 %r132, 128
-%r134 = or i160 %r128, %r133
-%r135 = zext i160 %r134 to i192
-%r137 = getelementptr i32, i32* %r4, i32 5
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i192
-%r140 = shl i192 %r139, 160
-%r141 = or i192 %r135, %r140
-%r142 = zext i192 %r141 to i224
-%r144 = getelementptr i32, i32* %r4, i32 6
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i224
-%r147 = shl i224 %r146, 192
-%r148 = or i224 %r142, %r147
-%r149 = zext i224 %r148 to i256
-%r151 = getelementptr i32, i32* %r4, i32 7
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i256
-%r154 = shl i256 %r153, 224
-%r155 = or i256 %r149, %r154
-%r156 = sub i256 %r105, %r155
-%r157 = lshr i256 %r156, 255
-%r158 = trunc i256 %r157 to i1
-%r159 = select i1 %r158, i256 %r105, i256 %r156
-%r160 = trunc i256 %r159 to i32
-%r162 = getelementptr i32, i32* %r1, i32 0
-store i32 %r160, i32* %r162
-%r163 = lshr i256 %r159, 32
-%r164 = trunc i256 %r163 to i32
-%r166 = getelementptr i32, i32* %r1, i32 1
-store i32 %r164, i32* %r166
-%r167 = lshr i256 %r163, 32
-%r168 = trunc i256 %r167 to i32
-%r170 = getelementptr i32, i32* %r1, i32 2
-store i32 %r168, i32* %r170
-%r171 = lshr i256 %r167, 32
-%r172 = trunc i256 %r171 to i32
-%r174 = getelementptr i32, i32* %r1, i32 3
-store i32 %r172, i32* %r174
-%r175 = lshr i256 %r171, 32
-%r176 = trunc i256 %r175 to i32
-%r178 = getelementptr i32, i32* %r1, i32 4
-store i32 %r176, i32* %r178
-%r179 = lshr i256 %r175, 32
-%r180 = trunc i256 %r179 to i32
-%r182 = getelementptr i32, i32* %r1, i32 5
-store i32 %r180, i32* %r182
-%r183 = lshr i256 %r179, 32
-%r184 = trunc i256 %r183 to i32
-%r186 = getelementptr i32, i32* %r1, i32 6
-store i32 %r184, i32* %r186
-%r187 = lshr i256 %r183, 32
-%r188 = trunc i256 %r187 to i32
-%r190 = getelementptr i32, i32* %r1, i32 7
-store i32 %r188, i32* %r190
-ret void
-}
-define void @mcl_fp_sub8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = load i32, i32* %r3
-%r56 = zext i32 %r55 to i64
-%r58 = getelementptr i32, i32* %r3, i32 1
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i64
-%r61 = shl i64 %r60, 32
-%r62 = or i64 %r56, %r61
-%r63 = zext i64 %r62 to i96
-%r65 = getelementptr i32, i32* %r3, i32 2
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i96
-%r68 = shl i96 %r67, 64
-%r69 = or i96 %r63, %r68
-%r70 = zext i96 %r69 to i128
-%r72 = getelementptr i32, i32* %r3, i32 3
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i128
-%r75 = shl i128 %r74, 96
-%r76 = or i128 %r70, %r75
-%r77 = zext i128 %r76 to i160
-%r79 = getelementptr i32, i32* %r3, i32 4
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i160
-%r82 = shl i160 %r81, 128
-%r83 = or i160 %r77, %r82
-%r84 = zext i160 %r83 to i192
-%r86 = getelementptr i32, i32* %r3, i32 5
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i192
-%r89 = shl i192 %r88, 160
-%r90 = or i192 %r84, %r89
-%r91 = zext i192 %r90 to i224
-%r93 = getelementptr i32, i32* %r3, i32 6
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i224
-%r96 = shl i224 %r95, 192
-%r97 = or i224 %r91, %r96
-%r98 = zext i224 %r97 to i256
-%r100 = getelementptr i32, i32* %r3, i32 7
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i256
-%r103 = shl i256 %r102, 224
-%r104 = or i256 %r98, %r103
-%r105 = zext i256 %r54 to i288
-%r106 = zext i256 %r104 to i288
-%r107 = sub i288 %r105, %r106
-%r108 = trunc i288 %r107 to i256
-%r109 = lshr i288 %r107, 256
-%r110 = trunc i288 %r109 to i1
-%r111 = trunc i256 %r108 to i32
-%r113 = getelementptr i32, i32* %r1, i32 0
-store i32 %r111, i32* %r113
-%r114 = lshr i256 %r108, 32
-%r115 = trunc i256 %r114 to i32
-%r117 = getelementptr i32, i32* %r1, i32 1
-store i32 %r115, i32* %r117
-%r118 = lshr i256 %r114, 32
-%r119 = trunc i256 %r118 to i32
-%r121 = getelementptr i32, i32* %r1, i32 2
-store i32 %r119, i32* %r121
-%r122 = lshr i256 %r118, 32
-%r123 = trunc i256 %r122 to i32
-%r125 = getelementptr i32, i32* %r1, i32 3
-store i32 %r123, i32* %r125
-%r126 = lshr i256 %r122, 32
-%r127 = trunc i256 %r126 to i32
-%r129 = getelementptr i32, i32* %r1, i32 4
-store i32 %r127, i32* %r129
-%r130 = lshr i256 %r126, 32
-%r131 = trunc i256 %r130 to i32
-%r133 = getelementptr i32, i32* %r1, i32 5
-store i32 %r131, i32* %r133
-%r134 = lshr i256 %r130, 32
-%r135 = trunc i256 %r134 to i32
-%r137 = getelementptr i32, i32* %r1, i32 6
-store i32 %r135, i32* %r137
-%r138 = lshr i256 %r134, 32
-%r139 = trunc i256 %r138 to i32
-%r141 = getelementptr i32, i32* %r1, i32 7
-store i32 %r139, i32* %r141
-br i1%r110, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r142 = load i32, i32* %r4
-%r143 = zext i32 %r142 to i64
-%r145 = getelementptr i32, i32* %r4, i32 1
-%r146 = load i32, i32* %r145
-%r147 = zext i32 %r146 to i64
-%r148 = shl i64 %r147, 32
-%r149 = or i64 %r143, %r148
-%r150 = zext i64 %r149 to i96
-%r152 = getelementptr i32, i32* %r4, i32 2
-%r153 = load i32, i32* %r152
-%r154 = zext i32 %r153 to i96
-%r155 = shl i96 %r154, 64
-%r156 = or i96 %r150, %r155
-%r157 = zext i96 %r156 to i128
-%r159 = getelementptr i32, i32* %r4, i32 3
-%r160 = load i32, i32* %r159
-%r161 = zext i32 %r160 to i128
-%r162 = shl i128 %r161, 96
-%r163 = or i128 %r157, %r162
-%r164 = zext i128 %r163 to i160
-%r166 = getelementptr i32, i32* %r4, i32 4
-%r167 = load i32, i32* %r166
-%r168 = zext i32 %r167 to i160
-%r169 = shl i160 %r168, 128
-%r170 = or i160 %r164, %r169
-%r171 = zext i160 %r170 to i192
-%r173 = getelementptr i32, i32* %r4, i32 5
-%r174 = load i32, i32* %r173
-%r175 = zext i32 %r174 to i192
-%r176 = shl i192 %r175, 160
-%r177 = or i192 %r171, %r176
-%r178 = zext i192 %r177 to i224
-%r180 = getelementptr i32, i32* %r4, i32 6
-%r181 = load i32, i32* %r180
-%r182 = zext i32 %r181 to i224
-%r183 = shl i224 %r182, 192
-%r184 = or i224 %r178, %r183
-%r185 = zext i224 %r184 to i256
-%r187 = getelementptr i32, i32* %r4, i32 7
-%r188 = load i32, i32* %r187
-%r189 = zext i32 %r188 to i256
-%r190 = shl i256 %r189, 224
-%r191 = or i256 %r185, %r190
-%r192 = add i256 %r108, %r191
-%r193 = trunc i256 %r192 to i32
-%r195 = getelementptr i32, i32* %r1, i32 0
-store i32 %r193, i32* %r195
-%r196 = lshr i256 %r192, 32
-%r197 = trunc i256 %r196 to i32
-%r199 = getelementptr i32, i32* %r1, i32 1
-store i32 %r197, i32* %r199
-%r200 = lshr i256 %r196, 32
-%r201 = trunc i256 %r200 to i32
-%r203 = getelementptr i32, i32* %r1, i32 2
-store i32 %r201, i32* %r203
-%r204 = lshr i256 %r200, 32
-%r205 = trunc i256 %r204 to i32
-%r207 = getelementptr i32, i32* %r1, i32 3
-store i32 %r205, i32* %r207
-%r208 = lshr i256 %r204, 32
-%r209 = trunc i256 %r208 to i32
-%r211 = getelementptr i32, i32* %r1, i32 4
-store i32 %r209, i32* %r211
-%r212 = lshr i256 %r208, 32
-%r213 = trunc i256 %r212 to i32
-%r215 = getelementptr i32, i32* %r1, i32 5
-store i32 %r213, i32* %r215
-%r216 = lshr i256 %r212, 32
-%r217 = trunc i256 %r216 to i32
-%r219 = getelementptr i32, i32* %r1, i32 6
-store i32 %r217, i32* %r219
-%r220 = lshr i256 %r216, 32
-%r221 = trunc i256 %r220 to i32
-%r223 = getelementptr i32, i32* %r1, i32 7
-store i32 %r221, i32* %r223
-ret void
-}
-define void @mcl_fp_subNF8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = load i32, i32* %r3
-%r56 = zext i32 %r55 to i64
-%r58 = getelementptr i32, i32* %r3, i32 1
-%r59 = load i32, i32* %r58
-%r60 = zext i32 %r59 to i64
-%r61 = shl i64 %r60, 32
-%r62 = or i64 %r56, %r61
-%r63 = zext i64 %r62 to i96
-%r65 = getelementptr i32, i32* %r3, i32 2
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i96
-%r68 = shl i96 %r67, 64
-%r69 = or i96 %r63, %r68
-%r70 = zext i96 %r69 to i128
-%r72 = getelementptr i32, i32* %r3, i32 3
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i128
-%r75 = shl i128 %r74, 96
-%r76 = or i128 %r70, %r75
-%r77 = zext i128 %r76 to i160
-%r79 = getelementptr i32, i32* %r3, i32 4
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i160
-%r82 = shl i160 %r81, 128
-%r83 = or i160 %r77, %r82
-%r84 = zext i160 %r83 to i192
-%r86 = getelementptr i32, i32* %r3, i32 5
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i192
-%r89 = shl i192 %r88, 160
-%r90 = or i192 %r84, %r89
-%r91 = zext i192 %r90 to i224
-%r93 = getelementptr i32, i32* %r3, i32 6
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i224
-%r96 = shl i224 %r95, 192
-%r97 = or i224 %r91, %r96
-%r98 = zext i224 %r97 to i256
-%r100 = getelementptr i32, i32* %r3, i32 7
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i256
-%r103 = shl i256 %r102, 224
-%r104 = or i256 %r98, %r103
-%r105 = sub i256 %r54, %r104
-%r106 = lshr i256 %r105, 255
-%r107 = trunc i256 %r106 to i1
-%r108 = load i32, i32* %r4
-%r109 = zext i32 %r108 to i64
-%r111 = getelementptr i32, i32* %r4, i32 1
-%r112 = load i32, i32* %r111
-%r113 = zext i32 %r112 to i64
-%r114 = shl i64 %r113, 32
-%r115 = or i64 %r109, %r114
-%r116 = zext i64 %r115 to i96
-%r118 = getelementptr i32, i32* %r4, i32 2
-%r119 = load i32, i32* %r118
-%r120 = zext i32 %r119 to i96
-%r121 = shl i96 %r120, 64
-%r122 = or i96 %r116, %r121
-%r123 = zext i96 %r122 to i128
-%r125 = getelementptr i32, i32* %r4, i32 3
-%r126 = load i32, i32* %r125
-%r127 = zext i32 %r126 to i128
-%r128 = shl i128 %r127, 96
-%r129 = or i128 %r123, %r128
-%r130 = zext i128 %r129 to i160
-%r132 = getelementptr i32, i32* %r4, i32 4
-%r133 = load i32, i32* %r132
-%r134 = zext i32 %r133 to i160
-%r135 = shl i160 %r134, 128
-%r136 = or i160 %r130, %r135
-%r137 = zext i160 %r136 to i192
-%r139 = getelementptr i32, i32* %r4, i32 5
-%r140 = load i32, i32* %r139
-%r141 = zext i32 %r140 to i192
-%r142 = shl i192 %r141, 160
-%r143 = or i192 %r137, %r142
-%r144 = zext i192 %r143 to i224
-%r146 = getelementptr i32, i32* %r4, i32 6
-%r147 = load i32, i32* %r146
-%r148 = zext i32 %r147 to i224
-%r149 = shl i224 %r148, 192
-%r150 = or i224 %r144, %r149
-%r151 = zext i224 %r150 to i256
-%r153 = getelementptr i32, i32* %r4, i32 7
-%r154 = load i32, i32* %r153
-%r155 = zext i32 %r154 to i256
-%r156 = shl i256 %r155, 224
-%r157 = or i256 %r151, %r156
-%r159 = select i1 %r107, i256 %r157, i256 0
-%r160 = add i256 %r105, %r159
-%r161 = trunc i256 %r160 to i32
-%r163 = getelementptr i32, i32* %r1, i32 0
-store i32 %r161, i32* %r163
-%r164 = lshr i256 %r160, 32
-%r165 = trunc i256 %r164 to i32
-%r167 = getelementptr i32, i32* %r1, i32 1
-store i32 %r165, i32* %r167
-%r168 = lshr i256 %r164, 32
-%r169 = trunc i256 %r168 to i32
-%r171 = getelementptr i32, i32* %r1, i32 2
-store i32 %r169, i32* %r171
-%r172 = lshr i256 %r168, 32
-%r173 = trunc i256 %r172 to i32
-%r175 = getelementptr i32, i32* %r1, i32 3
-store i32 %r173, i32* %r175
-%r176 = lshr i256 %r172, 32
-%r177 = trunc i256 %r176 to i32
-%r179 = getelementptr i32, i32* %r1, i32 4
-store i32 %r177, i32* %r179
-%r180 = lshr i256 %r176, 32
-%r181 = trunc i256 %r180 to i32
-%r183 = getelementptr i32, i32* %r1, i32 5
-store i32 %r181, i32* %r183
-%r184 = lshr i256 %r180, 32
-%r185 = trunc i256 %r184 to i32
-%r187 = getelementptr i32, i32* %r1, i32 6
-store i32 %r185, i32* %r187
-%r188 = lshr i256 %r184, 32
-%r189 = trunc i256 %r188 to i32
-%r191 = getelementptr i32, i32* %r1, i32 7
-store i32 %r189, i32* %r191
-ret void
-}
-define void @mcl_fpDbl_add8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = load i32, i32* %r3
-%r112 = zext i32 %r111 to i64
-%r114 = getelementptr i32, i32* %r3, i32 1
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i64
-%r117 = shl i64 %r116, 32
-%r118 = or i64 %r112, %r117
-%r119 = zext i64 %r118 to i96
-%r121 = getelementptr i32, i32* %r3, i32 2
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i96
-%r124 = shl i96 %r123, 64
-%r125 = or i96 %r119, %r124
-%r126 = zext i96 %r125 to i128
-%r128 = getelementptr i32, i32* %r3, i32 3
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i128
-%r131 = shl i128 %r130, 96
-%r132 = or i128 %r126, %r131
-%r133 = zext i128 %r132 to i160
-%r135 = getelementptr i32, i32* %r3, i32 4
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i160
-%r138 = shl i160 %r137, 128
-%r139 = or i160 %r133, %r138
-%r140 = zext i160 %r139 to i192
-%r142 = getelementptr i32, i32* %r3, i32 5
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i192
-%r145 = shl i192 %r144, 160
-%r146 = or i192 %r140, %r145
-%r147 = zext i192 %r146 to i224
-%r149 = getelementptr i32, i32* %r3, i32 6
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i224
-%r152 = shl i224 %r151, 192
-%r153 = or i224 %r147, %r152
-%r154 = zext i224 %r153 to i256
-%r156 = getelementptr i32, i32* %r3, i32 7
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i256
-%r159 = shl i256 %r158, 224
-%r160 = or i256 %r154, %r159
-%r161 = zext i256 %r160 to i288
-%r163 = getelementptr i32, i32* %r3, i32 8
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i288
-%r166 = shl i288 %r165, 256
-%r167 = or i288 %r161, %r166
-%r168 = zext i288 %r167 to i320
-%r170 = getelementptr i32, i32* %r3, i32 9
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i320
-%r173 = shl i320 %r172, 288
-%r174 = or i320 %r168, %r173
-%r175 = zext i320 %r174 to i352
-%r177 = getelementptr i32, i32* %r3, i32 10
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i352
-%r180 = shl i352 %r179, 320
-%r181 = or i352 %r175, %r180
-%r182 = zext i352 %r181 to i384
-%r184 = getelementptr i32, i32* %r3, i32 11
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i384
-%r187 = shl i384 %r186, 352
-%r188 = or i384 %r182, %r187
-%r189 = zext i384 %r188 to i416
-%r191 = getelementptr i32, i32* %r3, i32 12
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i416
-%r194 = shl i416 %r193, 384
-%r195 = or i416 %r189, %r194
-%r196 = zext i416 %r195 to i448
-%r198 = getelementptr i32, i32* %r3, i32 13
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i448
-%r201 = shl i448 %r200, 416
-%r202 = or i448 %r196, %r201
-%r203 = zext i448 %r202 to i480
-%r205 = getelementptr i32, i32* %r3, i32 14
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i480
-%r208 = shl i480 %r207, 448
-%r209 = or i480 %r203, %r208
-%r210 = zext i480 %r209 to i512
-%r212 = getelementptr i32, i32* %r3, i32 15
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i512
-%r215 = shl i512 %r214, 480
-%r216 = or i512 %r210, %r215
-%r217 = zext i512 %r110 to i544
-%r218 = zext i512 %r216 to i544
-%r219 = add i544 %r217, %r218
-%r220 = trunc i544 %r219 to i256
-%r221 = trunc i256 %r220 to i32
-%r223 = getelementptr i32, i32* %r1, i32 0
-store i32 %r221, i32* %r223
-%r224 = lshr i256 %r220, 32
-%r225 = trunc i256 %r224 to i32
-%r227 = getelementptr i32, i32* %r1, i32 1
-store i32 %r225, i32* %r227
-%r228 = lshr i256 %r224, 32
-%r229 = trunc i256 %r228 to i32
-%r231 = getelementptr i32, i32* %r1, i32 2
-store i32 %r229, i32* %r231
-%r232 = lshr i256 %r228, 32
-%r233 = trunc i256 %r232 to i32
-%r235 = getelementptr i32, i32* %r1, i32 3
-store i32 %r233, i32* %r235
-%r236 = lshr i256 %r232, 32
-%r237 = trunc i256 %r236 to i32
-%r239 = getelementptr i32, i32* %r1, i32 4
-store i32 %r237, i32* %r239
-%r240 = lshr i256 %r236, 32
-%r241 = trunc i256 %r240 to i32
-%r243 = getelementptr i32, i32* %r1, i32 5
-store i32 %r241, i32* %r243
-%r244 = lshr i256 %r240, 32
-%r245 = trunc i256 %r244 to i32
-%r247 = getelementptr i32, i32* %r1, i32 6
-store i32 %r245, i32* %r247
-%r248 = lshr i256 %r244, 32
-%r249 = trunc i256 %r248 to i32
-%r251 = getelementptr i32, i32* %r1, i32 7
-store i32 %r249, i32* %r251
-%r252 = lshr i544 %r219, 256
-%r253 = trunc i544 %r252 to i288
-%r254 = load i32, i32* %r4
-%r255 = zext i32 %r254 to i64
-%r257 = getelementptr i32, i32* %r4, i32 1
-%r258 = load i32, i32* %r257
-%r259 = zext i32 %r258 to i64
-%r260 = shl i64 %r259, 32
-%r261 = or i64 %r255, %r260
-%r262 = zext i64 %r261 to i96
-%r264 = getelementptr i32, i32* %r4, i32 2
-%r265 = load i32, i32* %r264
-%r266 = zext i32 %r265 to i96
-%r267 = shl i96 %r266, 64
-%r268 = or i96 %r262, %r267
-%r269 = zext i96 %r268 to i128
-%r271 = getelementptr i32, i32* %r4, i32 3
-%r272 = load i32, i32* %r271
-%r273 = zext i32 %r272 to i128
-%r274 = shl i128 %r273, 96
-%r275 = or i128 %r269, %r274
-%r276 = zext i128 %r275 to i160
-%r278 = getelementptr i32, i32* %r4, i32 4
-%r279 = load i32, i32* %r278
-%r280 = zext i32 %r279 to i160
-%r281 = shl i160 %r280, 128
-%r282 = or i160 %r276, %r281
-%r283 = zext i160 %r282 to i192
-%r285 = getelementptr i32, i32* %r4, i32 5
-%r286 = load i32, i32* %r285
-%r287 = zext i32 %r286 to i192
-%r288 = shl i192 %r287, 160
-%r289 = or i192 %r283, %r288
-%r290 = zext i192 %r289 to i224
-%r292 = getelementptr i32, i32* %r4, i32 6
-%r293 = load i32, i32* %r292
-%r294 = zext i32 %r293 to i224
-%r295 = shl i224 %r294, 192
-%r296 = or i224 %r290, %r295
-%r297 = zext i224 %r296 to i256
-%r299 = getelementptr i32, i32* %r4, i32 7
-%r300 = load i32, i32* %r299
-%r301 = zext i32 %r300 to i256
-%r302 = shl i256 %r301, 224
-%r303 = or i256 %r297, %r302
-%r304 = zext i256 %r303 to i288
-%r305 = sub i288 %r253, %r304
-%r306 = lshr i288 %r305, 256
-%r307 = trunc i288 %r306 to i1
-%r308 = select i1 %r307, i288 %r253, i288 %r305
-%r309 = trunc i288 %r308 to i256
-%r311 = getelementptr i32, i32* %r1, i32 8
-%r312 = trunc i256 %r309 to i32
-%r314 = getelementptr i32, i32* %r311, i32 0
-store i32 %r312, i32* %r314
-%r315 = lshr i256 %r309, 32
-%r316 = trunc i256 %r315 to i32
-%r318 = getelementptr i32, i32* %r311, i32 1
-store i32 %r316, i32* %r318
-%r319 = lshr i256 %r315, 32
-%r320 = trunc i256 %r319 to i32
-%r322 = getelementptr i32, i32* %r311, i32 2
-store i32 %r320, i32* %r322
-%r323 = lshr i256 %r319, 32
-%r324 = trunc i256 %r323 to i32
-%r326 = getelementptr i32, i32* %r311, i32 3
-store i32 %r324, i32* %r326
-%r327 = lshr i256 %r323, 32
-%r328 = trunc i256 %r327 to i32
-%r330 = getelementptr i32, i32* %r311, i32 4
-store i32 %r328, i32* %r330
-%r331 = lshr i256 %r327, 32
-%r332 = trunc i256 %r331 to i32
-%r334 = getelementptr i32, i32* %r311, i32 5
-store i32 %r332, i32* %r334
-%r335 = lshr i256 %r331, 32
-%r336 = trunc i256 %r335 to i32
-%r338 = getelementptr i32, i32* %r311, i32 6
-store i32 %r336, i32* %r338
-%r339 = lshr i256 %r335, 32
-%r340 = trunc i256 %r339 to i32
-%r342 = getelementptr i32, i32* %r311, i32 7
-store i32 %r340, i32* %r342
-ret void
-}
-define void @mcl_fpDbl_sub8L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = load i32, i32* %r3
-%r112 = zext i32 %r111 to i64
-%r114 = getelementptr i32, i32* %r3, i32 1
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i64
-%r117 = shl i64 %r116, 32
-%r118 = or i64 %r112, %r117
-%r119 = zext i64 %r118 to i96
-%r121 = getelementptr i32, i32* %r3, i32 2
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i96
-%r124 = shl i96 %r123, 64
-%r125 = or i96 %r119, %r124
-%r126 = zext i96 %r125 to i128
-%r128 = getelementptr i32, i32* %r3, i32 3
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i128
-%r131 = shl i128 %r130, 96
-%r132 = or i128 %r126, %r131
-%r133 = zext i128 %r132 to i160
-%r135 = getelementptr i32, i32* %r3, i32 4
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i160
-%r138 = shl i160 %r137, 128
-%r139 = or i160 %r133, %r138
-%r140 = zext i160 %r139 to i192
-%r142 = getelementptr i32, i32* %r3, i32 5
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i192
-%r145 = shl i192 %r144, 160
-%r146 = or i192 %r140, %r145
-%r147 = zext i192 %r146 to i224
-%r149 = getelementptr i32, i32* %r3, i32 6
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i224
-%r152 = shl i224 %r151, 192
-%r153 = or i224 %r147, %r152
-%r154 = zext i224 %r153 to i256
-%r156 = getelementptr i32, i32* %r3, i32 7
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i256
-%r159 = shl i256 %r158, 224
-%r160 = or i256 %r154, %r159
-%r161 = zext i256 %r160 to i288
-%r163 = getelementptr i32, i32* %r3, i32 8
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i288
-%r166 = shl i288 %r165, 256
-%r167 = or i288 %r161, %r166
-%r168 = zext i288 %r167 to i320
-%r170 = getelementptr i32, i32* %r3, i32 9
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i320
-%r173 = shl i320 %r172, 288
-%r174 = or i320 %r168, %r173
-%r175 = zext i320 %r174 to i352
-%r177 = getelementptr i32, i32* %r3, i32 10
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i352
-%r180 = shl i352 %r179, 320
-%r181 = or i352 %r175, %r180
-%r182 = zext i352 %r181 to i384
-%r184 = getelementptr i32, i32* %r3, i32 11
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i384
-%r187 = shl i384 %r186, 352
-%r188 = or i384 %r182, %r187
-%r189 = zext i384 %r188 to i416
-%r191 = getelementptr i32, i32* %r3, i32 12
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i416
-%r194 = shl i416 %r193, 384
-%r195 = or i416 %r189, %r194
-%r196 = zext i416 %r195 to i448
-%r198 = getelementptr i32, i32* %r3, i32 13
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i448
-%r201 = shl i448 %r200, 416
-%r202 = or i448 %r196, %r201
-%r203 = zext i448 %r202 to i480
-%r205 = getelementptr i32, i32* %r3, i32 14
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i480
-%r208 = shl i480 %r207, 448
-%r209 = or i480 %r203, %r208
-%r210 = zext i480 %r209 to i512
-%r212 = getelementptr i32, i32* %r3, i32 15
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i512
-%r215 = shl i512 %r214, 480
-%r216 = or i512 %r210, %r215
-%r217 = zext i512 %r110 to i544
-%r218 = zext i512 %r216 to i544
-%r219 = sub i544 %r217, %r218
-%r220 = trunc i544 %r219 to i256
-%r221 = trunc i256 %r220 to i32
-%r223 = getelementptr i32, i32* %r1, i32 0
-store i32 %r221, i32* %r223
-%r224 = lshr i256 %r220, 32
-%r225 = trunc i256 %r224 to i32
-%r227 = getelementptr i32, i32* %r1, i32 1
-store i32 %r225, i32* %r227
-%r228 = lshr i256 %r224, 32
-%r229 = trunc i256 %r228 to i32
-%r231 = getelementptr i32, i32* %r1, i32 2
-store i32 %r229, i32* %r231
-%r232 = lshr i256 %r228, 32
-%r233 = trunc i256 %r232 to i32
-%r235 = getelementptr i32, i32* %r1, i32 3
-store i32 %r233, i32* %r235
-%r236 = lshr i256 %r232, 32
-%r237 = trunc i256 %r236 to i32
-%r239 = getelementptr i32, i32* %r1, i32 4
-store i32 %r237, i32* %r239
-%r240 = lshr i256 %r236, 32
-%r241 = trunc i256 %r240 to i32
-%r243 = getelementptr i32, i32* %r1, i32 5
-store i32 %r241, i32* %r243
-%r244 = lshr i256 %r240, 32
-%r245 = trunc i256 %r244 to i32
-%r247 = getelementptr i32, i32* %r1, i32 6
-store i32 %r245, i32* %r247
-%r248 = lshr i256 %r244, 32
-%r249 = trunc i256 %r248 to i32
-%r251 = getelementptr i32, i32* %r1, i32 7
-store i32 %r249, i32* %r251
-%r252 = lshr i544 %r219, 256
-%r253 = trunc i544 %r252 to i256
-%r254 = lshr i544 %r219, 512
-%r255 = trunc i544 %r254 to i1
-%r256 = load i32, i32* %r4
-%r257 = zext i32 %r256 to i64
-%r259 = getelementptr i32, i32* %r4, i32 1
-%r260 = load i32, i32* %r259
-%r261 = zext i32 %r260 to i64
-%r262 = shl i64 %r261, 32
-%r263 = or i64 %r257, %r262
-%r264 = zext i64 %r263 to i96
-%r266 = getelementptr i32, i32* %r4, i32 2
-%r267 = load i32, i32* %r266
-%r268 = zext i32 %r267 to i96
-%r269 = shl i96 %r268, 64
-%r270 = or i96 %r264, %r269
-%r271 = zext i96 %r270 to i128
-%r273 = getelementptr i32, i32* %r4, i32 3
-%r274 = load i32, i32* %r273
-%r275 = zext i32 %r274 to i128
-%r276 = shl i128 %r275, 96
-%r277 = or i128 %r271, %r276
-%r278 = zext i128 %r277 to i160
-%r280 = getelementptr i32, i32* %r4, i32 4
-%r281 = load i32, i32* %r280
-%r282 = zext i32 %r281 to i160
-%r283 = shl i160 %r282, 128
-%r284 = or i160 %r278, %r283
-%r285 = zext i160 %r284 to i192
-%r287 = getelementptr i32, i32* %r4, i32 5
-%r288 = load i32, i32* %r287
-%r289 = zext i32 %r288 to i192
-%r290 = shl i192 %r289, 160
-%r291 = or i192 %r285, %r290
-%r292 = zext i192 %r291 to i224
-%r294 = getelementptr i32, i32* %r4, i32 6
-%r295 = load i32, i32* %r294
-%r296 = zext i32 %r295 to i224
-%r297 = shl i224 %r296, 192
-%r298 = or i224 %r292, %r297
-%r299 = zext i224 %r298 to i256
-%r301 = getelementptr i32, i32* %r4, i32 7
-%r302 = load i32, i32* %r301
-%r303 = zext i32 %r302 to i256
-%r304 = shl i256 %r303, 224
-%r305 = or i256 %r299, %r304
-%r307 = select i1 %r255, i256 %r305, i256 0
-%r308 = add i256 %r253, %r307
-%r310 = getelementptr i32, i32* %r1, i32 8
-%r311 = trunc i256 %r308 to i32
-%r313 = getelementptr i32, i32* %r310, i32 0
-store i32 %r311, i32* %r313
-%r314 = lshr i256 %r308, 32
-%r315 = trunc i256 %r314 to i32
-%r317 = getelementptr i32, i32* %r310, i32 1
-store i32 %r315, i32* %r317
-%r318 = lshr i256 %r314, 32
-%r319 = trunc i256 %r318 to i32
-%r321 = getelementptr i32, i32* %r310, i32 2
-store i32 %r319, i32* %r321
-%r322 = lshr i256 %r318, 32
-%r323 = trunc i256 %r322 to i32
-%r325 = getelementptr i32, i32* %r310, i32 3
-store i32 %r323, i32* %r325
-%r326 = lshr i256 %r322, 32
-%r327 = trunc i256 %r326 to i32
-%r329 = getelementptr i32, i32* %r310, i32 4
-store i32 %r327, i32* %r329
-%r330 = lshr i256 %r326, 32
-%r331 = trunc i256 %r330 to i32
-%r333 = getelementptr i32, i32* %r310, i32 5
-store i32 %r331, i32* %r333
-%r334 = lshr i256 %r330, 32
-%r335 = trunc i256 %r334 to i32
-%r337 = getelementptr i32, i32* %r310, i32 6
-store i32 %r335, i32* %r337
-%r338 = lshr i256 %r334, 32
-%r339 = trunc i256 %r338 to i32
-%r341 = getelementptr i32, i32* %r310, i32 7
-store i32 %r339, i32* %r341
-ret void
-}
-define i320 @mulPv288x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
-%r18 = trunc i64 %r17 to i32
-%r19 = call i32 @extractHigh32(i64 %r17)
-%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
-%r22 = trunc i64 %r21 to i32
-%r23 = call i32 @extractHigh32(i64 %r21)
-%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
-%r26 = trunc i64 %r25 to i32
-%r27 = call i32 @extractHigh32(i64 %r25)
-%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
-%r30 = trunc i64 %r29 to i32
-%r31 = call i32 @extractHigh32(i64 %r29)
-%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
-%r34 = trunc i64 %r33 to i32
-%r35 = call i32 @extractHigh32(i64 %r33)
-%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
-%r38 = trunc i64 %r37 to i32
-%r39 = call i32 @extractHigh32(i64 %r37)
-%r40 = zext i32 %r6 to i64
-%r41 = zext i32 %r10 to i64
-%r42 = shl i64 %r41, 32
-%r43 = or i64 %r40, %r42
-%r44 = zext i64 %r43 to i96
-%r45 = zext i32 %r14 to i96
-%r46 = shl i96 %r45, 64
-%r47 = or i96 %r44, %r46
-%r48 = zext i96 %r47 to i128
-%r49 = zext i32 %r18 to i128
-%r50 = shl i128 %r49, 96
-%r51 = or i128 %r48, %r50
-%r52 = zext i128 %r51 to i160
-%r53 = zext i32 %r22 to i160
-%r54 = shl i160 %r53, 128
-%r55 = or i160 %r52, %r54
-%r56 = zext i160 %r55 to i192
-%r57 = zext i32 %r26 to i192
-%r58 = shl i192 %r57, 160
-%r59 = or i192 %r56, %r58
-%r60 = zext i192 %r59 to i224
-%r61 = zext i32 %r30 to i224
-%r62 = shl i224 %r61, 192
-%r63 = or i224 %r60, %r62
-%r64 = zext i224 %r63 to i256
-%r65 = zext i32 %r34 to i256
-%r66 = shl i256 %r65, 224
-%r67 = or i256 %r64, %r66
-%r68 = zext i256 %r67 to i288
-%r69 = zext i32 %r38 to i288
-%r70 = shl i288 %r69, 256
-%r71 = or i288 %r68, %r70
-%r72 = zext i32 %r7 to i64
-%r73 = zext i32 %r11 to i64
-%r74 = shl i64 %r73, 32
-%r75 = or i64 %r72, %r74
-%r76 = zext i64 %r75 to i96
-%r77 = zext i32 %r15 to i96
-%r78 = shl i96 %r77, 64
-%r79 = or i96 %r76, %r78
-%r80 = zext i96 %r79 to i128
-%r81 = zext i32 %r19 to i128
-%r82 = shl i128 %r81, 96
-%r83 = or i128 %r80, %r82
-%r84 = zext i128 %r83 to i160
-%r85 = zext i32 %r23 to i160
-%r86 = shl i160 %r85, 128
-%r87 = or i160 %r84, %r86
-%r88 = zext i160 %r87 to i192
-%r89 = zext i32 %r27 to i192
-%r90 = shl i192 %r89, 160
-%r91 = or i192 %r88, %r90
-%r92 = zext i192 %r91 to i224
-%r93 = zext i32 %r31 to i224
-%r94 = shl i224 %r93, 192
-%r95 = or i224 %r92, %r94
-%r96 = zext i224 %r95 to i256
-%r97 = zext i32 %r35 to i256
-%r98 = shl i256 %r97, 224
-%r99 = or i256 %r96, %r98
-%r100 = zext i256 %r99 to i288
-%r101 = zext i32 %r39 to i288
-%r102 = shl i288 %r101, 256
-%r103 = or i288 %r100, %r102
-%r104 = zext i288 %r71 to i320
-%r105 = zext i288 %r103 to i320
-%r106 = shl i320 %r105, 32
-%r107 = add i320 %r104, %r106
-ret i320 %r107
-}
-define void @mcl_fp_mulUnitPre9L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i320 @mulPv288x32(i32* %r2, i32 %r3)
-%r5 = trunc i320 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i320 %r4, 32
-%r9 = trunc i320 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i320 %r8, 32
-%r13 = trunc i320 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i320 %r12, 32
-%r17 = trunc i320 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i320 %r16, 32
-%r21 = trunc i320 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-%r24 = lshr i320 %r20, 32
-%r25 = trunc i320 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 5
-store i32 %r25, i32* %r27
-%r28 = lshr i320 %r24, 32
-%r29 = trunc i320 %r28 to i32
-%r31 = getelementptr i32, i32* %r1, i32 6
-store i32 %r29, i32* %r31
-%r32 = lshr i320 %r28, 32
-%r33 = trunc i320 %r32 to i32
-%r35 = getelementptr i32, i32* %r1, i32 7
-store i32 %r33, i32* %r35
-%r36 = lshr i320 %r32, 32
-%r37 = trunc i320 %r36 to i32
-%r39 = getelementptr i32, i32* %r1, i32 8
-store i32 %r37, i32* %r39
-%r40 = lshr i320 %r36, 32
-%r41 = trunc i320 %r40 to i32
-%r43 = getelementptr i32, i32* %r1, i32 9
-store i32 %r41, i32* %r43
-ret void
-}
-define void @mcl_fpDbl_mulPre9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r4 = load i32, i32* %r3
-%r5 = call i320 @mulPv288x32(i32* %r2, i32 %r4)
-%r6 = trunc i320 %r5 to i32
-store i32 %r6, i32* %r1
-%r7 = lshr i320 %r5, 32
-%r9 = getelementptr i32, i32* %r3, i32 1
-%r10 = load i32, i32* %r9
-%r11 = call i320 @mulPv288x32(i32* %r2, i32 %r10)
-%r12 = add i320 %r7, %r11
-%r13 = trunc i320 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 1
-store i32 %r13, i32* %r15
-%r16 = lshr i320 %r12, 32
-%r18 = getelementptr i32, i32* %r3, i32 2
-%r19 = load i32, i32* %r18
-%r20 = call i320 @mulPv288x32(i32* %r2, i32 %r19)
-%r21 = add i320 %r16, %r20
-%r22 = trunc i320 %r21 to i32
-%r24 = getelementptr i32, i32* %r1, i32 2
-store i32 %r22, i32* %r24
-%r25 = lshr i320 %r21, 32
-%r27 = getelementptr i32, i32* %r3, i32 3
-%r28 = load i32, i32* %r27
-%r29 = call i320 @mulPv288x32(i32* %r2, i32 %r28)
-%r30 = add i320 %r25, %r29
-%r31 = trunc i320 %r30 to i32
-%r33 = getelementptr i32, i32* %r1, i32 3
-store i32 %r31, i32* %r33
-%r34 = lshr i320 %r30, 32
-%r36 = getelementptr i32, i32* %r3, i32 4
-%r37 = load i32, i32* %r36
-%r38 = call i320 @mulPv288x32(i32* %r2, i32 %r37)
-%r39 = add i320 %r34, %r38
-%r40 = trunc i320 %r39 to i32
-%r42 = getelementptr i32, i32* %r1, i32 4
-store i32 %r40, i32* %r42
-%r43 = lshr i320 %r39, 32
-%r45 = getelementptr i32, i32* %r3, i32 5
-%r46 = load i32, i32* %r45
-%r47 = call i320 @mulPv288x32(i32* %r2, i32 %r46)
-%r48 = add i320 %r43, %r47
-%r49 = trunc i320 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 5
-store i32 %r49, i32* %r51
-%r52 = lshr i320 %r48, 32
-%r54 = getelementptr i32, i32* %r3, i32 6
-%r55 = load i32, i32* %r54
-%r56 = call i320 @mulPv288x32(i32* %r2, i32 %r55)
-%r57 = add i320 %r52, %r56
-%r58 = trunc i320 %r57 to i32
-%r60 = getelementptr i32, i32* %r1, i32 6
-store i32 %r58, i32* %r60
-%r61 = lshr i320 %r57, 32
-%r63 = getelementptr i32, i32* %r3, i32 7
-%r64 = load i32, i32* %r63
-%r65 = call i320 @mulPv288x32(i32* %r2, i32 %r64)
-%r66 = add i320 %r61, %r65
-%r67 = trunc i320 %r66 to i32
-%r69 = getelementptr i32, i32* %r1, i32 7
-store i32 %r67, i32* %r69
-%r70 = lshr i320 %r66, 32
-%r72 = getelementptr i32, i32* %r3, i32 8
-%r73 = load i32, i32* %r72
-%r74 = call i320 @mulPv288x32(i32* %r2, i32 %r73)
-%r75 = add i320 %r70, %r74
-%r77 = getelementptr i32, i32* %r1, i32 8
-%r78 = trunc i320 %r75 to i32
-%r80 = getelementptr i32, i32* %r77, i32 0
-store i32 %r78, i32* %r80
-%r81 = lshr i320 %r75, 32
-%r82 = trunc i320 %r81 to i32
-%r84 = getelementptr i32, i32* %r77, i32 1
-store i32 %r82, i32* %r84
-%r85 = lshr i320 %r81, 32
-%r86 = trunc i320 %r85 to i32
-%r88 = getelementptr i32, i32* %r77, i32 2
-store i32 %r86, i32* %r88
-%r89 = lshr i320 %r85, 32
-%r90 = trunc i320 %r89 to i32
-%r92 = getelementptr i32, i32* %r77, i32 3
-store i32 %r90, i32* %r92
-%r93 = lshr i320 %r89, 32
-%r94 = trunc i320 %r93 to i32
-%r96 = getelementptr i32, i32* %r77, i32 4
-store i32 %r94, i32* %r96
-%r97 = lshr i320 %r93, 32
-%r98 = trunc i320 %r97 to i32
-%r100 = getelementptr i32, i32* %r77, i32 5
-store i32 %r98, i32* %r100
-%r101 = lshr i320 %r97, 32
-%r102 = trunc i320 %r101 to i32
-%r104 = getelementptr i32, i32* %r77, i32 6
-store i32 %r102, i32* %r104
-%r105 = lshr i320 %r101, 32
-%r106 = trunc i320 %r105 to i32
-%r108 = getelementptr i32, i32* %r77, i32 7
-store i32 %r106, i32* %r108
-%r109 = lshr i320 %r105, 32
-%r110 = trunc i320 %r109 to i32
-%r112 = getelementptr i32, i32* %r77, i32 8
-store i32 %r110, i32* %r112
-%r113 = lshr i320 %r109, 32
-%r114 = trunc i320 %r113 to i32
-%r116 = getelementptr i32, i32* %r77, i32 9
-store i32 %r114, i32* %r116
-ret void
-}
-define void @mcl_fpDbl_sqrPre9L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = call i320 @mulPv288x32(i32* %r2, i32 %r3)
-%r5 = trunc i320 %r4 to i32
-store i32 %r5, i32* %r1
-%r6 = lshr i320 %r4, 32
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = call i320 @mulPv288x32(i32* %r2, i32 %r9)
-%r11 = add i320 %r6, %r10
-%r12 = trunc i320 %r11 to i32
-%r14 = getelementptr i32, i32* %r1, i32 1
-store i32 %r12, i32* %r14
-%r15 = lshr i320 %r11, 32
-%r17 = getelementptr i32, i32* %r2, i32 2
-%r18 = load i32, i32* %r17
-%r19 = call i320 @mulPv288x32(i32* %r2, i32 %r18)
-%r20 = add i320 %r15, %r19
-%r21 = trunc i320 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 2
-store i32 %r21, i32* %r23
-%r24 = lshr i320 %r20, 32
-%r26 = getelementptr i32, i32* %r2, i32 3
-%r27 = load i32, i32* %r26
-%r28 = call i320 @mulPv288x32(i32* %r2, i32 %r27)
-%r29 = add i320 %r24, %r28
-%r30 = trunc i320 %r29 to i32
-%r32 = getelementptr i32, i32* %r1, i32 3
-store i32 %r30, i32* %r32
-%r33 = lshr i320 %r29, 32
-%r35 = getelementptr i32, i32* %r2, i32 4
-%r36 = load i32, i32* %r35
-%r37 = call i320 @mulPv288x32(i32* %r2, i32 %r36)
-%r38 = add i320 %r33, %r37
-%r39 = trunc i320 %r38 to i32
-%r41 = getelementptr i32, i32* %r1, i32 4
-store i32 %r39, i32* %r41
-%r42 = lshr i320 %r38, 32
-%r44 = getelementptr i32, i32* %r2, i32 5
-%r45 = load i32, i32* %r44
-%r46 = call i320 @mulPv288x32(i32* %r2, i32 %r45)
-%r47 = add i320 %r42, %r46
-%r48 = trunc i320 %r47 to i32
-%r50 = getelementptr i32, i32* %r1, i32 5
-store i32 %r48, i32* %r50
-%r51 = lshr i320 %r47, 32
-%r53 = getelementptr i32, i32* %r2, i32 6
-%r54 = load i32, i32* %r53
-%r55 = call i320 @mulPv288x32(i32* %r2, i32 %r54)
-%r56 = add i320 %r51, %r55
-%r57 = trunc i320 %r56 to i32
-%r59 = getelementptr i32, i32* %r1, i32 6
-store i32 %r57, i32* %r59
-%r60 = lshr i320 %r56, 32
-%r62 = getelementptr i32, i32* %r2, i32 7
-%r63 = load i32, i32* %r62
-%r64 = call i320 @mulPv288x32(i32* %r2, i32 %r63)
-%r65 = add i320 %r60, %r64
-%r66 = trunc i320 %r65 to i32
-%r68 = getelementptr i32, i32* %r1, i32 7
-store i32 %r66, i32* %r68
-%r69 = lshr i320 %r65, 32
-%r71 = getelementptr i32, i32* %r2, i32 8
-%r72 = load i32, i32* %r71
-%r73 = call i320 @mulPv288x32(i32* %r2, i32 %r72)
-%r74 = add i320 %r69, %r73
-%r76 = getelementptr i32, i32* %r1, i32 8
-%r77 = trunc i320 %r74 to i32
-%r79 = getelementptr i32, i32* %r76, i32 0
-store i32 %r77, i32* %r79
-%r80 = lshr i320 %r74, 32
-%r81 = trunc i320 %r80 to i32
-%r83 = getelementptr i32, i32* %r76, i32 1
-store i32 %r81, i32* %r83
-%r84 = lshr i320 %r80, 32
-%r85 = trunc i320 %r84 to i32
-%r87 = getelementptr i32, i32* %r76, i32 2
-store i32 %r85, i32* %r87
-%r88 = lshr i320 %r84, 32
-%r89 = trunc i320 %r88 to i32
-%r91 = getelementptr i32, i32* %r76, i32 3
-store i32 %r89, i32* %r91
-%r92 = lshr i320 %r88, 32
-%r93 = trunc i320 %r92 to i32
-%r95 = getelementptr i32, i32* %r76, i32 4
-store i32 %r93, i32* %r95
-%r96 = lshr i320 %r92, 32
-%r97 = trunc i320 %r96 to i32
-%r99 = getelementptr i32, i32* %r76, i32 5
-store i32 %r97, i32* %r99
-%r100 = lshr i320 %r96, 32
-%r101 = trunc i320 %r100 to i32
-%r103 = getelementptr i32, i32* %r76, i32 6
-store i32 %r101, i32* %r103
-%r104 = lshr i320 %r100, 32
-%r105 = trunc i320 %r104 to i32
-%r107 = getelementptr i32, i32* %r76, i32 7
-store i32 %r105, i32* %r107
-%r108 = lshr i320 %r104, 32
-%r109 = trunc i320 %r108 to i32
-%r111 = getelementptr i32, i32* %r76, i32 8
-store i32 %r109, i32* %r111
-%r112 = lshr i320 %r108, 32
-%r113 = trunc i320 %r112 to i32
-%r115 = getelementptr i32, i32* %r76, i32 9
-store i32 %r113, i32* %r115
-ret void
-}
-define void @mcl_fp_mont9L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i320 @mulPv288x32(i32* %r2, i32 %r10)
-%r12 = zext i320 %r11 to i352
-%r13 = trunc i320 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i320 @mulPv288x32(i32* %r4, i32 %r14)
-%r16 = zext i320 %r15 to i352
-%r17 = add i352 %r12, %r16
-%r18 = lshr i352 %r17, 32
-%r20 = getelementptr i32, i32* %r3, i32 1
-%r21 = load i32, i32* %r20
-%r22 = call i320 @mulPv288x32(i32* %r2, i32 %r21)
-%r23 = zext i320 %r22 to i352
-%r24 = add i352 %r18, %r23
-%r25 = trunc i352 %r24 to i32
-%r26 = mul i32 %r25, %r7
-%r27 = call i320 @mulPv288x32(i32* %r4, i32 %r26)
-%r28 = zext i320 %r27 to i352
-%r29 = add i352 %r24, %r28
-%r30 = lshr i352 %r29, 32
-%r32 = getelementptr i32, i32* %r3, i32 2
-%r33 = load i32, i32* %r32
-%r34 = call i320 @mulPv288x32(i32* %r2, i32 %r33)
-%r35 = zext i320 %r34 to i352
-%r36 = add i352 %r30, %r35
-%r37 = trunc i352 %r36 to i32
-%r38 = mul i32 %r37, %r7
-%r39 = call i320 @mulPv288x32(i32* %r4, i32 %r38)
-%r40 = zext i320 %r39 to i352
-%r41 = add i352 %r36, %r40
-%r42 = lshr i352 %r41, 32
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = call i320 @mulPv288x32(i32* %r2, i32 %r45)
-%r47 = zext i320 %r46 to i352
-%r48 = add i352 %r42, %r47
-%r49 = trunc i352 %r48 to i32
-%r50 = mul i32 %r49, %r7
-%r51 = call i320 @mulPv288x32(i32* %r4, i32 %r50)
-%r52 = zext i320 %r51 to i352
-%r53 = add i352 %r48, %r52
-%r54 = lshr i352 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 4
-%r57 = load i32, i32* %r56
-%r58 = call i320 @mulPv288x32(i32* %r2, i32 %r57)
-%r59 = zext i320 %r58 to i352
-%r60 = add i352 %r54, %r59
-%r61 = trunc i352 %r60 to i32
-%r62 = mul i32 %r61, %r7
-%r63 = call i320 @mulPv288x32(i32* %r4, i32 %r62)
-%r64 = zext i320 %r63 to i352
-%r65 = add i352 %r60, %r64
-%r66 = lshr i352 %r65, 32
-%r68 = getelementptr i32, i32* %r3, i32 5
-%r69 = load i32, i32* %r68
-%r70 = call i320 @mulPv288x32(i32* %r2, i32 %r69)
-%r71 = zext i320 %r70 to i352
-%r72 = add i352 %r66, %r71
-%r73 = trunc i352 %r72 to i32
-%r74 = mul i32 %r73, %r7
-%r75 = call i320 @mulPv288x32(i32* %r4, i32 %r74)
-%r76 = zext i320 %r75 to i352
-%r77 = add i352 %r72, %r76
-%r78 = lshr i352 %r77, 32
-%r80 = getelementptr i32, i32* %r3, i32 6
-%r81 = load i32, i32* %r80
-%r82 = call i320 @mulPv288x32(i32* %r2, i32 %r81)
-%r83 = zext i320 %r82 to i352
-%r84 = add i352 %r78, %r83
-%r85 = trunc i352 %r84 to i32
-%r86 = mul i32 %r85, %r7
-%r87 = call i320 @mulPv288x32(i32* %r4, i32 %r86)
-%r88 = zext i320 %r87 to i352
-%r89 = add i352 %r84, %r88
-%r90 = lshr i352 %r89, 32
-%r92 = getelementptr i32, i32* %r3, i32 7
-%r93 = load i32, i32* %r92
-%r94 = call i320 @mulPv288x32(i32* %r2, i32 %r93)
-%r95 = zext i320 %r94 to i352
-%r96 = add i352 %r90, %r95
-%r97 = trunc i352 %r96 to i32
-%r98 = mul i32 %r97, %r7
-%r99 = call i320 @mulPv288x32(i32* %r4, i32 %r98)
-%r100 = zext i320 %r99 to i352
-%r101 = add i352 %r96, %r100
-%r102 = lshr i352 %r101, 32
-%r104 = getelementptr i32, i32* %r3, i32 8
-%r105 = load i32, i32* %r104
-%r106 = call i320 @mulPv288x32(i32* %r2, i32 %r105)
-%r107 = zext i320 %r106 to i352
-%r108 = add i352 %r102, %r107
-%r109 = trunc i352 %r108 to i32
-%r110 = mul i32 %r109, %r7
-%r111 = call i320 @mulPv288x32(i32* %r4, i32 %r110)
-%r112 = zext i320 %r111 to i352
-%r113 = add i352 %r108, %r112
-%r114 = lshr i352 %r113, 32
-%r115 = trunc i352 %r114 to i320
-%r116 = load i32, i32* %r4
-%r117 = zext i32 %r116 to i64
-%r119 = getelementptr i32, i32* %r4, i32 1
-%r120 = load i32, i32* %r119
-%r121 = zext i32 %r120 to i64
-%r122 = shl i64 %r121, 32
-%r123 = or i64 %r117, %r122
-%r124 = zext i64 %r123 to i96
-%r126 = getelementptr i32, i32* %r4, i32 2
-%r127 = load i32, i32* %r126
-%r128 = zext i32 %r127 to i96
-%r129 = shl i96 %r128, 64
-%r130 = or i96 %r124, %r129
-%r131 = zext i96 %r130 to i128
-%r133 = getelementptr i32, i32* %r4, i32 3
-%r134 = load i32, i32* %r133
-%r135 = zext i32 %r134 to i128
-%r136 = shl i128 %r135, 96
-%r137 = or i128 %r131, %r136
-%r138 = zext i128 %r137 to i160
-%r140 = getelementptr i32, i32* %r4, i32 4
-%r141 = load i32, i32* %r140
-%r142 = zext i32 %r141 to i160
-%r143 = shl i160 %r142, 128
-%r144 = or i160 %r138, %r143
-%r145 = zext i160 %r144 to i192
-%r147 = getelementptr i32, i32* %r4, i32 5
-%r148 = load i32, i32* %r147
-%r149 = zext i32 %r148 to i192
-%r150 = shl i192 %r149, 160
-%r151 = or i192 %r145, %r150
-%r152 = zext i192 %r151 to i224
-%r154 = getelementptr i32, i32* %r4, i32 6
-%r155 = load i32, i32* %r154
-%r156 = zext i32 %r155 to i224
-%r157 = shl i224 %r156, 192
-%r158 = or i224 %r152, %r157
-%r159 = zext i224 %r158 to i256
-%r161 = getelementptr i32, i32* %r4, i32 7
-%r162 = load i32, i32* %r161
-%r163 = zext i32 %r162 to i256
-%r164 = shl i256 %r163, 224
-%r165 = or i256 %r159, %r164
-%r166 = zext i256 %r165 to i288
-%r168 = getelementptr i32, i32* %r4, i32 8
-%r169 = load i32, i32* %r168
-%r170 = zext i32 %r169 to i288
-%r171 = shl i288 %r170, 256
-%r172 = or i288 %r166, %r171
-%r173 = zext i288 %r172 to i320
-%r174 = sub i320 %r115, %r173
-%r175 = lshr i320 %r174, 288
-%r176 = trunc i320 %r175 to i1
-%r177 = select i1 %r176, i320 %r115, i320 %r174
-%r178 = trunc i320 %r177 to i288
-%r179 = trunc i288 %r178 to i32
-%r181 = getelementptr i32, i32* %r1, i32 0
-store i32 %r179, i32* %r181
-%r182 = lshr i288 %r178, 32
-%r183 = trunc i288 %r182 to i32
-%r185 = getelementptr i32, i32* %r1, i32 1
-store i32 %r183, i32* %r185
-%r186 = lshr i288 %r182, 32
-%r187 = trunc i288 %r186 to i32
-%r189 = getelementptr i32, i32* %r1, i32 2
-store i32 %r187, i32* %r189
-%r190 = lshr i288 %r186, 32
-%r191 = trunc i288 %r190 to i32
-%r193 = getelementptr i32, i32* %r1, i32 3
-store i32 %r191, i32* %r193
-%r194 = lshr i288 %r190, 32
-%r195 = trunc i288 %r194 to i32
-%r197 = getelementptr i32, i32* %r1, i32 4
-store i32 %r195, i32* %r197
-%r198 = lshr i288 %r194, 32
-%r199 = trunc i288 %r198 to i32
-%r201 = getelementptr i32, i32* %r1, i32 5
-store i32 %r199, i32* %r201
-%r202 = lshr i288 %r198, 32
-%r203 = trunc i288 %r202 to i32
-%r205 = getelementptr i32, i32* %r1, i32 6
-store i32 %r203, i32* %r205
-%r206 = lshr i288 %r202, 32
-%r207 = trunc i288 %r206 to i32
-%r209 = getelementptr i32, i32* %r1, i32 7
-store i32 %r207, i32* %r209
-%r210 = lshr i288 %r206, 32
-%r211 = trunc i288 %r210 to i32
-%r213 = getelementptr i32, i32* %r1, i32 8
-store i32 %r211, i32* %r213
-ret void
-}
-define void @mcl_fp_montNF9L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i320 @mulPv288x32(i32* %r2, i32 %r8)
-%r10 = trunc i320 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i320 @mulPv288x32(i32* %r4, i32 %r11)
-%r13 = add i320 %r9, %r12
-%r14 = lshr i320 %r13, 32
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = call i320 @mulPv288x32(i32* %r2, i32 %r17)
-%r19 = add i320 %r14, %r18
-%r20 = trunc i320 %r19 to i32
-%r21 = mul i32 %r20, %r7
-%r22 = call i320 @mulPv288x32(i32* %r4, i32 %r21)
-%r23 = add i320 %r19, %r22
-%r24 = lshr i320 %r23, 32
-%r26 = getelementptr i32, i32* %r3, i32 2
-%r27 = load i32, i32* %r26
-%r28 = call i320 @mulPv288x32(i32* %r2, i32 %r27)
-%r29 = add i320 %r24, %r28
-%r30 = trunc i320 %r29 to i32
-%r31 = mul i32 %r30, %r7
-%r32 = call i320 @mulPv288x32(i32* %r4, i32 %r31)
-%r33 = add i320 %r29, %r32
-%r34 = lshr i320 %r33, 32
-%r36 = getelementptr i32, i32* %r3, i32 3
-%r37 = load i32, i32* %r36
-%r38 = call i320 @mulPv288x32(i32* %r2, i32 %r37)
-%r39 = add i320 %r34, %r38
-%r40 = trunc i320 %r39 to i32
-%r41 = mul i32 %r40, %r7
-%r42 = call i320 @mulPv288x32(i32* %r4, i32 %r41)
-%r43 = add i320 %r39, %r42
-%r44 = lshr i320 %r43, 32
-%r46 = getelementptr i32, i32* %r3, i32 4
-%r47 = load i32, i32* %r46
-%r48 = call i320 @mulPv288x32(i32* %r2, i32 %r47)
-%r49 = add i320 %r44, %r48
-%r50 = trunc i320 %r49 to i32
-%r51 = mul i32 %r50, %r7
-%r52 = call i320 @mulPv288x32(i32* %r4, i32 %r51)
-%r53 = add i320 %r49, %r52
-%r54 = lshr i320 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 5
-%r57 = load i32, i32* %r56
-%r58 = call i320 @mulPv288x32(i32* %r2, i32 %r57)
-%r59 = add i320 %r54, %r58
-%r60 = trunc i320 %r59 to i32
-%r61 = mul i32 %r60, %r7
-%r62 = call i320 @mulPv288x32(i32* %r4, i32 %r61)
-%r63 = add i320 %r59, %r62
-%r64 = lshr i320 %r63, 32
-%r66 = getelementptr i32, i32* %r3, i32 6
-%r67 = load i32, i32* %r66
-%r68 = call i320 @mulPv288x32(i32* %r2, i32 %r67)
-%r69 = add i320 %r64, %r68
-%r70 = trunc i320 %r69 to i32
-%r71 = mul i32 %r70, %r7
-%r72 = call i320 @mulPv288x32(i32* %r4, i32 %r71)
-%r73 = add i320 %r69, %r72
-%r74 = lshr i320 %r73, 32
-%r76 = getelementptr i32, i32* %r3, i32 7
-%r77 = load i32, i32* %r76
-%r78 = call i320 @mulPv288x32(i32* %r2, i32 %r77)
-%r79 = add i320 %r74, %r78
-%r80 = trunc i320 %r79 to i32
-%r81 = mul i32 %r80, %r7
-%r82 = call i320 @mulPv288x32(i32* %r4, i32 %r81)
-%r83 = add i320 %r79, %r82
-%r84 = lshr i320 %r83, 32
-%r86 = getelementptr i32, i32* %r3, i32 8
-%r87 = load i32, i32* %r86
-%r88 = call i320 @mulPv288x32(i32* %r2, i32 %r87)
-%r89 = add i320 %r84, %r88
-%r90 = trunc i320 %r89 to i32
-%r91 = mul i32 %r90, %r7
-%r92 = call i320 @mulPv288x32(i32* %r4, i32 %r91)
-%r93 = add i320 %r89, %r92
-%r94 = lshr i320 %r93, 32
-%r95 = trunc i320 %r94 to i288
-%r96 = load i32, i32* %r4
-%r97 = zext i32 %r96 to i64
-%r99 = getelementptr i32, i32* %r4, i32 1
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i64
-%r102 = shl i64 %r101, 32
-%r103 = or i64 %r97, %r102
-%r104 = zext i64 %r103 to i96
-%r106 = getelementptr i32, i32* %r4, i32 2
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i96
-%r109 = shl i96 %r108, 64
-%r110 = or i96 %r104, %r109
-%r111 = zext i96 %r110 to i128
-%r113 = getelementptr i32, i32* %r4, i32 3
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i128
-%r116 = shl i128 %r115, 96
-%r117 = or i128 %r111, %r116
-%r118 = zext i128 %r117 to i160
-%r120 = getelementptr i32, i32* %r4, i32 4
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i160
-%r123 = shl i160 %r122, 128
-%r124 = or i160 %r118, %r123
-%r125 = zext i160 %r124 to i192
-%r127 = getelementptr i32, i32* %r4, i32 5
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i192
-%r130 = shl i192 %r129, 160
-%r131 = or i192 %r125, %r130
-%r132 = zext i192 %r131 to i224
-%r134 = getelementptr i32, i32* %r4, i32 6
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i224
-%r137 = shl i224 %r136, 192
-%r138 = or i224 %r132, %r137
-%r139 = zext i224 %r138 to i256
-%r141 = getelementptr i32, i32* %r4, i32 7
-%r142 = load i32, i32* %r141
-%r143 = zext i32 %r142 to i256
-%r144 = shl i256 %r143, 224
-%r145 = or i256 %r139, %r144
-%r146 = zext i256 %r145 to i288
-%r148 = getelementptr i32, i32* %r4, i32 8
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i288
-%r151 = shl i288 %r150, 256
-%r152 = or i288 %r146, %r151
-%r153 = sub i288 %r95, %r152
-%r154 = lshr i288 %r153, 287
-%r155 = trunc i288 %r154 to i1
-%r156 = select i1 %r155, i288 %r95, i288 %r153
-%r157 = trunc i288 %r156 to i32
-%r159 = getelementptr i32, i32* %r1, i32 0
-store i32 %r157, i32* %r159
-%r160 = lshr i288 %r156, 32
-%r161 = trunc i288 %r160 to i32
-%r163 = getelementptr i32, i32* %r1, i32 1
-store i32 %r161, i32* %r163
-%r164 = lshr i288 %r160, 32
-%r165 = trunc i288 %r164 to i32
-%r167 = getelementptr i32, i32* %r1, i32 2
-store i32 %r165, i32* %r167
-%r168 = lshr i288 %r164, 32
-%r169 = trunc i288 %r168 to i32
-%r171 = getelementptr i32, i32* %r1, i32 3
-store i32 %r169, i32* %r171
-%r172 = lshr i288 %r168, 32
-%r173 = trunc i288 %r172 to i32
-%r175 = getelementptr i32, i32* %r1, i32 4
-store i32 %r173, i32* %r175
-%r176 = lshr i288 %r172, 32
-%r177 = trunc i288 %r176 to i32
-%r179 = getelementptr i32, i32* %r1, i32 5
-store i32 %r177, i32* %r179
-%r180 = lshr i288 %r176, 32
-%r181 = trunc i288 %r180 to i32
-%r183 = getelementptr i32, i32* %r1, i32 6
-store i32 %r181, i32* %r183
-%r184 = lshr i288 %r180, 32
-%r185 = trunc i288 %r184 to i32
-%r187 = getelementptr i32, i32* %r1, i32 7
-store i32 %r185, i32* %r187
-%r188 = lshr i288 %r184, 32
-%r189 = trunc i288 %r188 to i32
-%r191 = getelementptr i32, i32* %r1, i32 8
-store i32 %r189, i32* %r191
-ret void
-}
-define void @mcl_fp_montRed9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = zext i96 %r21 to i128
-%r24 = getelementptr i32, i32* %r3, i32 3
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i128
-%r27 = shl i128 %r26, 96
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i160
-%r31 = getelementptr i32, i32* %r3, i32 4
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i160
-%r34 = shl i160 %r33, 128
-%r35 = or i160 %r29, %r34
-%r36 = zext i160 %r35 to i192
-%r38 = getelementptr i32, i32* %r3, i32 5
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i192
-%r41 = shl i192 %r40, 160
-%r42 = or i192 %r36, %r41
-%r43 = zext i192 %r42 to i224
-%r45 = getelementptr i32, i32* %r3, i32 6
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i224
-%r48 = shl i224 %r47, 192
-%r49 = or i224 %r43, %r48
-%r50 = zext i224 %r49 to i256
-%r52 = getelementptr i32, i32* %r3, i32 7
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i256
-%r55 = shl i256 %r54, 224
-%r56 = or i256 %r50, %r55
-%r57 = zext i256 %r56 to i288
-%r59 = getelementptr i32, i32* %r3, i32 8
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i288
-%r62 = shl i288 %r61, 256
-%r63 = or i288 %r57, %r62
-%r64 = load i32, i32* %r2
-%r65 = zext i32 %r64 to i64
-%r67 = getelementptr i32, i32* %r2, i32 1
-%r68 = load i32, i32* %r67
-%r69 = zext i32 %r68 to i64
-%r70 = shl i64 %r69, 32
-%r71 = or i64 %r65, %r70
-%r72 = zext i64 %r71 to i96
-%r74 = getelementptr i32, i32* %r2, i32 2
-%r75 = load i32, i32* %r74
-%r76 = zext i32 %r75 to i96
-%r77 = shl i96 %r76, 64
-%r78 = or i96 %r72, %r77
-%r79 = zext i96 %r78 to i128
-%r81 = getelementptr i32, i32* %r2, i32 3
-%r82 = load i32, i32* %r81
-%r83 = zext i32 %r82 to i128
-%r84 = shl i128 %r83, 96
-%r85 = or i128 %r79, %r84
-%r86 = zext i128 %r85 to i160
-%r88 = getelementptr i32, i32* %r2, i32 4
-%r89 = load i32, i32* %r88
-%r90 = zext i32 %r89 to i160
-%r91 = shl i160 %r90, 128
-%r92 = or i160 %r86, %r91
-%r93 = zext i160 %r92 to i192
-%r95 = getelementptr i32, i32* %r2, i32 5
-%r96 = load i32, i32* %r95
-%r97 = zext i32 %r96 to i192
-%r98 = shl i192 %r97, 160
-%r99 = or i192 %r93, %r98
-%r100 = zext i192 %r99 to i224
-%r102 = getelementptr i32, i32* %r2, i32 6
-%r103 = load i32, i32* %r102
-%r104 = zext i32 %r103 to i224
-%r105 = shl i224 %r104, 192
-%r106 = or i224 %r100, %r105
-%r107 = zext i224 %r106 to i256
-%r109 = getelementptr i32, i32* %r2, i32 7
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i256
-%r112 = shl i256 %r111, 224
-%r113 = or i256 %r107, %r112
-%r114 = zext i256 %r113 to i288
-%r116 = getelementptr i32, i32* %r2, i32 8
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i288
-%r119 = shl i288 %r118, 256
-%r120 = or i288 %r114, %r119
-%r121 = zext i288 %r120 to i320
-%r123 = getelementptr i32, i32* %r2, i32 9
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i320
-%r126 = shl i320 %r125, 288
-%r127 = or i320 %r121, %r126
-%r128 = zext i320 %r127 to i352
-%r130 = getelementptr i32, i32* %r2, i32 10
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i352
-%r133 = shl i352 %r132, 320
-%r134 = or i352 %r128, %r133
-%r135 = zext i352 %r134 to i384
-%r137 = getelementptr i32, i32* %r2, i32 11
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i384
-%r140 = shl i384 %r139, 352
-%r141 = or i384 %r135, %r140
-%r142 = zext i384 %r141 to i416
-%r144 = getelementptr i32, i32* %r2, i32 12
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i416
-%r147 = shl i416 %r146, 384
-%r148 = or i416 %r142, %r147
-%r149 = zext i416 %r148 to i448
-%r151 = getelementptr i32, i32* %r2, i32 13
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i448
-%r154 = shl i448 %r153, 416
-%r155 = or i448 %r149, %r154
-%r156 = zext i448 %r155 to i480
-%r158 = getelementptr i32, i32* %r2, i32 14
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i480
-%r161 = shl i480 %r160, 448
-%r162 = or i480 %r156, %r161
-%r163 = zext i480 %r162 to i512
-%r165 = getelementptr i32, i32* %r2, i32 15
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i512
-%r168 = shl i512 %r167, 480
-%r169 = or i512 %r163, %r168
-%r170 = zext i512 %r169 to i544
-%r172 = getelementptr i32, i32* %r2, i32 16
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i544
-%r175 = shl i544 %r174, 512
-%r176 = or i544 %r170, %r175
-%r177 = zext i544 %r176 to i576
-%r179 = getelementptr i32, i32* %r2, i32 17
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i576
-%r182 = shl i576 %r181, 544
-%r183 = or i576 %r177, %r182
-%r184 = zext i576 %r183 to i608
-%r185 = trunc i608 %r184 to i32
-%r186 = mul i32 %r185, %r6
-%r187 = call i320 @mulPv288x32(i32* %r3, i32 %r186)
-%r188 = zext i320 %r187 to i608
-%r189 = add i608 %r184, %r188
-%r190 = lshr i608 %r189, 32
-%r191 = trunc i608 %r190 to i576
-%r192 = trunc i576 %r191 to i32
-%r193 = mul i32 %r192, %r6
-%r194 = call i320 @mulPv288x32(i32* %r3, i32 %r193)
-%r195 = zext i320 %r194 to i576
-%r196 = add i576 %r191, %r195
-%r197 = lshr i576 %r196, 32
-%r198 = trunc i576 %r197 to i544
-%r199 = trunc i544 %r198 to i32
-%r200 = mul i32 %r199, %r6
-%r201 = call i320 @mulPv288x32(i32* %r3, i32 %r200)
-%r202 = zext i320 %r201 to i544
-%r203 = add i544 %r198, %r202
-%r204 = lshr i544 %r203, 32
-%r205 = trunc i544 %r204 to i512
-%r206 = trunc i512 %r205 to i32
-%r207 = mul i32 %r206, %r6
-%r208 = call i320 @mulPv288x32(i32* %r3, i32 %r207)
-%r209 = zext i320 %r208 to i512
-%r210 = add i512 %r205, %r209
-%r211 = lshr i512 %r210, 32
-%r212 = trunc i512 %r211 to i480
-%r213 = trunc i480 %r212 to i32
-%r214 = mul i32 %r213, %r6
-%r215 = call i320 @mulPv288x32(i32* %r3, i32 %r214)
-%r216 = zext i320 %r215 to i480
-%r217 = add i480 %r212, %r216
-%r218 = lshr i480 %r217, 32
-%r219 = trunc i480 %r218 to i448
-%r220 = trunc i448 %r219 to i32
-%r221 = mul i32 %r220, %r6
-%r222 = call i320 @mulPv288x32(i32* %r3, i32 %r221)
-%r223 = zext i320 %r222 to i448
-%r224 = add i448 %r219, %r223
-%r225 = lshr i448 %r224, 32
-%r226 = trunc i448 %r225 to i416
-%r227 = trunc i416 %r226 to i32
-%r228 = mul i32 %r227, %r6
-%r229 = call i320 @mulPv288x32(i32* %r3, i32 %r228)
-%r230 = zext i320 %r229 to i416
-%r231 = add i416 %r226, %r230
-%r232 = lshr i416 %r231, 32
-%r233 = trunc i416 %r232 to i384
-%r234 = trunc i384 %r233 to i32
-%r235 = mul i32 %r234, %r6
-%r236 = call i320 @mulPv288x32(i32* %r3, i32 %r235)
-%r237 = zext i320 %r236 to i384
-%r238 = add i384 %r233, %r237
-%r239 = lshr i384 %r238, 32
-%r240 = trunc i384 %r239 to i352
-%r241 = trunc i352 %r240 to i32
-%r242 = mul i32 %r241, %r6
-%r243 = call i320 @mulPv288x32(i32* %r3, i32 %r242)
-%r244 = zext i320 %r243 to i352
-%r245 = add i352 %r240, %r244
-%r246 = lshr i352 %r245, 32
-%r247 = trunc i352 %r246 to i320
-%r248 = zext i288 %r63 to i320
-%r249 = sub i320 %r247, %r248
-%r250 = lshr i320 %r249, 288
-%r251 = trunc i320 %r250 to i1
-%r252 = select i1 %r251, i320 %r247, i320 %r249
-%r253 = trunc i320 %r252 to i288
-%r254 = trunc i288 %r253 to i32
-%r256 = getelementptr i32, i32* %r1, i32 0
-store i32 %r254, i32* %r256
-%r257 = lshr i288 %r253, 32
-%r258 = trunc i288 %r257 to i32
-%r260 = getelementptr i32, i32* %r1, i32 1
-store i32 %r258, i32* %r260
-%r261 = lshr i288 %r257, 32
-%r262 = trunc i288 %r261 to i32
-%r264 = getelementptr i32, i32* %r1, i32 2
-store i32 %r262, i32* %r264
-%r265 = lshr i288 %r261, 32
-%r266 = trunc i288 %r265 to i32
-%r268 = getelementptr i32, i32* %r1, i32 3
-store i32 %r266, i32* %r268
-%r269 = lshr i288 %r265, 32
-%r270 = trunc i288 %r269 to i32
-%r272 = getelementptr i32, i32* %r1, i32 4
-store i32 %r270, i32* %r272
-%r273 = lshr i288 %r269, 32
-%r274 = trunc i288 %r273 to i32
-%r276 = getelementptr i32, i32* %r1, i32 5
-store i32 %r274, i32* %r276
-%r277 = lshr i288 %r273, 32
-%r278 = trunc i288 %r277 to i32
-%r280 = getelementptr i32, i32* %r1, i32 6
-store i32 %r278, i32* %r280
-%r281 = lshr i288 %r277, 32
-%r282 = trunc i288 %r281 to i32
-%r284 = getelementptr i32, i32* %r1, i32 7
-store i32 %r282, i32* %r284
-%r285 = lshr i288 %r281, 32
-%r286 = trunc i288 %r285 to i32
-%r288 = getelementptr i32, i32* %r1, i32 8
-store i32 %r286, i32* %r288
-ret void
-}
-define i32 @mcl_fp_addPre9L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r63 = load i32, i32* %r4
-%r64 = zext i32 %r63 to i64
-%r66 = getelementptr i32, i32* %r4, i32 1
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i64
-%r69 = shl i64 %r68, 32
-%r70 = or i64 %r64, %r69
-%r71 = zext i64 %r70 to i96
-%r73 = getelementptr i32, i32* %r4, i32 2
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i96
-%r76 = shl i96 %r75, 64
-%r77 = or i96 %r71, %r76
-%r78 = zext i96 %r77 to i128
-%r80 = getelementptr i32, i32* %r4, i32 3
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i128
-%r83 = shl i128 %r82, 96
-%r84 = or i128 %r78, %r83
-%r85 = zext i128 %r84 to i160
-%r87 = getelementptr i32, i32* %r4, i32 4
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i160
-%r90 = shl i160 %r89, 128
-%r91 = or i160 %r85, %r90
-%r92 = zext i160 %r91 to i192
-%r94 = getelementptr i32, i32* %r4, i32 5
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i192
-%r97 = shl i192 %r96, 160
-%r98 = or i192 %r92, %r97
-%r99 = zext i192 %r98 to i224
-%r101 = getelementptr i32, i32* %r4, i32 6
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i224
-%r104 = shl i224 %r103, 192
-%r105 = or i224 %r99, %r104
-%r106 = zext i224 %r105 to i256
-%r108 = getelementptr i32, i32* %r4, i32 7
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i256
-%r111 = shl i256 %r110, 224
-%r112 = or i256 %r106, %r111
-%r113 = zext i256 %r112 to i288
-%r115 = getelementptr i32, i32* %r4, i32 8
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i288
-%r118 = shl i288 %r117, 256
-%r119 = or i288 %r113, %r118
-%r120 = zext i288 %r119 to i320
-%r121 = add i320 %r62, %r120
-%r122 = trunc i320 %r121 to i288
-%r123 = trunc i288 %r122 to i32
-%r125 = getelementptr i32, i32* %r2, i32 0
-store i32 %r123, i32* %r125
-%r126 = lshr i288 %r122, 32
-%r127 = trunc i288 %r126 to i32
-%r129 = getelementptr i32, i32* %r2, i32 1
-store i32 %r127, i32* %r129
-%r130 = lshr i288 %r126, 32
-%r131 = trunc i288 %r130 to i32
-%r133 = getelementptr i32, i32* %r2, i32 2
-store i32 %r131, i32* %r133
-%r134 = lshr i288 %r130, 32
-%r135 = trunc i288 %r134 to i32
-%r137 = getelementptr i32, i32* %r2, i32 3
-store i32 %r135, i32* %r137
-%r138 = lshr i288 %r134, 32
-%r139 = trunc i288 %r138 to i32
-%r141 = getelementptr i32, i32* %r2, i32 4
-store i32 %r139, i32* %r141
-%r142 = lshr i288 %r138, 32
-%r143 = trunc i288 %r142 to i32
-%r145 = getelementptr i32, i32* %r2, i32 5
-store i32 %r143, i32* %r145
-%r146 = lshr i288 %r142, 32
-%r147 = trunc i288 %r146 to i32
-%r149 = getelementptr i32, i32* %r2, i32 6
-store i32 %r147, i32* %r149
-%r150 = lshr i288 %r146, 32
-%r151 = trunc i288 %r150 to i32
-%r153 = getelementptr i32, i32* %r2, i32 7
-store i32 %r151, i32* %r153
-%r154 = lshr i288 %r150, 32
-%r155 = trunc i288 %r154 to i32
-%r157 = getelementptr i32, i32* %r2, i32 8
-store i32 %r155, i32* %r157
-%r158 = lshr i320 %r121, 288
-%r159 = trunc i320 %r158 to i32
-ret i32 %r159
-}
-define i32 @mcl_fp_subPre9L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r63 = load i32, i32* %r4
-%r64 = zext i32 %r63 to i64
-%r66 = getelementptr i32, i32* %r4, i32 1
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i64
-%r69 = shl i64 %r68, 32
-%r70 = or i64 %r64, %r69
-%r71 = zext i64 %r70 to i96
-%r73 = getelementptr i32, i32* %r4, i32 2
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i96
-%r76 = shl i96 %r75, 64
-%r77 = or i96 %r71, %r76
-%r78 = zext i96 %r77 to i128
-%r80 = getelementptr i32, i32* %r4, i32 3
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i128
-%r83 = shl i128 %r82, 96
-%r84 = or i128 %r78, %r83
-%r85 = zext i128 %r84 to i160
-%r87 = getelementptr i32, i32* %r4, i32 4
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i160
-%r90 = shl i160 %r89, 128
-%r91 = or i160 %r85, %r90
-%r92 = zext i160 %r91 to i192
-%r94 = getelementptr i32, i32* %r4, i32 5
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i192
-%r97 = shl i192 %r96, 160
-%r98 = or i192 %r92, %r97
-%r99 = zext i192 %r98 to i224
-%r101 = getelementptr i32, i32* %r4, i32 6
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i224
-%r104 = shl i224 %r103, 192
-%r105 = or i224 %r99, %r104
-%r106 = zext i224 %r105 to i256
-%r108 = getelementptr i32, i32* %r4, i32 7
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i256
-%r111 = shl i256 %r110, 224
-%r112 = or i256 %r106, %r111
-%r113 = zext i256 %r112 to i288
-%r115 = getelementptr i32, i32* %r4, i32 8
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i288
-%r118 = shl i288 %r117, 256
-%r119 = or i288 %r113, %r118
-%r120 = zext i288 %r119 to i320
-%r121 = sub i320 %r62, %r120
-%r122 = trunc i320 %r121 to i288
-%r123 = trunc i288 %r122 to i32
-%r125 = getelementptr i32, i32* %r2, i32 0
-store i32 %r123, i32* %r125
-%r126 = lshr i288 %r122, 32
-%r127 = trunc i288 %r126 to i32
-%r129 = getelementptr i32, i32* %r2, i32 1
-store i32 %r127, i32* %r129
-%r130 = lshr i288 %r126, 32
-%r131 = trunc i288 %r130 to i32
-%r133 = getelementptr i32, i32* %r2, i32 2
-store i32 %r131, i32* %r133
-%r134 = lshr i288 %r130, 32
-%r135 = trunc i288 %r134 to i32
-%r137 = getelementptr i32, i32* %r2, i32 3
-store i32 %r135, i32* %r137
-%r138 = lshr i288 %r134, 32
-%r139 = trunc i288 %r138 to i32
-%r141 = getelementptr i32, i32* %r2, i32 4
-store i32 %r139, i32* %r141
-%r142 = lshr i288 %r138, 32
-%r143 = trunc i288 %r142 to i32
-%r145 = getelementptr i32, i32* %r2, i32 5
-store i32 %r143, i32* %r145
-%r146 = lshr i288 %r142, 32
-%r147 = trunc i288 %r146 to i32
-%r149 = getelementptr i32, i32* %r2, i32 6
-store i32 %r147, i32* %r149
-%r150 = lshr i288 %r146, 32
-%r151 = trunc i288 %r150 to i32
-%r153 = getelementptr i32, i32* %r2, i32 7
-store i32 %r151, i32* %r153
-%r154 = lshr i288 %r150, 32
-%r155 = trunc i288 %r154 to i32
-%r157 = getelementptr i32, i32* %r2, i32 8
-store i32 %r155, i32* %r157
-%r158 = lshr i320 %r121, 288
-%r159 = trunc i320 %r158 to i32
-%r161 = and i32 %r159, 1
-ret i32 %r161
-}
-define void @mcl_fp_shr1_9L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = zext i64 %r10 to i96
-%r13 = getelementptr i32, i32* %r2, i32 2
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i96
-%r16 = shl i96 %r15, 64
-%r17 = or i96 %r11, %r16
-%r18 = zext i96 %r17 to i128
-%r20 = getelementptr i32, i32* %r2, i32 3
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i128
-%r23 = shl i128 %r22, 96
-%r24 = or i128 %r18, %r23
-%r25 = zext i128 %r24 to i160
-%r27 = getelementptr i32, i32* %r2, i32 4
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i160
-%r30 = shl i160 %r29, 128
-%r31 = or i160 %r25, %r30
-%r32 = zext i160 %r31 to i192
-%r34 = getelementptr i32, i32* %r2, i32 5
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i192
-%r37 = shl i192 %r36, 160
-%r38 = or i192 %r32, %r37
-%r39 = zext i192 %r38 to i224
-%r41 = getelementptr i32, i32* %r2, i32 6
-%r42 = load i32, i32* %r41
-%r43 = zext i32 %r42 to i224
-%r44 = shl i224 %r43, 192
-%r45 = or i224 %r39, %r44
-%r46 = zext i224 %r45 to i256
-%r48 = getelementptr i32, i32* %r2, i32 7
-%r49 = load i32, i32* %r48
-%r50 = zext i32 %r49 to i256
-%r51 = shl i256 %r50, 224
-%r52 = or i256 %r46, %r51
-%r53 = zext i256 %r52 to i288
-%r55 = getelementptr i32, i32* %r2, i32 8
-%r56 = load i32, i32* %r55
-%r57 = zext i32 %r56 to i288
-%r58 = shl i288 %r57, 256
-%r59 = or i288 %r53, %r58
-%r60 = lshr i288 %r59, 1
-%r61 = trunc i288 %r60 to i32
-%r63 = getelementptr i32, i32* %r1, i32 0
-store i32 %r61, i32* %r63
-%r64 = lshr i288 %r60, 32
-%r65 = trunc i288 %r64 to i32
-%r67 = getelementptr i32, i32* %r1, i32 1
-store i32 %r65, i32* %r67
-%r68 = lshr i288 %r64, 32
-%r69 = trunc i288 %r68 to i32
-%r71 = getelementptr i32, i32* %r1, i32 2
-store i32 %r69, i32* %r71
-%r72 = lshr i288 %r68, 32
-%r73 = trunc i288 %r72 to i32
-%r75 = getelementptr i32, i32* %r1, i32 3
-store i32 %r73, i32* %r75
-%r76 = lshr i288 %r72, 32
-%r77 = trunc i288 %r76 to i32
-%r79 = getelementptr i32, i32* %r1, i32 4
-store i32 %r77, i32* %r79
-%r80 = lshr i288 %r76, 32
-%r81 = trunc i288 %r80 to i32
-%r83 = getelementptr i32, i32* %r1, i32 5
-store i32 %r81, i32* %r83
-%r84 = lshr i288 %r80, 32
-%r85 = trunc i288 %r84 to i32
-%r87 = getelementptr i32, i32* %r1, i32 6
-store i32 %r85, i32* %r87
-%r88 = lshr i288 %r84, 32
-%r89 = trunc i288 %r88 to i32
-%r91 = getelementptr i32, i32* %r1, i32 7
-store i32 %r89, i32* %r91
-%r92 = lshr i288 %r88, 32
-%r93 = trunc i288 %r92 to i32
-%r95 = getelementptr i32, i32* %r1, i32 8
-store i32 %r93, i32* %r95
-ret void
-}
-define void @mcl_fp_add9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = load i32, i32* %r3
-%r63 = zext i32 %r62 to i64
-%r65 = getelementptr i32, i32* %r3, i32 1
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i64
-%r68 = shl i64 %r67, 32
-%r69 = or i64 %r63, %r68
-%r70 = zext i64 %r69 to i96
-%r72 = getelementptr i32, i32* %r3, i32 2
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i96
-%r75 = shl i96 %r74, 64
-%r76 = or i96 %r70, %r75
-%r77 = zext i96 %r76 to i128
-%r79 = getelementptr i32, i32* %r3, i32 3
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i128
-%r82 = shl i128 %r81, 96
-%r83 = or i128 %r77, %r82
-%r84 = zext i128 %r83 to i160
-%r86 = getelementptr i32, i32* %r3, i32 4
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i160
-%r89 = shl i160 %r88, 128
-%r90 = or i160 %r84, %r89
-%r91 = zext i160 %r90 to i192
-%r93 = getelementptr i32, i32* %r3, i32 5
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i192
-%r96 = shl i192 %r95, 160
-%r97 = or i192 %r91, %r96
-%r98 = zext i192 %r97 to i224
-%r100 = getelementptr i32, i32* %r3, i32 6
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i224
-%r103 = shl i224 %r102, 192
-%r104 = or i224 %r98, %r103
-%r105 = zext i224 %r104 to i256
-%r107 = getelementptr i32, i32* %r3, i32 7
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i256
-%r110 = shl i256 %r109, 224
-%r111 = or i256 %r105, %r110
-%r112 = zext i256 %r111 to i288
-%r114 = getelementptr i32, i32* %r3, i32 8
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i288
-%r117 = shl i288 %r116, 256
-%r118 = or i288 %r112, %r117
-%r119 = zext i288 %r61 to i320
-%r120 = zext i288 %r118 to i320
-%r121 = add i320 %r119, %r120
-%r122 = trunc i320 %r121 to i288
-%r123 = trunc i288 %r122 to i32
-%r125 = getelementptr i32, i32* %r1, i32 0
-store i32 %r123, i32* %r125
-%r126 = lshr i288 %r122, 32
-%r127 = trunc i288 %r126 to i32
-%r129 = getelementptr i32, i32* %r1, i32 1
-store i32 %r127, i32* %r129
-%r130 = lshr i288 %r126, 32
-%r131 = trunc i288 %r130 to i32
-%r133 = getelementptr i32, i32* %r1, i32 2
-store i32 %r131, i32* %r133
-%r134 = lshr i288 %r130, 32
-%r135 = trunc i288 %r134 to i32
-%r137 = getelementptr i32, i32* %r1, i32 3
-store i32 %r135, i32* %r137
-%r138 = lshr i288 %r134, 32
-%r139 = trunc i288 %r138 to i32
-%r141 = getelementptr i32, i32* %r1, i32 4
-store i32 %r139, i32* %r141
-%r142 = lshr i288 %r138, 32
-%r143 = trunc i288 %r142 to i32
-%r145 = getelementptr i32, i32* %r1, i32 5
-store i32 %r143, i32* %r145
-%r146 = lshr i288 %r142, 32
-%r147 = trunc i288 %r146 to i32
-%r149 = getelementptr i32, i32* %r1, i32 6
-store i32 %r147, i32* %r149
-%r150 = lshr i288 %r146, 32
-%r151 = trunc i288 %r150 to i32
-%r153 = getelementptr i32, i32* %r1, i32 7
-store i32 %r151, i32* %r153
-%r154 = lshr i288 %r150, 32
-%r155 = trunc i288 %r154 to i32
-%r157 = getelementptr i32, i32* %r1, i32 8
-store i32 %r155, i32* %r157
-%r158 = load i32, i32* %r4
-%r159 = zext i32 %r158 to i64
-%r161 = getelementptr i32, i32* %r4, i32 1
-%r162 = load i32, i32* %r161
-%r163 = zext i32 %r162 to i64
-%r164 = shl i64 %r163, 32
-%r165 = or i64 %r159, %r164
-%r166 = zext i64 %r165 to i96
-%r168 = getelementptr i32, i32* %r4, i32 2
-%r169 = load i32, i32* %r168
-%r170 = zext i32 %r169 to i96
-%r171 = shl i96 %r170, 64
-%r172 = or i96 %r166, %r171
-%r173 = zext i96 %r172 to i128
-%r175 = getelementptr i32, i32* %r4, i32 3
-%r176 = load i32, i32* %r175
-%r177 = zext i32 %r176 to i128
-%r178 = shl i128 %r177, 96
-%r179 = or i128 %r173, %r178
-%r180 = zext i128 %r179 to i160
-%r182 = getelementptr i32, i32* %r4, i32 4
-%r183 = load i32, i32* %r182
-%r184 = zext i32 %r183 to i160
-%r185 = shl i160 %r184, 128
-%r186 = or i160 %r180, %r185
-%r187 = zext i160 %r186 to i192
-%r189 = getelementptr i32, i32* %r4, i32 5
-%r190 = load i32, i32* %r189
-%r191 = zext i32 %r190 to i192
-%r192 = shl i192 %r191, 160
-%r193 = or i192 %r187, %r192
-%r194 = zext i192 %r193 to i224
-%r196 = getelementptr i32, i32* %r4, i32 6
-%r197 = load i32, i32* %r196
-%r198 = zext i32 %r197 to i224
-%r199 = shl i224 %r198, 192
-%r200 = or i224 %r194, %r199
-%r201 = zext i224 %r200 to i256
-%r203 = getelementptr i32, i32* %r4, i32 7
-%r204 = load i32, i32* %r203
-%r205 = zext i32 %r204 to i256
-%r206 = shl i256 %r205, 224
-%r207 = or i256 %r201, %r206
-%r208 = zext i256 %r207 to i288
-%r210 = getelementptr i32, i32* %r4, i32 8
-%r211 = load i32, i32* %r210
-%r212 = zext i32 %r211 to i288
-%r213 = shl i288 %r212, 256
-%r214 = or i288 %r208, %r213
-%r215 = zext i288 %r214 to i320
-%r216 = sub i320 %r121, %r215
-%r217 = lshr i320 %r216, 288
-%r218 = trunc i320 %r217 to i1
-br i1%r218, label %carry, label %nocarry
-nocarry:
-%r219 = trunc i320 %r216 to i288
-%r220 = trunc i288 %r219 to i32
-%r222 = getelementptr i32, i32* %r1, i32 0
-store i32 %r220, i32* %r222
-%r223 = lshr i288 %r219, 32
-%r224 = trunc i288 %r223 to i32
-%r226 = getelementptr i32, i32* %r1, i32 1
-store i32 %r224, i32* %r226
-%r227 = lshr i288 %r223, 32
-%r228 = trunc i288 %r227 to i32
-%r230 = getelementptr i32, i32* %r1, i32 2
-store i32 %r228, i32* %r230
-%r231 = lshr i288 %r227, 32
-%r232 = trunc i288 %r231 to i32
-%r234 = getelementptr i32, i32* %r1, i32 3
-store i32 %r232, i32* %r234
-%r235 = lshr i288 %r231, 32
-%r236 = trunc i288 %r235 to i32
-%r238 = getelementptr i32, i32* %r1, i32 4
-store i32 %r236, i32* %r238
-%r239 = lshr i288 %r235, 32
-%r240 = trunc i288 %r239 to i32
-%r242 = getelementptr i32, i32* %r1, i32 5
-store i32 %r240, i32* %r242
-%r243 = lshr i288 %r239, 32
-%r244 = trunc i288 %r243 to i32
-%r246 = getelementptr i32, i32* %r1, i32 6
-store i32 %r244, i32* %r246
-%r247 = lshr i288 %r243, 32
-%r248 = trunc i288 %r247 to i32
-%r250 = getelementptr i32, i32* %r1, i32 7
-store i32 %r248, i32* %r250
-%r251 = lshr i288 %r247, 32
-%r252 = trunc i288 %r251 to i32
-%r254 = getelementptr i32, i32* %r1, i32 8
-store i32 %r252, i32* %r254
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = load i32, i32* %r3
-%r63 = zext i32 %r62 to i64
-%r65 = getelementptr i32, i32* %r3, i32 1
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i64
-%r68 = shl i64 %r67, 32
-%r69 = or i64 %r63, %r68
-%r70 = zext i64 %r69 to i96
-%r72 = getelementptr i32, i32* %r3, i32 2
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i96
-%r75 = shl i96 %r74, 64
-%r76 = or i96 %r70, %r75
-%r77 = zext i96 %r76 to i128
-%r79 = getelementptr i32, i32* %r3, i32 3
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i128
-%r82 = shl i128 %r81, 96
-%r83 = or i128 %r77, %r82
-%r84 = zext i128 %r83 to i160
-%r86 = getelementptr i32, i32* %r3, i32 4
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i160
-%r89 = shl i160 %r88, 128
-%r90 = or i160 %r84, %r89
-%r91 = zext i160 %r90 to i192
-%r93 = getelementptr i32, i32* %r3, i32 5
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i192
-%r96 = shl i192 %r95, 160
-%r97 = or i192 %r91, %r96
-%r98 = zext i192 %r97 to i224
-%r100 = getelementptr i32, i32* %r3, i32 6
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i224
-%r103 = shl i224 %r102, 192
-%r104 = or i224 %r98, %r103
-%r105 = zext i224 %r104 to i256
-%r107 = getelementptr i32, i32* %r3, i32 7
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i256
-%r110 = shl i256 %r109, 224
-%r111 = or i256 %r105, %r110
-%r112 = zext i256 %r111 to i288
-%r114 = getelementptr i32, i32* %r3, i32 8
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i288
-%r117 = shl i288 %r116, 256
-%r118 = or i288 %r112, %r117
-%r119 = add i288 %r61, %r118
-%r120 = load i32, i32* %r4
-%r121 = zext i32 %r120 to i64
-%r123 = getelementptr i32, i32* %r4, i32 1
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i64
-%r126 = shl i64 %r125, 32
-%r127 = or i64 %r121, %r126
-%r128 = zext i64 %r127 to i96
-%r130 = getelementptr i32, i32* %r4, i32 2
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i96
-%r133 = shl i96 %r132, 64
-%r134 = or i96 %r128, %r133
-%r135 = zext i96 %r134 to i128
-%r137 = getelementptr i32, i32* %r4, i32 3
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i128
-%r140 = shl i128 %r139, 96
-%r141 = or i128 %r135, %r140
-%r142 = zext i128 %r141 to i160
-%r144 = getelementptr i32, i32* %r4, i32 4
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i160
-%r147 = shl i160 %r146, 128
-%r148 = or i160 %r142, %r147
-%r149 = zext i160 %r148 to i192
-%r151 = getelementptr i32, i32* %r4, i32 5
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i192
-%r154 = shl i192 %r153, 160
-%r155 = or i192 %r149, %r154
-%r156 = zext i192 %r155 to i224
-%r158 = getelementptr i32, i32* %r4, i32 6
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i224
-%r161 = shl i224 %r160, 192
-%r162 = or i224 %r156, %r161
-%r163 = zext i224 %r162 to i256
-%r165 = getelementptr i32, i32* %r4, i32 7
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i256
-%r168 = shl i256 %r167, 224
-%r169 = or i256 %r163, %r168
-%r170 = zext i256 %r169 to i288
-%r172 = getelementptr i32, i32* %r4, i32 8
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i288
-%r175 = shl i288 %r174, 256
-%r176 = or i288 %r170, %r175
-%r177 = sub i288 %r119, %r176
-%r178 = lshr i288 %r177, 287
-%r179 = trunc i288 %r178 to i1
-%r180 = select i1 %r179, i288 %r119, i288 %r177
-%r181 = trunc i288 %r180 to i32
-%r183 = getelementptr i32, i32* %r1, i32 0
-store i32 %r181, i32* %r183
-%r184 = lshr i288 %r180, 32
-%r185 = trunc i288 %r184 to i32
-%r187 = getelementptr i32, i32* %r1, i32 1
-store i32 %r185, i32* %r187
-%r188 = lshr i288 %r184, 32
-%r189 = trunc i288 %r188 to i32
-%r191 = getelementptr i32, i32* %r1, i32 2
-store i32 %r189, i32* %r191
-%r192 = lshr i288 %r188, 32
-%r193 = trunc i288 %r192 to i32
-%r195 = getelementptr i32, i32* %r1, i32 3
-store i32 %r193, i32* %r195
-%r196 = lshr i288 %r192, 32
-%r197 = trunc i288 %r196 to i32
-%r199 = getelementptr i32, i32* %r1, i32 4
-store i32 %r197, i32* %r199
-%r200 = lshr i288 %r196, 32
-%r201 = trunc i288 %r200 to i32
-%r203 = getelementptr i32, i32* %r1, i32 5
-store i32 %r201, i32* %r203
-%r204 = lshr i288 %r200, 32
-%r205 = trunc i288 %r204 to i32
-%r207 = getelementptr i32, i32* %r1, i32 6
-store i32 %r205, i32* %r207
-%r208 = lshr i288 %r204, 32
-%r209 = trunc i288 %r208 to i32
-%r211 = getelementptr i32, i32* %r1, i32 7
-store i32 %r209, i32* %r211
-%r212 = lshr i288 %r208, 32
-%r213 = trunc i288 %r212 to i32
-%r215 = getelementptr i32, i32* %r1, i32 8
-store i32 %r213, i32* %r215
-ret void
-}
-define void @mcl_fp_sub9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = load i32, i32* %r3
-%r63 = zext i32 %r62 to i64
-%r65 = getelementptr i32, i32* %r3, i32 1
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i64
-%r68 = shl i64 %r67, 32
-%r69 = or i64 %r63, %r68
-%r70 = zext i64 %r69 to i96
-%r72 = getelementptr i32, i32* %r3, i32 2
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i96
-%r75 = shl i96 %r74, 64
-%r76 = or i96 %r70, %r75
-%r77 = zext i96 %r76 to i128
-%r79 = getelementptr i32, i32* %r3, i32 3
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i128
-%r82 = shl i128 %r81, 96
-%r83 = or i128 %r77, %r82
-%r84 = zext i128 %r83 to i160
-%r86 = getelementptr i32, i32* %r3, i32 4
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i160
-%r89 = shl i160 %r88, 128
-%r90 = or i160 %r84, %r89
-%r91 = zext i160 %r90 to i192
-%r93 = getelementptr i32, i32* %r3, i32 5
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i192
-%r96 = shl i192 %r95, 160
-%r97 = or i192 %r91, %r96
-%r98 = zext i192 %r97 to i224
-%r100 = getelementptr i32, i32* %r3, i32 6
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i224
-%r103 = shl i224 %r102, 192
-%r104 = or i224 %r98, %r103
-%r105 = zext i224 %r104 to i256
-%r107 = getelementptr i32, i32* %r3, i32 7
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i256
-%r110 = shl i256 %r109, 224
-%r111 = or i256 %r105, %r110
-%r112 = zext i256 %r111 to i288
-%r114 = getelementptr i32, i32* %r3, i32 8
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i288
-%r117 = shl i288 %r116, 256
-%r118 = or i288 %r112, %r117
-%r119 = zext i288 %r61 to i320
-%r120 = zext i288 %r118 to i320
-%r121 = sub i320 %r119, %r120
-%r122 = trunc i320 %r121 to i288
-%r123 = lshr i320 %r121, 288
-%r124 = trunc i320 %r123 to i1
-%r125 = trunc i288 %r122 to i32
-%r127 = getelementptr i32, i32* %r1, i32 0
-store i32 %r125, i32* %r127
-%r128 = lshr i288 %r122, 32
-%r129 = trunc i288 %r128 to i32
-%r131 = getelementptr i32, i32* %r1, i32 1
-store i32 %r129, i32* %r131
-%r132 = lshr i288 %r128, 32
-%r133 = trunc i288 %r132 to i32
-%r135 = getelementptr i32, i32* %r1, i32 2
-store i32 %r133, i32* %r135
-%r136 = lshr i288 %r132, 32
-%r137 = trunc i288 %r136 to i32
-%r139 = getelementptr i32, i32* %r1, i32 3
-store i32 %r137, i32* %r139
-%r140 = lshr i288 %r136, 32
-%r141 = trunc i288 %r140 to i32
-%r143 = getelementptr i32, i32* %r1, i32 4
-store i32 %r141, i32* %r143
-%r144 = lshr i288 %r140, 32
-%r145 = trunc i288 %r144 to i32
-%r147 = getelementptr i32, i32* %r1, i32 5
-store i32 %r145, i32* %r147
-%r148 = lshr i288 %r144, 32
-%r149 = trunc i288 %r148 to i32
-%r151 = getelementptr i32, i32* %r1, i32 6
-store i32 %r149, i32* %r151
-%r152 = lshr i288 %r148, 32
-%r153 = trunc i288 %r152 to i32
-%r155 = getelementptr i32, i32* %r1, i32 7
-store i32 %r153, i32* %r155
-%r156 = lshr i288 %r152, 32
-%r157 = trunc i288 %r156 to i32
-%r159 = getelementptr i32, i32* %r1, i32 8
-store i32 %r157, i32* %r159
-br i1%r124, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r160 = load i32, i32* %r4
-%r161 = zext i32 %r160 to i64
-%r163 = getelementptr i32, i32* %r4, i32 1
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i64
-%r166 = shl i64 %r165, 32
-%r167 = or i64 %r161, %r166
-%r168 = zext i64 %r167 to i96
-%r170 = getelementptr i32, i32* %r4, i32 2
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i96
-%r173 = shl i96 %r172, 64
-%r174 = or i96 %r168, %r173
-%r175 = zext i96 %r174 to i128
-%r177 = getelementptr i32, i32* %r4, i32 3
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i128
-%r180 = shl i128 %r179, 96
-%r181 = or i128 %r175, %r180
-%r182 = zext i128 %r181 to i160
-%r184 = getelementptr i32, i32* %r4, i32 4
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i160
-%r187 = shl i160 %r186, 128
-%r188 = or i160 %r182, %r187
-%r189 = zext i160 %r188 to i192
-%r191 = getelementptr i32, i32* %r4, i32 5
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i192
-%r194 = shl i192 %r193, 160
-%r195 = or i192 %r189, %r194
-%r196 = zext i192 %r195 to i224
-%r198 = getelementptr i32, i32* %r4, i32 6
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i224
-%r201 = shl i224 %r200, 192
-%r202 = or i224 %r196, %r201
-%r203 = zext i224 %r202 to i256
-%r205 = getelementptr i32, i32* %r4, i32 7
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i256
-%r208 = shl i256 %r207, 224
-%r209 = or i256 %r203, %r208
-%r210 = zext i256 %r209 to i288
-%r212 = getelementptr i32, i32* %r4, i32 8
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i288
-%r215 = shl i288 %r214, 256
-%r216 = or i288 %r210, %r215
-%r217 = add i288 %r122, %r216
-%r218 = trunc i288 %r217 to i32
-%r220 = getelementptr i32, i32* %r1, i32 0
-store i32 %r218, i32* %r220
-%r221 = lshr i288 %r217, 32
-%r222 = trunc i288 %r221 to i32
-%r224 = getelementptr i32, i32* %r1, i32 1
-store i32 %r222, i32* %r224
-%r225 = lshr i288 %r221, 32
-%r226 = trunc i288 %r225 to i32
-%r228 = getelementptr i32, i32* %r1, i32 2
-store i32 %r226, i32* %r228
-%r229 = lshr i288 %r225, 32
-%r230 = trunc i288 %r229 to i32
-%r232 = getelementptr i32, i32* %r1, i32 3
-store i32 %r230, i32* %r232
-%r233 = lshr i288 %r229, 32
-%r234 = trunc i288 %r233 to i32
-%r236 = getelementptr i32, i32* %r1, i32 4
-store i32 %r234, i32* %r236
-%r237 = lshr i288 %r233, 32
-%r238 = trunc i288 %r237 to i32
-%r240 = getelementptr i32, i32* %r1, i32 5
-store i32 %r238, i32* %r240
-%r241 = lshr i288 %r237, 32
-%r242 = trunc i288 %r241 to i32
-%r244 = getelementptr i32, i32* %r1, i32 6
-store i32 %r242, i32* %r244
-%r245 = lshr i288 %r241, 32
-%r246 = trunc i288 %r245 to i32
-%r248 = getelementptr i32, i32* %r1, i32 7
-store i32 %r246, i32* %r248
-%r249 = lshr i288 %r245, 32
-%r250 = trunc i288 %r249 to i32
-%r252 = getelementptr i32, i32* %r1, i32 8
-store i32 %r250, i32* %r252
-ret void
-}
-define void @mcl_fp_subNF9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = load i32, i32* %r3
-%r63 = zext i32 %r62 to i64
-%r65 = getelementptr i32, i32* %r3, i32 1
-%r66 = load i32, i32* %r65
-%r67 = zext i32 %r66 to i64
-%r68 = shl i64 %r67, 32
-%r69 = or i64 %r63, %r68
-%r70 = zext i64 %r69 to i96
-%r72 = getelementptr i32, i32* %r3, i32 2
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i96
-%r75 = shl i96 %r74, 64
-%r76 = or i96 %r70, %r75
-%r77 = zext i96 %r76 to i128
-%r79 = getelementptr i32, i32* %r3, i32 3
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i128
-%r82 = shl i128 %r81, 96
-%r83 = or i128 %r77, %r82
-%r84 = zext i128 %r83 to i160
-%r86 = getelementptr i32, i32* %r3, i32 4
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i160
-%r89 = shl i160 %r88, 128
-%r90 = or i160 %r84, %r89
-%r91 = zext i160 %r90 to i192
-%r93 = getelementptr i32, i32* %r3, i32 5
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i192
-%r96 = shl i192 %r95, 160
-%r97 = or i192 %r91, %r96
-%r98 = zext i192 %r97 to i224
-%r100 = getelementptr i32, i32* %r3, i32 6
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i224
-%r103 = shl i224 %r102, 192
-%r104 = or i224 %r98, %r103
-%r105 = zext i224 %r104 to i256
-%r107 = getelementptr i32, i32* %r3, i32 7
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i256
-%r110 = shl i256 %r109, 224
-%r111 = or i256 %r105, %r110
-%r112 = zext i256 %r111 to i288
-%r114 = getelementptr i32, i32* %r3, i32 8
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i288
-%r117 = shl i288 %r116, 256
-%r118 = or i288 %r112, %r117
-%r119 = sub i288 %r61, %r118
-%r120 = lshr i288 %r119, 287
-%r121 = trunc i288 %r120 to i1
-%r122 = load i32, i32* %r4
-%r123 = zext i32 %r122 to i64
-%r125 = getelementptr i32, i32* %r4, i32 1
-%r126 = load i32, i32* %r125
-%r127 = zext i32 %r126 to i64
-%r128 = shl i64 %r127, 32
-%r129 = or i64 %r123, %r128
-%r130 = zext i64 %r129 to i96
-%r132 = getelementptr i32, i32* %r4, i32 2
-%r133 = load i32, i32* %r132
-%r134 = zext i32 %r133 to i96
-%r135 = shl i96 %r134, 64
-%r136 = or i96 %r130, %r135
-%r137 = zext i96 %r136 to i128
-%r139 = getelementptr i32, i32* %r4, i32 3
-%r140 = load i32, i32* %r139
-%r141 = zext i32 %r140 to i128
-%r142 = shl i128 %r141, 96
-%r143 = or i128 %r137, %r142
-%r144 = zext i128 %r143 to i160
-%r146 = getelementptr i32, i32* %r4, i32 4
-%r147 = load i32, i32* %r146
-%r148 = zext i32 %r147 to i160
-%r149 = shl i160 %r148, 128
-%r150 = or i160 %r144, %r149
-%r151 = zext i160 %r150 to i192
-%r153 = getelementptr i32, i32* %r4, i32 5
-%r154 = load i32, i32* %r153
-%r155 = zext i32 %r154 to i192
-%r156 = shl i192 %r155, 160
-%r157 = or i192 %r151, %r156
-%r158 = zext i192 %r157 to i224
-%r160 = getelementptr i32, i32* %r4, i32 6
-%r161 = load i32, i32* %r160
-%r162 = zext i32 %r161 to i224
-%r163 = shl i224 %r162, 192
-%r164 = or i224 %r158, %r163
-%r165 = zext i224 %r164 to i256
-%r167 = getelementptr i32, i32* %r4, i32 7
-%r168 = load i32, i32* %r167
-%r169 = zext i32 %r168 to i256
-%r170 = shl i256 %r169, 224
-%r171 = or i256 %r165, %r170
-%r172 = zext i256 %r171 to i288
-%r174 = getelementptr i32, i32* %r4, i32 8
-%r175 = load i32, i32* %r174
-%r176 = zext i32 %r175 to i288
-%r177 = shl i288 %r176, 256
-%r178 = or i288 %r172, %r177
-%r180 = select i1 %r121, i288 %r178, i288 0
-%r181 = add i288 %r119, %r180
-%r182 = trunc i288 %r181 to i32
-%r184 = getelementptr i32, i32* %r1, i32 0
-store i32 %r182, i32* %r184
-%r185 = lshr i288 %r181, 32
-%r186 = trunc i288 %r185 to i32
-%r188 = getelementptr i32, i32* %r1, i32 1
-store i32 %r186, i32* %r188
-%r189 = lshr i288 %r185, 32
-%r190 = trunc i288 %r189 to i32
-%r192 = getelementptr i32, i32* %r1, i32 2
-store i32 %r190, i32* %r192
-%r193 = lshr i288 %r189, 32
-%r194 = trunc i288 %r193 to i32
-%r196 = getelementptr i32, i32* %r1, i32 3
-store i32 %r194, i32* %r196
-%r197 = lshr i288 %r193, 32
-%r198 = trunc i288 %r197 to i32
-%r200 = getelementptr i32, i32* %r1, i32 4
-store i32 %r198, i32* %r200
-%r201 = lshr i288 %r197, 32
-%r202 = trunc i288 %r201 to i32
-%r204 = getelementptr i32, i32* %r1, i32 5
-store i32 %r202, i32* %r204
-%r205 = lshr i288 %r201, 32
-%r206 = trunc i288 %r205 to i32
-%r208 = getelementptr i32, i32* %r1, i32 6
-store i32 %r206, i32* %r208
-%r209 = lshr i288 %r205, 32
-%r210 = trunc i288 %r209 to i32
-%r212 = getelementptr i32, i32* %r1, i32 7
-store i32 %r210, i32* %r212
-%r213 = lshr i288 %r209, 32
-%r214 = trunc i288 %r213 to i32
-%r216 = getelementptr i32, i32* %r1, i32 8
-store i32 %r214, i32* %r216
-ret void
-}
-define void @mcl_fpDbl_add9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = load i32, i32* %r3
-%r126 = zext i32 %r125 to i64
-%r128 = getelementptr i32, i32* %r3, i32 1
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i64
-%r131 = shl i64 %r130, 32
-%r132 = or i64 %r126, %r131
-%r133 = zext i64 %r132 to i96
-%r135 = getelementptr i32, i32* %r3, i32 2
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i96
-%r138 = shl i96 %r137, 64
-%r139 = or i96 %r133, %r138
-%r140 = zext i96 %r139 to i128
-%r142 = getelementptr i32, i32* %r3, i32 3
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i128
-%r145 = shl i128 %r144, 96
-%r146 = or i128 %r140, %r145
-%r147 = zext i128 %r146 to i160
-%r149 = getelementptr i32, i32* %r3, i32 4
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i160
-%r152 = shl i160 %r151, 128
-%r153 = or i160 %r147, %r152
-%r154 = zext i160 %r153 to i192
-%r156 = getelementptr i32, i32* %r3, i32 5
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i192
-%r159 = shl i192 %r158, 160
-%r160 = or i192 %r154, %r159
-%r161 = zext i192 %r160 to i224
-%r163 = getelementptr i32, i32* %r3, i32 6
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i224
-%r166 = shl i224 %r165, 192
-%r167 = or i224 %r161, %r166
-%r168 = zext i224 %r167 to i256
-%r170 = getelementptr i32, i32* %r3, i32 7
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i256
-%r173 = shl i256 %r172, 224
-%r174 = or i256 %r168, %r173
-%r175 = zext i256 %r174 to i288
-%r177 = getelementptr i32, i32* %r3, i32 8
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i288
-%r180 = shl i288 %r179, 256
-%r181 = or i288 %r175, %r180
-%r182 = zext i288 %r181 to i320
-%r184 = getelementptr i32, i32* %r3, i32 9
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i320
-%r187 = shl i320 %r186, 288
-%r188 = or i320 %r182, %r187
-%r189 = zext i320 %r188 to i352
-%r191 = getelementptr i32, i32* %r3, i32 10
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i352
-%r194 = shl i352 %r193, 320
-%r195 = or i352 %r189, %r194
-%r196 = zext i352 %r195 to i384
-%r198 = getelementptr i32, i32* %r3, i32 11
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i384
-%r201 = shl i384 %r200, 352
-%r202 = or i384 %r196, %r201
-%r203 = zext i384 %r202 to i416
-%r205 = getelementptr i32, i32* %r3, i32 12
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i416
-%r208 = shl i416 %r207, 384
-%r209 = or i416 %r203, %r208
-%r210 = zext i416 %r209 to i448
-%r212 = getelementptr i32, i32* %r3, i32 13
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i448
-%r215 = shl i448 %r214, 416
-%r216 = or i448 %r210, %r215
-%r217 = zext i448 %r216 to i480
-%r219 = getelementptr i32, i32* %r3, i32 14
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i480
-%r222 = shl i480 %r221, 448
-%r223 = or i480 %r217, %r222
-%r224 = zext i480 %r223 to i512
-%r226 = getelementptr i32, i32* %r3, i32 15
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i512
-%r229 = shl i512 %r228, 480
-%r230 = or i512 %r224, %r229
-%r231 = zext i512 %r230 to i544
-%r233 = getelementptr i32, i32* %r3, i32 16
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i544
-%r236 = shl i544 %r235, 512
-%r237 = or i544 %r231, %r236
-%r238 = zext i544 %r237 to i576
-%r240 = getelementptr i32, i32* %r3, i32 17
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i576
-%r243 = shl i576 %r242, 544
-%r244 = or i576 %r238, %r243
-%r245 = zext i576 %r124 to i608
-%r246 = zext i576 %r244 to i608
-%r247 = add i608 %r245, %r246
-%r248 = trunc i608 %r247 to i288
-%r249 = trunc i288 %r248 to i32
-%r251 = getelementptr i32, i32* %r1, i32 0
-store i32 %r249, i32* %r251
-%r252 = lshr i288 %r248, 32
-%r253 = trunc i288 %r252 to i32
-%r255 = getelementptr i32, i32* %r1, i32 1
-store i32 %r253, i32* %r255
-%r256 = lshr i288 %r252, 32
-%r257 = trunc i288 %r256 to i32
-%r259 = getelementptr i32, i32* %r1, i32 2
-store i32 %r257, i32* %r259
-%r260 = lshr i288 %r256, 32
-%r261 = trunc i288 %r260 to i32
-%r263 = getelementptr i32, i32* %r1, i32 3
-store i32 %r261, i32* %r263
-%r264 = lshr i288 %r260, 32
-%r265 = trunc i288 %r264 to i32
-%r267 = getelementptr i32, i32* %r1, i32 4
-store i32 %r265, i32* %r267
-%r268 = lshr i288 %r264, 32
-%r269 = trunc i288 %r268 to i32
-%r271 = getelementptr i32, i32* %r1, i32 5
-store i32 %r269, i32* %r271
-%r272 = lshr i288 %r268, 32
-%r273 = trunc i288 %r272 to i32
-%r275 = getelementptr i32, i32* %r1, i32 6
-store i32 %r273, i32* %r275
-%r276 = lshr i288 %r272, 32
-%r277 = trunc i288 %r276 to i32
-%r279 = getelementptr i32, i32* %r1, i32 7
-store i32 %r277, i32* %r279
-%r280 = lshr i288 %r276, 32
-%r281 = trunc i288 %r280 to i32
-%r283 = getelementptr i32, i32* %r1, i32 8
-store i32 %r281, i32* %r283
-%r284 = lshr i608 %r247, 288
-%r285 = trunc i608 %r284 to i320
-%r286 = load i32, i32* %r4
-%r287 = zext i32 %r286 to i64
-%r289 = getelementptr i32, i32* %r4, i32 1
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i64
-%r292 = shl i64 %r291, 32
-%r293 = or i64 %r287, %r292
-%r294 = zext i64 %r293 to i96
-%r296 = getelementptr i32, i32* %r4, i32 2
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i96
-%r299 = shl i96 %r298, 64
-%r300 = or i96 %r294, %r299
-%r301 = zext i96 %r300 to i128
-%r303 = getelementptr i32, i32* %r4, i32 3
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i128
-%r306 = shl i128 %r305, 96
-%r307 = or i128 %r301, %r306
-%r308 = zext i128 %r307 to i160
-%r310 = getelementptr i32, i32* %r4, i32 4
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i160
-%r313 = shl i160 %r312, 128
-%r314 = or i160 %r308, %r313
-%r315 = zext i160 %r314 to i192
-%r317 = getelementptr i32, i32* %r4, i32 5
-%r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i192
-%r320 = shl i192 %r319, 160
-%r321 = or i192 %r315, %r320
-%r322 = zext i192 %r321 to i224
-%r324 = getelementptr i32, i32* %r4, i32 6
-%r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i224
-%r327 = shl i224 %r326, 192
-%r328 = or i224 %r322, %r327
-%r329 = zext i224 %r328 to i256
-%r331 = getelementptr i32, i32* %r4, i32 7
-%r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i256
-%r334 = shl i256 %r333, 224
-%r335 = or i256 %r329, %r334
-%r336 = zext i256 %r335 to i288
-%r338 = getelementptr i32, i32* %r4, i32 8
-%r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i288
-%r341 = shl i288 %r340, 256
-%r342 = or i288 %r336, %r341
-%r343 = zext i288 %r342 to i320
-%r344 = sub i320 %r285, %r343
-%r345 = lshr i320 %r344, 288
-%r346 = trunc i320 %r345 to i1
-%r347 = select i1 %r346, i320 %r285, i320 %r344
-%r348 = trunc i320 %r347 to i288
-%r350 = getelementptr i32, i32* %r1, i32 9
-%r351 = trunc i288 %r348 to i32
-%r353 = getelementptr i32, i32* %r350, i32 0
-store i32 %r351, i32* %r353
-%r354 = lshr i288 %r348, 32
-%r355 = trunc i288 %r354 to i32
-%r357 = getelementptr i32, i32* %r350, i32 1
-store i32 %r355, i32* %r357
-%r358 = lshr i288 %r354, 32
-%r359 = trunc i288 %r358 to i32
-%r361 = getelementptr i32, i32* %r350, i32 2
-store i32 %r359, i32* %r361
-%r362 = lshr i288 %r358, 32
-%r363 = trunc i288 %r362 to i32
-%r365 = getelementptr i32, i32* %r350, i32 3
-store i32 %r363, i32* %r365
-%r366 = lshr i288 %r362, 32
-%r367 = trunc i288 %r366 to i32
-%r369 = getelementptr i32, i32* %r350, i32 4
-store i32 %r367, i32* %r369
-%r370 = lshr i288 %r366, 32
-%r371 = trunc i288 %r370 to i32
-%r373 = getelementptr i32, i32* %r350, i32 5
-store i32 %r371, i32* %r373
-%r374 = lshr i288 %r370, 32
-%r375 = trunc i288 %r374 to i32
-%r377 = getelementptr i32, i32* %r350, i32 6
-store i32 %r375, i32* %r377
-%r378 = lshr i288 %r374, 32
-%r379 = trunc i288 %r378 to i32
-%r381 = getelementptr i32, i32* %r350, i32 7
-store i32 %r379, i32* %r381
-%r382 = lshr i288 %r378, 32
-%r383 = trunc i288 %r382 to i32
-%r385 = getelementptr i32, i32* %r350, i32 8
-store i32 %r383, i32* %r385
-ret void
-}
-define void @mcl_fpDbl_sub9L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = load i32, i32* %r3
-%r126 = zext i32 %r125 to i64
-%r128 = getelementptr i32, i32* %r3, i32 1
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i64
-%r131 = shl i64 %r130, 32
-%r132 = or i64 %r126, %r131
-%r133 = zext i64 %r132 to i96
-%r135 = getelementptr i32, i32* %r3, i32 2
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i96
-%r138 = shl i96 %r137, 64
-%r139 = or i96 %r133, %r138
-%r140 = zext i96 %r139 to i128
-%r142 = getelementptr i32, i32* %r3, i32 3
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i128
-%r145 = shl i128 %r144, 96
-%r146 = or i128 %r140, %r145
-%r147 = zext i128 %r146 to i160
-%r149 = getelementptr i32, i32* %r3, i32 4
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i160
-%r152 = shl i160 %r151, 128
-%r153 = or i160 %r147, %r152
-%r154 = zext i160 %r153 to i192
-%r156 = getelementptr i32, i32* %r3, i32 5
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i192
-%r159 = shl i192 %r158, 160
-%r160 = or i192 %r154, %r159
-%r161 = zext i192 %r160 to i224
-%r163 = getelementptr i32, i32* %r3, i32 6
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i224
-%r166 = shl i224 %r165, 192
-%r167 = or i224 %r161, %r166
-%r168 = zext i224 %r167 to i256
-%r170 = getelementptr i32, i32* %r3, i32 7
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i256
-%r173 = shl i256 %r172, 224
-%r174 = or i256 %r168, %r173
-%r175 = zext i256 %r174 to i288
-%r177 = getelementptr i32, i32* %r3, i32 8
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i288
-%r180 = shl i288 %r179, 256
-%r181 = or i288 %r175, %r180
-%r182 = zext i288 %r181 to i320
-%r184 = getelementptr i32, i32* %r3, i32 9
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i320
-%r187 = shl i320 %r186, 288
-%r188 = or i320 %r182, %r187
-%r189 = zext i320 %r188 to i352
-%r191 = getelementptr i32, i32* %r3, i32 10
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i352
-%r194 = shl i352 %r193, 320
-%r195 = or i352 %r189, %r194
-%r196 = zext i352 %r195 to i384
-%r198 = getelementptr i32, i32* %r3, i32 11
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i384
-%r201 = shl i384 %r200, 352
-%r202 = or i384 %r196, %r201
-%r203 = zext i384 %r202 to i416
-%r205 = getelementptr i32, i32* %r3, i32 12
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i416
-%r208 = shl i416 %r207, 384
-%r209 = or i416 %r203, %r208
-%r210 = zext i416 %r209 to i448
-%r212 = getelementptr i32, i32* %r3, i32 13
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i448
-%r215 = shl i448 %r214, 416
-%r216 = or i448 %r210, %r215
-%r217 = zext i448 %r216 to i480
-%r219 = getelementptr i32, i32* %r3, i32 14
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i480
-%r222 = shl i480 %r221, 448
-%r223 = or i480 %r217, %r222
-%r224 = zext i480 %r223 to i512
-%r226 = getelementptr i32, i32* %r3, i32 15
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i512
-%r229 = shl i512 %r228, 480
-%r230 = or i512 %r224, %r229
-%r231 = zext i512 %r230 to i544
-%r233 = getelementptr i32, i32* %r3, i32 16
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i544
-%r236 = shl i544 %r235, 512
-%r237 = or i544 %r231, %r236
-%r238 = zext i544 %r237 to i576
-%r240 = getelementptr i32, i32* %r3, i32 17
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i576
-%r243 = shl i576 %r242, 544
-%r244 = or i576 %r238, %r243
-%r245 = zext i576 %r124 to i608
-%r246 = zext i576 %r244 to i608
-%r247 = sub i608 %r245, %r246
-%r248 = trunc i608 %r247 to i288
-%r249 = trunc i288 %r248 to i32
-%r251 = getelementptr i32, i32* %r1, i32 0
-store i32 %r249, i32* %r251
-%r252 = lshr i288 %r248, 32
-%r253 = trunc i288 %r252 to i32
-%r255 = getelementptr i32, i32* %r1, i32 1
-store i32 %r253, i32* %r255
-%r256 = lshr i288 %r252, 32
-%r257 = trunc i288 %r256 to i32
-%r259 = getelementptr i32, i32* %r1, i32 2
-store i32 %r257, i32* %r259
-%r260 = lshr i288 %r256, 32
-%r261 = trunc i288 %r260 to i32
-%r263 = getelementptr i32, i32* %r1, i32 3
-store i32 %r261, i32* %r263
-%r264 = lshr i288 %r260, 32
-%r265 = trunc i288 %r264 to i32
-%r267 = getelementptr i32, i32* %r1, i32 4
-store i32 %r265, i32* %r267
-%r268 = lshr i288 %r264, 32
-%r269 = trunc i288 %r268 to i32
-%r271 = getelementptr i32, i32* %r1, i32 5
-store i32 %r269, i32* %r271
-%r272 = lshr i288 %r268, 32
-%r273 = trunc i288 %r272 to i32
-%r275 = getelementptr i32, i32* %r1, i32 6
-store i32 %r273, i32* %r275
-%r276 = lshr i288 %r272, 32
-%r277 = trunc i288 %r276 to i32
-%r279 = getelementptr i32, i32* %r1, i32 7
-store i32 %r277, i32* %r279
-%r280 = lshr i288 %r276, 32
-%r281 = trunc i288 %r280 to i32
-%r283 = getelementptr i32, i32* %r1, i32 8
-store i32 %r281, i32* %r283
-%r284 = lshr i608 %r247, 288
-%r285 = trunc i608 %r284 to i288
-%r286 = lshr i608 %r247, 576
-%r287 = trunc i608 %r286 to i1
-%r288 = load i32, i32* %r4
-%r289 = zext i32 %r288 to i64
-%r291 = getelementptr i32, i32* %r4, i32 1
-%r292 = load i32, i32* %r291
-%r293 = zext i32 %r292 to i64
-%r294 = shl i64 %r293, 32
-%r295 = or i64 %r289, %r294
-%r296 = zext i64 %r295 to i96
-%r298 = getelementptr i32, i32* %r4, i32 2
-%r299 = load i32, i32* %r298
-%r300 = zext i32 %r299 to i96
-%r301 = shl i96 %r300, 64
-%r302 = or i96 %r296, %r301
-%r303 = zext i96 %r302 to i128
-%r305 = getelementptr i32, i32* %r4, i32 3
-%r306 = load i32, i32* %r305
-%r307 = zext i32 %r306 to i128
-%r308 = shl i128 %r307, 96
-%r309 = or i128 %r303, %r308
-%r310 = zext i128 %r309 to i160
-%r312 = getelementptr i32, i32* %r4, i32 4
-%r313 = load i32, i32* %r312
-%r314 = zext i32 %r313 to i160
-%r315 = shl i160 %r314, 128
-%r316 = or i160 %r310, %r315
-%r317 = zext i160 %r316 to i192
-%r319 = getelementptr i32, i32* %r4, i32 5
-%r320 = load i32, i32* %r319
-%r321 = zext i32 %r320 to i192
-%r322 = shl i192 %r321, 160
-%r323 = or i192 %r317, %r322
-%r324 = zext i192 %r323 to i224
-%r326 = getelementptr i32, i32* %r4, i32 6
-%r327 = load i32, i32* %r326
-%r328 = zext i32 %r327 to i224
-%r329 = shl i224 %r328, 192
-%r330 = or i224 %r324, %r329
-%r331 = zext i224 %r330 to i256
-%r333 = getelementptr i32, i32* %r4, i32 7
-%r334 = load i32, i32* %r333
-%r335 = zext i32 %r334 to i256
-%r336 = shl i256 %r335, 224
-%r337 = or i256 %r331, %r336
-%r338 = zext i256 %r337 to i288
-%r340 = getelementptr i32, i32* %r4, i32 8
-%r341 = load i32, i32* %r340
-%r342 = zext i32 %r341 to i288
-%r343 = shl i288 %r342, 256
-%r344 = or i288 %r338, %r343
-%r346 = select i1 %r287, i288 %r344, i288 0
-%r347 = add i288 %r285, %r346
-%r349 = getelementptr i32, i32* %r1, i32 9
-%r350 = trunc i288 %r347 to i32
-%r352 = getelementptr i32, i32* %r349, i32 0
-store i32 %r350, i32* %r352
-%r353 = lshr i288 %r347, 32
-%r354 = trunc i288 %r353 to i32
-%r356 = getelementptr i32, i32* %r349, i32 1
-store i32 %r354, i32* %r356
-%r357 = lshr i288 %r353, 32
-%r358 = trunc i288 %r357 to i32
-%r360 = getelementptr i32, i32* %r349, i32 2
-store i32 %r358, i32* %r360
-%r361 = lshr i288 %r357, 32
-%r362 = trunc i288 %r361 to i32
-%r364 = getelementptr i32, i32* %r349, i32 3
-store i32 %r362, i32* %r364
-%r365 = lshr i288 %r361, 32
-%r366 = trunc i288 %r365 to i32
-%r368 = getelementptr i32, i32* %r349, i32 4
-store i32 %r366, i32* %r368
-%r369 = lshr i288 %r365, 32
-%r370 = trunc i288 %r369 to i32
-%r372 = getelementptr i32, i32* %r349, i32 5
-store i32 %r370, i32* %r372
-%r373 = lshr i288 %r369, 32
-%r374 = trunc i288 %r373 to i32
-%r376 = getelementptr i32, i32* %r349, i32 6
-store i32 %r374, i32* %r376
-%r377 = lshr i288 %r373, 32
-%r378 = trunc i288 %r377 to i32
-%r380 = getelementptr i32, i32* %r349, i32 7
-store i32 %r378, i32* %r380
-%r381 = lshr i288 %r377, 32
-%r382 = trunc i288 %r381 to i32
-%r384 = getelementptr i32, i32* %r349, i32 8
-store i32 %r382, i32* %r384
-ret void
-}
-define i352 @mulPv320x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
-%r18 = trunc i64 %r17 to i32
-%r19 = call i32 @extractHigh32(i64 %r17)
-%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
-%r22 = trunc i64 %r21 to i32
-%r23 = call i32 @extractHigh32(i64 %r21)
-%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
-%r26 = trunc i64 %r25 to i32
-%r27 = call i32 @extractHigh32(i64 %r25)
-%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
-%r30 = trunc i64 %r29 to i32
-%r31 = call i32 @extractHigh32(i64 %r29)
-%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
-%r34 = trunc i64 %r33 to i32
-%r35 = call i32 @extractHigh32(i64 %r33)
-%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
-%r38 = trunc i64 %r37 to i32
-%r39 = call i32 @extractHigh32(i64 %r37)
-%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
-%r42 = trunc i64 %r41 to i32
-%r43 = call i32 @extractHigh32(i64 %r41)
-%r44 = zext i32 %r6 to i64
-%r45 = zext i32 %r10 to i64
-%r46 = shl i64 %r45, 32
-%r47 = or i64 %r44, %r46
-%r48 = zext i64 %r47 to i96
-%r49 = zext i32 %r14 to i96
-%r50 = shl i96 %r49, 64
-%r51 = or i96 %r48, %r50
-%r52 = zext i96 %r51 to i128
-%r53 = zext i32 %r18 to i128
-%r54 = shl i128 %r53, 96
-%r55 = or i128 %r52, %r54
-%r56 = zext i128 %r55 to i160
-%r57 = zext i32 %r22 to i160
-%r58 = shl i160 %r57, 128
-%r59 = or i160 %r56, %r58
-%r60 = zext i160 %r59 to i192
-%r61 = zext i32 %r26 to i192
-%r62 = shl i192 %r61, 160
-%r63 = or i192 %r60, %r62
-%r64 = zext i192 %r63 to i224
-%r65 = zext i32 %r30 to i224
-%r66 = shl i224 %r65, 192
-%r67 = or i224 %r64, %r66
-%r68 = zext i224 %r67 to i256
-%r69 = zext i32 %r34 to i256
-%r70 = shl i256 %r69, 224
-%r71 = or i256 %r68, %r70
-%r72 = zext i256 %r71 to i288
-%r73 = zext i32 %r38 to i288
-%r74 = shl i288 %r73, 256
-%r75 = or i288 %r72, %r74
-%r76 = zext i288 %r75 to i320
-%r77 = zext i32 %r42 to i320
-%r78 = shl i320 %r77, 288
-%r79 = or i320 %r76, %r78
-%r80 = zext i32 %r7 to i64
-%r81 = zext i32 %r11 to i64
-%r82 = shl i64 %r81, 32
-%r83 = or i64 %r80, %r82
-%r84 = zext i64 %r83 to i96
-%r85 = zext i32 %r15 to i96
-%r86 = shl i96 %r85, 64
-%r87 = or i96 %r84, %r86
-%r88 = zext i96 %r87 to i128
-%r89 = zext i32 %r19 to i128
-%r90 = shl i128 %r89, 96
-%r91 = or i128 %r88, %r90
-%r92 = zext i128 %r91 to i160
-%r93 = zext i32 %r23 to i160
-%r94 = shl i160 %r93, 128
-%r95 = or i160 %r92, %r94
-%r96 = zext i160 %r95 to i192
-%r97 = zext i32 %r27 to i192
-%r98 = shl i192 %r97, 160
-%r99 = or i192 %r96, %r98
-%r100 = zext i192 %r99 to i224
-%r101 = zext i32 %r31 to i224
-%r102 = shl i224 %r101, 192
-%r103 = or i224 %r100, %r102
-%r104 = zext i224 %r103 to i256
-%r105 = zext i32 %r35 to i256
-%r106 = shl i256 %r105, 224
-%r107 = or i256 %r104, %r106
-%r108 = zext i256 %r107 to i288
-%r109 = zext i32 %r39 to i288
-%r110 = shl i288 %r109, 256
-%r111 = or i288 %r108, %r110
-%r112 = zext i288 %r111 to i320
-%r113 = zext i32 %r43 to i320
-%r114 = shl i320 %r113, 288
-%r115 = or i320 %r112, %r114
-%r116 = zext i320 %r79 to i352
-%r117 = zext i320 %r115 to i352
-%r118 = shl i352 %r117, 32
-%r119 = add i352 %r116, %r118
-ret i352 %r119
-}
-define void @mcl_fp_mulUnitPre10L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i352 @mulPv320x32(i32* %r2, i32 %r3)
-%r5 = trunc i352 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i352 %r4, 32
-%r9 = trunc i352 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i352 %r8, 32
-%r13 = trunc i352 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i352 %r12, 32
-%r17 = trunc i352 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i352 %r16, 32
-%r21 = trunc i352 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-%r24 = lshr i352 %r20, 32
-%r25 = trunc i352 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 5
-store i32 %r25, i32* %r27
-%r28 = lshr i352 %r24, 32
-%r29 = trunc i352 %r28 to i32
-%r31 = getelementptr i32, i32* %r1, i32 6
-store i32 %r29, i32* %r31
-%r32 = lshr i352 %r28, 32
-%r33 = trunc i352 %r32 to i32
-%r35 = getelementptr i32, i32* %r1, i32 7
-store i32 %r33, i32* %r35
-%r36 = lshr i352 %r32, 32
-%r37 = trunc i352 %r36 to i32
-%r39 = getelementptr i32, i32* %r1, i32 8
-store i32 %r37, i32* %r39
-%r40 = lshr i352 %r36, 32
-%r41 = trunc i352 %r40 to i32
-%r43 = getelementptr i32, i32* %r1, i32 9
-store i32 %r41, i32* %r43
-%r44 = lshr i352 %r40, 32
-%r45 = trunc i352 %r44 to i32
-%r47 = getelementptr i32, i32* %r1, i32 10
-store i32 %r45, i32* %r47
-ret void
-}
-define void @mcl_fpDbl_mulPre10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r2, i32 5
-%r7 = getelementptr i32, i32* %r3, i32 5
-%r9 = getelementptr i32, i32* %r1, i32 10
-call void @mcl_fpDbl_mulPre5L(i32* %r1, i32* %r2, i32* %r3)
-call void @mcl_fpDbl_mulPre5L(i32* %r9, i32* %r5, i32* %r7)
-%r10 = load i32, i32* %r5
-%r11 = zext i32 %r10 to i64
-%r13 = getelementptr i32, i32* %r5, i32 1
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i64
-%r16 = shl i64 %r15, 32
-%r17 = or i64 %r11, %r16
-%r18 = zext i64 %r17 to i96
-%r20 = getelementptr i32, i32* %r5, i32 2
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i96
-%r23 = shl i96 %r22, 64
-%r24 = or i96 %r18, %r23
-%r25 = zext i96 %r24 to i128
-%r27 = getelementptr i32, i32* %r5, i32 3
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i128
-%r30 = shl i128 %r29, 96
-%r31 = or i128 %r25, %r30
-%r32 = zext i128 %r31 to i160
-%r34 = getelementptr i32, i32* %r5, i32 4
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i160
-%r37 = shl i160 %r36, 128
-%r38 = or i160 %r32, %r37
-%r39 = zext i160 %r38 to i192
-%r40 = load i32, i32* %r2
-%r41 = zext i32 %r40 to i64
-%r43 = getelementptr i32, i32* %r2, i32 1
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i64
-%r46 = shl i64 %r45, 32
-%r47 = or i64 %r41, %r46
-%r48 = zext i64 %r47 to i96
-%r50 = getelementptr i32, i32* %r2, i32 2
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i96
-%r53 = shl i96 %r52, 64
-%r54 = or i96 %r48, %r53
-%r55 = zext i96 %r54 to i128
-%r57 = getelementptr i32, i32* %r2, i32 3
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i128
-%r60 = shl i128 %r59, 96
-%r61 = or i128 %r55, %r60
-%r62 = zext i128 %r61 to i160
-%r64 = getelementptr i32, i32* %r2, i32 4
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i160
-%r67 = shl i160 %r66, 128
-%r68 = or i160 %r62, %r67
-%r69 = zext i160 %r68 to i192
-%r70 = load i32, i32* %r7
-%r71 = zext i32 %r70 to i64
-%r73 = getelementptr i32, i32* %r7, i32 1
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i64
-%r76 = shl i64 %r75, 32
-%r77 = or i64 %r71, %r76
-%r78 = zext i64 %r77 to i96
-%r80 = getelementptr i32, i32* %r7, i32 2
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i96
-%r83 = shl i96 %r82, 64
-%r84 = or i96 %r78, %r83
-%r85 = zext i96 %r84 to i128
-%r87 = getelementptr i32, i32* %r7, i32 3
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i128
-%r90 = shl i128 %r89, 96
-%r91 = or i128 %r85, %r90
-%r92 = zext i128 %r91 to i160
-%r94 = getelementptr i32, i32* %r7, i32 4
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i160
-%r97 = shl i160 %r96, 128
-%r98 = or i160 %r92, %r97
-%r99 = zext i160 %r98 to i192
-%r100 = load i32, i32* %r3
-%r101 = zext i32 %r100 to i64
-%r103 = getelementptr i32, i32* %r3, i32 1
-%r104 = load i32, i32* %r103
-%r105 = zext i32 %r104 to i64
-%r106 = shl i64 %r105, 32
-%r107 = or i64 %r101, %r106
-%r108 = zext i64 %r107 to i96
-%r110 = getelementptr i32, i32* %r3, i32 2
-%r111 = load i32, i32* %r110
-%r112 = zext i32 %r111 to i96
-%r113 = shl i96 %r112, 64
-%r114 = or i96 %r108, %r113
-%r115 = zext i96 %r114 to i128
-%r117 = getelementptr i32, i32* %r3, i32 3
-%r118 = load i32, i32* %r117
-%r119 = zext i32 %r118 to i128
-%r120 = shl i128 %r119, 96
-%r121 = or i128 %r115, %r120
-%r122 = zext i128 %r121 to i160
-%r124 = getelementptr i32, i32* %r3, i32 4
-%r125 = load i32, i32* %r124
-%r126 = zext i32 %r125 to i160
-%r127 = shl i160 %r126, 128
-%r128 = or i160 %r122, %r127
-%r129 = zext i160 %r128 to i192
-%r130 = add i192 %r39, %r69
-%r131 = add i192 %r99, %r129
-%r133 = alloca i32, i32 10
-%r134 = trunc i192 %r130 to i160
-%r135 = trunc i192 %r131 to i160
-%r136 = lshr i192 %r130, 160
-%r137 = trunc i192 %r136 to i1
-%r138 = lshr i192 %r131, 160
-%r139 = trunc i192 %r138 to i1
-%r140 = and i1 %r137, %r139
-%r142 = select i1 %r137, i160 %r135, i160 0
-%r144 = select i1 %r139, i160 %r134, i160 0
-%r146 = alloca i32, i32 5
-%r148 = alloca i32, i32 5
-%r149 = trunc i160 %r134 to i32
-%r151 = getelementptr i32, i32* %r146, i32 0
-store i32 %r149, i32* %r151
-%r152 = lshr i160 %r134, 32
-%r153 = trunc i160 %r152 to i32
-%r155 = getelementptr i32, i32* %r146, i32 1
-store i32 %r153, i32* %r155
-%r156 = lshr i160 %r152, 32
-%r157 = trunc i160 %r156 to i32
-%r159 = getelementptr i32, i32* %r146, i32 2
-store i32 %r157, i32* %r159
-%r160 = lshr i160 %r156, 32
-%r161 = trunc i160 %r160 to i32
-%r163 = getelementptr i32, i32* %r146, i32 3
-store i32 %r161, i32* %r163
-%r164 = lshr i160 %r160, 32
-%r165 = trunc i160 %r164 to i32
-%r167 = getelementptr i32, i32* %r146, i32 4
-store i32 %r165, i32* %r167
-%r168 = trunc i160 %r135 to i32
-%r170 = getelementptr i32, i32* %r148, i32 0
-store i32 %r168, i32* %r170
-%r171 = lshr i160 %r135, 32
-%r172 = trunc i160 %r171 to i32
-%r174 = getelementptr i32, i32* %r148, i32 1
-store i32 %r172, i32* %r174
-%r175 = lshr i160 %r171, 32
-%r176 = trunc i160 %r175 to i32
-%r178 = getelementptr i32, i32* %r148, i32 2
-store i32 %r176, i32* %r178
-%r179 = lshr i160 %r175, 32
-%r180 = trunc i160 %r179 to i32
-%r182 = getelementptr i32, i32* %r148, i32 3
-store i32 %r180, i32* %r182
-%r183 = lshr i160 %r179, 32
-%r184 = trunc i160 %r183 to i32
-%r186 = getelementptr i32, i32* %r148, i32 4
-store i32 %r184, i32* %r186
-call void @mcl_fpDbl_mulPre5L(i32* %r133, i32* %r146, i32* %r148)
-%r187 = load i32, i32* %r133
-%r188 = zext i32 %r187 to i64
-%r190 = getelementptr i32, i32* %r133, i32 1
-%r191 = load i32, i32* %r190
-%r192 = zext i32 %r191 to i64
-%r193 = shl i64 %r192, 32
-%r194 = or i64 %r188, %r193
-%r195 = zext i64 %r194 to i96
-%r197 = getelementptr i32, i32* %r133, i32 2
-%r198 = load i32, i32* %r197
-%r199 = zext i32 %r198 to i96
-%r200 = shl i96 %r199, 64
-%r201 = or i96 %r195, %r200
-%r202 = zext i96 %r201 to i128
-%r204 = getelementptr i32, i32* %r133, i32 3
-%r205 = load i32, i32* %r204
-%r206 = zext i32 %r205 to i128
-%r207 = shl i128 %r206, 96
-%r208 = or i128 %r202, %r207
-%r209 = zext i128 %r208 to i160
-%r211 = getelementptr i32, i32* %r133, i32 4
-%r212 = load i32, i32* %r211
-%r213 = zext i32 %r212 to i160
-%r214 = shl i160 %r213, 128
-%r215 = or i160 %r209, %r214
-%r216 = zext i160 %r215 to i192
-%r218 = getelementptr i32, i32* %r133, i32 5
-%r219 = load i32, i32* %r218
-%r220 = zext i32 %r219 to i192
-%r221 = shl i192 %r220, 160
-%r222 = or i192 %r216, %r221
-%r223 = zext i192 %r222 to i224
-%r225 = getelementptr i32, i32* %r133, i32 6
-%r226 = load i32, i32* %r225
-%r227 = zext i32 %r226 to i224
-%r228 = shl i224 %r227, 192
-%r229 = or i224 %r223, %r228
-%r230 = zext i224 %r229 to i256
-%r232 = getelementptr i32, i32* %r133, i32 7
-%r233 = load i32, i32* %r232
-%r234 = zext i32 %r233 to i256
-%r235 = shl i256 %r234, 224
-%r236 = or i256 %r230, %r235
-%r237 = zext i256 %r236 to i288
-%r239 = getelementptr i32, i32* %r133, i32 8
-%r240 = load i32, i32* %r239
-%r241 = zext i32 %r240 to i288
-%r242 = shl i288 %r241, 256
-%r243 = or i288 %r237, %r242
-%r244 = zext i288 %r243 to i320
-%r246 = getelementptr i32, i32* %r133, i32 9
-%r247 = load i32, i32* %r246
-%r248 = zext i32 %r247 to i320
-%r249 = shl i320 %r248, 288
-%r250 = or i320 %r244, %r249
-%r251 = zext i320 %r250 to i352
-%r252 = zext i1 %r140 to i352
-%r253 = shl i352 %r252, 320
-%r254 = or i352 %r251, %r253
-%r255 = zext i160 %r142 to i352
-%r256 = zext i160 %r144 to i352
-%r257 = shl i352 %r255, 160
-%r258 = shl i352 %r256, 160
-%r259 = add i352 %r254, %r257
-%r260 = add i352 %r259, %r258
-%r261 = load i32, i32* %r1
-%r262 = zext i32 %r261 to i64
-%r264 = getelementptr i32, i32* %r1, i32 1
-%r265 = load i32, i32* %r264
-%r266 = zext i32 %r265 to i64
-%r267 = shl i64 %r266, 32
-%r268 = or i64 %r262, %r267
-%r269 = zext i64 %r268 to i96
-%r271 = getelementptr i32, i32* %r1, i32 2
-%r272 = load i32, i32* %r271
-%r273 = zext i32 %r272 to i96
-%r274 = shl i96 %r273, 64
-%r275 = or i96 %r269, %r274
-%r276 = zext i96 %r275 to i128
-%r278 = getelementptr i32, i32* %r1, i32 3
-%r279 = load i32, i32* %r278
-%r280 = zext i32 %r279 to i128
-%r281 = shl i128 %r280, 96
-%r282 = or i128 %r276, %r281
-%r283 = zext i128 %r282 to i160
-%r285 = getelementptr i32, i32* %r1, i32 4
-%r286 = load i32, i32* %r285
-%r287 = zext i32 %r286 to i160
-%r288 = shl i160 %r287, 128
-%r289 = or i160 %r283, %r288
-%r290 = zext i160 %r289 to i192
-%r292 = getelementptr i32, i32* %r1, i32 5
-%r293 = load i32, i32* %r292
-%r294 = zext i32 %r293 to i192
-%r295 = shl i192 %r294, 160
-%r296 = or i192 %r290, %r295
-%r297 = zext i192 %r296 to i224
-%r299 = getelementptr i32, i32* %r1, i32 6
-%r300 = load i32, i32* %r299
-%r301 = zext i32 %r300 to i224
-%r302 = shl i224 %r301, 192
-%r303 = or i224 %r297, %r302
-%r304 = zext i224 %r303 to i256
-%r306 = getelementptr i32, i32* %r1, i32 7
-%r307 = load i32, i32* %r306
-%r308 = zext i32 %r307 to i256
-%r309 = shl i256 %r308, 224
-%r310 = or i256 %r304, %r309
-%r311 = zext i256 %r310 to i288
-%r313 = getelementptr i32, i32* %r1, i32 8
-%r314 = load i32, i32* %r313
-%r315 = zext i32 %r314 to i288
-%r316 = shl i288 %r315, 256
-%r317 = or i288 %r311, %r316
-%r318 = zext i288 %r317 to i320
-%r320 = getelementptr i32, i32* %r1, i32 9
-%r321 = load i32, i32* %r320
-%r322 = zext i32 %r321 to i320
-%r323 = shl i320 %r322, 288
-%r324 = or i320 %r318, %r323
-%r325 = zext i320 %r324 to i352
-%r326 = sub i352 %r260, %r325
-%r328 = getelementptr i32, i32* %r1, i32 10
-%r329 = load i32, i32* %r328
-%r330 = zext i32 %r329 to i64
-%r332 = getelementptr i32, i32* %r328, i32 1
-%r333 = load i32, i32* %r332
-%r334 = zext i32 %r333 to i64
-%r335 = shl i64 %r334, 32
-%r336 = or i64 %r330, %r335
-%r337 = zext i64 %r336 to i96
-%r339 = getelementptr i32, i32* %r328, i32 2
-%r340 = load i32, i32* %r339
-%r341 = zext i32 %r340 to i96
-%r342 = shl i96 %r341, 64
-%r343 = or i96 %r337, %r342
-%r344 = zext i96 %r343 to i128
-%r346 = getelementptr i32, i32* %r328, i32 3
-%r347 = load i32, i32* %r346
-%r348 = zext i32 %r347 to i128
-%r349 = shl i128 %r348, 96
-%r350 = or i128 %r344, %r349
-%r351 = zext i128 %r350 to i160
-%r353 = getelementptr i32, i32* %r328, i32 4
-%r354 = load i32, i32* %r353
-%r355 = zext i32 %r354 to i160
-%r356 = shl i160 %r355, 128
-%r357 = or i160 %r351, %r356
-%r358 = zext i160 %r357 to i192
-%r360 = getelementptr i32, i32* %r328, i32 5
-%r361 = load i32, i32* %r360
-%r362 = zext i32 %r361 to i192
-%r363 = shl i192 %r362, 160
-%r364 = or i192 %r358, %r363
-%r365 = zext i192 %r364 to i224
-%r367 = getelementptr i32, i32* %r328, i32 6
-%r368 = load i32, i32* %r367
-%r369 = zext i32 %r368 to i224
-%r370 = shl i224 %r369, 192
-%r371 = or i224 %r365, %r370
-%r372 = zext i224 %r371 to i256
-%r374 = getelementptr i32, i32* %r328, i32 7
-%r375 = load i32, i32* %r374
-%r376 = zext i32 %r375 to i256
-%r377 = shl i256 %r376, 224
-%r378 = or i256 %r372, %r377
-%r379 = zext i256 %r378 to i288
-%r381 = getelementptr i32, i32* %r328, i32 8
-%r382 = load i32, i32* %r381
-%r383 = zext i32 %r382 to i288
-%r384 = shl i288 %r383, 256
-%r385 = or i288 %r379, %r384
-%r386 = zext i288 %r385 to i320
-%r388 = getelementptr i32, i32* %r328, i32 9
-%r389 = load i32, i32* %r388
-%r390 = zext i32 %r389 to i320
-%r391 = shl i320 %r390, 288
-%r392 = or i320 %r386, %r391
-%r393 = zext i320 %r392 to i352
-%r394 = sub i352 %r326, %r393
-%r395 = zext i352 %r394 to i480
-%r397 = getelementptr i32, i32* %r1, i32 5
-%r398 = load i32, i32* %r397
-%r399 = zext i32 %r398 to i64
-%r401 = getelementptr i32, i32* %r397, i32 1
-%r402 = load i32, i32* %r401
-%r403 = zext i32 %r402 to i64
-%r404 = shl i64 %r403, 32
-%r405 = or i64 %r399, %r404
-%r406 = zext i64 %r405 to i96
-%r408 = getelementptr i32, i32* %r397, i32 2
-%r409 = load i32, i32* %r408
-%r410 = zext i32 %r409 to i96
-%r411 = shl i96 %r410, 64
-%r412 = or i96 %r406, %r411
-%r413 = zext i96 %r412 to i128
-%r415 = getelementptr i32, i32* %r397, i32 3
-%r416 = load i32, i32* %r415
-%r417 = zext i32 %r416 to i128
-%r418 = shl i128 %r417, 96
-%r419 = or i128 %r413, %r418
-%r420 = zext i128 %r419 to i160
-%r422 = getelementptr i32, i32* %r397, i32 4
-%r423 = load i32, i32* %r422
-%r424 = zext i32 %r423 to i160
-%r425 = shl i160 %r424, 128
-%r426 = or i160 %r420, %r425
-%r427 = zext i160 %r426 to i192
-%r429 = getelementptr i32, i32* %r397, i32 5
-%r430 = load i32, i32* %r429
-%r431 = zext i32 %r430 to i192
-%r432 = shl i192 %r431, 160
-%r433 = or i192 %r427, %r432
-%r434 = zext i192 %r433 to i224
-%r436 = getelementptr i32, i32* %r397, i32 6
-%r437 = load i32, i32* %r436
-%r438 = zext i32 %r437 to i224
-%r439 = shl i224 %r438, 192
-%r440 = or i224 %r434, %r439
-%r441 = zext i224 %r440 to i256
-%r443 = getelementptr i32, i32* %r397, i32 7
-%r444 = load i32, i32* %r443
-%r445 = zext i32 %r444 to i256
-%r446 = shl i256 %r445, 224
-%r447 = or i256 %r441, %r446
-%r448 = zext i256 %r447 to i288
-%r450 = getelementptr i32, i32* %r397, i32 8
-%r451 = load i32, i32* %r450
-%r452 = zext i32 %r451 to i288
-%r453 = shl i288 %r452, 256
-%r454 = or i288 %r448, %r453
-%r455 = zext i288 %r454 to i320
-%r457 = getelementptr i32, i32* %r397, i32 9
-%r458 = load i32, i32* %r457
-%r459 = zext i32 %r458 to i320
-%r460 = shl i320 %r459, 288
-%r461 = or i320 %r455, %r460
-%r462 = zext i320 %r461 to i352
-%r464 = getelementptr i32, i32* %r397, i32 10
-%r465 = load i32, i32* %r464
-%r466 = zext i32 %r465 to i352
-%r467 = shl i352 %r466, 320
-%r468 = or i352 %r462, %r467
-%r469 = zext i352 %r468 to i384
-%r471 = getelementptr i32, i32* %r397, i32 11
-%r472 = load i32, i32* %r471
-%r473 = zext i32 %r472 to i384
-%r474 = shl i384 %r473, 352
-%r475 = or i384 %r469, %r474
-%r476 = zext i384 %r475 to i416
-%r478 = getelementptr i32, i32* %r397, i32 12
-%r479 = load i32, i32* %r478
-%r480 = zext i32 %r479 to i416
-%r481 = shl i416 %r480, 384
-%r482 = or i416 %r476, %r481
-%r483 = zext i416 %r482 to i448
-%r485 = getelementptr i32, i32* %r397, i32 13
-%r486 = load i32, i32* %r485
-%r487 = zext i32 %r486 to i448
-%r488 = shl i448 %r487, 416
-%r489 = or i448 %r483, %r488
-%r490 = zext i448 %r489 to i480
-%r492 = getelementptr i32, i32* %r397, i32 14
-%r493 = load i32, i32* %r492
-%r494 = zext i32 %r493 to i480
-%r495 = shl i480 %r494, 448
-%r496 = or i480 %r490, %r495
-%r497 = add i480 %r395, %r496
-%r499 = getelementptr i32, i32* %r1, i32 5
-%r500 = trunc i480 %r497 to i32
-%r502 = getelementptr i32, i32* %r499, i32 0
-store i32 %r500, i32* %r502
-%r503 = lshr i480 %r497, 32
-%r504 = trunc i480 %r503 to i32
-%r506 = getelementptr i32, i32* %r499, i32 1
-store i32 %r504, i32* %r506
-%r507 = lshr i480 %r503, 32
-%r508 = trunc i480 %r507 to i32
-%r510 = getelementptr i32, i32* %r499, i32 2
-store i32 %r508, i32* %r510
-%r511 = lshr i480 %r507, 32
-%r512 = trunc i480 %r511 to i32
-%r514 = getelementptr i32, i32* %r499, i32 3
-store i32 %r512, i32* %r514
-%r515 = lshr i480 %r511, 32
-%r516 = trunc i480 %r515 to i32
-%r518 = getelementptr i32, i32* %r499, i32 4
-store i32 %r516, i32* %r518
-%r519 = lshr i480 %r515, 32
-%r520 = trunc i480 %r519 to i32
-%r522 = getelementptr i32, i32* %r499, i32 5
-store i32 %r520, i32* %r522
-%r523 = lshr i480 %r519, 32
-%r524 = trunc i480 %r523 to i32
-%r526 = getelementptr i32, i32* %r499, i32 6
-store i32 %r524, i32* %r526
-%r527 = lshr i480 %r523, 32
-%r528 = trunc i480 %r527 to i32
-%r530 = getelementptr i32, i32* %r499, i32 7
-store i32 %r528, i32* %r530
-%r531 = lshr i480 %r527, 32
-%r532 = trunc i480 %r531 to i32
-%r534 = getelementptr i32, i32* %r499, i32 8
-store i32 %r532, i32* %r534
-%r535 = lshr i480 %r531, 32
-%r536 = trunc i480 %r535 to i32
-%r538 = getelementptr i32, i32* %r499, i32 9
-store i32 %r536, i32* %r538
-%r539 = lshr i480 %r535, 32
-%r540 = trunc i480 %r539 to i32
-%r542 = getelementptr i32, i32* %r499, i32 10
-store i32 %r540, i32* %r542
-%r543 = lshr i480 %r539, 32
-%r544 = trunc i480 %r543 to i32
-%r546 = getelementptr i32, i32* %r499, i32 11
-store i32 %r544, i32* %r546
-%r547 = lshr i480 %r543, 32
-%r548 = trunc i480 %r547 to i32
-%r550 = getelementptr i32, i32* %r499, i32 12
-store i32 %r548, i32* %r550
-%r551 = lshr i480 %r547, 32
-%r552 = trunc i480 %r551 to i32
-%r554 = getelementptr i32, i32* %r499, i32 13
-store i32 %r552, i32* %r554
-%r555 = lshr i480 %r551, 32
-%r556 = trunc i480 %r555 to i32
-%r558 = getelementptr i32, i32* %r499, i32 14
-store i32 %r556, i32* %r558
-ret void
-}
-define void @mcl_fpDbl_sqrPre10L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r4 = getelementptr i32, i32* %r2, i32 5
-%r6 = getelementptr i32, i32* %r2, i32 5
-%r8 = getelementptr i32, i32* %r1, i32 10
-call void @mcl_fpDbl_mulPre5L(i32* %r1, i32* %r2, i32* %r2)
-call void @mcl_fpDbl_mulPre5L(i32* %r8, i32* %r4, i32* %r6)
-%r9 = load i32, i32* %r4
-%r10 = zext i32 %r9 to i64
-%r12 = getelementptr i32, i32* %r4, i32 1
-%r13 = load i32, i32* %r12
-%r14 = zext i32 %r13 to i64
-%r15 = shl i64 %r14, 32
-%r16 = or i64 %r10, %r15
-%r17 = zext i64 %r16 to i96
-%r19 = getelementptr i32, i32* %r4, i32 2
-%r20 = load i32, i32* %r19
-%r21 = zext i32 %r20 to i96
-%r22 = shl i96 %r21, 64
-%r23 = or i96 %r17, %r22
-%r24 = zext i96 %r23 to i128
-%r26 = getelementptr i32, i32* %r4, i32 3
-%r27 = load i32, i32* %r26
-%r28 = zext i32 %r27 to i128
-%r29 = shl i128 %r28, 96
-%r30 = or i128 %r24, %r29
-%r31 = zext i128 %r30 to i160
-%r33 = getelementptr i32, i32* %r4, i32 4
-%r34 = load i32, i32* %r33
-%r35 = zext i32 %r34 to i160
-%r36 = shl i160 %r35, 128
-%r37 = or i160 %r31, %r36
-%r38 = zext i160 %r37 to i192
-%r39 = load i32, i32* %r2
-%r40 = zext i32 %r39 to i64
-%r42 = getelementptr i32, i32* %r2, i32 1
-%r43 = load i32, i32* %r42
-%r44 = zext i32 %r43 to i64
-%r45 = shl i64 %r44, 32
-%r46 = or i64 %r40, %r45
-%r47 = zext i64 %r46 to i96
-%r49 = getelementptr i32, i32* %r2, i32 2
-%r50 = load i32, i32* %r49
-%r51 = zext i32 %r50 to i96
-%r52 = shl i96 %r51, 64
-%r53 = or i96 %r47, %r52
-%r54 = zext i96 %r53 to i128
-%r56 = getelementptr i32, i32* %r2, i32 3
-%r57 = load i32, i32* %r56
-%r58 = zext i32 %r57 to i128
-%r59 = shl i128 %r58, 96
-%r60 = or i128 %r54, %r59
-%r61 = zext i128 %r60 to i160
-%r63 = getelementptr i32, i32* %r2, i32 4
-%r64 = load i32, i32* %r63
-%r65 = zext i32 %r64 to i160
-%r66 = shl i160 %r65, 128
-%r67 = or i160 %r61, %r66
-%r68 = zext i160 %r67 to i192
-%r69 = load i32, i32* %r6
-%r70 = zext i32 %r69 to i64
-%r72 = getelementptr i32, i32* %r6, i32 1
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i64
-%r75 = shl i64 %r74, 32
-%r76 = or i64 %r70, %r75
-%r77 = zext i64 %r76 to i96
-%r79 = getelementptr i32, i32* %r6, i32 2
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i96
-%r82 = shl i96 %r81, 64
-%r83 = or i96 %r77, %r82
-%r84 = zext i96 %r83 to i128
-%r86 = getelementptr i32, i32* %r6, i32 3
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i128
-%r89 = shl i128 %r88, 96
-%r90 = or i128 %r84, %r89
-%r91 = zext i128 %r90 to i160
-%r93 = getelementptr i32, i32* %r6, i32 4
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i160
-%r96 = shl i160 %r95, 128
-%r97 = or i160 %r91, %r96
-%r98 = zext i160 %r97 to i192
-%r99 = load i32, i32* %r2
-%r100 = zext i32 %r99 to i64
-%r102 = getelementptr i32, i32* %r2, i32 1
-%r103 = load i32, i32* %r102
-%r104 = zext i32 %r103 to i64
-%r105 = shl i64 %r104, 32
-%r106 = or i64 %r100, %r105
-%r107 = zext i64 %r106 to i96
-%r109 = getelementptr i32, i32* %r2, i32 2
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i96
-%r112 = shl i96 %r111, 64
-%r113 = or i96 %r107, %r112
-%r114 = zext i96 %r113 to i128
-%r116 = getelementptr i32, i32* %r2, i32 3
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i128
-%r119 = shl i128 %r118, 96
-%r120 = or i128 %r114, %r119
-%r121 = zext i128 %r120 to i160
-%r123 = getelementptr i32, i32* %r2, i32 4
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i160
-%r126 = shl i160 %r125, 128
-%r127 = or i160 %r121, %r126
-%r128 = zext i160 %r127 to i192
-%r129 = add i192 %r38, %r68
-%r130 = add i192 %r98, %r128
-%r132 = alloca i32, i32 10
-%r133 = trunc i192 %r129 to i160
-%r134 = trunc i192 %r130 to i160
-%r135 = lshr i192 %r129, 160
-%r136 = trunc i192 %r135 to i1
-%r137 = lshr i192 %r130, 160
-%r138 = trunc i192 %r137 to i1
-%r139 = and i1 %r136, %r138
-%r141 = select i1 %r136, i160 %r134, i160 0
-%r143 = select i1 %r138, i160 %r133, i160 0
-%r145 = alloca i32, i32 5
-%r147 = alloca i32, i32 5
-%r148 = trunc i160 %r133 to i32
-%r150 = getelementptr i32, i32* %r145, i32 0
-store i32 %r148, i32* %r150
-%r151 = lshr i160 %r133, 32
-%r152 = trunc i160 %r151 to i32
-%r154 = getelementptr i32, i32* %r145, i32 1
-store i32 %r152, i32* %r154
-%r155 = lshr i160 %r151, 32
-%r156 = trunc i160 %r155 to i32
-%r158 = getelementptr i32, i32* %r145, i32 2
-store i32 %r156, i32* %r158
-%r159 = lshr i160 %r155, 32
-%r160 = trunc i160 %r159 to i32
-%r162 = getelementptr i32, i32* %r145, i32 3
-store i32 %r160, i32* %r162
-%r163 = lshr i160 %r159, 32
-%r164 = trunc i160 %r163 to i32
-%r166 = getelementptr i32, i32* %r145, i32 4
-store i32 %r164, i32* %r166
-%r167 = trunc i160 %r134 to i32
-%r169 = getelementptr i32, i32* %r147, i32 0
-store i32 %r167, i32* %r169
-%r170 = lshr i160 %r134, 32
-%r171 = trunc i160 %r170 to i32
-%r173 = getelementptr i32, i32* %r147, i32 1
-store i32 %r171, i32* %r173
-%r174 = lshr i160 %r170, 32
-%r175 = trunc i160 %r174 to i32
-%r177 = getelementptr i32, i32* %r147, i32 2
-store i32 %r175, i32* %r177
-%r178 = lshr i160 %r174, 32
-%r179 = trunc i160 %r178 to i32
-%r181 = getelementptr i32, i32* %r147, i32 3
-store i32 %r179, i32* %r181
-%r182 = lshr i160 %r178, 32
-%r183 = trunc i160 %r182 to i32
-%r185 = getelementptr i32, i32* %r147, i32 4
-store i32 %r183, i32* %r185
-call void @mcl_fpDbl_mulPre5L(i32* %r132, i32* %r145, i32* %r147)
-%r186 = load i32, i32* %r132
-%r187 = zext i32 %r186 to i64
-%r189 = getelementptr i32, i32* %r132, i32 1
-%r190 = load i32, i32* %r189
-%r191 = zext i32 %r190 to i64
-%r192 = shl i64 %r191, 32
-%r193 = or i64 %r187, %r192
-%r194 = zext i64 %r193 to i96
-%r196 = getelementptr i32, i32* %r132, i32 2
-%r197 = load i32, i32* %r196
-%r198 = zext i32 %r197 to i96
-%r199 = shl i96 %r198, 64
-%r200 = or i96 %r194, %r199
-%r201 = zext i96 %r200 to i128
-%r203 = getelementptr i32, i32* %r132, i32 3
-%r204 = load i32, i32* %r203
-%r205 = zext i32 %r204 to i128
-%r206 = shl i128 %r205, 96
-%r207 = or i128 %r201, %r206
-%r208 = zext i128 %r207 to i160
-%r210 = getelementptr i32, i32* %r132, i32 4
-%r211 = load i32, i32* %r210
-%r212 = zext i32 %r211 to i160
-%r213 = shl i160 %r212, 128
-%r214 = or i160 %r208, %r213
-%r215 = zext i160 %r214 to i192
-%r217 = getelementptr i32, i32* %r132, i32 5
-%r218 = load i32, i32* %r217
-%r219 = zext i32 %r218 to i192
-%r220 = shl i192 %r219, 160
-%r221 = or i192 %r215, %r220
-%r222 = zext i192 %r221 to i224
-%r224 = getelementptr i32, i32* %r132, i32 6
-%r225 = load i32, i32* %r224
-%r226 = zext i32 %r225 to i224
-%r227 = shl i224 %r226, 192
-%r228 = or i224 %r222, %r227
-%r229 = zext i224 %r228 to i256
-%r231 = getelementptr i32, i32* %r132, i32 7
-%r232 = load i32, i32* %r231
-%r233 = zext i32 %r232 to i256
-%r234 = shl i256 %r233, 224
-%r235 = or i256 %r229, %r234
-%r236 = zext i256 %r235 to i288
-%r238 = getelementptr i32, i32* %r132, i32 8
-%r239 = load i32, i32* %r238
-%r240 = zext i32 %r239 to i288
-%r241 = shl i288 %r240, 256
-%r242 = or i288 %r236, %r241
-%r243 = zext i288 %r242 to i320
-%r245 = getelementptr i32, i32* %r132, i32 9
-%r246 = load i32, i32* %r245
-%r247 = zext i32 %r246 to i320
-%r248 = shl i320 %r247, 288
-%r249 = or i320 %r243, %r248
-%r250 = zext i320 %r249 to i352
-%r251 = zext i1 %r139 to i352
-%r252 = shl i352 %r251, 320
-%r253 = or i352 %r250, %r252
-%r254 = zext i160 %r141 to i352
-%r255 = zext i160 %r143 to i352
-%r256 = shl i352 %r254, 160
-%r257 = shl i352 %r255, 160
-%r258 = add i352 %r253, %r256
-%r259 = add i352 %r258, %r257
-%r260 = load i32, i32* %r1
-%r261 = zext i32 %r260 to i64
-%r263 = getelementptr i32, i32* %r1, i32 1
-%r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i64
-%r266 = shl i64 %r265, 32
-%r267 = or i64 %r261, %r266
-%r268 = zext i64 %r267 to i96
-%r270 = getelementptr i32, i32* %r1, i32 2
-%r271 = load i32, i32* %r270
-%r272 = zext i32 %r271 to i96
-%r273 = shl i96 %r272, 64
-%r274 = or i96 %r268, %r273
-%r275 = zext i96 %r274 to i128
-%r277 = getelementptr i32, i32* %r1, i32 3
-%r278 = load i32, i32* %r277
-%r279 = zext i32 %r278 to i128
-%r280 = shl i128 %r279, 96
-%r281 = or i128 %r275, %r280
-%r282 = zext i128 %r281 to i160
-%r284 = getelementptr i32, i32* %r1, i32 4
-%r285 = load i32, i32* %r284
-%r286 = zext i32 %r285 to i160
-%r287 = shl i160 %r286, 128
-%r288 = or i160 %r282, %r287
-%r289 = zext i160 %r288 to i192
-%r291 = getelementptr i32, i32* %r1, i32 5
-%r292 = load i32, i32* %r291
-%r293 = zext i32 %r292 to i192
-%r294 = shl i192 %r293, 160
-%r295 = or i192 %r289, %r294
-%r296 = zext i192 %r295 to i224
-%r298 = getelementptr i32, i32* %r1, i32 6
-%r299 = load i32, i32* %r298
-%r300 = zext i32 %r299 to i224
-%r301 = shl i224 %r300, 192
-%r302 = or i224 %r296, %r301
-%r303 = zext i224 %r302 to i256
-%r305 = getelementptr i32, i32* %r1, i32 7
-%r306 = load i32, i32* %r305
-%r307 = zext i32 %r306 to i256
-%r308 = shl i256 %r307, 224
-%r309 = or i256 %r303, %r308
-%r310 = zext i256 %r309 to i288
-%r312 = getelementptr i32, i32* %r1, i32 8
-%r313 = load i32, i32* %r312
-%r314 = zext i32 %r313 to i288
-%r315 = shl i288 %r314, 256
-%r316 = or i288 %r310, %r315
-%r317 = zext i288 %r316 to i320
-%r319 = getelementptr i32, i32* %r1, i32 9
-%r320 = load i32, i32* %r319
-%r321 = zext i32 %r320 to i320
-%r322 = shl i320 %r321, 288
-%r323 = or i320 %r317, %r322
-%r324 = zext i320 %r323 to i352
-%r325 = sub i352 %r259, %r324
-%r327 = getelementptr i32, i32* %r1, i32 10
-%r328 = load i32, i32* %r327
-%r329 = zext i32 %r328 to i64
-%r331 = getelementptr i32, i32* %r327, i32 1
-%r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i64
-%r334 = shl i64 %r333, 32
-%r335 = or i64 %r329, %r334
-%r336 = zext i64 %r335 to i96
-%r338 = getelementptr i32, i32* %r327, i32 2
-%r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i96
-%r341 = shl i96 %r340, 64
-%r342 = or i96 %r336, %r341
-%r343 = zext i96 %r342 to i128
-%r345 = getelementptr i32, i32* %r327, i32 3
-%r346 = load i32, i32* %r345
-%r347 = zext i32 %r346 to i128
-%r348 = shl i128 %r347, 96
-%r349 = or i128 %r343, %r348
-%r350 = zext i128 %r349 to i160
-%r352 = getelementptr i32, i32* %r327, i32 4
-%r353 = load i32, i32* %r352
-%r354 = zext i32 %r353 to i160
-%r355 = shl i160 %r354, 128
-%r356 = or i160 %r350, %r355
-%r357 = zext i160 %r356 to i192
-%r359 = getelementptr i32, i32* %r327, i32 5
-%r360 = load i32, i32* %r359
-%r361 = zext i32 %r360 to i192
-%r362 = shl i192 %r361, 160
-%r363 = or i192 %r357, %r362
-%r364 = zext i192 %r363 to i224
-%r366 = getelementptr i32, i32* %r327, i32 6
-%r367 = load i32, i32* %r366
-%r368 = zext i32 %r367 to i224
-%r369 = shl i224 %r368, 192
-%r370 = or i224 %r364, %r369
-%r371 = zext i224 %r370 to i256
-%r373 = getelementptr i32, i32* %r327, i32 7
-%r374 = load i32, i32* %r373
-%r375 = zext i32 %r374 to i256
-%r376 = shl i256 %r375, 224
-%r377 = or i256 %r371, %r376
-%r378 = zext i256 %r377 to i288
-%r380 = getelementptr i32, i32* %r327, i32 8
-%r381 = load i32, i32* %r380
-%r382 = zext i32 %r381 to i288
-%r383 = shl i288 %r382, 256
-%r384 = or i288 %r378, %r383
-%r385 = zext i288 %r384 to i320
-%r387 = getelementptr i32, i32* %r327, i32 9
-%r388 = load i32, i32* %r387
-%r389 = zext i32 %r388 to i320
-%r390 = shl i320 %r389, 288
-%r391 = or i320 %r385, %r390
-%r392 = zext i320 %r391 to i352
-%r393 = sub i352 %r325, %r392
-%r394 = zext i352 %r393 to i480
-%r396 = getelementptr i32, i32* %r1, i32 5
-%r397 = load i32, i32* %r396
-%r398 = zext i32 %r397 to i64
-%r400 = getelementptr i32, i32* %r396, i32 1
-%r401 = load i32, i32* %r400
-%r402 = zext i32 %r401 to i64
-%r403 = shl i64 %r402, 32
-%r404 = or i64 %r398, %r403
-%r405 = zext i64 %r404 to i96
-%r407 = getelementptr i32, i32* %r396, i32 2
-%r408 = load i32, i32* %r407
-%r409 = zext i32 %r408 to i96
-%r410 = shl i96 %r409, 64
-%r411 = or i96 %r405, %r410
-%r412 = zext i96 %r411 to i128
-%r414 = getelementptr i32, i32* %r396, i32 3
-%r415 = load i32, i32* %r414
-%r416 = zext i32 %r415 to i128
-%r417 = shl i128 %r416, 96
-%r418 = or i128 %r412, %r417
-%r419 = zext i128 %r418 to i160
-%r421 = getelementptr i32, i32* %r396, i32 4
-%r422 = load i32, i32* %r421
-%r423 = zext i32 %r422 to i160
-%r424 = shl i160 %r423, 128
-%r425 = or i160 %r419, %r424
-%r426 = zext i160 %r425 to i192
-%r428 = getelementptr i32, i32* %r396, i32 5
-%r429 = load i32, i32* %r428
-%r430 = zext i32 %r429 to i192
-%r431 = shl i192 %r430, 160
-%r432 = or i192 %r426, %r431
-%r433 = zext i192 %r432 to i224
-%r435 = getelementptr i32, i32* %r396, i32 6
-%r436 = load i32, i32* %r435
-%r437 = zext i32 %r436 to i224
-%r438 = shl i224 %r437, 192
-%r439 = or i224 %r433, %r438
-%r440 = zext i224 %r439 to i256
-%r442 = getelementptr i32, i32* %r396, i32 7
-%r443 = load i32, i32* %r442
-%r444 = zext i32 %r443 to i256
-%r445 = shl i256 %r444, 224
-%r446 = or i256 %r440, %r445
-%r447 = zext i256 %r446 to i288
-%r449 = getelementptr i32, i32* %r396, i32 8
-%r450 = load i32, i32* %r449
-%r451 = zext i32 %r450 to i288
-%r452 = shl i288 %r451, 256
-%r453 = or i288 %r447, %r452
-%r454 = zext i288 %r453 to i320
-%r456 = getelementptr i32, i32* %r396, i32 9
-%r457 = load i32, i32* %r456
-%r458 = zext i32 %r457 to i320
-%r459 = shl i320 %r458, 288
-%r460 = or i320 %r454, %r459
-%r461 = zext i320 %r460 to i352
-%r463 = getelementptr i32, i32* %r396, i32 10
-%r464 = load i32, i32* %r463
-%r465 = zext i32 %r464 to i352
-%r466 = shl i352 %r465, 320
-%r467 = or i352 %r461, %r466
-%r468 = zext i352 %r467 to i384
-%r470 = getelementptr i32, i32* %r396, i32 11
-%r471 = load i32, i32* %r470
-%r472 = zext i32 %r471 to i384
-%r473 = shl i384 %r472, 352
-%r474 = or i384 %r468, %r473
-%r475 = zext i384 %r474 to i416
-%r477 = getelementptr i32, i32* %r396, i32 12
-%r478 = load i32, i32* %r477
-%r479 = zext i32 %r478 to i416
-%r480 = shl i416 %r479, 384
-%r481 = or i416 %r475, %r480
-%r482 = zext i416 %r481 to i448
-%r484 = getelementptr i32, i32* %r396, i32 13
-%r485 = load i32, i32* %r484
-%r486 = zext i32 %r485 to i448
-%r487 = shl i448 %r486, 416
-%r488 = or i448 %r482, %r487
-%r489 = zext i448 %r488 to i480
-%r491 = getelementptr i32, i32* %r396, i32 14
-%r492 = load i32, i32* %r491
-%r493 = zext i32 %r492 to i480
-%r494 = shl i480 %r493, 448
-%r495 = or i480 %r489, %r494
-%r496 = add i480 %r394, %r495
-%r498 = getelementptr i32, i32* %r1, i32 5
-%r499 = trunc i480 %r496 to i32
-%r501 = getelementptr i32, i32* %r498, i32 0
-store i32 %r499, i32* %r501
-%r502 = lshr i480 %r496, 32
-%r503 = trunc i480 %r502 to i32
-%r505 = getelementptr i32, i32* %r498, i32 1
-store i32 %r503, i32* %r505
-%r506 = lshr i480 %r502, 32
-%r507 = trunc i480 %r506 to i32
-%r509 = getelementptr i32, i32* %r498, i32 2
-store i32 %r507, i32* %r509
-%r510 = lshr i480 %r506, 32
-%r511 = trunc i480 %r510 to i32
-%r513 = getelementptr i32, i32* %r498, i32 3
-store i32 %r511, i32* %r513
-%r514 = lshr i480 %r510, 32
-%r515 = trunc i480 %r514 to i32
-%r517 = getelementptr i32, i32* %r498, i32 4
-store i32 %r515, i32* %r517
-%r518 = lshr i480 %r514, 32
-%r519 = trunc i480 %r518 to i32
-%r521 = getelementptr i32, i32* %r498, i32 5
-store i32 %r519, i32* %r521
-%r522 = lshr i480 %r518, 32
-%r523 = trunc i480 %r522 to i32
-%r525 = getelementptr i32, i32* %r498, i32 6
-store i32 %r523, i32* %r525
-%r526 = lshr i480 %r522, 32
-%r527 = trunc i480 %r526 to i32
-%r529 = getelementptr i32, i32* %r498, i32 7
-store i32 %r527, i32* %r529
-%r530 = lshr i480 %r526, 32
-%r531 = trunc i480 %r530 to i32
-%r533 = getelementptr i32, i32* %r498, i32 8
-store i32 %r531, i32* %r533
-%r534 = lshr i480 %r530, 32
-%r535 = trunc i480 %r534 to i32
-%r537 = getelementptr i32, i32* %r498, i32 9
-store i32 %r535, i32* %r537
-%r538 = lshr i480 %r534, 32
-%r539 = trunc i480 %r538 to i32
-%r541 = getelementptr i32, i32* %r498, i32 10
-store i32 %r539, i32* %r541
-%r542 = lshr i480 %r538, 32
-%r543 = trunc i480 %r542 to i32
-%r545 = getelementptr i32, i32* %r498, i32 11
-store i32 %r543, i32* %r545
-%r546 = lshr i480 %r542, 32
-%r547 = trunc i480 %r546 to i32
-%r549 = getelementptr i32, i32* %r498, i32 12
-store i32 %r547, i32* %r549
-%r550 = lshr i480 %r546, 32
-%r551 = trunc i480 %r550 to i32
-%r553 = getelementptr i32, i32* %r498, i32 13
-store i32 %r551, i32* %r553
-%r554 = lshr i480 %r550, 32
-%r555 = trunc i480 %r554 to i32
-%r557 = getelementptr i32, i32* %r498, i32 14
-store i32 %r555, i32* %r557
-ret void
-}
-define void @mcl_fp_mont10L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i352 @mulPv320x32(i32* %r2, i32 %r10)
-%r12 = zext i352 %r11 to i384
-%r13 = trunc i352 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i352 @mulPv320x32(i32* %r4, i32 %r14)
-%r16 = zext i352 %r15 to i384
-%r17 = add i384 %r12, %r16
-%r18 = lshr i384 %r17, 32
-%r20 = getelementptr i32, i32* %r3, i32 1
-%r21 = load i32, i32* %r20
-%r22 = call i352 @mulPv320x32(i32* %r2, i32 %r21)
-%r23 = zext i352 %r22 to i384
-%r24 = add i384 %r18, %r23
-%r25 = trunc i384 %r24 to i32
-%r26 = mul i32 %r25, %r7
-%r27 = call i352 @mulPv320x32(i32* %r4, i32 %r26)
-%r28 = zext i352 %r27 to i384
-%r29 = add i384 %r24, %r28
-%r30 = lshr i384 %r29, 32
-%r32 = getelementptr i32, i32* %r3, i32 2
-%r33 = load i32, i32* %r32
-%r34 = call i352 @mulPv320x32(i32* %r2, i32 %r33)
-%r35 = zext i352 %r34 to i384
-%r36 = add i384 %r30, %r35
-%r37 = trunc i384 %r36 to i32
-%r38 = mul i32 %r37, %r7
-%r39 = call i352 @mulPv320x32(i32* %r4, i32 %r38)
-%r40 = zext i352 %r39 to i384
-%r41 = add i384 %r36, %r40
-%r42 = lshr i384 %r41, 32
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = call i352 @mulPv320x32(i32* %r2, i32 %r45)
-%r47 = zext i352 %r46 to i384
-%r48 = add i384 %r42, %r47
-%r49 = trunc i384 %r48 to i32
-%r50 = mul i32 %r49, %r7
-%r51 = call i352 @mulPv320x32(i32* %r4, i32 %r50)
-%r52 = zext i352 %r51 to i384
-%r53 = add i384 %r48, %r52
-%r54 = lshr i384 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 4
-%r57 = load i32, i32* %r56
-%r58 = call i352 @mulPv320x32(i32* %r2, i32 %r57)
-%r59 = zext i352 %r58 to i384
-%r60 = add i384 %r54, %r59
-%r61 = trunc i384 %r60 to i32
-%r62 = mul i32 %r61, %r7
-%r63 = call i352 @mulPv320x32(i32* %r4, i32 %r62)
-%r64 = zext i352 %r63 to i384
-%r65 = add i384 %r60, %r64
-%r66 = lshr i384 %r65, 32
-%r68 = getelementptr i32, i32* %r3, i32 5
-%r69 = load i32, i32* %r68
-%r70 = call i352 @mulPv320x32(i32* %r2, i32 %r69)
-%r71 = zext i352 %r70 to i384
-%r72 = add i384 %r66, %r71
-%r73 = trunc i384 %r72 to i32
-%r74 = mul i32 %r73, %r7
-%r75 = call i352 @mulPv320x32(i32* %r4, i32 %r74)
-%r76 = zext i352 %r75 to i384
-%r77 = add i384 %r72, %r76
-%r78 = lshr i384 %r77, 32
-%r80 = getelementptr i32, i32* %r3, i32 6
-%r81 = load i32, i32* %r80
-%r82 = call i352 @mulPv320x32(i32* %r2, i32 %r81)
-%r83 = zext i352 %r82 to i384
-%r84 = add i384 %r78, %r83
-%r85 = trunc i384 %r84 to i32
-%r86 = mul i32 %r85, %r7
-%r87 = call i352 @mulPv320x32(i32* %r4, i32 %r86)
-%r88 = zext i352 %r87 to i384
-%r89 = add i384 %r84, %r88
-%r90 = lshr i384 %r89, 32
-%r92 = getelementptr i32, i32* %r3, i32 7
-%r93 = load i32, i32* %r92
-%r94 = call i352 @mulPv320x32(i32* %r2, i32 %r93)
-%r95 = zext i352 %r94 to i384
-%r96 = add i384 %r90, %r95
-%r97 = trunc i384 %r96 to i32
-%r98 = mul i32 %r97, %r7
-%r99 = call i352 @mulPv320x32(i32* %r4, i32 %r98)
-%r100 = zext i352 %r99 to i384
-%r101 = add i384 %r96, %r100
-%r102 = lshr i384 %r101, 32
-%r104 = getelementptr i32, i32* %r3, i32 8
-%r105 = load i32, i32* %r104
-%r106 = call i352 @mulPv320x32(i32* %r2, i32 %r105)
-%r107 = zext i352 %r106 to i384
-%r108 = add i384 %r102, %r107
-%r109 = trunc i384 %r108 to i32
-%r110 = mul i32 %r109, %r7
-%r111 = call i352 @mulPv320x32(i32* %r4, i32 %r110)
-%r112 = zext i352 %r111 to i384
-%r113 = add i384 %r108, %r112
-%r114 = lshr i384 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 9
-%r117 = load i32, i32* %r116
-%r118 = call i352 @mulPv320x32(i32* %r2, i32 %r117)
-%r119 = zext i352 %r118 to i384
-%r120 = add i384 %r114, %r119
-%r121 = trunc i384 %r120 to i32
-%r122 = mul i32 %r121, %r7
-%r123 = call i352 @mulPv320x32(i32* %r4, i32 %r122)
-%r124 = zext i352 %r123 to i384
-%r125 = add i384 %r120, %r124
-%r126 = lshr i384 %r125, 32
-%r127 = trunc i384 %r126 to i352
-%r128 = load i32, i32* %r4
-%r129 = zext i32 %r128 to i64
-%r131 = getelementptr i32, i32* %r4, i32 1
-%r132 = load i32, i32* %r131
-%r133 = zext i32 %r132 to i64
-%r134 = shl i64 %r133, 32
-%r135 = or i64 %r129, %r134
-%r136 = zext i64 %r135 to i96
-%r138 = getelementptr i32, i32* %r4, i32 2
-%r139 = load i32, i32* %r138
-%r140 = zext i32 %r139 to i96
-%r141 = shl i96 %r140, 64
-%r142 = or i96 %r136, %r141
-%r143 = zext i96 %r142 to i128
-%r145 = getelementptr i32, i32* %r4, i32 3
-%r146 = load i32, i32* %r145
-%r147 = zext i32 %r146 to i128
-%r148 = shl i128 %r147, 96
-%r149 = or i128 %r143, %r148
-%r150 = zext i128 %r149 to i160
-%r152 = getelementptr i32, i32* %r4, i32 4
-%r153 = load i32, i32* %r152
-%r154 = zext i32 %r153 to i160
-%r155 = shl i160 %r154, 128
-%r156 = or i160 %r150, %r155
-%r157 = zext i160 %r156 to i192
-%r159 = getelementptr i32, i32* %r4, i32 5
-%r160 = load i32, i32* %r159
-%r161 = zext i32 %r160 to i192
-%r162 = shl i192 %r161, 160
-%r163 = or i192 %r157, %r162
-%r164 = zext i192 %r163 to i224
-%r166 = getelementptr i32, i32* %r4, i32 6
-%r167 = load i32, i32* %r166
-%r168 = zext i32 %r167 to i224
-%r169 = shl i224 %r168, 192
-%r170 = or i224 %r164, %r169
-%r171 = zext i224 %r170 to i256
-%r173 = getelementptr i32, i32* %r4, i32 7
-%r174 = load i32, i32* %r173
-%r175 = zext i32 %r174 to i256
-%r176 = shl i256 %r175, 224
-%r177 = or i256 %r171, %r176
-%r178 = zext i256 %r177 to i288
-%r180 = getelementptr i32, i32* %r4, i32 8
-%r181 = load i32, i32* %r180
-%r182 = zext i32 %r181 to i288
-%r183 = shl i288 %r182, 256
-%r184 = or i288 %r178, %r183
-%r185 = zext i288 %r184 to i320
-%r187 = getelementptr i32, i32* %r4, i32 9
-%r188 = load i32, i32* %r187
-%r189 = zext i32 %r188 to i320
-%r190 = shl i320 %r189, 288
-%r191 = or i320 %r185, %r190
-%r192 = zext i320 %r191 to i352
-%r193 = sub i352 %r127, %r192
-%r194 = lshr i352 %r193, 320
-%r195 = trunc i352 %r194 to i1
-%r196 = select i1 %r195, i352 %r127, i352 %r193
-%r197 = trunc i352 %r196 to i320
-%r198 = trunc i320 %r197 to i32
-%r200 = getelementptr i32, i32* %r1, i32 0
-store i32 %r198, i32* %r200
-%r201 = lshr i320 %r197, 32
-%r202 = trunc i320 %r201 to i32
-%r204 = getelementptr i32, i32* %r1, i32 1
-store i32 %r202, i32* %r204
-%r205 = lshr i320 %r201, 32
-%r206 = trunc i320 %r205 to i32
-%r208 = getelementptr i32, i32* %r1, i32 2
-store i32 %r206, i32* %r208
-%r209 = lshr i320 %r205, 32
-%r210 = trunc i320 %r209 to i32
-%r212 = getelementptr i32, i32* %r1, i32 3
-store i32 %r210, i32* %r212
-%r213 = lshr i320 %r209, 32
-%r214 = trunc i320 %r213 to i32
-%r216 = getelementptr i32, i32* %r1, i32 4
-store i32 %r214, i32* %r216
-%r217 = lshr i320 %r213, 32
-%r218 = trunc i320 %r217 to i32
-%r220 = getelementptr i32, i32* %r1, i32 5
-store i32 %r218, i32* %r220
-%r221 = lshr i320 %r217, 32
-%r222 = trunc i320 %r221 to i32
-%r224 = getelementptr i32, i32* %r1, i32 6
-store i32 %r222, i32* %r224
-%r225 = lshr i320 %r221, 32
-%r226 = trunc i320 %r225 to i32
-%r228 = getelementptr i32, i32* %r1, i32 7
-store i32 %r226, i32* %r228
-%r229 = lshr i320 %r225, 32
-%r230 = trunc i320 %r229 to i32
-%r232 = getelementptr i32, i32* %r1, i32 8
-store i32 %r230, i32* %r232
-%r233 = lshr i320 %r229, 32
-%r234 = trunc i320 %r233 to i32
-%r236 = getelementptr i32, i32* %r1, i32 9
-store i32 %r234, i32* %r236
-ret void
-}
-define void @mcl_fp_montNF10L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i352 @mulPv320x32(i32* %r2, i32 %r8)
-%r10 = trunc i352 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i352 @mulPv320x32(i32* %r4, i32 %r11)
-%r13 = add i352 %r9, %r12
-%r14 = lshr i352 %r13, 32
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = call i352 @mulPv320x32(i32* %r2, i32 %r17)
-%r19 = add i352 %r14, %r18
-%r20 = trunc i352 %r19 to i32
-%r21 = mul i32 %r20, %r7
-%r22 = call i352 @mulPv320x32(i32* %r4, i32 %r21)
-%r23 = add i352 %r19, %r22
-%r24 = lshr i352 %r23, 32
-%r26 = getelementptr i32, i32* %r3, i32 2
-%r27 = load i32, i32* %r26
-%r28 = call i352 @mulPv320x32(i32* %r2, i32 %r27)
-%r29 = add i352 %r24, %r28
-%r30 = trunc i352 %r29 to i32
-%r31 = mul i32 %r30, %r7
-%r32 = call i352 @mulPv320x32(i32* %r4, i32 %r31)
-%r33 = add i352 %r29, %r32
-%r34 = lshr i352 %r33, 32
-%r36 = getelementptr i32, i32* %r3, i32 3
-%r37 = load i32, i32* %r36
-%r38 = call i352 @mulPv320x32(i32* %r2, i32 %r37)
-%r39 = add i352 %r34, %r38
-%r40 = trunc i352 %r39 to i32
-%r41 = mul i32 %r40, %r7
-%r42 = call i352 @mulPv320x32(i32* %r4, i32 %r41)
-%r43 = add i352 %r39, %r42
-%r44 = lshr i352 %r43, 32
-%r46 = getelementptr i32, i32* %r3, i32 4
-%r47 = load i32, i32* %r46
-%r48 = call i352 @mulPv320x32(i32* %r2, i32 %r47)
-%r49 = add i352 %r44, %r48
-%r50 = trunc i352 %r49 to i32
-%r51 = mul i32 %r50, %r7
-%r52 = call i352 @mulPv320x32(i32* %r4, i32 %r51)
-%r53 = add i352 %r49, %r52
-%r54 = lshr i352 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 5
-%r57 = load i32, i32* %r56
-%r58 = call i352 @mulPv320x32(i32* %r2, i32 %r57)
-%r59 = add i352 %r54, %r58
-%r60 = trunc i352 %r59 to i32
-%r61 = mul i32 %r60, %r7
-%r62 = call i352 @mulPv320x32(i32* %r4, i32 %r61)
-%r63 = add i352 %r59, %r62
-%r64 = lshr i352 %r63, 32
-%r66 = getelementptr i32, i32* %r3, i32 6
-%r67 = load i32, i32* %r66
-%r68 = call i352 @mulPv320x32(i32* %r2, i32 %r67)
-%r69 = add i352 %r64, %r68
-%r70 = trunc i352 %r69 to i32
-%r71 = mul i32 %r70, %r7
-%r72 = call i352 @mulPv320x32(i32* %r4, i32 %r71)
-%r73 = add i352 %r69, %r72
-%r74 = lshr i352 %r73, 32
-%r76 = getelementptr i32, i32* %r3, i32 7
-%r77 = load i32, i32* %r76
-%r78 = call i352 @mulPv320x32(i32* %r2, i32 %r77)
-%r79 = add i352 %r74, %r78
-%r80 = trunc i352 %r79 to i32
-%r81 = mul i32 %r80, %r7
-%r82 = call i352 @mulPv320x32(i32* %r4, i32 %r81)
-%r83 = add i352 %r79, %r82
-%r84 = lshr i352 %r83, 32
-%r86 = getelementptr i32, i32* %r3, i32 8
-%r87 = load i32, i32* %r86
-%r88 = call i352 @mulPv320x32(i32* %r2, i32 %r87)
-%r89 = add i352 %r84, %r88
-%r90 = trunc i352 %r89 to i32
-%r91 = mul i32 %r90, %r7
-%r92 = call i352 @mulPv320x32(i32* %r4, i32 %r91)
-%r93 = add i352 %r89, %r92
-%r94 = lshr i352 %r93, 32
-%r96 = getelementptr i32, i32* %r3, i32 9
-%r97 = load i32, i32* %r96
-%r98 = call i352 @mulPv320x32(i32* %r2, i32 %r97)
-%r99 = add i352 %r94, %r98
-%r100 = trunc i352 %r99 to i32
-%r101 = mul i32 %r100, %r7
-%r102 = call i352 @mulPv320x32(i32* %r4, i32 %r101)
-%r103 = add i352 %r99, %r102
-%r104 = lshr i352 %r103, 32
-%r105 = trunc i352 %r104 to i320
-%r106 = load i32, i32* %r4
-%r107 = zext i32 %r106 to i64
-%r109 = getelementptr i32, i32* %r4, i32 1
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i64
-%r112 = shl i64 %r111, 32
-%r113 = or i64 %r107, %r112
-%r114 = zext i64 %r113 to i96
-%r116 = getelementptr i32, i32* %r4, i32 2
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i96
-%r119 = shl i96 %r118, 64
-%r120 = or i96 %r114, %r119
-%r121 = zext i96 %r120 to i128
-%r123 = getelementptr i32, i32* %r4, i32 3
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i128
-%r126 = shl i128 %r125, 96
-%r127 = or i128 %r121, %r126
-%r128 = zext i128 %r127 to i160
-%r130 = getelementptr i32, i32* %r4, i32 4
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i160
-%r133 = shl i160 %r132, 128
-%r134 = or i160 %r128, %r133
-%r135 = zext i160 %r134 to i192
-%r137 = getelementptr i32, i32* %r4, i32 5
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i192
-%r140 = shl i192 %r139, 160
-%r141 = or i192 %r135, %r140
-%r142 = zext i192 %r141 to i224
-%r144 = getelementptr i32, i32* %r4, i32 6
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i224
-%r147 = shl i224 %r146, 192
-%r148 = or i224 %r142, %r147
-%r149 = zext i224 %r148 to i256
-%r151 = getelementptr i32, i32* %r4, i32 7
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i256
-%r154 = shl i256 %r153, 224
-%r155 = or i256 %r149, %r154
-%r156 = zext i256 %r155 to i288
-%r158 = getelementptr i32, i32* %r4, i32 8
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i288
-%r161 = shl i288 %r160, 256
-%r162 = or i288 %r156, %r161
-%r163 = zext i288 %r162 to i320
-%r165 = getelementptr i32, i32* %r4, i32 9
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i320
-%r168 = shl i320 %r167, 288
-%r169 = or i320 %r163, %r168
-%r170 = sub i320 %r105, %r169
-%r171 = lshr i320 %r170, 319
-%r172 = trunc i320 %r171 to i1
-%r173 = select i1 %r172, i320 %r105, i320 %r170
-%r174 = trunc i320 %r173 to i32
-%r176 = getelementptr i32, i32* %r1, i32 0
-store i32 %r174, i32* %r176
-%r177 = lshr i320 %r173, 32
-%r178 = trunc i320 %r177 to i32
-%r180 = getelementptr i32, i32* %r1, i32 1
-store i32 %r178, i32* %r180
-%r181 = lshr i320 %r177, 32
-%r182 = trunc i320 %r181 to i32
-%r184 = getelementptr i32, i32* %r1, i32 2
-store i32 %r182, i32* %r184
-%r185 = lshr i320 %r181, 32
-%r186 = trunc i320 %r185 to i32
-%r188 = getelementptr i32, i32* %r1, i32 3
-store i32 %r186, i32* %r188
-%r189 = lshr i320 %r185, 32
-%r190 = trunc i320 %r189 to i32
-%r192 = getelementptr i32, i32* %r1, i32 4
-store i32 %r190, i32* %r192
-%r193 = lshr i320 %r189, 32
-%r194 = trunc i320 %r193 to i32
-%r196 = getelementptr i32, i32* %r1, i32 5
-store i32 %r194, i32* %r196
-%r197 = lshr i320 %r193, 32
-%r198 = trunc i320 %r197 to i32
-%r200 = getelementptr i32, i32* %r1, i32 6
-store i32 %r198, i32* %r200
-%r201 = lshr i320 %r197, 32
-%r202 = trunc i320 %r201 to i32
-%r204 = getelementptr i32, i32* %r1, i32 7
-store i32 %r202, i32* %r204
-%r205 = lshr i320 %r201, 32
-%r206 = trunc i320 %r205 to i32
-%r208 = getelementptr i32, i32* %r1, i32 8
-store i32 %r206, i32* %r208
-%r209 = lshr i320 %r205, 32
-%r210 = trunc i320 %r209 to i32
-%r212 = getelementptr i32, i32* %r1, i32 9
-store i32 %r210, i32* %r212
-ret void
-}
-define void @mcl_fp_montRed10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = zext i96 %r21 to i128
-%r24 = getelementptr i32, i32* %r3, i32 3
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i128
-%r27 = shl i128 %r26, 96
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i160
-%r31 = getelementptr i32, i32* %r3, i32 4
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i160
-%r34 = shl i160 %r33, 128
-%r35 = or i160 %r29, %r34
-%r36 = zext i160 %r35 to i192
-%r38 = getelementptr i32, i32* %r3, i32 5
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i192
-%r41 = shl i192 %r40, 160
-%r42 = or i192 %r36, %r41
-%r43 = zext i192 %r42 to i224
-%r45 = getelementptr i32, i32* %r3, i32 6
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i224
-%r48 = shl i224 %r47, 192
-%r49 = or i224 %r43, %r48
-%r50 = zext i224 %r49 to i256
-%r52 = getelementptr i32, i32* %r3, i32 7
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i256
-%r55 = shl i256 %r54, 224
-%r56 = or i256 %r50, %r55
-%r57 = zext i256 %r56 to i288
-%r59 = getelementptr i32, i32* %r3, i32 8
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i288
-%r62 = shl i288 %r61, 256
-%r63 = or i288 %r57, %r62
-%r64 = zext i288 %r63 to i320
-%r66 = getelementptr i32, i32* %r3, i32 9
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i320
-%r69 = shl i320 %r68, 288
-%r70 = or i320 %r64, %r69
-%r71 = load i32, i32* %r2
-%r72 = zext i32 %r71 to i64
-%r74 = getelementptr i32, i32* %r2, i32 1
-%r75 = load i32, i32* %r74
-%r76 = zext i32 %r75 to i64
-%r77 = shl i64 %r76, 32
-%r78 = or i64 %r72, %r77
-%r79 = zext i64 %r78 to i96
-%r81 = getelementptr i32, i32* %r2, i32 2
-%r82 = load i32, i32* %r81
-%r83 = zext i32 %r82 to i96
-%r84 = shl i96 %r83, 64
-%r85 = or i96 %r79, %r84
-%r86 = zext i96 %r85 to i128
-%r88 = getelementptr i32, i32* %r2, i32 3
-%r89 = load i32, i32* %r88
-%r90 = zext i32 %r89 to i128
-%r91 = shl i128 %r90, 96
-%r92 = or i128 %r86, %r91
-%r93 = zext i128 %r92 to i160
-%r95 = getelementptr i32, i32* %r2, i32 4
-%r96 = load i32, i32* %r95
-%r97 = zext i32 %r96 to i160
-%r98 = shl i160 %r97, 128
-%r99 = or i160 %r93, %r98
-%r100 = zext i160 %r99 to i192
-%r102 = getelementptr i32, i32* %r2, i32 5
-%r103 = load i32, i32* %r102
-%r104 = zext i32 %r103 to i192
-%r105 = shl i192 %r104, 160
-%r106 = or i192 %r100, %r105
-%r107 = zext i192 %r106 to i224
-%r109 = getelementptr i32, i32* %r2, i32 6
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i224
-%r112 = shl i224 %r111, 192
-%r113 = or i224 %r107, %r112
-%r114 = zext i224 %r113 to i256
-%r116 = getelementptr i32, i32* %r2, i32 7
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i256
-%r119 = shl i256 %r118, 224
-%r120 = or i256 %r114, %r119
-%r121 = zext i256 %r120 to i288
-%r123 = getelementptr i32, i32* %r2, i32 8
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i288
-%r126 = shl i288 %r125, 256
-%r127 = or i288 %r121, %r126
-%r128 = zext i288 %r127 to i320
-%r130 = getelementptr i32, i32* %r2, i32 9
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i320
-%r133 = shl i320 %r132, 288
-%r134 = or i320 %r128, %r133
-%r135 = zext i320 %r134 to i352
-%r137 = getelementptr i32, i32* %r2, i32 10
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i352
-%r140 = shl i352 %r139, 320
-%r141 = or i352 %r135, %r140
-%r142 = zext i352 %r141 to i384
-%r144 = getelementptr i32, i32* %r2, i32 11
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i384
-%r147 = shl i384 %r146, 352
-%r148 = or i384 %r142, %r147
-%r149 = zext i384 %r148 to i416
-%r151 = getelementptr i32, i32* %r2, i32 12
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i416
-%r154 = shl i416 %r153, 384
-%r155 = or i416 %r149, %r154
-%r156 = zext i416 %r155 to i448
-%r158 = getelementptr i32, i32* %r2, i32 13
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i448
-%r161 = shl i448 %r160, 416
-%r162 = or i448 %r156, %r161
-%r163 = zext i448 %r162 to i480
-%r165 = getelementptr i32, i32* %r2, i32 14
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i480
-%r168 = shl i480 %r167, 448
-%r169 = or i480 %r163, %r168
-%r170 = zext i480 %r169 to i512
-%r172 = getelementptr i32, i32* %r2, i32 15
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i512
-%r175 = shl i512 %r174, 480
-%r176 = or i512 %r170, %r175
-%r177 = zext i512 %r176 to i544
-%r179 = getelementptr i32, i32* %r2, i32 16
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i544
-%r182 = shl i544 %r181, 512
-%r183 = or i544 %r177, %r182
-%r184 = zext i544 %r183 to i576
-%r186 = getelementptr i32, i32* %r2, i32 17
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i576
-%r189 = shl i576 %r188, 544
-%r190 = or i576 %r184, %r189
-%r191 = zext i576 %r190 to i608
-%r193 = getelementptr i32, i32* %r2, i32 18
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i608
-%r196 = shl i608 %r195, 576
-%r197 = or i608 %r191, %r196
-%r198 = zext i608 %r197 to i640
-%r200 = getelementptr i32, i32* %r2, i32 19
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i640
-%r203 = shl i640 %r202, 608
-%r204 = or i640 %r198, %r203
-%r205 = zext i640 %r204 to i672
-%r206 = trunc i672 %r205 to i32
-%r207 = mul i32 %r206, %r6
-%r208 = call i352 @mulPv320x32(i32* %r3, i32 %r207)
-%r209 = zext i352 %r208 to i672
-%r210 = add i672 %r205, %r209
-%r211 = lshr i672 %r210, 32
-%r212 = trunc i672 %r211 to i640
-%r213 = trunc i640 %r212 to i32
-%r214 = mul i32 %r213, %r6
-%r215 = call i352 @mulPv320x32(i32* %r3, i32 %r214)
-%r216 = zext i352 %r215 to i640
-%r217 = add i640 %r212, %r216
-%r218 = lshr i640 %r217, 32
-%r219 = trunc i640 %r218 to i608
-%r220 = trunc i608 %r219 to i32
-%r221 = mul i32 %r220, %r6
-%r222 = call i352 @mulPv320x32(i32* %r3, i32 %r221)
-%r223 = zext i352 %r222 to i608
-%r224 = add i608 %r219, %r223
-%r225 = lshr i608 %r224, 32
-%r226 = trunc i608 %r225 to i576
-%r227 = trunc i576 %r226 to i32
-%r228 = mul i32 %r227, %r6
-%r229 = call i352 @mulPv320x32(i32* %r3, i32 %r228)
-%r230 = zext i352 %r229 to i576
-%r231 = add i576 %r226, %r230
-%r232 = lshr i576 %r231, 32
-%r233 = trunc i576 %r232 to i544
-%r234 = trunc i544 %r233 to i32
-%r235 = mul i32 %r234, %r6
-%r236 = call i352 @mulPv320x32(i32* %r3, i32 %r235)
-%r237 = zext i352 %r236 to i544
-%r238 = add i544 %r233, %r237
-%r239 = lshr i544 %r238, 32
-%r240 = trunc i544 %r239 to i512
-%r241 = trunc i512 %r240 to i32
-%r242 = mul i32 %r241, %r6
-%r243 = call i352 @mulPv320x32(i32* %r3, i32 %r242)
-%r244 = zext i352 %r243 to i512
-%r245 = add i512 %r240, %r244
-%r246 = lshr i512 %r245, 32
-%r247 = trunc i512 %r246 to i480
-%r248 = trunc i480 %r247 to i32
-%r249 = mul i32 %r248, %r6
-%r250 = call i352 @mulPv320x32(i32* %r3, i32 %r249)
-%r251 = zext i352 %r250 to i480
-%r252 = add i480 %r247, %r251
-%r253 = lshr i480 %r252, 32
-%r254 = trunc i480 %r253 to i448
-%r255 = trunc i448 %r254 to i32
-%r256 = mul i32 %r255, %r6
-%r257 = call i352 @mulPv320x32(i32* %r3, i32 %r256)
-%r258 = zext i352 %r257 to i448
-%r259 = add i448 %r254, %r258
-%r260 = lshr i448 %r259, 32
-%r261 = trunc i448 %r260 to i416
-%r262 = trunc i416 %r261 to i32
-%r263 = mul i32 %r262, %r6
-%r264 = call i352 @mulPv320x32(i32* %r3, i32 %r263)
-%r265 = zext i352 %r264 to i416
-%r266 = add i416 %r261, %r265
-%r267 = lshr i416 %r266, 32
-%r268 = trunc i416 %r267 to i384
-%r269 = trunc i384 %r268 to i32
-%r270 = mul i32 %r269, %r6
-%r271 = call i352 @mulPv320x32(i32* %r3, i32 %r270)
-%r272 = zext i352 %r271 to i384
-%r273 = add i384 %r268, %r272
-%r274 = lshr i384 %r273, 32
-%r275 = trunc i384 %r274 to i352
-%r276 = zext i320 %r70 to i352
-%r277 = sub i352 %r275, %r276
-%r278 = lshr i352 %r277, 320
-%r279 = trunc i352 %r278 to i1
-%r280 = select i1 %r279, i352 %r275, i352 %r277
-%r281 = trunc i352 %r280 to i320
-%r282 = trunc i320 %r281 to i32
-%r284 = getelementptr i32, i32* %r1, i32 0
-store i32 %r282, i32* %r284
-%r285 = lshr i320 %r281, 32
-%r286 = trunc i320 %r285 to i32
-%r288 = getelementptr i32, i32* %r1, i32 1
-store i32 %r286, i32* %r288
-%r289 = lshr i320 %r285, 32
-%r290 = trunc i320 %r289 to i32
-%r292 = getelementptr i32, i32* %r1, i32 2
-store i32 %r290, i32* %r292
-%r293 = lshr i320 %r289, 32
-%r294 = trunc i320 %r293 to i32
-%r296 = getelementptr i32, i32* %r1, i32 3
-store i32 %r294, i32* %r296
-%r297 = lshr i320 %r293, 32
-%r298 = trunc i320 %r297 to i32
-%r300 = getelementptr i32, i32* %r1, i32 4
-store i32 %r298, i32* %r300
-%r301 = lshr i320 %r297, 32
-%r302 = trunc i320 %r301 to i32
-%r304 = getelementptr i32, i32* %r1, i32 5
-store i32 %r302, i32* %r304
-%r305 = lshr i320 %r301, 32
-%r306 = trunc i320 %r305 to i32
-%r308 = getelementptr i32, i32* %r1, i32 6
-store i32 %r306, i32* %r308
-%r309 = lshr i320 %r305, 32
-%r310 = trunc i320 %r309 to i32
-%r312 = getelementptr i32, i32* %r1, i32 7
-store i32 %r310, i32* %r312
-%r313 = lshr i320 %r309, 32
-%r314 = trunc i320 %r313 to i32
-%r316 = getelementptr i32, i32* %r1, i32 8
-store i32 %r314, i32* %r316
-%r317 = lshr i320 %r313, 32
-%r318 = trunc i320 %r317 to i32
-%r320 = getelementptr i32, i32* %r1, i32 9
-store i32 %r318, i32* %r320
-ret void
-}
-define i32 @mcl_fp_addPre10L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r70 = load i32, i32* %r4
-%r71 = zext i32 %r70 to i64
-%r73 = getelementptr i32, i32* %r4, i32 1
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i64
-%r76 = shl i64 %r75, 32
-%r77 = or i64 %r71, %r76
-%r78 = zext i64 %r77 to i96
-%r80 = getelementptr i32, i32* %r4, i32 2
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i96
-%r83 = shl i96 %r82, 64
-%r84 = or i96 %r78, %r83
-%r85 = zext i96 %r84 to i128
-%r87 = getelementptr i32, i32* %r4, i32 3
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i128
-%r90 = shl i128 %r89, 96
-%r91 = or i128 %r85, %r90
-%r92 = zext i128 %r91 to i160
-%r94 = getelementptr i32, i32* %r4, i32 4
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i160
-%r97 = shl i160 %r96, 128
-%r98 = or i160 %r92, %r97
-%r99 = zext i160 %r98 to i192
-%r101 = getelementptr i32, i32* %r4, i32 5
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i192
-%r104 = shl i192 %r103, 160
-%r105 = or i192 %r99, %r104
-%r106 = zext i192 %r105 to i224
-%r108 = getelementptr i32, i32* %r4, i32 6
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i224
-%r111 = shl i224 %r110, 192
-%r112 = or i224 %r106, %r111
-%r113 = zext i224 %r112 to i256
-%r115 = getelementptr i32, i32* %r4, i32 7
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i256
-%r118 = shl i256 %r117, 224
-%r119 = or i256 %r113, %r118
-%r120 = zext i256 %r119 to i288
-%r122 = getelementptr i32, i32* %r4, i32 8
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i288
-%r125 = shl i288 %r124, 256
-%r126 = or i288 %r120, %r125
-%r127 = zext i288 %r126 to i320
-%r129 = getelementptr i32, i32* %r4, i32 9
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i320
-%r132 = shl i320 %r131, 288
-%r133 = or i320 %r127, %r132
-%r134 = zext i320 %r133 to i352
-%r135 = add i352 %r69, %r134
-%r136 = trunc i352 %r135 to i320
-%r137 = trunc i320 %r136 to i32
-%r139 = getelementptr i32, i32* %r2, i32 0
-store i32 %r137, i32* %r139
-%r140 = lshr i320 %r136, 32
-%r141 = trunc i320 %r140 to i32
-%r143 = getelementptr i32, i32* %r2, i32 1
-store i32 %r141, i32* %r143
-%r144 = lshr i320 %r140, 32
-%r145 = trunc i320 %r144 to i32
-%r147 = getelementptr i32, i32* %r2, i32 2
-store i32 %r145, i32* %r147
-%r148 = lshr i320 %r144, 32
-%r149 = trunc i320 %r148 to i32
-%r151 = getelementptr i32, i32* %r2, i32 3
-store i32 %r149, i32* %r151
-%r152 = lshr i320 %r148, 32
-%r153 = trunc i320 %r152 to i32
-%r155 = getelementptr i32, i32* %r2, i32 4
-store i32 %r153, i32* %r155
-%r156 = lshr i320 %r152, 32
-%r157 = trunc i320 %r156 to i32
-%r159 = getelementptr i32, i32* %r2, i32 5
-store i32 %r157, i32* %r159
-%r160 = lshr i320 %r156, 32
-%r161 = trunc i320 %r160 to i32
-%r163 = getelementptr i32, i32* %r2, i32 6
-store i32 %r161, i32* %r163
-%r164 = lshr i320 %r160, 32
-%r165 = trunc i320 %r164 to i32
-%r167 = getelementptr i32, i32* %r2, i32 7
-store i32 %r165, i32* %r167
-%r168 = lshr i320 %r164, 32
-%r169 = trunc i320 %r168 to i32
-%r171 = getelementptr i32, i32* %r2, i32 8
-store i32 %r169, i32* %r171
-%r172 = lshr i320 %r168, 32
-%r173 = trunc i320 %r172 to i32
-%r175 = getelementptr i32, i32* %r2, i32 9
-store i32 %r173, i32* %r175
-%r176 = lshr i352 %r135, 320
-%r177 = trunc i352 %r176 to i32
-ret i32 %r177
-}
-define i32 @mcl_fp_subPre10L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r70 = load i32, i32* %r4
-%r71 = zext i32 %r70 to i64
-%r73 = getelementptr i32, i32* %r4, i32 1
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i64
-%r76 = shl i64 %r75, 32
-%r77 = or i64 %r71, %r76
-%r78 = zext i64 %r77 to i96
-%r80 = getelementptr i32, i32* %r4, i32 2
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i96
-%r83 = shl i96 %r82, 64
-%r84 = or i96 %r78, %r83
-%r85 = zext i96 %r84 to i128
-%r87 = getelementptr i32, i32* %r4, i32 3
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i128
-%r90 = shl i128 %r89, 96
-%r91 = or i128 %r85, %r90
-%r92 = zext i128 %r91 to i160
-%r94 = getelementptr i32, i32* %r4, i32 4
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i160
-%r97 = shl i160 %r96, 128
-%r98 = or i160 %r92, %r97
-%r99 = zext i160 %r98 to i192
-%r101 = getelementptr i32, i32* %r4, i32 5
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i192
-%r104 = shl i192 %r103, 160
-%r105 = or i192 %r99, %r104
-%r106 = zext i192 %r105 to i224
-%r108 = getelementptr i32, i32* %r4, i32 6
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i224
-%r111 = shl i224 %r110, 192
-%r112 = or i224 %r106, %r111
-%r113 = zext i224 %r112 to i256
-%r115 = getelementptr i32, i32* %r4, i32 7
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i256
-%r118 = shl i256 %r117, 224
-%r119 = or i256 %r113, %r118
-%r120 = zext i256 %r119 to i288
-%r122 = getelementptr i32, i32* %r4, i32 8
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i288
-%r125 = shl i288 %r124, 256
-%r126 = or i288 %r120, %r125
-%r127 = zext i288 %r126 to i320
-%r129 = getelementptr i32, i32* %r4, i32 9
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i320
-%r132 = shl i320 %r131, 288
-%r133 = or i320 %r127, %r132
-%r134 = zext i320 %r133 to i352
-%r135 = sub i352 %r69, %r134
-%r136 = trunc i352 %r135 to i320
-%r137 = trunc i320 %r136 to i32
-%r139 = getelementptr i32, i32* %r2, i32 0
-store i32 %r137, i32* %r139
-%r140 = lshr i320 %r136, 32
-%r141 = trunc i320 %r140 to i32
-%r143 = getelementptr i32, i32* %r2, i32 1
-store i32 %r141, i32* %r143
-%r144 = lshr i320 %r140, 32
-%r145 = trunc i320 %r144 to i32
-%r147 = getelementptr i32, i32* %r2, i32 2
-store i32 %r145, i32* %r147
-%r148 = lshr i320 %r144, 32
-%r149 = trunc i320 %r148 to i32
-%r151 = getelementptr i32, i32* %r2, i32 3
-store i32 %r149, i32* %r151
-%r152 = lshr i320 %r148, 32
-%r153 = trunc i320 %r152 to i32
-%r155 = getelementptr i32, i32* %r2, i32 4
-store i32 %r153, i32* %r155
-%r156 = lshr i320 %r152, 32
-%r157 = trunc i320 %r156 to i32
-%r159 = getelementptr i32, i32* %r2, i32 5
-store i32 %r157, i32* %r159
-%r160 = lshr i320 %r156, 32
-%r161 = trunc i320 %r160 to i32
-%r163 = getelementptr i32, i32* %r2, i32 6
-store i32 %r161, i32* %r163
-%r164 = lshr i320 %r160, 32
-%r165 = trunc i320 %r164 to i32
-%r167 = getelementptr i32, i32* %r2, i32 7
-store i32 %r165, i32* %r167
-%r168 = lshr i320 %r164, 32
-%r169 = trunc i320 %r168 to i32
-%r171 = getelementptr i32, i32* %r2, i32 8
-store i32 %r169, i32* %r171
-%r172 = lshr i320 %r168, 32
-%r173 = trunc i320 %r172 to i32
-%r175 = getelementptr i32, i32* %r2, i32 9
-store i32 %r173, i32* %r175
-%r176 = lshr i352 %r135, 320
-%r177 = trunc i352 %r176 to i32
-%r179 = and i32 %r177, 1
-ret i32 %r179
-}
-define void @mcl_fp_shr1_10L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = zext i64 %r10 to i96
-%r13 = getelementptr i32, i32* %r2, i32 2
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i96
-%r16 = shl i96 %r15, 64
-%r17 = or i96 %r11, %r16
-%r18 = zext i96 %r17 to i128
-%r20 = getelementptr i32, i32* %r2, i32 3
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i128
-%r23 = shl i128 %r22, 96
-%r24 = or i128 %r18, %r23
-%r25 = zext i128 %r24 to i160
-%r27 = getelementptr i32, i32* %r2, i32 4
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i160
-%r30 = shl i160 %r29, 128
-%r31 = or i160 %r25, %r30
-%r32 = zext i160 %r31 to i192
-%r34 = getelementptr i32, i32* %r2, i32 5
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i192
-%r37 = shl i192 %r36, 160
-%r38 = or i192 %r32, %r37
-%r39 = zext i192 %r38 to i224
-%r41 = getelementptr i32, i32* %r2, i32 6
-%r42 = load i32, i32* %r41
-%r43 = zext i32 %r42 to i224
-%r44 = shl i224 %r43, 192
-%r45 = or i224 %r39, %r44
-%r46 = zext i224 %r45 to i256
-%r48 = getelementptr i32, i32* %r2, i32 7
-%r49 = load i32, i32* %r48
-%r50 = zext i32 %r49 to i256
-%r51 = shl i256 %r50, 224
-%r52 = or i256 %r46, %r51
-%r53 = zext i256 %r52 to i288
-%r55 = getelementptr i32, i32* %r2, i32 8
-%r56 = load i32, i32* %r55
-%r57 = zext i32 %r56 to i288
-%r58 = shl i288 %r57, 256
-%r59 = or i288 %r53, %r58
-%r60 = zext i288 %r59 to i320
-%r62 = getelementptr i32, i32* %r2, i32 9
-%r63 = load i32, i32* %r62
-%r64 = zext i32 %r63 to i320
-%r65 = shl i320 %r64, 288
-%r66 = or i320 %r60, %r65
-%r67 = lshr i320 %r66, 1
-%r68 = trunc i320 %r67 to i32
-%r70 = getelementptr i32, i32* %r1, i32 0
-store i32 %r68, i32* %r70
-%r71 = lshr i320 %r67, 32
-%r72 = trunc i320 %r71 to i32
-%r74 = getelementptr i32, i32* %r1, i32 1
-store i32 %r72, i32* %r74
-%r75 = lshr i320 %r71, 32
-%r76 = trunc i320 %r75 to i32
-%r78 = getelementptr i32, i32* %r1, i32 2
-store i32 %r76, i32* %r78
-%r79 = lshr i320 %r75, 32
-%r80 = trunc i320 %r79 to i32
-%r82 = getelementptr i32, i32* %r1, i32 3
-store i32 %r80, i32* %r82
-%r83 = lshr i320 %r79, 32
-%r84 = trunc i320 %r83 to i32
-%r86 = getelementptr i32, i32* %r1, i32 4
-store i32 %r84, i32* %r86
-%r87 = lshr i320 %r83, 32
-%r88 = trunc i320 %r87 to i32
-%r90 = getelementptr i32, i32* %r1, i32 5
-store i32 %r88, i32* %r90
-%r91 = lshr i320 %r87, 32
-%r92 = trunc i320 %r91 to i32
-%r94 = getelementptr i32, i32* %r1, i32 6
-store i32 %r92, i32* %r94
-%r95 = lshr i320 %r91, 32
-%r96 = trunc i320 %r95 to i32
-%r98 = getelementptr i32, i32* %r1, i32 7
-store i32 %r96, i32* %r98
-%r99 = lshr i320 %r95, 32
-%r100 = trunc i320 %r99 to i32
-%r102 = getelementptr i32, i32* %r1, i32 8
-store i32 %r100, i32* %r102
-%r103 = lshr i320 %r99, 32
-%r104 = trunc i320 %r103 to i32
-%r106 = getelementptr i32, i32* %r1, i32 9
-store i32 %r104, i32* %r106
-ret void
-}
-define void @mcl_fp_add10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = load i32, i32* %r3
-%r70 = zext i32 %r69 to i64
-%r72 = getelementptr i32, i32* %r3, i32 1
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i64
-%r75 = shl i64 %r74, 32
-%r76 = or i64 %r70, %r75
-%r77 = zext i64 %r76 to i96
-%r79 = getelementptr i32, i32* %r3, i32 2
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i96
-%r82 = shl i96 %r81, 64
-%r83 = or i96 %r77, %r82
-%r84 = zext i96 %r83 to i128
-%r86 = getelementptr i32, i32* %r3, i32 3
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i128
-%r89 = shl i128 %r88, 96
-%r90 = or i128 %r84, %r89
-%r91 = zext i128 %r90 to i160
-%r93 = getelementptr i32, i32* %r3, i32 4
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i160
-%r96 = shl i160 %r95, 128
-%r97 = or i160 %r91, %r96
-%r98 = zext i160 %r97 to i192
-%r100 = getelementptr i32, i32* %r3, i32 5
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i192
-%r103 = shl i192 %r102, 160
-%r104 = or i192 %r98, %r103
-%r105 = zext i192 %r104 to i224
-%r107 = getelementptr i32, i32* %r3, i32 6
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i224
-%r110 = shl i224 %r109, 192
-%r111 = or i224 %r105, %r110
-%r112 = zext i224 %r111 to i256
-%r114 = getelementptr i32, i32* %r3, i32 7
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i256
-%r117 = shl i256 %r116, 224
-%r118 = or i256 %r112, %r117
-%r119 = zext i256 %r118 to i288
-%r121 = getelementptr i32, i32* %r3, i32 8
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i288
-%r124 = shl i288 %r123, 256
-%r125 = or i288 %r119, %r124
-%r126 = zext i288 %r125 to i320
-%r128 = getelementptr i32, i32* %r3, i32 9
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i320
-%r131 = shl i320 %r130, 288
-%r132 = or i320 %r126, %r131
-%r133 = zext i320 %r68 to i352
-%r134 = zext i320 %r132 to i352
-%r135 = add i352 %r133, %r134
-%r136 = trunc i352 %r135 to i320
-%r137 = trunc i320 %r136 to i32
-%r139 = getelementptr i32, i32* %r1, i32 0
-store i32 %r137, i32* %r139
-%r140 = lshr i320 %r136, 32
-%r141 = trunc i320 %r140 to i32
-%r143 = getelementptr i32, i32* %r1, i32 1
-store i32 %r141, i32* %r143
-%r144 = lshr i320 %r140, 32
-%r145 = trunc i320 %r144 to i32
-%r147 = getelementptr i32, i32* %r1, i32 2
-store i32 %r145, i32* %r147
-%r148 = lshr i320 %r144, 32
-%r149 = trunc i320 %r148 to i32
-%r151 = getelementptr i32, i32* %r1, i32 3
-store i32 %r149, i32* %r151
-%r152 = lshr i320 %r148, 32
-%r153 = trunc i320 %r152 to i32
-%r155 = getelementptr i32, i32* %r1, i32 4
-store i32 %r153, i32* %r155
-%r156 = lshr i320 %r152, 32
-%r157 = trunc i320 %r156 to i32
-%r159 = getelementptr i32, i32* %r1, i32 5
-store i32 %r157, i32* %r159
-%r160 = lshr i320 %r156, 32
-%r161 = trunc i320 %r160 to i32
-%r163 = getelementptr i32, i32* %r1, i32 6
-store i32 %r161, i32* %r163
-%r164 = lshr i320 %r160, 32
-%r165 = trunc i320 %r164 to i32
-%r167 = getelementptr i32, i32* %r1, i32 7
-store i32 %r165, i32* %r167
-%r168 = lshr i320 %r164, 32
-%r169 = trunc i320 %r168 to i32
-%r171 = getelementptr i32, i32* %r1, i32 8
-store i32 %r169, i32* %r171
-%r172 = lshr i320 %r168, 32
-%r173 = trunc i320 %r172 to i32
-%r175 = getelementptr i32, i32* %r1, i32 9
-store i32 %r173, i32* %r175
-%r176 = load i32, i32* %r4
-%r177 = zext i32 %r176 to i64
-%r179 = getelementptr i32, i32* %r4, i32 1
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i64
-%r182 = shl i64 %r181, 32
-%r183 = or i64 %r177, %r182
-%r184 = zext i64 %r183 to i96
-%r186 = getelementptr i32, i32* %r4, i32 2
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i96
-%r189 = shl i96 %r188, 64
-%r190 = or i96 %r184, %r189
-%r191 = zext i96 %r190 to i128
-%r193 = getelementptr i32, i32* %r4, i32 3
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i128
-%r196 = shl i128 %r195, 96
-%r197 = or i128 %r191, %r196
-%r198 = zext i128 %r197 to i160
-%r200 = getelementptr i32, i32* %r4, i32 4
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i160
-%r203 = shl i160 %r202, 128
-%r204 = or i160 %r198, %r203
-%r205 = zext i160 %r204 to i192
-%r207 = getelementptr i32, i32* %r4, i32 5
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i192
-%r210 = shl i192 %r209, 160
-%r211 = or i192 %r205, %r210
-%r212 = zext i192 %r211 to i224
-%r214 = getelementptr i32, i32* %r4, i32 6
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i224
-%r217 = shl i224 %r216, 192
-%r218 = or i224 %r212, %r217
-%r219 = zext i224 %r218 to i256
-%r221 = getelementptr i32, i32* %r4, i32 7
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i256
-%r224 = shl i256 %r223, 224
-%r225 = or i256 %r219, %r224
-%r226 = zext i256 %r225 to i288
-%r228 = getelementptr i32, i32* %r4, i32 8
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i288
-%r231 = shl i288 %r230, 256
-%r232 = or i288 %r226, %r231
-%r233 = zext i288 %r232 to i320
-%r235 = getelementptr i32, i32* %r4, i32 9
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i320
-%r238 = shl i320 %r237, 288
-%r239 = or i320 %r233, %r238
-%r240 = zext i320 %r239 to i352
-%r241 = sub i352 %r135, %r240
-%r242 = lshr i352 %r241, 320
-%r243 = trunc i352 %r242 to i1
-br i1%r243, label %carry, label %nocarry
-nocarry:
-%r244 = trunc i352 %r241 to i320
-%r245 = trunc i320 %r244 to i32
-%r247 = getelementptr i32, i32* %r1, i32 0
-store i32 %r245, i32* %r247
-%r248 = lshr i320 %r244, 32
-%r249 = trunc i320 %r248 to i32
-%r251 = getelementptr i32, i32* %r1, i32 1
-store i32 %r249, i32* %r251
-%r252 = lshr i320 %r248, 32
-%r253 = trunc i320 %r252 to i32
-%r255 = getelementptr i32, i32* %r1, i32 2
-store i32 %r253, i32* %r255
-%r256 = lshr i320 %r252, 32
-%r257 = trunc i320 %r256 to i32
-%r259 = getelementptr i32, i32* %r1, i32 3
-store i32 %r257, i32* %r259
-%r260 = lshr i320 %r256, 32
-%r261 = trunc i320 %r260 to i32
-%r263 = getelementptr i32, i32* %r1, i32 4
-store i32 %r261, i32* %r263
-%r264 = lshr i320 %r260, 32
-%r265 = trunc i320 %r264 to i32
-%r267 = getelementptr i32, i32* %r1, i32 5
-store i32 %r265, i32* %r267
-%r268 = lshr i320 %r264, 32
-%r269 = trunc i320 %r268 to i32
-%r271 = getelementptr i32, i32* %r1, i32 6
-store i32 %r269, i32* %r271
-%r272 = lshr i320 %r268, 32
-%r273 = trunc i320 %r272 to i32
-%r275 = getelementptr i32, i32* %r1, i32 7
-store i32 %r273, i32* %r275
-%r276 = lshr i320 %r272, 32
-%r277 = trunc i320 %r276 to i32
-%r279 = getelementptr i32, i32* %r1, i32 8
-store i32 %r277, i32* %r279
-%r280 = lshr i320 %r276, 32
-%r281 = trunc i320 %r280 to i32
-%r283 = getelementptr i32, i32* %r1, i32 9
-store i32 %r281, i32* %r283
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = load i32, i32* %r3
-%r70 = zext i32 %r69 to i64
-%r72 = getelementptr i32, i32* %r3, i32 1
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i64
-%r75 = shl i64 %r74, 32
-%r76 = or i64 %r70, %r75
-%r77 = zext i64 %r76 to i96
-%r79 = getelementptr i32, i32* %r3, i32 2
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i96
-%r82 = shl i96 %r81, 64
-%r83 = or i96 %r77, %r82
-%r84 = zext i96 %r83 to i128
-%r86 = getelementptr i32, i32* %r3, i32 3
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i128
-%r89 = shl i128 %r88, 96
-%r90 = or i128 %r84, %r89
-%r91 = zext i128 %r90 to i160
-%r93 = getelementptr i32, i32* %r3, i32 4
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i160
-%r96 = shl i160 %r95, 128
-%r97 = or i160 %r91, %r96
-%r98 = zext i160 %r97 to i192
-%r100 = getelementptr i32, i32* %r3, i32 5
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i192
-%r103 = shl i192 %r102, 160
-%r104 = or i192 %r98, %r103
-%r105 = zext i192 %r104 to i224
-%r107 = getelementptr i32, i32* %r3, i32 6
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i224
-%r110 = shl i224 %r109, 192
-%r111 = or i224 %r105, %r110
-%r112 = zext i224 %r111 to i256
-%r114 = getelementptr i32, i32* %r3, i32 7
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i256
-%r117 = shl i256 %r116, 224
-%r118 = or i256 %r112, %r117
-%r119 = zext i256 %r118 to i288
-%r121 = getelementptr i32, i32* %r3, i32 8
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i288
-%r124 = shl i288 %r123, 256
-%r125 = or i288 %r119, %r124
-%r126 = zext i288 %r125 to i320
-%r128 = getelementptr i32, i32* %r3, i32 9
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i320
-%r131 = shl i320 %r130, 288
-%r132 = or i320 %r126, %r131
-%r133 = add i320 %r68, %r132
-%r134 = load i32, i32* %r4
-%r135 = zext i32 %r134 to i64
-%r137 = getelementptr i32, i32* %r4, i32 1
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i64
-%r140 = shl i64 %r139, 32
-%r141 = or i64 %r135, %r140
-%r142 = zext i64 %r141 to i96
-%r144 = getelementptr i32, i32* %r4, i32 2
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i96
-%r147 = shl i96 %r146, 64
-%r148 = or i96 %r142, %r147
-%r149 = zext i96 %r148 to i128
-%r151 = getelementptr i32, i32* %r4, i32 3
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i128
-%r154 = shl i128 %r153, 96
-%r155 = or i128 %r149, %r154
-%r156 = zext i128 %r155 to i160
-%r158 = getelementptr i32, i32* %r4, i32 4
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i160
-%r161 = shl i160 %r160, 128
-%r162 = or i160 %r156, %r161
-%r163 = zext i160 %r162 to i192
-%r165 = getelementptr i32, i32* %r4, i32 5
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i192
-%r168 = shl i192 %r167, 160
-%r169 = or i192 %r163, %r168
-%r170 = zext i192 %r169 to i224
-%r172 = getelementptr i32, i32* %r4, i32 6
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i224
-%r175 = shl i224 %r174, 192
-%r176 = or i224 %r170, %r175
-%r177 = zext i224 %r176 to i256
-%r179 = getelementptr i32, i32* %r4, i32 7
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i256
-%r182 = shl i256 %r181, 224
-%r183 = or i256 %r177, %r182
-%r184 = zext i256 %r183 to i288
-%r186 = getelementptr i32, i32* %r4, i32 8
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i288
-%r189 = shl i288 %r188, 256
-%r190 = or i288 %r184, %r189
-%r191 = zext i288 %r190 to i320
-%r193 = getelementptr i32, i32* %r4, i32 9
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i320
-%r196 = shl i320 %r195, 288
-%r197 = or i320 %r191, %r196
-%r198 = sub i320 %r133, %r197
-%r199 = lshr i320 %r198, 319
-%r200 = trunc i320 %r199 to i1
-%r201 = select i1 %r200, i320 %r133, i320 %r198
-%r202 = trunc i320 %r201 to i32
-%r204 = getelementptr i32, i32* %r1, i32 0
-store i32 %r202, i32* %r204
-%r205 = lshr i320 %r201, 32
-%r206 = trunc i320 %r205 to i32
-%r208 = getelementptr i32, i32* %r1, i32 1
-store i32 %r206, i32* %r208
-%r209 = lshr i320 %r205, 32
-%r210 = trunc i320 %r209 to i32
-%r212 = getelementptr i32, i32* %r1, i32 2
-store i32 %r210, i32* %r212
-%r213 = lshr i320 %r209, 32
-%r214 = trunc i320 %r213 to i32
-%r216 = getelementptr i32, i32* %r1, i32 3
-store i32 %r214, i32* %r216
-%r217 = lshr i320 %r213, 32
-%r218 = trunc i320 %r217 to i32
-%r220 = getelementptr i32, i32* %r1, i32 4
-store i32 %r218, i32* %r220
-%r221 = lshr i320 %r217, 32
-%r222 = trunc i320 %r221 to i32
-%r224 = getelementptr i32, i32* %r1, i32 5
-store i32 %r222, i32* %r224
-%r225 = lshr i320 %r221, 32
-%r226 = trunc i320 %r225 to i32
-%r228 = getelementptr i32, i32* %r1, i32 6
-store i32 %r226, i32* %r228
-%r229 = lshr i320 %r225, 32
-%r230 = trunc i320 %r229 to i32
-%r232 = getelementptr i32, i32* %r1, i32 7
-store i32 %r230, i32* %r232
-%r233 = lshr i320 %r229, 32
-%r234 = trunc i320 %r233 to i32
-%r236 = getelementptr i32, i32* %r1, i32 8
-store i32 %r234, i32* %r236
-%r237 = lshr i320 %r233, 32
-%r238 = trunc i320 %r237 to i32
-%r240 = getelementptr i32, i32* %r1, i32 9
-store i32 %r238, i32* %r240
-ret void
-}
-define void @mcl_fp_sub10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = load i32, i32* %r3
-%r70 = zext i32 %r69 to i64
-%r72 = getelementptr i32, i32* %r3, i32 1
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i64
-%r75 = shl i64 %r74, 32
-%r76 = or i64 %r70, %r75
-%r77 = zext i64 %r76 to i96
-%r79 = getelementptr i32, i32* %r3, i32 2
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i96
-%r82 = shl i96 %r81, 64
-%r83 = or i96 %r77, %r82
-%r84 = zext i96 %r83 to i128
-%r86 = getelementptr i32, i32* %r3, i32 3
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i128
-%r89 = shl i128 %r88, 96
-%r90 = or i128 %r84, %r89
-%r91 = zext i128 %r90 to i160
-%r93 = getelementptr i32, i32* %r3, i32 4
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i160
-%r96 = shl i160 %r95, 128
-%r97 = or i160 %r91, %r96
-%r98 = zext i160 %r97 to i192
-%r100 = getelementptr i32, i32* %r3, i32 5
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i192
-%r103 = shl i192 %r102, 160
-%r104 = or i192 %r98, %r103
-%r105 = zext i192 %r104 to i224
-%r107 = getelementptr i32, i32* %r3, i32 6
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i224
-%r110 = shl i224 %r109, 192
-%r111 = or i224 %r105, %r110
-%r112 = zext i224 %r111 to i256
-%r114 = getelementptr i32, i32* %r3, i32 7
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i256
-%r117 = shl i256 %r116, 224
-%r118 = or i256 %r112, %r117
-%r119 = zext i256 %r118 to i288
-%r121 = getelementptr i32, i32* %r3, i32 8
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i288
-%r124 = shl i288 %r123, 256
-%r125 = or i288 %r119, %r124
-%r126 = zext i288 %r125 to i320
-%r128 = getelementptr i32, i32* %r3, i32 9
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i320
-%r131 = shl i320 %r130, 288
-%r132 = or i320 %r126, %r131
-%r133 = zext i320 %r68 to i352
-%r134 = zext i320 %r132 to i352
-%r135 = sub i352 %r133, %r134
-%r136 = trunc i352 %r135 to i320
-%r137 = lshr i352 %r135, 320
-%r138 = trunc i352 %r137 to i1
-%r139 = trunc i320 %r136 to i32
-%r141 = getelementptr i32, i32* %r1, i32 0
-store i32 %r139, i32* %r141
-%r142 = lshr i320 %r136, 32
-%r143 = trunc i320 %r142 to i32
-%r145 = getelementptr i32, i32* %r1, i32 1
-store i32 %r143, i32* %r145
-%r146 = lshr i320 %r142, 32
-%r147 = trunc i320 %r146 to i32
-%r149 = getelementptr i32, i32* %r1, i32 2
-store i32 %r147, i32* %r149
-%r150 = lshr i320 %r146, 32
-%r151 = trunc i320 %r150 to i32
-%r153 = getelementptr i32, i32* %r1, i32 3
-store i32 %r151, i32* %r153
-%r154 = lshr i320 %r150, 32
-%r155 = trunc i320 %r154 to i32
-%r157 = getelementptr i32, i32* %r1, i32 4
-store i32 %r155, i32* %r157
-%r158 = lshr i320 %r154, 32
-%r159 = trunc i320 %r158 to i32
-%r161 = getelementptr i32, i32* %r1, i32 5
-store i32 %r159, i32* %r161
-%r162 = lshr i320 %r158, 32
-%r163 = trunc i320 %r162 to i32
-%r165 = getelementptr i32, i32* %r1, i32 6
-store i32 %r163, i32* %r165
-%r166 = lshr i320 %r162, 32
-%r167 = trunc i320 %r166 to i32
-%r169 = getelementptr i32, i32* %r1, i32 7
-store i32 %r167, i32* %r169
-%r170 = lshr i320 %r166, 32
-%r171 = trunc i320 %r170 to i32
-%r173 = getelementptr i32, i32* %r1, i32 8
-store i32 %r171, i32* %r173
-%r174 = lshr i320 %r170, 32
-%r175 = trunc i320 %r174 to i32
-%r177 = getelementptr i32, i32* %r1, i32 9
-store i32 %r175, i32* %r177
-br i1%r138, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r178 = load i32, i32* %r4
-%r179 = zext i32 %r178 to i64
-%r181 = getelementptr i32, i32* %r4, i32 1
-%r182 = load i32, i32* %r181
-%r183 = zext i32 %r182 to i64
-%r184 = shl i64 %r183, 32
-%r185 = or i64 %r179, %r184
-%r186 = zext i64 %r185 to i96
-%r188 = getelementptr i32, i32* %r4, i32 2
-%r189 = load i32, i32* %r188
-%r190 = zext i32 %r189 to i96
-%r191 = shl i96 %r190, 64
-%r192 = or i96 %r186, %r191
-%r193 = zext i96 %r192 to i128
-%r195 = getelementptr i32, i32* %r4, i32 3
-%r196 = load i32, i32* %r195
-%r197 = zext i32 %r196 to i128
-%r198 = shl i128 %r197, 96
-%r199 = or i128 %r193, %r198
-%r200 = zext i128 %r199 to i160
-%r202 = getelementptr i32, i32* %r4, i32 4
-%r203 = load i32, i32* %r202
-%r204 = zext i32 %r203 to i160
-%r205 = shl i160 %r204, 128
-%r206 = or i160 %r200, %r205
-%r207 = zext i160 %r206 to i192
-%r209 = getelementptr i32, i32* %r4, i32 5
-%r210 = load i32, i32* %r209
-%r211 = zext i32 %r210 to i192
-%r212 = shl i192 %r211, 160
-%r213 = or i192 %r207, %r212
-%r214 = zext i192 %r213 to i224
-%r216 = getelementptr i32, i32* %r4, i32 6
-%r217 = load i32, i32* %r216
-%r218 = zext i32 %r217 to i224
-%r219 = shl i224 %r218, 192
-%r220 = or i224 %r214, %r219
-%r221 = zext i224 %r220 to i256
-%r223 = getelementptr i32, i32* %r4, i32 7
-%r224 = load i32, i32* %r223
-%r225 = zext i32 %r224 to i256
-%r226 = shl i256 %r225, 224
-%r227 = or i256 %r221, %r226
-%r228 = zext i256 %r227 to i288
-%r230 = getelementptr i32, i32* %r4, i32 8
-%r231 = load i32, i32* %r230
-%r232 = zext i32 %r231 to i288
-%r233 = shl i288 %r232, 256
-%r234 = or i288 %r228, %r233
-%r235 = zext i288 %r234 to i320
-%r237 = getelementptr i32, i32* %r4, i32 9
-%r238 = load i32, i32* %r237
-%r239 = zext i32 %r238 to i320
-%r240 = shl i320 %r239, 288
-%r241 = or i320 %r235, %r240
-%r242 = add i320 %r136, %r241
-%r243 = trunc i320 %r242 to i32
-%r245 = getelementptr i32, i32* %r1, i32 0
-store i32 %r243, i32* %r245
-%r246 = lshr i320 %r242, 32
-%r247 = trunc i320 %r246 to i32
-%r249 = getelementptr i32, i32* %r1, i32 1
-store i32 %r247, i32* %r249
-%r250 = lshr i320 %r246, 32
-%r251 = trunc i320 %r250 to i32
-%r253 = getelementptr i32, i32* %r1, i32 2
-store i32 %r251, i32* %r253
-%r254 = lshr i320 %r250, 32
-%r255 = trunc i320 %r254 to i32
-%r257 = getelementptr i32, i32* %r1, i32 3
-store i32 %r255, i32* %r257
-%r258 = lshr i320 %r254, 32
-%r259 = trunc i320 %r258 to i32
-%r261 = getelementptr i32, i32* %r1, i32 4
-store i32 %r259, i32* %r261
-%r262 = lshr i320 %r258, 32
-%r263 = trunc i320 %r262 to i32
-%r265 = getelementptr i32, i32* %r1, i32 5
-store i32 %r263, i32* %r265
-%r266 = lshr i320 %r262, 32
-%r267 = trunc i320 %r266 to i32
-%r269 = getelementptr i32, i32* %r1, i32 6
-store i32 %r267, i32* %r269
-%r270 = lshr i320 %r266, 32
-%r271 = trunc i320 %r270 to i32
-%r273 = getelementptr i32, i32* %r1, i32 7
-store i32 %r271, i32* %r273
-%r274 = lshr i320 %r270, 32
-%r275 = trunc i320 %r274 to i32
-%r277 = getelementptr i32, i32* %r1, i32 8
-store i32 %r275, i32* %r277
-%r278 = lshr i320 %r274, 32
-%r279 = trunc i320 %r278 to i32
-%r281 = getelementptr i32, i32* %r1, i32 9
-store i32 %r279, i32* %r281
-ret void
-}
-define void @mcl_fp_subNF10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = load i32, i32* %r3
-%r70 = zext i32 %r69 to i64
-%r72 = getelementptr i32, i32* %r3, i32 1
-%r73 = load i32, i32* %r72
-%r74 = zext i32 %r73 to i64
-%r75 = shl i64 %r74, 32
-%r76 = or i64 %r70, %r75
-%r77 = zext i64 %r76 to i96
-%r79 = getelementptr i32, i32* %r3, i32 2
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i96
-%r82 = shl i96 %r81, 64
-%r83 = or i96 %r77, %r82
-%r84 = zext i96 %r83 to i128
-%r86 = getelementptr i32, i32* %r3, i32 3
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i128
-%r89 = shl i128 %r88, 96
-%r90 = or i128 %r84, %r89
-%r91 = zext i128 %r90 to i160
-%r93 = getelementptr i32, i32* %r3, i32 4
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i160
-%r96 = shl i160 %r95, 128
-%r97 = or i160 %r91, %r96
-%r98 = zext i160 %r97 to i192
-%r100 = getelementptr i32, i32* %r3, i32 5
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i192
-%r103 = shl i192 %r102, 160
-%r104 = or i192 %r98, %r103
-%r105 = zext i192 %r104 to i224
-%r107 = getelementptr i32, i32* %r3, i32 6
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i224
-%r110 = shl i224 %r109, 192
-%r111 = or i224 %r105, %r110
-%r112 = zext i224 %r111 to i256
-%r114 = getelementptr i32, i32* %r3, i32 7
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i256
-%r117 = shl i256 %r116, 224
-%r118 = or i256 %r112, %r117
-%r119 = zext i256 %r118 to i288
-%r121 = getelementptr i32, i32* %r3, i32 8
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i288
-%r124 = shl i288 %r123, 256
-%r125 = or i288 %r119, %r124
-%r126 = zext i288 %r125 to i320
-%r128 = getelementptr i32, i32* %r3, i32 9
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i320
-%r131 = shl i320 %r130, 288
-%r132 = or i320 %r126, %r131
-%r133 = sub i320 %r68, %r132
-%r134 = lshr i320 %r133, 319
-%r135 = trunc i320 %r134 to i1
-%r136 = load i32, i32* %r4
-%r137 = zext i32 %r136 to i64
-%r139 = getelementptr i32, i32* %r4, i32 1
-%r140 = load i32, i32* %r139
-%r141 = zext i32 %r140 to i64
-%r142 = shl i64 %r141, 32
-%r143 = or i64 %r137, %r142
-%r144 = zext i64 %r143 to i96
-%r146 = getelementptr i32, i32* %r4, i32 2
-%r147 = load i32, i32* %r146
-%r148 = zext i32 %r147 to i96
-%r149 = shl i96 %r148, 64
-%r150 = or i96 %r144, %r149
-%r151 = zext i96 %r150 to i128
-%r153 = getelementptr i32, i32* %r4, i32 3
-%r154 = load i32, i32* %r153
-%r155 = zext i32 %r154 to i128
-%r156 = shl i128 %r155, 96
-%r157 = or i128 %r151, %r156
-%r158 = zext i128 %r157 to i160
-%r160 = getelementptr i32, i32* %r4, i32 4
-%r161 = load i32, i32* %r160
-%r162 = zext i32 %r161 to i160
-%r163 = shl i160 %r162, 128
-%r164 = or i160 %r158, %r163
-%r165 = zext i160 %r164 to i192
-%r167 = getelementptr i32, i32* %r4, i32 5
-%r168 = load i32, i32* %r167
-%r169 = zext i32 %r168 to i192
-%r170 = shl i192 %r169, 160
-%r171 = or i192 %r165, %r170
-%r172 = zext i192 %r171 to i224
-%r174 = getelementptr i32, i32* %r4, i32 6
-%r175 = load i32, i32* %r174
-%r176 = zext i32 %r175 to i224
-%r177 = shl i224 %r176, 192
-%r178 = or i224 %r172, %r177
-%r179 = zext i224 %r178 to i256
-%r181 = getelementptr i32, i32* %r4, i32 7
-%r182 = load i32, i32* %r181
-%r183 = zext i32 %r182 to i256
-%r184 = shl i256 %r183, 224
-%r185 = or i256 %r179, %r184
-%r186 = zext i256 %r185 to i288
-%r188 = getelementptr i32, i32* %r4, i32 8
-%r189 = load i32, i32* %r188
-%r190 = zext i32 %r189 to i288
-%r191 = shl i288 %r190, 256
-%r192 = or i288 %r186, %r191
-%r193 = zext i288 %r192 to i320
-%r195 = getelementptr i32, i32* %r4, i32 9
-%r196 = load i32, i32* %r195
-%r197 = zext i32 %r196 to i320
-%r198 = shl i320 %r197, 288
-%r199 = or i320 %r193, %r198
-%r201 = select i1 %r135, i320 %r199, i320 0
-%r202 = add i320 %r133, %r201
-%r203 = trunc i320 %r202 to i32
-%r205 = getelementptr i32, i32* %r1, i32 0
-store i32 %r203, i32* %r205
-%r206 = lshr i320 %r202, 32
-%r207 = trunc i320 %r206 to i32
-%r209 = getelementptr i32, i32* %r1, i32 1
-store i32 %r207, i32* %r209
-%r210 = lshr i320 %r206, 32
-%r211 = trunc i320 %r210 to i32
-%r213 = getelementptr i32, i32* %r1, i32 2
-store i32 %r211, i32* %r213
-%r214 = lshr i320 %r210, 32
-%r215 = trunc i320 %r214 to i32
-%r217 = getelementptr i32, i32* %r1, i32 3
-store i32 %r215, i32* %r217
-%r218 = lshr i320 %r214, 32
-%r219 = trunc i320 %r218 to i32
-%r221 = getelementptr i32, i32* %r1, i32 4
-store i32 %r219, i32* %r221
-%r222 = lshr i320 %r218, 32
-%r223 = trunc i320 %r222 to i32
-%r225 = getelementptr i32, i32* %r1, i32 5
-store i32 %r223, i32* %r225
-%r226 = lshr i320 %r222, 32
-%r227 = trunc i320 %r226 to i32
-%r229 = getelementptr i32, i32* %r1, i32 6
-store i32 %r227, i32* %r229
-%r230 = lshr i320 %r226, 32
-%r231 = trunc i320 %r230 to i32
-%r233 = getelementptr i32, i32* %r1, i32 7
-store i32 %r231, i32* %r233
-%r234 = lshr i320 %r230, 32
-%r235 = trunc i320 %r234 to i32
-%r237 = getelementptr i32, i32* %r1, i32 8
-store i32 %r235, i32* %r237
-%r238 = lshr i320 %r234, 32
-%r239 = trunc i320 %r238 to i32
-%r241 = getelementptr i32, i32* %r1, i32 9
-store i32 %r239, i32* %r241
-ret void
-}
-define void @mcl_fpDbl_add10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = zext i576 %r124 to i608
-%r127 = getelementptr i32, i32* %r2, i32 18
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i608
-%r130 = shl i608 %r129, 576
-%r131 = or i608 %r125, %r130
-%r132 = zext i608 %r131 to i640
-%r134 = getelementptr i32, i32* %r2, i32 19
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i640
-%r137 = shl i640 %r136, 608
-%r138 = or i640 %r132, %r137
-%r139 = load i32, i32* %r3
-%r140 = zext i32 %r139 to i64
-%r142 = getelementptr i32, i32* %r3, i32 1
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i64
-%r145 = shl i64 %r144, 32
-%r146 = or i64 %r140, %r145
-%r147 = zext i64 %r146 to i96
-%r149 = getelementptr i32, i32* %r3, i32 2
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i96
-%r152 = shl i96 %r151, 64
-%r153 = or i96 %r147, %r152
-%r154 = zext i96 %r153 to i128
-%r156 = getelementptr i32, i32* %r3, i32 3
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i128
-%r159 = shl i128 %r158, 96
-%r160 = or i128 %r154, %r159
-%r161 = zext i128 %r160 to i160
-%r163 = getelementptr i32, i32* %r3, i32 4
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i160
-%r166 = shl i160 %r165, 128
-%r167 = or i160 %r161, %r166
-%r168 = zext i160 %r167 to i192
-%r170 = getelementptr i32, i32* %r3, i32 5
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i192
-%r173 = shl i192 %r172, 160
-%r174 = or i192 %r168, %r173
-%r175 = zext i192 %r174 to i224
-%r177 = getelementptr i32, i32* %r3, i32 6
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i224
-%r180 = shl i224 %r179, 192
-%r181 = or i224 %r175, %r180
-%r182 = zext i224 %r181 to i256
-%r184 = getelementptr i32, i32* %r3, i32 7
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i256
-%r187 = shl i256 %r186, 224
-%r188 = or i256 %r182, %r187
-%r189 = zext i256 %r188 to i288
-%r191 = getelementptr i32, i32* %r3, i32 8
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i288
-%r194 = shl i288 %r193, 256
-%r195 = or i288 %r189, %r194
-%r196 = zext i288 %r195 to i320
-%r198 = getelementptr i32, i32* %r3, i32 9
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i320
-%r201 = shl i320 %r200, 288
-%r202 = or i320 %r196, %r201
-%r203 = zext i320 %r202 to i352
-%r205 = getelementptr i32, i32* %r3, i32 10
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i352
-%r208 = shl i352 %r207, 320
-%r209 = or i352 %r203, %r208
-%r210 = zext i352 %r209 to i384
-%r212 = getelementptr i32, i32* %r3, i32 11
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i384
-%r215 = shl i384 %r214, 352
-%r216 = or i384 %r210, %r215
-%r217 = zext i384 %r216 to i416
-%r219 = getelementptr i32, i32* %r3, i32 12
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i416
-%r222 = shl i416 %r221, 384
-%r223 = or i416 %r217, %r222
-%r224 = zext i416 %r223 to i448
-%r226 = getelementptr i32, i32* %r3, i32 13
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i448
-%r229 = shl i448 %r228, 416
-%r230 = or i448 %r224, %r229
-%r231 = zext i448 %r230 to i480
-%r233 = getelementptr i32, i32* %r3, i32 14
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i480
-%r236 = shl i480 %r235, 448
-%r237 = or i480 %r231, %r236
-%r238 = zext i480 %r237 to i512
-%r240 = getelementptr i32, i32* %r3, i32 15
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i512
-%r243 = shl i512 %r242, 480
-%r244 = or i512 %r238, %r243
-%r245 = zext i512 %r244 to i544
-%r247 = getelementptr i32, i32* %r3, i32 16
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i544
-%r250 = shl i544 %r249, 512
-%r251 = or i544 %r245, %r250
-%r252 = zext i544 %r251 to i576
-%r254 = getelementptr i32, i32* %r3, i32 17
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i576
-%r257 = shl i576 %r256, 544
-%r258 = or i576 %r252, %r257
-%r259 = zext i576 %r258 to i608
-%r261 = getelementptr i32, i32* %r3, i32 18
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i608
-%r264 = shl i608 %r263, 576
-%r265 = or i608 %r259, %r264
-%r266 = zext i608 %r265 to i640
-%r268 = getelementptr i32, i32* %r3, i32 19
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i640
-%r271 = shl i640 %r270, 608
-%r272 = or i640 %r266, %r271
-%r273 = zext i640 %r138 to i672
-%r274 = zext i640 %r272 to i672
-%r275 = add i672 %r273, %r274
-%r276 = trunc i672 %r275 to i320
-%r277 = trunc i320 %r276 to i32
-%r279 = getelementptr i32, i32* %r1, i32 0
-store i32 %r277, i32* %r279
-%r280 = lshr i320 %r276, 32
-%r281 = trunc i320 %r280 to i32
-%r283 = getelementptr i32, i32* %r1, i32 1
-store i32 %r281, i32* %r283
-%r284 = lshr i320 %r280, 32
-%r285 = trunc i320 %r284 to i32
-%r287 = getelementptr i32, i32* %r1, i32 2
-store i32 %r285, i32* %r287
-%r288 = lshr i320 %r284, 32
-%r289 = trunc i320 %r288 to i32
-%r291 = getelementptr i32, i32* %r1, i32 3
-store i32 %r289, i32* %r291
-%r292 = lshr i320 %r288, 32
-%r293 = trunc i320 %r292 to i32
-%r295 = getelementptr i32, i32* %r1, i32 4
-store i32 %r293, i32* %r295
-%r296 = lshr i320 %r292, 32
-%r297 = trunc i320 %r296 to i32
-%r299 = getelementptr i32, i32* %r1, i32 5
-store i32 %r297, i32* %r299
-%r300 = lshr i320 %r296, 32
-%r301 = trunc i320 %r300 to i32
-%r303 = getelementptr i32, i32* %r1, i32 6
-store i32 %r301, i32* %r303
-%r304 = lshr i320 %r300, 32
-%r305 = trunc i320 %r304 to i32
-%r307 = getelementptr i32, i32* %r1, i32 7
-store i32 %r305, i32* %r307
-%r308 = lshr i320 %r304, 32
-%r309 = trunc i320 %r308 to i32
-%r311 = getelementptr i32, i32* %r1, i32 8
-store i32 %r309, i32* %r311
-%r312 = lshr i320 %r308, 32
-%r313 = trunc i320 %r312 to i32
-%r315 = getelementptr i32, i32* %r1, i32 9
-store i32 %r313, i32* %r315
-%r316 = lshr i672 %r275, 320
-%r317 = trunc i672 %r316 to i352
-%r318 = load i32, i32* %r4
-%r319 = zext i32 %r318 to i64
-%r321 = getelementptr i32, i32* %r4, i32 1
-%r322 = load i32, i32* %r321
-%r323 = zext i32 %r322 to i64
-%r324 = shl i64 %r323, 32
-%r325 = or i64 %r319, %r324
-%r326 = zext i64 %r325 to i96
-%r328 = getelementptr i32, i32* %r4, i32 2
-%r329 = load i32, i32* %r328
-%r330 = zext i32 %r329 to i96
-%r331 = shl i96 %r330, 64
-%r332 = or i96 %r326, %r331
-%r333 = zext i96 %r332 to i128
-%r335 = getelementptr i32, i32* %r4, i32 3
-%r336 = load i32, i32* %r335
-%r337 = zext i32 %r336 to i128
-%r338 = shl i128 %r337, 96
-%r339 = or i128 %r333, %r338
-%r340 = zext i128 %r339 to i160
-%r342 = getelementptr i32, i32* %r4, i32 4
-%r343 = load i32, i32* %r342
-%r344 = zext i32 %r343 to i160
-%r345 = shl i160 %r344, 128
-%r346 = or i160 %r340, %r345
-%r347 = zext i160 %r346 to i192
-%r349 = getelementptr i32, i32* %r4, i32 5
-%r350 = load i32, i32* %r349
-%r351 = zext i32 %r350 to i192
-%r352 = shl i192 %r351, 160
-%r353 = or i192 %r347, %r352
-%r354 = zext i192 %r353 to i224
-%r356 = getelementptr i32, i32* %r4, i32 6
-%r357 = load i32, i32* %r356
-%r358 = zext i32 %r357 to i224
-%r359 = shl i224 %r358, 192
-%r360 = or i224 %r354, %r359
-%r361 = zext i224 %r360 to i256
-%r363 = getelementptr i32, i32* %r4, i32 7
-%r364 = load i32, i32* %r363
-%r365 = zext i32 %r364 to i256
-%r366 = shl i256 %r365, 224
-%r367 = or i256 %r361, %r366
-%r368 = zext i256 %r367 to i288
-%r370 = getelementptr i32, i32* %r4, i32 8
-%r371 = load i32, i32* %r370
-%r372 = zext i32 %r371 to i288
-%r373 = shl i288 %r372, 256
-%r374 = or i288 %r368, %r373
-%r375 = zext i288 %r374 to i320
-%r377 = getelementptr i32, i32* %r4, i32 9
-%r378 = load i32, i32* %r377
-%r379 = zext i32 %r378 to i320
-%r380 = shl i320 %r379, 288
-%r381 = or i320 %r375, %r380
-%r382 = zext i320 %r381 to i352
-%r383 = sub i352 %r317, %r382
-%r384 = lshr i352 %r383, 320
-%r385 = trunc i352 %r384 to i1
-%r386 = select i1 %r385, i352 %r317, i352 %r383
-%r387 = trunc i352 %r386 to i320
-%r389 = getelementptr i32, i32* %r1, i32 10
-%r390 = trunc i320 %r387 to i32
-%r392 = getelementptr i32, i32* %r389, i32 0
-store i32 %r390, i32* %r392
-%r393 = lshr i320 %r387, 32
-%r394 = trunc i320 %r393 to i32
-%r396 = getelementptr i32, i32* %r389, i32 1
-store i32 %r394, i32* %r396
-%r397 = lshr i320 %r393, 32
-%r398 = trunc i320 %r397 to i32
-%r400 = getelementptr i32, i32* %r389, i32 2
-store i32 %r398, i32* %r400
-%r401 = lshr i320 %r397, 32
-%r402 = trunc i320 %r401 to i32
-%r404 = getelementptr i32, i32* %r389, i32 3
-store i32 %r402, i32* %r404
-%r405 = lshr i320 %r401, 32
-%r406 = trunc i320 %r405 to i32
-%r408 = getelementptr i32, i32* %r389, i32 4
-store i32 %r406, i32* %r408
-%r409 = lshr i320 %r405, 32
-%r410 = trunc i320 %r409 to i32
-%r412 = getelementptr i32, i32* %r389, i32 5
-store i32 %r410, i32* %r412
-%r413 = lshr i320 %r409, 32
-%r414 = trunc i320 %r413 to i32
-%r416 = getelementptr i32, i32* %r389, i32 6
-store i32 %r414, i32* %r416
-%r417 = lshr i320 %r413, 32
-%r418 = trunc i320 %r417 to i32
-%r420 = getelementptr i32, i32* %r389, i32 7
-store i32 %r418, i32* %r420
-%r421 = lshr i320 %r417, 32
-%r422 = trunc i320 %r421 to i32
-%r424 = getelementptr i32, i32* %r389, i32 8
-store i32 %r422, i32* %r424
-%r425 = lshr i320 %r421, 32
-%r426 = trunc i320 %r425 to i32
-%r428 = getelementptr i32, i32* %r389, i32 9
-store i32 %r426, i32* %r428
-ret void
-}
-define void @mcl_fpDbl_sub10L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = zext i576 %r124 to i608
-%r127 = getelementptr i32, i32* %r2, i32 18
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i608
-%r130 = shl i608 %r129, 576
-%r131 = or i608 %r125, %r130
-%r132 = zext i608 %r131 to i640
-%r134 = getelementptr i32, i32* %r2, i32 19
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i640
-%r137 = shl i640 %r136, 608
-%r138 = or i640 %r132, %r137
-%r139 = load i32, i32* %r3
-%r140 = zext i32 %r139 to i64
-%r142 = getelementptr i32, i32* %r3, i32 1
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i64
-%r145 = shl i64 %r144, 32
-%r146 = or i64 %r140, %r145
-%r147 = zext i64 %r146 to i96
-%r149 = getelementptr i32, i32* %r3, i32 2
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i96
-%r152 = shl i96 %r151, 64
-%r153 = or i96 %r147, %r152
-%r154 = zext i96 %r153 to i128
-%r156 = getelementptr i32, i32* %r3, i32 3
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i128
-%r159 = shl i128 %r158, 96
-%r160 = or i128 %r154, %r159
-%r161 = zext i128 %r160 to i160
-%r163 = getelementptr i32, i32* %r3, i32 4
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i160
-%r166 = shl i160 %r165, 128
-%r167 = or i160 %r161, %r166
-%r168 = zext i160 %r167 to i192
-%r170 = getelementptr i32, i32* %r3, i32 5
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i192
-%r173 = shl i192 %r172, 160
-%r174 = or i192 %r168, %r173
-%r175 = zext i192 %r174 to i224
-%r177 = getelementptr i32, i32* %r3, i32 6
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i224
-%r180 = shl i224 %r179, 192
-%r181 = or i224 %r175, %r180
-%r182 = zext i224 %r181 to i256
-%r184 = getelementptr i32, i32* %r3, i32 7
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i256
-%r187 = shl i256 %r186, 224
-%r188 = or i256 %r182, %r187
-%r189 = zext i256 %r188 to i288
-%r191 = getelementptr i32, i32* %r3, i32 8
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i288
-%r194 = shl i288 %r193, 256
-%r195 = or i288 %r189, %r194
-%r196 = zext i288 %r195 to i320
-%r198 = getelementptr i32, i32* %r3, i32 9
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i320
-%r201 = shl i320 %r200, 288
-%r202 = or i320 %r196, %r201
-%r203 = zext i320 %r202 to i352
-%r205 = getelementptr i32, i32* %r3, i32 10
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i352
-%r208 = shl i352 %r207, 320
-%r209 = or i352 %r203, %r208
-%r210 = zext i352 %r209 to i384
-%r212 = getelementptr i32, i32* %r3, i32 11
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i384
-%r215 = shl i384 %r214, 352
-%r216 = or i384 %r210, %r215
-%r217 = zext i384 %r216 to i416
-%r219 = getelementptr i32, i32* %r3, i32 12
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i416
-%r222 = shl i416 %r221, 384
-%r223 = or i416 %r217, %r222
-%r224 = zext i416 %r223 to i448
-%r226 = getelementptr i32, i32* %r3, i32 13
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i448
-%r229 = shl i448 %r228, 416
-%r230 = or i448 %r224, %r229
-%r231 = zext i448 %r230 to i480
-%r233 = getelementptr i32, i32* %r3, i32 14
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i480
-%r236 = shl i480 %r235, 448
-%r237 = or i480 %r231, %r236
-%r238 = zext i480 %r237 to i512
-%r240 = getelementptr i32, i32* %r3, i32 15
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i512
-%r243 = shl i512 %r242, 480
-%r244 = or i512 %r238, %r243
-%r245 = zext i512 %r244 to i544
-%r247 = getelementptr i32, i32* %r3, i32 16
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i544
-%r250 = shl i544 %r249, 512
-%r251 = or i544 %r245, %r250
-%r252 = zext i544 %r251 to i576
-%r254 = getelementptr i32, i32* %r3, i32 17
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i576
-%r257 = shl i576 %r256, 544
-%r258 = or i576 %r252, %r257
-%r259 = zext i576 %r258 to i608
-%r261 = getelementptr i32, i32* %r3, i32 18
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i608
-%r264 = shl i608 %r263, 576
-%r265 = or i608 %r259, %r264
-%r266 = zext i608 %r265 to i640
-%r268 = getelementptr i32, i32* %r3, i32 19
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i640
-%r271 = shl i640 %r270, 608
-%r272 = or i640 %r266, %r271
-%r273 = zext i640 %r138 to i672
-%r274 = zext i640 %r272 to i672
-%r275 = sub i672 %r273, %r274
-%r276 = trunc i672 %r275 to i320
-%r277 = trunc i320 %r276 to i32
-%r279 = getelementptr i32, i32* %r1, i32 0
-store i32 %r277, i32* %r279
-%r280 = lshr i320 %r276, 32
-%r281 = trunc i320 %r280 to i32
-%r283 = getelementptr i32, i32* %r1, i32 1
-store i32 %r281, i32* %r283
-%r284 = lshr i320 %r280, 32
-%r285 = trunc i320 %r284 to i32
-%r287 = getelementptr i32, i32* %r1, i32 2
-store i32 %r285, i32* %r287
-%r288 = lshr i320 %r284, 32
-%r289 = trunc i320 %r288 to i32
-%r291 = getelementptr i32, i32* %r1, i32 3
-store i32 %r289, i32* %r291
-%r292 = lshr i320 %r288, 32
-%r293 = trunc i320 %r292 to i32
-%r295 = getelementptr i32, i32* %r1, i32 4
-store i32 %r293, i32* %r295
-%r296 = lshr i320 %r292, 32
-%r297 = trunc i320 %r296 to i32
-%r299 = getelementptr i32, i32* %r1, i32 5
-store i32 %r297, i32* %r299
-%r300 = lshr i320 %r296, 32
-%r301 = trunc i320 %r300 to i32
-%r303 = getelementptr i32, i32* %r1, i32 6
-store i32 %r301, i32* %r303
-%r304 = lshr i320 %r300, 32
-%r305 = trunc i320 %r304 to i32
-%r307 = getelementptr i32, i32* %r1, i32 7
-store i32 %r305, i32* %r307
-%r308 = lshr i320 %r304, 32
-%r309 = trunc i320 %r308 to i32
-%r311 = getelementptr i32, i32* %r1, i32 8
-store i32 %r309, i32* %r311
-%r312 = lshr i320 %r308, 32
-%r313 = trunc i320 %r312 to i32
-%r315 = getelementptr i32, i32* %r1, i32 9
-store i32 %r313, i32* %r315
-%r316 = lshr i672 %r275, 320
-%r317 = trunc i672 %r316 to i320
-%r318 = lshr i672 %r275, 640
-%r319 = trunc i672 %r318 to i1
-%r320 = load i32, i32* %r4
-%r321 = zext i32 %r320 to i64
-%r323 = getelementptr i32, i32* %r4, i32 1
-%r324 = load i32, i32* %r323
-%r325 = zext i32 %r324 to i64
-%r326 = shl i64 %r325, 32
-%r327 = or i64 %r321, %r326
-%r328 = zext i64 %r327 to i96
-%r330 = getelementptr i32, i32* %r4, i32 2
-%r331 = load i32, i32* %r330
-%r332 = zext i32 %r331 to i96
-%r333 = shl i96 %r332, 64
-%r334 = or i96 %r328, %r333
-%r335 = zext i96 %r334 to i128
-%r337 = getelementptr i32, i32* %r4, i32 3
-%r338 = load i32, i32* %r337
-%r339 = zext i32 %r338 to i128
-%r340 = shl i128 %r339, 96
-%r341 = or i128 %r335, %r340
-%r342 = zext i128 %r341 to i160
-%r344 = getelementptr i32, i32* %r4, i32 4
-%r345 = load i32, i32* %r344
-%r346 = zext i32 %r345 to i160
-%r347 = shl i160 %r346, 128
-%r348 = or i160 %r342, %r347
-%r349 = zext i160 %r348 to i192
-%r351 = getelementptr i32, i32* %r4, i32 5
-%r352 = load i32, i32* %r351
-%r353 = zext i32 %r352 to i192
-%r354 = shl i192 %r353, 160
-%r355 = or i192 %r349, %r354
-%r356 = zext i192 %r355 to i224
-%r358 = getelementptr i32, i32* %r4, i32 6
-%r359 = load i32, i32* %r358
-%r360 = zext i32 %r359 to i224
-%r361 = shl i224 %r360, 192
-%r362 = or i224 %r356, %r361
-%r363 = zext i224 %r362 to i256
-%r365 = getelementptr i32, i32* %r4, i32 7
-%r366 = load i32, i32* %r365
-%r367 = zext i32 %r366 to i256
-%r368 = shl i256 %r367, 224
-%r369 = or i256 %r363, %r368
-%r370 = zext i256 %r369 to i288
-%r372 = getelementptr i32, i32* %r4, i32 8
-%r373 = load i32, i32* %r372
-%r374 = zext i32 %r373 to i288
-%r375 = shl i288 %r374, 256
-%r376 = or i288 %r370, %r375
-%r377 = zext i288 %r376 to i320
-%r379 = getelementptr i32, i32* %r4, i32 9
-%r380 = load i32, i32* %r379
-%r381 = zext i32 %r380 to i320
-%r382 = shl i320 %r381, 288
-%r383 = or i320 %r377, %r382
-%r385 = select i1 %r319, i320 %r383, i320 0
-%r386 = add i320 %r317, %r385
-%r388 = getelementptr i32, i32* %r1, i32 10
-%r389 = trunc i320 %r386 to i32
-%r391 = getelementptr i32, i32* %r388, i32 0
-store i32 %r389, i32* %r391
-%r392 = lshr i320 %r386, 32
-%r393 = trunc i320 %r392 to i32
-%r395 = getelementptr i32, i32* %r388, i32 1
-store i32 %r393, i32* %r395
-%r396 = lshr i320 %r392, 32
-%r397 = trunc i320 %r396 to i32
-%r399 = getelementptr i32, i32* %r388, i32 2
-store i32 %r397, i32* %r399
-%r400 = lshr i320 %r396, 32
-%r401 = trunc i320 %r400 to i32
-%r403 = getelementptr i32, i32* %r388, i32 3
-store i32 %r401, i32* %r403
-%r404 = lshr i320 %r400, 32
-%r405 = trunc i320 %r404 to i32
-%r407 = getelementptr i32, i32* %r388, i32 4
-store i32 %r405, i32* %r407
-%r408 = lshr i320 %r404, 32
-%r409 = trunc i320 %r408 to i32
-%r411 = getelementptr i32, i32* %r388, i32 5
-store i32 %r409, i32* %r411
-%r412 = lshr i320 %r408, 32
-%r413 = trunc i320 %r412 to i32
-%r415 = getelementptr i32, i32* %r388, i32 6
-store i32 %r413, i32* %r415
-%r416 = lshr i320 %r412, 32
-%r417 = trunc i320 %r416 to i32
-%r419 = getelementptr i32, i32* %r388, i32 7
-store i32 %r417, i32* %r419
-%r420 = lshr i320 %r416, 32
-%r421 = trunc i320 %r420 to i32
-%r423 = getelementptr i32, i32* %r388, i32 8
-store i32 %r421, i32* %r423
-%r424 = lshr i320 %r420, 32
-%r425 = trunc i320 %r424 to i32
-%r427 = getelementptr i32, i32* %r388, i32 9
-store i32 %r425, i32* %r427
-ret void
-}
-define i384 @mulPv352x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
-%r18 = trunc i64 %r17 to i32
-%r19 = call i32 @extractHigh32(i64 %r17)
-%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
-%r22 = trunc i64 %r21 to i32
-%r23 = call i32 @extractHigh32(i64 %r21)
-%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
-%r26 = trunc i64 %r25 to i32
-%r27 = call i32 @extractHigh32(i64 %r25)
-%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
-%r30 = trunc i64 %r29 to i32
-%r31 = call i32 @extractHigh32(i64 %r29)
-%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
-%r34 = trunc i64 %r33 to i32
-%r35 = call i32 @extractHigh32(i64 %r33)
-%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
-%r38 = trunc i64 %r37 to i32
-%r39 = call i32 @extractHigh32(i64 %r37)
-%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
-%r42 = trunc i64 %r41 to i32
-%r43 = call i32 @extractHigh32(i64 %r41)
-%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
-%r46 = trunc i64 %r45 to i32
-%r47 = call i32 @extractHigh32(i64 %r45)
-%r48 = zext i32 %r6 to i64
-%r49 = zext i32 %r10 to i64
-%r50 = shl i64 %r49, 32
-%r51 = or i64 %r48, %r50
-%r52 = zext i64 %r51 to i96
-%r53 = zext i32 %r14 to i96
-%r54 = shl i96 %r53, 64
-%r55 = or i96 %r52, %r54
-%r56 = zext i96 %r55 to i128
-%r57 = zext i32 %r18 to i128
-%r58 = shl i128 %r57, 96
-%r59 = or i128 %r56, %r58
-%r60 = zext i128 %r59 to i160
-%r61 = zext i32 %r22 to i160
-%r62 = shl i160 %r61, 128
-%r63 = or i160 %r60, %r62
-%r64 = zext i160 %r63 to i192
-%r65 = zext i32 %r26 to i192
-%r66 = shl i192 %r65, 160
-%r67 = or i192 %r64, %r66
-%r68 = zext i192 %r67 to i224
-%r69 = zext i32 %r30 to i224
-%r70 = shl i224 %r69, 192
-%r71 = or i224 %r68, %r70
-%r72 = zext i224 %r71 to i256
-%r73 = zext i32 %r34 to i256
-%r74 = shl i256 %r73, 224
-%r75 = or i256 %r72, %r74
-%r76 = zext i256 %r75 to i288
-%r77 = zext i32 %r38 to i288
-%r78 = shl i288 %r77, 256
-%r79 = or i288 %r76, %r78
-%r80 = zext i288 %r79 to i320
-%r81 = zext i32 %r42 to i320
-%r82 = shl i320 %r81, 288
-%r83 = or i320 %r80, %r82
-%r84 = zext i320 %r83 to i352
-%r85 = zext i32 %r46 to i352
-%r86 = shl i352 %r85, 320
-%r87 = or i352 %r84, %r86
-%r88 = zext i32 %r7 to i64
-%r89 = zext i32 %r11 to i64
-%r90 = shl i64 %r89, 32
-%r91 = or i64 %r88, %r90
-%r92 = zext i64 %r91 to i96
-%r93 = zext i32 %r15 to i96
-%r94 = shl i96 %r93, 64
-%r95 = or i96 %r92, %r94
-%r96 = zext i96 %r95 to i128
-%r97 = zext i32 %r19 to i128
-%r98 = shl i128 %r97, 96
-%r99 = or i128 %r96, %r98
-%r100 = zext i128 %r99 to i160
-%r101 = zext i32 %r23 to i160
-%r102 = shl i160 %r101, 128
-%r103 = or i160 %r100, %r102
-%r104 = zext i160 %r103 to i192
-%r105 = zext i32 %r27 to i192
-%r106 = shl i192 %r105, 160
-%r107 = or i192 %r104, %r106
-%r108 = zext i192 %r107 to i224
-%r109 = zext i32 %r31 to i224
-%r110 = shl i224 %r109, 192
-%r111 = or i224 %r108, %r110
-%r112 = zext i224 %r111 to i256
-%r113 = zext i32 %r35 to i256
-%r114 = shl i256 %r113, 224
-%r115 = or i256 %r112, %r114
-%r116 = zext i256 %r115 to i288
-%r117 = zext i32 %r39 to i288
-%r118 = shl i288 %r117, 256
-%r119 = or i288 %r116, %r118
-%r120 = zext i288 %r119 to i320
-%r121 = zext i32 %r43 to i320
-%r122 = shl i320 %r121, 288
-%r123 = or i320 %r120, %r122
-%r124 = zext i320 %r123 to i352
-%r125 = zext i32 %r47 to i352
-%r126 = shl i352 %r125, 320
-%r127 = or i352 %r124, %r126
-%r128 = zext i352 %r87 to i384
-%r129 = zext i352 %r127 to i384
-%r130 = shl i384 %r129, 32
-%r131 = add i384 %r128, %r130
-ret i384 %r131
-}
-define void @mcl_fp_mulUnitPre11L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i384 @mulPv352x32(i32* %r2, i32 %r3)
-%r5 = trunc i384 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i384 %r4, 32
-%r9 = trunc i384 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i384 %r8, 32
-%r13 = trunc i384 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i384 %r12, 32
-%r17 = trunc i384 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i384 %r16, 32
-%r21 = trunc i384 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-%r24 = lshr i384 %r20, 32
-%r25 = trunc i384 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 5
-store i32 %r25, i32* %r27
-%r28 = lshr i384 %r24, 32
-%r29 = trunc i384 %r28 to i32
-%r31 = getelementptr i32, i32* %r1, i32 6
-store i32 %r29, i32* %r31
-%r32 = lshr i384 %r28, 32
-%r33 = trunc i384 %r32 to i32
-%r35 = getelementptr i32, i32* %r1, i32 7
-store i32 %r33, i32* %r35
-%r36 = lshr i384 %r32, 32
-%r37 = trunc i384 %r36 to i32
-%r39 = getelementptr i32, i32* %r1, i32 8
-store i32 %r37, i32* %r39
-%r40 = lshr i384 %r36, 32
-%r41 = trunc i384 %r40 to i32
-%r43 = getelementptr i32, i32* %r1, i32 9
-store i32 %r41, i32* %r43
-%r44 = lshr i384 %r40, 32
-%r45 = trunc i384 %r44 to i32
-%r47 = getelementptr i32, i32* %r1, i32 10
-store i32 %r45, i32* %r47
-%r48 = lshr i384 %r44, 32
-%r49 = trunc i384 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 11
-store i32 %r49, i32* %r51
-ret void
-}
-define void @mcl_fpDbl_mulPre11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r4 = load i32, i32* %r3
-%r5 = call i384 @mulPv352x32(i32* %r2, i32 %r4)
-%r6 = trunc i384 %r5 to i32
-store i32 %r6, i32* %r1
-%r7 = lshr i384 %r5, 32
-%r9 = getelementptr i32, i32* %r3, i32 1
-%r10 = load i32, i32* %r9
-%r11 = call i384 @mulPv352x32(i32* %r2, i32 %r10)
-%r12 = add i384 %r7, %r11
-%r13 = trunc i384 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 1
-store i32 %r13, i32* %r15
-%r16 = lshr i384 %r12, 32
-%r18 = getelementptr i32, i32* %r3, i32 2
-%r19 = load i32, i32* %r18
-%r20 = call i384 @mulPv352x32(i32* %r2, i32 %r19)
-%r21 = add i384 %r16, %r20
-%r22 = trunc i384 %r21 to i32
-%r24 = getelementptr i32, i32* %r1, i32 2
-store i32 %r22, i32* %r24
-%r25 = lshr i384 %r21, 32
-%r27 = getelementptr i32, i32* %r3, i32 3
-%r28 = load i32, i32* %r27
-%r29 = call i384 @mulPv352x32(i32* %r2, i32 %r28)
-%r30 = add i384 %r25, %r29
-%r31 = trunc i384 %r30 to i32
-%r33 = getelementptr i32, i32* %r1, i32 3
-store i32 %r31, i32* %r33
-%r34 = lshr i384 %r30, 32
-%r36 = getelementptr i32, i32* %r3, i32 4
-%r37 = load i32, i32* %r36
-%r38 = call i384 @mulPv352x32(i32* %r2, i32 %r37)
-%r39 = add i384 %r34, %r38
-%r40 = trunc i384 %r39 to i32
-%r42 = getelementptr i32, i32* %r1, i32 4
-store i32 %r40, i32* %r42
-%r43 = lshr i384 %r39, 32
-%r45 = getelementptr i32, i32* %r3, i32 5
-%r46 = load i32, i32* %r45
-%r47 = call i384 @mulPv352x32(i32* %r2, i32 %r46)
-%r48 = add i384 %r43, %r47
-%r49 = trunc i384 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 5
-store i32 %r49, i32* %r51
-%r52 = lshr i384 %r48, 32
-%r54 = getelementptr i32, i32* %r3, i32 6
-%r55 = load i32, i32* %r54
-%r56 = call i384 @mulPv352x32(i32* %r2, i32 %r55)
-%r57 = add i384 %r52, %r56
-%r58 = trunc i384 %r57 to i32
-%r60 = getelementptr i32, i32* %r1, i32 6
-store i32 %r58, i32* %r60
-%r61 = lshr i384 %r57, 32
-%r63 = getelementptr i32, i32* %r3, i32 7
-%r64 = load i32, i32* %r63
-%r65 = call i384 @mulPv352x32(i32* %r2, i32 %r64)
-%r66 = add i384 %r61, %r65
-%r67 = trunc i384 %r66 to i32
-%r69 = getelementptr i32, i32* %r1, i32 7
-store i32 %r67, i32* %r69
-%r70 = lshr i384 %r66, 32
-%r72 = getelementptr i32, i32* %r3, i32 8
-%r73 = load i32, i32* %r72
-%r74 = call i384 @mulPv352x32(i32* %r2, i32 %r73)
-%r75 = add i384 %r70, %r74
-%r76 = trunc i384 %r75 to i32
-%r78 = getelementptr i32, i32* %r1, i32 8
-store i32 %r76, i32* %r78
-%r79 = lshr i384 %r75, 32
-%r81 = getelementptr i32, i32* %r3, i32 9
-%r82 = load i32, i32* %r81
-%r83 = call i384 @mulPv352x32(i32* %r2, i32 %r82)
-%r84 = add i384 %r79, %r83
-%r85 = trunc i384 %r84 to i32
-%r87 = getelementptr i32, i32* %r1, i32 9
-store i32 %r85, i32* %r87
-%r88 = lshr i384 %r84, 32
-%r90 = getelementptr i32, i32* %r3, i32 10
-%r91 = load i32, i32* %r90
-%r92 = call i384 @mulPv352x32(i32* %r2, i32 %r91)
-%r93 = add i384 %r88, %r92
-%r95 = getelementptr i32, i32* %r1, i32 10
-%r96 = trunc i384 %r93 to i32
-%r98 = getelementptr i32, i32* %r95, i32 0
-store i32 %r96, i32* %r98
-%r99 = lshr i384 %r93, 32
-%r100 = trunc i384 %r99 to i32
-%r102 = getelementptr i32, i32* %r95, i32 1
-store i32 %r100, i32* %r102
-%r103 = lshr i384 %r99, 32
-%r104 = trunc i384 %r103 to i32
-%r106 = getelementptr i32, i32* %r95, i32 2
-store i32 %r104, i32* %r106
-%r107 = lshr i384 %r103, 32
-%r108 = trunc i384 %r107 to i32
-%r110 = getelementptr i32, i32* %r95, i32 3
-store i32 %r108, i32* %r110
-%r111 = lshr i384 %r107, 32
-%r112 = trunc i384 %r111 to i32
-%r114 = getelementptr i32, i32* %r95, i32 4
-store i32 %r112, i32* %r114
-%r115 = lshr i384 %r111, 32
-%r116 = trunc i384 %r115 to i32
-%r118 = getelementptr i32, i32* %r95, i32 5
-store i32 %r116, i32* %r118
-%r119 = lshr i384 %r115, 32
-%r120 = trunc i384 %r119 to i32
-%r122 = getelementptr i32, i32* %r95, i32 6
-store i32 %r120, i32* %r122
-%r123 = lshr i384 %r119, 32
-%r124 = trunc i384 %r123 to i32
-%r126 = getelementptr i32, i32* %r95, i32 7
-store i32 %r124, i32* %r126
-%r127 = lshr i384 %r123, 32
-%r128 = trunc i384 %r127 to i32
-%r130 = getelementptr i32, i32* %r95, i32 8
-store i32 %r128, i32* %r130
-%r131 = lshr i384 %r127, 32
-%r132 = trunc i384 %r131 to i32
-%r134 = getelementptr i32, i32* %r95, i32 9
-store i32 %r132, i32* %r134
-%r135 = lshr i384 %r131, 32
-%r136 = trunc i384 %r135 to i32
-%r138 = getelementptr i32, i32* %r95, i32 10
-store i32 %r136, i32* %r138
-%r139 = lshr i384 %r135, 32
-%r140 = trunc i384 %r139 to i32
-%r142 = getelementptr i32, i32* %r95, i32 11
-store i32 %r140, i32* %r142
-ret void
-}
-define void @mcl_fpDbl_sqrPre11L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = call i384 @mulPv352x32(i32* %r2, i32 %r3)
-%r5 = trunc i384 %r4 to i32
-store i32 %r5, i32* %r1
-%r6 = lshr i384 %r4, 32
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = call i384 @mulPv352x32(i32* %r2, i32 %r9)
-%r11 = add i384 %r6, %r10
-%r12 = trunc i384 %r11 to i32
-%r14 = getelementptr i32, i32* %r1, i32 1
-store i32 %r12, i32* %r14
-%r15 = lshr i384 %r11, 32
-%r17 = getelementptr i32, i32* %r2, i32 2
-%r18 = load i32, i32* %r17
-%r19 = call i384 @mulPv352x32(i32* %r2, i32 %r18)
-%r20 = add i384 %r15, %r19
-%r21 = trunc i384 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 2
-store i32 %r21, i32* %r23
-%r24 = lshr i384 %r20, 32
-%r26 = getelementptr i32, i32* %r2, i32 3
-%r27 = load i32, i32* %r26
-%r28 = call i384 @mulPv352x32(i32* %r2, i32 %r27)
-%r29 = add i384 %r24, %r28
-%r30 = trunc i384 %r29 to i32
-%r32 = getelementptr i32, i32* %r1, i32 3
-store i32 %r30, i32* %r32
-%r33 = lshr i384 %r29, 32
-%r35 = getelementptr i32, i32* %r2, i32 4
-%r36 = load i32, i32* %r35
-%r37 = call i384 @mulPv352x32(i32* %r2, i32 %r36)
-%r38 = add i384 %r33, %r37
-%r39 = trunc i384 %r38 to i32
-%r41 = getelementptr i32, i32* %r1, i32 4
-store i32 %r39, i32* %r41
-%r42 = lshr i384 %r38, 32
-%r44 = getelementptr i32, i32* %r2, i32 5
-%r45 = load i32, i32* %r44
-%r46 = call i384 @mulPv352x32(i32* %r2, i32 %r45)
-%r47 = add i384 %r42, %r46
-%r48 = trunc i384 %r47 to i32
-%r50 = getelementptr i32, i32* %r1, i32 5
-store i32 %r48, i32* %r50
-%r51 = lshr i384 %r47, 32
-%r53 = getelementptr i32, i32* %r2, i32 6
-%r54 = load i32, i32* %r53
-%r55 = call i384 @mulPv352x32(i32* %r2, i32 %r54)
-%r56 = add i384 %r51, %r55
-%r57 = trunc i384 %r56 to i32
-%r59 = getelementptr i32, i32* %r1, i32 6
-store i32 %r57, i32* %r59
-%r60 = lshr i384 %r56, 32
-%r62 = getelementptr i32, i32* %r2, i32 7
-%r63 = load i32, i32* %r62
-%r64 = call i384 @mulPv352x32(i32* %r2, i32 %r63)
-%r65 = add i384 %r60, %r64
-%r66 = trunc i384 %r65 to i32
-%r68 = getelementptr i32, i32* %r1, i32 7
-store i32 %r66, i32* %r68
-%r69 = lshr i384 %r65, 32
-%r71 = getelementptr i32, i32* %r2, i32 8
-%r72 = load i32, i32* %r71
-%r73 = call i384 @mulPv352x32(i32* %r2, i32 %r72)
-%r74 = add i384 %r69, %r73
-%r75 = trunc i384 %r74 to i32
-%r77 = getelementptr i32, i32* %r1, i32 8
-store i32 %r75, i32* %r77
-%r78 = lshr i384 %r74, 32
-%r80 = getelementptr i32, i32* %r2, i32 9
-%r81 = load i32, i32* %r80
-%r82 = call i384 @mulPv352x32(i32* %r2, i32 %r81)
-%r83 = add i384 %r78, %r82
-%r84 = trunc i384 %r83 to i32
-%r86 = getelementptr i32, i32* %r1, i32 9
-store i32 %r84, i32* %r86
-%r87 = lshr i384 %r83, 32
-%r89 = getelementptr i32, i32* %r2, i32 10
-%r90 = load i32, i32* %r89
-%r91 = call i384 @mulPv352x32(i32* %r2, i32 %r90)
-%r92 = add i384 %r87, %r91
-%r94 = getelementptr i32, i32* %r1, i32 10
-%r95 = trunc i384 %r92 to i32
-%r97 = getelementptr i32, i32* %r94, i32 0
-store i32 %r95, i32* %r97
-%r98 = lshr i384 %r92, 32
-%r99 = trunc i384 %r98 to i32
-%r101 = getelementptr i32, i32* %r94, i32 1
-store i32 %r99, i32* %r101
-%r102 = lshr i384 %r98, 32
-%r103 = trunc i384 %r102 to i32
-%r105 = getelementptr i32, i32* %r94, i32 2
-store i32 %r103, i32* %r105
-%r106 = lshr i384 %r102, 32
-%r107 = trunc i384 %r106 to i32
-%r109 = getelementptr i32, i32* %r94, i32 3
-store i32 %r107, i32* %r109
-%r110 = lshr i384 %r106, 32
-%r111 = trunc i384 %r110 to i32
-%r113 = getelementptr i32, i32* %r94, i32 4
-store i32 %r111, i32* %r113
-%r114 = lshr i384 %r110, 32
-%r115 = trunc i384 %r114 to i32
-%r117 = getelementptr i32, i32* %r94, i32 5
-store i32 %r115, i32* %r117
-%r118 = lshr i384 %r114, 32
-%r119 = trunc i384 %r118 to i32
-%r121 = getelementptr i32, i32* %r94, i32 6
-store i32 %r119, i32* %r121
-%r122 = lshr i384 %r118, 32
-%r123 = trunc i384 %r122 to i32
-%r125 = getelementptr i32, i32* %r94, i32 7
-store i32 %r123, i32* %r125
-%r126 = lshr i384 %r122, 32
-%r127 = trunc i384 %r126 to i32
-%r129 = getelementptr i32, i32* %r94, i32 8
-store i32 %r127, i32* %r129
-%r130 = lshr i384 %r126, 32
-%r131 = trunc i384 %r130 to i32
-%r133 = getelementptr i32, i32* %r94, i32 9
-store i32 %r131, i32* %r133
-%r134 = lshr i384 %r130, 32
-%r135 = trunc i384 %r134 to i32
-%r137 = getelementptr i32, i32* %r94, i32 10
-store i32 %r135, i32* %r137
-%r138 = lshr i384 %r134, 32
-%r139 = trunc i384 %r138 to i32
-%r141 = getelementptr i32, i32* %r94, i32 11
-store i32 %r139, i32* %r141
-ret void
-}
-define void @mcl_fp_mont11L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i384 @mulPv352x32(i32* %r2, i32 %r10)
-%r12 = zext i384 %r11 to i416
-%r13 = trunc i384 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i384 @mulPv352x32(i32* %r4, i32 %r14)
-%r16 = zext i384 %r15 to i416
-%r17 = add i416 %r12, %r16
-%r18 = lshr i416 %r17, 32
-%r20 = getelementptr i32, i32* %r3, i32 1
-%r21 = load i32, i32* %r20
-%r22 = call i384 @mulPv352x32(i32* %r2, i32 %r21)
-%r23 = zext i384 %r22 to i416
-%r24 = add i416 %r18, %r23
-%r25 = trunc i416 %r24 to i32
-%r26 = mul i32 %r25, %r7
-%r27 = call i384 @mulPv352x32(i32* %r4, i32 %r26)
-%r28 = zext i384 %r27 to i416
-%r29 = add i416 %r24, %r28
-%r30 = lshr i416 %r29, 32
-%r32 = getelementptr i32, i32* %r3, i32 2
-%r33 = load i32, i32* %r32
-%r34 = call i384 @mulPv352x32(i32* %r2, i32 %r33)
-%r35 = zext i384 %r34 to i416
-%r36 = add i416 %r30, %r35
-%r37 = trunc i416 %r36 to i32
-%r38 = mul i32 %r37, %r7
-%r39 = call i384 @mulPv352x32(i32* %r4, i32 %r38)
-%r40 = zext i384 %r39 to i416
-%r41 = add i416 %r36, %r40
-%r42 = lshr i416 %r41, 32
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = call i384 @mulPv352x32(i32* %r2, i32 %r45)
-%r47 = zext i384 %r46 to i416
-%r48 = add i416 %r42, %r47
-%r49 = trunc i416 %r48 to i32
-%r50 = mul i32 %r49, %r7
-%r51 = call i384 @mulPv352x32(i32* %r4, i32 %r50)
-%r52 = zext i384 %r51 to i416
-%r53 = add i416 %r48, %r52
-%r54 = lshr i416 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 4
-%r57 = load i32, i32* %r56
-%r58 = call i384 @mulPv352x32(i32* %r2, i32 %r57)
-%r59 = zext i384 %r58 to i416
-%r60 = add i416 %r54, %r59
-%r61 = trunc i416 %r60 to i32
-%r62 = mul i32 %r61, %r7
-%r63 = call i384 @mulPv352x32(i32* %r4, i32 %r62)
-%r64 = zext i384 %r63 to i416
-%r65 = add i416 %r60, %r64
-%r66 = lshr i416 %r65, 32
-%r68 = getelementptr i32, i32* %r3, i32 5
-%r69 = load i32, i32* %r68
-%r70 = call i384 @mulPv352x32(i32* %r2, i32 %r69)
-%r71 = zext i384 %r70 to i416
-%r72 = add i416 %r66, %r71
-%r73 = trunc i416 %r72 to i32
-%r74 = mul i32 %r73, %r7
-%r75 = call i384 @mulPv352x32(i32* %r4, i32 %r74)
-%r76 = zext i384 %r75 to i416
-%r77 = add i416 %r72, %r76
-%r78 = lshr i416 %r77, 32
-%r80 = getelementptr i32, i32* %r3, i32 6
-%r81 = load i32, i32* %r80
-%r82 = call i384 @mulPv352x32(i32* %r2, i32 %r81)
-%r83 = zext i384 %r82 to i416
-%r84 = add i416 %r78, %r83
-%r85 = trunc i416 %r84 to i32
-%r86 = mul i32 %r85, %r7
-%r87 = call i384 @mulPv352x32(i32* %r4, i32 %r86)
-%r88 = zext i384 %r87 to i416
-%r89 = add i416 %r84, %r88
-%r90 = lshr i416 %r89, 32
-%r92 = getelementptr i32, i32* %r3, i32 7
-%r93 = load i32, i32* %r92
-%r94 = call i384 @mulPv352x32(i32* %r2, i32 %r93)
-%r95 = zext i384 %r94 to i416
-%r96 = add i416 %r90, %r95
-%r97 = trunc i416 %r96 to i32
-%r98 = mul i32 %r97, %r7
-%r99 = call i384 @mulPv352x32(i32* %r4, i32 %r98)
-%r100 = zext i384 %r99 to i416
-%r101 = add i416 %r96, %r100
-%r102 = lshr i416 %r101, 32
-%r104 = getelementptr i32, i32* %r3, i32 8
-%r105 = load i32, i32* %r104
-%r106 = call i384 @mulPv352x32(i32* %r2, i32 %r105)
-%r107 = zext i384 %r106 to i416
-%r108 = add i416 %r102, %r107
-%r109 = trunc i416 %r108 to i32
-%r110 = mul i32 %r109, %r7
-%r111 = call i384 @mulPv352x32(i32* %r4, i32 %r110)
-%r112 = zext i384 %r111 to i416
-%r113 = add i416 %r108, %r112
-%r114 = lshr i416 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 9
-%r117 = load i32, i32* %r116
-%r118 = call i384 @mulPv352x32(i32* %r2, i32 %r117)
-%r119 = zext i384 %r118 to i416
-%r120 = add i416 %r114, %r119
-%r121 = trunc i416 %r120 to i32
-%r122 = mul i32 %r121, %r7
-%r123 = call i384 @mulPv352x32(i32* %r4, i32 %r122)
-%r124 = zext i384 %r123 to i416
-%r125 = add i416 %r120, %r124
-%r126 = lshr i416 %r125, 32
-%r128 = getelementptr i32, i32* %r3, i32 10
-%r129 = load i32, i32* %r128
-%r130 = call i384 @mulPv352x32(i32* %r2, i32 %r129)
-%r131 = zext i384 %r130 to i416
-%r132 = add i416 %r126, %r131
-%r133 = trunc i416 %r132 to i32
-%r134 = mul i32 %r133, %r7
-%r135 = call i384 @mulPv352x32(i32* %r4, i32 %r134)
-%r136 = zext i384 %r135 to i416
-%r137 = add i416 %r132, %r136
-%r138 = lshr i416 %r137, 32
-%r139 = trunc i416 %r138 to i384
-%r140 = load i32, i32* %r4
-%r141 = zext i32 %r140 to i64
-%r143 = getelementptr i32, i32* %r4, i32 1
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i64
-%r146 = shl i64 %r145, 32
-%r147 = or i64 %r141, %r146
-%r148 = zext i64 %r147 to i96
-%r150 = getelementptr i32, i32* %r4, i32 2
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i96
-%r153 = shl i96 %r152, 64
-%r154 = or i96 %r148, %r153
-%r155 = zext i96 %r154 to i128
-%r157 = getelementptr i32, i32* %r4, i32 3
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i128
-%r160 = shl i128 %r159, 96
-%r161 = or i128 %r155, %r160
-%r162 = zext i128 %r161 to i160
-%r164 = getelementptr i32, i32* %r4, i32 4
-%r165 = load i32, i32* %r164
-%r166 = zext i32 %r165 to i160
-%r167 = shl i160 %r166, 128
-%r168 = or i160 %r162, %r167
-%r169 = zext i160 %r168 to i192
-%r171 = getelementptr i32, i32* %r4, i32 5
-%r172 = load i32, i32* %r171
-%r173 = zext i32 %r172 to i192
-%r174 = shl i192 %r173, 160
-%r175 = or i192 %r169, %r174
-%r176 = zext i192 %r175 to i224
-%r178 = getelementptr i32, i32* %r4, i32 6
-%r179 = load i32, i32* %r178
-%r180 = zext i32 %r179 to i224
-%r181 = shl i224 %r180, 192
-%r182 = or i224 %r176, %r181
-%r183 = zext i224 %r182 to i256
-%r185 = getelementptr i32, i32* %r4, i32 7
-%r186 = load i32, i32* %r185
-%r187 = zext i32 %r186 to i256
-%r188 = shl i256 %r187, 224
-%r189 = or i256 %r183, %r188
-%r190 = zext i256 %r189 to i288
-%r192 = getelementptr i32, i32* %r4, i32 8
-%r193 = load i32, i32* %r192
-%r194 = zext i32 %r193 to i288
-%r195 = shl i288 %r194, 256
-%r196 = or i288 %r190, %r195
-%r197 = zext i288 %r196 to i320
-%r199 = getelementptr i32, i32* %r4, i32 9
-%r200 = load i32, i32* %r199
-%r201 = zext i32 %r200 to i320
-%r202 = shl i320 %r201, 288
-%r203 = or i320 %r197, %r202
-%r204 = zext i320 %r203 to i352
-%r206 = getelementptr i32, i32* %r4, i32 10
-%r207 = load i32, i32* %r206
-%r208 = zext i32 %r207 to i352
-%r209 = shl i352 %r208, 320
-%r210 = or i352 %r204, %r209
-%r211 = zext i352 %r210 to i384
-%r212 = sub i384 %r139, %r211
-%r213 = lshr i384 %r212, 352
-%r214 = trunc i384 %r213 to i1
-%r215 = select i1 %r214, i384 %r139, i384 %r212
-%r216 = trunc i384 %r215 to i352
-%r217 = trunc i352 %r216 to i32
-%r219 = getelementptr i32, i32* %r1, i32 0
-store i32 %r217, i32* %r219
-%r220 = lshr i352 %r216, 32
-%r221 = trunc i352 %r220 to i32
-%r223 = getelementptr i32, i32* %r1, i32 1
-store i32 %r221, i32* %r223
-%r224 = lshr i352 %r220, 32
-%r225 = trunc i352 %r224 to i32
-%r227 = getelementptr i32, i32* %r1, i32 2
-store i32 %r225, i32* %r227
-%r228 = lshr i352 %r224, 32
-%r229 = trunc i352 %r228 to i32
-%r231 = getelementptr i32, i32* %r1, i32 3
-store i32 %r229, i32* %r231
-%r232 = lshr i352 %r228, 32
-%r233 = trunc i352 %r232 to i32
-%r235 = getelementptr i32, i32* %r1, i32 4
-store i32 %r233, i32* %r235
-%r236 = lshr i352 %r232, 32
-%r237 = trunc i352 %r236 to i32
-%r239 = getelementptr i32, i32* %r1, i32 5
-store i32 %r237, i32* %r239
-%r240 = lshr i352 %r236, 32
-%r241 = trunc i352 %r240 to i32
-%r243 = getelementptr i32, i32* %r1, i32 6
-store i32 %r241, i32* %r243
-%r244 = lshr i352 %r240, 32
-%r245 = trunc i352 %r244 to i32
-%r247 = getelementptr i32, i32* %r1, i32 7
-store i32 %r245, i32* %r247
-%r248 = lshr i352 %r244, 32
-%r249 = trunc i352 %r248 to i32
-%r251 = getelementptr i32, i32* %r1, i32 8
-store i32 %r249, i32* %r251
-%r252 = lshr i352 %r248, 32
-%r253 = trunc i352 %r252 to i32
-%r255 = getelementptr i32, i32* %r1, i32 9
-store i32 %r253, i32* %r255
-%r256 = lshr i352 %r252, 32
-%r257 = trunc i352 %r256 to i32
-%r259 = getelementptr i32, i32* %r1, i32 10
-store i32 %r257, i32* %r259
-ret void
-}
-define void @mcl_fp_montNF11L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i384 @mulPv352x32(i32* %r2, i32 %r8)
-%r10 = trunc i384 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i384 @mulPv352x32(i32* %r4, i32 %r11)
-%r13 = add i384 %r9, %r12
-%r14 = lshr i384 %r13, 32
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = call i384 @mulPv352x32(i32* %r2, i32 %r17)
-%r19 = add i384 %r14, %r18
-%r20 = trunc i384 %r19 to i32
-%r21 = mul i32 %r20, %r7
-%r22 = call i384 @mulPv352x32(i32* %r4, i32 %r21)
-%r23 = add i384 %r19, %r22
-%r24 = lshr i384 %r23, 32
-%r26 = getelementptr i32, i32* %r3, i32 2
-%r27 = load i32, i32* %r26
-%r28 = call i384 @mulPv352x32(i32* %r2, i32 %r27)
-%r29 = add i384 %r24, %r28
-%r30 = trunc i384 %r29 to i32
-%r31 = mul i32 %r30, %r7
-%r32 = call i384 @mulPv352x32(i32* %r4, i32 %r31)
-%r33 = add i384 %r29, %r32
-%r34 = lshr i384 %r33, 32
-%r36 = getelementptr i32, i32* %r3, i32 3
-%r37 = load i32, i32* %r36
-%r38 = call i384 @mulPv352x32(i32* %r2, i32 %r37)
-%r39 = add i384 %r34, %r38
-%r40 = trunc i384 %r39 to i32
-%r41 = mul i32 %r40, %r7
-%r42 = call i384 @mulPv352x32(i32* %r4, i32 %r41)
-%r43 = add i384 %r39, %r42
-%r44 = lshr i384 %r43, 32
-%r46 = getelementptr i32, i32* %r3, i32 4
-%r47 = load i32, i32* %r46
-%r48 = call i384 @mulPv352x32(i32* %r2, i32 %r47)
-%r49 = add i384 %r44, %r48
-%r50 = trunc i384 %r49 to i32
-%r51 = mul i32 %r50, %r7
-%r52 = call i384 @mulPv352x32(i32* %r4, i32 %r51)
-%r53 = add i384 %r49, %r52
-%r54 = lshr i384 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 5
-%r57 = load i32, i32* %r56
-%r58 = call i384 @mulPv352x32(i32* %r2, i32 %r57)
-%r59 = add i384 %r54, %r58
-%r60 = trunc i384 %r59 to i32
-%r61 = mul i32 %r60, %r7
-%r62 = call i384 @mulPv352x32(i32* %r4, i32 %r61)
-%r63 = add i384 %r59, %r62
-%r64 = lshr i384 %r63, 32
-%r66 = getelementptr i32, i32* %r3, i32 6
-%r67 = load i32, i32* %r66
-%r68 = call i384 @mulPv352x32(i32* %r2, i32 %r67)
-%r69 = add i384 %r64, %r68
-%r70 = trunc i384 %r69 to i32
-%r71 = mul i32 %r70, %r7
-%r72 = call i384 @mulPv352x32(i32* %r4, i32 %r71)
-%r73 = add i384 %r69, %r72
-%r74 = lshr i384 %r73, 32
-%r76 = getelementptr i32, i32* %r3, i32 7
-%r77 = load i32, i32* %r76
-%r78 = call i384 @mulPv352x32(i32* %r2, i32 %r77)
-%r79 = add i384 %r74, %r78
-%r80 = trunc i384 %r79 to i32
-%r81 = mul i32 %r80, %r7
-%r82 = call i384 @mulPv352x32(i32* %r4, i32 %r81)
-%r83 = add i384 %r79, %r82
-%r84 = lshr i384 %r83, 32
-%r86 = getelementptr i32, i32* %r3, i32 8
-%r87 = load i32, i32* %r86
-%r88 = call i384 @mulPv352x32(i32* %r2, i32 %r87)
-%r89 = add i384 %r84, %r88
-%r90 = trunc i384 %r89 to i32
-%r91 = mul i32 %r90, %r7
-%r92 = call i384 @mulPv352x32(i32* %r4, i32 %r91)
-%r93 = add i384 %r89, %r92
-%r94 = lshr i384 %r93, 32
-%r96 = getelementptr i32, i32* %r3, i32 9
-%r97 = load i32, i32* %r96
-%r98 = call i384 @mulPv352x32(i32* %r2, i32 %r97)
-%r99 = add i384 %r94, %r98
-%r100 = trunc i384 %r99 to i32
-%r101 = mul i32 %r100, %r7
-%r102 = call i384 @mulPv352x32(i32* %r4, i32 %r101)
-%r103 = add i384 %r99, %r102
-%r104 = lshr i384 %r103, 32
-%r106 = getelementptr i32, i32* %r3, i32 10
-%r107 = load i32, i32* %r106
-%r108 = call i384 @mulPv352x32(i32* %r2, i32 %r107)
-%r109 = add i384 %r104, %r108
-%r110 = trunc i384 %r109 to i32
-%r111 = mul i32 %r110, %r7
-%r112 = call i384 @mulPv352x32(i32* %r4, i32 %r111)
-%r113 = add i384 %r109, %r112
-%r114 = lshr i384 %r113, 32
-%r115 = trunc i384 %r114 to i352
-%r116 = load i32, i32* %r4
-%r117 = zext i32 %r116 to i64
-%r119 = getelementptr i32, i32* %r4, i32 1
-%r120 = load i32, i32* %r119
-%r121 = zext i32 %r120 to i64
-%r122 = shl i64 %r121, 32
-%r123 = or i64 %r117, %r122
-%r124 = zext i64 %r123 to i96
-%r126 = getelementptr i32, i32* %r4, i32 2
-%r127 = load i32, i32* %r126
-%r128 = zext i32 %r127 to i96
-%r129 = shl i96 %r128, 64
-%r130 = or i96 %r124, %r129
-%r131 = zext i96 %r130 to i128
-%r133 = getelementptr i32, i32* %r4, i32 3
-%r134 = load i32, i32* %r133
-%r135 = zext i32 %r134 to i128
-%r136 = shl i128 %r135, 96
-%r137 = or i128 %r131, %r136
-%r138 = zext i128 %r137 to i160
-%r140 = getelementptr i32, i32* %r4, i32 4
-%r141 = load i32, i32* %r140
-%r142 = zext i32 %r141 to i160
-%r143 = shl i160 %r142, 128
-%r144 = or i160 %r138, %r143
-%r145 = zext i160 %r144 to i192
-%r147 = getelementptr i32, i32* %r4, i32 5
-%r148 = load i32, i32* %r147
-%r149 = zext i32 %r148 to i192
-%r150 = shl i192 %r149, 160
-%r151 = or i192 %r145, %r150
-%r152 = zext i192 %r151 to i224
-%r154 = getelementptr i32, i32* %r4, i32 6
-%r155 = load i32, i32* %r154
-%r156 = zext i32 %r155 to i224
-%r157 = shl i224 %r156, 192
-%r158 = or i224 %r152, %r157
-%r159 = zext i224 %r158 to i256
-%r161 = getelementptr i32, i32* %r4, i32 7
-%r162 = load i32, i32* %r161
-%r163 = zext i32 %r162 to i256
-%r164 = shl i256 %r163, 224
-%r165 = or i256 %r159, %r164
-%r166 = zext i256 %r165 to i288
-%r168 = getelementptr i32, i32* %r4, i32 8
-%r169 = load i32, i32* %r168
-%r170 = zext i32 %r169 to i288
-%r171 = shl i288 %r170, 256
-%r172 = or i288 %r166, %r171
-%r173 = zext i288 %r172 to i320
-%r175 = getelementptr i32, i32* %r4, i32 9
-%r176 = load i32, i32* %r175
-%r177 = zext i32 %r176 to i320
-%r178 = shl i320 %r177, 288
-%r179 = or i320 %r173, %r178
-%r180 = zext i320 %r179 to i352
-%r182 = getelementptr i32, i32* %r4, i32 10
-%r183 = load i32, i32* %r182
-%r184 = zext i32 %r183 to i352
-%r185 = shl i352 %r184, 320
-%r186 = or i352 %r180, %r185
-%r187 = sub i352 %r115, %r186
-%r188 = lshr i352 %r187, 351
-%r189 = trunc i352 %r188 to i1
-%r190 = select i1 %r189, i352 %r115, i352 %r187
-%r191 = trunc i352 %r190 to i32
-%r193 = getelementptr i32, i32* %r1, i32 0
-store i32 %r191, i32* %r193
-%r194 = lshr i352 %r190, 32
-%r195 = trunc i352 %r194 to i32
-%r197 = getelementptr i32, i32* %r1, i32 1
-store i32 %r195, i32* %r197
-%r198 = lshr i352 %r194, 32
-%r199 = trunc i352 %r198 to i32
-%r201 = getelementptr i32, i32* %r1, i32 2
-store i32 %r199, i32* %r201
-%r202 = lshr i352 %r198, 32
-%r203 = trunc i352 %r202 to i32
-%r205 = getelementptr i32, i32* %r1, i32 3
-store i32 %r203, i32* %r205
-%r206 = lshr i352 %r202, 32
-%r207 = trunc i352 %r206 to i32
-%r209 = getelementptr i32, i32* %r1, i32 4
-store i32 %r207, i32* %r209
-%r210 = lshr i352 %r206, 32
-%r211 = trunc i352 %r210 to i32
-%r213 = getelementptr i32, i32* %r1, i32 5
-store i32 %r211, i32* %r213
-%r214 = lshr i352 %r210, 32
-%r215 = trunc i352 %r214 to i32
-%r217 = getelementptr i32, i32* %r1, i32 6
-store i32 %r215, i32* %r217
-%r218 = lshr i352 %r214, 32
-%r219 = trunc i352 %r218 to i32
-%r221 = getelementptr i32, i32* %r1, i32 7
-store i32 %r219, i32* %r221
-%r222 = lshr i352 %r218, 32
-%r223 = trunc i352 %r222 to i32
-%r225 = getelementptr i32, i32* %r1, i32 8
-store i32 %r223, i32* %r225
-%r226 = lshr i352 %r222, 32
-%r227 = trunc i352 %r226 to i32
-%r229 = getelementptr i32, i32* %r1, i32 9
-store i32 %r227, i32* %r229
-%r230 = lshr i352 %r226, 32
-%r231 = trunc i352 %r230 to i32
-%r233 = getelementptr i32, i32* %r1, i32 10
-store i32 %r231, i32* %r233
-ret void
-}
-define void @mcl_fp_montRed11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = zext i96 %r21 to i128
-%r24 = getelementptr i32, i32* %r3, i32 3
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i128
-%r27 = shl i128 %r26, 96
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i160
-%r31 = getelementptr i32, i32* %r3, i32 4
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i160
-%r34 = shl i160 %r33, 128
-%r35 = or i160 %r29, %r34
-%r36 = zext i160 %r35 to i192
-%r38 = getelementptr i32, i32* %r3, i32 5
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i192
-%r41 = shl i192 %r40, 160
-%r42 = or i192 %r36, %r41
-%r43 = zext i192 %r42 to i224
-%r45 = getelementptr i32, i32* %r3, i32 6
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i224
-%r48 = shl i224 %r47, 192
-%r49 = or i224 %r43, %r48
-%r50 = zext i224 %r49 to i256
-%r52 = getelementptr i32, i32* %r3, i32 7
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i256
-%r55 = shl i256 %r54, 224
-%r56 = or i256 %r50, %r55
-%r57 = zext i256 %r56 to i288
-%r59 = getelementptr i32, i32* %r3, i32 8
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i288
-%r62 = shl i288 %r61, 256
-%r63 = or i288 %r57, %r62
-%r64 = zext i288 %r63 to i320
-%r66 = getelementptr i32, i32* %r3, i32 9
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i320
-%r69 = shl i320 %r68, 288
-%r70 = or i320 %r64, %r69
-%r71 = zext i320 %r70 to i352
-%r73 = getelementptr i32, i32* %r3, i32 10
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i352
-%r76 = shl i352 %r75, 320
-%r77 = or i352 %r71, %r76
-%r78 = load i32, i32* %r2
-%r79 = zext i32 %r78 to i64
-%r81 = getelementptr i32, i32* %r2, i32 1
-%r82 = load i32, i32* %r81
-%r83 = zext i32 %r82 to i64
-%r84 = shl i64 %r83, 32
-%r85 = or i64 %r79, %r84
-%r86 = zext i64 %r85 to i96
-%r88 = getelementptr i32, i32* %r2, i32 2
-%r89 = load i32, i32* %r88
-%r90 = zext i32 %r89 to i96
-%r91 = shl i96 %r90, 64
-%r92 = or i96 %r86, %r91
-%r93 = zext i96 %r92 to i128
-%r95 = getelementptr i32, i32* %r2, i32 3
-%r96 = load i32, i32* %r95
-%r97 = zext i32 %r96 to i128
-%r98 = shl i128 %r97, 96
-%r99 = or i128 %r93, %r98
-%r100 = zext i128 %r99 to i160
-%r102 = getelementptr i32, i32* %r2, i32 4
-%r103 = load i32, i32* %r102
-%r104 = zext i32 %r103 to i160
-%r105 = shl i160 %r104, 128
-%r106 = or i160 %r100, %r105
-%r107 = zext i160 %r106 to i192
-%r109 = getelementptr i32, i32* %r2, i32 5
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i192
-%r112 = shl i192 %r111, 160
-%r113 = or i192 %r107, %r112
-%r114 = zext i192 %r113 to i224
-%r116 = getelementptr i32, i32* %r2, i32 6
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i224
-%r119 = shl i224 %r118, 192
-%r120 = or i224 %r114, %r119
-%r121 = zext i224 %r120 to i256
-%r123 = getelementptr i32, i32* %r2, i32 7
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i256
-%r126 = shl i256 %r125, 224
-%r127 = or i256 %r121, %r126
-%r128 = zext i256 %r127 to i288
-%r130 = getelementptr i32, i32* %r2, i32 8
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i288
-%r133 = shl i288 %r132, 256
-%r134 = or i288 %r128, %r133
-%r135 = zext i288 %r134 to i320
-%r137 = getelementptr i32, i32* %r2, i32 9
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i320
-%r140 = shl i320 %r139, 288
-%r141 = or i320 %r135, %r140
-%r142 = zext i320 %r141 to i352
-%r144 = getelementptr i32, i32* %r2, i32 10
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i352
-%r147 = shl i352 %r146, 320
-%r148 = or i352 %r142, %r147
-%r149 = zext i352 %r148 to i384
-%r151 = getelementptr i32, i32* %r2, i32 11
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i384
-%r154 = shl i384 %r153, 352
-%r155 = or i384 %r149, %r154
-%r156 = zext i384 %r155 to i416
-%r158 = getelementptr i32, i32* %r2, i32 12
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i416
-%r161 = shl i416 %r160, 384
-%r162 = or i416 %r156, %r161
-%r163 = zext i416 %r162 to i448
-%r165 = getelementptr i32, i32* %r2, i32 13
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i448
-%r168 = shl i448 %r167, 416
-%r169 = or i448 %r163, %r168
-%r170 = zext i448 %r169 to i480
-%r172 = getelementptr i32, i32* %r2, i32 14
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i480
-%r175 = shl i480 %r174, 448
-%r176 = or i480 %r170, %r175
-%r177 = zext i480 %r176 to i512
-%r179 = getelementptr i32, i32* %r2, i32 15
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i512
-%r182 = shl i512 %r181, 480
-%r183 = or i512 %r177, %r182
-%r184 = zext i512 %r183 to i544
-%r186 = getelementptr i32, i32* %r2, i32 16
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i544
-%r189 = shl i544 %r188, 512
-%r190 = or i544 %r184, %r189
-%r191 = zext i544 %r190 to i576
-%r193 = getelementptr i32, i32* %r2, i32 17
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i576
-%r196 = shl i576 %r195, 544
-%r197 = or i576 %r191, %r196
-%r198 = zext i576 %r197 to i608
-%r200 = getelementptr i32, i32* %r2, i32 18
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i608
-%r203 = shl i608 %r202, 576
-%r204 = or i608 %r198, %r203
-%r205 = zext i608 %r204 to i640
-%r207 = getelementptr i32, i32* %r2, i32 19
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i640
-%r210 = shl i640 %r209, 608
-%r211 = or i640 %r205, %r210
-%r212 = zext i640 %r211 to i672
-%r214 = getelementptr i32, i32* %r2, i32 20
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i672
-%r217 = shl i672 %r216, 640
-%r218 = or i672 %r212, %r217
-%r219 = zext i672 %r218 to i704
-%r221 = getelementptr i32, i32* %r2, i32 21
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i704
-%r224 = shl i704 %r223, 672
-%r225 = or i704 %r219, %r224
-%r226 = zext i704 %r225 to i736
-%r227 = trunc i736 %r226 to i32
-%r228 = mul i32 %r227, %r6
-%r229 = call i384 @mulPv352x32(i32* %r3, i32 %r228)
-%r230 = zext i384 %r229 to i736
-%r231 = add i736 %r226, %r230
-%r232 = lshr i736 %r231, 32
-%r233 = trunc i736 %r232 to i704
-%r234 = trunc i704 %r233 to i32
-%r235 = mul i32 %r234, %r6
-%r236 = call i384 @mulPv352x32(i32* %r3, i32 %r235)
-%r237 = zext i384 %r236 to i704
-%r238 = add i704 %r233, %r237
-%r239 = lshr i704 %r238, 32
-%r240 = trunc i704 %r239 to i672
-%r241 = trunc i672 %r240 to i32
-%r242 = mul i32 %r241, %r6
-%r243 = call i384 @mulPv352x32(i32* %r3, i32 %r242)
-%r244 = zext i384 %r243 to i672
-%r245 = add i672 %r240, %r244
-%r246 = lshr i672 %r245, 32
-%r247 = trunc i672 %r246 to i640
-%r248 = trunc i640 %r247 to i32
-%r249 = mul i32 %r248, %r6
-%r250 = call i384 @mulPv352x32(i32* %r3, i32 %r249)
-%r251 = zext i384 %r250 to i640
-%r252 = add i640 %r247, %r251
-%r253 = lshr i640 %r252, 32
-%r254 = trunc i640 %r253 to i608
-%r255 = trunc i608 %r254 to i32
-%r256 = mul i32 %r255, %r6
-%r257 = call i384 @mulPv352x32(i32* %r3, i32 %r256)
-%r258 = zext i384 %r257 to i608
-%r259 = add i608 %r254, %r258
-%r260 = lshr i608 %r259, 32
-%r261 = trunc i608 %r260 to i576
-%r262 = trunc i576 %r261 to i32
-%r263 = mul i32 %r262, %r6
-%r264 = call i384 @mulPv352x32(i32* %r3, i32 %r263)
-%r265 = zext i384 %r264 to i576
-%r266 = add i576 %r261, %r265
-%r267 = lshr i576 %r266, 32
-%r268 = trunc i576 %r267 to i544
-%r269 = trunc i544 %r268 to i32
-%r270 = mul i32 %r269, %r6
-%r271 = call i384 @mulPv352x32(i32* %r3, i32 %r270)
-%r272 = zext i384 %r271 to i544
-%r273 = add i544 %r268, %r272
-%r274 = lshr i544 %r273, 32
-%r275 = trunc i544 %r274 to i512
-%r276 = trunc i512 %r275 to i32
-%r277 = mul i32 %r276, %r6
-%r278 = call i384 @mulPv352x32(i32* %r3, i32 %r277)
-%r279 = zext i384 %r278 to i512
-%r280 = add i512 %r275, %r279
-%r281 = lshr i512 %r280, 32
-%r282 = trunc i512 %r281 to i480
-%r283 = trunc i480 %r282 to i32
-%r284 = mul i32 %r283, %r6
-%r285 = call i384 @mulPv352x32(i32* %r3, i32 %r284)
-%r286 = zext i384 %r285 to i480
-%r287 = add i480 %r282, %r286
-%r288 = lshr i480 %r287, 32
-%r289 = trunc i480 %r288 to i448
-%r290 = trunc i448 %r289 to i32
-%r291 = mul i32 %r290, %r6
-%r292 = call i384 @mulPv352x32(i32* %r3, i32 %r291)
-%r293 = zext i384 %r292 to i448
-%r294 = add i448 %r289, %r293
-%r295 = lshr i448 %r294, 32
-%r296 = trunc i448 %r295 to i416
-%r297 = trunc i416 %r296 to i32
-%r298 = mul i32 %r297, %r6
-%r299 = call i384 @mulPv352x32(i32* %r3, i32 %r298)
-%r300 = zext i384 %r299 to i416
-%r301 = add i416 %r296, %r300
-%r302 = lshr i416 %r301, 32
-%r303 = trunc i416 %r302 to i384
-%r304 = zext i352 %r77 to i384
-%r305 = sub i384 %r303, %r304
-%r306 = lshr i384 %r305, 352
-%r307 = trunc i384 %r306 to i1
-%r308 = select i1 %r307, i384 %r303, i384 %r305
-%r309 = trunc i384 %r308 to i352
-%r310 = trunc i352 %r309 to i32
-%r312 = getelementptr i32, i32* %r1, i32 0
-store i32 %r310, i32* %r312
-%r313 = lshr i352 %r309, 32
-%r314 = trunc i352 %r313 to i32
-%r316 = getelementptr i32, i32* %r1, i32 1
-store i32 %r314, i32* %r316
-%r317 = lshr i352 %r313, 32
-%r318 = trunc i352 %r317 to i32
-%r320 = getelementptr i32, i32* %r1, i32 2
-store i32 %r318, i32* %r320
-%r321 = lshr i352 %r317, 32
-%r322 = trunc i352 %r321 to i32
-%r324 = getelementptr i32, i32* %r1, i32 3
-store i32 %r322, i32* %r324
-%r325 = lshr i352 %r321, 32
-%r326 = trunc i352 %r325 to i32
-%r328 = getelementptr i32, i32* %r1, i32 4
-store i32 %r326, i32* %r328
-%r329 = lshr i352 %r325, 32
-%r330 = trunc i352 %r329 to i32
-%r332 = getelementptr i32, i32* %r1, i32 5
-store i32 %r330, i32* %r332
-%r333 = lshr i352 %r329, 32
-%r334 = trunc i352 %r333 to i32
-%r336 = getelementptr i32, i32* %r1, i32 6
-store i32 %r334, i32* %r336
-%r337 = lshr i352 %r333, 32
-%r338 = trunc i352 %r337 to i32
-%r340 = getelementptr i32, i32* %r1, i32 7
-store i32 %r338, i32* %r340
-%r341 = lshr i352 %r337, 32
-%r342 = trunc i352 %r341 to i32
-%r344 = getelementptr i32, i32* %r1, i32 8
-store i32 %r342, i32* %r344
-%r345 = lshr i352 %r341, 32
-%r346 = trunc i352 %r345 to i32
-%r348 = getelementptr i32, i32* %r1, i32 9
-store i32 %r346, i32* %r348
-%r349 = lshr i352 %r345, 32
-%r350 = trunc i352 %r349 to i32
-%r352 = getelementptr i32, i32* %r1, i32 10
-store i32 %r350, i32* %r352
-ret void
-}
-define i32 @mcl_fp_addPre11L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r3, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r77 = load i32, i32* %r4
-%r78 = zext i32 %r77 to i64
-%r80 = getelementptr i32, i32* %r4, i32 1
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i64
-%r83 = shl i64 %r82, 32
-%r84 = or i64 %r78, %r83
-%r85 = zext i64 %r84 to i96
-%r87 = getelementptr i32, i32* %r4, i32 2
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i96
-%r90 = shl i96 %r89, 64
-%r91 = or i96 %r85, %r90
-%r92 = zext i96 %r91 to i128
-%r94 = getelementptr i32, i32* %r4, i32 3
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i128
-%r97 = shl i128 %r96, 96
-%r98 = or i128 %r92, %r97
-%r99 = zext i128 %r98 to i160
-%r101 = getelementptr i32, i32* %r4, i32 4
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i160
-%r104 = shl i160 %r103, 128
-%r105 = or i160 %r99, %r104
-%r106 = zext i160 %r105 to i192
-%r108 = getelementptr i32, i32* %r4, i32 5
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i192
-%r111 = shl i192 %r110, 160
-%r112 = or i192 %r106, %r111
-%r113 = zext i192 %r112 to i224
-%r115 = getelementptr i32, i32* %r4, i32 6
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i224
-%r118 = shl i224 %r117, 192
-%r119 = or i224 %r113, %r118
-%r120 = zext i224 %r119 to i256
-%r122 = getelementptr i32, i32* %r4, i32 7
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i256
-%r125 = shl i256 %r124, 224
-%r126 = or i256 %r120, %r125
-%r127 = zext i256 %r126 to i288
-%r129 = getelementptr i32, i32* %r4, i32 8
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i288
-%r132 = shl i288 %r131, 256
-%r133 = or i288 %r127, %r132
-%r134 = zext i288 %r133 to i320
-%r136 = getelementptr i32, i32* %r4, i32 9
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i320
-%r139 = shl i320 %r138, 288
-%r140 = or i320 %r134, %r139
-%r141 = zext i320 %r140 to i352
-%r143 = getelementptr i32, i32* %r4, i32 10
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i352
-%r146 = shl i352 %r145, 320
-%r147 = or i352 %r141, %r146
-%r148 = zext i352 %r147 to i384
-%r149 = add i384 %r76, %r148
-%r150 = trunc i384 %r149 to i352
-%r151 = trunc i352 %r150 to i32
-%r153 = getelementptr i32, i32* %r2, i32 0
-store i32 %r151, i32* %r153
-%r154 = lshr i352 %r150, 32
-%r155 = trunc i352 %r154 to i32
-%r157 = getelementptr i32, i32* %r2, i32 1
-store i32 %r155, i32* %r157
-%r158 = lshr i352 %r154, 32
-%r159 = trunc i352 %r158 to i32
-%r161 = getelementptr i32, i32* %r2, i32 2
-store i32 %r159, i32* %r161
-%r162 = lshr i352 %r158, 32
-%r163 = trunc i352 %r162 to i32
-%r165 = getelementptr i32, i32* %r2, i32 3
-store i32 %r163, i32* %r165
-%r166 = lshr i352 %r162, 32
-%r167 = trunc i352 %r166 to i32
-%r169 = getelementptr i32, i32* %r2, i32 4
-store i32 %r167, i32* %r169
-%r170 = lshr i352 %r166, 32
-%r171 = trunc i352 %r170 to i32
-%r173 = getelementptr i32, i32* %r2, i32 5
-store i32 %r171, i32* %r173
-%r174 = lshr i352 %r170, 32
-%r175 = trunc i352 %r174 to i32
-%r177 = getelementptr i32, i32* %r2, i32 6
-store i32 %r175, i32* %r177
-%r178 = lshr i352 %r174, 32
-%r179 = trunc i352 %r178 to i32
-%r181 = getelementptr i32, i32* %r2, i32 7
-store i32 %r179, i32* %r181
-%r182 = lshr i352 %r178, 32
-%r183 = trunc i352 %r182 to i32
-%r185 = getelementptr i32, i32* %r2, i32 8
-store i32 %r183, i32* %r185
-%r186 = lshr i352 %r182, 32
-%r187 = trunc i352 %r186 to i32
-%r189 = getelementptr i32, i32* %r2, i32 9
-store i32 %r187, i32* %r189
-%r190 = lshr i352 %r186, 32
-%r191 = trunc i352 %r190 to i32
-%r193 = getelementptr i32, i32* %r2, i32 10
-store i32 %r191, i32* %r193
-%r194 = lshr i384 %r149, 352
-%r195 = trunc i384 %r194 to i32
-ret i32 %r195
-}
-define i32 @mcl_fp_subPre11L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r3, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r77 = load i32, i32* %r4
-%r78 = zext i32 %r77 to i64
-%r80 = getelementptr i32, i32* %r4, i32 1
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i64
-%r83 = shl i64 %r82, 32
-%r84 = or i64 %r78, %r83
-%r85 = zext i64 %r84 to i96
-%r87 = getelementptr i32, i32* %r4, i32 2
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i96
-%r90 = shl i96 %r89, 64
-%r91 = or i96 %r85, %r90
-%r92 = zext i96 %r91 to i128
-%r94 = getelementptr i32, i32* %r4, i32 3
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i128
-%r97 = shl i128 %r96, 96
-%r98 = or i128 %r92, %r97
-%r99 = zext i128 %r98 to i160
-%r101 = getelementptr i32, i32* %r4, i32 4
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i160
-%r104 = shl i160 %r103, 128
-%r105 = or i160 %r99, %r104
-%r106 = zext i160 %r105 to i192
-%r108 = getelementptr i32, i32* %r4, i32 5
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i192
-%r111 = shl i192 %r110, 160
-%r112 = or i192 %r106, %r111
-%r113 = zext i192 %r112 to i224
-%r115 = getelementptr i32, i32* %r4, i32 6
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i224
-%r118 = shl i224 %r117, 192
-%r119 = or i224 %r113, %r118
-%r120 = zext i224 %r119 to i256
-%r122 = getelementptr i32, i32* %r4, i32 7
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i256
-%r125 = shl i256 %r124, 224
-%r126 = or i256 %r120, %r125
-%r127 = zext i256 %r126 to i288
-%r129 = getelementptr i32, i32* %r4, i32 8
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i288
-%r132 = shl i288 %r131, 256
-%r133 = or i288 %r127, %r132
-%r134 = zext i288 %r133 to i320
-%r136 = getelementptr i32, i32* %r4, i32 9
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i320
-%r139 = shl i320 %r138, 288
-%r140 = or i320 %r134, %r139
-%r141 = zext i320 %r140 to i352
-%r143 = getelementptr i32, i32* %r4, i32 10
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i352
-%r146 = shl i352 %r145, 320
-%r147 = or i352 %r141, %r146
-%r148 = zext i352 %r147 to i384
-%r149 = sub i384 %r76, %r148
-%r150 = trunc i384 %r149 to i352
-%r151 = trunc i352 %r150 to i32
-%r153 = getelementptr i32, i32* %r2, i32 0
-store i32 %r151, i32* %r153
-%r154 = lshr i352 %r150, 32
-%r155 = trunc i352 %r154 to i32
-%r157 = getelementptr i32, i32* %r2, i32 1
-store i32 %r155, i32* %r157
-%r158 = lshr i352 %r154, 32
-%r159 = trunc i352 %r158 to i32
-%r161 = getelementptr i32, i32* %r2, i32 2
-store i32 %r159, i32* %r161
-%r162 = lshr i352 %r158, 32
-%r163 = trunc i352 %r162 to i32
-%r165 = getelementptr i32, i32* %r2, i32 3
-store i32 %r163, i32* %r165
-%r166 = lshr i352 %r162, 32
-%r167 = trunc i352 %r166 to i32
-%r169 = getelementptr i32, i32* %r2, i32 4
-store i32 %r167, i32* %r169
-%r170 = lshr i352 %r166, 32
-%r171 = trunc i352 %r170 to i32
-%r173 = getelementptr i32, i32* %r2, i32 5
-store i32 %r171, i32* %r173
-%r174 = lshr i352 %r170, 32
-%r175 = trunc i352 %r174 to i32
-%r177 = getelementptr i32, i32* %r2, i32 6
-store i32 %r175, i32* %r177
-%r178 = lshr i352 %r174, 32
-%r179 = trunc i352 %r178 to i32
-%r181 = getelementptr i32, i32* %r2, i32 7
-store i32 %r179, i32* %r181
-%r182 = lshr i352 %r178, 32
-%r183 = trunc i352 %r182 to i32
-%r185 = getelementptr i32, i32* %r2, i32 8
-store i32 %r183, i32* %r185
-%r186 = lshr i352 %r182, 32
-%r187 = trunc i352 %r186 to i32
-%r189 = getelementptr i32, i32* %r2, i32 9
-store i32 %r187, i32* %r189
-%r190 = lshr i352 %r186, 32
-%r191 = trunc i352 %r190 to i32
-%r193 = getelementptr i32, i32* %r2, i32 10
-store i32 %r191, i32* %r193
-%r194 = lshr i384 %r149, 352
-%r195 = trunc i384 %r194 to i32
-%r197 = and i32 %r195, 1
-ret i32 %r197
-}
-define void @mcl_fp_shr1_11L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = zext i64 %r10 to i96
-%r13 = getelementptr i32, i32* %r2, i32 2
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i96
-%r16 = shl i96 %r15, 64
-%r17 = or i96 %r11, %r16
-%r18 = zext i96 %r17 to i128
-%r20 = getelementptr i32, i32* %r2, i32 3
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i128
-%r23 = shl i128 %r22, 96
-%r24 = or i128 %r18, %r23
-%r25 = zext i128 %r24 to i160
-%r27 = getelementptr i32, i32* %r2, i32 4
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i160
-%r30 = shl i160 %r29, 128
-%r31 = or i160 %r25, %r30
-%r32 = zext i160 %r31 to i192
-%r34 = getelementptr i32, i32* %r2, i32 5
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i192
-%r37 = shl i192 %r36, 160
-%r38 = or i192 %r32, %r37
-%r39 = zext i192 %r38 to i224
-%r41 = getelementptr i32, i32* %r2, i32 6
-%r42 = load i32, i32* %r41
-%r43 = zext i32 %r42 to i224
-%r44 = shl i224 %r43, 192
-%r45 = or i224 %r39, %r44
-%r46 = zext i224 %r45 to i256
-%r48 = getelementptr i32, i32* %r2, i32 7
-%r49 = load i32, i32* %r48
-%r50 = zext i32 %r49 to i256
-%r51 = shl i256 %r50, 224
-%r52 = or i256 %r46, %r51
-%r53 = zext i256 %r52 to i288
-%r55 = getelementptr i32, i32* %r2, i32 8
-%r56 = load i32, i32* %r55
-%r57 = zext i32 %r56 to i288
-%r58 = shl i288 %r57, 256
-%r59 = or i288 %r53, %r58
-%r60 = zext i288 %r59 to i320
-%r62 = getelementptr i32, i32* %r2, i32 9
-%r63 = load i32, i32* %r62
-%r64 = zext i32 %r63 to i320
-%r65 = shl i320 %r64, 288
-%r66 = or i320 %r60, %r65
-%r67 = zext i320 %r66 to i352
-%r69 = getelementptr i32, i32* %r2, i32 10
-%r70 = load i32, i32* %r69
-%r71 = zext i32 %r70 to i352
-%r72 = shl i352 %r71, 320
-%r73 = or i352 %r67, %r72
-%r74 = lshr i352 %r73, 1
-%r75 = trunc i352 %r74 to i32
-%r77 = getelementptr i32, i32* %r1, i32 0
-store i32 %r75, i32* %r77
-%r78 = lshr i352 %r74, 32
-%r79 = trunc i352 %r78 to i32
-%r81 = getelementptr i32, i32* %r1, i32 1
-store i32 %r79, i32* %r81
-%r82 = lshr i352 %r78, 32
-%r83 = trunc i352 %r82 to i32
-%r85 = getelementptr i32, i32* %r1, i32 2
-store i32 %r83, i32* %r85
-%r86 = lshr i352 %r82, 32
-%r87 = trunc i352 %r86 to i32
-%r89 = getelementptr i32, i32* %r1, i32 3
-store i32 %r87, i32* %r89
-%r90 = lshr i352 %r86, 32
-%r91 = trunc i352 %r90 to i32
-%r93 = getelementptr i32, i32* %r1, i32 4
-store i32 %r91, i32* %r93
-%r94 = lshr i352 %r90, 32
-%r95 = trunc i352 %r94 to i32
-%r97 = getelementptr i32, i32* %r1, i32 5
-store i32 %r95, i32* %r97
-%r98 = lshr i352 %r94, 32
-%r99 = trunc i352 %r98 to i32
-%r101 = getelementptr i32, i32* %r1, i32 6
-store i32 %r99, i32* %r101
-%r102 = lshr i352 %r98, 32
-%r103 = trunc i352 %r102 to i32
-%r105 = getelementptr i32, i32* %r1, i32 7
-store i32 %r103, i32* %r105
-%r106 = lshr i352 %r102, 32
-%r107 = trunc i352 %r106 to i32
-%r109 = getelementptr i32, i32* %r1, i32 8
-store i32 %r107, i32* %r109
-%r110 = lshr i352 %r106, 32
-%r111 = trunc i352 %r110 to i32
-%r113 = getelementptr i32, i32* %r1, i32 9
-store i32 %r111, i32* %r113
-%r114 = lshr i352 %r110, 32
-%r115 = trunc i352 %r114 to i32
-%r117 = getelementptr i32, i32* %r1, i32 10
-store i32 %r115, i32* %r117
-ret void
-}
-define void @mcl_fp_add11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = load i32, i32* %r3
-%r77 = zext i32 %r76 to i64
-%r79 = getelementptr i32, i32* %r3, i32 1
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i64
-%r82 = shl i64 %r81, 32
-%r83 = or i64 %r77, %r82
-%r84 = zext i64 %r83 to i96
-%r86 = getelementptr i32, i32* %r3, i32 2
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i96
-%r89 = shl i96 %r88, 64
-%r90 = or i96 %r84, %r89
-%r91 = zext i96 %r90 to i128
-%r93 = getelementptr i32, i32* %r3, i32 3
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i128
-%r96 = shl i128 %r95, 96
-%r97 = or i128 %r91, %r96
-%r98 = zext i128 %r97 to i160
-%r100 = getelementptr i32, i32* %r3, i32 4
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i160
-%r103 = shl i160 %r102, 128
-%r104 = or i160 %r98, %r103
-%r105 = zext i160 %r104 to i192
-%r107 = getelementptr i32, i32* %r3, i32 5
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i192
-%r110 = shl i192 %r109, 160
-%r111 = or i192 %r105, %r110
-%r112 = zext i192 %r111 to i224
-%r114 = getelementptr i32, i32* %r3, i32 6
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i224
-%r117 = shl i224 %r116, 192
-%r118 = or i224 %r112, %r117
-%r119 = zext i224 %r118 to i256
-%r121 = getelementptr i32, i32* %r3, i32 7
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i256
-%r124 = shl i256 %r123, 224
-%r125 = or i256 %r119, %r124
-%r126 = zext i256 %r125 to i288
-%r128 = getelementptr i32, i32* %r3, i32 8
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i288
-%r131 = shl i288 %r130, 256
-%r132 = or i288 %r126, %r131
-%r133 = zext i288 %r132 to i320
-%r135 = getelementptr i32, i32* %r3, i32 9
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i320
-%r138 = shl i320 %r137, 288
-%r139 = or i320 %r133, %r138
-%r140 = zext i320 %r139 to i352
-%r142 = getelementptr i32, i32* %r3, i32 10
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i352
-%r145 = shl i352 %r144, 320
-%r146 = or i352 %r140, %r145
-%r147 = zext i352 %r75 to i384
-%r148 = zext i352 %r146 to i384
-%r149 = add i384 %r147, %r148
-%r150 = trunc i384 %r149 to i352
-%r151 = trunc i352 %r150 to i32
-%r153 = getelementptr i32, i32* %r1, i32 0
-store i32 %r151, i32* %r153
-%r154 = lshr i352 %r150, 32
-%r155 = trunc i352 %r154 to i32
-%r157 = getelementptr i32, i32* %r1, i32 1
-store i32 %r155, i32* %r157
-%r158 = lshr i352 %r154, 32
-%r159 = trunc i352 %r158 to i32
-%r161 = getelementptr i32, i32* %r1, i32 2
-store i32 %r159, i32* %r161
-%r162 = lshr i352 %r158, 32
-%r163 = trunc i352 %r162 to i32
-%r165 = getelementptr i32, i32* %r1, i32 3
-store i32 %r163, i32* %r165
-%r166 = lshr i352 %r162, 32
-%r167 = trunc i352 %r166 to i32
-%r169 = getelementptr i32, i32* %r1, i32 4
-store i32 %r167, i32* %r169
-%r170 = lshr i352 %r166, 32
-%r171 = trunc i352 %r170 to i32
-%r173 = getelementptr i32, i32* %r1, i32 5
-store i32 %r171, i32* %r173
-%r174 = lshr i352 %r170, 32
-%r175 = trunc i352 %r174 to i32
-%r177 = getelementptr i32, i32* %r1, i32 6
-store i32 %r175, i32* %r177
-%r178 = lshr i352 %r174, 32
-%r179 = trunc i352 %r178 to i32
-%r181 = getelementptr i32, i32* %r1, i32 7
-store i32 %r179, i32* %r181
-%r182 = lshr i352 %r178, 32
-%r183 = trunc i352 %r182 to i32
-%r185 = getelementptr i32, i32* %r1, i32 8
-store i32 %r183, i32* %r185
-%r186 = lshr i352 %r182, 32
-%r187 = trunc i352 %r186 to i32
-%r189 = getelementptr i32, i32* %r1, i32 9
-store i32 %r187, i32* %r189
-%r190 = lshr i352 %r186, 32
-%r191 = trunc i352 %r190 to i32
-%r193 = getelementptr i32, i32* %r1, i32 10
-store i32 %r191, i32* %r193
-%r194 = load i32, i32* %r4
-%r195 = zext i32 %r194 to i64
-%r197 = getelementptr i32, i32* %r4, i32 1
-%r198 = load i32, i32* %r197
-%r199 = zext i32 %r198 to i64
-%r200 = shl i64 %r199, 32
-%r201 = or i64 %r195, %r200
-%r202 = zext i64 %r201 to i96
-%r204 = getelementptr i32, i32* %r4, i32 2
-%r205 = load i32, i32* %r204
-%r206 = zext i32 %r205 to i96
-%r207 = shl i96 %r206, 64
-%r208 = or i96 %r202, %r207
-%r209 = zext i96 %r208 to i128
-%r211 = getelementptr i32, i32* %r4, i32 3
-%r212 = load i32, i32* %r211
-%r213 = zext i32 %r212 to i128
-%r214 = shl i128 %r213, 96
-%r215 = or i128 %r209, %r214
-%r216 = zext i128 %r215 to i160
-%r218 = getelementptr i32, i32* %r4, i32 4
-%r219 = load i32, i32* %r218
-%r220 = zext i32 %r219 to i160
-%r221 = shl i160 %r220, 128
-%r222 = or i160 %r216, %r221
-%r223 = zext i160 %r222 to i192
-%r225 = getelementptr i32, i32* %r4, i32 5
-%r226 = load i32, i32* %r225
-%r227 = zext i32 %r226 to i192
-%r228 = shl i192 %r227, 160
-%r229 = or i192 %r223, %r228
-%r230 = zext i192 %r229 to i224
-%r232 = getelementptr i32, i32* %r4, i32 6
-%r233 = load i32, i32* %r232
-%r234 = zext i32 %r233 to i224
-%r235 = shl i224 %r234, 192
-%r236 = or i224 %r230, %r235
-%r237 = zext i224 %r236 to i256
-%r239 = getelementptr i32, i32* %r4, i32 7
-%r240 = load i32, i32* %r239
-%r241 = zext i32 %r240 to i256
-%r242 = shl i256 %r241, 224
-%r243 = or i256 %r237, %r242
-%r244 = zext i256 %r243 to i288
-%r246 = getelementptr i32, i32* %r4, i32 8
-%r247 = load i32, i32* %r246
-%r248 = zext i32 %r247 to i288
-%r249 = shl i288 %r248, 256
-%r250 = or i288 %r244, %r249
-%r251 = zext i288 %r250 to i320
-%r253 = getelementptr i32, i32* %r4, i32 9
-%r254 = load i32, i32* %r253
-%r255 = zext i32 %r254 to i320
-%r256 = shl i320 %r255, 288
-%r257 = or i320 %r251, %r256
-%r258 = zext i320 %r257 to i352
-%r260 = getelementptr i32, i32* %r4, i32 10
-%r261 = load i32, i32* %r260
-%r262 = zext i32 %r261 to i352
-%r263 = shl i352 %r262, 320
-%r264 = or i352 %r258, %r263
-%r265 = zext i352 %r264 to i384
-%r266 = sub i384 %r149, %r265
-%r267 = lshr i384 %r266, 352
-%r268 = trunc i384 %r267 to i1
-br i1%r268, label %carry, label %nocarry
-nocarry:
-%r269 = trunc i384 %r266 to i352
-%r270 = trunc i352 %r269 to i32
-%r272 = getelementptr i32, i32* %r1, i32 0
-store i32 %r270, i32* %r272
-%r273 = lshr i352 %r269, 32
-%r274 = trunc i352 %r273 to i32
-%r276 = getelementptr i32, i32* %r1, i32 1
-store i32 %r274, i32* %r276
-%r277 = lshr i352 %r273, 32
-%r278 = trunc i352 %r277 to i32
-%r280 = getelementptr i32, i32* %r1, i32 2
-store i32 %r278, i32* %r280
-%r281 = lshr i352 %r277, 32
-%r282 = trunc i352 %r281 to i32
-%r284 = getelementptr i32, i32* %r1, i32 3
-store i32 %r282, i32* %r284
-%r285 = lshr i352 %r281, 32
-%r286 = trunc i352 %r285 to i32
-%r288 = getelementptr i32, i32* %r1, i32 4
-store i32 %r286, i32* %r288
-%r289 = lshr i352 %r285, 32
-%r290 = trunc i352 %r289 to i32
-%r292 = getelementptr i32, i32* %r1, i32 5
-store i32 %r290, i32* %r292
-%r293 = lshr i352 %r289, 32
-%r294 = trunc i352 %r293 to i32
-%r296 = getelementptr i32, i32* %r1, i32 6
-store i32 %r294, i32* %r296
-%r297 = lshr i352 %r293, 32
-%r298 = trunc i352 %r297 to i32
-%r300 = getelementptr i32, i32* %r1, i32 7
-store i32 %r298, i32* %r300
-%r301 = lshr i352 %r297, 32
-%r302 = trunc i352 %r301 to i32
-%r304 = getelementptr i32, i32* %r1, i32 8
-store i32 %r302, i32* %r304
-%r305 = lshr i352 %r301, 32
-%r306 = trunc i352 %r305 to i32
-%r308 = getelementptr i32, i32* %r1, i32 9
-store i32 %r306, i32* %r308
-%r309 = lshr i352 %r305, 32
-%r310 = trunc i352 %r309 to i32
-%r312 = getelementptr i32, i32* %r1, i32 10
-store i32 %r310, i32* %r312
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = load i32, i32* %r3
-%r77 = zext i32 %r76 to i64
-%r79 = getelementptr i32, i32* %r3, i32 1
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i64
-%r82 = shl i64 %r81, 32
-%r83 = or i64 %r77, %r82
-%r84 = zext i64 %r83 to i96
-%r86 = getelementptr i32, i32* %r3, i32 2
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i96
-%r89 = shl i96 %r88, 64
-%r90 = or i96 %r84, %r89
-%r91 = zext i96 %r90 to i128
-%r93 = getelementptr i32, i32* %r3, i32 3
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i128
-%r96 = shl i128 %r95, 96
-%r97 = or i128 %r91, %r96
-%r98 = zext i128 %r97 to i160
-%r100 = getelementptr i32, i32* %r3, i32 4
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i160
-%r103 = shl i160 %r102, 128
-%r104 = or i160 %r98, %r103
-%r105 = zext i160 %r104 to i192
-%r107 = getelementptr i32, i32* %r3, i32 5
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i192
-%r110 = shl i192 %r109, 160
-%r111 = or i192 %r105, %r110
-%r112 = zext i192 %r111 to i224
-%r114 = getelementptr i32, i32* %r3, i32 6
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i224
-%r117 = shl i224 %r116, 192
-%r118 = or i224 %r112, %r117
-%r119 = zext i224 %r118 to i256
-%r121 = getelementptr i32, i32* %r3, i32 7
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i256
-%r124 = shl i256 %r123, 224
-%r125 = or i256 %r119, %r124
-%r126 = zext i256 %r125 to i288
-%r128 = getelementptr i32, i32* %r3, i32 8
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i288
-%r131 = shl i288 %r130, 256
-%r132 = or i288 %r126, %r131
-%r133 = zext i288 %r132 to i320
-%r135 = getelementptr i32, i32* %r3, i32 9
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i320
-%r138 = shl i320 %r137, 288
-%r139 = or i320 %r133, %r138
-%r140 = zext i320 %r139 to i352
-%r142 = getelementptr i32, i32* %r3, i32 10
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i352
-%r145 = shl i352 %r144, 320
-%r146 = or i352 %r140, %r145
-%r147 = add i352 %r75, %r146
-%r148 = load i32, i32* %r4
-%r149 = zext i32 %r148 to i64
-%r151 = getelementptr i32, i32* %r4, i32 1
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i64
-%r154 = shl i64 %r153, 32
-%r155 = or i64 %r149, %r154
-%r156 = zext i64 %r155 to i96
-%r158 = getelementptr i32, i32* %r4, i32 2
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i96
-%r161 = shl i96 %r160, 64
-%r162 = or i96 %r156, %r161
-%r163 = zext i96 %r162 to i128
-%r165 = getelementptr i32, i32* %r4, i32 3
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i128
-%r168 = shl i128 %r167, 96
-%r169 = or i128 %r163, %r168
-%r170 = zext i128 %r169 to i160
-%r172 = getelementptr i32, i32* %r4, i32 4
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i160
-%r175 = shl i160 %r174, 128
-%r176 = or i160 %r170, %r175
-%r177 = zext i160 %r176 to i192
-%r179 = getelementptr i32, i32* %r4, i32 5
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i192
-%r182 = shl i192 %r181, 160
-%r183 = or i192 %r177, %r182
-%r184 = zext i192 %r183 to i224
-%r186 = getelementptr i32, i32* %r4, i32 6
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i224
-%r189 = shl i224 %r188, 192
-%r190 = or i224 %r184, %r189
-%r191 = zext i224 %r190 to i256
-%r193 = getelementptr i32, i32* %r4, i32 7
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i256
-%r196 = shl i256 %r195, 224
-%r197 = or i256 %r191, %r196
-%r198 = zext i256 %r197 to i288
-%r200 = getelementptr i32, i32* %r4, i32 8
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i288
-%r203 = shl i288 %r202, 256
-%r204 = or i288 %r198, %r203
-%r205 = zext i288 %r204 to i320
-%r207 = getelementptr i32, i32* %r4, i32 9
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i320
-%r210 = shl i320 %r209, 288
-%r211 = or i320 %r205, %r210
-%r212 = zext i320 %r211 to i352
-%r214 = getelementptr i32, i32* %r4, i32 10
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i352
-%r217 = shl i352 %r216, 320
-%r218 = or i352 %r212, %r217
-%r219 = sub i352 %r147, %r218
-%r220 = lshr i352 %r219, 351
-%r221 = trunc i352 %r220 to i1
-%r222 = select i1 %r221, i352 %r147, i352 %r219
-%r223 = trunc i352 %r222 to i32
-%r225 = getelementptr i32, i32* %r1, i32 0
-store i32 %r223, i32* %r225
-%r226 = lshr i352 %r222, 32
-%r227 = trunc i352 %r226 to i32
-%r229 = getelementptr i32, i32* %r1, i32 1
-store i32 %r227, i32* %r229
-%r230 = lshr i352 %r226, 32
-%r231 = trunc i352 %r230 to i32
-%r233 = getelementptr i32, i32* %r1, i32 2
-store i32 %r231, i32* %r233
-%r234 = lshr i352 %r230, 32
-%r235 = trunc i352 %r234 to i32
-%r237 = getelementptr i32, i32* %r1, i32 3
-store i32 %r235, i32* %r237
-%r238 = lshr i352 %r234, 32
-%r239 = trunc i352 %r238 to i32
-%r241 = getelementptr i32, i32* %r1, i32 4
-store i32 %r239, i32* %r241
-%r242 = lshr i352 %r238, 32
-%r243 = trunc i352 %r242 to i32
-%r245 = getelementptr i32, i32* %r1, i32 5
-store i32 %r243, i32* %r245
-%r246 = lshr i352 %r242, 32
-%r247 = trunc i352 %r246 to i32
-%r249 = getelementptr i32, i32* %r1, i32 6
-store i32 %r247, i32* %r249
-%r250 = lshr i352 %r246, 32
-%r251 = trunc i352 %r250 to i32
-%r253 = getelementptr i32, i32* %r1, i32 7
-store i32 %r251, i32* %r253
-%r254 = lshr i352 %r250, 32
-%r255 = trunc i352 %r254 to i32
-%r257 = getelementptr i32, i32* %r1, i32 8
-store i32 %r255, i32* %r257
-%r258 = lshr i352 %r254, 32
-%r259 = trunc i352 %r258 to i32
-%r261 = getelementptr i32, i32* %r1, i32 9
-store i32 %r259, i32* %r261
-%r262 = lshr i352 %r258, 32
-%r263 = trunc i352 %r262 to i32
-%r265 = getelementptr i32, i32* %r1, i32 10
-store i32 %r263, i32* %r265
-ret void
-}
-define void @mcl_fp_sub11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = load i32, i32* %r3
-%r77 = zext i32 %r76 to i64
-%r79 = getelementptr i32, i32* %r3, i32 1
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i64
-%r82 = shl i64 %r81, 32
-%r83 = or i64 %r77, %r82
-%r84 = zext i64 %r83 to i96
-%r86 = getelementptr i32, i32* %r3, i32 2
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i96
-%r89 = shl i96 %r88, 64
-%r90 = or i96 %r84, %r89
-%r91 = zext i96 %r90 to i128
-%r93 = getelementptr i32, i32* %r3, i32 3
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i128
-%r96 = shl i128 %r95, 96
-%r97 = or i128 %r91, %r96
-%r98 = zext i128 %r97 to i160
-%r100 = getelementptr i32, i32* %r3, i32 4
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i160
-%r103 = shl i160 %r102, 128
-%r104 = or i160 %r98, %r103
-%r105 = zext i160 %r104 to i192
-%r107 = getelementptr i32, i32* %r3, i32 5
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i192
-%r110 = shl i192 %r109, 160
-%r111 = or i192 %r105, %r110
-%r112 = zext i192 %r111 to i224
-%r114 = getelementptr i32, i32* %r3, i32 6
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i224
-%r117 = shl i224 %r116, 192
-%r118 = or i224 %r112, %r117
-%r119 = zext i224 %r118 to i256
-%r121 = getelementptr i32, i32* %r3, i32 7
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i256
-%r124 = shl i256 %r123, 224
-%r125 = or i256 %r119, %r124
-%r126 = zext i256 %r125 to i288
-%r128 = getelementptr i32, i32* %r3, i32 8
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i288
-%r131 = shl i288 %r130, 256
-%r132 = or i288 %r126, %r131
-%r133 = zext i288 %r132 to i320
-%r135 = getelementptr i32, i32* %r3, i32 9
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i320
-%r138 = shl i320 %r137, 288
-%r139 = or i320 %r133, %r138
-%r140 = zext i320 %r139 to i352
-%r142 = getelementptr i32, i32* %r3, i32 10
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i352
-%r145 = shl i352 %r144, 320
-%r146 = or i352 %r140, %r145
-%r147 = zext i352 %r75 to i384
-%r148 = zext i352 %r146 to i384
-%r149 = sub i384 %r147, %r148
-%r150 = trunc i384 %r149 to i352
-%r151 = lshr i384 %r149, 352
-%r152 = trunc i384 %r151 to i1
-%r153 = trunc i352 %r150 to i32
-%r155 = getelementptr i32, i32* %r1, i32 0
-store i32 %r153, i32* %r155
-%r156 = lshr i352 %r150, 32
-%r157 = trunc i352 %r156 to i32
-%r159 = getelementptr i32, i32* %r1, i32 1
-store i32 %r157, i32* %r159
-%r160 = lshr i352 %r156, 32
-%r161 = trunc i352 %r160 to i32
-%r163 = getelementptr i32, i32* %r1, i32 2
-store i32 %r161, i32* %r163
-%r164 = lshr i352 %r160, 32
-%r165 = trunc i352 %r164 to i32
-%r167 = getelementptr i32, i32* %r1, i32 3
-store i32 %r165, i32* %r167
-%r168 = lshr i352 %r164, 32
-%r169 = trunc i352 %r168 to i32
-%r171 = getelementptr i32, i32* %r1, i32 4
-store i32 %r169, i32* %r171
-%r172 = lshr i352 %r168, 32
-%r173 = trunc i352 %r172 to i32
-%r175 = getelementptr i32, i32* %r1, i32 5
-store i32 %r173, i32* %r175
-%r176 = lshr i352 %r172, 32
-%r177 = trunc i352 %r176 to i32
-%r179 = getelementptr i32, i32* %r1, i32 6
-store i32 %r177, i32* %r179
-%r180 = lshr i352 %r176, 32
-%r181 = trunc i352 %r180 to i32
-%r183 = getelementptr i32, i32* %r1, i32 7
-store i32 %r181, i32* %r183
-%r184 = lshr i352 %r180, 32
-%r185 = trunc i352 %r184 to i32
-%r187 = getelementptr i32, i32* %r1, i32 8
-store i32 %r185, i32* %r187
-%r188 = lshr i352 %r184, 32
-%r189 = trunc i352 %r188 to i32
-%r191 = getelementptr i32, i32* %r1, i32 9
-store i32 %r189, i32* %r191
-%r192 = lshr i352 %r188, 32
-%r193 = trunc i352 %r192 to i32
-%r195 = getelementptr i32, i32* %r1, i32 10
-store i32 %r193, i32* %r195
-br i1%r152, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r196 = load i32, i32* %r4
-%r197 = zext i32 %r196 to i64
-%r199 = getelementptr i32, i32* %r4, i32 1
-%r200 = load i32, i32* %r199
-%r201 = zext i32 %r200 to i64
-%r202 = shl i64 %r201, 32
-%r203 = or i64 %r197, %r202
-%r204 = zext i64 %r203 to i96
-%r206 = getelementptr i32, i32* %r4, i32 2
-%r207 = load i32, i32* %r206
-%r208 = zext i32 %r207 to i96
-%r209 = shl i96 %r208, 64
-%r210 = or i96 %r204, %r209
-%r211 = zext i96 %r210 to i128
-%r213 = getelementptr i32, i32* %r4, i32 3
-%r214 = load i32, i32* %r213
-%r215 = zext i32 %r214 to i128
-%r216 = shl i128 %r215, 96
-%r217 = or i128 %r211, %r216
-%r218 = zext i128 %r217 to i160
-%r220 = getelementptr i32, i32* %r4, i32 4
-%r221 = load i32, i32* %r220
-%r222 = zext i32 %r221 to i160
-%r223 = shl i160 %r222, 128
-%r224 = or i160 %r218, %r223
-%r225 = zext i160 %r224 to i192
-%r227 = getelementptr i32, i32* %r4, i32 5
-%r228 = load i32, i32* %r227
-%r229 = zext i32 %r228 to i192
-%r230 = shl i192 %r229, 160
-%r231 = or i192 %r225, %r230
-%r232 = zext i192 %r231 to i224
-%r234 = getelementptr i32, i32* %r4, i32 6
-%r235 = load i32, i32* %r234
-%r236 = zext i32 %r235 to i224
-%r237 = shl i224 %r236, 192
-%r238 = or i224 %r232, %r237
-%r239 = zext i224 %r238 to i256
-%r241 = getelementptr i32, i32* %r4, i32 7
-%r242 = load i32, i32* %r241
-%r243 = zext i32 %r242 to i256
-%r244 = shl i256 %r243, 224
-%r245 = or i256 %r239, %r244
-%r246 = zext i256 %r245 to i288
-%r248 = getelementptr i32, i32* %r4, i32 8
-%r249 = load i32, i32* %r248
-%r250 = zext i32 %r249 to i288
-%r251 = shl i288 %r250, 256
-%r252 = or i288 %r246, %r251
-%r253 = zext i288 %r252 to i320
-%r255 = getelementptr i32, i32* %r4, i32 9
-%r256 = load i32, i32* %r255
-%r257 = zext i32 %r256 to i320
-%r258 = shl i320 %r257, 288
-%r259 = or i320 %r253, %r258
-%r260 = zext i320 %r259 to i352
-%r262 = getelementptr i32, i32* %r4, i32 10
-%r263 = load i32, i32* %r262
-%r264 = zext i32 %r263 to i352
-%r265 = shl i352 %r264, 320
-%r266 = or i352 %r260, %r265
-%r267 = add i352 %r150, %r266
-%r268 = trunc i352 %r267 to i32
-%r270 = getelementptr i32, i32* %r1, i32 0
-store i32 %r268, i32* %r270
-%r271 = lshr i352 %r267, 32
-%r272 = trunc i352 %r271 to i32
-%r274 = getelementptr i32, i32* %r1, i32 1
-store i32 %r272, i32* %r274
-%r275 = lshr i352 %r271, 32
-%r276 = trunc i352 %r275 to i32
-%r278 = getelementptr i32, i32* %r1, i32 2
-store i32 %r276, i32* %r278
-%r279 = lshr i352 %r275, 32
-%r280 = trunc i352 %r279 to i32
-%r282 = getelementptr i32, i32* %r1, i32 3
-store i32 %r280, i32* %r282
-%r283 = lshr i352 %r279, 32
-%r284 = trunc i352 %r283 to i32
-%r286 = getelementptr i32, i32* %r1, i32 4
-store i32 %r284, i32* %r286
-%r287 = lshr i352 %r283, 32
-%r288 = trunc i352 %r287 to i32
-%r290 = getelementptr i32, i32* %r1, i32 5
-store i32 %r288, i32* %r290
-%r291 = lshr i352 %r287, 32
-%r292 = trunc i352 %r291 to i32
-%r294 = getelementptr i32, i32* %r1, i32 6
-store i32 %r292, i32* %r294
-%r295 = lshr i352 %r291, 32
-%r296 = trunc i352 %r295 to i32
-%r298 = getelementptr i32, i32* %r1, i32 7
-store i32 %r296, i32* %r298
-%r299 = lshr i352 %r295, 32
-%r300 = trunc i352 %r299 to i32
-%r302 = getelementptr i32, i32* %r1, i32 8
-store i32 %r300, i32* %r302
-%r303 = lshr i352 %r299, 32
-%r304 = trunc i352 %r303 to i32
-%r306 = getelementptr i32, i32* %r1, i32 9
-store i32 %r304, i32* %r306
-%r307 = lshr i352 %r303, 32
-%r308 = trunc i352 %r307 to i32
-%r310 = getelementptr i32, i32* %r1, i32 10
-store i32 %r308, i32* %r310
-ret void
-}
-define void @mcl_fp_subNF11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = load i32, i32* %r3
-%r77 = zext i32 %r76 to i64
-%r79 = getelementptr i32, i32* %r3, i32 1
-%r80 = load i32, i32* %r79
-%r81 = zext i32 %r80 to i64
-%r82 = shl i64 %r81, 32
-%r83 = or i64 %r77, %r82
-%r84 = zext i64 %r83 to i96
-%r86 = getelementptr i32, i32* %r3, i32 2
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i96
-%r89 = shl i96 %r88, 64
-%r90 = or i96 %r84, %r89
-%r91 = zext i96 %r90 to i128
-%r93 = getelementptr i32, i32* %r3, i32 3
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i128
-%r96 = shl i128 %r95, 96
-%r97 = or i128 %r91, %r96
-%r98 = zext i128 %r97 to i160
-%r100 = getelementptr i32, i32* %r3, i32 4
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i160
-%r103 = shl i160 %r102, 128
-%r104 = or i160 %r98, %r103
-%r105 = zext i160 %r104 to i192
-%r107 = getelementptr i32, i32* %r3, i32 5
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i192
-%r110 = shl i192 %r109, 160
-%r111 = or i192 %r105, %r110
-%r112 = zext i192 %r111 to i224
-%r114 = getelementptr i32, i32* %r3, i32 6
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i224
-%r117 = shl i224 %r116, 192
-%r118 = or i224 %r112, %r117
-%r119 = zext i224 %r118 to i256
-%r121 = getelementptr i32, i32* %r3, i32 7
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i256
-%r124 = shl i256 %r123, 224
-%r125 = or i256 %r119, %r124
-%r126 = zext i256 %r125 to i288
-%r128 = getelementptr i32, i32* %r3, i32 8
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i288
-%r131 = shl i288 %r130, 256
-%r132 = or i288 %r126, %r131
-%r133 = zext i288 %r132 to i320
-%r135 = getelementptr i32, i32* %r3, i32 9
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i320
-%r138 = shl i320 %r137, 288
-%r139 = or i320 %r133, %r138
-%r140 = zext i320 %r139 to i352
-%r142 = getelementptr i32, i32* %r3, i32 10
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i352
-%r145 = shl i352 %r144, 320
-%r146 = or i352 %r140, %r145
-%r147 = sub i352 %r75, %r146
-%r148 = lshr i352 %r147, 351
-%r149 = trunc i352 %r148 to i1
-%r150 = load i32, i32* %r4
-%r151 = zext i32 %r150 to i64
-%r153 = getelementptr i32, i32* %r4, i32 1
-%r154 = load i32, i32* %r153
-%r155 = zext i32 %r154 to i64
-%r156 = shl i64 %r155, 32
-%r157 = or i64 %r151, %r156
-%r158 = zext i64 %r157 to i96
-%r160 = getelementptr i32, i32* %r4, i32 2
-%r161 = load i32, i32* %r160
-%r162 = zext i32 %r161 to i96
-%r163 = shl i96 %r162, 64
-%r164 = or i96 %r158, %r163
-%r165 = zext i96 %r164 to i128
-%r167 = getelementptr i32, i32* %r4, i32 3
-%r168 = load i32, i32* %r167
-%r169 = zext i32 %r168 to i128
-%r170 = shl i128 %r169, 96
-%r171 = or i128 %r165, %r170
-%r172 = zext i128 %r171 to i160
-%r174 = getelementptr i32, i32* %r4, i32 4
-%r175 = load i32, i32* %r174
-%r176 = zext i32 %r175 to i160
-%r177 = shl i160 %r176, 128
-%r178 = or i160 %r172, %r177
-%r179 = zext i160 %r178 to i192
-%r181 = getelementptr i32, i32* %r4, i32 5
-%r182 = load i32, i32* %r181
-%r183 = zext i32 %r182 to i192
-%r184 = shl i192 %r183, 160
-%r185 = or i192 %r179, %r184
-%r186 = zext i192 %r185 to i224
-%r188 = getelementptr i32, i32* %r4, i32 6
-%r189 = load i32, i32* %r188
-%r190 = zext i32 %r189 to i224
-%r191 = shl i224 %r190, 192
-%r192 = or i224 %r186, %r191
-%r193 = zext i224 %r192 to i256
-%r195 = getelementptr i32, i32* %r4, i32 7
-%r196 = load i32, i32* %r195
-%r197 = zext i32 %r196 to i256
-%r198 = shl i256 %r197, 224
-%r199 = or i256 %r193, %r198
-%r200 = zext i256 %r199 to i288
-%r202 = getelementptr i32, i32* %r4, i32 8
-%r203 = load i32, i32* %r202
-%r204 = zext i32 %r203 to i288
-%r205 = shl i288 %r204, 256
-%r206 = or i288 %r200, %r205
-%r207 = zext i288 %r206 to i320
-%r209 = getelementptr i32, i32* %r4, i32 9
-%r210 = load i32, i32* %r209
-%r211 = zext i32 %r210 to i320
-%r212 = shl i320 %r211, 288
-%r213 = or i320 %r207, %r212
-%r214 = zext i320 %r213 to i352
-%r216 = getelementptr i32, i32* %r4, i32 10
-%r217 = load i32, i32* %r216
-%r218 = zext i32 %r217 to i352
-%r219 = shl i352 %r218, 320
-%r220 = or i352 %r214, %r219
-%r222 = select i1 %r149, i352 %r220, i352 0
-%r223 = add i352 %r147, %r222
-%r224 = trunc i352 %r223 to i32
-%r226 = getelementptr i32, i32* %r1, i32 0
-store i32 %r224, i32* %r226
-%r227 = lshr i352 %r223, 32
-%r228 = trunc i352 %r227 to i32
-%r230 = getelementptr i32, i32* %r1, i32 1
-store i32 %r228, i32* %r230
-%r231 = lshr i352 %r227, 32
-%r232 = trunc i352 %r231 to i32
-%r234 = getelementptr i32, i32* %r1, i32 2
-store i32 %r232, i32* %r234
-%r235 = lshr i352 %r231, 32
-%r236 = trunc i352 %r235 to i32
-%r238 = getelementptr i32, i32* %r1, i32 3
-store i32 %r236, i32* %r238
-%r239 = lshr i352 %r235, 32
-%r240 = trunc i352 %r239 to i32
-%r242 = getelementptr i32, i32* %r1, i32 4
-store i32 %r240, i32* %r242
-%r243 = lshr i352 %r239, 32
-%r244 = trunc i352 %r243 to i32
-%r246 = getelementptr i32, i32* %r1, i32 5
-store i32 %r244, i32* %r246
-%r247 = lshr i352 %r243, 32
-%r248 = trunc i352 %r247 to i32
-%r250 = getelementptr i32, i32* %r1, i32 6
-store i32 %r248, i32* %r250
-%r251 = lshr i352 %r247, 32
-%r252 = trunc i352 %r251 to i32
-%r254 = getelementptr i32, i32* %r1, i32 7
-store i32 %r252, i32* %r254
-%r255 = lshr i352 %r251, 32
-%r256 = trunc i352 %r255 to i32
-%r258 = getelementptr i32, i32* %r1, i32 8
-store i32 %r256, i32* %r258
-%r259 = lshr i352 %r255, 32
-%r260 = trunc i352 %r259 to i32
-%r262 = getelementptr i32, i32* %r1, i32 9
-store i32 %r260, i32* %r262
-%r263 = lshr i352 %r259, 32
-%r264 = trunc i352 %r263 to i32
-%r266 = getelementptr i32, i32* %r1, i32 10
-store i32 %r264, i32* %r266
-ret void
-}
-define void @mcl_fpDbl_add11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = zext i576 %r124 to i608
-%r127 = getelementptr i32, i32* %r2, i32 18
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i608
-%r130 = shl i608 %r129, 576
-%r131 = or i608 %r125, %r130
-%r132 = zext i608 %r131 to i640
-%r134 = getelementptr i32, i32* %r2, i32 19
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i640
-%r137 = shl i640 %r136, 608
-%r138 = or i640 %r132, %r137
-%r139 = zext i640 %r138 to i672
-%r141 = getelementptr i32, i32* %r2, i32 20
-%r142 = load i32, i32* %r141
-%r143 = zext i32 %r142 to i672
-%r144 = shl i672 %r143, 640
-%r145 = or i672 %r139, %r144
-%r146 = zext i672 %r145 to i704
-%r148 = getelementptr i32, i32* %r2, i32 21
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i704
-%r151 = shl i704 %r150, 672
-%r152 = or i704 %r146, %r151
-%r153 = load i32, i32* %r3
-%r154 = zext i32 %r153 to i64
-%r156 = getelementptr i32, i32* %r3, i32 1
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i64
-%r159 = shl i64 %r158, 32
-%r160 = or i64 %r154, %r159
-%r161 = zext i64 %r160 to i96
-%r163 = getelementptr i32, i32* %r3, i32 2
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i96
-%r166 = shl i96 %r165, 64
-%r167 = or i96 %r161, %r166
-%r168 = zext i96 %r167 to i128
-%r170 = getelementptr i32, i32* %r3, i32 3
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i128
-%r173 = shl i128 %r172, 96
-%r174 = or i128 %r168, %r173
-%r175 = zext i128 %r174 to i160
-%r177 = getelementptr i32, i32* %r3, i32 4
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i160
-%r180 = shl i160 %r179, 128
-%r181 = or i160 %r175, %r180
-%r182 = zext i160 %r181 to i192
-%r184 = getelementptr i32, i32* %r3, i32 5
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i192
-%r187 = shl i192 %r186, 160
-%r188 = or i192 %r182, %r187
-%r189 = zext i192 %r188 to i224
-%r191 = getelementptr i32, i32* %r3, i32 6
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i224
-%r194 = shl i224 %r193, 192
-%r195 = or i224 %r189, %r194
-%r196 = zext i224 %r195 to i256
-%r198 = getelementptr i32, i32* %r3, i32 7
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i256
-%r201 = shl i256 %r200, 224
-%r202 = or i256 %r196, %r201
-%r203 = zext i256 %r202 to i288
-%r205 = getelementptr i32, i32* %r3, i32 8
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i288
-%r208 = shl i288 %r207, 256
-%r209 = or i288 %r203, %r208
-%r210 = zext i288 %r209 to i320
-%r212 = getelementptr i32, i32* %r3, i32 9
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i320
-%r215 = shl i320 %r214, 288
-%r216 = or i320 %r210, %r215
-%r217 = zext i320 %r216 to i352
-%r219 = getelementptr i32, i32* %r3, i32 10
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i352
-%r222 = shl i352 %r221, 320
-%r223 = or i352 %r217, %r222
-%r224 = zext i352 %r223 to i384
-%r226 = getelementptr i32, i32* %r3, i32 11
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i384
-%r229 = shl i384 %r228, 352
-%r230 = or i384 %r224, %r229
-%r231 = zext i384 %r230 to i416
-%r233 = getelementptr i32, i32* %r3, i32 12
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i416
-%r236 = shl i416 %r235, 384
-%r237 = or i416 %r231, %r236
-%r238 = zext i416 %r237 to i448
-%r240 = getelementptr i32, i32* %r3, i32 13
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i448
-%r243 = shl i448 %r242, 416
-%r244 = or i448 %r238, %r243
-%r245 = zext i448 %r244 to i480
-%r247 = getelementptr i32, i32* %r3, i32 14
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i480
-%r250 = shl i480 %r249, 448
-%r251 = or i480 %r245, %r250
-%r252 = zext i480 %r251 to i512
-%r254 = getelementptr i32, i32* %r3, i32 15
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i512
-%r257 = shl i512 %r256, 480
-%r258 = or i512 %r252, %r257
-%r259 = zext i512 %r258 to i544
-%r261 = getelementptr i32, i32* %r3, i32 16
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i544
-%r264 = shl i544 %r263, 512
-%r265 = or i544 %r259, %r264
-%r266 = zext i544 %r265 to i576
-%r268 = getelementptr i32, i32* %r3, i32 17
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i576
-%r271 = shl i576 %r270, 544
-%r272 = or i576 %r266, %r271
-%r273 = zext i576 %r272 to i608
-%r275 = getelementptr i32, i32* %r3, i32 18
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i608
-%r278 = shl i608 %r277, 576
-%r279 = or i608 %r273, %r278
-%r280 = zext i608 %r279 to i640
-%r282 = getelementptr i32, i32* %r3, i32 19
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i640
-%r285 = shl i640 %r284, 608
-%r286 = or i640 %r280, %r285
-%r287 = zext i640 %r286 to i672
-%r289 = getelementptr i32, i32* %r3, i32 20
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i672
-%r292 = shl i672 %r291, 640
-%r293 = or i672 %r287, %r292
-%r294 = zext i672 %r293 to i704
-%r296 = getelementptr i32, i32* %r3, i32 21
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i704
-%r299 = shl i704 %r298, 672
-%r300 = or i704 %r294, %r299
-%r301 = zext i704 %r152 to i736
-%r302 = zext i704 %r300 to i736
-%r303 = add i736 %r301, %r302
-%r304 = trunc i736 %r303 to i352
-%r305 = trunc i352 %r304 to i32
-%r307 = getelementptr i32, i32* %r1, i32 0
-store i32 %r305, i32* %r307
-%r308 = lshr i352 %r304, 32
-%r309 = trunc i352 %r308 to i32
-%r311 = getelementptr i32, i32* %r1, i32 1
-store i32 %r309, i32* %r311
-%r312 = lshr i352 %r308, 32
-%r313 = trunc i352 %r312 to i32
-%r315 = getelementptr i32, i32* %r1, i32 2
-store i32 %r313, i32* %r315
-%r316 = lshr i352 %r312, 32
-%r317 = trunc i352 %r316 to i32
-%r319 = getelementptr i32, i32* %r1, i32 3
-store i32 %r317, i32* %r319
-%r320 = lshr i352 %r316, 32
-%r321 = trunc i352 %r320 to i32
-%r323 = getelementptr i32, i32* %r1, i32 4
-store i32 %r321, i32* %r323
-%r324 = lshr i352 %r320, 32
-%r325 = trunc i352 %r324 to i32
-%r327 = getelementptr i32, i32* %r1, i32 5
-store i32 %r325, i32* %r327
-%r328 = lshr i352 %r324, 32
-%r329 = trunc i352 %r328 to i32
-%r331 = getelementptr i32, i32* %r1, i32 6
-store i32 %r329, i32* %r331
-%r332 = lshr i352 %r328, 32
-%r333 = trunc i352 %r332 to i32
-%r335 = getelementptr i32, i32* %r1, i32 7
-store i32 %r333, i32* %r335
-%r336 = lshr i352 %r332, 32
-%r337 = trunc i352 %r336 to i32
-%r339 = getelementptr i32, i32* %r1, i32 8
-store i32 %r337, i32* %r339
-%r340 = lshr i352 %r336, 32
-%r341 = trunc i352 %r340 to i32
-%r343 = getelementptr i32, i32* %r1, i32 9
-store i32 %r341, i32* %r343
-%r344 = lshr i352 %r340, 32
-%r345 = trunc i352 %r344 to i32
-%r347 = getelementptr i32, i32* %r1, i32 10
-store i32 %r345, i32* %r347
-%r348 = lshr i736 %r303, 352
-%r349 = trunc i736 %r348 to i384
-%r350 = load i32, i32* %r4
-%r351 = zext i32 %r350 to i64
-%r353 = getelementptr i32, i32* %r4, i32 1
-%r354 = load i32, i32* %r353
-%r355 = zext i32 %r354 to i64
-%r356 = shl i64 %r355, 32
-%r357 = or i64 %r351, %r356
-%r358 = zext i64 %r357 to i96
-%r360 = getelementptr i32, i32* %r4, i32 2
-%r361 = load i32, i32* %r360
-%r362 = zext i32 %r361 to i96
-%r363 = shl i96 %r362, 64
-%r364 = or i96 %r358, %r363
-%r365 = zext i96 %r364 to i128
-%r367 = getelementptr i32, i32* %r4, i32 3
-%r368 = load i32, i32* %r367
-%r369 = zext i32 %r368 to i128
-%r370 = shl i128 %r369, 96
-%r371 = or i128 %r365, %r370
-%r372 = zext i128 %r371 to i160
-%r374 = getelementptr i32, i32* %r4, i32 4
-%r375 = load i32, i32* %r374
-%r376 = zext i32 %r375 to i160
-%r377 = shl i160 %r376, 128
-%r378 = or i160 %r372, %r377
-%r379 = zext i160 %r378 to i192
-%r381 = getelementptr i32, i32* %r4, i32 5
-%r382 = load i32, i32* %r381
-%r383 = zext i32 %r382 to i192
-%r384 = shl i192 %r383, 160
-%r385 = or i192 %r379, %r384
-%r386 = zext i192 %r385 to i224
-%r388 = getelementptr i32, i32* %r4, i32 6
-%r389 = load i32, i32* %r388
-%r390 = zext i32 %r389 to i224
-%r391 = shl i224 %r390, 192
-%r392 = or i224 %r386, %r391
-%r393 = zext i224 %r392 to i256
-%r395 = getelementptr i32, i32* %r4, i32 7
-%r396 = load i32, i32* %r395
-%r397 = zext i32 %r396 to i256
-%r398 = shl i256 %r397, 224
-%r399 = or i256 %r393, %r398
-%r400 = zext i256 %r399 to i288
-%r402 = getelementptr i32, i32* %r4, i32 8
-%r403 = load i32, i32* %r402
-%r404 = zext i32 %r403 to i288
-%r405 = shl i288 %r404, 256
-%r406 = or i288 %r400, %r405
-%r407 = zext i288 %r406 to i320
-%r409 = getelementptr i32, i32* %r4, i32 9
-%r410 = load i32, i32* %r409
-%r411 = zext i32 %r410 to i320
-%r412 = shl i320 %r411, 288
-%r413 = or i320 %r407, %r412
-%r414 = zext i320 %r413 to i352
-%r416 = getelementptr i32, i32* %r4, i32 10
-%r417 = load i32, i32* %r416
-%r418 = zext i32 %r417 to i352
-%r419 = shl i352 %r418, 320
-%r420 = or i352 %r414, %r419
-%r421 = zext i352 %r420 to i384
-%r422 = sub i384 %r349, %r421
-%r423 = lshr i384 %r422, 352
-%r424 = trunc i384 %r423 to i1
-%r425 = select i1 %r424, i384 %r349, i384 %r422
-%r426 = trunc i384 %r425 to i352
-%r428 = getelementptr i32, i32* %r1, i32 11
-%r429 = trunc i352 %r426 to i32
-%r431 = getelementptr i32, i32* %r428, i32 0
-store i32 %r429, i32* %r431
-%r432 = lshr i352 %r426, 32
-%r433 = trunc i352 %r432 to i32
-%r435 = getelementptr i32, i32* %r428, i32 1
-store i32 %r433, i32* %r435
-%r436 = lshr i352 %r432, 32
-%r437 = trunc i352 %r436 to i32
-%r439 = getelementptr i32, i32* %r428, i32 2
-store i32 %r437, i32* %r439
-%r440 = lshr i352 %r436, 32
-%r441 = trunc i352 %r440 to i32
-%r443 = getelementptr i32, i32* %r428, i32 3
-store i32 %r441, i32* %r443
-%r444 = lshr i352 %r440, 32
-%r445 = trunc i352 %r444 to i32
-%r447 = getelementptr i32, i32* %r428, i32 4
-store i32 %r445, i32* %r447
-%r448 = lshr i352 %r444, 32
-%r449 = trunc i352 %r448 to i32
-%r451 = getelementptr i32, i32* %r428, i32 5
-store i32 %r449, i32* %r451
-%r452 = lshr i352 %r448, 32
-%r453 = trunc i352 %r452 to i32
-%r455 = getelementptr i32, i32* %r428, i32 6
-store i32 %r453, i32* %r455
-%r456 = lshr i352 %r452, 32
-%r457 = trunc i352 %r456 to i32
-%r459 = getelementptr i32, i32* %r428, i32 7
-store i32 %r457, i32* %r459
-%r460 = lshr i352 %r456, 32
-%r461 = trunc i352 %r460 to i32
-%r463 = getelementptr i32, i32* %r428, i32 8
-store i32 %r461, i32* %r463
-%r464 = lshr i352 %r460, 32
-%r465 = trunc i352 %r464 to i32
-%r467 = getelementptr i32, i32* %r428, i32 9
-store i32 %r465, i32* %r467
-%r468 = lshr i352 %r464, 32
-%r469 = trunc i352 %r468 to i32
-%r471 = getelementptr i32, i32* %r428, i32 10
-store i32 %r469, i32* %r471
-ret void
-}
-define void @mcl_fpDbl_sub11L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = zext i576 %r124 to i608
-%r127 = getelementptr i32, i32* %r2, i32 18
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i608
-%r130 = shl i608 %r129, 576
-%r131 = or i608 %r125, %r130
-%r132 = zext i608 %r131 to i640
-%r134 = getelementptr i32, i32* %r2, i32 19
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i640
-%r137 = shl i640 %r136, 608
-%r138 = or i640 %r132, %r137
-%r139 = zext i640 %r138 to i672
-%r141 = getelementptr i32, i32* %r2, i32 20
-%r142 = load i32, i32* %r141
-%r143 = zext i32 %r142 to i672
-%r144 = shl i672 %r143, 640
-%r145 = or i672 %r139, %r144
-%r146 = zext i672 %r145 to i704
-%r148 = getelementptr i32, i32* %r2, i32 21
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i704
-%r151 = shl i704 %r150, 672
-%r152 = or i704 %r146, %r151
-%r153 = load i32, i32* %r3
-%r154 = zext i32 %r153 to i64
-%r156 = getelementptr i32, i32* %r3, i32 1
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i64
-%r159 = shl i64 %r158, 32
-%r160 = or i64 %r154, %r159
-%r161 = zext i64 %r160 to i96
-%r163 = getelementptr i32, i32* %r3, i32 2
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i96
-%r166 = shl i96 %r165, 64
-%r167 = or i96 %r161, %r166
-%r168 = zext i96 %r167 to i128
-%r170 = getelementptr i32, i32* %r3, i32 3
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i128
-%r173 = shl i128 %r172, 96
-%r174 = or i128 %r168, %r173
-%r175 = zext i128 %r174 to i160
-%r177 = getelementptr i32, i32* %r3, i32 4
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i160
-%r180 = shl i160 %r179, 128
-%r181 = or i160 %r175, %r180
-%r182 = zext i160 %r181 to i192
-%r184 = getelementptr i32, i32* %r3, i32 5
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i192
-%r187 = shl i192 %r186, 160
-%r188 = or i192 %r182, %r187
-%r189 = zext i192 %r188 to i224
-%r191 = getelementptr i32, i32* %r3, i32 6
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i224
-%r194 = shl i224 %r193, 192
-%r195 = or i224 %r189, %r194
-%r196 = zext i224 %r195 to i256
-%r198 = getelementptr i32, i32* %r3, i32 7
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i256
-%r201 = shl i256 %r200, 224
-%r202 = or i256 %r196, %r201
-%r203 = zext i256 %r202 to i288
-%r205 = getelementptr i32, i32* %r3, i32 8
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i288
-%r208 = shl i288 %r207, 256
-%r209 = or i288 %r203, %r208
-%r210 = zext i288 %r209 to i320
-%r212 = getelementptr i32, i32* %r3, i32 9
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i320
-%r215 = shl i320 %r214, 288
-%r216 = or i320 %r210, %r215
-%r217 = zext i320 %r216 to i352
-%r219 = getelementptr i32, i32* %r3, i32 10
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i352
-%r222 = shl i352 %r221, 320
-%r223 = or i352 %r217, %r222
-%r224 = zext i352 %r223 to i384
-%r226 = getelementptr i32, i32* %r3, i32 11
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i384
-%r229 = shl i384 %r228, 352
-%r230 = or i384 %r224, %r229
-%r231 = zext i384 %r230 to i416
-%r233 = getelementptr i32, i32* %r3, i32 12
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i416
-%r236 = shl i416 %r235, 384
-%r237 = or i416 %r231, %r236
-%r238 = zext i416 %r237 to i448
-%r240 = getelementptr i32, i32* %r3, i32 13
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i448
-%r243 = shl i448 %r242, 416
-%r244 = or i448 %r238, %r243
-%r245 = zext i448 %r244 to i480
-%r247 = getelementptr i32, i32* %r3, i32 14
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i480
-%r250 = shl i480 %r249, 448
-%r251 = or i480 %r245, %r250
-%r252 = zext i480 %r251 to i512
-%r254 = getelementptr i32, i32* %r3, i32 15
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i512
-%r257 = shl i512 %r256, 480
-%r258 = or i512 %r252, %r257
-%r259 = zext i512 %r258 to i544
-%r261 = getelementptr i32, i32* %r3, i32 16
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i544
-%r264 = shl i544 %r263, 512
-%r265 = or i544 %r259, %r264
-%r266 = zext i544 %r265 to i576
-%r268 = getelementptr i32, i32* %r3, i32 17
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i576
-%r271 = shl i576 %r270, 544
-%r272 = or i576 %r266, %r271
-%r273 = zext i576 %r272 to i608
-%r275 = getelementptr i32, i32* %r3, i32 18
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i608
-%r278 = shl i608 %r277, 576
-%r279 = or i608 %r273, %r278
-%r280 = zext i608 %r279 to i640
-%r282 = getelementptr i32, i32* %r3, i32 19
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i640
-%r285 = shl i640 %r284, 608
-%r286 = or i640 %r280, %r285
-%r287 = zext i640 %r286 to i672
-%r289 = getelementptr i32, i32* %r3, i32 20
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i672
-%r292 = shl i672 %r291, 640
-%r293 = or i672 %r287, %r292
-%r294 = zext i672 %r293 to i704
-%r296 = getelementptr i32, i32* %r3, i32 21
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i704
-%r299 = shl i704 %r298, 672
-%r300 = or i704 %r294, %r299
-%r301 = zext i704 %r152 to i736
-%r302 = zext i704 %r300 to i736
-%r303 = sub i736 %r301, %r302
-%r304 = trunc i736 %r303 to i352
-%r305 = trunc i352 %r304 to i32
-%r307 = getelementptr i32, i32* %r1, i32 0
-store i32 %r305, i32* %r307
-%r308 = lshr i352 %r304, 32
-%r309 = trunc i352 %r308 to i32
-%r311 = getelementptr i32, i32* %r1, i32 1
-store i32 %r309, i32* %r311
-%r312 = lshr i352 %r308, 32
-%r313 = trunc i352 %r312 to i32
-%r315 = getelementptr i32, i32* %r1, i32 2
-store i32 %r313, i32* %r315
-%r316 = lshr i352 %r312, 32
-%r317 = trunc i352 %r316 to i32
-%r319 = getelementptr i32, i32* %r1, i32 3
-store i32 %r317, i32* %r319
-%r320 = lshr i352 %r316, 32
-%r321 = trunc i352 %r320 to i32
-%r323 = getelementptr i32, i32* %r1, i32 4
-store i32 %r321, i32* %r323
-%r324 = lshr i352 %r320, 32
-%r325 = trunc i352 %r324 to i32
-%r327 = getelementptr i32, i32* %r1, i32 5
-store i32 %r325, i32* %r327
-%r328 = lshr i352 %r324, 32
-%r329 = trunc i352 %r328 to i32
-%r331 = getelementptr i32, i32* %r1, i32 6
-store i32 %r329, i32* %r331
-%r332 = lshr i352 %r328, 32
-%r333 = trunc i352 %r332 to i32
-%r335 = getelementptr i32, i32* %r1, i32 7
-store i32 %r333, i32* %r335
-%r336 = lshr i352 %r332, 32
-%r337 = trunc i352 %r336 to i32
-%r339 = getelementptr i32, i32* %r1, i32 8
-store i32 %r337, i32* %r339
-%r340 = lshr i352 %r336, 32
-%r341 = trunc i352 %r340 to i32
-%r343 = getelementptr i32, i32* %r1, i32 9
-store i32 %r341, i32* %r343
-%r344 = lshr i352 %r340, 32
-%r345 = trunc i352 %r344 to i32
-%r347 = getelementptr i32, i32* %r1, i32 10
-store i32 %r345, i32* %r347
-%r348 = lshr i736 %r303, 352
-%r349 = trunc i736 %r348 to i352
-%r350 = lshr i736 %r303, 704
-%r351 = trunc i736 %r350 to i1
-%r352 = load i32, i32* %r4
-%r353 = zext i32 %r352 to i64
-%r355 = getelementptr i32, i32* %r4, i32 1
-%r356 = load i32, i32* %r355
-%r357 = zext i32 %r356 to i64
-%r358 = shl i64 %r357, 32
-%r359 = or i64 %r353, %r358
-%r360 = zext i64 %r359 to i96
-%r362 = getelementptr i32, i32* %r4, i32 2
-%r363 = load i32, i32* %r362
-%r364 = zext i32 %r363 to i96
-%r365 = shl i96 %r364, 64
-%r366 = or i96 %r360, %r365
-%r367 = zext i96 %r366 to i128
-%r369 = getelementptr i32, i32* %r4, i32 3
-%r370 = load i32, i32* %r369
-%r371 = zext i32 %r370 to i128
-%r372 = shl i128 %r371, 96
-%r373 = or i128 %r367, %r372
-%r374 = zext i128 %r373 to i160
-%r376 = getelementptr i32, i32* %r4, i32 4
-%r377 = load i32, i32* %r376
-%r378 = zext i32 %r377 to i160
-%r379 = shl i160 %r378, 128
-%r380 = or i160 %r374, %r379
-%r381 = zext i160 %r380 to i192
-%r383 = getelementptr i32, i32* %r4, i32 5
-%r384 = load i32, i32* %r383
-%r385 = zext i32 %r384 to i192
-%r386 = shl i192 %r385, 160
-%r387 = or i192 %r381, %r386
-%r388 = zext i192 %r387 to i224
-%r390 = getelementptr i32, i32* %r4, i32 6
-%r391 = load i32, i32* %r390
-%r392 = zext i32 %r391 to i224
-%r393 = shl i224 %r392, 192
-%r394 = or i224 %r388, %r393
-%r395 = zext i224 %r394 to i256
-%r397 = getelementptr i32, i32* %r4, i32 7
-%r398 = load i32, i32* %r397
-%r399 = zext i32 %r398 to i256
-%r400 = shl i256 %r399, 224
-%r401 = or i256 %r395, %r400
-%r402 = zext i256 %r401 to i288
-%r404 = getelementptr i32, i32* %r4, i32 8
-%r405 = load i32, i32* %r404
-%r406 = zext i32 %r405 to i288
-%r407 = shl i288 %r406, 256
-%r408 = or i288 %r402, %r407
-%r409 = zext i288 %r408 to i320
-%r411 = getelementptr i32, i32* %r4, i32 9
-%r412 = load i32, i32* %r411
-%r413 = zext i32 %r412 to i320
-%r414 = shl i320 %r413, 288
-%r415 = or i320 %r409, %r414
-%r416 = zext i320 %r415 to i352
-%r418 = getelementptr i32, i32* %r4, i32 10
-%r419 = load i32, i32* %r418
-%r420 = zext i32 %r419 to i352
-%r421 = shl i352 %r420, 320
-%r422 = or i352 %r416, %r421
-%r424 = select i1 %r351, i352 %r422, i352 0
-%r425 = add i352 %r349, %r424
-%r427 = getelementptr i32, i32* %r1, i32 11
-%r428 = trunc i352 %r425 to i32
-%r430 = getelementptr i32, i32* %r427, i32 0
-store i32 %r428, i32* %r430
-%r431 = lshr i352 %r425, 32
-%r432 = trunc i352 %r431 to i32
-%r434 = getelementptr i32, i32* %r427, i32 1
-store i32 %r432, i32* %r434
-%r435 = lshr i352 %r431, 32
-%r436 = trunc i352 %r435 to i32
-%r438 = getelementptr i32, i32* %r427, i32 2
-store i32 %r436, i32* %r438
-%r439 = lshr i352 %r435, 32
-%r440 = trunc i352 %r439 to i32
-%r442 = getelementptr i32, i32* %r427, i32 3
-store i32 %r440, i32* %r442
-%r443 = lshr i352 %r439, 32
-%r444 = trunc i352 %r443 to i32
-%r446 = getelementptr i32, i32* %r427, i32 4
-store i32 %r444, i32* %r446
-%r447 = lshr i352 %r443, 32
-%r448 = trunc i352 %r447 to i32
-%r450 = getelementptr i32, i32* %r427, i32 5
-store i32 %r448, i32* %r450
-%r451 = lshr i352 %r447, 32
-%r452 = trunc i352 %r451 to i32
-%r454 = getelementptr i32, i32* %r427, i32 6
-store i32 %r452, i32* %r454
-%r455 = lshr i352 %r451, 32
-%r456 = trunc i352 %r455 to i32
-%r458 = getelementptr i32, i32* %r427, i32 7
-store i32 %r456, i32* %r458
-%r459 = lshr i352 %r455, 32
-%r460 = trunc i352 %r459 to i32
-%r462 = getelementptr i32, i32* %r427, i32 8
-store i32 %r460, i32* %r462
-%r463 = lshr i352 %r459, 32
-%r464 = trunc i352 %r463 to i32
-%r466 = getelementptr i32, i32* %r427, i32 9
-store i32 %r464, i32* %r466
-%r467 = lshr i352 %r463, 32
-%r468 = trunc i352 %r467 to i32
-%r470 = getelementptr i32, i32* %r427, i32 10
-store i32 %r468, i32* %r470
-ret void
-}
-define i416 @mulPv384x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
-%r18 = trunc i64 %r17 to i32
-%r19 = call i32 @extractHigh32(i64 %r17)
-%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
-%r22 = trunc i64 %r21 to i32
-%r23 = call i32 @extractHigh32(i64 %r21)
-%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
-%r26 = trunc i64 %r25 to i32
-%r27 = call i32 @extractHigh32(i64 %r25)
-%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
-%r30 = trunc i64 %r29 to i32
-%r31 = call i32 @extractHigh32(i64 %r29)
-%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
-%r34 = trunc i64 %r33 to i32
-%r35 = call i32 @extractHigh32(i64 %r33)
-%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
-%r38 = trunc i64 %r37 to i32
-%r39 = call i32 @extractHigh32(i64 %r37)
-%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
-%r42 = trunc i64 %r41 to i32
-%r43 = call i32 @extractHigh32(i64 %r41)
-%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
-%r46 = trunc i64 %r45 to i32
-%r47 = call i32 @extractHigh32(i64 %r45)
-%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
-%r50 = trunc i64 %r49 to i32
-%r51 = call i32 @extractHigh32(i64 %r49)
-%r52 = zext i32 %r6 to i64
-%r53 = zext i32 %r10 to i64
-%r54 = shl i64 %r53, 32
-%r55 = or i64 %r52, %r54
-%r56 = zext i64 %r55 to i96
-%r57 = zext i32 %r14 to i96
-%r58 = shl i96 %r57, 64
-%r59 = or i96 %r56, %r58
-%r60 = zext i96 %r59 to i128
-%r61 = zext i32 %r18 to i128
-%r62 = shl i128 %r61, 96
-%r63 = or i128 %r60, %r62
-%r64 = zext i128 %r63 to i160
-%r65 = zext i32 %r22 to i160
-%r66 = shl i160 %r65, 128
-%r67 = or i160 %r64, %r66
-%r68 = zext i160 %r67 to i192
-%r69 = zext i32 %r26 to i192
-%r70 = shl i192 %r69, 160
-%r71 = or i192 %r68, %r70
-%r72 = zext i192 %r71 to i224
-%r73 = zext i32 %r30 to i224
-%r74 = shl i224 %r73, 192
-%r75 = or i224 %r72, %r74
-%r76 = zext i224 %r75 to i256
-%r77 = zext i32 %r34 to i256
-%r78 = shl i256 %r77, 224
-%r79 = or i256 %r76, %r78
-%r80 = zext i256 %r79 to i288
-%r81 = zext i32 %r38 to i288
-%r82 = shl i288 %r81, 256
-%r83 = or i288 %r80, %r82
-%r84 = zext i288 %r83 to i320
-%r85 = zext i32 %r42 to i320
-%r86 = shl i320 %r85, 288
-%r87 = or i320 %r84, %r86
-%r88 = zext i320 %r87 to i352
-%r89 = zext i32 %r46 to i352
-%r90 = shl i352 %r89, 320
-%r91 = or i352 %r88, %r90
-%r92 = zext i352 %r91 to i384
-%r93 = zext i32 %r50 to i384
-%r94 = shl i384 %r93, 352
-%r95 = or i384 %r92, %r94
-%r96 = zext i32 %r7 to i64
-%r97 = zext i32 %r11 to i64
-%r98 = shl i64 %r97, 32
-%r99 = or i64 %r96, %r98
-%r100 = zext i64 %r99 to i96
-%r101 = zext i32 %r15 to i96
-%r102 = shl i96 %r101, 64
-%r103 = or i96 %r100, %r102
-%r104 = zext i96 %r103 to i128
-%r105 = zext i32 %r19 to i128
-%r106 = shl i128 %r105, 96
-%r107 = or i128 %r104, %r106
-%r108 = zext i128 %r107 to i160
-%r109 = zext i32 %r23 to i160
-%r110 = shl i160 %r109, 128
-%r111 = or i160 %r108, %r110
-%r112 = zext i160 %r111 to i192
-%r113 = zext i32 %r27 to i192
-%r114 = shl i192 %r113, 160
-%r115 = or i192 %r112, %r114
-%r116 = zext i192 %r115 to i224
-%r117 = zext i32 %r31 to i224
-%r118 = shl i224 %r117, 192
-%r119 = or i224 %r116, %r118
-%r120 = zext i224 %r119 to i256
-%r121 = zext i32 %r35 to i256
-%r122 = shl i256 %r121, 224
-%r123 = or i256 %r120, %r122
-%r124 = zext i256 %r123 to i288
-%r125 = zext i32 %r39 to i288
-%r126 = shl i288 %r125, 256
-%r127 = or i288 %r124, %r126
-%r128 = zext i288 %r127 to i320
-%r129 = zext i32 %r43 to i320
-%r130 = shl i320 %r129, 288
-%r131 = or i320 %r128, %r130
-%r132 = zext i320 %r131 to i352
-%r133 = zext i32 %r47 to i352
-%r134 = shl i352 %r133, 320
-%r135 = or i352 %r132, %r134
-%r136 = zext i352 %r135 to i384
-%r137 = zext i32 %r51 to i384
-%r138 = shl i384 %r137, 352
-%r139 = or i384 %r136, %r138
-%r140 = zext i384 %r95 to i416
-%r141 = zext i384 %r139 to i416
-%r142 = shl i416 %r141, 32
-%r143 = add i416 %r140, %r142
-ret i416 %r143
-}
-define void @mcl_fp_mulUnitPre12L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i416 @mulPv384x32(i32* %r2, i32 %r3)
-%r5 = trunc i416 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i416 %r4, 32
-%r9 = trunc i416 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i416 %r8, 32
-%r13 = trunc i416 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i416 %r12, 32
-%r17 = trunc i416 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i416 %r16, 32
-%r21 = trunc i416 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-%r24 = lshr i416 %r20, 32
-%r25 = trunc i416 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 5
-store i32 %r25, i32* %r27
-%r28 = lshr i416 %r24, 32
-%r29 = trunc i416 %r28 to i32
-%r31 = getelementptr i32, i32* %r1, i32 6
-store i32 %r29, i32* %r31
-%r32 = lshr i416 %r28, 32
-%r33 = trunc i416 %r32 to i32
-%r35 = getelementptr i32, i32* %r1, i32 7
-store i32 %r33, i32* %r35
-%r36 = lshr i416 %r32, 32
-%r37 = trunc i416 %r36 to i32
-%r39 = getelementptr i32, i32* %r1, i32 8
-store i32 %r37, i32* %r39
-%r40 = lshr i416 %r36, 32
-%r41 = trunc i416 %r40 to i32
-%r43 = getelementptr i32, i32* %r1, i32 9
-store i32 %r41, i32* %r43
-%r44 = lshr i416 %r40, 32
-%r45 = trunc i416 %r44 to i32
-%r47 = getelementptr i32, i32* %r1, i32 10
-store i32 %r45, i32* %r47
-%r48 = lshr i416 %r44, 32
-%r49 = trunc i416 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 11
-store i32 %r49, i32* %r51
-%r52 = lshr i416 %r48, 32
-%r53 = trunc i416 %r52 to i32
-%r55 = getelementptr i32, i32* %r1, i32 12
-store i32 %r53, i32* %r55
-ret void
-}
-define void @mcl_fpDbl_mulPre12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r2, i32 6
-%r7 = getelementptr i32, i32* %r3, i32 6
-%r9 = getelementptr i32, i32* %r1, i32 12
-call void @mcl_fpDbl_mulPre6L(i32* %r1, i32* %r2, i32* %r3)
-call void @mcl_fpDbl_mulPre6L(i32* %r9, i32* %r5, i32* %r7)
-%r10 = load i32, i32* %r5
-%r11 = zext i32 %r10 to i64
-%r13 = getelementptr i32, i32* %r5, i32 1
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i64
-%r16 = shl i64 %r15, 32
-%r17 = or i64 %r11, %r16
-%r18 = zext i64 %r17 to i96
-%r20 = getelementptr i32, i32* %r5, i32 2
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i96
-%r23 = shl i96 %r22, 64
-%r24 = or i96 %r18, %r23
-%r25 = zext i96 %r24 to i128
-%r27 = getelementptr i32, i32* %r5, i32 3
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i128
-%r30 = shl i128 %r29, 96
-%r31 = or i128 %r25, %r30
-%r32 = zext i128 %r31 to i160
-%r34 = getelementptr i32, i32* %r5, i32 4
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i160
-%r37 = shl i160 %r36, 128
-%r38 = or i160 %r32, %r37
-%r39 = zext i160 %r38 to i192
-%r41 = getelementptr i32, i32* %r5, i32 5
-%r42 = load i32, i32* %r41
-%r43 = zext i32 %r42 to i192
-%r44 = shl i192 %r43, 160
-%r45 = or i192 %r39, %r44
-%r46 = zext i192 %r45 to i224
-%r47 = load i32, i32* %r2
-%r48 = zext i32 %r47 to i64
-%r50 = getelementptr i32, i32* %r2, i32 1
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i64
-%r53 = shl i64 %r52, 32
-%r54 = or i64 %r48, %r53
-%r55 = zext i64 %r54 to i96
-%r57 = getelementptr i32, i32* %r2, i32 2
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i96
-%r60 = shl i96 %r59, 64
-%r61 = or i96 %r55, %r60
-%r62 = zext i96 %r61 to i128
-%r64 = getelementptr i32, i32* %r2, i32 3
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i128
-%r67 = shl i128 %r66, 96
-%r68 = or i128 %r62, %r67
-%r69 = zext i128 %r68 to i160
-%r71 = getelementptr i32, i32* %r2, i32 4
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i160
-%r74 = shl i160 %r73, 128
-%r75 = or i160 %r69, %r74
-%r76 = zext i160 %r75 to i192
-%r78 = getelementptr i32, i32* %r2, i32 5
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i192
-%r81 = shl i192 %r80, 160
-%r82 = or i192 %r76, %r81
-%r83 = zext i192 %r82 to i224
-%r84 = load i32, i32* %r7
-%r85 = zext i32 %r84 to i64
-%r87 = getelementptr i32, i32* %r7, i32 1
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i64
-%r90 = shl i64 %r89, 32
-%r91 = or i64 %r85, %r90
-%r92 = zext i64 %r91 to i96
-%r94 = getelementptr i32, i32* %r7, i32 2
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i96
-%r97 = shl i96 %r96, 64
-%r98 = or i96 %r92, %r97
-%r99 = zext i96 %r98 to i128
-%r101 = getelementptr i32, i32* %r7, i32 3
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i128
-%r104 = shl i128 %r103, 96
-%r105 = or i128 %r99, %r104
-%r106 = zext i128 %r105 to i160
-%r108 = getelementptr i32, i32* %r7, i32 4
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i160
-%r111 = shl i160 %r110, 128
-%r112 = or i160 %r106, %r111
-%r113 = zext i160 %r112 to i192
-%r115 = getelementptr i32, i32* %r7, i32 5
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i192
-%r118 = shl i192 %r117, 160
-%r119 = or i192 %r113, %r118
-%r120 = zext i192 %r119 to i224
-%r121 = load i32, i32* %r3
-%r122 = zext i32 %r121 to i64
-%r124 = getelementptr i32, i32* %r3, i32 1
-%r125 = load i32, i32* %r124
-%r126 = zext i32 %r125 to i64
-%r127 = shl i64 %r126, 32
-%r128 = or i64 %r122, %r127
-%r129 = zext i64 %r128 to i96
-%r131 = getelementptr i32, i32* %r3, i32 2
-%r132 = load i32, i32* %r131
-%r133 = zext i32 %r132 to i96
-%r134 = shl i96 %r133, 64
-%r135 = or i96 %r129, %r134
-%r136 = zext i96 %r135 to i128
-%r138 = getelementptr i32, i32* %r3, i32 3
-%r139 = load i32, i32* %r138
-%r140 = zext i32 %r139 to i128
-%r141 = shl i128 %r140, 96
-%r142 = or i128 %r136, %r141
-%r143 = zext i128 %r142 to i160
-%r145 = getelementptr i32, i32* %r3, i32 4
-%r146 = load i32, i32* %r145
-%r147 = zext i32 %r146 to i160
-%r148 = shl i160 %r147, 128
-%r149 = or i160 %r143, %r148
-%r150 = zext i160 %r149 to i192
-%r152 = getelementptr i32, i32* %r3, i32 5
-%r153 = load i32, i32* %r152
-%r154 = zext i32 %r153 to i192
-%r155 = shl i192 %r154, 160
-%r156 = or i192 %r150, %r155
-%r157 = zext i192 %r156 to i224
-%r158 = add i224 %r46, %r83
-%r159 = add i224 %r120, %r157
-%r161 = alloca i32, i32 12
-%r162 = trunc i224 %r158 to i192
-%r163 = trunc i224 %r159 to i192
-%r164 = lshr i224 %r158, 192
-%r165 = trunc i224 %r164 to i1
-%r166 = lshr i224 %r159, 192
-%r167 = trunc i224 %r166 to i1
-%r168 = and i1 %r165, %r167
-%r170 = select i1 %r165, i192 %r163, i192 0
-%r172 = select i1 %r167, i192 %r162, i192 0
-%r174 = alloca i32, i32 6
-%r176 = alloca i32, i32 6
-%r177 = trunc i192 %r162 to i32
-%r179 = getelementptr i32, i32* %r174, i32 0
-store i32 %r177, i32* %r179
-%r180 = lshr i192 %r162, 32
-%r181 = trunc i192 %r180 to i32
-%r183 = getelementptr i32, i32* %r174, i32 1
-store i32 %r181, i32* %r183
-%r184 = lshr i192 %r180, 32
-%r185 = trunc i192 %r184 to i32
-%r187 = getelementptr i32, i32* %r174, i32 2
-store i32 %r185, i32* %r187
-%r188 = lshr i192 %r184, 32
-%r189 = trunc i192 %r188 to i32
-%r191 = getelementptr i32, i32* %r174, i32 3
-store i32 %r189, i32* %r191
-%r192 = lshr i192 %r188, 32
-%r193 = trunc i192 %r192 to i32
-%r195 = getelementptr i32, i32* %r174, i32 4
-store i32 %r193, i32* %r195
-%r196 = lshr i192 %r192, 32
-%r197 = trunc i192 %r196 to i32
-%r199 = getelementptr i32, i32* %r174, i32 5
-store i32 %r197, i32* %r199
-%r200 = trunc i192 %r163 to i32
-%r202 = getelementptr i32, i32* %r176, i32 0
-store i32 %r200, i32* %r202
-%r203 = lshr i192 %r163, 32
-%r204 = trunc i192 %r203 to i32
-%r206 = getelementptr i32, i32* %r176, i32 1
-store i32 %r204, i32* %r206
-%r207 = lshr i192 %r203, 32
-%r208 = trunc i192 %r207 to i32
-%r210 = getelementptr i32, i32* %r176, i32 2
-store i32 %r208, i32* %r210
-%r211 = lshr i192 %r207, 32
-%r212 = trunc i192 %r211 to i32
-%r214 = getelementptr i32, i32* %r176, i32 3
-store i32 %r212, i32* %r214
-%r215 = lshr i192 %r211, 32
-%r216 = trunc i192 %r215 to i32
-%r218 = getelementptr i32, i32* %r176, i32 4
-store i32 %r216, i32* %r218
-%r219 = lshr i192 %r215, 32
-%r220 = trunc i192 %r219 to i32
-%r222 = getelementptr i32, i32* %r176, i32 5
-store i32 %r220, i32* %r222
-call void @mcl_fpDbl_mulPre6L(i32* %r161, i32* %r174, i32* %r176)
-%r223 = load i32, i32* %r161
-%r224 = zext i32 %r223 to i64
-%r226 = getelementptr i32, i32* %r161, i32 1
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i64
-%r229 = shl i64 %r228, 32
-%r230 = or i64 %r224, %r229
-%r231 = zext i64 %r230 to i96
-%r233 = getelementptr i32, i32* %r161, i32 2
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i96
-%r236 = shl i96 %r235, 64
-%r237 = or i96 %r231, %r236
-%r238 = zext i96 %r237 to i128
-%r240 = getelementptr i32, i32* %r161, i32 3
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i128
-%r243 = shl i128 %r242, 96
-%r244 = or i128 %r238, %r243
-%r245 = zext i128 %r244 to i160
-%r247 = getelementptr i32, i32* %r161, i32 4
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i160
-%r250 = shl i160 %r249, 128
-%r251 = or i160 %r245, %r250
-%r252 = zext i160 %r251 to i192
-%r254 = getelementptr i32, i32* %r161, i32 5
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i192
-%r257 = shl i192 %r256, 160
-%r258 = or i192 %r252, %r257
-%r259 = zext i192 %r258 to i224
-%r261 = getelementptr i32, i32* %r161, i32 6
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i224
-%r264 = shl i224 %r263, 192
-%r265 = or i224 %r259, %r264
-%r266 = zext i224 %r265 to i256
-%r268 = getelementptr i32, i32* %r161, i32 7
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i256
-%r271 = shl i256 %r270, 224
-%r272 = or i256 %r266, %r271
-%r273 = zext i256 %r272 to i288
-%r275 = getelementptr i32, i32* %r161, i32 8
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i288
-%r278 = shl i288 %r277, 256
-%r279 = or i288 %r273, %r278
-%r280 = zext i288 %r279 to i320
-%r282 = getelementptr i32, i32* %r161, i32 9
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i320
-%r285 = shl i320 %r284, 288
-%r286 = or i320 %r280, %r285
-%r287 = zext i320 %r286 to i352
-%r289 = getelementptr i32, i32* %r161, i32 10
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i352
-%r292 = shl i352 %r291, 320
-%r293 = or i352 %r287, %r292
-%r294 = zext i352 %r293 to i384
-%r296 = getelementptr i32, i32* %r161, i32 11
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i384
-%r299 = shl i384 %r298, 352
-%r300 = or i384 %r294, %r299
-%r301 = zext i384 %r300 to i416
-%r302 = zext i1 %r168 to i416
-%r303 = shl i416 %r302, 384
-%r304 = or i416 %r301, %r303
-%r305 = zext i192 %r170 to i416
-%r306 = zext i192 %r172 to i416
-%r307 = shl i416 %r305, 192
-%r308 = shl i416 %r306, 192
-%r309 = add i416 %r304, %r307
-%r310 = add i416 %r309, %r308
-%r311 = load i32, i32* %r1
-%r312 = zext i32 %r311 to i64
-%r314 = getelementptr i32, i32* %r1, i32 1
-%r315 = load i32, i32* %r314
-%r316 = zext i32 %r315 to i64
-%r317 = shl i64 %r316, 32
-%r318 = or i64 %r312, %r317
-%r319 = zext i64 %r318 to i96
-%r321 = getelementptr i32, i32* %r1, i32 2
-%r322 = load i32, i32* %r321
-%r323 = zext i32 %r322 to i96
-%r324 = shl i96 %r323, 64
-%r325 = or i96 %r319, %r324
-%r326 = zext i96 %r325 to i128
-%r328 = getelementptr i32, i32* %r1, i32 3
-%r329 = load i32, i32* %r328
-%r330 = zext i32 %r329 to i128
-%r331 = shl i128 %r330, 96
-%r332 = or i128 %r326, %r331
-%r333 = zext i128 %r332 to i160
-%r335 = getelementptr i32, i32* %r1, i32 4
-%r336 = load i32, i32* %r335
-%r337 = zext i32 %r336 to i160
-%r338 = shl i160 %r337, 128
-%r339 = or i160 %r333, %r338
-%r340 = zext i160 %r339 to i192
-%r342 = getelementptr i32, i32* %r1, i32 5
-%r343 = load i32, i32* %r342
-%r344 = zext i32 %r343 to i192
-%r345 = shl i192 %r344, 160
-%r346 = or i192 %r340, %r345
-%r347 = zext i192 %r346 to i224
-%r349 = getelementptr i32, i32* %r1, i32 6
-%r350 = load i32, i32* %r349
-%r351 = zext i32 %r350 to i224
-%r352 = shl i224 %r351, 192
-%r353 = or i224 %r347, %r352
-%r354 = zext i224 %r353 to i256
-%r356 = getelementptr i32, i32* %r1, i32 7
-%r357 = load i32, i32* %r356
-%r358 = zext i32 %r357 to i256
-%r359 = shl i256 %r358, 224
-%r360 = or i256 %r354, %r359
-%r361 = zext i256 %r360 to i288
-%r363 = getelementptr i32, i32* %r1, i32 8
-%r364 = load i32, i32* %r363
-%r365 = zext i32 %r364 to i288
-%r366 = shl i288 %r365, 256
-%r367 = or i288 %r361, %r366
-%r368 = zext i288 %r367 to i320
-%r370 = getelementptr i32, i32* %r1, i32 9
-%r371 = load i32, i32* %r370
-%r372 = zext i32 %r371 to i320
-%r373 = shl i320 %r372, 288
-%r374 = or i320 %r368, %r373
-%r375 = zext i320 %r374 to i352
-%r377 = getelementptr i32, i32* %r1, i32 10
-%r378 = load i32, i32* %r377
-%r379 = zext i32 %r378 to i352
-%r380 = shl i352 %r379, 320
-%r381 = or i352 %r375, %r380
-%r382 = zext i352 %r381 to i384
-%r384 = getelementptr i32, i32* %r1, i32 11
-%r385 = load i32, i32* %r384
-%r386 = zext i32 %r385 to i384
-%r387 = shl i384 %r386, 352
-%r388 = or i384 %r382, %r387
-%r389 = zext i384 %r388 to i416
-%r390 = sub i416 %r310, %r389
-%r392 = getelementptr i32, i32* %r1, i32 12
-%r393 = load i32, i32* %r392
-%r394 = zext i32 %r393 to i64
-%r396 = getelementptr i32, i32* %r392, i32 1
-%r397 = load i32, i32* %r396
-%r398 = zext i32 %r397 to i64
-%r399 = shl i64 %r398, 32
-%r400 = or i64 %r394, %r399
-%r401 = zext i64 %r400 to i96
-%r403 = getelementptr i32, i32* %r392, i32 2
-%r404 = load i32, i32* %r403
-%r405 = zext i32 %r404 to i96
-%r406 = shl i96 %r405, 64
-%r407 = or i96 %r401, %r406
-%r408 = zext i96 %r407 to i128
-%r410 = getelementptr i32, i32* %r392, i32 3
-%r411 = load i32, i32* %r410
-%r412 = zext i32 %r411 to i128
-%r413 = shl i128 %r412, 96
-%r414 = or i128 %r408, %r413
-%r415 = zext i128 %r414 to i160
-%r417 = getelementptr i32, i32* %r392, i32 4
-%r418 = load i32, i32* %r417
-%r419 = zext i32 %r418 to i160
-%r420 = shl i160 %r419, 128
-%r421 = or i160 %r415, %r420
-%r422 = zext i160 %r421 to i192
-%r424 = getelementptr i32, i32* %r392, i32 5
-%r425 = load i32, i32* %r424
-%r426 = zext i32 %r425 to i192
-%r427 = shl i192 %r426, 160
-%r428 = or i192 %r422, %r427
-%r429 = zext i192 %r428 to i224
-%r431 = getelementptr i32, i32* %r392, i32 6
-%r432 = load i32, i32* %r431
-%r433 = zext i32 %r432 to i224
-%r434 = shl i224 %r433, 192
-%r435 = or i224 %r429, %r434
-%r436 = zext i224 %r435 to i256
-%r438 = getelementptr i32, i32* %r392, i32 7
-%r439 = load i32, i32* %r438
-%r440 = zext i32 %r439 to i256
-%r441 = shl i256 %r440, 224
-%r442 = or i256 %r436, %r441
-%r443 = zext i256 %r442 to i288
-%r445 = getelementptr i32, i32* %r392, i32 8
-%r446 = load i32, i32* %r445
-%r447 = zext i32 %r446 to i288
-%r448 = shl i288 %r447, 256
-%r449 = or i288 %r443, %r448
-%r450 = zext i288 %r449 to i320
-%r452 = getelementptr i32, i32* %r392, i32 9
-%r453 = load i32, i32* %r452
-%r454 = zext i32 %r453 to i320
-%r455 = shl i320 %r454, 288
-%r456 = or i320 %r450, %r455
-%r457 = zext i320 %r456 to i352
-%r459 = getelementptr i32, i32* %r392, i32 10
-%r460 = load i32, i32* %r459
-%r461 = zext i32 %r460 to i352
-%r462 = shl i352 %r461, 320
-%r463 = or i352 %r457, %r462
-%r464 = zext i352 %r463 to i384
-%r466 = getelementptr i32, i32* %r392, i32 11
-%r467 = load i32, i32* %r466
-%r468 = zext i32 %r467 to i384
-%r469 = shl i384 %r468, 352
-%r470 = or i384 %r464, %r469
-%r471 = zext i384 %r470 to i416
-%r472 = sub i416 %r390, %r471
-%r473 = zext i416 %r472 to i576
-%r475 = getelementptr i32, i32* %r1, i32 6
-%r476 = load i32, i32* %r475
-%r477 = zext i32 %r476 to i64
-%r479 = getelementptr i32, i32* %r475, i32 1
-%r480 = load i32, i32* %r479
-%r481 = zext i32 %r480 to i64
-%r482 = shl i64 %r481, 32
-%r483 = or i64 %r477, %r482
-%r484 = zext i64 %r483 to i96
-%r486 = getelementptr i32, i32* %r475, i32 2
-%r487 = load i32, i32* %r486
-%r488 = zext i32 %r487 to i96
-%r489 = shl i96 %r488, 64
-%r490 = or i96 %r484, %r489
-%r491 = zext i96 %r490 to i128
-%r493 = getelementptr i32, i32* %r475, i32 3
-%r494 = load i32, i32* %r493
-%r495 = zext i32 %r494 to i128
-%r496 = shl i128 %r495, 96
-%r497 = or i128 %r491, %r496
-%r498 = zext i128 %r497 to i160
-%r500 = getelementptr i32, i32* %r475, i32 4
-%r501 = load i32, i32* %r500
-%r502 = zext i32 %r501 to i160
-%r503 = shl i160 %r502, 128
-%r504 = or i160 %r498, %r503
-%r505 = zext i160 %r504 to i192
-%r507 = getelementptr i32, i32* %r475, i32 5
-%r508 = load i32, i32* %r507
-%r509 = zext i32 %r508 to i192
-%r510 = shl i192 %r509, 160
-%r511 = or i192 %r505, %r510
-%r512 = zext i192 %r511 to i224
-%r514 = getelementptr i32, i32* %r475, i32 6
-%r515 = load i32, i32* %r514
-%r516 = zext i32 %r515 to i224
-%r517 = shl i224 %r516, 192
-%r518 = or i224 %r512, %r517
-%r519 = zext i224 %r518 to i256
-%r521 = getelementptr i32, i32* %r475, i32 7
-%r522 = load i32, i32* %r521
-%r523 = zext i32 %r522 to i256
-%r524 = shl i256 %r523, 224
-%r525 = or i256 %r519, %r524
-%r526 = zext i256 %r525 to i288
-%r528 = getelementptr i32, i32* %r475, i32 8
-%r529 = load i32, i32* %r528
-%r530 = zext i32 %r529 to i288
-%r531 = shl i288 %r530, 256
-%r532 = or i288 %r526, %r531
-%r533 = zext i288 %r532 to i320
-%r535 = getelementptr i32, i32* %r475, i32 9
-%r536 = load i32, i32* %r535
-%r537 = zext i32 %r536 to i320
-%r538 = shl i320 %r537, 288
-%r539 = or i320 %r533, %r538
-%r540 = zext i320 %r539 to i352
-%r542 = getelementptr i32, i32* %r475, i32 10
-%r543 = load i32, i32* %r542
-%r544 = zext i32 %r543 to i352
-%r545 = shl i352 %r544, 320
-%r546 = or i352 %r540, %r545
-%r547 = zext i352 %r546 to i384
-%r549 = getelementptr i32, i32* %r475, i32 11
-%r550 = load i32, i32* %r549
-%r551 = zext i32 %r550 to i384
-%r552 = shl i384 %r551, 352
-%r553 = or i384 %r547, %r552
-%r554 = zext i384 %r553 to i416
-%r556 = getelementptr i32, i32* %r475, i32 12
-%r557 = load i32, i32* %r556
-%r558 = zext i32 %r557 to i416
-%r559 = shl i416 %r558, 384
-%r560 = or i416 %r554, %r559
-%r561 = zext i416 %r560 to i448
-%r563 = getelementptr i32, i32* %r475, i32 13
-%r564 = load i32, i32* %r563
-%r565 = zext i32 %r564 to i448
-%r566 = shl i448 %r565, 416
-%r567 = or i448 %r561, %r566
-%r568 = zext i448 %r567 to i480
-%r570 = getelementptr i32, i32* %r475, i32 14
-%r571 = load i32, i32* %r570
-%r572 = zext i32 %r571 to i480
-%r573 = shl i480 %r572, 448
-%r574 = or i480 %r568, %r573
-%r575 = zext i480 %r574 to i512
-%r577 = getelementptr i32, i32* %r475, i32 15
-%r578 = load i32, i32* %r577
-%r579 = zext i32 %r578 to i512
-%r580 = shl i512 %r579, 480
-%r581 = or i512 %r575, %r580
-%r582 = zext i512 %r581 to i544
-%r584 = getelementptr i32, i32* %r475, i32 16
-%r585 = load i32, i32* %r584
-%r586 = zext i32 %r585 to i544
-%r587 = shl i544 %r586, 512
-%r588 = or i544 %r582, %r587
-%r589 = zext i544 %r588 to i576
-%r591 = getelementptr i32, i32* %r475, i32 17
-%r592 = load i32, i32* %r591
-%r593 = zext i32 %r592 to i576
-%r594 = shl i576 %r593, 544
-%r595 = or i576 %r589, %r594
-%r596 = add i576 %r473, %r595
-%r598 = getelementptr i32, i32* %r1, i32 6
-%r599 = trunc i576 %r596 to i32
-%r601 = getelementptr i32, i32* %r598, i32 0
-store i32 %r599, i32* %r601
-%r602 = lshr i576 %r596, 32
-%r603 = trunc i576 %r602 to i32
-%r605 = getelementptr i32, i32* %r598, i32 1
-store i32 %r603, i32* %r605
-%r606 = lshr i576 %r602, 32
-%r607 = trunc i576 %r606 to i32
-%r609 = getelementptr i32, i32* %r598, i32 2
-store i32 %r607, i32* %r609
-%r610 = lshr i576 %r606, 32
-%r611 = trunc i576 %r610 to i32
-%r613 = getelementptr i32, i32* %r598, i32 3
-store i32 %r611, i32* %r613
-%r614 = lshr i576 %r610, 32
-%r615 = trunc i576 %r614 to i32
-%r617 = getelementptr i32, i32* %r598, i32 4
-store i32 %r615, i32* %r617
-%r618 = lshr i576 %r614, 32
-%r619 = trunc i576 %r618 to i32
-%r621 = getelementptr i32, i32* %r598, i32 5
-store i32 %r619, i32* %r621
-%r622 = lshr i576 %r618, 32
-%r623 = trunc i576 %r622 to i32
-%r625 = getelementptr i32, i32* %r598, i32 6
-store i32 %r623, i32* %r625
-%r626 = lshr i576 %r622, 32
-%r627 = trunc i576 %r626 to i32
-%r629 = getelementptr i32, i32* %r598, i32 7
-store i32 %r627, i32* %r629
-%r630 = lshr i576 %r626, 32
-%r631 = trunc i576 %r630 to i32
-%r633 = getelementptr i32, i32* %r598, i32 8
-store i32 %r631, i32* %r633
-%r634 = lshr i576 %r630, 32
-%r635 = trunc i576 %r634 to i32
-%r637 = getelementptr i32, i32* %r598, i32 9
-store i32 %r635, i32* %r637
-%r638 = lshr i576 %r634, 32
-%r639 = trunc i576 %r638 to i32
-%r641 = getelementptr i32, i32* %r598, i32 10
-store i32 %r639, i32* %r641
-%r642 = lshr i576 %r638, 32
-%r643 = trunc i576 %r642 to i32
-%r645 = getelementptr i32, i32* %r598, i32 11
-store i32 %r643, i32* %r645
-%r646 = lshr i576 %r642, 32
-%r647 = trunc i576 %r646 to i32
-%r649 = getelementptr i32, i32* %r598, i32 12
-store i32 %r647, i32* %r649
-%r650 = lshr i576 %r646, 32
-%r651 = trunc i576 %r650 to i32
-%r653 = getelementptr i32, i32* %r598, i32 13
-store i32 %r651, i32* %r653
-%r654 = lshr i576 %r650, 32
-%r655 = trunc i576 %r654 to i32
-%r657 = getelementptr i32, i32* %r598, i32 14
-store i32 %r655, i32* %r657
-%r658 = lshr i576 %r654, 32
-%r659 = trunc i576 %r658 to i32
-%r661 = getelementptr i32, i32* %r598, i32 15
-store i32 %r659, i32* %r661
-%r662 = lshr i576 %r658, 32
-%r663 = trunc i576 %r662 to i32
-%r665 = getelementptr i32, i32* %r598, i32 16
-store i32 %r663, i32* %r665
-%r666 = lshr i576 %r662, 32
-%r667 = trunc i576 %r666 to i32
-%r669 = getelementptr i32, i32* %r598, i32 17
-store i32 %r667, i32* %r669
-ret void
-}
-define void @mcl_fpDbl_sqrPre12L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r4 = getelementptr i32, i32* %r2, i32 6
-%r6 = getelementptr i32, i32* %r2, i32 6
-%r8 = getelementptr i32, i32* %r1, i32 12
-call void @mcl_fpDbl_mulPre6L(i32* %r1, i32* %r2, i32* %r2)
-call void @mcl_fpDbl_mulPre6L(i32* %r8, i32* %r4, i32* %r6)
-%r9 = load i32, i32* %r4
-%r10 = zext i32 %r9 to i64
-%r12 = getelementptr i32, i32* %r4, i32 1
-%r13 = load i32, i32* %r12
-%r14 = zext i32 %r13 to i64
-%r15 = shl i64 %r14, 32
-%r16 = or i64 %r10, %r15
-%r17 = zext i64 %r16 to i96
-%r19 = getelementptr i32, i32* %r4, i32 2
-%r20 = load i32, i32* %r19
-%r21 = zext i32 %r20 to i96
-%r22 = shl i96 %r21, 64
-%r23 = or i96 %r17, %r22
-%r24 = zext i96 %r23 to i128
-%r26 = getelementptr i32, i32* %r4, i32 3
-%r27 = load i32, i32* %r26
-%r28 = zext i32 %r27 to i128
-%r29 = shl i128 %r28, 96
-%r30 = or i128 %r24, %r29
-%r31 = zext i128 %r30 to i160
-%r33 = getelementptr i32, i32* %r4, i32 4
-%r34 = load i32, i32* %r33
-%r35 = zext i32 %r34 to i160
-%r36 = shl i160 %r35, 128
-%r37 = or i160 %r31, %r36
-%r38 = zext i160 %r37 to i192
-%r40 = getelementptr i32, i32* %r4, i32 5
-%r41 = load i32, i32* %r40
-%r42 = zext i32 %r41 to i192
-%r43 = shl i192 %r42, 160
-%r44 = or i192 %r38, %r43
-%r45 = zext i192 %r44 to i224
-%r46 = load i32, i32* %r2
-%r47 = zext i32 %r46 to i64
-%r49 = getelementptr i32, i32* %r2, i32 1
-%r50 = load i32, i32* %r49
-%r51 = zext i32 %r50 to i64
-%r52 = shl i64 %r51, 32
-%r53 = or i64 %r47, %r52
-%r54 = zext i64 %r53 to i96
-%r56 = getelementptr i32, i32* %r2, i32 2
-%r57 = load i32, i32* %r56
-%r58 = zext i32 %r57 to i96
-%r59 = shl i96 %r58, 64
-%r60 = or i96 %r54, %r59
-%r61 = zext i96 %r60 to i128
-%r63 = getelementptr i32, i32* %r2, i32 3
-%r64 = load i32, i32* %r63
-%r65 = zext i32 %r64 to i128
-%r66 = shl i128 %r65, 96
-%r67 = or i128 %r61, %r66
-%r68 = zext i128 %r67 to i160
-%r70 = getelementptr i32, i32* %r2, i32 4
-%r71 = load i32, i32* %r70
-%r72 = zext i32 %r71 to i160
-%r73 = shl i160 %r72, 128
-%r74 = or i160 %r68, %r73
-%r75 = zext i160 %r74 to i192
-%r77 = getelementptr i32, i32* %r2, i32 5
-%r78 = load i32, i32* %r77
-%r79 = zext i32 %r78 to i192
-%r80 = shl i192 %r79, 160
-%r81 = or i192 %r75, %r80
-%r82 = zext i192 %r81 to i224
-%r83 = load i32, i32* %r6
-%r84 = zext i32 %r83 to i64
-%r86 = getelementptr i32, i32* %r6, i32 1
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i64
-%r89 = shl i64 %r88, 32
-%r90 = or i64 %r84, %r89
-%r91 = zext i64 %r90 to i96
-%r93 = getelementptr i32, i32* %r6, i32 2
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i96
-%r96 = shl i96 %r95, 64
-%r97 = or i96 %r91, %r96
-%r98 = zext i96 %r97 to i128
-%r100 = getelementptr i32, i32* %r6, i32 3
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i128
-%r103 = shl i128 %r102, 96
-%r104 = or i128 %r98, %r103
-%r105 = zext i128 %r104 to i160
-%r107 = getelementptr i32, i32* %r6, i32 4
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i160
-%r110 = shl i160 %r109, 128
-%r111 = or i160 %r105, %r110
-%r112 = zext i160 %r111 to i192
-%r114 = getelementptr i32, i32* %r6, i32 5
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i192
-%r117 = shl i192 %r116, 160
-%r118 = or i192 %r112, %r117
-%r119 = zext i192 %r118 to i224
-%r120 = load i32, i32* %r2
-%r121 = zext i32 %r120 to i64
-%r123 = getelementptr i32, i32* %r2, i32 1
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i64
-%r126 = shl i64 %r125, 32
-%r127 = or i64 %r121, %r126
-%r128 = zext i64 %r127 to i96
-%r130 = getelementptr i32, i32* %r2, i32 2
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i96
-%r133 = shl i96 %r132, 64
-%r134 = or i96 %r128, %r133
-%r135 = zext i96 %r134 to i128
-%r137 = getelementptr i32, i32* %r2, i32 3
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i128
-%r140 = shl i128 %r139, 96
-%r141 = or i128 %r135, %r140
-%r142 = zext i128 %r141 to i160
-%r144 = getelementptr i32, i32* %r2, i32 4
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i160
-%r147 = shl i160 %r146, 128
-%r148 = or i160 %r142, %r147
-%r149 = zext i160 %r148 to i192
-%r151 = getelementptr i32, i32* %r2, i32 5
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i192
-%r154 = shl i192 %r153, 160
-%r155 = or i192 %r149, %r154
-%r156 = zext i192 %r155 to i224
-%r157 = add i224 %r45, %r82
-%r158 = add i224 %r119, %r156
-%r160 = alloca i32, i32 12
-%r161 = trunc i224 %r157 to i192
-%r162 = trunc i224 %r158 to i192
-%r163 = lshr i224 %r157, 192
-%r164 = trunc i224 %r163 to i1
-%r165 = lshr i224 %r158, 192
-%r166 = trunc i224 %r165 to i1
-%r167 = and i1 %r164, %r166
-%r169 = select i1 %r164, i192 %r162, i192 0
-%r171 = select i1 %r166, i192 %r161, i192 0
-%r173 = alloca i32, i32 6
-%r175 = alloca i32, i32 6
-%r176 = trunc i192 %r161 to i32
-%r178 = getelementptr i32, i32* %r173, i32 0
-store i32 %r176, i32* %r178
-%r179 = lshr i192 %r161, 32
-%r180 = trunc i192 %r179 to i32
-%r182 = getelementptr i32, i32* %r173, i32 1
-store i32 %r180, i32* %r182
-%r183 = lshr i192 %r179, 32
-%r184 = trunc i192 %r183 to i32
-%r186 = getelementptr i32, i32* %r173, i32 2
-store i32 %r184, i32* %r186
-%r187 = lshr i192 %r183, 32
-%r188 = trunc i192 %r187 to i32
-%r190 = getelementptr i32, i32* %r173, i32 3
-store i32 %r188, i32* %r190
-%r191 = lshr i192 %r187, 32
-%r192 = trunc i192 %r191 to i32
-%r194 = getelementptr i32, i32* %r173, i32 4
-store i32 %r192, i32* %r194
-%r195 = lshr i192 %r191, 32
-%r196 = trunc i192 %r195 to i32
-%r198 = getelementptr i32, i32* %r173, i32 5
-store i32 %r196, i32* %r198
-%r199 = trunc i192 %r162 to i32
-%r201 = getelementptr i32, i32* %r175, i32 0
-store i32 %r199, i32* %r201
-%r202 = lshr i192 %r162, 32
-%r203 = trunc i192 %r202 to i32
-%r205 = getelementptr i32, i32* %r175, i32 1
-store i32 %r203, i32* %r205
-%r206 = lshr i192 %r202, 32
-%r207 = trunc i192 %r206 to i32
-%r209 = getelementptr i32, i32* %r175, i32 2
-store i32 %r207, i32* %r209
-%r210 = lshr i192 %r206, 32
-%r211 = trunc i192 %r210 to i32
-%r213 = getelementptr i32, i32* %r175, i32 3
-store i32 %r211, i32* %r213
-%r214 = lshr i192 %r210, 32
-%r215 = trunc i192 %r214 to i32
-%r217 = getelementptr i32, i32* %r175, i32 4
-store i32 %r215, i32* %r217
-%r218 = lshr i192 %r214, 32
-%r219 = trunc i192 %r218 to i32
-%r221 = getelementptr i32, i32* %r175, i32 5
-store i32 %r219, i32* %r221
-call void @mcl_fpDbl_mulPre6L(i32* %r160, i32* %r173, i32* %r175)
-%r222 = load i32, i32* %r160
-%r223 = zext i32 %r222 to i64
-%r225 = getelementptr i32, i32* %r160, i32 1
-%r226 = load i32, i32* %r225
-%r227 = zext i32 %r226 to i64
-%r228 = shl i64 %r227, 32
-%r229 = or i64 %r223, %r228
-%r230 = zext i64 %r229 to i96
-%r232 = getelementptr i32, i32* %r160, i32 2
-%r233 = load i32, i32* %r232
-%r234 = zext i32 %r233 to i96
-%r235 = shl i96 %r234, 64
-%r236 = or i96 %r230, %r235
-%r237 = zext i96 %r236 to i128
-%r239 = getelementptr i32, i32* %r160, i32 3
-%r240 = load i32, i32* %r239
-%r241 = zext i32 %r240 to i128
-%r242 = shl i128 %r241, 96
-%r243 = or i128 %r237, %r242
-%r244 = zext i128 %r243 to i160
-%r246 = getelementptr i32, i32* %r160, i32 4
-%r247 = load i32, i32* %r246
-%r248 = zext i32 %r247 to i160
-%r249 = shl i160 %r248, 128
-%r250 = or i160 %r244, %r249
-%r251 = zext i160 %r250 to i192
-%r253 = getelementptr i32, i32* %r160, i32 5
-%r254 = load i32, i32* %r253
-%r255 = zext i32 %r254 to i192
-%r256 = shl i192 %r255, 160
-%r257 = or i192 %r251, %r256
-%r258 = zext i192 %r257 to i224
-%r260 = getelementptr i32, i32* %r160, i32 6
-%r261 = load i32, i32* %r260
-%r262 = zext i32 %r261 to i224
-%r263 = shl i224 %r262, 192
-%r264 = or i224 %r258, %r263
-%r265 = zext i224 %r264 to i256
-%r267 = getelementptr i32, i32* %r160, i32 7
-%r268 = load i32, i32* %r267
-%r269 = zext i32 %r268 to i256
-%r270 = shl i256 %r269, 224
-%r271 = or i256 %r265, %r270
-%r272 = zext i256 %r271 to i288
-%r274 = getelementptr i32, i32* %r160, i32 8
-%r275 = load i32, i32* %r274
-%r276 = zext i32 %r275 to i288
-%r277 = shl i288 %r276, 256
-%r278 = or i288 %r272, %r277
-%r279 = zext i288 %r278 to i320
-%r281 = getelementptr i32, i32* %r160, i32 9
-%r282 = load i32, i32* %r281
-%r283 = zext i32 %r282 to i320
-%r284 = shl i320 %r283, 288
-%r285 = or i320 %r279, %r284
-%r286 = zext i320 %r285 to i352
-%r288 = getelementptr i32, i32* %r160, i32 10
-%r289 = load i32, i32* %r288
-%r290 = zext i32 %r289 to i352
-%r291 = shl i352 %r290, 320
-%r292 = or i352 %r286, %r291
-%r293 = zext i352 %r292 to i384
-%r295 = getelementptr i32, i32* %r160, i32 11
-%r296 = load i32, i32* %r295
-%r297 = zext i32 %r296 to i384
-%r298 = shl i384 %r297, 352
-%r299 = or i384 %r293, %r298
-%r300 = zext i384 %r299 to i416
-%r301 = zext i1 %r167 to i416
-%r302 = shl i416 %r301, 384
-%r303 = or i416 %r300, %r302
-%r304 = zext i192 %r169 to i416
-%r305 = zext i192 %r171 to i416
-%r306 = shl i416 %r304, 192
-%r307 = shl i416 %r305, 192
-%r308 = add i416 %r303, %r306
-%r309 = add i416 %r308, %r307
-%r310 = load i32, i32* %r1
-%r311 = zext i32 %r310 to i64
-%r313 = getelementptr i32, i32* %r1, i32 1
-%r314 = load i32, i32* %r313
-%r315 = zext i32 %r314 to i64
-%r316 = shl i64 %r315, 32
-%r317 = or i64 %r311, %r316
-%r318 = zext i64 %r317 to i96
-%r320 = getelementptr i32, i32* %r1, i32 2
-%r321 = load i32, i32* %r320
-%r322 = zext i32 %r321 to i96
-%r323 = shl i96 %r322, 64
-%r324 = or i96 %r318, %r323
-%r325 = zext i96 %r324 to i128
-%r327 = getelementptr i32, i32* %r1, i32 3
-%r328 = load i32, i32* %r327
-%r329 = zext i32 %r328 to i128
-%r330 = shl i128 %r329, 96
-%r331 = or i128 %r325, %r330
-%r332 = zext i128 %r331 to i160
-%r334 = getelementptr i32, i32* %r1, i32 4
-%r335 = load i32, i32* %r334
-%r336 = zext i32 %r335 to i160
-%r337 = shl i160 %r336, 128
-%r338 = or i160 %r332, %r337
-%r339 = zext i160 %r338 to i192
-%r341 = getelementptr i32, i32* %r1, i32 5
-%r342 = load i32, i32* %r341
-%r343 = zext i32 %r342 to i192
-%r344 = shl i192 %r343, 160
-%r345 = or i192 %r339, %r344
-%r346 = zext i192 %r345 to i224
-%r348 = getelementptr i32, i32* %r1, i32 6
-%r349 = load i32, i32* %r348
-%r350 = zext i32 %r349 to i224
-%r351 = shl i224 %r350, 192
-%r352 = or i224 %r346, %r351
-%r353 = zext i224 %r352 to i256
-%r355 = getelementptr i32, i32* %r1, i32 7
-%r356 = load i32, i32* %r355
-%r357 = zext i32 %r356 to i256
-%r358 = shl i256 %r357, 224
-%r359 = or i256 %r353, %r358
-%r360 = zext i256 %r359 to i288
-%r362 = getelementptr i32, i32* %r1, i32 8
-%r363 = load i32, i32* %r362
-%r364 = zext i32 %r363 to i288
-%r365 = shl i288 %r364, 256
-%r366 = or i288 %r360, %r365
-%r367 = zext i288 %r366 to i320
-%r369 = getelementptr i32, i32* %r1, i32 9
-%r370 = load i32, i32* %r369
-%r371 = zext i32 %r370 to i320
-%r372 = shl i320 %r371, 288
-%r373 = or i320 %r367, %r372
-%r374 = zext i320 %r373 to i352
-%r376 = getelementptr i32, i32* %r1, i32 10
-%r377 = load i32, i32* %r376
-%r378 = zext i32 %r377 to i352
-%r379 = shl i352 %r378, 320
-%r380 = or i352 %r374, %r379
-%r381 = zext i352 %r380 to i384
-%r383 = getelementptr i32, i32* %r1, i32 11
-%r384 = load i32, i32* %r383
-%r385 = zext i32 %r384 to i384
-%r386 = shl i384 %r385, 352
-%r387 = or i384 %r381, %r386
-%r388 = zext i384 %r387 to i416
-%r389 = sub i416 %r309, %r388
-%r391 = getelementptr i32, i32* %r1, i32 12
-%r392 = load i32, i32* %r391
-%r393 = zext i32 %r392 to i64
-%r395 = getelementptr i32, i32* %r391, i32 1
-%r396 = load i32, i32* %r395
-%r397 = zext i32 %r396 to i64
-%r398 = shl i64 %r397, 32
-%r399 = or i64 %r393, %r398
-%r400 = zext i64 %r399 to i96
-%r402 = getelementptr i32, i32* %r391, i32 2
-%r403 = load i32, i32* %r402
-%r404 = zext i32 %r403 to i96
-%r405 = shl i96 %r404, 64
-%r406 = or i96 %r400, %r405
-%r407 = zext i96 %r406 to i128
-%r409 = getelementptr i32, i32* %r391, i32 3
-%r410 = load i32, i32* %r409
-%r411 = zext i32 %r410 to i128
-%r412 = shl i128 %r411, 96
-%r413 = or i128 %r407, %r412
-%r414 = zext i128 %r413 to i160
-%r416 = getelementptr i32, i32* %r391, i32 4
-%r417 = load i32, i32* %r416
-%r418 = zext i32 %r417 to i160
-%r419 = shl i160 %r418, 128
-%r420 = or i160 %r414, %r419
-%r421 = zext i160 %r420 to i192
-%r423 = getelementptr i32, i32* %r391, i32 5
-%r424 = load i32, i32* %r423
-%r425 = zext i32 %r424 to i192
-%r426 = shl i192 %r425, 160
-%r427 = or i192 %r421, %r426
-%r428 = zext i192 %r427 to i224
-%r430 = getelementptr i32, i32* %r391, i32 6
-%r431 = load i32, i32* %r430
-%r432 = zext i32 %r431 to i224
-%r433 = shl i224 %r432, 192
-%r434 = or i224 %r428, %r433
-%r435 = zext i224 %r434 to i256
-%r437 = getelementptr i32, i32* %r391, i32 7
-%r438 = load i32, i32* %r437
-%r439 = zext i32 %r438 to i256
-%r440 = shl i256 %r439, 224
-%r441 = or i256 %r435, %r440
-%r442 = zext i256 %r441 to i288
-%r444 = getelementptr i32, i32* %r391, i32 8
-%r445 = load i32, i32* %r444
-%r446 = zext i32 %r445 to i288
-%r447 = shl i288 %r446, 256
-%r448 = or i288 %r442, %r447
-%r449 = zext i288 %r448 to i320
-%r451 = getelementptr i32, i32* %r391, i32 9
-%r452 = load i32, i32* %r451
-%r453 = zext i32 %r452 to i320
-%r454 = shl i320 %r453, 288
-%r455 = or i320 %r449, %r454
-%r456 = zext i320 %r455 to i352
-%r458 = getelementptr i32, i32* %r391, i32 10
-%r459 = load i32, i32* %r458
-%r460 = zext i32 %r459 to i352
-%r461 = shl i352 %r460, 320
-%r462 = or i352 %r456, %r461
-%r463 = zext i352 %r462 to i384
-%r465 = getelementptr i32, i32* %r391, i32 11
-%r466 = load i32, i32* %r465
-%r467 = zext i32 %r466 to i384
-%r468 = shl i384 %r467, 352
-%r469 = or i384 %r463, %r468
-%r470 = zext i384 %r469 to i416
-%r471 = sub i416 %r389, %r470
-%r472 = zext i416 %r471 to i576
-%r474 = getelementptr i32, i32* %r1, i32 6
-%r475 = load i32, i32* %r474
-%r476 = zext i32 %r475 to i64
-%r478 = getelementptr i32, i32* %r474, i32 1
-%r479 = load i32, i32* %r478
-%r480 = zext i32 %r479 to i64
-%r481 = shl i64 %r480, 32
-%r482 = or i64 %r476, %r481
-%r483 = zext i64 %r482 to i96
-%r485 = getelementptr i32, i32* %r474, i32 2
-%r486 = load i32, i32* %r485
-%r487 = zext i32 %r486 to i96
-%r488 = shl i96 %r487, 64
-%r489 = or i96 %r483, %r488
-%r490 = zext i96 %r489 to i128
-%r492 = getelementptr i32, i32* %r474, i32 3
-%r493 = load i32, i32* %r492
-%r494 = zext i32 %r493 to i128
-%r495 = shl i128 %r494, 96
-%r496 = or i128 %r490, %r495
-%r497 = zext i128 %r496 to i160
-%r499 = getelementptr i32, i32* %r474, i32 4
-%r500 = load i32, i32* %r499
-%r501 = zext i32 %r500 to i160
-%r502 = shl i160 %r501, 128
-%r503 = or i160 %r497, %r502
-%r504 = zext i160 %r503 to i192
-%r506 = getelementptr i32, i32* %r474, i32 5
-%r507 = load i32, i32* %r506
-%r508 = zext i32 %r507 to i192
-%r509 = shl i192 %r508, 160
-%r510 = or i192 %r504, %r509
-%r511 = zext i192 %r510 to i224
-%r513 = getelementptr i32, i32* %r474, i32 6
-%r514 = load i32, i32* %r513
-%r515 = zext i32 %r514 to i224
-%r516 = shl i224 %r515, 192
-%r517 = or i224 %r511, %r516
-%r518 = zext i224 %r517 to i256
-%r520 = getelementptr i32, i32* %r474, i32 7
-%r521 = load i32, i32* %r520
-%r522 = zext i32 %r521 to i256
-%r523 = shl i256 %r522, 224
-%r524 = or i256 %r518, %r523
-%r525 = zext i256 %r524 to i288
-%r527 = getelementptr i32, i32* %r474, i32 8
-%r528 = load i32, i32* %r527
-%r529 = zext i32 %r528 to i288
-%r530 = shl i288 %r529, 256
-%r531 = or i288 %r525, %r530
-%r532 = zext i288 %r531 to i320
-%r534 = getelementptr i32, i32* %r474, i32 9
-%r535 = load i32, i32* %r534
-%r536 = zext i32 %r535 to i320
-%r537 = shl i320 %r536, 288
-%r538 = or i320 %r532, %r537
-%r539 = zext i320 %r538 to i352
-%r541 = getelementptr i32, i32* %r474, i32 10
-%r542 = load i32, i32* %r541
-%r543 = zext i32 %r542 to i352
-%r544 = shl i352 %r543, 320
-%r545 = or i352 %r539, %r544
-%r546 = zext i352 %r545 to i384
-%r548 = getelementptr i32, i32* %r474, i32 11
-%r549 = load i32, i32* %r548
-%r550 = zext i32 %r549 to i384
-%r551 = shl i384 %r550, 352
-%r552 = or i384 %r546, %r551
-%r553 = zext i384 %r552 to i416
-%r555 = getelementptr i32, i32* %r474, i32 12
-%r556 = load i32, i32* %r555
-%r557 = zext i32 %r556 to i416
-%r558 = shl i416 %r557, 384
-%r559 = or i416 %r553, %r558
-%r560 = zext i416 %r559 to i448
-%r562 = getelementptr i32, i32* %r474, i32 13
-%r563 = load i32, i32* %r562
-%r564 = zext i32 %r563 to i448
-%r565 = shl i448 %r564, 416
-%r566 = or i448 %r560, %r565
-%r567 = zext i448 %r566 to i480
-%r569 = getelementptr i32, i32* %r474, i32 14
-%r570 = load i32, i32* %r569
-%r571 = zext i32 %r570 to i480
-%r572 = shl i480 %r571, 448
-%r573 = or i480 %r567, %r572
-%r574 = zext i480 %r573 to i512
-%r576 = getelementptr i32, i32* %r474, i32 15
-%r577 = load i32, i32* %r576
-%r578 = zext i32 %r577 to i512
-%r579 = shl i512 %r578, 480
-%r580 = or i512 %r574, %r579
-%r581 = zext i512 %r580 to i544
-%r583 = getelementptr i32, i32* %r474, i32 16
-%r584 = load i32, i32* %r583
-%r585 = zext i32 %r584 to i544
-%r586 = shl i544 %r585, 512
-%r587 = or i544 %r581, %r586
-%r588 = zext i544 %r587 to i576
-%r590 = getelementptr i32, i32* %r474, i32 17
-%r591 = load i32, i32* %r590
-%r592 = zext i32 %r591 to i576
-%r593 = shl i576 %r592, 544
-%r594 = or i576 %r588, %r593
-%r595 = add i576 %r472, %r594
-%r597 = getelementptr i32, i32* %r1, i32 6
-%r598 = trunc i576 %r595 to i32
-%r600 = getelementptr i32, i32* %r597, i32 0
-store i32 %r598, i32* %r600
-%r601 = lshr i576 %r595, 32
-%r602 = trunc i576 %r601 to i32
-%r604 = getelementptr i32, i32* %r597, i32 1
-store i32 %r602, i32* %r604
-%r605 = lshr i576 %r601, 32
-%r606 = trunc i576 %r605 to i32
-%r608 = getelementptr i32, i32* %r597, i32 2
-store i32 %r606, i32* %r608
-%r609 = lshr i576 %r605, 32
-%r610 = trunc i576 %r609 to i32
-%r612 = getelementptr i32, i32* %r597, i32 3
-store i32 %r610, i32* %r612
-%r613 = lshr i576 %r609, 32
-%r614 = trunc i576 %r613 to i32
-%r616 = getelementptr i32, i32* %r597, i32 4
-store i32 %r614, i32* %r616
-%r617 = lshr i576 %r613, 32
-%r618 = trunc i576 %r617 to i32
-%r620 = getelementptr i32, i32* %r597, i32 5
-store i32 %r618, i32* %r620
-%r621 = lshr i576 %r617, 32
-%r622 = trunc i576 %r621 to i32
-%r624 = getelementptr i32, i32* %r597, i32 6
-store i32 %r622, i32* %r624
-%r625 = lshr i576 %r621, 32
-%r626 = trunc i576 %r625 to i32
-%r628 = getelementptr i32, i32* %r597, i32 7
-store i32 %r626, i32* %r628
-%r629 = lshr i576 %r625, 32
-%r630 = trunc i576 %r629 to i32
-%r632 = getelementptr i32, i32* %r597, i32 8
-store i32 %r630, i32* %r632
-%r633 = lshr i576 %r629, 32
-%r634 = trunc i576 %r633 to i32
-%r636 = getelementptr i32, i32* %r597, i32 9
-store i32 %r634, i32* %r636
-%r637 = lshr i576 %r633, 32
-%r638 = trunc i576 %r637 to i32
-%r640 = getelementptr i32, i32* %r597, i32 10
-store i32 %r638, i32* %r640
-%r641 = lshr i576 %r637, 32
-%r642 = trunc i576 %r641 to i32
-%r644 = getelementptr i32, i32* %r597, i32 11
-store i32 %r642, i32* %r644
-%r645 = lshr i576 %r641, 32
-%r646 = trunc i576 %r645 to i32
-%r648 = getelementptr i32, i32* %r597, i32 12
-store i32 %r646, i32* %r648
-%r649 = lshr i576 %r645, 32
-%r650 = trunc i576 %r649 to i32
-%r652 = getelementptr i32, i32* %r597, i32 13
-store i32 %r650, i32* %r652
-%r653 = lshr i576 %r649, 32
-%r654 = trunc i576 %r653 to i32
-%r656 = getelementptr i32, i32* %r597, i32 14
-store i32 %r654, i32* %r656
-%r657 = lshr i576 %r653, 32
-%r658 = trunc i576 %r657 to i32
-%r660 = getelementptr i32, i32* %r597, i32 15
-store i32 %r658, i32* %r660
-%r661 = lshr i576 %r657, 32
-%r662 = trunc i576 %r661 to i32
-%r664 = getelementptr i32, i32* %r597, i32 16
-store i32 %r662, i32* %r664
-%r665 = lshr i576 %r661, 32
-%r666 = trunc i576 %r665 to i32
-%r668 = getelementptr i32, i32* %r597, i32 17
-store i32 %r666, i32* %r668
-ret void
-}
-define void @mcl_fp_mont12L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i416 @mulPv384x32(i32* %r2, i32 %r10)
-%r12 = zext i416 %r11 to i448
-%r13 = trunc i416 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i416 @mulPv384x32(i32* %r4, i32 %r14)
-%r16 = zext i416 %r15 to i448
-%r17 = add i448 %r12, %r16
-%r18 = lshr i448 %r17, 32
-%r20 = getelementptr i32, i32* %r3, i32 1
-%r21 = load i32, i32* %r20
-%r22 = call i416 @mulPv384x32(i32* %r2, i32 %r21)
-%r23 = zext i416 %r22 to i448
-%r24 = add i448 %r18, %r23
-%r25 = trunc i448 %r24 to i32
-%r26 = mul i32 %r25, %r7
-%r27 = call i416 @mulPv384x32(i32* %r4, i32 %r26)
-%r28 = zext i416 %r27 to i448
-%r29 = add i448 %r24, %r28
-%r30 = lshr i448 %r29, 32
-%r32 = getelementptr i32, i32* %r3, i32 2
-%r33 = load i32, i32* %r32
-%r34 = call i416 @mulPv384x32(i32* %r2, i32 %r33)
-%r35 = zext i416 %r34 to i448
-%r36 = add i448 %r30, %r35
-%r37 = trunc i448 %r36 to i32
-%r38 = mul i32 %r37, %r7
-%r39 = call i416 @mulPv384x32(i32* %r4, i32 %r38)
-%r40 = zext i416 %r39 to i448
-%r41 = add i448 %r36, %r40
-%r42 = lshr i448 %r41, 32
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = call i416 @mulPv384x32(i32* %r2, i32 %r45)
-%r47 = zext i416 %r46 to i448
-%r48 = add i448 %r42, %r47
-%r49 = trunc i448 %r48 to i32
-%r50 = mul i32 %r49, %r7
-%r51 = call i416 @mulPv384x32(i32* %r4, i32 %r50)
-%r52 = zext i416 %r51 to i448
-%r53 = add i448 %r48, %r52
-%r54 = lshr i448 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 4
-%r57 = load i32, i32* %r56
-%r58 = call i416 @mulPv384x32(i32* %r2, i32 %r57)
-%r59 = zext i416 %r58 to i448
-%r60 = add i448 %r54, %r59
-%r61 = trunc i448 %r60 to i32
-%r62 = mul i32 %r61, %r7
-%r63 = call i416 @mulPv384x32(i32* %r4, i32 %r62)
-%r64 = zext i416 %r63 to i448
-%r65 = add i448 %r60, %r64
-%r66 = lshr i448 %r65, 32
-%r68 = getelementptr i32, i32* %r3, i32 5
-%r69 = load i32, i32* %r68
-%r70 = call i416 @mulPv384x32(i32* %r2, i32 %r69)
-%r71 = zext i416 %r70 to i448
-%r72 = add i448 %r66, %r71
-%r73 = trunc i448 %r72 to i32
-%r74 = mul i32 %r73, %r7
-%r75 = call i416 @mulPv384x32(i32* %r4, i32 %r74)
-%r76 = zext i416 %r75 to i448
-%r77 = add i448 %r72, %r76
-%r78 = lshr i448 %r77, 32
-%r80 = getelementptr i32, i32* %r3, i32 6
-%r81 = load i32, i32* %r80
-%r82 = call i416 @mulPv384x32(i32* %r2, i32 %r81)
-%r83 = zext i416 %r82 to i448
-%r84 = add i448 %r78, %r83
-%r85 = trunc i448 %r84 to i32
-%r86 = mul i32 %r85, %r7
-%r87 = call i416 @mulPv384x32(i32* %r4, i32 %r86)
-%r88 = zext i416 %r87 to i448
-%r89 = add i448 %r84, %r88
-%r90 = lshr i448 %r89, 32
-%r92 = getelementptr i32, i32* %r3, i32 7
-%r93 = load i32, i32* %r92
-%r94 = call i416 @mulPv384x32(i32* %r2, i32 %r93)
-%r95 = zext i416 %r94 to i448
-%r96 = add i448 %r90, %r95
-%r97 = trunc i448 %r96 to i32
-%r98 = mul i32 %r97, %r7
-%r99 = call i416 @mulPv384x32(i32* %r4, i32 %r98)
-%r100 = zext i416 %r99 to i448
-%r101 = add i448 %r96, %r100
-%r102 = lshr i448 %r101, 32
-%r104 = getelementptr i32, i32* %r3, i32 8
-%r105 = load i32, i32* %r104
-%r106 = call i416 @mulPv384x32(i32* %r2, i32 %r105)
-%r107 = zext i416 %r106 to i448
-%r108 = add i448 %r102, %r107
-%r109 = trunc i448 %r108 to i32
-%r110 = mul i32 %r109, %r7
-%r111 = call i416 @mulPv384x32(i32* %r4, i32 %r110)
-%r112 = zext i416 %r111 to i448
-%r113 = add i448 %r108, %r112
-%r114 = lshr i448 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 9
-%r117 = load i32, i32* %r116
-%r118 = call i416 @mulPv384x32(i32* %r2, i32 %r117)
-%r119 = zext i416 %r118 to i448
-%r120 = add i448 %r114, %r119
-%r121 = trunc i448 %r120 to i32
-%r122 = mul i32 %r121, %r7
-%r123 = call i416 @mulPv384x32(i32* %r4, i32 %r122)
-%r124 = zext i416 %r123 to i448
-%r125 = add i448 %r120, %r124
-%r126 = lshr i448 %r125, 32
-%r128 = getelementptr i32, i32* %r3, i32 10
-%r129 = load i32, i32* %r128
-%r130 = call i416 @mulPv384x32(i32* %r2, i32 %r129)
-%r131 = zext i416 %r130 to i448
-%r132 = add i448 %r126, %r131
-%r133 = trunc i448 %r132 to i32
-%r134 = mul i32 %r133, %r7
-%r135 = call i416 @mulPv384x32(i32* %r4, i32 %r134)
-%r136 = zext i416 %r135 to i448
-%r137 = add i448 %r132, %r136
-%r138 = lshr i448 %r137, 32
-%r140 = getelementptr i32, i32* %r3, i32 11
-%r141 = load i32, i32* %r140
-%r142 = call i416 @mulPv384x32(i32* %r2, i32 %r141)
-%r143 = zext i416 %r142 to i448
-%r144 = add i448 %r138, %r143
-%r145 = trunc i448 %r144 to i32
-%r146 = mul i32 %r145, %r7
-%r147 = call i416 @mulPv384x32(i32* %r4, i32 %r146)
-%r148 = zext i416 %r147 to i448
-%r149 = add i448 %r144, %r148
-%r150 = lshr i448 %r149, 32
-%r151 = trunc i448 %r150 to i416
-%r152 = load i32, i32* %r4
-%r153 = zext i32 %r152 to i64
-%r155 = getelementptr i32, i32* %r4, i32 1
-%r156 = load i32, i32* %r155
-%r157 = zext i32 %r156 to i64
-%r158 = shl i64 %r157, 32
-%r159 = or i64 %r153, %r158
-%r160 = zext i64 %r159 to i96
-%r162 = getelementptr i32, i32* %r4, i32 2
-%r163 = load i32, i32* %r162
-%r164 = zext i32 %r163 to i96
-%r165 = shl i96 %r164, 64
-%r166 = or i96 %r160, %r165
-%r167 = zext i96 %r166 to i128
-%r169 = getelementptr i32, i32* %r4, i32 3
-%r170 = load i32, i32* %r169
-%r171 = zext i32 %r170 to i128
-%r172 = shl i128 %r171, 96
-%r173 = or i128 %r167, %r172
-%r174 = zext i128 %r173 to i160
-%r176 = getelementptr i32, i32* %r4, i32 4
-%r177 = load i32, i32* %r176
-%r178 = zext i32 %r177 to i160
-%r179 = shl i160 %r178, 128
-%r180 = or i160 %r174, %r179
-%r181 = zext i160 %r180 to i192
-%r183 = getelementptr i32, i32* %r4, i32 5
-%r184 = load i32, i32* %r183
-%r185 = zext i32 %r184 to i192
-%r186 = shl i192 %r185, 160
-%r187 = or i192 %r181, %r186
-%r188 = zext i192 %r187 to i224
-%r190 = getelementptr i32, i32* %r4, i32 6
-%r191 = load i32, i32* %r190
-%r192 = zext i32 %r191 to i224
-%r193 = shl i224 %r192, 192
-%r194 = or i224 %r188, %r193
-%r195 = zext i224 %r194 to i256
-%r197 = getelementptr i32, i32* %r4, i32 7
-%r198 = load i32, i32* %r197
-%r199 = zext i32 %r198 to i256
-%r200 = shl i256 %r199, 224
-%r201 = or i256 %r195, %r200
-%r202 = zext i256 %r201 to i288
-%r204 = getelementptr i32, i32* %r4, i32 8
-%r205 = load i32, i32* %r204
-%r206 = zext i32 %r205 to i288
-%r207 = shl i288 %r206, 256
-%r208 = or i288 %r202, %r207
-%r209 = zext i288 %r208 to i320
-%r211 = getelementptr i32, i32* %r4, i32 9
-%r212 = load i32, i32* %r211
-%r213 = zext i32 %r212 to i320
-%r214 = shl i320 %r213, 288
-%r215 = or i320 %r209, %r214
-%r216 = zext i320 %r215 to i352
-%r218 = getelementptr i32, i32* %r4, i32 10
-%r219 = load i32, i32* %r218
-%r220 = zext i32 %r219 to i352
-%r221 = shl i352 %r220, 320
-%r222 = or i352 %r216, %r221
-%r223 = zext i352 %r222 to i384
-%r225 = getelementptr i32, i32* %r4, i32 11
-%r226 = load i32, i32* %r225
-%r227 = zext i32 %r226 to i384
-%r228 = shl i384 %r227, 352
-%r229 = or i384 %r223, %r228
-%r230 = zext i384 %r229 to i416
-%r231 = sub i416 %r151, %r230
-%r232 = lshr i416 %r231, 384
-%r233 = trunc i416 %r232 to i1
-%r234 = select i1 %r233, i416 %r151, i416 %r231
-%r235 = trunc i416 %r234 to i384
-%r236 = trunc i384 %r235 to i32
-%r238 = getelementptr i32, i32* %r1, i32 0
-store i32 %r236, i32* %r238
-%r239 = lshr i384 %r235, 32
-%r240 = trunc i384 %r239 to i32
-%r242 = getelementptr i32, i32* %r1, i32 1
-store i32 %r240, i32* %r242
-%r243 = lshr i384 %r239, 32
-%r244 = trunc i384 %r243 to i32
-%r246 = getelementptr i32, i32* %r1, i32 2
-store i32 %r244, i32* %r246
-%r247 = lshr i384 %r243, 32
-%r248 = trunc i384 %r247 to i32
-%r250 = getelementptr i32, i32* %r1, i32 3
-store i32 %r248, i32* %r250
-%r251 = lshr i384 %r247, 32
-%r252 = trunc i384 %r251 to i32
-%r254 = getelementptr i32, i32* %r1, i32 4
-store i32 %r252, i32* %r254
-%r255 = lshr i384 %r251, 32
-%r256 = trunc i384 %r255 to i32
-%r258 = getelementptr i32, i32* %r1, i32 5
-store i32 %r256, i32* %r258
-%r259 = lshr i384 %r255, 32
-%r260 = trunc i384 %r259 to i32
-%r262 = getelementptr i32, i32* %r1, i32 6
-store i32 %r260, i32* %r262
-%r263 = lshr i384 %r259, 32
-%r264 = trunc i384 %r263 to i32
-%r266 = getelementptr i32, i32* %r1, i32 7
-store i32 %r264, i32* %r266
-%r267 = lshr i384 %r263, 32
-%r268 = trunc i384 %r267 to i32
-%r270 = getelementptr i32, i32* %r1, i32 8
-store i32 %r268, i32* %r270
-%r271 = lshr i384 %r267, 32
-%r272 = trunc i384 %r271 to i32
-%r274 = getelementptr i32, i32* %r1, i32 9
-store i32 %r272, i32* %r274
-%r275 = lshr i384 %r271, 32
-%r276 = trunc i384 %r275 to i32
-%r278 = getelementptr i32, i32* %r1, i32 10
-store i32 %r276, i32* %r278
-%r279 = lshr i384 %r275, 32
-%r280 = trunc i384 %r279 to i32
-%r282 = getelementptr i32, i32* %r1, i32 11
-store i32 %r280, i32* %r282
-ret void
-}
-define void @mcl_fp_montNF12L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i416 @mulPv384x32(i32* %r2, i32 %r8)
-%r10 = trunc i416 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i416 @mulPv384x32(i32* %r4, i32 %r11)
-%r13 = add i416 %r9, %r12
-%r14 = lshr i416 %r13, 32
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = call i416 @mulPv384x32(i32* %r2, i32 %r17)
-%r19 = add i416 %r14, %r18
-%r20 = trunc i416 %r19 to i32
-%r21 = mul i32 %r20, %r7
-%r22 = call i416 @mulPv384x32(i32* %r4, i32 %r21)
-%r23 = add i416 %r19, %r22
-%r24 = lshr i416 %r23, 32
-%r26 = getelementptr i32, i32* %r3, i32 2
-%r27 = load i32, i32* %r26
-%r28 = call i416 @mulPv384x32(i32* %r2, i32 %r27)
-%r29 = add i416 %r24, %r28
-%r30 = trunc i416 %r29 to i32
-%r31 = mul i32 %r30, %r7
-%r32 = call i416 @mulPv384x32(i32* %r4, i32 %r31)
-%r33 = add i416 %r29, %r32
-%r34 = lshr i416 %r33, 32
-%r36 = getelementptr i32, i32* %r3, i32 3
-%r37 = load i32, i32* %r36
-%r38 = call i416 @mulPv384x32(i32* %r2, i32 %r37)
-%r39 = add i416 %r34, %r38
-%r40 = trunc i416 %r39 to i32
-%r41 = mul i32 %r40, %r7
-%r42 = call i416 @mulPv384x32(i32* %r4, i32 %r41)
-%r43 = add i416 %r39, %r42
-%r44 = lshr i416 %r43, 32
-%r46 = getelementptr i32, i32* %r3, i32 4
-%r47 = load i32, i32* %r46
-%r48 = call i416 @mulPv384x32(i32* %r2, i32 %r47)
-%r49 = add i416 %r44, %r48
-%r50 = trunc i416 %r49 to i32
-%r51 = mul i32 %r50, %r7
-%r52 = call i416 @mulPv384x32(i32* %r4, i32 %r51)
-%r53 = add i416 %r49, %r52
-%r54 = lshr i416 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 5
-%r57 = load i32, i32* %r56
-%r58 = call i416 @mulPv384x32(i32* %r2, i32 %r57)
-%r59 = add i416 %r54, %r58
-%r60 = trunc i416 %r59 to i32
-%r61 = mul i32 %r60, %r7
-%r62 = call i416 @mulPv384x32(i32* %r4, i32 %r61)
-%r63 = add i416 %r59, %r62
-%r64 = lshr i416 %r63, 32
-%r66 = getelementptr i32, i32* %r3, i32 6
-%r67 = load i32, i32* %r66
-%r68 = call i416 @mulPv384x32(i32* %r2, i32 %r67)
-%r69 = add i416 %r64, %r68
-%r70 = trunc i416 %r69 to i32
-%r71 = mul i32 %r70, %r7
-%r72 = call i416 @mulPv384x32(i32* %r4, i32 %r71)
-%r73 = add i416 %r69, %r72
-%r74 = lshr i416 %r73, 32
-%r76 = getelementptr i32, i32* %r3, i32 7
-%r77 = load i32, i32* %r76
-%r78 = call i416 @mulPv384x32(i32* %r2, i32 %r77)
-%r79 = add i416 %r74, %r78
-%r80 = trunc i416 %r79 to i32
-%r81 = mul i32 %r80, %r7
-%r82 = call i416 @mulPv384x32(i32* %r4, i32 %r81)
-%r83 = add i416 %r79, %r82
-%r84 = lshr i416 %r83, 32
-%r86 = getelementptr i32, i32* %r3, i32 8
-%r87 = load i32, i32* %r86
-%r88 = call i416 @mulPv384x32(i32* %r2, i32 %r87)
-%r89 = add i416 %r84, %r88
-%r90 = trunc i416 %r89 to i32
-%r91 = mul i32 %r90, %r7
-%r92 = call i416 @mulPv384x32(i32* %r4, i32 %r91)
-%r93 = add i416 %r89, %r92
-%r94 = lshr i416 %r93, 32
-%r96 = getelementptr i32, i32* %r3, i32 9
-%r97 = load i32, i32* %r96
-%r98 = call i416 @mulPv384x32(i32* %r2, i32 %r97)
-%r99 = add i416 %r94, %r98
-%r100 = trunc i416 %r99 to i32
-%r101 = mul i32 %r100, %r7
-%r102 = call i416 @mulPv384x32(i32* %r4, i32 %r101)
-%r103 = add i416 %r99, %r102
-%r104 = lshr i416 %r103, 32
-%r106 = getelementptr i32, i32* %r3, i32 10
-%r107 = load i32, i32* %r106
-%r108 = call i416 @mulPv384x32(i32* %r2, i32 %r107)
-%r109 = add i416 %r104, %r108
-%r110 = trunc i416 %r109 to i32
-%r111 = mul i32 %r110, %r7
-%r112 = call i416 @mulPv384x32(i32* %r4, i32 %r111)
-%r113 = add i416 %r109, %r112
-%r114 = lshr i416 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 11
-%r117 = load i32, i32* %r116
-%r118 = call i416 @mulPv384x32(i32* %r2, i32 %r117)
-%r119 = add i416 %r114, %r118
-%r120 = trunc i416 %r119 to i32
-%r121 = mul i32 %r120, %r7
-%r122 = call i416 @mulPv384x32(i32* %r4, i32 %r121)
-%r123 = add i416 %r119, %r122
-%r124 = lshr i416 %r123, 32
-%r125 = trunc i416 %r124 to i384
-%r126 = load i32, i32* %r4
-%r127 = zext i32 %r126 to i64
-%r129 = getelementptr i32, i32* %r4, i32 1
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i64
-%r132 = shl i64 %r131, 32
-%r133 = or i64 %r127, %r132
-%r134 = zext i64 %r133 to i96
-%r136 = getelementptr i32, i32* %r4, i32 2
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i96
-%r139 = shl i96 %r138, 64
-%r140 = or i96 %r134, %r139
-%r141 = zext i96 %r140 to i128
-%r143 = getelementptr i32, i32* %r4, i32 3
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i128
-%r146 = shl i128 %r145, 96
-%r147 = or i128 %r141, %r146
-%r148 = zext i128 %r147 to i160
-%r150 = getelementptr i32, i32* %r4, i32 4
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i160
-%r153 = shl i160 %r152, 128
-%r154 = or i160 %r148, %r153
-%r155 = zext i160 %r154 to i192
-%r157 = getelementptr i32, i32* %r4, i32 5
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i192
-%r160 = shl i192 %r159, 160
-%r161 = or i192 %r155, %r160
-%r162 = zext i192 %r161 to i224
-%r164 = getelementptr i32, i32* %r4, i32 6
-%r165 = load i32, i32* %r164
-%r166 = zext i32 %r165 to i224
-%r167 = shl i224 %r166, 192
-%r168 = or i224 %r162, %r167
-%r169 = zext i224 %r168 to i256
-%r171 = getelementptr i32, i32* %r4, i32 7
-%r172 = load i32, i32* %r171
-%r173 = zext i32 %r172 to i256
-%r174 = shl i256 %r173, 224
-%r175 = or i256 %r169, %r174
-%r176 = zext i256 %r175 to i288
-%r178 = getelementptr i32, i32* %r4, i32 8
-%r179 = load i32, i32* %r178
-%r180 = zext i32 %r179 to i288
-%r181 = shl i288 %r180, 256
-%r182 = or i288 %r176, %r181
-%r183 = zext i288 %r182 to i320
-%r185 = getelementptr i32, i32* %r4, i32 9
-%r186 = load i32, i32* %r185
-%r187 = zext i32 %r186 to i320
-%r188 = shl i320 %r187, 288
-%r189 = or i320 %r183, %r188
-%r190 = zext i320 %r189 to i352
-%r192 = getelementptr i32, i32* %r4, i32 10
-%r193 = load i32, i32* %r192
-%r194 = zext i32 %r193 to i352
-%r195 = shl i352 %r194, 320
-%r196 = or i352 %r190, %r195
-%r197 = zext i352 %r196 to i384
-%r199 = getelementptr i32, i32* %r4, i32 11
-%r200 = load i32, i32* %r199
-%r201 = zext i32 %r200 to i384
-%r202 = shl i384 %r201, 352
-%r203 = or i384 %r197, %r202
-%r204 = sub i384 %r125, %r203
-%r205 = lshr i384 %r204, 383
-%r206 = trunc i384 %r205 to i1
-%r207 = select i1 %r206, i384 %r125, i384 %r204
-%r208 = trunc i384 %r207 to i32
-%r210 = getelementptr i32, i32* %r1, i32 0
-store i32 %r208, i32* %r210
-%r211 = lshr i384 %r207, 32
-%r212 = trunc i384 %r211 to i32
-%r214 = getelementptr i32, i32* %r1, i32 1
-store i32 %r212, i32* %r214
-%r215 = lshr i384 %r211, 32
-%r216 = trunc i384 %r215 to i32
-%r218 = getelementptr i32, i32* %r1, i32 2
-store i32 %r216, i32* %r218
-%r219 = lshr i384 %r215, 32
-%r220 = trunc i384 %r219 to i32
-%r222 = getelementptr i32, i32* %r1, i32 3
-store i32 %r220, i32* %r222
-%r223 = lshr i384 %r219, 32
-%r224 = trunc i384 %r223 to i32
-%r226 = getelementptr i32, i32* %r1, i32 4
-store i32 %r224, i32* %r226
-%r227 = lshr i384 %r223, 32
-%r228 = trunc i384 %r227 to i32
-%r230 = getelementptr i32, i32* %r1, i32 5
-store i32 %r228, i32* %r230
-%r231 = lshr i384 %r227, 32
-%r232 = trunc i384 %r231 to i32
-%r234 = getelementptr i32, i32* %r1, i32 6
-store i32 %r232, i32* %r234
-%r235 = lshr i384 %r231, 32
-%r236 = trunc i384 %r235 to i32
-%r238 = getelementptr i32, i32* %r1, i32 7
-store i32 %r236, i32* %r238
-%r239 = lshr i384 %r235, 32
-%r240 = trunc i384 %r239 to i32
-%r242 = getelementptr i32, i32* %r1, i32 8
-store i32 %r240, i32* %r242
-%r243 = lshr i384 %r239, 32
-%r244 = trunc i384 %r243 to i32
-%r246 = getelementptr i32, i32* %r1, i32 9
-store i32 %r244, i32* %r246
-%r247 = lshr i384 %r243, 32
-%r248 = trunc i384 %r247 to i32
-%r250 = getelementptr i32, i32* %r1, i32 10
-store i32 %r248, i32* %r250
-%r251 = lshr i384 %r247, 32
-%r252 = trunc i384 %r251 to i32
-%r254 = getelementptr i32, i32* %r1, i32 11
-store i32 %r252, i32* %r254
-ret void
-}
-define void @mcl_fp_montRed12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = zext i96 %r21 to i128
-%r24 = getelementptr i32, i32* %r3, i32 3
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i128
-%r27 = shl i128 %r26, 96
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i160
-%r31 = getelementptr i32, i32* %r3, i32 4
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i160
-%r34 = shl i160 %r33, 128
-%r35 = or i160 %r29, %r34
-%r36 = zext i160 %r35 to i192
-%r38 = getelementptr i32, i32* %r3, i32 5
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i192
-%r41 = shl i192 %r40, 160
-%r42 = or i192 %r36, %r41
-%r43 = zext i192 %r42 to i224
-%r45 = getelementptr i32, i32* %r3, i32 6
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i224
-%r48 = shl i224 %r47, 192
-%r49 = or i224 %r43, %r48
-%r50 = zext i224 %r49 to i256
-%r52 = getelementptr i32, i32* %r3, i32 7
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i256
-%r55 = shl i256 %r54, 224
-%r56 = or i256 %r50, %r55
-%r57 = zext i256 %r56 to i288
-%r59 = getelementptr i32, i32* %r3, i32 8
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i288
-%r62 = shl i288 %r61, 256
-%r63 = or i288 %r57, %r62
-%r64 = zext i288 %r63 to i320
-%r66 = getelementptr i32, i32* %r3, i32 9
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i320
-%r69 = shl i320 %r68, 288
-%r70 = or i320 %r64, %r69
-%r71 = zext i320 %r70 to i352
-%r73 = getelementptr i32, i32* %r3, i32 10
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i352
-%r76 = shl i352 %r75, 320
-%r77 = or i352 %r71, %r76
-%r78 = zext i352 %r77 to i384
-%r80 = getelementptr i32, i32* %r3, i32 11
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i384
-%r83 = shl i384 %r82, 352
-%r84 = or i384 %r78, %r83
-%r85 = load i32, i32* %r2
-%r86 = zext i32 %r85 to i64
-%r88 = getelementptr i32, i32* %r2, i32 1
-%r89 = load i32, i32* %r88
-%r90 = zext i32 %r89 to i64
-%r91 = shl i64 %r90, 32
-%r92 = or i64 %r86, %r91
-%r93 = zext i64 %r92 to i96
-%r95 = getelementptr i32, i32* %r2, i32 2
-%r96 = load i32, i32* %r95
-%r97 = zext i32 %r96 to i96
-%r98 = shl i96 %r97, 64
-%r99 = or i96 %r93, %r98
-%r100 = zext i96 %r99 to i128
-%r102 = getelementptr i32, i32* %r2, i32 3
-%r103 = load i32, i32* %r102
-%r104 = zext i32 %r103 to i128
-%r105 = shl i128 %r104, 96
-%r106 = or i128 %r100, %r105
-%r107 = zext i128 %r106 to i160
-%r109 = getelementptr i32, i32* %r2, i32 4
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i160
-%r112 = shl i160 %r111, 128
-%r113 = or i160 %r107, %r112
-%r114 = zext i160 %r113 to i192
-%r116 = getelementptr i32, i32* %r2, i32 5
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i192
-%r119 = shl i192 %r118, 160
-%r120 = or i192 %r114, %r119
-%r121 = zext i192 %r120 to i224
-%r123 = getelementptr i32, i32* %r2, i32 6
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i224
-%r126 = shl i224 %r125, 192
-%r127 = or i224 %r121, %r126
-%r128 = zext i224 %r127 to i256
-%r130 = getelementptr i32, i32* %r2, i32 7
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i256
-%r133 = shl i256 %r132, 224
-%r134 = or i256 %r128, %r133
-%r135 = zext i256 %r134 to i288
-%r137 = getelementptr i32, i32* %r2, i32 8
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i288
-%r140 = shl i288 %r139, 256
-%r141 = or i288 %r135, %r140
-%r142 = zext i288 %r141 to i320
-%r144 = getelementptr i32, i32* %r2, i32 9
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i320
-%r147 = shl i320 %r146, 288
-%r148 = or i320 %r142, %r147
-%r149 = zext i320 %r148 to i352
-%r151 = getelementptr i32, i32* %r2, i32 10
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i352
-%r154 = shl i352 %r153, 320
-%r155 = or i352 %r149, %r154
-%r156 = zext i352 %r155 to i384
-%r158 = getelementptr i32, i32* %r2, i32 11
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i384
-%r161 = shl i384 %r160, 352
-%r162 = or i384 %r156, %r161
-%r163 = zext i384 %r162 to i416
-%r165 = getelementptr i32, i32* %r2, i32 12
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i416
-%r168 = shl i416 %r167, 384
-%r169 = or i416 %r163, %r168
-%r170 = zext i416 %r169 to i448
-%r172 = getelementptr i32, i32* %r2, i32 13
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i448
-%r175 = shl i448 %r174, 416
-%r176 = or i448 %r170, %r175
-%r177 = zext i448 %r176 to i480
-%r179 = getelementptr i32, i32* %r2, i32 14
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i480
-%r182 = shl i480 %r181, 448
-%r183 = or i480 %r177, %r182
-%r184 = zext i480 %r183 to i512
-%r186 = getelementptr i32, i32* %r2, i32 15
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i512
-%r189 = shl i512 %r188, 480
-%r190 = or i512 %r184, %r189
-%r191 = zext i512 %r190 to i544
-%r193 = getelementptr i32, i32* %r2, i32 16
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i544
-%r196 = shl i544 %r195, 512
-%r197 = or i544 %r191, %r196
-%r198 = zext i544 %r197 to i576
-%r200 = getelementptr i32, i32* %r2, i32 17
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i576
-%r203 = shl i576 %r202, 544
-%r204 = or i576 %r198, %r203
-%r205 = zext i576 %r204 to i608
-%r207 = getelementptr i32, i32* %r2, i32 18
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i608
-%r210 = shl i608 %r209, 576
-%r211 = or i608 %r205, %r210
-%r212 = zext i608 %r211 to i640
-%r214 = getelementptr i32, i32* %r2, i32 19
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i640
-%r217 = shl i640 %r216, 608
-%r218 = or i640 %r212, %r217
-%r219 = zext i640 %r218 to i672
-%r221 = getelementptr i32, i32* %r2, i32 20
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i672
-%r224 = shl i672 %r223, 640
-%r225 = or i672 %r219, %r224
-%r226 = zext i672 %r225 to i704
-%r228 = getelementptr i32, i32* %r2, i32 21
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i704
-%r231 = shl i704 %r230, 672
-%r232 = or i704 %r226, %r231
-%r233 = zext i704 %r232 to i736
-%r235 = getelementptr i32, i32* %r2, i32 22
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i736
-%r238 = shl i736 %r237, 704
-%r239 = or i736 %r233, %r238
-%r240 = zext i736 %r239 to i768
-%r242 = getelementptr i32, i32* %r2, i32 23
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i768
-%r245 = shl i768 %r244, 736
-%r246 = or i768 %r240, %r245
-%r247 = zext i768 %r246 to i800
-%r248 = trunc i800 %r247 to i32
-%r249 = mul i32 %r248, %r6
-%r250 = call i416 @mulPv384x32(i32* %r3, i32 %r249)
-%r251 = zext i416 %r250 to i800
-%r252 = add i800 %r247, %r251
-%r253 = lshr i800 %r252, 32
-%r254 = trunc i800 %r253 to i768
-%r255 = trunc i768 %r254 to i32
-%r256 = mul i32 %r255, %r6
-%r257 = call i416 @mulPv384x32(i32* %r3, i32 %r256)
-%r258 = zext i416 %r257 to i768
-%r259 = add i768 %r254, %r258
-%r260 = lshr i768 %r259, 32
-%r261 = trunc i768 %r260 to i736
-%r262 = trunc i736 %r261 to i32
-%r263 = mul i32 %r262, %r6
-%r264 = call i416 @mulPv384x32(i32* %r3, i32 %r263)
-%r265 = zext i416 %r264 to i736
-%r266 = add i736 %r261, %r265
-%r267 = lshr i736 %r266, 32
-%r268 = trunc i736 %r267 to i704
-%r269 = trunc i704 %r268 to i32
-%r270 = mul i32 %r269, %r6
-%r271 = call i416 @mulPv384x32(i32* %r3, i32 %r270)
-%r272 = zext i416 %r271 to i704
-%r273 = add i704 %r268, %r272
-%r274 = lshr i704 %r273, 32
-%r275 = trunc i704 %r274 to i672
-%r276 = trunc i672 %r275 to i32
-%r277 = mul i32 %r276, %r6
-%r278 = call i416 @mulPv384x32(i32* %r3, i32 %r277)
-%r279 = zext i416 %r278 to i672
-%r280 = add i672 %r275, %r279
-%r281 = lshr i672 %r280, 32
-%r282 = trunc i672 %r281 to i640
-%r283 = trunc i640 %r282 to i32
-%r284 = mul i32 %r283, %r6
-%r285 = call i416 @mulPv384x32(i32* %r3, i32 %r284)
-%r286 = zext i416 %r285 to i640
-%r287 = add i640 %r282, %r286
-%r288 = lshr i640 %r287, 32
-%r289 = trunc i640 %r288 to i608
-%r290 = trunc i608 %r289 to i32
-%r291 = mul i32 %r290, %r6
-%r292 = call i416 @mulPv384x32(i32* %r3, i32 %r291)
-%r293 = zext i416 %r292 to i608
-%r294 = add i608 %r289, %r293
-%r295 = lshr i608 %r294, 32
-%r296 = trunc i608 %r295 to i576
-%r297 = trunc i576 %r296 to i32
-%r298 = mul i32 %r297, %r6
-%r299 = call i416 @mulPv384x32(i32* %r3, i32 %r298)
-%r300 = zext i416 %r299 to i576
-%r301 = add i576 %r296, %r300
-%r302 = lshr i576 %r301, 32
-%r303 = trunc i576 %r302 to i544
-%r304 = trunc i544 %r303 to i32
-%r305 = mul i32 %r304, %r6
-%r306 = call i416 @mulPv384x32(i32* %r3, i32 %r305)
-%r307 = zext i416 %r306 to i544
-%r308 = add i544 %r303, %r307
-%r309 = lshr i544 %r308, 32
-%r310 = trunc i544 %r309 to i512
-%r311 = trunc i512 %r310 to i32
-%r312 = mul i32 %r311, %r6
-%r313 = call i416 @mulPv384x32(i32* %r3, i32 %r312)
-%r314 = zext i416 %r313 to i512
-%r315 = add i512 %r310, %r314
-%r316 = lshr i512 %r315, 32
-%r317 = trunc i512 %r316 to i480
-%r318 = trunc i480 %r317 to i32
-%r319 = mul i32 %r318, %r6
-%r320 = call i416 @mulPv384x32(i32* %r3, i32 %r319)
-%r321 = zext i416 %r320 to i480
-%r322 = add i480 %r317, %r321
-%r323 = lshr i480 %r322, 32
-%r324 = trunc i480 %r323 to i448
-%r325 = trunc i448 %r324 to i32
-%r326 = mul i32 %r325, %r6
-%r327 = call i416 @mulPv384x32(i32* %r3, i32 %r326)
-%r328 = zext i416 %r327 to i448
-%r329 = add i448 %r324, %r328
-%r330 = lshr i448 %r329, 32
-%r331 = trunc i448 %r330 to i416
-%r332 = zext i384 %r84 to i416
-%r333 = sub i416 %r331, %r332
-%r334 = lshr i416 %r333, 384
-%r335 = trunc i416 %r334 to i1
-%r336 = select i1 %r335, i416 %r331, i416 %r333
-%r337 = trunc i416 %r336 to i384
-%r338 = trunc i384 %r337 to i32
-%r340 = getelementptr i32, i32* %r1, i32 0
-store i32 %r338, i32* %r340
-%r341 = lshr i384 %r337, 32
-%r342 = trunc i384 %r341 to i32
-%r344 = getelementptr i32, i32* %r1, i32 1
-store i32 %r342, i32* %r344
-%r345 = lshr i384 %r341, 32
-%r346 = trunc i384 %r345 to i32
-%r348 = getelementptr i32, i32* %r1, i32 2
-store i32 %r346, i32* %r348
-%r349 = lshr i384 %r345, 32
-%r350 = trunc i384 %r349 to i32
-%r352 = getelementptr i32, i32* %r1, i32 3
-store i32 %r350, i32* %r352
-%r353 = lshr i384 %r349, 32
-%r354 = trunc i384 %r353 to i32
-%r356 = getelementptr i32, i32* %r1, i32 4
-store i32 %r354, i32* %r356
-%r357 = lshr i384 %r353, 32
-%r358 = trunc i384 %r357 to i32
-%r360 = getelementptr i32, i32* %r1, i32 5
-store i32 %r358, i32* %r360
-%r361 = lshr i384 %r357, 32
-%r362 = trunc i384 %r361 to i32
-%r364 = getelementptr i32, i32* %r1, i32 6
-store i32 %r362, i32* %r364
-%r365 = lshr i384 %r361, 32
-%r366 = trunc i384 %r365 to i32
-%r368 = getelementptr i32, i32* %r1, i32 7
-store i32 %r366, i32* %r368
-%r369 = lshr i384 %r365, 32
-%r370 = trunc i384 %r369 to i32
-%r372 = getelementptr i32, i32* %r1, i32 8
-store i32 %r370, i32* %r372
-%r373 = lshr i384 %r369, 32
-%r374 = trunc i384 %r373 to i32
-%r376 = getelementptr i32, i32* %r1, i32 9
-store i32 %r374, i32* %r376
-%r377 = lshr i384 %r373, 32
-%r378 = trunc i384 %r377 to i32
-%r380 = getelementptr i32, i32* %r1, i32 10
-store i32 %r378, i32* %r380
-%r381 = lshr i384 %r377, 32
-%r382 = trunc i384 %r381 to i32
-%r384 = getelementptr i32, i32* %r1, i32 11
-store i32 %r382, i32* %r384
-ret void
-}
-define i32 @mcl_fp_addPre12L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r3, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r3, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r84 = load i32, i32* %r4
-%r85 = zext i32 %r84 to i64
-%r87 = getelementptr i32, i32* %r4, i32 1
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i64
-%r90 = shl i64 %r89, 32
-%r91 = or i64 %r85, %r90
-%r92 = zext i64 %r91 to i96
-%r94 = getelementptr i32, i32* %r4, i32 2
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i96
-%r97 = shl i96 %r96, 64
-%r98 = or i96 %r92, %r97
-%r99 = zext i96 %r98 to i128
-%r101 = getelementptr i32, i32* %r4, i32 3
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i128
-%r104 = shl i128 %r103, 96
-%r105 = or i128 %r99, %r104
-%r106 = zext i128 %r105 to i160
-%r108 = getelementptr i32, i32* %r4, i32 4
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i160
-%r111 = shl i160 %r110, 128
-%r112 = or i160 %r106, %r111
-%r113 = zext i160 %r112 to i192
-%r115 = getelementptr i32, i32* %r4, i32 5
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i192
-%r118 = shl i192 %r117, 160
-%r119 = or i192 %r113, %r118
-%r120 = zext i192 %r119 to i224
-%r122 = getelementptr i32, i32* %r4, i32 6
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i224
-%r125 = shl i224 %r124, 192
-%r126 = or i224 %r120, %r125
-%r127 = zext i224 %r126 to i256
-%r129 = getelementptr i32, i32* %r4, i32 7
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i256
-%r132 = shl i256 %r131, 224
-%r133 = or i256 %r127, %r132
-%r134 = zext i256 %r133 to i288
-%r136 = getelementptr i32, i32* %r4, i32 8
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i288
-%r139 = shl i288 %r138, 256
-%r140 = or i288 %r134, %r139
-%r141 = zext i288 %r140 to i320
-%r143 = getelementptr i32, i32* %r4, i32 9
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i320
-%r146 = shl i320 %r145, 288
-%r147 = or i320 %r141, %r146
-%r148 = zext i320 %r147 to i352
-%r150 = getelementptr i32, i32* %r4, i32 10
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i352
-%r153 = shl i352 %r152, 320
-%r154 = or i352 %r148, %r153
-%r155 = zext i352 %r154 to i384
-%r157 = getelementptr i32, i32* %r4, i32 11
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i384
-%r160 = shl i384 %r159, 352
-%r161 = or i384 %r155, %r160
-%r162 = zext i384 %r161 to i416
-%r163 = add i416 %r83, %r162
-%r164 = trunc i416 %r163 to i384
-%r165 = trunc i384 %r164 to i32
-%r167 = getelementptr i32, i32* %r2, i32 0
-store i32 %r165, i32* %r167
-%r168 = lshr i384 %r164, 32
-%r169 = trunc i384 %r168 to i32
-%r171 = getelementptr i32, i32* %r2, i32 1
-store i32 %r169, i32* %r171
-%r172 = lshr i384 %r168, 32
-%r173 = trunc i384 %r172 to i32
-%r175 = getelementptr i32, i32* %r2, i32 2
-store i32 %r173, i32* %r175
-%r176 = lshr i384 %r172, 32
-%r177 = trunc i384 %r176 to i32
-%r179 = getelementptr i32, i32* %r2, i32 3
-store i32 %r177, i32* %r179
-%r180 = lshr i384 %r176, 32
-%r181 = trunc i384 %r180 to i32
-%r183 = getelementptr i32, i32* %r2, i32 4
-store i32 %r181, i32* %r183
-%r184 = lshr i384 %r180, 32
-%r185 = trunc i384 %r184 to i32
-%r187 = getelementptr i32, i32* %r2, i32 5
-store i32 %r185, i32* %r187
-%r188 = lshr i384 %r184, 32
-%r189 = trunc i384 %r188 to i32
-%r191 = getelementptr i32, i32* %r2, i32 6
-store i32 %r189, i32* %r191
-%r192 = lshr i384 %r188, 32
-%r193 = trunc i384 %r192 to i32
-%r195 = getelementptr i32, i32* %r2, i32 7
-store i32 %r193, i32* %r195
-%r196 = lshr i384 %r192, 32
-%r197 = trunc i384 %r196 to i32
-%r199 = getelementptr i32, i32* %r2, i32 8
-store i32 %r197, i32* %r199
-%r200 = lshr i384 %r196, 32
-%r201 = trunc i384 %r200 to i32
-%r203 = getelementptr i32, i32* %r2, i32 9
-store i32 %r201, i32* %r203
-%r204 = lshr i384 %r200, 32
-%r205 = trunc i384 %r204 to i32
-%r207 = getelementptr i32, i32* %r2, i32 10
-store i32 %r205, i32* %r207
-%r208 = lshr i384 %r204, 32
-%r209 = trunc i384 %r208 to i32
-%r211 = getelementptr i32, i32* %r2, i32 11
-store i32 %r209, i32* %r211
-%r212 = lshr i416 %r163, 384
-%r213 = trunc i416 %r212 to i32
-ret i32 %r213
-}
-define i32 @mcl_fp_subPre12L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r3, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r3, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r84 = load i32, i32* %r4
-%r85 = zext i32 %r84 to i64
-%r87 = getelementptr i32, i32* %r4, i32 1
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i64
-%r90 = shl i64 %r89, 32
-%r91 = or i64 %r85, %r90
-%r92 = zext i64 %r91 to i96
-%r94 = getelementptr i32, i32* %r4, i32 2
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i96
-%r97 = shl i96 %r96, 64
-%r98 = or i96 %r92, %r97
-%r99 = zext i96 %r98 to i128
-%r101 = getelementptr i32, i32* %r4, i32 3
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i128
-%r104 = shl i128 %r103, 96
-%r105 = or i128 %r99, %r104
-%r106 = zext i128 %r105 to i160
-%r108 = getelementptr i32, i32* %r4, i32 4
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i160
-%r111 = shl i160 %r110, 128
-%r112 = or i160 %r106, %r111
-%r113 = zext i160 %r112 to i192
-%r115 = getelementptr i32, i32* %r4, i32 5
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i192
-%r118 = shl i192 %r117, 160
-%r119 = or i192 %r113, %r118
-%r120 = zext i192 %r119 to i224
-%r122 = getelementptr i32, i32* %r4, i32 6
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i224
-%r125 = shl i224 %r124, 192
-%r126 = or i224 %r120, %r125
-%r127 = zext i224 %r126 to i256
-%r129 = getelementptr i32, i32* %r4, i32 7
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i256
-%r132 = shl i256 %r131, 224
-%r133 = or i256 %r127, %r132
-%r134 = zext i256 %r133 to i288
-%r136 = getelementptr i32, i32* %r4, i32 8
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i288
-%r139 = shl i288 %r138, 256
-%r140 = or i288 %r134, %r139
-%r141 = zext i288 %r140 to i320
-%r143 = getelementptr i32, i32* %r4, i32 9
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i320
-%r146 = shl i320 %r145, 288
-%r147 = or i320 %r141, %r146
-%r148 = zext i320 %r147 to i352
-%r150 = getelementptr i32, i32* %r4, i32 10
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i352
-%r153 = shl i352 %r152, 320
-%r154 = or i352 %r148, %r153
-%r155 = zext i352 %r154 to i384
-%r157 = getelementptr i32, i32* %r4, i32 11
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i384
-%r160 = shl i384 %r159, 352
-%r161 = or i384 %r155, %r160
-%r162 = zext i384 %r161 to i416
-%r163 = sub i416 %r83, %r162
-%r164 = trunc i416 %r163 to i384
-%r165 = trunc i384 %r164 to i32
-%r167 = getelementptr i32, i32* %r2, i32 0
-store i32 %r165, i32* %r167
-%r168 = lshr i384 %r164, 32
-%r169 = trunc i384 %r168 to i32
-%r171 = getelementptr i32, i32* %r2, i32 1
-store i32 %r169, i32* %r171
-%r172 = lshr i384 %r168, 32
-%r173 = trunc i384 %r172 to i32
-%r175 = getelementptr i32, i32* %r2, i32 2
-store i32 %r173, i32* %r175
-%r176 = lshr i384 %r172, 32
-%r177 = trunc i384 %r176 to i32
-%r179 = getelementptr i32, i32* %r2, i32 3
-store i32 %r177, i32* %r179
-%r180 = lshr i384 %r176, 32
-%r181 = trunc i384 %r180 to i32
-%r183 = getelementptr i32, i32* %r2, i32 4
-store i32 %r181, i32* %r183
-%r184 = lshr i384 %r180, 32
-%r185 = trunc i384 %r184 to i32
-%r187 = getelementptr i32, i32* %r2, i32 5
-store i32 %r185, i32* %r187
-%r188 = lshr i384 %r184, 32
-%r189 = trunc i384 %r188 to i32
-%r191 = getelementptr i32, i32* %r2, i32 6
-store i32 %r189, i32* %r191
-%r192 = lshr i384 %r188, 32
-%r193 = trunc i384 %r192 to i32
-%r195 = getelementptr i32, i32* %r2, i32 7
-store i32 %r193, i32* %r195
-%r196 = lshr i384 %r192, 32
-%r197 = trunc i384 %r196 to i32
-%r199 = getelementptr i32, i32* %r2, i32 8
-store i32 %r197, i32* %r199
-%r200 = lshr i384 %r196, 32
-%r201 = trunc i384 %r200 to i32
-%r203 = getelementptr i32, i32* %r2, i32 9
-store i32 %r201, i32* %r203
-%r204 = lshr i384 %r200, 32
-%r205 = trunc i384 %r204 to i32
-%r207 = getelementptr i32, i32* %r2, i32 10
-store i32 %r205, i32* %r207
-%r208 = lshr i384 %r204, 32
-%r209 = trunc i384 %r208 to i32
-%r211 = getelementptr i32, i32* %r2, i32 11
-store i32 %r209, i32* %r211
-%r212 = lshr i416 %r163, 384
-%r213 = trunc i416 %r212 to i32
-%r215 = and i32 %r213, 1
-ret i32 %r215
-}
-define void @mcl_fp_shr1_12L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = zext i64 %r10 to i96
-%r13 = getelementptr i32, i32* %r2, i32 2
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i96
-%r16 = shl i96 %r15, 64
-%r17 = or i96 %r11, %r16
-%r18 = zext i96 %r17 to i128
-%r20 = getelementptr i32, i32* %r2, i32 3
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i128
-%r23 = shl i128 %r22, 96
-%r24 = or i128 %r18, %r23
-%r25 = zext i128 %r24 to i160
-%r27 = getelementptr i32, i32* %r2, i32 4
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i160
-%r30 = shl i160 %r29, 128
-%r31 = or i160 %r25, %r30
-%r32 = zext i160 %r31 to i192
-%r34 = getelementptr i32, i32* %r2, i32 5
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i192
-%r37 = shl i192 %r36, 160
-%r38 = or i192 %r32, %r37
-%r39 = zext i192 %r38 to i224
-%r41 = getelementptr i32, i32* %r2, i32 6
-%r42 = load i32, i32* %r41
-%r43 = zext i32 %r42 to i224
-%r44 = shl i224 %r43, 192
-%r45 = or i224 %r39, %r44
-%r46 = zext i224 %r45 to i256
-%r48 = getelementptr i32, i32* %r2, i32 7
-%r49 = load i32, i32* %r48
-%r50 = zext i32 %r49 to i256
-%r51 = shl i256 %r50, 224
-%r52 = or i256 %r46, %r51
-%r53 = zext i256 %r52 to i288
-%r55 = getelementptr i32, i32* %r2, i32 8
-%r56 = load i32, i32* %r55
-%r57 = zext i32 %r56 to i288
-%r58 = shl i288 %r57, 256
-%r59 = or i288 %r53, %r58
-%r60 = zext i288 %r59 to i320
-%r62 = getelementptr i32, i32* %r2, i32 9
-%r63 = load i32, i32* %r62
-%r64 = zext i32 %r63 to i320
-%r65 = shl i320 %r64, 288
-%r66 = or i320 %r60, %r65
-%r67 = zext i320 %r66 to i352
-%r69 = getelementptr i32, i32* %r2, i32 10
-%r70 = load i32, i32* %r69
-%r71 = zext i32 %r70 to i352
-%r72 = shl i352 %r71, 320
-%r73 = or i352 %r67, %r72
-%r74 = zext i352 %r73 to i384
-%r76 = getelementptr i32, i32* %r2, i32 11
-%r77 = load i32, i32* %r76
-%r78 = zext i32 %r77 to i384
-%r79 = shl i384 %r78, 352
-%r80 = or i384 %r74, %r79
-%r81 = lshr i384 %r80, 1
-%r82 = trunc i384 %r81 to i32
-%r84 = getelementptr i32, i32* %r1, i32 0
-store i32 %r82, i32* %r84
-%r85 = lshr i384 %r81, 32
-%r86 = trunc i384 %r85 to i32
-%r88 = getelementptr i32, i32* %r1, i32 1
-store i32 %r86, i32* %r88
-%r89 = lshr i384 %r85, 32
-%r90 = trunc i384 %r89 to i32
-%r92 = getelementptr i32, i32* %r1, i32 2
-store i32 %r90, i32* %r92
-%r93 = lshr i384 %r89, 32
-%r94 = trunc i384 %r93 to i32
-%r96 = getelementptr i32, i32* %r1, i32 3
-store i32 %r94, i32* %r96
-%r97 = lshr i384 %r93, 32
-%r98 = trunc i384 %r97 to i32
-%r100 = getelementptr i32, i32* %r1, i32 4
-store i32 %r98, i32* %r100
-%r101 = lshr i384 %r97, 32
-%r102 = trunc i384 %r101 to i32
-%r104 = getelementptr i32, i32* %r1, i32 5
-store i32 %r102, i32* %r104
-%r105 = lshr i384 %r101, 32
-%r106 = trunc i384 %r105 to i32
-%r108 = getelementptr i32, i32* %r1, i32 6
-store i32 %r106, i32* %r108
-%r109 = lshr i384 %r105, 32
-%r110 = trunc i384 %r109 to i32
-%r112 = getelementptr i32, i32* %r1, i32 7
-store i32 %r110, i32* %r112
-%r113 = lshr i384 %r109, 32
-%r114 = trunc i384 %r113 to i32
-%r116 = getelementptr i32, i32* %r1, i32 8
-store i32 %r114, i32* %r116
-%r117 = lshr i384 %r113, 32
-%r118 = trunc i384 %r117 to i32
-%r120 = getelementptr i32, i32* %r1, i32 9
-store i32 %r118, i32* %r120
-%r121 = lshr i384 %r117, 32
-%r122 = trunc i384 %r121 to i32
-%r124 = getelementptr i32, i32* %r1, i32 10
-store i32 %r122, i32* %r124
-%r125 = lshr i384 %r121, 32
-%r126 = trunc i384 %r125 to i32
-%r128 = getelementptr i32, i32* %r1, i32 11
-store i32 %r126, i32* %r128
-ret void
-}
-define void @mcl_fp_add12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = load i32, i32* %r3
-%r84 = zext i32 %r83 to i64
-%r86 = getelementptr i32, i32* %r3, i32 1
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i64
-%r89 = shl i64 %r88, 32
-%r90 = or i64 %r84, %r89
-%r91 = zext i64 %r90 to i96
-%r93 = getelementptr i32, i32* %r3, i32 2
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i96
-%r96 = shl i96 %r95, 64
-%r97 = or i96 %r91, %r96
-%r98 = zext i96 %r97 to i128
-%r100 = getelementptr i32, i32* %r3, i32 3
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i128
-%r103 = shl i128 %r102, 96
-%r104 = or i128 %r98, %r103
-%r105 = zext i128 %r104 to i160
-%r107 = getelementptr i32, i32* %r3, i32 4
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i160
-%r110 = shl i160 %r109, 128
-%r111 = or i160 %r105, %r110
-%r112 = zext i160 %r111 to i192
-%r114 = getelementptr i32, i32* %r3, i32 5
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i192
-%r117 = shl i192 %r116, 160
-%r118 = or i192 %r112, %r117
-%r119 = zext i192 %r118 to i224
-%r121 = getelementptr i32, i32* %r3, i32 6
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i224
-%r124 = shl i224 %r123, 192
-%r125 = or i224 %r119, %r124
-%r126 = zext i224 %r125 to i256
-%r128 = getelementptr i32, i32* %r3, i32 7
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i256
-%r131 = shl i256 %r130, 224
-%r132 = or i256 %r126, %r131
-%r133 = zext i256 %r132 to i288
-%r135 = getelementptr i32, i32* %r3, i32 8
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i288
-%r138 = shl i288 %r137, 256
-%r139 = or i288 %r133, %r138
-%r140 = zext i288 %r139 to i320
-%r142 = getelementptr i32, i32* %r3, i32 9
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i320
-%r145 = shl i320 %r144, 288
-%r146 = or i320 %r140, %r145
-%r147 = zext i320 %r146 to i352
-%r149 = getelementptr i32, i32* %r3, i32 10
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i352
-%r152 = shl i352 %r151, 320
-%r153 = or i352 %r147, %r152
-%r154 = zext i352 %r153 to i384
-%r156 = getelementptr i32, i32* %r3, i32 11
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i384
-%r159 = shl i384 %r158, 352
-%r160 = or i384 %r154, %r159
-%r161 = zext i384 %r82 to i416
-%r162 = zext i384 %r160 to i416
-%r163 = add i416 %r161, %r162
-%r164 = trunc i416 %r163 to i384
-%r165 = trunc i384 %r164 to i32
-%r167 = getelementptr i32, i32* %r1, i32 0
-store i32 %r165, i32* %r167
-%r168 = lshr i384 %r164, 32
-%r169 = trunc i384 %r168 to i32
-%r171 = getelementptr i32, i32* %r1, i32 1
-store i32 %r169, i32* %r171
-%r172 = lshr i384 %r168, 32
-%r173 = trunc i384 %r172 to i32
-%r175 = getelementptr i32, i32* %r1, i32 2
-store i32 %r173, i32* %r175
-%r176 = lshr i384 %r172, 32
-%r177 = trunc i384 %r176 to i32
-%r179 = getelementptr i32, i32* %r1, i32 3
-store i32 %r177, i32* %r179
-%r180 = lshr i384 %r176, 32
-%r181 = trunc i384 %r180 to i32
-%r183 = getelementptr i32, i32* %r1, i32 4
-store i32 %r181, i32* %r183
-%r184 = lshr i384 %r180, 32
-%r185 = trunc i384 %r184 to i32
-%r187 = getelementptr i32, i32* %r1, i32 5
-store i32 %r185, i32* %r187
-%r188 = lshr i384 %r184, 32
-%r189 = trunc i384 %r188 to i32
-%r191 = getelementptr i32, i32* %r1, i32 6
-store i32 %r189, i32* %r191
-%r192 = lshr i384 %r188, 32
-%r193 = trunc i384 %r192 to i32
-%r195 = getelementptr i32, i32* %r1, i32 7
-store i32 %r193, i32* %r195
-%r196 = lshr i384 %r192, 32
-%r197 = trunc i384 %r196 to i32
-%r199 = getelementptr i32, i32* %r1, i32 8
-store i32 %r197, i32* %r199
-%r200 = lshr i384 %r196, 32
-%r201 = trunc i384 %r200 to i32
-%r203 = getelementptr i32, i32* %r1, i32 9
-store i32 %r201, i32* %r203
-%r204 = lshr i384 %r200, 32
-%r205 = trunc i384 %r204 to i32
-%r207 = getelementptr i32, i32* %r1, i32 10
-store i32 %r205, i32* %r207
-%r208 = lshr i384 %r204, 32
-%r209 = trunc i384 %r208 to i32
-%r211 = getelementptr i32, i32* %r1, i32 11
-store i32 %r209, i32* %r211
-%r212 = load i32, i32* %r4
-%r213 = zext i32 %r212 to i64
-%r215 = getelementptr i32, i32* %r4, i32 1
-%r216 = load i32, i32* %r215
-%r217 = zext i32 %r216 to i64
-%r218 = shl i64 %r217, 32
-%r219 = or i64 %r213, %r218
-%r220 = zext i64 %r219 to i96
-%r222 = getelementptr i32, i32* %r4, i32 2
-%r223 = load i32, i32* %r222
-%r224 = zext i32 %r223 to i96
-%r225 = shl i96 %r224, 64
-%r226 = or i96 %r220, %r225
-%r227 = zext i96 %r226 to i128
-%r229 = getelementptr i32, i32* %r4, i32 3
-%r230 = load i32, i32* %r229
-%r231 = zext i32 %r230 to i128
-%r232 = shl i128 %r231, 96
-%r233 = or i128 %r227, %r232
-%r234 = zext i128 %r233 to i160
-%r236 = getelementptr i32, i32* %r4, i32 4
-%r237 = load i32, i32* %r236
-%r238 = zext i32 %r237 to i160
-%r239 = shl i160 %r238, 128
-%r240 = or i160 %r234, %r239
-%r241 = zext i160 %r240 to i192
-%r243 = getelementptr i32, i32* %r4, i32 5
-%r244 = load i32, i32* %r243
-%r245 = zext i32 %r244 to i192
-%r246 = shl i192 %r245, 160
-%r247 = or i192 %r241, %r246
-%r248 = zext i192 %r247 to i224
-%r250 = getelementptr i32, i32* %r4, i32 6
-%r251 = load i32, i32* %r250
-%r252 = zext i32 %r251 to i224
-%r253 = shl i224 %r252, 192
-%r254 = or i224 %r248, %r253
-%r255 = zext i224 %r254 to i256
-%r257 = getelementptr i32, i32* %r4, i32 7
-%r258 = load i32, i32* %r257
-%r259 = zext i32 %r258 to i256
-%r260 = shl i256 %r259, 224
-%r261 = or i256 %r255, %r260
-%r262 = zext i256 %r261 to i288
-%r264 = getelementptr i32, i32* %r4, i32 8
-%r265 = load i32, i32* %r264
-%r266 = zext i32 %r265 to i288
-%r267 = shl i288 %r266, 256
-%r268 = or i288 %r262, %r267
-%r269 = zext i288 %r268 to i320
-%r271 = getelementptr i32, i32* %r4, i32 9
-%r272 = load i32, i32* %r271
-%r273 = zext i32 %r272 to i320
-%r274 = shl i320 %r273, 288
-%r275 = or i320 %r269, %r274
-%r276 = zext i320 %r275 to i352
-%r278 = getelementptr i32, i32* %r4, i32 10
-%r279 = load i32, i32* %r278
-%r280 = zext i32 %r279 to i352
-%r281 = shl i352 %r280, 320
-%r282 = or i352 %r276, %r281
-%r283 = zext i352 %r282 to i384
-%r285 = getelementptr i32, i32* %r4, i32 11
-%r286 = load i32, i32* %r285
-%r287 = zext i32 %r286 to i384
-%r288 = shl i384 %r287, 352
-%r289 = or i384 %r283, %r288
-%r290 = zext i384 %r289 to i416
-%r291 = sub i416 %r163, %r290
-%r292 = lshr i416 %r291, 384
-%r293 = trunc i416 %r292 to i1
-br i1%r293, label %carry, label %nocarry
-nocarry:
-%r294 = trunc i416 %r291 to i384
-%r295 = trunc i384 %r294 to i32
-%r297 = getelementptr i32, i32* %r1, i32 0
-store i32 %r295, i32* %r297
-%r298 = lshr i384 %r294, 32
-%r299 = trunc i384 %r298 to i32
-%r301 = getelementptr i32, i32* %r1, i32 1
-store i32 %r299, i32* %r301
-%r302 = lshr i384 %r298, 32
-%r303 = trunc i384 %r302 to i32
-%r305 = getelementptr i32, i32* %r1, i32 2
-store i32 %r303, i32* %r305
-%r306 = lshr i384 %r302, 32
-%r307 = trunc i384 %r306 to i32
-%r309 = getelementptr i32, i32* %r1, i32 3
-store i32 %r307, i32* %r309
-%r310 = lshr i384 %r306, 32
-%r311 = trunc i384 %r310 to i32
-%r313 = getelementptr i32, i32* %r1, i32 4
-store i32 %r311, i32* %r313
-%r314 = lshr i384 %r310, 32
-%r315 = trunc i384 %r314 to i32
-%r317 = getelementptr i32, i32* %r1, i32 5
-store i32 %r315, i32* %r317
-%r318 = lshr i384 %r314, 32
-%r319 = trunc i384 %r318 to i32
-%r321 = getelementptr i32, i32* %r1, i32 6
-store i32 %r319, i32* %r321
-%r322 = lshr i384 %r318, 32
-%r323 = trunc i384 %r322 to i32
-%r325 = getelementptr i32, i32* %r1, i32 7
-store i32 %r323, i32* %r325
-%r326 = lshr i384 %r322, 32
-%r327 = trunc i384 %r326 to i32
-%r329 = getelementptr i32, i32* %r1, i32 8
-store i32 %r327, i32* %r329
-%r330 = lshr i384 %r326, 32
-%r331 = trunc i384 %r330 to i32
-%r333 = getelementptr i32, i32* %r1, i32 9
-store i32 %r331, i32* %r333
-%r334 = lshr i384 %r330, 32
-%r335 = trunc i384 %r334 to i32
-%r337 = getelementptr i32, i32* %r1, i32 10
-store i32 %r335, i32* %r337
-%r338 = lshr i384 %r334, 32
-%r339 = trunc i384 %r338 to i32
-%r341 = getelementptr i32, i32* %r1, i32 11
-store i32 %r339, i32* %r341
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = load i32, i32* %r3
-%r84 = zext i32 %r83 to i64
-%r86 = getelementptr i32, i32* %r3, i32 1
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i64
-%r89 = shl i64 %r88, 32
-%r90 = or i64 %r84, %r89
-%r91 = zext i64 %r90 to i96
-%r93 = getelementptr i32, i32* %r3, i32 2
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i96
-%r96 = shl i96 %r95, 64
-%r97 = or i96 %r91, %r96
-%r98 = zext i96 %r97 to i128
-%r100 = getelementptr i32, i32* %r3, i32 3
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i128
-%r103 = shl i128 %r102, 96
-%r104 = or i128 %r98, %r103
-%r105 = zext i128 %r104 to i160
-%r107 = getelementptr i32, i32* %r3, i32 4
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i160
-%r110 = shl i160 %r109, 128
-%r111 = or i160 %r105, %r110
-%r112 = zext i160 %r111 to i192
-%r114 = getelementptr i32, i32* %r3, i32 5
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i192
-%r117 = shl i192 %r116, 160
-%r118 = or i192 %r112, %r117
-%r119 = zext i192 %r118 to i224
-%r121 = getelementptr i32, i32* %r3, i32 6
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i224
-%r124 = shl i224 %r123, 192
-%r125 = or i224 %r119, %r124
-%r126 = zext i224 %r125 to i256
-%r128 = getelementptr i32, i32* %r3, i32 7
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i256
-%r131 = shl i256 %r130, 224
-%r132 = or i256 %r126, %r131
-%r133 = zext i256 %r132 to i288
-%r135 = getelementptr i32, i32* %r3, i32 8
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i288
-%r138 = shl i288 %r137, 256
-%r139 = or i288 %r133, %r138
-%r140 = zext i288 %r139 to i320
-%r142 = getelementptr i32, i32* %r3, i32 9
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i320
-%r145 = shl i320 %r144, 288
-%r146 = or i320 %r140, %r145
-%r147 = zext i320 %r146 to i352
-%r149 = getelementptr i32, i32* %r3, i32 10
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i352
-%r152 = shl i352 %r151, 320
-%r153 = or i352 %r147, %r152
-%r154 = zext i352 %r153 to i384
-%r156 = getelementptr i32, i32* %r3, i32 11
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i384
-%r159 = shl i384 %r158, 352
-%r160 = or i384 %r154, %r159
-%r161 = add i384 %r82, %r160
-%r162 = load i32, i32* %r4
-%r163 = zext i32 %r162 to i64
-%r165 = getelementptr i32, i32* %r4, i32 1
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i64
-%r168 = shl i64 %r167, 32
-%r169 = or i64 %r163, %r168
-%r170 = zext i64 %r169 to i96
-%r172 = getelementptr i32, i32* %r4, i32 2
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i96
-%r175 = shl i96 %r174, 64
-%r176 = or i96 %r170, %r175
-%r177 = zext i96 %r176 to i128
-%r179 = getelementptr i32, i32* %r4, i32 3
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i128
-%r182 = shl i128 %r181, 96
-%r183 = or i128 %r177, %r182
-%r184 = zext i128 %r183 to i160
-%r186 = getelementptr i32, i32* %r4, i32 4
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i160
-%r189 = shl i160 %r188, 128
-%r190 = or i160 %r184, %r189
-%r191 = zext i160 %r190 to i192
-%r193 = getelementptr i32, i32* %r4, i32 5
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i192
-%r196 = shl i192 %r195, 160
-%r197 = or i192 %r191, %r196
-%r198 = zext i192 %r197 to i224
-%r200 = getelementptr i32, i32* %r4, i32 6
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i224
-%r203 = shl i224 %r202, 192
-%r204 = or i224 %r198, %r203
-%r205 = zext i224 %r204 to i256
-%r207 = getelementptr i32, i32* %r4, i32 7
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i256
-%r210 = shl i256 %r209, 224
-%r211 = or i256 %r205, %r210
-%r212 = zext i256 %r211 to i288
-%r214 = getelementptr i32, i32* %r4, i32 8
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i288
-%r217 = shl i288 %r216, 256
-%r218 = or i288 %r212, %r217
-%r219 = zext i288 %r218 to i320
-%r221 = getelementptr i32, i32* %r4, i32 9
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i320
-%r224 = shl i320 %r223, 288
-%r225 = or i320 %r219, %r224
-%r226 = zext i320 %r225 to i352
-%r228 = getelementptr i32, i32* %r4, i32 10
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i352
-%r231 = shl i352 %r230, 320
-%r232 = or i352 %r226, %r231
-%r233 = zext i352 %r232 to i384
-%r235 = getelementptr i32, i32* %r4, i32 11
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i384
-%r238 = shl i384 %r237, 352
-%r239 = or i384 %r233, %r238
-%r240 = sub i384 %r161, %r239
-%r241 = lshr i384 %r240, 383
-%r242 = trunc i384 %r241 to i1
-%r243 = select i1 %r242, i384 %r161, i384 %r240
-%r244 = trunc i384 %r243 to i32
-%r246 = getelementptr i32, i32* %r1, i32 0
-store i32 %r244, i32* %r246
-%r247 = lshr i384 %r243, 32
-%r248 = trunc i384 %r247 to i32
-%r250 = getelementptr i32, i32* %r1, i32 1
-store i32 %r248, i32* %r250
-%r251 = lshr i384 %r247, 32
-%r252 = trunc i384 %r251 to i32
-%r254 = getelementptr i32, i32* %r1, i32 2
-store i32 %r252, i32* %r254
-%r255 = lshr i384 %r251, 32
-%r256 = trunc i384 %r255 to i32
-%r258 = getelementptr i32, i32* %r1, i32 3
-store i32 %r256, i32* %r258
-%r259 = lshr i384 %r255, 32
-%r260 = trunc i384 %r259 to i32
-%r262 = getelementptr i32, i32* %r1, i32 4
-store i32 %r260, i32* %r262
-%r263 = lshr i384 %r259, 32
-%r264 = trunc i384 %r263 to i32
-%r266 = getelementptr i32, i32* %r1, i32 5
-store i32 %r264, i32* %r266
-%r267 = lshr i384 %r263, 32
-%r268 = trunc i384 %r267 to i32
-%r270 = getelementptr i32, i32* %r1, i32 6
-store i32 %r268, i32* %r270
-%r271 = lshr i384 %r267, 32
-%r272 = trunc i384 %r271 to i32
-%r274 = getelementptr i32, i32* %r1, i32 7
-store i32 %r272, i32* %r274
-%r275 = lshr i384 %r271, 32
-%r276 = trunc i384 %r275 to i32
-%r278 = getelementptr i32, i32* %r1, i32 8
-store i32 %r276, i32* %r278
-%r279 = lshr i384 %r275, 32
-%r280 = trunc i384 %r279 to i32
-%r282 = getelementptr i32, i32* %r1, i32 9
-store i32 %r280, i32* %r282
-%r283 = lshr i384 %r279, 32
-%r284 = trunc i384 %r283 to i32
-%r286 = getelementptr i32, i32* %r1, i32 10
-store i32 %r284, i32* %r286
-%r287 = lshr i384 %r283, 32
-%r288 = trunc i384 %r287 to i32
-%r290 = getelementptr i32, i32* %r1, i32 11
-store i32 %r288, i32* %r290
-ret void
-}
-define void @mcl_fp_sub12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = load i32, i32* %r3
-%r84 = zext i32 %r83 to i64
-%r86 = getelementptr i32, i32* %r3, i32 1
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i64
-%r89 = shl i64 %r88, 32
-%r90 = or i64 %r84, %r89
-%r91 = zext i64 %r90 to i96
-%r93 = getelementptr i32, i32* %r3, i32 2
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i96
-%r96 = shl i96 %r95, 64
-%r97 = or i96 %r91, %r96
-%r98 = zext i96 %r97 to i128
-%r100 = getelementptr i32, i32* %r3, i32 3
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i128
-%r103 = shl i128 %r102, 96
-%r104 = or i128 %r98, %r103
-%r105 = zext i128 %r104 to i160
-%r107 = getelementptr i32, i32* %r3, i32 4
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i160
-%r110 = shl i160 %r109, 128
-%r111 = or i160 %r105, %r110
-%r112 = zext i160 %r111 to i192
-%r114 = getelementptr i32, i32* %r3, i32 5
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i192
-%r117 = shl i192 %r116, 160
-%r118 = or i192 %r112, %r117
-%r119 = zext i192 %r118 to i224
-%r121 = getelementptr i32, i32* %r3, i32 6
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i224
-%r124 = shl i224 %r123, 192
-%r125 = or i224 %r119, %r124
-%r126 = zext i224 %r125 to i256
-%r128 = getelementptr i32, i32* %r3, i32 7
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i256
-%r131 = shl i256 %r130, 224
-%r132 = or i256 %r126, %r131
-%r133 = zext i256 %r132 to i288
-%r135 = getelementptr i32, i32* %r3, i32 8
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i288
-%r138 = shl i288 %r137, 256
-%r139 = or i288 %r133, %r138
-%r140 = zext i288 %r139 to i320
-%r142 = getelementptr i32, i32* %r3, i32 9
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i320
-%r145 = shl i320 %r144, 288
-%r146 = or i320 %r140, %r145
-%r147 = zext i320 %r146 to i352
-%r149 = getelementptr i32, i32* %r3, i32 10
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i352
-%r152 = shl i352 %r151, 320
-%r153 = or i352 %r147, %r152
-%r154 = zext i352 %r153 to i384
-%r156 = getelementptr i32, i32* %r3, i32 11
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i384
-%r159 = shl i384 %r158, 352
-%r160 = or i384 %r154, %r159
-%r161 = zext i384 %r82 to i416
-%r162 = zext i384 %r160 to i416
-%r163 = sub i416 %r161, %r162
-%r164 = trunc i416 %r163 to i384
-%r165 = lshr i416 %r163, 384
-%r166 = trunc i416 %r165 to i1
-%r167 = trunc i384 %r164 to i32
-%r169 = getelementptr i32, i32* %r1, i32 0
-store i32 %r167, i32* %r169
-%r170 = lshr i384 %r164, 32
-%r171 = trunc i384 %r170 to i32
-%r173 = getelementptr i32, i32* %r1, i32 1
-store i32 %r171, i32* %r173
-%r174 = lshr i384 %r170, 32
-%r175 = trunc i384 %r174 to i32
-%r177 = getelementptr i32, i32* %r1, i32 2
-store i32 %r175, i32* %r177
-%r178 = lshr i384 %r174, 32
-%r179 = trunc i384 %r178 to i32
-%r181 = getelementptr i32, i32* %r1, i32 3
-store i32 %r179, i32* %r181
-%r182 = lshr i384 %r178, 32
-%r183 = trunc i384 %r182 to i32
-%r185 = getelementptr i32, i32* %r1, i32 4
-store i32 %r183, i32* %r185
-%r186 = lshr i384 %r182, 32
-%r187 = trunc i384 %r186 to i32
-%r189 = getelementptr i32, i32* %r1, i32 5
-store i32 %r187, i32* %r189
-%r190 = lshr i384 %r186, 32
-%r191 = trunc i384 %r190 to i32
-%r193 = getelementptr i32, i32* %r1, i32 6
-store i32 %r191, i32* %r193
-%r194 = lshr i384 %r190, 32
-%r195 = trunc i384 %r194 to i32
-%r197 = getelementptr i32, i32* %r1, i32 7
-store i32 %r195, i32* %r197
-%r198 = lshr i384 %r194, 32
-%r199 = trunc i384 %r198 to i32
-%r201 = getelementptr i32, i32* %r1, i32 8
-store i32 %r199, i32* %r201
-%r202 = lshr i384 %r198, 32
-%r203 = trunc i384 %r202 to i32
-%r205 = getelementptr i32, i32* %r1, i32 9
-store i32 %r203, i32* %r205
-%r206 = lshr i384 %r202, 32
-%r207 = trunc i384 %r206 to i32
-%r209 = getelementptr i32, i32* %r1, i32 10
-store i32 %r207, i32* %r209
-%r210 = lshr i384 %r206, 32
-%r211 = trunc i384 %r210 to i32
-%r213 = getelementptr i32, i32* %r1, i32 11
-store i32 %r211, i32* %r213
-br i1%r166, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r214 = load i32, i32* %r4
-%r215 = zext i32 %r214 to i64
-%r217 = getelementptr i32, i32* %r4, i32 1
-%r218 = load i32, i32* %r217
-%r219 = zext i32 %r218 to i64
-%r220 = shl i64 %r219, 32
-%r221 = or i64 %r215, %r220
-%r222 = zext i64 %r221 to i96
-%r224 = getelementptr i32, i32* %r4, i32 2
-%r225 = load i32, i32* %r224
-%r226 = zext i32 %r225 to i96
-%r227 = shl i96 %r226, 64
-%r228 = or i96 %r222, %r227
-%r229 = zext i96 %r228 to i128
-%r231 = getelementptr i32, i32* %r4, i32 3
-%r232 = load i32, i32* %r231
-%r233 = zext i32 %r232 to i128
-%r234 = shl i128 %r233, 96
-%r235 = or i128 %r229, %r234
-%r236 = zext i128 %r235 to i160
-%r238 = getelementptr i32, i32* %r4, i32 4
-%r239 = load i32, i32* %r238
-%r240 = zext i32 %r239 to i160
-%r241 = shl i160 %r240, 128
-%r242 = or i160 %r236, %r241
-%r243 = zext i160 %r242 to i192
-%r245 = getelementptr i32, i32* %r4, i32 5
-%r246 = load i32, i32* %r245
-%r247 = zext i32 %r246 to i192
-%r248 = shl i192 %r247, 160
-%r249 = or i192 %r243, %r248
-%r250 = zext i192 %r249 to i224
-%r252 = getelementptr i32, i32* %r4, i32 6
-%r253 = load i32, i32* %r252
-%r254 = zext i32 %r253 to i224
-%r255 = shl i224 %r254, 192
-%r256 = or i224 %r250, %r255
-%r257 = zext i224 %r256 to i256
-%r259 = getelementptr i32, i32* %r4, i32 7
-%r260 = load i32, i32* %r259
-%r261 = zext i32 %r260 to i256
-%r262 = shl i256 %r261, 224
-%r263 = or i256 %r257, %r262
-%r264 = zext i256 %r263 to i288
-%r266 = getelementptr i32, i32* %r4, i32 8
-%r267 = load i32, i32* %r266
-%r268 = zext i32 %r267 to i288
-%r269 = shl i288 %r268, 256
-%r270 = or i288 %r264, %r269
-%r271 = zext i288 %r270 to i320
-%r273 = getelementptr i32, i32* %r4, i32 9
-%r274 = load i32, i32* %r273
-%r275 = zext i32 %r274 to i320
-%r276 = shl i320 %r275, 288
-%r277 = or i320 %r271, %r276
-%r278 = zext i320 %r277 to i352
-%r280 = getelementptr i32, i32* %r4, i32 10
-%r281 = load i32, i32* %r280
-%r282 = zext i32 %r281 to i352
-%r283 = shl i352 %r282, 320
-%r284 = or i352 %r278, %r283
-%r285 = zext i352 %r284 to i384
-%r287 = getelementptr i32, i32* %r4, i32 11
-%r288 = load i32, i32* %r287
-%r289 = zext i32 %r288 to i384
-%r290 = shl i384 %r289, 352
-%r291 = or i384 %r285, %r290
-%r292 = add i384 %r164, %r291
-%r293 = trunc i384 %r292 to i32
-%r295 = getelementptr i32, i32* %r1, i32 0
-store i32 %r293, i32* %r295
-%r296 = lshr i384 %r292, 32
-%r297 = trunc i384 %r296 to i32
-%r299 = getelementptr i32, i32* %r1, i32 1
-store i32 %r297, i32* %r299
-%r300 = lshr i384 %r296, 32
-%r301 = trunc i384 %r300 to i32
-%r303 = getelementptr i32, i32* %r1, i32 2
-store i32 %r301, i32* %r303
-%r304 = lshr i384 %r300, 32
-%r305 = trunc i384 %r304 to i32
-%r307 = getelementptr i32, i32* %r1, i32 3
-store i32 %r305, i32* %r307
-%r308 = lshr i384 %r304, 32
-%r309 = trunc i384 %r308 to i32
-%r311 = getelementptr i32, i32* %r1, i32 4
-store i32 %r309, i32* %r311
-%r312 = lshr i384 %r308, 32
-%r313 = trunc i384 %r312 to i32
-%r315 = getelementptr i32, i32* %r1, i32 5
-store i32 %r313, i32* %r315
-%r316 = lshr i384 %r312, 32
-%r317 = trunc i384 %r316 to i32
-%r319 = getelementptr i32, i32* %r1, i32 6
-store i32 %r317, i32* %r319
-%r320 = lshr i384 %r316, 32
-%r321 = trunc i384 %r320 to i32
-%r323 = getelementptr i32, i32* %r1, i32 7
-store i32 %r321, i32* %r323
-%r324 = lshr i384 %r320, 32
-%r325 = trunc i384 %r324 to i32
-%r327 = getelementptr i32, i32* %r1, i32 8
-store i32 %r325, i32* %r327
-%r328 = lshr i384 %r324, 32
-%r329 = trunc i384 %r328 to i32
-%r331 = getelementptr i32, i32* %r1, i32 9
-store i32 %r329, i32* %r331
-%r332 = lshr i384 %r328, 32
-%r333 = trunc i384 %r332 to i32
-%r335 = getelementptr i32, i32* %r1, i32 10
-store i32 %r333, i32* %r335
-%r336 = lshr i384 %r332, 32
-%r337 = trunc i384 %r336 to i32
-%r339 = getelementptr i32, i32* %r1, i32 11
-store i32 %r337, i32* %r339
-ret void
-}
-define void @mcl_fp_subNF12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = load i32, i32* %r3
-%r84 = zext i32 %r83 to i64
-%r86 = getelementptr i32, i32* %r3, i32 1
-%r87 = load i32, i32* %r86
-%r88 = zext i32 %r87 to i64
-%r89 = shl i64 %r88, 32
-%r90 = or i64 %r84, %r89
-%r91 = zext i64 %r90 to i96
-%r93 = getelementptr i32, i32* %r3, i32 2
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i96
-%r96 = shl i96 %r95, 64
-%r97 = or i96 %r91, %r96
-%r98 = zext i96 %r97 to i128
-%r100 = getelementptr i32, i32* %r3, i32 3
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i128
-%r103 = shl i128 %r102, 96
-%r104 = or i128 %r98, %r103
-%r105 = zext i128 %r104 to i160
-%r107 = getelementptr i32, i32* %r3, i32 4
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i160
-%r110 = shl i160 %r109, 128
-%r111 = or i160 %r105, %r110
-%r112 = zext i160 %r111 to i192
-%r114 = getelementptr i32, i32* %r3, i32 5
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i192
-%r117 = shl i192 %r116, 160
-%r118 = or i192 %r112, %r117
-%r119 = zext i192 %r118 to i224
-%r121 = getelementptr i32, i32* %r3, i32 6
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i224
-%r124 = shl i224 %r123, 192
-%r125 = or i224 %r119, %r124
-%r126 = zext i224 %r125 to i256
-%r128 = getelementptr i32, i32* %r3, i32 7
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i256
-%r131 = shl i256 %r130, 224
-%r132 = or i256 %r126, %r131
-%r133 = zext i256 %r132 to i288
-%r135 = getelementptr i32, i32* %r3, i32 8
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i288
-%r138 = shl i288 %r137, 256
-%r139 = or i288 %r133, %r138
-%r140 = zext i288 %r139 to i320
-%r142 = getelementptr i32, i32* %r3, i32 9
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i320
-%r145 = shl i320 %r144, 288
-%r146 = or i320 %r140, %r145
-%r147 = zext i320 %r146 to i352
-%r149 = getelementptr i32, i32* %r3, i32 10
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i352
-%r152 = shl i352 %r151, 320
-%r153 = or i352 %r147, %r152
-%r154 = zext i352 %r153 to i384
-%r156 = getelementptr i32, i32* %r3, i32 11
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i384
-%r159 = shl i384 %r158, 352
-%r160 = or i384 %r154, %r159
-%r161 = sub i384 %r82, %r160
-%r162 = lshr i384 %r161, 383
-%r163 = trunc i384 %r162 to i1
-%r164 = load i32, i32* %r4
-%r165 = zext i32 %r164 to i64
-%r167 = getelementptr i32, i32* %r4, i32 1
-%r168 = load i32, i32* %r167
-%r169 = zext i32 %r168 to i64
-%r170 = shl i64 %r169, 32
-%r171 = or i64 %r165, %r170
-%r172 = zext i64 %r171 to i96
-%r174 = getelementptr i32, i32* %r4, i32 2
-%r175 = load i32, i32* %r174
-%r176 = zext i32 %r175 to i96
-%r177 = shl i96 %r176, 64
-%r178 = or i96 %r172, %r177
-%r179 = zext i96 %r178 to i128
-%r181 = getelementptr i32, i32* %r4, i32 3
-%r182 = load i32, i32* %r181
-%r183 = zext i32 %r182 to i128
-%r184 = shl i128 %r183, 96
-%r185 = or i128 %r179, %r184
-%r186 = zext i128 %r185 to i160
-%r188 = getelementptr i32, i32* %r4, i32 4
-%r189 = load i32, i32* %r188
-%r190 = zext i32 %r189 to i160
-%r191 = shl i160 %r190, 128
-%r192 = or i160 %r186, %r191
-%r193 = zext i160 %r192 to i192
-%r195 = getelementptr i32, i32* %r4, i32 5
-%r196 = load i32, i32* %r195
-%r197 = zext i32 %r196 to i192
-%r198 = shl i192 %r197, 160
-%r199 = or i192 %r193, %r198
-%r200 = zext i192 %r199 to i224
-%r202 = getelementptr i32, i32* %r4, i32 6
-%r203 = load i32, i32* %r202
-%r204 = zext i32 %r203 to i224
-%r205 = shl i224 %r204, 192
-%r206 = or i224 %r200, %r205
-%r207 = zext i224 %r206 to i256
-%r209 = getelementptr i32, i32* %r4, i32 7
-%r210 = load i32, i32* %r209
-%r211 = zext i32 %r210 to i256
-%r212 = shl i256 %r211, 224
-%r213 = or i256 %r207, %r212
-%r214 = zext i256 %r213 to i288
-%r216 = getelementptr i32, i32* %r4, i32 8
-%r217 = load i32, i32* %r216
-%r218 = zext i32 %r217 to i288
-%r219 = shl i288 %r218, 256
-%r220 = or i288 %r214, %r219
-%r221 = zext i288 %r220 to i320
-%r223 = getelementptr i32, i32* %r4, i32 9
-%r224 = load i32, i32* %r223
-%r225 = zext i32 %r224 to i320
-%r226 = shl i320 %r225, 288
-%r227 = or i320 %r221, %r226
-%r228 = zext i320 %r227 to i352
-%r230 = getelementptr i32, i32* %r4, i32 10
-%r231 = load i32, i32* %r230
-%r232 = zext i32 %r231 to i352
-%r233 = shl i352 %r232, 320
-%r234 = or i352 %r228, %r233
-%r235 = zext i352 %r234 to i384
-%r237 = getelementptr i32, i32* %r4, i32 11
-%r238 = load i32, i32* %r237
-%r239 = zext i32 %r238 to i384
-%r240 = shl i384 %r239, 352
-%r241 = or i384 %r235, %r240
-%r243 = select i1 %r163, i384 %r241, i384 0
-%r244 = add i384 %r161, %r243
-%r245 = trunc i384 %r244 to i32
-%r247 = getelementptr i32, i32* %r1, i32 0
-store i32 %r245, i32* %r247
-%r248 = lshr i384 %r244, 32
-%r249 = trunc i384 %r248 to i32
-%r251 = getelementptr i32, i32* %r1, i32 1
-store i32 %r249, i32* %r251
-%r252 = lshr i384 %r248, 32
-%r253 = trunc i384 %r252 to i32
-%r255 = getelementptr i32, i32* %r1, i32 2
-store i32 %r253, i32* %r255
-%r256 = lshr i384 %r252, 32
-%r257 = trunc i384 %r256 to i32
-%r259 = getelementptr i32, i32* %r1, i32 3
-store i32 %r257, i32* %r259
-%r260 = lshr i384 %r256, 32
-%r261 = trunc i384 %r260 to i32
-%r263 = getelementptr i32, i32* %r1, i32 4
-store i32 %r261, i32* %r263
-%r264 = lshr i384 %r260, 32
-%r265 = trunc i384 %r264 to i32
-%r267 = getelementptr i32, i32* %r1, i32 5
-store i32 %r265, i32* %r267
-%r268 = lshr i384 %r264, 32
-%r269 = trunc i384 %r268 to i32
-%r271 = getelementptr i32, i32* %r1, i32 6
-store i32 %r269, i32* %r271
-%r272 = lshr i384 %r268, 32
-%r273 = trunc i384 %r272 to i32
-%r275 = getelementptr i32, i32* %r1, i32 7
-store i32 %r273, i32* %r275
-%r276 = lshr i384 %r272, 32
-%r277 = trunc i384 %r276 to i32
-%r279 = getelementptr i32, i32* %r1, i32 8
-store i32 %r277, i32* %r279
-%r280 = lshr i384 %r276, 32
-%r281 = trunc i384 %r280 to i32
-%r283 = getelementptr i32, i32* %r1, i32 9
-store i32 %r281, i32* %r283
-%r284 = lshr i384 %r280, 32
-%r285 = trunc i384 %r284 to i32
-%r287 = getelementptr i32, i32* %r1, i32 10
-store i32 %r285, i32* %r287
-%r288 = lshr i384 %r284, 32
-%r289 = trunc i384 %r288 to i32
-%r291 = getelementptr i32, i32* %r1, i32 11
-store i32 %r289, i32* %r291
-ret void
-}
-define void @mcl_fpDbl_add12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = zext i576 %r124 to i608
-%r127 = getelementptr i32, i32* %r2, i32 18
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i608
-%r130 = shl i608 %r129, 576
-%r131 = or i608 %r125, %r130
-%r132 = zext i608 %r131 to i640
-%r134 = getelementptr i32, i32* %r2, i32 19
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i640
-%r137 = shl i640 %r136, 608
-%r138 = or i640 %r132, %r137
-%r139 = zext i640 %r138 to i672
-%r141 = getelementptr i32, i32* %r2, i32 20
-%r142 = load i32, i32* %r141
-%r143 = zext i32 %r142 to i672
-%r144 = shl i672 %r143, 640
-%r145 = or i672 %r139, %r144
-%r146 = zext i672 %r145 to i704
-%r148 = getelementptr i32, i32* %r2, i32 21
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i704
-%r151 = shl i704 %r150, 672
-%r152 = or i704 %r146, %r151
-%r153 = zext i704 %r152 to i736
-%r155 = getelementptr i32, i32* %r2, i32 22
-%r156 = load i32, i32* %r155
-%r157 = zext i32 %r156 to i736
-%r158 = shl i736 %r157, 704
-%r159 = or i736 %r153, %r158
-%r160 = zext i736 %r159 to i768
-%r162 = getelementptr i32, i32* %r2, i32 23
-%r163 = load i32, i32* %r162
-%r164 = zext i32 %r163 to i768
-%r165 = shl i768 %r164, 736
-%r166 = or i768 %r160, %r165
-%r167 = load i32, i32* %r3
-%r168 = zext i32 %r167 to i64
-%r170 = getelementptr i32, i32* %r3, i32 1
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i64
-%r173 = shl i64 %r172, 32
-%r174 = or i64 %r168, %r173
-%r175 = zext i64 %r174 to i96
-%r177 = getelementptr i32, i32* %r3, i32 2
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i96
-%r180 = shl i96 %r179, 64
-%r181 = or i96 %r175, %r180
-%r182 = zext i96 %r181 to i128
-%r184 = getelementptr i32, i32* %r3, i32 3
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i128
-%r187 = shl i128 %r186, 96
-%r188 = or i128 %r182, %r187
-%r189 = zext i128 %r188 to i160
-%r191 = getelementptr i32, i32* %r3, i32 4
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i160
-%r194 = shl i160 %r193, 128
-%r195 = or i160 %r189, %r194
-%r196 = zext i160 %r195 to i192
-%r198 = getelementptr i32, i32* %r3, i32 5
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i192
-%r201 = shl i192 %r200, 160
-%r202 = or i192 %r196, %r201
-%r203 = zext i192 %r202 to i224
-%r205 = getelementptr i32, i32* %r3, i32 6
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i224
-%r208 = shl i224 %r207, 192
-%r209 = or i224 %r203, %r208
-%r210 = zext i224 %r209 to i256
-%r212 = getelementptr i32, i32* %r3, i32 7
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i256
-%r215 = shl i256 %r214, 224
-%r216 = or i256 %r210, %r215
-%r217 = zext i256 %r216 to i288
-%r219 = getelementptr i32, i32* %r3, i32 8
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i288
-%r222 = shl i288 %r221, 256
-%r223 = or i288 %r217, %r222
-%r224 = zext i288 %r223 to i320
-%r226 = getelementptr i32, i32* %r3, i32 9
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i320
-%r229 = shl i320 %r228, 288
-%r230 = or i320 %r224, %r229
-%r231 = zext i320 %r230 to i352
-%r233 = getelementptr i32, i32* %r3, i32 10
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i352
-%r236 = shl i352 %r235, 320
-%r237 = or i352 %r231, %r236
-%r238 = zext i352 %r237 to i384
-%r240 = getelementptr i32, i32* %r3, i32 11
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i384
-%r243 = shl i384 %r242, 352
-%r244 = or i384 %r238, %r243
-%r245 = zext i384 %r244 to i416
-%r247 = getelementptr i32, i32* %r3, i32 12
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i416
-%r250 = shl i416 %r249, 384
-%r251 = or i416 %r245, %r250
-%r252 = zext i416 %r251 to i448
-%r254 = getelementptr i32, i32* %r3, i32 13
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i448
-%r257 = shl i448 %r256, 416
-%r258 = or i448 %r252, %r257
-%r259 = zext i448 %r258 to i480
-%r261 = getelementptr i32, i32* %r3, i32 14
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i480
-%r264 = shl i480 %r263, 448
-%r265 = or i480 %r259, %r264
-%r266 = zext i480 %r265 to i512
-%r268 = getelementptr i32, i32* %r3, i32 15
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i512
-%r271 = shl i512 %r270, 480
-%r272 = or i512 %r266, %r271
-%r273 = zext i512 %r272 to i544
-%r275 = getelementptr i32, i32* %r3, i32 16
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i544
-%r278 = shl i544 %r277, 512
-%r279 = or i544 %r273, %r278
-%r280 = zext i544 %r279 to i576
-%r282 = getelementptr i32, i32* %r3, i32 17
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i576
-%r285 = shl i576 %r284, 544
-%r286 = or i576 %r280, %r285
-%r287 = zext i576 %r286 to i608
-%r289 = getelementptr i32, i32* %r3, i32 18
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i608
-%r292 = shl i608 %r291, 576
-%r293 = or i608 %r287, %r292
-%r294 = zext i608 %r293 to i640
-%r296 = getelementptr i32, i32* %r3, i32 19
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i640
-%r299 = shl i640 %r298, 608
-%r300 = or i640 %r294, %r299
-%r301 = zext i640 %r300 to i672
-%r303 = getelementptr i32, i32* %r3, i32 20
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i672
-%r306 = shl i672 %r305, 640
-%r307 = or i672 %r301, %r306
-%r308 = zext i672 %r307 to i704
-%r310 = getelementptr i32, i32* %r3, i32 21
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i704
-%r313 = shl i704 %r312, 672
-%r314 = or i704 %r308, %r313
-%r315 = zext i704 %r314 to i736
-%r317 = getelementptr i32, i32* %r3, i32 22
-%r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i736
-%r320 = shl i736 %r319, 704
-%r321 = or i736 %r315, %r320
-%r322 = zext i736 %r321 to i768
-%r324 = getelementptr i32, i32* %r3, i32 23
-%r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i768
-%r327 = shl i768 %r326, 736
-%r328 = or i768 %r322, %r327
-%r329 = zext i768 %r166 to i800
-%r330 = zext i768 %r328 to i800
-%r331 = add i800 %r329, %r330
-%r332 = trunc i800 %r331 to i384
-%r333 = trunc i384 %r332 to i32
-%r335 = getelementptr i32, i32* %r1, i32 0
-store i32 %r333, i32* %r335
-%r336 = lshr i384 %r332, 32
-%r337 = trunc i384 %r336 to i32
-%r339 = getelementptr i32, i32* %r1, i32 1
-store i32 %r337, i32* %r339
-%r340 = lshr i384 %r336, 32
-%r341 = trunc i384 %r340 to i32
-%r343 = getelementptr i32, i32* %r1, i32 2
-store i32 %r341, i32* %r343
-%r344 = lshr i384 %r340, 32
-%r345 = trunc i384 %r344 to i32
-%r347 = getelementptr i32, i32* %r1, i32 3
-store i32 %r345, i32* %r347
-%r348 = lshr i384 %r344, 32
-%r349 = trunc i384 %r348 to i32
-%r351 = getelementptr i32, i32* %r1, i32 4
-store i32 %r349, i32* %r351
-%r352 = lshr i384 %r348, 32
-%r353 = trunc i384 %r352 to i32
-%r355 = getelementptr i32, i32* %r1, i32 5
-store i32 %r353, i32* %r355
-%r356 = lshr i384 %r352, 32
-%r357 = trunc i384 %r356 to i32
-%r359 = getelementptr i32, i32* %r1, i32 6
-store i32 %r357, i32* %r359
-%r360 = lshr i384 %r356, 32
-%r361 = trunc i384 %r360 to i32
-%r363 = getelementptr i32, i32* %r1, i32 7
-store i32 %r361, i32* %r363
-%r364 = lshr i384 %r360, 32
-%r365 = trunc i384 %r364 to i32
-%r367 = getelementptr i32, i32* %r1, i32 8
-store i32 %r365, i32* %r367
-%r368 = lshr i384 %r364, 32
-%r369 = trunc i384 %r368 to i32
-%r371 = getelementptr i32, i32* %r1, i32 9
-store i32 %r369, i32* %r371
-%r372 = lshr i384 %r368, 32
-%r373 = trunc i384 %r372 to i32
-%r375 = getelementptr i32, i32* %r1, i32 10
-store i32 %r373, i32* %r375
-%r376 = lshr i384 %r372, 32
-%r377 = trunc i384 %r376 to i32
-%r379 = getelementptr i32, i32* %r1, i32 11
-store i32 %r377, i32* %r379
-%r380 = lshr i800 %r331, 384
-%r381 = trunc i800 %r380 to i416
-%r382 = load i32, i32* %r4
-%r383 = zext i32 %r382 to i64
-%r385 = getelementptr i32, i32* %r4, i32 1
-%r386 = load i32, i32* %r385
-%r387 = zext i32 %r386 to i64
-%r388 = shl i64 %r387, 32
-%r389 = or i64 %r383, %r388
-%r390 = zext i64 %r389 to i96
-%r392 = getelementptr i32, i32* %r4, i32 2
-%r393 = load i32, i32* %r392
-%r394 = zext i32 %r393 to i96
-%r395 = shl i96 %r394, 64
-%r396 = or i96 %r390, %r395
-%r397 = zext i96 %r396 to i128
-%r399 = getelementptr i32, i32* %r4, i32 3
-%r400 = load i32, i32* %r399
-%r401 = zext i32 %r400 to i128
-%r402 = shl i128 %r401, 96
-%r403 = or i128 %r397, %r402
-%r404 = zext i128 %r403 to i160
-%r406 = getelementptr i32, i32* %r4, i32 4
-%r407 = load i32, i32* %r406
-%r408 = zext i32 %r407 to i160
-%r409 = shl i160 %r408, 128
-%r410 = or i160 %r404, %r409
-%r411 = zext i160 %r410 to i192
-%r413 = getelementptr i32, i32* %r4, i32 5
-%r414 = load i32, i32* %r413
-%r415 = zext i32 %r414 to i192
-%r416 = shl i192 %r415, 160
-%r417 = or i192 %r411, %r416
-%r418 = zext i192 %r417 to i224
-%r420 = getelementptr i32, i32* %r4, i32 6
-%r421 = load i32, i32* %r420
-%r422 = zext i32 %r421 to i224
-%r423 = shl i224 %r422, 192
-%r424 = or i224 %r418, %r423
-%r425 = zext i224 %r424 to i256
-%r427 = getelementptr i32, i32* %r4, i32 7
-%r428 = load i32, i32* %r427
-%r429 = zext i32 %r428 to i256
-%r430 = shl i256 %r429, 224
-%r431 = or i256 %r425, %r430
-%r432 = zext i256 %r431 to i288
-%r434 = getelementptr i32, i32* %r4, i32 8
-%r435 = load i32, i32* %r434
-%r436 = zext i32 %r435 to i288
-%r437 = shl i288 %r436, 256
-%r438 = or i288 %r432, %r437
-%r439 = zext i288 %r438 to i320
-%r441 = getelementptr i32, i32* %r4, i32 9
-%r442 = load i32, i32* %r441
-%r443 = zext i32 %r442 to i320
-%r444 = shl i320 %r443, 288
-%r445 = or i320 %r439, %r444
-%r446 = zext i320 %r445 to i352
-%r448 = getelementptr i32, i32* %r4, i32 10
-%r449 = load i32, i32* %r448
-%r450 = zext i32 %r449 to i352
-%r451 = shl i352 %r450, 320
-%r452 = or i352 %r446, %r451
-%r453 = zext i352 %r452 to i384
-%r455 = getelementptr i32, i32* %r4, i32 11
-%r456 = load i32, i32* %r455
-%r457 = zext i32 %r456 to i384
-%r458 = shl i384 %r457, 352
-%r459 = or i384 %r453, %r458
-%r460 = zext i384 %r459 to i416
-%r461 = sub i416 %r381, %r460
-%r462 = lshr i416 %r461, 384
-%r463 = trunc i416 %r462 to i1
-%r464 = select i1 %r463, i416 %r381, i416 %r461
-%r465 = trunc i416 %r464 to i384
-%r467 = getelementptr i32, i32* %r1, i32 12
-%r468 = trunc i384 %r465 to i32
-%r470 = getelementptr i32, i32* %r467, i32 0
-store i32 %r468, i32* %r470
-%r471 = lshr i384 %r465, 32
-%r472 = trunc i384 %r471 to i32
-%r474 = getelementptr i32, i32* %r467, i32 1
-store i32 %r472, i32* %r474
-%r475 = lshr i384 %r471, 32
-%r476 = trunc i384 %r475 to i32
-%r478 = getelementptr i32, i32* %r467, i32 2
-store i32 %r476, i32* %r478
-%r479 = lshr i384 %r475, 32
-%r480 = trunc i384 %r479 to i32
-%r482 = getelementptr i32, i32* %r467, i32 3
-store i32 %r480, i32* %r482
-%r483 = lshr i384 %r479, 32
-%r484 = trunc i384 %r483 to i32
-%r486 = getelementptr i32, i32* %r467, i32 4
-store i32 %r484, i32* %r486
-%r487 = lshr i384 %r483, 32
-%r488 = trunc i384 %r487 to i32
-%r490 = getelementptr i32, i32* %r467, i32 5
-store i32 %r488, i32* %r490
-%r491 = lshr i384 %r487, 32
-%r492 = trunc i384 %r491 to i32
-%r494 = getelementptr i32, i32* %r467, i32 6
-store i32 %r492, i32* %r494
-%r495 = lshr i384 %r491, 32
-%r496 = trunc i384 %r495 to i32
-%r498 = getelementptr i32, i32* %r467, i32 7
-store i32 %r496, i32* %r498
-%r499 = lshr i384 %r495, 32
-%r500 = trunc i384 %r499 to i32
-%r502 = getelementptr i32, i32* %r467, i32 8
-store i32 %r500, i32* %r502
-%r503 = lshr i384 %r499, 32
-%r504 = trunc i384 %r503 to i32
-%r506 = getelementptr i32, i32* %r467, i32 9
-store i32 %r504, i32* %r506
-%r507 = lshr i384 %r503, 32
-%r508 = trunc i384 %r507 to i32
-%r510 = getelementptr i32, i32* %r467, i32 10
-store i32 %r508, i32* %r510
-%r511 = lshr i384 %r507, 32
-%r512 = trunc i384 %r511 to i32
-%r514 = getelementptr i32, i32* %r467, i32 11
-store i32 %r512, i32* %r514
-ret void
-}
-define void @mcl_fpDbl_sub12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = zext i576 %r124 to i608
-%r127 = getelementptr i32, i32* %r2, i32 18
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i608
-%r130 = shl i608 %r129, 576
-%r131 = or i608 %r125, %r130
-%r132 = zext i608 %r131 to i640
-%r134 = getelementptr i32, i32* %r2, i32 19
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i640
-%r137 = shl i640 %r136, 608
-%r138 = or i640 %r132, %r137
-%r139 = zext i640 %r138 to i672
-%r141 = getelementptr i32, i32* %r2, i32 20
-%r142 = load i32, i32* %r141
-%r143 = zext i32 %r142 to i672
-%r144 = shl i672 %r143, 640
-%r145 = or i672 %r139, %r144
-%r146 = zext i672 %r145 to i704
-%r148 = getelementptr i32, i32* %r2, i32 21
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i704
-%r151 = shl i704 %r150, 672
-%r152 = or i704 %r146, %r151
-%r153 = zext i704 %r152 to i736
-%r155 = getelementptr i32, i32* %r2, i32 22
-%r156 = load i32, i32* %r155
-%r157 = zext i32 %r156 to i736
-%r158 = shl i736 %r157, 704
-%r159 = or i736 %r153, %r158
-%r160 = zext i736 %r159 to i768
-%r162 = getelementptr i32, i32* %r2, i32 23
-%r163 = load i32, i32* %r162
-%r164 = zext i32 %r163 to i768
-%r165 = shl i768 %r164, 736
-%r166 = or i768 %r160, %r165
-%r167 = load i32, i32* %r3
-%r168 = zext i32 %r167 to i64
-%r170 = getelementptr i32, i32* %r3, i32 1
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i64
-%r173 = shl i64 %r172, 32
-%r174 = or i64 %r168, %r173
-%r175 = zext i64 %r174 to i96
-%r177 = getelementptr i32, i32* %r3, i32 2
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i96
-%r180 = shl i96 %r179, 64
-%r181 = or i96 %r175, %r180
-%r182 = zext i96 %r181 to i128
-%r184 = getelementptr i32, i32* %r3, i32 3
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i128
-%r187 = shl i128 %r186, 96
-%r188 = or i128 %r182, %r187
-%r189 = zext i128 %r188 to i160
-%r191 = getelementptr i32, i32* %r3, i32 4
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i160
-%r194 = shl i160 %r193, 128
-%r195 = or i160 %r189, %r194
-%r196 = zext i160 %r195 to i192
-%r198 = getelementptr i32, i32* %r3, i32 5
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i192
-%r201 = shl i192 %r200, 160
-%r202 = or i192 %r196, %r201
-%r203 = zext i192 %r202 to i224
-%r205 = getelementptr i32, i32* %r3, i32 6
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i224
-%r208 = shl i224 %r207, 192
-%r209 = or i224 %r203, %r208
-%r210 = zext i224 %r209 to i256
-%r212 = getelementptr i32, i32* %r3, i32 7
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i256
-%r215 = shl i256 %r214, 224
-%r216 = or i256 %r210, %r215
-%r217 = zext i256 %r216 to i288
-%r219 = getelementptr i32, i32* %r3, i32 8
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i288
-%r222 = shl i288 %r221, 256
-%r223 = or i288 %r217, %r222
-%r224 = zext i288 %r223 to i320
-%r226 = getelementptr i32, i32* %r3, i32 9
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i320
-%r229 = shl i320 %r228, 288
-%r230 = or i320 %r224, %r229
-%r231 = zext i320 %r230 to i352
-%r233 = getelementptr i32, i32* %r3, i32 10
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i352
-%r236 = shl i352 %r235, 320
-%r237 = or i352 %r231, %r236
-%r238 = zext i352 %r237 to i384
-%r240 = getelementptr i32, i32* %r3, i32 11
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i384
-%r243 = shl i384 %r242, 352
-%r244 = or i384 %r238, %r243
-%r245 = zext i384 %r244 to i416
-%r247 = getelementptr i32, i32* %r3, i32 12
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i416
-%r250 = shl i416 %r249, 384
-%r251 = or i416 %r245, %r250
-%r252 = zext i416 %r251 to i448
-%r254 = getelementptr i32, i32* %r3, i32 13
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i448
-%r257 = shl i448 %r256, 416
-%r258 = or i448 %r252, %r257
-%r259 = zext i448 %r258 to i480
-%r261 = getelementptr i32, i32* %r3, i32 14
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i480
-%r264 = shl i480 %r263, 448
-%r265 = or i480 %r259, %r264
-%r266 = zext i480 %r265 to i512
-%r268 = getelementptr i32, i32* %r3, i32 15
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i512
-%r271 = shl i512 %r270, 480
-%r272 = or i512 %r266, %r271
-%r273 = zext i512 %r272 to i544
-%r275 = getelementptr i32, i32* %r3, i32 16
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i544
-%r278 = shl i544 %r277, 512
-%r279 = or i544 %r273, %r278
-%r280 = zext i544 %r279 to i576
-%r282 = getelementptr i32, i32* %r3, i32 17
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i576
-%r285 = shl i576 %r284, 544
-%r286 = or i576 %r280, %r285
-%r287 = zext i576 %r286 to i608
-%r289 = getelementptr i32, i32* %r3, i32 18
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i608
-%r292 = shl i608 %r291, 576
-%r293 = or i608 %r287, %r292
-%r294 = zext i608 %r293 to i640
-%r296 = getelementptr i32, i32* %r3, i32 19
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i640
-%r299 = shl i640 %r298, 608
-%r300 = or i640 %r294, %r299
-%r301 = zext i640 %r300 to i672
-%r303 = getelementptr i32, i32* %r3, i32 20
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i672
-%r306 = shl i672 %r305, 640
-%r307 = or i672 %r301, %r306
-%r308 = zext i672 %r307 to i704
-%r310 = getelementptr i32, i32* %r3, i32 21
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i704
-%r313 = shl i704 %r312, 672
-%r314 = or i704 %r308, %r313
-%r315 = zext i704 %r314 to i736
-%r317 = getelementptr i32, i32* %r3, i32 22
-%r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i736
-%r320 = shl i736 %r319, 704
-%r321 = or i736 %r315, %r320
-%r322 = zext i736 %r321 to i768
-%r324 = getelementptr i32, i32* %r3, i32 23
-%r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i768
-%r327 = shl i768 %r326, 736
-%r328 = or i768 %r322, %r327
-%r329 = zext i768 %r166 to i800
-%r330 = zext i768 %r328 to i800
-%r331 = sub i800 %r329, %r330
-%r332 = trunc i800 %r331 to i384
-%r333 = trunc i384 %r332 to i32
-%r335 = getelementptr i32, i32* %r1, i32 0
-store i32 %r333, i32* %r335
-%r336 = lshr i384 %r332, 32
-%r337 = trunc i384 %r336 to i32
-%r339 = getelementptr i32, i32* %r1, i32 1
-store i32 %r337, i32* %r339
-%r340 = lshr i384 %r336, 32
-%r341 = trunc i384 %r340 to i32
-%r343 = getelementptr i32, i32* %r1, i32 2
-store i32 %r341, i32* %r343
-%r344 = lshr i384 %r340, 32
-%r345 = trunc i384 %r344 to i32
-%r347 = getelementptr i32, i32* %r1, i32 3
-store i32 %r345, i32* %r347
-%r348 = lshr i384 %r344, 32
-%r349 = trunc i384 %r348 to i32
-%r351 = getelementptr i32, i32* %r1, i32 4
-store i32 %r349, i32* %r351
-%r352 = lshr i384 %r348, 32
-%r353 = trunc i384 %r352 to i32
-%r355 = getelementptr i32, i32* %r1, i32 5
-store i32 %r353, i32* %r355
-%r356 = lshr i384 %r352, 32
-%r357 = trunc i384 %r356 to i32
-%r359 = getelementptr i32, i32* %r1, i32 6
-store i32 %r357, i32* %r359
-%r360 = lshr i384 %r356, 32
-%r361 = trunc i384 %r360 to i32
-%r363 = getelementptr i32, i32* %r1, i32 7
-store i32 %r361, i32* %r363
-%r364 = lshr i384 %r360, 32
-%r365 = trunc i384 %r364 to i32
-%r367 = getelementptr i32, i32* %r1, i32 8
-store i32 %r365, i32* %r367
-%r368 = lshr i384 %r364, 32
-%r369 = trunc i384 %r368 to i32
-%r371 = getelementptr i32, i32* %r1, i32 9
-store i32 %r369, i32* %r371
-%r372 = lshr i384 %r368, 32
-%r373 = trunc i384 %r372 to i32
-%r375 = getelementptr i32, i32* %r1, i32 10
-store i32 %r373, i32* %r375
-%r376 = lshr i384 %r372, 32
-%r377 = trunc i384 %r376 to i32
-%r379 = getelementptr i32, i32* %r1, i32 11
-store i32 %r377, i32* %r379
-%r380 = lshr i800 %r331, 384
-%r381 = trunc i800 %r380 to i384
-%r382 = lshr i800 %r331, 768
-%r383 = trunc i800 %r382 to i1
-%r384 = load i32, i32* %r4
-%r385 = zext i32 %r384 to i64
-%r387 = getelementptr i32, i32* %r4, i32 1
-%r388 = load i32, i32* %r387
-%r389 = zext i32 %r388 to i64
-%r390 = shl i64 %r389, 32
-%r391 = or i64 %r385, %r390
-%r392 = zext i64 %r391 to i96
-%r394 = getelementptr i32, i32* %r4, i32 2
-%r395 = load i32, i32* %r394
-%r396 = zext i32 %r395 to i96
-%r397 = shl i96 %r396, 64
-%r398 = or i96 %r392, %r397
-%r399 = zext i96 %r398 to i128
-%r401 = getelementptr i32, i32* %r4, i32 3
-%r402 = load i32, i32* %r401
-%r403 = zext i32 %r402 to i128
-%r404 = shl i128 %r403, 96
-%r405 = or i128 %r399, %r404
-%r406 = zext i128 %r405 to i160
-%r408 = getelementptr i32, i32* %r4, i32 4
-%r409 = load i32, i32* %r408
-%r410 = zext i32 %r409 to i160
-%r411 = shl i160 %r410, 128
-%r412 = or i160 %r406, %r411
-%r413 = zext i160 %r412 to i192
-%r415 = getelementptr i32, i32* %r4, i32 5
-%r416 = load i32, i32* %r415
-%r417 = zext i32 %r416 to i192
-%r418 = shl i192 %r417, 160
-%r419 = or i192 %r413, %r418
-%r420 = zext i192 %r419 to i224
-%r422 = getelementptr i32, i32* %r4, i32 6
-%r423 = load i32, i32* %r422
-%r424 = zext i32 %r423 to i224
-%r425 = shl i224 %r424, 192
-%r426 = or i224 %r420, %r425
-%r427 = zext i224 %r426 to i256
-%r429 = getelementptr i32, i32* %r4, i32 7
-%r430 = load i32, i32* %r429
-%r431 = zext i32 %r430 to i256
-%r432 = shl i256 %r431, 224
-%r433 = or i256 %r427, %r432
-%r434 = zext i256 %r433 to i288
-%r436 = getelementptr i32, i32* %r4, i32 8
-%r437 = load i32, i32* %r436
-%r438 = zext i32 %r437 to i288
-%r439 = shl i288 %r438, 256
-%r440 = or i288 %r434, %r439
-%r441 = zext i288 %r440 to i320
-%r443 = getelementptr i32, i32* %r4, i32 9
-%r444 = load i32, i32* %r443
-%r445 = zext i32 %r444 to i320
-%r446 = shl i320 %r445, 288
-%r447 = or i320 %r441, %r446
-%r448 = zext i320 %r447 to i352
-%r450 = getelementptr i32, i32* %r4, i32 10
-%r451 = load i32, i32* %r450
-%r452 = zext i32 %r451 to i352
-%r453 = shl i352 %r452, 320
-%r454 = or i352 %r448, %r453
-%r455 = zext i352 %r454 to i384
-%r457 = getelementptr i32, i32* %r4, i32 11
-%r458 = load i32, i32* %r457
-%r459 = zext i32 %r458 to i384
-%r460 = shl i384 %r459, 352
-%r461 = or i384 %r455, %r460
-%r463 = select i1 %r383, i384 %r461, i384 0
-%r464 = add i384 %r381, %r463
-%r466 = getelementptr i32, i32* %r1, i32 12
-%r467 = trunc i384 %r464 to i32
-%r469 = getelementptr i32, i32* %r466, i32 0
-store i32 %r467, i32* %r469
-%r470 = lshr i384 %r464, 32
-%r471 = trunc i384 %r470 to i32
-%r473 = getelementptr i32, i32* %r466, i32 1
-store i32 %r471, i32* %r473
-%r474 = lshr i384 %r470, 32
-%r475 = trunc i384 %r474 to i32
-%r477 = getelementptr i32, i32* %r466, i32 2
-store i32 %r475, i32* %r477
-%r478 = lshr i384 %r474, 32
-%r479 = trunc i384 %r478 to i32
-%r481 = getelementptr i32, i32* %r466, i32 3
-store i32 %r479, i32* %r481
-%r482 = lshr i384 %r478, 32
-%r483 = trunc i384 %r482 to i32
-%r485 = getelementptr i32, i32* %r466, i32 4
-store i32 %r483, i32* %r485
-%r486 = lshr i384 %r482, 32
-%r487 = trunc i384 %r486 to i32
-%r489 = getelementptr i32, i32* %r466, i32 5
-store i32 %r487, i32* %r489
-%r490 = lshr i384 %r486, 32
-%r491 = trunc i384 %r490 to i32
-%r493 = getelementptr i32, i32* %r466, i32 6
-store i32 %r491, i32* %r493
-%r494 = lshr i384 %r490, 32
-%r495 = trunc i384 %r494 to i32
-%r497 = getelementptr i32, i32* %r466, i32 7
-store i32 %r495, i32* %r497
-%r498 = lshr i384 %r494, 32
-%r499 = trunc i384 %r498 to i32
-%r501 = getelementptr i32, i32* %r466, i32 8
-store i32 %r499, i32* %r501
-%r502 = lshr i384 %r498, 32
-%r503 = trunc i384 %r502 to i32
-%r505 = getelementptr i32, i32* %r466, i32 9
-store i32 %r503, i32* %r505
-%r506 = lshr i384 %r502, 32
-%r507 = trunc i384 %r506 to i32
-%r509 = getelementptr i32, i32* %r466, i32 10
-store i32 %r507, i32* %r509
-%r510 = lshr i384 %r506, 32
-%r511 = trunc i384 %r510 to i32
-%r513 = getelementptr i32, i32* %r466, i32 11
-store i32 %r511, i32* %r513
-ret void
-}
-define i448 @mulPv416x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
-%r18 = trunc i64 %r17 to i32
-%r19 = call i32 @extractHigh32(i64 %r17)
-%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
-%r22 = trunc i64 %r21 to i32
-%r23 = call i32 @extractHigh32(i64 %r21)
-%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
-%r26 = trunc i64 %r25 to i32
-%r27 = call i32 @extractHigh32(i64 %r25)
-%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
-%r30 = trunc i64 %r29 to i32
-%r31 = call i32 @extractHigh32(i64 %r29)
-%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
-%r34 = trunc i64 %r33 to i32
-%r35 = call i32 @extractHigh32(i64 %r33)
-%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
-%r38 = trunc i64 %r37 to i32
-%r39 = call i32 @extractHigh32(i64 %r37)
-%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
-%r42 = trunc i64 %r41 to i32
-%r43 = call i32 @extractHigh32(i64 %r41)
-%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
-%r46 = trunc i64 %r45 to i32
-%r47 = call i32 @extractHigh32(i64 %r45)
-%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
-%r50 = trunc i64 %r49 to i32
-%r51 = call i32 @extractHigh32(i64 %r49)
-%r53 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 12)
-%r54 = trunc i64 %r53 to i32
-%r55 = call i32 @extractHigh32(i64 %r53)
-%r56 = zext i32 %r6 to i64
-%r57 = zext i32 %r10 to i64
-%r58 = shl i64 %r57, 32
-%r59 = or i64 %r56, %r58
-%r60 = zext i64 %r59 to i96
-%r61 = zext i32 %r14 to i96
-%r62 = shl i96 %r61, 64
-%r63 = or i96 %r60, %r62
-%r64 = zext i96 %r63 to i128
-%r65 = zext i32 %r18 to i128
-%r66 = shl i128 %r65, 96
-%r67 = or i128 %r64, %r66
-%r68 = zext i128 %r67 to i160
-%r69 = zext i32 %r22 to i160
-%r70 = shl i160 %r69, 128
-%r71 = or i160 %r68, %r70
-%r72 = zext i160 %r71 to i192
-%r73 = zext i32 %r26 to i192
-%r74 = shl i192 %r73, 160
-%r75 = or i192 %r72, %r74
-%r76 = zext i192 %r75 to i224
-%r77 = zext i32 %r30 to i224
-%r78 = shl i224 %r77, 192
-%r79 = or i224 %r76, %r78
-%r80 = zext i224 %r79 to i256
-%r81 = zext i32 %r34 to i256
-%r82 = shl i256 %r81, 224
-%r83 = or i256 %r80, %r82
-%r84 = zext i256 %r83 to i288
-%r85 = zext i32 %r38 to i288
-%r86 = shl i288 %r85, 256
-%r87 = or i288 %r84, %r86
-%r88 = zext i288 %r87 to i320
-%r89 = zext i32 %r42 to i320
-%r90 = shl i320 %r89, 288
-%r91 = or i320 %r88, %r90
-%r92 = zext i320 %r91 to i352
-%r93 = zext i32 %r46 to i352
-%r94 = shl i352 %r93, 320
-%r95 = or i352 %r92, %r94
-%r96 = zext i352 %r95 to i384
-%r97 = zext i32 %r50 to i384
-%r98 = shl i384 %r97, 352
-%r99 = or i384 %r96, %r98
-%r100 = zext i384 %r99 to i416
-%r101 = zext i32 %r54 to i416
-%r102 = shl i416 %r101, 384
-%r103 = or i416 %r100, %r102
-%r104 = zext i32 %r7 to i64
-%r105 = zext i32 %r11 to i64
-%r106 = shl i64 %r105, 32
-%r107 = or i64 %r104, %r106
-%r108 = zext i64 %r107 to i96
-%r109 = zext i32 %r15 to i96
-%r110 = shl i96 %r109, 64
-%r111 = or i96 %r108, %r110
-%r112 = zext i96 %r111 to i128
-%r113 = zext i32 %r19 to i128
-%r114 = shl i128 %r113, 96
-%r115 = or i128 %r112, %r114
-%r116 = zext i128 %r115 to i160
-%r117 = zext i32 %r23 to i160
-%r118 = shl i160 %r117, 128
-%r119 = or i160 %r116, %r118
-%r120 = zext i160 %r119 to i192
-%r121 = zext i32 %r27 to i192
-%r122 = shl i192 %r121, 160
-%r123 = or i192 %r120, %r122
-%r124 = zext i192 %r123 to i224
-%r125 = zext i32 %r31 to i224
-%r126 = shl i224 %r125, 192
-%r127 = or i224 %r124, %r126
-%r128 = zext i224 %r127 to i256
-%r129 = zext i32 %r35 to i256
-%r130 = shl i256 %r129, 224
-%r131 = or i256 %r128, %r130
-%r132 = zext i256 %r131 to i288
-%r133 = zext i32 %r39 to i288
-%r134 = shl i288 %r133, 256
-%r135 = or i288 %r132, %r134
-%r136 = zext i288 %r135 to i320
-%r137 = zext i32 %r43 to i320
-%r138 = shl i320 %r137, 288
-%r139 = or i320 %r136, %r138
-%r140 = zext i320 %r139 to i352
-%r141 = zext i32 %r47 to i352
-%r142 = shl i352 %r141, 320
-%r143 = or i352 %r140, %r142
-%r144 = zext i352 %r143 to i384
-%r145 = zext i32 %r51 to i384
-%r146 = shl i384 %r145, 352
-%r147 = or i384 %r144, %r146
-%r148 = zext i384 %r147 to i416
-%r149 = zext i32 %r55 to i416
-%r150 = shl i416 %r149, 384
-%r151 = or i416 %r148, %r150
-%r152 = zext i416 %r103 to i448
-%r153 = zext i416 %r151 to i448
-%r154 = shl i448 %r153, 32
-%r155 = add i448 %r152, %r154
-ret i448 %r155
-}
-define void @mcl_fp_mulUnitPre13L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i448 @mulPv416x32(i32* %r2, i32 %r3)
-%r5 = trunc i448 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i448 %r4, 32
-%r9 = trunc i448 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i448 %r8, 32
-%r13 = trunc i448 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i448 %r12, 32
-%r17 = trunc i448 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i448 %r16, 32
-%r21 = trunc i448 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-%r24 = lshr i448 %r20, 32
-%r25 = trunc i448 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 5
-store i32 %r25, i32* %r27
-%r28 = lshr i448 %r24, 32
-%r29 = trunc i448 %r28 to i32
-%r31 = getelementptr i32, i32* %r1, i32 6
-store i32 %r29, i32* %r31
-%r32 = lshr i448 %r28, 32
-%r33 = trunc i448 %r32 to i32
-%r35 = getelementptr i32, i32* %r1, i32 7
-store i32 %r33, i32* %r35
-%r36 = lshr i448 %r32, 32
-%r37 = trunc i448 %r36 to i32
-%r39 = getelementptr i32, i32* %r1, i32 8
-store i32 %r37, i32* %r39
-%r40 = lshr i448 %r36, 32
-%r41 = trunc i448 %r40 to i32
-%r43 = getelementptr i32, i32* %r1, i32 9
-store i32 %r41, i32* %r43
-%r44 = lshr i448 %r40, 32
-%r45 = trunc i448 %r44 to i32
-%r47 = getelementptr i32, i32* %r1, i32 10
-store i32 %r45, i32* %r47
-%r48 = lshr i448 %r44, 32
-%r49 = trunc i448 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 11
-store i32 %r49, i32* %r51
-%r52 = lshr i448 %r48, 32
-%r53 = trunc i448 %r52 to i32
-%r55 = getelementptr i32, i32* %r1, i32 12
-store i32 %r53, i32* %r55
-%r56 = lshr i448 %r52, 32
-%r57 = trunc i448 %r56 to i32
-%r59 = getelementptr i32, i32* %r1, i32 13
-store i32 %r57, i32* %r59
-ret void
-}
-define void @mcl_fpDbl_mulPre13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r4 = load i32, i32* %r3
-%r5 = call i448 @mulPv416x32(i32* %r2, i32 %r4)
-%r6 = trunc i448 %r5 to i32
-store i32 %r6, i32* %r1
-%r7 = lshr i448 %r5, 32
-%r9 = getelementptr i32, i32* %r3, i32 1
-%r10 = load i32, i32* %r9
-%r11 = call i448 @mulPv416x32(i32* %r2, i32 %r10)
-%r12 = add i448 %r7, %r11
-%r13 = trunc i448 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 1
-store i32 %r13, i32* %r15
-%r16 = lshr i448 %r12, 32
-%r18 = getelementptr i32, i32* %r3, i32 2
-%r19 = load i32, i32* %r18
-%r20 = call i448 @mulPv416x32(i32* %r2, i32 %r19)
-%r21 = add i448 %r16, %r20
-%r22 = trunc i448 %r21 to i32
-%r24 = getelementptr i32, i32* %r1, i32 2
-store i32 %r22, i32* %r24
-%r25 = lshr i448 %r21, 32
-%r27 = getelementptr i32, i32* %r3, i32 3
-%r28 = load i32, i32* %r27
-%r29 = call i448 @mulPv416x32(i32* %r2, i32 %r28)
-%r30 = add i448 %r25, %r29
-%r31 = trunc i448 %r30 to i32
-%r33 = getelementptr i32, i32* %r1, i32 3
-store i32 %r31, i32* %r33
-%r34 = lshr i448 %r30, 32
-%r36 = getelementptr i32, i32* %r3, i32 4
-%r37 = load i32, i32* %r36
-%r38 = call i448 @mulPv416x32(i32* %r2, i32 %r37)
-%r39 = add i448 %r34, %r38
-%r40 = trunc i448 %r39 to i32
-%r42 = getelementptr i32, i32* %r1, i32 4
-store i32 %r40, i32* %r42
-%r43 = lshr i448 %r39, 32
-%r45 = getelementptr i32, i32* %r3, i32 5
-%r46 = load i32, i32* %r45
-%r47 = call i448 @mulPv416x32(i32* %r2, i32 %r46)
-%r48 = add i448 %r43, %r47
-%r49 = trunc i448 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 5
-store i32 %r49, i32* %r51
-%r52 = lshr i448 %r48, 32
-%r54 = getelementptr i32, i32* %r3, i32 6
-%r55 = load i32, i32* %r54
-%r56 = call i448 @mulPv416x32(i32* %r2, i32 %r55)
-%r57 = add i448 %r52, %r56
-%r58 = trunc i448 %r57 to i32
-%r60 = getelementptr i32, i32* %r1, i32 6
-store i32 %r58, i32* %r60
-%r61 = lshr i448 %r57, 32
-%r63 = getelementptr i32, i32* %r3, i32 7
-%r64 = load i32, i32* %r63
-%r65 = call i448 @mulPv416x32(i32* %r2, i32 %r64)
-%r66 = add i448 %r61, %r65
-%r67 = trunc i448 %r66 to i32
-%r69 = getelementptr i32, i32* %r1, i32 7
-store i32 %r67, i32* %r69
-%r70 = lshr i448 %r66, 32
-%r72 = getelementptr i32, i32* %r3, i32 8
-%r73 = load i32, i32* %r72
-%r74 = call i448 @mulPv416x32(i32* %r2, i32 %r73)
-%r75 = add i448 %r70, %r74
-%r76 = trunc i448 %r75 to i32
-%r78 = getelementptr i32, i32* %r1, i32 8
-store i32 %r76, i32* %r78
-%r79 = lshr i448 %r75, 32
-%r81 = getelementptr i32, i32* %r3, i32 9
-%r82 = load i32, i32* %r81
-%r83 = call i448 @mulPv416x32(i32* %r2, i32 %r82)
-%r84 = add i448 %r79, %r83
-%r85 = trunc i448 %r84 to i32
-%r87 = getelementptr i32, i32* %r1, i32 9
-store i32 %r85, i32* %r87
-%r88 = lshr i448 %r84, 32
-%r90 = getelementptr i32, i32* %r3, i32 10
-%r91 = load i32, i32* %r90
-%r92 = call i448 @mulPv416x32(i32* %r2, i32 %r91)
-%r93 = add i448 %r88, %r92
-%r94 = trunc i448 %r93 to i32
-%r96 = getelementptr i32, i32* %r1, i32 10
-store i32 %r94, i32* %r96
-%r97 = lshr i448 %r93, 32
-%r99 = getelementptr i32, i32* %r3, i32 11
-%r100 = load i32, i32* %r99
-%r101 = call i448 @mulPv416x32(i32* %r2, i32 %r100)
-%r102 = add i448 %r97, %r101
-%r103 = trunc i448 %r102 to i32
-%r105 = getelementptr i32, i32* %r1, i32 11
-store i32 %r103, i32* %r105
-%r106 = lshr i448 %r102, 32
-%r108 = getelementptr i32, i32* %r3, i32 12
-%r109 = load i32, i32* %r108
-%r110 = call i448 @mulPv416x32(i32* %r2, i32 %r109)
-%r111 = add i448 %r106, %r110
-%r113 = getelementptr i32, i32* %r1, i32 12
-%r114 = trunc i448 %r111 to i32
-%r116 = getelementptr i32, i32* %r113, i32 0
-store i32 %r114, i32* %r116
-%r117 = lshr i448 %r111, 32
-%r118 = trunc i448 %r117 to i32
-%r120 = getelementptr i32, i32* %r113, i32 1
-store i32 %r118, i32* %r120
-%r121 = lshr i448 %r117, 32
-%r122 = trunc i448 %r121 to i32
-%r124 = getelementptr i32, i32* %r113, i32 2
-store i32 %r122, i32* %r124
-%r125 = lshr i448 %r121, 32
-%r126 = trunc i448 %r125 to i32
-%r128 = getelementptr i32, i32* %r113, i32 3
-store i32 %r126, i32* %r128
-%r129 = lshr i448 %r125, 32
-%r130 = trunc i448 %r129 to i32
-%r132 = getelementptr i32, i32* %r113, i32 4
-store i32 %r130, i32* %r132
-%r133 = lshr i448 %r129, 32
-%r134 = trunc i448 %r133 to i32
-%r136 = getelementptr i32, i32* %r113, i32 5
-store i32 %r134, i32* %r136
-%r137 = lshr i448 %r133, 32
-%r138 = trunc i448 %r137 to i32
-%r140 = getelementptr i32, i32* %r113, i32 6
-store i32 %r138, i32* %r140
-%r141 = lshr i448 %r137, 32
-%r142 = trunc i448 %r141 to i32
-%r144 = getelementptr i32, i32* %r113, i32 7
-store i32 %r142, i32* %r144
-%r145 = lshr i448 %r141, 32
-%r146 = trunc i448 %r145 to i32
-%r148 = getelementptr i32, i32* %r113, i32 8
-store i32 %r146, i32* %r148
-%r149 = lshr i448 %r145, 32
-%r150 = trunc i448 %r149 to i32
-%r152 = getelementptr i32, i32* %r113, i32 9
-store i32 %r150, i32* %r152
-%r153 = lshr i448 %r149, 32
-%r154 = trunc i448 %r153 to i32
-%r156 = getelementptr i32, i32* %r113, i32 10
-store i32 %r154, i32* %r156
-%r157 = lshr i448 %r153, 32
-%r158 = trunc i448 %r157 to i32
-%r160 = getelementptr i32, i32* %r113, i32 11
-store i32 %r158, i32* %r160
-%r161 = lshr i448 %r157, 32
-%r162 = trunc i448 %r161 to i32
-%r164 = getelementptr i32, i32* %r113, i32 12
-store i32 %r162, i32* %r164
-%r165 = lshr i448 %r161, 32
-%r166 = trunc i448 %r165 to i32
-%r168 = getelementptr i32, i32* %r113, i32 13
-store i32 %r166, i32* %r168
-ret void
-}
-define void @mcl_fpDbl_sqrPre13L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = call i448 @mulPv416x32(i32* %r2, i32 %r3)
-%r5 = trunc i448 %r4 to i32
-store i32 %r5, i32* %r1
-%r6 = lshr i448 %r4, 32
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = call i448 @mulPv416x32(i32* %r2, i32 %r9)
-%r11 = add i448 %r6, %r10
-%r12 = trunc i448 %r11 to i32
-%r14 = getelementptr i32, i32* %r1, i32 1
-store i32 %r12, i32* %r14
-%r15 = lshr i448 %r11, 32
-%r17 = getelementptr i32, i32* %r2, i32 2
-%r18 = load i32, i32* %r17
-%r19 = call i448 @mulPv416x32(i32* %r2, i32 %r18)
-%r20 = add i448 %r15, %r19
-%r21 = trunc i448 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 2
-store i32 %r21, i32* %r23
-%r24 = lshr i448 %r20, 32
-%r26 = getelementptr i32, i32* %r2, i32 3
-%r27 = load i32, i32* %r26
-%r28 = call i448 @mulPv416x32(i32* %r2, i32 %r27)
-%r29 = add i448 %r24, %r28
-%r30 = trunc i448 %r29 to i32
-%r32 = getelementptr i32, i32* %r1, i32 3
-store i32 %r30, i32* %r32
-%r33 = lshr i448 %r29, 32
-%r35 = getelementptr i32, i32* %r2, i32 4
-%r36 = load i32, i32* %r35
-%r37 = call i448 @mulPv416x32(i32* %r2, i32 %r36)
-%r38 = add i448 %r33, %r37
-%r39 = trunc i448 %r38 to i32
-%r41 = getelementptr i32, i32* %r1, i32 4
-store i32 %r39, i32* %r41
-%r42 = lshr i448 %r38, 32
-%r44 = getelementptr i32, i32* %r2, i32 5
-%r45 = load i32, i32* %r44
-%r46 = call i448 @mulPv416x32(i32* %r2, i32 %r45)
-%r47 = add i448 %r42, %r46
-%r48 = trunc i448 %r47 to i32
-%r50 = getelementptr i32, i32* %r1, i32 5
-store i32 %r48, i32* %r50
-%r51 = lshr i448 %r47, 32
-%r53 = getelementptr i32, i32* %r2, i32 6
-%r54 = load i32, i32* %r53
-%r55 = call i448 @mulPv416x32(i32* %r2, i32 %r54)
-%r56 = add i448 %r51, %r55
-%r57 = trunc i448 %r56 to i32
-%r59 = getelementptr i32, i32* %r1, i32 6
-store i32 %r57, i32* %r59
-%r60 = lshr i448 %r56, 32
-%r62 = getelementptr i32, i32* %r2, i32 7
-%r63 = load i32, i32* %r62
-%r64 = call i448 @mulPv416x32(i32* %r2, i32 %r63)
-%r65 = add i448 %r60, %r64
-%r66 = trunc i448 %r65 to i32
-%r68 = getelementptr i32, i32* %r1, i32 7
-store i32 %r66, i32* %r68
-%r69 = lshr i448 %r65, 32
-%r71 = getelementptr i32, i32* %r2, i32 8
-%r72 = load i32, i32* %r71
-%r73 = call i448 @mulPv416x32(i32* %r2, i32 %r72)
-%r74 = add i448 %r69, %r73
-%r75 = trunc i448 %r74 to i32
-%r77 = getelementptr i32, i32* %r1, i32 8
-store i32 %r75, i32* %r77
-%r78 = lshr i448 %r74, 32
-%r80 = getelementptr i32, i32* %r2, i32 9
-%r81 = load i32, i32* %r80
-%r82 = call i448 @mulPv416x32(i32* %r2, i32 %r81)
-%r83 = add i448 %r78, %r82
-%r84 = trunc i448 %r83 to i32
-%r86 = getelementptr i32, i32* %r1, i32 9
-store i32 %r84, i32* %r86
-%r87 = lshr i448 %r83, 32
-%r89 = getelementptr i32, i32* %r2, i32 10
-%r90 = load i32, i32* %r89
-%r91 = call i448 @mulPv416x32(i32* %r2, i32 %r90)
-%r92 = add i448 %r87, %r91
-%r93 = trunc i448 %r92 to i32
-%r95 = getelementptr i32, i32* %r1, i32 10
-store i32 %r93, i32* %r95
-%r96 = lshr i448 %r92, 32
-%r98 = getelementptr i32, i32* %r2, i32 11
-%r99 = load i32, i32* %r98
-%r100 = call i448 @mulPv416x32(i32* %r2, i32 %r99)
-%r101 = add i448 %r96, %r100
-%r102 = trunc i448 %r101 to i32
-%r104 = getelementptr i32, i32* %r1, i32 11
-store i32 %r102, i32* %r104
-%r105 = lshr i448 %r101, 32
-%r107 = getelementptr i32, i32* %r2, i32 12
-%r108 = load i32, i32* %r107
-%r109 = call i448 @mulPv416x32(i32* %r2, i32 %r108)
-%r110 = add i448 %r105, %r109
-%r112 = getelementptr i32, i32* %r1, i32 12
-%r113 = trunc i448 %r110 to i32
-%r115 = getelementptr i32, i32* %r112, i32 0
-store i32 %r113, i32* %r115
-%r116 = lshr i448 %r110, 32
-%r117 = trunc i448 %r116 to i32
-%r119 = getelementptr i32, i32* %r112, i32 1
-store i32 %r117, i32* %r119
-%r120 = lshr i448 %r116, 32
-%r121 = trunc i448 %r120 to i32
-%r123 = getelementptr i32, i32* %r112, i32 2
-store i32 %r121, i32* %r123
-%r124 = lshr i448 %r120, 32
-%r125 = trunc i448 %r124 to i32
-%r127 = getelementptr i32, i32* %r112, i32 3
-store i32 %r125, i32* %r127
-%r128 = lshr i448 %r124, 32
-%r129 = trunc i448 %r128 to i32
-%r131 = getelementptr i32, i32* %r112, i32 4
-store i32 %r129, i32* %r131
-%r132 = lshr i448 %r128, 32
-%r133 = trunc i448 %r132 to i32
-%r135 = getelementptr i32, i32* %r112, i32 5
-store i32 %r133, i32* %r135
-%r136 = lshr i448 %r132, 32
-%r137 = trunc i448 %r136 to i32
-%r139 = getelementptr i32, i32* %r112, i32 6
-store i32 %r137, i32* %r139
-%r140 = lshr i448 %r136, 32
-%r141 = trunc i448 %r140 to i32
-%r143 = getelementptr i32, i32* %r112, i32 7
-store i32 %r141, i32* %r143
-%r144 = lshr i448 %r140, 32
-%r145 = trunc i448 %r144 to i32
-%r147 = getelementptr i32, i32* %r112, i32 8
-store i32 %r145, i32* %r147
-%r148 = lshr i448 %r144, 32
-%r149 = trunc i448 %r148 to i32
-%r151 = getelementptr i32, i32* %r112, i32 9
-store i32 %r149, i32* %r151
-%r152 = lshr i448 %r148, 32
-%r153 = trunc i448 %r152 to i32
-%r155 = getelementptr i32, i32* %r112, i32 10
-store i32 %r153, i32* %r155
-%r156 = lshr i448 %r152, 32
-%r157 = trunc i448 %r156 to i32
-%r159 = getelementptr i32, i32* %r112, i32 11
-store i32 %r157, i32* %r159
-%r160 = lshr i448 %r156, 32
-%r161 = trunc i448 %r160 to i32
-%r163 = getelementptr i32, i32* %r112, i32 12
-store i32 %r161, i32* %r163
-%r164 = lshr i448 %r160, 32
-%r165 = trunc i448 %r164 to i32
-%r167 = getelementptr i32, i32* %r112, i32 13
-store i32 %r165, i32* %r167
-ret void
-}
-define void @mcl_fp_mont13L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i448 @mulPv416x32(i32* %r2, i32 %r10)
-%r12 = zext i448 %r11 to i480
-%r13 = trunc i448 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i448 @mulPv416x32(i32* %r4, i32 %r14)
-%r16 = zext i448 %r15 to i480
-%r17 = add i480 %r12, %r16
-%r18 = lshr i480 %r17, 32
-%r20 = getelementptr i32, i32* %r3, i32 1
-%r21 = load i32, i32* %r20
-%r22 = call i448 @mulPv416x32(i32* %r2, i32 %r21)
-%r23 = zext i448 %r22 to i480
-%r24 = add i480 %r18, %r23
-%r25 = trunc i480 %r24 to i32
-%r26 = mul i32 %r25, %r7
-%r27 = call i448 @mulPv416x32(i32* %r4, i32 %r26)
-%r28 = zext i448 %r27 to i480
-%r29 = add i480 %r24, %r28
-%r30 = lshr i480 %r29, 32
-%r32 = getelementptr i32, i32* %r3, i32 2
-%r33 = load i32, i32* %r32
-%r34 = call i448 @mulPv416x32(i32* %r2, i32 %r33)
-%r35 = zext i448 %r34 to i480
-%r36 = add i480 %r30, %r35
-%r37 = trunc i480 %r36 to i32
-%r38 = mul i32 %r37, %r7
-%r39 = call i448 @mulPv416x32(i32* %r4, i32 %r38)
-%r40 = zext i448 %r39 to i480
-%r41 = add i480 %r36, %r40
-%r42 = lshr i480 %r41, 32
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = call i448 @mulPv416x32(i32* %r2, i32 %r45)
-%r47 = zext i448 %r46 to i480
-%r48 = add i480 %r42, %r47
-%r49 = trunc i480 %r48 to i32
-%r50 = mul i32 %r49, %r7
-%r51 = call i448 @mulPv416x32(i32* %r4, i32 %r50)
-%r52 = zext i448 %r51 to i480
-%r53 = add i480 %r48, %r52
-%r54 = lshr i480 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 4
-%r57 = load i32, i32* %r56
-%r58 = call i448 @mulPv416x32(i32* %r2, i32 %r57)
-%r59 = zext i448 %r58 to i480
-%r60 = add i480 %r54, %r59
-%r61 = trunc i480 %r60 to i32
-%r62 = mul i32 %r61, %r7
-%r63 = call i448 @mulPv416x32(i32* %r4, i32 %r62)
-%r64 = zext i448 %r63 to i480
-%r65 = add i480 %r60, %r64
-%r66 = lshr i480 %r65, 32
-%r68 = getelementptr i32, i32* %r3, i32 5
-%r69 = load i32, i32* %r68
-%r70 = call i448 @mulPv416x32(i32* %r2, i32 %r69)
-%r71 = zext i448 %r70 to i480
-%r72 = add i480 %r66, %r71
-%r73 = trunc i480 %r72 to i32
-%r74 = mul i32 %r73, %r7
-%r75 = call i448 @mulPv416x32(i32* %r4, i32 %r74)
-%r76 = zext i448 %r75 to i480
-%r77 = add i480 %r72, %r76
-%r78 = lshr i480 %r77, 32
-%r80 = getelementptr i32, i32* %r3, i32 6
-%r81 = load i32, i32* %r80
-%r82 = call i448 @mulPv416x32(i32* %r2, i32 %r81)
-%r83 = zext i448 %r82 to i480
-%r84 = add i480 %r78, %r83
-%r85 = trunc i480 %r84 to i32
-%r86 = mul i32 %r85, %r7
-%r87 = call i448 @mulPv416x32(i32* %r4, i32 %r86)
-%r88 = zext i448 %r87 to i480
-%r89 = add i480 %r84, %r88
-%r90 = lshr i480 %r89, 32
-%r92 = getelementptr i32, i32* %r3, i32 7
-%r93 = load i32, i32* %r92
-%r94 = call i448 @mulPv416x32(i32* %r2, i32 %r93)
-%r95 = zext i448 %r94 to i480
-%r96 = add i480 %r90, %r95
-%r97 = trunc i480 %r96 to i32
-%r98 = mul i32 %r97, %r7
-%r99 = call i448 @mulPv416x32(i32* %r4, i32 %r98)
-%r100 = zext i448 %r99 to i480
-%r101 = add i480 %r96, %r100
-%r102 = lshr i480 %r101, 32
-%r104 = getelementptr i32, i32* %r3, i32 8
-%r105 = load i32, i32* %r104
-%r106 = call i448 @mulPv416x32(i32* %r2, i32 %r105)
-%r107 = zext i448 %r106 to i480
-%r108 = add i480 %r102, %r107
-%r109 = trunc i480 %r108 to i32
-%r110 = mul i32 %r109, %r7
-%r111 = call i448 @mulPv416x32(i32* %r4, i32 %r110)
-%r112 = zext i448 %r111 to i480
-%r113 = add i480 %r108, %r112
-%r114 = lshr i480 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 9
-%r117 = load i32, i32* %r116
-%r118 = call i448 @mulPv416x32(i32* %r2, i32 %r117)
-%r119 = zext i448 %r118 to i480
-%r120 = add i480 %r114, %r119
-%r121 = trunc i480 %r120 to i32
-%r122 = mul i32 %r121, %r7
-%r123 = call i448 @mulPv416x32(i32* %r4, i32 %r122)
-%r124 = zext i448 %r123 to i480
-%r125 = add i480 %r120, %r124
-%r126 = lshr i480 %r125, 32
-%r128 = getelementptr i32, i32* %r3, i32 10
-%r129 = load i32, i32* %r128
-%r130 = call i448 @mulPv416x32(i32* %r2, i32 %r129)
-%r131 = zext i448 %r130 to i480
-%r132 = add i480 %r126, %r131
-%r133 = trunc i480 %r132 to i32
-%r134 = mul i32 %r133, %r7
-%r135 = call i448 @mulPv416x32(i32* %r4, i32 %r134)
-%r136 = zext i448 %r135 to i480
-%r137 = add i480 %r132, %r136
-%r138 = lshr i480 %r137, 32
-%r140 = getelementptr i32, i32* %r3, i32 11
-%r141 = load i32, i32* %r140
-%r142 = call i448 @mulPv416x32(i32* %r2, i32 %r141)
-%r143 = zext i448 %r142 to i480
-%r144 = add i480 %r138, %r143
-%r145 = trunc i480 %r144 to i32
-%r146 = mul i32 %r145, %r7
-%r147 = call i448 @mulPv416x32(i32* %r4, i32 %r146)
-%r148 = zext i448 %r147 to i480
-%r149 = add i480 %r144, %r148
-%r150 = lshr i480 %r149, 32
-%r152 = getelementptr i32, i32* %r3, i32 12
-%r153 = load i32, i32* %r152
-%r154 = call i448 @mulPv416x32(i32* %r2, i32 %r153)
-%r155 = zext i448 %r154 to i480
-%r156 = add i480 %r150, %r155
-%r157 = trunc i480 %r156 to i32
-%r158 = mul i32 %r157, %r7
-%r159 = call i448 @mulPv416x32(i32* %r4, i32 %r158)
-%r160 = zext i448 %r159 to i480
-%r161 = add i480 %r156, %r160
-%r162 = lshr i480 %r161, 32
-%r163 = trunc i480 %r162 to i448
-%r164 = load i32, i32* %r4
-%r165 = zext i32 %r164 to i64
-%r167 = getelementptr i32, i32* %r4, i32 1
-%r168 = load i32, i32* %r167
-%r169 = zext i32 %r168 to i64
-%r170 = shl i64 %r169, 32
-%r171 = or i64 %r165, %r170
-%r172 = zext i64 %r171 to i96
-%r174 = getelementptr i32, i32* %r4, i32 2
-%r175 = load i32, i32* %r174
-%r176 = zext i32 %r175 to i96
-%r177 = shl i96 %r176, 64
-%r178 = or i96 %r172, %r177
-%r179 = zext i96 %r178 to i128
-%r181 = getelementptr i32, i32* %r4, i32 3
-%r182 = load i32, i32* %r181
-%r183 = zext i32 %r182 to i128
-%r184 = shl i128 %r183, 96
-%r185 = or i128 %r179, %r184
-%r186 = zext i128 %r185 to i160
-%r188 = getelementptr i32, i32* %r4, i32 4
-%r189 = load i32, i32* %r188
-%r190 = zext i32 %r189 to i160
-%r191 = shl i160 %r190, 128
-%r192 = or i160 %r186, %r191
-%r193 = zext i160 %r192 to i192
-%r195 = getelementptr i32, i32* %r4, i32 5
-%r196 = load i32, i32* %r195
-%r197 = zext i32 %r196 to i192
-%r198 = shl i192 %r197, 160
-%r199 = or i192 %r193, %r198
-%r200 = zext i192 %r199 to i224
-%r202 = getelementptr i32, i32* %r4, i32 6
-%r203 = load i32, i32* %r202
-%r204 = zext i32 %r203 to i224
-%r205 = shl i224 %r204, 192
-%r206 = or i224 %r200, %r205
-%r207 = zext i224 %r206 to i256
-%r209 = getelementptr i32, i32* %r4, i32 7
-%r210 = load i32, i32* %r209
-%r211 = zext i32 %r210 to i256
-%r212 = shl i256 %r211, 224
-%r213 = or i256 %r207, %r212
-%r214 = zext i256 %r213 to i288
-%r216 = getelementptr i32, i32* %r4, i32 8
-%r217 = load i32, i32* %r216
-%r218 = zext i32 %r217 to i288
-%r219 = shl i288 %r218, 256
-%r220 = or i288 %r214, %r219
-%r221 = zext i288 %r220 to i320
-%r223 = getelementptr i32, i32* %r4, i32 9
-%r224 = load i32, i32* %r223
-%r225 = zext i32 %r224 to i320
-%r226 = shl i320 %r225, 288
-%r227 = or i320 %r221, %r226
-%r228 = zext i320 %r227 to i352
-%r230 = getelementptr i32, i32* %r4, i32 10
-%r231 = load i32, i32* %r230
-%r232 = zext i32 %r231 to i352
-%r233 = shl i352 %r232, 320
-%r234 = or i352 %r228, %r233
-%r235 = zext i352 %r234 to i384
-%r237 = getelementptr i32, i32* %r4, i32 11
-%r238 = load i32, i32* %r237
-%r239 = zext i32 %r238 to i384
-%r240 = shl i384 %r239, 352
-%r241 = or i384 %r235, %r240
-%r242 = zext i384 %r241 to i416
-%r244 = getelementptr i32, i32* %r4, i32 12
-%r245 = load i32, i32* %r244
-%r246 = zext i32 %r245 to i416
-%r247 = shl i416 %r246, 384
-%r248 = or i416 %r242, %r247
-%r249 = zext i416 %r248 to i448
-%r250 = sub i448 %r163, %r249
-%r251 = lshr i448 %r250, 416
-%r252 = trunc i448 %r251 to i1
-%r253 = select i1 %r252, i448 %r163, i448 %r250
-%r254 = trunc i448 %r253 to i416
-%r255 = trunc i416 %r254 to i32
-%r257 = getelementptr i32, i32* %r1, i32 0
-store i32 %r255, i32* %r257
-%r258 = lshr i416 %r254, 32
-%r259 = trunc i416 %r258 to i32
-%r261 = getelementptr i32, i32* %r1, i32 1
-store i32 %r259, i32* %r261
-%r262 = lshr i416 %r258, 32
-%r263 = trunc i416 %r262 to i32
-%r265 = getelementptr i32, i32* %r1, i32 2
-store i32 %r263, i32* %r265
-%r266 = lshr i416 %r262, 32
-%r267 = trunc i416 %r266 to i32
-%r269 = getelementptr i32, i32* %r1, i32 3
-store i32 %r267, i32* %r269
-%r270 = lshr i416 %r266, 32
-%r271 = trunc i416 %r270 to i32
-%r273 = getelementptr i32, i32* %r1, i32 4
-store i32 %r271, i32* %r273
-%r274 = lshr i416 %r270, 32
-%r275 = trunc i416 %r274 to i32
-%r277 = getelementptr i32, i32* %r1, i32 5
-store i32 %r275, i32* %r277
-%r278 = lshr i416 %r274, 32
-%r279 = trunc i416 %r278 to i32
-%r281 = getelementptr i32, i32* %r1, i32 6
-store i32 %r279, i32* %r281
-%r282 = lshr i416 %r278, 32
-%r283 = trunc i416 %r282 to i32
-%r285 = getelementptr i32, i32* %r1, i32 7
-store i32 %r283, i32* %r285
-%r286 = lshr i416 %r282, 32
-%r287 = trunc i416 %r286 to i32
-%r289 = getelementptr i32, i32* %r1, i32 8
-store i32 %r287, i32* %r289
-%r290 = lshr i416 %r286, 32
-%r291 = trunc i416 %r290 to i32
-%r293 = getelementptr i32, i32* %r1, i32 9
-store i32 %r291, i32* %r293
-%r294 = lshr i416 %r290, 32
-%r295 = trunc i416 %r294 to i32
-%r297 = getelementptr i32, i32* %r1, i32 10
-store i32 %r295, i32* %r297
-%r298 = lshr i416 %r294, 32
-%r299 = trunc i416 %r298 to i32
-%r301 = getelementptr i32, i32* %r1, i32 11
-store i32 %r299, i32* %r301
-%r302 = lshr i416 %r298, 32
-%r303 = trunc i416 %r302 to i32
-%r305 = getelementptr i32, i32* %r1, i32 12
-store i32 %r303, i32* %r305
-ret void
-}
-define void @mcl_fp_montNF13L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i448 @mulPv416x32(i32* %r2, i32 %r8)
-%r10 = trunc i448 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i448 @mulPv416x32(i32* %r4, i32 %r11)
-%r13 = add i448 %r9, %r12
-%r14 = lshr i448 %r13, 32
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = call i448 @mulPv416x32(i32* %r2, i32 %r17)
-%r19 = add i448 %r14, %r18
-%r20 = trunc i448 %r19 to i32
-%r21 = mul i32 %r20, %r7
-%r22 = call i448 @mulPv416x32(i32* %r4, i32 %r21)
-%r23 = add i448 %r19, %r22
-%r24 = lshr i448 %r23, 32
-%r26 = getelementptr i32, i32* %r3, i32 2
-%r27 = load i32, i32* %r26
-%r28 = call i448 @mulPv416x32(i32* %r2, i32 %r27)
-%r29 = add i448 %r24, %r28
-%r30 = trunc i448 %r29 to i32
-%r31 = mul i32 %r30, %r7
-%r32 = call i448 @mulPv416x32(i32* %r4, i32 %r31)
-%r33 = add i448 %r29, %r32
-%r34 = lshr i448 %r33, 32
-%r36 = getelementptr i32, i32* %r3, i32 3
-%r37 = load i32, i32* %r36
-%r38 = call i448 @mulPv416x32(i32* %r2, i32 %r37)
-%r39 = add i448 %r34, %r38
-%r40 = trunc i448 %r39 to i32
-%r41 = mul i32 %r40, %r7
-%r42 = call i448 @mulPv416x32(i32* %r4, i32 %r41)
-%r43 = add i448 %r39, %r42
-%r44 = lshr i448 %r43, 32
-%r46 = getelementptr i32, i32* %r3, i32 4
-%r47 = load i32, i32* %r46
-%r48 = call i448 @mulPv416x32(i32* %r2, i32 %r47)
-%r49 = add i448 %r44, %r48
-%r50 = trunc i448 %r49 to i32
-%r51 = mul i32 %r50, %r7
-%r52 = call i448 @mulPv416x32(i32* %r4, i32 %r51)
-%r53 = add i448 %r49, %r52
-%r54 = lshr i448 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 5
-%r57 = load i32, i32* %r56
-%r58 = call i448 @mulPv416x32(i32* %r2, i32 %r57)
-%r59 = add i448 %r54, %r58
-%r60 = trunc i448 %r59 to i32
-%r61 = mul i32 %r60, %r7
-%r62 = call i448 @mulPv416x32(i32* %r4, i32 %r61)
-%r63 = add i448 %r59, %r62
-%r64 = lshr i448 %r63, 32
-%r66 = getelementptr i32, i32* %r3, i32 6
-%r67 = load i32, i32* %r66
-%r68 = call i448 @mulPv416x32(i32* %r2, i32 %r67)
-%r69 = add i448 %r64, %r68
-%r70 = trunc i448 %r69 to i32
-%r71 = mul i32 %r70, %r7
-%r72 = call i448 @mulPv416x32(i32* %r4, i32 %r71)
-%r73 = add i448 %r69, %r72
-%r74 = lshr i448 %r73, 32
-%r76 = getelementptr i32, i32* %r3, i32 7
-%r77 = load i32, i32* %r76
-%r78 = call i448 @mulPv416x32(i32* %r2, i32 %r77)
-%r79 = add i448 %r74, %r78
-%r80 = trunc i448 %r79 to i32
-%r81 = mul i32 %r80, %r7
-%r82 = call i448 @mulPv416x32(i32* %r4, i32 %r81)
-%r83 = add i448 %r79, %r82
-%r84 = lshr i448 %r83, 32
-%r86 = getelementptr i32, i32* %r3, i32 8
-%r87 = load i32, i32* %r86
-%r88 = call i448 @mulPv416x32(i32* %r2, i32 %r87)
-%r89 = add i448 %r84, %r88
-%r90 = trunc i448 %r89 to i32
-%r91 = mul i32 %r90, %r7
-%r92 = call i448 @mulPv416x32(i32* %r4, i32 %r91)
-%r93 = add i448 %r89, %r92
-%r94 = lshr i448 %r93, 32
-%r96 = getelementptr i32, i32* %r3, i32 9
-%r97 = load i32, i32* %r96
-%r98 = call i448 @mulPv416x32(i32* %r2, i32 %r97)
-%r99 = add i448 %r94, %r98
-%r100 = trunc i448 %r99 to i32
-%r101 = mul i32 %r100, %r7
-%r102 = call i448 @mulPv416x32(i32* %r4, i32 %r101)
-%r103 = add i448 %r99, %r102
-%r104 = lshr i448 %r103, 32
-%r106 = getelementptr i32, i32* %r3, i32 10
-%r107 = load i32, i32* %r106
-%r108 = call i448 @mulPv416x32(i32* %r2, i32 %r107)
-%r109 = add i448 %r104, %r108
-%r110 = trunc i448 %r109 to i32
-%r111 = mul i32 %r110, %r7
-%r112 = call i448 @mulPv416x32(i32* %r4, i32 %r111)
-%r113 = add i448 %r109, %r112
-%r114 = lshr i448 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 11
-%r117 = load i32, i32* %r116
-%r118 = call i448 @mulPv416x32(i32* %r2, i32 %r117)
-%r119 = add i448 %r114, %r118
-%r120 = trunc i448 %r119 to i32
-%r121 = mul i32 %r120, %r7
-%r122 = call i448 @mulPv416x32(i32* %r4, i32 %r121)
-%r123 = add i448 %r119, %r122
-%r124 = lshr i448 %r123, 32
-%r126 = getelementptr i32, i32* %r3, i32 12
-%r127 = load i32, i32* %r126
-%r128 = call i448 @mulPv416x32(i32* %r2, i32 %r127)
-%r129 = add i448 %r124, %r128
-%r130 = trunc i448 %r129 to i32
-%r131 = mul i32 %r130, %r7
-%r132 = call i448 @mulPv416x32(i32* %r4, i32 %r131)
-%r133 = add i448 %r129, %r132
-%r134 = lshr i448 %r133, 32
-%r135 = trunc i448 %r134 to i416
-%r136 = load i32, i32* %r4
-%r137 = zext i32 %r136 to i64
-%r139 = getelementptr i32, i32* %r4, i32 1
-%r140 = load i32, i32* %r139
-%r141 = zext i32 %r140 to i64
-%r142 = shl i64 %r141, 32
-%r143 = or i64 %r137, %r142
-%r144 = zext i64 %r143 to i96
-%r146 = getelementptr i32, i32* %r4, i32 2
-%r147 = load i32, i32* %r146
-%r148 = zext i32 %r147 to i96
-%r149 = shl i96 %r148, 64
-%r150 = or i96 %r144, %r149
-%r151 = zext i96 %r150 to i128
-%r153 = getelementptr i32, i32* %r4, i32 3
-%r154 = load i32, i32* %r153
-%r155 = zext i32 %r154 to i128
-%r156 = shl i128 %r155, 96
-%r157 = or i128 %r151, %r156
-%r158 = zext i128 %r157 to i160
-%r160 = getelementptr i32, i32* %r4, i32 4
-%r161 = load i32, i32* %r160
-%r162 = zext i32 %r161 to i160
-%r163 = shl i160 %r162, 128
-%r164 = or i160 %r158, %r163
-%r165 = zext i160 %r164 to i192
-%r167 = getelementptr i32, i32* %r4, i32 5
-%r168 = load i32, i32* %r167
-%r169 = zext i32 %r168 to i192
-%r170 = shl i192 %r169, 160
-%r171 = or i192 %r165, %r170
-%r172 = zext i192 %r171 to i224
-%r174 = getelementptr i32, i32* %r4, i32 6
-%r175 = load i32, i32* %r174
-%r176 = zext i32 %r175 to i224
-%r177 = shl i224 %r176, 192
-%r178 = or i224 %r172, %r177
-%r179 = zext i224 %r178 to i256
-%r181 = getelementptr i32, i32* %r4, i32 7
-%r182 = load i32, i32* %r181
-%r183 = zext i32 %r182 to i256
-%r184 = shl i256 %r183, 224
-%r185 = or i256 %r179, %r184
-%r186 = zext i256 %r185 to i288
-%r188 = getelementptr i32, i32* %r4, i32 8
-%r189 = load i32, i32* %r188
-%r190 = zext i32 %r189 to i288
-%r191 = shl i288 %r190, 256
-%r192 = or i288 %r186, %r191
-%r193 = zext i288 %r192 to i320
-%r195 = getelementptr i32, i32* %r4, i32 9
-%r196 = load i32, i32* %r195
-%r197 = zext i32 %r196 to i320
-%r198 = shl i320 %r197, 288
-%r199 = or i320 %r193, %r198
-%r200 = zext i320 %r199 to i352
-%r202 = getelementptr i32, i32* %r4, i32 10
-%r203 = load i32, i32* %r202
-%r204 = zext i32 %r203 to i352
-%r205 = shl i352 %r204, 320
-%r206 = or i352 %r200, %r205
-%r207 = zext i352 %r206 to i384
-%r209 = getelementptr i32, i32* %r4, i32 11
-%r210 = load i32, i32* %r209
-%r211 = zext i32 %r210 to i384
-%r212 = shl i384 %r211, 352
-%r213 = or i384 %r207, %r212
-%r214 = zext i384 %r213 to i416
-%r216 = getelementptr i32, i32* %r4, i32 12
-%r217 = load i32, i32* %r216
-%r218 = zext i32 %r217 to i416
-%r219 = shl i416 %r218, 384
-%r220 = or i416 %r214, %r219
-%r221 = sub i416 %r135, %r220
-%r222 = lshr i416 %r221, 415
-%r223 = trunc i416 %r222 to i1
-%r224 = select i1 %r223, i416 %r135, i416 %r221
-%r225 = trunc i416 %r224 to i32
-%r227 = getelementptr i32, i32* %r1, i32 0
-store i32 %r225, i32* %r227
-%r228 = lshr i416 %r224, 32
-%r229 = trunc i416 %r228 to i32
-%r231 = getelementptr i32, i32* %r1, i32 1
-store i32 %r229, i32* %r231
-%r232 = lshr i416 %r228, 32
-%r233 = trunc i416 %r232 to i32
-%r235 = getelementptr i32, i32* %r1, i32 2
-store i32 %r233, i32* %r235
-%r236 = lshr i416 %r232, 32
-%r237 = trunc i416 %r236 to i32
-%r239 = getelementptr i32, i32* %r1, i32 3
-store i32 %r237, i32* %r239
-%r240 = lshr i416 %r236, 32
-%r241 = trunc i416 %r240 to i32
-%r243 = getelementptr i32, i32* %r1, i32 4
-store i32 %r241, i32* %r243
-%r244 = lshr i416 %r240, 32
-%r245 = trunc i416 %r244 to i32
-%r247 = getelementptr i32, i32* %r1, i32 5
-store i32 %r245, i32* %r247
-%r248 = lshr i416 %r244, 32
-%r249 = trunc i416 %r248 to i32
-%r251 = getelementptr i32, i32* %r1, i32 6
-store i32 %r249, i32* %r251
-%r252 = lshr i416 %r248, 32
-%r253 = trunc i416 %r252 to i32
-%r255 = getelementptr i32, i32* %r1, i32 7
-store i32 %r253, i32* %r255
-%r256 = lshr i416 %r252, 32
-%r257 = trunc i416 %r256 to i32
-%r259 = getelementptr i32, i32* %r1, i32 8
-store i32 %r257, i32* %r259
-%r260 = lshr i416 %r256, 32
-%r261 = trunc i416 %r260 to i32
-%r263 = getelementptr i32, i32* %r1, i32 9
-store i32 %r261, i32* %r263
-%r264 = lshr i416 %r260, 32
-%r265 = trunc i416 %r264 to i32
-%r267 = getelementptr i32, i32* %r1, i32 10
-store i32 %r265, i32* %r267
-%r268 = lshr i416 %r264, 32
-%r269 = trunc i416 %r268 to i32
-%r271 = getelementptr i32, i32* %r1, i32 11
-store i32 %r269, i32* %r271
-%r272 = lshr i416 %r268, 32
-%r273 = trunc i416 %r272 to i32
-%r275 = getelementptr i32, i32* %r1, i32 12
-store i32 %r273, i32* %r275
-ret void
-}
-define void @mcl_fp_montRed13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = zext i96 %r21 to i128
-%r24 = getelementptr i32, i32* %r3, i32 3
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i128
-%r27 = shl i128 %r26, 96
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i160
-%r31 = getelementptr i32, i32* %r3, i32 4
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i160
-%r34 = shl i160 %r33, 128
-%r35 = or i160 %r29, %r34
-%r36 = zext i160 %r35 to i192
-%r38 = getelementptr i32, i32* %r3, i32 5
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i192
-%r41 = shl i192 %r40, 160
-%r42 = or i192 %r36, %r41
-%r43 = zext i192 %r42 to i224
-%r45 = getelementptr i32, i32* %r3, i32 6
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i224
-%r48 = shl i224 %r47, 192
-%r49 = or i224 %r43, %r48
-%r50 = zext i224 %r49 to i256
-%r52 = getelementptr i32, i32* %r3, i32 7
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i256
-%r55 = shl i256 %r54, 224
-%r56 = or i256 %r50, %r55
-%r57 = zext i256 %r56 to i288
-%r59 = getelementptr i32, i32* %r3, i32 8
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i288
-%r62 = shl i288 %r61, 256
-%r63 = or i288 %r57, %r62
-%r64 = zext i288 %r63 to i320
-%r66 = getelementptr i32, i32* %r3, i32 9
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i320
-%r69 = shl i320 %r68, 288
-%r70 = or i320 %r64, %r69
-%r71 = zext i320 %r70 to i352
-%r73 = getelementptr i32, i32* %r3, i32 10
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i352
-%r76 = shl i352 %r75, 320
-%r77 = or i352 %r71, %r76
-%r78 = zext i352 %r77 to i384
-%r80 = getelementptr i32, i32* %r3, i32 11
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i384
-%r83 = shl i384 %r82, 352
-%r84 = or i384 %r78, %r83
-%r85 = zext i384 %r84 to i416
-%r87 = getelementptr i32, i32* %r3, i32 12
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i416
-%r90 = shl i416 %r89, 384
-%r91 = or i416 %r85, %r90
-%r92 = load i32, i32* %r2
-%r93 = zext i32 %r92 to i64
-%r95 = getelementptr i32, i32* %r2, i32 1
-%r96 = load i32, i32* %r95
-%r97 = zext i32 %r96 to i64
-%r98 = shl i64 %r97, 32
-%r99 = or i64 %r93, %r98
-%r100 = zext i64 %r99 to i96
-%r102 = getelementptr i32, i32* %r2, i32 2
-%r103 = load i32, i32* %r102
-%r104 = zext i32 %r103 to i96
-%r105 = shl i96 %r104, 64
-%r106 = or i96 %r100, %r105
-%r107 = zext i96 %r106 to i128
-%r109 = getelementptr i32, i32* %r2, i32 3
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i128
-%r112 = shl i128 %r111, 96
-%r113 = or i128 %r107, %r112
-%r114 = zext i128 %r113 to i160
-%r116 = getelementptr i32, i32* %r2, i32 4
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i160
-%r119 = shl i160 %r118, 128
-%r120 = or i160 %r114, %r119
-%r121 = zext i160 %r120 to i192
-%r123 = getelementptr i32, i32* %r2, i32 5
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i192
-%r126 = shl i192 %r125, 160
-%r127 = or i192 %r121, %r126
-%r128 = zext i192 %r127 to i224
-%r130 = getelementptr i32, i32* %r2, i32 6
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i224
-%r133 = shl i224 %r132, 192
-%r134 = or i224 %r128, %r133
-%r135 = zext i224 %r134 to i256
-%r137 = getelementptr i32, i32* %r2, i32 7
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i256
-%r140 = shl i256 %r139, 224
-%r141 = or i256 %r135, %r140
-%r142 = zext i256 %r141 to i288
-%r144 = getelementptr i32, i32* %r2, i32 8
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i288
-%r147 = shl i288 %r146, 256
-%r148 = or i288 %r142, %r147
-%r149 = zext i288 %r148 to i320
-%r151 = getelementptr i32, i32* %r2, i32 9
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i320
-%r154 = shl i320 %r153, 288
-%r155 = or i320 %r149, %r154
-%r156 = zext i320 %r155 to i352
-%r158 = getelementptr i32, i32* %r2, i32 10
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i352
-%r161 = shl i352 %r160, 320
-%r162 = or i352 %r156, %r161
-%r163 = zext i352 %r162 to i384
-%r165 = getelementptr i32, i32* %r2, i32 11
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i384
-%r168 = shl i384 %r167, 352
-%r169 = or i384 %r163, %r168
-%r170 = zext i384 %r169 to i416
-%r172 = getelementptr i32, i32* %r2, i32 12
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i416
-%r175 = shl i416 %r174, 384
-%r176 = or i416 %r170, %r175
-%r177 = zext i416 %r176 to i448
-%r179 = getelementptr i32, i32* %r2, i32 13
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i448
-%r182 = shl i448 %r181, 416
-%r183 = or i448 %r177, %r182
-%r184 = zext i448 %r183 to i480
-%r186 = getelementptr i32, i32* %r2, i32 14
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i480
-%r189 = shl i480 %r188, 448
-%r190 = or i480 %r184, %r189
-%r191 = zext i480 %r190 to i512
-%r193 = getelementptr i32, i32* %r2, i32 15
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i512
-%r196 = shl i512 %r195, 480
-%r197 = or i512 %r191, %r196
-%r198 = zext i512 %r197 to i544
-%r200 = getelementptr i32, i32* %r2, i32 16
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i544
-%r203 = shl i544 %r202, 512
-%r204 = or i544 %r198, %r203
-%r205 = zext i544 %r204 to i576
-%r207 = getelementptr i32, i32* %r2, i32 17
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i576
-%r210 = shl i576 %r209, 544
-%r211 = or i576 %r205, %r210
-%r212 = zext i576 %r211 to i608
-%r214 = getelementptr i32, i32* %r2, i32 18
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i608
-%r217 = shl i608 %r216, 576
-%r218 = or i608 %r212, %r217
-%r219 = zext i608 %r218 to i640
-%r221 = getelementptr i32, i32* %r2, i32 19
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i640
-%r224 = shl i640 %r223, 608
-%r225 = or i640 %r219, %r224
-%r226 = zext i640 %r225 to i672
-%r228 = getelementptr i32, i32* %r2, i32 20
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i672
-%r231 = shl i672 %r230, 640
-%r232 = or i672 %r226, %r231
-%r233 = zext i672 %r232 to i704
-%r235 = getelementptr i32, i32* %r2, i32 21
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i704
-%r238 = shl i704 %r237, 672
-%r239 = or i704 %r233, %r238
-%r240 = zext i704 %r239 to i736
-%r242 = getelementptr i32, i32* %r2, i32 22
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i736
-%r245 = shl i736 %r244, 704
-%r246 = or i736 %r240, %r245
-%r247 = zext i736 %r246 to i768
-%r249 = getelementptr i32, i32* %r2, i32 23
-%r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i768
-%r252 = shl i768 %r251, 736
-%r253 = or i768 %r247, %r252
-%r254 = zext i768 %r253 to i800
-%r256 = getelementptr i32, i32* %r2, i32 24
-%r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i800
-%r259 = shl i800 %r258, 768
-%r260 = or i800 %r254, %r259
-%r261 = zext i800 %r260 to i832
-%r263 = getelementptr i32, i32* %r2, i32 25
-%r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i832
-%r266 = shl i832 %r265, 800
-%r267 = or i832 %r261, %r266
-%r268 = zext i832 %r267 to i864
-%r269 = trunc i864 %r268 to i32
-%r270 = mul i32 %r269, %r6
-%r271 = call i448 @mulPv416x32(i32* %r3, i32 %r270)
-%r272 = zext i448 %r271 to i864
-%r273 = add i864 %r268, %r272
-%r274 = lshr i864 %r273, 32
-%r275 = trunc i864 %r274 to i832
-%r276 = trunc i832 %r275 to i32
-%r277 = mul i32 %r276, %r6
-%r278 = call i448 @mulPv416x32(i32* %r3, i32 %r277)
-%r279 = zext i448 %r278 to i832
-%r280 = add i832 %r275, %r279
-%r281 = lshr i832 %r280, 32
-%r282 = trunc i832 %r281 to i800
-%r283 = trunc i800 %r282 to i32
-%r284 = mul i32 %r283, %r6
-%r285 = call i448 @mulPv416x32(i32* %r3, i32 %r284)
-%r286 = zext i448 %r285 to i800
-%r287 = add i800 %r282, %r286
-%r288 = lshr i800 %r287, 32
-%r289 = trunc i800 %r288 to i768
-%r290 = trunc i768 %r289 to i32
-%r291 = mul i32 %r290, %r6
-%r292 = call i448 @mulPv416x32(i32* %r3, i32 %r291)
-%r293 = zext i448 %r292 to i768
-%r294 = add i768 %r289, %r293
-%r295 = lshr i768 %r294, 32
-%r296 = trunc i768 %r295 to i736
-%r297 = trunc i736 %r296 to i32
-%r298 = mul i32 %r297, %r6
-%r299 = call i448 @mulPv416x32(i32* %r3, i32 %r298)
-%r300 = zext i448 %r299 to i736
-%r301 = add i736 %r296, %r300
-%r302 = lshr i736 %r301, 32
-%r303 = trunc i736 %r302 to i704
-%r304 = trunc i704 %r303 to i32
-%r305 = mul i32 %r304, %r6
-%r306 = call i448 @mulPv416x32(i32* %r3, i32 %r305)
-%r307 = zext i448 %r306 to i704
-%r308 = add i704 %r303, %r307
-%r309 = lshr i704 %r308, 32
-%r310 = trunc i704 %r309 to i672
-%r311 = trunc i672 %r310 to i32
-%r312 = mul i32 %r311, %r6
-%r313 = call i448 @mulPv416x32(i32* %r3, i32 %r312)
-%r314 = zext i448 %r313 to i672
-%r315 = add i672 %r310, %r314
-%r316 = lshr i672 %r315, 32
-%r317 = trunc i672 %r316 to i640
-%r318 = trunc i640 %r317 to i32
-%r319 = mul i32 %r318, %r6
-%r320 = call i448 @mulPv416x32(i32* %r3, i32 %r319)
-%r321 = zext i448 %r320 to i640
-%r322 = add i640 %r317, %r321
-%r323 = lshr i640 %r322, 32
-%r324 = trunc i640 %r323 to i608
-%r325 = trunc i608 %r324 to i32
-%r326 = mul i32 %r325, %r6
-%r327 = call i448 @mulPv416x32(i32* %r3, i32 %r326)
-%r328 = zext i448 %r327 to i608
-%r329 = add i608 %r324, %r328
-%r330 = lshr i608 %r329, 32
-%r331 = trunc i608 %r330 to i576
-%r332 = trunc i576 %r331 to i32
-%r333 = mul i32 %r332, %r6
-%r334 = call i448 @mulPv416x32(i32* %r3, i32 %r333)
-%r335 = zext i448 %r334 to i576
-%r336 = add i576 %r331, %r335
-%r337 = lshr i576 %r336, 32
-%r338 = trunc i576 %r337 to i544
-%r339 = trunc i544 %r338 to i32
-%r340 = mul i32 %r339, %r6
-%r341 = call i448 @mulPv416x32(i32* %r3, i32 %r340)
-%r342 = zext i448 %r341 to i544
-%r343 = add i544 %r338, %r342
-%r344 = lshr i544 %r343, 32
-%r345 = trunc i544 %r344 to i512
-%r346 = trunc i512 %r345 to i32
-%r347 = mul i32 %r346, %r6
-%r348 = call i448 @mulPv416x32(i32* %r3, i32 %r347)
-%r349 = zext i448 %r348 to i512
-%r350 = add i512 %r345, %r349
-%r351 = lshr i512 %r350, 32
-%r352 = trunc i512 %r351 to i480
-%r353 = trunc i480 %r352 to i32
-%r354 = mul i32 %r353, %r6
-%r355 = call i448 @mulPv416x32(i32* %r3, i32 %r354)
-%r356 = zext i448 %r355 to i480
-%r357 = add i480 %r352, %r356
-%r358 = lshr i480 %r357, 32
-%r359 = trunc i480 %r358 to i448
-%r360 = zext i416 %r91 to i448
-%r361 = sub i448 %r359, %r360
-%r362 = lshr i448 %r361, 416
-%r363 = trunc i448 %r362 to i1
-%r364 = select i1 %r363, i448 %r359, i448 %r361
-%r365 = trunc i448 %r364 to i416
-%r366 = trunc i416 %r365 to i32
-%r368 = getelementptr i32, i32* %r1, i32 0
-store i32 %r366, i32* %r368
-%r369 = lshr i416 %r365, 32
-%r370 = trunc i416 %r369 to i32
-%r372 = getelementptr i32, i32* %r1, i32 1
-store i32 %r370, i32* %r372
-%r373 = lshr i416 %r369, 32
-%r374 = trunc i416 %r373 to i32
-%r376 = getelementptr i32, i32* %r1, i32 2
-store i32 %r374, i32* %r376
-%r377 = lshr i416 %r373, 32
-%r378 = trunc i416 %r377 to i32
-%r380 = getelementptr i32, i32* %r1, i32 3
-store i32 %r378, i32* %r380
-%r381 = lshr i416 %r377, 32
-%r382 = trunc i416 %r381 to i32
-%r384 = getelementptr i32, i32* %r1, i32 4
-store i32 %r382, i32* %r384
-%r385 = lshr i416 %r381, 32
-%r386 = trunc i416 %r385 to i32
-%r388 = getelementptr i32, i32* %r1, i32 5
-store i32 %r386, i32* %r388
-%r389 = lshr i416 %r385, 32
-%r390 = trunc i416 %r389 to i32
-%r392 = getelementptr i32, i32* %r1, i32 6
-store i32 %r390, i32* %r392
-%r393 = lshr i416 %r389, 32
-%r394 = trunc i416 %r393 to i32
-%r396 = getelementptr i32, i32* %r1, i32 7
-store i32 %r394, i32* %r396
-%r397 = lshr i416 %r393, 32
-%r398 = trunc i416 %r397 to i32
-%r400 = getelementptr i32, i32* %r1, i32 8
-store i32 %r398, i32* %r400
-%r401 = lshr i416 %r397, 32
-%r402 = trunc i416 %r401 to i32
-%r404 = getelementptr i32, i32* %r1, i32 9
-store i32 %r402, i32* %r404
-%r405 = lshr i416 %r401, 32
-%r406 = trunc i416 %r405 to i32
-%r408 = getelementptr i32, i32* %r1, i32 10
-store i32 %r406, i32* %r408
-%r409 = lshr i416 %r405, 32
-%r410 = trunc i416 %r409 to i32
-%r412 = getelementptr i32, i32* %r1, i32 11
-store i32 %r410, i32* %r412
-%r413 = lshr i416 %r409, 32
-%r414 = trunc i416 %r413 to i32
-%r416 = getelementptr i32, i32* %r1, i32 12
-store i32 %r414, i32* %r416
-ret void
-}
-define i32 @mcl_fp_addPre13L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r3, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r3, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r3, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r91 = load i32, i32* %r4
-%r92 = zext i32 %r91 to i64
-%r94 = getelementptr i32, i32* %r4, i32 1
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i64
-%r97 = shl i64 %r96, 32
-%r98 = or i64 %r92, %r97
-%r99 = zext i64 %r98 to i96
-%r101 = getelementptr i32, i32* %r4, i32 2
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i96
-%r104 = shl i96 %r103, 64
-%r105 = or i96 %r99, %r104
-%r106 = zext i96 %r105 to i128
-%r108 = getelementptr i32, i32* %r4, i32 3
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i128
-%r111 = shl i128 %r110, 96
-%r112 = or i128 %r106, %r111
-%r113 = zext i128 %r112 to i160
-%r115 = getelementptr i32, i32* %r4, i32 4
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i160
-%r118 = shl i160 %r117, 128
-%r119 = or i160 %r113, %r118
-%r120 = zext i160 %r119 to i192
-%r122 = getelementptr i32, i32* %r4, i32 5
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i192
-%r125 = shl i192 %r124, 160
-%r126 = or i192 %r120, %r125
-%r127 = zext i192 %r126 to i224
-%r129 = getelementptr i32, i32* %r4, i32 6
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i224
-%r132 = shl i224 %r131, 192
-%r133 = or i224 %r127, %r132
-%r134 = zext i224 %r133 to i256
-%r136 = getelementptr i32, i32* %r4, i32 7
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i256
-%r139 = shl i256 %r138, 224
-%r140 = or i256 %r134, %r139
-%r141 = zext i256 %r140 to i288
-%r143 = getelementptr i32, i32* %r4, i32 8
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i288
-%r146 = shl i288 %r145, 256
-%r147 = or i288 %r141, %r146
-%r148 = zext i288 %r147 to i320
-%r150 = getelementptr i32, i32* %r4, i32 9
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i320
-%r153 = shl i320 %r152, 288
-%r154 = or i320 %r148, %r153
-%r155 = zext i320 %r154 to i352
-%r157 = getelementptr i32, i32* %r4, i32 10
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i352
-%r160 = shl i352 %r159, 320
-%r161 = or i352 %r155, %r160
-%r162 = zext i352 %r161 to i384
-%r164 = getelementptr i32, i32* %r4, i32 11
-%r165 = load i32, i32* %r164
-%r166 = zext i32 %r165 to i384
-%r167 = shl i384 %r166, 352
-%r168 = or i384 %r162, %r167
-%r169 = zext i384 %r168 to i416
-%r171 = getelementptr i32, i32* %r4, i32 12
-%r172 = load i32, i32* %r171
-%r173 = zext i32 %r172 to i416
-%r174 = shl i416 %r173, 384
-%r175 = or i416 %r169, %r174
-%r176 = zext i416 %r175 to i448
-%r177 = add i448 %r90, %r176
-%r178 = trunc i448 %r177 to i416
-%r179 = trunc i416 %r178 to i32
-%r181 = getelementptr i32, i32* %r2, i32 0
-store i32 %r179, i32* %r181
-%r182 = lshr i416 %r178, 32
-%r183 = trunc i416 %r182 to i32
-%r185 = getelementptr i32, i32* %r2, i32 1
-store i32 %r183, i32* %r185
-%r186 = lshr i416 %r182, 32
-%r187 = trunc i416 %r186 to i32
-%r189 = getelementptr i32, i32* %r2, i32 2
-store i32 %r187, i32* %r189
-%r190 = lshr i416 %r186, 32
-%r191 = trunc i416 %r190 to i32
-%r193 = getelementptr i32, i32* %r2, i32 3
-store i32 %r191, i32* %r193
-%r194 = lshr i416 %r190, 32
-%r195 = trunc i416 %r194 to i32
-%r197 = getelementptr i32, i32* %r2, i32 4
-store i32 %r195, i32* %r197
-%r198 = lshr i416 %r194, 32
-%r199 = trunc i416 %r198 to i32
-%r201 = getelementptr i32, i32* %r2, i32 5
-store i32 %r199, i32* %r201
-%r202 = lshr i416 %r198, 32
-%r203 = trunc i416 %r202 to i32
-%r205 = getelementptr i32, i32* %r2, i32 6
-store i32 %r203, i32* %r205
-%r206 = lshr i416 %r202, 32
-%r207 = trunc i416 %r206 to i32
-%r209 = getelementptr i32, i32* %r2, i32 7
-store i32 %r207, i32* %r209
-%r210 = lshr i416 %r206, 32
-%r211 = trunc i416 %r210 to i32
-%r213 = getelementptr i32, i32* %r2, i32 8
-store i32 %r211, i32* %r213
-%r214 = lshr i416 %r210, 32
-%r215 = trunc i416 %r214 to i32
-%r217 = getelementptr i32, i32* %r2, i32 9
-store i32 %r215, i32* %r217
-%r218 = lshr i416 %r214, 32
-%r219 = trunc i416 %r218 to i32
-%r221 = getelementptr i32, i32* %r2, i32 10
-store i32 %r219, i32* %r221
-%r222 = lshr i416 %r218, 32
-%r223 = trunc i416 %r222 to i32
-%r225 = getelementptr i32, i32* %r2, i32 11
-store i32 %r223, i32* %r225
-%r226 = lshr i416 %r222, 32
-%r227 = trunc i416 %r226 to i32
-%r229 = getelementptr i32, i32* %r2, i32 12
-store i32 %r227, i32* %r229
-%r230 = lshr i448 %r177, 416
-%r231 = trunc i448 %r230 to i32
-ret i32 %r231
-}
-define i32 @mcl_fp_subPre13L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r3, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r3, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r3, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r91 = load i32, i32* %r4
-%r92 = zext i32 %r91 to i64
-%r94 = getelementptr i32, i32* %r4, i32 1
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i64
-%r97 = shl i64 %r96, 32
-%r98 = or i64 %r92, %r97
-%r99 = zext i64 %r98 to i96
-%r101 = getelementptr i32, i32* %r4, i32 2
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i96
-%r104 = shl i96 %r103, 64
-%r105 = or i96 %r99, %r104
-%r106 = zext i96 %r105 to i128
-%r108 = getelementptr i32, i32* %r4, i32 3
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i128
-%r111 = shl i128 %r110, 96
-%r112 = or i128 %r106, %r111
-%r113 = zext i128 %r112 to i160
-%r115 = getelementptr i32, i32* %r4, i32 4
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i160
-%r118 = shl i160 %r117, 128
-%r119 = or i160 %r113, %r118
-%r120 = zext i160 %r119 to i192
-%r122 = getelementptr i32, i32* %r4, i32 5
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i192
-%r125 = shl i192 %r124, 160
-%r126 = or i192 %r120, %r125
-%r127 = zext i192 %r126 to i224
-%r129 = getelementptr i32, i32* %r4, i32 6
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i224
-%r132 = shl i224 %r131, 192
-%r133 = or i224 %r127, %r132
-%r134 = zext i224 %r133 to i256
-%r136 = getelementptr i32, i32* %r4, i32 7
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i256
-%r139 = shl i256 %r138, 224
-%r140 = or i256 %r134, %r139
-%r141 = zext i256 %r140 to i288
-%r143 = getelementptr i32, i32* %r4, i32 8
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i288
-%r146 = shl i288 %r145, 256
-%r147 = or i288 %r141, %r146
-%r148 = zext i288 %r147 to i320
-%r150 = getelementptr i32, i32* %r4, i32 9
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i320
-%r153 = shl i320 %r152, 288
-%r154 = or i320 %r148, %r153
-%r155 = zext i320 %r154 to i352
-%r157 = getelementptr i32, i32* %r4, i32 10
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i352
-%r160 = shl i352 %r159, 320
-%r161 = or i352 %r155, %r160
-%r162 = zext i352 %r161 to i384
-%r164 = getelementptr i32, i32* %r4, i32 11
-%r165 = load i32, i32* %r164
-%r166 = zext i32 %r165 to i384
-%r167 = shl i384 %r166, 352
-%r168 = or i384 %r162, %r167
-%r169 = zext i384 %r168 to i416
-%r171 = getelementptr i32, i32* %r4, i32 12
-%r172 = load i32, i32* %r171
-%r173 = zext i32 %r172 to i416
-%r174 = shl i416 %r173, 384
-%r175 = or i416 %r169, %r174
-%r176 = zext i416 %r175 to i448
-%r177 = sub i448 %r90, %r176
-%r178 = trunc i448 %r177 to i416
-%r179 = trunc i416 %r178 to i32
-%r181 = getelementptr i32, i32* %r2, i32 0
-store i32 %r179, i32* %r181
-%r182 = lshr i416 %r178, 32
-%r183 = trunc i416 %r182 to i32
-%r185 = getelementptr i32, i32* %r2, i32 1
-store i32 %r183, i32* %r185
-%r186 = lshr i416 %r182, 32
-%r187 = trunc i416 %r186 to i32
-%r189 = getelementptr i32, i32* %r2, i32 2
-store i32 %r187, i32* %r189
-%r190 = lshr i416 %r186, 32
-%r191 = trunc i416 %r190 to i32
-%r193 = getelementptr i32, i32* %r2, i32 3
-store i32 %r191, i32* %r193
-%r194 = lshr i416 %r190, 32
-%r195 = trunc i416 %r194 to i32
-%r197 = getelementptr i32, i32* %r2, i32 4
-store i32 %r195, i32* %r197
-%r198 = lshr i416 %r194, 32
-%r199 = trunc i416 %r198 to i32
-%r201 = getelementptr i32, i32* %r2, i32 5
-store i32 %r199, i32* %r201
-%r202 = lshr i416 %r198, 32
-%r203 = trunc i416 %r202 to i32
-%r205 = getelementptr i32, i32* %r2, i32 6
-store i32 %r203, i32* %r205
-%r206 = lshr i416 %r202, 32
-%r207 = trunc i416 %r206 to i32
-%r209 = getelementptr i32, i32* %r2, i32 7
-store i32 %r207, i32* %r209
-%r210 = lshr i416 %r206, 32
-%r211 = trunc i416 %r210 to i32
-%r213 = getelementptr i32, i32* %r2, i32 8
-store i32 %r211, i32* %r213
-%r214 = lshr i416 %r210, 32
-%r215 = trunc i416 %r214 to i32
-%r217 = getelementptr i32, i32* %r2, i32 9
-store i32 %r215, i32* %r217
-%r218 = lshr i416 %r214, 32
-%r219 = trunc i416 %r218 to i32
-%r221 = getelementptr i32, i32* %r2, i32 10
-store i32 %r219, i32* %r221
-%r222 = lshr i416 %r218, 32
-%r223 = trunc i416 %r222 to i32
-%r225 = getelementptr i32, i32* %r2, i32 11
-store i32 %r223, i32* %r225
-%r226 = lshr i416 %r222, 32
-%r227 = trunc i416 %r226 to i32
-%r229 = getelementptr i32, i32* %r2, i32 12
-store i32 %r227, i32* %r229
-%r230 = lshr i448 %r177, 416
-%r231 = trunc i448 %r230 to i32
-%r233 = and i32 %r231, 1
-ret i32 %r233
-}
-define void @mcl_fp_shr1_13L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = zext i64 %r10 to i96
-%r13 = getelementptr i32, i32* %r2, i32 2
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i96
-%r16 = shl i96 %r15, 64
-%r17 = or i96 %r11, %r16
-%r18 = zext i96 %r17 to i128
-%r20 = getelementptr i32, i32* %r2, i32 3
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i128
-%r23 = shl i128 %r22, 96
-%r24 = or i128 %r18, %r23
-%r25 = zext i128 %r24 to i160
-%r27 = getelementptr i32, i32* %r2, i32 4
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i160
-%r30 = shl i160 %r29, 128
-%r31 = or i160 %r25, %r30
-%r32 = zext i160 %r31 to i192
-%r34 = getelementptr i32, i32* %r2, i32 5
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i192
-%r37 = shl i192 %r36, 160
-%r38 = or i192 %r32, %r37
-%r39 = zext i192 %r38 to i224
-%r41 = getelementptr i32, i32* %r2, i32 6
-%r42 = load i32, i32* %r41
-%r43 = zext i32 %r42 to i224
-%r44 = shl i224 %r43, 192
-%r45 = or i224 %r39, %r44
-%r46 = zext i224 %r45 to i256
-%r48 = getelementptr i32, i32* %r2, i32 7
-%r49 = load i32, i32* %r48
-%r50 = zext i32 %r49 to i256
-%r51 = shl i256 %r50, 224
-%r52 = or i256 %r46, %r51
-%r53 = zext i256 %r52 to i288
-%r55 = getelementptr i32, i32* %r2, i32 8
-%r56 = load i32, i32* %r55
-%r57 = zext i32 %r56 to i288
-%r58 = shl i288 %r57, 256
-%r59 = or i288 %r53, %r58
-%r60 = zext i288 %r59 to i320
-%r62 = getelementptr i32, i32* %r2, i32 9
-%r63 = load i32, i32* %r62
-%r64 = zext i32 %r63 to i320
-%r65 = shl i320 %r64, 288
-%r66 = or i320 %r60, %r65
-%r67 = zext i320 %r66 to i352
-%r69 = getelementptr i32, i32* %r2, i32 10
-%r70 = load i32, i32* %r69
-%r71 = zext i32 %r70 to i352
-%r72 = shl i352 %r71, 320
-%r73 = or i352 %r67, %r72
-%r74 = zext i352 %r73 to i384
-%r76 = getelementptr i32, i32* %r2, i32 11
-%r77 = load i32, i32* %r76
-%r78 = zext i32 %r77 to i384
-%r79 = shl i384 %r78, 352
-%r80 = or i384 %r74, %r79
-%r81 = zext i384 %r80 to i416
-%r83 = getelementptr i32, i32* %r2, i32 12
-%r84 = load i32, i32* %r83
-%r85 = zext i32 %r84 to i416
-%r86 = shl i416 %r85, 384
-%r87 = or i416 %r81, %r86
-%r88 = lshr i416 %r87, 1
-%r89 = trunc i416 %r88 to i32
-%r91 = getelementptr i32, i32* %r1, i32 0
-store i32 %r89, i32* %r91
-%r92 = lshr i416 %r88, 32
-%r93 = trunc i416 %r92 to i32
-%r95 = getelementptr i32, i32* %r1, i32 1
-store i32 %r93, i32* %r95
-%r96 = lshr i416 %r92, 32
-%r97 = trunc i416 %r96 to i32
-%r99 = getelementptr i32, i32* %r1, i32 2
-store i32 %r97, i32* %r99
-%r100 = lshr i416 %r96, 32
-%r101 = trunc i416 %r100 to i32
-%r103 = getelementptr i32, i32* %r1, i32 3
-store i32 %r101, i32* %r103
-%r104 = lshr i416 %r100, 32
-%r105 = trunc i416 %r104 to i32
-%r107 = getelementptr i32, i32* %r1, i32 4
-store i32 %r105, i32* %r107
-%r108 = lshr i416 %r104, 32
-%r109 = trunc i416 %r108 to i32
-%r111 = getelementptr i32, i32* %r1, i32 5
-store i32 %r109, i32* %r111
-%r112 = lshr i416 %r108, 32
-%r113 = trunc i416 %r112 to i32
-%r115 = getelementptr i32, i32* %r1, i32 6
-store i32 %r113, i32* %r115
-%r116 = lshr i416 %r112, 32
-%r117 = trunc i416 %r116 to i32
-%r119 = getelementptr i32, i32* %r1, i32 7
-store i32 %r117, i32* %r119
-%r120 = lshr i416 %r116, 32
-%r121 = trunc i416 %r120 to i32
-%r123 = getelementptr i32, i32* %r1, i32 8
-store i32 %r121, i32* %r123
-%r124 = lshr i416 %r120, 32
-%r125 = trunc i416 %r124 to i32
-%r127 = getelementptr i32, i32* %r1, i32 9
-store i32 %r125, i32* %r127
-%r128 = lshr i416 %r124, 32
-%r129 = trunc i416 %r128 to i32
-%r131 = getelementptr i32, i32* %r1, i32 10
-store i32 %r129, i32* %r131
-%r132 = lshr i416 %r128, 32
-%r133 = trunc i416 %r132 to i32
-%r135 = getelementptr i32, i32* %r1, i32 11
-store i32 %r133, i32* %r135
-%r136 = lshr i416 %r132, 32
-%r137 = trunc i416 %r136 to i32
-%r139 = getelementptr i32, i32* %r1, i32 12
-store i32 %r137, i32* %r139
-ret void
-}
-define void @mcl_fp_add13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = load i32, i32* %r3
-%r91 = zext i32 %r90 to i64
-%r93 = getelementptr i32, i32* %r3, i32 1
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i64
-%r96 = shl i64 %r95, 32
-%r97 = or i64 %r91, %r96
-%r98 = zext i64 %r97 to i96
-%r100 = getelementptr i32, i32* %r3, i32 2
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i96
-%r103 = shl i96 %r102, 64
-%r104 = or i96 %r98, %r103
-%r105 = zext i96 %r104 to i128
-%r107 = getelementptr i32, i32* %r3, i32 3
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i128
-%r110 = shl i128 %r109, 96
-%r111 = or i128 %r105, %r110
-%r112 = zext i128 %r111 to i160
-%r114 = getelementptr i32, i32* %r3, i32 4
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i160
-%r117 = shl i160 %r116, 128
-%r118 = or i160 %r112, %r117
-%r119 = zext i160 %r118 to i192
-%r121 = getelementptr i32, i32* %r3, i32 5
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i192
-%r124 = shl i192 %r123, 160
-%r125 = or i192 %r119, %r124
-%r126 = zext i192 %r125 to i224
-%r128 = getelementptr i32, i32* %r3, i32 6
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i224
-%r131 = shl i224 %r130, 192
-%r132 = or i224 %r126, %r131
-%r133 = zext i224 %r132 to i256
-%r135 = getelementptr i32, i32* %r3, i32 7
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i256
-%r138 = shl i256 %r137, 224
-%r139 = or i256 %r133, %r138
-%r140 = zext i256 %r139 to i288
-%r142 = getelementptr i32, i32* %r3, i32 8
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i288
-%r145 = shl i288 %r144, 256
-%r146 = or i288 %r140, %r145
-%r147 = zext i288 %r146 to i320
-%r149 = getelementptr i32, i32* %r3, i32 9
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i320
-%r152 = shl i320 %r151, 288
-%r153 = or i320 %r147, %r152
-%r154 = zext i320 %r153 to i352
-%r156 = getelementptr i32, i32* %r3, i32 10
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i352
-%r159 = shl i352 %r158, 320
-%r160 = or i352 %r154, %r159
-%r161 = zext i352 %r160 to i384
-%r163 = getelementptr i32, i32* %r3, i32 11
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i384
-%r166 = shl i384 %r165, 352
-%r167 = or i384 %r161, %r166
-%r168 = zext i384 %r167 to i416
-%r170 = getelementptr i32, i32* %r3, i32 12
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i416
-%r173 = shl i416 %r172, 384
-%r174 = or i416 %r168, %r173
-%r175 = zext i416 %r89 to i448
-%r176 = zext i416 %r174 to i448
-%r177 = add i448 %r175, %r176
-%r178 = trunc i448 %r177 to i416
-%r179 = trunc i416 %r178 to i32
-%r181 = getelementptr i32, i32* %r1, i32 0
-store i32 %r179, i32* %r181
-%r182 = lshr i416 %r178, 32
-%r183 = trunc i416 %r182 to i32
-%r185 = getelementptr i32, i32* %r1, i32 1
-store i32 %r183, i32* %r185
-%r186 = lshr i416 %r182, 32
-%r187 = trunc i416 %r186 to i32
-%r189 = getelementptr i32, i32* %r1, i32 2
-store i32 %r187, i32* %r189
-%r190 = lshr i416 %r186, 32
-%r191 = trunc i416 %r190 to i32
-%r193 = getelementptr i32, i32* %r1, i32 3
-store i32 %r191, i32* %r193
-%r194 = lshr i416 %r190, 32
-%r195 = trunc i416 %r194 to i32
-%r197 = getelementptr i32, i32* %r1, i32 4
-store i32 %r195, i32* %r197
-%r198 = lshr i416 %r194, 32
-%r199 = trunc i416 %r198 to i32
-%r201 = getelementptr i32, i32* %r1, i32 5
-store i32 %r199, i32* %r201
-%r202 = lshr i416 %r198, 32
-%r203 = trunc i416 %r202 to i32
-%r205 = getelementptr i32, i32* %r1, i32 6
-store i32 %r203, i32* %r205
-%r206 = lshr i416 %r202, 32
-%r207 = trunc i416 %r206 to i32
-%r209 = getelementptr i32, i32* %r1, i32 7
-store i32 %r207, i32* %r209
-%r210 = lshr i416 %r206, 32
-%r211 = trunc i416 %r210 to i32
-%r213 = getelementptr i32, i32* %r1, i32 8
-store i32 %r211, i32* %r213
-%r214 = lshr i416 %r210, 32
-%r215 = trunc i416 %r214 to i32
-%r217 = getelementptr i32, i32* %r1, i32 9
-store i32 %r215, i32* %r217
-%r218 = lshr i416 %r214, 32
-%r219 = trunc i416 %r218 to i32
-%r221 = getelementptr i32, i32* %r1, i32 10
-store i32 %r219, i32* %r221
-%r222 = lshr i416 %r218, 32
-%r223 = trunc i416 %r222 to i32
-%r225 = getelementptr i32, i32* %r1, i32 11
-store i32 %r223, i32* %r225
-%r226 = lshr i416 %r222, 32
-%r227 = trunc i416 %r226 to i32
-%r229 = getelementptr i32, i32* %r1, i32 12
-store i32 %r227, i32* %r229
-%r230 = load i32, i32* %r4
-%r231 = zext i32 %r230 to i64
-%r233 = getelementptr i32, i32* %r4, i32 1
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i64
-%r236 = shl i64 %r235, 32
-%r237 = or i64 %r231, %r236
-%r238 = zext i64 %r237 to i96
-%r240 = getelementptr i32, i32* %r4, i32 2
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i96
-%r243 = shl i96 %r242, 64
-%r244 = or i96 %r238, %r243
-%r245 = zext i96 %r244 to i128
-%r247 = getelementptr i32, i32* %r4, i32 3
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i128
-%r250 = shl i128 %r249, 96
-%r251 = or i128 %r245, %r250
-%r252 = zext i128 %r251 to i160
-%r254 = getelementptr i32, i32* %r4, i32 4
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i160
-%r257 = shl i160 %r256, 128
-%r258 = or i160 %r252, %r257
-%r259 = zext i160 %r258 to i192
-%r261 = getelementptr i32, i32* %r4, i32 5
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i192
-%r264 = shl i192 %r263, 160
-%r265 = or i192 %r259, %r264
-%r266 = zext i192 %r265 to i224
-%r268 = getelementptr i32, i32* %r4, i32 6
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i224
-%r271 = shl i224 %r270, 192
-%r272 = or i224 %r266, %r271
-%r273 = zext i224 %r272 to i256
-%r275 = getelementptr i32, i32* %r4, i32 7
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i256
-%r278 = shl i256 %r277, 224
-%r279 = or i256 %r273, %r278
-%r280 = zext i256 %r279 to i288
-%r282 = getelementptr i32, i32* %r4, i32 8
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i288
-%r285 = shl i288 %r284, 256
-%r286 = or i288 %r280, %r285
-%r287 = zext i288 %r286 to i320
-%r289 = getelementptr i32, i32* %r4, i32 9
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i320
-%r292 = shl i320 %r291, 288
-%r293 = or i320 %r287, %r292
-%r294 = zext i320 %r293 to i352
-%r296 = getelementptr i32, i32* %r4, i32 10
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i352
-%r299 = shl i352 %r298, 320
-%r300 = or i352 %r294, %r299
-%r301 = zext i352 %r300 to i384
-%r303 = getelementptr i32, i32* %r4, i32 11
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i384
-%r306 = shl i384 %r305, 352
-%r307 = or i384 %r301, %r306
-%r308 = zext i384 %r307 to i416
-%r310 = getelementptr i32, i32* %r4, i32 12
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i416
-%r313 = shl i416 %r312, 384
-%r314 = or i416 %r308, %r313
-%r315 = zext i416 %r314 to i448
-%r316 = sub i448 %r177, %r315
-%r317 = lshr i448 %r316, 416
-%r318 = trunc i448 %r317 to i1
-br i1%r318, label %carry, label %nocarry
-nocarry:
-%r319 = trunc i448 %r316 to i416
-%r320 = trunc i416 %r319 to i32
-%r322 = getelementptr i32, i32* %r1, i32 0
-store i32 %r320, i32* %r322
-%r323 = lshr i416 %r319, 32
-%r324 = trunc i416 %r323 to i32
-%r326 = getelementptr i32, i32* %r1, i32 1
-store i32 %r324, i32* %r326
-%r327 = lshr i416 %r323, 32
-%r328 = trunc i416 %r327 to i32
-%r330 = getelementptr i32, i32* %r1, i32 2
-store i32 %r328, i32* %r330
-%r331 = lshr i416 %r327, 32
-%r332 = trunc i416 %r331 to i32
-%r334 = getelementptr i32, i32* %r1, i32 3
-store i32 %r332, i32* %r334
-%r335 = lshr i416 %r331, 32
-%r336 = trunc i416 %r335 to i32
-%r338 = getelementptr i32, i32* %r1, i32 4
-store i32 %r336, i32* %r338
-%r339 = lshr i416 %r335, 32
-%r340 = trunc i416 %r339 to i32
-%r342 = getelementptr i32, i32* %r1, i32 5
-store i32 %r340, i32* %r342
-%r343 = lshr i416 %r339, 32
-%r344 = trunc i416 %r343 to i32
-%r346 = getelementptr i32, i32* %r1, i32 6
-store i32 %r344, i32* %r346
-%r347 = lshr i416 %r343, 32
-%r348 = trunc i416 %r347 to i32
-%r350 = getelementptr i32, i32* %r1, i32 7
-store i32 %r348, i32* %r350
-%r351 = lshr i416 %r347, 32
-%r352 = trunc i416 %r351 to i32
-%r354 = getelementptr i32, i32* %r1, i32 8
-store i32 %r352, i32* %r354
-%r355 = lshr i416 %r351, 32
-%r356 = trunc i416 %r355 to i32
-%r358 = getelementptr i32, i32* %r1, i32 9
-store i32 %r356, i32* %r358
-%r359 = lshr i416 %r355, 32
-%r360 = trunc i416 %r359 to i32
-%r362 = getelementptr i32, i32* %r1, i32 10
-store i32 %r360, i32* %r362
-%r363 = lshr i416 %r359, 32
-%r364 = trunc i416 %r363 to i32
-%r366 = getelementptr i32, i32* %r1, i32 11
-store i32 %r364, i32* %r366
-%r367 = lshr i416 %r363, 32
-%r368 = trunc i416 %r367 to i32
-%r370 = getelementptr i32, i32* %r1, i32 12
-store i32 %r368, i32* %r370
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = load i32, i32* %r3
-%r91 = zext i32 %r90 to i64
-%r93 = getelementptr i32, i32* %r3, i32 1
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i64
-%r96 = shl i64 %r95, 32
-%r97 = or i64 %r91, %r96
-%r98 = zext i64 %r97 to i96
-%r100 = getelementptr i32, i32* %r3, i32 2
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i96
-%r103 = shl i96 %r102, 64
-%r104 = or i96 %r98, %r103
-%r105 = zext i96 %r104 to i128
-%r107 = getelementptr i32, i32* %r3, i32 3
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i128
-%r110 = shl i128 %r109, 96
-%r111 = or i128 %r105, %r110
-%r112 = zext i128 %r111 to i160
-%r114 = getelementptr i32, i32* %r3, i32 4
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i160
-%r117 = shl i160 %r116, 128
-%r118 = or i160 %r112, %r117
-%r119 = zext i160 %r118 to i192
-%r121 = getelementptr i32, i32* %r3, i32 5
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i192
-%r124 = shl i192 %r123, 160
-%r125 = or i192 %r119, %r124
-%r126 = zext i192 %r125 to i224
-%r128 = getelementptr i32, i32* %r3, i32 6
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i224
-%r131 = shl i224 %r130, 192
-%r132 = or i224 %r126, %r131
-%r133 = zext i224 %r132 to i256
-%r135 = getelementptr i32, i32* %r3, i32 7
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i256
-%r138 = shl i256 %r137, 224
-%r139 = or i256 %r133, %r138
-%r140 = zext i256 %r139 to i288
-%r142 = getelementptr i32, i32* %r3, i32 8
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i288
-%r145 = shl i288 %r144, 256
-%r146 = or i288 %r140, %r145
-%r147 = zext i288 %r146 to i320
-%r149 = getelementptr i32, i32* %r3, i32 9
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i320
-%r152 = shl i320 %r151, 288
-%r153 = or i320 %r147, %r152
-%r154 = zext i320 %r153 to i352
-%r156 = getelementptr i32, i32* %r3, i32 10
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i352
-%r159 = shl i352 %r158, 320
-%r160 = or i352 %r154, %r159
-%r161 = zext i352 %r160 to i384
-%r163 = getelementptr i32, i32* %r3, i32 11
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i384
-%r166 = shl i384 %r165, 352
-%r167 = or i384 %r161, %r166
-%r168 = zext i384 %r167 to i416
-%r170 = getelementptr i32, i32* %r3, i32 12
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i416
-%r173 = shl i416 %r172, 384
-%r174 = or i416 %r168, %r173
-%r175 = add i416 %r89, %r174
-%r176 = load i32, i32* %r4
-%r177 = zext i32 %r176 to i64
-%r179 = getelementptr i32, i32* %r4, i32 1
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i64
-%r182 = shl i64 %r181, 32
-%r183 = or i64 %r177, %r182
-%r184 = zext i64 %r183 to i96
-%r186 = getelementptr i32, i32* %r4, i32 2
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i96
-%r189 = shl i96 %r188, 64
-%r190 = or i96 %r184, %r189
-%r191 = zext i96 %r190 to i128
-%r193 = getelementptr i32, i32* %r4, i32 3
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i128
-%r196 = shl i128 %r195, 96
-%r197 = or i128 %r191, %r196
-%r198 = zext i128 %r197 to i160
-%r200 = getelementptr i32, i32* %r4, i32 4
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i160
-%r203 = shl i160 %r202, 128
-%r204 = or i160 %r198, %r203
-%r205 = zext i160 %r204 to i192
-%r207 = getelementptr i32, i32* %r4, i32 5
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i192
-%r210 = shl i192 %r209, 160
-%r211 = or i192 %r205, %r210
-%r212 = zext i192 %r211 to i224
-%r214 = getelementptr i32, i32* %r4, i32 6
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i224
-%r217 = shl i224 %r216, 192
-%r218 = or i224 %r212, %r217
-%r219 = zext i224 %r218 to i256
-%r221 = getelementptr i32, i32* %r4, i32 7
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i256
-%r224 = shl i256 %r223, 224
-%r225 = or i256 %r219, %r224
-%r226 = zext i256 %r225 to i288
-%r228 = getelementptr i32, i32* %r4, i32 8
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i288
-%r231 = shl i288 %r230, 256
-%r232 = or i288 %r226, %r231
-%r233 = zext i288 %r232 to i320
-%r235 = getelementptr i32, i32* %r4, i32 9
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i320
-%r238 = shl i320 %r237, 288
-%r239 = or i320 %r233, %r238
-%r240 = zext i320 %r239 to i352
-%r242 = getelementptr i32, i32* %r4, i32 10
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i352
-%r245 = shl i352 %r244, 320
-%r246 = or i352 %r240, %r245
-%r247 = zext i352 %r246 to i384
-%r249 = getelementptr i32, i32* %r4, i32 11
-%r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i384
-%r252 = shl i384 %r251, 352
-%r253 = or i384 %r247, %r252
-%r254 = zext i384 %r253 to i416
-%r256 = getelementptr i32, i32* %r4, i32 12
-%r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i416
-%r259 = shl i416 %r258, 384
-%r260 = or i416 %r254, %r259
-%r261 = sub i416 %r175, %r260
-%r262 = lshr i416 %r261, 415
-%r263 = trunc i416 %r262 to i1
-%r264 = select i1 %r263, i416 %r175, i416 %r261
-%r265 = trunc i416 %r264 to i32
-%r267 = getelementptr i32, i32* %r1, i32 0
-store i32 %r265, i32* %r267
-%r268 = lshr i416 %r264, 32
-%r269 = trunc i416 %r268 to i32
-%r271 = getelementptr i32, i32* %r1, i32 1
-store i32 %r269, i32* %r271
-%r272 = lshr i416 %r268, 32
-%r273 = trunc i416 %r272 to i32
-%r275 = getelementptr i32, i32* %r1, i32 2
-store i32 %r273, i32* %r275
-%r276 = lshr i416 %r272, 32
-%r277 = trunc i416 %r276 to i32
-%r279 = getelementptr i32, i32* %r1, i32 3
-store i32 %r277, i32* %r279
-%r280 = lshr i416 %r276, 32
-%r281 = trunc i416 %r280 to i32
-%r283 = getelementptr i32, i32* %r1, i32 4
-store i32 %r281, i32* %r283
-%r284 = lshr i416 %r280, 32
-%r285 = trunc i416 %r284 to i32
-%r287 = getelementptr i32, i32* %r1, i32 5
-store i32 %r285, i32* %r287
-%r288 = lshr i416 %r284, 32
-%r289 = trunc i416 %r288 to i32
-%r291 = getelementptr i32, i32* %r1, i32 6
-store i32 %r289, i32* %r291
-%r292 = lshr i416 %r288, 32
-%r293 = trunc i416 %r292 to i32
-%r295 = getelementptr i32, i32* %r1, i32 7
-store i32 %r293, i32* %r295
-%r296 = lshr i416 %r292, 32
-%r297 = trunc i416 %r296 to i32
-%r299 = getelementptr i32, i32* %r1, i32 8
-store i32 %r297, i32* %r299
-%r300 = lshr i416 %r296, 32
-%r301 = trunc i416 %r300 to i32
-%r303 = getelementptr i32, i32* %r1, i32 9
-store i32 %r301, i32* %r303
-%r304 = lshr i416 %r300, 32
-%r305 = trunc i416 %r304 to i32
-%r307 = getelementptr i32, i32* %r1, i32 10
-store i32 %r305, i32* %r307
-%r308 = lshr i416 %r304, 32
-%r309 = trunc i416 %r308 to i32
-%r311 = getelementptr i32, i32* %r1, i32 11
-store i32 %r309, i32* %r311
-%r312 = lshr i416 %r308, 32
-%r313 = trunc i416 %r312 to i32
-%r315 = getelementptr i32, i32* %r1, i32 12
-store i32 %r313, i32* %r315
-ret void
-}
-define void @mcl_fp_sub13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = load i32, i32* %r3
-%r91 = zext i32 %r90 to i64
-%r93 = getelementptr i32, i32* %r3, i32 1
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i64
-%r96 = shl i64 %r95, 32
-%r97 = or i64 %r91, %r96
-%r98 = zext i64 %r97 to i96
-%r100 = getelementptr i32, i32* %r3, i32 2
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i96
-%r103 = shl i96 %r102, 64
-%r104 = or i96 %r98, %r103
-%r105 = zext i96 %r104 to i128
-%r107 = getelementptr i32, i32* %r3, i32 3
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i128
-%r110 = shl i128 %r109, 96
-%r111 = or i128 %r105, %r110
-%r112 = zext i128 %r111 to i160
-%r114 = getelementptr i32, i32* %r3, i32 4
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i160
-%r117 = shl i160 %r116, 128
-%r118 = or i160 %r112, %r117
-%r119 = zext i160 %r118 to i192
-%r121 = getelementptr i32, i32* %r3, i32 5
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i192
-%r124 = shl i192 %r123, 160
-%r125 = or i192 %r119, %r124
-%r126 = zext i192 %r125 to i224
-%r128 = getelementptr i32, i32* %r3, i32 6
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i224
-%r131 = shl i224 %r130, 192
-%r132 = or i224 %r126, %r131
-%r133 = zext i224 %r132 to i256
-%r135 = getelementptr i32, i32* %r3, i32 7
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i256
-%r138 = shl i256 %r137, 224
-%r139 = or i256 %r133, %r138
-%r140 = zext i256 %r139 to i288
-%r142 = getelementptr i32, i32* %r3, i32 8
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i288
-%r145 = shl i288 %r144, 256
-%r146 = or i288 %r140, %r145
-%r147 = zext i288 %r146 to i320
-%r149 = getelementptr i32, i32* %r3, i32 9
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i320
-%r152 = shl i320 %r151, 288
-%r153 = or i320 %r147, %r152
-%r154 = zext i320 %r153 to i352
-%r156 = getelementptr i32, i32* %r3, i32 10
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i352
-%r159 = shl i352 %r158, 320
-%r160 = or i352 %r154, %r159
-%r161 = zext i352 %r160 to i384
-%r163 = getelementptr i32, i32* %r3, i32 11
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i384
-%r166 = shl i384 %r165, 352
-%r167 = or i384 %r161, %r166
-%r168 = zext i384 %r167 to i416
-%r170 = getelementptr i32, i32* %r3, i32 12
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i416
-%r173 = shl i416 %r172, 384
-%r174 = or i416 %r168, %r173
-%r175 = zext i416 %r89 to i448
-%r176 = zext i416 %r174 to i448
-%r177 = sub i448 %r175, %r176
-%r178 = trunc i448 %r177 to i416
-%r179 = lshr i448 %r177, 416
-%r180 = trunc i448 %r179 to i1
-%r181 = trunc i416 %r178 to i32
-%r183 = getelementptr i32, i32* %r1, i32 0
-store i32 %r181, i32* %r183
-%r184 = lshr i416 %r178, 32
-%r185 = trunc i416 %r184 to i32
-%r187 = getelementptr i32, i32* %r1, i32 1
-store i32 %r185, i32* %r187
-%r188 = lshr i416 %r184, 32
-%r189 = trunc i416 %r188 to i32
-%r191 = getelementptr i32, i32* %r1, i32 2
-store i32 %r189, i32* %r191
-%r192 = lshr i416 %r188, 32
-%r193 = trunc i416 %r192 to i32
-%r195 = getelementptr i32, i32* %r1, i32 3
-store i32 %r193, i32* %r195
-%r196 = lshr i416 %r192, 32
-%r197 = trunc i416 %r196 to i32
-%r199 = getelementptr i32, i32* %r1, i32 4
-store i32 %r197, i32* %r199
-%r200 = lshr i416 %r196, 32
-%r201 = trunc i416 %r200 to i32
-%r203 = getelementptr i32, i32* %r1, i32 5
-store i32 %r201, i32* %r203
-%r204 = lshr i416 %r200, 32
-%r205 = trunc i416 %r204 to i32
-%r207 = getelementptr i32, i32* %r1, i32 6
-store i32 %r205, i32* %r207
-%r208 = lshr i416 %r204, 32
-%r209 = trunc i416 %r208 to i32
-%r211 = getelementptr i32, i32* %r1, i32 7
-store i32 %r209, i32* %r211
-%r212 = lshr i416 %r208, 32
-%r213 = trunc i416 %r212 to i32
-%r215 = getelementptr i32, i32* %r1, i32 8
-store i32 %r213, i32* %r215
-%r216 = lshr i416 %r212, 32
-%r217 = trunc i416 %r216 to i32
-%r219 = getelementptr i32, i32* %r1, i32 9
-store i32 %r217, i32* %r219
-%r220 = lshr i416 %r216, 32
-%r221 = trunc i416 %r220 to i32
-%r223 = getelementptr i32, i32* %r1, i32 10
-store i32 %r221, i32* %r223
-%r224 = lshr i416 %r220, 32
-%r225 = trunc i416 %r224 to i32
-%r227 = getelementptr i32, i32* %r1, i32 11
-store i32 %r225, i32* %r227
-%r228 = lshr i416 %r224, 32
-%r229 = trunc i416 %r228 to i32
-%r231 = getelementptr i32, i32* %r1, i32 12
-store i32 %r229, i32* %r231
-br i1%r180, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r232 = load i32, i32* %r4
-%r233 = zext i32 %r232 to i64
-%r235 = getelementptr i32, i32* %r4, i32 1
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i64
-%r238 = shl i64 %r237, 32
-%r239 = or i64 %r233, %r238
-%r240 = zext i64 %r239 to i96
-%r242 = getelementptr i32, i32* %r4, i32 2
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i96
-%r245 = shl i96 %r244, 64
-%r246 = or i96 %r240, %r245
-%r247 = zext i96 %r246 to i128
-%r249 = getelementptr i32, i32* %r4, i32 3
-%r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i128
-%r252 = shl i128 %r251, 96
-%r253 = or i128 %r247, %r252
-%r254 = zext i128 %r253 to i160
-%r256 = getelementptr i32, i32* %r4, i32 4
-%r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i160
-%r259 = shl i160 %r258, 128
-%r260 = or i160 %r254, %r259
-%r261 = zext i160 %r260 to i192
-%r263 = getelementptr i32, i32* %r4, i32 5
-%r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i192
-%r266 = shl i192 %r265, 160
-%r267 = or i192 %r261, %r266
-%r268 = zext i192 %r267 to i224
-%r270 = getelementptr i32, i32* %r4, i32 6
-%r271 = load i32, i32* %r270
-%r272 = zext i32 %r271 to i224
-%r273 = shl i224 %r272, 192
-%r274 = or i224 %r268, %r273
-%r275 = zext i224 %r274 to i256
-%r277 = getelementptr i32, i32* %r4, i32 7
-%r278 = load i32, i32* %r277
-%r279 = zext i32 %r278 to i256
-%r280 = shl i256 %r279, 224
-%r281 = or i256 %r275, %r280
-%r282 = zext i256 %r281 to i288
-%r284 = getelementptr i32, i32* %r4, i32 8
-%r285 = load i32, i32* %r284
-%r286 = zext i32 %r285 to i288
-%r287 = shl i288 %r286, 256
-%r288 = or i288 %r282, %r287
-%r289 = zext i288 %r288 to i320
-%r291 = getelementptr i32, i32* %r4, i32 9
-%r292 = load i32, i32* %r291
-%r293 = zext i32 %r292 to i320
-%r294 = shl i320 %r293, 288
-%r295 = or i320 %r289, %r294
-%r296 = zext i320 %r295 to i352
-%r298 = getelementptr i32, i32* %r4, i32 10
-%r299 = load i32, i32* %r298
-%r300 = zext i32 %r299 to i352
-%r301 = shl i352 %r300, 320
-%r302 = or i352 %r296, %r301
-%r303 = zext i352 %r302 to i384
-%r305 = getelementptr i32, i32* %r4, i32 11
-%r306 = load i32, i32* %r305
-%r307 = zext i32 %r306 to i384
-%r308 = shl i384 %r307, 352
-%r309 = or i384 %r303, %r308
-%r310 = zext i384 %r309 to i416
-%r312 = getelementptr i32, i32* %r4, i32 12
-%r313 = load i32, i32* %r312
-%r314 = zext i32 %r313 to i416
-%r315 = shl i416 %r314, 384
-%r316 = or i416 %r310, %r315
-%r317 = add i416 %r178, %r316
-%r318 = trunc i416 %r317 to i32
-%r320 = getelementptr i32, i32* %r1, i32 0
-store i32 %r318, i32* %r320
-%r321 = lshr i416 %r317, 32
-%r322 = trunc i416 %r321 to i32
-%r324 = getelementptr i32, i32* %r1, i32 1
-store i32 %r322, i32* %r324
-%r325 = lshr i416 %r321, 32
-%r326 = trunc i416 %r325 to i32
-%r328 = getelementptr i32, i32* %r1, i32 2
-store i32 %r326, i32* %r328
-%r329 = lshr i416 %r325, 32
-%r330 = trunc i416 %r329 to i32
-%r332 = getelementptr i32, i32* %r1, i32 3
-store i32 %r330, i32* %r332
-%r333 = lshr i416 %r329, 32
-%r334 = trunc i416 %r333 to i32
-%r336 = getelementptr i32, i32* %r1, i32 4
-store i32 %r334, i32* %r336
-%r337 = lshr i416 %r333, 32
-%r338 = trunc i416 %r337 to i32
-%r340 = getelementptr i32, i32* %r1, i32 5
-store i32 %r338, i32* %r340
-%r341 = lshr i416 %r337, 32
-%r342 = trunc i416 %r341 to i32
-%r344 = getelementptr i32, i32* %r1, i32 6
-store i32 %r342, i32* %r344
-%r345 = lshr i416 %r341, 32
-%r346 = trunc i416 %r345 to i32
-%r348 = getelementptr i32, i32* %r1, i32 7
-store i32 %r346, i32* %r348
-%r349 = lshr i416 %r345, 32
-%r350 = trunc i416 %r349 to i32
-%r352 = getelementptr i32, i32* %r1, i32 8
-store i32 %r350, i32* %r352
-%r353 = lshr i416 %r349, 32
-%r354 = trunc i416 %r353 to i32
-%r356 = getelementptr i32, i32* %r1, i32 9
-store i32 %r354, i32* %r356
-%r357 = lshr i416 %r353, 32
-%r358 = trunc i416 %r357 to i32
-%r360 = getelementptr i32, i32* %r1, i32 10
-store i32 %r358, i32* %r360
-%r361 = lshr i416 %r357, 32
-%r362 = trunc i416 %r361 to i32
-%r364 = getelementptr i32, i32* %r1, i32 11
-store i32 %r362, i32* %r364
-%r365 = lshr i416 %r361, 32
-%r366 = trunc i416 %r365 to i32
-%r368 = getelementptr i32, i32* %r1, i32 12
-store i32 %r366, i32* %r368
-ret void
-}
-define void @mcl_fp_subNF13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = load i32, i32* %r3
-%r91 = zext i32 %r90 to i64
-%r93 = getelementptr i32, i32* %r3, i32 1
-%r94 = load i32, i32* %r93
-%r95 = zext i32 %r94 to i64
-%r96 = shl i64 %r95, 32
-%r97 = or i64 %r91, %r96
-%r98 = zext i64 %r97 to i96
-%r100 = getelementptr i32, i32* %r3, i32 2
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i96
-%r103 = shl i96 %r102, 64
-%r104 = or i96 %r98, %r103
-%r105 = zext i96 %r104 to i128
-%r107 = getelementptr i32, i32* %r3, i32 3
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i128
-%r110 = shl i128 %r109, 96
-%r111 = or i128 %r105, %r110
-%r112 = zext i128 %r111 to i160
-%r114 = getelementptr i32, i32* %r3, i32 4
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i160
-%r117 = shl i160 %r116, 128
-%r118 = or i160 %r112, %r117
-%r119 = zext i160 %r118 to i192
-%r121 = getelementptr i32, i32* %r3, i32 5
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i192
-%r124 = shl i192 %r123, 160
-%r125 = or i192 %r119, %r124
-%r126 = zext i192 %r125 to i224
-%r128 = getelementptr i32, i32* %r3, i32 6
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i224
-%r131 = shl i224 %r130, 192
-%r132 = or i224 %r126, %r131
-%r133 = zext i224 %r132 to i256
-%r135 = getelementptr i32, i32* %r3, i32 7
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i256
-%r138 = shl i256 %r137, 224
-%r139 = or i256 %r133, %r138
-%r140 = zext i256 %r139 to i288
-%r142 = getelementptr i32, i32* %r3, i32 8
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i288
-%r145 = shl i288 %r144, 256
-%r146 = or i288 %r140, %r145
-%r147 = zext i288 %r146 to i320
-%r149 = getelementptr i32, i32* %r3, i32 9
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i320
-%r152 = shl i320 %r151, 288
-%r153 = or i320 %r147, %r152
-%r154 = zext i320 %r153 to i352
-%r156 = getelementptr i32, i32* %r3, i32 10
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i352
-%r159 = shl i352 %r158, 320
-%r160 = or i352 %r154, %r159
-%r161 = zext i352 %r160 to i384
-%r163 = getelementptr i32, i32* %r3, i32 11
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i384
-%r166 = shl i384 %r165, 352
-%r167 = or i384 %r161, %r166
-%r168 = zext i384 %r167 to i416
-%r170 = getelementptr i32, i32* %r3, i32 12
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i416
-%r173 = shl i416 %r172, 384
-%r174 = or i416 %r168, %r173
-%r175 = sub i416 %r89, %r174
-%r176 = lshr i416 %r175, 415
-%r177 = trunc i416 %r176 to i1
-%r178 = load i32, i32* %r4
-%r179 = zext i32 %r178 to i64
-%r181 = getelementptr i32, i32* %r4, i32 1
-%r182 = load i32, i32* %r181
-%r183 = zext i32 %r182 to i64
-%r184 = shl i64 %r183, 32
-%r185 = or i64 %r179, %r184
-%r186 = zext i64 %r185 to i96
-%r188 = getelementptr i32, i32* %r4, i32 2
-%r189 = load i32, i32* %r188
-%r190 = zext i32 %r189 to i96
-%r191 = shl i96 %r190, 64
-%r192 = or i96 %r186, %r191
-%r193 = zext i96 %r192 to i128
-%r195 = getelementptr i32, i32* %r4, i32 3
-%r196 = load i32, i32* %r195
-%r197 = zext i32 %r196 to i128
-%r198 = shl i128 %r197, 96
-%r199 = or i128 %r193, %r198
-%r200 = zext i128 %r199 to i160
-%r202 = getelementptr i32, i32* %r4, i32 4
-%r203 = load i32, i32* %r202
-%r204 = zext i32 %r203 to i160
-%r205 = shl i160 %r204, 128
-%r206 = or i160 %r200, %r205
-%r207 = zext i160 %r206 to i192
-%r209 = getelementptr i32, i32* %r4, i32 5
-%r210 = load i32, i32* %r209
-%r211 = zext i32 %r210 to i192
-%r212 = shl i192 %r211, 160
-%r213 = or i192 %r207, %r212
-%r214 = zext i192 %r213 to i224
-%r216 = getelementptr i32, i32* %r4, i32 6
-%r217 = load i32, i32* %r216
-%r218 = zext i32 %r217 to i224
-%r219 = shl i224 %r218, 192
-%r220 = or i224 %r214, %r219
-%r221 = zext i224 %r220 to i256
-%r223 = getelementptr i32, i32* %r4, i32 7
-%r224 = load i32, i32* %r223
-%r225 = zext i32 %r224 to i256
-%r226 = shl i256 %r225, 224
-%r227 = or i256 %r221, %r226
-%r228 = zext i256 %r227 to i288
-%r230 = getelementptr i32, i32* %r4, i32 8
-%r231 = load i32, i32* %r230
-%r232 = zext i32 %r231 to i288
-%r233 = shl i288 %r232, 256
-%r234 = or i288 %r228, %r233
-%r235 = zext i288 %r234 to i320
-%r237 = getelementptr i32, i32* %r4, i32 9
-%r238 = load i32, i32* %r237
-%r239 = zext i32 %r238 to i320
-%r240 = shl i320 %r239, 288
-%r241 = or i320 %r235, %r240
-%r242 = zext i320 %r241 to i352
-%r244 = getelementptr i32, i32* %r4, i32 10
-%r245 = load i32, i32* %r244
-%r246 = zext i32 %r245 to i352
-%r247 = shl i352 %r246, 320
-%r248 = or i352 %r242, %r247
-%r249 = zext i352 %r248 to i384
-%r251 = getelementptr i32, i32* %r4, i32 11
-%r252 = load i32, i32* %r251
-%r253 = zext i32 %r252 to i384
-%r254 = shl i384 %r253, 352
-%r255 = or i384 %r249, %r254
-%r256 = zext i384 %r255 to i416
-%r258 = getelementptr i32, i32* %r4, i32 12
-%r259 = load i32, i32* %r258
-%r260 = zext i32 %r259 to i416
-%r261 = shl i416 %r260, 384
-%r262 = or i416 %r256, %r261
-%r264 = select i1 %r177, i416 %r262, i416 0
-%r265 = add i416 %r175, %r264
-%r266 = trunc i416 %r265 to i32
-%r268 = getelementptr i32, i32* %r1, i32 0
-store i32 %r266, i32* %r268
-%r269 = lshr i416 %r265, 32
-%r270 = trunc i416 %r269 to i32
-%r272 = getelementptr i32, i32* %r1, i32 1
-store i32 %r270, i32* %r272
-%r273 = lshr i416 %r269, 32
-%r274 = trunc i416 %r273 to i32
-%r276 = getelementptr i32, i32* %r1, i32 2
-store i32 %r274, i32* %r276
-%r277 = lshr i416 %r273, 32
-%r278 = trunc i416 %r277 to i32
-%r280 = getelementptr i32, i32* %r1, i32 3
-store i32 %r278, i32* %r280
-%r281 = lshr i416 %r277, 32
-%r282 = trunc i416 %r281 to i32
-%r284 = getelementptr i32, i32* %r1, i32 4
-store i32 %r282, i32* %r284
-%r285 = lshr i416 %r281, 32
-%r286 = trunc i416 %r285 to i32
-%r288 = getelementptr i32, i32* %r1, i32 5
-store i32 %r286, i32* %r288
-%r289 = lshr i416 %r285, 32
-%r290 = trunc i416 %r289 to i32
-%r292 = getelementptr i32, i32* %r1, i32 6
-store i32 %r290, i32* %r292
-%r293 = lshr i416 %r289, 32
-%r294 = trunc i416 %r293 to i32
-%r296 = getelementptr i32, i32* %r1, i32 7
-store i32 %r294, i32* %r296
-%r297 = lshr i416 %r293, 32
-%r298 = trunc i416 %r297 to i32
-%r300 = getelementptr i32, i32* %r1, i32 8
-store i32 %r298, i32* %r300
-%r301 = lshr i416 %r297, 32
-%r302 = trunc i416 %r301 to i32
-%r304 = getelementptr i32, i32* %r1, i32 9
-store i32 %r302, i32* %r304
-%r305 = lshr i416 %r301, 32
-%r306 = trunc i416 %r305 to i32
-%r308 = getelementptr i32, i32* %r1, i32 10
-store i32 %r306, i32* %r308
-%r309 = lshr i416 %r305, 32
-%r310 = trunc i416 %r309 to i32
-%r312 = getelementptr i32, i32* %r1, i32 11
-store i32 %r310, i32* %r312
-%r313 = lshr i416 %r309, 32
-%r314 = trunc i416 %r313 to i32
-%r316 = getelementptr i32, i32* %r1, i32 12
-store i32 %r314, i32* %r316
-ret void
-}
-define void @mcl_fpDbl_add13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = zext i576 %r124 to i608
-%r127 = getelementptr i32, i32* %r2, i32 18
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i608
-%r130 = shl i608 %r129, 576
-%r131 = or i608 %r125, %r130
-%r132 = zext i608 %r131 to i640
-%r134 = getelementptr i32, i32* %r2, i32 19
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i640
-%r137 = shl i640 %r136, 608
-%r138 = or i640 %r132, %r137
-%r139 = zext i640 %r138 to i672
-%r141 = getelementptr i32, i32* %r2, i32 20
-%r142 = load i32, i32* %r141
-%r143 = zext i32 %r142 to i672
-%r144 = shl i672 %r143, 640
-%r145 = or i672 %r139, %r144
-%r146 = zext i672 %r145 to i704
-%r148 = getelementptr i32, i32* %r2, i32 21
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i704
-%r151 = shl i704 %r150, 672
-%r152 = or i704 %r146, %r151
-%r153 = zext i704 %r152 to i736
-%r155 = getelementptr i32, i32* %r2, i32 22
-%r156 = load i32, i32* %r155
-%r157 = zext i32 %r156 to i736
-%r158 = shl i736 %r157, 704
-%r159 = or i736 %r153, %r158
-%r160 = zext i736 %r159 to i768
-%r162 = getelementptr i32, i32* %r2, i32 23
-%r163 = load i32, i32* %r162
-%r164 = zext i32 %r163 to i768
-%r165 = shl i768 %r164, 736
-%r166 = or i768 %r160, %r165
-%r167 = zext i768 %r166 to i800
-%r169 = getelementptr i32, i32* %r2, i32 24
-%r170 = load i32, i32* %r169
-%r171 = zext i32 %r170 to i800
-%r172 = shl i800 %r171, 768
-%r173 = or i800 %r167, %r172
-%r174 = zext i800 %r173 to i832
-%r176 = getelementptr i32, i32* %r2, i32 25
-%r177 = load i32, i32* %r176
-%r178 = zext i32 %r177 to i832
-%r179 = shl i832 %r178, 800
-%r180 = or i832 %r174, %r179
-%r181 = load i32, i32* %r3
-%r182 = zext i32 %r181 to i64
-%r184 = getelementptr i32, i32* %r3, i32 1
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i64
-%r187 = shl i64 %r186, 32
-%r188 = or i64 %r182, %r187
-%r189 = zext i64 %r188 to i96
-%r191 = getelementptr i32, i32* %r3, i32 2
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i96
-%r194 = shl i96 %r193, 64
-%r195 = or i96 %r189, %r194
-%r196 = zext i96 %r195 to i128
-%r198 = getelementptr i32, i32* %r3, i32 3
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i128
-%r201 = shl i128 %r200, 96
-%r202 = or i128 %r196, %r201
-%r203 = zext i128 %r202 to i160
-%r205 = getelementptr i32, i32* %r3, i32 4
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i160
-%r208 = shl i160 %r207, 128
-%r209 = or i160 %r203, %r208
-%r210 = zext i160 %r209 to i192
-%r212 = getelementptr i32, i32* %r3, i32 5
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i192
-%r215 = shl i192 %r214, 160
-%r216 = or i192 %r210, %r215
-%r217 = zext i192 %r216 to i224
-%r219 = getelementptr i32, i32* %r3, i32 6
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i224
-%r222 = shl i224 %r221, 192
-%r223 = or i224 %r217, %r222
-%r224 = zext i224 %r223 to i256
-%r226 = getelementptr i32, i32* %r3, i32 7
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i256
-%r229 = shl i256 %r228, 224
-%r230 = or i256 %r224, %r229
-%r231 = zext i256 %r230 to i288
-%r233 = getelementptr i32, i32* %r3, i32 8
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i288
-%r236 = shl i288 %r235, 256
-%r237 = or i288 %r231, %r236
-%r238 = zext i288 %r237 to i320
-%r240 = getelementptr i32, i32* %r3, i32 9
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i320
-%r243 = shl i320 %r242, 288
-%r244 = or i320 %r238, %r243
-%r245 = zext i320 %r244 to i352
-%r247 = getelementptr i32, i32* %r3, i32 10
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i352
-%r250 = shl i352 %r249, 320
-%r251 = or i352 %r245, %r250
-%r252 = zext i352 %r251 to i384
-%r254 = getelementptr i32, i32* %r3, i32 11
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i384
-%r257 = shl i384 %r256, 352
-%r258 = or i384 %r252, %r257
-%r259 = zext i384 %r258 to i416
-%r261 = getelementptr i32, i32* %r3, i32 12
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i416
-%r264 = shl i416 %r263, 384
-%r265 = or i416 %r259, %r264
-%r266 = zext i416 %r265 to i448
-%r268 = getelementptr i32, i32* %r3, i32 13
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i448
-%r271 = shl i448 %r270, 416
-%r272 = or i448 %r266, %r271
-%r273 = zext i448 %r272 to i480
-%r275 = getelementptr i32, i32* %r3, i32 14
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i480
-%r278 = shl i480 %r277, 448
-%r279 = or i480 %r273, %r278
-%r280 = zext i480 %r279 to i512
-%r282 = getelementptr i32, i32* %r3, i32 15
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i512
-%r285 = shl i512 %r284, 480
-%r286 = or i512 %r280, %r285
-%r287 = zext i512 %r286 to i544
-%r289 = getelementptr i32, i32* %r3, i32 16
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i544
-%r292 = shl i544 %r291, 512
-%r293 = or i544 %r287, %r292
-%r294 = zext i544 %r293 to i576
-%r296 = getelementptr i32, i32* %r3, i32 17
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i576
-%r299 = shl i576 %r298, 544
-%r300 = or i576 %r294, %r299
-%r301 = zext i576 %r300 to i608
-%r303 = getelementptr i32, i32* %r3, i32 18
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i608
-%r306 = shl i608 %r305, 576
-%r307 = or i608 %r301, %r306
-%r308 = zext i608 %r307 to i640
-%r310 = getelementptr i32, i32* %r3, i32 19
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i640
-%r313 = shl i640 %r312, 608
-%r314 = or i640 %r308, %r313
-%r315 = zext i640 %r314 to i672
-%r317 = getelementptr i32, i32* %r3, i32 20
-%r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i672
-%r320 = shl i672 %r319, 640
-%r321 = or i672 %r315, %r320
-%r322 = zext i672 %r321 to i704
-%r324 = getelementptr i32, i32* %r3, i32 21
-%r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i704
-%r327 = shl i704 %r326, 672
-%r328 = or i704 %r322, %r327
-%r329 = zext i704 %r328 to i736
-%r331 = getelementptr i32, i32* %r3, i32 22
-%r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i736
-%r334 = shl i736 %r333, 704
-%r335 = or i736 %r329, %r334
-%r336 = zext i736 %r335 to i768
-%r338 = getelementptr i32, i32* %r3, i32 23
-%r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i768
-%r341 = shl i768 %r340, 736
-%r342 = or i768 %r336, %r341
-%r343 = zext i768 %r342 to i800
-%r345 = getelementptr i32, i32* %r3, i32 24
-%r346 = load i32, i32* %r345
-%r347 = zext i32 %r346 to i800
-%r348 = shl i800 %r347, 768
-%r349 = or i800 %r343, %r348
-%r350 = zext i800 %r349 to i832
-%r352 = getelementptr i32, i32* %r3, i32 25
-%r353 = load i32, i32* %r352
-%r354 = zext i32 %r353 to i832
-%r355 = shl i832 %r354, 800
-%r356 = or i832 %r350, %r355
-%r357 = zext i832 %r180 to i864
-%r358 = zext i832 %r356 to i864
-%r359 = add i864 %r357, %r358
-%r360 = trunc i864 %r359 to i416
-%r361 = trunc i416 %r360 to i32
-%r363 = getelementptr i32, i32* %r1, i32 0
-store i32 %r361, i32* %r363
-%r364 = lshr i416 %r360, 32
-%r365 = trunc i416 %r364 to i32
-%r367 = getelementptr i32, i32* %r1, i32 1
-store i32 %r365, i32* %r367
-%r368 = lshr i416 %r364, 32
-%r369 = trunc i416 %r368 to i32
-%r371 = getelementptr i32, i32* %r1, i32 2
-store i32 %r369, i32* %r371
-%r372 = lshr i416 %r368, 32
-%r373 = trunc i416 %r372 to i32
-%r375 = getelementptr i32, i32* %r1, i32 3
-store i32 %r373, i32* %r375
-%r376 = lshr i416 %r372, 32
-%r377 = trunc i416 %r376 to i32
-%r379 = getelementptr i32, i32* %r1, i32 4
-store i32 %r377, i32* %r379
-%r380 = lshr i416 %r376, 32
-%r381 = trunc i416 %r380 to i32
-%r383 = getelementptr i32, i32* %r1, i32 5
-store i32 %r381, i32* %r383
-%r384 = lshr i416 %r380, 32
-%r385 = trunc i416 %r384 to i32
-%r387 = getelementptr i32, i32* %r1, i32 6
-store i32 %r385, i32* %r387
-%r388 = lshr i416 %r384, 32
-%r389 = trunc i416 %r388 to i32
-%r391 = getelementptr i32, i32* %r1, i32 7
-store i32 %r389, i32* %r391
-%r392 = lshr i416 %r388, 32
-%r393 = trunc i416 %r392 to i32
-%r395 = getelementptr i32, i32* %r1, i32 8
-store i32 %r393, i32* %r395
-%r396 = lshr i416 %r392, 32
-%r397 = trunc i416 %r396 to i32
-%r399 = getelementptr i32, i32* %r1, i32 9
-store i32 %r397, i32* %r399
-%r400 = lshr i416 %r396, 32
-%r401 = trunc i416 %r400 to i32
-%r403 = getelementptr i32, i32* %r1, i32 10
-store i32 %r401, i32* %r403
-%r404 = lshr i416 %r400, 32
-%r405 = trunc i416 %r404 to i32
-%r407 = getelementptr i32, i32* %r1, i32 11
-store i32 %r405, i32* %r407
-%r408 = lshr i416 %r404, 32
-%r409 = trunc i416 %r408 to i32
-%r411 = getelementptr i32, i32* %r1, i32 12
-store i32 %r409, i32* %r411
-%r412 = lshr i864 %r359, 416
-%r413 = trunc i864 %r412 to i448
-%r414 = load i32, i32* %r4
-%r415 = zext i32 %r414 to i64
-%r417 = getelementptr i32, i32* %r4, i32 1
-%r418 = load i32, i32* %r417
-%r419 = zext i32 %r418 to i64
-%r420 = shl i64 %r419, 32
-%r421 = or i64 %r415, %r420
-%r422 = zext i64 %r421 to i96
-%r424 = getelementptr i32, i32* %r4, i32 2
-%r425 = load i32, i32* %r424
-%r426 = zext i32 %r425 to i96
-%r427 = shl i96 %r426, 64
-%r428 = or i96 %r422, %r427
-%r429 = zext i96 %r428 to i128
-%r431 = getelementptr i32, i32* %r4, i32 3
-%r432 = load i32, i32* %r431
-%r433 = zext i32 %r432 to i128
-%r434 = shl i128 %r433, 96
-%r435 = or i128 %r429, %r434
-%r436 = zext i128 %r435 to i160
-%r438 = getelementptr i32, i32* %r4, i32 4
-%r439 = load i32, i32* %r438
-%r440 = zext i32 %r439 to i160
-%r441 = shl i160 %r440, 128
-%r442 = or i160 %r436, %r441
-%r443 = zext i160 %r442 to i192
-%r445 = getelementptr i32, i32* %r4, i32 5
-%r446 = load i32, i32* %r445
-%r447 = zext i32 %r446 to i192
-%r448 = shl i192 %r447, 160
-%r449 = or i192 %r443, %r448
-%r450 = zext i192 %r449 to i224
-%r452 = getelementptr i32, i32* %r4, i32 6
-%r453 = load i32, i32* %r452
-%r454 = zext i32 %r453 to i224
-%r455 = shl i224 %r454, 192
-%r456 = or i224 %r450, %r455
-%r457 = zext i224 %r456 to i256
-%r459 = getelementptr i32, i32* %r4, i32 7
-%r460 = load i32, i32* %r459
-%r461 = zext i32 %r460 to i256
-%r462 = shl i256 %r461, 224
-%r463 = or i256 %r457, %r462
-%r464 = zext i256 %r463 to i288
-%r466 = getelementptr i32, i32* %r4, i32 8
-%r467 = load i32, i32* %r466
-%r468 = zext i32 %r467 to i288
-%r469 = shl i288 %r468, 256
-%r470 = or i288 %r464, %r469
-%r471 = zext i288 %r470 to i320
-%r473 = getelementptr i32, i32* %r4, i32 9
-%r474 = load i32, i32* %r473
-%r475 = zext i32 %r474 to i320
-%r476 = shl i320 %r475, 288
-%r477 = or i320 %r471, %r476
-%r478 = zext i320 %r477 to i352
-%r480 = getelementptr i32, i32* %r4, i32 10
-%r481 = load i32, i32* %r480
-%r482 = zext i32 %r481 to i352
-%r483 = shl i352 %r482, 320
-%r484 = or i352 %r478, %r483
-%r485 = zext i352 %r484 to i384
-%r487 = getelementptr i32, i32* %r4, i32 11
-%r488 = load i32, i32* %r487
-%r489 = zext i32 %r488 to i384
-%r490 = shl i384 %r489, 352
-%r491 = or i384 %r485, %r490
-%r492 = zext i384 %r491 to i416
-%r494 = getelementptr i32, i32* %r4, i32 12
-%r495 = load i32, i32* %r494
-%r496 = zext i32 %r495 to i416
-%r497 = shl i416 %r496, 384
-%r498 = or i416 %r492, %r497
-%r499 = zext i416 %r498 to i448
-%r500 = sub i448 %r413, %r499
-%r501 = lshr i448 %r500, 416
-%r502 = trunc i448 %r501 to i1
-%r503 = select i1 %r502, i448 %r413, i448 %r500
-%r504 = trunc i448 %r503 to i416
-%r506 = getelementptr i32, i32* %r1, i32 13
-%r507 = trunc i416 %r504 to i32
-%r509 = getelementptr i32, i32* %r506, i32 0
-store i32 %r507, i32* %r509
-%r510 = lshr i416 %r504, 32
-%r511 = trunc i416 %r510 to i32
-%r513 = getelementptr i32, i32* %r506, i32 1
-store i32 %r511, i32* %r513
-%r514 = lshr i416 %r510, 32
-%r515 = trunc i416 %r514 to i32
-%r517 = getelementptr i32, i32* %r506, i32 2
-store i32 %r515, i32* %r517
-%r518 = lshr i416 %r514, 32
-%r519 = trunc i416 %r518 to i32
-%r521 = getelementptr i32, i32* %r506, i32 3
-store i32 %r519, i32* %r521
-%r522 = lshr i416 %r518, 32
-%r523 = trunc i416 %r522 to i32
-%r525 = getelementptr i32, i32* %r506, i32 4
-store i32 %r523, i32* %r525
-%r526 = lshr i416 %r522, 32
-%r527 = trunc i416 %r526 to i32
-%r529 = getelementptr i32, i32* %r506, i32 5
-store i32 %r527, i32* %r529
-%r530 = lshr i416 %r526, 32
-%r531 = trunc i416 %r530 to i32
-%r533 = getelementptr i32, i32* %r506, i32 6
-store i32 %r531, i32* %r533
-%r534 = lshr i416 %r530, 32
-%r535 = trunc i416 %r534 to i32
-%r537 = getelementptr i32, i32* %r506, i32 7
-store i32 %r535, i32* %r537
-%r538 = lshr i416 %r534, 32
-%r539 = trunc i416 %r538 to i32
-%r541 = getelementptr i32, i32* %r506, i32 8
-store i32 %r539, i32* %r541
-%r542 = lshr i416 %r538, 32
-%r543 = trunc i416 %r542 to i32
-%r545 = getelementptr i32, i32* %r506, i32 9
-store i32 %r543, i32* %r545
-%r546 = lshr i416 %r542, 32
-%r547 = trunc i416 %r546 to i32
-%r549 = getelementptr i32, i32* %r506, i32 10
-store i32 %r547, i32* %r549
-%r550 = lshr i416 %r546, 32
-%r551 = trunc i416 %r550 to i32
-%r553 = getelementptr i32, i32* %r506, i32 11
-store i32 %r551, i32* %r553
-%r554 = lshr i416 %r550, 32
-%r555 = trunc i416 %r554 to i32
-%r557 = getelementptr i32, i32* %r506, i32 12
-store i32 %r555, i32* %r557
-ret void
-}
-define void @mcl_fpDbl_sub13L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = zext i576 %r124 to i608
-%r127 = getelementptr i32, i32* %r2, i32 18
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i608
-%r130 = shl i608 %r129, 576
-%r131 = or i608 %r125, %r130
-%r132 = zext i608 %r131 to i640
-%r134 = getelementptr i32, i32* %r2, i32 19
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i640
-%r137 = shl i640 %r136, 608
-%r138 = or i640 %r132, %r137
-%r139 = zext i640 %r138 to i672
-%r141 = getelementptr i32, i32* %r2, i32 20
-%r142 = load i32, i32* %r141
-%r143 = zext i32 %r142 to i672
-%r144 = shl i672 %r143, 640
-%r145 = or i672 %r139, %r144
-%r146 = zext i672 %r145 to i704
-%r148 = getelementptr i32, i32* %r2, i32 21
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i704
-%r151 = shl i704 %r150, 672
-%r152 = or i704 %r146, %r151
-%r153 = zext i704 %r152 to i736
-%r155 = getelementptr i32, i32* %r2, i32 22
-%r156 = load i32, i32* %r155
-%r157 = zext i32 %r156 to i736
-%r158 = shl i736 %r157, 704
-%r159 = or i736 %r153, %r158
-%r160 = zext i736 %r159 to i768
-%r162 = getelementptr i32, i32* %r2, i32 23
-%r163 = load i32, i32* %r162
-%r164 = zext i32 %r163 to i768
-%r165 = shl i768 %r164, 736
-%r166 = or i768 %r160, %r165
-%r167 = zext i768 %r166 to i800
-%r169 = getelementptr i32, i32* %r2, i32 24
-%r170 = load i32, i32* %r169
-%r171 = zext i32 %r170 to i800
-%r172 = shl i800 %r171, 768
-%r173 = or i800 %r167, %r172
-%r174 = zext i800 %r173 to i832
-%r176 = getelementptr i32, i32* %r2, i32 25
-%r177 = load i32, i32* %r176
-%r178 = zext i32 %r177 to i832
-%r179 = shl i832 %r178, 800
-%r180 = or i832 %r174, %r179
-%r181 = load i32, i32* %r3
-%r182 = zext i32 %r181 to i64
-%r184 = getelementptr i32, i32* %r3, i32 1
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i64
-%r187 = shl i64 %r186, 32
-%r188 = or i64 %r182, %r187
-%r189 = zext i64 %r188 to i96
-%r191 = getelementptr i32, i32* %r3, i32 2
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i96
-%r194 = shl i96 %r193, 64
-%r195 = or i96 %r189, %r194
-%r196 = zext i96 %r195 to i128
-%r198 = getelementptr i32, i32* %r3, i32 3
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i128
-%r201 = shl i128 %r200, 96
-%r202 = or i128 %r196, %r201
-%r203 = zext i128 %r202 to i160
-%r205 = getelementptr i32, i32* %r3, i32 4
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i160
-%r208 = shl i160 %r207, 128
-%r209 = or i160 %r203, %r208
-%r210 = zext i160 %r209 to i192
-%r212 = getelementptr i32, i32* %r3, i32 5
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i192
-%r215 = shl i192 %r214, 160
-%r216 = or i192 %r210, %r215
-%r217 = zext i192 %r216 to i224
-%r219 = getelementptr i32, i32* %r3, i32 6
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i224
-%r222 = shl i224 %r221, 192
-%r223 = or i224 %r217, %r222
-%r224 = zext i224 %r223 to i256
-%r226 = getelementptr i32, i32* %r3, i32 7
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i256
-%r229 = shl i256 %r228, 224
-%r230 = or i256 %r224, %r229
-%r231 = zext i256 %r230 to i288
-%r233 = getelementptr i32, i32* %r3, i32 8
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i288
-%r236 = shl i288 %r235, 256
-%r237 = or i288 %r231, %r236
-%r238 = zext i288 %r237 to i320
-%r240 = getelementptr i32, i32* %r3, i32 9
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i320
-%r243 = shl i320 %r242, 288
-%r244 = or i320 %r238, %r243
-%r245 = zext i320 %r244 to i352
-%r247 = getelementptr i32, i32* %r3, i32 10
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i352
-%r250 = shl i352 %r249, 320
-%r251 = or i352 %r245, %r250
-%r252 = zext i352 %r251 to i384
-%r254 = getelementptr i32, i32* %r3, i32 11
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i384
-%r257 = shl i384 %r256, 352
-%r258 = or i384 %r252, %r257
-%r259 = zext i384 %r258 to i416
-%r261 = getelementptr i32, i32* %r3, i32 12
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i416
-%r264 = shl i416 %r263, 384
-%r265 = or i416 %r259, %r264
-%r266 = zext i416 %r265 to i448
-%r268 = getelementptr i32, i32* %r3, i32 13
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i448
-%r271 = shl i448 %r270, 416
-%r272 = or i448 %r266, %r271
-%r273 = zext i448 %r272 to i480
-%r275 = getelementptr i32, i32* %r3, i32 14
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i480
-%r278 = shl i480 %r277, 448
-%r279 = or i480 %r273, %r278
-%r280 = zext i480 %r279 to i512
-%r282 = getelementptr i32, i32* %r3, i32 15
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i512
-%r285 = shl i512 %r284, 480
-%r286 = or i512 %r280, %r285
-%r287 = zext i512 %r286 to i544
-%r289 = getelementptr i32, i32* %r3, i32 16
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i544
-%r292 = shl i544 %r291, 512
-%r293 = or i544 %r287, %r292
-%r294 = zext i544 %r293 to i576
-%r296 = getelementptr i32, i32* %r3, i32 17
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i576
-%r299 = shl i576 %r298, 544
-%r300 = or i576 %r294, %r299
-%r301 = zext i576 %r300 to i608
-%r303 = getelementptr i32, i32* %r3, i32 18
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i608
-%r306 = shl i608 %r305, 576
-%r307 = or i608 %r301, %r306
-%r308 = zext i608 %r307 to i640
-%r310 = getelementptr i32, i32* %r3, i32 19
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i640
-%r313 = shl i640 %r312, 608
-%r314 = or i640 %r308, %r313
-%r315 = zext i640 %r314 to i672
-%r317 = getelementptr i32, i32* %r3, i32 20
-%r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i672
-%r320 = shl i672 %r319, 640
-%r321 = or i672 %r315, %r320
-%r322 = zext i672 %r321 to i704
-%r324 = getelementptr i32, i32* %r3, i32 21
-%r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i704
-%r327 = shl i704 %r326, 672
-%r328 = or i704 %r322, %r327
-%r329 = zext i704 %r328 to i736
-%r331 = getelementptr i32, i32* %r3, i32 22
-%r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i736
-%r334 = shl i736 %r333, 704
-%r335 = or i736 %r329, %r334
-%r336 = zext i736 %r335 to i768
-%r338 = getelementptr i32, i32* %r3, i32 23
-%r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i768
-%r341 = shl i768 %r340, 736
-%r342 = or i768 %r336, %r341
-%r343 = zext i768 %r342 to i800
-%r345 = getelementptr i32, i32* %r3, i32 24
-%r346 = load i32, i32* %r345
-%r347 = zext i32 %r346 to i800
-%r348 = shl i800 %r347, 768
-%r349 = or i800 %r343, %r348
-%r350 = zext i800 %r349 to i832
-%r352 = getelementptr i32, i32* %r3, i32 25
-%r353 = load i32, i32* %r352
-%r354 = zext i32 %r353 to i832
-%r355 = shl i832 %r354, 800
-%r356 = or i832 %r350, %r355
-%r357 = zext i832 %r180 to i864
-%r358 = zext i832 %r356 to i864
-%r359 = sub i864 %r357, %r358
-%r360 = trunc i864 %r359 to i416
-%r361 = trunc i416 %r360 to i32
-%r363 = getelementptr i32, i32* %r1, i32 0
-store i32 %r361, i32* %r363
-%r364 = lshr i416 %r360, 32
-%r365 = trunc i416 %r364 to i32
-%r367 = getelementptr i32, i32* %r1, i32 1
-store i32 %r365, i32* %r367
-%r368 = lshr i416 %r364, 32
-%r369 = trunc i416 %r368 to i32
-%r371 = getelementptr i32, i32* %r1, i32 2
-store i32 %r369, i32* %r371
-%r372 = lshr i416 %r368, 32
-%r373 = trunc i416 %r372 to i32
-%r375 = getelementptr i32, i32* %r1, i32 3
-store i32 %r373, i32* %r375
-%r376 = lshr i416 %r372, 32
-%r377 = trunc i416 %r376 to i32
-%r379 = getelementptr i32, i32* %r1, i32 4
-store i32 %r377, i32* %r379
-%r380 = lshr i416 %r376, 32
-%r381 = trunc i416 %r380 to i32
-%r383 = getelementptr i32, i32* %r1, i32 5
-store i32 %r381, i32* %r383
-%r384 = lshr i416 %r380, 32
-%r385 = trunc i416 %r384 to i32
-%r387 = getelementptr i32, i32* %r1, i32 6
-store i32 %r385, i32* %r387
-%r388 = lshr i416 %r384, 32
-%r389 = trunc i416 %r388 to i32
-%r391 = getelementptr i32, i32* %r1, i32 7
-store i32 %r389, i32* %r391
-%r392 = lshr i416 %r388, 32
-%r393 = trunc i416 %r392 to i32
-%r395 = getelementptr i32, i32* %r1, i32 8
-store i32 %r393, i32* %r395
-%r396 = lshr i416 %r392, 32
-%r397 = trunc i416 %r396 to i32
-%r399 = getelementptr i32, i32* %r1, i32 9
-store i32 %r397, i32* %r399
-%r400 = lshr i416 %r396, 32
-%r401 = trunc i416 %r400 to i32
-%r403 = getelementptr i32, i32* %r1, i32 10
-store i32 %r401, i32* %r403
-%r404 = lshr i416 %r400, 32
-%r405 = trunc i416 %r404 to i32
-%r407 = getelementptr i32, i32* %r1, i32 11
-store i32 %r405, i32* %r407
-%r408 = lshr i416 %r404, 32
-%r409 = trunc i416 %r408 to i32
-%r411 = getelementptr i32, i32* %r1, i32 12
-store i32 %r409, i32* %r411
-%r412 = lshr i864 %r359, 416
-%r413 = trunc i864 %r412 to i416
-%r414 = lshr i864 %r359, 832
-%r415 = trunc i864 %r414 to i1
-%r416 = load i32, i32* %r4
-%r417 = zext i32 %r416 to i64
-%r419 = getelementptr i32, i32* %r4, i32 1
-%r420 = load i32, i32* %r419
-%r421 = zext i32 %r420 to i64
-%r422 = shl i64 %r421, 32
-%r423 = or i64 %r417, %r422
-%r424 = zext i64 %r423 to i96
-%r426 = getelementptr i32, i32* %r4, i32 2
-%r427 = load i32, i32* %r426
-%r428 = zext i32 %r427 to i96
-%r429 = shl i96 %r428, 64
-%r430 = or i96 %r424, %r429
-%r431 = zext i96 %r430 to i128
-%r433 = getelementptr i32, i32* %r4, i32 3
-%r434 = load i32, i32* %r433
-%r435 = zext i32 %r434 to i128
-%r436 = shl i128 %r435, 96
-%r437 = or i128 %r431, %r436
-%r438 = zext i128 %r437 to i160
-%r440 = getelementptr i32, i32* %r4, i32 4
-%r441 = load i32, i32* %r440
-%r442 = zext i32 %r441 to i160
-%r443 = shl i160 %r442, 128
-%r444 = or i160 %r438, %r443
-%r445 = zext i160 %r444 to i192
-%r447 = getelementptr i32, i32* %r4, i32 5
-%r448 = load i32, i32* %r447
-%r449 = zext i32 %r448 to i192
-%r450 = shl i192 %r449, 160
-%r451 = or i192 %r445, %r450
-%r452 = zext i192 %r451 to i224
-%r454 = getelementptr i32, i32* %r4, i32 6
-%r455 = load i32, i32* %r454
-%r456 = zext i32 %r455 to i224
-%r457 = shl i224 %r456, 192
-%r458 = or i224 %r452, %r457
-%r459 = zext i224 %r458 to i256
-%r461 = getelementptr i32, i32* %r4, i32 7
-%r462 = load i32, i32* %r461
-%r463 = zext i32 %r462 to i256
-%r464 = shl i256 %r463, 224
-%r465 = or i256 %r459, %r464
-%r466 = zext i256 %r465 to i288
-%r468 = getelementptr i32, i32* %r4, i32 8
-%r469 = load i32, i32* %r468
-%r470 = zext i32 %r469 to i288
-%r471 = shl i288 %r470, 256
-%r472 = or i288 %r466, %r471
-%r473 = zext i288 %r472 to i320
-%r475 = getelementptr i32, i32* %r4, i32 9
-%r476 = load i32, i32* %r475
-%r477 = zext i32 %r476 to i320
-%r478 = shl i320 %r477, 288
-%r479 = or i320 %r473, %r478
-%r480 = zext i320 %r479 to i352
-%r482 = getelementptr i32, i32* %r4, i32 10
-%r483 = load i32, i32* %r482
-%r484 = zext i32 %r483 to i352
-%r485 = shl i352 %r484, 320
-%r486 = or i352 %r480, %r485
-%r487 = zext i352 %r486 to i384
-%r489 = getelementptr i32, i32* %r4, i32 11
-%r490 = load i32, i32* %r489
-%r491 = zext i32 %r490 to i384
-%r492 = shl i384 %r491, 352
-%r493 = or i384 %r487, %r492
-%r494 = zext i384 %r493 to i416
-%r496 = getelementptr i32, i32* %r4, i32 12
-%r497 = load i32, i32* %r496
-%r498 = zext i32 %r497 to i416
-%r499 = shl i416 %r498, 384
-%r500 = or i416 %r494, %r499
-%r502 = select i1 %r415, i416 %r500, i416 0
-%r503 = add i416 %r413, %r502
-%r505 = getelementptr i32, i32* %r1, i32 13
-%r506 = trunc i416 %r503 to i32
-%r508 = getelementptr i32, i32* %r505, i32 0
-store i32 %r506, i32* %r508
-%r509 = lshr i416 %r503, 32
-%r510 = trunc i416 %r509 to i32
-%r512 = getelementptr i32, i32* %r505, i32 1
-store i32 %r510, i32* %r512
-%r513 = lshr i416 %r509, 32
-%r514 = trunc i416 %r513 to i32
-%r516 = getelementptr i32, i32* %r505, i32 2
-store i32 %r514, i32* %r516
-%r517 = lshr i416 %r513, 32
-%r518 = trunc i416 %r517 to i32
-%r520 = getelementptr i32, i32* %r505, i32 3
-store i32 %r518, i32* %r520
-%r521 = lshr i416 %r517, 32
-%r522 = trunc i416 %r521 to i32
-%r524 = getelementptr i32, i32* %r505, i32 4
-store i32 %r522, i32* %r524
-%r525 = lshr i416 %r521, 32
-%r526 = trunc i416 %r525 to i32
-%r528 = getelementptr i32, i32* %r505, i32 5
-store i32 %r526, i32* %r528
-%r529 = lshr i416 %r525, 32
-%r530 = trunc i416 %r529 to i32
-%r532 = getelementptr i32, i32* %r505, i32 6
-store i32 %r530, i32* %r532
-%r533 = lshr i416 %r529, 32
-%r534 = trunc i416 %r533 to i32
-%r536 = getelementptr i32, i32* %r505, i32 7
-store i32 %r534, i32* %r536
-%r537 = lshr i416 %r533, 32
-%r538 = trunc i416 %r537 to i32
-%r540 = getelementptr i32, i32* %r505, i32 8
-store i32 %r538, i32* %r540
-%r541 = lshr i416 %r537, 32
-%r542 = trunc i416 %r541 to i32
-%r544 = getelementptr i32, i32* %r505, i32 9
-store i32 %r542, i32* %r544
-%r545 = lshr i416 %r541, 32
-%r546 = trunc i416 %r545 to i32
-%r548 = getelementptr i32, i32* %r505, i32 10
-store i32 %r546, i32* %r548
-%r549 = lshr i416 %r545, 32
-%r550 = trunc i416 %r549 to i32
-%r552 = getelementptr i32, i32* %r505, i32 11
-store i32 %r550, i32* %r552
-%r553 = lshr i416 %r549, 32
-%r554 = trunc i416 %r553 to i32
-%r556 = getelementptr i32, i32* %r505, i32 12
-store i32 %r554, i32* %r556
-ret void
-}
-define i480 @mulPv448x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
-%r18 = trunc i64 %r17 to i32
-%r19 = call i32 @extractHigh32(i64 %r17)
-%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
-%r22 = trunc i64 %r21 to i32
-%r23 = call i32 @extractHigh32(i64 %r21)
-%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
-%r26 = trunc i64 %r25 to i32
-%r27 = call i32 @extractHigh32(i64 %r25)
-%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
-%r30 = trunc i64 %r29 to i32
-%r31 = call i32 @extractHigh32(i64 %r29)
-%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
-%r34 = trunc i64 %r33 to i32
-%r35 = call i32 @extractHigh32(i64 %r33)
-%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
-%r38 = trunc i64 %r37 to i32
-%r39 = call i32 @extractHigh32(i64 %r37)
-%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
-%r42 = trunc i64 %r41 to i32
-%r43 = call i32 @extractHigh32(i64 %r41)
-%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
-%r46 = trunc i64 %r45 to i32
-%r47 = call i32 @extractHigh32(i64 %r45)
-%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
-%r50 = trunc i64 %r49 to i32
-%r51 = call i32 @extractHigh32(i64 %r49)
-%r53 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 12)
-%r54 = trunc i64 %r53 to i32
-%r55 = call i32 @extractHigh32(i64 %r53)
-%r57 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 13)
-%r58 = trunc i64 %r57 to i32
-%r59 = call i32 @extractHigh32(i64 %r57)
-%r60 = zext i32 %r6 to i64
-%r61 = zext i32 %r10 to i64
-%r62 = shl i64 %r61, 32
-%r63 = or i64 %r60, %r62
-%r64 = zext i64 %r63 to i96
-%r65 = zext i32 %r14 to i96
-%r66 = shl i96 %r65, 64
-%r67 = or i96 %r64, %r66
-%r68 = zext i96 %r67 to i128
-%r69 = zext i32 %r18 to i128
-%r70 = shl i128 %r69, 96
-%r71 = or i128 %r68, %r70
-%r72 = zext i128 %r71 to i160
-%r73 = zext i32 %r22 to i160
-%r74 = shl i160 %r73, 128
-%r75 = or i160 %r72, %r74
-%r76 = zext i160 %r75 to i192
-%r77 = zext i32 %r26 to i192
-%r78 = shl i192 %r77, 160
-%r79 = or i192 %r76, %r78
-%r80 = zext i192 %r79 to i224
-%r81 = zext i32 %r30 to i224
-%r82 = shl i224 %r81, 192
-%r83 = or i224 %r80, %r82
-%r84 = zext i224 %r83 to i256
-%r85 = zext i32 %r34 to i256
-%r86 = shl i256 %r85, 224
-%r87 = or i256 %r84, %r86
-%r88 = zext i256 %r87 to i288
-%r89 = zext i32 %r38 to i288
-%r90 = shl i288 %r89, 256
-%r91 = or i288 %r88, %r90
-%r92 = zext i288 %r91 to i320
-%r93 = zext i32 %r42 to i320
-%r94 = shl i320 %r93, 288
-%r95 = or i320 %r92, %r94
-%r96 = zext i320 %r95 to i352
-%r97 = zext i32 %r46 to i352
-%r98 = shl i352 %r97, 320
-%r99 = or i352 %r96, %r98
-%r100 = zext i352 %r99 to i384
-%r101 = zext i32 %r50 to i384
-%r102 = shl i384 %r101, 352
-%r103 = or i384 %r100, %r102
-%r104 = zext i384 %r103 to i416
-%r105 = zext i32 %r54 to i416
-%r106 = shl i416 %r105, 384
-%r107 = or i416 %r104, %r106
-%r108 = zext i416 %r107 to i448
-%r109 = zext i32 %r58 to i448
-%r110 = shl i448 %r109, 416
-%r111 = or i448 %r108, %r110
-%r112 = zext i32 %r7 to i64
-%r113 = zext i32 %r11 to i64
-%r114 = shl i64 %r113, 32
-%r115 = or i64 %r112, %r114
-%r116 = zext i64 %r115 to i96
-%r117 = zext i32 %r15 to i96
-%r118 = shl i96 %r117, 64
-%r119 = or i96 %r116, %r118
-%r120 = zext i96 %r119 to i128
-%r121 = zext i32 %r19 to i128
-%r122 = shl i128 %r121, 96
-%r123 = or i128 %r120, %r122
-%r124 = zext i128 %r123 to i160
-%r125 = zext i32 %r23 to i160
-%r126 = shl i160 %r125, 128
-%r127 = or i160 %r124, %r126
-%r128 = zext i160 %r127 to i192
-%r129 = zext i32 %r27 to i192
-%r130 = shl i192 %r129, 160
-%r131 = or i192 %r128, %r130
-%r132 = zext i192 %r131 to i224
-%r133 = zext i32 %r31 to i224
-%r134 = shl i224 %r133, 192
-%r135 = or i224 %r132, %r134
-%r136 = zext i224 %r135 to i256
-%r137 = zext i32 %r35 to i256
-%r138 = shl i256 %r137, 224
-%r139 = or i256 %r136, %r138
-%r140 = zext i256 %r139 to i288
-%r141 = zext i32 %r39 to i288
-%r142 = shl i288 %r141, 256
-%r143 = or i288 %r140, %r142
-%r144 = zext i288 %r143 to i320
-%r145 = zext i32 %r43 to i320
-%r146 = shl i320 %r145, 288
-%r147 = or i320 %r144, %r146
-%r148 = zext i320 %r147 to i352
-%r149 = zext i32 %r47 to i352
-%r150 = shl i352 %r149, 320
-%r151 = or i352 %r148, %r150
-%r152 = zext i352 %r151 to i384
-%r153 = zext i32 %r51 to i384
-%r154 = shl i384 %r153, 352
-%r155 = or i384 %r152, %r154
-%r156 = zext i384 %r155 to i416
-%r157 = zext i32 %r55 to i416
-%r158 = shl i416 %r157, 384
-%r159 = or i416 %r156, %r158
-%r160 = zext i416 %r159 to i448
-%r161 = zext i32 %r59 to i448
-%r162 = shl i448 %r161, 416
-%r163 = or i448 %r160, %r162
-%r164 = zext i448 %r111 to i480
-%r165 = zext i448 %r163 to i480
-%r166 = shl i480 %r165, 32
-%r167 = add i480 %r164, %r166
-ret i480 %r167
-}
-define void @mcl_fp_mulUnitPre14L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i480 @mulPv448x32(i32* %r2, i32 %r3)
-%r5 = trunc i480 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i480 %r4, 32
-%r9 = trunc i480 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i480 %r8, 32
-%r13 = trunc i480 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i480 %r12, 32
-%r17 = trunc i480 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i480 %r16, 32
-%r21 = trunc i480 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-%r24 = lshr i480 %r20, 32
-%r25 = trunc i480 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 5
-store i32 %r25, i32* %r27
-%r28 = lshr i480 %r24, 32
-%r29 = trunc i480 %r28 to i32
-%r31 = getelementptr i32, i32* %r1, i32 6
-store i32 %r29, i32* %r31
-%r32 = lshr i480 %r28, 32
-%r33 = trunc i480 %r32 to i32
-%r35 = getelementptr i32, i32* %r1, i32 7
-store i32 %r33, i32* %r35
-%r36 = lshr i480 %r32, 32
-%r37 = trunc i480 %r36 to i32
-%r39 = getelementptr i32, i32* %r1, i32 8
-store i32 %r37, i32* %r39
-%r40 = lshr i480 %r36, 32
-%r41 = trunc i480 %r40 to i32
-%r43 = getelementptr i32, i32* %r1, i32 9
-store i32 %r41, i32* %r43
-%r44 = lshr i480 %r40, 32
-%r45 = trunc i480 %r44 to i32
-%r47 = getelementptr i32, i32* %r1, i32 10
-store i32 %r45, i32* %r47
-%r48 = lshr i480 %r44, 32
-%r49 = trunc i480 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 11
-store i32 %r49, i32* %r51
-%r52 = lshr i480 %r48, 32
-%r53 = trunc i480 %r52 to i32
-%r55 = getelementptr i32, i32* %r1, i32 12
-store i32 %r53, i32* %r55
-%r56 = lshr i480 %r52, 32
-%r57 = trunc i480 %r56 to i32
-%r59 = getelementptr i32, i32* %r1, i32 13
-store i32 %r57, i32* %r59
-%r60 = lshr i480 %r56, 32
-%r61 = trunc i480 %r60 to i32
-%r63 = getelementptr i32, i32* %r1, i32 14
-store i32 %r61, i32* %r63
-ret void
-}
-define void @mcl_fpDbl_mulPre14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r2, i32 7
-%r7 = getelementptr i32, i32* %r3, i32 7
-%r9 = getelementptr i32, i32* %r1, i32 14
-call void @mcl_fpDbl_mulPre7L(i32* %r1, i32* %r2, i32* %r3)
-call void @mcl_fpDbl_mulPre7L(i32* %r9, i32* %r5, i32* %r7)
-%r10 = load i32, i32* %r5
-%r11 = zext i32 %r10 to i64
-%r13 = getelementptr i32, i32* %r5, i32 1
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i64
-%r16 = shl i64 %r15, 32
-%r17 = or i64 %r11, %r16
-%r18 = zext i64 %r17 to i96
-%r20 = getelementptr i32, i32* %r5, i32 2
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i96
-%r23 = shl i96 %r22, 64
-%r24 = or i96 %r18, %r23
-%r25 = zext i96 %r24 to i128
-%r27 = getelementptr i32, i32* %r5, i32 3
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i128
-%r30 = shl i128 %r29, 96
-%r31 = or i128 %r25, %r30
-%r32 = zext i128 %r31 to i160
-%r34 = getelementptr i32, i32* %r5, i32 4
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i160
-%r37 = shl i160 %r36, 128
-%r38 = or i160 %r32, %r37
-%r39 = zext i160 %r38 to i192
-%r41 = getelementptr i32, i32* %r5, i32 5
-%r42 = load i32, i32* %r41
-%r43 = zext i32 %r42 to i192
-%r44 = shl i192 %r43, 160
-%r45 = or i192 %r39, %r44
-%r46 = zext i192 %r45 to i224
-%r48 = getelementptr i32, i32* %r5, i32 6
-%r49 = load i32, i32* %r48
-%r50 = zext i32 %r49 to i224
-%r51 = shl i224 %r50, 192
-%r52 = or i224 %r46, %r51
-%r53 = zext i224 %r52 to i256
-%r54 = load i32, i32* %r2
-%r55 = zext i32 %r54 to i64
-%r57 = getelementptr i32, i32* %r2, i32 1
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i64
-%r60 = shl i64 %r59, 32
-%r61 = or i64 %r55, %r60
-%r62 = zext i64 %r61 to i96
-%r64 = getelementptr i32, i32* %r2, i32 2
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i96
-%r67 = shl i96 %r66, 64
-%r68 = or i96 %r62, %r67
-%r69 = zext i96 %r68 to i128
-%r71 = getelementptr i32, i32* %r2, i32 3
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i128
-%r74 = shl i128 %r73, 96
-%r75 = or i128 %r69, %r74
-%r76 = zext i128 %r75 to i160
-%r78 = getelementptr i32, i32* %r2, i32 4
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i160
-%r81 = shl i160 %r80, 128
-%r82 = or i160 %r76, %r81
-%r83 = zext i160 %r82 to i192
-%r85 = getelementptr i32, i32* %r2, i32 5
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i192
-%r88 = shl i192 %r87, 160
-%r89 = or i192 %r83, %r88
-%r90 = zext i192 %r89 to i224
-%r92 = getelementptr i32, i32* %r2, i32 6
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i224
-%r95 = shl i224 %r94, 192
-%r96 = or i224 %r90, %r95
-%r97 = zext i224 %r96 to i256
-%r98 = load i32, i32* %r7
-%r99 = zext i32 %r98 to i64
-%r101 = getelementptr i32, i32* %r7, i32 1
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i64
-%r104 = shl i64 %r103, 32
-%r105 = or i64 %r99, %r104
-%r106 = zext i64 %r105 to i96
-%r108 = getelementptr i32, i32* %r7, i32 2
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i96
-%r111 = shl i96 %r110, 64
-%r112 = or i96 %r106, %r111
-%r113 = zext i96 %r112 to i128
-%r115 = getelementptr i32, i32* %r7, i32 3
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i128
-%r118 = shl i128 %r117, 96
-%r119 = or i128 %r113, %r118
-%r120 = zext i128 %r119 to i160
-%r122 = getelementptr i32, i32* %r7, i32 4
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i160
-%r125 = shl i160 %r124, 128
-%r126 = or i160 %r120, %r125
-%r127 = zext i160 %r126 to i192
-%r129 = getelementptr i32, i32* %r7, i32 5
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i192
-%r132 = shl i192 %r131, 160
-%r133 = or i192 %r127, %r132
-%r134 = zext i192 %r133 to i224
-%r136 = getelementptr i32, i32* %r7, i32 6
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i224
-%r139 = shl i224 %r138, 192
-%r140 = or i224 %r134, %r139
-%r141 = zext i224 %r140 to i256
-%r142 = load i32, i32* %r3
-%r143 = zext i32 %r142 to i64
-%r145 = getelementptr i32, i32* %r3, i32 1
-%r146 = load i32, i32* %r145
-%r147 = zext i32 %r146 to i64
-%r148 = shl i64 %r147, 32
-%r149 = or i64 %r143, %r148
-%r150 = zext i64 %r149 to i96
-%r152 = getelementptr i32, i32* %r3, i32 2
-%r153 = load i32, i32* %r152
-%r154 = zext i32 %r153 to i96
-%r155 = shl i96 %r154, 64
-%r156 = or i96 %r150, %r155
-%r157 = zext i96 %r156 to i128
-%r159 = getelementptr i32, i32* %r3, i32 3
-%r160 = load i32, i32* %r159
-%r161 = zext i32 %r160 to i128
-%r162 = shl i128 %r161, 96
-%r163 = or i128 %r157, %r162
-%r164 = zext i128 %r163 to i160
-%r166 = getelementptr i32, i32* %r3, i32 4
-%r167 = load i32, i32* %r166
-%r168 = zext i32 %r167 to i160
-%r169 = shl i160 %r168, 128
-%r170 = or i160 %r164, %r169
-%r171 = zext i160 %r170 to i192
-%r173 = getelementptr i32, i32* %r3, i32 5
-%r174 = load i32, i32* %r173
-%r175 = zext i32 %r174 to i192
-%r176 = shl i192 %r175, 160
-%r177 = or i192 %r171, %r176
-%r178 = zext i192 %r177 to i224
-%r180 = getelementptr i32, i32* %r3, i32 6
-%r181 = load i32, i32* %r180
-%r182 = zext i32 %r181 to i224
-%r183 = shl i224 %r182, 192
-%r184 = or i224 %r178, %r183
-%r185 = zext i224 %r184 to i256
-%r186 = add i256 %r53, %r97
-%r187 = add i256 %r141, %r185
-%r189 = alloca i32, i32 14
-%r190 = trunc i256 %r186 to i224
-%r191 = trunc i256 %r187 to i224
-%r192 = lshr i256 %r186, 224
-%r193 = trunc i256 %r192 to i1
-%r194 = lshr i256 %r187, 224
-%r195 = trunc i256 %r194 to i1
-%r196 = and i1 %r193, %r195
-%r198 = select i1 %r193, i224 %r191, i224 0
-%r200 = select i1 %r195, i224 %r190, i224 0
-%r202 = alloca i32, i32 7
-%r204 = alloca i32, i32 7
-%r205 = trunc i224 %r190 to i32
-%r207 = getelementptr i32, i32* %r202, i32 0
-store i32 %r205, i32* %r207
-%r208 = lshr i224 %r190, 32
-%r209 = trunc i224 %r208 to i32
-%r211 = getelementptr i32, i32* %r202, i32 1
-store i32 %r209, i32* %r211
-%r212 = lshr i224 %r208, 32
-%r213 = trunc i224 %r212 to i32
-%r215 = getelementptr i32, i32* %r202, i32 2
-store i32 %r213, i32* %r215
-%r216 = lshr i224 %r212, 32
-%r217 = trunc i224 %r216 to i32
-%r219 = getelementptr i32, i32* %r202, i32 3
-store i32 %r217, i32* %r219
-%r220 = lshr i224 %r216, 32
-%r221 = trunc i224 %r220 to i32
-%r223 = getelementptr i32, i32* %r202, i32 4
-store i32 %r221, i32* %r223
-%r224 = lshr i224 %r220, 32
-%r225 = trunc i224 %r224 to i32
-%r227 = getelementptr i32, i32* %r202, i32 5
-store i32 %r225, i32* %r227
-%r228 = lshr i224 %r224, 32
-%r229 = trunc i224 %r228 to i32
-%r231 = getelementptr i32, i32* %r202, i32 6
-store i32 %r229, i32* %r231
-%r232 = trunc i224 %r191 to i32
-%r234 = getelementptr i32, i32* %r204, i32 0
-store i32 %r232, i32* %r234
-%r235 = lshr i224 %r191, 32
-%r236 = trunc i224 %r235 to i32
-%r238 = getelementptr i32, i32* %r204, i32 1
-store i32 %r236, i32* %r238
-%r239 = lshr i224 %r235, 32
-%r240 = trunc i224 %r239 to i32
-%r242 = getelementptr i32, i32* %r204, i32 2
-store i32 %r240, i32* %r242
-%r243 = lshr i224 %r239, 32
-%r244 = trunc i224 %r243 to i32
-%r246 = getelementptr i32, i32* %r204, i32 3
-store i32 %r244, i32* %r246
-%r247 = lshr i224 %r243, 32
-%r248 = trunc i224 %r247 to i32
-%r250 = getelementptr i32, i32* %r204, i32 4
-store i32 %r248, i32* %r250
-%r251 = lshr i224 %r247, 32
-%r252 = trunc i224 %r251 to i32
-%r254 = getelementptr i32, i32* %r204, i32 5
-store i32 %r252, i32* %r254
-%r255 = lshr i224 %r251, 32
-%r256 = trunc i224 %r255 to i32
-%r258 = getelementptr i32, i32* %r204, i32 6
-store i32 %r256, i32* %r258
-call void @mcl_fpDbl_mulPre7L(i32* %r189, i32* %r202, i32* %r204)
-%r259 = load i32, i32* %r189
-%r260 = zext i32 %r259 to i64
-%r262 = getelementptr i32, i32* %r189, i32 1
-%r263 = load i32, i32* %r262
-%r264 = zext i32 %r263 to i64
-%r265 = shl i64 %r264, 32
-%r266 = or i64 %r260, %r265
-%r267 = zext i64 %r266 to i96
-%r269 = getelementptr i32, i32* %r189, i32 2
-%r270 = load i32, i32* %r269
-%r271 = zext i32 %r270 to i96
-%r272 = shl i96 %r271, 64
-%r273 = or i96 %r267, %r272
-%r274 = zext i96 %r273 to i128
-%r276 = getelementptr i32, i32* %r189, i32 3
-%r277 = load i32, i32* %r276
-%r278 = zext i32 %r277 to i128
-%r279 = shl i128 %r278, 96
-%r280 = or i128 %r274, %r279
-%r281 = zext i128 %r280 to i160
-%r283 = getelementptr i32, i32* %r189, i32 4
-%r284 = load i32, i32* %r283
-%r285 = zext i32 %r284 to i160
-%r286 = shl i160 %r285, 128
-%r287 = or i160 %r281, %r286
-%r288 = zext i160 %r287 to i192
-%r290 = getelementptr i32, i32* %r189, i32 5
-%r291 = load i32, i32* %r290
-%r292 = zext i32 %r291 to i192
-%r293 = shl i192 %r292, 160
-%r294 = or i192 %r288, %r293
-%r295 = zext i192 %r294 to i224
-%r297 = getelementptr i32, i32* %r189, i32 6
-%r298 = load i32, i32* %r297
-%r299 = zext i32 %r298 to i224
-%r300 = shl i224 %r299, 192
-%r301 = or i224 %r295, %r300
-%r302 = zext i224 %r301 to i256
-%r304 = getelementptr i32, i32* %r189, i32 7
-%r305 = load i32, i32* %r304
-%r306 = zext i32 %r305 to i256
-%r307 = shl i256 %r306, 224
-%r308 = or i256 %r302, %r307
-%r309 = zext i256 %r308 to i288
-%r311 = getelementptr i32, i32* %r189, i32 8
-%r312 = load i32, i32* %r311
-%r313 = zext i32 %r312 to i288
-%r314 = shl i288 %r313, 256
-%r315 = or i288 %r309, %r314
-%r316 = zext i288 %r315 to i320
-%r318 = getelementptr i32, i32* %r189, i32 9
-%r319 = load i32, i32* %r318
-%r320 = zext i32 %r319 to i320
-%r321 = shl i320 %r320, 288
-%r322 = or i320 %r316, %r321
-%r323 = zext i320 %r322 to i352
-%r325 = getelementptr i32, i32* %r189, i32 10
-%r326 = load i32, i32* %r325
-%r327 = zext i32 %r326 to i352
-%r328 = shl i352 %r327, 320
-%r329 = or i352 %r323, %r328
-%r330 = zext i352 %r329 to i384
-%r332 = getelementptr i32, i32* %r189, i32 11
-%r333 = load i32, i32* %r332
-%r334 = zext i32 %r333 to i384
-%r335 = shl i384 %r334, 352
-%r336 = or i384 %r330, %r335
-%r337 = zext i384 %r336 to i416
-%r339 = getelementptr i32, i32* %r189, i32 12
-%r340 = load i32, i32* %r339
-%r341 = zext i32 %r340 to i416
-%r342 = shl i416 %r341, 384
-%r343 = or i416 %r337, %r342
-%r344 = zext i416 %r343 to i448
-%r346 = getelementptr i32, i32* %r189, i32 13
-%r347 = load i32, i32* %r346
-%r348 = zext i32 %r347 to i448
-%r349 = shl i448 %r348, 416
-%r350 = or i448 %r344, %r349
-%r351 = zext i448 %r350 to i480
-%r352 = zext i1 %r196 to i480
-%r353 = shl i480 %r352, 448
-%r354 = or i480 %r351, %r353
-%r355 = zext i224 %r198 to i480
-%r356 = zext i224 %r200 to i480
-%r357 = shl i480 %r355, 224
-%r358 = shl i480 %r356, 224
-%r359 = add i480 %r354, %r357
-%r360 = add i480 %r359, %r358
-%r361 = load i32, i32* %r1
-%r362 = zext i32 %r361 to i64
-%r364 = getelementptr i32, i32* %r1, i32 1
-%r365 = load i32, i32* %r364
-%r366 = zext i32 %r365 to i64
-%r367 = shl i64 %r366, 32
-%r368 = or i64 %r362, %r367
-%r369 = zext i64 %r368 to i96
-%r371 = getelementptr i32, i32* %r1, i32 2
-%r372 = load i32, i32* %r371
-%r373 = zext i32 %r372 to i96
-%r374 = shl i96 %r373, 64
-%r375 = or i96 %r369, %r374
-%r376 = zext i96 %r375 to i128
-%r378 = getelementptr i32, i32* %r1, i32 3
-%r379 = load i32, i32* %r378
-%r380 = zext i32 %r379 to i128
-%r381 = shl i128 %r380, 96
-%r382 = or i128 %r376, %r381
-%r383 = zext i128 %r382 to i160
-%r385 = getelementptr i32, i32* %r1, i32 4
-%r386 = load i32, i32* %r385
-%r387 = zext i32 %r386 to i160
-%r388 = shl i160 %r387, 128
-%r389 = or i160 %r383, %r388
-%r390 = zext i160 %r389 to i192
-%r392 = getelementptr i32, i32* %r1, i32 5
-%r393 = load i32, i32* %r392
-%r394 = zext i32 %r393 to i192
-%r395 = shl i192 %r394, 160
-%r396 = or i192 %r390, %r395
-%r397 = zext i192 %r396 to i224
-%r399 = getelementptr i32, i32* %r1, i32 6
-%r400 = load i32, i32* %r399
-%r401 = zext i32 %r400 to i224
-%r402 = shl i224 %r401, 192
-%r403 = or i224 %r397, %r402
-%r404 = zext i224 %r403 to i256
-%r406 = getelementptr i32, i32* %r1, i32 7
-%r407 = load i32, i32* %r406
-%r408 = zext i32 %r407 to i256
-%r409 = shl i256 %r408, 224
-%r410 = or i256 %r404, %r409
-%r411 = zext i256 %r410 to i288
-%r413 = getelementptr i32, i32* %r1, i32 8
-%r414 = load i32, i32* %r413
-%r415 = zext i32 %r414 to i288
-%r416 = shl i288 %r415, 256
-%r417 = or i288 %r411, %r416
-%r418 = zext i288 %r417 to i320
-%r420 = getelementptr i32, i32* %r1, i32 9
-%r421 = load i32, i32* %r420
-%r422 = zext i32 %r421 to i320
-%r423 = shl i320 %r422, 288
-%r424 = or i320 %r418, %r423
-%r425 = zext i320 %r424 to i352
-%r427 = getelementptr i32, i32* %r1, i32 10
-%r428 = load i32, i32* %r427
-%r429 = zext i32 %r428 to i352
-%r430 = shl i352 %r429, 320
-%r431 = or i352 %r425, %r430
-%r432 = zext i352 %r431 to i384
-%r434 = getelementptr i32, i32* %r1, i32 11
-%r435 = load i32, i32* %r434
-%r436 = zext i32 %r435 to i384
-%r437 = shl i384 %r436, 352
-%r438 = or i384 %r432, %r437
-%r439 = zext i384 %r438 to i416
-%r441 = getelementptr i32, i32* %r1, i32 12
-%r442 = load i32, i32* %r441
-%r443 = zext i32 %r442 to i416
-%r444 = shl i416 %r443, 384
-%r445 = or i416 %r439, %r444
-%r446 = zext i416 %r445 to i448
-%r448 = getelementptr i32, i32* %r1, i32 13
-%r449 = load i32, i32* %r448
-%r450 = zext i32 %r449 to i448
-%r451 = shl i448 %r450, 416
-%r452 = or i448 %r446, %r451
-%r453 = zext i448 %r452 to i480
-%r454 = sub i480 %r360, %r453
-%r456 = getelementptr i32, i32* %r1, i32 14
-%r457 = load i32, i32* %r456
-%r458 = zext i32 %r457 to i64
-%r460 = getelementptr i32, i32* %r456, i32 1
-%r461 = load i32, i32* %r460
-%r462 = zext i32 %r461 to i64
-%r463 = shl i64 %r462, 32
-%r464 = or i64 %r458, %r463
-%r465 = zext i64 %r464 to i96
-%r467 = getelementptr i32, i32* %r456, i32 2
-%r468 = load i32, i32* %r467
-%r469 = zext i32 %r468 to i96
-%r470 = shl i96 %r469, 64
-%r471 = or i96 %r465, %r470
-%r472 = zext i96 %r471 to i128
-%r474 = getelementptr i32, i32* %r456, i32 3
-%r475 = load i32, i32* %r474
-%r476 = zext i32 %r475 to i128
-%r477 = shl i128 %r476, 96
-%r478 = or i128 %r472, %r477
-%r479 = zext i128 %r478 to i160
-%r481 = getelementptr i32, i32* %r456, i32 4
-%r482 = load i32, i32* %r481
-%r483 = zext i32 %r482 to i160
-%r484 = shl i160 %r483, 128
-%r485 = or i160 %r479, %r484
-%r486 = zext i160 %r485 to i192
-%r488 = getelementptr i32, i32* %r456, i32 5
-%r489 = load i32, i32* %r488
-%r490 = zext i32 %r489 to i192
-%r491 = shl i192 %r490, 160
-%r492 = or i192 %r486, %r491
-%r493 = zext i192 %r492 to i224
-%r495 = getelementptr i32, i32* %r456, i32 6
-%r496 = load i32, i32* %r495
-%r497 = zext i32 %r496 to i224
-%r498 = shl i224 %r497, 192
-%r499 = or i224 %r493, %r498
-%r500 = zext i224 %r499 to i256
-%r502 = getelementptr i32, i32* %r456, i32 7
-%r503 = load i32, i32* %r502
-%r504 = zext i32 %r503 to i256
-%r505 = shl i256 %r504, 224
-%r506 = or i256 %r500, %r505
-%r507 = zext i256 %r506 to i288
-%r509 = getelementptr i32, i32* %r456, i32 8
-%r510 = load i32, i32* %r509
-%r511 = zext i32 %r510 to i288
-%r512 = shl i288 %r511, 256
-%r513 = or i288 %r507, %r512
-%r514 = zext i288 %r513 to i320
-%r516 = getelementptr i32, i32* %r456, i32 9
-%r517 = load i32, i32* %r516
-%r518 = zext i32 %r517 to i320
-%r519 = shl i320 %r518, 288
-%r520 = or i320 %r514, %r519
-%r521 = zext i320 %r520 to i352
-%r523 = getelementptr i32, i32* %r456, i32 10
-%r524 = load i32, i32* %r523
-%r525 = zext i32 %r524 to i352
-%r526 = shl i352 %r525, 320
-%r527 = or i352 %r521, %r526
-%r528 = zext i352 %r527 to i384
-%r530 = getelementptr i32, i32* %r456, i32 11
-%r531 = load i32, i32* %r530
-%r532 = zext i32 %r531 to i384
-%r533 = shl i384 %r532, 352
-%r534 = or i384 %r528, %r533
-%r535 = zext i384 %r534 to i416
-%r537 = getelementptr i32, i32* %r456, i32 12
-%r538 = load i32, i32* %r537
-%r539 = zext i32 %r538 to i416
-%r540 = shl i416 %r539, 384
-%r541 = or i416 %r535, %r540
-%r542 = zext i416 %r541 to i448
-%r544 = getelementptr i32, i32* %r456, i32 13
-%r545 = load i32, i32* %r544
-%r546 = zext i32 %r545 to i448
-%r547 = shl i448 %r546, 416
-%r548 = or i448 %r542, %r547
-%r549 = zext i448 %r548 to i480
-%r550 = sub i480 %r454, %r549
-%r551 = zext i480 %r550 to i672
-%r553 = getelementptr i32, i32* %r1, i32 7
-%r554 = load i32, i32* %r553
-%r555 = zext i32 %r554 to i64
-%r557 = getelementptr i32, i32* %r553, i32 1
-%r558 = load i32, i32* %r557
-%r559 = zext i32 %r558 to i64
-%r560 = shl i64 %r559, 32
-%r561 = or i64 %r555, %r560
-%r562 = zext i64 %r561 to i96
-%r564 = getelementptr i32, i32* %r553, i32 2
-%r565 = load i32, i32* %r564
-%r566 = zext i32 %r565 to i96
-%r567 = shl i96 %r566, 64
-%r568 = or i96 %r562, %r567
-%r569 = zext i96 %r568 to i128
-%r571 = getelementptr i32, i32* %r553, i32 3
-%r572 = load i32, i32* %r571
-%r573 = zext i32 %r572 to i128
-%r574 = shl i128 %r573, 96
-%r575 = or i128 %r569, %r574
-%r576 = zext i128 %r575 to i160
-%r578 = getelementptr i32, i32* %r553, i32 4
-%r579 = load i32, i32* %r578
-%r580 = zext i32 %r579 to i160
-%r581 = shl i160 %r580, 128
-%r582 = or i160 %r576, %r581
-%r583 = zext i160 %r582 to i192
-%r585 = getelementptr i32, i32* %r553, i32 5
-%r586 = load i32, i32* %r585
-%r587 = zext i32 %r586 to i192
-%r588 = shl i192 %r587, 160
-%r589 = or i192 %r583, %r588
-%r590 = zext i192 %r589 to i224
-%r592 = getelementptr i32, i32* %r553, i32 6
-%r593 = load i32, i32* %r592
-%r594 = zext i32 %r593 to i224
-%r595 = shl i224 %r594, 192
-%r596 = or i224 %r590, %r595
-%r597 = zext i224 %r596 to i256
-%r599 = getelementptr i32, i32* %r553, i32 7
-%r600 = load i32, i32* %r599
-%r601 = zext i32 %r600 to i256
-%r602 = shl i256 %r601, 224
-%r603 = or i256 %r597, %r602
-%r604 = zext i256 %r603 to i288
-%r606 = getelementptr i32, i32* %r553, i32 8
-%r607 = load i32, i32* %r606
-%r608 = zext i32 %r607 to i288
-%r609 = shl i288 %r608, 256
-%r610 = or i288 %r604, %r609
-%r611 = zext i288 %r610 to i320
-%r613 = getelementptr i32, i32* %r553, i32 9
-%r614 = load i32, i32* %r613
-%r615 = zext i32 %r614 to i320
-%r616 = shl i320 %r615, 288
-%r617 = or i320 %r611, %r616
-%r618 = zext i320 %r617 to i352
-%r620 = getelementptr i32, i32* %r553, i32 10
-%r621 = load i32, i32* %r620
-%r622 = zext i32 %r621 to i352
-%r623 = shl i352 %r622, 320
-%r624 = or i352 %r618, %r623
-%r625 = zext i352 %r624 to i384
-%r627 = getelementptr i32, i32* %r553, i32 11
-%r628 = load i32, i32* %r627
-%r629 = zext i32 %r628 to i384
-%r630 = shl i384 %r629, 352
-%r631 = or i384 %r625, %r630
-%r632 = zext i384 %r631 to i416
-%r634 = getelementptr i32, i32* %r553, i32 12
-%r635 = load i32, i32* %r634
-%r636 = zext i32 %r635 to i416
-%r637 = shl i416 %r636, 384
-%r638 = or i416 %r632, %r637
-%r639 = zext i416 %r638 to i448
-%r641 = getelementptr i32, i32* %r553, i32 13
-%r642 = load i32, i32* %r641
-%r643 = zext i32 %r642 to i448
-%r644 = shl i448 %r643, 416
-%r645 = or i448 %r639, %r644
-%r646 = zext i448 %r645 to i480
-%r648 = getelementptr i32, i32* %r553, i32 14
-%r649 = load i32, i32* %r648
-%r650 = zext i32 %r649 to i480
-%r651 = shl i480 %r650, 448
-%r652 = or i480 %r646, %r651
-%r653 = zext i480 %r652 to i512
-%r655 = getelementptr i32, i32* %r553, i32 15
-%r656 = load i32, i32* %r655
-%r657 = zext i32 %r656 to i512
-%r658 = shl i512 %r657, 480
-%r659 = or i512 %r653, %r658
-%r660 = zext i512 %r659 to i544
-%r662 = getelementptr i32, i32* %r553, i32 16
-%r663 = load i32, i32* %r662
-%r664 = zext i32 %r663 to i544
-%r665 = shl i544 %r664, 512
-%r666 = or i544 %r660, %r665
-%r667 = zext i544 %r666 to i576
-%r669 = getelementptr i32, i32* %r553, i32 17
-%r670 = load i32, i32* %r669
-%r671 = zext i32 %r670 to i576
-%r672 = shl i576 %r671, 544
-%r673 = or i576 %r667, %r672
-%r674 = zext i576 %r673 to i608
-%r676 = getelementptr i32, i32* %r553, i32 18
-%r677 = load i32, i32* %r676
-%r678 = zext i32 %r677 to i608
-%r679 = shl i608 %r678, 576
-%r680 = or i608 %r674, %r679
-%r681 = zext i608 %r680 to i640
-%r683 = getelementptr i32, i32* %r553, i32 19
-%r684 = load i32, i32* %r683
-%r685 = zext i32 %r684 to i640
-%r686 = shl i640 %r685, 608
-%r687 = or i640 %r681, %r686
-%r688 = zext i640 %r687 to i672
-%r690 = getelementptr i32, i32* %r553, i32 20
-%r691 = load i32, i32* %r690
-%r692 = zext i32 %r691 to i672
-%r693 = shl i672 %r692, 640
-%r694 = or i672 %r688, %r693
-%r695 = add i672 %r551, %r694
-%r697 = getelementptr i32, i32* %r1, i32 7
-%r698 = trunc i672 %r695 to i32
-%r700 = getelementptr i32, i32* %r697, i32 0
-store i32 %r698, i32* %r700
-%r701 = lshr i672 %r695, 32
-%r702 = trunc i672 %r701 to i32
-%r704 = getelementptr i32, i32* %r697, i32 1
-store i32 %r702, i32* %r704
-%r705 = lshr i672 %r701, 32
-%r706 = trunc i672 %r705 to i32
-%r708 = getelementptr i32, i32* %r697, i32 2
-store i32 %r706, i32* %r708
-%r709 = lshr i672 %r705, 32
-%r710 = trunc i672 %r709 to i32
-%r712 = getelementptr i32, i32* %r697, i32 3
-store i32 %r710, i32* %r712
-%r713 = lshr i672 %r709, 32
-%r714 = trunc i672 %r713 to i32
-%r716 = getelementptr i32, i32* %r697, i32 4
-store i32 %r714, i32* %r716
-%r717 = lshr i672 %r713, 32
-%r718 = trunc i672 %r717 to i32
-%r720 = getelementptr i32, i32* %r697, i32 5
-store i32 %r718, i32* %r720
-%r721 = lshr i672 %r717, 32
-%r722 = trunc i672 %r721 to i32
-%r724 = getelementptr i32, i32* %r697, i32 6
-store i32 %r722, i32* %r724
-%r725 = lshr i672 %r721, 32
-%r726 = trunc i672 %r725 to i32
-%r728 = getelementptr i32, i32* %r697, i32 7
-store i32 %r726, i32* %r728
-%r729 = lshr i672 %r725, 32
-%r730 = trunc i672 %r729 to i32
-%r732 = getelementptr i32, i32* %r697, i32 8
-store i32 %r730, i32* %r732
-%r733 = lshr i672 %r729, 32
-%r734 = trunc i672 %r733 to i32
-%r736 = getelementptr i32, i32* %r697, i32 9
-store i32 %r734, i32* %r736
-%r737 = lshr i672 %r733, 32
-%r738 = trunc i672 %r737 to i32
-%r740 = getelementptr i32, i32* %r697, i32 10
-store i32 %r738, i32* %r740
-%r741 = lshr i672 %r737, 32
-%r742 = trunc i672 %r741 to i32
-%r744 = getelementptr i32, i32* %r697, i32 11
-store i32 %r742, i32* %r744
-%r745 = lshr i672 %r741, 32
-%r746 = trunc i672 %r745 to i32
-%r748 = getelementptr i32, i32* %r697, i32 12
-store i32 %r746, i32* %r748
-%r749 = lshr i672 %r745, 32
-%r750 = trunc i672 %r749 to i32
-%r752 = getelementptr i32, i32* %r697, i32 13
-store i32 %r750, i32* %r752
-%r753 = lshr i672 %r749, 32
-%r754 = trunc i672 %r753 to i32
-%r756 = getelementptr i32, i32* %r697, i32 14
-store i32 %r754, i32* %r756
-%r757 = lshr i672 %r753, 32
-%r758 = trunc i672 %r757 to i32
-%r760 = getelementptr i32, i32* %r697, i32 15
-store i32 %r758, i32* %r760
-%r761 = lshr i672 %r757, 32
-%r762 = trunc i672 %r761 to i32
-%r764 = getelementptr i32, i32* %r697, i32 16
-store i32 %r762, i32* %r764
-%r765 = lshr i672 %r761, 32
-%r766 = trunc i672 %r765 to i32
-%r768 = getelementptr i32, i32* %r697, i32 17
-store i32 %r766, i32* %r768
-%r769 = lshr i672 %r765, 32
-%r770 = trunc i672 %r769 to i32
-%r772 = getelementptr i32, i32* %r697, i32 18
-store i32 %r770, i32* %r772
-%r773 = lshr i672 %r769, 32
-%r774 = trunc i672 %r773 to i32
-%r776 = getelementptr i32, i32* %r697, i32 19
-store i32 %r774, i32* %r776
-%r777 = lshr i672 %r773, 32
-%r778 = trunc i672 %r777 to i32
-%r780 = getelementptr i32, i32* %r697, i32 20
-store i32 %r778, i32* %r780
-ret void
-}
-define void @mcl_fpDbl_sqrPre14L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r4 = getelementptr i32, i32* %r2, i32 7
-%r6 = getelementptr i32, i32* %r2, i32 7
-%r8 = getelementptr i32, i32* %r1, i32 14
-call void @mcl_fpDbl_mulPre7L(i32* %r1, i32* %r2, i32* %r2)
-call void @mcl_fpDbl_mulPre7L(i32* %r8, i32* %r4, i32* %r6)
-%r9 = load i32, i32* %r4
-%r10 = zext i32 %r9 to i64
-%r12 = getelementptr i32, i32* %r4, i32 1
-%r13 = load i32, i32* %r12
-%r14 = zext i32 %r13 to i64
-%r15 = shl i64 %r14, 32
-%r16 = or i64 %r10, %r15
-%r17 = zext i64 %r16 to i96
-%r19 = getelementptr i32, i32* %r4, i32 2
-%r20 = load i32, i32* %r19
-%r21 = zext i32 %r20 to i96
-%r22 = shl i96 %r21, 64
-%r23 = or i96 %r17, %r22
-%r24 = zext i96 %r23 to i128
-%r26 = getelementptr i32, i32* %r4, i32 3
-%r27 = load i32, i32* %r26
-%r28 = zext i32 %r27 to i128
-%r29 = shl i128 %r28, 96
-%r30 = or i128 %r24, %r29
-%r31 = zext i128 %r30 to i160
-%r33 = getelementptr i32, i32* %r4, i32 4
-%r34 = load i32, i32* %r33
-%r35 = zext i32 %r34 to i160
-%r36 = shl i160 %r35, 128
-%r37 = or i160 %r31, %r36
-%r38 = zext i160 %r37 to i192
-%r40 = getelementptr i32, i32* %r4, i32 5
-%r41 = load i32, i32* %r40
-%r42 = zext i32 %r41 to i192
-%r43 = shl i192 %r42, 160
-%r44 = or i192 %r38, %r43
-%r45 = zext i192 %r44 to i224
-%r47 = getelementptr i32, i32* %r4, i32 6
-%r48 = load i32, i32* %r47
-%r49 = zext i32 %r48 to i224
-%r50 = shl i224 %r49, 192
-%r51 = or i224 %r45, %r50
-%r52 = zext i224 %r51 to i256
-%r53 = load i32, i32* %r2
-%r54 = zext i32 %r53 to i64
-%r56 = getelementptr i32, i32* %r2, i32 1
-%r57 = load i32, i32* %r56
-%r58 = zext i32 %r57 to i64
-%r59 = shl i64 %r58, 32
-%r60 = or i64 %r54, %r59
-%r61 = zext i64 %r60 to i96
-%r63 = getelementptr i32, i32* %r2, i32 2
-%r64 = load i32, i32* %r63
-%r65 = zext i32 %r64 to i96
-%r66 = shl i96 %r65, 64
-%r67 = or i96 %r61, %r66
-%r68 = zext i96 %r67 to i128
-%r70 = getelementptr i32, i32* %r2, i32 3
-%r71 = load i32, i32* %r70
-%r72 = zext i32 %r71 to i128
-%r73 = shl i128 %r72, 96
-%r74 = or i128 %r68, %r73
-%r75 = zext i128 %r74 to i160
-%r77 = getelementptr i32, i32* %r2, i32 4
-%r78 = load i32, i32* %r77
-%r79 = zext i32 %r78 to i160
-%r80 = shl i160 %r79, 128
-%r81 = or i160 %r75, %r80
-%r82 = zext i160 %r81 to i192
-%r84 = getelementptr i32, i32* %r2, i32 5
-%r85 = load i32, i32* %r84
-%r86 = zext i32 %r85 to i192
-%r87 = shl i192 %r86, 160
-%r88 = or i192 %r82, %r87
-%r89 = zext i192 %r88 to i224
-%r91 = getelementptr i32, i32* %r2, i32 6
-%r92 = load i32, i32* %r91
-%r93 = zext i32 %r92 to i224
-%r94 = shl i224 %r93, 192
-%r95 = or i224 %r89, %r94
-%r96 = zext i224 %r95 to i256
-%r97 = load i32, i32* %r6
-%r98 = zext i32 %r97 to i64
-%r100 = getelementptr i32, i32* %r6, i32 1
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i64
-%r103 = shl i64 %r102, 32
-%r104 = or i64 %r98, %r103
-%r105 = zext i64 %r104 to i96
-%r107 = getelementptr i32, i32* %r6, i32 2
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i96
-%r110 = shl i96 %r109, 64
-%r111 = or i96 %r105, %r110
-%r112 = zext i96 %r111 to i128
-%r114 = getelementptr i32, i32* %r6, i32 3
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i128
-%r117 = shl i128 %r116, 96
-%r118 = or i128 %r112, %r117
-%r119 = zext i128 %r118 to i160
-%r121 = getelementptr i32, i32* %r6, i32 4
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i160
-%r124 = shl i160 %r123, 128
-%r125 = or i160 %r119, %r124
-%r126 = zext i160 %r125 to i192
-%r128 = getelementptr i32, i32* %r6, i32 5
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i192
-%r131 = shl i192 %r130, 160
-%r132 = or i192 %r126, %r131
-%r133 = zext i192 %r132 to i224
-%r135 = getelementptr i32, i32* %r6, i32 6
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i224
-%r138 = shl i224 %r137, 192
-%r139 = or i224 %r133, %r138
-%r140 = zext i224 %r139 to i256
-%r141 = load i32, i32* %r2
-%r142 = zext i32 %r141 to i64
-%r144 = getelementptr i32, i32* %r2, i32 1
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i64
-%r147 = shl i64 %r146, 32
-%r148 = or i64 %r142, %r147
-%r149 = zext i64 %r148 to i96
-%r151 = getelementptr i32, i32* %r2, i32 2
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i96
-%r154 = shl i96 %r153, 64
-%r155 = or i96 %r149, %r154
-%r156 = zext i96 %r155 to i128
-%r158 = getelementptr i32, i32* %r2, i32 3
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i128
-%r161 = shl i128 %r160, 96
-%r162 = or i128 %r156, %r161
-%r163 = zext i128 %r162 to i160
-%r165 = getelementptr i32, i32* %r2, i32 4
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i160
-%r168 = shl i160 %r167, 128
-%r169 = or i160 %r163, %r168
-%r170 = zext i160 %r169 to i192
-%r172 = getelementptr i32, i32* %r2, i32 5
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i192
-%r175 = shl i192 %r174, 160
-%r176 = or i192 %r170, %r175
-%r177 = zext i192 %r176 to i224
-%r179 = getelementptr i32, i32* %r2, i32 6
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i224
-%r182 = shl i224 %r181, 192
-%r183 = or i224 %r177, %r182
-%r184 = zext i224 %r183 to i256
-%r185 = add i256 %r52, %r96
-%r186 = add i256 %r140, %r184
-%r188 = alloca i32, i32 14
-%r189 = trunc i256 %r185 to i224
-%r190 = trunc i256 %r186 to i224
-%r191 = lshr i256 %r185, 224
-%r192 = trunc i256 %r191 to i1
-%r193 = lshr i256 %r186, 224
-%r194 = trunc i256 %r193 to i1
-%r195 = and i1 %r192, %r194
-%r197 = select i1 %r192, i224 %r190, i224 0
-%r199 = select i1 %r194, i224 %r189, i224 0
-%r201 = alloca i32, i32 7
-%r203 = alloca i32, i32 7
-%r204 = trunc i224 %r189 to i32
-%r206 = getelementptr i32, i32* %r201, i32 0
-store i32 %r204, i32* %r206
-%r207 = lshr i224 %r189, 32
-%r208 = trunc i224 %r207 to i32
-%r210 = getelementptr i32, i32* %r201, i32 1
-store i32 %r208, i32* %r210
-%r211 = lshr i224 %r207, 32
-%r212 = trunc i224 %r211 to i32
-%r214 = getelementptr i32, i32* %r201, i32 2
-store i32 %r212, i32* %r214
-%r215 = lshr i224 %r211, 32
-%r216 = trunc i224 %r215 to i32
-%r218 = getelementptr i32, i32* %r201, i32 3
-store i32 %r216, i32* %r218
-%r219 = lshr i224 %r215, 32
-%r220 = trunc i224 %r219 to i32
-%r222 = getelementptr i32, i32* %r201, i32 4
-store i32 %r220, i32* %r222
-%r223 = lshr i224 %r219, 32
-%r224 = trunc i224 %r223 to i32
-%r226 = getelementptr i32, i32* %r201, i32 5
-store i32 %r224, i32* %r226
-%r227 = lshr i224 %r223, 32
-%r228 = trunc i224 %r227 to i32
-%r230 = getelementptr i32, i32* %r201, i32 6
-store i32 %r228, i32* %r230
-%r231 = trunc i224 %r190 to i32
-%r233 = getelementptr i32, i32* %r203, i32 0
-store i32 %r231, i32* %r233
-%r234 = lshr i224 %r190, 32
-%r235 = trunc i224 %r234 to i32
-%r237 = getelementptr i32, i32* %r203, i32 1
-store i32 %r235, i32* %r237
-%r238 = lshr i224 %r234, 32
-%r239 = trunc i224 %r238 to i32
-%r241 = getelementptr i32, i32* %r203, i32 2
-store i32 %r239, i32* %r241
-%r242 = lshr i224 %r238, 32
-%r243 = trunc i224 %r242 to i32
-%r245 = getelementptr i32, i32* %r203, i32 3
-store i32 %r243, i32* %r245
-%r246 = lshr i224 %r242, 32
-%r247 = trunc i224 %r246 to i32
-%r249 = getelementptr i32, i32* %r203, i32 4
-store i32 %r247, i32* %r249
-%r250 = lshr i224 %r246, 32
-%r251 = trunc i224 %r250 to i32
-%r253 = getelementptr i32, i32* %r203, i32 5
-store i32 %r251, i32* %r253
-%r254 = lshr i224 %r250, 32
-%r255 = trunc i224 %r254 to i32
-%r257 = getelementptr i32, i32* %r203, i32 6
-store i32 %r255, i32* %r257
-call void @mcl_fpDbl_mulPre7L(i32* %r188, i32* %r201, i32* %r203)
-%r258 = load i32, i32* %r188
-%r259 = zext i32 %r258 to i64
-%r261 = getelementptr i32, i32* %r188, i32 1
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i64
-%r264 = shl i64 %r263, 32
-%r265 = or i64 %r259, %r264
-%r266 = zext i64 %r265 to i96
-%r268 = getelementptr i32, i32* %r188, i32 2
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i96
-%r271 = shl i96 %r270, 64
-%r272 = or i96 %r266, %r271
-%r273 = zext i96 %r272 to i128
-%r275 = getelementptr i32, i32* %r188, i32 3
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i128
-%r278 = shl i128 %r277, 96
-%r279 = or i128 %r273, %r278
-%r280 = zext i128 %r279 to i160
-%r282 = getelementptr i32, i32* %r188, i32 4
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i160
-%r285 = shl i160 %r284, 128
-%r286 = or i160 %r280, %r285
-%r287 = zext i160 %r286 to i192
-%r289 = getelementptr i32, i32* %r188, i32 5
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i192
-%r292 = shl i192 %r291, 160
-%r293 = or i192 %r287, %r292
-%r294 = zext i192 %r293 to i224
-%r296 = getelementptr i32, i32* %r188, i32 6
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i224
-%r299 = shl i224 %r298, 192
-%r300 = or i224 %r294, %r299
-%r301 = zext i224 %r300 to i256
-%r303 = getelementptr i32, i32* %r188, i32 7
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i256
-%r306 = shl i256 %r305, 224
-%r307 = or i256 %r301, %r306
-%r308 = zext i256 %r307 to i288
-%r310 = getelementptr i32, i32* %r188, i32 8
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i288
-%r313 = shl i288 %r312, 256
-%r314 = or i288 %r308, %r313
-%r315 = zext i288 %r314 to i320
-%r317 = getelementptr i32, i32* %r188, i32 9
-%r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i320
-%r320 = shl i320 %r319, 288
-%r321 = or i320 %r315, %r320
-%r322 = zext i320 %r321 to i352
-%r324 = getelementptr i32, i32* %r188, i32 10
-%r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i352
-%r327 = shl i352 %r326, 320
-%r328 = or i352 %r322, %r327
-%r329 = zext i352 %r328 to i384
-%r331 = getelementptr i32, i32* %r188, i32 11
-%r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i384
-%r334 = shl i384 %r333, 352
-%r335 = or i384 %r329, %r334
-%r336 = zext i384 %r335 to i416
-%r338 = getelementptr i32, i32* %r188, i32 12
-%r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i416
-%r341 = shl i416 %r340, 384
-%r342 = or i416 %r336, %r341
-%r343 = zext i416 %r342 to i448
-%r345 = getelementptr i32, i32* %r188, i32 13
-%r346 = load i32, i32* %r345
-%r347 = zext i32 %r346 to i448
-%r348 = shl i448 %r347, 416
-%r349 = or i448 %r343, %r348
-%r350 = zext i448 %r349 to i480
-%r351 = zext i1 %r195 to i480
-%r352 = shl i480 %r351, 448
-%r353 = or i480 %r350, %r352
-%r354 = zext i224 %r197 to i480
-%r355 = zext i224 %r199 to i480
-%r356 = shl i480 %r354, 224
-%r357 = shl i480 %r355, 224
-%r358 = add i480 %r353, %r356
-%r359 = add i480 %r358, %r357
-%r360 = load i32, i32* %r1
-%r361 = zext i32 %r360 to i64
-%r363 = getelementptr i32, i32* %r1, i32 1
-%r364 = load i32, i32* %r363
-%r365 = zext i32 %r364 to i64
-%r366 = shl i64 %r365, 32
-%r367 = or i64 %r361, %r366
-%r368 = zext i64 %r367 to i96
-%r370 = getelementptr i32, i32* %r1, i32 2
-%r371 = load i32, i32* %r370
-%r372 = zext i32 %r371 to i96
-%r373 = shl i96 %r372, 64
-%r374 = or i96 %r368, %r373
-%r375 = zext i96 %r374 to i128
-%r377 = getelementptr i32, i32* %r1, i32 3
-%r378 = load i32, i32* %r377
-%r379 = zext i32 %r378 to i128
-%r380 = shl i128 %r379, 96
-%r381 = or i128 %r375, %r380
-%r382 = zext i128 %r381 to i160
-%r384 = getelementptr i32, i32* %r1, i32 4
-%r385 = load i32, i32* %r384
-%r386 = zext i32 %r385 to i160
-%r387 = shl i160 %r386, 128
-%r388 = or i160 %r382, %r387
-%r389 = zext i160 %r388 to i192
-%r391 = getelementptr i32, i32* %r1, i32 5
-%r392 = load i32, i32* %r391
-%r393 = zext i32 %r392 to i192
-%r394 = shl i192 %r393, 160
-%r395 = or i192 %r389, %r394
-%r396 = zext i192 %r395 to i224
-%r398 = getelementptr i32, i32* %r1, i32 6
-%r399 = load i32, i32* %r398
-%r400 = zext i32 %r399 to i224
-%r401 = shl i224 %r400, 192
-%r402 = or i224 %r396, %r401
-%r403 = zext i224 %r402 to i256
-%r405 = getelementptr i32, i32* %r1, i32 7
-%r406 = load i32, i32* %r405
-%r407 = zext i32 %r406 to i256
-%r408 = shl i256 %r407, 224
-%r409 = or i256 %r403, %r408
-%r410 = zext i256 %r409 to i288
-%r412 = getelementptr i32, i32* %r1, i32 8
-%r413 = load i32, i32* %r412
-%r414 = zext i32 %r413 to i288
-%r415 = shl i288 %r414, 256
-%r416 = or i288 %r410, %r415
-%r417 = zext i288 %r416 to i320
-%r419 = getelementptr i32, i32* %r1, i32 9
-%r420 = load i32, i32* %r419
-%r421 = zext i32 %r420 to i320
-%r422 = shl i320 %r421, 288
-%r423 = or i320 %r417, %r422
-%r424 = zext i320 %r423 to i352
-%r426 = getelementptr i32, i32* %r1, i32 10
-%r427 = load i32, i32* %r426
-%r428 = zext i32 %r427 to i352
-%r429 = shl i352 %r428, 320
-%r430 = or i352 %r424, %r429
-%r431 = zext i352 %r430 to i384
-%r433 = getelementptr i32, i32* %r1, i32 11
-%r434 = load i32, i32* %r433
-%r435 = zext i32 %r434 to i384
-%r436 = shl i384 %r435, 352
-%r437 = or i384 %r431, %r436
-%r438 = zext i384 %r437 to i416
-%r440 = getelementptr i32, i32* %r1, i32 12
-%r441 = load i32, i32* %r440
-%r442 = zext i32 %r441 to i416
-%r443 = shl i416 %r442, 384
-%r444 = or i416 %r438, %r443
-%r445 = zext i416 %r444 to i448
-%r447 = getelementptr i32, i32* %r1, i32 13
-%r448 = load i32, i32* %r447
-%r449 = zext i32 %r448 to i448
-%r450 = shl i448 %r449, 416
-%r451 = or i448 %r445, %r450
-%r452 = zext i448 %r451 to i480
-%r453 = sub i480 %r359, %r452
-%r455 = getelementptr i32, i32* %r1, i32 14
-%r456 = load i32, i32* %r455
-%r457 = zext i32 %r456 to i64
-%r459 = getelementptr i32, i32* %r455, i32 1
-%r460 = load i32, i32* %r459
-%r461 = zext i32 %r460 to i64
-%r462 = shl i64 %r461, 32
-%r463 = or i64 %r457, %r462
-%r464 = zext i64 %r463 to i96
-%r466 = getelementptr i32, i32* %r455, i32 2
-%r467 = load i32, i32* %r466
-%r468 = zext i32 %r467 to i96
-%r469 = shl i96 %r468, 64
-%r470 = or i96 %r464, %r469
-%r471 = zext i96 %r470 to i128
-%r473 = getelementptr i32, i32* %r455, i32 3
-%r474 = load i32, i32* %r473
-%r475 = zext i32 %r474 to i128
-%r476 = shl i128 %r475, 96
-%r477 = or i128 %r471, %r476
-%r478 = zext i128 %r477 to i160
-%r480 = getelementptr i32, i32* %r455, i32 4
-%r481 = load i32, i32* %r480
-%r482 = zext i32 %r481 to i160
-%r483 = shl i160 %r482, 128
-%r484 = or i160 %r478, %r483
-%r485 = zext i160 %r484 to i192
-%r487 = getelementptr i32, i32* %r455, i32 5
-%r488 = load i32, i32* %r487
-%r489 = zext i32 %r488 to i192
-%r490 = shl i192 %r489, 160
-%r491 = or i192 %r485, %r490
-%r492 = zext i192 %r491 to i224
-%r494 = getelementptr i32, i32* %r455, i32 6
-%r495 = load i32, i32* %r494
-%r496 = zext i32 %r495 to i224
-%r497 = shl i224 %r496, 192
-%r498 = or i224 %r492, %r497
-%r499 = zext i224 %r498 to i256
-%r501 = getelementptr i32, i32* %r455, i32 7
-%r502 = load i32, i32* %r501
-%r503 = zext i32 %r502 to i256
-%r504 = shl i256 %r503, 224
-%r505 = or i256 %r499, %r504
-%r506 = zext i256 %r505 to i288
-%r508 = getelementptr i32, i32* %r455, i32 8
-%r509 = load i32, i32* %r508
-%r510 = zext i32 %r509 to i288
-%r511 = shl i288 %r510, 256
-%r512 = or i288 %r506, %r511
-%r513 = zext i288 %r512 to i320
-%r515 = getelementptr i32, i32* %r455, i32 9
-%r516 = load i32, i32* %r515
-%r517 = zext i32 %r516 to i320
-%r518 = shl i320 %r517, 288
-%r519 = or i320 %r513, %r518
-%r520 = zext i320 %r519 to i352
-%r522 = getelementptr i32, i32* %r455, i32 10
-%r523 = load i32, i32* %r522
-%r524 = zext i32 %r523 to i352
-%r525 = shl i352 %r524, 320
-%r526 = or i352 %r520, %r525
-%r527 = zext i352 %r526 to i384
-%r529 = getelementptr i32, i32* %r455, i32 11
-%r530 = load i32, i32* %r529
-%r531 = zext i32 %r530 to i384
-%r532 = shl i384 %r531, 352
-%r533 = or i384 %r527, %r532
-%r534 = zext i384 %r533 to i416
-%r536 = getelementptr i32, i32* %r455, i32 12
-%r537 = load i32, i32* %r536
-%r538 = zext i32 %r537 to i416
-%r539 = shl i416 %r538, 384
-%r540 = or i416 %r534, %r539
-%r541 = zext i416 %r540 to i448
-%r543 = getelementptr i32, i32* %r455, i32 13
-%r544 = load i32, i32* %r543
-%r545 = zext i32 %r544 to i448
-%r546 = shl i448 %r545, 416
-%r547 = or i448 %r541, %r546
-%r548 = zext i448 %r547 to i480
-%r549 = sub i480 %r453, %r548
-%r550 = zext i480 %r549 to i672
-%r552 = getelementptr i32, i32* %r1, i32 7
-%r553 = load i32, i32* %r552
-%r554 = zext i32 %r553 to i64
-%r556 = getelementptr i32, i32* %r552, i32 1
-%r557 = load i32, i32* %r556
-%r558 = zext i32 %r557 to i64
-%r559 = shl i64 %r558, 32
-%r560 = or i64 %r554, %r559
-%r561 = zext i64 %r560 to i96
-%r563 = getelementptr i32, i32* %r552, i32 2
-%r564 = load i32, i32* %r563
-%r565 = zext i32 %r564 to i96
-%r566 = shl i96 %r565, 64
-%r567 = or i96 %r561, %r566
-%r568 = zext i96 %r567 to i128
-%r570 = getelementptr i32, i32* %r552, i32 3
-%r571 = load i32, i32* %r570
-%r572 = zext i32 %r571 to i128
-%r573 = shl i128 %r572, 96
-%r574 = or i128 %r568, %r573
-%r575 = zext i128 %r574 to i160
-%r577 = getelementptr i32, i32* %r552, i32 4
-%r578 = load i32, i32* %r577
-%r579 = zext i32 %r578 to i160
-%r580 = shl i160 %r579, 128
-%r581 = or i160 %r575, %r580
-%r582 = zext i160 %r581 to i192
-%r584 = getelementptr i32, i32* %r552, i32 5
-%r585 = load i32, i32* %r584
-%r586 = zext i32 %r585 to i192
-%r587 = shl i192 %r586, 160
-%r588 = or i192 %r582, %r587
-%r589 = zext i192 %r588 to i224
-%r591 = getelementptr i32, i32* %r552, i32 6
-%r592 = load i32, i32* %r591
-%r593 = zext i32 %r592 to i224
-%r594 = shl i224 %r593, 192
-%r595 = or i224 %r589, %r594
-%r596 = zext i224 %r595 to i256
-%r598 = getelementptr i32, i32* %r552, i32 7
-%r599 = load i32, i32* %r598
-%r600 = zext i32 %r599 to i256
-%r601 = shl i256 %r600, 224
-%r602 = or i256 %r596, %r601
-%r603 = zext i256 %r602 to i288
-%r605 = getelementptr i32, i32* %r552, i32 8
-%r606 = load i32, i32* %r605
-%r607 = zext i32 %r606 to i288
-%r608 = shl i288 %r607, 256
-%r609 = or i288 %r603, %r608
-%r610 = zext i288 %r609 to i320
-%r612 = getelementptr i32, i32* %r552, i32 9
-%r613 = load i32, i32* %r612
-%r614 = zext i32 %r613 to i320
-%r615 = shl i320 %r614, 288
-%r616 = or i320 %r610, %r615
-%r617 = zext i320 %r616 to i352
-%r619 = getelementptr i32, i32* %r552, i32 10
-%r620 = load i32, i32* %r619
-%r621 = zext i32 %r620 to i352
-%r622 = shl i352 %r621, 320
-%r623 = or i352 %r617, %r622
-%r624 = zext i352 %r623 to i384
-%r626 = getelementptr i32, i32* %r552, i32 11
-%r627 = load i32, i32* %r626
-%r628 = zext i32 %r627 to i384
-%r629 = shl i384 %r628, 352
-%r630 = or i384 %r624, %r629
-%r631 = zext i384 %r630 to i416
-%r633 = getelementptr i32, i32* %r552, i32 12
-%r634 = load i32, i32* %r633
-%r635 = zext i32 %r634 to i416
-%r636 = shl i416 %r635, 384
-%r637 = or i416 %r631, %r636
-%r638 = zext i416 %r637 to i448
-%r640 = getelementptr i32, i32* %r552, i32 13
-%r641 = load i32, i32* %r640
-%r642 = zext i32 %r641 to i448
-%r643 = shl i448 %r642, 416
-%r644 = or i448 %r638, %r643
-%r645 = zext i448 %r644 to i480
-%r647 = getelementptr i32, i32* %r552, i32 14
-%r648 = load i32, i32* %r647
-%r649 = zext i32 %r648 to i480
-%r650 = shl i480 %r649, 448
-%r651 = or i480 %r645, %r650
-%r652 = zext i480 %r651 to i512
-%r654 = getelementptr i32, i32* %r552, i32 15
-%r655 = load i32, i32* %r654
-%r656 = zext i32 %r655 to i512
-%r657 = shl i512 %r656, 480
-%r658 = or i512 %r652, %r657
-%r659 = zext i512 %r658 to i544
-%r661 = getelementptr i32, i32* %r552, i32 16
-%r662 = load i32, i32* %r661
-%r663 = zext i32 %r662 to i544
-%r664 = shl i544 %r663, 512
-%r665 = or i544 %r659, %r664
-%r666 = zext i544 %r665 to i576
-%r668 = getelementptr i32, i32* %r552, i32 17
-%r669 = load i32, i32* %r668
-%r670 = zext i32 %r669 to i576
-%r671 = shl i576 %r670, 544
-%r672 = or i576 %r666, %r671
-%r673 = zext i576 %r672 to i608
-%r675 = getelementptr i32, i32* %r552, i32 18
-%r676 = load i32, i32* %r675
-%r677 = zext i32 %r676 to i608
-%r678 = shl i608 %r677, 576
-%r679 = or i608 %r673, %r678
-%r680 = zext i608 %r679 to i640
-%r682 = getelementptr i32, i32* %r552, i32 19
-%r683 = load i32, i32* %r682
-%r684 = zext i32 %r683 to i640
-%r685 = shl i640 %r684, 608
-%r686 = or i640 %r680, %r685
-%r687 = zext i640 %r686 to i672
-%r689 = getelementptr i32, i32* %r552, i32 20
-%r690 = load i32, i32* %r689
-%r691 = zext i32 %r690 to i672
-%r692 = shl i672 %r691, 640
-%r693 = or i672 %r687, %r692
-%r694 = add i672 %r550, %r693
-%r696 = getelementptr i32, i32* %r1, i32 7
-%r697 = trunc i672 %r694 to i32
-%r699 = getelementptr i32, i32* %r696, i32 0
-store i32 %r697, i32* %r699
-%r700 = lshr i672 %r694, 32
-%r701 = trunc i672 %r700 to i32
-%r703 = getelementptr i32, i32* %r696, i32 1
-store i32 %r701, i32* %r703
-%r704 = lshr i672 %r700, 32
-%r705 = trunc i672 %r704 to i32
-%r707 = getelementptr i32, i32* %r696, i32 2
-store i32 %r705, i32* %r707
-%r708 = lshr i672 %r704, 32
-%r709 = trunc i672 %r708 to i32
-%r711 = getelementptr i32, i32* %r696, i32 3
-store i32 %r709, i32* %r711
-%r712 = lshr i672 %r708, 32
-%r713 = trunc i672 %r712 to i32
-%r715 = getelementptr i32, i32* %r696, i32 4
-store i32 %r713, i32* %r715
-%r716 = lshr i672 %r712, 32
-%r717 = trunc i672 %r716 to i32
-%r719 = getelementptr i32, i32* %r696, i32 5
-store i32 %r717, i32* %r719
-%r720 = lshr i672 %r716, 32
-%r721 = trunc i672 %r720 to i32
-%r723 = getelementptr i32, i32* %r696, i32 6
-store i32 %r721, i32* %r723
-%r724 = lshr i672 %r720, 32
-%r725 = trunc i672 %r724 to i32
-%r727 = getelementptr i32, i32* %r696, i32 7
-store i32 %r725, i32* %r727
-%r728 = lshr i672 %r724, 32
-%r729 = trunc i672 %r728 to i32
-%r731 = getelementptr i32, i32* %r696, i32 8
-store i32 %r729, i32* %r731
-%r732 = lshr i672 %r728, 32
-%r733 = trunc i672 %r732 to i32
-%r735 = getelementptr i32, i32* %r696, i32 9
-store i32 %r733, i32* %r735
-%r736 = lshr i672 %r732, 32
-%r737 = trunc i672 %r736 to i32
-%r739 = getelementptr i32, i32* %r696, i32 10
-store i32 %r737, i32* %r739
-%r740 = lshr i672 %r736, 32
-%r741 = trunc i672 %r740 to i32
-%r743 = getelementptr i32, i32* %r696, i32 11
-store i32 %r741, i32* %r743
-%r744 = lshr i672 %r740, 32
-%r745 = trunc i672 %r744 to i32
-%r747 = getelementptr i32, i32* %r696, i32 12
-store i32 %r745, i32* %r747
-%r748 = lshr i672 %r744, 32
-%r749 = trunc i672 %r748 to i32
-%r751 = getelementptr i32, i32* %r696, i32 13
-store i32 %r749, i32* %r751
-%r752 = lshr i672 %r748, 32
-%r753 = trunc i672 %r752 to i32
-%r755 = getelementptr i32, i32* %r696, i32 14
-store i32 %r753, i32* %r755
-%r756 = lshr i672 %r752, 32
-%r757 = trunc i672 %r756 to i32
-%r759 = getelementptr i32, i32* %r696, i32 15
-store i32 %r757, i32* %r759
-%r760 = lshr i672 %r756, 32
-%r761 = trunc i672 %r760 to i32
-%r763 = getelementptr i32, i32* %r696, i32 16
-store i32 %r761, i32* %r763
-%r764 = lshr i672 %r760, 32
-%r765 = trunc i672 %r764 to i32
-%r767 = getelementptr i32, i32* %r696, i32 17
-store i32 %r765, i32* %r767
-%r768 = lshr i672 %r764, 32
-%r769 = trunc i672 %r768 to i32
-%r771 = getelementptr i32, i32* %r696, i32 18
-store i32 %r769, i32* %r771
-%r772 = lshr i672 %r768, 32
-%r773 = trunc i672 %r772 to i32
-%r775 = getelementptr i32, i32* %r696, i32 19
-store i32 %r773, i32* %r775
-%r776 = lshr i672 %r772, 32
-%r777 = trunc i672 %r776 to i32
-%r779 = getelementptr i32, i32* %r696, i32 20
-store i32 %r777, i32* %r779
-ret void
-}
-define void @mcl_fp_mont14L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i480 @mulPv448x32(i32* %r2, i32 %r10)
-%r12 = zext i480 %r11 to i512
-%r13 = trunc i480 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i480 @mulPv448x32(i32* %r4, i32 %r14)
-%r16 = zext i480 %r15 to i512
-%r17 = add i512 %r12, %r16
-%r18 = lshr i512 %r17, 32
-%r20 = getelementptr i32, i32* %r3, i32 1
-%r21 = load i32, i32* %r20
-%r22 = call i480 @mulPv448x32(i32* %r2, i32 %r21)
-%r23 = zext i480 %r22 to i512
-%r24 = add i512 %r18, %r23
-%r25 = trunc i512 %r24 to i32
-%r26 = mul i32 %r25, %r7
-%r27 = call i480 @mulPv448x32(i32* %r4, i32 %r26)
-%r28 = zext i480 %r27 to i512
-%r29 = add i512 %r24, %r28
-%r30 = lshr i512 %r29, 32
-%r32 = getelementptr i32, i32* %r3, i32 2
-%r33 = load i32, i32* %r32
-%r34 = call i480 @mulPv448x32(i32* %r2, i32 %r33)
-%r35 = zext i480 %r34 to i512
-%r36 = add i512 %r30, %r35
-%r37 = trunc i512 %r36 to i32
-%r38 = mul i32 %r37, %r7
-%r39 = call i480 @mulPv448x32(i32* %r4, i32 %r38)
-%r40 = zext i480 %r39 to i512
-%r41 = add i512 %r36, %r40
-%r42 = lshr i512 %r41, 32
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = call i480 @mulPv448x32(i32* %r2, i32 %r45)
-%r47 = zext i480 %r46 to i512
-%r48 = add i512 %r42, %r47
-%r49 = trunc i512 %r48 to i32
-%r50 = mul i32 %r49, %r7
-%r51 = call i480 @mulPv448x32(i32* %r4, i32 %r50)
-%r52 = zext i480 %r51 to i512
-%r53 = add i512 %r48, %r52
-%r54 = lshr i512 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 4
-%r57 = load i32, i32* %r56
-%r58 = call i480 @mulPv448x32(i32* %r2, i32 %r57)
-%r59 = zext i480 %r58 to i512
-%r60 = add i512 %r54, %r59
-%r61 = trunc i512 %r60 to i32
-%r62 = mul i32 %r61, %r7
-%r63 = call i480 @mulPv448x32(i32* %r4, i32 %r62)
-%r64 = zext i480 %r63 to i512
-%r65 = add i512 %r60, %r64
-%r66 = lshr i512 %r65, 32
-%r68 = getelementptr i32, i32* %r3, i32 5
-%r69 = load i32, i32* %r68
-%r70 = call i480 @mulPv448x32(i32* %r2, i32 %r69)
-%r71 = zext i480 %r70 to i512
-%r72 = add i512 %r66, %r71
-%r73 = trunc i512 %r72 to i32
-%r74 = mul i32 %r73, %r7
-%r75 = call i480 @mulPv448x32(i32* %r4, i32 %r74)
-%r76 = zext i480 %r75 to i512
-%r77 = add i512 %r72, %r76
-%r78 = lshr i512 %r77, 32
-%r80 = getelementptr i32, i32* %r3, i32 6
-%r81 = load i32, i32* %r80
-%r82 = call i480 @mulPv448x32(i32* %r2, i32 %r81)
-%r83 = zext i480 %r82 to i512
-%r84 = add i512 %r78, %r83
-%r85 = trunc i512 %r84 to i32
-%r86 = mul i32 %r85, %r7
-%r87 = call i480 @mulPv448x32(i32* %r4, i32 %r86)
-%r88 = zext i480 %r87 to i512
-%r89 = add i512 %r84, %r88
-%r90 = lshr i512 %r89, 32
-%r92 = getelementptr i32, i32* %r3, i32 7
-%r93 = load i32, i32* %r92
-%r94 = call i480 @mulPv448x32(i32* %r2, i32 %r93)
-%r95 = zext i480 %r94 to i512
-%r96 = add i512 %r90, %r95
-%r97 = trunc i512 %r96 to i32
-%r98 = mul i32 %r97, %r7
-%r99 = call i480 @mulPv448x32(i32* %r4, i32 %r98)
-%r100 = zext i480 %r99 to i512
-%r101 = add i512 %r96, %r100
-%r102 = lshr i512 %r101, 32
-%r104 = getelementptr i32, i32* %r3, i32 8
-%r105 = load i32, i32* %r104
-%r106 = call i480 @mulPv448x32(i32* %r2, i32 %r105)
-%r107 = zext i480 %r106 to i512
-%r108 = add i512 %r102, %r107
-%r109 = trunc i512 %r108 to i32
-%r110 = mul i32 %r109, %r7
-%r111 = call i480 @mulPv448x32(i32* %r4, i32 %r110)
-%r112 = zext i480 %r111 to i512
-%r113 = add i512 %r108, %r112
-%r114 = lshr i512 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 9
-%r117 = load i32, i32* %r116
-%r118 = call i480 @mulPv448x32(i32* %r2, i32 %r117)
-%r119 = zext i480 %r118 to i512
-%r120 = add i512 %r114, %r119
-%r121 = trunc i512 %r120 to i32
-%r122 = mul i32 %r121, %r7
-%r123 = call i480 @mulPv448x32(i32* %r4, i32 %r122)
-%r124 = zext i480 %r123 to i512
-%r125 = add i512 %r120, %r124
-%r126 = lshr i512 %r125, 32
-%r128 = getelementptr i32, i32* %r3, i32 10
-%r129 = load i32, i32* %r128
-%r130 = call i480 @mulPv448x32(i32* %r2, i32 %r129)
-%r131 = zext i480 %r130 to i512
-%r132 = add i512 %r126, %r131
-%r133 = trunc i512 %r132 to i32
-%r134 = mul i32 %r133, %r7
-%r135 = call i480 @mulPv448x32(i32* %r4, i32 %r134)
-%r136 = zext i480 %r135 to i512
-%r137 = add i512 %r132, %r136
-%r138 = lshr i512 %r137, 32
-%r140 = getelementptr i32, i32* %r3, i32 11
-%r141 = load i32, i32* %r140
-%r142 = call i480 @mulPv448x32(i32* %r2, i32 %r141)
-%r143 = zext i480 %r142 to i512
-%r144 = add i512 %r138, %r143
-%r145 = trunc i512 %r144 to i32
-%r146 = mul i32 %r145, %r7
-%r147 = call i480 @mulPv448x32(i32* %r4, i32 %r146)
-%r148 = zext i480 %r147 to i512
-%r149 = add i512 %r144, %r148
-%r150 = lshr i512 %r149, 32
-%r152 = getelementptr i32, i32* %r3, i32 12
-%r153 = load i32, i32* %r152
-%r154 = call i480 @mulPv448x32(i32* %r2, i32 %r153)
-%r155 = zext i480 %r154 to i512
-%r156 = add i512 %r150, %r155
-%r157 = trunc i512 %r156 to i32
-%r158 = mul i32 %r157, %r7
-%r159 = call i480 @mulPv448x32(i32* %r4, i32 %r158)
-%r160 = zext i480 %r159 to i512
-%r161 = add i512 %r156, %r160
-%r162 = lshr i512 %r161, 32
-%r164 = getelementptr i32, i32* %r3, i32 13
-%r165 = load i32, i32* %r164
-%r166 = call i480 @mulPv448x32(i32* %r2, i32 %r165)
-%r167 = zext i480 %r166 to i512
-%r168 = add i512 %r162, %r167
-%r169 = trunc i512 %r168 to i32
-%r170 = mul i32 %r169, %r7
-%r171 = call i480 @mulPv448x32(i32* %r4, i32 %r170)
-%r172 = zext i480 %r171 to i512
-%r173 = add i512 %r168, %r172
-%r174 = lshr i512 %r173, 32
-%r175 = trunc i512 %r174 to i480
-%r176 = load i32, i32* %r4
-%r177 = zext i32 %r176 to i64
-%r179 = getelementptr i32, i32* %r4, i32 1
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i64
-%r182 = shl i64 %r181, 32
-%r183 = or i64 %r177, %r182
-%r184 = zext i64 %r183 to i96
-%r186 = getelementptr i32, i32* %r4, i32 2
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i96
-%r189 = shl i96 %r188, 64
-%r190 = or i96 %r184, %r189
-%r191 = zext i96 %r190 to i128
-%r193 = getelementptr i32, i32* %r4, i32 3
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i128
-%r196 = shl i128 %r195, 96
-%r197 = or i128 %r191, %r196
-%r198 = zext i128 %r197 to i160
-%r200 = getelementptr i32, i32* %r4, i32 4
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i160
-%r203 = shl i160 %r202, 128
-%r204 = or i160 %r198, %r203
-%r205 = zext i160 %r204 to i192
-%r207 = getelementptr i32, i32* %r4, i32 5
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i192
-%r210 = shl i192 %r209, 160
-%r211 = or i192 %r205, %r210
-%r212 = zext i192 %r211 to i224
-%r214 = getelementptr i32, i32* %r4, i32 6
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i224
-%r217 = shl i224 %r216, 192
-%r218 = or i224 %r212, %r217
-%r219 = zext i224 %r218 to i256
-%r221 = getelementptr i32, i32* %r4, i32 7
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i256
-%r224 = shl i256 %r223, 224
-%r225 = or i256 %r219, %r224
-%r226 = zext i256 %r225 to i288
-%r228 = getelementptr i32, i32* %r4, i32 8
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i288
-%r231 = shl i288 %r230, 256
-%r232 = or i288 %r226, %r231
-%r233 = zext i288 %r232 to i320
-%r235 = getelementptr i32, i32* %r4, i32 9
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i320
-%r238 = shl i320 %r237, 288
-%r239 = or i320 %r233, %r238
-%r240 = zext i320 %r239 to i352
-%r242 = getelementptr i32, i32* %r4, i32 10
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i352
-%r245 = shl i352 %r244, 320
-%r246 = or i352 %r240, %r245
-%r247 = zext i352 %r246 to i384
-%r249 = getelementptr i32, i32* %r4, i32 11
-%r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i384
-%r252 = shl i384 %r251, 352
-%r253 = or i384 %r247, %r252
-%r254 = zext i384 %r253 to i416
-%r256 = getelementptr i32, i32* %r4, i32 12
-%r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i416
-%r259 = shl i416 %r258, 384
-%r260 = or i416 %r254, %r259
-%r261 = zext i416 %r260 to i448
-%r263 = getelementptr i32, i32* %r4, i32 13
-%r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i448
-%r266 = shl i448 %r265, 416
-%r267 = or i448 %r261, %r266
-%r268 = zext i448 %r267 to i480
-%r269 = sub i480 %r175, %r268
-%r270 = lshr i480 %r269, 448
-%r271 = trunc i480 %r270 to i1
-%r272 = select i1 %r271, i480 %r175, i480 %r269
-%r273 = trunc i480 %r272 to i448
-%r274 = trunc i448 %r273 to i32
-%r276 = getelementptr i32, i32* %r1, i32 0
-store i32 %r274, i32* %r276
-%r277 = lshr i448 %r273, 32
-%r278 = trunc i448 %r277 to i32
-%r280 = getelementptr i32, i32* %r1, i32 1
-store i32 %r278, i32* %r280
-%r281 = lshr i448 %r277, 32
-%r282 = trunc i448 %r281 to i32
-%r284 = getelementptr i32, i32* %r1, i32 2
-store i32 %r282, i32* %r284
-%r285 = lshr i448 %r281, 32
-%r286 = trunc i448 %r285 to i32
-%r288 = getelementptr i32, i32* %r1, i32 3
-store i32 %r286, i32* %r288
-%r289 = lshr i448 %r285, 32
-%r290 = trunc i448 %r289 to i32
-%r292 = getelementptr i32, i32* %r1, i32 4
-store i32 %r290, i32* %r292
-%r293 = lshr i448 %r289, 32
-%r294 = trunc i448 %r293 to i32
-%r296 = getelementptr i32, i32* %r1, i32 5
-store i32 %r294, i32* %r296
-%r297 = lshr i448 %r293, 32
-%r298 = trunc i448 %r297 to i32
-%r300 = getelementptr i32, i32* %r1, i32 6
-store i32 %r298, i32* %r300
-%r301 = lshr i448 %r297, 32
-%r302 = trunc i448 %r301 to i32
-%r304 = getelementptr i32, i32* %r1, i32 7
-store i32 %r302, i32* %r304
-%r305 = lshr i448 %r301, 32
-%r306 = trunc i448 %r305 to i32
-%r308 = getelementptr i32, i32* %r1, i32 8
-store i32 %r306, i32* %r308
-%r309 = lshr i448 %r305, 32
-%r310 = trunc i448 %r309 to i32
-%r312 = getelementptr i32, i32* %r1, i32 9
-store i32 %r310, i32* %r312
-%r313 = lshr i448 %r309, 32
-%r314 = trunc i448 %r313 to i32
-%r316 = getelementptr i32, i32* %r1, i32 10
-store i32 %r314, i32* %r316
-%r317 = lshr i448 %r313, 32
-%r318 = trunc i448 %r317 to i32
-%r320 = getelementptr i32, i32* %r1, i32 11
-store i32 %r318, i32* %r320
-%r321 = lshr i448 %r317, 32
-%r322 = trunc i448 %r321 to i32
-%r324 = getelementptr i32, i32* %r1, i32 12
-store i32 %r322, i32* %r324
-%r325 = lshr i448 %r321, 32
-%r326 = trunc i448 %r325 to i32
-%r328 = getelementptr i32, i32* %r1, i32 13
-store i32 %r326, i32* %r328
-ret void
-}
-define void @mcl_fp_montNF14L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i480 @mulPv448x32(i32* %r2, i32 %r8)
-%r10 = trunc i480 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i480 @mulPv448x32(i32* %r4, i32 %r11)
-%r13 = add i480 %r9, %r12
-%r14 = lshr i480 %r13, 32
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = call i480 @mulPv448x32(i32* %r2, i32 %r17)
-%r19 = add i480 %r14, %r18
-%r20 = trunc i480 %r19 to i32
-%r21 = mul i32 %r20, %r7
-%r22 = call i480 @mulPv448x32(i32* %r4, i32 %r21)
-%r23 = add i480 %r19, %r22
-%r24 = lshr i480 %r23, 32
-%r26 = getelementptr i32, i32* %r3, i32 2
-%r27 = load i32, i32* %r26
-%r28 = call i480 @mulPv448x32(i32* %r2, i32 %r27)
-%r29 = add i480 %r24, %r28
-%r30 = trunc i480 %r29 to i32
-%r31 = mul i32 %r30, %r7
-%r32 = call i480 @mulPv448x32(i32* %r4, i32 %r31)
-%r33 = add i480 %r29, %r32
-%r34 = lshr i480 %r33, 32
-%r36 = getelementptr i32, i32* %r3, i32 3
-%r37 = load i32, i32* %r36
-%r38 = call i480 @mulPv448x32(i32* %r2, i32 %r37)
-%r39 = add i480 %r34, %r38
-%r40 = trunc i480 %r39 to i32
-%r41 = mul i32 %r40, %r7
-%r42 = call i480 @mulPv448x32(i32* %r4, i32 %r41)
-%r43 = add i480 %r39, %r42
-%r44 = lshr i480 %r43, 32
-%r46 = getelementptr i32, i32* %r3, i32 4
-%r47 = load i32, i32* %r46
-%r48 = call i480 @mulPv448x32(i32* %r2, i32 %r47)
-%r49 = add i480 %r44, %r48
-%r50 = trunc i480 %r49 to i32
-%r51 = mul i32 %r50, %r7
-%r52 = call i480 @mulPv448x32(i32* %r4, i32 %r51)
-%r53 = add i480 %r49, %r52
-%r54 = lshr i480 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 5
-%r57 = load i32, i32* %r56
-%r58 = call i480 @mulPv448x32(i32* %r2, i32 %r57)
-%r59 = add i480 %r54, %r58
-%r60 = trunc i480 %r59 to i32
-%r61 = mul i32 %r60, %r7
-%r62 = call i480 @mulPv448x32(i32* %r4, i32 %r61)
-%r63 = add i480 %r59, %r62
-%r64 = lshr i480 %r63, 32
-%r66 = getelementptr i32, i32* %r3, i32 6
-%r67 = load i32, i32* %r66
-%r68 = call i480 @mulPv448x32(i32* %r2, i32 %r67)
-%r69 = add i480 %r64, %r68
-%r70 = trunc i480 %r69 to i32
-%r71 = mul i32 %r70, %r7
-%r72 = call i480 @mulPv448x32(i32* %r4, i32 %r71)
-%r73 = add i480 %r69, %r72
-%r74 = lshr i480 %r73, 32
-%r76 = getelementptr i32, i32* %r3, i32 7
-%r77 = load i32, i32* %r76
-%r78 = call i480 @mulPv448x32(i32* %r2, i32 %r77)
-%r79 = add i480 %r74, %r78
-%r80 = trunc i480 %r79 to i32
-%r81 = mul i32 %r80, %r7
-%r82 = call i480 @mulPv448x32(i32* %r4, i32 %r81)
-%r83 = add i480 %r79, %r82
-%r84 = lshr i480 %r83, 32
-%r86 = getelementptr i32, i32* %r3, i32 8
-%r87 = load i32, i32* %r86
-%r88 = call i480 @mulPv448x32(i32* %r2, i32 %r87)
-%r89 = add i480 %r84, %r88
-%r90 = trunc i480 %r89 to i32
-%r91 = mul i32 %r90, %r7
-%r92 = call i480 @mulPv448x32(i32* %r4, i32 %r91)
-%r93 = add i480 %r89, %r92
-%r94 = lshr i480 %r93, 32
-%r96 = getelementptr i32, i32* %r3, i32 9
-%r97 = load i32, i32* %r96
-%r98 = call i480 @mulPv448x32(i32* %r2, i32 %r97)
-%r99 = add i480 %r94, %r98
-%r100 = trunc i480 %r99 to i32
-%r101 = mul i32 %r100, %r7
-%r102 = call i480 @mulPv448x32(i32* %r4, i32 %r101)
-%r103 = add i480 %r99, %r102
-%r104 = lshr i480 %r103, 32
-%r106 = getelementptr i32, i32* %r3, i32 10
-%r107 = load i32, i32* %r106
-%r108 = call i480 @mulPv448x32(i32* %r2, i32 %r107)
-%r109 = add i480 %r104, %r108
-%r110 = trunc i480 %r109 to i32
-%r111 = mul i32 %r110, %r7
-%r112 = call i480 @mulPv448x32(i32* %r4, i32 %r111)
-%r113 = add i480 %r109, %r112
-%r114 = lshr i480 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 11
-%r117 = load i32, i32* %r116
-%r118 = call i480 @mulPv448x32(i32* %r2, i32 %r117)
-%r119 = add i480 %r114, %r118
-%r120 = trunc i480 %r119 to i32
-%r121 = mul i32 %r120, %r7
-%r122 = call i480 @mulPv448x32(i32* %r4, i32 %r121)
-%r123 = add i480 %r119, %r122
-%r124 = lshr i480 %r123, 32
-%r126 = getelementptr i32, i32* %r3, i32 12
-%r127 = load i32, i32* %r126
-%r128 = call i480 @mulPv448x32(i32* %r2, i32 %r127)
-%r129 = add i480 %r124, %r128
-%r130 = trunc i480 %r129 to i32
-%r131 = mul i32 %r130, %r7
-%r132 = call i480 @mulPv448x32(i32* %r4, i32 %r131)
-%r133 = add i480 %r129, %r132
-%r134 = lshr i480 %r133, 32
-%r136 = getelementptr i32, i32* %r3, i32 13
-%r137 = load i32, i32* %r136
-%r138 = call i480 @mulPv448x32(i32* %r2, i32 %r137)
-%r139 = add i480 %r134, %r138
-%r140 = trunc i480 %r139 to i32
-%r141 = mul i32 %r140, %r7
-%r142 = call i480 @mulPv448x32(i32* %r4, i32 %r141)
-%r143 = add i480 %r139, %r142
-%r144 = lshr i480 %r143, 32
-%r145 = trunc i480 %r144 to i448
-%r146 = load i32, i32* %r4
-%r147 = zext i32 %r146 to i64
-%r149 = getelementptr i32, i32* %r4, i32 1
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i64
-%r152 = shl i64 %r151, 32
-%r153 = or i64 %r147, %r152
-%r154 = zext i64 %r153 to i96
-%r156 = getelementptr i32, i32* %r4, i32 2
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i96
-%r159 = shl i96 %r158, 64
-%r160 = or i96 %r154, %r159
-%r161 = zext i96 %r160 to i128
-%r163 = getelementptr i32, i32* %r4, i32 3
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i128
-%r166 = shl i128 %r165, 96
-%r167 = or i128 %r161, %r166
-%r168 = zext i128 %r167 to i160
-%r170 = getelementptr i32, i32* %r4, i32 4
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i160
-%r173 = shl i160 %r172, 128
-%r174 = or i160 %r168, %r173
-%r175 = zext i160 %r174 to i192
-%r177 = getelementptr i32, i32* %r4, i32 5
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i192
-%r180 = shl i192 %r179, 160
-%r181 = or i192 %r175, %r180
-%r182 = zext i192 %r181 to i224
-%r184 = getelementptr i32, i32* %r4, i32 6
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i224
-%r187 = shl i224 %r186, 192
-%r188 = or i224 %r182, %r187
-%r189 = zext i224 %r188 to i256
-%r191 = getelementptr i32, i32* %r4, i32 7
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i256
-%r194 = shl i256 %r193, 224
-%r195 = or i256 %r189, %r194
-%r196 = zext i256 %r195 to i288
-%r198 = getelementptr i32, i32* %r4, i32 8
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i288
-%r201 = shl i288 %r200, 256
-%r202 = or i288 %r196, %r201
-%r203 = zext i288 %r202 to i320
-%r205 = getelementptr i32, i32* %r4, i32 9
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i320
-%r208 = shl i320 %r207, 288
-%r209 = or i320 %r203, %r208
-%r210 = zext i320 %r209 to i352
-%r212 = getelementptr i32, i32* %r4, i32 10
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i352
-%r215 = shl i352 %r214, 320
-%r216 = or i352 %r210, %r215
-%r217 = zext i352 %r216 to i384
-%r219 = getelementptr i32, i32* %r4, i32 11
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i384
-%r222 = shl i384 %r221, 352
-%r223 = or i384 %r217, %r222
-%r224 = zext i384 %r223 to i416
-%r226 = getelementptr i32, i32* %r4, i32 12
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i416
-%r229 = shl i416 %r228, 384
-%r230 = or i416 %r224, %r229
-%r231 = zext i416 %r230 to i448
-%r233 = getelementptr i32, i32* %r4, i32 13
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i448
-%r236 = shl i448 %r235, 416
-%r237 = or i448 %r231, %r236
-%r238 = sub i448 %r145, %r237
-%r239 = lshr i448 %r238, 447
-%r240 = trunc i448 %r239 to i1
-%r241 = select i1 %r240, i448 %r145, i448 %r238
-%r242 = trunc i448 %r241 to i32
-%r244 = getelementptr i32, i32* %r1, i32 0
-store i32 %r242, i32* %r244
-%r245 = lshr i448 %r241, 32
-%r246 = trunc i448 %r245 to i32
-%r248 = getelementptr i32, i32* %r1, i32 1
-store i32 %r246, i32* %r248
-%r249 = lshr i448 %r245, 32
-%r250 = trunc i448 %r249 to i32
-%r252 = getelementptr i32, i32* %r1, i32 2
-store i32 %r250, i32* %r252
-%r253 = lshr i448 %r249, 32
-%r254 = trunc i448 %r253 to i32
-%r256 = getelementptr i32, i32* %r1, i32 3
-store i32 %r254, i32* %r256
-%r257 = lshr i448 %r253, 32
-%r258 = trunc i448 %r257 to i32
-%r260 = getelementptr i32, i32* %r1, i32 4
-store i32 %r258, i32* %r260
-%r261 = lshr i448 %r257, 32
-%r262 = trunc i448 %r261 to i32
-%r264 = getelementptr i32, i32* %r1, i32 5
-store i32 %r262, i32* %r264
-%r265 = lshr i448 %r261, 32
-%r266 = trunc i448 %r265 to i32
-%r268 = getelementptr i32, i32* %r1, i32 6
-store i32 %r266, i32* %r268
-%r269 = lshr i448 %r265, 32
-%r270 = trunc i448 %r269 to i32
-%r272 = getelementptr i32, i32* %r1, i32 7
-store i32 %r270, i32* %r272
-%r273 = lshr i448 %r269, 32
-%r274 = trunc i448 %r273 to i32
-%r276 = getelementptr i32, i32* %r1, i32 8
-store i32 %r274, i32* %r276
-%r277 = lshr i448 %r273, 32
-%r278 = trunc i448 %r277 to i32
-%r280 = getelementptr i32, i32* %r1, i32 9
-store i32 %r278, i32* %r280
-%r281 = lshr i448 %r277, 32
-%r282 = trunc i448 %r281 to i32
-%r284 = getelementptr i32, i32* %r1, i32 10
-store i32 %r282, i32* %r284
-%r285 = lshr i448 %r281, 32
-%r286 = trunc i448 %r285 to i32
-%r288 = getelementptr i32, i32* %r1, i32 11
-store i32 %r286, i32* %r288
-%r289 = lshr i448 %r285, 32
-%r290 = trunc i448 %r289 to i32
-%r292 = getelementptr i32, i32* %r1, i32 12
-store i32 %r290, i32* %r292
-%r293 = lshr i448 %r289, 32
-%r294 = trunc i448 %r293 to i32
-%r296 = getelementptr i32, i32* %r1, i32 13
-store i32 %r294, i32* %r296
-ret void
-}
-define void @mcl_fp_montRed14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = zext i96 %r21 to i128
-%r24 = getelementptr i32, i32* %r3, i32 3
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i128
-%r27 = shl i128 %r26, 96
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i160
-%r31 = getelementptr i32, i32* %r3, i32 4
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i160
-%r34 = shl i160 %r33, 128
-%r35 = or i160 %r29, %r34
-%r36 = zext i160 %r35 to i192
-%r38 = getelementptr i32, i32* %r3, i32 5
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i192
-%r41 = shl i192 %r40, 160
-%r42 = or i192 %r36, %r41
-%r43 = zext i192 %r42 to i224
-%r45 = getelementptr i32, i32* %r3, i32 6
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i224
-%r48 = shl i224 %r47, 192
-%r49 = or i224 %r43, %r48
-%r50 = zext i224 %r49 to i256
-%r52 = getelementptr i32, i32* %r3, i32 7
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i256
-%r55 = shl i256 %r54, 224
-%r56 = or i256 %r50, %r55
-%r57 = zext i256 %r56 to i288
-%r59 = getelementptr i32, i32* %r3, i32 8
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i288
-%r62 = shl i288 %r61, 256
-%r63 = or i288 %r57, %r62
-%r64 = zext i288 %r63 to i320
-%r66 = getelementptr i32, i32* %r3, i32 9
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i320
-%r69 = shl i320 %r68, 288
-%r70 = or i320 %r64, %r69
-%r71 = zext i320 %r70 to i352
-%r73 = getelementptr i32, i32* %r3, i32 10
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i352
-%r76 = shl i352 %r75, 320
-%r77 = or i352 %r71, %r76
-%r78 = zext i352 %r77 to i384
-%r80 = getelementptr i32, i32* %r3, i32 11
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i384
-%r83 = shl i384 %r82, 352
-%r84 = or i384 %r78, %r83
-%r85 = zext i384 %r84 to i416
-%r87 = getelementptr i32, i32* %r3, i32 12
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i416
-%r90 = shl i416 %r89, 384
-%r91 = or i416 %r85, %r90
-%r92 = zext i416 %r91 to i448
-%r94 = getelementptr i32, i32* %r3, i32 13
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i448
-%r97 = shl i448 %r96, 416
-%r98 = or i448 %r92, %r97
-%r99 = load i32, i32* %r2
-%r100 = zext i32 %r99 to i64
-%r102 = getelementptr i32, i32* %r2, i32 1
-%r103 = load i32, i32* %r102
-%r104 = zext i32 %r103 to i64
-%r105 = shl i64 %r104, 32
-%r106 = or i64 %r100, %r105
-%r107 = zext i64 %r106 to i96
-%r109 = getelementptr i32, i32* %r2, i32 2
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i96
-%r112 = shl i96 %r111, 64
-%r113 = or i96 %r107, %r112
-%r114 = zext i96 %r113 to i128
-%r116 = getelementptr i32, i32* %r2, i32 3
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i128
-%r119 = shl i128 %r118, 96
-%r120 = or i128 %r114, %r119
-%r121 = zext i128 %r120 to i160
-%r123 = getelementptr i32, i32* %r2, i32 4
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i160
-%r126 = shl i160 %r125, 128
-%r127 = or i160 %r121, %r126
-%r128 = zext i160 %r127 to i192
-%r130 = getelementptr i32, i32* %r2, i32 5
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i192
-%r133 = shl i192 %r132, 160
-%r134 = or i192 %r128, %r133
-%r135 = zext i192 %r134 to i224
-%r137 = getelementptr i32, i32* %r2, i32 6
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i224
-%r140 = shl i224 %r139, 192
-%r141 = or i224 %r135, %r140
-%r142 = zext i224 %r141 to i256
-%r144 = getelementptr i32, i32* %r2, i32 7
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i256
-%r147 = shl i256 %r146, 224
-%r148 = or i256 %r142, %r147
-%r149 = zext i256 %r148 to i288
-%r151 = getelementptr i32, i32* %r2, i32 8
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i288
-%r154 = shl i288 %r153, 256
-%r155 = or i288 %r149, %r154
-%r156 = zext i288 %r155 to i320
-%r158 = getelementptr i32, i32* %r2, i32 9
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i320
-%r161 = shl i320 %r160, 288
-%r162 = or i320 %r156, %r161
-%r163 = zext i320 %r162 to i352
-%r165 = getelementptr i32, i32* %r2, i32 10
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i352
-%r168 = shl i352 %r167, 320
-%r169 = or i352 %r163, %r168
-%r170 = zext i352 %r169 to i384
-%r172 = getelementptr i32, i32* %r2, i32 11
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i384
-%r175 = shl i384 %r174, 352
-%r176 = or i384 %r170, %r175
-%r177 = zext i384 %r176 to i416
-%r179 = getelementptr i32, i32* %r2, i32 12
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i416
-%r182 = shl i416 %r181, 384
-%r183 = or i416 %r177, %r182
-%r184 = zext i416 %r183 to i448
-%r186 = getelementptr i32, i32* %r2, i32 13
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i448
-%r189 = shl i448 %r188, 416
-%r190 = or i448 %r184, %r189
-%r191 = zext i448 %r190 to i480
-%r193 = getelementptr i32, i32* %r2, i32 14
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i480
-%r196 = shl i480 %r195, 448
-%r197 = or i480 %r191, %r196
-%r198 = zext i480 %r197 to i512
-%r200 = getelementptr i32, i32* %r2, i32 15
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i512
-%r203 = shl i512 %r202, 480
-%r204 = or i512 %r198, %r203
-%r205 = zext i512 %r204 to i544
-%r207 = getelementptr i32, i32* %r2, i32 16
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i544
-%r210 = shl i544 %r209, 512
-%r211 = or i544 %r205, %r210
-%r212 = zext i544 %r211 to i576
-%r214 = getelementptr i32, i32* %r2, i32 17
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i576
-%r217 = shl i576 %r216, 544
-%r218 = or i576 %r212, %r217
-%r219 = zext i576 %r218 to i608
-%r221 = getelementptr i32, i32* %r2, i32 18
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i608
-%r224 = shl i608 %r223, 576
-%r225 = or i608 %r219, %r224
-%r226 = zext i608 %r225 to i640
-%r228 = getelementptr i32, i32* %r2, i32 19
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i640
-%r231 = shl i640 %r230, 608
-%r232 = or i640 %r226, %r231
-%r233 = zext i640 %r232 to i672
-%r235 = getelementptr i32, i32* %r2, i32 20
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i672
-%r238 = shl i672 %r237, 640
-%r239 = or i672 %r233, %r238
-%r240 = zext i672 %r239 to i704
-%r242 = getelementptr i32, i32* %r2, i32 21
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i704
-%r245 = shl i704 %r244, 672
-%r246 = or i704 %r240, %r245
-%r247 = zext i704 %r246 to i736
-%r249 = getelementptr i32, i32* %r2, i32 22
-%r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i736
-%r252 = shl i736 %r251, 704
-%r253 = or i736 %r247, %r252
-%r254 = zext i736 %r253 to i768
-%r256 = getelementptr i32, i32* %r2, i32 23
-%r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i768
-%r259 = shl i768 %r258, 736
-%r260 = or i768 %r254, %r259
-%r261 = zext i768 %r260 to i800
-%r263 = getelementptr i32, i32* %r2, i32 24
-%r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i800
-%r266 = shl i800 %r265, 768
-%r267 = or i800 %r261, %r266
-%r268 = zext i800 %r267 to i832
-%r270 = getelementptr i32, i32* %r2, i32 25
-%r271 = load i32, i32* %r270
-%r272 = zext i32 %r271 to i832
-%r273 = shl i832 %r272, 800
-%r274 = or i832 %r268, %r273
-%r275 = zext i832 %r274 to i864
-%r277 = getelementptr i32, i32* %r2, i32 26
-%r278 = load i32, i32* %r277
-%r279 = zext i32 %r278 to i864
-%r280 = shl i864 %r279, 832
-%r281 = or i864 %r275, %r280
-%r282 = zext i864 %r281 to i896
-%r284 = getelementptr i32, i32* %r2, i32 27
-%r285 = load i32, i32* %r284
-%r286 = zext i32 %r285 to i896
-%r287 = shl i896 %r286, 864
-%r288 = or i896 %r282, %r287
-%r289 = zext i896 %r288 to i928
-%r290 = trunc i928 %r289 to i32
-%r291 = mul i32 %r290, %r6
-%r292 = call i480 @mulPv448x32(i32* %r3, i32 %r291)
-%r293 = zext i480 %r292 to i928
-%r294 = add i928 %r289, %r293
-%r295 = lshr i928 %r294, 32
-%r296 = trunc i928 %r295 to i896
-%r297 = trunc i896 %r296 to i32
-%r298 = mul i32 %r297, %r6
-%r299 = call i480 @mulPv448x32(i32* %r3, i32 %r298)
-%r300 = zext i480 %r299 to i896
-%r301 = add i896 %r296, %r300
-%r302 = lshr i896 %r301, 32
-%r303 = trunc i896 %r302 to i864
-%r304 = trunc i864 %r303 to i32
-%r305 = mul i32 %r304, %r6
-%r306 = call i480 @mulPv448x32(i32* %r3, i32 %r305)
-%r307 = zext i480 %r306 to i864
-%r308 = add i864 %r303, %r307
-%r309 = lshr i864 %r308, 32
-%r310 = trunc i864 %r309 to i832
-%r311 = trunc i832 %r310 to i32
-%r312 = mul i32 %r311, %r6
-%r313 = call i480 @mulPv448x32(i32* %r3, i32 %r312)
-%r314 = zext i480 %r313 to i832
-%r315 = add i832 %r310, %r314
-%r316 = lshr i832 %r315, 32
-%r317 = trunc i832 %r316 to i800
-%r318 = trunc i800 %r317 to i32
-%r319 = mul i32 %r318, %r6
-%r320 = call i480 @mulPv448x32(i32* %r3, i32 %r319)
-%r321 = zext i480 %r320 to i800
-%r322 = add i800 %r317, %r321
-%r323 = lshr i800 %r322, 32
-%r324 = trunc i800 %r323 to i768
-%r325 = trunc i768 %r324 to i32
-%r326 = mul i32 %r325, %r6
-%r327 = call i480 @mulPv448x32(i32* %r3, i32 %r326)
-%r328 = zext i480 %r327 to i768
-%r329 = add i768 %r324, %r328
-%r330 = lshr i768 %r329, 32
-%r331 = trunc i768 %r330 to i736
-%r332 = trunc i736 %r331 to i32
-%r333 = mul i32 %r332, %r6
-%r334 = call i480 @mulPv448x32(i32* %r3, i32 %r333)
-%r335 = zext i480 %r334 to i736
-%r336 = add i736 %r331, %r335
-%r337 = lshr i736 %r336, 32
-%r338 = trunc i736 %r337 to i704
-%r339 = trunc i704 %r338 to i32
-%r340 = mul i32 %r339, %r6
-%r341 = call i480 @mulPv448x32(i32* %r3, i32 %r340)
-%r342 = zext i480 %r341 to i704
-%r343 = add i704 %r338, %r342
-%r344 = lshr i704 %r343, 32
-%r345 = trunc i704 %r344 to i672
-%r346 = trunc i672 %r345 to i32
-%r347 = mul i32 %r346, %r6
-%r348 = call i480 @mulPv448x32(i32* %r3, i32 %r347)
-%r349 = zext i480 %r348 to i672
-%r350 = add i672 %r345, %r349
-%r351 = lshr i672 %r350, 32
-%r352 = trunc i672 %r351 to i640
-%r353 = trunc i640 %r352 to i32
-%r354 = mul i32 %r353, %r6
-%r355 = call i480 @mulPv448x32(i32* %r3, i32 %r354)
-%r356 = zext i480 %r355 to i640
-%r357 = add i640 %r352, %r356
-%r358 = lshr i640 %r357, 32
-%r359 = trunc i640 %r358 to i608
-%r360 = trunc i608 %r359 to i32
-%r361 = mul i32 %r360, %r6
-%r362 = call i480 @mulPv448x32(i32* %r3, i32 %r361)
-%r363 = zext i480 %r362 to i608
-%r364 = add i608 %r359, %r363
-%r365 = lshr i608 %r364, 32
-%r366 = trunc i608 %r365 to i576
-%r367 = trunc i576 %r366 to i32
-%r368 = mul i32 %r367, %r6
-%r369 = call i480 @mulPv448x32(i32* %r3, i32 %r368)
-%r370 = zext i480 %r369 to i576
-%r371 = add i576 %r366, %r370
-%r372 = lshr i576 %r371, 32
-%r373 = trunc i576 %r372 to i544
-%r374 = trunc i544 %r373 to i32
-%r375 = mul i32 %r374, %r6
-%r376 = call i480 @mulPv448x32(i32* %r3, i32 %r375)
-%r377 = zext i480 %r376 to i544
-%r378 = add i544 %r373, %r377
-%r379 = lshr i544 %r378, 32
-%r380 = trunc i544 %r379 to i512
-%r381 = trunc i512 %r380 to i32
-%r382 = mul i32 %r381, %r6
-%r383 = call i480 @mulPv448x32(i32* %r3, i32 %r382)
-%r384 = zext i480 %r383 to i512
-%r385 = add i512 %r380, %r384
-%r386 = lshr i512 %r385, 32
-%r387 = trunc i512 %r386 to i480
-%r388 = zext i448 %r98 to i480
-%r389 = sub i480 %r387, %r388
-%r390 = lshr i480 %r389, 448
-%r391 = trunc i480 %r390 to i1
-%r392 = select i1 %r391, i480 %r387, i480 %r389
-%r393 = trunc i480 %r392 to i448
-%r394 = trunc i448 %r393 to i32
-%r396 = getelementptr i32, i32* %r1, i32 0
-store i32 %r394, i32* %r396
-%r397 = lshr i448 %r393, 32
-%r398 = trunc i448 %r397 to i32
-%r400 = getelementptr i32, i32* %r1, i32 1
-store i32 %r398, i32* %r400
-%r401 = lshr i448 %r397, 32
-%r402 = trunc i448 %r401 to i32
-%r404 = getelementptr i32, i32* %r1, i32 2
-store i32 %r402, i32* %r404
-%r405 = lshr i448 %r401, 32
-%r406 = trunc i448 %r405 to i32
-%r408 = getelementptr i32, i32* %r1, i32 3
-store i32 %r406, i32* %r408
-%r409 = lshr i448 %r405, 32
-%r410 = trunc i448 %r409 to i32
-%r412 = getelementptr i32, i32* %r1, i32 4
-store i32 %r410, i32* %r412
-%r413 = lshr i448 %r409, 32
-%r414 = trunc i448 %r413 to i32
-%r416 = getelementptr i32, i32* %r1, i32 5
-store i32 %r414, i32* %r416
-%r417 = lshr i448 %r413, 32
-%r418 = trunc i448 %r417 to i32
-%r420 = getelementptr i32, i32* %r1, i32 6
-store i32 %r418, i32* %r420
-%r421 = lshr i448 %r417, 32
-%r422 = trunc i448 %r421 to i32
-%r424 = getelementptr i32, i32* %r1, i32 7
-store i32 %r422, i32* %r424
-%r425 = lshr i448 %r421, 32
-%r426 = trunc i448 %r425 to i32
-%r428 = getelementptr i32, i32* %r1, i32 8
-store i32 %r426, i32* %r428
-%r429 = lshr i448 %r425, 32
-%r430 = trunc i448 %r429 to i32
-%r432 = getelementptr i32, i32* %r1, i32 9
-store i32 %r430, i32* %r432
-%r433 = lshr i448 %r429, 32
-%r434 = trunc i448 %r433 to i32
-%r436 = getelementptr i32, i32* %r1, i32 10
-store i32 %r434, i32* %r436
-%r437 = lshr i448 %r433, 32
-%r438 = trunc i448 %r437 to i32
-%r440 = getelementptr i32, i32* %r1, i32 11
-store i32 %r438, i32* %r440
-%r441 = lshr i448 %r437, 32
-%r442 = trunc i448 %r441 to i32
-%r444 = getelementptr i32, i32* %r1, i32 12
-store i32 %r442, i32* %r444
-%r445 = lshr i448 %r441, 32
-%r446 = trunc i448 %r445 to i32
-%r448 = getelementptr i32, i32* %r1, i32 13
-store i32 %r446, i32* %r448
-ret void
-}
-define i32 @mcl_fp_addPre14L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r3, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r3, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r3, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r3, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r98 = load i32, i32* %r4
-%r99 = zext i32 %r98 to i64
-%r101 = getelementptr i32, i32* %r4, i32 1
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i64
-%r104 = shl i64 %r103, 32
-%r105 = or i64 %r99, %r104
-%r106 = zext i64 %r105 to i96
-%r108 = getelementptr i32, i32* %r4, i32 2
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i96
-%r111 = shl i96 %r110, 64
-%r112 = or i96 %r106, %r111
-%r113 = zext i96 %r112 to i128
-%r115 = getelementptr i32, i32* %r4, i32 3
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i128
-%r118 = shl i128 %r117, 96
-%r119 = or i128 %r113, %r118
-%r120 = zext i128 %r119 to i160
-%r122 = getelementptr i32, i32* %r4, i32 4
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i160
-%r125 = shl i160 %r124, 128
-%r126 = or i160 %r120, %r125
-%r127 = zext i160 %r126 to i192
-%r129 = getelementptr i32, i32* %r4, i32 5
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i192
-%r132 = shl i192 %r131, 160
-%r133 = or i192 %r127, %r132
-%r134 = zext i192 %r133 to i224
-%r136 = getelementptr i32, i32* %r4, i32 6
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i224
-%r139 = shl i224 %r138, 192
-%r140 = or i224 %r134, %r139
-%r141 = zext i224 %r140 to i256
-%r143 = getelementptr i32, i32* %r4, i32 7
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i256
-%r146 = shl i256 %r145, 224
-%r147 = or i256 %r141, %r146
-%r148 = zext i256 %r147 to i288
-%r150 = getelementptr i32, i32* %r4, i32 8
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i288
-%r153 = shl i288 %r152, 256
-%r154 = or i288 %r148, %r153
-%r155 = zext i288 %r154 to i320
-%r157 = getelementptr i32, i32* %r4, i32 9
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i320
-%r160 = shl i320 %r159, 288
-%r161 = or i320 %r155, %r160
-%r162 = zext i320 %r161 to i352
-%r164 = getelementptr i32, i32* %r4, i32 10
-%r165 = load i32, i32* %r164
-%r166 = zext i32 %r165 to i352
-%r167 = shl i352 %r166, 320
-%r168 = or i352 %r162, %r167
-%r169 = zext i352 %r168 to i384
-%r171 = getelementptr i32, i32* %r4, i32 11
-%r172 = load i32, i32* %r171
-%r173 = zext i32 %r172 to i384
-%r174 = shl i384 %r173, 352
-%r175 = or i384 %r169, %r174
-%r176 = zext i384 %r175 to i416
-%r178 = getelementptr i32, i32* %r4, i32 12
-%r179 = load i32, i32* %r178
-%r180 = zext i32 %r179 to i416
-%r181 = shl i416 %r180, 384
-%r182 = or i416 %r176, %r181
-%r183 = zext i416 %r182 to i448
-%r185 = getelementptr i32, i32* %r4, i32 13
-%r186 = load i32, i32* %r185
-%r187 = zext i32 %r186 to i448
-%r188 = shl i448 %r187, 416
-%r189 = or i448 %r183, %r188
-%r190 = zext i448 %r189 to i480
-%r191 = add i480 %r97, %r190
-%r192 = trunc i480 %r191 to i448
-%r193 = trunc i448 %r192 to i32
-%r195 = getelementptr i32, i32* %r2, i32 0
-store i32 %r193, i32* %r195
-%r196 = lshr i448 %r192, 32
-%r197 = trunc i448 %r196 to i32
-%r199 = getelementptr i32, i32* %r2, i32 1
-store i32 %r197, i32* %r199
-%r200 = lshr i448 %r196, 32
-%r201 = trunc i448 %r200 to i32
-%r203 = getelementptr i32, i32* %r2, i32 2
-store i32 %r201, i32* %r203
-%r204 = lshr i448 %r200, 32
-%r205 = trunc i448 %r204 to i32
-%r207 = getelementptr i32, i32* %r2, i32 3
-store i32 %r205, i32* %r207
-%r208 = lshr i448 %r204, 32
-%r209 = trunc i448 %r208 to i32
-%r211 = getelementptr i32, i32* %r2, i32 4
-store i32 %r209, i32* %r211
-%r212 = lshr i448 %r208, 32
-%r213 = trunc i448 %r212 to i32
-%r215 = getelementptr i32, i32* %r2, i32 5
-store i32 %r213, i32* %r215
-%r216 = lshr i448 %r212, 32
-%r217 = trunc i448 %r216 to i32
-%r219 = getelementptr i32, i32* %r2, i32 6
-store i32 %r217, i32* %r219
-%r220 = lshr i448 %r216, 32
-%r221 = trunc i448 %r220 to i32
-%r223 = getelementptr i32, i32* %r2, i32 7
-store i32 %r221, i32* %r223
-%r224 = lshr i448 %r220, 32
-%r225 = trunc i448 %r224 to i32
-%r227 = getelementptr i32, i32* %r2, i32 8
-store i32 %r225, i32* %r227
-%r228 = lshr i448 %r224, 32
-%r229 = trunc i448 %r228 to i32
-%r231 = getelementptr i32, i32* %r2, i32 9
-store i32 %r229, i32* %r231
-%r232 = lshr i448 %r228, 32
-%r233 = trunc i448 %r232 to i32
-%r235 = getelementptr i32, i32* %r2, i32 10
-store i32 %r233, i32* %r235
-%r236 = lshr i448 %r232, 32
-%r237 = trunc i448 %r236 to i32
-%r239 = getelementptr i32, i32* %r2, i32 11
-store i32 %r237, i32* %r239
-%r240 = lshr i448 %r236, 32
-%r241 = trunc i448 %r240 to i32
-%r243 = getelementptr i32, i32* %r2, i32 12
-store i32 %r241, i32* %r243
-%r244 = lshr i448 %r240, 32
-%r245 = trunc i448 %r244 to i32
-%r247 = getelementptr i32, i32* %r2, i32 13
-store i32 %r245, i32* %r247
-%r248 = lshr i480 %r191, 448
-%r249 = trunc i480 %r248 to i32
-ret i32 %r249
-}
-define i32 @mcl_fp_subPre14L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r3, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r3, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r3, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r3, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r98 = load i32, i32* %r4
-%r99 = zext i32 %r98 to i64
-%r101 = getelementptr i32, i32* %r4, i32 1
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i64
-%r104 = shl i64 %r103, 32
-%r105 = or i64 %r99, %r104
-%r106 = zext i64 %r105 to i96
-%r108 = getelementptr i32, i32* %r4, i32 2
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i96
-%r111 = shl i96 %r110, 64
-%r112 = or i96 %r106, %r111
-%r113 = zext i96 %r112 to i128
-%r115 = getelementptr i32, i32* %r4, i32 3
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i128
-%r118 = shl i128 %r117, 96
-%r119 = or i128 %r113, %r118
-%r120 = zext i128 %r119 to i160
-%r122 = getelementptr i32, i32* %r4, i32 4
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i160
-%r125 = shl i160 %r124, 128
-%r126 = or i160 %r120, %r125
-%r127 = zext i160 %r126 to i192
-%r129 = getelementptr i32, i32* %r4, i32 5
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i192
-%r132 = shl i192 %r131, 160
-%r133 = or i192 %r127, %r132
-%r134 = zext i192 %r133 to i224
-%r136 = getelementptr i32, i32* %r4, i32 6
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i224
-%r139 = shl i224 %r138, 192
-%r140 = or i224 %r134, %r139
-%r141 = zext i224 %r140 to i256
-%r143 = getelementptr i32, i32* %r4, i32 7
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i256
-%r146 = shl i256 %r145, 224
-%r147 = or i256 %r141, %r146
-%r148 = zext i256 %r147 to i288
-%r150 = getelementptr i32, i32* %r4, i32 8
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i288
-%r153 = shl i288 %r152, 256
-%r154 = or i288 %r148, %r153
-%r155 = zext i288 %r154 to i320
-%r157 = getelementptr i32, i32* %r4, i32 9
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i320
-%r160 = shl i320 %r159, 288
-%r161 = or i320 %r155, %r160
-%r162 = zext i320 %r161 to i352
-%r164 = getelementptr i32, i32* %r4, i32 10
-%r165 = load i32, i32* %r164
-%r166 = zext i32 %r165 to i352
-%r167 = shl i352 %r166, 320
-%r168 = or i352 %r162, %r167
-%r169 = zext i352 %r168 to i384
-%r171 = getelementptr i32, i32* %r4, i32 11
-%r172 = load i32, i32* %r171
-%r173 = zext i32 %r172 to i384
-%r174 = shl i384 %r173, 352
-%r175 = or i384 %r169, %r174
-%r176 = zext i384 %r175 to i416
-%r178 = getelementptr i32, i32* %r4, i32 12
-%r179 = load i32, i32* %r178
-%r180 = zext i32 %r179 to i416
-%r181 = shl i416 %r180, 384
-%r182 = or i416 %r176, %r181
-%r183 = zext i416 %r182 to i448
-%r185 = getelementptr i32, i32* %r4, i32 13
-%r186 = load i32, i32* %r185
-%r187 = zext i32 %r186 to i448
-%r188 = shl i448 %r187, 416
-%r189 = or i448 %r183, %r188
-%r190 = zext i448 %r189 to i480
-%r191 = sub i480 %r97, %r190
-%r192 = trunc i480 %r191 to i448
-%r193 = trunc i448 %r192 to i32
-%r195 = getelementptr i32, i32* %r2, i32 0
-store i32 %r193, i32* %r195
-%r196 = lshr i448 %r192, 32
-%r197 = trunc i448 %r196 to i32
-%r199 = getelementptr i32, i32* %r2, i32 1
-store i32 %r197, i32* %r199
-%r200 = lshr i448 %r196, 32
-%r201 = trunc i448 %r200 to i32
-%r203 = getelementptr i32, i32* %r2, i32 2
-store i32 %r201, i32* %r203
-%r204 = lshr i448 %r200, 32
-%r205 = trunc i448 %r204 to i32
-%r207 = getelementptr i32, i32* %r2, i32 3
-store i32 %r205, i32* %r207
-%r208 = lshr i448 %r204, 32
-%r209 = trunc i448 %r208 to i32
-%r211 = getelementptr i32, i32* %r2, i32 4
-store i32 %r209, i32* %r211
-%r212 = lshr i448 %r208, 32
-%r213 = trunc i448 %r212 to i32
-%r215 = getelementptr i32, i32* %r2, i32 5
-store i32 %r213, i32* %r215
-%r216 = lshr i448 %r212, 32
-%r217 = trunc i448 %r216 to i32
-%r219 = getelementptr i32, i32* %r2, i32 6
-store i32 %r217, i32* %r219
-%r220 = lshr i448 %r216, 32
-%r221 = trunc i448 %r220 to i32
-%r223 = getelementptr i32, i32* %r2, i32 7
-store i32 %r221, i32* %r223
-%r224 = lshr i448 %r220, 32
-%r225 = trunc i448 %r224 to i32
-%r227 = getelementptr i32, i32* %r2, i32 8
-store i32 %r225, i32* %r227
-%r228 = lshr i448 %r224, 32
-%r229 = trunc i448 %r228 to i32
-%r231 = getelementptr i32, i32* %r2, i32 9
-store i32 %r229, i32* %r231
-%r232 = lshr i448 %r228, 32
-%r233 = trunc i448 %r232 to i32
-%r235 = getelementptr i32, i32* %r2, i32 10
-store i32 %r233, i32* %r235
-%r236 = lshr i448 %r232, 32
-%r237 = trunc i448 %r236 to i32
-%r239 = getelementptr i32, i32* %r2, i32 11
-store i32 %r237, i32* %r239
-%r240 = lshr i448 %r236, 32
-%r241 = trunc i448 %r240 to i32
-%r243 = getelementptr i32, i32* %r2, i32 12
-store i32 %r241, i32* %r243
-%r244 = lshr i448 %r240, 32
-%r245 = trunc i448 %r244 to i32
-%r247 = getelementptr i32, i32* %r2, i32 13
-store i32 %r245, i32* %r247
-%r248 = lshr i480 %r191, 448
-%r249 = trunc i480 %r248 to i32
-%r251 = and i32 %r249, 1
-ret i32 %r251
-}
-define void @mcl_fp_shr1_14L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = zext i64 %r10 to i96
-%r13 = getelementptr i32, i32* %r2, i32 2
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i96
-%r16 = shl i96 %r15, 64
-%r17 = or i96 %r11, %r16
-%r18 = zext i96 %r17 to i128
-%r20 = getelementptr i32, i32* %r2, i32 3
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i128
-%r23 = shl i128 %r22, 96
-%r24 = or i128 %r18, %r23
-%r25 = zext i128 %r24 to i160
-%r27 = getelementptr i32, i32* %r2, i32 4
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i160
-%r30 = shl i160 %r29, 128
-%r31 = or i160 %r25, %r30
-%r32 = zext i160 %r31 to i192
-%r34 = getelementptr i32, i32* %r2, i32 5
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i192
-%r37 = shl i192 %r36, 160
-%r38 = or i192 %r32, %r37
-%r39 = zext i192 %r38 to i224
-%r41 = getelementptr i32, i32* %r2, i32 6
-%r42 = load i32, i32* %r41
-%r43 = zext i32 %r42 to i224
-%r44 = shl i224 %r43, 192
-%r45 = or i224 %r39, %r44
-%r46 = zext i224 %r45 to i256
-%r48 = getelementptr i32, i32* %r2, i32 7
-%r49 = load i32, i32* %r48
-%r50 = zext i32 %r49 to i256
-%r51 = shl i256 %r50, 224
-%r52 = or i256 %r46, %r51
-%r53 = zext i256 %r52 to i288
-%r55 = getelementptr i32, i32* %r2, i32 8
-%r56 = load i32, i32* %r55
-%r57 = zext i32 %r56 to i288
-%r58 = shl i288 %r57, 256
-%r59 = or i288 %r53, %r58
-%r60 = zext i288 %r59 to i320
-%r62 = getelementptr i32, i32* %r2, i32 9
-%r63 = load i32, i32* %r62
-%r64 = zext i32 %r63 to i320
-%r65 = shl i320 %r64, 288
-%r66 = or i320 %r60, %r65
-%r67 = zext i320 %r66 to i352
-%r69 = getelementptr i32, i32* %r2, i32 10
-%r70 = load i32, i32* %r69
-%r71 = zext i32 %r70 to i352
-%r72 = shl i352 %r71, 320
-%r73 = or i352 %r67, %r72
-%r74 = zext i352 %r73 to i384
-%r76 = getelementptr i32, i32* %r2, i32 11
-%r77 = load i32, i32* %r76
-%r78 = zext i32 %r77 to i384
-%r79 = shl i384 %r78, 352
-%r80 = or i384 %r74, %r79
-%r81 = zext i384 %r80 to i416
-%r83 = getelementptr i32, i32* %r2, i32 12
-%r84 = load i32, i32* %r83
-%r85 = zext i32 %r84 to i416
-%r86 = shl i416 %r85, 384
-%r87 = or i416 %r81, %r86
-%r88 = zext i416 %r87 to i448
-%r90 = getelementptr i32, i32* %r2, i32 13
-%r91 = load i32, i32* %r90
-%r92 = zext i32 %r91 to i448
-%r93 = shl i448 %r92, 416
-%r94 = or i448 %r88, %r93
-%r95 = lshr i448 %r94, 1
-%r96 = trunc i448 %r95 to i32
-%r98 = getelementptr i32, i32* %r1, i32 0
-store i32 %r96, i32* %r98
-%r99 = lshr i448 %r95, 32
-%r100 = trunc i448 %r99 to i32
-%r102 = getelementptr i32, i32* %r1, i32 1
-store i32 %r100, i32* %r102
-%r103 = lshr i448 %r99, 32
-%r104 = trunc i448 %r103 to i32
-%r106 = getelementptr i32, i32* %r1, i32 2
-store i32 %r104, i32* %r106
-%r107 = lshr i448 %r103, 32
-%r108 = trunc i448 %r107 to i32
-%r110 = getelementptr i32, i32* %r1, i32 3
-store i32 %r108, i32* %r110
-%r111 = lshr i448 %r107, 32
-%r112 = trunc i448 %r111 to i32
-%r114 = getelementptr i32, i32* %r1, i32 4
-store i32 %r112, i32* %r114
-%r115 = lshr i448 %r111, 32
-%r116 = trunc i448 %r115 to i32
-%r118 = getelementptr i32, i32* %r1, i32 5
-store i32 %r116, i32* %r118
-%r119 = lshr i448 %r115, 32
-%r120 = trunc i448 %r119 to i32
-%r122 = getelementptr i32, i32* %r1, i32 6
-store i32 %r120, i32* %r122
-%r123 = lshr i448 %r119, 32
-%r124 = trunc i448 %r123 to i32
-%r126 = getelementptr i32, i32* %r1, i32 7
-store i32 %r124, i32* %r126
-%r127 = lshr i448 %r123, 32
-%r128 = trunc i448 %r127 to i32
-%r130 = getelementptr i32, i32* %r1, i32 8
-store i32 %r128, i32* %r130
-%r131 = lshr i448 %r127, 32
-%r132 = trunc i448 %r131 to i32
-%r134 = getelementptr i32, i32* %r1, i32 9
-store i32 %r132, i32* %r134
-%r135 = lshr i448 %r131, 32
-%r136 = trunc i448 %r135 to i32
-%r138 = getelementptr i32, i32* %r1, i32 10
-store i32 %r136, i32* %r138
-%r139 = lshr i448 %r135, 32
-%r140 = trunc i448 %r139 to i32
-%r142 = getelementptr i32, i32* %r1, i32 11
-store i32 %r140, i32* %r142
-%r143 = lshr i448 %r139, 32
-%r144 = trunc i448 %r143 to i32
-%r146 = getelementptr i32, i32* %r1, i32 12
-store i32 %r144, i32* %r146
-%r147 = lshr i448 %r143, 32
-%r148 = trunc i448 %r147 to i32
-%r150 = getelementptr i32, i32* %r1, i32 13
-store i32 %r148, i32* %r150
-ret void
-}
-define void @mcl_fp_add14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = load i32, i32* %r3
-%r98 = zext i32 %r97 to i64
-%r100 = getelementptr i32, i32* %r3, i32 1
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i64
-%r103 = shl i64 %r102, 32
-%r104 = or i64 %r98, %r103
-%r105 = zext i64 %r104 to i96
-%r107 = getelementptr i32, i32* %r3, i32 2
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i96
-%r110 = shl i96 %r109, 64
-%r111 = or i96 %r105, %r110
-%r112 = zext i96 %r111 to i128
-%r114 = getelementptr i32, i32* %r3, i32 3
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i128
-%r117 = shl i128 %r116, 96
-%r118 = or i128 %r112, %r117
-%r119 = zext i128 %r118 to i160
-%r121 = getelementptr i32, i32* %r3, i32 4
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i160
-%r124 = shl i160 %r123, 128
-%r125 = or i160 %r119, %r124
-%r126 = zext i160 %r125 to i192
-%r128 = getelementptr i32, i32* %r3, i32 5
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i192
-%r131 = shl i192 %r130, 160
-%r132 = or i192 %r126, %r131
-%r133 = zext i192 %r132 to i224
-%r135 = getelementptr i32, i32* %r3, i32 6
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i224
-%r138 = shl i224 %r137, 192
-%r139 = or i224 %r133, %r138
-%r140 = zext i224 %r139 to i256
-%r142 = getelementptr i32, i32* %r3, i32 7
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i256
-%r145 = shl i256 %r144, 224
-%r146 = or i256 %r140, %r145
-%r147 = zext i256 %r146 to i288
-%r149 = getelementptr i32, i32* %r3, i32 8
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i288
-%r152 = shl i288 %r151, 256
-%r153 = or i288 %r147, %r152
-%r154 = zext i288 %r153 to i320
-%r156 = getelementptr i32, i32* %r3, i32 9
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i320
-%r159 = shl i320 %r158, 288
-%r160 = or i320 %r154, %r159
-%r161 = zext i320 %r160 to i352
-%r163 = getelementptr i32, i32* %r3, i32 10
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i352
-%r166 = shl i352 %r165, 320
-%r167 = or i352 %r161, %r166
-%r168 = zext i352 %r167 to i384
-%r170 = getelementptr i32, i32* %r3, i32 11
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i384
-%r173 = shl i384 %r172, 352
-%r174 = or i384 %r168, %r173
-%r175 = zext i384 %r174 to i416
-%r177 = getelementptr i32, i32* %r3, i32 12
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i416
-%r180 = shl i416 %r179, 384
-%r181 = or i416 %r175, %r180
-%r182 = zext i416 %r181 to i448
-%r184 = getelementptr i32, i32* %r3, i32 13
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i448
-%r187 = shl i448 %r186, 416
-%r188 = or i448 %r182, %r187
-%r189 = zext i448 %r96 to i480
-%r190 = zext i448 %r188 to i480
-%r191 = add i480 %r189, %r190
-%r192 = trunc i480 %r191 to i448
-%r193 = trunc i448 %r192 to i32
-%r195 = getelementptr i32, i32* %r1, i32 0
-store i32 %r193, i32* %r195
-%r196 = lshr i448 %r192, 32
-%r197 = trunc i448 %r196 to i32
-%r199 = getelementptr i32, i32* %r1, i32 1
-store i32 %r197, i32* %r199
-%r200 = lshr i448 %r196, 32
-%r201 = trunc i448 %r200 to i32
-%r203 = getelementptr i32, i32* %r1, i32 2
-store i32 %r201, i32* %r203
-%r204 = lshr i448 %r200, 32
-%r205 = trunc i448 %r204 to i32
-%r207 = getelementptr i32, i32* %r1, i32 3
-store i32 %r205, i32* %r207
-%r208 = lshr i448 %r204, 32
-%r209 = trunc i448 %r208 to i32
-%r211 = getelementptr i32, i32* %r1, i32 4
-store i32 %r209, i32* %r211
-%r212 = lshr i448 %r208, 32
-%r213 = trunc i448 %r212 to i32
-%r215 = getelementptr i32, i32* %r1, i32 5
-store i32 %r213, i32* %r215
-%r216 = lshr i448 %r212, 32
-%r217 = trunc i448 %r216 to i32
-%r219 = getelementptr i32, i32* %r1, i32 6
-store i32 %r217, i32* %r219
-%r220 = lshr i448 %r216, 32
-%r221 = trunc i448 %r220 to i32
-%r223 = getelementptr i32, i32* %r1, i32 7
-store i32 %r221, i32* %r223
-%r224 = lshr i448 %r220, 32
-%r225 = trunc i448 %r224 to i32
-%r227 = getelementptr i32, i32* %r1, i32 8
-store i32 %r225, i32* %r227
-%r228 = lshr i448 %r224, 32
-%r229 = trunc i448 %r228 to i32
-%r231 = getelementptr i32, i32* %r1, i32 9
-store i32 %r229, i32* %r231
-%r232 = lshr i448 %r228, 32
-%r233 = trunc i448 %r232 to i32
-%r235 = getelementptr i32, i32* %r1, i32 10
-store i32 %r233, i32* %r235
-%r236 = lshr i448 %r232, 32
-%r237 = trunc i448 %r236 to i32
-%r239 = getelementptr i32, i32* %r1, i32 11
-store i32 %r237, i32* %r239
-%r240 = lshr i448 %r236, 32
-%r241 = trunc i448 %r240 to i32
-%r243 = getelementptr i32, i32* %r1, i32 12
-store i32 %r241, i32* %r243
-%r244 = lshr i448 %r240, 32
-%r245 = trunc i448 %r244 to i32
-%r247 = getelementptr i32, i32* %r1, i32 13
-store i32 %r245, i32* %r247
-%r248 = load i32, i32* %r4
-%r249 = zext i32 %r248 to i64
-%r251 = getelementptr i32, i32* %r4, i32 1
-%r252 = load i32, i32* %r251
-%r253 = zext i32 %r252 to i64
-%r254 = shl i64 %r253, 32
-%r255 = or i64 %r249, %r254
-%r256 = zext i64 %r255 to i96
-%r258 = getelementptr i32, i32* %r4, i32 2
-%r259 = load i32, i32* %r258
-%r260 = zext i32 %r259 to i96
-%r261 = shl i96 %r260, 64
-%r262 = or i96 %r256, %r261
-%r263 = zext i96 %r262 to i128
-%r265 = getelementptr i32, i32* %r4, i32 3
-%r266 = load i32, i32* %r265
-%r267 = zext i32 %r266 to i128
-%r268 = shl i128 %r267, 96
-%r269 = or i128 %r263, %r268
-%r270 = zext i128 %r269 to i160
-%r272 = getelementptr i32, i32* %r4, i32 4
-%r273 = load i32, i32* %r272
-%r274 = zext i32 %r273 to i160
-%r275 = shl i160 %r274, 128
-%r276 = or i160 %r270, %r275
-%r277 = zext i160 %r276 to i192
-%r279 = getelementptr i32, i32* %r4, i32 5
-%r280 = load i32, i32* %r279
-%r281 = zext i32 %r280 to i192
-%r282 = shl i192 %r281, 160
-%r283 = or i192 %r277, %r282
-%r284 = zext i192 %r283 to i224
-%r286 = getelementptr i32, i32* %r4, i32 6
-%r287 = load i32, i32* %r286
-%r288 = zext i32 %r287 to i224
-%r289 = shl i224 %r288, 192
-%r290 = or i224 %r284, %r289
-%r291 = zext i224 %r290 to i256
-%r293 = getelementptr i32, i32* %r4, i32 7
-%r294 = load i32, i32* %r293
-%r295 = zext i32 %r294 to i256
-%r296 = shl i256 %r295, 224
-%r297 = or i256 %r291, %r296
-%r298 = zext i256 %r297 to i288
-%r300 = getelementptr i32, i32* %r4, i32 8
-%r301 = load i32, i32* %r300
-%r302 = zext i32 %r301 to i288
-%r303 = shl i288 %r302, 256
-%r304 = or i288 %r298, %r303
-%r305 = zext i288 %r304 to i320
-%r307 = getelementptr i32, i32* %r4, i32 9
-%r308 = load i32, i32* %r307
-%r309 = zext i32 %r308 to i320
-%r310 = shl i320 %r309, 288
-%r311 = or i320 %r305, %r310
-%r312 = zext i320 %r311 to i352
-%r314 = getelementptr i32, i32* %r4, i32 10
-%r315 = load i32, i32* %r314
-%r316 = zext i32 %r315 to i352
-%r317 = shl i352 %r316, 320
-%r318 = or i352 %r312, %r317
-%r319 = zext i352 %r318 to i384
-%r321 = getelementptr i32, i32* %r4, i32 11
-%r322 = load i32, i32* %r321
-%r323 = zext i32 %r322 to i384
-%r324 = shl i384 %r323, 352
-%r325 = or i384 %r319, %r324
-%r326 = zext i384 %r325 to i416
-%r328 = getelementptr i32, i32* %r4, i32 12
-%r329 = load i32, i32* %r328
-%r330 = zext i32 %r329 to i416
-%r331 = shl i416 %r330, 384
-%r332 = or i416 %r326, %r331
-%r333 = zext i416 %r332 to i448
-%r335 = getelementptr i32, i32* %r4, i32 13
-%r336 = load i32, i32* %r335
-%r337 = zext i32 %r336 to i448
-%r338 = shl i448 %r337, 416
-%r339 = or i448 %r333, %r338
-%r340 = zext i448 %r339 to i480
-%r341 = sub i480 %r191, %r340
-%r342 = lshr i480 %r341, 448
-%r343 = trunc i480 %r342 to i1
-br i1%r343, label %carry, label %nocarry
-nocarry:
-%r344 = trunc i480 %r341 to i448
-%r345 = trunc i448 %r344 to i32
-%r347 = getelementptr i32, i32* %r1, i32 0
-store i32 %r345, i32* %r347
-%r348 = lshr i448 %r344, 32
-%r349 = trunc i448 %r348 to i32
-%r351 = getelementptr i32, i32* %r1, i32 1
-store i32 %r349, i32* %r351
-%r352 = lshr i448 %r348, 32
-%r353 = trunc i448 %r352 to i32
-%r355 = getelementptr i32, i32* %r1, i32 2
-store i32 %r353, i32* %r355
-%r356 = lshr i448 %r352, 32
-%r357 = trunc i448 %r356 to i32
-%r359 = getelementptr i32, i32* %r1, i32 3
-store i32 %r357, i32* %r359
-%r360 = lshr i448 %r356, 32
-%r361 = trunc i448 %r360 to i32
-%r363 = getelementptr i32, i32* %r1, i32 4
-store i32 %r361, i32* %r363
-%r364 = lshr i448 %r360, 32
-%r365 = trunc i448 %r364 to i32
-%r367 = getelementptr i32, i32* %r1, i32 5
-store i32 %r365, i32* %r367
-%r368 = lshr i448 %r364, 32
-%r369 = trunc i448 %r368 to i32
-%r371 = getelementptr i32, i32* %r1, i32 6
-store i32 %r369, i32* %r371
-%r372 = lshr i448 %r368, 32
-%r373 = trunc i448 %r372 to i32
-%r375 = getelementptr i32, i32* %r1, i32 7
-store i32 %r373, i32* %r375
-%r376 = lshr i448 %r372, 32
-%r377 = trunc i448 %r376 to i32
-%r379 = getelementptr i32, i32* %r1, i32 8
-store i32 %r377, i32* %r379
-%r380 = lshr i448 %r376, 32
-%r381 = trunc i448 %r380 to i32
-%r383 = getelementptr i32, i32* %r1, i32 9
-store i32 %r381, i32* %r383
-%r384 = lshr i448 %r380, 32
-%r385 = trunc i448 %r384 to i32
-%r387 = getelementptr i32, i32* %r1, i32 10
-store i32 %r385, i32* %r387
-%r388 = lshr i448 %r384, 32
-%r389 = trunc i448 %r388 to i32
-%r391 = getelementptr i32, i32* %r1, i32 11
-store i32 %r389, i32* %r391
-%r392 = lshr i448 %r388, 32
-%r393 = trunc i448 %r392 to i32
-%r395 = getelementptr i32, i32* %r1, i32 12
-store i32 %r393, i32* %r395
-%r396 = lshr i448 %r392, 32
-%r397 = trunc i448 %r396 to i32
-%r399 = getelementptr i32, i32* %r1, i32 13
-store i32 %r397, i32* %r399
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = load i32, i32* %r3
-%r98 = zext i32 %r97 to i64
-%r100 = getelementptr i32, i32* %r3, i32 1
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i64
-%r103 = shl i64 %r102, 32
-%r104 = or i64 %r98, %r103
-%r105 = zext i64 %r104 to i96
-%r107 = getelementptr i32, i32* %r3, i32 2
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i96
-%r110 = shl i96 %r109, 64
-%r111 = or i96 %r105, %r110
-%r112 = zext i96 %r111 to i128
-%r114 = getelementptr i32, i32* %r3, i32 3
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i128
-%r117 = shl i128 %r116, 96
-%r118 = or i128 %r112, %r117
-%r119 = zext i128 %r118 to i160
-%r121 = getelementptr i32, i32* %r3, i32 4
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i160
-%r124 = shl i160 %r123, 128
-%r125 = or i160 %r119, %r124
-%r126 = zext i160 %r125 to i192
-%r128 = getelementptr i32, i32* %r3, i32 5
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i192
-%r131 = shl i192 %r130, 160
-%r132 = or i192 %r126, %r131
-%r133 = zext i192 %r132 to i224
-%r135 = getelementptr i32, i32* %r3, i32 6
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i224
-%r138 = shl i224 %r137, 192
-%r139 = or i224 %r133, %r138
-%r140 = zext i224 %r139 to i256
-%r142 = getelementptr i32, i32* %r3, i32 7
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i256
-%r145 = shl i256 %r144, 224
-%r146 = or i256 %r140, %r145
-%r147 = zext i256 %r146 to i288
-%r149 = getelementptr i32, i32* %r3, i32 8
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i288
-%r152 = shl i288 %r151, 256
-%r153 = or i288 %r147, %r152
-%r154 = zext i288 %r153 to i320
-%r156 = getelementptr i32, i32* %r3, i32 9
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i320
-%r159 = shl i320 %r158, 288
-%r160 = or i320 %r154, %r159
-%r161 = zext i320 %r160 to i352
-%r163 = getelementptr i32, i32* %r3, i32 10
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i352
-%r166 = shl i352 %r165, 320
-%r167 = or i352 %r161, %r166
-%r168 = zext i352 %r167 to i384
-%r170 = getelementptr i32, i32* %r3, i32 11
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i384
-%r173 = shl i384 %r172, 352
-%r174 = or i384 %r168, %r173
-%r175 = zext i384 %r174 to i416
-%r177 = getelementptr i32, i32* %r3, i32 12
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i416
-%r180 = shl i416 %r179, 384
-%r181 = or i416 %r175, %r180
-%r182 = zext i416 %r181 to i448
-%r184 = getelementptr i32, i32* %r3, i32 13
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i448
-%r187 = shl i448 %r186, 416
-%r188 = or i448 %r182, %r187
-%r189 = add i448 %r96, %r188
-%r190 = load i32, i32* %r4
-%r191 = zext i32 %r190 to i64
-%r193 = getelementptr i32, i32* %r4, i32 1
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i64
-%r196 = shl i64 %r195, 32
-%r197 = or i64 %r191, %r196
-%r198 = zext i64 %r197 to i96
-%r200 = getelementptr i32, i32* %r4, i32 2
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i96
-%r203 = shl i96 %r202, 64
-%r204 = or i96 %r198, %r203
-%r205 = zext i96 %r204 to i128
-%r207 = getelementptr i32, i32* %r4, i32 3
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i128
-%r210 = shl i128 %r209, 96
-%r211 = or i128 %r205, %r210
-%r212 = zext i128 %r211 to i160
-%r214 = getelementptr i32, i32* %r4, i32 4
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i160
-%r217 = shl i160 %r216, 128
-%r218 = or i160 %r212, %r217
-%r219 = zext i160 %r218 to i192
-%r221 = getelementptr i32, i32* %r4, i32 5
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i192
-%r224 = shl i192 %r223, 160
-%r225 = or i192 %r219, %r224
-%r226 = zext i192 %r225 to i224
-%r228 = getelementptr i32, i32* %r4, i32 6
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i224
-%r231 = shl i224 %r230, 192
-%r232 = or i224 %r226, %r231
-%r233 = zext i224 %r232 to i256
-%r235 = getelementptr i32, i32* %r4, i32 7
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i256
-%r238 = shl i256 %r237, 224
-%r239 = or i256 %r233, %r238
-%r240 = zext i256 %r239 to i288
-%r242 = getelementptr i32, i32* %r4, i32 8
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i288
-%r245 = shl i288 %r244, 256
-%r246 = or i288 %r240, %r245
-%r247 = zext i288 %r246 to i320
-%r249 = getelementptr i32, i32* %r4, i32 9
-%r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i320
-%r252 = shl i320 %r251, 288
-%r253 = or i320 %r247, %r252
-%r254 = zext i320 %r253 to i352
-%r256 = getelementptr i32, i32* %r4, i32 10
-%r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i352
-%r259 = shl i352 %r258, 320
-%r260 = or i352 %r254, %r259
-%r261 = zext i352 %r260 to i384
-%r263 = getelementptr i32, i32* %r4, i32 11
-%r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i384
-%r266 = shl i384 %r265, 352
-%r267 = or i384 %r261, %r266
-%r268 = zext i384 %r267 to i416
-%r270 = getelementptr i32, i32* %r4, i32 12
-%r271 = load i32, i32* %r270
-%r272 = zext i32 %r271 to i416
-%r273 = shl i416 %r272, 384
-%r274 = or i416 %r268, %r273
-%r275 = zext i416 %r274 to i448
-%r277 = getelementptr i32, i32* %r4, i32 13
-%r278 = load i32, i32* %r277
-%r279 = zext i32 %r278 to i448
-%r280 = shl i448 %r279, 416
-%r281 = or i448 %r275, %r280
-%r282 = sub i448 %r189, %r281
-%r283 = lshr i448 %r282, 447
-%r284 = trunc i448 %r283 to i1
-%r285 = select i1 %r284, i448 %r189, i448 %r282
-%r286 = trunc i448 %r285 to i32
-%r288 = getelementptr i32, i32* %r1, i32 0
-store i32 %r286, i32* %r288
-%r289 = lshr i448 %r285, 32
-%r290 = trunc i448 %r289 to i32
-%r292 = getelementptr i32, i32* %r1, i32 1
-store i32 %r290, i32* %r292
-%r293 = lshr i448 %r289, 32
-%r294 = trunc i448 %r293 to i32
-%r296 = getelementptr i32, i32* %r1, i32 2
-store i32 %r294, i32* %r296
-%r297 = lshr i448 %r293, 32
-%r298 = trunc i448 %r297 to i32
-%r300 = getelementptr i32, i32* %r1, i32 3
-store i32 %r298, i32* %r300
-%r301 = lshr i448 %r297, 32
-%r302 = trunc i448 %r301 to i32
-%r304 = getelementptr i32, i32* %r1, i32 4
-store i32 %r302, i32* %r304
-%r305 = lshr i448 %r301, 32
-%r306 = trunc i448 %r305 to i32
-%r308 = getelementptr i32, i32* %r1, i32 5
-store i32 %r306, i32* %r308
-%r309 = lshr i448 %r305, 32
-%r310 = trunc i448 %r309 to i32
-%r312 = getelementptr i32, i32* %r1, i32 6
-store i32 %r310, i32* %r312
-%r313 = lshr i448 %r309, 32
-%r314 = trunc i448 %r313 to i32
-%r316 = getelementptr i32, i32* %r1, i32 7
-store i32 %r314, i32* %r316
-%r317 = lshr i448 %r313, 32
-%r318 = trunc i448 %r317 to i32
-%r320 = getelementptr i32, i32* %r1, i32 8
-store i32 %r318, i32* %r320
-%r321 = lshr i448 %r317, 32
-%r322 = trunc i448 %r321 to i32
-%r324 = getelementptr i32, i32* %r1, i32 9
-store i32 %r322, i32* %r324
-%r325 = lshr i448 %r321, 32
-%r326 = trunc i448 %r325 to i32
-%r328 = getelementptr i32, i32* %r1, i32 10
-store i32 %r326, i32* %r328
-%r329 = lshr i448 %r325, 32
-%r330 = trunc i448 %r329 to i32
-%r332 = getelementptr i32, i32* %r1, i32 11
-store i32 %r330, i32* %r332
-%r333 = lshr i448 %r329, 32
-%r334 = trunc i448 %r333 to i32
-%r336 = getelementptr i32, i32* %r1, i32 12
-store i32 %r334, i32* %r336
-%r337 = lshr i448 %r333, 32
-%r338 = trunc i448 %r337 to i32
-%r340 = getelementptr i32, i32* %r1, i32 13
-store i32 %r338, i32* %r340
-ret void
-}
-define void @mcl_fp_sub14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = load i32, i32* %r3
-%r98 = zext i32 %r97 to i64
-%r100 = getelementptr i32, i32* %r3, i32 1
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i64
-%r103 = shl i64 %r102, 32
-%r104 = or i64 %r98, %r103
-%r105 = zext i64 %r104 to i96
-%r107 = getelementptr i32, i32* %r3, i32 2
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i96
-%r110 = shl i96 %r109, 64
-%r111 = or i96 %r105, %r110
-%r112 = zext i96 %r111 to i128
-%r114 = getelementptr i32, i32* %r3, i32 3
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i128
-%r117 = shl i128 %r116, 96
-%r118 = or i128 %r112, %r117
-%r119 = zext i128 %r118 to i160
-%r121 = getelementptr i32, i32* %r3, i32 4
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i160
-%r124 = shl i160 %r123, 128
-%r125 = or i160 %r119, %r124
-%r126 = zext i160 %r125 to i192
-%r128 = getelementptr i32, i32* %r3, i32 5
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i192
-%r131 = shl i192 %r130, 160
-%r132 = or i192 %r126, %r131
-%r133 = zext i192 %r132 to i224
-%r135 = getelementptr i32, i32* %r3, i32 6
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i224
-%r138 = shl i224 %r137, 192
-%r139 = or i224 %r133, %r138
-%r140 = zext i224 %r139 to i256
-%r142 = getelementptr i32, i32* %r3, i32 7
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i256
-%r145 = shl i256 %r144, 224
-%r146 = or i256 %r140, %r145
-%r147 = zext i256 %r146 to i288
-%r149 = getelementptr i32, i32* %r3, i32 8
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i288
-%r152 = shl i288 %r151, 256
-%r153 = or i288 %r147, %r152
-%r154 = zext i288 %r153 to i320
-%r156 = getelementptr i32, i32* %r3, i32 9
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i320
-%r159 = shl i320 %r158, 288
-%r160 = or i320 %r154, %r159
-%r161 = zext i320 %r160 to i352
-%r163 = getelementptr i32, i32* %r3, i32 10
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i352
-%r166 = shl i352 %r165, 320
-%r167 = or i352 %r161, %r166
-%r168 = zext i352 %r167 to i384
-%r170 = getelementptr i32, i32* %r3, i32 11
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i384
-%r173 = shl i384 %r172, 352
-%r174 = or i384 %r168, %r173
-%r175 = zext i384 %r174 to i416
-%r177 = getelementptr i32, i32* %r3, i32 12
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i416
-%r180 = shl i416 %r179, 384
-%r181 = or i416 %r175, %r180
-%r182 = zext i416 %r181 to i448
-%r184 = getelementptr i32, i32* %r3, i32 13
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i448
-%r187 = shl i448 %r186, 416
-%r188 = or i448 %r182, %r187
-%r189 = zext i448 %r96 to i480
-%r190 = zext i448 %r188 to i480
-%r191 = sub i480 %r189, %r190
-%r192 = trunc i480 %r191 to i448
-%r193 = lshr i480 %r191, 448
-%r194 = trunc i480 %r193 to i1
-%r195 = trunc i448 %r192 to i32
-%r197 = getelementptr i32, i32* %r1, i32 0
-store i32 %r195, i32* %r197
-%r198 = lshr i448 %r192, 32
-%r199 = trunc i448 %r198 to i32
-%r201 = getelementptr i32, i32* %r1, i32 1
-store i32 %r199, i32* %r201
-%r202 = lshr i448 %r198, 32
-%r203 = trunc i448 %r202 to i32
-%r205 = getelementptr i32, i32* %r1, i32 2
-store i32 %r203, i32* %r205
-%r206 = lshr i448 %r202, 32
-%r207 = trunc i448 %r206 to i32
-%r209 = getelementptr i32, i32* %r1, i32 3
-store i32 %r207, i32* %r209
-%r210 = lshr i448 %r206, 32
-%r211 = trunc i448 %r210 to i32
-%r213 = getelementptr i32, i32* %r1, i32 4
-store i32 %r211, i32* %r213
-%r214 = lshr i448 %r210, 32
-%r215 = trunc i448 %r214 to i32
-%r217 = getelementptr i32, i32* %r1, i32 5
-store i32 %r215, i32* %r217
-%r218 = lshr i448 %r214, 32
-%r219 = trunc i448 %r218 to i32
-%r221 = getelementptr i32, i32* %r1, i32 6
-store i32 %r219, i32* %r221
-%r222 = lshr i448 %r218, 32
-%r223 = trunc i448 %r222 to i32
-%r225 = getelementptr i32, i32* %r1, i32 7
-store i32 %r223, i32* %r225
-%r226 = lshr i448 %r222, 32
-%r227 = trunc i448 %r226 to i32
-%r229 = getelementptr i32, i32* %r1, i32 8
-store i32 %r227, i32* %r229
-%r230 = lshr i448 %r226, 32
-%r231 = trunc i448 %r230 to i32
-%r233 = getelementptr i32, i32* %r1, i32 9
-store i32 %r231, i32* %r233
-%r234 = lshr i448 %r230, 32
-%r235 = trunc i448 %r234 to i32
-%r237 = getelementptr i32, i32* %r1, i32 10
-store i32 %r235, i32* %r237
-%r238 = lshr i448 %r234, 32
-%r239 = trunc i448 %r238 to i32
-%r241 = getelementptr i32, i32* %r1, i32 11
-store i32 %r239, i32* %r241
-%r242 = lshr i448 %r238, 32
-%r243 = trunc i448 %r242 to i32
-%r245 = getelementptr i32, i32* %r1, i32 12
-store i32 %r243, i32* %r245
-%r246 = lshr i448 %r242, 32
-%r247 = trunc i448 %r246 to i32
-%r249 = getelementptr i32, i32* %r1, i32 13
-store i32 %r247, i32* %r249
-br i1%r194, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r250 = load i32, i32* %r4
-%r251 = zext i32 %r250 to i64
-%r253 = getelementptr i32, i32* %r4, i32 1
-%r254 = load i32, i32* %r253
-%r255 = zext i32 %r254 to i64
-%r256 = shl i64 %r255, 32
-%r257 = or i64 %r251, %r256
-%r258 = zext i64 %r257 to i96
-%r260 = getelementptr i32, i32* %r4, i32 2
-%r261 = load i32, i32* %r260
-%r262 = zext i32 %r261 to i96
-%r263 = shl i96 %r262, 64
-%r264 = or i96 %r258, %r263
-%r265 = zext i96 %r264 to i128
-%r267 = getelementptr i32, i32* %r4, i32 3
-%r268 = load i32, i32* %r267
-%r269 = zext i32 %r268 to i128
-%r270 = shl i128 %r269, 96
-%r271 = or i128 %r265, %r270
-%r272 = zext i128 %r271 to i160
-%r274 = getelementptr i32, i32* %r4, i32 4
-%r275 = load i32, i32* %r274
-%r276 = zext i32 %r275 to i160
-%r277 = shl i160 %r276, 128
-%r278 = or i160 %r272, %r277
-%r279 = zext i160 %r278 to i192
-%r281 = getelementptr i32, i32* %r4, i32 5
-%r282 = load i32, i32* %r281
-%r283 = zext i32 %r282 to i192
-%r284 = shl i192 %r283, 160
-%r285 = or i192 %r279, %r284
-%r286 = zext i192 %r285 to i224
-%r288 = getelementptr i32, i32* %r4, i32 6
-%r289 = load i32, i32* %r288
-%r290 = zext i32 %r289 to i224
-%r291 = shl i224 %r290, 192
-%r292 = or i224 %r286, %r291
-%r293 = zext i224 %r292 to i256
-%r295 = getelementptr i32, i32* %r4, i32 7
-%r296 = load i32, i32* %r295
-%r297 = zext i32 %r296 to i256
-%r298 = shl i256 %r297, 224
-%r299 = or i256 %r293, %r298
-%r300 = zext i256 %r299 to i288
-%r302 = getelementptr i32, i32* %r4, i32 8
-%r303 = load i32, i32* %r302
-%r304 = zext i32 %r303 to i288
-%r305 = shl i288 %r304, 256
-%r306 = or i288 %r300, %r305
-%r307 = zext i288 %r306 to i320
-%r309 = getelementptr i32, i32* %r4, i32 9
-%r310 = load i32, i32* %r309
-%r311 = zext i32 %r310 to i320
-%r312 = shl i320 %r311, 288
-%r313 = or i320 %r307, %r312
-%r314 = zext i320 %r313 to i352
-%r316 = getelementptr i32, i32* %r4, i32 10
-%r317 = load i32, i32* %r316
-%r318 = zext i32 %r317 to i352
-%r319 = shl i352 %r318, 320
-%r320 = or i352 %r314, %r319
-%r321 = zext i352 %r320 to i384
-%r323 = getelementptr i32, i32* %r4, i32 11
-%r324 = load i32, i32* %r323
-%r325 = zext i32 %r324 to i384
-%r326 = shl i384 %r325, 352
-%r327 = or i384 %r321, %r326
-%r328 = zext i384 %r327 to i416
-%r330 = getelementptr i32, i32* %r4, i32 12
-%r331 = load i32, i32* %r330
-%r332 = zext i32 %r331 to i416
-%r333 = shl i416 %r332, 384
-%r334 = or i416 %r328, %r333
-%r335 = zext i416 %r334 to i448
-%r337 = getelementptr i32, i32* %r4, i32 13
-%r338 = load i32, i32* %r337
-%r339 = zext i32 %r338 to i448
-%r340 = shl i448 %r339, 416
-%r341 = or i448 %r335, %r340
-%r342 = add i448 %r192, %r341
-%r343 = trunc i448 %r342 to i32
-%r345 = getelementptr i32, i32* %r1, i32 0
-store i32 %r343, i32* %r345
-%r346 = lshr i448 %r342, 32
-%r347 = trunc i448 %r346 to i32
-%r349 = getelementptr i32, i32* %r1, i32 1
-store i32 %r347, i32* %r349
-%r350 = lshr i448 %r346, 32
-%r351 = trunc i448 %r350 to i32
-%r353 = getelementptr i32, i32* %r1, i32 2
-store i32 %r351, i32* %r353
-%r354 = lshr i448 %r350, 32
-%r355 = trunc i448 %r354 to i32
-%r357 = getelementptr i32, i32* %r1, i32 3
-store i32 %r355, i32* %r357
-%r358 = lshr i448 %r354, 32
-%r359 = trunc i448 %r358 to i32
-%r361 = getelementptr i32, i32* %r1, i32 4
-store i32 %r359, i32* %r361
-%r362 = lshr i448 %r358, 32
-%r363 = trunc i448 %r362 to i32
-%r365 = getelementptr i32, i32* %r1, i32 5
-store i32 %r363, i32* %r365
-%r366 = lshr i448 %r362, 32
-%r367 = trunc i448 %r366 to i32
-%r369 = getelementptr i32, i32* %r1, i32 6
-store i32 %r367, i32* %r369
-%r370 = lshr i448 %r366, 32
-%r371 = trunc i448 %r370 to i32
-%r373 = getelementptr i32, i32* %r1, i32 7
-store i32 %r371, i32* %r373
-%r374 = lshr i448 %r370, 32
-%r375 = trunc i448 %r374 to i32
-%r377 = getelementptr i32, i32* %r1, i32 8
-store i32 %r375, i32* %r377
-%r378 = lshr i448 %r374, 32
-%r379 = trunc i448 %r378 to i32
-%r381 = getelementptr i32, i32* %r1, i32 9
-store i32 %r379, i32* %r381
-%r382 = lshr i448 %r378, 32
-%r383 = trunc i448 %r382 to i32
-%r385 = getelementptr i32, i32* %r1, i32 10
-store i32 %r383, i32* %r385
-%r386 = lshr i448 %r382, 32
-%r387 = trunc i448 %r386 to i32
-%r389 = getelementptr i32, i32* %r1, i32 11
-store i32 %r387, i32* %r389
-%r390 = lshr i448 %r386, 32
-%r391 = trunc i448 %r390 to i32
-%r393 = getelementptr i32, i32* %r1, i32 12
-store i32 %r391, i32* %r393
-%r394 = lshr i448 %r390, 32
-%r395 = trunc i448 %r394 to i32
-%r397 = getelementptr i32, i32* %r1, i32 13
-store i32 %r395, i32* %r397
-ret void
-}
-define void @mcl_fp_subNF14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = load i32, i32* %r3
-%r98 = zext i32 %r97 to i64
-%r100 = getelementptr i32, i32* %r3, i32 1
-%r101 = load i32, i32* %r100
-%r102 = zext i32 %r101 to i64
-%r103 = shl i64 %r102, 32
-%r104 = or i64 %r98, %r103
-%r105 = zext i64 %r104 to i96
-%r107 = getelementptr i32, i32* %r3, i32 2
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i96
-%r110 = shl i96 %r109, 64
-%r111 = or i96 %r105, %r110
-%r112 = zext i96 %r111 to i128
-%r114 = getelementptr i32, i32* %r3, i32 3
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i128
-%r117 = shl i128 %r116, 96
-%r118 = or i128 %r112, %r117
-%r119 = zext i128 %r118 to i160
-%r121 = getelementptr i32, i32* %r3, i32 4
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i160
-%r124 = shl i160 %r123, 128
-%r125 = or i160 %r119, %r124
-%r126 = zext i160 %r125 to i192
-%r128 = getelementptr i32, i32* %r3, i32 5
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i192
-%r131 = shl i192 %r130, 160
-%r132 = or i192 %r126, %r131
-%r133 = zext i192 %r132 to i224
-%r135 = getelementptr i32, i32* %r3, i32 6
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i224
-%r138 = shl i224 %r137, 192
-%r139 = or i224 %r133, %r138
-%r140 = zext i224 %r139 to i256
-%r142 = getelementptr i32, i32* %r3, i32 7
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i256
-%r145 = shl i256 %r144, 224
-%r146 = or i256 %r140, %r145
-%r147 = zext i256 %r146 to i288
-%r149 = getelementptr i32, i32* %r3, i32 8
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i288
-%r152 = shl i288 %r151, 256
-%r153 = or i288 %r147, %r152
-%r154 = zext i288 %r153 to i320
-%r156 = getelementptr i32, i32* %r3, i32 9
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i320
-%r159 = shl i320 %r158, 288
-%r160 = or i320 %r154, %r159
-%r161 = zext i320 %r160 to i352
-%r163 = getelementptr i32, i32* %r3, i32 10
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i352
-%r166 = shl i352 %r165, 320
-%r167 = or i352 %r161, %r166
-%r168 = zext i352 %r167 to i384
-%r170 = getelementptr i32, i32* %r3, i32 11
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i384
-%r173 = shl i384 %r172, 352
-%r174 = or i384 %r168, %r173
-%r175 = zext i384 %r174 to i416
-%r177 = getelementptr i32, i32* %r3, i32 12
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i416
-%r180 = shl i416 %r179, 384
-%r181 = or i416 %r175, %r180
-%r182 = zext i416 %r181 to i448
-%r184 = getelementptr i32, i32* %r3, i32 13
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i448
-%r187 = shl i448 %r186, 416
-%r188 = or i448 %r182, %r187
-%r189 = sub i448 %r96, %r188
-%r190 = lshr i448 %r189, 447
-%r191 = trunc i448 %r190 to i1
-%r192 = load i32, i32* %r4
-%r193 = zext i32 %r192 to i64
-%r195 = getelementptr i32, i32* %r4, i32 1
-%r196 = load i32, i32* %r195
-%r197 = zext i32 %r196 to i64
-%r198 = shl i64 %r197, 32
-%r199 = or i64 %r193, %r198
-%r200 = zext i64 %r199 to i96
-%r202 = getelementptr i32, i32* %r4, i32 2
-%r203 = load i32, i32* %r202
-%r204 = zext i32 %r203 to i96
-%r205 = shl i96 %r204, 64
-%r206 = or i96 %r200, %r205
-%r207 = zext i96 %r206 to i128
-%r209 = getelementptr i32, i32* %r4, i32 3
-%r210 = load i32, i32* %r209
-%r211 = zext i32 %r210 to i128
-%r212 = shl i128 %r211, 96
-%r213 = or i128 %r207, %r212
-%r214 = zext i128 %r213 to i160
-%r216 = getelementptr i32, i32* %r4, i32 4
-%r217 = load i32, i32* %r216
-%r218 = zext i32 %r217 to i160
-%r219 = shl i160 %r218, 128
-%r220 = or i160 %r214, %r219
-%r221 = zext i160 %r220 to i192
-%r223 = getelementptr i32, i32* %r4, i32 5
-%r224 = load i32, i32* %r223
-%r225 = zext i32 %r224 to i192
-%r226 = shl i192 %r225, 160
-%r227 = or i192 %r221, %r226
-%r228 = zext i192 %r227 to i224
-%r230 = getelementptr i32, i32* %r4, i32 6
-%r231 = load i32, i32* %r230
-%r232 = zext i32 %r231 to i224
-%r233 = shl i224 %r232, 192
-%r234 = or i224 %r228, %r233
-%r235 = zext i224 %r234 to i256
-%r237 = getelementptr i32, i32* %r4, i32 7
-%r238 = load i32, i32* %r237
-%r239 = zext i32 %r238 to i256
-%r240 = shl i256 %r239, 224
-%r241 = or i256 %r235, %r240
-%r242 = zext i256 %r241 to i288
-%r244 = getelementptr i32, i32* %r4, i32 8
-%r245 = load i32, i32* %r244
-%r246 = zext i32 %r245 to i288
-%r247 = shl i288 %r246, 256
-%r248 = or i288 %r242, %r247
-%r249 = zext i288 %r248 to i320
-%r251 = getelementptr i32, i32* %r4, i32 9
-%r252 = load i32, i32* %r251
-%r253 = zext i32 %r252 to i320
-%r254 = shl i320 %r253, 288
-%r255 = or i320 %r249, %r254
-%r256 = zext i320 %r255 to i352
-%r258 = getelementptr i32, i32* %r4, i32 10
-%r259 = load i32, i32* %r258
-%r260 = zext i32 %r259 to i352
-%r261 = shl i352 %r260, 320
-%r262 = or i352 %r256, %r261
-%r263 = zext i352 %r262 to i384
-%r265 = getelementptr i32, i32* %r4, i32 11
-%r266 = load i32, i32* %r265
-%r267 = zext i32 %r266 to i384
-%r268 = shl i384 %r267, 352
-%r269 = or i384 %r263, %r268
-%r270 = zext i384 %r269 to i416
-%r272 = getelementptr i32, i32* %r4, i32 12
-%r273 = load i32, i32* %r272
-%r274 = zext i32 %r273 to i416
-%r275 = shl i416 %r274, 384
-%r276 = or i416 %r270, %r275
-%r277 = zext i416 %r276 to i448
-%r279 = getelementptr i32, i32* %r4, i32 13
-%r280 = load i32, i32* %r279
-%r281 = zext i32 %r280 to i448
-%r282 = shl i448 %r281, 416
-%r283 = or i448 %r277, %r282
-%r285 = select i1 %r191, i448 %r283, i448 0
-%r286 = add i448 %r189, %r285
-%r287 = trunc i448 %r286 to i32
-%r289 = getelementptr i32, i32* %r1, i32 0
-store i32 %r287, i32* %r289
-%r290 = lshr i448 %r286, 32
-%r291 = trunc i448 %r290 to i32
-%r293 = getelementptr i32, i32* %r1, i32 1
-store i32 %r291, i32* %r293
-%r294 = lshr i448 %r290, 32
-%r295 = trunc i448 %r294 to i32
-%r297 = getelementptr i32, i32* %r1, i32 2
-store i32 %r295, i32* %r297
-%r298 = lshr i448 %r294, 32
-%r299 = trunc i448 %r298 to i32
-%r301 = getelementptr i32, i32* %r1, i32 3
-store i32 %r299, i32* %r301
-%r302 = lshr i448 %r298, 32
-%r303 = trunc i448 %r302 to i32
-%r305 = getelementptr i32, i32* %r1, i32 4
-store i32 %r303, i32* %r305
-%r306 = lshr i448 %r302, 32
-%r307 = trunc i448 %r306 to i32
-%r309 = getelementptr i32, i32* %r1, i32 5
-store i32 %r307, i32* %r309
-%r310 = lshr i448 %r306, 32
-%r311 = trunc i448 %r310 to i32
-%r313 = getelementptr i32, i32* %r1, i32 6
-store i32 %r311, i32* %r313
-%r314 = lshr i448 %r310, 32
-%r315 = trunc i448 %r314 to i32
-%r317 = getelementptr i32, i32* %r1, i32 7
-store i32 %r315, i32* %r317
-%r318 = lshr i448 %r314, 32
-%r319 = trunc i448 %r318 to i32
-%r321 = getelementptr i32, i32* %r1, i32 8
-store i32 %r319, i32* %r321
-%r322 = lshr i448 %r318, 32
-%r323 = trunc i448 %r322 to i32
-%r325 = getelementptr i32, i32* %r1, i32 9
-store i32 %r323, i32* %r325
-%r326 = lshr i448 %r322, 32
-%r327 = trunc i448 %r326 to i32
-%r329 = getelementptr i32, i32* %r1, i32 10
-store i32 %r327, i32* %r329
-%r330 = lshr i448 %r326, 32
-%r331 = trunc i448 %r330 to i32
-%r333 = getelementptr i32, i32* %r1, i32 11
-store i32 %r331, i32* %r333
-%r334 = lshr i448 %r330, 32
-%r335 = trunc i448 %r334 to i32
-%r337 = getelementptr i32, i32* %r1, i32 12
-store i32 %r335, i32* %r337
-%r338 = lshr i448 %r334, 32
-%r339 = trunc i448 %r338 to i32
-%r341 = getelementptr i32, i32* %r1, i32 13
-store i32 %r339, i32* %r341
-ret void
-}
-define void @mcl_fpDbl_add14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = zext i576 %r124 to i608
-%r127 = getelementptr i32, i32* %r2, i32 18
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i608
-%r130 = shl i608 %r129, 576
-%r131 = or i608 %r125, %r130
-%r132 = zext i608 %r131 to i640
-%r134 = getelementptr i32, i32* %r2, i32 19
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i640
-%r137 = shl i640 %r136, 608
-%r138 = or i640 %r132, %r137
-%r139 = zext i640 %r138 to i672
-%r141 = getelementptr i32, i32* %r2, i32 20
-%r142 = load i32, i32* %r141
-%r143 = zext i32 %r142 to i672
-%r144 = shl i672 %r143, 640
-%r145 = or i672 %r139, %r144
-%r146 = zext i672 %r145 to i704
-%r148 = getelementptr i32, i32* %r2, i32 21
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i704
-%r151 = shl i704 %r150, 672
-%r152 = or i704 %r146, %r151
-%r153 = zext i704 %r152 to i736
-%r155 = getelementptr i32, i32* %r2, i32 22
-%r156 = load i32, i32* %r155
-%r157 = zext i32 %r156 to i736
-%r158 = shl i736 %r157, 704
-%r159 = or i736 %r153, %r158
-%r160 = zext i736 %r159 to i768
-%r162 = getelementptr i32, i32* %r2, i32 23
-%r163 = load i32, i32* %r162
-%r164 = zext i32 %r163 to i768
-%r165 = shl i768 %r164, 736
-%r166 = or i768 %r160, %r165
-%r167 = zext i768 %r166 to i800
-%r169 = getelementptr i32, i32* %r2, i32 24
-%r170 = load i32, i32* %r169
-%r171 = zext i32 %r170 to i800
-%r172 = shl i800 %r171, 768
-%r173 = or i800 %r167, %r172
-%r174 = zext i800 %r173 to i832
-%r176 = getelementptr i32, i32* %r2, i32 25
-%r177 = load i32, i32* %r176
-%r178 = zext i32 %r177 to i832
-%r179 = shl i832 %r178, 800
-%r180 = or i832 %r174, %r179
-%r181 = zext i832 %r180 to i864
-%r183 = getelementptr i32, i32* %r2, i32 26
-%r184 = load i32, i32* %r183
-%r185 = zext i32 %r184 to i864
-%r186 = shl i864 %r185, 832
-%r187 = or i864 %r181, %r186
-%r188 = zext i864 %r187 to i896
-%r190 = getelementptr i32, i32* %r2, i32 27
-%r191 = load i32, i32* %r190
-%r192 = zext i32 %r191 to i896
-%r193 = shl i896 %r192, 864
-%r194 = or i896 %r188, %r193
-%r195 = load i32, i32* %r3
-%r196 = zext i32 %r195 to i64
-%r198 = getelementptr i32, i32* %r3, i32 1
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i64
-%r201 = shl i64 %r200, 32
-%r202 = or i64 %r196, %r201
-%r203 = zext i64 %r202 to i96
-%r205 = getelementptr i32, i32* %r3, i32 2
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i96
-%r208 = shl i96 %r207, 64
-%r209 = or i96 %r203, %r208
-%r210 = zext i96 %r209 to i128
-%r212 = getelementptr i32, i32* %r3, i32 3
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i128
-%r215 = shl i128 %r214, 96
-%r216 = or i128 %r210, %r215
-%r217 = zext i128 %r216 to i160
-%r219 = getelementptr i32, i32* %r3, i32 4
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i160
-%r222 = shl i160 %r221, 128
-%r223 = or i160 %r217, %r222
-%r224 = zext i160 %r223 to i192
-%r226 = getelementptr i32, i32* %r3, i32 5
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i192
-%r229 = shl i192 %r228, 160
-%r230 = or i192 %r224, %r229
-%r231 = zext i192 %r230 to i224
-%r233 = getelementptr i32, i32* %r3, i32 6
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i224
-%r236 = shl i224 %r235, 192
-%r237 = or i224 %r231, %r236
-%r238 = zext i224 %r237 to i256
-%r240 = getelementptr i32, i32* %r3, i32 7
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i256
-%r243 = shl i256 %r242, 224
-%r244 = or i256 %r238, %r243
-%r245 = zext i256 %r244 to i288
-%r247 = getelementptr i32, i32* %r3, i32 8
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i288
-%r250 = shl i288 %r249, 256
-%r251 = or i288 %r245, %r250
-%r252 = zext i288 %r251 to i320
-%r254 = getelementptr i32, i32* %r3, i32 9
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i320
-%r257 = shl i320 %r256, 288
-%r258 = or i320 %r252, %r257
-%r259 = zext i320 %r258 to i352
-%r261 = getelementptr i32, i32* %r3, i32 10
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i352
-%r264 = shl i352 %r263, 320
-%r265 = or i352 %r259, %r264
-%r266 = zext i352 %r265 to i384
-%r268 = getelementptr i32, i32* %r3, i32 11
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i384
-%r271 = shl i384 %r270, 352
-%r272 = or i384 %r266, %r271
-%r273 = zext i384 %r272 to i416
-%r275 = getelementptr i32, i32* %r3, i32 12
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i416
-%r278 = shl i416 %r277, 384
-%r279 = or i416 %r273, %r278
-%r280 = zext i416 %r279 to i448
-%r282 = getelementptr i32, i32* %r3, i32 13
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i448
-%r285 = shl i448 %r284, 416
-%r286 = or i448 %r280, %r285
-%r287 = zext i448 %r286 to i480
-%r289 = getelementptr i32, i32* %r3, i32 14
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i480
-%r292 = shl i480 %r291, 448
-%r293 = or i480 %r287, %r292
-%r294 = zext i480 %r293 to i512
-%r296 = getelementptr i32, i32* %r3, i32 15
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i512
-%r299 = shl i512 %r298, 480
-%r300 = or i512 %r294, %r299
-%r301 = zext i512 %r300 to i544
-%r303 = getelementptr i32, i32* %r3, i32 16
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i544
-%r306 = shl i544 %r305, 512
-%r307 = or i544 %r301, %r306
-%r308 = zext i544 %r307 to i576
-%r310 = getelementptr i32, i32* %r3, i32 17
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i576
-%r313 = shl i576 %r312, 544
-%r314 = or i576 %r308, %r313
-%r315 = zext i576 %r314 to i608
-%r317 = getelementptr i32, i32* %r3, i32 18
-%r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i608
-%r320 = shl i608 %r319, 576
-%r321 = or i608 %r315, %r320
-%r322 = zext i608 %r321 to i640
-%r324 = getelementptr i32, i32* %r3, i32 19
-%r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i640
-%r327 = shl i640 %r326, 608
-%r328 = or i640 %r322, %r327
-%r329 = zext i640 %r328 to i672
-%r331 = getelementptr i32, i32* %r3, i32 20
-%r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i672
-%r334 = shl i672 %r333, 640
-%r335 = or i672 %r329, %r334
-%r336 = zext i672 %r335 to i704
-%r338 = getelementptr i32, i32* %r3, i32 21
-%r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i704
-%r341 = shl i704 %r340, 672
-%r342 = or i704 %r336, %r341
-%r343 = zext i704 %r342 to i736
-%r345 = getelementptr i32, i32* %r3, i32 22
-%r346 = load i32, i32* %r345
-%r347 = zext i32 %r346 to i736
-%r348 = shl i736 %r347, 704
-%r349 = or i736 %r343, %r348
-%r350 = zext i736 %r349 to i768
-%r352 = getelementptr i32, i32* %r3, i32 23
-%r353 = load i32, i32* %r352
-%r354 = zext i32 %r353 to i768
-%r355 = shl i768 %r354, 736
-%r356 = or i768 %r350, %r355
-%r357 = zext i768 %r356 to i800
-%r359 = getelementptr i32, i32* %r3, i32 24
-%r360 = load i32, i32* %r359
-%r361 = zext i32 %r360 to i800
-%r362 = shl i800 %r361, 768
-%r363 = or i800 %r357, %r362
-%r364 = zext i800 %r363 to i832
-%r366 = getelementptr i32, i32* %r3, i32 25
-%r367 = load i32, i32* %r366
-%r368 = zext i32 %r367 to i832
-%r369 = shl i832 %r368, 800
-%r370 = or i832 %r364, %r369
-%r371 = zext i832 %r370 to i864
-%r373 = getelementptr i32, i32* %r3, i32 26
-%r374 = load i32, i32* %r373
-%r375 = zext i32 %r374 to i864
-%r376 = shl i864 %r375, 832
-%r377 = or i864 %r371, %r376
-%r378 = zext i864 %r377 to i896
-%r380 = getelementptr i32, i32* %r3, i32 27
-%r381 = load i32, i32* %r380
-%r382 = zext i32 %r381 to i896
-%r383 = shl i896 %r382, 864
-%r384 = or i896 %r378, %r383
-%r385 = zext i896 %r194 to i928
-%r386 = zext i896 %r384 to i928
-%r387 = add i928 %r385, %r386
-%r388 = trunc i928 %r387 to i448
-%r389 = trunc i448 %r388 to i32
-%r391 = getelementptr i32, i32* %r1, i32 0
-store i32 %r389, i32* %r391
-%r392 = lshr i448 %r388, 32
-%r393 = trunc i448 %r392 to i32
-%r395 = getelementptr i32, i32* %r1, i32 1
-store i32 %r393, i32* %r395
-%r396 = lshr i448 %r392, 32
-%r397 = trunc i448 %r396 to i32
-%r399 = getelementptr i32, i32* %r1, i32 2
-store i32 %r397, i32* %r399
-%r400 = lshr i448 %r396, 32
-%r401 = trunc i448 %r400 to i32
-%r403 = getelementptr i32, i32* %r1, i32 3
-store i32 %r401, i32* %r403
-%r404 = lshr i448 %r400, 32
-%r405 = trunc i448 %r404 to i32
-%r407 = getelementptr i32, i32* %r1, i32 4
-store i32 %r405, i32* %r407
-%r408 = lshr i448 %r404, 32
-%r409 = trunc i448 %r408 to i32
-%r411 = getelementptr i32, i32* %r1, i32 5
-store i32 %r409, i32* %r411
-%r412 = lshr i448 %r408, 32
-%r413 = trunc i448 %r412 to i32
-%r415 = getelementptr i32, i32* %r1, i32 6
-store i32 %r413, i32* %r415
-%r416 = lshr i448 %r412, 32
-%r417 = trunc i448 %r416 to i32
-%r419 = getelementptr i32, i32* %r1, i32 7
-store i32 %r417, i32* %r419
-%r420 = lshr i448 %r416, 32
-%r421 = trunc i448 %r420 to i32
-%r423 = getelementptr i32, i32* %r1, i32 8
-store i32 %r421, i32* %r423
-%r424 = lshr i448 %r420, 32
-%r425 = trunc i448 %r424 to i32
-%r427 = getelementptr i32, i32* %r1, i32 9
-store i32 %r425, i32* %r427
-%r428 = lshr i448 %r424, 32
-%r429 = trunc i448 %r428 to i32
-%r431 = getelementptr i32, i32* %r1, i32 10
-store i32 %r429, i32* %r431
-%r432 = lshr i448 %r428, 32
-%r433 = trunc i448 %r432 to i32
-%r435 = getelementptr i32, i32* %r1, i32 11
-store i32 %r433, i32* %r435
-%r436 = lshr i448 %r432, 32
-%r437 = trunc i448 %r436 to i32
-%r439 = getelementptr i32, i32* %r1, i32 12
-store i32 %r437, i32* %r439
-%r440 = lshr i448 %r436, 32
-%r441 = trunc i448 %r440 to i32
-%r443 = getelementptr i32, i32* %r1, i32 13
-store i32 %r441, i32* %r443
-%r444 = lshr i928 %r387, 448
-%r445 = trunc i928 %r444 to i480
-%r446 = load i32, i32* %r4
-%r447 = zext i32 %r446 to i64
-%r449 = getelementptr i32, i32* %r4, i32 1
-%r450 = load i32, i32* %r449
-%r451 = zext i32 %r450 to i64
-%r452 = shl i64 %r451, 32
-%r453 = or i64 %r447, %r452
-%r454 = zext i64 %r453 to i96
-%r456 = getelementptr i32, i32* %r4, i32 2
-%r457 = load i32, i32* %r456
-%r458 = zext i32 %r457 to i96
-%r459 = shl i96 %r458, 64
-%r460 = or i96 %r454, %r459
-%r461 = zext i96 %r460 to i128
-%r463 = getelementptr i32, i32* %r4, i32 3
-%r464 = load i32, i32* %r463
-%r465 = zext i32 %r464 to i128
-%r466 = shl i128 %r465, 96
-%r467 = or i128 %r461, %r466
-%r468 = zext i128 %r467 to i160
-%r470 = getelementptr i32, i32* %r4, i32 4
-%r471 = load i32, i32* %r470
-%r472 = zext i32 %r471 to i160
-%r473 = shl i160 %r472, 128
-%r474 = or i160 %r468, %r473
-%r475 = zext i160 %r474 to i192
-%r477 = getelementptr i32, i32* %r4, i32 5
-%r478 = load i32, i32* %r477
-%r479 = zext i32 %r478 to i192
-%r480 = shl i192 %r479, 160
-%r481 = or i192 %r475, %r480
-%r482 = zext i192 %r481 to i224
-%r484 = getelementptr i32, i32* %r4, i32 6
-%r485 = load i32, i32* %r484
-%r486 = zext i32 %r485 to i224
-%r487 = shl i224 %r486, 192
-%r488 = or i224 %r482, %r487
-%r489 = zext i224 %r488 to i256
-%r491 = getelementptr i32, i32* %r4, i32 7
-%r492 = load i32, i32* %r491
-%r493 = zext i32 %r492 to i256
-%r494 = shl i256 %r493, 224
-%r495 = or i256 %r489, %r494
-%r496 = zext i256 %r495 to i288
-%r498 = getelementptr i32, i32* %r4, i32 8
-%r499 = load i32, i32* %r498
-%r500 = zext i32 %r499 to i288
-%r501 = shl i288 %r500, 256
-%r502 = or i288 %r496, %r501
-%r503 = zext i288 %r502 to i320
-%r505 = getelementptr i32, i32* %r4, i32 9
-%r506 = load i32, i32* %r505
-%r507 = zext i32 %r506 to i320
-%r508 = shl i320 %r507, 288
-%r509 = or i320 %r503, %r508
-%r510 = zext i320 %r509 to i352
-%r512 = getelementptr i32, i32* %r4, i32 10
-%r513 = load i32, i32* %r512
-%r514 = zext i32 %r513 to i352
-%r515 = shl i352 %r514, 320
-%r516 = or i352 %r510, %r515
-%r517 = zext i352 %r516 to i384
-%r519 = getelementptr i32, i32* %r4, i32 11
-%r520 = load i32, i32* %r519
-%r521 = zext i32 %r520 to i384
-%r522 = shl i384 %r521, 352
-%r523 = or i384 %r517, %r522
-%r524 = zext i384 %r523 to i416
-%r526 = getelementptr i32, i32* %r4, i32 12
-%r527 = load i32, i32* %r526
-%r528 = zext i32 %r527 to i416
-%r529 = shl i416 %r528, 384
-%r530 = or i416 %r524, %r529
-%r531 = zext i416 %r530 to i448
-%r533 = getelementptr i32, i32* %r4, i32 13
-%r534 = load i32, i32* %r533
-%r535 = zext i32 %r534 to i448
-%r536 = shl i448 %r535, 416
-%r537 = or i448 %r531, %r536
-%r538 = zext i448 %r537 to i480
-%r539 = sub i480 %r445, %r538
-%r540 = lshr i480 %r539, 448
-%r541 = trunc i480 %r540 to i1
-%r542 = select i1 %r541, i480 %r445, i480 %r539
-%r543 = trunc i480 %r542 to i448
-%r545 = getelementptr i32, i32* %r1, i32 14
-%r546 = trunc i448 %r543 to i32
-%r548 = getelementptr i32, i32* %r545, i32 0
-store i32 %r546, i32* %r548
-%r549 = lshr i448 %r543, 32
-%r550 = trunc i448 %r549 to i32
-%r552 = getelementptr i32, i32* %r545, i32 1
-store i32 %r550, i32* %r552
-%r553 = lshr i448 %r549, 32
-%r554 = trunc i448 %r553 to i32
-%r556 = getelementptr i32, i32* %r545, i32 2
-store i32 %r554, i32* %r556
-%r557 = lshr i448 %r553, 32
-%r558 = trunc i448 %r557 to i32
-%r560 = getelementptr i32, i32* %r545, i32 3
-store i32 %r558, i32* %r560
-%r561 = lshr i448 %r557, 32
-%r562 = trunc i448 %r561 to i32
-%r564 = getelementptr i32, i32* %r545, i32 4
-store i32 %r562, i32* %r564
-%r565 = lshr i448 %r561, 32
-%r566 = trunc i448 %r565 to i32
-%r568 = getelementptr i32, i32* %r545, i32 5
-store i32 %r566, i32* %r568
-%r569 = lshr i448 %r565, 32
-%r570 = trunc i448 %r569 to i32
-%r572 = getelementptr i32, i32* %r545, i32 6
-store i32 %r570, i32* %r572
-%r573 = lshr i448 %r569, 32
-%r574 = trunc i448 %r573 to i32
-%r576 = getelementptr i32, i32* %r545, i32 7
-store i32 %r574, i32* %r576
-%r577 = lshr i448 %r573, 32
-%r578 = trunc i448 %r577 to i32
-%r580 = getelementptr i32, i32* %r545, i32 8
-store i32 %r578, i32* %r580
-%r581 = lshr i448 %r577, 32
-%r582 = trunc i448 %r581 to i32
-%r584 = getelementptr i32, i32* %r545, i32 9
-store i32 %r582, i32* %r584
-%r585 = lshr i448 %r581, 32
-%r586 = trunc i448 %r585 to i32
-%r588 = getelementptr i32, i32* %r545, i32 10
-store i32 %r586, i32* %r588
-%r589 = lshr i448 %r585, 32
-%r590 = trunc i448 %r589 to i32
-%r592 = getelementptr i32, i32* %r545, i32 11
-store i32 %r590, i32* %r592
-%r593 = lshr i448 %r589, 32
-%r594 = trunc i448 %r593 to i32
-%r596 = getelementptr i32, i32* %r545, i32 12
-store i32 %r594, i32* %r596
-%r597 = lshr i448 %r593, 32
-%r598 = trunc i448 %r597 to i32
-%r600 = getelementptr i32, i32* %r545, i32 13
-store i32 %r598, i32* %r600
-ret void
-}
-define void @mcl_fpDbl_sub14L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = zext i576 %r124 to i608
-%r127 = getelementptr i32, i32* %r2, i32 18
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i608
-%r130 = shl i608 %r129, 576
-%r131 = or i608 %r125, %r130
-%r132 = zext i608 %r131 to i640
-%r134 = getelementptr i32, i32* %r2, i32 19
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i640
-%r137 = shl i640 %r136, 608
-%r138 = or i640 %r132, %r137
-%r139 = zext i640 %r138 to i672
-%r141 = getelementptr i32, i32* %r2, i32 20
-%r142 = load i32, i32* %r141
-%r143 = zext i32 %r142 to i672
-%r144 = shl i672 %r143, 640
-%r145 = or i672 %r139, %r144
-%r146 = zext i672 %r145 to i704
-%r148 = getelementptr i32, i32* %r2, i32 21
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i704
-%r151 = shl i704 %r150, 672
-%r152 = or i704 %r146, %r151
-%r153 = zext i704 %r152 to i736
-%r155 = getelementptr i32, i32* %r2, i32 22
-%r156 = load i32, i32* %r155
-%r157 = zext i32 %r156 to i736
-%r158 = shl i736 %r157, 704
-%r159 = or i736 %r153, %r158
-%r160 = zext i736 %r159 to i768
-%r162 = getelementptr i32, i32* %r2, i32 23
-%r163 = load i32, i32* %r162
-%r164 = zext i32 %r163 to i768
-%r165 = shl i768 %r164, 736
-%r166 = or i768 %r160, %r165
-%r167 = zext i768 %r166 to i800
-%r169 = getelementptr i32, i32* %r2, i32 24
-%r170 = load i32, i32* %r169
-%r171 = zext i32 %r170 to i800
-%r172 = shl i800 %r171, 768
-%r173 = or i800 %r167, %r172
-%r174 = zext i800 %r173 to i832
-%r176 = getelementptr i32, i32* %r2, i32 25
-%r177 = load i32, i32* %r176
-%r178 = zext i32 %r177 to i832
-%r179 = shl i832 %r178, 800
-%r180 = or i832 %r174, %r179
-%r181 = zext i832 %r180 to i864
-%r183 = getelementptr i32, i32* %r2, i32 26
-%r184 = load i32, i32* %r183
-%r185 = zext i32 %r184 to i864
-%r186 = shl i864 %r185, 832
-%r187 = or i864 %r181, %r186
-%r188 = zext i864 %r187 to i896
-%r190 = getelementptr i32, i32* %r2, i32 27
-%r191 = load i32, i32* %r190
-%r192 = zext i32 %r191 to i896
-%r193 = shl i896 %r192, 864
-%r194 = or i896 %r188, %r193
-%r195 = load i32, i32* %r3
-%r196 = zext i32 %r195 to i64
-%r198 = getelementptr i32, i32* %r3, i32 1
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i64
-%r201 = shl i64 %r200, 32
-%r202 = or i64 %r196, %r201
-%r203 = zext i64 %r202 to i96
-%r205 = getelementptr i32, i32* %r3, i32 2
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i96
-%r208 = shl i96 %r207, 64
-%r209 = or i96 %r203, %r208
-%r210 = zext i96 %r209 to i128
-%r212 = getelementptr i32, i32* %r3, i32 3
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i128
-%r215 = shl i128 %r214, 96
-%r216 = or i128 %r210, %r215
-%r217 = zext i128 %r216 to i160
-%r219 = getelementptr i32, i32* %r3, i32 4
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i160
-%r222 = shl i160 %r221, 128
-%r223 = or i160 %r217, %r222
-%r224 = zext i160 %r223 to i192
-%r226 = getelementptr i32, i32* %r3, i32 5
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i192
-%r229 = shl i192 %r228, 160
-%r230 = or i192 %r224, %r229
-%r231 = zext i192 %r230 to i224
-%r233 = getelementptr i32, i32* %r3, i32 6
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i224
-%r236 = shl i224 %r235, 192
-%r237 = or i224 %r231, %r236
-%r238 = zext i224 %r237 to i256
-%r240 = getelementptr i32, i32* %r3, i32 7
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i256
-%r243 = shl i256 %r242, 224
-%r244 = or i256 %r238, %r243
-%r245 = zext i256 %r244 to i288
-%r247 = getelementptr i32, i32* %r3, i32 8
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i288
-%r250 = shl i288 %r249, 256
-%r251 = or i288 %r245, %r250
-%r252 = zext i288 %r251 to i320
-%r254 = getelementptr i32, i32* %r3, i32 9
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i320
-%r257 = shl i320 %r256, 288
-%r258 = or i320 %r252, %r257
-%r259 = zext i320 %r258 to i352
-%r261 = getelementptr i32, i32* %r3, i32 10
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i352
-%r264 = shl i352 %r263, 320
-%r265 = or i352 %r259, %r264
-%r266 = zext i352 %r265 to i384
-%r268 = getelementptr i32, i32* %r3, i32 11
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i384
-%r271 = shl i384 %r270, 352
-%r272 = or i384 %r266, %r271
-%r273 = zext i384 %r272 to i416
-%r275 = getelementptr i32, i32* %r3, i32 12
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i416
-%r278 = shl i416 %r277, 384
-%r279 = or i416 %r273, %r278
-%r280 = zext i416 %r279 to i448
-%r282 = getelementptr i32, i32* %r3, i32 13
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i448
-%r285 = shl i448 %r284, 416
-%r286 = or i448 %r280, %r285
-%r287 = zext i448 %r286 to i480
-%r289 = getelementptr i32, i32* %r3, i32 14
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i480
-%r292 = shl i480 %r291, 448
-%r293 = or i480 %r287, %r292
-%r294 = zext i480 %r293 to i512
-%r296 = getelementptr i32, i32* %r3, i32 15
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i512
-%r299 = shl i512 %r298, 480
-%r300 = or i512 %r294, %r299
-%r301 = zext i512 %r300 to i544
-%r303 = getelementptr i32, i32* %r3, i32 16
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i544
-%r306 = shl i544 %r305, 512
-%r307 = or i544 %r301, %r306
-%r308 = zext i544 %r307 to i576
-%r310 = getelementptr i32, i32* %r3, i32 17
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i576
-%r313 = shl i576 %r312, 544
-%r314 = or i576 %r308, %r313
-%r315 = zext i576 %r314 to i608
-%r317 = getelementptr i32, i32* %r3, i32 18
-%r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i608
-%r320 = shl i608 %r319, 576
-%r321 = or i608 %r315, %r320
-%r322 = zext i608 %r321 to i640
-%r324 = getelementptr i32, i32* %r3, i32 19
-%r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i640
-%r327 = shl i640 %r326, 608
-%r328 = or i640 %r322, %r327
-%r329 = zext i640 %r328 to i672
-%r331 = getelementptr i32, i32* %r3, i32 20
-%r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i672
-%r334 = shl i672 %r333, 640
-%r335 = or i672 %r329, %r334
-%r336 = zext i672 %r335 to i704
-%r338 = getelementptr i32, i32* %r3, i32 21
-%r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i704
-%r341 = shl i704 %r340, 672
-%r342 = or i704 %r336, %r341
-%r343 = zext i704 %r342 to i736
-%r345 = getelementptr i32, i32* %r3, i32 22
-%r346 = load i32, i32* %r345
-%r347 = zext i32 %r346 to i736
-%r348 = shl i736 %r347, 704
-%r349 = or i736 %r343, %r348
-%r350 = zext i736 %r349 to i768
-%r352 = getelementptr i32, i32* %r3, i32 23
-%r353 = load i32, i32* %r352
-%r354 = zext i32 %r353 to i768
-%r355 = shl i768 %r354, 736
-%r356 = or i768 %r350, %r355
-%r357 = zext i768 %r356 to i800
-%r359 = getelementptr i32, i32* %r3, i32 24
-%r360 = load i32, i32* %r359
-%r361 = zext i32 %r360 to i800
-%r362 = shl i800 %r361, 768
-%r363 = or i800 %r357, %r362
-%r364 = zext i800 %r363 to i832
-%r366 = getelementptr i32, i32* %r3, i32 25
-%r367 = load i32, i32* %r366
-%r368 = zext i32 %r367 to i832
-%r369 = shl i832 %r368, 800
-%r370 = or i832 %r364, %r369
-%r371 = zext i832 %r370 to i864
-%r373 = getelementptr i32, i32* %r3, i32 26
-%r374 = load i32, i32* %r373
-%r375 = zext i32 %r374 to i864
-%r376 = shl i864 %r375, 832
-%r377 = or i864 %r371, %r376
-%r378 = zext i864 %r377 to i896
-%r380 = getelementptr i32, i32* %r3, i32 27
-%r381 = load i32, i32* %r380
-%r382 = zext i32 %r381 to i896
-%r383 = shl i896 %r382, 864
-%r384 = or i896 %r378, %r383
-%r385 = zext i896 %r194 to i928
-%r386 = zext i896 %r384 to i928
-%r387 = sub i928 %r385, %r386
-%r388 = trunc i928 %r387 to i448
-%r389 = trunc i448 %r388 to i32
-%r391 = getelementptr i32, i32* %r1, i32 0
-store i32 %r389, i32* %r391
-%r392 = lshr i448 %r388, 32
-%r393 = trunc i448 %r392 to i32
-%r395 = getelementptr i32, i32* %r1, i32 1
-store i32 %r393, i32* %r395
-%r396 = lshr i448 %r392, 32
-%r397 = trunc i448 %r396 to i32
-%r399 = getelementptr i32, i32* %r1, i32 2
-store i32 %r397, i32* %r399
-%r400 = lshr i448 %r396, 32
-%r401 = trunc i448 %r400 to i32
-%r403 = getelementptr i32, i32* %r1, i32 3
-store i32 %r401, i32* %r403
-%r404 = lshr i448 %r400, 32
-%r405 = trunc i448 %r404 to i32
-%r407 = getelementptr i32, i32* %r1, i32 4
-store i32 %r405, i32* %r407
-%r408 = lshr i448 %r404, 32
-%r409 = trunc i448 %r408 to i32
-%r411 = getelementptr i32, i32* %r1, i32 5
-store i32 %r409, i32* %r411
-%r412 = lshr i448 %r408, 32
-%r413 = trunc i448 %r412 to i32
-%r415 = getelementptr i32, i32* %r1, i32 6
-store i32 %r413, i32* %r415
-%r416 = lshr i448 %r412, 32
-%r417 = trunc i448 %r416 to i32
-%r419 = getelementptr i32, i32* %r1, i32 7
-store i32 %r417, i32* %r419
-%r420 = lshr i448 %r416, 32
-%r421 = trunc i448 %r420 to i32
-%r423 = getelementptr i32, i32* %r1, i32 8
-store i32 %r421, i32* %r423
-%r424 = lshr i448 %r420, 32
-%r425 = trunc i448 %r424 to i32
-%r427 = getelementptr i32, i32* %r1, i32 9
-store i32 %r425, i32* %r427
-%r428 = lshr i448 %r424, 32
-%r429 = trunc i448 %r428 to i32
-%r431 = getelementptr i32, i32* %r1, i32 10
-store i32 %r429, i32* %r431
-%r432 = lshr i448 %r428, 32
-%r433 = trunc i448 %r432 to i32
-%r435 = getelementptr i32, i32* %r1, i32 11
-store i32 %r433, i32* %r435
-%r436 = lshr i448 %r432, 32
-%r437 = trunc i448 %r436 to i32
-%r439 = getelementptr i32, i32* %r1, i32 12
-store i32 %r437, i32* %r439
-%r440 = lshr i448 %r436, 32
-%r441 = trunc i448 %r440 to i32
-%r443 = getelementptr i32, i32* %r1, i32 13
-store i32 %r441, i32* %r443
-%r444 = lshr i928 %r387, 448
-%r445 = trunc i928 %r444 to i448
-%r446 = lshr i928 %r387, 896
-%r447 = trunc i928 %r446 to i1
-%r448 = load i32, i32* %r4
-%r449 = zext i32 %r448 to i64
-%r451 = getelementptr i32, i32* %r4, i32 1
-%r452 = load i32, i32* %r451
-%r453 = zext i32 %r452 to i64
-%r454 = shl i64 %r453, 32
-%r455 = or i64 %r449, %r454
-%r456 = zext i64 %r455 to i96
-%r458 = getelementptr i32, i32* %r4, i32 2
-%r459 = load i32, i32* %r458
-%r460 = zext i32 %r459 to i96
-%r461 = shl i96 %r460, 64
-%r462 = or i96 %r456, %r461
-%r463 = zext i96 %r462 to i128
-%r465 = getelementptr i32, i32* %r4, i32 3
-%r466 = load i32, i32* %r465
-%r467 = zext i32 %r466 to i128
-%r468 = shl i128 %r467, 96
-%r469 = or i128 %r463, %r468
-%r470 = zext i128 %r469 to i160
-%r472 = getelementptr i32, i32* %r4, i32 4
-%r473 = load i32, i32* %r472
-%r474 = zext i32 %r473 to i160
-%r475 = shl i160 %r474, 128
-%r476 = or i160 %r470, %r475
-%r477 = zext i160 %r476 to i192
-%r479 = getelementptr i32, i32* %r4, i32 5
-%r480 = load i32, i32* %r479
-%r481 = zext i32 %r480 to i192
-%r482 = shl i192 %r481, 160
-%r483 = or i192 %r477, %r482
-%r484 = zext i192 %r483 to i224
-%r486 = getelementptr i32, i32* %r4, i32 6
-%r487 = load i32, i32* %r486
-%r488 = zext i32 %r487 to i224
-%r489 = shl i224 %r488, 192
-%r490 = or i224 %r484, %r489
-%r491 = zext i224 %r490 to i256
-%r493 = getelementptr i32, i32* %r4, i32 7
-%r494 = load i32, i32* %r493
-%r495 = zext i32 %r494 to i256
-%r496 = shl i256 %r495, 224
-%r497 = or i256 %r491, %r496
-%r498 = zext i256 %r497 to i288
-%r500 = getelementptr i32, i32* %r4, i32 8
-%r501 = load i32, i32* %r500
-%r502 = zext i32 %r501 to i288
-%r503 = shl i288 %r502, 256
-%r504 = or i288 %r498, %r503
-%r505 = zext i288 %r504 to i320
-%r507 = getelementptr i32, i32* %r4, i32 9
-%r508 = load i32, i32* %r507
-%r509 = zext i32 %r508 to i320
-%r510 = shl i320 %r509, 288
-%r511 = or i320 %r505, %r510
-%r512 = zext i320 %r511 to i352
-%r514 = getelementptr i32, i32* %r4, i32 10
-%r515 = load i32, i32* %r514
-%r516 = zext i32 %r515 to i352
-%r517 = shl i352 %r516, 320
-%r518 = or i352 %r512, %r517
-%r519 = zext i352 %r518 to i384
-%r521 = getelementptr i32, i32* %r4, i32 11
-%r522 = load i32, i32* %r521
-%r523 = zext i32 %r522 to i384
-%r524 = shl i384 %r523, 352
-%r525 = or i384 %r519, %r524
-%r526 = zext i384 %r525 to i416
-%r528 = getelementptr i32, i32* %r4, i32 12
-%r529 = load i32, i32* %r528
-%r530 = zext i32 %r529 to i416
-%r531 = shl i416 %r530, 384
-%r532 = or i416 %r526, %r531
-%r533 = zext i416 %r532 to i448
-%r535 = getelementptr i32, i32* %r4, i32 13
-%r536 = load i32, i32* %r535
-%r537 = zext i32 %r536 to i448
-%r538 = shl i448 %r537, 416
-%r539 = or i448 %r533, %r538
-%r541 = select i1 %r447, i448 %r539, i448 0
-%r542 = add i448 %r445, %r541
-%r544 = getelementptr i32, i32* %r1, i32 14
-%r545 = trunc i448 %r542 to i32
-%r547 = getelementptr i32, i32* %r544, i32 0
-store i32 %r545, i32* %r547
-%r548 = lshr i448 %r542, 32
-%r549 = trunc i448 %r548 to i32
-%r551 = getelementptr i32, i32* %r544, i32 1
-store i32 %r549, i32* %r551
-%r552 = lshr i448 %r548, 32
-%r553 = trunc i448 %r552 to i32
-%r555 = getelementptr i32, i32* %r544, i32 2
-store i32 %r553, i32* %r555
-%r556 = lshr i448 %r552, 32
-%r557 = trunc i448 %r556 to i32
-%r559 = getelementptr i32, i32* %r544, i32 3
-store i32 %r557, i32* %r559
-%r560 = lshr i448 %r556, 32
-%r561 = trunc i448 %r560 to i32
-%r563 = getelementptr i32, i32* %r544, i32 4
-store i32 %r561, i32* %r563
-%r564 = lshr i448 %r560, 32
-%r565 = trunc i448 %r564 to i32
-%r567 = getelementptr i32, i32* %r544, i32 5
-store i32 %r565, i32* %r567
-%r568 = lshr i448 %r564, 32
-%r569 = trunc i448 %r568 to i32
-%r571 = getelementptr i32, i32* %r544, i32 6
-store i32 %r569, i32* %r571
-%r572 = lshr i448 %r568, 32
-%r573 = trunc i448 %r572 to i32
-%r575 = getelementptr i32, i32* %r544, i32 7
-store i32 %r573, i32* %r575
-%r576 = lshr i448 %r572, 32
-%r577 = trunc i448 %r576 to i32
-%r579 = getelementptr i32, i32* %r544, i32 8
-store i32 %r577, i32* %r579
-%r580 = lshr i448 %r576, 32
-%r581 = trunc i448 %r580 to i32
-%r583 = getelementptr i32, i32* %r544, i32 9
-store i32 %r581, i32* %r583
-%r584 = lshr i448 %r580, 32
-%r585 = trunc i448 %r584 to i32
-%r587 = getelementptr i32, i32* %r544, i32 10
-store i32 %r585, i32* %r587
-%r588 = lshr i448 %r584, 32
-%r589 = trunc i448 %r588 to i32
-%r591 = getelementptr i32, i32* %r544, i32 11
-store i32 %r589, i32* %r591
-%r592 = lshr i448 %r588, 32
-%r593 = trunc i448 %r592 to i32
-%r595 = getelementptr i32, i32* %r544, i32 12
-store i32 %r593, i32* %r595
-%r596 = lshr i448 %r592, 32
-%r597 = trunc i448 %r596 to i32
-%r599 = getelementptr i32, i32* %r544, i32 13
-store i32 %r597, i32* %r599
-ret void
-}
-define i512 @mulPv480x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
-%r18 = trunc i64 %r17 to i32
-%r19 = call i32 @extractHigh32(i64 %r17)
-%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
-%r22 = trunc i64 %r21 to i32
-%r23 = call i32 @extractHigh32(i64 %r21)
-%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
-%r26 = trunc i64 %r25 to i32
-%r27 = call i32 @extractHigh32(i64 %r25)
-%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
-%r30 = trunc i64 %r29 to i32
-%r31 = call i32 @extractHigh32(i64 %r29)
-%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
-%r34 = trunc i64 %r33 to i32
-%r35 = call i32 @extractHigh32(i64 %r33)
-%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
-%r38 = trunc i64 %r37 to i32
-%r39 = call i32 @extractHigh32(i64 %r37)
-%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
-%r42 = trunc i64 %r41 to i32
-%r43 = call i32 @extractHigh32(i64 %r41)
-%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
-%r46 = trunc i64 %r45 to i32
-%r47 = call i32 @extractHigh32(i64 %r45)
-%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
-%r50 = trunc i64 %r49 to i32
-%r51 = call i32 @extractHigh32(i64 %r49)
-%r53 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 12)
-%r54 = trunc i64 %r53 to i32
-%r55 = call i32 @extractHigh32(i64 %r53)
-%r57 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 13)
-%r58 = trunc i64 %r57 to i32
-%r59 = call i32 @extractHigh32(i64 %r57)
-%r61 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 14)
-%r62 = trunc i64 %r61 to i32
-%r63 = call i32 @extractHigh32(i64 %r61)
-%r64 = zext i32 %r6 to i64
-%r65 = zext i32 %r10 to i64
-%r66 = shl i64 %r65, 32
-%r67 = or i64 %r64, %r66
-%r68 = zext i64 %r67 to i96
-%r69 = zext i32 %r14 to i96
-%r70 = shl i96 %r69, 64
-%r71 = or i96 %r68, %r70
-%r72 = zext i96 %r71 to i128
-%r73 = zext i32 %r18 to i128
-%r74 = shl i128 %r73, 96
-%r75 = or i128 %r72, %r74
-%r76 = zext i128 %r75 to i160
-%r77 = zext i32 %r22 to i160
-%r78 = shl i160 %r77, 128
-%r79 = or i160 %r76, %r78
-%r80 = zext i160 %r79 to i192
-%r81 = zext i32 %r26 to i192
-%r82 = shl i192 %r81, 160
-%r83 = or i192 %r80, %r82
-%r84 = zext i192 %r83 to i224
-%r85 = zext i32 %r30 to i224
-%r86 = shl i224 %r85, 192
-%r87 = or i224 %r84, %r86
-%r88 = zext i224 %r87 to i256
-%r89 = zext i32 %r34 to i256
-%r90 = shl i256 %r89, 224
-%r91 = or i256 %r88, %r90
-%r92 = zext i256 %r91 to i288
-%r93 = zext i32 %r38 to i288
-%r94 = shl i288 %r93, 256
-%r95 = or i288 %r92, %r94
-%r96 = zext i288 %r95 to i320
-%r97 = zext i32 %r42 to i320
-%r98 = shl i320 %r97, 288
-%r99 = or i320 %r96, %r98
-%r100 = zext i320 %r99 to i352
-%r101 = zext i32 %r46 to i352
-%r102 = shl i352 %r101, 320
-%r103 = or i352 %r100, %r102
-%r104 = zext i352 %r103 to i384
-%r105 = zext i32 %r50 to i384
-%r106 = shl i384 %r105, 352
-%r107 = or i384 %r104, %r106
-%r108 = zext i384 %r107 to i416
-%r109 = zext i32 %r54 to i416
-%r110 = shl i416 %r109, 384
-%r111 = or i416 %r108, %r110
-%r112 = zext i416 %r111 to i448
-%r113 = zext i32 %r58 to i448
-%r114 = shl i448 %r113, 416
-%r115 = or i448 %r112, %r114
-%r116 = zext i448 %r115 to i480
-%r117 = zext i32 %r62 to i480
-%r118 = shl i480 %r117, 448
-%r119 = or i480 %r116, %r118
-%r120 = zext i32 %r7 to i64
-%r121 = zext i32 %r11 to i64
-%r122 = shl i64 %r121, 32
-%r123 = or i64 %r120, %r122
-%r124 = zext i64 %r123 to i96
-%r125 = zext i32 %r15 to i96
-%r126 = shl i96 %r125, 64
-%r127 = or i96 %r124, %r126
-%r128 = zext i96 %r127 to i128
-%r129 = zext i32 %r19 to i128
-%r130 = shl i128 %r129, 96
-%r131 = or i128 %r128, %r130
-%r132 = zext i128 %r131 to i160
-%r133 = zext i32 %r23 to i160
-%r134 = shl i160 %r133, 128
-%r135 = or i160 %r132, %r134
-%r136 = zext i160 %r135 to i192
-%r137 = zext i32 %r27 to i192
-%r138 = shl i192 %r137, 160
-%r139 = or i192 %r136, %r138
-%r140 = zext i192 %r139 to i224
-%r141 = zext i32 %r31 to i224
-%r142 = shl i224 %r141, 192
-%r143 = or i224 %r140, %r142
-%r144 = zext i224 %r143 to i256
-%r145 = zext i32 %r35 to i256
-%r146 = shl i256 %r145, 224
-%r147 = or i256 %r144, %r146
-%r148 = zext i256 %r147 to i288
-%r149 = zext i32 %r39 to i288
-%r150 = shl i288 %r149, 256
-%r151 = or i288 %r148, %r150
-%r152 = zext i288 %r151 to i320
-%r153 = zext i32 %r43 to i320
-%r154 = shl i320 %r153, 288
-%r155 = or i320 %r152, %r154
-%r156 = zext i320 %r155 to i352
-%r157 = zext i32 %r47 to i352
-%r158 = shl i352 %r157, 320
-%r159 = or i352 %r156, %r158
-%r160 = zext i352 %r159 to i384
-%r161 = zext i32 %r51 to i384
-%r162 = shl i384 %r161, 352
-%r163 = or i384 %r160, %r162
-%r164 = zext i384 %r163 to i416
-%r165 = zext i32 %r55 to i416
-%r166 = shl i416 %r165, 384
-%r167 = or i416 %r164, %r166
-%r168 = zext i416 %r167 to i448
-%r169 = zext i32 %r59 to i448
-%r170 = shl i448 %r169, 416
-%r171 = or i448 %r168, %r170
-%r172 = zext i448 %r171 to i480
-%r173 = zext i32 %r63 to i480
-%r174 = shl i480 %r173, 448
-%r175 = or i480 %r172, %r174
-%r176 = zext i480 %r119 to i512
-%r177 = zext i480 %r175 to i512
-%r178 = shl i512 %r177, 32
-%r179 = add i512 %r176, %r178
-ret i512 %r179
-}
-define void @mcl_fp_mulUnitPre15L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i512 @mulPv480x32(i32* %r2, i32 %r3)
-%r5 = trunc i512 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i512 %r4, 32
-%r9 = trunc i512 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i512 %r8, 32
-%r13 = trunc i512 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i512 %r12, 32
-%r17 = trunc i512 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i512 %r16, 32
-%r21 = trunc i512 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-%r24 = lshr i512 %r20, 32
-%r25 = trunc i512 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 5
-store i32 %r25, i32* %r27
-%r28 = lshr i512 %r24, 32
-%r29 = trunc i512 %r28 to i32
-%r31 = getelementptr i32, i32* %r1, i32 6
-store i32 %r29, i32* %r31
-%r32 = lshr i512 %r28, 32
-%r33 = trunc i512 %r32 to i32
-%r35 = getelementptr i32, i32* %r1, i32 7
-store i32 %r33, i32* %r35
-%r36 = lshr i512 %r32, 32
-%r37 = trunc i512 %r36 to i32
-%r39 = getelementptr i32, i32* %r1, i32 8
-store i32 %r37, i32* %r39
-%r40 = lshr i512 %r36, 32
-%r41 = trunc i512 %r40 to i32
-%r43 = getelementptr i32, i32* %r1, i32 9
-store i32 %r41, i32* %r43
-%r44 = lshr i512 %r40, 32
-%r45 = trunc i512 %r44 to i32
-%r47 = getelementptr i32, i32* %r1, i32 10
-store i32 %r45, i32* %r47
-%r48 = lshr i512 %r44, 32
-%r49 = trunc i512 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 11
-store i32 %r49, i32* %r51
-%r52 = lshr i512 %r48, 32
-%r53 = trunc i512 %r52 to i32
-%r55 = getelementptr i32, i32* %r1, i32 12
-store i32 %r53, i32* %r55
-%r56 = lshr i512 %r52, 32
-%r57 = trunc i512 %r56 to i32
-%r59 = getelementptr i32, i32* %r1, i32 13
-store i32 %r57, i32* %r59
-%r60 = lshr i512 %r56, 32
-%r61 = trunc i512 %r60 to i32
-%r63 = getelementptr i32, i32* %r1, i32 14
-store i32 %r61, i32* %r63
-%r64 = lshr i512 %r60, 32
-%r65 = trunc i512 %r64 to i32
-%r67 = getelementptr i32, i32* %r1, i32 15
-store i32 %r65, i32* %r67
-ret void
-}
-define void @mcl_fpDbl_mulPre15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r4 = load i32, i32* %r3
-%r5 = call i512 @mulPv480x32(i32* %r2, i32 %r4)
-%r6 = trunc i512 %r5 to i32
-store i32 %r6, i32* %r1
-%r7 = lshr i512 %r5, 32
-%r9 = getelementptr i32, i32* %r3, i32 1
-%r10 = load i32, i32* %r9
-%r11 = call i512 @mulPv480x32(i32* %r2, i32 %r10)
-%r12 = add i512 %r7, %r11
-%r13 = trunc i512 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 1
-store i32 %r13, i32* %r15
-%r16 = lshr i512 %r12, 32
-%r18 = getelementptr i32, i32* %r3, i32 2
-%r19 = load i32, i32* %r18
-%r20 = call i512 @mulPv480x32(i32* %r2, i32 %r19)
-%r21 = add i512 %r16, %r20
-%r22 = trunc i512 %r21 to i32
-%r24 = getelementptr i32, i32* %r1, i32 2
-store i32 %r22, i32* %r24
-%r25 = lshr i512 %r21, 32
-%r27 = getelementptr i32, i32* %r3, i32 3
-%r28 = load i32, i32* %r27
-%r29 = call i512 @mulPv480x32(i32* %r2, i32 %r28)
-%r30 = add i512 %r25, %r29
-%r31 = trunc i512 %r30 to i32
-%r33 = getelementptr i32, i32* %r1, i32 3
-store i32 %r31, i32* %r33
-%r34 = lshr i512 %r30, 32
-%r36 = getelementptr i32, i32* %r3, i32 4
-%r37 = load i32, i32* %r36
-%r38 = call i512 @mulPv480x32(i32* %r2, i32 %r37)
-%r39 = add i512 %r34, %r38
-%r40 = trunc i512 %r39 to i32
-%r42 = getelementptr i32, i32* %r1, i32 4
-store i32 %r40, i32* %r42
-%r43 = lshr i512 %r39, 32
-%r45 = getelementptr i32, i32* %r3, i32 5
-%r46 = load i32, i32* %r45
-%r47 = call i512 @mulPv480x32(i32* %r2, i32 %r46)
-%r48 = add i512 %r43, %r47
-%r49 = trunc i512 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 5
-store i32 %r49, i32* %r51
-%r52 = lshr i512 %r48, 32
-%r54 = getelementptr i32, i32* %r3, i32 6
-%r55 = load i32, i32* %r54
-%r56 = call i512 @mulPv480x32(i32* %r2, i32 %r55)
-%r57 = add i512 %r52, %r56
-%r58 = trunc i512 %r57 to i32
-%r60 = getelementptr i32, i32* %r1, i32 6
-store i32 %r58, i32* %r60
-%r61 = lshr i512 %r57, 32
-%r63 = getelementptr i32, i32* %r3, i32 7
-%r64 = load i32, i32* %r63
-%r65 = call i512 @mulPv480x32(i32* %r2, i32 %r64)
-%r66 = add i512 %r61, %r65
-%r67 = trunc i512 %r66 to i32
-%r69 = getelementptr i32, i32* %r1, i32 7
-store i32 %r67, i32* %r69
-%r70 = lshr i512 %r66, 32
-%r72 = getelementptr i32, i32* %r3, i32 8
-%r73 = load i32, i32* %r72
-%r74 = call i512 @mulPv480x32(i32* %r2, i32 %r73)
-%r75 = add i512 %r70, %r74
-%r76 = trunc i512 %r75 to i32
-%r78 = getelementptr i32, i32* %r1, i32 8
-store i32 %r76, i32* %r78
-%r79 = lshr i512 %r75, 32
-%r81 = getelementptr i32, i32* %r3, i32 9
-%r82 = load i32, i32* %r81
-%r83 = call i512 @mulPv480x32(i32* %r2, i32 %r82)
-%r84 = add i512 %r79, %r83
-%r85 = trunc i512 %r84 to i32
-%r87 = getelementptr i32, i32* %r1, i32 9
-store i32 %r85, i32* %r87
-%r88 = lshr i512 %r84, 32
-%r90 = getelementptr i32, i32* %r3, i32 10
-%r91 = load i32, i32* %r90
-%r92 = call i512 @mulPv480x32(i32* %r2, i32 %r91)
-%r93 = add i512 %r88, %r92
-%r94 = trunc i512 %r93 to i32
-%r96 = getelementptr i32, i32* %r1, i32 10
-store i32 %r94, i32* %r96
-%r97 = lshr i512 %r93, 32
-%r99 = getelementptr i32, i32* %r3, i32 11
-%r100 = load i32, i32* %r99
-%r101 = call i512 @mulPv480x32(i32* %r2, i32 %r100)
-%r102 = add i512 %r97, %r101
-%r103 = trunc i512 %r102 to i32
-%r105 = getelementptr i32, i32* %r1, i32 11
-store i32 %r103, i32* %r105
-%r106 = lshr i512 %r102, 32
-%r108 = getelementptr i32, i32* %r3, i32 12
-%r109 = load i32, i32* %r108
-%r110 = call i512 @mulPv480x32(i32* %r2, i32 %r109)
-%r111 = add i512 %r106, %r110
-%r112 = trunc i512 %r111 to i32
-%r114 = getelementptr i32, i32* %r1, i32 12
-store i32 %r112, i32* %r114
-%r115 = lshr i512 %r111, 32
-%r117 = getelementptr i32, i32* %r3, i32 13
-%r118 = load i32, i32* %r117
-%r119 = call i512 @mulPv480x32(i32* %r2, i32 %r118)
-%r120 = add i512 %r115, %r119
-%r121 = trunc i512 %r120 to i32
-%r123 = getelementptr i32, i32* %r1, i32 13
-store i32 %r121, i32* %r123
-%r124 = lshr i512 %r120, 32
-%r126 = getelementptr i32, i32* %r3, i32 14
-%r127 = load i32, i32* %r126
-%r128 = call i512 @mulPv480x32(i32* %r2, i32 %r127)
-%r129 = add i512 %r124, %r128
-%r131 = getelementptr i32, i32* %r1, i32 14
-%r132 = trunc i512 %r129 to i32
-%r134 = getelementptr i32, i32* %r131, i32 0
-store i32 %r132, i32* %r134
-%r135 = lshr i512 %r129, 32
-%r136 = trunc i512 %r135 to i32
-%r138 = getelementptr i32, i32* %r131, i32 1
-store i32 %r136, i32* %r138
-%r139 = lshr i512 %r135, 32
-%r140 = trunc i512 %r139 to i32
-%r142 = getelementptr i32, i32* %r131, i32 2
-store i32 %r140, i32* %r142
-%r143 = lshr i512 %r139, 32
-%r144 = trunc i512 %r143 to i32
-%r146 = getelementptr i32, i32* %r131, i32 3
-store i32 %r144, i32* %r146
-%r147 = lshr i512 %r143, 32
-%r148 = trunc i512 %r147 to i32
-%r150 = getelementptr i32, i32* %r131, i32 4
-store i32 %r148, i32* %r150
-%r151 = lshr i512 %r147, 32
-%r152 = trunc i512 %r151 to i32
-%r154 = getelementptr i32, i32* %r131, i32 5
-store i32 %r152, i32* %r154
-%r155 = lshr i512 %r151, 32
-%r156 = trunc i512 %r155 to i32
-%r158 = getelementptr i32, i32* %r131, i32 6
-store i32 %r156, i32* %r158
-%r159 = lshr i512 %r155, 32
-%r160 = trunc i512 %r159 to i32
-%r162 = getelementptr i32, i32* %r131, i32 7
-store i32 %r160, i32* %r162
-%r163 = lshr i512 %r159, 32
-%r164 = trunc i512 %r163 to i32
-%r166 = getelementptr i32, i32* %r131, i32 8
-store i32 %r164, i32* %r166
-%r167 = lshr i512 %r163, 32
-%r168 = trunc i512 %r167 to i32
-%r170 = getelementptr i32, i32* %r131, i32 9
-store i32 %r168, i32* %r170
-%r171 = lshr i512 %r167, 32
-%r172 = trunc i512 %r171 to i32
-%r174 = getelementptr i32, i32* %r131, i32 10
-store i32 %r172, i32* %r174
-%r175 = lshr i512 %r171, 32
-%r176 = trunc i512 %r175 to i32
-%r178 = getelementptr i32, i32* %r131, i32 11
-store i32 %r176, i32* %r178
-%r179 = lshr i512 %r175, 32
-%r180 = trunc i512 %r179 to i32
-%r182 = getelementptr i32, i32* %r131, i32 12
-store i32 %r180, i32* %r182
-%r183 = lshr i512 %r179, 32
-%r184 = trunc i512 %r183 to i32
-%r186 = getelementptr i32, i32* %r131, i32 13
-store i32 %r184, i32* %r186
-%r187 = lshr i512 %r183, 32
-%r188 = trunc i512 %r187 to i32
-%r190 = getelementptr i32, i32* %r131, i32 14
-store i32 %r188, i32* %r190
-%r191 = lshr i512 %r187, 32
-%r192 = trunc i512 %r191 to i32
-%r194 = getelementptr i32, i32* %r131, i32 15
-store i32 %r192, i32* %r194
-ret void
-}
-define void @mcl_fpDbl_sqrPre15L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = call i512 @mulPv480x32(i32* %r2, i32 %r3)
-%r5 = trunc i512 %r4 to i32
-store i32 %r5, i32* %r1
-%r6 = lshr i512 %r4, 32
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = call i512 @mulPv480x32(i32* %r2, i32 %r9)
-%r11 = add i512 %r6, %r10
-%r12 = trunc i512 %r11 to i32
-%r14 = getelementptr i32, i32* %r1, i32 1
-store i32 %r12, i32* %r14
-%r15 = lshr i512 %r11, 32
-%r17 = getelementptr i32, i32* %r2, i32 2
-%r18 = load i32, i32* %r17
-%r19 = call i512 @mulPv480x32(i32* %r2, i32 %r18)
-%r20 = add i512 %r15, %r19
-%r21 = trunc i512 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 2
-store i32 %r21, i32* %r23
-%r24 = lshr i512 %r20, 32
-%r26 = getelementptr i32, i32* %r2, i32 3
-%r27 = load i32, i32* %r26
-%r28 = call i512 @mulPv480x32(i32* %r2, i32 %r27)
-%r29 = add i512 %r24, %r28
-%r30 = trunc i512 %r29 to i32
-%r32 = getelementptr i32, i32* %r1, i32 3
-store i32 %r30, i32* %r32
-%r33 = lshr i512 %r29, 32
-%r35 = getelementptr i32, i32* %r2, i32 4
-%r36 = load i32, i32* %r35
-%r37 = call i512 @mulPv480x32(i32* %r2, i32 %r36)
-%r38 = add i512 %r33, %r37
-%r39 = trunc i512 %r38 to i32
-%r41 = getelementptr i32, i32* %r1, i32 4
-store i32 %r39, i32* %r41
-%r42 = lshr i512 %r38, 32
-%r44 = getelementptr i32, i32* %r2, i32 5
-%r45 = load i32, i32* %r44
-%r46 = call i512 @mulPv480x32(i32* %r2, i32 %r45)
-%r47 = add i512 %r42, %r46
-%r48 = trunc i512 %r47 to i32
-%r50 = getelementptr i32, i32* %r1, i32 5
-store i32 %r48, i32* %r50
-%r51 = lshr i512 %r47, 32
-%r53 = getelementptr i32, i32* %r2, i32 6
-%r54 = load i32, i32* %r53
-%r55 = call i512 @mulPv480x32(i32* %r2, i32 %r54)
-%r56 = add i512 %r51, %r55
-%r57 = trunc i512 %r56 to i32
-%r59 = getelementptr i32, i32* %r1, i32 6
-store i32 %r57, i32* %r59
-%r60 = lshr i512 %r56, 32
-%r62 = getelementptr i32, i32* %r2, i32 7
-%r63 = load i32, i32* %r62
-%r64 = call i512 @mulPv480x32(i32* %r2, i32 %r63)
-%r65 = add i512 %r60, %r64
-%r66 = trunc i512 %r65 to i32
-%r68 = getelementptr i32, i32* %r1, i32 7
-store i32 %r66, i32* %r68
-%r69 = lshr i512 %r65, 32
-%r71 = getelementptr i32, i32* %r2, i32 8
-%r72 = load i32, i32* %r71
-%r73 = call i512 @mulPv480x32(i32* %r2, i32 %r72)
-%r74 = add i512 %r69, %r73
-%r75 = trunc i512 %r74 to i32
-%r77 = getelementptr i32, i32* %r1, i32 8
-store i32 %r75, i32* %r77
-%r78 = lshr i512 %r74, 32
-%r80 = getelementptr i32, i32* %r2, i32 9
-%r81 = load i32, i32* %r80
-%r82 = call i512 @mulPv480x32(i32* %r2, i32 %r81)
-%r83 = add i512 %r78, %r82
-%r84 = trunc i512 %r83 to i32
-%r86 = getelementptr i32, i32* %r1, i32 9
-store i32 %r84, i32* %r86
-%r87 = lshr i512 %r83, 32
-%r89 = getelementptr i32, i32* %r2, i32 10
-%r90 = load i32, i32* %r89
-%r91 = call i512 @mulPv480x32(i32* %r2, i32 %r90)
-%r92 = add i512 %r87, %r91
-%r93 = trunc i512 %r92 to i32
-%r95 = getelementptr i32, i32* %r1, i32 10
-store i32 %r93, i32* %r95
-%r96 = lshr i512 %r92, 32
-%r98 = getelementptr i32, i32* %r2, i32 11
-%r99 = load i32, i32* %r98
-%r100 = call i512 @mulPv480x32(i32* %r2, i32 %r99)
-%r101 = add i512 %r96, %r100
-%r102 = trunc i512 %r101 to i32
-%r104 = getelementptr i32, i32* %r1, i32 11
-store i32 %r102, i32* %r104
-%r105 = lshr i512 %r101, 32
-%r107 = getelementptr i32, i32* %r2, i32 12
-%r108 = load i32, i32* %r107
-%r109 = call i512 @mulPv480x32(i32* %r2, i32 %r108)
-%r110 = add i512 %r105, %r109
-%r111 = trunc i512 %r110 to i32
-%r113 = getelementptr i32, i32* %r1, i32 12
-store i32 %r111, i32* %r113
-%r114 = lshr i512 %r110, 32
-%r116 = getelementptr i32, i32* %r2, i32 13
-%r117 = load i32, i32* %r116
-%r118 = call i512 @mulPv480x32(i32* %r2, i32 %r117)
-%r119 = add i512 %r114, %r118
-%r120 = trunc i512 %r119 to i32
-%r122 = getelementptr i32, i32* %r1, i32 13
-store i32 %r120, i32* %r122
-%r123 = lshr i512 %r119, 32
-%r125 = getelementptr i32, i32* %r2, i32 14
-%r126 = load i32, i32* %r125
-%r127 = call i512 @mulPv480x32(i32* %r2, i32 %r126)
-%r128 = add i512 %r123, %r127
-%r130 = getelementptr i32, i32* %r1, i32 14
-%r131 = trunc i512 %r128 to i32
-%r133 = getelementptr i32, i32* %r130, i32 0
-store i32 %r131, i32* %r133
-%r134 = lshr i512 %r128, 32
-%r135 = trunc i512 %r134 to i32
-%r137 = getelementptr i32, i32* %r130, i32 1
-store i32 %r135, i32* %r137
-%r138 = lshr i512 %r134, 32
-%r139 = trunc i512 %r138 to i32
-%r141 = getelementptr i32, i32* %r130, i32 2
-store i32 %r139, i32* %r141
-%r142 = lshr i512 %r138, 32
-%r143 = trunc i512 %r142 to i32
-%r145 = getelementptr i32, i32* %r130, i32 3
-store i32 %r143, i32* %r145
-%r146 = lshr i512 %r142, 32
-%r147 = trunc i512 %r146 to i32
-%r149 = getelementptr i32, i32* %r130, i32 4
-store i32 %r147, i32* %r149
-%r150 = lshr i512 %r146, 32
-%r151 = trunc i512 %r150 to i32
-%r153 = getelementptr i32, i32* %r130, i32 5
-store i32 %r151, i32* %r153
-%r154 = lshr i512 %r150, 32
-%r155 = trunc i512 %r154 to i32
-%r157 = getelementptr i32, i32* %r130, i32 6
-store i32 %r155, i32* %r157
-%r158 = lshr i512 %r154, 32
-%r159 = trunc i512 %r158 to i32
-%r161 = getelementptr i32, i32* %r130, i32 7
-store i32 %r159, i32* %r161
-%r162 = lshr i512 %r158, 32
-%r163 = trunc i512 %r162 to i32
-%r165 = getelementptr i32, i32* %r130, i32 8
-store i32 %r163, i32* %r165
-%r166 = lshr i512 %r162, 32
-%r167 = trunc i512 %r166 to i32
-%r169 = getelementptr i32, i32* %r130, i32 9
-store i32 %r167, i32* %r169
-%r170 = lshr i512 %r166, 32
-%r171 = trunc i512 %r170 to i32
-%r173 = getelementptr i32, i32* %r130, i32 10
-store i32 %r171, i32* %r173
-%r174 = lshr i512 %r170, 32
-%r175 = trunc i512 %r174 to i32
-%r177 = getelementptr i32, i32* %r130, i32 11
-store i32 %r175, i32* %r177
-%r178 = lshr i512 %r174, 32
-%r179 = trunc i512 %r178 to i32
-%r181 = getelementptr i32, i32* %r130, i32 12
-store i32 %r179, i32* %r181
-%r182 = lshr i512 %r178, 32
-%r183 = trunc i512 %r182 to i32
-%r185 = getelementptr i32, i32* %r130, i32 13
-store i32 %r183, i32* %r185
-%r186 = lshr i512 %r182, 32
-%r187 = trunc i512 %r186 to i32
-%r189 = getelementptr i32, i32* %r130, i32 14
-store i32 %r187, i32* %r189
-%r190 = lshr i512 %r186, 32
-%r191 = trunc i512 %r190 to i32
-%r193 = getelementptr i32, i32* %r130, i32 15
-store i32 %r191, i32* %r193
-ret void
-}
-define void @mcl_fp_mont15L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i512 @mulPv480x32(i32* %r2, i32 %r10)
-%r12 = zext i512 %r11 to i544
-%r13 = trunc i512 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i512 @mulPv480x32(i32* %r4, i32 %r14)
-%r16 = zext i512 %r15 to i544
-%r17 = add i544 %r12, %r16
-%r18 = lshr i544 %r17, 32
-%r20 = getelementptr i32, i32* %r3, i32 1
-%r21 = load i32, i32* %r20
-%r22 = call i512 @mulPv480x32(i32* %r2, i32 %r21)
-%r23 = zext i512 %r22 to i544
-%r24 = add i544 %r18, %r23
-%r25 = trunc i544 %r24 to i32
-%r26 = mul i32 %r25, %r7
-%r27 = call i512 @mulPv480x32(i32* %r4, i32 %r26)
-%r28 = zext i512 %r27 to i544
-%r29 = add i544 %r24, %r28
-%r30 = lshr i544 %r29, 32
-%r32 = getelementptr i32, i32* %r3, i32 2
-%r33 = load i32, i32* %r32
-%r34 = call i512 @mulPv480x32(i32* %r2, i32 %r33)
-%r35 = zext i512 %r34 to i544
-%r36 = add i544 %r30, %r35
-%r37 = trunc i544 %r36 to i32
-%r38 = mul i32 %r37, %r7
-%r39 = call i512 @mulPv480x32(i32* %r4, i32 %r38)
-%r40 = zext i512 %r39 to i544
-%r41 = add i544 %r36, %r40
-%r42 = lshr i544 %r41, 32
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = call i512 @mulPv480x32(i32* %r2, i32 %r45)
-%r47 = zext i512 %r46 to i544
-%r48 = add i544 %r42, %r47
-%r49 = trunc i544 %r48 to i32
-%r50 = mul i32 %r49, %r7
-%r51 = call i512 @mulPv480x32(i32* %r4, i32 %r50)
-%r52 = zext i512 %r51 to i544
-%r53 = add i544 %r48, %r52
-%r54 = lshr i544 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 4
-%r57 = load i32, i32* %r56
-%r58 = call i512 @mulPv480x32(i32* %r2, i32 %r57)
-%r59 = zext i512 %r58 to i544
-%r60 = add i544 %r54, %r59
-%r61 = trunc i544 %r60 to i32
-%r62 = mul i32 %r61, %r7
-%r63 = call i512 @mulPv480x32(i32* %r4, i32 %r62)
-%r64 = zext i512 %r63 to i544
-%r65 = add i544 %r60, %r64
-%r66 = lshr i544 %r65, 32
-%r68 = getelementptr i32, i32* %r3, i32 5
-%r69 = load i32, i32* %r68
-%r70 = call i512 @mulPv480x32(i32* %r2, i32 %r69)
-%r71 = zext i512 %r70 to i544
-%r72 = add i544 %r66, %r71
-%r73 = trunc i544 %r72 to i32
-%r74 = mul i32 %r73, %r7
-%r75 = call i512 @mulPv480x32(i32* %r4, i32 %r74)
-%r76 = zext i512 %r75 to i544
-%r77 = add i544 %r72, %r76
-%r78 = lshr i544 %r77, 32
-%r80 = getelementptr i32, i32* %r3, i32 6
-%r81 = load i32, i32* %r80
-%r82 = call i512 @mulPv480x32(i32* %r2, i32 %r81)
-%r83 = zext i512 %r82 to i544
-%r84 = add i544 %r78, %r83
-%r85 = trunc i544 %r84 to i32
-%r86 = mul i32 %r85, %r7
-%r87 = call i512 @mulPv480x32(i32* %r4, i32 %r86)
-%r88 = zext i512 %r87 to i544
-%r89 = add i544 %r84, %r88
-%r90 = lshr i544 %r89, 32
-%r92 = getelementptr i32, i32* %r3, i32 7
-%r93 = load i32, i32* %r92
-%r94 = call i512 @mulPv480x32(i32* %r2, i32 %r93)
-%r95 = zext i512 %r94 to i544
-%r96 = add i544 %r90, %r95
-%r97 = trunc i544 %r96 to i32
-%r98 = mul i32 %r97, %r7
-%r99 = call i512 @mulPv480x32(i32* %r4, i32 %r98)
-%r100 = zext i512 %r99 to i544
-%r101 = add i544 %r96, %r100
-%r102 = lshr i544 %r101, 32
-%r104 = getelementptr i32, i32* %r3, i32 8
-%r105 = load i32, i32* %r104
-%r106 = call i512 @mulPv480x32(i32* %r2, i32 %r105)
-%r107 = zext i512 %r106 to i544
-%r108 = add i544 %r102, %r107
-%r109 = trunc i544 %r108 to i32
-%r110 = mul i32 %r109, %r7
-%r111 = call i512 @mulPv480x32(i32* %r4, i32 %r110)
-%r112 = zext i512 %r111 to i544
-%r113 = add i544 %r108, %r112
-%r114 = lshr i544 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 9
-%r117 = load i32, i32* %r116
-%r118 = call i512 @mulPv480x32(i32* %r2, i32 %r117)
-%r119 = zext i512 %r118 to i544
-%r120 = add i544 %r114, %r119
-%r121 = trunc i544 %r120 to i32
-%r122 = mul i32 %r121, %r7
-%r123 = call i512 @mulPv480x32(i32* %r4, i32 %r122)
-%r124 = zext i512 %r123 to i544
-%r125 = add i544 %r120, %r124
-%r126 = lshr i544 %r125, 32
-%r128 = getelementptr i32, i32* %r3, i32 10
-%r129 = load i32, i32* %r128
-%r130 = call i512 @mulPv480x32(i32* %r2, i32 %r129)
-%r131 = zext i512 %r130 to i544
-%r132 = add i544 %r126, %r131
-%r133 = trunc i544 %r132 to i32
-%r134 = mul i32 %r133, %r7
-%r135 = call i512 @mulPv480x32(i32* %r4, i32 %r134)
-%r136 = zext i512 %r135 to i544
-%r137 = add i544 %r132, %r136
-%r138 = lshr i544 %r137, 32
-%r140 = getelementptr i32, i32* %r3, i32 11
-%r141 = load i32, i32* %r140
-%r142 = call i512 @mulPv480x32(i32* %r2, i32 %r141)
-%r143 = zext i512 %r142 to i544
-%r144 = add i544 %r138, %r143
-%r145 = trunc i544 %r144 to i32
-%r146 = mul i32 %r145, %r7
-%r147 = call i512 @mulPv480x32(i32* %r4, i32 %r146)
-%r148 = zext i512 %r147 to i544
-%r149 = add i544 %r144, %r148
-%r150 = lshr i544 %r149, 32
-%r152 = getelementptr i32, i32* %r3, i32 12
-%r153 = load i32, i32* %r152
-%r154 = call i512 @mulPv480x32(i32* %r2, i32 %r153)
-%r155 = zext i512 %r154 to i544
-%r156 = add i544 %r150, %r155
-%r157 = trunc i544 %r156 to i32
-%r158 = mul i32 %r157, %r7
-%r159 = call i512 @mulPv480x32(i32* %r4, i32 %r158)
-%r160 = zext i512 %r159 to i544
-%r161 = add i544 %r156, %r160
-%r162 = lshr i544 %r161, 32
-%r164 = getelementptr i32, i32* %r3, i32 13
-%r165 = load i32, i32* %r164
-%r166 = call i512 @mulPv480x32(i32* %r2, i32 %r165)
-%r167 = zext i512 %r166 to i544
-%r168 = add i544 %r162, %r167
-%r169 = trunc i544 %r168 to i32
-%r170 = mul i32 %r169, %r7
-%r171 = call i512 @mulPv480x32(i32* %r4, i32 %r170)
-%r172 = zext i512 %r171 to i544
-%r173 = add i544 %r168, %r172
-%r174 = lshr i544 %r173, 32
-%r176 = getelementptr i32, i32* %r3, i32 14
-%r177 = load i32, i32* %r176
-%r178 = call i512 @mulPv480x32(i32* %r2, i32 %r177)
-%r179 = zext i512 %r178 to i544
-%r180 = add i544 %r174, %r179
-%r181 = trunc i544 %r180 to i32
-%r182 = mul i32 %r181, %r7
-%r183 = call i512 @mulPv480x32(i32* %r4, i32 %r182)
-%r184 = zext i512 %r183 to i544
-%r185 = add i544 %r180, %r184
-%r186 = lshr i544 %r185, 32
-%r187 = trunc i544 %r186 to i512
-%r188 = load i32, i32* %r4
-%r189 = zext i32 %r188 to i64
-%r191 = getelementptr i32, i32* %r4, i32 1
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i64
-%r194 = shl i64 %r193, 32
-%r195 = or i64 %r189, %r194
-%r196 = zext i64 %r195 to i96
-%r198 = getelementptr i32, i32* %r4, i32 2
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i96
-%r201 = shl i96 %r200, 64
-%r202 = or i96 %r196, %r201
-%r203 = zext i96 %r202 to i128
-%r205 = getelementptr i32, i32* %r4, i32 3
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i128
-%r208 = shl i128 %r207, 96
-%r209 = or i128 %r203, %r208
-%r210 = zext i128 %r209 to i160
-%r212 = getelementptr i32, i32* %r4, i32 4
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i160
-%r215 = shl i160 %r214, 128
-%r216 = or i160 %r210, %r215
-%r217 = zext i160 %r216 to i192
-%r219 = getelementptr i32, i32* %r4, i32 5
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i192
-%r222 = shl i192 %r221, 160
-%r223 = or i192 %r217, %r222
-%r224 = zext i192 %r223 to i224
-%r226 = getelementptr i32, i32* %r4, i32 6
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i224
-%r229 = shl i224 %r228, 192
-%r230 = or i224 %r224, %r229
-%r231 = zext i224 %r230 to i256
-%r233 = getelementptr i32, i32* %r4, i32 7
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i256
-%r236 = shl i256 %r235, 224
-%r237 = or i256 %r231, %r236
-%r238 = zext i256 %r237 to i288
-%r240 = getelementptr i32, i32* %r4, i32 8
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i288
-%r243 = shl i288 %r242, 256
-%r244 = or i288 %r238, %r243
-%r245 = zext i288 %r244 to i320
-%r247 = getelementptr i32, i32* %r4, i32 9
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i320
-%r250 = shl i320 %r249, 288
-%r251 = or i320 %r245, %r250
-%r252 = zext i320 %r251 to i352
-%r254 = getelementptr i32, i32* %r4, i32 10
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i352
-%r257 = shl i352 %r256, 320
-%r258 = or i352 %r252, %r257
-%r259 = zext i352 %r258 to i384
-%r261 = getelementptr i32, i32* %r4, i32 11
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i384
-%r264 = shl i384 %r263, 352
-%r265 = or i384 %r259, %r264
-%r266 = zext i384 %r265 to i416
-%r268 = getelementptr i32, i32* %r4, i32 12
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i416
-%r271 = shl i416 %r270, 384
-%r272 = or i416 %r266, %r271
-%r273 = zext i416 %r272 to i448
-%r275 = getelementptr i32, i32* %r4, i32 13
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i448
-%r278 = shl i448 %r277, 416
-%r279 = or i448 %r273, %r278
-%r280 = zext i448 %r279 to i480
-%r282 = getelementptr i32, i32* %r4, i32 14
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i480
-%r285 = shl i480 %r284, 448
-%r286 = or i480 %r280, %r285
-%r287 = zext i480 %r286 to i512
-%r288 = sub i512 %r187, %r287
-%r289 = lshr i512 %r288, 480
-%r290 = trunc i512 %r289 to i1
-%r291 = select i1 %r290, i512 %r187, i512 %r288
-%r292 = trunc i512 %r291 to i480
-%r293 = trunc i480 %r292 to i32
-%r295 = getelementptr i32, i32* %r1, i32 0
-store i32 %r293, i32* %r295
-%r296 = lshr i480 %r292, 32
-%r297 = trunc i480 %r296 to i32
-%r299 = getelementptr i32, i32* %r1, i32 1
-store i32 %r297, i32* %r299
-%r300 = lshr i480 %r296, 32
-%r301 = trunc i480 %r300 to i32
-%r303 = getelementptr i32, i32* %r1, i32 2
-store i32 %r301, i32* %r303
-%r304 = lshr i480 %r300, 32
-%r305 = trunc i480 %r304 to i32
-%r307 = getelementptr i32, i32* %r1, i32 3
-store i32 %r305, i32* %r307
-%r308 = lshr i480 %r304, 32
-%r309 = trunc i480 %r308 to i32
-%r311 = getelementptr i32, i32* %r1, i32 4
-store i32 %r309, i32* %r311
-%r312 = lshr i480 %r308, 32
-%r313 = trunc i480 %r312 to i32
-%r315 = getelementptr i32, i32* %r1, i32 5
-store i32 %r313, i32* %r315
-%r316 = lshr i480 %r312, 32
-%r317 = trunc i480 %r316 to i32
-%r319 = getelementptr i32, i32* %r1, i32 6
-store i32 %r317, i32* %r319
-%r320 = lshr i480 %r316, 32
-%r321 = trunc i480 %r320 to i32
-%r323 = getelementptr i32, i32* %r1, i32 7
-store i32 %r321, i32* %r323
-%r324 = lshr i480 %r320, 32
-%r325 = trunc i480 %r324 to i32
-%r327 = getelementptr i32, i32* %r1, i32 8
-store i32 %r325, i32* %r327
-%r328 = lshr i480 %r324, 32
-%r329 = trunc i480 %r328 to i32
-%r331 = getelementptr i32, i32* %r1, i32 9
-store i32 %r329, i32* %r331
-%r332 = lshr i480 %r328, 32
-%r333 = trunc i480 %r332 to i32
-%r335 = getelementptr i32, i32* %r1, i32 10
-store i32 %r333, i32* %r335
-%r336 = lshr i480 %r332, 32
-%r337 = trunc i480 %r336 to i32
-%r339 = getelementptr i32, i32* %r1, i32 11
-store i32 %r337, i32* %r339
-%r340 = lshr i480 %r336, 32
-%r341 = trunc i480 %r340 to i32
-%r343 = getelementptr i32, i32* %r1, i32 12
-store i32 %r341, i32* %r343
-%r344 = lshr i480 %r340, 32
-%r345 = trunc i480 %r344 to i32
-%r347 = getelementptr i32, i32* %r1, i32 13
-store i32 %r345, i32* %r347
-%r348 = lshr i480 %r344, 32
-%r349 = trunc i480 %r348 to i32
-%r351 = getelementptr i32, i32* %r1, i32 14
-store i32 %r349, i32* %r351
-ret void
-}
-define void @mcl_fp_montNF15L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i512 @mulPv480x32(i32* %r2, i32 %r8)
-%r10 = trunc i512 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i512 @mulPv480x32(i32* %r4, i32 %r11)
-%r13 = add i512 %r9, %r12
-%r14 = lshr i512 %r13, 32
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = call i512 @mulPv480x32(i32* %r2, i32 %r17)
-%r19 = add i512 %r14, %r18
-%r20 = trunc i512 %r19 to i32
-%r21 = mul i32 %r20, %r7
-%r22 = call i512 @mulPv480x32(i32* %r4, i32 %r21)
-%r23 = add i512 %r19, %r22
-%r24 = lshr i512 %r23, 32
-%r26 = getelementptr i32, i32* %r3, i32 2
-%r27 = load i32, i32* %r26
-%r28 = call i512 @mulPv480x32(i32* %r2, i32 %r27)
-%r29 = add i512 %r24, %r28
-%r30 = trunc i512 %r29 to i32
-%r31 = mul i32 %r30, %r7
-%r32 = call i512 @mulPv480x32(i32* %r4, i32 %r31)
-%r33 = add i512 %r29, %r32
-%r34 = lshr i512 %r33, 32
-%r36 = getelementptr i32, i32* %r3, i32 3
-%r37 = load i32, i32* %r36
-%r38 = call i512 @mulPv480x32(i32* %r2, i32 %r37)
-%r39 = add i512 %r34, %r38
-%r40 = trunc i512 %r39 to i32
-%r41 = mul i32 %r40, %r7
-%r42 = call i512 @mulPv480x32(i32* %r4, i32 %r41)
-%r43 = add i512 %r39, %r42
-%r44 = lshr i512 %r43, 32
-%r46 = getelementptr i32, i32* %r3, i32 4
-%r47 = load i32, i32* %r46
-%r48 = call i512 @mulPv480x32(i32* %r2, i32 %r47)
-%r49 = add i512 %r44, %r48
-%r50 = trunc i512 %r49 to i32
-%r51 = mul i32 %r50, %r7
-%r52 = call i512 @mulPv480x32(i32* %r4, i32 %r51)
-%r53 = add i512 %r49, %r52
-%r54 = lshr i512 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 5
-%r57 = load i32, i32* %r56
-%r58 = call i512 @mulPv480x32(i32* %r2, i32 %r57)
-%r59 = add i512 %r54, %r58
-%r60 = trunc i512 %r59 to i32
-%r61 = mul i32 %r60, %r7
-%r62 = call i512 @mulPv480x32(i32* %r4, i32 %r61)
-%r63 = add i512 %r59, %r62
-%r64 = lshr i512 %r63, 32
-%r66 = getelementptr i32, i32* %r3, i32 6
-%r67 = load i32, i32* %r66
-%r68 = call i512 @mulPv480x32(i32* %r2, i32 %r67)
-%r69 = add i512 %r64, %r68
-%r70 = trunc i512 %r69 to i32
-%r71 = mul i32 %r70, %r7
-%r72 = call i512 @mulPv480x32(i32* %r4, i32 %r71)
-%r73 = add i512 %r69, %r72
-%r74 = lshr i512 %r73, 32
-%r76 = getelementptr i32, i32* %r3, i32 7
-%r77 = load i32, i32* %r76
-%r78 = call i512 @mulPv480x32(i32* %r2, i32 %r77)
-%r79 = add i512 %r74, %r78
-%r80 = trunc i512 %r79 to i32
-%r81 = mul i32 %r80, %r7
-%r82 = call i512 @mulPv480x32(i32* %r4, i32 %r81)
-%r83 = add i512 %r79, %r82
-%r84 = lshr i512 %r83, 32
-%r86 = getelementptr i32, i32* %r3, i32 8
-%r87 = load i32, i32* %r86
-%r88 = call i512 @mulPv480x32(i32* %r2, i32 %r87)
-%r89 = add i512 %r84, %r88
-%r90 = trunc i512 %r89 to i32
-%r91 = mul i32 %r90, %r7
-%r92 = call i512 @mulPv480x32(i32* %r4, i32 %r91)
-%r93 = add i512 %r89, %r92
-%r94 = lshr i512 %r93, 32
-%r96 = getelementptr i32, i32* %r3, i32 9
-%r97 = load i32, i32* %r96
-%r98 = call i512 @mulPv480x32(i32* %r2, i32 %r97)
-%r99 = add i512 %r94, %r98
-%r100 = trunc i512 %r99 to i32
-%r101 = mul i32 %r100, %r7
-%r102 = call i512 @mulPv480x32(i32* %r4, i32 %r101)
-%r103 = add i512 %r99, %r102
-%r104 = lshr i512 %r103, 32
-%r106 = getelementptr i32, i32* %r3, i32 10
-%r107 = load i32, i32* %r106
-%r108 = call i512 @mulPv480x32(i32* %r2, i32 %r107)
-%r109 = add i512 %r104, %r108
-%r110 = trunc i512 %r109 to i32
-%r111 = mul i32 %r110, %r7
-%r112 = call i512 @mulPv480x32(i32* %r4, i32 %r111)
-%r113 = add i512 %r109, %r112
-%r114 = lshr i512 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 11
-%r117 = load i32, i32* %r116
-%r118 = call i512 @mulPv480x32(i32* %r2, i32 %r117)
-%r119 = add i512 %r114, %r118
-%r120 = trunc i512 %r119 to i32
-%r121 = mul i32 %r120, %r7
-%r122 = call i512 @mulPv480x32(i32* %r4, i32 %r121)
-%r123 = add i512 %r119, %r122
-%r124 = lshr i512 %r123, 32
-%r126 = getelementptr i32, i32* %r3, i32 12
-%r127 = load i32, i32* %r126
-%r128 = call i512 @mulPv480x32(i32* %r2, i32 %r127)
-%r129 = add i512 %r124, %r128
-%r130 = trunc i512 %r129 to i32
-%r131 = mul i32 %r130, %r7
-%r132 = call i512 @mulPv480x32(i32* %r4, i32 %r131)
-%r133 = add i512 %r129, %r132
-%r134 = lshr i512 %r133, 32
-%r136 = getelementptr i32, i32* %r3, i32 13
-%r137 = load i32, i32* %r136
-%r138 = call i512 @mulPv480x32(i32* %r2, i32 %r137)
-%r139 = add i512 %r134, %r138
-%r140 = trunc i512 %r139 to i32
-%r141 = mul i32 %r140, %r7
-%r142 = call i512 @mulPv480x32(i32* %r4, i32 %r141)
-%r143 = add i512 %r139, %r142
-%r144 = lshr i512 %r143, 32
-%r146 = getelementptr i32, i32* %r3, i32 14
-%r147 = load i32, i32* %r146
-%r148 = call i512 @mulPv480x32(i32* %r2, i32 %r147)
-%r149 = add i512 %r144, %r148
-%r150 = trunc i512 %r149 to i32
-%r151 = mul i32 %r150, %r7
-%r152 = call i512 @mulPv480x32(i32* %r4, i32 %r151)
-%r153 = add i512 %r149, %r152
-%r154 = lshr i512 %r153, 32
-%r155 = trunc i512 %r154 to i480
-%r156 = load i32, i32* %r4
-%r157 = zext i32 %r156 to i64
-%r159 = getelementptr i32, i32* %r4, i32 1
-%r160 = load i32, i32* %r159
-%r161 = zext i32 %r160 to i64
-%r162 = shl i64 %r161, 32
-%r163 = or i64 %r157, %r162
-%r164 = zext i64 %r163 to i96
-%r166 = getelementptr i32, i32* %r4, i32 2
-%r167 = load i32, i32* %r166
-%r168 = zext i32 %r167 to i96
-%r169 = shl i96 %r168, 64
-%r170 = or i96 %r164, %r169
-%r171 = zext i96 %r170 to i128
-%r173 = getelementptr i32, i32* %r4, i32 3
-%r174 = load i32, i32* %r173
-%r175 = zext i32 %r174 to i128
-%r176 = shl i128 %r175, 96
-%r177 = or i128 %r171, %r176
-%r178 = zext i128 %r177 to i160
-%r180 = getelementptr i32, i32* %r4, i32 4
-%r181 = load i32, i32* %r180
-%r182 = zext i32 %r181 to i160
-%r183 = shl i160 %r182, 128
-%r184 = or i160 %r178, %r183
-%r185 = zext i160 %r184 to i192
-%r187 = getelementptr i32, i32* %r4, i32 5
-%r188 = load i32, i32* %r187
-%r189 = zext i32 %r188 to i192
-%r190 = shl i192 %r189, 160
-%r191 = or i192 %r185, %r190
-%r192 = zext i192 %r191 to i224
-%r194 = getelementptr i32, i32* %r4, i32 6
-%r195 = load i32, i32* %r194
-%r196 = zext i32 %r195 to i224
-%r197 = shl i224 %r196, 192
-%r198 = or i224 %r192, %r197
-%r199 = zext i224 %r198 to i256
-%r201 = getelementptr i32, i32* %r4, i32 7
-%r202 = load i32, i32* %r201
-%r203 = zext i32 %r202 to i256
-%r204 = shl i256 %r203, 224
-%r205 = or i256 %r199, %r204
-%r206 = zext i256 %r205 to i288
-%r208 = getelementptr i32, i32* %r4, i32 8
-%r209 = load i32, i32* %r208
-%r210 = zext i32 %r209 to i288
-%r211 = shl i288 %r210, 256
-%r212 = or i288 %r206, %r211
-%r213 = zext i288 %r212 to i320
-%r215 = getelementptr i32, i32* %r4, i32 9
-%r216 = load i32, i32* %r215
-%r217 = zext i32 %r216 to i320
-%r218 = shl i320 %r217, 288
-%r219 = or i320 %r213, %r218
-%r220 = zext i320 %r219 to i352
-%r222 = getelementptr i32, i32* %r4, i32 10
-%r223 = load i32, i32* %r222
-%r224 = zext i32 %r223 to i352
-%r225 = shl i352 %r224, 320
-%r226 = or i352 %r220, %r225
-%r227 = zext i352 %r226 to i384
-%r229 = getelementptr i32, i32* %r4, i32 11
-%r230 = load i32, i32* %r229
-%r231 = zext i32 %r230 to i384
-%r232 = shl i384 %r231, 352
-%r233 = or i384 %r227, %r232
-%r234 = zext i384 %r233 to i416
-%r236 = getelementptr i32, i32* %r4, i32 12
-%r237 = load i32, i32* %r236
-%r238 = zext i32 %r237 to i416
-%r239 = shl i416 %r238, 384
-%r240 = or i416 %r234, %r239
-%r241 = zext i416 %r240 to i448
-%r243 = getelementptr i32, i32* %r4, i32 13
-%r244 = load i32, i32* %r243
-%r245 = zext i32 %r244 to i448
-%r246 = shl i448 %r245, 416
-%r247 = or i448 %r241, %r246
-%r248 = zext i448 %r247 to i480
-%r250 = getelementptr i32, i32* %r4, i32 14
-%r251 = load i32, i32* %r250
-%r252 = zext i32 %r251 to i480
-%r253 = shl i480 %r252, 448
-%r254 = or i480 %r248, %r253
-%r255 = sub i480 %r155, %r254
-%r256 = lshr i480 %r255, 479
-%r257 = trunc i480 %r256 to i1
-%r258 = select i1 %r257, i480 %r155, i480 %r255
-%r259 = trunc i480 %r258 to i32
-%r261 = getelementptr i32, i32* %r1, i32 0
-store i32 %r259, i32* %r261
-%r262 = lshr i480 %r258, 32
-%r263 = trunc i480 %r262 to i32
-%r265 = getelementptr i32, i32* %r1, i32 1
-store i32 %r263, i32* %r265
-%r266 = lshr i480 %r262, 32
-%r267 = trunc i480 %r266 to i32
-%r269 = getelementptr i32, i32* %r1, i32 2
-store i32 %r267, i32* %r269
-%r270 = lshr i480 %r266, 32
-%r271 = trunc i480 %r270 to i32
-%r273 = getelementptr i32, i32* %r1, i32 3
-store i32 %r271, i32* %r273
-%r274 = lshr i480 %r270, 32
-%r275 = trunc i480 %r274 to i32
-%r277 = getelementptr i32, i32* %r1, i32 4
-store i32 %r275, i32* %r277
-%r278 = lshr i480 %r274, 32
-%r279 = trunc i480 %r278 to i32
-%r281 = getelementptr i32, i32* %r1, i32 5
-store i32 %r279, i32* %r281
-%r282 = lshr i480 %r278, 32
-%r283 = trunc i480 %r282 to i32
-%r285 = getelementptr i32, i32* %r1, i32 6
-store i32 %r283, i32* %r285
-%r286 = lshr i480 %r282, 32
-%r287 = trunc i480 %r286 to i32
-%r289 = getelementptr i32, i32* %r1, i32 7
-store i32 %r287, i32* %r289
-%r290 = lshr i480 %r286, 32
-%r291 = trunc i480 %r290 to i32
-%r293 = getelementptr i32, i32* %r1, i32 8
-store i32 %r291, i32* %r293
-%r294 = lshr i480 %r290, 32
-%r295 = trunc i480 %r294 to i32
-%r297 = getelementptr i32, i32* %r1, i32 9
-store i32 %r295, i32* %r297
-%r298 = lshr i480 %r294, 32
-%r299 = trunc i480 %r298 to i32
-%r301 = getelementptr i32, i32* %r1, i32 10
-store i32 %r299, i32* %r301
-%r302 = lshr i480 %r298, 32
-%r303 = trunc i480 %r302 to i32
-%r305 = getelementptr i32, i32* %r1, i32 11
-store i32 %r303, i32* %r305
-%r306 = lshr i480 %r302, 32
-%r307 = trunc i480 %r306 to i32
-%r309 = getelementptr i32, i32* %r1, i32 12
-store i32 %r307, i32* %r309
-%r310 = lshr i480 %r306, 32
-%r311 = trunc i480 %r310 to i32
-%r313 = getelementptr i32, i32* %r1, i32 13
-store i32 %r311, i32* %r313
-%r314 = lshr i480 %r310, 32
-%r315 = trunc i480 %r314 to i32
-%r317 = getelementptr i32, i32* %r1, i32 14
-store i32 %r315, i32* %r317
-ret void
-}
-define void @mcl_fp_montRed15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = zext i96 %r21 to i128
-%r24 = getelementptr i32, i32* %r3, i32 3
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i128
-%r27 = shl i128 %r26, 96
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i160
-%r31 = getelementptr i32, i32* %r3, i32 4
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i160
-%r34 = shl i160 %r33, 128
-%r35 = or i160 %r29, %r34
-%r36 = zext i160 %r35 to i192
-%r38 = getelementptr i32, i32* %r3, i32 5
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i192
-%r41 = shl i192 %r40, 160
-%r42 = or i192 %r36, %r41
-%r43 = zext i192 %r42 to i224
-%r45 = getelementptr i32, i32* %r3, i32 6
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i224
-%r48 = shl i224 %r47, 192
-%r49 = or i224 %r43, %r48
-%r50 = zext i224 %r49 to i256
-%r52 = getelementptr i32, i32* %r3, i32 7
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i256
-%r55 = shl i256 %r54, 224
-%r56 = or i256 %r50, %r55
-%r57 = zext i256 %r56 to i288
-%r59 = getelementptr i32, i32* %r3, i32 8
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i288
-%r62 = shl i288 %r61, 256
-%r63 = or i288 %r57, %r62
-%r64 = zext i288 %r63 to i320
-%r66 = getelementptr i32, i32* %r3, i32 9
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i320
-%r69 = shl i320 %r68, 288
-%r70 = or i320 %r64, %r69
-%r71 = zext i320 %r70 to i352
-%r73 = getelementptr i32, i32* %r3, i32 10
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i352
-%r76 = shl i352 %r75, 320
-%r77 = or i352 %r71, %r76
-%r78 = zext i352 %r77 to i384
-%r80 = getelementptr i32, i32* %r3, i32 11
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i384
-%r83 = shl i384 %r82, 352
-%r84 = or i384 %r78, %r83
-%r85 = zext i384 %r84 to i416
-%r87 = getelementptr i32, i32* %r3, i32 12
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i416
-%r90 = shl i416 %r89, 384
-%r91 = or i416 %r85, %r90
-%r92 = zext i416 %r91 to i448
-%r94 = getelementptr i32, i32* %r3, i32 13
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i448
-%r97 = shl i448 %r96, 416
-%r98 = or i448 %r92, %r97
-%r99 = zext i448 %r98 to i480
-%r101 = getelementptr i32, i32* %r3, i32 14
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i480
-%r104 = shl i480 %r103, 448
-%r105 = or i480 %r99, %r104
-%r106 = load i32, i32* %r2
-%r107 = zext i32 %r106 to i64
-%r109 = getelementptr i32, i32* %r2, i32 1
-%r110 = load i32, i32* %r109
-%r111 = zext i32 %r110 to i64
-%r112 = shl i64 %r111, 32
-%r113 = or i64 %r107, %r112
-%r114 = zext i64 %r113 to i96
-%r116 = getelementptr i32, i32* %r2, i32 2
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i96
-%r119 = shl i96 %r118, 64
-%r120 = or i96 %r114, %r119
-%r121 = zext i96 %r120 to i128
-%r123 = getelementptr i32, i32* %r2, i32 3
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i128
-%r126 = shl i128 %r125, 96
-%r127 = or i128 %r121, %r126
-%r128 = zext i128 %r127 to i160
-%r130 = getelementptr i32, i32* %r2, i32 4
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i160
-%r133 = shl i160 %r132, 128
-%r134 = or i160 %r128, %r133
-%r135 = zext i160 %r134 to i192
-%r137 = getelementptr i32, i32* %r2, i32 5
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i192
-%r140 = shl i192 %r139, 160
-%r141 = or i192 %r135, %r140
-%r142 = zext i192 %r141 to i224
-%r144 = getelementptr i32, i32* %r2, i32 6
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i224
-%r147 = shl i224 %r146, 192
-%r148 = or i224 %r142, %r147
-%r149 = zext i224 %r148 to i256
-%r151 = getelementptr i32, i32* %r2, i32 7
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i256
-%r154 = shl i256 %r153, 224
-%r155 = or i256 %r149, %r154
-%r156 = zext i256 %r155 to i288
-%r158 = getelementptr i32, i32* %r2, i32 8
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i288
-%r161 = shl i288 %r160, 256
-%r162 = or i288 %r156, %r161
-%r163 = zext i288 %r162 to i320
-%r165 = getelementptr i32, i32* %r2, i32 9
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i320
-%r168 = shl i320 %r167, 288
-%r169 = or i320 %r163, %r168
-%r170 = zext i320 %r169 to i352
-%r172 = getelementptr i32, i32* %r2, i32 10
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i352
-%r175 = shl i352 %r174, 320
-%r176 = or i352 %r170, %r175
-%r177 = zext i352 %r176 to i384
-%r179 = getelementptr i32, i32* %r2, i32 11
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i384
-%r182 = shl i384 %r181, 352
-%r183 = or i384 %r177, %r182
-%r184 = zext i384 %r183 to i416
-%r186 = getelementptr i32, i32* %r2, i32 12
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i416
-%r189 = shl i416 %r188, 384
-%r190 = or i416 %r184, %r189
-%r191 = zext i416 %r190 to i448
-%r193 = getelementptr i32, i32* %r2, i32 13
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i448
-%r196 = shl i448 %r195, 416
-%r197 = or i448 %r191, %r196
-%r198 = zext i448 %r197 to i480
-%r200 = getelementptr i32, i32* %r2, i32 14
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i480
-%r203 = shl i480 %r202, 448
-%r204 = or i480 %r198, %r203
-%r205 = zext i480 %r204 to i512
-%r207 = getelementptr i32, i32* %r2, i32 15
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i512
-%r210 = shl i512 %r209, 480
-%r211 = or i512 %r205, %r210
-%r212 = zext i512 %r211 to i544
-%r214 = getelementptr i32, i32* %r2, i32 16
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i544
-%r217 = shl i544 %r216, 512
-%r218 = or i544 %r212, %r217
-%r219 = zext i544 %r218 to i576
-%r221 = getelementptr i32, i32* %r2, i32 17
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i576
-%r224 = shl i576 %r223, 544
-%r225 = or i576 %r219, %r224
-%r226 = zext i576 %r225 to i608
-%r228 = getelementptr i32, i32* %r2, i32 18
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i608
-%r231 = shl i608 %r230, 576
-%r232 = or i608 %r226, %r231
-%r233 = zext i608 %r232 to i640
-%r235 = getelementptr i32, i32* %r2, i32 19
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i640
-%r238 = shl i640 %r237, 608
-%r239 = or i640 %r233, %r238
-%r240 = zext i640 %r239 to i672
-%r242 = getelementptr i32, i32* %r2, i32 20
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i672
-%r245 = shl i672 %r244, 640
-%r246 = or i672 %r240, %r245
-%r247 = zext i672 %r246 to i704
-%r249 = getelementptr i32, i32* %r2, i32 21
-%r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i704
-%r252 = shl i704 %r251, 672
-%r253 = or i704 %r247, %r252
-%r254 = zext i704 %r253 to i736
-%r256 = getelementptr i32, i32* %r2, i32 22
-%r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i736
-%r259 = shl i736 %r258, 704
-%r260 = or i736 %r254, %r259
-%r261 = zext i736 %r260 to i768
-%r263 = getelementptr i32, i32* %r2, i32 23
-%r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i768
-%r266 = shl i768 %r265, 736
-%r267 = or i768 %r261, %r266
-%r268 = zext i768 %r267 to i800
-%r270 = getelementptr i32, i32* %r2, i32 24
-%r271 = load i32, i32* %r270
-%r272 = zext i32 %r271 to i800
-%r273 = shl i800 %r272, 768
-%r274 = or i800 %r268, %r273
-%r275 = zext i800 %r274 to i832
-%r277 = getelementptr i32, i32* %r2, i32 25
-%r278 = load i32, i32* %r277
-%r279 = zext i32 %r278 to i832
-%r280 = shl i832 %r279, 800
-%r281 = or i832 %r275, %r280
-%r282 = zext i832 %r281 to i864
-%r284 = getelementptr i32, i32* %r2, i32 26
-%r285 = load i32, i32* %r284
-%r286 = zext i32 %r285 to i864
-%r287 = shl i864 %r286, 832
-%r288 = or i864 %r282, %r287
-%r289 = zext i864 %r288 to i896
-%r291 = getelementptr i32, i32* %r2, i32 27
-%r292 = load i32, i32* %r291
-%r293 = zext i32 %r292 to i896
-%r294 = shl i896 %r293, 864
-%r295 = or i896 %r289, %r294
-%r296 = zext i896 %r295 to i928
-%r298 = getelementptr i32, i32* %r2, i32 28
-%r299 = load i32, i32* %r298
-%r300 = zext i32 %r299 to i928
-%r301 = shl i928 %r300, 896
-%r302 = or i928 %r296, %r301
-%r303 = zext i928 %r302 to i960
-%r305 = getelementptr i32, i32* %r2, i32 29
-%r306 = load i32, i32* %r305
-%r307 = zext i32 %r306 to i960
-%r308 = shl i960 %r307, 928
-%r309 = or i960 %r303, %r308
-%r310 = zext i960 %r309 to i992
-%r311 = trunc i992 %r310 to i32
-%r312 = mul i32 %r311, %r6
-%r313 = call i512 @mulPv480x32(i32* %r3, i32 %r312)
-%r314 = zext i512 %r313 to i992
-%r315 = add i992 %r310, %r314
-%r316 = lshr i992 %r315, 32
-%r317 = trunc i992 %r316 to i960
-%r318 = trunc i960 %r317 to i32
-%r319 = mul i32 %r318, %r6
-%r320 = call i512 @mulPv480x32(i32* %r3, i32 %r319)
-%r321 = zext i512 %r320 to i960
-%r322 = add i960 %r317, %r321
-%r323 = lshr i960 %r322, 32
-%r324 = trunc i960 %r323 to i928
-%r325 = trunc i928 %r324 to i32
-%r326 = mul i32 %r325, %r6
-%r327 = call i512 @mulPv480x32(i32* %r3, i32 %r326)
-%r328 = zext i512 %r327 to i928
-%r329 = add i928 %r324, %r328
-%r330 = lshr i928 %r329, 32
-%r331 = trunc i928 %r330 to i896
-%r332 = trunc i896 %r331 to i32
-%r333 = mul i32 %r332, %r6
-%r334 = call i512 @mulPv480x32(i32* %r3, i32 %r333)
-%r335 = zext i512 %r334 to i896
-%r336 = add i896 %r331, %r335
-%r337 = lshr i896 %r336, 32
-%r338 = trunc i896 %r337 to i864
-%r339 = trunc i864 %r338 to i32
-%r340 = mul i32 %r339, %r6
-%r341 = call i512 @mulPv480x32(i32* %r3, i32 %r340)
-%r342 = zext i512 %r341 to i864
-%r343 = add i864 %r338, %r342
-%r344 = lshr i864 %r343, 32
-%r345 = trunc i864 %r344 to i832
-%r346 = trunc i832 %r345 to i32
-%r347 = mul i32 %r346, %r6
-%r348 = call i512 @mulPv480x32(i32* %r3, i32 %r347)
-%r349 = zext i512 %r348 to i832
-%r350 = add i832 %r345, %r349
-%r351 = lshr i832 %r350, 32
-%r352 = trunc i832 %r351 to i800
-%r353 = trunc i800 %r352 to i32
-%r354 = mul i32 %r353, %r6
-%r355 = call i512 @mulPv480x32(i32* %r3, i32 %r354)
-%r356 = zext i512 %r355 to i800
-%r357 = add i800 %r352, %r356
-%r358 = lshr i800 %r357, 32
-%r359 = trunc i800 %r358 to i768
-%r360 = trunc i768 %r359 to i32
-%r361 = mul i32 %r360, %r6
-%r362 = call i512 @mulPv480x32(i32* %r3, i32 %r361)
-%r363 = zext i512 %r362 to i768
-%r364 = add i768 %r359, %r363
-%r365 = lshr i768 %r364, 32
-%r366 = trunc i768 %r365 to i736
-%r367 = trunc i736 %r366 to i32
-%r368 = mul i32 %r367, %r6
-%r369 = call i512 @mulPv480x32(i32* %r3, i32 %r368)
-%r370 = zext i512 %r369 to i736
-%r371 = add i736 %r366, %r370
-%r372 = lshr i736 %r371, 32
-%r373 = trunc i736 %r372 to i704
-%r374 = trunc i704 %r373 to i32
-%r375 = mul i32 %r374, %r6
-%r376 = call i512 @mulPv480x32(i32* %r3, i32 %r375)
-%r377 = zext i512 %r376 to i704
-%r378 = add i704 %r373, %r377
-%r379 = lshr i704 %r378, 32
-%r380 = trunc i704 %r379 to i672
-%r381 = trunc i672 %r380 to i32
-%r382 = mul i32 %r381, %r6
-%r383 = call i512 @mulPv480x32(i32* %r3, i32 %r382)
-%r384 = zext i512 %r383 to i672
-%r385 = add i672 %r380, %r384
-%r386 = lshr i672 %r385, 32
-%r387 = trunc i672 %r386 to i640
-%r388 = trunc i640 %r387 to i32
-%r389 = mul i32 %r388, %r6
-%r390 = call i512 @mulPv480x32(i32* %r3, i32 %r389)
-%r391 = zext i512 %r390 to i640
-%r392 = add i640 %r387, %r391
-%r393 = lshr i640 %r392, 32
-%r394 = trunc i640 %r393 to i608
-%r395 = trunc i608 %r394 to i32
-%r396 = mul i32 %r395, %r6
-%r397 = call i512 @mulPv480x32(i32* %r3, i32 %r396)
-%r398 = zext i512 %r397 to i608
-%r399 = add i608 %r394, %r398
-%r400 = lshr i608 %r399, 32
-%r401 = trunc i608 %r400 to i576
-%r402 = trunc i576 %r401 to i32
-%r403 = mul i32 %r402, %r6
-%r404 = call i512 @mulPv480x32(i32* %r3, i32 %r403)
-%r405 = zext i512 %r404 to i576
-%r406 = add i576 %r401, %r405
-%r407 = lshr i576 %r406, 32
-%r408 = trunc i576 %r407 to i544
-%r409 = trunc i544 %r408 to i32
-%r410 = mul i32 %r409, %r6
-%r411 = call i512 @mulPv480x32(i32* %r3, i32 %r410)
-%r412 = zext i512 %r411 to i544
-%r413 = add i544 %r408, %r412
-%r414 = lshr i544 %r413, 32
-%r415 = trunc i544 %r414 to i512
-%r416 = zext i480 %r105 to i512
-%r417 = sub i512 %r415, %r416
-%r418 = lshr i512 %r417, 480
-%r419 = trunc i512 %r418 to i1
-%r420 = select i1 %r419, i512 %r415, i512 %r417
-%r421 = trunc i512 %r420 to i480
-%r422 = trunc i480 %r421 to i32
-%r424 = getelementptr i32, i32* %r1, i32 0
-store i32 %r422, i32* %r424
-%r425 = lshr i480 %r421, 32
-%r426 = trunc i480 %r425 to i32
-%r428 = getelementptr i32, i32* %r1, i32 1
-store i32 %r426, i32* %r428
-%r429 = lshr i480 %r425, 32
-%r430 = trunc i480 %r429 to i32
-%r432 = getelementptr i32, i32* %r1, i32 2
-store i32 %r430, i32* %r432
-%r433 = lshr i480 %r429, 32
-%r434 = trunc i480 %r433 to i32
-%r436 = getelementptr i32, i32* %r1, i32 3
-store i32 %r434, i32* %r436
-%r437 = lshr i480 %r433, 32
-%r438 = trunc i480 %r437 to i32
-%r440 = getelementptr i32, i32* %r1, i32 4
-store i32 %r438, i32* %r440
-%r441 = lshr i480 %r437, 32
-%r442 = trunc i480 %r441 to i32
-%r444 = getelementptr i32, i32* %r1, i32 5
-store i32 %r442, i32* %r444
-%r445 = lshr i480 %r441, 32
-%r446 = trunc i480 %r445 to i32
-%r448 = getelementptr i32, i32* %r1, i32 6
-store i32 %r446, i32* %r448
-%r449 = lshr i480 %r445, 32
-%r450 = trunc i480 %r449 to i32
-%r452 = getelementptr i32, i32* %r1, i32 7
-store i32 %r450, i32* %r452
-%r453 = lshr i480 %r449, 32
-%r454 = trunc i480 %r453 to i32
-%r456 = getelementptr i32, i32* %r1, i32 8
-store i32 %r454, i32* %r456
-%r457 = lshr i480 %r453, 32
-%r458 = trunc i480 %r457 to i32
-%r460 = getelementptr i32, i32* %r1, i32 9
-store i32 %r458, i32* %r460
-%r461 = lshr i480 %r457, 32
-%r462 = trunc i480 %r461 to i32
-%r464 = getelementptr i32, i32* %r1, i32 10
-store i32 %r462, i32* %r464
-%r465 = lshr i480 %r461, 32
-%r466 = trunc i480 %r465 to i32
-%r468 = getelementptr i32, i32* %r1, i32 11
-store i32 %r466, i32* %r468
-%r469 = lshr i480 %r465, 32
-%r470 = trunc i480 %r469 to i32
-%r472 = getelementptr i32, i32* %r1, i32 12
-store i32 %r470, i32* %r472
-%r473 = lshr i480 %r469, 32
-%r474 = trunc i480 %r473 to i32
-%r476 = getelementptr i32, i32* %r1, i32 13
-store i32 %r474, i32* %r476
-%r477 = lshr i480 %r473, 32
-%r478 = trunc i480 %r477 to i32
-%r480 = getelementptr i32, i32* %r1, i32 14
-store i32 %r478, i32* %r480
-ret void
-}
-define i32 @mcl_fp_addPre15L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r3, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r3, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r3, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r3, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r3, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r105 = load i32, i32* %r4
-%r106 = zext i32 %r105 to i64
-%r108 = getelementptr i32, i32* %r4, i32 1
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i64
-%r111 = shl i64 %r110, 32
-%r112 = or i64 %r106, %r111
-%r113 = zext i64 %r112 to i96
-%r115 = getelementptr i32, i32* %r4, i32 2
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i96
-%r118 = shl i96 %r117, 64
-%r119 = or i96 %r113, %r118
-%r120 = zext i96 %r119 to i128
-%r122 = getelementptr i32, i32* %r4, i32 3
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i128
-%r125 = shl i128 %r124, 96
-%r126 = or i128 %r120, %r125
-%r127 = zext i128 %r126 to i160
-%r129 = getelementptr i32, i32* %r4, i32 4
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i160
-%r132 = shl i160 %r131, 128
-%r133 = or i160 %r127, %r132
-%r134 = zext i160 %r133 to i192
-%r136 = getelementptr i32, i32* %r4, i32 5
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i192
-%r139 = shl i192 %r138, 160
-%r140 = or i192 %r134, %r139
-%r141 = zext i192 %r140 to i224
-%r143 = getelementptr i32, i32* %r4, i32 6
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i224
-%r146 = shl i224 %r145, 192
-%r147 = or i224 %r141, %r146
-%r148 = zext i224 %r147 to i256
-%r150 = getelementptr i32, i32* %r4, i32 7
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i256
-%r153 = shl i256 %r152, 224
-%r154 = or i256 %r148, %r153
-%r155 = zext i256 %r154 to i288
-%r157 = getelementptr i32, i32* %r4, i32 8
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i288
-%r160 = shl i288 %r159, 256
-%r161 = or i288 %r155, %r160
-%r162 = zext i288 %r161 to i320
-%r164 = getelementptr i32, i32* %r4, i32 9
-%r165 = load i32, i32* %r164
-%r166 = zext i32 %r165 to i320
-%r167 = shl i320 %r166, 288
-%r168 = or i320 %r162, %r167
-%r169 = zext i320 %r168 to i352
-%r171 = getelementptr i32, i32* %r4, i32 10
-%r172 = load i32, i32* %r171
-%r173 = zext i32 %r172 to i352
-%r174 = shl i352 %r173, 320
-%r175 = or i352 %r169, %r174
-%r176 = zext i352 %r175 to i384
-%r178 = getelementptr i32, i32* %r4, i32 11
-%r179 = load i32, i32* %r178
-%r180 = zext i32 %r179 to i384
-%r181 = shl i384 %r180, 352
-%r182 = or i384 %r176, %r181
-%r183 = zext i384 %r182 to i416
-%r185 = getelementptr i32, i32* %r4, i32 12
-%r186 = load i32, i32* %r185
-%r187 = zext i32 %r186 to i416
-%r188 = shl i416 %r187, 384
-%r189 = or i416 %r183, %r188
-%r190 = zext i416 %r189 to i448
-%r192 = getelementptr i32, i32* %r4, i32 13
-%r193 = load i32, i32* %r192
-%r194 = zext i32 %r193 to i448
-%r195 = shl i448 %r194, 416
-%r196 = or i448 %r190, %r195
-%r197 = zext i448 %r196 to i480
-%r199 = getelementptr i32, i32* %r4, i32 14
-%r200 = load i32, i32* %r199
-%r201 = zext i32 %r200 to i480
-%r202 = shl i480 %r201, 448
-%r203 = or i480 %r197, %r202
-%r204 = zext i480 %r203 to i512
-%r205 = add i512 %r104, %r204
-%r206 = trunc i512 %r205 to i480
-%r207 = trunc i480 %r206 to i32
-%r209 = getelementptr i32, i32* %r2, i32 0
-store i32 %r207, i32* %r209
-%r210 = lshr i480 %r206, 32
-%r211 = trunc i480 %r210 to i32
-%r213 = getelementptr i32, i32* %r2, i32 1
-store i32 %r211, i32* %r213
-%r214 = lshr i480 %r210, 32
-%r215 = trunc i480 %r214 to i32
-%r217 = getelementptr i32, i32* %r2, i32 2
-store i32 %r215, i32* %r217
-%r218 = lshr i480 %r214, 32
-%r219 = trunc i480 %r218 to i32
-%r221 = getelementptr i32, i32* %r2, i32 3
-store i32 %r219, i32* %r221
-%r222 = lshr i480 %r218, 32
-%r223 = trunc i480 %r222 to i32
-%r225 = getelementptr i32, i32* %r2, i32 4
-store i32 %r223, i32* %r225
-%r226 = lshr i480 %r222, 32
-%r227 = trunc i480 %r226 to i32
-%r229 = getelementptr i32, i32* %r2, i32 5
-store i32 %r227, i32* %r229
-%r230 = lshr i480 %r226, 32
-%r231 = trunc i480 %r230 to i32
-%r233 = getelementptr i32, i32* %r2, i32 6
-store i32 %r231, i32* %r233
-%r234 = lshr i480 %r230, 32
-%r235 = trunc i480 %r234 to i32
-%r237 = getelementptr i32, i32* %r2, i32 7
-store i32 %r235, i32* %r237
-%r238 = lshr i480 %r234, 32
-%r239 = trunc i480 %r238 to i32
-%r241 = getelementptr i32, i32* %r2, i32 8
-store i32 %r239, i32* %r241
-%r242 = lshr i480 %r238, 32
-%r243 = trunc i480 %r242 to i32
-%r245 = getelementptr i32, i32* %r2, i32 9
-store i32 %r243, i32* %r245
-%r246 = lshr i480 %r242, 32
-%r247 = trunc i480 %r246 to i32
-%r249 = getelementptr i32, i32* %r2, i32 10
-store i32 %r247, i32* %r249
-%r250 = lshr i480 %r246, 32
-%r251 = trunc i480 %r250 to i32
-%r253 = getelementptr i32, i32* %r2, i32 11
-store i32 %r251, i32* %r253
-%r254 = lshr i480 %r250, 32
-%r255 = trunc i480 %r254 to i32
-%r257 = getelementptr i32, i32* %r2, i32 12
-store i32 %r255, i32* %r257
-%r258 = lshr i480 %r254, 32
-%r259 = trunc i480 %r258 to i32
-%r261 = getelementptr i32, i32* %r2, i32 13
-store i32 %r259, i32* %r261
-%r262 = lshr i480 %r258, 32
-%r263 = trunc i480 %r262 to i32
-%r265 = getelementptr i32, i32* %r2, i32 14
-store i32 %r263, i32* %r265
-%r266 = lshr i512 %r205, 480
-%r267 = trunc i512 %r266 to i32
-ret i32 %r267
-}
-define i32 @mcl_fp_subPre15L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r3, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r3, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r3, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r3, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r3, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r105 = load i32, i32* %r4
-%r106 = zext i32 %r105 to i64
-%r108 = getelementptr i32, i32* %r4, i32 1
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i64
-%r111 = shl i64 %r110, 32
-%r112 = or i64 %r106, %r111
-%r113 = zext i64 %r112 to i96
-%r115 = getelementptr i32, i32* %r4, i32 2
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i96
-%r118 = shl i96 %r117, 64
-%r119 = or i96 %r113, %r118
-%r120 = zext i96 %r119 to i128
-%r122 = getelementptr i32, i32* %r4, i32 3
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i128
-%r125 = shl i128 %r124, 96
-%r126 = or i128 %r120, %r125
-%r127 = zext i128 %r126 to i160
-%r129 = getelementptr i32, i32* %r4, i32 4
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i160
-%r132 = shl i160 %r131, 128
-%r133 = or i160 %r127, %r132
-%r134 = zext i160 %r133 to i192
-%r136 = getelementptr i32, i32* %r4, i32 5
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i192
-%r139 = shl i192 %r138, 160
-%r140 = or i192 %r134, %r139
-%r141 = zext i192 %r140 to i224
-%r143 = getelementptr i32, i32* %r4, i32 6
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i224
-%r146 = shl i224 %r145, 192
-%r147 = or i224 %r141, %r146
-%r148 = zext i224 %r147 to i256
-%r150 = getelementptr i32, i32* %r4, i32 7
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i256
-%r153 = shl i256 %r152, 224
-%r154 = or i256 %r148, %r153
-%r155 = zext i256 %r154 to i288
-%r157 = getelementptr i32, i32* %r4, i32 8
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i288
-%r160 = shl i288 %r159, 256
-%r161 = or i288 %r155, %r160
-%r162 = zext i288 %r161 to i320
-%r164 = getelementptr i32, i32* %r4, i32 9
-%r165 = load i32, i32* %r164
-%r166 = zext i32 %r165 to i320
-%r167 = shl i320 %r166, 288
-%r168 = or i320 %r162, %r167
-%r169 = zext i320 %r168 to i352
-%r171 = getelementptr i32, i32* %r4, i32 10
-%r172 = load i32, i32* %r171
-%r173 = zext i32 %r172 to i352
-%r174 = shl i352 %r173, 320
-%r175 = or i352 %r169, %r174
-%r176 = zext i352 %r175 to i384
-%r178 = getelementptr i32, i32* %r4, i32 11
-%r179 = load i32, i32* %r178
-%r180 = zext i32 %r179 to i384
-%r181 = shl i384 %r180, 352
-%r182 = or i384 %r176, %r181
-%r183 = zext i384 %r182 to i416
-%r185 = getelementptr i32, i32* %r4, i32 12
-%r186 = load i32, i32* %r185
-%r187 = zext i32 %r186 to i416
-%r188 = shl i416 %r187, 384
-%r189 = or i416 %r183, %r188
-%r190 = zext i416 %r189 to i448
-%r192 = getelementptr i32, i32* %r4, i32 13
-%r193 = load i32, i32* %r192
-%r194 = zext i32 %r193 to i448
-%r195 = shl i448 %r194, 416
-%r196 = or i448 %r190, %r195
-%r197 = zext i448 %r196 to i480
-%r199 = getelementptr i32, i32* %r4, i32 14
-%r200 = load i32, i32* %r199
-%r201 = zext i32 %r200 to i480
-%r202 = shl i480 %r201, 448
-%r203 = or i480 %r197, %r202
-%r204 = zext i480 %r203 to i512
-%r205 = sub i512 %r104, %r204
-%r206 = trunc i512 %r205 to i480
-%r207 = trunc i480 %r206 to i32
-%r209 = getelementptr i32, i32* %r2, i32 0
-store i32 %r207, i32* %r209
-%r210 = lshr i480 %r206, 32
-%r211 = trunc i480 %r210 to i32
-%r213 = getelementptr i32, i32* %r2, i32 1
-store i32 %r211, i32* %r213
-%r214 = lshr i480 %r210, 32
-%r215 = trunc i480 %r214 to i32
-%r217 = getelementptr i32, i32* %r2, i32 2
-store i32 %r215, i32* %r217
-%r218 = lshr i480 %r214, 32
-%r219 = trunc i480 %r218 to i32
-%r221 = getelementptr i32, i32* %r2, i32 3
-store i32 %r219, i32* %r221
-%r222 = lshr i480 %r218, 32
-%r223 = trunc i480 %r222 to i32
-%r225 = getelementptr i32, i32* %r2, i32 4
-store i32 %r223, i32* %r225
-%r226 = lshr i480 %r222, 32
-%r227 = trunc i480 %r226 to i32
-%r229 = getelementptr i32, i32* %r2, i32 5
-store i32 %r227, i32* %r229
-%r230 = lshr i480 %r226, 32
-%r231 = trunc i480 %r230 to i32
-%r233 = getelementptr i32, i32* %r2, i32 6
-store i32 %r231, i32* %r233
-%r234 = lshr i480 %r230, 32
-%r235 = trunc i480 %r234 to i32
-%r237 = getelementptr i32, i32* %r2, i32 7
-store i32 %r235, i32* %r237
-%r238 = lshr i480 %r234, 32
-%r239 = trunc i480 %r238 to i32
-%r241 = getelementptr i32, i32* %r2, i32 8
-store i32 %r239, i32* %r241
-%r242 = lshr i480 %r238, 32
-%r243 = trunc i480 %r242 to i32
-%r245 = getelementptr i32, i32* %r2, i32 9
-store i32 %r243, i32* %r245
-%r246 = lshr i480 %r242, 32
-%r247 = trunc i480 %r246 to i32
-%r249 = getelementptr i32, i32* %r2, i32 10
-store i32 %r247, i32* %r249
-%r250 = lshr i480 %r246, 32
-%r251 = trunc i480 %r250 to i32
-%r253 = getelementptr i32, i32* %r2, i32 11
-store i32 %r251, i32* %r253
-%r254 = lshr i480 %r250, 32
-%r255 = trunc i480 %r254 to i32
-%r257 = getelementptr i32, i32* %r2, i32 12
-store i32 %r255, i32* %r257
-%r258 = lshr i480 %r254, 32
-%r259 = trunc i480 %r258 to i32
-%r261 = getelementptr i32, i32* %r2, i32 13
-store i32 %r259, i32* %r261
-%r262 = lshr i480 %r258, 32
-%r263 = trunc i480 %r262 to i32
-%r265 = getelementptr i32, i32* %r2, i32 14
-store i32 %r263, i32* %r265
-%r266 = lshr i512 %r205, 480
-%r267 = trunc i512 %r266 to i32
-%r269 = and i32 %r267, 1
-ret i32 %r269
-}
-define void @mcl_fp_shr1_15L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = zext i64 %r10 to i96
-%r13 = getelementptr i32, i32* %r2, i32 2
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i96
-%r16 = shl i96 %r15, 64
-%r17 = or i96 %r11, %r16
-%r18 = zext i96 %r17 to i128
-%r20 = getelementptr i32, i32* %r2, i32 3
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i128
-%r23 = shl i128 %r22, 96
-%r24 = or i128 %r18, %r23
-%r25 = zext i128 %r24 to i160
-%r27 = getelementptr i32, i32* %r2, i32 4
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i160
-%r30 = shl i160 %r29, 128
-%r31 = or i160 %r25, %r30
-%r32 = zext i160 %r31 to i192
-%r34 = getelementptr i32, i32* %r2, i32 5
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i192
-%r37 = shl i192 %r36, 160
-%r38 = or i192 %r32, %r37
-%r39 = zext i192 %r38 to i224
-%r41 = getelementptr i32, i32* %r2, i32 6
-%r42 = load i32, i32* %r41
-%r43 = zext i32 %r42 to i224
-%r44 = shl i224 %r43, 192
-%r45 = or i224 %r39, %r44
-%r46 = zext i224 %r45 to i256
-%r48 = getelementptr i32, i32* %r2, i32 7
-%r49 = load i32, i32* %r48
-%r50 = zext i32 %r49 to i256
-%r51 = shl i256 %r50, 224
-%r52 = or i256 %r46, %r51
-%r53 = zext i256 %r52 to i288
-%r55 = getelementptr i32, i32* %r2, i32 8
-%r56 = load i32, i32* %r55
-%r57 = zext i32 %r56 to i288
-%r58 = shl i288 %r57, 256
-%r59 = or i288 %r53, %r58
-%r60 = zext i288 %r59 to i320
-%r62 = getelementptr i32, i32* %r2, i32 9
-%r63 = load i32, i32* %r62
-%r64 = zext i32 %r63 to i320
-%r65 = shl i320 %r64, 288
-%r66 = or i320 %r60, %r65
-%r67 = zext i320 %r66 to i352
-%r69 = getelementptr i32, i32* %r2, i32 10
-%r70 = load i32, i32* %r69
-%r71 = zext i32 %r70 to i352
-%r72 = shl i352 %r71, 320
-%r73 = or i352 %r67, %r72
-%r74 = zext i352 %r73 to i384
-%r76 = getelementptr i32, i32* %r2, i32 11
-%r77 = load i32, i32* %r76
-%r78 = zext i32 %r77 to i384
-%r79 = shl i384 %r78, 352
-%r80 = or i384 %r74, %r79
-%r81 = zext i384 %r80 to i416
-%r83 = getelementptr i32, i32* %r2, i32 12
-%r84 = load i32, i32* %r83
-%r85 = zext i32 %r84 to i416
-%r86 = shl i416 %r85, 384
-%r87 = or i416 %r81, %r86
-%r88 = zext i416 %r87 to i448
-%r90 = getelementptr i32, i32* %r2, i32 13
-%r91 = load i32, i32* %r90
-%r92 = zext i32 %r91 to i448
-%r93 = shl i448 %r92, 416
-%r94 = or i448 %r88, %r93
-%r95 = zext i448 %r94 to i480
-%r97 = getelementptr i32, i32* %r2, i32 14
-%r98 = load i32, i32* %r97
-%r99 = zext i32 %r98 to i480
-%r100 = shl i480 %r99, 448
-%r101 = or i480 %r95, %r100
-%r102 = lshr i480 %r101, 1
-%r103 = trunc i480 %r102 to i32
-%r105 = getelementptr i32, i32* %r1, i32 0
-store i32 %r103, i32* %r105
-%r106 = lshr i480 %r102, 32
-%r107 = trunc i480 %r106 to i32
-%r109 = getelementptr i32, i32* %r1, i32 1
-store i32 %r107, i32* %r109
-%r110 = lshr i480 %r106, 32
-%r111 = trunc i480 %r110 to i32
-%r113 = getelementptr i32, i32* %r1, i32 2
-store i32 %r111, i32* %r113
-%r114 = lshr i480 %r110, 32
-%r115 = trunc i480 %r114 to i32
-%r117 = getelementptr i32, i32* %r1, i32 3
-store i32 %r115, i32* %r117
-%r118 = lshr i480 %r114, 32
-%r119 = trunc i480 %r118 to i32
-%r121 = getelementptr i32, i32* %r1, i32 4
-store i32 %r119, i32* %r121
-%r122 = lshr i480 %r118, 32
-%r123 = trunc i480 %r122 to i32
-%r125 = getelementptr i32, i32* %r1, i32 5
-store i32 %r123, i32* %r125
-%r126 = lshr i480 %r122, 32
-%r127 = trunc i480 %r126 to i32
-%r129 = getelementptr i32, i32* %r1, i32 6
-store i32 %r127, i32* %r129
-%r130 = lshr i480 %r126, 32
-%r131 = trunc i480 %r130 to i32
-%r133 = getelementptr i32, i32* %r1, i32 7
-store i32 %r131, i32* %r133
-%r134 = lshr i480 %r130, 32
-%r135 = trunc i480 %r134 to i32
-%r137 = getelementptr i32, i32* %r1, i32 8
-store i32 %r135, i32* %r137
-%r138 = lshr i480 %r134, 32
-%r139 = trunc i480 %r138 to i32
-%r141 = getelementptr i32, i32* %r1, i32 9
-store i32 %r139, i32* %r141
-%r142 = lshr i480 %r138, 32
-%r143 = trunc i480 %r142 to i32
-%r145 = getelementptr i32, i32* %r1, i32 10
-store i32 %r143, i32* %r145
-%r146 = lshr i480 %r142, 32
-%r147 = trunc i480 %r146 to i32
-%r149 = getelementptr i32, i32* %r1, i32 11
-store i32 %r147, i32* %r149
-%r150 = lshr i480 %r146, 32
-%r151 = trunc i480 %r150 to i32
-%r153 = getelementptr i32, i32* %r1, i32 12
-store i32 %r151, i32* %r153
-%r154 = lshr i480 %r150, 32
-%r155 = trunc i480 %r154 to i32
-%r157 = getelementptr i32, i32* %r1, i32 13
-store i32 %r155, i32* %r157
-%r158 = lshr i480 %r154, 32
-%r159 = trunc i480 %r158 to i32
-%r161 = getelementptr i32, i32* %r1, i32 14
-store i32 %r159, i32* %r161
-ret void
-}
-define void @mcl_fp_add15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = load i32, i32* %r3
-%r105 = zext i32 %r104 to i64
-%r107 = getelementptr i32, i32* %r3, i32 1
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i64
-%r110 = shl i64 %r109, 32
-%r111 = or i64 %r105, %r110
-%r112 = zext i64 %r111 to i96
-%r114 = getelementptr i32, i32* %r3, i32 2
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i96
-%r117 = shl i96 %r116, 64
-%r118 = or i96 %r112, %r117
-%r119 = zext i96 %r118 to i128
-%r121 = getelementptr i32, i32* %r3, i32 3
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i128
-%r124 = shl i128 %r123, 96
-%r125 = or i128 %r119, %r124
-%r126 = zext i128 %r125 to i160
-%r128 = getelementptr i32, i32* %r3, i32 4
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i160
-%r131 = shl i160 %r130, 128
-%r132 = or i160 %r126, %r131
-%r133 = zext i160 %r132 to i192
-%r135 = getelementptr i32, i32* %r3, i32 5
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i192
-%r138 = shl i192 %r137, 160
-%r139 = or i192 %r133, %r138
-%r140 = zext i192 %r139 to i224
-%r142 = getelementptr i32, i32* %r3, i32 6
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i224
-%r145 = shl i224 %r144, 192
-%r146 = or i224 %r140, %r145
-%r147 = zext i224 %r146 to i256
-%r149 = getelementptr i32, i32* %r3, i32 7
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i256
-%r152 = shl i256 %r151, 224
-%r153 = or i256 %r147, %r152
-%r154 = zext i256 %r153 to i288
-%r156 = getelementptr i32, i32* %r3, i32 8
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i288
-%r159 = shl i288 %r158, 256
-%r160 = or i288 %r154, %r159
-%r161 = zext i288 %r160 to i320
-%r163 = getelementptr i32, i32* %r3, i32 9
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i320
-%r166 = shl i320 %r165, 288
-%r167 = or i320 %r161, %r166
-%r168 = zext i320 %r167 to i352
-%r170 = getelementptr i32, i32* %r3, i32 10
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i352
-%r173 = shl i352 %r172, 320
-%r174 = or i352 %r168, %r173
-%r175 = zext i352 %r174 to i384
-%r177 = getelementptr i32, i32* %r3, i32 11
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i384
-%r180 = shl i384 %r179, 352
-%r181 = or i384 %r175, %r180
-%r182 = zext i384 %r181 to i416
-%r184 = getelementptr i32, i32* %r3, i32 12
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i416
-%r187 = shl i416 %r186, 384
-%r188 = or i416 %r182, %r187
-%r189 = zext i416 %r188 to i448
-%r191 = getelementptr i32, i32* %r3, i32 13
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i448
-%r194 = shl i448 %r193, 416
-%r195 = or i448 %r189, %r194
-%r196 = zext i448 %r195 to i480
-%r198 = getelementptr i32, i32* %r3, i32 14
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i480
-%r201 = shl i480 %r200, 448
-%r202 = or i480 %r196, %r201
-%r203 = zext i480 %r103 to i512
-%r204 = zext i480 %r202 to i512
-%r205 = add i512 %r203, %r204
-%r206 = trunc i512 %r205 to i480
-%r207 = trunc i480 %r206 to i32
-%r209 = getelementptr i32, i32* %r1, i32 0
-store i32 %r207, i32* %r209
-%r210 = lshr i480 %r206, 32
-%r211 = trunc i480 %r210 to i32
-%r213 = getelementptr i32, i32* %r1, i32 1
-store i32 %r211, i32* %r213
-%r214 = lshr i480 %r210, 32
-%r215 = trunc i480 %r214 to i32
-%r217 = getelementptr i32, i32* %r1, i32 2
-store i32 %r215, i32* %r217
-%r218 = lshr i480 %r214, 32
-%r219 = trunc i480 %r218 to i32
-%r221 = getelementptr i32, i32* %r1, i32 3
-store i32 %r219, i32* %r221
-%r222 = lshr i480 %r218, 32
-%r223 = trunc i480 %r222 to i32
-%r225 = getelementptr i32, i32* %r1, i32 4
-store i32 %r223, i32* %r225
-%r226 = lshr i480 %r222, 32
-%r227 = trunc i480 %r226 to i32
-%r229 = getelementptr i32, i32* %r1, i32 5
-store i32 %r227, i32* %r229
-%r230 = lshr i480 %r226, 32
-%r231 = trunc i480 %r230 to i32
-%r233 = getelementptr i32, i32* %r1, i32 6
-store i32 %r231, i32* %r233
-%r234 = lshr i480 %r230, 32
-%r235 = trunc i480 %r234 to i32
-%r237 = getelementptr i32, i32* %r1, i32 7
-store i32 %r235, i32* %r237
-%r238 = lshr i480 %r234, 32
-%r239 = trunc i480 %r238 to i32
-%r241 = getelementptr i32, i32* %r1, i32 8
-store i32 %r239, i32* %r241
-%r242 = lshr i480 %r238, 32
-%r243 = trunc i480 %r242 to i32
-%r245 = getelementptr i32, i32* %r1, i32 9
-store i32 %r243, i32* %r245
-%r246 = lshr i480 %r242, 32
-%r247 = trunc i480 %r246 to i32
-%r249 = getelementptr i32, i32* %r1, i32 10
-store i32 %r247, i32* %r249
-%r250 = lshr i480 %r246, 32
-%r251 = trunc i480 %r250 to i32
-%r253 = getelementptr i32, i32* %r1, i32 11
-store i32 %r251, i32* %r253
-%r254 = lshr i480 %r250, 32
-%r255 = trunc i480 %r254 to i32
-%r257 = getelementptr i32, i32* %r1, i32 12
-store i32 %r255, i32* %r257
-%r258 = lshr i480 %r254, 32
-%r259 = trunc i480 %r258 to i32
-%r261 = getelementptr i32, i32* %r1, i32 13
-store i32 %r259, i32* %r261
-%r262 = lshr i480 %r258, 32
-%r263 = trunc i480 %r262 to i32
-%r265 = getelementptr i32, i32* %r1, i32 14
-store i32 %r263, i32* %r265
-%r266 = load i32, i32* %r4
-%r267 = zext i32 %r266 to i64
-%r269 = getelementptr i32, i32* %r4, i32 1
-%r270 = load i32, i32* %r269
-%r271 = zext i32 %r270 to i64
-%r272 = shl i64 %r271, 32
-%r273 = or i64 %r267, %r272
-%r274 = zext i64 %r273 to i96
-%r276 = getelementptr i32, i32* %r4, i32 2
-%r277 = load i32, i32* %r276
-%r278 = zext i32 %r277 to i96
-%r279 = shl i96 %r278, 64
-%r280 = or i96 %r274, %r279
-%r281 = zext i96 %r280 to i128
-%r283 = getelementptr i32, i32* %r4, i32 3
-%r284 = load i32, i32* %r283
-%r285 = zext i32 %r284 to i128
-%r286 = shl i128 %r285, 96
-%r287 = or i128 %r281, %r286
-%r288 = zext i128 %r287 to i160
-%r290 = getelementptr i32, i32* %r4, i32 4
-%r291 = load i32, i32* %r290
-%r292 = zext i32 %r291 to i160
-%r293 = shl i160 %r292, 128
-%r294 = or i160 %r288, %r293
-%r295 = zext i160 %r294 to i192
-%r297 = getelementptr i32, i32* %r4, i32 5
-%r298 = load i32, i32* %r297
-%r299 = zext i32 %r298 to i192
-%r300 = shl i192 %r299, 160
-%r301 = or i192 %r295, %r300
-%r302 = zext i192 %r301 to i224
-%r304 = getelementptr i32, i32* %r4, i32 6
-%r305 = load i32, i32* %r304
-%r306 = zext i32 %r305 to i224
-%r307 = shl i224 %r306, 192
-%r308 = or i224 %r302, %r307
-%r309 = zext i224 %r308 to i256
-%r311 = getelementptr i32, i32* %r4, i32 7
-%r312 = load i32, i32* %r311
-%r313 = zext i32 %r312 to i256
-%r314 = shl i256 %r313, 224
-%r315 = or i256 %r309, %r314
-%r316 = zext i256 %r315 to i288
-%r318 = getelementptr i32, i32* %r4, i32 8
-%r319 = load i32, i32* %r318
-%r320 = zext i32 %r319 to i288
-%r321 = shl i288 %r320, 256
-%r322 = or i288 %r316, %r321
-%r323 = zext i288 %r322 to i320
-%r325 = getelementptr i32, i32* %r4, i32 9
-%r326 = load i32, i32* %r325
-%r327 = zext i32 %r326 to i320
-%r328 = shl i320 %r327, 288
-%r329 = or i320 %r323, %r328
-%r330 = zext i320 %r329 to i352
-%r332 = getelementptr i32, i32* %r4, i32 10
-%r333 = load i32, i32* %r332
-%r334 = zext i32 %r333 to i352
-%r335 = shl i352 %r334, 320
-%r336 = or i352 %r330, %r335
-%r337 = zext i352 %r336 to i384
-%r339 = getelementptr i32, i32* %r4, i32 11
-%r340 = load i32, i32* %r339
-%r341 = zext i32 %r340 to i384
-%r342 = shl i384 %r341, 352
-%r343 = or i384 %r337, %r342
-%r344 = zext i384 %r343 to i416
-%r346 = getelementptr i32, i32* %r4, i32 12
-%r347 = load i32, i32* %r346
-%r348 = zext i32 %r347 to i416
-%r349 = shl i416 %r348, 384
-%r350 = or i416 %r344, %r349
-%r351 = zext i416 %r350 to i448
-%r353 = getelementptr i32, i32* %r4, i32 13
-%r354 = load i32, i32* %r353
-%r355 = zext i32 %r354 to i448
-%r356 = shl i448 %r355, 416
-%r357 = or i448 %r351, %r356
-%r358 = zext i448 %r357 to i480
-%r360 = getelementptr i32, i32* %r4, i32 14
-%r361 = load i32, i32* %r360
-%r362 = zext i32 %r361 to i480
-%r363 = shl i480 %r362, 448
-%r364 = or i480 %r358, %r363
-%r365 = zext i480 %r364 to i512
-%r366 = sub i512 %r205, %r365
-%r367 = lshr i512 %r366, 480
-%r368 = trunc i512 %r367 to i1
-br i1%r368, label %carry, label %nocarry
-nocarry:
-%r369 = trunc i512 %r366 to i480
-%r370 = trunc i480 %r369 to i32
-%r372 = getelementptr i32, i32* %r1, i32 0
-store i32 %r370, i32* %r372
-%r373 = lshr i480 %r369, 32
-%r374 = trunc i480 %r373 to i32
-%r376 = getelementptr i32, i32* %r1, i32 1
-store i32 %r374, i32* %r376
-%r377 = lshr i480 %r373, 32
-%r378 = trunc i480 %r377 to i32
-%r380 = getelementptr i32, i32* %r1, i32 2
-store i32 %r378, i32* %r380
-%r381 = lshr i480 %r377, 32
-%r382 = trunc i480 %r381 to i32
-%r384 = getelementptr i32, i32* %r1, i32 3
-store i32 %r382, i32* %r384
-%r385 = lshr i480 %r381, 32
-%r386 = trunc i480 %r385 to i32
-%r388 = getelementptr i32, i32* %r1, i32 4
-store i32 %r386, i32* %r388
-%r389 = lshr i480 %r385, 32
-%r390 = trunc i480 %r389 to i32
-%r392 = getelementptr i32, i32* %r1, i32 5
-store i32 %r390, i32* %r392
-%r393 = lshr i480 %r389, 32
-%r394 = trunc i480 %r393 to i32
-%r396 = getelementptr i32, i32* %r1, i32 6
-store i32 %r394, i32* %r396
-%r397 = lshr i480 %r393, 32
-%r398 = trunc i480 %r397 to i32
-%r400 = getelementptr i32, i32* %r1, i32 7
-store i32 %r398, i32* %r400
-%r401 = lshr i480 %r397, 32
-%r402 = trunc i480 %r401 to i32
-%r404 = getelementptr i32, i32* %r1, i32 8
-store i32 %r402, i32* %r404
-%r405 = lshr i480 %r401, 32
-%r406 = trunc i480 %r405 to i32
-%r408 = getelementptr i32, i32* %r1, i32 9
-store i32 %r406, i32* %r408
-%r409 = lshr i480 %r405, 32
-%r410 = trunc i480 %r409 to i32
-%r412 = getelementptr i32, i32* %r1, i32 10
-store i32 %r410, i32* %r412
-%r413 = lshr i480 %r409, 32
-%r414 = trunc i480 %r413 to i32
-%r416 = getelementptr i32, i32* %r1, i32 11
-store i32 %r414, i32* %r416
-%r417 = lshr i480 %r413, 32
-%r418 = trunc i480 %r417 to i32
-%r420 = getelementptr i32, i32* %r1, i32 12
-store i32 %r418, i32* %r420
-%r421 = lshr i480 %r417, 32
-%r422 = trunc i480 %r421 to i32
-%r424 = getelementptr i32, i32* %r1, i32 13
-store i32 %r422, i32* %r424
-%r425 = lshr i480 %r421, 32
-%r426 = trunc i480 %r425 to i32
-%r428 = getelementptr i32, i32* %r1, i32 14
-store i32 %r426, i32* %r428
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = load i32, i32* %r3
-%r105 = zext i32 %r104 to i64
-%r107 = getelementptr i32, i32* %r3, i32 1
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i64
-%r110 = shl i64 %r109, 32
-%r111 = or i64 %r105, %r110
-%r112 = zext i64 %r111 to i96
-%r114 = getelementptr i32, i32* %r3, i32 2
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i96
-%r117 = shl i96 %r116, 64
-%r118 = or i96 %r112, %r117
-%r119 = zext i96 %r118 to i128
-%r121 = getelementptr i32, i32* %r3, i32 3
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i128
-%r124 = shl i128 %r123, 96
-%r125 = or i128 %r119, %r124
-%r126 = zext i128 %r125 to i160
-%r128 = getelementptr i32, i32* %r3, i32 4
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i160
-%r131 = shl i160 %r130, 128
-%r132 = or i160 %r126, %r131
-%r133 = zext i160 %r132 to i192
-%r135 = getelementptr i32, i32* %r3, i32 5
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i192
-%r138 = shl i192 %r137, 160
-%r139 = or i192 %r133, %r138
-%r140 = zext i192 %r139 to i224
-%r142 = getelementptr i32, i32* %r3, i32 6
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i224
-%r145 = shl i224 %r144, 192
-%r146 = or i224 %r140, %r145
-%r147 = zext i224 %r146 to i256
-%r149 = getelementptr i32, i32* %r3, i32 7
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i256
-%r152 = shl i256 %r151, 224
-%r153 = or i256 %r147, %r152
-%r154 = zext i256 %r153 to i288
-%r156 = getelementptr i32, i32* %r3, i32 8
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i288
-%r159 = shl i288 %r158, 256
-%r160 = or i288 %r154, %r159
-%r161 = zext i288 %r160 to i320
-%r163 = getelementptr i32, i32* %r3, i32 9
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i320
-%r166 = shl i320 %r165, 288
-%r167 = or i320 %r161, %r166
-%r168 = zext i320 %r167 to i352
-%r170 = getelementptr i32, i32* %r3, i32 10
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i352
-%r173 = shl i352 %r172, 320
-%r174 = or i352 %r168, %r173
-%r175 = zext i352 %r174 to i384
-%r177 = getelementptr i32, i32* %r3, i32 11
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i384
-%r180 = shl i384 %r179, 352
-%r181 = or i384 %r175, %r180
-%r182 = zext i384 %r181 to i416
-%r184 = getelementptr i32, i32* %r3, i32 12
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i416
-%r187 = shl i416 %r186, 384
-%r188 = or i416 %r182, %r187
-%r189 = zext i416 %r188 to i448
-%r191 = getelementptr i32, i32* %r3, i32 13
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i448
-%r194 = shl i448 %r193, 416
-%r195 = or i448 %r189, %r194
-%r196 = zext i448 %r195 to i480
-%r198 = getelementptr i32, i32* %r3, i32 14
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i480
-%r201 = shl i480 %r200, 448
-%r202 = or i480 %r196, %r201
-%r203 = add i480 %r103, %r202
-%r204 = load i32, i32* %r4
-%r205 = zext i32 %r204 to i64
-%r207 = getelementptr i32, i32* %r4, i32 1
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i64
-%r210 = shl i64 %r209, 32
-%r211 = or i64 %r205, %r210
-%r212 = zext i64 %r211 to i96
-%r214 = getelementptr i32, i32* %r4, i32 2
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i96
-%r217 = shl i96 %r216, 64
-%r218 = or i96 %r212, %r217
-%r219 = zext i96 %r218 to i128
-%r221 = getelementptr i32, i32* %r4, i32 3
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i128
-%r224 = shl i128 %r223, 96
-%r225 = or i128 %r219, %r224
-%r226 = zext i128 %r225 to i160
-%r228 = getelementptr i32, i32* %r4, i32 4
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i160
-%r231 = shl i160 %r230, 128
-%r232 = or i160 %r226, %r231
-%r233 = zext i160 %r232 to i192
-%r235 = getelementptr i32, i32* %r4, i32 5
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i192
-%r238 = shl i192 %r237, 160
-%r239 = or i192 %r233, %r238
-%r240 = zext i192 %r239 to i224
-%r242 = getelementptr i32, i32* %r4, i32 6
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i224
-%r245 = shl i224 %r244, 192
-%r246 = or i224 %r240, %r245
-%r247 = zext i224 %r246 to i256
-%r249 = getelementptr i32, i32* %r4, i32 7
-%r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i256
-%r252 = shl i256 %r251, 224
-%r253 = or i256 %r247, %r252
-%r254 = zext i256 %r253 to i288
-%r256 = getelementptr i32, i32* %r4, i32 8
-%r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i288
-%r259 = shl i288 %r258, 256
-%r260 = or i288 %r254, %r259
-%r261 = zext i288 %r260 to i320
-%r263 = getelementptr i32, i32* %r4, i32 9
-%r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i320
-%r266 = shl i320 %r265, 288
-%r267 = or i320 %r261, %r266
-%r268 = zext i320 %r267 to i352
-%r270 = getelementptr i32, i32* %r4, i32 10
-%r271 = load i32, i32* %r270
-%r272 = zext i32 %r271 to i352
-%r273 = shl i352 %r272, 320
-%r274 = or i352 %r268, %r273
-%r275 = zext i352 %r274 to i384
-%r277 = getelementptr i32, i32* %r4, i32 11
-%r278 = load i32, i32* %r277
-%r279 = zext i32 %r278 to i384
-%r280 = shl i384 %r279, 352
-%r281 = or i384 %r275, %r280
-%r282 = zext i384 %r281 to i416
-%r284 = getelementptr i32, i32* %r4, i32 12
-%r285 = load i32, i32* %r284
-%r286 = zext i32 %r285 to i416
-%r287 = shl i416 %r286, 384
-%r288 = or i416 %r282, %r287
-%r289 = zext i416 %r288 to i448
-%r291 = getelementptr i32, i32* %r4, i32 13
-%r292 = load i32, i32* %r291
-%r293 = zext i32 %r292 to i448
-%r294 = shl i448 %r293, 416
-%r295 = or i448 %r289, %r294
-%r296 = zext i448 %r295 to i480
-%r298 = getelementptr i32, i32* %r4, i32 14
-%r299 = load i32, i32* %r298
-%r300 = zext i32 %r299 to i480
-%r301 = shl i480 %r300, 448
-%r302 = or i480 %r296, %r301
-%r303 = sub i480 %r203, %r302
-%r304 = lshr i480 %r303, 479
-%r305 = trunc i480 %r304 to i1
-%r306 = select i1 %r305, i480 %r203, i480 %r303
-%r307 = trunc i480 %r306 to i32
-%r309 = getelementptr i32, i32* %r1, i32 0
-store i32 %r307, i32* %r309
-%r310 = lshr i480 %r306, 32
-%r311 = trunc i480 %r310 to i32
-%r313 = getelementptr i32, i32* %r1, i32 1
-store i32 %r311, i32* %r313
-%r314 = lshr i480 %r310, 32
-%r315 = trunc i480 %r314 to i32
-%r317 = getelementptr i32, i32* %r1, i32 2
-store i32 %r315, i32* %r317
-%r318 = lshr i480 %r314, 32
-%r319 = trunc i480 %r318 to i32
-%r321 = getelementptr i32, i32* %r1, i32 3
-store i32 %r319, i32* %r321
-%r322 = lshr i480 %r318, 32
-%r323 = trunc i480 %r322 to i32
-%r325 = getelementptr i32, i32* %r1, i32 4
-store i32 %r323, i32* %r325
-%r326 = lshr i480 %r322, 32
-%r327 = trunc i480 %r326 to i32
-%r329 = getelementptr i32, i32* %r1, i32 5
-store i32 %r327, i32* %r329
-%r330 = lshr i480 %r326, 32
-%r331 = trunc i480 %r330 to i32
-%r333 = getelementptr i32, i32* %r1, i32 6
-store i32 %r331, i32* %r333
-%r334 = lshr i480 %r330, 32
-%r335 = trunc i480 %r334 to i32
-%r337 = getelementptr i32, i32* %r1, i32 7
-store i32 %r335, i32* %r337
-%r338 = lshr i480 %r334, 32
-%r339 = trunc i480 %r338 to i32
-%r341 = getelementptr i32, i32* %r1, i32 8
-store i32 %r339, i32* %r341
-%r342 = lshr i480 %r338, 32
-%r343 = trunc i480 %r342 to i32
-%r345 = getelementptr i32, i32* %r1, i32 9
-store i32 %r343, i32* %r345
-%r346 = lshr i480 %r342, 32
-%r347 = trunc i480 %r346 to i32
-%r349 = getelementptr i32, i32* %r1, i32 10
-store i32 %r347, i32* %r349
-%r350 = lshr i480 %r346, 32
-%r351 = trunc i480 %r350 to i32
-%r353 = getelementptr i32, i32* %r1, i32 11
-store i32 %r351, i32* %r353
-%r354 = lshr i480 %r350, 32
-%r355 = trunc i480 %r354 to i32
-%r357 = getelementptr i32, i32* %r1, i32 12
-store i32 %r355, i32* %r357
-%r358 = lshr i480 %r354, 32
-%r359 = trunc i480 %r358 to i32
-%r361 = getelementptr i32, i32* %r1, i32 13
-store i32 %r359, i32* %r361
-%r362 = lshr i480 %r358, 32
-%r363 = trunc i480 %r362 to i32
-%r365 = getelementptr i32, i32* %r1, i32 14
-store i32 %r363, i32* %r365
-ret void
-}
-define void @mcl_fp_sub15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = load i32, i32* %r3
-%r105 = zext i32 %r104 to i64
-%r107 = getelementptr i32, i32* %r3, i32 1
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i64
-%r110 = shl i64 %r109, 32
-%r111 = or i64 %r105, %r110
-%r112 = zext i64 %r111 to i96
-%r114 = getelementptr i32, i32* %r3, i32 2
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i96
-%r117 = shl i96 %r116, 64
-%r118 = or i96 %r112, %r117
-%r119 = zext i96 %r118 to i128
-%r121 = getelementptr i32, i32* %r3, i32 3
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i128
-%r124 = shl i128 %r123, 96
-%r125 = or i128 %r119, %r124
-%r126 = zext i128 %r125 to i160
-%r128 = getelementptr i32, i32* %r3, i32 4
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i160
-%r131 = shl i160 %r130, 128
-%r132 = or i160 %r126, %r131
-%r133 = zext i160 %r132 to i192
-%r135 = getelementptr i32, i32* %r3, i32 5
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i192
-%r138 = shl i192 %r137, 160
-%r139 = or i192 %r133, %r138
-%r140 = zext i192 %r139 to i224
-%r142 = getelementptr i32, i32* %r3, i32 6
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i224
-%r145 = shl i224 %r144, 192
-%r146 = or i224 %r140, %r145
-%r147 = zext i224 %r146 to i256
-%r149 = getelementptr i32, i32* %r3, i32 7
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i256
-%r152 = shl i256 %r151, 224
-%r153 = or i256 %r147, %r152
-%r154 = zext i256 %r153 to i288
-%r156 = getelementptr i32, i32* %r3, i32 8
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i288
-%r159 = shl i288 %r158, 256
-%r160 = or i288 %r154, %r159
-%r161 = zext i288 %r160 to i320
-%r163 = getelementptr i32, i32* %r3, i32 9
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i320
-%r166 = shl i320 %r165, 288
-%r167 = or i320 %r161, %r166
-%r168 = zext i320 %r167 to i352
-%r170 = getelementptr i32, i32* %r3, i32 10
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i352
-%r173 = shl i352 %r172, 320
-%r174 = or i352 %r168, %r173
-%r175 = zext i352 %r174 to i384
-%r177 = getelementptr i32, i32* %r3, i32 11
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i384
-%r180 = shl i384 %r179, 352
-%r181 = or i384 %r175, %r180
-%r182 = zext i384 %r181 to i416
-%r184 = getelementptr i32, i32* %r3, i32 12
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i416
-%r187 = shl i416 %r186, 384
-%r188 = or i416 %r182, %r187
-%r189 = zext i416 %r188 to i448
-%r191 = getelementptr i32, i32* %r3, i32 13
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i448
-%r194 = shl i448 %r193, 416
-%r195 = or i448 %r189, %r194
-%r196 = zext i448 %r195 to i480
-%r198 = getelementptr i32, i32* %r3, i32 14
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i480
-%r201 = shl i480 %r200, 448
-%r202 = or i480 %r196, %r201
-%r203 = zext i480 %r103 to i512
-%r204 = zext i480 %r202 to i512
-%r205 = sub i512 %r203, %r204
-%r206 = trunc i512 %r205 to i480
-%r207 = lshr i512 %r205, 480
-%r208 = trunc i512 %r207 to i1
-%r209 = trunc i480 %r206 to i32
-%r211 = getelementptr i32, i32* %r1, i32 0
-store i32 %r209, i32* %r211
-%r212 = lshr i480 %r206, 32
-%r213 = trunc i480 %r212 to i32
-%r215 = getelementptr i32, i32* %r1, i32 1
-store i32 %r213, i32* %r215
-%r216 = lshr i480 %r212, 32
-%r217 = trunc i480 %r216 to i32
-%r219 = getelementptr i32, i32* %r1, i32 2
-store i32 %r217, i32* %r219
-%r220 = lshr i480 %r216, 32
-%r221 = trunc i480 %r220 to i32
-%r223 = getelementptr i32, i32* %r1, i32 3
-store i32 %r221, i32* %r223
-%r224 = lshr i480 %r220, 32
-%r225 = trunc i480 %r224 to i32
-%r227 = getelementptr i32, i32* %r1, i32 4
-store i32 %r225, i32* %r227
-%r228 = lshr i480 %r224, 32
-%r229 = trunc i480 %r228 to i32
-%r231 = getelementptr i32, i32* %r1, i32 5
-store i32 %r229, i32* %r231
-%r232 = lshr i480 %r228, 32
-%r233 = trunc i480 %r232 to i32
-%r235 = getelementptr i32, i32* %r1, i32 6
-store i32 %r233, i32* %r235
-%r236 = lshr i480 %r232, 32
-%r237 = trunc i480 %r236 to i32
-%r239 = getelementptr i32, i32* %r1, i32 7
-store i32 %r237, i32* %r239
-%r240 = lshr i480 %r236, 32
-%r241 = trunc i480 %r240 to i32
-%r243 = getelementptr i32, i32* %r1, i32 8
-store i32 %r241, i32* %r243
-%r244 = lshr i480 %r240, 32
-%r245 = trunc i480 %r244 to i32
-%r247 = getelementptr i32, i32* %r1, i32 9
-store i32 %r245, i32* %r247
-%r248 = lshr i480 %r244, 32
-%r249 = trunc i480 %r248 to i32
-%r251 = getelementptr i32, i32* %r1, i32 10
-store i32 %r249, i32* %r251
-%r252 = lshr i480 %r248, 32
-%r253 = trunc i480 %r252 to i32
-%r255 = getelementptr i32, i32* %r1, i32 11
-store i32 %r253, i32* %r255
-%r256 = lshr i480 %r252, 32
-%r257 = trunc i480 %r256 to i32
-%r259 = getelementptr i32, i32* %r1, i32 12
-store i32 %r257, i32* %r259
-%r260 = lshr i480 %r256, 32
-%r261 = trunc i480 %r260 to i32
-%r263 = getelementptr i32, i32* %r1, i32 13
-store i32 %r261, i32* %r263
-%r264 = lshr i480 %r260, 32
-%r265 = trunc i480 %r264 to i32
-%r267 = getelementptr i32, i32* %r1, i32 14
-store i32 %r265, i32* %r267
-br i1%r208, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r268 = load i32, i32* %r4
-%r269 = zext i32 %r268 to i64
-%r271 = getelementptr i32, i32* %r4, i32 1
-%r272 = load i32, i32* %r271
-%r273 = zext i32 %r272 to i64
-%r274 = shl i64 %r273, 32
-%r275 = or i64 %r269, %r274
-%r276 = zext i64 %r275 to i96
-%r278 = getelementptr i32, i32* %r4, i32 2
-%r279 = load i32, i32* %r278
-%r280 = zext i32 %r279 to i96
-%r281 = shl i96 %r280, 64
-%r282 = or i96 %r276, %r281
-%r283 = zext i96 %r282 to i128
-%r285 = getelementptr i32, i32* %r4, i32 3
-%r286 = load i32, i32* %r285
-%r287 = zext i32 %r286 to i128
-%r288 = shl i128 %r287, 96
-%r289 = or i128 %r283, %r288
-%r290 = zext i128 %r289 to i160
-%r292 = getelementptr i32, i32* %r4, i32 4
-%r293 = load i32, i32* %r292
-%r294 = zext i32 %r293 to i160
-%r295 = shl i160 %r294, 128
-%r296 = or i160 %r290, %r295
-%r297 = zext i160 %r296 to i192
-%r299 = getelementptr i32, i32* %r4, i32 5
-%r300 = load i32, i32* %r299
-%r301 = zext i32 %r300 to i192
-%r302 = shl i192 %r301, 160
-%r303 = or i192 %r297, %r302
-%r304 = zext i192 %r303 to i224
-%r306 = getelementptr i32, i32* %r4, i32 6
-%r307 = load i32, i32* %r306
-%r308 = zext i32 %r307 to i224
-%r309 = shl i224 %r308, 192
-%r310 = or i224 %r304, %r309
-%r311 = zext i224 %r310 to i256
-%r313 = getelementptr i32, i32* %r4, i32 7
-%r314 = load i32, i32* %r313
-%r315 = zext i32 %r314 to i256
-%r316 = shl i256 %r315, 224
-%r317 = or i256 %r311, %r316
-%r318 = zext i256 %r317 to i288
-%r320 = getelementptr i32, i32* %r4, i32 8
-%r321 = load i32, i32* %r320
-%r322 = zext i32 %r321 to i288
-%r323 = shl i288 %r322, 256
-%r324 = or i288 %r318, %r323
-%r325 = zext i288 %r324 to i320
-%r327 = getelementptr i32, i32* %r4, i32 9
-%r328 = load i32, i32* %r327
-%r329 = zext i32 %r328 to i320
-%r330 = shl i320 %r329, 288
-%r331 = or i320 %r325, %r330
-%r332 = zext i320 %r331 to i352
-%r334 = getelementptr i32, i32* %r4, i32 10
-%r335 = load i32, i32* %r334
-%r336 = zext i32 %r335 to i352
-%r337 = shl i352 %r336, 320
-%r338 = or i352 %r332, %r337
-%r339 = zext i352 %r338 to i384
-%r341 = getelementptr i32, i32* %r4, i32 11
-%r342 = load i32, i32* %r341
-%r343 = zext i32 %r342 to i384
-%r344 = shl i384 %r343, 352
-%r345 = or i384 %r339, %r344
-%r346 = zext i384 %r345 to i416
-%r348 = getelementptr i32, i32* %r4, i32 12
-%r349 = load i32, i32* %r348
-%r350 = zext i32 %r349 to i416
-%r351 = shl i416 %r350, 384
-%r352 = or i416 %r346, %r351
-%r353 = zext i416 %r352 to i448
-%r355 = getelementptr i32, i32* %r4, i32 13
-%r356 = load i32, i32* %r355
-%r357 = zext i32 %r356 to i448
-%r358 = shl i448 %r357, 416
-%r359 = or i448 %r353, %r358
-%r360 = zext i448 %r359 to i480
-%r362 = getelementptr i32, i32* %r4, i32 14
-%r363 = load i32, i32* %r362
-%r364 = zext i32 %r363 to i480
-%r365 = shl i480 %r364, 448
-%r366 = or i480 %r360, %r365
-%r367 = add i480 %r206, %r366
-%r368 = trunc i480 %r367 to i32
-%r370 = getelementptr i32, i32* %r1, i32 0
-store i32 %r368, i32* %r370
-%r371 = lshr i480 %r367, 32
-%r372 = trunc i480 %r371 to i32
-%r374 = getelementptr i32, i32* %r1, i32 1
-store i32 %r372, i32* %r374
-%r375 = lshr i480 %r371, 32
-%r376 = trunc i480 %r375 to i32
-%r378 = getelementptr i32, i32* %r1, i32 2
-store i32 %r376, i32* %r378
-%r379 = lshr i480 %r375, 32
-%r380 = trunc i480 %r379 to i32
-%r382 = getelementptr i32, i32* %r1, i32 3
-store i32 %r380, i32* %r382
-%r383 = lshr i480 %r379, 32
-%r384 = trunc i480 %r383 to i32
-%r386 = getelementptr i32, i32* %r1, i32 4
-store i32 %r384, i32* %r386
-%r387 = lshr i480 %r383, 32
-%r388 = trunc i480 %r387 to i32
-%r390 = getelementptr i32, i32* %r1, i32 5
-store i32 %r388, i32* %r390
-%r391 = lshr i480 %r387, 32
-%r392 = trunc i480 %r391 to i32
-%r394 = getelementptr i32, i32* %r1, i32 6
-store i32 %r392, i32* %r394
-%r395 = lshr i480 %r391, 32
-%r396 = trunc i480 %r395 to i32
-%r398 = getelementptr i32, i32* %r1, i32 7
-store i32 %r396, i32* %r398
-%r399 = lshr i480 %r395, 32
-%r400 = trunc i480 %r399 to i32
-%r402 = getelementptr i32, i32* %r1, i32 8
-store i32 %r400, i32* %r402
-%r403 = lshr i480 %r399, 32
-%r404 = trunc i480 %r403 to i32
-%r406 = getelementptr i32, i32* %r1, i32 9
-store i32 %r404, i32* %r406
-%r407 = lshr i480 %r403, 32
-%r408 = trunc i480 %r407 to i32
-%r410 = getelementptr i32, i32* %r1, i32 10
-store i32 %r408, i32* %r410
-%r411 = lshr i480 %r407, 32
-%r412 = trunc i480 %r411 to i32
-%r414 = getelementptr i32, i32* %r1, i32 11
-store i32 %r412, i32* %r414
-%r415 = lshr i480 %r411, 32
-%r416 = trunc i480 %r415 to i32
-%r418 = getelementptr i32, i32* %r1, i32 12
-store i32 %r416, i32* %r418
-%r419 = lshr i480 %r415, 32
-%r420 = trunc i480 %r419 to i32
-%r422 = getelementptr i32, i32* %r1, i32 13
-store i32 %r420, i32* %r422
-%r423 = lshr i480 %r419, 32
-%r424 = trunc i480 %r423 to i32
-%r426 = getelementptr i32, i32* %r1, i32 14
-store i32 %r424, i32* %r426
-ret void
-}
-define void @mcl_fp_subNF15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = load i32, i32* %r3
-%r105 = zext i32 %r104 to i64
-%r107 = getelementptr i32, i32* %r3, i32 1
-%r108 = load i32, i32* %r107
-%r109 = zext i32 %r108 to i64
-%r110 = shl i64 %r109, 32
-%r111 = or i64 %r105, %r110
-%r112 = zext i64 %r111 to i96
-%r114 = getelementptr i32, i32* %r3, i32 2
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i96
-%r117 = shl i96 %r116, 64
-%r118 = or i96 %r112, %r117
-%r119 = zext i96 %r118 to i128
-%r121 = getelementptr i32, i32* %r3, i32 3
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i128
-%r124 = shl i128 %r123, 96
-%r125 = or i128 %r119, %r124
-%r126 = zext i128 %r125 to i160
-%r128 = getelementptr i32, i32* %r3, i32 4
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i160
-%r131 = shl i160 %r130, 128
-%r132 = or i160 %r126, %r131
-%r133 = zext i160 %r132 to i192
-%r135 = getelementptr i32, i32* %r3, i32 5
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i192
-%r138 = shl i192 %r137, 160
-%r139 = or i192 %r133, %r138
-%r140 = zext i192 %r139 to i224
-%r142 = getelementptr i32, i32* %r3, i32 6
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i224
-%r145 = shl i224 %r144, 192
-%r146 = or i224 %r140, %r145
-%r147 = zext i224 %r146 to i256
-%r149 = getelementptr i32, i32* %r3, i32 7
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i256
-%r152 = shl i256 %r151, 224
-%r153 = or i256 %r147, %r152
-%r154 = zext i256 %r153 to i288
-%r156 = getelementptr i32, i32* %r3, i32 8
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i288
-%r159 = shl i288 %r158, 256
-%r160 = or i288 %r154, %r159
-%r161 = zext i288 %r160 to i320
-%r163 = getelementptr i32, i32* %r3, i32 9
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i320
-%r166 = shl i320 %r165, 288
-%r167 = or i320 %r161, %r166
-%r168 = zext i320 %r167 to i352
-%r170 = getelementptr i32, i32* %r3, i32 10
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i352
-%r173 = shl i352 %r172, 320
-%r174 = or i352 %r168, %r173
-%r175 = zext i352 %r174 to i384
-%r177 = getelementptr i32, i32* %r3, i32 11
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i384
-%r180 = shl i384 %r179, 352
-%r181 = or i384 %r175, %r180
-%r182 = zext i384 %r181 to i416
-%r184 = getelementptr i32, i32* %r3, i32 12
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i416
-%r187 = shl i416 %r186, 384
-%r188 = or i416 %r182, %r187
-%r189 = zext i416 %r188 to i448
-%r191 = getelementptr i32, i32* %r3, i32 13
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i448
-%r194 = shl i448 %r193, 416
-%r195 = or i448 %r189, %r194
-%r196 = zext i448 %r195 to i480
-%r198 = getelementptr i32, i32* %r3, i32 14
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i480
-%r201 = shl i480 %r200, 448
-%r202 = or i480 %r196, %r201
-%r203 = sub i480 %r103, %r202
-%r204 = lshr i480 %r203, 479
-%r205 = trunc i480 %r204 to i1
-%r206 = load i32, i32* %r4
-%r207 = zext i32 %r206 to i64
-%r209 = getelementptr i32, i32* %r4, i32 1
-%r210 = load i32, i32* %r209
-%r211 = zext i32 %r210 to i64
-%r212 = shl i64 %r211, 32
-%r213 = or i64 %r207, %r212
-%r214 = zext i64 %r213 to i96
-%r216 = getelementptr i32, i32* %r4, i32 2
-%r217 = load i32, i32* %r216
-%r218 = zext i32 %r217 to i96
-%r219 = shl i96 %r218, 64
-%r220 = or i96 %r214, %r219
-%r221 = zext i96 %r220 to i128
-%r223 = getelementptr i32, i32* %r4, i32 3
-%r224 = load i32, i32* %r223
-%r225 = zext i32 %r224 to i128
-%r226 = shl i128 %r225, 96
-%r227 = or i128 %r221, %r226
-%r228 = zext i128 %r227 to i160
-%r230 = getelementptr i32, i32* %r4, i32 4
-%r231 = load i32, i32* %r230
-%r232 = zext i32 %r231 to i160
-%r233 = shl i160 %r232, 128
-%r234 = or i160 %r228, %r233
-%r235 = zext i160 %r234 to i192
-%r237 = getelementptr i32, i32* %r4, i32 5
-%r238 = load i32, i32* %r237
-%r239 = zext i32 %r238 to i192
-%r240 = shl i192 %r239, 160
-%r241 = or i192 %r235, %r240
-%r242 = zext i192 %r241 to i224
-%r244 = getelementptr i32, i32* %r4, i32 6
-%r245 = load i32, i32* %r244
-%r246 = zext i32 %r245 to i224
-%r247 = shl i224 %r246, 192
-%r248 = or i224 %r242, %r247
-%r249 = zext i224 %r248 to i256
-%r251 = getelementptr i32, i32* %r4, i32 7
-%r252 = load i32, i32* %r251
-%r253 = zext i32 %r252 to i256
-%r254 = shl i256 %r253, 224
-%r255 = or i256 %r249, %r254
-%r256 = zext i256 %r255 to i288
-%r258 = getelementptr i32, i32* %r4, i32 8
-%r259 = load i32, i32* %r258
-%r260 = zext i32 %r259 to i288
-%r261 = shl i288 %r260, 256
-%r262 = or i288 %r256, %r261
-%r263 = zext i288 %r262 to i320
-%r265 = getelementptr i32, i32* %r4, i32 9
-%r266 = load i32, i32* %r265
-%r267 = zext i32 %r266 to i320
-%r268 = shl i320 %r267, 288
-%r269 = or i320 %r263, %r268
-%r270 = zext i320 %r269 to i352
-%r272 = getelementptr i32, i32* %r4, i32 10
-%r273 = load i32, i32* %r272
-%r274 = zext i32 %r273 to i352
-%r275 = shl i352 %r274, 320
-%r276 = or i352 %r270, %r275
-%r277 = zext i352 %r276 to i384
-%r279 = getelementptr i32, i32* %r4, i32 11
-%r280 = load i32, i32* %r279
-%r281 = zext i32 %r280 to i384
-%r282 = shl i384 %r281, 352
-%r283 = or i384 %r277, %r282
-%r284 = zext i384 %r283 to i416
-%r286 = getelementptr i32, i32* %r4, i32 12
-%r287 = load i32, i32* %r286
-%r288 = zext i32 %r287 to i416
-%r289 = shl i416 %r288, 384
-%r290 = or i416 %r284, %r289
-%r291 = zext i416 %r290 to i448
-%r293 = getelementptr i32, i32* %r4, i32 13
-%r294 = load i32, i32* %r293
-%r295 = zext i32 %r294 to i448
-%r296 = shl i448 %r295, 416
-%r297 = or i448 %r291, %r296
-%r298 = zext i448 %r297 to i480
-%r300 = getelementptr i32, i32* %r4, i32 14
-%r301 = load i32, i32* %r300
-%r302 = zext i32 %r301 to i480
-%r303 = shl i480 %r302, 448
-%r304 = or i480 %r298, %r303
-%r306 = select i1 %r205, i480 %r304, i480 0
-%r307 = add i480 %r203, %r306
-%r308 = trunc i480 %r307 to i32
-%r310 = getelementptr i32, i32* %r1, i32 0
-store i32 %r308, i32* %r310
-%r311 = lshr i480 %r307, 32
-%r312 = trunc i480 %r311 to i32
-%r314 = getelementptr i32, i32* %r1, i32 1
-store i32 %r312, i32* %r314
-%r315 = lshr i480 %r311, 32
-%r316 = trunc i480 %r315 to i32
-%r318 = getelementptr i32, i32* %r1, i32 2
-store i32 %r316, i32* %r318
-%r319 = lshr i480 %r315, 32
-%r320 = trunc i480 %r319 to i32
-%r322 = getelementptr i32, i32* %r1, i32 3
-store i32 %r320, i32* %r322
-%r323 = lshr i480 %r319, 32
-%r324 = trunc i480 %r323 to i32
-%r326 = getelementptr i32, i32* %r1, i32 4
-store i32 %r324, i32* %r326
-%r327 = lshr i480 %r323, 32
-%r328 = trunc i480 %r327 to i32
-%r330 = getelementptr i32, i32* %r1, i32 5
-store i32 %r328, i32* %r330
-%r331 = lshr i480 %r327, 32
-%r332 = trunc i480 %r331 to i32
-%r334 = getelementptr i32, i32* %r1, i32 6
-store i32 %r332, i32* %r334
-%r335 = lshr i480 %r331, 32
-%r336 = trunc i480 %r335 to i32
-%r338 = getelementptr i32, i32* %r1, i32 7
-store i32 %r336, i32* %r338
-%r339 = lshr i480 %r335, 32
-%r340 = trunc i480 %r339 to i32
-%r342 = getelementptr i32, i32* %r1, i32 8
-store i32 %r340, i32* %r342
-%r343 = lshr i480 %r339, 32
-%r344 = trunc i480 %r343 to i32
-%r346 = getelementptr i32, i32* %r1, i32 9
-store i32 %r344, i32* %r346
-%r347 = lshr i480 %r343, 32
-%r348 = trunc i480 %r347 to i32
-%r350 = getelementptr i32, i32* %r1, i32 10
-store i32 %r348, i32* %r350
-%r351 = lshr i480 %r347, 32
-%r352 = trunc i480 %r351 to i32
-%r354 = getelementptr i32, i32* %r1, i32 11
-store i32 %r352, i32* %r354
-%r355 = lshr i480 %r351, 32
-%r356 = trunc i480 %r355 to i32
-%r358 = getelementptr i32, i32* %r1, i32 12
-store i32 %r356, i32* %r358
-%r359 = lshr i480 %r355, 32
-%r360 = trunc i480 %r359 to i32
-%r362 = getelementptr i32, i32* %r1, i32 13
-store i32 %r360, i32* %r362
-%r363 = lshr i480 %r359, 32
-%r364 = trunc i480 %r363 to i32
-%r366 = getelementptr i32, i32* %r1, i32 14
-store i32 %r364, i32* %r366
-ret void
-}
-define void @mcl_fpDbl_add15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = zext i576 %r124 to i608
-%r127 = getelementptr i32, i32* %r2, i32 18
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i608
-%r130 = shl i608 %r129, 576
-%r131 = or i608 %r125, %r130
-%r132 = zext i608 %r131 to i640
-%r134 = getelementptr i32, i32* %r2, i32 19
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i640
-%r137 = shl i640 %r136, 608
-%r138 = or i640 %r132, %r137
-%r139 = zext i640 %r138 to i672
-%r141 = getelementptr i32, i32* %r2, i32 20
-%r142 = load i32, i32* %r141
-%r143 = zext i32 %r142 to i672
-%r144 = shl i672 %r143, 640
-%r145 = or i672 %r139, %r144
-%r146 = zext i672 %r145 to i704
-%r148 = getelementptr i32, i32* %r2, i32 21
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i704
-%r151 = shl i704 %r150, 672
-%r152 = or i704 %r146, %r151
-%r153 = zext i704 %r152 to i736
-%r155 = getelementptr i32, i32* %r2, i32 22
-%r156 = load i32, i32* %r155
-%r157 = zext i32 %r156 to i736
-%r158 = shl i736 %r157, 704
-%r159 = or i736 %r153, %r158
-%r160 = zext i736 %r159 to i768
-%r162 = getelementptr i32, i32* %r2, i32 23
-%r163 = load i32, i32* %r162
-%r164 = zext i32 %r163 to i768
-%r165 = shl i768 %r164, 736
-%r166 = or i768 %r160, %r165
-%r167 = zext i768 %r166 to i800
-%r169 = getelementptr i32, i32* %r2, i32 24
-%r170 = load i32, i32* %r169
-%r171 = zext i32 %r170 to i800
-%r172 = shl i800 %r171, 768
-%r173 = or i800 %r167, %r172
-%r174 = zext i800 %r173 to i832
-%r176 = getelementptr i32, i32* %r2, i32 25
-%r177 = load i32, i32* %r176
-%r178 = zext i32 %r177 to i832
-%r179 = shl i832 %r178, 800
-%r180 = or i832 %r174, %r179
-%r181 = zext i832 %r180 to i864
-%r183 = getelementptr i32, i32* %r2, i32 26
-%r184 = load i32, i32* %r183
-%r185 = zext i32 %r184 to i864
-%r186 = shl i864 %r185, 832
-%r187 = or i864 %r181, %r186
-%r188 = zext i864 %r187 to i896
-%r190 = getelementptr i32, i32* %r2, i32 27
-%r191 = load i32, i32* %r190
-%r192 = zext i32 %r191 to i896
-%r193 = shl i896 %r192, 864
-%r194 = or i896 %r188, %r193
-%r195 = zext i896 %r194 to i928
-%r197 = getelementptr i32, i32* %r2, i32 28
-%r198 = load i32, i32* %r197
-%r199 = zext i32 %r198 to i928
-%r200 = shl i928 %r199, 896
-%r201 = or i928 %r195, %r200
-%r202 = zext i928 %r201 to i960
-%r204 = getelementptr i32, i32* %r2, i32 29
-%r205 = load i32, i32* %r204
-%r206 = zext i32 %r205 to i960
-%r207 = shl i960 %r206, 928
-%r208 = or i960 %r202, %r207
-%r209 = load i32, i32* %r3
-%r210 = zext i32 %r209 to i64
-%r212 = getelementptr i32, i32* %r3, i32 1
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i64
-%r215 = shl i64 %r214, 32
-%r216 = or i64 %r210, %r215
-%r217 = zext i64 %r216 to i96
-%r219 = getelementptr i32, i32* %r3, i32 2
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i96
-%r222 = shl i96 %r221, 64
-%r223 = or i96 %r217, %r222
-%r224 = zext i96 %r223 to i128
-%r226 = getelementptr i32, i32* %r3, i32 3
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i128
-%r229 = shl i128 %r228, 96
-%r230 = or i128 %r224, %r229
-%r231 = zext i128 %r230 to i160
-%r233 = getelementptr i32, i32* %r3, i32 4
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i160
-%r236 = shl i160 %r235, 128
-%r237 = or i160 %r231, %r236
-%r238 = zext i160 %r237 to i192
-%r240 = getelementptr i32, i32* %r3, i32 5
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i192
-%r243 = shl i192 %r242, 160
-%r244 = or i192 %r238, %r243
-%r245 = zext i192 %r244 to i224
-%r247 = getelementptr i32, i32* %r3, i32 6
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i224
-%r250 = shl i224 %r249, 192
-%r251 = or i224 %r245, %r250
-%r252 = zext i224 %r251 to i256
-%r254 = getelementptr i32, i32* %r3, i32 7
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i256
-%r257 = shl i256 %r256, 224
-%r258 = or i256 %r252, %r257
-%r259 = zext i256 %r258 to i288
-%r261 = getelementptr i32, i32* %r3, i32 8
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i288
-%r264 = shl i288 %r263, 256
-%r265 = or i288 %r259, %r264
-%r266 = zext i288 %r265 to i320
-%r268 = getelementptr i32, i32* %r3, i32 9
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i320
-%r271 = shl i320 %r270, 288
-%r272 = or i320 %r266, %r271
-%r273 = zext i320 %r272 to i352
-%r275 = getelementptr i32, i32* %r3, i32 10
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i352
-%r278 = shl i352 %r277, 320
-%r279 = or i352 %r273, %r278
-%r280 = zext i352 %r279 to i384
-%r282 = getelementptr i32, i32* %r3, i32 11
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i384
-%r285 = shl i384 %r284, 352
-%r286 = or i384 %r280, %r285
-%r287 = zext i384 %r286 to i416
-%r289 = getelementptr i32, i32* %r3, i32 12
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i416
-%r292 = shl i416 %r291, 384
-%r293 = or i416 %r287, %r292
-%r294 = zext i416 %r293 to i448
-%r296 = getelementptr i32, i32* %r3, i32 13
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i448
-%r299 = shl i448 %r298, 416
-%r300 = or i448 %r294, %r299
-%r301 = zext i448 %r300 to i480
-%r303 = getelementptr i32, i32* %r3, i32 14
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i480
-%r306 = shl i480 %r305, 448
-%r307 = or i480 %r301, %r306
-%r308 = zext i480 %r307 to i512
-%r310 = getelementptr i32, i32* %r3, i32 15
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i512
-%r313 = shl i512 %r312, 480
-%r314 = or i512 %r308, %r313
-%r315 = zext i512 %r314 to i544
-%r317 = getelementptr i32, i32* %r3, i32 16
-%r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i544
-%r320 = shl i544 %r319, 512
-%r321 = or i544 %r315, %r320
-%r322 = zext i544 %r321 to i576
-%r324 = getelementptr i32, i32* %r3, i32 17
-%r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i576
-%r327 = shl i576 %r326, 544
-%r328 = or i576 %r322, %r327
-%r329 = zext i576 %r328 to i608
-%r331 = getelementptr i32, i32* %r3, i32 18
-%r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i608
-%r334 = shl i608 %r333, 576
-%r335 = or i608 %r329, %r334
-%r336 = zext i608 %r335 to i640
-%r338 = getelementptr i32, i32* %r3, i32 19
-%r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i640
-%r341 = shl i640 %r340, 608
-%r342 = or i640 %r336, %r341
-%r343 = zext i640 %r342 to i672
-%r345 = getelementptr i32, i32* %r3, i32 20
-%r346 = load i32, i32* %r345
-%r347 = zext i32 %r346 to i672
-%r348 = shl i672 %r347, 640
-%r349 = or i672 %r343, %r348
-%r350 = zext i672 %r349 to i704
-%r352 = getelementptr i32, i32* %r3, i32 21
-%r353 = load i32, i32* %r352
-%r354 = zext i32 %r353 to i704
-%r355 = shl i704 %r354, 672
-%r356 = or i704 %r350, %r355
-%r357 = zext i704 %r356 to i736
-%r359 = getelementptr i32, i32* %r3, i32 22
-%r360 = load i32, i32* %r359
-%r361 = zext i32 %r360 to i736
-%r362 = shl i736 %r361, 704
-%r363 = or i736 %r357, %r362
-%r364 = zext i736 %r363 to i768
-%r366 = getelementptr i32, i32* %r3, i32 23
-%r367 = load i32, i32* %r366
-%r368 = zext i32 %r367 to i768
-%r369 = shl i768 %r368, 736
-%r370 = or i768 %r364, %r369
-%r371 = zext i768 %r370 to i800
-%r373 = getelementptr i32, i32* %r3, i32 24
-%r374 = load i32, i32* %r373
-%r375 = zext i32 %r374 to i800
-%r376 = shl i800 %r375, 768
-%r377 = or i800 %r371, %r376
-%r378 = zext i800 %r377 to i832
-%r380 = getelementptr i32, i32* %r3, i32 25
-%r381 = load i32, i32* %r380
-%r382 = zext i32 %r381 to i832
-%r383 = shl i832 %r382, 800
-%r384 = or i832 %r378, %r383
-%r385 = zext i832 %r384 to i864
-%r387 = getelementptr i32, i32* %r3, i32 26
-%r388 = load i32, i32* %r387
-%r389 = zext i32 %r388 to i864
-%r390 = shl i864 %r389, 832
-%r391 = or i864 %r385, %r390
-%r392 = zext i864 %r391 to i896
-%r394 = getelementptr i32, i32* %r3, i32 27
-%r395 = load i32, i32* %r394
-%r396 = zext i32 %r395 to i896
-%r397 = shl i896 %r396, 864
-%r398 = or i896 %r392, %r397
-%r399 = zext i896 %r398 to i928
-%r401 = getelementptr i32, i32* %r3, i32 28
-%r402 = load i32, i32* %r401
-%r403 = zext i32 %r402 to i928
-%r404 = shl i928 %r403, 896
-%r405 = or i928 %r399, %r404
-%r406 = zext i928 %r405 to i960
-%r408 = getelementptr i32, i32* %r3, i32 29
-%r409 = load i32, i32* %r408
-%r410 = zext i32 %r409 to i960
-%r411 = shl i960 %r410, 928
-%r412 = or i960 %r406, %r411
-%r413 = zext i960 %r208 to i992
-%r414 = zext i960 %r412 to i992
-%r415 = add i992 %r413, %r414
-%r416 = trunc i992 %r415 to i480
-%r417 = trunc i480 %r416 to i32
-%r419 = getelementptr i32, i32* %r1, i32 0
-store i32 %r417, i32* %r419
-%r420 = lshr i480 %r416, 32
-%r421 = trunc i480 %r420 to i32
-%r423 = getelementptr i32, i32* %r1, i32 1
-store i32 %r421, i32* %r423
-%r424 = lshr i480 %r420, 32
-%r425 = trunc i480 %r424 to i32
-%r427 = getelementptr i32, i32* %r1, i32 2
-store i32 %r425, i32* %r427
-%r428 = lshr i480 %r424, 32
-%r429 = trunc i480 %r428 to i32
-%r431 = getelementptr i32, i32* %r1, i32 3
-store i32 %r429, i32* %r431
-%r432 = lshr i480 %r428, 32
-%r433 = trunc i480 %r432 to i32
-%r435 = getelementptr i32, i32* %r1, i32 4
-store i32 %r433, i32* %r435
-%r436 = lshr i480 %r432, 32
-%r437 = trunc i480 %r436 to i32
-%r439 = getelementptr i32, i32* %r1, i32 5
-store i32 %r437, i32* %r439
-%r440 = lshr i480 %r436, 32
-%r441 = trunc i480 %r440 to i32
-%r443 = getelementptr i32, i32* %r1, i32 6
-store i32 %r441, i32* %r443
-%r444 = lshr i480 %r440, 32
-%r445 = trunc i480 %r444 to i32
-%r447 = getelementptr i32, i32* %r1, i32 7
-store i32 %r445, i32* %r447
-%r448 = lshr i480 %r444, 32
-%r449 = trunc i480 %r448 to i32
-%r451 = getelementptr i32, i32* %r1, i32 8
-store i32 %r449, i32* %r451
-%r452 = lshr i480 %r448, 32
-%r453 = trunc i480 %r452 to i32
-%r455 = getelementptr i32, i32* %r1, i32 9
-store i32 %r453, i32* %r455
-%r456 = lshr i480 %r452, 32
-%r457 = trunc i480 %r456 to i32
-%r459 = getelementptr i32, i32* %r1, i32 10
-store i32 %r457, i32* %r459
-%r460 = lshr i480 %r456, 32
-%r461 = trunc i480 %r460 to i32
-%r463 = getelementptr i32, i32* %r1, i32 11
-store i32 %r461, i32* %r463
-%r464 = lshr i480 %r460, 32
-%r465 = trunc i480 %r464 to i32
-%r467 = getelementptr i32, i32* %r1, i32 12
-store i32 %r465, i32* %r467
-%r468 = lshr i480 %r464, 32
-%r469 = trunc i480 %r468 to i32
-%r471 = getelementptr i32, i32* %r1, i32 13
-store i32 %r469, i32* %r471
-%r472 = lshr i480 %r468, 32
-%r473 = trunc i480 %r472 to i32
-%r475 = getelementptr i32, i32* %r1, i32 14
-store i32 %r473, i32* %r475
-%r476 = lshr i992 %r415, 480
-%r477 = trunc i992 %r476 to i512
-%r478 = load i32, i32* %r4
-%r479 = zext i32 %r478 to i64
-%r481 = getelementptr i32, i32* %r4, i32 1
-%r482 = load i32, i32* %r481
-%r483 = zext i32 %r482 to i64
-%r484 = shl i64 %r483, 32
-%r485 = or i64 %r479, %r484
-%r486 = zext i64 %r485 to i96
-%r488 = getelementptr i32, i32* %r4, i32 2
-%r489 = load i32, i32* %r488
-%r490 = zext i32 %r489 to i96
-%r491 = shl i96 %r490, 64
-%r492 = or i96 %r486, %r491
-%r493 = zext i96 %r492 to i128
-%r495 = getelementptr i32, i32* %r4, i32 3
-%r496 = load i32, i32* %r495
-%r497 = zext i32 %r496 to i128
-%r498 = shl i128 %r497, 96
-%r499 = or i128 %r493, %r498
-%r500 = zext i128 %r499 to i160
-%r502 = getelementptr i32, i32* %r4, i32 4
-%r503 = load i32, i32* %r502
-%r504 = zext i32 %r503 to i160
-%r505 = shl i160 %r504, 128
-%r506 = or i160 %r500, %r505
-%r507 = zext i160 %r506 to i192
-%r509 = getelementptr i32, i32* %r4, i32 5
-%r510 = load i32, i32* %r509
-%r511 = zext i32 %r510 to i192
-%r512 = shl i192 %r511, 160
-%r513 = or i192 %r507, %r512
-%r514 = zext i192 %r513 to i224
-%r516 = getelementptr i32, i32* %r4, i32 6
-%r517 = load i32, i32* %r516
-%r518 = zext i32 %r517 to i224
-%r519 = shl i224 %r518, 192
-%r520 = or i224 %r514, %r519
-%r521 = zext i224 %r520 to i256
-%r523 = getelementptr i32, i32* %r4, i32 7
-%r524 = load i32, i32* %r523
-%r525 = zext i32 %r524 to i256
-%r526 = shl i256 %r525, 224
-%r527 = or i256 %r521, %r526
-%r528 = zext i256 %r527 to i288
-%r530 = getelementptr i32, i32* %r4, i32 8
-%r531 = load i32, i32* %r530
-%r532 = zext i32 %r531 to i288
-%r533 = shl i288 %r532, 256
-%r534 = or i288 %r528, %r533
-%r535 = zext i288 %r534 to i320
-%r537 = getelementptr i32, i32* %r4, i32 9
-%r538 = load i32, i32* %r537
-%r539 = zext i32 %r538 to i320
-%r540 = shl i320 %r539, 288
-%r541 = or i320 %r535, %r540
-%r542 = zext i320 %r541 to i352
-%r544 = getelementptr i32, i32* %r4, i32 10
-%r545 = load i32, i32* %r544
-%r546 = zext i32 %r545 to i352
-%r547 = shl i352 %r546, 320
-%r548 = or i352 %r542, %r547
-%r549 = zext i352 %r548 to i384
-%r551 = getelementptr i32, i32* %r4, i32 11
-%r552 = load i32, i32* %r551
-%r553 = zext i32 %r552 to i384
-%r554 = shl i384 %r553, 352
-%r555 = or i384 %r549, %r554
-%r556 = zext i384 %r555 to i416
-%r558 = getelementptr i32, i32* %r4, i32 12
-%r559 = load i32, i32* %r558
-%r560 = zext i32 %r559 to i416
-%r561 = shl i416 %r560, 384
-%r562 = or i416 %r556, %r561
-%r563 = zext i416 %r562 to i448
-%r565 = getelementptr i32, i32* %r4, i32 13
-%r566 = load i32, i32* %r565
-%r567 = zext i32 %r566 to i448
-%r568 = shl i448 %r567, 416
-%r569 = or i448 %r563, %r568
-%r570 = zext i448 %r569 to i480
-%r572 = getelementptr i32, i32* %r4, i32 14
-%r573 = load i32, i32* %r572
-%r574 = zext i32 %r573 to i480
-%r575 = shl i480 %r574, 448
-%r576 = or i480 %r570, %r575
-%r577 = zext i480 %r576 to i512
-%r578 = sub i512 %r477, %r577
-%r579 = lshr i512 %r578, 480
-%r580 = trunc i512 %r579 to i1
-%r581 = select i1 %r580, i512 %r477, i512 %r578
-%r582 = trunc i512 %r581 to i480
-%r584 = getelementptr i32, i32* %r1, i32 15
-%r585 = trunc i480 %r582 to i32
-%r587 = getelementptr i32, i32* %r584, i32 0
-store i32 %r585, i32* %r587
-%r588 = lshr i480 %r582, 32
-%r589 = trunc i480 %r588 to i32
-%r591 = getelementptr i32, i32* %r584, i32 1
-store i32 %r589, i32* %r591
-%r592 = lshr i480 %r588, 32
-%r593 = trunc i480 %r592 to i32
-%r595 = getelementptr i32, i32* %r584, i32 2
-store i32 %r593, i32* %r595
-%r596 = lshr i480 %r592, 32
-%r597 = trunc i480 %r596 to i32
-%r599 = getelementptr i32, i32* %r584, i32 3
-store i32 %r597, i32* %r599
-%r600 = lshr i480 %r596, 32
-%r601 = trunc i480 %r600 to i32
-%r603 = getelementptr i32, i32* %r584, i32 4
-store i32 %r601, i32* %r603
-%r604 = lshr i480 %r600, 32
-%r605 = trunc i480 %r604 to i32
-%r607 = getelementptr i32, i32* %r584, i32 5
-store i32 %r605, i32* %r607
-%r608 = lshr i480 %r604, 32
-%r609 = trunc i480 %r608 to i32
-%r611 = getelementptr i32, i32* %r584, i32 6
-store i32 %r609, i32* %r611
-%r612 = lshr i480 %r608, 32
-%r613 = trunc i480 %r612 to i32
-%r615 = getelementptr i32, i32* %r584, i32 7
-store i32 %r613, i32* %r615
-%r616 = lshr i480 %r612, 32
-%r617 = trunc i480 %r616 to i32
-%r619 = getelementptr i32, i32* %r584, i32 8
-store i32 %r617, i32* %r619
-%r620 = lshr i480 %r616, 32
-%r621 = trunc i480 %r620 to i32
-%r623 = getelementptr i32, i32* %r584, i32 9
-store i32 %r621, i32* %r623
-%r624 = lshr i480 %r620, 32
-%r625 = trunc i480 %r624 to i32
-%r627 = getelementptr i32, i32* %r584, i32 10
-store i32 %r625, i32* %r627
-%r628 = lshr i480 %r624, 32
-%r629 = trunc i480 %r628 to i32
-%r631 = getelementptr i32, i32* %r584, i32 11
-store i32 %r629, i32* %r631
-%r632 = lshr i480 %r628, 32
-%r633 = trunc i480 %r632 to i32
-%r635 = getelementptr i32, i32* %r584, i32 12
-store i32 %r633, i32* %r635
-%r636 = lshr i480 %r632, 32
-%r637 = trunc i480 %r636 to i32
-%r639 = getelementptr i32, i32* %r584, i32 13
-store i32 %r637, i32* %r639
-%r640 = lshr i480 %r636, 32
-%r641 = trunc i480 %r640 to i32
-%r643 = getelementptr i32, i32* %r584, i32 14
-store i32 %r641, i32* %r643
-ret void
-}
-define void @mcl_fpDbl_sub15L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = zext i576 %r124 to i608
-%r127 = getelementptr i32, i32* %r2, i32 18
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i608
-%r130 = shl i608 %r129, 576
-%r131 = or i608 %r125, %r130
-%r132 = zext i608 %r131 to i640
-%r134 = getelementptr i32, i32* %r2, i32 19
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i640
-%r137 = shl i640 %r136, 608
-%r138 = or i640 %r132, %r137
-%r139 = zext i640 %r138 to i672
-%r141 = getelementptr i32, i32* %r2, i32 20
-%r142 = load i32, i32* %r141
-%r143 = zext i32 %r142 to i672
-%r144 = shl i672 %r143, 640
-%r145 = or i672 %r139, %r144
-%r146 = zext i672 %r145 to i704
-%r148 = getelementptr i32, i32* %r2, i32 21
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i704
-%r151 = shl i704 %r150, 672
-%r152 = or i704 %r146, %r151
-%r153 = zext i704 %r152 to i736
-%r155 = getelementptr i32, i32* %r2, i32 22
-%r156 = load i32, i32* %r155
-%r157 = zext i32 %r156 to i736
-%r158 = shl i736 %r157, 704
-%r159 = or i736 %r153, %r158
-%r160 = zext i736 %r159 to i768
-%r162 = getelementptr i32, i32* %r2, i32 23
-%r163 = load i32, i32* %r162
-%r164 = zext i32 %r163 to i768
-%r165 = shl i768 %r164, 736
-%r166 = or i768 %r160, %r165
-%r167 = zext i768 %r166 to i800
-%r169 = getelementptr i32, i32* %r2, i32 24
-%r170 = load i32, i32* %r169
-%r171 = zext i32 %r170 to i800
-%r172 = shl i800 %r171, 768
-%r173 = or i800 %r167, %r172
-%r174 = zext i800 %r173 to i832
-%r176 = getelementptr i32, i32* %r2, i32 25
-%r177 = load i32, i32* %r176
-%r178 = zext i32 %r177 to i832
-%r179 = shl i832 %r178, 800
-%r180 = or i832 %r174, %r179
-%r181 = zext i832 %r180 to i864
-%r183 = getelementptr i32, i32* %r2, i32 26
-%r184 = load i32, i32* %r183
-%r185 = zext i32 %r184 to i864
-%r186 = shl i864 %r185, 832
-%r187 = or i864 %r181, %r186
-%r188 = zext i864 %r187 to i896
-%r190 = getelementptr i32, i32* %r2, i32 27
-%r191 = load i32, i32* %r190
-%r192 = zext i32 %r191 to i896
-%r193 = shl i896 %r192, 864
-%r194 = or i896 %r188, %r193
-%r195 = zext i896 %r194 to i928
-%r197 = getelementptr i32, i32* %r2, i32 28
-%r198 = load i32, i32* %r197
-%r199 = zext i32 %r198 to i928
-%r200 = shl i928 %r199, 896
-%r201 = or i928 %r195, %r200
-%r202 = zext i928 %r201 to i960
-%r204 = getelementptr i32, i32* %r2, i32 29
-%r205 = load i32, i32* %r204
-%r206 = zext i32 %r205 to i960
-%r207 = shl i960 %r206, 928
-%r208 = or i960 %r202, %r207
-%r209 = load i32, i32* %r3
-%r210 = zext i32 %r209 to i64
-%r212 = getelementptr i32, i32* %r3, i32 1
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i64
-%r215 = shl i64 %r214, 32
-%r216 = or i64 %r210, %r215
-%r217 = zext i64 %r216 to i96
-%r219 = getelementptr i32, i32* %r3, i32 2
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i96
-%r222 = shl i96 %r221, 64
-%r223 = or i96 %r217, %r222
-%r224 = zext i96 %r223 to i128
-%r226 = getelementptr i32, i32* %r3, i32 3
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i128
-%r229 = shl i128 %r228, 96
-%r230 = or i128 %r224, %r229
-%r231 = zext i128 %r230 to i160
-%r233 = getelementptr i32, i32* %r3, i32 4
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i160
-%r236 = shl i160 %r235, 128
-%r237 = or i160 %r231, %r236
-%r238 = zext i160 %r237 to i192
-%r240 = getelementptr i32, i32* %r3, i32 5
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i192
-%r243 = shl i192 %r242, 160
-%r244 = or i192 %r238, %r243
-%r245 = zext i192 %r244 to i224
-%r247 = getelementptr i32, i32* %r3, i32 6
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i224
-%r250 = shl i224 %r249, 192
-%r251 = or i224 %r245, %r250
-%r252 = zext i224 %r251 to i256
-%r254 = getelementptr i32, i32* %r3, i32 7
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i256
-%r257 = shl i256 %r256, 224
-%r258 = or i256 %r252, %r257
-%r259 = zext i256 %r258 to i288
-%r261 = getelementptr i32, i32* %r3, i32 8
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i288
-%r264 = shl i288 %r263, 256
-%r265 = or i288 %r259, %r264
-%r266 = zext i288 %r265 to i320
-%r268 = getelementptr i32, i32* %r3, i32 9
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i320
-%r271 = shl i320 %r270, 288
-%r272 = or i320 %r266, %r271
-%r273 = zext i320 %r272 to i352
-%r275 = getelementptr i32, i32* %r3, i32 10
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i352
-%r278 = shl i352 %r277, 320
-%r279 = or i352 %r273, %r278
-%r280 = zext i352 %r279 to i384
-%r282 = getelementptr i32, i32* %r3, i32 11
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i384
-%r285 = shl i384 %r284, 352
-%r286 = or i384 %r280, %r285
-%r287 = zext i384 %r286 to i416
-%r289 = getelementptr i32, i32* %r3, i32 12
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i416
-%r292 = shl i416 %r291, 384
-%r293 = or i416 %r287, %r292
-%r294 = zext i416 %r293 to i448
-%r296 = getelementptr i32, i32* %r3, i32 13
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i448
-%r299 = shl i448 %r298, 416
-%r300 = or i448 %r294, %r299
-%r301 = zext i448 %r300 to i480
-%r303 = getelementptr i32, i32* %r3, i32 14
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i480
-%r306 = shl i480 %r305, 448
-%r307 = or i480 %r301, %r306
-%r308 = zext i480 %r307 to i512
-%r310 = getelementptr i32, i32* %r3, i32 15
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i512
-%r313 = shl i512 %r312, 480
-%r314 = or i512 %r308, %r313
-%r315 = zext i512 %r314 to i544
-%r317 = getelementptr i32, i32* %r3, i32 16
-%r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i544
-%r320 = shl i544 %r319, 512
-%r321 = or i544 %r315, %r320
-%r322 = zext i544 %r321 to i576
-%r324 = getelementptr i32, i32* %r3, i32 17
-%r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i576
-%r327 = shl i576 %r326, 544
-%r328 = or i576 %r322, %r327
-%r329 = zext i576 %r328 to i608
-%r331 = getelementptr i32, i32* %r3, i32 18
-%r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i608
-%r334 = shl i608 %r333, 576
-%r335 = or i608 %r329, %r334
-%r336 = zext i608 %r335 to i640
-%r338 = getelementptr i32, i32* %r3, i32 19
-%r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i640
-%r341 = shl i640 %r340, 608
-%r342 = or i640 %r336, %r341
-%r343 = zext i640 %r342 to i672
-%r345 = getelementptr i32, i32* %r3, i32 20
-%r346 = load i32, i32* %r345
-%r347 = zext i32 %r346 to i672
-%r348 = shl i672 %r347, 640
-%r349 = or i672 %r343, %r348
-%r350 = zext i672 %r349 to i704
-%r352 = getelementptr i32, i32* %r3, i32 21
-%r353 = load i32, i32* %r352
-%r354 = zext i32 %r353 to i704
-%r355 = shl i704 %r354, 672
-%r356 = or i704 %r350, %r355
-%r357 = zext i704 %r356 to i736
-%r359 = getelementptr i32, i32* %r3, i32 22
-%r360 = load i32, i32* %r359
-%r361 = zext i32 %r360 to i736
-%r362 = shl i736 %r361, 704
-%r363 = or i736 %r357, %r362
-%r364 = zext i736 %r363 to i768
-%r366 = getelementptr i32, i32* %r3, i32 23
-%r367 = load i32, i32* %r366
-%r368 = zext i32 %r367 to i768
-%r369 = shl i768 %r368, 736
-%r370 = or i768 %r364, %r369
-%r371 = zext i768 %r370 to i800
-%r373 = getelementptr i32, i32* %r3, i32 24
-%r374 = load i32, i32* %r373
-%r375 = zext i32 %r374 to i800
-%r376 = shl i800 %r375, 768
-%r377 = or i800 %r371, %r376
-%r378 = zext i800 %r377 to i832
-%r380 = getelementptr i32, i32* %r3, i32 25
-%r381 = load i32, i32* %r380
-%r382 = zext i32 %r381 to i832
-%r383 = shl i832 %r382, 800
-%r384 = or i832 %r378, %r383
-%r385 = zext i832 %r384 to i864
-%r387 = getelementptr i32, i32* %r3, i32 26
-%r388 = load i32, i32* %r387
-%r389 = zext i32 %r388 to i864
-%r390 = shl i864 %r389, 832
-%r391 = or i864 %r385, %r390
-%r392 = zext i864 %r391 to i896
-%r394 = getelementptr i32, i32* %r3, i32 27
-%r395 = load i32, i32* %r394
-%r396 = zext i32 %r395 to i896
-%r397 = shl i896 %r396, 864
-%r398 = or i896 %r392, %r397
-%r399 = zext i896 %r398 to i928
-%r401 = getelementptr i32, i32* %r3, i32 28
-%r402 = load i32, i32* %r401
-%r403 = zext i32 %r402 to i928
-%r404 = shl i928 %r403, 896
-%r405 = or i928 %r399, %r404
-%r406 = zext i928 %r405 to i960
-%r408 = getelementptr i32, i32* %r3, i32 29
-%r409 = load i32, i32* %r408
-%r410 = zext i32 %r409 to i960
-%r411 = shl i960 %r410, 928
-%r412 = or i960 %r406, %r411
-%r413 = zext i960 %r208 to i992
-%r414 = zext i960 %r412 to i992
-%r415 = sub i992 %r413, %r414
-%r416 = trunc i992 %r415 to i480
-%r417 = trunc i480 %r416 to i32
-%r419 = getelementptr i32, i32* %r1, i32 0
-store i32 %r417, i32* %r419
-%r420 = lshr i480 %r416, 32
-%r421 = trunc i480 %r420 to i32
-%r423 = getelementptr i32, i32* %r1, i32 1
-store i32 %r421, i32* %r423
-%r424 = lshr i480 %r420, 32
-%r425 = trunc i480 %r424 to i32
-%r427 = getelementptr i32, i32* %r1, i32 2
-store i32 %r425, i32* %r427
-%r428 = lshr i480 %r424, 32
-%r429 = trunc i480 %r428 to i32
-%r431 = getelementptr i32, i32* %r1, i32 3
-store i32 %r429, i32* %r431
-%r432 = lshr i480 %r428, 32
-%r433 = trunc i480 %r432 to i32
-%r435 = getelementptr i32, i32* %r1, i32 4
-store i32 %r433, i32* %r435
-%r436 = lshr i480 %r432, 32
-%r437 = trunc i480 %r436 to i32
-%r439 = getelementptr i32, i32* %r1, i32 5
-store i32 %r437, i32* %r439
-%r440 = lshr i480 %r436, 32
-%r441 = trunc i480 %r440 to i32
-%r443 = getelementptr i32, i32* %r1, i32 6
-store i32 %r441, i32* %r443
-%r444 = lshr i480 %r440, 32
-%r445 = trunc i480 %r444 to i32
-%r447 = getelementptr i32, i32* %r1, i32 7
-store i32 %r445, i32* %r447
-%r448 = lshr i480 %r444, 32
-%r449 = trunc i480 %r448 to i32
-%r451 = getelementptr i32, i32* %r1, i32 8
-store i32 %r449, i32* %r451
-%r452 = lshr i480 %r448, 32
-%r453 = trunc i480 %r452 to i32
-%r455 = getelementptr i32, i32* %r1, i32 9
-store i32 %r453, i32* %r455
-%r456 = lshr i480 %r452, 32
-%r457 = trunc i480 %r456 to i32
-%r459 = getelementptr i32, i32* %r1, i32 10
-store i32 %r457, i32* %r459
-%r460 = lshr i480 %r456, 32
-%r461 = trunc i480 %r460 to i32
-%r463 = getelementptr i32, i32* %r1, i32 11
-store i32 %r461, i32* %r463
-%r464 = lshr i480 %r460, 32
-%r465 = trunc i480 %r464 to i32
-%r467 = getelementptr i32, i32* %r1, i32 12
-store i32 %r465, i32* %r467
-%r468 = lshr i480 %r464, 32
-%r469 = trunc i480 %r468 to i32
-%r471 = getelementptr i32, i32* %r1, i32 13
-store i32 %r469, i32* %r471
-%r472 = lshr i480 %r468, 32
-%r473 = trunc i480 %r472 to i32
-%r475 = getelementptr i32, i32* %r1, i32 14
-store i32 %r473, i32* %r475
-%r476 = lshr i992 %r415, 480
-%r477 = trunc i992 %r476 to i480
-%r478 = lshr i992 %r415, 960
-%r479 = trunc i992 %r478 to i1
-%r480 = load i32, i32* %r4
-%r481 = zext i32 %r480 to i64
-%r483 = getelementptr i32, i32* %r4, i32 1
-%r484 = load i32, i32* %r483
-%r485 = zext i32 %r484 to i64
-%r486 = shl i64 %r485, 32
-%r487 = or i64 %r481, %r486
-%r488 = zext i64 %r487 to i96
-%r490 = getelementptr i32, i32* %r4, i32 2
-%r491 = load i32, i32* %r490
-%r492 = zext i32 %r491 to i96
-%r493 = shl i96 %r492, 64
-%r494 = or i96 %r488, %r493
-%r495 = zext i96 %r494 to i128
-%r497 = getelementptr i32, i32* %r4, i32 3
-%r498 = load i32, i32* %r497
-%r499 = zext i32 %r498 to i128
-%r500 = shl i128 %r499, 96
-%r501 = or i128 %r495, %r500
-%r502 = zext i128 %r501 to i160
-%r504 = getelementptr i32, i32* %r4, i32 4
-%r505 = load i32, i32* %r504
-%r506 = zext i32 %r505 to i160
-%r507 = shl i160 %r506, 128
-%r508 = or i160 %r502, %r507
-%r509 = zext i160 %r508 to i192
-%r511 = getelementptr i32, i32* %r4, i32 5
-%r512 = load i32, i32* %r511
-%r513 = zext i32 %r512 to i192
-%r514 = shl i192 %r513, 160
-%r515 = or i192 %r509, %r514
-%r516 = zext i192 %r515 to i224
-%r518 = getelementptr i32, i32* %r4, i32 6
-%r519 = load i32, i32* %r518
-%r520 = zext i32 %r519 to i224
-%r521 = shl i224 %r520, 192
-%r522 = or i224 %r516, %r521
-%r523 = zext i224 %r522 to i256
-%r525 = getelementptr i32, i32* %r4, i32 7
-%r526 = load i32, i32* %r525
-%r527 = zext i32 %r526 to i256
-%r528 = shl i256 %r527, 224
-%r529 = or i256 %r523, %r528
-%r530 = zext i256 %r529 to i288
-%r532 = getelementptr i32, i32* %r4, i32 8
-%r533 = load i32, i32* %r532
-%r534 = zext i32 %r533 to i288
-%r535 = shl i288 %r534, 256
-%r536 = or i288 %r530, %r535
-%r537 = zext i288 %r536 to i320
-%r539 = getelementptr i32, i32* %r4, i32 9
-%r540 = load i32, i32* %r539
-%r541 = zext i32 %r540 to i320
-%r542 = shl i320 %r541, 288
-%r543 = or i320 %r537, %r542
-%r544 = zext i320 %r543 to i352
-%r546 = getelementptr i32, i32* %r4, i32 10
-%r547 = load i32, i32* %r546
-%r548 = zext i32 %r547 to i352
-%r549 = shl i352 %r548, 320
-%r550 = or i352 %r544, %r549
-%r551 = zext i352 %r550 to i384
-%r553 = getelementptr i32, i32* %r4, i32 11
-%r554 = load i32, i32* %r553
-%r555 = zext i32 %r554 to i384
-%r556 = shl i384 %r555, 352
-%r557 = or i384 %r551, %r556
-%r558 = zext i384 %r557 to i416
-%r560 = getelementptr i32, i32* %r4, i32 12
-%r561 = load i32, i32* %r560
-%r562 = zext i32 %r561 to i416
-%r563 = shl i416 %r562, 384
-%r564 = or i416 %r558, %r563
-%r565 = zext i416 %r564 to i448
-%r567 = getelementptr i32, i32* %r4, i32 13
-%r568 = load i32, i32* %r567
-%r569 = zext i32 %r568 to i448
-%r570 = shl i448 %r569, 416
-%r571 = or i448 %r565, %r570
-%r572 = zext i448 %r571 to i480
-%r574 = getelementptr i32, i32* %r4, i32 14
-%r575 = load i32, i32* %r574
-%r576 = zext i32 %r575 to i480
-%r577 = shl i480 %r576, 448
-%r578 = or i480 %r572, %r577
-%r580 = select i1 %r479, i480 %r578, i480 0
-%r581 = add i480 %r477, %r580
-%r583 = getelementptr i32, i32* %r1, i32 15
-%r584 = trunc i480 %r581 to i32
-%r586 = getelementptr i32, i32* %r583, i32 0
-store i32 %r584, i32* %r586
-%r587 = lshr i480 %r581, 32
-%r588 = trunc i480 %r587 to i32
-%r590 = getelementptr i32, i32* %r583, i32 1
-store i32 %r588, i32* %r590
-%r591 = lshr i480 %r587, 32
-%r592 = trunc i480 %r591 to i32
-%r594 = getelementptr i32, i32* %r583, i32 2
-store i32 %r592, i32* %r594
-%r595 = lshr i480 %r591, 32
-%r596 = trunc i480 %r595 to i32
-%r598 = getelementptr i32, i32* %r583, i32 3
-store i32 %r596, i32* %r598
-%r599 = lshr i480 %r595, 32
-%r600 = trunc i480 %r599 to i32
-%r602 = getelementptr i32, i32* %r583, i32 4
-store i32 %r600, i32* %r602
-%r603 = lshr i480 %r599, 32
-%r604 = trunc i480 %r603 to i32
-%r606 = getelementptr i32, i32* %r583, i32 5
-store i32 %r604, i32* %r606
-%r607 = lshr i480 %r603, 32
-%r608 = trunc i480 %r607 to i32
-%r610 = getelementptr i32, i32* %r583, i32 6
-store i32 %r608, i32* %r610
-%r611 = lshr i480 %r607, 32
-%r612 = trunc i480 %r611 to i32
-%r614 = getelementptr i32, i32* %r583, i32 7
-store i32 %r612, i32* %r614
-%r615 = lshr i480 %r611, 32
-%r616 = trunc i480 %r615 to i32
-%r618 = getelementptr i32, i32* %r583, i32 8
-store i32 %r616, i32* %r618
-%r619 = lshr i480 %r615, 32
-%r620 = trunc i480 %r619 to i32
-%r622 = getelementptr i32, i32* %r583, i32 9
-store i32 %r620, i32* %r622
-%r623 = lshr i480 %r619, 32
-%r624 = trunc i480 %r623 to i32
-%r626 = getelementptr i32, i32* %r583, i32 10
-store i32 %r624, i32* %r626
-%r627 = lshr i480 %r623, 32
-%r628 = trunc i480 %r627 to i32
-%r630 = getelementptr i32, i32* %r583, i32 11
-store i32 %r628, i32* %r630
-%r631 = lshr i480 %r627, 32
-%r632 = trunc i480 %r631 to i32
-%r634 = getelementptr i32, i32* %r583, i32 12
-store i32 %r632, i32* %r634
-%r635 = lshr i480 %r631, 32
-%r636 = trunc i480 %r635 to i32
-%r638 = getelementptr i32, i32* %r583, i32 13
-store i32 %r636, i32* %r638
-%r639 = lshr i480 %r635, 32
-%r640 = trunc i480 %r639 to i32
-%r642 = getelementptr i32, i32* %r583, i32 14
-store i32 %r640, i32* %r642
-ret void
-}
-define i544 @mulPv512x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
-%r18 = trunc i64 %r17 to i32
-%r19 = call i32 @extractHigh32(i64 %r17)
-%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
-%r22 = trunc i64 %r21 to i32
-%r23 = call i32 @extractHigh32(i64 %r21)
-%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
-%r26 = trunc i64 %r25 to i32
-%r27 = call i32 @extractHigh32(i64 %r25)
-%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
-%r30 = trunc i64 %r29 to i32
-%r31 = call i32 @extractHigh32(i64 %r29)
-%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
-%r34 = trunc i64 %r33 to i32
-%r35 = call i32 @extractHigh32(i64 %r33)
-%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
-%r38 = trunc i64 %r37 to i32
-%r39 = call i32 @extractHigh32(i64 %r37)
-%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
-%r42 = trunc i64 %r41 to i32
-%r43 = call i32 @extractHigh32(i64 %r41)
-%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
-%r46 = trunc i64 %r45 to i32
-%r47 = call i32 @extractHigh32(i64 %r45)
-%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
-%r50 = trunc i64 %r49 to i32
-%r51 = call i32 @extractHigh32(i64 %r49)
-%r53 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 12)
-%r54 = trunc i64 %r53 to i32
-%r55 = call i32 @extractHigh32(i64 %r53)
-%r57 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 13)
-%r58 = trunc i64 %r57 to i32
-%r59 = call i32 @extractHigh32(i64 %r57)
-%r61 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 14)
-%r62 = trunc i64 %r61 to i32
-%r63 = call i32 @extractHigh32(i64 %r61)
-%r65 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 15)
-%r66 = trunc i64 %r65 to i32
-%r67 = call i32 @extractHigh32(i64 %r65)
-%r68 = zext i32 %r6 to i64
-%r69 = zext i32 %r10 to i64
-%r70 = shl i64 %r69, 32
-%r71 = or i64 %r68, %r70
-%r72 = zext i64 %r71 to i96
-%r73 = zext i32 %r14 to i96
-%r74 = shl i96 %r73, 64
-%r75 = or i96 %r72, %r74
-%r76 = zext i96 %r75 to i128
-%r77 = zext i32 %r18 to i128
-%r78 = shl i128 %r77, 96
-%r79 = or i128 %r76, %r78
-%r80 = zext i128 %r79 to i160
-%r81 = zext i32 %r22 to i160
-%r82 = shl i160 %r81, 128
-%r83 = or i160 %r80, %r82
-%r84 = zext i160 %r83 to i192
-%r85 = zext i32 %r26 to i192
-%r86 = shl i192 %r85, 160
-%r87 = or i192 %r84, %r86
-%r88 = zext i192 %r87 to i224
-%r89 = zext i32 %r30 to i224
-%r90 = shl i224 %r89, 192
-%r91 = or i224 %r88, %r90
-%r92 = zext i224 %r91 to i256
-%r93 = zext i32 %r34 to i256
-%r94 = shl i256 %r93, 224
-%r95 = or i256 %r92, %r94
-%r96 = zext i256 %r95 to i288
-%r97 = zext i32 %r38 to i288
-%r98 = shl i288 %r97, 256
-%r99 = or i288 %r96, %r98
-%r100 = zext i288 %r99 to i320
-%r101 = zext i32 %r42 to i320
-%r102 = shl i320 %r101, 288
-%r103 = or i320 %r100, %r102
-%r104 = zext i320 %r103 to i352
-%r105 = zext i32 %r46 to i352
-%r106 = shl i352 %r105, 320
-%r107 = or i352 %r104, %r106
-%r108 = zext i352 %r107 to i384
-%r109 = zext i32 %r50 to i384
-%r110 = shl i384 %r109, 352
-%r111 = or i384 %r108, %r110
-%r112 = zext i384 %r111 to i416
-%r113 = zext i32 %r54 to i416
-%r114 = shl i416 %r113, 384
-%r115 = or i416 %r112, %r114
-%r116 = zext i416 %r115 to i448
-%r117 = zext i32 %r58 to i448
-%r118 = shl i448 %r117, 416
-%r119 = or i448 %r116, %r118
-%r120 = zext i448 %r119 to i480
-%r121 = zext i32 %r62 to i480
-%r122 = shl i480 %r121, 448
-%r123 = or i480 %r120, %r122
-%r124 = zext i480 %r123 to i512
-%r125 = zext i32 %r66 to i512
-%r126 = shl i512 %r125, 480
-%r127 = or i512 %r124, %r126
-%r128 = zext i32 %r7 to i64
-%r129 = zext i32 %r11 to i64
-%r130 = shl i64 %r129, 32
-%r131 = or i64 %r128, %r130
-%r132 = zext i64 %r131 to i96
-%r133 = zext i32 %r15 to i96
-%r134 = shl i96 %r133, 64
-%r135 = or i96 %r132, %r134
-%r136 = zext i96 %r135 to i128
-%r137 = zext i32 %r19 to i128
-%r138 = shl i128 %r137, 96
-%r139 = or i128 %r136, %r138
-%r140 = zext i128 %r139 to i160
-%r141 = zext i32 %r23 to i160
-%r142 = shl i160 %r141, 128
-%r143 = or i160 %r140, %r142
-%r144 = zext i160 %r143 to i192
-%r145 = zext i32 %r27 to i192
-%r146 = shl i192 %r145, 160
-%r147 = or i192 %r144, %r146
-%r148 = zext i192 %r147 to i224
-%r149 = zext i32 %r31 to i224
-%r150 = shl i224 %r149, 192
-%r151 = or i224 %r148, %r150
-%r152 = zext i224 %r151 to i256
-%r153 = zext i32 %r35 to i256
-%r154 = shl i256 %r153, 224
-%r155 = or i256 %r152, %r154
-%r156 = zext i256 %r155 to i288
-%r157 = zext i32 %r39 to i288
-%r158 = shl i288 %r157, 256
-%r159 = or i288 %r156, %r158
-%r160 = zext i288 %r159 to i320
-%r161 = zext i32 %r43 to i320
-%r162 = shl i320 %r161, 288
-%r163 = or i320 %r160, %r162
-%r164 = zext i320 %r163 to i352
-%r165 = zext i32 %r47 to i352
-%r166 = shl i352 %r165, 320
-%r167 = or i352 %r164, %r166
-%r168 = zext i352 %r167 to i384
-%r169 = zext i32 %r51 to i384
-%r170 = shl i384 %r169, 352
-%r171 = or i384 %r168, %r170
-%r172 = zext i384 %r171 to i416
-%r173 = zext i32 %r55 to i416
-%r174 = shl i416 %r173, 384
-%r175 = or i416 %r172, %r174
-%r176 = zext i416 %r175 to i448
-%r177 = zext i32 %r59 to i448
-%r178 = shl i448 %r177, 416
-%r179 = or i448 %r176, %r178
-%r180 = zext i448 %r179 to i480
-%r181 = zext i32 %r63 to i480
-%r182 = shl i480 %r181, 448
-%r183 = or i480 %r180, %r182
-%r184 = zext i480 %r183 to i512
-%r185 = zext i32 %r67 to i512
-%r186 = shl i512 %r185, 480
-%r187 = or i512 %r184, %r186
-%r188 = zext i512 %r127 to i544
-%r189 = zext i512 %r187 to i544
-%r190 = shl i544 %r189, 32
-%r191 = add i544 %r188, %r190
-ret i544 %r191
-}
-define void @mcl_fp_mulUnitPre16L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
-{
-%r4 = call i544 @mulPv512x32(i32* %r2, i32 %r3)
-%r5 = trunc i544 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i544 %r4, 32
-%r9 = trunc i544 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i544 %r8, 32
-%r13 = trunc i544 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i544 %r12, 32
-%r17 = trunc i544 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i544 %r16, 32
-%r21 = trunc i544 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-%r24 = lshr i544 %r20, 32
-%r25 = trunc i544 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 5
-store i32 %r25, i32* %r27
-%r28 = lshr i544 %r24, 32
-%r29 = trunc i544 %r28 to i32
-%r31 = getelementptr i32, i32* %r1, i32 6
-store i32 %r29, i32* %r31
-%r32 = lshr i544 %r28, 32
-%r33 = trunc i544 %r32 to i32
-%r35 = getelementptr i32, i32* %r1, i32 7
-store i32 %r33, i32* %r35
-%r36 = lshr i544 %r32, 32
-%r37 = trunc i544 %r36 to i32
-%r39 = getelementptr i32, i32* %r1, i32 8
-store i32 %r37, i32* %r39
-%r40 = lshr i544 %r36, 32
-%r41 = trunc i544 %r40 to i32
-%r43 = getelementptr i32, i32* %r1, i32 9
-store i32 %r41, i32* %r43
-%r44 = lshr i544 %r40, 32
-%r45 = trunc i544 %r44 to i32
-%r47 = getelementptr i32, i32* %r1, i32 10
-store i32 %r45, i32* %r47
-%r48 = lshr i544 %r44, 32
-%r49 = trunc i544 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 11
-store i32 %r49, i32* %r51
-%r52 = lshr i544 %r48, 32
-%r53 = trunc i544 %r52 to i32
-%r55 = getelementptr i32, i32* %r1, i32 12
-store i32 %r53, i32* %r55
-%r56 = lshr i544 %r52, 32
-%r57 = trunc i544 %r56 to i32
-%r59 = getelementptr i32, i32* %r1, i32 13
-store i32 %r57, i32* %r59
-%r60 = lshr i544 %r56, 32
-%r61 = trunc i544 %r60 to i32
-%r63 = getelementptr i32, i32* %r1, i32 14
-store i32 %r61, i32* %r63
-%r64 = lshr i544 %r60, 32
-%r65 = trunc i544 %r64 to i32
-%r67 = getelementptr i32, i32* %r1, i32 15
-store i32 %r65, i32* %r67
-%r68 = lshr i544 %r64, 32
-%r69 = trunc i544 %r68 to i32
-%r71 = getelementptr i32, i32* %r1, i32 16
-store i32 %r69, i32* %r71
-ret void
-}
-define void @mcl_fpDbl_mulPre16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r2, i32 8
-%r7 = getelementptr i32, i32* %r3, i32 8
-%r9 = getelementptr i32, i32* %r1, i32 16
-call void @mcl_fpDbl_mulPre8L(i32* %r1, i32* %r2, i32* %r3)
-call void @mcl_fpDbl_mulPre8L(i32* %r9, i32* %r5, i32* %r7)
-%r10 = load i32, i32* %r5
-%r11 = zext i32 %r10 to i64
-%r13 = getelementptr i32, i32* %r5, i32 1
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i64
-%r16 = shl i64 %r15, 32
-%r17 = or i64 %r11, %r16
-%r18 = zext i64 %r17 to i96
-%r20 = getelementptr i32, i32* %r5, i32 2
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i96
-%r23 = shl i96 %r22, 64
-%r24 = or i96 %r18, %r23
-%r25 = zext i96 %r24 to i128
-%r27 = getelementptr i32, i32* %r5, i32 3
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i128
-%r30 = shl i128 %r29, 96
-%r31 = or i128 %r25, %r30
-%r32 = zext i128 %r31 to i160
-%r34 = getelementptr i32, i32* %r5, i32 4
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i160
-%r37 = shl i160 %r36, 128
-%r38 = or i160 %r32, %r37
-%r39 = zext i160 %r38 to i192
-%r41 = getelementptr i32, i32* %r5, i32 5
-%r42 = load i32, i32* %r41
-%r43 = zext i32 %r42 to i192
-%r44 = shl i192 %r43, 160
-%r45 = or i192 %r39, %r44
-%r46 = zext i192 %r45 to i224
-%r48 = getelementptr i32, i32* %r5, i32 6
-%r49 = load i32, i32* %r48
-%r50 = zext i32 %r49 to i224
-%r51 = shl i224 %r50, 192
-%r52 = or i224 %r46, %r51
-%r53 = zext i224 %r52 to i256
-%r55 = getelementptr i32, i32* %r5, i32 7
-%r56 = load i32, i32* %r55
-%r57 = zext i32 %r56 to i256
-%r58 = shl i256 %r57, 224
-%r59 = or i256 %r53, %r58
-%r60 = zext i256 %r59 to i288
-%r61 = load i32, i32* %r2
-%r62 = zext i32 %r61 to i64
-%r64 = getelementptr i32, i32* %r2, i32 1
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i64
-%r67 = shl i64 %r66, 32
-%r68 = or i64 %r62, %r67
-%r69 = zext i64 %r68 to i96
-%r71 = getelementptr i32, i32* %r2, i32 2
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i96
-%r74 = shl i96 %r73, 64
-%r75 = or i96 %r69, %r74
-%r76 = zext i96 %r75 to i128
-%r78 = getelementptr i32, i32* %r2, i32 3
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i128
-%r81 = shl i128 %r80, 96
-%r82 = or i128 %r76, %r81
-%r83 = zext i128 %r82 to i160
-%r85 = getelementptr i32, i32* %r2, i32 4
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i160
-%r88 = shl i160 %r87, 128
-%r89 = or i160 %r83, %r88
-%r90 = zext i160 %r89 to i192
-%r92 = getelementptr i32, i32* %r2, i32 5
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i192
-%r95 = shl i192 %r94, 160
-%r96 = or i192 %r90, %r95
-%r97 = zext i192 %r96 to i224
-%r99 = getelementptr i32, i32* %r2, i32 6
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i224
-%r102 = shl i224 %r101, 192
-%r103 = or i224 %r97, %r102
-%r104 = zext i224 %r103 to i256
-%r106 = getelementptr i32, i32* %r2, i32 7
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i256
-%r109 = shl i256 %r108, 224
-%r110 = or i256 %r104, %r109
-%r111 = zext i256 %r110 to i288
-%r112 = load i32, i32* %r7
-%r113 = zext i32 %r112 to i64
-%r115 = getelementptr i32, i32* %r7, i32 1
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i64
-%r118 = shl i64 %r117, 32
-%r119 = or i64 %r113, %r118
-%r120 = zext i64 %r119 to i96
-%r122 = getelementptr i32, i32* %r7, i32 2
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i96
-%r125 = shl i96 %r124, 64
-%r126 = or i96 %r120, %r125
-%r127 = zext i96 %r126 to i128
-%r129 = getelementptr i32, i32* %r7, i32 3
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i128
-%r132 = shl i128 %r131, 96
-%r133 = or i128 %r127, %r132
-%r134 = zext i128 %r133 to i160
-%r136 = getelementptr i32, i32* %r7, i32 4
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i160
-%r139 = shl i160 %r138, 128
-%r140 = or i160 %r134, %r139
-%r141 = zext i160 %r140 to i192
-%r143 = getelementptr i32, i32* %r7, i32 5
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i192
-%r146 = shl i192 %r145, 160
-%r147 = or i192 %r141, %r146
-%r148 = zext i192 %r147 to i224
-%r150 = getelementptr i32, i32* %r7, i32 6
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i224
-%r153 = shl i224 %r152, 192
-%r154 = or i224 %r148, %r153
-%r155 = zext i224 %r154 to i256
-%r157 = getelementptr i32, i32* %r7, i32 7
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i256
-%r160 = shl i256 %r159, 224
-%r161 = or i256 %r155, %r160
-%r162 = zext i256 %r161 to i288
-%r163 = load i32, i32* %r3
-%r164 = zext i32 %r163 to i64
-%r166 = getelementptr i32, i32* %r3, i32 1
-%r167 = load i32, i32* %r166
-%r168 = zext i32 %r167 to i64
-%r169 = shl i64 %r168, 32
-%r170 = or i64 %r164, %r169
-%r171 = zext i64 %r170 to i96
-%r173 = getelementptr i32, i32* %r3, i32 2
-%r174 = load i32, i32* %r173
-%r175 = zext i32 %r174 to i96
-%r176 = shl i96 %r175, 64
-%r177 = or i96 %r171, %r176
-%r178 = zext i96 %r177 to i128
-%r180 = getelementptr i32, i32* %r3, i32 3
-%r181 = load i32, i32* %r180
-%r182 = zext i32 %r181 to i128
-%r183 = shl i128 %r182, 96
-%r184 = or i128 %r178, %r183
-%r185 = zext i128 %r184 to i160
-%r187 = getelementptr i32, i32* %r3, i32 4
-%r188 = load i32, i32* %r187
-%r189 = zext i32 %r188 to i160
-%r190 = shl i160 %r189, 128
-%r191 = or i160 %r185, %r190
-%r192 = zext i160 %r191 to i192
-%r194 = getelementptr i32, i32* %r3, i32 5
-%r195 = load i32, i32* %r194
-%r196 = zext i32 %r195 to i192
-%r197 = shl i192 %r196, 160
-%r198 = or i192 %r192, %r197
-%r199 = zext i192 %r198 to i224
-%r201 = getelementptr i32, i32* %r3, i32 6
-%r202 = load i32, i32* %r201
-%r203 = zext i32 %r202 to i224
-%r204 = shl i224 %r203, 192
-%r205 = or i224 %r199, %r204
-%r206 = zext i224 %r205 to i256
-%r208 = getelementptr i32, i32* %r3, i32 7
-%r209 = load i32, i32* %r208
-%r210 = zext i32 %r209 to i256
-%r211 = shl i256 %r210, 224
-%r212 = or i256 %r206, %r211
-%r213 = zext i256 %r212 to i288
-%r214 = add i288 %r60, %r111
-%r215 = add i288 %r162, %r213
-%r217 = alloca i32, i32 16
-%r218 = trunc i288 %r214 to i256
-%r219 = trunc i288 %r215 to i256
-%r220 = lshr i288 %r214, 256
-%r221 = trunc i288 %r220 to i1
-%r222 = lshr i288 %r215, 256
-%r223 = trunc i288 %r222 to i1
-%r224 = and i1 %r221, %r223
-%r226 = select i1 %r221, i256 %r219, i256 0
-%r228 = select i1 %r223, i256 %r218, i256 0
-%r230 = alloca i32, i32 8
-%r232 = alloca i32, i32 8
-%r233 = trunc i256 %r218 to i32
-%r235 = getelementptr i32, i32* %r230, i32 0
-store i32 %r233, i32* %r235
-%r236 = lshr i256 %r218, 32
-%r237 = trunc i256 %r236 to i32
-%r239 = getelementptr i32, i32* %r230, i32 1
-store i32 %r237, i32* %r239
-%r240 = lshr i256 %r236, 32
-%r241 = trunc i256 %r240 to i32
-%r243 = getelementptr i32, i32* %r230, i32 2
-store i32 %r241, i32* %r243
-%r244 = lshr i256 %r240, 32
-%r245 = trunc i256 %r244 to i32
-%r247 = getelementptr i32, i32* %r230, i32 3
-store i32 %r245, i32* %r247
-%r248 = lshr i256 %r244, 32
-%r249 = trunc i256 %r248 to i32
-%r251 = getelementptr i32, i32* %r230, i32 4
-store i32 %r249, i32* %r251
-%r252 = lshr i256 %r248, 32
-%r253 = trunc i256 %r252 to i32
-%r255 = getelementptr i32, i32* %r230, i32 5
-store i32 %r253, i32* %r255
-%r256 = lshr i256 %r252, 32
-%r257 = trunc i256 %r256 to i32
-%r259 = getelementptr i32, i32* %r230, i32 6
-store i32 %r257, i32* %r259
-%r260 = lshr i256 %r256, 32
-%r261 = trunc i256 %r260 to i32
-%r263 = getelementptr i32, i32* %r230, i32 7
-store i32 %r261, i32* %r263
-%r264 = trunc i256 %r219 to i32
-%r266 = getelementptr i32, i32* %r232, i32 0
-store i32 %r264, i32* %r266
-%r267 = lshr i256 %r219, 32
-%r268 = trunc i256 %r267 to i32
-%r270 = getelementptr i32, i32* %r232, i32 1
-store i32 %r268, i32* %r270
-%r271 = lshr i256 %r267, 32
-%r272 = trunc i256 %r271 to i32
-%r274 = getelementptr i32, i32* %r232, i32 2
-store i32 %r272, i32* %r274
-%r275 = lshr i256 %r271, 32
-%r276 = trunc i256 %r275 to i32
-%r278 = getelementptr i32, i32* %r232, i32 3
-store i32 %r276, i32* %r278
-%r279 = lshr i256 %r275, 32
-%r280 = trunc i256 %r279 to i32
-%r282 = getelementptr i32, i32* %r232, i32 4
-store i32 %r280, i32* %r282
-%r283 = lshr i256 %r279, 32
-%r284 = trunc i256 %r283 to i32
-%r286 = getelementptr i32, i32* %r232, i32 5
-store i32 %r284, i32* %r286
-%r287 = lshr i256 %r283, 32
-%r288 = trunc i256 %r287 to i32
-%r290 = getelementptr i32, i32* %r232, i32 6
-store i32 %r288, i32* %r290
-%r291 = lshr i256 %r287, 32
-%r292 = trunc i256 %r291 to i32
-%r294 = getelementptr i32, i32* %r232, i32 7
-store i32 %r292, i32* %r294
-call void @mcl_fpDbl_mulPre8L(i32* %r217, i32* %r230, i32* %r232)
-%r295 = load i32, i32* %r217
-%r296 = zext i32 %r295 to i64
-%r298 = getelementptr i32, i32* %r217, i32 1
-%r299 = load i32, i32* %r298
-%r300 = zext i32 %r299 to i64
-%r301 = shl i64 %r300, 32
-%r302 = or i64 %r296, %r301
-%r303 = zext i64 %r302 to i96
-%r305 = getelementptr i32, i32* %r217, i32 2
-%r306 = load i32, i32* %r305
-%r307 = zext i32 %r306 to i96
-%r308 = shl i96 %r307, 64
-%r309 = or i96 %r303, %r308
-%r310 = zext i96 %r309 to i128
-%r312 = getelementptr i32, i32* %r217, i32 3
-%r313 = load i32, i32* %r312
-%r314 = zext i32 %r313 to i128
-%r315 = shl i128 %r314, 96
-%r316 = or i128 %r310, %r315
-%r317 = zext i128 %r316 to i160
-%r319 = getelementptr i32, i32* %r217, i32 4
-%r320 = load i32, i32* %r319
-%r321 = zext i32 %r320 to i160
-%r322 = shl i160 %r321, 128
-%r323 = or i160 %r317, %r322
-%r324 = zext i160 %r323 to i192
-%r326 = getelementptr i32, i32* %r217, i32 5
-%r327 = load i32, i32* %r326
-%r328 = zext i32 %r327 to i192
-%r329 = shl i192 %r328, 160
-%r330 = or i192 %r324, %r329
-%r331 = zext i192 %r330 to i224
-%r333 = getelementptr i32, i32* %r217, i32 6
-%r334 = load i32, i32* %r333
-%r335 = zext i32 %r334 to i224
-%r336 = shl i224 %r335, 192
-%r337 = or i224 %r331, %r336
-%r338 = zext i224 %r337 to i256
-%r340 = getelementptr i32, i32* %r217, i32 7
-%r341 = load i32, i32* %r340
-%r342 = zext i32 %r341 to i256
-%r343 = shl i256 %r342, 224
-%r344 = or i256 %r338, %r343
-%r345 = zext i256 %r344 to i288
-%r347 = getelementptr i32, i32* %r217, i32 8
-%r348 = load i32, i32* %r347
-%r349 = zext i32 %r348 to i288
-%r350 = shl i288 %r349, 256
-%r351 = or i288 %r345, %r350
-%r352 = zext i288 %r351 to i320
-%r354 = getelementptr i32, i32* %r217, i32 9
-%r355 = load i32, i32* %r354
-%r356 = zext i32 %r355 to i320
-%r357 = shl i320 %r356, 288
-%r358 = or i320 %r352, %r357
-%r359 = zext i320 %r358 to i352
-%r361 = getelementptr i32, i32* %r217, i32 10
-%r362 = load i32, i32* %r361
-%r363 = zext i32 %r362 to i352
-%r364 = shl i352 %r363, 320
-%r365 = or i352 %r359, %r364
-%r366 = zext i352 %r365 to i384
-%r368 = getelementptr i32, i32* %r217, i32 11
-%r369 = load i32, i32* %r368
-%r370 = zext i32 %r369 to i384
-%r371 = shl i384 %r370, 352
-%r372 = or i384 %r366, %r371
-%r373 = zext i384 %r372 to i416
-%r375 = getelementptr i32, i32* %r217, i32 12
-%r376 = load i32, i32* %r375
-%r377 = zext i32 %r376 to i416
-%r378 = shl i416 %r377, 384
-%r379 = or i416 %r373, %r378
-%r380 = zext i416 %r379 to i448
-%r382 = getelementptr i32, i32* %r217, i32 13
-%r383 = load i32, i32* %r382
-%r384 = zext i32 %r383 to i448
-%r385 = shl i448 %r384, 416
-%r386 = or i448 %r380, %r385
-%r387 = zext i448 %r386 to i480
-%r389 = getelementptr i32, i32* %r217, i32 14
-%r390 = load i32, i32* %r389
-%r391 = zext i32 %r390 to i480
-%r392 = shl i480 %r391, 448
-%r393 = or i480 %r387, %r392
-%r394 = zext i480 %r393 to i512
-%r396 = getelementptr i32, i32* %r217, i32 15
-%r397 = load i32, i32* %r396
-%r398 = zext i32 %r397 to i512
-%r399 = shl i512 %r398, 480
-%r400 = or i512 %r394, %r399
-%r401 = zext i512 %r400 to i544
-%r402 = zext i1 %r224 to i544
-%r403 = shl i544 %r402, 512
-%r404 = or i544 %r401, %r403
-%r405 = zext i256 %r226 to i544
-%r406 = zext i256 %r228 to i544
-%r407 = shl i544 %r405, 256
-%r408 = shl i544 %r406, 256
-%r409 = add i544 %r404, %r407
-%r410 = add i544 %r409, %r408
-%r411 = load i32, i32* %r1
-%r412 = zext i32 %r411 to i64
-%r414 = getelementptr i32, i32* %r1, i32 1
-%r415 = load i32, i32* %r414
-%r416 = zext i32 %r415 to i64
-%r417 = shl i64 %r416, 32
-%r418 = or i64 %r412, %r417
-%r419 = zext i64 %r418 to i96
-%r421 = getelementptr i32, i32* %r1, i32 2
-%r422 = load i32, i32* %r421
-%r423 = zext i32 %r422 to i96
-%r424 = shl i96 %r423, 64
-%r425 = or i96 %r419, %r424
-%r426 = zext i96 %r425 to i128
-%r428 = getelementptr i32, i32* %r1, i32 3
-%r429 = load i32, i32* %r428
-%r430 = zext i32 %r429 to i128
-%r431 = shl i128 %r430, 96
-%r432 = or i128 %r426, %r431
-%r433 = zext i128 %r432 to i160
-%r435 = getelementptr i32, i32* %r1, i32 4
-%r436 = load i32, i32* %r435
-%r437 = zext i32 %r436 to i160
-%r438 = shl i160 %r437, 128
-%r439 = or i160 %r433, %r438
-%r440 = zext i160 %r439 to i192
-%r442 = getelementptr i32, i32* %r1, i32 5
-%r443 = load i32, i32* %r442
-%r444 = zext i32 %r443 to i192
-%r445 = shl i192 %r444, 160
-%r446 = or i192 %r440, %r445
-%r447 = zext i192 %r446 to i224
-%r449 = getelementptr i32, i32* %r1, i32 6
-%r450 = load i32, i32* %r449
-%r451 = zext i32 %r450 to i224
-%r452 = shl i224 %r451, 192
-%r453 = or i224 %r447, %r452
-%r454 = zext i224 %r453 to i256
-%r456 = getelementptr i32, i32* %r1, i32 7
-%r457 = load i32, i32* %r456
-%r458 = zext i32 %r457 to i256
-%r459 = shl i256 %r458, 224
-%r460 = or i256 %r454, %r459
-%r461 = zext i256 %r460 to i288
-%r463 = getelementptr i32, i32* %r1, i32 8
-%r464 = load i32, i32* %r463
-%r465 = zext i32 %r464 to i288
-%r466 = shl i288 %r465, 256
-%r467 = or i288 %r461, %r466
-%r468 = zext i288 %r467 to i320
-%r470 = getelementptr i32, i32* %r1, i32 9
-%r471 = load i32, i32* %r470
-%r472 = zext i32 %r471 to i320
-%r473 = shl i320 %r472, 288
-%r474 = or i320 %r468, %r473
-%r475 = zext i320 %r474 to i352
-%r477 = getelementptr i32, i32* %r1, i32 10
-%r478 = load i32, i32* %r477
-%r479 = zext i32 %r478 to i352
-%r480 = shl i352 %r479, 320
-%r481 = or i352 %r475, %r480
-%r482 = zext i352 %r481 to i384
-%r484 = getelementptr i32, i32* %r1, i32 11
-%r485 = load i32, i32* %r484
-%r486 = zext i32 %r485 to i384
-%r487 = shl i384 %r486, 352
-%r488 = or i384 %r482, %r487
-%r489 = zext i384 %r488 to i416
-%r491 = getelementptr i32, i32* %r1, i32 12
-%r492 = load i32, i32* %r491
-%r493 = zext i32 %r492 to i416
-%r494 = shl i416 %r493, 384
-%r495 = or i416 %r489, %r494
-%r496 = zext i416 %r495 to i448
-%r498 = getelementptr i32, i32* %r1, i32 13
-%r499 = load i32, i32* %r498
-%r500 = zext i32 %r499 to i448
-%r501 = shl i448 %r500, 416
-%r502 = or i448 %r496, %r501
-%r503 = zext i448 %r502 to i480
-%r505 = getelementptr i32, i32* %r1, i32 14
-%r506 = load i32, i32* %r505
-%r507 = zext i32 %r506 to i480
-%r508 = shl i480 %r507, 448
-%r509 = or i480 %r503, %r508
-%r510 = zext i480 %r509 to i512
-%r512 = getelementptr i32, i32* %r1, i32 15
-%r513 = load i32, i32* %r512
-%r514 = zext i32 %r513 to i512
-%r515 = shl i512 %r514, 480
-%r516 = or i512 %r510, %r515
-%r517 = zext i512 %r516 to i544
-%r518 = sub i544 %r410, %r517
-%r520 = getelementptr i32, i32* %r1, i32 16
-%r521 = load i32, i32* %r520
-%r522 = zext i32 %r521 to i64
-%r524 = getelementptr i32, i32* %r520, i32 1
-%r525 = load i32, i32* %r524
-%r526 = zext i32 %r525 to i64
-%r527 = shl i64 %r526, 32
-%r528 = or i64 %r522, %r527
-%r529 = zext i64 %r528 to i96
-%r531 = getelementptr i32, i32* %r520, i32 2
-%r532 = load i32, i32* %r531
-%r533 = zext i32 %r532 to i96
-%r534 = shl i96 %r533, 64
-%r535 = or i96 %r529, %r534
-%r536 = zext i96 %r535 to i128
-%r538 = getelementptr i32, i32* %r520, i32 3
-%r539 = load i32, i32* %r538
-%r540 = zext i32 %r539 to i128
-%r541 = shl i128 %r540, 96
-%r542 = or i128 %r536, %r541
-%r543 = zext i128 %r542 to i160
-%r545 = getelementptr i32, i32* %r520, i32 4
-%r546 = load i32, i32* %r545
-%r547 = zext i32 %r546 to i160
-%r548 = shl i160 %r547, 128
-%r549 = or i160 %r543, %r548
-%r550 = zext i160 %r549 to i192
-%r552 = getelementptr i32, i32* %r520, i32 5
-%r553 = load i32, i32* %r552
-%r554 = zext i32 %r553 to i192
-%r555 = shl i192 %r554, 160
-%r556 = or i192 %r550, %r555
-%r557 = zext i192 %r556 to i224
-%r559 = getelementptr i32, i32* %r520, i32 6
-%r560 = load i32, i32* %r559
-%r561 = zext i32 %r560 to i224
-%r562 = shl i224 %r561, 192
-%r563 = or i224 %r557, %r562
-%r564 = zext i224 %r563 to i256
-%r566 = getelementptr i32, i32* %r520, i32 7
-%r567 = load i32, i32* %r566
-%r568 = zext i32 %r567 to i256
-%r569 = shl i256 %r568, 224
-%r570 = or i256 %r564, %r569
-%r571 = zext i256 %r570 to i288
-%r573 = getelementptr i32, i32* %r520, i32 8
-%r574 = load i32, i32* %r573
-%r575 = zext i32 %r574 to i288
-%r576 = shl i288 %r575, 256
-%r577 = or i288 %r571, %r576
-%r578 = zext i288 %r577 to i320
-%r580 = getelementptr i32, i32* %r520, i32 9
-%r581 = load i32, i32* %r580
-%r582 = zext i32 %r581 to i320
-%r583 = shl i320 %r582, 288
-%r584 = or i320 %r578, %r583
-%r585 = zext i320 %r584 to i352
-%r587 = getelementptr i32, i32* %r520, i32 10
-%r588 = load i32, i32* %r587
-%r589 = zext i32 %r588 to i352
-%r590 = shl i352 %r589, 320
-%r591 = or i352 %r585, %r590
-%r592 = zext i352 %r591 to i384
-%r594 = getelementptr i32, i32* %r520, i32 11
-%r595 = load i32, i32* %r594
-%r596 = zext i32 %r595 to i384
-%r597 = shl i384 %r596, 352
-%r598 = or i384 %r592, %r597
-%r599 = zext i384 %r598 to i416
-%r601 = getelementptr i32, i32* %r520, i32 12
-%r602 = load i32, i32* %r601
-%r603 = zext i32 %r602 to i416
-%r604 = shl i416 %r603, 384
-%r605 = or i416 %r599, %r604
-%r606 = zext i416 %r605 to i448
-%r608 = getelementptr i32, i32* %r520, i32 13
-%r609 = load i32, i32* %r608
-%r610 = zext i32 %r609 to i448
-%r611 = shl i448 %r610, 416
-%r612 = or i448 %r606, %r611
-%r613 = zext i448 %r612 to i480
-%r615 = getelementptr i32, i32* %r520, i32 14
-%r616 = load i32, i32* %r615
-%r617 = zext i32 %r616 to i480
-%r618 = shl i480 %r617, 448
-%r619 = or i480 %r613, %r618
-%r620 = zext i480 %r619 to i512
-%r622 = getelementptr i32, i32* %r520, i32 15
-%r623 = load i32, i32* %r622
-%r624 = zext i32 %r623 to i512
-%r625 = shl i512 %r624, 480
-%r626 = or i512 %r620, %r625
-%r627 = zext i512 %r626 to i544
-%r628 = sub i544 %r518, %r627
-%r629 = zext i544 %r628 to i768
-%r631 = getelementptr i32, i32* %r1, i32 8
-%r632 = load i32, i32* %r631
-%r633 = zext i32 %r632 to i64
-%r635 = getelementptr i32, i32* %r631, i32 1
-%r636 = load i32, i32* %r635
-%r637 = zext i32 %r636 to i64
-%r638 = shl i64 %r637, 32
-%r639 = or i64 %r633, %r638
-%r640 = zext i64 %r639 to i96
-%r642 = getelementptr i32, i32* %r631, i32 2
-%r643 = load i32, i32* %r642
-%r644 = zext i32 %r643 to i96
-%r645 = shl i96 %r644, 64
-%r646 = or i96 %r640, %r645
-%r647 = zext i96 %r646 to i128
-%r649 = getelementptr i32, i32* %r631, i32 3
-%r650 = load i32, i32* %r649
-%r651 = zext i32 %r650 to i128
-%r652 = shl i128 %r651, 96
-%r653 = or i128 %r647, %r652
-%r654 = zext i128 %r653 to i160
-%r656 = getelementptr i32, i32* %r631, i32 4
-%r657 = load i32, i32* %r656
-%r658 = zext i32 %r657 to i160
-%r659 = shl i160 %r658, 128
-%r660 = or i160 %r654, %r659
-%r661 = zext i160 %r660 to i192
-%r663 = getelementptr i32, i32* %r631, i32 5
-%r664 = load i32, i32* %r663
-%r665 = zext i32 %r664 to i192
-%r666 = shl i192 %r665, 160
-%r667 = or i192 %r661, %r666
-%r668 = zext i192 %r667 to i224
-%r670 = getelementptr i32, i32* %r631, i32 6
-%r671 = load i32, i32* %r670
-%r672 = zext i32 %r671 to i224
-%r673 = shl i224 %r672, 192
-%r674 = or i224 %r668, %r673
-%r675 = zext i224 %r674 to i256
-%r677 = getelementptr i32, i32* %r631, i32 7
-%r678 = load i32, i32* %r677
-%r679 = zext i32 %r678 to i256
-%r680 = shl i256 %r679, 224
-%r681 = or i256 %r675, %r680
-%r682 = zext i256 %r681 to i288
-%r684 = getelementptr i32, i32* %r631, i32 8
-%r685 = load i32, i32* %r684
-%r686 = zext i32 %r685 to i288
-%r687 = shl i288 %r686, 256
-%r688 = or i288 %r682, %r687
-%r689 = zext i288 %r688 to i320
-%r691 = getelementptr i32, i32* %r631, i32 9
-%r692 = load i32, i32* %r691
-%r693 = zext i32 %r692 to i320
-%r694 = shl i320 %r693, 288
-%r695 = or i320 %r689, %r694
-%r696 = zext i320 %r695 to i352
-%r698 = getelementptr i32, i32* %r631, i32 10
-%r699 = load i32, i32* %r698
-%r700 = zext i32 %r699 to i352
-%r701 = shl i352 %r700, 320
-%r702 = or i352 %r696, %r701
-%r703 = zext i352 %r702 to i384
-%r705 = getelementptr i32, i32* %r631, i32 11
-%r706 = load i32, i32* %r705
-%r707 = zext i32 %r706 to i384
-%r708 = shl i384 %r707, 352
-%r709 = or i384 %r703, %r708
-%r710 = zext i384 %r709 to i416
-%r712 = getelementptr i32, i32* %r631, i32 12
-%r713 = load i32, i32* %r712
-%r714 = zext i32 %r713 to i416
-%r715 = shl i416 %r714, 384
-%r716 = or i416 %r710, %r715
-%r717 = zext i416 %r716 to i448
-%r719 = getelementptr i32, i32* %r631, i32 13
-%r720 = load i32, i32* %r719
-%r721 = zext i32 %r720 to i448
-%r722 = shl i448 %r721, 416
-%r723 = or i448 %r717, %r722
-%r724 = zext i448 %r723 to i480
-%r726 = getelementptr i32, i32* %r631, i32 14
-%r727 = load i32, i32* %r726
-%r728 = zext i32 %r727 to i480
-%r729 = shl i480 %r728, 448
-%r730 = or i480 %r724, %r729
-%r731 = zext i480 %r730 to i512
-%r733 = getelementptr i32, i32* %r631, i32 15
-%r734 = load i32, i32* %r733
-%r735 = zext i32 %r734 to i512
-%r736 = shl i512 %r735, 480
-%r737 = or i512 %r731, %r736
-%r738 = zext i512 %r737 to i544
-%r740 = getelementptr i32, i32* %r631, i32 16
-%r741 = load i32, i32* %r740
-%r742 = zext i32 %r741 to i544
-%r743 = shl i544 %r742, 512
-%r744 = or i544 %r738, %r743
-%r745 = zext i544 %r744 to i576
-%r747 = getelementptr i32, i32* %r631, i32 17
-%r748 = load i32, i32* %r747
-%r749 = zext i32 %r748 to i576
-%r750 = shl i576 %r749, 544
-%r751 = or i576 %r745, %r750
-%r752 = zext i576 %r751 to i608
-%r754 = getelementptr i32, i32* %r631, i32 18
-%r755 = load i32, i32* %r754
-%r756 = zext i32 %r755 to i608
-%r757 = shl i608 %r756, 576
-%r758 = or i608 %r752, %r757
-%r759 = zext i608 %r758 to i640
-%r761 = getelementptr i32, i32* %r631, i32 19
-%r762 = load i32, i32* %r761
-%r763 = zext i32 %r762 to i640
-%r764 = shl i640 %r763, 608
-%r765 = or i640 %r759, %r764
-%r766 = zext i640 %r765 to i672
-%r768 = getelementptr i32, i32* %r631, i32 20
-%r769 = load i32, i32* %r768
-%r770 = zext i32 %r769 to i672
-%r771 = shl i672 %r770, 640
-%r772 = or i672 %r766, %r771
-%r773 = zext i672 %r772 to i704
-%r775 = getelementptr i32, i32* %r631, i32 21
-%r776 = load i32, i32* %r775
-%r777 = zext i32 %r776 to i704
-%r778 = shl i704 %r777, 672
-%r779 = or i704 %r773, %r778
-%r780 = zext i704 %r779 to i736
-%r782 = getelementptr i32, i32* %r631, i32 22
-%r783 = load i32, i32* %r782
-%r784 = zext i32 %r783 to i736
-%r785 = shl i736 %r784, 704
-%r786 = or i736 %r780, %r785
-%r787 = zext i736 %r786 to i768
-%r789 = getelementptr i32, i32* %r631, i32 23
-%r790 = load i32, i32* %r789
-%r791 = zext i32 %r790 to i768
-%r792 = shl i768 %r791, 736
-%r793 = or i768 %r787, %r792
-%r794 = add i768 %r629, %r793
-%r796 = getelementptr i32, i32* %r1, i32 8
-%r797 = trunc i768 %r794 to i32
-%r799 = getelementptr i32, i32* %r796, i32 0
-store i32 %r797, i32* %r799
-%r800 = lshr i768 %r794, 32
-%r801 = trunc i768 %r800 to i32
-%r803 = getelementptr i32, i32* %r796, i32 1
-store i32 %r801, i32* %r803
-%r804 = lshr i768 %r800, 32
-%r805 = trunc i768 %r804 to i32
-%r807 = getelementptr i32, i32* %r796, i32 2
-store i32 %r805, i32* %r807
-%r808 = lshr i768 %r804, 32
-%r809 = trunc i768 %r808 to i32
-%r811 = getelementptr i32, i32* %r796, i32 3
-store i32 %r809, i32* %r811
-%r812 = lshr i768 %r808, 32
-%r813 = trunc i768 %r812 to i32
-%r815 = getelementptr i32, i32* %r796, i32 4
-store i32 %r813, i32* %r815
-%r816 = lshr i768 %r812, 32
-%r817 = trunc i768 %r816 to i32
-%r819 = getelementptr i32, i32* %r796, i32 5
-store i32 %r817, i32* %r819
-%r820 = lshr i768 %r816, 32
-%r821 = trunc i768 %r820 to i32
-%r823 = getelementptr i32, i32* %r796, i32 6
-store i32 %r821, i32* %r823
-%r824 = lshr i768 %r820, 32
-%r825 = trunc i768 %r824 to i32
-%r827 = getelementptr i32, i32* %r796, i32 7
-store i32 %r825, i32* %r827
-%r828 = lshr i768 %r824, 32
-%r829 = trunc i768 %r828 to i32
-%r831 = getelementptr i32, i32* %r796, i32 8
-store i32 %r829, i32* %r831
-%r832 = lshr i768 %r828, 32
-%r833 = trunc i768 %r832 to i32
-%r835 = getelementptr i32, i32* %r796, i32 9
-store i32 %r833, i32* %r835
-%r836 = lshr i768 %r832, 32
-%r837 = trunc i768 %r836 to i32
-%r839 = getelementptr i32, i32* %r796, i32 10
-store i32 %r837, i32* %r839
-%r840 = lshr i768 %r836, 32
-%r841 = trunc i768 %r840 to i32
-%r843 = getelementptr i32, i32* %r796, i32 11
-store i32 %r841, i32* %r843
-%r844 = lshr i768 %r840, 32
-%r845 = trunc i768 %r844 to i32
-%r847 = getelementptr i32, i32* %r796, i32 12
-store i32 %r845, i32* %r847
-%r848 = lshr i768 %r844, 32
-%r849 = trunc i768 %r848 to i32
-%r851 = getelementptr i32, i32* %r796, i32 13
-store i32 %r849, i32* %r851
-%r852 = lshr i768 %r848, 32
-%r853 = trunc i768 %r852 to i32
-%r855 = getelementptr i32, i32* %r796, i32 14
-store i32 %r853, i32* %r855
-%r856 = lshr i768 %r852, 32
-%r857 = trunc i768 %r856 to i32
-%r859 = getelementptr i32, i32* %r796, i32 15
-store i32 %r857, i32* %r859
-%r860 = lshr i768 %r856, 32
-%r861 = trunc i768 %r860 to i32
-%r863 = getelementptr i32, i32* %r796, i32 16
-store i32 %r861, i32* %r863
-%r864 = lshr i768 %r860, 32
-%r865 = trunc i768 %r864 to i32
-%r867 = getelementptr i32, i32* %r796, i32 17
-store i32 %r865, i32* %r867
-%r868 = lshr i768 %r864, 32
-%r869 = trunc i768 %r868 to i32
-%r871 = getelementptr i32, i32* %r796, i32 18
-store i32 %r869, i32* %r871
-%r872 = lshr i768 %r868, 32
-%r873 = trunc i768 %r872 to i32
-%r875 = getelementptr i32, i32* %r796, i32 19
-store i32 %r873, i32* %r875
-%r876 = lshr i768 %r872, 32
-%r877 = trunc i768 %r876 to i32
-%r879 = getelementptr i32, i32* %r796, i32 20
-store i32 %r877, i32* %r879
-%r880 = lshr i768 %r876, 32
-%r881 = trunc i768 %r880 to i32
-%r883 = getelementptr i32, i32* %r796, i32 21
-store i32 %r881, i32* %r883
-%r884 = lshr i768 %r880, 32
-%r885 = trunc i768 %r884 to i32
-%r887 = getelementptr i32, i32* %r796, i32 22
-store i32 %r885, i32* %r887
-%r888 = lshr i768 %r884, 32
-%r889 = trunc i768 %r888 to i32
-%r891 = getelementptr i32, i32* %r796, i32 23
-store i32 %r889, i32* %r891
-ret void
-}
-define void @mcl_fpDbl_sqrPre16L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r4 = getelementptr i32, i32* %r2, i32 8
-%r6 = getelementptr i32, i32* %r2, i32 8
-%r8 = getelementptr i32, i32* %r1, i32 16
-call void @mcl_fpDbl_mulPre8L(i32* %r1, i32* %r2, i32* %r2)
-call void @mcl_fpDbl_mulPre8L(i32* %r8, i32* %r4, i32* %r6)
-%r9 = load i32, i32* %r4
-%r10 = zext i32 %r9 to i64
-%r12 = getelementptr i32, i32* %r4, i32 1
-%r13 = load i32, i32* %r12
-%r14 = zext i32 %r13 to i64
-%r15 = shl i64 %r14, 32
-%r16 = or i64 %r10, %r15
-%r17 = zext i64 %r16 to i96
-%r19 = getelementptr i32, i32* %r4, i32 2
-%r20 = load i32, i32* %r19
-%r21 = zext i32 %r20 to i96
-%r22 = shl i96 %r21, 64
-%r23 = or i96 %r17, %r22
-%r24 = zext i96 %r23 to i128
-%r26 = getelementptr i32, i32* %r4, i32 3
-%r27 = load i32, i32* %r26
-%r28 = zext i32 %r27 to i128
-%r29 = shl i128 %r28, 96
-%r30 = or i128 %r24, %r29
-%r31 = zext i128 %r30 to i160
-%r33 = getelementptr i32, i32* %r4, i32 4
-%r34 = load i32, i32* %r33
-%r35 = zext i32 %r34 to i160
-%r36 = shl i160 %r35, 128
-%r37 = or i160 %r31, %r36
-%r38 = zext i160 %r37 to i192
-%r40 = getelementptr i32, i32* %r4, i32 5
-%r41 = load i32, i32* %r40
-%r42 = zext i32 %r41 to i192
-%r43 = shl i192 %r42, 160
-%r44 = or i192 %r38, %r43
-%r45 = zext i192 %r44 to i224
-%r47 = getelementptr i32, i32* %r4, i32 6
-%r48 = load i32, i32* %r47
-%r49 = zext i32 %r48 to i224
-%r50 = shl i224 %r49, 192
-%r51 = or i224 %r45, %r50
-%r52 = zext i224 %r51 to i256
-%r54 = getelementptr i32, i32* %r4, i32 7
-%r55 = load i32, i32* %r54
-%r56 = zext i32 %r55 to i256
-%r57 = shl i256 %r56, 224
-%r58 = or i256 %r52, %r57
-%r59 = zext i256 %r58 to i288
-%r60 = load i32, i32* %r2
-%r61 = zext i32 %r60 to i64
-%r63 = getelementptr i32, i32* %r2, i32 1
-%r64 = load i32, i32* %r63
-%r65 = zext i32 %r64 to i64
-%r66 = shl i64 %r65, 32
-%r67 = or i64 %r61, %r66
-%r68 = zext i64 %r67 to i96
-%r70 = getelementptr i32, i32* %r2, i32 2
-%r71 = load i32, i32* %r70
-%r72 = zext i32 %r71 to i96
-%r73 = shl i96 %r72, 64
-%r74 = or i96 %r68, %r73
-%r75 = zext i96 %r74 to i128
-%r77 = getelementptr i32, i32* %r2, i32 3
-%r78 = load i32, i32* %r77
-%r79 = zext i32 %r78 to i128
-%r80 = shl i128 %r79, 96
-%r81 = or i128 %r75, %r80
-%r82 = zext i128 %r81 to i160
-%r84 = getelementptr i32, i32* %r2, i32 4
-%r85 = load i32, i32* %r84
-%r86 = zext i32 %r85 to i160
-%r87 = shl i160 %r86, 128
-%r88 = or i160 %r82, %r87
-%r89 = zext i160 %r88 to i192
-%r91 = getelementptr i32, i32* %r2, i32 5
-%r92 = load i32, i32* %r91
-%r93 = zext i32 %r92 to i192
-%r94 = shl i192 %r93, 160
-%r95 = or i192 %r89, %r94
-%r96 = zext i192 %r95 to i224
-%r98 = getelementptr i32, i32* %r2, i32 6
-%r99 = load i32, i32* %r98
-%r100 = zext i32 %r99 to i224
-%r101 = shl i224 %r100, 192
-%r102 = or i224 %r96, %r101
-%r103 = zext i224 %r102 to i256
-%r105 = getelementptr i32, i32* %r2, i32 7
-%r106 = load i32, i32* %r105
-%r107 = zext i32 %r106 to i256
-%r108 = shl i256 %r107, 224
-%r109 = or i256 %r103, %r108
-%r110 = zext i256 %r109 to i288
-%r111 = load i32, i32* %r6
-%r112 = zext i32 %r111 to i64
-%r114 = getelementptr i32, i32* %r6, i32 1
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i64
-%r117 = shl i64 %r116, 32
-%r118 = or i64 %r112, %r117
-%r119 = zext i64 %r118 to i96
-%r121 = getelementptr i32, i32* %r6, i32 2
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i96
-%r124 = shl i96 %r123, 64
-%r125 = or i96 %r119, %r124
-%r126 = zext i96 %r125 to i128
-%r128 = getelementptr i32, i32* %r6, i32 3
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i128
-%r131 = shl i128 %r130, 96
-%r132 = or i128 %r126, %r131
-%r133 = zext i128 %r132 to i160
-%r135 = getelementptr i32, i32* %r6, i32 4
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i160
-%r138 = shl i160 %r137, 128
-%r139 = or i160 %r133, %r138
-%r140 = zext i160 %r139 to i192
-%r142 = getelementptr i32, i32* %r6, i32 5
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i192
-%r145 = shl i192 %r144, 160
-%r146 = or i192 %r140, %r145
-%r147 = zext i192 %r146 to i224
-%r149 = getelementptr i32, i32* %r6, i32 6
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i224
-%r152 = shl i224 %r151, 192
-%r153 = or i224 %r147, %r152
-%r154 = zext i224 %r153 to i256
-%r156 = getelementptr i32, i32* %r6, i32 7
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i256
-%r159 = shl i256 %r158, 224
-%r160 = or i256 %r154, %r159
-%r161 = zext i256 %r160 to i288
-%r162 = load i32, i32* %r2
-%r163 = zext i32 %r162 to i64
-%r165 = getelementptr i32, i32* %r2, i32 1
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i64
-%r168 = shl i64 %r167, 32
-%r169 = or i64 %r163, %r168
-%r170 = zext i64 %r169 to i96
-%r172 = getelementptr i32, i32* %r2, i32 2
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i96
-%r175 = shl i96 %r174, 64
-%r176 = or i96 %r170, %r175
-%r177 = zext i96 %r176 to i128
-%r179 = getelementptr i32, i32* %r2, i32 3
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i128
-%r182 = shl i128 %r181, 96
-%r183 = or i128 %r177, %r182
-%r184 = zext i128 %r183 to i160
-%r186 = getelementptr i32, i32* %r2, i32 4
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i160
-%r189 = shl i160 %r188, 128
-%r190 = or i160 %r184, %r189
-%r191 = zext i160 %r190 to i192
-%r193 = getelementptr i32, i32* %r2, i32 5
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i192
-%r196 = shl i192 %r195, 160
-%r197 = or i192 %r191, %r196
-%r198 = zext i192 %r197 to i224
-%r200 = getelementptr i32, i32* %r2, i32 6
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i224
-%r203 = shl i224 %r202, 192
-%r204 = or i224 %r198, %r203
-%r205 = zext i224 %r204 to i256
-%r207 = getelementptr i32, i32* %r2, i32 7
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i256
-%r210 = shl i256 %r209, 224
-%r211 = or i256 %r205, %r210
-%r212 = zext i256 %r211 to i288
-%r213 = add i288 %r59, %r110
-%r214 = add i288 %r161, %r212
-%r216 = alloca i32, i32 16
-%r217 = trunc i288 %r213 to i256
-%r218 = trunc i288 %r214 to i256
-%r219 = lshr i288 %r213, 256
-%r220 = trunc i288 %r219 to i1
-%r221 = lshr i288 %r214, 256
-%r222 = trunc i288 %r221 to i1
-%r223 = and i1 %r220, %r222
-%r225 = select i1 %r220, i256 %r218, i256 0
-%r227 = select i1 %r222, i256 %r217, i256 0
-%r229 = alloca i32, i32 8
-%r231 = alloca i32, i32 8
-%r232 = trunc i256 %r217 to i32
-%r234 = getelementptr i32, i32* %r229, i32 0
-store i32 %r232, i32* %r234
-%r235 = lshr i256 %r217, 32
-%r236 = trunc i256 %r235 to i32
-%r238 = getelementptr i32, i32* %r229, i32 1
-store i32 %r236, i32* %r238
-%r239 = lshr i256 %r235, 32
-%r240 = trunc i256 %r239 to i32
-%r242 = getelementptr i32, i32* %r229, i32 2
-store i32 %r240, i32* %r242
-%r243 = lshr i256 %r239, 32
-%r244 = trunc i256 %r243 to i32
-%r246 = getelementptr i32, i32* %r229, i32 3
-store i32 %r244, i32* %r246
-%r247 = lshr i256 %r243, 32
-%r248 = trunc i256 %r247 to i32
-%r250 = getelementptr i32, i32* %r229, i32 4
-store i32 %r248, i32* %r250
-%r251 = lshr i256 %r247, 32
-%r252 = trunc i256 %r251 to i32
-%r254 = getelementptr i32, i32* %r229, i32 5
-store i32 %r252, i32* %r254
-%r255 = lshr i256 %r251, 32
-%r256 = trunc i256 %r255 to i32
-%r258 = getelementptr i32, i32* %r229, i32 6
-store i32 %r256, i32* %r258
-%r259 = lshr i256 %r255, 32
-%r260 = trunc i256 %r259 to i32
-%r262 = getelementptr i32, i32* %r229, i32 7
-store i32 %r260, i32* %r262
-%r263 = trunc i256 %r218 to i32
-%r265 = getelementptr i32, i32* %r231, i32 0
-store i32 %r263, i32* %r265
-%r266 = lshr i256 %r218, 32
-%r267 = trunc i256 %r266 to i32
-%r269 = getelementptr i32, i32* %r231, i32 1
-store i32 %r267, i32* %r269
-%r270 = lshr i256 %r266, 32
-%r271 = trunc i256 %r270 to i32
-%r273 = getelementptr i32, i32* %r231, i32 2
-store i32 %r271, i32* %r273
-%r274 = lshr i256 %r270, 32
-%r275 = trunc i256 %r274 to i32
-%r277 = getelementptr i32, i32* %r231, i32 3
-store i32 %r275, i32* %r277
-%r278 = lshr i256 %r274, 32
-%r279 = trunc i256 %r278 to i32
-%r281 = getelementptr i32, i32* %r231, i32 4
-store i32 %r279, i32* %r281
-%r282 = lshr i256 %r278, 32
-%r283 = trunc i256 %r282 to i32
-%r285 = getelementptr i32, i32* %r231, i32 5
-store i32 %r283, i32* %r285
-%r286 = lshr i256 %r282, 32
-%r287 = trunc i256 %r286 to i32
-%r289 = getelementptr i32, i32* %r231, i32 6
-store i32 %r287, i32* %r289
-%r290 = lshr i256 %r286, 32
-%r291 = trunc i256 %r290 to i32
-%r293 = getelementptr i32, i32* %r231, i32 7
-store i32 %r291, i32* %r293
-call void @mcl_fpDbl_mulPre8L(i32* %r216, i32* %r229, i32* %r231)
-%r294 = load i32, i32* %r216
-%r295 = zext i32 %r294 to i64
-%r297 = getelementptr i32, i32* %r216, i32 1
-%r298 = load i32, i32* %r297
-%r299 = zext i32 %r298 to i64
-%r300 = shl i64 %r299, 32
-%r301 = or i64 %r295, %r300
-%r302 = zext i64 %r301 to i96
-%r304 = getelementptr i32, i32* %r216, i32 2
-%r305 = load i32, i32* %r304
-%r306 = zext i32 %r305 to i96
-%r307 = shl i96 %r306, 64
-%r308 = or i96 %r302, %r307
-%r309 = zext i96 %r308 to i128
-%r311 = getelementptr i32, i32* %r216, i32 3
-%r312 = load i32, i32* %r311
-%r313 = zext i32 %r312 to i128
-%r314 = shl i128 %r313, 96
-%r315 = or i128 %r309, %r314
-%r316 = zext i128 %r315 to i160
-%r318 = getelementptr i32, i32* %r216, i32 4
-%r319 = load i32, i32* %r318
-%r320 = zext i32 %r319 to i160
-%r321 = shl i160 %r320, 128
-%r322 = or i160 %r316, %r321
-%r323 = zext i160 %r322 to i192
-%r325 = getelementptr i32, i32* %r216, i32 5
-%r326 = load i32, i32* %r325
-%r327 = zext i32 %r326 to i192
-%r328 = shl i192 %r327, 160
-%r329 = or i192 %r323, %r328
-%r330 = zext i192 %r329 to i224
-%r332 = getelementptr i32, i32* %r216, i32 6
-%r333 = load i32, i32* %r332
-%r334 = zext i32 %r333 to i224
-%r335 = shl i224 %r334, 192
-%r336 = or i224 %r330, %r335
-%r337 = zext i224 %r336 to i256
-%r339 = getelementptr i32, i32* %r216, i32 7
-%r340 = load i32, i32* %r339
-%r341 = zext i32 %r340 to i256
-%r342 = shl i256 %r341, 224
-%r343 = or i256 %r337, %r342
-%r344 = zext i256 %r343 to i288
-%r346 = getelementptr i32, i32* %r216, i32 8
-%r347 = load i32, i32* %r346
-%r348 = zext i32 %r347 to i288
-%r349 = shl i288 %r348, 256
-%r350 = or i288 %r344, %r349
-%r351 = zext i288 %r350 to i320
-%r353 = getelementptr i32, i32* %r216, i32 9
-%r354 = load i32, i32* %r353
-%r355 = zext i32 %r354 to i320
-%r356 = shl i320 %r355, 288
-%r357 = or i320 %r351, %r356
-%r358 = zext i320 %r357 to i352
-%r360 = getelementptr i32, i32* %r216, i32 10
-%r361 = load i32, i32* %r360
-%r362 = zext i32 %r361 to i352
-%r363 = shl i352 %r362, 320
-%r364 = or i352 %r358, %r363
-%r365 = zext i352 %r364 to i384
-%r367 = getelementptr i32, i32* %r216, i32 11
-%r368 = load i32, i32* %r367
-%r369 = zext i32 %r368 to i384
-%r370 = shl i384 %r369, 352
-%r371 = or i384 %r365, %r370
-%r372 = zext i384 %r371 to i416
-%r374 = getelementptr i32, i32* %r216, i32 12
-%r375 = load i32, i32* %r374
-%r376 = zext i32 %r375 to i416
-%r377 = shl i416 %r376, 384
-%r378 = or i416 %r372, %r377
-%r379 = zext i416 %r378 to i448
-%r381 = getelementptr i32, i32* %r216, i32 13
-%r382 = load i32, i32* %r381
-%r383 = zext i32 %r382 to i448
-%r384 = shl i448 %r383, 416
-%r385 = or i448 %r379, %r384
-%r386 = zext i448 %r385 to i480
-%r388 = getelementptr i32, i32* %r216, i32 14
-%r389 = load i32, i32* %r388
-%r390 = zext i32 %r389 to i480
-%r391 = shl i480 %r390, 448
-%r392 = or i480 %r386, %r391
-%r393 = zext i480 %r392 to i512
-%r395 = getelementptr i32, i32* %r216, i32 15
-%r396 = load i32, i32* %r395
-%r397 = zext i32 %r396 to i512
-%r398 = shl i512 %r397, 480
-%r399 = or i512 %r393, %r398
-%r400 = zext i512 %r399 to i544
-%r401 = zext i1 %r223 to i544
-%r402 = shl i544 %r401, 512
-%r403 = or i544 %r400, %r402
-%r404 = zext i256 %r225 to i544
-%r405 = zext i256 %r227 to i544
-%r406 = shl i544 %r404, 256
-%r407 = shl i544 %r405, 256
-%r408 = add i544 %r403, %r406
-%r409 = add i544 %r408, %r407
-%r410 = load i32, i32* %r1
-%r411 = zext i32 %r410 to i64
-%r413 = getelementptr i32, i32* %r1, i32 1
-%r414 = load i32, i32* %r413
-%r415 = zext i32 %r414 to i64
-%r416 = shl i64 %r415, 32
-%r417 = or i64 %r411, %r416
-%r418 = zext i64 %r417 to i96
-%r420 = getelementptr i32, i32* %r1, i32 2
-%r421 = load i32, i32* %r420
-%r422 = zext i32 %r421 to i96
-%r423 = shl i96 %r422, 64
-%r424 = or i96 %r418, %r423
-%r425 = zext i96 %r424 to i128
-%r427 = getelementptr i32, i32* %r1, i32 3
-%r428 = load i32, i32* %r427
-%r429 = zext i32 %r428 to i128
-%r430 = shl i128 %r429, 96
-%r431 = or i128 %r425, %r430
-%r432 = zext i128 %r431 to i160
-%r434 = getelementptr i32, i32* %r1, i32 4
-%r435 = load i32, i32* %r434
-%r436 = zext i32 %r435 to i160
-%r437 = shl i160 %r436, 128
-%r438 = or i160 %r432, %r437
-%r439 = zext i160 %r438 to i192
-%r441 = getelementptr i32, i32* %r1, i32 5
-%r442 = load i32, i32* %r441
-%r443 = zext i32 %r442 to i192
-%r444 = shl i192 %r443, 160
-%r445 = or i192 %r439, %r444
-%r446 = zext i192 %r445 to i224
-%r448 = getelementptr i32, i32* %r1, i32 6
-%r449 = load i32, i32* %r448
-%r450 = zext i32 %r449 to i224
-%r451 = shl i224 %r450, 192
-%r452 = or i224 %r446, %r451
-%r453 = zext i224 %r452 to i256
-%r455 = getelementptr i32, i32* %r1, i32 7
-%r456 = load i32, i32* %r455
-%r457 = zext i32 %r456 to i256
-%r458 = shl i256 %r457, 224
-%r459 = or i256 %r453, %r458
-%r460 = zext i256 %r459 to i288
-%r462 = getelementptr i32, i32* %r1, i32 8
-%r463 = load i32, i32* %r462
-%r464 = zext i32 %r463 to i288
-%r465 = shl i288 %r464, 256
-%r466 = or i288 %r460, %r465
-%r467 = zext i288 %r466 to i320
-%r469 = getelementptr i32, i32* %r1, i32 9
-%r470 = load i32, i32* %r469
-%r471 = zext i32 %r470 to i320
-%r472 = shl i320 %r471, 288
-%r473 = or i320 %r467, %r472
-%r474 = zext i320 %r473 to i352
-%r476 = getelementptr i32, i32* %r1, i32 10
-%r477 = load i32, i32* %r476
-%r478 = zext i32 %r477 to i352
-%r479 = shl i352 %r478, 320
-%r480 = or i352 %r474, %r479
-%r481 = zext i352 %r480 to i384
-%r483 = getelementptr i32, i32* %r1, i32 11
-%r484 = load i32, i32* %r483
-%r485 = zext i32 %r484 to i384
-%r486 = shl i384 %r485, 352
-%r487 = or i384 %r481, %r486
-%r488 = zext i384 %r487 to i416
-%r490 = getelementptr i32, i32* %r1, i32 12
-%r491 = load i32, i32* %r490
-%r492 = zext i32 %r491 to i416
-%r493 = shl i416 %r492, 384
-%r494 = or i416 %r488, %r493
-%r495 = zext i416 %r494 to i448
-%r497 = getelementptr i32, i32* %r1, i32 13
-%r498 = load i32, i32* %r497
-%r499 = zext i32 %r498 to i448
-%r500 = shl i448 %r499, 416
-%r501 = or i448 %r495, %r500
-%r502 = zext i448 %r501 to i480
-%r504 = getelementptr i32, i32* %r1, i32 14
-%r505 = load i32, i32* %r504
-%r506 = zext i32 %r505 to i480
-%r507 = shl i480 %r506, 448
-%r508 = or i480 %r502, %r507
-%r509 = zext i480 %r508 to i512
-%r511 = getelementptr i32, i32* %r1, i32 15
-%r512 = load i32, i32* %r511
-%r513 = zext i32 %r512 to i512
-%r514 = shl i512 %r513, 480
-%r515 = or i512 %r509, %r514
-%r516 = zext i512 %r515 to i544
-%r517 = sub i544 %r409, %r516
-%r519 = getelementptr i32, i32* %r1, i32 16
-%r520 = load i32, i32* %r519
-%r521 = zext i32 %r520 to i64
-%r523 = getelementptr i32, i32* %r519, i32 1
-%r524 = load i32, i32* %r523
-%r525 = zext i32 %r524 to i64
-%r526 = shl i64 %r525, 32
-%r527 = or i64 %r521, %r526
-%r528 = zext i64 %r527 to i96
-%r530 = getelementptr i32, i32* %r519, i32 2
-%r531 = load i32, i32* %r530
-%r532 = zext i32 %r531 to i96
-%r533 = shl i96 %r532, 64
-%r534 = or i96 %r528, %r533
-%r535 = zext i96 %r534 to i128
-%r537 = getelementptr i32, i32* %r519, i32 3
-%r538 = load i32, i32* %r537
-%r539 = zext i32 %r538 to i128
-%r540 = shl i128 %r539, 96
-%r541 = or i128 %r535, %r540
-%r542 = zext i128 %r541 to i160
-%r544 = getelementptr i32, i32* %r519, i32 4
-%r545 = load i32, i32* %r544
-%r546 = zext i32 %r545 to i160
-%r547 = shl i160 %r546, 128
-%r548 = or i160 %r542, %r547
-%r549 = zext i160 %r548 to i192
-%r551 = getelementptr i32, i32* %r519, i32 5
-%r552 = load i32, i32* %r551
-%r553 = zext i32 %r552 to i192
-%r554 = shl i192 %r553, 160
-%r555 = or i192 %r549, %r554
-%r556 = zext i192 %r555 to i224
-%r558 = getelementptr i32, i32* %r519, i32 6
-%r559 = load i32, i32* %r558
-%r560 = zext i32 %r559 to i224
-%r561 = shl i224 %r560, 192
-%r562 = or i224 %r556, %r561
-%r563 = zext i224 %r562 to i256
-%r565 = getelementptr i32, i32* %r519, i32 7
-%r566 = load i32, i32* %r565
-%r567 = zext i32 %r566 to i256
-%r568 = shl i256 %r567, 224
-%r569 = or i256 %r563, %r568
-%r570 = zext i256 %r569 to i288
-%r572 = getelementptr i32, i32* %r519, i32 8
-%r573 = load i32, i32* %r572
-%r574 = zext i32 %r573 to i288
-%r575 = shl i288 %r574, 256
-%r576 = or i288 %r570, %r575
-%r577 = zext i288 %r576 to i320
-%r579 = getelementptr i32, i32* %r519, i32 9
-%r580 = load i32, i32* %r579
-%r581 = zext i32 %r580 to i320
-%r582 = shl i320 %r581, 288
-%r583 = or i320 %r577, %r582
-%r584 = zext i320 %r583 to i352
-%r586 = getelementptr i32, i32* %r519, i32 10
-%r587 = load i32, i32* %r586
-%r588 = zext i32 %r587 to i352
-%r589 = shl i352 %r588, 320
-%r590 = or i352 %r584, %r589
-%r591 = zext i352 %r590 to i384
-%r593 = getelementptr i32, i32* %r519, i32 11
-%r594 = load i32, i32* %r593
-%r595 = zext i32 %r594 to i384
-%r596 = shl i384 %r595, 352
-%r597 = or i384 %r591, %r596
-%r598 = zext i384 %r597 to i416
-%r600 = getelementptr i32, i32* %r519, i32 12
-%r601 = load i32, i32* %r600
-%r602 = zext i32 %r601 to i416
-%r603 = shl i416 %r602, 384
-%r604 = or i416 %r598, %r603
-%r605 = zext i416 %r604 to i448
-%r607 = getelementptr i32, i32* %r519, i32 13
-%r608 = load i32, i32* %r607
-%r609 = zext i32 %r608 to i448
-%r610 = shl i448 %r609, 416
-%r611 = or i448 %r605, %r610
-%r612 = zext i448 %r611 to i480
-%r614 = getelementptr i32, i32* %r519, i32 14
-%r615 = load i32, i32* %r614
-%r616 = zext i32 %r615 to i480
-%r617 = shl i480 %r616, 448
-%r618 = or i480 %r612, %r617
-%r619 = zext i480 %r618 to i512
-%r621 = getelementptr i32, i32* %r519, i32 15
-%r622 = load i32, i32* %r621
-%r623 = zext i32 %r622 to i512
-%r624 = shl i512 %r623, 480
-%r625 = or i512 %r619, %r624
-%r626 = zext i512 %r625 to i544
-%r627 = sub i544 %r517, %r626
-%r628 = zext i544 %r627 to i768
-%r630 = getelementptr i32, i32* %r1, i32 8
-%r631 = load i32, i32* %r630
-%r632 = zext i32 %r631 to i64
-%r634 = getelementptr i32, i32* %r630, i32 1
-%r635 = load i32, i32* %r634
-%r636 = zext i32 %r635 to i64
-%r637 = shl i64 %r636, 32
-%r638 = or i64 %r632, %r637
-%r639 = zext i64 %r638 to i96
-%r641 = getelementptr i32, i32* %r630, i32 2
-%r642 = load i32, i32* %r641
-%r643 = zext i32 %r642 to i96
-%r644 = shl i96 %r643, 64
-%r645 = or i96 %r639, %r644
-%r646 = zext i96 %r645 to i128
-%r648 = getelementptr i32, i32* %r630, i32 3
-%r649 = load i32, i32* %r648
-%r650 = zext i32 %r649 to i128
-%r651 = shl i128 %r650, 96
-%r652 = or i128 %r646, %r651
-%r653 = zext i128 %r652 to i160
-%r655 = getelementptr i32, i32* %r630, i32 4
-%r656 = load i32, i32* %r655
-%r657 = zext i32 %r656 to i160
-%r658 = shl i160 %r657, 128
-%r659 = or i160 %r653, %r658
-%r660 = zext i160 %r659 to i192
-%r662 = getelementptr i32, i32* %r630, i32 5
-%r663 = load i32, i32* %r662
-%r664 = zext i32 %r663 to i192
-%r665 = shl i192 %r664, 160
-%r666 = or i192 %r660, %r665
-%r667 = zext i192 %r666 to i224
-%r669 = getelementptr i32, i32* %r630, i32 6
-%r670 = load i32, i32* %r669
-%r671 = zext i32 %r670 to i224
-%r672 = shl i224 %r671, 192
-%r673 = or i224 %r667, %r672
-%r674 = zext i224 %r673 to i256
-%r676 = getelementptr i32, i32* %r630, i32 7
-%r677 = load i32, i32* %r676
-%r678 = zext i32 %r677 to i256
-%r679 = shl i256 %r678, 224
-%r680 = or i256 %r674, %r679
-%r681 = zext i256 %r680 to i288
-%r683 = getelementptr i32, i32* %r630, i32 8
-%r684 = load i32, i32* %r683
-%r685 = zext i32 %r684 to i288
-%r686 = shl i288 %r685, 256
-%r687 = or i288 %r681, %r686
-%r688 = zext i288 %r687 to i320
-%r690 = getelementptr i32, i32* %r630, i32 9
-%r691 = load i32, i32* %r690
-%r692 = zext i32 %r691 to i320
-%r693 = shl i320 %r692, 288
-%r694 = or i320 %r688, %r693
-%r695 = zext i320 %r694 to i352
-%r697 = getelementptr i32, i32* %r630, i32 10
-%r698 = load i32, i32* %r697
-%r699 = zext i32 %r698 to i352
-%r700 = shl i352 %r699, 320
-%r701 = or i352 %r695, %r700
-%r702 = zext i352 %r701 to i384
-%r704 = getelementptr i32, i32* %r630, i32 11
-%r705 = load i32, i32* %r704
-%r706 = zext i32 %r705 to i384
-%r707 = shl i384 %r706, 352
-%r708 = or i384 %r702, %r707
-%r709 = zext i384 %r708 to i416
-%r711 = getelementptr i32, i32* %r630, i32 12
-%r712 = load i32, i32* %r711
-%r713 = zext i32 %r712 to i416
-%r714 = shl i416 %r713, 384
-%r715 = or i416 %r709, %r714
-%r716 = zext i416 %r715 to i448
-%r718 = getelementptr i32, i32* %r630, i32 13
-%r719 = load i32, i32* %r718
-%r720 = zext i32 %r719 to i448
-%r721 = shl i448 %r720, 416
-%r722 = or i448 %r716, %r721
-%r723 = zext i448 %r722 to i480
-%r725 = getelementptr i32, i32* %r630, i32 14
-%r726 = load i32, i32* %r725
-%r727 = zext i32 %r726 to i480
-%r728 = shl i480 %r727, 448
-%r729 = or i480 %r723, %r728
-%r730 = zext i480 %r729 to i512
-%r732 = getelementptr i32, i32* %r630, i32 15
-%r733 = load i32, i32* %r732
-%r734 = zext i32 %r733 to i512
-%r735 = shl i512 %r734, 480
-%r736 = or i512 %r730, %r735
-%r737 = zext i512 %r736 to i544
-%r739 = getelementptr i32, i32* %r630, i32 16
-%r740 = load i32, i32* %r739
-%r741 = zext i32 %r740 to i544
-%r742 = shl i544 %r741, 512
-%r743 = or i544 %r737, %r742
-%r744 = zext i544 %r743 to i576
-%r746 = getelementptr i32, i32* %r630, i32 17
-%r747 = load i32, i32* %r746
-%r748 = zext i32 %r747 to i576
-%r749 = shl i576 %r748, 544
-%r750 = or i576 %r744, %r749
-%r751 = zext i576 %r750 to i608
-%r753 = getelementptr i32, i32* %r630, i32 18
-%r754 = load i32, i32* %r753
-%r755 = zext i32 %r754 to i608
-%r756 = shl i608 %r755, 576
-%r757 = or i608 %r751, %r756
-%r758 = zext i608 %r757 to i640
-%r760 = getelementptr i32, i32* %r630, i32 19
-%r761 = load i32, i32* %r760
-%r762 = zext i32 %r761 to i640
-%r763 = shl i640 %r762, 608
-%r764 = or i640 %r758, %r763
-%r765 = zext i640 %r764 to i672
-%r767 = getelementptr i32, i32* %r630, i32 20
-%r768 = load i32, i32* %r767
-%r769 = zext i32 %r768 to i672
-%r770 = shl i672 %r769, 640
-%r771 = or i672 %r765, %r770
-%r772 = zext i672 %r771 to i704
-%r774 = getelementptr i32, i32* %r630, i32 21
-%r775 = load i32, i32* %r774
-%r776 = zext i32 %r775 to i704
-%r777 = shl i704 %r776, 672
-%r778 = or i704 %r772, %r777
-%r779 = zext i704 %r778 to i736
-%r781 = getelementptr i32, i32* %r630, i32 22
-%r782 = load i32, i32* %r781
-%r783 = zext i32 %r782 to i736
-%r784 = shl i736 %r783, 704
-%r785 = or i736 %r779, %r784
-%r786 = zext i736 %r785 to i768
-%r788 = getelementptr i32, i32* %r630, i32 23
-%r789 = load i32, i32* %r788
-%r790 = zext i32 %r789 to i768
-%r791 = shl i768 %r790, 736
-%r792 = or i768 %r786, %r791
-%r793 = add i768 %r628, %r792
-%r795 = getelementptr i32, i32* %r1, i32 8
-%r796 = trunc i768 %r793 to i32
-%r798 = getelementptr i32, i32* %r795, i32 0
-store i32 %r796, i32* %r798
-%r799 = lshr i768 %r793, 32
-%r800 = trunc i768 %r799 to i32
-%r802 = getelementptr i32, i32* %r795, i32 1
-store i32 %r800, i32* %r802
-%r803 = lshr i768 %r799, 32
-%r804 = trunc i768 %r803 to i32
-%r806 = getelementptr i32, i32* %r795, i32 2
-store i32 %r804, i32* %r806
-%r807 = lshr i768 %r803, 32
-%r808 = trunc i768 %r807 to i32
-%r810 = getelementptr i32, i32* %r795, i32 3
-store i32 %r808, i32* %r810
-%r811 = lshr i768 %r807, 32
-%r812 = trunc i768 %r811 to i32
-%r814 = getelementptr i32, i32* %r795, i32 4
-store i32 %r812, i32* %r814
-%r815 = lshr i768 %r811, 32
-%r816 = trunc i768 %r815 to i32
-%r818 = getelementptr i32, i32* %r795, i32 5
-store i32 %r816, i32* %r818
-%r819 = lshr i768 %r815, 32
-%r820 = trunc i768 %r819 to i32
-%r822 = getelementptr i32, i32* %r795, i32 6
-store i32 %r820, i32* %r822
-%r823 = lshr i768 %r819, 32
-%r824 = trunc i768 %r823 to i32
-%r826 = getelementptr i32, i32* %r795, i32 7
-store i32 %r824, i32* %r826
-%r827 = lshr i768 %r823, 32
-%r828 = trunc i768 %r827 to i32
-%r830 = getelementptr i32, i32* %r795, i32 8
-store i32 %r828, i32* %r830
-%r831 = lshr i768 %r827, 32
-%r832 = trunc i768 %r831 to i32
-%r834 = getelementptr i32, i32* %r795, i32 9
-store i32 %r832, i32* %r834
-%r835 = lshr i768 %r831, 32
-%r836 = trunc i768 %r835 to i32
-%r838 = getelementptr i32, i32* %r795, i32 10
-store i32 %r836, i32* %r838
-%r839 = lshr i768 %r835, 32
-%r840 = trunc i768 %r839 to i32
-%r842 = getelementptr i32, i32* %r795, i32 11
-store i32 %r840, i32* %r842
-%r843 = lshr i768 %r839, 32
-%r844 = trunc i768 %r843 to i32
-%r846 = getelementptr i32, i32* %r795, i32 12
-store i32 %r844, i32* %r846
-%r847 = lshr i768 %r843, 32
-%r848 = trunc i768 %r847 to i32
-%r850 = getelementptr i32, i32* %r795, i32 13
-store i32 %r848, i32* %r850
-%r851 = lshr i768 %r847, 32
-%r852 = trunc i768 %r851 to i32
-%r854 = getelementptr i32, i32* %r795, i32 14
-store i32 %r852, i32* %r854
-%r855 = lshr i768 %r851, 32
-%r856 = trunc i768 %r855 to i32
-%r858 = getelementptr i32, i32* %r795, i32 15
-store i32 %r856, i32* %r858
-%r859 = lshr i768 %r855, 32
-%r860 = trunc i768 %r859 to i32
-%r862 = getelementptr i32, i32* %r795, i32 16
-store i32 %r860, i32* %r862
-%r863 = lshr i768 %r859, 32
-%r864 = trunc i768 %r863 to i32
-%r866 = getelementptr i32, i32* %r795, i32 17
-store i32 %r864, i32* %r866
-%r867 = lshr i768 %r863, 32
-%r868 = trunc i768 %r867 to i32
-%r870 = getelementptr i32, i32* %r795, i32 18
-store i32 %r868, i32* %r870
-%r871 = lshr i768 %r867, 32
-%r872 = trunc i768 %r871 to i32
-%r874 = getelementptr i32, i32* %r795, i32 19
-store i32 %r872, i32* %r874
-%r875 = lshr i768 %r871, 32
-%r876 = trunc i768 %r875 to i32
-%r878 = getelementptr i32, i32* %r795, i32 20
-store i32 %r876, i32* %r878
-%r879 = lshr i768 %r875, 32
-%r880 = trunc i768 %r879 to i32
-%r882 = getelementptr i32, i32* %r795, i32 21
-store i32 %r880, i32* %r882
-%r883 = lshr i768 %r879, 32
-%r884 = trunc i768 %r883 to i32
-%r886 = getelementptr i32, i32* %r795, i32 22
-store i32 %r884, i32* %r886
-%r887 = lshr i768 %r883, 32
-%r888 = trunc i768 %r887 to i32
-%r890 = getelementptr i32, i32* %r795, i32 23
-store i32 %r888, i32* %r890
-ret void
-}
-define void @mcl_fp_mont16L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i544 @mulPv512x32(i32* %r2, i32 %r10)
-%r12 = zext i544 %r11 to i576
-%r13 = trunc i544 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i544 @mulPv512x32(i32* %r4, i32 %r14)
-%r16 = zext i544 %r15 to i576
-%r17 = add i576 %r12, %r16
-%r18 = lshr i576 %r17, 32
-%r20 = getelementptr i32, i32* %r3, i32 1
-%r21 = load i32, i32* %r20
-%r22 = call i544 @mulPv512x32(i32* %r2, i32 %r21)
-%r23 = zext i544 %r22 to i576
-%r24 = add i576 %r18, %r23
-%r25 = trunc i576 %r24 to i32
-%r26 = mul i32 %r25, %r7
-%r27 = call i544 @mulPv512x32(i32* %r4, i32 %r26)
-%r28 = zext i544 %r27 to i576
-%r29 = add i576 %r24, %r28
-%r30 = lshr i576 %r29, 32
-%r32 = getelementptr i32, i32* %r3, i32 2
-%r33 = load i32, i32* %r32
-%r34 = call i544 @mulPv512x32(i32* %r2, i32 %r33)
-%r35 = zext i544 %r34 to i576
-%r36 = add i576 %r30, %r35
-%r37 = trunc i576 %r36 to i32
-%r38 = mul i32 %r37, %r7
-%r39 = call i544 @mulPv512x32(i32* %r4, i32 %r38)
-%r40 = zext i544 %r39 to i576
-%r41 = add i576 %r36, %r40
-%r42 = lshr i576 %r41, 32
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = call i544 @mulPv512x32(i32* %r2, i32 %r45)
-%r47 = zext i544 %r46 to i576
-%r48 = add i576 %r42, %r47
-%r49 = trunc i576 %r48 to i32
-%r50 = mul i32 %r49, %r7
-%r51 = call i544 @mulPv512x32(i32* %r4, i32 %r50)
-%r52 = zext i544 %r51 to i576
-%r53 = add i576 %r48, %r52
-%r54 = lshr i576 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 4
-%r57 = load i32, i32* %r56
-%r58 = call i544 @mulPv512x32(i32* %r2, i32 %r57)
-%r59 = zext i544 %r58 to i576
-%r60 = add i576 %r54, %r59
-%r61 = trunc i576 %r60 to i32
-%r62 = mul i32 %r61, %r7
-%r63 = call i544 @mulPv512x32(i32* %r4, i32 %r62)
-%r64 = zext i544 %r63 to i576
-%r65 = add i576 %r60, %r64
-%r66 = lshr i576 %r65, 32
-%r68 = getelementptr i32, i32* %r3, i32 5
-%r69 = load i32, i32* %r68
-%r70 = call i544 @mulPv512x32(i32* %r2, i32 %r69)
-%r71 = zext i544 %r70 to i576
-%r72 = add i576 %r66, %r71
-%r73 = trunc i576 %r72 to i32
-%r74 = mul i32 %r73, %r7
-%r75 = call i544 @mulPv512x32(i32* %r4, i32 %r74)
-%r76 = zext i544 %r75 to i576
-%r77 = add i576 %r72, %r76
-%r78 = lshr i576 %r77, 32
-%r80 = getelementptr i32, i32* %r3, i32 6
-%r81 = load i32, i32* %r80
-%r82 = call i544 @mulPv512x32(i32* %r2, i32 %r81)
-%r83 = zext i544 %r82 to i576
-%r84 = add i576 %r78, %r83
-%r85 = trunc i576 %r84 to i32
-%r86 = mul i32 %r85, %r7
-%r87 = call i544 @mulPv512x32(i32* %r4, i32 %r86)
-%r88 = zext i544 %r87 to i576
-%r89 = add i576 %r84, %r88
-%r90 = lshr i576 %r89, 32
-%r92 = getelementptr i32, i32* %r3, i32 7
-%r93 = load i32, i32* %r92
-%r94 = call i544 @mulPv512x32(i32* %r2, i32 %r93)
-%r95 = zext i544 %r94 to i576
-%r96 = add i576 %r90, %r95
-%r97 = trunc i576 %r96 to i32
-%r98 = mul i32 %r97, %r7
-%r99 = call i544 @mulPv512x32(i32* %r4, i32 %r98)
-%r100 = zext i544 %r99 to i576
-%r101 = add i576 %r96, %r100
-%r102 = lshr i576 %r101, 32
-%r104 = getelementptr i32, i32* %r3, i32 8
-%r105 = load i32, i32* %r104
-%r106 = call i544 @mulPv512x32(i32* %r2, i32 %r105)
-%r107 = zext i544 %r106 to i576
-%r108 = add i576 %r102, %r107
-%r109 = trunc i576 %r108 to i32
-%r110 = mul i32 %r109, %r7
-%r111 = call i544 @mulPv512x32(i32* %r4, i32 %r110)
-%r112 = zext i544 %r111 to i576
-%r113 = add i576 %r108, %r112
-%r114 = lshr i576 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 9
-%r117 = load i32, i32* %r116
-%r118 = call i544 @mulPv512x32(i32* %r2, i32 %r117)
-%r119 = zext i544 %r118 to i576
-%r120 = add i576 %r114, %r119
-%r121 = trunc i576 %r120 to i32
-%r122 = mul i32 %r121, %r7
-%r123 = call i544 @mulPv512x32(i32* %r4, i32 %r122)
-%r124 = zext i544 %r123 to i576
-%r125 = add i576 %r120, %r124
-%r126 = lshr i576 %r125, 32
-%r128 = getelementptr i32, i32* %r3, i32 10
-%r129 = load i32, i32* %r128
-%r130 = call i544 @mulPv512x32(i32* %r2, i32 %r129)
-%r131 = zext i544 %r130 to i576
-%r132 = add i576 %r126, %r131
-%r133 = trunc i576 %r132 to i32
-%r134 = mul i32 %r133, %r7
-%r135 = call i544 @mulPv512x32(i32* %r4, i32 %r134)
-%r136 = zext i544 %r135 to i576
-%r137 = add i576 %r132, %r136
-%r138 = lshr i576 %r137, 32
-%r140 = getelementptr i32, i32* %r3, i32 11
-%r141 = load i32, i32* %r140
-%r142 = call i544 @mulPv512x32(i32* %r2, i32 %r141)
-%r143 = zext i544 %r142 to i576
-%r144 = add i576 %r138, %r143
-%r145 = trunc i576 %r144 to i32
-%r146 = mul i32 %r145, %r7
-%r147 = call i544 @mulPv512x32(i32* %r4, i32 %r146)
-%r148 = zext i544 %r147 to i576
-%r149 = add i576 %r144, %r148
-%r150 = lshr i576 %r149, 32
-%r152 = getelementptr i32, i32* %r3, i32 12
-%r153 = load i32, i32* %r152
-%r154 = call i544 @mulPv512x32(i32* %r2, i32 %r153)
-%r155 = zext i544 %r154 to i576
-%r156 = add i576 %r150, %r155
-%r157 = trunc i576 %r156 to i32
-%r158 = mul i32 %r157, %r7
-%r159 = call i544 @mulPv512x32(i32* %r4, i32 %r158)
-%r160 = zext i544 %r159 to i576
-%r161 = add i576 %r156, %r160
-%r162 = lshr i576 %r161, 32
-%r164 = getelementptr i32, i32* %r3, i32 13
-%r165 = load i32, i32* %r164
-%r166 = call i544 @mulPv512x32(i32* %r2, i32 %r165)
-%r167 = zext i544 %r166 to i576
-%r168 = add i576 %r162, %r167
-%r169 = trunc i576 %r168 to i32
-%r170 = mul i32 %r169, %r7
-%r171 = call i544 @mulPv512x32(i32* %r4, i32 %r170)
-%r172 = zext i544 %r171 to i576
-%r173 = add i576 %r168, %r172
-%r174 = lshr i576 %r173, 32
-%r176 = getelementptr i32, i32* %r3, i32 14
-%r177 = load i32, i32* %r176
-%r178 = call i544 @mulPv512x32(i32* %r2, i32 %r177)
-%r179 = zext i544 %r178 to i576
-%r180 = add i576 %r174, %r179
-%r181 = trunc i576 %r180 to i32
-%r182 = mul i32 %r181, %r7
-%r183 = call i544 @mulPv512x32(i32* %r4, i32 %r182)
-%r184 = zext i544 %r183 to i576
-%r185 = add i576 %r180, %r184
-%r186 = lshr i576 %r185, 32
-%r188 = getelementptr i32, i32* %r3, i32 15
-%r189 = load i32, i32* %r188
-%r190 = call i544 @mulPv512x32(i32* %r2, i32 %r189)
-%r191 = zext i544 %r190 to i576
-%r192 = add i576 %r186, %r191
-%r193 = trunc i576 %r192 to i32
-%r194 = mul i32 %r193, %r7
-%r195 = call i544 @mulPv512x32(i32* %r4, i32 %r194)
-%r196 = zext i544 %r195 to i576
-%r197 = add i576 %r192, %r196
-%r198 = lshr i576 %r197, 32
-%r199 = trunc i576 %r198 to i544
-%r200 = load i32, i32* %r4
-%r201 = zext i32 %r200 to i64
-%r203 = getelementptr i32, i32* %r4, i32 1
-%r204 = load i32, i32* %r203
-%r205 = zext i32 %r204 to i64
-%r206 = shl i64 %r205, 32
-%r207 = or i64 %r201, %r206
-%r208 = zext i64 %r207 to i96
-%r210 = getelementptr i32, i32* %r4, i32 2
-%r211 = load i32, i32* %r210
-%r212 = zext i32 %r211 to i96
-%r213 = shl i96 %r212, 64
-%r214 = or i96 %r208, %r213
-%r215 = zext i96 %r214 to i128
-%r217 = getelementptr i32, i32* %r4, i32 3
-%r218 = load i32, i32* %r217
-%r219 = zext i32 %r218 to i128
-%r220 = shl i128 %r219, 96
-%r221 = or i128 %r215, %r220
-%r222 = zext i128 %r221 to i160
-%r224 = getelementptr i32, i32* %r4, i32 4
-%r225 = load i32, i32* %r224
-%r226 = zext i32 %r225 to i160
-%r227 = shl i160 %r226, 128
-%r228 = or i160 %r222, %r227
-%r229 = zext i160 %r228 to i192
-%r231 = getelementptr i32, i32* %r4, i32 5
-%r232 = load i32, i32* %r231
-%r233 = zext i32 %r232 to i192
-%r234 = shl i192 %r233, 160
-%r235 = or i192 %r229, %r234
-%r236 = zext i192 %r235 to i224
-%r238 = getelementptr i32, i32* %r4, i32 6
-%r239 = load i32, i32* %r238
-%r240 = zext i32 %r239 to i224
-%r241 = shl i224 %r240, 192
-%r242 = or i224 %r236, %r241
-%r243 = zext i224 %r242 to i256
-%r245 = getelementptr i32, i32* %r4, i32 7
-%r246 = load i32, i32* %r245
-%r247 = zext i32 %r246 to i256
-%r248 = shl i256 %r247, 224
-%r249 = or i256 %r243, %r248
-%r250 = zext i256 %r249 to i288
-%r252 = getelementptr i32, i32* %r4, i32 8
-%r253 = load i32, i32* %r252
-%r254 = zext i32 %r253 to i288
-%r255 = shl i288 %r254, 256
-%r256 = or i288 %r250, %r255
-%r257 = zext i288 %r256 to i320
-%r259 = getelementptr i32, i32* %r4, i32 9
-%r260 = load i32, i32* %r259
-%r261 = zext i32 %r260 to i320
-%r262 = shl i320 %r261, 288
-%r263 = or i320 %r257, %r262
-%r264 = zext i320 %r263 to i352
-%r266 = getelementptr i32, i32* %r4, i32 10
-%r267 = load i32, i32* %r266
-%r268 = zext i32 %r267 to i352
-%r269 = shl i352 %r268, 320
-%r270 = or i352 %r264, %r269
-%r271 = zext i352 %r270 to i384
-%r273 = getelementptr i32, i32* %r4, i32 11
-%r274 = load i32, i32* %r273
-%r275 = zext i32 %r274 to i384
-%r276 = shl i384 %r275, 352
-%r277 = or i384 %r271, %r276
-%r278 = zext i384 %r277 to i416
-%r280 = getelementptr i32, i32* %r4, i32 12
-%r281 = load i32, i32* %r280
-%r282 = zext i32 %r281 to i416
-%r283 = shl i416 %r282, 384
-%r284 = or i416 %r278, %r283
-%r285 = zext i416 %r284 to i448
-%r287 = getelementptr i32, i32* %r4, i32 13
-%r288 = load i32, i32* %r287
-%r289 = zext i32 %r288 to i448
-%r290 = shl i448 %r289, 416
-%r291 = or i448 %r285, %r290
-%r292 = zext i448 %r291 to i480
-%r294 = getelementptr i32, i32* %r4, i32 14
-%r295 = load i32, i32* %r294
-%r296 = zext i32 %r295 to i480
-%r297 = shl i480 %r296, 448
-%r298 = or i480 %r292, %r297
-%r299 = zext i480 %r298 to i512
-%r301 = getelementptr i32, i32* %r4, i32 15
-%r302 = load i32, i32* %r301
-%r303 = zext i32 %r302 to i512
-%r304 = shl i512 %r303, 480
-%r305 = or i512 %r299, %r304
-%r306 = zext i512 %r305 to i544
-%r307 = sub i544 %r199, %r306
-%r308 = lshr i544 %r307, 512
-%r309 = trunc i544 %r308 to i1
-%r310 = select i1 %r309, i544 %r199, i544 %r307
-%r311 = trunc i544 %r310 to i512
-%r312 = trunc i512 %r311 to i32
-%r314 = getelementptr i32, i32* %r1, i32 0
-store i32 %r312, i32* %r314
-%r315 = lshr i512 %r311, 32
-%r316 = trunc i512 %r315 to i32
-%r318 = getelementptr i32, i32* %r1, i32 1
-store i32 %r316, i32* %r318
-%r319 = lshr i512 %r315, 32
-%r320 = trunc i512 %r319 to i32
-%r322 = getelementptr i32, i32* %r1, i32 2
-store i32 %r320, i32* %r322
-%r323 = lshr i512 %r319, 32
-%r324 = trunc i512 %r323 to i32
-%r326 = getelementptr i32, i32* %r1, i32 3
-store i32 %r324, i32* %r326
-%r327 = lshr i512 %r323, 32
-%r328 = trunc i512 %r327 to i32
-%r330 = getelementptr i32, i32* %r1, i32 4
-store i32 %r328, i32* %r330
-%r331 = lshr i512 %r327, 32
-%r332 = trunc i512 %r331 to i32
-%r334 = getelementptr i32, i32* %r1, i32 5
-store i32 %r332, i32* %r334
-%r335 = lshr i512 %r331, 32
-%r336 = trunc i512 %r335 to i32
-%r338 = getelementptr i32, i32* %r1, i32 6
-store i32 %r336, i32* %r338
-%r339 = lshr i512 %r335, 32
-%r340 = trunc i512 %r339 to i32
-%r342 = getelementptr i32, i32* %r1, i32 7
-store i32 %r340, i32* %r342
-%r343 = lshr i512 %r339, 32
-%r344 = trunc i512 %r343 to i32
-%r346 = getelementptr i32, i32* %r1, i32 8
-store i32 %r344, i32* %r346
-%r347 = lshr i512 %r343, 32
-%r348 = trunc i512 %r347 to i32
-%r350 = getelementptr i32, i32* %r1, i32 9
-store i32 %r348, i32* %r350
-%r351 = lshr i512 %r347, 32
-%r352 = trunc i512 %r351 to i32
-%r354 = getelementptr i32, i32* %r1, i32 10
-store i32 %r352, i32* %r354
-%r355 = lshr i512 %r351, 32
-%r356 = trunc i512 %r355 to i32
-%r358 = getelementptr i32, i32* %r1, i32 11
-store i32 %r356, i32* %r358
-%r359 = lshr i512 %r355, 32
-%r360 = trunc i512 %r359 to i32
-%r362 = getelementptr i32, i32* %r1, i32 12
-store i32 %r360, i32* %r362
-%r363 = lshr i512 %r359, 32
-%r364 = trunc i512 %r363 to i32
-%r366 = getelementptr i32, i32* %r1, i32 13
-store i32 %r364, i32* %r366
-%r367 = lshr i512 %r363, 32
-%r368 = trunc i512 %r367 to i32
-%r370 = getelementptr i32, i32* %r1, i32 14
-store i32 %r368, i32* %r370
-%r371 = lshr i512 %r367, 32
-%r372 = trunc i512 %r371 to i32
-%r374 = getelementptr i32, i32* %r1, i32 15
-store i32 %r372, i32* %r374
-ret void
-}
-define void @mcl_fp_montNF16L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
-{
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i544 @mulPv512x32(i32* %r2, i32 %r8)
-%r10 = trunc i544 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i544 @mulPv512x32(i32* %r4, i32 %r11)
-%r13 = add i544 %r9, %r12
-%r14 = lshr i544 %r13, 32
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = call i544 @mulPv512x32(i32* %r2, i32 %r17)
-%r19 = add i544 %r14, %r18
-%r20 = trunc i544 %r19 to i32
-%r21 = mul i32 %r20, %r7
-%r22 = call i544 @mulPv512x32(i32* %r4, i32 %r21)
-%r23 = add i544 %r19, %r22
-%r24 = lshr i544 %r23, 32
-%r26 = getelementptr i32, i32* %r3, i32 2
-%r27 = load i32, i32* %r26
-%r28 = call i544 @mulPv512x32(i32* %r2, i32 %r27)
-%r29 = add i544 %r24, %r28
-%r30 = trunc i544 %r29 to i32
-%r31 = mul i32 %r30, %r7
-%r32 = call i544 @mulPv512x32(i32* %r4, i32 %r31)
-%r33 = add i544 %r29, %r32
-%r34 = lshr i544 %r33, 32
-%r36 = getelementptr i32, i32* %r3, i32 3
-%r37 = load i32, i32* %r36
-%r38 = call i544 @mulPv512x32(i32* %r2, i32 %r37)
-%r39 = add i544 %r34, %r38
-%r40 = trunc i544 %r39 to i32
-%r41 = mul i32 %r40, %r7
-%r42 = call i544 @mulPv512x32(i32* %r4, i32 %r41)
-%r43 = add i544 %r39, %r42
-%r44 = lshr i544 %r43, 32
-%r46 = getelementptr i32, i32* %r3, i32 4
-%r47 = load i32, i32* %r46
-%r48 = call i544 @mulPv512x32(i32* %r2, i32 %r47)
-%r49 = add i544 %r44, %r48
-%r50 = trunc i544 %r49 to i32
-%r51 = mul i32 %r50, %r7
-%r52 = call i544 @mulPv512x32(i32* %r4, i32 %r51)
-%r53 = add i544 %r49, %r52
-%r54 = lshr i544 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 5
-%r57 = load i32, i32* %r56
-%r58 = call i544 @mulPv512x32(i32* %r2, i32 %r57)
-%r59 = add i544 %r54, %r58
-%r60 = trunc i544 %r59 to i32
-%r61 = mul i32 %r60, %r7
-%r62 = call i544 @mulPv512x32(i32* %r4, i32 %r61)
-%r63 = add i544 %r59, %r62
-%r64 = lshr i544 %r63, 32
-%r66 = getelementptr i32, i32* %r3, i32 6
-%r67 = load i32, i32* %r66
-%r68 = call i544 @mulPv512x32(i32* %r2, i32 %r67)
-%r69 = add i544 %r64, %r68
-%r70 = trunc i544 %r69 to i32
-%r71 = mul i32 %r70, %r7
-%r72 = call i544 @mulPv512x32(i32* %r4, i32 %r71)
-%r73 = add i544 %r69, %r72
-%r74 = lshr i544 %r73, 32
-%r76 = getelementptr i32, i32* %r3, i32 7
-%r77 = load i32, i32* %r76
-%r78 = call i544 @mulPv512x32(i32* %r2, i32 %r77)
-%r79 = add i544 %r74, %r78
-%r80 = trunc i544 %r79 to i32
-%r81 = mul i32 %r80, %r7
-%r82 = call i544 @mulPv512x32(i32* %r4, i32 %r81)
-%r83 = add i544 %r79, %r82
-%r84 = lshr i544 %r83, 32
-%r86 = getelementptr i32, i32* %r3, i32 8
-%r87 = load i32, i32* %r86
-%r88 = call i544 @mulPv512x32(i32* %r2, i32 %r87)
-%r89 = add i544 %r84, %r88
-%r90 = trunc i544 %r89 to i32
-%r91 = mul i32 %r90, %r7
-%r92 = call i544 @mulPv512x32(i32* %r4, i32 %r91)
-%r93 = add i544 %r89, %r92
-%r94 = lshr i544 %r93, 32
-%r96 = getelementptr i32, i32* %r3, i32 9
-%r97 = load i32, i32* %r96
-%r98 = call i544 @mulPv512x32(i32* %r2, i32 %r97)
-%r99 = add i544 %r94, %r98
-%r100 = trunc i544 %r99 to i32
-%r101 = mul i32 %r100, %r7
-%r102 = call i544 @mulPv512x32(i32* %r4, i32 %r101)
-%r103 = add i544 %r99, %r102
-%r104 = lshr i544 %r103, 32
-%r106 = getelementptr i32, i32* %r3, i32 10
-%r107 = load i32, i32* %r106
-%r108 = call i544 @mulPv512x32(i32* %r2, i32 %r107)
-%r109 = add i544 %r104, %r108
-%r110 = trunc i544 %r109 to i32
-%r111 = mul i32 %r110, %r7
-%r112 = call i544 @mulPv512x32(i32* %r4, i32 %r111)
-%r113 = add i544 %r109, %r112
-%r114 = lshr i544 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 11
-%r117 = load i32, i32* %r116
-%r118 = call i544 @mulPv512x32(i32* %r2, i32 %r117)
-%r119 = add i544 %r114, %r118
-%r120 = trunc i544 %r119 to i32
-%r121 = mul i32 %r120, %r7
-%r122 = call i544 @mulPv512x32(i32* %r4, i32 %r121)
-%r123 = add i544 %r119, %r122
-%r124 = lshr i544 %r123, 32
-%r126 = getelementptr i32, i32* %r3, i32 12
-%r127 = load i32, i32* %r126
-%r128 = call i544 @mulPv512x32(i32* %r2, i32 %r127)
-%r129 = add i544 %r124, %r128
-%r130 = trunc i544 %r129 to i32
-%r131 = mul i32 %r130, %r7
-%r132 = call i544 @mulPv512x32(i32* %r4, i32 %r131)
-%r133 = add i544 %r129, %r132
-%r134 = lshr i544 %r133, 32
-%r136 = getelementptr i32, i32* %r3, i32 13
-%r137 = load i32, i32* %r136
-%r138 = call i544 @mulPv512x32(i32* %r2, i32 %r137)
-%r139 = add i544 %r134, %r138
-%r140 = trunc i544 %r139 to i32
-%r141 = mul i32 %r140, %r7
-%r142 = call i544 @mulPv512x32(i32* %r4, i32 %r141)
-%r143 = add i544 %r139, %r142
-%r144 = lshr i544 %r143, 32
-%r146 = getelementptr i32, i32* %r3, i32 14
-%r147 = load i32, i32* %r146
-%r148 = call i544 @mulPv512x32(i32* %r2, i32 %r147)
-%r149 = add i544 %r144, %r148
-%r150 = trunc i544 %r149 to i32
-%r151 = mul i32 %r150, %r7
-%r152 = call i544 @mulPv512x32(i32* %r4, i32 %r151)
-%r153 = add i544 %r149, %r152
-%r154 = lshr i544 %r153, 32
-%r156 = getelementptr i32, i32* %r3, i32 15
-%r157 = load i32, i32* %r156
-%r158 = call i544 @mulPv512x32(i32* %r2, i32 %r157)
-%r159 = add i544 %r154, %r158
-%r160 = trunc i544 %r159 to i32
-%r161 = mul i32 %r160, %r7
-%r162 = call i544 @mulPv512x32(i32* %r4, i32 %r161)
-%r163 = add i544 %r159, %r162
-%r164 = lshr i544 %r163, 32
-%r165 = trunc i544 %r164 to i512
-%r166 = load i32, i32* %r4
-%r167 = zext i32 %r166 to i64
-%r169 = getelementptr i32, i32* %r4, i32 1
-%r170 = load i32, i32* %r169
-%r171 = zext i32 %r170 to i64
-%r172 = shl i64 %r171, 32
-%r173 = or i64 %r167, %r172
-%r174 = zext i64 %r173 to i96
-%r176 = getelementptr i32, i32* %r4, i32 2
-%r177 = load i32, i32* %r176
-%r178 = zext i32 %r177 to i96
-%r179 = shl i96 %r178, 64
-%r180 = or i96 %r174, %r179
-%r181 = zext i96 %r180 to i128
-%r183 = getelementptr i32, i32* %r4, i32 3
-%r184 = load i32, i32* %r183
-%r185 = zext i32 %r184 to i128
-%r186 = shl i128 %r185, 96
-%r187 = or i128 %r181, %r186
-%r188 = zext i128 %r187 to i160
-%r190 = getelementptr i32, i32* %r4, i32 4
-%r191 = load i32, i32* %r190
-%r192 = zext i32 %r191 to i160
-%r193 = shl i160 %r192, 128
-%r194 = or i160 %r188, %r193
-%r195 = zext i160 %r194 to i192
-%r197 = getelementptr i32, i32* %r4, i32 5
-%r198 = load i32, i32* %r197
-%r199 = zext i32 %r198 to i192
-%r200 = shl i192 %r199, 160
-%r201 = or i192 %r195, %r200
-%r202 = zext i192 %r201 to i224
-%r204 = getelementptr i32, i32* %r4, i32 6
-%r205 = load i32, i32* %r204
-%r206 = zext i32 %r205 to i224
-%r207 = shl i224 %r206, 192
-%r208 = or i224 %r202, %r207
-%r209 = zext i224 %r208 to i256
-%r211 = getelementptr i32, i32* %r4, i32 7
-%r212 = load i32, i32* %r211
-%r213 = zext i32 %r212 to i256
-%r214 = shl i256 %r213, 224
-%r215 = or i256 %r209, %r214
-%r216 = zext i256 %r215 to i288
-%r218 = getelementptr i32, i32* %r4, i32 8
-%r219 = load i32, i32* %r218
-%r220 = zext i32 %r219 to i288
-%r221 = shl i288 %r220, 256
-%r222 = or i288 %r216, %r221
-%r223 = zext i288 %r222 to i320
-%r225 = getelementptr i32, i32* %r4, i32 9
-%r226 = load i32, i32* %r225
-%r227 = zext i32 %r226 to i320
-%r228 = shl i320 %r227, 288
-%r229 = or i320 %r223, %r228
-%r230 = zext i320 %r229 to i352
-%r232 = getelementptr i32, i32* %r4, i32 10
-%r233 = load i32, i32* %r232
-%r234 = zext i32 %r233 to i352
-%r235 = shl i352 %r234, 320
-%r236 = or i352 %r230, %r235
-%r237 = zext i352 %r236 to i384
-%r239 = getelementptr i32, i32* %r4, i32 11
-%r240 = load i32, i32* %r239
-%r241 = zext i32 %r240 to i384
-%r242 = shl i384 %r241, 352
-%r243 = or i384 %r237, %r242
-%r244 = zext i384 %r243 to i416
-%r246 = getelementptr i32, i32* %r4, i32 12
-%r247 = load i32, i32* %r246
-%r248 = zext i32 %r247 to i416
-%r249 = shl i416 %r248, 384
-%r250 = or i416 %r244, %r249
-%r251 = zext i416 %r250 to i448
-%r253 = getelementptr i32, i32* %r4, i32 13
-%r254 = load i32, i32* %r253
-%r255 = zext i32 %r254 to i448
-%r256 = shl i448 %r255, 416
-%r257 = or i448 %r251, %r256
-%r258 = zext i448 %r257 to i480
-%r260 = getelementptr i32, i32* %r4, i32 14
-%r261 = load i32, i32* %r260
-%r262 = zext i32 %r261 to i480
-%r263 = shl i480 %r262, 448
-%r264 = or i480 %r258, %r263
-%r265 = zext i480 %r264 to i512
-%r267 = getelementptr i32, i32* %r4, i32 15
-%r268 = load i32, i32* %r267
-%r269 = zext i32 %r268 to i512
-%r270 = shl i512 %r269, 480
-%r271 = or i512 %r265, %r270
-%r272 = sub i512 %r165, %r271
-%r273 = lshr i512 %r272, 511
-%r274 = trunc i512 %r273 to i1
-%r275 = select i1 %r274, i512 %r165, i512 %r272
-%r276 = trunc i512 %r275 to i32
-%r278 = getelementptr i32, i32* %r1, i32 0
-store i32 %r276, i32* %r278
-%r279 = lshr i512 %r275, 32
-%r280 = trunc i512 %r279 to i32
-%r282 = getelementptr i32, i32* %r1, i32 1
-store i32 %r280, i32* %r282
-%r283 = lshr i512 %r279, 32
-%r284 = trunc i512 %r283 to i32
-%r286 = getelementptr i32, i32* %r1, i32 2
-store i32 %r284, i32* %r286
-%r287 = lshr i512 %r283, 32
-%r288 = trunc i512 %r287 to i32
-%r290 = getelementptr i32, i32* %r1, i32 3
-store i32 %r288, i32* %r290
-%r291 = lshr i512 %r287, 32
-%r292 = trunc i512 %r291 to i32
-%r294 = getelementptr i32, i32* %r1, i32 4
-store i32 %r292, i32* %r294
-%r295 = lshr i512 %r291, 32
-%r296 = trunc i512 %r295 to i32
-%r298 = getelementptr i32, i32* %r1, i32 5
-store i32 %r296, i32* %r298
-%r299 = lshr i512 %r295, 32
-%r300 = trunc i512 %r299 to i32
-%r302 = getelementptr i32, i32* %r1, i32 6
-store i32 %r300, i32* %r302
-%r303 = lshr i512 %r299, 32
-%r304 = trunc i512 %r303 to i32
-%r306 = getelementptr i32, i32* %r1, i32 7
-store i32 %r304, i32* %r306
-%r307 = lshr i512 %r303, 32
-%r308 = trunc i512 %r307 to i32
-%r310 = getelementptr i32, i32* %r1, i32 8
-store i32 %r308, i32* %r310
-%r311 = lshr i512 %r307, 32
-%r312 = trunc i512 %r311 to i32
-%r314 = getelementptr i32, i32* %r1, i32 9
-store i32 %r312, i32* %r314
-%r315 = lshr i512 %r311, 32
-%r316 = trunc i512 %r315 to i32
-%r318 = getelementptr i32, i32* %r1, i32 10
-store i32 %r316, i32* %r318
-%r319 = lshr i512 %r315, 32
-%r320 = trunc i512 %r319 to i32
-%r322 = getelementptr i32, i32* %r1, i32 11
-store i32 %r320, i32* %r322
-%r323 = lshr i512 %r319, 32
-%r324 = trunc i512 %r323 to i32
-%r326 = getelementptr i32, i32* %r1, i32 12
-store i32 %r324, i32* %r326
-%r327 = lshr i512 %r323, 32
-%r328 = trunc i512 %r327 to i32
-%r330 = getelementptr i32, i32* %r1, i32 13
-store i32 %r328, i32* %r330
-%r331 = lshr i512 %r327, 32
-%r332 = trunc i512 %r331 to i32
-%r334 = getelementptr i32, i32* %r1, i32 14
-store i32 %r332, i32* %r334
-%r335 = lshr i512 %r331, 32
-%r336 = trunc i512 %r335 to i32
-%r338 = getelementptr i32, i32* %r1, i32 15
-store i32 %r336, i32* %r338
-ret void
-}
-define void @mcl_fp_montRed16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r5 = getelementptr i32, i32* %r3, i32 -1
-%r6 = load i32, i32* %r5
-%r7 = load i32, i32* %r3
-%r8 = zext i32 %r7 to i64
-%r10 = getelementptr i32, i32* %r3, i32 1
-%r11 = load i32, i32* %r10
-%r12 = zext i32 %r11 to i64
-%r13 = shl i64 %r12, 32
-%r14 = or i64 %r8, %r13
-%r15 = zext i64 %r14 to i96
-%r17 = getelementptr i32, i32* %r3, i32 2
-%r18 = load i32, i32* %r17
-%r19 = zext i32 %r18 to i96
-%r20 = shl i96 %r19, 64
-%r21 = or i96 %r15, %r20
-%r22 = zext i96 %r21 to i128
-%r24 = getelementptr i32, i32* %r3, i32 3
-%r25 = load i32, i32* %r24
-%r26 = zext i32 %r25 to i128
-%r27 = shl i128 %r26, 96
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i160
-%r31 = getelementptr i32, i32* %r3, i32 4
-%r32 = load i32, i32* %r31
-%r33 = zext i32 %r32 to i160
-%r34 = shl i160 %r33, 128
-%r35 = or i160 %r29, %r34
-%r36 = zext i160 %r35 to i192
-%r38 = getelementptr i32, i32* %r3, i32 5
-%r39 = load i32, i32* %r38
-%r40 = zext i32 %r39 to i192
-%r41 = shl i192 %r40, 160
-%r42 = or i192 %r36, %r41
-%r43 = zext i192 %r42 to i224
-%r45 = getelementptr i32, i32* %r3, i32 6
-%r46 = load i32, i32* %r45
-%r47 = zext i32 %r46 to i224
-%r48 = shl i224 %r47, 192
-%r49 = or i224 %r43, %r48
-%r50 = zext i224 %r49 to i256
-%r52 = getelementptr i32, i32* %r3, i32 7
-%r53 = load i32, i32* %r52
-%r54 = zext i32 %r53 to i256
-%r55 = shl i256 %r54, 224
-%r56 = or i256 %r50, %r55
-%r57 = zext i256 %r56 to i288
-%r59 = getelementptr i32, i32* %r3, i32 8
-%r60 = load i32, i32* %r59
-%r61 = zext i32 %r60 to i288
-%r62 = shl i288 %r61, 256
-%r63 = or i288 %r57, %r62
-%r64 = zext i288 %r63 to i320
-%r66 = getelementptr i32, i32* %r3, i32 9
-%r67 = load i32, i32* %r66
-%r68 = zext i32 %r67 to i320
-%r69 = shl i320 %r68, 288
-%r70 = or i320 %r64, %r69
-%r71 = zext i320 %r70 to i352
-%r73 = getelementptr i32, i32* %r3, i32 10
-%r74 = load i32, i32* %r73
-%r75 = zext i32 %r74 to i352
-%r76 = shl i352 %r75, 320
-%r77 = or i352 %r71, %r76
-%r78 = zext i352 %r77 to i384
-%r80 = getelementptr i32, i32* %r3, i32 11
-%r81 = load i32, i32* %r80
-%r82 = zext i32 %r81 to i384
-%r83 = shl i384 %r82, 352
-%r84 = or i384 %r78, %r83
-%r85 = zext i384 %r84 to i416
-%r87 = getelementptr i32, i32* %r3, i32 12
-%r88 = load i32, i32* %r87
-%r89 = zext i32 %r88 to i416
-%r90 = shl i416 %r89, 384
-%r91 = or i416 %r85, %r90
-%r92 = zext i416 %r91 to i448
-%r94 = getelementptr i32, i32* %r3, i32 13
-%r95 = load i32, i32* %r94
-%r96 = zext i32 %r95 to i448
-%r97 = shl i448 %r96, 416
-%r98 = or i448 %r92, %r97
-%r99 = zext i448 %r98 to i480
-%r101 = getelementptr i32, i32* %r3, i32 14
-%r102 = load i32, i32* %r101
-%r103 = zext i32 %r102 to i480
-%r104 = shl i480 %r103, 448
-%r105 = or i480 %r99, %r104
-%r106 = zext i480 %r105 to i512
-%r108 = getelementptr i32, i32* %r3, i32 15
-%r109 = load i32, i32* %r108
-%r110 = zext i32 %r109 to i512
-%r111 = shl i512 %r110, 480
-%r112 = or i512 %r106, %r111
-%r113 = load i32, i32* %r2
-%r114 = zext i32 %r113 to i64
-%r116 = getelementptr i32, i32* %r2, i32 1
-%r117 = load i32, i32* %r116
-%r118 = zext i32 %r117 to i64
-%r119 = shl i64 %r118, 32
-%r120 = or i64 %r114, %r119
-%r121 = zext i64 %r120 to i96
-%r123 = getelementptr i32, i32* %r2, i32 2
-%r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i96
-%r126 = shl i96 %r125, 64
-%r127 = or i96 %r121, %r126
-%r128 = zext i96 %r127 to i128
-%r130 = getelementptr i32, i32* %r2, i32 3
-%r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i128
-%r133 = shl i128 %r132, 96
-%r134 = or i128 %r128, %r133
-%r135 = zext i128 %r134 to i160
-%r137 = getelementptr i32, i32* %r2, i32 4
-%r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i160
-%r140 = shl i160 %r139, 128
-%r141 = or i160 %r135, %r140
-%r142 = zext i160 %r141 to i192
-%r144 = getelementptr i32, i32* %r2, i32 5
-%r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i192
-%r147 = shl i192 %r146, 160
-%r148 = or i192 %r142, %r147
-%r149 = zext i192 %r148 to i224
-%r151 = getelementptr i32, i32* %r2, i32 6
-%r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i224
-%r154 = shl i224 %r153, 192
-%r155 = or i224 %r149, %r154
-%r156 = zext i224 %r155 to i256
-%r158 = getelementptr i32, i32* %r2, i32 7
-%r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i256
-%r161 = shl i256 %r160, 224
-%r162 = or i256 %r156, %r161
-%r163 = zext i256 %r162 to i288
-%r165 = getelementptr i32, i32* %r2, i32 8
-%r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i288
-%r168 = shl i288 %r167, 256
-%r169 = or i288 %r163, %r168
-%r170 = zext i288 %r169 to i320
-%r172 = getelementptr i32, i32* %r2, i32 9
-%r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i320
-%r175 = shl i320 %r174, 288
-%r176 = or i320 %r170, %r175
-%r177 = zext i320 %r176 to i352
-%r179 = getelementptr i32, i32* %r2, i32 10
-%r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i352
-%r182 = shl i352 %r181, 320
-%r183 = or i352 %r177, %r182
-%r184 = zext i352 %r183 to i384
-%r186 = getelementptr i32, i32* %r2, i32 11
-%r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i384
-%r189 = shl i384 %r188, 352
-%r190 = or i384 %r184, %r189
-%r191 = zext i384 %r190 to i416
-%r193 = getelementptr i32, i32* %r2, i32 12
-%r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i416
-%r196 = shl i416 %r195, 384
-%r197 = or i416 %r191, %r196
-%r198 = zext i416 %r197 to i448
-%r200 = getelementptr i32, i32* %r2, i32 13
-%r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i448
-%r203 = shl i448 %r202, 416
-%r204 = or i448 %r198, %r203
-%r205 = zext i448 %r204 to i480
-%r207 = getelementptr i32, i32* %r2, i32 14
-%r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i480
-%r210 = shl i480 %r209, 448
-%r211 = or i480 %r205, %r210
-%r212 = zext i480 %r211 to i512
-%r214 = getelementptr i32, i32* %r2, i32 15
-%r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i512
-%r217 = shl i512 %r216, 480
-%r218 = or i512 %r212, %r217
-%r219 = zext i512 %r218 to i544
-%r221 = getelementptr i32, i32* %r2, i32 16
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i544
-%r224 = shl i544 %r223, 512
-%r225 = or i544 %r219, %r224
-%r226 = zext i544 %r225 to i576
-%r228 = getelementptr i32, i32* %r2, i32 17
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i576
-%r231 = shl i576 %r230, 544
-%r232 = or i576 %r226, %r231
-%r233 = zext i576 %r232 to i608
-%r235 = getelementptr i32, i32* %r2, i32 18
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i608
-%r238 = shl i608 %r237, 576
-%r239 = or i608 %r233, %r238
-%r240 = zext i608 %r239 to i640
-%r242 = getelementptr i32, i32* %r2, i32 19
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i640
-%r245 = shl i640 %r244, 608
-%r246 = or i640 %r240, %r245
-%r247 = zext i640 %r246 to i672
-%r249 = getelementptr i32, i32* %r2, i32 20
-%r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i672
-%r252 = shl i672 %r251, 640
-%r253 = or i672 %r247, %r252
-%r254 = zext i672 %r253 to i704
-%r256 = getelementptr i32, i32* %r2, i32 21
-%r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i704
-%r259 = shl i704 %r258, 672
-%r260 = or i704 %r254, %r259
-%r261 = zext i704 %r260 to i736
-%r263 = getelementptr i32, i32* %r2, i32 22
-%r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i736
-%r266 = shl i736 %r265, 704
-%r267 = or i736 %r261, %r266
-%r268 = zext i736 %r267 to i768
-%r270 = getelementptr i32, i32* %r2, i32 23
-%r271 = load i32, i32* %r270
-%r272 = zext i32 %r271 to i768
-%r273 = shl i768 %r272, 736
-%r274 = or i768 %r268, %r273
-%r275 = zext i768 %r274 to i800
-%r277 = getelementptr i32, i32* %r2, i32 24
-%r278 = load i32, i32* %r277
-%r279 = zext i32 %r278 to i800
-%r280 = shl i800 %r279, 768
-%r281 = or i800 %r275, %r280
-%r282 = zext i800 %r281 to i832
-%r284 = getelementptr i32, i32* %r2, i32 25
-%r285 = load i32, i32* %r284
-%r286 = zext i32 %r285 to i832
-%r287 = shl i832 %r286, 800
-%r288 = or i832 %r282, %r287
-%r289 = zext i832 %r288 to i864
-%r291 = getelementptr i32, i32* %r2, i32 26
-%r292 = load i32, i32* %r291
-%r293 = zext i32 %r292 to i864
-%r294 = shl i864 %r293, 832
-%r295 = or i864 %r289, %r294
-%r296 = zext i864 %r295 to i896
-%r298 = getelementptr i32, i32* %r2, i32 27
-%r299 = load i32, i32* %r298
-%r300 = zext i32 %r299 to i896
-%r301 = shl i896 %r300, 864
-%r302 = or i896 %r296, %r301
-%r303 = zext i896 %r302 to i928
-%r305 = getelementptr i32, i32* %r2, i32 28
-%r306 = load i32, i32* %r305
-%r307 = zext i32 %r306 to i928
-%r308 = shl i928 %r307, 896
-%r309 = or i928 %r303, %r308
-%r310 = zext i928 %r309 to i960
-%r312 = getelementptr i32, i32* %r2, i32 29
-%r313 = load i32, i32* %r312
-%r314 = zext i32 %r313 to i960
-%r315 = shl i960 %r314, 928
-%r316 = or i960 %r310, %r315
-%r317 = zext i960 %r316 to i992
-%r319 = getelementptr i32, i32* %r2, i32 30
-%r320 = load i32, i32* %r319
-%r321 = zext i32 %r320 to i992
-%r322 = shl i992 %r321, 960
-%r323 = or i992 %r317, %r322
-%r324 = zext i992 %r323 to i1024
-%r326 = getelementptr i32, i32* %r2, i32 31
-%r327 = load i32, i32* %r326
-%r328 = zext i32 %r327 to i1024
-%r329 = shl i1024 %r328, 992
-%r330 = or i1024 %r324, %r329
-%r331 = zext i1024 %r330 to i1056
-%r332 = trunc i1056 %r331 to i32
-%r333 = mul i32 %r332, %r6
-%r334 = call i544 @mulPv512x32(i32* %r3, i32 %r333)
-%r335 = zext i544 %r334 to i1056
-%r336 = add i1056 %r331, %r335
-%r337 = lshr i1056 %r336, 32
-%r338 = trunc i1056 %r337 to i1024
-%r339 = trunc i1024 %r338 to i32
-%r340 = mul i32 %r339, %r6
-%r341 = call i544 @mulPv512x32(i32* %r3, i32 %r340)
-%r342 = zext i544 %r341 to i1024
-%r343 = add i1024 %r338, %r342
-%r344 = lshr i1024 %r343, 32
-%r345 = trunc i1024 %r344 to i992
-%r346 = trunc i992 %r345 to i32
-%r347 = mul i32 %r346, %r6
-%r348 = call i544 @mulPv512x32(i32* %r3, i32 %r347)
-%r349 = zext i544 %r348 to i992
-%r350 = add i992 %r345, %r349
-%r351 = lshr i992 %r350, 32
-%r352 = trunc i992 %r351 to i960
-%r353 = trunc i960 %r352 to i32
-%r354 = mul i32 %r353, %r6
-%r355 = call i544 @mulPv512x32(i32* %r3, i32 %r354)
-%r356 = zext i544 %r355 to i960
-%r357 = add i960 %r352, %r356
-%r358 = lshr i960 %r357, 32
-%r359 = trunc i960 %r358 to i928
-%r360 = trunc i928 %r359 to i32
-%r361 = mul i32 %r360, %r6
-%r362 = call i544 @mulPv512x32(i32* %r3, i32 %r361)
-%r363 = zext i544 %r362 to i928
-%r364 = add i928 %r359, %r363
-%r365 = lshr i928 %r364, 32
-%r366 = trunc i928 %r365 to i896
-%r367 = trunc i896 %r366 to i32
-%r368 = mul i32 %r367, %r6
-%r369 = call i544 @mulPv512x32(i32* %r3, i32 %r368)
-%r370 = zext i544 %r369 to i896
-%r371 = add i896 %r366, %r370
-%r372 = lshr i896 %r371, 32
-%r373 = trunc i896 %r372 to i864
-%r374 = trunc i864 %r373 to i32
-%r375 = mul i32 %r374, %r6
-%r376 = call i544 @mulPv512x32(i32* %r3, i32 %r375)
-%r377 = zext i544 %r376 to i864
-%r378 = add i864 %r373, %r377
-%r379 = lshr i864 %r378, 32
-%r380 = trunc i864 %r379 to i832
-%r381 = trunc i832 %r380 to i32
-%r382 = mul i32 %r381, %r6
-%r383 = call i544 @mulPv512x32(i32* %r3, i32 %r382)
-%r384 = zext i544 %r383 to i832
-%r385 = add i832 %r380, %r384
-%r386 = lshr i832 %r385, 32
-%r387 = trunc i832 %r386 to i800
-%r388 = trunc i800 %r387 to i32
-%r389 = mul i32 %r388, %r6
-%r390 = call i544 @mulPv512x32(i32* %r3, i32 %r389)
-%r391 = zext i544 %r390 to i800
-%r392 = add i800 %r387, %r391
-%r393 = lshr i800 %r392, 32
-%r394 = trunc i800 %r393 to i768
-%r395 = trunc i768 %r394 to i32
-%r396 = mul i32 %r395, %r6
-%r397 = call i544 @mulPv512x32(i32* %r3, i32 %r396)
-%r398 = zext i544 %r397 to i768
-%r399 = add i768 %r394, %r398
-%r400 = lshr i768 %r399, 32
-%r401 = trunc i768 %r400 to i736
-%r402 = trunc i736 %r401 to i32
-%r403 = mul i32 %r402, %r6
-%r404 = call i544 @mulPv512x32(i32* %r3, i32 %r403)
-%r405 = zext i544 %r404 to i736
-%r406 = add i736 %r401, %r405
-%r407 = lshr i736 %r406, 32
-%r408 = trunc i736 %r407 to i704
-%r409 = trunc i704 %r408 to i32
-%r410 = mul i32 %r409, %r6
-%r411 = call i544 @mulPv512x32(i32* %r3, i32 %r410)
-%r412 = zext i544 %r411 to i704
-%r413 = add i704 %r408, %r412
-%r414 = lshr i704 %r413, 32
-%r415 = trunc i704 %r414 to i672
-%r416 = trunc i672 %r415 to i32
-%r417 = mul i32 %r416, %r6
-%r418 = call i544 @mulPv512x32(i32* %r3, i32 %r417)
-%r419 = zext i544 %r418 to i672
-%r420 = add i672 %r415, %r419
-%r421 = lshr i672 %r420, 32
-%r422 = trunc i672 %r421 to i640
-%r423 = trunc i640 %r422 to i32
-%r424 = mul i32 %r423, %r6
-%r425 = call i544 @mulPv512x32(i32* %r3, i32 %r424)
-%r426 = zext i544 %r425 to i640
-%r427 = add i640 %r422, %r426
-%r428 = lshr i640 %r427, 32
-%r429 = trunc i640 %r428 to i608
-%r430 = trunc i608 %r429 to i32
-%r431 = mul i32 %r430, %r6
-%r432 = call i544 @mulPv512x32(i32* %r3, i32 %r431)
-%r433 = zext i544 %r432 to i608
-%r434 = add i608 %r429, %r433
-%r435 = lshr i608 %r434, 32
-%r436 = trunc i608 %r435 to i576
-%r437 = trunc i576 %r436 to i32
-%r438 = mul i32 %r437, %r6
-%r439 = call i544 @mulPv512x32(i32* %r3, i32 %r438)
-%r440 = zext i544 %r439 to i576
-%r441 = add i576 %r436, %r440
-%r442 = lshr i576 %r441, 32
-%r443 = trunc i576 %r442 to i544
-%r444 = zext i512 %r112 to i544
-%r445 = sub i544 %r443, %r444
-%r446 = lshr i544 %r445, 512
-%r447 = trunc i544 %r446 to i1
-%r448 = select i1 %r447, i544 %r443, i544 %r445
-%r449 = trunc i544 %r448 to i512
-%r450 = trunc i512 %r449 to i32
-%r452 = getelementptr i32, i32* %r1, i32 0
-store i32 %r450, i32* %r452
-%r453 = lshr i512 %r449, 32
-%r454 = trunc i512 %r453 to i32
-%r456 = getelementptr i32, i32* %r1, i32 1
-store i32 %r454, i32* %r456
-%r457 = lshr i512 %r453, 32
-%r458 = trunc i512 %r457 to i32
-%r460 = getelementptr i32, i32* %r1, i32 2
-store i32 %r458, i32* %r460
-%r461 = lshr i512 %r457, 32
-%r462 = trunc i512 %r461 to i32
-%r464 = getelementptr i32, i32* %r1, i32 3
-store i32 %r462, i32* %r464
-%r465 = lshr i512 %r461, 32
-%r466 = trunc i512 %r465 to i32
-%r468 = getelementptr i32, i32* %r1, i32 4
-store i32 %r466, i32* %r468
-%r469 = lshr i512 %r465, 32
-%r470 = trunc i512 %r469 to i32
-%r472 = getelementptr i32, i32* %r1, i32 5
-store i32 %r470, i32* %r472
-%r473 = lshr i512 %r469, 32
-%r474 = trunc i512 %r473 to i32
-%r476 = getelementptr i32, i32* %r1, i32 6
-store i32 %r474, i32* %r476
-%r477 = lshr i512 %r473, 32
-%r478 = trunc i512 %r477 to i32
-%r480 = getelementptr i32, i32* %r1, i32 7
-store i32 %r478, i32* %r480
-%r481 = lshr i512 %r477, 32
-%r482 = trunc i512 %r481 to i32
-%r484 = getelementptr i32, i32* %r1, i32 8
-store i32 %r482, i32* %r484
-%r485 = lshr i512 %r481, 32
-%r486 = trunc i512 %r485 to i32
-%r488 = getelementptr i32, i32* %r1, i32 9
-store i32 %r486, i32* %r488
-%r489 = lshr i512 %r485, 32
-%r490 = trunc i512 %r489 to i32
-%r492 = getelementptr i32, i32* %r1, i32 10
-store i32 %r490, i32* %r492
-%r493 = lshr i512 %r489, 32
-%r494 = trunc i512 %r493 to i32
-%r496 = getelementptr i32, i32* %r1, i32 11
-store i32 %r494, i32* %r496
-%r497 = lshr i512 %r493, 32
-%r498 = trunc i512 %r497 to i32
-%r500 = getelementptr i32, i32* %r1, i32 12
-store i32 %r498, i32* %r500
-%r501 = lshr i512 %r497, 32
-%r502 = trunc i512 %r501 to i32
-%r504 = getelementptr i32, i32* %r1, i32 13
-store i32 %r502, i32* %r504
-%r505 = lshr i512 %r501, 32
-%r506 = trunc i512 %r505 to i32
-%r508 = getelementptr i32, i32* %r1, i32 14
-store i32 %r506, i32* %r508
-%r509 = lshr i512 %r505, 32
-%r510 = trunc i512 %r509 to i32
-%r512 = getelementptr i32, i32* %r1, i32 15
-store i32 %r510, i32* %r512
-ret void
-}
-define i32 @mcl_fp_addPre16L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r3, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r3, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r3, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r3, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r3, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r3, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r112 = load i32, i32* %r4
-%r113 = zext i32 %r112 to i64
-%r115 = getelementptr i32, i32* %r4, i32 1
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i64
-%r118 = shl i64 %r117, 32
-%r119 = or i64 %r113, %r118
-%r120 = zext i64 %r119 to i96
-%r122 = getelementptr i32, i32* %r4, i32 2
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i96
-%r125 = shl i96 %r124, 64
-%r126 = or i96 %r120, %r125
-%r127 = zext i96 %r126 to i128
-%r129 = getelementptr i32, i32* %r4, i32 3
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i128
-%r132 = shl i128 %r131, 96
-%r133 = or i128 %r127, %r132
-%r134 = zext i128 %r133 to i160
-%r136 = getelementptr i32, i32* %r4, i32 4
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i160
-%r139 = shl i160 %r138, 128
-%r140 = or i160 %r134, %r139
-%r141 = zext i160 %r140 to i192
-%r143 = getelementptr i32, i32* %r4, i32 5
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i192
-%r146 = shl i192 %r145, 160
-%r147 = or i192 %r141, %r146
-%r148 = zext i192 %r147 to i224
-%r150 = getelementptr i32, i32* %r4, i32 6
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i224
-%r153 = shl i224 %r152, 192
-%r154 = or i224 %r148, %r153
-%r155 = zext i224 %r154 to i256
-%r157 = getelementptr i32, i32* %r4, i32 7
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i256
-%r160 = shl i256 %r159, 224
-%r161 = or i256 %r155, %r160
-%r162 = zext i256 %r161 to i288
-%r164 = getelementptr i32, i32* %r4, i32 8
-%r165 = load i32, i32* %r164
-%r166 = zext i32 %r165 to i288
-%r167 = shl i288 %r166, 256
-%r168 = or i288 %r162, %r167
-%r169 = zext i288 %r168 to i320
-%r171 = getelementptr i32, i32* %r4, i32 9
-%r172 = load i32, i32* %r171
-%r173 = zext i32 %r172 to i320
-%r174 = shl i320 %r173, 288
-%r175 = or i320 %r169, %r174
-%r176 = zext i320 %r175 to i352
-%r178 = getelementptr i32, i32* %r4, i32 10
-%r179 = load i32, i32* %r178
-%r180 = zext i32 %r179 to i352
-%r181 = shl i352 %r180, 320
-%r182 = or i352 %r176, %r181
-%r183 = zext i352 %r182 to i384
-%r185 = getelementptr i32, i32* %r4, i32 11
-%r186 = load i32, i32* %r185
-%r187 = zext i32 %r186 to i384
-%r188 = shl i384 %r187, 352
-%r189 = or i384 %r183, %r188
-%r190 = zext i384 %r189 to i416
-%r192 = getelementptr i32, i32* %r4, i32 12
-%r193 = load i32, i32* %r192
-%r194 = zext i32 %r193 to i416
-%r195 = shl i416 %r194, 384
-%r196 = or i416 %r190, %r195
-%r197 = zext i416 %r196 to i448
-%r199 = getelementptr i32, i32* %r4, i32 13
-%r200 = load i32, i32* %r199
-%r201 = zext i32 %r200 to i448
-%r202 = shl i448 %r201, 416
-%r203 = or i448 %r197, %r202
-%r204 = zext i448 %r203 to i480
-%r206 = getelementptr i32, i32* %r4, i32 14
-%r207 = load i32, i32* %r206
-%r208 = zext i32 %r207 to i480
-%r209 = shl i480 %r208, 448
-%r210 = or i480 %r204, %r209
-%r211 = zext i480 %r210 to i512
-%r213 = getelementptr i32, i32* %r4, i32 15
-%r214 = load i32, i32* %r213
-%r215 = zext i32 %r214 to i512
-%r216 = shl i512 %r215, 480
-%r217 = or i512 %r211, %r216
-%r218 = zext i512 %r217 to i544
-%r219 = add i544 %r111, %r218
-%r220 = trunc i544 %r219 to i512
-%r221 = trunc i512 %r220 to i32
-%r223 = getelementptr i32, i32* %r2, i32 0
-store i32 %r221, i32* %r223
-%r224 = lshr i512 %r220, 32
-%r225 = trunc i512 %r224 to i32
-%r227 = getelementptr i32, i32* %r2, i32 1
-store i32 %r225, i32* %r227
-%r228 = lshr i512 %r224, 32
-%r229 = trunc i512 %r228 to i32
-%r231 = getelementptr i32, i32* %r2, i32 2
-store i32 %r229, i32* %r231
-%r232 = lshr i512 %r228, 32
-%r233 = trunc i512 %r232 to i32
-%r235 = getelementptr i32, i32* %r2, i32 3
-store i32 %r233, i32* %r235
-%r236 = lshr i512 %r232, 32
-%r237 = trunc i512 %r236 to i32
-%r239 = getelementptr i32, i32* %r2, i32 4
-store i32 %r237, i32* %r239
-%r240 = lshr i512 %r236, 32
-%r241 = trunc i512 %r240 to i32
-%r243 = getelementptr i32, i32* %r2, i32 5
-store i32 %r241, i32* %r243
-%r244 = lshr i512 %r240, 32
-%r245 = trunc i512 %r244 to i32
-%r247 = getelementptr i32, i32* %r2, i32 6
-store i32 %r245, i32* %r247
-%r248 = lshr i512 %r244, 32
-%r249 = trunc i512 %r248 to i32
-%r251 = getelementptr i32, i32* %r2, i32 7
-store i32 %r249, i32* %r251
-%r252 = lshr i512 %r248, 32
-%r253 = trunc i512 %r252 to i32
-%r255 = getelementptr i32, i32* %r2, i32 8
-store i32 %r253, i32* %r255
-%r256 = lshr i512 %r252, 32
-%r257 = trunc i512 %r256 to i32
-%r259 = getelementptr i32, i32* %r2, i32 9
-store i32 %r257, i32* %r259
-%r260 = lshr i512 %r256, 32
-%r261 = trunc i512 %r260 to i32
-%r263 = getelementptr i32, i32* %r2, i32 10
-store i32 %r261, i32* %r263
-%r264 = lshr i512 %r260, 32
-%r265 = trunc i512 %r264 to i32
-%r267 = getelementptr i32, i32* %r2, i32 11
-store i32 %r265, i32* %r267
-%r268 = lshr i512 %r264, 32
-%r269 = trunc i512 %r268 to i32
-%r271 = getelementptr i32, i32* %r2, i32 12
-store i32 %r269, i32* %r271
-%r272 = lshr i512 %r268, 32
-%r273 = trunc i512 %r272 to i32
-%r275 = getelementptr i32, i32* %r2, i32 13
-store i32 %r273, i32* %r275
-%r276 = lshr i512 %r272, 32
-%r277 = trunc i512 %r276 to i32
-%r279 = getelementptr i32, i32* %r2, i32 14
-store i32 %r277, i32* %r279
-%r280 = lshr i512 %r276, 32
-%r281 = trunc i512 %r280 to i32
-%r283 = getelementptr i32, i32* %r2, i32 15
-store i32 %r281, i32* %r283
-%r284 = lshr i544 %r219, 512
-%r285 = trunc i544 %r284 to i32
-ret i32 %r285
-}
-define i32 @mcl_fp_subPre16L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r3
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r3, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r3, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r3, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r3, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r3, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r3, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r3, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r3, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r3, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r3, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r3, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r3, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r3, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r3, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r3, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r112 = load i32, i32* %r4
-%r113 = zext i32 %r112 to i64
-%r115 = getelementptr i32, i32* %r4, i32 1
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i64
-%r118 = shl i64 %r117, 32
-%r119 = or i64 %r113, %r118
-%r120 = zext i64 %r119 to i96
-%r122 = getelementptr i32, i32* %r4, i32 2
-%r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i96
-%r125 = shl i96 %r124, 64
-%r126 = or i96 %r120, %r125
-%r127 = zext i96 %r126 to i128
-%r129 = getelementptr i32, i32* %r4, i32 3
-%r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i128
-%r132 = shl i128 %r131, 96
-%r133 = or i128 %r127, %r132
-%r134 = zext i128 %r133 to i160
-%r136 = getelementptr i32, i32* %r4, i32 4
-%r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i160
-%r139 = shl i160 %r138, 128
-%r140 = or i160 %r134, %r139
-%r141 = zext i160 %r140 to i192
-%r143 = getelementptr i32, i32* %r4, i32 5
-%r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i192
-%r146 = shl i192 %r145, 160
-%r147 = or i192 %r141, %r146
-%r148 = zext i192 %r147 to i224
-%r150 = getelementptr i32, i32* %r4, i32 6
-%r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i224
-%r153 = shl i224 %r152, 192
-%r154 = or i224 %r148, %r153
-%r155 = zext i224 %r154 to i256
-%r157 = getelementptr i32, i32* %r4, i32 7
-%r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i256
-%r160 = shl i256 %r159, 224
-%r161 = or i256 %r155, %r160
-%r162 = zext i256 %r161 to i288
-%r164 = getelementptr i32, i32* %r4, i32 8
-%r165 = load i32, i32* %r164
-%r166 = zext i32 %r165 to i288
-%r167 = shl i288 %r166, 256
-%r168 = or i288 %r162, %r167
-%r169 = zext i288 %r168 to i320
-%r171 = getelementptr i32, i32* %r4, i32 9
-%r172 = load i32, i32* %r171
-%r173 = zext i32 %r172 to i320
-%r174 = shl i320 %r173, 288
-%r175 = or i320 %r169, %r174
-%r176 = zext i320 %r175 to i352
-%r178 = getelementptr i32, i32* %r4, i32 10
-%r179 = load i32, i32* %r178
-%r180 = zext i32 %r179 to i352
-%r181 = shl i352 %r180, 320
-%r182 = or i352 %r176, %r181
-%r183 = zext i352 %r182 to i384
-%r185 = getelementptr i32, i32* %r4, i32 11
-%r186 = load i32, i32* %r185
-%r187 = zext i32 %r186 to i384
-%r188 = shl i384 %r187, 352
-%r189 = or i384 %r183, %r188
-%r190 = zext i384 %r189 to i416
-%r192 = getelementptr i32, i32* %r4, i32 12
-%r193 = load i32, i32* %r192
-%r194 = zext i32 %r193 to i416
-%r195 = shl i416 %r194, 384
-%r196 = or i416 %r190, %r195
-%r197 = zext i416 %r196 to i448
-%r199 = getelementptr i32, i32* %r4, i32 13
-%r200 = load i32, i32* %r199
-%r201 = zext i32 %r200 to i448
-%r202 = shl i448 %r201, 416
-%r203 = or i448 %r197, %r202
-%r204 = zext i448 %r203 to i480
-%r206 = getelementptr i32, i32* %r4, i32 14
-%r207 = load i32, i32* %r206
-%r208 = zext i32 %r207 to i480
-%r209 = shl i480 %r208, 448
-%r210 = or i480 %r204, %r209
-%r211 = zext i480 %r210 to i512
-%r213 = getelementptr i32, i32* %r4, i32 15
-%r214 = load i32, i32* %r213
-%r215 = zext i32 %r214 to i512
-%r216 = shl i512 %r215, 480
-%r217 = or i512 %r211, %r216
-%r218 = zext i512 %r217 to i544
-%r219 = sub i544 %r111, %r218
-%r220 = trunc i544 %r219 to i512
-%r221 = trunc i512 %r220 to i32
-%r223 = getelementptr i32, i32* %r2, i32 0
-store i32 %r221, i32* %r223
-%r224 = lshr i512 %r220, 32
-%r225 = trunc i512 %r224 to i32
-%r227 = getelementptr i32, i32* %r2, i32 1
-store i32 %r225, i32* %r227
-%r228 = lshr i512 %r224, 32
-%r229 = trunc i512 %r228 to i32
-%r231 = getelementptr i32, i32* %r2, i32 2
-store i32 %r229, i32* %r231
-%r232 = lshr i512 %r228, 32
-%r233 = trunc i512 %r232 to i32
-%r235 = getelementptr i32, i32* %r2, i32 3
-store i32 %r233, i32* %r235
-%r236 = lshr i512 %r232, 32
-%r237 = trunc i512 %r236 to i32
-%r239 = getelementptr i32, i32* %r2, i32 4
-store i32 %r237, i32* %r239
-%r240 = lshr i512 %r236, 32
-%r241 = trunc i512 %r240 to i32
-%r243 = getelementptr i32, i32* %r2, i32 5
-store i32 %r241, i32* %r243
-%r244 = lshr i512 %r240, 32
-%r245 = trunc i512 %r244 to i32
-%r247 = getelementptr i32, i32* %r2, i32 6
-store i32 %r245, i32* %r247
-%r248 = lshr i512 %r244, 32
-%r249 = trunc i512 %r248 to i32
-%r251 = getelementptr i32, i32* %r2, i32 7
-store i32 %r249, i32* %r251
-%r252 = lshr i512 %r248, 32
-%r253 = trunc i512 %r252 to i32
-%r255 = getelementptr i32, i32* %r2, i32 8
-store i32 %r253, i32* %r255
-%r256 = lshr i512 %r252, 32
-%r257 = trunc i512 %r256 to i32
-%r259 = getelementptr i32, i32* %r2, i32 9
-store i32 %r257, i32* %r259
-%r260 = lshr i512 %r256, 32
-%r261 = trunc i512 %r260 to i32
-%r263 = getelementptr i32, i32* %r2, i32 10
-store i32 %r261, i32* %r263
-%r264 = lshr i512 %r260, 32
-%r265 = trunc i512 %r264 to i32
-%r267 = getelementptr i32, i32* %r2, i32 11
-store i32 %r265, i32* %r267
-%r268 = lshr i512 %r264, 32
-%r269 = trunc i512 %r268 to i32
-%r271 = getelementptr i32, i32* %r2, i32 12
-store i32 %r269, i32* %r271
-%r272 = lshr i512 %r268, 32
-%r273 = trunc i512 %r272 to i32
-%r275 = getelementptr i32, i32* %r2, i32 13
-store i32 %r273, i32* %r275
-%r276 = lshr i512 %r272, 32
-%r277 = trunc i512 %r276 to i32
-%r279 = getelementptr i32, i32* %r2, i32 14
-store i32 %r277, i32* %r279
-%r280 = lshr i512 %r276, 32
-%r281 = trunc i512 %r280 to i32
-%r283 = getelementptr i32, i32* %r2, i32 15
-store i32 %r281, i32* %r283
-%r284 = lshr i544 %r219, 512
-%r285 = trunc i544 %r284 to i32
-%r287 = and i32 %r285, 1
-ret i32 %r287
-}
-define void @mcl_fp_shr1_16L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = zext i32 %r3 to i64
-%r6 = getelementptr i32, i32* %r2, i32 1
-%r7 = load i32, i32* %r6
-%r8 = zext i32 %r7 to i64
-%r9 = shl i64 %r8, 32
-%r10 = or i64 %r4, %r9
-%r11 = zext i64 %r10 to i96
-%r13 = getelementptr i32, i32* %r2, i32 2
-%r14 = load i32, i32* %r13
-%r15 = zext i32 %r14 to i96
-%r16 = shl i96 %r15, 64
-%r17 = or i96 %r11, %r16
-%r18 = zext i96 %r17 to i128
-%r20 = getelementptr i32, i32* %r2, i32 3
-%r21 = load i32, i32* %r20
-%r22 = zext i32 %r21 to i128
-%r23 = shl i128 %r22, 96
-%r24 = or i128 %r18, %r23
-%r25 = zext i128 %r24 to i160
-%r27 = getelementptr i32, i32* %r2, i32 4
-%r28 = load i32, i32* %r27
-%r29 = zext i32 %r28 to i160
-%r30 = shl i160 %r29, 128
-%r31 = or i160 %r25, %r30
-%r32 = zext i160 %r31 to i192
-%r34 = getelementptr i32, i32* %r2, i32 5
-%r35 = load i32, i32* %r34
-%r36 = zext i32 %r35 to i192
-%r37 = shl i192 %r36, 160
-%r38 = or i192 %r32, %r37
-%r39 = zext i192 %r38 to i224
-%r41 = getelementptr i32, i32* %r2, i32 6
-%r42 = load i32, i32* %r41
-%r43 = zext i32 %r42 to i224
-%r44 = shl i224 %r43, 192
-%r45 = or i224 %r39, %r44
-%r46 = zext i224 %r45 to i256
-%r48 = getelementptr i32, i32* %r2, i32 7
-%r49 = load i32, i32* %r48
-%r50 = zext i32 %r49 to i256
-%r51 = shl i256 %r50, 224
-%r52 = or i256 %r46, %r51
-%r53 = zext i256 %r52 to i288
-%r55 = getelementptr i32, i32* %r2, i32 8
-%r56 = load i32, i32* %r55
-%r57 = zext i32 %r56 to i288
-%r58 = shl i288 %r57, 256
-%r59 = or i288 %r53, %r58
-%r60 = zext i288 %r59 to i320
-%r62 = getelementptr i32, i32* %r2, i32 9
-%r63 = load i32, i32* %r62
-%r64 = zext i32 %r63 to i320
-%r65 = shl i320 %r64, 288
-%r66 = or i320 %r60, %r65
-%r67 = zext i320 %r66 to i352
-%r69 = getelementptr i32, i32* %r2, i32 10
-%r70 = load i32, i32* %r69
-%r71 = zext i32 %r70 to i352
-%r72 = shl i352 %r71, 320
-%r73 = or i352 %r67, %r72
-%r74 = zext i352 %r73 to i384
-%r76 = getelementptr i32, i32* %r2, i32 11
-%r77 = load i32, i32* %r76
-%r78 = zext i32 %r77 to i384
-%r79 = shl i384 %r78, 352
-%r80 = or i384 %r74, %r79
-%r81 = zext i384 %r80 to i416
-%r83 = getelementptr i32, i32* %r2, i32 12
-%r84 = load i32, i32* %r83
-%r85 = zext i32 %r84 to i416
-%r86 = shl i416 %r85, 384
-%r87 = or i416 %r81, %r86
-%r88 = zext i416 %r87 to i448
-%r90 = getelementptr i32, i32* %r2, i32 13
-%r91 = load i32, i32* %r90
-%r92 = zext i32 %r91 to i448
-%r93 = shl i448 %r92, 416
-%r94 = or i448 %r88, %r93
-%r95 = zext i448 %r94 to i480
-%r97 = getelementptr i32, i32* %r2, i32 14
-%r98 = load i32, i32* %r97
-%r99 = zext i32 %r98 to i480
-%r100 = shl i480 %r99, 448
-%r101 = or i480 %r95, %r100
-%r102 = zext i480 %r101 to i512
-%r104 = getelementptr i32, i32* %r2, i32 15
-%r105 = load i32, i32* %r104
-%r106 = zext i32 %r105 to i512
-%r107 = shl i512 %r106, 480
-%r108 = or i512 %r102, %r107
-%r109 = lshr i512 %r108, 1
-%r110 = trunc i512 %r109 to i32
-%r112 = getelementptr i32, i32* %r1, i32 0
-store i32 %r110, i32* %r112
-%r113 = lshr i512 %r109, 32
-%r114 = trunc i512 %r113 to i32
-%r116 = getelementptr i32, i32* %r1, i32 1
-store i32 %r114, i32* %r116
-%r117 = lshr i512 %r113, 32
-%r118 = trunc i512 %r117 to i32
-%r120 = getelementptr i32, i32* %r1, i32 2
-store i32 %r118, i32* %r120
-%r121 = lshr i512 %r117, 32
-%r122 = trunc i512 %r121 to i32
-%r124 = getelementptr i32, i32* %r1, i32 3
-store i32 %r122, i32* %r124
-%r125 = lshr i512 %r121, 32
-%r126 = trunc i512 %r125 to i32
-%r128 = getelementptr i32, i32* %r1, i32 4
-store i32 %r126, i32* %r128
-%r129 = lshr i512 %r125, 32
-%r130 = trunc i512 %r129 to i32
-%r132 = getelementptr i32, i32* %r1, i32 5
-store i32 %r130, i32* %r132
-%r133 = lshr i512 %r129, 32
-%r134 = trunc i512 %r133 to i32
-%r136 = getelementptr i32, i32* %r1, i32 6
-store i32 %r134, i32* %r136
-%r137 = lshr i512 %r133, 32
-%r138 = trunc i512 %r137 to i32
-%r140 = getelementptr i32, i32* %r1, i32 7
-store i32 %r138, i32* %r140
-%r141 = lshr i512 %r137, 32
-%r142 = trunc i512 %r141 to i32
-%r144 = getelementptr i32, i32* %r1, i32 8
-store i32 %r142, i32* %r144
-%r145 = lshr i512 %r141, 32
-%r146 = trunc i512 %r145 to i32
-%r148 = getelementptr i32, i32* %r1, i32 9
-store i32 %r146, i32* %r148
-%r149 = lshr i512 %r145, 32
-%r150 = trunc i512 %r149 to i32
-%r152 = getelementptr i32, i32* %r1, i32 10
-store i32 %r150, i32* %r152
-%r153 = lshr i512 %r149, 32
-%r154 = trunc i512 %r153 to i32
-%r156 = getelementptr i32, i32* %r1, i32 11
-store i32 %r154, i32* %r156
-%r157 = lshr i512 %r153, 32
-%r158 = trunc i512 %r157 to i32
-%r160 = getelementptr i32, i32* %r1, i32 12
-store i32 %r158, i32* %r160
-%r161 = lshr i512 %r157, 32
-%r162 = trunc i512 %r161 to i32
-%r164 = getelementptr i32, i32* %r1, i32 13
-store i32 %r162, i32* %r164
-%r165 = lshr i512 %r161, 32
-%r166 = trunc i512 %r165 to i32
-%r168 = getelementptr i32, i32* %r1, i32 14
-store i32 %r166, i32* %r168
-%r169 = lshr i512 %r165, 32
-%r170 = trunc i512 %r169 to i32
-%r172 = getelementptr i32, i32* %r1, i32 15
-store i32 %r170, i32* %r172
-ret void
-}
-define void @mcl_fp_add16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = load i32, i32* %r3
-%r112 = zext i32 %r111 to i64
-%r114 = getelementptr i32, i32* %r3, i32 1
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i64
-%r117 = shl i64 %r116, 32
-%r118 = or i64 %r112, %r117
-%r119 = zext i64 %r118 to i96
-%r121 = getelementptr i32, i32* %r3, i32 2
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i96
-%r124 = shl i96 %r123, 64
-%r125 = or i96 %r119, %r124
-%r126 = zext i96 %r125 to i128
-%r128 = getelementptr i32, i32* %r3, i32 3
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i128
-%r131 = shl i128 %r130, 96
-%r132 = or i128 %r126, %r131
-%r133 = zext i128 %r132 to i160
-%r135 = getelementptr i32, i32* %r3, i32 4
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i160
-%r138 = shl i160 %r137, 128
-%r139 = or i160 %r133, %r138
-%r140 = zext i160 %r139 to i192
-%r142 = getelementptr i32, i32* %r3, i32 5
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i192
-%r145 = shl i192 %r144, 160
-%r146 = or i192 %r140, %r145
-%r147 = zext i192 %r146 to i224
-%r149 = getelementptr i32, i32* %r3, i32 6
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i224
-%r152 = shl i224 %r151, 192
-%r153 = or i224 %r147, %r152
-%r154 = zext i224 %r153 to i256
-%r156 = getelementptr i32, i32* %r3, i32 7
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i256
-%r159 = shl i256 %r158, 224
-%r160 = or i256 %r154, %r159
-%r161 = zext i256 %r160 to i288
-%r163 = getelementptr i32, i32* %r3, i32 8
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i288
-%r166 = shl i288 %r165, 256
-%r167 = or i288 %r161, %r166
-%r168 = zext i288 %r167 to i320
-%r170 = getelementptr i32, i32* %r3, i32 9
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i320
-%r173 = shl i320 %r172, 288
-%r174 = or i320 %r168, %r173
-%r175 = zext i320 %r174 to i352
-%r177 = getelementptr i32, i32* %r3, i32 10
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i352
-%r180 = shl i352 %r179, 320
-%r181 = or i352 %r175, %r180
-%r182 = zext i352 %r181 to i384
-%r184 = getelementptr i32, i32* %r3, i32 11
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i384
-%r187 = shl i384 %r186, 352
-%r188 = or i384 %r182, %r187
-%r189 = zext i384 %r188 to i416
-%r191 = getelementptr i32, i32* %r3, i32 12
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i416
-%r194 = shl i416 %r193, 384
-%r195 = or i416 %r189, %r194
-%r196 = zext i416 %r195 to i448
-%r198 = getelementptr i32, i32* %r3, i32 13
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i448
-%r201 = shl i448 %r200, 416
-%r202 = or i448 %r196, %r201
-%r203 = zext i448 %r202 to i480
-%r205 = getelementptr i32, i32* %r3, i32 14
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i480
-%r208 = shl i480 %r207, 448
-%r209 = or i480 %r203, %r208
-%r210 = zext i480 %r209 to i512
-%r212 = getelementptr i32, i32* %r3, i32 15
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i512
-%r215 = shl i512 %r214, 480
-%r216 = or i512 %r210, %r215
-%r217 = zext i512 %r110 to i544
-%r218 = zext i512 %r216 to i544
-%r219 = add i544 %r217, %r218
-%r220 = trunc i544 %r219 to i512
-%r221 = trunc i512 %r220 to i32
-%r223 = getelementptr i32, i32* %r1, i32 0
-store i32 %r221, i32* %r223
-%r224 = lshr i512 %r220, 32
-%r225 = trunc i512 %r224 to i32
-%r227 = getelementptr i32, i32* %r1, i32 1
-store i32 %r225, i32* %r227
-%r228 = lshr i512 %r224, 32
-%r229 = trunc i512 %r228 to i32
-%r231 = getelementptr i32, i32* %r1, i32 2
-store i32 %r229, i32* %r231
-%r232 = lshr i512 %r228, 32
-%r233 = trunc i512 %r232 to i32
-%r235 = getelementptr i32, i32* %r1, i32 3
-store i32 %r233, i32* %r235
-%r236 = lshr i512 %r232, 32
-%r237 = trunc i512 %r236 to i32
-%r239 = getelementptr i32, i32* %r1, i32 4
-store i32 %r237, i32* %r239
-%r240 = lshr i512 %r236, 32
-%r241 = trunc i512 %r240 to i32
-%r243 = getelementptr i32, i32* %r1, i32 5
-store i32 %r241, i32* %r243
-%r244 = lshr i512 %r240, 32
-%r245 = trunc i512 %r244 to i32
-%r247 = getelementptr i32, i32* %r1, i32 6
-store i32 %r245, i32* %r247
-%r248 = lshr i512 %r244, 32
-%r249 = trunc i512 %r248 to i32
-%r251 = getelementptr i32, i32* %r1, i32 7
-store i32 %r249, i32* %r251
-%r252 = lshr i512 %r248, 32
-%r253 = trunc i512 %r252 to i32
-%r255 = getelementptr i32, i32* %r1, i32 8
-store i32 %r253, i32* %r255
-%r256 = lshr i512 %r252, 32
-%r257 = trunc i512 %r256 to i32
-%r259 = getelementptr i32, i32* %r1, i32 9
-store i32 %r257, i32* %r259
-%r260 = lshr i512 %r256, 32
-%r261 = trunc i512 %r260 to i32
-%r263 = getelementptr i32, i32* %r1, i32 10
-store i32 %r261, i32* %r263
-%r264 = lshr i512 %r260, 32
-%r265 = trunc i512 %r264 to i32
-%r267 = getelementptr i32, i32* %r1, i32 11
-store i32 %r265, i32* %r267
-%r268 = lshr i512 %r264, 32
-%r269 = trunc i512 %r268 to i32
-%r271 = getelementptr i32, i32* %r1, i32 12
-store i32 %r269, i32* %r271
-%r272 = lshr i512 %r268, 32
-%r273 = trunc i512 %r272 to i32
-%r275 = getelementptr i32, i32* %r1, i32 13
-store i32 %r273, i32* %r275
-%r276 = lshr i512 %r272, 32
-%r277 = trunc i512 %r276 to i32
-%r279 = getelementptr i32, i32* %r1, i32 14
-store i32 %r277, i32* %r279
-%r280 = lshr i512 %r276, 32
-%r281 = trunc i512 %r280 to i32
-%r283 = getelementptr i32, i32* %r1, i32 15
-store i32 %r281, i32* %r283
-%r284 = load i32, i32* %r4
-%r285 = zext i32 %r284 to i64
-%r287 = getelementptr i32, i32* %r4, i32 1
-%r288 = load i32, i32* %r287
-%r289 = zext i32 %r288 to i64
-%r290 = shl i64 %r289, 32
-%r291 = or i64 %r285, %r290
-%r292 = zext i64 %r291 to i96
-%r294 = getelementptr i32, i32* %r4, i32 2
-%r295 = load i32, i32* %r294
-%r296 = zext i32 %r295 to i96
-%r297 = shl i96 %r296, 64
-%r298 = or i96 %r292, %r297
-%r299 = zext i96 %r298 to i128
-%r301 = getelementptr i32, i32* %r4, i32 3
-%r302 = load i32, i32* %r301
-%r303 = zext i32 %r302 to i128
-%r304 = shl i128 %r303, 96
-%r305 = or i128 %r299, %r304
-%r306 = zext i128 %r305 to i160
-%r308 = getelementptr i32, i32* %r4, i32 4
-%r309 = load i32, i32* %r308
-%r310 = zext i32 %r309 to i160
-%r311 = shl i160 %r310, 128
-%r312 = or i160 %r306, %r311
-%r313 = zext i160 %r312 to i192
-%r315 = getelementptr i32, i32* %r4, i32 5
-%r316 = load i32, i32* %r315
-%r317 = zext i32 %r316 to i192
-%r318 = shl i192 %r317, 160
-%r319 = or i192 %r313, %r318
-%r320 = zext i192 %r319 to i224
-%r322 = getelementptr i32, i32* %r4, i32 6
-%r323 = load i32, i32* %r322
-%r324 = zext i32 %r323 to i224
-%r325 = shl i224 %r324, 192
-%r326 = or i224 %r320, %r325
-%r327 = zext i224 %r326 to i256
-%r329 = getelementptr i32, i32* %r4, i32 7
-%r330 = load i32, i32* %r329
-%r331 = zext i32 %r330 to i256
-%r332 = shl i256 %r331, 224
-%r333 = or i256 %r327, %r332
-%r334 = zext i256 %r333 to i288
-%r336 = getelementptr i32, i32* %r4, i32 8
-%r337 = load i32, i32* %r336
-%r338 = zext i32 %r337 to i288
-%r339 = shl i288 %r338, 256
-%r340 = or i288 %r334, %r339
-%r341 = zext i288 %r340 to i320
-%r343 = getelementptr i32, i32* %r4, i32 9
-%r344 = load i32, i32* %r343
-%r345 = zext i32 %r344 to i320
-%r346 = shl i320 %r345, 288
-%r347 = or i320 %r341, %r346
-%r348 = zext i320 %r347 to i352
-%r350 = getelementptr i32, i32* %r4, i32 10
-%r351 = load i32, i32* %r350
-%r352 = zext i32 %r351 to i352
-%r353 = shl i352 %r352, 320
-%r354 = or i352 %r348, %r353
-%r355 = zext i352 %r354 to i384
-%r357 = getelementptr i32, i32* %r4, i32 11
-%r358 = load i32, i32* %r357
-%r359 = zext i32 %r358 to i384
-%r360 = shl i384 %r359, 352
-%r361 = or i384 %r355, %r360
-%r362 = zext i384 %r361 to i416
-%r364 = getelementptr i32, i32* %r4, i32 12
-%r365 = load i32, i32* %r364
-%r366 = zext i32 %r365 to i416
-%r367 = shl i416 %r366, 384
-%r368 = or i416 %r362, %r367
-%r369 = zext i416 %r368 to i448
-%r371 = getelementptr i32, i32* %r4, i32 13
-%r372 = load i32, i32* %r371
-%r373 = zext i32 %r372 to i448
-%r374 = shl i448 %r373, 416
-%r375 = or i448 %r369, %r374
-%r376 = zext i448 %r375 to i480
-%r378 = getelementptr i32, i32* %r4, i32 14
-%r379 = load i32, i32* %r378
-%r380 = zext i32 %r379 to i480
-%r381 = shl i480 %r380, 448
-%r382 = or i480 %r376, %r381
-%r383 = zext i480 %r382 to i512
-%r385 = getelementptr i32, i32* %r4, i32 15
-%r386 = load i32, i32* %r385
-%r387 = zext i32 %r386 to i512
-%r388 = shl i512 %r387, 480
-%r389 = or i512 %r383, %r388
-%r390 = zext i512 %r389 to i544
-%r391 = sub i544 %r219, %r390
-%r392 = lshr i544 %r391, 512
-%r393 = trunc i544 %r392 to i1
-br i1%r393, label %carry, label %nocarry
-nocarry:
-%r394 = trunc i544 %r391 to i512
-%r395 = trunc i512 %r394 to i32
-%r397 = getelementptr i32, i32* %r1, i32 0
-store i32 %r395, i32* %r397
-%r398 = lshr i512 %r394, 32
-%r399 = trunc i512 %r398 to i32
-%r401 = getelementptr i32, i32* %r1, i32 1
-store i32 %r399, i32* %r401
-%r402 = lshr i512 %r398, 32
-%r403 = trunc i512 %r402 to i32
-%r405 = getelementptr i32, i32* %r1, i32 2
-store i32 %r403, i32* %r405
-%r406 = lshr i512 %r402, 32
-%r407 = trunc i512 %r406 to i32
-%r409 = getelementptr i32, i32* %r1, i32 3
-store i32 %r407, i32* %r409
-%r410 = lshr i512 %r406, 32
-%r411 = trunc i512 %r410 to i32
-%r413 = getelementptr i32, i32* %r1, i32 4
-store i32 %r411, i32* %r413
-%r414 = lshr i512 %r410, 32
-%r415 = trunc i512 %r414 to i32
-%r417 = getelementptr i32, i32* %r1, i32 5
-store i32 %r415, i32* %r417
-%r418 = lshr i512 %r414, 32
-%r419 = trunc i512 %r418 to i32
-%r421 = getelementptr i32, i32* %r1, i32 6
-store i32 %r419, i32* %r421
-%r422 = lshr i512 %r418, 32
-%r423 = trunc i512 %r422 to i32
-%r425 = getelementptr i32, i32* %r1, i32 7
-store i32 %r423, i32* %r425
-%r426 = lshr i512 %r422, 32
-%r427 = trunc i512 %r426 to i32
-%r429 = getelementptr i32, i32* %r1, i32 8
-store i32 %r427, i32* %r429
-%r430 = lshr i512 %r426, 32
-%r431 = trunc i512 %r430 to i32
-%r433 = getelementptr i32, i32* %r1, i32 9
-store i32 %r431, i32* %r433
-%r434 = lshr i512 %r430, 32
-%r435 = trunc i512 %r434 to i32
-%r437 = getelementptr i32, i32* %r1, i32 10
-store i32 %r435, i32* %r437
-%r438 = lshr i512 %r434, 32
-%r439 = trunc i512 %r438 to i32
-%r441 = getelementptr i32, i32* %r1, i32 11
-store i32 %r439, i32* %r441
-%r442 = lshr i512 %r438, 32
-%r443 = trunc i512 %r442 to i32
-%r445 = getelementptr i32, i32* %r1, i32 12
-store i32 %r443, i32* %r445
-%r446 = lshr i512 %r442, 32
-%r447 = trunc i512 %r446 to i32
-%r449 = getelementptr i32, i32* %r1, i32 13
-store i32 %r447, i32* %r449
-%r450 = lshr i512 %r446, 32
-%r451 = trunc i512 %r450 to i32
-%r453 = getelementptr i32, i32* %r1, i32 14
-store i32 %r451, i32* %r453
-%r454 = lshr i512 %r450, 32
-%r455 = trunc i512 %r454 to i32
-%r457 = getelementptr i32, i32* %r1, i32 15
-store i32 %r455, i32* %r457
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = load i32, i32* %r3
-%r112 = zext i32 %r111 to i64
-%r114 = getelementptr i32, i32* %r3, i32 1
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i64
-%r117 = shl i64 %r116, 32
-%r118 = or i64 %r112, %r117
-%r119 = zext i64 %r118 to i96
-%r121 = getelementptr i32, i32* %r3, i32 2
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i96
-%r124 = shl i96 %r123, 64
-%r125 = or i96 %r119, %r124
-%r126 = zext i96 %r125 to i128
-%r128 = getelementptr i32, i32* %r3, i32 3
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i128
-%r131 = shl i128 %r130, 96
-%r132 = or i128 %r126, %r131
-%r133 = zext i128 %r132 to i160
-%r135 = getelementptr i32, i32* %r3, i32 4
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i160
-%r138 = shl i160 %r137, 128
-%r139 = or i160 %r133, %r138
-%r140 = zext i160 %r139 to i192
-%r142 = getelementptr i32, i32* %r3, i32 5
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i192
-%r145 = shl i192 %r144, 160
-%r146 = or i192 %r140, %r145
-%r147 = zext i192 %r146 to i224
-%r149 = getelementptr i32, i32* %r3, i32 6
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i224
-%r152 = shl i224 %r151, 192
-%r153 = or i224 %r147, %r152
-%r154 = zext i224 %r153 to i256
-%r156 = getelementptr i32, i32* %r3, i32 7
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i256
-%r159 = shl i256 %r158, 224
-%r160 = or i256 %r154, %r159
-%r161 = zext i256 %r160 to i288
-%r163 = getelementptr i32, i32* %r3, i32 8
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i288
-%r166 = shl i288 %r165, 256
-%r167 = or i288 %r161, %r166
-%r168 = zext i288 %r167 to i320
-%r170 = getelementptr i32, i32* %r3, i32 9
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i320
-%r173 = shl i320 %r172, 288
-%r174 = or i320 %r168, %r173
-%r175 = zext i320 %r174 to i352
-%r177 = getelementptr i32, i32* %r3, i32 10
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i352
-%r180 = shl i352 %r179, 320
-%r181 = or i352 %r175, %r180
-%r182 = zext i352 %r181 to i384
-%r184 = getelementptr i32, i32* %r3, i32 11
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i384
-%r187 = shl i384 %r186, 352
-%r188 = or i384 %r182, %r187
-%r189 = zext i384 %r188 to i416
-%r191 = getelementptr i32, i32* %r3, i32 12
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i416
-%r194 = shl i416 %r193, 384
-%r195 = or i416 %r189, %r194
-%r196 = zext i416 %r195 to i448
-%r198 = getelementptr i32, i32* %r3, i32 13
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i448
-%r201 = shl i448 %r200, 416
-%r202 = or i448 %r196, %r201
-%r203 = zext i448 %r202 to i480
-%r205 = getelementptr i32, i32* %r3, i32 14
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i480
-%r208 = shl i480 %r207, 448
-%r209 = or i480 %r203, %r208
-%r210 = zext i480 %r209 to i512
-%r212 = getelementptr i32, i32* %r3, i32 15
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i512
-%r215 = shl i512 %r214, 480
-%r216 = or i512 %r210, %r215
-%r217 = add i512 %r110, %r216
-%r218 = load i32, i32* %r4
-%r219 = zext i32 %r218 to i64
-%r221 = getelementptr i32, i32* %r4, i32 1
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i64
-%r224 = shl i64 %r223, 32
-%r225 = or i64 %r219, %r224
-%r226 = zext i64 %r225 to i96
-%r228 = getelementptr i32, i32* %r4, i32 2
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i96
-%r231 = shl i96 %r230, 64
-%r232 = or i96 %r226, %r231
-%r233 = zext i96 %r232 to i128
-%r235 = getelementptr i32, i32* %r4, i32 3
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i128
-%r238 = shl i128 %r237, 96
-%r239 = or i128 %r233, %r238
-%r240 = zext i128 %r239 to i160
-%r242 = getelementptr i32, i32* %r4, i32 4
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i160
-%r245 = shl i160 %r244, 128
-%r246 = or i160 %r240, %r245
-%r247 = zext i160 %r246 to i192
-%r249 = getelementptr i32, i32* %r4, i32 5
-%r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i192
-%r252 = shl i192 %r251, 160
-%r253 = or i192 %r247, %r252
-%r254 = zext i192 %r253 to i224
-%r256 = getelementptr i32, i32* %r4, i32 6
-%r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i224
-%r259 = shl i224 %r258, 192
-%r260 = or i224 %r254, %r259
-%r261 = zext i224 %r260 to i256
-%r263 = getelementptr i32, i32* %r4, i32 7
-%r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i256
-%r266 = shl i256 %r265, 224
-%r267 = or i256 %r261, %r266
-%r268 = zext i256 %r267 to i288
-%r270 = getelementptr i32, i32* %r4, i32 8
-%r271 = load i32, i32* %r270
-%r272 = zext i32 %r271 to i288
-%r273 = shl i288 %r272, 256
-%r274 = or i288 %r268, %r273
-%r275 = zext i288 %r274 to i320
-%r277 = getelementptr i32, i32* %r4, i32 9
-%r278 = load i32, i32* %r277
-%r279 = zext i32 %r278 to i320
-%r280 = shl i320 %r279, 288
-%r281 = or i320 %r275, %r280
-%r282 = zext i320 %r281 to i352
-%r284 = getelementptr i32, i32* %r4, i32 10
-%r285 = load i32, i32* %r284
-%r286 = zext i32 %r285 to i352
-%r287 = shl i352 %r286, 320
-%r288 = or i352 %r282, %r287
-%r289 = zext i352 %r288 to i384
-%r291 = getelementptr i32, i32* %r4, i32 11
-%r292 = load i32, i32* %r291
-%r293 = zext i32 %r292 to i384
-%r294 = shl i384 %r293, 352
-%r295 = or i384 %r289, %r294
-%r296 = zext i384 %r295 to i416
-%r298 = getelementptr i32, i32* %r4, i32 12
-%r299 = load i32, i32* %r298
-%r300 = zext i32 %r299 to i416
-%r301 = shl i416 %r300, 384
-%r302 = or i416 %r296, %r301
-%r303 = zext i416 %r302 to i448
-%r305 = getelementptr i32, i32* %r4, i32 13
-%r306 = load i32, i32* %r305
-%r307 = zext i32 %r306 to i448
-%r308 = shl i448 %r307, 416
-%r309 = or i448 %r303, %r308
-%r310 = zext i448 %r309 to i480
-%r312 = getelementptr i32, i32* %r4, i32 14
-%r313 = load i32, i32* %r312
-%r314 = zext i32 %r313 to i480
-%r315 = shl i480 %r314, 448
-%r316 = or i480 %r310, %r315
-%r317 = zext i480 %r316 to i512
-%r319 = getelementptr i32, i32* %r4, i32 15
-%r320 = load i32, i32* %r319
-%r321 = zext i32 %r320 to i512
-%r322 = shl i512 %r321, 480
-%r323 = or i512 %r317, %r322
-%r324 = sub i512 %r217, %r323
-%r325 = lshr i512 %r324, 511
-%r326 = trunc i512 %r325 to i1
-%r327 = select i1 %r326, i512 %r217, i512 %r324
-%r328 = trunc i512 %r327 to i32
-%r330 = getelementptr i32, i32* %r1, i32 0
-store i32 %r328, i32* %r330
-%r331 = lshr i512 %r327, 32
-%r332 = trunc i512 %r331 to i32
-%r334 = getelementptr i32, i32* %r1, i32 1
-store i32 %r332, i32* %r334
-%r335 = lshr i512 %r331, 32
-%r336 = trunc i512 %r335 to i32
-%r338 = getelementptr i32, i32* %r1, i32 2
-store i32 %r336, i32* %r338
-%r339 = lshr i512 %r335, 32
-%r340 = trunc i512 %r339 to i32
-%r342 = getelementptr i32, i32* %r1, i32 3
-store i32 %r340, i32* %r342
-%r343 = lshr i512 %r339, 32
-%r344 = trunc i512 %r343 to i32
-%r346 = getelementptr i32, i32* %r1, i32 4
-store i32 %r344, i32* %r346
-%r347 = lshr i512 %r343, 32
-%r348 = trunc i512 %r347 to i32
-%r350 = getelementptr i32, i32* %r1, i32 5
-store i32 %r348, i32* %r350
-%r351 = lshr i512 %r347, 32
-%r352 = trunc i512 %r351 to i32
-%r354 = getelementptr i32, i32* %r1, i32 6
-store i32 %r352, i32* %r354
-%r355 = lshr i512 %r351, 32
-%r356 = trunc i512 %r355 to i32
-%r358 = getelementptr i32, i32* %r1, i32 7
-store i32 %r356, i32* %r358
-%r359 = lshr i512 %r355, 32
-%r360 = trunc i512 %r359 to i32
-%r362 = getelementptr i32, i32* %r1, i32 8
-store i32 %r360, i32* %r362
-%r363 = lshr i512 %r359, 32
-%r364 = trunc i512 %r363 to i32
-%r366 = getelementptr i32, i32* %r1, i32 9
-store i32 %r364, i32* %r366
-%r367 = lshr i512 %r363, 32
-%r368 = trunc i512 %r367 to i32
-%r370 = getelementptr i32, i32* %r1, i32 10
-store i32 %r368, i32* %r370
-%r371 = lshr i512 %r367, 32
-%r372 = trunc i512 %r371 to i32
-%r374 = getelementptr i32, i32* %r1, i32 11
-store i32 %r372, i32* %r374
-%r375 = lshr i512 %r371, 32
-%r376 = trunc i512 %r375 to i32
-%r378 = getelementptr i32, i32* %r1, i32 12
-store i32 %r376, i32* %r378
-%r379 = lshr i512 %r375, 32
-%r380 = trunc i512 %r379 to i32
-%r382 = getelementptr i32, i32* %r1, i32 13
-store i32 %r380, i32* %r382
-%r383 = lshr i512 %r379, 32
-%r384 = trunc i512 %r383 to i32
-%r386 = getelementptr i32, i32* %r1, i32 14
-store i32 %r384, i32* %r386
-%r387 = lshr i512 %r383, 32
-%r388 = trunc i512 %r387 to i32
-%r390 = getelementptr i32, i32* %r1, i32 15
-store i32 %r388, i32* %r390
-ret void
-}
-define void @mcl_fp_sub16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = load i32, i32* %r3
-%r112 = zext i32 %r111 to i64
-%r114 = getelementptr i32, i32* %r3, i32 1
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i64
-%r117 = shl i64 %r116, 32
-%r118 = or i64 %r112, %r117
-%r119 = zext i64 %r118 to i96
-%r121 = getelementptr i32, i32* %r3, i32 2
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i96
-%r124 = shl i96 %r123, 64
-%r125 = or i96 %r119, %r124
-%r126 = zext i96 %r125 to i128
-%r128 = getelementptr i32, i32* %r3, i32 3
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i128
-%r131 = shl i128 %r130, 96
-%r132 = or i128 %r126, %r131
-%r133 = zext i128 %r132 to i160
-%r135 = getelementptr i32, i32* %r3, i32 4
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i160
-%r138 = shl i160 %r137, 128
-%r139 = or i160 %r133, %r138
-%r140 = zext i160 %r139 to i192
-%r142 = getelementptr i32, i32* %r3, i32 5
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i192
-%r145 = shl i192 %r144, 160
-%r146 = or i192 %r140, %r145
-%r147 = zext i192 %r146 to i224
-%r149 = getelementptr i32, i32* %r3, i32 6
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i224
-%r152 = shl i224 %r151, 192
-%r153 = or i224 %r147, %r152
-%r154 = zext i224 %r153 to i256
-%r156 = getelementptr i32, i32* %r3, i32 7
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i256
-%r159 = shl i256 %r158, 224
-%r160 = or i256 %r154, %r159
-%r161 = zext i256 %r160 to i288
-%r163 = getelementptr i32, i32* %r3, i32 8
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i288
-%r166 = shl i288 %r165, 256
-%r167 = or i288 %r161, %r166
-%r168 = zext i288 %r167 to i320
-%r170 = getelementptr i32, i32* %r3, i32 9
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i320
-%r173 = shl i320 %r172, 288
-%r174 = or i320 %r168, %r173
-%r175 = zext i320 %r174 to i352
-%r177 = getelementptr i32, i32* %r3, i32 10
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i352
-%r180 = shl i352 %r179, 320
-%r181 = or i352 %r175, %r180
-%r182 = zext i352 %r181 to i384
-%r184 = getelementptr i32, i32* %r3, i32 11
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i384
-%r187 = shl i384 %r186, 352
-%r188 = or i384 %r182, %r187
-%r189 = zext i384 %r188 to i416
-%r191 = getelementptr i32, i32* %r3, i32 12
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i416
-%r194 = shl i416 %r193, 384
-%r195 = or i416 %r189, %r194
-%r196 = zext i416 %r195 to i448
-%r198 = getelementptr i32, i32* %r3, i32 13
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i448
-%r201 = shl i448 %r200, 416
-%r202 = or i448 %r196, %r201
-%r203 = zext i448 %r202 to i480
-%r205 = getelementptr i32, i32* %r3, i32 14
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i480
-%r208 = shl i480 %r207, 448
-%r209 = or i480 %r203, %r208
-%r210 = zext i480 %r209 to i512
-%r212 = getelementptr i32, i32* %r3, i32 15
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i512
-%r215 = shl i512 %r214, 480
-%r216 = or i512 %r210, %r215
-%r217 = zext i512 %r110 to i544
-%r218 = zext i512 %r216 to i544
-%r219 = sub i544 %r217, %r218
-%r220 = trunc i544 %r219 to i512
-%r221 = lshr i544 %r219, 512
-%r222 = trunc i544 %r221 to i1
-%r223 = trunc i512 %r220 to i32
-%r225 = getelementptr i32, i32* %r1, i32 0
-store i32 %r223, i32* %r225
-%r226 = lshr i512 %r220, 32
-%r227 = trunc i512 %r226 to i32
-%r229 = getelementptr i32, i32* %r1, i32 1
-store i32 %r227, i32* %r229
-%r230 = lshr i512 %r226, 32
-%r231 = trunc i512 %r230 to i32
-%r233 = getelementptr i32, i32* %r1, i32 2
-store i32 %r231, i32* %r233
-%r234 = lshr i512 %r230, 32
-%r235 = trunc i512 %r234 to i32
-%r237 = getelementptr i32, i32* %r1, i32 3
-store i32 %r235, i32* %r237
-%r238 = lshr i512 %r234, 32
-%r239 = trunc i512 %r238 to i32
-%r241 = getelementptr i32, i32* %r1, i32 4
-store i32 %r239, i32* %r241
-%r242 = lshr i512 %r238, 32
-%r243 = trunc i512 %r242 to i32
-%r245 = getelementptr i32, i32* %r1, i32 5
-store i32 %r243, i32* %r245
-%r246 = lshr i512 %r242, 32
-%r247 = trunc i512 %r246 to i32
-%r249 = getelementptr i32, i32* %r1, i32 6
-store i32 %r247, i32* %r249
-%r250 = lshr i512 %r246, 32
-%r251 = trunc i512 %r250 to i32
-%r253 = getelementptr i32, i32* %r1, i32 7
-store i32 %r251, i32* %r253
-%r254 = lshr i512 %r250, 32
-%r255 = trunc i512 %r254 to i32
-%r257 = getelementptr i32, i32* %r1, i32 8
-store i32 %r255, i32* %r257
-%r258 = lshr i512 %r254, 32
-%r259 = trunc i512 %r258 to i32
-%r261 = getelementptr i32, i32* %r1, i32 9
-store i32 %r259, i32* %r261
-%r262 = lshr i512 %r258, 32
-%r263 = trunc i512 %r262 to i32
-%r265 = getelementptr i32, i32* %r1, i32 10
-store i32 %r263, i32* %r265
-%r266 = lshr i512 %r262, 32
-%r267 = trunc i512 %r266 to i32
-%r269 = getelementptr i32, i32* %r1, i32 11
-store i32 %r267, i32* %r269
-%r270 = lshr i512 %r266, 32
-%r271 = trunc i512 %r270 to i32
-%r273 = getelementptr i32, i32* %r1, i32 12
-store i32 %r271, i32* %r273
-%r274 = lshr i512 %r270, 32
-%r275 = trunc i512 %r274 to i32
-%r277 = getelementptr i32, i32* %r1, i32 13
-store i32 %r275, i32* %r277
-%r278 = lshr i512 %r274, 32
-%r279 = trunc i512 %r278 to i32
-%r281 = getelementptr i32, i32* %r1, i32 14
-store i32 %r279, i32* %r281
-%r282 = lshr i512 %r278, 32
-%r283 = trunc i512 %r282 to i32
-%r285 = getelementptr i32, i32* %r1, i32 15
-store i32 %r283, i32* %r285
-br i1%r222, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r286 = load i32, i32* %r4
-%r287 = zext i32 %r286 to i64
-%r289 = getelementptr i32, i32* %r4, i32 1
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i64
-%r292 = shl i64 %r291, 32
-%r293 = or i64 %r287, %r292
-%r294 = zext i64 %r293 to i96
-%r296 = getelementptr i32, i32* %r4, i32 2
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i96
-%r299 = shl i96 %r298, 64
-%r300 = or i96 %r294, %r299
-%r301 = zext i96 %r300 to i128
-%r303 = getelementptr i32, i32* %r4, i32 3
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i128
-%r306 = shl i128 %r305, 96
-%r307 = or i128 %r301, %r306
-%r308 = zext i128 %r307 to i160
-%r310 = getelementptr i32, i32* %r4, i32 4
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i160
-%r313 = shl i160 %r312, 128
-%r314 = or i160 %r308, %r313
-%r315 = zext i160 %r314 to i192
-%r317 = getelementptr i32, i32* %r4, i32 5
-%r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i192
-%r320 = shl i192 %r319, 160
-%r321 = or i192 %r315, %r320
-%r322 = zext i192 %r321 to i224
-%r324 = getelementptr i32, i32* %r4, i32 6
-%r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i224
-%r327 = shl i224 %r326, 192
-%r328 = or i224 %r322, %r327
-%r329 = zext i224 %r328 to i256
-%r331 = getelementptr i32, i32* %r4, i32 7
-%r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i256
-%r334 = shl i256 %r333, 224
-%r335 = or i256 %r329, %r334
-%r336 = zext i256 %r335 to i288
-%r338 = getelementptr i32, i32* %r4, i32 8
-%r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i288
-%r341 = shl i288 %r340, 256
-%r342 = or i288 %r336, %r341
-%r343 = zext i288 %r342 to i320
-%r345 = getelementptr i32, i32* %r4, i32 9
-%r346 = load i32, i32* %r345
-%r347 = zext i32 %r346 to i320
-%r348 = shl i320 %r347, 288
-%r349 = or i320 %r343, %r348
-%r350 = zext i320 %r349 to i352
-%r352 = getelementptr i32, i32* %r4, i32 10
-%r353 = load i32, i32* %r352
-%r354 = zext i32 %r353 to i352
-%r355 = shl i352 %r354, 320
-%r356 = or i352 %r350, %r355
-%r357 = zext i352 %r356 to i384
-%r359 = getelementptr i32, i32* %r4, i32 11
-%r360 = load i32, i32* %r359
-%r361 = zext i32 %r360 to i384
-%r362 = shl i384 %r361, 352
-%r363 = or i384 %r357, %r362
-%r364 = zext i384 %r363 to i416
-%r366 = getelementptr i32, i32* %r4, i32 12
-%r367 = load i32, i32* %r366
-%r368 = zext i32 %r367 to i416
-%r369 = shl i416 %r368, 384
-%r370 = or i416 %r364, %r369
-%r371 = zext i416 %r370 to i448
-%r373 = getelementptr i32, i32* %r4, i32 13
-%r374 = load i32, i32* %r373
-%r375 = zext i32 %r374 to i448
-%r376 = shl i448 %r375, 416
-%r377 = or i448 %r371, %r376
-%r378 = zext i448 %r377 to i480
-%r380 = getelementptr i32, i32* %r4, i32 14
-%r381 = load i32, i32* %r380
-%r382 = zext i32 %r381 to i480
-%r383 = shl i480 %r382, 448
-%r384 = or i480 %r378, %r383
-%r385 = zext i480 %r384 to i512
-%r387 = getelementptr i32, i32* %r4, i32 15
-%r388 = load i32, i32* %r387
-%r389 = zext i32 %r388 to i512
-%r390 = shl i512 %r389, 480
-%r391 = or i512 %r385, %r390
-%r392 = add i512 %r220, %r391
-%r393 = trunc i512 %r392 to i32
-%r395 = getelementptr i32, i32* %r1, i32 0
-store i32 %r393, i32* %r395
-%r396 = lshr i512 %r392, 32
-%r397 = trunc i512 %r396 to i32
-%r399 = getelementptr i32, i32* %r1, i32 1
-store i32 %r397, i32* %r399
-%r400 = lshr i512 %r396, 32
-%r401 = trunc i512 %r400 to i32
-%r403 = getelementptr i32, i32* %r1, i32 2
-store i32 %r401, i32* %r403
-%r404 = lshr i512 %r400, 32
-%r405 = trunc i512 %r404 to i32
-%r407 = getelementptr i32, i32* %r1, i32 3
-store i32 %r405, i32* %r407
-%r408 = lshr i512 %r404, 32
-%r409 = trunc i512 %r408 to i32
-%r411 = getelementptr i32, i32* %r1, i32 4
-store i32 %r409, i32* %r411
-%r412 = lshr i512 %r408, 32
-%r413 = trunc i512 %r412 to i32
-%r415 = getelementptr i32, i32* %r1, i32 5
-store i32 %r413, i32* %r415
-%r416 = lshr i512 %r412, 32
-%r417 = trunc i512 %r416 to i32
-%r419 = getelementptr i32, i32* %r1, i32 6
-store i32 %r417, i32* %r419
-%r420 = lshr i512 %r416, 32
-%r421 = trunc i512 %r420 to i32
-%r423 = getelementptr i32, i32* %r1, i32 7
-store i32 %r421, i32* %r423
-%r424 = lshr i512 %r420, 32
-%r425 = trunc i512 %r424 to i32
-%r427 = getelementptr i32, i32* %r1, i32 8
-store i32 %r425, i32* %r427
-%r428 = lshr i512 %r424, 32
-%r429 = trunc i512 %r428 to i32
-%r431 = getelementptr i32, i32* %r1, i32 9
-store i32 %r429, i32* %r431
-%r432 = lshr i512 %r428, 32
-%r433 = trunc i512 %r432 to i32
-%r435 = getelementptr i32, i32* %r1, i32 10
-store i32 %r433, i32* %r435
-%r436 = lshr i512 %r432, 32
-%r437 = trunc i512 %r436 to i32
-%r439 = getelementptr i32, i32* %r1, i32 11
-store i32 %r437, i32* %r439
-%r440 = lshr i512 %r436, 32
-%r441 = trunc i512 %r440 to i32
-%r443 = getelementptr i32, i32* %r1, i32 12
-store i32 %r441, i32* %r443
-%r444 = lshr i512 %r440, 32
-%r445 = trunc i512 %r444 to i32
-%r447 = getelementptr i32, i32* %r1, i32 13
-store i32 %r445, i32* %r447
-%r448 = lshr i512 %r444, 32
-%r449 = trunc i512 %r448 to i32
-%r451 = getelementptr i32, i32* %r1, i32 14
-store i32 %r449, i32* %r451
-%r452 = lshr i512 %r448, 32
-%r453 = trunc i512 %r452 to i32
-%r455 = getelementptr i32, i32* %r1, i32 15
-store i32 %r453, i32* %r455
-ret void
-}
-define void @mcl_fp_subNF16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = load i32, i32* %r3
-%r112 = zext i32 %r111 to i64
-%r114 = getelementptr i32, i32* %r3, i32 1
-%r115 = load i32, i32* %r114
-%r116 = zext i32 %r115 to i64
-%r117 = shl i64 %r116, 32
-%r118 = or i64 %r112, %r117
-%r119 = zext i64 %r118 to i96
-%r121 = getelementptr i32, i32* %r3, i32 2
-%r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i96
-%r124 = shl i96 %r123, 64
-%r125 = or i96 %r119, %r124
-%r126 = zext i96 %r125 to i128
-%r128 = getelementptr i32, i32* %r3, i32 3
-%r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i128
-%r131 = shl i128 %r130, 96
-%r132 = or i128 %r126, %r131
-%r133 = zext i128 %r132 to i160
-%r135 = getelementptr i32, i32* %r3, i32 4
-%r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i160
-%r138 = shl i160 %r137, 128
-%r139 = or i160 %r133, %r138
-%r140 = zext i160 %r139 to i192
-%r142 = getelementptr i32, i32* %r3, i32 5
-%r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i192
-%r145 = shl i192 %r144, 160
-%r146 = or i192 %r140, %r145
-%r147 = zext i192 %r146 to i224
-%r149 = getelementptr i32, i32* %r3, i32 6
-%r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i224
-%r152 = shl i224 %r151, 192
-%r153 = or i224 %r147, %r152
-%r154 = zext i224 %r153 to i256
-%r156 = getelementptr i32, i32* %r3, i32 7
-%r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i256
-%r159 = shl i256 %r158, 224
-%r160 = or i256 %r154, %r159
-%r161 = zext i256 %r160 to i288
-%r163 = getelementptr i32, i32* %r3, i32 8
-%r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i288
-%r166 = shl i288 %r165, 256
-%r167 = or i288 %r161, %r166
-%r168 = zext i288 %r167 to i320
-%r170 = getelementptr i32, i32* %r3, i32 9
-%r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i320
-%r173 = shl i320 %r172, 288
-%r174 = or i320 %r168, %r173
-%r175 = zext i320 %r174 to i352
-%r177 = getelementptr i32, i32* %r3, i32 10
-%r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i352
-%r180 = shl i352 %r179, 320
-%r181 = or i352 %r175, %r180
-%r182 = zext i352 %r181 to i384
-%r184 = getelementptr i32, i32* %r3, i32 11
-%r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i384
-%r187 = shl i384 %r186, 352
-%r188 = or i384 %r182, %r187
-%r189 = zext i384 %r188 to i416
-%r191 = getelementptr i32, i32* %r3, i32 12
-%r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i416
-%r194 = shl i416 %r193, 384
-%r195 = or i416 %r189, %r194
-%r196 = zext i416 %r195 to i448
-%r198 = getelementptr i32, i32* %r3, i32 13
-%r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i448
-%r201 = shl i448 %r200, 416
-%r202 = or i448 %r196, %r201
-%r203 = zext i448 %r202 to i480
-%r205 = getelementptr i32, i32* %r3, i32 14
-%r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i480
-%r208 = shl i480 %r207, 448
-%r209 = or i480 %r203, %r208
-%r210 = zext i480 %r209 to i512
-%r212 = getelementptr i32, i32* %r3, i32 15
-%r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i512
-%r215 = shl i512 %r214, 480
-%r216 = or i512 %r210, %r215
-%r217 = sub i512 %r110, %r216
-%r218 = lshr i512 %r217, 511
-%r219 = trunc i512 %r218 to i1
-%r220 = load i32, i32* %r4
-%r221 = zext i32 %r220 to i64
-%r223 = getelementptr i32, i32* %r4, i32 1
-%r224 = load i32, i32* %r223
-%r225 = zext i32 %r224 to i64
-%r226 = shl i64 %r225, 32
-%r227 = or i64 %r221, %r226
-%r228 = zext i64 %r227 to i96
-%r230 = getelementptr i32, i32* %r4, i32 2
-%r231 = load i32, i32* %r230
-%r232 = zext i32 %r231 to i96
-%r233 = shl i96 %r232, 64
-%r234 = or i96 %r228, %r233
-%r235 = zext i96 %r234 to i128
-%r237 = getelementptr i32, i32* %r4, i32 3
-%r238 = load i32, i32* %r237
-%r239 = zext i32 %r238 to i128
-%r240 = shl i128 %r239, 96
-%r241 = or i128 %r235, %r240
-%r242 = zext i128 %r241 to i160
-%r244 = getelementptr i32, i32* %r4, i32 4
-%r245 = load i32, i32* %r244
-%r246 = zext i32 %r245 to i160
-%r247 = shl i160 %r246, 128
-%r248 = or i160 %r242, %r247
-%r249 = zext i160 %r248 to i192
-%r251 = getelementptr i32, i32* %r4, i32 5
-%r252 = load i32, i32* %r251
-%r253 = zext i32 %r252 to i192
-%r254 = shl i192 %r253, 160
-%r255 = or i192 %r249, %r254
-%r256 = zext i192 %r255 to i224
-%r258 = getelementptr i32, i32* %r4, i32 6
-%r259 = load i32, i32* %r258
-%r260 = zext i32 %r259 to i224
-%r261 = shl i224 %r260, 192
-%r262 = or i224 %r256, %r261
-%r263 = zext i224 %r262 to i256
-%r265 = getelementptr i32, i32* %r4, i32 7
-%r266 = load i32, i32* %r265
-%r267 = zext i32 %r266 to i256
-%r268 = shl i256 %r267, 224
-%r269 = or i256 %r263, %r268
-%r270 = zext i256 %r269 to i288
-%r272 = getelementptr i32, i32* %r4, i32 8
-%r273 = load i32, i32* %r272
-%r274 = zext i32 %r273 to i288
-%r275 = shl i288 %r274, 256
-%r276 = or i288 %r270, %r275
-%r277 = zext i288 %r276 to i320
-%r279 = getelementptr i32, i32* %r4, i32 9
-%r280 = load i32, i32* %r279
-%r281 = zext i32 %r280 to i320
-%r282 = shl i320 %r281, 288
-%r283 = or i320 %r277, %r282
-%r284 = zext i320 %r283 to i352
-%r286 = getelementptr i32, i32* %r4, i32 10
-%r287 = load i32, i32* %r286
-%r288 = zext i32 %r287 to i352
-%r289 = shl i352 %r288, 320
-%r290 = or i352 %r284, %r289
-%r291 = zext i352 %r290 to i384
-%r293 = getelementptr i32, i32* %r4, i32 11
-%r294 = load i32, i32* %r293
-%r295 = zext i32 %r294 to i384
-%r296 = shl i384 %r295, 352
-%r297 = or i384 %r291, %r296
-%r298 = zext i384 %r297 to i416
-%r300 = getelementptr i32, i32* %r4, i32 12
-%r301 = load i32, i32* %r300
-%r302 = zext i32 %r301 to i416
-%r303 = shl i416 %r302, 384
-%r304 = or i416 %r298, %r303
-%r305 = zext i416 %r304 to i448
-%r307 = getelementptr i32, i32* %r4, i32 13
-%r308 = load i32, i32* %r307
-%r309 = zext i32 %r308 to i448
-%r310 = shl i448 %r309, 416
-%r311 = or i448 %r305, %r310
-%r312 = zext i448 %r311 to i480
-%r314 = getelementptr i32, i32* %r4, i32 14
-%r315 = load i32, i32* %r314
-%r316 = zext i32 %r315 to i480
-%r317 = shl i480 %r316, 448
-%r318 = or i480 %r312, %r317
-%r319 = zext i480 %r318 to i512
-%r321 = getelementptr i32, i32* %r4, i32 15
-%r322 = load i32, i32* %r321
-%r323 = zext i32 %r322 to i512
-%r324 = shl i512 %r323, 480
-%r325 = or i512 %r319, %r324
-%r327 = select i1 %r219, i512 %r325, i512 0
-%r328 = add i512 %r217, %r327
-%r329 = trunc i512 %r328 to i32
-%r331 = getelementptr i32, i32* %r1, i32 0
-store i32 %r329, i32* %r331
-%r332 = lshr i512 %r328, 32
-%r333 = trunc i512 %r332 to i32
-%r335 = getelementptr i32, i32* %r1, i32 1
-store i32 %r333, i32* %r335
-%r336 = lshr i512 %r332, 32
-%r337 = trunc i512 %r336 to i32
-%r339 = getelementptr i32, i32* %r1, i32 2
-store i32 %r337, i32* %r339
-%r340 = lshr i512 %r336, 32
-%r341 = trunc i512 %r340 to i32
-%r343 = getelementptr i32, i32* %r1, i32 3
-store i32 %r341, i32* %r343
-%r344 = lshr i512 %r340, 32
-%r345 = trunc i512 %r344 to i32
-%r347 = getelementptr i32, i32* %r1, i32 4
-store i32 %r345, i32* %r347
-%r348 = lshr i512 %r344, 32
-%r349 = trunc i512 %r348 to i32
-%r351 = getelementptr i32, i32* %r1, i32 5
-store i32 %r349, i32* %r351
-%r352 = lshr i512 %r348, 32
-%r353 = trunc i512 %r352 to i32
-%r355 = getelementptr i32, i32* %r1, i32 6
-store i32 %r353, i32* %r355
-%r356 = lshr i512 %r352, 32
-%r357 = trunc i512 %r356 to i32
-%r359 = getelementptr i32, i32* %r1, i32 7
-store i32 %r357, i32* %r359
-%r360 = lshr i512 %r356, 32
-%r361 = trunc i512 %r360 to i32
-%r363 = getelementptr i32, i32* %r1, i32 8
-store i32 %r361, i32* %r363
-%r364 = lshr i512 %r360, 32
-%r365 = trunc i512 %r364 to i32
-%r367 = getelementptr i32, i32* %r1, i32 9
-store i32 %r365, i32* %r367
-%r368 = lshr i512 %r364, 32
-%r369 = trunc i512 %r368 to i32
-%r371 = getelementptr i32, i32* %r1, i32 10
-store i32 %r369, i32* %r371
-%r372 = lshr i512 %r368, 32
-%r373 = trunc i512 %r372 to i32
-%r375 = getelementptr i32, i32* %r1, i32 11
-store i32 %r373, i32* %r375
-%r376 = lshr i512 %r372, 32
-%r377 = trunc i512 %r376 to i32
-%r379 = getelementptr i32, i32* %r1, i32 12
-store i32 %r377, i32* %r379
-%r380 = lshr i512 %r376, 32
-%r381 = trunc i512 %r380 to i32
-%r383 = getelementptr i32, i32* %r1, i32 13
-store i32 %r381, i32* %r383
-%r384 = lshr i512 %r380, 32
-%r385 = trunc i512 %r384 to i32
-%r387 = getelementptr i32, i32* %r1, i32 14
-store i32 %r385, i32* %r387
-%r388 = lshr i512 %r384, 32
-%r389 = trunc i512 %r388 to i32
-%r391 = getelementptr i32, i32* %r1, i32 15
-store i32 %r389, i32* %r391
-ret void
-}
-define void @mcl_fpDbl_add16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
-{
-%r5 = load i32, i32* %r2
-%r6 = zext i32 %r5 to i64
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = zext i32 %r9 to i64
-%r11 = shl i64 %r10, 32
-%r12 = or i64 %r6, %r11
-%r13 = zext i64 %r12 to i96
-%r15 = getelementptr i32, i32* %r2, i32 2
-%r16 = load i32, i32* %r15
-%r17 = zext i32 %r16 to i96
-%r18 = shl i96 %r17, 64
-%r19 = or i96 %r13, %r18
-%r20 = zext i96 %r19 to i128
-%r22 = getelementptr i32, i32* %r2, i32 3
-%r23 = load i32, i32* %r22
-%r24 = zext i32 %r23 to i128
-%r25 = shl i128 %r24, 96
-%r26 = or i128 %r20, %r25
-%r27 = zext i128 %r26 to i160
-%r29 = getelementptr i32, i32* %r2, i32 4
-%r30 = load i32, i32* %r29
-%r31 = zext i32 %r30 to i160
-%r32 = shl i160 %r31, 128
-%r33 = or i160 %r27, %r32
-%r34 = zext i160 %r33 to i192
-%r36 = getelementptr i32, i32* %r2, i32 5
-%r37 = load i32, i32* %r36
-%r38 = zext i32 %r37 to i192
-%r39 = shl i192 %r38, 160
-%r40 = or i192 %r34, %r39
-%r41 = zext i192 %r40 to i224
-%r43 = getelementptr i32, i32* %r2, i32 6
-%r44 = load i32, i32* %r43
-%r45 = zext i32 %r44 to i224
-%r46 = shl i224 %r45, 192
-%r47 = or i224 %r41, %r46
-%r48 = zext i224 %r47 to i256
-%r50 = getelementptr i32, i32* %r2, i32 7
-%r51 = load i32, i32* %r50
-%r52 = zext i32 %r51 to i256
-%r53 = shl i256 %r52, 224
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i288
-%r57 = getelementptr i32, i32* %r2, i32 8
-%r58 = load i32, i32* %r57
-%r59 = zext i32 %r58 to i288
-%r60 = shl i288 %r59, 256
-%r61 = or i288 %r55, %r60
-%r62 = zext i288 %r61 to i320
-%r64 = getelementptr i32, i32* %r2, i32 9
-%r65 = load i32, i32* %r64
-%r66 = zext i32 %r65 to i320
-%r67 = shl i320 %r66, 288
-%r68 = or i320 %r62, %r67
-%r69 = zext i320 %r68 to i352
-%r71 = getelementptr i32, i32* %r2, i32 10
-%r72 = load i32, i32* %r71
-%r73 = zext i32 %r72 to i352
-%r74 = shl i352 %r73, 320
-%r75 = or i352 %r69, %r74
-%r76 = zext i352 %r75 to i384
-%r78 = getelementptr i32, i32* %r2, i32 11
-%r79 = load i32, i32* %r78
-%r80 = zext i32 %r79 to i384
-%r81 = shl i384 %r80, 352
-%r82 = or i384 %r76, %r81
-%r83 = zext i384 %r82 to i416
-%r85 = getelementptr i32, i32* %r2, i32 12
-%r86 = load i32, i32* %r85
-%r87 = zext i32 %r86 to i416
-%r88 = shl i416 %r87, 384
-%r89 = or i416 %r83, %r88
-%r90 = zext i416 %r89 to i448
-%r92 = getelementptr i32, i32* %r2, i32 13
-%r93 = load i32, i32* %r92
-%r94 = zext i32 %r93 to i448
-%r95 = shl i448 %r94, 416
-%r96 = or i448 %r90, %r95
-%r97 = zext i448 %r96 to i480
-%r99 = getelementptr i32, i32* %r2, i32 14
-%r100 = load i32, i32* %r99
-%r101 = zext i32 %r100 to i480
-%r102 = shl i480 %r101, 448
-%r103 = or i480 %r97, %r102
-%r104 = zext i480 %r103 to i512
-%r106 = getelementptr i32, i32* %r2, i32 15
-%r107 = load i32, i32* %r106
-%r108 = zext i32 %r107 to i512
-%r109 = shl i512 %r108, 480
-%r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r120 = getelementptr i32, i32* %r2, i32 17
-%r121 = load i32, i32* %r120
-%r122 = zext i32 %r121 to i576
-%r123 = shl i576 %r122, 544
-%r124 = or i576 %r118, %r123
-%r125 = zext i576 %r124 to i608
-%r127 = getelementptr i32, i32* %r2, i32 18
-%r128 = load i32, i32* %r127
-%r129 = zext i32 %r128 to i608
-%r130 = shl i608 %r129, 576
-%r131 = or i608 %r125, %r130
-%r132 = zext i608 %r131 to i640
-%r134 = getelementptr i32, i32* %r2, i32 19
-%r135 = load i32, i32* %r134
-%r136 = zext i32 %r135 to i640
-%r137 = shl i640 %r136, 608
-%r138 = or i640 %r132, %r137
-%r139 = zext i640 %r138 to i672
-%r141 = getelementptr i32, i32* %r2, i32 20
-%r142 = load i32, i32* %r141
-%r143 = zext i32 %r142 to i672
-%r144 = shl i672 %r143, 640
-%r145 = or i672 %r139, %r144
-%r146 = zext i672 %r145 to i704
-%r148 = getelementptr i32, i32* %r2, i32 21
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i704
-%r151 = shl i704 %r150, 672
-%r152 = or i704 %r146, %r151
-%r153 = zext i704 %r152 to i736
-%r155 = getelementptr i32, i32* %r2, i32 22
-%r156 = load i32, i32* %r155
-%r157 = zext i32 %r156 to i736
-%r158 = shl i736 %r157, 704
-%r159 = or i736 %r153, %r158
-%r160 = zext i736 %r159 to i768
-%r162 = getelementptr i32, i32* %r2, i32 23
-%r163 = load i32, i32* %r162
-%r164 = zext i32 %r163 to i768
-%r165 = shl i768 %r164, 736
-%r166 = or i768 %r160, %r165
-%r167 = zext i768 %r166 to i800
-%r169 = getelementptr i32, i32* %r2, i32 24
-%r170 = load i32, i32* %r169
-%r171 = zext i32 %r170 to i800
-%r172 = shl i800 %r171, 768
-%r173 = or i800 %r167, %r172
-%r174 = zext i800 %r173 to i832
-%r176 = getelementptr i32, i32* %r2, i32 25
-%r177 = load i32, i32* %r176
-%r178 = zext i32 %r177 to i832
-%r179 = shl i832 %r178, 800
-%r180 = or i832 %r174, %r179
-%r181 = zext i832 %r180 to i864
-%r183 = getelementptr i32, i32* %r2, i32 26
-%r184 = load i32, i32* %r183
-%r185 = zext i32 %r184 to i864
-%r186 = shl i864 %r185, 832
-%r187 = or i864 %r181, %r186
-%r188 = zext i864 %r187 to i896
-%r190 = getelementptr i32, i32* %r2, i32 27
-%r191 = load i32, i32* %r190
-%r192 = zext i32 %r191 to i896
-%r193 = shl i896 %r192, 864
-%r194 = or i896 %r188, %r193
-%r195 = zext i896 %r194 to i928
-%r197 = getelementptr i32, i32* %r2, i32 28
-%r198 = load i32, i32* %r197
-%r199 = zext i32 %r198 to i928
-%r200 = shl i928 %r199, 896
-%r201 = or i928 %r195, %r200
-%r202 = zext i928 %r201 to i960
-%r204 = getelementptr i32, i32* %r2, i32 29
-%r205 = load i32, i32* %r204
-%r206 = zext i32 %r205 to i960
-%r207 = shl i960 %r206, 928
-%r208 = or i960 %r202, %r207
-%r209 = zext i960 %r208 to i992
-%r211 = getelementptr i32, i32* %r2, i32 30
-%r212 = load i32, i32* %r211
-%r213 = zext i32 %r212 to i992
-%r214 = shl i992 %r213, 960
-%r215 = or i992 %r209, %r214
-%r216 = zext i992 %r215 to i1024
-%r218 = getelementptr i32, i32* %r2, i32 31
-%r219 = load i32, i32* %r218
-%r220 = zext i32 %r219 to i1024
-%r221 = shl i1024 %r220, 992
-%r222 = or i1024 %r216, %r221
-%r223 = load i32, i32* %r3
-%r224 = zext i32 %r223 to i64
-%r226 = getelementptr i32, i32* %r3, i32 1
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i64
-%r229 = shl i64 %r228, 32
-%r230 = or i64 %r224, %r229
-%r231 = zext i64 %r230 to i96
-%r233 = getelementptr i32, i32* %r3, i32 2
-%r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i96
-%r236 = shl i96 %r235, 64
-%r237 = or i96 %r231, %r236
-%r238 = zext i96 %r237 to i128
-%r240 = getelementptr i32, i32* %r3, i32 3
-%r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i128
-%r243 = shl i128 %r242, 96
-%r244 = or i128 %r238, %r243
-%r245 = zext i128 %r244 to i160
-%r247 = getelementptr i32, i32* %r3, i32 4
-%r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i160
-%r250 = shl i160 %r249, 128
-%r251 = or i160 %r245, %r250
-%r252 = zext i160 %r251 to i192
-%r254 = getelementptr i32, i32* %r3, i32 5
-%r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i192
-%r257 = shl i192 %r256, 160
-%r258 = or i192 %r252, %r257
-%r259 = zext i192 %r258 to i224
-%r261 = getelementptr i32, i32* %r3, i32 6
-%r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i224
-%r264 = shl i224 %r263, 192
-%r265 = or i224 %r259, %r264
-%r266 = zext i224 %r265 to i256
-%r268 = getelementptr i32, i32* %r3, i32 7
-%r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i256
-%r271 = shl i256 %r270, 224
-%r272 = or i256 %r266, %r271
-%r273 = zext i256 %r272 to i288
-%r275 = getelementptr i32, i32* %r3, i32 8
-%r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i288
-%r278 = shl i288 %r277, 256
-%r279 = or i288 %r273, %r278
-%r280 = zext i288 %r279 to i320
-%r282 = getelementptr i32, i32* %r3, i32 9
-%r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i320
-%r285 = shl i320 %r284, 288
-%r286 = or i320 %r280, %r285
-%r287 = zext i320 %r286 to i352
-%r289 = getelementptr i32, i32* %r3, i32 10
-%r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i352
-%r292 = shl i352 %r291, 320
-%r293 = or i352 %r287, %r292
-%r294 = zext i352 %r293 to i384
-%r296 = getelementptr i32, i32* %r3, i32 11
-%r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i384
-%r299 = shl i384 %r298, 352
-%r300 = or i384 %r294, %r299
-%r301 = zext i384 %r300 to i416
-%r303 = getelementptr i32, i32* %r3, i32 12
-%r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i416
-%r306 = shl i416 %r305, 384
-%r307 = or i416 %r301, %r306
-%r308 = zext i416 %r307 to i448
-%r310 = getelementptr i32, i32* %r3, i32 13
-%r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i448
-%r313 = shl i448 %r312, 416
-%r314 = or i448 %r308, %r313
-%r315 = zext i448 %r314 to i480
-%r317 = getelementptr i32, i32* %r3, i32 14
-%r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i480
-%r320 = shl i480 %r319, 448
-%r321 = or i480 %r315, %r320
-%r322 = zext i480 %r321 to i512
-%r324 = getelementptr i32, i32* %r3, i32 15
-%r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i512
-%r327 = shl i512 %r326, 480
-%r328 = or i512 %r322, %r327
-%r329 = zext i512 %r328 to i544
-%r331 = getelementptr i32, i32* %r3, i32 16
-%r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i544
-%r334 = shl i544 %r333, 512
-%r335 = or i544 %r329, %r334
-%r336 = zext i544 %r335 to i576
-%r338 = getelementptr i32, i32* %r3, i32 17
-%r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i576
-%r341 = shl i576 %r340, 544
-%r342 = or i576 %r336, %r341
-%r343 = zext i576 %r342 to i608
-%r345 = getelementptr i32, i32* %r3, i32 18
-%r346 = load i32, i32* %r345
-%r347 = zext i32 %r346 to i608
-%r348 = shl i608 %r347, 576
-%r349 = or i608 %r343, %r348
-%r350 = zext i608 %r349 to i640
-%r352 = getelementptr i32, i32* %r3, i32 19
-%r353 = load i32, i32* %r352
-%r354 = zext i32 %r353 to i640
-%r355 = shl i640 %r354, 608
-%r356 = or i640 %r350, %r355
-%r357 = zext i640 %r356 to i672
-%r359 = getelementptr i32, i32* %r3, i32 20
-%r360 = load i32, i32* %r359
-%r361 = zext i32 %r360 to i672
-%r362 = shl i672 %r361, 640
-%r363 = or i672 %r357, %r362
-%r364 = zext i672 %r363 to i704
-%r366 = getelementptr i32, i32* %r3, i32 21
-%r367 = load i32, i32* %r366
-%r368 = zext i32 %r367 to i704
-%r369 = shl i704 %r368, 672
-%r370 = or i704 %r364, %r369
-%r371 = zext i704 %r370 to i736
-%r373 = getelementptr i32, i32* %r3, i32 22
-%r374 = load i32, i32* %r373
-%r375 = zext i32 %r374 to i736
-%r376 = shl i736 %r375, 704
-%r377 = or i736 %r371, %r376
-%r378 = zext i736 %r377 to i768
-%r380 = getelementptr i32, i32* %r3, i32 23
-%r381 = load i32, i32* %r380
-%r382 = zext i32 %r381 to i768
-%r383 = shl i768 %r382, 736
-%r384 = or i768 %r378, %r383
-%r385 = zext i768 %r384 to i800
-%r387 = getelementptr i32, i32* %r3, i32 24
-%r388 = load i32, i32* %r387
-%r389 = zext i32 %r388 to i800
-%r390 = shl i800 %r389, 768
-%r391 = or i800 %r385, %r390
-%r392 = zext i800 %r391 to i832
-%r394 = getelementptr i32, i32* %r3, i32 25
-%r395 = load i32, i32* %r394
-%r396 = zext i32 %r395 to i832
-%r397 = shl i832 %r396, 800
-%r398 = or i832 %r392, %r397
-%r399 = zext i832 %r398 to i864
-%r401 = getelementptr i32, i32* %r3, i32 26
-%r402 = load i32, i32* %r401
-%r403 = zext i32 %r402 to i864
-%r404 = shl i864 %r403, 832
-%r405 = or i864 %r399, %r404
-%r406 = zext i864 %r405 to i896
-%r408 = getelementptr i32, i32* %r3, i32 27
-%r409 = load i32, i32* %r408
-%r410 = zext i32 %r409 to i896
-%r411 = shl i896 %r410, 864
-%r412 = or i896 %r406, %r411
-%r413 = zext i896 %r412 to i928
-%r415 = getelementptr i32, i32* %r3, i32 28
-%r416 = load i32, i32* %r415
-%r417 = zext i32 %r416 to i928
-%r418 = shl i928 %r417, 896
-%r419 = or i928 %r413, %r418
-%r420 = zext i928 %r419 to i960
-%r422 = getelementptr i32, i32* %r3, i32 29
-%r423 = load i32, i32* %r422
-%r424 = zext i32 %r423 to i960
-%r425 = shl i960 %r424, 928
-%r426 = or i960 %r420, %r425
-%r427 = zext i960 %r426 to i992
-%r429 = getelementptr i32, i32* %r3, i32 30
-%r430 = load i32, i32* %r429
-%r431 = zext i32 %r430 to i992
-%r432 = shl i992 %r431, 960
-%r433 = or i992 %r427, %r432
-%r434 = zext i992 %r433 to i1024
-%r436 = getelementptr i32, i32* %r3, i32 31
-%r437 = load i32, i32* %r436
-%r438 = zext i32 %r437 to i1024
-%r439 = shl i1024 %r438, 992
-%r440 = or i1024 %r434, %r439
-%r441 = zext i1024 %r222 to i1056
-%r442 = zext i1024 %r440 to i1056
-%r443 = add i1056 %r441, %r442
-%r444 = trunc i1056 %r443 to i512
-%r445 = trunc i512 %r444 to i32
-%r447 = getelementptr i32, i32* %r1, i32 0
-store i32 %r445, i32* %r447
-%r448 = lshr i512 %r444, 32
-%r449 = trunc i512 %r448 to i32
-%r451 = getelementptr i32, i32* %r1, i32 1
-store i32 %r449, i32* %r451
-%r452 = lshr i512 %r448, 32
-%r453 = trunc i512 %r452 to i32
-%r455 = getelementptr i32, i32* %r1, i32 2
-store i32 %r453, i32* %r455
-%r456 = lshr i512 %r452, 32
-%r457 = trunc i512 %r456 to i32
-%r459 = getelementptr i32, i32* %r1, i32 3
-store i32 %r457, i32* %r459
-%r460 = lshr i512 %r456, 32
-%r461 = trunc i512 %r460 to i32
-%r463 = getelementptr i32, i32* %r1, i32 4
-store i32 %r461, i32* %r463
-%r464 = lshr i512 %r460, 32
-%r465 = trunc i512 %r464 to i32
-%r467 = getelementptr i32, i32* %r1, i32 5
-store i32 %r465, i32* %r467
-%r468 = lshr i512 %r464, 32
-%r469 = trunc i512 %r468 to i32
-%r471 = getelementptr i32, i32* %r1, i32 6
-store i32 %r469, i32* %r471
-%r472 = lshr i512 %r468, 32
-%r473 = trunc i512 %r472 to i32
-%r475 = getelementptr i32, i32* %r1, i32 7
-store i32 %r473, i32* %r475
-%r476 = lshr i512 %r472, 32
-%r477 = trunc i512 %r476 to i32
-%r479 = getelementptr i32, i32* %r1, i32 8
-store i32 %r477, i32* %r479
-%r480 = lshr i512 %r476, 32
-%r481 = trunc i512 %r480 to i32
-%r483 = getelementptr i32, i32* %r1, i32 9
-store i32 %r481, i32* %r483
-%r484 = lshr i512 %r480, 32
-%r485 = trunc i512 %r484 to i32
-%r487 = getelementptr i32, i32* %r1, i32 10
-store i32 %r485, i32* %r487
-%r488 = lshr i512 %r484, 32
-%r489 = trunc i512 %r488 to i32
-%r491 = getelementptr i32, i32* %r1, i32 11
-store i32 %r489, i32* %r491
-%r492 = lshr i512 %r488, 32
-%r493 = trunc i512 %r492 to i32
-%r495 = getelementptr i32, i32* %r1, i32 12
-store i32 %r493, i32* %r495
-%r496 = lshr i512 %r492, 32
-%r497 = trunc i512 %r496 to i32
-%r499 = getelementptr i32, i32* %r1, i32 13
-store i32 %r497, i32* %r499
-%r500 = lshr i512 %r496, 32
-%r501 = trunc i512 %r500 to i32
-%r503 = getelementptr i32, i32* %r1, i32 14
-store i32 %r501, i32* %r503
-%r504 = lshr i512 %r500, 32
-%r505 = trunc i512 %r504 to i32
-%r507 = getelementptr i32, i32* %r1, i32 15
-store i32 %r505, i32* %r507
-%r508 = lshr i1056 %r443, 512
-%r509 = trunc i1056 %r508 to i544
-%r510 = load i32, i32* %r4
-%r511 = zext i32 %r510 to i64
-%r513 = getelementptr i32, i32* %r4, i32 1
+%r22 = zext i32 %r21 to i96
+%r23 = shl i96 %r22, 64
+%r24 = or i96 %r18, %r23
+%r25 = zext i96 %r24 to i128
+%r27 = getelementptr i32, i32* %r5, i32 3
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i128
+%r30 = shl i128 %r29, 96
+%r31 = or i128 %r25, %r30
+%r32 = zext i128 %r31 to i160
+%r34 = getelementptr i32, i32* %r5, i32 4
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i160
+%r37 = shl i160 %r36, 128
+%r38 = or i160 %r32, %r37
+%r39 = zext i160 %r38 to i192
+%r41 = getelementptr i32, i32* %r5, i32 5
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i192
+%r44 = shl i192 %r43, 160
+%r45 = or i192 %r39, %r44
+%r46 = zext i192 %r45 to i224
+%r47 = load i32, i32* %r2
+%r48 = zext i32 %r47 to i64
+%r50 = getelementptr i32, i32* %r2, i32 1
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i64
+%r53 = shl i64 %r52, 32
+%r54 = or i64 %r48, %r53
+%r55 = zext i64 %r54 to i96
+%r57 = getelementptr i32, i32* %r2, i32 2
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i96
+%r60 = shl i96 %r59, 64
+%r61 = or i96 %r55, %r60
+%r62 = zext i96 %r61 to i128
+%r64 = getelementptr i32, i32* %r2, i32 3
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i128
+%r67 = shl i128 %r66, 96
+%r68 = or i128 %r62, %r67
+%r69 = zext i128 %r68 to i160
+%r71 = getelementptr i32, i32* %r2, i32 4
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i160
+%r74 = shl i160 %r73, 128
+%r75 = or i160 %r69, %r74
+%r76 = zext i160 %r75 to i192
+%r78 = getelementptr i32, i32* %r2, i32 5
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i192
+%r81 = shl i192 %r80, 160
+%r82 = or i192 %r76, %r81
+%r83 = zext i192 %r82 to i224
+%r84 = load i32, i32* %r7
+%r85 = zext i32 %r84 to i64
+%r87 = getelementptr i32, i32* %r7, i32 1
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i64
+%r90 = shl i64 %r89, 32
+%r91 = or i64 %r85, %r90
+%r92 = zext i64 %r91 to i96
+%r94 = getelementptr i32, i32* %r7, i32 2
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i96
+%r97 = shl i96 %r96, 64
+%r98 = or i96 %r92, %r97
+%r99 = zext i96 %r98 to i128
+%r101 = getelementptr i32, i32* %r7, i32 3
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i128
+%r104 = shl i128 %r103, 96
+%r105 = or i128 %r99, %r104
+%r106 = zext i128 %r105 to i160
+%r108 = getelementptr i32, i32* %r7, i32 4
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i160
+%r111 = shl i160 %r110, 128
+%r112 = or i160 %r106, %r111
+%r113 = zext i160 %r112 to i192
+%r115 = getelementptr i32, i32* %r7, i32 5
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i192
+%r118 = shl i192 %r117, 160
+%r119 = or i192 %r113, %r118
+%r120 = zext i192 %r119 to i224
+%r121 = load i32, i32* %r3
+%r122 = zext i32 %r121 to i64
+%r124 = getelementptr i32, i32* %r3, i32 1
+%r125 = load i32, i32* %r124
+%r126 = zext i32 %r125 to i64
+%r127 = shl i64 %r126, 32
+%r128 = or i64 %r122, %r127
+%r129 = zext i64 %r128 to i96
+%r131 = getelementptr i32, i32* %r3, i32 2
+%r132 = load i32, i32* %r131
+%r133 = zext i32 %r132 to i96
+%r134 = shl i96 %r133, 64
+%r135 = or i96 %r129, %r134
+%r136 = zext i96 %r135 to i128
+%r138 = getelementptr i32, i32* %r3, i32 3
+%r139 = load i32, i32* %r138
+%r140 = zext i32 %r139 to i128
+%r141 = shl i128 %r140, 96
+%r142 = or i128 %r136, %r141
+%r143 = zext i128 %r142 to i160
+%r145 = getelementptr i32, i32* %r3, i32 4
+%r146 = load i32, i32* %r145
+%r147 = zext i32 %r146 to i160
+%r148 = shl i160 %r147, 128
+%r149 = or i160 %r143, %r148
+%r150 = zext i160 %r149 to i192
+%r152 = getelementptr i32, i32* %r3, i32 5
+%r153 = load i32, i32* %r152
+%r154 = zext i32 %r153 to i192
+%r155 = shl i192 %r154, 160
+%r156 = or i192 %r150, %r155
+%r157 = zext i192 %r156 to i224
+%r158 = add i224 %r46, %r83
+%r159 = add i224 %r120, %r157
+%r161 = alloca i32, i32 12
+%r162 = trunc i224 %r158 to i192
+%r163 = trunc i224 %r159 to i192
+%r164 = lshr i224 %r158, 192
+%r165 = trunc i224 %r164 to i1
+%r166 = lshr i224 %r159, 192
+%r167 = trunc i224 %r166 to i1
+%r168 = and i1 %r165, %r167
+%r170 = select i1 %r165, i192 %r163, i192 0
+%r172 = select i1 %r167, i192 %r162, i192 0
+%r174 = alloca i32, i32 6
+%r176 = alloca i32, i32 6
+%r178 = getelementptr i32, i32* %r174, i32 0
+%r179 = trunc i192 %r162 to i32
+store i32 %r179, i32* %r178
+%r180 = lshr i192 %r162, 32
+%r182 = getelementptr i32, i32* %r174, i32 1
+%r183 = trunc i192 %r180 to i32
+store i32 %r183, i32* %r182
+%r184 = lshr i192 %r180, 32
+%r186 = getelementptr i32, i32* %r174, i32 2
+%r187 = trunc i192 %r184 to i32
+store i32 %r187, i32* %r186
+%r188 = lshr i192 %r184, 32
+%r190 = getelementptr i32, i32* %r174, i32 3
+%r191 = trunc i192 %r188 to i32
+store i32 %r191, i32* %r190
+%r192 = lshr i192 %r188, 32
+%r194 = getelementptr i32, i32* %r174, i32 4
+%r195 = trunc i192 %r192 to i32
+store i32 %r195, i32* %r194
+%r196 = lshr i192 %r192, 32
+%r198 = getelementptr i32, i32* %r174, i32 5
+%r199 = trunc i192 %r196 to i32
+store i32 %r199, i32* %r198
+%r201 = getelementptr i32, i32* %r176, i32 0
+%r202 = trunc i192 %r163 to i32
+store i32 %r202, i32* %r201
+%r203 = lshr i192 %r163, 32
+%r205 = getelementptr i32, i32* %r176, i32 1
+%r206 = trunc i192 %r203 to i32
+store i32 %r206, i32* %r205
+%r207 = lshr i192 %r203, 32
+%r209 = getelementptr i32, i32* %r176, i32 2
+%r210 = trunc i192 %r207 to i32
+store i32 %r210, i32* %r209
+%r211 = lshr i192 %r207, 32
+%r213 = getelementptr i32, i32* %r176, i32 3
+%r214 = trunc i192 %r211 to i32
+store i32 %r214, i32* %r213
+%r215 = lshr i192 %r211, 32
+%r217 = getelementptr i32, i32* %r176, i32 4
+%r218 = trunc i192 %r215 to i32
+store i32 %r218, i32* %r217
+%r219 = lshr i192 %r215, 32
+%r221 = getelementptr i32, i32* %r176, i32 5
+%r222 = trunc i192 %r219 to i32
+store i32 %r222, i32* %r221
+call void @mcl_fpDbl_mulPre6L(i32* %r161, i32* %r174, i32* %r176)
+%r223 = load i32, i32* %r161
+%r224 = zext i32 %r223 to i64
+%r226 = getelementptr i32, i32* %r161, i32 1
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i64
+%r229 = shl i64 %r228, 32
+%r230 = or i64 %r224, %r229
+%r231 = zext i64 %r230 to i96
+%r233 = getelementptr i32, i32* %r161, i32 2
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i96
+%r236 = shl i96 %r235, 64
+%r237 = or i96 %r231, %r236
+%r238 = zext i96 %r237 to i128
+%r240 = getelementptr i32, i32* %r161, i32 3
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i128
+%r243 = shl i128 %r242, 96
+%r244 = or i128 %r238, %r243
+%r245 = zext i128 %r244 to i160
+%r247 = getelementptr i32, i32* %r161, i32 4
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i160
+%r250 = shl i160 %r249, 128
+%r251 = or i160 %r245, %r250
+%r252 = zext i160 %r251 to i192
+%r254 = getelementptr i32, i32* %r161, i32 5
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i192
+%r257 = shl i192 %r256, 160
+%r258 = or i192 %r252, %r257
+%r259 = zext i192 %r258 to i224
+%r261 = getelementptr i32, i32* %r161, i32 6
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i224
+%r264 = shl i224 %r263, 192
+%r265 = or i224 %r259, %r264
+%r266 = zext i224 %r265 to i256
+%r268 = getelementptr i32, i32* %r161, i32 7
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i256
+%r271 = shl i256 %r270, 224
+%r272 = or i256 %r266, %r271
+%r273 = zext i256 %r272 to i288
+%r275 = getelementptr i32, i32* %r161, i32 8
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i288
+%r278 = shl i288 %r277, 256
+%r279 = or i288 %r273, %r278
+%r280 = zext i288 %r279 to i320
+%r282 = getelementptr i32, i32* %r161, i32 9
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i320
+%r285 = shl i320 %r284, 288
+%r286 = or i320 %r280, %r285
+%r287 = zext i320 %r286 to i352
+%r289 = getelementptr i32, i32* %r161, i32 10
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i352
+%r292 = shl i352 %r291, 320
+%r293 = or i352 %r287, %r292
+%r294 = zext i352 %r293 to i384
+%r296 = getelementptr i32, i32* %r161, i32 11
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i384
+%r299 = shl i384 %r298, 352
+%r300 = or i384 %r294, %r299
+%r301 = zext i384 %r300 to i416
+%r302 = zext i1 %r168 to i416
+%r303 = shl i416 %r302, 384
+%r304 = or i416 %r301, %r303
+%r305 = zext i192 %r170 to i416
+%r306 = zext i192 %r172 to i416
+%r307 = shl i416 %r305, 192
+%r308 = shl i416 %r306, 192
+%r309 = add i416 %r304, %r307
+%r310 = add i416 %r309, %r308
+%r311 = load i32, i32* %r1
+%r312 = zext i32 %r311 to i64
+%r314 = getelementptr i32, i32* %r1, i32 1
+%r315 = load i32, i32* %r314
+%r316 = zext i32 %r315 to i64
+%r317 = shl i64 %r316, 32
+%r318 = or i64 %r312, %r317
+%r319 = zext i64 %r318 to i96
+%r321 = getelementptr i32, i32* %r1, i32 2
+%r322 = load i32, i32* %r321
+%r323 = zext i32 %r322 to i96
+%r324 = shl i96 %r323, 64
+%r325 = or i96 %r319, %r324
+%r326 = zext i96 %r325 to i128
+%r328 = getelementptr i32, i32* %r1, i32 3
+%r329 = load i32, i32* %r328
+%r330 = zext i32 %r329 to i128
+%r331 = shl i128 %r330, 96
+%r332 = or i128 %r326, %r331
+%r333 = zext i128 %r332 to i160
+%r335 = getelementptr i32, i32* %r1, i32 4
+%r336 = load i32, i32* %r335
+%r337 = zext i32 %r336 to i160
+%r338 = shl i160 %r337, 128
+%r339 = or i160 %r333, %r338
+%r340 = zext i160 %r339 to i192
+%r342 = getelementptr i32, i32* %r1, i32 5
+%r343 = load i32, i32* %r342
+%r344 = zext i32 %r343 to i192
+%r345 = shl i192 %r344, 160
+%r346 = or i192 %r340, %r345
+%r347 = zext i192 %r346 to i224
+%r349 = getelementptr i32, i32* %r1, i32 6
+%r350 = load i32, i32* %r349
+%r351 = zext i32 %r350 to i224
+%r352 = shl i224 %r351, 192
+%r353 = or i224 %r347, %r352
+%r354 = zext i224 %r353 to i256
+%r356 = getelementptr i32, i32* %r1, i32 7
+%r357 = load i32, i32* %r356
+%r358 = zext i32 %r357 to i256
+%r359 = shl i256 %r358, 224
+%r360 = or i256 %r354, %r359
+%r361 = zext i256 %r360 to i288
+%r363 = getelementptr i32, i32* %r1, i32 8
+%r364 = load i32, i32* %r363
+%r365 = zext i32 %r364 to i288
+%r366 = shl i288 %r365, 256
+%r367 = or i288 %r361, %r366
+%r368 = zext i288 %r367 to i320
+%r370 = getelementptr i32, i32* %r1, i32 9
+%r371 = load i32, i32* %r370
+%r372 = zext i32 %r371 to i320
+%r373 = shl i320 %r372, 288
+%r374 = or i320 %r368, %r373
+%r375 = zext i320 %r374 to i352
+%r377 = getelementptr i32, i32* %r1, i32 10
+%r378 = load i32, i32* %r377
+%r379 = zext i32 %r378 to i352
+%r380 = shl i352 %r379, 320
+%r381 = or i352 %r375, %r380
+%r382 = zext i352 %r381 to i384
+%r384 = getelementptr i32, i32* %r1, i32 11
+%r385 = load i32, i32* %r384
+%r386 = zext i32 %r385 to i384
+%r387 = shl i384 %r386, 352
+%r388 = or i384 %r382, %r387
+%r389 = zext i384 %r388 to i416
+%r390 = sub i416 %r310, %r389
+%r392 = getelementptr i32, i32* %r1, i32 12
+%r393 = load i32, i32* %r392
+%r394 = zext i32 %r393 to i64
+%r396 = getelementptr i32, i32* %r392, i32 1
+%r397 = load i32, i32* %r396
+%r398 = zext i32 %r397 to i64
+%r399 = shl i64 %r398, 32
+%r400 = or i64 %r394, %r399
+%r401 = zext i64 %r400 to i96
+%r403 = getelementptr i32, i32* %r392, i32 2
+%r404 = load i32, i32* %r403
+%r405 = zext i32 %r404 to i96
+%r406 = shl i96 %r405, 64
+%r407 = or i96 %r401, %r406
+%r408 = zext i96 %r407 to i128
+%r410 = getelementptr i32, i32* %r392, i32 3
+%r411 = load i32, i32* %r410
+%r412 = zext i32 %r411 to i128
+%r413 = shl i128 %r412, 96
+%r414 = or i128 %r408, %r413
+%r415 = zext i128 %r414 to i160
+%r417 = getelementptr i32, i32* %r392, i32 4
+%r418 = load i32, i32* %r417
+%r419 = zext i32 %r418 to i160
+%r420 = shl i160 %r419, 128
+%r421 = or i160 %r415, %r420
+%r422 = zext i160 %r421 to i192
+%r424 = getelementptr i32, i32* %r392, i32 5
+%r425 = load i32, i32* %r424
+%r426 = zext i32 %r425 to i192
+%r427 = shl i192 %r426, 160
+%r428 = or i192 %r422, %r427
+%r429 = zext i192 %r428 to i224
+%r431 = getelementptr i32, i32* %r392, i32 6
+%r432 = load i32, i32* %r431
+%r433 = zext i32 %r432 to i224
+%r434 = shl i224 %r433, 192
+%r435 = or i224 %r429, %r434
+%r436 = zext i224 %r435 to i256
+%r438 = getelementptr i32, i32* %r392, i32 7
+%r439 = load i32, i32* %r438
+%r440 = zext i32 %r439 to i256
+%r441 = shl i256 %r440, 224
+%r442 = or i256 %r436, %r441
+%r443 = zext i256 %r442 to i288
+%r445 = getelementptr i32, i32* %r392, i32 8
+%r446 = load i32, i32* %r445
+%r447 = zext i32 %r446 to i288
+%r448 = shl i288 %r447, 256
+%r449 = or i288 %r443, %r448
+%r450 = zext i288 %r449 to i320
+%r452 = getelementptr i32, i32* %r392, i32 9
+%r453 = load i32, i32* %r452
+%r454 = zext i32 %r453 to i320
+%r455 = shl i320 %r454, 288
+%r456 = or i320 %r450, %r455
+%r457 = zext i320 %r456 to i352
+%r459 = getelementptr i32, i32* %r392, i32 10
+%r460 = load i32, i32* %r459
+%r461 = zext i32 %r460 to i352
+%r462 = shl i352 %r461, 320
+%r463 = or i352 %r457, %r462
+%r464 = zext i352 %r463 to i384
+%r466 = getelementptr i32, i32* %r392, i32 11
+%r467 = load i32, i32* %r466
+%r468 = zext i32 %r467 to i384
+%r469 = shl i384 %r468, 352
+%r470 = or i384 %r464, %r469
+%r471 = zext i384 %r470 to i416
+%r472 = sub i416 %r390, %r471
+%r473 = zext i416 %r472 to i576
+%r475 = getelementptr i32, i32* %r1, i32 6
+%r476 = load i32, i32* %r475
+%r477 = zext i32 %r476 to i64
+%r479 = getelementptr i32, i32* %r475, i32 1
+%r480 = load i32, i32* %r479
+%r481 = zext i32 %r480 to i64
+%r482 = shl i64 %r481, 32
+%r483 = or i64 %r477, %r482
+%r484 = zext i64 %r483 to i96
+%r486 = getelementptr i32, i32* %r475, i32 2
+%r487 = load i32, i32* %r486
+%r488 = zext i32 %r487 to i96
+%r489 = shl i96 %r488, 64
+%r490 = or i96 %r484, %r489
+%r491 = zext i96 %r490 to i128
+%r493 = getelementptr i32, i32* %r475, i32 3
+%r494 = load i32, i32* %r493
+%r495 = zext i32 %r494 to i128
+%r496 = shl i128 %r495, 96
+%r497 = or i128 %r491, %r496
+%r498 = zext i128 %r497 to i160
+%r500 = getelementptr i32, i32* %r475, i32 4
+%r501 = load i32, i32* %r500
+%r502 = zext i32 %r501 to i160
+%r503 = shl i160 %r502, 128
+%r504 = or i160 %r498, %r503
+%r505 = zext i160 %r504 to i192
+%r507 = getelementptr i32, i32* %r475, i32 5
+%r508 = load i32, i32* %r507
+%r509 = zext i32 %r508 to i192
+%r510 = shl i192 %r509, 160
+%r511 = or i192 %r505, %r510
+%r512 = zext i192 %r511 to i224
+%r514 = getelementptr i32, i32* %r475, i32 6
+%r515 = load i32, i32* %r514
+%r516 = zext i32 %r515 to i224
+%r517 = shl i224 %r516, 192
+%r518 = or i224 %r512, %r517
+%r519 = zext i224 %r518 to i256
+%r521 = getelementptr i32, i32* %r475, i32 7
+%r522 = load i32, i32* %r521
+%r523 = zext i32 %r522 to i256
+%r524 = shl i256 %r523, 224
+%r525 = or i256 %r519, %r524
+%r526 = zext i256 %r525 to i288
+%r528 = getelementptr i32, i32* %r475, i32 8
+%r529 = load i32, i32* %r528
+%r530 = zext i32 %r529 to i288
+%r531 = shl i288 %r530, 256
+%r532 = or i288 %r526, %r531
+%r533 = zext i288 %r532 to i320
+%r535 = getelementptr i32, i32* %r475, i32 9
+%r536 = load i32, i32* %r535
+%r537 = zext i32 %r536 to i320
+%r538 = shl i320 %r537, 288
+%r539 = or i320 %r533, %r538
+%r540 = zext i320 %r539 to i352
+%r542 = getelementptr i32, i32* %r475, i32 10
+%r543 = load i32, i32* %r542
+%r544 = zext i32 %r543 to i352
+%r545 = shl i352 %r544, 320
+%r546 = or i352 %r540, %r545
+%r547 = zext i352 %r546 to i384
+%r549 = getelementptr i32, i32* %r475, i32 11
+%r550 = load i32, i32* %r549
+%r551 = zext i32 %r550 to i384
+%r552 = shl i384 %r551, 352
+%r553 = or i384 %r547, %r552
+%r554 = zext i384 %r553 to i416
+%r556 = getelementptr i32, i32* %r475, i32 12
+%r557 = load i32, i32* %r556
+%r558 = zext i32 %r557 to i416
+%r559 = shl i416 %r558, 384
+%r560 = or i416 %r554, %r559
+%r561 = zext i416 %r560 to i448
+%r563 = getelementptr i32, i32* %r475, i32 13
+%r564 = load i32, i32* %r563
+%r565 = zext i32 %r564 to i448
+%r566 = shl i448 %r565, 416
+%r567 = or i448 %r561, %r566
+%r568 = zext i448 %r567 to i480
+%r570 = getelementptr i32, i32* %r475, i32 14
+%r571 = load i32, i32* %r570
+%r572 = zext i32 %r571 to i480
+%r573 = shl i480 %r572, 448
+%r574 = or i480 %r568, %r573
+%r575 = zext i480 %r574 to i512
+%r577 = getelementptr i32, i32* %r475, i32 15
+%r578 = load i32, i32* %r577
+%r579 = zext i32 %r578 to i512
+%r580 = shl i512 %r579, 480
+%r581 = or i512 %r575, %r580
+%r582 = zext i512 %r581 to i544
+%r584 = getelementptr i32, i32* %r475, i32 16
+%r585 = load i32, i32* %r584
+%r586 = zext i32 %r585 to i544
+%r587 = shl i544 %r586, 512
+%r588 = or i544 %r582, %r587
+%r589 = zext i544 %r588 to i576
+%r591 = getelementptr i32, i32* %r475, i32 17
+%r592 = load i32, i32* %r591
+%r593 = zext i32 %r592 to i576
+%r594 = shl i576 %r593, 544
+%r595 = or i576 %r589, %r594
+%r596 = add i576 %r473, %r595
+%r598 = getelementptr i32, i32* %r1, i32 6
+%r600 = getelementptr i32, i32* %r598, i32 0
+%r601 = trunc i576 %r596 to i32
+store i32 %r601, i32* %r600
+%r602 = lshr i576 %r596, 32
+%r604 = getelementptr i32, i32* %r598, i32 1
+%r605 = trunc i576 %r602 to i32
+store i32 %r605, i32* %r604
+%r606 = lshr i576 %r602, 32
+%r608 = getelementptr i32, i32* %r598, i32 2
+%r609 = trunc i576 %r606 to i32
+store i32 %r609, i32* %r608
+%r610 = lshr i576 %r606, 32
+%r612 = getelementptr i32, i32* %r598, i32 3
+%r613 = trunc i576 %r610 to i32
+store i32 %r613, i32* %r612
+%r614 = lshr i576 %r610, 32
+%r616 = getelementptr i32, i32* %r598, i32 4
+%r617 = trunc i576 %r614 to i32
+store i32 %r617, i32* %r616
+%r618 = lshr i576 %r614, 32
+%r620 = getelementptr i32, i32* %r598, i32 5
+%r621 = trunc i576 %r618 to i32
+store i32 %r621, i32* %r620
+%r622 = lshr i576 %r618, 32
+%r624 = getelementptr i32, i32* %r598, i32 6
+%r625 = trunc i576 %r622 to i32
+store i32 %r625, i32* %r624
+%r626 = lshr i576 %r622, 32
+%r628 = getelementptr i32, i32* %r598, i32 7
+%r629 = trunc i576 %r626 to i32
+store i32 %r629, i32* %r628
+%r630 = lshr i576 %r626, 32
+%r632 = getelementptr i32, i32* %r598, i32 8
+%r633 = trunc i576 %r630 to i32
+store i32 %r633, i32* %r632
+%r634 = lshr i576 %r630, 32
+%r636 = getelementptr i32, i32* %r598, i32 9
+%r637 = trunc i576 %r634 to i32
+store i32 %r637, i32* %r636
+%r638 = lshr i576 %r634, 32
+%r640 = getelementptr i32, i32* %r598, i32 10
+%r641 = trunc i576 %r638 to i32
+store i32 %r641, i32* %r640
+%r642 = lshr i576 %r638, 32
+%r644 = getelementptr i32, i32* %r598, i32 11
+%r645 = trunc i576 %r642 to i32
+store i32 %r645, i32* %r644
+%r646 = lshr i576 %r642, 32
+%r648 = getelementptr i32, i32* %r598, i32 12
+%r649 = trunc i576 %r646 to i32
+store i32 %r649, i32* %r648
+%r650 = lshr i576 %r646, 32
+%r652 = getelementptr i32, i32* %r598, i32 13
+%r653 = trunc i576 %r650 to i32
+store i32 %r653, i32* %r652
+%r654 = lshr i576 %r650, 32
+%r656 = getelementptr i32, i32* %r598, i32 14
+%r657 = trunc i576 %r654 to i32
+store i32 %r657, i32* %r656
+%r658 = lshr i576 %r654, 32
+%r660 = getelementptr i32, i32* %r598, i32 15
+%r661 = trunc i576 %r658 to i32
+store i32 %r661, i32* %r660
+%r662 = lshr i576 %r658, 32
+%r664 = getelementptr i32, i32* %r598, i32 16
+%r665 = trunc i576 %r662 to i32
+store i32 %r665, i32* %r664
+%r666 = lshr i576 %r662, 32
+%r668 = getelementptr i32, i32* %r598, i32 17
+%r669 = trunc i576 %r666 to i32
+store i32 %r669, i32* %r668
+ret void
+}
+define void @mcl_fpDbl_sqrPre12L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r4 = getelementptr i32, i32* %r2, i32 6
+%r6 = getelementptr i32, i32* %r2, i32 6
+%r8 = getelementptr i32, i32* %r1, i32 12
+call void @mcl_fpDbl_mulPre6L(i32* %r1, i32* %r2, i32* %r2)
+call void @mcl_fpDbl_mulPre6L(i32* %r8, i32* %r4, i32* %r6)
+%r9 = load i32, i32* %r4
+%r10 = zext i32 %r9 to i64
+%r12 = getelementptr i32, i32* %r4, i32 1
+%r13 = load i32, i32* %r12
+%r14 = zext i32 %r13 to i64
+%r15 = shl i64 %r14, 32
+%r16 = or i64 %r10, %r15
+%r17 = zext i64 %r16 to i96
+%r19 = getelementptr i32, i32* %r4, i32 2
+%r20 = load i32, i32* %r19
+%r21 = zext i32 %r20 to i96
+%r22 = shl i96 %r21, 64
+%r23 = or i96 %r17, %r22
+%r24 = zext i96 %r23 to i128
+%r26 = getelementptr i32, i32* %r4, i32 3
+%r27 = load i32, i32* %r26
+%r28 = zext i32 %r27 to i128
+%r29 = shl i128 %r28, 96
+%r30 = or i128 %r24, %r29
+%r31 = zext i128 %r30 to i160
+%r33 = getelementptr i32, i32* %r4, i32 4
+%r34 = load i32, i32* %r33
+%r35 = zext i32 %r34 to i160
+%r36 = shl i160 %r35, 128
+%r37 = or i160 %r31, %r36
+%r38 = zext i160 %r37 to i192
+%r40 = getelementptr i32, i32* %r4, i32 5
+%r41 = load i32, i32* %r40
+%r42 = zext i32 %r41 to i192
+%r43 = shl i192 %r42, 160
+%r44 = or i192 %r38, %r43
+%r45 = zext i192 %r44 to i224
+%r46 = load i32, i32* %r2
+%r47 = zext i32 %r46 to i64
+%r49 = getelementptr i32, i32* %r2, i32 1
+%r50 = load i32, i32* %r49
+%r51 = zext i32 %r50 to i64
+%r52 = shl i64 %r51, 32
+%r53 = or i64 %r47, %r52
+%r54 = zext i64 %r53 to i96
+%r56 = getelementptr i32, i32* %r2, i32 2
+%r57 = load i32, i32* %r56
+%r58 = zext i32 %r57 to i96
+%r59 = shl i96 %r58, 64
+%r60 = or i96 %r54, %r59
+%r61 = zext i96 %r60 to i128
+%r63 = getelementptr i32, i32* %r2, i32 3
+%r64 = load i32, i32* %r63
+%r65 = zext i32 %r64 to i128
+%r66 = shl i128 %r65, 96
+%r67 = or i128 %r61, %r66
+%r68 = zext i128 %r67 to i160
+%r70 = getelementptr i32, i32* %r2, i32 4
+%r71 = load i32, i32* %r70
+%r72 = zext i32 %r71 to i160
+%r73 = shl i160 %r72, 128
+%r74 = or i160 %r68, %r73
+%r75 = zext i160 %r74 to i192
+%r77 = getelementptr i32, i32* %r2, i32 5
+%r78 = load i32, i32* %r77
+%r79 = zext i32 %r78 to i192
+%r80 = shl i192 %r79, 160
+%r81 = or i192 %r75, %r80
+%r82 = zext i192 %r81 to i224
+%r83 = load i32, i32* %r6
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r6, i32 1
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r6, i32 2
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r6, i32 3
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r6, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r6, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r120 = load i32, i32* %r2
+%r121 = zext i32 %r120 to i64
+%r123 = getelementptr i32, i32* %r2, i32 1
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i64
+%r126 = shl i64 %r125, 32
+%r127 = or i64 %r121, %r126
+%r128 = zext i64 %r127 to i96
+%r130 = getelementptr i32, i32* %r2, i32 2
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i96
+%r133 = shl i96 %r132, 64
+%r134 = or i96 %r128, %r133
+%r135 = zext i96 %r134 to i128
+%r137 = getelementptr i32, i32* %r2, i32 3
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i128
+%r140 = shl i128 %r139, 96
+%r141 = or i128 %r135, %r140
+%r142 = zext i128 %r141 to i160
+%r144 = getelementptr i32, i32* %r2, i32 4
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i160
+%r147 = shl i160 %r146, 128
+%r148 = or i160 %r142, %r147
+%r149 = zext i160 %r148 to i192
+%r151 = getelementptr i32, i32* %r2, i32 5
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i192
+%r154 = shl i192 %r153, 160
+%r155 = or i192 %r149, %r154
+%r156 = zext i192 %r155 to i224
+%r157 = add i224 %r45, %r82
+%r158 = add i224 %r119, %r156
+%r160 = alloca i32, i32 12
+%r161 = trunc i224 %r157 to i192
+%r162 = trunc i224 %r158 to i192
+%r163 = lshr i224 %r157, 192
+%r164 = trunc i224 %r163 to i1
+%r165 = lshr i224 %r158, 192
+%r166 = trunc i224 %r165 to i1
+%r167 = and i1 %r164, %r166
+%r169 = select i1 %r164, i192 %r162, i192 0
+%r171 = select i1 %r166, i192 %r161, i192 0
+%r173 = alloca i32, i32 6
+%r175 = alloca i32, i32 6
+%r177 = getelementptr i32, i32* %r173, i32 0
+%r178 = trunc i192 %r161 to i32
+store i32 %r178, i32* %r177
+%r179 = lshr i192 %r161, 32
+%r181 = getelementptr i32, i32* %r173, i32 1
+%r182 = trunc i192 %r179 to i32
+store i32 %r182, i32* %r181
+%r183 = lshr i192 %r179, 32
+%r185 = getelementptr i32, i32* %r173, i32 2
+%r186 = trunc i192 %r183 to i32
+store i32 %r186, i32* %r185
+%r187 = lshr i192 %r183, 32
+%r189 = getelementptr i32, i32* %r173, i32 3
+%r190 = trunc i192 %r187 to i32
+store i32 %r190, i32* %r189
+%r191 = lshr i192 %r187, 32
+%r193 = getelementptr i32, i32* %r173, i32 4
+%r194 = trunc i192 %r191 to i32
+store i32 %r194, i32* %r193
+%r195 = lshr i192 %r191, 32
+%r197 = getelementptr i32, i32* %r173, i32 5
+%r198 = trunc i192 %r195 to i32
+store i32 %r198, i32* %r197
+%r200 = getelementptr i32, i32* %r175, i32 0
+%r201 = trunc i192 %r162 to i32
+store i32 %r201, i32* %r200
+%r202 = lshr i192 %r162, 32
+%r204 = getelementptr i32, i32* %r175, i32 1
+%r205 = trunc i192 %r202 to i32
+store i32 %r205, i32* %r204
+%r206 = lshr i192 %r202, 32
+%r208 = getelementptr i32, i32* %r175, i32 2
+%r209 = trunc i192 %r206 to i32
+store i32 %r209, i32* %r208
+%r210 = lshr i192 %r206, 32
+%r212 = getelementptr i32, i32* %r175, i32 3
+%r213 = trunc i192 %r210 to i32
+store i32 %r213, i32* %r212
+%r214 = lshr i192 %r210, 32
+%r216 = getelementptr i32, i32* %r175, i32 4
+%r217 = trunc i192 %r214 to i32
+store i32 %r217, i32* %r216
+%r218 = lshr i192 %r214, 32
+%r220 = getelementptr i32, i32* %r175, i32 5
+%r221 = trunc i192 %r218 to i32
+store i32 %r221, i32* %r220
+call void @mcl_fpDbl_mulPre6L(i32* %r160, i32* %r173, i32* %r175)
+%r222 = load i32, i32* %r160
+%r223 = zext i32 %r222 to i64
+%r225 = getelementptr i32, i32* %r160, i32 1
+%r226 = load i32, i32* %r225
+%r227 = zext i32 %r226 to i64
+%r228 = shl i64 %r227, 32
+%r229 = or i64 %r223, %r228
+%r230 = zext i64 %r229 to i96
+%r232 = getelementptr i32, i32* %r160, i32 2
+%r233 = load i32, i32* %r232
+%r234 = zext i32 %r233 to i96
+%r235 = shl i96 %r234, 64
+%r236 = or i96 %r230, %r235
+%r237 = zext i96 %r236 to i128
+%r239 = getelementptr i32, i32* %r160, i32 3
+%r240 = load i32, i32* %r239
+%r241 = zext i32 %r240 to i128
+%r242 = shl i128 %r241, 96
+%r243 = or i128 %r237, %r242
+%r244 = zext i128 %r243 to i160
+%r246 = getelementptr i32, i32* %r160, i32 4
+%r247 = load i32, i32* %r246
+%r248 = zext i32 %r247 to i160
+%r249 = shl i160 %r248, 128
+%r250 = or i160 %r244, %r249
+%r251 = zext i160 %r250 to i192
+%r253 = getelementptr i32, i32* %r160, i32 5
+%r254 = load i32, i32* %r253
+%r255 = zext i32 %r254 to i192
+%r256 = shl i192 %r255, 160
+%r257 = or i192 %r251, %r256
+%r258 = zext i192 %r257 to i224
+%r260 = getelementptr i32, i32* %r160, i32 6
+%r261 = load i32, i32* %r260
+%r262 = zext i32 %r261 to i224
+%r263 = shl i224 %r262, 192
+%r264 = or i224 %r258, %r263
+%r265 = zext i224 %r264 to i256
+%r267 = getelementptr i32, i32* %r160, i32 7
+%r268 = load i32, i32* %r267
+%r269 = zext i32 %r268 to i256
+%r270 = shl i256 %r269, 224
+%r271 = or i256 %r265, %r270
+%r272 = zext i256 %r271 to i288
+%r274 = getelementptr i32, i32* %r160, i32 8
+%r275 = load i32, i32* %r274
+%r276 = zext i32 %r275 to i288
+%r277 = shl i288 %r276, 256
+%r278 = or i288 %r272, %r277
+%r279 = zext i288 %r278 to i320
+%r281 = getelementptr i32, i32* %r160, i32 9
+%r282 = load i32, i32* %r281
+%r283 = zext i32 %r282 to i320
+%r284 = shl i320 %r283, 288
+%r285 = or i320 %r279, %r284
+%r286 = zext i320 %r285 to i352
+%r288 = getelementptr i32, i32* %r160, i32 10
+%r289 = load i32, i32* %r288
+%r290 = zext i32 %r289 to i352
+%r291 = shl i352 %r290, 320
+%r292 = or i352 %r286, %r291
+%r293 = zext i352 %r292 to i384
+%r295 = getelementptr i32, i32* %r160, i32 11
+%r296 = load i32, i32* %r295
+%r297 = zext i32 %r296 to i384
+%r298 = shl i384 %r297, 352
+%r299 = or i384 %r293, %r298
+%r300 = zext i384 %r299 to i416
+%r301 = zext i1 %r167 to i416
+%r302 = shl i416 %r301, 384
+%r303 = or i416 %r300, %r302
+%r304 = zext i192 %r169 to i416
+%r305 = zext i192 %r171 to i416
+%r306 = shl i416 %r304, 192
+%r307 = shl i416 %r305, 192
+%r308 = add i416 %r303, %r306
+%r309 = add i416 %r308, %r307
+%r310 = load i32, i32* %r1
+%r311 = zext i32 %r310 to i64
+%r313 = getelementptr i32, i32* %r1, i32 1
+%r314 = load i32, i32* %r313
+%r315 = zext i32 %r314 to i64
+%r316 = shl i64 %r315, 32
+%r317 = or i64 %r311, %r316
+%r318 = zext i64 %r317 to i96
+%r320 = getelementptr i32, i32* %r1, i32 2
+%r321 = load i32, i32* %r320
+%r322 = zext i32 %r321 to i96
+%r323 = shl i96 %r322, 64
+%r324 = or i96 %r318, %r323
+%r325 = zext i96 %r324 to i128
+%r327 = getelementptr i32, i32* %r1, i32 3
+%r328 = load i32, i32* %r327
+%r329 = zext i32 %r328 to i128
+%r330 = shl i128 %r329, 96
+%r331 = or i128 %r325, %r330
+%r332 = zext i128 %r331 to i160
+%r334 = getelementptr i32, i32* %r1, i32 4
+%r335 = load i32, i32* %r334
+%r336 = zext i32 %r335 to i160
+%r337 = shl i160 %r336, 128
+%r338 = or i160 %r332, %r337
+%r339 = zext i160 %r338 to i192
+%r341 = getelementptr i32, i32* %r1, i32 5
+%r342 = load i32, i32* %r341
+%r343 = zext i32 %r342 to i192
+%r344 = shl i192 %r343, 160
+%r345 = or i192 %r339, %r344
+%r346 = zext i192 %r345 to i224
+%r348 = getelementptr i32, i32* %r1, i32 6
+%r349 = load i32, i32* %r348
+%r350 = zext i32 %r349 to i224
+%r351 = shl i224 %r350, 192
+%r352 = or i224 %r346, %r351
+%r353 = zext i224 %r352 to i256
+%r355 = getelementptr i32, i32* %r1, i32 7
+%r356 = load i32, i32* %r355
+%r357 = zext i32 %r356 to i256
+%r358 = shl i256 %r357, 224
+%r359 = or i256 %r353, %r358
+%r360 = zext i256 %r359 to i288
+%r362 = getelementptr i32, i32* %r1, i32 8
+%r363 = load i32, i32* %r362
+%r364 = zext i32 %r363 to i288
+%r365 = shl i288 %r364, 256
+%r366 = or i288 %r360, %r365
+%r367 = zext i288 %r366 to i320
+%r369 = getelementptr i32, i32* %r1, i32 9
+%r370 = load i32, i32* %r369
+%r371 = zext i32 %r370 to i320
+%r372 = shl i320 %r371, 288
+%r373 = or i320 %r367, %r372
+%r374 = zext i320 %r373 to i352
+%r376 = getelementptr i32, i32* %r1, i32 10
+%r377 = load i32, i32* %r376
+%r378 = zext i32 %r377 to i352
+%r379 = shl i352 %r378, 320
+%r380 = or i352 %r374, %r379
+%r381 = zext i352 %r380 to i384
+%r383 = getelementptr i32, i32* %r1, i32 11
+%r384 = load i32, i32* %r383
+%r385 = zext i32 %r384 to i384
+%r386 = shl i384 %r385, 352
+%r387 = or i384 %r381, %r386
+%r388 = zext i384 %r387 to i416
+%r389 = sub i416 %r309, %r388
+%r391 = getelementptr i32, i32* %r1, i32 12
+%r392 = load i32, i32* %r391
+%r393 = zext i32 %r392 to i64
+%r395 = getelementptr i32, i32* %r391, i32 1
+%r396 = load i32, i32* %r395
+%r397 = zext i32 %r396 to i64
+%r398 = shl i64 %r397, 32
+%r399 = or i64 %r393, %r398
+%r400 = zext i64 %r399 to i96
+%r402 = getelementptr i32, i32* %r391, i32 2
+%r403 = load i32, i32* %r402
+%r404 = zext i32 %r403 to i96
+%r405 = shl i96 %r404, 64
+%r406 = or i96 %r400, %r405
+%r407 = zext i96 %r406 to i128
+%r409 = getelementptr i32, i32* %r391, i32 3
+%r410 = load i32, i32* %r409
+%r411 = zext i32 %r410 to i128
+%r412 = shl i128 %r411, 96
+%r413 = or i128 %r407, %r412
+%r414 = zext i128 %r413 to i160
+%r416 = getelementptr i32, i32* %r391, i32 4
+%r417 = load i32, i32* %r416
+%r418 = zext i32 %r417 to i160
+%r419 = shl i160 %r418, 128
+%r420 = or i160 %r414, %r419
+%r421 = zext i160 %r420 to i192
+%r423 = getelementptr i32, i32* %r391, i32 5
+%r424 = load i32, i32* %r423
+%r425 = zext i32 %r424 to i192
+%r426 = shl i192 %r425, 160
+%r427 = or i192 %r421, %r426
+%r428 = zext i192 %r427 to i224
+%r430 = getelementptr i32, i32* %r391, i32 6
+%r431 = load i32, i32* %r430
+%r432 = zext i32 %r431 to i224
+%r433 = shl i224 %r432, 192
+%r434 = or i224 %r428, %r433
+%r435 = zext i224 %r434 to i256
+%r437 = getelementptr i32, i32* %r391, i32 7
+%r438 = load i32, i32* %r437
+%r439 = zext i32 %r438 to i256
+%r440 = shl i256 %r439, 224
+%r441 = or i256 %r435, %r440
+%r442 = zext i256 %r441 to i288
+%r444 = getelementptr i32, i32* %r391, i32 8
+%r445 = load i32, i32* %r444
+%r446 = zext i32 %r445 to i288
+%r447 = shl i288 %r446, 256
+%r448 = or i288 %r442, %r447
+%r449 = zext i288 %r448 to i320
+%r451 = getelementptr i32, i32* %r391, i32 9
+%r452 = load i32, i32* %r451
+%r453 = zext i32 %r452 to i320
+%r454 = shl i320 %r453, 288
+%r455 = or i320 %r449, %r454
+%r456 = zext i320 %r455 to i352
+%r458 = getelementptr i32, i32* %r391, i32 10
+%r459 = load i32, i32* %r458
+%r460 = zext i32 %r459 to i352
+%r461 = shl i352 %r460, 320
+%r462 = or i352 %r456, %r461
+%r463 = zext i352 %r462 to i384
+%r465 = getelementptr i32, i32* %r391, i32 11
+%r466 = load i32, i32* %r465
+%r467 = zext i32 %r466 to i384
+%r468 = shl i384 %r467, 352
+%r469 = or i384 %r463, %r468
+%r470 = zext i384 %r469 to i416
+%r471 = sub i416 %r389, %r470
+%r472 = zext i416 %r471 to i576
+%r474 = getelementptr i32, i32* %r1, i32 6
+%r475 = load i32, i32* %r474
+%r476 = zext i32 %r475 to i64
+%r478 = getelementptr i32, i32* %r474, i32 1
+%r479 = load i32, i32* %r478
+%r480 = zext i32 %r479 to i64
+%r481 = shl i64 %r480, 32
+%r482 = or i64 %r476, %r481
+%r483 = zext i64 %r482 to i96
+%r485 = getelementptr i32, i32* %r474, i32 2
+%r486 = load i32, i32* %r485
+%r487 = zext i32 %r486 to i96
+%r488 = shl i96 %r487, 64
+%r489 = or i96 %r483, %r488
+%r490 = zext i96 %r489 to i128
+%r492 = getelementptr i32, i32* %r474, i32 3
+%r493 = load i32, i32* %r492
+%r494 = zext i32 %r493 to i128
+%r495 = shl i128 %r494, 96
+%r496 = or i128 %r490, %r495
+%r497 = zext i128 %r496 to i160
+%r499 = getelementptr i32, i32* %r474, i32 4
+%r500 = load i32, i32* %r499
+%r501 = zext i32 %r500 to i160
+%r502 = shl i160 %r501, 128
+%r503 = or i160 %r497, %r502
+%r504 = zext i160 %r503 to i192
+%r506 = getelementptr i32, i32* %r474, i32 5
+%r507 = load i32, i32* %r506
+%r508 = zext i32 %r507 to i192
+%r509 = shl i192 %r508, 160
+%r510 = or i192 %r504, %r509
+%r511 = zext i192 %r510 to i224
+%r513 = getelementptr i32, i32* %r474, i32 6
 %r514 = load i32, i32* %r513
-%r515 = zext i32 %r514 to i64
-%r516 = shl i64 %r515, 32
-%r517 = or i64 %r511, %r516
-%r518 = zext i64 %r517 to i96
-%r520 = getelementptr i32, i32* %r4, i32 2
+%r515 = zext i32 %r514 to i224
+%r516 = shl i224 %r515, 192
+%r517 = or i224 %r511, %r516
+%r518 = zext i224 %r517 to i256
+%r520 = getelementptr i32, i32* %r474, i32 7
 %r521 = load i32, i32* %r520
-%r522 = zext i32 %r521 to i96
-%r523 = shl i96 %r522, 64
-%r524 = or i96 %r518, %r523
-%r525 = zext i96 %r524 to i128
-%r527 = getelementptr i32, i32* %r4, i32 3
+%r522 = zext i32 %r521 to i256
+%r523 = shl i256 %r522, 224
+%r524 = or i256 %r518, %r523
+%r525 = zext i256 %r524 to i288
+%r527 = getelementptr i32, i32* %r474, i32 8
 %r528 = load i32, i32* %r527
-%r529 = zext i32 %r528 to i128
-%r530 = shl i128 %r529, 96
-%r531 = or i128 %r525, %r530
-%r532 = zext i128 %r531 to i160
-%r534 = getelementptr i32, i32* %r4, i32 4
+%r529 = zext i32 %r528 to i288
+%r530 = shl i288 %r529, 256
+%r531 = or i288 %r525, %r530
+%r532 = zext i288 %r531 to i320
+%r534 = getelementptr i32, i32* %r474, i32 9
 %r535 = load i32, i32* %r534
-%r536 = zext i32 %r535 to i160
-%r537 = shl i160 %r536, 128
-%r538 = or i160 %r532, %r537
-%r539 = zext i160 %r538 to i192
-%r541 = getelementptr i32, i32* %r4, i32 5
+%r536 = zext i32 %r535 to i320
+%r537 = shl i320 %r536, 288
+%r538 = or i320 %r532, %r537
+%r539 = zext i320 %r538 to i352
+%r541 = getelementptr i32, i32* %r474, i32 10
 %r542 = load i32, i32* %r541
-%r543 = zext i32 %r542 to i192
-%r544 = shl i192 %r543, 160
-%r545 = or i192 %r539, %r544
-%r546 = zext i192 %r545 to i224
-%r548 = getelementptr i32, i32* %r4, i32 6
+%r543 = zext i32 %r542 to i352
+%r544 = shl i352 %r543, 320
+%r545 = or i352 %r539, %r544
+%r546 = zext i352 %r545 to i384
+%r548 = getelementptr i32, i32* %r474, i32 11
 %r549 = load i32, i32* %r548
-%r550 = zext i32 %r549 to i224
-%r551 = shl i224 %r550, 192
-%r552 = or i224 %r546, %r551
-%r553 = zext i224 %r552 to i256
-%r555 = getelementptr i32, i32* %r4, i32 7
+%r550 = zext i32 %r549 to i384
+%r551 = shl i384 %r550, 352
+%r552 = or i384 %r546, %r551
+%r553 = zext i384 %r552 to i416
+%r555 = getelementptr i32, i32* %r474, i32 12
 %r556 = load i32, i32* %r555
-%r557 = zext i32 %r556 to i256
-%r558 = shl i256 %r557, 224
-%r559 = or i256 %r553, %r558
-%r560 = zext i256 %r559 to i288
-%r562 = getelementptr i32, i32* %r4, i32 8
+%r557 = zext i32 %r556 to i416
+%r558 = shl i416 %r557, 384
+%r559 = or i416 %r553, %r558
+%r560 = zext i416 %r559 to i448
+%r562 = getelementptr i32, i32* %r474, i32 13
 %r563 = load i32, i32* %r562
-%r564 = zext i32 %r563 to i288
-%r565 = shl i288 %r564, 256
-%r566 = or i288 %r560, %r565
-%r567 = zext i288 %r566 to i320
-%r569 = getelementptr i32, i32* %r4, i32 9
+%r564 = zext i32 %r563 to i448
+%r565 = shl i448 %r564, 416
+%r566 = or i448 %r560, %r565
+%r567 = zext i448 %r566 to i480
+%r569 = getelementptr i32, i32* %r474, i32 14
 %r570 = load i32, i32* %r569
-%r571 = zext i32 %r570 to i320
-%r572 = shl i320 %r571, 288
-%r573 = or i320 %r567, %r572
-%r574 = zext i320 %r573 to i352
-%r576 = getelementptr i32, i32* %r4, i32 10
+%r571 = zext i32 %r570 to i480
+%r572 = shl i480 %r571, 448
+%r573 = or i480 %r567, %r572
+%r574 = zext i480 %r573 to i512
+%r576 = getelementptr i32, i32* %r474, i32 15
 %r577 = load i32, i32* %r576
-%r578 = zext i32 %r577 to i352
-%r579 = shl i352 %r578, 320
-%r580 = or i352 %r574, %r579
-%r581 = zext i352 %r580 to i384
-%r583 = getelementptr i32, i32* %r4, i32 11
+%r578 = zext i32 %r577 to i512
+%r579 = shl i512 %r578, 480
+%r580 = or i512 %r574, %r579
+%r581 = zext i512 %r580 to i544
+%r583 = getelementptr i32, i32* %r474, i32 16
 %r584 = load i32, i32* %r583
-%r585 = zext i32 %r584 to i384
-%r586 = shl i384 %r585, 352
-%r587 = or i384 %r581, %r586
-%r588 = zext i384 %r587 to i416
-%r590 = getelementptr i32, i32* %r4, i32 12
+%r585 = zext i32 %r584 to i544
+%r586 = shl i544 %r585, 512
+%r587 = or i544 %r581, %r586
+%r588 = zext i544 %r587 to i576
+%r590 = getelementptr i32, i32* %r474, i32 17
 %r591 = load i32, i32* %r590
-%r592 = zext i32 %r591 to i416
-%r593 = shl i416 %r592, 384
-%r594 = or i416 %r588, %r593
-%r595 = zext i416 %r594 to i448
-%r597 = getelementptr i32, i32* %r4, i32 13
-%r598 = load i32, i32* %r597
-%r599 = zext i32 %r598 to i448
-%r600 = shl i448 %r599, 416
-%r601 = or i448 %r595, %r600
-%r602 = zext i448 %r601 to i480
-%r604 = getelementptr i32, i32* %r4, i32 14
-%r605 = load i32, i32* %r604
-%r606 = zext i32 %r605 to i480
-%r607 = shl i480 %r606, 448
-%r608 = or i480 %r602, %r607
-%r609 = zext i480 %r608 to i512
-%r611 = getelementptr i32, i32* %r4, i32 15
-%r612 = load i32, i32* %r611
-%r613 = zext i32 %r612 to i512
-%r614 = shl i512 %r613, 480
-%r615 = or i512 %r609, %r614
-%r616 = zext i512 %r615 to i544
-%r617 = sub i544 %r509, %r616
-%r618 = lshr i544 %r617, 512
-%r619 = trunc i544 %r618 to i1
-%r620 = select i1 %r619, i544 %r509, i544 %r617
-%r621 = trunc i544 %r620 to i512
-%r623 = getelementptr i32, i32* %r1, i32 16
-%r624 = trunc i512 %r621 to i32
-%r626 = getelementptr i32, i32* %r623, i32 0
-store i32 %r624, i32* %r626
-%r627 = lshr i512 %r621, 32
-%r628 = trunc i512 %r627 to i32
-%r630 = getelementptr i32, i32* %r623, i32 1
-store i32 %r628, i32* %r630
-%r631 = lshr i512 %r627, 32
-%r632 = trunc i512 %r631 to i32
-%r634 = getelementptr i32, i32* %r623, i32 2
-store i32 %r632, i32* %r634
-%r635 = lshr i512 %r631, 32
-%r636 = trunc i512 %r635 to i32
-%r638 = getelementptr i32, i32* %r623, i32 3
-store i32 %r636, i32* %r638
-%r639 = lshr i512 %r635, 32
-%r640 = trunc i512 %r639 to i32
-%r642 = getelementptr i32, i32* %r623, i32 4
-store i32 %r640, i32* %r642
-%r643 = lshr i512 %r639, 32
-%r644 = trunc i512 %r643 to i32
-%r646 = getelementptr i32, i32* %r623, i32 5
-store i32 %r644, i32* %r646
-%r647 = lshr i512 %r643, 32
-%r648 = trunc i512 %r647 to i32
-%r650 = getelementptr i32, i32* %r623, i32 6
-store i32 %r648, i32* %r650
-%r651 = lshr i512 %r647, 32
-%r652 = trunc i512 %r651 to i32
-%r654 = getelementptr i32, i32* %r623, i32 7
-store i32 %r652, i32* %r654
-%r655 = lshr i512 %r651, 32
-%r656 = trunc i512 %r655 to i32
-%r658 = getelementptr i32, i32* %r623, i32 8
-store i32 %r656, i32* %r658
-%r659 = lshr i512 %r655, 32
-%r660 = trunc i512 %r659 to i32
-%r662 = getelementptr i32, i32* %r623, i32 9
-store i32 %r660, i32* %r662
-%r663 = lshr i512 %r659, 32
-%r664 = trunc i512 %r663 to i32
-%r666 = getelementptr i32, i32* %r623, i32 10
-store i32 %r664, i32* %r666
-%r667 = lshr i512 %r663, 32
-%r668 = trunc i512 %r667 to i32
-%r670 = getelementptr i32, i32* %r623, i32 11
-store i32 %r668, i32* %r670
-%r671 = lshr i512 %r667, 32
-%r672 = trunc i512 %r671 to i32
-%r674 = getelementptr i32, i32* %r623, i32 12
-store i32 %r672, i32* %r674
-%r675 = lshr i512 %r671, 32
-%r676 = trunc i512 %r675 to i32
-%r678 = getelementptr i32, i32* %r623, i32 13
-store i32 %r676, i32* %r678
-%r679 = lshr i512 %r675, 32
-%r680 = trunc i512 %r679 to i32
-%r682 = getelementptr i32, i32* %r623, i32 14
-store i32 %r680, i32* %r682
-%r683 = lshr i512 %r679, 32
-%r684 = trunc i512 %r683 to i32
-%r686 = getelementptr i32, i32* %r623, i32 15
-store i32 %r684, i32* %r686
+%r592 = zext i32 %r591 to i576
+%r593 = shl i576 %r592, 544
+%r594 = or i576 %r588, %r593
+%r595 = add i576 %r472, %r594
+%r597 = getelementptr i32, i32* %r1, i32 6
+%r599 = getelementptr i32, i32* %r597, i32 0
+%r600 = trunc i576 %r595 to i32
+store i32 %r600, i32* %r599
+%r601 = lshr i576 %r595, 32
+%r603 = getelementptr i32, i32* %r597, i32 1
+%r604 = trunc i576 %r601 to i32
+store i32 %r604, i32* %r603
+%r605 = lshr i576 %r601, 32
+%r607 = getelementptr i32, i32* %r597, i32 2
+%r608 = trunc i576 %r605 to i32
+store i32 %r608, i32* %r607
+%r609 = lshr i576 %r605, 32
+%r611 = getelementptr i32, i32* %r597, i32 3
+%r612 = trunc i576 %r609 to i32
+store i32 %r612, i32* %r611
+%r613 = lshr i576 %r609, 32
+%r615 = getelementptr i32, i32* %r597, i32 4
+%r616 = trunc i576 %r613 to i32
+store i32 %r616, i32* %r615
+%r617 = lshr i576 %r613, 32
+%r619 = getelementptr i32, i32* %r597, i32 5
+%r620 = trunc i576 %r617 to i32
+store i32 %r620, i32* %r619
+%r621 = lshr i576 %r617, 32
+%r623 = getelementptr i32, i32* %r597, i32 6
+%r624 = trunc i576 %r621 to i32
+store i32 %r624, i32* %r623
+%r625 = lshr i576 %r621, 32
+%r627 = getelementptr i32, i32* %r597, i32 7
+%r628 = trunc i576 %r625 to i32
+store i32 %r628, i32* %r627
+%r629 = lshr i576 %r625, 32
+%r631 = getelementptr i32, i32* %r597, i32 8
+%r632 = trunc i576 %r629 to i32
+store i32 %r632, i32* %r631
+%r633 = lshr i576 %r629, 32
+%r635 = getelementptr i32, i32* %r597, i32 9
+%r636 = trunc i576 %r633 to i32
+store i32 %r636, i32* %r635
+%r637 = lshr i576 %r633, 32
+%r639 = getelementptr i32, i32* %r597, i32 10
+%r640 = trunc i576 %r637 to i32
+store i32 %r640, i32* %r639
+%r641 = lshr i576 %r637, 32
+%r643 = getelementptr i32, i32* %r597, i32 11
+%r644 = trunc i576 %r641 to i32
+store i32 %r644, i32* %r643
+%r645 = lshr i576 %r641, 32
+%r647 = getelementptr i32, i32* %r597, i32 12
+%r648 = trunc i576 %r645 to i32
+store i32 %r648, i32* %r647
+%r649 = lshr i576 %r645, 32
+%r651 = getelementptr i32, i32* %r597, i32 13
+%r652 = trunc i576 %r649 to i32
+store i32 %r652, i32* %r651
+%r653 = lshr i576 %r649, 32
+%r655 = getelementptr i32, i32* %r597, i32 14
+%r656 = trunc i576 %r653 to i32
+store i32 %r656, i32* %r655
+%r657 = lshr i576 %r653, 32
+%r659 = getelementptr i32, i32* %r597, i32 15
+%r660 = trunc i576 %r657 to i32
+store i32 %r660, i32* %r659
+%r661 = lshr i576 %r657, 32
+%r663 = getelementptr i32, i32* %r597, i32 16
+%r664 = trunc i576 %r661 to i32
+store i32 %r664, i32* %r663
+%r665 = lshr i576 %r661, 32
+%r667 = getelementptr i32, i32* %r597, i32 17
+%r668 = trunc i576 %r665 to i32
+store i32 %r668, i32* %r667
+ret void
+}
+define void @mcl_fp_mont12L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; 12-word (384-bit) word-serial multiply-and-reduce: reads %r2, %r3, %r4 and writes 12 words to %r1. Looks like Montgomery multiplication with %r4 as the modulus - confirm with caller.
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6 ; per-round multiplier, read from the word just before %r4[0]; presumably -p^-1 mod 2^32 - TODO confirm
+%r9 = getelementptr i32, i32* %r3, i32 0 ; round 0: uses word %r3[0]
+%r10 = load i32, i32* %r9
+%r11 = call i416 @mulPv384x32(i32* %r2, i32 %r10) ; mulPv384x32 is defined elsewhere; presumably (12 words at pointer) * (32-bit word) - confirm
+%r12 = zext i416 %r11 to i448 ; widen to 448 bits before accumulating
+%r13 = trunc i416 %r11 to i32
+%r14 = mul i32 %r13, %r7 ; q = (low 32 bits of the running value) * %r7, wrapping mod 2^32
+%r15 = call i416 @mulPv384x32(i32* %r4, i32 %r14) ; same helper applied to %r4 and q
+%r16 = zext i416 %r15 to i448
+%r17 = add i448 %r12, %r16
+%r18 = lshr i448 %r17, 32 ; discard the low 32 bits; the result is the running value for the next round
+%r20 = getelementptr i32, i32* %r3, i32 1 ; round 1: add the %r3[1] product to the running value, then repeat the q / add / shift steps
+%r21 = load i32, i32* %r20
+%r22 = call i416 @mulPv384x32(i32* %r2, i32 %r21)
+%r23 = zext i416 %r22 to i448
+%r24 = add i448 %r18, %r23
+%r25 = trunc i448 %r24 to i32
+%r26 = mul i32 %r25, %r7
+%r27 = call i416 @mulPv384x32(i32* %r4, i32 %r26)
+%r28 = zext i416 %r27 to i448
+%r29 = add i448 %r24, %r28
+%r30 = lshr i448 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2 ; round 2
+%r33 = load i32, i32* %r32
+%r34 = call i416 @mulPv384x32(i32* %r2, i32 %r33)
+%r35 = zext i416 %r34 to i448
+%r36 = add i448 %r30, %r35
+%r37 = trunc i448 %r36 to i32
+%r38 = mul i32 %r37, %r7
+%r39 = call i416 @mulPv384x32(i32* %r4, i32 %r38)
+%r40 = zext i416 %r39 to i448
+%r41 = add i448 %r36, %r40
+%r42 = lshr i448 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3 ; round 3
+%r45 = load i32, i32* %r44
+%r46 = call i416 @mulPv384x32(i32* %r2, i32 %r45)
+%r47 = zext i416 %r46 to i448
+%r48 = add i448 %r42, %r47
+%r49 = trunc i448 %r48 to i32
+%r50 = mul i32 %r49, %r7
+%r51 = call i416 @mulPv384x32(i32* %r4, i32 %r50)
+%r52 = zext i416 %r51 to i448
+%r53 = add i448 %r48, %r52
+%r54 = lshr i448 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4 ; round 4
+%r57 = load i32, i32* %r56
+%r58 = call i416 @mulPv384x32(i32* %r2, i32 %r57)
+%r59 = zext i416 %r58 to i448
+%r60 = add i448 %r54, %r59
+%r61 = trunc i448 %r60 to i32
+%r62 = mul i32 %r61, %r7
+%r63 = call i416 @mulPv384x32(i32* %r4, i32 %r62)
+%r64 = zext i416 %r63 to i448
+%r65 = add i448 %r60, %r64
+%r66 = lshr i448 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5 ; round 5
+%r69 = load i32, i32* %r68
+%r70 = call i416 @mulPv384x32(i32* %r2, i32 %r69)
+%r71 = zext i416 %r70 to i448
+%r72 = add i448 %r66, %r71
+%r73 = trunc i448 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i416 @mulPv384x32(i32* %r4, i32 %r74)
+%r76 = zext i416 %r75 to i448
+%r77 = add i448 %r72, %r76
+%r78 = lshr i448 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6 ; round 6
+%r81 = load i32, i32* %r80
+%r82 = call i416 @mulPv384x32(i32* %r2, i32 %r81)
+%r83 = zext i416 %r82 to i448
+%r84 = add i448 %r78, %r83
+%r85 = trunc i448 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i416 @mulPv384x32(i32* %r4, i32 %r86)
+%r88 = zext i416 %r87 to i448
+%r89 = add i448 %r84, %r88
+%r90 = lshr i448 %r89, 32
+%r92 = getelementptr i32, i32* %r3, i32 7 ; round 7
+%r93 = load i32, i32* %r92
+%r94 = call i416 @mulPv384x32(i32* %r2, i32 %r93)
+%r95 = zext i416 %r94 to i448
+%r96 = add i448 %r90, %r95
+%r97 = trunc i448 %r96 to i32
+%r98 = mul i32 %r97, %r7
+%r99 = call i416 @mulPv384x32(i32* %r4, i32 %r98)
+%r100 = zext i416 %r99 to i448
+%r101 = add i448 %r96, %r100
+%r102 = lshr i448 %r101, 32
+%r104 = getelementptr i32, i32* %r3, i32 8 ; round 8
+%r105 = load i32, i32* %r104
+%r106 = call i416 @mulPv384x32(i32* %r2, i32 %r105)
+%r107 = zext i416 %r106 to i448
+%r108 = add i448 %r102, %r107
+%r109 = trunc i448 %r108 to i32
+%r110 = mul i32 %r109, %r7
+%r111 = call i416 @mulPv384x32(i32* %r4, i32 %r110)
+%r112 = zext i416 %r111 to i448
+%r113 = add i448 %r108, %r112
+%r114 = lshr i448 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 9 ; round 9
+%r117 = load i32, i32* %r116
+%r118 = call i416 @mulPv384x32(i32* %r2, i32 %r117)
+%r119 = zext i416 %r118 to i448
+%r120 = add i448 %r114, %r119
+%r121 = trunc i448 %r120 to i32
+%r122 = mul i32 %r121, %r7
+%r123 = call i416 @mulPv384x32(i32* %r4, i32 %r122)
+%r124 = zext i416 %r123 to i448
+%r125 = add i448 %r120, %r124
+%r126 = lshr i448 %r125, 32
+%r128 = getelementptr i32, i32* %r3, i32 10 ; round 10
+%r129 = load i32, i32* %r128
+%r130 = call i416 @mulPv384x32(i32* %r2, i32 %r129)
+%r131 = zext i416 %r130 to i448
+%r132 = add i448 %r126, %r131
+%r133 = trunc i448 %r132 to i32
+%r134 = mul i32 %r133, %r7
+%r135 = call i416 @mulPv384x32(i32* %r4, i32 %r134)
+%r136 = zext i416 %r135 to i448
+%r137 = add i448 %r132, %r136
+%r138 = lshr i448 %r137, 32
+%r140 = getelementptr i32, i32* %r3, i32 11 ; round 11 (last word of %r3 that is read)
+%r141 = load i32, i32* %r140
+%r142 = call i416 @mulPv384x32(i32* %r2, i32 %r141)
+%r143 = zext i416 %r142 to i448
+%r144 = add i448 %r138, %r143
+%r145 = trunc i448 %r144 to i32
+%r146 = mul i32 %r145, %r7
+%r147 = call i416 @mulPv384x32(i32* %r4, i32 %r146)
+%r148 = zext i416 %r147 to i448
+%r149 = add i448 %r144, %r148
+%r150 = lshr i448 %r149, 32
+%r151 = trunc i448 %r150 to i416 ; t = running value after the 12 rounds, kept at 416 bits
+%r152 = load i32, i32* %r4 ; from here to %r229: pack %r4[0..11] into one 384-bit integer, %r4[0] in the lowest 32 bits
+%r153 = zext i32 %r152 to i64
+%r155 = getelementptr i32, i32* %r4, i32 1
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i64
+%r158 = shl i64 %r157, 32
+%r159 = or i64 %r153, %r158
+%r160 = zext i64 %r159 to i96
+%r162 = getelementptr i32, i32* %r4, i32 2
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i96
+%r165 = shl i96 %r164, 64
+%r166 = or i96 %r160, %r165
+%r167 = zext i96 %r166 to i128
+%r169 = getelementptr i32, i32* %r4, i32 3
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i128
+%r172 = shl i128 %r171, 96
+%r173 = or i128 %r167, %r172
+%r174 = zext i128 %r173 to i160
+%r176 = getelementptr i32, i32* %r4, i32 4
+%r177 = load i32, i32* %r176
+%r178 = zext i32 %r177 to i160
+%r179 = shl i160 %r178, 128
+%r180 = or i160 %r174, %r179
+%r181 = zext i160 %r180 to i192
+%r183 = getelementptr i32, i32* %r4, i32 5
+%r184 = load i32, i32* %r183
+%r185 = zext i32 %r184 to i192
+%r186 = shl i192 %r185, 160
+%r187 = or i192 %r181, %r186
+%r188 = zext i192 %r187 to i224
+%r190 = getelementptr i32, i32* %r4, i32 6
+%r191 = load i32, i32* %r190
+%r192 = zext i32 %r191 to i224
+%r193 = shl i224 %r192, 192
+%r194 = or i224 %r188, %r193
+%r195 = zext i224 %r194 to i256
+%r197 = getelementptr i32, i32* %r4, i32 7
+%r198 = load i32, i32* %r197
+%r199 = zext i32 %r198 to i256
+%r200 = shl i256 %r199, 224
+%r201 = or i256 %r195, %r200
+%r202 = zext i256 %r201 to i288
+%r204 = getelementptr i32, i32* %r4, i32 8
+%r205 = load i32, i32* %r204
+%r206 = zext i32 %r205 to i288
+%r207 = shl i288 %r206, 256
+%r208 = or i288 %r202, %r207
+%r209 = zext i288 %r208 to i320
+%r211 = getelementptr i32, i32* %r4, i32 9
+%r212 = load i32, i32* %r211
+%r213 = zext i32 %r212 to i320
+%r214 = shl i320 %r213, 288
+%r215 = or i320 %r209, %r214
+%r216 = zext i320 %r215 to i352
+%r218 = getelementptr i32, i32* %r4, i32 10
+%r219 = load i32, i32* %r218
+%r220 = zext i32 %r219 to i352
+%r221 = shl i352 %r220, 320
+%r222 = or i352 %r216, %r221
+%r223 = zext i352 %r222 to i384
+%r225 = getelementptr i32, i32* %r4, i32 11
+%r226 = load i32, i32* %r225
+%r227 = zext i32 %r226 to i384
+%r228 = shl i384 %r227, 352
+%r229 = or i384 %r223, %r228
+%r230 = zext i384 %r229 to i416
+%r231 = sub i416 %r151, %r230 ; d = t - (packed %r4 value), computed at 416 bits
+%r232 = lshr i416 %r231, 384
+%r233 = trunc i416 %r232 to i1 ; bit 384 of d; used below as the "subtraction went negative" flag
+%r234 = select i1 %r233, i416 %r151, i416 %r231 ; keep t when the flag is set, otherwise take d
+%r235 = trunc i416 %r234 to i384
+%r237 = getelementptr i32, i32* %r1, i32 0 ; from here on: write the 384-bit result to %r1[0..11], lowest 32 bits first
+%r238 = trunc i384 %r235 to i32
+store i32 %r238, i32* %r237
+%r239 = lshr i384 %r235, 32
+%r241 = getelementptr i32, i32* %r1, i32 1
+%r242 = trunc i384 %r239 to i32
+store i32 %r242, i32* %r241
+%r243 = lshr i384 %r239, 32
+%r245 = getelementptr i32, i32* %r1, i32 2
+%r246 = trunc i384 %r243 to i32
+store i32 %r246, i32* %r245
+%r247 = lshr i384 %r243, 32
+%r249 = getelementptr i32, i32* %r1, i32 3
+%r250 = trunc i384 %r247 to i32
+store i32 %r250, i32* %r249
+%r251 = lshr i384 %r247, 32
+%r253 = getelementptr i32, i32* %r1, i32 4
+%r254 = trunc i384 %r251 to i32
+store i32 %r254, i32* %r253
+%r255 = lshr i384 %r251, 32
+%r257 = getelementptr i32, i32* %r1, i32 5
+%r258 = trunc i384 %r255 to i32
+store i32 %r258, i32* %r257
+%r259 = lshr i384 %r255, 32
+%r261 = getelementptr i32, i32* %r1, i32 6
+%r262 = trunc i384 %r259 to i32
+store i32 %r262, i32* %r261
+%r263 = lshr i384 %r259, 32
+%r265 = getelementptr i32, i32* %r1, i32 7
+%r266 = trunc i384 %r263 to i32
+store i32 %r266, i32* %r265
+%r267 = lshr i384 %r263, 32
+%r269 = getelementptr i32, i32* %r1, i32 8
+%r270 = trunc i384 %r267 to i32
+store i32 %r270, i32* %r269
+%r271 = lshr i384 %r267, 32
+%r273 = getelementptr i32, i32* %r1, i32 9
+%r274 = trunc i384 %r271 to i32
+store i32 %r274, i32* %r273
+%r275 = lshr i384 %r271, 32
+%r277 = getelementptr i32, i32* %r1, i32 10
+%r278 = trunc i384 %r275 to i32
+store i32 %r278, i32* %r277
+%r279 = lshr i384 %r275, 32
+%r281 = getelementptr i32, i32* %r1, i32 11
+%r282 = trunc i384 %r279 to i32
+store i32 %r282, i32* %r281
+ret void
+}
+define void @mcl_fp_montNF12L(i32* %r1, i32* %r2, i32* %r3, i32* %r4) ; 12-word (384-bit) word-serial multiply-and-reduce with all sums kept at 416 bits: reads %r2, %r3, %r4 and writes 12 words to %r1. Looks like Montgomery multiplication with %r4 as the modulus - confirm with caller.
+{
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6 ; per-round multiplier, read from the word just before %r4[0]; presumably -p^-1 mod 2^32 - TODO confirm
+%r8 = load i32, i32* %r3 ; round 0: uses word %r3[0]
+%r9 = call i416 @mulPv384x32(i32* %r2, i32 %r8) ; mulPv384x32 is defined elsewhere; presumably (12 words at pointer) * (32-bit word) - confirm
+%r10 = trunc i416 %r9 to i32
+%r11 = mul i32 %r10, %r7 ; q = (low 32 bits of the running value) * %r7, wrapping mod 2^32
+%r12 = call i416 @mulPv384x32(i32* %r4, i32 %r11) ; same helper applied to %r4 and q
+%r13 = add i416 %r9, %r12 ; NOTE(review): added at 416 bits with no widening; presumably relies on the top bits of the %r4 value being clear so the sum cannot wrap - confirm
+%r14 = lshr i416 %r13, 32 ; discard the low 32 bits; the result is the running value for the next round
+%r16 = getelementptr i32, i32* %r3, i32 1 ; round 1: add the %r3[1] product to the running value, then repeat the q / add / shift steps
+%r17 = load i32, i32* %r16
+%r18 = call i416 @mulPv384x32(i32* %r2, i32 %r17)
+%r19 = add i416 %r14, %r18
+%r20 = trunc i416 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i416 @mulPv384x32(i32* %r4, i32 %r21)
+%r23 = add i416 %r19, %r22
+%r24 = lshr i416 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2 ; round 2
+%r27 = load i32, i32* %r26
+%r28 = call i416 @mulPv384x32(i32* %r2, i32 %r27)
+%r29 = add i416 %r24, %r28
+%r30 = trunc i416 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i416 @mulPv384x32(i32* %r4, i32 %r31)
+%r33 = add i416 %r29, %r32
+%r34 = lshr i416 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3 ; round 3
+%r37 = load i32, i32* %r36
+%r38 = call i416 @mulPv384x32(i32* %r2, i32 %r37)
+%r39 = add i416 %r34, %r38
+%r40 = trunc i416 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i416 @mulPv384x32(i32* %r4, i32 %r41)
+%r43 = add i416 %r39, %r42
+%r44 = lshr i416 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4 ; round 4
+%r47 = load i32, i32* %r46
+%r48 = call i416 @mulPv384x32(i32* %r2, i32 %r47)
+%r49 = add i416 %r44, %r48
+%r50 = trunc i416 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i416 @mulPv384x32(i32* %r4, i32 %r51)
+%r53 = add i416 %r49, %r52
+%r54 = lshr i416 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5 ; round 5
+%r57 = load i32, i32* %r56
+%r58 = call i416 @mulPv384x32(i32* %r2, i32 %r57)
+%r59 = add i416 %r54, %r58
+%r60 = trunc i416 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i416 @mulPv384x32(i32* %r4, i32 %r61)
+%r63 = add i416 %r59, %r62
+%r64 = lshr i416 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6 ; round 6
+%r67 = load i32, i32* %r66
+%r68 = call i416 @mulPv384x32(i32* %r2, i32 %r67)
+%r69 = add i416 %r64, %r68
+%r70 = trunc i416 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i416 @mulPv384x32(i32* %r4, i32 %r71)
+%r73 = add i416 %r69, %r72
+%r74 = lshr i416 %r73, 32
+%r76 = getelementptr i32, i32* %r3, i32 7 ; round 7
+%r77 = load i32, i32* %r76
+%r78 = call i416 @mulPv384x32(i32* %r2, i32 %r77)
+%r79 = add i416 %r74, %r78
+%r80 = trunc i416 %r79 to i32
+%r81 = mul i32 %r80, %r7
+%r82 = call i416 @mulPv384x32(i32* %r4, i32 %r81)
+%r83 = add i416 %r79, %r82
+%r84 = lshr i416 %r83, 32
+%r86 = getelementptr i32, i32* %r3, i32 8 ; round 8
+%r87 = load i32, i32* %r86
+%r88 = call i416 @mulPv384x32(i32* %r2, i32 %r87)
+%r89 = add i416 %r84, %r88
+%r90 = trunc i416 %r89 to i32
+%r91 = mul i32 %r90, %r7
+%r92 = call i416 @mulPv384x32(i32* %r4, i32 %r91)
+%r93 = add i416 %r89, %r92
+%r94 = lshr i416 %r93, 32
+%r96 = getelementptr i32, i32* %r3, i32 9 ; round 9
+%r97 = load i32, i32* %r96
+%r98 = call i416 @mulPv384x32(i32* %r2, i32 %r97)
+%r99 = add i416 %r94, %r98
+%r100 = trunc i416 %r99 to i32
+%r101 = mul i32 %r100, %r7
+%r102 = call i416 @mulPv384x32(i32* %r4, i32 %r101)
+%r103 = add i416 %r99, %r102
+%r104 = lshr i416 %r103, 32
+%r106 = getelementptr i32, i32* %r3, i32 10 ; round 10
+%r107 = load i32, i32* %r106
+%r108 = call i416 @mulPv384x32(i32* %r2, i32 %r107)
+%r109 = add i416 %r104, %r108
+%r110 = trunc i416 %r109 to i32
+%r111 = mul i32 %r110, %r7
+%r112 = call i416 @mulPv384x32(i32* %r4, i32 %r111)
+%r113 = add i416 %r109, %r112
+%r114 = lshr i416 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 11 ; round 11 (last word of %r3 that is read)
+%r117 = load i32, i32* %r116
+%r118 = call i416 @mulPv384x32(i32* %r2, i32 %r117)
+%r119 = add i416 %r114, %r118
+%r120 = trunc i416 %r119 to i32
+%r121 = mul i32 %r120, %r7
+%r122 = call i416 @mulPv384x32(i32* %r4, i32 %r121)
+%r123 = add i416 %r119, %r122
+%r124 = lshr i416 %r123, 32
+%r125 = trunc i416 %r124 to i384 ; t = running value after the 12 rounds, narrowed to 384 bits
+%r126 = load i32, i32* %r4 ; from here to %r203: pack %r4[0..11] into one 384-bit integer, %r4[0] in the lowest 32 bits
+%r127 = zext i32 %r126 to i64
+%r129 = getelementptr i32, i32* %r4, i32 1
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i64
+%r132 = shl i64 %r131, 32
+%r133 = or i64 %r127, %r132
+%r134 = zext i64 %r133 to i96
+%r136 = getelementptr i32, i32* %r4, i32 2
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i96
+%r139 = shl i96 %r138, 64
+%r140 = or i96 %r134, %r139
+%r141 = zext i96 %r140 to i128
+%r143 = getelementptr i32, i32* %r4, i32 3
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i128
+%r146 = shl i128 %r145, 96
+%r147 = or i128 %r141, %r146
+%r148 = zext i128 %r147 to i160
+%r150 = getelementptr i32, i32* %r4, i32 4
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i160
+%r153 = shl i160 %r152, 128
+%r154 = or i160 %r148, %r153
+%r155 = zext i160 %r154 to i192
+%r157 = getelementptr i32, i32* %r4, i32 5
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i192
+%r160 = shl i192 %r159, 160
+%r161 = or i192 %r155, %r160
+%r162 = zext i192 %r161 to i224
+%r164 = getelementptr i32, i32* %r4, i32 6
+%r165 = load i32, i32* %r164
+%r166 = zext i32 %r165 to i224
+%r167 = shl i224 %r166, 192
+%r168 = or i224 %r162, %r167
+%r169 = zext i224 %r168 to i256
+%r171 = getelementptr i32, i32* %r4, i32 7
+%r172 = load i32, i32* %r171
+%r173 = zext i32 %r172 to i256
+%r174 = shl i256 %r173, 224
+%r175 = or i256 %r169, %r174
+%r176 = zext i256 %r175 to i288
+%r178 = getelementptr i32, i32* %r4, i32 8
+%r179 = load i32, i32* %r178
+%r180 = zext i32 %r179 to i288
+%r181 = shl i288 %r180, 256
+%r182 = or i288 %r176, %r181
+%r183 = zext i288 %r182 to i320
+%r185 = getelementptr i32, i32* %r4, i32 9
+%r186 = load i32, i32* %r185
+%r187 = zext i32 %r186 to i320
+%r188 = shl i320 %r187, 288
+%r189 = or i320 %r183, %r188
+%r190 = zext i320 %r189 to i352
+%r192 = getelementptr i32, i32* %r4, i32 10
+%r193 = load i32, i32* %r192
+%r194 = zext i32 %r193 to i352
+%r195 = shl i352 %r194, 320
+%r196 = or i352 %r190, %r195
+%r197 = zext i352 %r196 to i384
+%r199 = getelementptr i32, i32* %r4, i32 11
+%r200 = load i32, i32* %r199
+%r201 = zext i32 %r200 to i384
+%r202 = shl i384 %r201, 352
+%r203 = or i384 %r197, %r202
+%r204 = sub i384 %r125, %r203 ; d = t - (packed %r4 value), computed at 384 bits
+%r205 = lshr i384 %r204, 383
+%r206 = trunc i384 %r205 to i1 ; top bit (bit 383) of d; used below as the "subtraction went negative" flag
+%r207 = select i1 %r206, i384 %r125, i384 %r204 ; keep t when the flag is set, otherwise take d
+%r209 = getelementptr i32, i32* %r1, i32 0 ; from here on: write the 384-bit result to %r1[0..11], lowest 32 bits first
+%r210 = trunc i384 %r207 to i32
+store i32 %r210, i32* %r209
+%r211 = lshr i384 %r207, 32
+%r213 = getelementptr i32, i32* %r1, i32 1
+%r214 = trunc i384 %r211 to i32
+store i32 %r214, i32* %r213
+%r215 = lshr i384 %r211, 32
+%r217 = getelementptr i32, i32* %r1, i32 2
+%r218 = trunc i384 %r215 to i32
+store i32 %r218, i32* %r217
+%r219 = lshr i384 %r215, 32
+%r221 = getelementptr i32, i32* %r1, i32 3
+%r222 = trunc i384 %r219 to i32
+store i32 %r222, i32* %r221
+%r223 = lshr i384 %r219, 32
+%r225 = getelementptr i32, i32* %r1, i32 4
+%r226 = trunc i384 %r223 to i32
+store i32 %r226, i32* %r225
+%r227 = lshr i384 %r223, 32
+%r229 = getelementptr i32, i32* %r1, i32 5
+%r230 = trunc i384 %r227 to i32
+store i32 %r230, i32* %r229
+%r231 = lshr i384 %r227, 32
+%r233 = getelementptr i32, i32* %r1, i32 6
+%r234 = trunc i384 %r231 to i32
+store i32 %r234, i32* %r233
+%r235 = lshr i384 %r231, 32
+%r237 = getelementptr i32, i32* %r1, i32 7
+%r238 = trunc i384 %r235 to i32
+store i32 %r238, i32* %r237
+%r239 = lshr i384 %r235, 32
+%r241 = getelementptr i32, i32* %r1, i32 8
+%r242 = trunc i384 %r239 to i32
+store i32 %r242, i32* %r241
+%r243 = lshr i384 %r239, 32
+%r245 = getelementptr i32, i32* %r1, i32 9
+%r246 = trunc i384 %r243 to i32
+store i32 %r246, i32* %r245
+%r247 = lshr i384 %r243, 32
+%r249 = getelementptr i32, i32* %r1, i32 10
+%r250 = trunc i384 %r247 to i32
+store i32 %r250, i32* %r249
+%r251 = lshr i384 %r247, 32
+%r253 = getelementptr i32, i32* %r1, i32 11
+%r254 = trunc i384 %r251 to i32
+store i32 %r254, i32* %r253
+ret void
+}
+define void @mcl_fp_montRed12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i288
+%r59 = getelementptr i32, i32* %r3, i32 8
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i288
+%r62 = shl i288 %r61, 256
+%r63 = or i288 %r57, %r62
+%r64 = zext i288 %r63 to i320
+%r66 = getelementptr i32, i32* %r3, i32 9
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i320
+%r69 = shl i320 %r68, 288
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i352
+%r73 = getelementptr i32, i32* %r3, i32 10
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i352
+%r76 = shl i352 %r75, 320
+%r77 = or i352 %r71, %r76
+%r78 = zext i352 %r77 to i384
+%r80 = getelementptr i32, i32* %r3, i32 11
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i384
+%r83 = shl i384 %r82, 352
+%r84 = or i384 %r78, %r83
+%r85 = load i32, i32* %r2
+%r86 = zext i32 %r85 to i64
+%r88 = getelementptr i32, i32* %r2, i32 1
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i64
+%r91 = shl i64 %r90, 32
+%r92 = or i64 %r86, %r91
+%r93 = zext i64 %r92 to i96
+%r95 = getelementptr i32, i32* %r2, i32 2
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i96
+%r98 = shl i96 %r97, 64
+%r99 = or i96 %r93, %r98
+%r100 = zext i96 %r99 to i128
+%r102 = getelementptr i32, i32* %r2, i32 3
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i128
+%r105 = shl i128 %r104, 96
+%r106 = or i128 %r100, %r105
+%r107 = zext i128 %r106 to i160
+%r109 = getelementptr i32, i32* %r2, i32 4
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i160
+%r112 = shl i160 %r111, 128
+%r113 = or i160 %r107, %r112
+%r114 = zext i160 %r113 to i192
+%r116 = getelementptr i32, i32* %r2, i32 5
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i192
+%r119 = shl i192 %r118, 160
+%r120 = or i192 %r114, %r119
+%r121 = zext i192 %r120 to i224
+%r123 = getelementptr i32, i32* %r2, i32 6
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i224
+%r126 = shl i224 %r125, 192
+%r127 = or i224 %r121, %r126
+%r128 = zext i224 %r127 to i256
+%r130 = getelementptr i32, i32* %r2, i32 7
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i256
+%r133 = shl i256 %r132, 224
+%r134 = or i256 %r128, %r133
+%r135 = zext i256 %r134 to i288
+%r137 = getelementptr i32, i32* %r2, i32 8
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i288
+%r140 = shl i288 %r139, 256
+%r141 = or i288 %r135, %r140
+%r142 = zext i288 %r141 to i320
+%r144 = getelementptr i32, i32* %r2, i32 9
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i320
+%r147 = shl i320 %r146, 288
+%r148 = or i320 %r142, %r147
+%r149 = zext i320 %r148 to i352
+%r151 = getelementptr i32, i32* %r2, i32 10
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i352
+%r154 = shl i352 %r153, 320
+%r155 = or i352 %r149, %r154
+%r156 = zext i352 %r155 to i384
+%r158 = getelementptr i32, i32* %r2, i32 11
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i384
+%r161 = shl i384 %r160, 352
+%r162 = or i384 %r156, %r161
+%r163 = trunc i384 %r162 to i32
+%r164 = mul i32 %r163, %r6
+%r165 = call i416 @mulPv384x32(i32* %r3, i32 %r164)
+%r167 = getelementptr i32, i32* %r2, i32 12
+%r168 = load i32, i32* %r167
+%r169 = zext i32 %r168 to i416
+%r170 = shl i416 %r169, 384
+%r171 = zext i384 %r162 to i416
+%r172 = or i416 %r170, %r171
+%r173 = zext i416 %r172 to i448
+%r174 = zext i416 %r165 to i448
+%r175 = add i448 %r173, %r174
+%r176 = lshr i448 %r175, 32
+%r177 = trunc i448 %r176 to i416
+%r178 = lshr i416 %r177, 384
+%r179 = trunc i416 %r178 to i32
+%r180 = trunc i416 %r177 to i384
+%r181 = trunc i384 %r180 to i32
+%r182 = mul i32 %r181, %r6
+%r183 = call i416 @mulPv384x32(i32* %r3, i32 %r182)
+%r184 = zext i32 %r179 to i416
+%r185 = shl i416 %r184, 384
+%r186 = add i416 %r183, %r185
+%r188 = getelementptr i32, i32* %r2, i32 13
+%r189 = load i32, i32* %r188
+%r190 = zext i32 %r189 to i416
+%r191 = shl i416 %r190, 384
+%r192 = zext i384 %r180 to i416
+%r193 = or i416 %r191, %r192
+%r194 = zext i416 %r193 to i448
+%r195 = zext i416 %r186 to i448
+%r196 = add i448 %r194, %r195
+%r197 = lshr i448 %r196, 32
+%r198 = trunc i448 %r197 to i416
+%r199 = lshr i416 %r198, 384
+%r200 = trunc i416 %r199 to i32
+%r201 = trunc i416 %r198 to i384
+%r202 = trunc i384 %r201 to i32
+%r203 = mul i32 %r202, %r6
+%r204 = call i416 @mulPv384x32(i32* %r3, i32 %r203)
+%r205 = zext i32 %r200 to i416
+%r206 = shl i416 %r205, 384
+%r207 = add i416 %r204, %r206
+%r209 = getelementptr i32, i32* %r2, i32 14
+%r210 = load i32, i32* %r209
+%r211 = zext i32 %r210 to i416
+%r212 = shl i416 %r211, 384
+%r213 = zext i384 %r201 to i416
+%r214 = or i416 %r212, %r213
+%r215 = zext i416 %r214 to i448
+%r216 = zext i416 %r207 to i448
+%r217 = add i448 %r215, %r216
+%r218 = lshr i448 %r217, 32
+%r219 = trunc i448 %r218 to i416
+%r220 = lshr i416 %r219, 384
+%r221 = trunc i416 %r220 to i32
+%r222 = trunc i416 %r219 to i384
+%r223 = trunc i384 %r222 to i32
+%r224 = mul i32 %r223, %r6
+%r225 = call i416 @mulPv384x32(i32* %r3, i32 %r224)
+%r226 = zext i32 %r221 to i416
+%r227 = shl i416 %r226, 384
+%r228 = add i416 %r225, %r227
+%r230 = getelementptr i32, i32* %r2, i32 15
+%r231 = load i32, i32* %r230
+%r232 = zext i32 %r231 to i416
+%r233 = shl i416 %r232, 384
+%r234 = zext i384 %r222 to i416
+%r235 = or i416 %r233, %r234
+%r236 = zext i416 %r235 to i448
+%r237 = zext i416 %r228 to i448
+%r238 = add i448 %r236, %r237
+%r239 = lshr i448 %r238, 32
+%r240 = trunc i448 %r239 to i416
+%r241 = lshr i416 %r240, 384
+%r242 = trunc i416 %r241 to i32
+%r243 = trunc i416 %r240 to i384
+%r244 = trunc i384 %r243 to i32
+%r245 = mul i32 %r244, %r6
+%r246 = call i416 @mulPv384x32(i32* %r3, i32 %r245)
+%r247 = zext i32 %r242 to i416
+%r248 = shl i416 %r247, 384
+%r249 = add i416 %r246, %r248
+%r251 = getelementptr i32, i32* %r2, i32 16
+%r252 = load i32, i32* %r251
+%r253 = zext i32 %r252 to i416
+%r254 = shl i416 %r253, 384
+%r255 = zext i384 %r243 to i416
+%r256 = or i416 %r254, %r255
+%r257 = zext i416 %r256 to i448
+%r258 = zext i416 %r249 to i448
+%r259 = add i448 %r257, %r258
+%r260 = lshr i448 %r259, 32
+%r261 = trunc i448 %r260 to i416
+%r262 = lshr i416 %r261, 384
+%r263 = trunc i416 %r262 to i32
+%r264 = trunc i416 %r261 to i384
+%r265 = trunc i384 %r264 to i32
+%r266 = mul i32 %r265, %r6
+%r267 = call i416 @mulPv384x32(i32* %r3, i32 %r266)
+%r268 = zext i32 %r263 to i416
+%r269 = shl i416 %r268, 384
+%r270 = add i416 %r267, %r269
+%r272 = getelementptr i32, i32* %r2, i32 17
+%r273 = load i32, i32* %r272
+%r274 = zext i32 %r273 to i416
+%r275 = shl i416 %r274, 384
+%r276 = zext i384 %r264 to i416
+%r277 = or i416 %r275, %r276
+%r278 = zext i416 %r277 to i448
+%r279 = zext i416 %r270 to i448
+%r280 = add i448 %r278, %r279
+%r281 = lshr i448 %r280, 32
+%r282 = trunc i448 %r281 to i416
+%r283 = lshr i416 %r282, 384
+%r284 = trunc i416 %r283 to i32
+%r285 = trunc i416 %r282 to i384
+%r286 = trunc i384 %r285 to i32
+%r287 = mul i32 %r286, %r6
+%r288 = call i416 @mulPv384x32(i32* %r3, i32 %r287)
+%r289 = zext i32 %r284 to i416
+%r290 = shl i416 %r289, 384
+%r291 = add i416 %r288, %r290
+%r293 = getelementptr i32, i32* %r2, i32 18
+%r294 = load i32, i32* %r293
+%r295 = zext i32 %r294 to i416
+%r296 = shl i416 %r295, 384
+%r297 = zext i384 %r285 to i416
+%r298 = or i416 %r296, %r297
+%r299 = zext i416 %r298 to i448
+%r300 = zext i416 %r291 to i448
+%r301 = add i448 %r299, %r300
+%r302 = lshr i448 %r301, 32
+%r303 = trunc i448 %r302 to i416
+%r304 = lshr i416 %r303, 384
+%r305 = trunc i416 %r304 to i32
+%r306 = trunc i416 %r303 to i384
+%r307 = trunc i384 %r306 to i32
+%r308 = mul i32 %r307, %r6
+%r309 = call i416 @mulPv384x32(i32* %r3, i32 %r308)
+%r310 = zext i32 %r305 to i416
+%r311 = shl i416 %r310, 384
+%r312 = add i416 %r309, %r311
+%r314 = getelementptr i32, i32* %r2, i32 19
+%r315 = load i32, i32* %r314
+%r316 = zext i32 %r315 to i416
+%r317 = shl i416 %r316, 384
+%r318 = zext i384 %r306 to i416
+%r319 = or i416 %r317, %r318
+%r320 = zext i416 %r319 to i448
+%r321 = zext i416 %r312 to i448
+%r322 = add i448 %r320, %r321
+%r323 = lshr i448 %r322, 32
+%r324 = trunc i448 %r323 to i416
+%r325 = lshr i416 %r324, 384
+%r326 = trunc i416 %r325 to i32
+%r327 = trunc i416 %r324 to i384
+%r328 = trunc i384 %r327 to i32
+%r329 = mul i32 %r328, %r6
+%r330 = call i416 @mulPv384x32(i32* %r3, i32 %r329)
+%r331 = zext i32 %r326 to i416
+%r332 = shl i416 %r331, 384
+%r333 = add i416 %r330, %r332
+%r335 = getelementptr i32, i32* %r2, i32 20
+%r336 = load i32, i32* %r335
+%r337 = zext i32 %r336 to i416
+%r338 = shl i416 %r337, 384
+%r339 = zext i384 %r327 to i416
+%r340 = or i416 %r338, %r339
+%r341 = zext i416 %r340 to i448
+%r342 = zext i416 %r333 to i448
+%r343 = add i448 %r341, %r342
+%r344 = lshr i448 %r343, 32
+%r345 = trunc i448 %r344 to i416
+%r346 = lshr i416 %r345, 384
+%r347 = trunc i416 %r346 to i32
+%r348 = trunc i416 %r345 to i384
+%r349 = trunc i384 %r348 to i32
+%r350 = mul i32 %r349, %r6
+%r351 = call i416 @mulPv384x32(i32* %r3, i32 %r350)
+%r352 = zext i32 %r347 to i416
+%r353 = shl i416 %r352, 384
+%r354 = add i416 %r351, %r353
+%r356 = getelementptr i32, i32* %r2, i32 21
+%r357 = load i32, i32* %r356
+%r358 = zext i32 %r357 to i416
+%r359 = shl i416 %r358, 384
+%r360 = zext i384 %r348 to i416
+%r361 = or i416 %r359, %r360
+%r362 = zext i416 %r361 to i448
+%r363 = zext i416 %r354 to i448
+%r364 = add i448 %r362, %r363
+%r365 = lshr i448 %r364, 32
+%r366 = trunc i448 %r365 to i416
+%r367 = lshr i416 %r366, 384
+%r368 = trunc i416 %r367 to i32
+%r369 = trunc i416 %r366 to i384
+%r370 = trunc i384 %r369 to i32
+%r371 = mul i32 %r370, %r6
+%r372 = call i416 @mulPv384x32(i32* %r3, i32 %r371)
+%r373 = zext i32 %r368 to i416
+%r374 = shl i416 %r373, 384
+%r375 = add i416 %r372, %r374
+%r377 = getelementptr i32, i32* %r2, i32 22
+%r378 = load i32, i32* %r377
+%r379 = zext i32 %r378 to i416
+%r380 = shl i416 %r379, 384
+%r381 = zext i384 %r369 to i416
+%r382 = or i416 %r380, %r381
+%r383 = zext i416 %r382 to i448
+%r384 = zext i416 %r375 to i448
+%r385 = add i448 %r383, %r384
+%r386 = lshr i448 %r385, 32
+%r387 = trunc i448 %r386 to i416
+%r388 = lshr i416 %r387, 384
+%r389 = trunc i416 %r388 to i32
+%r390 = trunc i416 %r387 to i384
+%r391 = trunc i384 %r390 to i32
+%r392 = mul i32 %r391, %r6
+%r393 = call i416 @mulPv384x32(i32* %r3, i32 %r392)
+%r394 = zext i32 %r389 to i416
+%r395 = shl i416 %r394, 384
+%r396 = add i416 %r393, %r395
+%r398 = getelementptr i32, i32* %r2, i32 23
+%r399 = load i32, i32* %r398
+%r400 = zext i32 %r399 to i416
+%r401 = shl i416 %r400, 384
+%r402 = zext i384 %r390 to i416
+%r403 = or i416 %r401, %r402
+%r404 = zext i416 %r403 to i448
+%r405 = zext i416 %r396 to i448
+%r406 = add i448 %r404, %r405
+%r407 = lshr i448 %r406, 32
+%r408 = trunc i448 %r407 to i416
+%r409 = lshr i416 %r408, 384
+%r410 = trunc i416 %r409 to i32
+%r411 = trunc i416 %r408 to i384
+%r412 = zext i384 %r84 to i416
+%r413 = zext i384 %r411 to i416
+%r414 = sub i416 %r413, %r412
+%r415 = lshr i416 %r414, 384
+%r416 = trunc i416 %r415 to i1
+%r417 = select i1 %r416, i416 %r413, i416 %r414
+%r418 = trunc i416 %r417 to i384
+%r420 = getelementptr i32, i32* %r1, i32 0
+%r421 = trunc i384 %r418 to i32
+store i32 %r421, i32* %r420
+%r422 = lshr i384 %r418, 32
+%r424 = getelementptr i32, i32* %r1, i32 1
+%r425 = trunc i384 %r422 to i32
+store i32 %r425, i32* %r424
+%r426 = lshr i384 %r422, 32
+%r428 = getelementptr i32, i32* %r1, i32 2
+%r429 = trunc i384 %r426 to i32
+store i32 %r429, i32* %r428
+%r430 = lshr i384 %r426, 32
+%r432 = getelementptr i32, i32* %r1, i32 3
+%r433 = trunc i384 %r430 to i32
+store i32 %r433, i32* %r432
+%r434 = lshr i384 %r430, 32
+%r436 = getelementptr i32, i32* %r1, i32 4
+%r437 = trunc i384 %r434 to i32
+store i32 %r437, i32* %r436
+%r438 = lshr i384 %r434, 32
+%r440 = getelementptr i32, i32* %r1, i32 5
+%r441 = trunc i384 %r438 to i32
+store i32 %r441, i32* %r440
+%r442 = lshr i384 %r438, 32
+%r444 = getelementptr i32, i32* %r1, i32 6
+%r445 = trunc i384 %r442 to i32
+store i32 %r445, i32* %r444
+%r446 = lshr i384 %r442, 32
+%r448 = getelementptr i32, i32* %r1, i32 7
+%r449 = trunc i384 %r446 to i32
+store i32 %r449, i32* %r448
+%r450 = lshr i384 %r446, 32
+%r452 = getelementptr i32, i32* %r1, i32 8
+%r453 = trunc i384 %r450 to i32
+store i32 %r453, i32* %r452
+%r454 = lshr i384 %r450, 32
+%r456 = getelementptr i32, i32* %r1, i32 9
+%r457 = trunc i384 %r454 to i32
+store i32 %r457, i32* %r456
+%r458 = lshr i384 %r454, 32
+%r460 = getelementptr i32, i32* %r1, i32 10
+%r461 = trunc i384 %r458 to i32
+store i32 %r461, i32* %r460
+%r462 = lshr i384 %r458, 32
+%r464 = getelementptr i32, i32* %r1, i32 11
+%r465 = trunc i384 %r462 to i32
+store i32 %r465, i32* %r464
+ret void
+}
+define void @mcl_fp_montRedNF12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; reads 24 words at %r2 and 12 words at %r3 (plus the word at %r3[-1]), writes 12 words to %r1; presumably a Montgomery reduction, per the name
+{
+%r5 = getelementptr i32, i32* %r3, i32 -1 ; address of the i32 stored immediately before %r3[0]
+%r6 = load i32, i32* %r5 ; %r6: factor multiplied into the low word every round (presumably -1/p mod 2^32 - confirm against the caller)
+%r7 = load i32, i32* %r3 ; pack %r3[0..11] into one i384 (%r84), word i placed at bit 32*i
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i288
+%r59 = getelementptr i32, i32* %r3, i32 8
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i288
+%r62 = shl i288 %r61, 256
+%r63 = or i288 %r57, %r62
+%r64 = zext i288 %r63 to i320
+%r66 = getelementptr i32, i32* %r3, i32 9
+%r67 = load i32, i32* %r66
+%r68 = zext i32 %r67 to i320
+%r69 = shl i320 %r68, 288
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i352
+%r73 = getelementptr i32, i32* %r3, i32 10
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i352
+%r76 = shl i352 %r75, 320
+%r77 = or i352 %r71, %r76
+%r78 = zext i352 %r77 to i384
+%r80 = getelementptr i32, i32* %r3, i32 11
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i384
+%r83 = shl i384 %r82, 352
+%r84 = or i384 %r78, %r83 ; %r84 = 384-bit value of %r3[0..11]; only used by the final subtraction
+%r85 = load i32, i32* %r2 ; pack %r2[0..11] the same way into %r162
+%r86 = zext i32 %r85 to i64
+%r88 = getelementptr i32, i32* %r2, i32 1
+%r89 = load i32, i32* %r88
+%r90 = zext i32 %r89 to i64
+%r91 = shl i64 %r90, 32
+%r92 = or i64 %r86, %r91
+%r93 = zext i64 %r92 to i96
+%r95 = getelementptr i32, i32* %r2, i32 2
+%r96 = load i32, i32* %r95
+%r97 = zext i32 %r96 to i96
+%r98 = shl i96 %r97, 64
+%r99 = or i96 %r93, %r98
+%r100 = zext i96 %r99 to i128
+%r102 = getelementptr i32, i32* %r2, i32 3
+%r103 = load i32, i32* %r102
+%r104 = zext i32 %r103 to i128
+%r105 = shl i128 %r104, 96
+%r106 = or i128 %r100, %r105
+%r107 = zext i128 %r106 to i160
+%r109 = getelementptr i32, i32* %r2, i32 4
+%r110 = load i32, i32* %r109
+%r111 = zext i32 %r110 to i160
+%r112 = shl i160 %r111, 128
+%r113 = or i160 %r107, %r112
+%r114 = zext i160 %r113 to i192
+%r116 = getelementptr i32, i32* %r2, i32 5
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i192
+%r119 = shl i192 %r118, 160
+%r120 = or i192 %r114, %r119
+%r121 = zext i192 %r120 to i224
+%r123 = getelementptr i32, i32* %r2, i32 6
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i224
+%r126 = shl i224 %r125, 192
+%r127 = or i224 %r121, %r126
+%r128 = zext i224 %r127 to i256
+%r130 = getelementptr i32, i32* %r2, i32 7
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i256
+%r133 = shl i256 %r132, 224
+%r134 = or i256 %r128, %r133
+%r135 = zext i256 %r134 to i288
+%r137 = getelementptr i32, i32* %r2, i32 8
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i288
+%r140 = shl i288 %r139, 256
+%r141 = or i288 %r135, %r140
+%r142 = zext i288 %r141 to i320
+%r144 = getelementptr i32, i32* %r2, i32 9
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i320
+%r147 = shl i320 %r146, 288
+%r148 = or i320 %r142, %r147
+%r149 = zext i320 %r148 to i352
+%r151 = getelementptr i32, i32* %r2, i32 10
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i352
+%r154 = shl i352 %r153, 320
+%r155 = or i352 %r149, %r154
+%r156 = zext i352 %r155 to i384
+%r158 = getelementptr i32, i32* %r2, i32 11
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i384
+%r161 = shl i384 %r160, 352
+%r162 = or i384 %r156, %r161 ; %r162 = 384-bit value of %r2[0..11], the initial running value
+%r163 = trunc i384 %r162 to i32 ; round 1 of 12: low word of the running value
+%r164 = mul i32 %r163, %r6 ; q = low word * %r6, wrapped to 32 bits
+%r165 = call i416 @mulPv384x32(i32* %r3, i32 %r164) ; 416-bit result of mulPv384x32 on %r3 and q (helper defined outside this view)
+%r167 = getelementptr i32, i32* %r2, i32 12 ; next input word %r2[12] ...
+%r168 = load i32, i32* %r167
+%r169 = zext i32 %r168 to i416
+%r170 = shl i416 %r169, 384
+%r171 = zext i384 %r162 to i416
+%r172 = or i416 %r170, %r171 ; ... is placed on top of the running value at bit 384
+%r173 = zext i416 %r172 to i448
+%r174 = zext i416 %r165 to i448
+%r175 = add i448 %r173, %r174 ; running value + helper result, widened to i448 first
+%r176 = lshr i448 %r175, 32 ; drop the low 32 bits of the sum
+%r177 = trunc i448 %r176 to i416
+%r178 = lshr i416 %r177, 384
+%r179 = trunc i416 %r178 to i32 ; bits 384 and up of the shifted sum, fed into the next round
+%r180 = trunc i416 %r177 to i384 ; new 384-bit running value
+%r181 = trunc i384 %r180 to i32 ; round 2 of 12 (consumes %r2[13])
+%r182 = mul i32 %r181, %r6
+%r183 = call i416 @mulPv384x32(i32* %r3, i32 %r182)
+%r184 = zext i32 %r179 to i416
+%r185 = shl i416 %r184, 384
+%r186 = add i416 %r183, %r185 ; add the previous round's carried word into the helper result at bit 384
+%r188 = getelementptr i32, i32* %r2, i32 13
+%r189 = load i32, i32* %r188
+%r190 = zext i32 %r189 to i416
+%r191 = shl i416 %r190, 384
+%r192 = zext i384 %r180 to i416
+%r193 = or i416 %r191, %r192
+%r194 = zext i416 %r193 to i448
+%r195 = zext i416 %r186 to i448
+%r196 = add i448 %r194, %r195
+%r197 = lshr i448 %r196, 32
+%r198 = trunc i448 %r197 to i416
+%r199 = lshr i416 %r198, 384
+%r200 = trunc i416 %r199 to i32
+%r201 = trunc i416 %r198 to i384
+%r202 = trunc i384 %r201 to i32 ; round 3 of 12 (consumes %r2[14])
+%r203 = mul i32 %r202, %r6
+%r204 = call i416 @mulPv384x32(i32* %r3, i32 %r203)
+%r205 = zext i32 %r200 to i416
+%r206 = shl i416 %r205, 384
+%r207 = add i416 %r204, %r206
+%r209 = getelementptr i32, i32* %r2, i32 14
+%r210 = load i32, i32* %r209
+%r211 = zext i32 %r210 to i416
+%r212 = shl i416 %r211, 384
+%r213 = zext i384 %r201 to i416
+%r214 = or i416 %r212, %r213
+%r215 = zext i416 %r214 to i448
+%r216 = zext i416 %r207 to i448
+%r217 = add i448 %r215, %r216
+%r218 = lshr i448 %r217, 32
+%r219 = trunc i448 %r218 to i416
+%r220 = lshr i416 %r219, 384
+%r221 = trunc i416 %r220 to i32
+%r222 = trunc i416 %r219 to i384
+%r223 = trunc i384 %r222 to i32 ; round 4 of 12 (consumes %r2[15])
+%r224 = mul i32 %r223, %r6
+%r225 = call i416 @mulPv384x32(i32* %r3, i32 %r224)
+%r226 = zext i32 %r221 to i416
+%r227 = shl i416 %r226, 384
+%r228 = add i416 %r225, %r227
+%r230 = getelementptr i32, i32* %r2, i32 15
+%r231 = load i32, i32* %r230
+%r232 = zext i32 %r231 to i416
+%r233 = shl i416 %r232, 384
+%r234 = zext i384 %r222 to i416
+%r235 = or i416 %r233, %r234
+%r236 = zext i416 %r235 to i448
+%r237 = zext i416 %r228 to i448
+%r238 = add i448 %r236, %r237
+%r239 = lshr i448 %r238, 32
+%r240 = trunc i448 %r239 to i416
+%r241 = lshr i416 %r240, 384
+%r242 = trunc i416 %r241 to i32
+%r243 = trunc i416 %r240 to i384
+%r244 = trunc i384 %r243 to i32 ; round 5 of 12 (consumes %r2[16])
+%r245 = mul i32 %r244, %r6
+%r246 = call i416 @mulPv384x32(i32* %r3, i32 %r245)
+%r247 = zext i32 %r242 to i416
+%r248 = shl i416 %r247, 384
+%r249 = add i416 %r246, %r248
+%r251 = getelementptr i32, i32* %r2, i32 16
+%r252 = load i32, i32* %r251
+%r253 = zext i32 %r252 to i416
+%r254 = shl i416 %r253, 384
+%r255 = zext i384 %r243 to i416
+%r256 = or i416 %r254, %r255
+%r257 = zext i416 %r256 to i448
+%r258 = zext i416 %r249 to i448
+%r259 = add i448 %r257, %r258
+%r260 = lshr i448 %r259, 32
+%r261 = trunc i448 %r260 to i416
+%r262 = lshr i416 %r261, 384
+%r263 = trunc i416 %r262 to i32
+%r264 = trunc i416 %r261 to i384
+%r265 = trunc i384 %r264 to i32 ; round 6 of 12 (consumes %r2[17])
+%r266 = mul i32 %r265, %r6
+%r267 = call i416 @mulPv384x32(i32* %r3, i32 %r266)
+%r268 = zext i32 %r263 to i416
+%r269 = shl i416 %r268, 384
+%r270 = add i416 %r267, %r269
+%r272 = getelementptr i32, i32* %r2, i32 17
+%r273 = load i32, i32* %r272
+%r274 = zext i32 %r273 to i416
+%r275 = shl i416 %r274, 384
+%r276 = zext i384 %r264 to i416
+%r277 = or i416 %r275, %r276
+%r278 = zext i416 %r277 to i448
+%r279 = zext i416 %r270 to i448
+%r280 = add i448 %r278, %r279
+%r281 = lshr i448 %r280, 32
+%r282 = trunc i448 %r281 to i416
+%r283 = lshr i416 %r282, 384
+%r284 = trunc i416 %r283 to i32
+%r285 = trunc i416 %r282 to i384
+%r286 = trunc i384 %r285 to i32 ; round 7 of 12 (consumes %r2[18])
+%r287 = mul i32 %r286, %r6
+%r288 = call i416 @mulPv384x32(i32* %r3, i32 %r287)
+%r289 = zext i32 %r284 to i416
+%r290 = shl i416 %r289, 384
+%r291 = add i416 %r288, %r290
+%r293 = getelementptr i32, i32* %r2, i32 18
+%r294 = load i32, i32* %r293
+%r295 = zext i32 %r294 to i416
+%r296 = shl i416 %r295, 384
+%r297 = zext i384 %r285 to i416
+%r298 = or i416 %r296, %r297
+%r299 = zext i416 %r298 to i448
+%r300 = zext i416 %r291 to i448
+%r301 = add i448 %r299, %r300
+%r302 = lshr i448 %r301, 32
+%r303 = trunc i448 %r302 to i416
+%r304 = lshr i416 %r303, 384
+%r305 = trunc i416 %r304 to i32
+%r306 = trunc i416 %r303 to i384
+%r307 = trunc i384 %r306 to i32 ; round 8 of 12 (consumes %r2[19])
+%r308 = mul i32 %r307, %r6
+%r309 = call i416 @mulPv384x32(i32* %r3, i32 %r308)
+%r310 = zext i32 %r305 to i416
+%r311 = shl i416 %r310, 384
+%r312 = add i416 %r309, %r311
+%r314 = getelementptr i32, i32* %r2, i32 19
+%r315 = load i32, i32* %r314
+%r316 = zext i32 %r315 to i416
+%r317 = shl i416 %r316, 384
+%r318 = zext i384 %r306 to i416
+%r319 = or i416 %r317, %r318
+%r320 = zext i416 %r319 to i448
+%r321 = zext i416 %r312 to i448
+%r322 = add i448 %r320, %r321
+%r323 = lshr i448 %r322, 32
+%r324 = trunc i448 %r323 to i416
+%r325 = lshr i416 %r324, 384
+%r326 = trunc i416 %r325 to i32
+%r327 = trunc i416 %r324 to i384
+%r328 = trunc i384 %r327 to i32 ; round 9 of 12 (consumes %r2[20])
+%r329 = mul i32 %r328, %r6
+%r330 = call i416 @mulPv384x32(i32* %r3, i32 %r329)
+%r331 = zext i32 %r326 to i416
+%r332 = shl i416 %r331, 384
+%r333 = add i416 %r330, %r332
+%r335 = getelementptr i32, i32* %r2, i32 20
+%r336 = load i32, i32* %r335
+%r337 = zext i32 %r336 to i416
+%r338 = shl i416 %r337, 384
+%r339 = zext i384 %r327 to i416
+%r340 = or i416 %r338, %r339
+%r341 = zext i416 %r340 to i448
+%r342 = zext i416 %r333 to i448
+%r343 = add i448 %r341, %r342
+%r344 = lshr i448 %r343, 32
+%r345 = trunc i448 %r344 to i416
+%r346 = lshr i416 %r345, 384
+%r347 = trunc i416 %r346 to i32
+%r348 = trunc i416 %r345 to i384
+%r349 = trunc i384 %r348 to i32 ; round 10 of 12 (consumes %r2[21])
+%r350 = mul i32 %r349, %r6
+%r351 = call i416 @mulPv384x32(i32* %r3, i32 %r350)
+%r352 = zext i32 %r347 to i416
+%r353 = shl i416 %r352, 384
+%r354 = add i416 %r351, %r353
+%r356 = getelementptr i32, i32* %r2, i32 21
+%r357 = load i32, i32* %r356
+%r358 = zext i32 %r357 to i416
+%r359 = shl i416 %r358, 384
+%r360 = zext i384 %r348 to i416
+%r361 = or i416 %r359, %r360
+%r362 = zext i416 %r361 to i448
+%r363 = zext i416 %r354 to i448
+%r364 = add i448 %r362, %r363
+%r365 = lshr i448 %r364, 32
+%r366 = trunc i448 %r365 to i416
+%r367 = lshr i416 %r366, 384
+%r368 = trunc i416 %r367 to i32
+%r369 = trunc i416 %r366 to i384
+%r370 = trunc i384 %r369 to i32 ; round 11 of 12 (consumes %r2[22])
+%r371 = mul i32 %r370, %r6
+%r372 = call i416 @mulPv384x32(i32* %r3, i32 %r371)
+%r373 = zext i32 %r368 to i416
+%r374 = shl i416 %r373, 384
+%r375 = add i416 %r372, %r374
+%r377 = getelementptr i32, i32* %r2, i32 22
+%r378 = load i32, i32* %r377
+%r379 = zext i32 %r378 to i416
+%r380 = shl i416 %r379, 384
+%r381 = zext i384 %r369 to i416
+%r382 = or i416 %r380, %r381
+%r383 = zext i416 %r382 to i448
+%r384 = zext i416 %r375 to i448
+%r385 = add i448 %r383, %r384
+%r386 = lshr i448 %r385, 32
+%r387 = trunc i448 %r386 to i416
+%r388 = lshr i416 %r387, 384
+%r389 = trunc i416 %r388 to i32
+%r390 = trunc i416 %r387 to i384
+%r391 = trunc i384 %r390 to i32 ; round 12 of 12 (consumes %r2[23], the last input word)
+%r392 = mul i32 %r391, %r6
+%r393 = call i416 @mulPv384x32(i32* %r3, i32 %r392)
+%r394 = zext i32 %r389 to i416
+%r395 = shl i416 %r394, 384
+%r396 = add i416 %r393, %r395
+%r398 = getelementptr i32, i32* %r2, i32 23
+%r399 = load i32, i32* %r398
+%r400 = zext i32 %r399 to i416
+%r401 = shl i416 %r400, 384
+%r402 = zext i384 %r390 to i416
+%r403 = or i416 %r401, %r402
+%r404 = zext i416 %r403 to i448
+%r405 = zext i416 %r396 to i448
+%r406 = add i448 %r404, %r405
+%r407 = lshr i448 %r406, 32
+%r408 = trunc i448 %r407 to i416
+%r409 = lshr i416 %r408, 384
+%r410 = trunc i416 %r409 to i32 ; NOTE(review): %r410 is not read again in this function; only the low 384 bits (%r411) are used
+%r411 = trunc i416 %r408 to i384 ; 384-bit value left after the 12 rounds
+%r412 = sub i384 %r411, %r84 ; candidate result: %r411 minus the %r3 value
+%r413 = lshr i384 %r412, 383 ; isolate bit 383 (the top bit) of the difference
+%r414 = trunc i384 %r413 to i1
+%r415 = select i1 %r414, i384 %r411, i384 %r412 ; top bit set -> keep %r411, otherwise take the difference
+%r417 = getelementptr i32, i32* %r1, i32 0 ; store the selected i384 to %r1[0..11], lowest word first
+%r418 = trunc i384 %r415 to i32
+store i32 %r418, i32* %r417
+%r419 = lshr i384 %r415, 32
+%r421 = getelementptr i32, i32* %r1, i32 1
+%r422 = trunc i384 %r419 to i32
+store i32 %r422, i32* %r421
+%r423 = lshr i384 %r419, 32
+%r425 = getelementptr i32, i32* %r1, i32 2
+%r426 = trunc i384 %r423 to i32
+store i32 %r426, i32* %r425
+%r427 = lshr i384 %r423, 32
+%r429 = getelementptr i32, i32* %r1, i32 3
+%r430 = trunc i384 %r427 to i32
+store i32 %r430, i32* %r429
+%r431 = lshr i384 %r427, 32
+%r433 = getelementptr i32, i32* %r1, i32 4
+%r434 = trunc i384 %r431 to i32
+store i32 %r434, i32* %r433
+%r435 = lshr i384 %r431, 32
+%r437 = getelementptr i32, i32* %r1, i32 5
+%r438 = trunc i384 %r435 to i32
+store i32 %r438, i32* %r437
+%r439 = lshr i384 %r435, 32
+%r441 = getelementptr i32, i32* %r1, i32 6
+%r442 = trunc i384 %r439 to i32
+store i32 %r442, i32* %r441
+%r443 = lshr i384 %r439, 32
+%r445 = getelementptr i32, i32* %r1, i32 7
+%r446 = trunc i384 %r443 to i32
+store i32 %r446, i32* %r445
+%r447 = lshr i384 %r443, 32
+%r449 = getelementptr i32, i32* %r1, i32 8
+%r450 = trunc i384 %r447 to i32
+store i32 %r450, i32* %r449
+%r451 = lshr i384 %r447, 32
+%r453 = getelementptr i32, i32* %r1, i32 9
+%r454 = trunc i384 %r451 to i32
+store i32 %r454, i32* %r453
+%r455 = lshr i384 %r451, 32
+%r457 = getelementptr i32, i32* %r1, i32 10
+%r458 = trunc i384 %r455 to i32
+store i32 %r458, i32* %r457
+%r459 = lshr i384 %r455, 32
+%r461 = getelementptr i32, i32* %r1, i32 11
+%r462 = trunc i384 %r459 to i32
+store i32 %r462, i32* %r461
+ret void
+}
+define i32 @mcl_fp_addPre12L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; adds the 12-word values at %r3 and %r4, stores the low 12 words of the sum to %r2, returns the carry-out
+{
+%r5 = load i32, i32* %r3 ; pack %r3[0..11] into one i384 (%r82), word i placed at bit 32*i
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416 ; widen the first operand to i416 so the carry out of bit 383 is kept
+%r84 = load i32, i32* %r4 ; pack %r4[0..11] the same way into %r161
+%r85 = zext i32 %r84 to i64
+%r87 = getelementptr i32, i32* %r4, i32 1
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i64
+%r90 = shl i64 %r89, 32
+%r91 = or i64 %r85, %r90
+%r92 = zext i64 %r91 to i96
+%r94 = getelementptr i32, i32* %r4, i32 2
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i96
+%r97 = shl i96 %r96, 64
+%r98 = or i96 %r92, %r97
+%r99 = zext i96 %r98 to i128
+%r101 = getelementptr i32, i32* %r4, i32 3
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i128
+%r104 = shl i128 %r103, 96
+%r105 = or i128 %r99, %r104
+%r106 = zext i128 %r105 to i160
+%r108 = getelementptr i32, i32* %r4, i32 4
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i160
+%r111 = shl i160 %r110, 128
+%r112 = or i160 %r106, %r111
+%r113 = zext i160 %r112 to i192
+%r115 = getelementptr i32, i32* %r4, i32 5
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i192
+%r118 = shl i192 %r117, 160
+%r119 = or i192 %r113, %r118
+%r120 = zext i192 %r119 to i224
+%r122 = getelementptr i32, i32* %r4, i32 6
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i224
+%r125 = shl i224 %r124, 192
+%r126 = or i224 %r120, %r125
+%r127 = zext i224 %r126 to i256
+%r129 = getelementptr i32, i32* %r4, i32 7
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i256
+%r132 = shl i256 %r131, 224
+%r133 = or i256 %r127, %r132
+%r134 = zext i256 %r133 to i288
+%r136 = getelementptr i32, i32* %r4, i32 8
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i288
+%r139 = shl i288 %r138, 256
+%r140 = or i288 %r134, %r139
+%r141 = zext i288 %r140 to i320
+%r143 = getelementptr i32, i32* %r4, i32 9
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i320
+%r146 = shl i320 %r145, 288
+%r147 = or i320 %r141, %r146
+%r148 = zext i320 %r147 to i352
+%r150 = getelementptr i32, i32* %r4, i32 10
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i352
+%r153 = shl i352 %r152, 320
+%r154 = or i352 %r148, %r153
+%r155 = zext i352 %r154 to i384
+%r157 = getelementptr i32, i32* %r4, i32 11
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i384
+%r160 = shl i384 %r159, 352
+%r161 = or i384 %r155, %r160
+%r162 = zext i384 %r161 to i416 ; widen the second operand to i416 as well
+%r163 = add i416 %r83, %r162 ; full sum; bit 384 holds the carry-out
+%r164 = trunc i416 %r163 to i384 ; low 384 bits of the sum
+%r166 = getelementptr i32, i32* %r2, i32 0 ; store the low 384 bits to %r2[0..11], lowest word first
+%r167 = trunc i384 %r164 to i32
+store i32 %r167, i32* %r166
+%r168 = lshr i384 %r164, 32
+%r170 = getelementptr i32, i32* %r2, i32 1
+%r171 = trunc i384 %r168 to i32
+store i32 %r171, i32* %r170
+%r172 = lshr i384 %r168, 32
+%r174 = getelementptr i32, i32* %r2, i32 2
+%r175 = trunc i384 %r172 to i32
+store i32 %r175, i32* %r174
+%r176 = lshr i384 %r172, 32
+%r178 = getelementptr i32, i32* %r2, i32 3
+%r179 = trunc i384 %r176 to i32
+store i32 %r179, i32* %r178
+%r180 = lshr i384 %r176, 32
+%r182 = getelementptr i32, i32* %r2, i32 4
+%r183 = trunc i384 %r180 to i32
+store i32 %r183, i32* %r182
+%r184 = lshr i384 %r180, 32
+%r186 = getelementptr i32, i32* %r2, i32 5
+%r187 = trunc i384 %r184 to i32
+store i32 %r187, i32* %r186
+%r188 = lshr i384 %r184, 32
+%r190 = getelementptr i32, i32* %r2, i32 6
+%r191 = trunc i384 %r188 to i32
+store i32 %r191, i32* %r190
+%r192 = lshr i384 %r188, 32
+%r194 = getelementptr i32, i32* %r2, i32 7
+%r195 = trunc i384 %r192 to i32
+store i32 %r195, i32* %r194
+%r196 = lshr i384 %r192, 32
+%r198 = getelementptr i32, i32* %r2, i32 8
+%r199 = trunc i384 %r196 to i32
+store i32 %r199, i32* %r198
+%r200 = lshr i384 %r196, 32
+%r202 = getelementptr i32, i32* %r2, i32 9
+%r203 = trunc i384 %r200 to i32
+store i32 %r203, i32* %r202
+%r204 = lshr i384 %r200, 32
+%r206 = getelementptr i32, i32* %r2, i32 10
+%r207 = trunc i384 %r204 to i32
+store i32 %r207, i32* %r206
+%r208 = lshr i384 %r204, 32
+%r210 = getelementptr i32, i32* %r2, i32 11
+%r211 = trunc i384 %r208 to i32
+store i32 %r211, i32* %r210
+%r212 = lshr i416 %r163, 384 ; bits 384 and up of the sum
+%r213 = trunc i416 %r212 to i32
+ret i32 %r213 ; carry-out: 0 or 1, since both operands are below 2^384
+}
+define i32 @mcl_fp_subPre12L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r2[0..11] = (%r3[0..11] - %r4[0..11]) as 384-bit integers, no reduction; returns the borrow (0 or 1)
+{
+%r5 = load i32, i32* %r3 ; start packing the 12 words at %r3 into one wide integer, word 0 in the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r3, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r3, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r3, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r3, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r3, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r3, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r3, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r3, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r3, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r3, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r3, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81 ; all 12 words of %r3 are now packed into 384 bits
+%r83 = zext i384 %r82 to i416 ; widen by 32 bits so a borrow out of bit 383 stays observable
+%r84 = load i32, i32* %r4 ; pack the 12 words at %r4 the same way
+%r85 = zext i32 %r84 to i64
+%r87 = getelementptr i32, i32* %r4, i32 1
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i64
+%r90 = shl i64 %r89, 32
+%r91 = or i64 %r85, %r90
+%r92 = zext i64 %r91 to i96
+%r94 = getelementptr i32, i32* %r4, i32 2
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i96
+%r97 = shl i96 %r96, 64
+%r98 = or i96 %r92, %r97
+%r99 = zext i96 %r98 to i128
+%r101 = getelementptr i32, i32* %r4, i32 3
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i128
+%r104 = shl i128 %r103, 96
+%r105 = or i128 %r99, %r104
+%r106 = zext i128 %r105 to i160
+%r108 = getelementptr i32, i32* %r4, i32 4
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i160
+%r111 = shl i160 %r110, 128
+%r112 = or i160 %r106, %r111
+%r113 = zext i160 %r112 to i192
+%r115 = getelementptr i32, i32* %r4, i32 5
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i192
+%r118 = shl i192 %r117, 160
+%r119 = or i192 %r113, %r118
+%r120 = zext i192 %r119 to i224
+%r122 = getelementptr i32, i32* %r4, i32 6
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i224
+%r125 = shl i224 %r124, 192
+%r126 = or i224 %r120, %r125
+%r127 = zext i224 %r126 to i256
+%r129 = getelementptr i32, i32* %r4, i32 7
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i256
+%r132 = shl i256 %r131, 224
+%r133 = or i256 %r127, %r132
+%r134 = zext i256 %r133 to i288
+%r136 = getelementptr i32, i32* %r4, i32 8
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i288
+%r139 = shl i288 %r138, 256
+%r140 = or i288 %r134, %r139
+%r141 = zext i288 %r140 to i320
+%r143 = getelementptr i32, i32* %r4, i32 9
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i320
+%r146 = shl i320 %r145, 288
+%r147 = or i320 %r141, %r146
+%r148 = zext i320 %r147 to i352
+%r150 = getelementptr i32, i32* %r4, i32 10
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i352
+%r153 = shl i352 %r152, 320
+%r154 = or i352 %r148, %r153
+%r155 = zext i352 %r154 to i384
+%r157 = getelementptr i32, i32* %r4, i32 11
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i384
+%r160 = shl i384 %r159, 352
+%r161 = or i384 %r155, %r160
+%r162 = zext i384 %r161 to i416
+%r163 = sub i416 %r83, %r162 ; 416-bit difference; bits 384..415 become all ones when the %r4 value exceeds the %r3 value
+%r164 = trunc i416 %r163 to i384 ; the low 384 bits are the result that gets stored
+%r166 = getelementptr i32, i32* %r2, i32 0 ; write the result to %r2 one 32-bit word at a time, lowest word first
+%r167 = trunc i384 %r164 to i32
+store i32 %r167, i32* %r166
+%r168 = lshr i384 %r164, 32
+%r170 = getelementptr i32, i32* %r2, i32 1
+%r171 = trunc i384 %r168 to i32
+store i32 %r171, i32* %r170
+%r172 = lshr i384 %r168, 32
+%r174 = getelementptr i32, i32* %r2, i32 2
+%r175 = trunc i384 %r172 to i32
+store i32 %r175, i32* %r174
+%r176 = lshr i384 %r172, 32
+%r178 = getelementptr i32, i32* %r2, i32 3
+%r179 = trunc i384 %r176 to i32
+store i32 %r179, i32* %r178
+%r180 = lshr i384 %r176, 32
+%r182 = getelementptr i32, i32* %r2, i32 4
+%r183 = trunc i384 %r180 to i32
+store i32 %r183, i32* %r182
+%r184 = lshr i384 %r180, 32
+%r186 = getelementptr i32, i32* %r2, i32 5
+%r187 = trunc i384 %r184 to i32
+store i32 %r187, i32* %r186
+%r188 = lshr i384 %r184, 32
+%r190 = getelementptr i32, i32* %r2, i32 6
+%r191 = trunc i384 %r188 to i32
+store i32 %r191, i32* %r190
+%r192 = lshr i384 %r188, 32
+%r194 = getelementptr i32, i32* %r2, i32 7
+%r195 = trunc i384 %r192 to i32
+store i32 %r195, i32* %r194
+%r196 = lshr i384 %r192, 32
+%r198 = getelementptr i32, i32* %r2, i32 8
+%r199 = trunc i384 %r196 to i32
+store i32 %r199, i32* %r198
+%r200 = lshr i384 %r196, 32
+%r202 = getelementptr i32, i32* %r2, i32 9
+%r203 = trunc i384 %r200 to i32
+store i32 %r203, i32* %r202
+%r204 = lshr i384 %r200, 32
+%r206 = getelementptr i32, i32* %r2, i32 10
+%r207 = trunc i384 %r204 to i32
+store i32 %r207, i32* %r206
+%r208 = lshr i384 %r204, 32
+%r210 = getelementptr i32, i32* %r2, i32 11
+%r211 = trunc i384 %r208 to i32
+store i32 %r211, i32* %r210
+%r213 = lshr i416 %r163, 384 ; bring the bits above the 384-bit result down to bit 0
+%r214 = trunc i416 %r213 to i32
+%r215 = and i32 %r214, 1 ; keep only bit 384 of the difference, so the return value is 0 or 1
+ret i32 %r215
+}
+define void @mcl_fp_shr1_12L(i32* noalias  %r1, i32* noalias  %r2) ; %r1[0..11] = (384-bit value read from %r2[0..11]) >> 1
+{
+%r3 = load i32, i32* %r2 ; start packing the 12 words at %r2 into one i384, word 0 in the lowest 32 bits
+%r4 = zext i32 %r3 to i64
+%r6 = getelementptr i32, i32* %r2, i32 1
+%r7 = load i32, i32* %r6
+%r8 = zext i32 %r7 to i64
+%r9 = shl i64 %r8, 32
+%r10 = or i64 %r4, %r9
+%r11 = zext i64 %r10 to i96
+%r13 = getelementptr i32, i32* %r2, i32 2
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i96
+%r16 = shl i96 %r15, 64
+%r17 = or i96 %r11, %r16
+%r18 = zext i96 %r17 to i128
+%r20 = getelementptr i32, i32* %r2, i32 3
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i128
+%r23 = shl i128 %r22, 96
+%r24 = or i128 %r18, %r23
+%r25 = zext i128 %r24 to i160
+%r27 = getelementptr i32, i32* %r2, i32 4
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i160
+%r30 = shl i160 %r29, 128
+%r31 = or i160 %r25, %r30
+%r32 = zext i160 %r31 to i192
+%r34 = getelementptr i32, i32* %r2, i32 5
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i192
+%r37 = shl i192 %r36, 160
+%r38 = or i192 %r32, %r37
+%r39 = zext i192 %r38 to i224
+%r41 = getelementptr i32, i32* %r2, i32 6
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i224
+%r44 = shl i224 %r43, 192
+%r45 = or i224 %r39, %r44
+%r46 = zext i224 %r45 to i256
+%r48 = getelementptr i32, i32* %r2, i32 7
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i256
+%r51 = shl i256 %r50, 224
+%r52 = or i256 %r46, %r51
+%r53 = zext i256 %r52 to i288
+%r55 = getelementptr i32, i32* %r2, i32 8
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i288
+%r58 = shl i288 %r57, 256
+%r59 = or i288 %r53, %r58
+%r60 = zext i288 %r59 to i320
+%r62 = getelementptr i32, i32* %r2, i32 9
+%r63 = load i32, i32* %r62
+%r64 = zext i32 %r63 to i320
+%r65 = shl i320 %r64, 288
+%r66 = or i320 %r60, %r65
+%r67 = zext i320 %r66 to i352
+%r69 = getelementptr i32, i32* %r2, i32 10
+%r70 = load i32, i32* %r69
+%r71 = zext i32 %r70 to i352
+%r72 = shl i352 %r71, 320
+%r73 = or i352 %r67, %r72
+%r74 = zext i352 %r73 to i384
+%r76 = getelementptr i32, i32* %r2, i32 11
+%r77 = load i32, i32* %r76
+%r78 = zext i32 %r77 to i384
+%r79 = shl i384 %r78, 352
+%r80 = or i384 %r74, %r79 ; full 384-bit input value
+%r81 = lshr i384 %r80, 1 ; logical shift right by one bit: bit 0 is dropped and bit 383 becomes 0
+%r83 = getelementptr i32, i32* %r1, i32 0 ; write the shifted value to %r1 one 32-bit word at a time, lowest word first
+%r84 = trunc i384 %r81 to i32
+store i32 %r84, i32* %r83
+%r85 = lshr i384 %r81, 32
+%r87 = getelementptr i32, i32* %r1, i32 1
+%r88 = trunc i384 %r85 to i32
+store i32 %r88, i32* %r87
+%r89 = lshr i384 %r85, 32
+%r91 = getelementptr i32, i32* %r1, i32 2
+%r92 = trunc i384 %r89 to i32
+store i32 %r92, i32* %r91
+%r93 = lshr i384 %r89, 32
+%r95 = getelementptr i32, i32* %r1, i32 3
+%r96 = trunc i384 %r93 to i32
+store i32 %r96, i32* %r95
+%r97 = lshr i384 %r93, 32
+%r99 = getelementptr i32, i32* %r1, i32 4
+%r100 = trunc i384 %r97 to i32
+store i32 %r100, i32* %r99
+%r101 = lshr i384 %r97, 32
+%r103 = getelementptr i32, i32* %r1, i32 5
+%r104 = trunc i384 %r101 to i32
+store i32 %r104, i32* %r103
+%r105 = lshr i384 %r101, 32
+%r107 = getelementptr i32, i32* %r1, i32 6
+%r108 = trunc i384 %r105 to i32
+store i32 %r108, i32* %r107
+%r109 = lshr i384 %r105, 32
+%r111 = getelementptr i32, i32* %r1, i32 7
+%r112 = trunc i384 %r109 to i32
+store i32 %r112, i32* %r111
+%r113 = lshr i384 %r109, 32
+%r115 = getelementptr i32, i32* %r1, i32 8
+%r116 = trunc i384 %r113 to i32
+store i32 %r116, i32* %r115
+%r117 = lshr i384 %r113, 32
+%r119 = getelementptr i32, i32* %r1, i32 9
+%r120 = trunc i384 %r117 to i32
+store i32 %r120, i32* %r119
+%r121 = lshr i384 %r117, 32
+%r123 = getelementptr i32, i32* %r1, i32 10
+%r124 = trunc i384 %r121 to i32
+store i32 %r124, i32* %r123
+%r125 = lshr i384 %r121, 32
+%r127 = getelementptr i32, i32* %r1, i32 11
+%r128 = trunc i384 %r125 to i32
+store i32 %r128, i32* %r127
+ret void
+}
+define void @mcl_fp_add12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r1[0..11] = %r2 + %r3, then overwritten with (sum - %r4) when that subtraction does not borrow
+{
+%r5 = load i32, i32* %r2 ; start packing the 12 words at %r2 into one i384, word 0 in the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81 ; first operand: all 12 words of %r2 packed
+%r83 = load i32, i32* %r3 ; pack the 12 words at %r3 the same way
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r3, i32 1
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r3, i32 2
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r3, i32 3
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r3, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r3, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r121 = getelementptr i32, i32* %r3, i32 6
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i224
+%r124 = shl i224 %r123, 192
+%r125 = or i224 %r119, %r124
+%r126 = zext i224 %r125 to i256
+%r128 = getelementptr i32, i32* %r3, i32 7
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i256
+%r131 = shl i256 %r130, 224
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i288
+%r135 = getelementptr i32, i32* %r3, i32 8
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i288
+%r138 = shl i288 %r137, 256
+%r139 = or i288 %r133, %r138
+%r140 = zext i288 %r139 to i320
+%r142 = getelementptr i32, i32* %r3, i32 9
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i320
+%r145 = shl i320 %r144, 288
+%r146 = or i320 %r140, %r145
+%r147 = zext i320 %r146 to i352
+%r149 = getelementptr i32, i32* %r3, i32 10
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i352
+%r152 = shl i352 %r151, 320
+%r153 = or i352 %r147, %r152
+%r154 = zext i352 %r153 to i384
+%r156 = getelementptr i32, i32* %r3, i32 11
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i384
+%r159 = shl i384 %r158, 352
+%r160 = or i384 %r154, %r159 ; second operand: all 12 words of %r3 packed
+%r161 = zext i384 %r82 to i416 ; widen both operands by 32 bits so the carry out of bit 383 is kept
+%r162 = zext i384 %r160 to i416
+%r163 = add i416 %r161, %r162 ; full sum including the carry bit
+%r164 = trunc i416 %r163 to i384
+%r166 = getelementptr i32, i32* %r1, i32 0 ; store the low 384 bits of the unreduced sum to %r1 first, lowest word first
+%r167 = trunc i384 %r164 to i32
+store i32 %r167, i32* %r166
+%r168 = lshr i384 %r164, 32
+%r170 = getelementptr i32, i32* %r1, i32 1
+%r171 = trunc i384 %r168 to i32
+store i32 %r171, i32* %r170
+%r172 = lshr i384 %r168, 32
+%r174 = getelementptr i32, i32* %r1, i32 2
+%r175 = trunc i384 %r172 to i32
+store i32 %r175, i32* %r174
+%r176 = lshr i384 %r172, 32
+%r178 = getelementptr i32, i32* %r1, i32 3
+%r179 = trunc i384 %r176 to i32
+store i32 %r179, i32* %r178
+%r180 = lshr i384 %r176, 32
+%r182 = getelementptr i32, i32* %r1, i32 4
+%r183 = trunc i384 %r180 to i32
+store i32 %r183, i32* %r182
+%r184 = lshr i384 %r180, 32
+%r186 = getelementptr i32, i32* %r1, i32 5
+%r187 = trunc i384 %r184 to i32
+store i32 %r187, i32* %r186
+%r188 = lshr i384 %r184, 32
+%r190 = getelementptr i32, i32* %r1, i32 6
+%r191 = trunc i384 %r188 to i32
+store i32 %r191, i32* %r190
+%r192 = lshr i384 %r188, 32
+%r194 = getelementptr i32, i32* %r1, i32 7
+%r195 = trunc i384 %r192 to i32
+store i32 %r195, i32* %r194
+%r196 = lshr i384 %r192, 32
+%r198 = getelementptr i32, i32* %r1, i32 8
+%r199 = trunc i384 %r196 to i32
+store i32 %r199, i32* %r198
+%r200 = lshr i384 %r196, 32
+%r202 = getelementptr i32, i32* %r1, i32 9
+%r203 = trunc i384 %r200 to i32
+store i32 %r203, i32* %r202
+%r204 = lshr i384 %r200, 32
+%r206 = getelementptr i32, i32* %r1, i32 10
+%r207 = trunc i384 %r204 to i32
+store i32 %r207, i32* %r206
+%r208 = lshr i384 %r204, 32
+%r210 = getelementptr i32, i32* %r1, i32 11
+%r211 = trunc i384 %r208 to i32
+store i32 %r211, i32* %r210
+%r212 = load i32, i32* %r4 ; pack the 12 words at %r4; NOTE(review): presumably the field modulus p - confirm against the generator
+%r213 = zext i32 %r212 to i64
+%r215 = getelementptr i32, i32* %r4, i32 1
+%r216 = load i32, i32* %r215
+%r217 = zext i32 %r216 to i64
+%r218 = shl i64 %r217, 32
+%r219 = or i64 %r213, %r218
+%r220 = zext i64 %r219 to i96
+%r222 = getelementptr i32, i32* %r4, i32 2
+%r223 = load i32, i32* %r222
+%r224 = zext i32 %r223 to i96
+%r225 = shl i96 %r224, 64
+%r226 = or i96 %r220, %r225
+%r227 = zext i96 %r226 to i128
+%r229 = getelementptr i32, i32* %r4, i32 3
+%r230 = load i32, i32* %r229
+%r231 = zext i32 %r230 to i128
+%r232 = shl i128 %r231, 96
+%r233 = or i128 %r227, %r232
+%r234 = zext i128 %r233 to i160
+%r236 = getelementptr i32, i32* %r4, i32 4
+%r237 = load i32, i32* %r236
+%r238 = zext i32 %r237 to i160
+%r239 = shl i160 %r238, 128
+%r240 = or i160 %r234, %r239
+%r241 = zext i160 %r240 to i192
+%r243 = getelementptr i32, i32* %r4, i32 5
+%r244 = load i32, i32* %r243
+%r245 = zext i32 %r244 to i192
+%r246 = shl i192 %r245, 160
+%r247 = or i192 %r241, %r246
+%r248 = zext i192 %r247 to i224
+%r250 = getelementptr i32, i32* %r4, i32 6
+%r251 = load i32, i32* %r250
+%r252 = zext i32 %r251 to i224
+%r253 = shl i224 %r252, 192
+%r254 = or i224 %r248, %r253
+%r255 = zext i224 %r254 to i256
+%r257 = getelementptr i32, i32* %r4, i32 7
+%r258 = load i32, i32* %r257
+%r259 = zext i32 %r258 to i256
+%r260 = shl i256 %r259, 224
+%r261 = or i256 %r255, %r260
+%r262 = zext i256 %r261 to i288
+%r264 = getelementptr i32, i32* %r4, i32 8
+%r265 = load i32, i32* %r264
+%r266 = zext i32 %r265 to i288
+%r267 = shl i288 %r266, 256
+%r268 = or i288 %r262, %r267
+%r269 = zext i288 %r268 to i320
+%r271 = getelementptr i32, i32* %r4, i32 9
+%r272 = load i32, i32* %r271
+%r273 = zext i32 %r272 to i320
+%r274 = shl i320 %r273, 288
+%r275 = or i320 %r269, %r274
+%r276 = zext i320 %r275 to i352
+%r278 = getelementptr i32, i32* %r4, i32 10
+%r279 = load i32, i32* %r278
+%r280 = zext i32 %r279 to i352
+%r281 = shl i352 %r280, 320
+%r282 = or i352 %r276, %r281
+%r283 = zext i352 %r282 to i384
+%r285 = getelementptr i32, i32* %r4, i32 11
+%r286 = load i32, i32* %r285
+%r287 = zext i32 %r286 to i384
+%r288 = shl i384 %r287, 352
+%r289 = or i384 %r283, %r288
+%r290 = zext i384 %r289 to i416
+%r291 = sub i416 %r163, %r290 ; trial subtraction: sum - (value at %r4), still 416 bits wide
+%r292 = lshr i416 %r291, 384
+%r293 = trunc i416 %r292 to i1 ; bit 384 of the difference; it is set whenever the sum is smaller than the %r4 value
+br i1%r293, label %carry, label %nocarry
+nocarry: ; bit 384 clear: replace the sum already stored in %r1 with the low 384 bits of the difference
+%r294 = trunc i416 %r291 to i384
+%r296 = getelementptr i32, i32* %r1, i32 0
+%r297 = trunc i384 %r294 to i32
+store i32 %r297, i32* %r296
+%r298 = lshr i384 %r294, 32
+%r300 = getelementptr i32, i32* %r1, i32 1
+%r301 = trunc i384 %r298 to i32
+store i32 %r301, i32* %r300
+%r302 = lshr i384 %r298, 32
+%r304 = getelementptr i32, i32* %r1, i32 2
+%r305 = trunc i384 %r302 to i32
+store i32 %r305, i32* %r304
+%r306 = lshr i384 %r302, 32
+%r308 = getelementptr i32, i32* %r1, i32 3
+%r309 = trunc i384 %r306 to i32
+store i32 %r309, i32* %r308
+%r310 = lshr i384 %r306, 32
+%r312 = getelementptr i32, i32* %r1, i32 4
+%r313 = trunc i384 %r310 to i32
+store i32 %r313, i32* %r312
+%r314 = lshr i384 %r310, 32
+%r316 = getelementptr i32, i32* %r1, i32 5
+%r317 = trunc i384 %r314 to i32
+store i32 %r317, i32* %r316
+%r318 = lshr i384 %r314, 32
+%r320 = getelementptr i32, i32* %r1, i32 6
+%r321 = trunc i384 %r318 to i32
+store i32 %r321, i32* %r320
+%r322 = lshr i384 %r318, 32
+%r324 = getelementptr i32, i32* %r1, i32 7
+%r325 = trunc i384 %r322 to i32
+store i32 %r325, i32* %r324
+%r326 = lshr i384 %r322, 32
+%r328 = getelementptr i32, i32* %r1, i32 8
+%r329 = trunc i384 %r326 to i32
+store i32 %r329, i32* %r328
+%r330 = lshr i384 %r326, 32
+%r332 = getelementptr i32, i32* %r1, i32 9
+%r333 = trunc i384 %r330 to i32
+store i32 %r333, i32* %r332
+%r334 = lshr i384 %r330, 32
+%r336 = getelementptr i32, i32* %r1, i32 10
+%r337 = trunc i384 %r334 to i32
+store i32 %r337, i32* %r336
+%r338 = lshr i384 %r334, 32
+%r340 = getelementptr i32, i32* %r1, i32 11
+%r341 = trunc i384 %r338 to i32
+store i32 %r341, i32* %r340
+ret void
+carry: ; bit 384 set: nothing more is written, so %r1 keeps the unreduced sum stored earlier
+ret void
+}
+define void @mcl_fp_addNF12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4) ; %r1[0..11] = %r2 + %r3, minus %r4 unless that difference has bit 383 set; no branches, one set of stores
+{
+%r5 = load i32, i32* %r2 ; start packing the 12 words at %r2 into one i384, word 0 in the lowest 32 bits
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81 ; first operand: all 12 words of %r2 packed
+%r83 = load i32, i32* %r3 ; pack the 12 words at %r3 the same way
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r3, i32 1
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r3, i32 2
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r3, i32 3
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r3, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r3, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r121 = getelementptr i32, i32* %r3, i32 6
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i224
+%r124 = shl i224 %r123, 192
+%r125 = or i224 %r119, %r124
+%r126 = zext i224 %r125 to i256
+%r128 = getelementptr i32, i32* %r3, i32 7
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i256
+%r131 = shl i256 %r130, 224
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i288
+%r135 = getelementptr i32, i32* %r3, i32 8
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i288
+%r138 = shl i288 %r137, 256
+%r139 = or i288 %r133, %r138
+%r140 = zext i288 %r139 to i320
+%r142 = getelementptr i32, i32* %r3, i32 9
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i320
+%r145 = shl i320 %r144, 288
+%r146 = or i320 %r140, %r145
+%r147 = zext i320 %r146 to i352
+%r149 = getelementptr i32, i32* %r3, i32 10
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i352
+%r152 = shl i352 %r151, 320
+%r153 = or i352 %r147, %r152
+%r154 = zext i352 %r153 to i384
+%r156 = getelementptr i32, i32* %r3, i32 11
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i384
+%r159 = shl i384 %r158, 352
+%r160 = or i384 %r154, %r159 ; second operand: all 12 words of %r3 packed
+%r161 = add i384 %r82, %r160 ; sum kept at 384 bits, so a carry out of bit 383 is dropped; NOTE(review): presumably operands leave the top bit free - confirm
+%r162 = load i32, i32* %r4 ; pack the 12 words at %r4; NOTE(review): presumably the field modulus p - confirm against the generator
+%r163 = zext i32 %r162 to i64
+%r165 = getelementptr i32, i32* %r4, i32 1
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i64
+%r168 = shl i64 %r167, 32
+%r169 = or i64 %r163, %r168
+%r170 = zext i64 %r169 to i96
+%r172 = getelementptr i32, i32* %r4, i32 2
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i96
+%r175 = shl i96 %r174, 64
+%r176 = or i96 %r170, %r175
+%r177 = zext i96 %r176 to i128
+%r179 = getelementptr i32, i32* %r4, i32 3
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i128
+%r182 = shl i128 %r181, 96
+%r183 = or i128 %r177, %r182
+%r184 = zext i128 %r183 to i160
+%r186 = getelementptr i32, i32* %r4, i32 4
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i160
+%r189 = shl i160 %r188, 128
+%r190 = or i160 %r184, %r189
+%r191 = zext i160 %r190 to i192
+%r193 = getelementptr i32, i32* %r4, i32 5
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i192
+%r196 = shl i192 %r195, 160
+%r197 = or i192 %r191, %r196
+%r198 = zext i192 %r197 to i224
+%r200 = getelementptr i32, i32* %r4, i32 6
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i224
+%r203 = shl i224 %r202, 192
+%r204 = or i224 %r198, %r203
+%r205 = zext i224 %r204 to i256
+%r207 = getelementptr i32, i32* %r4, i32 7
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i256
+%r210 = shl i256 %r209, 224
+%r211 = or i256 %r205, %r210
+%r212 = zext i256 %r211 to i288
+%r214 = getelementptr i32, i32* %r4, i32 8
+%r215 = load i32, i32* %r214
+%r216 = zext i32 %r215 to i288
+%r217 = shl i288 %r216, 256
+%r218 = or i288 %r212, %r217
+%r219 = zext i288 %r218 to i320
+%r221 = getelementptr i32, i32* %r4, i32 9
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i320
+%r224 = shl i320 %r223, 288
+%r225 = or i320 %r219, %r224
+%r226 = zext i320 %r225 to i352
+%r228 = getelementptr i32, i32* %r4, i32 10
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i352
+%r231 = shl i352 %r230, 320
+%r232 = or i352 %r226, %r231
+%r233 = zext i352 %r232 to i384
+%r235 = getelementptr i32, i32* %r4, i32 11
+%r236 = load i32, i32* %r235
+%r237 = zext i32 %r236 to i384
+%r238 = shl i384 %r237, 352
+%r239 = or i384 %r233, %r238
+%r240 = sub i384 %r161, %r239 ; trial subtraction: sum - (value at %r4), wrapping modulo 2^384
+%r241 = lshr i384 %r240, 383
+%r242 = trunc i384 %r241 to i1 ; bit 383 (top bit) of the difference, used here as the "went negative" flag
+%r243 = select i1 %r242, i384 %r161, i384 %r240 ; flag set -> keep the plain sum; flag clear -> use sum - %r4
+%r245 = getelementptr i32, i32* %r1, i32 0 ; write the selected value to %r1 one 32-bit word at a time, lowest word first
+%r246 = trunc i384 %r243 to i32
+store i32 %r246, i32* %r245
+%r247 = lshr i384 %r243, 32
+%r249 = getelementptr i32, i32* %r1, i32 1
+%r250 = trunc i384 %r247 to i32
+store i32 %r250, i32* %r249
+%r251 = lshr i384 %r247, 32
+%r253 = getelementptr i32, i32* %r1, i32 2
+%r254 = trunc i384 %r251 to i32
+store i32 %r254, i32* %r253
+%r255 = lshr i384 %r251, 32
+%r257 = getelementptr i32, i32* %r1, i32 3
+%r258 = trunc i384 %r255 to i32
+store i32 %r258, i32* %r257
+%r259 = lshr i384 %r255, 32
+%r261 = getelementptr i32, i32* %r1, i32 4
+%r262 = trunc i384 %r259 to i32
+store i32 %r262, i32* %r261
+%r263 = lshr i384 %r259, 32
+%r265 = getelementptr i32, i32* %r1, i32 5
+%r266 = trunc i384 %r263 to i32
+store i32 %r266, i32* %r265
+%r267 = lshr i384 %r263, 32
+%r269 = getelementptr i32, i32* %r1, i32 6
+%r270 = trunc i384 %r267 to i32
+store i32 %r270, i32* %r269
+%r271 = lshr i384 %r267, 32
+%r273 = getelementptr i32, i32* %r1, i32 7
+%r274 = trunc i384 %r271 to i32
+store i32 %r274, i32* %r273
+%r275 = lshr i384 %r271, 32
+%r277 = getelementptr i32, i32* %r1, i32 8
+%r278 = trunc i384 %r275 to i32
+store i32 %r278, i32* %r277
+%r279 = lshr i384 %r275, 32
+%r281 = getelementptr i32, i32* %r1, i32 9
+%r282 = trunc i384 %r279 to i32
+store i32 %r282, i32* %r281
+%r283 = lshr i384 %r279, 32
+%r285 = getelementptr i32, i32* %r1, i32 10
+%r286 = trunc i384 %r283 to i32
+store i32 %r286, i32* %r285
+%r287 = lshr i384 %r283, 32
+%r289 = getelementptr i32, i32* %r1, i32 11
+%r290 = trunc i384 %r287 to i32
+store i32 %r290, i32* %r289
+ret void
+}
+define void @mcl_fp_sub12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = load i32, i32* %r3
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r3, i32 1
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r3, i32 2
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r3, i32 3
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r3, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r3, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r121 = getelementptr i32, i32* %r3, i32 6
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i224
+%r124 = shl i224 %r123, 192
+%r125 = or i224 %r119, %r124
+%r126 = zext i224 %r125 to i256
+%r128 = getelementptr i32, i32* %r3, i32 7
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i256
+%r131 = shl i256 %r130, 224
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i288
+%r135 = getelementptr i32, i32* %r3, i32 8
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i288
+%r138 = shl i288 %r137, 256
+%r139 = or i288 %r133, %r138
+%r140 = zext i288 %r139 to i320
+%r142 = getelementptr i32, i32* %r3, i32 9
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i320
+%r145 = shl i320 %r144, 288
+%r146 = or i320 %r140, %r145
+%r147 = zext i320 %r146 to i352
+%r149 = getelementptr i32, i32* %r3, i32 10
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i352
+%r152 = shl i352 %r151, 320
+%r153 = or i352 %r147, %r152
+%r154 = zext i352 %r153 to i384
+%r156 = getelementptr i32, i32* %r3, i32 11
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i384
+%r159 = shl i384 %r158, 352
+%r160 = or i384 %r154, %r159
+%r161 = zext i384 %r82 to i416
+%r162 = zext i384 %r160 to i416
+%r163 = sub i416 %r161, %r162
+%r164 = trunc i416 %r163 to i384
+%r165 = lshr i416 %r163, 384
+%r166 = trunc i416 %r165 to i1
+%r168 = getelementptr i32, i32* %r1, i32 0
+%r169 = trunc i384 %r164 to i32
+store i32 %r169, i32* %r168
+%r170 = lshr i384 %r164, 32
+%r172 = getelementptr i32, i32* %r1, i32 1
+%r173 = trunc i384 %r170 to i32
+store i32 %r173, i32* %r172
+%r174 = lshr i384 %r170, 32
+%r176 = getelementptr i32, i32* %r1, i32 2
+%r177 = trunc i384 %r174 to i32
+store i32 %r177, i32* %r176
+%r178 = lshr i384 %r174, 32
+%r180 = getelementptr i32, i32* %r1, i32 3
+%r181 = trunc i384 %r178 to i32
+store i32 %r181, i32* %r180
+%r182 = lshr i384 %r178, 32
+%r184 = getelementptr i32, i32* %r1, i32 4
+%r185 = trunc i384 %r182 to i32
+store i32 %r185, i32* %r184
+%r186 = lshr i384 %r182, 32
+%r188 = getelementptr i32, i32* %r1, i32 5
+%r189 = trunc i384 %r186 to i32
+store i32 %r189, i32* %r188
+%r190 = lshr i384 %r186, 32
+%r192 = getelementptr i32, i32* %r1, i32 6
+%r193 = trunc i384 %r190 to i32
+store i32 %r193, i32* %r192
+%r194 = lshr i384 %r190, 32
+%r196 = getelementptr i32, i32* %r1, i32 7
+%r197 = trunc i384 %r194 to i32
+store i32 %r197, i32* %r196
+%r198 = lshr i384 %r194, 32
+%r200 = getelementptr i32, i32* %r1, i32 8
+%r201 = trunc i384 %r198 to i32
+store i32 %r201, i32* %r200
+%r202 = lshr i384 %r198, 32
+%r204 = getelementptr i32, i32* %r1, i32 9
+%r205 = trunc i384 %r202 to i32
+store i32 %r205, i32* %r204
+%r206 = lshr i384 %r202, 32
+%r208 = getelementptr i32, i32* %r1, i32 10
+%r209 = trunc i384 %r206 to i32
+store i32 %r209, i32* %r208
+%r210 = lshr i384 %r206, 32
+%r212 = getelementptr i32, i32* %r1, i32 11
+%r213 = trunc i384 %r210 to i32
+store i32 %r213, i32* %r212
+br i1%r166, label %carry, label %nocarry
+nocarry:
+ret void
+carry:
+%r214 = load i32, i32* %r4
+%r215 = zext i32 %r214 to i64
+%r217 = getelementptr i32, i32* %r4, i32 1
+%r218 = load i32, i32* %r217
+%r219 = zext i32 %r218 to i64
+%r220 = shl i64 %r219, 32
+%r221 = or i64 %r215, %r220
+%r222 = zext i64 %r221 to i96
+%r224 = getelementptr i32, i32* %r4, i32 2
+%r225 = load i32, i32* %r224
+%r226 = zext i32 %r225 to i96
+%r227 = shl i96 %r226, 64
+%r228 = or i96 %r222, %r227
+%r229 = zext i96 %r228 to i128
+%r231 = getelementptr i32, i32* %r4, i32 3
+%r232 = load i32, i32* %r231
+%r233 = zext i32 %r232 to i128
+%r234 = shl i128 %r233, 96
+%r235 = or i128 %r229, %r234
+%r236 = zext i128 %r235 to i160
+%r238 = getelementptr i32, i32* %r4, i32 4
+%r239 = load i32, i32* %r238
+%r240 = zext i32 %r239 to i160
+%r241 = shl i160 %r240, 128
+%r242 = or i160 %r236, %r241
+%r243 = zext i160 %r242 to i192
+%r245 = getelementptr i32, i32* %r4, i32 5
+%r246 = load i32, i32* %r245
+%r247 = zext i32 %r246 to i192
+%r248 = shl i192 %r247, 160
+%r249 = or i192 %r243, %r248
+%r250 = zext i192 %r249 to i224
+%r252 = getelementptr i32, i32* %r4, i32 6
+%r253 = load i32, i32* %r252
+%r254 = zext i32 %r253 to i224
+%r255 = shl i224 %r254, 192
+%r256 = or i224 %r250, %r255
+%r257 = zext i224 %r256 to i256
+%r259 = getelementptr i32, i32* %r4, i32 7
+%r260 = load i32, i32* %r259
+%r261 = zext i32 %r260 to i256
+%r262 = shl i256 %r261, 224
+%r263 = or i256 %r257, %r262
+%r264 = zext i256 %r263 to i288
+%r266 = getelementptr i32, i32* %r4, i32 8
+%r267 = load i32, i32* %r266
+%r268 = zext i32 %r267 to i288
+%r269 = shl i288 %r268, 256
+%r270 = or i288 %r264, %r269
+%r271 = zext i288 %r270 to i320
+%r273 = getelementptr i32, i32* %r4, i32 9
+%r274 = load i32, i32* %r273
+%r275 = zext i32 %r274 to i320
+%r276 = shl i320 %r275, 288
+%r277 = or i320 %r271, %r276
+%r278 = zext i320 %r277 to i352
+%r280 = getelementptr i32, i32* %r4, i32 10
+%r281 = load i32, i32* %r280
+%r282 = zext i32 %r281 to i352
+%r283 = shl i352 %r282, 320
+%r284 = or i352 %r278, %r283
+%r285 = zext i352 %r284 to i384
+%r287 = getelementptr i32, i32* %r4, i32 11
+%r288 = load i32, i32* %r287
+%r289 = zext i32 %r288 to i384
+%r290 = shl i384 %r289, 352
+%r291 = or i384 %r285, %r290
+%r292 = add i384 %r164, %r291
+%r294 = getelementptr i32, i32* %r1, i32 0
+%r295 = trunc i384 %r292 to i32
+store i32 %r295, i32* %r294
+%r296 = lshr i384 %r292, 32
+%r298 = getelementptr i32, i32* %r1, i32 1
+%r299 = trunc i384 %r296 to i32
+store i32 %r299, i32* %r298
+%r300 = lshr i384 %r296, 32
+%r302 = getelementptr i32, i32* %r1, i32 2
+%r303 = trunc i384 %r300 to i32
+store i32 %r303, i32* %r302
+%r304 = lshr i384 %r300, 32
+%r306 = getelementptr i32, i32* %r1, i32 3
+%r307 = trunc i384 %r304 to i32
+store i32 %r307, i32* %r306
+%r308 = lshr i384 %r304, 32
+%r310 = getelementptr i32, i32* %r1, i32 4
+%r311 = trunc i384 %r308 to i32
+store i32 %r311, i32* %r310
+%r312 = lshr i384 %r308, 32
+%r314 = getelementptr i32, i32* %r1, i32 5
+%r315 = trunc i384 %r312 to i32
+store i32 %r315, i32* %r314
+%r316 = lshr i384 %r312, 32
+%r318 = getelementptr i32, i32* %r1, i32 6
+%r319 = trunc i384 %r316 to i32
+store i32 %r319, i32* %r318
+%r320 = lshr i384 %r316, 32
+%r322 = getelementptr i32, i32* %r1, i32 7
+%r323 = trunc i384 %r320 to i32
+store i32 %r323, i32* %r322
+%r324 = lshr i384 %r320, 32
+%r326 = getelementptr i32, i32* %r1, i32 8
+%r327 = trunc i384 %r324 to i32
+store i32 %r327, i32* %r326
+%r328 = lshr i384 %r324, 32
+%r330 = getelementptr i32, i32* %r1, i32 9
+%r331 = trunc i384 %r328 to i32
+store i32 %r331, i32* %r330
+%r332 = lshr i384 %r328, 32
+%r334 = getelementptr i32, i32* %r1, i32 10
+%r335 = trunc i384 %r332 to i32
+store i32 %r335, i32* %r334
+%r336 = lshr i384 %r332, 32
+%r338 = getelementptr i32, i32* %r1, i32 11
+%r339 = trunc i384 %r336 to i32
+store i32 %r339, i32* %r338
+ret void
+}
+define void @mcl_fp_subNF12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = load i32, i32* %r3
+%r84 = zext i32 %r83 to i64
+%r86 = getelementptr i32, i32* %r3, i32 1
+%r87 = load i32, i32* %r86
+%r88 = zext i32 %r87 to i64
+%r89 = shl i64 %r88, 32
+%r90 = or i64 %r84, %r89
+%r91 = zext i64 %r90 to i96
+%r93 = getelementptr i32, i32* %r3, i32 2
+%r94 = load i32, i32* %r93
+%r95 = zext i32 %r94 to i96
+%r96 = shl i96 %r95, 64
+%r97 = or i96 %r91, %r96
+%r98 = zext i96 %r97 to i128
+%r100 = getelementptr i32, i32* %r3, i32 3
+%r101 = load i32, i32* %r100
+%r102 = zext i32 %r101 to i128
+%r103 = shl i128 %r102, 96
+%r104 = or i128 %r98, %r103
+%r105 = zext i128 %r104 to i160
+%r107 = getelementptr i32, i32* %r3, i32 4
+%r108 = load i32, i32* %r107
+%r109 = zext i32 %r108 to i160
+%r110 = shl i160 %r109, 128
+%r111 = or i160 %r105, %r110
+%r112 = zext i160 %r111 to i192
+%r114 = getelementptr i32, i32* %r3, i32 5
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i192
+%r117 = shl i192 %r116, 160
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i224
+%r121 = getelementptr i32, i32* %r3, i32 6
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i224
+%r124 = shl i224 %r123, 192
+%r125 = or i224 %r119, %r124
+%r126 = zext i224 %r125 to i256
+%r128 = getelementptr i32, i32* %r3, i32 7
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i256
+%r131 = shl i256 %r130, 224
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i288
+%r135 = getelementptr i32, i32* %r3, i32 8
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i288
+%r138 = shl i288 %r137, 256
+%r139 = or i288 %r133, %r138
+%r140 = zext i288 %r139 to i320
+%r142 = getelementptr i32, i32* %r3, i32 9
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i320
+%r145 = shl i320 %r144, 288
+%r146 = or i320 %r140, %r145
+%r147 = zext i320 %r146 to i352
+%r149 = getelementptr i32, i32* %r3, i32 10
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i352
+%r152 = shl i352 %r151, 320
+%r153 = or i352 %r147, %r152
+%r154 = zext i352 %r153 to i384
+%r156 = getelementptr i32, i32* %r3, i32 11
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i384
+%r159 = shl i384 %r158, 352
+%r160 = or i384 %r154, %r159
+%r161 = sub i384 %r82, %r160
+%r162 = lshr i384 %r161, 383
+%r163 = trunc i384 %r162 to i1
+%r164 = load i32, i32* %r4
+%r165 = zext i32 %r164 to i64
+%r167 = getelementptr i32, i32* %r4, i32 1
+%r168 = load i32, i32* %r167
+%r169 = zext i32 %r168 to i64
+%r170 = shl i64 %r169, 32
+%r171 = or i64 %r165, %r170
+%r172 = zext i64 %r171 to i96
+%r174 = getelementptr i32, i32* %r4, i32 2
+%r175 = load i32, i32* %r174
+%r176 = zext i32 %r175 to i96
+%r177 = shl i96 %r176, 64
+%r178 = or i96 %r172, %r177
+%r179 = zext i96 %r178 to i128
+%r181 = getelementptr i32, i32* %r4, i32 3
+%r182 = load i32, i32* %r181
+%r183 = zext i32 %r182 to i128
+%r184 = shl i128 %r183, 96
+%r185 = or i128 %r179, %r184
+%r186 = zext i128 %r185 to i160
+%r188 = getelementptr i32, i32* %r4, i32 4
+%r189 = load i32, i32* %r188
+%r190 = zext i32 %r189 to i160
+%r191 = shl i160 %r190, 128
+%r192 = or i160 %r186, %r191
+%r193 = zext i160 %r192 to i192
+%r195 = getelementptr i32, i32* %r4, i32 5
+%r196 = load i32, i32* %r195
+%r197 = zext i32 %r196 to i192
+%r198 = shl i192 %r197, 160
+%r199 = or i192 %r193, %r198
+%r200 = zext i192 %r199 to i224
+%r202 = getelementptr i32, i32* %r4, i32 6
+%r203 = load i32, i32* %r202
+%r204 = zext i32 %r203 to i224
+%r205 = shl i224 %r204, 192
+%r206 = or i224 %r200, %r205
+%r207 = zext i224 %r206 to i256
+%r209 = getelementptr i32, i32* %r4, i32 7
+%r210 = load i32, i32* %r209
+%r211 = zext i32 %r210 to i256
+%r212 = shl i256 %r211, 224
+%r213 = or i256 %r207, %r212
+%r214 = zext i256 %r213 to i288
+%r216 = getelementptr i32, i32* %r4, i32 8
+%r217 = load i32, i32* %r216
+%r218 = zext i32 %r217 to i288
+%r219 = shl i288 %r218, 256
+%r220 = or i288 %r214, %r219
+%r221 = zext i288 %r220 to i320
+%r223 = getelementptr i32, i32* %r4, i32 9
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i320
+%r226 = shl i320 %r225, 288
+%r227 = or i320 %r221, %r226
+%r228 = zext i320 %r227 to i352
+%r230 = getelementptr i32, i32* %r4, i32 10
+%r231 = load i32, i32* %r230
+%r232 = zext i32 %r231 to i352
+%r233 = shl i352 %r232, 320
+%r234 = or i352 %r228, %r233
+%r235 = zext i352 %r234 to i384
+%r237 = getelementptr i32, i32* %r4, i32 11
+%r238 = load i32, i32* %r237
+%r239 = zext i32 %r238 to i384
+%r240 = shl i384 %r239, 352
+%r241 = or i384 %r235, %r240
+%r243 = select i1 %r163, i384 %r241, i384 0
+%r244 = add i384 %r161, %r243
+%r246 = getelementptr i32, i32* %r1, i32 0
+%r247 = trunc i384 %r244 to i32
+store i32 %r247, i32* %r246
+%r248 = lshr i384 %r244, 32
+%r250 = getelementptr i32, i32* %r1, i32 1
+%r251 = trunc i384 %r248 to i32
+store i32 %r251, i32* %r250
+%r252 = lshr i384 %r248, 32
+%r254 = getelementptr i32, i32* %r1, i32 2
+%r255 = trunc i384 %r252 to i32
+store i32 %r255, i32* %r254
+%r256 = lshr i384 %r252, 32
+%r258 = getelementptr i32, i32* %r1, i32 3
+%r259 = trunc i384 %r256 to i32
+store i32 %r259, i32* %r258
+%r260 = lshr i384 %r256, 32
+%r262 = getelementptr i32, i32* %r1, i32 4
+%r263 = trunc i384 %r260 to i32
+store i32 %r263, i32* %r262
+%r264 = lshr i384 %r260, 32
+%r266 = getelementptr i32, i32* %r1, i32 5
+%r267 = trunc i384 %r264 to i32
+store i32 %r267, i32* %r266
+%r268 = lshr i384 %r264, 32
+%r270 = getelementptr i32, i32* %r1, i32 6
+%r271 = trunc i384 %r268 to i32
+store i32 %r271, i32* %r270
+%r272 = lshr i384 %r268, 32
+%r274 = getelementptr i32, i32* %r1, i32 7
+%r275 = trunc i384 %r272 to i32
+store i32 %r275, i32* %r274
+%r276 = lshr i384 %r272, 32
+%r278 = getelementptr i32, i32* %r1, i32 8
+%r279 = trunc i384 %r276 to i32
+store i32 %r279, i32* %r278
+%r280 = lshr i384 %r276, 32
+%r282 = getelementptr i32, i32* %r1, i32 9
+%r283 = trunc i384 %r280 to i32
+store i32 %r283, i32* %r282
+%r284 = lshr i384 %r280, 32
+%r286 = getelementptr i32, i32* %r1, i32 10
+%r287 = trunc i384 %r284 to i32
+store i32 %r287, i32* %r286
+%r288 = lshr i384 %r284, 32
+%r290 = getelementptr i32, i32* %r1, i32 11
+%r291 = trunc i384 %r288 to i32
+store i32 %r291, i32* %r290
+ret void
+}
+define void @mcl_fpDbl_add12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+{
+%r5 = load i32, i32* %r2
+%r6 = zext i32 %r5 to i64
+%r8 = getelementptr i32, i32* %r2, i32 1
+%r9 = load i32, i32* %r8
+%r10 = zext i32 %r9 to i64
+%r11 = shl i64 %r10, 32
+%r12 = or i64 %r6, %r11
+%r13 = zext i64 %r12 to i96
+%r15 = getelementptr i32, i32* %r2, i32 2
+%r16 = load i32, i32* %r15
+%r17 = zext i32 %r16 to i96
+%r18 = shl i96 %r17, 64
+%r19 = or i96 %r13, %r18
+%r20 = zext i96 %r19 to i128
+%r22 = getelementptr i32, i32* %r2, i32 3
+%r23 = load i32, i32* %r22
+%r24 = zext i32 %r23 to i128
+%r25 = shl i128 %r24, 96
+%r26 = or i128 %r20, %r25
+%r27 = zext i128 %r26 to i160
+%r29 = getelementptr i32, i32* %r2, i32 4
+%r30 = load i32, i32* %r29
+%r31 = zext i32 %r30 to i160
+%r32 = shl i160 %r31, 128
+%r33 = or i160 %r27, %r32
+%r34 = zext i160 %r33 to i192
+%r36 = getelementptr i32, i32* %r2, i32 5
+%r37 = load i32, i32* %r36
+%r38 = zext i32 %r37 to i192
+%r39 = shl i192 %r38, 160
+%r40 = or i192 %r34, %r39
+%r41 = zext i192 %r40 to i224
+%r43 = getelementptr i32, i32* %r2, i32 6
+%r44 = load i32, i32* %r43
+%r45 = zext i32 %r44 to i224
+%r46 = shl i224 %r45, 192
+%r47 = or i224 %r41, %r46
+%r48 = zext i224 %r47 to i256
+%r50 = getelementptr i32, i32* %r2, i32 7
+%r51 = load i32, i32* %r50
+%r52 = zext i32 %r51 to i256
+%r53 = shl i256 %r52, 224
+%r54 = or i256 %r48, %r53
+%r55 = zext i256 %r54 to i288
+%r57 = getelementptr i32, i32* %r2, i32 8
+%r58 = load i32, i32* %r57
+%r59 = zext i32 %r58 to i288
+%r60 = shl i288 %r59, 256
+%r61 = or i288 %r55, %r60
+%r62 = zext i288 %r61 to i320
+%r64 = getelementptr i32, i32* %r2, i32 9
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i320
+%r67 = shl i320 %r66, 288
+%r68 = or i320 %r62, %r67
+%r69 = zext i320 %r68 to i352
+%r71 = getelementptr i32, i32* %r2, i32 10
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i352
+%r74 = shl i352 %r73, 320
+%r75 = or i352 %r69, %r74
+%r76 = zext i352 %r75 to i384
+%r78 = getelementptr i32, i32* %r2, i32 11
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i384
+%r81 = shl i384 %r80, 352
+%r82 = or i384 %r76, %r81
+%r83 = zext i384 %r82 to i416
+%r85 = getelementptr i32, i32* %r2, i32 12
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i416
+%r88 = shl i416 %r87, 384
+%r89 = or i416 %r83, %r88
+%r90 = zext i416 %r89 to i448
+%r92 = getelementptr i32, i32* %r2, i32 13
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i448
+%r95 = shl i448 %r94, 416
+%r96 = or i448 %r90, %r95
+%r97 = zext i448 %r96 to i480
+%r99 = getelementptr i32, i32* %r2, i32 14
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i480
+%r102 = shl i480 %r101, 448
+%r103 = or i480 %r97, %r102
+%r104 = zext i480 %r103 to i512
+%r106 = getelementptr i32, i32* %r2, i32 15
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i512
+%r109 = shl i512 %r108, 480
+%r110 = or i512 %r104, %r109
+%r111 = zext i512 %r110 to i544
+%r113 = getelementptr i32, i32* %r2, i32 16
+%r114 = load i32, i32* %r113
+%r115 = zext i32 %r114 to i544
+%r116 = shl i544 %r115, 512
+%r117 = or i544 %r111, %r116
+%r118 = zext i544 %r117 to i576
+%r120 = getelementptr i32, i32* %r2, i32 17
+%r121 = load i32, i32* %r120
+%r122 = zext i32 %r121 to i576
+%r123 = shl i576 %r122, 544
+%r124 = or i576 %r118, %r123
+%r125 = zext i576 %r124 to i608
+%r127 = getelementptr i32, i32* %r2, i32 18
+%r128 = load i32, i32* %r127
+%r129 = zext i32 %r128 to i608
+%r130 = shl i608 %r129, 576
+%r131 = or i608 %r125, %r130
+%r132 = zext i608 %r131 to i640
+%r134 = getelementptr i32, i32* %r2, i32 19
+%r135 = load i32, i32* %r134
+%r136 = zext i32 %r135 to i640
+%r137 = shl i640 %r136, 608
+%r138 = or i640 %r132, %r137
+%r139 = zext i640 %r138 to i672
+%r141 = getelementptr i32, i32* %r2, i32 20
+%r142 = load i32, i32* %r141
+%r143 = zext i32 %r142 to i672
+%r144 = shl i672 %r143, 640
+%r145 = or i672 %r139, %r144
+%r146 = zext i672 %r145 to i704
+%r148 = getelementptr i32, i32* %r2, i32 21
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165
+%r167 = load i32, i32* %r3
+%r168 = zext i32 %r167 to i64
+%r170 = getelementptr i32, i32* %r3, i32 1
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i64
+%r173 = shl i64 %r172, 32
+%r174 = or i64 %r168, %r173
+%r175 = zext i64 %r174 to i96
+%r177 = getelementptr i32, i32* %r3, i32 2
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i96
+%r180 = shl i96 %r179, 64
+%r181 = or i96 %r175, %r180
+%r182 = zext i96 %r181 to i128
+%r184 = getelementptr i32, i32* %r3, i32 3
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i128
+%r187 = shl i128 %r186, 96
+%r188 = or i128 %r182, %r187
+%r189 = zext i128 %r188 to i160
+%r191 = getelementptr i32, i32* %r3, i32 4
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i160
+%r194 = shl i160 %r193, 128
+%r195 = or i160 %r189, %r194
+%r196 = zext i160 %r195 to i192
+%r198 = getelementptr i32, i32* %r3, i32 5
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i192
+%r201 = shl i192 %r200, 160
+%r202 = or i192 %r196, %r201
+%r203 = zext i192 %r202 to i224
+%r205 = getelementptr i32, i32* %r3, i32 6
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i224
+%r208 = shl i224 %r207, 192
+%r209 = or i224 %r203, %r208
+%r210 = zext i224 %r209 to i256
+%r212 = getelementptr i32, i32* %r3, i32 7
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i256
+%r215 = shl i256 %r214, 224
+%r216 = or i256 %r210, %r215
+%r217 = zext i256 %r216 to i288
+%r219 = getelementptr i32, i32* %r3, i32 8
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i288
+%r222 = shl i288 %r221, 256
+%r223 = or i288 %r217, %r222
+%r224 = zext i288 %r223 to i320
+%r226 = getelementptr i32, i32* %r3, i32 9
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i320
+%r229 = shl i320 %r228, 288
+%r230 = or i320 %r224, %r229
+%r231 = zext i320 %r230 to i352
+%r233 = getelementptr i32, i32* %r3, i32 10
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i352
+%r236 = shl i352 %r235, 320
+%r237 = or i352 %r231, %r236
+%r238 = zext i352 %r237 to i384
+%r240 = getelementptr i32, i32* %r3, i32 11
+%r241 = load i32, i32* %r240
+%r242 = zext i32 %r241 to i384
+%r243 = shl i384 %r242, 352
+%r244 = or i384 %r238, %r243
+%r245 = zext i384 %r244 to i416
+%r247 = getelementptr i32, i32* %r3, i32 12
+%r248 = load i32, i32* %r247
+%r249 = zext i32 %r248 to i416
+%r250 = shl i416 %r249, 384
+%r251 = or i416 %r245, %r250
+%r252 = zext i416 %r251 to i448
+%r254 = getelementptr i32, i32* %r3, i32 13
+%r255 = load i32, i32* %r254
+%r256 = zext i32 %r255 to i448
+%r257 = shl i448 %r256, 416
+%r258 = or i448 %r252, %r257
+%r259 = zext i448 %r258 to i480
+%r261 = getelementptr i32, i32* %r3, i32 14
+%r262 = load i32, i32* %r261
+%r263 = zext i32 %r262 to i480
+%r264 = shl i480 %r263, 448
+%r265 = or i480 %r259, %r264
+%r266 = zext i480 %r265 to i512
+%r268 = getelementptr i32, i32* %r3, i32 15
+%r269 = load i32, i32* %r268
+%r270 = zext i32 %r269 to i512
+%r271 = shl i512 %r270, 480
+%r272 = or i512 %r266, %r271
+%r273 = zext i512 %r272 to i544
+%r275 = getelementptr i32, i32* %r3, i32 16
+%r276 = load i32, i32* %r275
+%r277 = zext i32 %r276 to i544
+%r278 = shl i544 %r277, 512
+%r279 = or i544 %r273, %r278
+%r280 = zext i544 %r279 to i576
+%r282 = getelementptr i32, i32* %r3, i32 17
+%r283 = load i32, i32* %r282
+%r284 = zext i32 %r283 to i576
+%r285 = shl i576 %r284, 544
+%r286 = or i576 %r280, %r285
+%r287 = zext i576 %r286 to i608
+%r289 = getelementptr i32, i32* %r3, i32 18
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i608
+%r292 = shl i608 %r291, 576
+%r293 = or i608 %r287, %r292
+%r294 = zext i608 %r293 to i640
+%r296 = getelementptr i32, i32* %r3, i32 19
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i640
+%r299 = shl i640 %r298, 608
+%r300 = or i640 %r294, %r299
+%r301 = zext i640 %r300 to i672
+%r303 = getelementptr i32, i32* %r3, i32 20
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i672
+%r306 = shl i672 %r305, 640
+%r307 = or i672 %r301, %r306
+%r308 = zext i672 %r307 to i704
+%r310 = getelementptr i32, i32* %r3, i32 21
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i704
+%r313 = shl i704 %r312, 672
+%r314 = or i704 %r308, %r313
+%r315 = zext i704 %r314 to i736
+%r317 = getelementptr i32, i32* %r3, i32 22
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i736
+%r320 = shl i736 %r319, 704
+%r321 = or i736 %r315, %r320
+%r322 = zext i736 %r321 to i768
+%r324 = getelementptr i32, i32* %r3, i32 23
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i768
+%r327 = shl i768 %r326, 736
+%r328 = or i768 %r322, %r327
+%r329 = zext i768 %r166 to i800
+%r330 = zext i768 %r328 to i800
+%r331 = add i800 %r329, %r330
+%r332 = trunc i800 %r331 to i384
+%r334 = getelementptr i32, i32* %r1, i32 0
+%r335 = trunc i384 %r332 to i32
+store i32 %r335, i32* %r334
+%r336 = lshr i384 %r332, 32
+%r338 = getelementptr i32, i32* %r1, i32 1
+%r339 = trunc i384 %r336 to i32
+store i32 %r339, i32* %r338
+%r340 = lshr i384 %r336, 32
+%r342 = getelementptr i32, i32* %r1, i32 2
+%r343 = trunc i384 %r340 to i32
+store i32 %r343, i32* %r342
+%r344 = lshr i384 %r340, 32
+%r346 = getelementptr i32, i32* %r1, i32 3
+%r347 = trunc i384 %r344 to i32
+store i32 %r347, i32* %r346
+%r348 = lshr i384 %r344, 32
+%r350 = getelementptr i32, i32* %r1, i32 4
+%r351 = trunc i384 %r348 to i32
+store i32 %r351, i32* %r350
+%r352 = lshr i384 %r348, 32
+%r354 = getelementptr i32, i32* %r1, i32 5
+%r355 = trunc i384 %r352 to i32
+store i32 %r355, i32* %r354
+%r356 = lshr i384 %r352, 32
+%r358 = getelementptr i32, i32* %r1, i32 6
+%r359 = trunc i384 %r356 to i32
+store i32 %r359, i32* %r358
+%r360 = lshr i384 %r356, 32
+%r362 = getelementptr i32, i32* %r1, i32 7
+%r363 = trunc i384 %r360 to i32
+store i32 %r363, i32* %r362
+%r364 = lshr i384 %r360, 32
+%r366 = getelementptr i32, i32* %r1, i32 8
+%r367 = trunc i384 %r364 to i32
+store i32 %r367, i32* %r366
+%r368 = lshr i384 %r364, 32
+%r370 = getelementptr i32, i32* %r1, i32 9
+%r371 = trunc i384 %r368 to i32
+store i32 %r371, i32* %r370
+%r372 = lshr i384 %r368, 32
+%r374 = getelementptr i32, i32* %r1, i32 10
+%r375 = trunc i384 %r372 to i32
+store i32 %r375, i32* %r374
+%r376 = lshr i384 %r372, 32
+%r378 = getelementptr i32, i32* %r1, i32 11
+%r379 = trunc i384 %r376 to i32
+store i32 %r379, i32* %r378
+%r380 = lshr i800 %r331, 384
+%r381 = trunc i800 %r380 to i416
+%r382 = load i32, i32* %r4
+%r383 = zext i32 %r382 to i64
+%r385 = getelementptr i32, i32* %r4, i32 1
+%r386 = load i32, i32* %r385
+%r387 = zext i32 %r386 to i64
+%r388 = shl i64 %r387, 32
+%r389 = or i64 %r383, %r388
+%r390 = zext i64 %r389 to i96
+%r392 = getelementptr i32, i32* %r4, i32 2
+%r393 = load i32, i32* %r392
+%r394 = zext i32 %r393 to i96
+%r395 = shl i96 %r394, 64
+%r396 = or i96 %r390, %r395
+%r397 = zext i96 %r396 to i128
+%r399 = getelementptr i32, i32* %r4, i32 3
+%r400 = load i32, i32* %r399
+%r401 = zext i32 %r400 to i128
+%r402 = shl i128 %r401, 96
+%r403 = or i128 %r397, %r402
+%r404 = zext i128 %r403 to i160
+%r406 = getelementptr i32, i32* %r4, i32 4
+%r407 = load i32, i32* %r406
+%r408 = zext i32 %r407 to i160
+%r409 = shl i160 %r408, 128
+%r410 = or i160 %r404, %r409
+%r411 = zext i160 %r410 to i192
+%r413 = getelementptr i32, i32* %r4, i32 5
+%r414 = load i32, i32* %r413
+%r415 = zext i32 %r414 to i192
+%r416 = shl i192 %r415, 160
+%r417 = or i192 %r411, %r416
+%r418 = zext i192 %r417 to i224
+%r420 = getelementptr i32, i32* %r4, i32 6
+%r421 = load i32, i32* %r420
+%r422 = zext i32 %r421 to i224
+%r423 = shl i224 %r422, 192
+%r424 = or i224 %r418, %r423
+%r425 = zext i224 %r424 to i256
+%r427 = getelementptr i32, i32* %r4, i32 7
+%r428 = load i32, i32* %r427
+%r429 = zext i32 %r428 to i256
+%r430 = shl i256 %r429, 224
+%r431 = or i256 %r425, %r430
+%r432 = zext i256 %r431 to i288
+%r434 = getelementptr i32, i32* %r4, i32 8
+%r435 = load i32, i32* %r434
+%r436 = zext i32 %r435 to i288
+%r437 = shl i288 %r436, 256
+%r438 = or i288 %r432, %r437
+%r439 = zext i288 %r438 to i320
+%r441 = getelementptr i32, i32* %r4, i32 9
+%r442 = load i32, i32* %r441
+%r443 = zext i32 %r442 to i320
+%r444 = shl i320 %r443, 288
+%r445 = or i320 %r439, %r444
+%r446 = zext i320 %r445 to i352
+%r448 = getelementptr i32, i32* %r4, i32 10
+%r449 = load i32, i32* %r448
+%r450 = zext i32 %r449 to i352
+%r451 = shl i352 %r450, 320
+%r452 = or i352 %r446, %r451
+%r453 = zext i352 %r452 to i384
+%r455 = getelementptr i32, i32* %r4, i32 11
+%r456 = load i32, i32* %r455
+%r457 = zext i32 %r456 to i384
+%r458 = shl i384 %r457, 352
+%r459 = or i384 %r453, %r458
+%r460 = zext i384 %r459 to i416
+%r461 = sub i416 %r381, %r460
+%r462 = lshr i416 %r461, 384
+%r463 = trunc i416 %r462 to i1
+%r464 = select i1 %r463, i416 %r381, i416 %r461
+%r465 = trunc i416 %r464 to i384
+%r467 = getelementptr i32, i32* %r1, i32 12
+%r469 = getelementptr i32, i32* %r467, i32 0
+%r470 = trunc i384 %r465 to i32
+store i32 %r470, i32* %r469
+%r471 = lshr i384 %r465, 32
+%r473 = getelementptr i32, i32* %r467, i32 1
+%r474 = trunc i384 %r471 to i32
+store i32 %r474, i32* %r473
+%r475 = lshr i384 %r471, 32
+%r477 = getelementptr i32, i32* %r467, i32 2
+%r478 = trunc i384 %r475 to i32
+store i32 %r478, i32* %r477
+%r479 = lshr i384 %r475, 32
+%r481 = getelementptr i32, i32* %r467, i32 3
+%r482 = trunc i384 %r479 to i32
+store i32 %r482, i32* %r481
+%r483 = lshr i384 %r479, 32
+%r485 = getelementptr i32, i32* %r467, i32 4
+%r486 = trunc i384 %r483 to i32
+store i32 %r486, i32* %r485
+%r487 = lshr i384 %r483, 32
+%r489 = getelementptr i32, i32* %r467, i32 5
+%r490 = trunc i384 %r487 to i32
+store i32 %r490, i32* %r489
+%r491 = lshr i384 %r487, 32
+%r493 = getelementptr i32, i32* %r467, i32 6
+%r494 = trunc i384 %r491 to i32
+store i32 %r494, i32* %r493
+%r495 = lshr i384 %r491, 32
+%r497 = getelementptr i32, i32* %r467, i32 7
+%r498 = trunc i384 %r495 to i32
+store i32 %r498, i32* %r497
+%r499 = lshr i384 %r495, 32
+%r501 = getelementptr i32, i32* %r467, i32 8
+%r502 = trunc i384 %r499 to i32
+store i32 %r502, i32* %r501
+%r503 = lshr i384 %r499, 32
+%r505 = getelementptr i32, i32* %r467, i32 9
+%r506 = trunc i384 %r503 to i32
+store i32 %r506, i32* %r505
+%r507 = lshr i384 %r503, 32
+%r509 = getelementptr i32, i32* %r467, i32 10
+%r510 = trunc i384 %r507 to i32
+store i32 %r510, i32* %r509
+%r511 = lshr i384 %r507, 32
+%r513 = getelementptr i32, i32* %r467, i32 11
+%r514 = trunc i384 %r511 to i32
+store i32 %r514, i32* %r513
 ret void
 }
-define void @mcl_fpDbl_sub16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fpDbl_sub12L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -48297,1850 +13112,3384 @@ define void @mcl_fpDbl_sub16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r145 = or i672 %r139, %r144
 %r146 = zext i672 %r145 to i704
 %r148 = getelementptr i32, i32* %r2, i32 21
-%r149 = load i32, i32* %r148
-%r150 = zext i32 %r149 to i704
-%r151 = shl i704 %r150, 672
-%r152 = or i704 %r146, %r151
-%r153 = zext i704 %r152 to i736
-%r155 = getelementptr i32, i32* %r2, i32 22
-%r156 = load i32, i32* %r155
-%r157 = zext i32 %r156 to i736
-%r158 = shl i736 %r157, 704
-%r159 = or i736 %r153, %r158
-%r160 = zext i736 %r159 to i768
-%r162 = getelementptr i32, i32* %r2, i32 23
-%r163 = load i32, i32* %r162
-%r164 = zext i32 %r163 to i768
-%r165 = shl i768 %r164, 736
-%r166 = or i768 %r160, %r165
-%r167 = zext i768 %r166 to i800
-%r169 = getelementptr i32, i32* %r2, i32 24
-%r170 = load i32, i32* %r169
-%r171 = zext i32 %r170 to i800
-%r172 = shl i800 %r171, 768
-%r173 = or i800 %r167, %r172
-%r174 = zext i800 %r173 to i832
-%r176 = getelementptr i32, i32* %r2, i32 25
-%r177 = load i32, i32* %r176
-%r178 = zext i32 %r177 to i832
-%r179 = shl i832 %r178, 800
-%r180 = or i832 %r174, %r179
-%r181 = zext i832 %r180 to i864
-%r183 = getelementptr i32, i32* %r2, i32 26
-%r184 = load i32, i32* %r183
-%r185 = zext i32 %r184 to i864
-%r186 = shl i864 %r185, 832
-%r187 = or i864 %r181, %r186
-%r188 = zext i864 %r187 to i896
-%r190 = getelementptr i32, i32* %r2, i32 27
-%r191 = load i32, i32* %r190
-%r192 = zext i32 %r191 to i896
-%r193 = shl i896 %r192, 864
-%r194 = or i896 %r188, %r193
-%r195 = zext i896 %r194 to i928
-%r197 = getelementptr i32, i32* %r2, i32 28
-%r198 = load i32, i32* %r197
-%r199 = zext i32 %r198 to i928
-%r200 = shl i928 %r199, 896
-%r201 = or i928 %r195, %r200
-%r202 = zext i928 %r201 to i960
-%r204 = getelementptr i32, i32* %r2, i32 29
-%r205 = load i32, i32* %r204
-%r206 = zext i32 %r205 to i960
-%r207 = shl i960 %r206, 928
-%r208 = or i960 %r202, %r207
-%r209 = zext i960 %r208 to i992
-%r211 = getelementptr i32, i32* %r2, i32 30
-%r212 = load i32, i32* %r211
-%r213 = zext i32 %r212 to i992
-%r214 = shl i992 %r213, 960
-%r215 = or i992 %r209, %r214
-%r216 = zext i992 %r215 to i1024
-%r218 = getelementptr i32, i32* %r2, i32 31
-%r219 = load i32, i32* %r218
-%r220 = zext i32 %r219 to i1024
-%r221 = shl i1024 %r220, 992
-%r222 = or i1024 %r216, %r221
-%r223 = load i32, i32* %r3
-%r224 = zext i32 %r223 to i64
-%r226 = getelementptr i32, i32* %r3, i32 1
+%r149 = load i32, i32* %r148
+%r150 = zext i32 %r149 to i704
+%r151 = shl i704 %r150, 672
+%r152 = or i704 %r146, %r151
+%r153 = zext i704 %r152 to i736
+%r155 = getelementptr i32, i32* %r2, i32 22
+%r156 = load i32, i32* %r155
+%r157 = zext i32 %r156 to i736
+%r158 = shl i736 %r157, 704
+%r159 = or i736 %r153, %r158
+%r160 = zext i736 %r159 to i768
+%r162 = getelementptr i32, i32* %r2, i32 23
+%r163 = load i32, i32* %r162
+%r164 = zext i32 %r163 to i768
+%r165 = shl i768 %r164, 736
+%r166 = or i768 %r160, %r165
+%r167 = load i32, i32* %r3
+%r168 = zext i32 %r167 to i64
+%r170 = getelementptr i32, i32* %r3, i32 1
+%r171 = load i32, i32* %r170
+%r172 = zext i32 %r171 to i64
+%r173 = shl i64 %r172, 32
+%r174 = or i64 %r168, %r173
+%r175 = zext i64 %r174 to i96
+%r177 = getelementptr i32, i32* %r3, i32 2
+%r178 = load i32, i32* %r177
+%r179 = zext i32 %r178 to i96
+%r180 = shl i96 %r179, 64
+%r181 = or i96 %r175, %r180
+%r182 = zext i96 %r181 to i128
+%r184 = getelementptr i32, i32* %r3, i32 3
+%r185 = load i32, i32* %r184
+%r186 = zext i32 %r185 to i128
+%r187 = shl i128 %r186, 96
+%r188 = or i128 %r182, %r187
+%r189 = zext i128 %r188 to i160
+%r191 = getelementptr i32, i32* %r3, i32 4
+%r192 = load i32, i32* %r191
+%r193 = zext i32 %r192 to i160
+%r194 = shl i160 %r193, 128
+%r195 = or i160 %r189, %r194
+%r196 = zext i160 %r195 to i192
+%r198 = getelementptr i32, i32* %r3, i32 5
+%r199 = load i32, i32* %r198
+%r200 = zext i32 %r199 to i192
+%r201 = shl i192 %r200, 160
+%r202 = or i192 %r196, %r201
+%r203 = zext i192 %r202 to i224
+%r205 = getelementptr i32, i32* %r3, i32 6
+%r206 = load i32, i32* %r205
+%r207 = zext i32 %r206 to i224
+%r208 = shl i224 %r207, 192
+%r209 = or i224 %r203, %r208
+%r210 = zext i224 %r209 to i256
+%r212 = getelementptr i32, i32* %r3, i32 7
+%r213 = load i32, i32* %r212
+%r214 = zext i32 %r213 to i256
+%r215 = shl i256 %r214, 224
+%r216 = or i256 %r210, %r215
+%r217 = zext i256 %r216 to i288
+%r219 = getelementptr i32, i32* %r3, i32 8
+%r220 = load i32, i32* %r219
+%r221 = zext i32 %r220 to i288
+%r222 = shl i288 %r221, 256
+%r223 = or i288 %r217, %r222
+%r224 = zext i288 %r223 to i320
+%r226 = getelementptr i32, i32* %r3, i32 9
 %r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i64
-%r229 = shl i64 %r228, 32
-%r230 = or i64 %r224, %r229
-%r231 = zext i64 %r230 to i96
-%r233 = getelementptr i32, i32* %r3, i32 2
+%r228 = zext i32 %r227 to i320
+%r229 = shl i320 %r228, 288
+%r230 = or i320 %r224, %r229
+%r231 = zext i320 %r230 to i352
+%r233 = getelementptr i32, i32* %r3, i32 10
 %r234 = load i32, i32* %r233
-%r235 = zext i32 %r234 to i96
-%r236 = shl i96 %r235, 64
-%r237 = or i96 %r231, %r236
-%r238 = zext i96 %r237 to i128
-%r240 = getelementptr i32, i32* %r3, i32 3
+%r235 = zext i32 %r234 to i352
+%r236 = shl i352 %r235, 320
+%r237 = or i352 %r231, %r236
+%r238 = zext i352 %r237 to i384
+%r240 = getelementptr i32, i32* %r3, i32 11
 %r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i128
-%r243 = shl i128 %r242, 96
-%r244 = or i128 %r238, %r243
-%r245 = zext i128 %r244 to i160
-%r247 = getelementptr i32, i32* %r3, i32 4
+%r242 = zext i32 %r241 to i384
+%r243 = shl i384 %r242, 352
+%r244 = or i384 %r238, %r243
+%r245 = zext i384 %r244 to i416
+%r247 = getelementptr i32, i32* %r3, i32 12
 %r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i160
-%r250 = shl i160 %r249, 128
-%r251 = or i160 %r245, %r250
-%r252 = zext i160 %r251 to i192
-%r254 = getelementptr i32, i32* %r3, i32 5
+%r249 = zext i32 %r248 to i416
+%r250 = shl i416 %r249, 384
+%r251 = or i416 %r245, %r250
+%r252 = zext i416 %r251 to i448
+%r254 = getelementptr i32, i32* %r3, i32 13
 %r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i192
-%r257 = shl i192 %r256, 160
-%r258 = or i192 %r252, %r257
-%r259 = zext i192 %r258 to i224
-%r261 = getelementptr i32, i32* %r3, i32 6
+%r256 = zext i32 %r255 to i448
+%r257 = shl i448 %r256, 416
+%r258 = or i448 %r252, %r257
+%r259 = zext i448 %r258 to i480
+%r261 = getelementptr i32, i32* %r3, i32 14
 %r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i224
-%r264 = shl i224 %r263, 192
-%r265 = or i224 %r259, %r264
-%r266 = zext i224 %r265 to i256
-%r268 = getelementptr i32, i32* %r3, i32 7
+%r263 = zext i32 %r262 to i480
+%r264 = shl i480 %r263, 448
+%r265 = or i480 %r259, %r264
+%r266 = zext i480 %r265 to i512
+%r268 = getelementptr i32, i32* %r3, i32 15
 %r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i256
-%r271 = shl i256 %r270, 224
-%r272 = or i256 %r266, %r271
-%r273 = zext i256 %r272 to i288
-%r275 = getelementptr i32, i32* %r3, i32 8
+%r270 = zext i32 %r269 to i512
+%r271 = shl i512 %r270, 480
+%r272 = or i512 %r266, %r271
+%r273 = zext i512 %r272 to i544
+%r275 = getelementptr i32, i32* %r3, i32 16
 %r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i288
-%r278 = shl i288 %r277, 256
-%r279 = or i288 %r273, %r278
-%r280 = zext i288 %r279 to i320
-%r282 = getelementptr i32, i32* %r3, i32 9
+%r277 = zext i32 %r276 to i544
+%r278 = shl i544 %r277, 512
+%r279 = or i544 %r273, %r278
+%r280 = zext i544 %r279 to i576
+%r282 = getelementptr i32, i32* %r3, i32 17
 %r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i320
-%r285 = shl i320 %r284, 288
-%r286 = or i320 %r280, %r285
-%r287 = zext i320 %r286 to i352
-%r289 = getelementptr i32, i32* %r3, i32 10
+%r284 = zext i32 %r283 to i576
+%r285 = shl i576 %r284, 544
+%r286 = or i576 %r280, %r285
+%r287 = zext i576 %r286 to i608
+%r289 = getelementptr i32, i32* %r3, i32 18
 %r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i352
-%r292 = shl i352 %r291, 320
-%r293 = or i352 %r287, %r292
-%r294 = zext i352 %r293 to i384
-%r296 = getelementptr i32, i32* %r3, i32 11
+%r291 = zext i32 %r290 to i608
+%r292 = shl i608 %r291, 576
+%r293 = or i608 %r287, %r292
+%r294 = zext i608 %r293 to i640
+%r296 = getelementptr i32, i32* %r3, i32 19
 %r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i384
-%r299 = shl i384 %r298, 352
-%r300 = or i384 %r294, %r299
-%r301 = zext i384 %r300 to i416
-%r303 = getelementptr i32, i32* %r3, i32 12
+%r298 = zext i32 %r297 to i640
+%r299 = shl i640 %r298, 608
+%r300 = or i640 %r294, %r299
+%r301 = zext i640 %r300 to i672
+%r303 = getelementptr i32, i32* %r3, i32 20
 %r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i416
-%r306 = shl i416 %r305, 384
-%r307 = or i416 %r301, %r306
-%r308 = zext i416 %r307 to i448
-%r310 = getelementptr i32, i32* %r3, i32 13
+%r305 = zext i32 %r304 to i672
+%r306 = shl i672 %r305, 640
+%r307 = or i672 %r301, %r306
+%r308 = zext i672 %r307 to i704
+%r310 = getelementptr i32, i32* %r3, i32 21
 %r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i448
-%r313 = shl i448 %r312, 416
-%r314 = or i448 %r308, %r313
-%r315 = zext i448 %r314 to i480
-%r317 = getelementptr i32, i32* %r3, i32 14
-%r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i480
-%r320 = shl i480 %r319, 448
-%r321 = or i480 %r315, %r320
-%r322 = zext i480 %r321 to i512
-%r324 = getelementptr i32, i32* %r3, i32 15
-%r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i512
-%r327 = shl i512 %r326, 480
-%r328 = or i512 %r322, %r327
-%r329 = zext i512 %r328 to i544
-%r331 = getelementptr i32, i32* %r3, i32 16
-%r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i544
-%r334 = shl i544 %r333, 512
-%r335 = or i544 %r329, %r334
-%r336 = zext i544 %r335 to i576
-%r338 = getelementptr i32, i32* %r3, i32 17
-%r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i576
-%r341 = shl i576 %r340, 544
-%r342 = or i576 %r336, %r341
-%r343 = zext i576 %r342 to i608
-%r345 = getelementptr i32, i32* %r3, i32 18
-%r346 = load i32, i32* %r345
-%r347 = zext i32 %r346 to i608
-%r348 = shl i608 %r347, 576
-%r349 = or i608 %r343, %r348
-%r350 = zext i608 %r349 to i640
-%r352 = getelementptr i32, i32* %r3, i32 19
-%r353 = load i32, i32* %r352
-%r354 = zext i32 %r353 to i640
-%r355 = shl i640 %r354, 608
-%r356 = or i640 %r350, %r355
-%r357 = zext i640 %r356 to i672
-%r359 = getelementptr i32, i32* %r3, i32 20
-%r360 = load i32, i32* %r359
-%r361 = zext i32 %r360 to i672
-%r362 = shl i672 %r361, 640
-%r363 = or i672 %r357, %r362
-%r364 = zext i672 %r363 to i704
-%r366 = getelementptr i32, i32* %r3, i32 21
-%r367 = load i32, i32* %r366
-%r368 = zext i32 %r367 to i704
-%r369 = shl i704 %r368, 672
-%r370 = or i704 %r364, %r369
-%r371 = zext i704 %r370 to i736
-%r373 = getelementptr i32, i32* %r3, i32 22
-%r374 = load i32, i32* %r373
-%r375 = zext i32 %r374 to i736
-%r376 = shl i736 %r375, 704
-%r377 = or i736 %r371, %r376
-%r378 = zext i736 %r377 to i768
-%r380 = getelementptr i32, i32* %r3, i32 23
-%r381 = load i32, i32* %r380
-%r382 = zext i32 %r381 to i768
-%r383 = shl i768 %r382, 736
-%r384 = or i768 %r378, %r383
-%r385 = zext i768 %r384 to i800
-%r387 = getelementptr i32, i32* %r3, i32 24
+%r312 = zext i32 %r311 to i704
+%r313 = shl i704 %r312, 672
+%r314 = or i704 %r308, %r313
+%r315 = zext i704 %r314 to i736
+%r317 = getelementptr i32, i32* %r3, i32 22
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i736
+%r320 = shl i736 %r319, 704
+%r321 = or i736 %r315, %r320
+%r322 = zext i736 %r321 to i768
+%r324 = getelementptr i32, i32* %r3, i32 23
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i768
+%r327 = shl i768 %r326, 736
+%r328 = or i768 %r322, %r327
+%r329 = zext i768 %r166 to i800
+%r330 = zext i768 %r328 to i800
+%r331 = sub i800 %r329, %r330
+%r332 = trunc i800 %r331 to i384
+%r334 = getelementptr i32, i32* %r1, i32 0
+%r335 = trunc i384 %r332 to i32
+store i32 %r335, i32* %r334
+%r336 = lshr i384 %r332, 32
+%r338 = getelementptr i32, i32* %r1, i32 1
+%r339 = trunc i384 %r336 to i32
+store i32 %r339, i32* %r338
+%r340 = lshr i384 %r336, 32
+%r342 = getelementptr i32, i32* %r1, i32 2
+%r343 = trunc i384 %r340 to i32
+store i32 %r343, i32* %r342
+%r344 = lshr i384 %r340, 32
+%r346 = getelementptr i32, i32* %r1, i32 3
+%r347 = trunc i384 %r344 to i32
+store i32 %r347, i32* %r346
+%r348 = lshr i384 %r344, 32
+%r350 = getelementptr i32, i32* %r1, i32 4
+%r351 = trunc i384 %r348 to i32
+store i32 %r351, i32* %r350
+%r352 = lshr i384 %r348, 32
+%r354 = getelementptr i32, i32* %r1, i32 5
+%r355 = trunc i384 %r352 to i32
+store i32 %r355, i32* %r354
+%r356 = lshr i384 %r352, 32
+%r358 = getelementptr i32, i32* %r1, i32 6
+%r359 = trunc i384 %r356 to i32
+store i32 %r359, i32* %r358
+%r360 = lshr i384 %r356, 32
+%r362 = getelementptr i32, i32* %r1, i32 7
+%r363 = trunc i384 %r360 to i32
+store i32 %r363, i32* %r362
+%r364 = lshr i384 %r360, 32
+%r366 = getelementptr i32, i32* %r1, i32 8
+%r367 = trunc i384 %r364 to i32
+store i32 %r367, i32* %r366
+%r368 = lshr i384 %r364, 32
+%r370 = getelementptr i32, i32* %r1, i32 9
+%r371 = trunc i384 %r368 to i32
+store i32 %r371, i32* %r370
+%r372 = lshr i384 %r368, 32
+%r374 = getelementptr i32, i32* %r1, i32 10
+%r375 = trunc i384 %r372 to i32
+store i32 %r375, i32* %r374
+%r376 = lshr i384 %r372, 32
+%r378 = getelementptr i32, i32* %r1, i32 11
+%r379 = trunc i384 %r376 to i32
+store i32 %r379, i32* %r378
+%r380 = lshr i800 %r331, 384
+%r381 = trunc i800 %r380 to i384
+%r382 = lshr i800 %r331, 768
+%r383 = trunc i800 %r382 to i1
+%r384 = load i32, i32* %r4
+%r385 = zext i32 %r384 to i64
+%r387 = getelementptr i32, i32* %r4, i32 1
 %r388 = load i32, i32* %r387
-%r389 = zext i32 %r388 to i800
-%r390 = shl i800 %r389, 768
-%r391 = or i800 %r385, %r390
-%r392 = zext i800 %r391 to i832
-%r394 = getelementptr i32, i32* %r3, i32 25
+%r389 = zext i32 %r388 to i64
+%r390 = shl i64 %r389, 32
+%r391 = or i64 %r385, %r390
+%r392 = zext i64 %r391 to i96
+%r394 = getelementptr i32, i32* %r4, i32 2
 %r395 = load i32, i32* %r394
-%r396 = zext i32 %r395 to i832
-%r397 = shl i832 %r396, 800
-%r398 = or i832 %r392, %r397
-%r399 = zext i832 %r398 to i864
-%r401 = getelementptr i32, i32* %r3, i32 26
+%r396 = zext i32 %r395 to i96
+%r397 = shl i96 %r396, 64
+%r398 = or i96 %r392, %r397
+%r399 = zext i96 %r398 to i128
+%r401 = getelementptr i32, i32* %r4, i32 3
 %r402 = load i32, i32* %r401
-%r403 = zext i32 %r402 to i864
-%r404 = shl i864 %r403, 832
-%r405 = or i864 %r399, %r404
-%r406 = zext i864 %r405 to i896
-%r408 = getelementptr i32, i32* %r3, i32 27
+%r403 = zext i32 %r402 to i128
+%r404 = shl i128 %r403, 96
+%r405 = or i128 %r399, %r404
+%r406 = zext i128 %r405 to i160
+%r408 = getelementptr i32, i32* %r4, i32 4
 %r409 = load i32, i32* %r408
-%r410 = zext i32 %r409 to i896
-%r411 = shl i896 %r410, 864
-%r412 = or i896 %r406, %r411
-%r413 = zext i896 %r412 to i928
-%r415 = getelementptr i32, i32* %r3, i32 28
+%r410 = zext i32 %r409 to i160
+%r411 = shl i160 %r410, 128
+%r412 = or i160 %r406, %r411
+%r413 = zext i160 %r412 to i192
+%r415 = getelementptr i32, i32* %r4, i32 5
 %r416 = load i32, i32* %r415
-%r417 = zext i32 %r416 to i928
-%r418 = shl i928 %r417, 896
-%r419 = or i928 %r413, %r418
-%r420 = zext i928 %r419 to i960
-%r422 = getelementptr i32, i32* %r3, i32 29
+%r417 = zext i32 %r416 to i192
+%r418 = shl i192 %r417, 160
+%r419 = or i192 %r413, %r418
+%r420 = zext i192 %r419 to i224
+%r422 = getelementptr i32, i32* %r4, i32 6
 %r423 = load i32, i32* %r422
-%r424 = zext i32 %r423 to i960
-%r425 = shl i960 %r424, 928
-%r426 = or i960 %r420, %r425
-%r427 = zext i960 %r426 to i992
-%r429 = getelementptr i32, i32* %r3, i32 30
+%r424 = zext i32 %r423 to i224
+%r425 = shl i224 %r424, 192
+%r426 = or i224 %r420, %r425
+%r427 = zext i224 %r426 to i256
+%r429 = getelementptr i32, i32* %r4, i32 7
 %r430 = load i32, i32* %r429
-%r431 = zext i32 %r430 to i992
-%r432 = shl i992 %r431, 960
-%r433 = or i992 %r427, %r432
-%r434 = zext i992 %r433 to i1024
-%r436 = getelementptr i32, i32* %r3, i32 31
+%r431 = zext i32 %r430 to i256
+%r432 = shl i256 %r431, 224
+%r433 = or i256 %r427, %r432
+%r434 = zext i256 %r433 to i288
+%r436 = getelementptr i32, i32* %r4, i32 8
 %r437 = load i32, i32* %r436
-%r438 = zext i32 %r437 to i1024
-%r439 = shl i1024 %r438, 992
-%r440 = or i1024 %r434, %r439
-%r441 = zext i1024 %r222 to i1056
-%r442 = zext i1024 %r440 to i1056
-%r443 = sub i1056 %r441, %r442
-%r444 = trunc i1056 %r443 to i512
-%r445 = trunc i512 %r444 to i32
-%r447 = getelementptr i32, i32* %r1, i32 0
-store i32 %r445, i32* %r447
-%r448 = lshr i512 %r444, 32
-%r449 = trunc i512 %r448 to i32
-%r451 = getelementptr i32, i32* %r1, i32 1
-store i32 %r449, i32* %r451
-%r452 = lshr i512 %r448, 32
-%r453 = trunc i512 %r452 to i32
-%r455 = getelementptr i32, i32* %r1, i32 2
-store i32 %r453, i32* %r455
-%r456 = lshr i512 %r452, 32
-%r457 = trunc i512 %r456 to i32
-%r459 = getelementptr i32, i32* %r1, i32 3
-store i32 %r457, i32* %r459
-%r460 = lshr i512 %r456, 32
-%r461 = trunc i512 %r460 to i32
-%r463 = getelementptr i32, i32* %r1, i32 4
-store i32 %r461, i32* %r463
-%r464 = lshr i512 %r460, 32
-%r465 = trunc i512 %r464 to i32
-%r467 = getelementptr i32, i32* %r1, i32 5
-store i32 %r465, i32* %r467
-%r468 = lshr i512 %r464, 32
-%r469 = trunc i512 %r468 to i32
-%r471 = getelementptr i32, i32* %r1, i32 6
-store i32 %r469, i32* %r471
-%r472 = lshr i512 %r468, 32
-%r473 = trunc i512 %r472 to i32
-%r475 = getelementptr i32, i32* %r1, i32 7
-store i32 %r473, i32* %r475
-%r476 = lshr i512 %r472, 32
-%r477 = trunc i512 %r476 to i32
-%r479 = getelementptr i32, i32* %r1, i32 8
-store i32 %r477, i32* %r479
-%r480 = lshr i512 %r476, 32
-%r481 = trunc i512 %r480 to i32
-%r483 = getelementptr i32, i32* %r1, i32 9
-store i32 %r481, i32* %r483
-%r484 = lshr i512 %r480, 32
-%r485 = trunc i512 %r484 to i32
-%r487 = getelementptr i32, i32* %r1, i32 10
-store i32 %r485, i32* %r487
-%r488 = lshr i512 %r484, 32
-%r489 = trunc i512 %r488 to i32
-%r491 = getelementptr i32, i32* %r1, i32 11
-store i32 %r489, i32* %r491
-%r492 = lshr i512 %r488, 32
-%r493 = trunc i512 %r492 to i32
-%r495 = getelementptr i32, i32* %r1, i32 12
-store i32 %r493, i32* %r495
-%r496 = lshr i512 %r492, 32
-%r497 = trunc i512 %r496 to i32
-%r499 = getelementptr i32, i32* %r1, i32 13
-store i32 %r497, i32* %r499
-%r500 = lshr i512 %r496, 32
-%r501 = trunc i512 %r500 to i32
-%r503 = getelementptr i32, i32* %r1, i32 14
-store i32 %r501, i32* %r503
-%r504 = lshr i512 %r500, 32
-%r505 = trunc i512 %r504 to i32
-%r507 = getelementptr i32, i32* %r1, i32 15
-store i32 %r505, i32* %r507
-%r508 = lshr i1056 %r443, 512
-%r509 = trunc i1056 %r508 to i512
-%r510 = lshr i1056 %r443, 1024
-%r511 = trunc i1056 %r510 to i1
-%r512 = load i32, i32* %r4
-%r513 = zext i32 %r512 to i64
-%r515 = getelementptr i32, i32* %r4, i32 1
-%r516 = load i32, i32* %r515
-%r517 = zext i32 %r516 to i64
-%r518 = shl i64 %r517, 32
-%r519 = or i64 %r513, %r518
-%r520 = zext i64 %r519 to i96
-%r522 = getelementptr i32, i32* %r4, i32 2
-%r523 = load i32, i32* %r522
-%r524 = zext i32 %r523 to i96
-%r525 = shl i96 %r524, 64
-%r526 = or i96 %r520, %r525
-%r527 = zext i96 %r526 to i128
-%r529 = getelementptr i32, i32* %r4, i32 3
-%r530 = load i32, i32* %r529
-%r531 = zext i32 %r530 to i128
-%r532 = shl i128 %r531, 96
-%r533 = or i128 %r527, %r532
-%r534 = zext i128 %r533 to i160
-%r536 = getelementptr i32, i32* %r4, i32 4
-%r537 = load i32, i32* %r536
-%r538 = zext i32 %r537 to i160
-%r539 = shl i160 %r538, 128
-%r540 = or i160 %r534, %r539
-%r541 = zext i160 %r540 to i192
-%r543 = getelementptr i32, i32* %r4, i32 5
-%r544 = load i32, i32* %r543
-%r545 = zext i32 %r544 to i192
-%r546 = shl i192 %r545, 160
-%r547 = or i192 %r541, %r546
-%r548 = zext i192 %r547 to i224
-%r550 = getelementptr i32, i32* %r4, i32 6
-%r551 = load i32, i32* %r550
-%r552 = zext i32 %r551 to i224
-%r553 = shl i224 %r552, 192
-%r554 = or i224 %r548, %r553
-%r555 = zext i224 %r554 to i256
-%r557 = getelementptr i32, i32* %r4, i32 7
-%r558 = load i32, i32* %r557
-%r559 = zext i32 %r558 to i256
-%r560 = shl i256 %r559, 224
-%r561 = or i256 %r555, %r560
-%r562 = zext i256 %r561 to i288
-%r564 = getelementptr i32, i32* %r4, i32 8
-%r565 = load i32, i32* %r564
-%r566 = zext i32 %r565 to i288
-%r567 = shl i288 %r566, 256
-%r568 = or i288 %r562, %r567
-%r569 = zext i288 %r568 to i320
-%r571 = getelementptr i32, i32* %r4, i32 9
-%r572 = load i32, i32* %r571
-%r573 = zext i32 %r572 to i320
-%r574 = shl i320 %r573, 288
-%r575 = or i320 %r569, %r574
-%r576 = zext i320 %r575 to i352
-%r578 = getelementptr i32, i32* %r4, i32 10
-%r579 = load i32, i32* %r578
-%r580 = zext i32 %r579 to i352
-%r581 = shl i352 %r580, 320
-%r582 = or i352 %r576, %r581
-%r583 = zext i352 %r582 to i384
-%r585 = getelementptr i32, i32* %r4, i32 11
-%r586 = load i32, i32* %r585
-%r587 = zext i32 %r586 to i384
-%r588 = shl i384 %r587, 352
-%r589 = or i384 %r583, %r588
-%r590 = zext i384 %r589 to i416
-%r592 = getelementptr i32, i32* %r4, i32 12
-%r593 = load i32, i32* %r592
-%r594 = zext i32 %r593 to i416
-%r595 = shl i416 %r594, 384
-%r596 = or i416 %r590, %r595
-%r597 = zext i416 %r596 to i448
-%r599 = getelementptr i32, i32* %r4, i32 13
-%r600 = load i32, i32* %r599
-%r601 = zext i32 %r600 to i448
-%r602 = shl i448 %r601, 416
-%r603 = or i448 %r597, %r602
-%r604 = zext i448 %r603 to i480
-%r606 = getelementptr i32, i32* %r4, i32 14
-%r607 = load i32, i32* %r606
-%r608 = zext i32 %r607 to i480
-%r609 = shl i480 %r608, 448
-%r610 = or i480 %r604, %r609
-%r611 = zext i480 %r610 to i512
-%r613 = getelementptr i32, i32* %r4, i32 15
-%r614 = load i32, i32* %r613
-%r615 = zext i32 %r614 to i512
-%r616 = shl i512 %r615, 480
-%r617 = or i512 %r611, %r616
-%r619 = select i1 %r511, i512 %r617, i512 0
-%r620 = add i512 %r509, %r619
-%r622 = getelementptr i32, i32* %r1, i32 16
-%r623 = trunc i512 %r620 to i32
-%r625 = getelementptr i32, i32* %r622, i32 0
-store i32 %r623, i32* %r625
-%r626 = lshr i512 %r620, 32
-%r627 = trunc i512 %r626 to i32
-%r629 = getelementptr i32, i32* %r622, i32 1
-store i32 %r627, i32* %r629
-%r630 = lshr i512 %r626, 32
-%r631 = trunc i512 %r630 to i32
-%r633 = getelementptr i32, i32* %r622, i32 2
-store i32 %r631, i32* %r633
-%r634 = lshr i512 %r630, 32
-%r635 = trunc i512 %r634 to i32
-%r637 = getelementptr i32, i32* %r622, i32 3
-store i32 %r635, i32* %r637
-%r638 = lshr i512 %r634, 32
-%r639 = trunc i512 %r638 to i32
-%r641 = getelementptr i32, i32* %r622, i32 4
-store i32 %r639, i32* %r641
-%r642 = lshr i512 %r638, 32
-%r643 = trunc i512 %r642 to i32
-%r645 = getelementptr i32, i32* %r622, i32 5
-store i32 %r643, i32* %r645
-%r646 = lshr i512 %r642, 32
-%r647 = trunc i512 %r646 to i32
-%r649 = getelementptr i32, i32* %r622, i32 6
-store i32 %r647, i32* %r649
-%r650 = lshr i512 %r646, 32
-%r651 = trunc i512 %r650 to i32
-%r653 = getelementptr i32, i32* %r622, i32 7
-store i32 %r651, i32* %r653
-%r654 = lshr i512 %r650, 32
-%r655 = trunc i512 %r654 to i32
-%r657 = getelementptr i32, i32* %r622, i32 8
-store i32 %r655, i32* %r657
-%r658 = lshr i512 %r654, 32
-%r659 = trunc i512 %r658 to i32
-%r661 = getelementptr i32, i32* %r622, i32 9
-store i32 %r659, i32* %r661
-%r662 = lshr i512 %r658, 32
-%r663 = trunc i512 %r662 to i32
-%r665 = getelementptr i32, i32* %r622, i32 10
-store i32 %r663, i32* %r665
-%r666 = lshr i512 %r662, 32
-%r667 = trunc i512 %r666 to i32
-%r669 = getelementptr i32, i32* %r622, i32 11
-store i32 %r667, i32* %r669
-%r670 = lshr i512 %r666, 32
-%r671 = trunc i512 %r670 to i32
-%r673 = getelementptr i32, i32* %r622, i32 12
-store i32 %r671, i32* %r673
-%r674 = lshr i512 %r670, 32
-%r675 = trunc i512 %r674 to i32
-%r677 = getelementptr i32, i32* %r622, i32 13
-store i32 %r675, i32* %r677
-%r678 = lshr i512 %r674, 32
-%r679 = trunc i512 %r678 to i32
-%r681 = getelementptr i32, i32* %r622, i32 14
-store i32 %r679, i32* %r681
-%r682 = lshr i512 %r678, 32
-%r683 = trunc i512 %r682 to i32
-%r685 = getelementptr i32, i32* %r622, i32 15
-store i32 %r683, i32* %r685
+%r438 = zext i32 %r437 to i288
+%r439 = shl i288 %r438, 256
+%r440 = or i288 %r434, %r439
+%r441 = zext i288 %r440 to i320
+%r443 = getelementptr i32, i32* %r4, i32 9
+%r444 = load i32, i32* %r443
+%r445 = zext i32 %r444 to i320
+%r446 = shl i320 %r445, 288
+%r447 = or i320 %r441, %r446
+%r448 = zext i320 %r447 to i352
+%r450 = getelementptr i32, i32* %r4, i32 10
+%r451 = load i32, i32* %r450
+%r452 = zext i32 %r451 to i352
+%r453 = shl i352 %r452, 320
+%r454 = or i352 %r448, %r453
+%r455 = zext i352 %r454 to i384
+%r457 = getelementptr i32, i32* %r4, i32 11
+%r458 = load i32, i32* %r457
+%r459 = zext i32 %r458 to i384
+%r460 = shl i384 %r459, 352
+%r461 = or i384 %r455, %r460
+%r463 = select i1 %r383, i384 %r461, i384 0
+%r464 = add i384 %r381, %r463
+%r466 = getelementptr i32, i32* %r1, i32 12
+%r468 = getelementptr i32, i32* %r466, i32 0
+%r469 = trunc i384 %r464 to i32
+store i32 %r469, i32* %r468
+%r470 = lshr i384 %r464, 32
+%r472 = getelementptr i32, i32* %r466, i32 1
+%r473 = trunc i384 %r470 to i32
+store i32 %r473, i32* %r472
+%r474 = lshr i384 %r470, 32
+%r476 = getelementptr i32, i32* %r466, i32 2
+%r477 = trunc i384 %r474 to i32
+store i32 %r477, i32* %r476
+%r478 = lshr i384 %r474, 32
+%r480 = getelementptr i32, i32* %r466, i32 3
+%r481 = trunc i384 %r478 to i32
+store i32 %r481, i32* %r480
+%r482 = lshr i384 %r478, 32
+%r484 = getelementptr i32, i32* %r466, i32 4
+%r485 = trunc i384 %r482 to i32
+store i32 %r485, i32* %r484
+%r486 = lshr i384 %r482, 32
+%r488 = getelementptr i32, i32* %r466, i32 5
+%r489 = trunc i384 %r486 to i32
+store i32 %r489, i32* %r488
+%r490 = lshr i384 %r486, 32
+%r492 = getelementptr i32, i32* %r466, i32 6
+%r493 = trunc i384 %r490 to i32
+store i32 %r493, i32* %r492
+%r494 = lshr i384 %r490, 32
+%r496 = getelementptr i32, i32* %r466, i32 7
+%r497 = trunc i384 %r494 to i32
+store i32 %r497, i32* %r496
+%r498 = lshr i384 %r494, 32
+%r500 = getelementptr i32, i32* %r466, i32 8
+%r501 = trunc i384 %r498 to i32
+store i32 %r501, i32* %r500
+%r502 = lshr i384 %r498, 32
+%r504 = getelementptr i32, i32* %r466, i32 9
+%r505 = trunc i384 %r502 to i32
+store i32 %r505, i32* %r504
+%r506 = lshr i384 %r502, 32
+%r508 = getelementptr i32, i32* %r466, i32 10
+%r509 = trunc i384 %r506 to i32
+store i32 %r509, i32* %r508
+%r510 = lshr i384 %r506, 32
+%r512 = getelementptr i32, i32* %r466, i32 11
+%r513 = trunc i384 %r510 to i32
+store i32 %r513, i32* %r512
+ret void
+}
+define i544 @mulPv512x32(i32* noalias  %r2, i32 %r3)
+{
+%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
+%r6 = trunc i64 %r5 to i32
+%r7 = call i32 @extractHigh32(i64 %r5)
+%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
+%r10 = trunc i64 %r9 to i32
+%r11 = call i32 @extractHigh32(i64 %r9)
+%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
+%r14 = trunc i64 %r13 to i32
+%r15 = call i32 @extractHigh32(i64 %r13)
+%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
+%r18 = trunc i64 %r17 to i32
+%r19 = call i32 @extractHigh32(i64 %r17)
+%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
+%r22 = trunc i64 %r21 to i32
+%r23 = call i32 @extractHigh32(i64 %r21)
+%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
+%r26 = trunc i64 %r25 to i32
+%r27 = call i32 @extractHigh32(i64 %r25)
+%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
+%r30 = trunc i64 %r29 to i32
+%r31 = call i32 @extractHigh32(i64 %r29)
+%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
+%r34 = trunc i64 %r33 to i32
+%r35 = call i32 @extractHigh32(i64 %r33)
+%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
+%r38 = trunc i64 %r37 to i32
+%r39 = call i32 @extractHigh32(i64 %r37)
+%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
+%r42 = trunc i64 %r41 to i32
+%r43 = call i32 @extractHigh32(i64 %r41)
+%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
+%r46 = trunc i64 %r45 to i32
+%r47 = call i32 @extractHigh32(i64 %r45)
+%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
+%r50 = trunc i64 %r49 to i32
+%r51 = call i32 @extractHigh32(i64 %r49)
+%r53 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 12)
+%r54 = trunc i64 %r53 to i32
+%r55 = call i32 @extractHigh32(i64 %r53)
+%r57 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 13)
+%r58 = trunc i64 %r57 to i32
+%r59 = call i32 @extractHigh32(i64 %r57)
+%r61 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 14)
+%r62 = trunc i64 %r61 to i32
+%r63 = call i32 @extractHigh32(i64 %r61)
+%r65 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 15)
+%r66 = trunc i64 %r65 to i32
+%r67 = call i32 @extractHigh32(i64 %r65)
+%r68 = zext i32 %r6 to i64
+%r69 = zext i32 %r10 to i64
+%r70 = shl i64 %r69, 32
+%r71 = or i64 %r68, %r70
+%r72 = zext i64 %r71 to i96
+%r73 = zext i32 %r14 to i96
+%r74 = shl i96 %r73, 64
+%r75 = or i96 %r72, %r74
+%r76 = zext i96 %r75 to i128
+%r77 = zext i32 %r18 to i128
+%r78 = shl i128 %r77, 96
+%r79 = or i128 %r76, %r78
+%r80 = zext i128 %r79 to i160
+%r81 = zext i32 %r22 to i160
+%r82 = shl i160 %r81, 128
+%r83 = or i160 %r80, %r82
+%r84 = zext i160 %r83 to i192
+%r85 = zext i32 %r26 to i192
+%r86 = shl i192 %r85, 160
+%r87 = or i192 %r84, %r86
+%r88 = zext i192 %r87 to i224
+%r89 = zext i32 %r30 to i224
+%r90 = shl i224 %r89, 192
+%r91 = or i224 %r88, %r90
+%r92 = zext i224 %r91 to i256
+%r93 = zext i32 %r34 to i256
+%r94 = shl i256 %r93, 224
+%r95 = or i256 %r92, %r94
+%r96 = zext i256 %r95 to i288
+%r97 = zext i32 %r38 to i288
+%r98 = shl i288 %r97, 256
+%r99 = or i288 %r96, %r98
+%r100 = zext i288 %r99 to i320
+%r101 = zext i32 %r42 to i320
+%r102 = shl i320 %r101, 288
+%r103 = or i320 %r100, %r102
+%r104 = zext i320 %r103 to i352
+%r105 = zext i32 %r46 to i352
+%r106 = shl i352 %r105, 320
+%r107 = or i352 %r104, %r106
+%r108 = zext i352 %r107 to i384
+%r109 = zext i32 %r50 to i384
+%r110 = shl i384 %r109, 352
+%r111 = or i384 %r108, %r110
+%r112 = zext i384 %r111 to i416
+%r113 = zext i32 %r54 to i416
+%r114 = shl i416 %r113, 384
+%r115 = or i416 %r112, %r114
+%r116 = zext i416 %r115 to i448
+%r117 = zext i32 %r58 to i448
+%r118 = shl i448 %r117, 416
+%r119 = or i448 %r116, %r118
+%r120 = zext i448 %r119 to i480
+%r121 = zext i32 %r62 to i480
+%r122 = shl i480 %r121, 448
+%r123 = or i480 %r120, %r122
+%r124 = zext i480 %r123 to i512
+%r125 = zext i32 %r66 to i512
+%r126 = shl i512 %r125, 480
+%r127 = or i512 %r124, %r126
+%r128 = zext i32 %r7 to i64
+%r129 = zext i32 %r11 to i64
+%r130 = shl i64 %r129, 32
+%r131 = or i64 %r128, %r130
+%r132 = zext i64 %r131 to i96
+%r133 = zext i32 %r15 to i96
+%r134 = shl i96 %r133, 64
+%r135 = or i96 %r132, %r134
+%r136 = zext i96 %r135 to i128
+%r137 = zext i32 %r19 to i128
+%r138 = shl i128 %r137, 96
+%r139 = or i128 %r136, %r138
+%r140 = zext i128 %r139 to i160
+%r141 = zext i32 %r23 to i160
+%r142 = shl i160 %r141, 128
+%r143 = or i160 %r140, %r142
+%r144 = zext i160 %r143 to i192
+%r145 = zext i32 %r27 to i192
+%r146 = shl i192 %r145, 160
+%r147 = or i192 %r144, %r146
+%r148 = zext i192 %r147 to i224
+%r149 = zext i32 %r31 to i224
+%r150 = shl i224 %r149, 192
+%r151 = or i224 %r148, %r150
+%r152 = zext i224 %r151 to i256
+%r153 = zext i32 %r35 to i256
+%r154 = shl i256 %r153, 224
+%r155 = or i256 %r152, %r154
+%r156 = zext i256 %r155 to i288
+%r157 = zext i32 %r39 to i288
+%r158 = shl i288 %r157, 256
+%r159 = or i288 %r156, %r158
+%r160 = zext i288 %r159 to i320
+%r161 = zext i32 %r43 to i320
+%r162 = shl i320 %r161, 288
+%r163 = or i320 %r160, %r162
+%r164 = zext i320 %r163 to i352
+%r165 = zext i32 %r47 to i352
+%r166 = shl i352 %r165, 320
+%r167 = or i352 %r164, %r166
+%r168 = zext i352 %r167 to i384
+%r169 = zext i32 %r51 to i384
+%r170 = shl i384 %r169, 352
+%r171 = or i384 %r168, %r170
+%r172 = zext i384 %r171 to i416
+%r173 = zext i32 %r55 to i416
+%r174 = shl i416 %r173, 384
+%r175 = or i416 %r172, %r174
+%r176 = zext i416 %r175 to i448
+%r177 = zext i32 %r59 to i448
+%r178 = shl i448 %r177, 416
+%r179 = or i448 %r176, %r178
+%r180 = zext i448 %r179 to i480
+%r181 = zext i32 %r63 to i480
+%r182 = shl i480 %r181, 448
+%r183 = or i480 %r180, %r182
+%r184 = zext i480 %r183 to i512
+%r185 = zext i32 %r67 to i512
+%r186 = shl i512 %r185, 480
+%r187 = or i512 %r184, %r186
+%r188 = zext i512 %r127 to i544
+%r189 = zext i512 %r187 to i544
+%r190 = shl i544 %r189, 32
+%r191 = add i544 %r188, %r190
+ret i544 %r191
+}
+define void @mcl_fp_mulUnitPre16L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3) ; stores the 544-bit result of @mulPv512x32(%r2, %r3) into %r1[0..16] as 17 32-bit words
+{
+%r4 = call i544 @mulPv512x32(i32* %r2, i32 %r3) ; presumably (16-word value at %r2) * %r3 - callee is only partly visible here, confirm
+%r6 = getelementptr i32, i32* %r1, i32 0
+%r7 = trunc i544 %r4 to i32
+store i32 %r7, i32* %r6 ; %r1[0] = bits 0..31; words are written least-significant first
+%r8 = lshr i544 %r4, 32 ; each step shifts the remaining value down by one 32-bit word
+%r10 = getelementptr i32, i32* %r1, i32 1
+%r11 = trunc i544 %r8 to i32
+store i32 %r11, i32* %r10
+%r12 = lshr i544 %r8, 32
+%r14 = getelementptr i32, i32* %r1, i32 2
+%r15 = trunc i544 %r12 to i32
+store i32 %r15, i32* %r14
+%r16 = lshr i544 %r12, 32
+%r18 = getelementptr i32, i32* %r1, i32 3
+%r19 = trunc i544 %r16 to i32
+store i32 %r19, i32* %r18
+%r20 = lshr i544 %r16, 32
+%r22 = getelementptr i32, i32* %r1, i32 4
+%r23 = trunc i544 %r20 to i32
+store i32 %r23, i32* %r22
+%r24 = lshr i544 %r20, 32
+%r26 = getelementptr i32, i32* %r1, i32 5
+%r27 = trunc i544 %r24 to i32
+store i32 %r27, i32* %r26
+%r28 = lshr i544 %r24, 32
+%r30 = getelementptr i32, i32* %r1, i32 6
+%r31 = trunc i544 %r28 to i32
+store i32 %r31, i32* %r30
+%r32 = lshr i544 %r28, 32
+%r34 = getelementptr i32, i32* %r1, i32 7
+%r35 = trunc i544 %r32 to i32
+store i32 %r35, i32* %r34
+%r36 = lshr i544 %r32, 32
+%r38 = getelementptr i32, i32* %r1, i32 8
+%r39 = trunc i544 %r36 to i32
+store i32 %r39, i32* %r38
+%r40 = lshr i544 %r36, 32
+%r42 = getelementptr i32, i32* %r1, i32 9
+%r43 = trunc i544 %r40 to i32
+store i32 %r43, i32* %r42
+%r44 = lshr i544 %r40, 32
+%r46 = getelementptr i32, i32* %r1, i32 10
+%r47 = trunc i544 %r44 to i32
+store i32 %r47, i32* %r46
+%r48 = lshr i544 %r44, 32
+%r50 = getelementptr i32, i32* %r1, i32 11
+%r51 = trunc i544 %r48 to i32
+store i32 %r51, i32* %r50
+%r52 = lshr i544 %r48, 32
+%r54 = getelementptr i32, i32* %r1, i32 12
+%r55 = trunc i544 %r52 to i32
+store i32 %r55, i32* %r54
+%r56 = lshr i544 %r52, 32
+%r58 = getelementptr i32, i32* %r1, i32 13
+%r59 = trunc i544 %r56 to i32
+store i32 %r59, i32* %r58
+%r60 = lshr i544 %r56, 32
+%r62 = getelementptr i32, i32* %r1, i32 14
+%r63 = trunc i544 %r60 to i32
+store i32 %r63, i32* %r62
+%r64 = lshr i544 %r60, 32
+%r66 = getelementptr i32, i32* %r1, i32 15
+%r67 = trunc i544 %r64 to i32
+store i32 %r67, i32* %r66
+%r68 = lshr i544 %r64, 32
+%r70 = getelementptr i32, i32* %r1, i32 16
+%r71 = trunc i544 %r68 to i32
+store i32 %r71, i32* %r70 ; %r1[16] = bits 512..543, the top word of the result
+ret void
+}
+define void @mcl_fpDbl_mulPre16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3) ; %r1[0..31] = (16-word value at %r2) * (16-word value at %r3), built from three 8-word multiplies (Karatsuba split)
+{
+%r5 = getelementptr i32, i32* %r2, i32 8 ; high 8 words of the %r2 operand
+%r7 = getelementptr i32, i32* %r3, i32 8 ; high 8 words of the %r3 operand
+%r9 = getelementptr i32, i32* %r1, i32 16 ; upper 16 words of the output buffer
+call void @mcl_fpDbl_mulPre8L(i32* %r1, i32* %r2, i32* %r3) ; low*low -> %r1[0..15] (assumes mulPre8L writes a 16-word product; its body is outside this view)
+call void @mcl_fpDbl_mulPre8L(i32* %r9, i32* %r5, i32* %r7) ; high*high -> %r1[16..31]
+%r10 = load i32, i32* %r5 ; pack %r5[0..7] into one 256-bit integer, least-significant word first
+%r11 = zext i32 %r10 to i64
+%r13 = getelementptr i32, i32* %r5, i32 1
+%r14 = load i32, i32* %r13
+%r15 = zext i32 %r14 to i64
+%r16 = shl i64 %r15, 32
+%r17 = or i64 %r11, %r16
+%r18 = zext i64 %r17 to i96
+%r20 = getelementptr i32, i32* %r5, i32 2
+%r21 = load i32, i32* %r20
+%r22 = zext i32 %r21 to i96
+%r23 = shl i96 %r22, 64
+%r24 = or i96 %r18, %r23
+%r25 = zext i96 %r24 to i128
+%r27 = getelementptr i32, i32* %r5, i32 3
+%r28 = load i32, i32* %r27
+%r29 = zext i32 %r28 to i128
+%r30 = shl i128 %r29, 96
+%r31 = or i128 %r25, %r30
+%r32 = zext i128 %r31 to i160
+%r34 = getelementptr i32, i32* %r5, i32 4
+%r35 = load i32, i32* %r34
+%r36 = zext i32 %r35 to i160
+%r37 = shl i160 %r36, 128
+%r38 = or i160 %r32, %r37
+%r39 = zext i160 %r38 to i192
+%r41 = getelementptr i32, i32* %r5, i32 5
+%r42 = load i32, i32* %r41
+%r43 = zext i32 %r42 to i192
+%r44 = shl i192 %r43, 160
+%r45 = or i192 %r39, %r44
+%r46 = zext i192 %r45 to i224
+%r48 = getelementptr i32, i32* %r5, i32 6
+%r49 = load i32, i32* %r48
+%r50 = zext i32 %r49 to i224
+%r51 = shl i224 %r50, 192
+%r52 = or i224 %r46, %r51
+%r53 = zext i224 %r52 to i256
+%r55 = getelementptr i32, i32* %r5, i32 7
+%r56 = load i32, i32* %r55
+%r57 = zext i32 %r56 to i256
+%r58 = shl i256 %r57, 224
+%r59 = or i256 %r53, %r58
+%r60 = zext i256 %r59 to i288 ; high half of the %r2 operand, widened so the later add cannot lose its carry
+%r61 = load i32, i32* %r2 ; pack %r2[0..7] the same way
+%r62 = zext i32 %r61 to i64
+%r64 = getelementptr i32, i32* %r2, i32 1
+%r65 = load i32, i32* %r64
+%r66 = zext i32 %r65 to i64
+%r67 = shl i64 %r66, 32
+%r68 = or i64 %r62, %r67
+%r69 = zext i64 %r68 to i96
+%r71 = getelementptr i32, i32* %r2, i32 2
+%r72 = load i32, i32* %r71
+%r73 = zext i32 %r72 to i96
+%r74 = shl i96 %r73, 64
+%r75 = or i96 %r69, %r74
+%r76 = zext i96 %r75 to i128
+%r78 = getelementptr i32, i32* %r2, i32 3
+%r79 = load i32, i32* %r78
+%r80 = zext i32 %r79 to i128
+%r81 = shl i128 %r80, 96
+%r82 = or i128 %r76, %r81
+%r83 = zext i128 %r82 to i160
+%r85 = getelementptr i32, i32* %r2, i32 4
+%r86 = load i32, i32* %r85
+%r87 = zext i32 %r86 to i160
+%r88 = shl i160 %r87, 128
+%r89 = or i160 %r83, %r88
+%r90 = zext i160 %r89 to i192
+%r92 = getelementptr i32, i32* %r2, i32 5
+%r93 = load i32, i32* %r92
+%r94 = zext i32 %r93 to i192
+%r95 = shl i192 %r94, 160
+%r96 = or i192 %r90, %r95
+%r97 = zext i192 %r96 to i224
+%r99 = getelementptr i32, i32* %r2, i32 6
+%r100 = load i32, i32* %r99
+%r101 = zext i32 %r100 to i224
+%r102 = shl i224 %r101, 192
+%r103 = or i224 %r97, %r102
+%r104 = zext i224 %r103 to i256
+%r106 = getelementptr i32, i32* %r2, i32 7
+%r107 = load i32, i32* %r106
+%r108 = zext i32 %r107 to i256
+%r109 = shl i256 %r108, 224
+%r110 = or i256 %r104, %r109
+%r111 = zext i256 %r110 to i288 ; low half of the %r2 operand
+%r112 = load i32, i32* %r7 ; pack %r7[0..7]
+%r113 = zext i32 %r112 to i64
+%r115 = getelementptr i32, i32* %r7, i32 1
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i64
+%r118 = shl i64 %r117, 32
+%r119 = or i64 %r113, %r118
+%r120 = zext i64 %r119 to i96
+%r122 = getelementptr i32, i32* %r7, i32 2
+%r123 = load i32, i32* %r122
+%r124 = zext i32 %r123 to i96
+%r125 = shl i96 %r124, 64
+%r126 = or i96 %r120, %r125
+%r127 = zext i96 %r126 to i128
+%r129 = getelementptr i32, i32* %r7, i32 3
+%r130 = load i32, i32* %r129
+%r131 = zext i32 %r130 to i128
+%r132 = shl i128 %r131, 96
+%r133 = or i128 %r127, %r132
+%r134 = zext i128 %r133 to i160
+%r136 = getelementptr i32, i32* %r7, i32 4
+%r137 = load i32, i32* %r136
+%r138 = zext i32 %r137 to i160
+%r139 = shl i160 %r138, 128
+%r140 = or i160 %r134, %r139
+%r141 = zext i160 %r140 to i192
+%r143 = getelementptr i32, i32* %r7, i32 5
+%r144 = load i32, i32* %r143
+%r145 = zext i32 %r144 to i192
+%r146 = shl i192 %r145, 160
+%r147 = or i192 %r141, %r146
+%r148 = zext i192 %r147 to i224
+%r150 = getelementptr i32, i32* %r7, i32 6
+%r151 = load i32, i32* %r150
+%r152 = zext i32 %r151 to i224
+%r153 = shl i224 %r152, 192
+%r154 = or i224 %r148, %r153
+%r155 = zext i224 %r154 to i256
+%r157 = getelementptr i32, i32* %r7, i32 7
+%r158 = load i32, i32* %r157
+%r159 = zext i32 %r158 to i256
+%r160 = shl i256 %r159, 224
+%r161 = or i256 %r155, %r160
+%r162 = zext i256 %r161 to i288 ; high half of the %r3 operand
+%r163 = load i32, i32* %r3 ; pack %r3[0..7]
+%r164 = zext i32 %r163 to i64
+%r166 = getelementptr i32, i32* %r3, i32 1
+%r167 = load i32, i32* %r166
+%r168 = zext i32 %r167 to i64
+%r169 = shl i64 %r168, 32
+%r170 = or i64 %r164, %r169
+%r171 = zext i64 %r170 to i96
+%r173 = getelementptr i32, i32* %r3, i32 2
+%r174 = load i32, i32* %r173
+%r175 = zext i32 %r174 to i96
+%r176 = shl i96 %r175, 64
+%r177 = or i96 %r171, %r176
+%r178 = zext i96 %r177 to i128
+%r180 = getelementptr i32, i32* %r3, i32 3
+%r181 = load i32, i32* %r180
+%r182 = zext i32 %r181 to i128
+%r183 = shl i128 %r182, 96
+%r184 = or i128 %r178, %r183
+%r185 = zext i128 %r184 to i160
+%r187 = getelementptr i32, i32* %r3, i32 4
+%r188 = load i32, i32* %r187
+%r189 = zext i32 %r188 to i160
+%r190 = shl i160 %r189, 128
+%r191 = or i160 %r185, %r190
+%r192 = zext i160 %r191 to i192
+%r194 = getelementptr i32, i32* %r3, i32 5
+%r195 = load i32, i32* %r194
+%r196 = zext i32 %r195 to i192
+%r197 = shl i192 %r196, 160
+%r198 = or i192 %r192, %r197
+%r199 = zext i192 %r198 to i224
+%r201 = getelementptr i32, i32* %r3, i32 6
+%r202 = load i32, i32* %r201
+%r203 = zext i32 %r202 to i224
+%r204 = shl i224 %r203, 192
+%r205 = or i224 %r199, %r204
+%r206 = zext i224 %r205 to i256
+%r208 = getelementptr i32, i32* %r3, i32 7
+%r209 = load i32, i32* %r208
+%r210 = zext i32 %r209 to i256
+%r211 = shl i256 %r210, 224
+%r212 = or i256 %r206, %r211
+%r213 = zext i256 %r212 to i288 ; low half of the %r3 operand
+%r214 = add i288 %r60, %r111 ; high + low half of the %r2 operand; any carry lands in bit 256
+%r215 = add i288 %r162, %r213 ; high + low half of the %r3 operand; any carry lands in bit 256
+%r217 = alloca i32, i32 16 ; 16-word scratch buffer that receives the product of the two truncated sums
+%r218 = trunc i288 %r214 to i256 ; low 256 bits of the %r2 half-sum
+%r219 = trunc i288 %r215 to i256 ; low 256 bits of the %r3 half-sum
+%r220 = lshr i288 %r214, 256
+%r221 = trunc i288 %r220 to i1 ; carry bit of the %r2 half-sum
+%r222 = lshr i288 %r215, 256
+%r223 = trunc i288 %r222 to i1 ; carry bit of the %r3 half-sum
+%r224 = and i1 %r221, %r223 ; set only when both half-sums carried
+%r226 = select i1 %r221, i256 %r219, i256 0 ; cross term: the %r3 half-sum counts once more when the %r2 half-sum carried
+%r228 = select i1 %r223, i256 %r218, i256 0 ; cross term: the %r2 half-sum counts once more when the %r3 half-sum carried
+%r230 = alloca i32, i32 8 ; 8-word scratch copy of the truncated %r2 half-sum
+%r232 = alloca i32, i32 8 ; 8-word scratch copy of the truncated %r3 half-sum
+%r234 = getelementptr i32, i32* %r230, i32 0
+%r235 = trunc i256 %r218 to i32
+store i32 %r235, i32* %r234
+%r236 = lshr i256 %r218, 32
+%r238 = getelementptr i32, i32* %r230, i32 1
+%r239 = trunc i256 %r236 to i32
+store i32 %r239, i32* %r238
+%r240 = lshr i256 %r236, 32
+%r242 = getelementptr i32, i32* %r230, i32 2
+%r243 = trunc i256 %r240 to i32
+store i32 %r243, i32* %r242
+%r244 = lshr i256 %r240, 32
+%r246 = getelementptr i32, i32* %r230, i32 3
+%r247 = trunc i256 %r244 to i32
+store i32 %r247, i32* %r246
+%r248 = lshr i256 %r244, 32
+%r250 = getelementptr i32, i32* %r230, i32 4
+%r251 = trunc i256 %r248 to i32
+store i32 %r251, i32* %r250
+%r252 = lshr i256 %r248, 32
+%r254 = getelementptr i32, i32* %r230, i32 5
+%r255 = trunc i256 %r252 to i32
+store i32 %r255, i32* %r254
+%r256 = lshr i256 %r252, 32
+%r258 = getelementptr i32, i32* %r230, i32 6
+%r259 = trunc i256 %r256 to i32
+store i32 %r259, i32* %r258
+%r260 = lshr i256 %r256, 32
+%r262 = getelementptr i32, i32* %r230, i32 7
+%r263 = trunc i256 %r260 to i32
+store i32 %r263, i32* %r262
+%r265 = getelementptr i32, i32* %r232, i32 0
+%r266 = trunc i256 %r219 to i32
+store i32 %r266, i32* %r265
+%r267 = lshr i256 %r219, 32
+%r269 = getelementptr i32, i32* %r232, i32 1
+%r270 = trunc i256 %r267 to i32
+store i32 %r270, i32* %r269
+%r271 = lshr i256 %r267, 32
+%r273 = getelementptr i32, i32* %r232, i32 2
+%r274 = trunc i256 %r271 to i32
+store i32 %r274, i32* %r273
+%r275 = lshr i256 %r271, 32
+%r277 = getelementptr i32, i32* %r232, i32 3
+%r278 = trunc i256 %r275 to i32
+store i32 %r278, i32* %r277
+%r279 = lshr i256 %r275, 32
+%r281 = getelementptr i32, i32* %r232, i32 4
+%r282 = trunc i256 %r279 to i32
+store i32 %r282, i32* %r281
+%r283 = lshr i256 %r279, 32
+%r285 = getelementptr i32, i32* %r232, i32 5
+%r286 = trunc i256 %r283 to i32
+store i32 %r286, i32* %r285
+%r287 = lshr i256 %r283, 32
+%r289 = getelementptr i32, i32* %r232, i32 6
+%r290 = trunc i256 %r287 to i32
+store i32 %r290, i32* %r289
+%r291 = lshr i256 %r287, 32
+%r293 = getelementptr i32, i32* %r232, i32 7
+%r294 = trunc i256 %r291 to i32
+store i32 %r294, i32* %r293
+call void @mcl_fpDbl_mulPre8L(i32* %r217, i32* %r230, i32* %r232) ; third 8-word multiply: product of the two truncated half-sums into the scratch buffer
+%r295 = load i32, i32* %r217 ; read the 16 scratch words back as one 512-bit integer
+%r296 = zext i32 %r295 to i64
+%r298 = getelementptr i32, i32* %r217, i32 1
+%r299 = load i32, i32* %r298
+%r300 = zext i32 %r299 to i64
+%r301 = shl i64 %r300, 32
+%r302 = or i64 %r296, %r301
+%r303 = zext i64 %r302 to i96
+%r305 = getelementptr i32, i32* %r217, i32 2
+%r306 = load i32, i32* %r305
+%r307 = zext i32 %r306 to i96
+%r308 = shl i96 %r307, 64
+%r309 = or i96 %r303, %r308
+%r310 = zext i96 %r309 to i128
+%r312 = getelementptr i32, i32* %r217, i32 3
+%r313 = load i32, i32* %r312
+%r314 = zext i32 %r313 to i128
+%r315 = shl i128 %r314, 96
+%r316 = or i128 %r310, %r315
+%r317 = zext i128 %r316 to i160
+%r319 = getelementptr i32, i32* %r217, i32 4
+%r320 = load i32, i32* %r319
+%r321 = zext i32 %r320 to i160
+%r322 = shl i160 %r321, 128
+%r323 = or i160 %r317, %r322
+%r324 = zext i160 %r323 to i192
+%r326 = getelementptr i32, i32* %r217, i32 5
+%r327 = load i32, i32* %r326
+%r328 = zext i32 %r327 to i192
+%r329 = shl i192 %r328, 160
+%r330 = or i192 %r324, %r329
+%r331 = zext i192 %r330 to i224
+%r333 = getelementptr i32, i32* %r217, i32 6
+%r334 = load i32, i32* %r333
+%r335 = zext i32 %r334 to i224
+%r336 = shl i224 %r335, 192
+%r337 = or i224 %r331, %r336
+%r338 = zext i224 %r337 to i256
+%r340 = getelementptr i32, i32* %r217, i32 7
+%r341 = load i32, i32* %r340
+%r342 = zext i32 %r341 to i256
+%r343 = shl i256 %r342, 224
+%r344 = or i256 %r338, %r343
+%r345 = zext i256 %r344 to i288
+%r347 = getelementptr i32, i32* %r217, i32 8
+%r348 = load i32, i32* %r347
+%r349 = zext i32 %r348 to i288
+%r350 = shl i288 %r349, 256
+%r351 = or i288 %r345, %r350
+%r352 = zext i288 %r351 to i320
+%r354 = getelementptr i32, i32* %r217, i32 9
+%r355 = load i32, i32* %r354
+%r356 = zext i32 %r355 to i320
+%r357 = shl i320 %r356, 288
+%r358 = or i320 %r352, %r357
+%r359 = zext i320 %r358 to i352
+%r361 = getelementptr i32, i32* %r217, i32 10
+%r362 = load i32, i32* %r361
+%r363 = zext i32 %r362 to i352
+%r364 = shl i352 %r363, 320
+%r365 = or i352 %r359, %r364
+%r366 = zext i352 %r365 to i384
+%r368 = getelementptr i32, i32* %r217, i32 11
+%r369 = load i32, i32* %r368
+%r370 = zext i32 %r369 to i384
+%r371 = shl i384 %r370, 352
+%r372 = or i384 %r366, %r371
+%r373 = zext i384 %r372 to i416
+%r375 = getelementptr i32, i32* %r217, i32 12
+%r376 = load i32, i32* %r375
+%r377 = zext i32 %r376 to i416
+%r378 = shl i416 %r377, 384
+%r379 = or i416 %r373, %r378
+%r380 = zext i416 %r379 to i448
+%r382 = getelementptr i32, i32* %r217, i32 13
+%r383 = load i32, i32* %r382
+%r384 = zext i32 %r383 to i448
+%r385 = shl i448 %r384, 416
+%r386 = or i448 %r380, %r385
+%r387 = zext i448 %r386 to i480
+%r389 = getelementptr i32, i32* %r217, i32 14
+%r390 = load i32, i32* %r389
+%r391 = zext i32 %r390 to i480
+%r392 = shl i480 %r391, 448
+%r393 = or i480 %r387, %r392
+%r394 = zext i480 %r393 to i512
+%r396 = getelementptr i32, i32* %r217, i32 15
+%r397 = load i32, i32* %r396
+%r398 = zext i32 %r397 to i512
+%r399 = shl i512 %r398, 480
+%r400 = or i512 %r394, %r399
+%r401 = zext i512 %r400 to i544 ; product of the truncated half-sums, widened to 544 bits
+%r402 = zext i1 %r224 to i544
+%r403 = shl i544 %r402, 512
+%r404 = or i544 %r401, %r403 ; carry*carry contributes a 1 at bit 512 (bits 512 and up of %r401 are zero, so or acts as add)
+%r405 = zext i256 %r226 to i544
+%r406 = zext i256 %r228 to i544
+%r407 = shl i544 %r405, 256 ; each cross term is weighted by 2^256
+%r408 = shl i544 %r406, 256
+%r409 = add i544 %r404, %r407
+%r410 = add i544 %r409, %r408 ; = %r214 * %r215, the exact product of the two 257-bit half-sums
+%r411 = load i32, i32* %r1 ; pack %r1[0..15], the low*low product written by the first call
+%r412 = zext i32 %r411 to i64
+%r414 = getelementptr i32, i32* %r1, i32 1
+%r415 = load i32, i32* %r414
+%r416 = zext i32 %r415 to i64
+%r417 = shl i64 %r416, 32
+%r418 = or i64 %r412, %r417
+%r419 = zext i64 %r418 to i96
+%r421 = getelementptr i32, i32* %r1, i32 2
+%r422 = load i32, i32* %r421
+%r423 = zext i32 %r422 to i96
+%r424 = shl i96 %r423, 64
+%r425 = or i96 %r419, %r424
+%r426 = zext i96 %r425 to i128
+%r428 = getelementptr i32, i32* %r1, i32 3
+%r429 = load i32, i32* %r428
+%r430 = zext i32 %r429 to i128
+%r431 = shl i128 %r430, 96
+%r432 = or i128 %r426, %r431
+%r433 = zext i128 %r432 to i160
+%r435 = getelementptr i32, i32* %r1, i32 4
+%r436 = load i32, i32* %r435
+%r437 = zext i32 %r436 to i160
+%r438 = shl i160 %r437, 128
+%r439 = or i160 %r433, %r438
+%r440 = zext i160 %r439 to i192
+%r442 = getelementptr i32, i32* %r1, i32 5
+%r443 = load i32, i32* %r442
+%r444 = zext i32 %r443 to i192
+%r445 = shl i192 %r444, 160
+%r446 = or i192 %r440, %r445
+%r447 = zext i192 %r446 to i224
+%r449 = getelementptr i32, i32* %r1, i32 6
+%r450 = load i32, i32* %r449
+%r451 = zext i32 %r450 to i224
+%r452 = shl i224 %r451, 192
+%r453 = or i224 %r447, %r452
+%r454 = zext i224 %r453 to i256
+%r456 = getelementptr i32, i32* %r1, i32 7
+%r457 = load i32, i32* %r456
+%r458 = zext i32 %r457 to i256
+%r459 = shl i256 %r458, 224
+%r460 = or i256 %r454, %r459
+%r461 = zext i256 %r460 to i288
+%r463 = getelementptr i32, i32* %r1, i32 8
+%r464 = load i32, i32* %r463
+%r465 = zext i32 %r464 to i288
+%r466 = shl i288 %r465, 256
+%r467 = or i288 %r461, %r466
+%r468 = zext i288 %r467 to i320
+%r470 = getelementptr i32, i32* %r1, i32 9
+%r471 = load i32, i32* %r470
+%r472 = zext i32 %r471 to i320
+%r473 = shl i320 %r472, 288
+%r474 = or i320 %r468, %r473
+%r475 = zext i320 %r474 to i352
+%r477 = getelementptr i32, i32* %r1, i32 10
+%r478 = load i32, i32* %r477
+%r479 = zext i32 %r478 to i352
+%r480 = shl i352 %r479, 320
+%r481 = or i352 %r475, %r480
+%r482 = zext i352 %r481 to i384
+%r484 = getelementptr i32, i32* %r1, i32 11
+%r485 = load i32, i32* %r484
+%r486 = zext i32 %r485 to i384
+%r487 = shl i384 %r486, 352
+%r488 = or i384 %r482, %r487
+%r489 = zext i384 %r488 to i416
+%r491 = getelementptr i32, i32* %r1, i32 12
+%r492 = load i32, i32* %r491
+%r493 = zext i32 %r492 to i416
+%r494 = shl i416 %r493, 384
+%r495 = or i416 %r489, %r494
+%r496 = zext i416 %r495 to i448
+%r498 = getelementptr i32, i32* %r1, i32 13
+%r499 = load i32, i32* %r498
+%r500 = zext i32 %r499 to i448
+%r501 = shl i448 %r500, 416
+%r502 = or i448 %r496, %r501
+%r503 = zext i448 %r502 to i480
+%r505 = getelementptr i32, i32* %r1, i32 14
+%r506 = load i32, i32* %r505
+%r507 = zext i32 %r506 to i480
+%r508 = shl i480 %r507, 448
+%r509 = or i480 %r503, %r508
+%r510 = zext i480 %r509 to i512
+%r512 = getelementptr i32, i32* %r1, i32 15
+%r513 = load i32, i32* %r512
+%r514 = zext i32 %r513 to i512
+%r515 = shl i512 %r514, 480
+%r516 = or i512 %r510, %r515
+%r517 = zext i512 %r516 to i544
+%r518 = sub i544 %r410, %r517 ; subtract the low*low product
+%r520 = getelementptr i32, i32* %r1, i32 16 ; same address as %r9: start of the high*high product
+%r521 = load i32, i32* %r520 ; pack %r1[16..31]
+%r522 = zext i32 %r521 to i64
+%r524 = getelementptr i32, i32* %r520, i32 1
+%r525 = load i32, i32* %r524
+%r526 = zext i32 %r525 to i64
+%r527 = shl i64 %r526, 32
+%r528 = or i64 %r522, %r527
+%r529 = zext i64 %r528 to i96
+%r531 = getelementptr i32, i32* %r520, i32 2
+%r532 = load i32, i32* %r531
+%r533 = zext i32 %r532 to i96
+%r534 = shl i96 %r533, 64
+%r535 = or i96 %r529, %r534
+%r536 = zext i96 %r535 to i128
+%r538 = getelementptr i32, i32* %r520, i32 3
+%r539 = load i32, i32* %r538
+%r540 = zext i32 %r539 to i128
+%r541 = shl i128 %r540, 96
+%r542 = or i128 %r536, %r541
+%r543 = zext i128 %r542 to i160
+%r545 = getelementptr i32, i32* %r520, i32 4
+%r546 = load i32, i32* %r545
+%r547 = zext i32 %r546 to i160
+%r548 = shl i160 %r547, 128
+%r549 = or i160 %r543, %r548
+%r550 = zext i160 %r549 to i192
+%r552 = getelementptr i32, i32* %r520, i32 5
+%r553 = load i32, i32* %r552
+%r554 = zext i32 %r553 to i192
+%r555 = shl i192 %r554, 160
+%r556 = or i192 %r550, %r555
+%r557 = zext i192 %r556 to i224
+%r559 = getelementptr i32, i32* %r520, i32 6
+%r560 = load i32, i32* %r559
+%r561 = zext i32 %r560 to i224
+%r562 = shl i224 %r561, 192
+%r563 = or i224 %r557, %r562
+%r564 = zext i224 %r563 to i256
+%r566 = getelementptr i32, i32* %r520, i32 7
+%r567 = load i32, i32* %r566
+%r568 = zext i32 %r567 to i256
+%r569 = shl i256 %r568, 224
+%r570 = or i256 %r564, %r569
+%r571 = zext i256 %r570 to i288
+%r573 = getelementptr i32, i32* %r520, i32 8
+%r574 = load i32, i32* %r573
+%r575 = zext i32 %r574 to i288
+%r576 = shl i288 %r575, 256
+%r577 = or i288 %r571, %r576
+%r578 = zext i288 %r577 to i320
+%r580 = getelementptr i32, i32* %r520, i32 9
+%r581 = load i32, i32* %r580
+%r582 = zext i32 %r581 to i320
+%r583 = shl i320 %r582, 288
+%r584 = or i320 %r578, %r583
+%r585 = zext i320 %r584 to i352
+%r587 = getelementptr i32, i32* %r520, i32 10
+%r588 = load i32, i32* %r587
+%r589 = zext i32 %r588 to i352
+%r590 = shl i352 %r589, 320
+%r591 = or i352 %r585, %r590
+%r592 = zext i352 %r591 to i384
+%r594 = getelementptr i32, i32* %r520, i32 11
+%r595 = load i32, i32* %r594
+%r596 = zext i32 %r595 to i384
+%r597 = shl i384 %r596, 352
+%r598 = or i384 %r592, %r597
+%r599 = zext i384 %r598 to i416
+%r601 = getelementptr i32, i32* %r520, i32 12
+%r602 = load i32, i32* %r601
+%r603 = zext i32 %r602 to i416
+%r604 = shl i416 %r603, 384
+%r605 = or i416 %r599, %r604
+%r606 = zext i416 %r605 to i448
+%r608 = getelementptr i32, i32* %r520, i32 13
+%r609 = load i32, i32* %r608
+%r610 = zext i32 %r609 to i448
+%r611 = shl i448 %r610, 416
+%r612 = or i448 %r606, %r611
+%r613 = zext i448 %r612 to i480
+%r615 = getelementptr i32, i32* %r520, i32 14
+%r616 = load i32, i32* %r615
+%r617 = zext i32 %r616 to i480
+%r618 = shl i480 %r617, 448
+%r619 = or i480 %r613, %r618
+%r620 = zext i480 %r619 to i512
+%r622 = getelementptr i32, i32* %r520, i32 15
+%r623 = load i32, i32* %r622
+%r624 = zext i32 %r623 to i512
+%r625 = shl i512 %r624, 480
+%r626 = or i512 %r620, %r625
+%r627 = zext i512 %r626 to i544
+%r628 = sub i544 %r518, %r627 ; subtract the high*high product; what remains is the middle term high*low + low*high
+%r629 = zext i544 %r628 to i768 ; widen to the 24 words it will be added into
+%r631 = getelementptr i32, i32* %r1, i32 8 ; the middle term is weighted by 2^256, i.e. it starts at output word 8
+%r632 = load i32, i32* %r631 ; pack %r1[8..31] into one 768-bit integer
+%r633 = zext i32 %r632 to i64
+%r635 = getelementptr i32, i32* %r631, i32 1
+%r636 = load i32, i32* %r635
+%r637 = zext i32 %r636 to i64
+%r638 = shl i64 %r637, 32
+%r639 = or i64 %r633, %r638
+%r640 = zext i64 %r639 to i96
+%r642 = getelementptr i32, i32* %r631, i32 2
+%r643 = load i32, i32* %r642
+%r644 = zext i32 %r643 to i96
+%r645 = shl i96 %r644, 64
+%r646 = or i96 %r640, %r645
+%r647 = zext i96 %r646 to i128
+%r649 = getelementptr i32, i32* %r631, i32 3
+%r650 = load i32, i32* %r649
+%r651 = zext i32 %r650 to i128
+%r652 = shl i128 %r651, 96
+%r653 = or i128 %r647, %r652
+%r654 = zext i128 %r653 to i160
+%r656 = getelementptr i32, i32* %r631, i32 4
+%r657 = load i32, i32* %r656
+%r658 = zext i32 %r657 to i160
+%r659 = shl i160 %r658, 128
+%r660 = or i160 %r654, %r659
+%r661 = zext i160 %r660 to i192
+%r663 = getelementptr i32, i32* %r631, i32 5
+%r664 = load i32, i32* %r663
+%r665 = zext i32 %r664 to i192
+%r666 = shl i192 %r665, 160
+%r667 = or i192 %r661, %r666
+%r668 = zext i192 %r667 to i224
+%r670 = getelementptr i32, i32* %r631, i32 6
+%r671 = load i32, i32* %r670
+%r672 = zext i32 %r671 to i224
+%r673 = shl i224 %r672, 192
+%r674 = or i224 %r668, %r673
+%r675 = zext i224 %r674 to i256
+%r677 = getelementptr i32, i32* %r631, i32 7
+%r678 = load i32, i32* %r677
+%r679 = zext i32 %r678 to i256
+%r680 = shl i256 %r679, 224
+%r681 = or i256 %r675, %r680
+%r682 = zext i256 %r681 to i288
+%r684 = getelementptr i32, i32* %r631, i32 8
+%r685 = load i32, i32* %r684
+%r686 = zext i32 %r685 to i288
+%r687 = shl i288 %r686, 256
+%r688 = or i288 %r682, %r687
+%r689 = zext i288 %r688 to i320
+%r691 = getelementptr i32, i32* %r631, i32 9
+%r692 = load i32, i32* %r691
+%r693 = zext i32 %r692 to i320
+%r694 = shl i320 %r693, 288
+%r695 = or i320 %r689, %r694
+%r696 = zext i320 %r695 to i352
+%r698 = getelementptr i32, i32* %r631, i32 10
+%r699 = load i32, i32* %r698
+%r700 = zext i32 %r699 to i352
+%r701 = shl i352 %r700, 320
+%r702 = or i352 %r696, %r701
+%r703 = zext i352 %r702 to i384
+%r705 = getelementptr i32, i32* %r631, i32 11
+%r706 = load i32, i32* %r705
+%r707 = zext i32 %r706 to i384
+%r708 = shl i384 %r707, 352
+%r709 = or i384 %r703, %r708
+%r710 = zext i384 %r709 to i416
+%r712 = getelementptr i32, i32* %r631, i32 12
+%r713 = load i32, i32* %r712
+%r714 = zext i32 %r713 to i416
+%r715 = shl i416 %r714, 384
+%r716 = or i416 %r710, %r715
+%r717 = zext i416 %r716 to i448
+%r719 = getelementptr i32, i32* %r631, i32 13
+%r720 = load i32, i32* %r719
+%r721 = zext i32 %r720 to i448
+%r722 = shl i448 %r721, 416
+%r723 = or i448 %r717, %r722
+%r724 = zext i448 %r723 to i480
+%r726 = getelementptr i32, i32* %r631, i32 14
+%r727 = load i32, i32* %r726
+%r728 = zext i32 %r727 to i480
+%r729 = shl i480 %r728, 448
+%r730 = or i480 %r724, %r729
+%r731 = zext i480 %r730 to i512
+%r733 = getelementptr i32, i32* %r631, i32 15
+%r734 = load i32, i32* %r733
+%r735 = zext i32 %r734 to i512
+%r736 = shl i512 %r735, 480
+%r737 = or i512 %r731, %r736
+%r738 = zext i512 %r737 to i544
+%r740 = getelementptr i32, i32* %r631, i32 16
+%r741 = load i32, i32* %r740
+%r742 = zext i32 %r741 to i544
+%r743 = shl i544 %r742, 512
+%r744 = or i544 %r738, %r743
+%r745 = zext i544 %r744 to i576
+%r747 = getelementptr i32, i32* %r631, i32 17
+%r748 = load i32, i32* %r747
+%r749 = zext i32 %r748 to i576
+%r750 = shl i576 %r749, 544
+%r751 = or i576 %r745, %r750
+%r752 = zext i576 %r751 to i608
+%r754 = getelementptr i32, i32* %r631, i32 18
+%r755 = load i32, i32* %r754
+%r756 = zext i32 %r755 to i608
+%r757 = shl i608 %r756, 576
+%r758 = or i608 %r752, %r757
+%r759 = zext i608 %r758 to i640
+%r761 = getelementptr i32, i32* %r631, i32 19
+%r762 = load i32, i32* %r761
+%r763 = zext i32 %r762 to i640
+%r764 = shl i640 %r763, 608
+%r765 = or i640 %r759, %r764
+%r766 = zext i640 %r765 to i672
+%r768 = getelementptr i32, i32* %r631, i32 20
+%r769 = load i32, i32* %r768
+%r770 = zext i32 %r769 to i672
+%r771 = shl i672 %r770, 640
+%r772 = or i672 %r766, %r771
+%r773 = zext i672 %r772 to i704
+%r775 = getelementptr i32, i32* %r631, i32 21
+%r776 = load i32, i32* %r775
+%r777 = zext i32 %r776 to i704
+%r778 = shl i704 %r777, 672
+%r779 = or i704 %r773, %r778
+%r780 = zext i704 %r779 to i736
+%r782 = getelementptr i32, i32* %r631, i32 22
+%r783 = load i32, i32* %r782
+%r784 = zext i32 %r783 to i736
+%r785 = shl i736 %r784, 704
+%r786 = or i736 %r780, %r785
+%r787 = zext i736 %r786 to i768
+%r789 = getelementptr i32, i32* %r631, i32 23
+%r790 = load i32, i32* %r789
+%r791 = zext i32 %r790 to i768
+%r792 = shl i768 %r791, 736
+%r793 = or i768 %r787, %r792 ; current contents of %r1[8..31]
+%r794 = add i768 %r629, %r793 ; fold the middle term into the output at word offset 8
+%r796 = getelementptr i32, i32* %r1, i32 8
+%r798 = getelementptr i32, i32* %r796, i32 0
+%r799 = trunc i768 %r794 to i32
+store i32 %r799, i32* %r798 ; write the 24 updated words back to %r1[8..31], least-significant word first
+%r800 = lshr i768 %r794, 32
+%r802 = getelementptr i32, i32* %r796, i32 1
+%r803 = trunc i768 %r800 to i32
+store i32 %r803, i32* %r802
+%r804 = lshr i768 %r800, 32
+%r806 = getelementptr i32, i32* %r796, i32 2
+%r807 = trunc i768 %r804 to i32
+store i32 %r807, i32* %r806
+%r808 = lshr i768 %r804, 32
+%r810 = getelementptr i32, i32* %r796, i32 3
+%r811 = trunc i768 %r808 to i32
+store i32 %r811, i32* %r810
+%r812 = lshr i768 %r808, 32
+%r814 = getelementptr i32, i32* %r796, i32 4
+%r815 = trunc i768 %r812 to i32
+store i32 %r815, i32* %r814
+%r816 = lshr i768 %r812, 32
+%r818 = getelementptr i32, i32* %r796, i32 5
+%r819 = trunc i768 %r816 to i32
+store i32 %r819, i32* %r818
+%r820 = lshr i768 %r816, 32
+%r822 = getelementptr i32, i32* %r796, i32 6
+%r823 = trunc i768 %r820 to i32
+store i32 %r823, i32* %r822
+%r824 = lshr i768 %r820, 32
+%r826 = getelementptr i32, i32* %r796, i32 7
+%r827 = trunc i768 %r824 to i32
+store i32 %r827, i32* %r826
+%r828 = lshr i768 %r824, 32
+%r830 = getelementptr i32, i32* %r796, i32 8
+%r831 = trunc i768 %r828 to i32
+store i32 %r831, i32* %r830
+%r832 = lshr i768 %r828, 32
+%r834 = getelementptr i32, i32* %r796, i32 9
+%r835 = trunc i768 %r832 to i32
+store i32 %r835, i32* %r834
+%r836 = lshr i768 %r832, 32
+%r838 = getelementptr i32, i32* %r796, i32 10
+%r839 = trunc i768 %r836 to i32
+store i32 %r839, i32* %r838
+%r840 = lshr i768 %r836, 32
+%r842 = getelementptr i32, i32* %r796, i32 11
+%r843 = trunc i768 %r840 to i32
+store i32 %r843, i32* %r842
+%r844 = lshr i768 %r840, 32
+%r846 = getelementptr i32, i32* %r796, i32 12
+%r847 = trunc i768 %r844 to i32
+store i32 %r847, i32* %r846
+%r848 = lshr i768 %r844, 32
+%r850 = getelementptr i32, i32* %r796, i32 13
+%r851 = trunc i768 %r848 to i32
+store i32 %r851, i32* %r850
+%r852 = lshr i768 %r848, 32
+%r854 = getelementptr i32, i32* %r796, i32 14
+%r855 = trunc i768 %r852 to i32
+store i32 %r855, i32* %r854
+%r856 = lshr i768 %r852, 32
+%r858 = getelementptr i32, i32* %r796, i32 15
+%r859 = trunc i768 %r856 to i32
+store i32 %r859, i32* %r858
+%r860 = lshr i768 %r856, 32
+%r862 = getelementptr i32, i32* %r796, i32 16
+%r863 = trunc i768 %r860 to i32
+store i32 %r863, i32* %r862
+%r864 = lshr i768 %r860, 32
+%r866 = getelementptr i32, i32* %r796, i32 17
+%r867 = trunc i768 %r864 to i32
+store i32 %r867, i32* %r866
+%r868 = lshr i768 %r864, 32
+%r870 = getelementptr i32, i32* %r796, i32 18
+%r871 = trunc i768 %r868 to i32
+store i32 %r871, i32* %r870
+%r872 = lshr i768 %r868, 32
+%r874 = getelementptr i32, i32* %r796, i32 19
+%r875 = trunc i768 %r872 to i32
+store i32 %r875, i32* %r874
+%r876 = lshr i768 %r872, 32
+%r878 = getelementptr i32, i32* %r796, i32 20
+%r879 = trunc i768 %r876 to i32
+store i32 %r879, i32* %r878
+%r880 = lshr i768 %r876, 32
+%r882 = getelementptr i32, i32* %r796, i32 21
+%r883 = trunc i768 %r880 to i32
+store i32 %r883, i32* %r882
+%r884 = lshr i768 %r880, 32
+%r886 = getelementptr i32, i32* %r796, i32 22
+%r887 = trunc i768 %r884 to i32
+store i32 %r887, i32* %r886
+%r888 = lshr i768 %r884, 32
+%r890 = getelementptr i32, i32* %r796, i32 23
+%r891 = trunc i768 %r888 to i32
+store i32 %r891, i32* %r890 ; %r1[31], the most-significant word of the 32-word product
+ret void
+}
+define void @mcl_fpDbl_sqrPre16L(i32* noalias  %r1, i32* noalias  %r2)
+{
+%r4 = getelementptr i32, i32* %r2, i32 8
+%r6 = getelementptr i32, i32* %r2, i32 8
+%r8 = getelementptr i32, i32* %r1, i32 16
+call void @mcl_fpDbl_mulPre8L(i32* %r1, i32* %r2, i32* %r2)
+call void @mcl_fpDbl_mulPre8L(i32* %r8, i32* %r4, i32* %r6)
+%r9 = load i32, i32* %r4
+%r10 = zext i32 %r9 to i64
+%r12 = getelementptr i32, i32* %r4, i32 1
+%r13 = load i32, i32* %r12
+%r14 = zext i32 %r13 to i64
+%r15 = shl i64 %r14, 32
+%r16 = or i64 %r10, %r15
+%r17 = zext i64 %r16 to i96
+%r19 = getelementptr i32, i32* %r4, i32 2
+%r20 = load i32, i32* %r19
+%r21 = zext i32 %r20 to i96
+%r22 = shl i96 %r21, 64
+%r23 = or i96 %r17, %r22
+%r24 = zext i96 %r23 to i128
+%r26 = getelementptr i32, i32* %r4, i32 3
+%r27 = load i32, i32* %r26
+%r28 = zext i32 %r27 to i128
+%r29 = shl i128 %r28, 96
+%r30 = or i128 %r24, %r29
+%r31 = zext i128 %r30 to i160
+%r33 = getelementptr i32, i32* %r4, i32 4
+%r34 = load i32, i32* %r33
+%r35 = zext i32 %r34 to i160
+%r36 = shl i160 %r35, 128
+%r37 = or i160 %r31, %r36
+%r38 = zext i160 %r37 to i192
+%r40 = getelementptr i32, i32* %r4, i32 5
+%r41 = load i32, i32* %r40
+%r42 = zext i32 %r41 to i192
+%r43 = shl i192 %r42, 160
+%r44 = or i192 %r38, %r43
+%r45 = zext i192 %r44 to i224
+%r47 = getelementptr i32, i32* %r4, i32 6
+%r48 = load i32, i32* %r47
+%r49 = zext i32 %r48 to i224
+%r50 = shl i224 %r49, 192
+%r51 = or i224 %r45, %r50
+%r52 = zext i224 %r51 to i256
+%r54 = getelementptr i32, i32* %r4, i32 7
+%r55 = load i32, i32* %r54
+%r56 = zext i32 %r55 to i256
+%r57 = shl i256 %r56, 224
+%r58 = or i256 %r52, %r57
+%r59 = zext i256 %r58 to i288
+%r60 = load i32, i32* %r2
+%r61 = zext i32 %r60 to i64
+%r63 = getelementptr i32, i32* %r2, i32 1
+%r64 = load i32, i32* %r63
+%r65 = zext i32 %r64 to i64
+%r66 = shl i64 %r65, 32
+%r67 = or i64 %r61, %r66
+%r68 = zext i64 %r67 to i96
+%r70 = getelementptr i32, i32* %r2, i32 2
+%r71 = load i32, i32* %r70
+%r72 = zext i32 %r71 to i96
+%r73 = shl i96 %r72, 64
+%r74 = or i96 %r68, %r73
+%r75 = zext i96 %r74 to i128
+%r77 = getelementptr i32, i32* %r2, i32 3
+%r78 = load i32, i32* %r77
+%r79 = zext i32 %r78 to i128
+%r80 = shl i128 %r79, 96
+%r81 = or i128 %r75, %r80
+%r82 = zext i128 %r81 to i160
+%r84 = getelementptr i32, i32* %r2, i32 4
+%r85 = load i32, i32* %r84
+%r86 = zext i32 %r85 to i160
+%r87 = shl i160 %r86, 128
+%r88 = or i160 %r82, %r87
+%r89 = zext i160 %r88 to i192
+%r91 = getelementptr i32, i32* %r2, i32 5
+%r92 = load i32, i32* %r91
+%r93 = zext i32 %r92 to i192
+%r94 = shl i192 %r93, 160
+%r95 = or i192 %r89, %r94
+%r96 = zext i192 %r95 to i224
+%r98 = getelementptr i32, i32* %r2, i32 6
+%r99 = load i32, i32* %r98
+%r100 = zext i32 %r99 to i224
+%r101 = shl i224 %r100, 192
+%r102 = or i224 %r96, %r101
+%r103 = zext i224 %r102 to i256
+%r105 = getelementptr i32, i32* %r2, i32 7
+%r106 = load i32, i32* %r105
+%r107 = zext i32 %r106 to i256
+%r108 = shl i256 %r107, 224
+%r109 = or i256 %r103, %r108
+%r110 = zext i256 %r109 to i288
+%r111 = load i32, i32* %r6
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r6, i32 1
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r6, i32 2
+%r122 = load i32, i32* %r121
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r6, i32 3
+%r129 = load i32, i32* %r128
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r6, i32 4
+%r136 = load i32, i32* %r135
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r6, i32 5
+%r143 = load i32, i32* %r142
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r6, i32 6
+%r150 = load i32, i32* %r149
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r6, i32 7
+%r157 = load i32, i32* %r156
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r162 = load i32, i32* %r2
+%r163 = zext i32 %r162 to i64
+%r165 = getelementptr i32, i32* %r2, i32 1
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i64
+%r168 = shl i64 %r167, 32
+%r169 = or i64 %r163, %r168
+%r170 = zext i64 %r169 to i96
+%r172 = getelementptr i32, i32* %r2, i32 2
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i96
+%r175 = shl i96 %r174, 64
+%r176 = or i96 %r170, %r175
+%r177 = zext i96 %r176 to i128
+%r179 = getelementptr i32, i32* %r2, i32 3
+%r180 = load i32, i32* %r179
+%r181 = zext i32 %r180 to i128
+%r182 = shl i128 %r181, 96
+%r183 = or i128 %r177, %r182
+%r184 = zext i128 %r183 to i160
+%r186 = getelementptr i32, i32* %r2, i32 4
+%r187 = load i32, i32* %r186
+%r188 = zext i32 %r187 to i160
+%r189 = shl i160 %r188, 128
+%r190 = or i160 %r184, %r189
+%r191 = zext i160 %r190 to i192
+%r193 = getelementptr i32, i32* %r2, i32 5
+%r194 = load i32, i32* %r193
+%r195 = zext i32 %r194 to i192
+%r196 = shl i192 %r195, 160
+%r197 = or i192 %r191, %r196
+%r198 = zext i192 %r197 to i224
+%r200 = getelementptr i32, i32* %r2, i32 6
+%r201 = load i32, i32* %r200
+%r202 = zext i32 %r201 to i224
+%r203 = shl i224 %r202, 192
+%r204 = or i224 %r198, %r203
+%r205 = zext i224 %r204 to i256
+%r207 = getelementptr i32, i32* %r2, i32 7
+%r208 = load i32, i32* %r207
+%r209 = zext i32 %r208 to i256
+%r210 = shl i256 %r209, 224
+%r211 = or i256 %r205, %r210
+%r212 = zext i256 %r211 to i288
+%r213 = add i288 %r59, %r110
+%r214 = add i288 %r161, %r212
+%r216 = alloca i32, i32 16
+%r217 = trunc i288 %r213 to i256
+%r218 = trunc i288 %r214 to i256
+%r219 = lshr i288 %r213, 256
+%r220 = trunc i288 %r219 to i1
+%r221 = lshr i288 %r214, 256
+%r222 = trunc i288 %r221 to i1
+%r223 = and i1 %r220, %r222
+%r225 = select i1 %r220, i256 %r218, i256 0
+%r227 = select i1 %r222, i256 %r217, i256 0
+%r229 = alloca i32, i32 8
+%r231 = alloca i32, i32 8
+%r233 = getelementptr i32, i32* %r229, i32 0
+%r234 = trunc i256 %r217 to i32
+store i32 %r234, i32* %r233
+%r235 = lshr i256 %r217, 32
+%r237 = getelementptr i32, i32* %r229, i32 1
+%r238 = trunc i256 %r235 to i32
+store i32 %r238, i32* %r237
+%r239 = lshr i256 %r235, 32
+%r241 = getelementptr i32, i32* %r229, i32 2
+%r242 = trunc i256 %r239 to i32
+store i32 %r242, i32* %r241
+%r243 = lshr i256 %r239, 32
+%r245 = getelementptr i32, i32* %r229, i32 3
+%r246 = trunc i256 %r243 to i32
+store i32 %r246, i32* %r245
+%r247 = lshr i256 %r243, 32
+%r249 = getelementptr i32, i32* %r229, i32 4
+%r250 = trunc i256 %r247 to i32
+store i32 %r250, i32* %r249
+%r251 = lshr i256 %r247, 32
+%r253 = getelementptr i32, i32* %r229, i32 5
+%r254 = trunc i256 %r251 to i32
+store i32 %r254, i32* %r253
+%r255 = lshr i256 %r251, 32
+%r257 = getelementptr i32, i32* %r229, i32 6
+%r258 = trunc i256 %r255 to i32
+store i32 %r258, i32* %r257
+%r259 = lshr i256 %r255, 32
+%r261 = getelementptr i32, i32* %r229, i32 7
+%r262 = trunc i256 %r259 to i32
+store i32 %r262, i32* %r261
+%r264 = getelementptr i32, i32* %r231, i32 0
+%r265 = trunc i256 %r218 to i32
+store i32 %r265, i32* %r264
+%r266 = lshr i256 %r218, 32
+%r268 = getelementptr i32, i32* %r231, i32 1
+%r269 = trunc i256 %r266 to i32
+store i32 %r269, i32* %r268
+%r270 = lshr i256 %r266, 32
+%r272 = getelementptr i32, i32* %r231, i32 2
+%r273 = trunc i256 %r270 to i32
+store i32 %r273, i32* %r272
+%r274 = lshr i256 %r270, 32
+%r276 = getelementptr i32, i32* %r231, i32 3
+%r277 = trunc i256 %r274 to i32
+store i32 %r277, i32* %r276
+%r278 = lshr i256 %r274, 32
+%r280 = getelementptr i32, i32* %r231, i32 4
+%r281 = trunc i256 %r278 to i32
+store i32 %r281, i32* %r280
+%r282 = lshr i256 %r278, 32
+%r284 = getelementptr i32, i32* %r231, i32 5
+%r285 = trunc i256 %r282 to i32
+store i32 %r285, i32* %r284
+%r286 = lshr i256 %r282, 32
+%r288 = getelementptr i32, i32* %r231, i32 6
+%r289 = trunc i256 %r286 to i32
+store i32 %r289, i32* %r288
+%r290 = lshr i256 %r286, 32
+%r292 = getelementptr i32, i32* %r231, i32 7
+%r293 = trunc i256 %r290 to i32
+store i32 %r293, i32* %r292
+call void @mcl_fpDbl_mulPre8L(i32* %r216, i32* %r229, i32* %r231)
+%r294 = load i32, i32* %r216
+%r295 = zext i32 %r294 to i64
+%r297 = getelementptr i32, i32* %r216, i32 1
+%r298 = load i32, i32* %r297
+%r299 = zext i32 %r298 to i64
+%r300 = shl i64 %r299, 32
+%r301 = or i64 %r295, %r300
+%r302 = zext i64 %r301 to i96
+%r304 = getelementptr i32, i32* %r216, i32 2
+%r305 = load i32, i32* %r304
+%r306 = zext i32 %r305 to i96
+%r307 = shl i96 %r306, 64
+%r308 = or i96 %r302, %r307
+%r309 = zext i96 %r308 to i128
+%r311 = getelementptr i32, i32* %r216, i32 3
+%r312 = load i32, i32* %r311
+%r313 = zext i32 %r312 to i128
+%r314 = shl i128 %r313, 96
+%r315 = or i128 %r309, %r314
+%r316 = zext i128 %r315 to i160
+%r318 = getelementptr i32, i32* %r216, i32 4
+%r319 = load i32, i32* %r318
+%r320 = zext i32 %r319 to i160
+%r321 = shl i160 %r320, 128
+%r322 = or i160 %r316, %r321
+%r323 = zext i160 %r322 to i192
+%r325 = getelementptr i32, i32* %r216, i32 5
+%r326 = load i32, i32* %r325
+%r327 = zext i32 %r326 to i192
+%r328 = shl i192 %r327, 160
+%r329 = or i192 %r323, %r328
+%r330 = zext i192 %r329 to i224
+%r332 = getelementptr i32, i32* %r216, i32 6
+%r333 = load i32, i32* %r332
+%r334 = zext i32 %r333 to i224
+%r335 = shl i224 %r334, 192
+%r336 = or i224 %r330, %r335
+%r337 = zext i224 %r336 to i256
+%r339 = getelementptr i32, i32* %r216, i32 7
+%r340 = load i32, i32* %r339
+%r341 = zext i32 %r340 to i256
+%r342 = shl i256 %r341, 224
+%r343 = or i256 %r337, %r342
+%r344 = zext i256 %r343 to i288
+%r346 = getelementptr i32, i32* %r216, i32 8
+%r347 = load i32, i32* %r346
+%r348 = zext i32 %r347 to i288
+%r349 = shl i288 %r348, 256
+%r350 = or i288 %r344, %r349
+%r351 = zext i288 %r350 to i320
+%r353 = getelementptr i32, i32* %r216, i32 9
+%r354 = load i32, i32* %r353
+%r355 = zext i32 %r354 to i320
+%r356 = shl i320 %r355, 288
+%r357 = or i320 %r351, %r356
+%r358 = zext i320 %r357 to i352
+%r360 = getelementptr i32, i32* %r216, i32 10
+%r361 = load i32, i32* %r360
+%r362 = zext i32 %r361 to i352
+%r363 = shl i352 %r362, 320
+%r364 = or i352 %r358, %r363
+%r365 = zext i352 %r364 to i384
+%r367 = getelementptr i32, i32* %r216, i32 11
+%r368 = load i32, i32* %r367
+%r369 = zext i32 %r368 to i384
+%r370 = shl i384 %r369, 352
+%r371 = or i384 %r365, %r370
+%r372 = zext i384 %r371 to i416
+%r374 = getelementptr i32, i32* %r216, i32 12
+%r375 = load i32, i32* %r374
+%r376 = zext i32 %r375 to i416
+%r377 = shl i416 %r376, 384
+%r378 = or i416 %r372, %r377
+%r379 = zext i416 %r378 to i448
+%r381 = getelementptr i32, i32* %r216, i32 13
+%r382 = load i32, i32* %r381
+%r383 = zext i32 %r382 to i448
+%r384 = shl i448 %r383, 416
+%r385 = or i448 %r379, %r384
+%r386 = zext i448 %r385 to i480
+%r388 = getelementptr i32, i32* %r216, i32 14
+%r389 = load i32, i32* %r388
+%r390 = zext i32 %r389 to i480
+%r391 = shl i480 %r390, 448
+%r392 = or i480 %r386, %r391
+%r393 = zext i480 %r392 to i512
+%r395 = getelementptr i32, i32* %r216, i32 15
+%r396 = load i32, i32* %r395
+%r397 = zext i32 %r396 to i512
+%r398 = shl i512 %r397, 480
+%r399 = or i512 %r393, %r398
+%r400 = zext i512 %r399 to i544
+%r401 = zext i1 %r223 to i544
+%r402 = shl i544 %r401, 512
+%r403 = or i544 %r400, %r402
+%r404 = zext i256 %r225 to i544
+%r405 = zext i256 %r227 to i544
+%r406 = shl i544 %r404, 256
+%r407 = shl i544 %r405, 256
+%r408 = add i544 %r403, %r406
+%r409 = add i544 %r408, %r407
+%r410 = load i32, i32* %r1
+%r411 = zext i32 %r410 to i64
+%r413 = getelementptr i32, i32* %r1, i32 1
+%r414 = load i32, i32* %r413
+%r415 = zext i32 %r414 to i64
+%r416 = shl i64 %r415, 32
+%r417 = or i64 %r411, %r416
+%r418 = zext i64 %r417 to i96
+%r420 = getelementptr i32, i32* %r1, i32 2
+%r421 = load i32, i32* %r420
+%r422 = zext i32 %r421 to i96
+%r423 = shl i96 %r422, 64
+%r424 = or i96 %r418, %r423
+%r425 = zext i96 %r424 to i128
+%r427 = getelementptr i32, i32* %r1, i32 3
+%r428 = load i32, i32* %r427
+%r429 = zext i32 %r428 to i128
+%r430 = shl i128 %r429, 96
+%r431 = or i128 %r425, %r430
+%r432 = zext i128 %r431 to i160
+%r434 = getelementptr i32, i32* %r1, i32 4
+%r435 = load i32, i32* %r434
+%r436 = zext i32 %r435 to i160
+%r437 = shl i160 %r436, 128
+%r438 = or i160 %r432, %r437
+%r439 = zext i160 %r438 to i192
+%r441 = getelementptr i32, i32* %r1, i32 5
+%r442 = load i32, i32* %r441
+%r443 = zext i32 %r442 to i192
+%r444 = shl i192 %r443, 160
+%r445 = or i192 %r439, %r444
+%r446 = zext i192 %r445 to i224
+%r448 = getelementptr i32, i32* %r1, i32 6
+%r449 = load i32, i32* %r448
+%r450 = zext i32 %r449 to i224
+%r451 = shl i224 %r450, 192
+%r452 = or i224 %r446, %r451
+%r453 = zext i224 %r452 to i256
+%r455 = getelementptr i32, i32* %r1, i32 7
+%r456 = load i32, i32* %r455
+%r457 = zext i32 %r456 to i256
+%r458 = shl i256 %r457, 224
+%r459 = or i256 %r453, %r458
+%r460 = zext i256 %r459 to i288
+%r462 = getelementptr i32, i32* %r1, i32 8
+%r463 = load i32, i32* %r462
+%r464 = zext i32 %r463 to i288
+%r465 = shl i288 %r464, 256
+%r466 = or i288 %r460, %r465
+%r467 = zext i288 %r466 to i320
+%r469 = getelementptr i32, i32* %r1, i32 9
+%r470 = load i32, i32* %r469
+%r471 = zext i32 %r470 to i320
+%r472 = shl i320 %r471, 288
+%r473 = or i320 %r467, %r472
+%r474 = zext i320 %r473 to i352
+%r476 = getelementptr i32, i32* %r1, i32 10
+%r477 = load i32, i32* %r476
+%r478 = zext i32 %r477 to i352
+%r479 = shl i352 %r478, 320
+%r480 = or i352 %r474, %r479
+%r481 = zext i352 %r480 to i384
+%r483 = getelementptr i32, i32* %r1, i32 11
+%r484 = load i32, i32* %r483
+%r485 = zext i32 %r484 to i384
+%r486 = shl i384 %r485, 352
+%r487 = or i384 %r481, %r486
+%r488 = zext i384 %r487 to i416
+%r490 = getelementptr i32, i32* %r1, i32 12
+%r491 = load i32, i32* %r490
+%r492 = zext i32 %r491 to i416
+%r493 = shl i416 %r492, 384
+%r494 = or i416 %r488, %r493
+%r495 = zext i416 %r494 to i448
+%r497 = getelementptr i32, i32* %r1, i32 13
+%r498 = load i32, i32* %r497
+%r499 = zext i32 %r498 to i448
+%r500 = shl i448 %r499, 416
+%r501 = or i448 %r495, %r500
+%r502 = zext i448 %r501 to i480
+%r504 = getelementptr i32, i32* %r1, i32 14
+%r505 = load i32, i32* %r504
+%r506 = zext i32 %r505 to i480
+%r507 = shl i480 %r506, 448
+%r508 = or i480 %r502, %r507
+%r509 = zext i480 %r508 to i512
+%r511 = getelementptr i32, i32* %r1, i32 15
+%r512 = load i32, i32* %r511
+%r513 = zext i32 %r512 to i512
+%r514 = shl i512 %r513, 480
+%r515 = or i512 %r509, %r514
+%r516 = zext i512 %r515 to i544
+%r517 = sub i544 %r409, %r516
+%r519 = getelementptr i32, i32* %r1, i32 16
+%r520 = load i32, i32* %r519
+%r521 = zext i32 %r520 to i64
+%r523 = getelementptr i32, i32* %r519, i32 1
+%r524 = load i32, i32* %r523
+%r525 = zext i32 %r524 to i64
+%r526 = shl i64 %r525, 32
+%r527 = or i64 %r521, %r526
+%r528 = zext i64 %r527 to i96
+%r530 = getelementptr i32, i32* %r519, i32 2
+%r531 = load i32, i32* %r530
+%r532 = zext i32 %r531 to i96
+%r533 = shl i96 %r532, 64
+%r534 = or i96 %r528, %r533
+%r535 = zext i96 %r534 to i128
+%r537 = getelementptr i32, i32* %r519, i32 3
+%r538 = load i32, i32* %r537
+%r539 = zext i32 %r538 to i128
+%r540 = shl i128 %r539, 96
+%r541 = or i128 %r535, %r540
+%r542 = zext i128 %r541 to i160
+%r544 = getelementptr i32, i32* %r519, i32 4
+%r545 = load i32, i32* %r544
+%r546 = zext i32 %r545 to i160
+%r547 = shl i160 %r546, 128
+%r548 = or i160 %r542, %r547
+%r549 = zext i160 %r548 to i192
+%r551 = getelementptr i32, i32* %r519, i32 5
+%r552 = load i32, i32* %r551
+%r553 = zext i32 %r552 to i192
+%r554 = shl i192 %r553, 160
+%r555 = or i192 %r549, %r554
+%r556 = zext i192 %r555 to i224
+%r558 = getelementptr i32, i32* %r519, i32 6
+%r559 = load i32, i32* %r558
+%r560 = zext i32 %r559 to i224
+%r561 = shl i224 %r560, 192
+%r562 = or i224 %r556, %r561
+%r563 = zext i224 %r562 to i256
+%r565 = getelementptr i32, i32* %r519, i32 7
+%r566 = load i32, i32* %r565
+%r567 = zext i32 %r566 to i256
+%r568 = shl i256 %r567, 224
+%r569 = or i256 %r563, %r568
+%r570 = zext i256 %r569 to i288
+%r572 = getelementptr i32, i32* %r519, i32 8
+%r573 = load i32, i32* %r572
+%r574 = zext i32 %r573 to i288
+%r575 = shl i288 %r574, 256
+%r576 = or i288 %r570, %r575
+%r577 = zext i288 %r576 to i320
+%r579 = getelementptr i32, i32* %r519, i32 9
+%r580 = load i32, i32* %r579
+%r581 = zext i32 %r580 to i320
+%r582 = shl i320 %r581, 288
+%r583 = or i320 %r577, %r582
+%r584 = zext i320 %r583 to i352
+%r586 = getelementptr i32, i32* %r519, i32 10
+%r587 = load i32, i32* %r586
+%r588 = zext i32 %r587 to i352
+%r589 = shl i352 %r588, 320
+%r590 = or i352 %r584, %r589
+%r591 = zext i352 %r590 to i384
+%r593 = getelementptr i32, i32* %r519, i32 11
+%r594 = load i32, i32* %r593
+%r595 = zext i32 %r594 to i384
+%r596 = shl i384 %r595, 352
+%r597 = or i384 %r591, %r596
+%r598 = zext i384 %r597 to i416
+%r600 = getelementptr i32, i32* %r519, i32 12
+%r601 = load i32, i32* %r600
+%r602 = zext i32 %r601 to i416
+%r603 = shl i416 %r602, 384
+%r604 = or i416 %r598, %r603
+%r605 = zext i416 %r604 to i448
+%r607 = getelementptr i32, i32* %r519, i32 13
+%r608 = load i32, i32* %r607
+%r609 = zext i32 %r608 to i448
+%r610 = shl i448 %r609, 416
+%r611 = or i448 %r605, %r610
+%r612 = zext i448 %r611 to i480
+%r614 = getelementptr i32, i32* %r519, i32 14
+%r615 = load i32, i32* %r614
+%r616 = zext i32 %r615 to i480
+%r617 = shl i480 %r616, 448
+%r618 = or i480 %r612, %r617
+%r619 = zext i480 %r618 to i512
+%r621 = getelementptr i32, i32* %r519, i32 15
+%r622 = load i32, i32* %r621
+%r623 = zext i32 %r622 to i512
+%r624 = shl i512 %r623, 480
+%r625 = or i512 %r619, %r624
+%r626 = zext i512 %r625 to i544
+%r627 = sub i544 %r517, %r626
+%r628 = zext i544 %r627 to i768
+%r630 = getelementptr i32, i32* %r1, i32 8
+%r631 = load i32, i32* %r630
+%r632 = zext i32 %r631 to i64
+%r634 = getelementptr i32, i32* %r630, i32 1
+%r635 = load i32, i32* %r634
+%r636 = zext i32 %r635 to i64
+%r637 = shl i64 %r636, 32
+%r638 = or i64 %r632, %r637
+%r639 = zext i64 %r638 to i96
+%r641 = getelementptr i32, i32* %r630, i32 2
+%r642 = load i32, i32* %r641
+%r643 = zext i32 %r642 to i96
+%r644 = shl i96 %r643, 64
+%r645 = or i96 %r639, %r644
+%r646 = zext i96 %r645 to i128
+%r648 = getelementptr i32, i32* %r630, i32 3
+%r649 = load i32, i32* %r648
+%r650 = zext i32 %r649 to i128
+%r651 = shl i128 %r650, 96
+%r652 = or i128 %r646, %r651
+%r653 = zext i128 %r652 to i160
+%r655 = getelementptr i32, i32* %r630, i32 4
+%r656 = load i32, i32* %r655
+%r657 = zext i32 %r656 to i160
+%r658 = shl i160 %r657, 128
+%r659 = or i160 %r653, %r658
+%r660 = zext i160 %r659 to i192
+%r662 = getelementptr i32, i32* %r630, i32 5
+%r663 = load i32, i32* %r662
+%r664 = zext i32 %r663 to i192
+%r665 = shl i192 %r664, 160
+%r666 = or i192 %r660, %r665
+%r667 = zext i192 %r666 to i224
+%r669 = getelementptr i32, i32* %r630, i32 6
+%r670 = load i32, i32* %r669
+%r671 = zext i32 %r670 to i224
+%r672 = shl i224 %r671, 192
+%r673 = or i224 %r667, %r672
+%r674 = zext i224 %r673 to i256
+%r676 = getelementptr i32, i32* %r630, i32 7
+%r677 = load i32, i32* %r676
+%r678 = zext i32 %r677 to i256
+%r679 = shl i256 %r678, 224
+%r680 = or i256 %r674, %r679
+%r681 = zext i256 %r680 to i288
+%r683 = getelementptr i32, i32* %r630, i32 8
+%r684 = load i32, i32* %r683
+%r685 = zext i32 %r684 to i288
+%r686 = shl i288 %r685, 256
+%r687 = or i288 %r681, %r686
+%r688 = zext i288 %r687 to i320
+%r690 = getelementptr i32, i32* %r630, i32 9
+%r691 = load i32, i32* %r690
+%r692 = zext i32 %r691 to i320
+%r693 = shl i320 %r692, 288
+%r694 = or i320 %r688, %r693
+%r695 = zext i320 %r694 to i352
+%r697 = getelementptr i32, i32* %r630, i32 10
+%r698 = load i32, i32* %r697
+%r699 = zext i32 %r698 to i352
+%r700 = shl i352 %r699, 320
+%r701 = or i352 %r695, %r700
+%r702 = zext i352 %r701 to i384
+%r704 = getelementptr i32, i32* %r630, i32 11
+%r705 = load i32, i32* %r704
+%r706 = zext i32 %r705 to i384
+%r707 = shl i384 %r706, 352
+%r708 = or i384 %r702, %r707
+%r709 = zext i384 %r708 to i416
+%r711 = getelementptr i32, i32* %r630, i32 12
+%r712 = load i32, i32* %r711
+%r713 = zext i32 %r712 to i416
+%r714 = shl i416 %r713, 384
+%r715 = or i416 %r709, %r714
+%r716 = zext i416 %r715 to i448
+%r718 = getelementptr i32, i32* %r630, i32 13
+%r719 = load i32, i32* %r718
+%r720 = zext i32 %r719 to i448
+%r721 = shl i448 %r720, 416
+%r722 = or i448 %r716, %r721
+%r723 = zext i448 %r722 to i480
+%r725 = getelementptr i32, i32* %r630, i32 14
+%r726 = load i32, i32* %r725
+%r727 = zext i32 %r726 to i480
+%r728 = shl i480 %r727, 448
+%r729 = or i480 %r723, %r728
+%r730 = zext i480 %r729 to i512
+%r732 = getelementptr i32, i32* %r630, i32 15
+%r733 = load i32, i32* %r732
+%r734 = zext i32 %r733 to i512
+%r735 = shl i512 %r734, 480
+%r736 = or i512 %r730, %r735
+%r737 = zext i512 %r736 to i544
+%r739 = getelementptr i32, i32* %r630, i32 16
+%r740 = load i32, i32* %r739
+%r741 = zext i32 %r740 to i544
+%r742 = shl i544 %r741, 512
+%r743 = or i544 %r737, %r742
+%r744 = zext i544 %r743 to i576
+%r746 = getelementptr i32, i32* %r630, i32 17
+%r747 = load i32, i32* %r746
+%r748 = zext i32 %r747 to i576
+%r749 = shl i576 %r748, 544
+%r750 = or i576 %r744, %r749
+%r751 = zext i576 %r750 to i608
+%r753 = getelementptr i32, i32* %r630, i32 18
+%r754 = load i32, i32* %r753
+%r755 = zext i32 %r754 to i608
+%r756 = shl i608 %r755, 576
+%r757 = or i608 %r751, %r756
+%r758 = zext i608 %r757 to i640
+%r760 = getelementptr i32, i32* %r630, i32 19
+%r761 = load i32, i32* %r760
+%r762 = zext i32 %r761 to i640
+%r763 = shl i640 %r762, 608
+%r764 = or i640 %r758, %r763
+%r765 = zext i640 %r764 to i672
+%r767 = getelementptr i32, i32* %r630, i32 20
+%r768 = load i32, i32* %r767
+%r769 = zext i32 %r768 to i672
+%r770 = shl i672 %r769, 640
+%r771 = or i672 %r765, %r770
+%r772 = zext i672 %r771 to i704
+%r774 = getelementptr i32, i32* %r630, i32 21
+%r775 = load i32, i32* %r774
+%r776 = zext i32 %r775 to i704
+%r777 = shl i704 %r776, 672
+%r778 = or i704 %r772, %r777
+%r779 = zext i704 %r778 to i736
+%r781 = getelementptr i32, i32* %r630, i32 22
+%r782 = load i32, i32* %r781
+%r783 = zext i32 %r782 to i736
+%r784 = shl i736 %r783, 704
+%r785 = or i736 %r779, %r784
+%r786 = zext i736 %r785 to i768
+%r788 = getelementptr i32, i32* %r630, i32 23
+%r789 = load i32, i32* %r788
+%r790 = zext i32 %r789 to i768
+%r791 = shl i768 %r790, 736
+%r792 = or i768 %r786, %r791
+%r793 = add i768 %r628, %r792
+%r795 = getelementptr i32, i32* %r1, i32 8
+%r797 = getelementptr i32, i32* %r795, i32 0
+%r798 = trunc i768 %r793 to i32
+store i32 %r798, i32* %r797
+%r799 = lshr i768 %r793, 32
+%r801 = getelementptr i32, i32* %r795, i32 1
+%r802 = trunc i768 %r799 to i32
+store i32 %r802, i32* %r801
+%r803 = lshr i768 %r799, 32
+%r805 = getelementptr i32, i32* %r795, i32 2
+%r806 = trunc i768 %r803 to i32
+store i32 %r806, i32* %r805
+%r807 = lshr i768 %r803, 32
+%r809 = getelementptr i32, i32* %r795, i32 3
+%r810 = trunc i768 %r807 to i32
+store i32 %r810, i32* %r809
+%r811 = lshr i768 %r807, 32
+%r813 = getelementptr i32, i32* %r795, i32 4
+%r814 = trunc i768 %r811 to i32
+store i32 %r814, i32* %r813
+%r815 = lshr i768 %r811, 32
+%r817 = getelementptr i32, i32* %r795, i32 5
+%r818 = trunc i768 %r815 to i32
+store i32 %r818, i32* %r817
+%r819 = lshr i768 %r815, 32
+%r821 = getelementptr i32, i32* %r795, i32 6
+%r822 = trunc i768 %r819 to i32
+store i32 %r822, i32* %r821
+%r823 = lshr i768 %r819, 32
+%r825 = getelementptr i32, i32* %r795, i32 7
+%r826 = trunc i768 %r823 to i32
+store i32 %r826, i32* %r825
+%r827 = lshr i768 %r823, 32
+%r829 = getelementptr i32, i32* %r795, i32 8
+%r830 = trunc i768 %r827 to i32
+store i32 %r830, i32* %r829
+%r831 = lshr i768 %r827, 32
+%r833 = getelementptr i32, i32* %r795, i32 9
+%r834 = trunc i768 %r831 to i32
+store i32 %r834, i32* %r833
+%r835 = lshr i768 %r831, 32
+%r837 = getelementptr i32, i32* %r795, i32 10
+%r838 = trunc i768 %r835 to i32
+store i32 %r838, i32* %r837
+%r839 = lshr i768 %r835, 32
+%r841 = getelementptr i32, i32* %r795, i32 11
+%r842 = trunc i768 %r839 to i32
+store i32 %r842, i32* %r841
+%r843 = lshr i768 %r839, 32
+%r845 = getelementptr i32, i32* %r795, i32 12
+%r846 = trunc i768 %r843 to i32
+store i32 %r846, i32* %r845
+%r847 = lshr i768 %r843, 32
+%r849 = getelementptr i32, i32* %r795, i32 13
+%r850 = trunc i768 %r847 to i32
+store i32 %r850, i32* %r849
+%r851 = lshr i768 %r847, 32
+%r853 = getelementptr i32, i32* %r795, i32 14
+%r854 = trunc i768 %r851 to i32
+store i32 %r854, i32* %r853
+%r855 = lshr i768 %r851, 32
+%r857 = getelementptr i32, i32* %r795, i32 15
+%r858 = trunc i768 %r855 to i32
+store i32 %r858, i32* %r857
+%r859 = lshr i768 %r855, 32
+%r861 = getelementptr i32, i32* %r795, i32 16
+%r862 = trunc i768 %r859 to i32
+store i32 %r862, i32* %r861
+%r863 = lshr i768 %r859, 32
+%r865 = getelementptr i32, i32* %r795, i32 17
+%r866 = trunc i768 %r863 to i32
+store i32 %r866, i32* %r865
+%r867 = lshr i768 %r863, 32
+%r869 = getelementptr i32, i32* %r795, i32 18
+%r870 = trunc i768 %r867 to i32
+store i32 %r870, i32* %r869
+%r871 = lshr i768 %r867, 32
+%r873 = getelementptr i32, i32* %r795, i32 19
+%r874 = trunc i768 %r871 to i32
+store i32 %r874, i32* %r873
+%r875 = lshr i768 %r871, 32
+%r877 = getelementptr i32, i32* %r795, i32 20
+%r878 = trunc i768 %r875 to i32
+store i32 %r878, i32* %r877
+%r879 = lshr i768 %r875, 32
+%r881 = getelementptr i32, i32* %r795, i32 21
+%r882 = trunc i768 %r879 to i32
+store i32 %r882, i32* %r881
+%r883 = lshr i768 %r879, 32
+%r885 = getelementptr i32, i32* %r795, i32 22
+%r886 = trunc i768 %r883 to i32
+store i32 %r886, i32* %r885
+%r887 = lshr i768 %r883, 32
+%r889 = getelementptr i32, i32* %r795, i32 23
+%r890 = trunc i768 %r887 to i32
+store i32 %r890, i32* %r889
 ret void
 }
-define i576 @mulPv544x32(i32* noalias  %r2, i32 %r3)
-{
-%r5 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 0)
-%r6 = trunc i64 %r5 to i32
-%r7 = call i32 @extractHigh32(i64 %r5)
-%r9 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 1)
-%r10 = trunc i64 %r9 to i32
-%r11 = call i32 @extractHigh32(i64 %r9)
-%r13 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 2)
-%r14 = trunc i64 %r13 to i32
-%r15 = call i32 @extractHigh32(i64 %r13)
-%r17 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 3)
-%r18 = trunc i64 %r17 to i32
-%r19 = call i32 @extractHigh32(i64 %r17)
-%r21 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 4)
-%r22 = trunc i64 %r21 to i32
-%r23 = call i32 @extractHigh32(i64 %r21)
-%r25 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 5)
-%r26 = trunc i64 %r25 to i32
-%r27 = call i32 @extractHigh32(i64 %r25)
-%r29 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 6)
-%r30 = trunc i64 %r29 to i32
-%r31 = call i32 @extractHigh32(i64 %r29)
-%r33 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 7)
-%r34 = trunc i64 %r33 to i32
-%r35 = call i32 @extractHigh32(i64 %r33)
-%r37 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 8)
-%r38 = trunc i64 %r37 to i32
-%r39 = call i32 @extractHigh32(i64 %r37)
-%r41 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 9)
-%r42 = trunc i64 %r41 to i32
-%r43 = call i32 @extractHigh32(i64 %r41)
-%r45 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 10)
-%r46 = trunc i64 %r45 to i32
-%r47 = call i32 @extractHigh32(i64 %r45)
-%r49 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 11)
-%r50 = trunc i64 %r49 to i32
-%r51 = call i32 @extractHigh32(i64 %r49)
-%r53 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 12)
-%r54 = trunc i64 %r53 to i32
-%r55 = call i32 @extractHigh32(i64 %r53)
-%r57 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 13)
-%r58 = trunc i64 %r57 to i32
-%r59 = call i32 @extractHigh32(i64 %r57)
-%r61 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 14)
-%r62 = trunc i64 %r61 to i32
-%r63 = call i32 @extractHigh32(i64 %r61)
-%r65 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 15)
-%r66 = trunc i64 %r65 to i32
-%r67 = call i32 @extractHigh32(i64 %r65)
-%r69 = call i64 @mulPos32x32(i32* %r2, i32 %r3, i32 16)
-%r70 = trunc i64 %r69 to i32
-%r71 = call i32 @extractHigh32(i64 %r69)
-%r72 = zext i32 %r6 to i64
-%r73 = zext i32 %r10 to i64
-%r74 = shl i64 %r73, 32
-%r75 = or i64 %r72, %r74
-%r76 = zext i64 %r75 to i96
-%r77 = zext i32 %r14 to i96
-%r78 = shl i96 %r77, 64
-%r79 = or i96 %r76, %r78
-%r80 = zext i96 %r79 to i128
-%r81 = zext i32 %r18 to i128
-%r82 = shl i128 %r81, 96
-%r83 = or i128 %r80, %r82
-%r84 = zext i128 %r83 to i160
-%r85 = zext i32 %r22 to i160
-%r86 = shl i160 %r85, 128
-%r87 = or i160 %r84, %r86
-%r88 = zext i160 %r87 to i192
-%r89 = zext i32 %r26 to i192
-%r90 = shl i192 %r89, 160
-%r91 = or i192 %r88, %r90
-%r92 = zext i192 %r91 to i224
-%r93 = zext i32 %r30 to i224
-%r94 = shl i224 %r93, 192
-%r95 = or i224 %r92, %r94
-%r96 = zext i224 %r95 to i256
-%r97 = zext i32 %r34 to i256
-%r98 = shl i256 %r97, 224
-%r99 = or i256 %r96, %r98
-%r100 = zext i256 %r99 to i288
-%r101 = zext i32 %r38 to i288
-%r102 = shl i288 %r101, 256
-%r103 = or i288 %r100, %r102
-%r104 = zext i288 %r103 to i320
-%r105 = zext i32 %r42 to i320
-%r106 = shl i320 %r105, 288
-%r107 = or i320 %r104, %r106
-%r108 = zext i320 %r107 to i352
-%r109 = zext i32 %r46 to i352
-%r110 = shl i352 %r109, 320
-%r111 = or i352 %r108, %r110
-%r112 = zext i352 %r111 to i384
-%r113 = zext i32 %r50 to i384
-%r114 = shl i384 %r113, 352
-%r115 = or i384 %r112, %r114
-%r116 = zext i384 %r115 to i416
-%r117 = zext i32 %r54 to i416
-%r118 = shl i416 %r117, 384
-%r119 = or i416 %r116, %r118
-%r120 = zext i416 %r119 to i448
-%r121 = zext i32 %r58 to i448
-%r122 = shl i448 %r121, 416
-%r123 = or i448 %r120, %r122
-%r124 = zext i448 %r123 to i480
-%r125 = zext i32 %r62 to i480
-%r126 = shl i480 %r125, 448
-%r127 = or i480 %r124, %r126
-%r128 = zext i480 %r127 to i512
-%r129 = zext i32 %r66 to i512
-%r130 = shl i512 %r129, 480
-%r131 = or i512 %r128, %r130
-%r132 = zext i512 %r131 to i544
-%r133 = zext i32 %r70 to i544
-%r134 = shl i544 %r133, 512
-%r135 = or i544 %r132, %r134
-%r136 = zext i32 %r7 to i64
-%r137 = zext i32 %r11 to i64
-%r138 = shl i64 %r137, 32
-%r139 = or i64 %r136, %r138
-%r140 = zext i64 %r139 to i96
-%r141 = zext i32 %r15 to i96
-%r142 = shl i96 %r141, 64
-%r143 = or i96 %r140, %r142
-%r144 = zext i96 %r143 to i128
-%r145 = zext i32 %r19 to i128
-%r146 = shl i128 %r145, 96
-%r147 = or i128 %r144, %r146
-%r148 = zext i128 %r147 to i160
-%r149 = zext i32 %r23 to i160
-%r150 = shl i160 %r149, 128
-%r151 = or i160 %r148, %r150
-%r152 = zext i160 %r151 to i192
-%r153 = zext i32 %r27 to i192
-%r154 = shl i192 %r153, 160
-%r155 = or i192 %r152, %r154
-%r156 = zext i192 %r155 to i224
-%r157 = zext i32 %r31 to i224
-%r158 = shl i224 %r157, 192
-%r159 = or i224 %r156, %r158
-%r160 = zext i224 %r159 to i256
-%r161 = zext i32 %r35 to i256
-%r162 = shl i256 %r161, 224
-%r163 = or i256 %r160, %r162
-%r164 = zext i256 %r163 to i288
-%r165 = zext i32 %r39 to i288
-%r166 = shl i288 %r165, 256
-%r167 = or i288 %r164, %r166
-%r168 = zext i288 %r167 to i320
-%r169 = zext i32 %r43 to i320
-%r170 = shl i320 %r169, 288
-%r171 = or i320 %r168, %r170
-%r172 = zext i320 %r171 to i352
-%r173 = zext i32 %r47 to i352
-%r174 = shl i352 %r173, 320
-%r175 = or i352 %r172, %r174
-%r176 = zext i352 %r175 to i384
-%r177 = zext i32 %r51 to i384
-%r178 = shl i384 %r177, 352
-%r179 = or i384 %r176, %r178
-%r180 = zext i384 %r179 to i416
-%r181 = zext i32 %r55 to i416
-%r182 = shl i416 %r181, 384
-%r183 = or i416 %r180, %r182
-%r184 = zext i416 %r183 to i448
-%r185 = zext i32 %r59 to i448
-%r186 = shl i448 %r185, 416
-%r187 = or i448 %r184, %r186
-%r188 = zext i448 %r187 to i480
-%r189 = zext i32 %r63 to i480
-%r190 = shl i480 %r189, 448
-%r191 = or i480 %r188, %r190
-%r192 = zext i480 %r191 to i512
-%r193 = zext i32 %r67 to i512
-%r194 = shl i512 %r193, 480
-%r195 = or i512 %r192, %r194
-%r196 = zext i512 %r195 to i544
-%r197 = zext i32 %r71 to i544
-%r198 = shl i544 %r197, 512
-%r199 = or i544 %r196, %r198
-%r200 = zext i544 %r135 to i576
-%r201 = zext i544 %r199 to i576
-%r202 = shl i576 %r201, 32
-%r203 = add i576 %r200, %r202
-ret i576 %r203
-}
-define void @mcl_fp_mulUnitPre17L(i32* noalias  %r1, i32* noalias  %r2, i32 %r3)
+define void @mcl_fp_mont16L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
 {
-%r4 = call i576 @mulPv544x32(i32* %r2, i32 %r3)
-%r5 = trunc i576 %r4 to i32
-%r7 = getelementptr i32, i32* %r1, i32 0
-store i32 %r5, i32* %r7
-%r8 = lshr i576 %r4, 32
-%r9 = trunc i576 %r8 to i32
-%r11 = getelementptr i32, i32* %r1, i32 1
-store i32 %r9, i32* %r11
-%r12 = lshr i576 %r8, 32
-%r13 = trunc i576 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 2
-store i32 %r13, i32* %r15
-%r16 = lshr i576 %r12, 32
-%r17 = trunc i576 %r16 to i32
-%r19 = getelementptr i32, i32* %r1, i32 3
-store i32 %r17, i32* %r19
-%r20 = lshr i576 %r16, 32
-%r21 = trunc i576 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 4
-store i32 %r21, i32* %r23
-%r24 = lshr i576 %r20, 32
+%r6 = getelementptr i32, i32* %r4, i32 -1
+%r7 = load i32, i32* %r6
+%r9 = getelementptr i32, i32* %r3, i32 0
+%r10 = load i32, i32* %r9
+%r11 = call i544 @mulPv512x32(i32* %r2, i32 %r10)
+%r12 = zext i544 %r11 to i576
+%r13 = trunc i544 %r11 to i32
+%r14 = mul i32 %r13, %r7
+%r15 = call i544 @mulPv512x32(i32* %r4, i32 %r14)
+%r16 = zext i544 %r15 to i576
+%r17 = add i576 %r12, %r16
+%r18 = lshr i576 %r17, 32
+%r20 = getelementptr i32, i32* %r3, i32 1
+%r21 = load i32, i32* %r20
+%r22 = call i544 @mulPv512x32(i32* %r2, i32 %r21)
+%r23 = zext i544 %r22 to i576
+%r24 = add i576 %r18, %r23
 %r25 = trunc i576 %r24 to i32
-%r27 = getelementptr i32, i32* %r1, i32 5
-store i32 %r25, i32* %r27
-%r28 = lshr i576 %r24, 32
-%r29 = trunc i576 %r28 to i32
-%r31 = getelementptr i32, i32* %r1, i32 6
-store i32 %r29, i32* %r31
-%r32 = lshr i576 %r28, 32
-%r33 = trunc i576 %r32 to i32
-%r35 = getelementptr i32, i32* %r1, i32 7
-store i32 %r33, i32* %r35
-%r36 = lshr i576 %r32, 32
+%r26 = mul i32 %r25, %r7
+%r27 = call i544 @mulPv512x32(i32* %r4, i32 %r26)
+%r28 = zext i544 %r27 to i576
+%r29 = add i576 %r24, %r28
+%r30 = lshr i576 %r29, 32
+%r32 = getelementptr i32, i32* %r3, i32 2
+%r33 = load i32, i32* %r32
+%r34 = call i544 @mulPv512x32(i32* %r2, i32 %r33)
+%r35 = zext i544 %r34 to i576
+%r36 = add i576 %r30, %r35
 %r37 = trunc i576 %r36 to i32
-%r39 = getelementptr i32, i32* %r1, i32 8
-store i32 %r37, i32* %r39
-%r40 = lshr i576 %r36, 32
-%r41 = trunc i576 %r40 to i32
-%r43 = getelementptr i32, i32* %r1, i32 9
-store i32 %r41, i32* %r43
-%r44 = lshr i576 %r40, 32
-%r45 = trunc i576 %r44 to i32
-%r47 = getelementptr i32, i32* %r1, i32 10
-store i32 %r45, i32* %r47
-%r48 = lshr i576 %r44, 32
+%r38 = mul i32 %r37, %r7
+%r39 = call i544 @mulPv512x32(i32* %r4, i32 %r38)
+%r40 = zext i544 %r39 to i576
+%r41 = add i576 %r36, %r40
+%r42 = lshr i576 %r41, 32
+%r44 = getelementptr i32, i32* %r3, i32 3
+%r45 = load i32, i32* %r44
+%r46 = call i544 @mulPv512x32(i32* %r2, i32 %r45)
+%r47 = zext i544 %r46 to i576
+%r48 = add i576 %r42, %r47
 %r49 = trunc i576 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 11
-store i32 %r49, i32* %r51
-%r52 = lshr i576 %r48, 32
-%r53 = trunc i576 %r52 to i32
-%r55 = getelementptr i32, i32* %r1, i32 12
-store i32 %r53, i32* %r55
-%r56 = lshr i576 %r52, 32
-%r57 = trunc i576 %r56 to i32
-%r59 = getelementptr i32, i32* %r1, i32 13
-store i32 %r57, i32* %r59
-%r60 = lshr i576 %r56, 32
+%r50 = mul i32 %r49, %r7
+%r51 = call i544 @mulPv512x32(i32* %r4, i32 %r50)
+%r52 = zext i544 %r51 to i576
+%r53 = add i576 %r48, %r52
+%r54 = lshr i576 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 4
+%r57 = load i32, i32* %r56
+%r58 = call i544 @mulPv512x32(i32* %r2, i32 %r57)
+%r59 = zext i544 %r58 to i576
+%r60 = add i576 %r54, %r59
 %r61 = trunc i576 %r60 to i32
-%r63 = getelementptr i32, i32* %r1, i32 14
-store i32 %r61, i32* %r63
-%r64 = lshr i576 %r60, 32
-%r65 = trunc i576 %r64 to i32
-%r67 = getelementptr i32, i32* %r1, i32 15
-store i32 %r65, i32* %r67
-%r68 = lshr i576 %r64, 32
-%r69 = trunc i576 %r68 to i32
-%r71 = getelementptr i32, i32* %r1, i32 16
-store i32 %r69, i32* %r71
-%r72 = lshr i576 %r68, 32
-%r73 = trunc i576 %r72 to i32
-%r75 = getelementptr i32, i32* %r1, i32 17
-store i32 %r73, i32* %r75
-ret void
-}
-define void @mcl_fpDbl_mulPre17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
-{
-%r4 = load i32, i32* %r3
-%r5 = call i576 @mulPv544x32(i32* %r2, i32 %r4)
-%r6 = trunc i576 %r5 to i32
-store i32 %r6, i32* %r1
-%r7 = lshr i576 %r5, 32
-%r9 = getelementptr i32, i32* %r3, i32 1
-%r10 = load i32, i32* %r9
-%r11 = call i576 @mulPv544x32(i32* %r2, i32 %r10)
-%r12 = add i576 %r7, %r11
-%r13 = trunc i576 %r12 to i32
-%r15 = getelementptr i32, i32* %r1, i32 1
-store i32 %r13, i32* %r15
-%r16 = lshr i576 %r12, 32
-%r18 = getelementptr i32, i32* %r3, i32 2
-%r19 = load i32, i32* %r18
-%r20 = call i576 @mulPv544x32(i32* %r2, i32 %r19)
-%r21 = add i576 %r16, %r20
-%r22 = trunc i576 %r21 to i32
-%r24 = getelementptr i32, i32* %r1, i32 2
-store i32 %r22, i32* %r24
-%r25 = lshr i576 %r21, 32
-%r27 = getelementptr i32, i32* %r3, i32 3
-%r28 = load i32, i32* %r27
-%r29 = call i576 @mulPv544x32(i32* %r2, i32 %r28)
-%r30 = add i576 %r25, %r29
-%r31 = trunc i576 %r30 to i32
-%r33 = getelementptr i32, i32* %r1, i32 3
-store i32 %r31, i32* %r33
-%r34 = lshr i576 %r30, 32
-%r36 = getelementptr i32, i32* %r3, i32 4
-%r37 = load i32, i32* %r36
-%r38 = call i576 @mulPv544x32(i32* %r2, i32 %r37)
-%r39 = add i576 %r34, %r38
-%r40 = trunc i576 %r39 to i32
-%r42 = getelementptr i32, i32* %r1, i32 4
-store i32 %r40, i32* %r42
-%r43 = lshr i576 %r39, 32
-%r45 = getelementptr i32, i32* %r3, i32 5
-%r46 = load i32, i32* %r45
-%r47 = call i576 @mulPv544x32(i32* %r2, i32 %r46)
-%r48 = add i576 %r43, %r47
-%r49 = trunc i576 %r48 to i32
-%r51 = getelementptr i32, i32* %r1, i32 5
-store i32 %r49, i32* %r51
-%r52 = lshr i576 %r48, 32
-%r54 = getelementptr i32, i32* %r3, i32 6
-%r55 = load i32, i32* %r54
-%r56 = call i576 @mulPv544x32(i32* %r2, i32 %r55)
-%r57 = add i576 %r52, %r56
-%r58 = trunc i576 %r57 to i32
-%r60 = getelementptr i32, i32* %r1, i32 6
-store i32 %r58, i32* %r60
-%r61 = lshr i576 %r57, 32
-%r63 = getelementptr i32, i32* %r3, i32 7
-%r64 = load i32, i32* %r63
-%r65 = call i576 @mulPv544x32(i32* %r2, i32 %r64)
-%r66 = add i576 %r61, %r65
-%r67 = trunc i576 %r66 to i32
-%r69 = getelementptr i32, i32* %r1, i32 7
-store i32 %r67, i32* %r69
-%r70 = lshr i576 %r66, 32
-%r72 = getelementptr i32, i32* %r3, i32 8
-%r73 = load i32, i32* %r72
-%r74 = call i576 @mulPv544x32(i32* %r2, i32 %r73)
-%r75 = add i576 %r70, %r74
-%r76 = trunc i576 %r75 to i32
-%r78 = getelementptr i32, i32* %r1, i32 8
-store i32 %r76, i32* %r78
-%r79 = lshr i576 %r75, 32
-%r81 = getelementptr i32, i32* %r3, i32 9
-%r82 = load i32, i32* %r81
-%r83 = call i576 @mulPv544x32(i32* %r2, i32 %r82)
-%r84 = add i576 %r79, %r83
-%r85 = trunc i576 %r84 to i32
-%r87 = getelementptr i32, i32* %r1, i32 9
-store i32 %r85, i32* %r87
-%r88 = lshr i576 %r84, 32
-%r90 = getelementptr i32, i32* %r3, i32 10
-%r91 = load i32, i32* %r90
-%r92 = call i576 @mulPv544x32(i32* %r2, i32 %r91)
-%r93 = add i576 %r88, %r92
-%r94 = trunc i576 %r93 to i32
-%r96 = getelementptr i32, i32* %r1, i32 10
-store i32 %r94, i32* %r96
-%r97 = lshr i576 %r93, 32
-%r99 = getelementptr i32, i32* %r3, i32 11
-%r100 = load i32, i32* %r99
-%r101 = call i576 @mulPv544x32(i32* %r2, i32 %r100)
-%r102 = add i576 %r97, %r101
-%r103 = trunc i576 %r102 to i32
-%r105 = getelementptr i32, i32* %r1, i32 11
-store i32 %r103, i32* %r105
-%r106 = lshr i576 %r102, 32
-%r108 = getelementptr i32, i32* %r3, i32 12
-%r109 = load i32, i32* %r108
-%r110 = call i576 @mulPv544x32(i32* %r2, i32 %r109)
-%r111 = add i576 %r106, %r110
-%r112 = trunc i576 %r111 to i32
-%r114 = getelementptr i32, i32* %r1, i32 12
-store i32 %r112, i32* %r114
-%r115 = lshr i576 %r111, 32
-%r117 = getelementptr i32, i32* %r3, i32 13
-%r118 = load i32, i32* %r117
-%r119 = call i576 @mulPv544x32(i32* %r2, i32 %r118)
-%r120 = add i576 %r115, %r119
-%r121 = trunc i576 %r120 to i32
-%r123 = getelementptr i32, i32* %r1, i32 13
-store i32 %r121, i32* %r123
-%r124 = lshr i576 %r120, 32
-%r126 = getelementptr i32, i32* %r3, i32 14
-%r127 = load i32, i32* %r126
-%r128 = call i576 @mulPv544x32(i32* %r2, i32 %r127)
-%r129 = add i576 %r124, %r128
-%r130 = trunc i576 %r129 to i32
-%r132 = getelementptr i32, i32* %r1, i32 14
-store i32 %r130, i32* %r132
-%r133 = lshr i576 %r129, 32
-%r135 = getelementptr i32, i32* %r3, i32 15
-%r136 = load i32, i32* %r135
-%r137 = call i576 @mulPv544x32(i32* %r2, i32 %r136)
-%r138 = add i576 %r133, %r137
-%r139 = trunc i576 %r138 to i32
-%r141 = getelementptr i32, i32* %r1, i32 15
-store i32 %r139, i32* %r141
-%r142 = lshr i576 %r138, 32
-%r144 = getelementptr i32, i32* %r3, i32 16
-%r145 = load i32, i32* %r144
-%r146 = call i576 @mulPv544x32(i32* %r2, i32 %r145)
-%r147 = add i576 %r142, %r146
-%r149 = getelementptr i32, i32* %r1, i32 16
-%r150 = trunc i576 %r147 to i32
-%r152 = getelementptr i32, i32* %r149, i32 0
-store i32 %r150, i32* %r152
-%r153 = lshr i576 %r147, 32
-%r154 = trunc i576 %r153 to i32
-%r156 = getelementptr i32, i32* %r149, i32 1
-store i32 %r154, i32* %r156
-%r157 = lshr i576 %r153, 32
-%r158 = trunc i576 %r157 to i32
-%r160 = getelementptr i32, i32* %r149, i32 2
-store i32 %r158, i32* %r160
-%r161 = lshr i576 %r157, 32
-%r162 = trunc i576 %r161 to i32
-%r164 = getelementptr i32, i32* %r149, i32 3
-store i32 %r162, i32* %r164
-%r165 = lshr i576 %r161, 32
-%r166 = trunc i576 %r165 to i32
-%r168 = getelementptr i32, i32* %r149, i32 4
-store i32 %r166, i32* %r168
-%r169 = lshr i576 %r165, 32
-%r170 = trunc i576 %r169 to i32
-%r172 = getelementptr i32, i32* %r149, i32 5
-store i32 %r170, i32* %r172
-%r173 = lshr i576 %r169, 32
-%r174 = trunc i576 %r173 to i32
-%r176 = getelementptr i32, i32* %r149, i32 6
-store i32 %r174, i32* %r176
-%r177 = lshr i576 %r173, 32
-%r178 = trunc i576 %r177 to i32
-%r180 = getelementptr i32, i32* %r149, i32 7
-store i32 %r178, i32* %r180
-%r181 = lshr i576 %r177, 32
-%r182 = trunc i576 %r181 to i32
-%r184 = getelementptr i32, i32* %r149, i32 8
-store i32 %r182, i32* %r184
-%r185 = lshr i576 %r181, 32
-%r186 = trunc i576 %r185 to i32
-%r188 = getelementptr i32, i32* %r149, i32 9
-store i32 %r186, i32* %r188
-%r189 = lshr i576 %r185, 32
-%r190 = trunc i576 %r189 to i32
-%r192 = getelementptr i32, i32* %r149, i32 10
-store i32 %r190, i32* %r192
-%r193 = lshr i576 %r189, 32
-%r194 = trunc i576 %r193 to i32
-%r196 = getelementptr i32, i32* %r149, i32 11
-store i32 %r194, i32* %r196
-%r197 = lshr i576 %r193, 32
-%r198 = trunc i576 %r197 to i32
-%r200 = getelementptr i32, i32* %r149, i32 12
-store i32 %r198, i32* %r200
-%r201 = lshr i576 %r197, 32
-%r202 = trunc i576 %r201 to i32
-%r204 = getelementptr i32, i32* %r149, i32 13
-store i32 %r202, i32* %r204
-%r205 = lshr i576 %r201, 32
-%r206 = trunc i576 %r205 to i32
-%r208 = getelementptr i32, i32* %r149, i32 14
-store i32 %r206, i32* %r208
-%r209 = lshr i576 %r205, 32
-%r210 = trunc i576 %r209 to i32
-%r212 = getelementptr i32, i32* %r149, i32 15
-store i32 %r210, i32* %r212
-%r213 = lshr i576 %r209, 32
-%r214 = trunc i576 %r213 to i32
-%r216 = getelementptr i32, i32* %r149, i32 16
-store i32 %r214, i32* %r216
-%r217 = lshr i576 %r213, 32
-%r218 = trunc i576 %r217 to i32
-%r220 = getelementptr i32, i32* %r149, i32 17
-store i32 %r218, i32* %r220
-ret void
-}
-define void @mcl_fpDbl_sqrPre17L(i32* noalias  %r1, i32* noalias  %r2)
-{
-%r3 = load i32, i32* %r2
-%r4 = call i576 @mulPv544x32(i32* %r2, i32 %r3)
-%r5 = trunc i576 %r4 to i32
-store i32 %r5, i32* %r1
-%r6 = lshr i576 %r4, 32
-%r8 = getelementptr i32, i32* %r2, i32 1
-%r9 = load i32, i32* %r8
-%r10 = call i576 @mulPv544x32(i32* %r2, i32 %r9)
-%r11 = add i576 %r6, %r10
-%r12 = trunc i576 %r11 to i32
-%r14 = getelementptr i32, i32* %r1, i32 1
-store i32 %r12, i32* %r14
-%r15 = lshr i576 %r11, 32
-%r17 = getelementptr i32, i32* %r2, i32 2
-%r18 = load i32, i32* %r17
-%r19 = call i576 @mulPv544x32(i32* %r2, i32 %r18)
-%r20 = add i576 %r15, %r19
-%r21 = trunc i576 %r20 to i32
-%r23 = getelementptr i32, i32* %r1, i32 2
-store i32 %r21, i32* %r23
-%r24 = lshr i576 %r20, 32
-%r26 = getelementptr i32, i32* %r2, i32 3
-%r27 = load i32, i32* %r26
-%r28 = call i576 @mulPv544x32(i32* %r2, i32 %r27)
-%r29 = add i576 %r24, %r28
-%r30 = trunc i576 %r29 to i32
-%r32 = getelementptr i32, i32* %r1, i32 3
-store i32 %r30, i32* %r32
-%r33 = lshr i576 %r29, 32
-%r35 = getelementptr i32, i32* %r2, i32 4
-%r36 = load i32, i32* %r35
-%r37 = call i576 @mulPv544x32(i32* %r2, i32 %r36)
-%r38 = add i576 %r33, %r37
-%r39 = trunc i576 %r38 to i32
-%r41 = getelementptr i32, i32* %r1, i32 4
-store i32 %r39, i32* %r41
-%r42 = lshr i576 %r38, 32
-%r44 = getelementptr i32, i32* %r2, i32 5
-%r45 = load i32, i32* %r44
-%r46 = call i576 @mulPv544x32(i32* %r2, i32 %r45)
-%r47 = add i576 %r42, %r46
-%r48 = trunc i576 %r47 to i32
-%r50 = getelementptr i32, i32* %r1, i32 5
-store i32 %r48, i32* %r50
-%r51 = lshr i576 %r47, 32
-%r53 = getelementptr i32, i32* %r2, i32 6
-%r54 = load i32, i32* %r53
-%r55 = call i576 @mulPv544x32(i32* %r2, i32 %r54)
-%r56 = add i576 %r51, %r55
-%r57 = trunc i576 %r56 to i32
-%r59 = getelementptr i32, i32* %r1, i32 6
-store i32 %r57, i32* %r59
-%r60 = lshr i576 %r56, 32
-%r62 = getelementptr i32, i32* %r2, i32 7
-%r63 = load i32, i32* %r62
-%r64 = call i576 @mulPv544x32(i32* %r2, i32 %r63)
+%r62 = mul i32 %r61, %r7
+%r63 = call i544 @mulPv512x32(i32* %r4, i32 %r62)
+%r64 = zext i544 %r63 to i576
 %r65 = add i576 %r60, %r64
-%r66 = trunc i576 %r65 to i32
-%r68 = getelementptr i32, i32* %r1, i32 7
-store i32 %r66, i32* %r68
-%r69 = lshr i576 %r65, 32
-%r71 = getelementptr i32, i32* %r2, i32 8
-%r72 = load i32, i32* %r71
-%r73 = call i576 @mulPv544x32(i32* %r2, i32 %r72)
-%r74 = add i576 %r69, %r73
-%r75 = trunc i576 %r74 to i32
-%r77 = getelementptr i32, i32* %r1, i32 8
-store i32 %r75, i32* %r77
-%r78 = lshr i576 %r74, 32
-%r80 = getelementptr i32, i32* %r2, i32 9
+%r66 = lshr i576 %r65, 32
+%r68 = getelementptr i32, i32* %r3, i32 5
+%r69 = load i32, i32* %r68
+%r70 = call i544 @mulPv512x32(i32* %r2, i32 %r69)
+%r71 = zext i544 %r70 to i576
+%r72 = add i576 %r66, %r71
+%r73 = trunc i576 %r72 to i32
+%r74 = mul i32 %r73, %r7
+%r75 = call i544 @mulPv512x32(i32* %r4, i32 %r74)
+%r76 = zext i544 %r75 to i576
+%r77 = add i576 %r72, %r76
+%r78 = lshr i576 %r77, 32
+%r80 = getelementptr i32, i32* %r3, i32 6
 %r81 = load i32, i32* %r80
-%r82 = call i576 @mulPv544x32(i32* %r2, i32 %r81)
-%r83 = add i576 %r78, %r82
-%r84 = trunc i576 %r83 to i32
-%r86 = getelementptr i32, i32* %r1, i32 9
-store i32 %r84, i32* %r86
-%r87 = lshr i576 %r83, 32
-%r89 = getelementptr i32, i32* %r2, i32 10
-%r90 = load i32, i32* %r89
-%r91 = call i576 @mulPv544x32(i32* %r2, i32 %r90)
-%r92 = add i576 %r87, %r91
-%r93 = trunc i576 %r92 to i32
-%r95 = getelementptr i32, i32* %r1, i32 10
-store i32 %r93, i32* %r95
-%r96 = lshr i576 %r92, 32
-%r98 = getelementptr i32, i32* %r2, i32 11
-%r99 = load i32, i32* %r98
-%r100 = call i576 @mulPv544x32(i32* %r2, i32 %r99)
+%r82 = call i544 @mulPv512x32(i32* %r2, i32 %r81)
+%r83 = zext i544 %r82 to i576
+%r84 = add i576 %r78, %r83
+%r85 = trunc i576 %r84 to i32
+%r86 = mul i32 %r85, %r7
+%r87 = call i544 @mulPv512x32(i32* %r4, i32 %r86)
+%r88 = zext i544 %r87 to i576
+%r89 = add i576 %r84, %r88
+%r90 = lshr i576 %r89, 32
+%r92 = getelementptr i32, i32* %r3, i32 7
+%r93 = load i32, i32* %r92
+%r94 = call i544 @mulPv512x32(i32* %r2, i32 %r93)
+%r95 = zext i544 %r94 to i576
+%r96 = add i576 %r90, %r95
+%r97 = trunc i576 %r96 to i32
+%r98 = mul i32 %r97, %r7
+%r99 = call i544 @mulPv512x32(i32* %r4, i32 %r98)
+%r100 = zext i544 %r99 to i576
 %r101 = add i576 %r96, %r100
-%r102 = trunc i576 %r101 to i32
-%r104 = getelementptr i32, i32* %r1, i32 11
-store i32 %r102, i32* %r104
-%r105 = lshr i576 %r101, 32
-%r107 = getelementptr i32, i32* %r2, i32 12
-%r108 = load i32, i32* %r107
-%r109 = call i576 @mulPv544x32(i32* %r2, i32 %r108)
-%r110 = add i576 %r105, %r109
-%r111 = trunc i576 %r110 to i32
-%r113 = getelementptr i32, i32* %r1, i32 12
-store i32 %r111, i32* %r113
-%r114 = lshr i576 %r110, 32
-%r116 = getelementptr i32, i32* %r2, i32 13
+%r102 = lshr i576 %r101, 32
+%r104 = getelementptr i32, i32* %r3, i32 8
+%r105 = load i32, i32* %r104
+%r106 = call i544 @mulPv512x32(i32* %r2, i32 %r105)
+%r107 = zext i544 %r106 to i576
+%r108 = add i576 %r102, %r107
+%r109 = trunc i576 %r108 to i32
+%r110 = mul i32 %r109, %r7
+%r111 = call i544 @mulPv512x32(i32* %r4, i32 %r110)
+%r112 = zext i544 %r111 to i576
+%r113 = add i576 %r108, %r112
+%r114 = lshr i576 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 9
 %r117 = load i32, i32* %r116
-%r118 = call i576 @mulPv544x32(i32* %r2, i32 %r117)
-%r119 = add i576 %r114, %r118
-%r120 = trunc i576 %r119 to i32
-%r122 = getelementptr i32, i32* %r1, i32 13
-store i32 %r120, i32* %r122
-%r123 = lshr i576 %r119, 32
-%r125 = getelementptr i32, i32* %r2, i32 14
-%r126 = load i32, i32* %r125
-%r127 = call i576 @mulPv544x32(i32* %r2, i32 %r126)
-%r128 = add i576 %r123, %r127
-%r129 = trunc i576 %r128 to i32
-%r131 = getelementptr i32, i32* %r1, i32 14
-store i32 %r129, i32* %r131
-%r132 = lshr i576 %r128, 32
-%r134 = getelementptr i32, i32* %r2, i32 15
-%r135 = load i32, i32* %r134
-%r136 = call i576 @mulPv544x32(i32* %r2, i32 %r135)
+%r118 = call i544 @mulPv512x32(i32* %r2, i32 %r117)
+%r119 = zext i544 %r118 to i576
+%r120 = add i576 %r114, %r119
+%r121 = trunc i576 %r120 to i32
+%r122 = mul i32 %r121, %r7
+%r123 = call i544 @mulPv512x32(i32* %r4, i32 %r122)
+%r124 = zext i544 %r123 to i576
+%r125 = add i576 %r120, %r124
+%r126 = lshr i576 %r125, 32
+%r128 = getelementptr i32, i32* %r3, i32 10
+%r129 = load i32, i32* %r128
+%r130 = call i544 @mulPv512x32(i32* %r2, i32 %r129)
+%r131 = zext i544 %r130 to i576
+%r132 = add i576 %r126, %r131
+%r133 = trunc i576 %r132 to i32
+%r134 = mul i32 %r133, %r7
+%r135 = call i544 @mulPv512x32(i32* %r4, i32 %r134)
+%r136 = zext i544 %r135 to i576
 %r137 = add i576 %r132, %r136
-%r138 = trunc i576 %r137 to i32
-%r140 = getelementptr i32, i32* %r1, i32 15
-store i32 %r138, i32* %r140
-%r141 = lshr i576 %r137, 32
-%r143 = getelementptr i32, i32* %r2, i32 16
-%r144 = load i32, i32* %r143
-%r145 = call i576 @mulPv544x32(i32* %r2, i32 %r144)
-%r146 = add i576 %r141, %r145
-%r148 = getelementptr i32, i32* %r1, i32 16
-%r149 = trunc i576 %r146 to i32
-%r151 = getelementptr i32, i32* %r148, i32 0
-store i32 %r149, i32* %r151
-%r152 = lshr i576 %r146, 32
-%r153 = trunc i576 %r152 to i32
-%r155 = getelementptr i32, i32* %r148, i32 1
-store i32 %r153, i32* %r155
-%r156 = lshr i576 %r152, 32
+%r138 = lshr i576 %r137, 32
+%r140 = getelementptr i32, i32* %r3, i32 11
+%r141 = load i32, i32* %r140
+%r142 = call i544 @mulPv512x32(i32* %r2, i32 %r141)
+%r143 = zext i544 %r142 to i576
+%r144 = add i576 %r138, %r143
+%r145 = trunc i576 %r144 to i32
+%r146 = mul i32 %r145, %r7
+%r147 = call i544 @mulPv512x32(i32* %r4, i32 %r146)
+%r148 = zext i544 %r147 to i576
+%r149 = add i576 %r144, %r148
+%r150 = lshr i576 %r149, 32
+%r152 = getelementptr i32, i32* %r3, i32 12
+%r153 = load i32, i32* %r152
+%r154 = call i544 @mulPv512x32(i32* %r2, i32 %r153)
+%r155 = zext i544 %r154 to i576
+%r156 = add i576 %r150, %r155
 %r157 = trunc i576 %r156 to i32
-%r159 = getelementptr i32, i32* %r148, i32 2
-store i32 %r157, i32* %r159
-%r160 = lshr i576 %r156, 32
-%r161 = trunc i576 %r160 to i32
-%r163 = getelementptr i32, i32* %r148, i32 3
-store i32 %r161, i32* %r163
-%r164 = lshr i576 %r160, 32
-%r165 = trunc i576 %r164 to i32
-%r167 = getelementptr i32, i32* %r148, i32 4
-store i32 %r165, i32* %r167
-%r168 = lshr i576 %r164, 32
+%r158 = mul i32 %r157, %r7
+%r159 = call i544 @mulPv512x32(i32* %r4, i32 %r158)
+%r160 = zext i544 %r159 to i576
+%r161 = add i576 %r156, %r160
+%r162 = lshr i576 %r161, 32
+%r164 = getelementptr i32, i32* %r3, i32 13
+%r165 = load i32, i32* %r164
+%r166 = call i544 @mulPv512x32(i32* %r2, i32 %r165)
+%r167 = zext i544 %r166 to i576
+%r168 = add i576 %r162, %r167
 %r169 = trunc i576 %r168 to i32
-%r171 = getelementptr i32, i32* %r148, i32 5
-store i32 %r169, i32* %r171
-%r172 = lshr i576 %r168, 32
-%r173 = trunc i576 %r172 to i32
-%r175 = getelementptr i32, i32* %r148, i32 6
-store i32 %r173, i32* %r175
-%r176 = lshr i576 %r172, 32
-%r177 = trunc i576 %r176 to i32
-%r179 = getelementptr i32, i32* %r148, i32 7
-store i32 %r177, i32* %r179
-%r180 = lshr i576 %r176, 32
+%r170 = mul i32 %r169, %r7
+%r171 = call i544 @mulPv512x32(i32* %r4, i32 %r170)
+%r172 = zext i544 %r171 to i576
+%r173 = add i576 %r168, %r172
+%r174 = lshr i576 %r173, 32
+%r176 = getelementptr i32, i32* %r3, i32 14
+%r177 = load i32, i32* %r176
+%r178 = call i544 @mulPv512x32(i32* %r2, i32 %r177)
+%r179 = zext i544 %r178 to i576
+%r180 = add i576 %r174, %r179
 %r181 = trunc i576 %r180 to i32
-%r183 = getelementptr i32, i32* %r148, i32 8
-store i32 %r181, i32* %r183
-%r184 = lshr i576 %r180, 32
-%r185 = trunc i576 %r184 to i32
-%r187 = getelementptr i32, i32* %r148, i32 9
-store i32 %r185, i32* %r187
-%r188 = lshr i576 %r184, 32
-%r189 = trunc i576 %r188 to i32
-%r191 = getelementptr i32, i32* %r148, i32 10
-store i32 %r189, i32* %r191
-%r192 = lshr i576 %r188, 32
+%r182 = mul i32 %r181, %r7
+%r183 = call i544 @mulPv512x32(i32* %r4, i32 %r182)
+%r184 = zext i544 %r183 to i576
+%r185 = add i576 %r180, %r184
+%r186 = lshr i576 %r185, 32
+%r188 = getelementptr i32, i32* %r3, i32 15
+%r189 = load i32, i32* %r188
+%r190 = call i544 @mulPv512x32(i32* %r2, i32 %r189)
+%r191 = zext i544 %r190 to i576
+%r192 = add i576 %r186, %r191
 %r193 = trunc i576 %r192 to i32
-%r195 = getelementptr i32, i32* %r148, i32 11
-store i32 %r193, i32* %r195
-%r196 = lshr i576 %r192, 32
-%r197 = trunc i576 %r196 to i32
-%r199 = getelementptr i32, i32* %r148, i32 12
-store i32 %r197, i32* %r199
-%r200 = lshr i576 %r196, 32
-%r201 = trunc i576 %r200 to i32
-%r203 = getelementptr i32, i32* %r148, i32 13
-store i32 %r201, i32* %r203
-%r204 = lshr i576 %r200, 32
-%r205 = trunc i576 %r204 to i32
-%r207 = getelementptr i32, i32* %r148, i32 14
-store i32 %r205, i32* %r207
-%r208 = lshr i576 %r204, 32
-%r209 = trunc i576 %r208 to i32
-%r211 = getelementptr i32, i32* %r148, i32 15
-store i32 %r209, i32* %r211
-%r212 = lshr i576 %r208, 32
-%r213 = trunc i576 %r212 to i32
-%r215 = getelementptr i32, i32* %r148, i32 16
-store i32 %r213, i32* %r215
-%r216 = lshr i576 %r212, 32
-%r217 = trunc i576 %r216 to i32
-%r219 = getelementptr i32, i32* %r148, i32 17
-store i32 %r217, i32* %r219
+%r194 = mul i32 %r193, %r7
+%r195 = call i544 @mulPv512x32(i32* %r4, i32 %r194)
+%r196 = zext i544 %r195 to i576
+%r197 = add i576 %r192, %r196
+%r198 = lshr i576 %r197, 32
+%r199 = trunc i576 %r198 to i544
+%r200 = load i32, i32* %r4
+%r201 = zext i32 %r200 to i64
+%r203 = getelementptr i32, i32* %r4, i32 1
+%r204 = load i32, i32* %r203
+%r205 = zext i32 %r204 to i64
+%r206 = shl i64 %r205, 32
+%r207 = or i64 %r201, %r206
+%r208 = zext i64 %r207 to i96
+%r210 = getelementptr i32, i32* %r4, i32 2
+%r211 = load i32, i32* %r210
+%r212 = zext i32 %r211 to i96
+%r213 = shl i96 %r212, 64
+%r214 = or i96 %r208, %r213
+%r215 = zext i96 %r214 to i128
+%r217 = getelementptr i32, i32* %r4, i32 3
+%r218 = load i32, i32* %r217
+%r219 = zext i32 %r218 to i128
+%r220 = shl i128 %r219, 96
+%r221 = or i128 %r215, %r220
+%r222 = zext i128 %r221 to i160
+%r224 = getelementptr i32, i32* %r4, i32 4
+%r225 = load i32, i32* %r224
+%r226 = zext i32 %r225 to i160
+%r227 = shl i160 %r226, 128
+%r228 = or i160 %r222, %r227
+%r229 = zext i160 %r228 to i192
+%r231 = getelementptr i32, i32* %r4, i32 5
+%r232 = load i32, i32* %r231
+%r233 = zext i32 %r232 to i192
+%r234 = shl i192 %r233, 160
+%r235 = or i192 %r229, %r234
+%r236 = zext i192 %r235 to i224
+%r238 = getelementptr i32, i32* %r4, i32 6
+%r239 = load i32, i32* %r238
+%r240 = zext i32 %r239 to i224
+%r241 = shl i224 %r240, 192
+%r242 = or i224 %r236, %r241
+%r243 = zext i224 %r242 to i256
+%r245 = getelementptr i32, i32* %r4, i32 7
+%r246 = load i32, i32* %r245
+%r247 = zext i32 %r246 to i256
+%r248 = shl i256 %r247, 224
+%r249 = or i256 %r243, %r248
+%r250 = zext i256 %r249 to i288
+%r252 = getelementptr i32, i32* %r4, i32 8
+%r253 = load i32, i32* %r252
+%r254 = zext i32 %r253 to i288
+%r255 = shl i288 %r254, 256
+%r256 = or i288 %r250, %r255
+%r257 = zext i288 %r256 to i320
+%r259 = getelementptr i32, i32* %r4, i32 9
+%r260 = load i32, i32* %r259
+%r261 = zext i32 %r260 to i320
+%r262 = shl i320 %r261, 288
+%r263 = or i320 %r257, %r262
+%r264 = zext i320 %r263 to i352
+%r266 = getelementptr i32, i32* %r4, i32 10
+%r267 = load i32, i32* %r266
+%r268 = zext i32 %r267 to i352
+%r269 = shl i352 %r268, 320
+%r270 = or i352 %r264, %r269
+%r271 = zext i352 %r270 to i384
+%r273 = getelementptr i32, i32* %r4, i32 11
+%r274 = load i32, i32* %r273
+%r275 = zext i32 %r274 to i384
+%r276 = shl i384 %r275, 352
+%r277 = or i384 %r271, %r276
+%r278 = zext i384 %r277 to i416
+%r280 = getelementptr i32, i32* %r4, i32 12
+%r281 = load i32, i32* %r280
+%r282 = zext i32 %r281 to i416
+%r283 = shl i416 %r282, 384
+%r284 = or i416 %r278, %r283
+%r285 = zext i416 %r284 to i448
+%r287 = getelementptr i32, i32* %r4, i32 13
+%r288 = load i32, i32* %r287
+%r289 = zext i32 %r288 to i448
+%r290 = shl i448 %r289, 416
+%r291 = or i448 %r285, %r290
+%r292 = zext i448 %r291 to i480
+%r294 = getelementptr i32, i32* %r4, i32 14
+%r295 = load i32, i32* %r294
+%r296 = zext i32 %r295 to i480
+%r297 = shl i480 %r296, 448
+%r298 = or i480 %r292, %r297
+%r299 = zext i480 %r298 to i512
+%r301 = getelementptr i32, i32* %r4, i32 15
+%r302 = load i32, i32* %r301
+%r303 = zext i32 %r302 to i512
+%r304 = shl i512 %r303, 480
+%r305 = or i512 %r299, %r304
+%r306 = zext i512 %r305 to i544
+%r307 = sub i544 %r199, %r306
+%r308 = lshr i544 %r307, 512
+%r309 = trunc i544 %r308 to i1
+%r310 = select i1 %r309, i544 %r199, i544 %r307
+%r311 = trunc i544 %r310 to i512
+%r313 = getelementptr i32, i32* %r1, i32 0
+%r314 = trunc i512 %r311 to i32
+store i32 %r314, i32* %r313
+%r315 = lshr i512 %r311, 32
+%r317 = getelementptr i32, i32* %r1, i32 1
+%r318 = trunc i512 %r315 to i32
+store i32 %r318, i32* %r317
+%r319 = lshr i512 %r315, 32
+%r321 = getelementptr i32, i32* %r1, i32 2
+%r322 = trunc i512 %r319 to i32
+store i32 %r322, i32* %r321
+%r323 = lshr i512 %r319, 32
+%r325 = getelementptr i32, i32* %r1, i32 3
+%r326 = trunc i512 %r323 to i32
+store i32 %r326, i32* %r325
+%r327 = lshr i512 %r323, 32
+%r329 = getelementptr i32, i32* %r1, i32 4
+%r330 = trunc i512 %r327 to i32
+store i32 %r330, i32* %r329
+%r331 = lshr i512 %r327, 32
+%r333 = getelementptr i32, i32* %r1, i32 5
+%r334 = trunc i512 %r331 to i32
+store i32 %r334, i32* %r333
+%r335 = lshr i512 %r331, 32
+%r337 = getelementptr i32, i32* %r1, i32 6
+%r338 = trunc i512 %r335 to i32
+store i32 %r338, i32* %r337
+%r339 = lshr i512 %r335, 32
+%r341 = getelementptr i32, i32* %r1, i32 7
+%r342 = trunc i512 %r339 to i32
+store i32 %r342, i32* %r341
+%r343 = lshr i512 %r339, 32
+%r345 = getelementptr i32, i32* %r1, i32 8
+%r346 = trunc i512 %r343 to i32
+store i32 %r346, i32* %r345
+%r347 = lshr i512 %r343, 32
+%r349 = getelementptr i32, i32* %r1, i32 9
+%r350 = trunc i512 %r347 to i32
+store i32 %r350, i32* %r349
+%r351 = lshr i512 %r347, 32
+%r353 = getelementptr i32, i32* %r1, i32 10
+%r354 = trunc i512 %r351 to i32
+store i32 %r354, i32* %r353
+%r355 = lshr i512 %r351, 32
+%r357 = getelementptr i32, i32* %r1, i32 11
+%r358 = trunc i512 %r355 to i32
+store i32 %r358, i32* %r357
+%r359 = lshr i512 %r355, 32
+%r361 = getelementptr i32, i32* %r1, i32 12
+%r362 = trunc i512 %r359 to i32
+store i32 %r362, i32* %r361
+%r363 = lshr i512 %r359, 32
+%r365 = getelementptr i32, i32* %r1, i32 13
+%r366 = trunc i512 %r363 to i32
+store i32 %r366, i32* %r365
+%r367 = lshr i512 %r363, 32
+%r369 = getelementptr i32, i32* %r1, i32 14
+%r370 = trunc i512 %r367 to i32
+store i32 %r370, i32* %r369
+%r371 = lshr i512 %r367, 32
+%r373 = getelementptr i32, i32* %r1, i32 15
+%r374 = trunc i512 %r371 to i32
+store i32 %r374, i32* %r373
 ret void
 }
-define void @mcl_fp_mont17L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+define void @mcl_fp_montNF16L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
 {
 %r6 = getelementptr i32, i32* %r4, i32 -1
 %r7 = load i32, i32* %r6
-%r9 = getelementptr i32, i32* %r3, i32 0
-%r10 = load i32, i32* %r9
-%r11 = call i576 @mulPv544x32(i32* %r2, i32 %r10)
-%r12 = zext i576 %r11 to i608
-%r13 = trunc i576 %r11 to i32
-%r14 = mul i32 %r13, %r7
-%r15 = call i576 @mulPv544x32(i32* %r4, i32 %r14)
-%r16 = zext i576 %r15 to i608
-%r17 = add i608 %r12, %r16
-%r18 = lshr i608 %r17, 32
-%r20 = getelementptr i32, i32* %r3, i32 1
-%r21 = load i32, i32* %r20
-%r22 = call i576 @mulPv544x32(i32* %r2, i32 %r21)
-%r23 = zext i576 %r22 to i608
-%r24 = add i608 %r18, %r23
-%r25 = trunc i608 %r24 to i32
-%r26 = mul i32 %r25, %r7
-%r27 = call i576 @mulPv544x32(i32* %r4, i32 %r26)
-%r28 = zext i576 %r27 to i608
-%r29 = add i608 %r24, %r28
-%r30 = lshr i608 %r29, 32
-%r32 = getelementptr i32, i32* %r3, i32 2
-%r33 = load i32, i32* %r32
-%r34 = call i576 @mulPv544x32(i32* %r2, i32 %r33)
-%r35 = zext i576 %r34 to i608
-%r36 = add i608 %r30, %r35
-%r37 = trunc i608 %r36 to i32
-%r38 = mul i32 %r37, %r7
-%r39 = call i576 @mulPv544x32(i32* %r4, i32 %r38)
-%r40 = zext i576 %r39 to i608
-%r41 = add i608 %r36, %r40
-%r42 = lshr i608 %r41, 32
-%r44 = getelementptr i32, i32* %r3, i32 3
-%r45 = load i32, i32* %r44
-%r46 = call i576 @mulPv544x32(i32* %r2, i32 %r45)
-%r47 = zext i576 %r46 to i608
-%r48 = add i608 %r42, %r47
-%r49 = trunc i608 %r48 to i32
-%r50 = mul i32 %r49, %r7
-%r51 = call i576 @mulPv544x32(i32* %r4, i32 %r50)
-%r52 = zext i576 %r51 to i608
-%r53 = add i608 %r48, %r52
-%r54 = lshr i608 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 4
-%r57 = load i32, i32* %r56
-%r58 = call i576 @mulPv544x32(i32* %r2, i32 %r57)
-%r59 = zext i576 %r58 to i608
-%r60 = add i608 %r54, %r59
-%r61 = trunc i608 %r60 to i32
-%r62 = mul i32 %r61, %r7
-%r63 = call i576 @mulPv544x32(i32* %r4, i32 %r62)
-%r64 = zext i576 %r63 to i608
-%r65 = add i608 %r60, %r64
-%r66 = lshr i608 %r65, 32
-%r68 = getelementptr i32, i32* %r3, i32 5
-%r69 = load i32, i32* %r68
-%r70 = call i576 @mulPv544x32(i32* %r2, i32 %r69)
-%r71 = zext i576 %r70 to i608
-%r72 = add i608 %r66, %r71
-%r73 = trunc i608 %r72 to i32
-%r74 = mul i32 %r73, %r7
-%r75 = call i576 @mulPv544x32(i32* %r4, i32 %r74)
-%r76 = zext i576 %r75 to i608
-%r77 = add i608 %r72, %r76
-%r78 = lshr i608 %r77, 32
-%r80 = getelementptr i32, i32* %r3, i32 6
-%r81 = load i32, i32* %r80
-%r82 = call i576 @mulPv544x32(i32* %r2, i32 %r81)
-%r83 = zext i576 %r82 to i608
-%r84 = add i608 %r78, %r83
-%r85 = trunc i608 %r84 to i32
-%r86 = mul i32 %r85, %r7
-%r87 = call i576 @mulPv544x32(i32* %r4, i32 %r86)
-%r88 = zext i576 %r87 to i608
-%r89 = add i608 %r84, %r88
-%r90 = lshr i608 %r89, 32
-%r92 = getelementptr i32, i32* %r3, i32 7
-%r93 = load i32, i32* %r92
-%r94 = call i576 @mulPv544x32(i32* %r2, i32 %r93)
-%r95 = zext i576 %r94 to i608
-%r96 = add i608 %r90, %r95
-%r97 = trunc i608 %r96 to i32
-%r98 = mul i32 %r97, %r7
-%r99 = call i576 @mulPv544x32(i32* %r4, i32 %r98)
-%r100 = zext i576 %r99 to i608
-%r101 = add i608 %r96, %r100
-%r102 = lshr i608 %r101, 32
-%r104 = getelementptr i32, i32* %r3, i32 8
-%r105 = load i32, i32* %r104
-%r106 = call i576 @mulPv544x32(i32* %r2, i32 %r105)
-%r107 = zext i576 %r106 to i608
-%r108 = add i608 %r102, %r107
-%r109 = trunc i608 %r108 to i32
-%r110 = mul i32 %r109, %r7
-%r111 = call i576 @mulPv544x32(i32* %r4, i32 %r110)
-%r112 = zext i576 %r111 to i608
-%r113 = add i608 %r108, %r112
-%r114 = lshr i608 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 9
+%r8 = load i32, i32* %r3
+%r9 = call i544 @mulPv512x32(i32* %r2, i32 %r8)
+%r10 = trunc i544 %r9 to i32
+%r11 = mul i32 %r10, %r7
+%r12 = call i544 @mulPv512x32(i32* %r4, i32 %r11)
+%r13 = add i544 %r9, %r12
+%r14 = lshr i544 %r13, 32
+%r16 = getelementptr i32, i32* %r3, i32 1
+%r17 = load i32, i32* %r16
+%r18 = call i544 @mulPv512x32(i32* %r2, i32 %r17)
+%r19 = add i544 %r14, %r18
+%r20 = trunc i544 %r19 to i32
+%r21 = mul i32 %r20, %r7
+%r22 = call i544 @mulPv512x32(i32* %r4, i32 %r21)
+%r23 = add i544 %r19, %r22
+%r24 = lshr i544 %r23, 32
+%r26 = getelementptr i32, i32* %r3, i32 2
+%r27 = load i32, i32* %r26
+%r28 = call i544 @mulPv512x32(i32* %r2, i32 %r27)
+%r29 = add i544 %r24, %r28
+%r30 = trunc i544 %r29 to i32
+%r31 = mul i32 %r30, %r7
+%r32 = call i544 @mulPv512x32(i32* %r4, i32 %r31)
+%r33 = add i544 %r29, %r32
+%r34 = lshr i544 %r33, 32
+%r36 = getelementptr i32, i32* %r3, i32 3
+%r37 = load i32, i32* %r36
+%r38 = call i544 @mulPv512x32(i32* %r2, i32 %r37)
+%r39 = add i544 %r34, %r38
+%r40 = trunc i544 %r39 to i32
+%r41 = mul i32 %r40, %r7
+%r42 = call i544 @mulPv512x32(i32* %r4, i32 %r41)
+%r43 = add i544 %r39, %r42
+%r44 = lshr i544 %r43, 32
+%r46 = getelementptr i32, i32* %r3, i32 4
+%r47 = load i32, i32* %r46
+%r48 = call i544 @mulPv512x32(i32* %r2, i32 %r47)
+%r49 = add i544 %r44, %r48
+%r50 = trunc i544 %r49 to i32
+%r51 = mul i32 %r50, %r7
+%r52 = call i544 @mulPv512x32(i32* %r4, i32 %r51)
+%r53 = add i544 %r49, %r52
+%r54 = lshr i544 %r53, 32
+%r56 = getelementptr i32, i32* %r3, i32 5
+%r57 = load i32, i32* %r56
+%r58 = call i544 @mulPv512x32(i32* %r2, i32 %r57)
+%r59 = add i544 %r54, %r58
+%r60 = trunc i544 %r59 to i32
+%r61 = mul i32 %r60, %r7
+%r62 = call i544 @mulPv512x32(i32* %r4, i32 %r61)
+%r63 = add i544 %r59, %r62
+%r64 = lshr i544 %r63, 32
+%r66 = getelementptr i32, i32* %r3, i32 6
+%r67 = load i32, i32* %r66
+%r68 = call i544 @mulPv512x32(i32* %r2, i32 %r67)
+%r69 = add i544 %r64, %r68
+%r70 = trunc i544 %r69 to i32
+%r71 = mul i32 %r70, %r7
+%r72 = call i544 @mulPv512x32(i32* %r4, i32 %r71)
+%r73 = add i544 %r69, %r72
+%r74 = lshr i544 %r73, 32
+%r76 = getelementptr i32, i32* %r3, i32 7
+%r77 = load i32, i32* %r76
+%r78 = call i544 @mulPv512x32(i32* %r2, i32 %r77)
+%r79 = add i544 %r74, %r78
+%r80 = trunc i544 %r79 to i32
+%r81 = mul i32 %r80, %r7
+%r82 = call i544 @mulPv512x32(i32* %r4, i32 %r81)
+%r83 = add i544 %r79, %r82
+%r84 = lshr i544 %r83, 32
+%r86 = getelementptr i32, i32* %r3, i32 8
+%r87 = load i32, i32* %r86
+%r88 = call i544 @mulPv512x32(i32* %r2, i32 %r87)
+%r89 = add i544 %r84, %r88
+%r90 = trunc i544 %r89 to i32
+%r91 = mul i32 %r90, %r7
+%r92 = call i544 @mulPv512x32(i32* %r4, i32 %r91)
+%r93 = add i544 %r89, %r92
+%r94 = lshr i544 %r93, 32
+%r96 = getelementptr i32, i32* %r3, i32 9
+%r97 = load i32, i32* %r96
+%r98 = call i544 @mulPv512x32(i32* %r2, i32 %r97)
+%r99 = add i544 %r94, %r98
+%r100 = trunc i544 %r99 to i32
+%r101 = mul i32 %r100, %r7
+%r102 = call i544 @mulPv512x32(i32* %r4, i32 %r101)
+%r103 = add i544 %r99, %r102
+%r104 = lshr i544 %r103, 32
+%r106 = getelementptr i32, i32* %r3, i32 10
+%r107 = load i32, i32* %r106
+%r108 = call i544 @mulPv512x32(i32* %r2, i32 %r107)
+%r109 = add i544 %r104, %r108
+%r110 = trunc i544 %r109 to i32
+%r111 = mul i32 %r110, %r7
+%r112 = call i544 @mulPv512x32(i32* %r4, i32 %r111)
+%r113 = add i544 %r109, %r112
+%r114 = lshr i544 %r113, 32
+%r116 = getelementptr i32, i32* %r3, i32 11
 %r117 = load i32, i32* %r116
-%r118 = call i576 @mulPv544x32(i32* %r2, i32 %r117)
-%r119 = zext i576 %r118 to i608
-%r120 = add i608 %r114, %r119
-%r121 = trunc i608 %r120 to i32
-%r122 = mul i32 %r121, %r7
-%r123 = call i576 @mulPv544x32(i32* %r4, i32 %r122)
-%r124 = zext i576 %r123 to i608
-%r125 = add i608 %r120, %r124
-%r126 = lshr i608 %r125, 32
-%r128 = getelementptr i32, i32* %r3, i32 10
-%r129 = load i32, i32* %r128
-%r130 = call i576 @mulPv544x32(i32* %r2, i32 %r129)
-%r131 = zext i576 %r130 to i608
-%r132 = add i608 %r126, %r131
-%r133 = trunc i608 %r132 to i32
-%r134 = mul i32 %r133, %r7
-%r135 = call i576 @mulPv544x32(i32* %r4, i32 %r134)
-%r136 = zext i576 %r135 to i608
-%r137 = add i608 %r132, %r136
-%r138 = lshr i608 %r137, 32
-%r140 = getelementptr i32, i32* %r3, i32 11
-%r141 = load i32, i32* %r140
-%r142 = call i576 @mulPv544x32(i32* %r2, i32 %r141)
-%r143 = zext i576 %r142 to i608
-%r144 = add i608 %r138, %r143
-%r145 = trunc i608 %r144 to i32
-%r146 = mul i32 %r145, %r7
-%r147 = call i576 @mulPv544x32(i32* %r4, i32 %r146)
-%r148 = zext i576 %r147 to i608
-%r149 = add i608 %r144, %r148
-%r150 = lshr i608 %r149, 32
-%r152 = getelementptr i32, i32* %r3, i32 12
-%r153 = load i32, i32* %r152
-%r154 = call i576 @mulPv544x32(i32* %r2, i32 %r153)
-%r155 = zext i576 %r154 to i608
-%r156 = add i608 %r150, %r155
-%r157 = trunc i608 %r156 to i32
-%r158 = mul i32 %r157, %r7
-%r159 = call i576 @mulPv544x32(i32* %r4, i32 %r158)
-%r160 = zext i576 %r159 to i608
-%r161 = add i608 %r156, %r160
-%r162 = lshr i608 %r161, 32
-%r164 = getelementptr i32, i32* %r3, i32 13
-%r165 = load i32, i32* %r164
-%r166 = call i576 @mulPv544x32(i32* %r2, i32 %r165)
-%r167 = zext i576 %r166 to i608
-%r168 = add i608 %r162, %r167
-%r169 = trunc i608 %r168 to i32
-%r170 = mul i32 %r169, %r7
-%r171 = call i576 @mulPv544x32(i32* %r4, i32 %r170)
-%r172 = zext i576 %r171 to i608
-%r173 = add i608 %r168, %r172
-%r174 = lshr i608 %r173, 32
-%r176 = getelementptr i32, i32* %r3, i32 14
+%r118 = call i544 @mulPv512x32(i32* %r2, i32 %r117)
+%r119 = add i544 %r114, %r118
+%r120 = trunc i544 %r119 to i32
+%r121 = mul i32 %r120, %r7
+%r122 = call i544 @mulPv512x32(i32* %r4, i32 %r121)
+%r123 = add i544 %r119, %r122
+%r124 = lshr i544 %r123, 32
+%r126 = getelementptr i32, i32* %r3, i32 12
+%r127 = load i32, i32* %r126
+%r128 = call i544 @mulPv512x32(i32* %r2, i32 %r127)
+%r129 = add i544 %r124, %r128
+%r130 = trunc i544 %r129 to i32
+%r131 = mul i32 %r130, %r7
+%r132 = call i544 @mulPv512x32(i32* %r4, i32 %r131)
+%r133 = add i544 %r129, %r132
+%r134 = lshr i544 %r133, 32
+%r136 = getelementptr i32, i32* %r3, i32 13
+%r137 = load i32, i32* %r136
+%r138 = call i544 @mulPv512x32(i32* %r2, i32 %r137)
+%r139 = add i544 %r134, %r138
+%r140 = trunc i544 %r139 to i32
+%r141 = mul i32 %r140, %r7
+%r142 = call i544 @mulPv512x32(i32* %r4, i32 %r141)
+%r143 = add i544 %r139, %r142
+%r144 = lshr i544 %r143, 32
+%r146 = getelementptr i32, i32* %r3, i32 14
+%r147 = load i32, i32* %r146
+%r148 = call i544 @mulPv512x32(i32* %r2, i32 %r147)
+%r149 = add i544 %r144, %r148
+%r150 = trunc i544 %r149 to i32
+%r151 = mul i32 %r150, %r7
+%r152 = call i544 @mulPv512x32(i32* %r4, i32 %r151)
+%r153 = add i544 %r149, %r152
+%r154 = lshr i544 %r153, 32
+%r156 = getelementptr i32, i32* %r3, i32 15
+%r157 = load i32, i32* %r156
+%r158 = call i544 @mulPv512x32(i32* %r2, i32 %r157)
+%r159 = add i544 %r154, %r158
+%r160 = trunc i544 %r159 to i32
+%r161 = mul i32 %r160, %r7
+%r162 = call i544 @mulPv512x32(i32* %r4, i32 %r161)
+%r163 = add i544 %r159, %r162
+%r164 = lshr i544 %r163, 32
+%r165 = trunc i544 %r164 to i512
+%r166 = load i32, i32* %r4
+%r167 = zext i32 %r166 to i64
+%r169 = getelementptr i32, i32* %r4, i32 1
+%r170 = load i32, i32* %r169
+%r171 = zext i32 %r170 to i64
+%r172 = shl i64 %r171, 32
+%r173 = or i64 %r167, %r172
+%r174 = zext i64 %r173 to i96
+%r176 = getelementptr i32, i32* %r4, i32 2
 %r177 = load i32, i32* %r176
-%r178 = call i576 @mulPv544x32(i32* %r2, i32 %r177)
-%r179 = zext i576 %r178 to i608
-%r180 = add i608 %r174, %r179
-%r181 = trunc i608 %r180 to i32
-%r182 = mul i32 %r181, %r7
-%r183 = call i576 @mulPv544x32(i32* %r4, i32 %r182)
-%r184 = zext i576 %r183 to i608
-%r185 = add i608 %r180, %r184
-%r186 = lshr i608 %r185, 32
-%r188 = getelementptr i32, i32* %r3, i32 15
-%r189 = load i32, i32* %r188
-%r190 = call i576 @mulPv544x32(i32* %r2, i32 %r189)
-%r191 = zext i576 %r190 to i608
-%r192 = add i608 %r186, %r191
-%r193 = trunc i608 %r192 to i32
-%r194 = mul i32 %r193, %r7
-%r195 = call i576 @mulPv544x32(i32* %r4, i32 %r194)
-%r196 = zext i576 %r195 to i608
-%r197 = add i608 %r192, %r196
-%r198 = lshr i608 %r197, 32
-%r200 = getelementptr i32, i32* %r3, i32 16
-%r201 = load i32, i32* %r200
-%r202 = call i576 @mulPv544x32(i32* %r2, i32 %r201)
-%r203 = zext i576 %r202 to i608
-%r204 = add i608 %r198, %r203
-%r205 = trunc i608 %r204 to i32
-%r206 = mul i32 %r205, %r7
-%r207 = call i576 @mulPv544x32(i32* %r4, i32 %r206)
-%r208 = zext i576 %r207 to i608
-%r209 = add i608 %r204, %r208
-%r210 = lshr i608 %r209, 32
-%r211 = trunc i608 %r210 to i576
-%r212 = load i32, i32* %r4
-%r213 = zext i32 %r212 to i64
-%r215 = getelementptr i32, i32* %r4, i32 1
-%r216 = load i32, i32* %r215
-%r217 = zext i32 %r216 to i64
-%r218 = shl i64 %r217, 32
-%r219 = or i64 %r213, %r218
-%r220 = zext i64 %r219 to i96
-%r222 = getelementptr i32, i32* %r4, i32 2
-%r223 = load i32, i32* %r222
-%r224 = zext i32 %r223 to i96
-%r225 = shl i96 %r224, 64
-%r226 = or i96 %r220, %r225
-%r227 = zext i96 %r226 to i128
-%r229 = getelementptr i32, i32* %r4, i32 3
-%r230 = load i32, i32* %r229
-%r231 = zext i32 %r230 to i128
-%r232 = shl i128 %r231, 96
-%r233 = or i128 %r227, %r232
-%r234 = zext i128 %r233 to i160
-%r236 = getelementptr i32, i32* %r4, i32 4
-%r237 = load i32, i32* %r236
-%r238 = zext i32 %r237 to i160
-%r239 = shl i160 %r238, 128
-%r240 = or i160 %r234, %r239
-%r241 = zext i160 %r240 to i192
-%r243 = getelementptr i32, i32* %r4, i32 5
-%r244 = load i32, i32* %r243
-%r245 = zext i32 %r244 to i192
-%r246 = shl i192 %r245, 160
-%r247 = or i192 %r241, %r246
-%r248 = zext i192 %r247 to i224
-%r250 = getelementptr i32, i32* %r4, i32 6
-%r251 = load i32, i32* %r250
-%r252 = zext i32 %r251 to i224
-%r253 = shl i224 %r252, 192
-%r254 = or i224 %r248, %r253
-%r255 = zext i224 %r254 to i256
-%r257 = getelementptr i32, i32* %r4, i32 7
-%r258 = load i32, i32* %r257
-%r259 = zext i32 %r258 to i256
-%r260 = shl i256 %r259, 224
-%r261 = or i256 %r255, %r260
-%r262 = zext i256 %r261 to i288
-%r264 = getelementptr i32, i32* %r4, i32 8
-%r265 = load i32, i32* %r264
-%r266 = zext i32 %r265 to i288
-%r267 = shl i288 %r266, 256
-%r268 = or i288 %r262, %r267
-%r269 = zext i288 %r268 to i320
-%r271 = getelementptr i32, i32* %r4, i32 9
-%r272 = load i32, i32* %r271
-%r273 = zext i32 %r272 to i320
-%r274 = shl i320 %r273, 288
-%r275 = or i320 %r269, %r274
-%r276 = zext i320 %r275 to i352
-%r278 = getelementptr i32, i32* %r4, i32 10
-%r279 = load i32, i32* %r278
-%r280 = zext i32 %r279 to i352
-%r281 = shl i352 %r280, 320
-%r282 = or i352 %r276, %r281
-%r283 = zext i352 %r282 to i384
-%r285 = getelementptr i32, i32* %r4, i32 11
-%r286 = load i32, i32* %r285
-%r287 = zext i32 %r286 to i384
-%r288 = shl i384 %r287, 352
-%r289 = or i384 %r283, %r288
-%r290 = zext i384 %r289 to i416
-%r292 = getelementptr i32, i32* %r4, i32 12
-%r293 = load i32, i32* %r292
-%r294 = zext i32 %r293 to i416
-%r295 = shl i416 %r294, 384
-%r296 = or i416 %r290, %r295
-%r297 = zext i416 %r296 to i448
-%r299 = getelementptr i32, i32* %r4, i32 13
-%r300 = load i32, i32* %r299
-%r301 = zext i32 %r300 to i448
-%r302 = shl i448 %r301, 416
-%r303 = or i448 %r297, %r302
-%r304 = zext i448 %r303 to i480
-%r306 = getelementptr i32, i32* %r4, i32 14
-%r307 = load i32, i32* %r306
-%r308 = zext i32 %r307 to i480
-%r309 = shl i480 %r308, 448
-%r310 = or i480 %r304, %r309
-%r311 = zext i480 %r310 to i512
-%r313 = getelementptr i32, i32* %r4, i32 15
-%r314 = load i32, i32* %r313
-%r315 = zext i32 %r314 to i512
-%r316 = shl i512 %r315, 480
-%r317 = or i512 %r311, %r316
-%r318 = zext i512 %r317 to i544
-%r320 = getelementptr i32, i32* %r4, i32 16
-%r321 = load i32, i32* %r320
-%r322 = zext i32 %r321 to i544
-%r323 = shl i544 %r322, 512
-%r324 = or i544 %r318, %r323
-%r325 = zext i544 %r324 to i576
-%r326 = sub i576 %r211, %r325
-%r327 = lshr i576 %r326, 544
-%r328 = trunc i576 %r327 to i1
-%r329 = select i1 %r328, i576 %r211, i576 %r326
-%r330 = trunc i576 %r329 to i544
-%r331 = trunc i544 %r330 to i32
-%r333 = getelementptr i32, i32* %r1, i32 0
-store i32 %r331, i32* %r333
-%r334 = lshr i544 %r330, 32
-%r335 = trunc i544 %r334 to i32
-%r337 = getelementptr i32, i32* %r1, i32 1
-store i32 %r335, i32* %r337
-%r338 = lshr i544 %r334, 32
-%r339 = trunc i544 %r338 to i32
-%r341 = getelementptr i32, i32* %r1, i32 2
-store i32 %r339, i32* %r341
-%r342 = lshr i544 %r338, 32
-%r343 = trunc i544 %r342 to i32
-%r345 = getelementptr i32, i32* %r1, i32 3
-store i32 %r343, i32* %r345
-%r346 = lshr i544 %r342, 32
-%r347 = trunc i544 %r346 to i32
-%r349 = getelementptr i32, i32* %r1, i32 4
-store i32 %r347, i32* %r349
-%r350 = lshr i544 %r346, 32
-%r351 = trunc i544 %r350 to i32
-%r353 = getelementptr i32, i32* %r1, i32 5
-store i32 %r351, i32* %r353
-%r354 = lshr i544 %r350, 32
-%r355 = trunc i544 %r354 to i32
-%r357 = getelementptr i32, i32* %r1, i32 6
-store i32 %r355, i32* %r357
-%r358 = lshr i544 %r354, 32
-%r359 = trunc i544 %r358 to i32
-%r361 = getelementptr i32, i32* %r1, i32 7
-store i32 %r359, i32* %r361
-%r362 = lshr i544 %r358, 32
-%r363 = trunc i544 %r362 to i32
-%r365 = getelementptr i32, i32* %r1, i32 8
-store i32 %r363, i32* %r365
-%r366 = lshr i544 %r362, 32
-%r367 = trunc i544 %r366 to i32
-%r369 = getelementptr i32, i32* %r1, i32 9
-store i32 %r367, i32* %r369
-%r370 = lshr i544 %r366, 32
-%r371 = trunc i544 %r370 to i32
-%r373 = getelementptr i32, i32* %r1, i32 10
-store i32 %r371, i32* %r373
-%r374 = lshr i544 %r370, 32
-%r375 = trunc i544 %r374 to i32
-%r377 = getelementptr i32, i32* %r1, i32 11
-store i32 %r375, i32* %r377
-%r378 = lshr i544 %r374, 32
-%r379 = trunc i544 %r378 to i32
-%r381 = getelementptr i32, i32* %r1, i32 12
-store i32 %r379, i32* %r381
-%r382 = lshr i544 %r378, 32
-%r383 = trunc i544 %r382 to i32
-%r385 = getelementptr i32, i32* %r1, i32 13
-store i32 %r383, i32* %r385
-%r386 = lshr i544 %r382, 32
-%r387 = trunc i544 %r386 to i32
-%r389 = getelementptr i32, i32* %r1, i32 14
-store i32 %r387, i32* %r389
-%r390 = lshr i544 %r386, 32
-%r391 = trunc i544 %r390 to i32
-%r393 = getelementptr i32, i32* %r1, i32 15
-store i32 %r391, i32* %r393
-%r394 = lshr i544 %r390, 32
-%r395 = trunc i544 %r394 to i32
-%r397 = getelementptr i32, i32* %r1, i32 16
-store i32 %r395, i32* %r397
+%r178 = zext i32 %r177 to i96
+%r179 = shl i96 %r178, 64
+%r180 = or i96 %r174, %r179
+%r181 = zext i96 %r180 to i128
+%r183 = getelementptr i32, i32* %r4, i32 3
+%r184 = load i32, i32* %r183
+%r185 = zext i32 %r184 to i128
+%r186 = shl i128 %r185, 96
+%r187 = or i128 %r181, %r186
+%r188 = zext i128 %r187 to i160
+%r190 = getelementptr i32, i32* %r4, i32 4
+%r191 = load i32, i32* %r190
+%r192 = zext i32 %r191 to i160
+%r193 = shl i160 %r192, 128
+%r194 = or i160 %r188, %r193
+%r195 = zext i160 %r194 to i192
+%r197 = getelementptr i32, i32* %r4, i32 5
+%r198 = load i32, i32* %r197
+%r199 = zext i32 %r198 to i192
+%r200 = shl i192 %r199, 160
+%r201 = or i192 %r195, %r200
+%r202 = zext i192 %r201 to i224
+%r204 = getelementptr i32, i32* %r4, i32 6
+%r205 = load i32, i32* %r204
+%r206 = zext i32 %r205 to i224
+%r207 = shl i224 %r206, 192
+%r208 = or i224 %r202, %r207
+%r209 = zext i224 %r208 to i256
+%r211 = getelementptr i32, i32* %r4, i32 7
+%r212 = load i32, i32* %r211
+%r213 = zext i32 %r212 to i256
+%r214 = shl i256 %r213, 224
+%r215 = or i256 %r209, %r214
+%r216 = zext i256 %r215 to i288
+%r218 = getelementptr i32, i32* %r4, i32 8
+%r219 = load i32, i32* %r218
+%r220 = zext i32 %r219 to i288
+%r221 = shl i288 %r220, 256
+%r222 = or i288 %r216, %r221
+%r223 = zext i288 %r222 to i320
+%r225 = getelementptr i32, i32* %r4, i32 9
+%r226 = load i32, i32* %r225
+%r227 = zext i32 %r226 to i320
+%r228 = shl i320 %r227, 288
+%r229 = or i320 %r223, %r228
+%r230 = zext i320 %r229 to i352
+%r232 = getelementptr i32, i32* %r4, i32 10
+%r233 = load i32, i32* %r232
+%r234 = zext i32 %r233 to i352
+%r235 = shl i352 %r234, 320
+%r236 = or i352 %r230, %r235
+%r237 = zext i352 %r236 to i384
+%r239 = getelementptr i32, i32* %r4, i32 11
+%r240 = load i32, i32* %r239
+%r241 = zext i32 %r240 to i384
+%r242 = shl i384 %r241, 352
+%r243 = or i384 %r237, %r242
+%r244 = zext i384 %r243 to i416
+%r246 = getelementptr i32, i32* %r4, i32 12
+%r247 = load i32, i32* %r246
+%r248 = zext i32 %r247 to i416
+%r249 = shl i416 %r248, 384
+%r250 = or i416 %r244, %r249
+%r251 = zext i416 %r250 to i448
+%r253 = getelementptr i32, i32* %r4, i32 13
+%r254 = load i32, i32* %r253
+%r255 = zext i32 %r254 to i448
+%r256 = shl i448 %r255, 416
+%r257 = or i448 %r251, %r256
+%r258 = zext i448 %r257 to i480
+%r260 = getelementptr i32, i32* %r4, i32 14
+%r261 = load i32, i32* %r260
+%r262 = zext i32 %r261 to i480
+%r263 = shl i480 %r262, 448
+%r264 = or i480 %r258, %r263
+%r265 = zext i480 %r264 to i512
+%r267 = getelementptr i32, i32* %r4, i32 15
+%r268 = load i32, i32* %r267
+%r269 = zext i32 %r268 to i512
+%r270 = shl i512 %r269, 480
+%r271 = or i512 %r265, %r270
+%r272 = sub i512 %r165, %r271
+%r273 = lshr i512 %r272, 511
+%r274 = trunc i512 %r273 to i1
+%r275 = select i1 %r274, i512 %r165, i512 %r272
+%r277 = getelementptr i32, i32* %r1, i32 0
+%r278 = trunc i512 %r275 to i32
+store i32 %r278, i32* %r277
+%r279 = lshr i512 %r275, 32
+%r281 = getelementptr i32, i32* %r1, i32 1
+%r282 = trunc i512 %r279 to i32
+store i32 %r282, i32* %r281
+%r283 = lshr i512 %r279, 32
+%r285 = getelementptr i32, i32* %r1, i32 2
+%r286 = trunc i512 %r283 to i32
+store i32 %r286, i32* %r285
+%r287 = lshr i512 %r283, 32
+%r289 = getelementptr i32, i32* %r1, i32 3
+%r290 = trunc i512 %r287 to i32
+store i32 %r290, i32* %r289
+%r291 = lshr i512 %r287, 32
+%r293 = getelementptr i32, i32* %r1, i32 4
+%r294 = trunc i512 %r291 to i32
+store i32 %r294, i32* %r293
+%r295 = lshr i512 %r291, 32
+%r297 = getelementptr i32, i32* %r1, i32 5
+%r298 = trunc i512 %r295 to i32
+store i32 %r298, i32* %r297
+%r299 = lshr i512 %r295, 32
+%r301 = getelementptr i32, i32* %r1, i32 6
+%r302 = trunc i512 %r299 to i32
+store i32 %r302, i32* %r301
+%r303 = lshr i512 %r299, 32
+%r305 = getelementptr i32, i32* %r1, i32 7
+%r306 = trunc i512 %r303 to i32
+store i32 %r306, i32* %r305
+%r307 = lshr i512 %r303, 32
+%r309 = getelementptr i32, i32* %r1, i32 8
+%r310 = trunc i512 %r307 to i32
+store i32 %r310, i32* %r309
+%r311 = lshr i512 %r307, 32
+%r313 = getelementptr i32, i32* %r1, i32 9
+%r314 = trunc i512 %r311 to i32
+store i32 %r314, i32* %r313
+%r315 = lshr i512 %r311, 32
+%r317 = getelementptr i32, i32* %r1, i32 10
+%r318 = trunc i512 %r315 to i32
+store i32 %r318, i32* %r317
+%r319 = lshr i512 %r315, 32
+%r321 = getelementptr i32, i32* %r1, i32 11
+%r322 = trunc i512 %r319 to i32
+store i32 %r322, i32* %r321
+%r323 = lshr i512 %r319, 32
+%r325 = getelementptr i32, i32* %r1, i32 12
+%r326 = trunc i512 %r323 to i32
+store i32 %r326, i32* %r325
+%r327 = lshr i512 %r323, 32
+%r329 = getelementptr i32, i32* %r1, i32 13
+%r330 = trunc i512 %r327 to i32
+store i32 %r330, i32* %r329
+%r331 = lshr i512 %r327, 32
+%r333 = getelementptr i32, i32* %r1, i32 14
+%r334 = trunc i512 %r331 to i32
+store i32 %r334, i32* %r333
+%r335 = lshr i512 %r331, 32
+%r337 = getelementptr i32, i32* %r1, i32 15
+%r338 = trunc i512 %r335 to i32
+store i32 %r338, i32* %r337
 ret void
 }
-define void @mcl_fp_montNF17L(i32* %r1, i32* %r2, i32* %r3, i32* %r4)
+define void @mcl_fp_montRed16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
 {
-%r6 = getelementptr i32, i32* %r4, i32 -1
-%r7 = load i32, i32* %r6
-%r8 = load i32, i32* %r3
-%r9 = call i576 @mulPv544x32(i32* %r2, i32 %r8)
-%r10 = trunc i576 %r9 to i32
-%r11 = mul i32 %r10, %r7
-%r12 = call i576 @mulPv544x32(i32* %r4, i32 %r11)
-%r13 = add i576 %r9, %r12
-%r14 = lshr i576 %r13, 32
-%r16 = getelementptr i32, i32* %r3, i32 1
-%r17 = load i32, i32* %r16
-%r18 = call i576 @mulPv544x32(i32* %r2, i32 %r17)
-%r19 = add i576 %r14, %r18
-%r20 = trunc i576 %r19 to i32
-%r21 = mul i32 %r20, %r7
-%r22 = call i576 @mulPv544x32(i32* %r4, i32 %r21)
-%r23 = add i576 %r19, %r22
-%r24 = lshr i576 %r23, 32
-%r26 = getelementptr i32, i32* %r3, i32 2
-%r27 = load i32, i32* %r26
-%r28 = call i576 @mulPv544x32(i32* %r2, i32 %r27)
-%r29 = add i576 %r24, %r28
-%r30 = trunc i576 %r29 to i32
-%r31 = mul i32 %r30, %r7
-%r32 = call i576 @mulPv544x32(i32* %r4, i32 %r31)
-%r33 = add i576 %r29, %r32
-%r34 = lshr i576 %r33, 32
-%r36 = getelementptr i32, i32* %r3, i32 3
-%r37 = load i32, i32* %r36
-%r38 = call i576 @mulPv544x32(i32* %r2, i32 %r37)
-%r39 = add i576 %r34, %r38
-%r40 = trunc i576 %r39 to i32
-%r41 = mul i32 %r40, %r7
-%r42 = call i576 @mulPv544x32(i32* %r4, i32 %r41)
-%r43 = add i576 %r39, %r42
-%r44 = lshr i576 %r43, 32
-%r46 = getelementptr i32, i32* %r3, i32 4
-%r47 = load i32, i32* %r46
-%r48 = call i576 @mulPv544x32(i32* %r2, i32 %r47)
-%r49 = add i576 %r44, %r48
-%r50 = trunc i576 %r49 to i32
-%r51 = mul i32 %r50, %r7
-%r52 = call i576 @mulPv544x32(i32* %r4, i32 %r51)
-%r53 = add i576 %r49, %r52
-%r54 = lshr i576 %r53, 32
-%r56 = getelementptr i32, i32* %r3, i32 5
-%r57 = load i32, i32* %r56
-%r58 = call i576 @mulPv544x32(i32* %r2, i32 %r57)
-%r59 = add i576 %r54, %r58
-%r60 = trunc i576 %r59 to i32
-%r61 = mul i32 %r60, %r7
-%r62 = call i576 @mulPv544x32(i32* %r4, i32 %r61)
-%r63 = add i576 %r59, %r62
-%r64 = lshr i576 %r63, 32
-%r66 = getelementptr i32, i32* %r3, i32 6
+%r5 = getelementptr i32, i32* %r3, i32 -1
+%r6 = load i32, i32* %r5
+%r7 = load i32, i32* %r3
+%r8 = zext i32 %r7 to i64
+%r10 = getelementptr i32, i32* %r3, i32 1
+%r11 = load i32, i32* %r10
+%r12 = zext i32 %r11 to i64
+%r13 = shl i64 %r12, 32
+%r14 = or i64 %r8, %r13
+%r15 = zext i64 %r14 to i96
+%r17 = getelementptr i32, i32* %r3, i32 2
+%r18 = load i32, i32* %r17
+%r19 = zext i32 %r18 to i96
+%r20 = shl i96 %r19, 64
+%r21 = or i96 %r15, %r20
+%r22 = zext i96 %r21 to i128
+%r24 = getelementptr i32, i32* %r3, i32 3
+%r25 = load i32, i32* %r24
+%r26 = zext i32 %r25 to i128
+%r27 = shl i128 %r26, 96
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i160
+%r31 = getelementptr i32, i32* %r3, i32 4
+%r32 = load i32, i32* %r31
+%r33 = zext i32 %r32 to i160
+%r34 = shl i160 %r33, 128
+%r35 = or i160 %r29, %r34
+%r36 = zext i160 %r35 to i192
+%r38 = getelementptr i32, i32* %r3, i32 5
+%r39 = load i32, i32* %r38
+%r40 = zext i32 %r39 to i192
+%r41 = shl i192 %r40, 160
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i224
+%r45 = getelementptr i32, i32* %r3, i32 6
+%r46 = load i32, i32* %r45
+%r47 = zext i32 %r46 to i224
+%r48 = shl i224 %r47, 192
+%r49 = or i224 %r43, %r48
+%r50 = zext i224 %r49 to i256
+%r52 = getelementptr i32, i32* %r3, i32 7
+%r53 = load i32, i32* %r52
+%r54 = zext i32 %r53 to i256
+%r55 = shl i256 %r54, 224
+%r56 = or i256 %r50, %r55
+%r57 = zext i256 %r56 to i288
+%r59 = getelementptr i32, i32* %r3, i32 8
+%r60 = load i32, i32* %r59
+%r61 = zext i32 %r60 to i288
+%r62 = shl i288 %r61, 256
+%r63 = or i288 %r57, %r62
+%r64 = zext i288 %r63 to i320
+%r66 = getelementptr i32, i32* %r3, i32 9
 %r67 = load i32, i32* %r66
-%r68 = call i576 @mulPv544x32(i32* %r2, i32 %r67)
-%r69 = add i576 %r64, %r68
-%r70 = trunc i576 %r69 to i32
-%r71 = mul i32 %r70, %r7
-%r72 = call i576 @mulPv544x32(i32* %r4, i32 %r71)
-%r73 = add i576 %r69, %r72
-%r74 = lshr i576 %r73, 32
-%r76 = getelementptr i32, i32* %r3, i32 7
-%r77 = load i32, i32* %r76
-%r78 = call i576 @mulPv544x32(i32* %r2, i32 %r77)
-%r79 = add i576 %r74, %r78
-%r80 = trunc i576 %r79 to i32
-%r81 = mul i32 %r80, %r7
-%r82 = call i576 @mulPv544x32(i32* %r4, i32 %r81)
-%r83 = add i576 %r79, %r82
-%r84 = lshr i576 %r83, 32
-%r86 = getelementptr i32, i32* %r3, i32 8
-%r87 = load i32, i32* %r86
-%r88 = call i576 @mulPv544x32(i32* %r2, i32 %r87)
-%r89 = add i576 %r84, %r88
-%r90 = trunc i576 %r89 to i32
-%r91 = mul i32 %r90, %r7
-%r92 = call i576 @mulPv544x32(i32* %r4, i32 %r91)
-%r93 = add i576 %r89, %r92
-%r94 = lshr i576 %r93, 32
-%r96 = getelementptr i32, i32* %r3, i32 9
-%r97 = load i32, i32* %r96
-%r98 = call i576 @mulPv544x32(i32* %r2, i32 %r97)
-%r99 = add i576 %r94, %r98
-%r100 = trunc i576 %r99 to i32
-%r101 = mul i32 %r100, %r7
-%r102 = call i576 @mulPv544x32(i32* %r4, i32 %r101)
-%r103 = add i576 %r99, %r102
-%r104 = lshr i576 %r103, 32
-%r106 = getelementptr i32, i32* %r3, i32 10
-%r107 = load i32, i32* %r106
-%r108 = call i576 @mulPv544x32(i32* %r2, i32 %r107)
-%r109 = add i576 %r104, %r108
-%r110 = trunc i576 %r109 to i32
-%r111 = mul i32 %r110, %r7
-%r112 = call i576 @mulPv544x32(i32* %r4, i32 %r111)
-%r113 = add i576 %r109, %r112
-%r114 = lshr i576 %r113, 32
-%r116 = getelementptr i32, i32* %r3, i32 11
+%r68 = zext i32 %r67 to i320
+%r69 = shl i320 %r68, 288
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i352
+%r73 = getelementptr i32, i32* %r3, i32 10
+%r74 = load i32, i32* %r73
+%r75 = zext i32 %r74 to i352
+%r76 = shl i352 %r75, 320
+%r77 = or i352 %r71, %r76
+%r78 = zext i352 %r77 to i384
+%r80 = getelementptr i32, i32* %r3, i32 11
+%r81 = load i32, i32* %r80
+%r82 = zext i32 %r81 to i384
+%r83 = shl i384 %r82, 352
+%r84 = or i384 %r78, %r83
+%r85 = zext i384 %r84 to i416
+%r87 = getelementptr i32, i32* %r3, i32 12
+%r88 = load i32, i32* %r87
+%r89 = zext i32 %r88 to i416
+%r90 = shl i416 %r89, 384
+%r91 = or i416 %r85, %r90
+%r92 = zext i416 %r91 to i448
+%r94 = getelementptr i32, i32* %r3, i32 13
+%r95 = load i32, i32* %r94
+%r96 = zext i32 %r95 to i448
+%r97 = shl i448 %r96, 416
+%r98 = or i448 %r92, %r97
+%r99 = zext i448 %r98 to i480
+%r101 = getelementptr i32, i32* %r3, i32 14
+%r102 = load i32, i32* %r101
+%r103 = zext i32 %r102 to i480
+%r104 = shl i480 %r103, 448
+%r105 = or i480 %r99, %r104
+%r106 = zext i480 %r105 to i512
+%r108 = getelementptr i32, i32* %r3, i32 15
+%r109 = load i32, i32* %r108
+%r110 = zext i32 %r109 to i512
+%r111 = shl i512 %r110, 480
+%r112 = or i512 %r106, %r111
+%r113 = load i32, i32* %r2
+%r114 = zext i32 %r113 to i64
+%r116 = getelementptr i32, i32* %r2, i32 1
 %r117 = load i32, i32* %r116
-%r118 = call i576 @mulPv544x32(i32* %r2, i32 %r117)
-%r119 = add i576 %r114, %r118
-%r120 = trunc i576 %r119 to i32
-%r121 = mul i32 %r120, %r7
-%r122 = call i576 @mulPv544x32(i32* %r4, i32 %r121)
-%r123 = add i576 %r119, %r122
-%r124 = lshr i576 %r123, 32
-%r126 = getelementptr i32, i32* %r3, i32 12
-%r127 = load i32, i32* %r126
-%r128 = call i576 @mulPv544x32(i32* %r2, i32 %r127)
-%r129 = add i576 %r124, %r128
-%r130 = trunc i576 %r129 to i32
-%r131 = mul i32 %r130, %r7
-%r132 = call i576 @mulPv544x32(i32* %r4, i32 %r131)
-%r133 = add i576 %r129, %r132
-%r134 = lshr i576 %r133, 32
-%r136 = getelementptr i32, i32* %r3, i32 13
-%r137 = load i32, i32* %r136
-%r138 = call i576 @mulPv544x32(i32* %r2, i32 %r137)
-%r139 = add i576 %r134, %r138
-%r140 = trunc i576 %r139 to i32
-%r141 = mul i32 %r140, %r7
-%r142 = call i576 @mulPv544x32(i32* %r4, i32 %r141)
-%r143 = add i576 %r139, %r142
-%r144 = lshr i576 %r143, 32
-%r146 = getelementptr i32, i32* %r3, i32 14
-%r147 = load i32, i32* %r146
-%r148 = call i576 @mulPv544x32(i32* %r2, i32 %r147)
-%r149 = add i576 %r144, %r148
-%r150 = trunc i576 %r149 to i32
-%r151 = mul i32 %r150, %r7
-%r152 = call i576 @mulPv544x32(i32* %r4, i32 %r151)
-%r153 = add i576 %r149, %r152
-%r154 = lshr i576 %r153, 32
-%r156 = getelementptr i32, i32* %r3, i32 15
-%r157 = load i32, i32* %r156
-%r158 = call i576 @mulPv544x32(i32* %r2, i32 %r157)
-%r159 = add i576 %r154, %r158
-%r160 = trunc i576 %r159 to i32
-%r161 = mul i32 %r160, %r7
-%r162 = call i576 @mulPv544x32(i32* %r4, i32 %r161)
-%r163 = add i576 %r159, %r162
-%r164 = lshr i576 %r163, 32
-%r166 = getelementptr i32, i32* %r3, i32 16
-%r167 = load i32, i32* %r166
-%r168 = call i576 @mulPv544x32(i32* %r2, i32 %r167)
-%r169 = add i576 %r164, %r168
-%r170 = trunc i576 %r169 to i32
-%r171 = mul i32 %r170, %r7
-%r172 = call i576 @mulPv544x32(i32* %r4, i32 %r171)
-%r173 = add i576 %r169, %r172
-%r174 = lshr i576 %r173, 32
-%r175 = trunc i576 %r174 to i544
-%r176 = load i32, i32* %r4
-%r177 = zext i32 %r176 to i64
-%r179 = getelementptr i32, i32* %r4, i32 1
+%r118 = zext i32 %r117 to i64
+%r119 = shl i64 %r118, 32
+%r120 = or i64 %r114, %r119
+%r121 = zext i64 %r120 to i96
+%r123 = getelementptr i32, i32* %r2, i32 2
+%r124 = load i32, i32* %r123
+%r125 = zext i32 %r124 to i96
+%r126 = shl i96 %r125, 64
+%r127 = or i96 %r121, %r126
+%r128 = zext i96 %r127 to i128
+%r130 = getelementptr i32, i32* %r2, i32 3
+%r131 = load i32, i32* %r130
+%r132 = zext i32 %r131 to i128
+%r133 = shl i128 %r132, 96
+%r134 = or i128 %r128, %r133
+%r135 = zext i128 %r134 to i160
+%r137 = getelementptr i32, i32* %r2, i32 4
+%r138 = load i32, i32* %r137
+%r139 = zext i32 %r138 to i160
+%r140 = shl i160 %r139, 128
+%r141 = or i160 %r135, %r140
+%r142 = zext i160 %r141 to i192
+%r144 = getelementptr i32, i32* %r2, i32 5
+%r145 = load i32, i32* %r144
+%r146 = zext i32 %r145 to i192
+%r147 = shl i192 %r146, 160
+%r148 = or i192 %r142, %r147
+%r149 = zext i192 %r148 to i224
+%r151 = getelementptr i32, i32* %r2, i32 6
+%r152 = load i32, i32* %r151
+%r153 = zext i32 %r152 to i224
+%r154 = shl i224 %r153, 192
+%r155 = or i224 %r149, %r154
+%r156 = zext i224 %r155 to i256
+%r158 = getelementptr i32, i32* %r2, i32 7
+%r159 = load i32, i32* %r158
+%r160 = zext i32 %r159 to i256
+%r161 = shl i256 %r160, 224
+%r162 = or i256 %r156, %r161
+%r163 = zext i256 %r162 to i288
+%r165 = getelementptr i32, i32* %r2, i32 8
+%r166 = load i32, i32* %r165
+%r167 = zext i32 %r166 to i288
+%r168 = shl i288 %r167, 256
+%r169 = or i288 %r163, %r168
+%r170 = zext i288 %r169 to i320
+%r172 = getelementptr i32, i32* %r2, i32 9
+%r173 = load i32, i32* %r172
+%r174 = zext i32 %r173 to i320
+%r175 = shl i320 %r174, 288
+%r176 = or i320 %r170, %r175
+%r177 = zext i320 %r176 to i352
+%r179 = getelementptr i32, i32* %r2, i32 10
 %r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i64
-%r182 = shl i64 %r181, 32
-%r183 = or i64 %r177, %r182
-%r184 = zext i64 %r183 to i96
-%r186 = getelementptr i32, i32* %r4, i32 2
+%r181 = zext i32 %r180 to i352
+%r182 = shl i352 %r181, 320
+%r183 = or i352 %r177, %r182
+%r184 = zext i352 %r183 to i384
+%r186 = getelementptr i32, i32* %r2, i32 11
 %r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i96
-%r189 = shl i96 %r188, 64
-%r190 = or i96 %r184, %r189
-%r191 = zext i96 %r190 to i128
-%r193 = getelementptr i32, i32* %r4, i32 3
+%r188 = zext i32 %r187 to i384
+%r189 = shl i384 %r188, 352
+%r190 = or i384 %r184, %r189
+%r191 = zext i384 %r190 to i416
+%r193 = getelementptr i32, i32* %r2, i32 12
 %r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i128
-%r196 = shl i128 %r195, 96
-%r197 = or i128 %r191, %r196
-%r198 = zext i128 %r197 to i160
-%r200 = getelementptr i32, i32* %r4, i32 4
+%r195 = zext i32 %r194 to i416
+%r196 = shl i416 %r195, 384
+%r197 = or i416 %r191, %r196
+%r198 = zext i416 %r197 to i448
+%r200 = getelementptr i32, i32* %r2, i32 13
 %r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i160
-%r203 = shl i160 %r202, 128
-%r204 = or i160 %r198, %r203
-%r205 = zext i160 %r204 to i192
-%r207 = getelementptr i32, i32* %r4, i32 5
+%r202 = zext i32 %r201 to i448
+%r203 = shl i448 %r202, 416
+%r204 = or i448 %r198, %r203
+%r205 = zext i448 %r204 to i480
+%r207 = getelementptr i32, i32* %r2, i32 14
 %r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i192
-%r210 = shl i192 %r209, 160
-%r211 = or i192 %r205, %r210
-%r212 = zext i192 %r211 to i224
-%r214 = getelementptr i32, i32* %r4, i32 6
+%r209 = zext i32 %r208 to i480
+%r210 = shl i480 %r209, 448
+%r211 = or i480 %r205, %r210
+%r212 = zext i480 %r211 to i512
+%r214 = getelementptr i32, i32* %r2, i32 15
 %r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i224
-%r217 = shl i224 %r216, 192
-%r218 = or i224 %r212, %r217
-%r219 = zext i224 %r218 to i256
-%r221 = getelementptr i32, i32* %r4, i32 7
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i256
-%r224 = shl i256 %r223, 224
-%r225 = or i256 %r219, %r224
-%r226 = zext i256 %r225 to i288
-%r228 = getelementptr i32, i32* %r4, i32 8
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i288
-%r231 = shl i288 %r230, 256
-%r232 = or i288 %r226, %r231
-%r233 = zext i288 %r232 to i320
-%r235 = getelementptr i32, i32* %r4, i32 9
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i320
-%r238 = shl i320 %r237, 288
-%r239 = or i320 %r233, %r238
-%r240 = zext i320 %r239 to i352
-%r242 = getelementptr i32, i32* %r4, i32 10
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i352
-%r245 = shl i352 %r244, 320
-%r246 = or i352 %r240, %r245
-%r247 = zext i352 %r246 to i384
-%r249 = getelementptr i32, i32* %r4, i32 11
-%r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i384
-%r252 = shl i384 %r251, 352
-%r253 = or i384 %r247, %r252
-%r254 = zext i384 %r253 to i416
-%r256 = getelementptr i32, i32* %r4, i32 12
-%r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i416
-%r259 = shl i416 %r258, 384
-%r260 = or i416 %r254, %r259
-%r261 = zext i416 %r260 to i448
-%r263 = getelementptr i32, i32* %r4, i32 13
-%r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i448
-%r266 = shl i448 %r265, 416
-%r267 = or i448 %r261, %r266
-%r268 = zext i448 %r267 to i480
-%r270 = getelementptr i32, i32* %r4, i32 14
-%r271 = load i32, i32* %r270
-%r272 = zext i32 %r271 to i480
-%r273 = shl i480 %r272, 448
-%r274 = or i480 %r268, %r273
-%r275 = zext i480 %r274 to i512
-%r277 = getelementptr i32, i32* %r4, i32 15
-%r278 = load i32, i32* %r277
-%r279 = zext i32 %r278 to i512
-%r280 = shl i512 %r279, 480
-%r281 = or i512 %r275, %r280
-%r282 = zext i512 %r281 to i544
-%r284 = getelementptr i32, i32* %r4, i32 16
-%r285 = load i32, i32* %r284
-%r286 = zext i32 %r285 to i544
-%r287 = shl i544 %r286, 512
-%r288 = or i544 %r282, %r287
-%r289 = sub i544 %r175, %r288
-%r290 = lshr i544 %r289, 543
-%r291 = trunc i544 %r290 to i1
-%r292 = select i1 %r291, i544 %r175, i544 %r289
-%r293 = trunc i544 %r292 to i32
-%r295 = getelementptr i32, i32* %r1, i32 0
-store i32 %r293, i32* %r295
-%r296 = lshr i544 %r292, 32
-%r297 = trunc i544 %r296 to i32
-%r299 = getelementptr i32, i32* %r1, i32 1
-store i32 %r297, i32* %r299
-%r300 = lshr i544 %r296, 32
-%r301 = trunc i544 %r300 to i32
-%r303 = getelementptr i32, i32* %r1, i32 2
-store i32 %r301, i32* %r303
-%r304 = lshr i544 %r300, 32
-%r305 = trunc i544 %r304 to i32
-%r307 = getelementptr i32, i32* %r1, i32 3
-store i32 %r305, i32* %r307
-%r308 = lshr i544 %r304, 32
-%r309 = trunc i544 %r308 to i32
-%r311 = getelementptr i32, i32* %r1, i32 4
-store i32 %r309, i32* %r311
-%r312 = lshr i544 %r308, 32
-%r313 = trunc i544 %r312 to i32
-%r315 = getelementptr i32, i32* %r1, i32 5
-store i32 %r313, i32* %r315
-%r316 = lshr i544 %r312, 32
-%r317 = trunc i544 %r316 to i32
-%r319 = getelementptr i32, i32* %r1, i32 6
-store i32 %r317, i32* %r319
-%r320 = lshr i544 %r316, 32
-%r321 = trunc i544 %r320 to i32
-%r323 = getelementptr i32, i32* %r1, i32 7
-store i32 %r321, i32* %r323
-%r324 = lshr i544 %r320, 32
-%r325 = trunc i544 %r324 to i32
-%r327 = getelementptr i32, i32* %r1, i32 8
-store i32 %r325, i32* %r327
-%r328 = lshr i544 %r324, 32
-%r329 = trunc i544 %r328 to i32
-%r331 = getelementptr i32, i32* %r1, i32 9
-store i32 %r329, i32* %r331
-%r332 = lshr i544 %r328, 32
-%r333 = trunc i544 %r332 to i32
-%r335 = getelementptr i32, i32* %r1, i32 10
-store i32 %r333, i32* %r335
-%r336 = lshr i544 %r332, 32
-%r337 = trunc i544 %r336 to i32
-%r339 = getelementptr i32, i32* %r1, i32 11
-store i32 %r337, i32* %r339
-%r340 = lshr i544 %r336, 32
-%r341 = trunc i544 %r340 to i32
-%r343 = getelementptr i32, i32* %r1, i32 12
-store i32 %r341, i32* %r343
-%r344 = lshr i544 %r340, 32
-%r345 = trunc i544 %r344 to i32
-%r347 = getelementptr i32, i32* %r1, i32 13
-store i32 %r345, i32* %r347
-%r348 = lshr i544 %r344, 32
-%r349 = trunc i544 %r348 to i32
-%r351 = getelementptr i32, i32* %r1, i32 14
-store i32 %r349, i32* %r351
-%r352 = lshr i544 %r348, 32
-%r353 = trunc i544 %r352 to i32
-%r355 = getelementptr i32, i32* %r1, i32 15
-store i32 %r353, i32* %r355
-%r356 = lshr i544 %r352, 32
-%r357 = trunc i544 %r356 to i32
-%r359 = getelementptr i32, i32* %r1, i32 16
-store i32 %r357, i32* %r359
-ret void
-}
-define void @mcl_fp_montRed17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
+%r216 = zext i32 %r215 to i512
+%r217 = shl i512 %r216, 480
+%r218 = or i512 %r212, %r217
+%r219 = trunc i512 %r218 to i32
+%r220 = mul i32 %r219, %r6
+%r221 = call i544 @mulPv512x32(i32* %r3, i32 %r220)
+%r223 = getelementptr i32, i32* %r2, i32 16
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i544
+%r226 = shl i544 %r225, 512
+%r227 = zext i512 %r218 to i544
+%r228 = or i544 %r226, %r227
+%r229 = zext i544 %r228 to i576
+%r230 = zext i544 %r221 to i576
+%r231 = add i576 %r229, %r230
+%r232 = lshr i576 %r231, 32
+%r233 = trunc i576 %r232 to i544
+%r234 = lshr i544 %r233, 512
+%r235 = trunc i544 %r234 to i32
+%r236 = trunc i544 %r233 to i512
+%r237 = trunc i512 %r236 to i32
+%r238 = mul i32 %r237, %r6
+%r239 = call i544 @mulPv512x32(i32* %r3, i32 %r238)
+%r240 = zext i32 %r235 to i544
+%r241 = shl i544 %r240, 512
+%r242 = add i544 %r239, %r241
+%r244 = getelementptr i32, i32* %r2, i32 17
+%r245 = load i32, i32* %r244
+%r246 = zext i32 %r245 to i544
+%r247 = shl i544 %r246, 512
+%r248 = zext i512 %r236 to i544
+%r249 = or i544 %r247, %r248
+%r250 = zext i544 %r249 to i576
+%r251 = zext i544 %r242 to i576
+%r252 = add i576 %r250, %r251
+%r253 = lshr i576 %r252, 32
+%r254 = trunc i576 %r253 to i544
+%r255 = lshr i544 %r254, 512
+%r256 = trunc i544 %r255 to i32
+%r257 = trunc i544 %r254 to i512
+%r258 = trunc i512 %r257 to i32
+%r259 = mul i32 %r258, %r6
+%r260 = call i544 @mulPv512x32(i32* %r3, i32 %r259)
+%r261 = zext i32 %r256 to i544
+%r262 = shl i544 %r261, 512
+%r263 = add i544 %r260, %r262
+%r265 = getelementptr i32, i32* %r2, i32 18
+%r266 = load i32, i32* %r265
+%r267 = zext i32 %r266 to i544
+%r268 = shl i544 %r267, 512
+%r269 = zext i512 %r257 to i544
+%r270 = or i544 %r268, %r269
+%r271 = zext i544 %r270 to i576
+%r272 = zext i544 %r263 to i576
+%r273 = add i576 %r271, %r272
+%r274 = lshr i576 %r273, 32
+%r275 = trunc i576 %r274 to i544
+%r276 = lshr i544 %r275, 512
+%r277 = trunc i544 %r276 to i32
+%r278 = trunc i544 %r275 to i512
+%r279 = trunc i512 %r278 to i32
+%r280 = mul i32 %r279, %r6
+%r281 = call i544 @mulPv512x32(i32* %r3, i32 %r280)
+%r282 = zext i32 %r277 to i544
+%r283 = shl i544 %r282, 512
+%r284 = add i544 %r281, %r283
+%r286 = getelementptr i32, i32* %r2, i32 19
+%r287 = load i32, i32* %r286
+%r288 = zext i32 %r287 to i544
+%r289 = shl i544 %r288, 512
+%r290 = zext i512 %r278 to i544
+%r291 = or i544 %r289, %r290
+%r292 = zext i544 %r291 to i576
+%r293 = zext i544 %r284 to i576
+%r294 = add i576 %r292, %r293
+%r295 = lshr i576 %r294, 32
+%r296 = trunc i576 %r295 to i544
+%r297 = lshr i544 %r296, 512
+%r298 = trunc i544 %r297 to i32
+%r299 = trunc i544 %r296 to i512
+%r300 = trunc i512 %r299 to i32
+%r301 = mul i32 %r300, %r6
+%r302 = call i544 @mulPv512x32(i32* %r3, i32 %r301)
+%r303 = zext i32 %r298 to i544
+%r304 = shl i544 %r303, 512
+%r305 = add i544 %r302, %r304
+%r307 = getelementptr i32, i32* %r2, i32 20
+%r308 = load i32, i32* %r307
+%r309 = zext i32 %r308 to i544
+%r310 = shl i544 %r309, 512
+%r311 = zext i512 %r299 to i544
+%r312 = or i544 %r310, %r311
+%r313 = zext i544 %r312 to i576
+%r314 = zext i544 %r305 to i576
+%r315 = add i576 %r313, %r314
+%r316 = lshr i576 %r315, 32
+%r317 = trunc i576 %r316 to i544
+%r318 = lshr i544 %r317, 512
+%r319 = trunc i544 %r318 to i32
+%r320 = trunc i544 %r317 to i512
+%r321 = trunc i512 %r320 to i32
+%r322 = mul i32 %r321, %r6
+%r323 = call i544 @mulPv512x32(i32* %r3, i32 %r322)
+%r324 = zext i32 %r319 to i544
+%r325 = shl i544 %r324, 512
+%r326 = add i544 %r323, %r325
+%r328 = getelementptr i32, i32* %r2, i32 21
+%r329 = load i32, i32* %r328
+%r330 = zext i32 %r329 to i544
+%r331 = shl i544 %r330, 512
+%r332 = zext i512 %r320 to i544
+%r333 = or i544 %r331, %r332
+%r334 = zext i544 %r333 to i576
+%r335 = zext i544 %r326 to i576
+%r336 = add i576 %r334, %r335
+%r337 = lshr i576 %r336, 32
+%r338 = trunc i576 %r337 to i544
+%r339 = lshr i544 %r338, 512
+%r340 = trunc i544 %r339 to i32
+%r341 = trunc i544 %r338 to i512
+%r342 = trunc i512 %r341 to i32
+%r343 = mul i32 %r342, %r6
+%r344 = call i544 @mulPv512x32(i32* %r3, i32 %r343)
+%r345 = zext i32 %r340 to i544
+%r346 = shl i544 %r345, 512
+%r347 = add i544 %r344, %r346
+%r349 = getelementptr i32, i32* %r2, i32 22
+%r350 = load i32, i32* %r349
+%r351 = zext i32 %r350 to i544
+%r352 = shl i544 %r351, 512
+%r353 = zext i512 %r341 to i544
+%r354 = or i544 %r352, %r353
+%r355 = zext i544 %r354 to i576
+%r356 = zext i544 %r347 to i576
+%r357 = add i576 %r355, %r356
+%r358 = lshr i576 %r357, 32
+%r359 = trunc i576 %r358 to i544
+%r360 = lshr i544 %r359, 512
+%r361 = trunc i544 %r360 to i32
+%r362 = trunc i544 %r359 to i512
+%r363 = trunc i512 %r362 to i32
+%r364 = mul i32 %r363, %r6
+%r365 = call i544 @mulPv512x32(i32* %r3, i32 %r364)
+%r366 = zext i32 %r361 to i544
+%r367 = shl i544 %r366, 512
+%r368 = add i544 %r365, %r367
+%r370 = getelementptr i32, i32* %r2, i32 23
+%r371 = load i32, i32* %r370
+%r372 = zext i32 %r371 to i544
+%r373 = shl i544 %r372, 512
+%r374 = zext i512 %r362 to i544
+%r375 = or i544 %r373, %r374
+%r376 = zext i544 %r375 to i576
+%r377 = zext i544 %r368 to i576
+%r378 = add i576 %r376, %r377
+%r379 = lshr i576 %r378, 32
+%r380 = trunc i576 %r379 to i544
+%r381 = lshr i544 %r380, 512
+%r382 = trunc i544 %r381 to i32
+%r383 = trunc i544 %r380 to i512
+%r384 = trunc i512 %r383 to i32
+%r385 = mul i32 %r384, %r6
+%r386 = call i544 @mulPv512x32(i32* %r3, i32 %r385)
+%r387 = zext i32 %r382 to i544
+%r388 = shl i544 %r387, 512
+%r389 = add i544 %r386, %r388
+%r391 = getelementptr i32, i32* %r2, i32 24
+%r392 = load i32, i32* %r391
+%r393 = zext i32 %r392 to i544
+%r394 = shl i544 %r393, 512
+%r395 = zext i512 %r383 to i544
+%r396 = or i544 %r394, %r395
+%r397 = zext i544 %r396 to i576
+%r398 = zext i544 %r389 to i576
+%r399 = add i576 %r397, %r398
+%r400 = lshr i576 %r399, 32
+%r401 = trunc i576 %r400 to i544
+%r402 = lshr i544 %r401, 512
+%r403 = trunc i544 %r402 to i32
+%r404 = trunc i544 %r401 to i512
+%r405 = trunc i512 %r404 to i32
+%r406 = mul i32 %r405, %r6
+%r407 = call i544 @mulPv512x32(i32* %r3, i32 %r406)
+%r408 = zext i32 %r403 to i544
+%r409 = shl i544 %r408, 512
+%r410 = add i544 %r407, %r409
+%r412 = getelementptr i32, i32* %r2, i32 25
+%r413 = load i32, i32* %r412
+%r414 = zext i32 %r413 to i544
+%r415 = shl i544 %r414, 512
+%r416 = zext i512 %r404 to i544
+%r417 = or i544 %r415, %r416
+%r418 = zext i544 %r417 to i576
+%r419 = zext i544 %r410 to i576
+%r420 = add i576 %r418, %r419
+%r421 = lshr i576 %r420, 32
+%r422 = trunc i576 %r421 to i544
+%r423 = lshr i544 %r422, 512
+%r424 = trunc i544 %r423 to i32
+%r425 = trunc i544 %r422 to i512
+%r426 = trunc i512 %r425 to i32
+%r427 = mul i32 %r426, %r6
+%r428 = call i544 @mulPv512x32(i32* %r3, i32 %r427)
+%r429 = zext i32 %r424 to i544
+%r430 = shl i544 %r429, 512
+%r431 = add i544 %r428, %r430
+%r433 = getelementptr i32, i32* %r2, i32 26
+%r434 = load i32, i32* %r433
+%r435 = zext i32 %r434 to i544
+%r436 = shl i544 %r435, 512
+%r437 = zext i512 %r425 to i544
+%r438 = or i544 %r436, %r437
+%r439 = zext i544 %r438 to i576
+%r440 = zext i544 %r431 to i576
+%r441 = add i576 %r439, %r440
+%r442 = lshr i576 %r441, 32
+%r443 = trunc i576 %r442 to i544
+%r444 = lshr i544 %r443, 512
+%r445 = trunc i544 %r444 to i32
+%r446 = trunc i544 %r443 to i512
+%r447 = trunc i512 %r446 to i32
+%r448 = mul i32 %r447, %r6
+%r449 = call i544 @mulPv512x32(i32* %r3, i32 %r448)
+%r450 = zext i32 %r445 to i544
+%r451 = shl i544 %r450, 512
+%r452 = add i544 %r449, %r451
+%r454 = getelementptr i32, i32* %r2, i32 27
+%r455 = load i32, i32* %r454
+%r456 = zext i32 %r455 to i544
+%r457 = shl i544 %r456, 512
+%r458 = zext i512 %r446 to i544
+%r459 = or i544 %r457, %r458
+%r460 = zext i544 %r459 to i576
+%r461 = zext i544 %r452 to i576
+%r462 = add i576 %r460, %r461
+%r463 = lshr i576 %r462, 32
+%r464 = trunc i576 %r463 to i544
+%r465 = lshr i544 %r464, 512
+%r466 = trunc i544 %r465 to i32
+%r467 = trunc i544 %r464 to i512
+%r468 = trunc i512 %r467 to i32
+%r469 = mul i32 %r468, %r6
+%r470 = call i544 @mulPv512x32(i32* %r3, i32 %r469)
+%r471 = zext i32 %r466 to i544
+%r472 = shl i544 %r471, 512
+%r473 = add i544 %r470, %r472
+%r475 = getelementptr i32, i32* %r2, i32 28
+%r476 = load i32, i32* %r475
+%r477 = zext i32 %r476 to i544
+%r478 = shl i544 %r477, 512
+%r479 = zext i512 %r467 to i544
+%r480 = or i544 %r478, %r479
+%r481 = zext i544 %r480 to i576
+%r482 = zext i544 %r473 to i576
+%r483 = add i576 %r481, %r482
+%r484 = lshr i576 %r483, 32
+%r485 = trunc i576 %r484 to i544
+%r486 = lshr i544 %r485, 512
+%r487 = trunc i544 %r486 to i32
+%r488 = trunc i544 %r485 to i512
+%r489 = trunc i512 %r488 to i32
+%r490 = mul i32 %r489, %r6
+%r491 = call i544 @mulPv512x32(i32* %r3, i32 %r490)
+%r492 = zext i32 %r487 to i544
+%r493 = shl i544 %r492, 512
+%r494 = add i544 %r491, %r493
+%r496 = getelementptr i32, i32* %r2, i32 29
+%r497 = load i32, i32* %r496
+%r498 = zext i32 %r497 to i544
+%r499 = shl i544 %r498, 512
+%r500 = zext i512 %r488 to i544
+%r501 = or i544 %r499, %r500
+%r502 = zext i544 %r501 to i576
+%r503 = zext i544 %r494 to i576
+%r504 = add i576 %r502, %r503
+%r505 = lshr i576 %r504, 32
+%r506 = trunc i576 %r505 to i544
+%r507 = lshr i544 %r506, 512
+%r508 = trunc i544 %r507 to i32
+%r509 = trunc i544 %r506 to i512
+%r510 = trunc i512 %r509 to i32
+%r511 = mul i32 %r510, %r6
+%r512 = call i544 @mulPv512x32(i32* %r3, i32 %r511)
+%r513 = zext i32 %r508 to i544
+%r514 = shl i544 %r513, 512
+%r515 = add i544 %r512, %r514
+%r517 = getelementptr i32, i32* %r2, i32 30
+%r518 = load i32, i32* %r517
+%r519 = zext i32 %r518 to i544
+%r520 = shl i544 %r519, 512
+%r521 = zext i512 %r509 to i544
+%r522 = or i544 %r520, %r521
+%r523 = zext i544 %r522 to i576
+%r524 = zext i544 %r515 to i576
+%r525 = add i576 %r523, %r524
+%r526 = lshr i576 %r525, 32
+%r527 = trunc i576 %r526 to i544
+%r528 = lshr i544 %r527, 512
+%r529 = trunc i544 %r528 to i32
+%r530 = trunc i544 %r527 to i512
+%r531 = trunc i512 %r530 to i32
+%r532 = mul i32 %r531, %r6
+%r533 = call i544 @mulPv512x32(i32* %r3, i32 %r532)
+%r534 = zext i32 %r529 to i544
+%r535 = shl i544 %r534, 512
+%r536 = add i544 %r533, %r535
+%r538 = getelementptr i32, i32* %r2, i32 31
+%r539 = load i32, i32* %r538
+%r540 = zext i32 %r539 to i544
+%r541 = shl i544 %r540, 512
+%r542 = zext i512 %r530 to i544
+%r543 = or i544 %r541, %r542
+%r544 = zext i544 %r543 to i576
+%r545 = zext i544 %r536 to i576
+%r546 = add i576 %r544, %r545
+%r547 = lshr i576 %r546, 32
+%r548 = trunc i576 %r547 to i544
+%r549 = lshr i544 %r548, 512
+%r550 = trunc i544 %r549 to i32
+%r551 = trunc i544 %r548 to i512
+%r552 = zext i512 %r112 to i544
+%r553 = zext i512 %r551 to i544
+%r554 = sub i544 %r553, %r552
+%r555 = lshr i544 %r554, 512
+%r556 = trunc i544 %r555 to i1
+%r557 = select i1 %r556, i544 %r553, i544 %r554
+%r558 = trunc i544 %r557 to i512
+%r560 = getelementptr i32, i32* %r1, i32 0
+%r561 = trunc i512 %r558 to i32
+store i32 %r561, i32* %r560
+%r562 = lshr i512 %r558, 32
+%r564 = getelementptr i32, i32* %r1, i32 1
+%r565 = trunc i512 %r562 to i32
+store i32 %r565, i32* %r564
+%r566 = lshr i512 %r562, 32
+%r568 = getelementptr i32, i32* %r1, i32 2
+%r569 = trunc i512 %r566 to i32
+store i32 %r569, i32* %r568
+%r570 = lshr i512 %r566, 32
+%r572 = getelementptr i32, i32* %r1, i32 3
+%r573 = trunc i512 %r570 to i32
+store i32 %r573, i32* %r572
+%r574 = lshr i512 %r570, 32
+%r576 = getelementptr i32, i32* %r1, i32 4
+%r577 = trunc i512 %r574 to i32
+store i32 %r577, i32* %r576
+%r578 = lshr i512 %r574, 32
+%r580 = getelementptr i32, i32* %r1, i32 5
+%r581 = trunc i512 %r578 to i32
+store i32 %r581, i32* %r580
+%r582 = lshr i512 %r578, 32
+%r584 = getelementptr i32, i32* %r1, i32 6
+%r585 = trunc i512 %r582 to i32
+store i32 %r585, i32* %r584
+%r586 = lshr i512 %r582, 32
+%r588 = getelementptr i32, i32* %r1, i32 7
+%r589 = trunc i512 %r586 to i32
+store i32 %r589, i32* %r588
+%r590 = lshr i512 %r586, 32
+%r592 = getelementptr i32, i32* %r1, i32 8
+%r593 = trunc i512 %r590 to i32
+store i32 %r593, i32* %r592
+%r594 = lshr i512 %r590, 32
+%r596 = getelementptr i32, i32* %r1, i32 9
+%r597 = trunc i512 %r594 to i32
+store i32 %r597, i32* %r596
+%r598 = lshr i512 %r594, 32
+%r600 = getelementptr i32, i32* %r1, i32 10
+%r601 = trunc i512 %r598 to i32
+store i32 %r601, i32* %r600
+%r602 = lshr i512 %r598, 32
+%r604 = getelementptr i32, i32* %r1, i32 11
+%r605 = trunc i512 %r602 to i32
+store i32 %r605, i32* %r604
+%r606 = lshr i512 %r602, 32
+%r608 = getelementptr i32, i32* %r1, i32 12
+%r609 = trunc i512 %r606 to i32
+store i32 %r609, i32* %r608
+%r610 = lshr i512 %r606, 32
+%r612 = getelementptr i32, i32* %r1, i32 13
+%r613 = trunc i512 %r610 to i32
+store i32 %r613, i32* %r612
+%r614 = lshr i512 %r610, 32
+%r616 = getelementptr i32, i32* %r1, i32 14
+%r617 = trunc i512 %r614 to i32
+store i32 %r617, i32* %r616
+%r618 = lshr i512 %r614, 32
+%r620 = getelementptr i32, i32* %r1, i32 15
+%r621 = trunc i512 %r618 to i32
+store i32 %r621, i32* %r620
+ret void
+}
+define void @mcl_fp_montRedNF16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3)
 {
 %r5 = getelementptr i32, i32* %r3, i32 -1
 %r6 = load i32, i32* %r5
@@ -50235,407 +16584,484 @@ define void @mcl_fp_montRed17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalia
 %r110 = zext i32 %r109 to i512
 %r111 = shl i512 %r110, 480
 %r112 = or i512 %r106, %r111
-%r113 = zext i512 %r112 to i544
-%r115 = getelementptr i32, i32* %r3, i32 16
-%r116 = load i32, i32* %r115
-%r117 = zext i32 %r116 to i544
-%r118 = shl i544 %r117, 512
-%r119 = or i544 %r113, %r118
-%r120 = load i32, i32* %r2
-%r121 = zext i32 %r120 to i64
-%r123 = getelementptr i32, i32* %r2, i32 1
+%r113 = load i32, i32* %r2
+%r114 = zext i32 %r113 to i64
+%r116 = getelementptr i32, i32* %r2, i32 1
+%r117 = load i32, i32* %r116
+%r118 = zext i32 %r117 to i64
+%r119 = shl i64 %r118, 32
+%r120 = or i64 %r114, %r119
+%r121 = zext i64 %r120 to i96
+%r123 = getelementptr i32, i32* %r2, i32 2
 %r124 = load i32, i32* %r123
-%r125 = zext i32 %r124 to i64
-%r126 = shl i64 %r125, 32
-%r127 = or i64 %r121, %r126
-%r128 = zext i64 %r127 to i96
-%r130 = getelementptr i32, i32* %r2, i32 2
+%r125 = zext i32 %r124 to i96
+%r126 = shl i96 %r125, 64
+%r127 = or i96 %r121, %r126
+%r128 = zext i96 %r127 to i128
+%r130 = getelementptr i32, i32* %r2, i32 3
 %r131 = load i32, i32* %r130
-%r132 = zext i32 %r131 to i96
-%r133 = shl i96 %r132, 64
-%r134 = or i96 %r128, %r133
-%r135 = zext i96 %r134 to i128
-%r137 = getelementptr i32, i32* %r2, i32 3
+%r132 = zext i32 %r131 to i128
+%r133 = shl i128 %r132, 96
+%r134 = or i128 %r128, %r133
+%r135 = zext i128 %r134 to i160
+%r137 = getelementptr i32, i32* %r2, i32 4
 %r138 = load i32, i32* %r137
-%r139 = zext i32 %r138 to i128
-%r140 = shl i128 %r139, 96
-%r141 = or i128 %r135, %r140
-%r142 = zext i128 %r141 to i160
-%r144 = getelementptr i32, i32* %r2, i32 4
+%r139 = zext i32 %r138 to i160
+%r140 = shl i160 %r139, 128
+%r141 = or i160 %r135, %r140
+%r142 = zext i160 %r141 to i192
+%r144 = getelementptr i32, i32* %r2, i32 5
 %r145 = load i32, i32* %r144
-%r146 = zext i32 %r145 to i160
-%r147 = shl i160 %r146, 128
-%r148 = or i160 %r142, %r147
-%r149 = zext i160 %r148 to i192
-%r151 = getelementptr i32, i32* %r2, i32 5
+%r146 = zext i32 %r145 to i192
+%r147 = shl i192 %r146, 160
+%r148 = or i192 %r142, %r147
+%r149 = zext i192 %r148 to i224
+%r151 = getelementptr i32, i32* %r2, i32 6
 %r152 = load i32, i32* %r151
-%r153 = zext i32 %r152 to i192
-%r154 = shl i192 %r153, 160
-%r155 = or i192 %r149, %r154
-%r156 = zext i192 %r155 to i224
-%r158 = getelementptr i32, i32* %r2, i32 6
+%r153 = zext i32 %r152 to i224
+%r154 = shl i224 %r153, 192
+%r155 = or i224 %r149, %r154
+%r156 = zext i224 %r155 to i256
+%r158 = getelementptr i32, i32* %r2, i32 7
 %r159 = load i32, i32* %r158
-%r160 = zext i32 %r159 to i224
-%r161 = shl i224 %r160, 192
-%r162 = or i224 %r156, %r161
-%r163 = zext i224 %r162 to i256
-%r165 = getelementptr i32, i32* %r2, i32 7
+%r160 = zext i32 %r159 to i256
+%r161 = shl i256 %r160, 224
+%r162 = or i256 %r156, %r161
+%r163 = zext i256 %r162 to i288
+%r165 = getelementptr i32, i32* %r2, i32 8
 %r166 = load i32, i32* %r165
-%r167 = zext i32 %r166 to i256
-%r168 = shl i256 %r167, 224
-%r169 = or i256 %r163, %r168
-%r170 = zext i256 %r169 to i288
-%r172 = getelementptr i32, i32* %r2, i32 8
+%r167 = zext i32 %r166 to i288
+%r168 = shl i288 %r167, 256
+%r169 = or i288 %r163, %r168
+%r170 = zext i288 %r169 to i320
+%r172 = getelementptr i32, i32* %r2, i32 9
 %r173 = load i32, i32* %r172
-%r174 = zext i32 %r173 to i288
-%r175 = shl i288 %r174, 256
-%r176 = or i288 %r170, %r175
-%r177 = zext i288 %r176 to i320
-%r179 = getelementptr i32, i32* %r2, i32 9
+%r174 = zext i32 %r173 to i320
+%r175 = shl i320 %r174, 288
+%r176 = or i320 %r170, %r175
+%r177 = zext i320 %r176 to i352
+%r179 = getelementptr i32, i32* %r2, i32 10
 %r180 = load i32, i32* %r179
-%r181 = zext i32 %r180 to i320
-%r182 = shl i320 %r181, 288
-%r183 = or i320 %r177, %r182
-%r184 = zext i320 %r183 to i352
-%r186 = getelementptr i32, i32* %r2, i32 10
+%r181 = zext i32 %r180 to i352
+%r182 = shl i352 %r181, 320
+%r183 = or i352 %r177, %r182
+%r184 = zext i352 %r183 to i384
+%r186 = getelementptr i32, i32* %r2, i32 11
 %r187 = load i32, i32* %r186
-%r188 = zext i32 %r187 to i352
-%r189 = shl i352 %r188, 320
-%r190 = or i352 %r184, %r189
-%r191 = zext i352 %r190 to i384
-%r193 = getelementptr i32, i32* %r2, i32 11
+%r188 = zext i32 %r187 to i384
+%r189 = shl i384 %r188, 352
+%r190 = or i384 %r184, %r189
+%r191 = zext i384 %r190 to i416
+%r193 = getelementptr i32, i32* %r2, i32 12
 %r194 = load i32, i32* %r193
-%r195 = zext i32 %r194 to i384
-%r196 = shl i384 %r195, 352
-%r197 = or i384 %r191, %r196
-%r198 = zext i384 %r197 to i416
-%r200 = getelementptr i32, i32* %r2, i32 12
+%r195 = zext i32 %r194 to i416
+%r196 = shl i416 %r195, 384
+%r197 = or i416 %r191, %r196
+%r198 = zext i416 %r197 to i448
+%r200 = getelementptr i32, i32* %r2, i32 13
 %r201 = load i32, i32* %r200
-%r202 = zext i32 %r201 to i416
-%r203 = shl i416 %r202, 384
-%r204 = or i416 %r198, %r203
-%r205 = zext i416 %r204 to i448
-%r207 = getelementptr i32, i32* %r2, i32 13
+%r202 = zext i32 %r201 to i448
+%r203 = shl i448 %r202, 416
+%r204 = or i448 %r198, %r203
+%r205 = zext i448 %r204 to i480
+%r207 = getelementptr i32, i32* %r2, i32 14
 %r208 = load i32, i32* %r207
-%r209 = zext i32 %r208 to i448
-%r210 = shl i448 %r209, 416
-%r211 = or i448 %r205, %r210
-%r212 = zext i448 %r211 to i480
-%r214 = getelementptr i32, i32* %r2, i32 14
+%r209 = zext i32 %r208 to i480
+%r210 = shl i480 %r209, 448
+%r211 = or i480 %r205, %r210
+%r212 = zext i480 %r211 to i512
+%r214 = getelementptr i32, i32* %r2, i32 15
 %r215 = load i32, i32* %r214
-%r216 = zext i32 %r215 to i480
-%r217 = shl i480 %r216, 448
-%r218 = or i480 %r212, %r217
-%r219 = zext i480 %r218 to i512
-%r221 = getelementptr i32, i32* %r2, i32 15
-%r222 = load i32, i32* %r221
-%r223 = zext i32 %r222 to i512
-%r224 = shl i512 %r223, 480
-%r225 = or i512 %r219, %r224
-%r226 = zext i512 %r225 to i544
-%r228 = getelementptr i32, i32* %r2, i32 16
-%r229 = load i32, i32* %r228
-%r230 = zext i32 %r229 to i544
-%r231 = shl i544 %r230, 512
-%r232 = or i544 %r226, %r231
-%r233 = zext i544 %r232 to i576
-%r235 = getelementptr i32, i32* %r2, i32 17
-%r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i576
-%r238 = shl i576 %r237, 544
-%r239 = or i576 %r233, %r238
-%r240 = zext i576 %r239 to i608
-%r242 = getelementptr i32, i32* %r2, i32 18
-%r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i608
-%r245 = shl i608 %r244, 576
-%r246 = or i608 %r240, %r245
-%r247 = zext i608 %r246 to i640
-%r249 = getelementptr i32, i32* %r2, i32 19
-%r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i640
-%r252 = shl i640 %r251, 608
-%r253 = or i640 %r247, %r252
-%r254 = zext i640 %r253 to i672
-%r256 = getelementptr i32, i32* %r2, i32 20
-%r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i672
-%r259 = shl i672 %r258, 640
-%r260 = or i672 %r254, %r259
-%r261 = zext i672 %r260 to i704
-%r263 = getelementptr i32, i32* %r2, i32 21
-%r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i704
-%r266 = shl i704 %r265, 672
-%r267 = or i704 %r261, %r266
-%r268 = zext i704 %r267 to i736
-%r270 = getelementptr i32, i32* %r2, i32 22
-%r271 = load i32, i32* %r270
-%r272 = zext i32 %r271 to i736
-%r273 = shl i736 %r272, 704
-%r274 = or i736 %r268, %r273
-%r275 = zext i736 %r274 to i768
-%r277 = getelementptr i32, i32* %r2, i32 23
-%r278 = load i32, i32* %r277
-%r279 = zext i32 %r278 to i768
-%r280 = shl i768 %r279, 736
-%r281 = or i768 %r275, %r280
-%r282 = zext i768 %r281 to i800
-%r284 = getelementptr i32, i32* %r2, i32 24
-%r285 = load i32, i32* %r284
-%r286 = zext i32 %r285 to i800
-%r287 = shl i800 %r286, 768
-%r288 = or i800 %r282, %r287
-%r289 = zext i800 %r288 to i832
-%r291 = getelementptr i32, i32* %r2, i32 25
-%r292 = load i32, i32* %r291
-%r293 = zext i32 %r292 to i832
-%r294 = shl i832 %r293, 800
-%r295 = or i832 %r289, %r294
-%r296 = zext i832 %r295 to i864
-%r298 = getelementptr i32, i32* %r2, i32 26
-%r299 = load i32, i32* %r298
-%r300 = zext i32 %r299 to i864
-%r301 = shl i864 %r300, 832
-%r302 = or i864 %r296, %r301
-%r303 = zext i864 %r302 to i896
-%r305 = getelementptr i32, i32* %r2, i32 27
-%r306 = load i32, i32* %r305
-%r307 = zext i32 %r306 to i896
-%r308 = shl i896 %r307, 864
-%r309 = or i896 %r303, %r308
-%r310 = zext i896 %r309 to i928
-%r312 = getelementptr i32, i32* %r2, i32 28
-%r313 = load i32, i32* %r312
-%r314 = zext i32 %r313 to i928
-%r315 = shl i928 %r314, 896
-%r316 = or i928 %r310, %r315
-%r317 = zext i928 %r316 to i960
-%r319 = getelementptr i32, i32* %r2, i32 29
-%r320 = load i32, i32* %r319
-%r321 = zext i32 %r320 to i960
-%r322 = shl i960 %r321, 928
-%r323 = or i960 %r317, %r322
-%r324 = zext i960 %r323 to i992
-%r326 = getelementptr i32, i32* %r2, i32 30
-%r327 = load i32, i32* %r326
-%r328 = zext i32 %r327 to i992
-%r329 = shl i992 %r328, 960
-%r330 = or i992 %r324, %r329
-%r331 = zext i992 %r330 to i1024
-%r333 = getelementptr i32, i32* %r2, i32 31
-%r334 = load i32, i32* %r333
-%r335 = zext i32 %r334 to i1024
-%r336 = shl i1024 %r335, 992
-%r337 = or i1024 %r331, %r336
-%r338 = zext i1024 %r337 to i1056
-%r340 = getelementptr i32, i32* %r2, i32 32
-%r341 = load i32, i32* %r340
-%r342 = zext i32 %r341 to i1056
-%r343 = shl i1056 %r342, 1024
-%r344 = or i1056 %r338, %r343
-%r345 = zext i1056 %r344 to i1088
-%r347 = getelementptr i32, i32* %r2, i32 33
-%r348 = load i32, i32* %r347
-%r349 = zext i32 %r348 to i1088
-%r350 = shl i1088 %r349, 1056
-%r351 = or i1088 %r345, %r350
-%r352 = zext i1088 %r351 to i1120
-%r353 = trunc i1120 %r352 to i32
-%r354 = mul i32 %r353, %r6
-%r355 = call i576 @mulPv544x32(i32* %r3, i32 %r354)
-%r356 = zext i576 %r355 to i1120
-%r357 = add i1120 %r352, %r356
-%r358 = lshr i1120 %r357, 32
-%r359 = trunc i1120 %r358 to i1088
-%r360 = trunc i1088 %r359 to i32
-%r361 = mul i32 %r360, %r6
-%r362 = call i576 @mulPv544x32(i32* %r3, i32 %r361)
-%r363 = zext i576 %r362 to i1088
-%r364 = add i1088 %r359, %r363
-%r365 = lshr i1088 %r364, 32
-%r366 = trunc i1088 %r365 to i1056
-%r367 = trunc i1056 %r366 to i32
-%r368 = mul i32 %r367, %r6
-%r369 = call i576 @mulPv544x32(i32* %r3, i32 %r368)
-%r370 = zext i576 %r369 to i1056
-%r371 = add i1056 %r366, %r370
-%r372 = lshr i1056 %r371, 32
-%r373 = trunc i1056 %r372 to i1024
-%r374 = trunc i1024 %r373 to i32
-%r375 = mul i32 %r374, %r6
-%r376 = call i576 @mulPv544x32(i32* %r3, i32 %r375)
-%r377 = zext i576 %r376 to i1024
-%r378 = add i1024 %r373, %r377
-%r379 = lshr i1024 %r378, 32
-%r380 = trunc i1024 %r379 to i992
-%r381 = trunc i992 %r380 to i32
-%r382 = mul i32 %r381, %r6
-%r383 = call i576 @mulPv544x32(i32* %r3, i32 %r382)
-%r384 = zext i576 %r383 to i992
-%r385 = add i992 %r380, %r384
-%r386 = lshr i992 %r385, 32
-%r387 = trunc i992 %r386 to i960
-%r388 = trunc i960 %r387 to i32
-%r389 = mul i32 %r388, %r6
-%r390 = call i576 @mulPv544x32(i32* %r3, i32 %r389)
-%r391 = zext i576 %r390 to i960
-%r392 = add i960 %r387, %r391
-%r393 = lshr i960 %r392, 32
-%r394 = trunc i960 %r393 to i928
-%r395 = trunc i928 %r394 to i32
-%r396 = mul i32 %r395, %r6
-%r397 = call i576 @mulPv544x32(i32* %r3, i32 %r396)
-%r398 = zext i576 %r397 to i928
-%r399 = add i928 %r394, %r398
-%r400 = lshr i928 %r399, 32
-%r401 = trunc i928 %r400 to i896
-%r402 = trunc i896 %r401 to i32
-%r403 = mul i32 %r402, %r6
-%r404 = call i576 @mulPv544x32(i32* %r3, i32 %r403)
-%r405 = zext i576 %r404 to i896
-%r406 = add i896 %r401, %r405
-%r407 = lshr i896 %r406, 32
-%r408 = trunc i896 %r407 to i864
-%r409 = trunc i864 %r408 to i32
-%r410 = mul i32 %r409, %r6
-%r411 = call i576 @mulPv544x32(i32* %r3, i32 %r410)
-%r412 = zext i576 %r411 to i864
-%r413 = add i864 %r408, %r412
-%r414 = lshr i864 %r413, 32
-%r415 = trunc i864 %r414 to i832
-%r416 = trunc i832 %r415 to i32
-%r417 = mul i32 %r416, %r6
-%r418 = call i576 @mulPv544x32(i32* %r3, i32 %r417)
-%r419 = zext i576 %r418 to i832
-%r420 = add i832 %r415, %r419
-%r421 = lshr i832 %r420, 32
-%r422 = trunc i832 %r421 to i800
-%r423 = trunc i800 %r422 to i32
-%r424 = mul i32 %r423, %r6
-%r425 = call i576 @mulPv544x32(i32* %r3, i32 %r424)
-%r426 = zext i576 %r425 to i800
-%r427 = add i800 %r422, %r426
-%r428 = lshr i800 %r427, 32
-%r429 = trunc i800 %r428 to i768
-%r430 = trunc i768 %r429 to i32
-%r431 = mul i32 %r430, %r6
-%r432 = call i576 @mulPv544x32(i32* %r3, i32 %r431)
-%r433 = zext i576 %r432 to i768
-%r434 = add i768 %r429, %r433
-%r435 = lshr i768 %r434, 32
-%r436 = trunc i768 %r435 to i736
-%r437 = trunc i736 %r436 to i32
-%r438 = mul i32 %r437, %r6
-%r439 = call i576 @mulPv544x32(i32* %r3, i32 %r438)
-%r440 = zext i576 %r439 to i736
-%r441 = add i736 %r436, %r440
-%r442 = lshr i736 %r441, 32
-%r443 = trunc i736 %r442 to i704
-%r444 = trunc i704 %r443 to i32
-%r445 = mul i32 %r444, %r6
-%r446 = call i576 @mulPv544x32(i32* %r3, i32 %r445)
-%r447 = zext i576 %r446 to i704
-%r448 = add i704 %r443, %r447
-%r449 = lshr i704 %r448, 32
-%r450 = trunc i704 %r449 to i672
-%r451 = trunc i672 %r450 to i32
-%r452 = mul i32 %r451, %r6
-%r453 = call i576 @mulPv544x32(i32* %r3, i32 %r452)
-%r454 = zext i576 %r453 to i672
-%r455 = add i672 %r450, %r454
-%r456 = lshr i672 %r455, 32
-%r457 = trunc i672 %r456 to i640
-%r458 = trunc i640 %r457 to i32
-%r459 = mul i32 %r458, %r6
-%r460 = call i576 @mulPv544x32(i32* %r3, i32 %r459)
-%r461 = zext i576 %r460 to i640
-%r462 = add i640 %r457, %r461
-%r463 = lshr i640 %r462, 32
-%r464 = trunc i640 %r463 to i608
-%r465 = trunc i608 %r464 to i32
-%r466 = mul i32 %r465, %r6
-%r467 = call i576 @mulPv544x32(i32* %r3, i32 %r466)
-%r468 = zext i576 %r467 to i608
-%r469 = add i608 %r464, %r468
-%r470 = lshr i608 %r469, 32
-%r471 = trunc i608 %r470 to i576
-%r472 = zext i544 %r119 to i576
-%r473 = sub i576 %r471, %r472
-%r474 = lshr i576 %r473, 544
-%r475 = trunc i576 %r474 to i1
-%r476 = select i1 %r475, i576 %r471, i576 %r473
-%r477 = trunc i576 %r476 to i544
-%r478 = trunc i544 %r477 to i32
-%r480 = getelementptr i32, i32* %r1, i32 0
-store i32 %r478, i32* %r480
-%r481 = lshr i544 %r477, 32
-%r482 = trunc i544 %r481 to i32
-%r484 = getelementptr i32, i32* %r1, i32 1
-store i32 %r482, i32* %r484
-%r485 = lshr i544 %r481, 32
-%r486 = trunc i544 %r485 to i32
-%r488 = getelementptr i32, i32* %r1, i32 2
-store i32 %r486, i32* %r488
-%r489 = lshr i544 %r485, 32
-%r490 = trunc i544 %r489 to i32
-%r492 = getelementptr i32, i32* %r1, i32 3
-store i32 %r490, i32* %r492
-%r493 = lshr i544 %r489, 32
-%r494 = trunc i544 %r493 to i32
-%r496 = getelementptr i32, i32* %r1, i32 4
-store i32 %r494, i32* %r496
-%r497 = lshr i544 %r493, 32
-%r498 = trunc i544 %r497 to i32
-%r500 = getelementptr i32, i32* %r1, i32 5
-store i32 %r498, i32* %r500
-%r501 = lshr i544 %r497, 32
-%r502 = trunc i544 %r501 to i32
-%r504 = getelementptr i32, i32* %r1, i32 6
-store i32 %r502, i32* %r504
-%r505 = lshr i544 %r501, 32
-%r506 = trunc i544 %r505 to i32
-%r508 = getelementptr i32, i32* %r1, i32 7
-store i32 %r506, i32* %r508
-%r509 = lshr i544 %r505, 32
-%r510 = trunc i544 %r509 to i32
-%r512 = getelementptr i32, i32* %r1, i32 8
-store i32 %r510, i32* %r512
-%r513 = lshr i544 %r509, 32
-%r514 = trunc i544 %r513 to i32
-%r516 = getelementptr i32, i32* %r1, i32 9
-store i32 %r514, i32* %r516
-%r517 = lshr i544 %r513, 32
-%r518 = trunc i544 %r517 to i32
-%r520 = getelementptr i32, i32* %r1, i32 10
-store i32 %r518, i32* %r520
-%r521 = lshr i544 %r517, 32
-%r522 = trunc i544 %r521 to i32
-%r524 = getelementptr i32, i32* %r1, i32 11
-store i32 %r522, i32* %r524
-%r525 = lshr i544 %r521, 32
-%r526 = trunc i544 %r525 to i32
-%r528 = getelementptr i32, i32* %r1, i32 12
-store i32 %r526, i32* %r528
-%r529 = lshr i544 %r525, 32
-%r530 = trunc i544 %r529 to i32
-%r532 = getelementptr i32, i32* %r1, i32 13
-store i32 %r530, i32* %r532
-%r533 = lshr i544 %r529, 32
-%r534 = trunc i544 %r533 to i32
-%r536 = getelementptr i32, i32* %r1, i32 14
-store i32 %r534, i32* %r536
-%r537 = lshr i544 %r533, 32
-%r538 = trunc i544 %r537 to i32
-%r540 = getelementptr i32, i32* %r1, i32 15
-store i32 %r538, i32* %r540
-%r541 = lshr i544 %r537, 32
-%r542 = trunc i544 %r541 to i32
-%r544 = getelementptr i32, i32* %r1, i32 16
-store i32 %r542, i32* %r544
+%r216 = zext i32 %r215 to i512
+%r217 = shl i512 %r216, 480
+%r218 = or i512 %r212, %r217
+%r219 = trunc i512 %r218 to i32
+%r220 = mul i32 %r219, %r6
+%r221 = call i544 @mulPv512x32(i32* %r3, i32 %r220)
+%r223 = getelementptr i32, i32* %r2, i32 16
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i544
+%r226 = shl i544 %r225, 512
+%r227 = zext i512 %r218 to i544
+%r228 = or i544 %r226, %r227
+%r229 = zext i544 %r228 to i576
+%r230 = zext i544 %r221 to i576
+%r231 = add i576 %r229, %r230
+%r232 = lshr i576 %r231, 32
+%r233 = trunc i576 %r232 to i544
+%r234 = lshr i544 %r233, 512
+%r235 = trunc i544 %r234 to i32
+%r236 = trunc i544 %r233 to i512
+%r237 = trunc i512 %r236 to i32
+%r238 = mul i32 %r237, %r6
+%r239 = call i544 @mulPv512x32(i32* %r3, i32 %r238)
+%r240 = zext i32 %r235 to i544
+%r241 = shl i544 %r240, 512
+%r242 = add i544 %r239, %r241
+%r244 = getelementptr i32, i32* %r2, i32 17
+%r245 = load i32, i32* %r244
+%r246 = zext i32 %r245 to i544
+%r247 = shl i544 %r246, 512
+%r248 = zext i512 %r236 to i544
+%r249 = or i544 %r247, %r248
+%r250 = zext i544 %r249 to i576
+%r251 = zext i544 %r242 to i576
+%r252 = add i576 %r250, %r251
+%r253 = lshr i576 %r252, 32
+%r254 = trunc i576 %r253 to i544
+%r255 = lshr i544 %r254, 512
+%r256 = trunc i544 %r255 to i32
+%r257 = trunc i544 %r254 to i512
+%r258 = trunc i512 %r257 to i32
+%r259 = mul i32 %r258, %r6
+%r260 = call i544 @mulPv512x32(i32* %r3, i32 %r259)
+%r261 = zext i32 %r256 to i544
+%r262 = shl i544 %r261, 512
+%r263 = add i544 %r260, %r262
+%r265 = getelementptr i32, i32* %r2, i32 18
+%r266 = load i32, i32* %r265
+%r267 = zext i32 %r266 to i544
+%r268 = shl i544 %r267, 512
+%r269 = zext i512 %r257 to i544
+%r270 = or i544 %r268, %r269
+%r271 = zext i544 %r270 to i576
+%r272 = zext i544 %r263 to i576
+%r273 = add i576 %r271, %r272
+%r274 = lshr i576 %r273, 32
+%r275 = trunc i576 %r274 to i544
+%r276 = lshr i544 %r275, 512
+%r277 = trunc i544 %r276 to i32
+%r278 = trunc i544 %r275 to i512
+%r279 = trunc i512 %r278 to i32
+%r280 = mul i32 %r279, %r6
+%r281 = call i544 @mulPv512x32(i32* %r3, i32 %r280)
+%r282 = zext i32 %r277 to i544
+%r283 = shl i544 %r282, 512
+%r284 = add i544 %r281, %r283
+%r286 = getelementptr i32, i32* %r2, i32 19
+%r287 = load i32, i32* %r286
+%r288 = zext i32 %r287 to i544
+%r289 = shl i544 %r288, 512
+%r290 = zext i512 %r278 to i544
+%r291 = or i544 %r289, %r290
+%r292 = zext i544 %r291 to i576
+%r293 = zext i544 %r284 to i576
+%r294 = add i576 %r292, %r293
+%r295 = lshr i576 %r294, 32
+%r296 = trunc i576 %r295 to i544
+%r297 = lshr i544 %r296, 512
+%r298 = trunc i544 %r297 to i32
+%r299 = trunc i544 %r296 to i512
+%r300 = trunc i512 %r299 to i32
+%r301 = mul i32 %r300, %r6
+%r302 = call i544 @mulPv512x32(i32* %r3, i32 %r301)
+%r303 = zext i32 %r298 to i544
+%r304 = shl i544 %r303, 512
+%r305 = add i544 %r302, %r304
+%r307 = getelementptr i32, i32* %r2, i32 20
+%r308 = load i32, i32* %r307
+%r309 = zext i32 %r308 to i544
+%r310 = shl i544 %r309, 512
+%r311 = zext i512 %r299 to i544
+%r312 = or i544 %r310, %r311
+%r313 = zext i544 %r312 to i576
+%r314 = zext i544 %r305 to i576
+%r315 = add i576 %r313, %r314
+%r316 = lshr i576 %r315, 32
+%r317 = trunc i576 %r316 to i544
+%r318 = lshr i544 %r317, 512
+%r319 = trunc i544 %r318 to i32
+%r320 = trunc i544 %r317 to i512
+%r321 = trunc i512 %r320 to i32
+%r322 = mul i32 %r321, %r6
+%r323 = call i544 @mulPv512x32(i32* %r3, i32 %r322)
+%r324 = zext i32 %r319 to i544
+%r325 = shl i544 %r324, 512
+%r326 = add i544 %r323, %r325
+%r328 = getelementptr i32, i32* %r2, i32 21
+%r329 = load i32, i32* %r328
+%r330 = zext i32 %r329 to i544
+%r331 = shl i544 %r330, 512
+%r332 = zext i512 %r320 to i544
+%r333 = or i544 %r331, %r332
+%r334 = zext i544 %r333 to i576
+%r335 = zext i544 %r326 to i576
+%r336 = add i576 %r334, %r335
+%r337 = lshr i576 %r336, 32
+%r338 = trunc i576 %r337 to i544
+%r339 = lshr i544 %r338, 512
+%r340 = trunc i544 %r339 to i32
+%r341 = trunc i544 %r338 to i512
+%r342 = trunc i512 %r341 to i32
+%r343 = mul i32 %r342, %r6
+%r344 = call i544 @mulPv512x32(i32* %r3, i32 %r343)
+%r345 = zext i32 %r340 to i544
+%r346 = shl i544 %r345, 512
+%r347 = add i544 %r344, %r346
+%r349 = getelementptr i32, i32* %r2, i32 22
+%r350 = load i32, i32* %r349
+%r351 = zext i32 %r350 to i544
+%r352 = shl i544 %r351, 512
+%r353 = zext i512 %r341 to i544
+%r354 = or i544 %r352, %r353
+%r355 = zext i544 %r354 to i576
+%r356 = zext i544 %r347 to i576
+%r357 = add i576 %r355, %r356
+%r358 = lshr i576 %r357, 32
+%r359 = trunc i576 %r358 to i544
+%r360 = lshr i544 %r359, 512
+%r361 = trunc i544 %r360 to i32
+%r362 = trunc i544 %r359 to i512
+%r363 = trunc i512 %r362 to i32
+%r364 = mul i32 %r363, %r6
+%r365 = call i544 @mulPv512x32(i32* %r3, i32 %r364)
+%r366 = zext i32 %r361 to i544
+%r367 = shl i544 %r366, 512
+%r368 = add i544 %r365, %r367
+%r370 = getelementptr i32, i32* %r2, i32 23
+%r371 = load i32, i32* %r370
+%r372 = zext i32 %r371 to i544
+%r373 = shl i544 %r372, 512
+%r374 = zext i512 %r362 to i544
+%r375 = or i544 %r373, %r374
+%r376 = zext i544 %r375 to i576
+%r377 = zext i544 %r368 to i576
+%r378 = add i576 %r376, %r377
+%r379 = lshr i576 %r378, 32
+%r380 = trunc i576 %r379 to i544
+%r381 = lshr i544 %r380, 512
+%r382 = trunc i544 %r381 to i32
+%r383 = trunc i544 %r380 to i512
+%r384 = trunc i512 %r383 to i32
+%r385 = mul i32 %r384, %r6
+%r386 = call i544 @mulPv512x32(i32* %r3, i32 %r385)
+%r387 = zext i32 %r382 to i544
+%r388 = shl i544 %r387, 512
+%r389 = add i544 %r386, %r388
+%r391 = getelementptr i32, i32* %r2, i32 24
+%r392 = load i32, i32* %r391
+%r393 = zext i32 %r392 to i544
+%r394 = shl i544 %r393, 512
+%r395 = zext i512 %r383 to i544
+%r396 = or i544 %r394, %r395
+%r397 = zext i544 %r396 to i576
+%r398 = zext i544 %r389 to i576
+%r399 = add i576 %r397, %r398
+%r400 = lshr i576 %r399, 32
+%r401 = trunc i576 %r400 to i544
+%r402 = lshr i544 %r401, 512
+%r403 = trunc i544 %r402 to i32
+%r404 = trunc i544 %r401 to i512
+%r405 = trunc i512 %r404 to i32
+%r406 = mul i32 %r405, %r6
+%r407 = call i544 @mulPv512x32(i32* %r3, i32 %r406)
+%r408 = zext i32 %r403 to i544
+%r409 = shl i544 %r408, 512
+%r410 = add i544 %r407, %r409
+%r412 = getelementptr i32, i32* %r2, i32 25
+%r413 = load i32, i32* %r412
+%r414 = zext i32 %r413 to i544
+%r415 = shl i544 %r414, 512
+%r416 = zext i512 %r404 to i544
+%r417 = or i544 %r415, %r416
+%r418 = zext i544 %r417 to i576
+%r419 = zext i544 %r410 to i576
+%r420 = add i576 %r418, %r419
+%r421 = lshr i576 %r420, 32
+%r422 = trunc i576 %r421 to i544
+%r423 = lshr i544 %r422, 512
+%r424 = trunc i544 %r423 to i32
+%r425 = trunc i544 %r422 to i512
+%r426 = trunc i512 %r425 to i32
+%r427 = mul i32 %r426, %r6
+%r428 = call i544 @mulPv512x32(i32* %r3, i32 %r427)
+%r429 = zext i32 %r424 to i544
+%r430 = shl i544 %r429, 512
+%r431 = add i544 %r428, %r430
+%r433 = getelementptr i32, i32* %r2, i32 26
+%r434 = load i32, i32* %r433
+%r435 = zext i32 %r434 to i544
+%r436 = shl i544 %r435, 512
+%r437 = zext i512 %r425 to i544
+%r438 = or i544 %r436, %r437
+%r439 = zext i544 %r438 to i576
+%r440 = zext i544 %r431 to i576
+%r441 = add i576 %r439, %r440
+%r442 = lshr i576 %r441, 32
+%r443 = trunc i576 %r442 to i544
+%r444 = lshr i544 %r443, 512
+%r445 = trunc i544 %r444 to i32
+%r446 = trunc i544 %r443 to i512
+%r447 = trunc i512 %r446 to i32
+%r448 = mul i32 %r447, %r6
+%r449 = call i544 @mulPv512x32(i32* %r3, i32 %r448)
+%r450 = zext i32 %r445 to i544
+%r451 = shl i544 %r450, 512
+%r452 = add i544 %r449, %r451
+%r454 = getelementptr i32, i32* %r2, i32 27
+%r455 = load i32, i32* %r454
+%r456 = zext i32 %r455 to i544
+%r457 = shl i544 %r456, 512
+%r458 = zext i512 %r446 to i544
+%r459 = or i544 %r457, %r458
+%r460 = zext i544 %r459 to i576
+%r461 = zext i544 %r452 to i576
+%r462 = add i576 %r460, %r461
+%r463 = lshr i576 %r462, 32
+%r464 = trunc i576 %r463 to i544
+%r465 = lshr i544 %r464, 512
+%r466 = trunc i544 %r465 to i32
+%r467 = trunc i544 %r464 to i512
+%r468 = trunc i512 %r467 to i32
+%r469 = mul i32 %r468, %r6
+%r470 = call i544 @mulPv512x32(i32* %r3, i32 %r469)
+%r471 = zext i32 %r466 to i544
+%r472 = shl i544 %r471, 512
+%r473 = add i544 %r470, %r472
+%r475 = getelementptr i32, i32* %r2, i32 28
+%r476 = load i32, i32* %r475
+%r477 = zext i32 %r476 to i544
+%r478 = shl i544 %r477, 512
+%r479 = zext i512 %r467 to i544
+%r480 = or i544 %r478, %r479
+%r481 = zext i544 %r480 to i576
+%r482 = zext i544 %r473 to i576
+%r483 = add i576 %r481, %r482
+%r484 = lshr i576 %r483, 32
+%r485 = trunc i576 %r484 to i544
+%r486 = lshr i544 %r485, 512
+%r487 = trunc i544 %r486 to i32
+%r488 = trunc i544 %r485 to i512
+%r489 = trunc i512 %r488 to i32
+%r490 = mul i32 %r489, %r6
+%r491 = call i544 @mulPv512x32(i32* %r3, i32 %r490)
+%r492 = zext i32 %r487 to i544
+%r493 = shl i544 %r492, 512
+%r494 = add i544 %r491, %r493
+%r496 = getelementptr i32, i32* %r2, i32 29
+%r497 = load i32, i32* %r496
+%r498 = zext i32 %r497 to i544
+%r499 = shl i544 %r498, 512
+%r500 = zext i512 %r488 to i544
+%r501 = or i544 %r499, %r500
+%r502 = zext i544 %r501 to i576
+%r503 = zext i544 %r494 to i576
+%r504 = add i576 %r502, %r503
+%r505 = lshr i576 %r504, 32
+%r506 = trunc i576 %r505 to i544
+%r507 = lshr i544 %r506, 512
+%r508 = trunc i544 %r507 to i32
+%r509 = trunc i544 %r506 to i512
+%r510 = trunc i512 %r509 to i32
+%r511 = mul i32 %r510, %r6
+%r512 = call i544 @mulPv512x32(i32* %r3, i32 %r511)
+%r513 = zext i32 %r508 to i544
+%r514 = shl i544 %r513, 512
+%r515 = add i544 %r512, %r514
+%r517 = getelementptr i32, i32* %r2, i32 30
+%r518 = load i32, i32* %r517
+%r519 = zext i32 %r518 to i544
+%r520 = shl i544 %r519, 512
+%r521 = zext i512 %r509 to i544
+%r522 = or i544 %r520, %r521
+%r523 = zext i544 %r522 to i576
+%r524 = zext i544 %r515 to i576
+%r525 = add i576 %r523, %r524
+%r526 = lshr i576 %r525, 32
+%r527 = trunc i576 %r526 to i544
+%r528 = lshr i544 %r527, 512
+%r529 = trunc i544 %r528 to i32
+%r530 = trunc i544 %r527 to i512
+%r531 = trunc i512 %r530 to i32
+%r532 = mul i32 %r531, %r6
+%r533 = call i544 @mulPv512x32(i32* %r3, i32 %r532)
+%r534 = zext i32 %r529 to i544
+%r535 = shl i544 %r534, 512
+%r536 = add i544 %r533, %r535
+%r538 = getelementptr i32, i32* %r2, i32 31
+%r539 = load i32, i32* %r538
+%r540 = zext i32 %r539 to i544
+%r541 = shl i544 %r540, 512
+%r542 = zext i512 %r530 to i544
+%r543 = or i544 %r541, %r542
+%r544 = zext i544 %r543 to i576
+%r545 = zext i544 %r536 to i576
+%r546 = add i576 %r544, %r545
+%r547 = lshr i576 %r546, 32
+%r548 = trunc i576 %r547 to i544
+%r549 = lshr i544 %r548, 512
+%r550 = trunc i544 %r549 to i32
+%r551 = trunc i544 %r548 to i512
+%r552 = sub i512 %r551, %r112
+%r553 = lshr i512 %r552, 511
+%r554 = trunc i512 %r553 to i1
+%r555 = select i1 %r554, i512 %r551, i512 %r552
+%r557 = getelementptr i32, i32* %r1, i32 0
+%r558 = trunc i512 %r555 to i32
+store i32 %r558, i32* %r557
+%r559 = lshr i512 %r555, 32
+%r561 = getelementptr i32, i32* %r1, i32 1
+%r562 = trunc i512 %r559 to i32
+store i32 %r562, i32* %r561
+%r563 = lshr i512 %r559, 32
+%r565 = getelementptr i32, i32* %r1, i32 2
+%r566 = trunc i512 %r563 to i32
+store i32 %r566, i32* %r565
+%r567 = lshr i512 %r563, 32
+%r569 = getelementptr i32, i32* %r1, i32 3
+%r570 = trunc i512 %r567 to i32
+store i32 %r570, i32* %r569
+%r571 = lshr i512 %r567, 32
+%r573 = getelementptr i32, i32* %r1, i32 4
+%r574 = trunc i512 %r571 to i32
+store i32 %r574, i32* %r573
+%r575 = lshr i512 %r571, 32
+%r577 = getelementptr i32, i32* %r1, i32 5
+%r578 = trunc i512 %r575 to i32
+store i32 %r578, i32* %r577
+%r579 = lshr i512 %r575, 32
+%r581 = getelementptr i32, i32* %r1, i32 6
+%r582 = trunc i512 %r579 to i32
+store i32 %r582, i32* %r581
+%r583 = lshr i512 %r579, 32
+%r585 = getelementptr i32, i32* %r1, i32 7
+%r586 = trunc i512 %r583 to i32
+store i32 %r586, i32* %r585
+%r587 = lshr i512 %r583, 32
+%r589 = getelementptr i32, i32* %r1, i32 8
+%r590 = trunc i512 %r587 to i32
+store i32 %r590, i32* %r589
+%r591 = lshr i512 %r587, 32
+%r593 = getelementptr i32, i32* %r1, i32 9
+%r594 = trunc i512 %r591 to i32
+store i32 %r594, i32* %r593
+%r595 = lshr i512 %r591, 32
+%r597 = getelementptr i32, i32* %r1, i32 10
+%r598 = trunc i512 %r595 to i32
+store i32 %r598, i32* %r597
+%r599 = lshr i512 %r595, 32
+%r601 = getelementptr i32, i32* %r1, i32 11
+%r602 = trunc i512 %r599 to i32
+store i32 %r602, i32* %r601
+%r603 = lshr i512 %r599, 32
+%r605 = getelementptr i32, i32* %r1, i32 12
+%r606 = trunc i512 %r603 to i32
+store i32 %r606, i32* %r605
+%r607 = lshr i512 %r603, 32
+%r609 = getelementptr i32, i32* %r1, i32 13
+%r610 = trunc i512 %r607 to i32
+store i32 %r610, i32* %r609
+%r611 = lshr i512 %r607, 32
+%r613 = getelementptr i32, i32* %r1, i32 14
+%r614 = trunc i512 %r611 to i32
+store i32 %r614, i32* %r613
+%r615 = lshr i512 %r611, 32
+%r617 = getelementptr i32, i32* %r1, i32 15
+%r618 = trunc i512 %r615 to i32
+store i32 %r618, i32* %r617
 ret void
 }
-define i32 @mcl_fp_addPre17L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define i32 @mcl_fp_addPre16L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r3
 %r6 = zext i32 %r5 to i64
@@ -50729,184 +17155,168 @@ define i32 @mcl_fp_addPre17L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias
 %r109 = shl i512 %r108, 480
 %r110 = or i512 %r104, %r109
 %r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r3, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r119 = load i32, i32* %r4
-%r120 = zext i32 %r119 to i64
-%r122 = getelementptr i32, i32* %r4, i32 1
+%r112 = load i32, i32* %r4
+%r113 = zext i32 %r112 to i64
+%r115 = getelementptr i32, i32* %r4, i32 1
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i64
+%r118 = shl i64 %r117, 32
+%r119 = or i64 %r113, %r118
+%r120 = zext i64 %r119 to i96
+%r122 = getelementptr i32, i32* %r4, i32 2
 %r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i64
-%r125 = shl i64 %r124, 32
-%r126 = or i64 %r120, %r125
-%r127 = zext i64 %r126 to i96
-%r129 = getelementptr i32, i32* %r4, i32 2
+%r124 = zext i32 %r123 to i96
+%r125 = shl i96 %r124, 64
+%r126 = or i96 %r120, %r125
+%r127 = zext i96 %r126 to i128
+%r129 = getelementptr i32, i32* %r4, i32 3
 %r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i96
-%r132 = shl i96 %r131, 64
-%r133 = or i96 %r127, %r132
-%r134 = zext i96 %r133 to i128
-%r136 = getelementptr i32, i32* %r4, i32 3
+%r131 = zext i32 %r130 to i128
+%r132 = shl i128 %r131, 96
+%r133 = or i128 %r127, %r132
+%r134 = zext i128 %r133 to i160
+%r136 = getelementptr i32, i32* %r4, i32 4
 %r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i128
-%r139 = shl i128 %r138, 96
-%r140 = or i128 %r134, %r139
-%r141 = zext i128 %r140 to i160
-%r143 = getelementptr i32, i32* %r4, i32 4
+%r138 = zext i32 %r137 to i160
+%r139 = shl i160 %r138, 128
+%r140 = or i160 %r134, %r139
+%r141 = zext i160 %r140 to i192
+%r143 = getelementptr i32, i32* %r4, i32 5
 %r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i160
-%r146 = shl i160 %r145, 128
-%r147 = or i160 %r141, %r146
-%r148 = zext i160 %r147 to i192
-%r150 = getelementptr i32, i32* %r4, i32 5
+%r145 = zext i32 %r144 to i192
+%r146 = shl i192 %r145, 160
+%r147 = or i192 %r141, %r146
+%r148 = zext i192 %r147 to i224
+%r150 = getelementptr i32, i32* %r4, i32 6
 %r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i192
-%r153 = shl i192 %r152, 160
-%r154 = or i192 %r148, %r153
-%r155 = zext i192 %r154 to i224
-%r157 = getelementptr i32, i32* %r4, i32 6
+%r152 = zext i32 %r151 to i224
+%r153 = shl i224 %r152, 192
+%r154 = or i224 %r148, %r153
+%r155 = zext i224 %r154 to i256
+%r157 = getelementptr i32, i32* %r4, i32 7
 %r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i224
-%r160 = shl i224 %r159, 192
-%r161 = or i224 %r155, %r160
-%r162 = zext i224 %r161 to i256
-%r164 = getelementptr i32, i32* %r4, i32 7
+%r159 = zext i32 %r158 to i256
+%r160 = shl i256 %r159, 224
+%r161 = or i256 %r155, %r160
+%r162 = zext i256 %r161 to i288
+%r164 = getelementptr i32, i32* %r4, i32 8
 %r165 = load i32, i32* %r164
-%r166 = zext i32 %r165 to i256
-%r167 = shl i256 %r166, 224
-%r168 = or i256 %r162, %r167
-%r169 = zext i256 %r168 to i288
-%r171 = getelementptr i32, i32* %r4, i32 8
+%r166 = zext i32 %r165 to i288
+%r167 = shl i288 %r166, 256
+%r168 = or i288 %r162, %r167
+%r169 = zext i288 %r168 to i320
+%r171 = getelementptr i32, i32* %r4, i32 9
 %r172 = load i32, i32* %r171
-%r173 = zext i32 %r172 to i288
-%r174 = shl i288 %r173, 256
-%r175 = or i288 %r169, %r174
-%r176 = zext i288 %r175 to i320
-%r178 = getelementptr i32, i32* %r4, i32 9
+%r173 = zext i32 %r172 to i320
+%r174 = shl i320 %r173, 288
+%r175 = or i320 %r169, %r174
+%r176 = zext i320 %r175 to i352
+%r178 = getelementptr i32, i32* %r4, i32 10
 %r179 = load i32, i32* %r178
-%r180 = zext i32 %r179 to i320
-%r181 = shl i320 %r180, 288
-%r182 = or i320 %r176, %r181
-%r183 = zext i320 %r182 to i352
-%r185 = getelementptr i32, i32* %r4, i32 10
+%r180 = zext i32 %r179 to i352
+%r181 = shl i352 %r180, 320
+%r182 = or i352 %r176, %r181
+%r183 = zext i352 %r182 to i384
+%r185 = getelementptr i32, i32* %r4, i32 11
 %r186 = load i32, i32* %r185
-%r187 = zext i32 %r186 to i352
-%r188 = shl i352 %r187, 320
-%r189 = or i352 %r183, %r188
-%r190 = zext i352 %r189 to i384
-%r192 = getelementptr i32, i32* %r4, i32 11
+%r187 = zext i32 %r186 to i384
+%r188 = shl i384 %r187, 352
+%r189 = or i384 %r183, %r188
+%r190 = zext i384 %r189 to i416
+%r192 = getelementptr i32, i32* %r4, i32 12
 %r193 = load i32, i32* %r192
-%r194 = zext i32 %r193 to i384
-%r195 = shl i384 %r194, 352
-%r196 = or i384 %r190, %r195
-%r197 = zext i384 %r196 to i416
-%r199 = getelementptr i32, i32* %r4, i32 12
+%r194 = zext i32 %r193 to i416
+%r195 = shl i416 %r194, 384
+%r196 = or i416 %r190, %r195
+%r197 = zext i416 %r196 to i448
+%r199 = getelementptr i32, i32* %r4, i32 13
 %r200 = load i32, i32* %r199
-%r201 = zext i32 %r200 to i416
-%r202 = shl i416 %r201, 384
-%r203 = or i416 %r197, %r202
-%r204 = zext i416 %r203 to i448
-%r206 = getelementptr i32, i32* %r4, i32 13
+%r201 = zext i32 %r200 to i448
+%r202 = shl i448 %r201, 416
+%r203 = or i448 %r197, %r202
+%r204 = zext i448 %r203 to i480
+%r206 = getelementptr i32, i32* %r4, i32 14
 %r207 = load i32, i32* %r206
-%r208 = zext i32 %r207 to i448
-%r209 = shl i448 %r208, 416
-%r210 = or i448 %r204, %r209
-%r211 = zext i448 %r210 to i480
-%r213 = getelementptr i32, i32* %r4, i32 14
+%r208 = zext i32 %r207 to i480
+%r209 = shl i480 %r208, 448
+%r210 = or i480 %r204, %r209
+%r211 = zext i480 %r210 to i512
+%r213 = getelementptr i32, i32* %r4, i32 15
 %r214 = load i32, i32* %r213
-%r215 = zext i32 %r214 to i480
-%r216 = shl i480 %r215, 448
-%r217 = or i480 %r211, %r216
-%r218 = zext i480 %r217 to i512
-%r220 = getelementptr i32, i32* %r4, i32 15
-%r221 = load i32, i32* %r220
-%r222 = zext i32 %r221 to i512
-%r223 = shl i512 %r222, 480
-%r224 = or i512 %r218, %r223
-%r225 = zext i512 %r224 to i544
-%r227 = getelementptr i32, i32* %r4, i32 16
-%r228 = load i32, i32* %r227
-%r229 = zext i32 %r228 to i544
-%r230 = shl i544 %r229, 512
-%r231 = or i544 %r225, %r230
-%r232 = zext i544 %r231 to i576
-%r233 = add i576 %r118, %r232
-%r234 = trunc i576 %r233 to i544
-%r235 = trunc i544 %r234 to i32
-%r237 = getelementptr i32, i32* %r2, i32 0
-store i32 %r235, i32* %r237
-%r238 = lshr i544 %r234, 32
-%r239 = trunc i544 %r238 to i32
-%r241 = getelementptr i32, i32* %r2, i32 1
-store i32 %r239, i32* %r241
-%r242 = lshr i544 %r238, 32
-%r243 = trunc i544 %r242 to i32
-%r245 = getelementptr i32, i32* %r2, i32 2
-store i32 %r243, i32* %r245
-%r246 = lshr i544 %r242, 32
-%r247 = trunc i544 %r246 to i32
-%r249 = getelementptr i32, i32* %r2, i32 3
-store i32 %r247, i32* %r249
-%r250 = lshr i544 %r246, 32
-%r251 = trunc i544 %r250 to i32
-%r253 = getelementptr i32, i32* %r2, i32 4
-store i32 %r251, i32* %r253
-%r254 = lshr i544 %r250, 32
-%r255 = trunc i544 %r254 to i32
-%r257 = getelementptr i32, i32* %r2, i32 5
-store i32 %r255, i32* %r257
-%r258 = lshr i544 %r254, 32
-%r259 = trunc i544 %r258 to i32
-%r261 = getelementptr i32, i32* %r2, i32 6
-store i32 %r259, i32* %r261
-%r262 = lshr i544 %r258, 32
-%r263 = trunc i544 %r262 to i32
-%r265 = getelementptr i32, i32* %r2, i32 7
-store i32 %r263, i32* %r265
-%r266 = lshr i544 %r262, 32
-%r267 = trunc i544 %r266 to i32
-%r269 = getelementptr i32, i32* %r2, i32 8
-store i32 %r267, i32* %r269
-%r270 = lshr i544 %r266, 32
-%r271 = trunc i544 %r270 to i32
-%r273 = getelementptr i32, i32* %r2, i32 9
-store i32 %r271, i32* %r273
-%r274 = lshr i544 %r270, 32
-%r275 = trunc i544 %r274 to i32
-%r277 = getelementptr i32, i32* %r2, i32 10
-store i32 %r275, i32* %r277
-%r278 = lshr i544 %r274, 32
-%r279 = trunc i544 %r278 to i32
-%r281 = getelementptr i32, i32* %r2, i32 11
-store i32 %r279, i32* %r281
-%r282 = lshr i544 %r278, 32
-%r283 = trunc i544 %r282 to i32
-%r285 = getelementptr i32, i32* %r2, i32 12
-store i32 %r283, i32* %r285
-%r286 = lshr i544 %r282, 32
-%r287 = trunc i544 %r286 to i32
-%r289 = getelementptr i32, i32* %r2, i32 13
-store i32 %r287, i32* %r289
-%r290 = lshr i544 %r286, 32
-%r291 = trunc i544 %r290 to i32
-%r293 = getelementptr i32, i32* %r2, i32 14
-store i32 %r291, i32* %r293
-%r294 = lshr i544 %r290, 32
-%r295 = trunc i544 %r294 to i32
-%r297 = getelementptr i32, i32* %r2, i32 15
-store i32 %r295, i32* %r297
-%r298 = lshr i544 %r294, 32
-%r299 = trunc i544 %r298 to i32
-%r301 = getelementptr i32, i32* %r2, i32 16
-store i32 %r299, i32* %r301
-%r302 = lshr i576 %r233, 544
-%r303 = trunc i576 %r302 to i32
-ret i32 %r303
+%r215 = zext i32 %r214 to i512
+%r216 = shl i512 %r215, 480
+%r217 = or i512 %r211, %r216
+%r218 = zext i512 %r217 to i544
+%r219 = add i544 %r111, %r218
+%r220 = trunc i544 %r219 to i512
+%r222 = getelementptr i32, i32* %r2, i32 0
+%r223 = trunc i512 %r220 to i32
+store i32 %r223, i32* %r222
+%r224 = lshr i512 %r220, 32
+%r226 = getelementptr i32, i32* %r2, i32 1
+%r227 = trunc i512 %r224 to i32
+store i32 %r227, i32* %r226
+%r228 = lshr i512 %r224, 32
+%r230 = getelementptr i32, i32* %r2, i32 2
+%r231 = trunc i512 %r228 to i32
+store i32 %r231, i32* %r230
+%r232 = lshr i512 %r228, 32
+%r234 = getelementptr i32, i32* %r2, i32 3
+%r235 = trunc i512 %r232 to i32
+store i32 %r235, i32* %r234
+%r236 = lshr i512 %r232, 32
+%r238 = getelementptr i32, i32* %r2, i32 4
+%r239 = trunc i512 %r236 to i32
+store i32 %r239, i32* %r238
+%r240 = lshr i512 %r236, 32
+%r242 = getelementptr i32, i32* %r2, i32 5
+%r243 = trunc i512 %r240 to i32
+store i32 %r243, i32* %r242
+%r244 = lshr i512 %r240, 32
+%r246 = getelementptr i32, i32* %r2, i32 6
+%r247 = trunc i512 %r244 to i32
+store i32 %r247, i32* %r246
+%r248 = lshr i512 %r244, 32
+%r250 = getelementptr i32, i32* %r2, i32 7
+%r251 = trunc i512 %r248 to i32
+store i32 %r251, i32* %r250
+%r252 = lshr i512 %r248, 32
+%r254 = getelementptr i32, i32* %r2, i32 8
+%r255 = trunc i512 %r252 to i32
+store i32 %r255, i32* %r254
+%r256 = lshr i512 %r252, 32
+%r258 = getelementptr i32, i32* %r2, i32 9
+%r259 = trunc i512 %r256 to i32
+store i32 %r259, i32* %r258
+%r260 = lshr i512 %r256, 32
+%r262 = getelementptr i32, i32* %r2, i32 10
+%r263 = trunc i512 %r260 to i32
+store i32 %r263, i32* %r262
+%r264 = lshr i512 %r260, 32
+%r266 = getelementptr i32, i32* %r2, i32 11
+%r267 = trunc i512 %r264 to i32
+store i32 %r267, i32* %r266
+%r268 = lshr i512 %r264, 32
+%r270 = getelementptr i32, i32* %r2, i32 12
+%r271 = trunc i512 %r268 to i32
+store i32 %r271, i32* %r270
+%r272 = lshr i512 %r268, 32
+%r274 = getelementptr i32, i32* %r2, i32 13
+%r275 = trunc i512 %r272 to i32
+store i32 %r275, i32* %r274
+%r276 = lshr i512 %r272, 32
+%r278 = getelementptr i32, i32* %r2, i32 14
+%r279 = trunc i512 %r276 to i32
+store i32 %r279, i32* %r278
+%r280 = lshr i512 %r276, 32
+%r282 = getelementptr i32, i32* %r2, i32 15
+%r283 = trunc i512 %r280 to i32
+store i32 %r283, i32* %r282
+%r284 = lshr i544 %r219, 512
+%r285 = trunc i544 %r284 to i32
+ret i32 %r285
 }
-define i32 @mcl_fp_subPre17L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define i32 @mcl_fp_subPre16L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r3
 %r6 = zext i32 %r5 to i64
@@ -51000,185 +17410,169 @@ define i32 @mcl_fp_subPre17L(i32* noalias  %r2, i32* noalias  %r3, i32* noalias
 %r109 = shl i512 %r108, 480
 %r110 = or i512 %r104, %r109
 %r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r3, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = zext i544 %r117 to i576
-%r119 = load i32, i32* %r4
-%r120 = zext i32 %r119 to i64
-%r122 = getelementptr i32, i32* %r4, i32 1
+%r112 = load i32, i32* %r4
+%r113 = zext i32 %r112 to i64
+%r115 = getelementptr i32, i32* %r4, i32 1
+%r116 = load i32, i32* %r115
+%r117 = zext i32 %r116 to i64
+%r118 = shl i64 %r117, 32
+%r119 = or i64 %r113, %r118
+%r120 = zext i64 %r119 to i96
+%r122 = getelementptr i32, i32* %r4, i32 2
 %r123 = load i32, i32* %r122
-%r124 = zext i32 %r123 to i64
-%r125 = shl i64 %r124, 32
-%r126 = or i64 %r120, %r125
-%r127 = zext i64 %r126 to i96
-%r129 = getelementptr i32, i32* %r4, i32 2
+%r124 = zext i32 %r123 to i96
+%r125 = shl i96 %r124, 64
+%r126 = or i96 %r120, %r125
+%r127 = zext i96 %r126 to i128
+%r129 = getelementptr i32, i32* %r4, i32 3
 %r130 = load i32, i32* %r129
-%r131 = zext i32 %r130 to i96
-%r132 = shl i96 %r131, 64
-%r133 = or i96 %r127, %r132
-%r134 = zext i96 %r133 to i128
-%r136 = getelementptr i32, i32* %r4, i32 3
+%r131 = zext i32 %r130 to i128
+%r132 = shl i128 %r131, 96
+%r133 = or i128 %r127, %r132
+%r134 = zext i128 %r133 to i160
+%r136 = getelementptr i32, i32* %r4, i32 4
 %r137 = load i32, i32* %r136
-%r138 = zext i32 %r137 to i128
-%r139 = shl i128 %r138, 96
-%r140 = or i128 %r134, %r139
-%r141 = zext i128 %r140 to i160
-%r143 = getelementptr i32, i32* %r4, i32 4
+%r138 = zext i32 %r137 to i160
+%r139 = shl i160 %r138, 128
+%r140 = or i160 %r134, %r139
+%r141 = zext i160 %r140 to i192
+%r143 = getelementptr i32, i32* %r4, i32 5
 %r144 = load i32, i32* %r143
-%r145 = zext i32 %r144 to i160
-%r146 = shl i160 %r145, 128
-%r147 = or i160 %r141, %r146
-%r148 = zext i160 %r147 to i192
-%r150 = getelementptr i32, i32* %r4, i32 5
+%r145 = zext i32 %r144 to i192
+%r146 = shl i192 %r145, 160
+%r147 = or i192 %r141, %r146
+%r148 = zext i192 %r147 to i224
+%r150 = getelementptr i32, i32* %r4, i32 6
 %r151 = load i32, i32* %r150
-%r152 = zext i32 %r151 to i192
-%r153 = shl i192 %r152, 160
-%r154 = or i192 %r148, %r153
-%r155 = zext i192 %r154 to i224
-%r157 = getelementptr i32, i32* %r4, i32 6
+%r152 = zext i32 %r151 to i224
+%r153 = shl i224 %r152, 192
+%r154 = or i224 %r148, %r153
+%r155 = zext i224 %r154 to i256
+%r157 = getelementptr i32, i32* %r4, i32 7
 %r158 = load i32, i32* %r157
-%r159 = zext i32 %r158 to i224
-%r160 = shl i224 %r159, 192
-%r161 = or i224 %r155, %r160
-%r162 = zext i224 %r161 to i256
-%r164 = getelementptr i32, i32* %r4, i32 7
+%r159 = zext i32 %r158 to i256
+%r160 = shl i256 %r159, 224
+%r161 = or i256 %r155, %r160
+%r162 = zext i256 %r161 to i288
+%r164 = getelementptr i32, i32* %r4, i32 8
 %r165 = load i32, i32* %r164
-%r166 = zext i32 %r165 to i256
-%r167 = shl i256 %r166, 224
-%r168 = or i256 %r162, %r167
-%r169 = zext i256 %r168 to i288
-%r171 = getelementptr i32, i32* %r4, i32 8
+%r166 = zext i32 %r165 to i288
+%r167 = shl i288 %r166, 256
+%r168 = or i288 %r162, %r167
+%r169 = zext i288 %r168 to i320
+%r171 = getelementptr i32, i32* %r4, i32 9
 %r172 = load i32, i32* %r171
-%r173 = zext i32 %r172 to i288
-%r174 = shl i288 %r173, 256
-%r175 = or i288 %r169, %r174
-%r176 = zext i288 %r175 to i320
-%r178 = getelementptr i32, i32* %r4, i32 9
+%r173 = zext i32 %r172 to i320
+%r174 = shl i320 %r173, 288
+%r175 = or i320 %r169, %r174
+%r176 = zext i320 %r175 to i352
+%r178 = getelementptr i32, i32* %r4, i32 10
 %r179 = load i32, i32* %r178
-%r180 = zext i32 %r179 to i320
-%r181 = shl i320 %r180, 288
-%r182 = or i320 %r176, %r181
-%r183 = zext i320 %r182 to i352
-%r185 = getelementptr i32, i32* %r4, i32 10
+%r180 = zext i32 %r179 to i352
+%r181 = shl i352 %r180, 320
+%r182 = or i352 %r176, %r181
+%r183 = zext i352 %r182 to i384
+%r185 = getelementptr i32, i32* %r4, i32 11
 %r186 = load i32, i32* %r185
-%r187 = zext i32 %r186 to i352
-%r188 = shl i352 %r187, 320
-%r189 = or i352 %r183, %r188
-%r190 = zext i352 %r189 to i384
-%r192 = getelementptr i32, i32* %r4, i32 11
+%r187 = zext i32 %r186 to i384
+%r188 = shl i384 %r187, 352
+%r189 = or i384 %r183, %r188
+%r190 = zext i384 %r189 to i416
+%r192 = getelementptr i32, i32* %r4, i32 12
 %r193 = load i32, i32* %r192
-%r194 = zext i32 %r193 to i384
-%r195 = shl i384 %r194, 352
-%r196 = or i384 %r190, %r195
-%r197 = zext i384 %r196 to i416
-%r199 = getelementptr i32, i32* %r4, i32 12
+%r194 = zext i32 %r193 to i416
+%r195 = shl i416 %r194, 384
+%r196 = or i416 %r190, %r195
+%r197 = zext i416 %r196 to i448
+%r199 = getelementptr i32, i32* %r4, i32 13
 %r200 = load i32, i32* %r199
-%r201 = zext i32 %r200 to i416
-%r202 = shl i416 %r201, 384
-%r203 = or i416 %r197, %r202
-%r204 = zext i416 %r203 to i448
-%r206 = getelementptr i32, i32* %r4, i32 13
+%r201 = zext i32 %r200 to i448
+%r202 = shl i448 %r201, 416
+%r203 = or i448 %r197, %r202
+%r204 = zext i448 %r203 to i480
+%r206 = getelementptr i32, i32* %r4, i32 14
 %r207 = load i32, i32* %r206
-%r208 = zext i32 %r207 to i448
-%r209 = shl i448 %r208, 416
-%r210 = or i448 %r204, %r209
-%r211 = zext i448 %r210 to i480
-%r213 = getelementptr i32, i32* %r4, i32 14
+%r208 = zext i32 %r207 to i480
+%r209 = shl i480 %r208, 448
+%r210 = or i480 %r204, %r209
+%r211 = zext i480 %r210 to i512
+%r213 = getelementptr i32, i32* %r4, i32 15
 %r214 = load i32, i32* %r213
-%r215 = zext i32 %r214 to i480
-%r216 = shl i480 %r215, 448
-%r217 = or i480 %r211, %r216
-%r218 = zext i480 %r217 to i512
-%r220 = getelementptr i32, i32* %r4, i32 15
-%r221 = load i32, i32* %r220
-%r222 = zext i32 %r221 to i512
-%r223 = shl i512 %r222, 480
-%r224 = or i512 %r218, %r223
-%r225 = zext i512 %r224 to i544
-%r227 = getelementptr i32, i32* %r4, i32 16
-%r228 = load i32, i32* %r227
-%r229 = zext i32 %r228 to i544
-%r230 = shl i544 %r229, 512
-%r231 = or i544 %r225, %r230
-%r232 = zext i544 %r231 to i576
-%r233 = sub i576 %r118, %r232
-%r234 = trunc i576 %r233 to i544
-%r235 = trunc i544 %r234 to i32
-%r237 = getelementptr i32, i32* %r2, i32 0
-store i32 %r235, i32* %r237
-%r238 = lshr i544 %r234, 32
-%r239 = trunc i544 %r238 to i32
-%r241 = getelementptr i32, i32* %r2, i32 1
-store i32 %r239, i32* %r241
-%r242 = lshr i544 %r238, 32
-%r243 = trunc i544 %r242 to i32
-%r245 = getelementptr i32, i32* %r2, i32 2
-store i32 %r243, i32* %r245
-%r246 = lshr i544 %r242, 32
-%r247 = trunc i544 %r246 to i32
-%r249 = getelementptr i32, i32* %r2, i32 3
-store i32 %r247, i32* %r249
-%r250 = lshr i544 %r246, 32
-%r251 = trunc i544 %r250 to i32
-%r253 = getelementptr i32, i32* %r2, i32 4
-store i32 %r251, i32* %r253
-%r254 = lshr i544 %r250, 32
-%r255 = trunc i544 %r254 to i32
-%r257 = getelementptr i32, i32* %r2, i32 5
-store i32 %r255, i32* %r257
-%r258 = lshr i544 %r254, 32
-%r259 = trunc i544 %r258 to i32
-%r261 = getelementptr i32, i32* %r2, i32 6
-store i32 %r259, i32* %r261
-%r262 = lshr i544 %r258, 32
-%r263 = trunc i544 %r262 to i32
-%r265 = getelementptr i32, i32* %r2, i32 7
-store i32 %r263, i32* %r265
-%r266 = lshr i544 %r262, 32
-%r267 = trunc i544 %r266 to i32
-%r269 = getelementptr i32, i32* %r2, i32 8
-store i32 %r267, i32* %r269
-%r270 = lshr i544 %r266, 32
-%r271 = trunc i544 %r270 to i32
-%r273 = getelementptr i32, i32* %r2, i32 9
-store i32 %r271, i32* %r273
-%r274 = lshr i544 %r270, 32
-%r275 = trunc i544 %r274 to i32
-%r277 = getelementptr i32, i32* %r2, i32 10
-store i32 %r275, i32* %r277
-%r278 = lshr i544 %r274, 32
-%r279 = trunc i544 %r278 to i32
-%r281 = getelementptr i32, i32* %r2, i32 11
-store i32 %r279, i32* %r281
-%r282 = lshr i544 %r278, 32
-%r283 = trunc i544 %r282 to i32
-%r285 = getelementptr i32, i32* %r2, i32 12
-store i32 %r283, i32* %r285
-%r286 = lshr i544 %r282, 32
-%r287 = trunc i544 %r286 to i32
-%r289 = getelementptr i32, i32* %r2, i32 13
-store i32 %r287, i32* %r289
-%r290 = lshr i544 %r286, 32
-%r291 = trunc i544 %r290 to i32
-%r293 = getelementptr i32, i32* %r2, i32 14
-store i32 %r291, i32* %r293
-%r294 = lshr i544 %r290, 32
-%r295 = trunc i544 %r294 to i32
-%r297 = getelementptr i32, i32* %r2, i32 15
-store i32 %r295, i32* %r297
-%r298 = lshr i544 %r294, 32
-%r299 = trunc i544 %r298 to i32
-%r301 = getelementptr i32, i32* %r2, i32 16
-store i32 %r299, i32* %r301
-%r302 = lshr i576 %r233, 544
-%r303 = trunc i576 %r302 to i32
-%r305 = and i32 %r303, 1
-ret i32 %r305
+%r215 = zext i32 %r214 to i512
+%r216 = shl i512 %r215, 480
+%r217 = or i512 %r211, %r216
+%r218 = zext i512 %r217 to i544
+%r219 = sub i544 %r111, %r218
+%r220 = trunc i544 %r219 to i512
+%r222 = getelementptr i32, i32* %r2, i32 0
+%r223 = trunc i512 %r220 to i32
+store i32 %r223, i32* %r222
+%r224 = lshr i512 %r220, 32
+%r226 = getelementptr i32, i32* %r2, i32 1
+%r227 = trunc i512 %r224 to i32
+store i32 %r227, i32* %r226
+%r228 = lshr i512 %r224, 32
+%r230 = getelementptr i32, i32* %r2, i32 2
+%r231 = trunc i512 %r228 to i32
+store i32 %r231, i32* %r230
+%r232 = lshr i512 %r228, 32
+%r234 = getelementptr i32, i32* %r2, i32 3
+%r235 = trunc i512 %r232 to i32
+store i32 %r235, i32* %r234
+%r236 = lshr i512 %r232, 32
+%r238 = getelementptr i32, i32* %r2, i32 4
+%r239 = trunc i512 %r236 to i32
+store i32 %r239, i32* %r238
+%r240 = lshr i512 %r236, 32
+%r242 = getelementptr i32, i32* %r2, i32 5
+%r243 = trunc i512 %r240 to i32
+store i32 %r243, i32* %r242
+%r244 = lshr i512 %r240, 32
+%r246 = getelementptr i32, i32* %r2, i32 6
+%r247 = trunc i512 %r244 to i32
+store i32 %r247, i32* %r246
+%r248 = lshr i512 %r244, 32
+%r250 = getelementptr i32, i32* %r2, i32 7
+%r251 = trunc i512 %r248 to i32
+store i32 %r251, i32* %r250
+%r252 = lshr i512 %r248, 32
+%r254 = getelementptr i32, i32* %r2, i32 8
+%r255 = trunc i512 %r252 to i32
+store i32 %r255, i32* %r254
+%r256 = lshr i512 %r252, 32
+%r258 = getelementptr i32, i32* %r2, i32 9
+%r259 = trunc i512 %r256 to i32
+store i32 %r259, i32* %r258
+%r260 = lshr i512 %r256, 32
+%r262 = getelementptr i32, i32* %r2, i32 10
+%r263 = trunc i512 %r260 to i32
+store i32 %r263, i32* %r262
+%r264 = lshr i512 %r260, 32
+%r266 = getelementptr i32, i32* %r2, i32 11
+%r267 = trunc i512 %r264 to i32
+store i32 %r267, i32* %r266
+%r268 = lshr i512 %r264, 32
+%r270 = getelementptr i32, i32* %r2, i32 12
+%r271 = trunc i512 %r268 to i32
+store i32 %r271, i32* %r270
+%r272 = lshr i512 %r268, 32
+%r274 = getelementptr i32, i32* %r2, i32 13
+%r275 = trunc i512 %r272 to i32
+store i32 %r275, i32* %r274
+%r276 = lshr i512 %r272, 32
+%r278 = getelementptr i32, i32* %r2, i32 14
+%r279 = trunc i512 %r276 to i32
+store i32 %r279, i32* %r278
+%r280 = lshr i512 %r276, 32
+%r282 = getelementptr i32, i32* %r2, i32 15
+%r283 = trunc i512 %r280 to i32
+store i32 %r283, i32* %r282
+%r285 = lshr i544 %r219, 512
+%r286 = trunc i544 %r285 to i32
+%r287 = and i32 %r286, 1
+ret i32 %r287
 }
-define void @mcl_fp_shr1_17L(i32* noalias  %r1, i32* noalias  %r2)
+define void @mcl_fp_shr1_16L(i32* noalias  %r1, i32* noalias  %r2)
 {
 %r3 = load i32, i32* %r2
 %r4 = zext i32 %r3 to i64
@@ -51271,83 +17665,73 @@ define void @mcl_fp_shr1_17L(i32* noalias  %r1, i32* noalias  %r2)
 %r106 = zext i32 %r105 to i512
 %r107 = shl i512 %r106, 480
 %r108 = or i512 %r102, %r107
-%r109 = zext i512 %r108 to i544
-%r111 = getelementptr i32, i32* %r2, i32 16
-%r112 = load i32, i32* %r111
-%r113 = zext i32 %r112 to i544
-%r114 = shl i544 %r113, 512
-%r115 = or i544 %r109, %r114
-%r116 = lshr i544 %r115, 1
-%r117 = trunc i544 %r116 to i32
-%r119 = getelementptr i32, i32* %r1, i32 0
-store i32 %r117, i32* %r119
-%r120 = lshr i544 %r116, 32
-%r121 = trunc i544 %r120 to i32
-%r123 = getelementptr i32, i32* %r1, i32 1
-store i32 %r121, i32* %r123
-%r124 = lshr i544 %r120, 32
-%r125 = trunc i544 %r124 to i32
-%r127 = getelementptr i32, i32* %r1, i32 2
-store i32 %r125, i32* %r127
-%r128 = lshr i544 %r124, 32
-%r129 = trunc i544 %r128 to i32
-%r131 = getelementptr i32, i32* %r1, i32 3
-store i32 %r129, i32* %r131
-%r132 = lshr i544 %r128, 32
-%r133 = trunc i544 %r132 to i32
-%r135 = getelementptr i32, i32* %r1, i32 4
-store i32 %r133, i32* %r135
-%r136 = lshr i544 %r132, 32
-%r137 = trunc i544 %r136 to i32
-%r139 = getelementptr i32, i32* %r1, i32 5
-store i32 %r137, i32* %r139
-%r140 = lshr i544 %r136, 32
-%r141 = trunc i544 %r140 to i32
-%r143 = getelementptr i32, i32* %r1, i32 6
-store i32 %r141, i32* %r143
-%r144 = lshr i544 %r140, 32
-%r145 = trunc i544 %r144 to i32
-%r147 = getelementptr i32, i32* %r1, i32 7
-store i32 %r145, i32* %r147
-%r148 = lshr i544 %r144, 32
-%r149 = trunc i544 %r148 to i32
-%r151 = getelementptr i32, i32* %r1, i32 8
-store i32 %r149, i32* %r151
-%r152 = lshr i544 %r148, 32
-%r153 = trunc i544 %r152 to i32
-%r155 = getelementptr i32, i32* %r1, i32 9
-store i32 %r153, i32* %r155
-%r156 = lshr i544 %r152, 32
-%r157 = trunc i544 %r156 to i32
-%r159 = getelementptr i32, i32* %r1, i32 10
-store i32 %r157, i32* %r159
-%r160 = lshr i544 %r156, 32
-%r161 = trunc i544 %r160 to i32
-%r163 = getelementptr i32, i32* %r1, i32 11
-store i32 %r161, i32* %r163
-%r164 = lshr i544 %r160, 32
-%r165 = trunc i544 %r164 to i32
-%r167 = getelementptr i32, i32* %r1, i32 12
-store i32 %r165, i32* %r167
-%r168 = lshr i544 %r164, 32
-%r169 = trunc i544 %r168 to i32
-%r171 = getelementptr i32, i32* %r1, i32 13
-store i32 %r169, i32* %r171
-%r172 = lshr i544 %r168, 32
-%r173 = trunc i544 %r172 to i32
-%r175 = getelementptr i32, i32* %r1, i32 14
-store i32 %r173, i32* %r175
-%r176 = lshr i544 %r172, 32
-%r177 = trunc i544 %r176 to i32
-%r179 = getelementptr i32, i32* %r1, i32 15
-store i32 %r177, i32* %r179
-%r180 = lshr i544 %r176, 32
-%r181 = trunc i544 %r180 to i32
-%r183 = getelementptr i32, i32* %r1, i32 16
-store i32 %r181, i32* %r183
+%r109 = lshr i512 %r108, 1
+%r111 = getelementptr i32, i32* %r1, i32 0
+%r112 = trunc i512 %r109 to i32
+store i32 %r112, i32* %r111
+%r113 = lshr i512 %r109, 32
+%r115 = getelementptr i32, i32* %r1, i32 1
+%r116 = trunc i512 %r113 to i32
+store i32 %r116, i32* %r115
+%r117 = lshr i512 %r113, 32
+%r119 = getelementptr i32, i32* %r1, i32 2
+%r120 = trunc i512 %r117 to i32
+store i32 %r120, i32* %r119
+%r121 = lshr i512 %r117, 32
+%r123 = getelementptr i32, i32* %r1, i32 3
+%r124 = trunc i512 %r121 to i32
+store i32 %r124, i32* %r123
+%r125 = lshr i512 %r121, 32
+%r127 = getelementptr i32, i32* %r1, i32 4
+%r128 = trunc i512 %r125 to i32
+store i32 %r128, i32* %r127
+%r129 = lshr i512 %r125, 32
+%r131 = getelementptr i32, i32* %r1, i32 5
+%r132 = trunc i512 %r129 to i32
+store i32 %r132, i32* %r131
+%r133 = lshr i512 %r129, 32
+%r135 = getelementptr i32, i32* %r1, i32 6
+%r136 = trunc i512 %r133 to i32
+store i32 %r136, i32* %r135
+%r137 = lshr i512 %r133, 32
+%r139 = getelementptr i32, i32* %r1, i32 7
+%r140 = trunc i512 %r137 to i32
+store i32 %r140, i32* %r139
+%r141 = lshr i512 %r137, 32
+%r143 = getelementptr i32, i32* %r1, i32 8
+%r144 = trunc i512 %r141 to i32
+store i32 %r144, i32* %r143
+%r145 = lshr i512 %r141, 32
+%r147 = getelementptr i32, i32* %r1, i32 9
+%r148 = trunc i512 %r145 to i32
+store i32 %r148, i32* %r147
+%r149 = lshr i512 %r145, 32
+%r151 = getelementptr i32, i32* %r1, i32 10
+%r152 = trunc i512 %r149 to i32
+store i32 %r152, i32* %r151
+%r153 = lshr i512 %r149, 32
+%r155 = getelementptr i32, i32* %r1, i32 11
+%r156 = trunc i512 %r153 to i32
+store i32 %r156, i32* %r155
+%r157 = lshr i512 %r153, 32
+%r159 = getelementptr i32, i32* %r1, i32 12
+%r160 = trunc i512 %r157 to i32
+store i32 %r160, i32* %r159
+%r161 = lshr i512 %r157, 32
+%r163 = getelementptr i32, i32* %r1, i32 13
+%r164 = trunc i512 %r161 to i32
+store i32 %r164, i32* %r163
+%r165 = lshr i512 %r161, 32
+%r167 = getelementptr i32, i32* %r1, i32 14
+%r168 = trunc i512 %r165 to i32
+store i32 %r168, i32* %r167
+%r169 = lshr i512 %r165, 32
+%r171 = getelementptr i32, i32* %r1, i32 15
+%r172 = trunc i512 %r169 to i32
+store i32 %r172, i32* %r171
 ret void
 }
-define void @mcl_fp_add17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_add16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -51440,356 +17824,330 @@ define void @mcl_fp_add17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %
 %r108 = zext i32 %r107 to i512
 %r109 = shl i512 %r108, 480
 %r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = load i32, i32* %r3
-%r119 = zext i32 %r118 to i64
-%r121 = getelementptr i32, i32* %r3, i32 1
+%r111 = load i32, i32* %r3
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r3, i32 1
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r3, i32 2
 %r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i64
-%r124 = shl i64 %r123, 32
-%r125 = or i64 %r119, %r124
-%r126 = zext i64 %r125 to i96
-%r128 = getelementptr i32, i32* %r3, i32 2
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r3, i32 3
 %r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i96
-%r131 = shl i96 %r130, 64
-%r132 = or i96 %r126, %r131
-%r133 = zext i96 %r132 to i128
-%r135 = getelementptr i32, i32* %r3, i32 3
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r3, i32 4
 %r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i128
-%r138 = shl i128 %r137, 96
-%r139 = or i128 %r133, %r138
-%r140 = zext i128 %r139 to i160
-%r142 = getelementptr i32, i32* %r3, i32 4
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r3, i32 5
 %r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i160
-%r145 = shl i160 %r144, 128
-%r146 = or i160 %r140, %r145
-%r147 = zext i160 %r146 to i192
-%r149 = getelementptr i32, i32* %r3, i32 5
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r3, i32 6
 %r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i192
-%r152 = shl i192 %r151, 160
-%r153 = or i192 %r147, %r152
-%r154 = zext i192 %r153 to i224
-%r156 = getelementptr i32, i32* %r3, i32 6
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r3, i32 7
 %r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i224
-%r159 = shl i224 %r158, 192
-%r160 = or i224 %r154, %r159
-%r161 = zext i224 %r160 to i256
-%r163 = getelementptr i32, i32* %r3, i32 7
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r163 = getelementptr i32, i32* %r3, i32 8
 %r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i256
-%r166 = shl i256 %r165, 224
-%r167 = or i256 %r161, %r166
-%r168 = zext i256 %r167 to i288
-%r170 = getelementptr i32, i32* %r3, i32 8
+%r165 = zext i32 %r164 to i288
+%r166 = shl i288 %r165, 256
+%r167 = or i288 %r161, %r166
+%r168 = zext i288 %r167 to i320
+%r170 = getelementptr i32, i32* %r3, i32 9
 %r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i288
-%r173 = shl i288 %r172, 256
-%r174 = or i288 %r168, %r173
-%r175 = zext i288 %r174 to i320
-%r177 = getelementptr i32, i32* %r3, i32 9
+%r172 = zext i32 %r171 to i320
+%r173 = shl i320 %r172, 288
+%r174 = or i320 %r168, %r173
+%r175 = zext i320 %r174 to i352
+%r177 = getelementptr i32, i32* %r3, i32 10
 %r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i320
-%r180 = shl i320 %r179, 288
-%r181 = or i320 %r175, %r180
-%r182 = zext i320 %r181 to i352
-%r184 = getelementptr i32, i32* %r3, i32 10
+%r179 = zext i32 %r178 to i352
+%r180 = shl i352 %r179, 320
+%r181 = or i352 %r175, %r180
+%r182 = zext i352 %r181 to i384
+%r184 = getelementptr i32, i32* %r3, i32 11
 %r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i352
-%r187 = shl i352 %r186, 320
-%r188 = or i352 %r182, %r187
-%r189 = zext i352 %r188 to i384
-%r191 = getelementptr i32, i32* %r3, i32 11
+%r186 = zext i32 %r185 to i384
+%r187 = shl i384 %r186, 352
+%r188 = or i384 %r182, %r187
+%r189 = zext i384 %r188 to i416
+%r191 = getelementptr i32, i32* %r3, i32 12
 %r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i384
-%r194 = shl i384 %r193, 352
-%r195 = or i384 %r189, %r194
-%r196 = zext i384 %r195 to i416
-%r198 = getelementptr i32, i32* %r3, i32 12
+%r193 = zext i32 %r192 to i416
+%r194 = shl i416 %r193, 384
+%r195 = or i416 %r189, %r194
+%r196 = zext i416 %r195 to i448
+%r198 = getelementptr i32, i32* %r3, i32 13
 %r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i416
-%r201 = shl i416 %r200, 384
-%r202 = or i416 %r196, %r201
-%r203 = zext i416 %r202 to i448
-%r205 = getelementptr i32, i32* %r3, i32 13
+%r200 = zext i32 %r199 to i448
+%r201 = shl i448 %r200, 416
+%r202 = or i448 %r196, %r201
+%r203 = zext i448 %r202 to i480
+%r205 = getelementptr i32, i32* %r3, i32 14
 %r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i448
-%r208 = shl i448 %r207, 416
-%r209 = or i448 %r203, %r208
-%r210 = zext i448 %r209 to i480
-%r212 = getelementptr i32, i32* %r3, i32 14
+%r207 = zext i32 %r206 to i480
+%r208 = shl i480 %r207, 448
+%r209 = or i480 %r203, %r208
+%r210 = zext i480 %r209 to i512
+%r212 = getelementptr i32, i32* %r3, i32 15
 %r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i480
-%r215 = shl i480 %r214, 448
-%r216 = or i480 %r210, %r215
-%r217 = zext i480 %r216 to i512
-%r219 = getelementptr i32, i32* %r3, i32 15
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i512
-%r222 = shl i512 %r221, 480
-%r223 = or i512 %r217, %r222
-%r224 = zext i512 %r223 to i544
-%r226 = getelementptr i32, i32* %r3, i32 16
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i544
-%r229 = shl i544 %r228, 512
-%r230 = or i544 %r224, %r229
-%r231 = zext i544 %r117 to i576
-%r232 = zext i544 %r230 to i576
-%r233 = add i576 %r231, %r232
-%r234 = trunc i576 %r233 to i544
-%r235 = trunc i544 %r234 to i32
-%r237 = getelementptr i32, i32* %r1, i32 0
-store i32 %r235, i32* %r237
-%r238 = lshr i544 %r234, 32
-%r239 = trunc i544 %r238 to i32
-%r241 = getelementptr i32, i32* %r1, i32 1
-store i32 %r239, i32* %r241
-%r242 = lshr i544 %r238, 32
-%r243 = trunc i544 %r242 to i32
-%r245 = getelementptr i32, i32* %r1, i32 2
-store i32 %r243, i32* %r245
-%r246 = lshr i544 %r242, 32
-%r247 = trunc i544 %r246 to i32
-%r249 = getelementptr i32, i32* %r1, i32 3
-store i32 %r247, i32* %r249
-%r250 = lshr i544 %r246, 32
-%r251 = trunc i544 %r250 to i32
-%r253 = getelementptr i32, i32* %r1, i32 4
-store i32 %r251, i32* %r253
-%r254 = lshr i544 %r250, 32
-%r255 = trunc i544 %r254 to i32
-%r257 = getelementptr i32, i32* %r1, i32 5
-store i32 %r255, i32* %r257
-%r258 = lshr i544 %r254, 32
-%r259 = trunc i544 %r258 to i32
-%r261 = getelementptr i32, i32* %r1, i32 6
-store i32 %r259, i32* %r261
-%r262 = lshr i544 %r258, 32
-%r263 = trunc i544 %r262 to i32
-%r265 = getelementptr i32, i32* %r1, i32 7
-store i32 %r263, i32* %r265
-%r266 = lshr i544 %r262, 32
-%r267 = trunc i544 %r266 to i32
-%r269 = getelementptr i32, i32* %r1, i32 8
-store i32 %r267, i32* %r269
-%r270 = lshr i544 %r266, 32
-%r271 = trunc i544 %r270 to i32
-%r273 = getelementptr i32, i32* %r1, i32 9
-store i32 %r271, i32* %r273
-%r274 = lshr i544 %r270, 32
-%r275 = trunc i544 %r274 to i32
-%r277 = getelementptr i32, i32* %r1, i32 10
-store i32 %r275, i32* %r277
-%r278 = lshr i544 %r274, 32
-%r279 = trunc i544 %r278 to i32
-%r281 = getelementptr i32, i32* %r1, i32 11
-store i32 %r279, i32* %r281
-%r282 = lshr i544 %r278, 32
-%r283 = trunc i544 %r282 to i32
-%r285 = getelementptr i32, i32* %r1, i32 12
-store i32 %r283, i32* %r285
-%r286 = lshr i544 %r282, 32
-%r287 = trunc i544 %r286 to i32
-%r289 = getelementptr i32, i32* %r1, i32 13
-store i32 %r287, i32* %r289
-%r290 = lshr i544 %r286, 32
-%r291 = trunc i544 %r290 to i32
-%r293 = getelementptr i32, i32* %r1, i32 14
-store i32 %r291, i32* %r293
-%r294 = lshr i544 %r290, 32
-%r295 = trunc i544 %r294 to i32
-%r297 = getelementptr i32, i32* %r1, i32 15
-store i32 %r295, i32* %r297
-%r298 = lshr i544 %r294, 32
-%r299 = trunc i544 %r298 to i32
-%r301 = getelementptr i32, i32* %r1, i32 16
-store i32 %r299, i32* %r301
-%r302 = load i32, i32* %r4
-%r303 = zext i32 %r302 to i64
-%r305 = getelementptr i32, i32* %r4, i32 1
-%r306 = load i32, i32* %r305
-%r307 = zext i32 %r306 to i64
-%r308 = shl i64 %r307, 32
-%r309 = or i64 %r303, %r308
-%r310 = zext i64 %r309 to i96
-%r312 = getelementptr i32, i32* %r4, i32 2
-%r313 = load i32, i32* %r312
-%r314 = zext i32 %r313 to i96
-%r315 = shl i96 %r314, 64
-%r316 = or i96 %r310, %r315
-%r317 = zext i96 %r316 to i128
-%r319 = getelementptr i32, i32* %r4, i32 3
-%r320 = load i32, i32* %r319
-%r321 = zext i32 %r320 to i128
-%r322 = shl i128 %r321, 96
-%r323 = or i128 %r317, %r322
-%r324 = zext i128 %r323 to i160
-%r326 = getelementptr i32, i32* %r4, i32 4
-%r327 = load i32, i32* %r326
-%r328 = zext i32 %r327 to i160
-%r329 = shl i160 %r328, 128
-%r330 = or i160 %r324, %r329
-%r331 = zext i160 %r330 to i192
-%r333 = getelementptr i32, i32* %r4, i32 5
-%r334 = load i32, i32* %r333
-%r335 = zext i32 %r334 to i192
-%r336 = shl i192 %r335, 160
-%r337 = or i192 %r331, %r336
-%r338 = zext i192 %r337 to i224
-%r340 = getelementptr i32, i32* %r4, i32 6
-%r341 = load i32, i32* %r340
-%r342 = zext i32 %r341 to i224
-%r343 = shl i224 %r342, 192
-%r344 = or i224 %r338, %r343
-%r345 = zext i224 %r344 to i256
-%r347 = getelementptr i32, i32* %r4, i32 7
-%r348 = load i32, i32* %r347
-%r349 = zext i32 %r348 to i256
-%r350 = shl i256 %r349, 224
-%r351 = or i256 %r345, %r350
-%r352 = zext i256 %r351 to i288
-%r354 = getelementptr i32, i32* %r4, i32 8
-%r355 = load i32, i32* %r354
-%r356 = zext i32 %r355 to i288
-%r357 = shl i288 %r356, 256
-%r358 = or i288 %r352, %r357
-%r359 = zext i288 %r358 to i320
-%r361 = getelementptr i32, i32* %r4, i32 9
-%r362 = load i32, i32* %r361
-%r363 = zext i32 %r362 to i320
-%r364 = shl i320 %r363, 288
-%r365 = or i320 %r359, %r364
-%r366 = zext i320 %r365 to i352
-%r368 = getelementptr i32, i32* %r4, i32 10
-%r369 = load i32, i32* %r368
-%r370 = zext i32 %r369 to i352
-%r371 = shl i352 %r370, 320
-%r372 = or i352 %r366, %r371
-%r373 = zext i352 %r372 to i384
-%r375 = getelementptr i32, i32* %r4, i32 11
-%r376 = load i32, i32* %r375
-%r377 = zext i32 %r376 to i384
-%r378 = shl i384 %r377, 352
-%r379 = or i384 %r373, %r378
-%r380 = zext i384 %r379 to i416
-%r382 = getelementptr i32, i32* %r4, i32 12
-%r383 = load i32, i32* %r382
-%r384 = zext i32 %r383 to i416
-%r385 = shl i416 %r384, 384
-%r386 = or i416 %r380, %r385
-%r387 = zext i416 %r386 to i448
-%r389 = getelementptr i32, i32* %r4, i32 13
-%r390 = load i32, i32* %r389
-%r391 = zext i32 %r390 to i448
-%r392 = shl i448 %r391, 416
-%r393 = or i448 %r387, %r392
-%r394 = zext i448 %r393 to i480
-%r396 = getelementptr i32, i32* %r4, i32 14
-%r397 = load i32, i32* %r396
-%r398 = zext i32 %r397 to i480
-%r399 = shl i480 %r398, 448
-%r400 = or i480 %r394, %r399
-%r401 = zext i480 %r400 to i512
-%r403 = getelementptr i32, i32* %r4, i32 15
-%r404 = load i32, i32* %r403
-%r405 = zext i32 %r404 to i512
-%r406 = shl i512 %r405, 480
-%r407 = or i512 %r401, %r406
-%r408 = zext i512 %r407 to i544
-%r410 = getelementptr i32, i32* %r4, i32 16
-%r411 = load i32, i32* %r410
-%r412 = zext i32 %r411 to i544
-%r413 = shl i544 %r412, 512
-%r414 = or i544 %r408, %r413
-%r415 = zext i544 %r414 to i576
-%r416 = sub i576 %r233, %r415
-%r417 = lshr i576 %r416, 544
-%r418 = trunc i576 %r417 to i1
-br i1%r418, label %carry, label %nocarry
+%r214 = zext i32 %r213 to i512
+%r215 = shl i512 %r214, 480
+%r216 = or i512 %r210, %r215
+%r217 = zext i512 %r110 to i544
+%r218 = zext i512 %r216 to i544
+%r219 = add i544 %r217, %r218
+%r220 = trunc i544 %r219 to i512
+%r222 = getelementptr i32, i32* %r1, i32 0
+%r223 = trunc i512 %r220 to i32
+store i32 %r223, i32* %r222
+%r224 = lshr i512 %r220, 32
+%r226 = getelementptr i32, i32* %r1, i32 1
+%r227 = trunc i512 %r224 to i32
+store i32 %r227, i32* %r226
+%r228 = lshr i512 %r224, 32
+%r230 = getelementptr i32, i32* %r1, i32 2
+%r231 = trunc i512 %r228 to i32
+store i32 %r231, i32* %r230
+%r232 = lshr i512 %r228, 32
+%r234 = getelementptr i32, i32* %r1, i32 3
+%r235 = trunc i512 %r232 to i32
+store i32 %r235, i32* %r234
+%r236 = lshr i512 %r232, 32
+%r238 = getelementptr i32, i32* %r1, i32 4
+%r239 = trunc i512 %r236 to i32
+store i32 %r239, i32* %r238
+%r240 = lshr i512 %r236, 32
+%r242 = getelementptr i32, i32* %r1, i32 5
+%r243 = trunc i512 %r240 to i32
+store i32 %r243, i32* %r242
+%r244 = lshr i512 %r240, 32
+%r246 = getelementptr i32, i32* %r1, i32 6
+%r247 = trunc i512 %r244 to i32
+store i32 %r247, i32* %r246
+%r248 = lshr i512 %r244, 32
+%r250 = getelementptr i32, i32* %r1, i32 7
+%r251 = trunc i512 %r248 to i32
+store i32 %r251, i32* %r250
+%r252 = lshr i512 %r248, 32
+%r254 = getelementptr i32, i32* %r1, i32 8
+%r255 = trunc i512 %r252 to i32
+store i32 %r255, i32* %r254
+%r256 = lshr i512 %r252, 32
+%r258 = getelementptr i32, i32* %r1, i32 9
+%r259 = trunc i512 %r256 to i32
+store i32 %r259, i32* %r258
+%r260 = lshr i512 %r256, 32
+%r262 = getelementptr i32, i32* %r1, i32 10
+%r263 = trunc i512 %r260 to i32
+store i32 %r263, i32* %r262
+%r264 = lshr i512 %r260, 32
+%r266 = getelementptr i32, i32* %r1, i32 11
+%r267 = trunc i512 %r264 to i32
+store i32 %r267, i32* %r266
+%r268 = lshr i512 %r264, 32
+%r270 = getelementptr i32, i32* %r1, i32 12
+%r271 = trunc i512 %r268 to i32
+store i32 %r271, i32* %r270
+%r272 = lshr i512 %r268, 32
+%r274 = getelementptr i32, i32* %r1, i32 13
+%r275 = trunc i512 %r272 to i32
+store i32 %r275, i32* %r274
+%r276 = lshr i512 %r272, 32
+%r278 = getelementptr i32, i32* %r1, i32 14
+%r279 = trunc i512 %r276 to i32
+store i32 %r279, i32* %r278
+%r280 = lshr i512 %r276, 32
+%r282 = getelementptr i32, i32* %r1, i32 15
+%r283 = trunc i512 %r280 to i32
+store i32 %r283, i32* %r282
+%r284 = load i32, i32* %r4
+%r285 = zext i32 %r284 to i64
+%r287 = getelementptr i32, i32* %r4, i32 1
+%r288 = load i32, i32* %r287
+%r289 = zext i32 %r288 to i64
+%r290 = shl i64 %r289, 32
+%r291 = or i64 %r285, %r290
+%r292 = zext i64 %r291 to i96
+%r294 = getelementptr i32, i32* %r4, i32 2
+%r295 = load i32, i32* %r294
+%r296 = zext i32 %r295 to i96
+%r297 = shl i96 %r296, 64
+%r298 = or i96 %r292, %r297
+%r299 = zext i96 %r298 to i128
+%r301 = getelementptr i32, i32* %r4, i32 3
+%r302 = load i32, i32* %r301
+%r303 = zext i32 %r302 to i128
+%r304 = shl i128 %r303, 96
+%r305 = or i128 %r299, %r304
+%r306 = zext i128 %r305 to i160
+%r308 = getelementptr i32, i32* %r4, i32 4
+%r309 = load i32, i32* %r308
+%r310 = zext i32 %r309 to i160
+%r311 = shl i160 %r310, 128
+%r312 = or i160 %r306, %r311
+%r313 = zext i160 %r312 to i192
+%r315 = getelementptr i32, i32* %r4, i32 5
+%r316 = load i32, i32* %r315
+%r317 = zext i32 %r316 to i192
+%r318 = shl i192 %r317, 160
+%r319 = or i192 %r313, %r318
+%r320 = zext i192 %r319 to i224
+%r322 = getelementptr i32, i32* %r4, i32 6
+%r323 = load i32, i32* %r322
+%r324 = zext i32 %r323 to i224
+%r325 = shl i224 %r324, 192
+%r326 = or i224 %r320, %r325
+%r327 = zext i224 %r326 to i256
+%r329 = getelementptr i32, i32* %r4, i32 7
+%r330 = load i32, i32* %r329
+%r331 = zext i32 %r330 to i256
+%r332 = shl i256 %r331, 224
+%r333 = or i256 %r327, %r332
+%r334 = zext i256 %r333 to i288
+%r336 = getelementptr i32, i32* %r4, i32 8
+%r337 = load i32, i32* %r336
+%r338 = zext i32 %r337 to i288
+%r339 = shl i288 %r338, 256
+%r340 = or i288 %r334, %r339
+%r341 = zext i288 %r340 to i320
+%r343 = getelementptr i32, i32* %r4, i32 9
+%r344 = load i32, i32* %r343
+%r345 = zext i32 %r344 to i320
+%r346 = shl i320 %r345, 288
+%r347 = or i320 %r341, %r346
+%r348 = zext i320 %r347 to i352
+%r350 = getelementptr i32, i32* %r4, i32 10
+%r351 = load i32, i32* %r350
+%r352 = zext i32 %r351 to i352
+%r353 = shl i352 %r352, 320
+%r354 = or i352 %r348, %r353
+%r355 = zext i352 %r354 to i384
+%r357 = getelementptr i32, i32* %r4, i32 11
+%r358 = load i32, i32* %r357
+%r359 = zext i32 %r358 to i384
+%r360 = shl i384 %r359, 352
+%r361 = or i384 %r355, %r360
+%r362 = zext i384 %r361 to i416
+%r364 = getelementptr i32, i32* %r4, i32 12
+%r365 = load i32, i32* %r364
+%r366 = zext i32 %r365 to i416
+%r367 = shl i416 %r366, 384
+%r368 = or i416 %r362, %r367
+%r369 = zext i416 %r368 to i448
+%r371 = getelementptr i32, i32* %r4, i32 13
+%r372 = load i32, i32* %r371
+%r373 = zext i32 %r372 to i448
+%r374 = shl i448 %r373, 416
+%r375 = or i448 %r369, %r374
+%r376 = zext i448 %r375 to i480
+%r378 = getelementptr i32, i32* %r4, i32 14
+%r379 = load i32, i32* %r378
+%r380 = zext i32 %r379 to i480
+%r381 = shl i480 %r380, 448
+%r382 = or i480 %r376, %r381
+%r383 = zext i480 %r382 to i512
+%r385 = getelementptr i32, i32* %r4, i32 15
+%r386 = load i32, i32* %r385
+%r387 = zext i32 %r386 to i512
+%r388 = shl i512 %r387, 480
+%r389 = or i512 %r383, %r388
+%r390 = zext i512 %r389 to i544
+%r391 = sub i544 %r219, %r390
+%r392 = lshr i544 %r391, 512
+%r393 = trunc i544 %r392 to i1
+br i1%r393, label %carry, label %nocarry
 nocarry:
-%r419 = trunc i576 %r416 to i544
-%r420 = trunc i544 %r419 to i32
-%r422 = getelementptr i32, i32* %r1, i32 0
-store i32 %r420, i32* %r422
-%r423 = lshr i544 %r419, 32
-%r424 = trunc i544 %r423 to i32
-%r426 = getelementptr i32, i32* %r1, i32 1
-store i32 %r424, i32* %r426
-%r427 = lshr i544 %r423, 32
-%r428 = trunc i544 %r427 to i32
-%r430 = getelementptr i32, i32* %r1, i32 2
-store i32 %r428, i32* %r430
-%r431 = lshr i544 %r427, 32
-%r432 = trunc i544 %r431 to i32
-%r434 = getelementptr i32, i32* %r1, i32 3
-store i32 %r432, i32* %r434
-%r435 = lshr i544 %r431, 32
-%r436 = trunc i544 %r435 to i32
-%r438 = getelementptr i32, i32* %r1, i32 4
-store i32 %r436, i32* %r438
-%r439 = lshr i544 %r435, 32
-%r440 = trunc i544 %r439 to i32
-%r442 = getelementptr i32, i32* %r1, i32 5
-store i32 %r440, i32* %r442
-%r443 = lshr i544 %r439, 32
-%r444 = trunc i544 %r443 to i32
-%r446 = getelementptr i32, i32* %r1, i32 6
-store i32 %r444, i32* %r446
-%r447 = lshr i544 %r443, 32
-%r448 = trunc i544 %r447 to i32
-%r450 = getelementptr i32, i32* %r1, i32 7
-store i32 %r448, i32* %r450
-%r451 = lshr i544 %r447, 32
-%r452 = trunc i544 %r451 to i32
-%r454 = getelementptr i32, i32* %r1, i32 8
-store i32 %r452, i32* %r454
-%r455 = lshr i544 %r451, 32
-%r456 = trunc i544 %r455 to i32
-%r458 = getelementptr i32, i32* %r1, i32 9
-store i32 %r456, i32* %r458
-%r459 = lshr i544 %r455, 32
-%r460 = trunc i544 %r459 to i32
-%r462 = getelementptr i32, i32* %r1, i32 10
-store i32 %r460, i32* %r462
-%r463 = lshr i544 %r459, 32
-%r464 = trunc i544 %r463 to i32
-%r466 = getelementptr i32, i32* %r1, i32 11
-store i32 %r464, i32* %r466
-%r467 = lshr i544 %r463, 32
-%r468 = trunc i544 %r467 to i32
-%r470 = getelementptr i32, i32* %r1, i32 12
-store i32 %r468, i32* %r470
-%r471 = lshr i544 %r467, 32
-%r472 = trunc i544 %r471 to i32
-%r474 = getelementptr i32, i32* %r1, i32 13
-store i32 %r472, i32* %r474
-%r475 = lshr i544 %r471, 32
-%r476 = trunc i544 %r475 to i32
-%r478 = getelementptr i32, i32* %r1, i32 14
-store i32 %r476, i32* %r478
-%r479 = lshr i544 %r475, 32
-%r480 = trunc i544 %r479 to i32
-%r482 = getelementptr i32, i32* %r1, i32 15
-store i32 %r480, i32* %r482
-%r483 = lshr i544 %r479, 32
-%r484 = trunc i544 %r483 to i32
-%r486 = getelementptr i32, i32* %r1, i32 16
-store i32 %r484, i32* %r486
+%r394 = trunc i544 %r391 to i512
+%r396 = getelementptr i32, i32* %r1, i32 0
+%r397 = trunc i512 %r394 to i32
+store i32 %r397, i32* %r396
+%r398 = lshr i512 %r394, 32
+%r400 = getelementptr i32, i32* %r1, i32 1
+%r401 = trunc i512 %r398 to i32
+store i32 %r401, i32* %r400
+%r402 = lshr i512 %r398, 32
+%r404 = getelementptr i32, i32* %r1, i32 2
+%r405 = trunc i512 %r402 to i32
+store i32 %r405, i32* %r404
+%r406 = lshr i512 %r402, 32
+%r408 = getelementptr i32, i32* %r1, i32 3
+%r409 = trunc i512 %r406 to i32
+store i32 %r409, i32* %r408
+%r410 = lshr i512 %r406, 32
+%r412 = getelementptr i32, i32* %r1, i32 4
+%r413 = trunc i512 %r410 to i32
+store i32 %r413, i32* %r412
+%r414 = lshr i512 %r410, 32
+%r416 = getelementptr i32, i32* %r1, i32 5
+%r417 = trunc i512 %r414 to i32
+store i32 %r417, i32* %r416
+%r418 = lshr i512 %r414, 32
+%r420 = getelementptr i32, i32* %r1, i32 6
+%r421 = trunc i512 %r418 to i32
+store i32 %r421, i32* %r420
+%r422 = lshr i512 %r418, 32
+%r424 = getelementptr i32, i32* %r1, i32 7
+%r425 = trunc i512 %r422 to i32
+store i32 %r425, i32* %r424
+%r426 = lshr i512 %r422, 32
+%r428 = getelementptr i32, i32* %r1, i32 8
+%r429 = trunc i512 %r426 to i32
+store i32 %r429, i32* %r428
+%r430 = lshr i512 %r426, 32
+%r432 = getelementptr i32, i32* %r1, i32 9
+%r433 = trunc i512 %r430 to i32
+store i32 %r433, i32* %r432
+%r434 = lshr i512 %r430, 32
+%r436 = getelementptr i32, i32* %r1, i32 10
+%r437 = trunc i512 %r434 to i32
+store i32 %r437, i32* %r436
+%r438 = lshr i512 %r434, 32
+%r440 = getelementptr i32, i32* %r1, i32 11
+%r441 = trunc i512 %r438 to i32
+store i32 %r441, i32* %r440
+%r442 = lshr i512 %r438, 32
+%r444 = getelementptr i32, i32* %r1, i32 12
+%r445 = trunc i512 %r442 to i32
+store i32 %r445, i32* %r444
+%r446 = lshr i512 %r442, 32
+%r448 = getelementptr i32, i32* %r1, i32 13
+%r449 = trunc i512 %r446 to i32
+store i32 %r449, i32* %r448
+%r450 = lshr i512 %r446, 32
+%r452 = getelementptr i32, i32* %r1, i32 14
+%r453 = trunc i512 %r450 to i32
+store i32 %r453, i32* %r452
+%r454 = lshr i512 %r450, 32
+%r456 = getelementptr i32, i32* %r1, i32 15
+%r457 = trunc i512 %r454 to i32
+store i32 %r457, i32* %r456
 ret void
 carry:
 ret void
 }
-define void @mcl_fp_addNF17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_addNF16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -51882,281 +18240,259 @@ define void @mcl_fp_addNF17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r108 = zext i32 %r107 to i512
 %r109 = shl i512 %r108, 480
 %r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = load i32, i32* %r3
-%r119 = zext i32 %r118 to i64
-%r121 = getelementptr i32, i32* %r3, i32 1
+%r111 = load i32, i32* %r3
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r3, i32 1
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r3, i32 2
 %r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i64
-%r124 = shl i64 %r123, 32
-%r125 = or i64 %r119, %r124
-%r126 = zext i64 %r125 to i96
-%r128 = getelementptr i32, i32* %r3, i32 2
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r3, i32 3
 %r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i96
-%r131 = shl i96 %r130, 64
-%r132 = or i96 %r126, %r131
-%r133 = zext i96 %r132 to i128
-%r135 = getelementptr i32, i32* %r3, i32 3
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r3, i32 4
 %r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i128
-%r138 = shl i128 %r137, 96
-%r139 = or i128 %r133, %r138
-%r140 = zext i128 %r139 to i160
-%r142 = getelementptr i32, i32* %r3, i32 4
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r3, i32 5
 %r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i160
-%r145 = shl i160 %r144, 128
-%r146 = or i160 %r140, %r145
-%r147 = zext i160 %r146 to i192
-%r149 = getelementptr i32, i32* %r3, i32 5
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r3, i32 6
 %r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i192
-%r152 = shl i192 %r151, 160
-%r153 = or i192 %r147, %r152
-%r154 = zext i192 %r153 to i224
-%r156 = getelementptr i32, i32* %r3, i32 6
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r3, i32 7
 %r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i224
-%r159 = shl i224 %r158, 192
-%r160 = or i224 %r154, %r159
-%r161 = zext i224 %r160 to i256
-%r163 = getelementptr i32, i32* %r3, i32 7
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r163 = getelementptr i32, i32* %r3, i32 8
 %r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i256
-%r166 = shl i256 %r165, 224
-%r167 = or i256 %r161, %r166
-%r168 = zext i256 %r167 to i288
-%r170 = getelementptr i32, i32* %r3, i32 8
+%r165 = zext i32 %r164 to i288
+%r166 = shl i288 %r165, 256
+%r167 = or i288 %r161, %r166
+%r168 = zext i288 %r167 to i320
+%r170 = getelementptr i32, i32* %r3, i32 9
 %r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i288
-%r173 = shl i288 %r172, 256
-%r174 = or i288 %r168, %r173
-%r175 = zext i288 %r174 to i320
-%r177 = getelementptr i32, i32* %r3, i32 9
+%r172 = zext i32 %r171 to i320
+%r173 = shl i320 %r172, 288
+%r174 = or i320 %r168, %r173
+%r175 = zext i320 %r174 to i352
+%r177 = getelementptr i32, i32* %r3, i32 10
 %r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i320
-%r180 = shl i320 %r179, 288
-%r181 = or i320 %r175, %r180
-%r182 = zext i320 %r181 to i352
-%r184 = getelementptr i32, i32* %r3, i32 10
+%r179 = zext i32 %r178 to i352
+%r180 = shl i352 %r179, 320
+%r181 = or i352 %r175, %r180
+%r182 = zext i352 %r181 to i384
+%r184 = getelementptr i32, i32* %r3, i32 11
 %r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i352
-%r187 = shl i352 %r186, 320
-%r188 = or i352 %r182, %r187
-%r189 = zext i352 %r188 to i384
-%r191 = getelementptr i32, i32* %r3, i32 11
+%r186 = zext i32 %r185 to i384
+%r187 = shl i384 %r186, 352
+%r188 = or i384 %r182, %r187
+%r189 = zext i384 %r188 to i416
+%r191 = getelementptr i32, i32* %r3, i32 12
 %r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i384
-%r194 = shl i384 %r193, 352
-%r195 = or i384 %r189, %r194
-%r196 = zext i384 %r195 to i416
-%r198 = getelementptr i32, i32* %r3, i32 12
+%r193 = zext i32 %r192 to i416
+%r194 = shl i416 %r193, 384
+%r195 = or i416 %r189, %r194
+%r196 = zext i416 %r195 to i448
+%r198 = getelementptr i32, i32* %r3, i32 13
 %r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i416
-%r201 = shl i416 %r200, 384
-%r202 = or i416 %r196, %r201
-%r203 = zext i416 %r202 to i448
-%r205 = getelementptr i32, i32* %r3, i32 13
+%r200 = zext i32 %r199 to i448
+%r201 = shl i448 %r200, 416
+%r202 = or i448 %r196, %r201
+%r203 = zext i448 %r202 to i480
+%r205 = getelementptr i32, i32* %r3, i32 14
 %r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i448
-%r208 = shl i448 %r207, 416
-%r209 = or i448 %r203, %r208
-%r210 = zext i448 %r209 to i480
-%r212 = getelementptr i32, i32* %r3, i32 14
+%r207 = zext i32 %r206 to i480
+%r208 = shl i480 %r207, 448
+%r209 = or i480 %r203, %r208
+%r210 = zext i480 %r209 to i512
+%r212 = getelementptr i32, i32* %r3, i32 15
 %r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i480
-%r215 = shl i480 %r214, 448
-%r216 = or i480 %r210, %r215
-%r217 = zext i480 %r216 to i512
-%r219 = getelementptr i32, i32* %r3, i32 15
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i512
-%r222 = shl i512 %r221, 480
-%r223 = or i512 %r217, %r222
-%r224 = zext i512 %r223 to i544
-%r226 = getelementptr i32, i32* %r3, i32 16
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i544
-%r229 = shl i544 %r228, 512
-%r230 = or i544 %r224, %r229
-%r231 = add i544 %r117, %r230
-%r232 = load i32, i32* %r4
-%r233 = zext i32 %r232 to i64
-%r235 = getelementptr i32, i32* %r4, i32 1
+%r214 = zext i32 %r213 to i512
+%r215 = shl i512 %r214, 480
+%r216 = or i512 %r210, %r215
+%r217 = add i512 %r110, %r216
+%r218 = load i32, i32* %r4
+%r219 = zext i32 %r218 to i64
+%r221 = getelementptr i32, i32* %r4, i32 1
+%r222 = load i32, i32* %r221
+%r223 = zext i32 %r222 to i64
+%r224 = shl i64 %r223, 32
+%r225 = or i64 %r219, %r224
+%r226 = zext i64 %r225 to i96
+%r228 = getelementptr i32, i32* %r4, i32 2
+%r229 = load i32, i32* %r228
+%r230 = zext i32 %r229 to i96
+%r231 = shl i96 %r230, 64
+%r232 = or i96 %r226, %r231
+%r233 = zext i96 %r232 to i128
+%r235 = getelementptr i32, i32* %r4, i32 3
 %r236 = load i32, i32* %r235
-%r237 = zext i32 %r236 to i64
-%r238 = shl i64 %r237, 32
-%r239 = or i64 %r233, %r238
-%r240 = zext i64 %r239 to i96
-%r242 = getelementptr i32, i32* %r4, i32 2
+%r237 = zext i32 %r236 to i128
+%r238 = shl i128 %r237, 96
+%r239 = or i128 %r233, %r238
+%r240 = zext i128 %r239 to i160
+%r242 = getelementptr i32, i32* %r4, i32 4
 %r243 = load i32, i32* %r242
-%r244 = zext i32 %r243 to i96
-%r245 = shl i96 %r244, 64
-%r246 = or i96 %r240, %r245
-%r247 = zext i96 %r246 to i128
-%r249 = getelementptr i32, i32* %r4, i32 3
+%r244 = zext i32 %r243 to i160
+%r245 = shl i160 %r244, 128
+%r246 = or i160 %r240, %r245
+%r247 = zext i160 %r246 to i192
+%r249 = getelementptr i32, i32* %r4, i32 5
 %r250 = load i32, i32* %r249
-%r251 = zext i32 %r250 to i128
-%r252 = shl i128 %r251, 96
-%r253 = or i128 %r247, %r252
-%r254 = zext i128 %r253 to i160
-%r256 = getelementptr i32, i32* %r4, i32 4
+%r251 = zext i32 %r250 to i192
+%r252 = shl i192 %r251, 160
+%r253 = or i192 %r247, %r252
+%r254 = zext i192 %r253 to i224
+%r256 = getelementptr i32, i32* %r4, i32 6
 %r257 = load i32, i32* %r256
-%r258 = zext i32 %r257 to i160
-%r259 = shl i160 %r258, 128
-%r260 = or i160 %r254, %r259
-%r261 = zext i160 %r260 to i192
-%r263 = getelementptr i32, i32* %r4, i32 5
+%r258 = zext i32 %r257 to i224
+%r259 = shl i224 %r258, 192
+%r260 = or i224 %r254, %r259
+%r261 = zext i224 %r260 to i256
+%r263 = getelementptr i32, i32* %r4, i32 7
 %r264 = load i32, i32* %r263
-%r265 = zext i32 %r264 to i192
-%r266 = shl i192 %r265, 160
-%r267 = or i192 %r261, %r266
-%r268 = zext i192 %r267 to i224
-%r270 = getelementptr i32, i32* %r4, i32 6
+%r265 = zext i32 %r264 to i256
+%r266 = shl i256 %r265, 224
+%r267 = or i256 %r261, %r266
+%r268 = zext i256 %r267 to i288
+%r270 = getelementptr i32, i32* %r4, i32 8
 %r271 = load i32, i32* %r270
-%r272 = zext i32 %r271 to i224
-%r273 = shl i224 %r272, 192
-%r274 = or i224 %r268, %r273
-%r275 = zext i224 %r274 to i256
-%r277 = getelementptr i32, i32* %r4, i32 7
+%r272 = zext i32 %r271 to i288
+%r273 = shl i288 %r272, 256
+%r274 = or i288 %r268, %r273
+%r275 = zext i288 %r274 to i320
+%r277 = getelementptr i32, i32* %r4, i32 9
 %r278 = load i32, i32* %r277
-%r279 = zext i32 %r278 to i256
-%r280 = shl i256 %r279, 224
-%r281 = or i256 %r275, %r280
-%r282 = zext i256 %r281 to i288
-%r284 = getelementptr i32, i32* %r4, i32 8
+%r279 = zext i32 %r278 to i320
+%r280 = shl i320 %r279, 288
+%r281 = or i320 %r275, %r280
+%r282 = zext i320 %r281 to i352
+%r284 = getelementptr i32, i32* %r4, i32 10
 %r285 = load i32, i32* %r284
-%r286 = zext i32 %r285 to i288
-%r287 = shl i288 %r286, 256
-%r288 = or i288 %r282, %r287
-%r289 = zext i288 %r288 to i320
-%r291 = getelementptr i32, i32* %r4, i32 9
+%r286 = zext i32 %r285 to i352
+%r287 = shl i352 %r286, 320
+%r288 = or i352 %r282, %r287
+%r289 = zext i352 %r288 to i384
+%r291 = getelementptr i32, i32* %r4, i32 11
 %r292 = load i32, i32* %r291
-%r293 = zext i32 %r292 to i320
-%r294 = shl i320 %r293, 288
-%r295 = or i320 %r289, %r294
-%r296 = zext i320 %r295 to i352
-%r298 = getelementptr i32, i32* %r4, i32 10
+%r293 = zext i32 %r292 to i384
+%r294 = shl i384 %r293, 352
+%r295 = or i384 %r289, %r294
+%r296 = zext i384 %r295 to i416
+%r298 = getelementptr i32, i32* %r4, i32 12
 %r299 = load i32, i32* %r298
-%r300 = zext i32 %r299 to i352
-%r301 = shl i352 %r300, 320
-%r302 = or i352 %r296, %r301
-%r303 = zext i352 %r302 to i384
-%r305 = getelementptr i32, i32* %r4, i32 11
+%r300 = zext i32 %r299 to i416
+%r301 = shl i416 %r300, 384
+%r302 = or i416 %r296, %r301
+%r303 = zext i416 %r302 to i448
+%r305 = getelementptr i32, i32* %r4, i32 13
 %r306 = load i32, i32* %r305
-%r307 = zext i32 %r306 to i384
-%r308 = shl i384 %r307, 352
-%r309 = or i384 %r303, %r308
-%r310 = zext i384 %r309 to i416
-%r312 = getelementptr i32, i32* %r4, i32 12
+%r307 = zext i32 %r306 to i448
+%r308 = shl i448 %r307, 416
+%r309 = or i448 %r303, %r308
+%r310 = zext i448 %r309 to i480
+%r312 = getelementptr i32, i32* %r4, i32 14
 %r313 = load i32, i32* %r312
-%r314 = zext i32 %r313 to i416
-%r315 = shl i416 %r314, 384
-%r316 = or i416 %r310, %r315
-%r317 = zext i416 %r316 to i448
-%r319 = getelementptr i32, i32* %r4, i32 13
+%r314 = zext i32 %r313 to i480
+%r315 = shl i480 %r314, 448
+%r316 = or i480 %r310, %r315
+%r317 = zext i480 %r316 to i512
+%r319 = getelementptr i32, i32* %r4, i32 15
 %r320 = load i32, i32* %r319
-%r321 = zext i32 %r320 to i448
-%r322 = shl i448 %r321, 416
-%r323 = or i448 %r317, %r322
-%r324 = zext i448 %r323 to i480
-%r326 = getelementptr i32, i32* %r4, i32 14
-%r327 = load i32, i32* %r326
-%r328 = zext i32 %r327 to i480
-%r329 = shl i480 %r328, 448
-%r330 = or i480 %r324, %r329
-%r331 = zext i480 %r330 to i512
-%r333 = getelementptr i32, i32* %r4, i32 15
-%r334 = load i32, i32* %r333
-%r335 = zext i32 %r334 to i512
-%r336 = shl i512 %r335, 480
-%r337 = or i512 %r331, %r336
-%r338 = zext i512 %r337 to i544
-%r340 = getelementptr i32, i32* %r4, i32 16
-%r341 = load i32, i32* %r340
-%r342 = zext i32 %r341 to i544
-%r343 = shl i544 %r342, 512
-%r344 = or i544 %r338, %r343
-%r345 = sub i544 %r231, %r344
-%r346 = lshr i544 %r345, 543
-%r347 = trunc i544 %r346 to i1
-%r348 = select i1 %r347, i544 %r231, i544 %r345
-%r349 = trunc i544 %r348 to i32
-%r351 = getelementptr i32, i32* %r1, i32 0
-store i32 %r349, i32* %r351
-%r352 = lshr i544 %r348, 32
-%r353 = trunc i544 %r352 to i32
-%r355 = getelementptr i32, i32* %r1, i32 1
-store i32 %r353, i32* %r355
-%r356 = lshr i544 %r352, 32
-%r357 = trunc i544 %r356 to i32
-%r359 = getelementptr i32, i32* %r1, i32 2
-store i32 %r357, i32* %r359
-%r360 = lshr i544 %r356, 32
-%r361 = trunc i544 %r360 to i32
-%r363 = getelementptr i32, i32* %r1, i32 3
-store i32 %r361, i32* %r363
-%r364 = lshr i544 %r360, 32
-%r365 = trunc i544 %r364 to i32
-%r367 = getelementptr i32, i32* %r1, i32 4
-store i32 %r365, i32* %r367
-%r368 = lshr i544 %r364, 32
-%r369 = trunc i544 %r368 to i32
-%r371 = getelementptr i32, i32* %r1, i32 5
-store i32 %r369, i32* %r371
-%r372 = lshr i544 %r368, 32
-%r373 = trunc i544 %r372 to i32
-%r375 = getelementptr i32, i32* %r1, i32 6
-store i32 %r373, i32* %r375
-%r376 = lshr i544 %r372, 32
-%r377 = trunc i544 %r376 to i32
-%r379 = getelementptr i32, i32* %r1, i32 7
-store i32 %r377, i32* %r379
-%r380 = lshr i544 %r376, 32
-%r381 = trunc i544 %r380 to i32
-%r383 = getelementptr i32, i32* %r1, i32 8
-store i32 %r381, i32* %r383
-%r384 = lshr i544 %r380, 32
-%r385 = trunc i544 %r384 to i32
-%r387 = getelementptr i32, i32* %r1, i32 9
-store i32 %r385, i32* %r387
-%r388 = lshr i544 %r384, 32
-%r389 = trunc i544 %r388 to i32
-%r391 = getelementptr i32, i32* %r1, i32 10
-store i32 %r389, i32* %r391
-%r392 = lshr i544 %r388, 32
-%r393 = trunc i544 %r392 to i32
-%r395 = getelementptr i32, i32* %r1, i32 11
-store i32 %r393, i32* %r395
-%r396 = lshr i544 %r392, 32
-%r397 = trunc i544 %r396 to i32
-%r399 = getelementptr i32, i32* %r1, i32 12
-store i32 %r397, i32* %r399
-%r400 = lshr i544 %r396, 32
-%r401 = trunc i544 %r400 to i32
-%r403 = getelementptr i32, i32* %r1, i32 13
-store i32 %r401, i32* %r403
-%r404 = lshr i544 %r400, 32
-%r405 = trunc i544 %r404 to i32
-%r407 = getelementptr i32, i32* %r1, i32 14
-store i32 %r405, i32* %r407
-%r408 = lshr i544 %r404, 32
-%r409 = trunc i544 %r408 to i32
-%r411 = getelementptr i32, i32* %r1, i32 15
-store i32 %r409, i32* %r411
-%r412 = lshr i544 %r408, 32
-%r413 = trunc i544 %r412 to i32
-%r415 = getelementptr i32, i32* %r1, i32 16
-store i32 %r413, i32* %r415
+%r321 = zext i32 %r320 to i512
+%r322 = shl i512 %r321, 480
+%r323 = or i512 %r317, %r322
+%r324 = sub i512 %r217, %r323
+%r325 = lshr i512 %r324, 511
+%r326 = trunc i512 %r325 to i1
+%r327 = select i1 %r326, i512 %r217, i512 %r324
+%r329 = getelementptr i32, i32* %r1, i32 0
+%r330 = trunc i512 %r327 to i32
+store i32 %r330, i32* %r329
+%r331 = lshr i512 %r327, 32
+%r333 = getelementptr i32, i32* %r1, i32 1
+%r334 = trunc i512 %r331 to i32
+store i32 %r334, i32* %r333
+%r335 = lshr i512 %r331, 32
+%r337 = getelementptr i32, i32* %r1, i32 2
+%r338 = trunc i512 %r335 to i32
+store i32 %r338, i32* %r337
+%r339 = lshr i512 %r335, 32
+%r341 = getelementptr i32, i32* %r1, i32 3
+%r342 = trunc i512 %r339 to i32
+store i32 %r342, i32* %r341
+%r343 = lshr i512 %r339, 32
+%r345 = getelementptr i32, i32* %r1, i32 4
+%r346 = trunc i512 %r343 to i32
+store i32 %r346, i32* %r345
+%r347 = lshr i512 %r343, 32
+%r349 = getelementptr i32, i32* %r1, i32 5
+%r350 = trunc i512 %r347 to i32
+store i32 %r350, i32* %r349
+%r351 = lshr i512 %r347, 32
+%r353 = getelementptr i32, i32* %r1, i32 6
+%r354 = trunc i512 %r351 to i32
+store i32 %r354, i32* %r353
+%r355 = lshr i512 %r351, 32
+%r357 = getelementptr i32, i32* %r1, i32 7
+%r358 = trunc i512 %r355 to i32
+store i32 %r358, i32* %r357
+%r359 = lshr i512 %r355, 32
+%r361 = getelementptr i32, i32* %r1, i32 8
+%r362 = trunc i512 %r359 to i32
+store i32 %r362, i32* %r361
+%r363 = lshr i512 %r359, 32
+%r365 = getelementptr i32, i32* %r1, i32 9
+%r366 = trunc i512 %r363 to i32
+store i32 %r366, i32* %r365
+%r367 = lshr i512 %r363, 32
+%r369 = getelementptr i32, i32* %r1, i32 10
+%r370 = trunc i512 %r367 to i32
+store i32 %r370, i32* %r369
+%r371 = lshr i512 %r367, 32
+%r373 = getelementptr i32, i32* %r1, i32 11
+%r374 = trunc i512 %r371 to i32
+store i32 %r374, i32* %r373
+%r375 = lshr i512 %r371, 32
+%r377 = getelementptr i32, i32* %r1, i32 12
+%r378 = trunc i512 %r375 to i32
+store i32 %r378, i32* %r377
+%r379 = lshr i512 %r375, 32
+%r381 = getelementptr i32, i32* %r1, i32 13
+%r382 = trunc i512 %r379 to i32
+store i32 %r382, i32* %r381
+%r383 = lshr i512 %r379, 32
+%r385 = getelementptr i32, i32* %r1, i32 14
+%r386 = trunc i512 %r383 to i32
+store i32 %r386, i32* %r385
+%r387 = lshr i512 %r383, 32
+%r389 = getelementptr i32, i32* %r1, i32 15
+%r390 = trunc i512 %r387 to i32
+store i32 %r390, i32* %r389
 ret void
 }
-define void @mcl_fp_sub17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_sub16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -52249,354 +18585,328 @@ define void @mcl_fp_sub17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %
 %r108 = zext i32 %r107 to i512
 %r109 = shl i512 %r108, 480
 %r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = load i32, i32* %r3
-%r119 = zext i32 %r118 to i64
-%r121 = getelementptr i32, i32* %r3, i32 1
+%r111 = load i32, i32* %r3
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r3, i32 1
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r3, i32 2
 %r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i64
-%r124 = shl i64 %r123, 32
-%r125 = or i64 %r119, %r124
-%r126 = zext i64 %r125 to i96
-%r128 = getelementptr i32, i32* %r3, i32 2
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r3, i32 3
 %r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i96
-%r131 = shl i96 %r130, 64
-%r132 = or i96 %r126, %r131
-%r133 = zext i96 %r132 to i128
-%r135 = getelementptr i32, i32* %r3, i32 3
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r3, i32 4
 %r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i128
-%r138 = shl i128 %r137, 96
-%r139 = or i128 %r133, %r138
-%r140 = zext i128 %r139 to i160
-%r142 = getelementptr i32, i32* %r3, i32 4
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r3, i32 5
 %r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i160
-%r145 = shl i160 %r144, 128
-%r146 = or i160 %r140, %r145
-%r147 = zext i160 %r146 to i192
-%r149 = getelementptr i32, i32* %r3, i32 5
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r3, i32 6
 %r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i192
-%r152 = shl i192 %r151, 160
-%r153 = or i192 %r147, %r152
-%r154 = zext i192 %r153 to i224
-%r156 = getelementptr i32, i32* %r3, i32 6
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r3, i32 7
 %r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i224
-%r159 = shl i224 %r158, 192
-%r160 = or i224 %r154, %r159
-%r161 = zext i224 %r160 to i256
-%r163 = getelementptr i32, i32* %r3, i32 7
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r163 = getelementptr i32, i32* %r3, i32 8
 %r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i256
-%r166 = shl i256 %r165, 224
-%r167 = or i256 %r161, %r166
-%r168 = zext i256 %r167 to i288
-%r170 = getelementptr i32, i32* %r3, i32 8
+%r165 = zext i32 %r164 to i288
+%r166 = shl i288 %r165, 256
+%r167 = or i288 %r161, %r166
+%r168 = zext i288 %r167 to i320
+%r170 = getelementptr i32, i32* %r3, i32 9
 %r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i288
-%r173 = shl i288 %r172, 256
-%r174 = or i288 %r168, %r173
-%r175 = zext i288 %r174 to i320
-%r177 = getelementptr i32, i32* %r3, i32 9
+%r172 = zext i32 %r171 to i320
+%r173 = shl i320 %r172, 288
+%r174 = or i320 %r168, %r173
+%r175 = zext i320 %r174 to i352
+%r177 = getelementptr i32, i32* %r3, i32 10
 %r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i320
-%r180 = shl i320 %r179, 288
-%r181 = or i320 %r175, %r180
-%r182 = zext i320 %r181 to i352
-%r184 = getelementptr i32, i32* %r3, i32 10
+%r179 = zext i32 %r178 to i352
+%r180 = shl i352 %r179, 320
+%r181 = or i352 %r175, %r180
+%r182 = zext i352 %r181 to i384
+%r184 = getelementptr i32, i32* %r3, i32 11
 %r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i352
-%r187 = shl i352 %r186, 320
-%r188 = or i352 %r182, %r187
-%r189 = zext i352 %r188 to i384
-%r191 = getelementptr i32, i32* %r3, i32 11
+%r186 = zext i32 %r185 to i384
+%r187 = shl i384 %r186, 352
+%r188 = or i384 %r182, %r187
+%r189 = zext i384 %r188 to i416
+%r191 = getelementptr i32, i32* %r3, i32 12
 %r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i384
-%r194 = shl i384 %r193, 352
-%r195 = or i384 %r189, %r194
-%r196 = zext i384 %r195 to i416
-%r198 = getelementptr i32, i32* %r3, i32 12
+%r193 = zext i32 %r192 to i416
+%r194 = shl i416 %r193, 384
+%r195 = or i416 %r189, %r194
+%r196 = zext i416 %r195 to i448
+%r198 = getelementptr i32, i32* %r3, i32 13
 %r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i416
-%r201 = shl i416 %r200, 384
-%r202 = or i416 %r196, %r201
-%r203 = zext i416 %r202 to i448
-%r205 = getelementptr i32, i32* %r3, i32 13
+%r200 = zext i32 %r199 to i448
+%r201 = shl i448 %r200, 416
+%r202 = or i448 %r196, %r201
+%r203 = zext i448 %r202 to i480
+%r205 = getelementptr i32, i32* %r3, i32 14
 %r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i448
-%r208 = shl i448 %r207, 416
-%r209 = or i448 %r203, %r208
-%r210 = zext i448 %r209 to i480
-%r212 = getelementptr i32, i32* %r3, i32 14
+%r207 = zext i32 %r206 to i480
+%r208 = shl i480 %r207, 448
+%r209 = or i480 %r203, %r208
+%r210 = zext i480 %r209 to i512
+%r212 = getelementptr i32, i32* %r3, i32 15
 %r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i480
-%r215 = shl i480 %r214, 448
-%r216 = or i480 %r210, %r215
-%r217 = zext i480 %r216 to i512
-%r219 = getelementptr i32, i32* %r3, i32 15
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i512
-%r222 = shl i512 %r221, 480
-%r223 = or i512 %r217, %r222
-%r224 = zext i512 %r223 to i544
-%r226 = getelementptr i32, i32* %r3, i32 16
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i544
-%r229 = shl i544 %r228, 512
-%r230 = or i544 %r224, %r229
-%r231 = zext i544 %r117 to i576
-%r232 = zext i544 %r230 to i576
-%r233 = sub i576 %r231, %r232
-%r234 = trunc i576 %r233 to i544
-%r235 = lshr i576 %r233, 544
-%r236 = trunc i576 %r235 to i1
-%r237 = trunc i544 %r234 to i32
-%r239 = getelementptr i32, i32* %r1, i32 0
-store i32 %r237, i32* %r239
-%r240 = lshr i544 %r234, 32
-%r241 = trunc i544 %r240 to i32
-%r243 = getelementptr i32, i32* %r1, i32 1
-store i32 %r241, i32* %r243
-%r244 = lshr i544 %r240, 32
-%r245 = trunc i544 %r244 to i32
-%r247 = getelementptr i32, i32* %r1, i32 2
-store i32 %r245, i32* %r247
-%r248 = lshr i544 %r244, 32
-%r249 = trunc i544 %r248 to i32
-%r251 = getelementptr i32, i32* %r1, i32 3
-store i32 %r249, i32* %r251
-%r252 = lshr i544 %r248, 32
-%r253 = trunc i544 %r252 to i32
-%r255 = getelementptr i32, i32* %r1, i32 4
-store i32 %r253, i32* %r255
-%r256 = lshr i544 %r252, 32
-%r257 = trunc i544 %r256 to i32
-%r259 = getelementptr i32, i32* %r1, i32 5
-store i32 %r257, i32* %r259
-%r260 = lshr i544 %r256, 32
-%r261 = trunc i544 %r260 to i32
-%r263 = getelementptr i32, i32* %r1, i32 6
-store i32 %r261, i32* %r263
-%r264 = lshr i544 %r260, 32
-%r265 = trunc i544 %r264 to i32
-%r267 = getelementptr i32, i32* %r1, i32 7
-store i32 %r265, i32* %r267
-%r268 = lshr i544 %r264, 32
-%r269 = trunc i544 %r268 to i32
-%r271 = getelementptr i32, i32* %r1, i32 8
-store i32 %r269, i32* %r271
-%r272 = lshr i544 %r268, 32
-%r273 = trunc i544 %r272 to i32
-%r275 = getelementptr i32, i32* %r1, i32 9
-store i32 %r273, i32* %r275
-%r276 = lshr i544 %r272, 32
-%r277 = trunc i544 %r276 to i32
-%r279 = getelementptr i32, i32* %r1, i32 10
-store i32 %r277, i32* %r279
-%r280 = lshr i544 %r276, 32
-%r281 = trunc i544 %r280 to i32
-%r283 = getelementptr i32, i32* %r1, i32 11
-store i32 %r281, i32* %r283
-%r284 = lshr i544 %r280, 32
-%r285 = trunc i544 %r284 to i32
-%r287 = getelementptr i32, i32* %r1, i32 12
-store i32 %r285, i32* %r287
-%r288 = lshr i544 %r284, 32
-%r289 = trunc i544 %r288 to i32
-%r291 = getelementptr i32, i32* %r1, i32 13
-store i32 %r289, i32* %r291
-%r292 = lshr i544 %r288, 32
-%r293 = trunc i544 %r292 to i32
-%r295 = getelementptr i32, i32* %r1, i32 14
-store i32 %r293, i32* %r295
-%r296 = lshr i544 %r292, 32
-%r297 = trunc i544 %r296 to i32
-%r299 = getelementptr i32, i32* %r1, i32 15
-store i32 %r297, i32* %r299
-%r300 = lshr i544 %r296, 32
-%r301 = trunc i544 %r300 to i32
-%r303 = getelementptr i32, i32* %r1, i32 16
-store i32 %r301, i32* %r303
-br i1%r236, label %carry, label %nocarry
+%r214 = zext i32 %r213 to i512
+%r215 = shl i512 %r214, 480
+%r216 = or i512 %r210, %r215
+%r217 = zext i512 %r110 to i544
+%r218 = zext i512 %r216 to i544
+%r219 = sub i544 %r217, %r218
+%r220 = trunc i544 %r219 to i512
+%r221 = lshr i544 %r219, 512
+%r222 = trunc i544 %r221 to i1
+%r224 = getelementptr i32, i32* %r1, i32 0
+%r225 = trunc i512 %r220 to i32
+store i32 %r225, i32* %r224
+%r226 = lshr i512 %r220, 32
+%r228 = getelementptr i32, i32* %r1, i32 1
+%r229 = trunc i512 %r226 to i32
+store i32 %r229, i32* %r228
+%r230 = lshr i512 %r226, 32
+%r232 = getelementptr i32, i32* %r1, i32 2
+%r233 = trunc i512 %r230 to i32
+store i32 %r233, i32* %r232
+%r234 = lshr i512 %r230, 32
+%r236 = getelementptr i32, i32* %r1, i32 3
+%r237 = trunc i512 %r234 to i32
+store i32 %r237, i32* %r236
+%r238 = lshr i512 %r234, 32
+%r240 = getelementptr i32, i32* %r1, i32 4
+%r241 = trunc i512 %r238 to i32
+store i32 %r241, i32* %r240
+%r242 = lshr i512 %r238, 32
+%r244 = getelementptr i32, i32* %r1, i32 5
+%r245 = trunc i512 %r242 to i32
+store i32 %r245, i32* %r244
+%r246 = lshr i512 %r242, 32
+%r248 = getelementptr i32, i32* %r1, i32 6
+%r249 = trunc i512 %r246 to i32
+store i32 %r249, i32* %r248
+%r250 = lshr i512 %r246, 32
+%r252 = getelementptr i32, i32* %r1, i32 7
+%r253 = trunc i512 %r250 to i32
+store i32 %r253, i32* %r252
+%r254 = lshr i512 %r250, 32
+%r256 = getelementptr i32, i32* %r1, i32 8
+%r257 = trunc i512 %r254 to i32
+store i32 %r257, i32* %r256
+%r258 = lshr i512 %r254, 32
+%r260 = getelementptr i32, i32* %r1, i32 9
+%r261 = trunc i512 %r258 to i32
+store i32 %r261, i32* %r260
+%r262 = lshr i512 %r258, 32
+%r264 = getelementptr i32, i32* %r1, i32 10
+%r265 = trunc i512 %r262 to i32
+store i32 %r265, i32* %r264
+%r266 = lshr i512 %r262, 32
+%r268 = getelementptr i32, i32* %r1, i32 11
+%r269 = trunc i512 %r266 to i32
+store i32 %r269, i32* %r268
+%r270 = lshr i512 %r266, 32
+%r272 = getelementptr i32, i32* %r1, i32 12
+%r273 = trunc i512 %r270 to i32
+store i32 %r273, i32* %r272
+%r274 = lshr i512 %r270, 32
+%r276 = getelementptr i32, i32* %r1, i32 13
+%r277 = trunc i512 %r274 to i32
+store i32 %r277, i32* %r276
+%r278 = lshr i512 %r274, 32
+%r280 = getelementptr i32, i32* %r1, i32 14
+%r281 = trunc i512 %r278 to i32
+store i32 %r281, i32* %r280
+%r282 = lshr i512 %r278, 32
+%r284 = getelementptr i32, i32* %r1, i32 15
+%r285 = trunc i512 %r282 to i32
+store i32 %r285, i32* %r284
+br i1%r222, label %carry, label %nocarry
 nocarry:
 ret void
 carry:
-%r304 = load i32, i32* %r4
-%r305 = zext i32 %r304 to i64
-%r307 = getelementptr i32, i32* %r4, i32 1
-%r308 = load i32, i32* %r307
-%r309 = zext i32 %r308 to i64
-%r310 = shl i64 %r309, 32
-%r311 = or i64 %r305, %r310
-%r312 = zext i64 %r311 to i96
-%r314 = getelementptr i32, i32* %r4, i32 2
-%r315 = load i32, i32* %r314
-%r316 = zext i32 %r315 to i96
-%r317 = shl i96 %r316, 64
-%r318 = or i96 %r312, %r317
-%r319 = zext i96 %r318 to i128
-%r321 = getelementptr i32, i32* %r4, i32 3
-%r322 = load i32, i32* %r321
-%r323 = zext i32 %r322 to i128
-%r324 = shl i128 %r323, 96
-%r325 = or i128 %r319, %r324
-%r326 = zext i128 %r325 to i160
-%r328 = getelementptr i32, i32* %r4, i32 4
-%r329 = load i32, i32* %r328
-%r330 = zext i32 %r329 to i160
-%r331 = shl i160 %r330, 128
-%r332 = or i160 %r326, %r331
-%r333 = zext i160 %r332 to i192
-%r335 = getelementptr i32, i32* %r4, i32 5
-%r336 = load i32, i32* %r335
-%r337 = zext i32 %r336 to i192
-%r338 = shl i192 %r337, 160
-%r339 = or i192 %r333, %r338
-%r340 = zext i192 %r339 to i224
-%r342 = getelementptr i32, i32* %r4, i32 6
-%r343 = load i32, i32* %r342
-%r344 = zext i32 %r343 to i224
-%r345 = shl i224 %r344, 192
-%r346 = or i224 %r340, %r345
-%r347 = zext i224 %r346 to i256
-%r349 = getelementptr i32, i32* %r4, i32 7
-%r350 = load i32, i32* %r349
-%r351 = zext i32 %r350 to i256
-%r352 = shl i256 %r351, 224
-%r353 = or i256 %r347, %r352
-%r354 = zext i256 %r353 to i288
-%r356 = getelementptr i32, i32* %r4, i32 8
-%r357 = load i32, i32* %r356
-%r358 = zext i32 %r357 to i288
-%r359 = shl i288 %r358, 256
-%r360 = or i288 %r354, %r359
-%r361 = zext i288 %r360 to i320
-%r363 = getelementptr i32, i32* %r4, i32 9
-%r364 = load i32, i32* %r363
-%r365 = zext i32 %r364 to i320
-%r366 = shl i320 %r365, 288
-%r367 = or i320 %r361, %r366
-%r368 = zext i320 %r367 to i352
-%r370 = getelementptr i32, i32* %r4, i32 10
-%r371 = load i32, i32* %r370
-%r372 = zext i32 %r371 to i352
-%r373 = shl i352 %r372, 320
-%r374 = or i352 %r368, %r373
-%r375 = zext i352 %r374 to i384
-%r377 = getelementptr i32, i32* %r4, i32 11
-%r378 = load i32, i32* %r377
-%r379 = zext i32 %r378 to i384
-%r380 = shl i384 %r379, 352
-%r381 = or i384 %r375, %r380
-%r382 = zext i384 %r381 to i416
-%r384 = getelementptr i32, i32* %r4, i32 12
-%r385 = load i32, i32* %r384
-%r386 = zext i32 %r385 to i416
-%r387 = shl i416 %r386, 384
-%r388 = or i416 %r382, %r387
-%r389 = zext i416 %r388 to i448
-%r391 = getelementptr i32, i32* %r4, i32 13
-%r392 = load i32, i32* %r391
-%r393 = zext i32 %r392 to i448
-%r394 = shl i448 %r393, 416
-%r395 = or i448 %r389, %r394
-%r396 = zext i448 %r395 to i480
-%r398 = getelementptr i32, i32* %r4, i32 14
-%r399 = load i32, i32* %r398
-%r400 = zext i32 %r399 to i480
-%r401 = shl i480 %r400, 448
-%r402 = or i480 %r396, %r401
-%r403 = zext i480 %r402 to i512
-%r405 = getelementptr i32, i32* %r4, i32 15
-%r406 = load i32, i32* %r405
-%r407 = zext i32 %r406 to i512
-%r408 = shl i512 %r407, 480
-%r409 = or i512 %r403, %r408
-%r410 = zext i512 %r409 to i544
-%r412 = getelementptr i32, i32* %r4, i32 16
-%r413 = load i32, i32* %r412
-%r414 = zext i32 %r413 to i544
-%r415 = shl i544 %r414, 512
-%r416 = or i544 %r410, %r415
-%r417 = add i544 %r234, %r416
-%r418 = trunc i544 %r417 to i32
-%r420 = getelementptr i32, i32* %r1, i32 0
-store i32 %r418, i32* %r420
-%r421 = lshr i544 %r417, 32
-%r422 = trunc i544 %r421 to i32
-%r424 = getelementptr i32, i32* %r1, i32 1
-store i32 %r422, i32* %r424
-%r425 = lshr i544 %r421, 32
-%r426 = trunc i544 %r425 to i32
-%r428 = getelementptr i32, i32* %r1, i32 2
-store i32 %r426, i32* %r428
-%r429 = lshr i544 %r425, 32
-%r430 = trunc i544 %r429 to i32
-%r432 = getelementptr i32, i32* %r1, i32 3
-store i32 %r430, i32* %r432
-%r433 = lshr i544 %r429, 32
-%r434 = trunc i544 %r433 to i32
-%r436 = getelementptr i32, i32* %r1, i32 4
-store i32 %r434, i32* %r436
-%r437 = lshr i544 %r433, 32
-%r438 = trunc i544 %r437 to i32
-%r440 = getelementptr i32, i32* %r1, i32 5
-store i32 %r438, i32* %r440
-%r441 = lshr i544 %r437, 32
-%r442 = trunc i544 %r441 to i32
-%r444 = getelementptr i32, i32* %r1, i32 6
-store i32 %r442, i32* %r444
-%r445 = lshr i544 %r441, 32
-%r446 = trunc i544 %r445 to i32
-%r448 = getelementptr i32, i32* %r1, i32 7
-store i32 %r446, i32* %r448
-%r449 = lshr i544 %r445, 32
-%r450 = trunc i544 %r449 to i32
-%r452 = getelementptr i32, i32* %r1, i32 8
-store i32 %r450, i32* %r452
-%r453 = lshr i544 %r449, 32
-%r454 = trunc i544 %r453 to i32
-%r456 = getelementptr i32, i32* %r1, i32 9
-store i32 %r454, i32* %r456
-%r457 = lshr i544 %r453, 32
-%r458 = trunc i544 %r457 to i32
-%r460 = getelementptr i32, i32* %r1, i32 10
-store i32 %r458, i32* %r460
-%r461 = lshr i544 %r457, 32
-%r462 = trunc i544 %r461 to i32
-%r464 = getelementptr i32, i32* %r1, i32 11
-store i32 %r462, i32* %r464
-%r465 = lshr i544 %r461, 32
-%r466 = trunc i544 %r465 to i32
-%r468 = getelementptr i32, i32* %r1, i32 12
-store i32 %r466, i32* %r468
-%r469 = lshr i544 %r465, 32
-%r470 = trunc i544 %r469 to i32
-%r472 = getelementptr i32, i32* %r1, i32 13
-store i32 %r470, i32* %r472
-%r473 = lshr i544 %r469, 32
-%r474 = trunc i544 %r473 to i32
-%r476 = getelementptr i32, i32* %r1, i32 14
-store i32 %r474, i32* %r476
-%r477 = lshr i544 %r473, 32
-%r478 = trunc i544 %r477 to i32
-%r480 = getelementptr i32, i32* %r1, i32 15
-store i32 %r478, i32* %r480
-%r481 = lshr i544 %r477, 32
-%r482 = trunc i544 %r481 to i32
-%r484 = getelementptr i32, i32* %r1, i32 16
-store i32 %r482, i32* %r484
+%r286 = load i32, i32* %r4
+%r287 = zext i32 %r286 to i64
+%r289 = getelementptr i32, i32* %r4, i32 1
+%r290 = load i32, i32* %r289
+%r291 = zext i32 %r290 to i64
+%r292 = shl i64 %r291, 32
+%r293 = or i64 %r287, %r292
+%r294 = zext i64 %r293 to i96
+%r296 = getelementptr i32, i32* %r4, i32 2
+%r297 = load i32, i32* %r296
+%r298 = zext i32 %r297 to i96
+%r299 = shl i96 %r298, 64
+%r300 = or i96 %r294, %r299
+%r301 = zext i96 %r300 to i128
+%r303 = getelementptr i32, i32* %r4, i32 3
+%r304 = load i32, i32* %r303
+%r305 = zext i32 %r304 to i128
+%r306 = shl i128 %r305, 96
+%r307 = or i128 %r301, %r306
+%r308 = zext i128 %r307 to i160
+%r310 = getelementptr i32, i32* %r4, i32 4
+%r311 = load i32, i32* %r310
+%r312 = zext i32 %r311 to i160
+%r313 = shl i160 %r312, 128
+%r314 = or i160 %r308, %r313
+%r315 = zext i160 %r314 to i192
+%r317 = getelementptr i32, i32* %r4, i32 5
+%r318 = load i32, i32* %r317
+%r319 = zext i32 %r318 to i192
+%r320 = shl i192 %r319, 160
+%r321 = or i192 %r315, %r320
+%r322 = zext i192 %r321 to i224
+%r324 = getelementptr i32, i32* %r4, i32 6
+%r325 = load i32, i32* %r324
+%r326 = zext i32 %r325 to i224
+%r327 = shl i224 %r326, 192
+%r328 = or i224 %r322, %r327
+%r329 = zext i224 %r328 to i256
+%r331 = getelementptr i32, i32* %r4, i32 7
+%r332 = load i32, i32* %r331
+%r333 = zext i32 %r332 to i256
+%r334 = shl i256 %r333, 224
+%r335 = or i256 %r329, %r334
+%r336 = zext i256 %r335 to i288
+%r338 = getelementptr i32, i32* %r4, i32 8
+%r339 = load i32, i32* %r338
+%r340 = zext i32 %r339 to i288
+%r341 = shl i288 %r340, 256
+%r342 = or i288 %r336, %r341
+%r343 = zext i288 %r342 to i320
+%r345 = getelementptr i32, i32* %r4, i32 9
+%r346 = load i32, i32* %r345
+%r347 = zext i32 %r346 to i320
+%r348 = shl i320 %r347, 288
+%r349 = or i320 %r343, %r348
+%r350 = zext i320 %r349 to i352
+%r352 = getelementptr i32, i32* %r4, i32 10
+%r353 = load i32, i32* %r352
+%r354 = zext i32 %r353 to i352
+%r355 = shl i352 %r354, 320
+%r356 = or i352 %r350, %r355
+%r357 = zext i352 %r356 to i384
+%r359 = getelementptr i32, i32* %r4, i32 11
+%r360 = load i32, i32* %r359
+%r361 = zext i32 %r360 to i384
+%r362 = shl i384 %r361, 352
+%r363 = or i384 %r357, %r362
+%r364 = zext i384 %r363 to i416
+%r366 = getelementptr i32, i32* %r4, i32 12
+%r367 = load i32, i32* %r366
+%r368 = zext i32 %r367 to i416
+%r369 = shl i416 %r368, 384
+%r370 = or i416 %r364, %r369
+%r371 = zext i416 %r370 to i448
+%r373 = getelementptr i32, i32* %r4, i32 13
+%r374 = load i32, i32* %r373
+%r375 = zext i32 %r374 to i448
+%r376 = shl i448 %r375, 416
+%r377 = or i448 %r371, %r376
+%r378 = zext i448 %r377 to i480
+%r380 = getelementptr i32, i32* %r4, i32 14
+%r381 = load i32, i32* %r380
+%r382 = zext i32 %r381 to i480
+%r383 = shl i480 %r382, 448
+%r384 = or i480 %r378, %r383
+%r385 = zext i480 %r384 to i512
+%r387 = getelementptr i32, i32* %r4, i32 15
+%r388 = load i32, i32* %r387
+%r389 = zext i32 %r388 to i512
+%r390 = shl i512 %r389, 480
+%r391 = or i512 %r385, %r390
+%r392 = add i512 %r220, %r391
+%r394 = getelementptr i32, i32* %r1, i32 0
+%r395 = trunc i512 %r392 to i32
+store i32 %r395, i32* %r394
+%r396 = lshr i512 %r392, 32
+%r398 = getelementptr i32, i32* %r1, i32 1
+%r399 = trunc i512 %r396 to i32
+store i32 %r399, i32* %r398
+%r400 = lshr i512 %r396, 32
+%r402 = getelementptr i32, i32* %r1, i32 2
+%r403 = trunc i512 %r400 to i32
+store i32 %r403, i32* %r402
+%r404 = lshr i512 %r400, 32
+%r406 = getelementptr i32, i32* %r1, i32 3
+%r407 = trunc i512 %r404 to i32
+store i32 %r407, i32* %r406
+%r408 = lshr i512 %r404, 32
+%r410 = getelementptr i32, i32* %r1, i32 4
+%r411 = trunc i512 %r408 to i32
+store i32 %r411, i32* %r410
+%r412 = lshr i512 %r408, 32
+%r414 = getelementptr i32, i32* %r1, i32 5
+%r415 = trunc i512 %r412 to i32
+store i32 %r415, i32* %r414
+%r416 = lshr i512 %r412, 32
+%r418 = getelementptr i32, i32* %r1, i32 6
+%r419 = trunc i512 %r416 to i32
+store i32 %r419, i32* %r418
+%r420 = lshr i512 %r416, 32
+%r422 = getelementptr i32, i32* %r1, i32 7
+%r423 = trunc i512 %r420 to i32
+store i32 %r423, i32* %r422
+%r424 = lshr i512 %r420, 32
+%r426 = getelementptr i32, i32* %r1, i32 8
+%r427 = trunc i512 %r424 to i32
+store i32 %r427, i32* %r426
+%r428 = lshr i512 %r424, 32
+%r430 = getelementptr i32, i32* %r1, i32 9
+%r431 = trunc i512 %r428 to i32
+store i32 %r431, i32* %r430
+%r432 = lshr i512 %r428, 32
+%r434 = getelementptr i32, i32* %r1, i32 10
+%r435 = trunc i512 %r432 to i32
+store i32 %r435, i32* %r434
+%r436 = lshr i512 %r432, 32
+%r438 = getelementptr i32, i32* %r1, i32 11
+%r439 = trunc i512 %r436 to i32
+store i32 %r439, i32* %r438
+%r440 = lshr i512 %r436, 32
+%r442 = getelementptr i32, i32* %r1, i32 12
+%r443 = trunc i512 %r440 to i32
+store i32 %r443, i32* %r442
+%r444 = lshr i512 %r440, 32
+%r446 = getelementptr i32, i32* %r1, i32 13
+%r447 = trunc i512 %r444 to i32
+store i32 %r447, i32* %r446
+%r448 = lshr i512 %r444, 32
+%r450 = getelementptr i32, i32* %r1, i32 14
+%r451 = trunc i512 %r448 to i32
+store i32 %r451, i32* %r450
+%r452 = lshr i512 %r448, 32
+%r454 = getelementptr i32, i32* %r1, i32 15
+%r455 = trunc i512 %r452 to i32
+store i32 %r455, i32* %r454
 ret void
 }
-define void @mcl_fp_subNF17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fp_subNF16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -52689,281 +18999,259 @@ define void @mcl_fp_subNF17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r108 = zext i32 %r107 to i512
 %r109 = shl i512 %r108, 480
 %r110 = or i512 %r104, %r109
-%r111 = zext i512 %r110 to i544
-%r113 = getelementptr i32, i32* %r2, i32 16
-%r114 = load i32, i32* %r113
-%r115 = zext i32 %r114 to i544
-%r116 = shl i544 %r115, 512
-%r117 = or i544 %r111, %r116
-%r118 = load i32, i32* %r3
-%r119 = zext i32 %r118 to i64
-%r121 = getelementptr i32, i32* %r3, i32 1
+%r111 = load i32, i32* %r3
+%r112 = zext i32 %r111 to i64
+%r114 = getelementptr i32, i32* %r3, i32 1
+%r115 = load i32, i32* %r114
+%r116 = zext i32 %r115 to i64
+%r117 = shl i64 %r116, 32
+%r118 = or i64 %r112, %r117
+%r119 = zext i64 %r118 to i96
+%r121 = getelementptr i32, i32* %r3, i32 2
 %r122 = load i32, i32* %r121
-%r123 = zext i32 %r122 to i64
-%r124 = shl i64 %r123, 32
-%r125 = or i64 %r119, %r124
-%r126 = zext i64 %r125 to i96
-%r128 = getelementptr i32, i32* %r3, i32 2
+%r123 = zext i32 %r122 to i96
+%r124 = shl i96 %r123, 64
+%r125 = or i96 %r119, %r124
+%r126 = zext i96 %r125 to i128
+%r128 = getelementptr i32, i32* %r3, i32 3
 %r129 = load i32, i32* %r128
-%r130 = zext i32 %r129 to i96
-%r131 = shl i96 %r130, 64
-%r132 = or i96 %r126, %r131
-%r133 = zext i96 %r132 to i128
-%r135 = getelementptr i32, i32* %r3, i32 3
+%r130 = zext i32 %r129 to i128
+%r131 = shl i128 %r130, 96
+%r132 = or i128 %r126, %r131
+%r133 = zext i128 %r132 to i160
+%r135 = getelementptr i32, i32* %r3, i32 4
 %r136 = load i32, i32* %r135
-%r137 = zext i32 %r136 to i128
-%r138 = shl i128 %r137, 96
-%r139 = or i128 %r133, %r138
-%r140 = zext i128 %r139 to i160
-%r142 = getelementptr i32, i32* %r3, i32 4
+%r137 = zext i32 %r136 to i160
+%r138 = shl i160 %r137, 128
+%r139 = or i160 %r133, %r138
+%r140 = zext i160 %r139 to i192
+%r142 = getelementptr i32, i32* %r3, i32 5
 %r143 = load i32, i32* %r142
-%r144 = zext i32 %r143 to i160
-%r145 = shl i160 %r144, 128
-%r146 = or i160 %r140, %r145
-%r147 = zext i160 %r146 to i192
-%r149 = getelementptr i32, i32* %r3, i32 5
+%r144 = zext i32 %r143 to i192
+%r145 = shl i192 %r144, 160
+%r146 = or i192 %r140, %r145
+%r147 = zext i192 %r146 to i224
+%r149 = getelementptr i32, i32* %r3, i32 6
 %r150 = load i32, i32* %r149
-%r151 = zext i32 %r150 to i192
-%r152 = shl i192 %r151, 160
-%r153 = or i192 %r147, %r152
-%r154 = zext i192 %r153 to i224
-%r156 = getelementptr i32, i32* %r3, i32 6
+%r151 = zext i32 %r150 to i224
+%r152 = shl i224 %r151, 192
+%r153 = or i224 %r147, %r152
+%r154 = zext i224 %r153 to i256
+%r156 = getelementptr i32, i32* %r3, i32 7
 %r157 = load i32, i32* %r156
-%r158 = zext i32 %r157 to i224
-%r159 = shl i224 %r158, 192
-%r160 = or i224 %r154, %r159
-%r161 = zext i224 %r160 to i256
-%r163 = getelementptr i32, i32* %r3, i32 7
+%r158 = zext i32 %r157 to i256
+%r159 = shl i256 %r158, 224
+%r160 = or i256 %r154, %r159
+%r161 = zext i256 %r160 to i288
+%r163 = getelementptr i32, i32* %r3, i32 8
 %r164 = load i32, i32* %r163
-%r165 = zext i32 %r164 to i256
-%r166 = shl i256 %r165, 224
-%r167 = or i256 %r161, %r166
-%r168 = zext i256 %r167 to i288
-%r170 = getelementptr i32, i32* %r3, i32 8
+%r165 = zext i32 %r164 to i288
+%r166 = shl i288 %r165, 256
+%r167 = or i288 %r161, %r166
+%r168 = zext i288 %r167 to i320
+%r170 = getelementptr i32, i32* %r3, i32 9
 %r171 = load i32, i32* %r170
-%r172 = zext i32 %r171 to i288
-%r173 = shl i288 %r172, 256
-%r174 = or i288 %r168, %r173
-%r175 = zext i288 %r174 to i320
-%r177 = getelementptr i32, i32* %r3, i32 9
+%r172 = zext i32 %r171 to i320
+%r173 = shl i320 %r172, 288
+%r174 = or i320 %r168, %r173
+%r175 = zext i320 %r174 to i352
+%r177 = getelementptr i32, i32* %r3, i32 10
 %r178 = load i32, i32* %r177
-%r179 = zext i32 %r178 to i320
-%r180 = shl i320 %r179, 288
-%r181 = or i320 %r175, %r180
-%r182 = zext i320 %r181 to i352
-%r184 = getelementptr i32, i32* %r3, i32 10
+%r179 = zext i32 %r178 to i352
+%r180 = shl i352 %r179, 320
+%r181 = or i352 %r175, %r180
+%r182 = zext i352 %r181 to i384
+%r184 = getelementptr i32, i32* %r3, i32 11
 %r185 = load i32, i32* %r184
-%r186 = zext i32 %r185 to i352
-%r187 = shl i352 %r186, 320
-%r188 = or i352 %r182, %r187
-%r189 = zext i352 %r188 to i384
-%r191 = getelementptr i32, i32* %r3, i32 11
+%r186 = zext i32 %r185 to i384
+%r187 = shl i384 %r186, 352
+%r188 = or i384 %r182, %r187
+%r189 = zext i384 %r188 to i416
+%r191 = getelementptr i32, i32* %r3, i32 12
 %r192 = load i32, i32* %r191
-%r193 = zext i32 %r192 to i384
-%r194 = shl i384 %r193, 352
-%r195 = or i384 %r189, %r194
-%r196 = zext i384 %r195 to i416
-%r198 = getelementptr i32, i32* %r3, i32 12
+%r193 = zext i32 %r192 to i416
+%r194 = shl i416 %r193, 384
+%r195 = or i416 %r189, %r194
+%r196 = zext i416 %r195 to i448
+%r198 = getelementptr i32, i32* %r3, i32 13
 %r199 = load i32, i32* %r198
-%r200 = zext i32 %r199 to i416
-%r201 = shl i416 %r200, 384
-%r202 = or i416 %r196, %r201
-%r203 = zext i416 %r202 to i448
-%r205 = getelementptr i32, i32* %r3, i32 13
+%r200 = zext i32 %r199 to i448
+%r201 = shl i448 %r200, 416
+%r202 = or i448 %r196, %r201
+%r203 = zext i448 %r202 to i480
+%r205 = getelementptr i32, i32* %r3, i32 14
 %r206 = load i32, i32* %r205
-%r207 = zext i32 %r206 to i448
-%r208 = shl i448 %r207, 416
-%r209 = or i448 %r203, %r208
-%r210 = zext i448 %r209 to i480
-%r212 = getelementptr i32, i32* %r3, i32 14
+%r207 = zext i32 %r206 to i480
+%r208 = shl i480 %r207, 448
+%r209 = or i480 %r203, %r208
+%r210 = zext i480 %r209 to i512
+%r212 = getelementptr i32, i32* %r3, i32 15
 %r213 = load i32, i32* %r212
-%r214 = zext i32 %r213 to i480
-%r215 = shl i480 %r214, 448
-%r216 = or i480 %r210, %r215
-%r217 = zext i480 %r216 to i512
-%r219 = getelementptr i32, i32* %r3, i32 15
-%r220 = load i32, i32* %r219
-%r221 = zext i32 %r220 to i512
-%r222 = shl i512 %r221, 480
-%r223 = or i512 %r217, %r222
-%r224 = zext i512 %r223 to i544
-%r226 = getelementptr i32, i32* %r3, i32 16
-%r227 = load i32, i32* %r226
-%r228 = zext i32 %r227 to i544
-%r229 = shl i544 %r228, 512
-%r230 = or i544 %r224, %r229
-%r231 = sub i544 %r117, %r230
-%r232 = lshr i544 %r231, 543
-%r233 = trunc i544 %r232 to i1
-%r234 = load i32, i32* %r4
-%r235 = zext i32 %r234 to i64
-%r237 = getelementptr i32, i32* %r4, i32 1
+%r214 = zext i32 %r213 to i512
+%r215 = shl i512 %r214, 480
+%r216 = or i512 %r210, %r215
+%r217 = sub i512 %r110, %r216
+%r218 = lshr i512 %r217, 511
+%r219 = trunc i512 %r218 to i1
+%r220 = load i32, i32* %r4
+%r221 = zext i32 %r220 to i64
+%r223 = getelementptr i32, i32* %r4, i32 1
+%r224 = load i32, i32* %r223
+%r225 = zext i32 %r224 to i64
+%r226 = shl i64 %r225, 32
+%r227 = or i64 %r221, %r226
+%r228 = zext i64 %r227 to i96
+%r230 = getelementptr i32, i32* %r4, i32 2
+%r231 = load i32, i32* %r230
+%r232 = zext i32 %r231 to i96
+%r233 = shl i96 %r232, 64
+%r234 = or i96 %r228, %r233
+%r235 = zext i96 %r234 to i128
+%r237 = getelementptr i32, i32* %r4, i32 3
 %r238 = load i32, i32* %r237
-%r239 = zext i32 %r238 to i64
-%r240 = shl i64 %r239, 32
-%r241 = or i64 %r235, %r240
-%r242 = zext i64 %r241 to i96
-%r244 = getelementptr i32, i32* %r4, i32 2
+%r239 = zext i32 %r238 to i128
+%r240 = shl i128 %r239, 96
+%r241 = or i128 %r235, %r240
+%r242 = zext i128 %r241 to i160
+%r244 = getelementptr i32, i32* %r4, i32 4
 %r245 = load i32, i32* %r244
-%r246 = zext i32 %r245 to i96
-%r247 = shl i96 %r246, 64
-%r248 = or i96 %r242, %r247
-%r249 = zext i96 %r248 to i128
-%r251 = getelementptr i32, i32* %r4, i32 3
+%r246 = zext i32 %r245 to i160
+%r247 = shl i160 %r246, 128
+%r248 = or i160 %r242, %r247
+%r249 = zext i160 %r248 to i192
+%r251 = getelementptr i32, i32* %r4, i32 5
 %r252 = load i32, i32* %r251
-%r253 = zext i32 %r252 to i128
-%r254 = shl i128 %r253, 96
-%r255 = or i128 %r249, %r254
-%r256 = zext i128 %r255 to i160
-%r258 = getelementptr i32, i32* %r4, i32 4
+%r253 = zext i32 %r252 to i192
+%r254 = shl i192 %r253, 160
+%r255 = or i192 %r249, %r254
+%r256 = zext i192 %r255 to i224
+%r258 = getelementptr i32, i32* %r4, i32 6
 %r259 = load i32, i32* %r258
-%r260 = zext i32 %r259 to i160
-%r261 = shl i160 %r260, 128
-%r262 = or i160 %r256, %r261
-%r263 = zext i160 %r262 to i192
-%r265 = getelementptr i32, i32* %r4, i32 5
+%r260 = zext i32 %r259 to i224
+%r261 = shl i224 %r260, 192
+%r262 = or i224 %r256, %r261
+%r263 = zext i224 %r262 to i256
+%r265 = getelementptr i32, i32* %r4, i32 7
 %r266 = load i32, i32* %r265
-%r267 = zext i32 %r266 to i192
-%r268 = shl i192 %r267, 160
-%r269 = or i192 %r263, %r268
-%r270 = zext i192 %r269 to i224
-%r272 = getelementptr i32, i32* %r4, i32 6
+%r267 = zext i32 %r266 to i256
+%r268 = shl i256 %r267, 224
+%r269 = or i256 %r263, %r268
+%r270 = zext i256 %r269 to i288
+%r272 = getelementptr i32, i32* %r4, i32 8
 %r273 = load i32, i32* %r272
-%r274 = zext i32 %r273 to i224
-%r275 = shl i224 %r274, 192
-%r276 = or i224 %r270, %r275
-%r277 = zext i224 %r276 to i256
-%r279 = getelementptr i32, i32* %r4, i32 7
+%r274 = zext i32 %r273 to i288
+%r275 = shl i288 %r274, 256
+%r276 = or i288 %r270, %r275
+%r277 = zext i288 %r276 to i320
+%r279 = getelementptr i32, i32* %r4, i32 9
 %r280 = load i32, i32* %r279
-%r281 = zext i32 %r280 to i256
-%r282 = shl i256 %r281, 224
-%r283 = or i256 %r277, %r282
-%r284 = zext i256 %r283 to i288
-%r286 = getelementptr i32, i32* %r4, i32 8
+%r281 = zext i32 %r280 to i320
+%r282 = shl i320 %r281, 288
+%r283 = or i320 %r277, %r282
+%r284 = zext i320 %r283 to i352
+%r286 = getelementptr i32, i32* %r4, i32 10
 %r287 = load i32, i32* %r286
-%r288 = zext i32 %r287 to i288
-%r289 = shl i288 %r288, 256
-%r290 = or i288 %r284, %r289
-%r291 = zext i288 %r290 to i320
-%r293 = getelementptr i32, i32* %r4, i32 9
+%r288 = zext i32 %r287 to i352
+%r289 = shl i352 %r288, 320
+%r290 = or i352 %r284, %r289
+%r291 = zext i352 %r290 to i384
+%r293 = getelementptr i32, i32* %r4, i32 11
 %r294 = load i32, i32* %r293
-%r295 = zext i32 %r294 to i320
-%r296 = shl i320 %r295, 288
-%r297 = or i320 %r291, %r296
-%r298 = zext i320 %r297 to i352
-%r300 = getelementptr i32, i32* %r4, i32 10
+%r295 = zext i32 %r294 to i384
+%r296 = shl i384 %r295, 352
+%r297 = or i384 %r291, %r296
+%r298 = zext i384 %r297 to i416
+%r300 = getelementptr i32, i32* %r4, i32 12
 %r301 = load i32, i32* %r300
-%r302 = zext i32 %r301 to i352
-%r303 = shl i352 %r302, 320
-%r304 = or i352 %r298, %r303
-%r305 = zext i352 %r304 to i384
-%r307 = getelementptr i32, i32* %r4, i32 11
+%r302 = zext i32 %r301 to i416
+%r303 = shl i416 %r302, 384
+%r304 = or i416 %r298, %r303
+%r305 = zext i416 %r304 to i448
+%r307 = getelementptr i32, i32* %r4, i32 13
 %r308 = load i32, i32* %r307
-%r309 = zext i32 %r308 to i384
-%r310 = shl i384 %r309, 352
-%r311 = or i384 %r305, %r310
-%r312 = zext i384 %r311 to i416
-%r314 = getelementptr i32, i32* %r4, i32 12
+%r309 = zext i32 %r308 to i448
+%r310 = shl i448 %r309, 416
+%r311 = or i448 %r305, %r310
+%r312 = zext i448 %r311 to i480
+%r314 = getelementptr i32, i32* %r4, i32 14
 %r315 = load i32, i32* %r314
-%r316 = zext i32 %r315 to i416
-%r317 = shl i416 %r316, 384
-%r318 = or i416 %r312, %r317
-%r319 = zext i416 %r318 to i448
-%r321 = getelementptr i32, i32* %r4, i32 13
+%r316 = zext i32 %r315 to i480
+%r317 = shl i480 %r316, 448
+%r318 = or i480 %r312, %r317
+%r319 = zext i480 %r318 to i512
+%r321 = getelementptr i32, i32* %r4, i32 15
 %r322 = load i32, i32* %r321
-%r323 = zext i32 %r322 to i448
-%r324 = shl i448 %r323, 416
-%r325 = or i448 %r319, %r324
-%r326 = zext i448 %r325 to i480
-%r328 = getelementptr i32, i32* %r4, i32 14
-%r329 = load i32, i32* %r328
-%r330 = zext i32 %r329 to i480
-%r331 = shl i480 %r330, 448
-%r332 = or i480 %r326, %r331
-%r333 = zext i480 %r332 to i512
-%r335 = getelementptr i32, i32* %r4, i32 15
-%r336 = load i32, i32* %r335
-%r337 = zext i32 %r336 to i512
-%r338 = shl i512 %r337, 480
-%r339 = or i512 %r333, %r338
-%r340 = zext i512 %r339 to i544
-%r342 = getelementptr i32, i32* %r4, i32 16
-%r343 = load i32, i32* %r342
-%r344 = zext i32 %r343 to i544
-%r345 = shl i544 %r344, 512
-%r346 = or i544 %r340, %r345
-%r348 = select i1 %r233, i544 %r346, i544 0
-%r349 = add i544 %r231, %r348
-%r350 = trunc i544 %r349 to i32
-%r352 = getelementptr i32, i32* %r1, i32 0
-store i32 %r350, i32* %r352
-%r353 = lshr i544 %r349, 32
-%r354 = trunc i544 %r353 to i32
-%r356 = getelementptr i32, i32* %r1, i32 1
-store i32 %r354, i32* %r356
-%r357 = lshr i544 %r353, 32
-%r358 = trunc i544 %r357 to i32
-%r360 = getelementptr i32, i32* %r1, i32 2
-store i32 %r358, i32* %r360
-%r361 = lshr i544 %r357, 32
-%r362 = trunc i544 %r361 to i32
-%r364 = getelementptr i32, i32* %r1, i32 3
-store i32 %r362, i32* %r364
-%r365 = lshr i544 %r361, 32
-%r366 = trunc i544 %r365 to i32
-%r368 = getelementptr i32, i32* %r1, i32 4
-store i32 %r366, i32* %r368
-%r369 = lshr i544 %r365, 32
-%r370 = trunc i544 %r369 to i32
-%r372 = getelementptr i32, i32* %r1, i32 5
-store i32 %r370, i32* %r372
-%r373 = lshr i544 %r369, 32
-%r374 = trunc i544 %r373 to i32
-%r376 = getelementptr i32, i32* %r1, i32 6
-store i32 %r374, i32* %r376
-%r377 = lshr i544 %r373, 32
-%r378 = trunc i544 %r377 to i32
-%r380 = getelementptr i32, i32* %r1, i32 7
-store i32 %r378, i32* %r380
-%r381 = lshr i544 %r377, 32
-%r382 = trunc i544 %r381 to i32
-%r384 = getelementptr i32, i32* %r1, i32 8
-store i32 %r382, i32* %r384
-%r385 = lshr i544 %r381, 32
-%r386 = trunc i544 %r385 to i32
-%r388 = getelementptr i32, i32* %r1, i32 9
-store i32 %r386, i32* %r388
-%r389 = lshr i544 %r385, 32
-%r390 = trunc i544 %r389 to i32
-%r392 = getelementptr i32, i32* %r1, i32 10
-store i32 %r390, i32* %r392
-%r393 = lshr i544 %r389, 32
-%r394 = trunc i544 %r393 to i32
-%r396 = getelementptr i32, i32* %r1, i32 11
-store i32 %r394, i32* %r396
-%r397 = lshr i544 %r393, 32
-%r398 = trunc i544 %r397 to i32
-%r400 = getelementptr i32, i32* %r1, i32 12
-store i32 %r398, i32* %r400
-%r401 = lshr i544 %r397, 32
-%r402 = trunc i544 %r401 to i32
-%r404 = getelementptr i32, i32* %r1, i32 13
-store i32 %r402, i32* %r404
-%r405 = lshr i544 %r401, 32
-%r406 = trunc i544 %r405 to i32
-%r408 = getelementptr i32, i32* %r1, i32 14
-store i32 %r406, i32* %r408
-%r409 = lshr i544 %r405, 32
-%r410 = trunc i544 %r409 to i32
-%r412 = getelementptr i32, i32* %r1, i32 15
-store i32 %r410, i32* %r412
-%r413 = lshr i544 %r409, 32
-%r414 = trunc i544 %r413 to i32
-%r416 = getelementptr i32, i32* %r1, i32 16
-store i32 %r414, i32* %r416
+%r323 = zext i32 %r322 to i512
+%r324 = shl i512 %r323, 480
+%r325 = or i512 %r319, %r324
+%r327 = select i1 %r219, i512 %r325, i512 0
+%r328 = add i512 %r217, %r327
+%r330 = getelementptr i32, i32* %r1, i32 0
+%r331 = trunc i512 %r328 to i32
+store i32 %r331, i32* %r330
+%r332 = lshr i512 %r328, 32
+%r334 = getelementptr i32, i32* %r1, i32 1
+%r335 = trunc i512 %r332 to i32
+store i32 %r335, i32* %r334
+%r336 = lshr i512 %r332, 32
+%r338 = getelementptr i32, i32* %r1, i32 2
+%r339 = trunc i512 %r336 to i32
+store i32 %r339, i32* %r338
+%r340 = lshr i512 %r336, 32
+%r342 = getelementptr i32, i32* %r1, i32 3
+%r343 = trunc i512 %r340 to i32
+store i32 %r343, i32* %r342
+%r344 = lshr i512 %r340, 32
+%r346 = getelementptr i32, i32* %r1, i32 4
+%r347 = trunc i512 %r344 to i32
+store i32 %r347, i32* %r346
+%r348 = lshr i512 %r344, 32
+%r350 = getelementptr i32, i32* %r1, i32 5
+%r351 = trunc i512 %r348 to i32
+store i32 %r351, i32* %r350
+%r352 = lshr i512 %r348, 32
+%r354 = getelementptr i32, i32* %r1, i32 6
+%r355 = trunc i512 %r352 to i32
+store i32 %r355, i32* %r354
+%r356 = lshr i512 %r352, 32
+%r358 = getelementptr i32, i32* %r1, i32 7
+%r359 = trunc i512 %r356 to i32
+store i32 %r359, i32* %r358
+%r360 = lshr i512 %r356, 32
+%r362 = getelementptr i32, i32* %r1, i32 8
+%r363 = trunc i512 %r360 to i32
+store i32 %r363, i32* %r362
+%r364 = lshr i512 %r360, 32
+%r366 = getelementptr i32, i32* %r1, i32 9
+%r367 = trunc i512 %r364 to i32
+store i32 %r367, i32* %r366
+%r368 = lshr i512 %r364, 32
+%r370 = getelementptr i32, i32* %r1, i32 10
+%r371 = trunc i512 %r368 to i32
+store i32 %r371, i32* %r370
+%r372 = lshr i512 %r368, 32
+%r374 = getelementptr i32, i32* %r1, i32 11
+%r375 = trunc i512 %r372 to i32
+store i32 %r375, i32* %r374
+%r376 = lshr i512 %r372, 32
+%r378 = getelementptr i32, i32* %r1, i32 12
+%r379 = trunc i512 %r376 to i32
+store i32 %r379, i32* %r378
+%r380 = lshr i512 %r376, 32
+%r382 = getelementptr i32, i32* %r1, i32 13
+%r383 = trunc i512 %r380 to i32
+store i32 %r383, i32* %r382
+%r384 = lshr i512 %r380, 32
+%r386 = getelementptr i32, i32* %r1, i32 14
+%r387 = trunc i512 %r384 to i32
+store i32 %r387, i32* %r386
+%r388 = lshr i512 %r384, 32
+%r390 = getelementptr i32, i32* %r1, i32 15
+%r391 = trunc i512 %r388 to i32
+store i32 %r391, i32* %r390
 ret void
 }
-define void @mcl_fpDbl_add17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fpDbl_add16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -53152,464 +19440,426 @@ define void @mcl_fpDbl_add17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r220 = zext i32 %r219 to i1024
 %r221 = shl i1024 %r220, 992
 %r222 = or i1024 %r216, %r221
-%r223 = zext i1024 %r222 to i1056
-%r225 = getelementptr i32, i32* %r2, i32 32
-%r226 = load i32, i32* %r225
-%r227 = zext i32 %r226 to i1056
-%r228 = shl i1056 %r227, 1024
-%r229 = or i1056 %r223, %r228
-%r230 = zext i1056 %r229 to i1088
-%r232 = getelementptr i32, i32* %r2, i32 33
-%r233 = load i32, i32* %r232
-%r234 = zext i32 %r233 to i1088
-%r235 = shl i1088 %r234, 1056
-%r236 = or i1088 %r230, %r235
-%r237 = load i32, i32* %r3
-%r238 = zext i32 %r237 to i64
-%r240 = getelementptr i32, i32* %r3, i32 1
+%r223 = load i32, i32* %r3
+%r224 = zext i32 %r223 to i64
+%r226 = getelementptr i32, i32* %r3, i32 1
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i64
+%r229 = shl i64 %r228, 32
+%r230 = or i64 %r224, %r229
+%r231 = zext i64 %r230 to i96
+%r233 = getelementptr i32, i32* %r3, i32 2
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i96
+%r236 = shl i96 %r235, 64
+%r237 = or i96 %r231, %r236
+%r238 = zext i96 %r237 to i128
+%r240 = getelementptr i32, i32* %r3, i32 3
 %r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i64
-%r243 = shl i64 %r242, 32
-%r244 = or i64 %r238, %r243
-%r245 = zext i64 %r244 to i96
-%r247 = getelementptr i32, i32* %r3, i32 2
+%r242 = zext i32 %r241 to i128
+%r243 = shl i128 %r242, 96
+%r244 = or i128 %r238, %r243
+%r245 = zext i128 %r244 to i160
+%r247 = getelementptr i32, i32* %r3, i32 4
 %r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i96
-%r250 = shl i96 %r249, 64
-%r251 = or i96 %r245, %r250
-%r252 = zext i96 %r251 to i128
-%r254 = getelementptr i32, i32* %r3, i32 3
+%r249 = zext i32 %r248 to i160
+%r250 = shl i160 %r249, 128
+%r251 = or i160 %r245, %r250
+%r252 = zext i160 %r251 to i192
+%r254 = getelementptr i32, i32* %r3, i32 5
 %r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i128
-%r257 = shl i128 %r256, 96
-%r258 = or i128 %r252, %r257
-%r259 = zext i128 %r258 to i160
-%r261 = getelementptr i32, i32* %r3, i32 4
+%r256 = zext i32 %r255 to i192
+%r257 = shl i192 %r256, 160
+%r258 = or i192 %r252, %r257
+%r259 = zext i192 %r258 to i224
+%r261 = getelementptr i32, i32* %r3, i32 6
 %r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i160
-%r264 = shl i160 %r263, 128
-%r265 = or i160 %r259, %r264
-%r266 = zext i160 %r265 to i192
-%r268 = getelementptr i32, i32* %r3, i32 5
+%r263 = zext i32 %r262 to i224
+%r264 = shl i224 %r263, 192
+%r265 = or i224 %r259, %r264
+%r266 = zext i224 %r265 to i256
+%r268 = getelementptr i32, i32* %r3, i32 7
 %r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i192
-%r271 = shl i192 %r270, 160
-%r272 = or i192 %r266, %r271
-%r273 = zext i192 %r272 to i224
-%r275 = getelementptr i32, i32* %r3, i32 6
+%r270 = zext i32 %r269 to i256
+%r271 = shl i256 %r270, 224
+%r272 = or i256 %r266, %r271
+%r273 = zext i256 %r272 to i288
+%r275 = getelementptr i32, i32* %r3, i32 8
 %r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i224
-%r278 = shl i224 %r277, 192
-%r279 = or i224 %r273, %r278
-%r280 = zext i224 %r279 to i256
-%r282 = getelementptr i32, i32* %r3, i32 7
+%r277 = zext i32 %r276 to i288
+%r278 = shl i288 %r277, 256
+%r279 = or i288 %r273, %r278
+%r280 = zext i288 %r279 to i320
+%r282 = getelementptr i32, i32* %r3, i32 9
 %r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i256
-%r285 = shl i256 %r284, 224
-%r286 = or i256 %r280, %r285
-%r287 = zext i256 %r286 to i288
-%r289 = getelementptr i32, i32* %r3, i32 8
+%r284 = zext i32 %r283 to i320
+%r285 = shl i320 %r284, 288
+%r286 = or i320 %r280, %r285
+%r287 = zext i320 %r286 to i352
+%r289 = getelementptr i32, i32* %r3, i32 10
 %r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i288
-%r292 = shl i288 %r291, 256
-%r293 = or i288 %r287, %r292
-%r294 = zext i288 %r293 to i320
-%r296 = getelementptr i32, i32* %r3, i32 9
+%r291 = zext i32 %r290 to i352
+%r292 = shl i352 %r291, 320
+%r293 = or i352 %r287, %r292
+%r294 = zext i352 %r293 to i384
+%r296 = getelementptr i32, i32* %r3, i32 11
 %r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i320
-%r299 = shl i320 %r298, 288
-%r300 = or i320 %r294, %r299
-%r301 = zext i320 %r300 to i352
-%r303 = getelementptr i32, i32* %r3, i32 10
+%r298 = zext i32 %r297 to i384
+%r299 = shl i384 %r298, 352
+%r300 = or i384 %r294, %r299
+%r301 = zext i384 %r300 to i416
+%r303 = getelementptr i32, i32* %r3, i32 12
 %r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i352
-%r306 = shl i352 %r305, 320
-%r307 = or i352 %r301, %r306
-%r308 = zext i352 %r307 to i384
-%r310 = getelementptr i32, i32* %r3, i32 11
+%r305 = zext i32 %r304 to i416
+%r306 = shl i416 %r305, 384
+%r307 = or i416 %r301, %r306
+%r308 = zext i416 %r307 to i448
+%r310 = getelementptr i32, i32* %r3, i32 13
 %r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i384
-%r313 = shl i384 %r312, 352
-%r314 = or i384 %r308, %r313
-%r315 = zext i384 %r314 to i416
-%r317 = getelementptr i32, i32* %r3, i32 12
+%r312 = zext i32 %r311 to i448
+%r313 = shl i448 %r312, 416
+%r314 = or i448 %r308, %r313
+%r315 = zext i448 %r314 to i480
+%r317 = getelementptr i32, i32* %r3, i32 14
 %r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i416
-%r320 = shl i416 %r319, 384
-%r321 = or i416 %r315, %r320
-%r322 = zext i416 %r321 to i448
-%r324 = getelementptr i32, i32* %r3, i32 13
+%r319 = zext i32 %r318 to i480
+%r320 = shl i480 %r319, 448
+%r321 = or i480 %r315, %r320
+%r322 = zext i480 %r321 to i512
+%r324 = getelementptr i32, i32* %r3, i32 15
 %r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i448
-%r327 = shl i448 %r326, 416
-%r328 = or i448 %r322, %r327
-%r329 = zext i448 %r328 to i480
-%r331 = getelementptr i32, i32* %r3, i32 14
+%r326 = zext i32 %r325 to i512
+%r327 = shl i512 %r326, 480
+%r328 = or i512 %r322, %r327
+%r329 = zext i512 %r328 to i544
+%r331 = getelementptr i32, i32* %r3, i32 16
 %r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i480
-%r334 = shl i480 %r333, 448
-%r335 = or i480 %r329, %r334
-%r336 = zext i480 %r335 to i512
-%r338 = getelementptr i32, i32* %r3, i32 15
+%r333 = zext i32 %r332 to i544
+%r334 = shl i544 %r333, 512
+%r335 = or i544 %r329, %r334
+%r336 = zext i544 %r335 to i576
+%r338 = getelementptr i32, i32* %r3, i32 17
 %r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i512
-%r341 = shl i512 %r340, 480
-%r342 = or i512 %r336, %r341
-%r343 = zext i512 %r342 to i544
-%r345 = getelementptr i32, i32* %r3, i32 16
+%r340 = zext i32 %r339 to i576
+%r341 = shl i576 %r340, 544
+%r342 = or i576 %r336, %r341
+%r343 = zext i576 %r342 to i608
+%r345 = getelementptr i32, i32* %r3, i32 18
 %r346 = load i32, i32* %r345
-%r347 = zext i32 %r346 to i544
-%r348 = shl i544 %r347, 512
-%r349 = or i544 %r343, %r348
-%r350 = zext i544 %r349 to i576
-%r352 = getelementptr i32, i32* %r3, i32 17
+%r347 = zext i32 %r346 to i608
+%r348 = shl i608 %r347, 576
+%r349 = or i608 %r343, %r348
+%r350 = zext i608 %r349 to i640
+%r352 = getelementptr i32, i32* %r3, i32 19
 %r353 = load i32, i32* %r352
-%r354 = zext i32 %r353 to i576
-%r355 = shl i576 %r354, 544
-%r356 = or i576 %r350, %r355
-%r357 = zext i576 %r356 to i608
-%r359 = getelementptr i32, i32* %r3, i32 18
+%r354 = zext i32 %r353 to i640
+%r355 = shl i640 %r354, 608
+%r356 = or i640 %r350, %r355
+%r357 = zext i640 %r356 to i672
+%r359 = getelementptr i32, i32* %r3, i32 20
 %r360 = load i32, i32* %r359
-%r361 = zext i32 %r360 to i608
-%r362 = shl i608 %r361, 576
-%r363 = or i608 %r357, %r362
-%r364 = zext i608 %r363 to i640
-%r366 = getelementptr i32, i32* %r3, i32 19
+%r361 = zext i32 %r360 to i672
+%r362 = shl i672 %r361, 640
+%r363 = or i672 %r357, %r362
+%r364 = zext i672 %r363 to i704
+%r366 = getelementptr i32, i32* %r3, i32 21
 %r367 = load i32, i32* %r366
-%r368 = zext i32 %r367 to i640
-%r369 = shl i640 %r368, 608
-%r370 = or i640 %r364, %r369
-%r371 = zext i640 %r370 to i672
-%r373 = getelementptr i32, i32* %r3, i32 20
+%r368 = zext i32 %r367 to i704
+%r369 = shl i704 %r368, 672
+%r370 = or i704 %r364, %r369
+%r371 = zext i704 %r370 to i736
+%r373 = getelementptr i32, i32* %r3, i32 22
 %r374 = load i32, i32* %r373
-%r375 = zext i32 %r374 to i672
-%r376 = shl i672 %r375, 640
-%r377 = or i672 %r371, %r376
-%r378 = zext i672 %r377 to i704
-%r380 = getelementptr i32, i32* %r3, i32 21
+%r375 = zext i32 %r374 to i736
+%r376 = shl i736 %r375, 704
+%r377 = or i736 %r371, %r376
+%r378 = zext i736 %r377 to i768
+%r380 = getelementptr i32, i32* %r3, i32 23
 %r381 = load i32, i32* %r380
-%r382 = zext i32 %r381 to i704
-%r383 = shl i704 %r382, 672
-%r384 = or i704 %r378, %r383
-%r385 = zext i704 %r384 to i736
-%r387 = getelementptr i32, i32* %r3, i32 22
+%r382 = zext i32 %r381 to i768
+%r383 = shl i768 %r382, 736
+%r384 = or i768 %r378, %r383
+%r385 = zext i768 %r384 to i800
+%r387 = getelementptr i32, i32* %r3, i32 24
 %r388 = load i32, i32* %r387
-%r389 = zext i32 %r388 to i736
-%r390 = shl i736 %r389, 704
-%r391 = or i736 %r385, %r390
-%r392 = zext i736 %r391 to i768
-%r394 = getelementptr i32, i32* %r3, i32 23
+%r389 = zext i32 %r388 to i800
+%r390 = shl i800 %r389, 768
+%r391 = or i800 %r385, %r390
+%r392 = zext i800 %r391 to i832
+%r394 = getelementptr i32, i32* %r3, i32 25
 %r395 = load i32, i32* %r394
-%r396 = zext i32 %r395 to i768
-%r397 = shl i768 %r396, 736
-%r398 = or i768 %r392, %r397
-%r399 = zext i768 %r398 to i800
-%r401 = getelementptr i32, i32* %r3, i32 24
+%r396 = zext i32 %r395 to i832
+%r397 = shl i832 %r396, 800
+%r398 = or i832 %r392, %r397
+%r399 = zext i832 %r398 to i864
+%r401 = getelementptr i32, i32* %r3, i32 26
 %r402 = load i32, i32* %r401
-%r403 = zext i32 %r402 to i800
-%r404 = shl i800 %r403, 768
-%r405 = or i800 %r399, %r404
-%r406 = zext i800 %r405 to i832
-%r408 = getelementptr i32, i32* %r3, i32 25
+%r403 = zext i32 %r402 to i864
+%r404 = shl i864 %r403, 832
+%r405 = or i864 %r399, %r404
+%r406 = zext i864 %r405 to i896
+%r408 = getelementptr i32, i32* %r3, i32 27
 %r409 = load i32, i32* %r408
-%r410 = zext i32 %r409 to i832
-%r411 = shl i832 %r410, 800
-%r412 = or i832 %r406, %r411
-%r413 = zext i832 %r412 to i864
-%r415 = getelementptr i32, i32* %r3, i32 26
+%r410 = zext i32 %r409 to i896
+%r411 = shl i896 %r410, 864
+%r412 = or i896 %r406, %r411
+%r413 = zext i896 %r412 to i928
+%r415 = getelementptr i32, i32* %r3, i32 28
 %r416 = load i32, i32* %r415
-%r417 = zext i32 %r416 to i864
-%r418 = shl i864 %r417, 832
-%r419 = or i864 %r413, %r418
-%r420 = zext i864 %r419 to i896
-%r422 = getelementptr i32, i32* %r3, i32 27
+%r417 = zext i32 %r416 to i928
+%r418 = shl i928 %r417, 896
+%r419 = or i928 %r413, %r418
+%r420 = zext i928 %r419 to i960
+%r422 = getelementptr i32, i32* %r3, i32 29
 %r423 = load i32, i32* %r422
-%r424 = zext i32 %r423 to i896
-%r425 = shl i896 %r424, 864
-%r426 = or i896 %r420, %r425
-%r427 = zext i896 %r426 to i928
-%r429 = getelementptr i32, i32* %r3, i32 28
+%r424 = zext i32 %r423 to i960
+%r425 = shl i960 %r424, 928
+%r426 = or i960 %r420, %r425
+%r427 = zext i960 %r426 to i992
+%r429 = getelementptr i32, i32* %r3, i32 30
 %r430 = load i32, i32* %r429
-%r431 = zext i32 %r430 to i928
-%r432 = shl i928 %r431, 896
-%r433 = or i928 %r427, %r432
-%r434 = zext i928 %r433 to i960
-%r436 = getelementptr i32, i32* %r3, i32 29
+%r431 = zext i32 %r430 to i992
+%r432 = shl i992 %r431, 960
+%r433 = or i992 %r427, %r432
+%r434 = zext i992 %r433 to i1024
+%r436 = getelementptr i32, i32* %r3, i32 31
 %r437 = load i32, i32* %r436
-%r438 = zext i32 %r437 to i960
-%r439 = shl i960 %r438, 928
-%r440 = or i960 %r434, %r439
-%r441 = zext i960 %r440 to i992
-%r443 = getelementptr i32, i32* %r3, i32 30
-%r444 = load i32, i32* %r443
-%r445 = zext i32 %r444 to i992
-%r446 = shl i992 %r445, 960
-%r447 = or i992 %r441, %r446
-%r448 = zext i992 %r447 to i1024
-%r450 = getelementptr i32, i32* %r3, i32 31
-%r451 = load i32, i32* %r450
-%r452 = zext i32 %r451 to i1024
-%r453 = shl i1024 %r452, 992
-%r454 = or i1024 %r448, %r453
-%r455 = zext i1024 %r454 to i1056
-%r457 = getelementptr i32, i32* %r3, i32 32
-%r458 = load i32, i32* %r457
-%r459 = zext i32 %r458 to i1056
-%r460 = shl i1056 %r459, 1024
-%r461 = or i1056 %r455, %r460
-%r462 = zext i1056 %r461 to i1088
-%r464 = getelementptr i32, i32* %r3, i32 33
-%r465 = load i32, i32* %r464
-%r466 = zext i32 %r465 to i1088
-%r467 = shl i1088 %r466, 1056
-%r468 = or i1088 %r462, %r467
-%r469 = zext i1088 %r236 to i1120
-%r470 = zext i1088 %r468 to i1120
-%r471 = add i1120 %r469, %r470
-%r472 = trunc i1120 %r471 to i544
-%r473 = trunc i544 %r472 to i32
-%r475 = getelementptr i32, i32* %r1, i32 0
-store i32 %r473, i32* %r475
-%r476 = lshr i544 %r472, 32
-%r477 = trunc i544 %r476 to i32
-%r479 = getelementptr i32, i32* %r1, i32 1
-store i32 %r477, i32* %r479
-%r480 = lshr i544 %r476, 32
-%r481 = trunc i544 %r480 to i32
-%r483 = getelementptr i32, i32* %r1, i32 2
-store i32 %r481, i32* %r483
-%r484 = lshr i544 %r480, 32
-%r485 = trunc i544 %r484 to i32
-%r487 = getelementptr i32, i32* %r1, i32 3
-store i32 %r485, i32* %r487
-%r488 = lshr i544 %r484, 32
-%r489 = trunc i544 %r488 to i32
-%r491 = getelementptr i32, i32* %r1, i32 4
-store i32 %r489, i32* %r491
-%r492 = lshr i544 %r488, 32
-%r493 = trunc i544 %r492 to i32
-%r495 = getelementptr i32, i32* %r1, i32 5
-store i32 %r493, i32* %r495
-%r496 = lshr i544 %r492, 32
-%r497 = trunc i544 %r496 to i32
-%r499 = getelementptr i32, i32* %r1, i32 6
-store i32 %r497, i32* %r499
-%r500 = lshr i544 %r496, 32
-%r501 = trunc i544 %r500 to i32
-%r503 = getelementptr i32, i32* %r1, i32 7
-store i32 %r501, i32* %r503
-%r504 = lshr i544 %r500, 32
-%r505 = trunc i544 %r504 to i32
-%r507 = getelementptr i32, i32* %r1, i32 8
-store i32 %r505, i32* %r507
-%r508 = lshr i544 %r504, 32
-%r509 = trunc i544 %r508 to i32
-%r511 = getelementptr i32, i32* %r1, i32 9
-store i32 %r509, i32* %r511
-%r512 = lshr i544 %r508, 32
-%r513 = trunc i544 %r512 to i32
-%r515 = getelementptr i32, i32* %r1, i32 10
-store i32 %r513, i32* %r515
-%r516 = lshr i544 %r512, 32
-%r517 = trunc i544 %r516 to i32
-%r519 = getelementptr i32, i32* %r1, i32 11
-store i32 %r517, i32* %r519
-%r520 = lshr i544 %r516, 32
-%r521 = trunc i544 %r520 to i32
-%r523 = getelementptr i32, i32* %r1, i32 12
-store i32 %r521, i32* %r523
-%r524 = lshr i544 %r520, 32
-%r525 = trunc i544 %r524 to i32
-%r527 = getelementptr i32, i32* %r1, i32 13
-store i32 %r525, i32* %r527
-%r528 = lshr i544 %r524, 32
-%r529 = trunc i544 %r528 to i32
-%r531 = getelementptr i32, i32* %r1, i32 14
-store i32 %r529, i32* %r531
-%r532 = lshr i544 %r528, 32
-%r533 = trunc i544 %r532 to i32
-%r535 = getelementptr i32, i32* %r1, i32 15
-store i32 %r533, i32* %r535
-%r536 = lshr i544 %r532, 32
-%r537 = trunc i544 %r536 to i32
-%r539 = getelementptr i32, i32* %r1, i32 16
-store i32 %r537, i32* %r539
-%r540 = lshr i1120 %r471, 544
-%r541 = trunc i1120 %r540 to i576
-%r542 = load i32, i32* %r4
-%r543 = zext i32 %r542 to i64
-%r545 = getelementptr i32, i32* %r4, i32 1
-%r546 = load i32, i32* %r545
-%r547 = zext i32 %r546 to i64
-%r548 = shl i64 %r547, 32
-%r549 = or i64 %r543, %r548
-%r550 = zext i64 %r549 to i96
-%r552 = getelementptr i32, i32* %r4, i32 2
-%r553 = load i32, i32* %r552
-%r554 = zext i32 %r553 to i96
-%r555 = shl i96 %r554, 64
-%r556 = or i96 %r550, %r555
-%r557 = zext i96 %r556 to i128
-%r559 = getelementptr i32, i32* %r4, i32 3
-%r560 = load i32, i32* %r559
-%r561 = zext i32 %r560 to i128
-%r562 = shl i128 %r561, 96
-%r563 = or i128 %r557, %r562
-%r564 = zext i128 %r563 to i160
-%r566 = getelementptr i32, i32* %r4, i32 4
-%r567 = load i32, i32* %r566
-%r568 = zext i32 %r567 to i160
-%r569 = shl i160 %r568, 128
-%r570 = or i160 %r564, %r569
-%r571 = zext i160 %r570 to i192
-%r573 = getelementptr i32, i32* %r4, i32 5
-%r574 = load i32, i32* %r573
-%r575 = zext i32 %r574 to i192
-%r576 = shl i192 %r575, 160
-%r577 = or i192 %r571, %r576
-%r578 = zext i192 %r577 to i224
-%r580 = getelementptr i32, i32* %r4, i32 6
-%r581 = load i32, i32* %r580
-%r582 = zext i32 %r581 to i224
-%r583 = shl i224 %r582, 192
-%r584 = or i224 %r578, %r583
-%r585 = zext i224 %r584 to i256
-%r587 = getelementptr i32, i32* %r4, i32 7
-%r588 = load i32, i32* %r587
-%r589 = zext i32 %r588 to i256
-%r590 = shl i256 %r589, 224
-%r591 = or i256 %r585, %r590
-%r592 = zext i256 %r591 to i288
-%r594 = getelementptr i32, i32* %r4, i32 8
-%r595 = load i32, i32* %r594
-%r596 = zext i32 %r595 to i288
-%r597 = shl i288 %r596, 256
-%r598 = or i288 %r592, %r597
-%r599 = zext i288 %r598 to i320
-%r601 = getelementptr i32, i32* %r4, i32 9
-%r602 = load i32, i32* %r601
-%r603 = zext i32 %r602 to i320
-%r604 = shl i320 %r603, 288
-%r605 = or i320 %r599, %r604
-%r606 = zext i320 %r605 to i352
-%r608 = getelementptr i32, i32* %r4, i32 10
-%r609 = load i32, i32* %r608
-%r610 = zext i32 %r609 to i352
-%r611 = shl i352 %r610, 320
-%r612 = or i352 %r606, %r611
-%r613 = zext i352 %r612 to i384
-%r615 = getelementptr i32, i32* %r4, i32 11
-%r616 = load i32, i32* %r615
-%r617 = zext i32 %r616 to i384
-%r618 = shl i384 %r617, 352
-%r619 = or i384 %r613, %r618
-%r620 = zext i384 %r619 to i416
-%r622 = getelementptr i32, i32* %r4, i32 12
-%r623 = load i32, i32* %r622
-%r624 = zext i32 %r623 to i416
-%r625 = shl i416 %r624, 384
-%r626 = or i416 %r620, %r625
-%r627 = zext i416 %r626 to i448
-%r629 = getelementptr i32, i32* %r4, i32 13
-%r630 = load i32, i32* %r629
-%r631 = zext i32 %r630 to i448
-%r632 = shl i448 %r631, 416
-%r633 = or i448 %r627, %r632
-%r634 = zext i448 %r633 to i480
-%r636 = getelementptr i32, i32* %r4, i32 14
-%r637 = load i32, i32* %r636
-%r638 = zext i32 %r637 to i480
-%r639 = shl i480 %r638, 448
-%r640 = or i480 %r634, %r639
-%r641 = zext i480 %r640 to i512
-%r643 = getelementptr i32, i32* %r4, i32 15
-%r644 = load i32, i32* %r643
-%r645 = zext i32 %r644 to i512
-%r646 = shl i512 %r645, 480
-%r647 = or i512 %r641, %r646
-%r648 = zext i512 %r647 to i544
-%r650 = getelementptr i32, i32* %r4, i32 16
-%r651 = load i32, i32* %r650
-%r652 = zext i32 %r651 to i544
-%r653 = shl i544 %r652, 512
-%r654 = or i544 %r648, %r653
-%r655 = zext i544 %r654 to i576
-%r656 = sub i576 %r541, %r655
-%r657 = lshr i576 %r656, 544
-%r658 = trunc i576 %r657 to i1
-%r659 = select i1 %r658, i576 %r541, i576 %r656
-%r660 = trunc i576 %r659 to i544
-%r662 = getelementptr i32, i32* %r1, i32 17
-%r663 = trunc i544 %r660 to i32
-%r665 = getelementptr i32, i32* %r662, i32 0
-store i32 %r663, i32* %r665
-%r666 = lshr i544 %r660, 32
-%r667 = trunc i544 %r666 to i32
-%r669 = getelementptr i32, i32* %r662, i32 1
-store i32 %r667, i32* %r669
-%r670 = lshr i544 %r666, 32
-%r671 = trunc i544 %r670 to i32
-%r673 = getelementptr i32, i32* %r662, i32 2
-store i32 %r671, i32* %r673
-%r674 = lshr i544 %r670, 32
-%r675 = trunc i544 %r674 to i32
-%r677 = getelementptr i32, i32* %r662, i32 3
-store i32 %r675, i32* %r677
-%r678 = lshr i544 %r674, 32
-%r679 = trunc i544 %r678 to i32
-%r681 = getelementptr i32, i32* %r662, i32 4
-store i32 %r679, i32* %r681
-%r682 = lshr i544 %r678, 32
-%r683 = trunc i544 %r682 to i32
-%r685 = getelementptr i32, i32* %r662, i32 5
-store i32 %r683, i32* %r685
-%r686 = lshr i544 %r682, 32
-%r687 = trunc i544 %r686 to i32
-%r689 = getelementptr i32, i32* %r662, i32 6
-store i32 %r687, i32* %r689
-%r690 = lshr i544 %r686, 32
-%r691 = trunc i544 %r690 to i32
-%r693 = getelementptr i32, i32* %r662, i32 7
-store i32 %r691, i32* %r693
-%r694 = lshr i544 %r690, 32
-%r695 = trunc i544 %r694 to i32
-%r697 = getelementptr i32, i32* %r662, i32 8
-store i32 %r695, i32* %r697
-%r698 = lshr i544 %r694, 32
-%r699 = trunc i544 %r698 to i32
-%r701 = getelementptr i32, i32* %r662, i32 9
-store i32 %r699, i32* %r701
-%r702 = lshr i544 %r698, 32
-%r703 = trunc i544 %r702 to i32
-%r705 = getelementptr i32, i32* %r662, i32 10
-store i32 %r703, i32* %r705
-%r706 = lshr i544 %r702, 32
-%r707 = trunc i544 %r706 to i32
-%r709 = getelementptr i32, i32* %r662, i32 11
-store i32 %r707, i32* %r709
-%r710 = lshr i544 %r706, 32
-%r711 = trunc i544 %r710 to i32
-%r713 = getelementptr i32, i32* %r662, i32 12
-store i32 %r711, i32* %r713
-%r714 = lshr i544 %r710, 32
-%r715 = trunc i544 %r714 to i32
-%r717 = getelementptr i32, i32* %r662, i32 13
-store i32 %r715, i32* %r717
-%r718 = lshr i544 %r714, 32
-%r719 = trunc i544 %r718 to i32
-%r721 = getelementptr i32, i32* %r662, i32 14
-store i32 %r719, i32* %r721
-%r722 = lshr i544 %r718, 32
-%r723 = trunc i544 %r722 to i32
-%r725 = getelementptr i32, i32* %r662, i32 15
-store i32 %r723, i32* %r725
-%r726 = lshr i544 %r722, 32
-%r727 = trunc i544 %r726 to i32
-%r729 = getelementptr i32, i32* %r662, i32 16
-store i32 %r727, i32* %r729
+%r438 = zext i32 %r437 to i1024
+%r439 = shl i1024 %r438, 992
+%r440 = or i1024 %r434, %r439
+%r441 = zext i1024 %r222 to i1056
+%r442 = zext i1024 %r440 to i1056
+%r443 = add i1056 %r441, %r442
+%r444 = trunc i1056 %r443 to i512
+%r446 = getelementptr i32, i32* %r1, i32 0
+%r447 = trunc i512 %r444 to i32
+store i32 %r447, i32* %r446
+%r448 = lshr i512 %r444, 32
+%r450 = getelementptr i32, i32* %r1, i32 1
+%r451 = trunc i512 %r448 to i32
+store i32 %r451, i32* %r450
+%r452 = lshr i512 %r448, 32
+%r454 = getelementptr i32, i32* %r1, i32 2
+%r455 = trunc i512 %r452 to i32
+store i32 %r455, i32* %r454
+%r456 = lshr i512 %r452, 32
+%r458 = getelementptr i32, i32* %r1, i32 3
+%r459 = trunc i512 %r456 to i32
+store i32 %r459, i32* %r458
+%r460 = lshr i512 %r456, 32
+%r462 = getelementptr i32, i32* %r1, i32 4
+%r463 = trunc i512 %r460 to i32
+store i32 %r463, i32* %r462
+%r464 = lshr i512 %r460, 32
+%r466 = getelementptr i32, i32* %r1, i32 5
+%r467 = trunc i512 %r464 to i32
+store i32 %r467, i32* %r466
+%r468 = lshr i512 %r464, 32
+%r470 = getelementptr i32, i32* %r1, i32 6
+%r471 = trunc i512 %r468 to i32
+store i32 %r471, i32* %r470
+%r472 = lshr i512 %r468, 32
+%r474 = getelementptr i32, i32* %r1, i32 7
+%r475 = trunc i512 %r472 to i32
+store i32 %r475, i32* %r474
+%r476 = lshr i512 %r472, 32
+%r478 = getelementptr i32, i32* %r1, i32 8
+%r479 = trunc i512 %r476 to i32
+store i32 %r479, i32* %r478
+%r480 = lshr i512 %r476, 32
+%r482 = getelementptr i32, i32* %r1, i32 9
+%r483 = trunc i512 %r480 to i32
+store i32 %r483, i32* %r482
+%r484 = lshr i512 %r480, 32
+%r486 = getelementptr i32, i32* %r1, i32 10
+%r487 = trunc i512 %r484 to i32
+store i32 %r487, i32* %r486
+%r488 = lshr i512 %r484, 32
+%r490 = getelementptr i32, i32* %r1, i32 11
+%r491 = trunc i512 %r488 to i32
+store i32 %r491, i32* %r490
+%r492 = lshr i512 %r488, 32
+%r494 = getelementptr i32, i32* %r1, i32 12
+%r495 = trunc i512 %r492 to i32
+store i32 %r495, i32* %r494
+%r496 = lshr i512 %r492, 32
+%r498 = getelementptr i32, i32* %r1, i32 13
+%r499 = trunc i512 %r496 to i32
+store i32 %r499, i32* %r498
+%r500 = lshr i512 %r496, 32
+%r502 = getelementptr i32, i32* %r1, i32 14
+%r503 = trunc i512 %r500 to i32
+store i32 %r503, i32* %r502
+%r504 = lshr i512 %r500, 32
+%r506 = getelementptr i32, i32* %r1, i32 15
+%r507 = trunc i512 %r504 to i32
+store i32 %r507, i32* %r506
+%r508 = lshr i1056 %r443, 512
+%r509 = trunc i1056 %r508 to i544
+%r510 = load i32, i32* %r4
+%r511 = zext i32 %r510 to i64
+%r513 = getelementptr i32, i32* %r4, i32 1
+%r514 = load i32, i32* %r513
+%r515 = zext i32 %r514 to i64
+%r516 = shl i64 %r515, 32
+%r517 = or i64 %r511, %r516
+%r518 = zext i64 %r517 to i96
+%r520 = getelementptr i32, i32* %r4, i32 2
+%r521 = load i32, i32* %r520
+%r522 = zext i32 %r521 to i96
+%r523 = shl i96 %r522, 64
+%r524 = or i96 %r518, %r523
+%r525 = zext i96 %r524 to i128
+%r527 = getelementptr i32, i32* %r4, i32 3
+%r528 = load i32, i32* %r527
+%r529 = zext i32 %r528 to i128
+%r530 = shl i128 %r529, 96
+%r531 = or i128 %r525, %r530
+%r532 = zext i128 %r531 to i160
+%r534 = getelementptr i32, i32* %r4, i32 4
+%r535 = load i32, i32* %r534
+%r536 = zext i32 %r535 to i160
+%r537 = shl i160 %r536, 128
+%r538 = or i160 %r532, %r537
+%r539 = zext i160 %r538 to i192
+%r541 = getelementptr i32, i32* %r4, i32 5
+%r542 = load i32, i32* %r541
+%r543 = zext i32 %r542 to i192
+%r544 = shl i192 %r543, 160
+%r545 = or i192 %r539, %r544
+%r546 = zext i192 %r545 to i224
+%r548 = getelementptr i32, i32* %r4, i32 6
+%r549 = load i32, i32* %r548
+%r550 = zext i32 %r549 to i224
+%r551 = shl i224 %r550, 192
+%r552 = or i224 %r546, %r551
+%r553 = zext i224 %r552 to i256
+%r555 = getelementptr i32, i32* %r4, i32 7
+%r556 = load i32, i32* %r555
+%r557 = zext i32 %r556 to i256
+%r558 = shl i256 %r557, 224
+%r559 = or i256 %r553, %r558
+%r560 = zext i256 %r559 to i288
+%r562 = getelementptr i32, i32* %r4, i32 8
+%r563 = load i32, i32* %r562
+%r564 = zext i32 %r563 to i288
+%r565 = shl i288 %r564, 256
+%r566 = or i288 %r560, %r565
+%r567 = zext i288 %r566 to i320
+%r569 = getelementptr i32, i32* %r4, i32 9
+%r570 = load i32, i32* %r569
+%r571 = zext i32 %r570 to i320
+%r572 = shl i320 %r571, 288
+%r573 = or i320 %r567, %r572
+%r574 = zext i320 %r573 to i352
+%r576 = getelementptr i32, i32* %r4, i32 10
+%r577 = load i32, i32* %r576
+%r578 = zext i32 %r577 to i352
+%r579 = shl i352 %r578, 320
+%r580 = or i352 %r574, %r579
+%r581 = zext i352 %r580 to i384
+%r583 = getelementptr i32, i32* %r4, i32 11
+%r584 = load i32, i32* %r583
+%r585 = zext i32 %r584 to i384
+%r586 = shl i384 %r585, 352
+%r587 = or i384 %r581, %r586
+%r588 = zext i384 %r587 to i416
+%r590 = getelementptr i32, i32* %r4, i32 12
+%r591 = load i32, i32* %r590
+%r592 = zext i32 %r591 to i416
+%r593 = shl i416 %r592, 384
+%r594 = or i416 %r588, %r593
+%r595 = zext i416 %r594 to i448
+%r597 = getelementptr i32, i32* %r4, i32 13
+%r598 = load i32, i32* %r597
+%r599 = zext i32 %r598 to i448
+%r600 = shl i448 %r599, 416
+%r601 = or i448 %r595, %r600
+%r602 = zext i448 %r601 to i480
+%r604 = getelementptr i32, i32* %r4, i32 14
+%r605 = load i32, i32* %r604
+%r606 = zext i32 %r605 to i480
+%r607 = shl i480 %r606, 448
+%r608 = or i480 %r602, %r607
+%r609 = zext i480 %r608 to i512
+%r611 = getelementptr i32, i32* %r4, i32 15
+%r612 = load i32, i32* %r611
+%r613 = zext i32 %r612 to i512
+%r614 = shl i512 %r613, 480
+%r615 = or i512 %r609, %r614
+%r616 = zext i512 %r615 to i544
+%r617 = sub i544 %r509, %r616
+%r618 = lshr i544 %r617, 512
+%r619 = trunc i544 %r618 to i1
+%r620 = select i1 %r619, i544 %r509, i544 %r617
+%r621 = trunc i544 %r620 to i512
+%r623 = getelementptr i32, i32* %r1, i32 16
+%r625 = getelementptr i32, i32* %r623, i32 0
+%r626 = trunc i512 %r621 to i32
+store i32 %r626, i32* %r625
+%r627 = lshr i512 %r621, 32
+%r629 = getelementptr i32, i32* %r623, i32 1
+%r630 = trunc i512 %r627 to i32
+store i32 %r630, i32* %r629
+%r631 = lshr i512 %r627, 32
+%r633 = getelementptr i32, i32* %r623, i32 2
+%r634 = trunc i512 %r631 to i32
+store i32 %r634, i32* %r633
+%r635 = lshr i512 %r631, 32
+%r637 = getelementptr i32, i32* %r623, i32 3
+%r638 = trunc i512 %r635 to i32
+store i32 %r638, i32* %r637
+%r639 = lshr i512 %r635, 32
+%r641 = getelementptr i32, i32* %r623, i32 4
+%r642 = trunc i512 %r639 to i32
+store i32 %r642, i32* %r641
+%r643 = lshr i512 %r639, 32
+%r645 = getelementptr i32, i32* %r623, i32 5
+%r646 = trunc i512 %r643 to i32
+store i32 %r646, i32* %r645
+%r647 = lshr i512 %r643, 32
+%r649 = getelementptr i32, i32* %r623, i32 6
+%r650 = trunc i512 %r647 to i32
+store i32 %r650, i32* %r649
+%r651 = lshr i512 %r647, 32
+%r653 = getelementptr i32, i32* %r623, i32 7
+%r654 = trunc i512 %r651 to i32
+store i32 %r654, i32* %r653
+%r655 = lshr i512 %r651, 32
+%r657 = getelementptr i32, i32* %r623, i32 8
+%r658 = trunc i512 %r655 to i32
+store i32 %r658, i32* %r657
+%r659 = lshr i512 %r655, 32
+%r661 = getelementptr i32, i32* %r623, i32 9
+%r662 = trunc i512 %r659 to i32
+store i32 %r662, i32* %r661
+%r663 = lshr i512 %r659, 32
+%r665 = getelementptr i32, i32* %r623, i32 10
+%r666 = trunc i512 %r663 to i32
+store i32 %r666, i32* %r665
+%r667 = lshr i512 %r663, 32
+%r669 = getelementptr i32, i32* %r623, i32 11
+%r670 = trunc i512 %r667 to i32
+store i32 %r670, i32* %r669
+%r671 = lshr i512 %r667, 32
+%r673 = getelementptr i32, i32* %r623, i32 12
+%r674 = trunc i512 %r671 to i32
+store i32 %r674, i32* %r673
+%r675 = lshr i512 %r671, 32
+%r677 = getelementptr i32, i32* %r623, i32 13
+%r678 = trunc i512 %r675 to i32
+store i32 %r678, i32* %r677
+%r679 = lshr i512 %r675, 32
+%r681 = getelementptr i32, i32* %r623, i32 14
+%r682 = trunc i512 %r679 to i32
+store i32 %r682, i32* %r681
+%r683 = lshr i512 %r679, 32
+%r685 = getelementptr i32, i32* %r623, i32 15
+%r686 = trunc i512 %r683 to i32
+store i32 %r686, i32* %r685
 ret void
 }
-define void @mcl_fpDbl_sub17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
+define void @mcl_fpDbl_sub16L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias  %r3, i32* noalias  %r4)
 {
 %r5 = load i32, i32* %r2
 %r6 = zext i32 %r5 to i64
@@ -53798,458 +20048,420 @@ define void @mcl_fpDbl_sub17L(i32* noalias  %r1, i32* noalias  %r2, i32* noalias
 %r220 = zext i32 %r219 to i1024
 %r221 = shl i1024 %r220, 992
 %r222 = or i1024 %r216, %r221
-%r223 = zext i1024 %r222 to i1056
-%r225 = getelementptr i32, i32* %r2, i32 32
-%r226 = load i32, i32* %r225
-%r227 = zext i32 %r226 to i1056
-%r228 = shl i1056 %r227, 1024
-%r229 = or i1056 %r223, %r228
-%r230 = zext i1056 %r229 to i1088
-%r232 = getelementptr i32, i32* %r2, i32 33
-%r233 = load i32, i32* %r232
-%r234 = zext i32 %r233 to i1088
-%r235 = shl i1088 %r234, 1056
-%r236 = or i1088 %r230, %r235
-%r237 = load i32, i32* %r3
-%r238 = zext i32 %r237 to i64
-%r240 = getelementptr i32, i32* %r3, i32 1
+%r223 = load i32, i32* %r3
+%r224 = zext i32 %r223 to i64
+%r226 = getelementptr i32, i32* %r3, i32 1
+%r227 = load i32, i32* %r226
+%r228 = zext i32 %r227 to i64
+%r229 = shl i64 %r228, 32
+%r230 = or i64 %r224, %r229
+%r231 = zext i64 %r230 to i96
+%r233 = getelementptr i32, i32* %r3, i32 2
+%r234 = load i32, i32* %r233
+%r235 = zext i32 %r234 to i96
+%r236 = shl i96 %r235, 64
+%r237 = or i96 %r231, %r236
+%r238 = zext i96 %r237 to i128
+%r240 = getelementptr i32, i32* %r3, i32 3
 %r241 = load i32, i32* %r240
-%r242 = zext i32 %r241 to i64
-%r243 = shl i64 %r242, 32
-%r244 = or i64 %r238, %r243
-%r245 = zext i64 %r244 to i96
-%r247 = getelementptr i32, i32* %r3, i32 2
+%r242 = zext i32 %r241 to i128
+%r243 = shl i128 %r242, 96
+%r244 = or i128 %r238, %r243
+%r245 = zext i128 %r244 to i160
+%r247 = getelementptr i32, i32* %r3, i32 4
 %r248 = load i32, i32* %r247
-%r249 = zext i32 %r248 to i96
-%r250 = shl i96 %r249, 64
-%r251 = or i96 %r245, %r250
-%r252 = zext i96 %r251 to i128
-%r254 = getelementptr i32, i32* %r3, i32 3
+%r249 = zext i32 %r248 to i160
+%r250 = shl i160 %r249, 128
+%r251 = or i160 %r245, %r250
+%r252 = zext i160 %r251 to i192
+%r254 = getelementptr i32, i32* %r3, i32 5
 %r255 = load i32, i32* %r254
-%r256 = zext i32 %r255 to i128
-%r257 = shl i128 %r256, 96
-%r258 = or i128 %r252, %r257
-%r259 = zext i128 %r258 to i160
-%r261 = getelementptr i32, i32* %r3, i32 4
+%r256 = zext i32 %r255 to i192
+%r257 = shl i192 %r256, 160
+%r258 = or i192 %r252, %r257
+%r259 = zext i192 %r258 to i224
+%r261 = getelementptr i32, i32* %r3, i32 6
 %r262 = load i32, i32* %r261
-%r263 = zext i32 %r262 to i160
-%r264 = shl i160 %r263, 128
-%r265 = or i160 %r259, %r264
-%r266 = zext i160 %r265 to i192
-%r268 = getelementptr i32, i32* %r3, i32 5
+%r263 = zext i32 %r262 to i224
+%r264 = shl i224 %r263, 192
+%r265 = or i224 %r259, %r264
+%r266 = zext i224 %r265 to i256
+%r268 = getelementptr i32, i32* %r3, i32 7
 %r269 = load i32, i32* %r268
-%r270 = zext i32 %r269 to i192
-%r271 = shl i192 %r270, 160
-%r272 = or i192 %r266, %r271
-%r273 = zext i192 %r272 to i224
-%r275 = getelementptr i32, i32* %r3, i32 6
+%r270 = zext i32 %r269 to i256
+%r271 = shl i256 %r270, 224
+%r272 = or i256 %r266, %r271
+%r273 = zext i256 %r272 to i288
+%r275 = getelementptr i32, i32* %r3, i32 8
 %r276 = load i32, i32* %r275
-%r277 = zext i32 %r276 to i224
-%r278 = shl i224 %r277, 192
-%r279 = or i224 %r273, %r278
-%r280 = zext i224 %r279 to i256
-%r282 = getelementptr i32, i32* %r3, i32 7
+%r277 = zext i32 %r276 to i288
+%r278 = shl i288 %r277, 256
+%r279 = or i288 %r273, %r278
+%r280 = zext i288 %r279 to i320
+%r282 = getelementptr i32, i32* %r3, i32 9
 %r283 = load i32, i32* %r282
-%r284 = zext i32 %r283 to i256
-%r285 = shl i256 %r284, 224
-%r286 = or i256 %r280, %r285
-%r287 = zext i256 %r286 to i288
-%r289 = getelementptr i32, i32* %r3, i32 8
+%r284 = zext i32 %r283 to i320
+%r285 = shl i320 %r284, 288
+%r286 = or i320 %r280, %r285
+%r287 = zext i320 %r286 to i352
+%r289 = getelementptr i32, i32* %r3, i32 10
 %r290 = load i32, i32* %r289
-%r291 = zext i32 %r290 to i288
-%r292 = shl i288 %r291, 256
-%r293 = or i288 %r287, %r292
-%r294 = zext i288 %r293 to i320
-%r296 = getelementptr i32, i32* %r3, i32 9
+%r291 = zext i32 %r290 to i352
+%r292 = shl i352 %r291, 320
+%r293 = or i352 %r287, %r292
+%r294 = zext i352 %r293 to i384
+%r296 = getelementptr i32, i32* %r3, i32 11
 %r297 = load i32, i32* %r296
-%r298 = zext i32 %r297 to i320
-%r299 = shl i320 %r298, 288
-%r300 = or i320 %r294, %r299
-%r301 = zext i320 %r300 to i352
-%r303 = getelementptr i32, i32* %r3, i32 10
+%r298 = zext i32 %r297 to i384
+%r299 = shl i384 %r298, 352
+%r300 = or i384 %r294, %r299
+%r301 = zext i384 %r300 to i416
+%r303 = getelementptr i32, i32* %r3, i32 12
 %r304 = load i32, i32* %r303
-%r305 = zext i32 %r304 to i352
-%r306 = shl i352 %r305, 320
-%r307 = or i352 %r301, %r306
-%r308 = zext i352 %r307 to i384
-%r310 = getelementptr i32, i32* %r3, i32 11
+%r305 = zext i32 %r304 to i416
+%r306 = shl i416 %r305, 384
+%r307 = or i416 %r301, %r306
+%r308 = zext i416 %r307 to i448
+%r310 = getelementptr i32, i32* %r3, i32 13
 %r311 = load i32, i32* %r310
-%r312 = zext i32 %r311 to i384
-%r313 = shl i384 %r312, 352
-%r314 = or i384 %r308, %r313
-%r315 = zext i384 %r314 to i416
-%r317 = getelementptr i32, i32* %r3, i32 12
+%r312 = zext i32 %r311 to i448
+%r313 = shl i448 %r312, 416
+%r314 = or i448 %r308, %r313
+%r315 = zext i448 %r314 to i480
+%r317 = getelementptr i32, i32* %r3, i32 14
 %r318 = load i32, i32* %r317
-%r319 = zext i32 %r318 to i416
-%r320 = shl i416 %r319, 384
-%r321 = or i416 %r315, %r320
-%r322 = zext i416 %r321 to i448
-%r324 = getelementptr i32, i32* %r3, i32 13
+%r319 = zext i32 %r318 to i480
+%r320 = shl i480 %r319, 448
+%r321 = or i480 %r315, %r320
+%r322 = zext i480 %r321 to i512
+%r324 = getelementptr i32, i32* %r3, i32 15
 %r325 = load i32, i32* %r324
-%r326 = zext i32 %r325 to i448
-%r327 = shl i448 %r326, 416
-%r328 = or i448 %r322, %r327
-%r329 = zext i448 %r328 to i480
-%r331 = getelementptr i32, i32* %r3, i32 14
+%r326 = zext i32 %r325 to i512
+%r327 = shl i512 %r326, 480
+%r328 = or i512 %r322, %r327
+%r329 = zext i512 %r328 to i544
+%r331 = getelementptr i32, i32* %r3, i32 16
 %r332 = load i32, i32* %r331
-%r333 = zext i32 %r332 to i480
-%r334 = shl i480 %r333, 448
-%r335 = or i480 %r329, %r334
-%r336 = zext i480 %r335 to i512
-%r338 = getelementptr i32, i32* %r3, i32 15
+%r333 = zext i32 %r332 to i544
+%r334 = shl i544 %r333, 512
+%r335 = or i544 %r329, %r334
+%r336 = zext i544 %r335 to i576
+%r338 = getelementptr i32, i32* %r3, i32 17
 %r339 = load i32, i32* %r338
-%r340 = zext i32 %r339 to i512
-%r341 = shl i512 %r340, 480
-%r342 = or i512 %r336, %r341
-%r343 = zext i512 %r342 to i544
-%r345 = getelementptr i32, i32* %r3, i32 16
+%r340 = zext i32 %r339 to i576
+%r341 = shl i576 %r340, 544
+%r342 = or i576 %r336, %r341
+%r343 = zext i576 %r342 to i608
+%r345 = getelementptr i32, i32* %r3, i32 18
 %r346 = load i32, i32* %r345
-%r347 = zext i32 %r346 to i544
-%r348 = shl i544 %r347, 512
-%r349 = or i544 %r343, %r348
-%r350 = zext i544 %r349 to i576
-%r352 = getelementptr i32, i32* %r3, i32 17
+%r347 = zext i32 %r346 to i608
+%r348 = shl i608 %r347, 576
+%r349 = or i608 %r343, %r348
+%r350 = zext i608 %r349 to i640
+%r352 = getelementptr i32, i32* %r3, i32 19
 %r353 = load i32, i32* %r352
-%r354 = zext i32 %r353 to i576
-%r355 = shl i576 %r354, 544
-%r356 = or i576 %r350, %r355
-%r357 = zext i576 %r356 to i608
-%r359 = getelementptr i32, i32* %r3, i32 18
+%r354 = zext i32 %r353 to i640
+%r355 = shl i640 %r354, 608
+%r356 = or i640 %r350, %r355
+%r357 = zext i640 %r356 to i672
+%r359 = getelementptr i32, i32* %r3, i32 20
 %r360 = load i32, i32* %r359
-%r361 = zext i32 %r360 to i608
-%r362 = shl i608 %r361, 576
-%r363 = or i608 %r357, %r362
-%r364 = zext i608 %r363 to i640
-%r366 = getelementptr i32, i32* %r3, i32 19
+%r361 = zext i32 %r360 to i672
+%r362 = shl i672 %r361, 640
+%r363 = or i672 %r357, %r362
+%r364 = zext i672 %r363 to i704
+%r366 = getelementptr i32, i32* %r3, i32 21
 %r367 = load i32, i32* %r366
-%r368 = zext i32 %r367 to i640
-%r369 = shl i640 %r368, 608
-%r370 = or i640 %r364, %r369
-%r371 = zext i640 %r370 to i672
-%r373 = getelementptr i32, i32* %r3, i32 20
+%r368 = zext i32 %r367 to i704
+%r369 = shl i704 %r368, 672
+%r370 = or i704 %r364, %r369
+%r371 = zext i704 %r370 to i736
+%r373 = getelementptr i32, i32* %r3, i32 22
 %r374 = load i32, i32* %r373
-%r375 = zext i32 %r374 to i672
-%r376 = shl i672 %r375, 640
-%r377 = or i672 %r371, %r376
-%r378 = zext i672 %r377 to i704
-%r380 = getelementptr i32, i32* %r3, i32 21
+%r375 = zext i32 %r374 to i736
+%r376 = shl i736 %r375, 704
+%r377 = or i736 %r371, %r376
+%r378 = zext i736 %r377 to i768
+%r380 = getelementptr i32, i32* %r3, i32 23
 %r381 = load i32, i32* %r380
-%r382 = zext i32 %r381 to i704
-%r383 = shl i704 %r382, 672
-%r384 = or i704 %r378, %r383
-%r385 = zext i704 %r384 to i736
-%r387 = getelementptr i32, i32* %r3, i32 22
+%r382 = zext i32 %r381 to i768
+%r383 = shl i768 %r382, 736
+%r384 = or i768 %r378, %r383
+%r385 = zext i768 %r384 to i800
+%r387 = getelementptr i32, i32* %r3, i32 24
 %r388 = load i32, i32* %r387
-%r389 = zext i32 %r388 to i736
-%r390 = shl i736 %r389, 704
-%r391 = or i736 %r385, %r390
-%r392 = zext i736 %r391 to i768
-%r394 = getelementptr i32, i32* %r3, i32 23
+%r389 = zext i32 %r388 to i800
+%r390 = shl i800 %r389, 768
+%r391 = or i800 %r385, %r390
+%r392 = zext i800 %r391 to i832
+%r394 = getelementptr i32, i32* %r3, i32 25
 %r395 = load i32, i32* %r394
-%r396 = zext i32 %r395 to i768
-%r397 = shl i768 %r396, 736
-%r398 = or i768 %r392, %r397
-%r399 = zext i768 %r398 to i800
-%r401 = getelementptr i32, i32* %r3, i32 24
+%r396 = zext i32 %r395 to i832
+%r397 = shl i832 %r396, 800
+%r398 = or i832 %r392, %r397
+%r399 = zext i832 %r398 to i864
+%r401 = getelementptr i32, i32* %r3, i32 26
 %r402 = load i32, i32* %r401
-%r403 = zext i32 %r402 to i800
-%r404 = shl i800 %r403, 768
-%r405 = or i800 %r399, %r404
-%r406 = zext i800 %r405 to i832
-%r408 = getelementptr i32, i32* %r3, i32 25
+%r403 = zext i32 %r402 to i864
+%r404 = shl i864 %r403, 832
+%r405 = or i864 %r399, %r404
+%r406 = zext i864 %r405 to i896
+%r408 = getelementptr i32, i32* %r3, i32 27
 %r409 = load i32, i32* %r408
-%r410 = zext i32 %r409 to i832
-%r411 = shl i832 %r410, 800
-%r412 = or i832 %r406, %r411
-%r413 = zext i832 %r412 to i864
-%r415 = getelementptr i32, i32* %r3, i32 26
+%r410 = zext i32 %r409 to i896
+%r411 = shl i896 %r410, 864
+%r412 = or i896 %r406, %r411
+%r413 = zext i896 %r412 to i928
+%r415 = getelementptr i32, i32* %r3, i32 28
 %r416 = load i32, i32* %r415
-%r417 = zext i32 %r416 to i864
-%r418 = shl i864 %r417, 832
-%r419 = or i864 %r413, %r418
-%r420 = zext i864 %r419 to i896
-%r422 = getelementptr i32, i32* %r3, i32 27
+%r417 = zext i32 %r416 to i928
+%r418 = shl i928 %r417, 896
+%r419 = or i928 %r413, %r418
+%r420 = zext i928 %r419 to i960
+%r422 = getelementptr i32, i32* %r3, i32 29
 %r423 = load i32, i32* %r422
-%r424 = zext i32 %r423 to i896
-%r425 = shl i896 %r424, 864
-%r426 = or i896 %r420, %r425
-%r427 = zext i896 %r426 to i928
-%r429 = getelementptr i32, i32* %r3, i32 28
+%r424 = zext i32 %r423 to i960
+%r425 = shl i960 %r424, 928
+%r426 = or i960 %r420, %r425
+%r427 = zext i960 %r426 to i992
+%r429 = getelementptr i32, i32* %r3, i32 30
 %r430 = load i32, i32* %r429
-%r431 = zext i32 %r430 to i928
-%r432 = shl i928 %r431, 896
-%r433 = or i928 %r427, %r432
-%r434 = zext i928 %r433 to i960
-%r436 = getelementptr i32, i32* %r3, i32 29
+%r431 = zext i32 %r430 to i992
+%r432 = shl i992 %r431, 960
+%r433 = or i992 %r427, %r432
+%r434 = zext i992 %r433 to i1024
+%r436 = getelementptr i32, i32* %r3, i32 31
 %r437 = load i32, i32* %r436
-%r438 = zext i32 %r437 to i960
-%r439 = shl i960 %r438, 928
-%r440 = or i960 %r434, %r439
-%r441 = zext i960 %r440 to i992
-%r443 = getelementptr i32, i32* %r3, i32 30
-%r444 = load i32, i32* %r443
-%r445 = zext i32 %r444 to i992
-%r446 = shl i992 %r445, 960
-%r447 = or i992 %r441, %r446
-%r448 = zext i992 %r447 to i1024
-%r450 = getelementptr i32, i32* %r3, i32 31
-%r451 = load i32, i32* %r450
-%r452 = zext i32 %r451 to i1024
-%r453 = shl i1024 %r452, 992
-%r454 = or i1024 %r448, %r453
-%r455 = zext i1024 %r454 to i1056
-%r457 = getelementptr i32, i32* %r3, i32 32
-%r458 = load i32, i32* %r457
-%r459 = zext i32 %r458 to i1056
-%r460 = shl i1056 %r459, 1024
-%r461 = or i1056 %r455, %r460
-%r462 = zext i1056 %r461 to i1088
-%r464 = getelementptr i32, i32* %r3, i32 33
-%r465 = load i32, i32* %r464
-%r466 = zext i32 %r465 to i1088
-%r467 = shl i1088 %r466, 1056
-%r468 = or i1088 %r462, %r467
-%r469 = zext i1088 %r236 to i1120
-%r470 = zext i1088 %r468 to i1120
-%r471 = sub i1120 %r469, %r470
-%r472 = trunc i1120 %r471 to i544
-%r473 = trunc i544 %r472 to i32
-%r475 = getelementptr i32, i32* %r1, i32 0
-store i32 %r473, i32* %r475
-%r476 = lshr i544 %r472, 32
-%r477 = trunc i544 %r476 to i32
-%r479 = getelementptr i32, i32* %r1, i32 1
-store i32 %r477, i32* %r479
-%r480 = lshr i544 %r476, 32
-%r481 = trunc i544 %r480 to i32
-%r483 = getelementptr i32, i32* %r1, i32 2
-store i32 %r481, i32* %r483
-%r484 = lshr i544 %r480, 32
-%r485 = trunc i544 %r484 to i32
-%r487 = getelementptr i32, i32* %r1, i32 3
-store i32 %r485, i32* %r487
-%r488 = lshr i544 %r484, 32
-%r489 = trunc i544 %r488 to i32
-%r491 = getelementptr i32, i32* %r1, i32 4
-store i32 %r489, i32* %r491
-%r492 = lshr i544 %r488, 32
-%r493 = trunc i544 %r492 to i32
-%r495 = getelementptr i32, i32* %r1, i32 5
-store i32 %r493, i32* %r495
-%r496 = lshr i544 %r492, 32
-%r497 = trunc i544 %r496 to i32
-%r499 = getelementptr i32, i32* %r1, i32 6
-store i32 %r497, i32* %r499
-%r500 = lshr i544 %r496, 32
-%r501 = trunc i544 %r500 to i32
-%r503 = getelementptr i32, i32* %r1, i32 7
-store i32 %r501, i32* %r503
-%r504 = lshr i544 %r500, 32
-%r505 = trunc i544 %r504 to i32
-%r507 = getelementptr i32, i32* %r1, i32 8
-store i32 %r505, i32* %r507
-%r508 = lshr i544 %r504, 32
-%r509 = trunc i544 %r508 to i32
-%r511 = getelementptr i32, i32* %r1, i32 9
-store i32 %r509, i32* %r511
-%r512 = lshr i544 %r508, 32
-%r513 = trunc i544 %r512 to i32
-%r515 = getelementptr i32, i32* %r1, i32 10
-store i32 %r513, i32* %r515
-%r516 = lshr i544 %r512, 32
-%r517 = trunc i544 %r516 to i32
-%r519 = getelementptr i32, i32* %r1, i32 11
-store i32 %r517, i32* %r519
-%r520 = lshr i544 %r516, 32
-%r521 = trunc i544 %r520 to i32
-%r523 = getelementptr i32, i32* %r1, i32 12
-store i32 %r521, i32* %r523
-%r524 = lshr i544 %r520, 32
-%r525 = trunc i544 %r524 to i32
-%r527 = getelementptr i32, i32* %r1, i32 13
-store i32 %r525, i32* %r527
-%r528 = lshr i544 %r524, 32
-%r529 = trunc i544 %r528 to i32
-%r531 = getelementptr i32, i32* %r1, i32 14
-store i32 %r529, i32* %r531
-%r532 = lshr i544 %r528, 32
-%r533 = trunc i544 %r532 to i32
-%r535 = getelementptr i32, i32* %r1, i32 15
-store i32 %r533, i32* %r535
-%r536 = lshr i544 %r532, 32
-%r537 = trunc i544 %r536 to i32
-%r539 = getelementptr i32, i32* %r1, i32 16
-store i32 %r537, i32* %r539
-%r540 = lshr i1120 %r471, 544
-%r541 = trunc i1120 %r540 to i544
-%r542 = lshr i1120 %r471, 1088
-%r543 = trunc i1120 %r542 to i1
-%r544 = load i32, i32* %r4
-%r545 = zext i32 %r544 to i64
-%r547 = getelementptr i32, i32* %r4, i32 1
-%r548 = load i32, i32* %r547
-%r549 = zext i32 %r548 to i64
-%r550 = shl i64 %r549, 32
-%r551 = or i64 %r545, %r550
-%r552 = zext i64 %r551 to i96
-%r554 = getelementptr i32, i32* %r4, i32 2
-%r555 = load i32, i32* %r554
-%r556 = zext i32 %r555 to i96
-%r557 = shl i96 %r556, 64
-%r558 = or i96 %r552, %r557
-%r559 = zext i96 %r558 to i128
-%r561 = getelementptr i32, i32* %r4, i32 3
-%r562 = load i32, i32* %r561
-%r563 = zext i32 %r562 to i128
-%r564 = shl i128 %r563, 96
-%r565 = or i128 %r559, %r564
-%r566 = zext i128 %r565 to i160
-%r568 = getelementptr i32, i32* %r4, i32 4
-%r569 = load i32, i32* %r568
-%r570 = zext i32 %r569 to i160
-%r571 = shl i160 %r570, 128
-%r572 = or i160 %r566, %r571
-%r573 = zext i160 %r572 to i192
-%r575 = getelementptr i32, i32* %r4, i32 5
-%r576 = load i32, i32* %r575
-%r577 = zext i32 %r576 to i192
-%r578 = shl i192 %r577, 160
-%r579 = or i192 %r573, %r578
-%r580 = zext i192 %r579 to i224
-%r582 = getelementptr i32, i32* %r4, i32 6
-%r583 = load i32, i32* %r582
-%r584 = zext i32 %r583 to i224
-%r585 = shl i224 %r584, 192
-%r586 = or i224 %r580, %r585
-%r587 = zext i224 %r586 to i256
-%r589 = getelementptr i32, i32* %r4, i32 7
-%r590 = load i32, i32* %r589
-%r591 = zext i32 %r590 to i256
-%r592 = shl i256 %r591, 224
-%r593 = or i256 %r587, %r592
-%r594 = zext i256 %r593 to i288
-%r596 = getelementptr i32, i32* %r4, i32 8
-%r597 = load i32, i32* %r596
-%r598 = zext i32 %r597 to i288
-%r599 = shl i288 %r598, 256
-%r600 = or i288 %r594, %r599
-%r601 = zext i288 %r600 to i320
-%r603 = getelementptr i32, i32* %r4, i32 9
-%r604 = load i32, i32* %r603
-%r605 = zext i32 %r604 to i320
-%r606 = shl i320 %r605, 288
-%r607 = or i320 %r601, %r606
-%r608 = zext i320 %r607 to i352
-%r610 = getelementptr i32, i32* %r4, i32 10
-%r611 = load i32, i32* %r610
-%r612 = zext i32 %r611 to i352
-%r613 = shl i352 %r612, 320
-%r614 = or i352 %r608, %r613
-%r615 = zext i352 %r614 to i384
-%r617 = getelementptr i32, i32* %r4, i32 11
-%r618 = load i32, i32* %r617
-%r619 = zext i32 %r618 to i384
-%r620 = shl i384 %r619, 352
-%r621 = or i384 %r615, %r620
-%r622 = zext i384 %r621 to i416
-%r624 = getelementptr i32, i32* %r4, i32 12
-%r625 = load i32, i32* %r624
-%r626 = zext i32 %r625 to i416
-%r627 = shl i416 %r626, 384
-%r628 = or i416 %r622, %r627
-%r629 = zext i416 %r628 to i448
-%r631 = getelementptr i32, i32* %r4, i32 13
-%r632 = load i32, i32* %r631
-%r633 = zext i32 %r632 to i448
-%r634 = shl i448 %r633, 416
-%r635 = or i448 %r629, %r634
-%r636 = zext i448 %r635 to i480
-%r638 = getelementptr i32, i32* %r4, i32 14
-%r639 = load i32, i32* %r638
-%r640 = zext i32 %r639 to i480
-%r641 = shl i480 %r640, 448
-%r642 = or i480 %r636, %r641
-%r643 = zext i480 %r642 to i512
-%r645 = getelementptr i32, i32* %r4, i32 15
-%r646 = load i32, i32* %r645
-%r647 = zext i32 %r646 to i512
-%r648 = shl i512 %r647, 480
-%r649 = or i512 %r643, %r648
-%r650 = zext i512 %r649 to i544
-%r652 = getelementptr i32, i32* %r4, i32 16
-%r653 = load i32, i32* %r652
-%r654 = zext i32 %r653 to i544
-%r655 = shl i544 %r654, 512
-%r656 = or i544 %r650, %r655
-%r658 = select i1 %r543, i544 %r656, i544 0
-%r659 = add i544 %r541, %r658
-%r661 = getelementptr i32, i32* %r1, i32 17
-%r662 = trunc i544 %r659 to i32
-%r664 = getelementptr i32, i32* %r661, i32 0
-store i32 %r662, i32* %r664
-%r665 = lshr i544 %r659, 32
-%r666 = trunc i544 %r665 to i32
-%r668 = getelementptr i32, i32* %r661, i32 1
-store i32 %r666, i32* %r668
-%r669 = lshr i544 %r665, 32
-%r670 = trunc i544 %r669 to i32
-%r672 = getelementptr i32, i32* %r661, i32 2
-store i32 %r670, i32* %r672
-%r673 = lshr i544 %r669, 32
-%r674 = trunc i544 %r673 to i32
-%r676 = getelementptr i32, i32* %r661, i32 3
-store i32 %r674, i32* %r676
-%r677 = lshr i544 %r673, 32
-%r678 = trunc i544 %r677 to i32
-%r680 = getelementptr i32, i32* %r661, i32 4
-store i32 %r678, i32* %r680
-%r681 = lshr i544 %r677, 32
-%r682 = trunc i544 %r681 to i32
-%r684 = getelementptr i32, i32* %r661, i32 5
-store i32 %r682, i32* %r684
-%r685 = lshr i544 %r681, 32
-%r686 = trunc i544 %r685 to i32
-%r688 = getelementptr i32, i32* %r661, i32 6
-store i32 %r686, i32* %r688
-%r689 = lshr i544 %r685, 32
-%r690 = trunc i544 %r689 to i32
-%r692 = getelementptr i32, i32* %r661, i32 7
-store i32 %r690, i32* %r692
-%r693 = lshr i544 %r689, 32
-%r694 = trunc i544 %r693 to i32
-%r696 = getelementptr i32, i32* %r661, i32 8
-store i32 %r694, i32* %r696
-%r697 = lshr i544 %r693, 32
-%r698 = trunc i544 %r697 to i32
-%r700 = getelementptr i32, i32* %r661, i32 9
-store i32 %r698, i32* %r700
-%r701 = lshr i544 %r697, 32
-%r702 = trunc i544 %r701 to i32
-%r704 = getelementptr i32, i32* %r661, i32 10
-store i32 %r702, i32* %r704
-%r705 = lshr i544 %r701, 32
-%r706 = trunc i544 %r705 to i32
-%r708 = getelementptr i32, i32* %r661, i32 11
-store i32 %r706, i32* %r708
-%r709 = lshr i544 %r705, 32
-%r710 = trunc i544 %r709 to i32
-%r712 = getelementptr i32, i32* %r661, i32 12
-store i32 %r710, i32* %r712
-%r713 = lshr i544 %r709, 32
-%r714 = trunc i544 %r713 to i32
-%r716 = getelementptr i32, i32* %r661, i32 13
-store i32 %r714, i32* %r716
-%r717 = lshr i544 %r713, 32
-%r718 = trunc i544 %r717 to i32
-%r720 = getelementptr i32, i32* %r661, i32 14
-store i32 %r718, i32* %r720
-%r721 = lshr i544 %r717, 32
-%r722 = trunc i544 %r721 to i32
-%r724 = getelementptr i32, i32* %r661, i32 15
-store i32 %r722, i32* %r724
-%r725 = lshr i544 %r721, 32
-%r726 = trunc i544 %r725 to i32
-%r728 = getelementptr i32, i32* %r661, i32 16
-store i32 %r726, i32* %r728
+%r438 = zext i32 %r437 to i1024
+%r439 = shl i1024 %r438, 992
+%r440 = or i1024 %r434, %r439
+%r441 = zext i1024 %r222 to i1056
+%r442 = zext i1024 %r440 to i1056
+%r443 = sub i1056 %r441, %r442
+%r444 = trunc i1056 %r443 to i512
+%r446 = getelementptr i32, i32* %r1, i32 0
+%r447 = trunc i512 %r444 to i32
+store i32 %r447, i32* %r446
+%r448 = lshr i512 %r444, 32
+%r450 = getelementptr i32, i32* %r1, i32 1
+%r451 = trunc i512 %r448 to i32
+store i32 %r451, i32* %r450
+%r452 = lshr i512 %r448, 32
+%r454 = getelementptr i32, i32* %r1, i32 2
+%r455 = trunc i512 %r452 to i32
+store i32 %r455, i32* %r454
+%r456 = lshr i512 %r452, 32
+%r458 = getelementptr i32, i32* %r1, i32 3
+%r459 = trunc i512 %r456 to i32
+store i32 %r459, i32* %r458
+%r460 = lshr i512 %r456, 32
+%r462 = getelementptr i32, i32* %r1, i32 4
+%r463 = trunc i512 %r460 to i32
+store i32 %r463, i32* %r462
+%r464 = lshr i512 %r460, 32
+%r466 = getelementptr i32, i32* %r1, i32 5
+%r467 = trunc i512 %r464 to i32
+store i32 %r467, i32* %r466
+%r468 = lshr i512 %r464, 32
+%r470 = getelementptr i32, i32* %r1, i32 6
+%r471 = trunc i512 %r468 to i32
+store i32 %r471, i32* %r470
+%r472 = lshr i512 %r468, 32
+%r474 = getelementptr i32, i32* %r1, i32 7
+%r475 = trunc i512 %r472 to i32
+store i32 %r475, i32* %r474
+%r476 = lshr i512 %r472, 32
+%r478 = getelementptr i32, i32* %r1, i32 8
+%r479 = trunc i512 %r476 to i32
+store i32 %r479, i32* %r478
+%r480 = lshr i512 %r476, 32
+%r482 = getelementptr i32, i32* %r1, i32 9
+%r483 = trunc i512 %r480 to i32
+store i32 %r483, i32* %r482
+%r484 = lshr i512 %r480, 32
+%r486 = getelementptr i32, i32* %r1, i32 10
+%r487 = trunc i512 %r484 to i32
+store i32 %r487, i32* %r486
+%r488 = lshr i512 %r484, 32
+%r490 = getelementptr i32, i32* %r1, i32 11
+%r491 = trunc i512 %r488 to i32
+store i32 %r491, i32* %r490
+%r492 = lshr i512 %r488, 32
+%r494 = getelementptr i32, i32* %r1, i32 12
+%r495 = trunc i512 %r492 to i32
+store i32 %r495, i32* %r494
+%r496 = lshr i512 %r492, 32
+%r498 = getelementptr i32, i32* %r1, i32 13
+%r499 = trunc i512 %r496 to i32
+store i32 %r499, i32* %r498
+%r500 = lshr i512 %r496, 32
+%r502 = getelementptr i32, i32* %r1, i32 14
+%r503 = trunc i512 %r500 to i32
+store i32 %r503, i32* %r502
+%r504 = lshr i512 %r500, 32
+%r506 = getelementptr i32, i32* %r1, i32 15
+%r507 = trunc i512 %r504 to i32
+store i32 %r507, i32* %r506
+%r508 = lshr i1056 %r443, 512
+%r509 = trunc i1056 %r508 to i512
+%r510 = lshr i1056 %r443, 1024
+%r511 = trunc i1056 %r510 to i1
+%r512 = load i32, i32* %r4
+%r513 = zext i32 %r512 to i64
+%r515 = getelementptr i32, i32* %r4, i32 1
+%r516 = load i32, i32* %r515
+%r517 = zext i32 %r516 to i64
+%r518 = shl i64 %r517, 32
+%r519 = or i64 %r513, %r518
+%r520 = zext i64 %r519 to i96
+%r522 = getelementptr i32, i32* %r4, i32 2
+%r523 = load i32, i32* %r522
+%r524 = zext i32 %r523 to i96
+%r525 = shl i96 %r524, 64
+%r526 = or i96 %r520, %r525
+%r527 = zext i96 %r526 to i128
+%r529 = getelementptr i32, i32* %r4, i32 3
+%r530 = load i32, i32* %r529
+%r531 = zext i32 %r530 to i128
+%r532 = shl i128 %r531, 96
+%r533 = or i128 %r527, %r532
+%r534 = zext i128 %r533 to i160
+%r536 = getelementptr i32, i32* %r4, i32 4
+%r537 = load i32, i32* %r536
+%r538 = zext i32 %r537 to i160
+%r539 = shl i160 %r538, 128
+%r540 = or i160 %r534, %r539
+%r541 = zext i160 %r540 to i192
+%r543 = getelementptr i32, i32* %r4, i32 5
+%r544 = load i32, i32* %r543
+%r545 = zext i32 %r544 to i192
+%r546 = shl i192 %r545, 160
+%r547 = or i192 %r541, %r546
+%r548 = zext i192 %r547 to i224
+%r550 = getelementptr i32, i32* %r4, i32 6
+%r551 = load i32, i32* %r550
+%r552 = zext i32 %r551 to i224
+%r553 = shl i224 %r552, 192
+%r554 = or i224 %r548, %r553
+%r555 = zext i224 %r554 to i256
+%r557 = getelementptr i32, i32* %r4, i32 7
+%r558 = load i32, i32* %r557
+%r559 = zext i32 %r558 to i256
+%r560 = shl i256 %r559, 224
+%r561 = or i256 %r555, %r560
+%r562 = zext i256 %r561 to i288
+%r564 = getelementptr i32, i32* %r4, i32 8
+%r565 = load i32, i32* %r564
+%r566 = zext i32 %r565 to i288
+%r567 = shl i288 %r566, 256
+%r568 = or i288 %r562, %r567
+%r569 = zext i288 %r568 to i320
+%r571 = getelementptr i32, i32* %r4, i32 9
+%r572 = load i32, i32* %r571
+%r573 = zext i32 %r572 to i320
+%r574 = shl i320 %r573, 288
+%r575 = or i320 %r569, %r574
+%r576 = zext i320 %r575 to i352
+%r578 = getelementptr i32, i32* %r4, i32 10
+%r579 = load i32, i32* %r578
+%r580 = zext i32 %r579 to i352
+%r581 = shl i352 %r580, 320
+%r582 = or i352 %r576, %r581
+%r583 = zext i352 %r582 to i384
+%r585 = getelementptr i32, i32* %r4, i32 11
+%r586 = load i32, i32* %r585
+%r587 = zext i32 %r586 to i384
+%r588 = shl i384 %r587, 352
+%r589 = or i384 %r583, %r588
+%r590 = zext i384 %r589 to i416
+%r592 = getelementptr i32, i32* %r4, i32 12
+%r593 = load i32, i32* %r592
+%r594 = zext i32 %r593 to i416
+%r595 = shl i416 %r594, 384
+%r596 = or i416 %r590, %r595
+%r597 = zext i416 %r596 to i448
+%r599 = getelementptr i32, i32* %r4, i32 13
+%r600 = load i32, i32* %r599
+%r601 = zext i32 %r600 to i448
+%r602 = shl i448 %r601, 416
+%r603 = or i448 %r597, %r602
+%r604 = zext i448 %r603 to i480
+%r606 = getelementptr i32, i32* %r4, i32 14
+%r607 = load i32, i32* %r606
+%r608 = zext i32 %r607 to i480
+%r609 = shl i480 %r608, 448
+%r610 = or i480 %r604, %r609
+%r611 = zext i480 %r610 to i512
+%r613 = getelementptr i32, i32* %r4, i32 15
+%r614 = load i32, i32* %r613
+%r615 = zext i32 %r614 to i512
+%r616 = shl i512 %r615, 480
+%r617 = or i512 %r611, %r616
+%r619 = select i1 %r511, i512 %r617, i512 0
+%r620 = add i512 %r509, %r619
+%r622 = getelementptr i32, i32* %r1, i32 16
+%r624 = getelementptr i32, i32* %r622, i32 0
+%r625 = trunc i512 %r620 to i32
+store i32 %r625, i32* %r624
+%r626 = lshr i512 %r620, 32
+%r628 = getelementptr i32, i32* %r622, i32 1
+%r629 = trunc i512 %r626 to i32
+store i32 %r629, i32* %r628
+%r630 = lshr i512 %r626, 32
+%r632 = getelementptr i32, i32* %r622, i32 2
+%r633 = trunc i512 %r630 to i32
+store i32 %r633, i32* %r632
+%r634 = lshr i512 %r630, 32
+%r636 = getelementptr i32, i32* %r622, i32 3
+%r637 = trunc i512 %r634 to i32
+store i32 %r637, i32* %r636
+%r638 = lshr i512 %r634, 32
+%r640 = getelementptr i32, i32* %r622, i32 4
+%r641 = trunc i512 %r638 to i32
+store i32 %r641, i32* %r640
+%r642 = lshr i512 %r638, 32
+%r644 = getelementptr i32, i32* %r622, i32 5
+%r645 = trunc i512 %r642 to i32
+store i32 %r645, i32* %r644
+%r646 = lshr i512 %r642, 32
+%r648 = getelementptr i32, i32* %r622, i32 6
+%r649 = trunc i512 %r646 to i32
+store i32 %r649, i32* %r648
+%r650 = lshr i512 %r646, 32
+%r652 = getelementptr i32, i32* %r622, i32 7
+%r653 = trunc i512 %r650 to i32
+store i32 %r653, i32* %r652
+%r654 = lshr i512 %r650, 32
+%r656 = getelementptr i32, i32* %r622, i32 8
+%r657 = trunc i512 %r654 to i32
+store i32 %r657, i32* %r656
+%r658 = lshr i512 %r654, 32
+%r660 = getelementptr i32, i32* %r622, i32 9
+%r661 = trunc i512 %r658 to i32
+store i32 %r661, i32* %r660
+%r662 = lshr i512 %r658, 32
+%r664 = getelementptr i32, i32* %r622, i32 10
+%r665 = trunc i512 %r662 to i32
+store i32 %r665, i32* %r664
+%r666 = lshr i512 %r662, 32
+%r668 = getelementptr i32, i32* %r622, i32 11
+%r669 = trunc i512 %r666 to i32
+store i32 %r669, i32* %r668
+%r670 = lshr i512 %r666, 32
+%r672 = getelementptr i32, i32* %r622, i32 12
+%r673 = trunc i512 %r670 to i32
+store i32 %r673, i32* %r672
+%r674 = lshr i512 %r670, 32
+%r676 = getelementptr i32, i32* %r622, i32 13
+%r677 = trunc i512 %r674 to i32
+store i32 %r677, i32* %r676
+%r678 = lshr i512 %r674, 32
+%r680 = getelementptr i32, i32* %r622, i32 14
+%r681 = trunc i512 %r678 to i32
+store i32 %r681, i32* %r680
+%r682 = lshr i512 %r678, 32
+%r684 = getelementptr i32, i32* %r622, i32 15
+%r685 = trunc i512 %r682 to i32
+store i32 %r685, i32* %r684
 ret void
 }
diff --git a/src/base64.ll b/src/base64.ll
index e64ee12a..437a3779 100644
--- a/src/base64.ll
+++ b/src/base64.ll
@@ -88,17 +88,17 @@ define void @mcl_fpDbl_mod_NIST_P192L(i64* noalias  %r1, i64* noalias  %r2, i64*
 %r60 = trunc i256 %r59 to i1
 %r61 = select i1 %r60, i256 %r55, i256 %r58
 %r62 = trunc i256 %r61 to i192
-%r63 = trunc i192 %r62 to i64
-%r65 = getelementptr i64, i64* %r1, i32 0
-store i64 %r63, i64* %r65
+%r64 = getelementptr i64, i64* %r1, i32 0
+%r65 = trunc i192 %r62 to i64
+store i64 %r65, i64* %r64
 %r66 = lshr i192 %r62, 64
-%r67 = trunc i192 %r66 to i64
-%r69 = getelementptr i64, i64* %r1, i32 1
-store i64 %r67, i64* %r69
+%r68 = getelementptr i64, i64* %r1, i32 1
+%r69 = trunc i192 %r66 to i64
+store i64 %r69, i64* %r68
 %r70 = lshr i192 %r66, 64
-%r71 = trunc i192 %r70 to i64
-%r73 = getelementptr i64, i64* %r1, i32 2
-store i64 %r71, i64* %r73
+%r72 = getelementptr i64, i64* %r1, i32 2
+%r73 = trunc i192 %r70 to i64
+store i64 %r73, i64* %r72
 ret void
 }
 define void @mcl_fp_sqr_NIST_P192L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
@@ -273,341 +273,44 @@ store i64 0, i64* %r179
 store i64 0, i64* %r182
 ret void
 nonzero:
-%r183 = trunc i576 %r127 to i64
-%r185 = getelementptr i64, i64* %r1, i32 0
-store i64 %r183, i64* %r185
+%r184 = getelementptr i64, i64* %r1, i32 0
+%r185 = trunc i576 %r127 to i64
+store i64 %r185, i64* %r184
 %r186 = lshr i576 %r127, 64
-%r187 = trunc i576 %r186 to i64
-%r189 = getelementptr i64, i64* %r1, i32 1
-store i64 %r187, i64* %r189
+%r188 = getelementptr i64, i64* %r1, i32 1
+%r189 = trunc i576 %r186 to i64
+store i64 %r189, i64* %r188
 %r190 = lshr i576 %r186, 64
-%r191 = trunc i576 %r190 to i64
-%r193 = getelementptr i64, i64* %r1, i32 2
-store i64 %r191, i64* %r193
+%r192 = getelementptr i64, i64* %r1, i32 2
+%r193 = trunc i576 %r190 to i64
+store i64 %r193, i64* %r192
 %r194 = lshr i576 %r190, 64
-%r195 = trunc i576 %r194 to i64
-%r197 = getelementptr i64, i64* %r1, i32 3
-store i64 %r195, i64* %r197
+%r196 = getelementptr i64, i64* %r1, i32 3
+%r197 = trunc i576 %r194 to i64
+store i64 %r197, i64* %r196
 %r198 = lshr i576 %r194, 64
-%r199 = trunc i576 %r198 to i64
-%r201 = getelementptr i64, i64* %r1, i32 4
-store i64 %r199, i64* %r201
+%r200 = getelementptr i64, i64* %r1, i32 4
+%r201 = trunc i576 %r198 to i64
+store i64 %r201, i64* %r200
 %r202 = lshr i576 %r198, 64
-%r203 = trunc i576 %r202 to i64
-%r205 = getelementptr i64, i64* %r1, i32 5
-store i64 %r203, i64* %r205
+%r204 = getelementptr i64, i64* %r1, i32 5
+%r205 = trunc i576 %r202 to i64
+store i64 %r205, i64* %r204
 %r206 = lshr i576 %r202, 64
-%r207 = trunc i576 %r206 to i64
-%r209 = getelementptr i64, i64* %r1, i32 6
-store i64 %r207, i64* %r209
+%r208 = getelementptr i64, i64* %r1, i32 6
+%r209 = trunc i576 %r206 to i64
+store i64 %r209, i64* %r208
 %r210 = lshr i576 %r206, 64
-%r211 = trunc i576 %r210 to i64
-%r213 = getelementptr i64, i64* %r1, i32 7
-store i64 %r211, i64* %r213
+%r212 = getelementptr i64, i64* %r1, i32 7
+%r213 = trunc i576 %r210 to i64
+store i64 %r213, i64* %r212
 %r214 = lshr i576 %r210, 64
-%r215 = trunc i576 %r214 to i64
-%r217 = getelementptr i64, i64* %r1, i32 8
-store i64 %r215, i64* %r217
-ret void
-}
-define i128 @mulPv64x64(i64* noalias  %r2, i64 %r3)
-{
-%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0)
-%r6 = trunc i128 %r5 to i64
-%r7 = call i64 @extractHigh64(i128 %r5)
-%r8 = zext i64 %r6 to i128
-%r9 = zext i64 %r7 to i128
-%r10 = shl i128 %r9, 64
-%r11 = add i128 %r8, %r10
-ret i128 %r11
-}
-define void @mcl_fp_mulUnitPre1L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3)
-{
-%r4 = call i128 @mulPv64x64(i64* %r2, i64 %r3)
-%r5 = trunc i128 %r4 to i64
-%r7 = getelementptr i64, i64* %r1, i32 0
-store i64 %r5, i64* %r7
-%r8 = lshr i128 %r4, 64
-%r9 = trunc i128 %r8 to i64
-%r11 = getelementptr i64, i64* %r1, i32 1
-store i64 %r9, i64* %r11
-ret void
-}
-define void @mcl_fpDbl_mulPre1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
-{
-%r4 = load i64, i64* %r2
-%r5 = load i64, i64* %r3
-%r6 = zext i64 %r4 to i128
-%r7 = zext i64 %r5 to i128
-%r8 = mul i128 %r6, %r7
-%r9 = trunc i128 %r8 to i64
-%r11 = getelementptr i64, i64* %r1, i32 0
-store i64 %r9, i64* %r11
-%r12 = lshr i128 %r8, 64
-%r13 = trunc i128 %r12 to i64
-%r15 = getelementptr i64, i64* %r1, i32 1
-store i64 %r13, i64* %r15
-ret void
-}
-define void @mcl_fpDbl_sqrPre1L(i64* noalias  %r1, i64* noalias  %r2)
-{
-%r3 = load i64, i64* %r2
-%r4 = load i64, i64* %r2
-%r5 = zext i64 %r3 to i128
-%r6 = zext i64 %r4 to i128
-%r7 = mul i128 %r5, %r6
-%r8 = trunc i128 %r7 to i64
-%r10 = getelementptr i64, i64* %r1, i32 0
-store i64 %r8, i64* %r10
-%r11 = lshr i128 %r7, 64
-%r12 = trunc i128 %r11 to i64
-%r14 = getelementptr i64, i64* %r1, i32 1
-store i64 %r12, i64* %r14
-ret void
-}
-define void @mcl_fp_mont1L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
-{
-%r6 = getelementptr i64, i64* %r4, i32 -1
-%r7 = load i64, i64* %r6
-%r9 = getelementptr i64, i64* %r3, i32 0
-%r10 = load i64, i64* %r9
-%r11 = call i128 @mulPv64x64(i64* %r2, i64 %r10)
-%r12 = zext i128 %r11 to i192
-%r13 = trunc i128 %r11 to i64
-%r14 = mul i64 %r13, %r7
-%r15 = call i128 @mulPv64x64(i64* %r4, i64 %r14)
-%r16 = zext i128 %r15 to i192
-%r17 = add i192 %r12, %r16
-%r18 = lshr i192 %r17, 64
-%r19 = trunc i192 %r18 to i128
-%r20 = load i64, i64* %r4
-%r21 = zext i64 %r20 to i128
-%r22 = sub i128 %r19, %r21
-%r23 = lshr i128 %r22, 64
-%r24 = trunc i128 %r23 to i1
-%r25 = select i1 %r24, i128 %r19, i128 %r22
-%r26 = trunc i128 %r25 to i64
-store i64 %r26, i64* %r1
-ret void
-}
-define void @mcl_fp_montNF1L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
-{
-%r6 = getelementptr i64, i64* %r4, i32 -1
-%r7 = load i64, i64* %r6
-%r8 = load i64, i64* %r3
-%r9 = call i128 @mulPv64x64(i64* %r2, i64 %r8)
-%r10 = trunc i128 %r9 to i64
-%r11 = mul i64 %r10, %r7
-%r12 = call i128 @mulPv64x64(i64* %r4, i64 %r11)
-%r13 = add i128 %r9, %r12
-%r14 = lshr i128 %r13, 64
-%r15 = trunc i128 %r14 to i64
-%r16 = load i64, i64* %r4
-%r17 = sub i64 %r15, %r16
-%r18 = lshr i64 %r17, 63
-%r19 = trunc i64 %r18 to i1
-%r20 = select i1 %r19, i64 %r15, i64 %r17
-store i64 %r20, i64* %r1
-ret void
-}
-define void @mcl_fp_montRed1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
-{
-%r5 = getelementptr i64, i64* %r3, i32 -1
-%r6 = load i64, i64* %r5
-%r7 = load i64, i64* %r3
-%r8 = load i64, i64* %r2
-%r9 = zext i64 %r8 to i128
-%r11 = getelementptr i64, i64* %r2, i32 1
-%r12 = load i64, i64* %r11
-%r13 = zext i64 %r12 to i128
-%r14 = shl i128 %r13, 64
-%r15 = or i128 %r9, %r14
-%r16 = zext i128 %r15 to i192
-%r17 = trunc i192 %r16 to i64
-%r18 = mul i64 %r17, %r6
-%r19 = call i128 @mulPv64x64(i64* %r3, i64 %r18)
-%r20 = zext i128 %r19 to i192
-%r21 = add i192 %r16, %r20
-%r22 = lshr i192 %r21, 64
-%r23 = trunc i192 %r22 to i128
-%r24 = zext i64 %r7 to i128
-%r25 = sub i128 %r23, %r24
-%r26 = lshr i128 %r25, 64
-%r27 = trunc i128 %r26 to i1
-%r28 = select i1 %r27, i128 %r23, i128 %r25
-%r29 = trunc i128 %r28 to i64
-store i64 %r29, i64* %r1
-ret void
-}
-define i64 @mcl_fp_addPre1L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r3
-%r6 = zext i64 %r5 to i128
-%r7 = load i64, i64* %r4
-%r8 = zext i64 %r7 to i128
-%r9 = add i128 %r6, %r8
-%r10 = trunc i128 %r9 to i64
-store i64 %r10, i64* %r2
-%r11 = lshr i128 %r9, 64
-%r12 = trunc i128 %r11 to i64
-ret i64 %r12
-}
-define i64 @mcl_fp_subPre1L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r3
-%r6 = zext i64 %r5 to i128
-%r7 = load i64, i64* %r4
-%r8 = zext i64 %r7 to i128
-%r9 = sub i128 %r6, %r8
-%r10 = trunc i128 %r9 to i64
-store i64 %r10, i64* %r2
-%r11 = lshr i128 %r9, 64
-%r12 = trunc i128 %r11 to i64
-%r14 = and i64 %r12, 1
-ret i64 %r14
-}
-define void @mcl_fp_shr1_1L(i64* noalias  %r1, i64* noalias  %r2)
-{
-%r3 = load i64, i64* %r2
-%r4 = lshr i64 %r3, 1
-store i64 %r4, i64* %r1
-ret void
-}
-define void @mcl_fp_add1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = load i64, i64* %r3
-%r7 = zext i64 %r5 to i128
-%r8 = zext i64 %r6 to i128
-%r9 = add i128 %r7, %r8
-%r10 = trunc i128 %r9 to i64
-store i64 %r10, i64* %r1
-%r11 = load i64, i64* %r4
-%r12 = zext i64 %r11 to i128
-%r13 = sub i128 %r9, %r12
-%r14 = lshr i128 %r13, 64
-%r15 = trunc i128 %r14 to i1
-br i1%r15, label %carry, label %nocarry
-nocarry:
-%r16 = trunc i128 %r13 to i64
-store i64 %r16, i64* %r1
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = load i64, i64* %r3
-%r7 = add i64 %r5, %r6
-%r8 = load i64, i64* %r4
-%r9 = sub i64 %r7, %r8
-%r10 = lshr i64 %r9, 63
-%r11 = trunc i64 %r10 to i1
-%r12 = select i1 %r11, i64 %r7, i64 %r9
-store i64 %r12, i64* %r1
-ret void
-}
-define void @mcl_fp_sub1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = load i64, i64* %r3
-%r7 = zext i64 %r5 to i128
-%r8 = zext i64 %r6 to i128
-%r9 = sub i128 %r7, %r8
-%r10 = trunc i128 %r9 to i64
-%r11 = lshr i128 %r9, 64
-%r12 = trunc i128 %r11 to i1
-store i64 %r10, i64* %r1
-br i1%r12, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r13 = load i64, i64* %r4
-%r14 = add i64 %r10, %r13
-store i64 %r14, i64* %r1
-ret void
-}
-define void @mcl_fp_subNF1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = load i64, i64* %r3
-%r7 = sub i64 %r5, %r6
-%r8 = lshr i64 %r7, 63
-%r9 = trunc i64 %r8 to i1
-%r10 = load i64, i64* %r4
-%r12 = select i1 %r9, i64 %r10, i64 0
-%r13 = add i64 %r7, %r12
-store i64 %r13, i64* %r1
-ret void
-}
-define void @mcl_fpDbl_add1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = load i64, i64* %r3
-%r14 = zext i64 %r13 to i128
-%r16 = getelementptr i64, i64* %r3, i32 1
-%r17 = load i64, i64* %r16
-%r18 = zext i64 %r17 to i128
-%r19 = shl i128 %r18, 64
-%r20 = or i128 %r14, %r19
-%r21 = zext i128 %r12 to i192
-%r22 = zext i128 %r20 to i192
-%r23 = add i192 %r21, %r22
-%r24 = trunc i192 %r23 to i64
-store i64 %r24, i64* %r1
-%r25 = lshr i192 %r23, 64
-%r26 = trunc i192 %r25 to i128
-%r27 = load i64, i64* %r4
-%r28 = zext i64 %r27 to i128
-%r29 = sub i128 %r26, %r28
-%r30 = lshr i128 %r29, 64
-%r31 = trunc i128 %r30 to i1
-%r32 = select i1 %r31, i128 %r26, i128 %r29
-%r33 = trunc i128 %r32 to i64
-%r35 = getelementptr i64, i64* %r1, i32 1
-store i64 %r33, i64* %r35
-ret void
-}
-define void @mcl_fpDbl_sub1L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = load i64, i64* %r3
-%r14 = zext i64 %r13 to i128
-%r16 = getelementptr i64, i64* %r3, i32 1
-%r17 = load i64, i64* %r16
-%r18 = zext i64 %r17 to i128
-%r19 = shl i128 %r18, 64
-%r20 = or i128 %r14, %r19
-%r21 = zext i128 %r12 to i192
-%r22 = zext i128 %r20 to i192
-%r23 = sub i192 %r21, %r22
-%r24 = trunc i192 %r23 to i64
-store i64 %r24, i64* %r1
-%r25 = lshr i192 %r23, 64
-%r26 = trunc i192 %r25 to i64
-%r27 = lshr i192 %r23, 128
-%r28 = trunc i192 %r27 to i1
-%r29 = load i64, i64* %r4
-%r31 = select i1 %r28, i64 %r29, i64 0
-%r32 = add i64 %r26, %r31
-%r34 = getelementptr i64, i64* %r1, i32 1
-store i64 %r32, i64* %r34
+%r216 = getelementptr i64, i64* %r1, i32 8
+%r217 = trunc i576 %r214 to i64
+store i64 %r217, i64* %r216
 ret void
 }
-define i192 @mulPv128x64(i64* noalias  %r2, i64 %r3)
+define i256 @mulPv192x64(i64* noalias  %r2, i64 %r3)
 {
 %r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0)
 %r6 = trunc i128 %r5 to i64
@@ -615,176 +318,362 @@ define i192 @mulPv128x64(i64* noalias  %r2, i64 %r3)
 %r9 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 1)
 %r10 = trunc i128 %r9 to i64
 %r11 = call i64 @extractHigh64(i128 %r9)
-%r12 = zext i64 %r6 to i128
-%r13 = zext i64 %r10 to i128
-%r14 = shl i128 %r13, 64
-%r15 = or i128 %r12, %r14
-%r16 = zext i64 %r7 to i128
-%r17 = zext i64 %r11 to i128
+%r13 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 2)
+%r14 = trunc i128 %r13 to i64
+%r15 = call i64 @extractHigh64(i128 %r13)
+%r16 = zext i64 %r6 to i128
+%r17 = zext i64 %r10 to i128
 %r18 = shl i128 %r17, 64
 %r19 = or i128 %r16, %r18
-%r20 = zext i128 %r15 to i192
-%r21 = zext i128 %r19 to i192
-%r22 = shl i192 %r21, 64
-%r23 = add i192 %r20, %r22
-ret i192 %r23
+%r20 = zext i128 %r19 to i192
+%r21 = zext i64 %r14 to i192
+%r22 = shl i192 %r21, 128
+%r23 = or i192 %r20, %r22
+%r24 = zext i64 %r7 to i128
+%r25 = zext i64 %r11 to i128
+%r26 = shl i128 %r25, 64
+%r27 = or i128 %r24, %r26
+%r28 = zext i128 %r27 to i192
+%r29 = zext i64 %r15 to i192
+%r30 = shl i192 %r29, 128
+%r31 = or i192 %r28, %r30
+%r32 = zext i192 %r23 to i256
+%r33 = zext i192 %r31 to i256
+%r34 = shl i256 %r33, 64
+%r35 = add i256 %r32, %r34
+ret i256 %r35
 }
-define void @mcl_fp_mulUnitPre2L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3)
+define void @mcl_fp_mulUnitPre3L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3)
 {
-%r4 = call i192 @mulPv128x64(i64* %r2, i64 %r3)
-%r5 = trunc i192 %r4 to i64
-%r7 = getelementptr i64, i64* %r1, i32 0
-store i64 %r5, i64* %r7
-%r8 = lshr i192 %r4, 64
-%r9 = trunc i192 %r8 to i64
-%r11 = getelementptr i64, i64* %r1, i32 1
-store i64 %r9, i64* %r11
-%r12 = lshr i192 %r8, 64
-%r13 = trunc i192 %r12 to i64
-%r15 = getelementptr i64, i64* %r1, i32 2
-store i64 %r13, i64* %r15
+%r4 = call i256 @mulPv192x64(i64* %r2, i64 %r3)
+%r6 = getelementptr i64, i64* %r1, i32 0
+%r7 = trunc i256 %r4 to i64
+store i64 %r7, i64* %r6
+%r8 = lshr i256 %r4, 64
+%r10 = getelementptr i64, i64* %r1, i32 1
+%r11 = trunc i256 %r8 to i64
+store i64 %r11, i64* %r10
+%r12 = lshr i256 %r8, 64
+%r14 = getelementptr i64, i64* %r1, i32 2
+%r15 = trunc i256 %r12 to i64
+store i64 %r15, i64* %r14
+%r16 = lshr i256 %r12, 64
+%r18 = getelementptr i64, i64* %r1, i32 3
+%r19 = trunc i256 %r16 to i64
+store i64 %r19, i64* %r18
 ret void
 }
-define void @mcl_fpDbl_mulPre2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+define void @mcl_fpDbl_mulPre3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
 {
 %r4 = load i64, i64* %r3
-%r5 = call i192 @mulPv128x64(i64* %r2, i64 %r4)
-%r6 = trunc i192 %r5 to i64
+%r5 = call i256 @mulPv192x64(i64* %r2, i64 %r4)
+%r6 = trunc i256 %r5 to i64
 store i64 %r6, i64* %r1
-%r7 = lshr i192 %r5, 64
+%r7 = lshr i256 %r5, 64
 %r9 = getelementptr i64, i64* %r3, i32 1
 %r10 = load i64, i64* %r9
-%r11 = call i192 @mulPv128x64(i64* %r2, i64 %r10)
-%r12 = add i192 %r7, %r11
-%r14 = getelementptr i64, i64* %r1, i32 1
-%r15 = trunc i192 %r12 to i64
-%r17 = getelementptr i64, i64* %r14, i32 0
-store i64 %r15, i64* %r17
-%r18 = lshr i192 %r12, 64
-%r19 = trunc i192 %r18 to i64
-%r21 = getelementptr i64, i64* %r14, i32 1
-store i64 %r19, i64* %r21
-%r22 = lshr i192 %r18, 64
-%r23 = trunc i192 %r22 to i64
-%r25 = getelementptr i64, i64* %r14, i32 2
-store i64 %r23, i64* %r25
+%r11 = call i256 @mulPv192x64(i64* %r2, i64 %r10)
+%r12 = add i256 %r7, %r11
+%r13 = trunc i256 %r12 to i64
+%r15 = getelementptr i64, i64* %r1, i32 1
+store i64 %r13, i64* %r15
+%r16 = lshr i256 %r12, 64
+%r18 = getelementptr i64, i64* %r3, i32 2
+%r19 = load i64, i64* %r18
+%r20 = call i256 @mulPv192x64(i64* %r2, i64 %r19)
+%r21 = add i256 %r16, %r20
+%r23 = getelementptr i64, i64* %r1, i32 2
+%r25 = getelementptr i64, i64* %r23, i32 0
+%r26 = trunc i256 %r21 to i64
+store i64 %r26, i64* %r25
+%r27 = lshr i256 %r21, 64
+%r29 = getelementptr i64, i64* %r23, i32 1
+%r30 = trunc i256 %r27 to i64
+store i64 %r30, i64* %r29
+%r31 = lshr i256 %r27, 64
+%r33 = getelementptr i64, i64* %r23, i32 2
+%r34 = trunc i256 %r31 to i64
+store i64 %r34, i64* %r33
+%r35 = lshr i256 %r31, 64
+%r37 = getelementptr i64, i64* %r23, i32 3
+%r38 = trunc i256 %r35 to i64
+store i64 %r38, i64* %r37
 ret void
 }
-define void @mcl_fpDbl_sqrPre2L(i64* noalias  %r1, i64* noalias  %r2)
+define void @mcl_fpDbl_sqrPre3L(i64* noalias  %r1, i64* noalias  %r2)
 {
 %r3 = load i64, i64* %r2
-%r4 = call i192 @mulPv128x64(i64* %r2, i64 %r3)
-%r5 = trunc i192 %r4 to i64
+%r4 = call i256 @mulPv192x64(i64* %r2, i64 %r3)
+%r5 = trunc i256 %r4 to i64
 store i64 %r5, i64* %r1
-%r6 = lshr i192 %r4, 64
+%r6 = lshr i256 %r4, 64
 %r8 = getelementptr i64, i64* %r2, i32 1
 %r9 = load i64, i64* %r8
-%r10 = call i192 @mulPv128x64(i64* %r2, i64 %r9)
-%r11 = add i192 %r6, %r10
-%r13 = getelementptr i64, i64* %r1, i32 1
-%r14 = trunc i192 %r11 to i64
-%r16 = getelementptr i64, i64* %r13, i32 0
-store i64 %r14, i64* %r16
-%r17 = lshr i192 %r11, 64
-%r18 = trunc i192 %r17 to i64
-%r20 = getelementptr i64, i64* %r13, i32 1
-store i64 %r18, i64* %r20
-%r21 = lshr i192 %r17, 64
-%r22 = trunc i192 %r21 to i64
-%r24 = getelementptr i64, i64* %r13, i32 2
-store i64 %r22, i64* %r24
+%r10 = call i256 @mulPv192x64(i64* %r2, i64 %r9)
+%r11 = add i256 %r6, %r10
+%r12 = trunc i256 %r11 to i64
+%r14 = getelementptr i64, i64* %r1, i32 1
+store i64 %r12, i64* %r14
+%r15 = lshr i256 %r11, 64
+%r17 = getelementptr i64, i64* %r2, i32 2
+%r18 = load i64, i64* %r17
+%r19 = call i256 @mulPv192x64(i64* %r2, i64 %r18)
+%r20 = add i256 %r15, %r19
+%r22 = getelementptr i64, i64* %r1, i32 2
+%r24 = getelementptr i64, i64* %r22, i32 0
+%r25 = trunc i256 %r20 to i64
+store i64 %r25, i64* %r24
+%r26 = lshr i256 %r20, 64
+%r28 = getelementptr i64, i64* %r22, i32 1
+%r29 = trunc i256 %r26 to i64
+store i64 %r29, i64* %r28
+%r30 = lshr i256 %r26, 64
+%r32 = getelementptr i64, i64* %r22, i32 2
+%r33 = trunc i256 %r30 to i64
+store i64 %r33, i64* %r32
+%r34 = lshr i256 %r30, 64
+%r36 = getelementptr i64, i64* %r22, i32 3
+%r37 = trunc i256 %r34 to i64
+store i64 %r37, i64* %r36
 ret void
 }
-define void @mcl_fp_mont2L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
+define void @mcl_fp_mont3L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
 {
 %r6 = getelementptr i64, i64* %r4, i32 -1
 %r7 = load i64, i64* %r6
 %r9 = getelementptr i64, i64* %r3, i32 0
 %r10 = load i64, i64* %r9
-%r11 = call i192 @mulPv128x64(i64* %r2, i64 %r10)
-%r12 = zext i192 %r11 to i256
-%r13 = trunc i192 %r11 to i64
+%r11 = call i256 @mulPv192x64(i64* %r2, i64 %r10)
+%r12 = zext i256 %r11 to i320
+%r13 = trunc i256 %r11 to i64
 %r14 = mul i64 %r13, %r7
-%r15 = call i192 @mulPv128x64(i64* %r4, i64 %r14)
-%r16 = zext i192 %r15 to i256
-%r17 = add i256 %r12, %r16
-%r18 = lshr i256 %r17, 64
+%r15 = call i256 @mulPv192x64(i64* %r4, i64 %r14)
+%r16 = zext i256 %r15 to i320
+%r17 = add i320 %r12, %r16
+%r18 = lshr i320 %r17, 64
 %r20 = getelementptr i64, i64* %r3, i32 1
 %r21 = load i64, i64* %r20
-%r22 = call i192 @mulPv128x64(i64* %r2, i64 %r21)
-%r23 = zext i192 %r22 to i256
-%r24 = add i256 %r18, %r23
-%r25 = trunc i256 %r24 to i64
+%r22 = call i256 @mulPv192x64(i64* %r2, i64 %r21)
+%r23 = zext i256 %r22 to i320
+%r24 = add i320 %r18, %r23
+%r25 = trunc i320 %r24 to i64
 %r26 = mul i64 %r25, %r7
-%r27 = call i192 @mulPv128x64(i64* %r4, i64 %r26)
-%r28 = zext i192 %r27 to i256
-%r29 = add i256 %r24, %r28
-%r30 = lshr i256 %r29, 64
-%r31 = trunc i256 %r30 to i192
-%r32 = load i64, i64* %r4
-%r33 = zext i64 %r32 to i128
-%r35 = getelementptr i64, i64* %r4, i32 1
-%r36 = load i64, i64* %r35
-%r37 = zext i64 %r36 to i128
-%r38 = shl i128 %r37, 64
-%r39 = or i128 %r33, %r38
-%r40 = zext i128 %r39 to i192
-%r41 = sub i192 %r31, %r40
-%r42 = lshr i192 %r41, 128
-%r43 = trunc i192 %r42 to i1
-%r44 = select i1 %r43, i192 %r31, i192 %r41
-%r45 = trunc i192 %r44 to i128
-%r46 = trunc i128 %r45 to i64
-%r48 = getelementptr i64, i64* %r1, i32 0
-store i64 %r46, i64* %r48
-%r49 = lshr i128 %r45, 64
-%r50 = trunc i128 %r49 to i64
-%r52 = getelementptr i64, i64* %r1, i32 1
-store i64 %r50, i64* %r52
+%r27 = call i256 @mulPv192x64(i64* %r4, i64 %r26)
+%r28 = zext i256 %r27 to i320
+%r29 = add i320 %r24, %r28
+%r30 = lshr i320 %r29, 64
+%r32 = getelementptr i64, i64* %r3, i32 2
+%r33 = load i64, i64* %r32
+%r34 = call i256 @mulPv192x64(i64* %r2, i64 %r33)
+%r35 = zext i256 %r34 to i320
+%r36 = add i320 %r30, %r35
+%r37 = trunc i320 %r36 to i64
+%r38 = mul i64 %r37, %r7
+%r39 = call i256 @mulPv192x64(i64* %r4, i64 %r38)
+%r40 = zext i256 %r39 to i320
+%r41 = add i320 %r36, %r40
+%r42 = lshr i320 %r41, 64
+%r43 = trunc i320 %r42 to i256
+%r44 = load i64, i64* %r4
+%r45 = zext i64 %r44 to i128
+%r47 = getelementptr i64, i64* %r4, i32 1
+%r48 = load i64, i64* %r47
+%r49 = zext i64 %r48 to i128
+%r50 = shl i128 %r49, 64
+%r51 = or i128 %r45, %r50
+%r52 = zext i128 %r51 to i192
+%r54 = getelementptr i64, i64* %r4, i32 2
+%r55 = load i64, i64* %r54
+%r56 = zext i64 %r55 to i192
+%r57 = shl i192 %r56, 128
+%r58 = or i192 %r52, %r57
+%r59 = zext i192 %r58 to i256
+%r60 = sub i256 %r43, %r59
+%r61 = lshr i256 %r60, 192
+%r62 = trunc i256 %r61 to i1
+%r63 = select i1 %r62, i256 %r43, i256 %r60
+%r64 = trunc i256 %r63 to i192
+%r66 = getelementptr i64, i64* %r1, i32 0
+%r67 = trunc i192 %r64 to i64
+store i64 %r67, i64* %r66
+%r68 = lshr i192 %r64, 64
+%r70 = getelementptr i64, i64* %r1, i32 1
+%r71 = trunc i192 %r68 to i64
+store i64 %r71, i64* %r70
+%r72 = lshr i192 %r68, 64
+%r74 = getelementptr i64, i64* %r1, i32 2
+%r75 = trunc i192 %r72 to i64
+store i64 %r75, i64* %r74
 ret void
 }
-define void @mcl_fp_montNF2L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
+define void @mcl_fp_montNF3L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
 {
 %r6 = getelementptr i64, i64* %r4, i32 -1
 %r7 = load i64, i64* %r6
 %r8 = load i64, i64* %r3
-%r9 = call i192 @mulPv128x64(i64* %r2, i64 %r8)
-%r10 = trunc i192 %r9 to i64
+%r9 = call i256 @mulPv192x64(i64* %r2, i64 %r8)
+%r10 = trunc i256 %r9 to i64
 %r11 = mul i64 %r10, %r7
-%r12 = call i192 @mulPv128x64(i64* %r4, i64 %r11)
-%r13 = add i192 %r9, %r12
-%r14 = lshr i192 %r13, 64
-%r16 = getelementptr i64, i64* %r3, i32 1
+%r12 = call i256 @mulPv192x64(i64* %r4, i64 %r11)
+%r13 = add i256 %r9, %r12
+%r14 = lshr i256 %r13, 64
+%r16 = getelementptr i64, i64* %r3, i32 1
 %r17 = load i64, i64* %r16
-%r18 = call i192 @mulPv128x64(i64* %r2, i64 %r17)
-%r19 = add i192 %r14, %r18
-%r20 = trunc i192 %r19 to i64
+%r18 = call i256 @mulPv192x64(i64* %r2, i64 %r17)
+%r19 = add i256 %r14, %r18
+%r20 = trunc i256 %r19 to i64
 %r21 = mul i64 %r20, %r7
-%r22 = call i192 @mulPv128x64(i64* %r4, i64 %r21)
-%r23 = add i192 %r19, %r22
-%r24 = lshr i192 %r23, 64
-%r25 = trunc i192 %r24 to i128
-%r26 = load i64, i64* %r4
+%r22 = call i256 @mulPv192x64(i64* %r4, i64 %r21)
+%r23 = add i256 %r19, %r22
+%r24 = lshr i256 %r23, 64
+%r26 = getelementptr i64, i64* %r3, i32 2
+%r27 = load i64, i64* %r26
+%r28 = call i256 @mulPv192x64(i64* %r2, i64 %r27)
+%r29 = add i256 %r24, %r28
+%r30 = trunc i256 %r29 to i64
+%r31 = mul i64 %r30, %r7
+%r32 = call i256 @mulPv192x64(i64* %r4, i64 %r31)
+%r33 = add i256 %r29, %r32
+%r34 = lshr i256 %r33, 64
+%r35 = trunc i256 %r34 to i192
+%r36 = load i64, i64* %r4
+%r37 = zext i64 %r36 to i128
+%r39 = getelementptr i64, i64* %r4, i32 1
+%r40 = load i64, i64* %r39
+%r41 = zext i64 %r40 to i128
+%r42 = shl i128 %r41, 64
+%r43 = or i128 %r37, %r42
+%r44 = zext i128 %r43 to i192
+%r46 = getelementptr i64, i64* %r4, i32 2
+%r47 = load i64, i64* %r46
+%r48 = zext i64 %r47 to i192
+%r49 = shl i192 %r48, 128
+%r50 = or i192 %r44, %r49
+%r51 = sub i192 %r35, %r50
+%r52 = lshr i192 %r51, 191
+%r53 = trunc i192 %r52 to i1
+%r54 = select i1 %r53, i192 %r35, i192 %r51
+%r56 = getelementptr i64, i64* %r1, i32 0
+%r57 = trunc i192 %r54 to i64
+store i64 %r57, i64* %r56
+%r58 = lshr i192 %r54, 64
+%r60 = getelementptr i64, i64* %r1, i32 1
+%r61 = trunc i192 %r58 to i64
+store i64 %r61, i64* %r60
+%r62 = lshr i192 %r58, 64
+%r64 = getelementptr i64, i64* %r1, i32 2
+%r65 = trunc i192 %r62 to i64
+store i64 %r65, i64* %r64
+ret void
+}
+define void @mcl_fp_montRed3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+{
+%r5 = getelementptr i64, i64* %r3, i32 -1
+%r6 = load i64, i64* %r5
+%r7 = load i64, i64* %r3
+%r8 = zext i64 %r7 to i128
+%r10 = getelementptr i64, i64* %r3, i32 1
+%r11 = load i64, i64* %r10
+%r12 = zext i64 %r11 to i128
+%r13 = shl i128 %r12, 64
+%r14 = or i128 %r8, %r13
+%r15 = zext i128 %r14 to i192
+%r17 = getelementptr i64, i64* %r3, i32 2
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i192
+%r20 = shl i192 %r19, 128
+%r21 = or i192 %r15, %r20
+%r22 = load i64, i64* %r2
+%r23 = zext i64 %r22 to i128
+%r25 = getelementptr i64, i64* %r2, i32 1
+%r26 = load i64, i64* %r25
 %r27 = zext i64 %r26 to i128
-%r29 = getelementptr i64, i64* %r4, i32 1
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i128
-%r32 = shl i128 %r31, 64
-%r33 = or i128 %r27, %r32
-%r34 = sub i128 %r25, %r33
-%r35 = lshr i128 %r34, 127
-%r36 = trunc i128 %r35 to i1
-%r37 = select i1 %r36, i128 %r25, i128 %r34
-%r38 = trunc i128 %r37 to i64
-%r40 = getelementptr i64, i64* %r1, i32 0
-store i64 %r38, i64* %r40
-%r41 = lshr i128 %r37, 64
-%r42 = trunc i128 %r41 to i64
-%r44 = getelementptr i64, i64* %r1, i32 1
-store i64 %r42, i64* %r44
+%r28 = shl i128 %r27, 64
+%r29 = or i128 %r23, %r28
+%r30 = zext i128 %r29 to i192
+%r32 = getelementptr i64, i64* %r2, i32 2
+%r33 = load i64, i64* %r32
+%r34 = zext i64 %r33 to i192
+%r35 = shl i192 %r34, 128
+%r36 = or i192 %r30, %r35
+%r37 = trunc i192 %r36 to i64
+%r38 = mul i64 %r37, %r6
+%r39 = call i256 @mulPv192x64(i64* %r3, i64 %r38)
+%r41 = getelementptr i64, i64* %r2, i32 3
+%r42 = load i64, i64* %r41
+%r43 = zext i64 %r42 to i256
+%r44 = shl i256 %r43, 192
+%r45 = zext i192 %r36 to i256
+%r46 = or i256 %r44, %r45
+%r47 = zext i256 %r46 to i320
+%r48 = zext i256 %r39 to i320
+%r49 = add i320 %r47, %r48
+%r50 = lshr i320 %r49, 64
+%r51 = trunc i320 %r50 to i256
+%r52 = lshr i256 %r51, 192
+%r53 = trunc i256 %r52 to i64
+%r54 = trunc i256 %r51 to i192
+%r55 = trunc i192 %r54 to i64
+%r56 = mul i64 %r55, %r6
+%r57 = call i256 @mulPv192x64(i64* %r3, i64 %r56)
+%r58 = zext i64 %r53 to i256
+%r59 = shl i256 %r58, 192
+%r60 = add i256 %r57, %r59
+%r62 = getelementptr i64, i64* %r2, i32 4
+%r63 = load i64, i64* %r62
+%r64 = zext i64 %r63 to i256
+%r65 = shl i256 %r64, 192
+%r66 = zext i192 %r54 to i256
+%r67 = or i256 %r65, %r66
+%r68 = zext i256 %r67 to i320
+%r69 = zext i256 %r60 to i320
+%r70 = add i320 %r68, %r69
+%r71 = lshr i320 %r70, 64
+%r72 = trunc i320 %r71 to i256
+%r73 = lshr i256 %r72, 192
+%r74 = trunc i256 %r73 to i64
+%r75 = trunc i256 %r72 to i192
+%r76 = trunc i192 %r75 to i64
+%r77 = mul i64 %r76, %r6
+%r78 = call i256 @mulPv192x64(i64* %r3, i64 %r77)
+%r79 = zext i64 %r74 to i256
+%r80 = shl i256 %r79, 192
+%r81 = add i256 %r78, %r80
+%r83 = getelementptr i64, i64* %r2, i32 5
+%r84 = load i64, i64* %r83
+%r85 = zext i64 %r84 to i256
+%r86 = shl i256 %r85, 192
+%r87 = zext i192 %r75 to i256
+%r88 = or i256 %r86, %r87
+%r89 = zext i256 %r88 to i320
+%r90 = zext i256 %r81 to i320
+%r91 = add i320 %r89, %r90
+%r92 = lshr i320 %r91, 64
+%r93 = trunc i320 %r92 to i256
+%r94 = lshr i256 %r93, 192
+%r95 = trunc i256 %r94 to i64
+%r96 = trunc i256 %r93 to i192
+%r97 = zext i192 %r21 to i256
+%r98 = zext i192 %r96 to i256
+%r99 = sub i256 %r98, %r97
+%r100 = lshr i256 %r99, 192
+%r101 = trunc i256 %r100 to i1
+%r102 = select i1 %r101, i256 %r98, i256 %r99
+%r103 = trunc i256 %r102 to i192
+%r105 = getelementptr i64, i64* %r1, i32 0
+%r106 = trunc i192 %r103 to i64
+store i64 %r106, i64* %r105
+%r107 = lshr i192 %r103, 64
+%r109 = getelementptr i64, i64* %r1, i32 1
+%r110 = trunc i192 %r107 to i64
+store i64 %r110, i64* %r109
+%r111 = lshr i192 %r107, 64
+%r113 = getelementptr i64, i64* %r1, i32 2
+%r114 = trunc i192 %r111 to i64
+store i64 %r114, i64* %r113
 ret void
 }
-define void @mcl_fp_montRed2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+define void @mcl_fp_montRedNF3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
 {
 %r5 = getelementptr i64, i64* %r3, i32 -1
 %r6 = load i64, i64* %r5
@@ -795,56 +684,100 @@ define void @mcl_fp_montRed2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r12 = zext i64 %r11 to i128
 %r13 = shl i128 %r12, 64
 %r14 = or i128 %r8, %r13
-%r15 = load i64, i64* %r2
-%r16 = zext i64 %r15 to i128
-%r18 = getelementptr i64, i64* %r2, i32 1
-%r19 = load i64, i64* %r18
-%r20 = zext i64 %r19 to i128
-%r21 = shl i128 %r20, 64
-%r22 = or i128 %r16, %r21
-%r23 = zext i128 %r22 to i192
-%r25 = getelementptr i64, i64* %r2, i32 2
+%r15 = zext i128 %r14 to i192
+%r17 = getelementptr i64, i64* %r3, i32 2
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i192
+%r20 = shl i192 %r19, 128
+%r21 = or i192 %r15, %r20
+%r22 = load i64, i64* %r2
+%r23 = zext i64 %r22 to i128
+%r25 = getelementptr i64, i64* %r2, i32 1
 %r26 = load i64, i64* %r25
-%r27 = zext i64 %r26 to i192
-%r28 = shl i192 %r27, 128
-%r29 = or i192 %r23, %r28
-%r30 = zext i192 %r29 to i256
-%r32 = getelementptr i64, i64* %r2, i32 3
+%r27 = zext i64 %r26 to i128
+%r28 = shl i128 %r27, 64
+%r29 = or i128 %r23, %r28
+%r30 = zext i128 %r29 to i192
+%r32 = getelementptr i64, i64* %r2, i32 2
 %r33 = load i64, i64* %r32
-%r34 = zext i64 %r33 to i256
-%r35 = shl i256 %r34, 192
-%r36 = or i256 %r30, %r35
-%r37 = zext i256 %r36 to i320
-%r38 = trunc i320 %r37 to i64
-%r39 = mul i64 %r38, %r6
-%r40 = call i192 @mulPv128x64(i64* %r3, i64 %r39)
-%r41 = zext i192 %r40 to i320
-%r42 = add i320 %r37, %r41
-%r43 = lshr i320 %r42, 64
-%r44 = trunc i320 %r43 to i256
-%r45 = trunc i256 %r44 to i64
-%r46 = mul i64 %r45, %r6
-%r47 = call i192 @mulPv128x64(i64* %r3, i64 %r46)
-%r48 = zext i192 %r47 to i256
-%r49 = add i256 %r44, %r48
-%r50 = lshr i256 %r49, 64
-%r51 = trunc i256 %r50 to i192
-%r52 = zext i128 %r14 to i192
-%r53 = sub i192 %r51, %r52
-%r54 = lshr i192 %r53, 128
-%r55 = trunc i192 %r54 to i1
-%r56 = select i1 %r55, i192 %r51, i192 %r53
-%r57 = trunc i192 %r56 to i128
-%r58 = trunc i128 %r57 to i64
-%r60 = getelementptr i64, i64* %r1, i32 0
-store i64 %r58, i64* %r60
-%r61 = lshr i128 %r57, 64
-%r62 = trunc i128 %r61 to i64
-%r64 = getelementptr i64, i64* %r1, i32 1
-store i64 %r62, i64* %r64
+%r34 = zext i64 %r33 to i192
+%r35 = shl i192 %r34, 128
+%r36 = or i192 %r30, %r35
+%r37 = trunc i192 %r36 to i64
+%r38 = mul i64 %r37, %r6
+%r39 = call i256 @mulPv192x64(i64* %r3, i64 %r38)
+%r41 = getelementptr i64, i64* %r2, i32 3
+%r42 = load i64, i64* %r41
+%r43 = zext i64 %r42 to i256
+%r44 = shl i256 %r43, 192
+%r45 = zext i192 %r36 to i256
+%r46 = or i256 %r44, %r45
+%r47 = zext i256 %r46 to i320
+%r48 = zext i256 %r39 to i320
+%r49 = add i320 %r47, %r48
+%r50 = lshr i320 %r49, 64
+%r51 = trunc i320 %r50 to i256
+%r52 = lshr i256 %r51, 192
+%r53 = trunc i256 %r52 to i64
+%r54 = trunc i256 %r51 to i192
+%r55 = trunc i192 %r54 to i64
+%r56 = mul i64 %r55, %r6
+%r57 = call i256 @mulPv192x64(i64* %r3, i64 %r56)
+%r58 = zext i64 %r53 to i256
+%r59 = shl i256 %r58, 192
+%r60 = add i256 %r57, %r59
+%r62 = getelementptr i64, i64* %r2, i32 4
+%r63 = load i64, i64* %r62
+%r64 = zext i64 %r63 to i256
+%r65 = shl i256 %r64, 192
+%r66 = zext i192 %r54 to i256
+%r67 = or i256 %r65, %r66
+%r68 = zext i256 %r67 to i320
+%r69 = zext i256 %r60 to i320
+%r70 = add i320 %r68, %r69
+%r71 = lshr i320 %r70, 64
+%r72 = trunc i320 %r71 to i256
+%r73 = lshr i256 %r72, 192
+%r74 = trunc i256 %r73 to i64
+%r75 = trunc i256 %r72 to i192
+%r76 = trunc i192 %r75 to i64
+%r77 = mul i64 %r76, %r6
+%r78 = call i256 @mulPv192x64(i64* %r3, i64 %r77)
+%r79 = zext i64 %r74 to i256
+%r80 = shl i256 %r79, 192
+%r81 = add i256 %r78, %r80
+%r83 = getelementptr i64, i64* %r2, i32 5
+%r84 = load i64, i64* %r83
+%r85 = zext i64 %r84 to i256
+%r86 = shl i256 %r85, 192
+%r87 = zext i192 %r75 to i256
+%r88 = or i256 %r86, %r87
+%r89 = zext i256 %r88 to i320
+%r90 = zext i256 %r81 to i320
+%r91 = add i320 %r89, %r90
+%r92 = lshr i320 %r91, 64
+%r93 = trunc i320 %r92 to i256
+%r94 = lshr i256 %r93, 192
+%r95 = trunc i256 %r94 to i64
+%r96 = trunc i256 %r93 to i192
+%r97 = sub i192 %r96, %r21
+%r98 = lshr i192 %r97, 191
+%r99 = trunc i192 %r98 to i1
+%r100 = select i1 %r99, i192 %r96, i192 %r97
+%r102 = getelementptr i64, i64* %r1, i32 0
+%r103 = trunc i192 %r100 to i64
+store i64 %r103, i64* %r102
+%r104 = lshr i192 %r100, 64
+%r106 = getelementptr i64, i64* %r1, i32 1
+%r107 = trunc i192 %r104 to i64
+store i64 %r107, i64* %r106
+%r108 = lshr i192 %r104, 64
+%r110 = getelementptr i64, i64* %r1, i32 2
+%r111 = trunc i192 %r108 to i64
+store i64 %r111, i64* %r110
 ret void
 }
-define i64 @mcl_fp_addPre2L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define i64 @mcl_fp_addPre3L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r3
 %r6 = zext i64 %r5 to i128
@@ -854,28 +787,44 @@ define i64 @mcl_fp_addPre2L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias
 %r11 = shl i128 %r10, 64
 %r12 = or i128 %r6, %r11
 %r13 = zext i128 %r12 to i192
-%r14 = load i64, i64* %r4
-%r15 = zext i64 %r14 to i128
-%r17 = getelementptr i64, i64* %r4, i32 1
-%r18 = load i64, i64* %r17
-%r19 = zext i64 %r18 to i128
-%r20 = shl i128 %r19, 64
-%r21 = or i128 %r15, %r20
-%r22 = zext i128 %r21 to i192
-%r23 = add i192 %r13, %r22
-%r24 = trunc i192 %r23 to i128
-%r25 = trunc i128 %r24 to i64
-%r27 = getelementptr i64, i64* %r2, i32 0
-store i64 %r25, i64* %r27
-%r28 = lshr i128 %r24, 64
-%r29 = trunc i128 %r28 to i64
-%r31 = getelementptr i64, i64* %r2, i32 1
-store i64 %r29, i64* %r31
-%r32 = lshr i192 %r23, 128
-%r33 = trunc i192 %r32 to i64
-ret i64 %r33
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r21 = load i64, i64* %r4
+%r22 = zext i64 %r21 to i128
+%r24 = getelementptr i64, i64* %r4, i32 1
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i128
+%r27 = shl i128 %r26, 64
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i192
+%r31 = getelementptr i64, i64* %r4, i32 2
+%r32 = load i64, i64* %r31
+%r33 = zext i64 %r32 to i192
+%r34 = shl i192 %r33, 128
+%r35 = or i192 %r29, %r34
+%r36 = zext i192 %r35 to i256
+%r37 = add i256 %r20, %r36
+%r38 = trunc i256 %r37 to i192
+%r40 = getelementptr i64, i64* %r2, i32 0
+%r41 = trunc i192 %r38 to i64
+store i64 %r41, i64* %r40
+%r42 = lshr i192 %r38, 64
+%r44 = getelementptr i64, i64* %r2, i32 1
+%r45 = trunc i192 %r42 to i64
+store i64 %r45, i64* %r44
+%r46 = lshr i192 %r42, 64
+%r48 = getelementptr i64, i64* %r2, i32 2
+%r49 = trunc i192 %r46 to i64
+store i64 %r49, i64* %r48
+%r50 = lshr i256 %r37, 192
+%r51 = trunc i256 %r50 to i64
+ret i64 %r51
 }
-define i64 @mcl_fp_subPre2L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define i64 @mcl_fp_subPre3L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r3
 %r6 = zext i64 %r5 to i128
@@ -885,29 +834,45 @@ define i64 @mcl_fp_subPre2L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias
 %r11 = shl i128 %r10, 64
 %r12 = or i128 %r6, %r11
 %r13 = zext i128 %r12 to i192
-%r14 = load i64, i64* %r4
-%r15 = zext i64 %r14 to i128
-%r17 = getelementptr i64, i64* %r4, i32 1
-%r18 = load i64, i64* %r17
-%r19 = zext i64 %r18 to i128
-%r20 = shl i128 %r19, 64
-%r21 = or i128 %r15, %r20
-%r22 = zext i128 %r21 to i192
-%r23 = sub i192 %r13, %r22
-%r24 = trunc i192 %r23 to i128
-%r25 = trunc i128 %r24 to i64
-%r27 = getelementptr i64, i64* %r2, i32 0
-store i64 %r25, i64* %r27
-%r28 = lshr i128 %r24, 64
-%r29 = trunc i128 %r28 to i64
-%r31 = getelementptr i64, i64* %r2, i32 1
-store i64 %r29, i64* %r31
-%r32 = lshr i192 %r23, 128
-%r33 = trunc i192 %r32 to i64
-%r35 = and i64 %r33, 1
-ret i64 %r35
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r21 = load i64, i64* %r4
+%r22 = zext i64 %r21 to i128
+%r24 = getelementptr i64, i64* %r4, i32 1
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i128
+%r27 = shl i128 %r26, 64
+%r28 = or i128 %r22, %r27
+%r29 = zext i128 %r28 to i192
+%r31 = getelementptr i64, i64* %r4, i32 2
+%r32 = load i64, i64* %r31
+%r33 = zext i64 %r32 to i192
+%r34 = shl i192 %r33, 128
+%r35 = or i192 %r29, %r34
+%r36 = zext i192 %r35 to i256
+%r37 = sub i256 %r20, %r36
+%r38 = trunc i256 %r37 to i192
+%r40 = getelementptr i64, i64* %r2, i32 0
+%r41 = trunc i192 %r38 to i64
+store i64 %r41, i64* %r40
+%r42 = lshr i192 %r38, 64
+%r44 = getelementptr i64, i64* %r2, i32 1
+%r45 = trunc i192 %r42 to i64
+store i64 %r45, i64* %r44
+%r46 = lshr i192 %r42, 64
+%r48 = getelementptr i64, i64* %r2, i32 2
+%r49 = trunc i192 %r46 to i64
+store i64 %r49, i64* %r48
+%r51 = lshr i256 %r37, 192
+%r52 = trunc i256 %r51 to i64
+%r53 = and i64 %r52, 1
+ret i64 %r53
 }
-define void @mcl_fp_shr1_2L(i64* noalias  %r1, i64* noalias  %r2)
+define void @mcl_fp_shr1_3L(i64* noalias  %r1, i64* noalias  %r2)
 {
 %r3 = load i64, i64* %r2
 %r4 = zext i64 %r3 to i128
@@ -916,17 +881,27 @@ define void @mcl_fp_shr1_2L(i64* noalias  %r1, i64* noalias  %r2)
 %r8 = zext i64 %r7 to i128
 %r9 = shl i128 %r8, 64
 %r10 = or i128 %r4, %r9
-%r11 = lshr i128 %r10, 1
-%r12 = trunc i128 %r11 to i64
-%r14 = getelementptr i64, i64* %r1, i32 0
-store i64 %r12, i64* %r14
-%r15 = lshr i128 %r11, 64
-%r16 = trunc i128 %r15 to i64
-%r18 = getelementptr i64, i64* %r1, i32 1
-store i64 %r16, i64* %r18
+%r11 = zext i128 %r10 to i192
+%r13 = getelementptr i64, i64* %r2, i32 2
+%r14 = load i64, i64* %r13
+%r15 = zext i64 %r14 to i192
+%r16 = shl i192 %r15, 128
+%r17 = or i192 %r11, %r16
+%r18 = lshr i192 %r17, 1
+%r20 = getelementptr i64, i64* %r1, i32 0
+%r21 = trunc i192 %r18 to i64
+store i64 %r21, i64* %r20
+%r22 = lshr i192 %r18, 64
+%r24 = getelementptr i64, i64* %r1, i32 1
+%r25 = trunc i192 %r22 to i64
+store i64 %r25, i64* %r24
+%r26 = lshr i192 %r22, 64
+%r28 = getelementptr i64, i64* %r1, i32 2
+%r29 = trunc i192 %r26 to i64
+store i64 %r29, i64* %r28
 ret void
 }
-define void @mcl_fp_add2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_add3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -935,50 +910,76 @@ define void @mcl_fp_add2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r
 %r10 = zext i64 %r9 to i128
 %r11 = shl i128 %r10, 64
 %r12 = or i128 %r6, %r11
-%r13 = load i64, i64* %r3
-%r14 = zext i64 %r13 to i128
-%r16 = getelementptr i64, i64* %r3, i32 1
-%r17 = load i64, i64* %r16
-%r18 = zext i64 %r17 to i128
-%r19 = shl i128 %r18, 64
-%r20 = or i128 %r14, %r19
-%r21 = zext i128 %r12 to i192
-%r22 = zext i128 %r20 to i192
-%r23 = add i192 %r21, %r22
-%r24 = trunc i192 %r23 to i128
-%r25 = trunc i128 %r24 to i64
-%r27 = getelementptr i64, i64* %r1, i32 0
-store i64 %r25, i64* %r27
-%r28 = lshr i128 %r24, 64
-%r29 = trunc i128 %r28 to i64
-%r31 = getelementptr i64, i64* %r1, i32 1
-store i64 %r29, i64* %r31
-%r32 = load i64, i64* %r4
-%r33 = zext i64 %r32 to i128
-%r35 = getelementptr i64, i64* %r4, i32 1
-%r36 = load i64, i64* %r35
-%r37 = zext i64 %r36 to i128
-%r38 = shl i128 %r37, 64
-%r39 = or i128 %r33, %r38
-%r40 = zext i128 %r39 to i192
-%r41 = sub i192 %r23, %r40
-%r42 = lshr i192 %r41, 128
-%r43 = trunc i192 %r42 to i1
-br i1%r43, label %carry, label %nocarry
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = load i64, i64* %r3
+%r21 = zext i64 %r20 to i128
+%r23 = getelementptr i64, i64* %r3, i32 1
+%r24 = load i64, i64* %r23
+%r25 = zext i64 %r24 to i128
+%r26 = shl i128 %r25, 64
+%r27 = or i128 %r21, %r26
+%r28 = zext i128 %r27 to i192
+%r30 = getelementptr i64, i64* %r3, i32 2
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i192
+%r33 = shl i192 %r32, 128
+%r34 = or i192 %r28, %r33
+%r35 = zext i192 %r19 to i256
+%r36 = zext i192 %r34 to i256
+%r37 = add i256 %r35, %r36
+%r38 = trunc i256 %r37 to i192
+%r40 = getelementptr i64, i64* %r1, i32 0
+%r41 = trunc i192 %r38 to i64
+store i64 %r41, i64* %r40
+%r42 = lshr i192 %r38, 64
+%r44 = getelementptr i64, i64* %r1, i32 1
+%r45 = trunc i192 %r42 to i64
+store i64 %r45, i64* %r44
+%r46 = lshr i192 %r42, 64
+%r48 = getelementptr i64, i64* %r1, i32 2
+%r49 = trunc i192 %r46 to i64
+store i64 %r49, i64* %r48
+%r50 = load i64, i64* %r4
+%r51 = zext i64 %r50 to i128
+%r53 = getelementptr i64, i64* %r4, i32 1
+%r54 = load i64, i64* %r53
+%r55 = zext i64 %r54 to i128
+%r56 = shl i128 %r55, 64
+%r57 = or i128 %r51, %r56
+%r58 = zext i128 %r57 to i192
+%r60 = getelementptr i64, i64* %r4, i32 2
+%r61 = load i64, i64* %r60
+%r62 = zext i64 %r61 to i192
+%r63 = shl i192 %r62, 128
+%r64 = or i192 %r58, %r63
+%r65 = zext i192 %r64 to i256
+%r66 = sub i256 %r37, %r65
+%r67 = lshr i256 %r66, 192
+%r68 = trunc i256 %r67 to i1
+br i1%r68, label %carry, label %nocarry
 nocarry:
-%r44 = trunc i192 %r41 to i128
-%r45 = trunc i128 %r44 to i64
-%r47 = getelementptr i64, i64* %r1, i32 0
-store i64 %r45, i64* %r47
-%r48 = lshr i128 %r44, 64
-%r49 = trunc i128 %r48 to i64
-%r51 = getelementptr i64, i64* %r1, i32 1
-store i64 %r49, i64* %r51
+%r69 = trunc i256 %r66 to i192
+%r71 = getelementptr i64, i64* %r1, i32 0
+%r72 = trunc i192 %r69 to i64
+store i64 %r72, i64* %r71
+%r73 = lshr i192 %r69, 64
+%r75 = getelementptr i64, i64* %r1, i32 1
+%r76 = trunc i192 %r73 to i64
+store i64 %r76, i64* %r75
+%r77 = lshr i192 %r73, 64
+%r79 = getelementptr i64, i64* %r1, i32 2
+%r80 = trunc i192 %r77 to i64
+store i64 %r80, i64* %r79
 ret void
 carry:
 ret void
 }
-define void @mcl_fp_addNF2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_addNF3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -987,35 +988,57 @@ define void @mcl_fp_addNF2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r10 = zext i64 %r9 to i128
 %r11 = shl i128 %r10, 64
 %r12 = or i128 %r6, %r11
-%r13 = load i64, i64* %r3
-%r14 = zext i64 %r13 to i128
-%r16 = getelementptr i64, i64* %r3, i32 1
-%r17 = load i64, i64* %r16
-%r18 = zext i64 %r17 to i128
-%r19 = shl i128 %r18, 64
-%r20 = or i128 %r14, %r19
-%r21 = add i128 %r12, %r20
-%r22 = load i64, i64* %r4
-%r23 = zext i64 %r22 to i128
-%r25 = getelementptr i64, i64* %r4, i32 1
-%r26 = load i64, i64* %r25
-%r27 = zext i64 %r26 to i128
-%r28 = shl i128 %r27, 64
-%r29 = or i128 %r23, %r28
-%r30 = sub i128 %r21, %r29
-%r31 = lshr i128 %r30, 127
-%r32 = trunc i128 %r31 to i1
-%r33 = select i1 %r32, i128 %r21, i128 %r30
-%r34 = trunc i128 %r33 to i64
-%r36 = getelementptr i64, i64* %r1, i32 0
-store i64 %r34, i64* %r36
-%r37 = lshr i128 %r33, 64
-%r38 = trunc i128 %r37 to i64
-%r40 = getelementptr i64, i64* %r1, i32 1
-store i64 %r38, i64* %r40
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = load i64, i64* %r3
+%r21 = zext i64 %r20 to i128
+%r23 = getelementptr i64, i64* %r3, i32 1
+%r24 = load i64, i64* %r23
+%r25 = zext i64 %r24 to i128
+%r26 = shl i128 %r25, 64
+%r27 = or i128 %r21, %r26
+%r28 = zext i128 %r27 to i192
+%r30 = getelementptr i64, i64* %r3, i32 2
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i192
+%r33 = shl i192 %r32, 128
+%r34 = or i192 %r28, %r33
+%r35 = add i192 %r19, %r34
+%r36 = load i64, i64* %r4
+%r37 = zext i64 %r36 to i128
+%r39 = getelementptr i64, i64* %r4, i32 1
+%r40 = load i64, i64* %r39
+%r41 = zext i64 %r40 to i128
+%r42 = shl i128 %r41, 64
+%r43 = or i128 %r37, %r42
+%r44 = zext i128 %r43 to i192
+%r46 = getelementptr i64, i64* %r4, i32 2
+%r47 = load i64, i64* %r46
+%r48 = zext i64 %r47 to i192
+%r49 = shl i192 %r48, 128
+%r50 = or i192 %r44, %r49
+%r51 = sub i192 %r35, %r50
+%r52 = lshr i192 %r51, 191
+%r53 = trunc i192 %r52 to i1
+%r54 = select i1 %r53, i192 %r35, i192 %r51
+%r56 = getelementptr i64, i64* %r1, i32 0
+%r57 = trunc i192 %r54 to i64
+store i64 %r57, i64* %r56
+%r58 = lshr i192 %r54, 64
+%r60 = getelementptr i64, i64* %r1, i32 1
+%r61 = trunc i192 %r58 to i64
+store i64 %r61, i64* %r60
+%r62 = lshr i192 %r58, 64
+%r64 = getelementptr i64, i64* %r1, i32 2
+%r65 = trunc i192 %r62 to i64
+store i64 %r65, i64* %r64
 ret void
 }
-define void @mcl_fp_sub2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_sub3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -1024,48 +1047,74 @@ define void @mcl_fp_sub2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r
 %r10 = zext i64 %r9 to i128
 %r11 = shl i128 %r10, 64
 %r12 = or i128 %r6, %r11
-%r13 = load i64, i64* %r3
-%r14 = zext i64 %r13 to i128
-%r16 = getelementptr i64, i64* %r3, i32 1
-%r17 = load i64, i64* %r16
-%r18 = zext i64 %r17 to i128
-%r19 = shl i128 %r18, 64
-%r20 = or i128 %r14, %r19
-%r21 = zext i128 %r12 to i192
-%r22 = zext i128 %r20 to i192
-%r23 = sub i192 %r21, %r22
-%r24 = trunc i192 %r23 to i128
-%r25 = lshr i192 %r23, 128
-%r26 = trunc i192 %r25 to i1
-%r27 = trunc i128 %r24 to i64
-%r29 = getelementptr i64, i64* %r1, i32 0
-store i64 %r27, i64* %r29
-%r30 = lshr i128 %r24, 64
-%r31 = trunc i128 %r30 to i64
-%r33 = getelementptr i64, i64* %r1, i32 1
-store i64 %r31, i64* %r33
-br i1%r26, label %carry, label %nocarry
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = load i64, i64* %r3
+%r21 = zext i64 %r20 to i128
+%r23 = getelementptr i64, i64* %r3, i32 1
+%r24 = load i64, i64* %r23
+%r25 = zext i64 %r24 to i128
+%r26 = shl i128 %r25, 64
+%r27 = or i128 %r21, %r26
+%r28 = zext i128 %r27 to i192
+%r30 = getelementptr i64, i64* %r3, i32 2
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i192
+%r33 = shl i192 %r32, 128
+%r34 = or i192 %r28, %r33
+%r35 = zext i192 %r19 to i256
+%r36 = zext i192 %r34 to i256
+%r37 = sub i256 %r35, %r36
+%r38 = trunc i256 %r37 to i192
+%r39 = lshr i256 %r37, 192
+%r40 = trunc i256 %r39 to i1
+%r42 = getelementptr i64, i64* %r1, i32 0
+%r43 = trunc i192 %r38 to i64
+store i64 %r43, i64* %r42
+%r44 = lshr i192 %r38, 64
+%r46 = getelementptr i64, i64* %r1, i32 1
+%r47 = trunc i192 %r44 to i64
+store i64 %r47, i64* %r46
+%r48 = lshr i192 %r44, 64
+%r50 = getelementptr i64, i64* %r1, i32 2
+%r51 = trunc i192 %r48 to i64
+store i64 %r51, i64* %r50
+br i1%r40, label %carry, label %nocarry
 nocarry:
 ret void
 carry:
-%r34 = load i64, i64* %r4
-%r35 = zext i64 %r34 to i128
-%r37 = getelementptr i64, i64* %r4, i32 1
-%r38 = load i64, i64* %r37
-%r39 = zext i64 %r38 to i128
-%r40 = shl i128 %r39, 64
-%r41 = or i128 %r35, %r40
-%r42 = add i128 %r24, %r41
-%r43 = trunc i128 %r42 to i64
-%r45 = getelementptr i64, i64* %r1, i32 0
-store i64 %r43, i64* %r45
-%r46 = lshr i128 %r42, 64
-%r47 = trunc i128 %r46 to i64
-%r49 = getelementptr i64, i64* %r1, i32 1
-store i64 %r47, i64* %r49
+%r52 = load i64, i64* %r4
+%r53 = zext i64 %r52 to i128
+%r55 = getelementptr i64, i64* %r4, i32 1
+%r56 = load i64, i64* %r55
+%r57 = zext i64 %r56 to i128
+%r58 = shl i128 %r57, 64
+%r59 = or i128 %r53, %r58
+%r60 = zext i128 %r59 to i192
+%r62 = getelementptr i64, i64* %r4, i32 2
+%r63 = load i64, i64* %r62
+%r64 = zext i64 %r63 to i192
+%r65 = shl i192 %r64, 128
+%r66 = or i192 %r60, %r65
+%r67 = add i192 %r38, %r66
+%r69 = getelementptr i64, i64* %r1, i32 0
+%r70 = trunc i192 %r67 to i64
+store i64 %r70, i64* %r69
+%r71 = lshr i192 %r67, 64
+%r73 = getelementptr i64, i64* %r1, i32 1
+%r74 = trunc i192 %r71 to i64
+store i64 %r74, i64* %r73
+%r75 = lshr i192 %r71, 64
+%r77 = getelementptr i64, i64* %r1, i32 2
+%r78 = trunc i192 %r75 to i64
+store i64 %r78, i64* %r77
 ret void
 }
-define void @mcl_fp_subNF2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_subNF3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -1074,35 +1123,57 @@ define void @mcl_fp_subNF2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r10 = zext i64 %r9 to i128
 %r11 = shl i128 %r10, 64
 %r12 = or i128 %r6, %r11
-%r13 = load i64, i64* %r3
-%r14 = zext i64 %r13 to i128
-%r16 = getelementptr i64, i64* %r3, i32 1
-%r17 = load i64, i64* %r16
-%r18 = zext i64 %r17 to i128
-%r19 = shl i128 %r18, 64
-%r20 = or i128 %r14, %r19
-%r21 = sub i128 %r12, %r20
-%r22 = lshr i128 %r21, 127
-%r23 = trunc i128 %r22 to i1
-%r24 = load i64, i64* %r4
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = load i64, i64* %r3
+%r21 = zext i64 %r20 to i128
+%r23 = getelementptr i64, i64* %r3, i32 1
+%r24 = load i64, i64* %r23
 %r25 = zext i64 %r24 to i128
-%r27 = getelementptr i64, i64* %r4, i32 1
-%r28 = load i64, i64* %r27
-%r29 = zext i64 %r28 to i128
-%r30 = shl i128 %r29, 64
-%r31 = or i128 %r25, %r30
-%r33 = select i1 %r23, i128 %r31, i128 0
-%r34 = add i128 %r21, %r33
-%r35 = trunc i128 %r34 to i64
-%r37 = getelementptr i64, i64* %r1, i32 0
-store i64 %r35, i64* %r37
-%r38 = lshr i128 %r34, 64
-%r39 = trunc i128 %r38 to i64
-%r41 = getelementptr i64, i64* %r1, i32 1
-store i64 %r39, i64* %r41
+%r26 = shl i128 %r25, 64
+%r27 = or i128 %r21, %r26
+%r28 = zext i128 %r27 to i192
+%r30 = getelementptr i64, i64* %r3, i32 2
+%r31 = load i64, i64* %r30
+%r32 = zext i64 %r31 to i192
+%r33 = shl i192 %r32, 128
+%r34 = or i192 %r28, %r33
+%r35 = sub i192 %r19, %r34
+%r36 = lshr i192 %r35, 191
+%r37 = trunc i192 %r36 to i1
+%r38 = load i64, i64* %r4
+%r39 = zext i64 %r38 to i128
+%r41 = getelementptr i64, i64* %r4, i32 1
+%r42 = load i64, i64* %r41
+%r43 = zext i64 %r42 to i128
+%r44 = shl i128 %r43, 64
+%r45 = or i128 %r39, %r44
+%r46 = zext i128 %r45 to i192
+%r48 = getelementptr i64, i64* %r4, i32 2
+%r49 = load i64, i64* %r48
+%r50 = zext i64 %r49 to i192
+%r51 = shl i192 %r50, 128
+%r52 = or i192 %r46, %r51
+%r54 = select i1 %r37, i192 %r52, i192 0
+%r55 = add i192 %r35, %r54
+%r57 = getelementptr i64, i64* %r1, i32 0
+%r58 = trunc i192 %r55 to i64
+store i64 %r58, i64* %r57
+%r59 = lshr i192 %r55, 64
+%r61 = getelementptr i64, i64* %r1, i32 1
+%r62 = trunc i192 %r59 to i64
+store i64 %r62, i64* %r61
+%r63 = lshr i192 %r59, 64
+%r65 = getelementptr i64, i64* %r1, i32 2
+%r66 = trunc i192 %r63 to i64
+store i64 %r66, i64* %r65
 ret void
 }
-define void @mcl_fpDbl_add2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fpDbl_add3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -1123,62 +1194,100 @@ define void @mcl_fpDbl_add2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r24 = zext i64 %r23 to i256
 %r25 = shl i256 %r24, 192
 %r26 = or i256 %r20, %r25
-%r27 = load i64, i64* %r3
-%r28 = zext i64 %r27 to i128
-%r30 = getelementptr i64, i64* %r3, i32 1
-%r31 = load i64, i64* %r30
-%r32 = zext i64 %r31 to i128
-%r33 = shl i128 %r32, 64
-%r34 = or i128 %r28, %r33
-%r35 = zext i128 %r34 to i192
-%r37 = getelementptr i64, i64* %r3, i32 2
-%r38 = load i64, i64* %r37
-%r39 = zext i64 %r38 to i192
-%r40 = shl i192 %r39, 128
-%r41 = or i192 %r35, %r40
-%r42 = zext i192 %r41 to i256
-%r44 = getelementptr i64, i64* %r3, i32 3
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = load i64, i64* %r3
+%r42 = zext i64 %r41 to i128
+%r44 = getelementptr i64, i64* %r3, i32 1
 %r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i256
-%r47 = shl i256 %r46, 192
-%r48 = or i256 %r42, %r47
-%r49 = zext i256 %r26 to i320
-%r50 = zext i256 %r48 to i320
-%r51 = add i320 %r49, %r50
-%r52 = trunc i320 %r51 to i128
-%r53 = trunc i128 %r52 to i64
-%r55 = getelementptr i64, i64* %r1, i32 0
-store i64 %r53, i64* %r55
-%r56 = lshr i128 %r52, 64
-%r57 = trunc i128 %r56 to i64
-%r59 = getelementptr i64, i64* %r1, i32 1
-store i64 %r57, i64* %r59
-%r60 = lshr i320 %r51, 128
-%r61 = trunc i320 %r60 to i192
-%r62 = load i64, i64* %r4
-%r63 = zext i64 %r62 to i128
-%r65 = getelementptr i64, i64* %r4, i32 1
+%r46 = zext i64 %r45 to i128
+%r47 = shl i128 %r46, 64
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r48 to i192
+%r51 = getelementptr i64, i64* %r3, i32 2
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r49, %r54
+%r56 = zext i192 %r55 to i256
+%r58 = getelementptr i64, i64* %r3, i32 3
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i256
+%r61 = shl i256 %r60, 192
+%r62 = or i256 %r56, %r61
+%r63 = zext i256 %r62 to i320
+%r65 = getelementptr i64, i64* %r3, i32 4
 %r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i128
-%r68 = shl i128 %r67, 64
-%r69 = or i128 %r63, %r68
-%r70 = zext i128 %r69 to i192
-%r71 = sub i192 %r61, %r70
-%r72 = lshr i192 %r71, 128
-%r73 = trunc i192 %r72 to i1
-%r74 = select i1 %r73, i192 %r61, i192 %r71
-%r75 = trunc i192 %r74 to i128
-%r77 = getelementptr i64, i64* %r1, i32 2
-%r78 = trunc i128 %r75 to i64
-%r80 = getelementptr i64, i64* %r77, i32 0
-store i64 %r78, i64* %r80
-%r81 = lshr i128 %r75, 64
-%r82 = trunc i128 %r81 to i64
-%r84 = getelementptr i64, i64* %r77, i32 1
-store i64 %r82, i64* %r84
+%r67 = zext i64 %r66 to i320
+%r68 = shl i320 %r67, 256
+%r69 = or i320 %r63, %r68
+%r70 = zext i320 %r69 to i384
+%r72 = getelementptr i64, i64* %r3, i32 5
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i384
+%r75 = shl i384 %r74, 320
+%r76 = or i384 %r70, %r75
+%r77 = zext i384 %r40 to i448
+%r78 = zext i384 %r76 to i448
+%r79 = add i448 %r77, %r78
+%r80 = trunc i448 %r79 to i192
+%r82 = getelementptr i64, i64* %r1, i32 0
+%r83 = trunc i192 %r80 to i64
+store i64 %r83, i64* %r82
+%r84 = lshr i192 %r80, 64
+%r86 = getelementptr i64, i64* %r1, i32 1
+%r87 = trunc i192 %r84 to i64
+store i64 %r87, i64* %r86
+%r88 = lshr i192 %r84, 64
+%r90 = getelementptr i64, i64* %r1, i32 2
+%r91 = trunc i192 %r88 to i64
+store i64 %r91, i64* %r90
+%r92 = lshr i448 %r79, 192
+%r93 = trunc i448 %r92 to i256
+%r94 = load i64, i64* %r4
+%r95 = zext i64 %r94 to i128
+%r97 = getelementptr i64, i64* %r4, i32 1
+%r98 = load i64, i64* %r97
+%r99 = zext i64 %r98 to i128
+%r100 = shl i128 %r99, 64
+%r101 = or i128 %r95, %r100
+%r102 = zext i128 %r101 to i192
+%r104 = getelementptr i64, i64* %r4, i32 2
+%r105 = load i64, i64* %r104
+%r106 = zext i64 %r105 to i192
+%r107 = shl i192 %r106, 128
+%r108 = or i192 %r102, %r107
+%r109 = zext i192 %r108 to i256
+%r110 = sub i256 %r93, %r109
+%r111 = lshr i256 %r110, 192
+%r112 = trunc i256 %r111 to i1
+%r113 = select i1 %r112, i256 %r93, i256 %r110
+%r114 = trunc i256 %r113 to i192
+%r116 = getelementptr i64, i64* %r1, i32 3
+%r118 = getelementptr i64, i64* %r116, i32 0
+%r119 = trunc i192 %r114 to i64
+store i64 %r119, i64* %r118
+%r120 = lshr i192 %r114, 64
+%r122 = getelementptr i64, i64* %r116, i32 1
+%r123 = trunc i192 %r120 to i64
+store i64 %r123, i64* %r122
+%r124 = lshr i192 %r120, 64
+%r126 = getelementptr i64, i64* %r116, i32 2
+%r127 = trunc i192 %r124 to i64
+store i64 %r127, i64* %r126
 ret void
 }
-define void @mcl_fpDbl_sub2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fpDbl_sub3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -1199,60 +1308,98 @@ define void @mcl_fpDbl_sub2L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r24 = zext i64 %r23 to i256
 %r25 = shl i256 %r24, 192
 %r26 = or i256 %r20, %r25
-%r27 = load i64, i64* %r3
-%r28 = zext i64 %r27 to i128
-%r30 = getelementptr i64, i64* %r3, i32 1
-%r31 = load i64, i64* %r30
-%r32 = zext i64 %r31 to i128
-%r33 = shl i128 %r32, 64
-%r34 = or i128 %r28, %r33
-%r35 = zext i128 %r34 to i192
-%r37 = getelementptr i64, i64* %r3, i32 2
-%r38 = load i64, i64* %r37
-%r39 = zext i64 %r38 to i192
-%r40 = shl i192 %r39, 128
-%r41 = or i192 %r35, %r40
-%r42 = zext i192 %r41 to i256
-%r44 = getelementptr i64, i64* %r3, i32 3
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = load i64, i64* %r3
+%r42 = zext i64 %r41 to i128
+%r44 = getelementptr i64, i64* %r3, i32 1
 %r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i256
-%r47 = shl i256 %r46, 192
-%r48 = or i256 %r42, %r47
-%r49 = zext i256 %r26 to i320
-%r50 = zext i256 %r48 to i320
-%r51 = sub i320 %r49, %r50
-%r52 = trunc i320 %r51 to i128
-%r53 = trunc i128 %r52 to i64
-%r55 = getelementptr i64, i64* %r1, i32 0
-store i64 %r53, i64* %r55
-%r56 = lshr i128 %r52, 64
-%r57 = trunc i128 %r56 to i64
-%r59 = getelementptr i64, i64* %r1, i32 1
-store i64 %r57, i64* %r59
-%r60 = lshr i320 %r51, 128
-%r61 = trunc i320 %r60 to i128
-%r62 = lshr i320 %r51, 256
-%r63 = trunc i320 %r62 to i1
-%r64 = load i64, i64* %r4
-%r65 = zext i64 %r64 to i128
-%r67 = getelementptr i64, i64* %r4, i32 1
-%r68 = load i64, i64* %r67
-%r69 = zext i64 %r68 to i128
-%r70 = shl i128 %r69, 64
-%r71 = or i128 %r65, %r70
-%r73 = select i1 %r63, i128 %r71, i128 0
-%r74 = add i128 %r61, %r73
-%r76 = getelementptr i64, i64* %r1, i32 2
-%r77 = trunc i128 %r74 to i64
-%r79 = getelementptr i64, i64* %r76, i32 0
-store i64 %r77, i64* %r79
-%r80 = lshr i128 %r74, 64
-%r81 = trunc i128 %r80 to i64
-%r83 = getelementptr i64, i64* %r76, i32 1
-store i64 %r81, i64* %r83
+%r46 = zext i64 %r45 to i128
+%r47 = shl i128 %r46, 64
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r48 to i192
+%r51 = getelementptr i64, i64* %r3, i32 2
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r49, %r54
+%r56 = zext i192 %r55 to i256
+%r58 = getelementptr i64, i64* %r3, i32 3
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i256
+%r61 = shl i256 %r60, 192
+%r62 = or i256 %r56, %r61
+%r63 = zext i256 %r62 to i320
+%r65 = getelementptr i64, i64* %r3, i32 4
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i320
+%r68 = shl i320 %r67, 256
+%r69 = or i320 %r63, %r68
+%r70 = zext i320 %r69 to i384
+%r72 = getelementptr i64, i64* %r3, i32 5
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i384
+%r75 = shl i384 %r74, 320
+%r76 = or i384 %r70, %r75
+%r77 = zext i384 %r40 to i448
+%r78 = zext i384 %r76 to i448
+%r79 = sub i448 %r77, %r78
+%r80 = trunc i448 %r79 to i192
+%r82 = getelementptr i64, i64* %r1, i32 0
+%r83 = trunc i192 %r80 to i64
+store i64 %r83, i64* %r82
+%r84 = lshr i192 %r80, 64
+%r86 = getelementptr i64, i64* %r1, i32 1
+%r87 = trunc i192 %r84 to i64
+store i64 %r87, i64* %r86
+%r88 = lshr i192 %r84, 64
+%r90 = getelementptr i64, i64* %r1, i32 2
+%r91 = trunc i192 %r88 to i64
+store i64 %r91, i64* %r90
+%r92 = lshr i448 %r79, 192
+%r93 = trunc i448 %r92 to i192
+%r94 = lshr i448 %r79, 384
+%r95 = trunc i448 %r94 to i1
+%r96 = load i64, i64* %r4
+%r97 = zext i64 %r96 to i128
+%r99 = getelementptr i64, i64* %r4, i32 1
+%r100 = load i64, i64* %r99
+%r101 = zext i64 %r100 to i128
+%r102 = shl i128 %r101, 64
+%r103 = or i128 %r97, %r102
+%r104 = zext i128 %r103 to i192
+%r106 = getelementptr i64, i64* %r4, i32 2
+%r107 = load i64, i64* %r106
+%r108 = zext i64 %r107 to i192
+%r109 = shl i192 %r108, 128
+%r110 = or i192 %r104, %r109
+%r112 = select i1 %r95, i192 %r110, i192 0
+%r113 = add i192 %r93, %r112
+%r115 = getelementptr i64, i64* %r1, i32 3
+%r117 = getelementptr i64, i64* %r115, i32 0
+%r118 = trunc i192 %r113 to i64
+store i64 %r118, i64* %r117
+%r119 = lshr i192 %r113, 64
+%r121 = getelementptr i64, i64* %r115, i32 1
+%r122 = trunc i192 %r119 to i64
+store i64 %r122, i64* %r121
+%r123 = lshr i192 %r119, 64
+%r125 = getelementptr i64, i64* %r115, i32 2
+%r126 = trunc i192 %r123 to i64
+store i64 %r126, i64* %r125
 ret void
 }
-define i256 @mulPv192x64(i64* noalias  %r2, i64 %r3)
+define i320 @mulPv256x64(i64* noalias  %r2, i64 %r3)
 {
 %r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0)
 %r6 = trunc i128 %r5 to i64
@@ -1263,252 +1410,331 @@ define i256 @mulPv192x64(i64* noalias  %r2, i64 %r3)
 %r13 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 2)
 %r14 = trunc i128 %r13 to i64
 %r15 = call i64 @extractHigh64(i128 %r13)
-%r16 = zext i64 %r6 to i128
-%r17 = zext i64 %r10 to i128
-%r18 = shl i128 %r17, 64
-%r19 = or i128 %r16, %r18
-%r20 = zext i128 %r19 to i192
-%r21 = zext i64 %r14 to i192
-%r22 = shl i192 %r21, 128
-%r23 = or i192 %r20, %r22
-%r24 = zext i64 %r7 to i128
-%r25 = zext i64 %r11 to i128
-%r26 = shl i128 %r25, 64
-%r27 = or i128 %r24, %r26
-%r28 = zext i128 %r27 to i192
-%r29 = zext i64 %r15 to i192
-%r30 = shl i192 %r29, 128
-%r31 = or i192 %r28, %r30
-%r32 = zext i192 %r23 to i256
-%r33 = zext i192 %r31 to i256
-%r34 = shl i256 %r33, 64
-%r35 = add i256 %r32, %r34
-ret i256 %r35
+%r17 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 3)
+%r18 = trunc i128 %r17 to i64
+%r19 = call i64 @extractHigh64(i128 %r17)
+%r20 = zext i64 %r6 to i128
+%r21 = zext i64 %r10 to i128
+%r22 = shl i128 %r21, 64
+%r23 = or i128 %r20, %r22
+%r24 = zext i128 %r23 to i192
+%r25 = zext i64 %r14 to i192
+%r26 = shl i192 %r25, 128
+%r27 = or i192 %r24, %r26
+%r28 = zext i192 %r27 to i256
+%r29 = zext i64 %r18 to i256
+%r30 = shl i256 %r29, 192
+%r31 = or i256 %r28, %r30
+%r32 = zext i64 %r7 to i128
+%r33 = zext i64 %r11 to i128
+%r34 = shl i128 %r33, 64
+%r35 = or i128 %r32, %r34
+%r36 = zext i128 %r35 to i192
+%r37 = zext i64 %r15 to i192
+%r38 = shl i192 %r37, 128
+%r39 = or i192 %r36, %r38
+%r40 = zext i192 %r39 to i256
+%r41 = zext i64 %r19 to i256
+%r42 = shl i256 %r41, 192
+%r43 = or i256 %r40, %r42
+%r44 = zext i256 %r31 to i320
+%r45 = zext i256 %r43 to i320
+%r46 = shl i320 %r45, 64
+%r47 = add i320 %r44, %r46
+ret i320 %r47
 }
-define void @mcl_fp_mulUnitPre3L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3)
+define void @mcl_fp_mulUnitPre4L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3)
 {
-%r4 = call i256 @mulPv192x64(i64* %r2, i64 %r3)
-%r5 = trunc i256 %r4 to i64
-%r7 = getelementptr i64, i64* %r1, i32 0
-store i64 %r5, i64* %r7
-%r8 = lshr i256 %r4, 64
-%r9 = trunc i256 %r8 to i64
-%r11 = getelementptr i64, i64* %r1, i32 1
-store i64 %r9, i64* %r11
-%r12 = lshr i256 %r8, 64
-%r13 = trunc i256 %r12 to i64
-%r15 = getelementptr i64, i64* %r1, i32 2
-store i64 %r13, i64* %r15
-%r16 = lshr i256 %r12, 64
-%r17 = trunc i256 %r16 to i64
-%r19 = getelementptr i64, i64* %r1, i32 3
-store i64 %r17, i64* %r19
+%r4 = call i320 @mulPv256x64(i64* %r2, i64 %r3)
+%r6 = getelementptr i64, i64* %r1, i32 0
+%r7 = trunc i320 %r4 to i64
+store i64 %r7, i64* %r6
+%r8 = lshr i320 %r4, 64
+%r10 = getelementptr i64, i64* %r1, i32 1
+%r11 = trunc i320 %r8 to i64
+store i64 %r11, i64* %r10
+%r12 = lshr i320 %r8, 64
+%r14 = getelementptr i64, i64* %r1, i32 2
+%r15 = trunc i320 %r12 to i64
+store i64 %r15, i64* %r14
+%r16 = lshr i320 %r12, 64
+%r18 = getelementptr i64, i64* %r1, i32 3
+%r19 = trunc i320 %r16 to i64
+store i64 %r19, i64* %r18
+%r20 = lshr i320 %r16, 64
+%r22 = getelementptr i64, i64* %r1, i32 4
+%r23 = trunc i320 %r20 to i64
+store i64 %r23, i64* %r22
 ret void
 }
-define void @mcl_fpDbl_mulPre3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+define void @mcl_fpDbl_mulPre4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
 {
 %r4 = load i64, i64* %r3
-%r5 = call i256 @mulPv192x64(i64* %r2, i64 %r4)
-%r6 = trunc i256 %r5 to i64
+%r5 = call i320 @mulPv256x64(i64* %r2, i64 %r4)
+%r6 = trunc i320 %r5 to i64
 store i64 %r6, i64* %r1
-%r7 = lshr i256 %r5, 64
+%r7 = lshr i320 %r5, 64
 %r9 = getelementptr i64, i64* %r3, i32 1
 %r10 = load i64, i64* %r9
-%r11 = call i256 @mulPv192x64(i64* %r2, i64 %r10)
-%r12 = add i256 %r7, %r11
-%r13 = trunc i256 %r12 to i64
+%r11 = call i320 @mulPv256x64(i64* %r2, i64 %r10)
+%r12 = add i320 %r7, %r11
+%r13 = trunc i320 %r12 to i64
 %r15 = getelementptr i64, i64* %r1, i32 1
 store i64 %r13, i64* %r15
-%r16 = lshr i256 %r12, 64
+%r16 = lshr i320 %r12, 64
 %r18 = getelementptr i64, i64* %r3, i32 2
 %r19 = load i64, i64* %r18
-%r20 = call i256 @mulPv192x64(i64* %r2, i64 %r19)
-%r21 = add i256 %r16, %r20
-%r23 = getelementptr i64, i64* %r1, i32 2
-%r24 = trunc i256 %r21 to i64
-%r26 = getelementptr i64, i64* %r23, i32 0
-store i64 %r24, i64* %r26
-%r27 = lshr i256 %r21, 64
-%r28 = trunc i256 %r27 to i64
-%r30 = getelementptr i64, i64* %r23, i32 1
-store i64 %r28, i64* %r30
-%r31 = lshr i256 %r27, 64
-%r32 = trunc i256 %r31 to i64
-%r34 = getelementptr i64, i64* %r23, i32 2
-store i64 %r32, i64* %r34
-%r35 = lshr i256 %r31, 64
-%r36 = trunc i256 %r35 to i64
-%r38 = getelementptr i64, i64* %r23, i32 3
-store i64 %r36, i64* %r38
+%r20 = call i320 @mulPv256x64(i64* %r2, i64 %r19)
+%r21 = add i320 %r16, %r20
+%r22 = trunc i320 %r21 to i64
+%r24 = getelementptr i64, i64* %r1, i32 2
+store i64 %r22, i64* %r24
+%r25 = lshr i320 %r21, 64
+%r27 = getelementptr i64, i64* %r3, i32 3
+%r28 = load i64, i64* %r27
+%r29 = call i320 @mulPv256x64(i64* %r2, i64 %r28)
+%r30 = add i320 %r25, %r29
+%r32 = getelementptr i64, i64* %r1, i32 3
+%r34 = getelementptr i64, i64* %r32, i32 0
+%r35 = trunc i320 %r30 to i64
+store i64 %r35, i64* %r34
+%r36 = lshr i320 %r30, 64
+%r38 = getelementptr i64, i64* %r32, i32 1
+%r39 = trunc i320 %r36 to i64
+store i64 %r39, i64* %r38
+%r40 = lshr i320 %r36, 64
+%r42 = getelementptr i64, i64* %r32, i32 2
+%r43 = trunc i320 %r40 to i64
+store i64 %r43, i64* %r42
+%r44 = lshr i320 %r40, 64
+%r46 = getelementptr i64, i64* %r32, i32 3
+%r47 = trunc i320 %r44 to i64
+store i64 %r47, i64* %r46
+%r48 = lshr i320 %r44, 64
+%r50 = getelementptr i64, i64* %r32, i32 4
+%r51 = trunc i320 %r48 to i64
+store i64 %r51, i64* %r50
 ret void
 }
-define void @mcl_fpDbl_sqrPre3L(i64* noalias  %r1, i64* noalias  %r2)
+define void @mcl_fpDbl_sqrPre4L(i64* noalias  %r1, i64* noalias  %r2)
 {
 %r3 = load i64, i64* %r2
-%r4 = call i256 @mulPv192x64(i64* %r2, i64 %r3)
-%r5 = trunc i256 %r4 to i64
+%r4 = call i320 @mulPv256x64(i64* %r2, i64 %r3)
+%r5 = trunc i320 %r4 to i64
 store i64 %r5, i64* %r1
-%r6 = lshr i256 %r4, 64
+%r6 = lshr i320 %r4, 64
 %r8 = getelementptr i64, i64* %r2, i32 1
 %r9 = load i64, i64* %r8
-%r10 = call i256 @mulPv192x64(i64* %r2, i64 %r9)
-%r11 = add i256 %r6, %r10
-%r12 = trunc i256 %r11 to i64
+%r10 = call i320 @mulPv256x64(i64* %r2, i64 %r9)
+%r11 = add i320 %r6, %r10
+%r12 = trunc i320 %r11 to i64
 %r14 = getelementptr i64, i64* %r1, i32 1
 store i64 %r12, i64* %r14
-%r15 = lshr i256 %r11, 64
+%r15 = lshr i320 %r11, 64
 %r17 = getelementptr i64, i64* %r2, i32 2
 %r18 = load i64, i64* %r17
-%r19 = call i256 @mulPv192x64(i64* %r2, i64 %r18)
-%r20 = add i256 %r15, %r19
-%r22 = getelementptr i64, i64* %r1, i32 2
-%r23 = trunc i256 %r20 to i64
-%r25 = getelementptr i64, i64* %r22, i32 0
-store i64 %r23, i64* %r25
-%r26 = lshr i256 %r20, 64
-%r27 = trunc i256 %r26 to i64
-%r29 = getelementptr i64, i64* %r22, i32 1
-store i64 %r27, i64* %r29
-%r30 = lshr i256 %r26, 64
-%r31 = trunc i256 %r30 to i64
-%r33 = getelementptr i64, i64* %r22, i32 2
-store i64 %r31, i64* %r33
-%r34 = lshr i256 %r30, 64
-%r35 = trunc i256 %r34 to i64
-%r37 = getelementptr i64, i64* %r22, i32 3
-store i64 %r35, i64* %r37
+%r19 = call i320 @mulPv256x64(i64* %r2, i64 %r18)
+%r20 = add i320 %r15, %r19
+%r21 = trunc i320 %r20 to i64
+%r23 = getelementptr i64, i64* %r1, i32 2
+store i64 %r21, i64* %r23
+%r24 = lshr i320 %r20, 64
+%r26 = getelementptr i64, i64* %r2, i32 3
+%r27 = load i64, i64* %r26
+%r28 = call i320 @mulPv256x64(i64* %r2, i64 %r27)
+%r29 = add i320 %r24, %r28
+%r31 = getelementptr i64, i64* %r1, i32 3
+%r33 = getelementptr i64, i64* %r31, i32 0
+%r34 = trunc i320 %r29 to i64
+store i64 %r34, i64* %r33
+%r35 = lshr i320 %r29, 64
+%r37 = getelementptr i64, i64* %r31, i32 1
+%r38 = trunc i320 %r35 to i64
+store i64 %r38, i64* %r37
+%r39 = lshr i320 %r35, 64
+%r41 = getelementptr i64, i64* %r31, i32 2
+%r42 = trunc i320 %r39 to i64
+store i64 %r42, i64* %r41
+%r43 = lshr i320 %r39, 64
+%r45 = getelementptr i64, i64* %r31, i32 3
+%r46 = trunc i320 %r43 to i64
+store i64 %r46, i64* %r45
+%r47 = lshr i320 %r43, 64
+%r49 = getelementptr i64, i64* %r31, i32 4
+%r50 = trunc i320 %r47 to i64
+store i64 %r50, i64* %r49
 ret void
 }
-define void @mcl_fp_mont3L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
+define void @mcl_fp_mont4L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
 {
 %r6 = getelementptr i64, i64* %r4, i32 -1
 %r7 = load i64, i64* %r6
 %r9 = getelementptr i64, i64* %r3, i32 0
 %r10 = load i64, i64* %r9
-%r11 = call i256 @mulPv192x64(i64* %r2, i64 %r10)
-%r12 = zext i256 %r11 to i320
-%r13 = trunc i256 %r11 to i64
+%r11 = call i320 @mulPv256x64(i64* %r2, i64 %r10)
+%r12 = zext i320 %r11 to i384
+%r13 = trunc i320 %r11 to i64
 %r14 = mul i64 %r13, %r7
-%r15 = call i256 @mulPv192x64(i64* %r4, i64 %r14)
-%r16 = zext i256 %r15 to i320
-%r17 = add i320 %r12, %r16
-%r18 = lshr i320 %r17, 64
+%r15 = call i320 @mulPv256x64(i64* %r4, i64 %r14)
+%r16 = zext i320 %r15 to i384
+%r17 = add i384 %r12, %r16
+%r18 = lshr i384 %r17, 64
 %r20 = getelementptr i64, i64* %r3, i32 1
 %r21 = load i64, i64* %r20
-%r22 = call i256 @mulPv192x64(i64* %r2, i64 %r21)
-%r23 = zext i256 %r22 to i320
-%r24 = add i320 %r18, %r23
-%r25 = trunc i320 %r24 to i64
+%r22 = call i320 @mulPv256x64(i64* %r2, i64 %r21)
+%r23 = zext i320 %r22 to i384
+%r24 = add i384 %r18, %r23
+%r25 = trunc i384 %r24 to i64
 %r26 = mul i64 %r25, %r7
-%r27 = call i256 @mulPv192x64(i64* %r4, i64 %r26)
-%r28 = zext i256 %r27 to i320
-%r29 = add i320 %r24, %r28
-%r30 = lshr i320 %r29, 64
+%r27 = call i320 @mulPv256x64(i64* %r4, i64 %r26)
+%r28 = zext i320 %r27 to i384
+%r29 = add i384 %r24, %r28
+%r30 = lshr i384 %r29, 64
 %r32 = getelementptr i64, i64* %r3, i32 2
 %r33 = load i64, i64* %r32
-%r34 = call i256 @mulPv192x64(i64* %r2, i64 %r33)
-%r35 = zext i256 %r34 to i320
-%r36 = add i320 %r30, %r35
-%r37 = trunc i320 %r36 to i64
+%r34 = call i320 @mulPv256x64(i64* %r2, i64 %r33)
+%r35 = zext i320 %r34 to i384
+%r36 = add i384 %r30, %r35
+%r37 = trunc i384 %r36 to i64
 %r38 = mul i64 %r37, %r7
-%r39 = call i256 @mulPv192x64(i64* %r4, i64 %r38)
-%r40 = zext i256 %r39 to i320
-%r41 = add i320 %r36, %r40
-%r42 = lshr i320 %r41, 64
-%r43 = trunc i320 %r42 to i256
-%r44 = load i64, i64* %r4
-%r45 = zext i64 %r44 to i128
-%r47 = getelementptr i64, i64* %r4, i32 1
-%r48 = load i64, i64* %r47
-%r49 = zext i64 %r48 to i128
-%r50 = shl i128 %r49, 64
-%r51 = or i128 %r45, %r50
-%r52 = zext i128 %r51 to i192
-%r54 = getelementptr i64, i64* %r4, i32 2
-%r55 = load i64, i64* %r54
-%r56 = zext i64 %r55 to i192
-%r57 = shl i192 %r56, 128
-%r58 = or i192 %r52, %r57
-%r59 = zext i192 %r58 to i256
-%r60 = sub i256 %r43, %r59
-%r61 = lshr i256 %r60, 192
-%r62 = trunc i256 %r61 to i1
-%r63 = select i1 %r62, i256 %r43, i256 %r60
-%r64 = trunc i256 %r63 to i192
-%r65 = trunc i192 %r64 to i64
-%r67 = getelementptr i64, i64* %r1, i32 0
-store i64 %r65, i64* %r67
-%r68 = lshr i192 %r64, 64
-%r69 = trunc i192 %r68 to i64
-%r71 = getelementptr i64, i64* %r1, i32 1
-store i64 %r69, i64* %r71
-%r72 = lshr i192 %r68, 64
-%r73 = trunc i192 %r72 to i64
-%r75 = getelementptr i64, i64* %r1, i32 2
-store i64 %r73, i64* %r75
+%r39 = call i320 @mulPv256x64(i64* %r4, i64 %r38)
+%r40 = zext i320 %r39 to i384
+%r41 = add i384 %r36, %r40
+%r42 = lshr i384 %r41, 64
+%r44 = getelementptr i64, i64* %r3, i32 3
+%r45 = load i64, i64* %r44
+%r46 = call i320 @mulPv256x64(i64* %r2, i64 %r45)
+%r47 = zext i320 %r46 to i384
+%r48 = add i384 %r42, %r47
+%r49 = trunc i384 %r48 to i64
+%r50 = mul i64 %r49, %r7
+%r51 = call i320 @mulPv256x64(i64* %r4, i64 %r50)
+%r52 = zext i320 %r51 to i384
+%r53 = add i384 %r48, %r52
+%r54 = lshr i384 %r53, 64
+%r55 = trunc i384 %r54 to i320
+%r56 = load i64, i64* %r4
+%r57 = zext i64 %r56 to i128
+%r59 = getelementptr i64, i64* %r4, i32 1
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i128
+%r62 = shl i128 %r61, 64
+%r63 = or i128 %r57, %r62
+%r64 = zext i128 %r63 to i192
+%r66 = getelementptr i64, i64* %r4, i32 2
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i192
+%r69 = shl i192 %r68, 128
+%r70 = or i192 %r64, %r69
+%r71 = zext i192 %r70 to i256
+%r73 = getelementptr i64, i64* %r4, i32 3
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i256
+%r76 = shl i256 %r75, 192
+%r77 = or i256 %r71, %r76
+%r78 = zext i256 %r77 to i320
+%r79 = sub i320 %r55, %r78
+%r80 = lshr i320 %r79, 256
+%r81 = trunc i320 %r80 to i1
+%r82 = select i1 %r81, i320 %r55, i320 %r79
+%r83 = trunc i320 %r82 to i256
+%r85 = getelementptr i64, i64* %r1, i32 0
+%r86 = trunc i256 %r83 to i64
+store i64 %r86, i64* %r85
+%r87 = lshr i256 %r83, 64
+%r89 = getelementptr i64, i64* %r1, i32 1
+%r90 = trunc i256 %r87 to i64
+store i64 %r90, i64* %r89
+%r91 = lshr i256 %r87, 64
+%r93 = getelementptr i64, i64* %r1, i32 2
+%r94 = trunc i256 %r91 to i64
+store i64 %r94, i64* %r93
+%r95 = lshr i256 %r91, 64
+%r97 = getelementptr i64, i64* %r1, i32 3
+%r98 = trunc i256 %r95 to i64
+store i64 %r98, i64* %r97
 ret void
 }
-define void @mcl_fp_montNF3L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
+define void @mcl_fp_montNF4L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
 {
 %r6 = getelementptr i64, i64* %r4, i32 -1
 %r7 = load i64, i64* %r6
 %r8 = load i64, i64* %r3
-%r9 = call i256 @mulPv192x64(i64* %r2, i64 %r8)
-%r10 = trunc i256 %r9 to i64
+%r9 = call i320 @mulPv256x64(i64* %r2, i64 %r8)
+%r10 = trunc i320 %r9 to i64
 %r11 = mul i64 %r10, %r7
-%r12 = call i256 @mulPv192x64(i64* %r4, i64 %r11)
-%r13 = add i256 %r9, %r12
-%r14 = lshr i256 %r13, 64
+%r12 = call i320 @mulPv256x64(i64* %r4, i64 %r11)
+%r13 = add i320 %r9, %r12
+%r14 = lshr i320 %r13, 64
 %r16 = getelementptr i64, i64* %r3, i32 1
 %r17 = load i64, i64* %r16
-%r18 = call i256 @mulPv192x64(i64* %r2, i64 %r17)
-%r19 = add i256 %r14, %r18
-%r20 = trunc i256 %r19 to i64
+%r18 = call i320 @mulPv256x64(i64* %r2, i64 %r17)
+%r19 = add i320 %r14, %r18
+%r20 = trunc i320 %r19 to i64
 %r21 = mul i64 %r20, %r7
-%r22 = call i256 @mulPv192x64(i64* %r4, i64 %r21)
-%r23 = add i256 %r19, %r22
-%r24 = lshr i256 %r23, 64
+%r22 = call i320 @mulPv256x64(i64* %r4, i64 %r21)
+%r23 = add i320 %r19, %r22
+%r24 = lshr i320 %r23, 64
 %r26 = getelementptr i64, i64* %r3, i32 2
 %r27 = load i64, i64* %r26
-%r28 = call i256 @mulPv192x64(i64* %r2, i64 %r27)
-%r29 = add i256 %r24, %r28
-%r30 = trunc i256 %r29 to i64
+%r28 = call i320 @mulPv256x64(i64* %r2, i64 %r27)
+%r29 = add i320 %r24, %r28
+%r30 = trunc i320 %r29 to i64
 %r31 = mul i64 %r30, %r7
-%r32 = call i256 @mulPv192x64(i64* %r4, i64 %r31)
-%r33 = add i256 %r29, %r32
-%r34 = lshr i256 %r33, 64
-%r35 = trunc i256 %r34 to i192
-%r36 = load i64, i64* %r4
-%r37 = zext i64 %r36 to i128
-%r39 = getelementptr i64, i64* %r4, i32 1
-%r40 = load i64, i64* %r39
-%r41 = zext i64 %r40 to i128
-%r42 = shl i128 %r41, 64
-%r43 = or i128 %r37, %r42
-%r44 = zext i128 %r43 to i192
-%r46 = getelementptr i64, i64* %r4, i32 2
-%r47 = load i64, i64* %r46
-%r48 = zext i64 %r47 to i192
-%r49 = shl i192 %r48, 128
-%r50 = or i192 %r44, %r49
-%r51 = sub i192 %r35, %r50
-%r52 = lshr i192 %r51, 191
-%r53 = trunc i192 %r52 to i1
-%r54 = select i1 %r53, i192 %r35, i192 %r51
-%r55 = trunc i192 %r54 to i64
-%r57 = getelementptr i64, i64* %r1, i32 0
-store i64 %r55, i64* %r57
-%r58 = lshr i192 %r54, 64
-%r59 = trunc i192 %r58 to i64
-%r61 = getelementptr i64, i64* %r1, i32 1
-store i64 %r59, i64* %r61
-%r62 = lshr i192 %r58, 64
-%r63 = trunc i192 %r62 to i64
-%r65 = getelementptr i64, i64* %r1, i32 2
-store i64 %r63, i64* %r65
+%r32 = call i320 @mulPv256x64(i64* %r4, i64 %r31)
+%r33 = add i320 %r29, %r32
+%r34 = lshr i320 %r33, 64
+%r36 = getelementptr i64, i64* %r3, i32 3
+%r37 = load i64, i64* %r36
+%r38 = call i320 @mulPv256x64(i64* %r2, i64 %r37)
+%r39 = add i320 %r34, %r38
+%r40 = trunc i320 %r39 to i64
+%r41 = mul i64 %r40, %r7
+%r42 = call i320 @mulPv256x64(i64* %r4, i64 %r41)
+%r43 = add i320 %r39, %r42
+%r44 = lshr i320 %r43, 64
+%r45 = trunc i320 %r44 to i256
+%r46 = load i64, i64* %r4
+%r47 = zext i64 %r46 to i128
+%r49 = getelementptr i64, i64* %r4, i32 1
+%r50 = load i64, i64* %r49
+%r51 = zext i64 %r50 to i128
+%r52 = shl i128 %r51, 64
+%r53 = or i128 %r47, %r52
+%r54 = zext i128 %r53 to i192
+%r56 = getelementptr i64, i64* %r4, i32 2
+%r57 = load i64, i64* %r56
+%r58 = zext i64 %r57 to i192
+%r59 = shl i192 %r58, 128
+%r60 = or i192 %r54, %r59
+%r61 = zext i192 %r60 to i256
+%r63 = getelementptr i64, i64* %r4, i32 3
+%r64 = load i64, i64* %r63
+%r65 = zext i64 %r64 to i256
+%r66 = shl i256 %r65, 192
+%r67 = or i256 %r61, %r66
+%r68 = sub i256 %r45, %r67
+%r69 = lshr i256 %r68, 255
+%r70 = trunc i256 %r69 to i1
+%r71 = select i1 %r70, i256 %r45, i256 %r68
+%r73 = getelementptr i64, i64* %r1, i32 0
+%r74 = trunc i256 %r71 to i64
+store i64 %r74, i64* %r73
+%r75 = lshr i256 %r71, 64
+%r77 = getelementptr i64, i64* %r1, i32 1
+%r78 = trunc i256 %r75 to i64
+store i64 %r78, i64* %r77
+%r79 = lshr i256 %r75, 64
+%r81 = getelementptr i64, i64* %r1, i32 2
+%r82 = trunc i256 %r79 to i64
+store i64 %r82, i64* %r81
+%r83 = lshr i256 %r79, 64
+%r85 = getelementptr i64, i64* %r1, i32 3
+%r86 = trunc i256 %r83 to i64
+store i64 %r86, i64* %r85
 ret void
 }
-define void @mcl_fp_montRed3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+define void @mcl_fp_montRed4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
 {
 %r5 = getelementptr i64, i64* %r3, i32 -1
 %r6 = load i64, i64* %r5
@@ -1525,126 +1751,336 @@ define void @mcl_fp_montRed3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r19 = zext i64 %r18 to i192
 %r20 = shl i192 %r19, 128
 %r21 = or i192 %r15, %r20
-%r22 = load i64, i64* %r2
-%r23 = zext i64 %r22 to i128
-%r25 = getelementptr i64, i64* %r2, i32 1
-%r26 = load i64, i64* %r25
-%r27 = zext i64 %r26 to i128
-%r28 = shl i128 %r27, 64
-%r29 = or i128 %r23, %r28
-%r30 = zext i128 %r29 to i192
-%r32 = getelementptr i64, i64* %r2, i32 2
+%r22 = zext i192 %r21 to i256
+%r24 = getelementptr i64, i64* %r3, i32 3
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i256
+%r27 = shl i256 %r26, 192
+%r28 = or i256 %r22, %r27
+%r29 = load i64, i64* %r2
+%r30 = zext i64 %r29 to i128
+%r32 = getelementptr i64, i64* %r2, i32 1
 %r33 = load i64, i64* %r32
-%r34 = zext i64 %r33 to i192
-%r35 = shl i192 %r34, 128
-%r36 = or i192 %r30, %r35
-%r37 = zext i192 %r36 to i256
-%r39 = getelementptr i64, i64* %r2, i32 3
+%r34 = zext i64 %r33 to i128
+%r35 = shl i128 %r34, 64
+%r36 = or i128 %r30, %r35
+%r37 = zext i128 %r36 to i192
+%r39 = getelementptr i64, i64* %r2, i32 2
 %r40 = load i64, i64* %r39
-%r41 = zext i64 %r40 to i256
-%r42 = shl i256 %r41, 192
-%r43 = or i256 %r37, %r42
-%r44 = zext i256 %r43 to i320
-%r46 = getelementptr i64, i64* %r2, i32 4
+%r41 = zext i64 %r40 to i192
+%r42 = shl i192 %r41, 128
+%r43 = or i192 %r37, %r42
+%r44 = zext i192 %r43 to i256
+%r46 = getelementptr i64, i64* %r2, i32 3
 %r47 = load i64, i64* %r46
-%r48 = zext i64 %r47 to i320
-%r49 = shl i320 %r48, 256
-%r50 = or i320 %r44, %r49
-%r51 = zext i320 %r50 to i384
-%r53 = getelementptr i64, i64* %r2, i32 5
-%r54 = load i64, i64* %r53
-%r55 = zext i64 %r54 to i384
-%r56 = shl i384 %r55, 320
-%r57 = or i384 %r51, %r56
-%r58 = zext i384 %r57 to i448
-%r59 = trunc i448 %r58 to i64
-%r60 = mul i64 %r59, %r6
-%r61 = call i256 @mulPv192x64(i64* %r3, i64 %r60)
-%r62 = zext i256 %r61 to i448
-%r63 = add i448 %r58, %r62
-%r64 = lshr i448 %r63, 64
-%r65 = trunc i448 %r64 to i384
-%r66 = trunc i384 %r65 to i64
-%r67 = mul i64 %r66, %r6
-%r68 = call i256 @mulPv192x64(i64* %r3, i64 %r67)
-%r69 = zext i256 %r68 to i384
-%r70 = add i384 %r65, %r69
-%r71 = lshr i384 %r70, 64
-%r72 = trunc i384 %r71 to i320
-%r73 = trunc i320 %r72 to i64
-%r74 = mul i64 %r73, %r6
-%r75 = call i256 @mulPv192x64(i64* %r3, i64 %r74)
-%r76 = zext i256 %r75 to i320
-%r77 = add i320 %r72, %r76
-%r78 = lshr i320 %r77, 64
-%r79 = trunc i320 %r78 to i256
-%r80 = zext i192 %r21 to i256
-%r81 = sub i256 %r79, %r80
-%r82 = lshr i256 %r81, 192
-%r83 = trunc i256 %r82 to i1
-%r84 = select i1 %r83, i256 %r79, i256 %r81
-%r85 = trunc i256 %r84 to i192
-%r86 = trunc i192 %r85 to i64
-%r88 = getelementptr i64, i64* %r1, i32 0
-store i64 %r86, i64* %r88
-%r89 = lshr i192 %r85, 64
-%r90 = trunc i192 %r89 to i64
-%r92 = getelementptr i64, i64* %r1, i32 1
-store i64 %r90, i64* %r92
-%r93 = lshr i192 %r89, 64
-%r94 = trunc i192 %r93 to i64
-%r96 = getelementptr i64, i64* %r1, i32 2
-store i64 %r94, i64* %r96
+%r48 = zext i64 %r47 to i256
+%r49 = shl i256 %r48, 192
+%r50 = or i256 %r44, %r49
+%r51 = trunc i256 %r50 to i64
+%r52 = mul i64 %r51, %r6
+%r53 = call i320 @mulPv256x64(i64* %r3, i64 %r52)
+%r55 = getelementptr i64, i64* %r2, i32 4
+%r56 = load i64, i64* %r55
+%r57 = zext i64 %r56 to i320
+%r58 = shl i320 %r57, 256
+%r59 = zext i256 %r50 to i320
+%r60 = or i320 %r58, %r59
+%r61 = zext i320 %r60 to i384
+%r62 = zext i320 %r53 to i384
+%r63 = add i384 %r61, %r62
+%r64 = lshr i384 %r63, 64
+%r65 = trunc i384 %r64 to i320
+%r66 = lshr i320 %r65, 256
+%r67 = trunc i320 %r66 to i64
+%r68 = trunc i320 %r65 to i256
+%r69 = trunc i256 %r68 to i64
+%r70 = mul i64 %r69, %r6
+%r71 = call i320 @mulPv256x64(i64* %r3, i64 %r70)
+%r72 = zext i64 %r67 to i320
+%r73 = shl i320 %r72, 256
+%r74 = add i320 %r71, %r73
+%r76 = getelementptr i64, i64* %r2, i32 5
+%r77 = load i64, i64* %r76
+%r78 = zext i64 %r77 to i320
+%r79 = shl i320 %r78, 256
+%r80 = zext i256 %r68 to i320
+%r81 = or i320 %r79, %r80
+%r82 = zext i320 %r81 to i384
+%r83 = zext i320 %r74 to i384
+%r84 = add i384 %r82, %r83
+%r85 = lshr i384 %r84, 64
+%r86 = trunc i384 %r85 to i320
+%r87 = lshr i320 %r86, 256
+%r88 = trunc i320 %r87 to i64
+%r89 = trunc i320 %r86 to i256
+%r90 = trunc i256 %r89 to i64
+%r91 = mul i64 %r90, %r6
+%r92 = call i320 @mulPv256x64(i64* %r3, i64 %r91)
+%r93 = zext i64 %r88 to i320
+%r94 = shl i320 %r93, 256
+%r95 = add i320 %r92, %r94
+%r97 = getelementptr i64, i64* %r2, i32 6
+%r98 = load i64, i64* %r97
+%r99 = zext i64 %r98 to i320
+%r100 = shl i320 %r99, 256
+%r101 = zext i256 %r89 to i320
+%r102 = or i320 %r100, %r101
+%r103 = zext i320 %r102 to i384
+%r104 = zext i320 %r95 to i384
+%r105 = add i384 %r103, %r104
+%r106 = lshr i384 %r105, 64
+%r107 = trunc i384 %r106 to i320
+%r108 = lshr i320 %r107, 256
+%r109 = trunc i320 %r108 to i64
+%r110 = trunc i320 %r107 to i256
+%r111 = trunc i256 %r110 to i64
+%r112 = mul i64 %r111, %r6
+%r113 = call i320 @mulPv256x64(i64* %r3, i64 %r112)
+%r114 = zext i64 %r109 to i320
+%r115 = shl i320 %r114, 256
+%r116 = add i320 %r113, %r115
+%r118 = getelementptr i64, i64* %r2, i32 7
+%r119 = load i64, i64* %r118
+%r120 = zext i64 %r119 to i320
+%r121 = shl i320 %r120, 256
+%r122 = zext i256 %r110 to i320
+%r123 = or i320 %r121, %r122
+%r124 = zext i320 %r123 to i384
+%r125 = zext i320 %r116 to i384
+%r126 = add i384 %r124, %r125
+%r127 = lshr i384 %r126, 64
+%r128 = trunc i384 %r127 to i320
+%r129 = lshr i320 %r128, 256
+%r130 = trunc i320 %r129 to i64
+%r131 = trunc i320 %r128 to i256
+%r132 = zext i256 %r28 to i320
+%r133 = zext i256 %r131 to i320
+%r134 = sub i320 %r133, %r132
+%r135 = lshr i320 %r134, 256
+%r136 = trunc i320 %r135 to i1
+%r137 = select i1 %r136, i320 %r133, i320 %r134
+%r138 = trunc i320 %r137 to i256
+%r140 = getelementptr i64, i64* %r1, i32 0
+%r141 = trunc i256 %r138 to i64
+store i64 %r141, i64* %r140
+%r142 = lshr i256 %r138, 64
+%r144 = getelementptr i64, i64* %r1, i32 1
+%r145 = trunc i256 %r142 to i64
+store i64 %r145, i64* %r144
+%r146 = lshr i256 %r142, 64
+%r148 = getelementptr i64, i64* %r1, i32 2
+%r149 = trunc i256 %r146 to i64
+store i64 %r149, i64* %r148
+%r150 = lshr i256 %r146, 64
+%r152 = getelementptr i64, i64* %r1, i32 3
+%r153 = trunc i256 %r150 to i64
+store i64 %r153, i64* %r152
 ret void
 }
-define i64 @mcl_fp_addPre3L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_montRedNF4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
 {
-%r5 = load i64, i64* %r3
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r3, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r3, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
+%r5 = getelementptr i64, i64* %r3, i32 -1
+%r6 = load i64, i64* %r5
+%r7 = load i64, i64* %r3
+%r8 = zext i64 %r7 to i128
+%r10 = getelementptr i64, i64* %r3, i32 1
+%r11 = load i64, i64* %r10
+%r12 = zext i64 %r11 to i128
+%r13 = shl i128 %r12, 64
+%r14 = or i128 %r8, %r13
+%r15 = zext i128 %r14 to i192
+%r17 = getelementptr i64, i64* %r3, i32 2
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i192
+%r20 = shl i192 %r19, 128
+%r21 = or i192 %r15, %r20
+%r22 = zext i192 %r21 to i256
+%r24 = getelementptr i64, i64* %r3, i32 3
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i256
+%r27 = shl i256 %r26, 192
+%r28 = or i256 %r22, %r27
+%r29 = load i64, i64* %r2
+%r30 = zext i64 %r29 to i128
+%r32 = getelementptr i64, i64* %r2, i32 1
+%r33 = load i64, i64* %r32
+%r34 = zext i64 %r33 to i128
+%r35 = shl i128 %r34, 64
+%r36 = or i128 %r30, %r35
+%r37 = zext i128 %r36 to i192
+%r39 = getelementptr i64, i64* %r2, i32 2
+%r40 = load i64, i64* %r39
+%r41 = zext i64 %r40 to i192
+%r42 = shl i192 %r41, 128
+%r43 = or i192 %r37, %r42
+%r44 = zext i192 %r43 to i256
+%r46 = getelementptr i64, i64* %r2, i32 3
+%r47 = load i64, i64* %r46
+%r48 = zext i64 %r47 to i256
+%r49 = shl i256 %r48, 192
+%r50 = or i256 %r44, %r49
+%r51 = trunc i256 %r50 to i64
+%r52 = mul i64 %r51, %r6
+%r53 = call i320 @mulPv256x64(i64* %r3, i64 %r52)
+%r55 = getelementptr i64, i64* %r2, i32 4
+%r56 = load i64, i64* %r55
+%r57 = zext i64 %r56 to i320
+%r58 = shl i320 %r57, 256
+%r59 = zext i256 %r50 to i320
+%r60 = or i320 %r58, %r59
+%r61 = zext i320 %r60 to i384
+%r62 = zext i320 %r53 to i384
+%r63 = add i384 %r61, %r62
+%r64 = lshr i384 %r63, 64
+%r65 = trunc i384 %r64 to i320
+%r66 = lshr i320 %r65, 256
+%r67 = trunc i320 %r66 to i64
+%r68 = trunc i320 %r65 to i256
+%r69 = trunc i256 %r68 to i64
+%r70 = mul i64 %r69, %r6
+%r71 = call i320 @mulPv256x64(i64* %r3, i64 %r70)
+%r72 = zext i64 %r67 to i320
+%r73 = shl i320 %r72, 256
+%r74 = add i320 %r71, %r73
+%r76 = getelementptr i64, i64* %r2, i32 5
+%r77 = load i64, i64* %r76
+%r78 = zext i64 %r77 to i320
+%r79 = shl i320 %r78, 256
+%r80 = zext i256 %r68 to i320
+%r81 = or i320 %r79, %r80
+%r82 = zext i320 %r81 to i384
+%r83 = zext i320 %r74 to i384
+%r84 = add i384 %r82, %r83
+%r85 = lshr i384 %r84, 64
+%r86 = trunc i384 %r85 to i320
+%r87 = lshr i320 %r86, 256
+%r88 = trunc i320 %r87 to i64
+%r89 = trunc i320 %r86 to i256
+%r90 = trunc i256 %r89 to i64
+%r91 = mul i64 %r90, %r6
+%r92 = call i320 @mulPv256x64(i64* %r3, i64 %r91)
+%r93 = zext i64 %r88 to i320
+%r94 = shl i320 %r93, 256
+%r95 = add i320 %r92, %r94
+%r97 = getelementptr i64, i64* %r2, i32 6
+%r98 = load i64, i64* %r97
+%r99 = zext i64 %r98 to i320
+%r100 = shl i320 %r99, 256
+%r101 = zext i256 %r89 to i320
+%r102 = or i320 %r100, %r101
+%r103 = zext i320 %r102 to i384
+%r104 = zext i320 %r95 to i384
+%r105 = add i384 %r103, %r104
+%r106 = lshr i384 %r105, 64
+%r107 = trunc i384 %r106 to i320
+%r108 = lshr i320 %r107, 256
+%r109 = trunc i320 %r108 to i64
+%r110 = trunc i320 %r107 to i256
+%r111 = trunc i256 %r110 to i64
+%r112 = mul i64 %r111, %r6
+%r113 = call i320 @mulPv256x64(i64* %r3, i64 %r112)
+%r114 = zext i64 %r109 to i320
+%r115 = shl i320 %r114, 256
+%r116 = add i320 %r113, %r115
+%r118 = getelementptr i64, i64* %r2, i32 7
+%r119 = load i64, i64* %r118
+%r120 = zext i64 %r119 to i320
+%r121 = shl i320 %r120, 256
+%r122 = zext i256 %r110 to i320
+%r123 = or i320 %r121, %r122
+%r124 = zext i320 %r123 to i384
+%r125 = zext i320 %r116 to i384
+%r126 = add i384 %r124, %r125
+%r127 = lshr i384 %r126, 64
+%r128 = trunc i384 %r127 to i320
+%r129 = lshr i320 %r128, 256
+%r130 = trunc i320 %r129 to i64
+%r131 = trunc i320 %r128 to i256
+%r132 = sub i256 %r131, %r28
+%r133 = lshr i256 %r132, 255
+%r134 = trunc i256 %r133 to i1
+%r135 = select i1 %r134, i256 %r131, i256 %r132
+%r137 = getelementptr i64, i64* %r1, i32 0
+%r138 = trunc i256 %r135 to i64
+store i64 %r138, i64* %r137
+%r139 = lshr i256 %r135, 64
+%r141 = getelementptr i64, i64* %r1, i32 1
+%r142 = trunc i256 %r139 to i64
+store i64 %r142, i64* %r141
+%r143 = lshr i256 %r139, 64
+%r145 = getelementptr i64, i64* %r1, i32 2
+%r146 = trunc i256 %r143 to i64
+store i64 %r146, i64* %r145
+%r147 = lshr i256 %r143, 64
+%r149 = getelementptr i64, i64* %r1, i32 3
+%r150 = trunc i256 %r147 to i64
+store i64 %r150, i64* %r149
+ret void
+}
+define i64 @mcl_fp_addPre4L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+{
+%r5 = load i64, i64* %r3
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
 %r19 = or i192 %r13, %r18
 %r20 = zext i192 %r19 to i256
-%r21 = load i64, i64* %r4
-%r22 = zext i64 %r21 to i128
-%r24 = getelementptr i64, i64* %r4, i32 1
-%r25 = load i64, i64* %r24
-%r26 = zext i64 %r25 to i128
-%r27 = shl i128 %r26, 64
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i192
-%r31 = getelementptr i64, i64* %r4, i32 2
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r28 = load i64, i64* %r4
+%r29 = zext i64 %r28 to i128
+%r31 = getelementptr i64, i64* %r4, i32 1
 %r32 = load i64, i64* %r31
-%r33 = zext i64 %r32 to i192
-%r34 = shl i192 %r33, 128
-%r35 = or i192 %r29, %r34
-%r36 = zext i192 %r35 to i256
-%r37 = add i256 %r20, %r36
-%r38 = trunc i256 %r37 to i192
-%r39 = trunc i192 %r38 to i64
-%r41 = getelementptr i64, i64* %r2, i32 0
-store i64 %r39, i64* %r41
-%r42 = lshr i192 %r38, 64
-%r43 = trunc i192 %r42 to i64
-%r45 = getelementptr i64, i64* %r2, i32 1
-store i64 %r43, i64* %r45
-%r46 = lshr i192 %r42, 64
-%r47 = trunc i192 %r46 to i64
-%r49 = getelementptr i64, i64* %r2, i32 2
-store i64 %r47, i64* %r49
-%r50 = lshr i256 %r37, 192
-%r51 = trunc i256 %r50 to i64
-ret i64 %r51
+%r33 = zext i64 %r32 to i128
+%r34 = shl i128 %r33, 64
+%r35 = or i128 %r29, %r34
+%r36 = zext i128 %r35 to i192
+%r38 = getelementptr i64, i64* %r4, i32 2
+%r39 = load i64, i64* %r38
+%r40 = zext i64 %r39 to i192
+%r41 = shl i192 %r40, 128
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i256
+%r45 = getelementptr i64, i64* %r4, i32 3
+%r46 = load i64, i64* %r45
+%r47 = zext i64 %r46 to i256
+%r48 = shl i256 %r47, 192
+%r49 = or i256 %r43, %r48
+%r50 = zext i256 %r49 to i320
+%r51 = add i320 %r27, %r50
+%r52 = trunc i320 %r51 to i256
+%r54 = getelementptr i64, i64* %r2, i32 0
+%r55 = trunc i256 %r52 to i64
+store i64 %r55, i64* %r54
+%r56 = lshr i256 %r52, 64
+%r58 = getelementptr i64, i64* %r2, i32 1
+%r59 = trunc i256 %r56 to i64
+store i64 %r59, i64* %r58
+%r60 = lshr i256 %r56, 64
+%r62 = getelementptr i64, i64* %r2, i32 2
+%r63 = trunc i256 %r60 to i64
+store i64 %r63, i64* %r62
+%r64 = lshr i256 %r60, 64
+%r66 = getelementptr i64, i64* %r2, i32 3
+%r67 = trunc i256 %r64 to i64
+store i64 %r67, i64* %r66
+%r68 = lshr i320 %r51, 256
+%r69 = trunc i320 %r68 to i64
+ret i64 %r69
 }
-define i64 @mcl_fp_subPre3L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define i64 @mcl_fp_subPre4L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r3
 %r6 = zext i64 %r5 to i128
@@ -1660,39 +2096,55 @@ define i64 @mcl_fp_subPre3L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias
 %r18 = shl i192 %r17, 128
 %r19 = or i192 %r13, %r18
 %r20 = zext i192 %r19 to i256
-%r21 = load i64, i64* %r4
-%r22 = zext i64 %r21 to i128
-%r24 = getelementptr i64, i64* %r4, i32 1
-%r25 = load i64, i64* %r24
-%r26 = zext i64 %r25 to i128
-%r27 = shl i128 %r26, 64
-%r28 = or i128 %r22, %r27
-%r29 = zext i128 %r28 to i192
-%r31 = getelementptr i64, i64* %r4, i32 2
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r28 = load i64, i64* %r4
+%r29 = zext i64 %r28 to i128
+%r31 = getelementptr i64, i64* %r4, i32 1
 %r32 = load i64, i64* %r31
-%r33 = zext i64 %r32 to i192
-%r34 = shl i192 %r33, 128
-%r35 = or i192 %r29, %r34
-%r36 = zext i192 %r35 to i256
-%r37 = sub i256 %r20, %r36
-%r38 = trunc i256 %r37 to i192
-%r39 = trunc i192 %r38 to i64
-%r41 = getelementptr i64, i64* %r2, i32 0
-store i64 %r39, i64* %r41
-%r42 = lshr i192 %r38, 64
-%r43 = trunc i192 %r42 to i64
-%r45 = getelementptr i64, i64* %r2, i32 1
-store i64 %r43, i64* %r45
-%r46 = lshr i192 %r42, 64
-%r47 = trunc i192 %r46 to i64
-%r49 = getelementptr i64, i64* %r2, i32 2
-store i64 %r47, i64* %r49
-%r50 = lshr i256 %r37, 192
-%r51 = trunc i256 %r50 to i64
-%r53 = and i64 %r51, 1
-ret i64 %r53
+%r33 = zext i64 %r32 to i128
+%r34 = shl i128 %r33, 64
+%r35 = or i128 %r29, %r34
+%r36 = zext i128 %r35 to i192
+%r38 = getelementptr i64, i64* %r4, i32 2
+%r39 = load i64, i64* %r38
+%r40 = zext i64 %r39 to i192
+%r41 = shl i192 %r40, 128
+%r42 = or i192 %r36, %r41
+%r43 = zext i192 %r42 to i256
+%r45 = getelementptr i64, i64* %r4, i32 3
+%r46 = load i64, i64* %r45
+%r47 = zext i64 %r46 to i256
+%r48 = shl i256 %r47, 192
+%r49 = or i256 %r43, %r48
+%r50 = zext i256 %r49 to i320
+%r51 = sub i320 %r27, %r50
+%r52 = trunc i320 %r51 to i256
+%r54 = getelementptr i64, i64* %r2, i32 0
+%r55 = trunc i256 %r52 to i64
+store i64 %r55, i64* %r54
+%r56 = lshr i256 %r52, 64
+%r58 = getelementptr i64, i64* %r2, i32 1
+%r59 = trunc i256 %r56 to i64
+store i64 %r59, i64* %r58
+%r60 = lshr i256 %r56, 64
+%r62 = getelementptr i64, i64* %r2, i32 2
+%r63 = trunc i256 %r60 to i64
+store i64 %r63, i64* %r62
+%r64 = lshr i256 %r60, 64
+%r66 = getelementptr i64, i64* %r2, i32 3
+%r67 = trunc i256 %r64 to i64
+store i64 %r67, i64* %r66
+%r69 = lshr i320 %r51, 256
+%r70 = trunc i320 %r69 to i64
+%r71 = and i64 %r70, 1
+ret i64 %r71
 }
-define void @mcl_fp_shr1_3L(i64* noalias  %r1, i64* noalias  %r2)
+define void @mcl_fp_shr1_4L(i64* noalias  %r1, i64* noalias  %r2)
 {
 %r3 = load i64, i64* %r2
 %r4 = zext i64 %r3 to i128
@@ -1707,21 +2159,31 @@ define void @mcl_fp_shr1_3L(i64* noalias  %r1, i64* noalias  %r2)
 %r15 = zext i64 %r14 to i192
 %r16 = shl i192 %r15, 128
 %r17 = or i192 %r11, %r16
-%r18 = lshr i192 %r17, 1
-%r19 = trunc i192 %r18 to i64
-%r21 = getelementptr i64, i64* %r1, i32 0
-store i64 %r19, i64* %r21
-%r22 = lshr i192 %r18, 64
-%r23 = trunc i192 %r22 to i64
-%r25 = getelementptr i64, i64* %r1, i32 1
-store i64 %r23, i64* %r25
-%r26 = lshr i192 %r22, 64
-%r27 = trunc i192 %r26 to i64
-%r29 = getelementptr i64, i64* %r1, i32 2
-store i64 %r27, i64* %r29
+%r18 = zext i192 %r17 to i256
+%r20 = getelementptr i64, i64* %r2, i32 3
+%r21 = load i64, i64* %r20
+%r22 = zext i64 %r21 to i256
+%r23 = shl i256 %r22, 192
+%r24 = or i256 %r18, %r23
+%r25 = lshr i256 %r24, 1
+%r27 = getelementptr i64, i64* %r1, i32 0
+%r28 = trunc i256 %r25 to i64
+store i64 %r28, i64* %r27
+%r29 = lshr i256 %r25, 64
+%r31 = getelementptr i64, i64* %r1, i32 1
+%r32 = trunc i256 %r29 to i64
+store i64 %r32, i64* %r31
+%r33 = lshr i256 %r29, 64
+%r35 = getelementptr i64, i64* %r1, i32 2
+%r36 = trunc i256 %r33 to i64
+store i64 %r36, i64* %r35
+%r37 = lshr i256 %r33, 64
+%r39 = getelementptr i64, i64* %r1, i32 3
+%r40 = trunc i256 %r37 to i64
+store i64 %r40, i64* %r39
 ret void
 }
-define void @mcl_fp_add3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_add4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -1736,70 +2198,96 @@ define void @mcl_fp_add3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r
 %r17 = zext i64 %r16 to i192
 %r18 = shl i192 %r17, 128
 %r19 = or i192 %r13, %r18
-%r20 = load i64, i64* %r3
-%r21 = zext i64 %r20 to i128
-%r23 = getelementptr i64, i64* %r3, i32 1
-%r24 = load i64, i64* %r23
-%r25 = zext i64 %r24 to i128
-%r26 = shl i128 %r25, 64
-%r27 = or i128 %r21, %r26
-%r28 = zext i128 %r27 to i192
-%r30 = getelementptr i64, i64* %r3, i32 2
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = load i64, i64* %r3
+%r28 = zext i64 %r27 to i128
+%r30 = getelementptr i64, i64* %r3, i32 1
 %r31 = load i64, i64* %r30
-%r32 = zext i64 %r31 to i192
-%r33 = shl i192 %r32, 128
-%r34 = or i192 %r28, %r33
-%r35 = zext i192 %r19 to i256
-%r36 = zext i192 %r34 to i256
-%r37 = add i256 %r35, %r36
-%r38 = trunc i256 %r37 to i192
-%r39 = trunc i192 %r38 to i64
-%r41 = getelementptr i64, i64* %r1, i32 0
-store i64 %r39, i64* %r41
-%r42 = lshr i192 %r38, 64
-%r43 = trunc i192 %r42 to i64
-%r45 = getelementptr i64, i64* %r1, i32 1
-store i64 %r43, i64* %r45
-%r46 = lshr i192 %r42, 64
-%r47 = trunc i192 %r46 to i64
-%r49 = getelementptr i64, i64* %r1, i32 2
-store i64 %r47, i64* %r49
-%r50 = load i64, i64* %r4
-%r51 = zext i64 %r50 to i128
-%r53 = getelementptr i64, i64* %r4, i32 1
-%r54 = load i64, i64* %r53
-%r55 = zext i64 %r54 to i128
-%r56 = shl i128 %r55, 64
-%r57 = or i128 %r51, %r56
-%r58 = zext i128 %r57 to i192
-%r60 = getelementptr i64, i64* %r4, i32 2
-%r61 = load i64, i64* %r60
-%r62 = zext i64 %r61 to i192
-%r63 = shl i192 %r62, 128
-%r64 = or i192 %r58, %r63
-%r65 = zext i192 %r64 to i256
-%r66 = sub i256 %r37, %r65
-%r67 = lshr i256 %r66, 192
-%r68 = trunc i256 %r67 to i1
-br i1%r68, label %carry, label %nocarry
-nocarry:
-%r69 = trunc i256 %r66 to i192
-%r70 = trunc i192 %r69 to i64
-%r72 = getelementptr i64, i64* %r1, i32 0
-store i64 %r70, i64* %r72
-%r73 = lshr i192 %r69, 64
-%r74 = trunc i192 %r73 to i64
-%r76 = getelementptr i64, i64* %r1, i32 1
-store i64 %r74, i64* %r76
-%r77 = lshr i192 %r73, 64
-%r78 = trunc i192 %r77 to i64
-%r80 = getelementptr i64, i64* %r1, i32 2
-store i64 %r78, i64* %r80
-ret void
-carry:
-ret void
+%r32 = zext i64 %r31 to i128
+%r33 = shl i128 %r32, 64
+%r34 = or i128 %r28, %r33
+%r35 = zext i128 %r34 to i192
+%r37 = getelementptr i64, i64* %r3, i32 2
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i192
+%r40 = shl i192 %r39, 128
+%r41 = or i192 %r35, %r40
+%r42 = zext i192 %r41 to i256
+%r44 = getelementptr i64, i64* %r3, i32 3
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i256
+%r47 = shl i256 %r46, 192
+%r48 = or i256 %r42, %r47
+%r49 = zext i256 %r26 to i320
+%r50 = zext i256 %r48 to i320
+%r51 = add i320 %r49, %r50
+%r52 = trunc i320 %r51 to i256
+%r54 = getelementptr i64, i64* %r1, i32 0
+%r55 = trunc i256 %r52 to i64
+store i64 %r55, i64* %r54
+%r56 = lshr i256 %r52, 64
+%r58 = getelementptr i64, i64* %r1, i32 1
+%r59 = trunc i256 %r56 to i64
+store i64 %r59, i64* %r58
+%r60 = lshr i256 %r56, 64
+%r62 = getelementptr i64, i64* %r1, i32 2
+%r63 = trunc i256 %r60 to i64
+store i64 %r63, i64* %r62
+%r64 = lshr i256 %r60, 64
+%r66 = getelementptr i64, i64* %r1, i32 3
+%r67 = trunc i256 %r64 to i64
+store i64 %r67, i64* %r66
+%r68 = load i64, i64* %r4
+%r69 = zext i64 %r68 to i128
+%r71 = getelementptr i64, i64* %r4, i32 1
+%r72 = load i64, i64* %r71
+%r73 = zext i64 %r72 to i128
+%r74 = shl i128 %r73, 64
+%r75 = or i128 %r69, %r74
+%r76 = zext i128 %r75 to i192
+%r78 = getelementptr i64, i64* %r4, i32 2
+%r79 = load i64, i64* %r78
+%r80 = zext i64 %r79 to i192
+%r81 = shl i192 %r80, 128
+%r82 = or i192 %r76, %r81
+%r83 = zext i192 %r82 to i256
+%r85 = getelementptr i64, i64* %r4, i32 3
+%r86 = load i64, i64* %r85
+%r87 = zext i64 %r86 to i256
+%r88 = shl i256 %r87, 192
+%r89 = or i256 %r83, %r88
+%r90 = zext i256 %r89 to i320
+%r91 = sub i320 %r51, %r90
+%r92 = lshr i320 %r91, 256
+%r93 = trunc i320 %r92 to i1
+br i1%r93, label %carry, label %nocarry
+nocarry:
+%r94 = trunc i320 %r91 to i256
+%r96 = getelementptr i64, i64* %r1, i32 0
+%r97 = trunc i256 %r94 to i64
+store i64 %r97, i64* %r96
+%r98 = lshr i256 %r94, 64
+%r100 = getelementptr i64, i64* %r1, i32 1
+%r101 = trunc i256 %r98 to i64
+store i64 %r101, i64* %r100
+%r102 = lshr i256 %r98, 64
+%r104 = getelementptr i64, i64* %r1, i32 2
+%r105 = trunc i256 %r102 to i64
+store i64 %r105, i64* %r104
+%r106 = lshr i256 %r102, 64
+%r108 = getelementptr i64, i64* %r1, i32 3
+%r109 = trunc i256 %r106 to i64
+store i64 %r109, i64* %r108
+ret void
+carry:
+ret void
 }
-define void @mcl_fp_addNF3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_addNF4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -1814,51 +2302,73 @@ define void @mcl_fp_addNF3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r17 = zext i64 %r16 to i192
 %r18 = shl i192 %r17, 128
 %r19 = or i192 %r13, %r18
-%r20 = load i64, i64* %r3
-%r21 = zext i64 %r20 to i128
-%r23 = getelementptr i64, i64* %r3, i32 1
-%r24 = load i64, i64* %r23
-%r25 = zext i64 %r24 to i128
-%r26 = shl i128 %r25, 64
-%r27 = or i128 %r21, %r26
-%r28 = zext i128 %r27 to i192
-%r30 = getelementptr i64, i64* %r3, i32 2
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = load i64, i64* %r3
+%r28 = zext i64 %r27 to i128
+%r30 = getelementptr i64, i64* %r3, i32 1
 %r31 = load i64, i64* %r30
-%r32 = zext i64 %r31 to i192
-%r33 = shl i192 %r32, 128
-%r34 = or i192 %r28, %r33
-%r35 = add i192 %r19, %r34
-%r36 = load i64, i64* %r4
-%r37 = zext i64 %r36 to i128
-%r39 = getelementptr i64, i64* %r4, i32 1
-%r40 = load i64, i64* %r39
-%r41 = zext i64 %r40 to i128
-%r42 = shl i128 %r41, 64
-%r43 = or i128 %r37, %r42
-%r44 = zext i128 %r43 to i192
-%r46 = getelementptr i64, i64* %r4, i32 2
-%r47 = load i64, i64* %r46
-%r48 = zext i64 %r47 to i192
-%r49 = shl i192 %r48, 128
-%r50 = or i192 %r44, %r49
-%r51 = sub i192 %r35, %r50
-%r52 = lshr i192 %r51, 191
-%r53 = trunc i192 %r52 to i1
-%r54 = select i1 %r53, i192 %r35, i192 %r51
-%r55 = trunc i192 %r54 to i64
-%r57 = getelementptr i64, i64* %r1, i32 0
-store i64 %r55, i64* %r57
-%r58 = lshr i192 %r54, 64
-%r59 = trunc i192 %r58 to i64
-%r61 = getelementptr i64, i64* %r1, i32 1
-store i64 %r59, i64* %r61
-%r62 = lshr i192 %r58, 64
-%r63 = trunc i192 %r62 to i64
-%r65 = getelementptr i64, i64* %r1, i32 2
-store i64 %r63, i64* %r65
+%r32 = zext i64 %r31 to i128
+%r33 = shl i128 %r32, 64
+%r34 = or i128 %r28, %r33
+%r35 = zext i128 %r34 to i192
+%r37 = getelementptr i64, i64* %r3, i32 2
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i192
+%r40 = shl i192 %r39, 128
+%r41 = or i192 %r35, %r40
+%r42 = zext i192 %r41 to i256
+%r44 = getelementptr i64, i64* %r3, i32 3
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i256
+%r47 = shl i256 %r46, 192
+%r48 = or i256 %r42, %r47
+%r49 = add i256 %r26, %r48
+%r50 = load i64, i64* %r4
+%r51 = zext i64 %r50 to i128
+%r53 = getelementptr i64, i64* %r4, i32 1
+%r54 = load i64, i64* %r53
+%r55 = zext i64 %r54 to i128
+%r56 = shl i128 %r55, 64
+%r57 = or i128 %r51, %r56
+%r58 = zext i128 %r57 to i192
+%r60 = getelementptr i64, i64* %r4, i32 2
+%r61 = load i64, i64* %r60
+%r62 = zext i64 %r61 to i192
+%r63 = shl i192 %r62, 128
+%r64 = or i192 %r58, %r63
+%r65 = zext i192 %r64 to i256
+%r67 = getelementptr i64, i64* %r4, i32 3
+%r68 = load i64, i64* %r67
+%r69 = zext i64 %r68 to i256
+%r70 = shl i256 %r69, 192
+%r71 = or i256 %r65, %r70
+%r72 = sub i256 %r49, %r71
+%r73 = lshr i256 %r72, 255
+%r74 = trunc i256 %r73 to i1
+%r75 = select i1 %r74, i256 %r49, i256 %r72
+%r77 = getelementptr i64, i64* %r1, i32 0
+%r78 = trunc i256 %r75 to i64
+store i64 %r78, i64* %r77
+%r79 = lshr i256 %r75, 64
+%r81 = getelementptr i64, i64* %r1, i32 1
+%r82 = trunc i256 %r79 to i64
+store i64 %r82, i64* %r81
+%r83 = lshr i256 %r79, 64
+%r85 = getelementptr i64, i64* %r1, i32 2
+%r86 = trunc i256 %r83 to i64
+store i64 %r86, i64* %r85
+%r87 = lshr i256 %r83, 64
+%r89 = getelementptr i64, i64* %r1, i32 3
+%r90 = trunc i256 %r87 to i64
+store i64 %r90, i64* %r89
 ret void
 }
-define void @mcl_fp_sub3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_sub4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -1873,68 +2383,94 @@ define void @mcl_fp_sub3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r
 %r17 = zext i64 %r16 to i192
 %r18 = shl i192 %r17, 128
 %r19 = or i192 %r13, %r18
-%r20 = load i64, i64* %r3
-%r21 = zext i64 %r20 to i128
-%r23 = getelementptr i64, i64* %r3, i32 1
-%r24 = load i64, i64* %r23
-%r25 = zext i64 %r24 to i128
-%r26 = shl i128 %r25, 64
-%r27 = or i128 %r21, %r26
-%r28 = zext i128 %r27 to i192
-%r30 = getelementptr i64, i64* %r3, i32 2
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = load i64, i64* %r3
+%r28 = zext i64 %r27 to i128
+%r30 = getelementptr i64, i64* %r3, i32 1
 %r31 = load i64, i64* %r30
-%r32 = zext i64 %r31 to i192
-%r33 = shl i192 %r32, 128
-%r34 = or i192 %r28, %r33
-%r35 = zext i192 %r19 to i256
-%r36 = zext i192 %r34 to i256
-%r37 = sub i256 %r35, %r36
-%r38 = trunc i256 %r37 to i192
-%r39 = lshr i256 %r37, 192
-%r40 = trunc i256 %r39 to i1
-%r41 = trunc i192 %r38 to i64
-%r43 = getelementptr i64, i64* %r1, i32 0
-store i64 %r41, i64* %r43
-%r44 = lshr i192 %r38, 64
-%r45 = trunc i192 %r44 to i64
-%r47 = getelementptr i64, i64* %r1, i32 1
-store i64 %r45, i64* %r47
-%r48 = lshr i192 %r44, 64
-%r49 = trunc i192 %r48 to i64
-%r51 = getelementptr i64, i64* %r1, i32 2
-store i64 %r49, i64* %r51
-br i1%r40, label %carry, label %nocarry
+%r32 = zext i64 %r31 to i128
+%r33 = shl i128 %r32, 64
+%r34 = or i128 %r28, %r33
+%r35 = zext i128 %r34 to i192
+%r37 = getelementptr i64, i64* %r3, i32 2
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i192
+%r40 = shl i192 %r39, 128
+%r41 = or i192 %r35, %r40
+%r42 = zext i192 %r41 to i256
+%r44 = getelementptr i64, i64* %r3, i32 3
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i256
+%r47 = shl i256 %r46, 192
+%r48 = or i256 %r42, %r47
+%r49 = zext i256 %r26 to i320
+%r50 = zext i256 %r48 to i320
+%r51 = sub i320 %r49, %r50
+%r52 = trunc i320 %r51 to i256
+%r53 = lshr i320 %r51, 256
+%r54 = trunc i320 %r53 to i1
+%r56 = getelementptr i64, i64* %r1, i32 0
+%r57 = trunc i256 %r52 to i64
+store i64 %r57, i64* %r56
+%r58 = lshr i256 %r52, 64
+%r60 = getelementptr i64, i64* %r1, i32 1
+%r61 = trunc i256 %r58 to i64
+store i64 %r61, i64* %r60
+%r62 = lshr i256 %r58, 64
+%r64 = getelementptr i64, i64* %r1, i32 2
+%r65 = trunc i256 %r62 to i64
+store i64 %r65, i64* %r64
+%r66 = lshr i256 %r62, 64
+%r68 = getelementptr i64, i64* %r1, i32 3
+%r69 = trunc i256 %r66 to i64
+store i64 %r69, i64* %r68
+br i1%r54, label %carry, label %nocarry
 nocarry:
 ret void
 carry:
-%r52 = load i64, i64* %r4
-%r53 = zext i64 %r52 to i128
-%r55 = getelementptr i64, i64* %r4, i32 1
-%r56 = load i64, i64* %r55
-%r57 = zext i64 %r56 to i128
-%r58 = shl i128 %r57, 64
-%r59 = or i128 %r53, %r58
-%r60 = zext i128 %r59 to i192
-%r62 = getelementptr i64, i64* %r4, i32 2
-%r63 = load i64, i64* %r62
-%r64 = zext i64 %r63 to i192
-%r65 = shl i192 %r64, 128
-%r66 = or i192 %r60, %r65
-%r67 = add i192 %r38, %r66
-%r68 = trunc i192 %r67 to i64
-%r70 = getelementptr i64, i64* %r1, i32 0
-store i64 %r68, i64* %r70
-%r71 = lshr i192 %r67, 64
-%r72 = trunc i192 %r71 to i64
-%r74 = getelementptr i64, i64* %r1, i32 1
-store i64 %r72, i64* %r74
-%r75 = lshr i192 %r71, 64
-%r76 = trunc i192 %r75 to i64
-%r78 = getelementptr i64, i64* %r1, i32 2
-store i64 %r76, i64* %r78
+%r70 = load i64, i64* %r4
+%r71 = zext i64 %r70 to i128
+%r73 = getelementptr i64, i64* %r4, i32 1
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i128
+%r76 = shl i128 %r75, 64
+%r77 = or i128 %r71, %r76
+%r78 = zext i128 %r77 to i192
+%r80 = getelementptr i64, i64* %r4, i32 2
+%r81 = load i64, i64* %r80
+%r82 = zext i64 %r81 to i192
+%r83 = shl i192 %r82, 128
+%r84 = or i192 %r78, %r83
+%r85 = zext i192 %r84 to i256
+%r87 = getelementptr i64, i64* %r4, i32 3
+%r88 = load i64, i64* %r87
+%r89 = zext i64 %r88 to i256
+%r90 = shl i256 %r89, 192
+%r91 = or i256 %r85, %r90
+%r92 = add i256 %r52, %r91
+%r94 = getelementptr i64, i64* %r1, i32 0
+%r95 = trunc i256 %r92 to i64
+store i64 %r95, i64* %r94
+%r96 = lshr i256 %r92, 64
+%r98 = getelementptr i64, i64* %r1, i32 1
+%r99 = trunc i256 %r96 to i64
+store i64 %r99, i64* %r98
+%r100 = lshr i256 %r96, 64
+%r102 = getelementptr i64, i64* %r1, i32 2
+%r103 = trunc i256 %r100 to i64
+store i64 %r103, i64* %r102
+%r104 = lshr i256 %r100, 64
+%r106 = getelementptr i64, i64* %r1, i32 3
+%r107 = trunc i256 %r104 to i64
+store i64 %r107, i64* %r106
 ret void
 }
-define void @mcl_fp_subNF3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_subNF4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -1949,51 +2485,73 @@ define void @mcl_fp_subNF3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r17 = zext i64 %r16 to i192
 %r18 = shl i192 %r17, 128
 %r19 = or i192 %r13, %r18
-%r20 = load i64, i64* %r3
-%r21 = zext i64 %r20 to i128
-%r23 = getelementptr i64, i64* %r3, i32 1
-%r24 = load i64, i64* %r23
-%r25 = zext i64 %r24 to i128
-%r26 = shl i128 %r25, 64
-%r27 = or i128 %r21, %r26
-%r28 = zext i128 %r27 to i192
-%r30 = getelementptr i64, i64* %r3, i32 2
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = load i64, i64* %r3
+%r28 = zext i64 %r27 to i128
+%r30 = getelementptr i64, i64* %r3, i32 1
 %r31 = load i64, i64* %r30
-%r32 = zext i64 %r31 to i192
-%r33 = shl i192 %r32, 128
-%r34 = or i192 %r28, %r33
-%r35 = sub i192 %r19, %r34
-%r36 = lshr i192 %r35, 191
-%r37 = trunc i192 %r36 to i1
-%r38 = load i64, i64* %r4
-%r39 = zext i64 %r38 to i128
-%r41 = getelementptr i64, i64* %r4, i32 1
-%r42 = load i64, i64* %r41
-%r43 = zext i64 %r42 to i128
-%r44 = shl i128 %r43, 64
-%r45 = or i128 %r39, %r44
-%r46 = zext i128 %r45 to i192
-%r48 = getelementptr i64, i64* %r4, i32 2
-%r49 = load i64, i64* %r48
-%r50 = zext i64 %r49 to i192
-%r51 = shl i192 %r50, 128
-%r52 = or i192 %r46, %r51
-%r54 = select i1 %r37, i192 %r52, i192 0
-%r55 = add i192 %r35, %r54
-%r56 = trunc i192 %r55 to i64
-%r58 = getelementptr i64, i64* %r1, i32 0
-store i64 %r56, i64* %r58
-%r59 = lshr i192 %r55, 64
-%r60 = trunc i192 %r59 to i64
-%r62 = getelementptr i64, i64* %r1, i32 1
-store i64 %r60, i64* %r62
-%r63 = lshr i192 %r59, 64
-%r64 = trunc i192 %r63 to i64
-%r66 = getelementptr i64, i64* %r1, i32 2
-store i64 %r64, i64* %r66
+%r32 = zext i64 %r31 to i128
+%r33 = shl i128 %r32, 64
+%r34 = or i128 %r28, %r33
+%r35 = zext i128 %r34 to i192
+%r37 = getelementptr i64, i64* %r3, i32 2
+%r38 = load i64, i64* %r37
+%r39 = zext i64 %r38 to i192
+%r40 = shl i192 %r39, 128
+%r41 = or i192 %r35, %r40
+%r42 = zext i192 %r41 to i256
+%r44 = getelementptr i64, i64* %r3, i32 3
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i256
+%r47 = shl i256 %r46, 192
+%r48 = or i256 %r42, %r47
+%r49 = sub i256 %r26, %r48
+%r50 = lshr i256 %r49, 255
+%r51 = trunc i256 %r50 to i1
+%r52 = load i64, i64* %r4
+%r53 = zext i64 %r52 to i128
+%r55 = getelementptr i64, i64* %r4, i32 1
+%r56 = load i64, i64* %r55
+%r57 = zext i64 %r56 to i128
+%r58 = shl i128 %r57, 64
+%r59 = or i128 %r53, %r58
+%r60 = zext i128 %r59 to i192
+%r62 = getelementptr i64, i64* %r4, i32 2
+%r63 = load i64, i64* %r62
+%r64 = zext i64 %r63 to i192
+%r65 = shl i192 %r64, 128
+%r66 = or i192 %r60, %r65
+%r67 = zext i192 %r66 to i256
+%r69 = getelementptr i64, i64* %r4, i32 3
+%r70 = load i64, i64* %r69
+%r71 = zext i64 %r70 to i256
+%r72 = shl i256 %r71, 192
+%r73 = or i256 %r67, %r72
+%r75 = select i1 %r51, i256 %r73, i256 0
+%r76 = add i256 %r49, %r75
+%r78 = getelementptr i64, i64* %r1, i32 0
+%r79 = trunc i256 %r76 to i64
+store i64 %r79, i64* %r78
+%r80 = lshr i256 %r76, 64
+%r82 = getelementptr i64, i64* %r1, i32 1
+%r83 = trunc i256 %r80 to i64
+store i64 %r83, i64* %r82
+%r84 = lshr i256 %r80, 64
+%r86 = getelementptr i64, i64* %r1, i32 2
+%r87 = trunc i256 %r84 to i64
+store i64 %r87, i64* %r86
+%r88 = lshr i256 %r84, 64
+%r90 = getelementptr i64, i64* %r1, i32 3
+%r91 = trunc i256 %r88 to i64
+store i64 %r91, i64* %r90
 ret void
 }
-define void @mcl_fpDbl_add3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fpDbl_add4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -2026,88 +2584,126 @@ define void @mcl_fpDbl_add3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r38 = zext i64 %r37 to i384
 %r39 = shl i384 %r38, 320
 %r40 = or i384 %r34, %r39
-%r41 = load i64, i64* %r3
-%r42 = zext i64 %r41 to i128
-%r44 = getelementptr i64, i64* %r3, i32 1
-%r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i128
-%r47 = shl i128 %r46, 64
-%r48 = or i128 %r42, %r47
-%r49 = zext i128 %r48 to i192
-%r51 = getelementptr i64, i64* %r3, i32 2
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i192
-%r54 = shl i192 %r53, 128
-%r55 = or i192 %r49, %r54
-%r56 = zext i192 %r55 to i256
-%r58 = getelementptr i64, i64* %r3, i32 3
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = load i64, i64* %r3
+%r56 = zext i64 %r55 to i128
+%r58 = getelementptr i64, i64* %r3, i32 1
 %r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i256
-%r61 = shl i256 %r60, 192
-%r62 = or i256 %r56, %r61
-%r63 = zext i256 %r62 to i320
-%r65 = getelementptr i64, i64* %r3, i32 4
+%r60 = zext i64 %r59 to i128
+%r61 = shl i128 %r60, 64
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i192
+%r65 = getelementptr i64, i64* %r3, i32 2
 %r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i320
-%r68 = shl i320 %r67, 256
-%r69 = or i320 %r63, %r68
-%r70 = zext i320 %r69 to i384
-%r72 = getelementptr i64, i64* %r3, i32 5
+%r67 = zext i64 %r66 to i192
+%r68 = shl i192 %r67, 128
+%r69 = or i192 %r63, %r68
+%r70 = zext i192 %r69 to i256
+%r72 = getelementptr i64, i64* %r3, i32 3
 %r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i384
-%r75 = shl i384 %r74, 320
-%r76 = or i384 %r70, %r75
-%r77 = zext i384 %r40 to i448
-%r78 = zext i384 %r76 to i448
-%r79 = add i448 %r77, %r78
-%r80 = trunc i448 %r79 to i192
-%r81 = trunc i192 %r80 to i64
-%r83 = getelementptr i64, i64* %r1, i32 0
-store i64 %r81, i64* %r83
-%r84 = lshr i192 %r80, 64
-%r85 = trunc i192 %r84 to i64
-%r87 = getelementptr i64, i64* %r1, i32 1
-store i64 %r85, i64* %r87
-%r88 = lshr i192 %r84, 64
-%r89 = trunc i192 %r88 to i64
-%r91 = getelementptr i64, i64* %r1, i32 2
-store i64 %r89, i64* %r91
-%r92 = lshr i448 %r79, 192
-%r93 = trunc i448 %r92 to i256
-%r94 = load i64, i64* %r4
-%r95 = zext i64 %r94 to i128
-%r97 = getelementptr i64, i64* %r4, i32 1
-%r98 = load i64, i64* %r97
-%r99 = zext i64 %r98 to i128
-%r100 = shl i128 %r99, 64
-%r101 = or i128 %r95, %r100
-%r102 = zext i128 %r101 to i192
-%r104 = getelementptr i64, i64* %r4, i32 2
-%r105 = load i64, i64* %r104
-%r106 = zext i64 %r105 to i192
-%r107 = shl i192 %r106, 128
-%r108 = or i192 %r102, %r107
-%r109 = zext i192 %r108 to i256
-%r110 = sub i256 %r93, %r109
-%r111 = lshr i256 %r110, 192
-%r112 = trunc i256 %r111 to i1
-%r113 = select i1 %r112, i256 %r93, i256 %r110
-%r114 = trunc i256 %r113 to i192
-%r116 = getelementptr i64, i64* %r1, i32 3
-%r117 = trunc i192 %r114 to i64
-%r119 = getelementptr i64, i64* %r116, i32 0
-store i64 %r117, i64* %r119
-%r120 = lshr i192 %r114, 64
-%r121 = trunc i192 %r120 to i64
-%r123 = getelementptr i64, i64* %r116, i32 1
-store i64 %r121, i64* %r123
-%r124 = lshr i192 %r120, 64
-%r125 = trunc i192 %r124 to i64
-%r127 = getelementptr i64, i64* %r116, i32 2
-store i64 %r125, i64* %r127
+%r74 = zext i64 %r73 to i256
+%r75 = shl i256 %r74, 192
+%r76 = or i256 %r70, %r75
+%r77 = zext i256 %r76 to i320
+%r79 = getelementptr i64, i64* %r3, i32 4
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i320
+%r82 = shl i320 %r81, 256
+%r83 = or i320 %r77, %r82
+%r84 = zext i320 %r83 to i384
+%r86 = getelementptr i64, i64* %r3, i32 5
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i384
+%r89 = shl i384 %r88, 320
+%r90 = or i384 %r84, %r89
+%r91 = zext i384 %r90 to i448
+%r93 = getelementptr i64, i64* %r3, i32 6
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i448
+%r96 = shl i448 %r95, 384
+%r97 = or i448 %r91, %r96
+%r98 = zext i448 %r97 to i512
+%r100 = getelementptr i64, i64* %r3, i32 7
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i512
+%r103 = shl i512 %r102, 448
+%r104 = or i512 %r98, %r103
+%r105 = zext i512 %r54 to i576
+%r106 = zext i512 %r104 to i576
+%r107 = add i576 %r105, %r106
+%r108 = trunc i576 %r107 to i256
+%r110 = getelementptr i64, i64* %r1, i32 0
+%r111 = trunc i256 %r108 to i64
+store i64 %r111, i64* %r110
+%r112 = lshr i256 %r108, 64
+%r114 = getelementptr i64, i64* %r1, i32 1
+%r115 = trunc i256 %r112 to i64
+store i64 %r115, i64* %r114
+%r116 = lshr i256 %r112, 64
+%r118 = getelementptr i64, i64* %r1, i32 2
+%r119 = trunc i256 %r116 to i64
+store i64 %r119, i64* %r118
+%r120 = lshr i256 %r116, 64
+%r122 = getelementptr i64, i64* %r1, i32 3
+%r123 = trunc i256 %r120 to i64
+store i64 %r123, i64* %r122
+%r124 = lshr i576 %r107, 256
+%r125 = trunc i576 %r124 to i320
+%r126 = load i64, i64* %r4
+%r127 = zext i64 %r126 to i128
+%r129 = getelementptr i64, i64* %r4, i32 1
+%r130 = load i64, i64* %r129
+%r131 = zext i64 %r130 to i128
+%r132 = shl i128 %r131, 64
+%r133 = or i128 %r127, %r132
+%r134 = zext i128 %r133 to i192
+%r136 = getelementptr i64, i64* %r4, i32 2
+%r137 = load i64, i64* %r136
+%r138 = zext i64 %r137 to i192
+%r139 = shl i192 %r138, 128
+%r140 = or i192 %r134, %r139
+%r141 = zext i192 %r140 to i256
+%r143 = getelementptr i64, i64* %r4, i32 3
+%r144 = load i64, i64* %r143
+%r145 = zext i64 %r144 to i256
+%r146 = shl i256 %r145, 192
+%r147 = or i256 %r141, %r146
+%r148 = zext i256 %r147 to i320
+%r149 = sub i320 %r125, %r148
+%r150 = lshr i320 %r149, 256
+%r151 = trunc i320 %r150 to i1
+%r152 = select i1 %r151, i320 %r125, i320 %r149
+%r153 = trunc i320 %r152 to i256
+%r155 = getelementptr i64, i64* %r1, i32 4
+%r157 = getelementptr i64, i64* %r155, i32 0
+%r158 = trunc i256 %r153 to i64
+store i64 %r158, i64* %r157
+%r159 = lshr i256 %r153, 64
+%r161 = getelementptr i64, i64* %r155, i32 1
+%r162 = trunc i256 %r159 to i64
+store i64 %r162, i64* %r161
+%r163 = lshr i256 %r159, 64
+%r165 = getelementptr i64, i64* %r155, i32 2
+%r166 = trunc i256 %r163 to i64
+store i64 %r166, i64* %r165
+%r167 = lshr i256 %r163, 64
+%r169 = getelementptr i64, i64* %r155, i32 3
+%r170 = trunc i256 %r167 to i64
+store i64 %r170, i64* %r169
 ret void
 }
-define void @mcl_fpDbl_sub3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fpDbl_sub4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -2140,86 +2736,124 @@ define void @mcl_fpDbl_sub3L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r38 = zext i64 %r37 to i384
 %r39 = shl i384 %r38, 320
 %r40 = or i384 %r34, %r39
-%r41 = load i64, i64* %r3
-%r42 = zext i64 %r41 to i128
-%r44 = getelementptr i64, i64* %r3, i32 1
-%r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i128
-%r47 = shl i128 %r46, 64
-%r48 = or i128 %r42, %r47
-%r49 = zext i128 %r48 to i192
-%r51 = getelementptr i64, i64* %r3, i32 2
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i192
-%r54 = shl i192 %r53, 128
-%r55 = or i192 %r49, %r54
-%r56 = zext i192 %r55 to i256
-%r58 = getelementptr i64, i64* %r3, i32 3
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i256
-%r61 = shl i256 %r60, 192
-%r62 = or i256 %r56, %r61
-%r63 = zext i256 %r62 to i320
-%r65 = getelementptr i64, i64* %r3, i32 4
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i320
-%r68 = shl i320 %r67, 256
-%r69 = or i320 %r63, %r68
-%r70 = zext i320 %r69 to i384
-%r72 = getelementptr i64, i64* %r3, i32 5
+%r41 = zext i384 %r40 to i448
+%r43 = getelementptr i64, i64* %r2, i32 6
+%r44 = load i64, i64* %r43
+%r45 = zext i64 %r44 to i448
+%r46 = shl i448 %r45, 384
+%r47 = or i448 %r41, %r46
+%r48 = zext i448 %r47 to i512
+%r50 = getelementptr i64, i64* %r2, i32 7
+%r51 = load i64, i64* %r50
+%r52 = zext i64 %r51 to i512
+%r53 = shl i512 %r52, 448
+%r54 = or i512 %r48, %r53
+%r55 = load i64, i64* %r3
+%r56 = zext i64 %r55 to i128
+%r58 = getelementptr i64, i64* %r3, i32 1
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i128
+%r61 = shl i128 %r60, 64
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i192
+%r65 = getelementptr i64, i64* %r3, i32 2
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i192
+%r68 = shl i192 %r67, 128
+%r69 = or i192 %r63, %r68
+%r70 = zext i192 %r69 to i256
+%r72 = getelementptr i64, i64* %r3, i32 3
 %r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i384
-%r75 = shl i384 %r74, 320
-%r76 = or i384 %r70, %r75
-%r77 = zext i384 %r40 to i448
-%r78 = zext i384 %r76 to i448
-%r79 = sub i448 %r77, %r78
-%r80 = trunc i448 %r79 to i192
-%r81 = trunc i192 %r80 to i64
-%r83 = getelementptr i64, i64* %r1, i32 0
-store i64 %r81, i64* %r83
-%r84 = lshr i192 %r80, 64
-%r85 = trunc i192 %r84 to i64
-%r87 = getelementptr i64, i64* %r1, i32 1
-store i64 %r85, i64* %r87
-%r88 = lshr i192 %r84, 64
-%r89 = trunc i192 %r88 to i64
-%r91 = getelementptr i64, i64* %r1, i32 2
-store i64 %r89, i64* %r91
-%r92 = lshr i448 %r79, 192
-%r93 = trunc i448 %r92 to i192
-%r94 = lshr i448 %r79, 384
-%r95 = trunc i448 %r94 to i1
-%r96 = load i64, i64* %r4
-%r97 = zext i64 %r96 to i128
-%r99 = getelementptr i64, i64* %r4, i32 1
-%r100 = load i64, i64* %r99
-%r101 = zext i64 %r100 to i128
-%r102 = shl i128 %r101, 64
-%r103 = or i128 %r97, %r102
-%r104 = zext i128 %r103 to i192
-%r106 = getelementptr i64, i64* %r4, i32 2
-%r107 = load i64, i64* %r106
-%r108 = zext i64 %r107 to i192
-%r109 = shl i192 %r108, 128
-%r110 = or i192 %r104, %r109
-%r112 = select i1 %r95, i192 %r110, i192 0
-%r113 = add i192 %r93, %r112
-%r115 = getelementptr i64, i64* %r1, i32 3
-%r116 = trunc i192 %r113 to i64
-%r118 = getelementptr i64, i64* %r115, i32 0
-store i64 %r116, i64* %r118
-%r119 = lshr i192 %r113, 64
-%r120 = trunc i192 %r119 to i64
-%r122 = getelementptr i64, i64* %r115, i32 1
-store i64 %r120, i64* %r122
-%r123 = lshr i192 %r119, 64
-%r124 = trunc i192 %r123 to i64
-%r126 = getelementptr i64, i64* %r115, i32 2
-store i64 %r124, i64* %r126
+%r74 = zext i64 %r73 to i256
+%r75 = shl i256 %r74, 192
+%r76 = or i256 %r70, %r75
+%r77 = zext i256 %r76 to i320
+%r79 = getelementptr i64, i64* %r3, i32 4
+%r80 = load i64, i64* %r79
+%r81 = zext i64 %r80 to i320
+%r82 = shl i320 %r81, 256
+%r83 = or i320 %r77, %r82
+%r84 = zext i320 %r83 to i384
+%r86 = getelementptr i64, i64* %r3, i32 5
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i384
+%r89 = shl i384 %r88, 320
+%r90 = or i384 %r84, %r89
+%r91 = zext i384 %r90 to i448
+%r93 = getelementptr i64, i64* %r3, i32 6
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i448
+%r96 = shl i448 %r95, 384
+%r97 = or i448 %r91, %r96
+%r98 = zext i448 %r97 to i512
+%r100 = getelementptr i64, i64* %r3, i32 7
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i512
+%r103 = shl i512 %r102, 448
+%r104 = or i512 %r98, %r103
+%r105 = zext i512 %r54 to i576
+%r106 = zext i512 %r104 to i576
+%r107 = sub i576 %r105, %r106
+%r108 = trunc i576 %r107 to i256
+%r110 = getelementptr i64, i64* %r1, i32 0
+%r111 = trunc i256 %r108 to i64
+store i64 %r111, i64* %r110
+%r112 = lshr i256 %r108, 64
+%r114 = getelementptr i64, i64* %r1, i32 1
+%r115 = trunc i256 %r112 to i64
+store i64 %r115, i64* %r114
+%r116 = lshr i256 %r112, 64
+%r118 = getelementptr i64, i64* %r1, i32 2
+%r119 = trunc i256 %r116 to i64
+store i64 %r119, i64* %r118
+%r120 = lshr i256 %r116, 64
+%r122 = getelementptr i64, i64* %r1, i32 3
+%r123 = trunc i256 %r120 to i64
+store i64 %r123, i64* %r122
+%r124 = lshr i576 %r107, 256
+%r125 = trunc i576 %r124 to i256
+%r126 = lshr i576 %r107, 512
+%r127 = trunc i576 %r126 to i1
+%r128 = load i64, i64* %r4
+%r129 = zext i64 %r128 to i128
+%r131 = getelementptr i64, i64* %r4, i32 1
+%r132 = load i64, i64* %r131
+%r133 = zext i64 %r132 to i128
+%r134 = shl i128 %r133, 64
+%r135 = or i128 %r129, %r134
+%r136 = zext i128 %r135 to i192
+%r138 = getelementptr i64, i64* %r4, i32 2
+%r139 = load i64, i64* %r138
+%r140 = zext i64 %r139 to i192
+%r141 = shl i192 %r140, 128
+%r142 = or i192 %r136, %r141
+%r143 = zext i192 %r142 to i256
+%r145 = getelementptr i64, i64* %r4, i32 3
+%r146 = load i64, i64* %r145
+%r147 = zext i64 %r146 to i256
+%r148 = shl i256 %r147, 192
+%r149 = or i256 %r143, %r148
+%r151 = select i1 %r127, i256 %r149, i256 0
+%r152 = add i256 %r125, %r151
+%r154 = getelementptr i64, i64* %r1, i32 4
+%r156 = getelementptr i64, i64* %r154, i32 0
+%r157 = trunc i256 %r152 to i64
+store i64 %r157, i64* %r156
+%r158 = lshr i256 %r152, 64
+%r160 = getelementptr i64, i64* %r154, i32 1
+%r161 = trunc i256 %r158 to i64
+store i64 %r161, i64* %r160
+%r162 = lshr i256 %r158, 64
+%r164 = getelementptr i64, i64* %r154, i32 2
+%r165 = trunc i256 %r162 to i64
+store i64 %r165, i64* %r164
+%r166 = lshr i256 %r162, 64
+%r168 = getelementptr i64, i64* %r154, i32 3
+%r169 = trunc i256 %r166 to i64
+store i64 %r169, i64* %r168
 ret void
 }
-define i320 @mulPv256x64(i64* noalias  %r2, i64 %r3)
+define i448 @mulPv384x64(i64* noalias  %r2, i64 %r3)
 {
 %r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0)
 %r6 = trunc i128 %r5 to i64
@@ -2233,328 +2867,486 @@ define i320 @mulPv256x64(i64* noalias  %r2, i64 %r3)
 %r17 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 3)
 %r18 = trunc i128 %r17 to i64
 %r19 = call i64 @extractHigh64(i128 %r17)
-%r20 = zext i64 %r6 to i128
-%r21 = zext i64 %r10 to i128
-%r22 = shl i128 %r21, 64
-%r23 = or i128 %r20, %r22
-%r24 = zext i128 %r23 to i192
-%r25 = zext i64 %r14 to i192
-%r26 = shl i192 %r25, 128
-%r27 = or i192 %r24, %r26
-%r28 = zext i192 %r27 to i256
-%r29 = zext i64 %r18 to i256
-%r30 = shl i256 %r29, 192
-%r31 = or i256 %r28, %r30
-%r32 = zext i64 %r7 to i128
-%r33 = zext i64 %r11 to i128
-%r34 = shl i128 %r33, 64
-%r35 = or i128 %r32, %r34
-%r36 = zext i128 %r35 to i192
-%r37 = zext i64 %r15 to i192
-%r38 = shl i192 %r37, 128
-%r39 = or i192 %r36, %r38
-%r40 = zext i192 %r39 to i256
-%r41 = zext i64 %r19 to i256
-%r42 = shl i256 %r41, 192
-%r43 = or i256 %r40, %r42
-%r44 = zext i256 %r31 to i320
-%r45 = zext i256 %r43 to i320
-%r46 = shl i320 %r45, 64
-%r47 = add i320 %r44, %r46
-ret i320 %r47
+%r21 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 4)
+%r22 = trunc i128 %r21 to i64
+%r23 = call i64 @extractHigh64(i128 %r21)
+%r25 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 5)
+%r26 = trunc i128 %r25 to i64
+%r27 = call i64 @extractHigh64(i128 %r25)
+%r28 = zext i64 %r6 to i128
+%r29 = zext i64 %r10 to i128
+%r30 = shl i128 %r29, 64
+%r31 = or i128 %r28, %r30
+%r32 = zext i128 %r31 to i192
+%r33 = zext i64 %r14 to i192
+%r34 = shl i192 %r33, 128
+%r35 = or i192 %r32, %r34
+%r36 = zext i192 %r35 to i256
+%r37 = zext i64 %r18 to i256
+%r38 = shl i256 %r37, 192
+%r39 = or i256 %r36, %r38
+%r40 = zext i256 %r39 to i320
+%r41 = zext i64 %r22 to i320
+%r42 = shl i320 %r41, 256
+%r43 = or i320 %r40, %r42
+%r44 = zext i320 %r43 to i384
+%r45 = zext i64 %r26 to i384
+%r46 = shl i384 %r45, 320
+%r47 = or i384 %r44, %r46
+%r48 = zext i64 %r7 to i128
+%r49 = zext i64 %r11 to i128
+%r50 = shl i128 %r49, 64
+%r51 = or i128 %r48, %r50
+%r52 = zext i128 %r51 to i192
+%r53 = zext i64 %r15 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r52, %r54
+%r56 = zext i192 %r55 to i256
+%r57 = zext i64 %r19 to i256
+%r58 = shl i256 %r57, 192
+%r59 = or i256 %r56, %r58
+%r60 = zext i256 %r59 to i320
+%r61 = zext i64 %r23 to i320
+%r62 = shl i320 %r61, 256
+%r63 = or i320 %r60, %r62
+%r64 = zext i320 %r63 to i384
+%r65 = zext i64 %r27 to i384
+%r66 = shl i384 %r65, 320
+%r67 = or i384 %r64, %r66
+%r68 = zext i384 %r47 to i448
+%r69 = zext i384 %r67 to i448
+%r70 = shl i448 %r69, 64
+%r71 = add i448 %r68, %r70
+ret i448 %r71
 }
-define void @mcl_fp_mulUnitPre4L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3)
+define void @mcl_fp_mulUnitPre6L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3)
 {
-%r4 = call i320 @mulPv256x64(i64* %r2, i64 %r3)
-%r5 = trunc i320 %r4 to i64
-%r7 = getelementptr i64, i64* %r1, i32 0
-store i64 %r5, i64* %r7
-%r8 = lshr i320 %r4, 64
-%r9 = trunc i320 %r8 to i64
-%r11 = getelementptr i64, i64* %r1, i32 1
-store i64 %r9, i64* %r11
-%r12 = lshr i320 %r8, 64
-%r13 = trunc i320 %r12 to i64
-%r15 = getelementptr i64, i64* %r1, i32 2
-store i64 %r13, i64* %r15
-%r16 = lshr i320 %r12, 64
-%r17 = trunc i320 %r16 to i64
-%r19 = getelementptr i64, i64* %r1, i32 3
-store i64 %r17, i64* %r19
-%r20 = lshr i320 %r16, 64
-%r21 = trunc i320 %r20 to i64
-%r23 = getelementptr i64, i64* %r1, i32 4
-store i64 %r21, i64* %r23
+%r4 = call i448 @mulPv384x64(i64* %r2, i64 %r3)
+%r6 = getelementptr i64, i64* %r1, i32 0
+%r7 = trunc i448 %r4 to i64
+store i64 %r7, i64* %r6
+%r8 = lshr i448 %r4, 64
+%r10 = getelementptr i64, i64* %r1, i32 1
+%r11 = trunc i448 %r8 to i64
+store i64 %r11, i64* %r10
+%r12 = lshr i448 %r8, 64
+%r14 = getelementptr i64, i64* %r1, i32 2
+%r15 = trunc i448 %r12 to i64
+store i64 %r15, i64* %r14
+%r16 = lshr i448 %r12, 64
+%r18 = getelementptr i64, i64* %r1, i32 3
+%r19 = trunc i448 %r16 to i64
+store i64 %r19, i64* %r18
+%r20 = lshr i448 %r16, 64
+%r22 = getelementptr i64, i64* %r1, i32 4
+%r23 = trunc i448 %r20 to i64
+store i64 %r23, i64* %r22
+%r24 = lshr i448 %r20, 64
+%r26 = getelementptr i64, i64* %r1, i32 5
+%r27 = trunc i448 %r24 to i64
+store i64 %r27, i64* %r26
+%r28 = lshr i448 %r24, 64
+%r30 = getelementptr i64, i64* %r1, i32 6
+%r31 = trunc i448 %r28 to i64
+store i64 %r31, i64* %r30
 ret void
 }
-define void @mcl_fpDbl_mulPre4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+define void @mcl_fpDbl_mulPre6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
 {
 %r4 = load i64, i64* %r3
-%r5 = call i320 @mulPv256x64(i64* %r2, i64 %r4)
-%r6 = trunc i320 %r5 to i64
+%r5 = call i448 @mulPv384x64(i64* %r2, i64 %r4)
+%r6 = trunc i448 %r5 to i64
 store i64 %r6, i64* %r1
-%r7 = lshr i320 %r5, 64
+%r7 = lshr i448 %r5, 64
 %r9 = getelementptr i64, i64* %r3, i32 1
 %r10 = load i64, i64* %r9
-%r11 = call i320 @mulPv256x64(i64* %r2, i64 %r10)
-%r12 = add i320 %r7, %r11
-%r13 = trunc i320 %r12 to i64
+%r11 = call i448 @mulPv384x64(i64* %r2, i64 %r10)
+%r12 = add i448 %r7, %r11
+%r13 = trunc i448 %r12 to i64
 %r15 = getelementptr i64, i64* %r1, i32 1
 store i64 %r13, i64* %r15
-%r16 = lshr i320 %r12, 64
+%r16 = lshr i448 %r12, 64
 %r18 = getelementptr i64, i64* %r3, i32 2
 %r19 = load i64, i64* %r18
-%r20 = call i320 @mulPv256x64(i64* %r2, i64 %r19)
-%r21 = add i320 %r16, %r20
-%r22 = trunc i320 %r21 to i64
+%r20 = call i448 @mulPv384x64(i64* %r2, i64 %r19)
+%r21 = add i448 %r16, %r20
+%r22 = trunc i448 %r21 to i64
 %r24 = getelementptr i64, i64* %r1, i32 2
 store i64 %r22, i64* %r24
-%r25 = lshr i320 %r21, 64
+%r25 = lshr i448 %r21, 64
 %r27 = getelementptr i64, i64* %r3, i32 3
 %r28 = load i64, i64* %r27
-%r29 = call i320 @mulPv256x64(i64* %r2, i64 %r28)
-%r30 = add i320 %r25, %r29
-%r32 = getelementptr i64, i64* %r1, i32 3
-%r33 = trunc i320 %r30 to i64
-%r35 = getelementptr i64, i64* %r32, i32 0
-store i64 %r33, i64* %r35
-%r36 = lshr i320 %r30, 64
-%r37 = trunc i320 %r36 to i64
-%r39 = getelementptr i64, i64* %r32, i32 1
-store i64 %r37, i64* %r39
-%r40 = lshr i320 %r36, 64
-%r41 = trunc i320 %r40 to i64
-%r43 = getelementptr i64, i64* %r32, i32 2
-store i64 %r41, i64* %r43
-%r44 = lshr i320 %r40, 64
-%r45 = trunc i320 %r44 to i64
-%r47 = getelementptr i64, i64* %r32, i32 3
-store i64 %r45, i64* %r47
-%r48 = lshr i320 %r44, 64
-%r49 = trunc i320 %r48 to i64
-%r51 = getelementptr i64, i64* %r32, i32 4
-store i64 %r49, i64* %r51
+%r29 = call i448 @mulPv384x64(i64* %r2, i64 %r28)
+%r30 = add i448 %r25, %r29
+%r31 = trunc i448 %r30 to i64
+%r33 = getelementptr i64, i64* %r1, i32 3
+store i64 %r31, i64* %r33
+%r34 = lshr i448 %r30, 64
+%r36 = getelementptr i64, i64* %r3, i32 4
+%r37 = load i64, i64* %r36
+%r38 = call i448 @mulPv384x64(i64* %r2, i64 %r37)
+%r39 = add i448 %r34, %r38
+%r40 = trunc i448 %r39 to i64
+%r42 = getelementptr i64, i64* %r1, i32 4
+store i64 %r40, i64* %r42
+%r43 = lshr i448 %r39, 64
+%r45 = getelementptr i64, i64* %r3, i32 5
+%r46 = load i64, i64* %r45
+%r47 = call i448 @mulPv384x64(i64* %r2, i64 %r46)
+%r48 = add i448 %r43, %r47
+%r50 = getelementptr i64, i64* %r1, i32 5
+%r52 = getelementptr i64, i64* %r50, i32 0
+%r53 = trunc i448 %r48 to i64
+store i64 %r53, i64* %r52
+%r54 = lshr i448 %r48, 64
+%r56 = getelementptr i64, i64* %r50, i32 1
+%r57 = trunc i448 %r54 to i64
+store i64 %r57, i64* %r56
+%r58 = lshr i448 %r54, 64
+%r60 = getelementptr i64, i64* %r50, i32 2
+%r61 = trunc i448 %r58 to i64
+store i64 %r61, i64* %r60
+%r62 = lshr i448 %r58, 64
+%r64 = getelementptr i64, i64* %r50, i32 3
+%r65 = trunc i448 %r62 to i64
+store i64 %r65, i64* %r64
+%r66 = lshr i448 %r62, 64
+%r68 = getelementptr i64, i64* %r50, i32 4
+%r69 = trunc i448 %r66 to i64
+store i64 %r69, i64* %r68
+%r70 = lshr i448 %r66, 64
+%r72 = getelementptr i64, i64* %r50, i32 5
+%r73 = trunc i448 %r70 to i64
+store i64 %r73, i64* %r72
+%r74 = lshr i448 %r70, 64
+%r76 = getelementptr i64, i64* %r50, i32 6
+%r77 = trunc i448 %r74 to i64
+store i64 %r77, i64* %r76
 ret void
 }
-define void @mcl_fpDbl_sqrPre4L(i64* noalias  %r1, i64* noalias  %r2)
+define void @mcl_fpDbl_sqrPre6L(i64* noalias  %r1, i64* noalias  %r2)
 {
 %r3 = load i64, i64* %r2
-%r4 = call i320 @mulPv256x64(i64* %r2, i64 %r3)
-%r5 = trunc i320 %r4 to i64
+%r4 = call i448 @mulPv384x64(i64* %r2, i64 %r3)
+%r5 = trunc i448 %r4 to i64
 store i64 %r5, i64* %r1
-%r6 = lshr i320 %r4, 64
+%r6 = lshr i448 %r4, 64
 %r8 = getelementptr i64, i64* %r2, i32 1
 %r9 = load i64, i64* %r8
-%r10 = call i320 @mulPv256x64(i64* %r2, i64 %r9)
-%r11 = add i320 %r6, %r10
-%r12 = trunc i320 %r11 to i64
+%r10 = call i448 @mulPv384x64(i64* %r2, i64 %r9)
+%r11 = add i448 %r6, %r10
+%r12 = trunc i448 %r11 to i64
 %r14 = getelementptr i64, i64* %r1, i32 1
 store i64 %r12, i64* %r14
-%r15 = lshr i320 %r11, 64
+%r15 = lshr i448 %r11, 64
 %r17 = getelementptr i64, i64* %r2, i32 2
 %r18 = load i64, i64* %r17
-%r19 = call i320 @mulPv256x64(i64* %r2, i64 %r18)
-%r20 = add i320 %r15, %r19
-%r21 = trunc i320 %r20 to i64
+%r19 = call i448 @mulPv384x64(i64* %r2, i64 %r18)
+%r20 = add i448 %r15, %r19
+%r21 = trunc i448 %r20 to i64
 %r23 = getelementptr i64, i64* %r1, i32 2
 store i64 %r21, i64* %r23
-%r24 = lshr i320 %r20, 64
+%r24 = lshr i448 %r20, 64
 %r26 = getelementptr i64, i64* %r2, i32 3
 %r27 = load i64, i64* %r26
-%r28 = call i320 @mulPv256x64(i64* %r2, i64 %r27)
-%r29 = add i320 %r24, %r28
-%r31 = getelementptr i64, i64* %r1, i32 3
-%r32 = trunc i320 %r29 to i64
-%r34 = getelementptr i64, i64* %r31, i32 0
-store i64 %r32, i64* %r34
-%r35 = lshr i320 %r29, 64
-%r36 = trunc i320 %r35 to i64
-%r38 = getelementptr i64, i64* %r31, i32 1
-store i64 %r36, i64* %r38
-%r39 = lshr i320 %r35, 64
-%r40 = trunc i320 %r39 to i64
-%r42 = getelementptr i64, i64* %r31, i32 2
-store i64 %r40, i64* %r42
-%r43 = lshr i320 %r39, 64
-%r44 = trunc i320 %r43 to i64
-%r46 = getelementptr i64, i64* %r31, i32 3
-store i64 %r44, i64* %r46
-%r47 = lshr i320 %r43, 64
-%r48 = trunc i320 %r47 to i64
-%r50 = getelementptr i64, i64* %r31, i32 4
-store i64 %r48, i64* %r50
+%r28 = call i448 @mulPv384x64(i64* %r2, i64 %r27)
+%r29 = add i448 %r24, %r28
+%r30 = trunc i448 %r29 to i64
+%r32 = getelementptr i64, i64* %r1, i32 3
+store i64 %r30, i64* %r32
+%r33 = lshr i448 %r29, 64
+%r35 = getelementptr i64, i64* %r2, i32 4
+%r36 = load i64, i64* %r35
+%r37 = call i448 @mulPv384x64(i64* %r2, i64 %r36)
+%r38 = add i448 %r33, %r37
+%r39 = trunc i448 %r38 to i64
+%r41 = getelementptr i64, i64* %r1, i32 4
+store i64 %r39, i64* %r41
+%r42 = lshr i448 %r38, 64
+%r44 = getelementptr i64, i64* %r2, i32 5
+%r45 = load i64, i64* %r44
+%r46 = call i448 @mulPv384x64(i64* %r2, i64 %r45)
+%r47 = add i448 %r42, %r46
+%r49 = getelementptr i64, i64* %r1, i32 5
+%r51 = getelementptr i64, i64* %r49, i32 0
+%r52 = trunc i448 %r47 to i64
+store i64 %r52, i64* %r51
+%r53 = lshr i448 %r47, 64
+%r55 = getelementptr i64, i64* %r49, i32 1
+%r56 = trunc i448 %r53 to i64
+store i64 %r56, i64* %r55
+%r57 = lshr i448 %r53, 64
+%r59 = getelementptr i64, i64* %r49, i32 2
+%r60 = trunc i448 %r57 to i64
+store i64 %r60, i64* %r59
+%r61 = lshr i448 %r57, 64
+%r63 = getelementptr i64, i64* %r49, i32 3
+%r64 = trunc i448 %r61 to i64
+store i64 %r64, i64* %r63
+%r65 = lshr i448 %r61, 64
+%r67 = getelementptr i64, i64* %r49, i32 4
+%r68 = trunc i448 %r65 to i64
+store i64 %r68, i64* %r67
+%r69 = lshr i448 %r65, 64
+%r71 = getelementptr i64, i64* %r49, i32 5
+%r72 = trunc i448 %r69 to i64
+store i64 %r72, i64* %r71
+%r73 = lshr i448 %r69, 64
+%r75 = getelementptr i64, i64* %r49, i32 6
+%r76 = trunc i448 %r73 to i64
+store i64 %r76, i64* %r75
 ret void
 }
-define void @mcl_fp_mont4L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
+define void @mcl_fp_mont6L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
 {
 %r6 = getelementptr i64, i64* %r4, i32 -1
 %r7 = load i64, i64* %r6
 %r9 = getelementptr i64, i64* %r3, i32 0
 %r10 = load i64, i64* %r9
-%r11 = call i320 @mulPv256x64(i64* %r2, i64 %r10)
-%r12 = zext i320 %r11 to i384
-%r13 = trunc i320 %r11 to i64
+%r11 = call i448 @mulPv384x64(i64* %r2, i64 %r10)
+%r12 = zext i448 %r11 to i512
+%r13 = trunc i448 %r11 to i64
 %r14 = mul i64 %r13, %r7
-%r15 = call i320 @mulPv256x64(i64* %r4, i64 %r14)
-%r16 = zext i320 %r15 to i384
-%r17 = add i384 %r12, %r16
-%r18 = lshr i384 %r17, 64
+%r15 = call i448 @mulPv384x64(i64* %r4, i64 %r14)
+%r16 = zext i448 %r15 to i512
+%r17 = add i512 %r12, %r16
+%r18 = lshr i512 %r17, 64
 %r20 = getelementptr i64, i64* %r3, i32 1
 %r21 = load i64, i64* %r20
-%r22 = call i320 @mulPv256x64(i64* %r2, i64 %r21)
-%r23 = zext i320 %r22 to i384
-%r24 = add i384 %r18, %r23
-%r25 = trunc i384 %r24 to i64
+%r22 = call i448 @mulPv384x64(i64* %r2, i64 %r21)
+%r23 = zext i448 %r22 to i512
+%r24 = add i512 %r18, %r23
+%r25 = trunc i512 %r24 to i64
 %r26 = mul i64 %r25, %r7
-%r27 = call i320 @mulPv256x64(i64* %r4, i64 %r26)
-%r28 = zext i320 %r27 to i384
-%r29 = add i384 %r24, %r28
-%r30 = lshr i384 %r29, 64
+%r27 = call i448 @mulPv384x64(i64* %r4, i64 %r26)
+%r28 = zext i448 %r27 to i512
+%r29 = add i512 %r24, %r28
+%r30 = lshr i512 %r29, 64
 %r32 = getelementptr i64, i64* %r3, i32 2
 %r33 = load i64, i64* %r32
-%r34 = call i320 @mulPv256x64(i64* %r2, i64 %r33)
-%r35 = zext i320 %r34 to i384
-%r36 = add i384 %r30, %r35
-%r37 = trunc i384 %r36 to i64
+%r34 = call i448 @mulPv384x64(i64* %r2, i64 %r33)
+%r35 = zext i448 %r34 to i512
+%r36 = add i512 %r30, %r35
+%r37 = trunc i512 %r36 to i64
 %r38 = mul i64 %r37, %r7
-%r39 = call i320 @mulPv256x64(i64* %r4, i64 %r38)
-%r40 = zext i320 %r39 to i384
-%r41 = add i384 %r36, %r40
-%r42 = lshr i384 %r41, 64
+%r39 = call i448 @mulPv384x64(i64* %r4, i64 %r38)
+%r40 = zext i448 %r39 to i512
+%r41 = add i512 %r36, %r40
+%r42 = lshr i512 %r41, 64
 %r44 = getelementptr i64, i64* %r3, i32 3
 %r45 = load i64, i64* %r44
-%r46 = call i320 @mulPv256x64(i64* %r2, i64 %r45)
-%r47 = zext i320 %r46 to i384
-%r48 = add i384 %r42, %r47
-%r49 = trunc i384 %r48 to i64
+%r46 = call i448 @mulPv384x64(i64* %r2, i64 %r45)
+%r47 = zext i448 %r46 to i512
+%r48 = add i512 %r42, %r47
+%r49 = trunc i512 %r48 to i64
 %r50 = mul i64 %r49, %r7
-%r51 = call i320 @mulPv256x64(i64* %r4, i64 %r50)
-%r52 = zext i320 %r51 to i384
-%r53 = add i384 %r48, %r52
-%r54 = lshr i384 %r53, 64
-%r55 = trunc i384 %r54 to i320
-%r56 = load i64, i64* %r4
-%r57 = zext i64 %r56 to i128
-%r59 = getelementptr i64, i64* %r4, i32 1
-%r60 = load i64, i64* %r59
-%r61 = zext i64 %r60 to i128
-%r62 = shl i128 %r61, 64
-%r63 = or i128 %r57, %r62
-%r64 = zext i128 %r63 to i192
-%r66 = getelementptr i64, i64* %r4, i32 2
-%r67 = load i64, i64* %r66
-%r68 = zext i64 %r67 to i192
-%r69 = shl i192 %r68, 128
-%r70 = or i192 %r64, %r69
-%r71 = zext i192 %r70 to i256
-%r73 = getelementptr i64, i64* %r4, i32 3
-%r74 = load i64, i64* %r73
-%r75 = zext i64 %r74 to i256
-%r76 = shl i256 %r75, 192
-%r77 = or i256 %r71, %r76
-%r78 = zext i256 %r77 to i320
-%r79 = sub i320 %r55, %r78
-%r80 = lshr i320 %r79, 256
-%r81 = trunc i320 %r80 to i1
-%r82 = select i1 %r81, i320 %r55, i320 %r79
-%r83 = trunc i320 %r82 to i256
-%r84 = trunc i256 %r83 to i64
-%r86 = getelementptr i64, i64* %r1, i32 0
-store i64 %r84, i64* %r86
-%r87 = lshr i256 %r83, 64
-%r88 = trunc i256 %r87 to i64
-%r90 = getelementptr i64, i64* %r1, i32 1
-store i64 %r88, i64* %r90
-%r91 = lshr i256 %r87, 64
-%r92 = trunc i256 %r91 to i64
-%r94 = getelementptr i64, i64* %r1, i32 2
-store i64 %r92, i64* %r94
-%r95 = lshr i256 %r91, 64
-%r96 = trunc i256 %r95 to i64
-%r98 = getelementptr i64, i64* %r1, i32 3
-store i64 %r96, i64* %r98
+%r51 = call i448 @mulPv384x64(i64* %r4, i64 %r50)
+%r52 = zext i448 %r51 to i512
+%r53 = add i512 %r48, %r52
+%r54 = lshr i512 %r53, 64
+%r56 = getelementptr i64, i64* %r3, i32 4
+%r57 = load i64, i64* %r56
+%r58 = call i448 @mulPv384x64(i64* %r2, i64 %r57)
+%r59 = zext i448 %r58 to i512
+%r60 = add i512 %r54, %r59
+%r61 = trunc i512 %r60 to i64
+%r62 = mul i64 %r61, %r7
+%r63 = call i448 @mulPv384x64(i64* %r4, i64 %r62)
+%r64 = zext i448 %r63 to i512
+%r65 = add i512 %r60, %r64
+%r66 = lshr i512 %r65, 64
+%r68 = getelementptr i64, i64* %r3, i32 5
+%r69 = load i64, i64* %r68
+%r70 = call i448 @mulPv384x64(i64* %r2, i64 %r69)
+%r71 = zext i448 %r70 to i512
+%r72 = add i512 %r66, %r71
+%r73 = trunc i512 %r72 to i64
+%r74 = mul i64 %r73, %r7
+%r75 = call i448 @mulPv384x64(i64* %r4, i64 %r74)
+%r76 = zext i448 %r75 to i512
+%r77 = add i512 %r72, %r76
+%r78 = lshr i512 %r77, 64
+%r79 = trunc i512 %r78 to i448
+%r80 = load i64, i64* %r4
+%r81 = zext i64 %r80 to i128
+%r83 = getelementptr i64, i64* %r4, i32 1
+%r84 = load i64, i64* %r83
+%r85 = zext i64 %r84 to i128
+%r86 = shl i128 %r85, 64
+%r87 = or i128 %r81, %r86
+%r88 = zext i128 %r87 to i192
+%r90 = getelementptr i64, i64* %r4, i32 2
+%r91 = load i64, i64* %r90
+%r92 = zext i64 %r91 to i192
+%r93 = shl i192 %r92, 128
+%r94 = or i192 %r88, %r93
+%r95 = zext i192 %r94 to i256
+%r97 = getelementptr i64, i64* %r4, i32 3
+%r98 = load i64, i64* %r97
+%r99 = zext i64 %r98 to i256
+%r100 = shl i256 %r99, 192
+%r101 = or i256 %r95, %r100
+%r102 = zext i256 %r101 to i320
+%r104 = getelementptr i64, i64* %r4, i32 4
+%r105 = load i64, i64* %r104
+%r106 = zext i64 %r105 to i320
+%r107 = shl i320 %r106, 256
+%r108 = or i320 %r102, %r107
+%r109 = zext i320 %r108 to i384
+%r111 = getelementptr i64, i64* %r4, i32 5
+%r112 = load i64, i64* %r111
+%r113 = zext i64 %r112 to i384
+%r114 = shl i384 %r113, 320
+%r115 = or i384 %r109, %r114
+%r116 = zext i384 %r115 to i448
+%r117 = sub i448 %r79, %r116
+%r118 = lshr i448 %r117, 384
+%r119 = trunc i448 %r118 to i1
+%r120 = select i1 %r119, i448 %r79, i448 %r117
+%r121 = trunc i448 %r120 to i384
+%r123 = getelementptr i64, i64* %r1, i32 0
+%r124 = trunc i384 %r121 to i64
+store i64 %r124, i64* %r123
+%r125 = lshr i384 %r121, 64
+%r127 = getelementptr i64, i64* %r1, i32 1
+%r128 = trunc i384 %r125 to i64
+store i64 %r128, i64* %r127
+%r129 = lshr i384 %r125, 64
+%r131 = getelementptr i64, i64* %r1, i32 2
+%r132 = trunc i384 %r129 to i64
+store i64 %r132, i64* %r131
+%r133 = lshr i384 %r129, 64
+%r135 = getelementptr i64, i64* %r1, i32 3
+%r136 = trunc i384 %r133 to i64
+store i64 %r136, i64* %r135
+%r137 = lshr i384 %r133, 64
+%r139 = getelementptr i64, i64* %r1, i32 4
+%r140 = trunc i384 %r137 to i64
+store i64 %r140, i64* %r139
+%r141 = lshr i384 %r137, 64
+%r143 = getelementptr i64, i64* %r1, i32 5
+%r144 = trunc i384 %r141 to i64
+store i64 %r144, i64* %r143
 ret void
 }
-define void @mcl_fp_montNF4L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
+define void @mcl_fp_montNF6L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
 {
 %r6 = getelementptr i64, i64* %r4, i32 -1
 %r7 = load i64, i64* %r6
 %r8 = load i64, i64* %r3
-%r9 = call i320 @mulPv256x64(i64* %r2, i64 %r8)
-%r10 = trunc i320 %r9 to i64
+%r9 = call i448 @mulPv384x64(i64* %r2, i64 %r8)
+%r10 = trunc i448 %r9 to i64
 %r11 = mul i64 %r10, %r7
-%r12 = call i320 @mulPv256x64(i64* %r4, i64 %r11)
-%r13 = add i320 %r9, %r12
-%r14 = lshr i320 %r13, 64
+%r12 = call i448 @mulPv384x64(i64* %r4, i64 %r11)
+%r13 = add i448 %r9, %r12
+%r14 = lshr i448 %r13, 64
 %r16 = getelementptr i64, i64* %r3, i32 1
 %r17 = load i64, i64* %r16
-%r18 = call i320 @mulPv256x64(i64* %r2, i64 %r17)
-%r19 = add i320 %r14, %r18
-%r20 = trunc i320 %r19 to i64
+%r18 = call i448 @mulPv384x64(i64* %r2, i64 %r17)
+%r19 = add i448 %r14, %r18
+%r20 = trunc i448 %r19 to i64
 %r21 = mul i64 %r20, %r7
-%r22 = call i320 @mulPv256x64(i64* %r4, i64 %r21)
-%r23 = add i320 %r19, %r22
-%r24 = lshr i320 %r23, 64
+%r22 = call i448 @mulPv384x64(i64* %r4, i64 %r21)
+%r23 = add i448 %r19, %r22
+%r24 = lshr i448 %r23, 64
 %r26 = getelementptr i64, i64* %r3, i32 2
 %r27 = load i64, i64* %r26
-%r28 = call i320 @mulPv256x64(i64* %r2, i64 %r27)
-%r29 = add i320 %r24, %r28
-%r30 = trunc i320 %r29 to i64
+%r28 = call i448 @mulPv384x64(i64* %r2, i64 %r27)
+%r29 = add i448 %r24, %r28
+%r30 = trunc i448 %r29 to i64
 %r31 = mul i64 %r30, %r7
-%r32 = call i320 @mulPv256x64(i64* %r4, i64 %r31)
-%r33 = add i320 %r29, %r32
-%r34 = lshr i320 %r33, 64
+%r32 = call i448 @mulPv384x64(i64* %r4, i64 %r31)
+%r33 = add i448 %r29, %r32
+%r34 = lshr i448 %r33, 64
 %r36 = getelementptr i64, i64* %r3, i32 3
 %r37 = load i64, i64* %r36
-%r38 = call i320 @mulPv256x64(i64* %r2, i64 %r37)
-%r39 = add i320 %r34, %r38
-%r40 = trunc i320 %r39 to i64
+%r38 = call i448 @mulPv384x64(i64* %r2, i64 %r37)
+%r39 = add i448 %r34, %r38
+%r40 = trunc i448 %r39 to i64
 %r41 = mul i64 %r40, %r7
-%r42 = call i320 @mulPv256x64(i64* %r4, i64 %r41)
-%r43 = add i320 %r39, %r42
-%r44 = lshr i320 %r43, 64
-%r45 = trunc i320 %r44 to i256
-%r46 = load i64, i64* %r4
-%r47 = zext i64 %r46 to i128
-%r49 = getelementptr i64, i64* %r4, i32 1
-%r50 = load i64, i64* %r49
-%r51 = zext i64 %r50 to i128
-%r52 = shl i128 %r51, 64
-%r53 = or i128 %r47, %r52
-%r54 = zext i128 %r53 to i192
-%r56 = getelementptr i64, i64* %r4, i32 2
+%r42 = call i448 @mulPv384x64(i64* %r4, i64 %r41)
+%r43 = add i448 %r39, %r42
+%r44 = lshr i448 %r43, 64
+%r46 = getelementptr i64, i64* %r3, i32 4
+%r47 = load i64, i64* %r46
+%r48 = call i448 @mulPv384x64(i64* %r2, i64 %r47)
+%r49 = add i448 %r44, %r48
+%r50 = trunc i448 %r49 to i64
+%r51 = mul i64 %r50, %r7
+%r52 = call i448 @mulPv384x64(i64* %r4, i64 %r51)
+%r53 = add i448 %r49, %r52
+%r54 = lshr i448 %r53, 64
+%r56 = getelementptr i64, i64* %r3, i32 5
 %r57 = load i64, i64* %r56
-%r58 = zext i64 %r57 to i192
-%r59 = shl i192 %r58, 128
-%r60 = or i192 %r54, %r59
-%r61 = zext i192 %r60 to i256
-%r63 = getelementptr i64, i64* %r4, i32 3
-%r64 = load i64, i64* %r63
-%r65 = zext i64 %r64 to i256
-%r66 = shl i256 %r65, 192
-%r67 = or i256 %r61, %r66
-%r68 = sub i256 %r45, %r67
-%r69 = lshr i256 %r68, 255
-%r70 = trunc i256 %r69 to i1
-%r71 = select i1 %r70, i256 %r45, i256 %r68
-%r72 = trunc i256 %r71 to i64
-%r74 = getelementptr i64, i64* %r1, i32 0
-store i64 %r72, i64* %r74
-%r75 = lshr i256 %r71, 64
-%r76 = trunc i256 %r75 to i64
-%r78 = getelementptr i64, i64* %r1, i32 1
-store i64 %r76, i64* %r78
-%r79 = lshr i256 %r75, 64
-%r80 = trunc i256 %r79 to i64
-%r82 = getelementptr i64, i64* %r1, i32 2
-store i64 %r80, i64* %r82
-%r83 = lshr i256 %r79, 64
-%r84 = trunc i256 %r83 to i64
-%r86 = getelementptr i64, i64* %r1, i32 3
-store i64 %r84, i64* %r86
+%r58 = call i448 @mulPv384x64(i64* %r2, i64 %r57)
+%r59 = add i448 %r54, %r58
+%r60 = trunc i448 %r59 to i64
+%r61 = mul i64 %r60, %r7
+%r62 = call i448 @mulPv384x64(i64* %r4, i64 %r61)
+%r63 = add i448 %r59, %r62
+%r64 = lshr i448 %r63, 64
+%r65 = trunc i448 %r64 to i384
+%r66 = load i64, i64* %r4
+%r67 = zext i64 %r66 to i128
+%r69 = getelementptr i64, i64* %r4, i32 1
+%r70 = load i64, i64* %r69
+%r71 = zext i64 %r70 to i128
+%r72 = shl i128 %r71, 64
+%r73 = or i128 %r67, %r72
+%r74 = zext i128 %r73 to i192
+%r76 = getelementptr i64, i64* %r4, i32 2
+%r77 = load i64, i64* %r76
+%r78 = zext i64 %r77 to i192
+%r79 = shl i192 %r78, 128
+%r80 = or i192 %r74, %r79
+%r81 = zext i192 %r80 to i256
+%r83 = getelementptr i64, i64* %r4, i32 3
+%r84 = load i64, i64* %r83
+%r85 = zext i64 %r84 to i256
+%r86 = shl i256 %r85, 192
+%r87 = or i256 %r81, %r86
+%r88 = zext i256 %r87 to i320
+%r90 = getelementptr i64, i64* %r4, i32 4
+%r91 = load i64, i64* %r90
+%r92 = zext i64 %r91 to i320
+%r93 = shl i320 %r92, 256
+%r94 = or i320 %r88, %r93
+%r95 = zext i320 %r94 to i384
+%r97 = getelementptr i64, i64* %r4, i32 5
+%r98 = load i64, i64* %r97
+%r99 = zext i64 %r98 to i384
+%r100 = shl i384 %r99, 320
+%r101 = or i384 %r95, %r100
+%r102 = sub i384 %r65, %r101
+%r103 = lshr i384 %r102, 383
+%r104 = trunc i384 %r103 to i1
+%r105 = select i1 %r104, i384 %r65, i384 %r102
+%r107 = getelementptr i64, i64* %r1, i32 0
+%r108 = trunc i384 %r105 to i64
+store i64 %r108, i64* %r107
+%r109 = lshr i384 %r105, 64
+%r111 = getelementptr i64, i64* %r1, i32 1
+%r112 = trunc i384 %r109 to i64
+store i64 %r112, i64* %r111
+%r113 = lshr i384 %r109, 64
+%r115 = getelementptr i64, i64* %r1, i32 2
+%r116 = trunc i384 %r113 to i64
+store i64 %r116, i64* %r115
+%r117 = lshr i384 %r113, 64
+%r119 = getelementptr i64, i64* %r1, i32 3
+%r120 = trunc i384 %r117 to i64
+store i64 %r120, i64* %r119
+%r121 = lshr i384 %r117, 64
+%r123 = getelementptr i64, i64* %r1, i32 4
+%r124 = trunc i384 %r121 to i64
+store i64 %r124, i64* %r123
+%r125 = lshr i384 %r121, 64
+%r127 = getelementptr i64, i64* %r1, i32 5
+%r128 = trunc i384 %r125 to i64
+store i64 %r128, i64* %r127
 ret void
 }
-define void @mcl_fp_montRed4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+define void @mcl_fp_montRed6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
 {
 %r5 = getelementptr i64, i64* %r3, i32 -1
 %r6 = load i64, i64* %r5
@@ -2577,165 +3369,506 @@ define void @mcl_fp_montRed4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r26 = zext i64 %r25 to i256
 %r27 = shl i256 %r26, 192
 %r28 = or i256 %r22, %r27
-%r29 = load i64, i64* %r2
-%r30 = zext i64 %r29 to i128
-%r32 = getelementptr i64, i64* %r2, i32 1
-%r33 = load i64, i64* %r32
-%r34 = zext i64 %r33 to i128
-%r35 = shl i128 %r34, 64
-%r36 = or i128 %r30, %r35
-%r37 = zext i128 %r36 to i192
-%r39 = getelementptr i64, i64* %r2, i32 2
-%r40 = load i64, i64* %r39
-%r41 = zext i64 %r40 to i192
-%r42 = shl i192 %r41, 128
-%r43 = or i192 %r37, %r42
-%r44 = zext i192 %r43 to i256
-%r46 = getelementptr i64, i64* %r2, i32 3
+%r29 = zext i256 %r28 to i320
+%r31 = getelementptr i64, i64* %r3, i32 4
+%r32 = load i64, i64* %r31
+%r33 = zext i64 %r32 to i320
+%r34 = shl i320 %r33, 256
+%r35 = or i320 %r29, %r34
+%r36 = zext i320 %r35 to i384
+%r38 = getelementptr i64, i64* %r3, i32 5
+%r39 = load i64, i64* %r38
+%r40 = zext i64 %r39 to i384
+%r41 = shl i384 %r40, 320
+%r42 = or i384 %r36, %r41
+%r43 = load i64, i64* %r2
+%r44 = zext i64 %r43 to i128
+%r46 = getelementptr i64, i64* %r2, i32 1
 %r47 = load i64, i64* %r46
-%r48 = zext i64 %r47 to i256
-%r49 = shl i256 %r48, 192
-%r50 = or i256 %r44, %r49
-%r51 = zext i256 %r50 to i320
-%r53 = getelementptr i64, i64* %r2, i32 4
+%r48 = zext i64 %r47 to i128
+%r49 = shl i128 %r48, 64
+%r50 = or i128 %r44, %r49
+%r51 = zext i128 %r50 to i192
+%r53 = getelementptr i64, i64* %r2, i32 2
 %r54 = load i64, i64* %r53
-%r55 = zext i64 %r54 to i320
-%r56 = shl i320 %r55, 256
-%r57 = or i320 %r51, %r56
-%r58 = zext i320 %r57 to i384
-%r60 = getelementptr i64, i64* %r2, i32 5
+%r55 = zext i64 %r54 to i192
+%r56 = shl i192 %r55, 128
+%r57 = or i192 %r51, %r56
+%r58 = zext i192 %r57 to i256
+%r60 = getelementptr i64, i64* %r2, i32 3
 %r61 = load i64, i64* %r60
-%r62 = zext i64 %r61 to i384
-%r63 = shl i384 %r62, 320
-%r64 = or i384 %r58, %r63
-%r65 = zext i384 %r64 to i448
-%r67 = getelementptr i64, i64* %r2, i32 6
+%r62 = zext i64 %r61 to i256
+%r63 = shl i256 %r62, 192
+%r64 = or i256 %r58, %r63
+%r65 = zext i256 %r64 to i320
+%r67 = getelementptr i64, i64* %r2, i32 4
 %r68 = load i64, i64* %r67
-%r69 = zext i64 %r68 to i448
-%r70 = shl i448 %r69, 384
-%r71 = or i448 %r65, %r70
-%r72 = zext i448 %r71 to i512
-%r74 = getelementptr i64, i64* %r2, i32 7
+%r69 = zext i64 %r68 to i320
+%r70 = shl i320 %r69, 256
+%r71 = or i320 %r65, %r70
+%r72 = zext i320 %r71 to i384
+%r74 = getelementptr i64, i64* %r2, i32 5
 %r75 = load i64, i64* %r74
-%r76 = zext i64 %r75 to i512
-%r77 = shl i512 %r76, 448
-%r78 = or i512 %r72, %r77
-%r79 = zext i512 %r78 to i576
-%r80 = trunc i576 %r79 to i64
-%r81 = mul i64 %r80, %r6
-%r82 = call i320 @mulPv256x64(i64* %r3, i64 %r81)
-%r83 = zext i320 %r82 to i576
-%r84 = add i576 %r79, %r83
-%r85 = lshr i576 %r84, 64
-%r86 = trunc i576 %r85 to i512
-%r87 = trunc i512 %r86 to i64
-%r88 = mul i64 %r87, %r6
-%r89 = call i320 @mulPv256x64(i64* %r3, i64 %r88)
-%r90 = zext i320 %r89 to i512
-%r91 = add i512 %r86, %r90
+%r76 = zext i64 %r75 to i384
+%r77 = shl i384 %r76, 320
+%r78 = or i384 %r72, %r77
+%r79 = trunc i384 %r78 to i64
+%r80 = mul i64 %r79, %r6
+%r81 = call i448 @mulPv384x64(i64* %r3, i64 %r80)
+%r83 = getelementptr i64, i64* %r2, i32 6
+%r84 = load i64, i64* %r83
+%r85 = zext i64 %r84 to i448
+%r86 = shl i448 %r85, 384
+%r87 = zext i384 %r78 to i448
+%r88 = or i448 %r86, %r87
+%r89 = zext i448 %r88 to i512
+%r90 = zext i448 %r81 to i512
+%r91 = add i512 %r89, %r90
 %r92 = lshr i512 %r91, 64
 %r93 = trunc i512 %r92 to i448
-%r94 = trunc i448 %r93 to i64
-%r95 = mul i64 %r94, %r6
-%r96 = call i320 @mulPv256x64(i64* %r3, i64 %r95)
-%r97 = zext i320 %r96 to i448
-%r98 = add i448 %r93, %r97
-%r99 = lshr i448 %r98, 64
-%r100 = trunc i448 %r99 to i384
-%r101 = trunc i384 %r100 to i64
-%r102 = mul i64 %r101, %r6
-%r103 = call i320 @mulPv256x64(i64* %r3, i64 %r102)
-%r104 = zext i320 %r103 to i384
-%r105 = add i384 %r100, %r104
-%r106 = lshr i384 %r105, 64
-%r107 = trunc i384 %r106 to i320
-%r108 = zext i256 %r28 to i320
-%r109 = sub i320 %r107, %r108
-%r110 = lshr i320 %r109, 256
-%r111 = trunc i320 %r110 to i1
-%r112 = select i1 %r111, i320 %r107, i320 %r109
-%r113 = trunc i320 %r112 to i256
-%r114 = trunc i256 %r113 to i64
-%r116 = getelementptr i64, i64* %r1, i32 0
-store i64 %r114, i64* %r116
-%r117 = lshr i256 %r113, 64
-%r118 = trunc i256 %r117 to i64
-%r120 = getelementptr i64, i64* %r1, i32 1
-store i64 %r118, i64* %r120
-%r121 = lshr i256 %r117, 64
-%r122 = trunc i256 %r121 to i64
-%r124 = getelementptr i64, i64* %r1, i32 2
-store i64 %r122, i64* %r124
-%r125 = lshr i256 %r121, 64
-%r126 = trunc i256 %r125 to i64
-%r128 = getelementptr i64, i64* %r1, i32 3
-store i64 %r126, i64* %r128
-ret void
-}
-define i64 @mcl_fp_addPre4L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+%r94 = lshr i448 %r93, 384
+%r95 = trunc i448 %r94 to i64
+%r96 = trunc i448 %r93 to i384
+%r97 = trunc i384 %r96 to i64
+%r98 = mul i64 %r97, %r6
+%r99 = call i448 @mulPv384x64(i64* %r3, i64 %r98)
+%r100 = zext i64 %r95 to i448
+%r101 = shl i448 %r100, 384
+%r102 = add i448 %r99, %r101
+%r104 = getelementptr i64, i64* %r2, i32 7
+%r105 = load i64, i64* %r104
+%r106 = zext i64 %r105 to i448
+%r107 = shl i448 %r106, 384
+%r108 = zext i384 %r96 to i448
+%r109 = or i448 %r107, %r108
+%r110 = zext i448 %r109 to i512
+%r111 = zext i448 %r102 to i512
+%r112 = add i512 %r110, %r111
+%r113 = lshr i512 %r112, 64
+%r114 = trunc i512 %r113 to i448
+%r115 = lshr i448 %r114, 384
+%r116 = trunc i448 %r115 to i64
+%r117 = trunc i448 %r114 to i384
+%r118 = trunc i384 %r117 to i64
+%r119 = mul i64 %r118, %r6
+%r120 = call i448 @mulPv384x64(i64* %r3, i64 %r119)
+%r121 = zext i64 %r116 to i448
+%r122 = shl i448 %r121, 384
+%r123 = add i448 %r120, %r122
+%r125 = getelementptr i64, i64* %r2, i32 8
+%r126 = load i64, i64* %r125
+%r127 = zext i64 %r126 to i448
+%r128 = shl i448 %r127, 384
+%r129 = zext i384 %r117 to i448
+%r130 = or i448 %r128, %r129
+%r131 = zext i448 %r130 to i512
+%r132 = zext i448 %r123 to i512
+%r133 = add i512 %r131, %r132
+%r134 = lshr i512 %r133, 64
+%r135 = trunc i512 %r134 to i448
+%r136 = lshr i448 %r135, 384
+%r137 = trunc i448 %r136 to i64
+%r138 = trunc i448 %r135 to i384
+%r139 = trunc i384 %r138 to i64
+%r140 = mul i64 %r139, %r6
+%r141 = call i448 @mulPv384x64(i64* %r3, i64 %r140)
+%r142 = zext i64 %r137 to i448
+%r143 = shl i448 %r142, 384
+%r144 = add i448 %r141, %r143
+%r146 = getelementptr i64, i64* %r2, i32 9
+%r147 = load i64, i64* %r146
+%r148 = zext i64 %r147 to i448
+%r149 = shl i448 %r148, 384
+%r150 = zext i384 %r138 to i448
+%r151 = or i448 %r149, %r150
+%r152 = zext i448 %r151 to i512
+%r153 = zext i448 %r144 to i512
+%r154 = add i512 %r152, %r153
+%r155 = lshr i512 %r154, 64
+%r156 = trunc i512 %r155 to i448
+%r157 = lshr i448 %r156, 384
+%r158 = trunc i448 %r157 to i64
+%r159 = trunc i448 %r156 to i384
+%r160 = trunc i384 %r159 to i64
+%r161 = mul i64 %r160, %r6
+%r162 = call i448 @mulPv384x64(i64* %r3, i64 %r161)
+%r163 = zext i64 %r158 to i448
+%r164 = shl i448 %r163, 384
+%r165 = add i448 %r162, %r164
+%r167 = getelementptr i64, i64* %r2, i32 10
+%r168 = load i64, i64* %r167
+%r169 = zext i64 %r168 to i448
+%r170 = shl i448 %r169, 384
+%r171 = zext i384 %r159 to i448
+%r172 = or i448 %r170, %r171
+%r173 = zext i448 %r172 to i512
+%r174 = zext i448 %r165 to i512
+%r175 = add i512 %r173, %r174
+%r176 = lshr i512 %r175, 64
+%r177 = trunc i512 %r176 to i448
+%r178 = lshr i448 %r177, 384
+%r179 = trunc i448 %r178 to i64
+%r180 = trunc i448 %r177 to i384
+%r181 = trunc i384 %r180 to i64
+%r182 = mul i64 %r181, %r6
+%r183 = call i448 @mulPv384x64(i64* %r3, i64 %r182)
+%r184 = zext i64 %r179 to i448
+%r185 = shl i448 %r184, 384
+%r186 = add i448 %r183, %r185
+%r188 = getelementptr i64, i64* %r2, i32 11
+%r189 = load i64, i64* %r188
+%r190 = zext i64 %r189 to i448
+%r191 = shl i448 %r190, 384
+%r192 = zext i384 %r180 to i448
+%r193 = or i448 %r191, %r192
+%r194 = zext i448 %r193 to i512
+%r195 = zext i448 %r186 to i512
+%r196 = add i512 %r194, %r195
+%r197 = lshr i512 %r196, 64
+%r198 = trunc i512 %r197 to i448
+%r199 = lshr i448 %r198, 384
+%r200 = trunc i448 %r199 to i64
+%r201 = trunc i448 %r198 to i384
+%r202 = zext i384 %r42 to i448
+%r203 = zext i384 %r201 to i448
+%r204 = sub i448 %r203, %r202
+%r205 = lshr i448 %r204, 384
+%r206 = trunc i448 %r205 to i1
+%r207 = select i1 %r206, i448 %r203, i448 %r204
+%r208 = trunc i448 %r207 to i384
+%r210 = getelementptr i64, i64* %r1, i32 0
+%r211 = trunc i384 %r208 to i64
+store i64 %r211, i64* %r210
+%r212 = lshr i384 %r208, 64
+%r214 = getelementptr i64, i64* %r1, i32 1
+%r215 = trunc i384 %r212 to i64
+store i64 %r215, i64* %r214
+%r216 = lshr i384 %r212, 64
+%r218 = getelementptr i64, i64* %r1, i32 2
+%r219 = trunc i384 %r216 to i64
+store i64 %r219, i64* %r218
+%r220 = lshr i384 %r216, 64
+%r222 = getelementptr i64, i64* %r1, i32 3
+%r223 = trunc i384 %r220 to i64
+store i64 %r223, i64* %r222
+%r224 = lshr i384 %r220, 64
+%r226 = getelementptr i64, i64* %r1, i32 4
+%r227 = trunc i384 %r224 to i64
+store i64 %r227, i64* %r226
+%r228 = lshr i384 %r224, 64
+%r230 = getelementptr i64, i64* %r1, i32 5
+%r231 = trunc i384 %r228 to i64
+store i64 %r231, i64* %r230
+ret void
+}
+define void @mcl_fp_montRedNF6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
 {
-%r5 = load i64, i64* %r3
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r3, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r3, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r3, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r28 = load i64, i64* %r4
-%r29 = zext i64 %r28 to i128
-%r31 = getelementptr i64, i64* %r4, i32 1
+%r5 = getelementptr i64, i64* %r3, i32 -1
+%r6 = load i64, i64* %r5
+%r7 = load i64, i64* %r3
+%r8 = zext i64 %r7 to i128
+%r10 = getelementptr i64, i64* %r3, i32 1
+%r11 = load i64, i64* %r10
+%r12 = zext i64 %r11 to i128
+%r13 = shl i128 %r12, 64
+%r14 = or i128 %r8, %r13
+%r15 = zext i128 %r14 to i192
+%r17 = getelementptr i64, i64* %r3, i32 2
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i192
+%r20 = shl i192 %r19, 128
+%r21 = or i192 %r15, %r20
+%r22 = zext i192 %r21 to i256
+%r24 = getelementptr i64, i64* %r3, i32 3
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i256
+%r27 = shl i256 %r26, 192
+%r28 = or i256 %r22, %r27
+%r29 = zext i256 %r28 to i320
+%r31 = getelementptr i64, i64* %r3, i32 4
 %r32 = load i64, i64* %r31
-%r33 = zext i64 %r32 to i128
-%r34 = shl i128 %r33, 64
-%r35 = or i128 %r29, %r34
-%r36 = zext i128 %r35 to i192
-%r38 = getelementptr i64, i64* %r4, i32 2
-%r39 = load i64, i64* %r38
-%r40 = zext i64 %r39 to i192
-%r41 = shl i192 %r40, 128
-%r42 = or i192 %r36, %r41
-%r43 = zext i192 %r42 to i256
-%r45 = getelementptr i64, i64* %r4, i32 3
+%r33 = zext i64 %r32 to i320
+%r34 = shl i320 %r33, 256
+%r35 = or i320 %r29, %r34
+%r36 = zext i320 %r35 to i384
+%r38 = getelementptr i64, i64* %r3, i32 5
+%r39 = load i64, i64* %r38
+%r40 = zext i64 %r39 to i384
+%r41 = shl i384 %r40, 320
+%r42 = or i384 %r36, %r41
+%r43 = load i64, i64* %r2
+%r44 = zext i64 %r43 to i128
+%r46 = getelementptr i64, i64* %r2, i32 1
+%r47 = load i64, i64* %r46
+%r48 = zext i64 %r47 to i128
+%r49 = shl i128 %r48, 64
+%r50 = or i128 %r44, %r49
+%r51 = zext i128 %r50 to i192
+%r53 = getelementptr i64, i64* %r2, i32 2
+%r54 = load i64, i64* %r53
+%r55 = zext i64 %r54 to i192
+%r56 = shl i192 %r55, 128
+%r57 = or i192 %r51, %r56
+%r58 = zext i192 %r57 to i256
+%r60 = getelementptr i64, i64* %r2, i32 3
+%r61 = load i64, i64* %r60
+%r62 = zext i64 %r61 to i256
+%r63 = shl i256 %r62, 192
+%r64 = or i256 %r58, %r63
+%r65 = zext i256 %r64 to i320
+%r67 = getelementptr i64, i64* %r2, i32 4
+%r68 = load i64, i64* %r67
+%r69 = zext i64 %r68 to i320
+%r70 = shl i320 %r69, 256
+%r71 = or i320 %r65, %r70
+%r72 = zext i320 %r71 to i384
+%r74 = getelementptr i64, i64* %r2, i32 5
+%r75 = load i64, i64* %r74
+%r76 = zext i64 %r75 to i384
+%r77 = shl i384 %r76, 320
+%r78 = or i384 %r72, %r77
+%r79 = trunc i384 %r78 to i64
+%r80 = mul i64 %r79, %r6
+%r81 = call i448 @mulPv384x64(i64* %r3, i64 %r80)
+%r83 = getelementptr i64, i64* %r2, i32 6
+%r84 = load i64, i64* %r83
+%r85 = zext i64 %r84 to i448
+%r86 = shl i448 %r85, 384
+%r87 = zext i384 %r78 to i448
+%r88 = or i448 %r86, %r87
+%r89 = zext i448 %r88 to i512
+%r90 = zext i448 %r81 to i512
+%r91 = add i512 %r89, %r90
+%r92 = lshr i512 %r91, 64
+%r93 = trunc i512 %r92 to i448
+%r94 = lshr i448 %r93, 384
+%r95 = trunc i448 %r94 to i64
+%r96 = trunc i448 %r93 to i384
+%r97 = trunc i384 %r96 to i64
+%r98 = mul i64 %r97, %r6
+%r99 = call i448 @mulPv384x64(i64* %r3, i64 %r98)
+%r100 = zext i64 %r95 to i448
+%r101 = shl i448 %r100, 384
+%r102 = add i448 %r99, %r101
+%r104 = getelementptr i64, i64* %r2, i32 7
+%r105 = load i64, i64* %r104
+%r106 = zext i64 %r105 to i448
+%r107 = shl i448 %r106, 384
+%r108 = zext i384 %r96 to i448
+%r109 = or i448 %r107, %r108
+%r110 = zext i448 %r109 to i512
+%r111 = zext i448 %r102 to i512
+%r112 = add i512 %r110, %r111
+%r113 = lshr i512 %r112, 64
+%r114 = trunc i512 %r113 to i448
+%r115 = lshr i448 %r114, 384
+%r116 = trunc i448 %r115 to i64
+%r117 = trunc i448 %r114 to i384
+%r118 = trunc i384 %r117 to i64
+%r119 = mul i64 %r118, %r6
+%r120 = call i448 @mulPv384x64(i64* %r3, i64 %r119)
+%r121 = zext i64 %r116 to i448
+%r122 = shl i448 %r121, 384
+%r123 = add i448 %r120, %r122
+%r125 = getelementptr i64, i64* %r2, i32 8
+%r126 = load i64, i64* %r125
+%r127 = zext i64 %r126 to i448
+%r128 = shl i448 %r127, 384
+%r129 = zext i384 %r117 to i448
+%r130 = or i448 %r128, %r129
+%r131 = zext i448 %r130 to i512
+%r132 = zext i448 %r123 to i512
+%r133 = add i512 %r131, %r132
+%r134 = lshr i512 %r133, 64
+%r135 = trunc i512 %r134 to i448
+%r136 = lshr i448 %r135, 384
+%r137 = trunc i448 %r136 to i64
+%r138 = trunc i448 %r135 to i384
+%r139 = trunc i384 %r138 to i64
+%r140 = mul i64 %r139, %r6
+%r141 = call i448 @mulPv384x64(i64* %r3, i64 %r140)
+%r142 = zext i64 %r137 to i448
+%r143 = shl i448 %r142, 384
+%r144 = add i448 %r141, %r143
+%r146 = getelementptr i64, i64* %r2, i32 9
+%r147 = load i64, i64* %r146
+%r148 = zext i64 %r147 to i448
+%r149 = shl i448 %r148, 384
+%r150 = zext i384 %r138 to i448
+%r151 = or i448 %r149, %r150
+%r152 = zext i448 %r151 to i512
+%r153 = zext i448 %r144 to i512
+%r154 = add i512 %r152, %r153
+%r155 = lshr i512 %r154, 64
+%r156 = trunc i512 %r155 to i448
+%r157 = lshr i448 %r156, 384
+%r158 = trunc i448 %r157 to i64
+%r159 = trunc i448 %r156 to i384
+%r160 = trunc i384 %r159 to i64
+%r161 = mul i64 %r160, %r6
+%r162 = call i448 @mulPv384x64(i64* %r3, i64 %r161)
+%r163 = zext i64 %r158 to i448
+%r164 = shl i448 %r163, 384
+%r165 = add i448 %r162, %r164
+%r167 = getelementptr i64, i64* %r2, i32 10
+%r168 = load i64, i64* %r167
+%r169 = zext i64 %r168 to i448
+%r170 = shl i448 %r169, 384
+%r171 = zext i384 %r159 to i448
+%r172 = or i448 %r170, %r171
+%r173 = zext i448 %r172 to i512
+%r174 = zext i448 %r165 to i512
+%r175 = add i512 %r173, %r174
+%r176 = lshr i512 %r175, 64
+%r177 = trunc i512 %r176 to i448
+%r178 = lshr i448 %r177, 384
+%r179 = trunc i448 %r178 to i64
+%r180 = trunc i448 %r177 to i384
+%r181 = trunc i384 %r180 to i64
+%r182 = mul i64 %r181, %r6
+%r183 = call i448 @mulPv384x64(i64* %r3, i64 %r182)
+%r184 = zext i64 %r179 to i448
+%r185 = shl i448 %r184, 384
+%r186 = add i448 %r183, %r185
+%r188 = getelementptr i64, i64* %r2, i32 11
+%r189 = load i64, i64* %r188
+%r190 = zext i64 %r189 to i448
+%r191 = shl i448 %r190, 384
+%r192 = zext i384 %r180 to i448
+%r193 = or i448 %r191, %r192
+%r194 = zext i448 %r193 to i512
+%r195 = zext i448 %r186 to i512
+%r196 = add i512 %r194, %r195
+%r197 = lshr i512 %r196, 64
+%r198 = trunc i512 %r197 to i448
+%r199 = lshr i448 %r198, 384
+%r200 = trunc i448 %r199 to i64
+%r201 = trunc i448 %r198 to i384
+%r202 = sub i384 %r201, %r42
+%r203 = lshr i384 %r202, 383
+%r204 = trunc i384 %r203 to i1
+%r205 = select i1 %r204, i384 %r201, i384 %r202
+%r207 = getelementptr i64, i64* %r1, i32 0
+%r208 = trunc i384 %r205 to i64
+store i64 %r208, i64* %r207
+%r209 = lshr i384 %r205, 64
+%r211 = getelementptr i64, i64* %r1, i32 1
+%r212 = trunc i384 %r209 to i64
+store i64 %r212, i64* %r211
+%r213 = lshr i384 %r209, 64
+%r215 = getelementptr i64, i64* %r1, i32 2
+%r216 = trunc i384 %r213 to i64
+store i64 %r216, i64* %r215
+%r217 = lshr i384 %r213, 64
+%r219 = getelementptr i64, i64* %r1, i32 3
+%r220 = trunc i384 %r217 to i64
+store i64 %r220, i64* %r219
+%r221 = lshr i384 %r217, 64
+%r223 = getelementptr i64, i64* %r1, i32 4
+%r224 = trunc i384 %r221 to i64
+store i64 %r224, i64* %r223
+%r225 = lshr i384 %r221, 64
+%r227 = getelementptr i64, i64* %r1, i32 5
+%r228 = trunc i384 %r225 to i64
+store i64 %r228, i64* %r227
+ret void
+}
+define i64 @mcl_fp_addPre6L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+{
+%r5 = load i64, i64* %r3
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r3, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r3, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r3, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r3, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r3, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r42 = load i64, i64* %r4
+%r43 = zext i64 %r42 to i128
+%r45 = getelementptr i64, i64* %r4, i32 1
 %r46 = load i64, i64* %r45
-%r47 = zext i64 %r46 to i256
-%r48 = shl i256 %r47, 192
-%r49 = or i256 %r43, %r48
-%r50 = zext i256 %r49 to i320
-%r51 = add i320 %r27, %r50
-%r52 = trunc i320 %r51 to i256
-%r53 = trunc i256 %r52 to i64
-%r55 = getelementptr i64, i64* %r2, i32 0
-store i64 %r53, i64* %r55
-%r56 = lshr i256 %r52, 64
-%r57 = trunc i256 %r56 to i64
-%r59 = getelementptr i64, i64* %r2, i32 1
-store i64 %r57, i64* %r59
-%r60 = lshr i256 %r56, 64
-%r61 = trunc i256 %r60 to i64
-%r63 = getelementptr i64, i64* %r2, i32 2
-store i64 %r61, i64* %r63
-%r64 = lshr i256 %r60, 64
-%r65 = trunc i256 %r64 to i64
-%r67 = getelementptr i64, i64* %r2, i32 3
-store i64 %r65, i64* %r67
-%r68 = lshr i320 %r51, 256
-%r69 = trunc i320 %r68 to i64
-ret i64 %r69
+%r47 = zext i64 %r46 to i128
+%r48 = shl i128 %r47, 64
+%r49 = or i128 %r43, %r48
+%r50 = zext i128 %r49 to i192
+%r52 = getelementptr i64, i64* %r4, i32 2
+%r53 = load i64, i64* %r52
+%r54 = zext i64 %r53 to i192
+%r55 = shl i192 %r54, 128
+%r56 = or i192 %r50, %r55
+%r57 = zext i192 %r56 to i256
+%r59 = getelementptr i64, i64* %r4, i32 3
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i256
+%r62 = shl i256 %r61, 192
+%r63 = or i256 %r57, %r62
+%r64 = zext i256 %r63 to i320
+%r66 = getelementptr i64, i64* %r4, i32 4
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i320
+%r69 = shl i320 %r68, 256
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i384
+%r73 = getelementptr i64, i64* %r4, i32 5
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i384
+%r76 = shl i384 %r75, 320
+%r77 = or i384 %r71, %r76
+%r78 = zext i384 %r77 to i448
+%r79 = add i448 %r41, %r78
+%r80 = trunc i448 %r79 to i384
+%r82 = getelementptr i64, i64* %r2, i32 0
+%r83 = trunc i384 %r80 to i64
+store i64 %r83, i64* %r82
+%r84 = lshr i384 %r80, 64
+%r86 = getelementptr i64, i64* %r2, i32 1
+%r87 = trunc i384 %r84 to i64
+store i64 %r87, i64* %r86
+%r88 = lshr i384 %r84, 64
+%r90 = getelementptr i64, i64* %r2, i32 2
+%r91 = trunc i384 %r88 to i64
+store i64 %r91, i64* %r90
+%r92 = lshr i384 %r88, 64
+%r94 = getelementptr i64, i64* %r2, i32 3
+%r95 = trunc i384 %r92 to i64
+store i64 %r95, i64* %r94
+%r96 = lshr i384 %r92, 64
+%r98 = getelementptr i64, i64* %r2, i32 4
+%r99 = trunc i384 %r96 to i64
+store i64 %r99, i64* %r98
+%r100 = lshr i384 %r96, 64
+%r102 = getelementptr i64, i64* %r2, i32 5
+%r103 = trunc i384 %r100 to i64
+store i64 %r103, i64* %r102
+%r104 = lshr i448 %r79, 384
+%r105 = trunc i448 %r104 to i64
+ret i64 %r105
 }
-define i64 @mcl_fp_subPre4L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define i64 @mcl_fp_subPre6L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r3
 %r6 = zext i64 %r5 to i128
@@ -2757,49 +3890,81 @@ define i64 @mcl_fp_subPre4L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias
 %r25 = shl i256 %r24, 192
 %r26 = or i256 %r20, %r25
 %r27 = zext i256 %r26 to i320
-%r28 = load i64, i64* %r4
-%r29 = zext i64 %r28 to i128
-%r31 = getelementptr i64, i64* %r4, i32 1
-%r32 = load i64, i64* %r31
-%r33 = zext i64 %r32 to i128
-%r34 = shl i128 %r33, 64
-%r35 = or i128 %r29, %r34
-%r36 = zext i128 %r35 to i192
-%r38 = getelementptr i64, i64* %r4, i32 2
-%r39 = load i64, i64* %r38
-%r40 = zext i64 %r39 to i192
-%r41 = shl i192 %r40, 128
-%r42 = or i192 %r36, %r41
-%r43 = zext i192 %r42 to i256
-%r45 = getelementptr i64, i64* %r4, i32 3
+%r29 = getelementptr i64, i64* %r3, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r3, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = zext i384 %r40 to i448
+%r42 = load i64, i64* %r4
+%r43 = zext i64 %r42 to i128
+%r45 = getelementptr i64, i64* %r4, i32 1
 %r46 = load i64, i64* %r45
-%r47 = zext i64 %r46 to i256
-%r48 = shl i256 %r47, 192
-%r49 = or i256 %r43, %r48
-%r50 = zext i256 %r49 to i320
-%r51 = sub i320 %r27, %r50
-%r52 = trunc i320 %r51 to i256
-%r53 = trunc i256 %r52 to i64
-%r55 = getelementptr i64, i64* %r2, i32 0
-store i64 %r53, i64* %r55
-%r56 = lshr i256 %r52, 64
-%r57 = trunc i256 %r56 to i64
-%r59 = getelementptr i64, i64* %r2, i32 1
-store i64 %r57, i64* %r59
-%r60 = lshr i256 %r56, 64
-%r61 = trunc i256 %r60 to i64
-%r63 = getelementptr i64, i64* %r2, i32 2
-store i64 %r61, i64* %r63
-%r64 = lshr i256 %r60, 64
-%r65 = trunc i256 %r64 to i64
-%r67 = getelementptr i64, i64* %r2, i32 3
-store i64 %r65, i64* %r67
-%r68 = lshr i320 %r51, 256
-%r69 = trunc i320 %r68 to i64
-%r71 = and i64 %r69, 1
-ret i64 %r71
+%r47 = zext i64 %r46 to i128
+%r48 = shl i128 %r47, 64
+%r49 = or i128 %r43, %r48
+%r50 = zext i128 %r49 to i192
+%r52 = getelementptr i64, i64* %r4, i32 2
+%r53 = load i64, i64* %r52
+%r54 = zext i64 %r53 to i192
+%r55 = shl i192 %r54, 128
+%r56 = or i192 %r50, %r55
+%r57 = zext i192 %r56 to i256
+%r59 = getelementptr i64, i64* %r4, i32 3
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i256
+%r62 = shl i256 %r61, 192
+%r63 = or i256 %r57, %r62
+%r64 = zext i256 %r63 to i320
+%r66 = getelementptr i64, i64* %r4, i32 4
+%r67 = load i64, i64* %r66
+%r68 = zext i64 %r67 to i320
+%r69 = shl i320 %r68, 256
+%r70 = or i320 %r64, %r69
+%r71 = zext i320 %r70 to i384
+%r73 = getelementptr i64, i64* %r4, i32 5
+%r74 = load i64, i64* %r73
+%r75 = zext i64 %r74 to i384
+%r76 = shl i384 %r75, 320
+%r77 = or i384 %r71, %r76
+%r78 = zext i384 %r77 to i448
+%r79 = sub i448 %r41, %r78
+%r80 = trunc i448 %r79 to i384
+%r82 = getelementptr i64, i64* %r2, i32 0
+%r83 = trunc i384 %r80 to i64
+store i64 %r83, i64* %r82
+%r84 = lshr i384 %r80, 64
+%r86 = getelementptr i64, i64* %r2, i32 1
+%r87 = trunc i384 %r84 to i64
+store i64 %r87, i64* %r86
+%r88 = lshr i384 %r84, 64
+%r90 = getelementptr i64, i64* %r2, i32 2
+%r91 = trunc i384 %r88 to i64
+store i64 %r91, i64* %r90
+%r92 = lshr i384 %r88, 64
+%r94 = getelementptr i64, i64* %r2, i32 3
+%r95 = trunc i384 %r92 to i64
+store i64 %r95, i64* %r94
+%r96 = lshr i384 %r92, 64
+%r98 = getelementptr i64, i64* %r2, i32 4
+%r99 = trunc i384 %r96 to i64
+store i64 %r99, i64* %r98
+%r100 = lshr i384 %r96, 64
+%r102 = getelementptr i64, i64* %r2, i32 5
+%r103 = trunc i384 %r100 to i64
+store i64 %r103, i64* %r102
+%r105 = lshr i448 %r79, 384
+%r106 = trunc i448 %r105 to i64
+%r107 = and i64 %r106, 1
+ret i64 %r107
 }
-define void @mcl_fp_shr1_4L(i64* noalias  %r1, i64* noalias  %r2)
+define void @mcl_fp_shr1_6L(i64* noalias  %r1, i64* noalias  %r2)
 {
 %r3 = load i64, i64* %r2
 %r4 = zext i64 %r3 to i128
@@ -2820,129 +3985,45 @@ define void @mcl_fp_shr1_4L(i64* noalias  %r1, i64* noalias  %r2)
 %r22 = zext i64 %r21 to i256
 %r23 = shl i256 %r22, 192
 %r24 = or i256 %r18, %r23
-%r25 = lshr i256 %r24, 1
-%r26 = trunc i256 %r25 to i64
-%r28 = getelementptr i64, i64* %r1, i32 0
-store i64 %r26, i64* %r28
-%r29 = lshr i256 %r25, 64
-%r30 = trunc i256 %r29 to i64
-%r32 = getelementptr i64, i64* %r1, i32 1
-store i64 %r30, i64* %r32
-%r33 = lshr i256 %r29, 64
-%r34 = trunc i256 %r33 to i64
-%r36 = getelementptr i64, i64* %r1, i32 2
-store i64 %r34, i64* %r36
-%r37 = lshr i256 %r33, 64
-%r38 = trunc i256 %r37 to i64
-%r40 = getelementptr i64, i64* %r1, i32 3
-store i64 %r38, i64* %r40
-ret void
-}
-define void @mcl_fp_add4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = load i64, i64* %r3
-%r28 = zext i64 %r27 to i128
-%r30 = getelementptr i64, i64* %r3, i32 1
-%r31 = load i64, i64* %r30
-%r32 = zext i64 %r31 to i128
-%r33 = shl i128 %r32, 64
-%r34 = or i128 %r28, %r33
-%r35 = zext i128 %r34 to i192
-%r37 = getelementptr i64, i64* %r3, i32 2
-%r38 = load i64, i64* %r37
-%r39 = zext i64 %r38 to i192
-%r40 = shl i192 %r39, 128
-%r41 = or i192 %r35, %r40
-%r42 = zext i192 %r41 to i256
-%r44 = getelementptr i64, i64* %r3, i32 3
-%r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i256
-%r47 = shl i256 %r46, 192
-%r48 = or i256 %r42, %r47
-%r49 = zext i256 %r26 to i320
-%r50 = zext i256 %r48 to i320
-%r51 = add i320 %r49, %r50
-%r52 = trunc i320 %r51 to i256
-%r53 = trunc i256 %r52 to i64
-%r55 = getelementptr i64, i64* %r1, i32 0
-store i64 %r53, i64* %r55
-%r56 = lshr i256 %r52, 64
-%r57 = trunc i256 %r56 to i64
-%r59 = getelementptr i64, i64* %r1, i32 1
-store i64 %r57, i64* %r59
-%r60 = lshr i256 %r56, 64
-%r61 = trunc i256 %r60 to i64
-%r63 = getelementptr i64, i64* %r1, i32 2
-store i64 %r61, i64* %r63
-%r64 = lshr i256 %r60, 64
-%r65 = trunc i256 %r64 to i64
-%r67 = getelementptr i64, i64* %r1, i32 3
-store i64 %r65, i64* %r67
-%r68 = load i64, i64* %r4
-%r69 = zext i64 %r68 to i128
-%r71 = getelementptr i64, i64* %r4, i32 1
-%r72 = load i64, i64* %r71
-%r73 = zext i64 %r72 to i128
-%r74 = shl i128 %r73, 64
-%r75 = or i128 %r69, %r74
-%r76 = zext i128 %r75 to i192
-%r78 = getelementptr i64, i64* %r4, i32 2
-%r79 = load i64, i64* %r78
-%r80 = zext i64 %r79 to i192
-%r81 = shl i192 %r80, 128
-%r82 = or i192 %r76, %r81
-%r83 = zext i192 %r82 to i256
-%r85 = getelementptr i64, i64* %r4, i32 3
-%r86 = load i64, i64* %r85
-%r87 = zext i64 %r86 to i256
-%r88 = shl i256 %r87, 192
-%r89 = or i256 %r83, %r88
-%r90 = zext i256 %r89 to i320
-%r91 = sub i320 %r51, %r90
-%r92 = lshr i320 %r91, 256
-%r93 = trunc i320 %r92 to i1
-br i1%r93, label %carry, label %nocarry
-nocarry:
-%r94 = trunc i320 %r91 to i256
-%r95 = trunc i256 %r94 to i64
-%r97 = getelementptr i64, i64* %r1, i32 0
-store i64 %r95, i64* %r97
-%r98 = lshr i256 %r94, 64
-%r99 = trunc i256 %r98 to i64
-%r101 = getelementptr i64, i64* %r1, i32 1
-store i64 %r99, i64* %r101
-%r102 = lshr i256 %r98, 64
-%r103 = trunc i256 %r102 to i64
-%r105 = getelementptr i64, i64* %r1, i32 2
-store i64 %r103, i64* %r105
-%r106 = lshr i256 %r102, 64
-%r107 = trunc i256 %r106 to i64
-%r109 = getelementptr i64, i64* %r1, i32 3
-store i64 %r107, i64* %r109
-ret void
-carry:
+%r25 = zext i256 %r24 to i320
+%r27 = getelementptr i64, i64* %r2, i32 4
+%r28 = load i64, i64* %r27
+%r29 = zext i64 %r28 to i320
+%r30 = shl i320 %r29, 256
+%r31 = or i320 %r25, %r30
+%r32 = zext i320 %r31 to i384
+%r34 = getelementptr i64, i64* %r2, i32 5
+%r35 = load i64, i64* %r34
+%r36 = zext i64 %r35 to i384
+%r37 = shl i384 %r36, 320
+%r38 = or i384 %r32, %r37
+%r39 = lshr i384 %r38, 1
+%r41 = getelementptr i64, i64* %r1, i32 0
+%r42 = trunc i384 %r39 to i64
+store i64 %r42, i64* %r41
+%r43 = lshr i384 %r39, 64
+%r45 = getelementptr i64, i64* %r1, i32 1
+%r46 = trunc i384 %r43 to i64
+store i64 %r46, i64* %r45
+%r47 = lshr i384 %r43, 64
+%r49 = getelementptr i64, i64* %r1, i32 2
+%r50 = trunc i384 %r47 to i64
+store i64 %r50, i64* %r49
+%r51 = lshr i384 %r47, 64
+%r53 = getelementptr i64, i64* %r1, i32 3
+%r54 = trunc i384 %r51 to i64
+store i64 %r54, i64* %r53
+%r55 = lshr i384 %r51, 64
+%r57 = getelementptr i64, i64* %r1, i32 4
+%r58 = trunc i384 %r55 to i64
+store i64 %r58, i64* %r57
+%r59 = lshr i384 %r55, 64
+%r61 = getelementptr i64, i64* %r1, i32 5
+%r62 = trunc i384 %r59 to i64
+store i64 %r62, i64* %r61
 ret void
 }
-define void @mcl_fp_addNF4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_add6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -2963,67 +4044,142 @@ define void @mcl_fp_addNF4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r24 = zext i64 %r23 to i256
 %r25 = shl i256 %r24, 192
 %r26 = or i256 %r20, %r25
-%r27 = load i64, i64* %r3
-%r28 = zext i64 %r27 to i128
-%r30 = getelementptr i64, i64* %r3, i32 1
-%r31 = load i64, i64* %r30
-%r32 = zext i64 %r31 to i128
-%r33 = shl i128 %r32, 64
-%r34 = or i128 %r28, %r33
-%r35 = zext i128 %r34 to i192
-%r37 = getelementptr i64, i64* %r3, i32 2
-%r38 = load i64, i64* %r37
-%r39 = zext i64 %r38 to i192
-%r40 = shl i192 %r39, 128
-%r41 = or i192 %r35, %r40
-%r42 = zext i192 %r41 to i256
-%r44 = getelementptr i64, i64* %r3, i32 3
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = load i64, i64* %r3
+%r42 = zext i64 %r41 to i128
+%r44 = getelementptr i64, i64* %r3, i32 1
 %r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i256
-%r47 = shl i256 %r46, 192
-%r48 = or i256 %r42, %r47
-%r49 = add i256 %r26, %r48
-%r50 = load i64, i64* %r4
-%r51 = zext i64 %r50 to i128
-%r53 = getelementptr i64, i64* %r4, i32 1
-%r54 = load i64, i64* %r53
-%r55 = zext i64 %r54 to i128
-%r56 = shl i128 %r55, 64
-%r57 = or i128 %r51, %r56
-%r58 = zext i128 %r57 to i192
-%r60 = getelementptr i64, i64* %r4, i32 2
-%r61 = load i64, i64* %r60
-%r62 = zext i64 %r61 to i192
-%r63 = shl i192 %r62, 128
-%r64 = or i192 %r58, %r63
-%r65 = zext i192 %r64 to i256
-%r67 = getelementptr i64, i64* %r4, i32 3
-%r68 = load i64, i64* %r67
-%r69 = zext i64 %r68 to i256
-%r70 = shl i256 %r69, 192
-%r71 = or i256 %r65, %r70
-%r72 = sub i256 %r49, %r71
-%r73 = lshr i256 %r72, 255
-%r74 = trunc i256 %r73 to i1
-%r75 = select i1 %r74, i256 %r49, i256 %r72
-%r76 = trunc i256 %r75 to i64
-%r78 = getelementptr i64, i64* %r1, i32 0
-store i64 %r76, i64* %r78
-%r79 = lshr i256 %r75, 64
-%r80 = trunc i256 %r79 to i64
-%r82 = getelementptr i64, i64* %r1, i32 1
-store i64 %r80, i64* %r82
-%r83 = lshr i256 %r79, 64
-%r84 = trunc i256 %r83 to i64
-%r86 = getelementptr i64, i64* %r1, i32 2
-store i64 %r84, i64* %r86
-%r87 = lshr i256 %r83, 64
-%r88 = trunc i256 %r87 to i64
-%r90 = getelementptr i64, i64* %r1, i32 3
-store i64 %r88, i64* %r90
+%r46 = zext i64 %r45 to i128
+%r47 = shl i128 %r46, 64
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r48 to i192
+%r51 = getelementptr i64, i64* %r3, i32 2
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r49, %r54
+%r56 = zext i192 %r55 to i256
+%r58 = getelementptr i64, i64* %r3, i32 3
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i256
+%r61 = shl i256 %r60, 192
+%r62 = or i256 %r56, %r61
+%r63 = zext i256 %r62 to i320
+%r65 = getelementptr i64, i64* %r3, i32 4
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i320
+%r68 = shl i320 %r67, 256
+%r69 = or i320 %r63, %r68
+%r70 = zext i320 %r69 to i384
+%r72 = getelementptr i64, i64* %r3, i32 5
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i384
+%r75 = shl i384 %r74, 320
+%r76 = or i384 %r70, %r75
+%r77 = zext i384 %r40 to i448
+%r78 = zext i384 %r76 to i448
+%r79 = add i448 %r77, %r78
+%r80 = trunc i448 %r79 to i384
+%r82 = getelementptr i64, i64* %r1, i32 0
+%r83 = trunc i384 %r80 to i64
+store i64 %r83, i64* %r82
+%r84 = lshr i384 %r80, 64
+%r86 = getelementptr i64, i64* %r1, i32 1
+%r87 = trunc i384 %r84 to i64
+store i64 %r87, i64* %r86
+%r88 = lshr i384 %r84, 64
+%r90 = getelementptr i64, i64* %r1, i32 2
+%r91 = trunc i384 %r88 to i64
+store i64 %r91, i64* %r90
+%r92 = lshr i384 %r88, 64
+%r94 = getelementptr i64, i64* %r1, i32 3
+%r95 = trunc i384 %r92 to i64
+store i64 %r95, i64* %r94
+%r96 = lshr i384 %r92, 64
+%r98 = getelementptr i64, i64* %r1, i32 4
+%r99 = trunc i384 %r96 to i64
+store i64 %r99, i64* %r98
+%r100 = lshr i384 %r96, 64
+%r102 = getelementptr i64, i64* %r1, i32 5
+%r103 = trunc i384 %r100 to i64
+store i64 %r103, i64* %r102
+%r104 = load i64, i64* %r4
+%r105 = zext i64 %r104 to i128
+%r107 = getelementptr i64, i64* %r4, i32 1
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i128
+%r110 = shl i128 %r109, 64
+%r111 = or i128 %r105, %r110
+%r112 = zext i128 %r111 to i192
+%r114 = getelementptr i64, i64* %r4, i32 2
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i192
+%r117 = shl i192 %r116, 128
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i256
+%r121 = getelementptr i64, i64* %r4, i32 3
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i256
+%r124 = shl i256 %r123, 192
+%r125 = or i256 %r119, %r124
+%r126 = zext i256 %r125 to i320
+%r128 = getelementptr i64, i64* %r4, i32 4
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i320
+%r131 = shl i320 %r130, 256
+%r132 = or i320 %r126, %r131
+%r133 = zext i320 %r132 to i384
+%r135 = getelementptr i64, i64* %r4, i32 5
+%r136 = load i64, i64* %r135
+%r137 = zext i64 %r136 to i384
+%r138 = shl i384 %r137, 320
+%r139 = or i384 %r133, %r138
+%r140 = zext i384 %r139 to i448
+%r141 = sub i448 %r79, %r140
+%r142 = lshr i448 %r141, 384
+%r143 = trunc i448 %r142 to i1
+br i1%r143, label %carry, label %nocarry
+nocarry:
+%r144 = trunc i448 %r141 to i384
+%r146 = getelementptr i64, i64* %r1, i32 0
+%r147 = trunc i384 %r144 to i64
+store i64 %r147, i64* %r146
+%r148 = lshr i384 %r144, 64
+%r150 = getelementptr i64, i64* %r1, i32 1
+%r151 = trunc i384 %r148 to i64
+store i64 %r151, i64* %r150
+%r152 = lshr i384 %r148, 64
+%r154 = getelementptr i64, i64* %r1, i32 2
+%r155 = trunc i384 %r152 to i64
+store i64 %r155, i64* %r154
+%r156 = lshr i384 %r152, 64
+%r158 = getelementptr i64, i64* %r1, i32 3
+%r159 = trunc i384 %r156 to i64
+store i64 %r159, i64* %r158
+%r160 = lshr i384 %r156, 64
+%r162 = getelementptr i64, i64* %r1, i32 4
+%r163 = trunc i384 %r160 to i64
+store i64 %r163, i64* %r162
+%r164 = lshr i384 %r160, 64
+%r166 = getelementptr i64, i64* %r1, i32 5
+%r167 = trunc i384 %r164 to i64
+store i64 %r167, i64* %r166
+ret void
+carry:
 ret void
 }
-define void @mcl_fp_sub4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_addNF6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -3044,169 +4200,111 @@ define void @mcl_fp_sub4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r
 %r24 = zext i64 %r23 to i256
 %r25 = shl i256 %r24, 192
 %r26 = or i256 %r20, %r25
-%r27 = load i64, i64* %r3
-%r28 = zext i64 %r27 to i128
-%r30 = getelementptr i64, i64* %r3, i32 1
-%r31 = load i64, i64* %r30
-%r32 = zext i64 %r31 to i128
-%r33 = shl i128 %r32, 64
-%r34 = or i128 %r28, %r33
-%r35 = zext i128 %r34 to i192
-%r37 = getelementptr i64, i64* %r3, i32 2
-%r38 = load i64, i64* %r37
-%r39 = zext i64 %r38 to i192
-%r40 = shl i192 %r39, 128
-%r41 = or i192 %r35, %r40
-%r42 = zext i192 %r41 to i256
-%r44 = getelementptr i64, i64* %r3, i32 3
-%r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i256
-%r47 = shl i256 %r46, 192
-%r48 = or i256 %r42, %r47
-%r49 = zext i256 %r26 to i320
-%r50 = zext i256 %r48 to i320
-%r51 = sub i320 %r49, %r50
-%r52 = trunc i320 %r51 to i256
-%r53 = lshr i320 %r51, 256
-%r54 = trunc i320 %r53 to i1
-%r55 = trunc i256 %r52 to i64
-%r57 = getelementptr i64, i64* %r1, i32 0
-store i64 %r55, i64* %r57
-%r58 = lshr i256 %r52, 64
-%r59 = trunc i256 %r58 to i64
-%r61 = getelementptr i64, i64* %r1, i32 1
-store i64 %r59, i64* %r61
-%r62 = lshr i256 %r58, 64
-%r63 = trunc i256 %r62 to i64
-%r65 = getelementptr i64, i64* %r1, i32 2
-store i64 %r63, i64* %r65
-%r66 = lshr i256 %r62, 64
-%r67 = trunc i256 %r66 to i64
-%r69 = getelementptr i64, i64* %r1, i32 3
-store i64 %r67, i64* %r69
-br i1%r54, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r70 = load i64, i64* %r4
-%r71 = zext i64 %r70 to i128
-%r73 = getelementptr i64, i64* %r4, i32 1
-%r74 = load i64, i64* %r73
-%r75 = zext i64 %r74 to i128
-%r76 = shl i128 %r75, 64
-%r77 = or i128 %r71, %r76
-%r78 = zext i128 %r77 to i192
-%r80 = getelementptr i64, i64* %r4, i32 2
-%r81 = load i64, i64* %r80
-%r82 = zext i64 %r81 to i192
-%r83 = shl i192 %r82, 128
-%r84 = or i192 %r78, %r83
-%r85 = zext i192 %r84 to i256
-%r87 = getelementptr i64, i64* %r4, i32 3
-%r88 = load i64, i64* %r87
-%r89 = zext i64 %r88 to i256
-%r90 = shl i256 %r89, 192
-%r91 = or i256 %r85, %r90
-%r92 = add i256 %r52, %r91
-%r93 = trunc i256 %r92 to i64
-%r95 = getelementptr i64, i64* %r1, i32 0
-store i64 %r93, i64* %r95
-%r96 = lshr i256 %r92, 64
-%r97 = trunc i256 %r96 to i64
-%r99 = getelementptr i64, i64* %r1, i32 1
-store i64 %r97, i64* %r99
-%r100 = lshr i256 %r96, 64
-%r101 = trunc i256 %r100 to i64
-%r103 = getelementptr i64, i64* %r1, i32 2
-store i64 %r101, i64* %r103
-%r104 = lshr i256 %r100, 64
-%r105 = trunc i256 %r104 to i64
-%r107 = getelementptr i64, i64* %r1, i32 3
-store i64 %r105, i64* %r107
-ret void
-}
-define void @mcl_fp_subNF4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = load i64, i64* %r3
-%r28 = zext i64 %r27 to i128
-%r30 = getelementptr i64, i64* %r3, i32 1
-%r31 = load i64, i64* %r30
-%r32 = zext i64 %r31 to i128
-%r33 = shl i128 %r32, 64
-%r34 = or i128 %r28, %r33
-%r35 = zext i128 %r34 to i192
-%r37 = getelementptr i64, i64* %r3, i32 2
-%r38 = load i64, i64* %r37
-%r39 = zext i64 %r38 to i192
-%r40 = shl i192 %r39, 128
-%r41 = or i192 %r35, %r40
-%r42 = zext i192 %r41 to i256
-%r44 = getelementptr i64, i64* %r3, i32 3
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = load i64, i64* %r3
+%r42 = zext i64 %r41 to i128
+%r44 = getelementptr i64, i64* %r3, i32 1
 %r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i256
-%r47 = shl i256 %r46, 192
-%r48 = or i256 %r42, %r47
-%r49 = sub i256 %r26, %r48
-%r50 = lshr i256 %r49, 255
-%r51 = trunc i256 %r50 to i1
-%r52 = load i64, i64* %r4
-%r53 = zext i64 %r52 to i128
-%r55 = getelementptr i64, i64* %r4, i32 1
-%r56 = load i64, i64* %r55
-%r57 = zext i64 %r56 to i128
-%r58 = shl i128 %r57, 64
-%r59 = or i128 %r53, %r58
-%r60 = zext i128 %r59 to i192
-%r62 = getelementptr i64, i64* %r4, i32 2
-%r63 = load i64, i64* %r62
-%r64 = zext i64 %r63 to i192
-%r65 = shl i192 %r64, 128
-%r66 = or i192 %r60, %r65
-%r67 = zext i192 %r66 to i256
-%r69 = getelementptr i64, i64* %r4, i32 3
-%r70 = load i64, i64* %r69
-%r71 = zext i64 %r70 to i256
-%r72 = shl i256 %r71, 192
-%r73 = or i256 %r67, %r72
-%r75 = select i1 %r51, i256 %r73, i256 0
-%r76 = add i256 %r49, %r75
-%r77 = trunc i256 %r76 to i64
-%r79 = getelementptr i64, i64* %r1, i32 0
-store i64 %r77, i64* %r79
-%r80 = lshr i256 %r76, 64
-%r81 = trunc i256 %r80 to i64
-%r83 = getelementptr i64, i64* %r1, i32 1
-store i64 %r81, i64* %r83
-%r84 = lshr i256 %r80, 64
-%r85 = trunc i256 %r84 to i64
-%r87 = getelementptr i64, i64* %r1, i32 2
-store i64 %r85, i64* %r87
-%r88 = lshr i256 %r84, 64
-%r89 = trunc i256 %r88 to i64
-%r91 = getelementptr i64, i64* %r1, i32 3
-store i64 %r89, i64* %r91
+%r46 = zext i64 %r45 to i128
+%r47 = shl i128 %r46, 64
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r48 to i192
+%r51 = getelementptr i64, i64* %r3, i32 2
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r49, %r54
+%r56 = zext i192 %r55 to i256
+%r58 = getelementptr i64, i64* %r3, i32 3
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i256
+%r61 = shl i256 %r60, 192
+%r62 = or i256 %r56, %r61
+%r63 = zext i256 %r62 to i320
+%r65 = getelementptr i64, i64* %r3, i32 4
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i320
+%r68 = shl i320 %r67, 256
+%r69 = or i320 %r63, %r68
+%r70 = zext i320 %r69 to i384
+%r72 = getelementptr i64, i64* %r3, i32 5
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i384
+%r75 = shl i384 %r74, 320
+%r76 = or i384 %r70, %r75
+%r77 = add i384 %r40, %r76
+%r78 = load i64, i64* %r4
+%r79 = zext i64 %r78 to i128
+%r81 = getelementptr i64, i64* %r4, i32 1
+%r82 = load i64, i64* %r81
+%r83 = zext i64 %r82 to i128
+%r84 = shl i128 %r83, 64
+%r85 = or i128 %r79, %r84
+%r86 = zext i128 %r85 to i192
+%r88 = getelementptr i64, i64* %r4, i32 2
+%r89 = load i64, i64* %r88
+%r90 = zext i64 %r89 to i192
+%r91 = shl i192 %r90, 128
+%r92 = or i192 %r86, %r91
+%r93 = zext i192 %r92 to i256
+%r95 = getelementptr i64, i64* %r4, i32 3
+%r96 = load i64, i64* %r95
+%r97 = zext i64 %r96 to i256
+%r98 = shl i256 %r97, 192
+%r99 = or i256 %r93, %r98
+%r100 = zext i256 %r99 to i320
+%r102 = getelementptr i64, i64* %r4, i32 4
+%r103 = load i64, i64* %r102
+%r104 = zext i64 %r103 to i320
+%r105 = shl i320 %r104, 256
+%r106 = or i320 %r100, %r105
+%r107 = zext i320 %r106 to i384
+%r109 = getelementptr i64, i64* %r4, i32 5
+%r110 = load i64, i64* %r109
+%r111 = zext i64 %r110 to i384
+%r112 = shl i384 %r111, 320
+%r113 = or i384 %r107, %r112
+%r114 = sub i384 %r77, %r113
+%r115 = lshr i384 %r114, 383
+%r116 = trunc i384 %r115 to i1
+%r117 = select i1 %r116, i384 %r77, i384 %r114
+%r119 = getelementptr i64, i64* %r1, i32 0
+%r120 = trunc i384 %r117 to i64
+store i64 %r120, i64* %r119
+%r121 = lshr i384 %r117, 64
+%r123 = getelementptr i64, i64* %r1, i32 1
+%r124 = trunc i384 %r121 to i64
+store i64 %r124, i64* %r123
+%r125 = lshr i384 %r121, 64
+%r127 = getelementptr i64, i64* %r1, i32 2
+%r128 = trunc i384 %r125 to i64
+store i64 %r128, i64* %r127
+%r129 = lshr i384 %r125, 64
+%r131 = getelementptr i64, i64* %r1, i32 3
+%r132 = trunc i384 %r129 to i64
+store i64 %r132, i64* %r131
+%r133 = lshr i384 %r129, 64
+%r135 = getelementptr i64, i64* %r1, i32 4
+%r136 = trunc i384 %r133 to i64
+store i64 %r136, i64* %r135
+%r137 = lshr i384 %r133, 64
+%r139 = getelementptr i64, i64* %r1, i32 5
+%r140 = trunc i384 %r137 to i64
+store i64 %r140, i64* %r139
 ret void
 }
-define void @mcl_fpDbl_add4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_sub6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -3239,8644 +4337,253 @@ define void @mcl_fpDbl_add4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r38 = zext i64 %r37 to i384
 %r39 = shl i384 %r38, 320
 %r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r2, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = load i64, i64* %r3
-%r56 = zext i64 %r55 to i128
-%r58 = getelementptr i64, i64* %r3, i32 1
+%r41 = load i64, i64* %r3
+%r42 = zext i64 %r41 to i128
+%r44 = getelementptr i64, i64* %r3, i32 1
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i128
+%r47 = shl i128 %r46, 64
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r48 to i192
+%r51 = getelementptr i64, i64* %r3, i32 2
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r49, %r54
+%r56 = zext i192 %r55 to i256
+%r58 = getelementptr i64, i64* %r3, i32 3
 %r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i128
-%r61 = shl i128 %r60, 64
-%r62 = or i128 %r56, %r61
-%r63 = zext i128 %r62 to i192
-%r65 = getelementptr i64, i64* %r3, i32 2
+%r60 = zext i64 %r59 to i256
+%r61 = shl i256 %r60, 192
+%r62 = or i256 %r56, %r61
+%r63 = zext i256 %r62 to i320
+%r65 = getelementptr i64, i64* %r3, i32 4
 %r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i192
-%r68 = shl i192 %r67, 128
-%r69 = or i192 %r63, %r68
-%r70 = zext i192 %r69 to i256
-%r72 = getelementptr i64, i64* %r3, i32 3
+%r67 = zext i64 %r66 to i320
+%r68 = shl i320 %r67, 256
+%r69 = or i320 %r63, %r68
+%r70 = zext i320 %r69 to i384
+%r72 = getelementptr i64, i64* %r3, i32 5
 %r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i256
-%r75 = shl i256 %r74, 192
-%r76 = or i256 %r70, %r75
-%r77 = zext i256 %r76 to i320
-%r79 = getelementptr i64, i64* %r3, i32 4
-%r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i320
-%r82 = shl i320 %r81, 256
-%r83 = or i320 %r77, %r82
-%r84 = zext i320 %r83 to i384
-%r86 = getelementptr i64, i64* %r3, i32 5
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i384
-%r89 = shl i384 %r88, 320
-%r90 = or i384 %r84, %r89
-%r91 = zext i384 %r90 to i448
-%r93 = getelementptr i64, i64* %r3, i32 6
-%r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i448
-%r96 = shl i448 %r95, 384
-%r97 = or i448 %r91, %r96
-%r98 = zext i448 %r97 to i512
-%r100 = getelementptr i64, i64* %r3, i32 7
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i512
-%r103 = shl i512 %r102, 448
-%r104 = or i512 %r98, %r103
-%r105 = zext i512 %r54 to i576
-%r106 = zext i512 %r104 to i576
-%r107 = add i576 %r105, %r106
-%r108 = trunc i576 %r107 to i256
-%r109 = trunc i256 %r108 to i64
-%r111 = getelementptr i64, i64* %r1, i32 0
-store i64 %r109, i64* %r111
-%r112 = lshr i256 %r108, 64
-%r113 = trunc i256 %r112 to i64
-%r115 = getelementptr i64, i64* %r1, i32 1
-store i64 %r113, i64* %r115
-%r116 = lshr i256 %r112, 64
-%r117 = trunc i256 %r116 to i64
-%r119 = getelementptr i64, i64* %r1, i32 2
-store i64 %r117, i64* %r119
-%r120 = lshr i256 %r116, 64
-%r121 = trunc i256 %r120 to i64
-%r123 = getelementptr i64, i64* %r1, i32 3
-store i64 %r121, i64* %r123
-%r124 = lshr i576 %r107, 256
-%r125 = trunc i576 %r124 to i320
-%r126 = load i64, i64* %r4
-%r127 = zext i64 %r126 to i128
-%r129 = getelementptr i64, i64* %r4, i32 1
-%r130 = load i64, i64* %r129
-%r131 = zext i64 %r130 to i128
-%r132 = shl i128 %r131, 64
-%r133 = or i128 %r127, %r132
-%r134 = zext i128 %r133 to i192
-%r136 = getelementptr i64, i64* %r4, i32 2
-%r137 = load i64, i64* %r136
-%r138 = zext i64 %r137 to i192
-%r139 = shl i192 %r138, 128
-%r140 = or i192 %r134, %r139
-%r141 = zext i192 %r140 to i256
-%r143 = getelementptr i64, i64* %r4, i32 3
-%r144 = load i64, i64* %r143
-%r145 = zext i64 %r144 to i256
-%r146 = shl i256 %r145, 192
-%r147 = or i256 %r141, %r146
-%r148 = zext i256 %r147 to i320
-%r149 = sub i320 %r125, %r148
-%r150 = lshr i320 %r149, 256
-%r151 = trunc i320 %r150 to i1
-%r152 = select i1 %r151, i320 %r125, i320 %r149
-%r153 = trunc i320 %r152 to i256
-%r155 = getelementptr i64, i64* %r1, i32 4
-%r156 = trunc i256 %r153 to i64
-%r158 = getelementptr i64, i64* %r155, i32 0
-store i64 %r156, i64* %r158
-%r159 = lshr i256 %r153, 64
-%r160 = trunc i256 %r159 to i64
-%r162 = getelementptr i64, i64* %r155, i32 1
-store i64 %r160, i64* %r162
-%r163 = lshr i256 %r159, 64
-%r164 = trunc i256 %r163 to i64
-%r166 = getelementptr i64, i64* %r155, i32 2
-store i64 %r164, i64* %r166
-%r167 = lshr i256 %r163, 64
-%r168 = trunc i256 %r167 to i64
-%r170 = getelementptr i64, i64* %r155, i32 3
-store i64 %r168, i64* %r170
+%r74 = zext i64 %r73 to i384
+%r75 = shl i384 %r74, 320
+%r76 = or i384 %r70, %r75
+%r77 = zext i384 %r40 to i448
+%r78 = zext i384 %r76 to i448
+%r79 = sub i448 %r77, %r78
+%r80 = trunc i448 %r79 to i384
+%r81 = lshr i448 %r79, 384
+%r82 = trunc i448 %r81 to i1
+%r84 = getelementptr i64, i64* %r1, i32 0
+%r85 = trunc i384 %r80 to i64
+store i64 %r85, i64* %r84
+%r86 = lshr i384 %r80, 64
+%r88 = getelementptr i64, i64* %r1, i32 1
+%r89 = trunc i384 %r86 to i64
+store i64 %r89, i64* %r88
+%r90 = lshr i384 %r86, 64
+%r92 = getelementptr i64, i64* %r1, i32 2
+%r93 = trunc i384 %r90 to i64
+store i64 %r93, i64* %r92
+%r94 = lshr i384 %r90, 64
+%r96 = getelementptr i64, i64* %r1, i32 3
+%r97 = trunc i384 %r94 to i64
+store i64 %r97, i64* %r96
+%r98 = lshr i384 %r94, 64
+%r100 = getelementptr i64, i64* %r1, i32 4
+%r101 = trunc i384 %r98 to i64
+store i64 %r101, i64* %r100
+%r102 = lshr i384 %r98, 64
+%r104 = getelementptr i64, i64* %r1, i32 5
+%r105 = trunc i384 %r102 to i64
+store i64 %r105, i64* %r104
+br i1%r82, label %carry, label %nocarry
+nocarry:
+ret void
+carry:
+%r106 = load i64, i64* %r4
+%r107 = zext i64 %r106 to i128
+%r109 = getelementptr i64, i64* %r4, i32 1
+%r110 = load i64, i64* %r109
+%r111 = zext i64 %r110 to i128
+%r112 = shl i128 %r111, 64
+%r113 = or i128 %r107, %r112
+%r114 = zext i128 %r113 to i192
+%r116 = getelementptr i64, i64* %r4, i32 2
+%r117 = load i64, i64* %r116
+%r118 = zext i64 %r117 to i192
+%r119 = shl i192 %r118, 128
+%r120 = or i192 %r114, %r119
+%r121 = zext i192 %r120 to i256
+%r123 = getelementptr i64, i64* %r4, i32 3
+%r124 = load i64, i64* %r123
+%r125 = zext i64 %r124 to i256
+%r126 = shl i256 %r125, 192
+%r127 = or i256 %r121, %r126
+%r128 = zext i256 %r127 to i320
+%r130 = getelementptr i64, i64* %r4, i32 4
+%r131 = load i64, i64* %r130
+%r132 = zext i64 %r131 to i320
+%r133 = shl i320 %r132, 256
+%r134 = or i320 %r128, %r133
+%r135 = zext i320 %r134 to i384
+%r137 = getelementptr i64, i64* %r4, i32 5
+%r138 = load i64, i64* %r137
+%r139 = zext i64 %r138 to i384
+%r140 = shl i384 %r139, 320
+%r141 = or i384 %r135, %r140
+%r142 = add i384 %r80, %r141
+%r144 = getelementptr i64, i64* %r1, i32 0
+%r145 = trunc i384 %r142 to i64
+store i64 %r145, i64* %r144
+%r146 = lshr i384 %r142, 64
+%r148 = getelementptr i64, i64* %r1, i32 1
+%r149 = trunc i384 %r146 to i64
+store i64 %r149, i64* %r148
+%r150 = lshr i384 %r146, 64
+%r152 = getelementptr i64, i64* %r1, i32 2
+%r153 = trunc i384 %r150 to i64
+store i64 %r153, i64* %r152
+%r154 = lshr i384 %r150, 64
+%r156 = getelementptr i64, i64* %r1, i32 3
+%r157 = trunc i384 %r154 to i64
+store i64 %r157, i64* %r156
+%r158 = lshr i384 %r154, 64
+%r160 = getelementptr i64, i64* %r1, i32 4
+%r161 = trunc i384 %r158 to i64
+store i64 %r161, i64* %r160
+%r162 = lshr i384 %r158, 64
+%r164 = getelementptr i64, i64* %r1, i32 5
+%r165 = trunc i384 %r162 to i64
+store i64 %r165, i64* %r164
 ret void
 }
-define void @mcl_fpDbl_sub4L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r2, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = load i64, i64* %r3
-%r56 = zext i64 %r55 to i128
-%r58 = getelementptr i64, i64* %r3, i32 1
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i128
-%r61 = shl i128 %r60, 64
-%r62 = or i128 %r56, %r61
-%r63 = zext i128 %r62 to i192
-%r65 = getelementptr i64, i64* %r3, i32 2
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i192
-%r68 = shl i192 %r67, 128
-%r69 = or i192 %r63, %r68
-%r70 = zext i192 %r69 to i256
-%r72 = getelementptr i64, i64* %r3, i32 3
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i256
-%r75 = shl i256 %r74, 192
-%r76 = or i256 %r70, %r75
-%r77 = zext i256 %r76 to i320
-%r79 = getelementptr i64, i64* %r3, i32 4
-%r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i320
-%r82 = shl i320 %r81, 256
-%r83 = or i320 %r77, %r82
-%r84 = zext i320 %r83 to i384
-%r86 = getelementptr i64, i64* %r3, i32 5
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i384
-%r89 = shl i384 %r88, 320
-%r90 = or i384 %r84, %r89
-%r91 = zext i384 %r90 to i448
-%r93 = getelementptr i64, i64* %r3, i32 6
-%r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i448
-%r96 = shl i448 %r95, 384
-%r97 = or i448 %r91, %r96
-%r98 = zext i448 %r97 to i512
-%r100 = getelementptr i64, i64* %r3, i32 7
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i512
-%r103 = shl i512 %r102, 448
-%r104 = or i512 %r98, %r103
-%r105 = zext i512 %r54 to i576
-%r106 = zext i512 %r104 to i576
-%r107 = sub i576 %r105, %r106
-%r108 = trunc i576 %r107 to i256
-%r109 = trunc i256 %r108 to i64
-%r111 = getelementptr i64, i64* %r1, i32 0
-store i64 %r109, i64* %r111
-%r112 = lshr i256 %r108, 64
-%r113 = trunc i256 %r112 to i64
-%r115 = getelementptr i64, i64* %r1, i32 1
-store i64 %r113, i64* %r115
-%r116 = lshr i256 %r112, 64
-%r117 = trunc i256 %r116 to i64
-%r119 = getelementptr i64, i64* %r1, i32 2
-store i64 %r117, i64* %r119
-%r120 = lshr i256 %r116, 64
-%r121 = trunc i256 %r120 to i64
-%r123 = getelementptr i64, i64* %r1, i32 3
-store i64 %r121, i64* %r123
-%r124 = lshr i576 %r107, 256
-%r125 = trunc i576 %r124 to i256
-%r126 = lshr i576 %r107, 512
-%r127 = trunc i576 %r126 to i1
-%r128 = load i64, i64* %r4
-%r129 = zext i64 %r128 to i128
-%r131 = getelementptr i64, i64* %r4, i32 1
-%r132 = load i64, i64* %r131
-%r133 = zext i64 %r132 to i128
-%r134 = shl i128 %r133, 64
-%r135 = or i128 %r129, %r134
-%r136 = zext i128 %r135 to i192
-%r138 = getelementptr i64, i64* %r4, i32 2
-%r139 = load i64, i64* %r138
-%r140 = zext i64 %r139 to i192
-%r141 = shl i192 %r140, 128
-%r142 = or i192 %r136, %r141
-%r143 = zext i192 %r142 to i256
-%r145 = getelementptr i64, i64* %r4, i32 3
-%r146 = load i64, i64* %r145
-%r147 = zext i64 %r146 to i256
-%r148 = shl i256 %r147, 192
-%r149 = or i256 %r143, %r148
-%r151 = select i1 %r127, i256 %r149, i256 0
-%r152 = add i256 %r125, %r151
-%r154 = getelementptr i64, i64* %r1, i32 4
-%r155 = trunc i256 %r152 to i64
-%r157 = getelementptr i64, i64* %r154, i32 0
-store i64 %r155, i64* %r157
-%r158 = lshr i256 %r152, 64
-%r159 = trunc i256 %r158 to i64
-%r161 = getelementptr i64, i64* %r154, i32 1
-store i64 %r159, i64* %r161
-%r162 = lshr i256 %r158, 64
-%r163 = trunc i256 %r162 to i64
-%r165 = getelementptr i64, i64* %r154, i32 2
-store i64 %r163, i64* %r165
-%r166 = lshr i256 %r162, 64
-%r167 = trunc i256 %r166 to i64
-%r169 = getelementptr i64, i64* %r154, i32 3
-store i64 %r167, i64* %r169
-ret void
-}
-define i384 @mulPv320x64(i64* noalias  %r2, i64 %r3)
-{
-%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0)
-%r6 = trunc i128 %r5 to i64
-%r7 = call i64 @extractHigh64(i128 %r5)
-%r9 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 1)
-%r10 = trunc i128 %r9 to i64
-%r11 = call i64 @extractHigh64(i128 %r9)
-%r13 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 2)
-%r14 = trunc i128 %r13 to i64
-%r15 = call i64 @extractHigh64(i128 %r13)
-%r17 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 3)
-%r18 = trunc i128 %r17 to i64
-%r19 = call i64 @extractHigh64(i128 %r17)
-%r21 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 4)
-%r22 = trunc i128 %r21 to i64
-%r23 = call i64 @extractHigh64(i128 %r21)
-%r24 = zext i64 %r6 to i128
-%r25 = zext i64 %r10 to i128
-%r26 = shl i128 %r25, 64
-%r27 = or i128 %r24, %r26
-%r28 = zext i128 %r27 to i192
-%r29 = zext i64 %r14 to i192
-%r30 = shl i192 %r29, 128
-%r31 = or i192 %r28, %r30
-%r32 = zext i192 %r31 to i256
-%r33 = zext i64 %r18 to i256
-%r34 = shl i256 %r33, 192
-%r35 = or i256 %r32, %r34
-%r36 = zext i256 %r35 to i320
-%r37 = zext i64 %r22 to i320
-%r38 = shl i320 %r37, 256
-%r39 = or i320 %r36, %r38
-%r40 = zext i64 %r7 to i128
-%r41 = zext i64 %r11 to i128
-%r42 = shl i128 %r41, 64
-%r43 = or i128 %r40, %r42
-%r44 = zext i128 %r43 to i192
-%r45 = zext i64 %r15 to i192
-%r46 = shl i192 %r45, 128
-%r47 = or i192 %r44, %r46
-%r48 = zext i192 %r47 to i256
-%r49 = zext i64 %r19 to i256
-%r50 = shl i256 %r49, 192
-%r51 = or i256 %r48, %r50
-%r52 = zext i256 %r51 to i320
-%r53 = zext i64 %r23 to i320
-%r54 = shl i320 %r53, 256
-%r55 = or i320 %r52, %r54
-%r56 = zext i320 %r39 to i384
-%r57 = zext i320 %r55 to i384
-%r58 = shl i384 %r57, 64
-%r59 = add i384 %r56, %r58
-ret i384 %r59
-}
-define void @mcl_fp_mulUnitPre5L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3)
-{
-%r4 = call i384 @mulPv320x64(i64* %r2, i64 %r3)
-%r5 = trunc i384 %r4 to i64
-%r7 = getelementptr i64, i64* %r1, i32 0
-store i64 %r5, i64* %r7
-%r8 = lshr i384 %r4, 64
-%r9 = trunc i384 %r8 to i64
-%r11 = getelementptr i64, i64* %r1, i32 1
-store i64 %r9, i64* %r11
-%r12 = lshr i384 %r8, 64
-%r13 = trunc i384 %r12 to i64
-%r15 = getelementptr i64, i64* %r1, i32 2
-store i64 %r13, i64* %r15
-%r16 = lshr i384 %r12, 64
-%r17 = trunc i384 %r16 to i64
-%r19 = getelementptr i64, i64* %r1, i32 3
-store i64 %r17, i64* %r19
-%r20 = lshr i384 %r16, 64
-%r21 = trunc i384 %r20 to i64
-%r23 = getelementptr i64, i64* %r1, i32 4
-store i64 %r21, i64* %r23
-%r24 = lshr i384 %r20, 64
-%r25 = trunc i384 %r24 to i64
-%r27 = getelementptr i64, i64* %r1, i32 5
-store i64 %r25, i64* %r27
-ret void
-}
-define void @mcl_fpDbl_mulPre5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
-{
-%r4 = load i64, i64* %r3
-%r5 = call i384 @mulPv320x64(i64* %r2, i64 %r4)
-%r6 = trunc i384 %r5 to i64
-store i64 %r6, i64* %r1
-%r7 = lshr i384 %r5, 64
-%r9 = getelementptr i64, i64* %r3, i32 1
-%r10 = load i64, i64* %r9
-%r11 = call i384 @mulPv320x64(i64* %r2, i64 %r10)
-%r12 = add i384 %r7, %r11
-%r13 = trunc i384 %r12 to i64
-%r15 = getelementptr i64, i64* %r1, i32 1
-store i64 %r13, i64* %r15
-%r16 = lshr i384 %r12, 64
-%r18 = getelementptr i64, i64* %r3, i32 2
-%r19 = load i64, i64* %r18
-%r20 = call i384 @mulPv320x64(i64* %r2, i64 %r19)
-%r21 = add i384 %r16, %r20
-%r22 = trunc i384 %r21 to i64
-%r24 = getelementptr i64, i64* %r1, i32 2
-store i64 %r22, i64* %r24
-%r25 = lshr i384 %r21, 64
-%r27 = getelementptr i64, i64* %r3, i32 3
-%r28 = load i64, i64* %r27
-%r29 = call i384 @mulPv320x64(i64* %r2, i64 %r28)
-%r30 = add i384 %r25, %r29
-%r31 = trunc i384 %r30 to i64
-%r33 = getelementptr i64, i64* %r1, i32 3
-store i64 %r31, i64* %r33
-%r34 = lshr i384 %r30, 64
-%r36 = getelementptr i64, i64* %r3, i32 4
-%r37 = load i64, i64* %r36
-%r38 = call i384 @mulPv320x64(i64* %r2, i64 %r37)
-%r39 = add i384 %r34, %r38
-%r41 = getelementptr i64, i64* %r1, i32 4
-%r42 = trunc i384 %r39 to i64
-%r44 = getelementptr i64, i64* %r41, i32 0
-store i64 %r42, i64* %r44
-%r45 = lshr i384 %r39, 64
-%r46 = trunc i384 %r45 to i64
-%r48 = getelementptr i64, i64* %r41, i32 1
-store i64 %r46, i64* %r48
-%r49 = lshr i384 %r45, 64
-%r50 = trunc i384 %r49 to i64
-%r52 = getelementptr i64, i64* %r41, i32 2
-store i64 %r50, i64* %r52
-%r53 = lshr i384 %r49, 64
-%r54 = trunc i384 %r53 to i64
-%r56 = getelementptr i64, i64* %r41, i32 3
-store i64 %r54, i64* %r56
-%r57 = lshr i384 %r53, 64
-%r58 = trunc i384 %r57 to i64
-%r60 = getelementptr i64, i64* %r41, i32 4
-store i64 %r58, i64* %r60
-%r61 = lshr i384 %r57, 64
-%r62 = trunc i384 %r61 to i64
-%r64 = getelementptr i64, i64* %r41, i32 5
-store i64 %r62, i64* %r64
-ret void
-}
-define void @mcl_fpDbl_sqrPre5L(i64* noalias  %r1, i64* noalias  %r2)
-{
-%r3 = load i64, i64* %r2
-%r4 = call i384 @mulPv320x64(i64* %r2, i64 %r3)
-%r5 = trunc i384 %r4 to i64
-store i64 %r5, i64* %r1
-%r6 = lshr i384 %r4, 64
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = call i384 @mulPv320x64(i64* %r2, i64 %r9)
-%r11 = add i384 %r6, %r10
-%r12 = trunc i384 %r11 to i64
-%r14 = getelementptr i64, i64* %r1, i32 1
-store i64 %r12, i64* %r14
-%r15 = lshr i384 %r11, 64
-%r17 = getelementptr i64, i64* %r2, i32 2
-%r18 = load i64, i64* %r17
-%r19 = call i384 @mulPv320x64(i64* %r2, i64 %r18)
-%r20 = add i384 %r15, %r19
-%r21 = trunc i384 %r20 to i64
-%r23 = getelementptr i64, i64* %r1, i32 2
-store i64 %r21, i64* %r23
-%r24 = lshr i384 %r20, 64
-%r26 = getelementptr i64, i64* %r2, i32 3
-%r27 = load i64, i64* %r26
-%r28 = call i384 @mulPv320x64(i64* %r2, i64 %r27)
-%r29 = add i384 %r24, %r28
-%r30 = trunc i384 %r29 to i64
-%r32 = getelementptr i64, i64* %r1, i32 3
-store i64 %r30, i64* %r32
-%r33 = lshr i384 %r29, 64
-%r35 = getelementptr i64, i64* %r2, i32 4
-%r36 = load i64, i64* %r35
-%r37 = call i384 @mulPv320x64(i64* %r2, i64 %r36)
-%r38 = add i384 %r33, %r37
-%r40 = getelementptr i64, i64* %r1, i32 4
-%r41 = trunc i384 %r38 to i64
-%r43 = getelementptr i64, i64* %r40, i32 0
-store i64 %r41, i64* %r43
-%r44 = lshr i384 %r38, 64
-%r45 = trunc i384 %r44 to i64
-%r47 = getelementptr i64, i64* %r40, i32 1
-store i64 %r45, i64* %r47
-%r48 = lshr i384 %r44, 64
-%r49 = trunc i384 %r48 to i64
-%r51 = getelementptr i64, i64* %r40, i32 2
-store i64 %r49, i64* %r51
-%r52 = lshr i384 %r48, 64
-%r53 = trunc i384 %r52 to i64
-%r55 = getelementptr i64, i64* %r40, i32 3
-store i64 %r53, i64* %r55
-%r56 = lshr i384 %r52, 64
-%r57 = trunc i384 %r56 to i64
-%r59 = getelementptr i64, i64* %r40, i32 4
-store i64 %r57, i64* %r59
-%r60 = lshr i384 %r56, 64
-%r61 = trunc i384 %r60 to i64
-%r63 = getelementptr i64, i64* %r40, i32 5
-store i64 %r61, i64* %r63
-ret void
-}
-define void @mcl_fp_mont5L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
-{
-%r6 = getelementptr i64, i64* %r4, i32 -1
-%r7 = load i64, i64* %r6
-%r9 = getelementptr i64, i64* %r3, i32 0
-%r10 = load i64, i64* %r9
-%r11 = call i384 @mulPv320x64(i64* %r2, i64 %r10)
-%r12 = zext i384 %r11 to i448
-%r13 = trunc i384 %r11 to i64
-%r14 = mul i64 %r13, %r7
-%r15 = call i384 @mulPv320x64(i64* %r4, i64 %r14)
-%r16 = zext i384 %r15 to i448
-%r17 = add i448 %r12, %r16
-%r18 = lshr i448 %r17, 64
-%r20 = getelementptr i64, i64* %r3, i32 1
-%r21 = load i64, i64* %r20
-%r22 = call i384 @mulPv320x64(i64* %r2, i64 %r21)
-%r23 = zext i384 %r22 to i448
-%r24 = add i448 %r18, %r23
-%r25 = trunc i448 %r24 to i64
-%r26 = mul i64 %r25, %r7
-%r27 = call i384 @mulPv320x64(i64* %r4, i64 %r26)
-%r28 = zext i384 %r27 to i448
-%r29 = add i448 %r24, %r28
-%r30 = lshr i448 %r29, 64
-%r32 = getelementptr i64, i64* %r3, i32 2
-%r33 = load i64, i64* %r32
-%r34 = call i384 @mulPv320x64(i64* %r2, i64 %r33)
-%r35 = zext i384 %r34 to i448
-%r36 = add i448 %r30, %r35
-%r37 = trunc i448 %r36 to i64
-%r38 = mul i64 %r37, %r7
-%r39 = call i384 @mulPv320x64(i64* %r4, i64 %r38)
-%r40 = zext i384 %r39 to i448
-%r41 = add i448 %r36, %r40
-%r42 = lshr i448 %r41, 64
-%r44 = getelementptr i64, i64* %r3, i32 3
-%r45 = load i64, i64* %r44
-%r46 = call i384 @mulPv320x64(i64* %r2, i64 %r45)
-%r47 = zext i384 %r46 to i448
-%r48 = add i448 %r42, %r47
-%r49 = trunc i448 %r48 to i64
-%r50 = mul i64 %r49, %r7
-%r51 = call i384 @mulPv320x64(i64* %r4, i64 %r50)
-%r52 = zext i384 %r51 to i448
-%r53 = add i448 %r48, %r52
-%r54 = lshr i448 %r53, 64
-%r56 = getelementptr i64, i64* %r3, i32 4
-%r57 = load i64, i64* %r56
-%r58 = call i384 @mulPv320x64(i64* %r2, i64 %r57)
-%r59 = zext i384 %r58 to i448
-%r60 = add i448 %r54, %r59
-%r61 = trunc i448 %r60 to i64
-%r62 = mul i64 %r61, %r7
-%r63 = call i384 @mulPv320x64(i64* %r4, i64 %r62)
-%r64 = zext i384 %r63 to i448
-%r65 = add i448 %r60, %r64
-%r66 = lshr i448 %r65, 64
-%r67 = trunc i448 %r66 to i384
-%r68 = load i64, i64* %r4
-%r69 = zext i64 %r68 to i128
-%r71 = getelementptr i64, i64* %r4, i32 1
-%r72 = load i64, i64* %r71
-%r73 = zext i64 %r72 to i128
-%r74 = shl i128 %r73, 64
-%r75 = or i128 %r69, %r74
-%r76 = zext i128 %r75 to i192
-%r78 = getelementptr i64, i64* %r4, i32 2
-%r79 = load i64, i64* %r78
-%r80 = zext i64 %r79 to i192
-%r81 = shl i192 %r80, 128
-%r82 = or i192 %r76, %r81
-%r83 = zext i192 %r82 to i256
-%r85 = getelementptr i64, i64* %r4, i32 3
-%r86 = load i64, i64* %r85
-%r87 = zext i64 %r86 to i256
-%r88 = shl i256 %r87, 192
-%r89 = or i256 %r83, %r88
-%r90 = zext i256 %r89 to i320
-%r92 = getelementptr i64, i64* %r4, i32 4
-%r93 = load i64, i64* %r92
-%r94 = zext i64 %r93 to i320
-%r95 = shl i320 %r94, 256
-%r96 = or i320 %r90, %r95
-%r97 = zext i320 %r96 to i384
-%r98 = sub i384 %r67, %r97
-%r99 = lshr i384 %r98, 320
-%r100 = trunc i384 %r99 to i1
-%r101 = select i1 %r100, i384 %r67, i384 %r98
-%r102 = trunc i384 %r101 to i320
-%r103 = trunc i320 %r102 to i64
-%r105 = getelementptr i64, i64* %r1, i32 0
-store i64 %r103, i64* %r105
-%r106 = lshr i320 %r102, 64
-%r107 = trunc i320 %r106 to i64
-%r109 = getelementptr i64, i64* %r1, i32 1
-store i64 %r107, i64* %r109
-%r110 = lshr i320 %r106, 64
-%r111 = trunc i320 %r110 to i64
-%r113 = getelementptr i64, i64* %r1, i32 2
-store i64 %r111, i64* %r113
-%r114 = lshr i320 %r110, 64
-%r115 = trunc i320 %r114 to i64
-%r117 = getelementptr i64, i64* %r1, i32 3
-store i64 %r115, i64* %r117
-%r118 = lshr i320 %r114, 64
-%r119 = trunc i320 %r118 to i64
-%r121 = getelementptr i64, i64* %r1, i32 4
-store i64 %r119, i64* %r121
-ret void
-}
-define void @mcl_fp_montNF5L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
-{
-%r6 = getelementptr i64, i64* %r4, i32 -1
-%r7 = load i64, i64* %r6
-%r8 = load i64, i64* %r3
-%r9 = call i384 @mulPv320x64(i64* %r2, i64 %r8)
-%r10 = trunc i384 %r9 to i64
-%r11 = mul i64 %r10, %r7
-%r12 = call i384 @mulPv320x64(i64* %r4, i64 %r11)
-%r13 = add i384 %r9, %r12
-%r14 = lshr i384 %r13, 64
-%r16 = getelementptr i64, i64* %r3, i32 1
-%r17 = load i64, i64* %r16
-%r18 = call i384 @mulPv320x64(i64* %r2, i64 %r17)
-%r19 = add i384 %r14, %r18
-%r20 = trunc i384 %r19 to i64
-%r21 = mul i64 %r20, %r7
-%r22 = call i384 @mulPv320x64(i64* %r4, i64 %r21)
-%r23 = add i384 %r19, %r22
-%r24 = lshr i384 %r23, 64
-%r26 = getelementptr i64, i64* %r3, i32 2
-%r27 = load i64, i64* %r26
-%r28 = call i384 @mulPv320x64(i64* %r2, i64 %r27)
-%r29 = add i384 %r24, %r28
-%r30 = trunc i384 %r29 to i64
-%r31 = mul i64 %r30, %r7
-%r32 = call i384 @mulPv320x64(i64* %r4, i64 %r31)
-%r33 = add i384 %r29, %r32
-%r34 = lshr i384 %r33, 64
-%r36 = getelementptr i64, i64* %r3, i32 3
-%r37 = load i64, i64* %r36
-%r38 = call i384 @mulPv320x64(i64* %r2, i64 %r37)
-%r39 = add i384 %r34, %r38
-%r40 = trunc i384 %r39 to i64
-%r41 = mul i64 %r40, %r7
-%r42 = call i384 @mulPv320x64(i64* %r4, i64 %r41)
-%r43 = add i384 %r39, %r42
-%r44 = lshr i384 %r43, 64
-%r46 = getelementptr i64, i64* %r3, i32 4
-%r47 = load i64, i64* %r46
-%r48 = call i384 @mulPv320x64(i64* %r2, i64 %r47)
-%r49 = add i384 %r44, %r48
-%r50 = trunc i384 %r49 to i64
-%r51 = mul i64 %r50, %r7
-%r52 = call i384 @mulPv320x64(i64* %r4, i64 %r51)
-%r53 = add i384 %r49, %r52
-%r54 = lshr i384 %r53, 64
-%r55 = trunc i384 %r54 to i320
-%r56 = load i64, i64* %r4
-%r57 = zext i64 %r56 to i128
-%r59 = getelementptr i64, i64* %r4, i32 1
-%r60 = load i64, i64* %r59
-%r61 = zext i64 %r60 to i128
-%r62 = shl i128 %r61, 64
-%r63 = or i128 %r57, %r62
-%r64 = zext i128 %r63 to i192
-%r66 = getelementptr i64, i64* %r4, i32 2
-%r67 = load i64, i64* %r66
-%r68 = zext i64 %r67 to i192
-%r69 = shl i192 %r68, 128
-%r70 = or i192 %r64, %r69
-%r71 = zext i192 %r70 to i256
-%r73 = getelementptr i64, i64* %r4, i32 3
-%r74 = load i64, i64* %r73
-%r75 = zext i64 %r74 to i256
-%r76 = shl i256 %r75, 192
-%r77 = or i256 %r71, %r76
-%r78 = zext i256 %r77 to i320
-%r80 = getelementptr i64, i64* %r4, i32 4
-%r81 = load i64, i64* %r80
-%r82 = zext i64 %r81 to i320
-%r83 = shl i320 %r82, 256
-%r84 = or i320 %r78, %r83
-%r85 = sub i320 %r55, %r84
-%r86 = lshr i320 %r85, 319
-%r87 = trunc i320 %r86 to i1
-%r88 = select i1 %r87, i320 %r55, i320 %r85
-%r89 = trunc i320 %r88 to i64
-%r91 = getelementptr i64, i64* %r1, i32 0
-store i64 %r89, i64* %r91
-%r92 = lshr i320 %r88, 64
-%r93 = trunc i320 %r92 to i64
-%r95 = getelementptr i64, i64* %r1, i32 1
-store i64 %r93, i64* %r95
-%r96 = lshr i320 %r92, 64
-%r97 = trunc i320 %r96 to i64
-%r99 = getelementptr i64, i64* %r1, i32 2
-store i64 %r97, i64* %r99
-%r100 = lshr i320 %r96, 64
-%r101 = trunc i320 %r100 to i64
-%r103 = getelementptr i64, i64* %r1, i32 3
-store i64 %r101, i64* %r103
-%r104 = lshr i320 %r100, 64
-%r105 = trunc i320 %r104 to i64
-%r107 = getelementptr i64, i64* %r1, i32 4
-store i64 %r105, i64* %r107
-ret void
-}
-define void @mcl_fp_montRed5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
-{
-%r5 = getelementptr i64, i64* %r3, i32 -1
-%r6 = load i64, i64* %r5
-%r7 = load i64, i64* %r3
-%r8 = zext i64 %r7 to i128
-%r10 = getelementptr i64, i64* %r3, i32 1
-%r11 = load i64, i64* %r10
-%r12 = zext i64 %r11 to i128
-%r13 = shl i128 %r12, 64
-%r14 = or i128 %r8, %r13
-%r15 = zext i128 %r14 to i192
-%r17 = getelementptr i64, i64* %r3, i32 2
-%r18 = load i64, i64* %r17
-%r19 = zext i64 %r18 to i192
-%r20 = shl i192 %r19, 128
-%r21 = or i192 %r15, %r20
-%r22 = zext i192 %r21 to i256
-%r24 = getelementptr i64, i64* %r3, i32 3
-%r25 = load i64, i64* %r24
-%r26 = zext i64 %r25 to i256
-%r27 = shl i256 %r26, 192
-%r28 = or i256 %r22, %r27
-%r29 = zext i256 %r28 to i320
-%r31 = getelementptr i64, i64* %r3, i32 4
-%r32 = load i64, i64* %r31
-%r33 = zext i64 %r32 to i320
-%r34 = shl i320 %r33, 256
-%r35 = or i320 %r29, %r34
-%r36 = load i64, i64* %r2
-%r37 = zext i64 %r36 to i128
-%r39 = getelementptr i64, i64* %r2, i32 1
-%r40 = load i64, i64* %r39
-%r41 = zext i64 %r40 to i128
-%r42 = shl i128 %r41, 64
-%r43 = or i128 %r37, %r42
-%r44 = zext i128 %r43 to i192
-%r46 = getelementptr i64, i64* %r2, i32 2
-%r47 = load i64, i64* %r46
-%r48 = zext i64 %r47 to i192
-%r49 = shl i192 %r48, 128
-%r50 = or i192 %r44, %r49
-%r51 = zext i192 %r50 to i256
-%r53 = getelementptr i64, i64* %r2, i32 3
-%r54 = load i64, i64* %r53
-%r55 = zext i64 %r54 to i256
-%r56 = shl i256 %r55, 192
-%r57 = or i256 %r51, %r56
-%r58 = zext i256 %r57 to i320
-%r60 = getelementptr i64, i64* %r2, i32 4
-%r61 = load i64, i64* %r60
-%r62 = zext i64 %r61 to i320
-%r63 = shl i320 %r62, 256
-%r64 = or i320 %r58, %r63
-%r65 = zext i320 %r64 to i384
-%r67 = getelementptr i64, i64* %r2, i32 5
-%r68 = load i64, i64* %r67
-%r69 = zext i64 %r68 to i384
-%r70 = shl i384 %r69, 320
-%r71 = or i384 %r65, %r70
-%r72 = zext i384 %r71 to i448
-%r74 = getelementptr i64, i64* %r2, i32 6
-%r75 = load i64, i64* %r74
-%r76 = zext i64 %r75 to i448
-%r77 = shl i448 %r76, 384
-%r78 = or i448 %r72, %r77
-%r79 = zext i448 %r78 to i512
-%r81 = getelementptr i64, i64* %r2, i32 7
-%r82 = load i64, i64* %r81
-%r83 = zext i64 %r82 to i512
-%r84 = shl i512 %r83, 448
-%r85 = or i512 %r79, %r84
-%r86 = zext i512 %r85 to i576
-%r88 = getelementptr i64, i64* %r2, i32 8
-%r89 = load i64, i64* %r88
-%r90 = zext i64 %r89 to i576
-%r91 = shl i576 %r90, 512
-%r92 = or i576 %r86, %r91
-%r93 = zext i576 %r92 to i640
-%r95 = getelementptr i64, i64* %r2, i32 9
-%r96 = load i64, i64* %r95
-%r97 = zext i64 %r96 to i640
-%r98 = shl i640 %r97, 576
-%r99 = or i640 %r93, %r98
-%r100 = zext i640 %r99 to i704
-%r101 = trunc i704 %r100 to i64
-%r102 = mul i64 %r101, %r6
-%r103 = call i384 @mulPv320x64(i64* %r3, i64 %r102)
-%r104 = zext i384 %r103 to i704
-%r105 = add i704 %r100, %r104
-%r106 = lshr i704 %r105, 64
-%r107 = trunc i704 %r106 to i640
-%r108 = trunc i640 %r107 to i64
-%r109 = mul i64 %r108, %r6
-%r110 = call i384 @mulPv320x64(i64* %r3, i64 %r109)
-%r111 = zext i384 %r110 to i640
-%r112 = add i640 %r107, %r111
-%r113 = lshr i640 %r112, 64
-%r114 = trunc i640 %r113 to i576
-%r115 = trunc i576 %r114 to i64
-%r116 = mul i64 %r115, %r6
-%r117 = call i384 @mulPv320x64(i64* %r3, i64 %r116)
-%r118 = zext i384 %r117 to i576
-%r119 = add i576 %r114, %r118
-%r120 = lshr i576 %r119, 64
-%r121 = trunc i576 %r120 to i512
-%r122 = trunc i512 %r121 to i64
-%r123 = mul i64 %r122, %r6
-%r124 = call i384 @mulPv320x64(i64* %r3, i64 %r123)
-%r125 = zext i384 %r124 to i512
-%r126 = add i512 %r121, %r125
-%r127 = lshr i512 %r126, 64
-%r128 = trunc i512 %r127 to i448
-%r129 = trunc i448 %r128 to i64
-%r130 = mul i64 %r129, %r6
-%r131 = call i384 @mulPv320x64(i64* %r3, i64 %r130)
-%r132 = zext i384 %r131 to i448
-%r133 = add i448 %r128, %r132
-%r134 = lshr i448 %r133, 64
-%r135 = trunc i448 %r134 to i384
-%r136 = zext i320 %r35 to i384
-%r137 = sub i384 %r135, %r136
-%r138 = lshr i384 %r137, 320
-%r139 = trunc i384 %r138 to i1
-%r140 = select i1 %r139, i384 %r135, i384 %r137
-%r141 = trunc i384 %r140 to i320
-%r142 = trunc i320 %r141 to i64
-%r144 = getelementptr i64, i64* %r1, i32 0
-store i64 %r142, i64* %r144
-%r145 = lshr i320 %r141, 64
-%r146 = trunc i320 %r145 to i64
-%r148 = getelementptr i64, i64* %r1, i32 1
-store i64 %r146, i64* %r148
-%r149 = lshr i320 %r145, 64
-%r150 = trunc i320 %r149 to i64
-%r152 = getelementptr i64, i64* %r1, i32 2
-store i64 %r150, i64* %r152
-%r153 = lshr i320 %r149, 64
-%r154 = trunc i320 %r153 to i64
-%r156 = getelementptr i64, i64* %r1, i32 3
-store i64 %r154, i64* %r156
-%r157 = lshr i320 %r153, 64
-%r158 = trunc i320 %r157 to i64
-%r160 = getelementptr i64, i64* %r1, i32 4
-store i64 %r158, i64* %r160
-ret void
-}
-define i64 @mcl_fp_addPre5L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r3
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r3, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r3, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r3, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r3, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r35 = load i64, i64* %r4
-%r36 = zext i64 %r35 to i128
-%r38 = getelementptr i64, i64* %r4, i32 1
-%r39 = load i64, i64* %r38
-%r40 = zext i64 %r39 to i128
-%r41 = shl i128 %r40, 64
-%r42 = or i128 %r36, %r41
-%r43 = zext i128 %r42 to i192
-%r45 = getelementptr i64, i64* %r4, i32 2
-%r46 = load i64, i64* %r45
-%r47 = zext i64 %r46 to i192
-%r48 = shl i192 %r47, 128
-%r49 = or i192 %r43, %r48
-%r50 = zext i192 %r49 to i256
-%r52 = getelementptr i64, i64* %r4, i32 3
-%r53 = load i64, i64* %r52
-%r54 = zext i64 %r53 to i256
-%r55 = shl i256 %r54, 192
-%r56 = or i256 %r50, %r55
-%r57 = zext i256 %r56 to i320
-%r59 = getelementptr i64, i64* %r4, i32 4
-%r60 = load i64, i64* %r59
-%r61 = zext i64 %r60 to i320
-%r62 = shl i320 %r61, 256
-%r63 = or i320 %r57, %r62
-%r64 = zext i320 %r63 to i384
-%r65 = add i384 %r34, %r64
-%r66 = trunc i384 %r65 to i320
-%r67 = trunc i320 %r66 to i64
-%r69 = getelementptr i64, i64* %r2, i32 0
-store i64 %r67, i64* %r69
-%r70 = lshr i320 %r66, 64
-%r71 = trunc i320 %r70 to i64
-%r73 = getelementptr i64, i64* %r2, i32 1
-store i64 %r71, i64* %r73
-%r74 = lshr i320 %r70, 64
-%r75 = trunc i320 %r74 to i64
-%r77 = getelementptr i64, i64* %r2, i32 2
-store i64 %r75, i64* %r77
-%r78 = lshr i320 %r74, 64
-%r79 = trunc i320 %r78 to i64
-%r81 = getelementptr i64, i64* %r2, i32 3
-store i64 %r79, i64* %r81
-%r82 = lshr i320 %r78, 64
-%r83 = trunc i320 %r82 to i64
-%r85 = getelementptr i64, i64* %r2, i32 4
-store i64 %r83, i64* %r85
-%r86 = lshr i384 %r65, 320
-%r87 = trunc i384 %r86 to i64
-ret i64 %r87
-}
-define i64 @mcl_fp_subPre5L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r3
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r3, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r3, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r3, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r3, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r35 = load i64, i64* %r4
-%r36 = zext i64 %r35 to i128
-%r38 = getelementptr i64, i64* %r4, i32 1
-%r39 = load i64, i64* %r38
-%r40 = zext i64 %r39 to i128
-%r41 = shl i128 %r40, 64
-%r42 = or i128 %r36, %r41
-%r43 = zext i128 %r42 to i192
-%r45 = getelementptr i64, i64* %r4, i32 2
-%r46 = load i64, i64* %r45
-%r47 = zext i64 %r46 to i192
-%r48 = shl i192 %r47, 128
-%r49 = or i192 %r43, %r48
-%r50 = zext i192 %r49 to i256
-%r52 = getelementptr i64, i64* %r4, i32 3
-%r53 = load i64, i64* %r52
-%r54 = zext i64 %r53 to i256
-%r55 = shl i256 %r54, 192
-%r56 = or i256 %r50, %r55
-%r57 = zext i256 %r56 to i320
-%r59 = getelementptr i64, i64* %r4, i32 4
-%r60 = load i64, i64* %r59
-%r61 = zext i64 %r60 to i320
-%r62 = shl i320 %r61, 256
-%r63 = or i320 %r57, %r62
-%r64 = zext i320 %r63 to i384
-%r65 = sub i384 %r34, %r64
-%r66 = trunc i384 %r65 to i320
-%r67 = trunc i320 %r66 to i64
-%r69 = getelementptr i64, i64* %r2, i32 0
-store i64 %r67, i64* %r69
-%r70 = lshr i320 %r66, 64
-%r71 = trunc i320 %r70 to i64
-%r73 = getelementptr i64, i64* %r2, i32 1
-store i64 %r71, i64* %r73
-%r74 = lshr i320 %r70, 64
-%r75 = trunc i320 %r74 to i64
-%r77 = getelementptr i64, i64* %r2, i32 2
-store i64 %r75, i64* %r77
-%r78 = lshr i320 %r74, 64
-%r79 = trunc i320 %r78 to i64
-%r81 = getelementptr i64, i64* %r2, i32 3
-store i64 %r79, i64* %r81
-%r82 = lshr i320 %r78, 64
-%r83 = trunc i320 %r82 to i64
-%r85 = getelementptr i64, i64* %r2, i32 4
-store i64 %r83, i64* %r85
-%r86 = lshr i384 %r65, 320
-%r87 = trunc i384 %r86 to i64
-%r89 = and i64 %r87, 1
-ret i64 %r89
-}
-define void @mcl_fp_shr1_5L(i64* noalias  %r1, i64* noalias  %r2)
-{
-%r3 = load i64, i64* %r2
-%r4 = zext i64 %r3 to i128
-%r6 = getelementptr i64, i64* %r2, i32 1
-%r7 = load i64, i64* %r6
-%r8 = zext i64 %r7 to i128
-%r9 = shl i128 %r8, 64
-%r10 = or i128 %r4, %r9
-%r11 = zext i128 %r10 to i192
-%r13 = getelementptr i64, i64* %r2, i32 2
-%r14 = load i64, i64* %r13
-%r15 = zext i64 %r14 to i192
-%r16 = shl i192 %r15, 128
-%r17 = or i192 %r11, %r16
-%r18 = zext i192 %r17 to i256
-%r20 = getelementptr i64, i64* %r2, i32 3
-%r21 = load i64, i64* %r20
-%r22 = zext i64 %r21 to i256
-%r23 = shl i256 %r22, 192
-%r24 = or i256 %r18, %r23
-%r25 = zext i256 %r24 to i320
-%r27 = getelementptr i64, i64* %r2, i32 4
-%r28 = load i64, i64* %r27
-%r29 = zext i64 %r28 to i320
-%r30 = shl i320 %r29, 256
-%r31 = or i320 %r25, %r30
-%r32 = lshr i320 %r31, 1
-%r33 = trunc i320 %r32 to i64
-%r35 = getelementptr i64, i64* %r1, i32 0
-store i64 %r33, i64* %r35
-%r36 = lshr i320 %r32, 64
-%r37 = trunc i320 %r36 to i64
-%r39 = getelementptr i64, i64* %r1, i32 1
-store i64 %r37, i64* %r39
-%r40 = lshr i320 %r36, 64
-%r41 = trunc i320 %r40 to i64
-%r43 = getelementptr i64, i64* %r1, i32 2
-store i64 %r41, i64* %r43
-%r44 = lshr i320 %r40, 64
-%r45 = trunc i320 %r44 to i64
-%r47 = getelementptr i64, i64* %r1, i32 3
-store i64 %r45, i64* %r47
-%r48 = lshr i320 %r44, 64
-%r49 = trunc i320 %r48 to i64
-%r51 = getelementptr i64, i64* %r1, i32 4
-store i64 %r49, i64* %r51
-ret void
-}
-define void @mcl_fp_add5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = load i64, i64* %r3
-%r35 = zext i64 %r34 to i128
-%r37 = getelementptr i64, i64* %r3, i32 1
-%r38 = load i64, i64* %r37
-%r39 = zext i64 %r38 to i128
-%r40 = shl i128 %r39, 64
-%r41 = or i128 %r35, %r40
-%r42 = zext i128 %r41 to i192
-%r44 = getelementptr i64, i64* %r3, i32 2
-%r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i192
-%r47 = shl i192 %r46, 128
-%r48 = or i192 %r42, %r47
-%r49 = zext i192 %r48 to i256
-%r51 = getelementptr i64, i64* %r3, i32 3
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i256
-%r54 = shl i256 %r53, 192
-%r55 = or i256 %r49, %r54
-%r56 = zext i256 %r55 to i320
-%r58 = getelementptr i64, i64* %r3, i32 4
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i320
-%r61 = shl i320 %r60, 256
-%r62 = or i320 %r56, %r61
-%r63 = zext i320 %r33 to i384
-%r64 = zext i320 %r62 to i384
-%r65 = add i384 %r63, %r64
-%r66 = trunc i384 %r65 to i320
-%r67 = trunc i320 %r66 to i64
-%r69 = getelementptr i64, i64* %r1, i32 0
-store i64 %r67, i64* %r69
-%r70 = lshr i320 %r66, 64
-%r71 = trunc i320 %r70 to i64
-%r73 = getelementptr i64, i64* %r1, i32 1
-store i64 %r71, i64* %r73
-%r74 = lshr i320 %r70, 64
-%r75 = trunc i320 %r74 to i64
-%r77 = getelementptr i64, i64* %r1, i32 2
-store i64 %r75, i64* %r77
-%r78 = lshr i320 %r74, 64
-%r79 = trunc i320 %r78 to i64
-%r81 = getelementptr i64, i64* %r1, i32 3
-store i64 %r79, i64* %r81
-%r82 = lshr i320 %r78, 64
-%r83 = trunc i320 %r82 to i64
-%r85 = getelementptr i64, i64* %r1, i32 4
-store i64 %r83, i64* %r85
-%r86 = load i64, i64* %r4
-%r87 = zext i64 %r86 to i128
-%r89 = getelementptr i64, i64* %r4, i32 1
-%r90 = load i64, i64* %r89
-%r91 = zext i64 %r90 to i128
-%r92 = shl i128 %r91, 64
-%r93 = or i128 %r87, %r92
-%r94 = zext i128 %r93 to i192
-%r96 = getelementptr i64, i64* %r4, i32 2
-%r97 = load i64, i64* %r96
-%r98 = zext i64 %r97 to i192
-%r99 = shl i192 %r98, 128
-%r100 = or i192 %r94, %r99
-%r101 = zext i192 %r100 to i256
-%r103 = getelementptr i64, i64* %r4, i32 3
-%r104 = load i64, i64* %r103
-%r105 = zext i64 %r104 to i256
-%r106 = shl i256 %r105, 192
-%r107 = or i256 %r101, %r106
-%r108 = zext i256 %r107 to i320
-%r110 = getelementptr i64, i64* %r4, i32 4
-%r111 = load i64, i64* %r110
-%r112 = zext i64 %r111 to i320
-%r113 = shl i320 %r112, 256
-%r114 = or i320 %r108, %r113
-%r115 = zext i320 %r114 to i384
-%r116 = sub i384 %r65, %r115
-%r117 = lshr i384 %r116, 320
-%r118 = trunc i384 %r117 to i1
-br i1%r118, label %carry, label %nocarry
-nocarry:
-%r119 = trunc i384 %r116 to i320
-%r120 = trunc i320 %r119 to i64
-%r122 = getelementptr i64, i64* %r1, i32 0
-store i64 %r120, i64* %r122
-%r123 = lshr i320 %r119, 64
-%r124 = trunc i320 %r123 to i64
-%r126 = getelementptr i64, i64* %r1, i32 1
-store i64 %r124, i64* %r126
-%r127 = lshr i320 %r123, 64
-%r128 = trunc i320 %r127 to i64
-%r130 = getelementptr i64, i64* %r1, i32 2
-store i64 %r128, i64* %r130
-%r131 = lshr i320 %r127, 64
-%r132 = trunc i320 %r131 to i64
-%r134 = getelementptr i64, i64* %r1, i32 3
-store i64 %r132, i64* %r134
-%r135 = lshr i320 %r131, 64
-%r136 = trunc i320 %r135 to i64
-%r138 = getelementptr i64, i64* %r1, i32 4
-store i64 %r136, i64* %r138
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = load i64, i64* %r3
-%r35 = zext i64 %r34 to i128
-%r37 = getelementptr i64, i64* %r3, i32 1
-%r38 = load i64, i64* %r37
-%r39 = zext i64 %r38 to i128
-%r40 = shl i128 %r39, 64
-%r41 = or i128 %r35, %r40
-%r42 = zext i128 %r41 to i192
-%r44 = getelementptr i64, i64* %r3, i32 2
-%r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i192
-%r47 = shl i192 %r46, 128
-%r48 = or i192 %r42, %r47
-%r49 = zext i192 %r48 to i256
-%r51 = getelementptr i64, i64* %r3, i32 3
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i256
-%r54 = shl i256 %r53, 192
-%r55 = or i256 %r49, %r54
-%r56 = zext i256 %r55 to i320
-%r58 = getelementptr i64, i64* %r3, i32 4
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i320
-%r61 = shl i320 %r60, 256
-%r62 = or i320 %r56, %r61
-%r63 = add i320 %r33, %r62
-%r64 = load i64, i64* %r4
-%r65 = zext i64 %r64 to i128
-%r67 = getelementptr i64, i64* %r4, i32 1
-%r68 = load i64, i64* %r67
-%r69 = zext i64 %r68 to i128
-%r70 = shl i128 %r69, 64
-%r71 = or i128 %r65, %r70
-%r72 = zext i128 %r71 to i192
-%r74 = getelementptr i64, i64* %r4, i32 2
-%r75 = load i64, i64* %r74
-%r76 = zext i64 %r75 to i192
-%r77 = shl i192 %r76, 128
-%r78 = or i192 %r72, %r77
-%r79 = zext i192 %r78 to i256
-%r81 = getelementptr i64, i64* %r4, i32 3
-%r82 = load i64, i64* %r81
-%r83 = zext i64 %r82 to i256
-%r84 = shl i256 %r83, 192
-%r85 = or i256 %r79, %r84
-%r86 = zext i256 %r85 to i320
-%r88 = getelementptr i64, i64* %r4, i32 4
-%r89 = load i64, i64* %r88
-%r90 = zext i64 %r89 to i320
-%r91 = shl i320 %r90, 256
-%r92 = or i320 %r86, %r91
-%r93 = sub i320 %r63, %r92
-%r94 = lshr i320 %r93, 319
-%r95 = trunc i320 %r94 to i1
-%r96 = select i1 %r95, i320 %r63, i320 %r93
-%r97 = trunc i320 %r96 to i64
-%r99 = getelementptr i64, i64* %r1, i32 0
-store i64 %r97, i64* %r99
-%r100 = lshr i320 %r96, 64
-%r101 = trunc i320 %r100 to i64
-%r103 = getelementptr i64, i64* %r1, i32 1
-store i64 %r101, i64* %r103
-%r104 = lshr i320 %r100, 64
-%r105 = trunc i320 %r104 to i64
-%r107 = getelementptr i64, i64* %r1, i32 2
-store i64 %r105, i64* %r107
-%r108 = lshr i320 %r104, 64
-%r109 = trunc i320 %r108 to i64
-%r111 = getelementptr i64, i64* %r1, i32 3
-store i64 %r109, i64* %r111
-%r112 = lshr i320 %r108, 64
-%r113 = trunc i320 %r112 to i64
-%r115 = getelementptr i64, i64* %r1, i32 4
-store i64 %r113, i64* %r115
-ret void
-}
-define void @mcl_fp_sub5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = load i64, i64* %r3
-%r35 = zext i64 %r34 to i128
-%r37 = getelementptr i64, i64* %r3, i32 1
-%r38 = load i64, i64* %r37
-%r39 = zext i64 %r38 to i128
-%r40 = shl i128 %r39, 64
-%r41 = or i128 %r35, %r40
-%r42 = zext i128 %r41 to i192
-%r44 = getelementptr i64, i64* %r3, i32 2
-%r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i192
-%r47 = shl i192 %r46, 128
-%r48 = or i192 %r42, %r47
-%r49 = zext i192 %r48 to i256
-%r51 = getelementptr i64, i64* %r3, i32 3
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i256
-%r54 = shl i256 %r53, 192
-%r55 = or i256 %r49, %r54
-%r56 = zext i256 %r55 to i320
-%r58 = getelementptr i64, i64* %r3, i32 4
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i320
-%r61 = shl i320 %r60, 256
-%r62 = or i320 %r56, %r61
-%r63 = zext i320 %r33 to i384
-%r64 = zext i320 %r62 to i384
-%r65 = sub i384 %r63, %r64
-%r66 = trunc i384 %r65 to i320
-%r67 = lshr i384 %r65, 320
-%r68 = trunc i384 %r67 to i1
-%r69 = trunc i320 %r66 to i64
-%r71 = getelementptr i64, i64* %r1, i32 0
-store i64 %r69, i64* %r71
-%r72 = lshr i320 %r66, 64
-%r73 = trunc i320 %r72 to i64
-%r75 = getelementptr i64, i64* %r1, i32 1
-store i64 %r73, i64* %r75
-%r76 = lshr i320 %r72, 64
-%r77 = trunc i320 %r76 to i64
-%r79 = getelementptr i64, i64* %r1, i32 2
-store i64 %r77, i64* %r79
-%r80 = lshr i320 %r76, 64
-%r81 = trunc i320 %r80 to i64
-%r83 = getelementptr i64, i64* %r1, i32 3
-store i64 %r81, i64* %r83
-%r84 = lshr i320 %r80, 64
-%r85 = trunc i320 %r84 to i64
-%r87 = getelementptr i64, i64* %r1, i32 4
-store i64 %r85, i64* %r87
-br i1%r68, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r88 = load i64, i64* %r4
-%r89 = zext i64 %r88 to i128
-%r91 = getelementptr i64, i64* %r4, i32 1
-%r92 = load i64, i64* %r91
-%r93 = zext i64 %r92 to i128
-%r94 = shl i128 %r93, 64
-%r95 = or i128 %r89, %r94
-%r96 = zext i128 %r95 to i192
-%r98 = getelementptr i64, i64* %r4, i32 2
-%r99 = load i64, i64* %r98
-%r100 = zext i64 %r99 to i192
-%r101 = shl i192 %r100, 128
-%r102 = or i192 %r96, %r101
-%r103 = zext i192 %r102 to i256
-%r105 = getelementptr i64, i64* %r4, i32 3
-%r106 = load i64, i64* %r105
-%r107 = zext i64 %r106 to i256
-%r108 = shl i256 %r107, 192
-%r109 = or i256 %r103, %r108
-%r110 = zext i256 %r109 to i320
-%r112 = getelementptr i64, i64* %r4, i32 4
-%r113 = load i64, i64* %r112
-%r114 = zext i64 %r113 to i320
-%r115 = shl i320 %r114, 256
-%r116 = or i320 %r110, %r115
-%r117 = add i320 %r66, %r116
-%r118 = trunc i320 %r117 to i64
-%r120 = getelementptr i64, i64* %r1, i32 0
-store i64 %r118, i64* %r120
-%r121 = lshr i320 %r117, 64
-%r122 = trunc i320 %r121 to i64
-%r124 = getelementptr i64, i64* %r1, i32 1
-store i64 %r122, i64* %r124
-%r125 = lshr i320 %r121, 64
-%r126 = trunc i320 %r125 to i64
-%r128 = getelementptr i64, i64* %r1, i32 2
-store i64 %r126, i64* %r128
-%r129 = lshr i320 %r125, 64
-%r130 = trunc i320 %r129 to i64
-%r132 = getelementptr i64, i64* %r1, i32 3
-store i64 %r130, i64* %r132
-%r133 = lshr i320 %r129, 64
-%r134 = trunc i320 %r133 to i64
-%r136 = getelementptr i64, i64* %r1, i32 4
-store i64 %r134, i64* %r136
-ret void
-}
-define void @mcl_fp_subNF5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = load i64, i64* %r3
-%r35 = zext i64 %r34 to i128
-%r37 = getelementptr i64, i64* %r3, i32 1
-%r38 = load i64, i64* %r37
-%r39 = zext i64 %r38 to i128
-%r40 = shl i128 %r39, 64
-%r41 = or i128 %r35, %r40
-%r42 = zext i128 %r41 to i192
-%r44 = getelementptr i64, i64* %r3, i32 2
-%r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i192
-%r47 = shl i192 %r46, 128
-%r48 = or i192 %r42, %r47
-%r49 = zext i192 %r48 to i256
-%r51 = getelementptr i64, i64* %r3, i32 3
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i256
-%r54 = shl i256 %r53, 192
-%r55 = or i256 %r49, %r54
-%r56 = zext i256 %r55 to i320
-%r58 = getelementptr i64, i64* %r3, i32 4
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i320
-%r61 = shl i320 %r60, 256
-%r62 = or i320 %r56, %r61
-%r63 = sub i320 %r33, %r62
-%r64 = lshr i320 %r63, 319
-%r65 = trunc i320 %r64 to i1
-%r66 = load i64, i64* %r4
-%r67 = zext i64 %r66 to i128
-%r69 = getelementptr i64, i64* %r4, i32 1
-%r70 = load i64, i64* %r69
-%r71 = zext i64 %r70 to i128
-%r72 = shl i128 %r71, 64
-%r73 = or i128 %r67, %r72
-%r74 = zext i128 %r73 to i192
-%r76 = getelementptr i64, i64* %r4, i32 2
-%r77 = load i64, i64* %r76
-%r78 = zext i64 %r77 to i192
-%r79 = shl i192 %r78, 128
-%r80 = or i192 %r74, %r79
-%r81 = zext i192 %r80 to i256
-%r83 = getelementptr i64, i64* %r4, i32 3
-%r84 = load i64, i64* %r83
-%r85 = zext i64 %r84 to i256
-%r86 = shl i256 %r85, 192
-%r87 = or i256 %r81, %r86
-%r88 = zext i256 %r87 to i320
-%r90 = getelementptr i64, i64* %r4, i32 4
-%r91 = load i64, i64* %r90
-%r92 = zext i64 %r91 to i320
-%r93 = shl i320 %r92, 256
-%r94 = or i320 %r88, %r93
-%r96 = select i1 %r65, i320 %r94, i320 0
-%r97 = add i320 %r63, %r96
-%r98 = trunc i320 %r97 to i64
-%r100 = getelementptr i64, i64* %r1, i32 0
-store i64 %r98, i64* %r100
-%r101 = lshr i320 %r97, 64
-%r102 = trunc i320 %r101 to i64
-%r104 = getelementptr i64, i64* %r1, i32 1
-store i64 %r102, i64* %r104
-%r105 = lshr i320 %r101, 64
-%r106 = trunc i320 %r105 to i64
-%r108 = getelementptr i64, i64* %r1, i32 2
-store i64 %r106, i64* %r108
-%r109 = lshr i320 %r105, 64
-%r110 = trunc i320 %r109 to i64
-%r112 = getelementptr i64, i64* %r1, i32 3
-store i64 %r110, i64* %r112
-%r113 = lshr i320 %r109, 64
-%r114 = trunc i320 %r113 to i64
-%r116 = getelementptr i64, i64* %r1, i32 4
-store i64 %r114, i64* %r116
-ret void
-}
-define void @mcl_fpDbl_add5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r2, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = zext i512 %r54 to i576
-%r57 = getelementptr i64, i64* %r2, i32 8
-%r58 = load i64, i64* %r57
-%r59 = zext i64 %r58 to i576
-%r60 = shl i576 %r59, 512
-%r61 = or i576 %r55, %r60
-%r62 = zext i576 %r61 to i640
-%r64 = getelementptr i64, i64* %r2, i32 9
-%r65 = load i64, i64* %r64
-%r66 = zext i64 %r65 to i640
-%r67 = shl i640 %r66, 576
-%r68 = or i640 %r62, %r67
-%r69 = load i64, i64* %r3
-%r70 = zext i64 %r69 to i128
-%r72 = getelementptr i64, i64* %r3, i32 1
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i128
-%r75 = shl i128 %r74, 64
-%r76 = or i128 %r70, %r75
-%r77 = zext i128 %r76 to i192
-%r79 = getelementptr i64, i64* %r3, i32 2
-%r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i192
-%r82 = shl i192 %r81, 128
-%r83 = or i192 %r77, %r82
-%r84 = zext i192 %r83 to i256
-%r86 = getelementptr i64, i64* %r3, i32 3
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i256
-%r89 = shl i256 %r88, 192
-%r90 = or i256 %r84, %r89
-%r91 = zext i256 %r90 to i320
-%r93 = getelementptr i64, i64* %r3, i32 4
-%r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i320
-%r96 = shl i320 %r95, 256
-%r97 = or i320 %r91, %r96
-%r98 = zext i320 %r97 to i384
-%r100 = getelementptr i64, i64* %r3, i32 5
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i384
-%r103 = shl i384 %r102, 320
-%r104 = or i384 %r98, %r103
-%r105 = zext i384 %r104 to i448
-%r107 = getelementptr i64, i64* %r3, i32 6
-%r108 = load i64, i64* %r107
-%r109 = zext i64 %r108 to i448
-%r110 = shl i448 %r109, 384
-%r111 = or i448 %r105, %r110
-%r112 = zext i448 %r111 to i512
-%r114 = getelementptr i64, i64* %r3, i32 7
-%r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i512
-%r117 = shl i512 %r116, 448
-%r118 = or i512 %r112, %r117
-%r119 = zext i512 %r118 to i576
-%r121 = getelementptr i64, i64* %r3, i32 8
-%r122 = load i64, i64* %r121
-%r123 = zext i64 %r122 to i576
-%r124 = shl i576 %r123, 512
-%r125 = or i576 %r119, %r124
-%r126 = zext i576 %r125 to i640
-%r128 = getelementptr i64, i64* %r3, i32 9
-%r129 = load i64, i64* %r128
-%r130 = zext i64 %r129 to i640
-%r131 = shl i640 %r130, 576
-%r132 = or i640 %r126, %r131
-%r133 = zext i640 %r68 to i704
-%r134 = zext i640 %r132 to i704
-%r135 = add i704 %r133, %r134
-%r136 = trunc i704 %r135 to i320
-%r137 = trunc i320 %r136 to i64
-%r139 = getelementptr i64, i64* %r1, i32 0
-store i64 %r137, i64* %r139
-%r140 = lshr i320 %r136, 64
-%r141 = trunc i320 %r140 to i64
-%r143 = getelementptr i64, i64* %r1, i32 1
-store i64 %r141, i64* %r143
-%r144 = lshr i320 %r140, 64
-%r145 = trunc i320 %r144 to i64
-%r147 = getelementptr i64, i64* %r1, i32 2
-store i64 %r145, i64* %r147
-%r148 = lshr i320 %r144, 64
-%r149 = trunc i320 %r148 to i64
-%r151 = getelementptr i64, i64* %r1, i32 3
-store i64 %r149, i64* %r151
-%r152 = lshr i320 %r148, 64
-%r153 = trunc i320 %r152 to i64
-%r155 = getelementptr i64, i64* %r1, i32 4
-store i64 %r153, i64* %r155
-%r156 = lshr i704 %r135, 320
-%r157 = trunc i704 %r156 to i384
-%r158 = load i64, i64* %r4
-%r159 = zext i64 %r158 to i128
-%r161 = getelementptr i64, i64* %r4, i32 1
-%r162 = load i64, i64* %r161
-%r163 = zext i64 %r162 to i128
-%r164 = shl i128 %r163, 64
-%r165 = or i128 %r159, %r164
-%r166 = zext i128 %r165 to i192
-%r168 = getelementptr i64, i64* %r4, i32 2
-%r169 = load i64, i64* %r168
-%r170 = zext i64 %r169 to i192
-%r171 = shl i192 %r170, 128
-%r172 = or i192 %r166, %r171
-%r173 = zext i192 %r172 to i256
-%r175 = getelementptr i64, i64* %r4, i32 3
-%r176 = load i64, i64* %r175
-%r177 = zext i64 %r176 to i256
-%r178 = shl i256 %r177, 192
-%r179 = or i256 %r173, %r178
-%r180 = zext i256 %r179 to i320
-%r182 = getelementptr i64, i64* %r4, i32 4
-%r183 = load i64, i64* %r182
-%r184 = zext i64 %r183 to i320
-%r185 = shl i320 %r184, 256
-%r186 = or i320 %r180, %r185
-%r187 = zext i320 %r186 to i384
-%r188 = sub i384 %r157, %r187
-%r189 = lshr i384 %r188, 320
-%r190 = trunc i384 %r189 to i1
-%r191 = select i1 %r190, i384 %r157, i384 %r188
-%r192 = trunc i384 %r191 to i320
-%r194 = getelementptr i64, i64* %r1, i32 5
-%r195 = trunc i320 %r192 to i64
-%r197 = getelementptr i64, i64* %r194, i32 0
-store i64 %r195, i64* %r197
-%r198 = lshr i320 %r192, 64
-%r199 = trunc i320 %r198 to i64
-%r201 = getelementptr i64, i64* %r194, i32 1
-store i64 %r199, i64* %r201
-%r202 = lshr i320 %r198, 64
-%r203 = trunc i320 %r202 to i64
-%r205 = getelementptr i64, i64* %r194, i32 2
-store i64 %r203, i64* %r205
-%r206 = lshr i320 %r202, 64
-%r207 = trunc i320 %r206 to i64
-%r209 = getelementptr i64, i64* %r194, i32 3
-store i64 %r207, i64* %r209
-%r210 = lshr i320 %r206, 64
-%r211 = trunc i320 %r210 to i64
-%r213 = getelementptr i64, i64* %r194, i32 4
-store i64 %r211, i64* %r213
-ret void
-}
-define void @mcl_fpDbl_sub5L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r2, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = zext i512 %r54 to i576
-%r57 = getelementptr i64, i64* %r2, i32 8
-%r58 = load i64, i64* %r57
-%r59 = zext i64 %r58 to i576
-%r60 = shl i576 %r59, 512
-%r61 = or i576 %r55, %r60
-%r62 = zext i576 %r61 to i640
-%r64 = getelementptr i64, i64* %r2, i32 9
-%r65 = load i64, i64* %r64
-%r66 = zext i64 %r65 to i640
-%r67 = shl i640 %r66, 576
-%r68 = or i640 %r62, %r67
-%r69 = load i64, i64* %r3
-%r70 = zext i64 %r69 to i128
-%r72 = getelementptr i64, i64* %r3, i32 1
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i128
-%r75 = shl i128 %r74, 64
-%r76 = or i128 %r70, %r75
-%r77 = zext i128 %r76 to i192
-%r79 = getelementptr i64, i64* %r3, i32 2
-%r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i192
-%r82 = shl i192 %r81, 128
-%r83 = or i192 %r77, %r82
-%r84 = zext i192 %r83 to i256
-%r86 = getelementptr i64, i64* %r3, i32 3
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i256
-%r89 = shl i256 %r88, 192
-%r90 = or i256 %r84, %r89
-%r91 = zext i256 %r90 to i320
-%r93 = getelementptr i64, i64* %r3, i32 4
-%r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i320
-%r96 = shl i320 %r95, 256
-%r97 = or i320 %r91, %r96
-%r98 = zext i320 %r97 to i384
-%r100 = getelementptr i64, i64* %r3, i32 5
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i384
-%r103 = shl i384 %r102, 320
-%r104 = or i384 %r98, %r103
-%r105 = zext i384 %r104 to i448
-%r107 = getelementptr i64, i64* %r3, i32 6
-%r108 = load i64, i64* %r107
-%r109 = zext i64 %r108 to i448
-%r110 = shl i448 %r109, 384
-%r111 = or i448 %r105, %r110
-%r112 = zext i448 %r111 to i512
-%r114 = getelementptr i64, i64* %r3, i32 7
-%r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i512
-%r117 = shl i512 %r116, 448
-%r118 = or i512 %r112, %r117
-%r119 = zext i512 %r118 to i576
-%r121 = getelementptr i64, i64* %r3, i32 8
-%r122 = load i64, i64* %r121
-%r123 = zext i64 %r122 to i576
-%r124 = shl i576 %r123, 512
-%r125 = or i576 %r119, %r124
-%r126 = zext i576 %r125 to i640
-%r128 = getelementptr i64, i64* %r3, i32 9
-%r129 = load i64, i64* %r128
-%r130 = zext i64 %r129 to i640
-%r131 = shl i640 %r130, 576
-%r132 = or i640 %r126, %r131
-%r133 = zext i640 %r68 to i704
-%r134 = zext i640 %r132 to i704
-%r135 = sub i704 %r133, %r134
-%r136 = trunc i704 %r135 to i320
-%r137 = trunc i320 %r136 to i64
-%r139 = getelementptr i64, i64* %r1, i32 0
-store i64 %r137, i64* %r139
-%r140 = lshr i320 %r136, 64
-%r141 = trunc i320 %r140 to i64
-%r143 = getelementptr i64, i64* %r1, i32 1
-store i64 %r141, i64* %r143
-%r144 = lshr i320 %r140, 64
-%r145 = trunc i320 %r144 to i64
-%r147 = getelementptr i64, i64* %r1, i32 2
-store i64 %r145, i64* %r147
-%r148 = lshr i320 %r144, 64
-%r149 = trunc i320 %r148 to i64
-%r151 = getelementptr i64, i64* %r1, i32 3
-store i64 %r149, i64* %r151
-%r152 = lshr i320 %r148, 64
-%r153 = trunc i320 %r152 to i64
-%r155 = getelementptr i64, i64* %r1, i32 4
-store i64 %r153, i64* %r155
-%r156 = lshr i704 %r135, 320
-%r157 = trunc i704 %r156 to i320
-%r158 = lshr i704 %r135, 640
-%r159 = trunc i704 %r158 to i1
-%r160 = load i64, i64* %r4
-%r161 = zext i64 %r160 to i128
-%r163 = getelementptr i64, i64* %r4, i32 1
-%r164 = load i64, i64* %r163
-%r165 = zext i64 %r164 to i128
-%r166 = shl i128 %r165, 64
-%r167 = or i128 %r161, %r166
-%r168 = zext i128 %r167 to i192
-%r170 = getelementptr i64, i64* %r4, i32 2
-%r171 = load i64, i64* %r170
-%r172 = zext i64 %r171 to i192
-%r173 = shl i192 %r172, 128
-%r174 = or i192 %r168, %r173
-%r175 = zext i192 %r174 to i256
-%r177 = getelementptr i64, i64* %r4, i32 3
-%r178 = load i64, i64* %r177
-%r179 = zext i64 %r178 to i256
-%r180 = shl i256 %r179, 192
-%r181 = or i256 %r175, %r180
-%r182 = zext i256 %r181 to i320
-%r184 = getelementptr i64, i64* %r4, i32 4
-%r185 = load i64, i64* %r184
-%r186 = zext i64 %r185 to i320
-%r187 = shl i320 %r186, 256
-%r188 = or i320 %r182, %r187
-%r190 = select i1 %r159, i320 %r188, i320 0
-%r191 = add i320 %r157, %r190
-%r193 = getelementptr i64, i64* %r1, i32 5
-%r194 = trunc i320 %r191 to i64
-%r196 = getelementptr i64, i64* %r193, i32 0
-store i64 %r194, i64* %r196
-%r197 = lshr i320 %r191, 64
-%r198 = trunc i320 %r197 to i64
-%r200 = getelementptr i64, i64* %r193, i32 1
-store i64 %r198, i64* %r200
-%r201 = lshr i320 %r197, 64
-%r202 = trunc i320 %r201 to i64
-%r204 = getelementptr i64, i64* %r193, i32 2
-store i64 %r202, i64* %r204
-%r205 = lshr i320 %r201, 64
-%r206 = trunc i320 %r205 to i64
-%r208 = getelementptr i64, i64* %r193, i32 3
-store i64 %r206, i64* %r208
-%r209 = lshr i320 %r205, 64
-%r210 = trunc i320 %r209 to i64
-%r212 = getelementptr i64, i64* %r193, i32 4
-store i64 %r210, i64* %r212
-ret void
-}
-define i448 @mulPv384x64(i64* noalias  %r2, i64 %r3)
-{
-%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0)
-%r6 = trunc i128 %r5 to i64
-%r7 = call i64 @extractHigh64(i128 %r5)
-%r9 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 1)
-%r10 = trunc i128 %r9 to i64
-%r11 = call i64 @extractHigh64(i128 %r9)
-%r13 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 2)
-%r14 = trunc i128 %r13 to i64
-%r15 = call i64 @extractHigh64(i128 %r13)
-%r17 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 3)
-%r18 = trunc i128 %r17 to i64
-%r19 = call i64 @extractHigh64(i128 %r17)
-%r21 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 4)
-%r22 = trunc i128 %r21 to i64
-%r23 = call i64 @extractHigh64(i128 %r21)
-%r25 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 5)
-%r26 = trunc i128 %r25 to i64
-%r27 = call i64 @extractHigh64(i128 %r25)
-%r28 = zext i64 %r6 to i128
-%r29 = zext i64 %r10 to i128
-%r30 = shl i128 %r29, 64
-%r31 = or i128 %r28, %r30
-%r32 = zext i128 %r31 to i192
-%r33 = zext i64 %r14 to i192
-%r34 = shl i192 %r33, 128
-%r35 = or i192 %r32, %r34
-%r36 = zext i192 %r35 to i256
-%r37 = zext i64 %r18 to i256
-%r38 = shl i256 %r37, 192
-%r39 = or i256 %r36, %r38
-%r40 = zext i256 %r39 to i320
-%r41 = zext i64 %r22 to i320
-%r42 = shl i320 %r41, 256
-%r43 = or i320 %r40, %r42
-%r44 = zext i320 %r43 to i384
-%r45 = zext i64 %r26 to i384
-%r46 = shl i384 %r45, 320
-%r47 = or i384 %r44, %r46
-%r48 = zext i64 %r7 to i128
-%r49 = zext i64 %r11 to i128
-%r50 = shl i128 %r49, 64
-%r51 = or i128 %r48, %r50
-%r52 = zext i128 %r51 to i192
-%r53 = zext i64 %r15 to i192
-%r54 = shl i192 %r53, 128
-%r55 = or i192 %r52, %r54
-%r56 = zext i192 %r55 to i256
-%r57 = zext i64 %r19 to i256
-%r58 = shl i256 %r57, 192
-%r59 = or i256 %r56, %r58
-%r60 = zext i256 %r59 to i320
-%r61 = zext i64 %r23 to i320
-%r62 = shl i320 %r61, 256
-%r63 = or i320 %r60, %r62
-%r64 = zext i320 %r63 to i384
-%r65 = zext i64 %r27 to i384
-%r66 = shl i384 %r65, 320
-%r67 = or i384 %r64, %r66
-%r68 = zext i384 %r47 to i448
-%r69 = zext i384 %r67 to i448
-%r70 = shl i448 %r69, 64
-%r71 = add i448 %r68, %r70
-ret i448 %r71
-}
-define void @mcl_fp_mulUnitPre6L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3)
-{
-%r4 = call i448 @mulPv384x64(i64* %r2, i64 %r3)
-%r5 = trunc i448 %r4 to i64
-%r7 = getelementptr i64, i64* %r1, i32 0
-store i64 %r5, i64* %r7
-%r8 = lshr i448 %r4, 64
-%r9 = trunc i448 %r8 to i64
-%r11 = getelementptr i64, i64* %r1, i32 1
-store i64 %r9, i64* %r11
-%r12 = lshr i448 %r8, 64
-%r13 = trunc i448 %r12 to i64
-%r15 = getelementptr i64, i64* %r1, i32 2
-store i64 %r13, i64* %r15
-%r16 = lshr i448 %r12, 64
-%r17 = trunc i448 %r16 to i64
-%r19 = getelementptr i64, i64* %r1, i32 3
-store i64 %r17, i64* %r19
-%r20 = lshr i448 %r16, 64
-%r21 = trunc i448 %r20 to i64
-%r23 = getelementptr i64, i64* %r1, i32 4
-store i64 %r21, i64* %r23
-%r24 = lshr i448 %r20, 64
-%r25 = trunc i448 %r24 to i64
-%r27 = getelementptr i64, i64* %r1, i32 5
-store i64 %r25, i64* %r27
-%r28 = lshr i448 %r24, 64
-%r29 = trunc i448 %r28 to i64
-%r31 = getelementptr i64, i64* %r1, i32 6
-store i64 %r29, i64* %r31
-ret void
-}
-define void @mcl_fpDbl_mulPre6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
-{
-%r4 = load i64, i64* %r3
-%r5 = call i448 @mulPv384x64(i64* %r2, i64 %r4)
-%r6 = trunc i448 %r5 to i64
-store i64 %r6, i64* %r1
-%r7 = lshr i448 %r5, 64
-%r9 = getelementptr i64, i64* %r3, i32 1
-%r10 = load i64, i64* %r9
-%r11 = call i448 @mulPv384x64(i64* %r2, i64 %r10)
-%r12 = add i448 %r7, %r11
-%r13 = trunc i448 %r12 to i64
-%r15 = getelementptr i64, i64* %r1, i32 1
-store i64 %r13, i64* %r15
-%r16 = lshr i448 %r12, 64
-%r18 = getelementptr i64, i64* %r3, i32 2
-%r19 = load i64, i64* %r18
-%r20 = call i448 @mulPv384x64(i64* %r2, i64 %r19)
-%r21 = add i448 %r16, %r20
-%r22 = trunc i448 %r21 to i64
-%r24 = getelementptr i64, i64* %r1, i32 2
-store i64 %r22, i64* %r24
-%r25 = lshr i448 %r21, 64
-%r27 = getelementptr i64, i64* %r3, i32 3
-%r28 = load i64, i64* %r27
-%r29 = call i448 @mulPv384x64(i64* %r2, i64 %r28)
-%r30 = add i448 %r25, %r29
-%r31 = trunc i448 %r30 to i64
-%r33 = getelementptr i64, i64* %r1, i32 3
-store i64 %r31, i64* %r33
-%r34 = lshr i448 %r30, 64
-%r36 = getelementptr i64, i64* %r3, i32 4
-%r37 = load i64, i64* %r36
-%r38 = call i448 @mulPv384x64(i64* %r2, i64 %r37)
-%r39 = add i448 %r34, %r38
-%r40 = trunc i448 %r39 to i64
-%r42 = getelementptr i64, i64* %r1, i32 4
-store i64 %r40, i64* %r42
-%r43 = lshr i448 %r39, 64
-%r45 = getelementptr i64, i64* %r3, i32 5
-%r46 = load i64, i64* %r45
-%r47 = call i448 @mulPv384x64(i64* %r2, i64 %r46)
-%r48 = add i448 %r43, %r47
-%r50 = getelementptr i64, i64* %r1, i32 5
-%r51 = trunc i448 %r48 to i64
-%r53 = getelementptr i64, i64* %r50, i32 0
-store i64 %r51, i64* %r53
-%r54 = lshr i448 %r48, 64
-%r55 = trunc i448 %r54 to i64
-%r57 = getelementptr i64, i64* %r50, i32 1
-store i64 %r55, i64* %r57
-%r58 = lshr i448 %r54, 64
-%r59 = trunc i448 %r58 to i64
-%r61 = getelementptr i64, i64* %r50, i32 2
-store i64 %r59, i64* %r61
-%r62 = lshr i448 %r58, 64
-%r63 = trunc i448 %r62 to i64
-%r65 = getelementptr i64, i64* %r50, i32 3
-store i64 %r63, i64* %r65
-%r66 = lshr i448 %r62, 64
-%r67 = trunc i448 %r66 to i64
-%r69 = getelementptr i64, i64* %r50, i32 4
-store i64 %r67, i64* %r69
-%r70 = lshr i448 %r66, 64
-%r71 = trunc i448 %r70 to i64
-%r73 = getelementptr i64, i64* %r50, i32 5
-store i64 %r71, i64* %r73
-%r74 = lshr i448 %r70, 64
-%r75 = trunc i448 %r74 to i64
-%r77 = getelementptr i64, i64* %r50, i32 6
-store i64 %r75, i64* %r77
-ret void
-}
-define void @mcl_fpDbl_sqrPre6L(i64* noalias  %r1, i64* noalias  %r2)
-{
-%r3 = load i64, i64* %r2
-%r4 = call i448 @mulPv384x64(i64* %r2, i64 %r3)
-%r5 = trunc i448 %r4 to i64
-store i64 %r5, i64* %r1
-%r6 = lshr i448 %r4, 64
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = call i448 @mulPv384x64(i64* %r2, i64 %r9)
-%r11 = add i448 %r6, %r10
-%r12 = trunc i448 %r11 to i64
-%r14 = getelementptr i64, i64* %r1, i32 1
-store i64 %r12, i64* %r14
-%r15 = lshr i448 %r11, 64
-%r17 = getelementptr i64, i64* %r2, i32 2
-%r18 = load i64, i64* %r17
-%r19 = call i448 @mulPv384x64(i64* %r2, i64 %r18)
-%r20 = add i448 %r15, %r19
-%r21 = trunc i448 %r20 to i64
-%r23 = getelementptr i64, i64* %r1, i32 2
-store i64 %r21, i64* %r23
-%r24 = lshr i448 %r20, 64
-%r26 = getelementptr i64, i64* %r2, i32 3
-%r27 = load i64, i64* %r26
-%r28 = call i448 @mulPv384x64(i64* %r2, i64 %r27)
-%r29 = add i448 %r24, %r28
-%r30 = trunc i448 %r29 to i64
-%r32 = getelementptr i64, i64* %r1, i32 3
-store i64 %r30, i64* %r32
-%r33 = lshr i448 %r29, 64
-%r35 = getelementptr i64, i64* %r2, i32 4
-%r36 = load i64, i64* %r35
-%r37 = call i448 @mulPv384x64(i64* %r2, i64 %r36)
-%r38 = add i448 %r33, %r37
-%r39 = trunc i448 %r38 to i64
-%r41 = getelementptr i64, i64* %r1, i32 4
-store i64 %r39, i64* %r41
-%r42 = lshr i448 %r38, 64
-%r44 = getelementptr i64, i64* %r2, i32 5
-%r45 = load i64, i64* %r44
-%r46 = call i448 @mulPv384x64(i64* %r2, i64 %r45)
-%r47 = add i448 %r42, %r46
-%r49 = getelementptr i64, i64* %r1, i32 5
-%r50 = trunc i448 %r47 to i64
-%r52 = getelementptr i64, i64* %r49, i32 0
-store i64 %r50, i64* %r52
-%r53 = lshr i448 %r47, 64
-%r54 = trunc i448 %r53 to i64
-%r56 = getelementptr i64, i64* %r49, i32 1
-store i64 %r54, i64* %r56
-%r57 = lshr i448 %r53, 64
-%r58 = trunc i448 %r57 to i64
-%r60 = getelementptr i64, i64* %r49, i32 2
-store i64 %r58, i64* %r60
-%r61 = lshr i448 %r57, 64
-%r62 = trunc i448 %r61 to i64
-%r64 = getelementptr i64, i64* %r49, i32 3
-store i64 %r62, i64* %r64
-%r65 = lshr i448 %r61, 64
-%r66 = trunc i448 %r65 to i64
-%r68 = getelementptr i64, i64* %r49, i32 4
-store i64 %r66, i64* %r68
-%r69 = lshr i448 %r65, 64
-%r70 = trunc i448 %r69 to i64
-%r72 = getelementptr i64, i64* %r49, i32 5
-store i64 %r70, i64* %r72
-%r73 = lshr i448 %r69, 64
-%r74 = trunc i448 %r73 to i64
-%r76 = getelementptr i64, i64* %r49, i32 6
-store i64 %r74, i64* %r76
-ret void
-}
-define void @mcl_fp_mont6L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
-{
-%r6 = getelementptr i64, i64* %r4, i32 -1
-%r7 = load i64, i64* %r6
-%r9 = getelementptr i64, i64* %r3, i32 0
-%r10 = load i64, i64* %r9
-%r11 = call i448 @mulPv384x64(i64* %r2, i64 %r10)
-%r12 = zext i448 %r11 to i512
-%r13 = trunc i448 %r11 to i64
-%r14 = mul i64 %r13, %r7
-%r15 = call i448 @mulPv384x64(i64* %r4, i64 %r14)
-%r16 = zext i448 %r15 to i512
-%r17 = add i512 %r12, %r16
-%r18 = lshr i512 %r17, 64
-%r20 = getelementptr i64, i64* %r3, i32 1
-%r21 = load i64, i64* %r20
-%r22 = call i448 @mulPv384x64(i64* %r2, i64 %r21)
-%r23 = zext i448 %r22 to i512
-%r24 = add i512 %r18, %r23
-%r25 = trunc i512 %r24 to i64
-%r26 = mul i64 %r25, %r7
-%r27 = call i448 @mulPv384x64(i64* %r4, i64 %r26)
-%r28 = zext i448 %r27 to i512
-%r29 = add i512 %r24, %r28
-%r30 = lshr i512 %r29, 64
-%r32 = getelementptr i64, i64* %r3, i32 2
-%r33 = load i64, i64* %r32
-%r34 = call i448 @mulPv384x64(i64* %r2, i64 %r33)
-%r35 = zext i448 %r34 to i512
-%r36 = add i512 %r30, %r35
-%r37 = trunc i512 %r36 to i64
-%r38 = mul i64 %r37, %r7
-%r39 = call i448 @mulPv384x64(i64* %r4, i64 %r38)
-%r40 = zext i448 %r39 to i512
-%r41 = add i512 %r36, %r40
-%r42 = lshr i512 %r41, 64
-%r44 = getelementptr i64, i64* %r3, i32 3
-%r45 = load i64, i64* %r44
-%r46 = call i448 @mulPv384x64(i64* %r2, i64 %r45)
-%r47 = zext i448 %r46 to i512
-%r48 = add i512 %r42, %r47
-%r49 = trunc i512 %r48 to i64
-%r50 = mul i64 %r49, %r7
-%r51 = call i448 @mulPv384x64(i64* %r4, i64 %r50)
-%r52 = zext i448 %r51 to i512
-%r53 = add i512 %r48, %r52
-%r54 = lshr i512 %r53, 64
-%r56 = getelementptr i64, i64* %r3, i32 4
-%r57 = load i64, i64* %r56
-%r58 = call i448 @mulPv384x64(i64* %r2, i64 %r57)
-%r59 = zext i448 %r58 to i512
-%r60 = add i512 %r54, %r59
-%r61 = trunc i512 %r60 to i64
-%r62 = mul i64 %r61, %r7
-%r63 = call i448 @mulPv384x64(i64* %r4, i64 %r62)
-%r64 = zext i448 %r63 to i512
-%r65 = add i512 %r60, %r64
-%r66 = lshr i512 %r65, 64
-%r68 = getelementptr i64, i64* %r3, i32 5
-%r69 = load i64, i64* %r68
-%r70 = call i448 @mulPv384x64(i64* %r2, i64 %r69)
-%r71 = zext i448 %r70 to i512
-%r72 = add i512 %r66, %r71
-%r73 = trunc i512 %r72 to i64
-%r74 = mul i64 %r73, %r7
-%r75 = call i448 @mulPv384x64(i64* %r4, i64 %r74)
-%r76 = zext i448 %r75 to i512
-%r77 = add i512 %r72, %r76
-%r78 = lshr i512 %r77, 64
-%r79 = trunc i512 %r78 to i448
-%r80 = load i64, i64* %r4
-%r81 = zext i64 %r80 to i128
-%r83 = getelementptr i64, i64* %r4, i32 1
-%r84 = load i64, i64* %r83
-%r85 = zext i64 %r84 to i128
-%r86 = shl i128 %r85, 64
-%r87 = or i128 %r81, %r86
-%r88 = zext i128 %r87 to i192
-%r90 = getelementptr i64, i64* %r4, i32 2
-%r91 = load i64, i64* %r90
-%r92 = zext i64 %r91 to i192
-%r93 = shl i192 %r92, 128
-%r94 = or i192 %r88, %r93
-%r95 = zext i192 %r94 to i256
-%r97 = getelementptr i64, i64* %r4, i32 3
-%r98 = load i64, i64* %r97
-%r99 = zext i64 %r98 to i256
-%r100 = shl i256 %r99, 192
-%r101 = or i256 %r95, %r100
-%r102 = zext i256 %r101 to i320
-%r104 = getelementptr i64, i64* %r4, i32 4
-%r105 = load i64, i64* %r104
-%r106 = zext i64 %r105 to i320
-%r107 = shl i320 %r106, 256
-%r108 = or i320 %r102, %r107
-%r109 = zext i320 %r108 to i384
-%r111 = getelementptr i64, i64* %r4, i32 5
-%r112 = load i64, i64* %r111
-%r113 = zext i64 %r112 to i384
-%r114 = shl i384 %r113, 320
-%r115 = or i384 %r109, %r114
-%r116 = zext i384 %r115 to i448
-%r117 = sub i448 %r79, %r116
-%r118 = lshr i448 %r117, 384
-%r119 = trunc i448 %r118 to i1
-%r120 = select i1 %r119, i448 %r79, i448 %r117
-%r121 = trunc i448 %r120 to i384
-%r122 = trunc i384 %r121 to i64
-%r124 = getelementptr i64, i64* %r1, i32 0
-store i64 %r122, i64* %r124
-%r125 = lshr i384 %r121, 64
-%r126 = trunc i384 %r125 to i64
-%r128 = getelementptr i64, i64* %r1, i32 1
-store i64 %r126, i64* %r128
-%r129 = lshr i384 %r125, 64
-%r130 = trunc i384 %r129 to i64
-%r132 = getelementptr i64, i64* %r1, i32 2
-store i64 %r130, i64* %r132
-%r133 = lshr i384 %r129, 64
-%r134 = trunc i384 %r133 to i64
-%r136 = getelementptr i64, i64* %r1, i32 3
-store i64 %r134, i64* %r136
-%r137 = lshr i384 %r133, 64
-%r138 = trunc i384 %r137 to i64
-%r140 = getelementptr i64, i64* %r1, i32 4
-store i64 %r138, i64* %r140
-%r141 = lshr i384 %r137, 64
-%r142 = trunc i384 %r141 to i64
-%r144 = getelementptr i64, i64* %r1, i32 5
-store i64 %r142, i64* %r144
-ret void
-}
-define void @mcl_fp_montNF6L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
-{
-%r6 = getelementptr i64, i64* %r4, i32 -1
-%r7 = load i64, i64* %r6
-%r8 = load i64, i64* %r3
-%r9 = call i448 @mulPv384x64(i64* %r2, i64 %r8)
-%r10 = trunc i448 %r9 to i64
-%r11 = mul i64 %r10, %r7
-%r12 = call i448 @mulPv384x64(i64* %r4, i64 %r11)
-%r13 = add i448 %r9, %r12
-%r14 = lshr i448 %r13, 64
-%r16 = getelementptr i64, i64* %r3, i32 1
-%r17 = load i64, i64* %r16
-%r18 = call i448 @mulPv384x64(i64* %r2, i64 %r17)
-%r19 = add i448 %r14, %r18
-%r20 = trunc i448 %r19 to i64
-%r21 = mul i64 %r20, %r7
-%r22 = call i448 @mulPv384x64(i64* %r4, i64 %r21)
-%r23 = add i448 %r19, %r22
-%r24 = lshr i448 %r23, 64
-%r26 = getelementptr i64, i64* %r3, i32 2
-%r27 = load i64, i64* %r26
-%r28 = call i448 @mulPv384x64(i64* %r2, i64 %r27)
-%r29 = add i448 %r24, %r28
-%r30 = trunc i448 %r29 to i64
-%r31 = mul i64 %r30, %r7
-%r32 = call i448 @mulPv384x64(i64* %r4, i64 %r31)
-%r33 = add i448 %r29, %r32
-%r34 = lshr i448 %r33, 64
-%r36 = getelementptr i64, i64* %r3, i32 3
-%r37 = load i64, i64* %r36
-%r38 = call i448 @mulPv384x64(i64* %r2, i64 %r37)
-%r39 = add i448 %r34, %r38
-%r40 = trunc i448 %r39 to i64
-%r41 = mul i64 %r40, %r7
-%r42 = call i448 @mulPv384x64(i64* %r4, i64 %r41)
-%r43 = add i448 %r39, %r42
-%r44 = lshr i448 %r43, 64
-%r46 = getelementptr i64, i64* %r3, i32 4
-%r47 = load i64, i64* %r46
-%r48 = call i448 @mulPv384x64(i64* %r2, i64 %r47)
-%r49 = add i448 %r44, %r48
-%r50 = trunc i448 %r49 to i64
-%r51 = mul i64 %r50, %r7
-%r52 = call i448 @mulPv384x64(i64* %r4, i64 %r51)
-%r53 = add i448 %r49, %r52
-%r54 = lshr i448 %r53, 64
-%r56 = getelementptr i64, i64* %r3, i32 5
-%r57 = load i64, i64* %r56
-%r58 = call i448 @mulPv384x64(i64* %r2, i64 %r57)
-%r59 = add i448 %r54, %r58
-%r60 = trunc i448 %r59 to i64
-%r61 = mul i64 %r60, %r7
-%r62 = call i448 @mulPv384x64(i64* %r4, i64 %r61)
-%r63 = add i448 %r59, %r62
-%r64 = lshr i448 %r63, 64
-%r65 = trunc i448 %r64 to i384
-%r66 = load i64, i64* %r4
-%r67 = zext i64 %r66 to i128
-%r69 = getelementptr i64, i64* %r4, i32 1
-%r70 = load i64, i64* %r69
-%r71 = zext i64 %r70 to i128
-%r72 = shl i128 %r71, 64
-%r73 = or i128 %r67, %r72
-%r74 = zext i128 %r73 to i192
-%r76 = getelementptr i64, i64* %r4, i32 2
-%r77 = load i64, i64* %r76
-%r78 = zext i64 %r77 to i192
-%r79 = shl i192 %r78, 128
-%r80 = or i192 %r74, %r79
-%r81 = zext i192 %r80 to i256
-%r83 = getelementptr i64, i64* %r4, i32 3
-%r84 = load i64, i64* %r83
-%r85 = zext i64 %r84 to i256
-%r86 = shl i256 %r85, 192
-%r87 = or i256 %r81, %r86
-%r88 = zext i256 %r87 to i320
-%r90 = getelementptr i64, i64* %r4, i32 4
-%r91 = load i64, i64* %r90
-%r92 = zext i64 %r91 to i320
-%r93 = shl i320 %r92, 256
-%r94 = or i320 %r88, %r93
-%r95 = zext i320 %r94 to i384
-%r97 = getelementptr i64, i64* %r4, i32 5
-%r98 = load i64, i64* %r97
-%r99 = zext i64 %r98 to i384
-%r100 = shl i384 %r99, 320
-%r101 = or i384 %r95, %r100
-%r102 = sub i384 %r65, %r101
-%r103 = lshr i384 %r102, 383
-%r104 = trunc i384 %r103 to i1
-%r105 = select i1 %r104, i384 %r65, i384 %r102
-%r106 = trunc i384 %r105 to i64
-%r108 = getelementptr i64, i64* %r1, i32 0
-store i64 %r106, i64* %r108
-%r109 = lshr i384 %r105, 64
-%r110 = trunc i384 %r109 to i64
-%r112 = getelementptr i64, i64* %r1, i32 1
-store i64 %r110, i64* %r112
-%r113 = lshr i384 %r109, 64
-%r114 = trunc i384 %r113 to i64
-%r116 = getelementptr i64, i64* %r1, i32 2
-store i64 %r114, i64* %r116
-%r117 = lshr i384 %r113, 64
-%r118 = trunc i384 %r117 to i64
-%r120 = getelementptr i64, i64* %r1, i32 3
-store i64 %r118, i64* %r120
-%r121 = lshr i384 %r117, 64
-%r122 = trunc i384 %r121 to i64
-%r124 = getelementptr i64, i64* %r1, i32 4
-store i64 %r122, i64* %r124
-%r125 = lshr i384 %r121, 64
-%r126 = trunc i384 %r125 to i64
-%r128 = getelementptr i64, i64* %r1, i32 5
-store i64 %r126, i64* %r128
-ret void
-}
-define void @mcl_fp_montRed6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
-{
-%r5 = getelementptr i64, i64* %r3, i32 -1
-%r6 = load i64, i64* %r5
-%r7 = load i64, i64* %r3
-%r8 = zext i64 %r7 to i128
-%r10 = getelementptr i64, i64* %r3, i32 1
-%r11 = load i64, i64* %r10
-%r12 = zext i64 %r11 to i128
-%r13 = shl i128 %r12, 64
-%r14 = or i128 %r8, %r13
-%r15 = zext i128 %r14 to i192
-%r17 = getelementptr i64, i64* %r3, i32 2
-%r18 = load i64, i64* %r17
-%r19 = zext i64 %r18 to i192
-%r20 = shl i192 %r19, 128
-%r21 = or i192 %r15, %r20
-%r22 = zext i192 %r21 to i256
-%r24 = getelementptr i64, i64* %r3, i32 3
-%r25 = load i64, i64* %r24
-%r26 = zext i64 %r25 to i256
-%r27 = shl i256 %r26, 192
-%r28 = or i256 %r22, %r27
-%r29 = zext i256 %r28 to i320
-%r31 = getelementptr i64, i64* %r3, i32 4
-%r32 = load i64, i64* %r31
-%r33 = zext i64 %r32 to i320
-%r34 = shl i320 %r33, 256
-%r35 = or i320 %r29, %r34
-%r36 = zext i320 %r35 to i384
-%r38 = getelementptr i64, i64* %r3, i32 5
-%r39 = load i64, i64* %r38
-%r40 = zext i64 %r39 to i384
-%r41 = shl i384 %r40, 320
-%r42 = or i384 %r36, %r41
-%r43 = load i64, i64* %r2
-%r44 = zext i64 %r43 to i128
-%r46 = getelementptr i64, i64* %r2, i32 1
-%r47 = load i64, i64* %r46
-%r48 = zext i64 %r47 to i128
-%r49 = shl i128 %r48, 64
-%r50 = or i128 %r44, %r49
-%r51 = zext i128 %r50 to i192
-%r53 = getelementptr i64, i64* %r2, i32 2
-%r54 = load i64, i64* %r53
-%r55 = zext i64 %r54 to i192
-%r56 = shl i192 %r55, 128
-%r57 = or i192 %r51, %r56
-%r58 = zext i192 %r57 to i256
-%r60 = getelementptr i64, i64* %r2, i32 3
-%r61 = load i64, i64* %r60
-%r62 = zext i64 %r61 to i256
-%r63 = shl i256 %r62, 192
-%r64 = or i256 %r58, %r63
-%r65 = zext i256 %r64 to i320
-%r67 = getelementptr i64, i64* %r2, i32 4
-%r68 = load i64, i64* %r67
-%r69 = zext i64 %r68 to i320
-%r70 = shl i320 %r69, 256
-%r71 = or i320 %r65, %r70
-%r72 = zext i320 %r71 to i384
-%r74 = getelementptr i64, i64* %r2, i32 5
-%r75 = load i64, i64* %r74
-%r76 = zext i64 %r75 to i384
-%r77 = shl i384 %r76, 320
-%r78 = or i384 %r72, %r77
-%r79 = zext i384 %r78 to i448
-%r81 = getelementptr i64, i64* %r2, i32 6
-%r82 = load i64, i64* %r81
-%r83 = zext i64 %r82 to i448
-%r84 = shl i448 %r83, 384
-%r85 = or i448 %r79, %r84
-%r86 = zext i448 %r85 to i512
-%r88 = getelementptr i64, i64* %r2, i32 7
-%r89 = load i64, i64* %r88
-%r90 = zext i64 %r89 to i512
-%r91 = shl i512 %r90, 448
-%r92 = or i512 %r86, %r91
-%r93 = zext i512 %r92 to i576
-%r95 = getelementptr i64, i64* %r2, i32 8
-%r96 = load i64, i64* %r95
-%r97 = zext i64 %r96 to i576
-%r98 = shl i576 %r97, 512
-%r99 = or i576 %r93, %r98
-%r100 = zext i576 %r99 to i640
-%r102 = getelementptr i64, i64* %r2, i32 9
-%r103 = load i64, i64* %r102
-%r104 = zext i64 %r103 to i640
-%r105 = shl i640 %r104, 576
-%r106 = or i640 %r100, %r105
-%r107 = zext i640 %r106 to i704
-%r109 = getelementptr i64, i64* %r2, i32 10
-%r110 = load i64, i64* %r109
-%r111 = zext i64 %r110 to i704
-%r112 = shl i704 %r111, 640
-%r113 = or i704 %r107, %r112
-%r114 = zext i704 %r113 to i768
-%r116 = getelementptr i64, i64* %r2, i32 11
-%r117 = load i64, i64* %r116
-%r118 = zext i64 %r117 to i768
-%r119 = shl i768 %r118, 704
-%r120 = or i768 %r114, %r119
-%r121 = zext i768 %r120 to i832
-%r122 = trunc i832 %r121 to i64
-%r123 = mul i64 %r122, %r6
-%r124 = call i448 @mulPv384x64(i64* %r3, i64 %r123)
-%r125 = zext i448 %r124 to i832
-%r126 = add i832 %r121, %r125
-%r127 = lshr i832 %r126, 64
-%r128 = trunc i832 %r127 to i768
-%r129 = trunc i768 %r128 to i64
-%r130 = mul i64 %r129, %r6
-%r131 = call i448 @mulPv384x64(i64* %r3, i64 %r130)
-%r132 = zext i448 %r131 to i768
-%r133 = add i768 %r128, %r132
-%r134 = lshr i768 %r133, 64
-%r135 = trunc i768 %r134 to i704
-%r136 = trunc i704 %r135 to i64
-%r137 = mul i64 %r136, %r6
-%r138 = call i448 @mulPv384x64(i64* %r3, i64 %r137)
-%r139 = zext i448 %r138 to i704
-%r140 = add i704 %r135, %r139
-%r141 = lshr i704 %r140, 64
-%r142 = trunc i704 %r141 to i640
-%r143 = trunc i640 %r142 to i64
-%r144 = mul i64 %r143, %r6
-%r145 = call i448 @mulPv384x64(i64* %r3, i64 %r144)
-%r146 = zext i448 %r145 to i640
-%r147 = add i640 %r142, %r146
-%r148 = lshr i640 %r147, 64
-%r149 = trunc i640 %r148 to i576
-%r150 = trunc i576 %r149 to i64
-%r151 = mul i64 %r150, %r6
-%r152 = call i448 @mulPv384x64(i64* %r3, i64 %r151)
-%r153 = zext i448 %r152 to i576
-%r154 = add i576 %r149, %r153
-%r155 = lshr i576 %r154, 64
-%r156 = trunc i576 %r155 to i512
-%r157 = trunc i512 %r156 to i64
-%r158 = mul i64 %r157, %r6
-%r159 = call i448 @mulPv384x64(i64* %r3, i64 %r158)
-%r160 = zext i448 %r159 to i512
-%r161 = add i512 %r156, %r160
-%r162 = lshr i512 %r161, 64
-%r163 = trunc i512 %r162 to i448
-%r164 = zext i384 %r42 to i448
-%r165 = sub i448 %r163, %r164
-%r166 = lshr i448 %r165, 384
-%r167 = trunc i448 %r166 to i1
-%r168 = select i1 %r167, i448 %r163, i448 %r165
-%r169 = trunc i448 %r168 to i384
-%r170 = trunc i384 %r169 to i64
-%r172 = getelementptr i64, i64* %r1, i32 0
-store i64 %r170, i64* %r172
-%r173 = lshr i384 %r169, 64
-%r174 = trunc i384 %r173 to i64
-%r176 = getelementptr i64, i64* %r1, i32 1
-store i64 %r174, i64* %r176
-%r177 = lshr i384 %r173, 64
-%r178 = trunc i384 %r177 to i64
-%r180 = getelementptr i64, i64* %r1, i32 2
-store i64 %r178, i64* %r180
-%r181 = lshr i384 %r177, 64
-%r182 = trunc i384 %r181 to i64
-%r184 = getelementptr i64, i64* %r1, i32 3
-store i64 %r182, i64* %r184
-%r185 = lshr i384 %r181, 64
-%r186 = trunc i384 %r185 to i64
-%r188 = getelementptr i64, i64* %r1, i32 4
-store i64 %r186, i64* %r188
-%r189 = lshr i384 %r185, 64
-%r190 = trunc i384 %r189 to i64
-%r192 = getelementptr i64, i64* %r1, i32 5
-store i64 %r190, i64* %r192
-ret void
-}
-define i64 @mcl_fp_addPre6L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r3
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r3, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r3, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r3, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r3, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r3, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r42 = load i64, i64* %r4
-%r43 = zext i64 %r42 to i128
-%r45 = getelementptr i64, i64* %r4, i32 1
-%r46 = load i64, i64* %r45
-%r47 = zext i64 %r46 to i128
-%r48 = shl i128 %r47, 64
-%r49 = or i128 %r43, %r48
-%r50 = zext i128 %r49 to i192
-%r52 = getelementptr i64, i64* %r4, i32 2
-%r53 = load i64, i64* %r52
-%r54 = zext i64 %r53 to i192
-%r55 = shl i192 %r54, 128
-%r56 = or i192 %r50, %r55
-%r57 = zext i192 %r56 to i256
-%r59 = getelementptr i64, i64* %r4, i32 3
-%r60 = load i64, i64* %r59
-%r61 = zext i64 %r60 to i256
-%r62 = shl i256 %r61, 192
-%r63 = or i256 %r57, %r62
-%r64 = zext i256 %r63 to i320
-%r66 = getelementptr i64, i64* %r4, i32 4
-%r67 = load i64, i64* %r66
-%r68 = zext i64 %r67 to i320
-%r69 = shl i320 %r68, 256
-%r70 = or i320 %r64, %r69
-%r71 = zext i320 %r70 to i384
-%r73 = getelementptr i64, i64* %r4, i32 5
-%r74 = load i64, i64* %r73
-%r75 = zext i64 %r74 to i384
-%r76 = shl i384 %r75, 320
-%r77 = or i384 %r71, %r76
-%r78 = zext i384 %r77 to i448
-%r79 = add i448 %r41, %r78
-%r80 = trunc i448 %r79 to i384
-%r81 = trunc i384 %r80 to i64
-%r83 = getelementptr i64, i64* %r2, i32 0
-store i64 %r81, i64* %r83
-%r84 = lshr i384 %r80, 64
-%r85 = trunc i384 %r84 to i64
-%r87 = getelementptr i64, i64* %r2, i32 1
-store i64 %r85, i64* %r87
-%r88 = lshr i384 %r84, 64
-%r89 = trunc i384 %r88 to i64
-%r91 = getelementptr i64, i64* %r2, i32 2
-store i64 %r89, i64* %r91
-%r92 = lshr i384 %r88, 64
-%r93 = trunc i384 %r92 to i64
-%r95 = getelementptr i64, i64* %r2, i32 3
-store i64 %r93, i64* %r95
-%r96 = lshr i384 %r92, 64
-%r97 = trunc i384 %r96 to i64
-%r99 = getelementptr i64, i64* %r2, i32 4
-store i64 %r97, i64* %r99
-%r100 = lshr i384 %r96, 64
-%r101 = trunc i384 %r100 to i64
-%r103 = getelementptr i64, i64* %r2, i32 5
-store i64 %r101, i64* %r103
-%r104 = lshr i448 %r79, 384
-%r105 = trunc i448 %r104 to i64
-ret i64 %r105
-}
-define i64 @mcl_fp_subPre6L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r3
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r3, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r3, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r3, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r3, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r3, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r42 = load i64, i64* %r4
-%r43 = zext i64 %r42 to i128
-%r45 = getelementptr i64, i64* %r4, i32 1
-%r46 = load i64, i64* %r45
-%r47 = zext i64 %r46 to i128
-%r48 = shl i128 %r47, 64
-%r49 = or i128 %r43, %r48
-%r50 = zext i128 %r49 to i192
-%r52 = getelementptr i64, i64* %r4, i32 2
-%r53 = load i64, i64* %r52
-%r54 = zext i64 %r53 to i192
-%r55 = shl i192 %r54, 128
-%r56 = or i192 %r50, %r55
-%r57 = zext i192 %r56 to i256
-%r59 = getelementptr i64, i64* %r4, i32 3
-%r60 = load i64, i64* %r59
-%r61 = zext i64 %r60 to i256
-%r62 = shl i256 %r61, 192
-%r63 = or i256 %r57, %r62
-%r64 = zext i256 %r63 to i320
-%r66 = getelementptr i64, i64* %r4, i32 4
-%r67 = load i64, i64* %r66
-%r68 = zext i64 %r67 to i320
-%r69 = shl i320 %r68, 256
-%r70 = or i320 %r64, %r69
-%r71 = zext i320 %r70 to i384
-%r73 = getelementptr i64, i64* %r4, i32 5
-%r74 = load i64, i64* %r73
-%r75 = zext i64 %r74 to i384
-%r76 = shl i384 %r75, 320
-%r77 = or i384 %r71, %r76
-%r78 = zext i384 %r77 to i448
-%r79 = sub i448 %r41, %r78
-%r80 = trunc i448 %r79 to i384
-%r81 = trunc i384 %r80 to i64
-%r83 = getelementptr i64, i64* %r2, i32 0
-store i64 %r81, i64* %r83
-%r84 = lshr i384 %r80, 64
-%r85 = trunc i384 %r84 to i64
-%r87 = getelementptr i64, i64* %r2, i32 1
-store i64 %r85, i64* %r87
-%r88 = lshr i384 %r84, 64
-%r89 = trunc i384 %r88 to i64
-%r91 = getelementptr i64, i64* %r2, i32 2
-store i64 %r89, i64* %r91
-%r92 = lshr i384 %r88, 64
-%r93 = trunc i384 %r92 to i64
-%r95 = getelementptr i64, i64* %r2, i32 3
-store i64 %r93, i64* %r95
-%r96 = lshr i384 %r92, 64
-%r97 = trunc i384 %r96 to i64
-%r99 = getelementptr i64, i64* %r2, i32 4
-store i64 %r97, i64* %r99
-%r100 = lshr i384 %r96, 64
-%r101 = trunc i384 %r100 to i64
-%r103 = getelementptr i64, i64* %r2, i32 5
-store i64 %r101, i64* %r103
-%r104 = lshr i448 %r79, 384
-%r105 = trunc i448 %r104 to i64
-%r107 = and i64 %r105, 1
-ret i64 %r107
-}
-define void @mcl_fp_shr1_6L(i64* noalias  %r1, i64* noalias  %r2)
-{
-%r3 = load i64, i64* %r2
-%r4 = zext i64 %r3 to i128
-%r6 = getelementptr i64, i64* %r2, i32 1
-%r7 = load i64, i64* %r6
-%r8 = zext i64 %r7 to i128
-%r9 = shl i128 %r8, 64
-%r10 = or i128 %r4, %r9
-%r11 = zext i128 %r10 to i192
-%r13 = getelementptr i64, i64* %r2, i32 2
-%r14 = load i64, i64* %r13
-%r15 = zext i64 %r14 to i192
-%r16 = shl i192 %r15, 128
-%r17 = or i192 %r11, %r16
-%r18 = zext i192 %r17 to i256
-%r20 = getelementptr i64, i64* %r2, i32 3
-%r21 = load i64, i64* %r20
-%r22 = zext i64 %r21 to i256
-%r23 = shl i256 %r22, 192
-%r24 = or i256 %r18, %r23
-%r25 = zext i256 %r24 to i320
-%r27 = getelementptr i64, i64* %r2, i32 4
-%r28 = load i64, i64* %r27
-%r29 = zext i64 %r28 to i320
-%r30 = shl i320 %r29, 256
-%r31 = or i320 %r25, %r30
-%r32 = zext i320 %r31 to i384
-%r34 = getelementptr i64, i64* %r2, i32 5
-%r35 = load i64, i64* %r34
-%r36 = zext i64 %r35 to i384
-%r37 = shl i384 %r36, 320
-%r38 = or i384 %r32, %r37
-%r39 = lshr i384 %r38, 1
-%r40 = trunc i384 %r39 to i64
-%r42 = getelementptr i64, i64* %r1, i32 0
-store i64 %r40, i64* %r42
-%r43 = lshr i384 %r39, 64
-%r44 = trunc i384 %r43 to i64
-%r46 = getelementptr i64, i64* %r1, i32 1
-store i64 %r44, i64* %r46
-%r47 = lshr i384 %r43, 64
-%r48 = trunc i384 %r47 to i64
-%r50 = getelementptr i64, i64* %r1, i32 2
-store i64 %r48, i64* %r50
-%r51 = lshr i384 %r47, 64
-%r52 = trunc i384 %r51 to i64
-%r54 = getelementptr i64, i64* %r1, i32 3
-store i64 %r52, i64* %r54
-%r55 = lshr i384 %r51, 64
-%r56 = trunc i384 %r55 to i64
-%r58 = getelementptr i64, i64* %r1, i32 4
-store i64 %r56, i64* %r58
-%r59 = lshr i384 %r55, 64
-%r60 = trunc i384 %r59 to i64
-%r62 = getelementptr i64, i64* %r1, i32 5
-store i64 %r60, i64* %r62
-ret void
-}
-define void @mcl_fp_add6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = load i64, i64* %r3
-%r42 = zext i64 %r41 to i128
-%r44 = getelementptr i64, i64* %r3, i32 1
-%r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i128
-%r47 = shl i128 %r46, 64
-%r48 = or i128 %r42, %r47
-%r49 = zext i128 %r48 to i192
-%r51 = getelementptr i64, i64* %r3, i32 2
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i192
-%r54 = shl i192 %r53, 128
-%r55 = or i192 %r49, %r54
-%r56 = zext i192 %r55 to i256
-%r58 = getelementptr i64, i64* %r3, i32 3
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i256
-%r61 = shl i256 %r60, 192
-%r62 = or i256 %r56, %r61
-%r63 = zext i256 %r62 to i320
-%r65 = getelementptr i64, i64* %r3, i32 4
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i320
-%r68 = shl i320 %r67, 256
-%r69 = or i320 %r63, %r68
-%r70 = zext i320 %r69 to i384
-%r72 = getelementptr i64, i64* %r3, i32 5
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i384
-%r75 = shl i384 %r74, 320
-%r76 = or i384 %r70, %r75
-%r77 = zext i384 %r40 to i448
-%r78 = zext i384 %r76 to i448
-%r79 = add i448 %r77, %r78
-%r80 = trunc i448 %r79 to i384
-%r81 = trunc i384 %r80 to i64
-%r83 = getelementptr i64, i64* %r1, i32 0
-store i64 %r81, i64* %r83
-%r84 = lshr i384 %r80, 64
-%r85 = trunc i384 %r84 to i64
-%r87 = getelementptr i64, i64* %r1, i32 1
-store i64 %r85, i64* %r87
-%r88 = lshr i384 %r84, 64
-%r89 = trunc i384 %r88 to i64
-%r91 = getelementptr i64, i64* %r1, i32 2
-store i64 %r89, i64* %r91
-%r92 = lshr i384 %r88, 64
-%r93 = trunc i384 %r92 to i64
-%r95 = getelementptr i64, i64* %r1, i32 3
-store i64 %r93, i64* %r95
-%r96 = lshr i384 %r92, 64
-%r97 = trunc i384 %r96 to i64
-%r99 = getelementptr i64, i64* %r1, i32 4
-store i64 %r97, i64* %r99
-%r100 = lshr i384 %r96, 64
-%r101 = trunc i384 %r100 to i64
-%r103 = getelementptr i64, i64* %r1, i32 5
-store i64 %r101, i64* %r103
-%r104 = load i64, i64* %r4
-%r105 = zext i64 %r104 to i128
-%r107 = getelementptr i64, i64* %r4, i32 1
-%r108 = load i64, i64* %r107
-%r109 = zext i64 %r108 to i128
-%r110 = shl i128 %r109, 64
-%r111 = or i128 %r105, %r110
-%r112 = zext i128 %r111 to i192
-%r114 = getelementptr i64, i64* %r4, i32 2
-%r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i192
-%r117 = shl i192 %r116, 128
-%r118 = or i192 %r112, %r117
-%r119 = zext i192 %r118 to i256
-%r121 = getelementptr i64, i64* %r4, i32 3
-%r122 = load i64, i64* %r121
-%r123 = zext i64 %r122 to i256
-%r124 = shl i256 %r123, 192
-%r125 = or i256 %r119, %r124
-%r126 = zext i256 %r125 to i320
-%r128 = getelementptr i64, i64* %r4, i32 4
-%r129 = load i64, i64* %r128
-%r130 = zext i64 %r129 to i320
-%r131 = shl i320 %r130, 256
-%r132 = or i320 %r126, %r131
-%r133 = zext i320 %r132 to i384
-%r135 = getelementptr i64, i64* %r4, i32 5
-%r136 = load i64, i64* %r135
-%r137 = zext i64 %r136 to i384
-%r138 = shl i384 %r137, 320
-%r139 = or i384 %r133, %r138
-%r140 = zext i384 %r139 to i448
-%r141 = sub i448 %r79, %r140
-%r142 = lshr i448 %r141, 384
-%r143 = trunc i448 %r142 to i1
-br i1%r143, label %carry, label %nocarry
-nocarry:
-%r144 = trunc i448 %r141 to i384
-%r145 = trunc i384 %r144 to i64
-%r147 = getelementptr i64, i64* %r1, i32 0
-store i64 %r145, i64* %r147
-%r148 = lshr i384 %r144, 64
-%r149 = trunc i384 %r148 to i64
-%r151 = getelementptr i64, i64* %r1, i32 1
-store i64 %r149, i64* %r151
-%r152 = lshr i384 %r148, 64
-%r153 = trunc i384 %r152 to i64
-%r155 = getelementptr i64, i64* %r1, i32 2
-store i64 %r153, i64* %r155
-%r156 = lshr i384 %r152, 64
-%r157 = trunc i384 %r156 to i64
-%r159 = getelementptr i64, i64* %r1, i32 3
-store i64 %r157, i64* %r159
-%r160 = lshr i384 %r156, 64
-%r161 = trunc i384 %r160 to i64
-%r163 = getelementptr i64, i64* %r1, i32 4
-store i64 %r161, i64* %r163
-%r164 = lshr i384 %r160, 64
-%r165 = trunc i384 %r164 to i64
-%r167 = getelementptr i64, i64* %r1, i32 5
-store i64 %r165, i64* %r167
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = load i64, i64* %r3
-%r42 = zext i64 %r41 to i128
-%r44 = getelementptr i64, i64* %r3, i32 1
-%r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i128
-%r47 = shl i128 %r46, 64
-%r48 = or i128 %r42, %r47
-%r49 = zext i128 %r48 to i192
-%r51 = getelementptr i64, i64* %r3, i32 2
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i192
-%r54 = shl i192 %r53, 128
-%r55 = or i192 %r49, %r54
-%r56 = zext i192 %r55 to i256
-%r58 = getelementptr i64, i64* %r3, i32 3
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i256
-%r61 = shl i256 %r60, 192
-%r62 = or i256 %r56, %r61
-%r63 = zext i256 %r62 to i320
-%r65 = getelementptr i64, i64* %r3, i32 4
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i320
-%r68 = shl i320 %r67, 256
-%r69 = or i320 %r63, %r68
-%r70 = zext i320 %r69 to i384
-%r72 = getelementptr i64, i64* %r3, i32 5
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i384
-%r75 = shl i384 %r74, 320
-%r76 = or i384 %r70, %r75
-%r77 = add i384 %r40, %r76
-%r78 = load i64, i64* %r4
-%r79 = zext i64 %r78 to i128
-%r81 = getelementptr i64, i64* %r4, i32 1
-%r82 = load i64, i64* %r81
-%r83 = zext i64 %r82 to i128
-%r84 = shl i128 %r83, 64
-%r85 = or i128 %r79, %r84
-%r86 = zext i128 %r85 to i192
-%r88 = getelementptr i64, i64* %r4, i32 2
-%r89 = load i64, i64* %r88
-%r90 = zext i64 %r89 to i192
-%r91 = shl i192 %r90, 128
-%r92 = or i192 %r86, %r91
-%r93 = zext i192 %r92 to i256
-%r95 = getelementptr i64, i64* %r4, i32 3
-%r96 = load i64, i64* %r95
-%r97 = zext i64 %r96 to i256
-%r98 = shl i256 %r97, 192
-%r99 = or i256 %r93, %r98
-%r100 = zext i256 %r99 to i320
-%r102 = getelementptr i64, i64* %r4, i32 4
-%r103 = load i64, i64* %r102
-%r104 = zext i64 %r103 to i320
-%r105 = shl i320 %r104, 256
-%r106 = or i320 %r100, %r105
-%r107 = zext i320 %r106 to i384
-%r109 = getelementptr i64, i64* %r4, i32 5
-%r110 = load i64, i64* %r109
-%r111 = zext i64 %r110 to i384
-%r112 = shl i384 %r111, 320
-%r113 = or i384 %r107, %r112
-%r114 = sub i384 %r77, %r113
-%r115 = lshr i384 %r114, 383
-%r116 = trunc i384 %r115 to i1
-%r117 = select i1 %r116, i384 %r77, i384 %r114
-%r118 = trunc i384 %r117 to i64
-%r120 = getelementptr i64, i64* %r1, i32 0
-store i64 %r118, i64* %r120
-%r121 = lshr i384 %r117, 64
-%r122 = trunc i384 %r121 to i64
-%r124 = getelementptr i64, i64* %r1, i32 1
-store i64 %r122, i64* %r124
-%r125 = lshr i384 %r121, 64
-%r126 = trunc i384 %r125 to i64
-%r128 = getelementptr i64, i64* %r1, i32 2
-store i64 %r126, i64* %r128
-%r129 = lshr i384 %r125, 64
-%r130 = trunc i384 %r129 to i64
-%r132 = getelementptr i64, i64* %r1, i32 3
-store i64 %r130, i64* %r132
-%r133 = lshr i384 %r129, 64
-%r134 = trunc i384 %r133 to i64
-%r136 = getelementptr i64, i64* %r1, i32 4
-store i64 %r134, i64* %r136
-%r137 = lshr i384 %r133, 64
-%r138 = trunc i384 %r137 to i64
-%r140 = getelementptr i64, i64* %r1, i32 5
-store i64 %r138, i64* %r140
-ret void
-}
-define void @mcl_fp_sub6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = load i64, i64* %r3
-%r42 = zext i64 %r41 to i128
-%r44 = getelementptr i64, i64* %r3, i32 1
-%r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i128
-%r47 = shl i128 %r46, 64
-%r48 = or i128 %r42, %r47
-%r49 = zext i128 %r48 to i192
-%r51 = getelementptr i64, i64* %r3, i32 2
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i192
-%r54 = shl i192 %r53, 128
-%r55 = or i192 %r49, %r54
-%r56 = zext i192 %r55 to i256
-%r58 = getelementptr i64, i64* %r3, i32 3
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i256
-%r61 = shl i256 %r60, 192
-%r62 = or i256 %r56, %r61
-%r63 = zext i256 %r62 to i320
-%r65 = getelementptr i64, i64* %r3, i32 4
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i320
-%r68 = shl i320 %r67, 256
-%r69 = or i320 %r63, %r68
-%r70 = zext i320 %r69 to i384
-%r72 = getelementptr i64, i64* %r3, i32 5
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i384
-%r75 = shl i384 %r74, 320
-%r76 = or i384 %r70, %r75
-%r77 = zext i384 %r40 to i448
-%r78 = zext i384 %r76 to i448
-%r79 = sub i448 %r77, %r78
-%r80 = trunc i448 %r79 to i384
-%r81 = lshr i448 %r79, 384
-%r82 = trunc i448 %r81 to i1
-%r83 = trunc i384 %r80 to i64
-%r85 = getelementptr i64, i64* %r1, i32 0
-store i64 %r83, i64* %r85
-%r86 = lshr i384 %r80, 64
-%r87 = trunc i384 %r86 to i64
-%r89 = getelementptr i64, i64* %r1, i32 1
-store i64 %r87, i64* %r89
-%r90 = lshr i384 %r86, 64
-%r91 = trunc i384 %r90 to i64
-%r93 = getelementptr i64, i64* %r1, i32 2
-store i64 %r91, i64* %r93
-%r94 = lshr i384 %r90, 64
-%r95 = trunc i384 %r94 to i64
-%r97 = getelementptr i64, i64* %r1, i32 3
-store i64 %r95, i64* %r97
-%r98 = lshr i384 %r94, 64
-%r99 = trunc i384 %r98 to i64
-%r101 = getelementptr i64, i64* %r1, i32 4
-store i64 %r99, i64* %r101
-%r102 = lshr i384 %r98, 64
-%r103 = trunc i384 %r102 to i64
-%r105 = getelementptr i64, i64* %r1, i32 5
-store i64 %r103, i64* %r105
-br i1%r82, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r106 = load i64, i64* %r4
-%r107 = zext i64 %r106 to i128
-%r109 = getelementptr i64, i64* %r4, i32 1
-%r110 = load i64, i64* %r109
-%r111 = zext i64 %r110 to i128
-%r112 = shl i128 %r111, 64
-%r113 = or i128 %r107, %r112
-%r114 = zext i128 %r113 to i192
-%r116 = getelementptr i64, i64* %r4, i32 2
-%r117 = load i64, i64* %r116
-%r118 = zext i64 %r117 to i192
-%r119 = shl i192 %r118, 128
-%r120 = or i192 %r114, %r119
-%r121 = zext i192 %r120 to i256
-%r123 = getelementptr i64, i64* %r4, i32 3
-%r124 = load i64, i64* %r123
-%r125 = zext i64 %r124 to i256
-%r126 = shl i256 %r125, 192
-%r127 = or i256 %r121, %r126
-%r128 = zext i256 %r127 to i320
-%r130 = getelementptr i64, i64* %r4, i32 4
-%r131 = load i64, i64* %r130
-%r132 = zext i64 %r131 to i320
-%r133 = shl i320 %r132, 256
-%r134 = or i320 %r128, %r133
-%r135 = zext i320 %r134 to i384
-%r137 = getelementptr i64, i64* %r4, i32 5
-%r138 = load i64, i64* %r137
-%r139 = zext i64 %r138 to i384
-%r140 = shl i384 %r139, 320
-%r141 = or i384 %r135, %r140
-%r142 = add i384 %r80, %r141
-%r143 = trunc i384 %r142 to i64
-%r145 = getelementptr i64, i64* %r1, i32 0
-store i64 %r143, i64* %r145
-%r146 = lshr i384 %r142, 64
-%r147 = trunc i384 %r146 to i64
-%r149 = getelementptr i64, i64* %r1, i32 1
-store i64 %r147, i64* %r149
-%r150 = lshr i384 %r146, 64
-%r151 = trunc i384 %r150 to i64
-%r153 = getelementptr i64, i64* %r1, i32 2
-store i64 %r151, i64* %r153
-%r154 = lshr i384 %r150, 64
-%r155 = trunc i384 %r154 to i64
-%r157 = getelementptr i64, i64* %r1, i32 3
-store i64 %r155, i64* %r157
-%r158 = lshr i384 %r154, 64
-%r159 = trunc i384 %r158 to i64
-%r161 = getelementptr i64, i64* %r1, i32 4
-store i64 %r159, i64* %r161
-%r162 = lshr i384 %r158, 64
-%r163 = trunc i384 %r162 to i64
-%r165 = getelementptr i64, i64* %r1, i32 5
-store i64 %r163, i64* %r165
-ret void
-}
-define void @mcl_fp_subNF6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = load i64, i64* %r3
-%r42 = zext i64 %r41 to i128
-%r44 = getelementptr i64, i64* %r3, i32 1
-%r45 = load i64, i64* %r44
-%r46 = zext i64 %r45 to i128
-%r47 = shl i128 %r46, 64
-%r48 = or i128 %r42, %r47
-%r49 = zext i128 %r48 to i192
-%r51 = getelementptr i64, i64* %r3, i32 2
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i192
-%r54 = shl i192 %r53, 128
-%r55 = or i192 %r49, %r54
-%r56 = zext i192 %r55 to i256
-%r58 = getelementptr i64, i64* %r3, i32 3
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i256
-%r61 = shl i256 %r60, 192
-%r62 = or i256 %r56, %r61
-%r63 = zext i256 %r62 to i320
-%r65 = getelementptr i64, i64* %r3, i32 4
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i320
-%r68 = shl i320 %r67, 256
-%r69 = or i320 %r63, %r68
-%r70 = zext i320 %r69 to i384
-%r72 = getelementptr i64, i64* %r3, i32 5
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i384
-%r75 = shl i384 %r74, 320
-%r76 = or i384 %r70, %r75
-%r77 = sub i384 %r40, %r76
-%r78 = lshr i384 %r77, 383
-%r79 = trunc i384 %r78 to i1
-%r80 = load i64, i64* %r4
-%r81 = zext i64 %r80 to i128
-%r83 = getelementptr i64, i64* %r4, i32 1
-%r84 = load i64, i64* %r83
-%r85 = zext i64 %r84 to i128
-%r86 = shl i128 %r85, 64
-%r87 = or i128 %r81, %r86
-%r88 = zext i128 %r87 to i192
-%r90 = getelementptr i64, i64* %r4, i32 2
-%r91 = load i64, i64* %r90
-%r92 = zext i64 %r91 to i192
-%r93 = shl i192 %r92, 128
-%r94 = or i192 %r88, %r93
-%r95 = zext i192 %r94 to i256
-%r97 = getelementptr i64, i64* %r4, i32 3
-%r98 = load i64, i64* %r97
-%r99 = zext i64 %r98 to i256
-%r100 = shl i256 %r99, 192
-%r101 = or i256 %r95, %r100
-%r102 = zext i256 %r101 to i320
-%r104 = getelementptr i64, i64* %r4, i32 4
-%r105 = load i64, i64* %r104
-%r106 = zext i64 %r105 to i320
-%r107 = shl i320 %r106, 256
-%r108 = or i320 %r102, %r107
-%r109 = zext i320 %r108 to i384
-%r111 = getelementptr i64, i64* %r4, i32 5
-%r112 = load i64, i64* %r111
-%r113 = zext i64 %r112 to i384
-%r114 = shl i384 %r113, 320
-%r115 = or i384 %r109, %r114
-%r117 = select i1 %r79, i384 %r115, i384 0
-%r118 = add i384 %r77, %r117
-%r119 = trunc i384 %r118 to i64
-%r121 = getelementptr i64, i64* %r1, i32 0
-store i64 %r119, i64* %r121
-%r122 = lshr i384 %r118, 64
-%r123 = trunc i384 %r122 to i64
-%r125 = getelementptr i64, i64* %r1, i32 1
-store i64 %r123, i64* %r125
-%r126 = lshr i384 %r122, 64
-%r127 = trunc i384 %r126 to i64
-%r129 = getelementptr i64, i64* %r1, i32 2
-store i64 %r127, i64* %r129
-%r130 = lshr i384 %r126, 64
-%r131 = trunc i384 %r130 to i64
-%r133 = getelementptr i64, i64* %r1, i32 3
-store i64 %r131, i64* %r133
-%r134 = lshr i384 %r130, 64
-%r135 = trunc i384 %r134 to i64
-%r137 = getelementptr i64, i64* %r1, i32 4
-store i64 %r135, i64* %r137
-%r138 = lshr i384 %r134, 64
-%r139 = trunc i384 %r138 to i64
-%r141 = getelementptr i64, i64* %r1, i32 5
-store i64 %r139, i64* %r141
-ret void
-}
-define void @mcl_fpDbl_add6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r2, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = zext i512 %r54 to i576
-%r57 = getelementptr i64, i64* %r2, i32 8
-%r58 = load i64, i64* %r57
-%r59 = zext i64 %r58 to i576
-%r60 = shl i576 %r59, 512
-%r61 = or i576 %r55, %r60
-%r62 = zext i576 %r61 to i640
-%r64 = getelementptr i64, i64* %r2, i32 9
-%r65 = load i64, i64* %r64
-%r66 = zext i64 %r65 to i640
-%r67 = shl i640 %r66, 576
-%r68 = or i640 %r62, %r67
-%r69 = zext i640 %r68 to i704
-%r71 = getelementptr i64, i64* %r2, i32 10
-%r72 = load i64, i64* %r71
-%r73 = zext i64 %r72 to i704
-%r74 = shl i704 %r73, 640
-%r75 = or i704 %r69, %r74
-%r76 = zext i704 %r75 to i768
-%r78 = getelementptr i64, i64* %r2, i32 11
-%r79 = load i64, i64* %r78
-%r80 = zext i64 %r79 to i768
-%r81 = shl i768 %r80, 704
-%r82 = or i768 %r76, %r81
-%r83 = load i64, i64* %r3
-%r84 = zext i64 %r83 to i128
-%r86 = getelementptr i64, i64* %r3, i32 1
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i128
-%r89 = shl i128 %r88, 64
-%r90 = or i128 %r84, %r89
-%r91 = zext i128 %r90 to i192
-%r93 = getelementptr i64, i64* %r3, i32 2
-%r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i192
-%r96 = shl i192 %r95, 128
-%r97 = or i192 %r91, %r96
-%r98 = zext i192 %r97 to i256
-%r100 = getelementptr i64, i64* %r3, i32 3
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i256
-%r103 = shl i256 %r102, 192
-%r104 = or i256 %r98, %r103
-%r105 = zext i256 %r104 to i320
-%r107 = getelementptr i64, i64* %r3, i32 4
-%r108 = load i64, i64* %r107
-%r109 = zext i64 %r108 to i320
-%r110 = shl i320 %r109, 256
-%r111 = or i320 %r105, %r110
-%r112 = zext i320 %r111 to i384
-%r114 = getelementptr i64, i64* %r3, i32 5
-%r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i384
-%r117 = shl i384 %r116, 320
-%r118 = or i384 %r112, %r117
-%r119 = zext i384 %r118 to i448
-%r121 = getelementptr i64, i64* %r3, i32 6
-%r122 = load i64, i64* %r121
-%r123 = zext i64 %r122 to i448
-%r124 = shl i448 %r123, 384
-%r125 = or i448 %r119, %r124
-%r126 = zext i448 %r125 to i512
-%r128 = getelementptr i64, i64* %r3, i32 7
-%r129 = load i64, i64* %r128
-%r130 = zext i64 %r129 to i512
-%r131 = shl i512 %r130, 448
-%r132 = or i512 %r126, %r131
-%r133 = zext i512 %r132 to i576
-%r135 = getelementptr i64, i64* %r3, i32 8
-%r136 = load i64, i64* %r135
-%r137 = zext i64 %r136 to i576
-%r138 = shl i576 %r137, 512
-%r139 = or i576 %r133, %r138
-%r140 = zext i576 %r139 to i640
-%r142 = getelementptr i64, i64* %r3, i32 9
-%r143 = load i64, i64* %r142
-%r144 = zext i64 %r143 to i640
-%r145 = shl i640 %r144, 576
-%r146 = or i640 %r140, %r145
-%r147 = zext i640 %r146 to i704
-%r149 = getelementptr i64, i64* %r3, i32 10
-%r150 = load i64, i64* %r149
-%r151 = zext i64 %r150 to i704
-%r152 = shl i704 %r151, 640
-%r153 = or i704 %r147, %r152
-%r154 = zext i704 %r153 to i768
-%r156 = getelementptr i64, i64* %r3, i32 11
-%r157 = load i64, i64* %r156
-%r158 = zext i64 %r157 to i768
-%r159 = shl i768 %r158, 704
-%r160 = or i768 %r154, %r159
-%r161 = zext i768 %r82 to i832
-%r162 = zext i768 %r160 to i832
-%r163 = add i832 %r161, %r162
-%r164 = trunc i832 %r163 to i384
-%r165 = trunc i384 %r164 to i64
-%r167 = getelementptr i64, i64* %r1, i32 0
-store i64 %r165, i64* %r167
-%r168 = lshr i384 %r164, 64
-%r169 = trunc i384 %r168 to i64
-%r171 = getelementptr i64, i64* %r1, i32 1
-store i64 %r169, i64* %r171
-%r172 = lshr i384 %r168, 64
-%r173 = trunc i384 %r172 to i64
-%r175 = getelementptr i64, i64* %r1, i32 2
-store i64 %r173, i64* %r175
-%r176 = lshr i384 %r172, 64
-%r177 = trunc i384 %r176 to i64
-%r179 = getelementptr i64, i64* %r1, i32 3
-store i64 %r177, i64* %r179
-%r180 = lshr i384 %r176, 64
-%r181 = trunc i384 %r180 to i64
-%r183 = getelementptr i64, i64* %r1, i32 4
-store i64 %r181, i64* %r183
-%r184 = lshr i384 %r180, 64
-%r185 = trunc i384 %r184 to i64
-%r187 = getelementptr i64, i64* %r1, i32 5
-store i64 %r185, i64* %r187
-%r188 = lshr i832 %r163, 384
-%r189 = trunc i832 %r188 to i448
-%r190 = load i64, i64* %r4
-%r191 = zext i64 %r190 to i128
-%r193 = getelementptr i64, i64* %r4, i32 1
-%r194 = load i64, i64* %r193
-%r195 = zext i64 %r194 to i128
-%r196 = shl i128 %r195, 64
-%r197 = or i128 %r191, %r196
-%r198 = zext i128 %r197 to i192
-%r200 = getelementptr i64, i64* %r4, i32 2
-%r201 = load i64, i64* %r200
-%r202 = zext i64 %r201 to i192
-%r203 = shl i192 %r202, 128
-%r204 = or i192 %r198, %r203
-%r205 = zext i192 %r204 to i256
-%r207 = getelementptr i64, i64* %r4, i32 3
-%r208 = load i64, i64* %r207
-%r209 = zext i64 %r208 to i256
-%r210 = shl i256 %r209, 192
-%r211 = or i256 %r205, %r210
-%r212 = zext i256 %r211 to i320
-%r214 = getelementptr i64, i64* %r4, i32 4
-%r215 = load i64, i64* %r214
-%r216 = zext i64 %r215 to i320
-%r217 = shl i320 %r216, 256
-%r218 = or i320 %r212, %r217
-%r219 = zext i320 %r218 to i384
-%r221 = getelementptr i64, i64* %r4, i32 5
-%r222 = load i64, i64* %r221
-%r223 = zext i64 %r222 to i384
-%r224 = shl i384 %r223, 320
-%r225 = or i384 %r219, %r224
-%r226 = zext i384 %r225 to i448
-%r227 = sub i448 %r189, %r226
-%r228 = lshr i448 %r227, 384
-%r229 = trunc i448 %r228 to i1
-%r230 = select i1 %r229, i448 %r189, i448 %r227
-%r231 = trunc i448 %r230 to i384
-%r233 = getelementptr i64, i64* %r1, i32 6
-%r234 = trunc i384 %r231 to i64
-%r236 = getelementptr i64, i64* %r233, i32 0
-store i64 %r234, i64* %r236
-%r237 = lshr i384 %r231, 64
-%r238 = trunc i384 %r237 to i64
-%r240 = getelementptr i64, i64* %r233, i32 1
-store i64 %r238, i64* %r240
-%r241 = lshr i384 %r237, 64
-%r242 = trunc i384 %r241 to i64
-%r244 = getelementptr i64, i64* %r233, i32 2
-store i64 %r242, i64* %r244
-%r245 = lshr i384 %r241, 64
-%r246 = trunc i384 %r245 to i64
-%r248 = getelementptr i64, i64* %r233, i32 3
-store i64 %r246, i64* %r248
-%r249 = lshr i384 %r245, 64
-%r250 = trunc i384 %r249 to i64
-%r252 = getelementptr i64, i64* %r233, i32 4
-store i64 %r250, i64* %r252
-%r253 = lshr i384 %r249, 64
-%r254 = trunc i384 %r253 to i64
-%r256 = getelementptr i64, i64* %r233, i32 5
-store i64 %r254, i64* %r256
-ret void
-}
-define void @mcl_fpDbl_sub6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r2, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = zext i512 %r54 to i576
-%r57 = getelementptr i64, i64* %r2, i32 8
-%r58 = load i64, i64* %r57
-%r59 = zext i64 %r58 to i576
-%r60 = shl i576 %r59, 512
-%r61 = or i576 %r55, %r60
-%r62 = zext i576 %r61 to i640
-%r64 = getelementptr i64, i64* %r2, i32 9
-%r65 = load i64, i64* %r64
-%r66 = zext i64 %r65 to i640
-%r67 = shl i640 %r66, 576
-%r68 = or i640 %r62, %r67
-%r69 = zext i640 %r68 to i704
-%r71 = getelementptr i64, i64* %r2, i32 10
-%r72 = load i64, i64* %r71
-%r73 = zext i64 %r72 to i704
-%r74 = shl i704 %r73, 640
-%r75 = or i704 %r69, %r74
-%r76 = zext i704 %r75 to i768
-%r78 = getelementptr i64, i64* %r2, i32 11
-%r79 = load i64, i64* %r78
-%r80 = zext i64 %r79 to i768
-%r81 = shl i768 %r80, 704
-%r82 = or i768 %r76, %r81
-%r83 = load i64, i64* %r3
-%r84 = zext i64 %r83 to i128
-%r86 = getelementptr i64, i64* %r3, i32 1
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i128
-%r89 = shl i128 %r88, 64
-%r90 = or i128 %r84, %r89
-%r91 = zext i128 %r90 to i192
-%r93 = getelementptr i64, i64* %r3, i32 2
-%r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i192
-%r96 = shl i192 %r95, 128
-%r97 = or i192 %r91, %r96
-%r98 = zext i192 %r97 to i256
-%r100 = getelementptr i64, i64* %r3, i32 3
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i256
-%r103 = shl i256 %r102, 192
-%r104 = or i256 %r98, %r103
-%r105 = zext i256 %r104 to i320
-%r107 = getelementptr i64, i64* %r3, i32 4
-%r108 = load i64, i64* %r107
-%r109 = zext i64 %r108 to i320
-%r110 = shl i320 %r109, 256
-%r111 = or i320 %r105, %r110
-%r112 = zext i320 %r111 to i384
-%r114 = getelementptr i64, i64* %r3, i32 5
-%r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i384
-%r117 = shl i384 %r116, 320
-%r118 = or i384 %r112, %r117
-%r119 = zext i384 %r118 to i448
-%r121 = getelementptr i64, i64* %r3, i32 6
-%r122 = load i64, i64* %r121
-%r123 = zext i64 %r122 to i448
-%r124 = shl i448 %r123, 384
-%r125 = or i448 %r119, %r124
-%r126 = zext i448 %r125 to i512
-%r128 = getelementptr i64, i64* %r3, i32 7
-%r129 = load i64, i64* %r128
-%r130 = zext i64 %r129 to i512
-%r131 = shl i512 %r130, 448
-%r132 = or i512 %r126, %r131
-%r133 = zext i512 %r132 to i576
-%r135 = getelementptr i64, i64* %r3, i32 8
-%r136 = load i64, i64* %r135
-%r137 = zext i64 %r136 to i576
-%r138 = shl i576 %r137, 512
-%r139 = or i576 %r133, %r138
-%r140 = zext i576 %r139 to i640
-%r142 = getelementptr i64, i64* %r3, i32 9
-%r143 = load i64, i64* %r142
-%r144 = zext i64 %r143 to i640
-%r145 = shl i640 %r144, 576
-%r146 = or i640 %r140, %r145
-%r147 = zext i640 %r146 to i704
-%r149 = getelementptr i64, i64* %r3, i32 10
-%r150 = load i64, i64* %r149
-%r151 = zext i64 %r150 to i704
-%r152 = shl i704 %r151, 640
-%r153 = or i704 %r147, %r152
-%r154 = zext i704 %r153 to i768
-%r156 = getelementptr i64, i64* %r3, i32 11
-%r157 = load i64, i64* %r156
-%r158 = zext i64 %r157 to i768
-%r159 = shl i768 %r158, 704
-%r160 = or i768 %r154, %r159
-%r161 = zext i768 %r82 to i832
-%r162 = zext i768 %r160 to i832
-%r163 = sub i832 %r161, %r162
-%r164 = trunc i832 %r163 to i384
-%r165 = trunc i384 %r164 to i64
-%r167 = getelementptr i64, i64* %r1, i32 0
-store i64 %r165, i64* %r167
-%r168 = lshr i384 %r164, 64
-%r169 = trunc i384 %r168 to i64
-%r171 = getelementptr i64, i64* %r1, i32 1
-store i64 %r169, i64* %r171
-%r172 = lshr i384 %r168, 64
-%r173 = trunc i384 %r172 to i64
-%r175 = getelementptr i64, i64* %r1, i32 2
-store i64 %r173, i64* %r175
-%r176 = lshr i384 %r172, 64
-%r177 = trunc i384 %r176 to i64
-%r179 = getelementptr i64, i64* %r1, i32 3
-store i64 %r177, i64* %r179
-%r180 = lshr i384 %r176, 64
-%r181 = trunc i384 %r180 to i64
-%r183 = getelementptr i64, i64* %r1, i32 4
-store i64 %r181, i64* %r183
-%r184 = lshr i384 %r180, 64
-%r185 = trunc i384 %r184 to i64
-%r187 = getelementptr i64, i64* %r1, i32 5
-store i64 %r185, i64* %r187
-%r188 = lshr i832 %r163, 384
-%r189 = trunc i832 %r188 to i384
-%r190 = lshr i832 %r163, 768
-%r191 = trunc i832 %r190 to i1
-%r192 = load i64, i64* %r4
-%r193 = zext i64 %r192 to i128
-%r195 = getelementptr i64, i64* %r4, i32 1
-%r196 = load i64, i64* %r195
-%r197 = zext i64 %r196 to i128
-%r198 = shl i128 %r197, 64
-%r199 = or i128 %r193, %r198
-%r200 = zext i128 %r199 to i192
-%r202 = getelementptr i64, i64* %r4, i32 2
-%r203 = load i64, i64* %r202
-%r204 = zext i64 %r203 to i192
-%r205 = shl i192 %r204, 128
-%r206 = or i192 %r200, %r205
-%r207 = zext i192 %r206 to i256
-%r209 = getelementptr i64, i64* %r4, i32 3
-%r210 = load i64, i64* %r209
-%r211 = zext i64 %r210 to i256
-%r212 = shl i256 %r211, 192
-%r213 = or i256 %r207, %r212
-%r214 = zext i256 %r213 to i320
-%r216 = getelementptr i64, i64* %r4, i32 4
-%r217 = load i64, i64* %r216
-%r218 = zext i64 %r217 to i320
-%r219 = shl i320 %r218, 256
-%r220 = or i320 %r214, %r219
-%r221 = zext i320 %r220 to i384
-%r223 = getelementptr i64, i64* %r4, i32 5
-%r224 = load i64, i64* %r223
-%r225 = zext i64 %r224 to i384
-%r226 = shl i384 %r225, 320
-%r227 = or i384 %r221, %r226
-%r229 = select i1 %r191, i384 %r227, i384 0
-%r230 = add i384 %r189, %r229
-%r232 = getelementptr i64, i64* %r1, i32 6
-%r233 = trunc i384 %r230 to i64
-%r235 = getelementptr i64, i64* %r232, i32 0
-store i64 %r233, i64* %r235
-%r236 = lshr i384 %r230, 64
-%r237 = trunc i384 %r236 to i64
-%r239 = getelementptr i64, i64* %r232, i32 1
-store i64 %r237, i64* %r239
-%r240 = lshr i384 %r236, 64
-%r241 = trunc i384 %r240 to i64
-%r243 = getelementptr i64, i64* %r232, i32 2
-store i64 %r241, i64* %r243
-%r244 = lshr i384 %r240, 64
-%r245 = trunc i384 %r244 to i64
-%r247 = getelementptr i64, i64* %r232, i32 3
-store i64 %r245, i64* %r247
-%r248 = lshr i384 %r244, 64
-%r249 = trunc i384 %r248 to i64
-%r251 = getelementptr i64, i64* %r232, i32 4
-store i64 %r249, i64* %r251
-%r252 = lshr i384 %r248, 64
-%r253 = trunc i384 %r252 to i64
-%r255 = getelementptr i64, i64* %r232, i32 5
-store i64 %r253, i64* %r255
-ret void
-}
-define i512 @mulPv448x64(i64* noalias  %r2, i64 %r3)
-{
-%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0)
-%r6 = trunc i128 %r5 to i64
-%r7 = call i64 @extractHigh64(i128 %r5)
-%r9 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 1)
-%r10 = trunc i128 %r9 to i64
-%r11 = call i64 @extractHigh64(i128 %r9)
-%r13 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 2)
-%r14 = trunc i128 %r13 to i64
-%r15 = call i64 @extractHigh64(i128 %r13)
-%r17 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 3)
-%r18 = trunc i128 %r17 to i64
-%r19 = call i64 @extractHigh64(i128 %r17)
-%r21 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 4)
-%r22 = trunc i128 %r21 to i64
-%r23 = call i64 @extractHigh64(i128 %r21)
-%r25 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 5)
-%r26 = trunc i128 %r25 to i64
-%r27 = call i64 @extractHigh64(i128 %r25)
-%r29 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 6)
-%r30 = trunc i128 %r29 to i64
-%r31 = call i64 @extractHigh64(i128 %r29)
-%r32 = zext i64 %r6 to i128
-%r33 = zext i64 %r10 to i128
-%r34 = shl i128 %r33, 64
-%r35 = or i128 %r32, %r34
-%r36 = zext i128 %r35 to i192
-%r37 = zext i64 %r14 to i192
-%r38 = shl i192 %r37, 128
-%r39 = or i192 %r36, %r38
-%r40 = zext i192 %r39 to i256
-%r41 = zext i64 %r18 to i256
-%r42 = shl i256 %r41, 192
-%r43 = or i256 %r40, %r42
-%r44 = zext i256 %r43 to i320
-%r45 = zext i64 %r22 to i320
-%r46 = shl i320 %r45, 256
-%r47 = or i320 %r44, %r46
-%r48 = zext i320 %r47 to i384
-%r49 = zext i64 %r26 to i384
-%r50 = shl i384 %r49, 320
-%r51 = or i384 %r48, %r50
-%r52 = zext i384 %r51 to i448
-%r53 = zext i64 %r30 to i448
-%r54 = shl i448 %r53, 384
-%r55 = or i448 %r52, %r54
-%r56 = zext i64 %r7 to i128
-%r57 = zext i64 %r11 to i128
-%r58 = shl i128 %r57, 64
-%r59 = or i128 %r56, %r58
-%r60 = zext i128 %r59 to i192
-%r61 = zext i64 %r15 to i192
-%r62 = shl i192 %r61, 128
-%r63 = or i192 %r60, %r62
-%r64 = zext i192 %r63 to i256
-%r65 = zext i64 %r19 to i256
-%r66 = shl i256 %r65, 192
-%r67 = or i256 %r64, %r66
-%r68 = zext i256 %r67 to i320
-%r69 = zext i64 %r23 to i320
-%r70 = shl i320 %r69, 256
-%r71 = or i320 %r68, %r70
-%r72 = zext i320 %r71 to i384
-%r73 = zext i64 %r27 to i384
-%r74 = shl i384 %r73, 320
-%r75 = or i384 %r72, %r74
-%r76 = zext i384 %r75 to i448
-%r77 = zext i64 %r31 to i448
-%r78 = shl i448 %r77, 384
-%r79 = or i448 %r76, %r78
-%r80 = zext i448 %r55 to i512
-%r81 = zext i448 %r79 to i512
-%r82 = shl i512 %r81, 64
-%r83 = add i512 %r80, %r82
-ret i512 %r83
-}
-define void @mcl_fp_mulUnitPre7L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3)
-{
-%r4 = call i512 @mulPv448x64(i64* %r2, i64 %r3)
-%r5 = trunc i512 %r4 to i64
-%r7 = getelementptr i64, i64* %r1, i32 0
-store i64 %r5, i64* %r7
-%r8 = lshr i512 %r4, 64
-%r9 = trunc i512 %r8 to i64
-%r11 = getelementptr i64, i64* %r1, i32 1
-store i64 %r9, i64* %r11
-%r12 = lshr i512 %r8, 64
-%r13 = trunc i512 %r12 to i64
-%r15 = getelementptr i64, i64* %r1, i32 2
-store i64 %r13, i64* %r15
-%r16 = lshr i512 %r12, 64
-%r17 = trunc i512 %r16 to i64
-%r19 = getelementptr i64, i64* %r1, i32 3
-store i64 %r17, i64* %r19
-%r20 = lshr i512 %r16, 64
-%r21 = trunc i512 %r20 to i64
-%r23 = getelementptr i64, i64* %r1, i32 4
-store i64 %r21, i64* %r23
-%r24 = lshr i512 %r20, 64
-%r25 = trunc i512 %r24 to i64
-%r27 = getelementptr i64, i64* %r1, i32 5
-store i64 %r25, i64* %r27
-%r28 = lshr i512 %r24, 64
-%r29 = trunc i512 %r28 to i64
-%r31 = getelementptr i64, i64* %r1, i32 6
-store i64 %r29, i64* %r31
-%r32 = lshr i512 %r28, 64
-%r33 = trunc i512 %r32 to i64
-%r35 = getelementptr i64, i64* %r1, i32 7
-store i64 %r33, i64* %r35
-ret void
-}
-define void @mcl_fpDbl_mulPre7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
-{
-%r4 = load i64, i64* %r3
-%r5 = call i512 @mulPv448x64(i64* %r2, i64 %r4)
-%r6 = trunc i512 %r5 to i64
-store i64 %r6, i64* %r1
-%r7 = lshr i512 %r5, 64
-%r9 = getelementptr i64, i64* %r3, i32 1
-%r10 = load i64, i64* %r9
-%r11 = call i512 @mulPv448x64(i64* %r2, i64 %r10)
-%r12 = add i512 %r7, %r11
-%r13 = trunc i512 %r12 to i64
-%r15 = getelementptr i64, i64* %r1, i32 1
-store i64 %r13, i64* %r15
-%r16 = lshr i512 %r12, 64
-%r18 = getelementptr i64, i64* %r3, i32 2
-%r19 = load i64, i64* %r18
-%r20 = call i512 @mulPv448x64(i64* %r2, i64 %r19)
-%r21 = add i512 %r16, %r20
-%r22 = trunc i512 %r21 to i64
-%r24 = getelementptr i64, i64* %r1, i32 2
-store i64 %r22, i64* %r24
-%r25 = lshr i512 %r21, 64
-%r27 = getelementptr i64, i64* %r3, i32 3
-%r28 = load i64, i64* %r27
-%r29 = call i512 @mulPv448x64(i64* %r2, i64 %r28)
-%r30 = add i512 %r25, %r29
-%r31 = trunc i512 %r30 to i64
-%r33 = getelementptr i64, i64* %r1, i32 3
-store i64 %r31, i64* %r33
-%r34 = lshr i512 %r30, 64
-%r36 = getelementptr i64, i64* %r3, i32 4
-%r37 = load i64, i64* %r36
-%r38 = call i512 @mulPv448x64(i64* %r2, i64 %r37)
-%r39 = add i512 %r34, %r38
-%r40 = trunc i512 %r39 to i64
-%r42 = getelementptr i64, i64* %r1, i32 4
-store i64 %r40, i64* %r42
-%r43 = lshr i512 %r39, 64
-%r45 = getelementptr i64, i64* %r3, i32 5
-%r46 = load i64, i64* %r45
-%r47 = call i512 @mulPv448x64(i64* %r2, i64 %r46)
-%r48 = add i512 %r43, %r47
-%r49 = trunc i512 %r48 to i64
-%r51 = getelementptr i64, i64* %r1, i32 5
-store i64 %r49, i64* %r51
-%r52 = lshr i512 %r48, 64
-%r54 = getelementptr i64, i64* %r3, i32 6
-%r55 = load i64, i64* %r54
-%r56 = call i512 @mulPv448x64(i64* %r2, i64 %r55)
-%r57 = add i512 %r52, %r56
-%r59 = getelementptr i64, i64* %r1, i32 6
-%r60 = trunc i512 %r57 to i64
-%r62 = getelementptr i64, i64* %r59, i32 0
-store i64 %r60, i64* %r62
-%r63 = lshr i512 %r57, 64
-%r64 = trunc i512 %r63 to i64
-%r66 = getelementptr i64, i64* %r59, i32 1
-store i64 %r64, i64* %r66
-%r67 = lshr i512 %r63, 64
-%r68 = trunc i512 %r67 to i64
-%r70 = getelementptr i64, i64* %r59, i32 2
-store i64 %r68, i64* %r70
-%r71 = lshr i512 %r67, 64
-%r72 = trunc i512 %r71 to i64
-%r74 = getelementptr i64, i64* %r59, i32 3
-store i64 %r72, i64* %r74
-%r75 = lshr i512 %r71, 64
-%r76 = trunc i512 %r75 to i64
-%r78 = getelementptr i64, i64* %r59, i32 4
-store i64 %r76, i64* %r78
-%r79 = lshr i512 %r75, 64
-%r80 = trunc i512 %r79 to i64
-%r82 = getelementptr i64, i64* %r59, i32 5
-store i64 %r80, i64* %r82
-%r83 = lshr i512 %r79, 64
-%r84 = trunc i512 %r83 to i64
-%r86 = getelementptr i64, i64* %r59, i32 6
-store i64 %r84, i64* %r86
-%r87 = lshr i512 %r83, 64
-%r88 = trunc i512 %r87 to i64
-%r90 = getelementptr i64, i64* %r59, i32 7
-store i64 %r88, i64* %r90
-ret void
-}
-define void @mcl_fpDbl_sqrPre7L(i64* noalias  %r1, i64* noalias  %r2)
-{
-%r3 = load i64, i64* %r2
-%r4 = call i512 @mulPv448x64(i64* %r2, i64 %r3)
-%r5 = trunc i512 %r4 to i64
-store i64 %r5, i64* %r1
-%r6 = lshr i512 %r4, 64
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = call i512 @mulPv448x64(i64* %r2, i64 %r9)
-%r11 = add i512 %r6, %r10
-%r12 = trunc i512 %r11 to i64
-%r14 = getelementptr i64, i64* %r1, i32 1
-store i64 %r12, i64* %r14
-%r15 = lshr i512 %r11, 64
-%r17 = getelementptr i64, i64* %r2, i32 2
-%r18 = load i64, i64* %r17
-%r19 = call i512 @mulPv448x64(i64* %r2, i64 %r18)
-%r20 = add i512 %r15, %r19
-%r21 = trunc i512 %r20 to i64
-%r23 = getelementptr i64, i64* %r1, i32 2
-store i64 %r21, i64* %r23
-%r24 = lshr i512 %r20, 64
-%r26 = getelementptr i64, i64* %r2, i32 3
-%r27 = load i64, i64* %r26
-%r28 = call i512 @mulPv448x64(i64* %r2, i64 %r27)
-%r29 = add i512 %r24, %r28
-%r30 = trunc i512 %r29 to i64
-%r32 = getelementptr i64, i64* %r1, i32 3
-store i64 %r30, i64* %r32
-%r33 = lshr i512 %r29, 64
-%r35 = getelementptr i64, i64* %r2, i32 4
-%r36 = load i64, i64* %r35
-%r37 = call i512 @mulPv448x64(i64* %r2, i64 %r36)
-%r38 = add i512 %r33, %r37
-%r39 = trunc i512 %r38 to i64
-%r41 = getelementptr i64, i64* %r1, i32 4
-store i64 %r39, i64* %r41
-%r42 = lshr i512 %r38, 64
-%r44 = getelementptr i64, i64* %r2, i32 5
-%r45 = load i64, i64* %r44
-%r46 = call i512 @mulPv448x64(i64* %r2, i64 %r45)
-%r47 = add i512 %r42, %r46
-%r48 = trunc i512 %r47 to i64
-%r50 = getelementptr i64, i64* %r1, i32 5
-store i64 %r48, i64* %r50
-%r51 = lshr i512 %r47, 64
-%r53 = getelementptr i64, i64* %r2, i32 6
-%r54 = load i64, i64* %r53
-%r55 = call i512 @mulPv448x64(i64* %r2, i64 %r54)
-%r56 = add i512 %r51, %r55
-%r58 = getelementptr i64, i64* %r1, i32 6
-%r59 = trunc i512 %r56 to i64
-%r61 = getelementptr i64, i64* %r58, i32 0
-store i64 %r59, i64* %r61
-%r62 = lshr i512 %r56, 64
-%r63 = trunc i512 %r62 to i64
-%r65 = getelementptr i64, i64* %r58, i32 1
-store i64 %r63, i64* %r65
-%r66 = lshr i512 %r62, 64
-%r67 = trunc i512 %r66 to i64
-%r69 = getelementptr i64, i64* %r58, i32 2
-store i64 %r67, i64* %r69
-%r70 = lshr i512 %r66, 64
-%r71 = trunc i512 %r70 to i64
-%r73 = getelementptr i64, i64* %r58, i32 3
-store i64 %r71, i64* %r73
-%r74 = lshr i512 %r70, 64
-%r75 = trunc i512 %r74 to i64
-%r77 = getelementptr i64, i64* %r58, i32 4
-store i64 %r75, i64* %r77
-%r78 = lshr i512 %r74, 64
-%r79 = trunc i512 %r78 to i64
-%r81 = getelementptr i64, i64* %r58, i32 5
-store i64 %r79, i64* %r81
-%r82 = lshr i512 %r78, 64
-%r83 = trunc i512 %r82 to i64
-%r85 = getelementptr i64, i64* %r58, i32 6
-store i64 %r83, i64* %r85
-%r86 = lshr i512 %r82, 64
-%r87 = trunc i512 %r86 to i64
-%r89 = getelementptr i64, i64* %r58, i32 7
-store i64 %r87, i64* %r89
-ret void
-}
-define void @mcl_fp_mont7L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
-{
-%r6 = getelementptr i64, i64* %r4, i32 -1
-%r7 = load i64, i64* %r6
-%r9 = getelementptr i64, i64* %r3, i32 0
-%r10 = load i64, i64* %r9
-%r11 = call i512 @mulPv448x64(i64* %r2, i64 %r10)
-%r12 = zext i512 %r11 to i576
-%r13 = trunc i512 %r11 to i64
-%r14 = mul i64 %r13, %r7
-%r15 = call i512 @mulPv448x64(i64* %r4, i64 %r14)
-%r16 = zext i512 %r15 to i576
-%r17 = add i576 %r12, %r16
-%r18 = lshr i576 %r17, 64
-%r20 = getelementptr i64, i64* %r3, i32 1
-%r21 = load i64, i64* %r20
-%r22 = call i512 @mulPv448x64(i64* %r2, i64 %r21)
-%r23 = zext i512 %r22 to i576
-%r24 = add i576 %r18, %r23
-%r25 = trunc i576 %r24 to i64
-%r26 = mul i64 %r25, %r7
-%r27 = call i512 @mulPv448x64(i64* %r4, i64 %r26)
-%r28 = zext i512 %r27 to i576
-%r29 = add i576 %r24, %r28
-%r30 = lshr i576 %r29, 64
-%r32 = getelementptr i64, i64* %r3, i32 2
-%r33 = load i64, i64* %r32
-%r34 = call i512 @mulPv448x64(i64* %r2, i64 %r33)
-%r35 = zext i512 %r34 to i576
-%r36 = add i576 %r30, %r35
-%r37 = trunc i576 %r36 to i64
-%r38 = mul i64 %r37, %r7
-%r39 = call i512 @mulPv448x64(i64* %r4, i64 %r38)
-%r40 = zext i512 %r39 to i576
-%r41 = add i576 %r36, %r40
-%r42 = lshr i576 %r41, 64
-%r44 = getelementptr i64, i64* %r3, i32 3
-%r45 = load i64, i64* %r44
-%r46 = call i512 @mulPv448x64(i64* %r2, i64 %r45)
-%r47 = zext i512 %r46 to i576
-%r48 = add i576 %r42, %r47
-%r49 = trunc i576 %r48 to i64
-%r50 = mul i64 %r49, %r7
-%r51 = call i512 @mulPv448x64(i64* %r4, i64 %r50)
-%r52 = zext i512 %r51 to i576
-%r53 = add i576 %r48, %r52
-%r54 = lshr i576 %r53, 64
-%r56 = getelementptr i64, i64* %r3, i32 4
-%r57 = load i64, i64* %r56
-%r58 = call i512 @mulPv448x64(i64* %r2, i64 %r57)
-%r59 = zext i512 %r58 to i576
-%r60 = add i576 %r54, %r59
-%r61 = trunc i576 %r60 to i64
-%r62 = mul i64 %r61, %r7
-%r63 = call i512 @mulPv448x64(i64* %r4, i64 %r62)
-%r64 = zext i512 %r63 to i576
-%r65 = add i576 %r60, %r64
-%r66 = lshr i576 %r65, 64
-%r68 = getelementptr i64, i64* %r3, i32 5
-%r69 = load i64, i64* %r68
-%r70 = call i512 @mulPv448x64(i64* %r2, i64 %r69)
-%r71 = zext i512 %r70 to i576
-%r72 = add i576 %r66, %r71
-%r73 = trunc i576 %r72 to i64
-%r74 = mul i64 %r73, %r7
-%r75 = call i512 @mulPv448x64(i64* %r4, i64 %r74)
-%r76 = zext i512 %r75 to i576
-%r77 = add i576 %r72, %r76
-%r78 = lshr i576 %r77, 64
-%r80 = getelementptr i64, i64* %r3, i32 6
-%r81 = load i64, i64* %r80
-%r82 = call i512 @mulPv448x64(i64* %r2, i64 %r81)
-%r83 = zext i512 %r82 to i576
-%r84 = add i576 %r78, %r83
-%r85 = trunc i576 %r84 to i64
-%r86 = mul i64 %r85, %r7
-%r87 = call i512 @mulPv448x64(i64* %r4, i64 %r86)
-%r88 = zext i512 %r87 to i576
-%r89 = add i576 %r84, %r88
-%r90 = lshr i576 %r89, 64
-%r91 = trunc i576 %r90 to i512
-%r92 = load i64, i64* %r4
-%r93 = zext i64 %r92 to i128
-%r95 = getelementptr i64, i64* %r4, i32 1
-%r96 = load i64, i64* %r95
-%r97 = zext i64 %r96 to i128
-%r98 = shl i128 %r97, 64
-%r99 = or i128 %r93, %r98
-%r100 = zext i128 %r99 to i192
-%r102 = getelementptr i64, i64* %r4, i32 2
-%r103 = load i64, i64* %r102
-%r104 = zext i64 %r103 to i192
-%r105 = shl i192 %r104, 128
-%r106 = or i192 %r100, %r105
-%r107 = zext i192 %r106 to i256
-%r109 = getelementptr i64, i64* %r4, i32 3
-%r110 = load i64, i64* %r109
-%r111 = zext i64 %r110 to i256
-%r112 = shl i256 %r111, 192
-%r113 = or i256 %r107, %r112
-%r114 = zext i256 %r113 to i320
-%r116 = getelementptr i64, i64* %r4, i32 4
-%r117 = load i64, i64* %r116
-%r118 = zext i64 %r117 to i320
-%r119 = shl i320 %r118, 256
-%r120 = or i320 %r114, %r119
-%r121 = zext i320 %r120 to i384
-%r123 = getelementptr i64, i64* %r4, i32 5
-%r124 = load i64, i64* %r123
-%r125 = zext i64 %r124 to i384
-%r126 = shl i384 %r125, 320
-%r127 = or i384 %r121, %r126
-%r128 = zext i384 %r127 to i448
-%r130 = getelementptr i64, i64* %r4, i32 6
-%r131 = load i64, i64* %r130
-%r132 = zext i64 %r131 to i448
-%r133 = shl i448 %r132, 384
-%r134 = or i448 %r128, %r133
-%r135 = zext i448 %r134 to i512
-%r136 = sub i512 %r91, %r135
-%r137 = lshr i512 %r136, 448
-%r138 = trunc i512 %r137 to i1
-%r139 = select i1 %r138, i512 %r91, i512 %r136
-%r140 = trunc i512 %r139 to i448
-%r141 = trunc i448 %r140 to i64
-%r143 = getelementptr i64, i64* %r1, i32 0
-store i64 %r141, i64* %r143
-%r144 = lshr i448 %r140, 64
-%r145 = trunc i448 %r144 to i64
-%r147 = getelementptr i64, i64* %r1, i32 1
-store i64 %r145, i64* %r147
-%r148 = lshr i448 %r144, 64
-%r149 = trunc i448 %r148 to i64
-%r151 = getelementptr i64, i64* %r1, i32 2
-store i64 %r149, i64* %r151
-%r152 = lshr i448 %r148, 64
-%r153 = trunc i448 %r152 to i64
-%r155 = getelementptr i64, i64* %r1, i32 3
-store i64 %r153, i64* %r155
-%r156 = lshr i448 %r152, 64
-%r157 = trunc i448 %r156 to i64
-%r159 = getelementptr i64, i64* %r1, i32 4
-store i64 %r157, i64* %r159
-%r160 = lshr i448 %r156, 64
-%r161 = trunc i448 %r160 to i64
-%r163 = getelementptr i64, i64* %r1, i32 5
-store i64 %r161, i64* %r163
-%r164 = lshr i448 %r160, 64
-%r165 = trunc i448 %r164 to i64
-%r167 = getelementptr i64, i64* %r1, i32 6
-store i64 %r165, i64* %r167
-ret void
-}
-define void @mcl_fp_montNF7L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
-{
-%r6 = getelementptr i64, i64* %r4, i32 -1
-%r7 = load i64, i64* %r6
-%r8 = load i64, i64* %r3
-%r9 = call i512 @mulPv448x64(i64* %r2, i64 %r8)
-%r10 = trunc i512 %r9 to i64
-%r11 = mul i64 %r10, %r7
-%r12 = call i512 @mulPv448x64(i64* %r4, i64 %r11)
-%r13 = add i512 %r9, %r12
-%r14 = lshr i512 %r13, 64
-%r16 = getelementptr i64, i64* %r3, i32 1
-%r17 = load i64, i64* %r16
-%r18 = call i512 @mulPv448x64(i64* %r2, i64 %r17)
-%r19 = add i512 %r14, %r18
-%r20 = trunc i512 %r19 to i64
-%r21 = mul i64 %r20, %r7
-%r22 = call i512 @mulPv448x64(i64* %r4, i64 %r21)
-%r23 = add i512 %r19, %r22
-%r24 = lshr i512 %r23, 64
-%r26 = getelementptr i64, i64* %r3, i32 2
-%r27 = load i64, i64* %r26
-%r28 = call i512 @mulPv448x64(i64* %r2, i64 %r27)
-%r29 = add i512 %r24, %r28
-%r30 = trunc i512 %r29 to i64
-%r31 = mul i64 %r30, %r7
-%r32 = call i512 @mulPv448x64(i64* %r4, i64 %r31)
-%r33 = add i512 %r29, %r32
-%r34 = lshr i512 %r33, 64
-%r36 = getelementptr i64, i64* %r3, i32 3
-%r37 = load i64, i64* %r36
-%r38 = call i512 @mulPv448x64(i64* %r2, i64 %r37)
-%r39 = add i512 %r34, %r38
-%r40 = trunc i512 %r39 to i64
-%r41 = mul i64 %r40, %r7
-%r42 = call i512 @mulPv448x64(i64* %r4, i64 %r41)
-%r43 = add i512 %r39, %r42
-%r44 = lshr i512 %r43, 64
-%r46 = getelementptr i64, i64* %r3, i32 4
-%r47 = load i64, i64* %r46
-%r48 = call i512 @mulPv448x64(i64* %r2, i64 %r47)
-%r49 = add i512 %r44, %r48
-%r50 = trunc i512 %r49 to i64
-%r51 = mul i64 %r50, %r7
-%r52 = call i512 @mulPv448x64(i64* %r4, i64 %r51)
-%r53 = add i512 %r49, %r52
-%r54 = lshr i512 %r53, 64
-%r56 = getelementptr i64, i64* %r3, i32 5
-%r57 = load i64, i64* %r56
-%r58 = call i512 @mulPv448x64(i64* %r2, i64 %r57)
-%r59 = add i512 %r54, %r58
-%r60 = trunc i512 %r59 to i64
-%r61 = mul i64 %r60, %r7
-%r62 = call i512 @mulPv448x64(i64* %r4, i64 %r61)
-%r63 = add i512 %r59, %r62
-%r64 = lshr i512 %r63, 64
-%r66 = getelementptr i64, i64* %r3, i32 6
-%r67 = load i64, i64* %r66
-%r68 = call i512 @mulPv448x64(i64* %r2, i64 %r67)
-%r69 = add i512 %r64, %r68
-%r70 = trunc i512 %r69 to i64
-%r71 = mul i64 %r70, %r7
-%r72 = call i512 @mulPv448x64(i64* %r4, i64 %r71)
-%r73 = add i512 %r69, %r72
-%r74 = lshr i512 %r73, 64
-%r75 = trunc i512 %r74 to i448
-%r76 = load i64, i64* %r4
-%r77 = zext i64 %r76 to i128
-%r79 = getelementptr i64, i64* %r4, i32 1
-%r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i128
-%r82 = shl i128 %r81, 64
-%r83 = or i128 %r77, %r82
-%r84 = zext i128 %r83 to i192
-%r86 = getelementptr i64, i64* %r4, i32 2
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i192
-%r89 = shl i192 %r88, 128
-%r90 = or i192 %r84, %r89
-%r91 = zext i192 %r90 to i256
-%r93 = getelementptr i64, i64* %r4, i32 3
-%r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i256
-%r96 = shl i256 %r95, 192
-%r97 = or i256 %r91, %r96
-%r98 = zext i256 %r97 to i320
-%r100 = getelementptr i64, i64* %r4, i32 4
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i320
-%r103 = shl i320 %r102, 256
-%r104 = or i320 %r98, %r103
-%r105 = zext i320 %r104 to i384
-%r107 = getelementptr i64, i64* %r4, i32 5
-%r108 = load i64, i64* %r107
-%r109 = zext i64 %r108 to i384
-%r110 = shl i384 %r109, 320
-%r111 = or i384 %r105, %r110
-%r112 = zext i384 %r111 to i448
-%r114 = getelementptr i64, i64* %r4, i32 6
-%r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i448
-%r117 = shl i448 %r116, 384
-%r118 = or i448 %r112, %r117
-%r119 = sub i448 %r75, %r118
-%r120 = lshr i448 %r119, 447
-%r121 = trunc i448 %r120 to i1
-%r122 = select i1 %r121, i448 %r75, i448 %r119
-%r123 = trunc i448 %r122 to i64
-%r125 = getelementptr i64, i64* %r1, i32 0
-store i64 %r123, i64* %r125
-%r126 = lshr i448 %r122, 64
-%r127 = trunc i448 %r126 to i64
-%r129 = getelementptr i64, i64* %r1, i32 1
-store i64 %r127, i64* %r129
-%r130 = lshr i448 %r126, 64
-%r131 = trunc i448 %r130 to i64
-%r133 = getelementptr i64, i64* %r1, i32 2
-store i64 %r131, i64* %r133
-%r134 = lshr i448 %r130, 64
-%r135 = trunc i448 %r134 to i64
-%r137 = getelementptr i64, i64* %r1, i32 3
-store i64 %r135, i64* %r137
-%r138 = lshr i448 %r134, 64
-%r139 = trunc i448 %r138 to i64
-%r141 = getelementptr i64, i64* %r1, i32 4
-store i64 %r139, i64* %r141
-%r142 = lshr i448 %r138, 64
-%r143 = trunc i448 %r142 to i64
-%r145 = getelementptr i64, i64* %r1, i32 5
-store i64 %r143, i64* %r145
-%r146 = lshr i448 %r142, 64
-%r147 = trunc i448 %r146 to i64
-%r149 = getelementptr i64, i64* %r1, i32 6
-store i64 %r147, i64* %r149
-ret void
-}
-define void @mcl_fp_montRed7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
-{
-%r5 = getelementptr i64, i64* %r3, i32 -1
-%r6 = load i64, i64* %r5
-%r7 = load i64, i64* %r3
-%r8 = zext i64 %r7 to i128
-%r10 = getelementptr i64, i64* %r3, i32 1
-%r11 = load i64, i64* %r10
-%r12 = zext i64 %r11 to i128
-%r13 = shl i128 %r12, 64
-%r14 = or i128 %r8, %r13
-%r15 = zext i128 %r14 to i192
-%r17 = getelementptr i64, i64* %r3, i32 2
-%r18 = load i64, i64* %r17
-%r19 = zext i64 %r18 to i192
-%r20 = shl i192 %r19, 128
-%r21 = or i192 %r15, %r20
-%r22 = zext i192 %r21 to i256
-%r24 = getelementptr i64, i64* %r3, i32 3
-%r25 = load i64, i64* %r24
-%r26 = zext i64 %r25 to i256
-%r27 = shl i256 %r26, 192
-%r28 = or i256 %r22, %r27
-%r29 = zext i256 %r28 to i320
-%r31 = getelementptr i64, i64* %r3, i32 4
-%r32 = load i64, i64* %r31
-%r33 = zext i64 %r32 to i320
-%r34 = shl i320 %r33, 256
-%r35 = or i320 %r29, %r34
-%r36 = zext i320 %r35 to i384
-%r38 = getelementptr i64, i64* %r3, i32 5
-%r39 = load i64, i64* %r38
-%r40 = zext i64 %r39 to i384
-%r41 = shl i384 %r40, 320
-%r42 = or i384 %r36, %r41
-%r43 = zext i384 %r42 to i448
-%r45 = getelementptr i64, i64* %r3, i32 6
-%r46 = load i64, i64* %r45
-%r47 = zext i64 %r46 to i448
-%r48 = shl i448 %r47, 384
-%r49 = or i448 %r43, %r48
-%r50 = load i64, i64* %r2
-%r51 = zext i64 %r50 to i128
-%r53 = getelementptr i64, i64* %r2, i32 1
-%r54 = load i64, i64* %r53
-%r55 = zext i64 %r54 to i128
-%r56 = shl i128 %r55, 64
-%r57 = or i128 %r51, %r56
-%r58 = zext i128 %r57 to i192
-%r60 = getelementptr i64, i64* %r2, i32 2
-%r61 = load i64, i64* %r60
-%r62 = zext i64 %r61 to i192
-%r63 = shl i192 %r62, 128
-%r64 = or i192 %r58, %r63
-%r65 = zext i192 %r64 to i256
-%r67 = getelementptr i64, i64* %r2, i32 3
-%r68 = load i64, i64* %r67
-%r69 = zext i64 %r68 to i256
-%r70 = shl i256 %r69, 192
-%r71 = or i256 %r65, %r70
-%r72 = zext i256 %r71 to i320
-%r74 = getelementptr i64, i64* %r2, i32 4
-%r75 = load i64, i64* %r74
-%r76 = zext i64 %r75 to i320
-%r77 = shl i320 %r76, 256
-%r78 = or i320 %r72, %r77
-%r79 = zext i320 %r78 to i384
-%r81 = getelementptr i64, i64* %r2, i32 5
-%r82 = load i64, i64* %r81
-%r83 = zext i64 %r82 to i384
-%r84 = shl i384 %r83, 320
-%r85 = or i384 %r79, %r84
-%r86 = zext i384 %r85 to i448
-%r88 = getelementptr i64, i64* %r2, i32 6
-%r89 = load i64, i64* %r88
-%r90 = zext i64 %r89 to i448
-%r91 = shl i448 %r90, 384
-%r92 = or i448 %r86, %r91
-%r93 = zext i448 %r92 to i512
-%r95 = getelementptr i64, i64* %r2, i32 7
-%r96 = load i64, i64* %r95
-%r97 = zext i64 %r96 to i512
-%r98 = shl i512 %r97, 448
-%r99 = or i512 %r93, %r98
-%r100 = zext i512 %r99 to i576
-%r102 = getelementptr i64, i64* %r2, i32 8
-%r103 = load i64, i64* %r102
-%r104 = zext i64 %r103 to i576
-%r105 = shl i576 %r104, 512
-%r106 = or i576 %r100, %r105
-%r107 = zext i576 %r106 to i640
-%r109 = getelementptr i64, i64* %r2, i32 9
-%r110 = load i64, i64* %r109
-%r111 = zext i64 %r110 to i640
-%r112 = shl i640 %r111, 576
-%r113 = or i640 %r107, %r112
-%r114 = zext i640 %r113 to i704
-%r116 = getelementptr i64, i64* %r2, i32 10
-%r117 = load i64, i64* %r116
-%r118 = zext i64 %r117 to i704
-%r119 = shl i704 %r118, 640
-%r120 = or i704 %r114, %r119
-%r121 = zext i704 %r120 to i768
-%r123 = getelementptr i64, i64* %r2, i32 11
-%r124 = load i64, i64* %r123
-%r125 = zext i64 %r124 to i768
-%r126 = shl i768 %r125, 704
-%r127 = or i768 %r121, %r126
-%r128 = zext i768 %r127 to i832
-%r130 = getelementptr i64, i64* %r2, i32 12
-%r131 = load i64, i64* %r130
-%r132 = zext i64 %r131 to i832
-%r133 = shl i832 %r132, 768
-%r134 = or i832 %r128, %r133
-%r135 = zext i832 %r134 to i896
-%r137 = getelementptr i64, i64* %r2, i32 13
-%r138 = load i64, i64* %r137
-%r139 = zext i64 %r138 to i896
-%r140 = shl i896 %r139, 832
-%r141 = or i896 %r135, %r140
-%r142 = zext i896 %r141 to i960
-%r143 = trunc i960 %r142 to i64
-%r144 = mul i64 %r143, %r6
-%r145 = call i512 @mulPv448x64(i64* %r3, i64 %r144)
-%r146 = zext i512 %r145 to i960
-%r147 = add i960 %r142, %r146
-%r148 = lshr i960 %r147, 64
-%r149 = trunc i960 %r148 to i896
-%r150 = trunc i896 %r149 to i64
-%r151 = mul i64 %r150, %r6
-%r152 = call i512 @mulPv448x64(i64* %r3, i64 %r151)
-%r153 = zext i512 %r152 to i896
-%r154 = add i896 %r149, %r153
-%r155 = lshr i896 %r154, 64
-%r156 = trunc i896 %r155 to i832
-%r157 = trunc i832 %r156 to i64
-%r158 = mul i64 %r157, %r6
-%r159 = call i512 @mulPv448x64(i64* %r3, i64 %r158)
-%r160 = zext i512 %r159 to i832
-%r161 = add i832 %r156, %r160
-%r162 = lshr i832 %r161, 64
-%r163 = trunc i832 %r162 to i768
-%r164 = trunc i768 %r163 to i64
-%r165 = mul i64 %r164, %r6
-%r166 = call i512 @mulPv448x64(i64* %r3, i64 %r165)
-%r167 = zext i512 %r166 to i768
-%r168 = add i768 %r163, %r167
-%r169 = lshr i768 %r168, 64
-%r170 = trunc i768 %r169 to i704
-%r171 = trunc i704 %r170 to i64
-%r172 = mul i64 %r171, %r6
-%r173 = call i512 @mulPv448x64(i64* %r3, i64 %r172)
-%r174 = zext i512 %r173 to i704
-%r175 = add i704 %r170, %r174
-%r176 = lshr i704 %r175, 64
-%r177 = trunc i704 %r176 to i640
-%r178 = trunc i640 %r177 to i64
-%r179 = mul i64 %r178, %r6
-%r180 = call i512 @mulPv448x64(i64* %r3, i64 %r179)
-%r181 = zext i512 %r180 to i640
-%r182 = add i640 %r177, %r181
-%r183 = lshr i640 %r182, 64
-%r184 = trunc i640 %r183 to i576
-%r185 = trunc i576 %r184 to i64
-%r186 = mul i64 %r185, %r6
-%r187 = call i512 @mulPv448x64(i64* %r3, i64 %r186)
-%r188 = zext i512 %r187 to i576
-%r189 = add i576 %r184, %r188
-%r190 = lshr i576 %r189, 64
-%r191 = trunc i576 %r190 to i512
-%r192 = zext i448 %r49 to i512
-%r193 = sub i512 %r191, %r192
-%r194 = lshr i512 %r193, 448
-%r195 = trunc i512 %r194 to i1
-%r196 = select i1 %r195, i512 %r191, i512 %r193
-%r197 = trunc i512 %r196 to i448
-%r198 = trunc i448 %r197 to i64
-%r200 = getelementptr i64, i64* %r1, i32 0
-store i64 %r198, i64* %r200
-%r201 = lshr i448 %r197, 64
-%r202 = trunc i448 %r201 to i64
-%r204 = getelementptr i64, i64* %r1, i32 1
-store i64 %r202, i64* %r204
-%r205 = lshr i448 %r201, 64
-%r206 = trunc i448 %r205 to i64
-%r208 = getelementptr i64, i64* %r1, i32 2
-store i64 %r206, i64* %r208
-%r209 = lshr i448 %r205, 64
-%r210 = trunc i448 %r209 to i64
-%r212 = getelementptr i64, i64* %r1, i32 3
-store i64 %r210, i64* %r212
-%r213 = lshr i448 %r209, 64
-%r214 = trunc i448 %r213 to i64
-%r216 = getelementptr i64, i64* %r1, i32 4
-store i64 %r214, i64* %r216
-%r217 = lshr i448 %r213, 64
-%r218 = trunc i448 %r217 to i64
-%r220 = getelementptr i64, i64* %r1, i32 5
-store i64 %r218, i64* %r220
-%r221 = lshr i448 %r217, 64
-%r222 = trunc i448 %r221 to i64
-%r224 = getelementptr i64, i64* %r1, i32 6
-store i64 %r222, i64* %r224
-ret void
-}
-define i64 @mcl_fp_addPre7L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r3
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r3, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r3, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r3, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r3, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r3, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r3, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r49 = load i64, i64* %r4
-%r50 = zext i64 %r49 to i128
-%r52 = getelementptr i64, i64* %r4, i32 1
-%r53 = load i64, i64* %r52
-%r54 = zext i64 %r53 to i128
-%r55 = shl i128 %r54, 64
-%r56 = or i128 %r50, %r55
-%r57 = zext i128 %r56 to i192
-%r59 = getelementptr i64, i64* %r4, i32 2
-%r60 = load i64, i64* %r59
-%r61 = zext i64 %r60 to i192
-%r62 = shl i192 %r61, 128
-%r63 = or i192 %r57, %r62
-%r64 = zext i192 %r63 to i256
-%r66 = getelementptr i64, i64* %r4, i32 3
-%r67 = load i64, i64* %r66
-%r68 = zext i64 %r67 to i256
-%r69 = shl i256 %r68, 192
-%r70 = or i256 %r64, %r69
-%r71 = zext i256 %r70 to i320
-%r73 = getelementptr i64, i64* %r4, i32 4
-%r74 = load i64, i64* %r73
-%r75 = zext i64 %r74 to i320
-%r76 = shl i320 %r75, 256
-%r77 = or i320 %r71, %r76
-%r78 = zext i320 %r77 to i384
-%r80 = getelementptr i64, i64* %r4, i32 5
-%r81 = load i64, i64* %r80
-%r82 = zext i64 %r81 to i384
-%r83 = shl i384 %r82, 320
-%r84 = or i384 %r78, %r83
-%r85 = zext i384 %r84 to i448
-%r87 = getelementptr i64, i64* %r4, i32 6
-%r88 = load i64, i64* %r87
-%r89 = zext i64 %r88 to i448
-%r90 = shl i448 %r89, 384
-%r91 = or i448 %r85, %r90
-%r92 = zext i448 %r91 to i512
-%r93 = add i512 %r48, %r92
-%r94 = trunc i512 %r93 to i448
-%r95 = trunc i448 %r94 to i64
-%r97 = getelementptr i64, i64* %r2, i32 0
-store i64 %r95, i64* %r97
-%r98 = lshr i448 %r94, 64
-%r99 = trunc i448 %r98 to i64
-%r101 = getelementptr i64, i64* %r2, i32 1
-store i64 %r99, i64* %r101
-%r102 = lshr i448 %r98, 64
-%r103 = trunc i448 %r102 to i64
-%r105 = getelementptr i64, i64* %r2, i32 2
-store i64 %r103, i64* %r105
-%r106 = lshr i448 %r102, 64
-%r107 = trunc i448 %r106 to i64
-%r109 = getelementptr i64, i64* %r2, i32 3
-store i64 %r107, i64* %r109
-%r110 = lshr i448 %r106, 64
-%r111 = trunc i448 %r110 to i64
-%r113 = getelementptr i64, i64* %r2, i32 4
-store i64 %r111, i64* %r113
-%r114 = lshr i448 %r110, 64
-%r115 = trunc i448 %r114 to i64
-%r117 = getelementptr i64, i64* %r2, i32 5
-store i64 %r115, i64* %r117
-%r118 = lshr i448 %r114, 64
-%r119 = trunc i448 %r118 to i64
-%r121 = getelementptr i64, i64* %r2, i32 6
-store i64 %r119, i64* %r121
-%r122 = lshr i512 %r93, 448
-%r123 = trunc i512 %r122 to i64
-ret i64 %r123
-}
-define i64 @mcl_fp_subPre7L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r3
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r3, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r3, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r3, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r3, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r3, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r3, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r49 = load i64, i64* %r4
-%r50 = zext i64 %r49 to i128
-%r52 = getelementptr i64, i64* %r4, i32 1
-%r53 = load i64, i64* %r52
-%r54 = zext i64 %r53 to i128
-%r55 = shl i128 %r54, 64
-%r56 = or i128 %r50, %r55
-%r57 = zext i128 %r56 to i192
-%r59 = getelementptr i64, i64* %r4, i32 2
-%r60 = load i64, i64* %r59
-%r61 = zext i64 %r60 to i192
-%r62 = shl i192 %r61, 128
-%r63 = or i192 %r57, %r62
-%r64 = zext i192 %r63 to i256
-%r66 = getelementptr i64, i64* %r4, i32 3
-%r67 = load i64, i64* %r66
-%r68 = zext i64 %r67 to i256
-%r69 = shl i256 %r68, 192
-%r70 = or i256 %r64, %r69
-%r71 = zext i256 %r70 to i320
-%r73 = getelementptr i64, i64* %r4, i32 4
-%r74 = load i64, i64* %r73
-%r75 = zext i64 %r74 to i320
-%r76 = shl i320 %r75, 256
-%r77 = or i320 %r71, %r76
-%r78 = zext i320 %r77 to i384
-%r80 = getelementptr i64, i64* %r4, i32 5
-%r81 = load i64, i64* %r80
-%r82 = zext i64 %r81 to i384
-%r83 = shl i384 %r82, 320
-%r84 = or i384 %r78, %r83
-%r85 = zext i384 %r84 to i448
-%r87 = getelementptr i64, i64* %r4, i32 6
-%r88 = load i64, i64* %r87
-%r89 = zext i64 %r88 to i448
-%r90 = shl i448 %r89, 384
-%r91 = or i448 %r85, %r90
-%r92 = zext i448 %r91 to i512
-%r93 = sub i512 %r48, %r92
-%r94 = trunc i512 %r93 to i448
-%r95 = trunc i448 %r94 to i64
-%r97 = getelementptr i64, i64* %r2, i32 0
-store i64 %r95, i64* %r97
-%r98 = lshr i448 %r94, 64
-%r99 = trunc i448 %r98 to i64
-%r101 = getelementptr i64, i64* %r2, i32 1
-store i64 %r99, i64* %r101
-%r102 = lshr i448 %r98, 64
-%r103 = trunc i448 %r102 to i64
-%r105 = getelementptr i64, i64* %r2, i32 2
-store i64 %r103, i64* %r105
-%r106 = lshr i448 %r102, 64
-%r107 = trunc i448 %r106 to i64
-%r109 = getelementptr i64, i64* %r2, i32 3
-store i64 %r107, i64* %r109
-%r110 = lshr i448 %r106, 64
-%r111 = trunc i448 %r110 to i64
-%r113 = getelementptr i64, i64* %r2, i32 4
-store i64 %r111, i64* %r113
-%r114 = lshr i448 %r110, 64
-%r115 = trunc i448 %r114 to i64
-%r117 = getelementptr i64, i64* %r2, i32 5
-store i64 %r115, i64* %r117
-%r118 = lshr i448 %r114, 64
-%r119 = trunc i448 %r118 to i64
-%r121 = getelementptr i64, i64* %r2, i32 6
-store i64 %r119, i64* %r121
-%r122 = lshr i512 %r93, 448
-%r123 = trunc i512 %r122 to i64
-%r125 = and i64 %r123, 1
-ret i64 %r125
-}
-define void @mcl_fp_shr1_7L(i64* noalias  %r1, i64* noalias  %r2)
-{
-%r3 = load i64, i64* %r2
-%r4 = zext i64 %r3 to i128
-%r6 = getelementptr i64, i64* %r2, i32 1
-%r7 = load i64, i64* %r6
-%r8 = zext i64 %r7 to i128
-%r9 = shl i128 %r8, 64
-%r10 = or i128 %r4, %r9
-%r11 = zext i128 %r10 to i192
-%r13 = getelementptr i64, i64* %r2, i32 2
-%r14 = load i64, i64* %r13
-%r15 = zext i64 %r14 to i192
-%r16 = shl i192 %r15, 128
-%r17 = or i192 %r11, %r16
-%r18 = zext i192 %r17 to i256
-%r20 = getelementptr i64, i64* %r2, i32 3
-%r21 = load i64, i64* %r20
-%r22 = zext i64 %r21 to i256
-%r23 = shl i256 %r22, 192
-%r24 = or i256 %r18, %r23
-%r25 = zext i256 %r24 to i320
-%r27 = getelementptr i64, i64* %r2, i32 4
-%r28 = load i64, i64* %r27
-%r29 = zext i64 %r28 to i320
-%r30 = shl i320 %r29, 256
-%r31 = or i320 %r25, %r30
-%r32 = zext i320 %r31 to i384
-%r34 = getelementptr i64, i64* %r2, i32 5
-%r35 = load i64, i64* %r34
-%r36 = zext i64 %r35 to i384
-%r37 = shl i384 %r36, 320
-%r38 = or i384 %r32, %r37
-%r39 = zext i384 %r38 to i448
-%r41 = getelementptr i64, i64* %r2, i32 6
-%r42 = load i64, i64* %r41
-%r43 = zext i64 %r42 to i448
-%r44 = shl i448 %r43, 384
-%r45 = or i448 %r39, %r44
-%r46 = lshr i448 %r45, 1
-%r47 = trunc i448 %r46 to i64
-%r49 = getelementptr i64, i64* %r1, i32 0
-store i64 %r47, i64* %r49
-%r50 = lshr i448 %r46, 64
-%r51 = trunc i448 %r50 to i64
-%r53 = getelementptr i64, i64* %r1, i32 1
-store i64 %r51, i64* %r53
-%r54 = lshr i448 %r50, 64
-%r55 = trunc i448 %r54 to i64
-%r57 = getelementptr i64, i64* %r1, i32 2
-store i64 %r55, i64* %r57
-%r58 = lshr i448 %r54, 64
-%r59 = trunc i448 %r58 to i64
-%r61 = getelementptr i64, i64* %r1, i32 3
-store i64 %r59, i64* %r61
-%r62 = lshr i448 %r58, 64
-%r63 = trunc i448 %r62 to i64
-%r65 = getelementptr i64, i64* %r1, i32 4
-store i64 %r63, i64* %r65
-%r66 = lshr i448 %r62, 64
-%r67 = trunc i448 %r66 to i64
-%r69 = getelementptr i64, i64* %r1, i32 5
-store i64 %r67, i64* %r69
-%r70 = lshr i448 %r66, 64
-%r71 = trunc i448 %r70 to i64
-%r73 = getelementptr i64, i64* %r1, i32 6
-store i64 %r71, i64* %r73
-ret void
-}
-define void @mcl_fp_add7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = load i64, i64* %r3
-%r49 = zext i64 %r48 to i128
-%r51 = getelementptr i64, i64* %r3, i32 1
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i128
-%r54 = shl i128 %r53, 64
-%r55 = or i128 %r49, %r54
-%r56 = zext i128 %r55 to i192
-%r58 = getelementptr i64, i64* %r3, i32 2
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i192
-%r61 = shl i192 %r60, 128
-%r62 = or i192 %r56, %r61
-%r63 = zext i192 %r62 to i256
-%r65 = getelementptr i64, i64* %r3, i32 3
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i256
-%r68 = shl i256 %r67, 192
-%r69 = or i256 %r63, %r68
-%r70 = zext i256 %r69 to i320
-%r72 = getelementptr i64, i64* %r3, i32 4
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i320
-%r75 = shl i320 %r74, 256
-%r76 = or i320 %r70, %r75
-%r77 = zext i320 %r76 to i384
-%r79 = getelementptr i64, i64* %r3, i32 5
-%r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i384
-%r82 = shl i384 %r81, 320
-%r83 = or i384 %r77, %r82
-%r84 = zext i384 %r83 to i448
-%r86 = getelementptr i64, i64* %r3, i32 6
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i448
-%r89 = shl i448 %r88, 384
-%r90 = or i448 %r84, %r89
-%r91 = zext i448 %r47 to i512
-%r92 = zext i448 %r90 to i512
-%r93 = add i512 %r91, %r92
-%r94 = trunc i512 %r93 to i448
-%r95 = trunc i448 %r94 to i64
-%r97 = getelementptr i64, i64* %r1, i32 0
-store i64 %r95, i64* %r97
-%r98 = lshr i448 %r94, 64
-%r99 = trunc i448 %r98 to i64
-%r101 = getelementptr i64, i64* %r1, i32 1
-store i64 %r99, i64* %r101
-%r102 = lshr i448 %r98, 64
-%r103 = trunc i448 %r102 to i64
-%r105 = getelementptr i64, i64* %r1, i32 2
-store i64 %r103, i64* %r105
-%r106 = lshr i448 %r102, 64
-%r107 = trunc i448 %r106 to i64
-%r109 = getelementptr i64, i64* %r1, i32 3
-store i64 %r107, i64* %r109
-%r110 = lshr i448 %r106, 64
-%r111 = trunc i448 %r110 to i64
-%r113 = getelementptr i64, i64* %r1, i32 4
-store i64 %r111, i64* %r113
-%r114 = lshr i448 %r110, 64
-%r115 = trunc i448 %r114 to i64
-%r117 = getelementptr i64, i64* %r1, i32 5
-store i64 %r115, i64* %r117
-%r118 = lshr i448 %r114, 64
-%r119 = trunc i448 %r118 to i64
-%r121 = getelementptr i64, i64* %r1, i32 6
-store i64 %r119, i64* %r121
-%r122 = load i64, i64* %r4
-%r123 = zext i64 %r122 to i128
-%r125 = getelementptr i64, i64* %r4, i32 1
-%r126 = load i64, i64* %r125
-%r127 = zext i64 %r126 to i128
-%r128 = shl i128 %r127, 64
-%r129 = or i128 %r123, %r128
-%r130 = zext i128 %r129 to i192
-%r132 = getelementptr i64, i64* %r4, i32 2
-%r133 = load i64, i64* %r132
-%r134 = zext i64 %r133 to i192
-%r135 = shl i192 %r134, 128
-%r136 = or i192 %r130, %r135
-%r137 = zext i192 %r136 to i256
-%r139 = getelementptr i64, i64* %r4, i32 3
-%r140 = load i64, i64* %r139
-%r141 = zext i64 %r140 to i256
-%r142 = shl i256 %r141, 192
-%r143 = or i256 %r137, %r142
-%r144 = zext i256 %r143 to i320
-%r146 = getelementptr i64, i64* %r4, i32 4
-%r147 = load i64, i64* %r146
-%r148 = zext i64 %r147 to i320
-%r149 = shl i320 %r148, 256
-%r150 = or i320 %r144, %r149
-%r151 = zext i320 %r150 to i384
-%r153 = getelementptr i64, i64* %r4, i32 5
-%r154 = load i64, i64* %r153
-%r155 = zext i64 %r154 to i384
-%r156 = shl i384 %r155, 320
-%r157 = or i384 %r151, %r156
-%r158 = zext i384 %r157 to i448
-%r160 = getelementptr i64, i64* %r4, i32 6
-%r161 = load i64, i64* %r160
-%r162 = zext i64 %r161 to i448
-%r163 = shl i448 %r162, 384
-%r164 = or i448 %r158, %r163
-%r165 = zext i448 %r164 to i512
-%r166 = sub i512 %r93, %r165
-%r167 = lshr i512 %r166, 448
-%r168 = trunc i512 %r167 to i1
-br i1%r168, label %carry, label %nocarry
-nocarry:
-%r169 = trunc i512 %r166 to i448
-%r170 = trunc i448 %r169 to i64
-%r172 = getelementptr i64, i64* %r1, i32 0
-store i64 %r170, i64* %r172
-%r173 = lshr i448 %r169, 64
-%r174 = trunc i448 %r173 to i64
-%r176 = getelementptr i64, i64* %r1, i32 1
-store i64 %r174, i64* %r176
-%r177 = lshr i448 %r173, 64
-%r178 = trunc i448 %r177 to i64
-%r180 = getelementptr i64, i64* %r1, i32 2
-store i64 %r178, i64* %r180
-%r181 = lshr i448 %r177, 64
-%r182 = trunc i448 %r181 to i64
-%r184 = getelementptr i64, i64* %r1, i32 3
-store i64 %r182, i64* %r184
-%r185 = lshr i448 %r181, 64
-%r186 = trunc i448 %r185 to i64
-%r188 = getelementptr i64, i64* %r1, i32 4
-store i64 %r186, i64* %r188
-%r189 = lshr i448 %r185, 64
-%r190 = trunc i448 %r189 to i64
-%r192 = getelementptr i64, i64* %r1, i32 5
-store i64 %r190, i64* %r192
-%r193 = lshr i448 %r189, 64
-%r194 = trunc i448 %r193 to i64
-%r196 = getelementptr i64, i64* %r1, i32 6
-store i64 %r194, i64* %r196
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = load i64, i64* %r3
-%r49 = zext i64 %r48 to i128
-%r51 = getelementptr i64, i64* %r3, i32 1
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i128
-%r54 = shl i128 %r53, 64
-%r55 = or i128 %r49, %r54
-%r56 = zext i128 %r55 to i192
-%r58 = getelementptr i64, i64* %r3, i32 2
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i192
-%r61 = shl i192 %r60, 128
-%r62 = or i192 %r56, %r61
-%r63 = zext i192 %r62 to i256
-%r65 = getelementptr i64, i64* %r3, i32 3
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i256
-%r68 = shl i256 %r67, 192
-%r69 = or i256 %r63, %r68
-%r70 = zext i256 %r69 to i320
-%r72 = getelementptr i64, i64* %r3, i32 4
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i320
-%r75 = shl i320 %r74, 256
-%r76 = or i320 %r70, %r75
-%r77 = zext i320 %r76 to i384
-%r79 = getelementptr i64, i64* %r3, i32 5
-%r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i384
-%r82 = shl i384 %r81, 320
-%r83 = or i384 %r77, %r82
-%r84 = zext i384 %r83 to i448
-%r86 = getelementptr i64, i64* %r3, i32 6
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i448
-%r89 = shl i448 %r88, 384
-%r90 = or i448 %r84, %r89
-%r91 = add i448 %r47, %r90
-%r92 = load i64, i64* %r4
-%r93 = zext i64 %r92 to i128
-%r95 = getelementptr i64, i64* %r4, i32 1
-%r96 = load i64, i64* %r95
-%r97 = zext i64 %r96 to i128
-%r98 = shl i128 %r97, 64
-%r99 = or i128 %r93, %r98
-%r100 = zext i128 %r99 to i192
-%r102 = getelementptr i64, i64* %r4, i32 2
-%r103 = load i64, i64* %r102
-%r104 = zext i64 %r103 to i192
-%r105 = shl i192 %r104, 128
-%r106 = or i192 %r100, %r105
-%r107 = zext i192 %r106 to i256
-%r109 = getelementptr i64, i64* %r4, i32 3
-%r110 = load i64, i64* %r109
-%r111 = zext i64 %r110 to i256
-%r112 = shl i256 %r111, 192
-%r113 = or i256 %r107, %r112
-%r114 = zext i256 %r113 to i320
-%r116 = getelementptr i64, i64* %r4, i32 4
-%r117 = load i64, i64* %r116
-%r118 = zext i64 %r117 to i320
-%r119 = shl i320 %r118, 256
-%r120 = or i320 %r114, %r119
-%r121 = zext i320 %r120 to i384
-%r123 = getelementptr i64, i64* %r4, i32 5
-%r124 = load i64, i64* %r123
-%r125 = zext i64 %r124 to i384
-%r126 = shl i384 %r125, 320
-%r127 = or i384 %r121, %r126
-%r128 = zext i384 %r127 to i448
-%r130 = getelementptr i64, i64* %r4, i32 6
-%r131 = load i64, i64* %r130
-%r132 = zext i64 %r131 to i448
-%r133 = shl i448 %r132, 384
-%r134 = or i448 %r128, %r133
-%r135 = sub i448 %r91, %r134
-%r136 = lshr i448 %r135, 447
-%r137 = trunc i448 %r136 to i1
-%r138 = select i1 %r137, i448 %r91, i448 %r135
-%r139 = trunc i448 %r138 to i64
-%r141 = getelementptr i64, i64* %r1, i32 0
-store i64 %r139, i64* %r141
-%r142 = lshr i448 %r138, 64
-%r143 = trunc i448 %r142 to i64
-%r145 = getelementptr i64, i64* %r1, i32 1
-store i64 %r143, i64* %r145
-%r146 = lshr i448 %r142, 64
-%r147 = trunc i448 %r146 to i64
-%r149 = getelementptr i64, i64* %r1, i32 2
-store i64 %r147, i64* %r149
-%r150 = lshr i448 %r146, 64
-%r151 = trunc i448 %r150 to i64
-%r153 = getelementptr i64, i64* %r1, i32 3
-store i64 %r151, i64* %r153
-%r154 = lshr i448 %r150, 64
-%r155 = trunc i448 %r154 to i64
-%r157 = getelementptr i64, i64* %r1, i32 4
-store i64 %r155, i64* %r157
-%r158 = lshr i448 %r154, 64
-%r159 = trunc i448 %r158 to i64
-%r161 = getelementptr i64, i64* %r1, i32 5
-store i64 %r159, i64* %r161
-%r162 = lshr i448 %r158, 64
-%r163 = trunc i448 %r162 to i64
-%r165 = getelementptr i64, i64* %r1, i32 6
-store i64 %r163, i64* %r165
-ret void
-}
-define void @mcl_fp_sub7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = load i64, i64* %r3
-%r49 = zext i64 %r48 to i128
-%r51 = getelementptr i64, i64* %r3, i32 1
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i128
-%r54 = shl i128 %r53, 64
-%r55 = or i128 %r49, %r54
-%r56 = zext i128 %r55 to i192
-%r58 = getelementptr i64, i64* %r3, i32 2
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i192
-%r61 = shl i192 %r60, 128
-%r62 = or i192 %r56, %r61
-%r63 = zext i192 %r62 to i256
-%r65 = getelementptr i64, i64* %r3, i32 3
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i256
-%r68 = shl i256 %r67, 192
-%r69 = or i256 %r63, %r68
-%r70 = zext i256 %r69 to i320
-%r72 = getelementptr i64, i64* %r3, i32 4
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i320
-%r75 = shl i320 %r74, 256
-%r76 = or i320 %r70, %r75
-%r77 = zext i320 %r76 to i384
-%r79 = getelementptr i64, i64* %r3, i32 5
-%r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i384
-%r82 = shl i384 %r81, 320
-%r83 = or i384 %r77, %r82
-%r84 = zext i384 %r83 to i448
-%r86 = getelementptr i64, i64* %r3, i32 6
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i448
-%r89 = shl i448 %r88, 384
-%r90 = or i448 %r84, %r89
-%r91 = zext i448 %r47 to i512
-%r92 = zext i448 %r90 to i512
-%r93 = sub i512 %r91, %r92
-%r94 = trunc i512 %r93 to i448
-%r95 = lshr i512 %r93, 448
-%r96 = trunc i512 %r95 to i1
-%r97 = trunc i448 %r94 to i64
-%r99 = getelementptr i64, i64* %r1, i32 0
-store i64 %r97, i64* %r99
-%r100 = lshr i448 %r94, 64
-%r101 = trunc i448 %r100 to i64
-%r103 = getelementptr i64, i64* %r1, i32 1
-store i64 %r101, i64* %r103
-%r104 = lshr i448 %r100, 64
-%r105 = trunc i448 %r104 to i64
-%r107 = getelementptr i64, i64* %r1, i32 2
-store i64 %r105, i64* %r107
-%r108 = lshr i448 %r104, 64
-%r109 = trunc i448 %r108 to i64
-%r111 = getelementptr i64, i64* %r1, i32 3
-store i64 %r109, i64* %r111
-%r112 = lshr i448 %r108, 64
-%r113 = trunc i448 %r112 to i64
-%r115 = getelementptr i64, i64* %r1, i32 4
-store i64 %r113, i64* %r115
-%r116 = lshr i448 %r112, 64
-%r117 = trunc i448 %r116 to i64
-%r119 = getelementptr i64, i64* %r1, i32 5
-store i64 %r117, i64* %r119
-%r120 = lshr i448 %r116, 64
-%r121 = trunc i448 %r120 to i64
-%r123 = getelementptr i64, i64* %r1, i32 6
-store i64 %r121, i64* %r123
-br i1%r96, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r124 = load i64, i64* %r4
-%r125 = zext i64 %r124 to i128
-%r127 = getelementptr i64, i64* %r4, i32 1
-%r128 = load i64, i64* %r127
-%r129 = zext i64 %r128 to i128
-%r130 = shl i128 %r129, 64
-%r131 = or i128 %r125, %r130
-%r132 = zext i128 %r131 to i192
-%r134 = getelementptr i64, i64* %r4, i32 2
-%r135 = load i64, i64* %r134
-%r136 = zext i64 %r135 to i192
-%r137 = shl i192 %r136, 128
-%r138 = or i192 %r132, %r137
-%r139 = zext i192 %r138 to i256
-%r141 = getelementptr i64, i64* %r4, i32 3
-%r142 = load i64, i64* %r141
-%r143 = zext i64 %r142 to i256
-%r144 = shl i256 %r143, 192
-%r145 = or i256 %r139, %r144
-%r146 = zext i256 %r145 to i320
-%r148 = getelementptr i64, i64* %r4, i32 4
-%r149 = load i64, i64* %r148
-%r150 = zext i64 %r149 to i320
-%r151 = shl i320 %r150, 256
-%r152 = or i320 %r146, %r151
-%r153 = zext i320 %r152 to i384
-%r155 = getelementptr i64, i64* %r4, i32 5
-%r156 = load i64, i64* %r155
-%r157 = zext i64 %r156 to i384
-%r158 = shl i384 %r157, 320
-%r159 = or i384 %r153, %r158
-%r160 = zext i384 %r159 to i448
-%r162 = getelementptr i64, i64* %r4, i32 6
-%r163 = load i64, i64* %r162
-%r164 = zext i64 %r163 to i448
-%r165 = shl i448 %r164, 384
-%r166 = or i448 %r160, %r165
-%r167 = add i448 %r94, %r166
-%r168 = trunc i448 %r167 to i64
-%r170 = getelementptr i64, i64* %r1, i32 0
-store i64 %r168, i64* %r170
-%r171 = lshr i448 %r167, 64
-%r172 = trunc i448 %r171 to i64
-%r174 = getelementptr i64, i64* %r1, i32 1
-store i64 %r172, i64* %r174
-%r175 = lshr i448 %r171, 64
-%r176 = trunc i448 %r175 to i64
-%r178 = getelementptr i64, i64* %r1, i32 2
-store i64 %r176, i64* %r178
-%r179 = lshr i448 %r175, 64
-%r180 = trunc i448 %r179 to i64
-%r182 = getelementptr i64, i64* %r1, i32 3
-store i64 %r180, i64* %r182
-%r183 = lshr i448 %r179, 64
-%r184 = trunc i448 %r183 to i64
-%r186 = getelementptr i64, i64* %r1, i32 4
-store i64 %r184, i64* %r186
-%r187 = lshr i448 %r183, 64
-%r188 = trunc i448 %r187 to i64
-%r190 = getelementptr i64, i64* %r1, i32 5
-store i64 %r188, i64* %r190
-%r191 = lshr i448 %r187, 64
-%r192 = trunc i448 %r191 to i64
-%r194 = getelementptr i64, i64* %r1, i32 6
-store i64 %r192, i64* %r194
-ret void
-}
-define void @mcl_fp_subNF7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = load i64, i64* %r3
-%r49 = zext i64 %r48 to i128
-%r51 = getelementptr i64, i64* %r3, i32 1
-%r52 = load i64, i64* %r51
-%r53 = zext i64 %r52 to i128
-%r54 = shl i128 %r53, 64
-%r55 = or i128 %r49, %r54
-%r56 = zext i128 %r55 to i192
-%r58 = getelementptr i64, i64* %r3, i32 2
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i192
-%r61 = shl i192 %r60, 128
-%r62 = or i192 %r56, %r61
-%r63 = zext i192 %r62 to i256
-%r65 = getelementptr i64, i64* %r3, i32 3
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i256
-%r68 = shl i256 %r67, 192
-%r69 = or i256 %r63, %r68
-%r70 = zext i256 %r69 to i320
-%r72 = getelementptr i64, i64* %r3, i32 4
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i320
-%r75 = shl i320 %r74, 256
-%r76 = or i320 %r70, %r75
-%r77 = zext i320 %r76 to i384
-%r79 = getelementptr i64, i64* %r3, i32 5
-%r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i384
-%r82 = shl i384 %r81, 320
-%r83 = or i384 %r77, %r82
-%r84 = zext i384 %r83 to i448
-%r86 = getelementptr i64, i64* %r3, i32 6
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i448
-%r89 = shl i448 %r88, 384
-%r90 = or i448 %r84, %r89
-%r91 = sub i448 %r47, %r90
-%r92 = lshr i448 %r91, 447
-%r93 = trunc i448 %r92 to i1
-%r94 = load i64, i64* %r4
-%r95 = zext i64 %r94 to i128
-%r97 = getelementptr i64, i64* %r4, i32 1
-%r98 = load i64, i64* %r97
-%r99 = zext i64 %r98 to i128
-%r100 = shl i128 %r99, 64
-%r101 = or i128 %r95, %r100
-%r102 = zext i128 %r101 to i192
-%r104 = getelementptr i64, i64* %r4, i32 2
-%r105 = load i64, i64* %r104
-%r106 = zext i64 %r105 to i192
-%r107 = shl i192 %r106, 128
-%r108 = or i192 %r102, %r107
-%r109 = zext i192 %r108 to i256
-%r111 = getelementptr i64, i64* %r4, i32 3
-%r112 = load i64, i64* %r111
-%r113 = zext i64 %r112 to i256
-%r114 = shl i256 %r113, 192
-%r115 = or i256 %r109, %r114
-%r116 = zext i256 %r115 to i320
-%r118 = getelementptr i64, i64* %r4, i32 4
-%r119 = load i64, i64* %r118
-%r120 = zext i64 %r119 to i320
-%r121 = shl i320 %r120, 256
-%r122 = or i320 %r116, %r121
-%r123 = zext i320 %r122 to i384
-%r125 = getelementptr i64, i64* %r4, i32 5
-%r126 = load i64, i64* %r125
-%r127 = zext i64 %r126 to i384
-%r128 = shl i384 %r127, 320
-%r129 = or i384 %r123, %r128
-%r130 = zext i384 %r129 to i448
-%r132 = getelementptr i64, i64* %r4, i32 6
-%r133 = load i64, i64* %r132
-%r134 = zext i64 %r133 to i448
-%r135 = shl i448 %r134, 384
-%r136 = or i448 %r130, %r135
-%r138 = select i1 %r93, i448 %r136, i448 0
-%r139 = add i448 %r91, %r138
-%r140 = trunc i448 %r139 to i64
-%r142 = getelementptr i64, i64* %r1, i32 0
-store i64 %r140, i64* %r142
-%r143 = lshr i448 %r139, 64
-%r144 = trunc i448 %r143 to i64
-%r146 = getelementptr i64, i64* %r1, i32 1
-store i64 %r144, i64* %r146
-%r147 = lshr i448 %r143, 64
-%r148 = trunc i448 %r147 to i64
-%r150 = getelementptr i64, i64* %r1, i32 2
-store i64 %r148, i64* %r150
-%r151 = lshr i448 %r147, 64
-%r152 = trunc i448 %r151 to i64
-%r154 = getelementptr i64, i64* %r1, i32 3
-store i64 %r152, i64* %r154
-%r155 = lshr i448 %r151, 64
-%r156 = trunc i448 %r155 to i64
-%r158 = getelementptr i64, i64* %r1, i32 4
-store i64 %r156, i64* %r158
-%r159 = lshr i448 %r155, 64
-%r160 = trunc i448 %r159 to i64
-%r162 = getelementptr i64, i64* %r1, i32 5
-store i64 %r160, i64* %r162
-%r163 = lshr i448 %r159, 64
-%r164 = trunc i448 %r163 to i64
-%r166 = getelementptr i64, i64* %r1, i32 6
-store i64 %r164, i64* %r166
-ret void
-}
-define void @mcl_fpDbl_add7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r2, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = zext i512 %r54 to i576
-%r57 = getelementptr i64, i64* %r2, i32 8
-%r58 = load i64, i64* %r57
-%r59 = zext i64 %r58 to i576
-%r60 = shl i576 %r59, 512
-%r61 = or i576 %r55, %r60
-%r62 = zext i576 %r61 to i640
-%r64 = getelementptr i64, i64* %r2, i32 9
-%r65 = load i64, i64* %r64
-%r66 = zext i64 %r65 to i640
-%r67 = shl i640 %r66, 576
-%r68 = or i640 %r62, %r67
-%r69 = zext i640 %r68 to i704
-%r71 = getelementptr i64, i64* %r2, i32 10
-%r72 = load i64, i64* %r71
-%r73 = zext i64 %r72 to i704
-%r74 = shl i704 %r73, 640
-%r75 = or i704 %r69, %r74
-%r76 = zext i704 %r75 to i768
-%r78 = getelementptr i64, i64* %r2, i32 11
-%r79 = load i64, i64* %r78
-%r80 = zext i64 %r79 to i768
-%r81 = shl i768 %r80, 704
-%r82 = or i768 %r76, %r81
-%r83 = zext i768 %r82 to i832
-%r85 = getelementptr i64, i64* %r2, i32 12
-%r86 = load i64, i64* %r85
-%r87 = zext i64 %r86 to i832
-%r88 = shl i832 %r87, 768
-%r89 = or i832 %r83, %r88
-%r90 = zext i832 %r89 to i896
-%r92 = getelementptr i64, i64* %r2, i32 13
-%r93 = load i64, i64* %r92
-%r94 = zext i64 %r93 to i896
-%r95 = shl i896 %r94, 832
-%r96 = or i896 %r90, %r95
-%r97 = load i64, i64* %r3
-%r98 = zext i64 %r97 to i128
-%r100 = getelementptr i64, i64* %r3, i32 1
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i128
-%r103 = shl i128 %r102, 64
-%r104 = or i128 %r98, %r103
-%r105 = zext i128 %r104 to i192
-%r107 = getelementptr i64, i64* %r3, i32 2
-%r108 = load i64, i64* %r107
-%r109 = zext i64 %r108 to i192
-%r110 = shl i192 %r109, 128
-%r111 = or i192 %r105, %r110
-%r112 = zext i192 %r111 to i256
-%r114 = getelementptr i64, i64* %r3, i32 3
-%r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i256
-%r117 = shl i256 %r116, 192
-%r118 = or i256 %r112, %r117
-%r119 = zext i256 %r118 to i320
-%r121 = getelementptr i64, i64* %r3, i32 4
-%r122 = load i64, i64* %r121
-%r123 = zext i64 %r122 to i320
-%r124 = shl i320 %r123, 256
-%r125 = or i320 %r119, %r124
-%r126 = zext i320 %r125 to i384
-%r128 = getelementptr i64, i64* %r3, i32 5
-%r129 = load i64, i64* %r128
-%r130 = zext i64 %r129 to i384
-%r131 = shl i384 %r130, 320
-%r132 = or i384 %r126, %r131
-%r133 = zext i384 %r132 to i448
-%r135 = getelementptr i64, i64* %r3, i32 6
-%r136 = load i64, i64* %r135
-%r137 = zext i64 %r136 to i448
-%r138 = shl i448 %r137, 384
-%r139 = or i448 %r133, %r138
-%r140 = zext i448 %r139 to i512
-%r142 = getelementptr i64, i64* %r3, i32 7
-%r143 = load i64, i64* %r142
-%r144 = zext i64 %r143 to i512
-%r145 = shl i512 %r144, 448
-%r146 = or i512 %r140, %r145
-%r147 = zext i512 %r146 to i576
-%r149 = getelementptr i64, i64* %r3, i32 8
-%r150 = load i64, i64* %r149
-%r151 = zext i64 %r150 to i576
-%r152 = shl i576 %r151, 512
-%r153 = or i576 %r147, %r152
-%r154 = zext i576 %r153 to i640
-%r156 = getelementptr i64, i64* %r3, i32 9
-%r157 = load i64, i64* %r156
-%r158 = zext i64 %r157 to i640
-%r159 = shl i640 %r158, 576
-%r160 = or i640 %r154, %r159
-%r161 = zext i640 %r160 to i704
-%r163 = getelementptr i64, i64* %r3, i32 10
-%r164 = load i64, i64* %r163
-%r165 = zext i64 %r164 to i704
-%r166 = shl i704 %r165, 640
-%r167 = or i704 %r161, %r166
-%r168 = zext i704 %r167 to i768
-%r170 = getelementptr i64, i64* %r3, i32 11
-%r171 = load i64, i64* %r170
-%r172 = zext i64 %r171 to i768
-%r173 = shl i768 %r172, 704
-%r174 = or i768 %r168, %r173
-%r175 = zext i768 %r174 to i832
-%r177 = getelementptr i64, i64* %r3, i32 12
-%r178 = load i64, i64* %r177
-%r179 = zext i64 %r178 to i832
-%r180 = shl i832 %r179, 768
-%r181 = or i832 %r175, %r180
-%r182 = zext i832 %r181 to i896
-%r184 = getelementptr i64, i64* %r3, i32 13
-%r185 = load i64, i64* %r184
-%r186 = zext i64 %r185 to i896
-%r187 = shl i896 %r186, 832
-%r188 = or i896 %r182, %r187
-%r189 = zext i896 %r96 to i960
-%r190 = zext i896 %r188 to i960
-%r191 = add i960 %r189, %r190
-%r192 = trunc i960 %r191 to i448
-%r193 = trunc i448 %r192 to i64
-%r195 = getelementptr i64, i64* %r1, i32 0
-store i64 %r193, i64* %r195
-%r196 = lshr i448 %r192, 64
-%r197 = trunc i448 %r196 to i64
-%r199 = getelementptr i64, i64* %r1, i32 1
-store i64 %r197, i64* %r199
-%r200 = lshr i448 %r196, 64
-%r201 = trunc i448 %r200 to i64
-%r203 = getelementptr i64, i64* %r1, i32 2
-store i64 %r201, i64* %r203
-%r204 = lshr i448 %r200, 64
-%r205 = trunc i448 %r204 to i64
-%r207 = getelementptr i64, i64* %r1, i32 3
-store i64 %r205, i64* %r207
-%r208 = lshr i448 %r204, 64
-%r209 = trunc i448 %r208 to i64
-%r211 = getelementptr i64, i64* %r1, i32 4
-store i64 %r209, i64* %r211
-%r212 = lshr i448 %r208, 64
-%r213 = trunc i448 %r212 to i64
-%r215 = getelementptr i64, i64* %r1, i32 5
-store i64 %r213, i64* %r215
-%r216 = lshr i448 %r212, 64
-%r217 = trunc i448 %r216 to i64
-%r219 = getelementptr i64, i64* %r1, i32 6
-store i64 %r217, i64* %r219
-%r220 = lshr i960 %r191, 448
-%r221 = trunc i960 %r220 to i512
-%r222 = load i64, i64* %r4
-%r223 = zext i64 %r222 to i128
-%r225 = getelementptr i64, i64* %r4, i32 1
-%r226 = load i64, i64* %r225
-%r227 = zext i64 %r226 to i128
-%r228 = shl i128 %r227, 64
-%r229 = or i128 %r223, %r228
-%r230 = zext i128 %r229 to i192
-%r232 = getelementptr i64, i64* %r4, i32 2
-%r233 = load i64, i64* %r232
-%r234 = zext i64 %r233 to i192
-%r235 = shl i192 %r234, 128
-%r236 = or i192 %r230, %r235
-%r237 = zext i192 %r236 to i256
-%r239 = getelementptr i64, i64* %r4, i32 3
-%r240 = load i64, i64* %r239
-%r241 = zext i64 %r240 to i256
-%r242 = shl i256 %r241, 192
-%r243 = or i256 %r237, %r242
-%r244 = zext i256 %r243 to i320
-%r246 = getelementptr i64, i64* %r4, i32 4
-%r247 = load i64, i64* %r246
-%r248 = zext i64 %r247 to i320
-%r249 = shl i320 %r248, 256
-%r250 = or i320 %r244, %r249
-%r251 = zext i320 %r250 to i384
-%r253 = getelementptr i64, i64* %r4, i32 5
-%r254 = load i64, i64* %r253
-%r255 = zext i64 %r254 to i384
-%r256 = shl i384 %r255, 320
-%r257 = or i384 %r251, %r256
-%r258 = zext i384 %r257 to i448
-%r260 = getelementptr i64, i64* %r4, i32 6
-%r261 = load i64, i64* %r260
-%r262 = zext i64 %r261 to i448
-%r263 = shl i448 %r262, 384
-%r264 = or i448 %r258, %r263
-%r265 = zext i448 %r264 to i512
-%r266 = sub i512 %r221, %r265
-%r267 = lshr i512 %r266, 448
-%r268 = trunc i512 %r267 to i1
-%r269 = select i1 %r268, i512 %r221, i512 %r266
-%r270 = trunc i512 %r269 to i448
-%r272 = getelementptr i64, i64* %r1, i32 7
-%r273 = trunc i448 %r270 to i64
-%r275 = getelementptr i64, i64* %r272, i32 0
-store i64 %r273, i64* %r275
-%r276 = lshr i448 %r270, 64
-%r277 = trunc i448 %r276 to i64
-%r279 = getelementptr i64, i64* %r272, i32 1
-store i64 %r277, i64* %r279
-%r280 = lshr i448 %r276, 64
-%r281 = trunc i448 %r280 to i64
-%r283 = getelementptr i64, i64* %r272, i32 2
-store i64 %r281, i64* %r283
-%r284 = lshr i448 %r280, 64
-%r285 = trunc i448 %r284 to i64
-%r287 = getelementptr i64, i64* %r272, i32 3
-store i64 %r285, i64* %r287
-%r288 = lshr i448 %r284, 64
-%r289 = trunc i448 %r288 to i64
-%r291 = getelementptr i64, i64* %r272, i32 4
-store i64 %r289, i64* %r291
-%r292 = lshr i448 %r288, 64
-%r293 = trunc i448 %r292 to i64
-%r295 = getelementptr i64, i64* %r272, i32 5
-store i64 %r293, i64* %r295
-%r296 = lshr i448 %r292, 64
-%r297 = trunc i448 %r296 to i64
-%r299 = getelementptr i64, i64* %r272, i32 6
-store i64 %r297, i64* %r299
-ret void
-}
-define void @mcl_fpDbl_sub7L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r2, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = zext i512 %r54 to i576
-%r57 = getelementptr i64, i64* %r2, i32 8
-%r58 = load i64, i64* %r57
-%r59 = zext i64 %r58 to i576
-%r60 = shl i576 %r59, 512
-%r61 = or i576 %r55, %r60
-%r62 = zext i576 %r61 to i640
-%r64 = getelementptr i64, i64* %r2, i32 9
-%r65 = load i64, i64* %r64
-%r66 = zext i64 %r65 to i640
-%r67 = shl i640 %r66, 576
-%r68 = or i640 %r62, %r67
-%r69 = zext i640 %r68 to i704
-%r71 = getelementptr i64, i64* %r2, i32 10
-%r72 = load i64, i64* %r71
-%r73 = zext i64 %r72 to i704
-%r74 = shl i704 %r73, 640
-%r75 = or i704 %r69, %r74
-%r76 = zext i704 %r75 to i768
-%r78 = getelementptr i64, i64* %r2, i32 11
-%r79 = load i64, i64* %r78
-%r80 = zext i64 %r79 to i768
-%r81 = shl i768 %r80, 704
-%r82 = or i768 %r76, %r81
-%r83 = zext i768 %r82 to i832
-%r85 = getelementptr i64, i64* %r2, i32 12
-%r86 = load i64, i64* %r85
-%r87 = zext i64 %r86 to i832
-%r88 = shl i832 %r87, 768
-%r89 = or i832 %r83, %r88
-%r90 = zext i832 %r89 to i896
-%r92 = getelementptr i64, i64* %r2, i32 13
-%r93 = load i64, i64* %r92
-%r94 = zext i64 %r93 to i896
-%r95 = shl i896 %r94, 832
-%r96 = or i896 %r90, %r95
-%r97 = load i64, i64* %r3
-%r98 = zext i64 %r97 to i128
-%r100 = getelementptr i64, i64* %r3, i32 1
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i128
-%r103 = shl i128 %r102, 64
-%r104 = or i128 %r98, %r103
-%r105 = zext i128 %r104 to i192
-%r107 = getelementptr i64, i64* %r3, i32 2
-%r108 = load i64, i64* %r107
-%r109 = zext i64 %r108 to i192
-%r110 = shl i192 %r109, 128
-%r111 = or i192 %r105, %r110
-%r112 = zext i192 %r111 to i256
-%r114 = getelementptr i64, i64* %r3, i32 3
-%r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i256
-%r117 = shl i256 %r116, 192
-%r118 = or i256 %r112, %r117
-%r119 = zext i256 %r118 to i320
-%r121 = getelementptr i64, i64* %r3, i32 4
-%r122 = load i64, i64* %r121
-%r123 = zext i64 %r122 to i320
-%r124 = shl i320 %r123, 256
-%r125 = or i320 %r119, %r124
-%r126 = zext i320 %r125 to i384
-%r128 = getelementptr i64, i64* %r3, i32 5
-%r129 = load i64, i64* %r128
-%r130 = zext i64 %r129 to i384
-%r131 = shl i384 %r130, 320
-%r132 = or i384 %r126, %r131
-%r133 = zext i384 %r132 to i448
-%r135 = getelementptr i64, i64* %r3, i32 6
-%r136 = load i64, i64* %r135
-%r137 = zext i64 %r136 to i448
-%r138 = shl i448 %r137, 384
-%r139 = or i448 %r133, %r138
-%r140 = zext i448 %r139 to i512
-%r142 = getelementptr i64, i64* %r3, i32 7
-%r143 = load i64, i64* %r142
-%r144 = zext i64 %r143 to i512
-%r145 = shl i512 %r144, 448
-%r146 = or i512 %r140, %r145
-%r147 = zext i512 %r146 to i576
-%r149 = getelementptr i64, i64* %r3, i32 8
-%r150 = load i64, i64* %r149
-%r151 = zext i64 %r150 to i576
-%r152 = shl i576 %r151, 512
-%r153 = or i576 %r147, %r152
-%r154 = zext i576 %r153 to i640
-%r156 = getelementptr i64, i64* %r3, i32 9
-%r157 = load i64, i64* %r156
-%r158 = zext i64 %r157 to i640
-%r159 = shl i640 %r158, 576
-%r160 = or i640 %r154, %r159
-%r161 = zext i640 %r160 to i704
-%r163 = getelementptr i64, i64* %r3, i32 10
-%r164 = load i64, i64* %r163
-%r165 = zext i64 %r164 to i704
-%r166 = shl i704 %r165, 640
-%r167 = or i704 %r161, %r166
-%r168 = zext i704 %r167 to i768
-%r170 = getelementptr i64, i64* %r3, i32 11
-%r171 = load i64, i64* %r170
-%r172 = zext i64 %r171 to i768
-%r173 = shl i768 %r172, 704
-%r174 = or i768 %r168, %r173
-%r175 = zext i768 %r174 to i832
-%r177 = getelementptr i64, i64* %r3, i32 12
-%r178 = load i64, i64* %r177
-%r179 = zext i64 %r178 to i832
-%r180 = shl i832 %r179, 768
-%r181 = or i832 %r175, %r180
-%r182 = zext i832 %r181 to i896
-%r184 = getelementptr i64, i64* %r3, i32 13
-%r185 = load i64, i64* %r184
-%r186 = zext i64 %r185 to i896
-%r187 = shl i896 %r186, 832
-%r188 = or i896 %r182, %r187
-%r189 = zext i896 %r96 to i960
-%r190 = zext i896 %r188 to i960
-%r191 = sub i960 %r189, %r190
-%r192 = trunc i960 %r191 to i448
-%r193 = trunc i448 %r192 to i64
-%r195 = getelementptr i64, i64* %r1, i32 0
-store i64 %r193, i64* %r195
-%r196 = lshr i448 %r192, 64
-%r197 = trunc i448 %r196 to i64
-%r199 = getelementptr i64, i64* %r1, i32 1
-store i64 %r197, i64* %r199
-%r200 = lshr i448 %r196, 64
-%r201 = trunc i448 %r200 to i64
-%r203 = getelementptr i64, i64* %r1, i32 2
-store i64 %r201, i64* %r203
-%r204 = lshr i448 %r200, 64
-%r205 = trunc i448 %r204 to i64
-%r207 = getelementptr i64, i64* %r1, i32 3
-store i64 %r205, i64* %r207
-%r208 = lshr i448 %r204, 64
-%r209 = trunc i448 %r208 to i64
-%r211 = getelementptr i64, i64* %r1, i32 4
-store i64 %r209, i64* %r211
-%r212 = lshr i448 %r208, 64
-%r213 = trunc i448 %r212 to i64
-%r215 = getelementptr i64, i64* %r1, i32 5
-store i64 %r213, i64* %r215
-%r216 = lshr i448 %r212, 64
-%r217 = trunc i448 %r216 to i64
-%r219 = getelementptr i64, i64* %r1, i32 6
-store i64 %r217, i64* %r219
-%r220 = lshr i960 %r191, 448
-%r221 = trunc i960 %r220 to i448
-%r222 = lshr i960 %r191, 896
-%r223 = trunc i960 %r222 to i1
-%r224 = load i64, i64* %r4
-%r225 = zext i64 %r224 to i128
-%r227 = getelementptr i64, i64* %r4, i32 1
-%r228 = load i64, i64* %r227
-%r229 = zext i64 %r228 to i128
-%r230 = shl i128 %r229, 64
-%r231 = or i128 %r225, %r230
-%r232 = zext i128 %r231 to i192
-%r234 = getelementptr i64, i64* %r4, i32 2
-%r235 = load i64, i64* %r234
-%r236 = zext i64 %r235 to i192
-%r237 = shl i192 %r236, 128
-%r238 = or i192 %r232, %r237
-%r239 = zext i192 %r238 to i256
-%r241 = getelementptr i64, i64* %r4, i32 3
-%r242 = load i64, i64* %r241
-%r243 = zext i64 %r242 to i256
-%r244 = shl i256 %r243, 192
-%r245 = or i256 %r239, %r244
-%r246 = zext i256 %r245 to i320
-%r248 = getelementptr i64, i64* %r4, i32 4
-%r249 = load i64, i64* %r248
-%r250 = zext i64 %r249 to i320
-%r251 = shl i320 %r250, 256
-%r252 = or i320 %r246, %r251
-%r253 = zext i320 %r252 to i384
-%r255 = getelementptr i64, i64* %r4, i32 5
-%r256 = load i64, i64* %r255
-%r257 = zext i64 %r256 to i384
-%r258 = shl i384 %r257, 320
-%r259 = or i384 %r253, %r258
-%r260 = zext i384 %r259 to i448
-%r262 = getelementptr i64, i64* %r4, i32 6
-%r263 = load i64, i64* %r262
-%r264 = zext i64 %r263 to i448
-%r265 = shl i448 %r264, 384
-%r266 = or i448 %r260, %r265
-%r268 = select i1 %r223, i448 %r266, i448 0
-%r269 = add i448 %r221, %r268
-%r271 = getelementptr i64, i64* %r1, i32 7
-%r272 = trunc i448 %r269 to i64
-%r274 = getelementptr i64, i64* %r271, i32 0
-store i64 %r272, i64* %r274
-%r275 = lshr i448 %r269, 64
-%r276 = trunc i448 %r275 to i64
-%r278 = getelementptr i64, i64* %r271, i32 1
-store i64 %r276, i64* %r278
-%r279 = lshr i448 %r275, 64
-%r280 = trunc i448 %r279 to i64
-%r282 = getelementptr i64, i64* %r271, i32 2
-store i64 %r280, i64* %r282
-%r283 = lshr i448 %r279, 64
-%r284 = trunc i448 %r283 to i64
-%r286 = getelementptr i64, i64* %r271, i32 3
-store i64 %r284, i64* %r286
-%r287 = lshr i448 %r283, 64
-%r288 = trunc i448 %r287 to i64
-%r290 = getelementptr i64, i64* %r271, i32 4
-store i64 %r288, i64* %r290
-%r291 = lshr i448 %r287, 64
-%r292 = trunc i448 %r291 to i64
-%r294 = getelementptr i64, i64* %r271, i32 5
-store i64 %r292, i64* %r294
-%r295 = lshr i448 %r291, 64
-%r296 = trunc i448 %r295 to i64
-%r298 = getelementptr i64, i64* %r271, i32 6
-store i64 %r296, i64* %r298
-ret void
-}
-define i576 @mulPv512x64(i64* noalias  %r2, i64 %r3)
-{
-%r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0)
-%r6 = trunc i128 %r5 to i64
-%r7 = call i64 @extractHigh64(i128 %r5)
-%r9 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 1)
-%r10 = trunc i128 %r9 to i64
-%r11 = call i64 @extractHigh64(i128 %r9)
-%r13 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 2)
-%r14 = trunc i128 %r13 to i64
-%r15 = call i64 @extractHigh64(i128 %r13)
-%r17 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 3)
-%r18 = trunc i128 %r17 to i64
-%r19 = call i64 @extractHigh64(i128 %r17)
-%r21 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 4)
-%r22 = trunc i128 %r21 to i64
-%r23 = call i64 @extractHigh64(i128 %r21)
-%r25 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 5)
-%r26 = trunc i128 %r25 to i64
-%r27 = call i64 @extractHigh64(i128 %r25)
-%r29 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 6)
-%r30 = trunc i128 %r29 to i64
-%r31 = call i64 @extractHigh64(i128 %r29)
-%r33 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 7)
-%r34 = trunc i128 %r33 to i64
-%r35 = call i64 @extractHigh64(i128 %r33)
-%r36 = zext i64 %r6 to i128
-%r37 = zext i64 %r10 to i128
-%r38 = shl i128 %r37, 64
-%r39 = or i128 %r36, %r38
-%r40 = zext i128 %r39 to i192
-%r41 = zext i64 %r14 to i192
-%r42 = shl i192 %r41, 128
-%r43 = or i192 %r40, %r42
-%r44 = zext i192 %r43 to i256
-%r45 = zext i64 %r18 to i256
-%r46 = shl i256 %r45, 192
-%r47 = or i256 %r44, %r46
-%r48 = zext i256 %r47 to i320
-%r49 = zext i64 %r22 to i320
-%r50 = shl i320 %r49, 256
-%r51 = or i320 %r48, %r50
-%r52 = zext i320 %r51 to i384
-%r53 = zext i64 %r26 to i384
-%r54 = shl i384 %r53, 320
-%r55 = or i384 %r52, %r54
-%r56 = zext i384 %r55 to i448
-%r57 = zext i64 %r30 to i448
-%r58 = shl i448 %r57, 384
-%r59 = or i448 %r56, %r58
-%r60 = zext i448 %r59 to i512
-%r61 = zext i64 %r34 to i512
-%r62 = shl i512 %r61, 448
-%r63 = or i512 %r60, %r62
-%r64 = zext i64 %r7 to i128
-%r65 = zext i64 %r11 to i128
-%r66 = shl i128 %r65, 64
-%r67 = or i128 %r64, %r66
-%r68 = zext i128 %r67 to i192
-%r69 = zext i64 %r15 to i192
-%r70 = shl i192 %r69, 128
-%r71 = or i192 %r68, %r70
-%r72 = zext i192 %r71 to i256
-%r73 = zext i64 %r19 to i256
-%r74 = shl i256 %r73, 192
-%r75 = or i256 %r72, %r74
-%r76 = zext i256 %r75 to i320
-%r77 = zext i64 %r23 to i320
-%r78 = shl i320 %r77, 256
-%r79 = or i320 %r76, %r78
-%r80 = zext i320 %r79 to i384
-%r81 = zext i64 %r27 to i384
-%r82 = shl i384 %r81, 320
-%r83 = or i384 %r80, %r82
-%r84 = zext i384 %r83 to i448
-%r85 = zext i64 %r31 to i448
-%r86 = shl i448 %r85, 384
-%r87 = or i448 %r84, %r86
-%r88 = zext i448 %r87 to i512
-%r89 = zext i64 %r35 to i512
-%r90 = shl i512 %r89, 448
-%r91 = or i512 %r88, %r90
-%r92 = zext i512 %r63 to i576
-%r93 = zext i512 %r91 to i576
-%r94 = shl i576 %r93, 64
-%r95 = add i576 %r92, %r94
-ret i576 %r95
-}
-define void @mcl_fp_mulUnitPre8L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3)
-{
-%r4 = call i576 @mulPv512x64(i64* %r2, i64 %r3)
-%r5 = trunc i576 %r4 to i64
-%r7 = getelementptr i64, i64* %r1, i32 0
-store i64 %r5, i64* %r7
-%r8 = lshr i576 %r4, 64
-%r9 = trunc i576 %r8 to i64
-%r11 = getelementptr i64, i64* %r1, i32 1
-store i64 %r9, i64* %r11
-%r12 = lshr i576 %r8, 64
-%r13 = trunc i576 %r12 to i64
-%r15 = getelementptr i64, i64* %r1, i32 2
-store i64 %r13, i64* %r15
-%r16 = lshr i576 %r12, 64
-%r17 = trunc i576 %r16 to i64
-%r19 = getelementptr i64, i64* %r1, i32 3
-store i64 %r17, i64* %r19
-%r20 = lshr i576 %r16, 64
-%r21 = trunc i576 %r20 to i64
-%r23 = getelementptr i64, i64* %r1, i32 4
-store i64 %r21, i64* %r23
-%r24 = lshr i576 %r20, 64
-%r25 = trunc i576 %r24 to i64
-%r27 = getelementptr i64, i64* %r1, i32 5
-store i64 %r25, i64* %r27
-%r28 = lshr i576 %r24, 64
-%r29 = trunc i576 %r28 to i64
-%r31 = getelementptr i64, i64* %r1, i32 6
-store i64 %r29, i64* %r31
-%r32 = lshr i576 %r28, 64
-%r33 = trunc i576 %r32 to i64
-%r35 = getelementptr i64, i64* %r1, i32 7
-store i64 %r33, i64* %r35
-%r36 = lshr i576 %r32, 64
-%r37 = trunc i576 %r36 to i64
-%r39 = getelementptr i64, i64* %r1, i32 8
-store i64 %r37, i64* %r39
-ret void
-}
-define void @mcl_fpDbl_mulPre8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
-{
-%r5 = getelementptr i64, i64* %r2, i32 4
-%r7 = getelementptr i64, i64* %r3, i32 4
-%r9 = getelementptr i64, i64* %r1, i32 8
-call void @mcl_fpDbl_mulPre4L(i64* %r1, i64* %r2, i64* %r3)
-call void @mcl_fpDbl_mulPre4L(i64* %r9, i64* %r5, i64* %r7)
-%r10 = load i64, i64* %r5
-%r11 = zext i64 %r10 to i128
-%r13 = getelementptr i64, i64* %r5, i32 1
-%r14 = load i64, i64* %r13
-%r15 = zext i64 %r14 to i128
-%r16 = shl i128 %r15, 64
-%r17 = or i128 %r11, %r16
-%r18 = zext i128 %r17 to i192
-%r20 = getelementptr i64, i64* %r5, i32 2
-%r21 = load i64, i64* %r20
-%r22 = zext i64 %r21 to i192
-%r23 = shl i192 %r22, 128
-%r24 = or i192 %r18, %r23
-%r25 = zext i192 %r24 to i256
-%r27 = getelementptr i64, i64* %r5, i32 3
-%r28 = load i64, i64* %r27
-%r29 = zext i64 %r28 to i256
-%r30 = shl i256 %r29, 192
-%r31 = or i256 %r25, %r30
-%r32 = zext i256 %r31 to i320
-%r33 = load i64, i64* %r2
-%r34 = zext i64 %r33 to i128
-%r36 = getelementptr i64, i64* %r2, i32 1
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i128
-%r39 = shl i128 %r38, 64
-%r40 = or i128 %r34, %r39
-%r41 = zext i128 %r40 to i192
-%r43 = getelementptr i64, i64* %r2, i32 2
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i192
-%r46 = shl i192 %r45, 128
-%r47 = or i192 %r41, %r46
-%r48 = zext i192 %r47 to i256
-%r50 = getelementptr i64, i64* %r2, i32 3
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i256
-%r53 = shl i256 %r52, 192
-%r54 = or i256 %r48, %r53
-%r55 = zext i256 %r54 to i320
-%r56 = load i64, i64* %r7
-%r57 = zext i64 %r56 to i128
-%r59 = getelementptr i64, i64* %r7, i32 1
-%r60 = load i64, i64* %r59
-%r61 = zext i64 %r60 to i128
-%r62 = shl i128 %r61, 64
-%r63 = or i128 %r57, %r62
-%r64 = zext i128 %r63 to i192
-%r66 = getelementptr i64, i64* %r7, i32 2
-%r67 = load i64, i64* %r66
-%r68 = zext i64 %r67 to i192
-%r69 = shl i192 %r68, 128
-%r70 = or i192 %r64, %r69
-%r71 = zext i192 %r70 to i256
-%r73 = getelementptr i64, i64* %r7, i32 3
-%r74 = load i64, i64* %r73
-%r75 = zext i64 %r74 to i256
-%r76 = shl i256 %r75, 192
-%r77 = or i256 %r71, %r76
-%r78 = zext i256 %r77 to i320
-%r79 = load i64, i64* %r3
-%r80 = zext i64 %r79 to i128
-%r82 = getelementptr i64, i64* %r3, i32 1
-%r83 = load i64, i64* %r82
-%r84 = zext i64 %r83 to i128
-%r85 = shl i128 %r84, 64
-%r86 = or i128 %r80, %r85
-%r87 = zext i128 %r86 to i192
-%r89 = getelementptr i64, i64* %r3, i32 2
-%r90 = load i64, i64* %r89
-%r91 = zext i64 %r90 to i192
-%r92 = shl i192 %r91, 128
-%r93 = or i192 %r87, %r92
-%r94 = zext i192 %r93 to i256
-%r96 = getelementptr i64, i64* %r3, i32 3
-%r97 = load i64, i64* %r96
-%r98 = zext i64 %r97 to i256
-%r99 = shl i256 %r98, 192
-%r100 = or i256 %r94, %r99
-%r101 = zext i256 %r100 to i320
-%r102 = add i320 %r32, %r55
-%r103 = add i320 %r78, %r101
-%r105 = alloca i64, i32 8
-%r106 = trunc i320 %r102 to i256
-%r107 = trunc i320 %r103 to i256
-%r108 = lshr i320 %r102, 256
-%r109 = trunc i320 %r108 to i1
-%r110 = lshr i320 %r103, 256
-%r111 = trunc i320 %r110 to i1
-%r112 = and i1 %r109, %r111
-%r114 = select i1 %r109, i256 %r107, i256 0
-%r116 = select i1 %r111, i256 %r106, i256 0
-%r118 = alloca i64, i32 4
-%r120 = alloca i64, i32 4
-%r121 = trunc i256 %r106 to i64
-%r123 = getelementptr i64, i64* %r118, i32 0
-store i64 %r121, i64* %r123
-%r124 = lshr i256 %r106, 64
-%r125 = trunc i256 %r124 to i64
-%r127 = getelementptr i64, i64* %r118, i32 1
-store i64 %r125, i64* %r127
-%r128 = lshr i256 %r124, 64
-%r129 = trunc i256 %r128 to i64
-%r131 = getelementptr i64, i64* %r118, i32 2
-store i64 %r129, i64* %r131
-%r132 = lshr i256 %r128, 64
-%r133 = trunc i256 %r132 to i64
-%r135 = getelementptr i64, i64* %r118, i32 3
-store i64 %r133, i64* %r135
-%r136 = trunc i256 %r107 to i64
-%r138 = getelementptr i64, i64* %r120, i32 0
-store i64 %r136, i64* %r138
-%r139 = lshr i256 %r107, 64
-%r140 = trunc i256 %r139 to i64
-%r142 = getelementptr i64, i64* %r120, i32 1
-store i64 %r140, i64* %r142
-%r143 = lshr i256 %r139, 64
-%r144 = trunc i256 %r143 to i64
-%r146 = getelementptr i64, i64* %r120, i32 2
-store i64 %r144, i64* %r146
-%r147 = lshr i256 %r143, 64
-%r148 = trunc i256 %r147 to i64
-%r150 = getelementptr i64, i64* %r120, i32 3
-store i64 %r148, i64* %r150
-call void @mcl_fpDbl_mulPre4L(i64* %r105, i64* %r118, i64* %r120)
-%r151 = load i64, i64* %r105
-%r152 = zext i64 %r151 to i128
-%r154 = getelementptr i64, i64* %r105, i32 1
-%r155 = load i64, i64* %r154
-%r156 = zext i64 %r155 to i128
-%r157 = shl i128 %r156, 64
-%r158 = or i128 %r152, %r157
-%r159 = zext i128 %r158 to i192
-%r161 = getelementptr i64, i64* %r105, i32 2
-%r162 = load i64, i64* %r161
-%r163 = zext i64 %r162 to i192
-%r164 = shl i192 %r163, 128
-%r165 = or i192 %r159, %r164
-%r166 = zext i192 %r165 to i256
-%r168 = getelementptr i64, i64* %r105, i32 3
-%r169 = load i64, i64* %r168
-%r170 = zext i64 %r169 to i256
-%r171 = shl i256 %r170, 192
-%r172 = or i256 %r166, %r171
-%r173 = zext i256 %r172 to i320
-%r175 = getelementptr i64, i64* %r105, i32 4
-%r176 = load i64, i64* %r175
-%r177 = zext i64 %r176 to i320
-%r178 = shl i320 %r177, 256
-%r179 = or i320 %r173, %r178
-%r180 = zext i320 %r179 to i384
-%r182 = getelementptr i64, i64* %r105, i32 5
-%r183 = load i64, i64* %r182
-%r184 = zext i64 %r183 to i384
-%r185 = shl i384 %r184, 320
-%r186 = or i384 %r180, %r185
-%r187 = zext i384 %r186 to i448
-%r189 = getelementptr i64, i64* %r105, i32 6
-%r190 = load i64, i64* %r189
-%r191 = zext i64 %r190 to i448
-%r192 = shl i448 %r191, 384
-%r193 = or i448 %r187, %r192
-%r194 = zext i448 %r193 to i512
-%r196 = getelementptr i64, i64* %r105, i32 7
-%r197 = load i64, i64* %r196
-%r198 = zext i64 %r197 to i512
-%r199 = shl i512 %r198, 448
-%r200 = or i512 %r194, %r199
-%r201 = zext i512 %r200 to i576
-%r202 = zext i1 %r112 to i576
-%r203 = shl i576 %r202, 512
-%r204 = or i576 %r201, %r203
-%r205 = zext i256 %r114 to i576
-%r206 = zext i256 %r116 to i576
-%r207 = shl i576 %r205, 256
-%r208 = shl i576 %r206, 256
-%r209 = add i576 %r204, %r207
-%r210 = add i576 %r209, %r208
-%r211 = load i64, i64* %r1
-%r212 = zext i64 %r211 to i128
-%r214 = getelementptr i64, i64* %r1, i32 1
-%r215 = load i64, i64* %r214
-%r216 = zext i64 %r215 to i128
-%r217 = shl i128 %r216, 64
-%r218 = or i128 %r212, %r217
-%r219 = zext i128 %r218 to i192
-%r221 = getelementptr i64, i64* %r1, i32 2
-%r222 = load i64, i64* %r221
-%r223 = zext i64 %r222 to i192
-%r224 = shl i192 %r223, 128
-%r225 = or i192 %r219, %r224
-%r226 = zext i192 %r225 to i256
-%r228 = getelementptr i64, i64* %r1, i32 3
-%r229 = load i64, i64* %r228
-%r230 = zext i64 %r229 to i256
-%r231 = shl i256 %r230, 192
-%r232 = or i256 %r226, %r231
-%r233 = zext i256 %r232 to i320
-%r235 = getelementptr i64, i64* %r1, i32 4
-%r236 = load i64, i64* %r235
-%r237 = zext i64 %r236 to i320
-%r238 = shl i320 %r237, 256
-%r239 = or i320 %r233, %r238
-%r240 = zext i320 %r239 to i384
-%r242 = getelementptr i64, i64* %r1, i32 5
-%r243 = load i64, i64* %r242
-%r244 = zext i64 %r243 to i384
-%r245 = shl i384 %r244, 320
-%r246 = or i384 %r240, %r245
-%r247 = zext i384 %r246 to i448
-%r249 = getelementptr i64, i64* %r1, i32 6
-%r250 = load i64, i64* %r249
-%r251 = zext i64 %r250 to i448
-%r252 = shl i448 %r251, 384
-%r253 = or i448 %r247, %r252
-%r254 = zext i448 %r253 to i512
-%r256 = getelementptr i64, i64* %r1, i32 7
-%r257 = load i64, i64* %r256
-%r258 = zext i64 %r257 to i512
-%r259 = shl i512 %r258, 448
-%r260 = or i512 %r254, %r259
-%r261 = zext i512 %r260 to i576
-%r262 = sub i576 %r210, %r261
-%r264 = getelementptr i64, i64* %r1, i32 8
-%r265 = load i64, i64* %r264
-%r266 = zext i64 %r265 to i128
-%r268 = getelementptr i64, i64* %r264, i32 1
-%r269 = load i64, i64* %r268
-%r270 = zext i64 %r269 to i128
-%r271 = shl i128 %r270, 64
-%r272 = or i128 %r266, %r271
-%r273 = zext i128 %r272 to i192
-%r275 = getelementptr i64, i64* %r264, i32 2
-%r276 = load i64, i64* %r275
-%r277 = zext i64 %r276 to i192
-%r278 = shl i192 %r277, 128
-%r279 = or i192 %r273, %r278
-%r280 = zext i192 %r279 to i256
-%r282 = getelementptr i64, i64* %r264, i32 3
-%r283 = load i64, i64* %r282
-%r284 = zext i64 %r283 to i256
-%r285 = shl i256 %r284, 192
-%r286 = or i256 %r280, %r285
-%r287 = zext i256 %r286 to i320
-%r289 = getelementptr i64, i64* %r264, i32 4
-%r290 = load i64, i64* %r289
-%r291 = zext i64 %r290 to i320
-%r292 = shl i320 %r291, 256
-%r293 = or i320 %r287, %r292
-%r294 = zext i320 %r293 to i384
-%r296 = getelementptr i64, i64* %r264, i32 5
-%r297 = load i64, i64* %r296
-%r298 = zext i64 %r297 to i384
-%r299 = shl i384 %r298, 320
-%r300 = or i384 %r294, %r299
-%r301 = zext i384 %r300 to i448
-%r303 = getelementptr i64, i64* %r264, i32 6
-%r304 = load i64, i64* %r303
-%r305 = zext i64 %r304 to i448
-%r306 = shl i448 %r305, 384
-%r307 = or i448 %r301, %r306
-%r308 = zext i448 %r307 to i512
-%r310 = getelementptr i64, i64* %r264, i32 7
-%r311 = load i64, i64* %r310
-%r312 = zext i64 %r311 to i512
-%r313 = shl i512 %r312, 448
-%r314 = or i512 %r308, %r313
-%r315 = zext i512 %r314 to i576
-%r316 = sub i576 %r262, %r315
-%r317 = zext i576 %r316 to i768
-%r319 = getelementptr i64, i64* %r1, i32 4
-%r320 = load i64, i64* %r319
-%r321 = zext i64 %r320 to i128
-%r323 = getelementptr i64, i64* %r319, i32 1
-%r324 = load i64, i64* %r323
-%r325 = zext i64 %r324 to i128
-%r326 = shl i128 %r325, 64
-%r327 = or i128 %r321, %r326
-%r328 = zext i128 %r327 to i192
-%r330 = getelementptr i64, i64* %r319, i32 2
-%r331 = load i64, i64* %r330
-%r332 = zext i64 %r331 to i192
-%r333 = shl i192 %r332, 128
-%r334 = or i192 %r328, %r333
-%r335 = zext i192 %r334 to i256
-%r337 = getelementptr i64, i64* %r319, i32 3
-%r338 = load i64, i64* %r337
-%r339 = zext i64 %r338 to i256
-%r340 = shl i256 %r339, 192
-%r341 = or i256 %r335, %r340
-%r342 = zext i256 %r341 to i320
-%r344 = getelementptr i64, i64* %r319, i32 4
-%r345 = load i64, i64* %r344
-%r346 = zext i64 %r345 to i320
-%r347 = shl i320 %r346, 256
-%r348 = or i320 %r342, %r347
-%r349 = zext i320 %r348 to i384
-%r351 = getelementptr i64, i64* %r319, i32 5
-%r352 = load i64, i64* %r351
-%r353 = zext i64 %r352 to i384
-%r354 = shl i384 %r353, 320
-%r355 = or i384 %r349, %r354
-%r356 = zext i384 %r355 to i448
-%r358 = getelementptr i64, i64* %r319, i32 6
-%r359 = load i64, i64* %r358
-%r360 = zext i64 %r359 to i448
-%r361 = shl i448 %r360, 384
-%r362 = or i448 %r356, %r361
-%r363 = zext i448 %r362 to i512
-%r365 = getelementptr i64, i64* %r319, i32 7
-%r366 = load i64, i64* %r365
-%r367 = zext i64 %r366 to i512
-%r368 = shl i512 %r367, 448
-%r369 = or i512 %r363, %r368
-%r370 = zext i512 %r369 to i576
-%r372 = getelementptr i64, i64* %r319, i32 8
-%r373 = load i64, i64* %r372
-%r374 = zext i64 %r373 to i576
-%r375 = shl i576 %r374, 512
-%r376 = or i576 %r370, %r375
-%r377 = zext i576 %r376 to i640
-%r379 = getelementptr i64, i64* %r319, i32 9
-%r380 = load i64, i64* %r379
-%r381 = zext i64 %r380 to i640
-%r382 = shl i640 %r381, 576
-%r383 = or i640 %r377, %r382
-%r384 = zext i640 %r383 to i704
-%r386 = getelementptr i64, i64* %r319, i32 10
-%r387 = load i64, i64* %r386
-%r388 = zext i64 %r387 to i704
-%r389 = shl i704 %r388, 640
-%r390 = or i704 %r384, %r389
-%r391 = zext i704 %r390 to i768
-%r393 = getelementptr i64, i64* %r319, i32 11
-%r394 = load i64, i64* %r393
-%r395 = zext i64 %r394 to i768
-%r396 = shl i768 %r395, 704
-%r397 = or i768 %r391, %r396
-%r398 = add i768 %r317, %r397
-%r400 = getelementptr i64, i64* %r1, i32 4
-%r401 = trunc i768 %r398 to i64
-%r403 = getelementptr i64, i64* %r400, i32 0
-store i64 %r401, i64* %r403
-%r404 = lshr i768 %r398, 64
-%r405 = trunc i768 %r404 to i64
-%r407 = getelementptr i64, i64* %r400, i32 1
-store i64 %r405, i64* %r407
-%r408 = lshr i768 %r404, 64
-%r409 = trunc i768 %r408 to i64
-%r411 = getelementptr i64, i64* %r400, i32 2
-store i64 %r409, i64* %r411
-%r412 = lshr i768 %r408, 64
-%r413 = trunc i768 %r412 to i64
-%r415 = getelementptr i64, i64* %r400, i32 3
-store i64 %r413, i64* %r415
-%r416 = lshr i768 %r412, 64
-%r417 = trunc i768 %r416 to i64
-%r419 = getelementptr i64, i64* %r400, i32 4
-store i64 %r417, i64* %r419
-%r420 = lshr i768 %r416, 64
-%r421 = trunc i768 %r420 to i64
-%r423 = getelementptr i64, i64* %r400, i32 5
-store i64 %r421, i64* %r423
-%r424 = lshr i768 %r420, 64
-%r425 = trunc i768 %r424 to i64
-%r427 = getelementptr i64, i64* %r400, i32 6
-store i64 %r425, i64* %r427
-%r428 = lshr i768 %r424, 64
-%r429 = trunc i768 %r428 to i64
-%r431 = getelementptr i64, i64* %r400, i32 7
-store i64 %r429, i64* %r431
-%r432 = lshr i768 %r428, 64
-%r433 = trunc i768 %r432 to i64
-%r435 = getelementptr i64, i64* %r400, i32 8
-store i64 %r433, i64* %r435
-%r436 = lshr i768 %r432, 64
-%r437 = trunc i768 %r436 to i64
-%r439 = getelementptr i64, i64* %r400, i32 9
-store i64 %r437, i64* %r439
-%r440 = lshr i768 %r436, 64
-%r441 = trunc i768 %r440 to i64
-%r443 = getelementptr i64, i64* %r400, i32 10
-store i64 %r441, i64* %r443
-%r444 = lshr i768 %r440, 64
-%r445 = trunc i768 %r444 to i64
-%r447 = getelementptr i64, i64* %r400, i32 11
-store i64 %r445, i64* %r447
-ret void
-}
-define void @mcl_fpDbl_sqrPre8L(i64* noalias  %r1, i64* noalias  %r2)
-{
-%r4 = getelementptr i64, i64* %r2, i32 4
-%r6 = getelementptr i64, i64* %r2, i32 4
-%r8 = getelementptr i64, i64* %r1, i32 8
-call void @mcl_fpDbl_mulPre4L(i64* %r1, i64* %r2, i64* %r2)
-call void @mcl_fpDbl_mulPre4L(i64* %r8, i64* %r4, i64* %r6)
-%r9 = load i64, i64* %r4
-%r10 = zext i64 %r9 to i128
-%r12 = getelementptr i64, i64* %r4, i32 1
-%r13 = load i64, i64* %r12
-%r14 = zext i64 %r13 to i128
-%r15 = shl i128 %r14, 64
-%r16 = or i128 %r10, %r15
-%r17 = zext i128 %r16 to i192
-%r19 = getelementptr i64, i64* %r4, i32 2
-%r20 = load i64, i64* %r19
-%r21 = zext i64 %r20 to i192
-%r22 = shl i192 %r21, 128
-%r23 = or i192 %r17, %r22
-%r24 = zext i192 %r23 to i256
-%r26 = getelementptr i64, i64* %r4, i32 3
-%r27 = load i64, i64* %r26
-%r28 = zext i64 %r27 to i256
-%r29 = shl i256 %r28, 192
-%r30 = or i256 %r24, %r29
-%r31 = zext i256 %r30 to i320
-%r32 = load i64, i64* %r2
-%r33 = zext i64 %r32 to i128
-%r35 = getelementptr i64, i64* %r2, i32 1
-%r36 = load i64, i64* %r35
-%r37 = zext i64 %r36 to i128
-%r38 = shl i128 %r37, 64
-%r39 = or i128 %r33, %r38
-%r40 = zext i128 %r39 to i192
-%r42 = getelementptr i64, i64* %r2, i32 2
-%r43 = load i64, i64* %r42
-%r44 = zext i64 %r43 to i192
-%r45 = shl i192 %r44, 128
-%r46 = or i192 %r40, %r45
-%r47 = zext i192 %r46 to i256
-%r49 = getelementptr i64, i64* %r2, i32 3
-%r50 = load i64, i64* %r49
-%r51 = zext i64 %r50 to i256
-%r52 = shl i256 %r51, 192
-%r53 = or i256 %r47, %r52
-%r54 = zext i256 %r53 to i320
-%r55 = load i64, i64* %r6
-%r56 = zext i64 %r55 to i128
-%r58 = getelementptr i64, i64* %r6, i32 1
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i128
-%r61 = shl i128 %r60, 64
-%r62 = or i128 %r56, %r61
-%r63 = zext i128 %r62 to i192
-%r65 = getelementptr i64, i64* %r6, i32 2
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i192
-%r68 = shl i192 %r67, 128
-%r69 = or i192 %r63, %r68
-%r70 = zext i192 %r69 to i256
-%r72 = getelementptr i64, i64* %r6, i32 3
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i256
-%r75 = shl i256 %r74, 192
-%r76 = or i256 %r70, %r75
-%r77 = zext i256 %r76 to i320
-%r78 = load i64, i64* %r2
-%r79 = zext i64 %r78 to i128
-%r81 = getelementptr i64, i64* %r2, i32 1
-%r82 = load i64, i64* %r81
-%r83 = zext i64 %r82 to i128
-%r84 = shl i128 %r83, 64
-%r85 = or i128 %r79, %r84
-%r86 = zext i128 %r85 to i192
-%r88 = getelementptr i64, i64* %r2, i32 2
-%r89 = load i64, i64* %r88
-%r90 = zext i64 %r89 to i192
-%r91 = shl i192 %r90, 128
-%r92 = or i192 %r86, %r91
-%r93 = zext i192 %r92 to i256
-%r95 = getelementptr i64, i64* %r2, i32 3
-%r96 = load i64, i64* %r95
-%r97 = zext i64 %r96 to i256
-%r98 = shl i256 %r97, 192
-%r99 = or i256 %r93, %r98
-%r100 = zext i256 %r99 to i320
-%r101 = add i320 %r31, %r54
-%r102 = add i320 %r77, %r100
-%r104 = alloca i64, i32 8
-%r105 = trunc i320 %r101 to i256
-%r106 = trunc i320 %r102 to i256
-%r107 = lshr i320 %r101, 256
-%r108 = trunc i320 %r107 to i1
-%r109 = lshr i320 %r102, 256
-%r110 = trunc i320 %r109 to i1
-%r111 = and i1 %r108, %r110
-%r113 = select i1 %r108, i256 %r106, i256 0
-%r115 = select i1 %r110, i256 %r105, i256 0
-%r117 = alloca i64, i32 4
-%r119 = alloca i64, i32 4
-%r120 = trunc i256 %r105 to i64
-%r122 = getelementptr i64, i64* %r117, i32 0
-store i64 %r120, i64* %r122
-%r123 = lshr i256 %r105, 64
-%r124 = trunc i256 %r123 to i64
-%r126 = getelementptr i64, i64* %r117, i32 1
-store i64 %r124, i64* %r126
-%r127 = lshr i256 %r123, 64
-%r128 = trunc i256 %r127 to i64
-%r130 = getelementptr i64, i64* %r117, i32 2
-store i64 %r128, i64* %r130
-%r131 = lshr i256 %r127, 64
-%r132 = trunc i256 %r131 to i64
-%r134 = getelementptr i64, i64* %r117, i32 3
-store i64 %r132, i64* %r134
-%r135 = trunc i256 %r106 to i64
-%r137 = getelementptr i64, i64* %r119, i32 0
-store i64 %r135, i64* %r137
-%r138 = lshr i256 %r106, 64
-%r139 = trunc i256 %r138 to i64
-%r141 = getelementptr i64, i64* %r119, i32 1
-store i64 %r139, i64* %r141
-%r142 = lshr i256 %r138, 64
-%r143 = trunc i256 %r142 to i64
-%r145 = getelementptr i64, i64* %r119, i32 2
-store i64 %r143, i64* %r145
-%r146 = lshr i256 %r142, 64
-%r147 = trunc i256 %r146 to i64
-%r149 = getelementptr i64, i64* %r119, i32 3
-store i64 %r147, i64* %r149
-call void @mcl_fpDbl_mulPre4L(i64* %r104, i64* %r117, i64* %r119)
-%r150 = load i64, i64* %r104
-%r151 = zext i64 %r150 to i128
-%r153 = getelementptr i64, i64* %r104, i32 1
-%r154 = load i64, i64* %r153
-%r155 = zext i64 %r154 to i128
-%r156 = shl i128 %r155, 64
-%r157 = or i128 %r151, %r156
-%r158 = zext i128 %r157 to i192
-%r160 = getelementptr i64, i64* %r104, i32 2
-%r161 = load i64, i64* %r160
-%r162 = zext i64 %r161 to i192
-%r163 = shl i192 %r162, 128
-%r164 = or i192 %r158, %r163
-%r165 = zext i192 %r164 to i256
-%r167 = getelementptr i64, i64* %r104, i32 3
-%r168 = load i64, i64* %r167
-%r169 = zext i64 %r168 to i256
-%r170 = shl i256 %r169, 192
-%r171 = or i256 %r165, %r170
-%r172 = zext i256 %r171 to i320
-%r174 = getelementptr i64, i64* %r104, i32 4
-%r175 = load i64, i64* %r174
-%r176 = zext i64 %r175 to i320
-%r177 = shl i320 %r176, 256
-%r178 = or i320 %r172, %r177
-%r179 = zext i320 %r178 to i384
-%r181 = getelementptr i64, i64* %r104, i32 5
-%r182 = load i64, i64* %r181
-%r183 = zext i64 %r182 to i384
-%r184 = shl i384 %r183, 320
-%r185 = or i384 %r179, %r184
-%r186 = zext i384 %r185 to i448
-%r188 = getelementptr i64, i64* %r104, i32 6
-%r189 = load i64, i64* %r188
-%r190 = zext i64 %r189 to i448
-%r191 = shl i448 %r190, 384
-%r192 = or i448 %r186, %r191
-%r193 = zext i448 %r192 to i512
-%r195 = getelementptr i64, i64* %r104, i32 7
-%r196 = load i64, i64* %r195
-%r197 = zext i64 %r196 to i512
-%r198 = shl i512 %r197, 448
-%r199 = or i512 %r193, %r198
-%r200 = zext i512 %r199 to i576
-%r201 = zext i1 %r111 to i576
-%r202 = shl i576 %r201, 512
-%r203 = or i576 %r200, %r202
-%r204 = zext i256 %r113 to i576
-%r205 = zext i256 %r115 to i576
-%r206 = shl i576 %r204, 256
-%r207 = shl i576 %r205, 256
-%r208 = add i576 %r203, %r206
-%r209 = add i576 %r208, %r207
-%r210 = load i64, i64* %r1
-%r211 = zext i64 %r210 to i128
-%r213 = getelementptr i64, i64* %r1, i32 1
-%r214 = load i64, i64* %r213
-%r215 = zext i64 %r214 to i128
-%r216 = shl i128 %r215, 64
-%r217 = or i128 %r211, %r216
-%r218 = zext i128 %r217 to i192
-%r220 = getelementptr i64, i64* %r1, i32 2
-%r221 = load i64, i64* %r220
-%r222 = zext i64 %r221 to i192
-%r223 = shl i192 %r222, 128
-%r224 = or i192 %r218, %r223
-%r225 = zext i192 %r224 to i256
-%r227 = getelementptr i64, i64* %r1, i32 3
-%r228 = load i64, i64* %r227
-%r229 = zext i64 %r228 to i256
-%r230 = shl i256 %r229, 192
-%r231 = or i256 %r225, %r230
-%r232 = zext i256 %r231 to i320
-%r234 = getelementptr i64, i64* %r1, i32 4
-%r235 = load i64, i64* %r234
-%r236 = zext i64 %r235 to i320
-%r237 = shl i320 %r236, 256
-%r238 = or i320 %r232, %r237
-%r239 = zext i320 %r238 to i384
-%r241 = getelementptr i64, i64* %r1, i32 5
-%r242 = load i64, i64* %r241
-%r243 = zext i64 %r242 to i384
-%r244 = shl i384 %r243, 320
-%r245 = or i384 %r239, %r244
-%r246 = zext i384 %r245 to i448
-%r248 = getelementptr i64, i64* %r1, i32 6
-%r249 = load i64, i64* %r248
-%r250 = zext i64 %r249 to i448
-%r251 = shl i448 %r250, 384
-%r252 = or i448 %r246, %r251
-%r253 = zext i448 %r252 to i512
-%r255 = getelementptr i64, i64* %r1, i32 7
-%r256 = load i64, i64* %r255
-%r257 = zext i64 %r256 to i512
-%r258 = shl i512 %r257, 448
-%r259 = or i512 %r253, %r258
-%r260 = zext i512 %r259 to i576
-%r261 = sub i576 %r209, %r260
-%r263 = getelementptr i64, i64* %r1, i32 8
-%r264 = load i64, i64* %r263
-%r265 = zext i64 %r264 to i128
-%r267 = getelementptr i64, i64* %r263, i32 1
-%r268 = load i64, i64* %r267
-%r269 = zext i64 %r268 to i128
-%r270 = shl i128 %r269, 64
-%r271 = or i128 %r265, %r270
-%r272 = zext i128 %r271 to i192
-%r274 = getelementptr i64, i64* %r263, i32 2
-%r275 = load i64, i64* %r274
-%r276 = zext i64 %r275 to i192
-%r277 = shl i192 %r276, 128
-%r278 = or i192 %r272, %r277
-%r279 = zext i192 %r278 to i256
-%r281 = getelementptr i64, i64* %r263, i32 3
-%r282 = load i64, i64* %r281
-%r283 = zext i64 %r282 to i256
-%r284 = shl i256 %r283, 192
-%r285 = or i256 %r279, %r284
-%r286 = zext i256 %r285 to i320
-%r288 = getelementptr i64, i64* %r263, i32 4
-%r289 = load i64, i64* %r288
-%r290 = zext i64 %r289 to i320
-%r291 = shl i320 %r290, 256
-%r292 = or i320 %r286, %r291
-%r293 = zext i320 %r292 to i384
-%r295 = getelementptr i64, i64* %r263, i32 5
-%r296 = load i64, i64* %r295
-%r297 = zext i64 %r296 to i384
-%r298 = shl i384 %r297, 320
-%r299 = or i384 %r293, %r298
-%r300 = zext i384 %r299 to i448
-%r302 = getelementptr i64, i64* %r263, i32 6
-%r303 = load i64, i64* %r302
-%r304 = zext i64 %r303 to i448
-%r305 = shl i448 %r304, 384
-%r306 = or i448 %r300, %r305
-%r307 = zext i448 %r306 to i512
-%r309 = getelementptr i64, i64* %r263, i32 7
-%r310 = load i64, i64* %r309
-%r311 = zext i64 %r310 to i512
-%r312 = shl i512 %r311, 448
-%r313 = or i512 %r307, %r312
-%r314 = zext i512 %r313 to i576
-%r315 = sub i576 %r261, %r314
-%r316 = zext i576 %r315 to i768
-%r318 = getelementptr i64, i64* %r1, i32 4
-%r319 = load i64, i64* %r318
-%r320 = zext i64 %r319 to i128
-%r322 = getelementptr i64, i64* %r318, i32 1
-%r323 = load i64, i64* %r322
-%r324 = zext i64 %r323 to i128
-%r325 = shl i128 %r324, 64
-%r326 = or i128 %r320, %r325
-%r327 = zext i128 %r326 to i192
-%r329 = getelementptr i64, i64* %r318, i32 2
-%r330 = load i64, i64* %r329
-%r331 = zext i64 %r330 to i192
-%r332 = shl i192 %r331, 128
-%r333 = or i192 %r327, %r332
-%r334 = zext i192 %r333 to i256
-%r336 = getelementptr i64, i64* %r318, i32 3
-%r337 = load i64, i64* %r336
-%r338 = zext i64 %r337 to i256
-%r339 = shl i256 %r338, 192
-%r340 = or i256 %r334, %r339
-%r341 = zext i256 %r340 to i320
-%r343 = getelementptr i64, i64* %r318, i32 4
-%r344 = load i64, i64* %r343
-%r345 = zext i64 %r344 to i320
-%r346 = shl i320 %r345, 256
-%r347 = or i320 %r341, %r346
-%r348 = zext i320 %r347 to i384
-%r350 = getelementptr i64, i64* %r318, i32 5
-%r351 = load i64, i64* %r350
-%r352 = zext i64 %r351 to i384
-%r353 = shl i384 %r352, 320
-%r354 = or i384 %r348, %r353
-%r355 = zext i384 %r354 to i448
-%r357 = getelementptr i64, i64* %r318, i32 6
-%r358 = load i64, i64* %r357
-%r359 = zext i64 %r358 to i448
-%r360 = shl i448 %r359, 384
-%r361 = or i448 %r355, %r360
-%r362 = zext i448 %r361 to i512
-%r364 = getelementptr i64, i64* %r318, i32 7
-%r365 = load i64, i64* %r364
-%r366 = zext i64 %r365 to i512
-%r367 = shl i512 %r366, 448
-%r368 = or i512 %r362, %r367
-%r369 = zext i512 %r368 to i576
-%r371 = getelementptr i64, i64* %r318, i32 8
-%r372 = load i64, i64* %r371
-%r373 = zext i64 %r372 to i576
-%r374 = shl i576 %r373, 512
-%r375 = or i576 %r369, %r374
-%r376 = zext i576 %r375 to i640
-%r378 = getelementptr i64, i64* %r318, i32 9
-%r379 = load i64, i64* %r378
-%r380 = zext i64 %r379 to i640
-%r381 = shl i640 %r380, 576
-%r382 = or i640 %r376, %r381
-%r383 = zext i640 %r382 to i704
-%r385 = getelementptr i64, i64* %r318, i32 10
-%r386 = load i64, i64* %r385
-%r387 = zext i64 %r386 to i704
-%r388 = shl i704 %r387, 640
-%r389 = or i704 %r383, %r388
-%r390 = zext i704 %r389 to i768
-%r392 = getelementptr i64, i64* %r318, i32 11
-%r393 = load i64, i64* %r392
-%r394 = zext i64 %r393 to i768
-%r395 = shl i768 %r394, 704
-%r396 = or i768 %r390, %r395
-%r397 = add i768 %r316, %r396
-%r399 = getelementptr i64, i64* %r1, i32 4
-%r400 = trunc i768 %r397 to i64
-%r402 = getelementptr i64, i64* %r399, i32 0
-store i64 %r400, i64* %r402
-%r403 = lshr i768 %r397, 64
-%r404 = trunc i768 %r403 to i64
-%r406 = getelementptr i64, i64* %r399, i32 1
-store i64 %r404, i64* %r406
-%r407 = lshr i768 %r403, 64
-%r408 = trunc i768 %r407 to i64
-%r410 = getelementptr i64, i64* %r399, i32 2
-store i64 %r408, i64* %r410
-%r411 = lshr i768 %r407, 64
-%r412 = trunc i768 %r411 to i64
-%r414 = getelementptr i64, i64* %r399, i32 3
-store i64 %r412, i64* %r414
-%r415 = lshr i768 %r411, 64
-%r416 = trunc i768 %r415 to i64
-%r418 = getelementptr i64, i64* %r399, i32 4
-store i64 %r416, i64* %r418
-%r419 = lshr i768 %r415, 64
-%r420 = trunc i768 %r419 to i64
-%r422 = getelementptr i64, i64* %r399, i32 5
-store i64 %r420, i64* %r422
-%r423 = lshr i768 %r419, 64
-%r424 = trunc i768 %r423 to i64
-%r426 = getelementptr i64, i64* %r399, i32 6
-store i64 %r424, i64* %r426
-%r427 = lshr i768 %r423, 64
-%r428 = trunc i768 %r427 to i64
-%r430 = getelementptr i64, i64* %r399, i32 7
-store i64 %r428, i64* %r430
-%r431 = lshr i768 %r427, 64
-%r432 = trunc i768 %r431 to i64
-%r434 = getelementptr i64, i64* %r399, i32 8
-store i64 %r432, i64* %r434
-%r435 = lshr i768 %r431, 64
-%r436 = trunc i768 %r435 to i64
-%r438 = getelementptr i64, i64* %r399, i32 9
-store i64 %r436, i64* %r438
-%r439 = lshr i768 %r435, 64
-%r440 = trunc i768 %r439 to i64
-%r442 = getelementptr i64, i64* %r399, i32 10
-store i64 %r440, i64* %r442
-%r443 = lshr i768 %r439, 64
-%r444 = trunc i768 %r443 to i64
-%r446 = getelementptr i64, i64* %r399, i32 11
-store i64 %r444, i64* %r446
-ret void
-}
-define void @mcl_fp_mont8L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
-{
-%r6 = getelementptr i64, i64* %r4, i32 -1
-%r7 = load i64, i64* %r6
-%r9 = getelementptr i64, i64* %r3, i32 0
-%r10 = load i64, i64* %r9
-%r11 = call i576 @mulPv512x64(i64* %r2, i64 %r10)
-%r12 = zext i576 %r11 to i640
-%r13 = trunc i576 %r11 to i64
-%r14 = mul i64 %r13, %r7
-%r15 = call i576 @mulPv512x64(i64* %r4, i64 %r14)
-%r16 = zext i576 %r15 to i640
-%r17 = add i640 %r12, %r16
-%r18 = lshr i640 %r17, 64
-%r20 = getelementptr i64, i64* %r3, i32 1
-%r21 = load i64, i64* %r20
-%r22 = call i576 @mulPv512x64(i64* %r2, i64 %r21)
-%r23 = zext i576 %r22 to i640
-%r24 = add i640 %r18, %r23
-%r25 = trunc i640 %r24 to i64
-%r26 = mul i64 %r25, %r7
-%r27 = call i576 @mulPv512x64(i64* %r4, i64 %r26)
-%r28 = zext i576 %r27 to i640
-%r29 = add i640 %r24, %r28
-%r30 = lshr i640 %r29, 64
-%r32 = getelementptr i64, i64* %r3, i32 2
-%r33 = load i64, i64* %r32
-%r34 = call i576 @mulPv512x64(i64* %r2, i64 %r33)
-%r35 = zext i576 %r34 to i640
-%r36 = add i640 %r30, %r35
-%r37 = trunc i640 %r36 to i64
-%r38 = mul i64 %r37, %r7
-%r39 = call i576 @mulPv512x64(i64* %r4, i64 %r38)
-%r40 = zext i576 %r39 to i640
-%r41 = add i640 %r36, %r40
-%r42 = lshr i640 %r41, 64
-%r44 = getelementptr i64, i64* %r3, i32 3
-%r45 = load i64, i64* %r44
-%r46 = call i576 @mulPv512x64(i64* %r2, i64 %r45)
-%r47 = zext i576 %r46 to i640
-%r48 = add i640 %r42, %r47
-%r49 = trunc i640 %r48 to i64
-%r50 = mul i64 %r49, %r7
-%r51 = call i576 @mulPv512x64(i64* %r4, i64 %r50)
-%r52 = zext i576 %r51 to i640
-%r53 = add i640 %r48, %r52
-%r54 = lshr i640 %r53, 64
-%r56 = getelementptr i64, i64* %r3, i32 4
-%r57 = load i64, i64* %r56
-%r58 = call i576 @mulPv512x64(i64* %r2, i64 %r57)
-%r59 = zext i576 %r58 to i640
-%r60 = add i640 %r54, %r59
-%r61 = trunc i640 %r60 to i64
-%r62 = mul i64 %r61, %r7
-%r63 = call i576 @mulPv512x64(i64* %r4, i64 %r62)
-%r64 = zext i576 %r63 to i640
-%r65 = add i640 %r60, %r64
-%r66 = lshr i640 %r65, 64
-%r68 = getelementptr i64, i64* %r3, i32 5
-%r69 = load i64, i64* %r68
-%r70 = call i576 @mulPv512x64(i64* %r2, i64 %r69)
-%r71 = zext i576 %r70 to i640
-%r72 = add i640 %r66, %r71
-%r73 = trunc i640 %r72 to i64
-%r74 = mul i64 %r73, %r7
-%r75 = call i576 @mulPv512x64(i64* %r4, i64 %r74)
-%r76 = zext i576 %r75 to i640
-%r77 = add i640 %r72, %r76
-%r78 = lshr i640 %r77, 64
-%r80 = getelementptr i64, i64* %r3, i32 6
-%r81 = load i64, i64* %r80
-%r82 = call i576 @mulPv512x64(i64* %r2, i64 %r81)
-%r83 = zext i576 %r82 to i640
-%r84 = add i640 %r78, %r83
-%r85 = trunc i640 %r84 to i64
-%r86 = mul i64 %r85, %r7
-%r87 = call i576 @mulPv512x64(i64* %r4, i64 %r86)
-%r88 = zext i576 %r87 to i640
-%r89 = add i640 %r84, %r88
-%r90 = lshr i640 %r89, 64
-%r92 = getelementptr i64, i64* %r3, i32 7
-%r93 = load i64, i64* %r92
-%r94 = call i576 @mulPv512x64(i64* %r2, i64 %r93)
-%r95 = zext i576 %r94 to i640
-%r96 = add i640 %r90, %r95
-%r97 = trunc i640 %r96 to i64
-%r98 = mul i64 %r97, %r7
-%r99 = call i576 @mulPv512x64(i64* %r4, i64 %r98)
-%r100 = zext i576 %r99 to i640
-%r101 = add i640 %r96, %r100
-%r102 = lshr i640 %r101, 64
-%r103 = trunc i640 %r102 to i576
-%r104 = load i64, i64* %r4
-%r105 = zext i64 %r104 to i128
-%r107 = getelementptr i64, i64* %r4, i32 1
-%r108 = load i64, i64* %r107
-%r109 = zext i64 %r108 to i128
-%r110 = shl i128 %r109, 64
-%r111 = or i128 %r105, %r110
-%r112 = zext i128 %r111 to i192
-%r114 = getelementptr i64, i64* %r4, i32 2
-%r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i192
-%r117 = shl i192 %r116, 128
-%r118 = or i192 %r112, %r117
-%r119 = zext i192 %r118 to i256
-%r121 = getelementptr i64, i64* %r4, i32 3
-%r122 = load i64, i64* %r121
-%r123 = zext i64 %r122 to i256
-%r124 = shl i256 %r123, 192
-%r125 = or i256 %r119, %r124
-%r126 = zext i256 %r125 to i320
-%r128 = getelementptr i64, i64* %r4, i32 4
-%r129 = load i64, i64* %r128
-%r130 = zext i64 %r129 to i320
-%r131 = shl i320 %r130, 256
-%r132 = or i320 %r126, %r131
-%r133 = zext i320 %r132 to i384
-%r135 = getelementptr i64, i64* %r4, i32 5
-%r136 = load i64, i64* %r135
-%r137 = zext i64 %r136 to i384
-%r138 = shl i384 %r137, 320
-%r139 = or i384 %r133, %r138
-%r140 = zext i384 %r139 to i448
-%r142 = getelementptr i64, i64* %r4, i32 6
-%r143 = load i64, i64* %r142
-%r144 = zext i64 %r143 to i448
-%r145 = shl i448 %r144, 384
-%r146 = or i448 %r140, %r145
-%r147 = zext i448 %r146 to i512
-%r149 = getelementptr i64, i64* %r4, i32 7
-%r150 = load i64, i64* %r149
-%r151 = zext i64 %r150 to i512
-%r152 = shl i512 %r151, 448
-%r153 = or i512 %r147, %r152
-%r154 = zext i512 %r153 to i576
-%r155 = sub i576 %r103, %r154
-%r156 = lshr i576 %r155, 512
-%r157 = trunc i576 %r156 to i1
-%r158 = select i1 %r157, i576 %r103, i576 %r155
-%r159 = trunc i576 %r158 to i512
-%r160 = trunc i512 %r159 to i64
-%r162 = getelementptr i64, i64* %r1, i32 0
-store i64 %r160, i64* %r162
-%r163 = lshr i512 %r159, 64
-%r164 = trunc i512 %r163 to i64
-%r166 = getelementptr i64, i64* %r1, i32 1
-store i64 %r164, i64* %r166
-%r167 = lshr i512 %r163, 64
-%r168 = trunc i512 %r167 to i64
-%r170 = getelementptr i64, i64* %r1, i32 2
-store i64 %r168, i64* %r170
-%r171 = lshr i512 %r167, 64
-%r172 = trunc i512 %r171 to i64
-%r174 = getelementptr i64, i64* %r1, i32 3
-store i64 %r172, i64* %r174
-%r175 = lshr i512 %r171, 64
-%r176 = trunc i512 %r175 to i64
-%r178 = getelementptr i64, i64* %r1, i32 4
-store i64 %r176, i64* %r178
-%r179 = lshr i512 %r175, 64
-%r180 = trunc i512 %r179 to i64
-%r182 = getelementptr i64, i64* %r1, i32 5
-store i64 %r180, i64* %r182
-%r183 = lshr i512 %r179, 64
-%r184 = trunc i512 %r183 to i64
-%r186 = getelementptr i64, i64* %r1, i32 6
-store i64 %r184, i64* %r186
-%r187 = lshr i512 %r183, 64
-%r188 = trunc i512 %r187 to i64
-%r190 = getelementptr i64, i64* %r1, i32 7
-store i64 %r188, i64* %r190
-ret void
-}
-define void @mcl_fp_montNF8L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
-{
-%r6 = getelementptr i64, i64* %r4, i32 -1
-%r7 = load i64, i64* %r6
-%r8 = load i64, i64* %r3
-%r9 = call i576 @mulPv512x64(i64* %r2, i64 %r8)
-%r10 = trunc i576 %r9 to i64
-%r11 = mul i64 %r10, %r7
-%r12 = call i576 @mulPv512x64(i64* %r4, i64 %r11)
-%r13 = add i576 %r9, %r12
-%r14 = lshr i576 %r13, 64
-%r16 = getelementptr i64, i64* %r3, i32 1
-%r17 = load i64, i64* %r16
-%r18 = call i576 @mulPv512x64(i64* %r2, i64 %r17)
-%r19 = add i576 %r14, %r18
-%r20 = trunc i576 %r19 to i64
-%r21 = mul i64 %r20, %r7
-%r22 = call i576 @mulPv512x64(i64* %r4, i64 %r21)
-%r23 = add i576 %r19, %r22
-%r24 = lshr i576 %r23, 64
-%r26 = getelementptr i64, i64* %r3, i32 2
-%r27 = load i64, i64* %r26
-%r28 = call i576 @mulPv512x64(i64* %r2, i64 %r27)
-%r29 = add i576 %r24, %r28
-%r30 = trunc i576 %r29 to i64
-%r31 = mul i64 %r30, %r7
-%r32 = call i576 @mulPv512x64(i64* %r4, i64 %r31)
-%r33 = add i576 %r29, %r32
-%r34 = lshr i576 %r33, 64
-%r36 = getelementptr i64, i64* %r3, i32 3
-%r37 = load i64, i64* %r36
-%r38 = call i576 @mulPv512x64(i64* %r2, i64 %r37)
-%r39 = add i576 %r34, %r38
-%r40 = trunc i576 %r39 to i64
-%r41 = mul i64 %r40, %r7
-%r42 = call i576 @mulPv512x64(i64* %r4, i64 %r41)
-%r43 = add i576 %r39, %r42
-%r44 = lshr i576 %r43, 64
-%r46 = getelementptr i64, i64* %r3, i32 4
-%r47 = load i64, i64* %r46
-%r48 = call i576 @mulPv512x64(i64* %r2, i64 %r47)
-%r49 = add i576 %r44, %r48
-%r50 = trunc i576 %r49 to i64
-%r51 = mul i64 %r50, %r7
-%r52 = call i576 @mulPv512x64(i64* %r4, i64 %r51)
-%r53 = add i576 %r49, %r52
-%r54 = lshr i576 %r53, 64
-%r56 = getelementptr i64, i64* %r3, i32 5
-%r57 = load i64, i64* %r56
-%r58 = call i576 @mulPv512x64(i64* %r2, i64 %r57)
-%r59 = add i576 %r54, %r58
-%r60 = trunc i576 %r59 to i64
-%r61 = mul i64 %r60, %r7
-%r62 = call i576 @mulPv512x64(i64* %r4, i64 %r61)
-%r63 = add i576 %r59, %r62
-%r64 = lshr i576 %r63, 64
-%r66 = getelementptr i64, i64* %r3, i32 6
-%r67 = load i64, i64* %r66
-%r68 = call i576 @mulPv512x64(i64* %r2, i64 %r67)
-%r69 = add i576 %r64, %r68
-%r70 = trunc i576 %r69 to i64
-%r71 = mul i64 %r70, %r7
-%r72 = call i576 @mulPv512x64(i64* %r4, i64 %r71)
-%r73 = add i576 %r69, %r72
-%r74 = lshr i576 %r73, 64
-%r76 = getelementptr i64, i64* %r3, i32 7
-%r77 = load i64, i64* %r76
-%r78 = call i576 @mulPv512x64(i64* %r2, i64 %r77)
-%r79 = add i576 %r74, %r78
-%r80 = trunc i576 %r79 to i64
-%r81 = mul i64 %r80, %r7
-%r82 = call i576 @mulPv512x64(i64* %r4, i64 %r81)
-%r83 = add i576 %r79, %r82
-%r84 = lshr i576 %r83, 64
-%r85 = trunc i576 %r84 to i512
-%r86 = load i64, i64* %r4
-%r87 = zext i64 %r86 to i128
-%r89 = getelementptr i64, i64* %r4, i32 1
-%r90 = load i64, i64* %r89
-%r91 = zext i64 %r90 to i128
-%r92 = shl i128 %r91, 64
-%r93 = or i128 %r87, %r92
-%r94 = zext i128 %r93 to i192
-%r96 = getelementptr i64, i64* %r4, i32 2
-%r97 = load i64, i64* %r96
-%r98 = zext i64 %r97 to i192
-%r99 = shl i192 %r98, 128
-%r100 = or i192 %r94, %r99
-%r101 = zext i192 %r100 to i256
-%r103 = getelementptr i64, i64* %r4, i32 3
-%r104 = load i64, i64* %r103
-%r105 = zext i64 %r104 to i256
-%r106 = shl i256 %r105, 192
-%r107 = or i256 %r101, %r106
-%r108 = zext i256 %r107 to i320
-%r110 = getelementptr i64, i64* %r4, i32 4
-%r111 = load i64, i64* %r110
-%r112 = zext i64 %r111 to i320
-%r113 = shl i320 %r112, 256
-%r114 = or i320 %r108, %r113
-%r115 = zext i320 %r114 to i384
-%r117 = getelementptr i64, i64* %r4, i32 5
-%r118 = load i64, i64* %r117
-%r119 = zext i64 %r118 to i384
-%r120 = shl i384 %r119, 320
-%r121 = or i384 %r115, %r120
-%r122 = zext i384 %r121 to i448
-%r124 = getelementptr i64, i64* %r4, i32 6
-%r125 = load i64, i64* %r124
-%r126 = zext i64 %r125 to i448
-%r127 = shl i448 %r126, 384
-%r128 = or i448 %r122, %r127
-%r129 = zext i448 %r128 to i512
-%r131 = getelementptr i64, i64* %r4, i32 7
-%r132 = load i64, i64* %r131
-%r133 = zext i64 %r132 to i512
-%r134 = shl i512 %r133, 448
-%r135 = or i512 %r129, %r134
-%r136 = sub i512 %r85, %r135
-%r137 = lshr i512 %r136, 511
-%r138 = trunc i512 %r137 to i1
-%r139 = select i1 %r138, i512 %r85, i512 %r136
-%r140 = trunc i512 %r139 to i64
-%r142 = getelementptr i64, i64* %r1, i32 0
-store i64 %r140, i64* %r142
-%r143 = lshr i512 %r139, 64
-%r144 = trunc i512 %r143 to i64
-%r146 = getelementptr i64, i64* %r1, i32 1
-store i64 %r144, i64* %r146
-%r147 = lshr i512 %r143, 64
-%r148 = trunc i512 %r147 to i64
-%r150 = getelementptr i64, i64* %r1, i32 2
-store i64 %r148, i64* %r150
-%r151 = lshr i512 %r147, 64
-%r152 = trunc i512 %r151 to i64
-%r154 = getelementptr i64, i64* %r1, i32 3
-store i64 %r152, i64* %r154
-%r155 = lshr i512 %r151, 64
-%r156 = trunc i512 %r155 to i64
-%r158 = getelementptr i64, i64* %r1, i32 4
-store i64 %r156, i64* %r158
-%r159 = lshr i512 %r155, 64
-%r160 = trunc i512 %r159 to i64
-%r162 = getelementptr i64, i64* %r1, i32 5
-store i64 %r160, i64* %r162
-%r163 = lshr i512 %r159, 64
-%r164 = trunc i512 %r163 to i64
-%r166 = getelementptr i64, i64* %r1, i32 6
-store i64 %r164, i64* %r166
-%r167 = lshr i512 %r163, 64
-%r168 = trunc i512 %r167 to i64
-%r170 = getelementptr i64, i64* %r1, i32 7
-store i64 %r168, i64* %r170
-ret void
-}
-define void @mcl_fp_montRed8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
-{
-%r5 = getelementptr i64, i64* %r3, i32 -1
-%r6 = load i64, i64* %r5
-%r7 = load i64, i64* %r3
-%r8 = zext i64 %r7 to i128
-%r10 = getelementptr i64, i64* %r3, i32 1
-%r11 = load i64, i64* %r10
-%r12 = zext i64 %r11 to i128
-%r13 = shl i128 %r12, 64
-%r14 = or i128 %r8, %r13
-%r15 = zext i128 %r14 to i192
-%r17 = getelementptr i64, i64* %r3, i32 2
-%r18 = load i64, i64* %r17
-%r19 = zext i64 %r18 to i192
-%r20 = shl i192 %r19, 128
-%r21 = or i192 %r15, %r20
-%r22 = zext i192 %r21 to i256
-%r24 = getelementptr i64, i64* %r3, i32 3
-%r25 = load i64, i64* %r24
-%r26 = zext i64 %r25 to i256
-%r27 = shl i256 %r26, 192
-%r28 = or i256 %r22, %r27
-%r29 = zext i256 %r28 to i320
-%r31 = getelementptr i64, i64* %r3, i32 4
-%r32 = load i64, i64* %r31
-%r33 = zext i64 %r32 to i320
-%r34 = shl i320 %r33, 256
-%r35 = or i320 %r29, %r34
-%r36 = zext i320 %r35 to i384
-%r38 = getelementptr i64, i64* %r3, i32 5
-%r39 = load i64, i64* %r38
-%r40 = zext i64 %r39 to i384
-%r41 = shl i384 %r40, 320
-%r42 = or i384 %r36, %r41
-%r43 = zext i384 %r42 to i448
-%r45 = getelementptr i64, i64* %r3, i32 6
-%r46 = load i64, i64* %r45
-%r47 = zext i64 %r46 to i448
-%r48 = shl i448 %r47, 384
-%r49 = or i448 %r43, %r48
-%r50 = zext i448 %r49 to i512
-%r52 = getelementptr i64, i64* %r3, i32 7
-%r53 = load i64, i64* %r52
-%r54 = zext i64 %r53 to i512
-%r55 = shl i512 %r54, 448
-%r56 = or i512 %r50, %r55
-%r57 = load i64, i64* %r2
-%r58 = zext i64 %r57 to i128
-%r60 = getelementptr i64, i64* %r2, i32 1
-%r61 = load i64, i64* %r60
-%r62 = zext i64 %r61 to i128
-%r63 = shl i128 %r62, 64
-%r64 = or i128 %r58, %r63
-%r65 = zext i128 %r64 to i192
-%r67 = getelementptr i64, i64* %r2, i32 2
-%r68 = load i64, i64* %r67
-%r69 = zext i64 %r68 to i192
-%r70 = shl i192 %r69, 128
-%r71 = or i192 %r65, %r70
-%r72 = zext i192 %r71 to i256
-%r74 = getelementptr i64, i64* %r2, i32 3
-%r75 = load i64, i64* %r74
-%r76 = zext i64 %r75 to i256
-%r77 = shl i256 %r76, 192
-%r78 = or i256 %r72, %r77
-%r79 = zext i256 %r78 to i320
-%r81 = getelementptr i64, i64* %r2, i32 4
-%r82 = load i64, i64* %r81
-%r83 = zext i64 %r82 to i320
-%r84 = shl i320 %r83, 256
-%r85 = or i320 %r79, %r84
-%r86 = zext i320 %r85 to i384
-%r88 = getelementptr i64, i64* %r2, i32 5
-%r89 = load i64, i64* %r88
-%r90 = zext i64 %r89 to i384
-%r91 = shl i384 %r90, 320
-%r92 = or i384 %r86, %r91
-%r93 = zext i384 %r92 to i448
-%r95 = getelementptr i64, i64* %r2, i32 6
-%r96 = load i64, i64* %r95
-%r97 = zext i64 %r96 to i448
-%r98 = shl i448 %r97, 384
-%r99 = or i448 %r93, %r98
-%r100 = zext i448 %r99 to i512
-%r102 = getelementptr i64, i64* %r2, i32 7
-%r103 = load i64, i64* %r102
-%r104 = zext i64 %r103 to i512
-%r105 = shl i512 %r104, 448
-%r106 = or i512 %r100, %r105
-%r107 = zext i512 %r106 to i576
-%r109 = getelementptr i64, i64* %r2, i32 8
-%r110 = load i64, i64* %r109
-%r111 = zext i64 %r110 to i576
-%r112 = shl i576 %r111, 512
-%r113 = or i576 %r107, %r112
-%r114 = zext i576 %r113 to i640
-%r116 = getelementptr i64, i64* %r2, i32 9
-%r117 = load i64, i64* %r116
-%r118 = zext i64 %r117 to i640
-%r119 = shl i640 %r118, 576
-%r120 = or i640 %r114, %r119
-%r121 = zext i640 %r120 to i704
-%r123 = getelementptr i64, i64* %r2, i32 10
-%r124 = load i64, i64* %r123
-%r125 = zext i64 %r124 to i704
-%r126 = shl i704 %r125, 640
-%r127 = or i704 %r121, %r126
-%r128 = zext i704 %r127 to i768
-%r130 = getelementptr i64, i64* %r2, i32 11
-%r131 = load i64, i64* %r130
-%r132 = zext i64 %r131 to i768
-%r133 = shl i768 %r132, 704
-%r134 = or i768 %r128, %r133
-%r135 = zext i768 %r134 to i832
-%r137 = getelementptr i64, i64* %r2, i32 12
-%r138 = load i64, i64* %r137
-%r139 = zext i64 %r138 to i832
-%r140 = shl i832 %r139, 768
-%r141 = or i832 %r135, %r140
-%r142 = zext i832 %r141 to i896
-%r144 = getelementptr i64, i64* %r2, i32 13
-%r145 = load i64, i64* %r144
-%r146 = zext i64 %r145 to i896
-%r147 = shl i896 %r146, 832
-%r148 = or i896 %r142, %r147
-%r149 = zext i896 %r148 to i960
-%r151 = getelementptr i64, i64* %r2, i32 14
-%r152 = load i64, i64* %r151
-%r153 = zext i64 %r152 to i960
-%r154 = shl i960 %r153, 896
-%r155 = or i960 %r149, %r154
-%r156 = zext i960 %r155 to i1024
-%r158 = getelementptr i64, i64* %r2, i32 15
-%r159 = load i64, i64* %r158
-%r160 = zext i64 %r159 to i1024
-%r161 = shl i1024 %r160, 960
-%r162 = or i1024 %r156, %r161
-%r163 = zext i1024 %r162 to i1088
-%r164 = trunc i1088 %r163 to i64
-%r165 = mul i64 %r164, %r6
-%r166 = call i576 @mulPv512x64(i64* %r3, i64 %r165)
-%r167 = zext i576 %r166 to i1088
-%r168 = add i1088 %r163, %r167
-%r169 = lshr i1088 %r168, 64
-%r170 = trunc i1088 %r169 to i1024
-%r171 = trunc i1024 %r170 to i64
-%r172 = mul i64 %r171, %r6
-%r173 = call i576 @mulPv512x64(i64* %r3, i64 %r172)
-%r174 = zext i576 %r173 to i1024
-%r175 = add i1024 %r170, %r174
-%r176 = lshr i1024 %r175, 64
-%r177 = trunc i1024 %r176 to i960
-%r178 = trunc i960 %r177 to i64
-%r179 = mul i64 %r178, %r6
-%r180 = call i576 @mulPv512x64(i64* %r3, i64 %r179)
-%r181 = zext i576 %r180 to i960
-%r182 = add i960 %r177, %r181
-%r183 = lshr i960 %r182, 64
-%r184 = trunc i960 %r183 to i896
-%r185 = trunc i896 %r184 to i64
-%r186 = mul i64 %r185, %r6
-%r187 = call i576 @mulPv512x64(i64* %r3, i64 %r186)
-%r188 = zext i576 %r187 to i896
-%r189 = add i896 %r184, %r188
-%r190 = lshr i896 %r189, 64
-%r191 = trunc i896 %r190 to i832
-%r192 = trunc i832 %r191 to i64
-%r193 = mul i64 %r192, %r6
-%r194 = call i576 @mulPv512x64(i64* %r3, i64 %r193)
-%r195 = zext i576 %r194 to i832
-%r196 = add i832 %r191, %r195
-%r197 = lshr i832 %r196, 64
-%r198 = trunc i832 %r197 to i768
-%r199 = trunc i768 %r198 to i64
-%r200 = mul i64 %r199, %r6
-%r201 = call i576 @mulPv512x64(i64* %r3, i64 %r200)
-%r202 = zext i576 %r201 to i768
-%r203 = add i768 %r198, %r202
-%r204 = lshr i768 %r203, 64
-%r205 = trunc i768 %r204 to i704
-%r206 = trunc i704 %r205 to i64
-%r207 = mul i64 %r206, %r6
-%r208 = call i576 @mulPv512x64(i64* %r3, i64 %r207)
-%r209 = zext i576 %r208 to i704
-%r210 = add i704 %r205, %r209
-%r211 = lshr i704 %r210, 64
-%r212 = trunc i704 %r211 to i640
-%r213 = trunc i640 %r212 to i64
-%r214 = mul i64 %r213, %r6
-%r215 = call i576 @mulPv512x64(i64* %r3, i64 %r214)
-%r216 = zext i576 %r215 to i640
-%r217 = add i640 %r212, %r216
-%r218 = lshr i640 %r217, 64
-%r219 = trunc i640 %r218 to i576
-%r220 = zext i512 %r56 to i576
-%r221 = sub i576 %r219, %r220
-%r222 = lshr i576 %r221, 512
-%r223 = trunc i576 %r222 to i1
-%r224 = select i1 %r223, i576 %r219, i576 %r221
-%r225 = trunc i576 %r224 to i512
-%r226 = trunc i512 %r225 to i64
-%r228 = getelementptr i64, i64* %r1, i32 0
-store i64 %r226, i64* %r228
-%r229 = lshr i512 %r225, 64
-%r230 = trunc i512 %r229 to i64
-%r232 = getelementptr i64, i64* %r1, i32 1
-store i64 %r230, i64* %r232
-%r233 = lshr i512 %r229, 64
-%r234 = trunc i512 %r233 to i64
-%r236 = getelementptr i64, i64* %r1, i32 2
-store i64 %r234, i64* %r236
-%r237 = lshr i512 %r233, 64
-%r238 = trunc i512 %r237 to i64
-%r240 = getelementptr i64, i64* %r1, i32 3
-store i64 %r238, i64* %r240
-%r241 = lshr i512 %r237, 64
-%r242 = trunc i512 %r241 to i64
-%r244 = getelementptr i64, i64* %r1, i32 4
-store i64 %r242, i64* %r244
-%r245 = lshr i512 %r241, 64
-%r246 = trunc i512 %r245 to i64
-%r248 = getelementptr i64, i64* %r1, i32 5
-store i64 %r246, i64* %r248
-%r249 = lshr i512 %r245, 64
-%r250 = trunc i512 %r249 to i64
-%r252 = getelementptr i64, i64* %r1, i32 6
-store i64 %r250, i64* %r252
-%r253 = lshr i512 %r249, 64
-%r254 = trunc i512 %r253 to i64
-%r256 = getelementptr i64, i64* %r1, i32 7
-store i64 %r254, i64* %r256
-ret void
-}
-define i64 @mcl_fp_addPre8L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r3
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r3, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r3, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r3, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r3, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r3, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r3, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r3, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = zext i512 %r54 to i576
-%r56 = load i64, i64* %r4
-%r57 = zext i64 %r56 to i128
-%r59 = getelementptr i64, i64* %r4, i32 1
-%r60 = load i64, i64* %r59
-%r61 = zext i64 %r60 to i128
-%r62 = shl i128 %r61, 64
-%r63 = or i128 %r57, %r62
-%r64 = zext i128 %r63 to i192
-%r66 = getelementptr i64, i64* %r4, i32 2
-%r67 = load i64, i64* %r66
-%r68 = zext i64 %r67 to i192
-%r69 = shl i192 %r68, 128
-%r70 = or i192 %r64, %r69
-%r71 = zext i192 %r70 to i256
-%r73 = getelementptr i64, i64* %r4, i32 3
-%r74 = load i64, i64* %r73
-%r75 = zext i64 %r74 to i256
-%r76 = shl i256 %r75, 192
-%r77 = or i256 %r71, %r76
-%r78 = zext i256 %r77 to i320
-%r80 = getelementptr i64, i64* %r4, i32 4
-%r81 = load i64, i64* %r80
-%r82 = zext i64 %r81 to i320
-%r83 = shl i320 %r82, 256
-%r84 = or i320 %r78, %r83
-%r85 = zext i320 %r84 to i384
-%r87 = getelementptr i64, i64* %r4, i32 5
-%r88 = load i64, i64* %r87
-%r89 = zext i64 %r88 to i384
-%r90 = shl i384 %r89, 320
-%r91 = or i384 %r85, %r90
-%r92 = zext i384 %r91 to i448
-%r94 = getelementptr i64, i64* %r4, i32 6
-%r95 = load i64, i64* %r94
-%r96 = zext i64 %r95 to i448
-%r97 = shl i448 %r96, 384
-%r98 = or i448 %r92, %r97
-%r99 = zext i448 %r98 to i512
-%r101 = getelementptr i64, i64* %r4, i32 7
-%r102 = load i64, i64* %r101
-%r103 = zext i64 %r102 to i512
-%r104 = shl i512 %r103, 448
-%r105 = or i512 %r99, %r104
-%r106 = zext i512 %r105 to i576
-%r107 = add i576 %r55, %r106
-%r108 = trunc i576 %r107 to i512
-%r109 = trunc i512 %r108 to i64
-%r111 = getelementptr i64, i64* %r2, i32 0
-store i64 %r109, i64* %r111
-%r112 = lshr i512 %r108, 64
-%r113 = trunc i512 %r112 to i64
-%r115 = getelementptr i64, i64* %r2, i32 1
-store i64 %r113, i64* %r115
-%r116 = lshr i512 %r112, 64
-%r117 = trunc i512 %r116 to i64
-%r119 = getelementptr i64, i64* %r2, i32 2
-store i64 %r117, i64* %r119
-%r120 = lshr i512 %r116, 64
-%r121 = trunc i512 %r120 to i64
-%r123 = getelementptr i64, i64* %r2, i32 3
-store i64 %r121, i64* %r123
-%r124 = lshr i512 %r120, 64
-%r125 = trunc i512 %r124 to i64
-%r127 = getelementptr i64, i64* %r2, i32 4
-store i64 %r125, i64* %r127
-%r128 = lshr i512 %r124, 64
-%r129 = trunc i512 %r128 to i64
-%r131 = getelementptr i64, i64* %r2, i32 5
-store i64 %r129, i64* %r131
-%r132 = lshr i512 %r128, 64
-%r133 = trunc i512 %r132 to i64
-%r135 = getelementptr i64, i64* %r2, i32 6
-store i64 %r133, i64* %r135
-%r136 = lshr i512 %r132, 64
-%r137 = trunc i512 %r136 to i64
-%r139 = getelementptr i64, i64* %r2, i32 7
-store i64 %r137, i64* %r139
-%r140 = lshr i576 %r107, 512
-%r141 = trunc i576 %r140 to i64
-ret i64 %r141
-}
-define i64 @mcl_fp_subPre8L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r3
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r3, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r3, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r3, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r3, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r3, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r3, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r3, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = zext i512 %r54 to i576
-%r56 = load i64, i64* %r4
-%r57 = zext i64 %r56 to i128
-%r59 = getelementptr i64, i64* %r4, i32 1
-%r60 = load i64, i64* %r59
-%r61 = zext i64 %r60 to i128
-%r62 = shl i128 %r61, 64
-%r63 = or i128 %r57, %r62
-%r64 = zext i128 %r63 to i192
-%r66 = getelementptr i64, i64* %r4, i32 2
-%r67 = load i64, i64* %r66
-%r68 = zext i64 %r67 to i192
-%r69 = shl i192 %r68, 128
-%r70 = or i192 %r64, %r69
-%r71 = zext i192 %r70 to i256
-%r73 = getelementptr i64, i64* %r4, i32 3
-%r74 = load i64, i64* %r73
-%r75 = zext i64 %r74 to i256
-%r76 = shl i256 %r75, 192
-%r77 = or i256 %r71, %r76
-%r78 = zext i256 %r77 to i320
-%r80 = getelementptr i64, i64* %r4, i32 4
-%r81 = load i64, i64* %r80
-%r82 = zext i64 %r81 to i320
-%r83 = shl i320 %r82, 256
-%r84 = or i320 %r78, %r83
-%r85 = zext i320 %r84 to i384
-%r87 = getelementptr i64, i64* %r4, i32 5
-%r88 = load i64, i64* %r87
-%r89 = zext i64 %r88 to i384
-%r90 = shl i384 %r89, 320
-%r91 = or i384 %r85, %r90
-%r92 = zext i384 %r91 to i448
-%r94 = getelementptr i64, i64* %r4, i32 6
-%r95 = load i64, i64* %r94
-%r96 = zext i64 %r95 to i448
-%r97 = shl i448 %r96, 384
-%r98 = or i448 %r92, %r97
-%r99 = zext i448 %r98 to i512
-%r101 = getelementptr i64, i64* %r4, i32 7
-%r102 = load i64, i64* %r101
-%r103 = zext i64 %r102 to i512
-%r104 = shl i512 %r103, 448
-%r105 = or i512 %r99, %r104
-%r106 = zext i512 %r105 to i576
-%r107 = sub i576 %r55, %r106
-%r108 = trunc i576 %r107 to i512
-%r109 = trunc i512 %r108 to i64
-%r111 = getelementptr i64, i64* %r2, i32 0
-store i64 %r109, i64* %r111
-%r112 = lshr i512 %r108, 64
-%r113 = trunc i512 %r112 to i64
-%r115 = getelementptr i64, i64* %r2, i32 1
-store i64 %r113, i64* %r115
-%r116 = lshr i512 %r112, 64
-%r117 = trunc i512 %r116 to i64
-%r119 = getelementptr i64, i64* %r2, i32 2
-store i64 %r117, i64* %r119
-%r120 = lshr i512 %r116, 64
-%r121 = trunc i512 %r120 to i64
-%r123 = getelementptr i64, i64* %r2, i32 3
-store i64 %r121, i64* %r123
-%r124 = lshr i512 %r120, 64
-%r125 = trunc i512 %r124 to i64
-%r127 = getelementptr i64, i64* %r2, i32 4
-store i64 %r125, i64* %r127
-%r128 = lshr i512 %r124, 64
-%r129 = trunc i512 %r128 to i64
-%r131 = getelementptr i64, i64* %r2, i32 5
-store i64 %r129, i64* %r131
-%r132 = lshr i512 %r128, 64
-%r133 = trunc i512 %r132 to i64
-%r135 = getelementptr i64, i64* %r2, i32 6
-store i64 %r133, i64* %r135
-%r136 = lshr i512 %r132, 64
-%r137 = trunc i512 %r136 to i64
-%r139 = getelementptr i64, i64* %r2, i32 7
-store i64 %r137, i64* %r139
-%r140 = lshr i576 %r107, 512
-%r141 = trunc i576 %r140 to i64
-%r143 = and i64 %r141, 1
-ret i64 %r143
-}
-define void @mcl_fp_shr1_8L(i64* noalias  %r1, i64* noalias  %r2)
-{
-%r3 = load i64, i64* %r2
-%r4 = zext i64 %r3 to i128
-%r6 = getelementptr i64, i64* %r2, i32 1
-%r7 = load i64, i64* %r6
-%r8 = zext i64 %r7 to i128
-%r9 = shl i128 %r8, 64
-%r10 = or i128 %r4, %r9
-%r11 = zext i128 %r10 to i192
-%r13 = getelementptr i64, i64* %r2, i32 2
-%r14 = load i64, i64* %r13
-%r15 = zext i64 %r14 to i192
-%r16 = shl i192 %r15, 128
-%r17 = or i192 %r11, %r16
-%r18 = zext i192 %r17 to i256
-%r20 = getelementptr i64, i64* %r2, i32 3
-%r21 = load i64, i64* %r20
-%r22 = zext i64 %r21 to i256
-%r23 = shl i256 %r22, 192
-%r24 = or i256 %r18, %r23
-%r25 = zext i256 %r24 to i320
-%r27 = getelementptr i64, i64* %r2, i32 4
-%r28 = load i64, i64* %r27
-%r29 = zext i64 %r28 to i320
-%r30 = shl i320 %r29, 256
-%r31 = or i320 %r25, %r30
-%r32 = zext i320 %r31 to i384
-%r34 = getelementptr i64, i64* %r2, i32 5
-%r35 = load i64, i64* %r34
-%r36 = zext i64 %r35 to i384
-%r37 = shl i384 %r36, 320
-%r38 = or i384 %r32, %r37
-%r39 = zext i384 %r38 to i448
-%r41 = getelementptr i64, i64* %r2, i32 6
-%r42 = load i64, i64* %r41
-%r43 = zext i64 %r42 to i448
-%r44 = shl i448 %r43, 384
-%r45 = or i448 %r39, %r44
-%r46 = zext i448 %r45 to i512
-%r48 = getelementptr i64, i64* %r2, i32 7
-%r49 = load i64, i64* %r48
-%r50 = zext i64 %r49 to i512
-%r51 = shl i512 %r50, 448
-%r52 = or i512 %r46, %r51
-%r53 = lshr i512 %r52, 1
-%r54 = trunc i512 %r53 to i64
-%r56 = getelementptr i64, i64* %r1, i32 0
-store i64 %r54, i64* %r56
-%r57 = lshr i512 %r53, 64
-%r58 = trunc i512 %r57 to i64
-%r60 = getelementptr i64, i64* %r1, i32 1
-store i64 %r58, i64* %r60
-%r61 = lshr i512 %r57, 64
-%r62 = trunc i512 %r61 to i64
-%r64 = getelementptr i64, i64* %r1, i32 2
-store i64 %r62, i64* %r64
-%r65 = lshr i512 %r61, 64
-%r66 = trunc i512 %r65 to i64
-%r68 = getelementptr i64, i64* %r1, i32 3
-store i64 %r66, i64* %r68
-%r69 = lshr i512 %r65, 64
-%r70 = trunc i512 %r69 to i64
-%r72 = getelementptr i64, i64* %r1, i32 4
-store i64 %r70, i64* %r72
-%r73 = lshr i512 %r69, 64
-%r74 = trunc i512 %r73 to i64
-%r76 = getelementptr i64, i64* %r1, i32 5
-store i64 %r74, i64* %r76
-%r77 = lshr i512 %r73, 64
-%r78 = trunc i512 %r77 to i64
-%r80 = getelementptr i64, i64* %r1, i32 6
-store i64 %r78, i64* %r80
-%r81 = lshr i512 %r77, 64
-%r82 = trunc i512 %r81 to i64
-%r84 = getelementptr i64, i64* %r1, i32 7
-store i64 %r82, i64* %r84
-ret void
-}
-define void @mcl_fp_add8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r2, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = load i64, i64* %r3
-%r56 = zext i64 %r55 to i128
-%r58 = getelementptr i64, i64* %r3, i32 1
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i128
-%r61 = shl i128 %r60, 64
-%r62 = or i128 %r56, %r61
-%r63 = zext i128 %r62 to i192
-%r65 = getelementptr i64, i64* %r3, i32 2
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i192
-%r68 = shl i192 %r67, 128
-%r69 = or i192 %r63, %r68
-%r70 = zext i192 %r69 to i256
-%r72 = getelementptr i64, i64* %r3, i32 3
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i256
-%r75 = shl i256 %r74, 192
-%r76 = or i256 %r70, %r75
-%r77 = zext i256 %r76 to i320
-%r79 = getelementptr i64, i64* %r3, i32 4
-%r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i320
-%r82 = shl i320 %r81, 256
-%r83 = or i320 %r77, %r82
-%r84 = zext i320 %r83 to i384
-%r86 = getelementptr i64, i64* %r3, i32 5
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i384
-%r89 = shl i384 %r88, 320
-%r90 = or i384 %r84, %r89
-%r91 = zext i384 %r90 to i448
-%r93 = getelementptr i64, i64* %r3, i32 6
-%r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i448
-%r96 = shl i448 %r95, 384
-%r97 = or i448 %r91, %r96
-%r98 = zext i448 %r97 to i512
-%r100 = getelementptr i64, i64* %r3, i32 7
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i512
-%r103 = shl i512 %r102, 448
-%r104 = or i512 %r98, %r103
-%r105 = zext i512 %r54 to i576
-%r106 = zext i512 %r104 to i576
-%r107 = add i576 %r105, %r106
-%r108 = trunc i576 %r107 to i512
-%r109 = trunc i512 %r108 to i64
-%r111 = getelementptr i64, i64* %r1, i32 0
-store i64 %r109, i64* %r111
-%r112 = lshr i512 %r108, 64
-%r113 = trunc i512 %r112 to i64
-%r115 = getelementptr i64, i64* %r1, i32 1
-store i64 %r113, i64* %r115
-%r116 = lshr i512 %r112, 64
-%r117 = trunc i512 %r116 to i64
-%r119 = getelementptr i64, i64* %r1, i32 2
-store i64 %r117, i64* %r119
-%r120 = lshr i512 %r116, 64
-%r121 = trunc i512 %r120 to i64
-%r123 = getelementptr i64, i64* %r1, i32 3
-store i64 %r121, i64* %r123
-%r124 = lshr i512 %r120, 64
-%r125 = trunc i512 %r124 to i64
-%r127 = getelementptr i64, i64* %r1, i32 4
-store i64 %r125, i64* %r127
-%r128 = lshr i512 %r124, 64
-%r129 = trunc i512 %r128 to i64
-%r131 = getelementptr i64, i64* %r1, i32 5
-store i64 %r129, i64* %r131
-%r132 = lshr i512 %r128, 64
-%r133 = trunc i512 %r132 to i64
-%r135 = getelementptr i64, i64* %r1, i32 6
-store i64 %r133, i64* %r135
-%r136 = lshr i512 %r132, 64
-%r137 = trunc i512 %r136 to i64
-%r139 = getelementptr i64, i64* %r1, i32 7
-store i64 %r137, i64* %r139
-%r140 = load i64, i64* %r4
-%r141 = zext i64 %r140 to i128
-%r143 = getelementptr i64, i64* %r4, i32 1
-%r144 = load i64, i64* %r143
-%r145 = zext i64 %r144 to i128
-%r146 = shl i128 %r145, 64
-%r147 = or i128 %r141, %r146
-%r148 = zext i128 %r147 to i192
-%r150 = getelementptr i64, i64* %r4, i32 2
-%r151 = load i64, i64* %r150
-%r152 = zext i64 %r151 to i192
-%r153 = shl i192 %r152, 128
-%r154 = or i192 %r148, %r153
-%r155 = zext i192 %r154 to i256
-%r157 = getelementptr i64, i64* %r4, i32 3
-%r158 = load i64, i64* %r157
-%r159 = zext i64 %r158 to i256
-%r160 = shl i256 %r159, 192
-%r161 = or i256 %r155, %r160
-%r162 = zext i256 %r161 to i320
-%r164 = getelementptr i64, i64* %r4, i32 4
-%r165 = load i64, i64* %r164
-%r166 = zext i64 %r165 to i320
-%r167 = shl i320 %r166, 256
-%r168 = or i320 %r162, %r167
-%r169 = zext i320 %r168 to i384
-%r171 = getelementptr i64, i64* %r4, i32 5
-%r172 = load i64, i64* %r171
-%r173 = zext i64 %r172 to i384
-%r174 = shl i384 %r173, 320
-%r175 = or i384 %r169, %r174
-%r176 = zext i384 %r175 to i448
-%r178 = getelementptr i64, i64* %r4, i32 6
-%r179 = load i64, i64* %r178
-%r180 = zext i64 %r179 to i448
-%r181 = shl i448 %r180, 384
-%r182 = or i448 %r176, %r181
-%r183 = zext i448 %r182 to i512
-%r185 = getelementptr i64, i64* %r4, i32 7
-%r186 = load i64, i64* %r185
-%r187 = zext i64 %r186 to i512
-%r188 = shl i512 %r187, 448
-%r189 = or i512 %r183, %r188
-%r190 = zext i512 %r189 to i576
-%r191 = sub i576 %r107, %r190
-%r192 = lshr i576 %r191, 512
-%r193 = trunc i576 %r192 to i1
-br i1%r193, label %carry, label %nocarry
-nocarry:
-%r194 = trunc i576 %r191 to i512
-%r195 = trunc i512 %r194 to i64
-%r197 = getelementptr i64, i64* %r1, i32 0
-store i64 %r195, i64* %r197
-%r198 = lshr i512 %r194, 64
-%r199 = trunc i512 %r198 to i64
-%r201 = getelementptr i64, i64* %r1, i32 1
-store i64 %r199, i64* %r201
-%r202 = lshr i512 %r198, 64
-%r203 = trunc i512 %r202 to i64
-%r205 = getelementptr i64, i64* %r1, i32 2
-store i64 %r203, i64* %r205
-%r206 = lshr i512 %r202, 64
-%r207 = trunc i512 %r206 to i64
-%r209 = getelementptr i64, i64* %r1, i32 3
-store i64 %r207, i64* %r209
-%r210 = lshr i512 %r206, 64
-%r211 = trunc i512 %r210 to i64
-%r213 = getelementptr i64, i64* %r1, i32 4
-store i64 %r211, i64* %r213
-%r214 = lshr i512 %r210, 64
-%r215 = trunc i512 %r214 to i64
-%r217 = getelementptr i64, i64* %r1, i32 5
-store i64 %r215, i64* %r217
-%r218 = lshr i512 %r214, 64
-%r219 = trunc i512 %r218 to i64
-%r221 = getelementptr i64, i64* %r1, i32 6
-store i64 %r219, i64* %r221
-%r222 = lshr i512 %r218, 64
-%r223 = trunc i512 %r222 to i64
-%r225 = getelementptr i64, i64* %r1, i32 7
-store i64 %r223, i64* %r225
-ret void
-carry:
-ret void
-}
-define void @mcl_fp_addNF8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r2, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = load i64, i64* %r3
-%r56 = zext i64 %r55 to i128
-%r58 = getelementptr i64, i64* %r3, i32 1
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i128
-%r61 = shl i128 %r60, 64
-%r62 = or i128 %r56, %r61
-%r63 = zext i128 %r62 to i192
-%r65 = getelementptr i64, i64* %r3, i32 2
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i192
-%r68 = shl i192 %r67, 128
-%r69 = or i192 %r63, %r68
-%r70 = zext i192 %r69 to i256
-%r72 = getelementptr i64, i64* %r3, i32 3
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i256
-%r75 = shl i256 %r74, 192
-%r76 = or i256 %r70, %r75
-%r77 = zext i256 %r76 to i320
-%r79 = getelementptr i64, i64* %r3, i32 4
-%r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i320
-%r82 = shl i320 %r81, 256
-%r83 = or i320 %r77, %r82
-%r84 = zext i320 %r83 to i384
-%r86 = getelementptr i64, i64* %r3, i32 5
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i384
-%r89 = shl i384 %r88, 320
-%r90 = or i384 %r84, %r89
-%r91 = zext i384 %r90 to i448
-%r93 = getelementptr i64, i64* %r3, i32 6
-%r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i448
-%r96 = shl i448 %r95, 384
-%r97 = or i448 %r91, %r96
-%r98 = zext i448 %r97 to i512
-%r100 = getelementptr i64, i64* %r3, i32 7
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i512
-%r103 = shl i512 %r102, 448
-%r104 = or i512 %r98, %r103
-%r105 = add i512 %r54, %r104
-%r106 = load i64, i64* %r4
-%r107 = zext i64 %r106 to i128
-%r109 = getelementptr i64, i64* %r4, i32 1
-%r110 = load i64, i64* %r109
-%r111 = zext i64 %r110 to i128
-%r112 = shl i128 %r111, 64
-%r113 = or i128 %r107, %r112
-%r114 = zext i128 %r113 to i192
-%r116 = getelementptr i64, i64* %r4, i32 2
-%r117 = load i64, i64* %r116
-%r118 = zext i64 %r117 to i192
-%r119 = shl i192 %r118, 128
-%r120 = or i192 %r114, %r119
-%r121 = zext i192 %r120 to i256
-%r123 = getelementptr i64, i64* %r4, i32 3
-%r124 = load i64, i64* %r123
-%r125 = zext i64 %r124 to i256
-%r126 = shl i256 %r125, 192
-%r127 = or i256 %r121, %r126
-%r128 = zext i256 %r127 to i320
-%r130 = getelementptr i64, i64* %r4, i32 4
-%r131 = load i64, i64* %r130
-%r132 = zext i64 %r131 to i320
-%r133 = shl i320 %r132, 256
-%r134 = or i320 %r128, %r133
-%r135 = zext i320 %r134 to i384
-%r137 = getelementptr i64, i64* %r4, i32 5
-%r138 = load i64, i64* %r137
-%r139 = zext i64 %r138 to i384
-%r140 = shl i384 %r139, 320
-%r141 = or i384 %r135, %r140
-%r142 = zext i384 %r141 to i448
-%r144 = getelementptr i64, i64* %r4, i32 6
-%r145 = load i64, i64* %r144
-%r146 = zext i64 %r145 to i448
-%r147 = shl i448 %r146, 384
-%r148 = or i448 %r142, %r147
-%r149 = zext i448 %r148 to i512
-%r151 = getelementptr i64, i64* %r4, i32 7
-%r152 = load i64, i64* %r151
-%r153 = zext i64 %r152 to i512
-%r154 = shl i512 %r153, 448
-%r155 = or i512 %r149, %r154
-%r156 = sub i512 %r105, %r155
-%r157 = lshr i512 %r156, 511
-%r158 = trunc i512 %r157 to i1
-%r159 = select i1 %r158, i512 %r105, i512 %r156
-%r160 = trunc i512 %r159 to i64
-%r162 = getelementptr i64, i64* %r1, i32 0
-store i64 %r160, i64* %r162
-%r163 = lshr i512 %r159, 64
-%r164 = trunc i512 %r163 to i64
-%r166 = getelementptr i64, i64* %r1, i32 1
-store i64 %r164, i64* %r166
-%r167 = lshr i512 %r163, 64
-%r168 = trunc i512 %r167 to i64
-%r170 = getelementptr i64, i64* %r1, i32 2
-store i64 %r168, i64* %r170
-%r171 = lshr i512 %r167, 64
-%r172 = trunc i512 %r171 to i64
-%r174 = getelementptr i64, i64* %r1, i32 3
-store i64 %r172, i64* %r174
-%r175 = lshr i512 %r171, 64
-%r176 = trunc i512 %r175 to i64
-%r178 = getelementptr i64, i64* %r1, i32 4
-store i64 %r176, i64* %r178
-%r179 = lshr i512 %r175, 64
-%r180 = trunc i512 %r179 to i64
-%r182 = getelementptr i64, i64* %r1, i32 5
-store i64 %r180, i64* %r182
-%r183 = lshr i512 %r179, 64
-%r184 = trunc i512 %r183 to i64
-%r186 = getelementptr i64, i64* %r1, i32 6
-store i64 %r184, i64* %r186
-%r187 = lshr i512 %r183, 64
-%r188 = trunc i512 %r187 to i64
-%r190 = getelementptr i64, i64* %r1, i32 7
-store i64 %r188, i64* %r190
-ret void
-}
-define void @mcl_fp_sub8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r2, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = load i64, i64* %r3
-%r56 = zext i64 %r55 to i128
-%r58 = getelementptr i64, i64* %r3, i32 1
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i128
-%r61 = shl i128 %r60, 64
-%r62 = or i128 %r56, %r61
-%r63 = zext i128 %r62 to i192
-%r65 = getelementptr i64, i64* %r3, i32 2
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i192
-%r68 = shl i192 %r67, 128
-%r69 = or i192 %r63, %r68
-%r70 = zext i192 %r69 to i256
-%r72 = getelementptr i64, i64* %r3, i32 3
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i256
-%r75 = shl i256 %r74, 192
-%r76 = or i256 %r70, %r75
-%r77 = zext i256 %r76 to i320
-%r79 = getelementptr i64, i64* %r3, i32 4
-%r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i320
-%r82 = shl i320 %r81, 256
-%r83 = or i320 %r77, %r82
-%r84 = zext i320 %r83 to i384
-%r86 = getelementptr i64, i64* %r3, i32 5
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i384
-%r89 = shl i384 %r88, 320
-%r90 = or i384 %r84, %r89
-%r91 = zext i384 %r90 to i448
-%r93 = getelementptr i64, i64* %r3, i32 6
-%r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i448
-%r96 = shl i448 %r95, 384
-%r97 = or i448 %r91, %r96
-%r98 = zext i448 %r97 to i512
-%r100 = getelementptr i64, i64* %r3, i32 7
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i512
-%r103 = shl i512 %r102, 448
-%r104 = or i512 %r98, %r103
-%r105 = zext i512 %r54 to i576
-%r106 = zext i512 %r104 to i576
-%r107 = sub i576 %r105, %r106
-%r108 = trunc i576 %r107 to i512
-%r109 = lshr i576 %r107, 512
-%r110 = trunc i576 %r109 to i1
-%r111 = trunc i512 %r108 to i64
-%r113 = getelementptr i64, i64* %r1, i32 0
-store i64 %r111, i64* %r113
-%r114 = lshr i512 %r108, 64
-%r115 = trunc i512 %r114 to i64
-%r117 = getelementptr i64, i64* %r1, i32 1
-store i64 %r115, i64* %r117
-%r118 = lshr i512 %r114, 64
-%r119 = trunc i512 %r118 to i64
-%r121 = getelementptr i64, i64* %r1, i32 2
-store i64 %r119, i64* %r121
-%r122 = lshr i512 %r118, 64
-%r123 = trunc i512 %r122 to i64
-%r125 = getelementptr i64, i64* %r1, i32 3
-store i64 %r123, i64* %r125
-%r126 = lshr i512 %r122, 64
-%r127 = trunc i512 %r126 to i64
-%r129 = getelementptr i64, i64* %r1, i32 4
-store i64 %r127, i64* %r129
-%r130 = lshr i512 %r126, 64
-%r131 = trunc i512 %r130 to i64
-%r133 = getelementptr i64, i64* %r1, i32 5
-store i64 %r131, i64* %r133
-%r134 = lshr i512 %r130, 64
-%r135 = trunc i512 %r134 to i64
-%r137 = getelementptr i64, i64* %r1, i32 6
-store i64 %r135, i64* %r137
-%r138 = lshr i512 %r134, 64
-%r139 = trunc i512 %r138 to i64
-%r141 = getelementptr i64, i64* %r1, i32 7
-store i64 %r139, i64* %r141
-br i1%r110, label %carry, label %nocarry
-nocarry:
-ret void
-carry:
-%r142 = load i64, i64* %r4
-%r143 = zext i64 %r142 to i128
-%r145 = getelementptr i64, i64* %r4, i32 1
-%r146 = load i64, i64* %r145
-%r147 = zext i64 %r146 to i128
-%r148 = shl i128 %r147, 64
-%r149 = or i128 %r143, %r148
-%r150 = zext i128 %r149 to i192
-%r152 = getelementptr i64, i64* %r4, i32 2
-%r153 = load i64, i64* %r152
-%r154 = zext i64 %r153 to i192
-%r155 = shl i192 %r154, 128
-%r156 = or i192 %r150, %r155
-%r157 = zext i192 %r156 to i256
-%r159 = getelementptr i64, i64* %r4, i32 3
-%r160 = load i64, i64* %r159
-%r161 = zext i64 %r160 to i256
-%r162 = shl i256 %r161, 192
-%r163 = or i256 %r157, %r162
-%r164 = zext i256 %r163 to i320
-%r166 = getelementptr i64, i64* %r4, i32 4
-%r167 = load i64, i64* %r166
-%r168 = zext i64 %r167 to i320
-%r169 = shl i320 %r168, 256
-%r170 = or i320 %r164, %r169
-%r171 = zext i320 %r170 to i384
-%r173 = getelementptr i64, i64* %r4, i32 5
-%r174 = load i64, i64* %r173
-%r175 = zext i64 %r174 to i384
-%r176 = shl i384 %r175, 320
-%r177 = or i384 %r171, %r176
-%r178 = zext i384 %r177 to i448
-%r180 = getelementptr i64, i64* %r4, i32 6
-%r181 = load i64, i64* %r180
-%r182 = zext i64 %r181 to i448
-%r183 = shl i448 %r182, 384
-%r184 = or i448 %r178, %r183
-%r185 = zext i448 %r184 to i512
-%r187 = getelementptr i64, i64* %r4, i32 7
-%r188 = load i64, i64* %r187
-%r189 = zext i64 %r188 to i512
-%r190 = shl i512 %r189, 448
-%r191 = or i512 %r185, %r190
-%r192 = add i512 %r108, %r191
-%r193 = trunc i512 %r192 to i64
-%r195 = getelementptr i64, i64* %r1, i32 0
-store i64 %r193, i64* %r195
-%r196 = lshr i512 %r192, 64
-%r197 = trunc i512 %r196 to i64
-%r199 = getelementptr i64, i64* %r1, i32 1
-store i64 %r197, i64* %r199
-%r200 = lshr i512 %r196, 64
-%r201 = trunc i512 %r200 to i64
-%r203 = getelementptr i64, i64* %r1, i32 2
-store i64 %r201, i64* %r203
-%r204 = lshr i512 %r200, 64
-%r205 = trunc i512 %r204 to i64
-%r207 = getelementptr i64, i64* %r1, i32 3
-store i64 %r205, i64* %r207
-%r208 = lshr i512 %r204, 64
-%r209 = trunc i512 %r208 to i64
-%r211 = getelementptr i64, i64* %r1, i32 4
-store i64 %r209, i64* %r211
-%r212 = lshr i512 %r208, 64
-%r213 = trunc i512 %r212 to i64
-%r215 = getelementptr i64, i64* %r1, i32 5
-store i64 %r213, i64* %r215
-%r216 = lshr i512 %r212, 64
-%r217 = trunc i512 %r216 to i64
-%r219 = getelementptr i64, i64* %r1, i32 6
-store i64 %r217, i64* %r219
-%r220 = lshr i512 %r216, 64
-%r221 = trunc i512 %r220 to i64
-%r223 = getelementptr i64, i64* %r1, i32 7
-store i64 %r221, i64* %r223
-ret void
-}
-define void @mcl_fp_subNF8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
-{
-%r5 = load i64, i64* %r2
-%r6 = zext i64 %r5 to i128
-%r8 = getelementptr i64, i64* %r2, i32 1
-%r9 = load i64, i64* %r8
-%r10 = zext i64 %r9 to i128
-%r11 = shl i128 %r10, 64
-%r12 = or i128 %r6, %r11
-%r13 = zext i128 %r12 to i192
-%r15 = getelementptr i64, i64* %r2, i32 2
-%r16 = load i64, i64* %r15
-%r17 = zext i64 %r16 to i192
-%r18 = shl i192 %r17, 128
-%r19 = or i192 %r13, %r18
-%r20 = zext i192 %r19 to i256
-%r22 = getelementptr i64, i64* %r2, i32 3
-%r23 = load i64, i64* %r22
-%r24 = zext i64 %r23 to i256
-%r25 = shl i256 %r24, 192
-%r26 = or i256 %r20, %r25
-%r27 = zext i256 %r26 to i320
-%r29 = getelementptr i64, i64* %r2, i32 4
-%r30 = load i64, i64* %r29
-%r31 = zext i64 %r30 to i320
-%r32 = shl i320 %r31, 256
-%r33 = or i320 %r27, %r32
-%r34 = zext i320 %r33 to i384
-%r36 = getelementptr i64, i64* %r2, i32 5
-%r37 = load i64, i64* %r36
-%r38 = zext i64 %r37 to i384
-%r39 = shl i384 %r38, 320
-%r40 = or i384 %r34, %r39
-%r41 = zext i384 %r40 to i448
-%r43 = getelementptr i64, i64* %r2, i32 6
-%r44 = load i64, i64* %r43
-%r45 = zext i64 %r44 to i448
-%r46 = shl i448 %r45, 384
-%r47 = or i448 %r41, %r46
-%r48 = zext i448 %r47 to i512
-%r50 = getelementptr i64, i64* %r2, i32 7
-%r51 = load i64, i64* %r50
-%r52 = zext i64 %r51 to i512
-%r53 = shl i512 %r52, 448
-%r54 = or i512 %r48, %r53
-%r55 = load i64, i64* %r3
-%r56 = zext i64 %r55 to i128
-%r58 = getelementptr i64, i64* %r3, i32 1
-%r59 = load i64, i64* %r58
-%r60 = zext i64 %r59 to i128
-%r61 = shl i128 %r60, 64
-%r62 = or i128 %r56, %r61
-%r63 = zext i128 %r62 to i192
-%r65 = getelementptr i64, i64* %r3, i32 2
-%r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i192
-%r68 = shl i192 %r67, 128
-%r69 = or i192 %r63, %r68
-%r70 = zext i192 %r69 to i256
-%r72 = getelementptr i64, i64* %r3, i32 3
-%r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i256
-%r75 = shl i256 %r74, 192
-%r76 = or i256 %r70, %r75
-%r77 = zext i256 %r76 to i320
-%r79 = getelementptr i64, i64* %r3, i32 4
-%r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i320
-%r82 = shl i320 %r81, 256
-%r83 = or i320 %r77, %r82
-%r84 = zext i320 %r83 to i384
-%r86 = getelementptr i64, i64* %r3, i32 5
-%r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i384
-%r89 = shl i384 %r88, 320
-%r90 = or i384 %r84, %r89
-%r91 = zext i384 %r90 to i448
-%r93 = getelementptr i64, i64* %r3, i32 6
-%r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i448
-%r96 = shl i448 %r95, 384
-%r97 = or i448 %r91, %r96
-%r98 = zext i448 %r97 to i512
-%r100 = getelementptr i64, i64* %r3, i32 7
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i512
-%r103 = shl i512 %r102, 448
-%r104 = or i512 %r98, %r103
-%r105 = sub i512 %r54, %r104
-%r106 = lshr i512 %r105, 511
-%r107 = trunc i512 %r106 to i1
-%r108 = load i64, i64* %r4
-%r109 = zext i64 %r108 to i128
-%r111 = getelementptr i64, i64* %r4, i32 1
-%r112 = load i64, i64* %r111
-%r113 = zext i64 %r112 to i128
-%r114 = shl i128 %r113, 64
-%r115 = or i128 %r109, %r114
-%r116 = zext i128 %r115 to i192
-%r118 = getelementptr i64, i64* %r4, i32 2
-%r119 = load i64, i64* %r118
-%r120 = zext i64 %r119 to i192
-%r121 = shl i192 %r120, 128
-%r122 = or i192 %r116, %r121
-%r123 = zext i192 %r122 to i256
-%r125 = getelementptr i64, i64* %r4, i32 3
-%r126 = load i64, i64* %r125
-%r127 = zext i64 %r126 to i256
-%r128 = shl i256 %r127, 192
-%r129 = or i256 %r123, %r128
-%r130 = zext i256 %r129 to i320
-%r132 = getelementptr i64, i64* %r4, i32 4
-%r133 = load i64, i64* %r132
-%r134 = zext i64 %r133 to i320
-%r135 = shl i320 %r134, 256
-%r136 = or i320 %r130, %r135
-%r137 = zext i320 %r136 to i384
-%r139 = getelementptr i64, i64* %r4, i32 5
-%r140 = load i64, i64* %r139
-%r141 = zext i64 %r140 to i384
-%r142 = shl i384 %r141, 320
-%r143 = or i384 %r137, %r142
-%r144 = zext i384 %r143 to i448
-%r146 = getelementptr i64, i64* %r4, i32 6
-%r147 = load i64, i64* %r146
-%r148 = zext i64 %r147 to i448
-%r149 = shl i448 %r148, 384
-%r150 = or i448 %r144, %r149
-%r151 = zext i448 %r150 to i512
-%r153 = getelementptr i64, i64* %r4, i32 7
-%r154 = load i64, i64* %r153
-%r155 = zext i64 %r154 to i512
-%r156 = shl i512 %r155, 448
-%r157 = or i512 %r151, %r156
-%r159 = select i1 %r107, i512 %r157, i512 0
-%r160 = add i512 %r105, %r159
-%r161 = trunc i512 %r160 to i64
-%r163 = getelementptr i64, i64* %r1, i32 0
-store i64 %r161, i64* %r163
-%r164 = lshr i512 %r160, 64
-%r165 = trunc i512 %r164 to i64
-%r167 = getelementptr i64, i64* %r1, i32 1
-store i64 %r165, i64* %r167
-%r168 = lshr i512 %r164, 64
-%r169 = trunc i512 %r168 to i64
-%r171 = getelementptr i64, i64* %r1, i32 2
-store i64 %r169, i64* %r171
-%r172 = lshr i512 %r168, 64
-%r173 = trunc i512 %r172 to i64
-%r175 = getelementptr i64, i64* %r1, i32 3
-store i64 %r173, i64* %r175
-%r176 = lshr i512 %r172, 64
-%r177 = trunc i512 %r176 to i64
-%r179 = getelementptr i64, i64* %r1, i32 4
-store i64 %r177, i64* %r179
-%r180 = lshr i512 %r176, 64
-%r181 = trunc i512 %r180 to i64
-%r183 = getelementptr i64, i64* %r1, i32 5
-store i64 %r181, i64* %r183
-%r184 = lshr i512 %r180, 64
-%r185 = trunc i512 %r184 to i64
-%r187 = getelementptr i64, i64* %r1, i32 6
-store i64 %r185, i64* %r187
-%r188 = lshr i512 %r184, 64
-%r189 = trunc i512 %r188 to i64
-%r191 = getelementptr i64, i64* %r1, i32 7
-store i64 %r189, i64* %r191
-ret void
-}
-define void @mcl_fpDbl_add8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_subNF6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+{
+%r5 = load i64, i64* %r2
+%r6 = zext i64 %r5 to i128
+%r8 = getelementptr i64, i64* %r2, i32 1
+%r9 = load i64, i64* %r8
+%r10 = zext i64 %r9 to i128
+%r11 = shl i128 %r10, 64
+%r12 = or i128 %r6, %r11
+%r13 = zext i128 %r12 to i192
+%r15 = getelementptr i64, i64* %r2, i32 2
+%r16 = load i64, i64* %r15
+%r17 = zext i64 %r16 to i192
+%r18 = shl i192 %r17, 128
+%r19 = or i192 %r13, %r18
+%r20 = zext i192 %r19 to i256
+%r22 = getelementptr i64, i64* %r2, i32 3
+%r23 = load i64, i64* %r22
+%r24 = zext i64 %r23 to i256
+%r25 = shl i256 %r24, 192
+%r26 = or i256 %r20, %r25
+%r27 = zext i256 %r26 to i320
+%r29 = getelementptr i64, i64* %r2, i32 4
+%r30 = load i64, i64* %r29
+%r31 = zext i64 %r30 to i320
+%r32 = shl i320 %r31, 256
+%r33 = or i320 %r27, %r32
+%r34 = zext i320 %r33 to i384
+%r36 = getelementptr i64, i64* %r2, i32 5
+%r37 = load i64, i64* %r36
+%r38 = zext i64 %r37 to i384
+%r39 = shl i384 %r38, 320
+%r40 = or i384 %r34, %r39
+%r41 = load i64, i64* %r3
+%r42 = zext i64 %r41 to i128
+%r44 = getelementptr i64, i64* %r3, i32 1
+%r45 = load i64, i64* %r44
+%r46 = zext i64 %r45 to i128
+%r47 = shl i128 %r46, 64
+%r48 = or i128 %r42, %r47
+%r49 = zext i128 %r48 to i192
+%r51 = getelementptr i64, i64* %r3, i32 2
+%r52 = load i64, i64* %r51
+%r53 = zext i64 %r52 to i192
+%r54 = shl i192 %r53, 128
+%r55 = or i192 %r49, %r54
+%r56 = zext i192 %r55 to i256
+%r58 = getelementptr i64, i64* %r3, i32 3
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i256
+%r61 = shl i256 %r60, 192
+%r62 = or i256 %r56, %r61
+%r63 = zext i256 %r62 to i320
+%r65 = getelementptr i64, i64* %r3, i32 4
+%r66 = load i64, i64* %r65
+%r67 = zext i64 %r66 to i320
+%r68 = shl i320 %r67, 256
+%r69 = or i320 %r63, %r68
+%r70 = zext i320 %r69 to i384
+%r72 = getelementptr i64, i64* %r3, i32 5
+%r73 = load i64, i64* %r72
+%r74 = zext i64 %r73 to i384
+%r75 = shl i384 %r74, 320
+%r76 = or i384 %r70, %r75
+%r77 = sub i384 %r40, %r76
+%r78 = lshr i384 %r77, 383
+%r79 = trunc i384 %r78 to i1
+%r80 = load i64, i64* %r4
+%r81 = zext i64 %r80 to i128
+%r83 = getelementptr i64, i64* %r4, i32 1
+%r84 = load i64, i64* %r83
+%r85 = zext i64 %r84 to i128
+%r86 = shl i128 %r85, 64
+%r87 = or i128 %r81, %r86
+%r88 = zext i128 %r87 to i192
+%r90 = getelementptr i64, i64* %r4, i32 2
+%r91 = load i64, i64* %r90
+%r92 = zext i64 %r91 to i192
+%r93 = shl i192 %r92, 128
+%r94 = or i192 %r88, %r93
+%r95 = zext i192 %r94 to i256
+%r97 = getelementptr i64, i64* %r4, i32 3
+%r98 = load i64, i64* %r97
+%r99 = zext i64 %r98 to i256
+%r100 = shl i256 %r99, 192
+%r101 = or i256 %r95, %r100
+%r102 = zext i256 %r101 to i320
+%r104 = getelementptr i64, i64* %r4, i32 4
+%r105 = load i64, i64* %r104
+%r106 = zext i64 %r105 to i320
+%r107 = shl i320 %r106, 256
+%r108 = or i320 %r102, %r107
+%r109 = zext i320 %r108 to i384
+%r111 = getelementptr i64, i64* %r4, i32 5
+%r112 = load i64, i64* %r111
+%r113 = zext i64 %r112 to i384
+%r114 = shl i384 %r113, 320
+%r115 = or i384 %r109, %r114
+%r117 = select i1 %r79, i384 %r115, i384 0
+%r118 = add i384 %r77, %r117
+%r120 = getelementptr i64, i64* %r1, i32 0
+%r121 = trunc i384 %r118 to i64
+store i64 %r121, i64* %r120
+%r122 = lshr i384 %r118, 64
+%r124 = getelementptr i64, i64* %r1, i32 1
+%r125 = trunc i384 %r122 to i64
+store i64 %r125, i64* %r124
+%r126 = lshr i384 %r122, 64
+%r128 = getelementptr i64, i64* %r1, i32 2
+%r129 = trunc i384 %r126 to i64
+store i64 %r129, i64* %r128
+%r130 = lshr i384 %r126, 64
+%r132 = getelementptr i64, i64* %r1, i32 3
+%r133 = trunc i384 %r130 to i64
+store i64 %r133, i64* %r132
+%r134 = lshr i384 %r130, 64
+%r136 = getelementptr i64, i64* %r1, i32 4
+%r137 = trunc i384 %r134 to i64
+store i64 %r137, i64* %r136
+%r138 = lshr i384 %r134, 64
+%r140 = getelementptr i64, i64* %r1, i32 5
+%r141 = trunc i384 %r138 to i64
+store i64 %r141, i64* %r140
+ret void
+}
+define void @mcl_fpDbl_add6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -11934,253 +4641,177 @@ define void @mcl_fpDbl_add8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r67 = shl i640 %r66, 576
 %r68 = or i640 %r62, %r67
 %r69 = zext i640 %r68 to i704
-%r71 = getelementptr i64, i64* %r2, i32 10
-%r72 = load i64, i64* %r71
-%r73 = zext i64 %r72 to i704
-%r74 = shl i704 %r73, 640
-%r75 = or i704 %r69, %r74
-%r76 = zext i704 %r75 to i768
-%r78 = getelementptr i64, i64* %r2, i32 11
-%r79 = load i64, i64* %r78
-%r80 = zext i64 %r79 to i768
-%r81 = shl i768 %r80, 704
-%r82 = or i768 %r76, %r81
-%r83 = zext i768 %r82 to i832
-%r85 = getelementptr i64, i64* %r2, i32 12
-%r86 = load i64, i64* %r85
-%r87 = zext i64 %r86 to i832
-%r88 = shl i832 %r87, 768
-%r89 = or i832 %r83, %r88
-%r90 = zext i832 %r89 to i896
-%r92 = getelementptr i64, i64* %r2, i32 13
-%r93 = load i64, i64* %r92
-%r94 = zext i64 %r93 to i896
-%r95 = shl i896 %r94, 832
-%r96 = or i896 %r90, %r95
-%r97 = zext i896 %r96 to i960
-%r99 = getelementptr i64, i64* %r2, i32 14
-%r100 = load i64, i64* %r99
-%r101 = zext i64 %r100 to i960
-%r102 = shl i960 %r101, 896
-%r103 = or i960 %r97, %r102
-%r104 = zext i960 %r103 to i1024
-%r106 = getelementptr i64, i64* %r2, i32 15
-%r107 = load i64, i64* %r106
-%r108 = zext i64 %r107 to i1024
-%r109 = shl i1024 %r108, 960
-%r110 = or i1024 %r104, %r109
-%r111 = load i64, i64* %r3
-%r112 = zext i64 %r111 to i128
-%r114 = getelementptr i64, i64* %r3, i32 1
+%r71 = getelementptr i64, i64* %r2, i32 10
+%r72 = load i64, i64* %r71
+%r73 = zext i64 %r72 to i704
+%r74 = shl i704 %r73, 640
+%r75 = or i704 %r69, %r74
+%r76 = zext i704 %r75 to i768
+%r78 = getelementptr i64, i64* %r2, i32 11
+%r79 = load i64, i64* %r78
+%r80 = zext i64 %r79 to i768
+%r81 = shl i768 %r80, 704
+%r82 = or i768 %r76, %r81
+%r83 = load i64, i64* %r3
+%r84 = zext i64 %r83 to i128
+%r86 = getelementptr i64, i64* %r3, i32 1
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i128
+%r89 = shl i128 %r88, 64
+%r90 = or i128 %r84, %r89
+%r91 = zext i128 %r90 to i192
+%r93 = getelementptr i64, i64* %r3, i32 2
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i192
+%r96 = shl i192 %r95, 128
+%r97 = or i192 %r91, %r96
+%r98 = zext i192 %r97 to i256
+%r100 = getelementptr i64, i64* %r3, i32 3
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i256
+%r103 = shl i256 %r102, 192
+%r104 = or i256 %r98, %r103
+%r105 = zext i256 %r104 to i320
+%r107 = getelementptr i64, i64* %r3, i32 4
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i320
+%r110 = shl i320 %r109, 256
+%r111 = or i320 %r105, %r110
+%r112 = zext i320 %r111 to i384
+%r114 = getelementptr i64, i64* %r3, i32 5
 %r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i128
-%r117 = shl i128 %r116, 64
-%r118 = or i128 %r112, %r117
-%r119 = zext i128 %r118 to i192
-%r121 = getelementptr i64, i64* %r3, i32 2
+%r116 = zext i64 %r115 to i384
+%r117 = shl i384 %r116, 320
+%r118 = or i384 %r112, %r117
+%r119 = zext i384 %r118 to i448
+%r121 = getelementptr i64, i64* %r3, i32 6
 %r122 = load i64, i64* %r121
-%r123 = zext i64 %r122 to i192
-%r124 = shl i192 %r123, 128
-%r125 = or i192 %r119, %r124
-%r126 = zext i192 %r125 to i256
-%r128 = getelementptr i64, i64* %r3, i32 3
+%r123 = zext i64 %r122 to i448
+%r124 = shl i448 %r123, 384
+%r125 = or i448 %r119, %r124
+%r126 = zext i448 %r125 to i512
+%r128 = getelementptr i64, i64* %r3, i32 7
 %r129 = load i64, i64* %r128
-%r130 = zext i64 %r129 to i256
-%r131 = shl i256 %r130, 192
-%r132 = or i256 %r126, %r131
-%r133 = zext i256 %r132 to i320
-%r135 = getelementptr i64, i64* %r3, i32 4
+%r130 = zext i64 %r129 to i512
+%r131 = shl i512 %r130, 448
+%r132 = or i512 %r126, %r131
+%r133 = zext i512 %r132 to i576
+%r135 = getelementptr i64, i64* %r3, i32 8
 %r136 = load i64, i64* %r135
-%r137 = zext i64 %r136 to i320
-%r138 = shl i320 %r137, 256
-%r139 = or i320 %r133, %r138
-%r140 = zext i320 %r139 to i384
-%r142 = getelementptr i64, i64* %r3, i32 5
+%r137 = zext i64 %r136 to i576
+%r138 = shl i576 %r137, 512
+%r139 = or i576 %r133, %r138
+%r140 = zext i576 %r139 to i640
+%r142 = getelementptr i64, i64* %r3, i32 9
 %r143 = load i64, i64* %r142
-%r144 = zext i64 %r143 to i384
-%r145 = shl i384 %r144, 320
-%r146 = or i384 %r140, %r145
-%r147 = zext i384 %r146 to i448
-%r149 = getelementptr i64, i64* %r3, i32 6
+%r144 = zext i64 %r143 to i640
+%r145 = shl i640 %r144, 576
+%r146 = or i640 %r140, %r145
+%r147 = zext i640 %r146 to i704
+%r149 = getelementptr i64, i64* %r3, i32 10
 %r150 = load i64, i64* %r149
-%r151 = zext i64 %r150 to i448
-%r152 = shl i448 %r151, 384
-%r153 = or i448 %r147, %r152
-%r154 = zext i448 %r153 to i512
-%r156 = getelementptr i64, i64* %r3, i32 7
+%r151 = zext i64 %r150 to i704
+%r152 = shl i704 %r151, 640
+%r153 = or i704 %r147, %r152
+%r154 = zext i704 %r153 to i768
+%r156 = getelementptr i64, i64* %r3, i32 11
 %r157 = load i64, i64* %r156
-%r158 = zext i64 %r157 to i512
-%r159 = shl i512 %r158, 448
-%r160 = or i512 %r154, %r159
-%r161 = zext i512 %r160 to i576
-%r163 = getelementptr i64, i64* %r3, i32 8
-%r164 = load i64, i64* %r163
-%r165 = zext i64 %r164 to i576
-%r166 = shl i576 %r165, 512
-%r167 = or i576 %r161, %r166
-%r168 = zext i576 %r167 to i640
-%r170 = getelementptr i64, i64* %r3, i32 9
-%r171 = load i64, i64* %r170
-%r172 = zext i64 %r171 to i640
-%r173 = shl i640 %r172, 576
-%r174 = or i640 %r168, %r173
-%r175 = zext i640 %r174 to i704
-%r177 = getelementptr i64, i64* %r3, i32 10
-%r178 = load i64, i64* %r177
-%r179 = zext i64 %r178 to i704
-%r180 = shl i704 %r179, 640
-%r181 = or i704 %r175, %r180
-%r182 = zext i704 %r181 to i768
-%r184 = getelementptr i64, i64* %r3, i32 11
-%r185 = load i64, i64* %r184
-%r186 = zext i64 %r185 to i768
-%r187 = shl i768 %r186, 704
-%r188 = or i768 %r182, %r187
-%r189 = zext i768 %r188 to i832
-%r191 = getelementptr i64, i64* %r3, i32 12
-%r192 = load i64, i64* %r191
-%r193 = zext i64 %r192 to i832
-%r194 = shl i832 %r193, 768
-%r195 = or i832 %r189, %r194
-%r196 = zext i832 %r195 to i896
-%r198 = getelementptr i64, i64* %r3, i32 13
-%r199 = load i64, i64* %r198
-%r200 = zext i64 %r199 to i896
-%r201 = shl i896 %r200, 832
-%r202 = or i896 %r196, %r201
-%r203 = zext i896 %r202 to i960
-%r205 = getelementptr i64, i64* %r3, i32 14
-%r206 = load i64, i64* %r205
-%r207 = zext i64 %r206 to i960
-%r208 = shl i960 %r207, 896
-%r209 = or i960 %r203, %r208
-%r210 = zext i960 %r209 to i1024
-%r212 = getelementptr i64, i64* %r3, i32 15
-%r213 = load i64, i64* %r212
-%r214 = zext i64 %r213 to i1024
-%r215 = shl i1024 %r214, 960
-%r216 = or i1024 %r210, %r215
-%r217 = zext i1024 %r110 to i1088
-%r218 = zext i1024 %r216 to i1088
-%r219 = add i1088 %r217, %r218
-%r220 = trunc i1088 %r219 to i512
-%r221 = trunc i512 %r220 to i64
-%r223 = getelementptr i64, i64* %r1, i32 0
-store i64 %r221, i64* %r223
-%r224 = lshr i512 %r220, 64
-%r225 = trunc i512 %r224 to i64
-%r227 = getelementptr i64, i64* %r1, i32 1
-store i64 %r225, i64* %r227
-%r228 = lshr i512 %r224, 64
-%r229 = trunc i512 %r228 to i64
-%r231 = getelementptr i64, i64* %r1, i32 2
-store i64 %r229, i64* %r231
-%r232 = lshr i512 %r228, 64
-%r233 = trunc i512 %r232 to i64
-%r235 = getelementptr i64, i64* %r1, i32 3
-store i64 %r233, i64* %r235
-%r236 = lshr i512 %r232, 64
-%r237 = trunc i512 %r236 to i64
-%r239 = getelementptr i64, i64* %r1, i32 4
-store i64 %r237, i64* %r239
-%r240 = lshr i512 %r236, 64
-%r241 = trunc i512 %r240 to i64
-%r243 = getelementptr i64, i64* %r1, i32 5
-store i64 %r241, i64* %r243
-%r244 = lshr i512 %r240, 64
-%r245 = trunc i512 %r244 to i64
-%r247 = getelementptr i64, i64* %r1, i32 6
-store i64 %r245, i64* %r247
-%r248 = lshr i512 %r244, 64
-%r249 = trunc i512 %r248 to i64
-%r251 = getelementptr i64, i64* %r1, i32 7
-store i64 %r249, i64* %r251
-%r252 = lshr i1088 %r219, 512
-%r253 = trunc i1088 %r252 to i576
-%r254 = load i64, i64* %r4
-%r255 = zext i64 %r254 to i128
-%r257 = getelementptr i64, i64* %r4, i32 1
-%r258 = load i64, i64* %r257
-%r259 = zext i64 %r258 to i128
-%r260 = shl i128 %r259, 64
-%r261 = or i128 %r255, %r260
-%r262 = zext i128 %r261 to i192
-%r264 = getelementptr i64, i64* %r4, i32 2
-%r265 = load i64, i64* %r264
-%r266 = zext i64 %r265 to i192
-%r267 = shl i192 %r266, 128
-%r268 = or i192 %r262, %r267
-%r269 = zext i192 %r268 to i256
-%r271 = getelementptr i64, i64* %r4, i32 3
-%r272 = load i64, i64* %r271
-%r273 = zext i64 %r272 to i256
-%r274 = shl i256 %r273, 192
-%r275 = or i256 %r269, %r274
-%r276 = zext i256 %r275 to i320
-%r278 = getelementptr i64, i64* %r4, i32 4
-%r279 = load i64, i64* %r278
-%r280 = zext i64 %r279 to i320
-%r281 = shl i320 %r280, 256
-%r282 = or i320 %r276, %r281
-%r283 = zext i320 %r282 to i384
-%r285 = getelementptr i64, i64* %r4, i32 5
-%r286 = load i64, i64* %r285
-%r287 = zext i64 %r286 to i384
-%r288 = shl i384 %r287, 320
-%r289 = or i384 %r283, %r288
-%r290 = zext i384 %r289 to i448
-%r292 = getelementptr i64, i64* %r4, i32 6
-%r293 = load i64, i64* %r292
-%r294 = zext i64 %r293 to i448
-%r295 = shl i448 %r294, 384
-%r296 = or i448 %r290, %r295
-%r297 = zext i448 %r296 to i512
-%r299 = getelementptr i64, i64* %r4, i32 7
-%r300 = load i64, i64* %r299
-%r301 = zext i64 %r300 to i512
-%r302 = shl i512 %r301, 448
-%r303 = or i512 %r297, %r302
-%r304 = zext i512 %r303 to i576
-%r305 = sub i576 %r253, %r304
-%r306 = lshr i576 %r305, 512
-%r307 = trunc i576 %r306 to i1
-%r308 = select i1 %r307, i576 %r253, i576 %r305
-%r309 = trunc i576 %r308 to i512
-%r311 = getelementptr i64, i64* %r1, i32 8
-%r312 = trunc i512 %r309 to i64
-%r314 = getelementptr i64, i64* %r311, i32 0
-store i64 %r312, i64* %r314
-%r315 = lshr i512 %r309, 64
-%r316 = trunc i512 %r315 to i64
-%r318 = getelementptr i64, i64* %r311, i32 1
-store i64 %r316, i64* %r318
-%r319 = lshr i512 %r315, 64
-%r320 = trunc i512 %r319 to i64
-%r322 = getelementptr i64, i64* %r311, i32 2
-store i64 %r320, i64* %r322
-%r323 = lshr i512 %r319, 64
-%r324 = trunc i512 %r323 to i64
-%r326 = getelementptr i64, i64* %r311, i32 3
-store i64 %r324, i64* %r326
-%r327 = lshr i512 %r323, 64
-%r328 = trunc i512 %r327 to i64
-%r330 = getelementptr i64, i64* %r311, i32 4
-store i64 %r328, i64* %r330
-%r331 = lshr i512 %r327, 64
-%r332 = trunc i512 %r331 to i64
-%r334 = getelementptr i64, i64* %r311, i32 5
-store i64 %r332, i64* %r334
-%r335 = lshr i512 %r331, 64
-%r336 = trunc i512 %r335 to i64
-%r338 = getelementptr i64, i64* %r311, i32 6
-store i64 %r336, i64* %r338
-%r339 = lshr i512 %r335, 64
-%r340 = trunc i512 %r339 to i64
-%r342 = getelementptr i64, i64* %r311, i32 7
-store i64 %r340, i64* %r342
+%r158 = zext i64 %r157 to i768
+%r159 = shl i768 %r158, 704
+%r160 = or i768 %r154, %r159
+%r161 = zext i768 %r82 to i832
+%r162 = zext i768 %r160 to i832
+%r163 = add i832 %r161, %r162
+%r164 = trunc i832 %r163 to i384
+%r166 = getelementptr i64, i64* %r1, i32 0
+%r167 = trunc i384 %r164 to i64
+store i64 %r167, i64* %r166
+%r168 = lshr i384 %r164, 64
+%r170 = getelementptr i64, i64* %r1, i32 1
+%r171 = trunc i384 %r168 to i64
+store i64 %r171, i64* %r170
+%r172 = lshr i384 %r168, 64
+%r174 = getelementptr i64, i64* %r1, i32 2
+%r175 = trunc i384 %r172 to i64
+store i64 %r175, i64* %r174
+%r176 = lshr i384 %r172, 64
+%r178 = getelementptr i64, i64* %r1, i32 3
+%r179 = trunc i384 %r176 to i64
+store i64 %r179, i64* %r178
+%r180 = lshr i384 %r176, 64
+%r182 = getelementptr i64, i64* %r1, i32 4
+%r183 = trunc i384 %r180 to i64
+store i64 %r183, i64* %r182
+%r184 = lshr i384 %r180, 64
+%r186 = getelementptr i64, i64* %r1, i32 5
+%r187 = trunc i384 %r184 to i64
+store i64 %r187, i64* %r186
+%r188 = lshr i832 %r163, 384
+%r189 = trunc i832 %r188 to i448
+%r190 = load i64, i64* %r4
+%r191 = zext i64 %r190 to i128
+%r193 = getelementptr i64, i64* %r4, i32 1
+%r194 = load i64, i64* %r193
+%r195 = zext i64 %r194 to i128
+%r196 = shl i128 %r195, 64
+%r197 = or i128 %r191, %r196
+%r198 = zext i128 %r197 to i192
+%r200 = getelementptr i64, i64* %r4, i32 2
+%r201 = load i64, i64* %r200
+%r202 = zext i64 %r201 to i192
+%r203 = shl i192 %r202, 128
+%r204 = or i192 %r198, %r203
+%r205 = zext i192 %r204 to i256
+%r207 = getelementptr i64, i64* %r4, i32 3
+%r208 = load i64, i64* %r207
+%r209 = zext i64 %r208 to i256
+%r210 = shl i256 %r209, 192
+%r211 = or i256 %r205, %r210
+%r212 = zext i256 %r211 to i320
+%r214 = getelementptr i64, i64* %r4, i32 4
+%r215 = load i64, i64* %r214
+%r216 = zext i64 %r215 to i320
+%r217 = shl i320 %r216, 256
+%r218 = or i320 %r212, %r217
+%r219 = zext i320 %r218 to i384
+%r221 = getelementptr i64, i64* %r4, i32 5
+%r222 = load i64, i64* %r221
+%r223 = zext i64 %r222 to i384
+%r224 = shl i384 %r223, 320
+%r225 = or i384 %r219, %r224
+%r226 = zext i384 %r225 to i448
+%r227 = sub i448 %r189, %r226
+%r228 = lshr i448 %r227, 384
+%r229 = trunc i448 %r228 to i1
+%r230 = select i1 %r229, i448 %r189, i448 %r227
+%r231 = trunc i448 %r230 to i384
+%r233 = getelementptr i64, i64* %r1, i32 6
+%r235 = getelementptr i64, i64* %r233, i32 0
+%r236 = trunc i384 %r231 to i64
+store i64 %r236, i64* %r235
+%r237 = lshr i384 %r231, 64
+%r239 = getelementptr i64, i64* %r233, i32 1
+%r240 = trunc i384 %r237 to i64
+store i64 %r240, i64* %r239
+%r241 = lshr i384 %r237, 64
+%r243 = getelementptr i64, i64* %r233, i32 2
+%r244 = trunc i384 %r241 to i64
+store i64 %r244, i64* %r243
+%r245 = lshr i384 %r241, 64
+%r247 = getelementptr i64, i64* %r233, i32 3
+%r248 = trunc i384 %r245 to i64
+store i64 %r248, i64* %r247
+%r249 = lshr i384 %r245, 64
+%r251 = getelementptr i64, i64* %r233, i32 4
+%r252 = trunc i384 %r249 to i64
+store i64 %r252, i64* %r251
+%r253 = lshr i384 %r249, 64
+%r255 = getelementptr i64, i64* %r233, i32 5
+%r256 = trunc i384 %r253 to i64
+store i64 %r256, i64* %r255
 ret void
 }
-define void @mcl_fpDbl_sub8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fpDbl_sub6L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -12249,240 +4880,164 @@ define void @mcl_fpDbl_sub8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r80 = zext i64 %r79 to i768
 %r81 = shl i768 %r80, 704
 %r82 = or i768 %r76, %r81
-%r83 = zext i768 %r82 to i832
-%r85 = getelementptr i64, i64* %r2, i32 12
-%r86 = load i64, i64* %r85
-%r87 = zext i64 %r86 to i832
-%r88 = shl i832 %r87, 768
-%r89 = or i832 %r83, %r88
-%r90 = zext i832 %r89 to i896
-%r92 = getelementptr i64, i64* %r2, i32 13
-%r93 = load i64, i64* %r92
-%r94 = zext i64 %r93 to i896
-%r95 = shl i896 %r94, 832
-%r96 = or i896 %r90, %r95
-%r97 = zext i896 %r96 to i960
-%r99 = getelementptr i64, i64* %r2, i32 14
-%r100 = load i64, i64* %r99
-%r101 = zext i64 %r100 to i960
-%r102 = shl i960 %r101, 896
-%r103 = or i960 %r97, %r102
-%r104 = zext i960 %r103 to i1024
-%r106 = getelementptr i64, i64* %r2, i32 15
-%r107 = load i64, i64* %r106
-%r108 = zext i64 %r107 to i1024
-%r109 = shl i1024 %r108, 960
-%r110 = or i1024 %r104, %r109
-%r111 = load i64, i64* %r3
-%r112 = zext i64 %r111 to i128
-%r114 = getelementptr i64, i64* %r3, i32 1
+%r83 = load i64, i64* %r3
+%r84 = zext i64 %r83 to i128
+%r86 = getelementptr i64, i64* %r3, i32 1
+%r87 = load i64, i64* %r86
+%r88 = zext i64 %r87 to i128
+%r89 = shl i128 %r88, 64
+%r90 = or i128 %r84, %r89
+%r91 = zext i128 %r90 to i192
+%r93 = getelementptr i64, i64* %r3, i32 2
+%r94 = load i64, i64* %r93
+%r95 = zext i64 %r94 to i192
+%r96 = shl i192 %r95, 128
+%r97 = or i192 %r91, %r96
+%r98 = zext i192 %r97 to i256
+%r100 = getelementptr i64, i64* %r3, i32 3
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i256
+%r103 = shl i256 %r102, 192
+%r104 = or i256 %r98, %r103
+%r105 = zext i256 %r104 to i320
+%r107 = getelementptr i64, i64* %r3, i32 4
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i320
+%r110 = shl i320 %r109, 256
+%r111 = or i320 %r105, %r110
+%r112 = zext i320 %r111 to i384
+%r114 = getelementptr i64, i64* %r3, i32 5
 %r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i128
-%r117 = shl i128 %r116, 64
-%r118 = or i128 %r112, %r117
-%r119 = zext i128 %r118 to i192
-%r121 = getelementptr i64, i64* %r3, i32 2
+%r116 = zext i64 %r115 to i384
+%r117 = shl i384 %r116, 320
+%r118 = or i384 %r112, %r117
+%r119 = zext i384 %r118 to i448
+%r121 = getelementptr i64, i64* %r3, i32 6
 %r122 = load i64, i64* %r121
-%r123 = zext i64 %r122 to i192
-%r124 = shl i192 %r123, 128
-%r125 = or i192 %r119, %r124
-%r126 = zext i192 %r125 to i256
-%r128 = getelementptr i64, i64* %r3, i32 3
+%r123 = zext i64 %r122 to i448
+%r124 = shl i448 %r123, 384
+%r125 = or i448 %r119, %r124
+%r126 = zext i448 %r125 to i512
+%r128 = getelementptr i64, i64* %r3, i32 7
 %r129 = load i64, i64* %r128
-%r130 = zext i64 %r129 to i256
-%r131 = shl i256 %r130, 192
-%r132 = or i256 %r126, %r131
-%r133 = zext i256 %r132 to i320
-%r135 = getelementptr i64, i64* %r3, i32 4
+%r130 = zext i64 %r129 to i512
+%r131 = shl i512 %r130, 448
+%r132 = or i512 %r126, %r131
+%r133 = zext i512 %r132 to i576
+%r135 = getelementptr i64, i64* %r3, i32 8
 %r136 = load i64, i64* %r135
-%r137 = zext i64 %r136 to i320
-%r138 = shl i320 %r137, 256
-%r139 = or i320 %r133, %r138
-%r140 = zext i320 %r139 to i384
-%r142 = getelementptr i64, i64* %r3, i32 5
+%r137 = zext i64 %r136 to i576
+%r138 = shl i576 %r137, 512
+%r139 = or i576 %r133, %r138
+%r140 = zext i576 %r139 to i640
+%r142 = getelementptr i64, i64* %r3, i32 9
 %r143 = load i64, i64* %r142
-%r144 = zext i64 %r143 to i384
-%r145 = shl i384 %r144, 320
-%r146 = or i384 %r140, %r145
-%r147 = zext i384 %r146 to i448
-%r149 = getelementptr i64, i64* %r3, i32 6
+%r144 = zext i64 %r143 to i640
+%r145 = shl i640 %r144, 576
+%r146 = or i640 %r140, %r145
+%r147 = zext i640 %r146 to i704
+%r149 = getelementptr i64, i64* %r3, i32 10
 %r150 = load i64, i64* %r149
-%r151 = zext i64 %r150 to i448
-%r152 = shl i448 %r151, 384
-%r153 = or i448 %r147, %r152
-%r154 = zext i448 %r153 to i512
-%r156 = getelementptr i64, i64* %r3, i32 7
-%r157 = load i64, i64* %r156
-%r158 = zext i64 %r157 to i512
-%r159 = shl i512 %r158, 448
-%r160 = or i512 %r154, %r159
-%r161 = zext i512 %r160 to i576
-%r163 = getelementptr i64, i64* %r3, i32 8
-%r164 = load i64, i64* %r163
-%r165 = zext i64 %r164 to i576
-%r166 = shl i576 %r165, 512
-%r167 = or i576 %r161, %r166
-%r168 = zext i576 %r167 to i640
-%r170 = getelementptr i64, i64* %r3, i32 9
-%r171 = load i64, i64* %r170
-%r172 = zext i64 %r171 to i640
-%r173 = shl i640 %r172, 576
-%r174 = or i640 %r168, %r173
-%r175 = zext i640 %r174 to i704
-%r177 = getelementptr i64, i64* %r3, i32 10
-%r178 = load i64, i64* %r177
-%r179 = zext i64 %r178 to i704
-%r180 = shl i704 %r179, 640
-%r181 = or i704 %r175, %r180
-%r182 = zext i704 %r181 to i768
-%r184 = getelementptr i64, i64* %r3, i32 11
-%r185 = load i64, i64* %r184
-%r186 = zext i64 %r185 to i768
-%r187 = shl i768 %r186, 704
-%r188 = or i768 %r182, %r187
-%r189 = zext i768 %r188 to i832
-%r191 = getelementptr i64, i64* %r3, i32 12
-%r192 = load i64, i64* %r191
-%r193 = zext i64 %r192 to i832
-%r194 = shl i832 %r193, 768
-%r195 = or i832 %r189, %r194
-%r196 = zext i832 %r195 to i896
-%r198 = getelementptr i64, i64* %r3, i32 13
-%r199 = load i64, i64* %r198
-%r200 = zext i64 %r199 to i896
-%r201 = shl i896 %r200, 832
-%r202 = or i896 %r196, %r201
-%r203 = zext i896 %r202 to i960
-%r205 = getelementptr i64, i64* %r3, i32 14
-%r206 = load i64, i64* %r205
-%r207 = zext i64 %r206 to i960
-%r208 = shl i960 %r207, 896
-%r209 = or i960 %r203, %r208
-%r210 = zext i960 %r209 to i1024
-%r212 = getelementptr i64, i64* %r3, i32 15
-%r213 = load i64, i64* %r212
-%r214 = zext i64 %r213 to i1024
-%r215 = shl i1024 %r214, 960
-%r216 = or i1024 %r210, %r215
-%r217 = zext i1024 %r110 to i1088
-%r218 = zext i1024 %r216 to i1088
-%r219 = sub i1088 %r217, %r218
-%r220 = trunc i1088 %r219 to i512
-%r221 = trunc i512 %r220 to i64
-%r223 = getelementptr i64, i64* %r1, i32 0
-store i64 %r221, i64* %r223
-%r224 = lshr i512 %r220, 64
-%r225 = trunc i512 %r224 to i64
-%r227 = getelementptr i64, i64* %r1, i32 1
-store i64 %r225, i64* %r227
-%r228 = lshr i512 %r224, 64
-%r229 = trunc i512 %r228 to i64
-%r231 = getelementptr i64, i64* %r1, i32 2
-store i64 %r229, i64* %r231
-%r232 = lshr i512 %r228, 64
-%r233 = trunc i512 %r232 to i64
-%r235 = getelementptr i64, i64* %r1, i32 3
-store i64 %r233, i64* %r235
-%r236 = lshr i512 %r232, 64
-%r237 = trunc i512 %r236 to i64
-%r239 = getelementptr i64, i64* %r1, i32 4
-store i64 %r237, i64* %r239
-%r240 = lshr i512 %r236, 64
-%r241 = trunc i512 %r240 to i64
-%r243 = getelementptr i64, i64* %r1, i32 5
-store i64 %r241, i64* %r243
-%r244 = lshr i512 %r240, 64
-%r245 = trunc i512 %r244 to i64
-%r247 = getelementptr i64, i64* %r1, i32 6
-store i64 %r245, i64* %r247
-%r248 = lshr i512 %r244, 64
-%r249 = trunc i512 %r248 to i64
-%r251 = getelementptr i64, i64* %r1, i32 7
-store i64 %r249, i64* %r251
-%r252 = lshr i1088 %r219, 512
-%r253 = trunc i1088 %r252 to i512
-%r254 = lshr i1088 %r219, 1024
-%r255 = trunc i1088 %r254 to i1
-%r256 = load i64, i64* %r4
-%r257 = zext i64 %r256 to i128
-%r259 = getelementptr i64, i64* %r4, i32 1
-%r260 = load i64, i64* %r259
-%r261 = zext i64 %r260 to i128
-%r262 = shl i128 %r261, 64
-%r263 = or i128 %r257, %r262
-%r264 = zext i128 %r263 to i192
-%r266 = getelementptr i64, i64* %r4, i32 2
-%r267 = load i64, i64* %r266
-%r268 = zext i64 %r267 to i192
-%r269 = shl i192 %r268, 128
-%r270 = or i192 %r264, %r269
-%r271 = zext i192 %r270 to i256
-%r273 = getelementptr i64, i64* %r4, i32 3
-%r274 = load i64, i64* %r273
-%r275 = zext i64 %r274 to i256
-%r276 = shl i256 %r275, 192
-%r277 = or i256 %r271, %r276
-%r278 = zext i256 %r277 to i320
-%r280 = getelementptr i64, i64* %r4, i32 4
-%r281 = load i64, i64* %r280
-%r282 = zext i64 %r281 to i320
-%r283 = shl i320 %r282, 256
-%r284 = or i320 %r278, %r283
-%r285 = zext i320 %r284 to i384
-%r287 = getelementptr i64, i64* %r4, i32 5
-%r288 = load i64, i64* %r287
-%r289 = zext i64 %r288 to i384
-%r290 = shl i384 %r289, 320
-%r291 = or i384 %r285, %r290
-%r292 = zext i384 %r291 to i448
-%r294 = getelementptr i64, i64* %r4, i32 6
-%r295 = load i64, i64* %r294
-%r296 = zext i64 %r295 to i448
-%r297 = shl i448 %r296, 384
-%r298 = or i448 %r292, %r297
-%r299 = zext i448 %r298 to i512
-%r301 = getelementptr i64, i64* %r4, i32 7
-%r302 = load i64, i64* %r301
-%r303 = zext i64 %r302 to i512
-%r304 = shl i512 %r303, 448
-%r305 = or i512 %r299, %r304
-%r307 = select i1 %r255, i512 %r305, i512 0
-%r308 = add i512 %r253, %r307
-%r310 = getelementptr i64, i64* %r1, i32 8
-%r311 = trunc i512 %r308 to i64
-%r313 = getelementptr i64, i64* %r310, i32 0
-store i64 %r311, i64* %r313
-%r314 = lshr i512 %r308, 64
-%r315 = trunc i512 %r314 to i64
-%r317 = getelementptr i64, i64* %r310, i32 1
-store i64 %r315, i64* %r317
-%r318 = lshr i512 %r314, 64
-%r319 = trunc i512 %r318 to i64
-%r321 = getelementptr i64, i64* %r310, i32 2
-store i64 %r319, i64* %r321
-%r322 = lshr i512 %r318, 64
-%r323 = trunc i512 %r322 to i64
-%r325 = getelementptr i64, i64* %r310, i32 3
-store i64 %r323, i64* %r325
-%r326 = lshr i512 %r322, 64
-%r327 = trunc i512 %r326 to i64
-%r329 = getelementptr i64, i64* %r310, i32 4
-store i64 %r327, i64* %r329
-%r330 = lshr i512 %r326, 64
-%r331 = trunc i512 %r330 to i64
-%r333 = getelementptr i64, i64* %r310, i32 5
-store i64 %r331, i64* %r333
-%r334 = lshr i512 %r330, 64
-%r335 = trunc i512 %r334 to i64
-%r337 = getelementptr i64, i64* %r310, i32 6
-store i64 %r335, i64* %r337
-%r338 = lshr i512 %r334, 64
-%r339 = trunc i512 %r338 to i64
-%r341 = getelementptr i64, i64* %r310, i32 7
-store i64 %r339, i64* %r341
+%r151 = zext i64 %r150 to i704
+%r152 = shl i704 %r151, 640
+%r153 = or i704 %r147, %r152
+%r154 = zext i704 %r153 to i768
+%r156 = getelementptr i64, i64* %r3, i32 11
+%r157 = load i64, i64* %r156
+%r158 = zext i64 %r157 to i768
+%r159 = shl i768 %r158, 704
+%r160 = or i768 %r154, %r159
+%r161 = zext i768 %r82 to i832
+%r162 = zext i768 %r160 to i832
+%r163 = sub i832 %r161, %r162
+%r164 = trunc i832 %r163 to i384
+%r166 = getelementptr i64, i64* %r1, i32 0
+%r167 = trunc i384 %r164 to i64
+store i64 %r167, i64* %r166
+%r168 = lshr i384 %r164, 64
+%r170 = getelementptr i64, i64* %r1, i32 1
+%r171 = trunc i384 %r168 to i64
+store i64 %r171, i64* %r170
+%r172 = lshr i384 %r168, 64
+%r174 = getelementptr i64, i64* %r1, i32 2
+%r175 = trunc i384 %r172 to i64
+store i64 %r175, i64* %r174
+%r176 = lshr i384 %r172, 64
+%r178 = getelementptr i64, i64* %r1, i32 3
+%r179 = trunc i384 %r176 to i64
+store i64 %r179, i64* %r178
+%r180 = lshr i384 %r176, 64
+%r182 = getelementptr i64, i64* %r1, i32 4
+%r183 = trunc i384 %r180 to i64
+store i64 %r183, i64* %r182
+%r184 = lshr i384 %r180, 64
+%r186 = getelementptr i64, i64* %r1, i32 5
+%r187 = trunc i384 %r184 to i64
+store i64 %r187, i64* %r186
+%r188 = lshr i832 %r163, 384
+%r189 = trunc i832 %r188 to i384
+%r190 = lshr i832 %r163, 768
+%r191 = trunc i832 %r190 to i1
+%r192 = load i64, i64* %r4
+%r193 = zext i64 %r192 to i128
+%r195 = getelementptr i64, i64* %r4, i32 1
+%r196 = load i64, i64* %r195
+%r197 = zext i64 %r196 to i128
+%r198 = shl i128 %r197, 64
+%r199 = or i128 %r193, %r198
+%r200 = zext i128 %r199 to i192
+%r202 = getelementptr i64, i64* %r4, i32 2
+%r203 = load i64, i64* %r202
+%r204 = zext i64 %r203 to i192
+%r205 = shl i192 %r204, 128
+%r206 = or i192 %r200, %r205
+%r207 = zext i192 %r206 to i256
+%r209 = getelementptr i64, i64* %r4, i32 3
+%r210 = load i64, i64* %r209
+%r211 = zext i64 %r210 to i256
+%r212 = shl i256 %r211, 192
+%r213 = or i256 %r207, %r212
+%r214 = zext i256 %r213 to i320
+%r216 = getelementptr i64, i64* %r4, i32 4
+%r217 = load i64, i64* %r216
+%r218 = zext i64 %r217 to i320
+%r219 = shl i320 %r218, 256
+%r220 = or i320 %r214, %r219
+%r221 = zext i320 %r220 to i384
+%r223 = getelementptr i64, i64* %r4, i32 5
+%r224 = load i64, i64* %r223
+%r225 = zext i64 %r224 to i384
+%r226 = shl i384 %r225, 320
+%r227 = or i384 %r221, %r226
+%r229 = select i1 %r191, i384 %r227, i384 0
+%r230 = add i384 %r189, %r229
+%r232 = getelementptr i64, i64* %r1, i32 6
+%r234 = getelementptr i64, i64* %r232, i32 0
+%r235 = trunc i384 %r230 to i64
+store i64 %r235, i64* %r234
+%r236 = lshr i384 %r230, 64
+%r238 = getelementptr i64, i64* %r232, i32 1
+%r239 = trunc i384 %r236 to i64
+store i64 %r239, i64* %r238
+%r240 = lshr i384 %r236, 64
+%r242 = getelementptr i64, i64* %r232, i32 2
+%r243 = trunc i384 %r240 to i64
+store i64 %r243, i64* %r242
+%r244 = lshr i384 %r240, 64
+%r246 = getelementptr i64, i64* %r232, i32 3
+%r247 = trunc i384 %r244 to i64
+store i64 %r247, i64* %r246
+%r248 = lshr i384 %r244, 64
+%r250 = getelementptr i64, i64* %r232, i32 4
+%r251 = trunc i384 %r248 to i64
+store i64 %r251, i64* %r250
+%r252 = lshr i384 %r248, 64
+%r254 = getelementptr i64, i64* %r232, i32 5
+%r255 = trunc i384 %r252 to i64
+store i64 %r255, i64* %r254
 ret void
 }
-define i640 @mulPv576x64(i64* noalias  %r2, i64 %r3)
+define i576 @mulPv512x64(i64* noalias  %r2, i64 %r3)
 {
 %r5 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 0)
 %r6 = trunc i128 %r5 to i64
@@ -12508,711 +5063,919 @@ define i640 @mulPv576x64(i64* noalias  %r2, i64 %r3)
 %r33 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 7)
 %r34 = trunc i128 %r33 to i64
 %r35 = call i64 @extractHigh64(i128 %r33)
-%r37 = call i128 @mulPos64x64(i64* %r2, i64 %r3, i64 8)
-%r38 = trunc i128 %r37 to i64
-%r39 = call i64 @extractHigh64(i128 %r37)
-%r40 = zext i64 %r6 to i128
-%r41 = zext i64 %r10 to i128
-%r42 = shl i128 %r41, 64
-%r43 = or i128 %r40, %r42
-%r44 = zext i128 %r43 to i192
-%r45 = zext i64 %r14 to i192
-%r46 = shl i192 %r45, 128
-%r47 = or i192 %r44, %r46
-%r48 = zext i192 %r47 to i256
-%r49 = zext i64 %r18 to i256
-%r50 = shl i256 %r49, 192
-%r51 = or i256 %r48, %r50
-%r52 = zext i256 %r51 to i320
-%r53 = zext i64 %r22 to i320
-%r54 = shl i320 %r53, 256
-%r55 = or i320 %r52, %r54
-%r56 = zext i320 %r55 to i384
-%r57 = zext i64 %r26 to i384
-%r58 = shl i384 %r57, 320
-%r59 = or i384 %r56, %r58
-%r60 = zext i384 %r59 to i448
-%r61 = zext i64 %r30 to i448
-%r62 = shl i448 %r61, 384
-%r63 = or i448 %r60, %r62
-%r64 = zext i448 %r63 to i512
-%r65 = zext i64 %r34 to i512
-%r66 = shl i512 %r65, 448
-%r67 = or i512 %r64, %r66
-%r68 = zext i512 %r67 to i576
-%r69 = zext i64 %r38 to i576
-%r70 = shl i576 %r69, 512
-%r71 = or i576 %r68, %r70
-%r72 = zext i64 %r7 to i128
-%r73 = zext i64 %r11 to i128
-%r74 = shl i128 %r73, 64
-%r75 = or i128 %r72, %r74
-%r76 = zext i128 %r75 to i192
-%r77 = zext i64 %r15 to i192
-%r78 = shl i192 %r77, 128
-%r79 = or i192 %r76, %r78
-%r80 = zext i192 %r79 to i256
-%r81 = zext i64 %r19 to i256
-%r82 = shl i256 %r81, 192
-%r83 = or i256 %r80, %r82
-%r84 = zext i256 %r83 to i320
-%r85 = zext i64 %r23 to i320
-%r86 = shl i320 %r85, 256
-%r87 = or i320 %r84, %r86
-%r88 = zext i320 %r87 to i384
-%r89 = zext i64 %r27 to i384
-%r90 = shl i384 %r89, 320
-%r91 = or i384 %r88, %r90
-%r92 = zext i384 %r91 to i448
-%r93 = zext i64 %r31 to i448
-%r94 = shl i448 %r93, 384
-%r95 = or i448 %r92, %r94
-%r96 = zext i448 %r95 to i512
-%r97 = zext i64 %r35 to i512
-%r98 = shl i512 %r97, 448
-%r99 = or i512 %r96, %r98
-%r100 = zext i512 %r99 to i576
-%r101 = zext i64 %r39 to i576
-%r102 = shl i576 %r101, 512
-%r103 = or i576 %r100, %r102
-%r104 = zext i576 %r71 to i640
-%r105 = zext i576 %r103 to i640
-%r106 = shl i640 %r105, 64
-%r107 = add i640 %r104, %r106
-ret i640 %r107
+%r36 = zext i64 %r6 to i128
+%r37 = zext i64 %r10 to i128
+%r38 = shl i128 %r37, 64
+%r39 = or i128 %r36, %r38
+%r40 = zext i128 %r39 to i192
+%r41 = zext i64 %r14 to i192
+%r42 = shl i192 %r41, 128
+%r43 = or i192 %r40, %r42
+%r44 = zext i192 %r43 to i256
+%r45 = zext i64 %r18 to i256
+%r46 = shl i256 %r45, 192
+%r47 = or i256 %r44, %r46
+%r48 = zext i256 %r47 to i320
+%r49 = zext i64 %r22 to i320
+%r50 = shl i320 %r49, 256
+%r51 = or i320 %r48, %r50
+%r52 = zext i320 %r51 to i384
+%r53 = zext i64 %r26 to i384
+%r54 = shl i384 %r53, 320
+%r55 = or i384 %r52, %r54
+%r56 = zext i384 %r55 to i448
+%r57 = zext i64 %r30 to i448
+%r58 = shl i448 %r57, 384
+%r59 = or i448 %r56, %r58
+%r60 = zext i448 %r59 to i512
+%r61 = zext i64 %r34 to i512
+%r62 = shl i512 %r61, 448
+%r63 = or i512 %r60, %r62
+%r64 = zext i64 %r7 to i128
+%r65 = zext i64 %r11 to i128
+%r66 = shl i128 %r65, 64
+%r67 = or i128 %r64, %r66
+%r68 = zext i128 %r67 to i192
+%r69 = zext i64 %r15 to i192
+%r70 = shl i192 %r69, 128
+%r71 = or i192 %r68, %r70
+%r72 = zext i192 %r71 to i256
+%r73 = zext i64 %r19 to i256
+%r74 = shl i256 %r73, 192
+%r75 = or i256 %r72, %r74
+%r76 = zext i256 %r75 to i320
+%r77 = zext i64 %r23 to i320
+%r78 = shl i320 %r77, 256
+%r79 = or i320 %r76, %r78
+%r80 = zext i320 %r79 to i384
+%r81 = zext i64 %r27 to i384
+%r82 = shl i384 %r81, 320
+%r83 = or i384 %r80, %r82
+%r84 = zext i384 %r83 to i448
+%r85 = zext i64 %r31 to i448
+%r86 = shl i448 %r85, 384
+%r87 = or i448 %r84, %r86
+%r88 = zext i448 %r87 to i512
+%r89 = zext i64 %r35 to i512
+%r90 = shl i512 %r89, 448
+%r91 = or i512 %r88, %r90
+%r92 = zext i512 %r63 to i576
+%r93 = zext i512 %r91 to i576
+%r94 = shl i576 %r93, 64
+%r95 = add i576 %r92, %r94
+ret i576 %r95
 }
-define void @mcl_fp_mulUnitPre9L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3)
+define void @mcl_fp_mulUnitPre8L(i64* noalias  %r1, i64* noalias  %r2, i64 %r3)
 {
-%r4 = call i640 @mulPv576x64(i64* %r2, i64 %r3)
-%r5 = trunc i640 %r4 to i64
-%r7 = getelementptr i64, i64* %r1, i32 0
-store i64 %r5, i64* %r7
-%r8 = lshr i640 %r4, 64
-%r9 = trunc i640 %r8 to i64
-%r11 = getelementptr i64, i64* %r1, i32 1
-store i64 %r9, i64* %r11
-%r12 = lshr i640 %r8, 64
-%r13 = trunc i640 %r12 to i64
-%r15 = getelementptr i64, i64* %r1, i32 2
-store i64 %r13, i64* %r15
-%r16 = lshr i640 %r12, 64
-%r17 = trunc i640 %r16 to i64
-%r19 = getelementptr i64, i64* %r1, i32 3
-store i64 %r17, i64* %r19
-%r20 = lshr i640 %r16, 64
-%r21 = trunc i640 %r20 to i64
-%r23 = getelementptr i64, i64* %r1, i32 4
-store i64 %r21, i64* %r23
-%r24 = lshr i640 %r20, 64
-%r25 = trunc i640 %r24 to i64
-%r27 = getelementptr i64, i64* %r1, i32 5
-store i64 %r25, i64* %r27
-%r28 = lshr i640 %r24, 64
-%r29 = trunc i640 %r28 to i64
-%r31 = getelementptr i64, i64* %r1, i32 6
-store i64 %r29, i64* %r31
-%r32 = lshr i640 %r28, 64
-%r33 = trunc i640 %r32 to i64
-%r35 = getelementptr i64, i64* %r1, i32 7
-store i64 %r33, i64* %r35
-%r36 = lshr i640 %r32, 64
-%r37 = trunc i640 %r36 to i64
-%r39 = getelementptr i64, i64* %r1, i32 8
-store i64 %r37, i64* %r39
-%r40 = lshr i640 %r36, 64
-%r41 = trunc i640 %r40 to i64
-%r43 = getelementptr i64, i64* %r1, i32 9
-store i64 %r41, i64* %r43
+%r4 = call i576 @mulPv512x64(i64* %r2, i64 %r3)
+%r6 = getelementptr i64, i64* %r1, i32 0
+%r7 = trunc i576 %r4 to i64
+store i64 %r7, i64* %r6
+%r8 = lshr i576 %r4, 64
+%r10 = getelementptr i64, i64* %r1, i32 1
+%r11 = trunc i576 %r8 to i64
+store i64 %r11, i64* %r10
+%r12 = lshr i576 %r8, 64
+%r14 = getelementptr i64, i64* %r1, i32 2
+%r15 = trunc i576 %r12 to i64
+store i64 %r15, i64* %r14
+%r16 = lshr i576 %r12, 64
+%r18 = getelementptr i64, i64* %r1, i32 3
+%r19 = trunc i576 %r16 to i64
+store i64 %r19, i64* %r18
+%r20 = lshr i576 %r16, 64
+%r22 = getelementptr i64, i64* %r1, i32 4
+%r23 = trunc i576 %r20 to i64
+store i64 %r23, i64* %r22
+%r24 = lshr i576 %r20, 64
+%r26 = getelementptr i64, i64* %r1, i32 5
+%r27 = trunc i576 %r24 to i64
+store i64 %r27, i64* %r26
+%r28 = lshr i576 %r24, 64
+%r30 = getelementptr i64, i64* %r1, i32 6
+%r31 = trunc i576 %r28 to i64
+store i64 %r31, i64* %r30
+%r32 = lshr i576 %r28, 64
+%r34 = getelementptr i64, i64* %r1, i32 7
+%r35 = trunc i576 %r32 to i64
+store i64 %r35, i64* %r34
+%r36 = lshr i576 %r32, 64
+%r38 = getelementptr i64, i64* %r1, i32 8
+%r39 = trunc i576 %r36 to i64
+store i64 %r39, i64* %r38
 ret void
 }
-define void @mcl_fpDbl_mulPre9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+define void @mcl_fpDbl_mulPre8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
 {
 %r4 = load i64, i64* %r3
-%r5 = call i640 @mulPv576x64(i64* %r2, i64 %r4)
-%r6 = trunc i640 %r5 to i64
+%r5 = call i576 @mulPv512x64(i64* %r2, i64 %r4)
+%r6 = trunc i576 %r5 to i64
 store i64 %r6, i64* %r1
-%r7 = lshr i640 %r5, 64
+%r7 = lshr i576 %r5, 64
 %r9 = getelementptr i64, i64* %r3, i32 1
 %r10 = load i64, i64* %r9
-%r11 = call i640 @mulPv576x64(i64* %r2, i64 %r10)
-%r12 = add i640 %r7, %r11
-%r13 = trunc i640 %r12 to i64
+%r11 = call i576 @mulPv512x64(i64* %r2, i64 %r10)
+%r12 = add i576 %r7, %r11
+%r13 = trunc i576 %r12 to i64
 %r15 = getelementptr i64, i64* %r1, i32 1
 store i64 %r13, i64* %r15
-%r16 = lshr i640 %r12, 64
+%r16 = lshr i576 %r12, 64
 %r18 = getelementptr i64, i64* %r3, i32 2
 %r19 = load i64, i64* %r18
-%r20 = call i640 @mulPv576x64(i64* %r2, i64 %r19)
-%r21 = add i640 %r16, %r20
-%r22 = trunc i640 %r21 to i64
+%r20 = call i576 @mulPv512x64(i64* %r2, i64 %r19)
+%r21 = add i576 %r16, %r20
+%r22 = trunc i576 %r21 to i64
 %r24 = getelementptr i64, i64* %r1, i32 2
 store i64 %r22, i64* %r24
-%r25 = lshr i640 %r21, 64
+%r25 = lshr i576 %r21, 64
 %r27 = getelementptr i64, i64* %r3, i32 3
 %r28 = load i64, i64* %r27
-%r29 = call i640 @mulPv576x64(i64* %r2, i64 %r28)
-%r30 = add i640 %r25, %r29
-%r31 = trunc i640 %r30 to i64
+%r29 = call i576 @mulPv512x64(i64* %r2, i64 %r28)
+%r30 = add i576 %r25, %r29
+%r31 = trunc i576 %r30 to i64
 %r33 = getelementptr i64, i64* %r1, i32 3
 store i64 %r31, i64* %r33
-%r34 = lshr i640 %r30, 64
+%r34 = lshr i576 %r30, 64
 %r36 = getelementptr i64, i64* %r3, i32 4
 %r37 = load i64, i64* %r36
-%r38 = call i640 @mulPv576x64(i64* %r2, i64 %r37)
-%r39 = add i640 %r34, %r38
-%r40 = trunc i640 %r39 to i64
+%r38 = call i576 @mulPv512x64(i64* %r2, i64 %r37)
+%r39 = add i576 %r34, %r38
+%r40 = trunc i576 %r39 to i64
 %r42 = getelementptr i64, i64* %r1, i32 4
 store i64 %r40, i64* %r42
-%r43 = lshr i640 %r39, 64
+%r43 = lshr i576 %r39, 64
 %r45 = getelementptr i64, i64* %r3, i32 5
 %r46 = load i64, i64* %r45
-%r47 = call i640 @mulPv576x64(i64* %r2, i64 %r46)
-%r48 = add i640 %r43, %r47
-%r49 = trunc i640 %r48 to i64
+%r47 = call i576 @mulPv512x64(i64* %r2, i64 %r46)
+%r48 = add i576 %r43, %r47
+%r49 = trunc i576 %r48 to i64
 %r51 = getelementptr i64, i64* %r1, i32 5
 store i64 %r49, i64* %r51
-%r52 = lshr i640 %r48, 64
+%r52 = lshr i576 %r48, 64
 %r54 = getelementptr i64, i64* %r3, i32 6
 %r55 = load i64, i64* %r54
-%r56 = call i640 @mulPv576x64(i64* %r2, i64 %r55)
-%r57 = add i640 %r52, %r56
-%r58 = trunc i640 %r57 to i64
+%r56 = call i576 @mulPv512x64(i64* %r2, i64 %r55)
+%r57 = add i576 %r52, %r56
+%r58 = trunc i576 %r57 to i64
 %r60 = getelementptr i64, i64* %r1, i32 6
 store i64 %r58, i64* %r60
-%r61 = lshr i640 %r57, 64
+%r61 = lshr i576 %r57, 64
 %r63 = getelementptr i64, i64* %r3, i32 7
 %r64 = load i64, i64* %r63
-%r65 = call i640 @mulPv576x64(i64* %r2, i64 %r64)
-%r66 = add i640 %r61, %r65
-%r67 = trunc i640 %r66 to i64
-%r69 = getelementptr i64, i64* %r1, i32 7
-store i64 %r67, i64* %r69
-%r70 = lshr i640 %r66, 64
-%r72 = getelementptr i64, i64* %r3, i32 8
-%r73 = load i64, i64* %r72
-%r74 = call i640 @mulPv576x64(i64* %r2, i64 %r73)
-%r75 = add i640 %r70, %r74
-%r77 = getelementptr i64, i64* %r1, i32 8
-%r78 = trunc i640 %r75 to i64
-%r80 = getelementptr i64, i64* %r77, i32 0
-store i64 %r78, i64* %r80
-%r81 = lshr i640 %r75, 64
-%r82 = trunc i640 %r81 to i64
-%r84 = getelementptr i64, i64* %r77, i32 1
-store i64 %r82, i64* %r84
-%r85 = lshr i640 %r81, 64
-%r86 = trunc i640 %r85 to i64
-%r88 = getelementptr i64, i64* %r77, i32 2
-store i64 %r86, i64* %r88
-%r89 = lshr i640 %r85, 64
-%r90 = trunc i640 %r89 to i64
-%r92 = getelementptr i64, i64* %r77, i32 3
-store i64 %r90, i64* %r92
-%r93 = lshr i640 %r89, 64
-%r94 = trunc i640 %r93 to i64
-%r96 = getelementptr i64, i64* %r77, i32 4
-store i64 %r94, i64* %r96
-%r97 = lshr i640 %r93, 64
-%r98 = trunc i640 %r97 to i64
-%r100 = getelementptr i64, i64* %r77, i32 5
-store i64 %r98, i64* %r100
-%r101 = lshr i640 %r97, 64
-%r102 = trunc i640 %r101 to i64
-%r104 = getelementptr i64, i64* %r77, i32 6
-store i64 %r102, i64* %r104
-%r105 = lshr i640 %r101, 64
-%r106 = trunc i640 %r105 to i64
-%r108 = getelementptr i64, i64* %r77, i32 7
-store i64 %r106, i64* %r108
-%r109 = lshr i640 %r105, 64
-%r110 = trunc i640 %r109 to i64
-%r112 = getelementptr i64, i64* %r77, i32 8
-store i64 %r110, i64* %r112
-%r113 = lshr i640 %r109, 64
-%r114 = trunc i640 %r113 to i64
-%r116 = getelementptr i64, i64* %r77, i32 9
-store i64 %r114, i64* %r116
+%r65 = call i576 @mulPv512x64(i64* %r2, i64 %r64)
+%r66 = add i576 %r61, %r65
+%r68 = getelementptr i64, i64* %r1, i32 7
+%r70 = getelementptr i64, i64* %r68, i32 0
+%r71 = trunc i576 %r66 to i64
+store i64 %r71, i64* %r70
+%r72 = lshr i576 %r66, 64
+%r74 = getelementptr i64, i64* %r68, i32 1
+%r75 = trunc i576 %r72 to i64
+store i64 %r75, i64* %r74
+%r76 = lshr i576 %r72, 64
+%r78 = getelementptr i64, i64* %r68, i32 2
+%r79 = trunc i576 %r76 to i64
+store i64 %r79, i64* %r78
+%r80 = lshr i576 %r76, 64
+%r82 = getelementptr i64, i64* %r68, i32 3
+%r83 = trunc i576 %r80 to i64
+store i64 %r83, i64* %r82
+%r84 = lshr i576 %r80, 64
+%r86 = getelementptr i64, i64* %r68, i32 4
+%r87 = trunc i576 %r84 to i64
+store i64 %r87, i64* %r86
+%r88 = lshr i576 %r84, 64
+%r90 = getelementptr i64, i64* %r68, i32 5
+%r91 = trunc i576 %r88 to i64
+store i64 %r91, i64* %r90
+%r92 = lshr i576 %r88, 64
+%r94 = getelementptr i64, i64* %r68, i32 6
+%r95 = trunc i576 %r92 to i64
+store i64 %r95, i64* %r94
+%r96 = lshr i576 %r92, 64
+%r98 = getelementptr i64, i64* %r68, i32 7
+%r99 = trunc i576 %r96 to i64
+store i64 %r99, i64* %r98
+%r100 = lshr i576 %r96, 64
+%r102 = getelementptr i64, i64* %r68, i32 8
+%r103 = trunc i576 %r100 to i64
+store i64 %r103, i64* %r102
 ret void
 }
-define void @mcl_fpDbl_sqrPre9L(i64* noalias  %r1, i64* noalias  %r2)
+define void @mcl_fpDbl_sqrPre8L(i64* noalias  %r1, i64* noalias  %r2)
 {
 %r3 = load i64, i64* %r2
-%r4 = call i640 @mulPv576x64(i64* %r2, i64 %r3)
-%r5 = trunc i640 %r4 to i64
+%r4 = call i576 @mulPv512x64(i64* %r2, i64 %r3)
+%r5 = trunc i576 %r4 to i64
 store i64 %r5, i64* %r1
-%r6 = lshr i640 %r4, 64
+%r6 = lshr i576 %r4, 64
 %r8 = getelementptr i64, i64* %r2, i32 1
 %r9 = load i64, i64* %r8
-%r10 = call i640 @mulPv576x64(i64* %r2, i64 %r9)
-%r11 = add i640 %r6, %r10
-%r12 = trunc i640 %r11 to i64
+%r10 = call i576 @mulPv512x64(i64* %r2, i64 %r9)
+%r11 = add i576 %r6, %r10
+%r12 = trunc i576 %r11 to i64
 %r14 = getelementptr i64, i64* %r1, i32 1
 store i64 %r12, i64* %r14
-%r15 = lshr i640 %r11, 64
+%r15 = lshr i576 %r11, 64
 %r17 = getelementptr i64, i64* %r2, i32 2
 %r18 = load i64, i64* %r17
-%r19 = call i640 @mulPv576x64(i64* %r2, i64 %r18)
-%r20 = add i640 %r15, %r19
-%r21 = trunc i640 %r20 to i64
+%r19 = call i576 @mulPv512x64(i64* %r2, i64 %r18)
+%r20 = add i576 %r15, %r19
+%r21 = trunc i576 %r20 to i64
 %r23 = getelementptr i64, i64* %r1, i32 2
 store i64 %r21, i64* %r23
-%r24 = lshr i640 %r20, 64
+%r24 = lshr i576 %r20, 64
 %r26 = getelementptr i64, i64* %r2, i32 3
 %r27 = load i64, i64* %r26
-%r28 = call i640 @mulPv576x64(i64* %r2, i64 %r27)
-%r29 = add i640 %r24, %r28
-%r30 = trunc i640 %r29 to i64
+%r28 = call i576 @mulPv512x64(i64* %r2, i64 %r27)
+%r29 = add i576 %r24, %r28
+%r30 = trunc i576 %r29 to i64
 %r32 = getelementptr i64, i64* %r1, i32 3
 store i64 %r30, i64* %r32
-%r33 = lshr i640 %r29, 64
+%r33 = lshr i576 %r29, 64
 %r35 = getelementptr i64, i64* %r2, i32 4
 %r36 = load i64, i64* %r35
-%r37 = call i640 @mulPv576x64(i64* %r2, i64 %r36)
-%r38 = add i640 %r33, %r37
-%r39 = trunc i640 %r38 to i64
+%r37 = call i576 @mulPv512x64(i64* %r2, i64 %r36)
+%r38 = add i576 %r33, %r37
+%r39 = trunc i576 %r38 to i64
 %r41 = getelementptr i64, i64* %r1, i32 4
 store i64 %r39, i64* %r41
-%r42 = lshr i640 %r38, 64
+%r42 = lshr i576 %r38, 64
 %r44 = getelementptr i64, i64* %r2, i32 5
 %r45 = load i64, i64* %r44
-%r46 = call i640 @mulPv576x64(i64* %r2, i64 %r45)
-%r47 = add i640 %r42, %r46
-%r48 = trunc i640 %r47 to i64
+%r46 = call i576 @mulPv512x64(i64* %r2, i64 %r45)
+%r47 = add i576 %r42, %r46
+%r48 = trunc i576 %r47 to i64
 %r50 = getelementptr i64, i64* %r1, i32 5
 store i64 %r48, i64* %r50
-%r51 = lshr i640 %r47, 64
+%r51 = lshr i576 %r47, 64
 %r53 = getelementptr i64, i64* %r2, i32 6
 %r54 = load i64, i64* %r53
-%r55 = call i640 @mulPv576x64(i64* %r2, i64 %r54)
-%r56 = add i640 %r51, %r55
-%r57 = trunc i640 %r56 to i64
+%r55 = call i576 @mulPv512x64(i64* %r2, i64 %r54)
+%r56 = add i576 %r51, %r55
+%r57 = trunc i576 %r56 to i64
 %r59 = getelementptr i64, i64* %r1, i32 6
 store i64 %r57, i64* %r59
-%r60 = lshr i640 %r56, 64
+%r60 = lshr i576 %r56, 64
 %r62 = getelementptr i64, i64* %r2, i32 7
 %r63 = load i64, i64* %r62
-%r64 = call i640 @mulPv576x64(i64* %r2, i64 %r63)
-%r65 = add i640 %r60, %r64
-%r66 = trunc i640 %r65 to i64
-%r68 = getelementptr i64, i64* %r1, i32 7
-store i64 %r66, i64* %r68
-%r69 = lshr i640 %r65, 64
-%r71 = getelementptr i64, i64* %r2, i32 8
-%r72 = load i64, i64* %r71
-%r73 = call i640 @mulPv576x64(i64* %r2, i64 %r72)
-%r74 = add i640 %r69, %r73
-%r76 = getelementptr i64, i64* %r1, i32 8
-%r77 = trunc i640 %r74 to i64
-%r79 = getelementptr i64, i64* %r76, i32 0
-store i64 %r77, i64* %r79
-%r80 = lshr i640 %r74, 64
-%r81 = trunc i640 %r80 to i64
-%r83 = getelementptr i64, i64* %r76, i32 1
-store i64 %r81, i64* %r83
-%r84 = lshr i640 %r80, 64
-%r85 = trunc i640 %r84 to i64
-%r87 = getelementptr i64, i64* %r76, i32 2
-store i64 %r85, i64* %r87
-%r88 = lshr i640 %r84, 64
-%r89 = trunc i640 %r88 to i64
-%r91 = getelementptr i64, i64* %r76, i32 3
-store i64 %r89, i64* %r91
-%r92 = lshr i640 %r88, 64
-%r93 = trunc i640 %r92 to i64
-%r95 = getelementptr i64, i64* %r76, i32 4
-store i64 %r93, i64* %r95
-%r96 = lshr i640 %r92, 64
-%r97 = trunc i640 %r96 to i64
-%r99 = getelementptr i64, i64* %r76, i32 5
-store i64 %r97, i64* %r99
-%r100 = lshr i640 %r96, 64
-%r101 = trunc i640 %r100 to i64
-%r103 = getelementptr i64, i64* %r76, i32 6
-store i64 %r101, i64* %r103
-%r104 = lshr i640 %r100, 64
-%r105 = trunc i640 %r104 to i64
-%r107 = getelementptr i64, i64* %r76, i32 7
-store i64 %r105, i64* %r107
-%r108 = lshr i640 %r104, 64
-%r109 = trunc i640 %r108 to i64
-%r111 = getelementptr i64, i64* %r76, i32 8
-store i64 %r109, i64* %r111
-%r112 = lshr i640 %r108, 64
-%r113 = trunc i640 %r112 to i64
-%r115 = getelementptr i64, i64* %r76, i32 9
-store i64 %r113, i64* %r115
+%r64 = call i576 @mulPv512x64(i64* %r2, i64 %r63)
+%r65 = add i576 %r60, %r64
+%r67 = getelementptr i64, i64* %r1, i32 7
+%r69 = getelementptr i64, i64* %r67, i32 0
+%r70 = trunc i576 %r65 to i64
+store i64 %r70, i64* %r69
+%r71 = lshr i576 %r65, 64
+%r73 = getelementptr i64, i64* %r67, i32 1
+%r74 = trunc i576 %r71 to i64
+store i64 %r74, i64* %r73
+%r75 = lshr i576 %r71, 64
+%r77 = getelementptr i64, i64* %r67, i32 2
+%r78 = trunc i576 %r75 to i64
+store i64 %r78, i64* %r77
+%r79 = lshr i576 %r75, 64
+%r81 = getelementptr i64, i64* %r67, i32 3
+%r82 = trunc i576 %r79 to i64
+store i64 %r82, i64* %r81
+%r83 = lshr i576 %r79, 64
+%r85 = getelementptr i64, i64* %r67, i32 4
+%r86 = trunc i576 %r83 to i64
+store i64 %r86, i64* %r85
+%r87 = lshr i576 %r83, 64
+%r89 = getelementptr i64, i64* %r67, i32 5
+%r90 = trunc i576 %r87 to i64
+store i64 %r90, i64* %r89
+%r91 = lshr i576 %r87, 64
+%r93 = getelementptr i64, i64* %r67, i32 6
+%r94 = trunc i576 %r91 to i64
+store i64 %r94, i64* %r93
+%r95 = lshr i576 %r91, 64
+%r97 = getelementptr i64, i64* %r67, i32 7
+%r98 = trunc i576 %r95 to i64
+store i64 %r98, i64* %r97
+%r99 = lshr i576 %r95, 64
+%r101 = getelementptr i64, i64* %r67, i32 8
+%r102 = trunc i576 %r99 to i64
+store i64 %r102, i64* %r101
 ret void
 }
-define void @mcl_fp_mont9L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
+define void @mcl_fp_mont8L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
 {
 %r6 = getelementptr i64, i64* %r4, i32 -1
 %r7 = load i64, i64* %r6
 %r9 = getelementptr i64, i64* %r3, i32 0
 %r10 = load i64, i64* %r9
-%r11 = call i640 @mulPv576x64(i64* %r2, i64 %r10)
-%r12 = zext i640 %r11 to i704
-%r13 = trunc i640 %r11 to i64
+%r11 = call i576 @mulPv512x64(i64* %r2, i64 %r10)
+%r12 = zext i576 %r11 to i640
+%r13 = trunc i576 %r11 to i64
 %r14 = mul i64 %r13, %r7
-%r15 = call i640 @mulPv576x64(i64* %r4, i64 %r14)
-%r16 = zext i640 %r15 to i704
-%r17 = add i704 %r12, %r16
-%r18 = lshr i704 %r17, 64
+%r15 = call i576 @mulPv512x64(i64* %r4, i64 %r14)
+%r16 = zext i576 %r15 to i640
+%r17 = add i640 %r12, %r16
+%r18 = lshr i640 %r17, 64
 %r20 = getelementptr i64, i64* %r3, i32 1
 %r21 = load i64, i64* %r20
-%r22 = call i640 @mulPv576x64(i64* %r2, i64 %r21)
-%r23 = zext i640 %r22 to i704
-%r24 = add i704 %r18, %r23
-%r25 = trunc i704 %r24 to i64
+%r22 = call i576 @mulPv512x64(i64* %r2, i64 %r21)
+%r23 = zext i576 %r22 to i640
+%r24 = add i640 %r18, %r23
+%r25 = trunc i640 %r24 to i64
 %r26 = mul i64 %r25, %r7
-%r27 = call i640 @mulPv576x64(i64* %r4, i64 %r26)
-%r28 = zext i640 %r27 to i704
-%r29 = add i704 %r24, %r28
-%r30 = lshr i704 %r29, 64
+%r27 = call i576 @mulPv512x64(i64* %r4, i64 %r26)
+%r28 = zext i576 %r27 to i640
+%r29 = add i640 %r24, %r28
+%r30 = lshr i640 %r29, 64
 %r32 = getelementptr i64, i64* %r3, i32 2
 %r33 = load i64, i64* %r32
-%r34 = call i640 @mulPv576x64(i64* %r2, i64 %r33)
-%r35 = zext i640 %r34 to i704
-%r36 = add i704 %r30, %r35
-%r37 = trunc i704 %r36 to i64
+%r34 = call i576 @mulPv512x64(i64* %r2, i64 %r33)
+%r35 = zext i576 %r34 to i640
+%r36 = add i640 %r30, %r35
+%r37 = trunc i640 %r36 to i64
 %r38 = mul i64 %r37, %r7
-%r39 = call i640 @mulPv576x64(i64* %r4, i64 %r38)
-%r40 = zext i640 %r39 to i704
-%r41 = add i704 %r36, %r40
-%r42 = lshr i704 %r41, 64
+%r39 = call i576 @mulPv512x64(i64* %r4, i64 %r38)
+%r40 = zext i576 %r39 to i640
+%r41 = add i640 %r36, %r40
+%r42 = lshr i640 %r41, 64
 %r44 = getelementptr i64, i64* %r3, i32 3
 %r45 = load i64, i64* %r44
-%r46 = call i640 @mulPv576x64(i64* %r2, i64 %r45)
-%r47 = zext i640 %r46 to i704
-%r48 = add i704 %r42, %r47
-%r49 = trunc i704 %r48 to i64
+%r46 = call i576 @mulPv512x64(i64* %r2, i64 %r45)
+%r47 = zext i576 %r46 to i640
+%r48 = add i640 %r42, %r47
+%r49 = trunc i640 %r48 to i64
 %r50 = mul i64 %r49, %r7
-%r51 = call i640 @mulPv576x64(i64* %r4, i64 %r50)
-%r52 = zext i640 %r51 to i704
-%r53 = add i704 %r48, %r52
-%r54 = lshr i704 %r53, 64
+%r51 = call i576 @mulPv512x64(i64* %r4, i64 %r50)
+%r52 = zext i576 %r51 to i640
+%r53 = add i640 %r48, %r52
+%r54 = lshr i640 %r53, 64
 %r56 = getelementptr i64, i64* %r3, i32 4
 %r57 = load i64, i64* %r56
-%r58 = call i640 @mulPv576x64(i64* %r2, i64 %r57)
-%r59 = zext i640 %r58 to i704
-%r60 = add i704 %r54, %r59
-%r61 = trunc i704 %r60 to i64
+%r58 = call i576 @mulPv512x64(i64* %r2, i64 %r57)
+%r59 = zext i576 %r58 to i640
+%r60 = add i640 %r54, %r59
+%r61 = trunc i640 %r60 to i64
 %r62 = mul i64 %r61, %r7
-%r63 = call i640 @mulPv576x64(i64* %r4, i64 %r62)
-%r64 = zext i640 %r63 to i704
-%r65 = add i704 %r60, %r64
-%r66 = lshr i704 %r65, 64
+%r63 = call i576 @mulPv512x64(i64* %r4, i64 %r62)
+%r64 = zext i576 %r63 to i640
+%r65 = add i640 %r60, %r64
+%r66 = lshr i640 %r65, 64
 %r68 = getelementptr i64, i64* %r3, i32 5
 %r69 = load i64, i64* %r68
-%r70 = call i640 @mulPv576x64(i64* %r2, i64 %r69)
-%r71 = zext i640 %r70 to i704
-%r72 = add i704 %r66, %r71
-%r73 = trunc i704 %r72 to i64
+%r70 = call i576 @mulPv512x64(i64* %r2, i64 %r69)
+%r71 = zext i576 %r70 to i640
+%r72 = add i640 %r66, %r71
+%r73 = trunc i640 %r72 to i64
 %r74 = mul i64 %r73, %r7
-%r75 = call i640 @mulPv576x64(i64* %r4, i64 %r74)
-%r76 = zext i640 %r75 to i704
-%r77 = add i704 %r72, %r76
-%r78 = lshr i704 %r77, 64
+%r75 = call i576 @mulPv512x64(i64* %r4, i64 %r74)
+%r76 = zext i576 %r75 to i640
+%r77 = add i640 %r72, %r76
+%r78 = lshr i640 %r77, 64
 %r80 = getelementptr i64, i64* %r3, i32 6
 %r81 = load i64, i64* %r80
-%r82 = call i640 @mulPv576x64(i64* %r2, i64 %r81)
-%r83 = zext i640 %r82 to i704
-%r84 = add i704 %r78, %r83
-%r85 = trunc i704 %r84 to i64
+%r82 = call i576 @mulPv512x64(i64* %r2, i64 %r81)
+%r83 = zext i576 %r82 to i640
+%r84 = add i640 %r78, %r83
+%r85 = trunc i640 %r84 to i64
 %r86 = mul i64 %r85, %r7
-%r87 = call i640 @mulPv576x64(i64* %r4, i64 %r86)
-%r88 = zext i640 %r87 to i704
-%r89 = add i704 %r84, %r88
-%r90 = lshr i704 %r89, 64
+%r87 = call i576 @mulPv512x64(i64* %r4, i64 %r86)
+%r88 = zext i576 %r87 to i640
+%r89 = add i640 %r84, %r88
+%r90 = lshr i640 %r89, 64
 %r92 = getelementptr i64, i64* %r3, i32 7
 %r93 = load i64, i64* %r92
-%r94 = call i640 @mulPv576x64(i64* %r2, i64 %r93)
-%r95 = zext i640 %r94 to i704
-%r96 = add i704 %r90, %r95
-%r97 = trunc i704 %r96 to i64
+%r94 = call i576 @mulPv512x64(i64* %r2, i64 %r93)
+%r95 = zext i576 %r94 to i640
+%r96 = add i640 %r90, %r95
+%r97 = trunc i640 %r96 to i64
 %r98 = mul i64 %r97, %r7
-%r99 = call i640 @mulPv576x64(i64* %r4, i64 %r98)
-%r100 = zext i640 %r99 to i704
-%r101 = add i704 %r96, %r100
-%r102 = lshr i704 %r101, 64
-%r104 = getelementptr i64, i64* %r3, i32 8
-%r105 = load i64, i64* %r104
-%r106 = call i640 @mulPv576x64(i64* %r2, i64 %r105)
-%r107 = zext i640 %r106 to i704
-%r108 = add i704 %r102, %r107
-%r109 = trunc i704 %r108 to i64
-%r110 = mul i64 %r109, %r7
-%r111 = call i640 @mulPv576x64(i64* %r4, i64 %r110)
-%r112 = zext i640 %r111 to i704
-%r113 = add i704 %r108, %r112
-%r114 = lshr i704 %r113, 64
-%r115 = trunc i704 %r114 to i640
-%r116 = load i64, i64* %r4
-%r117 = zext i64 %r116 to i128
-%r119 = getelementptr i64, i64* %r4, i32 1
-%r120 = load i64, i64* %r119
-%r121 = zext i64 %r120 to i128
-%r122 = shl i128 %r121, 64
-%r123 = or i128 %r117, %r122
-%r124 = zext i128 %r123 to i192
-%r126 = getelementptr i64, i64* %r4, i32 2
-%r127 = load i64, i64* %r126
-%r128 = zext i64 %r127 to i192
-%r129 = shl i192 %r128, 128
-%r130 = or i192 %r124, %r129
-%r131 = zext i192 %r130 to i256
-%r133 = getelementptr i64, i64* %r4, i32 3
-%r134 = load i64, i64* %r133
-%r135 = zext i64 %r134 to i256
-%r136 = shl i256 %r135, 192
-%r137 = or i256 %r131, %r136
-%r138 = zext i256 %r137 to i320
-%r140 = getelementptr i64, i64* %r4, i32 4
-%r141 = load i64, i64* %r140
-%r142 = zext i64 %r141 to i320
-%r143 = shl i320 %r142, 256
-%r144 = or i320 %r138, %r143
-%r145 = zext i320 %r144 to i384
-%r147 = getelementptr i64, i64* %r4, i32 5
-%r148 = load i64, i64* %r147
-%r149 = zext i64 %r148 to i384
-%r150 = shl i384 %r149, 320
-%r151 = or i384 %r145, %r150
-%r152 = zext i384 %r151 to i448
-%r154 = getelementptr i64, i64* %r4, i32 6
-%r155 = load i64, i64* %r154
-%r156 = zext i64 %r155 to i448
-%r157 = shl i448 %r156, 384
-%r158 = or i448 %r152, %r157
-%r159 = zext i448 %r158 to i512
-%r161 = getelementptr i64, i64* %r4, i32 7
-%r162 = load i64, i64* %r161
-%r163 = zext i64 %r162 to i512
-%r164 = shl i512 %r163, 448
-%r165 = or i512 %r159, %r164
-%r166 = zext i512 %r165 to i576
-%r168 = getelementptr i64, i64* %r4, i32 8
-%r169 = load i64, i64* %r168
-%r170 = zext i64 %r169 to i576
-%r171 = shl i576 %r170, 512
-%r172 = or i576 %r166, %r171
-%r173 = zext i576 %r172 to i640
-%r174 = sub i640 %r115, %r173
-%r175 = lshr i640 %r174, 576
-%r176 = trunc i640 %r175 to i1
-%r177 = select i1 %r176, i640 %r115, i640 %r174
-%r178 = trunc i640 %r177 to i576
-%r179 = trunc i576 %r178 to i64
-%r181 = getelementptr i64, i64* %r1, i32 0
-store i64 %r179, i64* %r181
-%r182 = lshr i576 %r178, 64
-%r183 = trunc i576 %r182 to i64
-%r185 = getelementptr i64, i64* %r1, i32 1
-store i64 %r183, i64* %r185
-%r186 = lshr i576 %r182, 64
-%r187 = trunc i576 %r186 to i64
-%r189 = getelementptr i64, i64* %r1, i32 2
-store i64 %r187, i64* %r189
-%r190 = lshr i576 %r186, 64
-%r191 = trunc i576 %r190 to i64
-%r193 = getelementptr i64, i64* %r1, i32 3
-store i64 %r191, i64* %r193
-%r194 = lshr i576 %r190, 64
-%r195 = trunc i576 %r194 to i64
-%r197 = getelementptr i64, i64* %r1, i32 4
-store i64 %r195, i64* %r197
-%r198 = lshr i576 %r194, 64
-%r199 = trunc i576 %r198 to i64
-%r201 = getelementptr i64, i64* %r1, i32 5
-store i64 %r199, i64* %r201
-%r202 = lshr i576 %r198, 64
-%r203 = trunc i576 %r202 to i64
-%r205 = getelementptr i64, i64* %r1, i32 6
-store i64 %r203, i64* %r205
-%r206 = lshr i576 %r202, 64
-%r207 = trunc i576 %r206 to i64
-%r209 = getelementptr i64, i64* %r1, i32 7
-store i64 %r207, i64* %r209
-%r210 = lshr i576 %r206, 64
-%r211 = trunc i576 %r210 to i64
-%r213 = getelementptr i64, i64* %r1, i32 8
-store i64 %r211, i64* %r213
+%r99 = call i576 @mulPv512x64(i64* %r4, i64 %r98)
+%r100 = zext i576 %r99 to i640
+%r101 = add i640 %r96, %r100
+%r102 = lshr i640 %r101, 64
+%r103 = trunc i640 %r102 to i576
+%r104 = load i64, i64* %r4
+%r105 = zext i64 %r104 to i128
+%r107 = getelementptr i64, i64* %r4, i32 1
+%r108 = load i64, i64* %r107
+%r109 = zext i64 %r108 to i128
+%r110 = shl i128 %r109, 64
+%r111 = or i128 %r105, %r110
+%r112 = zext i128 %r111 to i192
+%r114 = getelementptr i64, i64* %r4, i32 2
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i192
+%r117 = shl i192 %r116, 128
+%r118 = or i192 %r112, %r117
+%r119 = zext i192 %r118 to i256
+%r121 = getelementptr i64, i64* %r4, i32 3
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i256
+%r124 = shl i256 %r123, 192
+%r125 = or i256 %r119, %r124
+%r126 = zext i256 %r125 to i320
+%r128 = getelementptr i64, i64* %r4, i32 4
+%r129 = load i64, i64* %r128
+%r130 = zext i64 %r129 to i320
+%r131 = shl i320 %r130, 256
+%r132 = or i320 %r126, %r131
+%r133 = zext i320 %r132 to i384
+%r135 = getelementptr i64, i64* %r4, i32 5
+%r136 = load i64, i64* %r135
+%r137 = zext i64 %r136 to i384
+%r138 = shl i384 %r137, 320
+%r139 = or i384 %r133, %r138
+%r140 = zext i384 %r139 to i448
+%r142 = getelementptr i64, i64* %r4, i32 6
+%r143 = load i64, i64* %r142
+%r144 = zext i64 %r143 to i448
+%r145 = shl i448 %r144, 384
+%r146 = or i448 %r140, %r145
+%r147 = zext i448 %r146 to i512
+%r149 = getelementptr i64, i64* %r4, i32 7
+%r150 = load i64, i64* %r149
+%r151 = zext i64 %r150 to i512
+%r152 = shl i512 %r151, 448
+%r153 = or i512 %r147, %r152
+%r154 = zext i512 %r153 to i576
+%r155 = sub i576 %r103, %r154
+%r156 = lshr i576 %r155, 512
+%r157 = trunc i576 %r156 to i1
+%r158 = select i1 %r157, i576 %r103, i576 %r155
+%r159 = trunc i576 %r158 to i512
+%r161 = getelementptr i64, i64* %r1, i32 0
+%r162 = trunc i512 %r159 to i64
+store i64 %r162, i64* %r161
+%r163 = lshr i512 %r159, 64
+%r165 = getelementptr i64, i64* %r1, i32 1
+%r166 = trunc i512 %r163 to i64
+store i64 %r166, i64* %r165
+%r167 = lshr i512 %r163, 64
+%r169 = getelementptr i64, i64* %r1, i32 2
+%r170 = trunc i512 %r167 to i64
+store i64 %r170, i64* %r169
+%r171 = lshr i512 %r167, 64
+%r173 = getelementptr i64, i64* %r1, i32 3
+%r174 = trunc i512 %r171 to i64
+store i64 %r174, i64* %r173
+%r175 = lshr i512 %r171, 64
+%r177 = getelementptr i64, i64* %r1, i32 4
+%r178 = trunc i512 %r175 to i64
+store i64 %r178, i64* %r177
+%r179 = lshr i512 %r175, 64
+%r181 = getelementptr i64, i64* %r1, i32 5
+%r182 = trunc i512 %r179 to i64
+store i64 %r182, i64* %r181
+%r183 = lshr i512 %r179, 64
+%r185 = getelementptr i64, i64* %r1, i32 6
+%r186 = trunc i512 %r183 to i64
+store i64 %r186, i64* %r185
+%r187 = lshr i512 %r183, 64
+%r189 = getelementptr i64, i64* %r1, i32 7
+%r190 = trunc i512 %r187 to i64
+store i64 %r190, i64* %r189
 ret void
 }
-define void @mcl_fp_montNF9L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
+define void @mcl_fp_montNF8L(i64* %r1, i64* %r2, i64* %r3, i64* %r4)
 {
 %r6 = getelementptr i64, i64* %r4, i32 -1
 %r7 = load i64, i64* %r6
 %r8 = load i64, i64* %r3
-%r9 = call i640 @mulPv576x64(i64* %r2, i64 %r8)
-%r10 = trunc i640 %r9 to i64
+%r9 = call i576 @mulPv512x64(i64* %r2, i64 %r8)
+%r10 = trunc i576 %r9 to i64
 %r11 = mul i64 %r10, %r7
-%r12 = call i640 @mulPv576x64(i64* %r4, i64 %r11)
-%r13 = add i640 %r9, %r12
-%r14 = lshr i640 %r13, 64
+%r12 = call i576 @mulPv512x64(i64* %r4, i64 %r11)
+%r13 = add i576 %r9, %r12
+%r14 = lshr i576 %r13, 64
 %r16 = getelementptr i64, i64* %r3, i32 1
 %r17 = load i64, i64* %r16
-%r18 = call i640 @mulPv576x64(i64* %r2, i64 %r17)
-%r19 = add i640 %r14, %r18
-%r20 = trunc i640 %r19 to i64
+%r18 = call i576 @mulPv512x64(i64* %r2, i64 %r17)
+%r19 = add i576 %r14, %r18
+%r20 = trunc i576 %r19 to i64
 %r21 = mul i64 %r20, %r7
-%r22 = call i640 @mulPv576x64(i64* %r4, i64 %r21)
-%r23 = add i640 %r19, %r22
-%r24 = lshr i640 %r23, 64
+%r22 = call i576 @mulPv512x64(i64* %r4, i64 %r21)
+%r23 = add i576 %r19, %r22
+%r24 = lshr i576 %r23, 64
 %r26 = getelementptr i64, i64* %r3, i32 2
 %r27 = load i64, i64* %r26
-%r28 = call i640 @mulPv576x64(i64* %r2, i64 %r27)
-%r29 = add i640 %r24, %r28
-%r30 = trunc i640 %r29 to i64
+%r28 = call i576 @mulPv512x64(i64* %r2, i64 %r27)
+%r29 = add i576 %r24, %r28
+%r30 = trunc i576 %r29 to i64
 %r31 = mul i64 %r30, %r7
-%r32 = call i640 @mulPv576x64(i64* %r4, i64 %r31)
-%r33 = add i640 %r29, %r32
-%r34 = lshr i640 %r33, 64
+%r32 = call i576 @mulPv512x64(i64* %r4, i64 %r31)
+%r33 = add i576 %r29, %r32
+%r34 = lshr i576 %r33, 64
 %r36 = getelementptr i64, i64* %r3, i32 3
 %r37 = load i64, i64* %r36
-%r38 = call i640 @mulPv576x64(i64* %r2, i64 %r37)
-%r39 = add i640 %r34, %r38
-%r40 = trunc i640 %r39 to i64
+%r38 = call i576 @mulPv512x64(i64* %r2, i64 %r37)
+%r39 = add i576 %r34, %r38
+%r40 = trunc i576 %r39 to i64
 %r41 = mul i64 %r40, %r7
-%r42 = call i640 @mulPv576x64(i64* %r4, i64 %r41)
-%r43 = add i640 %r39, %r42
-%r44 = lshr i640 %r43, 64
+%r42 = call i576 @mulPv512x64(i64* %r4, i64 %r41)
+%r43 = add i576 %r39, %r42
+%r44 = lshr i576 %r43, 64
 %r46 = getelementptr i64, i64* %r3, i32 4
 %r47 = load i64, i64* %r46
-%r48 = call i640 @mulPv576x64(i64* %r2, i64 %r47)
-%r49 = add i640 %r44, %r48
-%r50 = trunc i640 %r49 to i64
+%r48 = call i576 @mulPv512x64(i64* %r2, i64 %r47)
+%r49 = add i576 %r44, %r48
+%r50 = trunc i576 %r49 to i64
 %r51 = mul i64 %r50, %r7
-%r52 = call i640 @mulPv576x64(i64* %r4, i64 %r51)
-%r53 = add i640 %r49, %r52
-%r54 = lshr i640 %r53, 64
+%r52 = call i576 @mulPv512x64(i64* %r4, i64 %r51)
+%r53 = add i576 %r49, %r52
+%r54 = lshr i576 %r53, 64
 %r56 = getelementptr i64, i64* %r3, i32 5
 %r57 = load i64, i64* %r56
-%r58 = call i640 @mulPv576x64(i64* %r2, i64 %r57)
-%r59 = add i640 %r54, %r58
-%r60 = trunc i640 %r59 to i64
+%r58 = call i576 @mulPv512x64(i64* %r2, i64 %r57)
+%r59 = add i576 %r54, %r58
+%r60 = trunc i576 %r59 to i64
 %r61 = mul i64 %r60, %r7
-%r62 = call i640 @mulPv576x64(i64* %r4, i64 %r61)
-%r63 = add i640 %r59, %r62
-%r64 = lshr i640 %r63, 64
+%r62 = call i576 @mulPv512x64(i64* %r4, i64 %r61)
+%r63 = add i576 %r59, %r62
+%r64 = lshr i576 %r63, 64
 %r66 = getelementptr i64, i64* %r3, i32 6
 %r67 = load i64, i64* %r66
-%r68 = call i640 @mulPv576x64(i64* %r2, i64 %r67)
-%r69 = add i640 %r64, %r68
-%r70 = trunc i640 %r69 to i64
+%r68 = call i576 @mulPv512x64(i64* %r2, i64 %r67)
+%r69 = add i576 %r64, %r68
+%r70 = trunc i576 %r69 to i64
 %r71 = mul i64 %r70, %r7
-%r72 = call i640 @mulPv576x64(i64* %r4, i64 %r71)
-%r73 = add i640 %r69, %r72
-%r74 = lshr i640 %r73, 64
+%r72 = call i576 @mulPv512x64(i64* %r4, i64 %r71)
+%r73 = add i576 %r69, %r72
+%r74 = lshr i576 %r73, 64
 %r76 = getelementptr i64, i64* %r3, i32 7
 %r77 = load i64, i64* %r76
-%r78 = call i640 @mulPv576x64(i64* %r2, i64 %r77)
-%r79 = add i640 %r74, %r78
-%r80 = trunc i640 %r79 to i64
+%r78 = call i576 @mulPv512x64(i64* %r2, i64 %r77)
+%r79 = add i576 %r74, %r78
+%r80 = trunc i576 %r79 to i64
 %r81 = mul i64 %r80, %r7
-%r82 = call i640 @mulPv576x64(i64* %r4, i64 %r81)
-%r83 = add i640 %r79, %r82
-%r84 = lshr i640 %r83, 64
-%r86 = getelementptr i64, i64* %r3, i32 8
-%r87 = load i64, i64* %r86
-%r88 = call i640 @mulPv576x64(i64* %r2, i64 %r87)
-%r89 = add i640 %r84, %r88
-%r90 = trunc i640 %r89 to i64
-%r91 = mul i64 %r90, %r7
-%r92 = call i640 @mulPv576x64(i64* %r4, i64 %r91)
-%r93 = add i640 %r89, %r92
-%r94 = lshr i640 %r93, 64
-%r95 = trunc i640 %r94 to i576
-%r96 = load i64, i64* %r4
-%r97 = zext i64 %r96 to i128
-%r99 = getelementptr i64, i64* %r4, i32 1
-%r100 = load i64, i64* %r99
-%r101 = zext i64 %r100 to i128
-%r102 = shl i128 %r101, 64
-%r103 = or i128 %r97, %r102
-%r104 = zext i128 %r103 to i192
-%r106 = getelementptr i64, i64* %r4, i32 2
-%r107 = load i64, i64* %r106
-%r108 = zext i64 %r107 to i192
-%r109 = shl i192 %r108, 128
-%r110 = or i192 %r104, %r109
-%r111 = zext i192 %r110 to i256
-%r113 = getelementptr i64, i64* %r4, i32 3
-%r114 = load i64, i64* %r113
-%r115 = zext i64 %r114 to i256
-%r116 = shl i256 %r115, 192
-%r117 = or i256 %r111, %r116
-%r118 = zext i256 %r117 to i320
-%r120 = getelementptr i64, i64* %r4, i32 4
-%r121 = load i64, i64* %r120
-%r122 = zext i64 %r121 to i320
-%r123 = shl i320 %r122, 256
-%r124 = or i320 %r118, %r123
-%r125 = zext i320 %r124 to i384
-%r127 = getelementptr i64, i64* %r4, i32 5
-%r128 = load i64, i64* %r127
-%r129 = zext i64 %r128 to i384
-%r130 = shl i384 %r129, 320
-%r131 = or i384 %r125, %r130
-%r132 = zext i384 %r131 to i448
-%r134 = getelementptr i64, i64* %r4, i32 6
-%r135 = load i64, i64* %r134
-%r136 = zext i64 %r135 to i448
-%r137 = shl i448 %r136, 384
-%r138 = or i448 %r132, %r137
-%r139 = zext i448 %r138 to i512
-%r141 = getelementptr i64, i64* %r4, i32 7
-%r142 = load i64, i64* %r141
-%r143 = zext i64 %r142 to i512
-%r144 = shl i512 %r143, 448
-%r145 = or i512 %r139, %r144
-%r146 = zext i512 %r145 to i576
-%r148 = getelementptr i64, i64* %r4, i32 8
-%r149 = load i64, i64* %r148
-%r150 = zext i64 %r149 to i576
-%r151 = shl i576 %r150, 512
-%r152 = or i576 %r146, %r151
-%r153 = sub i576 %r95, %r152
-%r154 = lshr i576 %r153, 575
-%r155 = trunc i576 %r154 to i1
-%r156 = select i1 %r155, i576 %r95, i576 %r153
-%r157 = trunc i576 %r156 to i64
-%r159 = getelementptr i64, i64* %r1, i32 0
-store i64 %r157, i64* %r159
-%r160 = lshr i576 %r156, 64
-%r161 = trunc i576 %r160 to i64
-%r163 = getelementptr i64, i64* %r1, i32 1
-store i64 %r161, i64* %r163
-%r164 = lshr i576 %r160, 64
+%r82 = call i576 @mulPv512x64(i64* %r4, i64 %r81)
+%r83 = add i576 %r79, %r82
+%r84 = lshr i576 %r83, 64
+%r85 = trunc i576 %r84 to i512
+%r86 = load i64, i64* %r4
+%r87 = zext i64 %r86 to i128
+%r89 = getelementptr i64, i64* %r4, i32 1
+%r90 = load i64, i64* %r89
+%r91 = zext i64 %r90 to i128
+%r92 = shl i128 %r91, 64
+%r93 = or i128 %r87, %r92
+%r94 = zext i128 %r93 to i192
+%r96 = getelementptr i64, i64* %r4, i32 2
+%r97 = load i64, i64* %r96
+%r98 = zext i64 %r97 to i192
+%r99 = shl i192 %r98, 128
+%r100 = or i192 %r94, %r99
+%r101 = zext i192 %r100 to i256
+%r103 = getelementptr i64, i64* %r4, i32 3
+%r104 = load i64, i64* %r103
+%r105 = zext i64 %r104 to i256
+%r106 = shl i256 %r105, 192
+%r107 = or i256 %r101, %r106
+%r108 = zext i256 %r107 to i320
+%r110 = getelementptr i64, i64* %r4, i32 4
+%r111 = load i64, i64* %r110
+%r112 = zext i64 %r111 to i320
+%r113 = shl i320 %r112, 256
+%r114 = or i320 %r108, %r113
+%r115 = zext i320 %r114 to i384
+%r117 = getelementptr i64, i64* %r4, i32 5
+%r118 = load i64, i64* %r117
+%r119 = zext i64 %r118 to i384
+%r120 = shl i384 %r119, 320
+%r121 = or i384 %r115, %r120
+%r122 = zext i384 %r121 to i448
+%r124 = getelementptr i64, i64* %r4, i32 6
+%r125 = load i64, i64* %r124
+%r126 = zext i64 %r125 to i448
+%r127 = shl i448 %r126, 384
+%r128 = or i448 %r122, %r127
+%r129 = zext i448 %r128 to i512
+%r131 = getelementptr i64, i64* %r4, i32 7
+%r132 = load i64, i64* %r131
+%r133 = zext i64 %r132 to i512
+%r134 = shl i512 %r133, 448
+%r135 = or i512 %r129, %r134
+%r136 = sub i512 %r85, %r135
+%r137 = lshr i512 %r136, 511
+%r138 = trunc i512 %r137 to i1
+%r139 = select i1 %r138, i512 %r85, i512 %r136
+%r141 = getelementptr i64, i64* %r1, i32 0
+%r142 = trunc i512 %r139 to i64
+store i64 %r142, i64* %r141
+%r143 = lshr i512 %r139, 64
+%r145 = getelementptr i64, i64* %r1, i32 1
+%r146 = trunc i512 %r143 to i64
+store i64 %r146, i64* %r145
+%r147 = lshr i512 %r143, 64
+%r149 = getelementptr i64, i64* %r1, i32 2
+%r150 = trunc i512 %r147 to i64
+store i64 %r150, i64* %r149
+%r151 = lshr i512 %r147, 64
+%r153 = getelementptr i64, i64* %r1, i32 3
+%r154 = trunc i512 %r151 to i64
+store i64 %r154, i64* %r153
+%r155 = lshr i512 %r151, 64
+%r157 = getelementptr i64, i64* %r1, i32 4
+%r158 = trunc i512 %r155 to i64
+store i64 %r158, i64* %r157
+%r159 = lshr i512 %r155, 64
+%r161 = getelementptr i64, i64* %r1, i32 5
+%r162 = trunc i512 %r159 to i64
+store i64 %r162, i64* %r161
+%r163 = lshr i512 %r159, 64
+%r165 = getelementptr i64, i64* %r1, i32 6
+%r166 = trunc i512 %r163 to i64
+store i64 %r166, i64* %r165
+%r167 = lshr i512 %r163, 64
+%r169 = getelementptr i64, i64* %r1, i32 7
+%r170 = trunc i512 %r167 to i64
+store i64 %r170, i64* %r169
+ret void
+}
+define void @mcl_fp_montRed8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+{
+%r5 = getelementptr i64, i64* %r3, i32 -1
+%r6 = load i64, i64* %r5
+%r7 = load i64, i64* %r3
+%r8 = zext i64 %r7 to i128
+%r10 = getelementptr i64, i64* %r3, i32 1
+%r11 = load i64, i64* %r10
+%r12 = zext i64 %r11 to i128
+%r13 = shl i128 %r12, 64
+%r14 = or i128 %r8, %r13
+%r15 = zext i128 %r14 to i192
+%r17 = getelementptr i64, i64* %r3, i32 2
+%r18 = load i64, i64* %r17
+%r19 = zext i64 %r18 to i192
+%r20 = shl i192 %r19, 128
+%r21 = or i192 %r15, %r20
+%r22 = zext i192 %r21 to i256
+%r24 = getelementptr i64, i64* %r3, i32 3
+%r25 = load i64, i64* %r24
+%r26 = zext i64 %r25 to i256
+%r27 = shl i256 %r26, 192
+%r28 = or i256 %r22, %r27
+%r29 = zext i256 %r28 to i320
+%r31 = getelementptr i64, i64* %r3, i32 4
+%r32 = load i64, i64* %r31
+%r33 = zext i64 %r32 to i320
+%r34 = shl i320 %r33, 256
+%r35 = or i320 %r29, %r34
+%r36 = zext i320 %r35 to i384
+%r38 = getelementptr i64, i64* %r3, i32 5
+%r39 = load i64, i64* %r38
+%r40 = zext i64 %r39 to i384
+%r41 = shl i384 %r40, 320
+%r42 = or i384 %r36, %r41
+%r43 = zext i384 %r42 to i448
+%r45 = getelementptr i64, i64* %r3, i32 6
+%r46 = load i64, i64* %r45
+%r47 = zext i64 %r46 to i448
+%r48 = shl i448 %r47, 384
+%r49 = or i448 %r43, %r48
+%r50 = zext i448 %r49 to i512
+%r52 = getelementptr i64, i64* %r3, i32 7
+%r53 = load i64, i64* %r52
+%r54 = zext i64 %r53 to i512
+%r55 = shl i512 %r54, 448
+%r56 = or i512 %r50, %r55
+%r57 = load i64, i64* %r2
+%r58 = zext i64 %r57 to i128
+%r60 = getelementptr i64, i64* %r2, i32 1
+%r61 = load i64, i64* %r60
+%r62 = zext i64 %r61 to i128
+%r63 = shl i128 %r62, 64
+%r64 = or i128 %r58, %r63
+%r65 = zext i128 %r64 to i192
+%r67 = getelementptr i64, i64* %r2, i32 2
+%r68 = load i64, i64* %r67
+%r69 = zext i64 %r68 to i192
+%r70 = shl i192 %r69, 128
+%r71 = or i192 %r65, %r70
+%r72 = zext i192 %r71 to i256
+%r74 = getelementptr i64, i64* %r2, i32 3
+%r75 = load i64, i64* %r74
+%r76 = zext i64 %r75 to i256
+%r77 = shl i256 %r76, 192
+%r78 = or i256 %r72, %r77
+%r79 = zext i256 %r78 to i320
+%r81 = getelementptr i64, i64* %r2, i32 4
+%r82 = load i64, i64* %r81
+%r83 = zext i64 %r82 to i320
+%r84 = shl i320 %r83, 256
+%r85 = or i320 %r79, %r84
+%r86 = zext i320 %r85 to i384
+%r88 = getelementptr i64, i64* %r2, i32 5
+%r89 = load i64, i64* %r88
+%r90 = zext i64 %r89 to i384
+%r91 = shl i384 %r90, 320
+%r92 = or i384 %r86, %r91
+%r93 = zext i384 %r92 to i448
+%r95 = getelementptr i64, i64* %r2, i32 6
+%r96 = load i64, i64* %r95
+%r97 = zext i64 %r96 to i448
+%r98 = shl i448 %r97, 384
+%r99 = or i448 %r93, %r98
+%r100 = zext i448 %r99 to i512
+%r102 = getelementptr i64, i64* %r2, i32 7
+%r103 = load i64, i64* %r102
+%r104 = zext i64 %r103 to i512
+%r105 = shl i512 %r104, 448
+%r106 = or i512 %r100, %r105
+%r107 = trunc i512 %r106 to i64
+%r108 = mul i64 %r107, %r6
+%r109 = call i576 @mulPv512x64(i64* %r3, i64 %r108)
+%r111 = getelementptr i64, i64* %r2, i32 8
+%r112 = load i64, i64* %r111
+%r113 = zext i64 %r112 to i576
+%r114 = shl i576 %r113, 512
+%r115 = zext i512 %r106 to i576
+%r116 = or i576 %r114, %r115
+%r117 = zext i576 %r116 to i640
+%r118 = zext i576 %r109 to i640
+%r119 = add i640 %r117, %r118
+%r120 = lshr i640 %r119, 64
+%r121 = trunc i640 %r120 to i576
+%r122 = lshr i576 %r121, 512
+%r123 = trunc i576 %r122 to i64
+%r124 = trunc i576 %r121 to i512
+%r125 = trunc i512 %r124 to i64
+%r126 = mul i64 %r125, %r6
+%r127 = call i576 @mulPv512x64(i64* %r3, i64 %r126)
+%r128 = zext i64 %r123 to i576
+%r129 = shl i576 %r128, 512
+%r130 = add i576 %r127, %r129
+%r132 = getelementptr i64, i64* %r2, i32 9
+%r133 = load i64, i64* %r132
+%r134 = zext i64 %r133 to i576
+%r135 = shl i576 %r134, 512
+%r136 = zext i512 %r124 to i576
+%r137 = or i576 %r135, %r136
+%r138 = zext i576 %r137 to i640
+%r139 = zext i576 %r130 to i640
+%r140 = add i640 %r138, %r139
+%r141 = lshr i640 %r140, 64
+%r142 = trunc i640 %r141 to i576
+%r143 = lshr i576 %r142, 512
+%r144 = trunc i576 %r143 to i64
+%r145 = trunc i576 %r142 to i512
+%r146 = trunc i512 %r145 to i64
+%r147 = mul i64 %r146, %r6
+%r148 = call i576 @mulPv512x64(i64* %r3, i64 %r147)
+%r149 = zext i64 %r144 to i576
+%r150 = shl i576 %r149, 512
+%r151 = add i576 %r148, %r150
+%r153 = getelementptr i64, i64* %r2, i32 10
+%r154 = load i64, i64* %r153
+%r155 = zext i64 %r154 to i576
+%r156 = shl i576 %r155, 512
+%r157 = zext i512 %r145 to i576
+%r158 = or i576 %r156, %r157
+%r159 = zext i576 %r158 to i640
+%r160 = zext i576 %r151 to i640
+%r161 = add i640 %r159, %r160
+%r162 = lshr i640 %r161, 64
+%r163 = trunc i640 %r162 to i576
+%r164 = lshr i576 %r163, 512
 %r165 = trunc i576 %r164 to i64
-%r167 = getelementptr i64, i64* %r1, i32 2
-store i64 %r165, i64* %r167
-%r168 = lshr i576 %r164, 64
-%r169 = trunc i576 %r168 to i64
-%r171 = getelementptr i64, i64* %r1, i32 3
-store i64 %r169, i64* %r171
-%r172 = lshr i576 %r168, 64
-%r173 = trunc i576 %r172 to i64
-%r175 = getelementptr i64, i64* %r1, i32 4
-store i64 %r173, i64* %r175
-%r176 = lshr i576 %r172, 64
-%r177 = trunc i576 %r176 to i64
-%r179 = getelementptr i64, i64* %r1, i32 5
-store i64 %r177, i64* %r179
-%r180 = lshr i576 %r176, 64
-%r181 = trunc i576 %r180 to i64
-%r183 = getelementptr i64, i64* %r1, i32 6
-store i64 %r181, i64* %r183
-%r184 = lshr i576 %r180, 64
-%r185 = trunc i576 %r184 to i64
-%r187 = getelementptr i64, i64* %r1, i32 7
-store i64 %r185, i64* %r187
-%r188 = lshr i576 %r184, 64
-%r189 = trunc i576 %r188 to i64
-%r191 = getelementptr i64, i64* %r1, i32 8
-store i64 %r189, i64* %r191
-ret void
-}
-define void @mcl_fp_montRed9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
+%r166 = trunc i576 %r163 to i512
+%r167 = trunc i512 %r166 to i64
+%r168 = mul i64 %r167, %r6
+%r169 = call i576 @mulPv512x64(i64* %r3, i64 %r168)
+%r170 = zext i64 %r165 to i576
+%r171 = shl i576 %r170, 512
+%r172 = add i576 %r169, %r171
+%r174 = getelementptr i64, i64* %r2, i32 11
+%r175 = load i64, i64* %r174
+%r176 = zext i64 %r175 to i576
+%r177 = shl i576 %r176, 512
+%r178 = zext i512 %r166 to i576
+%r179 = or i576 %r177, %r178
+%r180 = zext i576 %r179 to i640
+%r181 = zext i576 %r172 to i640
+%r182 = add i640 %r180, %r181
+%r183 = lshr i640 %r182, 64
+%r184 = trunc i640 %r183 to i576
+%r185 = lshr i576 %r184, 512
+%r186 = trunc i576 %r185 to i64
+%r187 = trunc i576 %r184 to i512
+%r188 = trunc i512 %r187 to i64
+%r189 = mul i64 %r188, %r6
+%r190 = call i576 @mulPv512x64(i64* %r3, i64 %r189)
+%r191 = zext i64 %r186 to i576
+%r192 = shl i576 %r191, 512
+%r193 = add i576 %r190, %r192
+%r195 = getelementptr i64, i64* %r2, i32 12
+%r196 = load i64, i64* %r195
+%r197 = zext i64 %r196 to i576
+%r198 = shl i576 %r197, 512
+%r199 = zext i512 %r187 to i576
+%r200 = or i576 %r198, %r199
+%r201 = zext i576 %r200 to i640
+%r202 = zext i576 %r193 to i640
+%r203 = add i640 %r201, %r202
+%r204 = lshr i640 %r203, 64
+%r205 = trunc i640 %r204 to i576
+%r206 = lshr i576 %r205, 512
+%r207 = trunc i576 %r206 to i64
+%r208 = trunc i576 %r205 to i512
+%r209 = trunc i512 %r208 to i64
+%r210 = mul i64 %r209, %r6
+%r211 = call i576 @mulPv512x64(i64* %r3, i64 %r210)
+%r212 = zext i64 %r207 to i576
+%r213 = shl i576 %r212, 512
+%r214 = add i576 %r211, %r213
+%r216 = getelementptr i64, i64* %r2, i32 13
+%r217 = load i64, i64* %r216
+%r218 = zext i64 %r217 to i576
+%r219 = shl i576 %r218, 512
+%r220 = zext i512 %r208 to i576
+%r221 = or i576 %r219, %r220
+%r222 = zext i576 %r221 to i640
+%r223 = zext i576 %r214 to i640
+%r224 = add i640 %r222, %r223
+%r225 = lshr i640 %r224, 64
+%r226 = trunc i640 %r225 to i576
+%r227 = lshr i576 %r226, 512
+%r228 = trunc i576 %r227 to i64
+%r229 = trunc i576 %r226 to i512
+%r230 = trunc i512 %r229 to i64
+%r231 = mul i64 %r230, %r6
+%r232 = call i576 @mulPv512x64(i64* %r3, i64 %r231)
+%r233 = zext i64 %r228 to i576
+%r234 = shl i576 %r233, 512
+%r235 = add i576 %r232, %r234
+%r237 = getelementptr i64, i64* %r2, i32 14
+%r238 = load i64, i64* %r237
+%r239 = zext i64 %r238 to i576
+%r240 = shl i576 %r239, 512
+%r241 = zext i512 %r229 to i576
+%r242 = or i576 %r240, %r241
+%r243 = zext i576 %r242 to i640
+%r244 = zext i576 %r235 to i640
+%r245 = add i640 %r243, %r244
+%r246 = lshr i640 %r245, 64
+%r247 = trunc i640 %r246 to i576
+%r248 = lshr i576 %r247, 512
+%r249 = trunc i576 %r248 to i64
+%r250 = trunc i576 %r247 to i512
+%r251 = trunc i512 %r250 to i64
+%r252 = mul i64 %r251, %r6
+%r253 = call i576 @mulPv512x64(i64* %r3, i64 %r252)
+%r254 = zext i64 %r249 to i576
+%r255 = shl i576 %r254, 512
+%r256 = add i576 %r253, %r255
+%r258 = getelementptr i64, i64* %r2, i32 15
+%r259 = load i64, i64* %r258
+%r260 = zext i64 %r259 to i576
+%r261 = shl i576 %r260, 512
+%r262 = zext i512 %r250 to i576
+%r263 = or i576 %r261, %r262
+%r264 = zext i576 %r263 to i640
+%r265 = zext i576 %r256 to i640
+%r266 = add i640 %r264, %r265
+%r267 = lshr i640 %r266, 64
+%r268 = trunc i640 %r267 to i576
+%r269 = lshr i576 %r268, 512
+%r270 = trunc i576 %r269 to i64
+%r271 = trunc i576 %r268 to i512
+%r272 = zext i512 %r56 to i576
+%r273 = zext i512 %r271 to i576
+%r274 = sub i576 %r273, %r272
+%r275 = lshr i576 %r274, 512
+%r276 = trunc i576 %r275 to i1
+%r277 = select i1 %r276, i576 %r273, i576 %r274
+%r278 = trunc i576 %r277 to i512
+%r280 = getelementptr i64, i64* %r1, i32 0
+%r281 = trunc i512 %r278 to i64
+store i64 %r281, i64* %r280
+%r282 = lshr i512 %r278, 64
+%r284 = getelementptr i64, i64* %r1, i32 1
+%r285 = trunc i512 %r282 to i64
+store i64 %r285, i64* %r284
+%r286 = lshr i512 %r282, 64
+%r288 = getelementptr i64, i64* %r1, i32 2
+%r289 = trunc i512 %r286 to i64
+store i64 %r289, i64* %r288
+%r290 = lshr i512 %r286, 64
+%r292 = getelementptr i64, i64* %r1, i32 3
+%r293 = trunc i512 %r290 to i64
+store i64 %r293, i64* %r292
+%r294 = lshr i512 %r290, 64
+%r296 = getelementptr i64, i64* %r1, i32 4
+%r297 = trunc i512 %r294 to i64
+store i64 %r297, i64* %r296
+%r298 = lshr i512 %r294, 64
+%r300 = getelementptr i64, i64* %r1, i32 5
+%r301 = trunc i512 %r298 to i64
+store i64 %r301, i64* %r300
+%r302 = lshr i512 %r298, 64
+%r304 = getelementptr i64, i64* %r1, i32 6
+%r305 = trunc i512 %r302 to i64
+store i64 %r305, i64* %r304
+%r306 = lshr i512 %r302, 64
+%r308 = getelementptr i64, i64* %r1, i32 7
+%r309 = trunc i512 %r306 to i64
+store i64 %r309, i64* %r308
+ret void
+}
+define void @mcl_fp_montRedNF8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3)
 {
 %r5 = getelementptr i64, i64* %r3, i32 -1
 %r6 = load i64, i64* %r5
@@ -13259,223 +6022,244 @@ define void @mcl_fp_montRed9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r54 = zext i64 %r53 to i512
 %r55 = shl i512 %r54, 448
 %r56 = or i512 %r50, %r55
-%r57 = zext i512 %r56 to i576
-%r59 = getelementptr i64, i64* %r3, i32 8
-%r60 = load i64, i64* %r59
-%r61 = zext i64 %r60 to i576
-%r62 = shl i576 %r61, 512
-%r63 = or i576 %r57, %r62
-%r64 = load i64, i64* %r2
-%r65 = zext i64 %r64 to i128
-%r67 = getelementptr i64, i64* %r2, i32 1
+%r57 = load i64, i64* %r2
+%r58 = zext i64 %r57 to i128
+%r60 = getelementptr i64, i64* %r2, i32 1
+%r61 = load i64, i64* %r60
+%r62 = zext i64 %r61 to i128
+%r63 = shl i128 %r62, 64
+%r64 = or i128 %r58, %r63
+%r65 = zext i128 %r64 to i192
+%r67 = getelementptr i64, i64* %r2, i32 2
 %r68 = load i64, i64* %r67
-%r69 = zext i64 %r68 to i128
-%r70 = shl i128 %r69, 64
-%r71 = or i128 %r65, %r70
-%r72 = zext i128 %r71 to i192
-%r74 = getelementptr i64, i64* %r2, i32 2
+%r69 = zext i64 %r68 to i192
+%r70 = shl i192 %r69, 128
+%r71 = or i192 %r65, %r70
+%r72 = zext i192 %r71 to i256
+%r74 = getelementptr i64, i64* %r2, i32 3
 %r75 = load i64, i64* %r74
-%r76 = zext i64 %r75 to i192
-%r77 = shl i192 %r76, 128
-%r78 = or i192 %r72, %r77
-%r79 = zext i192 %r78 to i256
-%r81 = getelementptr i64, i64* %r2, i32 3
+%r76 = zext i64 %r75 to i256
+%r77 = shl i256 %r76, 192
+%r78 = or i256 %r72, %r77
+%r79 = zext i256 %r78 to i320
+%r81 = getelementptr i64, i64* %r2, i32 4
 %r82 = load i64, i64* %r81
-%r83 = zext i64 %r82 to i256
-%r84 = shl i256 %r83, 192
-%r85 = or i256 %r79, %r84
-%r86 = zext i256 %r85 to i320
-%r88 = getelementptr i64, i64* %r2, i32 4
+%r83 = zext i64 %r82 to i320
+%r84 = shl i320 %r83, 256
+%r85 = or i320 %r79, %r84
+%r86 = zext i320 %r85 to i384
+%r88 = getelementptr i64, i64* %r2, i32 5
 %r89 = load i64, i64* %r88
-%r90 = zext i64 %r89 to i320
-%r91 = shl i320 %r90, 256
-%r92 = or i320 %r86, %r91
-%r93 = zext i320 %r92 to i384
-%r95 = getelementptr i64, i64* %r2, i32 5
+%r90 = zext i64 %r89 to i384
+%r91 = shl i384 %r90, 320
+%r92 = or i384 %r86, %r91
+%r93 = zext i384 %r92 to i448
+%r95 = getelementptr i64, i64* %r2, i32 6
 %r96 = load i64, i64* %r95
-%r97 = zext i64 %r96 to i384
-%r98 = shl i384 %r97, 320
-%r99 = or i384 %r93, %r98
-%r100 = zext i384 %r99 to i448
-%r102 = getelementptr i64, i64* %r2, i32 6
+%r97 = zext i64 %r96 to i448
+%r98 = shl i448 %r97, 384
+%r99 = or i448 %r93, %r98
+%r100 = zext i448 %r99 to i512
+%r102 = getelementptr i64, i64* %r2, i32 7
 %r103 = load i64, i64* %r102
-%r104 = zext i64 %r103 to i448
-%r105 = shl i448 %r104, 384
-%r106 = or i448 %r100, %r105
-%r107 = zext i448 %r106 to i512
-%r109 = getelementptr i64, i64* %r2, i32 7
-%r110 = load i64, i64* %r109
-%r111 = zext i64 %r110 to i512
-%r112 = shl i512 %r111, 448
-%r113 = or i512 %r107, %r112
-%r114 = zext i512 %r113 to i576
-%r116 = getelementptr i64, i64* %r2, i32 8
-%r117 = load i64, i64* %r116
-%r118 = zext i64 %r117 to i576
-%r119 = shl i576 %r118, 512
-%r120 = or i576 %r114, %r119
-%r121 = zext i576 %r120 to i640
-%r123 = getelementptr i64, i64* %r2, i32 9
-%r124 = load i64, i64* %r123
-%r125 = zext i64 %r124 to i640
-%r126 = shl i640 %r125, 576
-%r127 = or i640 %r121, %r126
-%r128 = zext i640 %r127 to i704
-%r130 = getelementptr i64, i64* %r2, i32 10
-%r131 = load i64, i64* %r130
-%r132 = zext i64 %r131 to i704
-%r133 = shl i704 %r132, 640
-%r134 = or i704 %r128, %r133
-%r135 = zext i704 %r134 to i768
-%r137 = getelementptr i64, i64* %r2, i32 11
-%r138 = load i64, i64* %r137
-%r139 = zext i64 %r138 to i768
-%r140 = shl i768 %r139, 704
-%r141 = or i768 %r135, %r140
-%r142 = zext i768 %r141 to i832
-%r144 = getelementptr i64, i64* %r2, i32 12
-%r145 = load i64, i64* %r144
-%r146 = zext i64 %r145 to i832
-%r147 = shl i832 %r146, 768
-%r148 = or i832 %r142, %r147
-%r149 = zext i832 %r148 to i896
-%r151 = getelementptr i64, i64* %r2, i32 13
-%r152 = load i64, i64* %r151
-%r153 = zext i64 %r152 to i896
-%r154 = shl i896 %r153, 832
-%r155 = or i896 %r149, %r154
-%r156 = zext i896 %r155 to i960
-%r158 = getelementptr i64, i64* %r2, i32 14
-%r159 = load i64, i64* %r158
-%r160 = zext i64 %r159 to i960
-%r161 = shl i960 %r160, 896
-%r162 = or i960 %r156, %r161
-%r163 = zext i960 %r162 to i1024
-%r165 = getelementptr i64, i64* %r2, i32 15
-%r166 = load i64, i64* %r165
-%r167 = zext i64 %r166 to i1024
-%r168 = shl i1024 %r167, 960
-%r169 = or i1024 %r163, %r168
-%r170 = zext i1024 %r169 to i1088
-%r172 = getelementptr i64, i64* %r2, i32 16
-%r173 = load i64, i64* %r172
-%r174 = zext i64 %r173 to i1088
-%r175 = shl i1088 %r174, 1024
-%r176 = or i1088 %r170, %r175
-%r177 = zext i1088 %r176 to i1152
-%r179 = getelementptr i64, i64* %r2, i32 17
-%r180 = load i64, i64* %r179
-%r181 = zext i64 %r180 to i1152
-%r182 = shl i1152 %r181, 1088
-%r183 = or i1152 %r177, %r182
-%r184 = zext i1152 %r183 to i1216
-%r185 = trunc i1216 %r184 to i64
-%r186 = mul i64 %r185, %r6
-%r187 = call i640 @mulPv576x64(i64* %r3, i64 %r186)
-%r188 = zext i640 %r187 to i1216
-%r189 = add i1216 %r184, %r188
-%r190 = lshr i1216 %r189, 64
-%r191 = trunc i1216 %r190 to i1152
-%r192 = trunc i1152 %r191 to i64
-%r193 = mul i64 %r192, %r6
-%r194 = call i640 @mulPv576x64(i64* %r3, i64 %r193)
-%r195 = zext i640 %r194 to i1152
-%r196 = add i1152 %r191, %r195
-%r197 = lshr i1152 %r196, 64
-%r198 = trunc i1152 %r197 to i1088
-%r199 = trunc i1088 %r198 to i64
-%r200 = mul i64 %r199, %r6
-%r201 = call i640 @mulPv576x64(i64* %r3, i64 %r200)
-%r202 = zext i640 %r201 to i1088
-%r203 = add i1088 %r198, %r202
-%r204 = lshr i1088 %r203, 64
-%r205 = trunc i1088 %r204 to i1024
-%r206 = trunc i1024 %r205 to i64
-%r207 = mul i64 %r206, %r6
-%r208 = call i640 @mulPv576x64(i64* %r3, i64 %r207)
-%r209 = zext i640 %r208 to i1024
-%r210 = add i1024 %r205, %r209
-%r211 = lshr i1024 %r210, 64
-%r212 = trunc i1024 %r211 to i960
-%r213 = trunc i960 %r212 to i64
-%r214 = mul i64 %r213, %r6
-%r215 = call i640 @mulPv576x64(i64* %r3, i64 %r214)
-%r216 = zext i640 %r215 to i960
-%r217 = add i960 %r212, %r216
-%r218 = lshr i960 %r217, 64
-%r219 = trunc i960 %r218 to i896
-%r220 = trunc i896 %r219 to i64
-%r221 = mul i64 %r220, %r6
-%r222 = call i640 @mulPv576x64(i64* %r3, i64 %r221)
-%r223 = zext i640 %r222 to i896
-%r224 = add i896 %r219, %r223
-%r225 = lshr i896 %r224, 64
-%r226 = trunc i896 %r225 to i832
-%r227 = trunc i832 %r226 to i64
-%r228 = mul i64 %r227, %r6
-%r229 = call i640 @mulPv576x64(i64* %r3, i64 %r228)
-%r230 = zext i640 %r229 to i832
-%r231 = add i832 %r226, %r230
-%r232 = lshr i832 %r231, 64
-%r233 = trunc i832 %r232 to i768
-%r234 = trunc i768 %r233 to i64
-%r235 = mul i64 %r234, %r6
-%r236 = call i640 @mulPv576x64(i64* %r3, i64 %r235)
-%r237 = zext i640 %r236 to i768
-%r238 = add i768 %r233, %r237
-%r239 = lshr i768 %r238, 64
-%r240 = trunc i768 %r239 to i704
-%r241 = trunc i704 %r240 to i64
-%r242 = mul i64 %r241, %r6
-%r243 = call i640 @mulPv576x64(i64* %r3, i64 %r242)
-%r244 = zext i640 %r243 to i704
-%r245 = add i704 %r240, %r244
-%r246 = lshr i704 %r245, 64
-%r247 = trunc i704 %r246 to i640
-%r248 = zext i576 %r63 to i640
-%r249 = sub i640 %r247, %r248
-%r250 = lshr i640 %r249, 576
-%r251 = trunc i640 %r250 to i1
-%r252 = select i1 %r251, i640 %r247, i640 %r249
-%r253 = trunc i640 %r252 to i576
-%r254 = trunc i576 %r253 to i64
-%r256 = getelementptr i64, i64* %r1, i32 0
-store i64 %r254, i64* %r256
-%r257 = lshr i576 %r253, 64
-%r258 = trunc i576 %r257 to i64
-%r260 = getelementptr i64, i64* %r1, i32 1
-store i64 %r258, i64* %r260
-%r261 = lshr i576 %r257, 64
-%r262 = trunc i576 %r261 to i64
-%r264 = getelementptr i64, i64* %r1, i32 2
-store i64 %r262, i64* %r264
-%r265 = lshr i576 %r261, 64
-%r266 = trunc i576 %r265 to i64
-%r268 = getelementptr i64, i64* %r1, i32 3
-store i64 %r266, i64* %r268
-%r269 = lshr i576 %r265, 64
+%r104 = zext i64 %r103 to i512
+%r105 = shl i512 %r104, 448
+%r106 = or i512 %r100, %r105
+%r107 = trunc i512 %r106 to i64
+%r108 = mul i64 %r107, %r6
+%r109 = call i576 @mulPv512x64(i64* %r3, i64 %r108)
+%r111 = getelementptr i64, i64* %r2, i32 8
+%r112 = load i64, i64* %r111
+%r113 = zext i64 %r112 to i576
+%r114 = shl i576 %r113, 512
+%r115 = zext i512 %r106 to i576
+%r116 = or i576 %r114, %r115
+%r117 = zext i576 %r116 to i640
+%r118 = zext i576 %r109 to i640
+%r119 = add i640 %r117, %r118
+%r120 = lshr i640 %r119, 64
+%r121 = trunc i640 %r120 to i576
+%r122 = lshr i576 %r121, 512
+%r123 = trunc i576 %r122 to i64
+%r124 = trunc i576 %r121 to i512
+%r125 = trunc i512 %r124 to i64
+%r126 = mul i64 %r125, %r6
+%r127 = call i576 @mulPv512x64(i64* %r3, i64 %r126)
+%r128 = zext i64 %r123 to i576
+%r129 = shl i576 %r128, 512
+%r130 = add i576 %r127, %r129
+%r132 = getelementptr i64, i64* %r2, i32 9
+%r133 = load i64, i64* %r132
+%r134 = zext i64 %r133 to i576
+%r135 = shl i576 %r134, 512
+%r136 = zext i512 %r124 to i576
+%r137 = or i576 %r135, %r136
+%r138 = zext i576 %r137 to i640
+%r139 = zext i576 %r130 to i640
+%r140 = add i640 %r138, %r139
+%r141 = lshr i640 %r140, 64
+%r142 = trunc i640 %r141 to i576
+%r143 = lshr i576 %r142, 512
+%r144 = trunc i576 %r143 to i64
+%r145 = trunc i576 %r142 to i512
+%r146 = trunc i512 %r145 to i64
+%r147 = mul i64 %r146, %r6
+%r148 = call i576 @mulPv512x64(i64* %r3, i64 %r147)
+%r149 = zext i64 %r144 to i576
+%r150 = shl i576 %r149, 512
+%r151 = add i576 %r148, %r150
+%r153 = getelementptr i64, i64* %r2, i32 10
+%r154 = load i64, i64* %r153
+%r155 = zext i64 %r154 to i576
+%r156 = shl i576 %r155, 512
+%r157 = zext i512 %r145 to i576
+%r158 = or i576 %r156, %r157
+%r159 = zext i576 %r158 to i640
+%r160 = zext i576 %r151 to i640
+%r161 = add i640 %r159, %r160
+%r162 = lshr i640 %r161, 64
+%r163 = trunc i640 %r162 to i576
+%r164 = lshr i576 %r163, 512
+%r165 = trunc i576 %r164 to i64
+%r166 = trunc i576 %r163 to i512
+%r167 = trunc i512 %r166 to i64
+%r168 = mul i64 %r167, %r6
+%r169 = call i576 @mulPv512x64(i64* %r3, i64 %r168)
+%r170 = zext i64 %r165 to i576
+%r171 = shl i576 %r170, 512
+%r172 = add i576 %r169, %r171
+%r174 = getelementptr i64, i64* %r2, i32 11
+%r175 = load i64, i64* %r174
+%r176 = zext i64 %r175 to i576
+%r177 = shl i576 %r176, 512
+%r178 = zext i512 %r166 to i576
+%r179 = or i576 %r177, %r178
+%r180 = zext i576 %r179 to i640
+%r181 = zext i576 %r172 to i640
+%r182 = add i640 %r180, %r181
+%r183 = lshr i640 %r182, 64
+%r184 = trunc i640 %r183 to i576
+%r185 = lshr i576 %r184, 512
+%r186 = trunc i576 %r185 to i64
+%r187 = trunc i576 %r184 to i512
+%r188 = trunc i512 %r187 to i64
+%r189 = mul i64 %r188, %r6
+%r190 = call i576 @mulPv512x64(i64* %r3, i64 %r189)
+%r191 = zext i64 %r186 to i576
+%r192 = shl i576 %r191, 512
+%r193 = add i576 %r190, %r192
+%r195 = getelementptr i64, i64* %r2, i32 12
+%r196 = load i64, i64* %r195
+%r197 = zext i64 %r196 to i576
+%r198 = shl i576 %r197, 512
+%r199 = zext i512 %r187 to i576
+%r200 = or i576 %r198, %r199
+%r201 = zext i576 %r200 to i640
+%r202 = zext i576 %r193 to i640
+%r203 = add i640 %r201, %r202
+%r204 = lshr i640 %r203, 64
+%r205 = trunc i640 %r204 to i576
+%r206 = lshr i576 %r205, 512
+%r207 = trunc i576 %r206 to i64
+%r208 = trunc i576 %r205 to i512
+%r209 = trunc i512 %r208 to i64
+%r210 = mul i64 %r209, %r6
+%r211 = call i576 @mulPv512x64(i64* %r3, i64 %r210)
+%r212 = zext i64 %r207 to i576
+%r213 = shl i576 %r212, 512
+%r214 = add i576 %r211, %r213
+%r216 = getelementptr i64, i64* %r2, i32 13
+%r217 = load i64, i64* %r216
+%r218 = zext i64 %r217 to i576
+%r219 = shl i576 %r218, 512
+%r220 = zext i512 %r208 to i576
+%r221 = or i576 %r219, %r220
+%r222 = zext i576 %r221 to i640
+%r223 = zext i576 %r214 to i640
+%r224 = add i640 %r222, %r223
+%r225 = lshr i640 %r224, 64
+%r226 = trunc i640 %r225 to i576
+%r227 = lshr i576 %r226, 512
+%r228 = trunc i576 %r227 to i64
+%r229 = trunc i576 %r226 to i512
+%r230 = trunc i512 %r229 to i64
+%r231 = mul i64 %r230, %r6
+%r232 = call i576 @mulPv512x64(i64* %r3, i64 %r231)
+%r233 = zext i64 %r228 to i576
+%r234 = shl i576 %r233, 512
+%r235 = add i576 %r232, %r234
+%r237 = getelementptr i64, i64* %r2, i32 14
+%r238 = load i64, i64* %r237
+%r239 = zext i64 %r238 to i576
+%r240 = shl i576 %r239, 512
+%r241 = zext i512 %r229 to i576
+%r242 = or i576 %r240, %r241
+%r243 = zext i576 %r242 to i640
+%r244 = zext i576 %r235 to i640
+%r245 = add i640 %r243, %r244
+%r246 = lshr i640 %r245, 64
+%r247 = trunc i640 %r246 to i576
+%r248 = lshr i576 %r247, 512
+%r249 = trunc i576 %r248 to i64
+%r250 = trunc i576 %r247 to i512
+%r251 = trunc i512 %r250 to i64
+%r252 = mul i64 %r251, %r6
+%r253 = call i576 @mulPv512x64(i64* %r3, i64 %r252)
+%r254 = zext i64 %r249 to i576
+%r255 = shl i576 %r254, 512
+%r256 = add i576 %r253, %r255
+%r258 = getelementptr i64, i64* %r2, i32 15
+%r259 = load i64, i64* %r258
+%r260 = zext i64 %r259 to i576
+%r261 = shl i576 %r260, 512
+%r262 = zext i512 %r250 to i576
+%r263 = or i576 %r261, %r262
+%r264 = zext i576 %r263 to i640
+%r265 = zext i576 %r256 to i640
+%r266 = add i640 %r264, %r265
+%r267 = lshr i640 %r266, 64
+%r268 = trunc i640 %r267 to i576
+%r269 = lshr i576 %r268, 512
 %r270 = trunc i576 %r269 to i64
-%r272 = getelementptr i64, i64* %r1, i32 4
-store i64 %r270, i64* %r272
-%r273 = lshr i576 %r269, 64
-%r274 = trunc i576 %r273 to i64
-%r276 = getelementptr i64, i64* %r1, i32 5
-store i64 %r274, i64* %r276
-%r277 = lshr i576 %r273, 64
-%r278 = trunc i576 %r277 to i64
-%r280 = getelementptr i64, i64* %r1, i32 6
-store i64 %r278, i64* %r280
-%r281 = lshr i576 %r277, 64
-%r282 = trunc i576 %r281 to i64
-%r284 = getelementptr i64, i64* %r1, i32 7
-store i64 %r282, i64* %r284
-%r285 = lshr i576 %r281, 64
-%r286 = trunc i576 %r285 to i64
-%r288 = getelementptr i64, i64* %r1, i32 8
-store i64 %r286, i64* %r288
+%r271 = trunc i576 %r268 to i512
+%r272 = sub i512 %r271, %r56
+%r273 = lshr i512 %r272, 511
+%r274 = trunc i512 %r273 to i1
+%r275 = select i1 %r274, i512 %r271, i512 %r272
+%r277 = getelementptr i64, i64* %r1, i32 0
+%r278 = trunc i512 %r275 to i64
+store i64 %r278, i64* %r277
+%r279 = lshr i512 %r275, 64
+%r281 = getelementptr i64, i64* %r1, i32 1
+%r282 = trunc i512 %r279 to i64
+store i64 %r282, i64* %r281
+%r283 = lshr i512 %r279, 64
+%r285 = getelementptr i64, i64* %r1, i32 2
+%r286 = trunc i512 %r283 to i64
+store i64 %r286, i64* %r285
+%r287 = lshr i512 %r283, 64
+%r289 = getelementptr i64, i64* %r1, i32 3
+%r290 = trunc i512 %r287 to i64
+store i64 %r290, i64* %r289
+%r291 = lshr i512 %r287, 64
+%r293 = getelementptr i64, i64* %r1, i32 4
+%r294 = trunc i512 %r291 to i64
+store i64 %r294, i64* %r293
+%r295 = lshr i512 %r291, 64
+%r297 = getelementptr i64, i64* %r1, i32 5
+%r298 = trunc i512 %r295 to i64
+store i64 %r298, i64* %r297
+%r299 = lshr i512 %r295, 64
+%r301 = getelementptr i64, i64* %r1, i32 6
+%r302 = trunc i512 %r299 to i64
+store i64 %r302, i64* %r301
+%r303 = lshr i512 %r299, 64
+%r305 = getelementptr i64, i64* %r1, i32 7
+%r306 = trunc i512 %r303 to i64
+store i64 %r306, i64* %r305
 ret void
 }
-define i64 @mcl_fp_addPre9L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define i64 @mcl_fp_addPre8L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r3
 %r6 = zext i64 %r5 to i128
@@ -13521,104 +6305,88 @@ define i64 @mcl_fp_addPre9L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias
 %r53 = shl i512 %r52, 448
 %r54 = or i512 %r48, %r53
 %r55 = zext i512 %r54 to i576
-%r57 = getelementptr i64, i64* %r3, i32 8
-%r58 = load i64, i64* %r57
-%r59 = zext i64 %r58 to i576
-%r60 = shl i576 %r59, 512
-%r61 = or i576 %r55, %r60
-%r62 = zext i576 %r61 to i640
-%r63 = load i64, i64* %r4
-%r64 = zext i64 %r63 to i128
-%r66 = getelementptr i64, i64* %r4, i32 1
+%r56 = load i64, i64* %r4
+%r57 = zext i64 %r56 to i128
+%r59 = getelementptr i64, i64* %r4, i32 1
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i128
+%r62 = shl i128 %r61, 64
+%r63 = or i128 %r57, %r62
+%r64 = zext i128 %r63 to i192
+%r66 = getelementptr i64, i64* %r4, i32 2
 %r67 = load i64, i64* %r66
-%r68 = zext i64 %r67 to i128
-%r69 = shl i128 %r68, 64
-%r70 = or i128 %r64, %r69
-%r71 = zext i128 %r70 to i192
-%r73 = getelementptr i64, i64* %r4, i32 2
+%r68 = zext i64 %r67 to i192
+%r69 = shl i192 %r68, 128
+%r70 = or i192 %r64, %r69
+%r71 = zext i192 %r70 to i256
+%r73 = getelementptr i64, i64* %r4, i32 3
 %r74 = load i64, i64* %r73
-%r75 = zext i64 %r74 to i192
-%r76 = shl i192 %r75, 128
-%r77 = or i192 %r71, %r76
-%r78 = zext i192 %r77 to i256
-%r80 = getelementptr i64, i64* %r4, i32 3
+%r75 = zext i64 %r74 to i256
+%r76 = shl i256 %r75, 192
+%r77 = or i256 %r71, %r76
+%r78 = zext i256 %r77 to i320
+%r80 = getelementptr i64, i64* %r4, i32 4
 %r81 = load i64, i64* %r80
-%r82 = zext i64 %r81 to i256
-%r83 = shl i256 %r82, 192
-%r84 = or i256 %r78, %r83
-%r85 = zext i256 %r84 to i320
-%r87 = getelementptr i64, i64* %r4, i32 4
+%r82 = zext i64 %r81 to i320
+%r83 = shl i320 %r82, 256
+%r84 = or i320 %r78, %r83
+%r85 = zext i320 %r84 to i384
+%r87 = getelementptr i64, i64* %r4, i32 5
 %r88 = load i64, i64* %r87
-%r89 = zext i64 %r88 to i320
-%r90 = shl i320 %r89, 256
-%r91 = or i320 %r85, %r90
-%r92 = zext i320 %r91 to i384
-%r94 = getelementptr i64, i64* %r4, i32 5
+%r89 = zext i64 %r88 to i384
+%r90 = shl i384 %r89, 320
+%r91 = or i384 %r85, %r90
+%r92 = zext i384 %r91 to i448
+%r94 = getelementptr i64, i64* %r4, i32 6
 %r95 = load i64, i64* %r94
-%r96 = zext i64 %r95 to i384
-%r97 = shl i384 %r96, 320
-%r98 = or i384 %r92, %r97
-%r99 = zext i384 %r98 to i448
-%r101 = getelementptr i64, i64* %r4, i32 6
+%r96 = zext i64 %r95 to i448
+%r97 = shl i448 %r96, 384
+%r98 = or i448 %r92, %r97
+%r99 = zext i448 %r98 to i512
+%r101 = getelementptr i64, i64* %r4, i32 7
 %r102 = load i64, i64* %r101
-%r103 = zext i64 %r102 to i448
-%r104 = shl i448 %r103, 384
-%r105 = or i448 %r99, %r104
-%r106 = zext i448 %r105 to i512
-%r108 = getelementptr i64, i64* %r4, i32 7
-%r109 = load i64, i64* %r108
-%r110 = zext i64 %r109 to i512
-%r111 = shl i512 %r110, 448
-%r112 = or i512 %r106, %r111
-%r113 = zext i512 %r112 to i576
-%r115 = getelementptr i64, i64* %r4, i32 8
-%r116 = load i64, i64* %r115
-%r117 = zext i64 %r116 to i576
-%r118 = shl i576 %r117, 512
-%r119 = or i576 %r113, %r118
-%r120 = zext i576 %r119 to i640
-%r121 = add i640 %r62, %r120
-%r122 = trunc i640 %r121 to i576
-%r123 = trunc i576 %r122 to i64
-%r125 = getelementptr i64, i64* %r2, i32 0
-store i64 %r123, i64* %r125
-%r126 = lshr i576 %r122, 64
-%r127 = trunc i576 %r126 to i64
-%r129 = getelementptr i64, i64* %r2, i32 1
-store i64 %r127, i64* %r129
-%r130 = lshr i576 %r126, 64
-%r131 = trunc i576 %r130 to i64
-%r133 = getelementptr i64, i64* %r2, i32 2
-store i64 %r131, i64* %r133
-%r134 = lshr i576 %r130, 64
-%r135 = trunc i576 %r134 to i64
-%r137 = getelementptr i64, i64* %r2, i32 3
-store i64 %r135, i64* %r137
-%r138 = lshr i576 %r134, 64
-%r139 = trunc i576 %r138 to i64
-%r141 = getelementptr i64, i64* %r2, i32 4
-store i64 %r139, i64* %r141
-%r142 = lshr i576 %r138, 64
-%r143 = trunc i576 %r142 to i64
-%r145 = getelementptr i64, i64* %r2, i32 5
-store i64 %r143, i64* %r145
-%r146 = lshr i576 %r142, 64
-%r147 = trunc i576 %r146 to i64
-%r149 = getelementptr i64, i64* %r2, i32 6
-store i64 %r147, i64* %r149
-%r150 = lshr i576 %r146, 64
-%r151 = trunc i576 %r150 to i64
-%r153 = getelementptr i64, i64* %r2, i32 7
-store i64 %r151, i64* %r153
-%r154 = lshr i576 %r150, 64
-%r155 = trunc i576 %r154 to i64
-%r157 = getelementptr i64, i64* %r2, i32 8
-store i64 %r155, i64* %r157
-%r158 = lshr i640 %r121, 576
-%r159 = trunc i640 %r158 to i64
-ret i64 %r159
+%r103 = zext i64 %r102 to i512
+%r104 = shl i512 %r103, 448
+%r105 = or i512 %r99, %r104
+%r106 = zext i512 %r105 to i576
+%r107 = add i576 %r55, %r106
+%r108 = trunc i576 %r107 to i512
+%r110 = getelementptr i64, i64* %r2, i32 0
+%r111 = trunc i512 %r108 to i64
+store i64 %r111, i64* %r110
+%r112 = lshr i512 %r108, 64
+%r114 = getelementptr i64, i64* %r2, i32 1
+%r115 = trunc i512 %r112 to i64
+store i64 %r115, i64* %r114
+%r116 = lshr i512 %r112, 64
+%r118 = getelementptr i64, i64* %r2, i32 2
+%r119 = trunc i512 %r116 to i64
+store i64 %r119, i64* %r118
+%r120 = lshr i512 %r116, 64
+%r122 = getelementptr i64, i64* %r2, i32 3
+%r123 = trunc i512 %r120 to i64
+store i64 %r123, i64* %r122
+%r124 = lshr i512 %r120, 64
+%r126 = getelementptr i64, i64* %r2, i32 4
+%r127 = trunc i512 %r124 to i64
+store i64 %r127, i64* %r126
+%r128 = lshr i512 %r124, 64
+%r130 = getelementptr i64, i64* %r2, i32 5
+%r131 = trunc i512 %r128 to i64
+store i64 %r131, i64* %r130
+%r132 = lshr i512 %r128, 64
+%r134 = getelementptr i64, i64* %r2, i32 6
+%r135 = trunc i512 %r132 to i64
+store i64 %r135, i64* %r134
+%r136 = lshr i512 %r132, 64
+%r138 = getelementptr i64, i64* %r2, i32 7
+%r139 = trunc i512 %r136 to i64
+store i64 %r139, i64* %r138
+%r140 = lshr i576 %r107, 512
+%r141 = trunc i576 %r140 to i64
+ret i64 %r141
 }
-define i64 @mcl_fp_subPre9L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define i64 @mcl_fp_subPre8L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r3
 %r6 = zext i64 %r5 to i128
@@ -13664,105 +6432,89 @@ define i64 @mcl_fp_subPre9L(i64* noalias  %r2, i64* noalias  %r3, i64* noalias
 %r53 = shl i512 %r52, 448
 %r54 = or i512 %r48, %r53
 %r55 = zext i512 %r54 to i576
-%r57 = getelementptr i64, i64* %r3, i32 8
-%r58 = load i64, i64* %r57
-%r59 = zext i64 %r58 to i576
-%r60 = shl i576 %r59, 512
-%r61 = or i576 %r55, %r60
-%r62 = zext i576 %r61 to i640
-%r63 = load i64, i64* %r4
-%r64 = zext i64 %r63 to i128
-%r66 = getelementptr i64, i64* %r4, i32 1
+%r56 = load i64, i64* %r4
+%r57 = zext i64 %r56 to i128
+%r59 = getelementptr i64, i64* %r4, i32 1
+%r60 = load i64, i64* %r59
+%r61 = zext i64 %r60 to i128
+%r62 = shl i128 %r61, 64
+%r63 = or i128 %r57, %r62
+%r64 = zext i128 %r63 to i192
+%r66 = getelementptr i64, i64* %r4, i32 2
 %r67 = load i64, i64* %r66
-%r68 = zext i64 %r67 to i128
-%r69 = shl i128 %r68, 64
-%r70 = or i128 %r64, %r69
-%r71 = zext i128 %r70 to i192
-%r73 = getelementptr i64, i64* %r4, i32 2
+%r68 = zext i64 %r67 to i192
+%r69 = shl i192 %r68, 128
+%r70 = or i192 %r64, %r69
+%r71 = zext i192 %r70 to i256
+%r73 = getelementptr i64, i64* %r4, i32 3
 %r74 = load i64, i64* %r73
-%r75 = zext i64 %r74 to i192
-%r76 = shl i192 %r75, 128
-%r77 = or i192 %r71, %r76
-%r78 = zext i192 %r77 to i256
-%r80 = getelementptr i64, i64* %r4, i32 3
+%r75 = zext i64 %r74 to i256
+%r76 = shl i256 %r75, 192
+%r77 = or i256 %r71, %r76
+%r78 = zext i256 %r77 to i320
+%r80 = getelementptr i64, i64* %r4, i32 4
 %r81 = load i64, i64* %r80
-%r82 = zext i64 %r81 to i256
-%r83 = shl i256 %r82, 192
-%r84 = or i256 %r78, %r83
-%r85 = zext i256 %r84 to i320
-%r87 = getelementptr i64, i64* %r4, i32 4
+%r82 = zext i64 %r81 to i320
+%r83 = shl i320 %r82, 256
+%r84 = or i320 %r78, %r83
+%r85 = zext i320 %r84 to i384
+%r87 = getelementptr i64, i64* %r4, i32 5
 %r88 = load i64, i64* %r87
-%r89 = zext i64 %r88 to i320
-%r90 = shl i320 %r89, 256
-%r91 = or i320 %r85, %r90
-%r92 = zext i320 %r91 to i384
-%r94 = getelementptr i64, i64* %r4, i32 5
+%r89 = zext i64 %r88 to i384
+%r90 = shl i384 %r89, 320
+%r91 = or i384 %r85, %r90
+%r92 = zext i384 %r91 to i448
+%r94 = getelementptr i64, i64* %r4, i32 6
 %r95 = load i64, i64* %r94
-%r96 = zext i64 %r95 to i384
-%r97 = shl i384 %r96, 320
-%r98 = or i384 %r92, %r97
-%r99 = zext i384 %r98 to i448
-%r101 = getelementptr i64, i64* %r4, i32 6
+%r96 = zext i64 %r95 to i448
+%r97 = shl i448 %r96, 384
+%r98 = or i448 %r92, %r97
+%r99 = zext i448 %r98 to i512
+%r101 = getelementptr i64, i64* %r4, i32 7
 %r102 = load i64, i64* %r101
-%r103 = zext i64 %r102 to i448
-%r104 = shl i448 %r103, 384
-%r105 = or i448 %r99, %r104
-%r106 = zext i448 %r105 to i512
-%r108 = getelementptr i64, i64* %r4, i32 7
-%r109 = load i64, i64* %r108
-%r110 = zext i64 %r109 to i512
-%r111 = shl i512 %r110, 448
-%r112 = or i512 %r106, %r111
-%r113 = zext i512 %r112 to i576
-%r115 = getelementptr i64, i64* %r4, i32 8
-%r116 = load i64, i64* %r115
-%r117 = zext i64 %r116 to i576
-%r118 = shl i576 %r117, 512
-%r119 = or i576 %r113, %r118
-%r120 = zext i576 %r119 to i640
-%r121 = sub i640 %r62, %r120
-%r122 = trunc i640 %r121 to i576
-%r123 = trunc i576 %r122 to i64
-%r125 = getelementptr i64, i64* %r2, i32 0
-store i64 %r123, i64* %r125
-%r126 = lshr i576 %r122, 64
-%r127 = trunc i576 %r126 to i64
-%r129 = getelementptr i64, i64* %r2, i32 1
-store i64 %r127, i64* %r129
-%r130 = lshr i576 %r126, 64
-%r131 = trunc i576 %r130 to i64
-%r133 = getelementptr i64, i64* %r2, i32 2
-store i64 %r131, i64* %r133
-%r134 = lshr i576 %r130, 64
-%r135 = trunc i576 %r134 to i64
-%r137 = getelementptr i64, i64* %r2, i32 3
-store i64 %r135, i64* %r137
-%r138 = lshr i576 %r134, 64
-%r139 = trunc i576 %r138 to i64
-%r141 = getelementptr i64, i64* %r2, i32 4
-store i64 %r139, i64* %r141
-%r142 = lshr i576 %r138, 64
-%r143 = trunc i576 %r142 to i64
-%r145 = getelementptr i64, i64* %r2, i32 5
-store i64 %r143, i64* %r145
-%r146 = lshr i576 %r142, 64
-%r147 = trunc i576 %r146 to i64
-%r149 = getelementptr i64, i64* %r2, i32 6
-store i64 %r147, i64* %r149
-%r150 = lshr i576 %r146, 64
-%r151 = trunc i576 %r150 to i64
-%r153 = getelementptr i64, i64* %r2, i32 7
-store i64 %r151, i64* %r153
-%r154 = lshr i576 %r150, 64
-%r155 = trunc i576 %r154 to i64
-%r157 = getelementptr i64, i64* %r2, i32 8
-store i64 %r155, i64* %r157
-%r158 = lshr i640 %r121, 576
-%r159 = trunc i640 %r158 to i64
-%r161 = and i64 %r159, 1
-ret i64 %r161
+%r103 = zext i64 %r102 to i512
+%r104 = shl i512 %r103, 448
+%r105 = or i512 %r99, %r104
+%r106 = zext i512 %r105 to i576
+%r107 = sub i576 %r55, %r106
+%r108 = trunc i576 %r107 to i512
+%r110 = getelementptr i64, i64* %r2, i32 0
+%r111 = trunc i512 %r108 to i64
+store i64 %r111, i64* %r110
+%r112 = lshr i512 %r108, 64
+%r114 = getelementptr i64, i64* %r2, i32 1
+%r115 = trunc i512 %r112 to i64
+store i64 %r115, i64* %r114
+%r116 = lshr i512 %r112, 64
+%r118 = getelementptr i64, i64* %r2, i32 2
+%r119 = trunc i512 %r116 to i64
+store i64 %r119, i64* %r118
+%r120 = lshr i512 %r116, 64
+%r122 = getelementptr i64, i64* %r2, i32 3
+%r123 = trunc i512 %r120 to i64
+store i64 %r123, i64* %r122
+%r124 = lshr i512 %r120, 64
+%r126 = getelementptr i64, i64* %r2, i32 4
+%r127 = trunc i512 %r124 to i64
+store i64 %r127, i64* %r126
+%r128 = lshr i512 %r124, 64
+%r130 = getelementptr i64, i64* %r2, i32 5
+%r131 = trunc i512 %r128 to i64
+store i64 %r131, i64* %r130
+%r132 = lshr i512 %r128, 64
+%r134 = getelementptr i64, i64* %r2, i32 6
+%r135 = trunc i512 %r132 to i64
+store i64 %r135, i64* %r134
+%r136 = lshr i512 %r132, 64
+%r138 = getelementptr i64, i64* %r2, i32 7
+%r139 = trunc i512 %r136 to i64
+store i64 %r139, i64* %r138
+%r141 = lshr i576 %r107, 512
+%r142 = trunc i576 %r141 to i64
+%r143 = and i64 %r142, 1
+ret i64 %r143
 }
-define void @mcl_fp_shr1_9L(i64* noalias  %r1, i64* noalias  %r2)
+define void @mcl_fp_shr1_8L(i64* noalias  %r1, i64* noalias  %r2)
 {
 %r3 = load i64, i64* %r2
 %r4 = zext i64 %r3 to i128
@@ -13807,51 +6559,41 @@ define void @mcl_fp_shr1_9L(i64* noalias  %r1, i64* noalias  %r2)
 %r50 = zext i64 %r49 to i512
 %r51 = shl i512 %r50, 448
 %r52 = or i512 %r46, %r51
-%r53 = zext i512 %r52 to i576
-%r55 = getelementptr i64, i64* %r2, i32 8
-%r56 = load i64, i64* %r55
-%r57 = zext i64 %r56 to i576
-%r58 = shl i576 %r57, 512
-%r59 = or i576 %r53, %r58
-%r60 = lshr i576 %r59, 1
-%r61 = trunc i576 %r60 to i64
-%r63 = getelementptr i64, i64* %r1, i32 0
-store i64 %r61, i64* %r63
-%r64 = lshr i576 %r60, 64
-%r65 = trunc i576 %r64 to i64
-%r67 = getelementptr i64, i64* %r1, i32 1
-store i64 %r65, i64* %r67
-%r68 = lshr i576 %r64, 64
-%r69 = trunc i576 %r68 to i64
-%r71 = getelementptr i64, i64* %r1, i32 2
-store i64 %r69, i64* %r71
-%r72 = lshr i576 %r68, 64
-%r73 = trunc i576 %r72 to i64
-%r75 = getelementptr i64, i64* %r1, i32 3
-store i64 %r73, i64* %r75
-%r76 = lshr i576 %r72, 64
-%r77 = trunc i576 %r76 to i64
-%r79 = getelementptr i64, i64* %r1, i32 4
-store i64 %r77, i64* %r79
-%r80 = lshr i576 %r76, 64
-%r81 = trunc i576 %r80 to i64
-%r83 = getelementptr i64, i64* %r1, i32 5
-store i64 %r81, i64* %r83
-%r84 = lshr i576 %r80, 64
-%r85 = trunc i576 %r84 to i64
-%r87 = getelementptr i64, i64* %r1, i32 6
-store i64 %r85, i64* %r87
-%r88 = lshr i576 %r84, 64
-%r89 = trunc i576 %r88 to i64
-%r91 = getelementptr i64, i64* %r1, i32 7
-store i64 %r89, i64* %r91
-%r92 = lshr i576 %r88, 64
-%r93 = trunc i576 %r92 to i64
-%r95 = getelementptr i64, i64* %r1, i32 8
-store i64 %r93, i64* %r95
+%r53 = lshr i512 %r52, 1
+%r55 = getelementptr i64, i64* %r1, i32 0
+%r56 = trunc i512 %r53 to i64
+store i64 %r56, i64* %r55
+%r57 = lshr i512 %r53, 64
+%r59 = getelementptr i64, i64* %r1, i32 1
+%r60 = trunc i512 %r57 to i64
+store i64 %r60, i64* %r59
+%r61 = lshr i512 %r57, 64
+%r63 = getelementptr i64, i64* %r1, i32 2
+%r64 = trunc i512 %r61 to i64
+store i64 %r64, i64* %r63
+%r65 = lshr i512 %r61, 64
+%r67 = getelementptr i64, i64* %r1, i32 3
+%r68 = trunc i512 %r65 to i64
+store i64 %r68, i64* %r67
+%r69 = lshr i512 %r65, 64
+%r71 = getelementptr i64, i64* %r1, i32 4
+%r72 = trunc i512 %r69 to i64
+store i64 %r72, i64* %r71
+%r73 = lshr i512 %r69, 64
+%r75 = getelementptr i64, i64* %r1, i32 5
+%r76 = trunc i512 %r73 to i64
+store i64 %r76, i64* %r75
+%r77 = lshr i512 %r73, 64
+%r79 = getelementptr i64, i64* %r1, i32 6
+%r80 = trunc i512 %r77 to i64
+store i64 %r80, i64* %r79
+%r81 = lshr i512 %r77, 64
+%r83 = getelementptr i64, i64* %r1, i32 7
+%r84 = trunc i512 %r81 to i64
+store i64 %r84, i64* %r83
 ret void
 }
-define void @mcl_fp_add9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_add8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -13896,196 +6638,170 @@ define void @mcl_fp_add9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r
 %r52 = zext i64 %r51 to i512
 %r53 = shl i512 %r52, 448
 %r54 = or i512 %r48, %r53
-%r55 = zext i512 %r54 to i576
-%r57 = getelementptr i64, i64* %r2, i32 8
-%r58 = load i64, i64* %r57
-%r59 = zext i64 %r58 to i576
-%r60 = shl i576 %r59, 512
-%r61 = or i576 %r55, %r60
-%r62 = load i64, i64* %r3
-%r63 = zext i64 %r62 to i128
-%r65 = getelementptr i64, i64* %r3, i32 1
+%r55 = load i64, i64* %r3
+%r56 = zext i64 %r55 to i128
+%r58 = getelementptr i64, i64* %r3, i32 1
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i128
+%r61 = shl i128 %r60, 64
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i192
+%r65 = getelementptr i64, i64* %r3, i32 2
 %r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i128
-%r68 = shl i128 %r67, 64
-%r69 = or i128 %r63, %r68
-%r70 = zext i128 %r69 to i192
-%r72 = getelementptr i64, i64* %r3, i32 2
+%r67 = zext i64 %r66 to i192
+%r68 = shl i192 %r67, 128
+%r69 = or i192 %r63, %r68
+%r70 = zext i192 %r69 to i256
+%r72 = getelementptr i64, i64* %r3, i32 3
 %r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i192
-%r75 = shl i192 %r74, 128
-%r76 = or i192 %r70, %r75
-%r77 = zext i192 %r76 to i256
-%r79 = getelementptr i64, i64* %r3, i32 3
+%r74 = zext i64 %r73 to i256
+%r75 = shl i256 %r74, 192
+%r76 = or i256 %r70, %r75
+%r77 = zext i256 %r76 to i320
+%r79 = getelementptr i64, i64* %r3, i32 4
 %r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i256
-%r82 = shl i256 %r81, 192
-%r83 = or i256 %r77, %r82
-%r84 = zext i256 %r83 to i320
-%r86 = getelementptr i64, i64* %r3, i32 4
+%r81 = zext i64 %r80 to i320
+%r82 = shl i320 %r81, 256
+%r83 = or i320 %r77, %r82
+%r84 = zext i320 %r83 to i384
+%r86 = getelementptr i64, i64* %r3, i32 5
 %r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i320
-%r89 = shl i320 %r88, 256
-%r90 = or i320 %r84, %r89
-%r91 = zext i320 %r90 to i384
-%r93 = getelementptr i64, i64* %r3, i32 5
+%r88 = zext i64 %r87 to i384
+%r89 = shl i384 %r88, 320
+%r90 = or i384 %r84, %r89
+%r91 = zext i384 %r90 to i448
+%r93 = getelementptr i64, i64* %r3, i32 6
 %r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i384
-%r96 = shl i384 %r95, 320
-%r97 = or i384 %r91, %r96
-%r98 = zext i384 %r97 to i448
-%r100 = getelementptr i64, i64* %r3, i32 6
+%r95 = zext i64 %r94 to i448
+%r96 = shl i448 %r95, 384
+%r97 = or i448 %r91, %r96
+%r98 = zext i448 %r97 to i512
+%r100 = getelementptr i64, i64* %r3, i32 7
 %r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i448
-%r103 = shl i448 %r102, 384
-%r104 = or i448 %r98, %r103
-%r105 = zext i448 %r104 to i512
-%r107 = getelementptr i64, i64* %r3, i32 7
-%r108 = load i64, i64* %r107
-%r109 = zext i64 %r108 to i512
-%r110 = shl i512 %r109, 448
-%r111 = or i512 %r105, %r110
-%r112 = zext i512 %r111 to i576
-%r114 = getelementptr i64, i64* %r3, i32 8
-%r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i576
-%r117 = shl i576 %r116, 512
-%r118 = or i576 %r112, %r117
-%r119 = zext i576 %r61 to i640
-%r120 = zext i576 %r118 to i640
-%r121 = add i640 %r119, %r120
-%r122 = trunc i640 %r121 to i576
-%r123 = trunc i576 %r122 to i64
-%r125 = getelementptr i64, i64* %r1, i32 0
-store i64 %r123, i64* %r125
-%r126 = lshr i576 %r122, 64
-%r127 = trunc i576 %r126 to i64
-%r129 = getelementptr i64, i64* %r1, i32 1
-store i64 %r127, i64* %r129
-%r130 = lshr i576 %r126, 64
-%r131 = trunc i576 %r130 to i64
-%r133 = getelementptr i64, i64* %r1, i32 2
-store i64 %r131, i64* %r133
-%r134 = lshr i576 %r130, 64
-%r135 = trunc i576 %r134 to i64
-%r137 = getelementptr i64, i64* %r1, i32 3
-store i64 %r135, i64* %r137
-%r138 = lshr i576 %r134, 64
-%r139 = trunc i576 %r138 to i64
-%r141 = getelementptr i64, i64* %r1, i32 4
-store i64 %r139, i64* %r141
-%r142 = lshr i576 %r138, 64
-%r143 = trunc i576 %r142 to i64
-%r145 = getelementptr i64, i64* %r1, i32 5
-store i64 %r143, i64* %r145
-%r146 = lshr i576 %r142, 64
-%r147 = trunc i576 %r146 to i64
-%r149 = getelementptr i64, i64* %r1, i32 6
-store i64 %r147, i64* %r149
-%r150 = lshr i576 %r146, 64
-%r151 = trunc i576 %r150 to i64
-%r153 = getelementptr i64, i64* %r1, i32 7
-store i64 %r151, i64* %r153
-%r154 = lshr i576 %r150, 64
-%r155 = trunc i576 %r154 to i64
-%r157 = getelementptr i64, i64* %r1, i32 8
-store i64 %r155, i64* %r157
-%r158 = load i64, i64* %r4
-%r159 = zext i64 %r158 to i128
-%r161 = getelementptr i64, i64* %r4, i32 1
-%r162 = load i64, i64* %r161
-%r163 = zext i64 %r162 to i128
-%r164 = shl i128 %r163, 64
-%r165 = or i128 %r159, %r164
-%r166 = zext i128 %r165 to i192
-%r168 = getelementptr i64, i64* %r4, i32 2
-%r169 = load i64, i64* %r168
-%r170 = zext i64 %r169 to i192
-%r171 = shl i192 %r170, 128
-%r172 = or i192 %r166, %r171
-%r173 = zext i192 %r172 to i256
-%r175 = getelementptr i64, i64* %r4, i32 3
-%r176 = load i64, i64* %r175
-%r177 = zext i64 %r176 to i256
-%r178 = shl i256 %r177, 192
-%r179 = or i256 %r173, %r178
-%r180 = zext i256 %r179 to i320
-%r182 = getelementptr i64, i64* %r4, i32 4
-%r183 = load i64, i64* %r182
-%r184 = zext i64 %r183 to i320
-%r185 = shl i320 %r184, 256
-%r186 = or i320 %r180, %r185
-%r187 = zext i320 %r186 to i384
-%r189 = getelementptr i64, i64* %r4, i32 5
-%r190 = load i64, i64* %r189
-%r191 = zext i64 %r190 to i384
-%r192 = shl i384 %r191, 320
-%r193 = or i384 %r187, %r192
-%r194 = zext i384 %r193 to i448
-%r196 = getelementptr i64, i64* %r4, i32 6
-%r197 = load i64, i64* %r196
-%r198 = zext i64 %r197 to i448
-%r199 = shl i448 %r198, 384
-%r200 = or i448 %r194, %r199
-%r201 = zext i448 %r200 to i512
-%r203 = getelementptr i64, i64* %r4, i32 7
-%r204 = load i64, i64* %r203
-%r205 = zext i64 %r204 to i512
-%r206 = shl i512 %r205, 448
-%r207 = or i512 %r201, %r206
-%r208 = zext i512 %r207 to i576
-%r210 = getelementptr i64, i64* %r4, i32 8
-%r211 = load i64, i64* %r210
-%r212 = zext i64 %r211 to i576
-%r213 = shl i576 %r212, 512
-%r214 = or i576 %r208, %r213
-%r215 = zext i576 %r214 to i640
-%r216 = sub i640 %r121, %r215
-%r217 = lshr i640 %r216, 576
-%r218 = trunc i640 %r217 to i1
-br i1%r218, label %carry, label %nocarry
+%r102 = zext i64 %r101 to i512
+%r103 = shl i512 %r102, 448
+%r104 = or i512 %r98, %r103
+%r105 = zext i512 %r54 to i576
+%r106 = zext i512 %r104 to i576
+%r107 = add i576 %r105, %r106
+%r108 = trunc i576 %r107 to i512
+%r110 = getelementptr i64, i64* %r1, i32 0
+%r111 = trunc i512 %r108 to i64
+store i64 %r111, i64* %r110
+%r112 = lshr i512 %r108, 64
+%r114 = getelementptr i64, i64* %r1, i32 1
+%r115 = trunc i512 %r112 to i64
+store i64 %r115, i64* %r114
+%r116 = lshr i512 %r112, 64
+%r118 = getelementptr i64, i64* %r1, i32 2
+%r119 = trunc i512 %r116 to i64
+store i64 %r119, i64* %r118
+%r120 = lshr i512 %r116, 64
+%r122 = getelementptr i64, i64* %r1, i32 3
+%r123 = trunc i512 %r120 to i64
+store i64 %r123, i64* %r122
+%r124 = lshr i512 %r120, 64
+%r126 = getelementptr i64, i64* %r1, i32 4
+%r127 = trunc i512 %r124 to i64
+store i64 %r127, i64* %r126
+%r128 = lshr i512 %r124, 64
+%r130 = getelementptr i64, i64* %r1, i32 5
+%r131 = trunc i512 %r128 to i64
+store i64 %r131, i64* %r130
+%r132 = lshr i512 %r128, 64
+%r134 = getelementptr i64, i64* %r1, i32 6
+%r135 = trunc i512 %r132 to i64
+store i64 %r135, i64* %r134
+%r136 = lshr i512 %r132, 64
+%r138 = getelementptr i64, i64* %r1, i32 7
+%r139 = trunc i512 %r136 to i64
+store i64 %r139, i64* %r138
+%r140 = load i64, i64* %r4
+%r141 = zext i64 %r140 to i128
+%r143 = getelementptr i64, i64* %r4, i32 1
+%r144 = load i64, i64* %r143
+%r145 = zext i64 %r144 to i128
+%r146 = shl i128 %r145, 64
+%r147 = or i128 %r141, %r146
+%r148 = zext i128 %r147 to i192
+%r150 = getelementptr i64, i64* %r4, i32 2
+%r151 = load i64, i64* %r150
+%r152 = zext i64 %r151 to i192
+%r153 = shl i192 %r152, 128
+%r154 = or i192 %r148, %r153
+%r155 = zext i192 %r154 to i256
+%r157 = getelementptr i64, i64* %r4, i32 3
+%r158 = load i64, i64* %r157
+%r159 = zext i64 %r158 to i256
+%r160 = shl i256 %r159, 192
+%r161 = or i256 %r155, %r160
+%r162 = zext i256 %r161 to i320
+%r164 = getelementptr i64, i64* %r4, i32 4
+%r165 = load i64, i64* %r164
+%r166 = zext i64 %r165 to i320
+%r167 = shl i320 %r166, 256
+%r168 = or i320 %r162, %r167
+%r169 = zext i320 %r168 to i384
+%r171 = getelementptr i64, i64* %r4, i32 5
+%r172 = load i64, i64* %r171
+%r173 = zext i64 %r172 to i384
+%r174 = shl i384 %r173, 320
+%r175 = or i384 %r169, %r174
+%r176 = zext i384 %r175 to i448
+%r178 = getelementptr i64, i64* %r4, i32 6
+%r179 = load i64, i64* %r178
+%r180 = zext i64 %r179 to i448
+%r181 = shl i448 %r180, 384
+%r182 = or i448 %r176, %r181
+%r183 = zext i448 %r182 to i512
+%r185 = getelementptr i64, i64* %r4, i32 7
+%r186 = load i64, i64* %r185
+%r187 = zext i64 %r186 to i512
+%r188 = shl i512 %r187, 448
+%r189 = or i512 %r183, %r188
+%r190 = zext i512 %r189 to i576
+%r191 = sub i576 %r107, %r190
+%r192 = lshr i576 %r191, 512
+%r193 = trunc i576 %r192 to i1
+br i1%r193, label %carry, label %nocarry
 nocarry:
-%r219 = trunc i640 %r216 to i576
-%r220 = trunc i576 %r219 to i64
-%r222 = getelementptr i64, i64* %r1, i32 0
-store i64 %r220, i64* %r222
-%r223 = lshr i576 %r219, 64
-%r224 = trunc i576 %r223 to i64
-%r226 = getelementptr i64, i64* %r1, i32 1
-store i64 %r224, i64* %r226
-%r227 = lshr i576 %r223, 64
-%r228 = trunc i576 %r227 to i64
-%r230 = getelementptr i64, i64* %r1, i32 2
-store i64 %r228, i64* %r230
-%r231 = lshr i576 %r227, 64
-%r232 = trunc i576 %r231 to i64
-%r234 = getelementptr i64, i64* %r1, i32 3
-store i64 %r232, i64* %r234
-%r235 = lshr i576 %r231, 64
-%r236 = trunc i576 %r235 to i64
-%r238 = getelementptr i64, i64* %r1, i32 4
-store i64 %r236, i64* %r238
-%r239 = lshr i576 %r235, 64
-%r240 = trunc i576 %r239 to i64
-%r242 = getelementptr i64, i64* %r1, i32 5
-store i64 %r240, i64* %r242
-%r243 = lshr i576 %r239, 64
-%r244 = trunc i576 %r243 to i64
-%r246 = getelementptr i64, i64* %r1, i32 6
-store i64 %r244, i64* %r246
-%r247 = lshr i576 %r243, 64
-%r248 = trunc i576 %r247 to i64
-%r250 = getelementptr i64, i64* %r1, i32 7
-store i64 %r248, i64* %r250
-%r251 = lshr i576 %r247, 64
-%r252 = trunc i576 %r251 to i64
-%r254 = getelementptr i64, i64* %r1, i32 8
-store i64 %r252, i64* %r254
+%r194 = trunc i576 %r191 to i512
+%r196 = getelementptr i64, i64* %r1, i32 0
+%r197 = trunc i512 %r194 to i64
+store i64 %r197, i64* %r196
+%r198 = lshr i512 %r194, 64
+%r200 = getelementptr i64, i64* %r1, i32 1
+%r201 = trunc i512 %r198 to i64
+store i64 %r201, i64* %r200
+%r202 = lshr i512 %r198, 64
+%r204 = getelementptr i64, i64* %r1, i32 2
+%r205 = trunc i512 %r202 to i64
+store i64 %r205, i64* %r204
+%r206 = lshr i512 %r202, 64
+%r208 = getelementptr i64, i64* %r1, i32 3
+%r209 = trunc i512 %r206 to i64
+store i64 %r209, i64* %r208
+%r210 = lshr i512 %r206, 64
+%r212 = getelementptr i64, i64* %r1, i32 4
+%r213 = trunc i512 %r210 to i64
+store i64 %r213, i64* %r212
+%r214 = lshr i512 %r210, 64
+%r216 = getelementptr i64, i64* %r1, i32 5
+%r217 = trunc i512 %r214 to i64
+store i64 %r217, i64* %r216
+%r218 = lshr i512 %r214, 64
+%r220 = getelementptr i64, i64* %r1, i32 6
+%r221 = trunc i512 %r218 to i64
+store i64 %r221, i64* %r220
+%r222 = lshr i512 %r218, 64
+%r224 = getelementptr i64, i64* %r1, i32 7
+%r225 = trunc i512 %r222 to i64
+store i64 %r225, i64* %r224
 ret void
 carry:
 ret void
 }
-define void @mcl_fp_addNF9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_addNF8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -14130,153 +6846,131 @@ define void @mcl_fp_addNF9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r52 = zext i64 %r51 to i512
 %r53 = shl i512 %r52, 448
 %r54 = or i512 %r48, %r53
-%r55 = zext i512 %r54 to i576
-%r57 = getelementptr i64, i64* %r2, i32 8
-%r58 = load i64, i64* %r57
-%r59 = zext i64 %r58 to i576
-%r60 = shl i576 %r59, 512
-%r61 = or i576 %r55, %r60
-%r62 = load i64, i64* %r3
-%r63 = zext i64 %r62 to i128
-%r65 = getelementptr i64, i64* %r3, i32 1
+%r55 = load i64, i64* %r3
+%r56 = zext i64 %r55 to i128
+%r58 = getelementptr i64, i64* %r3, i32 1
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i128
+%r61 = shl i128 %r60, 64
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i192
+%r65 = getelementptr i64, i64* %r3, i32 2
 %r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i128
-%r68 = shl i128 %r67, 64
-%r69 = or i128 %r63, %r68
-%r70 = zext i128 %r69 to i192
-%r72 = getelementptr i64, i64* %r3, i32 2
+%r67 = zext i64 %r66 to i192
+%r68 = shl i192 %r67, 128
+%r69 = or i192 %r63, %r68
+%r70 = zext i192 %r69 to i256
+%r72 = getelementptr i64, i64* %r3, i32 3
 %r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i192
-%r75 = shl i192 %r74, 128
-%r76 = or i192 %r70, %r75
-%r77 = zext i192 %r76 to i256
-%r79 = getelementptr i64, i64* %r3, i32 3
+%r74 = zext i64 %r73 to i256
+%r75 = shl i256 %r74, 192
+%r76 = or i256 %r70, %r75
+%r77 = zext i256 %r76 to i320
+%r79 = getelementptr i64, i64* %r3, i32 4
 %r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i256
-%r82 = shl i256 %r81, 192
-%r83 = or i256 %r77, %r82
-%r84 = zext i256 %r83 to i320
-%r86 = getelementptr i64, i64* %r3, i32 4
+%r81 = zext i64 %r80 to i320
+%r82 = shl i320 %r81, 256
+%r83 = or i320 %r77, %r82
+%r84 = zext i320 %r83 to i384
+%r86 = getelementptr i64, i64* %r3, i32 5
 %r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i320
-%r89 = shl i320 %r88, 256
-%r90 = or i320 %r84, %r89
-%r91 = zext i320 %r90 to i384
-%r93 = getelementptr i64, i64* %r3, i32 5
+%r88 = zext i64 %r87 to i384
+%r89 = shl i384 %r88, 320
+%r90 = or i384 %r84, %r89
+%r91 = zext i384 %r90 to i448
+%r93 = getelementptr i64, i64* %r3, i32 6
 %r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i384
-%r96 = shl i384 %r95, 320
-%r97 = or i384 %r91, %r96
-%r98 = zext i384 %r97 to i448
-%r100 = getelementptr i64, i64* %r3, i32 6
+%r95 = zext i64 %r94 to i448
+%r96 = shl i448 %r95, 384
+%r97 = or i448 %r91, %r96
+%r98 = zext i448 %r97 to i512
+%r100 = getelementptr i64, i64* %r3, i32 7
 %r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i448
-%r103 = shl i448 %r102, 384
-%r104 = or i448 %r98, %r103
-%r105 = zext i448 %r104 to i512
-%r107 = getelementptr i64, i64* %r3, i32 7
-%r108 = load i64, i64* %r107
-%r109 = zext i64 %r108 to i512
-%r110 = shl i512 %r109, 448
-%r111 = or i512 %r105, %r110
-%r112 = zext i512 %r111 to i576
-%r114 = getelementptr i64, i64* %r3, i32 8
-%r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i576
-%r117 = shl i576 %r116, 512
-%r118 = or i576 %r112, %r117
-%r119 = add i576 %r61, %r118
-%r120 = load i64, i64* %r4
-%r121 = zext i64 %r120 to i128
-%r123 = getelementptr i64, i64* %r4, i32 1
+%r102 = zext i64 %r101 to i512
+%r103 = shl i512 %r102, 448
+%r104 = or i512 %r98, %r103
+%r105 = add i512 %r54, %r104
+%r106 = load i64, i64* %r4
+%r107 = zext i64 %r106 to i128
+%r109 = getelementptr i64, i64* %r4, i32 1
+%r110 = load i64, i64* %r109
+%r111 = zext i64 %r110 to i128
+%r112 = shl i128 %r111, 64
+%r113 = or i128 %r107, %r112
+%r114 = zext i128 %r113 to i192
+%r116 = getelementptr i64, i64* %r4, i32 2
+%r117 = load i64, i64* %r116
+%r118 = zext i64 %r117 to i192
+%r119 = shl i192 %r118, 128
+%r120 = or i192 %r114, %r119
+%r121 = zext i192 %r120 to i256
+%r123 = getelementptr i64, i64* %r4, i32 3
 %r124 = load i64, i64* %r123
-%r125 = zext i64 %r124 to i128
-%r126 = shl i128 %r125, 64
-%r127 = or i128 %r121, %r126
-%r128 = zext i128 %r127 to i192
-%r130 = getelementptr i64, i64* %r4, i32 2
+%r125 = zext i64 %r124 to i256
+%r126 = shl i256 %r125, 192
+%r127 = or i256 %r121, %r126
+%r128 = zext i256 %r127 to i320
+%r130 = getelementptr i64, i64* %r4, i32 4
 %r131 = load i64, i64* %r130
-%r132 = zext i64 %r131 to i192
-%r133 = shl i192 %r132, 128
-%r134 = or i192 %r128, %r133
-%r135 = zext i192 %r134 to i256
-%r137 = getelementptr i64, i64* %r4, i32 3
+%r132 = zext i64 %r131 to i320
+%r133 = shl i320 %r132, 256
+%r134 = or i320 %r128, %r133
+%r135 = zext i320 %r134 to i384
+%r137 = getelementptr i64, i64* %r4, i32 5
 %r138 = load i64, i64* %r137
-%r139 = zext i64 %r138 to i256
-%r140 = shl i256 %r139, 192
-%r141 = or i256 %r135, %r140
-%r142 = zext i256 %r141 to i320
-%r144 = getelementptr i64, i64* %r4, i32 4
+%r139 = zext i64 %r138 to i384
+%r140 = shl i384 %r139, 320
+%r141 = or i384 %r135, %r140
+%r142 = zext i384 %r141 to i448
+%r144 = getelementptr i64, i64* %r4, i32 6
 %r145 = load i64, i64* %r144
-%r146 = zext i64 %r145 to i320
-%r147 = shl i320 %r146, 256
-%r148 = or i320 %r142, %r147
-%r149 = zext i320 %r148 to i384
-%r151 = getelementptr i64, i64* %r4, i32 5
+%r146 = zext i64 %r145 to i448
+%r147 = shl i448 %r146, 384
+%r148 = or i448 %r142, %r147
+%r149 = zext i448 %r148 to i512
+%r151 = getelementptr i64, i64* %r4, i32 7
 %r152 = load i64, i64* %r151
-%r153 = zext i64 %r152 to i384
-%r154 = shl i384 %r153, 320
-%r155 = or i384 %r149, %r154
-%r156 = zext i384 %r155 to i448
-%r158 = getelementptr i64, i64* %r4, i32 6
-%r159 = load i64, i64* %r158
-%r160 = zext i64 %r159 to i448
-%r161 = shl i448 %r160, 384
-%r162 = or i448 %r156, %r161
-%r163 = zext i448 %r162 to i512
-%r165 = getelementptr i64, i64* %r4, i32 7
-%r166 = load i64, i64* %r165
-%r167 = zext i64 %r166 to i512
-%r168 = shl i512 %r167, 448
-%r169 = or i512 %r163, %r168
-%r170 = zext i512 %r169 to i576
-%r172 = getelementptr i64, i64* %r4, i32 8
-%r173 = load i64, i64* %r172
-%r174 = zext i64 %r173 to i576
-%r175 = shl i576 %r174, 512
-%r176 = or i576 %r170, %r175
-%r177 = sub i576 %r119, %r176
-%r178 = lshr i576 %r177, 575
-%r179 = trunc i576 %r178 to i1
-%r180 = select i1 %r179, i576 %r119, i576 %r177
-%r181 = trunc i576 %r180 to i64
-%r183 = getelementptr i64, i64* %r1, i32 0
-store i64 %r181, i64* %r183
-%r184 = lshr i576 %r180, 64
-%r185 = trunc i576 %r184 to i64
-%r187 = getelementptr i64, i64* %r1, i32 1
-store i64 %r185, i64* %r187
-%r188 = lshr i576 %r184, 64
-%r189 = trunc i576 %r188 to i64
-%r191 = getelementptr i64, i64* %r1, i32 2
-store i64 %r189, i64* %r191
-%r192 = lshr i576 %r188, 64
-%r193 = trunc i576 %r192 to i64
-%r195 = getelementptr i64, i64* %r1, i32 3
-store i64 %r193, i64* %r195
-%r196 = lshr i576 %r192, 64
-%r197 = trunc i576 %r196 to i64
-%r199 = getelementptr i64, i64* %r1, i32 4
-store i64 %r197, i64* %r199
-%r200 = lshr i576 %r196, 64
-%r201 = trunc i576 %r200 to i64
-%r203 = getelementptr i64, i64* %r1, i32 5
-store i64 %r201, i64* %r203
-%r204 = lshr i576 %r200, 64
-%r205 = trunc i576 %r204 to i64
-%r207 = getelementptr i64, i64* %r1, i32 6
-store i64 %r205, i64* %r207
-%r208 = lshr i576 %r204, 64
-%r209 = trunc i576 %r208 to i64
-%r211 = getelementptr i64, i64* %r1, i32 7
-store i64 %r209, i64* %r211
-%r212 = lshr i576 %r208, 64
-%r213 = trunc i576 %r212 to i64
-%r215 = getelementptr i64, i64* %r1, i32 8
-store i64 %r213, i64* %r215
+%r153 = zext i64 %r152 to i512
+%r154 = shl i512 %r153, 448
+%r155 = or i512 %r149, %r154
+%r156 = sub i512 %r105, %r155
+%r157 = lshr i512 %r156, 511
+%r158 = trunc i512 %r157 to i1
+%r159 = select i1 %r158, i512 %r105, i512 %r156
+%r161 = getelementptr i64, i64* %r1, i32 0
+%r162 = trunc i512 %r159 to i64
+store i64 %r162, i64* %r161
+%r163 = lshr i512 %r159, 64
+%r165 = getelementptr i64, i64* %r1, i32 1
+%r166 = trunc i512 %r163 to i64
+store i64 %r166, i64* %r165
+%r167 = lshr i512 %r163, 64
+%r169 = getelementptr i64, i64* %r1, i32 2
+%r170 = trunc i512 %r167 to i64
+store i64 %r170, i64* %r169
+%r171 = lshr i512 %r167, 64
+%r173 = getelementptr i64, i64* %r1, i32 3
+%r174 = trunc i512 %r171 to i64
+store i64 %r174, i64* %r173
+%r175 = lshr i512 %r171, 64
+%r177 = getelementptr i64, i64* %r1, i32 4
+%r178 = trunc i512 %r175 to i64
+store i64 %r178, i64* %r177
+%r179 = lshr i512 %r175, 64
+%r181 = getelementptr i64, i64* %r1, i32 5
+%r182 = trunc i512 %r179 to i64
+store i64 %r182, i64* %r181
+%r183 = lshr i512 %r179, 64
+%r185 = getelementptr i64, i64* %r1, i32 6
+%r186 = trunc i512 %r183 to i64
+store i64 %r186, i64* %r185
+%r187 = lshr i512 %r183, 64
+%r189 = getelementptr i64, i64* %r1, i32 7
+%r190 = trunc i512 %r187 to i64
+store i64 %r190, i64* %r189
 ret void
 }
-define void @mcl_fp_sub9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_sub8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -14321,194 +7015,168 @@ define void @mcl_fp_sub9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r
 %r52 = zext i64 %r51 to i512
 %r53 = shl i512 %r52, 448
 %r54 = or i512 %r48, %r53
-%r55 = zext i512 %r54 to i576
-%r57 = getelementptr i64, i64* %r2, i32 8
-%r58 = load i64, i64* %r57
-%r59 = zext i64 %r58 to i576
-%r60 = shl i576 %r59, 512
-%r61 = or i576 %r55, %r60
-%r62 = load i64, i64* %r3
-%r63 = zext i64 %r62 to i128
-%r65 = getelementptr i64, i64* %r3, i32 1
+%r55 = load i64, i64* %r3
+%r56 = zext i64 %r55 to i128
+%r58 = getelementptr i64, i64* %r3, i32 1
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i128
+%r61 = shl i128 %r60, 64
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i192
+%r65 = getelementptr i64, i64* %r3, i32 2
 %r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i128
-%r68 = shl i128 %r67, 64
-%r69 = or i128 %r63, %r68
-%r70 = zext i128 %r69 to i192
-%r72 = getelementptr i64, i64* %r3, i32 2
+%r67 = zext i64 %r66 to i192
+%r68 = shl i192 %r67, 128
+%r69 = or i192 %r63, %r68
+%r70 = zext i192 %r69 to i256
+%r72 = getelementptr i64, i64* %r3, i32 3
 %r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i192
-%r75 = shl i192 %r74, 128
-%r76 = or i192 %r70, %r75
-%r77 = zext i192 %r76 to i256
-%r79 = getelementptr i64, i64* %r3, i32 3
+%r74 = zext i64 %r73 to i256
+%r75 = shl i256 %r74, 192
+%r76 = or i256 %r70, %r75
+%r77 = zext i256 %r76 to i320
+%r79 = getelementptr i64, i64* %r3, i32 4
 %r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i256
-%r82 = shl i256 %r81, 192
-%r83 = or i256 %r77, %r82
-%r84 = zext i256 %r83 to i320
-%r86 = getelementptr i64, i64* %r3, i32 4
+%r81 = zext i64 %r80 to i320
+%r82 = shl i320 %r81, 256
+%r83 = or i320 %r77, %r82
+%r84 = zext i320 %r83 to i384
+%r86 = getelementptr i64, i64* %r3, i32 5
 %r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i320
-%r89 = shl i320 %r88, 256
-%r90 = or i320 %r84, %r89
-%r91 = zext i320 %r90 to i384
-%r93 = getelementptr i64, i64* %r3, i32 5
+%r88 = zext i64 %r87 to i384
+%r89 = shl i384 %r88, 320
+%r90 = or i384 %r84, %r89
+%r91 = zext i384 %r90 to i448
+%r93 = getelementptr i64, i64* %r3, i32 6
 %r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i384
-%r96 = shl i384 %r95, 320
-%r97 = or i384 %r91, %r96
-%r98 = zext i384 %r97 to i448
-%r100 = getelementptr i64, i64* %r3, i32 6
-%r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i448
-%r103 = shl i448 %r102, 384
-%r104 = or i448 %r98, %r103
-%r105 = zext i448 %r104 to i512
-%r107 = getelementptr i64, i64* %r3, i32 7
-%r108 = load i64, i64* %r107
-%r109 = zext i64 %r108 to i512
-%r110 = shl i512 %r109, 448
-%r111 = or i512 %r105, %r110
-%r112 = zext i512 %r111 to i576
-%r114 = getelementptr i64, i64* %r3, i32 8
-%r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i576
-%r117 = shl i576 %r116, 512
-%r118 = or i576 %r112, %r117
-%r119 = zext i576 %r61 to i640
-%r120 = zext i576 %r118 to i640
-%r121 = sub i640 %r119, %r120
-%r122 = trunc i640 %r121 to i576
-%r123 = lshr i640 %r121, 576
-%r124 = trunc i640 %r123 to i1
-%r125 = trunc i576 %r122 to i64
-%r127 = getelementptr i64, i64* %r1, i32 0
-store i64 %r125, i64* %r127
-%r128 = lshr i576 %r122, 64
-%r129 = trunc i576 %r128 to i64
-%r131 = getelementptr i64, i64* %r1, i32 1
-store i64 %r129, i64* %r131
-%r132 = lshr i576 %r128, 64
-%r133 = trunc i576 %r132 to i64
-%r135 = getelementptr i64, i64* %r1, i32 2
-store i64 %r133, i64* %r135
-%r136 = lshr i576 %r132, 64
-%r137 = trunc i576 %r136 to i64
-%r139 = getelementptr i64, i64* %r1, i32 3
-store i64 %r137, i64* %r139
-%r140 = lshr i576 %r136, 64
-%r141 = trunc i576 %r140 to i64
-%r143 = getelementptr i64, i64* %r1, i32 4
-store i64 %r141, i64* %r143
-%r144 = lshr i576 %r140, 64
-%r145 = trunc i576 %r144 to i64
-%r147 = getelementptr i64, i64* %r1, i32 5
-store i64 %r145, i64* %r147
-%r148 = lshr i576 %r144, 64
-%r149 = trunc i576 %r148 to i64
-%r151 = getelementptr i64, i64* %r1, i32 6
-store i64 %r149, i64* %r151
-%r152 = lshr i576 %r148, 64
-%r153 = trunc i576 %r152 to i64
-%r155 = getelementptr i64, i64* %r1, i32 7
-store i64 %r153, i64* %r155
-%r156 = lshr i576 %r152, 64
-%r157 = trunc i576 %r156 to i64
-%r159 = getelementptr i64, i64* %r1, i32 8
-store i64 %r157, i64* %r159
-br i1%r124, label %carry, label %nocarry
+%r95 = zext i64 %r94 to i448
+%r96 = shl i448 %r95, 384
+%r97 = or i448 %r91, %r96
+%r98 = zext i448 %r97 to i512
+%r100 = getelementptr i64, i64* %r3, i32 7
+%r101 = load i64, i64* %r100
+%r102 = zext i64 %r101 to i512
+%r103 = shl i512 %r102, 448
+%r104 = or i512 %r98, %r103
+%r105 = zext i512 %r54 to i576
+%r106 = zext i512 %r104 to i576
+%r107 = sub i576 %r105, %r106
+%r108 = trunc i576 %r107 to i512
+%r109 = lshr i576 %r107, 512
+%r110 = trunc i576 %r109 to i1
+%r112 = getelementptr i64, i64* %r1, i32 0
+%r113 = trunc i512 %r108 to i64
+store i64 %r113, i64* %r112
+%r114 = lshr i512 %r108, 64
+%r116 = getelementptr i64, i64* %r1, i32 1
+%r117 = trunc i512 %r114 to i64
+store i64 %r117, i64* %r116
+%r118 = lshr i512 %r114, 64
+%r120 = getelementptr i64, i64* %r1, i32 2
+%r121 = trunc i512 %r118 to i64
+store i64 %r121, i64* %r120
+%r122 = lshr i512 %r118, 64
+%r124 = getelementptr i64, i64* %r1, i32 3
+%r125 = trunc i512 %r122 to i64
+store i64 %r125, i64* %r124
+%r126 = lshr i512 %r122, 64
+%r128 = getelementptr i64, i64* %r1, i32 4
+%r129 = trunc i512 %r126 to i64
+store i64 %r129, i64* %r128
+%r130 = lshr i512 %r126, 64
+%r132 = getelementptr i64, i64* %r1, i32 5
+%r133 = trunc i512 %r130 to i64
+store i64 %r133, i64* %r132
+%r134 = lshr i512 %r130, 64
+%r136 = getelementptr i64, i64* %r1, i32 6
+%r137 = trunc i512 %r134 to i64
+store i64 %r137, i64* %r136
+%r138 = lshr i512 %r134, 64
+%r140 = getelementptr i64, i64* %r1, i32 7
+%r141 = trunc i512 %r138 to i64
+store i64 %r141, i64* %r140
+br i1%r110, label %carry, label %nocarry
 nocarry:
 ret void
 carry:
-%r160 = load i64, i64* %r4
-%r161 = zext i64 %r160 to i128
-%r163 = getelementptr i64, i64* %r4, i32 1
-%r164 = load i64, i64* %r163
-%r165 = zext i64 %r164 to i128
-%r166 = shl i128 %r165, 64
-%r167 = or i128 %r161, %r166
-%r168 = zext i128 %r167 to i192
-%r170 = getelementptr i64, i64* %r4, i32 2
-%r171 = load i64, i64* %r170
-%r172 = zext i64 %r171 to i192
-%r173 = shl i192 %r172, 128
-%r174 = or i192 %r168, %r173
-%r175 = zext i192 %r174 to i256
-%r177 = getelementptr i64, i64* %r4, i32 3
-%r178 = load i64, i64* %r177
-%r179 = zext i64 %r178 to i256
-%r180 = shl i256 %r179, 192
-%r181 = or i256 %r175, %r180
-%r182 = zext i256 %r181 to i320
-%r184 = getelementptr i64, i64* %r4, i32 4
-%r185 = load i64, i64* %r184
-%r186 = zext i64 %r185 to i320
-%r187 = shl i320 %r186, 256
-%r188 = or i320 %r182, %r187
-%r189 = zext i320 %r188 to i384
-%r191 = getelementptr i64, i64* %r4, i32 5
-%r192 = load i64, i64* %r191
-%r193 = zext i64 %r192 to i384
-%r194 = shl i384 %r193, 320
-%r195 = or i384 %r189, %r194
-%r196 = zext i384 %r195 to i448
-%r198 = getelementptr i64, i64* %r4, i32 6
-%r199 = load i64, i64* %r198
-%r200 = zext i64 %r199 to i448
-%r201 = shl i448 %r200, 384
-%r202 = or i448 %r196, %r201
-%r203 = zext i448 %r202 to i512
-%r205 = getelementptr i64, i64* %r4, i32 7
-%r206 = load i64, i64* %r205
-%r207 = zext i64 %r206 to i512
-%r208 = shl i512 %r207, 448
-%r209 = or i512 %r203, %r208
-%r210 = zext i512 %r209 to i576
-%r212 = getelementptr i64, i64* %r4, i32 8
-%r213 = load i64, i64* %r212
-%r214 = zext i64 %r213 to i576
-%r215 = shl i576 %r214, 512
-%r216 = or i576 %r210, %r215
-%r217 = add i576 %r122, %r216
-%r218 = trunc i576 %r217 to i64
-%r220 = getelementptr i64, i64* %r1, i32 0
-store i64 %r218, i64* %r220
-%r221 = lshr i576 %r217, 64
-%r222 = trunc i576 %r221 to i64
-%r224 = getelementptr i64, i64* %r1, i32 1
-store i64 %r222, i64* %r224
-%r225 = lshr i576 %r221, 64
-%r226 = trunc i576 %r225 to i64
-%r228 = getelementptr i64, i64* %r1, i32 2
-store i64 %r226, i64* %r228
-%r229 = lshr i576 %r225, 64
-%r230 = trunc i576 %r229 to i64
-%r232 = getelementptr i64, i64* %r1, i32 3
-store i64 %r230, i64* %r232
-%r233 = lshr i576 %r229, 64
-%r234 = trunc i576 %r233 to i64
-%r236 = getelementptr i64, i64* %r1, i32 4
-store i64 %r234, i64* %r236
-%r237 = lshr i576 %r233, 64
-%r238 = trunc i576 %r237 to i64
-%r240 = getelementptr i64, i64* %r1, i32 5
-store i64 %r238, i64* %r240
-%r241 = lshr i576 %r237, 64
-%r242 = trunc i576 %r241 to i64
-%r244 = getelementptr i64, i64* %r1, i32 6
-store i64 %r242, i64* %r244
-%r245 = lshr i576 %r241, 64
-%r246 = trunc i576 %r245 to i64
-%r248 = getelementptr i64, i64* %r1, i32 7
-store i64 %r246, i64* %r248
-%r249 = lshr i576 %r245, 64
-%r250 = trunc i576 %r249 to i64
-%r252 = getelementptr i64, i64* %r1, i32 8
-store i64 %r250, i64* %r252
+%r142 = load i64, i64* %r4
+%r143 = zext i64 %r142 to i128
+%r145 = getelementptr i64, i64* %r4, i32 1
+%r146 = load i64, i64* %r145
+%r147 = zext i64 %r146 to i128
+%r148 = shl i128 %r147, 64
+%r149 = or i128 %r143, %r148
+%r150 = zext i128 %r149 to i192
+%r152 = getelementptr i64, i64* %r4, i32 2
+%r153 = load i64, i64* %r152
+%r154 = zext i64 %r153 to i192
+%r155 = shl i192 %r154, 128
+%r156 = or i192 %r150, %r155
+%r157 = zext i192 %r156 to i256
+%r159 = getelementptr i64, i64* %r4, i32 3
+%r160 = load i64, i64* %r159
+%r161 = zext i64 %r160 to i256
+%r162 = shl i256 %r161, 192
+%r163 = or i256 %r157, %r162
+%r164 = zext i256 %r163 to i320
+%r166 = getelementptr i64, i64* %r4, i32 4
+%r167 = load i64, i64* %r166
+%r168 = zext i64 %r167 to i320
+%r169 = shl i320 %r168, 256
+%r170 = or i320 %r164, %r169
+%r171 = zext i320 %r170 to i384
+%r173 = getelementptr i64, i64* %r4, i32 5
+%r174 = load i64, i64* %r173
+%r175 = zext i64 %r174 to i384
+%r176 = shl i384 %r175, 320
+%r177 = or i384 %r171, %r176
+%r178 = zext i384 %r177 to i448
+%r180 = getelementptr i64, i64* %r4, i32 6
+%r181 = load i64, i64* %r180
+%r182 = zext i64 %r181 to i448
+%r183 = shl i448 %r182, 384
+%r184 = or i448 %r178, %r183
+%r185 = zext i448 %r184 to i512
+%r187 = getelementptr i64, i64* %r4, i32 7
+%r188 = load i64, i64* %r187
+%r189 = zext i64 %r188 to i512
+%r190 = shl i512 %r189, 448
+%r191 = or i512 %r185, %r190
+%r192 = add i512 %r108, %r191
+%r194 = getelementptr i64, i64* %r1, i32 0
+%r195 = trunc i512 %r192 to i64
+store i64 %r195, i64* %r194
+%r196 = lshr i512 %r192, 64
+%r198 = getelementptr i64, i64* %r1, i32 1
+%r199 = trunc i512 %r196 to i64
+store i64 %r199, i64* %r198
+%r200 = lshr i512 %r196, 64
+%r202 = getelementptr i64, i64* %r1, i32 2
+%r203 = trunc i512 %r200 to i64
+store i64 %r203, i64* %r202
+%r204 = lshr i512 %r200, 64
+%r206 = getelementptr i64, i64* %r1, i32 3
+%r207 = trunc i512 %r204 to i64
+store i64 %r207, i64* %r206
+%r208 = lshr i512 %r204, 64
+%r210 = getelementptr i64, i64* %r1, i32 4
+%r211 = trunc i512 %r208 to i64
+store i64 %r211, i64* %r210
+%r212 = lshr i512 %r208, 64
+%r214 = getelementptr i64, i64* %r1, i32 5
+%r215 = trunc i512 %r212 to i64
+store i64 %r215, i64* %r214
+%r216 = lshr i512 %r212, 64
+%r218 = getelementptr i64, i64* %r1, i32 6
+%r219 = trunc i512 %r216 to i64
+store i64 %r219, i64* %r218
+%r220 = lshr i512 %r216, 64
+%r222 = getelementptr i64, i64* %r1, i32 7
+%r223 = trunc i512 %r220 to i64
+store i64 %r223, i64* %r222
 ret void
 }
-define void @mcl_fp_subNF9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fp_subNF8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -14553,153 +7221,131 @@ define void @mcl_fp_subNF9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r52 = zext i64 %r51 to i512
 %r53 = shl i512 %r52, 448
 %r54 = or i512 %r48, %r53
-%r55 = zext i512 %r54 to i576
-%r57 = getelementptr i64, i64* %r2, i32 8
-%r58 = load i64, i64* %r57
-%r59 = zext i64 %r58 to i576
-%r60 = shl i576 %r59, 512
-%r61 = or i576 %r55, %r60
-%r62 = load i64, i64* %r3
-%r63 = zext i64 %r62 to i128
-%r65 = getelementptr i64, i64* %r3, i32 1
+%r55 = load i64, i64* %r3
+%r56 = zext i64 %r55 to i128
+%r58 = getelementptr i64, i64* %r3, i32 1
+%r59 = load i64, i64* %r58
+%r60 = zext i64 %r59 to i128
+%r61 = shl i128 %r60, 64
+%r62 = or i128 %r56, %r61
+%r63 = zext i128 %r62 to i192
+%r65 = getelementptr i64, i64* %r3, i32 2
 %r66 = load i64, i64* %r65
-%r67 = zext i64 %r66 to i128
-%r68 = shl i128 %r67, 64
-%r69 = or i128 %r63, %r68
-%r70 = zext i128 %r69 to i192
-%r72 = getelementptr i64, i64* %r3, i32 2
+%r67 = zext i64 %r66 to i192
+%r68 = shl i192 %r67, 128
+%r69 = or i192 %r63, %r68
+%r70 = zext i192 %r69 to i256
+%r72 = getelementptr i64, i64* %r3, i32 3
 %r73 = load i64, i64* %r72
-%r74 = zext i64 %r73 to i192
-%r75 = shl i192 %r74, 128
-%r76 = or i192 %r70, %r75
-%r77 = zext i192 %r76 to i256
-%r79 = getelementptr i64, i64* %r3, i32 3
+%r74 = zext i64 %r73 to i256
+%r75 = shl i256 %r74, 192
+%r76 = or i256 %r70, %r75
+%r77 = zext i256 %r76 to i320
+%r79 = getelementptr i64, i64* %r3, i32 4
 %r80 = load i64, i64* %r79
-%r81 = zext i64 %r80 to i256
-%r82 = shl i256 %r81, 192
-%r83 = or i256 %r77, %r82
-%r84 = zext i256 %r83 to i320
-%r86 = getelementptr i64, i64* %r3, i32 4
+%r81 = zext i64 %r80 to i320
+%r82 = shl i320 %r81, 256
+%r83 = or i320 %r77, %r82
+%r84 = zext i320 %r83 to i384
+%r86 = getelementptr i64, i64* %r3, i32 5
 %r87 = load i64, i64* %r86
-%r88 = zext i64 %r87 to i320
-%r89 = shl i320 %r88, 256
-%r90 = or i320 %r84, %r89
-%r91 = zext i320 %r90 to i384
-%r93 = getelementptr i64, i64* %r3, i32 5
+%r88 = zext i64 %r87 to i384
+%r89 = shl i384 %r88, 320
+%r90 = or i384 %r84, %r89
+%r91 = zext i384 %r90 to i448
+%r93 = getelementptr i64, i64* %r3, i32 6
 %r94 = load i64, i64* %r93
-%r95 = zext i64 %r94 to i384
-%r96 = shl i384 %r95, 320
-%r97 = or i384 %r91, %r96
-%r98 = zext i384 %r97 to i448
-%r100 = getelementptr i64, i64* %r3, i32 6
+%r95 = zext i64 %r94 to i448
+%r96 = shl i448 %r95, 384
+%r97 = or i448 %r91, %r96
+%r98 = zext i448 %r97 to i512
+%r100 = getelementptr i64, i64* %r3, i32 7
 %r101 = load i64, i64* %r100
-%r102 = zext i64 %r101 to i448
-%r103 = shl i448 %r102, 384
-%r104 = or i448 %r98, %r103
-%r105 = zext i448 %r104 to i512
-%r107 = getelementptr i64, i64* %r3, i32 7
-%r108 = load i64, i64* %r107
-%r109 = zext i64 %r108 to i512
-%r110 = shl i512 %r109, 448
-%r111 = or i512 %r105, %r110
-%r112 = zext i512 %r111 to i576
-%r114 = getelementptr i64, i64* %r3, i32 8
-%r115 = load i64, i64* %r114
-%r116 = zext i64 %r115 to i576
-%r117 = shl i576 %r116, 512
-%r118 = or i576 %r112, %r117
-%r119 = sub i576 %r61, %r118
-%r120 = lshr i576 %r119, 575
-%r121 = trunc i576 %r120 to i1
-%r122 = load i64, i64* %r4
-%r123 = zext i64 %r122 to i128
-%r125 = getelementptr i64, i64* %r4, i32 1
+%r102 = zext i64 %r101 to i512
+%r103 = shl i512 %r102, 448
+%r104 = or i512 %r98, %r103
+%r105 = sub i512 %r54, %r104
+%r106 = lshr i512 %r105, 511
+%r107 = trunc i512 %r106 to i1
+%r108 = load i64, i64* %r4
+%r109 = zext i64 %r108 to i128
+%r111 = getelementptr i64, i64* %r4, i32 1
+%r112 = load i64, i64* %r111
+%r113 = zext i64 %r112 to i128
+%r114 = shl i128 %r113, 64
+%r115 = or i128 %r109, %r114
+%r116 = zext i128 %r115 to i192
+%r118 = getelementptr i64, i64* %r4, i32 2
+%r119 = load i64, i64* %r118
+%r120 = zext i64 %r119 to i192
+%r121 = shl i192 %r120, 128
+%r122 = or i192 %r116, %r121
+%r123 = zext i192 %r122 to i256
+%r125 = getelementptr i64, i64* %r4, i32 3
 %r126 = load i64, i64* %r125
-%r127 = zext i64 %r126 to i128
-%r128 = shl i128 %r127, 64
-%r129 = or i128 %r123, %r128
-%r130 = zext i128 %r129 to i192
-%r132 = getelementptr i64, i64* %r4, i32 2
+%r127 = zext i64 %r126 to i256
+%r128 = shl i256 %r127, 192
+%r129 = or i256 %r123, %r128
+%r130 = zext i256 %r129 to i320
+%r132 = getelementptr i64, i64* %r4, i32 4
 %r133 = load i64, i64* %r132
-%r134 = zext i64 %r133 to i192
-%r135 = shl i192 %r134, 128
-%r136 = or i192 %r130, %r135
-%r137 = zext i192 %r136 to i256
-%r139 = getelementptr i64, i64* %r4, i32 3
+%r134 = zext i64 %r133 to i320
+%r135 = shl i320 %r134, 256
+%r136 = or i320 %r130, %r135
+%r137 = zext i320 %r136 to i384
+%r139 = getelementptr i64, i64* %r4, i32 5
 %r140 = load i64, i64* %r139
-%r141 = zext i64 %r140 to i256
-%r142 = shl i256 %r141, 192
-%r143 = or i256 %r137, %r142
-%r144 = zext i256 %r143 to i320
-%r146 = getelementptr i64, i64* %r4, i32 4
+%r141 = zext i64 %r140 to i384
+%r142 = shl i384 %r141, 320
+%r143 = or i384 %r137, %r142
+%r144 = zext i384 %r143 to i448
+%r146 = getelementptr i64, i64* %r4, i32 6
 %r147 = load i64, i64* %r146
-%r148 = zext i64 %r147 to i320
-%r149 = shl i320 %r148, 256
-%r150 = or i320 %r144, %r149
-%r151 = zext i320 %r150 to i384
-%r153 = getelementptr i64, i64* %r4, i32 5
+%r148 = zext i64 %r147 to i448
+%r149 = shl i448 %r148, 384
+%r150 = or i448 %r144, %r149
+%r151 = zext i448 %r150 to i512
+%r153 = getelementptr i64, i64* %r4, i32 7
 %r154 = load i64, i64* %r153
-%r155 = zext i64 %r154 to i384
-%r156 = shl i384 %r155, 320
-%r157 = or i384 %r151, %r156
-%r158 = zext i384 %r157 to i448
-%r160 = getelementptr i64, i64* %r4, i32 6
-%r161 = load i64, i64* %r160
-%r162 = zext i64 %r161 to i448
-%r163 = shl i448 %r162, 384
-%r164 = or i448 %r158, %r163
-%r165 = zext i448 %r164 to i512
-%r167 = getelementptr i64, i64* %r4, i32 7
-%r168 = load i64, i64* %r167
-%r169 = zext i64 %r168 to i512
-%r170 = shl i512 %r169, 448
-%r171 = or i512 %r165, %r170
-%r172 = zext i512 %r171 to i576
-%r174 = getelementptr i64, i64* %r4, i32 8
-%r175 = load i64, i64* %r174
-%r176 = zext i64 %r175 to i576
-%r177 = shl i576 %r176, 512
-%r178 = or i576 %r172, %r177
-%r180 = select i1 %r121, i576 %r178, i576 0
-%r181 = add i576 %r119, %r180
-%r182 = trunc i576 %r181 to i64
-%r184 = getelementptr i64, i64* %r1, i32 0
-store i64 %r182, i64* %r184
-%r185 = lshr i576 %r181, 64
-%r186 = trunc i576 %r185 to i64
-%r188 = getelementptr i64, i64* %r1, i32 1
-store i64 %r186, i64* %r188
-%r189 = lshr i576 %r185, 64
-%r190 = trunc i576 %r189 to i64
-%r192 = getelementptr i64, i64* %r1, i32 2
-store i64 %r190, i64* %r192
-%r193 = lshr i576 %r189, 64
-%r194 = trunc i576 %r193 to i64
-%r196 = getelementptr i64, i64* %r1, i32 3
-store i64 %r194, i64* %r196
-%r197 = lshr i576 %r193, 64
-%r198 = trunc i576 %r197 to i64
-%r200 = getelementptr i64, i64* %r1, i32 4
-store i64 %r198, i64* %r200
-%r201 = lshr i576 %r197, 64
-%r202 = trunc i576 %r201 to i64
-%r204 = getelementptr i64, i64* %r1, i32 5
-store i64 %r202, i64* %r204
-%r205 = lshr i576 %r201, 64
-%r206 = trunc i576 %r205 to i64
-%r208 = getelementptr i64, i64* %r1, i32 6
-store i64 %r206, i64* %r208
-%r209 = lshr i576 %r205, 64
-%r210 = trunc i576 %r209 to i64
-%r212 = getelementptr i64, i64* %r1, i32 7
-store i64 %r210, i64* %r212
-%r213 = lshr i576 %r209, 64
-%r214 = trunc i576 %r213 to i64
-%r216 = getelementptr i64, i64* %r1, i32 8
-store i64 %r214, i64* %r216
+%r155 = zext i64 %r154 to i512
+%r156 = shl i512 %r155, 448
+%r157 = or i512 %r151, %r156
+%r159 = select i1 %r107, i512 %r157, i512 0
+%r160 = add i512 %r105, %r159
+%r162 = getelementptr i64, i64* %r1, i32 0
+%r163 = trunc i512 %r160 to i64
+store i64 %r163, i64* %r162
+%r164 = lshr i512 %r160, 64
+%r166 = getelementptr i64, i64* %r1, i32 1
+%r167 = trunc i512 %r164 to i64
+store i64 %r167, i64* %r166
+%r168 = lshr i512 %r164, 64
+%r170 = getelementptr i64, i64* %r1, i32 2
+%r171 = trunc i512 %r168 to i64
+store i64 %r171, i64* %r170
+%r172 = lshr i512 %r168, 64
+%r174 = getelementptr i64, i64* %r1, i32 3
+%r175 = trunc i512 %r172 to i64
+store i64 %r175, i64* %r174
+%r176 = lshr i512 %r172, 64
+%r178 = getelementptr i64, i64* %r1, i32 4
+%r179 = trunc i512 %r176 to i64
+store i64 %r179, i64* %r178
+%r180 = lshr i512 %r176, 64
+%r182 = getelementptr i64, i64* %r1, i32 5
+%r183 = trunc i512 %r180 to i64
+store i64 %r183, i64* %r182
+%r184 = lshr i512 %r180, 64
+%r186 = getelementptr i64, i64* %r1, i32 6
+%r187 = trunc i512 %r184 to i64
+store i64 %r187, i64* %r186
+%r188 = lshr i512 %r184, 64
+%r190 = getelementptr i64, i64* %r1, i32 7
+%r191 = trunc i512 %r188 to i64
+store i64 %r191, i64* %r190
 ret void
 }
-define void @mcl_fpDbl_add9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fpDbl_add8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -14792,256 +7438,218 @@ define void @mcl_fpDbl_add9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r108 = zext i64 %r107 to i1024
 %r109 = shl i1024 %r108, 960
 %r110 = or i1024 %r104, %r109
-%r111 = zext i1024 %r110 to i1088
-%r113 = getelementptr i64, i64* %r2, i32 16
-%r114 = load i64, i64* %r113
-%r115 = zext i64 %r114 to i1088
-%r116 = shl i1088 %r115, 1024
-%r117 = or i1088 %r111, %r116
-%r118 = zext i1088 %r117 to i1152
-%r120 = getelementptr i64, i64* %r2, i32 17
-%r121 = load i64, i64* %r120
-%r122 = zext i64 %r121 to i1152
-%r123 = shl i1152 %r122, 1088
-%r124 = or i1152 %r118, %r123
-%r125 = load i64, i64* %r3
-%r126 = zext i64 %r125 to i128
-%r128 = getelementptr i64, i64* %r3, i32 1
+%r111 = load i64, i64* %r3
+%r112 = zext i64 %r111 to i128
+%r114 = getelementptr i64, i64* %r3, i32 1
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i128
+%r117 = shl i128 %r116, 64
+%r118 = or i128 %r112, %r117
+%r119 = zext i128 %r118 to i192
+%r121 = getelementptr i64, i64* %r3, i32 2
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i192
+%r124 = shl i192 %r123, 128
+%r125 = or i192 %r119, %r124
+%r126 = zext i192 %r125 to i256
+%r128 = getelementptr i64, i64* %r3, i32 3
 %r129 = load i64, i64* %r128
-%r130 = zext i64 %r129 to i128
-%r131 = shl i128 %r130, 64
-%r132 = or i128 %r126, %r131
-%r133 = zext i128 %r132 to i192
-%r135 = getelementptr i64, i64* %r3, i32 2
+%r130 = zext i64 %r129 to i256
+%r131 = shl i256 %r130, 192
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i320
+%r135 = getelementptr i64, i64* %r3, i32 4
 %r136 = load i64, i64* %r135
-%r137 = zext i64 %r136 to i192
-%r138 = shl i192 %r137, 128
-%r139 = or i192 %r133, %r138
-%r140 = zext i192 %r139 to i256
-%r142 = getelementptr i64, i64* %r3, i32 3
+%r137 = zext i64 %r136 to i320
+%r138 = shl i320 %r137, 256
+%r139 = or i320 %r133, %r138
+%r140 = zext i320 %r139 to i384
+%r142 = getelementptr i64, i64* %r3, i32 5
 %r143 = load i64, i64* %r142
-%r144 = zext i64 %r143 to i256
-%r145 = shl i256 %r144, 192
-%r146 = or i256 %r140, %r145
-%r147 = zext i256 %r146 to i320
-%r149 = getelementptr i64, i64* %r3, i32 4
+%r144 = zext i64 %r143 to i384
+%r145 = shl i384 %r144, 320
+%r146 = or i384 %r140, %r145
+%r147 = zext i384 %r146 to i448
+%r149 = getelementptr i64, i64* %r3, i32 6
 %r150 = load i64, i64* %r149
-%r151 = zext i64 %r150 to i320
-%r152 = shl i320 %r151, 256
-%r153 = or i320 %r147, %r152
-%r154 = zext i320 %r153 to i384
-%r156 = getelementptr i64, i64* %r3, i32 5
+%r151 = zext i64 %r150 to i448
+%r152 = shl i448 %r151, 384
+%r153 = or i448 %r147, %r152
+%r154 = zext i448 %r153 to i512
+%r156 = getelementptr i64, i64* %r3, i32 7
 %r157 = load i64, i64* %r156
-%r158 = zext i64 %r157 to i384
-%r159 = shl i384 %r158, 320
-%r160 = or i384 %r154, %r159
-%r161 = zext i384 %r160 to i448
-%r163 = getelementptr i64, i64* %r3, i32 6
+%r158 = zext i64 %r157 to i512
+%r159 = shl i512 %r158, 448
+%r160 = or i512 %r154, %r159
+%r161 = zext i512 %r160 to i576
+%r163 = getelementptr i64, i64* %r3, i32 8
 %r164 = load i64, i64* %r163
-%r165 = zext i64 %r164 to i448
-%r166 = shl i448 %r165, 384
-%r167 = or i448 %r161, %r166
-%r168 = zext i448 %r167 to i512
-%r170 = getelementptr i64, i64* %r3, i32 7
+%r165 = zext i64 %r164 to i576
+%r166 = shl i576 %r165, 512
+%r167 = or i576 %r161, %r166
+%r168 = zext i576 %r167 to i640
+%r170 = getelementptr i64, i64* %r3, i32 9
 %r171 = load i64, i64* %r170
-%r172 = zext i64 %r171 to i512
-%r173 = shl i512 %r172, 448
-%r174 = or i512 %r168, %r173
-%r175 = zext i512 %r174 to i576
-%r177 = getelementptr i64, i64* %r3, i32 8
+%r172 = zext i64 %r171 to i640
+%r173 = shl i640 %r172, 576
+%r174 = or i640 %r168, %r173
+%r175 = zext i640 %r174 to i704
+%r177 = getelementptr i64, i64* %r3, i32 10
 %r178 = load i64, i64* %r177
-%r179 = zext i64 %r178 to i576
-%r180 = shl i576 %r179, 512
-%r181 = or i576 %r175, %r180
-%r182 = zext i576 %r181 to i640
-%r184 = getelementptr i64, i64* %r3, i32 9
+%r179 = zext i64 %r178 to i704
+%r180 = shl i704 %r179, 640
+%r181 = or i704 %r175, %r180
+%r182 = zext i704 %r181 to i768
+%r184 = getelementptr i64, i64* %r3, i32 11
 %r185 = load i64, i64* %r184
-%r186 = zext i64 %r185 to i640
-%r187 = shl i640 %r186, 576
-%r188 = or i640 %r182, %r187
-%r189 = zext i640 %r188 to i704
-%r191 = getelementptr i64, i64* %r3, i32 10
+%r186 = zext i64 %r185 to i768
+%r187 = shl i768 %r186, 704
+%r188 = or i768 %r182, %r187
+%r189 = zext i768 %r188 to i832
+%r191 = getelementptr i64, i64* %r3, i32 12
 %r192 = load i64, i64* %r191
-%r193 = zext i64 %r192 to i704
-%r194 = shl i704 %r193, 640
-%r195 = or i704 %r189, %r194
-%r196 = zext i704 %r195 to i768
-%r198 = getelementptr i64, i64* %r3, i32 11
+%r193 = zext i64 %r192 to i832
+%r194 = shl i832 %r193, 768
+%r195 = or i832 %r189, %r194
+%r196 = zext i832 %r195 to i896
+%r198 = getelementptr i64, i64* %r3, i32 13
 %r199 = load i64, i64* %r198
-%r200 = zext i64 %r199 to i768
-%r201 = shl i768 %r200, 704
-%r202 = or i768 %r196, %r201
-%r203 = zext i768 %r202 to i832
-%r205 = getelementptr i64, i64* %r3, i32 12
+%r200 = zext i64 %r199 to i896
+%r201 = shl i896 %r200, 832
+%r202 = or i896 %r196, %r201
+%r203 = zext i896 %r202 to i960
+%r205 = getelementptr i64, i64* %r3, i32 14
 %r206 = load i64, i64* %r205
-%r207 = zext i64 %r206 to i832
-%r208 = shl i832 %r207, 768
-%r209 = or i832 %r203, %r208
-%r210 = zext i832 %r209 to i896
-%r212 = getelementptr i64, i64* %r3, i32 13
+%r207 = zext i64 %r206 to i960
+%r208 = shl i960 %r207, 896
+%r209 = or i960 %r203, %r208
+%r210 = zext i960 %r209 to i1024
+%r212 = getelementptr i64, i64* %r3, i32 15
 %r213 = load i64, i64* %r212
-%r214 = zext i64 %r213 to i896
-%r215 = shl i896 %r214, 832
-%r216 = or i896 %r210, %r215
-%r217 = zext i896 %r216 to i960
-%r219 = getelementptr i64, i64* %r3, i32 14
-%r220 = load i64, i64* %r219
-%r221 = zext i64 %r220 to i960
-%r222 = shl i960 %r221, 896
-%r223 = or i960 %r217, %r222
-%r224 = zext i960 %r223 to i1024
-%r226 = getelementptr i64, i64* %r3, i32 15
-%r227 = load i64, i64* %r226
-%r228 = zext i64 %r227 to i1024
-%r229 = shl i1024 %r228, 960
-%r230 = or i1024 %r224, %r229
-%r231 = zext i1024 %r230 to i1088
-%r233 = getelementptr i64, i64* %r3, i32 16
-%r234 = load i64, i64* %r233
-%r235 = zext i64 %r234 to i1088
-%r236 = shl i1088 %r235, 1024
-%r237 = or i1088 %r231, %r236
-%r238 = zext i1088 %r237 to i1152
-%r240 = getelementptr i64, i64* %r3, i32 17
-%r241 = load i64, i64* %r240
-%r242 = zext i64 %r241 to i1152
-%r243 = shl i1152 %r242, 1088
-%r244 = or i1152 %r238, %r243
-%r245 = zext i1152 %r124 to i1216
-%r246 = zext i1152 %r244 to i1216
-%r247 = add i1216 %r245, %r246
-%r248 = trunc i1216 %r247 to i576
-%r249 = trunc i576 %r248 to i64
-%r251 = getelementptr i64, i64* %r1, i32 0
-store i64 %r249, i64* %r251
-%r252 = lshr i576 %r248, 64
-%r253 = trunc i576 %r252 to i64
-%r255 = getelementptr i64, i64* %r1, i32 1
-store i64 %r253, i64* %r255
-%r256 = lshr i576 %r252, 64
-%r257 = trunc i576 %r256 to i64
-%r259 = getelementptr i64, i64* %r1, i32 2
-store i64 %r257, i64* %r259
-%r260 = lshr i576 %r256, 64
-%r261 = trunc i576 %r260 to i64
-%r263 = getelementptr i64, i64* %r1, i32 3
-store i64 %r261, i64* %r263
-%r264 = lshr i576 %r260, 64
-%r265 = trunc i576 %r264 to i64
-%r267 = getelementptr i64, i64* %r1, i32 4
-store i64 %r265, i64* %r267
-%r268 = lshr i576 %r264, 64
-%r269 = trunc i576 %r268 to i64
-%r271 = getelementptr i64, i64* %r1, i32 5
-store i64 %r269, i64* %r271
-%r272 = lshr i576 %r268, 64
-%r273 = trunc i576 %r272 to i64
-%r275 = getelementptr i64, i64* %r1, i32 6
-store i64 %r273, i64* %r275
-%r276 = lshr i576 %r272, 64
-%r277 = trunc i576 %r276 to i64
-%r279 = getelementptr i64, i64* %r1, i32 7
-store i64 %r277, i64* %r279
-%r280 = lshr i576 %r276, 64
-%r281 = trunc i576 %r280 to i64
-%r283 = getelementptr i64, i64* %r1, i32 8
-store i64 %r281, i64* %r283
-%r284 = lshr i1216 %r247, 576
-%r285 = trunc i1216 %r284 to i640
-%r286 = load i64, i64* %r4
-%r287 = zext i64 %r286 to i128
-%r289 = getelementptr i64, i64* %r4, i32 1
-%r290 = load i64, i64* %r289
-%r291 = zext i64 %r290 to i128
-%r292 = shl i128 %r291, 64
-%r293 = or i128 %r287, %r292
-%r294 = zext i128 %r293 to i192
-%r296 = getelementptr i64, i64* %r4, i32 2
-%r297 = load i64, i64* %r296
-%r298 = zext i64 %r297 to i192
-%r299 = shl i192 %r298, 128
-%r300 = or i192 %r294, %r299
-%r301 = zext i192 %r300 to i256
-%r303 = getelementptr i64, i64* %r4, i32 3
-%r304 = load i64, i64* %r303
-%r305 = zext i64 %r304 to i256
-%r306 = shl i256 %r305, 192
-%r307 = or i256 %r301, %r306
-%r308 = zext i256 %r307 to i320
-%r310 = getelementptr i64, i64* %r4, i32 4
-%r311 = load i64, i64* %r310
-%r312 = zext i64 %r311 to i320
-%r313 = shl i320 %r312, 256
-%r314 = or i320 %r308, %r313
-%r315 = zext i320 %r314 to i384
-%r317 = getelementptr i64, i64* %r4, i32 5
-%r318 = load i64, i64* %r317
-%r319 = zext i64 %r318 to i384
-%r320 = shl i384 %r319, 320
-%r321 = or i384 %r315, %r320
-%r322 = zext i384 %r321 to i448
-%r324 = getelementptr i64, i64* %r4, i32 6
-%r325 = load i64, i64* %r324
-%r326 = zext i64 %r325 to i448
-%r327 = shl i448 %r326, 384
-%r328 = or i448 %r322, %r327
-%r329 = zext i448 %r328 to i512
-%r331 = getelementptr i64, i64* %r4, i32 7
-%r332 = load i64, i64* %r331
-%r333 = zext i64 %r332 to i512
-%r334 = shl i512 %r333, 448
-%r335 = or i512 %r329, %r334
-%r336 = zext i512 %r335 to i576
-%r338 = getelementptr i64, i64* %r4, i32 8
-%r339 = load i64, i64* %r338
-%r340 = zext i64 %r339 to i576
-%r341 = shl i576 %r340, 512
-%r342 = or i576 %r336, %r341
-%r343 = zext i576 %r342 to i640
-%r344 = sub i640 %r285, %r343
-%r345 = lshr i640 %r344, 576
-%r346 = trunc i640 %r345 to i1
-%r347 = select i1 %r346, i640 %r285, i640 %r344
-%r348 = trunc i640 %r347 to i576
-%r350 = getelementptr i64, i64* %r1, i32 9
-%r351 = trunc i576 %r348 to i64
-%r353 = getelementptr i64, i64* %r350, i32 0
-store i64 %r351, i64* %r353
-%r354 = lshr i576 %r348, 64
-%r355 = trunc i576 %r354 to i64
-%r357 = getelementptr i64, i64* %r350, i32 1
-store i64 %r355, i64* %r357
-%r358 = lshr i576 %r354, 64
-%r359 = trunc i576 %r358 to i64
-%r361 = getelementptr i64, i64* %r350, i32 2
-store i64 %r359, i64* %r361
-%r362 = lshr i576 %r358, 64
-%r363 = trunc i576 %r362 to i64
-%r365 = getelementptr i64, i64* %r350, i32 3
-store i64 %r363, i64* %r365
-%r366 = lshr i576 %r362, 64
-%r367 = trunc i576 %r366 to i64
-%r369 = getelementptr i64, i64* %r350, i32 4
-store i64 %r367, i64* %r369
-%r370 = lshr i576 %r366, 64
-%r371 = trunc i576 %r370 to i64
-%r373 = getelementptr i64, i64* %r350, i32 5
-store i64 %r371, i64* %r373
-%r374 = lshr i576 %r370, 64
-%r375 = trunc i576 %r374 to i64
-%r377 = getelementptr i64, i64* %r350, i32 6
-store i64 %r375, i64* %r377
-%r378 = lshr i576 %r374, 64
-%r379 = trunc i576 %r378 to i64
-%r381 = getelementptr i64, i64* %r350, i32 7
-store i64 %r379, i64* %r381
-%r382 = lshr i576 %r378, 64
-%r383 = trunc i576 %r382 to i64
-%r385 = getelementptr i64, i64* %r350, i32 8
-store i64 %r383, i64* %r385
+%r214 = zext i64 %r213 to i1024
+%r215 = shl i1024 %r214, 960
+%r216 = or i1024 %r210, %r215
+%r217 = zext i1024 %r110 to i1088
+%r218 = zext i1024 %r216 to i1088
+%r219 = add i1088 %r217, %r218
+%r220 = trunc i1088 %r219 to i512
+%r222 = getelementptr i64, i64* %r1, i32 0
+%r223 = trunc i512 %r220 to i64
+store i64 %r223, i64* %r222
+%r224 = lshr i512 %r220, 64
+%r226 = getelementptr i64, i64* %r1, i32 1
+%r227 = trunc i512 %r224 to i64
+store i64 %r227, i64* %r226
+%r228 = lshr i512 %r224, 64
+%r230 = getelementptr i64, i64* %r1, i32 2
+%r231 = trunc i512 %r228 to i64
+store i64 %r231, i64* %r230
+%r232 = lshr i512 %r228, 64
+%r234 = getelementptr i64, i64* %r1, i32 3
+%r235 = trunc i512 %r232 to i64
+store i64 %r235, i64* %r234
+%r236 = lshr i512 %r232, 64
+%r238 = getelementptr i64, i64* %r1, i32 4
+%r239 = trunc i512 %r236 to i64
+store i64 %r239, i64* %r238
+%r240 = lshr i512 %r236, 64
+%r242 = getelementptr i64, i64* %r1, i32 5
+%r243 = trunc i512 %r240 to i64
+store i64 %r243, i64* %r242
+%r244 = lshr i512 %r240, 64
+%r246 = getelementptr i64, i64* %r1, i32 6
+%r247 = trunc i512 %r244 to i64
+store i64 %r247, i64* %r246
+%r248 = lshr i512 %r244, 64
+%r250 = getelementptr i64, i64* %r1, i32 7
+%r251 = trunc i512 %r248 to i64
+store i64 %r251, i64* %r250
+%r252 = lshr i1088 %r219, 512
+%r253 = trunc i1088 %r252 to i576
+%r254 = load i64, i64* %r4
+%r255 = zext i64 %r254 to i128
+%r257 = getelementptr i64, i64* %r4, i32 1
+%r258 = load i64, i64* %r257
+%r259 = zext i64 %r258 to i128
+%r260 = shl i128 %r259, 64
+%r261 = or i128 %r255, %r260
+%r262 = zext i128 %r261 to i192
+%r264 = getelementptr i64, i64* %r4, i32 2
+%r265 = load i64, i64* %r264
+%r266 = zext i64 %r265 to i192
+%r267 = shl i192 %r266, 128
+%r268 = or i192 %r262, %r267
+%r269 = zext i192 %r268 to i256
+%r271 = getelementptr i64, i64* %r4, i32 3
+%r272 = load i64, i64* %r271
+%r273 = zext i64 %r272 to i256
+%r274 = shl i256 %r273, 192
+%r275 = or i256 %r269, %r274
+%r276 = zext i256 %r275 to i320
+%r278 = getelementptr i64, i64* %r4, i32 4
+%r279 = load i64, i64* %r278
+%r280 = zext i64 %r279 to i320
+%r281 = shl i320 %r280, 256
+%r282 = or i320 %r276, %r281
+%r283 = zext i320 %r282 to i384
+%r285 = getelementptr i64, i64* %r4, i32 5
+%r286 = load i64, i64* %r285
+%r287 = zext i64 %r286 to i384
+%r288 = shl i384 %r287, 320
+%r289 = or i384 %r283, %r288
+%r290 = zext i384 %r289 to i448
+%r292 = getelementptr i64, i64* %r4, i32 6
+%r293 = load i64, i64* %r292
+%r294 = zext i64 %r293 to i448
+%r295 = shl i448 %r294, 384
+%r296 = or i448 %r290, %r295
+%r297 = zext i448 %r296 to i512
+%r299 = getelementptr i64, i64* %r4, i32 7
+%r300 = load i64, i64* %r299
+%r301 = zext i64 %r300 to i512
+%r302 = shl i512 %r301, 448
+%r303 = or i512 %r297, %r302
+%r304 = zext i512 %r303 to i576
+%r305 = sub i576 %r253, %r304
+%r306 = lshr i576 %r305, 512
+%r307 = trunc i576 %r306 to i1
+%r308 = select i1 %r307, i576 %r253, i576 %r305
+%r309 = trunc i576 %r308 to i512
+%r311 = getelementptr i64, i64* %r1, i32 8
+%r313 = getelementptr i64, i64* %r311, i32 0
+%r314 = trunc i512 %r309 to i64
+store i64 %r314, i64* %r313
+%r315 = lshr i512 %r309, 64
+%r317 = getelementptr i64, i64* %r311, i32 1
+%r318 = trunc i512 %r315 to i64
+store i64 %r318, i64* %r317
+%r319 = lshr i512 %r315, 64
+%r321 = getelementptr i64, i64* %r311, i32 2
+%r322 = trunc i512 %r319 to i64
+store i64 %r322, i64* %r321
+%r323 = lshr i512 %r319, 64
+%r325 = getelementptr i64, i64* %r311, i32 3
+%r326 = trunc i512 %r323 to i64
+store i64 %r326, i64* %r325
+%r327 = lshr i512 %r323, 64
+%r329 = getelementptr i64, i64* %r311, i32 4
+%r330 = trunc i512 %r327 to i64
+store i64 %r330, i64* %r329
+%r331 = lshr i512 %r327, 64
+%r333 = getelementptr i64, i64* %r311, i32 5
+%r334 = trunc i512 %r331 to i64
+store i64 %r334, i64* %r333
+%r335 = lshr i512 %r331, 64
+%r337 = getelementptr i64, i64* %r311, i32 6
+%r338 = trunc i512 %r335 to i64
+store i64 %r338, i64* %r337
+%r339 = lshr i512 %r335, 64
+%r341 = getelementptr i64, i64* %r311, i32 7
+%r342 = trunc i512 %r339 to i64
+store i64 %r342, i64* %r341
 ret void
 }
-define void @mcl_fpDbl_sub9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
+define void @mcl_fpDbl_sub8L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias  %r3, i64* noalias  %r4)
 {
 %r5 = load i64, i64* %r2
 %r6 = zext i64 %r5 to i128
@@ -15134,250 +7742,212 @@ define void @mcl_fpDbl_sub9L(i64* noalias  %r1, i64* noalias  %r2, i64* noalias
 %r108 = zext i64 %r107 to i1024
 %r109 = shl i1024 %r108, 960
 %r110 = or i1024 %r104, %r109
-%r111 = zext i1024 %r110 to i1088
-%r113 = getelementptr i64, i64* %r2, i32 16
-%r114 = load i64, i64* %r113
-%r115 = zext i64 %r114 to i1088
-%r116 = shl i1088 %r115, 1024
-%r117 = or i1088 %r111, %r116
-%r118 = zext i1088 %r117 to i1152
-%r120 = getelementptr i64, i64* %r2, i32 17
-%r121 = load i64, i64* %r120
-%r122 = zext i64 %r121 to i1152
-%r123 = shl i1152 %r122, 1088
-%r124 = or i1152 %r118, %r123
-%r125 = load i64, i64* %r3
-%r126 = zext i64 %r125 to i128
-%r128 = getelementptr i64, i64* %r3, i32 1
+%r111 = load i64, i64* %r3
+%r112 = zext i64 %r111 to i128
+%r114 = getelementptr i64, i64* %r3, i32 1
+%r115 = load i64, i64* %r114
+%r116 = zext i64 %r115 to i128
+%r117 = shl i128 %r116, 64
+%r118 = or i128 %r112, %r117
+%r119 = zext i128 %r118 to i192
+%r121 = getelementptr i64, i64* %r3, i32 2
+%r122 = load i64, i64* %r121
+%r123 = zext i64 %r122 to i192
+%r124 = shl i192 %r123, 128
+%r125 = or i192 %r119, %r124
+%r126 = zext i192 %r125 to i256
+%r128 = getelementptr i64, i64* %r3, i32 3
 %r129 = load i64, i64* %r128
-%r130 = zext i64 %r129 to i128
-%r131 = shl i128 %r130, 64
-%r132 = or i128 %r126, %r131
-%r133 = zext i128 %r132 to i192
-%r135 = getelementptr i64, i64* %r3, i32 2
+%r130 = zext i64 %r129 to i256
+%r131 = shl i256 %r130, 192
+%r132 = or i256 %r126, %r131
+%r133 = zext i256 %r132 to i320
+%r135 = getelementptr i64, i64* %r3, i32 4
 %r136 = load i64, i64* %r135
-%r137 = zext i64 %r136 to i192
-%r138 = shl i192 %r137, 128
-%r139 = or i192 %r133, %r138
-%r140 = zext i192 %r139 to i256
-%r142 = getelementptr i64, i64* %r3, i32 3
+%r137 = zext i64 %r136 to i320
+%r138 = shl i320 %r137, 256
+%r139 = or i320 %r133, %r138
+%r140 = zext i320 %r139 to i384
+%r142 = getelementptr i64, i64* %r3, i32 5
 %r143 = load i64, i64* %r142
-%r144 = zext i64 %r143 to i256
-%r145 = shl i256 %r144, 192
-%r146 = or i256 %r140, %r145
-%r147 = zext i256 %r146 to i320
-%r149 = getelementptr i64, i64* %r3, i32 4
+%r144 = zext i64 %r143 to i384
+%r145 = shl i384 %r144, 320
+%r146 = or i384 %r140, %r145
+%r147 = zext i384 %r146 to i448
+%r149 = getelementptr i64, i64* %r3, i32 6
 %r150 = load i64, i64* %r149
-%r151 = zext i64 %r150 to i320
-%r152 = shl i320 %r151, 256
-%r153 = or i320 %r147, %r152
-%r154 = zext i320 %r153 to i384
-%r156 = getelementptr i64, i64* %r3, i32 5
+%r151 = zext i64 %r150 to i448
+%r152 = shl i448 %r151, 384
+%r153 = or i448 %r147, %r152
+%r154 = zext i448 %r153 to i512
+%r156 = getelementptr i64, i64* %r3, i32 7
 %r157 = load i64, i64* %r156
-%r158 = zext i64 %r157 to i384
-%r159 = shl i384 %r158, 320
-%r160 = or i384 %r154, %r159
-%r161 = zext i384 %r160 to i448
-%r163 = getelementptr i64, i64* %r3, i32 6
+%r158 = zext i64 %r157 to i512
+%r159 = shl i512 %r158, 448
+%r160 = or i512 %r154, %r159
+%r161 = zext i512 %r160 to i576
+%r163 = getelementptr i64, i64* %r3, i32 8
 %r164 = load i64, i64* %r163
-%r165 = zext i64 %r164 to i448
-%r166 = shl i448 %r165, 384
-%r167 = or i448 %r161, %r166
-%r168 = zext i448 %r167 to i512
-%r170 = getelementptr i64, i64* %r3, i32 7
+%r165 = zext i64 %r164 to i576
+%r166 = shl i576 %r165, 512
+%r167 = or i576 %r161, %r166
+%r168 = zext i576 %r167 to i640
+%r170 = getelementptr i64, i64* %r3, i32 9
 %r171 = load i64, i64* %r170
-%r172 = zext i64 %r171 to i512
-%r173 = shl i512 %r172, 448
-%r174 = or i512 %r168, %r173
-%r175 = zext i512 %r174 to i576
-%r177 = getelementptr i64, i64* %r3, i32 8
+%r172 = zext i64 %r171 to i640
+%r173 = shl i640 %r172, 576
+%r174 = or i640 %r168, %r173
+%r175 = zext i640 %r174 to i704
+%r177 = getelementptr i64, i64* %r3, i32 10
 %r178 = load i64, i64* %r177
-%r179 = zext i64 %r178 to i576
-%r180 = shl i576 %r179, 512
-%r181 = or i576 %r175, %r180
-%r182 = zext i576 %r181 to i640
-%r184 = getelementptr i64, i64* %r3, i32 9
+%r179 = zext i64 %r178 to i704
+%r180 = shl i704 %r179, 640
+%r181 = or i704 %r175, %r180
+%r182 = zext i704 %r181 to i768
+%r184 = getelementptr i64, i64* %r3, i32 11
 %r185 = load i64, i64* %r184
-%r186 = zext i64 %r185 to i640
-%r187 = shl i640 %r186, 576
-%r188 = or i640 %r182, %r187
-%r189 = zext i640 %r188 to i704
-%r191 = getelementptr i64, i64* %r3, i32 10
+%r186 = zext i64 %r185 to i768
+%r187 = shl i768 %r186, 704
+%r188 = or i768 %r182, %r187
+%r189 = zext i768 %r188 to i832
+%r191 = getelementptr i64, i64* %r3, i32 12
 %r192 = load i64, i64* %r191
-%r193 = zext i64 %r192 to i704
-%r194 = shl i704 %r193, 640
-%r195 = or i704 %r189, %r194
-%r196 = zext i704 %r195 to i768
-%r198 = getelementptr i64, i64* %r3, i32 11
+%r193 = zext i64 %r192 to i832
+%r194 = shl i832 %r193, 768
+%r195 = or i832 %r189, %r194
+%r196 = zext i832 %r195 to i896
+%r198 = getelementptr i64, i64* %r3, i32 13
 %r199 = load i64, i64* %r198
-%r200 = zext i64 %r199 to i768
-%r201 = shl i768 %r200, 704
-%r202 = or i768 %r196, %r201
-%r203 = zext i768 %r202 to i832
-%r205 = getelementptr i64, i64* %r3, i32 12
+%r200 = zext i64 %r199 to i896
+%r201 = shl i896 %r200, 832
+%r202 = or i896 %r196, %r201
+%r203 = zext i896 %r202 to i960
+%r205 = getelementptr i64, i64* %r3, i32 14
 %r206 = load i64, i64* %r205
-%r207 = zext i64 %r206 to i832
-%r208 = shl i832 %r207, 768
-%r209 = or i832 %r203, %r208
-%r210 = zext i832 %r209 to i896
-%r212 = getelementptr i64, i64* %r3, i32 13
+%r207 = zext i64 %r206 to i960
+%r208 = shl i960 %r207, 896
+%r209 = or i960 %r203, %r208
+%r210 = zext i960 %r209 to i1024
+%r212 = getelementptr i64, i64* %r3, i32 15
 %r213 = load i64, i64* %r212
-%r214 = zext i64 %r213 to i896
-%r215 = shl i896 %r214, 832
-%r216 = or i896 %r210, %r215
-%r217 = zext i896 %r216 to i960
-%r219 = getelementptr i64, i64* %r3, i32 14
-%r220 = load i64, i64* %r219
-%r221 = zext i64 %r220 to i960
-%r222 = shl i960 %r221, 896
-%r223 = or i960 %r217, %r222
-%r224 = zext i960 %r223 to i1024
-%r226 = getelementptr i64, i64* %r3, i32 15
-%r227 = load i64, i64* %r226
-%r228 = zext i64 %r227 to i1024
-%r229 = shl i1024 %r228, 960
-%r230 = or i1024 %r224, %r229
-%r231 = zext i1024 %r230 to i1088
-%r233 = getelementptr i64, i64* %r3, i32 16
-%r234 = load i64, i64* %r233
-%r235 = zext i64 %r234 to i1088
-%r236 = shl i1088 %r235, 1024
-%r237 = or i1088 %r231, %r236
-%r238 = zext i1088 %r237 to i1152
-%r240 = getelementptr i64, i64* %r3, i32 17
-%r241 = load i64, i64* %r240
-%r242 = zext i64 %r241 to i1152
-%r243 = shl i1152 %r242, 1088
-%r244 = or i1152 %r238, %r243
-%r245 = zext i1152 %r124 to i1216
-%r246 = zext i1152 %r244 to i1216
-%r247 = sub i1216 %r245, %r246
-%r248 = trunc i1216 %r247 to i576
-%r249 = trunc i576 %r248 to i64
-%r251 = getelementptr i64, i64* %r1, i32 0
-store i64 %r249, i64* %r251
-%r252 = lshr i576 %r248, 64
-%r253 = trunc i576 %r252 to i64
-%r255 = getelementptr i64, i64* %r1, i32 1
-store i64 %r253, i64* %r255
-%r256 = lshr i576 %r252, 64
-%r257 = trunc i576 %r256 to i64
-%r259 = getelementptr i64, i64* %r1, i32 2
-store i64 %r257, i64* %r259
-%r260 = lshr i576 %r256, 64
-%r261 = trunc i576 %r260 to i64
-%r263 = getelementptr i64, i64* %r1, i32 3
-store i64 %r261, i64* %r263
-%r264 = lshr i576 %r260, 64
-%r265 = trunc i576 %r264 to i64
-%r267 = getelementptr i64, i64* %r1, i32 4
-store i64 %r265, i64* %r267
-%r268 = lshr i576 %r264, 64
-%r269 = trunc i576 %r268 to i64
-%r271 = getelementptr i64, i64* %r1, i32 5
-store i64 %r269, i64* %r271
-%r272 = lshr i576 %r268, 64
-%r273 = trunc i576 %r272 to i64
-%r275 = getelementptr i64, i64* %r1, i32 6
-store i64 %r273, i64* %r275
-%r276 = lshr i576 %r272, 64
-%r277 = trunc i576 %r276 to i64
-%r279 = getelementptr i64, i64* %r1, i32 7
-store i64 %r277, i64* %r279
-%r280 = lshr i576 %r276, 64
-%r281 = trunc i576 %r280 to i64
-%r283 = getelementptr i64, i64* %r1, i32 8
-store i64 %r281, i64* %r283
-%r284 = lshr i1216 %r247, 576
-%r285 = trunc i1216 %r284 to i576
-%r286 = lshr i1216 %r247, 1152
-%r287 = trunc i1216 %r286 to i1
-%r288 = load i64, i64* %r4
-%r289 = zext i64 %r288 to i128
-%r291 = getelementptr i64, i64* %r4, i32 1
-%r292 = load i64, i64* %r291
-%r293 = zext i64 %r292 to i128
-%r294 = shl i128 %r293, 64
-%r295 = or i128 %r289, %r294
-%r296 = zext i128 %r295 to i192
-%r298 = getelementptr i64, i64* %r4, i32 2
-%r299 = load i64, i64* %r298
-%r300 = zext i64 %r299 to i192
-%r301 = shl i192 %r300, 128
-%r302 = or i192 %r296, %r301
-%r303 = zext i192 %r302 to i256
-%r305 = getelementptr i64, i64* %r4, i32 3
-%r306 = load i64, i64* %r305
-%r307 = zext i64 %r306 to i256
-%r308 = shl i256 %r307, 192
-%r309 = or i256 %r303, %r308
-%r310 = zext i256 %r309 to i320
-%r312 = getelementptr i64, i64* %r4, i32 4
-%r313 = load i64, i64* %r312
-%r314 = zext i64 %r313 to i320
-%r315 = shl i320 %r314, 256
-%r316 = or i320 %r310, %r315
-%r317 = zext i320 %r316 to i384
-%r319 = getelementptr i64, i64* %r4, i32 5
-%r320 = load i64, i64* %r319
-%r321 = zext i64 %r320 to i384
-%r322 = shl i384 %r321, 320
-%r323 = or i384 %r317, %r322
-%r324 = zext i384 %r323 to i448
-%r326 = getelementptr i64, i64* %r4, i32 6
-%r327 = load i64, i64* %r326
-%r328 = zext i64 %r327 to i448
-%r329 = shl i448 %r328, 384
-%r330 = or i448 %r324, %r329
-%r331 = zext i448 %r330 to i512
-%r333 = getelementptr i64, i64* %r4, i32 7
-%r334 = load i64, i64* %r333
-%r335 = zext i64 %r334 to i512
-%r336 = shl i512 %r335, 448
-%r337 = or i512 %r331, %r336
-%r338 = zext i512 %r337 to i576
-%r340 = getelementptr i64, i64* %r4, i32 8
-%r341 = load i64, i64* %r340
-%r342 = zext i64 %r341 to i576
-%r343 = shl i576 %r342, 512
-%r344 = or i576 %r338, %r343
-%r346 = select i1 %r287, i576 %r344, i576 0
-%r347 = add i576 %r285, %r346
-%r349 = getelementptr i64, i64* %r1, i32 9
-%r350 = trunc i576 %r347 to i64
-%r352 = getelementptr i64, i64* %r349, i32 0
-store i64 %r350, i64* %r352
-%r353 = lshr i576 %r347, 64
-%r354 = trunc i576 %r353 to i64
-%r356 = getelementptr i64, i64* %r349, i32 1
-store i64 %r354, i64* %r356
-%r357 = lshr i576 %r353, 64
-%r358 = trunc i576 %r357 to i64
-%r360 = getelementptr i64, i64* %r349, i32 2
-store i64 %r358, i64* %r360
-%r361 = lshr i576 %r357, 64
-%r362 = trunc i576 %r361 to i64
-%r364 = getelementptr i64, i64* %r349, i32 3
-store i64 %r362, i64* %r364
-%r365 = lshr i576 %r361, 64
-%r366 = trunc i576 %r365 to i64
-%r368 = getelementptr i64, i64* %r349, i32 4
-store i64 %r366, i64* %r368
-%r369 = lshr i576 %r365, 64
-%r370 = trunc i576 %r369 to i64
-%r372 = getelementptr i64, i64* %r349, i32 5
-store i64 %r370, i64* %r372
-%r373 = lshr i576 %r369, 64
-%r374 = trunc i576 %r373 to i64
-%r376 = getelementptr i64, i64* %r349, i32 6
-store i64 %r374, i64* %r376
-%r377 = lshr i576 %r373, 64
-%r378 = trunc i576 %r377 to i64
-%r380 = getelementptr i64, i64* %r349, i32 7
-store i64 %r378, i64* %r380
-%r381 = lshr i576 %r377, 64
-%r382 = trunc i576 %r381 to i64
-%r384 = getelementptr i64, i64* %r349, i32 8
-store i64 %r382, i64* %r384
+%r214 = zext i64 %r213 to i1024
+%r215 = shl i1024 %r214, 960
+%r216 = or i1024 %r210, %r215
+%r217 = zext i1024 %r110 to i1088
+%r218 = zext i1024 %r216 to i1088
+%r219 = sub i1088 %r217, %r218
+%r220 = trunc i1088 %r219 to i512
+%r222 = getelementptr i64, i64* %r1, i32 0
+%r223 = trunc i512 %r220 to i64
+store i64 %r223, i64* %r222
+%r224 = lshr i512 %r220, 64
+%r226 = getelementptr i64, i64* %r1, i32 1
+%r227 = trunc i512 %r224 to i64
+store i64 %r227, i64* %r226
+%r228 = lshr i512 %r224, 64
+%r230 = getelementptr i64, i64* %r1, i32 2
+%r231 = trunc i512 %r228 to i64
+store i64 %r231, i64* %r230
+%r232 = lshr i512 %r228, 64
+%r234 = getelementptr i64, i64* %r1, i32 3
+%r235 = trunc i512 %r232 to i64
+store i64 %r235, i64* %r234
+%r236 = lshr i512 %r232, 64
+%r238 = getelementptr i64, i64* %r1, i32 4
+%r239 = trunc i512 %r236 to i64
+store i64 %r239, i64* %r238
+%r240 = lshr i512 %r236, 64
+%r242 = getelementptr i64, i64* %r1, i32 5
+%r243 = trunc i512 %r240 to i64
+store i64 %r243, i64* %r242
+%r244 = lshr i512 %r240, 64
+%r246 = getelementptr i64, i64* %r1, i32 6
+%r247 = trunc i512 %r244 to i64
+store i64 %r247, i64* %r246
+%r248 = lshr i512 %r244, 64
+%r250 = getelementptr i64, i64* %r1, i32 7
+%r251 = trunc i512 %r248 to i64
+store i64 %r251, i64* %r250
+%r252 = lshr i1088 %r219, 512
+%r253 = trunc i1088 %r252 to i512
+%r254 = lshr i1088 %r219, 1024
+%r255 = trunc i1088 %r254 to i1
+%r256 = load i64, i64* %r4
+%r257 = zext i64 %r256 to i128
+%r259 = getelementptr i64, i64* %r4, i32 1
+%r260 = load i64, i64* %r259
+%r261 = zext i64 %r260 to i128
+%r262 = shl i128 %r261, 64
+%r263 = or i128 %r257, %r262
+%r264 = zext i128 %r263 to i192
+%r266 = getelementptr i64, i64* %r4, i32 2
+%r267 = load i64, i64* %r266
+%r268 = zext i64 %r267 to i192
+%r269 = shl i192 %r268, 128
+%r270 = or i192 %r264, %r269
+%r271 = zext i192 %r270 to i256
+%r273 = getelementptr i64, i64* %r4, i32 3
+%r274 = load i64, i64* %r273
+%r275 = zext i64 %r274 to i256
+%r276 = shl i256 %r275, 192
+%r277 = or i256 %r271, %r276
+%r278 = zext i256 %r277 to i320
+%r280 = getelementptr i64, i64* %r4, i32 4
+%r281 = load i64, i64* %r280
+%r282 = zext i64 %r281 to i320
+%r283 = shl i320 %r282, 256
+%r284 = or i320 %r278, %r283
+%r285 = zext i320 %r284 to i384
+%r287 = getelementptr i64, i64* %r4, i32 5
+%r288 = load i64, i64* %r287
+%r289 = zext i64 %r288 to i384
+%r290 = shl i384 %r289, 320
+%r291 = or i384 %r285, %r290
+%r292 = zext i384 %r291 to i448
+%r294 = getelementptr i64, i64* %r4, i32 6
+%r295 = load i64, i64* %r294
+%r296 = zext i64 %r295 to i448
+%r297 = shl i448 %r296, 384
+%r298 = or i448 %r292, %r297
+%r299 = zext i448 %r298 to i512
+%r301 = getelementptr i64, i64* %r4, i32 7
+%r302 = load i64, i64* %r301
+%r303 = zext i64 %r302 to i512
+%r304 = shl i512 %r303, 448
+%r305 = or i512 %r299, %r304
+%r307 = select i1 %r255, i512 %r305, i512 0
+%r308 = add i512 %r253, %r307
+%r310 = getelementptr i64, i64* %r1, i32 8
+%r312 = getelementptr i64, i64* %r310, i32 0
+%r313 = trunc i512 %r308 to i64
+store i64 %r313, i64* %r312
+%r314 = lshr i512 %r308, 64
+%r316 = getelementptr i64, i64* %r310, i32 1
+%r317 = trunc i512 %r314 to i64
+store i64 %r317, i64* %r316
+%r318 = lshr i512 %r314, 64
+%r320 = getelementptr i64, i64* %r310, i32 2
+%r321 = trunc i512 %r318 to i64
+store i64 %r321, i64* %r320
+%r322 = lshr i512 %r318, 64
+%r324 = getelementptr i64, i64* %r310, i32 3
+%r325 = trunc i512 %r322 to i64
+store i64 %r325, i64* %r324
+%r326 = lshr i512 %r322, 64
+%r328 = getelementptr i64, i64* %r310, i32 4
+%r329 = trunc i512 %r326 to i64
+store i64 %r329, i64* %r328
+%r330 = lshr i512 %r326, 64
+%r332 = getelementptr i64, i64* %r310, i32 5
+%r333 = trunc i512 %r330 to i64
+store i64 %r333, i64* %r332
+%r334 = lshr i512 %r330, 64
+%r336 = getelementptr i64, i64* %r310, i32 6
+%r337 = trunc i512 %r334 to i64
+store i64 %r337, i64* %r336
+%r338 = lshr i512 %r334, 64
+%r340 = getelementptr i64, i64* %r310, i32 7
+%r341 = trunc i512 %r338 to i64
+store i64 %r341, i64* %r340
 ret void
 }

From 89a0e3515cf2edf5c71762aef5f51d94ce5f3c11 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 14:24:27 +0900
Subject: [PATCH 518/553] update x86-64 asm

---
 src/asm/x86-64.bmi2.s | 19545 +++++++++++-----------------------
 src/asm/x86-64.s      | 22911 +++++++++++++---------------------------
 2 files changed, 13526 insertions(+), 28930 deletions(-)

diff --git a/src/asm/x86-64.bmi2.s b/src/asm/x86-64.bmi2.s
index e12174ac..227fb484 100644
--- a/src/asm/x86-64.bmi2.s
+++ b/src/asm/x86-64.bmi2.s
@@ -1,147 +1,148 @@
 	.text
-	.file	"<stdin>"
-	.globl	makeNIST_P192Lbmi2
-	.align	16, 0x90
+	.file	"base64.bmi2.ll"
+	.globl	makeNIST_P192Lbmi2              # -- Begin function makeNIST_P192Lbmi2
+	.p2align	4, 0x90
 	.type	makeNIST_P192Lbmi2,@function
 makeNIST_P192Lbmi2:                     # @makeNIST_P192Lbmi2
-# BB#0:
+# %bb.0:
 	movq	$-1, %rax
 	movq	$-2, %rdx
 	movq	$-1, %rcx
 	retq
 .Lfunc_end0:
 	.size	makeNIST_P192Lbmi2, .Lfunc_end0-makeNIST_P192Lbmi2
-
-	.globl	mcl_fpDbl_mod_NIST_P192Lbmi2
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fpDbl_mod_NIST_P192Lbmi2    # -- Begin function mcl_fpDbl_mod_NIST_P192Lbmi2
+	.p2align	4, 0x90
 	.type	mcl_fpDbl_mod_NIST_P192Lbmi2,@function
 mcl_fpDbl_mod_NIST_P192Lbmi2:           # @mcl_fpDbl_mod_NIST_P192Lbmi2
-# BB#0:
+# %bb.0:
 	pushq	%r14
 	pushq	%rbx
-	movq	16(%rsi), %r10
+	movq	16(%rsi), %rbx
 	movq	24(%rsi), %r8
 	movq	40(%rsi), %r9
-	movq	8(%rsi), %rax
-	addq	%r9, %rax
-	adcq	$0, %r10
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
+	movq	8(%rsi), %rdx
+	addq	%r9, %rdx
+	adcq	$0, %rbx
+	setb	%cl
+	movzbl	%cl, %r10d
 	movq	32(%rsi), %r11
 	movq	(%rsi), %r14
 	addq	%r8, %r14
-	adcq	%r11, %rax
-	adcq	%r9, %r10
-	adcq	$0, %rcx
-	addq	%r9, %r14
-	adcq	%r8, %rax
-	adcq	%r11, %r10
-	adcq	$0, %rcx
-	addq	%rcx, %r14
-	adcq	%rax, %rcx
+	adcq	%r11, %rdx
+	adcq	%r9, %rbx
 	adcq	$0, %r10
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r14, %rsi
-	addq	$1, %rsi
-	movq	%rcx, %rdx
-	adcq	$1, %rdx
-	movq	%r10, %rbx
+	addq	%r9, %r14
+	adcq	%r8, %rdx
+	adcq	%r11, %rbx
+	setb	%r8b
+	movq	%r10, %r9
+	adcq	$0, %r9
+	addb	$255, %r8b
+	adcq	%r10, %r14
+	adcq	%rdx, %r9
 	adcq	$0, %rbx
-	adcq	$-1, %rax
-	andl	$1, %eax
-	cmovneq	%r14, %rsi
-	movq	%rsi, (%rdi)
-	testb	%al, %al
-	cmovneq	%rcx, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovneq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
+	setb	%dl
+	movzbl	%dl, %edx
+	movq	%r14, %rcx
+	addq	$1, %rcx
+	movq	%r9, %rsi
+	adcq	$1, %rsi
+	movq	%rbx, %rax
+	adcq	$0, %rax
+	adcq	$-1, %rdx
+	testb	$1, %dl
+	cmovneq	%rbx, %rax
+	movq	%rax, 16(%rdi)
+	cmovneq	%r9, %rsi
+	movq	%rsi, 8(%rdi)
+	cmovneq	%r14, %rcx
+	movq	%rcx, (%rdi)
 	popq	%rbx
 	popq	%r14
 	retq
 .Lfunc_end1:
 	.size	mcl_fpDbl_mod_NIST_P192Lbmi2, .Lfunc_end1-mcl_fpDbl_mod_NIST_P192Lbmi2
-
-	.globl	mcl_fp_sqr_NIST_P192Lbmi2
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fp_sqr_NIST_P192Lbmi2       # -- Begin function mcl_fp_sqr_NIST_P192Lbmi2
+	.p2align	4, 0x90
 	.type	mcl_fp_sqr_NIST_P192Lbmi2,@function
 mcl_fp_sqr_NIST_P192Lbmi2:              # @mcl_fp_sqr_NIST_P192Lbmi2
-# BB#0:
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	16(%rsi), %r8
+	movq	16(%rsi), %r9
 	movq	(%rsi), %rcx
 	movq	8(%rsi), %rsi
-	movq	%r8, %rdx
-	mulxq	%rsi, %r14, %rbx
-	movq	%rbx, -16(%rsp)         # 8-byte Spill
-	movq	%rsi, %rdx
-	mulxq	%rsi, %r13, %r15
+	movq	%r9, %rdx
+	mulxq	%rsi, %r11, %r10
 	movq	%rsi, %rdx
-	mulxq	%rcx, %r12, %rsi
-	addq	%rsi, %r13
-	adcq	%r14, %r15
+	mulxq	%rsi, %r12, %r14
+	mulxq	%rcx, %r15, %rsi
+	addq	%rsi, %r12
+	adcq	%r11, %r14
+	movq	%r10, %rbx
 	adcq	$0, %rbx
 	movq	%rcx, %rdx
-	mulxq	%rcx, %r9, %rax
-	addq	%r12, %rax
-	movq	%r8, %rdx
-	mulxq	%rcx, %rbp, %r11
-	adcq	%rbp, %rsi
-	movq	%r11, %r10
-	adcq	$0, %r10
-	addq	%r12, %rax
+	mulxq	%rcx, %r8, %rax
+	addq	%r15, %rax
+	movq	%r9, %rdx
+	mulxq	%rcx, %r13, %rcx
 	adcq	%r13, %rsi
-	adcq	%r15, %r10
+	movq	%rcx, %rbp
+	adcq	$0, %rbp
+	addq	%r15, %rax
+	adcq	%r12, %rsi
+	adcq	%r14, %rbp
 	adcq	$0, %rbx
-	movq	%r8, %rdx
-	mulxq	%r8, %rcx, %rdi
-	addq	%r14, %r11
-	adcq	-16(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	$0, %rdi
-	addq	%rbp, %rsi
-	adcq	%r10, %r11
-	adcq	%rbx, %rcx
-	adcq	$0, %rdi
-	addq	%rdi, %rax
-	adcq	$0, %rsi
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	addq	%r11, %r9
-	adcq	%rcx, %rax
-	adcq	%rdi, %rsi
+	movq	%r9, %rdx
+	mulxq	%r9, %r9, %rdx
+	addq	%r11, %rcx
+	adcq	%r10, %r9
 	adcq	$0, %rdx
-	addq	%rdi, %r9
-	adcq	%r11, %rax
-	adcq	%rcx, %rsi
+	addq	%r13, %rsi
+	adcq	%rbp, %rcx
+	adcq	%rbx, %r9
 	adcq	$0, %rdx
-	addq	%rdx, %r9
-	adcq	%rax, %rdx
+	addq	%rdx, %rax
 	adcq	$0, %rsi
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r9, %rcx
-	addq	$1, %rcx
-	movq	%rdx, %rdi
-	adcq	$1, %rdi
-	movq	%rsi, %rbp
+	setb	%bl
+	movzbl	%bl, %ebx
+	addq	%rcx, %r8
+	adcq	%r9, %rax
+	adcq	%rdx, %rsi
+	adcq	$0, %rbx
+	addq	%rdx, %r8
+	adcq	%rcx, %rax
+	adcq	%r9, %rsi
+	setb	%cl
+	movq	%rbx, %rbp
 	adcq	$0, %rbp
+	addb	$255, %cl
+	adcq	%rbx, %r8
+	adcq	%rax, %rbp
+	adcq	$0, %rsi
+	setb	%al
+	movzbl	%al, %eax
+	movq	%r8, %rcx
+	addq	$1, %rcx
+	movq	%rbp, %rdx
+	adcq	$1, %rdx
+	movq	%rsi, %rbx
+	adcq	$0, %rbx
 	adcq	$-1, %rax
-	andl	$1, %eax
-	cmovneq	%r9, %rcx
-	movq	-8(%rsp), %rbx          # 8-byte Reload
-	movq	%rcx, (%rbx)
-	testb	%al, %al
-	cmovneq	%rdx, %rdi
-	movq	%rdi, 8(%rbx)
-	cmovneq	%rsi, %rbp
-	movq	%rbp, 16(%rbx)
+	testb	$1, %al
+	cmovneq	%rsi, %rbx
+	movq	%rbx, 16(%rdi)
+	cmovneq	%rbp, %rdx
+	movq	%rdx, 8(%rdi)
+	cmovneq	%r8, %rcx
+	movq	%rcx, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -151,68 +152,70 @@ mcl_fp_sqr_NIST_P192Lbmi2:              # @mcl_fp_sqr_NIST_P192Lbmi2
 	retq
 .Lfunc_end2:
 	.size	mcl_fp_sqr_NIST_P192Lbmi2, .Lfunc_end2-mcl_fp_sqr_NIST_P192Lbmi2
-
-	.globl	mcl_fp_mulNIST_P192Lbmi2
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fp_mulNIST_P192Lbmi2        # -- Begin function mcl_fp_mulNIST_P192Lbmi2
+	.p2align	4, 0x90
 	.type	mcl_fp_mulNIST_P192Lbmi2,@function
 mcl_fp_mulNIST_P192Lbmi2:               # @mcl_fp_mulNIST_P192Lbmi2
-# BB#0:
+# %bb.0:
 	pushq	%r14
 	pushq	%rbx
 	subq	$56, %rsp
 	movq	%rdi, %r14
 	leaq	8(%rsp), %rdi
 	callq	mcl_fpDbl_mulPre3Lbmi2@PLT
-	movq	24(%rsp), %r9
+	movq	24(%rsp), %rbx
 	movq	32(%rsp), %r8
-	movq	48(%rsp), %rdi
-	movq	16(%rsp), %rbx
-	addq	%rdi, %rbx
-	adcq	$0, %r9
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	movq	40(%rsp), %rsi
-	movq	8(%rsp), %rdx
-	addq	%r8, %rdx
-	adcq	%rsi, %rbx
-	adcq	%rdi, %r9
+	movq	48(%rsp), %rax
+	movq	16(%rsp), %rdi
+	addq	%rax, %rdi
+	adcq	$0, %rbx
+	setb	%cl
+	movzbl	%cl, %esi
+	movq	40(%rsp), %rdx
+	movq	8(%rsp), %r9
+	addq	%r8, %r9
+	adcq	%rdx, %rdi
+	adcq	%rax, %rbx
+	adcq	$0, %rsi
+	addq	%rax, %r9
+	adcq	%r8, %rdi
+	adcq	%rdx, %rbx
+	setb	%dl
+	movq	%rsi, %rcx
 	adcq	$0, %rcx
-	addq	%rdi, %rdx
-	adcq	%r8, %rbx
+	addb	$255, %dl
 	adcq	%rsi, %r9
-	adcq	$0, %rcx
-	addq	%rcx, %rdx
-	adcq	%rbx, %rcx
-	adcq	$0, %r9
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rdx, %rdi
+	adcq	%rdi, %rcx
+	adcq	$0, %rbx
+	setb	%dl
+	movzbl	%dl, %edx
+	movq	%r9, %rdi
 	addq	$1, %rdi
-	movq	%rcx, %rbx
-	adcq	$1, %rbx
-	movq	%r9, %rax
+	movq	%rcx, %rsi
+	adcq	$1, %rsi
+	movq	%rbx, %rax
 	adcq	$0, %rax
-	adcq	$-1, %rsi
-	andl	$1, %esi
-	cmovneq	%rdx, %rdi
-	movq	%rdi, (%r14)
-	testb	%sil, %sil
-	cmovneq	%rcx, %rbx
-	movq	%rbx, 8(%r14)
-	cmovneq	%r9, %rax
+	adcq	$-1, %rdx
+	testb	$1, %dl
+	cmovneq	%rbx, %rax
 	movq	%rax, 16(%r14)
+	cmovneq	%rcx, %rsi
+	movq	%rsi, 8(%r14)
+	cmovneq	%r9, %rdi
+	movq	%rdi, (%r14)
 	addq	$56, %rsp
 	popq	%rbx
 	popq	%r14
 	retq
 .Lfunc_end3:
 	.size	mcl_fp_mulNIST_P192Lbmi2, .Lfunc_end3-mcl_fp_mulNIST_P192Lbmi2
-
-	.globl	mcl_fpDbl_mod_NIST_P521Lbmi2
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fpDbl_mod_NIST_P521Lbmi2    # -- Begin function mcl_fpDbl_mod_NIST_P521Lbmi2
+	.p2align	4, 0x90
 	.type	mcl_fpDbl_mod_NIST_P521Lbmi2,@function
 mcl_fpDbl_mod_NIST_P521Lbmi2:           # @mcl_fpDbl_mod_NIST_P521Lbmi2
-# BB#0:
+# %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r12
@@ -236,7 +239,8 @@ mcl_fpDbl_mod_NIST_P521Lbmi2:           # @mcl_fpDbl_mod_NIST_P521Lbmi2
 	shldq	$55, %rax, %rcx
 	shrq	$9, %r14
 	shldq	$55, %rbx, %rax
-	andl	$511, %ebx              # imm = 0x1FF
+	movl	%ebx, %edx
+	andl	$511, %edx                      # imm = 0x1FF
 	addq	(%rsi), %rax
 	adcq	8(%rsi), %rcx
 	adcq	16(%rsi), %r12
@@ -245,9 +249,9 @@ mcl_fpDbl_mod_NIST_P521Lbmi2:           # @mcl_fpDbl_mod_NIST_P521Lbmi2
 	adcq	40(%rsi), %r10
 	adcq	48(%rsi), %r9
 	adcq	56(%rsi), %r8
-	adcq	%r14, %rbx
-	movq	%rbx, %rsi
-	shrq	$9, %rsi
+	adcq	%r14, %rdx
+	movl	%edx, %esi
+	shrl	$9, %esi
 	andl	$1, %esi
 	addq	%rax, %rsi
 	adcq	$0, %rcx
@@ -257,7 +261,7 @@ mcl_fpDbl_mod_NIST_P521Lbmi2:           # @mcl_fpDbl_mod_NIST_P521Lbmi2
 	adcq	$0, %r10
 	adcq	$0, %r9
 	adcq	$0, %r8
-	adcq	$0, %rbx
+	adcq	$0, %rdx
 	movq	%rsi, %rax
 	andq	%r12, %rax
 	andq	%r15, %rax
@@ -265,23 +269,23 @@ mcl_fpDbl_mod_NIST_P521Lbmi2:           # @mcl_fpDbl_mod_NIST_P521Lbmi2
 	andq	%r10, %rax
 	andq	%r9, %rax
 	andq	%r8, %rax
-	movq	%rbx, %rdx
-	orq	$-512, %rdx             # imm = 0xFFFFFFFFFFFFFE00
-	andq	%rax, %rdx
-	andq	%rcx, %rdx
-	cmpq	$-1, %rdx
+	movq	%rdx, %rbx
+	orq	$-512, %rbx                     # imm = 0xFE00
+	andq	%rax, %rbx
+	andq	%rcx, %rbx
+	cmpq	$-1, %rbx
 	je	.LBB4_1
-# BB#3:                                 # %nonzero
-	movq	%rsi, (%rdi)
-	movq	%rcx, 8(%rdi)
-	movq	%r12, 16(%rdi)
-	movq	%r15, 24(%rdi)
-	movq	%r11, 32(%rdi)
-	movq	%r10, 40(%rdi)
-	movq	%r9, 48(%rdi)
+# %bb.3:                                # %nonzero
 	movq	%r8, 56(%rdi)
-	andl	$511, %ebx              # imm = 0x1FF
-	movq	%rbx, 64(%rdi)
+	movq	%r9, 48(%rdi)
+	movq	%r10, 40(%rdi)
+	movq	%r11, 32(%rdi)
+	movq	%r15, 24(%rdi)
+	movq	%r12, 16(%rdi)
+	movq	%rcx, 8(%rdi)
+	movq	%rsi, (%rdi)
+	andl	$511, %edx                      # imm = 0x1FF
+	movq	%rdx, 64(%rdi)
 	jmp	.LBB4_2
 .LBB4_1:                                # %zero
 	movq	$0, 64(%rdi)
@@ -301,493 +305,291 @@ mcl_fpDbl_mod_NIST_P521Lbmi2:           # @mcl_fpDbl_mod_NIST_P521Lbmi2
 	retq
 .Lfunc_end4:
 	.size	mcl_fpDbl_mod_NIST_P521Lbmi2, .Lfunc_end4-mcl_fpDbl_mod_NIST_P521Lbmi2
-
-	.globl	mcl_fp_mulUnitPre1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre1Lbmi2,@function
-mcl_fp_mulUnitPre1Lbmi2:                # @mcl_fp_mulUnitPre1Lbmi2
-# BB#0:
-	mulxq	(%rsi), %rcx, %rax
-	movq	%rcx, (%rdi)
-	movq	%rax, 8(%rdi)
+                                        # -- End function
+	.globl	mulPv192x64bmi2                 # -- Begin function mulPv192x64bmi2
+	.p2align	4, 0x90
+	.type	mulPv192x64bmi2,@function
+mulPv192x64bmi2:                        # @mulPv192x64bmi2
+# %bb.0:
+	movq	%rdi, %rax
+	mulxq	(%rsi), %rdi, %rcx
+	movq	%rdi, (%rax)
+	mulxq	8(%rsi), %rdi, %r8
+	addq	%rcx, %rdi
+	movq	%rdi, 8(%rax)
+	mulxq	16(%rsi), %rcx, %rdx
+	adcq	%r8, %rcx
+	movq	%rcx, 16(%rax)
+	adcq	$0, %rdx
+	movq	%rdx, 24(%rax)
 	retq
 .Lfunc_end5:
-	.size	mcl_fp_mulUnitPre1Lbmi2, .Lfunc_end5-mcl_fp_mulUnitPre1Lbmi2
-
-	.globl	mcl_fpDbl_mulPre1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre1Lbmi2,@function
-mcl_fpDbl_mulPre1Lbmi2:                 # @mcl_fpDbl_mulPre1Lbmi2
-# BB#0:
-	movq	(%rdx), %rdx
-	mulxq	(%rsi), %rcx, %rax
-	movq	%rcx, (%rdi)
-	movq	%rax, 8(%rdi)
+	.size	mulPv192x64bmi2, .Lfunc_end5-mulPv192x64bmi2
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre3Lbmi2         # -- Begin function mcl_fp_mulUnitPre3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre3Lbmi2,@function
+mcl_fp_mulUnitPre3Lbmi2:                # @mcl_fp_mulUnitPre3Lbmi2
+# %bb.0:
+	mulxq	16(%rsi), %r8, %rcx
+	mulxq	8(%rsi), %r9, %rax
+	mulxq	(%rsi), %rdx, %rsi
+	movq	%rdx, (%rdi)
+	addq	%r9, %rsi
+	movq	%rsi, 8(%rdi)
+	adcq	%r8, %rax
+	movq	%rax, 16(%rdi)
+	adcq	$0, %rcx
+	movq	%rcx, 24(%rdi)
 	retq
 .Lfunc_end6:
-	.size	mcl_fpDbl_mulPre1Lbmi2, .Lfunc_end6-mcl_fpDbl_mulPre1Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre1Lbmi2,@function
-mcl_fpDbl_sqrPre1Lbmi2:                 # @mcl_fpDbl_sqrPre1Lbmi2
-# BB#0:
-	movq	(%rsi), %rdx
-	mulxq	%rdx, %rcx, %rax
-	movq	%rcx, (%rdi)
-	movq	%rax, 8(%rdi)
+	.size	mcl_fp_mulUnitPre3Lbmi2, .Lfunc_end6-mcl_fp_mulUnitPre3Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre3Lbmi2          # -- Begin function mcl_fpDbl_mulPre3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre3Lbmi2,@function
+mcl_fpDbl_mulPre3Lbmi2:                 # @mcl_fpDbl_mulPre3Lbmi2
+# %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %r10
+	movq	(%rsi), %r8
+	movq	8(%rsi), %r9
+	movq	(%rdx), %r13
+	movq	%r8, %rdx
+	mulxq	%r13, %rdx, %rax
+	movq	16(%rsi), %r12
+	movq	%rdx, (%rdi)
+	movq	8(%r10), %rdx
+	mulxq	%r9, %rsi, %r15
+	mulxq	%r8, %r14, %rbp
+	addq	%rsi, %rbp
+	mulxq	%r12, %r11, %rsi
+	adcq	%r15, %r11
+	adcq	$0, %rsi
+	movq	%r9, %rdx
+	mulxq	%r13, %rcx, %r15
+	addq	%rax, %rcx
+	movq	%r12, %rdx
+	mulxq	%r13, %rbx, %r13
+	adcq	%r15, %rbx
+	adcq	$0, %r13
+	addq	%r14, %rcx
+	movq	%rcx, 8(%rdi)
+	adcq	%rbp, %rbx
+	adcq	%r11, %r13
+	adcq	$0, %rsi
+	movq	16(%r10), %rdx
+	mulxq	%r12, %r10, %rbp
+	mulxq	%r9, %r9, %rcx
+	mulxq	%r8, %rdx, %rax
+	addq	%r9, %rax
+	adcq	%r10, %rcx
+	adcq	$0, %rbp
+	addq	%rbx, %rdx
+	movq	%rdx, 16(%rdi)
+	adcq	%r13, %rax
+	movq	%rax, 24(%rdi)
+	adcq	%rsi, %rcx
+	movq	%rcx, 32(%rdi)
+	adcq	$0, %rbp
+	movq	%rbp, 40(%rdi)
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
 	retq
 .Lfunc_end7:
-	.size	mcl_fpDbl_sqrPre1Lbmi2, .Lfunc_end7-mcl_fpDbl_sqrPre1Lbmi2
-
-	.globl	mcl_fp_mont1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont1Lbmi2,@function
-mcl_fp_mont1Lbmi2:                      # @mcl_fp_mont1Lbmi2
-# BB#0:
-	movq	%rdx, %rax
-	movq	(%rsi), %rdx
-	mulxq	(%rax), %rsi, %r8
-	movq	-8(%rcx), %rdx
-	imulq	%rsi, %rdx
-	movq	(%rcx), %rcx
-	mulxq	%rcx, %rdx, %rax
-	addq	%rsi, %rdx
-	adcq	%r8, %rax
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	movq	%rax, %rsi
-	subq	%rcx, %rsi
-	sbbq	$0, %rdx
-	testb	$1, %dl
-	cmovneq	%rax, %rsi
-	movq	%rsi, (%rdi)
-	retq
-.Lfunc_end8:
-	.size	mcl_fp_mont1Lbmi2, .Lfunc_end8-mcl_fp_mont1Lbmi2
-
-	.globl	mcl_fp_montNF1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF1Lbmi2,@function
-mcl_fp_montNF1Lbmi2:                    # @mcl_fp_montNF1Lbmi2
-# BB#0:
-	movq	%rdx, %rax
-	movq	(%rsi), %rdx
-	mulxq	(%rax), %rsi, %r8
-	movq	-8(%rcx), %rdx
-	imulq	%rsi, %rdx
-	movq	(%rcx), %rcx
-	mulxq	%rcx, %rdx, %rax
-	addq	%rsi, %rdx
-	adcq	%r8, %rax
-	movq	%rax, %rdx
-	subq	%rcx, %rdx
-	cmovsq	%rax, %rdx
-	movq	%rdx, (%rdi)
-	retq
-.Lfunc_end9:
-	.size	mcl_fp_montNF1Lbmi2, .Lfunc_end9-mcl_fp_montNF1Lbmi2
-
-	.globl	mcl_fp_montRed1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed1Lbmi2,@function
-mcl_fp_montRed1Lbmi2:                   # @mcl_fp_montRed1Lbmi2
-# BB#0:
-	movq	(%rsi), %rcx
-	movq	-8(%rdx), %rax
-	imulq	%rcx, %rax
-	movq	(%rdx), %r8
-	movq	%rax, %rdx
-	mulxq	%r8, %rax, %rdx
-	addq	%rcx, %rax
-	adcq	8(%rsi), %rdx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rdx, %rcx
-	subq	%r8, %rcx
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	%rdx, %rcx
-	movq	%rcx, (%rdi)
-	retq
-.Lfunc_end10:
-	.size	mcl_fp_montRed1Lbmi2, .Lfunc_end10-mcl_fp_montRed1Lbmi2
-
-	.globl	mcl_fp_addPre1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre1Lbmi2,@function
-mcl_fp_addPre1Lbmi2:                    # @mcl_fp_addPre1Lbmi2
-# BB#0:
-	movq	(%rdx), %rax
-	addq	(%rsi), %rax
-	movq	%rax, (%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-.Lfunc_end11:
-	.size	mcl_fp_addPre1Lbmi2, .Lfunc_end11-mcl_fp_addPre1Lbmi2
-
-	.globl	mcl_fp_subPre1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre1Lbmi2,@function
-mcl_fp_subPre1Lbmi2:                    # @mcl_fp_subPre1Lbmi2
-# BB#0:
-	movq	(%rsi), %rcx
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	movq	%rcx, (%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	retq
-.Lfunc_end12:
-	.size	mcl_fp_subPre1Lbmi2, .Lfunc_end12-mcl_fp_subPre1Lbmi2
-
-	.globl	mcl_fp_shr1_1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_1Lbmi2,@function
-mcl_fp_shr1_1Lbmi2:                     # @mcl_fp_shr1_1Lbmi2
-# BB#0:
-	movq	(%rsi), %rax
-	shrq	%rax
-	movq	%rax, (%rdi)
-	retq
-.Lfunc_end13:
-	.size	mcl_fp_shr1_1Lbmi2, .Lfunc_end13-mcl_fp_shr1_1Lbmi2
-
-	.globl	mcl_fp_add1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add1Lbmi2,@function
-mcl_fp_add1Lbmi2:                       # @mcl_fp_add1Lbmi2
-# BB#0:
-	movq	(%rdx), %rax
-	addq	(%rsi), %rax
-	movq	%rax, (%rdi)
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	subq	(%rcx), %rax
-	sbbq	$0, %rdx
-	testb	$1, %dl
-	jne	.LBB14_2
-# BB#1:                                 # %nocarry
-	movq	%rax, (%rdi)
-.LBB14_2:                               # %carry
-	retq
-.Lfunc_end14:
-	.size	mcl_fp_add1Lbmi2, .Lfunc_end14-mcl_fp_add1Lbmi2
-
-	.globl	mcl_fp_addNF1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF1Lbmi2,@function
-mcl_fp_addNF1Lbmi2:                     # @mcl_fp_addNF1Lbmi2
-# BB#0:
-	movq	(%rdx), %rax
-	addq	(%rsi), %rax
-	movq	%rax, %rdx
-	subq	(%rcx), %rdx
-	cmovsq	%rax, %rdx
-	movq	%rdx, (%rdi)
-	retq
-.Lfunc_end15:
-	.size	mcl_fp_addNF1Lbmi2, .Lfunc_end15-mcl_fp_addNF1Lbmi2
-
-	.globl	mcl_fp_sub1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub1Lbmi2,@function
-mcl_fp_sub1Lbmi2:                       # @mcl_fp_sub1Lbmi2
-# BB#0:
-	movq	(%rsi), %rax
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	movq	%rax, (%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB16_2
-# BB#1:                                 # %nocarry
-	retq
-.LBB16_2:                               # %carry
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	retq
-.Lfunc_end16:
-	.size	mcl_fp_sub1Lbmi2, .Lfunc_end16-mcl_fp_sub1Lbmi2
-
-	.globl	mcl_fp_subNF1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF1Lbmi2,@function
-mcl_fp_subNF1Lbmi2:                     # @mcl_fp_subNF1Lbmi2
-# BB#0:
-	movq	(%rsi), %rax
-	subq	(%rdx), %rax
-	movq	%rax, %rdx
-	sarq	$63, %rdx
-	andq	(%rcx), %rdx
-	addq	%rax, %rdx
-	movq	%rdx, (%rdi)
-	retq
-.Lfunc_end17:
-	.size	mcl_fp_subNF1Lbmi2, .Lfunc_end17-mcl_fp_subNF1Lbmi2
-
-	.globl	mcl_fpDbl_add1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add1Lbmi2,@function
-mcl_fpDbl_add1Lbmi2:                    # @mcl_fpDbl_add1Lbmi2
-# BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	movq	%rax, (%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rdx, %rsi
-	subq	(%rcx), %rsi
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	%rdx, %rsi
-	movq	%rsi, 8(%rdi)
-	retq
-.Lfunc_end18:
-	.size	mcl_fpDbl_add1Lbmi2, .Lfunc_end18-mcl_fpDbl_add1Lbmi2
-
-	.globl	mcl_fpDbl_sub1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub1Lbmi2,@function
-mcl_fpDbl_sub1Lbmi2:                    # @mcl_fpDbl_sub1Lbmi2
-# BB#0:
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r8
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r8
-	movq	%rax, (%rdi)
-	movl	$0, %eax
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	(%rcx), %rsi
-	addq	%r8, %rsi
-	movq	%rsi, 8(%rdi)
-	retq
-.Lfunc_end19:
-	.size	mcl_fpDbl_sub1Lbmi2, .Lfunc_end19-mcl_fpDbl_sub1Lbmi2
-
-	.globl	mcl_fp_mulUnitPre2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre2Lbmi2,@function
-mcl_fp_mulUnitPre2Lbmi2:                # @mcl_fp_mulUnitPre2Lbmi2
-# BB#0:
-	mulxq	8(%rsi), %rax, %rcx
-	mulxq	(%rsi), %rdx, %rsi
-	movq	%rdx, (%rdi)
-	addq	%rax, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	$0, %rcx
-	movq	%rcx, 16(%rdi)
-	retq
-.Lfunc_end20:
-	.size	mcl_fp_mulUnitPre2Lbmi2, .Lfunc_end20-mcl_fp_mulUnitPre2Lbmi2
-
-	.globl	mcl_fpDbl_mulPre2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre2Lbmi2,@function
-mcl_fpDbl_mulPre2Lbmi2:                 # @mcl_fpDbl_mulPre2Lbmi2
-# BB#0:
-	movq	%rdx, %r10
-	movq	(%rsi), %r11
-	movq	8(%rsi), %r8
-	movq	(%r10), %rsi
-	movq	%r11, %rdx
-	mulxq	%rsi, %rdx, %r9
-	movq	%rdx, (%rdi)
-	movq	%r8, %rdx
-	mulxq	%rsi, %rsi, %rax
-	addq	%r9, %rsi
-	adcq	$0, %rax
-	movq	8(%r10), %rcx
-	movq	%r11, %rdx
-	mulxq	%rcx, %rdx, %r9
-	addq	%rsi, %rdx
-	movq	%rdx, 8(%rdi)
-	movq	%r8, %rdx
-	mulxq	%rcx, %rdx, %rcx
-	adcq	%rax, %rdx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r9, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%rcx, %rax
-	movq	%rax, 24(%rdi)
-	retq
-.Lfunc_end21:
-	.size	mcl_fpDbl_mulPre2Lbmi2, .Lfunc_end21-mcl_fpDbl_mulPre2Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre2Lbmi2,@function
-mcl_fpDbl_sqrPre2Lbmi2:                 # @mcl_fpDbl_sqrPre2Lbmi2
-# BB#0:
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rcx
-	movq	%rax, %rdx
-	mulxq	%rax, %rdx, %rsi
-	movq	%rdx, (%rdi)
-	movq	%rcx, %rdx
-	mulxq	%rax, %rdx, %r8
-	addq	%rdx, %rsi
-	movq	%r8, %rax
-	adcq	$0, %rax
-	addq	%rdx, %rsi
-	movq	%rsi, 8(%rdi)
-	movq	%rcx, %rdx
-	mulxq	%rcx, %rdx, %rcx
-	adcq	%rax, %rdx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r8, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%rcx, %rax
-	movq	%rax, 24(%rdi)
-	retq
-.Lfunc_end22:
-	.size	mcl_fpDbl_sqrPre2Lbmi2, .Lfunc_end22-mcl_fpDbl_sqrPre2Lbmi2
-
-	.globl	mcl_fp_mont2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont2Lbmi2,@function
-mcl_fp_mont2Lbmi2:                      # @mcl_fp_mont2Lbmi2
-# BB#0:
-	pushq	%rbp
+	.size	mcl_fpDbl_mulPre3Lbmi2, .Lfunc_end7-mcl_fpDbl_mulPre3Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre3Lbmi2          # -- Begin function mcl_fpDbl_sqrPre3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre3Lbmi2,@function
+mcl_fpDbl_sqrPre3Lbmi2:                 # @mcl_fpDbl_sqrPre3Lbmi2
+# %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	(%rsi), %r8
-	movq	8(%rsi), %r9
-	movq	(%rdx), %rax
-	movq	8(%rdx), %r11
-	movq	%r9, %rdx
-	mulxq	%rax, %r10, %r13
+	movq	16(%rsi), %r8
+	movq	(%rsi), %rcx
+	movq	8(%rsi), %rsi
+	movq	%rcx, %rdx
+	mulxq	%rcx, %rdx, %rax
+	movq	%rdx, (%rdi)
 	movq	%r8, %rdx
-	mulxq	%rax, %r14, %rsi
-	addq	%r10, %rsi
+	mulxq	%rsi, %r10, %r9
+	movq	%rsi, %rdx
+	mulxq	%rsi, %r11, %r15
+	mulxq	%rcx, %r14, %rsi
+	addq	%rsi, %r11
+	adcq	%r10, %r15
+	movq	%r9, %r13
 	adcq	$0, %r13
-	movq	-8(%rcx), %rbp
-	movq	(%rcx), %r10
-	movq	%r14, %rdx
-	imulq	%rbp, %rdx
-	movq	8(%rcx), %r15
-	mulxq	%r15, %r12, %rcx
-	mulxq	%r10, %rdx, %rbx
-	addq	%r12, %rbx
-	adcq	$0, %rcx
-	addq	%r14, %rdx
-	adcq	%rsi, %rbx
-	adcq	%r13, %rcx
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%r11, %rdx
-	mulxq	%r9, %r9, %r14
-	movq	%r11, %rdx
-	mulxq	%r8, %r8, %rax
-	addq	%r9, %rax
-	adcq	$0, %r14
-	addq	%rbx, %r8
-	adcq	%rcx, %rax
-	adcq	%rsi, %r14
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	imulq	%r8, %rbp
-	movq	%rbp, %rdx
-	mulxq	%r15, %rcx, %rbx
-	movq	%rbp, %rdx
-	mulxq	%r10, %rdx, %rbp
-	addq	%rcx, %rbp
+	addq	%r14, %rax
+	movq	%r8, %rdx
+	mulxq	%rcx, %r12, %rcx
+	adcq	%r12, %rsi
+	movq	%rcx, %rbx
 	adcq	$0, %rbx
-	addq	%r8, %rdx
-	adcq	%rax, %rbp
-	adcq	%r14, %rbx
-	adcq	$0, %rsi
-	movq	%rbp, %rax
-	subq	%r10, %rax
-	movq	%rbx, %rcx
-	sbbq	%r15, %rcx
-	sbbq	$0, %rsi
-	andl	$1, %esi
-	cmovneq	%rbx, %rcx
-	testb	%sil, %sil
-	cmovneq	%rbp, %rax
-	movq	%rax, (%rdi)
-	movq	%rcx, 8(%rdi)
+	addq	%r14, %rax
+	movq	%rax, 8(%rdi)
+	adcq	%r11, %rsi
+	adcq	%r15, %rbx
+	adcq	$0, %r13
+	movq	%r8, %rdx
+	mulxq	%r8, %rax, %rdx
+	addq	%r10, %rcx
+	adcq	%r9, %rax
+	adcq	$0, %rdx
+	addq	%r12, %rsi
+	movq	%rsi, 16(%rdi)
+	adcq	%rbx, %rcx
+	movq	%rcx, 24(%rdi)
+	adcq	%r13, %rax
+	movq	%rax, 32(%rdi)
+	adcq	$0, %rdx
+	movq	%rdx, 40(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
-	popq	%rbp
 	retq
-.Lfunc_end23:
-	.size	mcl_fp_mont2Lbmi2, .Lfunc_end23-mcl_fp_mont2Lbmi2
-
-	.globl	mcl_fp_montNF2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF2Lbmi2,@function
-mcl_fp_montNF2Lbmi2:                    # @mcl_fp_montNF2Lbmi2
-# BB#0:
+.Lfunc_end8:
+	.size	mcl_fpDbl_sqrPre3Lbmi2, .Lfunc_end8-mcl_fpDbl_sqrPre3Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_mont3Lbmi2               # -- Begin function mcl_fp_mont3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mont3Lbmi2,@function
+mcl_fp_mont3Lbmi2:                      # @mcl_fp_mont3Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	(%rsi), %r8
-	movq	8(%rsi), %r9
+	movq	%rdx, %r14
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	16(%rsi), %rdi
+	movq	%rdi, -48(%rsp)                 # 8-byte Spill
 	movq	(%rdx), %rax
-	movq	8(%rdx), %r11
-	movq	%r9, %rdx
-	mulxq	%rax, %r10, %rsi
-	movq	%r8, %rdx
-	mulxq	%rax, %r15, %r13
-	addq	%r10, %r13
-	adcq	$0, %rsi
-	movq	-8(%rcx), %rbp
-	movq	(%rcx), %r10
-	movq	%r15, %rdx
-	imulq	%rbp, %rdx
-	movq	8(%rcx), %r14
-	mulxq	%r10, %rcx, %r12
-	addq	%r15, %rcx
-	mulxq	%r14, %rbx, %rcx
-	adcq	%r13, %rbx
-	adcq	$0, %rsi
-	addq	%r12, %rbx
-	adcq	%rcx, %rsi
-	movq	%r11, %rdx
-	mulxq	%r9, %r9, %rcx
-	movq	%r11, %rdx
-	mulxq	%r8, %r8, %rax
-	addq	%r9, %rax
-	adcq	$0, %rcx
-	addq	%rbx, %r8
-	adcq	%rsi, %rax
-	adcq	$0, %rcx
-	imulq	%r8, %rbp
-	movq	%rbp, %rdx
-	mulxq	%r14, %rbx, %rsi
-	movq	%rbp, %rdx
-	mulxq	%r10, %rbp, %rdx
-	addq	%r8, %rbp
-	adcq	%rax, %rbx
+	movq	%rdx, -16(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rdx
+	mulxq	%rax, %r11, %rbx
+	movq	(%rsi), %rdi
+	movq	%rdi, -56(%rsp)                 # 8-byte Spill
+	movq	8(%rsi), %rdx
+	movq	%rdx, -64(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r15, %rbp
+	movq	%rdi, %rdx
+	mulxq	%rax, %r9, %r8
+	addq	%r15, %r8
+	adcq	%r11, %rbp
+	adcq	$0, %rbx
+	movq	-8(%rcx), %r13
+	movq	%r13, %rdx
+	imulq	%r9, %rdx
+	movq	8(%rcx), %rax
+	movq	%rax, -32(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r12, %r10
+	movq	(%rcx), %rax
+	movq	%rax, -40(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r11, %rax
+	addq	%r12, %rax
+	movq	16(%rcx), %rdi
+	mulxq	%rdi, %rcx, %rsi
+	movq	%rdi, %r15
+	movq	%rdi, -24(%rsp)                 # 8-byte Spill
+	adcq	%r10, %rcx
+	adcq	$0, %rsi
+	addq	%r9, %r11
+	adcq	%r8, %rax
+	movq	8(%r14), %rdx
+	adcq	%rbp, %rcx
+	adcq	%rbx, %rsi
+	movq	-48(%rsp), %r14                 # 8-byte Reload
+	mulxq	%r14, %r9, %r8
+	mulxq	-64(%rsp), %rbp, %rbx           # 8-byte Folded Reload
+	mulxq	-56(%rsp), %r10, %rdi           # 8-byte Folded Reload
+	setb	%dl
+	addq	%rbp, %rdi
+	adcq	%r9, %rbx
+	adcq	$0, %r8
+	addq	%rax, %r10
+	adcq	%rcx, %rdi
+	movzbl	%dl, %eax
+	adcq	%rsi, %rbx
+	adcq	%rax, %r8
+	setb	%r11b
+	movq	%r13, %rdx
+	imulq	%r10, %rdx
+	mulxq	%r15, %r9, %rcx
+	movq	-32(%rsp), %r12                 # 8-byte Reload
+	mulxq	%r12, %rsi, %rbp
+	movq	-40(%rsp), %r15                 # 8-byte Reload
+	mulxq	%r15, %rdx, %rax
+	addq	%rsi, %rax
+	adcq	%r9, %rbp
 	adcq	$0, %rcx
-	addq	%rdx, %rbx
-	adcq	%rsi, %rcx
-	movq	%rbx, %rax
-	subq	%r10, %rax
-	movq	%rcx, %rdx
-	sbbq	%r14, %rdx
-	cmovsq	%rbx, %rax
-	movq	%rax, (%rdi)
-	cmovsq	%rcx, %rdx
-	movq	%rdx, 8(%rdi)
+	addq	%r10, %rdx
+	adcq	%rdi, %rax
+	movzbl	%r11b, %r9d
+	adcq	%rbx, %rbp
+	adcq	%r8, %rcx
+	adcq	$0, %r9
+	movq	-16(%rsp), %rdx                 # 8-byte Reload
+	movq	16(%rdx), %rdx
+	mulxq	%r14, %r8, %rsi
+	mulxq	-64(%rsp), %r10, %r14           # 8-byte Folded Reload
+	mulxq	-56(%rsp), %r11, %rdi           # 8-byte Folded Reload
+	addq	%r10, %rdi
+	adcq	%r8, %r14
+	adcq	$0, %rsi
+	addq	%rax, %r11
+	adcq	%rbp, %rdi
+	adcq	%rcx, %r14
+	adcq	%r9, %rsi
+	setb	%r8b
+	imulq	%r11, %r13
+	movq	%r13, %rdx
+	mulxq	%r15, %rax, %rbp
+	movq	%r12, %r10
+	mulxq	%r12, %rcx, %r9
+	addq	%rbp, %rcx
+	movq	-24(%rsp), %r12                 # 8-byte Reload
+	mulxq	%r12, %rdx, %rbx
+	adcq	%r9, %rdx
+	adcq	$0, %rbx
+	addq	%r11, %rax
+	adcq	%rdi, %rcx
+	adcq	%r14, %rdx
+	movzbl	%r8b, %eax
+	adcq	%rsi, %rbx
+	adcq	$0, %rax
+	movq	%rcx, %rsi
+	subq	%r15, %rsi
+	movq	%rdx, %rdi
+	sbbq	%r10, %rdi
+	movq	%rbx, %rbp
+	sbbq	%r12, %rbp
+	sbbq	$0, %rax
+	testb	$1, %al
+	cmovneq	%rbx, %rbp
+	movq	-8(%rsp), %rax                  # 8-byte Reload
+	movq	%rbp, 16(%rax)
+	cmovneq	%rdx, %rdi
+	movq	%rdi, 8(%rax)
+	cmovneq	%rcx, %rsi
+	movq	%rsi, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -795,541 +597,981 @@ mcl_fp_montNF2Lbmi2:                    # @mcl_fp_montNF2Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end24:
-	.size	mcl_fp_montNF2Lbmi2, .Lfunc_end24-mcl_fp_montNF2Lbmi2
-
-	.globl	mcl_fp_montRed2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed2Lbmi2,@function
-mcl_fp_montRed2Lbmi2:                   # @mcl_fp_montRed2Lbmi2
-# BB#0:
+.Lfunc_end9:
+	.size	mcl_fp_mont3Lbmi2, .Lfunc_end9-mcl_fp_mont3Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montNF3Lbmi2             # -- Begin function mcl_fp_montNF3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF3Lbmi2,@function
+mcl_fp_montNF3Lbmi2:                    # @mcl_fp_montNF3Lbmi2
+# %bb.0:
+	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
+	pushq	%r13
+	pushq	%r12
 	pushq	%rbx
-	movq	-8(%rdx), %r15
-	movq	(%rdx), %r8
-	movq	(%rsi), %r10
-	movq	%r10, %rcx
-	imulq	%r15, %rcx
-	movq	8(%rdx), %r9
-	movq	%rcx, %rdx
-	mulxq	%r9, %r11, %r14
-	movq	%rcx, %rdx
-	mulxq	%r8, %rcx, %rax
-	addq	%r11, %rax
-	adcq	$0, %r14
-	movq	24(%rsi), %r11
-	addq	%r10, %rcx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r14
-	adcq	$0, %r11
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	imulq	%rax, %r15
-	movq	%r15, %rdx
-	mulxq	%r9, %r10, %rbx
+	movq	%rdx, %r10
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	(%rsi), %r11
+	movq	8(%rsi), %rbp
+	movq	%rbp, -32(%rsp)                 # 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdx, -16(%rsp)                 # 8-byte Spill
+	movq	%rbp, %rdx
+	mulxq	%rax, %rbx, %r14
+	movq	%r11, %rdx
+	movq	%r11, -24(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r15, %r12
+	movq	16(%rsi), %rdx
+	movq	%rdx, -40(%rsp)                 # 8-byte Spill
+	addq	%rbx, %r12
+	mulxq	%rax, %rsi, %rbx
+	adcq	%r14, %rsi
+	adcq	$0, %rbx
+	movq	-8(%rcx), %r13
+	movq	(%rcx), %r14
+	movq	%r13, %rax
+	imulq	%r15, %rax
+	movq	%r14, %rdx
+	mulxq	%rax, %rdx, %rbp
+	addq	%r15, %rdx
+	movq	8(%rcx), %r15
 	movq	%r15, %rdx
-	mulxq	%r8, %rsi, %rdx
-	addq	%r10, %rdx
+	mulxq	%rax, %rdi, %r9
+	adcq	%r12, %rdi
+	movq	16(%rcx), %r12
+	movq	%r12, %rdx
+	mulxq	%rax, %r8, %rax
+	adcq	%rsi, %r8
 	adcq	$0, %rbx
-	addq	%rax, %rsi
-	adcq	%r14, %rdx
-	adcq	%r11, %rbx
+	addq	%rbp, %rdi
+	movq	8(%r10), %rcx
+	adcq	%r9, %r8
+	adcq	%rax, %rbx
+	movq	-32(%rsp), %r10                 # 8-byte Reload
+	movq	%r10, %rdx
+	mulxq	%rcx, %rsi, %r9
+	movq	%r11, %rdx
+	mulxq	%rcx, %rbp, %rax
+	addq	%rsi, %rax
+	movq	-40(%rsp), %r11                 # 8-byte Reload
+	movq	%r11, %rdx
+	mulxq	%rcx, %rsi, %rcx
+	adcq	%r9, %rsi
 	adcq	$0, %rcx
-	movq	%rdx, %rax
-	subq	%r8, %rax
+	addq	%rdi, %rbp
+	adcq	%r8, %rax
+	adcq	%rbx, %rsi
+	adcq	$0, %rcx
+	movq	%r13, %rdx
+	imulq	%rbp, %rdx
+	mulxq	%r14, %rbx, %r8
+	addq	%rbp, %rbx
+	mulxq	%r15, %rdi, %rbx
+	adcq	%rax, %rdi
+	mulxq	%r12, %rbp, %rax
+	adcq	%rsi, %rbp
+	adcq	$0, %rcx
+	addq	%r8, %rdi
+	adcq	%rbx, %rbp
+	adcq	%rax, %rcx
+	movq	-16(%rsp), %rax                 # 8-byte Reload
+	movq	16(%rax), %rdx
+	mulxq	%r10, %rbx, %r8
+	mulxq	-24(%rsp), %r9, %rsi            # 8-byte Folded Reload
+	addq	%rbx, %rsi
+	mulxq	%r11, %rax, %rbx
+	adcq	%r8, %rax
+	adcq	$0, %rbx
+	addq	%rdi, %r9
+	adcq	%rbp, %rsi
+	adcq	%rcx, %rax
+	adcq	$0, %rbx
+	imulq	%r9, %r13
+	movq	%r14, %rdx
+	mulxq	%r13, %rdx, %r8
+	addq	%r9, %rdx
+	movq	%r12, %rdx
+	mulxq	%r13, %rbp, %rdi
+	movq	%r15, %rdx
+	mulxq	%r13, %rcx, %rdx
+	adcq	%rsi, %rcx
+	adcq	%rax, %rbp
+	adcq	$0, %rbx
+	addq	%r8, %rcx
+	adcq	%rdx, %rbp
+	adcq	%rdi, %rbx
+	movq	%rcx, %rax
+	subq	%r14, %rax
+	movq	%rbp, %rdx
+	sbbq	%r15, %rdx
 	movq	%rbx, %rsi
-	sbbq	%r9, %rsi
-	sbbq	$0, %rcx
-	andl	$1, %ecx
-	cmovneq	%rbx, %rsi
-	testb	%cl, %cl
-	cmovneq	%rdx, %rax
+	sbbq	%r12, %rsi
+	movq	%rsi, %rdi
+	sarq	$63, %rdi
+	cmovsq	%rbx, %rsi
+	movq	-8(%rsp), %rdi                  # 8-byte Reload
+	movq	%rsi, 16(%rdi)
+	cmovsq	%rbp, %rdx
+	movq	%rdx, 8(%rdi)
+	cmovsq	%rcx, %rax
 	movq	%rax, (%rdi)
-	movq	%rsi, 8(%rdi)
 	popq	%rbx
+	popq	%r12
+	popq	%r13
 	popq	%r14
 	popq	%r15
+	popq	%rbp
 	retq
-.Lfunc_end25:
-	.size	mcl_fp_montRed2Lbmi2, .Lfunc_end25-mcl_fp_montRed2Lbmi2
-
-	.globl	mcl_fp_addPre2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre2Lbmi2,@function
-mcl_fp_addPre2Lbmi2:                    # @mcl_fp_addPre2Lbmi2
-# BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rcx
-	addq	(%rsi), %rax
+.Lfunc_end10:
+	.size	mcl_fp_montNF3Lbmi2, .Lfunc_end10-mcl_fp_montNF3Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRed3Lbmi2            # -- Begin function mcl_fp_montRed3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed3Lbmi2,@function
+mcl_fp_montRed3Lbmi2:                   # @mcl_fp_montRed3Lbmi2
+# %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %rcx
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	-8(%rdx), %r14
+	movq	(%rdx), %r8
+	movq	(%rsi), %rax
+	movq	%rax, %rdx
+	imulq	%r14, %rdx
+	movq	16(%rcx), %r9
+	mulxq	%r9, %r15, %r10
+	movq	8(%rcx), %r11
+	mulxq	%r11, %rbx, %r12
+	mulxq	%r8, %rdx, %rcx
+	addq	%rbx, %rcx
+	adcq	%r15, %r12
+	adcq	$0, %r10
+	addq	%rax, %rdx
 	adcq	8(%rsi), %rcx
-	movq	%rax, (%rdi)
-	movq	%rcx, 8(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-.Lfunc_end26:
-	.size	mcl_fp_addPre2Lbmi2, .Lfunc_end26-mcl_fp_addPre2Lbmi2
-
-	.globl	mcl_fp_subPre2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre2Lbmi2,@function
-mcl_fp_subPre2Lbmi2:                    # @mcl_fp_subPre2Lbmi2
-# BB#0:
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	sbbq	8(%rdx), %rsi
-	movq	%rcx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
+	adcq	16(%rsi), %r12
+	adcq	24(%rsi), %r10
+	setb	%r13b
+	movq	%r14, %rdx
+	imulq	%rcx, %rdx
+	mulxq	%r8, %rbp, %rax
+	mulxq	%r11, %rbx, %rdi
+	addq	%rax, %rbx
+	mulxq	%r9, %r15, %rdx
+	adcq	%rdi, %r15
+	movzbl	%r13b, %edi
+	adcq	%rdx, %rdi
+	addq	%rcx, %rbp
+	adcq	%r12, %rbx
+	adcq	%r10, %r15
+	adcq	32(%rsi), %rdi
+	setb	%r10b
+	imulq	%rbx, %r14
+	movq	%r14, %rdx
+	mulxq	%r8, %r13, %rbp
+	mulxq	%r11, %rcx, %r12
+	addq	%rbp, %rcx
+	mulxq	%r9, %rbp, %r14
+	adcq	%r12, %rbp
+	movzbl	%r10b, %eax
+	adcq	%r14, %rax
+	addq	%rbx, %r13
+	adcq	%r15, %rcx
+	adcq	%rdi, %rbp
+	adcq	40(%rsi), %rax
+	xorl	%ebx, %ebx
+	movq	%rcx, %rsi
+	subq	%r8, %rsi
+	movq	%rbp, %rdi
+	sbbq	%r11, %rdi
+	movq	%rax, %rdx
+	sbbq	%r9, %rdx
+	sbbq	%rbx, %rbx
+	testb	$1, %bl
+	cmovneq	%rax, %rdx
+	movq	-8(%rsp), %rax                  # 8-byte Reload
+	movq	%rdx, 16(%rax)
+	cmovneq	%rbp, %rdi
+	movq	%rdi, 8(%rax)
+	cmovneq	%rcx, %rsi
+	movq	%rsi, (%rax)
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
 	retq
-.Lfunc_end27:
-	.size	mcl_fp_subPre2Lbmi2, .Lfunc_end27-mcl_fp_subPre2Lbmi2
-
-	.globl	mcl_fp_shr1_2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_2Lbmi2,@function
-mcl_fp_shr1_2Lbmi2:                     # @mcl_fp_shr1_2Lbmi2
-# BB#0:
-	movq	(%rsi), %rax
+.Lfunc_end11:
+	.size	mcl_fp_montRed3Lbmi2, .Lfunc_end11-mcl_fp_montRed3Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRedNF3Lbmi2          # -- Begin function mcl_fp_montRedNF3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF3Lbmi2,@function
+mcl_fp_montRedNF3Lbmi2:                 # @mcl_fp_montRedNF3Lbmi2: folds the 6 words at [rsi] into 3 words at [rdi] using the 3-word table at [rdx] and the word at -8(rdx)
+# %bb.0:
+	pushq	%rbp                            # save the six registers popped again before retq
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %rcx                      # keep the table pointer in rcx; rdx becomes mulx's implicit multiplier
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill (destination pointer)
+	movq	-8(%rdx), %r14                  # r14 = word just below the table; NOTE(review): presumably the Montgomery constant -p^-1 mod 2^64 - confirm with caller
+	movq	(%rdx), %r8                     # r8 = table word 0
+	movq	(%rsi), %rbx                    # rbx = input word 0
+	movq	%rbx, %rdx
+	imulq	%r14, %rdx                      # round 1 multiplier q = input[0] * r14 (low 64 bits)
+	movq	16(%rcx), %r9                   # r9 = table word 2
+	mulxq	%r9, %r12, %r10                 # r10:r12 = q * table[2] (mulx leaves flags untouched)
+	movq	8(%rcx), %r11                   # r11 = table word 1
+	mulxq	%r11, %rcx, %r15                # r15:rcx = q * table[1]
+	mulxq	%r8, %rdx, %rax                 # rax:rdx = q * table[0]
+	addq	%rcx, %rax                      # assemble q * table as the 4 words (rdx, rax, r15, r10)
+	adcq	%r12, %r15
+	adcq	$0, %r10
+	addq	%rbx, %rdx                      # add input word 0; rdx is overwritten below, only the carry is used
+	adcq	8(%rsi), %rax                   # add input words 1..3 into the running sum
+	adcq	16(%rsi), %r15
+	adcq	24(%rsi), %r10
+	setb	%r13b                           # remember the carry out of round 1
+	movq	%r14, %rdx
+	imulq	%rax, %rdx                      # round 2 multiplier q = lowest live word * r14
+	mulxq	%r8, %rbp, %rcx                 # rcx:rbp = q * table[0]
+	mulxq	%r11, %rbx, %rdi                # rdi:rbx = q * table[1]
+	addq	%rcx, %rbx
+	mulxq	%r9, %r12, %rdx                 # rdx:r12 = q * table[2]
+	adcq	%rdi, %r12
+	movzbl	%r13b, %ecx                     # reload round-1 carry; movzbl does not disturb CF
+	adcq	%rdx, %rcx                      # top word = high(q * table[2]) + saved carry + CF
+	addq	%rax, %rbp                      # sum is not read again; only its carry feeds the chain
+	adcq	%r15, %rbx
+	adcq	%r10, %r12
+	adcq	32(%rsi), %rcx                  # bring in input word 4
+	setb	%r10b                           # remember the carry out of round 2
+	imulq	%rbx, %r14                      # round 3 multiplier, computed in place (r14 is reused as scratch below)
+	movq	%r14, %rdx
+	mulxq	%r8, %r13, %rdi                 # rdi:r13 = q * table[0]
+	mulxq	%r11, %rax, %r15                # r15:rax = q * table[1]
+	addq	%rdi, %rax
+	mulxq	%r9, %rdi, %r14                 # r14:rdi = q * table[2]
+	adcq	%r15, %rdi
+	movzbl	%r10b, %r10d                    # reload round-2 carry without touching CF
+	adcq	%r14, %r10
+	addq	%rbx, %r13                      # sum is not read again; only its carry feeds the chain
+	adcq	%r12, %rax
+	adcq	%rcx, %rdi
+	adcq	40(%rsi), %r10                  # bring in input word 5; candidate result t = (rax, rdi, r10), carry-out is not captured
+	movq	%rax, %rcx
+	subq	%r8, %rcx                       # (rcx, rsi, rbp) = t - table, word by word with borrow
+	movq	%rdi, %rsi
+	sbbq	%r11, %rsi
+	movq	%r10, %rbp
+	sbbq	%r9, %rbp
+	movq	%rbp, %rdx
+	sarq	$63, %rdx                       # sets SF from the sign of the top difference word; rdx itself is overwritten below
+	cmovsq	%r10, %rbp                      # difference negative -> keep t (mov/cmov leave SF intact for the later cmovs)
+	movq	-8(%rsp), %rdx                  # 8-byte Reload
+	movq	%rbp, 16(%rdx)                  # store result word 2
+	cmovsq	%rdi, %rsi
+	movq	%rsi, 8(%rdx)                   # store result word 1
+	cmovsq	%rax, %rcx
+	movq	%rcx, (%rdx)                    # store result word 0
+	popq	%rbx                            # restore saved registers in reverse push order
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	retq
+.Lfunc_end12:
+	.size	mcl_fp_montRedNF3Lbmi2, .Lfunc_end12-mcl_fp_montRedNF3Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addPre3Lbmi2             # -- Begin function mcl_fp_addPre3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre3Lbmi2,@function
+mcl_fp_addPre3Lbmi2:                    # @mcl_fp_addPre3Lbmi2: [rdi] = [rsi] + [rdx] over three 64-bit words, no reduction; eax = carry-out (0 or 1)
+# %bb.0:
+	movq	16(%rsi), %rax                  # load word 2 of the [rsi] operand
+	movq	(%rsi), %rcx                    # load word 0
+	movq	8(%rsi), %rsi                   # load word 1; the rsi pointer is not needed afterwards
+	addq	(%rdx), %rcx                    # word 0 sum, starts the carry chain
+	adcq	8(%rdx), %rsi                   # word 1 sum plus carry
+	adcq	16(%rdx), %rax                  # word 2 sum plus carry; CF now holds the carry-out
+	movq	%rax, 16(%rdi)                  # stores use mov, so CF survives until setb
+	movq	%rsi, 8(%rdi)
+	movq	%rcx, (%rdi)
+	setb	%al                             # al = carry out of the top word
+	movzbl	%al, %eax                       # zero-extend the carry into the return register
+	retq
+.Lfunc_end13:
+	.size	mcl_fp_addPre3Lbmi2, .Lfunc_end13-mcl_fp_addPre3Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subPre3Lbmi2             # -- Begin function mcl_fp_subPre3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre3Lbmi2,@function
+mcl_fp_subPre3Lbmi2:                    # @mcl_fp_subPre3Lbmi2: [rdi] = [rsi] - [rdx] over three 64-bit words, no reduction; eax = borrow-out (0 or 1)
+# %bb.0:
+	movq	16(%rsi), %rcx                  # load word 2 of the [rsi] operand
+	movq	(%rsi), %r8                     # load word 0
+	movq	8(%rsi), %rsi                   # load word 1; the rsi pointer is not needed afterwards
+	xorl	%eax, %eax                      # clear rax before the borrow chain starts (xor would clobber CF later)
+	subq	(%rdx), %r8                     # word 0 difference, starts the borrow chain
+	sbbq	8(%rdx), %rsi                   # word 1 difference minus borrow
+	sbbq	16(%rdx), %rcx                  # word 2 difference minus borrow; CF now holds the borrow-out
+	movq	%rcx, 16(%rdi)                  # stores use mov, so CF survives until the sbbq below
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rax, %rax                      # rax = 0 - 0 - CF: all ones on borrow, zero otherwise
+	andl	$1, %eax                        # reduce the mask to 0 or 1 for the return value
+	retq
+.Lfunc_end14:
+	.size	mcl_fp_subPre3Lbmi2, .Lfunc_end14-mcl_fp_subPre3Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_shr1_3Lbmi2              # -- Begin function mcl_fp_shr1_3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_3Lbmi2,@function
+mcl_fp_shr1_3Lbmi2:                     # @mcl_fp_shr1_3Lbmi2
+# %bb.0:
+	movq	(%rsi), %rax
 	movq	8(%rsi), %rcx
+	movq	16(%rsi), %rdx
+	movq	%rdx, %rsi
+	shrq	%rsi
+	movq	%rsi, 16(%rdi)
+	shldq	$63, %rcx, %rdx
+	movq	%rdx, 8(%rdi)
 	shrdq	$1, %rcx, %rax
 	movq	%rax, (%rdi)
-	shrq	%rcx
-	movq	%rcx, 8(%rdi)
 	retq
-.Lfunc_end28:
-	.size	mcl_fp_shr1_2Lbmi2, .Lfunc_end28-mcl_fp_shr1_2Lbmi2
-
-	.globl	mcl_fp_add2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add2Lbmi2,@function
-mcl_fp_add2Lbmi2:                       # @mcl_fp_add2Lbmi2
-# BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
+.Lfunc_end15:
+	.size	mcl_fp_shr1_3Lbmi2, .Lfunc_end15-mcl_fp_shr1_3Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_add3Lbmi2                # -- Begin function mcl_fp_add3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_add3Lbmi2,@function
+mcl_fp_add3Lbmi2:                       # @mcl_fp_add3Lbmi2
+# %bb.0:
+	movq	16(%rsi), %r8
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r8
+	movq	%r8, 16(%rdi)
+	movq	%rsi, 8(%rdi)
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
+	setb	%dl
+	movzbl	%dl, %edx
 	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB29_2
-# BB#1:                                 # %nocarry
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %r8
+	sbbq	$0, %rdx
+	testb	$1, %dl
+	jne	.LBB16_2
+# %bb.1:                                # %nocarry
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-.LBB29_2:                               # %carry
+	movq	%rsi, 8(%rdi)
+	movq	%r8, 16(%rdi)
+.LBB16_2:                               # %carry
 	retq
-.Lfunc_end29:
-	.size	mcl_fp_add2Lbmi2, .Lfunc_end29-mcl_fp_add2Lbmi2
-
-	.globl	mcl_fp_addNF2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF2Lbmi2,@function
-mcl_fp_addNF2Lbmi2:                     # @mcl_fp_addNF2Lbmi2
-# BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %r8
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %r8
-	movq	%rax, %rsi
+.Lfunc_end16:
+	.size	mcl_fp_add3Lbmi2, .Lfunc_end16-mcl_fp_add3Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addNF3Lbmi2              # -- Begin function mcl_fp_addNF3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF3Lbmi2,@function
+mcl_fp_addNF3Lbmi2:                     # @mcl_fp_addNF3Lbmi2
+# %bb.0:
+	movq	16(%rdx), %r10
+	movq	(%rdx), %r8
+	movq	8(%rdx), %r9
+	addq	(%rsi), %r8
+	adcq	8(%rsi), %r9
+	adcq	16(%rsi), %r10
+	movq	%r8, %rsi
 	subq	(%rcx), %rsi
-	movq	%r8, %rdx
+	movq	%r9, %rdx
 	sbbq	8(%rcx), %rdx
-	testq	%rdx, %rdx
-	cmovsq	%rax, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r8, %rdx
-	movq	%rdx, 8(%rdi)
-	retq
-.Lfunc_end30:
-	.size	mcl_fp_addNF2Lbmi2, .Lfunc_end30-mcl_fp_addNF2Lbmi2
-
-	.globl	mcl_fp_sub2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub2Lbmi2,@function
-mcl_fp_sub2Lbmi2:                       # @mcl_fp_sub2Lbmi2
-# BB#0:
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r8
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r8
-	movq	%rax, (%rdi)
-	movq	%r8, 8(%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB31_2
-# BB#1:                                 # %nocarry
-	retq
-.LBB31_2:                               # %carry
-	movq	8(%rcx), %rdx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r8, %rdx
+	movq	%r10, %rax
+	sbbq	16(%rcx), %rax
+	movq	%rax, %rcx
+	sarq	$63, %rcx
+	cmovsq	%r10, %rax
+	movq	%rax, 16(%rdi)
+	cmovsq	%r9, %rdx
 	movq	%rdx, 8(%rdi)
+	cmovsq	%r8, %rsi
+	movq	%rsi, (%rdi)
 	retq
-.Lfunc_end31:
-	.size	mcl_fp_sub2Lbmi2, .Lfunc_end31-mcl_fp_sub2Lbmi2
-
-	.globl	mcl_fp_subNF2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF2Lbmi2,@function
-mcl_fp_subNF2Lbmi2:                     # @mcl_fp_subNF2Lbmi2
-# BB#0:
+.Lfunc_end17:
+	.size	mcl_fp_addNF3Lbmi2, .Lfunc_end17-mcl_fp_addNF3Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_sub3Lbmi2                # -- Begin function mcl_fp_sub3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_sub3Lbmi2,@function
+mcl_fp_sub3Lbmi2:                       # @mcl_fp_sub3Lbmi2
+# %bb.0:
+	movq	16(%rsi), %rax
 	movq	(%rsi), %r8
 	movq	8(%rsi), %rsi
+	xorl	%r9d, %r9d
 	subq	(%rdx), %r8
 	sbbq	8(%rdx), %rsi
-	movq	%rsi, %rdx
+	sbbq	16(%rdx), %rax
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%r9, %r9
+	testb	$1, %r9b
+	jne	.LBB18_2
+# %bb.1:                                # %nocarry
+	retq
+.LBB18_2:                               # %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %rax
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	retq
+.Lfunc_end18:
+	.size	mcl_fp_sub3Lbmi2, .Lfunc_end18-mcl_fp_sub3Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subNF3Lbmi2              # -- Begin function mcl_fp_subNF3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF3Lbmi2,@function
+mcl_fp_subNF3Lbmi2:                     # @mcl_fp_subNF3Lbmi2
+# %bb.0:
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r8
+	movq	8(%rsi), %r9
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %r9
+	sbbq	16(%rdx), %r10
+	movq	%r10, %rdx
 	sarq	$63, %rdx
-	movq	8(%rcx), %rax
+	movq	%rdx, %rsi
+	shldq	$1, %r10, %rsi
+	andq	(%rcx), %rsi
+	movq	16(%rcx), %rax
 	andq	%rdx, %rax
-	andq	(%rcx), %rdx
-	addq	%r8, %rdx
-	movq	%rdx, (%rdi)
-	adcq	%rsi, %rax
-	movq	%rax, 8(%rdi)
-	retq
-.Lfunc_end32:
-	.size	mcl_fp_subNF2Lbmi2, .Lfunc_end32-mcl_fp_subNF2Lbmi2
-
-	.globl	mcl_fpDbl_add2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add2Lbmi2,@function
-mcl_fpDbl_add2Lbmi2:                    # @mcl_fpDbl_add2Lbmi2
-# BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rdx), %r10
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r10
-	movq	%rax, (%rdi)
+	andq	8(%rcx), %rdx
+	addq	%r8, %rsi
+	movq	%rsi, (%rdi)
+	adcq	%r9, %rdx
 	movq	%rdx, 8(%rdi)
-	adcq	%r8, %r9
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r10, %rdx
+	adcq	%r10, %rax
+	movq	%rax, 16(%rdi)
+	retq
+.Lfunc_end19:
+	.size	mcl_fp_subNF3Lbmi2, .Lfunc_end19-mcl_fp_subNF3Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_add3Lbmi2             # -- Begin function mcl_fpDbl_add3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add3Lbmi2,@function
+mcl_fpDbl_add3Lbmi2:                    # @mcl_fpDbl_add3Lbmi2
+# %bb.0:
+	movq	40(%rsi), %r10
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r8
+	movq	16(%rsi), %rax
+	movq	(%rsi), %r11
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r11
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rax
+	adcq	24(%rdx), %r8
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r10
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r11, (%rdi)
+	setb	%al
+	movzbl	%al, %r11d
+	movq	%r8, %rdx
 	subq	(%rcx), %rdx
 	movq	%r9, %rsi
 	sbbq	8(%rcx), %rsi
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%r10, %rdx
-	movq	%rdx, 16(%rdi)
-	testb	%al, %al
+	movq	%r10, %rax
+	sbbq	16(%rcx), %rax
+	sbbq	$0, %r11
+	testb	$1, %r11b
+	cmovneq	%r10, %rax
+	movq	%rax, 40(%rdi)
 	cmovneq	%r9, %rsi
-	movq	%rsi, 24(%rdi)
+	movq	%rsi, 32(%rdi)
+	cmovneq	%r8, %rdx
+	movq	%rdx, 24(%rdi)
 	retq
-.Lfunc_end33:
-	.size	mcl_fpDbl_add2Lbmi2, .Lfunc_end33-mcl_fpDbl_add2Lbmi2
-
-	.globl	mcl_fpDbl_sub2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub2Lbmi2,@function
-mcl_fpDbl_sub2Lbmi2:                    # @mcl_fpDbl_sub2Lbmi2
-# BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
+.Lfunc_end20:
+	.size	mcl_fpDbl_add3Lbmi2, .Lfunc_end20-mcl_fpDbl_add3Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sub3Lbmi2             # -- Begin function mcl_fpDbl_sub3Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub3Lbmi2,@function
+mcl_fpDbl_sub3Lbmi2:                    # @mcl_fpDbl_sub3Lbmi2
+# %bb.0:
+	pushq	%rbx
+	movq	40(%rsi), %r8
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r10
+	movq	16(%rsi), %rax
 	movq	(%rsi), %r11
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
+	movq	8(%rsi), %rbx
+	xorl	%esi, %esi
 	subq	(%rdx), %r11
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %r10
+	sbbq	8(%rdx), %rbx
+	sbbq	16(%rdx), %rax
+	sbbq	24(%rdx), %r10
+	sbbq	32(%rdx), %r9
+	sbbq	40(%rdx), %r8
+	movq	%rax, 16(%rdi)
+	movq	%rbx, 8(%rdi)
 	movq	%r11, (%rdi)
-	movq	%rsi, 8(%rdi)
-	sbbq	%r8, %r9
-	movl	$0, %edx
-	sbbq	$0, %rdx
-	andl	$1, %edx
-	movq	(%rcx), %rsi
-	cmoveq	%rax, %rsi
-	testb	%dl, %dl
-	cmovneq	8(%rcx), %rax
-	addq	%r10, %rsi
-	movq	%rsi, 16(%rdi)
-	adcq	%r9, %rax
-	movq	%rax, 24(%rdi)
-	retq
-.Lfunc_end34:
-	.size	mcl_fpDbl_sub2Lbmi2, .Lfunc_end34-mcl_fpDbl_sub2Lbmi2
-
-	.globl	mcl_fp_mulUnitPre3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre3Lbmi2,@function
-mcl_fp_mulUnitPre3Lbmi2:                # @mcl_fp_mulUnitPre3Lbmi2
-# BB#0:
-	mulxq	16(%rsi), %r8, %rcx
-	mulxq	8(%rsi), %r9, %rax
+	sbbq	%rsi, %rsi
+	andl	$1, %esi
+	negq	%rsi
+	movq	16(%rcx), %rax
+	andq	%rsi, %rax
+	movq	8(%rcx), %rdx
+	andq	%rsi, %rdx
+	andq	(%rcx), %rsi
+	addq	%r10, %rsi
+	movq	%rsi, 24(%rdi)
+	adcq	%r9, %rdx
+	movq	%rdx, 32(%rdi)
+	adcq	%r8, %rax
+	movq	%rax, 40(%rdi)
+	popq	%rbx
+	retq
+.Lfunc_end21:
+	.size	mcl_fpDbl_sub3Lbmi2, .Lfunc_end21-mcl_fpDbl_sub3Lbmi2
+                                        # -- End function
+	.globl	mulPv256x64bmi2                 # -- Begin function mulPv256x64bmi2
+	.p2align	4, 0x90
+	.type	mulPv256x64bmi2,@function
+mulPv256x64bmi2:                        # @mulPv256x64bmi2: writes the 5-word product of the 4 words at [rsi] and the 64-bit value in rdx to [rdi]; returns rdi in rax
+# %bb.0:
+	movq	%rdi, %rax                      # rax = destination pointer (also the return value); frees rdi as scratch
+	mulxq	(%rsi), %rdi, %rcx              # rcx:rdi = rdx * word 0 (mulx reads rdx implicitly and leaves flags untouched)
+	movq	%rdi, (%rax)                    # product word 0
+	mulxq	8(%rsi), %rdi, %r8              # r8:rdi = rdx * word 1
+	addq	%rcx, %rdi                      # add the previous high half, starts the carry chain
+	movq	%rdi, 8(%rax)                   # product word 1
+	mulxq	16(%rsi), %rdi, %rcx            # rcx:rdi = rdx * word 2
+	adcq	%r8, %rdi                       # previous high half plus carry
+	movq	%rdi, 16(%rax)                  # product word 2
+	mulxq	24(%rsi), %rdx, %rsi            # rsi:rdx = rdx * word 3; last multiply, so rdx and rsi may be overwritten
+	adcq	%rcx, %rdx                      # previous high half plus carry
+	movq	%rdx, 24(%rax)                  # product word 3
+	adcq	$0, %rsi                        # fold the final carry into the top high half
+	movq	%rsi, 32(%rax)                  # product word 4
+	retq
+.Lfunc_end22:
+	.size	mulPv256x64bmi2, .Lfunc_end22-mulPv256x64bmi2
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre4Lbmi2         # -- Begin function mcl_fp_mulUnitPre4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre4Lbmi2,@function
+mcl_fp_mulUnitPre4Lbmi2:                # @mcl_fp_mulUnitPre4Lbmi2
+# %bb.0:
+	mulxq	24(%rsi), %r8, %r11
+	mulxq	16(%rsi), %r9, %rax
+	mulxq	8(%rsi), %r10, %rcx
 	mulxq	(%rsi), %rdx, %rsi
 	movq	%rdx, (%rdi)
-	addq	%r9, %rsi
+	addq	%r10, %rsi
 	movq	%rsi, 8(%rdi)
+	adcq	%r9, %rcx
+	movq	%rcx, 16(%rdi)
 	adcq	%r8, %rax
-	movq	%rax, 16(%rdi)
-	adcq	$0, %rcx
-	movq	%rcx, 24(%rdi)
+	movq	%rax, 24(%rdi)
+	adcq	$0, %r11
+	movq	%r11, 32(%rdi)
 	retq
-.Lfunc_end35:
-	.size	mcl_fp_mulUnitPre3Lbmi2, .Lfunc_end35-mcl_fp_mulUnitPre3Lbmi2
-
-	.globl	mcl_fpDbl_mulPre3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre3Lbmi2,@function
-mcl_fpDbl_mulPre3Lbmi2:                 # @mcl_fpDbl_mulPre3Lbmi2
-# BB#0:
+.Lfunc_end23:
+	.size	mcl_fp_mulUnitPre4Lbmi2, .Lfunc_end23-mcl_fp_mulUnitPre4Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre4Lbmi2          # -- Begin function mcl_fpDbl_mulPre4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre4Lbmi2,@function
+mcl_fpDbl_mulPre4Lbmi2:                 # @mcl_fpDbl_mulPre4Lbmi2
+# %bb.0:
+	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
+	pushq	%r13
+	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, %r9
-	movq	(%rsi), %r10
-	movq	8(%rsi), %r8
-	movq	(%r9), %rax
-	movq	%r10, %rdx
-	mulxq	%rax, %rdx, %r14
-	movq	16(%rsi), %r11
-	movq	%rdx, (%rdi)
+	movq	%rdi, %r9
+	movq	(%rsi), %r14
+	movq	8(%rsi), %rbp
+	movq	(%rdx), %rax
+	movq	%rdx, %rbx
+	movq	%rdx, -16(%rsp)                 # 8-byte Spill
+	movq	%r14, %rdx
+	mulxq	%rax, %rcx, %r10
+	movq	16(%rsi), %rdi
+	movq	24(%rsi), %r11
+	movq	%rcx, (%r9)
 	movq	%r11, %rdx
-	mulxq	%rax, %rsi, %rbx
-	movq	%r8, %rdx
+	mulxq	%rax, %r12, %r15
+	movq	%rbp, %rdx
+	mulxq	%rax, %rsi, %r8
+	addq	%r10, %rsi
+	movq	%rdi, %rdx
+	movq	%rdi, %r10
 	mulxq	%rax, %rax, %rcx
-	addq	%r14, %rax
-	adcq	%rsi, %rcx
-	adcq	$0, %rbx
-	movq	8(%r9), %rsi
-	movq	%r10, %rdx
-	mulxq	%rsi, %rdx, %r14
-	addq	%rax, %rdx
-	movq	%rdx, 8(%rdi)
-	movq	%r11, %rdx
-	mulxq	%rsi, %rax, %r15
-	movq	%r8, %rdx
-	mulxq	%rsi, %rsi, %rdx
-	adcq	%rcx, %rsi
-	adcq	%rbx, %rax
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r14, %rsi
-	adcq	%rdx, %rax
-	adcq	%r15, %rcx
-	movq	16(%r9), %rbx
-	movq	%r10, %rdx
-	mulxq	%rbx, %rdx, %r9
-	addq	%rsi, %rdx
-	movq	%rdx, 16(%rdi)
-	movq	%r11, %rdx
-	mulxq	%rbx, %rsi, %r10
-	movq	%r8, %rdx
-	mulxq	%rbx, %rbx, %rdx
+	adcq	%r8, %rax
+	adcq	%r12, %rcx
+	adcq	$0, %r15
+	movq	8(%rbx), %rdx
+	mulxq	%r14, %r13, %r8
+	movq	%r14, -8(%rsp)                  # 8-byte Spill
+	addq	%rsi, %r13
+	mulxq	%rbp, %rbx, %r12
 	adcq	%rax, %rbx
+	mulxq	%rdi, %rsi, %rax
 	adcq	%rcx, %rsi
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r9, %rbx
-	movq	%rbx, 24(%rdi)
-	adcq	%rdx, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%r10, %rax
-	movq	%rax, 40(%rdi)
+	mulxq	%r11, %rcx, %rdx
+	adcq	%r15, %rcx
+	setb	%r15b
+	addq	%r8, %rbx
+	adcq	%r12, %rsi
+	movq	%r13, 8(%r9)
+	movzbl	%r15b, %r8d
+	adcq	%rax, %rcx
+	adcq	%rdx, %r8
+	movq	-16(%rsp), %rax                 # 8-byte Reload
+	movq	16(%rax), %rdx
+	mulxq	%rbp, %rdi, %r15
+	mulxq	%r14, %rax, %r12
+	addq	%rdi, %r12
+	mulxq	%r10, %r13, %r14
+	adcq	%r15, %r13
+	mulxq	%r11, %rdi, %r15
+	adcq	%r14, %rdi
+	adcq	$0, %r15
+	addq	%rbx, %rax
+	adcq	%rsi, %r12
+	movq	%rax, 16(%r9)
+	adcq	%rcx, %r13
+	adcq	%r8, %rdi
+	adcq	$0, %r15
+	movq	-16(%rsp), %rax                 # 8-byte Reload
+	movq	24(%rax), %rdx
+	mulxq	%rbp, %rcx, %r8
+	mulxq	-8(%rsp), %rsi, %rbp            # 8-byte Folded Reload
+	addq	%rcx, %rbp
+	mulxq	%r11, %rcx, %rbx
+	mulxq	%r10, %rdx, %rax
+	adcq	%r8, %rdx
+	adcq	%rcx, %rax
+	adcq	$0, %rbx
+	addq	%r12, %rsi
+	movq	%rsi, 24(%r9)
+	adcq	%r13, %rbp
+	movq	%rbp, 32(%r9)
+	adcq	%rdi, %rdx
+	movq	%rdx, 40(%r9)
+	adcq	%r15, %rax
+	movq	%rax, 48(%r9)
+	adcq	$0, %rbx
+	movq	%rbx, 56(%r9)
 	popq	%rbx
+	popq	%r12
+	popq	%r13
 	popq	%r14
 	popq	%r15
+	popq	%rbp
 	retq
-.Lfunc_end36:
-	.size	mcl_fpDbl_mulPre3Lbmi2, .Lfunc_end36-mcl_fpDbl_mulPre3Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre3Lbmi2,@function
-mcl_fpDbl_sqrPre3Lbmi2:                 # @mcl_fpDbl_sqrPre3Lbmi2
-# BB#0:
+.Lfunc_end24:
+	.size	mcl_fpDbl_mulPre4Lbmi2, .Lfunc_end24-mcl_fpDbl_mulPre4Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre4Lbmi2          # -- Begin function mcl_fpDbl_sqrPre4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre4Lbmi2,@function
+mcl_fpDbl_sqrPre4Lbmi2:                 # @mcl_fpDbl_sqrPre4Lbmi2
+# %bb.0:
+	pushq	%rbp
+	pushq	%r15
 	pushq	%r14
+	pushq	%r13
+	pushq	%r12
 	pushq	%rbx
-	movq	16(%rsi), %r10
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
+	movq	24(%rsi), %r8
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rcx
+	movq	%r8, %rdx
+	movq	%r8, -64(%rsp)                  # 8-byte Spill
+	mulxq	%rcx, %r14, %r9
+	movq	%r14, -8(%rsp)                  # 8-byte Spill
+	movq	16(%rsi), %r12
+	movq	%r12, %rdx
+	mulxq	%rcx, %rbp, %rsi
+	movq	%rbp, -40(%rsp)                 # 8-byte Spill
+	movq	%rsi, -24(%rsp)                 # 8-byte Spill
 	movq	%rcx, %rdx
-	mulxq	%rcx, %rdx, %rax
-	movq	%rdx, (%rdi)
-	movq	%r10, %rdx
-	mulxq	%rcx, %r11, %r8
-	movq	%rsi, %rdx
-	mulxq	%rcx, %rdx, %r14
-	addq	%rdx, %rax
-	movq	%r14, %rbx
-	adcq	%r11, %rbx
-	movq	%r8, %rcx
-	adcq	$0, %rcx
-	addq	%rdx, %rax
-	movq	%rax, 8(%rdi)
-	movq	%r10, %rdx
-	mulxq	%rsi, %rax, %r9
-	movq	%rsi, %rdx
-	mulxq	%rsi, %rsi, %rdx
-	adcq	%rbx, %rsi
-	adcq	%rax, %rcx
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	%r14, %rsi
-	adcq	%rdx, %rcx
-	adcq	%r9, %rbx
-	addq	%r11, %rsi
-	movq	%rsi, 16(%rdi)
-	movq	%r10, %rdx
-	mulxq	%r10, %rsi, %rdx
-	adcq	%rax, %rcx
-	adcq	%rbx, %rsi
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r8, %rcx
+	mulxq	%rcx, %r10, %r11
+	mulxq	%rax, %r15, %rbx
+	movq	%r15, -56(%rsp)                 # 8-byte Spill
+	addq	%rbx, %r10
+	adcq	%rbp, %r11
+	movq	%rsi, %rbp
+	adcq	%r14, %rbp
+	movq	%r9, %r14
+	adcq	$0, %r14
+	movq	%rax, %rdx
+	mulxq	%rax, %rcx, %rsi
+	movq	%rcx, -48(%rsp)                 # 8-byte Spill
+	addq	%r15, %rsi
+	movq	%r12, %rdx
+	mulxq	%rax, %rdx, %rcx
+	movq	%rdx, -32(%rsp)                 # 8-byte Spill
+	adcq	%rdx, %rbx
+	movq	%r8, %rdx
+	mulxq	%rax, %rax, %r15
+	movq	%rax, -16(%rsp)                 # 8-byte Spill
+	movq	%rcx, %r8
+	adcq	%rax, %r8
+	movq	%r15, %r13
+	adcq	$0, %r13
+	addq	-56(%rsp), %rsi                 # 8-byte Folded Reload
+	adcq	%r10, %rbx
+	adcq	%r11, %r8
+	adcq	%rbp, %r13
+	adcq	$0, %r14
+	addq	-40(%rsp), %rcx                 # 8-byte Folded Reload
+	movq	%r12, %rdx
+	mulxq	%r12, %rbp, %r11
+	adcq	-24(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	-48(%rsp), %rax                 # 8-byte Reload
+	movq	%rax, (%rdi)
+	movq	-64(%rsp), %rdx                 # 8-byte Reload
+	mulxq	%r12, %rdx, %r10
+	adcq	%rdx, %r11
+	movq	%r10, %rax
+	adcq	$0, %rax
+	addq	-32(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%rsi, 8(%rdi)
+	adcq	%r8, %rcx
+	movq	%rbx, 16(%rdi)
+	adcq	%r13, %rbp
+	adcq	%r14, %r11
+	adcq	$0, %rax
+	addq	-8(%rsp), %r15                  # 8-byte Folded Reload
+	adcq	%rdx, %r9
+	movq	-64(%rsp), %rdx                 # 8-byte Reload
+	mulxq	%rdx, %rdx, %rsi
+	adcq	%r10, %rdx
+	adcq	$0, %rsi
+	addq	-16(%rsp), %rcx                 # 8-byte Folded Reload
 	movq	%rcx, 24(%rdi)
-	adcq	%r9, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%rdx, %rax
-	movq	%rax, 40(%rdi)
+	adcq	%rbp, %r15
+	movq	%r15, 32(%rdi)
+	adcq	%r11, %r9
+	movq	%r9, 40(%rdi)
+	adcq	%rax, %rdx
+	movq	%rdx, 48(%rdi)
+	adcq	$0, %rsi
+	movq	%rsi, 56(%rdi)
 	popq	%rbx
+	popq	%r12
+	popq	%r13
 	popq	%r14
+	popq	%r15
+	popq	%rbp
 	retq
-.Lfunc_end37:
-	.size	mcl_fpDbl_sqrPre3Lbmi2, .Lfunc_end37-mcl_fpDbl_sqrPre3Lbmi2
-
-	.globl	mcl_fp_mont3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont3Lbmi2,@function
-mcl_fp_mont3Lbmi2:                      # @mcl_fp_mont3Lbmi2
-# BB#0:
+.Lfunc_end25:
+	.size	mcl_fpDbl_sqrPre4Lbmi2, .Lfunc_end25-mcl_fpDbl_sqrPre4Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_mont4Lbmi2               # -- Begin function mcl_fp_mont4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mont4Lbmi2,@function
+mcl_fp_mont4Lbmi2:                      # @mcl_fp_mont4Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, %r15
-	movq	%r15, -32(%rsp)         # 8-byte Spill
-	movq	%rdi, -24(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %rdi
-	movq	%rdi, -56(%rsp)         # 8-byte Spill
-	movq	(%r15), %rax
+	movq	%rdx, -32(%rsp)                 # 8-byte Spill
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	24(%rsi), %rdi
+	movq	%rdi, -48(%rsp)                 # 8-byte Spill
+	movq	(%rdx), %rax
 	movq	%rdi, %rdx
 	mulxq	%rax, %r14, %r11
+	movq	16(%rsi), %rdx
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %rbx, %r10
 	movq	(%rsi), %r12
-	movq	%r12, -48(%rsp)         # 8-byte Spill
 	movq	8(%rsi), %rdx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	mulxq	%rax, %rbx, %r8
+	movq	%rdx, -40(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %rdi, %r8
 	movq	%r12, %rdx
-	mulxq	%rax, %r9, %rdi
-	addq	%rbx, %rdi
-	adcq	%r14, %r8
+	movq	%r12, -16(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r15, %r13
+	addq	%rdi, %r13
+	adcq	%rbx, %r8
+	adcq	%r14, %r10
 	adcq	$0, %r11
-	movq	-8(%rcx), %r13
-	movq	(%rcx), %rbx
-	movq	%rbx, -8(%rsp)          # 8-byte Spill
-	movq	%r9, %rdx
-	imulq	%r13, %rdx
+	movq	-8(%rcx), %rdx
+	movq	%rdx, -64(%rsp)                 # 8-byte Spill
+	imulq	%r15, %rdx
+	movq	24(%rcx), %rax
+	movq	%rax, -24(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r14, %rbx
+	movq	16(%rcx), %rax
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r9, %rdi
+	movq	(%rcx), %rbp
+	movq	%rbp, -72(%rsp)                 # 8-byte Spill
 	movq	8(%rcx), %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	mulxq	%rax, %rax, %r10
-	mulxq	%rbx, %rsi, %rbx
-	addq	%rax, %rbx
-	movq	16(%rcx), %rbp
-	mulxq	%rbp, %rcx, %rax
-	movq	%rbp, %r14
-	adcq	%r10, %rcx
-	adcq	$0, %rax
-	addq	%r9, %rsi
-	adcq	%rdi, %rbx
-	movq	8(%r15), %rdx
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %rsi, %rcx
+	mulxq	%rbp, %rdx, %rax
+	addq	%rsi, %rax
+	adcq	%r9, %rcx
+	adcq	%r14, %rdi
+	adcq	$0, %rbx
+	addq	%r15, %rdx
+	adcq	%r13, %rax
 	adcq	%r8, %rcx
-	adcq	%r11, %rax
-	sbbq	%r9, %r9
-	andl	$1, %r9d
-	movq	-56(%rsp), %r15         # 8-byte Reload
-	mulxq	%r15, %r11, %rdi
-	mulxq	-16(%rsp), %r10, %rsi   # 8-byte Folded Reload
-	mulxq	%r12, %r8, %rbp
-	addq	%r10, %rbp
-	adcq	%r11, %rsi
-	adcq	$0, %rdi
-	addq	%rbx, %r8
+	adcq	%r10, %rdi
+	adcq	%r11, %rbx
+	movq	-32(%rsp), %r13                 # 8-byte Reload
+	movq	8(%r13), %rdx
+	mulxq	-48(%rsp), %r11, %r10           # 8-byte Folded Reload
+	mulxq	-88(%rsp), %r14, %rbp           # 8-byte Folded Reload
+	mulxq	-40(%rsp), %r15, %r8            # 8-byte Folded Reload
+	mulxq	%r12, %r9, %rsi
+	setb	%dl
+	addq	%r15, %rsi
+	adcq	%r14, %r8
+	adcq	%r11, %rbp
+	adcq	$0, %r10
+	addq	%rax, %r9
+	adcq	%rcx, %rsi
+	adcq	%rdi, %r8
+	adcq	%rbx, %rbp
+	movzbl	%dl, %eax
+	adcq	%rax, %r10
+	setb	-89(%rsp)                       # 1-byte Folded Spill
+	movq	-64(%rsp), %rdx                 # 8-byte Reload
+	imulq	%r9, %rdx
+	movq	-24(%rsp), %r12                 # 8-byte Reload
+	mulxq	%r12, %r14, %rbx
+	mulxq	-80(%rsp), %r15, %rcx           # 8-byte Folded Reload
+	mulxq	-56(%rsp), %r11, %rdi           # 8-byte Folded Reload
+	mulxq	-72(%rsp), %rdx, %rax           # 8-byte Folded Reload
+	addq	%r11, %rax
+	adcq	%r15, %rdi
+	adcq	%r14, %rcx
+	adcq	$0, %rbx
+	addq	%r9, %rdx
+	adcq	%rsi, %rax
+	adcq	%r8, %rdi
+	adcq	%rbp, %rcx
+	adcq	%r10, %rbx
+	movzbl	-89(%rsp), %r11d                # 1-byte Folded Reload
+	adcq	$0, %r11
+	movq	16(%r13), %rdx
+	mulxq	-48(%rsp), %r14, %r8            # 8-byte Folded Reload
+	mulxq	-88(%rsp), %r15, %r10           # 8-byte Folded Reload
+	mulxq	-40(%rsp), %r13, %rbp           # 8-byte Folded Reload
+	mulxq	-16(%rsp), %r9, %rsi            # 8-byte Folded Reload
+	addq	%r13, %rsi
+	adcq	%r15, %rbp
+	adcq	%r14, %r10
+	adcq	$0, %r8
+	addq	%rax, %r9
+	adcq	%rdi, %rsi
 	adcq	%rcx, %rbp
-	adcq	%rax, %rsi
-	adcq	%r9, %rdi
-	sbbq	%r11, %r11
-	andl	$1, %r11d
-	movq	%r8, %rdx
-	imulq	%r13, %rdx
-	mulxq	%r14, %r9, %rcx
-	movq	%r14, %r12
-	movq	-40(%rsp), %r14         # 8-byte Reload
-	mulxq	%r14, %r10, %rax
-	mulxq	-8(%rsp), %rdx, %rbx    # 8-byte Folded Reload
-	addq	%r10, %rbx
-	adcq	%r9, %rax
-	adcq	$0, %rcx
-	addq	%r8, %rdx
-	adcq	%rbp, %rbx
+	adcq	%rbx, %r10
+	adcq	%r11, %r8
+	setb	%r11b
+	movq	-64(%rsp), %rdx                 # 8-byte Reload
+	imulq	%r9, %rdx
+	mulxq	%r12, %r14, %rbx
+	mulxq	-80(%rsp), %r15, %rcx           # 8-byte Folded Reload
+	movq	-56(%rsp), %r12                 # 8-byte Reload
+	mulxq	%r12, %r13, %rdi
+	mulxq	-72(%rsp), %rdx, %rax           # 8-byte Folded Reload
+	addq	%r13, %rax
+	adcq	%r15, %rdi
+	adcq	%r14, %rcx
+	adcq	$0, %rbx
+	addq	%r9, %rdx
 	adcq	%rsi, %rax
-	adcq	%rdi, %rcx
+	adcq	%rbp, %rdi
+	adcq	%r10, %rcx
+	adcq	%r8, %rbx
+	movzbl	%r11b, %r11d
 	adcq	$0, %r11
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	movq	16(%rdx), %rdx
-	mulxq	%r15, %r9, %rsi
-	mulxq	-16(%rsp), %r10, %r15   # 8-byte Folded Reload
-	mulxq	-48(%rsp), %r8, %rdi    # 8-byte Folded Reload
+	movq	-32(%rsp), %rdx                 # 8-byte Reload
+	movq	24(%rdx), %rdx
+	mulxq	-48(%rsp), %r14, %r8            # 8-byte Folded Reload
+	mulxq	-88(%rsp), %r15, %r9            # 8-byte Folded Reload
+	mulxq	-40(%rsp), %r13, %rbp           # 8-byte Folded Reload
+	mulxq	-16(%rsp), %r10, %rsi           # 8-byte Folded Reload
+	addq	%r13, %rsi
+	adcq	%r15, %rbp
+	adcq	%r14, %r9
+	adcq	$0, %r8
+	addq	%rax, %r10
+	adcq	%rdi, %rsi
+	adcq	%rcx, %rbp
+	adcq	%rbx, %r9
+	adcq	%r11, %r8
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	-64(%rsp), %rdx                 # 8-byte Reload
+	imulq	%r10, %rdx
+	movq	-72(%rsp), %rcx                 # 8-byte Reload
+	mulxq	%rcx, %rdi, %rax
+	mulxq	%r12, %r13, %r14
+	addq	%rax, %r13
+	mulxq	-80(%rsp), %rbx, %r15           # 8-byte Folded Reload
+	adcq	%r14, %rbx
+	movq	-24(%rsp), %r11                 # 8-byte Reload
+	mulxq	%r11, %r14, %r12
+	adcq	%r15, %r14
+	adcq	$0, %r12
 	addq	%r10, %rdi
-	adcq	%r9, %r15
+	adcq	%rsi, %r13
+	adcq	%rbp, %rbx
+	adcq	%r9, %r14
+	movzbl	-88(%rsp), %esi                 # 1-byte Folded Reload
+	adcq	%r8, %r12
 	adcq	$0, %rsi
-	addq	%rbx, %r8
-	adcq	%rax, %rdi
-	adcq	%rcx, %r15
-	adcq	%r11, %rsi
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	imulq	%r8, %r13
-	movq	%r13, %rdx
-	mulxq	%r12, %r9, %rbp
-	movq	%r13, %rdx
-	mulxq	%r14, %r10, %rax
-	movq	%r13, %rdx
-	movq	-8(%rsp), %rcx          # 8-byte Reload
-	mulxq	%rcx, %r11, %rdx
-	addq	%r10, %rdx
-	adcq	%r9, %rax
-	adcq	$0, %rbp
-	addq	%r8, %r11
-	adcq	%rdi, %rdx
-	adcq	%r15, %rax
-	adcq	%rsi, %rbp
-	adcq	$0, %rbx
-	movq	%rdx, %rsi
-	subq	%rcx, %rsi
-	movq	%rax, %rdi
-	sbbq	%r14, %rdi
-	movq	%rbp, %rcx
-	sbbq	%r12, %rcx
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%rbp, %rcx
-	testb	%bl, %bl
-	cmovneq	%rdx, %rsi
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	movq	%rsi, (%rdx)
-	cmovneq	%rax, %rdi
-	movq	%rdi, 8(%rdx)
-	movq	%rcx, 16(%rdx)
+	movq	%r13, %rdi
+	subq	%rcx, %rdi
+	movq	%rbx, %rcx
+	sbbq	-56(%rsp), %rcx                 # 8-byte Folded Reload
+	movq	%r14, %rax
+	sbbq	-80(%rsp), %rax                 # 8-byte Folded Reload
+	movq	%r12, %rdx
+	sbbq	%r11, %rdx
+	sbbq	$0, %rsi
+	testb	$1, %sil
+	cmovneq	%r12, %rdx
+	movq	-8(%rsp), %rsi                  # 8-byte Reload
+	movq	%rdx, 24(%rsi)
+	cmovneq	%r14, %rax
+	movq	%rax, 16(%rsi)
+	cmovneq	%rbx, %rcx
+	movq	%rcx, 8(%rsi)
+	cmovneq	%r13, %rdi
+	movq	%rdi, (%rsi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -1337,120 +1579,180 @@ mcl_fp_mont3Lbmi2:                      # @mcl_fp_mont3Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end38:
-	.size	mcl_fp_mont3Lbmi2, .Lfunc_end38-mcl_fp_mont3Lbmi2
-
-	.globl	mcl_fp_montNF3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF3Lbmi2,@function
-mcl_fp_montNF3Lbmi2:                    # @mcl_fp_montNF3Lbmi2
-# BB#0:
+.Lfunc_end26:
+	.size	mcl_fp_mont4Lbmi2, .Lfunc_end26-mcl_fp_mont4Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montNF4Lbmi2             # -- Begin function mcl_fp_montNF4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF4Lbmi2,@function
+mcl_fp_montNF4Lbmi2:                    # @mcl_fp_montNF4Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r8
-	movq	%rdx, %r10
-	movq	%r10, -16(%rsp)         # 8-byte Spill
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	(%rsi), %rcx
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rdi
-	movq	%rdi, -32(%rsp)         # 8-byte Spill
-	movq	(%r10), %rax
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	(%rsi), %rdi
+	movq	%rdi, -56(%rsp)                 # 8-byte Spill
+	movq	8(%rsi), %rbp
+	movq	%rbp, -64(%rsp)                 # 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdx, %r15
+	movq	%rdx, -24(%rsp)                 # 8-byte Spill
+	movq	%rbp, %rdx
+	mulxq	%rax, %rbp, %r9
 	movq	%rdi, %rdx
-	mulxq	%rax, %rbx, %r14
-	movq	%rcx, %rdx
-	mulxq	%rax, %r15, %r12
-	movq	16(%rsi), %r11
-	addq	%rbx, %r12
-	movq	%r11, %rdx
+	mulxq	%rax, %r12, %rbx
+	movq	16(%rsi), %rdx
+	movq	%rdx, -40(%rsp)                 # 8-byte Spill
+	addq	%rbp, %rbx
+	mulxq	%rax, %r14, %rbp
+	adcq	%r9, %r14
+	movq	24(%rsi), %rdx
+	movq	%rdx, -80(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r8, %rdi
+	adcq	%rbp, %r8
+	adcq	$0, %rdi
+	movq	-8(%rcx), %r13
+	movq	(%rcx), %rax
+	movq	%rax, -48(%rsp)                 # 8-byte Spill
+	movq	%r13, %rdx
+	imulq	%r12, %rdx
+	mulxq	%rax, %rax, %r11
+	addq	%r12, %rax
+	movq	8(%rcx), %rax
+	movq	%rax, -16(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %rbp, %r10
+	adcq	%rbx, %rbp
+	movq	16(%rcx), %rax
+	movq	%rax, -32(%rsp)                 # 8-byte Spill
 	mulxq	%rax, %rsi, %rbx
 	adcq	%r14, %rsi
+	movq	24(%rcx), %rax
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %rcx, %rdx
+	adcq	%r8, %rcx
+	adcq	$0, %rdi
+	addq	%r11, %rbp
+	adcq	%r10, %rsi
+	adcq	%rbx, %rcx
+	adcq	%rdx, %rdi
+	movq	8(%r15), %rdx
+	movq	-64(%rsp), %r12                 # 8-byte Reload
+	mulxq	%r12, %rbx, %r9
+	movq	-56(%rsp), %r15                 # 8-byte Reload
+	mulxq	%r15, %r10, %r11
+	addq	%rbx, %r11
+	mulxq	-40(%rsp), %rax, %r8            # 8-byte Folded Reload
+	adcq	%r9, %rax
+	mulxq	-80(%rsp), %r9, %rbx            # 8-byte Folded Reload
+	adcq	%r8, %r9
 	adcq	$0, %rbx
-	movq	-8(%r8), %r9
-	movq	(%r8), %r14
-	movq	%r15, %rdx
-	imulq	%r9, %rdx
-	mulxq	%r14, %rbp, %r13
-	addq	%r15, %rbp
-	movq	8(%r8), %r15
-	mulxq	%r15, %rdi, %rbp
-	adcq	%r12, %rdi
-	movq	16(%r8), %r12
-	mulxq	%r12, %rax, %r8
-	adcq	%rsi, %rax
+	addq	%rbp, %r10
+	adcq	%rsi, %r11
+	adcq	%rcx, %rax
+	adcq	%rdi, %r9
 	adcq	$0, %rbx
-	addq	%r13, %rdi
-	movq	8(%r10), %rdx
-	adcq	%rbp, %rax
-	adcq	%r8, %rbx
-	movq	-32(%rsp), %r10         # 8-byte Reload
-	mulxq	%r10, %rsi, %r8
-	mulxq	%rcx, %r13, %rbp
+	movq	%r13, %rdx
+	imulq	%r10, %rdx
+	movq	-48(%rsp), %r14                 # 8-byte Reload
+	mulxq	%r14, %rcx, %r8
+	addq	%r10, %rcx
+	mulxq	-16(%rsp), %r10, %rdi           # 8-byte Folded Reload
+	adcq	%r11, %r10
+	mulxq	-32(%rsp), %rcx, %rsi           # 8-byte Folded Reload
+	adcq	%rax, %rcx
+	mulxq	-72(%rsp), %rax, %rdx           # 8-byte Folded Reload
+	adcq	%r9, %rax
+	adcq	$0, %rbx
+	addq	%r8, %r10
+	adcq	%rdi, %rcx
+	adcq	%rsi, %rax
+	adcq	%rdx, %rbx
+	movq	-24(%rsp), %rdx                 # 8-byte Reload
+	movq	16(%rdx), %rdx
+	mulxq	%r12, %rsi, %r8
+	mulxq	%r15, %r11, %rbp
 	addq	%rsi, %rbp
-	mulxq	%r11, %rcx, %rsi
-	adcq	%r8, %rcx
+	movq	-40(%rsp), %r12                 # 8-byte Reload
+	mulxq	%r12, %rdi, %r9
+	adcq	%r8, %rdi
+	mulxq	-80(%rsp), %r8, %rsi            # 8-byte Folded Reload
+	adcq	%r9, %r8
 	adcq	$0, %rsi
-	addq	%rdi, %r13
-	adcq	%rax, %rbp
-	adcq	%rbx, %rcx
+	addq	%r10, %r11
+	adcq	%rcx, %rbp
+	adcq	%rax, %rdi
+	adcq	%rbx, %r8
 	adcq	$0, %rsi
 	movq	%r13, %rdx
-	imulq	%r9, %rdx
-	mulxq	%r14, %rdi, %rbx
-	addq	%r13, %rdi
-	mulxq	%r15, %rax, %rdi
-	adcq	%rbp, %rax
-	mulxq	%r12, %rbp, %rdx
-	adcq	%rcx, %rbp
+	imulq	%r11, %rdx
+	mulxq	%r14, %rax, %r10
+	addq	%r11, %rax
+	movq	-16(%rsp), %r14                 # 8-byte Reload
+	mulxq	%r14, %r9, %rbx
+	adcq	%rbp, %r9
+	movq	-32(%rsp), %r15                 # 8-byte Reload
+	mulxq	%r15, %rax, %rbp
+	adcq	%rdi, %rax
+	mulxq	-72(%rsp), %rcx, %rdx           # 8-byte Folded Reload
+	adcq	%r8, %rcx
 	adcq	$0, %rsi
-	addq	%rbx, %rax
-	adcq	%rdi, %rbp
+	addq	%r10, %r9
+	adcq	%rbx, %rax
+	adcq	%rbp, %rcx
 	adcq	%rdx, %rsi
-	movq	-16(%rsp), %rcx         # 8-byte Reload
-	movq	16(%rcx), %rdx
-	mulxq	%r10, %rbx, %r8
-	mulxq	-24(%rsp), %r10, %rdi   # 8-byte Folded Reload
-	addq	%rbx, %rdi
-	mulxq	%r11, %rcx, %rbx
-	adcq	%r8, %rcx
-	adcq	$0, %rbx
-	addq	%rax, %r10
-	adcq	%rbp, %rdi
-	adcq	%rsi, %rcx
-	adcq	$0, %rbx
-	imulq	%r10, %r9
-	movq	%r9, %rdx
-	mulxq	%r14, %rdx, %r8
-	addq	%r10, %rdx
-	movq	%r9, %rdx
-	mulxq	%r12, %rbp, %rsi
-	movq	%r9, %rdx
-	mulxq	%r15, %rax, %rdx
+	movq	-24(%rsp), %rdx                 # 8-byte Reload
+	movq	24(%rdx), %rdx
+	mulxq	-64(%rsp), %rbx, %r8            # 8-byte Folded Reload
+	mulxq	-56(%rsp), %r11, %rbp           # 8-byte Folded Reload
+	addq	%rbx, %rbp
+	mulxq	%r12, %rdi, %rbx
+	adcq	%r8, %rdi
+	mulxq	-80(%rsp), %r8, %r10            # 8-byte Folded Reload
+	adcq	%rbx, %r8
+	adcq	$0, %r10
+	addq	%r9, %r11
+	adcq	%rax, %rbp
+	adcq	%rcx, %rdi
+	adcq	%rsi, %r8
+	adcq	$0, %r10
+	imulq	%r11, %r13
+	movq	%r13, %rdx
+	movq	-48(%rsp), %rbx                 # 8-byte Reload
+	mulxq	%rbx, %rcx, %r9
+	addq	%r11, %rcx
+	mulxq	%r14, %r11, %r12
+	adcq	%rbp, %r11
+	mulxq	%r15, %rax, %rcx
 	adcq	%rdi, %rax
+	movq	-72(%rsp), %rsi                 # 8-byte Reload
+	mulxq	%rsi, %rbp, %rdx
+	adcq	%r8, %rbp
+	adcq	$0, %r10
+	addq	%r9, %r11
+	adcq	%r12, %rax
 	adcq	%rcx, %rbp
-	adcq	$0, %rbx
-	addq	%r8, %rax
-	adcq	%rdx, %rbp
-	adcq	%rsi, %rbx
-	movq	%rax, %rcx
-	subq	%r14, %rcx
-	movq	%rbp, %rdx
-	sbbq	%r15, %rdx
-	movq	%rbx, %rsi
-	sbbq	%r12, %rsi
-	movq	%rsi, %rdi
-	sarq	$63, %rdi
-	cmovsq	%rax, %rcx
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	%rcx, (%rax)
-	cmovsq	%rbp, %rdx
-	movq	%rdx, 8(%rax)
-	cmovsq	%rbx, %rsi
-	movq	%rsi, 16(%rax)
+	adcq	%rdx, %r10
+	movq	%r11, %rcx
+	subq	%rbx, %rcx
+	movq	%rax, %rdx
+	sbbq	%r14, %rdx
+	movq	%rbp, %rdi
+	sbbq	%r15, %rdi
+	movq	%r10, %rbx
+	sbbq	%rsi, %rbx
+	cmovsq	%r10, %rbx
+	movq	-8(%rsp), %rsi                  # 8-byte Reload
+	movq	%rbx, 24(%rsi)
+	cmovsq	%rbp, %rdi
+	movq	%rdi, 16(%rsi)
+	cmovsq	%rax, %rdx
+	movq	%rdx, 8(%rsi)
+	cmovsq	%r11, %rcx
+	movq	%rcx, (%rsi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -1458,14 +1760,14 @@ mcl_fp_montNF3Lbmi2:                    # @mcl_fp_montNF3Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end39:
-	.size	mcl_fp_montNF3Lbmi2, .Lfunc_end39-mcl_fp_montNF3Lbmi2
-
-	.globl	mcl_fp_montRed3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed3Lbmi2,@function
-mcl_fp_montRed3Lbmi2:                   # @mcl_fp_montRed3Lbmi2
-# BB#0:
+.Lfunc_end27:
+	.size	mcl_fp_montNF4Lbmi2, .Lfunc_end27-mcl_fp_montNF4Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRed4Lbmi2            # -- Begin function mcl_fp_montRed4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed4Lbmi2,@function
+mcl_fp_montRed4Lbmi2:                   # @mcl_fp_montRed4Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
@@ -1473,78 +1775,239 @@ mcl_fp_montRed3Lbmi2:                   # @mcl_fp_montRed3Lbmi2
 	pushq	%r12
 	pushq	%rbx
 	movq	%rdx, %rcx
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	-8(%rcx), %r15
-	movq	(%rcx), %r9
-	movq	(%rsi), %rbx
-	movq	%rbx, %rdx
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	-8(%rdx), %r15
+	movq	(%rdx), %rdi
+	movq	%rdi, -64(%rsp)                 # 8-byte Spill
+	movq	(%rsi), %rax
+	movq	%rax, %rdx
 	imulq	%r15, %rdx
-	movq	16(%rcx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	mulxq	%rax, %r14, %r11
-	movq	%rax, %rbp
-	movq	8(%rcx), %r10
-	mulxq	%r10, %rax, %r13
-	mulxq	%r9, %rdx, %rcx
-	addq	%rax, %rcx
-	adcq	%r14, %r13
+	movq	24(%rcx), %rbp
+	mulxq	%rbp, %r12, %r11
+	movq	%rbp, %r8
+	movq	%rbp, -40(%rsp)                 # 8-byte Spill
+	movq	16(%rcx), %r9
+	mulxq	%r9, %r10, %r13
+	movq	8(%rcx), %rcx
+	movq	%rcx, -56(%rsp)                 # 8-byte Spill
+	mulxq	%rcx, %rcx, %rbx
+	mulxq	%rdi, %rdx, %rbp
+	addq	%rcx, %rbp
+	adcq	%r10, %rbx
+	adcq	%r12, %r13
 	adcq	$0, %r11
-	movq	40(%rsi), %r14
-	movq	32(%rsi), %r12
-	addq	%rbx, %rdx
-	adcq	8(%rsi), %rcx
-	adcq	16(%rsi), %r13
-	adcq	24(%rsi), %r11
-	adcq	$0, %r12
-	adcq	$0, %r14
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rcx, %rdx
-	imulq	%r15, %rdx
-	mulxq	%rbp, %rbp, %rdi
-	mulxq	%r10, %r8, %rbx
-	mulxq	%r9, %rdx, %rax
+	addq	%rax, %rdx
+	movq	%rsi, -48(%rsp)                 # 8-byte Spill
+	adcq	8(%rsi), %rbp
+	adcq	16(%rsi), %rbx
+	adcq	24(%rsi), %r13
+	adcq	32(%rsi), %r11
+	setb	-65(%rsp)                       # 1-byte Folded Spill
+	movq	%r15, %rdx
+	imulq	%rbp, %rdx
+	mulxq	%r8, %r14, %r12
+	movq	%r9, -16(%rsp)                  # 8-byte Spill
+	mulxq	%r9, %r10, %rsi
+	mulxq	-64(%rsp), %rdi, %r8            # 8-byte Folded Reload
+	mulxq	-56(%rsp), %rax, %rcx           # 8-byte Folded Reload
 	addq	%r8, %rax
-	adcq	%rbp, %rbx
-	adcq	$0, %rdi
-	addq	%rcx, %rdx
+	adcq	%r10, %rcx
+	adcq	%r14, %rsi
+	movzbl	-65(%rsp), %edx                 # 1-byte Folded Reload
+	adcq	%rdx, %r12
+	addq	%rbp, %rdi
+	adcq	%rbx, %rax
+	adcq	%r13, %rcx
+	adcq	%r11, %rsi
+	movq	-48(%rsp), %r10                 # 8-byte Reload
+	adcq	40(%r10), %r12
+	setb	-65(%rsp)                       # 1-byte Folded Spill
+	movq	%r15, %rdx
+	imulq	%rax, %rdx
+	mulxq	-40(%rsp), %rdi, %r11           # 8-byte Folded Reload
+	movq	%rdi, -24(%rsp)                 # 8-byte Spill
+	mulxq	%r9, %rdi, %r13
+	movq	%rdi, -32(%rsp)                 # 8-byte Spill
+	movq	-64(%rsp), %r8                  # 8-byte Reload
+	mulxq	%r8, %rdi, %r14
+	movq	-56(%rsp), %r9                  # 8-byte Reload
+	mulxq	%r9, %rbp, %rbx
+	addq	%r14, %rbp
+	adcq	-32(%rsp), %rbx                 # 8-byte Folded Reload
+	adcq	-24(%rsp), %r13                 # 8-byte Folded Reload
+	movzbl	-65(%rsp), %edx                 # 1-byte Folded Reload
+	adcq	%rdx, %r11
+	addq	%rax, %rdi
+	adcq	%rcx, %rbp
+	adcq	%rsi, %rbx
+	adcq	%r12, %r13
+	adcq	48(%r10), %r11
+	setb	%dil
+	imulq	%rbp, %r15
+	movq	%r15, %rdx
+	mulxq	%r8, %rcx, %rax
+	mulxq	%r9, %r12, %rsi
+	addq	%rax, %r12
+	movq	-16(%rsp), %r8                  # 8-byte Reload
+	mulxq	%r8, %rax, %r9
+	adcq	%rsi, %rax
+	movq	-40(%rsp), %r10                 # 8-byte Reload
+	mulxq	%r10, %r15, %r14
+	adcq	%r9, %r15
+	movzbl	%dil, %edi
+	adcq	%r14, %rdi
+	addq	%rbp, %rcx
+	adcq	%rbx, %r12
 	adcq	%r13, %rax
-	adcq	%r11, %rbx
-	adcq	%r12, %rdi
-	adcq	$0, %r14
-	adcq	$0, %rsi
-	imulq	%rax, %r15
+	adcq	%r11, %r15
+	movq	-48(%rsp), %rcx                 # 8-byte Reload
+	adcq	56(%rcx), %rdi
+	xorl	%ebx, %ebx
+	movq	%r12, %rcx
+	subq	-64(%rsp), %rcx                 # 8-byte Folded Reload
+	movq	%rax, %rbp
+	sbbq	-56(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	%r15, %rdx
+	sbbq	%r8, %rdx
+	movq	%rdi, %rsi
+	sbbq	%r10, %rsi
+	sbbq	%rbx, %rbx
+	testb	$1, %bl
+	cmovneq	%rdi, %rsi
+	movq	-8(%rsp), %rdi                  # 8-byte Reload
+	movq	%rsi, 24(%rdi)
+	cmovneq	%r15, %rdx
+	movq	%rdx, 16(%rdi)
+	cmovneq	%rax, %rbp
+	movq	%rbp, 8(%rdi)
+	cmovneq	%r12, %rcx
+	movq	%rcx, (%rdi)
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	retq
+.Lfunc_end28:
+	.size	mcl_fp_montRed4Lbmi2, .Lfunc_end28-mcl_fp_montRed4Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRedNF4Lbmi2          # -- Begin function mcl_fp_montRedNF4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF4Lbmi2,@function
+mcl_fp_montRedNF4Lbmi2:                 # @mcl_fp_montRedNF4Lbmi2
+# %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %rcx
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	-8(%rdx), %r15
+	movq	(%rdx), %rdi
+	movq	(%rsi), %rax
+	movq	%rax, %rdx
+	imulq	%r15, %rdx
+	movq	24(%rcx), %rbp
+	mulxq	%rbp, %r12, %r11
+	movq	%rbp, %r14
+	movq	%rbp, -32(%rsp)                 # 8-byte Spill
+	movq	16(%rcx), %r8
+	mulxq	%r8, %r9, %r13
+	movq	%r8, -40(%rsp)                  # 8-byte Spill
+	movq	8(%rcx), %rcx
+	movq	%rcx, -64(%rsp)                 # 8-byte Spill
+	mulxq	%rcx, %rbp, %rbx
+	mulxq	%rdi, %rdx, %rcx
+	movq	%rdi, -56(%rsp)                 # 8-byte Spill
+	addq	%rbp, %rcx
+	adcq	%r9, %rbx
+	adcq	%r12, %r13
+	adcq	$0, %r11
+	addq	%rax, %rdx
+	movq	%rsi, -48(%rsp)                 # 8-byte Spill
+	adcq	8(%rsi), %rcx
+	adcq	16(%rsi), %rbx
+	adcq	24(%rsi), %r13
+	adcq	32(%rsi), %r11
+	setb	%r10b
 	movq	%r15, %rdx
-	movq	-16(%rsp), %r13         # 8-byte Reload
-	mulxq	%r13, %r8, %rcx
+	imulq	%rcx, %rdx
+	mulxq	%r14, %r14, %r12
+	mulxq	%r8, %r9, %rbp
+	mulxq	%rdi, %rdi, %r8
+	mulxq	-64(%rsp), %rax, %rsi           # 8-byte Folded Reload
+	addq	%r8, %rax
+	adcq	%r9, %rsi
+	adcq	%r14, %rbp
+	movzbl	%r10b, %edx
+	adcq	%rdx, %r12
+	addq	%rcx, %rdi
+	adcq	%rbx, %rax
+	adcq	%r13, %rsi
+	adcq	%r11, %rbp
+	movq	-48(%rsp), %r10                 # 8-byte Reload
+	adcq	40(%r10), %r12
+	setb	-65(%rsp)                       # 1-byte Folded Spill
 	movq	%r15, %rdx
-	mulxq	%r10, %r11, %r12
+	imulq	%rax, %rdx
+	mulxq	-32(%rsp), %rcx, %r11           # 8-byte Folded Reload
+	movq	%rcx, -16(%rsp)                 # 8-byte Spill
+	mulxq	-40(%rsp), %rcx, %r13           # 8-byte Folded Reload
+	movq	%rcx, -24(%rsp)                 # 8-byte Spill
+	movq	-56(%rsp), %r9                  # 8-byte Reload
+	mulxq	%r9, %rdi, %r14
+	movq	-64(%rsp), %r8                  # 8-byte Reload
+	mulxq	%r8, %rbx, %rcx
+	addq	%r14, %rbx
+	adcq	-24(%rsp), %rcx                 # 8-byte Folded Reload
+	adcq	-16(%rsp), %r13                 # 8-byte Folded Reload
+	movzbl	-65(%rsp), %edx                 # 1-byte Folded Reload
+	adcq	%rdx, %r11
+	addq	%rax, %rdi
+	adcq	%rsi, %rbx
+	adcq	%rbp, %rcx
+	adcq	%r12, %r13
+	adcq	48(%r10), %r11
+	setb	%al
+	imulq	%rbx, %r15
 	movq	%r15, %rdx
-	mulxq	%r9, %r15, %rdx
-	addq	%r11, %rdx
-	adcq	%r8, %r12
-	adcq	$0, %rcx
-	addq	%rax, %r15
-	adcq	%rbx, %rdx
-	adcq	%rdi, %r12
-	adcq	%r14, %rcx
-	adcq	$0, %rsi
-	movq	%rdx, %rax
-	subq	%r9, %rax
-	movq	%r12, %rdi
-	sbbq	%r10, %rdi
-	movq	%rcx, %rbp
-	sbbq	%r13, %rbp
-	sbbq	$0, %rsi
-	andl	$1, %esi
-	cmovneq	%rcx, %rbp
-	testb	%sil, %sil
-	cmovneq	%rdx, %rax
-	movq	-8(%rsp), %rcx          # 8-byte Reload
-	movq	%rax, (%rcx)
-	cmovneq	%r12, %rdi
-	movq	%rdi, 8(%rcx)
-	movq	%rbp, 16(%rcx)
+	mulxq	%r9, %rsi, %rbp
+	mulxq	%r8, %r12, %rdi
+	addq	%rbp, %r12
+	movq	-40(%rsp), %r8                  # 8-byte Reload
+	mulxq	%r8, %rbp, %r9
+	adcq	%rdi, %rbp
+	movq	-32(%rsp), %r10                 # 8-byte Reload
+	mulxq	%r10, %r15, %r14
+	adcq	%r9, %r15
+	movzbl	%al, %eax
+	adcq	%r14, %rax
+	addq	%rbx, %rsi
+	adcq	%rcx, %r12
+	adcq	%r13, %rbp
+	adcq	%r11, %r15
+	movq	-48(%rsp), %rcx                 # 8-byte Reload
+	adcq	56(%rcx), %rax
+	movq	%r12, %rcx
+	subq	-56(%rsp), %rcx                 # 8-byte Folded Reload
+	movq	%rbp, %rsi
+	sbbq	-64(%rsp), %rsi                 # 8-byte Folded Reload
+	movq	%r15, %rdi
+	sbbq	%r8, %rdi
+	movq	%rax, %rdx
+	sbbq	%r10, %rdx
+	cmovsq	%rax, %rdx
+	movq	-8(%rsp), %rax                  # 8-byte Reload
+	movq	%rdx, 24(%rax)
+	cmovsq	%r15, %rdi
+	movq	%rdi, 16(%rax)
+	cmovsq	%rbp, %rsi
+	movq	%rsi, 8(%rax)
+	cmovsq	%r12, %rcx
+	movq	%rcx, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -1552,500 +2015,587 @@ mcl_fp_montRed3Lbmi2:                   # @mcl_fp_montRed3Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end40:
-	.size	mcl_fp_montRed3Lbmi2, .Lfunc_end40-mcl_fp_montRed3Lbmi2
-
-	.globl	mcl_fp_addPre3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre3Lbmi2,@function
-mcl_fp_addPre3Lbmi2:                    # @mcl_fp_addPre3Lbmi2
-# BB#0:
-	movq	16(%rdx), %rax
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rax
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
+.Lfunc_end29:
+	.size	mcl_fp_montRedNF4Lbmi2, .Lfunc_end29-mcl_fp_montRedNF4Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addPre4Lbmi2             # -- Begin function mcl_fp_addPre4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre4Lbmi2,@function
+mcl_fp_addPre4Lbmi2:                    # @mcl_fp_addPre4Lbmi2
+# %bb.0:
+	movq	24(%rsi), %rax
+	movq	16(%rsi), %rcx
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r8
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rcx
+	adcq	24(%rdx), %rax
+	movq	%rax, 24(%rdi)
+	movq	%rcx, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	setb	%al
+	movzbl	%al, %eax
 	retq
-.Lfunc_end41:
-	.size	mcl_fp_addPre3Lbmi2, .Lfunc_end41-mcl_fp_addPre3Lbmi2
-
-	.globl	mcl_fp_subPre3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre3Lbmi2,@function
-mcl_fp_subPre3Lbmi2:                    # @mcl_fp_subPre3Lbmi2
-# BB#0:
+.Lfunc_end30:
+	.size	mcl_fp_addPre4Lbmi2, .Lfunc_end30-mcl_fp_addPre4Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subPre4Lbmi2             # -- Begin function mcl_fp_subPre4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre4Lbmi2,@function
+mcl_fp_subPre4Lbmi2:                    # @mcl_fp_subPre4Lbmi2
+# %bb.0:
+	movq	24(%rsi), %rcx
 	movq	16(%rsi), %r8
-	movq	(%rsi), %rcx
+	movq	(%rsi), %r9
 	movq	8(%rsi), %rsi
 	xorl	%eax, %eax
-	subq	(%rdx), %rcx
+	subq	(%rdx), %r9
 	sbbq	8(%rdx), %rsi
 	sbbq	16(%rdx), %r8
-	movq	%rcx, (%rdi)
-	movq	%rsi, 8(%rdi)
+	sbbq	24(%rdx), %rcx
+	movq	%rcx, 24(%rdi)
 	movq	%r8, 16(%rdi)
-	sbbq	$0, %rax
+	movq	%rsi, 8(%rdi)
+	movq	%r9, (%rdi)
+	sbbq	%rax, %rax
 	andl	$1, %eax
 	retq
-.Lfunc_end42:
-	.size	mcl_fp_subPre3Lbmi2, .Lfunc_end42-mcl_fp_subPre3Lbmi2
-
-	.globl	mcl_fp_shr1_3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_3Lbmi2,@function
-mcl_fp_shr1_3Lbmi2:                     # @mcl_fp_shr1_3Lbmi2
-# BB#0:
-	movq	16(%rsi), %rax
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rdx
-	shrdq	$1, %rdx, %rcx
-	movq	%rcx, (%rdi)
-	shrdq	$1, %rax, %rdx
+.Lfunc_end31:
+	.size	mcl_fp_subPre4Lbmi2, .Lfunc_end31-mcl_fp_subPre4Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_shr1_4Lbmi2              # -- Begin function mcl_fp_shr1_4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_4Lbmi2,@function
+mcl_fp_shr1_4Lbmi2:                     # @mcl_fp_shr1_4Lbmi2
+# %bb.0:
+	movq	(%rsi), %rax
+	movq	8(%rsi), %r8
+	movq	16(%rsi), %rdx
+	movq	24(%rsi), %rcx
+	movq	%rcx, %rsi
+	shrq	%rsi
+	movq	%rsi, 24(%rdi)
+	shldq	$63, %rdx, %rcx
+	movq	%rcx, 16(%rdi)
+	shldq	$63, %r8, %rdx
 	movq	%rdx, 8(%rdi)
-	shrq	%rax
-	movq	%rax, 16(%rdi)
+	shrdq	$1, %r8, %rax
+	movq	%rax, (%rdi)
 	retq
-.Lfunc_end43:
-	.size	mcl_fp_shr1_3Lbmi2, .Lfunc_end43-mcl_fp_shr1_3Lbmi2
-
-	.globl	mcl_fp_add3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add3Lbmi2,@function
-mcl_fp_add3Lbmi2:                       # @mcl_fp_add3Lbmi2
-# BB#0:
-	movq	16(%rdx), %r8
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r8
+.Lfunc_end32:
+	.size	mcl_fp_shr1_4Lbmi2, .Lfunc_end32-mcl_fp_shr1_4Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_add4Lbmi2                # -- Begin function mcl_fp_add4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_add4Lbmi2,@function
+mcl_fp_add4Lbmi2:                       # @mcl_fp_add4Lbmi2
+# %bb.0:
+	movq	24(%rsi), %r8
+	movq	16(%rsi), %r9
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r9
+	adcq	24(%rdx), %r8
+	movq	%r8, 24(%rdi)
+	movq	%r9, 16(%rdi)
+	movq	%rsi, 8(%rdi)
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r8, 16(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
+	setb	%dl
+	movzbl	%dl, %edx
 	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB44_2
-# BB#1:                                 # %nocarry
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %r9
+	sbbq	24(%rcx), %r8
+	sbbq	$0, %rdx
+	testb	$1, %dl
+	jne	.LBB33_2
+# %bb.1:                                # %nocarry
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r8, 16(%rdi)
-.LBB44_2:                               # %carry
+	movq	%rsi, 8(%rdi)
+	movq	%r9, 16(%rdi)
+	movq	%r8, 24(%rdi)
+.LBB33_2:                               # %carry
 	retq
-.Lfunc_end44:
-	.size	mcl_fp_add3Lbmi2, .Lfunc_end44-mcl_fp_add3Lbmi2
-
-	.globl	mcl_fp_addNF3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF3Lbmi2,@function
-mcl_fp_addNF3Lbmi2:                     # @mcl_fp_addNF3Lbmi2
-# BB#0:
+.Lfunc_end33:
+	.size	mcl_fp_add4Lbmi2, .Lfunc_end33-mcl_fp_add4Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addNF4Lbmi2              # -- Begin function mcl_fp_addNF4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF4Lbmi2,@function
+mcl_fp_addNF4Lbmi2:                     # @mcl_fp_addNF4Lbmi2
+# %bb.0:
+	pushq	%rbx
+	movq	24(%rdx), %r11
 	movq	16(%rdx), %r8
-	movq	(%rdx), %r10
-	movq	8(%rdx), %r9
-	addq	(%rsi), %r10
-	adcq	8(%rsi), %r9
+	movq	(%rdx), %r9
+	movq	8(%rdx), %r10
+	addq	(%rsi), %r9
+	adcq	8(%rsi), %r10
 	adcq	16(%rsi), %r8
-	movq	%r10, %rsi
+	adcq	24(%rsi), %r11
+	movq	%r9, %rsi
 	subq	(%rcx), %rsi
-	movq	%r9, %rdx
+	movq	%r10, %rdx
 	sbbq	8(%rcx), %rdx
 	movq	%r8, %rax
 	sbbq	16(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r10, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
+	movq	%r11, %rbx
+	sbbq	24(%rcx), %rbx
+	cmovsq	%r11, %rbx
+	movq	%rbx, 24(%rdi)
 	cmovsq	%r8, %rax
 	movq	%rax, 16(%rdi)
-	retq
-.Lfunc_end45:
-	.size	mcl_fp_addNF3Lbmi2, .Lfunc_end45-mcl_fp_addNF3Lbmi2
-
-	.globl	mcl_fp_sub3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub3Lbmi2,@function
-mcl_fp_sub3Lbmi2:                       # @mcl_fp_sub3Lbmi2
-# BB#0:
-	movq	16(%rsi), %r8
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r9
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r9
-	sbbq	16(%rdx), %r8
-	movq	%rax, (%rdi)
-	movq	%r9, 8(%rdi)
-	movq	%r8, 16(%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB46_2
-# BB#1:                                 # %nocarry
-	retq
-.LBB46_2:                               # %carry
-	movq	8(%rcx), %rdx
-	movq	16(%rcx), %rsi
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r9, %rdx
+	cmovsq	%r10, %rdx
 	movq	%rdx, 8(%rdi)
-	adcq	%r8, %rsi
-	movq	%rsi, 16(%rdi)
+	cmovsq	%r9, %rsi
+	movq	%rsi, (%rdi)
+	popq	%rbx
 	retq
-.Lfunc_end46:
-	.size	mcl_fp_sub3Lbmi2, .Lfunc_end46-mcl_fp_sub3Lbmi2
-
-	.globl	mcl_fp_subNF3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF3Lbmi2,@function
-mcl_fp_subNF3Lbmi2:                     # @mcl_fp_subNF3Lbmi2
-# BB#0:
+.Lfunc_end34:
+	.size	mcl_fp_addNF4Lbmi2, .Lfunc_end34-mcl_fp_addNF4Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_sub4Lbmi2                # -- Begin function mcl_fp_sub4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_sub4Lbmi2,@function
+mcl_fp_sub4Lbmi2:                       # @mcl_fp_sub4Lbmi2
+# %bb.0:
+	movq	24(%rsi), %r9
 	movq	16(%rsi), %r10
 	movq	(%rsi), %r8
-	movq	8(%rsi), %r9
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
 	subq	(%rdx), %r8
-	sbbq	8(%rdx), %r9
+	sbbq	8(%rdx), %rsi
 	sbbq	16(%rdx), %r10
-	movq	%r10, %rdx
+	sbbq	24(%rdx), %r9
+	movq	%r9, 24(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rax, %rax
+	testb	$1, %al
+	jne	.LBB35_2
+# %bb.1:                                # %nocarry
+	retq
+.LBB35_2:                               # %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %r10
+	adcq	24(%rcx), %r9
+	movq	%r9, 24(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	retq
+.Lfunc_end35:
+	.size	mcl_fp_sub4Lbmi2, .Lfunc_end35-mcl_fp_sub4Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subNF4Lbmi2              # -- Begin function mcl_fp_subNF4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF4Lbmi2,@function
+mcl_fp_subNF4Lbmi2:                     # @mcl_fp_subNF4Lbmi2
+# %bb.0:
+	pushq	%rbx
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %r8
+	movq	(%rsi), %r9
+	movq	8(%rsi), %r10
+	subq	(%rdx), %r9
+	sbbq	8(%rdx), %r10
+	sbbq	16(%rdx), %r8
+	sbbq	24(%rdx), %r11
+	movq	%r11, %rdx
 	sarq	$63, %rdx
-	movq	%rdx, %rsi
-	shldq	$1, %r10, %rsi
-	andq	(%rcx), %rsi
+	movq	24(%rcx), %rsi
+	andq	%rdx, %rsi
 	movq	16(%rcx), %rax
 	andq	%rdx, %rax
-	andq	8(%rcx), %rdx
-	addq	%r8, %rsi
-	movq	%rsi, (%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r10, %rax
+	movq	8(%rcx), %rbx
+	andq	%rdx, %rbx
+	andq	(%rcx), %rdx
+	addq	%r9, %rdx
+	movq	%rdx, (%rdi)
+	adcq	%r10, %rbx
+	movq	%rbx, 8(%rdi)
+	adcq	%r8, %rax
 	movq	%rax, 16(%rdi)
+	adcq	%r11, %rsi
+	movq	%rsi, 24(%rdi)
+	popq	%rbx
 	retq
-.Lfunc_end47:
-	.size	mcl_fp_subNF3Lbmi2, .Lfunc_end47-mcl_fp_subNF3Lbmi2
-
-	.globl	mcl_fpDbl_add3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add3Lbmi2,@function
-mcl_fpDbl_add3Lbmi2:                    # @mcl_fpDbl_add3Lbmi2
-# BB#0:
-	pushq	%r15
+.Lfunc_end36:
+	.size	mcl_fp_subNF4Lbmi2, .Lfunc_end36-mcl_fp_subNF4Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_add4Lbmi2             # -- Begin function mcl_fpDbl_add4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add4Lbmi2,@function
+mcl_fpDbl_add4Lbmi2:                    # @mcl_fpDbl_add4Lbmi2
+# %bb.0:
 	pushq	%r14
 	pushq	%rbx
-	movq	40(%rdx), %r10
-	movq	40(%rsi), %r8
-	movq	32(%rdx), %r11
-	movq	24(%rdx), %r14
-	movq	24(%rsi), %r15
-	movq	32(%rsi), %r9
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
+	movq	56(%rsi), %r11
+	movq	48(%rsi), %r10
+	movq	40(%rsi), %r9
+	movq	32(%rsi), %r8
+	movq	24(%rsi), %rax
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %r14
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r14
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rbx
+	adcq	24(%rdx), %rax
+	adcq	32(%rdx), %r8
+	adcq	40(%rdx), %r9
+	adcq	48(%rdx), %r10
+	adcq	56(%rdx), %r11
+	movq	%rax, 24(%rdi)
 	movq	%rbx, 16(%rdi)
-	adcq	%r14, %r15
-	adcq	%r11, %r9
-	adcq	%r10, %r8
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r15, %rdx
+	movq	%rsi, 8(%rdi)
+	movq	%r14, (%rdi)
+	setb	%al
+	movzbl	%al, %r14d
+	movq	%r8, %rdx
 	subq	(%rcx), %rdx
 	movq	%r9, %rsi
 	sbbq	8(%rcx), %rsi
-	movq	%r8, %rbx
+	movq	%r10, %rbx
 	sbbq	16(%rcx), %rbx
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%r15, %rdx
-	movq	%rdx, 24(%rdi)
-	testb	%al, %al
+	movq	%r11, %rax
+	sbbq	24(%rcx), %rax
+	sbbq	$0, %r14
+	testb	$1, %r14b
+	cmovneq	%r11, %rax
+	movq	%rax, 56(%rdi)
+	cmovneq	%r10, %rbx
+	movq	%rbx, 48(%rdi)
 	cmovneq	%r9, %rsi
-	movq	%rsi, 32(%rdi)
-	cmovneq	%r8, %rbx
-	movq	%rbx, 40(%rdi)
+	movq	%rsi, 40(%rdi)
+	cmovneq	%r8, %rdx
+	movq	%rdx, 32(%rdi)
 	popq	%rbx
 	popq	%r14
-	popq	%r15
 	retq
-.Lfunc_end48:
-	.size	mcl_fpDbl_add3Lbmi2, .Lfunc_end48-mcl_fpDbl_add3Lbmi2
-
-	.globl	mcl_fpDbl_sub3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub3Lbmi2,@function
-mcl_fpDbl_sub3Lbmi2:                    # @mcl_fpDbl_sub3Lbmi2
-# BB#0:
+.Lfunc_end37:
+	.size	mcl_fpDbl_add4Lbmi2, .Lfunc_end37-mcl_fpDbl_add4Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sub4Lbmi2             # -- Begin function mcl_fpDbl_sub4Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub4Lbmi2,@function
+mcl_fpDbl_sub4Lbmi2:                    # @mcl_fpDbl_sub4Lbmi2
+# %bb.0:
 	pushq	%r15
 	pushq	%r14
-	pushq	%r12
 	pushq	%rbx
-	movq	40(%rdx), %r10
-	movq	40(%rsi), %r8
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %r14
-	movq	(%rsi), %rbx
+	movq	56(%rsi), %r8
+	movq	48(%rsi), %r9
+	movq	40(%rsi), %r10
+	movq	32(%rsi), %r11
+	movq	24(%rsi), %r15
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %r14
 	movq	8(%rsi), %rax
 	xorl	%esi, %esi
-	subq	(%rdx), %rbx
+	subq	(%rdx), %r14
 	sbbq	8(%rdx), %rax
-	movq	24(%rdx), %r15
-	movq	32(%rdx), %r12
-	sbbq	16(%rdx), %r14
-	movq	%rbx, (%rdi)
+	sbbq	16(%rdx), %rbx
+	sbbq	24(%rdx), %r15
+	sbbq	32(%rdx), %r11
+	sbbq	40(%rdx), %r10
+	sbbq	48(%rdx), %r9
+	sbbq	56(%rdx), %r8
+	movq	%r15, 24(%rdi)
+	movq	%rbx, 16(%rdi)
 	movq	%rax, 8(%rdi)
-	movq	%r14, 16(%rdi)
-	sbbq	%r15, %r11
-	sbbq	%r12, %r9
-	sbbq	%r10, %r8
-	movl	$0, %eax
-	sbbq	$0, %rax
-	andl	$1, %eax
-	movq	(%rcx), %rdx
-	cmoveq	%rsi, %rdx
-	testb	%al, %al
-	movq	16(%rcx), %rax
-	cmoveq	%rsi, %rax
-	cmovneq	8(%rcx), %rsi
-	addq	%r11, %rdx
-	movq	%rdx, 24(%rdi)
-	adcq	%r9, %rsi
+	movq	%r14, (%rdi)
+	sbbq	%rsi, %rsi
+	andl	$1, %esi
+	negq	%rsi
+	movq	24(%rcx), %rax
+	andq	%rsi, %rax
+	movq	16(%rcx), %rdx
+	andq	%rsi, %rdx
+	movq	8(%rcx), %rbx
+	andq	%rsi, %rbx
+	andq	(%rcx), %rsi
+	addq	%r11, %rsi
 	movq	%rsi, 32(%rdi)
+	adcq	%r10, %rbx
+	movq	%rbx, 40(%rdi)
+	adcq	%r9, %rdx
+	movq	%rdx, 48(%rdi)
 	adcq	%r8, %rax
-	movq	%rax, 40(%rdi)
+	movq	%rax, 56(%rdi)
 	popq	%rbx
-	popq	%r12
 	popq	%r14
 	popq	%r15
 	retq
-.Lfunc_end49:
-	.size	mcl_fpDbl_sub3Lbmi2, .Lfunc_end49-mcl_fpDbl_sub3Lbmi2
-
-	.globl	mcl_fp_mulUnitPre4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre4Lbmi2,@function
-mcl_fp_mulUnitPre4Lbmi2:                # @mcl_fp_mulUnitPre4Lbmi2
-# BB#0:
-	mulxq	24(%rsi), %r8, %r11
-	mulxq	16(%rsi), %r9, %rax
-	mulxq	8(%rsi), %r10, %rcx
-	mulxq	(%rsi), %rdx, %rsi
-	movq	%rdx, (%rdi)
-	addq	%r10, %rsi
-	movq	%rsi, 8(%rdi)
+.Lfunc_end38:
+	.size	mcl_fpDbl_sub4Lbmi2, .Lfunc_end38-mcl_fpDbl_sub4Lbmi2
+                                        # -- End function
+	.globl	mulPv384x64bmi2                 # -- Begin function mulPv384x64bmi2
+	.p2align	4, 0x90
+	.type	mulPv384x64bmi2,@function
+mulPv384x64bmi2:                        # @mulPv384x64bmi2
+# %bb.0:
+	movq	%rdi, %rax
+	mulxq	(%rsi), %rdi, %rcx
+	movq	%rdi, (%rax)
+	mulxq	8(%rsi), %rdi, %r8
+	addq	%rcx, %rdi
+	movq	%rdi, 8(%rax)
+	mulxq	16(%rsi), %rdi, %r9
+	adcq	%r8, %rdi
+	movq	%rdi, 16(%rax)
+	mulxq	24(%rsi), %rcx, %rdi
 	adcq	%r9, %rcx
-	movq	%rcx, 16(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 24(%rdi)
-	adcq	$0, %r11
-	movq	%r11, 32(%rdi)
+	movq	%rcx, 24(%rax)
+	mulxq	32(%rsi), %rcx, %r8
+	adcq	%rdi, %rcx
+	movq	%rcx, 32(%rax)
+	mulxq	40(%rsi), %rcx, %rdx
+	adcq	%r8, %rcx
+	movq	%rcx, 40(%rax)
+	adcq	$0, %rdx
+	movq	%rdx, 48(%rax)
 	retq
-.Lfunc_end50:
-	.size	mcl_fp_mulUnitPre4Lbmi2, .Lfunc_end50-mcl_fp_mulUnitPre4Lbmi2
-
-	.globl	mcl_fpDbl_mulPre4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre4Lbmi2,@function
-mcl_fpDbl_mulPre4Lbmi2:                 # @mcl_fpDbl_mulPre4Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
+.Lfunc_end39:
+	.size	mulPv384x64bmi2, .Lfunc_end39-mulPv384x64bmi2
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre6Lbmi2         # -- Begin function mcl_fp_mulUnitPre6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre6Lbmi2,@function
+mcl_fp_mulUnitPre6Lbmi2:                # @mcl_fp_mulUnitPre6Lbmi2
+# %bb.0:
+	pushq	%r15
+	pushq	%r14
 	pushq	%r12
 	pushq	%rbx
-	movq	(%rsi), %r14
-	movq	8(%rsi), %r10
-	movq	(%rdx), %rcx
-	movq	%rdx, %rbp
-	movq	%r14, %rdx
-	mulxq	%rcx, %rdx, %r15
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %r9
+	mulxq	40(%rsi), %r8, %r11
+	mulxq	32(%rsi), %r9, %r12
+	mulxq	24(%rsi), %r10, %rcx
+	mulxq	16(%rsi), %r14, %rbx
+	mulxq	8(%rsi), %r15, %rax
+	mulxq	(%rsi), %rdx, %rsi
 	movq	%rdx, (%rdi)
-	movq	%r10, %rdx
-	mulxq	%rcx, %rbx, %r12
-	addq	%r15, %rbx
-	movq	%r9, %rdx
-	mulxq	%rcx, %r13, %r15
-	adcq	%r12, %r13
-	movq	%r11, %rdx
-	mulxq	%rcx, %rcx, %r12
-	adcq	%r15, %rcx
-	adcq	$0, %r12
-	movq	8(%rbp), %rax
-	movq	%r14, %rdx
-	mulxq	%rax, %r8, %rdx
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	addq	%rbx, %r8
-	movq	%r10, %rdx
-	mulxq	%rax, %r15, %rdx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	adcq	%r13, %r15
-	movq	%r9, %rdx
-	mulxq	%rax, %rbx, %r13
-	adcq	%rcx, %rbx
-	movq	%r11, %rdx
-	mulxq	%rax, %rcx, %rax
-	adcq	%r12, %rcx
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-8(%rsp), %r15          # 8-byte Folded Reload
-	adcq	-16(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	%r13, %rcx
-	movq	%r8, 8(%rdi)
-	adcq	%rax, %r12
-	movq	%rbp, %r13
-	movq	16(%r13), %rax
-	movq	%r14, %rdx
-	mulxq	%rax, %rdx, %r8
-	addq	%r15, %rdx
-	movq	%rdx, 16(%rdi)
-	movq	%r10, %rdx
-	mulxq	%rax, %rbp, %r10
-	adcq	%rbx, %rbp
-	movq	%r11, %rdx
-	mulxq	%rax, %r14, %r11
-	movq	%r9, %rdx
-	mulxq	%rax, %r15, %rdx
-	adcq	%rcx, %r15
-	adcq	%r12, %r14
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r8, %rbp
-	adcq	%r10, %r15
-	adcq	%rdx, %r14
-	adcq	%r11, %rcx
-	movq	24(%r13), %rdx
-	mulxq	24(%rsi), %rbx, %r8
-	mulxq	(%rsi), %rax, %r9
-	addq	%rbp, %rax
-	mulxq	16(%rsi), %rbp, %r10
-	mulxq	8(%rsi), %rsi, %rdx
-	movq	%rax, 24(%rdi)
-	adcq	%r15, %rsi
-	adcq	%r14, %rbp
-	adcq	%rcx, %rbx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r9, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%rdx, %rbp
-	movq	%rbp, 40(%rdi)
+	addq	%r15, %rsi
+	movq	%rsi, 8(%rdi)
+	adcq	%r14, %rax
+	movq	%rax, 16(%rdi)
 	adcq	%r10, %rbx
-	movq	%rbx, 48(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 56(%rdi)
+	movq	%rbx, 24(%rdi)
+	adcq	%r9, %rcx
+	movq	%rcx, 32(%rdi)
+	adcq	%r8, %r12
+	movq	%r12, 40(%rdi)
+	adcq	$0, %r11
+	movq	%r11, 48(%rdi)
 	popq	%rbx
 	popq	%r12
-	popq	%r13
 	popq	%r14
 	popq	%r15
-	popq	%rbp
 	retq
-.Lfunc_end51:
-	.size	mcl_fpDbl_mulPre4Lbmi2, .Lfunc_end51-mcl_fpDbl_mulPre4Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre4Lbmi2,@function
-mcl_fpDbl_sqrPre4Lbmi2:                 # @mcl_fpDbl_sqrPre4Lbmi2
-# BB#0:
+.Lfunc_end40:
+	.size	mcl_fp_mulUnitPre6Lbmi2, .Lfunc_end40-mcl_fp_mulUnitPre6Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre6Lbmi2          # -- Begin function mcl_fpDbl_mulPre6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre6Lbmi2,@function
+mcl_fpDbl_mulPre6Lbmi2:                 # @mcl_fpDbl_mulPre6Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	24(%rsi), %r8
-	movq	16(%rsi), %r9
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rax
-	movq	%rcx, %rdx
-	mulxq	%rcx, %rdx, %r11
-	movq	%rdx, (%rdi)
+	movq	(%rsi), %r9
+	movq	8(%rsi), %r13
+	movq	(%rdx), %rcx
+	movq	%rdx, %r12
+	movq	%rdx, -40(%rsp)                 # 8-byte Spill
 	movq	%r9, %rdx
-	mulxq	%rcx, %rbp, %r10
-	movq	%rbp, -16(%rsp)         # 8-byte Spill
-	movq	%r10, -8(%rsp)          # 8-byte Spill
-	movq	%rax, %rdx
-	mulxq	%rcx, %r12, %r15
-	addq	%r12, %r11
-	movq	%r15, %rbx
-	adcq	%rbp, %rbx
-	movq	%r8, %rdx
-	mulxq	%rcx, %rcx, %r13
-	adcq	%r10, %rcx
-	adcq	$0, %r13
-	addq	%r12, %r11
+	movq	%r9, -24(%rsp)                  # 8-byte Spill
+	mulxq	%rcx, %r8, %rax
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	16(%rsi), %rax
+	movq	%rax, -32(%rsp)                 # 8-byte Spill
+	movq	24(%rsi), %rbx
+	movq	%rbx, -80(%rsp)                 # 8-byte Spill
+	movq	32(%rsi), %rbp
+	movq	%rbp, -72(%rsp)                 # 8-byte Spill
+	movq	40(%rsi), %rdx
+	movq	%r8, (%rdi)
+	movq	%rdi, %r15
+	movq	%rdi, -16(%rsp)                 # 8-byte Spill
+	movq	%rdx, %r8
+	movq	%rdx, -48(%rsp)                 # 8-byte Spill
+	mulxq	%rcx, %rdx, %rsi
+	movq	%rdx, -104(%rsp)                # 8-byte Spill
+	movq	%rbp, %rdx
+	mulxq	%rcx, %r10, %r14
+	movq	%rbx, %rdx
+	mulxq	%rcx, %r11, %rdi
 	movq	%rax, %rdx
-	mulxq	%rax, %rbp, %r12
+	mulxq	%rcx, %rbx, %rax
+	movq	%r13, %rdx
+	movq	%r13, -64(%rsp)                 # 8-byte Spill
+	mulxq	%rcx, %rcx, %rbp
+	addq	-112(%rsp), %rcx                # 8-byte Folded Reload
 	adcq	%rbx, %rbp
-	movq	%r8, %rdx
-	mulxq	%rax, %r10, %rbx
-	movq	%r9, %rdx
-	mulxq	%rax, %r14, %rdx
-	adcq	%r14, %rcx
-	adcq	%r13, %r10
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r15, %rbp
+	adcq	%r11, %rax
+	adcq	%r10, %rdi
+	adcq	-104(%rsp), %r14                # 8-byte Folded Reload
+	adcq	$0, %rsi
+	movq	%rsi, -96(%rsp)                 # 8-byte Spill
+	movq	8(%r12), %rdx
+	mulxq	%r9, %rbx, %rsi
+	movq	%rsi, -88(%rsp)                 # 8-byte Spill
+	addq	%rcx, %rbx
+	movq	%rbx, 8(%r15)
+	mulxq	%r8, %r10, %rcx
+	movq	%rcx, -104(%rsp)                # 8-byte Spill
+	movq	-72(%rsp), %rcx                 # 8-byte Reload
+	mulxq	%rcx, %r9, %rbx
+	movq	%rbx, -112(%rsp)                # 8-byte Spill
+	movq	-80(%rsp), %r12                 # 8-byte Reload
+	mulxq	%r12, %r11, %rsi
+	mulxq	-32(%rsp), %r8, %r15            # 8-byte Folded Reload
+	mulxq	%r13, %rbx, %rdx
+	adcq	%rbp, %rbx
+	adcq	%rax, %r8
+	adcq	%rdi, %r11
+	adcq	%r14, %r9
+	adcq	-96(%rsp), %r10                 # 8-byte Folded Reload
+	setb	%al
+	addq	-88(%rsp), %rbx                 # 8-byte Folded Reload
+	adcq	%rdx, %r8
+	adcq	%r15, %r11
+	adcq	%rsi, %r9
+	adcq	-112(%rsp), %r10                # 8-byte Folded Reload
+	movzbl	%al, %r13d
+	adcq	-104(%rsp), %r13                # 8-byte Folded Reload
+	movq	-40(%rsp), %r15                 # 8-byte Reload
+	movq	16(%r15), %rdx
+	mulxq	-48(%rsp), %rsi, %rax           # 8-byte Folded Reload
+	movq	%rsi, -104(%rsp)                # 8-byte Spill
+	movq	%rax, -88(%rsp)                 # 8-byte Spill
+	mulxq	%rcx, %rax, %r14
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	mulxq	%r12, %rax, %rbp
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	mulxq	-64(%rsp), %rcx, %r12           # 8-byte Folded Reload
+	mulxq	-24(%rsp), %rax, %rsi           # 8-byte Folded Reload
+	addq	%rcx, %rsi
+	mulxq	-32(%rsp), %rcx, %rdi           # 8-byte Folded Reload
 	adcq	%r12, %rcx
-	adcq	%rdx, %r10
-	movq	%rdx, %r12
-	adcq	%rbx, %rax
-	movq	%r11, 8(%rdi)
-	addq	-16(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, 16(%rdi)
-	movq	%r8, %rdx
-	mulxq	%r9, %r11, %r8
-	movq	%r9, %rdx
-	mulxq	%r9, %r15, %rdx
-	adcq	%r14, %rcx
-	adcq	%r10, %r15
-	adcq	%rax, %r11
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	-8(%rsp), %rcx          # 8-byte Folded Reload
-	adcq	%r12, %r15
-	adcq	%rdx, %r11
-	adcq	%r8, %rax
-	movq	24(%rsi), %rdx
-	mulxq	16(%rsi), %rbx, %r8
-	mulxq	8(%rsi), %rbp, %r9
-	mulxq	(%rsi), %rsi, %r10
+	adcq	-96(%rsp), %rdi                 # 8-byte Folded Reload
+	adcq	-112(%rsp), %rbp                # 8-byte Folded Reload
+	adcq	-104(%rsp), %r14                # 8-byte Folded Reload
+	movq	-88(%rsp), %r12                 # 8-byte Reload
+	adcq	$0, %r12
+	addq	%rbx, %rax
+	movq	-16(%rsp), %rdx                 # 8-byte Reload
+	movq	%rax, 16(%rdx)
+	adcq	%r8, %rsi
+	adcq	%r11, %rcx
+	adcq	%r9, %rdi
+	adcq	%r10, %rbp
+	adcq	%r13, %r14
+	adcq	$0, %r12
+	movq	%r12, -88(%rsp)                 # 8-byte Spill
+	movq	24(%r15), %rdx
+	movq	-48(%rsp), %r15                 # 8-byte Reload
+	mulxq	%r15, %rbx, %rax
+	movq	%rbx, -96(%rsp)                 # 8-byte Spill
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	mulxq	-72(%rsp), %rbx, %rax           # 8-byte Folded Reload
+	movq	%rbx, -56(%rsp)                 # 8-byte Spill
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	mulxq	-80(%rsp), %rax, %r11           # 8-byte Folded Reload
+	movq	%rax, -8(%rsp)                  # 8-byte Spill
+	mulxq	-64(%rsp), %r8, %r12            # 8-byte Folded Reload
+	mulxq	-24(%rsp), %rax, %rbx           # 8-byte Folded Reload
+	addq	%r8, %rbx
+	movq	-32(%rsp), %r13                 # 8-byte Reload
+	mulxq	%r13, %r9, %r10
+	adcq	%r12, %r9
+	adcq	-8(%rsp), %r10                  # 8-byte Folded Reload
+	adcq	-56(%rsp), %r11                 # 8-byte Folded Reload
+	movq	-112(%rsp), %r8                 # 8-byte Reload
+	adcq	-96(%rsp), %r8                  # 8-byte Folded Reload
+	movq	-104(%rsp), %r12                # 8-byte Reload
+	adcq	$0, %r12
+	addq	%rsi, %rax
+	movq	-16(%rsp), %rdx                 # 8-byte Reload
+	movq	%rax, 24(%rdx)
+	adcq	%rcx, %rbx
+	adcq	%rdi, %r9
+	adcq	%rbp, %r10
+	adcq	%r14, %r11
+	adcq	-88(%rsp), %r8                  # 8-byte Folded Reload
+	movq	%r8, -112(%rsp)                 # 8-byte Spill
+	adcq	$0, %r12
+	movq	%r12, -104(%rsp)                # 8-byte Spill
+	movq	-40(%rsp), %rax                 # 8-byte Reload
+	movq	32(%rax), %rdx
+	mulxq	%r15, %rcx, %rax
+	movq	%rcx, -88(%rsp)                 # 8-byte Spill
+	mulxq	-72(%rsp), %rcx, %r14           # 8-byte Folded Reload
+	movq	%rcx, -96(%rsp)                 # 8-byte Spill
+	mulxq	-80(%rsp), %rcx, %rbp           # 8-byte Folded Reload
+	movq	%rcx, -56(%rsp)                 # 8-byte Spill
+	mulxq	-64(%rsp), %rdi, %r15           # 8-byte Folded Reload
+	movq	-24(%rsp), %r12                 # 8-byte Reload
+	mulxq	%r12, %rcx, %rsi
+	addq	%rdi, %rsi
+	mulxq	%r13, %rdi, %r8
+	adcq	%r15, %rdi
+	adcq	-56(%rsp), %r8                  # 8-byte Folded Reload
+	adcq	-96(%rsp), %rbp                 # 8-byte Folded Reload
+	adcq	-88(%rsp), %r14                 # 8-byte Folded Reload
+	adcq	$0, %rax
+	addq	%rbx, %rcx
+	movq	-16(%rsp), %r15                 # 8-byte Reload
+	movq	%rcx, 32(%r15)
+	adcq	%r9, %rsi
+	adcq	%r10, %rdi
+	adcq	%r11, %r8
+	adcq	-112(%rsp), %rbp                # 8-byte Folded Reload
+	movq	-40(%rsp), %rcx                 # 8-byte Reload
+	movq	40(%rcx), %rdx
+	adcq	-104(%rsp), %r14                # 8-byte Folded Reload
+	mulxq	-64(%rsp), %rbx, %r9            # 8-byte Folded Reload
+	mulxq	%r12, %rcx, %r11
+	adcq	$0, %rax
+	addq	%rbx, %r11
+	mulxq	%r13, %r12, %r10
+	adcq	%r9, %r12
+	mulxq	-80(%rsp), %r13, %r9            # 8-byte Folded Reload
+	adcq	%r10, %r13
+	mulxq	-72(%rsp), %rbx, %r10           # 8-byte Folded Reload
+	adcq	%r9, %rbx
+	mulxq	-48(%rsp), %rdx, %r9            # 8-byte Folded Reload
+	adcq	%r10, %rdx
+	adcq	$0, %r9
 	addq	%rcx, %rsi
-	movq	%rsi, 24(%rdi)
-	adcq	%r15, %rbp
-	adcq	%r11, %rbx
-	mulxq	%rdx, %rdx, %rcx
+	movq	%rsi, 40(%r15)
+	adcq	%rdi, %r11
+	movq	%r11, 48(%r15)
+	adcq	%r8, %r12
+	movq	%r12, 56(%r15)
+	adcq	%rbp, %r13
+	movq	%r13, 64(%r15)
+	adcq	%r14, %rbx
+	movq	%rbx, 72(%r15)
 	adcq	%rax, %rdx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r10, %rbp
-	movq	%rbp, 32(%rdi)
-	adcq	%r9, %rbx
-	movq	%rbx, 40(%rdi)
-	adcq	%r8, %rdx
-	movq	%rdx, 48(%rdi)
-	adcq	%rcx, %rax
-	movq	%rax, 56(%rdi)
+	movq	%rdx, 80(%r15)
+	adcq	$0, %r9
+	movq	%r9, 88(%r15)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -2053,191 +2603,205 @@ mcl_fpDbl_sqrPre4Lbmi2:                 # @mcl_fpDbl_sqrPre4Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end52:
-	.size	mcl_fpDbl_sqrPre4Lbmi2, .Lfunc_end52-mcl_fpDbl_sqrPre4Lbmi2
-
-	.globl	mcl_fp_mont4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont4Lbmi2,@function
-mcl_fp_mont4Lbmi2:                      # @mcl_fp_mont4Lbmi2
-# BB#0:
+.Lfunc_end41:
+	.size	mcl_fpDbl_mulPre6Lbmi2, .Lfunc_end41-mcl_fpDbl_mulPre6Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre6Lbmi2          # -- Begin function mcl_fpDbl_sqrPre6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre6Lbmi2,@function
+mcl_fpDbl_sqrPre6Lbmi2:                 # @mcl_fpDbl_sqrPre6Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rdi, -88(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %rdi
-	movq	%rdi, -40(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rax
-	movq	%rdi, %rdx
-	mulxq	%rax, %r10, %r15
-	movq	16(%rsi), %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	mulxq	%rax, %rbx, %r11
+	subq	$168, %rsp
+	movq	%rdi, -48(%rsp)                 # 8-byte Spill
+	movq	40(%rsi), %rdx
+	movq	32(%rsi), %rcx
+	mulxq	%rcx, %rax, %rdi
+	movq	%rdi, -104(%rsp)                # 8-byte Spill
+	movq	%rax, -128(%rsp)                # 8-byte Spill
+	movq	24(%rsi), %rax
+	mulxq	%rax, %r14, %r13
+	movq	%r14, -112(%rsp)                # 8-byte Spill
+	movq	%r13, -64(%rsp)                 # 8-byte Spill
+	movq	16(%rsi), %r10
+	mulxq	%r10, %r8, %r11
+	movq	%r8, 24(%rsp)                   # 8-byte Spill
+	movq	%r11, -88(%rsp)                 # 8-byte Spill
 	movq	(%rsi), %rdi
-	movq	%rdi, -56(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rdx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	mulxq	%rax, %rbp, %r14
-	movq	%rdi, %rdx
-	mulxq	%rax, %r13, %r12
-	addq	%rbp, %r12
-	adcq	%rbx, %r14
-	adcq	%r10, %r11
-	adcq	$0, %r15
-	movq	-8(%rcx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	(%rcx), %rdi
-	movq	%rdi, -24(%rsp)         # 8-byte Spill
-	movq	%r13, %rdx
-	imulq	%rax, %rdx
-	movq	24(%rcx), %rsi
-	movq	%rsi, -72(%rsp)         # 8-byte Spill
-	movq	16(%rcx), %rbp
-	movq	%rbp, -8(%rsp)          # 8-byte Spill
-	movq	8(%rcx), %rax
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	mulxq	%rsi, %r10, %r8
-	mulxq	%rbp, %r9, %rbx
-	mulxq	%rax, %rsi, %rcx
-	mulxq	%rdi, %rdx, %rbp
+	movq	%rdi, -96(%rsp)                 # 8-byte Spill
+	movq	8(%rsi), %r15
+	mulxq	%r15, %r9, %r12
+	movq	%r9, 40(%rsp)                   # 8-byte Spill
+	mulxq	%rdi, %rsi, %rbx
+	movq	%rsi, -56(%rsp)                 # 8-byte Spill
+	mulxq	%rdx, %rbp, %rdx
+	movq	%rbx, %rdi
+	addq	%r9, %rdi
+	movq	%rdi, 120(%rsp)                 # 8-byte Spill
+	movq	%r12, %rdi
+	adcq	%r8, %rdi
+	movq	%rdi, 128(%rsp)                 # 8-byte Spill
+	movq	%r11, %rdi
+	adcq	%r14, %rdi
+	movq	%rdi, 136(%rsp)                 # 8-byte Spill
+	adcq	-128(%rsp), %r13                # 8-byte Folded Reload
+	movq	%r13, 144(%rsp)                 # 8-byte Spill
+	movq	-104(%rsp), %r9                 # 8-byte Reload
+	adcq	%r9, %rbp
+	movq	%rbp, 152(%rsp)                 # 8-byte Spill
+	adcq	$0, %rdx
+	movq	%rdx, 160(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rdx
+	mulxq	%rax, %rdx, %r14
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	%rcx, %rdx
+	mulxq	%r10, %r13, %r11
+	movq	%r13, -16(%rsp)                 # 8-byte Spill
+	movq	%r11, -80(%rsp)                 # 8-byte Spill
+	mulxq	%r15, %rsi, %rdi
+	movq	%rsi, 16(%rsp)                  # 8-byte Spill
+	movq	%rdi, -72(%rsp)                 # 8-byte Spill
+	mulxq	-96(%rsp), %rdx, %r8            # 8-byte Folded Reload
+	movq	%rdx, 32(%rsp)                  # 8-byte Spill
+	movq	%rcx, %rdx
+	mulxq	%rcx, %rdx, %rcx
+	movq	%r8, %rbp
 	addq	%rsi, %rbp
-	adcq	%r9, %rcx
-	adcq	%r10, %rbx
-	adcq	$0, %r8
-	addq	%r13, %rdx
-	adcq	%r12, %rbp
-	adcq	%r14, %rcx
-	adcq	%r11, %rbx
-	adcq	%r15, %r8
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	movq	8(%rdx), %rdx
-	mulxq	-40(%rsp), %r12, %r14   # 8-byte Folded Reload
-	mulxq	-48(%rsp), %r15, %r11   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %r9, %rdi    # 8-byte Folded Reload
-	mulxq	-56(%rsp), %r10, %rsi   # 8-byte Folded Reload
-	addq	%r9, %rsi
-	adcq	%r15, %rdi
-	adcq	%r12, %r11
-	adcq	$0, %r14
-	addq	%rbp, %r10
-	adcq	%rcx, %rsi
-	adcq	%rbx, %rdi
-	adcq	%r8, %r11
-	adcq	%rax, %r14
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	movq	%r10, %rdx
-	imulq	-16(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	-72(%rsp), %r15, %r9    # 8-byte Folded Reload
-	mulxq	-8(%rsp), %r12, %r8     # 8-byte Folded Reload
-	movq	-80(%rsp), %r13         # 8-byte Reload
-	mulxq	%r13, %rbp, %rcx
-	mulxq	-24(%rsp), %rdx, %rax   # 8-byte Folded Reload
-	addq	%rbp, %rax
-	adcq	%r12, %rcx
-	adcq	%r15, %r8
+	movq	%rbp, 96(%rsp)                  # 8-byte Spill
+	adcq	%r13, %rdi
+	movq	%rdi, 88(%rsp)                  # 8-byte Spill
+	adcq	-120(%rsp), %r11                # 8-byte Folded Reload
+	movq	%r11, 80(%rsp)                  # 8-byte Spill
+	adcq	%r14, %rdx
+	movq	%rdx, 104(%rsp)                 # 8-byte Spill
+	adcq	-128(%rsp), %rcx                # 8-byte Folded Reload
+	movq	%rcx, 112(%rsp)                 # 8-byte Spill
 	adcq	$0, %r9
-	addq	%r10, %rdx
-	adcq	%rsi, %rax
-	adcq	%rdi, %rcx
-	adcq	%r11, %r8
-	adcq	%r14, %r9
-	adcq	$0, %rbx
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	movq	16(%rdx), %rdx
-	mulxq	-40(%rsp), %r15, %r11   # 8-byte Folded Reload
-	mulxq	-48(%rsp), %r12, %r14   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %rsi, %rbp   # 8-byte Folded Reload
-	mulxq	-56(%rsp), %r10, %rdi   # 8-byte Folded Reload
-	addq	%rsi, %rdi
-	adcq	%r12, %rbp
-	adcq	%r15, %r14
-	adcq	$0, %r11
-	addq	%rax, %r10
-	adcq	%rcx, %rdi
-	adcq	%r8, %rbp
-	adcq	%r9, %r14
-	adcq	%rbx, %r11
-	sbbq	%rbx, %rbx
+	movq	%r9, -104(%rsp)                 # 8-byte Spill
+	movq	%rax, %rdx
+	mulxq	%r10, %rdi, %r13
+	mulxq	%r15, %rbp, %rcx
+	movq	%rbp, -24(%rsp)                 # 8-byte Spill
+	movq	%rcx, -128(%rsp)                # 8-byte Spill
+	movq	-96(%rsp), %r11                 # 8-byte Reload
+	mulxq	%r11, %rdx, %r9
+	movq	%rdx, -8(%rsp)                  # 8-byte Spill
+	movq	%rax, %rdx
+	mulxq	%rax, %rdx, %rax
+	movq	%r9, %rsi
+	addq	%rbp, %rsi
+	movq	%rsi, 56(%rsp)                  # 8-byte Spill
+	adcq	%rdi, %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	adcq	%r13, %rdx
+	movq	%rdx, 64(%rsp)                  # 8-byte Spill
+	movq	%r13, %rbp
+	adcq	-120(%rsp), %rax                # 8-byte Folded Reload
+	movq	%rax, 72(%rsp)                  # 8-byte Spill
+	adcq	-112(%rsp), %r14                # 8-byte Folded Reload
+	movq	%r14, -120(%rsp)                # 8-byte Spill
+	adcq	$0, -64(%rsp)                   # 8-byte Folded Spill
 	movq	%r10, %rdx
-	imulq	-16(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	%r13, %rcx, %rsi
-	mulxq	-24(%rsp), %r8, %rax    # 8-byte Folded Reload
-	addq	%rcx, %rax
-	mulxq	-8(%rsp), %rcx, %r15    # 8-byte Folded Reload
+	mulxq	%r15, %r13, %rsi
+	mulxq	%r11, %rcx, %rax
+	movq	%rcx, -32(%rsp)                 # 8-byte Spill
+	mulxq	%r10, %rcx, %r10
+	movq	%rax, %rdx
+	addq	%r13, %rdx
+	movq	%rdx, (%rsp)                    # 8-byte Spill
 	adcq	%rsi, %rcx
-	movq	-72(%rsp), %r13         # 8-byte Reload
-	mulxq	%r13, %r9, %rsi
-	adcq	%r15, %r9
-	adcq	$0, %rsi
-	andl	$1, %ebx
-	addq	%r10, %r8
-	adcq	%rdi, %rax
-	adcq	%rbp, %rcx
-	adcq	%r14, %r9
-	adcq	%r11, %rsi
+	movq	%rcx, -40(%rsp)                 # 8-byte Spill
+	adcq	%rdi, %r10
+	movq	%r10, 8(%rsp)                   # 8-byte Spill
+	adcq	-16(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	%rbp, -112(%rsp)                # 8-byte Spill
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	adcq	%rcx, -80(%rsp)                 # 8-byte Folded Spill
+	adcq	$0, -88(%rsp)                   # 8-byte Folded Spill
+	movq	%r15, %rdx
+	mulxq	%r15, %r14, %rdi
+	mulxq	%r11, %r10, %rcx
+	addq	%rcx, %r14
+	adcq	%r13, %rdi
+	adcq	-24(%rsp), %rsi                 # 8-byte Folded Reload
+	movq	16(%rsp), %rdx                  # 8-byte Reload
+	adcq	%rdx, -128(%rsp)                # 8-byte Folded Spill
+	movq	40(%rsp), %rdx                  # 8-byte Reload
+	adcq	%rdx, -72(%rsp)                 # 8-byte Folded Spill
+	movq	%r11, %rdx
+	mulxq	%r11, %rdx, %r11
+	movq	-48(%rsp), %rbp                 # 8-byte Reload
+	movq	%rdx, (%rbp)
+	adcq	$0, %r12
+	addq	%r10, %r11
+	movq	-32(%rsp), %rdx                 # 8-byte Reload
+	adcq	%rdx, %rcx
+	movq	-8(%rsp), %r15                  # 8-byte Reload
+	adcq	%r15, %rax
+	movq	32(%rsp), %rbp                  # 8-byte Reload
+	adcq	%rbp, %r9
+	adcq	-56(%rsp), %r8                  # 8-byte Folded Reload
 	adcq	$0, %rbx
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	movq	24(%rdx), %rdx
-	mulxq	-40(%rsp), %r11, %r8    # 8-byte Folded Reload
-	mulxq	-48(%rsp), %r15, %rdi   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %r12, %r14   # 8-byte Folded Reload
-	mulxq	-56(%rsp), %r10, %rbp   # 8-byte Folded Reload
-	addq	%r12, %rbp
-	adcq	%r15, %r14
-	adcq	%r11, %rdi
-	adcq	$0, %r8
-	addq	%rax, %r10
-	adcq	%rcx, %rbp
-	adcq	%r9, %r14
-	adcq	%rsi, %rdi
-	adcq	%rbx, %r8
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	-16(%rsp), %rdx         # 8-byte Reload
-	imulq	%r10, %rdx
-	mulxq	%r13, %rcx, %rsi
-	movq	%rcx, -16(%rsp)         # 8-byte Spill
-	mulxq	-8(%rsp), %r11, %rbx    # 8-byte Folded Reload
-	mulxq	-80(%rsp), %r15, %rcx   # 8-byte Folded Reload
-	movq	-24(%rsp), %r9          # 8-byte Reload
-	mulxq	%r9, %r12, %r13
-	addq	%r15, %r13
-	adcq	%r11, %rcx
-	adcq	-16(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	$0, %rsi
-	addq	%r10, %r12
-	adcq	%rbp, %r13
+	addq	%r10, %r11
 	adcq	%r14, %rcx
-	adcq	%rdi, %rbx
-	adcq	%r8, %rsi
+	adcq	%rdi, %rax
+	adcq	%rsi, %r9
+	adcq	-128(%rsp), %r8                 # 8-byte Folded Reload
+	adcq	-72(%rsp), %rbx                 # 8-byte Folded Reload
+	adcq	$0, %r12
+	addq	%rdx, %rcx
+	adcq	(%rsp), %rax                    # 8-byte Folded Reload
+	adcq	-40(%rsp), %r9                  # 8-byte Folded Reload
+	adcq	8(%rsp), %r8                    # 8-byte Folded Reload
+	adcq	-112(%rsp), %rbx                # 8-byte Folded Reload
+	adcq	-80(%rsp), %r12                 # 8-byte Folded Reload
+	movq	-88(%rsp), %rsi                 # 8-byte Reload
+	adcq	$0, %rsi
+	addq	%r15, %rax
+	adcq	56(%rsp), %r9                   # 8-byte Folded Reload
+	adcq	48(%rsp), %r8                   # 8-byte Folded Reload
+	adcq	64(%rsp), %rbx                  # 8-byte Folded Reload
+	adcq	72(%rsp), %r12                  # 8-byte Folded Reload
+	adcq	-120(%rsp), %rsi                # 8-byte Folded Reload
+	movq	-64(%rsp), %rdi                 # 8-byte Reload
+	adcq	$0, %rdi
+	addq	%rbp, %r9
+	adcq	96(%rsp), %r8                   # 8-byte Folded Reload
+	adcq	88(%rsp), %rbx                  # 8-byte Folded Reload
+	adcq	80(%rsp), %r12                  # 8-byte Folded Reload
+	adcq	104(%rsp), %rsi                 # 8-byte Folded Reload
+	adcq	112(%rsp), %rdi                 # 8-byte Folded Reload
+	movq	-104(%rsp), %rdx                # 8-byte Reload
+	adcq	$0, %rdx
+	addq	-56(%rsp), %r8                  # 8-byte Folded Reload
+	movq	-48(%rsp), %rbp                 # 8-byte Reload
+	movq	%r11, 8(%rbp)
+	movq	%rcx, 16(%rbp)
+	movq	%rax, 24(%rbp)
+	movq	%r9, 32(%rbp)
+	movq	%r8, 40(%rbp)
+	adcq	120(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%rbx, 48(%rbp)
+	adcq	128(%rsp), %r12                 # 8-byte Folded Reload
+	movq	%r12, 56(%rbp)
+	movq	%rsi, %rax
+	adcq	136(%rsp), %rax                 # 8-byte Folded Reload
+	movq	%rax, 64(%rbp)
+	movq	%rdi, %rax
+	adcq	144(%rsp), %rax                 # 8-byte Folded Reload
+	movq	%rax, 72(%rbp)
+	movq	%rdx, %rax
+	adcq	152(%rsp), %rax                 # 8-byte Folded Reload
+	movq	%rax, 80(%rbp)
+	movq	160(%rsp), %rax                 # 8-byte Reload
 	adcq	$0, %rax
-	movq	%r13, %rdi
-	subq	%r9, %rdi
-	movq	%rcx, %rbp
-	sbbq	-80(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbx, %r8
-	sbbq	-8(%rsp), %r8           # 8-byte Folded Reload
-	movq	%rsi, %rdx
-	sbbq	-72(%rsp), %rdx         # 8-byte Folded Reload
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%rsi, %rdx
-	testb	%al, %al
-	cmovneq	%r13, %rdi
-	movq	-88(%rsp), %rax         # 8-byte Reload
-	movq	%rdi, (%rax)
-	cmovneq	%rcx, %rbp
-	movq	%rbp, 8(%rax)
-	cmovneq	%rbx, %r8
-	movq	%r8, 16(%rax)
-	movq	%rdx, 24(%rax)
+	movq	%rax, 88(%rbp)
+	addq	$168, %rsp
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -2245,184 +2809,385 @@ mcl_fp_mont4Lbmi2:                      # @mcl_fp_mont4Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end53:
-	.size	mcl_fp_mont4Lbmi2, .Lfunc_end53-mcl_fp_mont4Lbmi2
-
-	.globl	mcl_fp_montNF4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF4Lbmi2,@function
-mcl_fp_montNF4Lbmi2:                    # @mcl_fp_montNF4Lbmi2
-# BB#0:
+.Lfunc_end42:
+	.size	mcl_fpDbl_sqrPre6Lbmi2, .Lfunc_end42-mcl_fpDbl_sqrPre6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_mont6Lbmi2               # -- Begin function mcl_fp_mont6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mont6Lbmi2,@function
+mcl_fp_mont6Lbmi2:                      # @mcl_fp_mont6Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rdi, -80(%rsp)         # 8-byte Spill
-	movq	(%rsi), %rdi
-	movq	%rdi, -64(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rbp
-	movq	%rbp, -72(%rsp)         # 8-byte Spill
+	subq	$32, %rsp
+	movq	%rdx, -80(%rsp)                 # 8-byte Spill
+	movq	%rdi, 24(%rsp)                  # 8-byte Spill
+	movq	40(%rsi), %rdi
+	movq	%rdi, -88(%rsp)                 # 8-byte Spill
 	movq	(%rdx), %rax
-	movq	%rdx, %r15
-	movq	%rbp, %rdx
-	mulxq	%rax, %rbp, %r9
 	movq	%rdi, %rdx
-	mulxq	%rax, %r12, %rbx
-	movq	16(%rsi), %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	addq	%rbp, %rbx
-	mulxq	%rax, %r14, %rbp
-	adcq	%r9, %r14
+	mulxq	%rax, %r8, %rbx
+	movq	32(%rsi), %rdx
+	movq	%rdx, -96(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r11, %rdi
 	movq	24(%rsi), %rdx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	mulxq	%rax, %r8, %rdi
-	adcq	%rbp, %r8
-	adcq	$0, %rdi
-	movq	-8(%rcx), %r13
-	movq	(%rcx), %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	%r12, %rdx
-	imulq	%r13, %rdx
-	mulxq	%rax, %rax, %r11
-	addq	%r12, %rax
-	movq	8(%rcx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	mulxq	%rax, %rbp, %r10
-	adcq	%rbx, %rbp
+	movq	%rdx, -72(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r13, %r12
+	movq	16(%rsi), %rdx
+	movq	%rdx, -8(%rsp)                  # 8-byte Spill
+	mulxq	%rax, %r14, %r15
+	movq	(%rsi), %rbp
+	movq	%rbp, -16(%rsp)                 # 8-byte Spill
+	movq	8(%rsi), %rdx
+	movq	%rdx, -24(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %rsi, %r10
+	movq	%rbp, %rdx
+	mulxq	%rax, %rax, %r9
+	movq	%rax, -120(%rsp)                # 8-byte Spill
+	addq	%rsi, %r9
+	adcq	%r14, %r10
+	adcq	%r13, %r15
+	adcq	%r11, %r12
+	adcq	%r8, %rdi
+	movq	%rdi, -112(%rsp)                # 8-byte Spill
+	adcq	$0, %rbx
+	movq	%rbx, -128(%rsp)                # 8-byte Spill
+	movq	-8(%rcx), %rdx
+	movq	%rdx, 8(%rsp)                   # 8-byte Spill
+	imulq	%rax, %rdx
+	movq	40(%rcx), %rax
+	movq	%rax, -32(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r13, %rbp
 	movq	16(%rcx), %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	mulxq	%rax, %rsi, %rbx
-	adcq	%r14, %rsi
+	movq	%rax, -40(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r8, %r14
+	movq	8(%rcx), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	mulxq	%rax, %rax, %r11
+	movq	(%rcx), %rsi
+	movq	%rsi, -48(%rsp)                 # 8-byte Spill
+	mulxq	%rsi, %rsi, %rdi
+	addq	%rax, %rdi
+	adcq	%r8, %r11
 	movq	24(%rcx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	mulxq	%rax, %rcx, %rdx
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %rbx, %r8
+	adcq	%r14, %rbx
+	movq	32(%rcx), %rax
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %rcx, %rax
 	adcq	%r8, %rcx
-	adcq	$0, %rdi
-	addq	%r11, %rbp
-	adcq	%r10, %rsi
-	adcq	%rbx, %rcx
-	adcq	%rdx, %rdi
-	movq	8(%r15), %rdx
-	movq	-72(%rsp), %r12         # 8-byte Reload
-	mulxq	%r12, %rbx, %r9
-	movq	-64(%rsp), %r15         # 8-byte Reload
-	mulxq	%r15, %r10, %r11
-	addq	%rbx, %r11
-	mulxq	-48(%rsp), %rax, %r8    # 8-byte Folded Reload
-	adcq	%r9, %rax
-	mulxq	-16(%rsp), %r9, %rbx    # 8-byte Folded Reload
+	adcq	%r13, %rax
+	adcq	$0, %rbp
+	addq	-120(%rsp), %rsi                # 8-byte Folded Reload
+	adcq	%r9, %rdi
+	adcq	%r10, %r11
+	adcq	%r15, %rbx
+	adcq	%r12, %rcx
+	adcq	-112(%rsp), %rax                # 8-byte Folded Reload
+	adcq	-128(%rsp), %rbp                # 8-byte Folded Reload
+	movq	%rbp, -104(%rsp)                # 8-byte Spill
+	movq	-80(%rsp), %rdx                 # 8-byte Reload
+	movq	8(%rdx), %rdx
+	mulxq	-88(%rsp), %rbp, %rsi           # 8-byte Folded Reload
+	movq	%rbp, -120(%rsp)                # 8-byte Spill
+	movq	%rsi, -128(%rsp)                # 8-byte Spill
+	mulxq	-96(%rsp), %rbp, %r15           # 8-byte Folded Reload
+	mulxq	-72(%rsp), %rsi, %r14           # 8-byte Folded Reload
+	movq	%rsi, 16(%rsp)                  # 8-byte Spill
+	mulxq	-24(%rsp), %rsi, %r8            # 8-byte Folded Reload
+	mulxq	-16(%rsp), %r12, %r10           # 8-byte Folded Reload
+	setb	-112(%rsp)                      # 1-byte Folded Spill
+	addq	%rsi, %r10
+	mulxq	-8(%rsp), %r9, %r13             # 8-byte Folded Reload
 	adcq	%r8, %r9
-	adcq	$0, %rbx
-	addq	%rbp, %r10
-	adcq	%rsi, %r11
-	adcq	%rcx, %rax
-	adcq	%rdi, %r9
-	adcq	$0, %rbx
-	movq	%r10, %rdx
-	imulq	%r13, %rdx
-	movq	-56(%rsp), %r14         # 8-byte Reload
-	mulxq	%r14, %rcx, %r8
-	addq	%r10, %rcx
-	mulxq	-24(%rsp), %r10, %rdi   # 8-byte Folded Reload
+	adcq	16(%rsp), %r13                  # 8-byte Folded Reload
+	adcq	%rbp, %r14
+	adcq	-120(%rsp), %r15                # 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                # 8-byte Reload
+	adcq	$0, %rdx
+	addq	%rdi, %r12
 	adcq	%r11, %r10
-	mulxq	-40(%rsp), %rcx, %rsi   # 8-byte Folded Reload
-	adcq	%rax, %rcx
-	mulxq	-8(%rsp), %rax, %rdx    # 8-byte Folded Reload
-	adcq	%r9, %rax
-	adcq	$0, %rbx
-	addq	%r8, %r10
-	adcq	%rdi, %rcx
-	adcq	%rsi, %rax
-	adcq	%rdx, %rbx
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	movq	16(%rdx), %rdx
-	mulxq	%r12, %rsi, %r8
-	mulxq	%r15, %r11, %rbp
-	addq	%rsi, %rbp
-	movq	-48(%rsp), %r12         # 8-byte Reload
-	mulxq	%r12, %rdi, %r9
-	adcq	%r8, %rdi
-	mulxq	-16(%rsp), %r8, %rsi    # 8-byte Folded Reload
-	adcq	%r9, %r8
-	adcq	$0, %rsi
-	addq	%r10, %r11
-	adcq	%rcx, %rbp
-	adcq	%rax, %rdi
+	adcq	%rbx, %r9
+	adcq	%rcx, %r13
+	adcq	%rax, %r14
+	adcq	-104(%rsp), %r15                # 8-byte Folded Reload
+	movzbl	-112(%rsp), %eax                # 1-byte Folded Reload
+	adcq	%rax, %rdx
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	setb	-112(%rsp)                      # 1-byte Folded Spill
+	movq	8(%rsp), %rdx                   # 8-byte Reload
+	imulq	%r12, %rdx
+	mulxq	-32(%rsp), %rax, %rbp           # 8-byte Folded Reload
+	movq	%rax, -120(%rsp)                # 8-byte Spill
+	mulxq	-64(%rsp), %rax, %r11           # 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	mulxq	(%rsp), %rdi, %rsi              # 8-byte Folded Reload
+	mulxq	-48(%rsp), %rcx, %r8            # 8-byte Folded Reload
+	addq	%rdi, %r8
+	mulxq	-40(%rsp), %rbx, %rax           # 8-byte Folded Reload
+	adcq	%rsi, %rbx
+	mulxq	-56(%rsp), %rsi, %rdi           # 8-byte Folded Reload
+	adcq	%rax, %rsi
+	adcq	-104(%rsp), %rdi                # 8-byte Folded Reload
+	adcq	-120(%rsp), %r11                # 8-byte Folded Reload
+	adcq	$0, %rbp
+	addq	%r12, %rcx
+	adcq	%r10, %r8
+	adcq	%r9, %rbx
+	adcq	%r13, %rsi
+	adcq	%r14, %rdi
+	adcq	%r15, %r11
+	adcq	-128(%rsp), %rbp                # 8-byte Folded Reload
+	movzbl	-112(%rsp), %r10d               # 1-byte Folded Reload
+	adcq	$0, %r10
+	movq	-80(%rsp), %rcx                 # 8-byte Reload
+	movq	16(%rcx), %rdx
+	mulxq	-88(%rsp), %rcx, %rax           # 8-byte Folded Reload
+	movq	%rcx, -112(%rsp)                # 8-byte Spill
+	movq	%rax, -128(%rsp)                # 8-byte Spill
+	mulxq	-96(%rsp), %rax, %r13           # 8-byte Folded Reload
+	movq	%rax, -120(%rsp)                # 8-byte Spill
+	mulxq	-72(%rsp), %rax, %r15           # 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	mulxq	-24(%rsp), %rcx, %r14           # 8-byte Folded Reload
+	mulxq	-16(%rsp), %rax, %r9            # 8-byte Folded Reload
+	addq	%rcx, %r9
+	mulxq	-8(%rsp), %rcx, %r12            # 8-byte Folded Reload
+	adcq	%r14, %rcx
+	adcq	-104(%rsp), %r12                # 8-byte Folded Reload
+	adcq	-120(%rsp), %r15                # 8-byte Folded Reload
+	adcq	-112(%rsp), %r13                # 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                # 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r8, %rax
+	movq	%rax, %r14
+	adcq	%rbx, %r9
+	adcq	%rsi, %rcx
+	adcq	%rdi, %r12
+	adcq	%r11, %r15
+	adcq	%rbp, %r13
+	movq	%r13, -120(%rsp)                # 8-byte Spill
+	adcq	%r10, %rdx
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	setb	-112(%rsp)                      # 1-byte Folded Spill
+	movq	8(%rsp), %rdx                   # 8-byte Reload
+	imulq	%rax, %rdx
+	mulxq	-32(%rsp), %rax, %r13           # 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	mulxq	-64(%rsp), %r10, %r11           # 8-byte Folded Reload
+	mulxq	(%rsp), %rbx, %rdi              # 8-byte Folded Reload
+	mulxq	-48(%rsp), %r8, %rsi            # 8-byte Folded Reload
+	addq	%rbx, %rsi
+	mulxq	-40(%rsp), %rbx, %rax           # 8-byte Folded Reload
+	adcq	%rdi, %rbx
+	mulxq	-56(%rsp), %rbp, %rdi           # 8-byte Folded Reload
+	adcq	%rax, %rbp
+	adcq	%r10, %rdi
+	adcq	-104(%rsp), %r11                # 8-byte Folded Reload
+	adcq	$0, %r13
+	addq	%r14, %r8
+	adcq	%r9, %rsi
+	adcq	%rcx, %rbx
+	adcq	%r12, %rbp
+	adcq	%r15, %rdi
+	adcq	-120(%rsp), %r11                # 8-byte Folded Reload
+	adcq	-128(%rsp), %r13                # 8-byte Folded Reload
+	movzbl	-112(%rsp), %r9d                # 1-byte Folded Reload
+	adcq	$0, %r9
+	movq	-80(%rsp), %rcx                 # 8-byte Reload
+	movq	24(%rcx), %rdx
+	mulxq	-88(%rsp), %rcx, %rax           # 8-byte Folded Reload
+	movq	%rcx, -112(%rsp)                # 8-byte Spill
+	movq	%rax, -128(%rsp)                # 8-byte Spill
+	mulxq	-96(%rsp), %rax, %r14           # 8-byte Folded Reload
+	movq	%rax, -120(%rsp)                # 8-byte Spill
+	mulxq	-72(%rsp), %rax, %r15           # 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	mulxq	-24(%rsp), %rcx, %r10           # 8-byte Folded Reload
+	mulxq	-16(%rsp), %rax, %r8            # 8-byte Folded Reload
+	addq	%rcx, %r8
+	mulxq	-8(%rsp), %rcx, %r12            # 8-byte Folded Reload
+	adcq	%r10, %rcx
+	adcq	-104(%rsp), %r12                # 8-byte Folded Reload
+	adcq	-120(%rsp), %r15                # 8-byte Folded Reload
+	adcq	-112(%rsp), %r14                # 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                # 8-byte Reload
+	adcq	$0, %rdx
+	addq	%rsi, %rax
+	movq	%rax, %r10
 	adcq	%rbx, %r8
-	adcq	$0, %rsi
-	movq	%r11, %rdx
-	imulq	%r13, %rdx
-	mulxq	%r14, %rax, %r10
-	addq	%r11, %rax
-	movq	-24(%rsp), %r14         # 8-byte Reload
-	mulxq	%r14, %r9, %rbx
-	adcq	%rbp, %r9
-	movq	-40(%rsp), %r15         # 8-byte Reload
-	mulxq	%r15, %rax, %rbp
-	adcq	%rdi, %rax
-	mulxq	-8(%rsp), %rcx, %rdx    # 8-byte Folded Reload
-	adcq	%r8, %rcx
-	adcq	$0, %rsi
-	addq	%r10, %r9
-	adcq	%rbx, %rax
 	adcq	%rbp, %rcx
-	adcq	%rdx, %rsi
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	movq	24(%rdx), %rdx
-	mulxq	-72(%rsp), %rbx, %r8    # 8-byte Folded Reload
-	mulxq	-64(%rsp), %r11, %rbp   # 8-byte Folded Reload
-	addq	%rbx, %rbp
-	mulxq	%r12, %rdi, %r10
+	adcq	%rdi, %r12
+	adcq	%r11, %r15
+	adcq	%r13, %r14
+	movq	%r14, -120(%rsp)                # 8-byte Spill
+	adcq	%r9, %rdx
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	setb	-112(%rsp)                      # 1-byte Folded Spill
+	movq	8(%rsp), %rdx                   # 8-byte Reload
+	imulq	%rax, %rdx
+	mulxq	-32(%rsp), %rax, %r14           # 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	mulxq	-64(%rsp), %r13, %r11           # 8-byte Folded Reload
+	mulxq	(%rsp), %rbx, %rsi              # 8-byte Folded Reload
+	mulxq	-48(%rsp), %rax, %rdi           # 8-byte Folded Reload
+	addq	%rbx, %rdi
+	mulxq	-40(%rsp), %rbx, %r9            # 8-byte Folded Reload
+	adcq	%rsi, %rbx
+	mulxq	-56(%rsp), %rbp, %rsi           # 8-byte Folded Reload
+	adcq	%r9, %rbp
+	adcq	%r13, %rsi
+	adcq	-104(%rsp), %r11                # 8-byte Folded Reload
+	adcq	$0, %r14
+	addq	%r10, %rax
 	adcq	%r8, %rdi
-	mulxq	-16(%rsp), %r8, %rbx    # 8-byte Folded Reload
-	adcq	%r10, %r8
-	adcq	$0, %rbx
-	addq	%r9, %r11
-	adcq	%rax, %rbp
-	adcq	%rcx, %rdi
-	adcq	%rsi, %r8
-	adcq	$0, %rbx
-	imulq	%r11, %r13
-	movq	%r13, %rdx
-	movq	-56(%rsp), %r12         # 8-byte Reload
-	mulxq	%r12, %rcx, %r9
-	addq	%r11, %rcx
-	movq	%r13, %rdx
-	mulxq	%r14, %r11, %r10
-	adcq	%rbp, %r11
-	movq	%r13, %rdx
-	movq	%r15, %rsi
-	mulxq	%rsi, %rax, %rcx
-	adcq	%rdi, %rax
-	movq	%r13, %rdx
-	movq	-8(%rsp), %rbp          # 8-byte Reload
-	mulxq	%rbp, %r15, %rdx
-	adcq	%r8, %r15
+	adcq	%rcx, %rbx
+	adcq	%r12, %rbp
+	adcq	%r15, %rsi
+	adcq	-120(%rsp), %r11                # 8-byte Folded Reload
+	adcq	-128(%rsp), %r14                # 8-byte Folded Reload
+	movzbl	-112(%rsp), %r9d                # 1-byte Folded Reload
+	adcq	$0, %r9
+	movq	-80(%rsp), %rcx                 # 8-byte Reload
+	movq	32(%rcx), %rdx
+	mulxq	-88(%rsp), %rcx, %rax           # 8-byte Folded Reload
+	movq	%rcx, -112(%rsp)                # 8-byte Spill
+	movq	%rax, -128(%rsp)                # 8-byte Spill
+	mulxq	-96(%rsp), %rax, %r15           # 8-byte Folded Reload
+	movq	%rax, -120(%rsp)                # 8-byte Spill
+	mulxq	-72(%rsp), %rax, %r12           # 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	mulxq	-24(%rsp), %rcx, %r10           # 8-byte Folded Reload
+	mulxq	-16(%rsp), %rax, %r13           # 8-byte Folded Reload
+	addq	%rcx, %r13
+	mulxq	-8(%rsp), %rcx, %r8             # 8-byte Folded Reload
+	adcq	%r10, %rcx
+	adcq	-104(%rsp), %r8                 # 8-byte Folded Reload
+	adcq	-120(%rsp), %r12                # 8-byte Folded Reload
+	adcq	-112(%rsp), %r15                # 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                # 8-byte Reload
+	adcq	$0, %rdx
+	addq	%rdi, %rax
+	movq	%rax, -120(%rsp)                # 8-byte Spill
+	adcq	%rbx, %r13
+	adcq	%rbp, %rcx
+	adcq	%rsi, %r8
+	adcq	%r11, %r12
+	adcq	%r14, %r15
+	adcq	%r9, %rdx
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	setb	-112(%rsp)                      # 1-byte Folded Spill
+	movq	8(%rsp), %rdx                   # 8-byte Reload
+	imulq	%rax, %rdx
+	mulxq	-32(%rsp), %rax, %r14           # 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	mulxq	-64(%rsp), %r9, %r10            # 8-byte Folded Reload
+	mulxq	(%rsp), %rbx, %rsi              # 8-byte Folded Reload
+	mulxq	-48(%rsp), %rax, %r11           # 8-byte Folded Reload
+	addq	%rbx, %r11
+	mulxq	-40(%rsp), %rbx, %rdi           # 8-byte Folded Reload
+	adcq	%rsi, %rbx
+	mulxq	-56(%rsp), %rbp, %rsi           # 8-byte Folded Reload
+	adcq	%rdi, %rbp
+	adcq	%r9, %rsi
+	adcq	-104(%rsp), %r10                # 8-byte Folded Reload
+	adcq	$0, %r14
+	addq	-120(%rsp), %rax                # 8-byte Folded Reload
+	adcq	%r13, %r11
+	adcq	%rcx, %rbx
+	adcq	%r8, %rbp
+	adcq	%r12, %rsi
+	adcq	%r15, %r10
+	adcq	-128(%rsp), %r14                # 8-byte Folded Reload
+	movq	%r14, -128(%rsp)                # 8-byte Spill
+	movzbl	-112(%rsp), %edi                # 1-byte Folded Reload
+	adcq	$0, %rdi
+	movq	-80(%rsp), %rax                 # 8-byte Reload
+	movq	40(%rax), %rdx
+	mulxq	-88(%rsp), %rcx, %rax           # 8-byte Folded Reload
+	movq	%rcx, -88(%rsp)                 # 8-byte Spill
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	mulxq	-96(%rsp), %rax, %r8            # 8-byte Folded Reload
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	mulxq	-72(%rsp), %rax, %r15           # 8-byte Folded Reload
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	mulxq	-8(%rsp), %r14, %r12            # 8-byte Folded Reload
+	mulxq	-24(%rsp), %rcx, %r13           # 8-byte Folded Reload
+	mulxq	-16(%rsp), %r9, %rax            # 8-byte Folded Reload
+	addq	%rcx, %rax
+	adcq	%r14, %r13
+	adcq	-72(%rsp), %r12                 # 8-byte Folded Reload
+	adcq	-96(%rsp), %r15                 # 8-byte Folded Reload
+	adcq	-88(%rsp), %r8                  # 8-byte Folded Reload
+	movq	-80(%rsp), %rcx                 # 8-byte Reload
+	adcq	$0, %rcx
+	addq	%r11, %r9
+	adcq	%rbx, %rax
+	adcq	%rbp, %r13
+	adcq	%rsi, %r12
+	adcq	%r10, %r15
+	adcq	-128(%rsp), %r8                 # 8-byte Folded Reload
+	movq	%r8, -96(%rsp)                  # 8-byte Spill
+	adcq	%rdi, %rcx
+	movq	%rcx, -80(%rsp)                 # 8-byte Spill
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	8(%rsp), %rdx                   # 8-byte Reload
+	imulq	%r9, %rdx
+	mulxq	-48(%rsp), %r11, %rsi           # 8-byte Folded Reload
+	movq	(%rsp), %r10                    # 8-byte Reload
+	mulxq	%r10, %rcx, %rbx
+	addq	%rsi, %rcx
+	mulxq	-40(%rsp), %rdi, %rbp           # 8-byte Folded Reload
+	adcq	%rbx, %rdi
+	mulxq	-56(%rsp), %rsi, %rbx           # 8-byte Folded Reload
+	adcq	%rbp, %rsi
+	mulxq	-64(%rsp), %rbp, %r14           # 8-byte Folded Reload
+	adcq	%rbx, %rbp
+	mulxq	-32(%rsp), %rdx, %rbx           # 8-byte Folded Reload
+	adcq	%r14, %rdx
 	adcq	$0, %rbx
 	addq	%r9, %r11
-	adcq	%r10, %rax
-	adcq	%rcx, %r15
-	adcq	%rdx, %rbx
-	movq	%r11, %rcx
-	subq	%r12, %rcx
-	movq	%rax, %rdx
-	sbbq	%r14, %rdx
-	movq	%r15, %rdi
-	sbbq	%rsi, %rdi
-	movq	%rbx, %rsi
-	sbbq	%rbp, %rsi
-	cmovsq	%r11, %rcx
-	movq	-80(%rsp), %rbp         # 8-byte Reload
-	movq	%rcx, (%rbp)
-	cmovsq	%rax, %rdx
-	movq	%rdx, 8(%rbp)
-	cmovsq	%r15, %rdi
-	movq	%rdi, 16(%rbp)
-	cmovsq	%rbx, %rsi
-	movq	%rsi, 24(%rbp)
+	adcq	%rax, %rcx
+	adcq	%r13, %rdi
+	adcq	%r12, %rsi
+	adcq	%r15, %rbp
+	adcq	-96(%rsp), %rdx                 # 8-byte Folded Reload
+	adcq	-80(%rsp), %rbx                 # 8-byte Folded Reload
+	movzbl	-88(%rsp), %r11d                # 1-byte Folded Reload
+	adcq	$0, %r11
+	movq	%rcx, %r8
+	subq	-48(%rsp), %r8                  # 8-byte Folded Reload
+	movq	%rdi, %r9
+	sbbq	%r10, %r9
+	movq	%rsi, %r10
+	sbbq	-40(%rsp), %r10                 # 8-byte Folded Reload
+	movq	%rbp, %r14
+	sbbq	-56(%rsp), %r14                 # 8-byte Folded Reload
+	movq	%rdx, %r15
+	sbbq	-64(%rsp), %r15                 # 8-byte Folded Reload
+	movq	%rbx, %rax
+	sbbq	-32(%rsp), %rax                 # 8-byte Folded Reload
+	sbbq	$0, %r11
+	testb	$1, %r11b
+	cmovneq	%rbx, %rax
+	movq	24(%rsp), %rbx                  # 8-byte Reload
+	movq	%rax, 40(%rbx)
+	cmovneq	%rdx, %r15
+	movq	%r15, 32(%rbx)
+	cmovneq	%rbp, %r14
+	movq	%r14, 24(%rbx)
+	cmovneq	%rsi, %r10
+	movq	%r10, 16(%rbx)
+	cmovneq	%rdi, %r9
+	movq	%r9, 8(%rbx)
+	cmovneq	%rcx, %r8
+	movq	%r8, (%rbx)
+	addq	$32, %rsp
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -2430,7756 +3195,335 @@ mcl_fp_montNF4Lbmi2:                    # @mcl_fp_montNF4Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end54:
-	.size	mcl_fp_montNF4Lbmi2, .Lfunc_end54-mcl_fp_montNF4Lbmi2
-
-	.globl	mcl_fp_montRed4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed4Lbmi2,@function
-mcl_fp_montRed4Lbmi2:                   # @mcl_fp_montRed4Lbmi2
-# BB#0:
+.Lfunc_end43:
+	.size	mcl_fp_mont6Lbmi2, .Lfunc_end43-mcl_fp_mont6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montNF6Lbmi2             # -- Begin function mcl_fp_montNF6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF6Lbmi2,@function
+mcl_fp_montNF6Lbmi2:                    # @mcl_fp_montNF6Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rdi, -56(%rsp)         # 8-byte Spill
-	movq	-8(%rcx), %r13
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	(%rsi), %rax
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	8(%rsi), %rdi
+	movq	%rdi, -128(%rsp)                # 8-byte Spill
+	movq	(%rdx), %rbp
+	movq	%rdi, %rdx
+	mulxq	%rbp, %rdi, %rbx
+	movq	%rax, %rdx
+	mulxq	%rbp, %r9, %r14
+	movq	16(%rsi), %rdx
+	movq	%rdx, -64(%rsp)                 # 8-byte Spill
+	addq	%rdi, %r14
+	mulxq	%rbp, %rdi, %r8
+	adcq	%rbx, %rdi
+	movq	24(%rsi), %rdx
+	movq	%rdx, -72(%rsp)                 # 8-byte Spill
+	mulxq	%rbp, %rbx, %r10
+	adcq	%r8, %rbx
+	movq	32(%rsi), %rdx
+	movq	%rdx, -80(%rsp)                 # 8-byte Spill
+	mulxq	%rbp, %r8, %r11
+	adcq	%r10, %r8
+	movq	40(%rsi), %rdx
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	mulxq	%rbp, %rsi, %r15
+	adcq	%r11, %rsi
+	adcq	$0, %r15
+	movq	-8(%rcx), %rdx
+	movq	%rdx, -104(%rsp)                # 8-byte Spill
+	imulq	%r9, %rdx
 	movq	(%rcx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r10
-	movq	%r10, %rdx
-	imulq	%r13, %rdx
+	movq	%rax, -16(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %rbp, %rax
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	addq	%r9, %rbp
+	movq	8(%rcx), %rax
+	movq	%rax, -24(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r13, %r9
+	adcq	%r14, %r13
+	movq	16(%rcx), %rax
+	movq	%rax, -32(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r12, %rax
+	adcq	%rdi, %r12
 	movq	24(%rcx), %rdi
-	movq	%rdi, -48(%rsp)         # 8-byte Spill
-	mulxq	%rdi, %r9, %r15
-	movq	%rdi, %r14
-	movq	16(%rcx), %rdi
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	mulxq	%rdi, %rdi, %rbx
-	movq	8(%rcx), %rcx
-	movq	%rcx, -16(%rsp)         # 8-byte Spill
-	mulxq	%rcx, %rcx, %r8
-	mulxq	%rax, %rdx, %rbp
-	addq	%rcx, %rbp
-	adcq	%rdi, %r8
-	adcq	%r9, %rbx
+	movq	%rdi, -40(%rsp)                 # 8-byte Spill
+	mulxq	%rdi, %r14, %rdi
+	adcq	%rbx, %r14
+	movq	32(%rcx), %rbp
+	movq	%rbp, -48(%rsp)                 # 8-byte Spill
+	mulxq	%rbp, %r11, %rbx
+	adcq	%r8, %r11
+	movq	40(%rcx), %rcx
+	movq	%rcx, -56(%rsp)                 # 8-byte Spill
+	mulxq	%rcx, %r10, %rcx
+	adcq	%rsi, %r10
 	adcq	$0, %r15
-	movq	56(%rsi), %r11
-	movq	48(%rsi), %rcx
-	addq	%r10, %rdx
-	movq	40(%rsi), %r12
-	adcq	8(%rsi), %rbp
-	adcq	16(%rsi), %r8
-	adcq	24(%rsi), %rbx
-	adcq	32(%rsi), %r15
-	adcq	$0, %r12
-	adcq	$0, %rcx
-	movq	%rcx, -32(%rsp)         # 8-byte Spill
-	adcq	$0, %r11
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rbp, %rdx
-	imulq	%r13, %rdx
-	mulxq	%r14, %rax, %r9
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	mulxq	-8(%rsp), %r14, %rdi    # 8-byte Folded Reload
-	mulxq	-16(%rsp), %r10, %rcx   # 8-byte Folded Reload
-	mulxq	-24(%rsp), %rdx, %rax   # 8-byte Folded Reload
-	addq	%r10, %rax
+	addq	-96(%rsp), %r13                 # 8-byte Folded Reload
+	adcq	%r9, %r12
+	adcq	%rax, %r14
+	adcq	%rdi, %r11
+	adcq	%rbx, %r10
+	adcq	%rcx, %r15
+	movq	-120(%rsp), %rax                # 8-byte Reload
+	movq	8(%rax), %rdx
+	mulxq	-128(%rsp), %rcx, %rsi          # 8-byte Folded Reload
+	mulxq	-112(%rsp), %rbx, %rax          # 8-byte Folded Reload
+	addq	%rcx, %rax
+	mulxq	-64(%rsp), %rcx, %rdi           # 8-byte Folded Reload
+	adcq	%rsi, %rcx
+	mulxq	-72(%rsp), %rsi, %r8            # 8-byte Folded Reload
+	adcq	%rdi, %rsi
+	mulxq	-80(%rsp), %rdi, %rbp           # 8-byte Folded Reload
+	movq	%rbp, -96(%rsp)                 # 8-byte Spill
+	adcq	%r8, %rdi
+	mulxq	-88(%rsp), %r8, %r9             # 8-byte Folded Reload
+	adcq	-96(%rsp), %r8                  # 8-byte Folded Reload
+	adcq	$0, %r9
+	addq	%r13, %rbx
+	adcq	%r12, %rax
 	adcq	%r14, %rcx
-	adcq	-40(%rsp), %rdi         # 8-byte Folded Reload
+	adcq	%r11, %rsi
+	adcq	%r10, %rdi
+	adcq	%r15, %r8
+	adcq	$0, %r9
+	movq	-104(%rsp), %rdx                # 8-byte Reload
+	imulq	%rbx, %rdx
+	mulxq	-16(%rsp), %rbp, %r13           # 8-byte Folded Reload
+	addq	%rbx, %rbp
+	mulxq	-24(%rsp), %r11, %rbx           # 8-byte Folded Reload
+	adcq	%rax, %r11
+	mulxq	-32(%rsp), %r14, %rax           # 8-byte Folded Reload
+	adcq	%rcx, %r14
+	mulxq	-40(%rsp), %r10, %rcx           # 8-byte Folded Reload
+	adcq	%rsi, %r10
+	mulxq	-48(%rsp), %r15, %rsi           # 8-byte Folded Reload
+	adcq	%rdi, %r15
+	mulxq	-56(%rsp), %r12, %rdx           # 8-byte Folded Reload
+	adcq	%r8, %r12
 	adcq	$0, %r9
-	addq	%rbp, %rdx
+	addq	%r13, %r11
+	adcq	%rbx, %r14
+	adcq	%rax, %r10
+	adcq	%rcx, %r15
+	adcq	%rsi, %r12
+	adcq	%rdx, %r9
+	movq	-120(%rsp), %rax                # 8-byte Reload
+	movq	16(%rax), %rdx
+	mulxq	-128(%rsp), %rcx, %rax          # 8-byte Folded Reload
+	mulxq	-112(%rsp), %r13, %rdi          # 8-byte Folded Reload
+	addq	%rcx, %rdi
+	mulxq	-64(%rsp), %rbx, %rcx           # 8-byte Folded Reload
+	adcq	%rax, %rbx
+	mulxq	-72(%rsp), %rsi, %rbp           # 8-byte Folded Reload
+	adcq	%rcx, %rsi
+	mulxq	-80(%rsp), %rax, %rcx           # 8-byte Folded Reload
+	movq	%rcx, -96(%rsp)                 # 8-byte Spill
+	adcq	%rbp, %rax
+	mulxq	-88(%rsp), %r8, %rcx            # 8-byte Folded Reload
+	adcq	-96(%rsp), %r8                  # 8-byte Folded Reload
+	adcq	$0, %rcx
+	addq	%r11, %r13
+	adcq	%r14, %rdi
+	adcq	%r10, %rbx
+	adcq	%r15, %rsi
+	adcq	%r12, %rax
+	adcq	%r9, %r8
+	adcq	$0, %rcx
+	movq	-104(%rsp), %rdx                # 8-byte Reload
+	imulq	%r13, %rdx
+	mulxq	-16(%rsp), %rbp, %r12           # 8-byte Folded Reload
+	addq	%r13, %rbp
+	mulxq	-24(%rsp), %r11, %rbp           # 8-byte Folded Reload
+	adcq	%rdi, %r11
+	mulxq	-32(%rsp), %r9, %rdi            # 8-byte Folded Reload
+	adcq	%rbx, %r9
+	mulxq	-40(%rsp), %r10, %rbx           # 8-byte Folded Reload
+	adcq	%rsi, %r10
+	mulxq	-48(%rsp), %r14, %rsi           # 8-byte Folded Reload
+	adcq	%rax, %r14
+	mulxq	-56(%rsp), %r15, %rax           # 8-byte Folded Reload
+	adcq	%r8, %r15
+	adcq	$0, %rcx
+	addq	%r12, %r11
+	adcq	%rbp, %r9
+	adcq	%rdi, %r10
+	adcq	%rbx, %r14
+	adcq	%rsi, %r15
+	adcq	%rax, %rcx
+	movq	-120(%rsp), %rax                # 8-byte Reload
+	movq	24(%rax), %rdx
+	mulxq	-128(%rsp), %rsi, %rax          # 8-byte Folded Reload
+	mulxq	-112(%rsp), %r13, %rbx          # 8-byte Folded Reload
+	addq	%rsi, %rbx
+	mulxq	-64(%rsp), %rdi, %rbp           # 8-byte Folded Reload
+	adcq	%rax, %rdi
+	mulxq	-72(%rsp), %rsi, %r8            # 8-byte Folded Reload
+	adcq	%rbp, %rsi
+	mulxq	-80(%rsp), %rax, %rbp           # 8-byte Folded Reload
 	adcq	%r8, %rax
-	adcq	%rbx, %rcx
-	adcq	%r15, %rdi
-	adcq	%r12, %r9
-	adcq	$0, -32(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %r11
-	movq	%r11, -40(%rsp)         # 8-byte Spill
-	adcq	$0, %rsi
-	movq	%rax, %rdx
+	mulxq	-88(%rsp), %r8, %r12            # 8-byte Folded Reload
+	adcq	%rbp, %r8
+	adcq	$0, %r12
+	addq	%r11, %r13
+	adcq	%r9, %rbx
+	adcq	%r10, %rdi
+	adcq	%r14, %rsi
+	adcq	%r15, %rax
+	adcq	%rcx, %r8
+	adcq	$0, %r12
+	movq	-104(%rsp), %rdx                # 8-byte Reload
 	imulq	%r13, %rdx
-	movq	-48(%rsp), %r15         # 8-byte Reload
-	mulxq	%r15, %rbp, %r8
-	movq	%rbp, -64(%rsp)         # 8-byte Spill
-	movq	-8(%rsp), %r11          # 8-byte Reload
-	mulxq	%r11, %rbx, %r10
-	movq	%rbx, -72(%rsp)         # 8-byte Spill
-	mulxq	-16(%rsp), %r12, %rbp   # 8-byte Folded Reload
-	movq	-24(%rsp), %r14         # 8-byte Reload
-	mulxq	%r14, %rdx, %rbx
-	addq	%r12, %rbx
-	adcq	-72(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r10         # 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	%rax, %rdx
+	mulxq	-16(%rsp), %rbp, %rcx           # 8-byte Folded Reload
+	addq	%r13, %rbp
+	mulxq	-24(%rsp), %r11, %rbp           # 8-byte Folded Reload
+	adcq	%rbx, %r11
+	mulxq	-32(%rsp), %r9, %rbx            # 8-byte Folded Reload
+	adcq	%rdi, %r9
+	mulxq	-40(%rsp), %r10, %rdi           # 8-byte Folded Reload
+	adcq	%rsi, %r10
+	mulxq	-48(%rsp), %r14, %rsi           # 8-byte Folded Reload
+	adcq	%rax, %r14
+	mulxq	-56(%rsp), %r15, %rax           # 8-byte Folded Reload
+	adcq	%r8, %r15
+	adcq	$0, %r12
+	addq	%rcx, %r11
+	adcq	%rbp, %r9
+	adcq	%rbx, %r10
+	adcq	%rdi, %r14
+	adcq	%rsi, %r15
+	adcq	%rax, %r12
+	movq	-120(%rsp), %rax                # 8-byte Reload
+	movq	32(%rax), %rdx
+	mulxq	-128(%rsp), %rsi, %rcx          # 8-byte Folded Reload
+	mulxq	-112(%rsp), %r13, %rax          # 8-byte Folded Reload
+	addq	%rsi, %rax
+	mulxq	-64(%rsp), %rbx, %rsi           # 8-byte Folded Reload
 	adcq	%rcx, %rbx
-	adcq	%rdi, %rbp
-	adcq	%r9, %r10
-	adcq	-32(%rsp), %r8          # 8-byte Folded Reload
-	adcq	$0, -40(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rsi
-	imulq	%rbx, %r13
-	movq	%r13, %rdx
-	mulxq	%r15, %rax, %rdi
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	%r13, %rdx
-	mulxq	%r11, %r9, %rax
-	movq	%r13, %rdx
-	movq	-16(%rsp), %r11         # 8-byte Reload
-	mulxq	%r11, %r12, %rcx
-	movq	%r13, %rdx
-	mulxq	%r14, %r15, %r13
-	addq	%r12, %r13
-	adcq	%r9, %rcx
-	adcq	-32(%rsp), %rax         # 8-byte Folded Reload
-	adcq	$0, %rdi
-	addq	%rbx, %r15
-	adcq	%rbp, %r13
-	adcq	%r10, %rcx
-	adcq	%r8, %rax
-	adcq	-40(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	$0, %rsi
-	movq	%r13, %rdx
-	subq	%r14, %rdx
-	movq	%rcx, %rbp
-	sbbq	%r11, %rbp
-	movq	%rax, %r8
-	sbbq	-8(%rsp), %r8           # 8-byte Folded Reload
-	movq	%rdi, %rbx
-	sbbq	-48(%rsp), %rbx         # 8-byte Folded Reload
-	sbbq	$0, %rsi
-	andl	$1, %esi
-	cmovneq	%rdi, %rbx
-	testb	%sil, %sil
-	cmovneq	%r13, %rdx
-	movq	-56(%rsp), %rsi         # 8-byte Reload
-	movq	%rdx, (%rsi)
-	cmovneq	%rcx, %rbp
-	movq	%rbp, 8(%rsi)
-	cmovneq	%rax, %r8
-	movq	%r8, 16(%rsi)
-	movq	%rbx, 24(%rsi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end55:
-	.size	mcl_fp_montRed4Lbmi2, .Lfunc_end55-mcl_fp_montRed4Lbmi2
-
-	.globl	mcl_fp_addPre4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre4Lbmi2,@function
-mcl_fp_addPre4Lbmi2:                    # @mcl_fp_addPre4Lbmi2
-# BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rdx), %rax
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rax
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	adcq	%r8, %r9
-	movq	%r9, 24(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-.Lfunc_end56:
-	.size	mcl_fp_addPre4Lbmi2, .Lfunc_end56-mcl_fp_addPre4Lbmi2
-
-	.globl	mcl_fp_subPre4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre4Lbmi2,@function
-mcl_fp_subPre4Lbmi2:                    # @mcl_fp_subPre4Lbmi2
-# BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %r10
-	movq	%rcx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	sbbq	%r8, %r9
-	movq	%r9, 24(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	retq
-.Lfunc_end57:
-	.size	mcl_fp_subPre4Lbmi2, .Lfunc_end57-mcl_fp_subPre4Lbmi2
-
-	.globl	mcl_fp_shr1_4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_4Lbmi2,@function
-mcl_fp_shr1_4Lbmi2:                     # @mcl_fp_shr1_4Lbmi2
-# BB#0:
-	movq	24(%rsi), %rax
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rdx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rdx
-	movq	%rdx, (%rdi)
-	shrdq	$1, %rcx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rax, %rcx
-	movq	%rcx, 16(%rdi)
-	shrq	%rax
-	movq	%rax, 24(%rdi)
-	retq
-.Lfunc_end58:
-	.size	mcl_fp_shr1_4Lbmi2, .Lfunc_end58-mcl_fp_shr1_4Lbmi2
-
-	.globl	mcl_fp_add4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add4Lbmi2,@function
-mcl_fp_add4Lbmi2:                       # @mcl_fp_add4Lbmi2
-# BB#0:
-	movq	24(%rdx), %r10
-	movq	24(%rsi), %r8
-	movq	16(%rdx), %r9
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r9
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r9, 16(%rdi)
-	adcq	%r10, %r8
-	movq	%r8, 24(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r9
-	sbbq	24(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB59_2
-# BB#1:                                 # %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r9, 16(%rdi)
-	movq	%r8, 24(%rdi)
-.LBB59_2:                               # %carry
-	retq
-.Lfunc_end59:
-	.size	mcl_fp_add4Lbmi2, .Lfunc_end59-mcl_fp_add4Lbmi2
-
-	.globl	mcl_fp_addNF4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF4Lbmi2,@function
-mcl_fp_addNF4Lbmi2:                     # @mcl_fp_addNF4Lbmi2
-# BB#0:
-	pushq	%rbx
-	movq	24(%rdx), %r8
-	movq	16(%rdx), %r9
-	movq	(%rdx), %r11
-	movq	8(%rdx), %r10
-	addq	(%rsi), %r11
-	adcq	8(%rsi), %r10
-	adcq	16(%rsi), %r9
-	adcq	24(%rsi), %r8
-	movq	%r11, %rsi
-	subq	(%rcx), %rsi
-	movq	%r10, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r9, %rax
-	sbbq	16(%rcx), %rax
-	movq	%r8, %rbx
-	sbbq	24(%rcx), %rbx
-	testq	%rbx, %rbx
-	cmovsq	%r11, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r10, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r9, %rax
-	movq	%rax, 16(%rdi)
-	cmovsq	%r8, %rbx
-	movq	%rbx, 24(%rdi)
-	popq	%rbx
-	retq
-.Lfunc_end60:
-	.size	mcl_fp_addNF4Lbmi2, .Lfunc_end60-mcl_fp_addNF4Lbmi2
-
-	.globl	mcl_fp_sub4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub4Lbmi2,@function
-mcl_fp_sub4Lbmi2:                       # @mcl_fp_sub4Lbmi2
-# BB#0:
-	movq	24(%rdx), %r10
-	movq	24(%rsi), %r8
-	movq	16(%rsi), %r9
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r11
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %r9
-	movq	%rax, (%rdi)
-	movq	%r11, 8(%rdi)
-	movq	%r9, 16(%rdi)
-	sbbq	%r10, %r8
-	movq	%r8, 24(%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB61_2
-# BB#1:                                 # %nocarry
-	retq
-.LBB61_2:                               # %carry
-	movq	24(%rcx), %r10
-	movq	8(%rcx), %rsi
-	movq	16(%rcx), %rdx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r11, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 24(%rdi)
-	retq
-.Lfunc_end61:
-	.size	mcl_fp_sub4Lbmi2, .Lfunc_end61-mcl_fp_sub4Lbmi2
-
-	.globl	mcl_fp_subNF4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF4Lbmi2,@function
-mcl_fp_subNF4Lbmi2:                     # @mcl_fp_subNF4Lbmi2
-# BB#0:
-	pushq	%rbx
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %r8
-	movq	(%rsi), %r9
-	movq	8(%rsi), %r10
-	subq	(%rdx), %r9
-	sbbq	8(%rdx), %r10
-	sbbq	16(%rdx), %r8
-	sbbq	24(%rdx), %r11
-	movq	%r11, %rdx
-	sarq	$63, %rdx
-	movq	24(%rcx), %rsi
-	andq	%rdx, %rsi
-	movq	16(%rcx), %rax
-	andq	%rdx, %rax
-	movq	8(%rcx), %rbx
-	andq	%rdx, %rbx
-	andq	(%rcx), %rdx
-	addq	%r9, %rdx
-	movq	%rdx, (%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 8(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 16(%rdi)
-	adcq	%r11, %rsi
-	movq	%rsi, 24(%rdi)
-	popq	%rbx
-	retq
-.Lfunc_end62:
-	.size	mcl_fp_subNF4Lbmi2, .Lfunc_end62-mcl_fp_subNF4Lbmi2
-
-	.globl	mcl_fpDbl_add4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add4Lbmi2,@function
-mcl_fpDbl_add4Lbmi2:                    # @mcl_fpDbl_add4Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r9
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r10
-	movq	48(%rsi), %r12
-	movq	40(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	24(%rdx), %r15
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	40(%rsi), %r13
-	movq	24(%rsi), %rbp
-	movq	32(%rsi), %rsi
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rbx, 16(%rdi)
-	adcq	%r15, %rbp
-	movq	%rbp, 24(%rdi)
-	adcq	%r14, %rsi
-	adcq	%r11, %r13
-	adcq	%r10, %r12
-	adcq	%r9, %r8
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rsi, %rdx
-	subq	(%rcx), %rdx
-	movq	%r13, %rbp
-	sbbq	8(%rcx), %rbp
-	movq	%r12, %rbx
-	sbbq	16(%rcx), %rbx
-	movq	%r8, %r9
-	sbbq	24(%rcx), %r9
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%rsi, %rdx
-	movq	%rdx, 32(%rdi)
-	testb	%al, %al
-	cmovneq	%r13, %rbp
-	movq	%rbp, 40(%rdi)
-	cmovneq	%r12, %rbx
-	movq	%rbx, 48(%rdi)
-	cmovneq	%r8, %r9
-	movq	%r9, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end63:
-	.size	mcl_fpDbl_add4Lbmi2, .Lfunc_end63-mcl_fpDbl_add4Lbmi2
-
-	.globl	mcl_fpDbl_sub4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub4Lbmi2,@function
-mcl_fpDbl_sub4Lbmi2:                    # @mcl_fpDbl_sub4Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r9
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	(%rsi), %rbx
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	movq	%rbx, (%rdi)
-	movq	8(%rsi), %rbx
-	sbbq	8(%rdx), %rbx
-	movq	%rbx, 8(%rdi)
-	movq	16(%rsi), %rbx
-	sbbq	16(%rdx), %rbx
-	movq	%rbx, 16(%rdi)
-	movq	24(%rsi), %rbx
-	sbbq	%r11, %rbx
-	movq	40(%rdx), %r11
-	movq	32(%rdx), %rdx
-	movq	%rbx, 24(%rdi)
-	movq	32(%rsi), %r12
-	sbbq	%rdx, %r12
-	movq	48(%rsi), %r14
-	movq	40(%rsi), %r15
-	sbbq	%r11, %r15
-	sbbq	%r10, %r14
-	sbbq	%r9, %r8
-	movl	$0, %edx
-	sbbq	$0, %rdx
-	andl	$1, %edx
-	movq	(%rcx), %rsi
-	cmoveq	%rax, %rsi
-	testb	%dl, %dl
-	movq	16(%rcx), %rdx
-	cmoveq	%rax, %rdx
-	movq	24(%rcx), %rbx
-	cmoveq	%rax, %rbx
-	cmovneq	8(%rcx), %rax
-	addq	%r12, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%r15, %rax
-	movq	%rax, 40(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 48(%rdi)
-	adcq	%r8, %rbx
-	movq	%rbx, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end64:
-	.size	mcl_fpDbl_sub4Lbmi2, .Lfunc_end64-mcl_fpDbl_sub4Lbmi2
-
-	.globl	mcl_fp_mulUnitPre5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre5Lbmi2,@function
-mcl_fp_mulUnitPre5Lbmi2:                # @mcl_fp_mulUnitPre5Lbmi2
-# BB#0:
-	pushq	%r14
-	pushq	%rbx
-	mulxq	32(%rsi), %r8, %r11
-	mulxq	24(%rsi), %r9, %rax
-	mulxq	16(%rsi), %r10, %rcx
-	mulxq	8(%rsi), %r14, %rbx
-	mulxq	(%rsi), %rdx, %rsi
-	movq	%rdx, (%rdi)
-	addq	%r14, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
-	adcq	%r9, %rcx
-	movq	%rcx, 24(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 32(%rdi)
-	adcq	$0, %r11
-	movq	%r11, 40(%rdi)
-	popq	%rbx
-	popq	%r14
-	retq
-.Lfunc_end65:
-	.size	mcl_fp_mulUnitPre5Lbmi2, .Lfunc_end65-mcl_fp_mulUnitPre5Lbmi2
-
-	.globl	mcl_fpDbl_mulPre5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre5Lbmi2,@function
-mcl_fpDbl_mulPre5Lbmi2:                 # @mcl_fpDbl_mulPre5Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	(%rsi), %r11
-	movq	8(%rsi), %r10
-	movq	(%rdx), %rcx
-	movq	%r10, %rdx
-	mulxq	%rcx, %rax, %r14
-	movq	%r11, %rdx
-	mulxq	%rcx, %rdx, %rbx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %rbp
-	movq	%rbp, -16(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %r15
-	addq	%rax, %rbx
-	movq	%r15, %rdx
-	mulxq	%rcx, %rax, %r13
-	adcq	%r14, %rax
-	movq	%rbp, %rdx
-	mulxq	%rcx, %r8, %r12
-	adcq	%r13, %r8
-	movq	32(%rsi), %r14
-	movq	%r14, %rdx
-	mulxq	%rcx, %r9, %r13
-	adcq	%r12, %r9
-	movq	-24(%rsp), %rcx         # 8-byte Reload
-	movq	%rcx, (%rdi)
-	adcq	$0, %r13
-	movq	-48(%rsp), %rdi         # 8-byte Reload
-	movq	8(%rdi), %rbp
-	movq	%r11, %rdx
-	mulxq	%rbp, %r12, %r11
-	addq	%rbx, %r12
-	movq	%r10, %rdx
-	mulxq	%rbp, %rbx, %rcx
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	adcq	%rax, %rbx
-	movq	%r15, %rdx
-	mulxq	%rbp, %rcx, %r10
-	adcq	%r8, %rcx
-	movq	-16(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rbp, %rax, %r8
-	adcq	%r9, %rax
-	movq	%r14, %rdx
-	mulxq	%rbp, %r15, %rdx
-	adcq	%r13, %r15
-	sbbq	%r14, %r14
-	andl	$1, %r14d
-	addq	%r11, %rbx
-	movq	-8(%rsp), %rbp          # 8-byte Reload
-	movq	%r12, 8(%rbp)
-	adcq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	%r10, %rax
-	adcq	%r8, %r15
-	adcq	%rdx, %r14
-	movq	(%rsi), %rdx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %r8
-	movq	%r8, -16(%rsp)          # 8-byte Spill
-	movq	16(%rdi), %rbp
-	mulxq	%rbp, %r12, %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	addq	%rbx, %r12
-	movq	%r8, %rdx
-	mulxq	%rbp, %rbx, %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	adcq	%rcx, %rbx
-	movq	16(%rsi), %r11
-	movq	%r11, %rdx
-	mulxq	%rbp, %rcx, %rdx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	adcq	%rax, %rcx
-	movq	24(%rsi), %r13
-	movq	%r13, %rdx
-	mulxq	%rbp, %r9, %r10
-	adcq	%r15, %r9
-	movq	32(%rsi), %r15
-	movq	%r15, %rdx
-	mulxq	%rbp, %r8, %rdx
-	adcq	%r14, %r8
-	sbbq	%r14, %r14
-	andl	$1, %r14d
-	addq	-32(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-40(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	-56(%rsp), %r9          # 8-byte Folded Reload
-	adcq	%r10, %r8
-	adcq	%rdx, %r14
-	movq	-8(%rsp), %r10          # 8-byte Reload
-	movq	%r12, 16(%r10)
-	movq	%rdi, %rbp
-	movq	24(%rbp), %rax
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %r12, %rdi
-	addq	%rbx, %r12
-	movq	-16(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %rbx, %rdx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	adcq	%rcx, %rbx
-	movq	%r11, %rdx
-	mulxq	%rax, %rcx, %r11
-	adcq	%r9, %rcx
-	movq	%r13, %rdx
-	mulxq	%rax, %r13, %r9
-	adcq	%r8, %r13
-	movq	%r15, %rdx
-	mulxq	%rax, %r8, %rdx
-	adcq	%r14, %r8
-	sbbq	%r14, %r14
-	andl	$1, %r14d
-	addq	%rdi, %rbx
-	movq	%r12, 24(%r10)
-	movq	%r10, %rdi
-	adcq	-16(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	%r11, %r13
-	adcq	%r9, %r8
-	adcq	%rdx, %r14
-	movq	32(%rbp), %rdx
-	mulxq	8(%rsi), %rax, %r9
-	mulxq	(%rsi), %rbp, %r10
-	addq	%rbx, %rbp
-	adcq	%rcx, %rax
-	mulxq	16(%rsi), %rbx, %r15
-	adcq	%r13, %rbx
-	mulxq	32(%rsi), %rcx, %r11
-	mulxq	24(%rsi), %rsi, %rdx
-	movq	%rbp, 32(%rdi)
-	adcq	%r8, %rsi
-	adcq	%r14, %rcx
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	addq	%r10, %rax
-	movq	%rax, 40(%rdi)
-	adcq	%r9, %rbx
-	movq	%rbx, 48(%rdi)
-	adcq	%r15, %rsi
-	movq	%rsi, 56(%rdi)
-	adcq	%rdx, %rcx
-	movq	%rcx, 64(%rdi)
-	adcq	%r11, %rbp
-	movq	%rbp, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end66:
-	.size	mcl_fpDbl_mulPre5Lbmi2, .Lfunc_end66-mcl_fpDbl_mulPre5Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre5Lbmi2,@function
-mcl_fpDbl_sqrPre5Lbmi2:                 # @mcl_fpDbl_sqrPre5Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	16(%rsi), %r11
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rcx
-	movq	%r11, %rdx
-	mulxq	%rax, %rbx, %r15
-	movq	32(%rsi), %r9
-	movq	%r9, -8(%rsp)           # 8-byte Spill
-	movq	24(%rsi), %r13
-	movq	%rcx, %rdx
-	mulxq	%rax, %r12, %rbp
-	movq	%rbp, -16(%rsp)         # 8-byte Spill
-	movq	%rax, %rdx
-	mulxq	%rax, %rdx, %r14
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	addq	%r12, %r14
-	adcq	%rbp, %rbx
-	movq	%r13, %rdx
-	mulxq	%rax, %r8, %r10
-	adcq	%r15, %r8
-	movq	%r9, %rdx
-	mulxq	%rax, %rbp, %r15
-	adcq	%r10, %rbp
-	movq	-24(%rsp), %rax         # 8-byte Reload
-	movq	%rax, (%rdi)
-	adcq	$0, %r15
-	addq	%r12, %r14
-	movq	%rcx, %rdx
-	mulxq	%rcx, %rax, %rdx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	adcq	%rbx, %rax
-	movq	%r11, %rdx
-	mulxq	%rcx, %rbx, %r10
-	adcq	%r8, %rbx
-	movq	%r13, %rdx
-	mulxq	%rcx, %r13, %r8
-	adcq	%rbp, %r13
-	movq	%r9, %rdx
-	mulxq	%rcx, %r12, %rcx
-	adcq	%r15, %r12
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	addq	-16(%rsp), %rax         # 8-byte Folded Reload
-	movq	%r14, 8(%rdi)
-	adcq	-24(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	%r10, %r13
-	adcq	%r8, %r12
-	adcq	%rcx, %r15
-	movq	(%rsi), %r9
-	movq	8(%rsi), %r10
-	movq	%r9, %rdx
-	mulxq	%r11, %rbp, %rcx
-	movq	%rcx, -16(%rsp)         # 8-byte Spill
-	addq	%rax, %rbp
-	movq	%r10, %rdx
-	mulxq	%r11, %rax, %r8
-	adcq	%rbx, %rax
-	movq	%r11, %rdx
-	mulxq	%r11, %r14, %rcx
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	adcq	%r13, %r14
-	movq	24(%rsi), %rcx
-	movq	%rcx, %rdx
-	mulxq	%r11, %rbx, %r13
-	adcq	%r12, %rbx
-	movq	-8(%rsp), %rdx          # 8-byte Reload
-	mulxq	%r11, %r12, %rdx
-	adcq	%r15, %r12
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	addq	-16(%rsp), %rax         # 8-byte Folded Reload
-	adcq	%r8, %r14
-	movq	%rbp, 16(%rdi)
-	adcq	-24(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	%r13, %r12
-	adcq	%rdx, %r15
-	movq	%r10, %rdx
-	mulxq	%rcx, %r10, %rdx
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%r9, %rdx
-	mulxq	%rcx, %r13, %rdx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	addq	%rax, %r13
-	movq	16(%rsi), %r8
-	movq	32(%rsi), %rax
-	adcq	%r14, %r10
-	movq	%r8, %rdx
-	mulxq	%rcx, %r9, %r14
-	adcq	%rbx, %r9
-	movq	%rcx, %rdx
-	mulxq	%rcx, %r11, %rbp
-	adcq	%r12, %r11
-	movq	%rax, %rdx
-	mulxq	%rcx, %r12, %rdx
-	adcq	%r15, %r12
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	-16(%rsp), %r10         # 8-byte Folded Reload
-	movq	%r13, 24(%rdi)
-	adcq	-8(%rsp), %r9           # 8-byte Folded Reload
-	adcq	%r14, %r11
-	adcq	%rbp, %r12
-	adcq	%rdx, %rbx
-	movq	%rax, %rdx
-	mulxq	24(%rsi), %rbp, %r14
-	movq	%rax, %rdx
-	mulxq	(%rsi), %rcx, %r15
-	addq	%r10, %rcx
-	movq	%rax, %rdx
-	mulxq	8(%rsi), %rsi, %r10
-	movq	%rcx, 32(%rdi)
-	adcq	%r9, %rsi
-	movq	%r8, %rdx
-	mulxq	%rax, %rcx, %r8
-	adcq	%r11, %rcx
-	adcq	%r12, %rbp
-	movq	%rax, %rdx
-	mulxq	%rax, %rdx, %rax
-	adcq	%rbx, %rdx
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	%r15, %rsi
-	movq	%rsi, 40(%rdi)
-	adcq	%r10, %rcx
-	movq	%rcx, 48(%rdi)
-	adcq	%r8, %rbp
-	movq	%rbp, 56(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 64(%rdi)
-	adcq	%rax, %rbx
-	movq	%rbx, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end67:
-	.size	mcl_fpDbl_sqrPre5Lbmi2, .Lfunc_end67-mcl_fpDbl_sqrPre5Lbmi2
-
-	.globl	mcl_fp_mont5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont5Lbmi2,@function
-mcl_fp_mont5Lbmi2:                      # @mcl_fp_mont5Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rdi, -112(%rsp)        # 8-byte Spill
-	movq	32(%rsi), %rdi
-	movq	%rdi, -64(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rax
-	movq	%rdi, %rdx
-	mulxq	%rax, %r10, %rbx
-	movq	24(%rsi), %rdx
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	mulxq	%rax, %r12, %r14
-	movq	16(%rsi), %rdx
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	mulxq	%rax, %r13, %r11
-	movq	(%rsi), %rbp
-	movq	%rbp, -88(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rdx
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	mulxq	%rax, %rdi, %r9
-	movq	%rbp, %rdx
-	mulxq	%rax, %r15, %r8
-	addq	%rdi, %r8
-	adcq	%r13, %r9
-	adcq	%r12, %r11
-	adcq	%r10, %r14
-	adcq	$0, %rbx
-	movq	%rbx, -104(%rsp)        # 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	%r15, %rdx
-	imulq	%rax, %rdx
-	movq	(%rcx), %rsi
-	movq	%rsi, -32(%rsp)         # 8-byte Spill
-	movq	32(%rcx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	mulxq	%rax, %rax, %r12
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	24(%rcx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	mulxq	%rax, %r13, %r10
-	movq	8(%rcx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	mulxq	%rax, %rdi, %rbp
-	mulxq	%rsi, %rax, %rbx
-	addq	%rdi, %rbx
-	movq	16(%rcx), %rcx
-	movq	%rcx, -40(%rsp)         # 8-byte Spill
-	mulxq	%rcx, %rdi, %rcx
-	adcq	%rbp, %rdi
-	adcq	%r13, %rcx
-	adcq	-120(%rsp), %r10        # 8-byte Folded Reload
-	adcq	$0, %r12
-	addq	%r15, %rax
-	adcq	%r8, %rbx
-	adcq	%r9, %rdi
-	adcq	%r11, %rcx
-	adcq	%r14, %r10
-	adcq	-104(%rsp), %r12        # 8-byte Folded Reload
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	movq	8(%rax), %rdx
-	mulxq	-64(%rsp), %rax, %r14   # 8-byte Folded Reload
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	mulxq	-72(%rsp), %rax, %r15   # 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	mulxq	-80(%rsp), %r13, %r9    # 8-byte Folded Reload
-	mulxq	-96(%rsp), %r8, %rsi    # 8-byte Folded Reload
-	mulxq	-88(%rsp), %r11, %rax   # 8-byte Folded Reload
-	addq	%r8, %rax
-	adcq	%r13, %rsi
-	adcq	-120(%rsp), %r9         # 8-byte Folded Reload
-	adcq	-104(%rsp), %r15        # 8-byte Folded Reload
-	adcq	$0, %r14
-	addq	%rbx, %r11
-	adcq	%rdi, %rax
-	adcq	%rcx, %rsi
-	adcq	%r10, %r9
-	adcq	%r12, %r15
-	adcq	%rbp, %r14
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	movq	%r11, %rdx
-	imulq	-48(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rcx, %r10    # 8-byte Folded Reload
-	movq	%rcx, -104(%rsp)        # 8-byte Spill
-	mulxq	-16(%rsp), %rcx, %rdi   # 8-byte Folded Reload
-	movq	%rcx, -120(%rsp)        # 8-byte Spill
-	mulxq	-40(%rsp), %r13, %rcx   # 8-byte Folded Reload
-	mulxq	-24(%rsp), %r8, %rbx    # 8-byte Folded Reload
-	mulxq	-32(%rsp), %rdx, %rbp   # 8-byte Folded Reload
-	addq	%r8, %rbp
-	adcq	%r13, %rbx
-	adcq	-120(%rsp), %rcx        # 8-byte Folded Reload
-	adcq	-104(%rsp), %rdi        # 8-byte Folded Reload
-	adcq	$0, %r10
-	addq	%r11, %rdx
-	adcq	%rax, %rbp
-	adcq	%rsi, %rbx
-	adcq	%r9, %rcx
-	adcq	%r15, %rdi
-	adcq	%r14, %r10
-	adcq	$0, %r12
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	movq	16(%rax), %rdx
-	mulxq	-64(%rsp), %rax, %r15   # 8-byte Folded Reload
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	mulxq	-72(%rsp), %rax, %r11   # 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	mulxq	-80(%rsp), %r13, %r9    # 8-byte Folded Reload
-	mulxq	-96(%rsp), %rsi, %r8    # 8-byte Folded Reload
-	mulxq	-88(%rsp), %r14, %rax   # 8-byte Folded Reload
-	addq	%rsi, %rax
-	adcq	%r13, %r8
-	adcq	-120(%rsp), %r9         # 8-byte Folded Reload
-	adcq	-104(%rsp), %r11        # 8-byte Folded Reload
-	adcq	$0, %r15
-	addq	%rbp, %r14
-	adcq	%rbx, %rax
-	adcq	%rcx, %r8
-	adcq	%rdi, %r9
-	adcq	%r10, %r11
-	adcq	%r12, %r15
-	sbbq	%r13, %r13
-	andl	$1, %r13d
-	movq	%r14, %rdx
-	imulq	-48(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rcx, %r12    # 8-byte Folded Reload
-	movq	%rcx, -104(%rsp)        # 8-byte Spill
-	mulxq	-16(%rsp), %rcx, %r10   # 8-byte Folded Reload
-	movq	%rcx, -120(%rsp)        # 8-byte Spill
-	mulxq	-40(%rsp), %rdi, %rsi   # 8-byte Folded Reload
-	mulxq	-24(%rsp), %rcx, %rbx   # 8-byte Folded Reload
-	mulxq	-32(%rsp), %rdx, %rbp   # 8-byte Folded Reload
-	addq	%rcx, %rbp
-	adcq	%rdi, %rbx
-	adcq	-120(%rsp), %rsi        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r10        # 8-byte Folded Reload
-	adcq	$0, %r12
-	addq	%r14, %rdx
-	adcq	%rax, %rbp
-	adcq	%r8, %rbx
-	adcq	%r9, %rsi
-	adcq	%r11, %r10
-	adcq	%r15, %r12
-	adcq	$0, %r13
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	movq	24(%rax), %rdx
-	mulxq	-64(%rsp), %rcx, %rax   # 8-byte Folded Reload
-	movq	%rcx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	mulxq	-72(%rsp), %r11, %r14   # 8-byte Folded Reload
-	mulxq	-80(%rsp), %r8, %r9     # 8-byte Folded Reload
-	mulxq	-96(%rsp), %rax, %rdi   # 8-byte Folded Reload
-	mulxq	-88(%rsp), %r15, %rcx   # 8-byte Folded Reload
-	addq	%rax, %rcx
-	adcq	%r8, %rdi
-	adcq	%r11, %r9
-	adcq	-120(%rsp), %r14        # 8-byte Folded Reload
-	movq	-104(%rsp), %rax        # 8-byte Reload
-	adcq	$0, %rax
-	addq	%rbp, %r15
-	adcq	%rbx, %rcx
-	adcq	%rsi, %rdi
-	adcq	%r10, %r9
-	adcq	%r12, %r14
-	adcq	%r13, %rax
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	movq	%r15, %rdx
-	imulq	-48(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rax, %rbp    # 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	mulxq	-16(%rsp), %r13, %r10   # 8-byte Folded Reload
-	mulxq	-40(%rsp), %rbx, %r8    # 8-byte Folded Reload
-	mulxq	-24(%rsp), %rsi, %r11   # 8-byte Folded Reload
-	mulxq	-32(%rsp), %rdx, %rax   # 8-byte Folded Reload
-	addq	%rsi, %rax
-	adcq	%rbx, %r11
-	adcq	%r13, %r8
-	adcq	-120(%rsp), %r10        # 8-byte Folded Reload
-	adcq	$0, %rbp
-	addq	%r15, %rdx
-	adcq	%rcx, %rax
-	adcq	%rdi, %r11
-	adcq	%r9, %r8
-	adcq	%r14, %r10
-	adcq	-104(%rsp), %rbp        # 8-byte Folded Reload
-	adcq	$0, %r12
-	movq	-56(%rsp), %rcx         # 8-byte Reload
-	movq	32(%rcx), %rdx
-	mulxq	-64(%rsp), %rcx, %r14   # 8-byte Folded Reload
-	movq	%rcx, -56(%rsp)         # 8-byte Spill
-	mulxq	-72(%rsp), %rcx, %rbx   # 8-byte Folded Reload
-	movq	%rcx, -64(%rsp)         # 8-byte Spill
-	mulxq	-80(%rsp), %rsi, %r15   # 8-byte Folded Reload
-	mulxq	-96(%rsp), %rcx, %r9    # 8-byte Folded Reload
-	mulxq	-88(%rsp), %r13, %rdi   # 8-byte Folded Reload
-	addq	%rcx, %rdi
-	adcq	%rsi, %r9
-	adcq	-64(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-56(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	$0, %r14
-	addq	%rax, %r13
-	adcq	%r11, %rdi
-	adcq	%r8, %r9
-	adcq	%r10, %r15
-	adcq	%rbp, %rbx
-	adcq	%r12, %r14
-	sbbq	%rax, %rax
-	movq	-48(%rsp), %rdx         # 8-byte Reload
-	imulq	%r13, %rdx
-	mulxq	-32(%rsp), %r10, %rcx   # 8-byte Folded Reload
-	mulxq	-24(%rsp), %r8, %rsi    # 8-byte Folded Reload
-	addq	%rcx, %r8
-	mulxq	-40(%rsp), %rbp, %r11   # 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	mulxq	-16(%rsp), %rcx, %r12   # 8-byte Folded Reload
-	adcq	%r11, %rcx
-	mulxq	-8(%rsp), %rsi, %r11    # 8-byte Folded Reload
-	adcq	%r12, %rsi
-	adcq	$0, %r11
-	andl	$1, %eax
-	addq	%r13, %r10
-	adcq	%rdi, %r8
-	adcq	%r9, %rbp
-	adcq	%r15, %rcx
-	adcq	%rbx, %rsi
-	adcq	%r14, %r11
-	adcq	$0, %rax
-	movq	%r8, %rdi
-	subq	-32(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rbp, %rbx
-	sbbq	-24(%rsp), %rbx         # 8-byte Folded Reload
-	movq	%rcx, %r9
-	sbbq	-40(%rsp), %r9          # 8-byte Folded Reload
-	movq	%rsi, %rdx
-	sbbq	-16(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%r11, %r10
-	sbbq	-8(%rsp), %r10          # 8-byte Folded Reload
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%rsi, %rdx
-	testb	%al, %al
-	cmovneq	%r8, %rdi
-	movq	-112(%rsp), %rax        # 8-byte Reload
-	movq	%rdi, (%rax)
-	cmovneq	%rbp, %rbx
-	movq	%rbx, 8(%rax)
-	cmovneq	%rcx, %r9
-	movq	%r9, 16(%rax)
-	movq	%rdx, 24(%rax)
-	cmovneq	%r11, %r10
-	movq	%r10, 32(%rax)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end68:
-	.size	mcl_fp_mont5Lbmi2, .Lfunc_end68-mcl_fp_mont5Lbmi2
-
-	.globl	mcl_fp_montNF5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF5Lbmi2,@function
-mcl_fp_montNF5Lbmi2:                    # @mcl_fp_montNF5Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rdi, -104(%rsp)        # 8-byte Spill
-	movq	(%rsi), %r13
-	movq	%r13, -64(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rbp
-	movq	%rbp, -24(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rax
-	movq	%rbp, %rdx
-	mulxq	%rax, %rbp, %r9
-	movq	%r13, %rdx
-	mulxq	%rax, %r8, %r10
-	movq	16(%rsi), %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	addq	%rbp, %r10
-	mulxq	%rax, %rbp, %rbx
-	adcq	%r9, %rbp
-	movq	24(%rsi), %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	mulxq	%rax, %r15, %r9
-	adcq	%rbx, %r15
-	movq	32(%rsi), %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	mulxq	%rax, %rax, %r11
-	adcq	%r9, %rax
-	adcq	$0, %r11
-	movq	-8(%rcx), %rsi
-	movq	%rsi, -72(%rsp)         # 8-byte Spill
-	movq	%r8, %rdx
-	imulq	%rsi, %rdx
-	movq	(%rcx), %rsi
-	movq	%rsi, -88(%rsp)         # 8-byte Spill
-	mulxq	%rsi, %rbx, %r14
-	addq	%r8, %rbx
-	movq	8(%rcx), %rsi
-	movq	%rsi, -80(%rsp)         # 8-byte Spill
-	mulxq	%rsi, %rbx, %r12
-	adcq	%r10, %rbx
-	movq	16(%rcx), %rsi
-	movq	%rsi, -96(%rsp)         # 8-byte Spill
-	mulxq	%rsi, %r10, %rdi
-	adcq	%rbp, %r10
-	movq	24(%rcx), %rsi
-	movq	%rsi, -56(%rsp)         # 8-byte Spill
-	mulxq	%rsi, %r9, %rbp
-	adcq	%r15, %r9
-	movq	32(%rcx), %rcx
-	movq	%rcx, -8(%rsp)          # 8-byte Spill
-	mulxq	%rcx, %r8, %rcx
-	adcq	%rax, %r8
-	adcq	$0, %r11
-	addq	%r14, %rbx
-	adcq	%r12, %r10
-	adcq	%rdi, %r9
-	adcq	%rbp, %r8
-	adcq	%rcx, %r11
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	movq	8(%rax), %rdx
-	mulxq	-24(%rsp), %rcx, %rsi   # 8-byte Folded Reload
-	mulxq	%r13, %r14, %rax
-	addq	%rcx, %rax
-	mulxq	-32(%rsp), %rcx, %rdi   # 8-byte Folded Reload
-	adcq	%rsi, %rcx
-	mulxq	-40(%rsp), %rsi, %r15   # 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-48(%rsp), %rdi, %rbp   # 8-byte Folded Reload
-	adcq	%r15, %rdi
-	adcq	$0, %rbp
-	addq	%rbx, %r14
-	adcq	%r10, %rax
-	adcq	%r9, %rcx
-	adcq	%r8, %rsi
-	adcq	%r11, %rdi
-	adcq	$0, %rbp
-	movq	%r14, %rdx
-	movq	-72(%rsp), %r12         # 8-byte Reload
-	imulq	%r12, %rdx
-	mulxq	-88(%rsp), %rbx, %r15   # 8-byte Folded Reload
-	addq	%r14, %rbx
-	movq	-80(%rsp), %r13         # 8-byte Reload
-	mulxq	%r13, %r8, %rbx
-	adcq	%rax, %r8
-	mulxq	-96(%rsp), %r9, %rax    # 8-byte Folded Reload
-	adcq	%rcx, %r9
-	mulxq	-56(%rsp), %r10, %rcx   # 8-byte Folded Reload
-	adcq	%rsi, %r10
-	mulxq	-8(%rsp), %r11, %rdx    # 8-byte Folded Reload
-	adcq	%rdi, %r11
-	adcq	$0, %rbp
-	addq	%r15, %r8
-	adcq	%rbx, %r9
-	adcq	%rax, %r10
-	adcq	%rcx, %r11
-	adcq	%rdx, %rbp
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	movq	16(%rax), %rdx
-	mulxq	-24(%rsp), %rcx, %rax   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %r14, %rsi   # 8-byte Folded Reload
-	addq	%rcx, %rsi
-	mulxq	-32(%rsp), %rbx, %rcx   # 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	-40(%rsp), %rdi, %r15   # 8-byte Folded Reload
-	adcq	%rcx, %rdi
-	mulxq	-48(%rsp), %rcx, %rax   # 8-byte Folded Reload
-	adcq	%r15, %rcx
-	adcq	$0, %rax
-	addq	%r8, %r14
-	adcq	%r9, %rsi
-	adcq	%r10, %rbx
-	adcq	%r11, %rdi
-	adcq	%rbp, %rcx
-	adcq	$0, %rax
-	movq	%r14, %rdx
-	imulq	%r12, %rdx
-	movq	-88(%rsp), %r12         # 8-byte Reload
-	mulxq	%r12, %rbp, %r15
-	addq	%r14, %rbp
-	mulxq	%r13, %r8, %rbp
-	adcq	%rsi, %r8
-	movq	-96(%rsp), %r13         # 8-byte Reload
-	mulxq	%r13, %r9, %rsi
-	adcq	%rbx, %r9
-	mulxq	-56(%rsp), %r10, %rbx   # 8-byte Folded Reload
-	adcq	%rdi, %r10
-	mulxq	-8(%rsp), %r11, %rdx    # 8-byte Folded Reload
-	adcq	%rcx, %r11
-	adcq	$0, %rax
-	addq	%r15, %r8
-	adcq	%rbp, %r9
-	adcq	%rsi, %r10
-	adcq	%rbx, %r11
-	adcq	%rdx, %rax
-	movq	-16(%rsp), %rcx         # 8-byte Reload
-	movq	24(%rcx), %rdx
-	mulxq	-24(%rsp), %rdi, %rsi   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %r14, %rcx   # 8-byte Folded Reload
-	addq	%rdi, %rcx
-	mulxq	-32(%rsp), %rbx, %rdi   # 8-byte Folded Reload
-	adcq	%rsi, %rbx
-	mulxq	-40(%rsp), %rsi, %r15   # 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-48(%rsp), %rdi, %rbp   # 8-byte Folded Reload
-	adcq	%r15, %rdi
-	adcq	$0, %rbp
-	addq	%r8, %r14
-	adcq	%r9, %rcx
-	adcq	%r10, %rbx
-	adcq	%r11, %rsi
-	adcq	%rax, %rdi
-	adcq	$0, %rbp
-	movq	%r14, %rdx
-	imulq	-72(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	%r12, %rax, %r11
-	addq	%r14, %rax
-	mulxq	-80(%rsp), %r8, %r14    # 8-byte Folded Reload
-	adcq	%rcx, %r8
-	mulxq	%r13, %r9, %rax
-	adcq	%rbx, %r9
-	movq	-56(%rsp), %r12         # 8-byte Reload
-	mulxq	%r12, %r10, %rbx
-	adcq	%rsi, %r10
-	mulxq	-8(%rsp), %rcx, %rdx    # 8-byte Folded Reload
-	adcq	%rdi, %rcx
-	adcq	$0, %rbp
-	addq	%r11, %r8
-	adcq	%r14, %r9
-	adcq	%rax, %r10
-	adcq	%rbx, %rcx
-	adcq	%rdx, %rbp
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	movq	32(%rax), %rdx
-	mulxq	-24(%rsp), %rdi, %rbx   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %r14, %rsi   # 8-byte Folded Reload
-	addq	%rdi, %rsi
-	mulxq	-32(%rsp), %rdi, %rax   # 8-byte Folded Reload
-	adcq	%rbx, %rdi
-	mulxq	-40(%rsp), %rbx, %r15   # 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	-48(%rsp), %r11, %rax   # 8-byte Folded Reload
-	adcq	%r15, %r11
-	adcq	$0, %rax
-	addq	%r8, %r14
-	adcq	%r9, %rsi
-	adcq	%r10, %rdi
-	adcq	%rcx, %rbx
-	adcq	%rbp, %r11
-	adcq	$0, %rax
-	movq	-72(%rsp), %rdx         # 8-byte Reload
-	imulq	%r14, %rdx
-	movq	-88(%rsp), %r10         # 8-byte Reload
-	mulxq	%r10, %rcx, %rbp
-	movq	%rbp, -16(%rsp)         # 8-byte Spill
-	addq	%r14, %rcx
-	movq	-80(%rsp), %r9          # 8-byte Reload
-	mulxq	%r9, %r14, %rcx
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	adcq	%rsi, %r14
-	movq	%r13, %r8
-	mulxq	%r8, %r15, %r13
-	adcq	%rdi, %r15
-	mulxq	%r12, %rbp, %rcx
-	adcq	%rbx, %rbp
-	movq	-8(%rsp), %rbx          # 8-byte Reload
-	mulxq	%rbx, %r12, %rdx
-	adcq	%r11, %r12
-	adcq	$0, %rax
-	addq	-16(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-24(%rsp), %r15         # 8-byte Folded Reload
-	adcq	%r13, %rbp
-	adcq	%rcx, %r12
-	adcq	%rdx, %rax
-	movq	%r14, %rcx
-	subq	%r10, %rcx
-	movq	%r15, %rsi
-	sbbq	%r9, %rsi
-	movq	%rbp, %rdi
-	sbbq	%r8, %rdi
-	movq	%r12, %r8
-	sbbq	-56(%rsp), %r8          # 8-byte Folded Reload
-	movq	%rax, %rdx
-	sbbq	%rbx, %rdx
-	movq	%rdx, %rbx
-	sarq	$63, %rbx
-	cmovsq	%r14, %rcx
-	movq	-104(%rsp), %rbx        # 8-byte Reload
-	movq	%rcx, (%rbx)
-	cmovsq	%r15, %rsi
-	movq	%rsi, 8(%rbx)
-	cmovsq	%rbp, %rdi
-	movq	%rdi, 16(%rbx)
-	cmovsq	%r12, %r8
-	movq	%r8, 24(%rbx)
-	cmovsq	%rax, %rdx
-	movq	%rdx, 32(%rbx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end69:
-	.size	mcl_fp_montNF5Lbmi2, .Lfunc_end69-mcl_fp_montNF5Lbmi2
-
-	.globl	mcl_fp_montRed5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed5Lbmi2,@function
-mcl_fp_montRed5Lbmi2:                   # @mcl_fp_montRed5Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rdi, -80(%rsp)         # 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	(%rcx), %rbx
-	movq	%rbx, -8(%rsp)          # 8-byte Spill
-	movq	(%rsi), %r9
-	movq	%r9, %rdx
-	imulq	%rax, %rdx
-	movq	%rax, %r15
-	movq	32(%rcx), %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	mulxq	%rax, %r8, %r13
-	movq	24(%rcx), %r12
-	movq	%r12, -32(%rsp)         # 8-byte Spill
-	mulxq	%r12, %r10, %r14
-	movq	16(%rcx), %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -16(%rsp)         # 8-byte Spill
-	mulxq	%rax, %rdi, %rbp
-	mulxq	%rcx, %rax, %r11
-	mulxq	%rbx, %rdx, %rcx
-	addq	%rax, %rcx
-	adcq	%rdi, %r11
-	adcq	%r10, %rbp
-	adcq	%r8, %r14
-	adcq	$0, %r13
-	addq	%r9, %rdx
-	movq	72(%rsi), %rax
-	movq	64(%rsi), %rdx
-	adcq	8(%rsi), %rcx
-	adcq	16(%rsi), %r11
-	adcq	24(%rsi), %rbp
-	adcq	32(%rsi), %r14
-	adcq	40(%rsi), %r13
-	movq	56(%rsi), %rdi
-	movq	48(%rsi), %rsi
-	adcq	$0, %rsi
-	movq	%rsi, -88(%rsp)         # 8-byte Spill
-	adcq	$0, %rdi
-	movq	%rdi, -72(%rsp)         # 8-byte Spill
-	adcq	$0, %rdx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rcx, %rdx
-	imulq	%r15, %rdx
-	mulxq	-40(%rsp), %rax, %r15   # 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	mulxq	%r12, %rax, %r10
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	-48(%rsp), %r12         # 8-byte Reload
-	mulxq	%r12, %rbx, %r8
-	mulxq	-16(%rsp), %r9, %rdi    # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rdx, %rax    # 8-byte Folded Reload
-	addq	%r9, %rax
-	adcq	%rbx, %rdi
-	adcq	-104(%rsp), %r8         # 8-byte Folded Reload
-	adcq	-96(%rsp), %r10         # 8-byte Folded Reload
-	adcq	$0, %r15
-	addq	%rcx, %rdx
-	adcq	%r11, %rax
-	adcq	%rbp, %rdi
-	adcq	%r14, %r8
-	adcq	%r13, %r10
-	adcq	-88(%rsp), %r15         # 8-byte Folded Reload
-	adcq	$0, -72(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -56(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -24(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rsi
-	movq	%rax, %rdx
-	imulq	-64(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	-40(%rsp), %rcx, %r13   # 8-byte Folded Reload
-	movq	%rcx, -88(%rsp)         # 8-byte Spill
-	mulxq	-32(%rsp), %rcx, %r14   # 8-byte Folded Reload
-	movq	%rcx, -96(%rsp)         # 8-byte Spill
-	mulxq	%r12, %r11, %rbx
-	mulxq	-16(%rsp), %r9, %rbp    # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rdx, %rcx    # 8-byte Folded Reload
-	addq	%r9, %rcx
-	adcq	%r11, %rbp
-	adcq	-96(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-88(%rsp), %r14         # 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%rax, %rdx
-	adcq	%rdi, %rcx
-	adcq	%r8, %rbp
-	adcq	%r10, %rbx
-	adcq	%r15, %r14
-	adcq	-72(%rsp), %r13         # 8-byte Folded Reload
-	adcq	$0, -56(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -24(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rsi
-	movq	%rcx, %rdx
-	imulq	-64(%rsp), %rdx         # 8-byte Folded Reload
-	movq	-40(%rsp), %r9          # 8-byte Reload
-	mulxq	%r9, %rax, %r12
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	mulxq	-32(%rsp), %rax, %r10   # 8-byte Folded Reload
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	mulxq	-48(%rsp), %r8, %r11    # 8-byte Folded Reload
-	mulxq	-16(%rsp), %rdi, %r15   # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rdx, %rax    # 8-byte Folded Reload
-	addq	%rdi, %rax
-	adcq	%r8, %r15
-	adcq	-88(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-72(%rsp), %r10         # 8-byte Folded Reload
-	adcq	$0, %r12
-	addq	%rcx, %rdx
-	adcq	%rbp, %rax
-	adcq	%rbx, %r15
-	adcq	%r14, %r11
-	adcq	%r13, %r10
-	adcq	-56(%rsp), %r12         # 8-byte Folded Reload
-	adcq	$0, -24(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rsi
-	movq	-64(%rsp), %rdx         # 8-byte Reload
-	imulq	%rax, %rdx
-	mulxq	%r9, %rdi, %rcx
-	movq	%rdi, -56(%rsp)         # 8-byte Spill
-	mulxq	-32(%rsp), %rbp, %rdi   # 8-byte Folded Reload
-	movq	%rbp, -64(%rsp)         # 8-byte Spill
-	mulxq	-48(%rsp), %r13, %rbp   # 8-byte Folded Reload
-	mulxq	-8(%rsp), %r8, %r9      # 8-byte Folded Reload
-	movq	-16(%rsp), %r14         # 8-byte Reload
-	mulxq	%r14, %rbx, %rdx
-	addq	%r9, %rbx
-	adcq	%r13, %rdx
-	adcq	-64(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-56(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	$0, %rcx
-	addq	%rax, %r8
-	adcq	%r15, %rbx
-	adcq	%r11, %rdx
-	adcq	%r10, %rbp
-	adcq	%r12, %rdi
-	adcq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	$0, %rsi
-	movq	%rbx, %rax
-	subq	-8(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rdx, %r8
-	sbbq	%r14, %r8
-	movq	%rbp, %r9
-	sbbq	-48(%rsp), %r9          # 8-byte Folded Reload
-	movq	%rdi, %r10
-	sbbq	-32(%rsp), %r10         # 8-byte Folded Reload
-	movq	%rcx, %r11
-	sbbq	-40(%rsp), %r11         # 8-byte Folded Reload
-	sbbq	$0, %rsi
-	andl	$1, %esi
-	cmovneq	%rcx, %r11
-	testb	%sil, %sil
-	cmovneq	%rbx, %rax
-	movq	-80(%rsp), %rcx         # 8-byte Reload
-	movq	%rax, (%rcx)
-	cmovneq	%rdx, %r8
-	movq	%r8, 8(%rcx)
-	cmovneq	%rbp, %r9
-	movq	%r9, 16(%rcx)
-	cmovneq	%rdi, %r10
-	movq	%r10, 24(%rcx)
-	movq	%r11, 32(%rcx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end70:
-	.size	mcl_fp_montRed5Lbmi2, .Lfunc_end70-mcl_fp_montRed5Lbmi2
-
-	.globl	mcl_fp_addPre5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre5Lbmi2,@function
-mcl_fp_addPre5Lbmi2:                    # @mcl_fp_addPre5Lbmi2
-# BB#0:
-	movq	32(%rdx), %r8
-	movq	24(%rdx), %r9
-	movq	24(%rsi), %r11
-	movq	32(%rsi), %r10
-	movq	16(%rdx), %rcx
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rcx
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	adcq	%r9, %r11
-	movq	%r11, 24(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-.Lfunc_end71:
-	.size	mcl_fp_addPre5Lbmi2, .Lfunc_end71-mcl_fp_addPre5Lbmi2
-
-	.globl	mcl_fp_subPre5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre5Lbmi2,@function
-mcl_fp_subPre5Lbmi2:                    # @mcl_fp_subPre5Lbmi2
-# BB#0:
-	pushq	%rbx
-	movq	32(%rsi), %r10
-	movq	24(%rdx), %r8
-	movq	32(%rdx), %r9
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %rcx
-	movq	%rbx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r8, %r11
-	movq	%r11, 24(%rdi)
-	sbbq	%r9, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	popq	%rbx
-	retq
-.Lfunc_end72:
-	.size	mcl_fp_subPre5Lbmi2, .Lfunc_end72-mcl_fp_subPre5Lbmi2
-
-	.globl	mcl_fp_shr1_5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_5Lbmi2,@function
-mcl_fp_shr1_5Lbmi2:                     # @mcl_fp_shr1_5Lbmi2
-# BB#0:
-	movq	32(%rsi), %r8
-	movq	24(%rsi), %rcx
-	movq	16(%rsi), %rdx
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rax
-	movq	%rax, (%rdi)
-	shrdq	$1, %rdx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rcx, %rdx
-	movq	%rdx, 16(%rdi)
-	shrdq	$1, %r8, %rcx
-	movq	%rcx, 24(%rdi)
-	shrq	%r8
-	movq	%r8, 32(%rdi)
-	retq
-.Lfunc_end73:
-	.size	mcl_fp_shr1_5Lbmi2, .Lfunc_end73-mcl_fp_shr1_5Lbmi2
-
-	.globl	mcl_fp_add5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add5Lbmi2,@function
-mcl_fp_add5Lbmi2:                       # @mcl_fp_add5Lbmi2
-# BB#0:
-	pushq	%rbx
-	movq	32(%rdx), %r11
-	movq	24(%rdx), %rbx
-	movq	24(%rsi), %r9
-	movq	32(%rsi), %r8
-	movq	16(%rdx), %r10
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r10
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	adcq	%rbx, %r9
-	movq	%r9, 24(%rdi)
-	adcq	%r11, %r8
-	movq	%r8, 32(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r10
-	sbbq	24(%rcx), %r9
-	sbbq	32(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB74_2
-# BB#1:                                 # %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	movq	%r9, 24(%rdi)
-	movq	%r8, 32(%rdi)
-.LBB74_2:                               # %carry
-	popq	%rbx
-	retq
-.Lfunc_end74:
-	.size	mcl_fp_add5Lbmi2, .Lfunc_end74-mcl_fp_add5Lbmi2
-
-	.globl	mcl_fp_addNF5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF5Lbmi2,@function
-mcl_fp_addNF5Lbmi2:                     # @mcl_fp_addNF5Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	32(%rdx), %r8
-	movq	24(%rdx), %r9
-	movq	16(%rdx), %r10
-	movq	(%rdx), %r14
-	movq	8(%rdx), %r11
-	addq	(%rsi), %r14
-	adcq	8(%rsi), %r11
-	adcq	16(%rsi), %r10
-	adcq	24(%rsi), %r9
-	adcq	32(%rsi), %r8
-	movq	%r14, %rsi
-	subq	(%rcx), %rsi
-	movq	%r11, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r10, %rbx
-	sbbq	16(%rcx), %rbx
-	movq	%r9, %r15
-	sbbq	24(%rcx), %r15
-	movq	%r8, %rax
-	sbbq	32(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r14, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r11, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
-	cmovsq	%r9, %r15
-	movq	%r15, 24(%rdi)
-	cmovsq	%r8, %rax
-	movq	%rax, 32(%rdi)
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end75:
-	.size	mcl_fp_addNF5Lbmi2, .Lfunc_end75-mcl_fp_addNF5Lbmi2
-
-	.globl	mcl_fp_sub5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub5Lbmi2,@function
-mcl_fp_sub5Lbmi2:                       # @mcl_fp_sub5Lbmi2
-# BB#0:
-	pushq	%r14
-	pushq	%rbx
-	movq	32(%rsi), %r8
-	movq	24(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %r10
-	movq	%rax, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	sbbq	%r11, %r9
-	movq	%r9, 24(%rdi)
-	sbbq	%r14, %r8
-	movq	%r8, 32(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	.LBB76_2
-# BB#1:                                 # %carry
-	movq	32(%rcx), %r11
-	movq	24(%rcx), %r14
-	movq	8(%rcx), %rdx
-	movq	16(%rcx), %rbx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%rsi, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
-	adcq	%r9, %r14
-	movq	%r14, 24(%rdi)
-	adcq	%r8, %r11
-	movq	%r11, 32(%rdi)
-.LBB76_2:                               # %nocarry
-	popq	%rbx
-	popq	%r14
-	retq
-.Lfunc_end76:
-	.size	mcl_fp_sub5Lbmi2, .Lfunc_end76-mcl_fp_sub5Lbmi2
-
-	.globl	mcl_fp_subNF5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF5Lbmi2,@function
-mcl_fp_subNF5Lbmi2:                     # @mcl_fp_subNF5Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	32(%rsi), %r11
-	movq	24(%rsi), %r8
-	movq	16(%rsi), %r9
-	movq	(%rsi), %r10
-	movq	8(%rsi), %r14
-	subq	(%rdx), %r10
-	sbbq	8(%rdx), %r14
-	sbbq	16(%rdx), %r9
-	sbbq	24(%rdx), %r8
-	sbbq	32(%rdx), %r11
-	movq	%r11, %rax
-	sarq	$63, %rax
-	movq	%rax, %rdx
-	shldq	$1, %r11, %rdx
-	movq	8(%rcx), %rbx
-	andq	%rdx, %rbx
-	andq	(%rcx), %rdx
-	movq	32(%rcx), %r15
-	andq	%rax, %r15
-	rorxq	$63, %rax, %rsi
-	andq	24(%rcx), %rax
-	andq	16(%rcx), %rsi
-	addq	%r10, %rdx
-	movq	%rdx, (%rdi)
-	adcq	%r14, %rbx
-	movq	%rbx, 8(%rdi)
-	adcq	%r9, %rsi
-	movq	%rsi, 16(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r11, %r15
-	movq	%r15, 32(%rdi)
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end77:
-	.size	mcl_fp_subNF5Lbmi2, .Lfunc_end77-mcl_fp_subNF5Lbmi2
-
-	.globl	mcl_fpDbl_add5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add5Lbmi2,@function
-mcl_fpDbl_add5Lbmi2:                    # @mcl_fpDbl_add5Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	72(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	64(%rdx), %r11
-	movq	56(%rdx), %r14
-	movq	48(%rdx), %r15
-	movq	24(%rsi), %rbp
-	movq	32(%rsi), %r13
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rax
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r12
-	adcq	24(%rdx), %rbp
-	adcq	32(%rdx), %r13
-	movq	40(%rdx), %r9
-	movq	%rbx, (%rdi)
-	movq	72(%rsi), %r8
-	movq	%rax, 8(%rdi)
-	movq	64(%rsi), %r10
-	movq	%r12, 16(%rdi)
-	movq	56(%rsi), %r12
-	movq	%rbp, 24(%rdi)
-	movq	48(%rsi), %rbp
-	movq	40(%rsi), %rbx
-	movq	%r13, 32(%rdi)
-	adcq	%r9, %rbx
-	adcq	%r15, %rbp
-	adcq	%r14, %r12
-	adcq	%r11, %r10
-	adcq	-8(%rsp), %r8           # 8-byte Folded Reload
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rbx, %rax
-	subq	(%rcx), %rax
-	movq	%rbp, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r12, %r9
-	sbbq	16(%rcx), %r9
-	movq	%r10, %r11
-	sbbq	24(%rcx), %r11
-	movq	%r8, %r14
-	sbbq	32(%rcx), %r14
-	sbbq	$0, %rsi
-	andl	$1, %esi
-	cmovneq	%rbx, %rax
-	movq	%rax, 40(%rdi)
-	testb	%sil, %sil
-	cmovneq	%rbp, %rdx
-	movq	%rdx, 48(%rdi)
-	cmovneq	%r12, %r9
-	movq	%r9, 56(%rdi)
-	cmovneq	%r10, %r11
-	movq	%r11, 64(%rdi)
-	cmovneq	%r8, %r14
-	movq	%r14, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end78:
-	.size	mcl_fpDbl_add5Lbmi2, .Lfunc_end78-mcl_fpDbl_add5Lbmi2
-
-	.globl	mcl_fpDbl_sub5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub5Lbmi2,@function
-mcl_fpDbl_sub5Lbmi2:                    # @mcl_fpDbl_sub5Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	72(%rdx), %r9
-	movq	64(%rdx), %r10
-	movq	56(%rdx), %r14
-	movq	16(%rsi), %r8
-	movq	(%rsi), %r15
-	movq	8(%rsi), %r11
-	xorl	%eax, %eax
-	subq	(%rdx), %r15
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %r8
-	movq	24(%rsi), %r12
-	sbbq	24(%rdx), %r12
-	movq	%r15, (%rdi)
-	movq	32(%rsi), %rbx
-	sbbq	32(%rdx), %rbx
-	movq	%r11, 8(%rdi)
-	movq	48(%rdx), %r15
-	movq	40(%rdx), %rdx
-	movq	%r8, 16(%rdi)
-	movq	72(%rsi), %r8
-	movq	%r12, 24(%rdi)
-	movq	64(%rsi), %r11
-	movq	%rbx, 32(%rdi)
-	movq	40(%rsi), %rbp
-	sbbq	%rdx, %rbp
-	movq	56(%rsi), %r12
-	movq	48(%rsi), %r13
-	sbbq	%r15, %r13
-	sbbq	%r14, %r12
-	sbbq	%r10, %r11
-	sbbq	%r9, %r8
-	movl	$0, %edx
-	sbbq	$0, %rdx
-	andl	$1, %edx
-	movq	(%rcx), %rsi
-	cmoveq	%rax, %rsi
-	testb	%dl, %dl
-	movq	16(%rcx), %rdx
-	cmoveq	%rax, %rdx
-	movq	8(%rcx), %rbx
-	cmoveq	%rax, %rbx
-	movq	32(%rcx), %r9
-	cmoveq	%rax, %r9
-	cmovneq	24(%rcx), %rax
-	addq	%rbp, %rsi
-	movq	%rsi, 40(%rdi)
-	adcq	%r13, %rbx
-	movq	%rbx, 48(%rdi)
-	adcq	%r12, %rdx
-	movq	%rdx, 56(%rdi)
-	adcq	%r11, %rax
-	movq	%rax, 64(%rdi)
-	adcq	%r8, %r9
-	movq	%r9, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end79:
-	.size	mcl_fpDbl_sub5Lbmi2, .Lfunc_end79-mcl_fpDbl_sub5Lbmi2
-
-	.globl	mcl_fp_mulUnitPre6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre6Lbmi2,@function
-mcl_fp_mulUnitPre6Lbmi2:                # @mcl_fp_mulUnitPre6Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	mulxq	40(%rsi), %r8, %r11
-	mulxq	32(%rsi), %r9, %r12
-	mulxq	24(%rsi), %r10, %rcx
-	mulxq	16(%rsi), %r14, %rbx
-	mulxq	8(%rsi), %r15, %rax
-	mulxq	(%rsi), %rdx, %rsi
-	movq	%rdx, (%rdi)
-	addq	%r15, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r14, %rax
-	movq	%rax, 16(%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 24(%rdi)
-	adcq	%r9, %rcx
-	movq	%rcx, 32(%rdi)
-	adcq	%r8, %r12
-	movq	%r12, 40(%rdi)
-	adcq	$0, %r11
-	movq	%r11, 48(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end80:
-	.size	mcl_fp_mulUnitPre6Lbmi2, .Lfunc_end80-mcl_fp_mulUnitPre6Lbmi2
-
-	.globl	mcl_fpDbl_mulPre6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre6Lbmi2,@function
-mcl_fpDbl_mulPre6Lbmi2:                 # @mcl_fpDbl_mulPre6Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %r11
-	movq	%r11, -16(%rsp)         # 8-byte Spill
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	(%rsi), %r15
-	movq	8(%rsi), %rcx
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	movq	(%r11), %rax
-	movq	%rcx, %rdx
-	mulxq	%rax, %rcx, %r14
-	movq	%r15, %rdx
-	mulxq	%rax, %rdx, %rbp
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %rbx
-	movq	%rbx, -32(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	addq	%rcx, %rbp
-	mulxq	%rax, %rcx, %r12
-	adcq	%r14, %rcx
-	movq	%rbx, %rdx
-	mulxq	%rax, %rbx, %r14
-	adcq	%r12, %rbx
-	movq	32(%rsi), %r12
-	movq	%r12, %rdx
-	mulxq	%rax, %r8, %r13
-	adcq	%r14, %r8
-	movq	40(%rsi), %r14
-	movq	%r14, %rdx
-	mulxq	%rax, %r9, %r10
-	adcq	%r13, %r9
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	movq	%rax, (%rdi)
-	adcq	$0, %r10
-	movq	8(%r11), %rdi
-	movq	%r15, %rdx
-	mulxq	%rdi, %r13, %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	addq	%rbp, %r13
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %rbp, %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	adcq	%rcx, %rbp
-	movq	-40(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %rax, %r11
-	adcq	%rbx, %rax
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %rbx, %rcx
-	movq	%rcx, -32(%rsp)         # 8-byte Spill
-	adcq	%r8, %rbx
-	movq	%r12, %rdx
-	mulxq	%rdi, %rcx, %r8
-	adcq	%r9, %rcx
-	movq	%r14, %rdx
-	mulxq	%rdi, %r12, %rdx
-	adcq	%r10, %r12
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	addq	-48(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-24(%rsp), %rax         # 8-byte Folded Reload
-	adcq	%r11, %rbx
-	movq	-8(%rsp), %rdi          # 8-byte Reload
-	movq	%r13, 8(%rdi)
-	adcq	-32(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	%r8, %r12
-	adcq	%rdx, %r15
-	movq	(%rsi), %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %r8
-	movq	%r8, -24(%rsp)          # 8-byte Spill
-	movq	-16(%rsp), %r14         # 8-byte Reload
-	movq	16(%r14), %rdi
-	mulxq	%rdi, %r13, %rdx
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	addq	%rbp, %r13
-	movq	%r8, %rdx
-	mulxq	%rdi, %r8, %rdx
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	adcq	%rax, %r8
-	movq	16(%rsi), %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	mulxq	%rdi, %r11, %rax
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	adcq	%rbx, %r11
-	movq	24(%rsi), %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	mulxq	%rdi, %rax, %rbx
-	adcq	%rcx, %rax
-	movq	32(%rsi), %rdx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	mulxq	%rdi, %r10, %rcx
-	adcq	%r12, %r10
-	movq	40(%rsi), %rdx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	mulxq	%rdi, %r9, %rdx
-	adcq	%r15, %r9
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	addq	-72(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-80(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-88(%rsp), %rax         # 8-byte Folded Reload
-	adcq	%rbx, %r10
-	adcq	%rcx, %r9
-	adcq	%rdx, %rbp
-	movq	-8(%rsp), %rcx          # 8-byte Reload
-	movq	%r13, 16(%rcx)
-	movq	24(%r14), %rdi
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %r12, %rcx
-	movq	%rcx, -32(%rsp)         # 8-byte Spill
-	addq	%r8, %r12
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %rbx, %rcx
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	adcq	%r11, %rbx
-	movq	-40(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %rcx, %r11
-	adcq	%rax, %rcx
-	movq	-48(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %r14, %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	adcq	%r10, %r14
-	movq	-56(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %r8, %rax
-	adcq	%r9, %r8
-	movq	-64(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %r13, %rdx
-	adcq	%rbp, %r13
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	addq	-32(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	%r11, %r14
-	movq	-8(%rsp), %rdi          # 8-byte Reload
-	movq	%r12, 24(%rdi)
-	adcq	-40(%rsp), %r8          # 8-byte Folded Reload
-	adcq	%rax, %r13
-	adcq	%rdx, %r15
-	movq	(%rsi), %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rbp
-	movq	%rbp, -24(%rsp)         # 8-byte Spill
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	movq	32(%rax), %rdi
-	mulxq	%rdi, %r12, %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	addq	%rbx, %r12
-	movq	%rbp, %rdx
-	mulxq	%rdi, %rbx, %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	adcq	%rcx, %rbx
-	movq	16(%rsi), %r11
-	movq	%r11, %rdx
-	mulxq	%rdi, %rax, %rcx
-	movq	%rcx, -56(%rsp)         # 8-byte Spill
-	adcq	%r14, %rax
-	movq	24(%rsi), %r14
-	movq	%r14, %rdx
-	mulxq	%rdi, %rbp, %rcx
-	movq	%rcx, -64(%rsp)         # 8-byte Spill
-	adcq	%r8, %rbp
-	movq	32(%rsi), %r8
-	movq	%r8, %rdx
-	mulxq	%rdi, %rcx, %r10
-	adcq	%r13, %rcx
-	movq	40(%rsi), %r13
-	movq	%r13, %rdx
-	mulxq	%rdi, %r9, %rdx
-	adcq	%r15, %r9
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	addq	-40(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rax         # 8-byte Folded Reload
-	adcq	-56(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-64(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	%r10, %r9
-	adcq	%rdx, %rsi
-	movq	-8(%rsp), %r10          # 8-byte Reload
-	movq	%r12, 32(%r10)
-	movq	-16(%rsp), %rdx         # 8-byte Reload
-	movq	40(%rdx), %rdi
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %r15, %rdx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	addq	%rbx, %r15
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %rbx, %r12
-	adcq	%rax, %rbx
-	movq	%r11, %rdx
-	mulxq	%rdi, %rax, %r11
-	adcq	%rbp, %rax
-	movq	%r14, %rdx
-	mulxq	%rdi, %rbp, %r14
-	adcq	%rcx, %rbp
-	movq	%r8, %rdx
-	mulxq	%rdi, %rcx, %r8
-	adcq	%r9, %rcx
-	movq	%r13, %rdx
-	mulxq	%rdi, %rdi, %r9
-	adcq	%rsi, %rdi
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	addq	-16(%rsp), %rbx         # 8-byte Folded Reload
-	movq	%r15, 40(%r10)
-	movq	%rbx, 48(%r10)
-	adcq	%r12, %rax
-	movq	%rax, 56(%r10)
-	adcq	%r11, %rbp
-	movq	%rbp, 64(%r10)
-	adcq	%r14, %rcx
-	movq	%rcx, 72(%r10)
-	adcq	%r8, %rdi
-	movq	%rdi, 80(%r10)
-	adcq	%r9, %rsi
-	movq	%rsi, 88(%r10)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end81:
-	.size	mcl_fpDbl_mulPre6Lbmi2, .Lfunc_end81-mcl_fpDbl_mulPre6Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre6Lbmi2,@function
-mcl_fpDbl_sqrPre6Lbmi2:                 # @mcl_fpDbl_sqrPre6Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdi, %r9
-	movq	%r9, -8(%rsp)           # 8-byte Spill
-	movq	16(%rsi), %rdx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rax
-	mulxq	%rcx, %r10, %r8
-	movq	24(%rsi), %rbp
-	movq	%rbp, -32(%rsp)         # 8-byte Spill
-	movq	%rax, %rdx
-	mulxq	%rcx, %r11, %rbx
-	movq	%rbx, -16(%rsp)         # 8-byte Spill
-	movq	%rcx, %rdx
-	mulxq	%rcx, %rdx, %r14
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	addq	%r11, %r14
-	adcq	%rbx, %r10
-	movq	%rbp, %rdx
-	mulxq	%rcx, %r15, %rbp
-	adcq	%r8, %r15
-	movq	32(%rsi), %rbx
-	movq	%rbx, %rdx
-	mulxq	%rcx, %r8, %r13
-	adcq	%rbp, %r8
-	movq	40(%rsi), %rdi
-	movq	%rdi, %rdx
-	mulxq	%rcx, %rcx, %r12
-	adcq	%r13, %rcx
-	movq	-40(%rsp), %rdx         # 8-byte Reload
-	movq	%rdx, (%r9)
-	adcq	$0, %r12
-	addq	%r11, %r14
-	movq	%rax, %rdx
-	mulxq	%rax, %rbp, %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	adcq	%r10, %rbp
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %r13, %r10
-	adcq	%r15, %r13
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %r15, %rdx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	adcq	%r8, %r15
-	movq	%rbx, %rdx
-	mulxq	%rax, %rbx, %r8
-	adcq	%rcx, %rbx
-	movq	%rdi, %rdx
-	mulxq	%rax, %r11, %rax
-	adcq	%r12, %r11
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-16(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-40(%rsp), %r13         # 8-byte Folded Reload
-	movq	%r14, 8(%r9)
-	adcq	%r10, %r15
-	adcq	-24(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	%r8, %r11
-	adcq	%rax, %r12
-	movq	(%rsi), %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rdi
-	movq	%rdi, -24(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %rcx
-	mulxq	%rcx, %rax, %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	addq	%rbp, %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	%rdi, %rdx
-	mulxq	%rcx, %rbp, %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	adcq	%r13, %rbp
-	movq	%rcx, %rdx
-	mulxq	%rcx, %r13, %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	adcq	%r15, %r13
-	movq	24(%rsi), %rax
-	movq	%rax, %rdx
-	mulxq	%rcx, %r8, %rdi
-	movq	%rdi, -40(%rsp)         # 8-byte Spill
-	adcq	%r8, %rbx
-	movq	32(%rsi), %r10
-	movq	%r10, %rdx
-	mulxq	%rcx, %r14, %r15
-	adcq	%r11, %r14
-	movq	40(%rsi), %r11
-	movq	%r11, %rdx
-	mulxq	%rcx, %r9, %rdx
-	adcq	%r12, %r9
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	-48(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-56(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-64(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	%rdi, %r14
-	adcq	%r15, %r9
-	adcq	%rdx, %rcx
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %rdi, %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	addq	%rbp, %rdi
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %r15, %rbp
-	adcq	%r13, %r15
-	adcq	%r8, %rbx
-	movq	%rax, %rdx
-	mulxq	%rax, %r8, %rdx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	adcq	%r14, %r8
-	movq	%r10, %rdx
-	mulxq	%rax, %r12, %r10
-	adcq	%r9, %r12
-	movq	%r11, %rdx
-	mulxq	%rax, %r13, %rax
-	adcq	%rcx, %r13
-	sbbq	%r9, %r9
-	andl	$1, %r9d
-	addq	-32(%rsp), %r15         # 8-byte Folded Reload
-	adcq	%rbp, %rbx
-	movq	-8(%rsp), %rdx          # 8-byte Reload
-	movq	-16(%rsp), %rbp         # 8-byte Reload
-	movq	%rbp, 16(%rdx)
-	movq	%rdi, 24(%rdx)
-	adcq	-40(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-24(%rsp), %r12         # 8-byte Folded Reload
-	adcq	%r10, %r13
-	adcq	%rax, %r9
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rdi
-	movq	%rdi, -24(%rsp)         # 8-byte Spill
-	movq	32(%rsi), %rax
-	movq	%rcx, %rdx
-	mulxq	%rax, %rdx, %rbp
-	movq	%rbp, -40(%rsp)         # 8-byte Spill
-	addq	%r15, %rdx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rdi, %rdx
-	mulxq	%rax, %r15, %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	adcq	%rbx, %r15
-	movq	16(%rsi), %r10
-	movq	%r10, %rdx
-	mulxq	%rax, %r14, %rbx
-	adcq	%r8, %r14
-	movq	24(%rsi), %r8
-	movq	%r8, %rdx
-	mulxq	%rax, %rbp, %rdi
-	adcq	%r12, %rbp
-	movq	%rax, %rdx
-	mulxq	%rax, %r11, %r12
-	adcq	%r13, %r11
-	movq	40(%rsi), %rsi
-	movq	%rsi, %rdx
-	mulxq	%rax, %r13, %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	adcq	%r13, %r9
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	-40(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-48(%rsp), %r14         # 8-byte Folded Reload
-	adcq	%rbx, %rbp
-	adcq	%rdi, %r11
-	adcq	%r12, %r9
-	adcq	%rdx, %rax
-	movq	%rcx, %rdx
-	mulxq	%rsi, %r12, %rcx
-	addq	%r15, %r12
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rsi, %rdi, %r15
-	adcq	%r14, %rdi
-	movq	%r10, %rdx
-	mulxq	%rsi, %rbx, %r10
-	adcq	%rbp, %rbx
-	movq	%r8, %rdx
-	mulxq	%rsi, %rbp, %r8
-	adcq	%r11, %rbp
-	adcq	%r13, %r9
-	movq	%rsi, %rdx
-	mulxq	%rsi, %rsi, %r11
-	adcq	%rax, %rsi
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%rcx, %rdi
-	movq	-8(%rsp), %rdx          # 8-byte Reload
-	movq	-16(%rsp), %rcx         # 8-byte Reload
-	movq	%rcx, 32(%rdx)
-	movq	%r12, 40(%rdx)
-	movq	%rdi, 48(%rdx)
-	adcq	%r15, %rbx
-	movq	%rbx, 56(%rdx)
-	adcq	%r10, %rbp
-	movq	%rbp, 64(%rdx)
-	adcq	%r8, %r9
-	movq	%r9, 72(%rdx)
-	adcq	-32(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, 80(%rdx)
-	adcq	%r11, %rax
-	movq	%rax, 88(%rdx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end82:
-	.size	mcl_fpDbl_sqrPre6Lbmi2, .Lfunc_end82-mcl_fpDbl_sqrPre6Lbmi2
-
-	.globl	mcl_fp_mont6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont6Lbmi2,@function
-mcl_fp_mont6Lbmi2:                      # @mcl_fp_mont6Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$32, %rsp
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rdi, -104(%rsp)        # 8-byte Spill
-	movq	40(%rsi), %rdi
-	movq	%rdi, -40(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rax
-	movq	%rdi, %rdx
-	mulxq	%rax, %r11, %r14
-	movq	32(%rsi), %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	mulxq	%rax, %r15, %rbx
-	movq	24(%rsi), %rdx
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %rdi
-	movq	%rdi, -72(%rsp)         # 8-byte Spill
-	movq	(%rsi), %rbp
-	movq	%rbp, -56(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rsi
-	movq	%rsi, -64(%rsp)         # 8-byte Spill
-	mulxq	%rax, %r8, %r12
-	movq	%rdi, %rdx
-	mulxq	%rax, %r9, %r10
-	movq	%rsi, %rdx
-	mulxq	%rax, %rdi, %r13
-	movq	%rbp, %rdx
-	mulxq	%rax, %rdx, %rbp
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	addq	%rdi, %rbp
-	adcq	%r9, %r13
-	adcq	%r8, %r10
-	adcq	%r15, %r12
-	adcq	%r11, %rbx
-	movq	%rbx, %rdi
-	adcq	$0, %r14
-	movq	%r14, -88(%rsp)         # 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	imulq	%rax, %rdx
-	movq	(%rcx), %rsi
-	movq	%rsi, (%rsp)            # 8-byte Spill
-	movq	40(%rcx), %rax
-	movq	%rax, 24(%rsp)          # 8-byte Spill
-	mulxq	%rax, %rax, %r9
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	16(%rcx), %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	mulxq	%rax, %r8, %r11
-	movq	8(%rcx), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	mulxq	%rax, %rax, %r14
-	mulxq	%rsi, %r15, %rsi
-	addq	%rax, %rsi
-	adcq	%r8, %r14
-	movq	24(%rcx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	mulxq	%rax, %rbx, %r8
-	adcq	%r11, %rbx
-	movq	32(%rcx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	mulxq	%rax, %rax, %rcx
-	adcq	%r8, %rax
-	adcq	-112(%rsp), %rcx        # 8-byte Folded Reload
-	adcq	$0, %r9
-	addq	-96(%rsp), %r15         # 8-byte Folded Reload
-	adcq	%rbp, %rsi
-	adcq	%r13, %r14
-	adcq	%r10, %rbx
-	adcq	%r12, %rax
-	adcq	%rdi, %rcx
-	adcq	-88(%rsp), %r9          # 8-byte Folded Reload
-	movq	%r9, -96(%rsp)          # 8-byte Spill
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	movq	8(%rdx), %rdx
-	mulxq	-40(%rsp), %rdi, %rbp   # 8-byte Folded Reload
-	movq	%rdi, -112(%rsp)        # 8-byte Spill
-	movq	%rbp, -88(%rsp)         # 8-byte Spill
-	mulxq	-48(%rsp), %rdi, %r13   # 8-byte Folded Reload
-	movq	%rdi, -120(%rsp)        # 8-byte Spill
-	mulxq	-80(%rsp), %rdi, %r15   # 8-byte Folded Reload
-	movq	%rdi, -128(%rsp)        # 8-byte Spill
-	mulxq	-64(%rsp), %r8, %rdi    # 8-byte Folded Reload
-	mulxq	-56(%rsp), %rbp, %r10   # 8-byte Folded Reload
-	addq	%r8, %r10
-	mulxq	-72(%rsp), %r9, %r11    # 8-byte Folded Reload
-	adcq	%rdi, %r9
-	adcq	-128(%rsp), %r11        # 8-byte Folded Reload
-	adcq	-120(%rsp), %r15        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r13        # 8-byte Folded Reload
-	movq	-88(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%rsi, %rbp
-	adcq	%r14, %r10
-	adcq	%rbx, %r9
-	adcq	%rax, %r11
-	adcq	%rcx, %r15
-	adcq	-96(%rsp), %r13         # 8-byte Folded Reload
-	adcq	%r12, %rdx
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rbp, %rdx
-	imulq	-24(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	24(%rsp), %rax, %r12    # 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	mulxq	-16(%rsp), %rax, %r14   # 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	mulxq	8(%rsp), %rcx, %rsi     # 8-byte Folded Reload
-	mulxq	(%rsp), %rax, %r8       # 8-byte Folded Reload
-	addq	%rcx, %r8
-	mulxq	16(%rsp), %rdi, %rbx    # 8-byte Folded Reload
-	adcq	%rsi, %rdi
-	mulxq	-8(%rsp), %rcx, %rsi    # 8-byte Folded Reload
-	adcq	%rbx, %rcx
-	adcq	-120(%rsp), %rsi        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r14        # 8-byte Folded Reload
-	adcq	$0, %r12
-	addq	%rbp, %rax
-	adcq	%r10, %r8
-	adcq	%r9, %rdi
-	adcq	%r11, %rcx
-	adcq	%r15, %rsi
-	adcq	%r13, %r14
-	adcq	-88(%rsp), %r12         # 8-byte Folded Reload
-	adcq	$0, -96(%rsp)           # 8-byte Folded Spill
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	movq	16(%rdx), %rdx
-	mulxq	-40(%rsp), %rbp, %rax   # 8-byte Folded Reload
-	movq	%rbp, -112(%rsp)        # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	mulxq	-48(%rsp), %rax, %r13   # 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	mulxq	-80(%rsp), %rbp, %r15   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %r9, %rbx    # 8-byte Folded Reload
-	mulxq	-56(%rsp), %rax, %r11   # 8-byte Folded Reload
-	addq	%r9, %r11
-	mulxq	-72(%rsp), %r9, %r10    # 8-byte Folded Reload
-	adcq	%rbx, %r9
-	adcq	%rbp, %r10
-	adcq	-120(%rsp), %r15        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r13        # 8-byte Folded Reload
-	movq	-88(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r8, %rax
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %rbp
-	adcq	%rdi, %r11
-	adcq	%rcx, %r9
-	adcq	%rsi, %r10
-	adcq	%r14, %r15
-	adcq	%r12, %r13
-	adcq	-96(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rbp, %rdx
-	imulq	-24(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	24(%rsp), %rax, %r8     # 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	mulxq	-16(%rsp), %r12, %r14   # 8-byte Folded Reload
-	mulxq	8(%rsp), %rcx, %rsi     # 8-byte Folded Reload
-	mulxq	(%rsp), %rax, %rbx      # 8-byte Folded Reload
-	addq	%rcx, %rbx
-	mulxq	16(%rsp), %rbp, %rdi    # 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	mulxq	-8(%rsp), %rcx, %rsi    # 8-byte Folded Reload
-	adcq	%rdi, %rcx
-	adcq	%r12, %rsi
-	adcq	-120(%rsp), %r14        # 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	-112(%rsp), %rax        # 8-byte Folded Reload
-	adcq	%r11, %rbx
-	adcq	%r9, %rbp
-	adcq	%r10, %rcx
-	adcq	%r15, %rsi
-	adcq	%r13, %r14
-	adcq	-88(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r8, -112(%rsp)         # 8-byte Spill
-	movq	-96(%rsp), %r13         # 8-byte Reload
-	adcq	$0, %r13
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	movq	24(%rdx), %rdx
-	mulxq	-40(%rsp), %rdi, %rax   # 8-byte Folded Reload
-	movq	%rdi, -96(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	mulxq	-48(%rsp), %rdi, %rax   # 8-byte Folded Reload
-	movq	%rdi, -120(%rsp)        # 8-byte Spill
-	mulxq	-80(%rsp), %r15, %r12   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %r8, %r11    # 8-byte Folded Reload
-	mulxq	-56(%rsp), %r10, %rdi   # 8-byte Folded Reload
-	addq	%r8, %rdi
-	mulxq	-72(%rsp), %r8, %r9     # 8-byte Folded Reload
-	adcq	%r11, %r8
-	adcq	%r15, %r9
-	adcq	-120(%rsp), %r12        # 8-byte Folded Reload
-	adcq	-96(%rsp), %rax         # 8-byte Folded Reload
-	movq	-88(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%rbx, %r10
-	adcq	%rbp, %rdi
-	adcq	%rcx, %r8
-	adcq	%rsi, %r9
-	adcq	%r14, %r12
-	adcq	-112(%rsp), %rax        # 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	adcq	%r13, %rdx
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, %r14
-	movq	%r10, %rdx
-	imulq	-24(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	24(%rsp), %rax, %r13    # 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	mulxq	-16(%rsp), %rax, %r11   # 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	mulxq	8(%rsp), %rbp, %rsi     # 8-byte Folded Reload
-	mulxq	(%rsp), %rcx, %rbx      # 8-byte Folded Reload
-	addq	%rbp, %rbx
-	mulxq	16(%rsp), %rbp, %rax    # 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	mulxq	-8(%rsp), %rsi, %r15    # 8-byte Folded Reload
-	adcq	%rax, %rsi
-	adcq	-120(%rsp), %r15        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r11        # 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%r10, %rcx
-	adcq	%rdi, %rbx
-	adcq	%r8, %rbp
-	adcq	%r9, %rsi
-	adcq	%r12, %r15
-	adcq	-96(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-88(%rsp), %r13         # 8-byte Folded Reload
-	movq	%r14, %rdi
-	adcq	$0, %rdi
-	movq	-32(%rsp), %rcx         # 8-byte Reload
-	movq	32(%rcx), %rdx
-	mulxq	-40(%rsp), %rcx, %rax   # 8-byte Folded Reload
-	movq	%rcx, -96(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	mulxq	-48(%rsp), %rax, %r12   # 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	mulxq	-80(%rsp), %rax, %r14   # 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	mulxq	-64(%rsp), %rcx, %r9    # 8-byte Folded Reload
-	mulxq	-56(%rsp), %rax, %r8    # 8-byte Folded Reload
-	addq	%rcx, %r8
-	mulxq	-72(%rsp), %rcx, %r10   # 8-byte Folded Reload
-	adcq	%r9, %rcx
-	adcq	-120(%rsp), %r10        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r14        # 8-byte Folded Reload
-	adcq	-96(%rsp), %r12         # 8-byte Folded Reload
-	movq	-88(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%rbx, %rax
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %rbx
-	adcq	%rbp, %r8
-	adcq	%rsi, %rcx
-	adcq	%r15, %r10
-	adcq	%r11, %r14
-	adcq	%r13, %r12
-	adcq	%rdi, %rdx
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rbx, %rdx
-	imulq	-24(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	24(%rsp), %rax, %r15    # 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	mulxq	-16(%rsp), %r13, %r11   # 8-byte Folded Reload
-	mulxq	8(%rsp), %rsi, %rax     # 8-byte Folded Reload
-	mulxq	(%rsp), %rdi, %rbx      # 8-byte Folded Reload
-	addq	%rsi, %rbx
-	mulxq	16(%rsp), %rbp, %r9     # 8-byte Folded Reload
-	adcq	%rax, %rbp
-	mulxq	-8(%rsp), %rax, %rsi    # 8-byte Folded Reload
-	adcq	%r9, %rax
-	adcq	%r13, %rsi
-	adcq	-120(%rsp), %r11        # 8-byte Folded Reload
-	adcq	$0, %r15
-	addq	-112(%rsp), %rdi        # 8-byte Folded Reload
-	adcq	%r8, %rbx
-	adcq	%rcx, %rbp
-	adcq	%r10, %rax
-	adcq	%r14, %rsi
-	adcq	%r12, %r11
-	adcq	-88(%rsp), %r15         # 8-byte Folded Reload
-	movq	-96(%rsp), %r8          # 8-byte Reload
-	adcq	$0, %r8
-	movq	-32(%rsp), %rcx         # 8-byte Reload
-	movq	40(%rcx), %rdx
-	mulxq	-40(%rsp), %rdi, %rcx   # 8-byte Folded Reload
-	movq	%rdi, -88(%rsp)         # 8-byte Spill
-	movq	%rcx, -32(%rsp)         # 8-byte Spill
-	mulxq	-48(%rsp), %rdi, %rcx   # 8-byte Folded Reload
-	movq	%rdi, -48(%rsp)         # 8-byte Spill
-	movq	%rcx, -40(%rsp)         # 8-byte Spill
-	mulxq	-80(%rsp), %rcx, %r14   # 8-byte Folded Reload
-	movq	%rcx, -80(%rsp)         # 8-byte Spill
-	mulxq	-72(%rsp), %rdi, %r12   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %rcx, %r10   # 8-byte Folded Reload
-	mulxq	-56(%rsp), %r13, %r9    # 8-byte Folded Reload
-	addq	%rcx, %r9
-	adcq	%rdi, %r10
-	adcq	-80(%rsp), %r12         # 8-byte Folded Reload
-	adcq	-48(%rsp), %r14         # 8-byte Folded Reload
-	movq	-40(%rsp), %rdx         # 8-byte Reload
-	adcq	-88(%rsp), %rdx         # 8-byte Folded Reload
-	movq	-32(%rsp), %rcx         # 8-byte Reload
-	adcq	$0, %rcx
-	addq	%rbx, %r13
-	adcq	%rbp, %r9
-	adcq	%rax, %r10
-	adcq	%rsi, %r12
-	adcq	%r11, %r14
-	adcq	%r15, %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	adcq	%r8, %rcx
-	movq	%rcx, -32(%rsp)         # 8-byte Spill
-	sbbq	%rcx, %rcx
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	imulq	%r13, %rdx
-	mulxq	(%rsp), %r11, %rax      # 8-byte Folded Reload
-	mulxq	8(%rsp), %rdi, %rbx     # 8-byte Folded Reload
-	addq	%rax, %rdi
-	mulxq	16(%rsp), %rsi, %rax    # 8-byte Folded Reload
-	adcq	%rbx, %rsi
-	mulxq	-8(%rsp), %rbx, %rbp    # 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	-16(%rsp), %rax, %r15   # 8-byte Folded Reload
-	adcq	%rbp, %rax
-	mulxq	24(%rsp), %rbp, %rdx    # 8-byte Folded Reload
-	adcq	%r15, %rbp
-	adcq	$0, %rdx
-	andl	$1, %ecx
-	addq	%r13, %r11
-	adcq	%r9, %rdi
-	adcq	%r10, %rsi
-	adcq	%r12, %rbx
-	adcq	%r14, %rax
-	adcq	-40(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-32(%rsp), %rdx         # 8-byte Folded Reload
-	adcq	$0, %rcx
-	movq	%rdi, %r8
-	subq	(%rsp), %r8             # 8-byte Folded Reload
-	movq	%rsi, %r9
-	sbbq	8(%rsp), %r9            # 8-byte Folded Reload
-	movq	%rbx, %r10
-	sbbq	16(%rsp), %r10          # 8-byte Folded Reload
-	movq	%rax, %r11
-	sbbq	-8(%rsp), %r11          # 8-byte Folded Reload
-	movq	%rbp, %r14
-	sbbq	-16(%rsp), %r14         # 8-byte Folded Reload
-	movq	%rdx, %r15
-	sbbq	24(%rsp), %r15          # 8-byte Folded Reload
-	sbbq	$0, %rcx
-	andl	$1, %ecx
-	cmovneq	%rax, %r11
-	testb	%cl, %cl
-	cmovneq	%rdi, %r8
-	movq	-104(%rsp), %rax        # 8-byte Reload
-	movq	%r8, (%rax)
-	cmovneq	%rsi, %r9
-	movq	%r9, 8(%rax)
-	cmovneq	%rbx, %r10
-	movq	%r10, 16(%rax)
-	movq	%r11, 24(%rax)
-	cmovneq	%rbp, %r14
-	movq	%r14, 32(%rax)
-	cmovneq	%rdx, %r15
-	movq	%r15, 40(%rax)
-	addq	$32, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end83:
-	.size	mcl_fp_mont6Lbmi2, .Lfunc_end83-mcl_fp_mont6Lbmi2
-
-	.globl	mcl_fp_montNF6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF6Lbmi2,@function
-mcl_fp_montNF6Lbmi2:                    # @mcl_fp_montNF6Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rdi, -120(%rsp)        # 8-byte Spill
-	movq	(%rsi), %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rdi
-	movq	%rdi, -80(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rbp
-	movq	%rdi, %rdx
-	mulxq	%rbp, %rdi, %rbx
-	movq	%rax, %rdx
-	mulxq	%rbp, %r9, %r14
-	movq	16(%rsi), %rdx
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	addq	%rdi, %r14
-	mulxq	%rbp, %rdi, %r8
-	adcq	%rbx, %rdi
-	movq	24(%rsi), %rdx
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	mulxq	%rbp, %rbx, %r10
-	adcq	%r8, %rbx
-	movq	32(%rsi), %rdx
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	mulxq	%rbp, %r8, %r11
-	adcq	%r10, %r8
-	movq	40(%rsi), %rdx
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	mulxq	%rbp, %rsi, %r15
-	adcq	%r11, %rsi
-	adcq	$0, %r15
-	movq	-8(%rcx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	%r9, %rdx
-	imulq	%rax, %rdx
-	movq	(%rcx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	mulxq	%rax, %rbp, %rax
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	addq	%r9, %rbp
-	movq	8(%rcx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	mulxq	%rax, %r12, %r9
-	adcq	%r14, %r12
-	movq	16(%rcx), %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	mulxq	%rax, %r14, %rax
-	adcq	%rdi, %r14
-	movq	24(%rcx), %rdi
-	movq	%rdi, -40(%rsp)         # 8-byte Spill
-	mulxq	%rdi, %r13, %rdi
-	adcq	%rbx, %r13
-	movq	32(%rcx), %rbp
-	movq	%rbp, -48(%rsp)         # 8-byte Spill
-	mulxq	%rbp, %r11, %rbx
-	adcq	%r8, %r11
-	movq	40(%rcx), %rcx
-	movq	%rcx, -56(%rsp)         # 8-byte Spill
-	mulxq	%rcx, %r10, %rcx
-	adcq	%rsi, %r10
-	adcq	$0, %r15
-	addq	-128(%rsp), %r12        # 8-byte Folded Reload
-	adcq	%r9, %r14
-	adcq	%rax, %r13
-	adcq	%rdi, %r11
-	adcq	%rbx, %r10
-	adcq	%rcx, %r15
-	movq	-72(%rsp), %rax         # 8-byte Reload
-	movq	8(%rax), %rdx
-	mulxq	-80(%rsp), %rcx, %rsi   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %rbx, %rax   # 8-byte Folded Reload
-	addq	%rcx, %rax
-	mulxq	-88(%rsp), %rcx, %rdi   # 8-byte Folded Reload
-	adcq	%rsi, %rcx
-	mulxq	-96(%rsp), %rsi, %r8    # 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-104(%rsp), %rdi, %rbp  # 8-byte Folded Reload
-	movq	%rbp, -128(%rsp)        # 8-byte Spill
-	adcq	%r8, %rdi
-	mulxq	-112(%rsp), %r8, %r9    # 8-byte Folded Reload
-	adcq	-128(%rsp), %r8         # 8-byte Folded Reload
-	adcq	$0, %r9
-	addq	%r12, %rbx
-	adcq	%r14, %rax
-	adcq	%r13, %rcx
-	adcq	%r11, %rsi
-	adcq	%r10, %rdi
-	adcq	%r15, %r8
-	adcq	$0, %r9
-	movq	%rbx, %rdx
-	imulq	-16(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rbp, %r13    # 8-byte Folded Reload
-	addq	%rbx, %rbp
-	mulxq	-24(%rsp), %r11, %rbx   # 8-byte Folded Reload
-	adcq	%rax, %r11
-	mulxq	-32(%rsp), %r14, %rax   # 8-byte Folded Reload
-	adcq	%rcx, %r14
-	mulxq	-40(%rsp), %r10, %rcx   # 8-byte Folded Reload
-	adcq	%rsi, %r10
-	mulxq	-48(%rsp), %r15, %rsi   # 8-byte Folded Reload
-	adcq	%rdi, %r15
-	mulxq	-56(%rsp), %r12, %rdx   # 8-byte Folded Reload
-	adcq	%r8, %r12
-	adcq	$0, %r9
-	addq	%r13, %r11
-	adcq	%rbx, %r14
-	adcq	%rax, %r10
-	adcq	%rcx, %r15
-	adcq	%rsi, %r12
-	adcq	%rdx, %r9
-	movq	-72(%rsp), %rax         # 8-byte Reload
-	movq	16(%rax), %rdx
-	mulxq	-80(%rsp), %rcx, %rax   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %r13, %rdi   # 8-byte Folded Reload
-	addq	%rcx, %rdi
-	mulxq	-88(%rsp), %rbx, %rcx   # 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	-96(%rsp), %rsi, %rbp   # 8-byte Folded Reload
-	adcq	%rcx, %rsi
-	mulxq	-104(%rsp), %rax, %rcx  # 8-byte Folded Reload
-	movq	%rcx, -128(%rsp)        # 8-byte Spill
-	adcq	%rbp, %rax
-	mulxq	-112(%rsp), %r8, %rcx   # 8-byte Folded Reload
-	adcq	-128(%rsp), %r8         # 8-byte Folded Reload
-	adcq	$0, %rcx
-	addq	%r11, %r13
-	adcq	%r14, %rdi
-	adcq	%r10, %rbx
-	adcq	%r15, %rsi
-	adcq	%r12, %rax
-	adcq	%r9, %r8
-	adcq	$0, %rcx
-	movq	%r13, %rdx
-	imulq	-16(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rbp, %r12    # 8-byte Folded Reload
-	addq	%r13, %rbp
-	mulxq	-24(%rsp), %r11, %rbp   # 8-byte Folded Reload
-	adcq	%rdi, %r11
-	mulxq	-32(%rsp), %r9, %rdi    # 8-byte Folded Reload
-	adcq	%rbx, %r9
-	mulxq	-40(%rsp), %r10, %rbx   # 8-byte Folded Reload
-	adcq	%rsi, %r10
-	mulxq	-48(%rsp), %r14, %rsi   # 8-byte Folded Reload
-	adcq	%rax, %r14
-	mulxq	-56(%rsp), %r15, %rax   # 8-byte Folded Reload
-	adcq	%r8, %r15
-	adcq	$0, %rcx
-	addq	%r12, %r11
-	adcq	%rbp, %r9
-	adcq	%rdi, %r10
-	adcq	%rbx, %r14
-	adcq	%rsi, %r15
-	adcq	%rax, %rcx
-	movq	-72(%rsp), %rax         # 8-byte Reload
-	movq	24(%rax), %rdx
-	mulxq	-80(%rsp), %rsi, %rax   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %r13, %rbx   # 8-byte Folded Reload
-	addq	%rsi, %rbx
-	mulxq	-88(%rsp), %rdi, %rbp   # 8-byte Folded Reload
-	adcq	%rax, %rdi
-	mulxq	-96(%rsp), %rsi, %r8    # 8-byte Folded Reload
-	adcq	%rbp, %rsi
-	mulxq	-104(%rsp), %rax, %rbp  # 8-byte Folded Reload
-	adcq	%r8, %rax
-	mulxq	-112(%rsp), %r8, %r12   # 8-byte Folded Reload
-	adcq	%rbp, %r8
-	adcq	$0, %r12
-	addq	%r11, %r13
-	adcq	%r9, %rbx
-	adcq	%r10, %rdi
-	adcq	%r14, %rsi
-	adcq	%r15, %rax
-	adcq	%rcx, %r8
-	adcq	$0, %r12
-	movq	%r13, %rdx
-	imulq	-16(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rbp, %rcx    # 8-byte Folded Reload
-	addq	%r13, %rbp
-	mulxq	-24(%rsp), %r11, %rbp   # 8-byte Folded Reload
-	adcq	%rbx, %r11
-	mulxq	-32(%rsp), %r9, %rbx    # 8-byte Folded Reload
-	adcq	%rdi, %r9
-	mulxq	-40(%rsp), %r10, %rdi   # 8-byte Folded Reload
-	adcq	%rsi, %r10
-	mulxq	-48(%rsp), %r14, %rsi   # 8-byte Folded Reload
-	adcq	%rax, %r14
-	mulxq	-56(%rsp), %r15, %rax   # 8-byte Folded Reload
-	adcq	%r8, %r15
-	adcq	$0, %r12
-	addq	%rcx, %r11
-	adcq	%rbp, %r9
-	adcq	%rbx, %r10
-	adcq	%rdi, %r14
-	adcq	%rsi, %r15
-	adcq	%rax, %r12
-	movq	-72(%rsp), %rax         # 8-byte Reload
-	movq	32(%rax), %rdx
-	mulxq	-80(%rsp), %rsi, %rcx   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %r13, %rax   # 8-byte Folded Reload
-	addq	%rsi, %rax
-	mulxq	-88(%rsp), %rbx, %rsi   # 8-byte Folded Reload
-	adcq	%rcx, %rbx
-	mulxq	-96(%rsp), %rdi, %rcx   # 8-byte Folded Reload
-	adcq	%rsi, %rdi
-	mulxq	-104(%rsp), %rsi, %rbp  # 8-byte Folded Reload
-	adcq	%rcx, %rsi
-	mulxq	-112(%rsp), %r8, %rcx   # 8-byte Folded Reload
-	adcq	%rbp, %r8
-	adcq	$0, %rcx
-	addq	%r11, %r13
-	adcq	%r9, %rax
-	adcq	%r10, %rbx
-	adcq	%r14, %rdi
-	adcq	%r15, %rsi
-	adcq	%r12, %r8
-	adcq	$0, %rcx
-	movq	%r13, %rdx
-	imulq	-16(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rbp, %r9     # 8-byte Folded Reload
-	addq	%r13, %rbp
-	mulxq	-24(%rsp), %r13, %rbp   # 8-byte Folded Reload
-	adcq	%rax, %r13
-	mulxq	-32(%rsp), %r11, %rax   # 8-byte Folded Reload
-	adcq	%rbx, %r11
-	mulxq	-40(%rsp), %r10, %rbx   # 8-byte Folded Reload
-	adcq	%rdi, %r10
-	mulxq	-48(%rsp), %r14, %rdi   # 8-byte Folded Reload
-	adcq	%rsi, %r14
-	mulxq	-56(%rsp), %rsi, %rdx   # 8-byte Folded Reload
-	adcq	%r8, %rsi
-	adcq	$0, %rcx
-	addq	%r9, %r13
-	adcq	%rbp, %r11
-	adcq	%rax, %r10
-	adcq	%rbx, %r14
-	adcq	%rdi, %rsi
-	adcq	%rdx, %rcx
-	movq	-72(%rsp), %rax         # 8-byte Reload
-	movq	40(%rax), %rdx
-	mulxq	-80(%rsp), %rdi, %rax   # 8-byte Folded Reload
-	mulxq	-64(%rsp), %r8, %rbx    # 8-byte Folded Reload
-	addq	%rdi, %rbx
-	mulxq	-88(%rsp), %rdi, %rbp   # 8-byte Folded Reload
-	adcq	%rax, %rdi
-	mulxq	-96(%rsp), %r15, %rax   # 8-byte Folded Reload
-	adcq	%rbp, %r15
-	mulxq	-104(%rsp), %r12, %rbp  # 8-byte Folded Reload
-	adcq	%rax, %r12
-	mulxq	-112(%rsp), %r9, %rax   # 8-byte Folded Reload
-	adcq	%rbp, %r9
-	adcq	$0, %rax
-	addq	%r13, %r8
-	adcq	%r11, %rbx
-	adcq	%r10, %rdi
-	adcq	%r14, %r15
-	adcq	%rsi, %r12
-	adcq	%rcx, %r9
-	adcq	$0, %rax
-	movq	-16(%rsp), %rdx         # 8-byte Reload
-	imulq	%r8, %rdx
-	mulxq	-8(%rsp), %rcx, %rsi    # 8-byte Folded Reload
-	movq	%rsi, -16(%rsp)         # 8-byte Spill
-	addq	%r8, %rcx
-	movq	-24(%rsp), %r11         # 8-byte Reload
-	mulxq	%r11, %r8, %rcx
-	movq	%rcx, -64(%rsp)         # 8-byte Spill
-	adcq	%rbx, %r8
-	movq	-32(%rsp), %r10         # 8-byte Reload
-	mulxq	%r10, %rsi, %rcx
-	movq	%rcx, -72(%rsp)         # 8-byte Spill
-	adcq	%rdi, %rsi
-	movq	-40(%rsp), %r13         # 8-byte Reload
-	mulxq	%r13, %rdi, %rcx
-	movq	%rcx, -80(%rsp)         # 8-byte Spill
-	adcq	%r15, %rdi
-	movq	-48(%rsp), %rcx         # 8-byte Reload
-	mulxq	%rcx, %r15, %rbx
-	adcq	%r12, %r15
-	movq	-56(%rsp), %r14         # 8-byte Reload
-	mulxq	%r14, %r12, %rbp
-	adcq	%r9, %r12
-	adcq	$0, %rax
-	addq	-16(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-64(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-72(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	-80(%rsp), %r15         # 8-byte Folded Reload
-	adcq	%rbx, %r12
-	adcq	%rbp, %rax
-	movq	%r8, %rbp
-	subq	-8(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rsi, %rbx
-	sbbq	%r11, %rbx
-	movq	%rdi, %r11
-	sbbq	%r10, %r11
-	movq	%r15, %r10
-	sbbq	%r13, %r10
-	movq	%r12, %r9
-	sbbq	%rcx, %r9
-	movq	%rax, %rcx
-	sbbq	%r14, %rcx
-	movq	%rcx, %rdx
-	sarq	$63, %rdx
-	cmovsq	%r8, %rbp
-	movq	-120(%rsp), %rdx        # 8-byte Reload
-	movq	%rbp, (%rdx)
-	cmovsq	%rsi, %rbx
-	movq	%rbx, 8(%rdx)
-	cmovsq	%rdi, %r11
-	movq	%r11, 16(%rdx)
-	cmovsq	%r15, %r10
-	movq	%r10, 24(%rdx)
-	cmovsq	%r12, %r9
-	movq	%r9, 32(%rdx)
-	cmovsq	%rax, %rcx
-	movq	%rcx, 40(%rdx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end84:
-	.size	mcl_fp_montNF6Lbmi2, .Lfunc_end84-mcl_fp_montNF6Lbmi2
-
-	.globl	mcl_fp_montRed6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed6Lbmi2,@function
-mcl_fp_montRed6Lbmi2:                   # @mcl_fp_montRed6Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	pushq	%rax
-	movq	%rdx, %rcx
-	movq	%rdi, -104(%rsp)        # 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	(%rcx), %rdi
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	(%rsi), %r14
-	movq	%r14, %rdx
-	imulq	%rax, %rdx
-	movq	40(%rcx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	mulxq	%rax, %rbx, %r12
-	movq	32(%rcx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	mulxq	%rax, %r10, %r11
-	movq	24(%rcx), %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	16(%rcx), %rbp
-	movq	%rbp, -40(%rsp)         # 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, (%rsp)            # 8-byte Spill
-	mulxq	%rax, %r15, %r8
-	mulxq	%rbp, %r13, %rbp
-	mulxq	%rcx, %rax, %r9
-	mulxq	%rdi, %rdx, %rcx
-	addq	%rax, %rcx
-	adcq	%r13, %r9
-	adcq	%r15, %rbp
-	adcq	%r10, %r8
-	adcq	%rbx, %r11
-	adcq	$0, %r12
-	addq	%r14, %rdx
-	adcq	8(%rsi), %rcx
-	adcq	16(%rsi), %r9
-	adcq	24(%rsi), %rbp
-	adcq	32(%rsi), %r8
-	adcq	40(%rsi), %r11
-	movq	%r11, -88(%rsp)         # 8-byte Spill
-	adcq	48(%rsi), %r12
-	movq	%r12, -80(%rsp)         # 8-byte Spill
-	movq	88(%rsi), %r10
-	movq	80(%rsi), %rdx
-	movq	72(%rsi), %rdi
-	movq	64(%rsi), %rax
-	movq	56(%rsi), %rsi
-	adcq	$0, %rsi
-	movq	%rsi, -112(%rsp)        # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	adcq	$0, %rdi
-	movq	%rdi, -72(%rsp)         # 8-byte Spill
-	adcq	$0, %rdx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	adcq	$0, %r10
-	movq	%r10, -48(%rsp)         # 8-byte Spill
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	movq	%rcx, %rdx
-	imulq	-56(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	-24(%rsp), %rax, %r13   # 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	mulxq	-16(%rsp), %rax, %r15   # 8-byte Folded Reload
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	mulxq	-32(%rsp), %r11, %r14   # 8-byte Folded Reload
-	mulxq	-40(%rsp), %rbx, %r10   # 8-byte Folded Reload
-	mulxq	(%rsp), %rsi, %rdi      # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rdx, %rax    # 8-byte Folded Reload
-	addq	%rsi, %rax
-	adcq	%rbx, %rdi
-	adcq	%r11, %r10
-	adcq	-128(%rsp), %r14        # 8-byte Folded Reload
-	adcq	-120(%rsp), %r15        # 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%rcx, %rdx
-	adcq	%r9, %rax
-	adcq	%rbp, %rdi
-	adcq	%r8, %r10
-	adcq	-88(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-80(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-112(%rsp), %r13        # 8-byte Folded Reload
-	adcq	$0, -96(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -72(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -64(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -48(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rax, %rdx
-	movq	-56(%rsp), %r11         # 8-byte Reload
-	imulq	%r11, %rdx
-	mulxq	-24(%rsp), %rsi, %rcx   # 8-byte Folded Reload
-	movq	%rsi, -112(%rsp)        # 8-byte Spill
-	movq	%rcx, -80(%rsp)         # 8-byte Spill
-	mulxq	-16(%rsp), %rsi, %rcx   # 8-byte Folded Reload
-	movq	%rsi, -120(%rsp)        # 8-byte Spill
-	movq	%rcx, -88(%rsp)         # 8-byte Spill
-	mulxq	-32(%rsp), %rcx, %rbx   # 8-byte Folded Reload
-	movq	%rcx, -128(%rsp)        # 8-byte Spill
-	mulxq	-40(%rsp), %rcx, %r9    # 8-byte Folded Reload
-	mulxq	(%rsp), %rsi, %rbp      # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rdx, %r8     # 8-byte Folded Reload
-	addq	%rsi, %r8
-	adcq	%rcx, %rbp
-	adcq	-128(%rsp), %r9         # 8-byte Folded Reload
-	adcq	-120(%rsp), %rbx        # 8-byte Folded Reload
-	movq	-88(%rsp), %rsi         # 8-byte Reload
-	adcq	-112(%rsp), %rsi        # 8-byte Folded Reload
-	movq	-80(%rsp), %rcx         # 8-byte Reload
-	adcq	$0, %rcx
-	addq	%rax, %rdx
-	adcq	%rdi, %r8
-	adcq	%r10, %rbp
-	adcq	%r14, %r9
-	adcq	%r15, %rbx
-	adcq	%r13, %rsi
-	movq	%rsi, -88(%rsp)         # 8-byte Spill
-	adcq	-96(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -80(%rsp)         # 8-byte Spill
-	adcq	$0, -72(%rsp)           # 8-byte Folded Spill
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	adcq	$0, -48(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%r8, %rdx
-	imulq	%r11, %rdx
-	mulxq	-24(%rsp), %rsi, %rcx   # 8-byte Folded Reload
-	movq	%rsi, -64(%rsp)         # 8-byte Spill
-	movq	%rcx, -96(%rsp)         # 8-byte Spill
-	mulxq	-16(%rsp), %rcx, %r11   # 8-byte Folded Reload
-	movq	%rcx, -112(%rsp)        # 8-byte Spill
-	mulxq	-32(%rsp), %r10, %r14   # 8-byte Folded Reload
-	mulxq	-40(%rsp), %r13, %r15   # 8-byte Folded Reload
-	mulxq	(%rsp), %rsi, %rdi      # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rdx, %rcx    # 8-byte Folded Reload
-	addq	%rsi, %rcx
-	adcq	%r13, %rdi
-	adcq	%r10, %r15
-	adcq	-112(%rsp), %r14        # 8-byte Folded Reload
-	adcq	-64(%rsp), %r11         # 8-byte Folded Reload
-	movq	-96(%rsp), %rsi         # 8-byte Reload
-	adcq	$0, %rsi
-	addq	%r8, %rdx
-	adcq	%rbp, %rcx
-	adcq	%r9, %rdi
-	adcq	%rbx, %r15
-	adcq	-88(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-80(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-72(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -96(%rsp)         # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	adcq	$0, %r12
-	movq	%rcx, %rdx
-	imulq	-56(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	-24(%rsp), %rbp, %rsi   # 8-byte Folded Reload
-	movq	%rbp, -48(%rsp)         # 8-byte Spill
-	movq	%rsi, -72(%rsp)         # 8-byte Spill
-	mulxq	-16(%rsp), %rbp, %rsi   # 8-byte Folded Reload
-	movq	%rbp, -88(%rsp)         # 8-byte Spill
-	movq	%rsi, -80(%rsp)         # 8-byte Spill
-	mulxq	-32(%rsp), %rsi, %r13   # 8-byte Folded Reload
-	movq	%rsi, -112(%rsp)        # 8-byte Spill
-	movq	-40(%rsp), %r9          # 8-byte Reload
-	mulxq	%r9, %r10, %rbp
-	mulxq	(%rsp), %rsi, %r8       # 8-byte Folded Reload
-	mulxq	-8(%rsp), %rdx, %rbx    # 8-byte Folded Reload
-	addq	%rsi, %rbx
-	adcq	%r10, %r8
-	adcq	-112(%rsp), %rbp        # 8-byte Folded Reload
-	adcq	-88(%rsp), %r13         # 8-byte Folded Reload
-	movq	-80(%rsp), %r10         # 8-byte Reload
-	adcq	-48(%rsp), %r10         # 8-byte Folded Reload
-	movq	-72(%rsp), %rsi         # 8-byte Reload
-	adcq	$0, %rsi
-	addq	%rcx, %rdx
-	adcq	%rdi, %rbx
-	adcq	%r15, %r8
-	adcq	%r14, %rbp
-	adcq	%r11, %r13
-	adcq	-96(%rsp), %r10         # 8-byte Folded Reload
-	movq	%r10, -80(%rsp)         # 8-byte Spill
-	adcq	-64(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -72(%rsp)         # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	adcq	$0, %r12
-	movq	-56(%rsp), %rdx         # 8-byte Reload
-	imulq	%rbx, %rdx
-	mulxq	-24(%rsp), %rax, %r10   # 8-byte Folded Reload
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	mulxq	%r9, %rsi, %r14
-	mulxq	-8(%rsp), %r11, %rdi    # 8-byte Folded Reload
-	mulxq	(%rsp), %rax, %r9       # 8-byte Folded Reload
-	addq	%rdi, %rax
-	adcq	%rsi, %r9
-	movq	-32(%rsp), %r15         # 8-byte Reload
-	mulxq	%r15, %rsi, %rdi
-	adcq	%r14, %rsi
-	mulxq	-16(%rsp), %rdx, %r14   # 8-byte Folded Reload
-	adcq	%rdi, %rdx
-	adcq	-56(%rsp), %r14         # 8-byte Folded Reload
-	adcq	$0, %r10
-	addq	%rbx, %r11
-	adcq	%r8, %rax
-	adcq	%rbp, %r9
-	adcq	%r13, %rsi
-	adcq	-80(%rsp), %rdx         # 8-byte Folded Reload
-	adcq	-72(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-48(%rsp), %r10         # 8-byte Folded Reload
-	adcq	$0, %r12
-	movq	%rax, %rcx
-	subq	-8(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%r9, %rdi
-	sbbq	(%rsp), %rdi            # 8-byte Folded Reload
-	movq	%rsi, %rbp
-	sbbq	-40(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	sbbq	%r15, %rbx
-	movq	%r14, %r8
-	sbbq	-16(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r10, %r15
-	sbbq	-24(%rsp), %r15         # 8-byte Folded Reload
-	sbbq	$0, %r12
-	andl	$1, %r12d
-	cmovneq	%r10, %r15
-	testb	%r12b, %r12b
-	cmovneq	%rax, %rcx
-	movq	-104(%rsp), %rax        # 8-byte Reload
-	movq	%rcx, (%rax)
-	cmovneq	%r9, %rdi
-	movq	%rdi, 8(%rax)
-	cmovneq	%rsi, %rbp
-	movq	%rbp, 16(%rax)
-	cmovneq	%rdx, %rbx
-	movq	%rbx, 24(%rax)
-	cmovneq	%r14, %r8
-	movq	%r8, 32(%rax)
-	movq	%r15, 40(%rax)
-	addq	$8, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end85:
-	.size	mcl_fp_montRed6Lbmi2, .Lfunc_end85-mcl_fp_montRed6Lbmi2
-
-	.globl	mcl_fp_addPre6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre6Lbmi2,@function
-mcl_fp_addPre6Lbmi2:                    # @mcl_fp_addPre6Lbmi2
-# BB#0:
-	pushq	%r14
-	pushq	%rbx
-	movq	40(%rdx), %r8
-	movq	40(%rsi), %r11
-	movq	32(%rdx), %r9
-	movq	24(%rdx), %r10
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %r14
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rbx, 16(%rdi)
-	adcq	%r10, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r9, %r14
-	movq	%r14, 32(%rdi)
-	adcq	%r8, %r11
-	movq	%r11, 40(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r14
-	retq
-.Lfunc_end86:
-	.size	mcl_fp_addPre6Lbmi2, .Lfunc_end86-mcl_fp_addPre6Lbmi2
-
-	.globl	mcl_fp_subPre6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre6Lbmi2,@function
-mcl_fp_subPre6Lbmi2:                    # @mcl_fp_subPre6Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	40(%rdx), %r8
-	movq	40(%rsi), %r9
-	movq	32(%rsi), %r10
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %rsi
-	movq	24(%rdx), %r14
-	movq	32(%rdx), %r15
-	sbbq	16(%rdx), %rcx
-	movq	%rbx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r14, %r11
-	movq	%r11, 24(%rdi)
-	sbbq	%r15, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	%r8, %r9
-	movq	%r9, 40(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end87:
-	.size	mcl_fp_subPre6Lbmi2, .Lfunc_end87-mcl_fp_subPre6Lbmi2
-
-	.globl	mcl_fp_shr1_6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_6Lbmi2,@function
-mcl_fp_shr1_6Lbmi2:                     # @mcl_fp_shr1_6Lbmi2
-# BB#0:
-	movq	40(%rsi), %r8
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %rdx
-	movq	16(%rsi), %rax
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rcx
-	movq	%rcx, (%rdi)
-	shrdq	$1, %rax, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rdx, %rax
-	movq	%rax, 16(%rdi)
-	shrdq	$1, %r9, %rdx
-	movq	%rdx, 24(%rdi)
-	shrdq	$1, %r8, %r9
-	movq	%r9, 32(%rdi)
-	shrq	%r8
-	movq	%r8, 40(%rdi)
-	retq
-.Lfunc_end88:
-	.size	mcl_fp_shr1_6Lbmi2, .Lfunc_end88-mcl_fp_shr1_6Lbmi2
-
-	.globl	mcl_fp_add6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add6Lbmi2,@function
-mcl_fp_add6Lbmi2:                       # @mcl_fp_add6Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	40(%rdx), %r14
-	movq	40(%rsi), %r8
-	movq	32(%rdx), %r15
-	movq	24(%rdx), %rbx
-	movq	24(%rsi), %r10
-	movq	32(%rsi), %r9
-	movq	16(%rdx), %r11
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r11
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	adcq	%rbx, %r10
-	movq	%r10, 24(%rdi)
-	adcq	%r15, %r9
-	movq	%r9, 32(%rdi)
-	adcq	%r14, %r8
-	movq	%r8, 40(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r11
-	sbbq	24(%rcx), %r10
-	sbbq	32(%rcx), %r9
-	sbbq	40(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB89_2
-# BB#1:                                 # %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	movq	%r10, 24(%rdi)
-	movq	%r9, 32(%rdi)
-	movq	%r8, 40(%rdi)
-.LBB89_2:                               # %carry
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end89:
-	.size	mcl_fp_add6Lbmi2, .Lfunc_end89-mcl_fp_add6Lbmi2
-
-	.globl	mcl_fp_addNF6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF6Lbmi2,@function
-mcl_fp_addNF6Lbmi2:                     # @mcl_fp_addNF6Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	40(%rdx), %r8
-	movq	32(%rdx), %r9
-	movq	24(%rdx), %r10
-	movq	16(%rdx), %r11
-	movq	(%rdx), %r15
-	movq	8(%rdx), %r14
-	addq	(%rsi), %r15
-	adcq	8(%rsi), %r14
-	adcq	16(%rsi), %r11
-	adcq	24(%rsi), %r10
-	adcq	32(%rsi), %r9
-	adcq	40(%rsi), %r8
-	movq	%r15, %rsi
-	subq	(%rcx), %rsi
-	movq	%r14, %rbx
-	sbbq	8(%rcx), %rbx
-	movq	%r11, %rdx
-	sbbq	16(%rcx), %rdx
-	movq	%r10, %r13
-	sbbq	24(%rcx), %r13
-	movq	%r9, %r12
-	sbbq	32(%rcx), %r12
-	movq	%r8, %rax
-	sbbq	40(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r15, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r14, %rbx
-	movq	%rbx, 8(%rdi)
-	cmovsq	%r11, %rdx
-	movq	%rdx, 16(%rdi)
-	cmovsq	%r10, %r13
-	movq	%r13, 24(%rdi)
-	cmovsq	%r9, %r12
-	movq	%r12, 32(%rdi)
-	cmovsq	%r8, %rax
-	movq	%rax, 40(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end90:
-	.size	mcl_fp_addNF6Lbmi2, .Lfunc_end90-mcl_fp_addNF6Lbmi2
-
-	.globl	mcl_fp_sub6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub6Lbmi2,@function
-mcl_fp_sub6Lbmi2:                       # @mcl_fp_sub6Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	40(%rdx), %r14
-	movq	40(%rsi), %r8
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %r10
-	movq	16(%rsi), %r11
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %rsi
-	movq	24(%rdx), %r15
-	movq	32(%rdx), %r12
-	sbbq	16(%rdx), %r11
-	movq	%rax, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	sbbq	%r15, %r10
-	movq	%r10, 24(%rdi)
-	sbbq	%r12, %r9
-	movq	%r9, 32(%rdi)
-	sbbq	%r14, %r8
-	movq	%r8, 40(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	.LBB91_2
-# BB#1:                                 # %carry
-	movq	40(%rcx), %r14
-	movq	32(%rcx), %r15
-	movq	24(%rcx), %r12
-	movq	8(%rcx), %rbx
-	movq	16(%rcx), %rdx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%rsi, %rbx
-	movq	%rbx, 8(%rdi)
-	adcq	%r11, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r10, %r12
-	movq	%r12, 24(%rdi)
-	adcq	%r9, %r15
-	movq	%r15, 32(%rdi)
-	adcq	%r8, %r14
-	movq	%r14, 40(%rdi)
-.LBB91_2:                               # %nocarry
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end91:
-	.size	mcl_fp_sub6Lbmi2, .Lfunc_end91-mcl_fp_sub6Lbmi2
-
-	.globl	mcl_fp_subNF6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF6Lbmi2,@function
-mcl_fp_subNF6Lbmi2:                     # @mcl_fp_subNF6Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	40(%rsi), %r15
-	movq	32(%rsi), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
-	movq	(%rsi), %r11
-	movq	8(%rsi), %r14
-	subq	(%rdx), %r11
-	sbbq	8(%rdx), %r14
-	sbbq	16(%rdx), %r10
-	sbbq	24(%rdx), %r9
-	sbbq	32(%rdx), %r8
-	sbbq	40(%rdx), %r15
-	movq	%r15, %rdx
-	sarq	$63, %rdx
-	movq	%rdx, %rbx
-	addq	%rbx, %rbx
-	movq	%rdx, %rsi
-	adcq	%rsi, %rsi
-	andq	8(%rcx), %rsi
-	movq	%r15, %rax
-	shrq	$63, %rax
-	orq	%rbx, %rax
-	andq	(%rcx), %rax
-	movq	40(%rcx), %r12
-	andq	%rdx, %r12
-	movq	32(%rcx), %r13
-	andq	%rdx, %r13
-	movq	24(%rcx), %rbx
-	andq	%rdx, %rbx
-	andq	16(%rcx), %rdx
-	addq	%r11, %rax
-	movq	%rax, (%rdi)
-	adcq	%r14, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r10, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r9, %rbx
-	movq	%rbx, 24(%rdi)
-	adcq	%r8, %r13
-	movq	%r13, 32(%rdi)
-	adcq	%r15, %r12
-	movq	%r12, 40(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end92:
-	.size	mcl_fp_subNF6Lbmi2, .Lfunc_end92-mcl_fp_subNF6Lbmi2
-
-	.globl	mcl_fpDbl_add6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add6Lbmi2,@function
-mcl_fpDbl_add6Lbmi2:                    # @mcl_fpDbl_add6Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	88(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	80(%rdx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	72(%rdx), %r14
-	movq	64(%rdx), %r15
-	movq	24(%rsi), %rbp
-	movq	32(%rsi), %r13
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rax
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r12
-	adcq	24(%rdx), %rbp
-	adcq	32(%rdx), %r13
-	movq	56(%rdx), %r11
-	movq	48(%rdx), %r9
-	movq	40(%rdx), %rdx
-	movq	%rbx, (%rdi)
-	movq	88(%rsi), %r8
-	movq	%rax, 8(%rdi)
-	movq	80(%rsi), %r10
-	movq	%r12, 16(%rdi)
-	movq	72(%rsi), %r12
-	movq	%rbp, 24(%rdi)
-	movq	40(%rsi), %rax
-	adcq	%rdx, %rax
-	movq	64(%rsi), %rdx
-	movq	%r13, 32(%rdi)
-	movq	56(%rsi), %r13
-	movq	48(%rsi), %rbp
-	adcq	%r9, %rbp
-	movq	%rax, 40(%rdi)
-	adcq	%r11, %r13
-	adcq	%r15, %rdx
-	adcq	%r14, %r12
-	adcq	-16(%rsp), %r10         # 8-byte Folded Reload
-	adcq	-8(%rsp), %r8           # 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rbp, %rsi
-	subq	(%rcx), %rsi
-	movq	%r13, %rbx
-	sbbq	8(%rcx), %rbx
-	movq	%rdx, %r9
-	sbbq	16(%rcx), %r9
-	movq	%r12, %r11
-	sbbq	24(%rcx), %r11
-	movq	%r10, %r14
-	sbbq	32(%rcx), %r14
-	movq	%r8, %r15
-	sbbq	40(%rcx), %r15
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%rbp, %rsi
-	movq	%rsi, 48(%rdi)
-	testb	%al, %al
-	cmovneq	%r13, %rbx
-	movq	%rbx, 56(%rdi)
-	cmovneq	%rdx, %r9
-	movq	%r9, 64(%rdi)
-	cmovneq	%r12, %r11
-	movq	%r11, 72(%rdi)
-	cmovneq	%r10, %r14
-	movq	%r14, 80(%rdi)
-	cmovneq	%r8, %r15
-	movq	%r15, 88(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end93:
-	.size	mcl_fpDbl_add6Lbmi2, .Lfunc_end93-mcl_fpDbl_add6Lbmi2
-
-	.globl	mcl_fpDbl_sub6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub6Lbmi2,@function
-mcl_fpDbl_sub6Lbmi2:                    # @mcl_fpDbl_sub6Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	88(%rdx), %r9
-	movq	80(%rdx), %r10
-	movq	72(%rdx), %r14
-	movq	16(%rsi), %r8
-	movq	(%rsi), %r15
-	movq	8(%rsi), %r11
-	xorl	%eax, %eax
-	subq	(%rdx), %r15
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %r8
-	movq	24(%rsi), %rbx
-	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %r12
-	sbbq	32(%rdx), %r12
-	movq	64(%rdx), %r13
-	movq	%r15, (%rdi)
-	movq	56(%rdx), %rbp
-	movq	%r11, 8(%rdi)
-	movq	48(%rdx), %r15
-	movq	40(%rdx), %rdx
-	movq	%r8, 16(%rdi)
-	movq	88(%rsi), %r8
-	movq	%rbx, 24(%rdi)
-	movq	40(%rsi), %rbx
-	sbbq	%rdx, %rbx
-	movq	80(%rsi), %r11
-	movq	%r12, 32(%rdi)
-	movq	48(%rsi), %rdx
-	sbbq	%r15, %rdx
-	movq	72(%rsi), %r15
-	movq	%rbx, 40(%rdi)
-	movq	64(%rsi), %r12
-	movq	56(%rsi), %rsi
-	sbbq	%rbp, %rsi
-	sbbq	%r13, %r12
-	sbbq	%r14, %r15
-	sbbq	%r10, %r11
-	sbbq	%r9, %r8
-	movl	$0, %ebp
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	movq	(%rcx), %r14
-	cmoveq	%rax, %r14
-	testb	%bpl, %bpl
-	movq	16(%rcx), %r9
-	cmoveq	%rax, %r9
-	movq	8(%rcx), %rbp
-	cmoveq	%rax, %rbp
-	movq	40(%rcx), %r10
-	cmoveq	%rax, %r10
-	movq	32(%rcx), %rbx
-	cmoveq	%rax, %rbx
-	cmovneq	24(%rcx), %rax
-	addq	%rdx, %r14
-	movq	%r14, 48(%rdi)
-	adcq	%rsi, %rbp
-	movq	%rbp, 56(%rdi)
-	adcq	%r12, %r9
-	movq	%r9, 64(%rdi)
-	adcq	%r15, %rax
-	movq	%rax, 72(%rdi)
-	adcq	%r11, %rbx
-	movq	%rbx, 80(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 88(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end94:
-	.size	mcl_fpDbl_sub6Lbmi2, .Lfunc_end94-mcl_fpDbl_sub6Lbmi2
-
-	.globl	mcl_fp_mulUnitPre7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre7Lbmi2,@function
-mcl_fp_mulUnitPre7Lbmi2:                # @mcl_fp_mulUnitPre7Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	mulxq	48(%rsi), %r8, %r11
-	mulxq	40(%rsi), %r9, %r13
-	mulxq	32(%rsi), %r10, %rcx
-	mulxq	8(%rsi), %r12, %r14
-	mulxq	(%rsi), %r15, %rbx
-	addq	%r12, %rbx
-	mulxq	24(%rsi), %r12, %rax
-	mulxq	16(%rsi), %rdx, %rsi
-	movq	%r15, (%rdi)
-	movq	%rbx, 8(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r12, %rsi
-	movq	%rsi, 24(%rdi)
-	adcq	%r10, %rax
-	movq	%rax, 32(%rdi)
-	adcq	%r9, %rcx
-	movq	%rcx, 40(%rdi)
-	adcq	%r8, %r13
-	movq	%r13, 48(%rdi)
-	adcq	$0, %r11
-	movq	%r11, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end95:
-	.size	mcl_fp_mulUnitPre7Lbmi2, .Lfunc_end95-mcl_fp_mulUnitPre7Lbmi2
-
-	.globl	mcl_fpDbl_mulPre7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre7Lbmi2,@function
-mcl_fpDbl_mulPre7Lbmi2:                 # @mcl_fpDbl_mulPre7Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %r14
-	movq	%r14, -8(%rsp)          # 8-byte Spill
-	movq	%rsi, %r8
-	movq	%rdi, %r13
-	movq	%r13, -16(%rsp)         # 8-byte Spill
-	movq	(%r8), %rcx
-	movq	%rcx, -56(%rsp)         # 8-byte Spill
-	movq	8(%r8), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	(%r14), %rsi
-	movq	%rax, %rdx
-	mulxq	%rsi, %rbp, %rax
-	movq	%rcx, %rdx
-	mulxq	%rsi, %rdx, %rcx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	24(%r8), %rdi
-	movq	%rdi, -32(%rsp)         # 8-byte Spill
-	movq	16(%r8), %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	addq	%rbp, %rcx
-	mulxq	%rsi, %rbx, %rbp
-	adcq	%rax, %rbx
-	movq	%rdi, %rdx
-	mulxq	%rsi, %r12, %rax
-	adcq	%rbp, %r12
-	movq	32(%r8), %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	mulxq	%rsi, %r9, %rbp
-	adcq	%rax, %r9
-	movq	40(%r8), %rdi
-	movq	%rdi, %rdx
-	mulxq	%rsi, %r10, %rax
-	adcq	%rbp, %r10
-	movq	48(%r8), %r15
-	movq	%r15, %rdx
-	mulxq	%rsi, %rsi, %r11
-	adcq	%rax, %rsi
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	movq	%rax, (%r13)
-	adcq	$0, %r11
-	movq	8(%r14), %r13
-	movq	-56(%rsp), %rdx         # 8-byte Reload
-	mulxq	%r13, %r14, %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	addq	%rcx, %r14
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	mulxq	%r13, %rcx, %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	adcq	%rbx, %rcx
-	movq	-40(%rsp), %rdx         # 8-byte Reload
-	mulxq	%r13, %rbx, %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	adcq	%r12, %rbx
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	mulxq	%r13, %rbp, %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	adcq	%r9, %rbp
-	movq	-48(%rsp), %rdx         # 8-byte Reload
-	mulxq	%r13, %rax, %r9
-	adcq	%r10, %rax
-	movq	%rdi, %rdx
-	mulxq	%r13, %r10, %rdi
-	adcq	%rsi, %r10
-	movq	%r15, %rdx
-	mulxq	%r13, %r13, %rdx
-	adcq	%r11, %r13
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-56(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	-24(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-40(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-32(%rsp), %rax         # 8-byte Folded Reload
-	adcq	%r9, %r10
-	movq	-16(%rsp), %rsi         # 8-byte Reload
-	movq	%r14, 8(%rsi)
-	adcq	%rdi, %r13
-	adcq	%rdx, %r12
-	movq	(%r8), %rsi
-	movq	%rsi, -32(%rsp)         # 8-byte Spill
-	movq	8(%r8), %r11
-	movq	%r11, -24(%rsp)         # 8-byte Spill
-	movq	-8(%rsp), %rdx          # 8-byte Reload
-	movq	16(%rdx), %rdi
-	movq	%rsi, %rdx
-	mulxq	%rdi, %r9, %rdx
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	addq	%rcx, %r9
-	movq	%r11, %rdx
-	mulxq	%rdi, %r14, %rcx
-	movq	%rcx, -80(%rsp)         # 8-byte Spill
-	adcq	%rbx, %r14
-	movq	16(%r8), %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	mulxq	%rdi, %rsi, %rcx
-	movq	%rcx, -88(%rsp)         # 8-byte Spill
-	adcq	%rbp, %rsi
-	movq	24(%r8), %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	mulxq	%rdi, %rbp, %rcx
-	movq	%rcx, -96(%rsp)         # 8-byte Spill
-	adcq	%rax, %rbp
-	movq	32(%r8), %rdx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	mulxq	%rdi, %r11, %rax
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	adcq	%r10, %r11
-	movq	40(%r8), %rdx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	mulxq	%rdi, %r15, %rax
-	adcq	%r13, %r15
-	movq	48(%r8), %r13
-	movq	%r13, %rdx
-	mulxq	%rdi, %rcx, %rdx
-	adcq	%r12, %rcx
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	-72(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-80(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-88(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-96(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-104(%rsp), %r15        # 8-byte Folded Reload
-	adcq	%rax, %rcx
-	adcq	%rdx, %rbx
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	movq	%r9, 16(%rax)
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	24(%rax), %rdi
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %r9, %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	addq	%r14, %r9
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %rax, %rdx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	adcq	%rsi, %rax
-	movq	-40(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %r14, %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	adcq	%rbp, %r14
-	movq	-48(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %r10, %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	adcq	%r11, %r10
-	movq	-56(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %rbp, %rsi
-	adcq	%r15, %rbp
-	movq	-64(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %r11, %r15
-	adcq	%rcx, %r11
-	movq	%r13, %rdx
-	mulxq	%rdi, %r13, %rcx
-	adcq	%rbx, %r13
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-32(%rsp), %rax         # 8-byte Folded Reload
-	adcq	-24(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-40(%rsp), %r10         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	%rsi, %r11
-	movq	-16(%rsp), %rdi         # 8-byte Reload
-	movq	%r9, 24(%rdi)
-	adcq	%r15, %r13
-	adcq	%rcx, %r12
-	movq	(%r8), %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	8(%r8), %rbx
-	movq	%rbx, -24(%rsp)         # 8-byte Spill
-	movq	-8(%rsp), %rcx          # 8-byte Reload
-	movq	32(%rcx), %rcx
-	mulxq	%rcx, %rsi, %rdx
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	addq	%rax, %rsi
-	movq	%rbx, %rdx
-	mulxq	%rcx, %r9, %rax
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	adcq	%r14, %r9
-	movq	16(%r8), %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	mulxq	%rcx, %rax, %rdx
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	adcq	%r10, %rax
-	movq	24(%r8), %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	mulxq	%rcx, %r15, %rdx
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	adcq	%rbp, %r15
-	movq	32(%r8), %rdx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	mulxq	%rcx, %r10, %rbp
-	adcq	%r11, %r10
-	movq	40(%r8), %rdx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	mulxq	%rcx, %r11, %rbx
-	adcq	%r13, %r11
-	movq	48(%r8), %rdx
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	mulxq	%rcx, %r14, %rcx
-	adcq	%r12, %r14
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-80(%rsp), %r9          # 8-byte Folded Reload
-	adcq	-88(%rsp), %rax         # 8-byte Folded Reload
-	adcq	-96(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-104(%rsp), %r10        # 8-byte Folded Reload
-	adcq	%rbp, %r11
-	adcq	%rbx, %r14
-	adcq	%rcx, %r12
-	movq	%rsi, 32(%rdi)
-	movq	-8(%rsp), %rsi          # 8-byte Reload
-	movq	40(%rsi), %rdi
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %r13, %rcx
-	movq	%rcx, -32(%rsp)         # 8-byte Spill
-	addq	%r9, %r13
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %rcx, %rdx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	adcq	%rax, %rcx
-	movq	-40(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %rax, %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	adcq	%r15, %rax
-	movq	-48(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %rbx, %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	adcq	%r10, %rbx
-	movq	-56(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %rbp, %r15
-	adcq	%r11, %rbp
-	movq	-64(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %r9, %r11
-	adcq	%r14, %r9
-	movq	-72(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rdi, %r10, %rdx
-	adcq	%r12, %r10
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	addq	-32(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	-24(%rsp), %rax         # 8-byte Folded Reload
-	adcq	-40(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	%r15, %r9
-	movq	-16(%rsp), %r14         # 8-byte Reload
-	movq	%r13, 40(%r14)
-	adcq	%r11, %r10
-	adcq	%rdx, %rdi
-	movq	48(%rsi), %rdx
-	mulxq	(%r8), %r11, %rsi
-	movq	%rsi, -8(%rsp)          # 8-byte Spill
-	addq	%rcx, %r11
-	mulxq	8(%r8), %rsi, %r15
-	adcq	%rax, %rsi
-	mulxq	16(%r8), %rcx, %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	adcq	%rbx, %rcx
-	mulxq	24(%r8), %rbx, %r12
-	adcq	%rbp, %rbx
-	mulxq	32(%r8), %rbp, %r13
-	adcq	%r9, %rbp
-	mulxq	40(%r8), %rax, %r9
-	adcq	%r10, %rax
-	mulxq	48(%r8), %rdx, %r8
-	adcq	%rdi, %rdx
-	sbbq	%r10, %r10
-	andl	$1, %r10d
-	addq	-8(%rsp), %rsi          # 8-byte Folded Reload
-	adcq	%r15, %rcx
-	movq	%r11, 48(%r14)
-	movq	%rsi, 56(%r14)
-	movq	%rcx, 64(%r14)
-	adcq	-24(%rsp), %rbx         # 8-byte Folded Reload
-	movq	%rbx, 72(%r14)
-	adcq	%r12, %rbp
-	movq	%rbp, 80(%r14)
-	adcq	%r13, %rax
-	movq	%rax, 88(%r14)
-	adcq	%r9, %rdx
-	movq	%rdx, 96(%r14)
-	adcq	%r8, %r10
-	movq	%r10, 104(%r14)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end96:
-	.size	mcl_fpDbl_mulPre7Lbmi2, .Lfunc_end96-mcl_fpDbl_mulPre7Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre7Lbmi2,@function
-mcl_fpDbl_sqrPre7Lbmi2:                 # @mcl_fpDbl_sqrPre7Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	16(%rsi), %rdx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rax
-	mulxq	%rcx, %r8, %r10
-	movq	24(%rsi), %rbx
-	movq	%rbx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, %rdx
-	mulxq	%rcx, %r12, %rbp
-	movq	%rbp, -16(%rsp)         # 8-byte Spill
-	movq	%rcx, %rdx
-	mulxq	%rcx, %rdx, %rdi
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	addq	%r12, %rdi
-	adcq	%rbp, %r8
-	movq	%rbx, %rdx
-	mulxq	%rcx, %rbp, %r9
-	adcq	%r10, %rbp
-	movq	32(%rsi), %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	mulxq	%rcx, %r11, %r14
-	adcq	%r9, %r11
-	movq	40(%rsi), %rdx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	mulxq	%rcx, %r10, %r15
-	adcq	%r14, %r10
-	movq	48(%rsi), %r14
-	movq	%r14, %rdx
-	mulxq	%rcx, %rcx, %r13
-	adcq	%r15, %rcx
-	movq	-8(%rsp), %rdx          # 8-byte Reload
-	movq	-48(%rsp), %rbx         # 8-byte Reload
-	movq	%rbx, (%rdx)
-	adcq	$0, %r13
-	addq	%r12, %rdi
-	movq	%rax, %rdx
-	mulxq	%rax, %r12, %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	adcq	%r8, %r12
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %r8, %rdx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	adcq	%rbp, %r8
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %r9, %rbp
-	adcq	%r11, %r9
-	movq	-40(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %r15, %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	adcq	%r10, %r15
-	movq	-56(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %r11, %rbx
-	adcq	%rcx, %r11
-	movq	%r14, %rdx
-	mulxq	%rax, %r14, %rax
-	adcq	%r13, %r14
-	sbbq	%r13, %r13
-	andl	$1, %r13d
-	addq	-16(%rsp), %r12         # 8-byte Folded Reload
-	adcq	-48(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-24(%rsp), %r9          # 8-byte Folded Reload
-	adcq	%rbp, %r15
-	movq	-8(%rsp), %rcx          # 8-byte Reload
-	movq	%rdi, 8(%rcx)
-	adcq	-32(%rsp), %r11         # 8-byte Folded Reload
-	adcq	%rbx, %r14
-	adcq	%rax, %r13
-	movq	(%rsi), %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rcx
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %rbx
-	mulxq	%rbx, %rax, %rdx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	addq	%r12, %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	%rcx, %rdx
-	mulxq	%rbx, %r10, %rax
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	adcq	%r8, %r10
-	movq	%rbx, %rdx
-	mulxq	%rbx, %r12, %rax
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	adcq	%r9, %r12
-	movq	24(%rsi), %rax
-	movq	%rax, %rdx
-	mulxq	%rbx, %r8, %rdi
-	movq	%rdi, -56(%rsp)         # 8-byte Spill
-	adcq	%r8, %r15
-	movq	32(%rsi), %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	mulxq	%rbx, %rcx, %rdx
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	adcq	%r11, %rcx
-	movq	40(%rsi), %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	mulxq	%rbx, %rbp, %r11
-	adcq	%r14, %rbp
-	movq	48(%rsi), %r14
-	movq	%r14, %rdx
-	mulxq	%rbx, %r9, %rdx
-	adcq	%r13, %r9
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	-64(%rsp), %r10         # 8-byte Folded Reload
-	adcq	-72(%rsp), %r12         # 8-byte Folded Reload
-	adcq	-80(%rsp), %r15         # 8-byte Folded Reload
-	adcq	%rdi, %rcx
-	adcq	-88(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	%r11, %r9
-	adcq	%rdx, %rbx
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %rdi, %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	addq	%r10, %rdi
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %r11, %rdx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	adcq	%r12, %r11
-	adcq	%r8, %r15
-	movq	%rax, %rdx
-	mulxq	%rax, %r8, %rdx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	adcq	%rcx, %r8
-	movq	-40(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %r13, %rcx
-	movq	%rcx, -40(%rsp)         # 8-byte Spill
-	adcq	%rbp, %r13
-	movq	-48(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %r12, %rbp
-	adcq	%r9, %r12
-	movq	%r14, %rdx
-	mulxq	%rax, %rcx, %rax
-	adcq	%rbx, %rcx
-	sbbq	%r10, %r10
-	andl	$1, %r10d
-	addq	-32(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-24(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-56(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-64(%rsp), %r13         # 8-byte Folded Reload
-	movq	-8(%rsp), %rdx          # 8-byte Reload
-	movq	-16(%rsp), %rbx         # 8-byte Reload
-	movq	%rbx, 16(%rdx)
-	movq	%rdi, 24(%rdx)
-	adcq	-40(%rsp), %r12         # 8-byte Folded Reload
-	adcq	%rbp, %rcx
-	adcq	%rax, %r10
-	movq	(%rsi), %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rdi
-	movq	%rdi, -24(%rsp)         # 8-byte Spill
-	movq	32(%rsi), %rbx
-	mulxq	%rbx, %rax, %rdx
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	addq	%r11, %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	%rdi, %rdx
-	mulxq	%rbx, %r9, %rax
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	adcq	%r15, %r9
-	movq	16(%rsi), %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	mulxq	%rbx, %r15, %rax
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	adcq	%r8, %r15
-	movq	24(%rsi), %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	mulxq	%rbx, %r8, %rbp
-	adcq	%r13, %r8
-	movq	%rbx, %rdx
-	mulxq	%rbx, %r13, %r14
-	adcq	%r12, %r13
-	movq	40(%rsi), %rax
-	movq	%rax, %rdx
-	mulxq	%rbx, %rdx, %rdi
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rdi, -56(%rsp)         # 8-byte Spill
-	adcq	%rdx, %rcx
-	movq	48(%rsi), %rdx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	mulxq	%rbx, %r11, %rdx
-	adcq	%r10, %r11
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-80(%rsp), %r9          # 8-byte Folded Reload
-	adcq	-88(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-96(%rsp), %r8          # 8-byte Folded Reload
-	adcq	%rbp, %r13
-	adcq	%r14, %rcx
-	adcq	%rdi, %r11
-	adcq	%rdx, %r12
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %r14, %rdi
-	addq	%r9, %r14
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %rbx, %rdx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	adcq	%r15, %rbx
-	movq	-40(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %rbp, %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	adcq	%r8, %rbp
-	movq	-48(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %r10, %r15
-	adcq	%r13, %r10
-	adcq	-72(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rax, %rdx
-	mulxq	%rax, %r9, %r13
-	adcq	%r11, %r9
-	movq	-64(%rsp), %rdx         # 8-byte Reload
-	mulxq	%rax, %rax, %r11
-	adcq	%r12, %rax
-	sbbq	%r8, %r8
-	andl	$1, %r8d
-	addq	%rdi, %rbx
-	adcq	-24(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-32(%rsp), %r10         # 8-byte Folded Reload
-	adcq	%r15, %rcx
-	movq	-8(%rsp), %rdi          # 8-byte Reload
-	movq	-16(%rsp), %rdx         # 8-byte Reload
-	movq	%rdx, 32(%rdi)
-	movq	%r14, 40(%rdi)
-	adcq	-56(%rsp), %r9          # 8-byte Folded Reload
-	adcq	%r13, %rax
-	adcq	%r11, %r8
-	movq	48(%rsi), %rdx
-	mulxq	(%rsi), %r12, %r11
-	addq	%rbx, %r12
-	mulxq	8(%rsi), %rbx, %r14
-	adcq	%rbp, %rbx
-	mulxq	16(%rsi), %rbp, %r15
-	adcq	%r10, %rbp
-	mulxq	24(%rsi), %rdi, %r10
-	adcq	%rcx, %rdi
-	mulxq	32(%rsi), %rcx, %r13
-	adcq	%r9, %rcx
-	mulxq	40(%rsi), %rsi, %r9
-	adcq	%rax, %rsi
-	mulxq	%rdx, %rdx, %rax
-	adcq	%r8, %rdx
-	sbbq	%r8, %r8
-	andl	$1, %r8d
-	addq	%r11, %rbx
-	adcq	%r14, %rbp
-	movq	-8(%rsp), %r11          # 8-byte Reload
-	movq	%r12, 48(%r11)
-	movq	%rbx, 56(%r11)
-	movq	%rbp, 64(%r11)
-	adcq	%r15, %rdi
-	movq	%rdi, 72(%r11)
-	adcq	%r10, %rcx
-	movq	%rcx, 80(%r11)
-	adcq	%r13, %rsi
-	movq	%rsi, 88(%r11)
-	adcq	%r9, %rdx
-	movq	%rdx, 96(%r11)
-	adcq	%rax, %r8
-	movq	%r8, 104(%r11)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end97:
-	.size	mcl_fpDbl_sqrPre7Lbmi2, .Lfunc_end97-mcl_fpDbl_sqrPre7Lbmi2
-
-	.globl	mcl_fp_mont7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont7Lbmi2,@function
-mcl_fp_mont7Lbmi2:                      # @mcl_fp_mont7Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$56, %rsp
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rdi, -120(%rsp)        # 8-byte Spill
-	movq	48(%rsi), %rdi
-	movq	%rdi, -24(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rax
-	movq	%rdi, %rdx
-	mulxq	%rax, %rdx, %r8
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	movq	40(%rsi), %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	mulxq	%rax, %rdx, %r9
-	movq	%rdx, 40(%rsp)          # 8-byte Spill
-	movq	32(%rsi), %rdx
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %r11
-	movq	%r11, -64(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %r10
-	movq	%r10, -56(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r15
-	movq	%r15, -40(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rsi
-	movq	%rsi, -48(%rsp)         # 8-byte Spill
-	mulxq	%rax, %r13, %rdi
-	movq	%r11, %rdx
-	mulxq	%rax, %r14, %rbp
-	movq	%r10, %rdx
-	mulxq	%rax, %r12, %rbx
-	movq	%rsi, %rdx
-	mulxq	%rax, %r10, %rsi
-	movq	%r15, %rdx
-	mulxq	%rax, %r15, %r11
-	addq	%r10, %r11
-	adcq	%r12, %rsi
-	movq	%rsi, -112(%rsp)        # 8-byte Spill
-	adcq	%r14, %rbx
-	movq	%rbx, -104(%rsp)        # 8-byte Spill
-	adcq	%r13, %rbp
-	movq	%rbp, -96(%rsp)         # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, -88(%rsp)         # 8-byte Spill
-	adcq	48(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, -80(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, %r13
-	movq	-8(%rcx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	%r15, %rdx
-	imulq	%rax, %rdx
-	movq	(%rcx), %rdi
-	movq	%rdi, 24(%rsp)          # 8-byte Spill
-	movq	32(%rcx), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	mulxq	%rax, %rbx, %r9
-	movq	16(%rcx), %rsi
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	mulxq	%rsi, %r14, %rbp
-	movq	8(%rcx), %rax
-	movq	%rax, 32(%rsp)          # 8-byte Spill
-	mulxq	%rax, %rsi, %rax
-	mulxq	%rdi, %r8, %r12
-	addq	%rsi, %r12
-	adcq	%r14, %rax
-	movq	%rax, %rsi
-	movq	24(%rcx), %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	mulxq	%rax, %r10, %r14
-	adcq	%rbp, %r10
-	adcq	%rbx, %r14
-	movq	40(%rcx), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	mulxq	%rax, %rbp, %rdi
-	adcq	%r9, %rbp
-	movq	48(%rcx), %rax
-	movq	%rax, (%rsp)            # 8-byte Spill
-	mulxq	%rax, %rax, %rbx
-	adcq	%rdi, %rax
-	adcq	$0, %rbx
-	addq	%r15, %r8
-	adcq	%r11, %r12
-	adcq	-112(%rsp), %rsi        # 8-byte Folded Reload
-	movq	%rsi, -112(%rsp)        # 8-byte Spill
-	adcq	-104(%rsp), %r10        # 8-byte Folded Reload
-	adcq	-96(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-88(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-80(%rsp), %rax         # 8-byte Folded Reload
-	adcq	%r13, %rbx
-	sbbq	%r11, %r11
-	andl	$1, %r11d
-	movq	-16(%rsp), %rcx         # 8-byte Reload
-	movq	8(%rcx), %rdx
-	mulxq	-24(%rsp), %rdi, %rcx   # 8-byte Folded Reload
-	movq	%rdi, -96(%rsp)         # 8-byte Spill
-	movq	%rcx, -80(%rsp)         # 8-byte Spill
-	mulxq	-32(%rsp), %rsi, %rcx   # 8-byte Folded Reload
-	movq	%rsi, -128(%rsp)        # 8-byte Spill
-	movq	%rcx, -88(%rsp)         # 8-byte Spill
-	mulxq	-48(%rsp), %r9, %r8     # 8-byte Folded Reload
-	mulxq	-40(%rsp), %rsi, %rcx   # 8-byte Folded Reload
-	movq	%rsi, -104(%rsp)        # 8-byte Spill
-	addq	%r9, %rcx
-	movq	%rcx, %rdi
-	mulxq	-56(%rsp), %rcx, %r9    # 8-byte Folded Reload
-	adcq	%r8, %rcx
-	movq	%rcx, %rsi
-	mulxq	-64(%rsp), %r13, %rcx   # 8-byte Folded Reload
-	adcq	%r9, %r13
-	mulxq	-72(%rsp), %r8, %r15    # 8-byte Folded Reload
-	adcq	%rcx, %r8
-	adcq	-128(%rsp), %r15        # 8-byte Folded Reload
-	movq	-88(%rsp), %rdx         # 8-byte Reload
-	adcq	-96(%rsp), %rdx         # 8-byte Folded Reload
-	movq	-80(%rsp), %rcx         # 8-byte Reload
-	adcq	$0, %rcx
-	movq	-104(%rsp), %r9         # 8-byte Reload
-	addq	%r12, %r9
-	movq	%r9, -104(%rsp)         # 8-byte Spill
-	adcq	-112(%rsp), %rdi        # 8-byte Folded Reload
-	movq	%rdi, %r12
-	adcq	%r10, %rsi
-	movq	%rsi, -128(%rsp)        # 8-byte Spill
-	adcq	%r14, %r13
-	adcq	%rbp, %r8
-	adcq	%rax, %r15
-	adcq	%rbx, %rdx
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	adcq	%r11, %rcx
-	movq	%rcx, -80(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%r9, %rdx
-	imulq	-8(%rsp), %rdx          # 8-byte Folded Reload
-	mulxq	(%rsp), %r10, %rax      # 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	mulxq	16(%rsp), %rcx, %rax    # 8-byte Folded Reload
-	mulxq	32(%rsp), %rdi, %rbx    # 8-byte Folded Reload
-	mulxq	24(%rsp), %r14, %r9     # 8-byte Folded Reload
-	addq	%rdi, %r9
-	mulxq	40(%rsp), %rbp, %r11    # 8-byte Folded Reload
-	adcq	%rbx, %rbp
-	adcq	%rcx, %r11
-	mulxq	48(%rsp), %rbx, %rsi    # 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	8(%rsp), %rax, %rcx     # 8-byte Folded Reload
-	adcq	%rsi, %rax
-	adcq	%r10, %rcx
-	movq	-96(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	-104(%rsp), %r14        # 8-byte Folded Reload
-	adcq	%r12, %r9
-	adcq	-128(%rsp), %rbp        # 8-byte Folded Reload
-	adcq	%r13, %r11
-	adcq	%r8, %rbx
-	adcq	%r15, %rax
-	adcq	-88(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	-80(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	adcq	$0, -112(%rsp)          # 8-byte Folded Spill
-	movq	-16(%rsp), %rdx         # 8-byte Reload
-	movq	16(%rdx), %rdx
-	mulxq	-24(%rsp), %rdi, %rsi   # 8-byte Folded Reload
-	movq	%rdi, -104(%rsp)        # 8-byte Spill
-	movq	%rsi, -80(%rsp)         # 8-byte Spill
-	mulxq	-32(%rsp), %rdi, %rsi   # 8-byte Folded Reload
-	movq	%rdi, -128(%rsp)        # 8-byte Spill
-	movq	%rsi, -88(%rsp)         # 8-byte Spill
-	mulxq	-56(%rsp), %rdi, %r10   # 8-byte Folded Reload
-	mulxq	-48(%rsp), %rsi, %r13   # 8-byte Folded Reload
-	mulxq	-40(%rsp), %r8, %r15    # 8-byte Folded Reload
-	addq	%rsi, %r15
-	adcq	%rdi, %r13
-	mulxq	-64(%rsp), %r12, %rsi   # 8-byte Folded Reload
-	adcq	%r10, %r12
-	mulxq	-72(%rsp), %r10, %r14   # 8-byte Folded Reload
-	adcq	%rsi, %r10
-	adcq	-128(%rsp), %r14        # 8-byte Folded Reload
-	movq	-88(%rsp), %rsi         # 8-byte Reload
-	adcq	-104(%rsp), %rsi        # 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r9, %r8
-	movq	%r8, -104(%rsp)         # 8-byte Spill
-	adcq	%rbp, %r15
-	adcq	%r11, %r13
-	adcq	%rbx, %r12
-	adcq	%rax, %r10
-	adcq	%rcx, %r14
-	adcq	-96(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -88(%rsp)         # 8-byte Spill
-	adcq	-112(%rsp), %rdx        # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, %rbx
-	movq	%r8, %rdx
-	imulq	-8(%rsp), %rdx          # 8-byte Folded Reload
-	mulxq	(%rsp), %rcx, %rax      # 8-byte Folded Reload
-	movq	%rcx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	mulxq	16(%rsp), %rcx, %rax    # 8-byte Folded Reload
-	mulxq	32(%rsp), %rbp, %rsi    # 8-byte Folded Reload
-	mulxq	24(%rsp), %r11, %r8     # 8-byte Folded Reload
-	addq	%rbp, %r8
-	mulxq	40(%rsp), %rbp, %r9     # 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	adcq	%rcx, %r9
-	mulxq	48(%rsp), %rsi, %rdi    # 8-byte Folded Reload
-	adcq	%rax, %rsi
-	mulxq	8(%rsp), %rax, %rcx     # 8-byte Folded Reload
-	adcq	%rdi, %rax
-	adcq	-112(%rsp), %rcx        # 8-byte Folded Reload
-	movq	-96(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	-104(%rsp), %r11        # 8-byte Folded Reload
-	adcq	%r15, %r8
-	adcq	%r13, %rbp
-	adcq	%r12, %r9
-	adcq	%r10, %rsi
-	adcq	%r14, %rax
-	adcq	-88(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	-80(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	adcq	$0, %rbx
-	movq	%rbx, -88(%rsp)         # 8-byte Spill
-	movq	-16(%rsp), %rdx         # 8-byte Reload
-	movq	24(%rdx), %rdx
-	mulxq	-24(%rsp), %rbx, %rdi   # 8-byte Folded Reload
-	movq	%rbx, -112(%rsp)        # 8-byte Spill
-	movq	%rdi, -80(%rsp)         # 8-byte Spill
-	mulxq	-32(%rsp), %rdi, %r13   # 8-byte Folded Reload
-	movq	%rdi, -128(%rsp)        # 8-byte Spill
-	mulxq	-56(%rsp), %r10, %r11   # 8-byte Folded Reload
-	mulxq	-48(%rsp), %rdi, %r15   # 8-byte Folded Reload
-	mulxq	-40(%rsp), %rbx, %r12   # 8-byte Folded Reload
-	movq	%rbx, -104(%rsp)        # 8-byte Spill
-	addq	%rdi, %r12
-	adcq	%r10, %r15
-	mulxq	-64(%rsp), %rbx, %rdi   # 8-byte Folded Reload
-	adcq	%r11, %rbx
-	mulxq	-72(%rsp), %r10, %r14   # 8-byte Folded Reload
-	adcq	%rdi, %r10
-	adcq	-128(%rsp), %r14        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r13        # 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	movq	-104(%rsp), %rdi        # 8-byte Reload
-	addq	%r8, %rdi
-	movq	%rdi, -104(%rsp)        # 8-byte Spill
-	adcq	%rbp, %r12
-	adcq	%r9, %r15
-	adcq	%rsi, %rbx
-	adcq	%rax, %r10
-	adcq	%rcx, %r14
-	adcq	-96(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-88(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rdi, %rdx
-	imulq	-8(%rsp), %rdx          # 8-byte Folded Reload
-	mulxq	(%rsp), %rcx, %rax      # 8-byte Folded Reload
-	movq	%rcx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	mulxq	16(%rsp), %rcx, %rax    # 8-byte Folded Reload
-	mulxq	32(%rsp), %rbp, %rsi    # 8-byte Folded Reload
-	mulxq	24(%rsp), %r11, %r8     # 8-byte Folded Reload
-	addq	%rbp, %r8
-	mulxq	40(%rsp), %rbp, %r9     # 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	adcq	%rcx, %r9
-	mulxq	48(%rsp), %rsi, %rdi    # 8-byte Folded Reload
-	adcq	%rax, %rsi
-	mulxq	8(%rsp), %rax, %rcx     # 8-byte Folded Reload
-	adcq	%rdi, %rax
-	adcq	-112(%rsp), %rcx        # 8-byte Folded Reload
-	movq	-88(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	-104(%rsp), %r11        # 8-byte Folded Reload
-	adcq	%r12, %r8
-	adcq	%r15, %rbp
-	adcq	%rbx, %r9
-	adcq	%r10, %rsi
-	adcq	%r14, %rax
-	adcq	%r13, %rcx
-	adcq	-80(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	adcq	$0, -96(%rsp)           # 8-byte Folded Spill
-	movq	-16(%rsp), %rdx         # 8-byte Reload
-	movq	32(%rdx), %rdx
-	mulxq	-24(%rsp), %rbx, %rdi   # 8-byte Folded Reload
-	movq	%rbx, -104(%rsp)        # 8-byte Spill
-	movq	%rdi, -80(%rsp)         # 8-byte Spill
-	mulxq	-32(%rsp), %rdi, %r11   # 8-byte Folded Reload
-	movq	%rdi, -112(%rsp)        # 8-byte Spill
-	mulxq	-56(%rsp), %r10, %r13   # 8-byte Folded Reload
-	mulxq	-48(%rsp), %rdi, %r15   # 8-byte Folded Reload
-	mulxq	-40(%rsp), %rbx, %r12   # 8-byte Folded Reload
-	addq	%rdi, %r12
-	adcq	%r10, %r15
-	mulxq	-64(%rsp), %r10, %rdi   # 8-byte Folded Reload
-	adcq	%r13, %r10
-	mulxq	-72(%rsp), %r13, %r14   # 8-byte Folded Reload
-	adcq	%rdi, %r13
-	adcq	-112(%rsp), %r14        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r11        # 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r8, %rbx
-	movq	%rbx, -112(%rsp)        # 8-byte Spill
-	adcq	%rbp, %r12
-	adcq	%r9, %r15
-	adcq	%rsi, %r10
-	adcq	%rax, %r13
-	adcq	%rcx, %r14
-	adcq	-88(%rsp), %r11         # 8-byte Folded Reload
-	movq	%r11, -128(%rsp)        # 8-byte Spill
-	adcq	-96(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rbx, %rdx
-	imulq	-8(%rsp), %rdx          # 8-byte Folded Reload
-	mulxq	(%rsp), %rcx, %rax      # 8-byte Folded Reload
-	movq	%rcx, -88(%rsp)         # 8-byte Spill
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	mulxq	16(%rsp), %rcx, %rax    # 8-byte Folded Reload
-	mulxq	32(%rsp), %rbp, %rsi    # 8-byte Folded Reload
-	mulxq	24(%rsp), %r9, %r11     # 8-byte Folded Reload
-	addq	%rbp, %r11
-	mulxq	40(%rsp), %rbp, %r8     # 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	adcq	%rcx, %r8
-	mulxq	48(%rsp), %rsi, %rdi    # 8-byte Folded Reload
-	adcq	%rax, %rsi
-	mulxq	8(%rsp), %rax, %rcx     # 8-byte Folded Reload
-	adcq	%rdi, %rax
-	adcq	-88(%rsp), %rcx         # 8-byte Folded Reload
-	movq	-96(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	-112(%rsp), %r9         # 8-byte Folded Reload
-	adcq	%r12, %r11
-	adcq	%r15, %rbp
-	adcq	%r10, %r8
-	adcq	%r13, %rsi
-	adcq	%r14, %rax
-	adcq	-128(%rsp), %rcx        # 8-byte Folded Reload
-	adcq	-80(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	adcq	$0, -104(%rsp)          # 8-byte Folded Spill
-	movq	-16(%rsp), %rdx         # 8-byte Reload
-	movq	40(%rdx), %rdx
-	mulxq	-24(%rsp), %rbx, %rdi   # 8-byte Folded Reload
-	movq	%rbx, -112(%rsp)        # 8-byte Spill
-	movq	%rdi, -80(%rsp)         # 8-byte Spill
-	mulxq	-32(%rsp), %rbx, %rdi   # 8-byte Folded Reload
-	movq	%rbx, -128(%rsp)        # 8-byte Spill
-	movq	%rdi, -88(%rsp)         # 8-byte Spill
-	mulxq	-56(%rsp), %rbx, %r10   # 8-byte Folded Reload
-	mulxq	-48(%rsp), %rdi, %r13   # 8-byte Folded Reload
-	mulxq	-40(%rsp), %r9, %r12    # 8-byte Folded Reload
-	addq	%rdi, %r12
-	adcq	%rbx, %r13
-	mulxq	-64(%rsp), %r15, %rdi   # 8-byte Folded Reload
-	adcq	%r10, %r15
-	mulxq	-72(%rsp), %r10, %r14   # 8-byte Folded Reload
-	adcq	%rdi, %r10
-	adcq	-128(%rsp), %r14        # 8-byte Folded Reload
-	movq	-88(%rsp), %rdi         # 8-byte Reload
-	adcq	-112(%rsp), %rdi        # 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r11, %r9
-	movq	%r9, -112(%rsp)         # 8-byte Spill
-	adcq	%rbp, %r12
-	adcq	%r8, %r13
-	adcq	%rsi, %r15
-	adcq	%rax, %r10
-	adcq	%rcx, %r14
-	adcq	-96(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, -88(%rsp)         # 8-byte Spill
-	adcq	-104(%rsp), %rdx        # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%r9, %rdx
-	imulq	-8(%rsp), %rdx          # 8-byte Folded Reload
-	mulxq	(%rsp), %rcx, %rax      # 8-byte Folded Reload
-	movq	%rcx, -128(%rsp)        # 8-byte Spill
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	mulxq	16(%rsp), %rcx, %rax    # 8-byte Folded Reload
-	mulxq	32(%rsp), %rdi, %rsi    # 8-byte Folded Reload
-	mulxq	24(%rsp), %r11, %rbx    # 8-byte Folded Reload
-	addq	%rdi, %rbx
-	mulxq	40(%rsp), %r8, %r9      # 8-byte Folded Reload
-	adcq	%rsi, %r8
-	adcq	%rcx, %r9
-	mulxq	48(%rsp), %rdi, %rbp    # 8-byte Folded Reload
-	adcq	%rax, %rdi
-	mulxq	8(%rsp), %rcx, %rsi     # 8-byte Folded Reload
-	adcq	%rbp, %rcx
-	adcq	-128(%rsp), %rsi        # 8-byte Folded Reload
-	movq	-96(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	-112(%rsp), %r11        # 8-byte Folded Reload
-	adcq	%r12, %rbx
-	adcq	%r13, %r8
-	adcq	%r15, %r9
-	adcq	%r10, %rdi
-	adcq	%r14, %rcx
-	adcq	-88(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-80(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	-104(%rsp), %r12        # 8-byte Reload
-	adcq	$0, %r12
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	movq	48(%rax), %rdx
-	mulxq	-24(%rsp), %rbp, %rax   # 8-byte Folded Reload
-	movq	%rbp, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	mulxq	-32(%rsp), %rbp, %rax   # 8-byte Folded Reload
-	movq	%rbp, -88(%rsp)         # 8-byte Spill
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	mulxq	-72(%rsp), %rbp, %rax   # 8-byte Folded Reload
-	movq	%rbp, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	mulxq	-64(%rsp), %r13, %rbp   # 8-byte Folded Reload
-	mulxq	-56(%rsp), %r14, %r15   # 8-byte Folded Reload
-	mulxq	-48(%rsp), %rax, %r11   # 8-byte Folded Reload
-	mulxq	-40(%rsp), %rdx, %r10   # 8-byte Folded Reload
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	addq	%rax, %r10
-	adcq	%r14, %r11
-	adcq	%r13, %r15
-	adcq	-72(%rsp), %rbp         # 8-byte Folded Reload
-	movq	-32(%rsp), %r14         # 8-byte Reload
-	adcq	-88(%rsp), %r14         # 8-byte Folded Reload
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	adcq	-80(%rsp), %rdx         # 8-byte Folded Reload
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	movq	-40(%rsp), %r13         # 8-byte Reload
-	addq	%rbx, %r13
-	movq	%r13, -40(%rsp)         # 8-byte Spill
-	adcq	%r8, %r10
-	adcq	%r9, %r11
-	adcq	%rdi, %r15
-	adcq	%rcx, %rbp
-	movq	%rbp, -48(%rsp)         # 8-byte Spill
-	adcq	%rsi, %r14
-	movq	%r14, -32(%rsp)         # 8-byte Spill
-	adcq	-96(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	adcq	%r12, %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	sbbq	%rdi, %rdi
-	movq	-8(%rsp), %rdx          # 8-byte Reload
-	imulq	%r13, %rdx
-	mulxq	16(%rsp), %rbp, %rsi    # 8-byte Folded Reload
-	mulxq	32(%rsp), %rcx, %rbx    # 8-byte Folded Reload
-	mulxq	24(%rsp), %r13, %rax    # 8-byte Folded Reload
-	addq	%rcx, %rax
-	mulxq	40(%rsp), %rcx, %r9     # 8-byte Folded Reload
-	adcq	%rbx, %rcx
-	adcq	%rbp, %r9
-	mulxq	48(%rsp), %rbp, %rbx    # 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	mulxq	8(%rsp), %rsi, %r14     # 8-byte Folded Reload
-	adcq	%rbx, %rsi
-	mulxq	(%rsp), %rdx, %rbx      # 8-byte Folded Reload
-	adcq	%r14, %rdx
-	adcq	$0, %rbx
-	andl	$1, %edi
-	addq	-40(%rsp), %r13         # 8-byte Folded Reload
-	adcq	%r10, %rax
-	adcq	%r11, %rcx
-	adcq	%r15, %r9
-	adcq	-48(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-32(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-24(%rsp), %rdx         # 8-byte Folded Reload
-	adcq	-16(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	$0, %rdi
-	movq	%rax, %r8
-	subq	24(%rsp), %r8           # 8-byte Folded Reload
-	movq	%rcx, %r10
-	sbbq	32(%rsp), %r10          # 8-byte Folded Reload
-	movq	%r9, %r11
-	sbbq	40(%rsp), %r11          # 8-byte Folded Reload
-	movq	%rbp, %r14
-	sbbq	16(%rsp), %r14          # 8-byte Folded Reload
-	movq	%rsi, %r15
-	sbbq	48(%rsp), %r15          # 8-byte Folded Reload
-	movq	%rdx, %r12
-	sbbq	8(%rsp), %r12           # 8-byte Folded Reload
-	movq	%rbx, %r13
-	sbbq	(%rsp), %r13            # 8-byte Folded Reload
-	sbbq	$0, %rdi
-	andl	$1, %edi
-	cmovneq	%rbx, %r13
-	testb	%dil, %dil
-	cmovneq	%rax, %r8
-	movq	-120(%rsp), %rax        # 8-byte Reload
-	movq	%r8, (%rax)
-	cmovneq	%rcx, %r10
-	movq	%r10, 8(%rax)
-	cmovneq	%r9, %r11
-	movq	%r11, 16(%rax)
-	cmovneq	%rbp, %r14
-	movq	%r14, 24(%rax)
-	cmovneq	%rsi, %r15
-	movq	%r15, 32(%rax)
-	cmovneq	%rdx, %r12
-	movq	%r12, 40(%rax)
-	movq	%r13, 48(%rax)
-	addq	$56, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end98:
-	.size	mcl_fp_mont7Lbmi2, .Lfunc_end98-mcl_fp_mont7Lbmi2
-
-	.globl	mcl_fp_montNF7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF7Lbmi2,@function
-mcl_fp_montNF7Lbmi2:                    # @mcl_fp_montNF7Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$40, %rsp
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rdi, -104(%rsp)        # 8-byte Spill
-	movq	(%rsi), %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rdi
-	movq	%rdi, -48(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rbp
-	movq	%rdi, %rdx
-	mulxq	%rbp, %rdi, %rbx
-	movq	%rax, %rdx
-	mulxq	%rbp, %r8, %r14
-	movq	16(%rsi), %rdx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	addq	%rdi, %r14
-	mulxq	%rbp, %r15, %rax
-	adcq	%rbx, %r15
-	movq	24(%rsi), %rdx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	mulxq	%rbp, %rbx, %rdi
-	adcq	%rax, %rbx
-	movq	32(%rsi), %rdx
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	mulxq	%rbp, %r11, %rax
-	adcq	%rdi, %r11
-	movq	40(%rsi), %rdx
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	mulxq	%rbp, %r9, %rdi
-	adcq	%rax, %r9
-	movq	48(%rsi), %rdx
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	mulxq	%rbp, %r10, %rbp
-	adcq	%rdi, %r10
-	adcq	$0, %rbp
-	movq	-8(%rcx), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	%r8, %rdx
-	imulq	%rax, %rdx
-	movq	(%rcx), %rax
-	movq	%rax, 24(%rsp)          # 8-byte Spill
-	mulxq	%rax, %rax, %rsi
-	movq	%rsi, -96(%rsp)         # 8-byte Spill
-	addq	%r8, %rax
-	movq	8(%rcx), %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	mulxq	%rax, %r8, %rsi
-	movq	%rsi, -112(%rsp)        # 8-byte Spill
-	adcq	%r14, %r8
-	movq	16(%rcx), %rax
-	movq	%rax, (%rsp)            # 8-byte Spill
-	mulxq	%rax, %rsi, %r13
-	adcq	%r15, %rsi
-	movq	24(%rcx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	mulxq	%rax, %r12, %rax
-	adcq	%rbx, %r12
-	movq	32(%rcx), %rdi
-	movq	%rdi, -16(%rsp)         # 8-byte Spill
-	mulxq	%rdi, %r15, %rbx
-	adcq	%r11, %r15
-	movq	40(%rcx), %rdi
-	movq	%rdi, -24(%rsp)         # 8-byte Spill
-	mulxq	%rdi, %r14, %rdi
-	adcq	%r9, %r14
-	movq	48(%rcx), %rcx
-	movq	%rcx, 32(%rsp)          # 8-byte Spill
-	mulxq	%rcx, %r11, %rcx
-	adcq	%r10, %r11
-	adcq	$0, %rbp
-	addq	-96(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r8, -96(%rsp)          # 8-byte Spill
-	adcq	-112(%rsp), %rsi        # 8-byte Folded Reload
-	movq	%rsi, -112(%rsp)        # 8-byte Spill
-	adcq	%r13, %r12
-	adcq	%rax, %r15
-	adcq	%rbx, %r14
-	adcq	%rdi, %r11
-	adcq	%rcx, %rbp
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	movq	8(%rax), %rdx
-	mulxq	-48(%rsp), %rcx, %rsi   # 8-byte Folded Reload
-	mulxq	-32(%rsp), %r13, %rax   # 8-byte Folded Reload
-	addq	%rcx, %rax
-	mulxq	-56(%rsp), %rcx, %rdi   # 8-byte Folded Reload
-	adcq	%rsi, %rcx
-	mulxq	-64(%rsp), %rsi, %r8    # 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-72(%rsp), %rdi, %r9    # 8-byte Folded Reload
-	adcq	%r8, %rdi
-	mulxq	-80(%rsp), %r8, %rbx    # 8-byte Folded Reload
-	adcq	%r9, %r8
-	mulxq	-88(%rsp), %r9, %r10    # 8-byte Folded Reload
-	adcq	%rbx, %r9
-	adcq	$0, %r10
-	addq	-96(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-112(%rsp), %rax        # 8-byte Folded Reload
-	adcq	%r12, %rcx
-	adcq	%r15, %rsi
-	adcq	%r14, %rdi
-	adcq	%r11, %r8
-	adcq	%rbp, %r9
-	adcq	$0, %r10
-	movq	%r13, %rdx
-	imulq	8(%rsp), %rdx           # 8-byte Folded Reload
-	mulxq	24(%rsp), %rbp, %rbx    # 8-byte Folded Reload
-	movq	%rbx, -96(%rsp)         # 8-byte Spill
-	addq	%r13, %rbp
-	mulxq	16(%rsp), %rbp, %r14    # 8-byte Folded Reload
-	adcq	%rax, %rbp
-	mulxq	(%rsp), %rax, %r11      # 8-byte Folded Reload
-	adcq	%rcx, %rax
-	mulxq	-8(%rsp), %r12, %rcx    # 8-byte Folded Reload
-	adcq	%rsi, %r12
-	mulxq	-16(%rsp), %r15, %rbx   # 8-byte Folded Reload
-	adcq	%rdi, %r15
-	mulxq	-24(%rsp), %r13, %rdi   # 8-byte Folded Reload
-	adcq	%r8, %r13
-	mulxq	32(%rsp), %rsi, %rdx    # 8-byte Folded Reload
-	adcq	%r9, %rsi
-	adcq	$0, %r10
-	addq	-96(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, -96(%rsp)         # 8-byte Spill
-	adcq	%r14, %rax
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	adcq	%r11, %r12
-	adcq	%rcx, %r15
-	adcq	%rbx, %r13
-	adcq	%rdi, %rsi
-	adcq	%rdx, %r10
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	movq	16(%rax), %rdx
-	mulxq	-48(%rsp), %rcx, %rax   # 8-byte Folded Reload
-	mulxq	-32(%rsp), %r14, %rdi   # 8-byte Folded Reload
-	addq	%rcx, %rdi
-	mulxq	-56(%rsp), %rbp, %rcx   # 8-byte Folded Reload
-	adcq	%rax, %rbp
-	mulxq	-64(%rsp), %rbx, %r8    # 8-byte Folded Reload
-	adcq	%rcx, %rbx
-	mulxq	-72(%rsp), %rax, %r9    # 8-byte Folded Reload
-	adcq	%r8, %rax
-	mulxq	-80(%rsp), %r8, %rcx    # 8-byte Folded Reload
-	movq	%rcx, -120(%rsp)        # 8-byte Spill
-	adcq	%r9, %r8
-	mulxq	-88(%rsp), %r9, %r11    # 8-byte Folded Reload
-	adcq	-120(%rsp), %r9         # 8-byte Folded Reload
-	adcq	$0, %r11
-	addq	-96(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-112(%rsp), %rdi        # 8-byte Folded Reload
-	adcq	%r12, %rbp
-	adcq	%r15, %rbx
-	adcq	%r13, %rax
-	adcq	%rsi, %r8
-	adcq	%r10, %r9
-	adcq	$0, %r11
-	movq	%r14, %rdx
-	imulq	8(%rsp), %rdx           # 8-byte Folded Reload
-	mulxq	24(%rsp), %rsi, %rcx    # 8-byte Folded Reload
-	movq	%rcx, -96(%rsp)         # 8-byte Spill
-	addq	%r14, %rsi
-	mulxq	16(%rsp), %rsi, %r13    # 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	(%rsp), %rdi, %r15      # 8-byte Folded Reload
-	adcq	%rbp, %rdi
-	mulxq	-8(%rsp), %rcx, %rbp    # 8-byte Folded Reload
-	adcq	%rbx, %rcx
-	mulxq	-16(%rsp), %r14, %rbx   # 8-byte Folded Reload
-	adcq	%rax, %r14
-	mulxq	-24(%rsp), %r12, %rax   # 8-byte Folded Reload
-	adcq	%r8, %r12
-	mulxq	32(%rsp), %r10, %rdx    # 8-byte Folded Reload
-	adcq	%r9, %r10
-	adcq	$0, %r11
-	addq	-96(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -96(%rsp)         # 8-byte Spill
-	adcq	%r13, %rdi
-	movq	%rdi, -112(%rsp)        # 8-byte Spill
-	adcq	%r15, %rcx
-	adcq	%rbp, %r14
-	adcq	%rbx, %r12
-	adcq	%rax, %r10
-	adcq	%rdx, %r11
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	movq	24(%rax), %rdx
-	mulxq	-48(%rsp), %rsi, %rax   # 8-byte Folded Reload
-	mulxq	-32(%rsp), %r15, %rbp   # 8-byte Folded Reload
-	addq	%rsi, %rbp
-	mulxq	-56(%rsp), %rbx, %rdi   # 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	-64(%rsp), %rsi, %rax   # 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-72(%rsp), %rdi, %r9    # 8-byte Folded Reload
-	adcq	%rax, %rdi
-	mulxq	-80(%rsp), %r8, %rax    # 8-byte Folded Reload
-	adcq	%r9, %r8
-	mulxq	-88(%rsp), %r9, %r13    # 8-byte Folded Reload
-	adcq	%rax, %r9
-	adcq	$0, %r13
-	addq	-96(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-112(%rsp), %rbp        # 8-byte Folded Reload
-	adcq	%rcx, %rbx
-	adcq	%r14, %rsi
-	adcq	%r12, %rdi
-	adcq	%r10, %r8
-	adcq	%r11, %r9
-	adcq	$0, %r13
-	movq	%r15, %rdx
-	imulq	8(%rsp), %rdx           # 8-byte Folded Reload
-	mulxq	24(%rsp), %rcx, %rax    # 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	addq	%r15, %rcx
-	mulxq	16(%rsp), %rcx, %r11    # 8-byte Folded Reload
-	adcq	%rbp, %rcx
-	mulxq	(%rsp), %rbp, %r10      # 8-byte Folded Reload
-	adcq	%rbx, %rbp
-	mulxq	-8(%rsp), %rax, %rbx    # 8-byte Folded Reload
-	adcq	%rsi, %rax
-	mulxq	-16(%rsp), %r14, %rsi   # 8-byte Folded Reload
-	adcq	%rdi, %r14
-	mulxq	-24(%rsp), %r15, %rdi   # 8-byte Folded Reload
-	adcq	%r8, %r15
-	mulxq	32(%rsp), %r12, %rdx    # 8-byte Folded Reload
-	adcq	%r9, %r12
-	adcq	$0, %r13
-	addq	-96(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	%r11, %rbp
-	movq	%rbp, -96(%rsp)         # 8-byte Spill
-	adcq	%r10, %rax
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	adcq	%rbx, %r14
-	adcq	%rsi, %r15
-	adcq	%rdi, %r12
-	adcq	%rdx, %r13
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	movq	32(%rax), %rdx
-	mulxq	-48(%rsp), %rsi, %rdi   # 8-byte Folded Reload
-	mulxq	-32(%rsp), %r11, %r8    # 8-byte Folded Reload
-	addq	%rsi, %r8
-	mulxq	-56(%rsp), %rbx, %rsi   # 8-byte Folded Reload
-	adcq	%rdi, %rbx
-	mulxq	-64(%rsp), %rbp, %rdi   # 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	mulxq	-72(%rsp), %rsi, %r9    # 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-80(%rsp), %rdi, %rax   # 8-byte Folded Reload
-	adcq	%r9, %rdi
-	mulxq	-88(%rsp), %r9, %r10    # 8-byte Folded Reload
-	adcq	%rax, %r9
-	adcq	$0, %r10
-	addq	%rcx, %r11
-	adcq	-96(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-112(%rsp), %rbx        # 8-byte Folded Reload
-	adcq	%r14, %rbp
-	adcq	%r15, %rsi
-	adcq	%r12, %rdi
-	adcq	%r13, %r9
-	adcq	$0, %r10
-	movq	%r11, %rdx
-	imulq	8(%rsp), %rdx           # 8-byte Folded Reload
-	mulxq	24(%rsp), %rcx, %rax    # 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	addq	%r11, %rcx
-	mulxq	16(%rsp), %rcx, %r13    # 8-byte Folded Reload
-	adcq	%r8, %rcx
-	mulxq	(%rsp), %rax, %r8       # 8-byte Folded Reload
-	adcq	%rbx, %rax
-	mulxq	-8(%rsp), %rbx, %r11    # 8-byte Folded Reload
-	adcq	%rbp, %rbx
-	mulxq	-16(%rsp), %r14, %rbp   # 8-byte Folded Reload
-	adcq	%rsi, %r14
-	mulxq	-24(%rsp), %r15, %rsi   # 8-byte Folded Reload
-	adcq	%rdi, %r15
-	mulxq	32(%rsp), %r12, %rdx    # 8-byte Folded Reload
-	adcq	%r9, %r12
-	adcq	$0, %r10
-	addq	-96(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	%r13, %rax
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	adcq	%r8, %rbx
-	movq	%rbx, -112(%rsp)        # 8-byte Spill
-	adcq	%r11, %r14
-	adcq	%rbp, %r15
-	adcq	%rsi, %r12
-	adcq	%rdx, %r10
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	movq	40(%rax), %rdx
-	mulxq	-48(%rsp), %rsi, %rax   # 8-byte Folded Reload
-	mulxq	-32(%rsp), %r11, %rbp   # 8-byte Folded Reload
-	addq	%rsi, %rbp
-	mulxq	-56(%rsp), %rbx, %rdi   # 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	-64(%rsp), %rsi, %rax   # 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-72(%rsp), %rdi, %r9    # 8-byte Folded Reload
-	adcq	%rax, %rdi
-	mulxq	-80(%rsp), %r8, %rax    # 8-byte Folded Reload
-	adcq	%r9, %r8
-	mulxq	-88(%rsp), %r9, %r13    # 8-byte Folded Reload
-	adcq	%rax, %r9
-	adcq	$0, %r13
-	addq	%rcx, %r11
-	adcq	-96(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-112(%rsp), %rbx        # 8-byte Folded Reload
-	adcq	%r14, %rsi
-	adcq	%r15, %rdi
-	adcq	%r12, %r8
-	adcq	%r10, %r9
-	adcq	$0, %r13
-	movq	%r11, %rdx
-	imulq	8(%rsp), %rdx           # 8-byte Folded Reload
-	mulxq	24(%rsp), %rcx, %rax    # 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	addq	%r11, %rcx
-	mulxq	16(%rsp), %rcx, %rax    # 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	adcq	%rbp, %rcx
-	mulxq	(%rsp), %rax, %rbp      # 8-byte Folded Reload
-	movq	%rbp, -128(%rsp)        # 8-byte Spill
-	adcq	%rbx, %rax
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	mulxq	-8(%rsp), %r14, %rbp    # 8-byte Folded Reload
-	adcq	%rsi, %r14
-	mulxq	-16(%rsp), %r11, %r12   # 8-byte Folded Reload
-	adcq	%rdi, %r11
-	mulxq	-24(%rsp), %r10, %rbx   # 8-byte Folded Reload
-	adcq	%r8, %r10
-	mulxq	32(%rsp), %rdi, %rax    # 8-byte Folded Reload
-	adcq	%r9, %rdi
-	adcq	$0, %r13
-	addq	-112(%rsp), %rcx        # 8-byte Folded Reload
-	movq	%rcx, -112(%rsp)        # 8-byte Spill
-	movq	-96(%rsp), %rcx         # 8-byte Reload
-	adcq	-120(%rsp), %rcx        # 8-byte Folded Reload
-	movq	%rcx, -96(%rsp)         # 8-byte Spill
-	adcq	-128(%rsp), %r14        # 8-byte Folded Reload
-	adcq	%rbp, %r11
-	adcq	%r12, %r10
-	adcq	%rbx, %rdi
-	adcq	%rax, %r13
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	movq	48(%rax), %rdx
-	mulxq	-48(%rsp), %rbp, %r9    # 8-byte Folded Reload
-	mulxq	-32(%rsp), %r8, %rax    # 8-byte Folded Reload
-	addq	%rbp, %rax
-	mulxq	-56(%rsp), %rbx, %rcx   # 8-byte Folded Reload
-	adcq	%r9, %rbx
-	mulxq	-64(%rsp), %rbp, %r9    # 8-byte Folded Reload
-	adcq	%rcx, %rbp
-	mulxq	-72(%rsp), %rcx, %r12   # 8-byte Folded Reload
-	adcq	%r9, %rcx
-	mulxq	-80(%rsp), %r15, %rsi   # 8-byte Folded Reload
-	movq	%rsi, -32(%rsp)         # 8-byte Spill
-	adcq	%r12, %r15
-	mulxq	-88(%rsp), %r12, %r9    # 8-byte Folded Reload
-	adcq	-32(%rsp), %r12         # 8-byte Folded Reload
-	adcq	$0, %r9
-	addq	-112(%rsp), %r8         # 8-byte Folded Reload
-	adcq	-96(%rsp), %rax         # 8-byte Folded Reload
-	adcq	%r14, %rbx
-	adcq	%r11, %rbp
-	adcq	%r10, %rcx
-	adcq	%rdi, %r15
-	adcq	%r13, %r12
-	adcq	$0, %r9
-	movq	8(%rsp), %rdx           # 8-byte Reload
-	imulq	%r8, %rdx
-	mulxq	24(%rsp), %rdi, %rsi    # 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           # 8-byte Spill
-	addq	%r8, %rdi
-	mulxq	16(%rsp), %r8, %rsi     # 8-byte Folded Reload
-	movq	%rsi, -32(%rsp)         # 8-byte Spill
-	adcq	%rax, %r8
-	movq	(%rsp), %r11            # 8-byte Reload
-	mulxq	%r11, %rsi, %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	adcq	%rbx, %rsi
-	movq	-8(%rsp), %r14          # 8-byte Reload
-	mulxq	%r14, %rdi, %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	adcq	%rbp, %rdi
-	movq	-16(%rsp), %rbp         # 8-byte Reload
-	mulxq	%rbp, %rax, %rbx
-	movq	%rbx, -56(%rsp)         # 8-byte Spill
-	adcq	%rcx, %rax
-	movq	-24(%rsp), %rbx         # 8-byte Reload
-	mulxq	%rbx, %rcx, %r13
-	adcq	%r15, %rcx
-	mulxq	32(%rsp), %rdx, %r15    # 8-byte Folded Reload
-	adcq	%r12, %rdx
-	adcq	$0, %r9
-	addq	8(%rsp), %r8            # 8-byte Folded Reload
-	adcq	-32(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-40(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rax         # 8-byte Folded Reload
-	adcq	-56(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	%r13, %rdx
-	adcq	%r15, %r9
-	movq	%r8, %r13
-	subq	24(%rsp), %r13          # 8-byte Folded Reload
-	movq	%rsi, %r12
-	sbbq	16(%rsp), %r12          # 8-byte Folded Reload
-	movq	%rdi, %r10
-	sbbq	%r11, %r10
-	movq	%rax, %r11
-	sbbq	%r14, %r11
-	movq	%rcx, %r14
-	sbbq	%rbp, %r14
-	movq	%rdx, %r15
-	sbbq	%rbx, %r15
-	movq	%r9, %rbp
-	sbbq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, %rbx
-	sarq	$63, %rbx
-	cmovsq	%r8, %r13
-	movq	-104(%rsp), %rbx        # 8-byte Reload
-	movq	%r13, (%rbx)
-	cmovsq	%rsi, %r12
-	movq	%r12, 8(%rbx)
-	cmovsq	%rdi, %r10
-	movq	%r10, 16(%rbx)
-	cmovsq	%rax, %r11
-	movq	%r11, 24(%rbx)
-	cmovsq	%rcx, %r14
-	movq	%r14, 32(%rbx)
-	cmovsq	%rdx, %r15
-	movq	%r15, 40(%rbx)
-	cmovsq	%r9, %rbp
-	movq	%rbp, 48(%rbx)
-	addq	$40, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end99:
-	.size	mcl_fp_montNF7Lbmi2, .Lfunc_end99-mcl_fp_montNF7Lbmi2
-
-	.globl	mcl_fp_montRed7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed7Lbmi2,@function
-mcl_fp_montRed7Lbmi2:                   # @mcl_fp_montRed7Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$64, %rsp
-	movq	%rdx, %rcx
-	movq	%rdi, -88(%rsp)         # 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	(%rcx), %rbx
-	movq	%rbx, 32(%rsp)          # 8-byte Spill
-	movq	(%rsi), %rdx
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	imulq	%rax, %rdx
-	movq	48(%rcx), %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	mulxq	%rax, %rdi, %rax
-	movq	%rdi, 40(%rsp)          # 8-byte Spill
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	40(%rcx), %r8
-	movq	%r8, (%rsp)             # 8-byte Spill
-	movq	32(%rcx), %r9
-	movq	%r9, 24(%rsp)           # 8-byte Spill
-	movq	24(%rcx), %rbp
-	movq	%rbp, 8(%rsp)           # 8-byte Spill
-	movq	16(%rcx), %rdi
-	movq	%rdi, 56(%rsp)          # 8-byte Spill
-	movq	8(%rcx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	mulxq	%r8, %r10, %r11
-	mulxq	%r9, %r14, %r9
-	mulxq	%rbp, %r8, %r13
-	mulxq	%rdi, %rcx, %r12
-	mulxq	%rax, %rbp, %r15
-	mulxq	%rbx, %rdx, %rdi
-	addq	%rbp, %rdi
-	adcq	%rcx, %r15
-	adcq	%r8, %r12
-	adcq	%r14, %r13
-	adcq	%r10, %r9
-	adcq	40(%rsp), %r11          # 8-byte Folded Reload
-	movq	-48(%rsp), %rcx         # 8-byte Reload
-	adcq	$0, %rcx
-	addq	48(%rsp), %rdx          # 8-byte Folded Reload
-	adcq	8(%rsi), %rdi
-	adcq	16(%rsi), %r15
-	adcq	24(%rsi), %r12
-	adcq	32(%rsi), %r13
-	adcq	40(%rsi), %r9
-	movq	%r9, -96(%rsp)          # 8-byte Spill
-	adcq	48(%rsi), %r11
-	movq	%r11, -72(%rsp)         # 8-byte Spill
-	adcq	56(%rsi), %rcx
-	movq	%rcx, -48(%rsp)         # 8-byte Spill
-	movq	104(%rsi), %r8
-	movq	96(%rsi), %rdx
-	movq	88(%rsi), %rbp
-	movq	80(%rsi), %rbx
-	movq	72(%rsi), %rcx
-	movq	64(%rsi), %rsi
-	adcq	$0, %rsi
-	movq	%rsi, -104(%rsp)        # 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, -80(%rsp)         # 8-byte Spill
-	adcq	$0, %rbx
-	movq	%rbx, -40(%rsp)         # 8-byte Spill
-	adcq	$0, %rbp
-	movq	%rbp, -32(%rsp)         # 8-byte Spill
-	adcq	$0, %rdx
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 48(%rsp)           # 8-byte Spill
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	movq	%rcx, 40(%rsp)          # 8-byte Spill
-	movq	%rdi, %rdx
-	movq	-24(%rsp), %r9          # 8-byte Reload
-	imulq	%r9, %rdx
-	mulxq	16(%rsp), %rsi, %rcx    # 8-byte Folded Reload
-	movq	%rsi, -112(%rsp)        # 8-byte Spill
-	movq	%rcx, -56(%rsp)         # 8-byte Spill
-	mulxq	(%rsp), %rsi, %rcx      # 8-byte Folded Reload
-	movq	%rsi, -120(%rsp)        # 8-byte Spill
-	movq	%rcx, -64(%rsp)         # 8-byte Spill
-	movq	24(%rsp), %rbx          # 8-byte Reload
-	mulxq	%rbx, %rcx, %rbp
-	movq	%rcx, -128(%rsp)        # 8-byte Spill
-	mulxq	8(%rsp), %r10, %r14     # 8-byte Folded Reload
-	mulxq	56(%rsp), %rsi, %r11    # 8-byte Folded Reload
-	mulxq	%rax, %rcx, %r8
-	mulxq	32(%rsp), %rdx, %rax    # 8-byte Folded Reload
-	addq	%rcx, %rax
-	adcq	%rsi, %r8
-	adcq	%r10, %r11
-	adcq	-128(%rsp), %r14        # 8-byte Folded Reload
-	adcq	-120(%rsp), %rbp        # 8-byte Folded Reload
-	movq	-64(%rsp), %rsi         # 8-byte Reload
-	adcq	-112(%rsp), %rsi        # 8-byte Folded Reload
-	movq	-56(%rsp), %rcx         # 8-byte Reload
-	adcq	$0, %rcx
-	addq	%rdi, %rdx
-	adcq	%r15, %rax
-	adcq	%r12, %r8
-	adcq	%r13, %r11
-	adcq	-96(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-72(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -64(%rsp)         # 8-byte Spill
-	adcq	-104(%rsp), %rcx        # 8-byte Folded Reload
-	movq	%rcx, -56(%rsp)         # 8-byte Spill
-	adcq	$0, -80(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -40(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -32(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -8(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 48(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 40(%rsp)            # 8-byte Folded Spill
-	movq	%rax, %rdx
-	imulq	%r9, %rdx
-	mulxq	16(%rsp), %rsi, %rcx    # 8-byte Folded Reload
-	movq	%rsi, -96(%rsp)         # 8-byte Spill
-	movq	%rcx, -48(%rsp)         # 8-byte Spill
-	movq	(%rsp), %r15            # 8-byte Reload
-	mulxq	%r15, %rsi, %rcx
-	movq	%rsi, -104(%rsp)        # 8-byte Spill
-	movq	%rcx, -72(%rsp)         # 8-byte Spill
-	mulxq	%rbx, %rcx, %r13
-	movq	%rcx, -112(%rsp)        # 8-byte Spill
-	mulxq	8(%rsp), %rbx, %r12     # 8-byte Folded Reload
-	mulxq	56(%rsp), %rdi, %r9     # 8-byte Folded Reload
-	mulxq	-16(%rsp), %rsi, %r10   # 8-byte Folded Reload
-	mulxq	32(%rsp), %rdx, %rcx    # 8-byte Folded Reload
-	addq	%rsi, %rcx
-	adcq	%rdi, %r10
-	adcq	%rbx, %r9
-	adcq	-112(%rsp), %r12        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r13        # 8-byte Folded Reload
-	movq	-72(%rsp), %rdi         # 8-byte Reload
-	adcq	-96(%rsp), %rdi         # 8-byte Folded Reload
-	movq	-48(%rsp), %rsi         # 8-byte Reload
-	adcq	$0, %rsi
-	addq	%rax, %rdx
-	adcq	%r8, %rcx
-	adcq	%r11, %r10
-	adcq	%r14, %r9
-	adcq	%rbp, %r12
-	adcq	-64(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-56(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, -72(%rsp)         # 8-byte Spill
-	adcq	-80(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -48(%rsp)         # 8-byte Spill
-	adcq	$0, -40(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -32(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -8(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 48(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 40(%rsp)            # 8-byte Folded Spill
-	movq	%rcx, %rdx
-	imulq	-24(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	16(%rsp), %rsi, %rax    # 8-byte Folded Reload
-	movq	%rsi, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	mulxq	%r15, %rsi, %rax
-	movq	%rsi, -96(%rsp)         # 8-byte Spill
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	mulxq	24(%rsp), %r8, %r15     # 8-byte Folded Reload
-	mulxq	8(%rsp), %r14, %rbp     # 8-byte Folded Reload
-	mulxq	56(%rsp), %rdi, %rbx    # 8-byte Folded Reload
-	mulxq	-16(%rsp), %rsi, %r11   # 8-byte Folded Reload
-	mulxq	32(%rsp), %rdx, %rax    # 8-byte Folded Reload
-	addq	%rsi, %rax
-	adcq	%rdi, %r11
-	adcq	%r14, %rbx
-	adcq	%r8, %rbp
-	adcq	-96(%rsp), %r15         # 8-byte Folded Reload
-	movq	-64(%rsp), %rdi         # 8-byte Reload
-	adcq	-80(%rsp), %rdi         # 8-byte Folded Reload
-	movq	-56(%rsp), %rsi         # 8-byte Reload
-	adcq	$0, %rsi
-	addq	%rcx, %rdx
-	adcq	%r10, %rax
-	adcq	%r9, %r11
-	adcq	%r12, %rbx
-	adcq	%r13, %rbp
-	adcq	-72(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, -64(%rsp)         # 8-byte Spill
-	adcq	-40(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -56(%rsp)         # 8-byte Spill
-	adcq	$0, -32(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -8(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 48(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 40(%rsp)            # 8-byte Folded Spill
-	movq	%rax, %rdx
-	imulq	-24(%rsp), %rdx         # 8-byte Folded Reload
-	movq	16(%rsp), %r10          # 8-byte Reload
-	mulxq	%r10, %rsi, %rcx
-	movq	%rsi, -72(%rsp)         # 8-byte Spill
-	movq	%rcx, -40(%rsp)         # 8-byte Spill
-	mulxq	(%rsp), %rsi, %rcx      # 8-byte Folded Reload
-	movq	%rsi, -80(%rsp)         # 8-byte Spill
-	movq	%rcx, -48(%rsp)         # 8-byte Spill
-	mulxq	24(%rsp), %rsi, %rcx    # 8-byte Folded Reload
-	movq	%rsi, -96(%rsp)         # 8-byte Spill
-	mulxq	8(%rsp), %r12, %r13     # 8-byte Folded Reload
-	mulxq	56(%rsp), %r8, %r14     # 8-byte Folded Reload
-	mulxq	-16(%rsp), %rsi, %r9    # 8-byte Folded Reload
-	mulxq	32(%rsp), %rdx, %rdi    # 8-byte Folded Reload
-	addq	%rsi, %rdi
-	adcq	%r8, %r9
-	adcq	%r12, %r14
-	adcq	-96(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-80(%rsp), %rcx         # 8-byte Folded Reload
-	movq	-48(%rsp), %r8          # 8-byte Reload
-	adcq	-72(%rsp), %r8          # 8-byte Folded Reload
-	movq	-40(%rsp), %rsi         # 8-byte Reload
-	adcq	$0, %rsi
-	addq	%rax, %rdx
-	adcq	%r11, %rdi
-	adcq	%rbx, %r9
-	adcq	%rbp, %r14
-	adcq	%r15, %r13
-	adcq	-64(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -64(%rsp)         # 8-byte Spill
-	adcq	-56(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r8, -48(%rsp)          # 8-byte Spill
-	adcq	-32(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -40(%rsp)         # 8-byte Spill
-	adcq	$0, -8(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 48(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 40(%rsp)            # 8-byte Folded Spill
-	movq	%rdi, %rdx
-	imulq	-24(%rsp), %rdx         # 8-byte Folded Reload
-	mulxq	%r10, %rcx, %rax
-	movq	%rcx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	mulxq	(%rsp), %rcx, %rax      # 8-byte Folded Reload
-	movq	%rcx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	mulxq	24(%rsp), %rax, %rcx    # 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	8(%rsp), %r12           # 8-byte Reload
-	mulxq	%r12, %rax, %r15
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	mulxq	56(%rsp), %rsi, %r11    # 8-byte Folded Reload
-	movq	-16(%rsp), %r10         # 8-byte Reload
-	mulxq	%r10, %rax, %rbp
-	movq	32(%rsp), %rbx          # 8-byte Reload
-	mulxq	%rbx, %rdx, %r8
-	addq	%rax, %r8
-	adcq	%rsi, %rbp
-	adcq	-104(%rsp), %r11        # 8-byte Folded Reload
-	adcq	-96(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-80(%rsp), %rcx         # 8-byte Folded Reload
-	movq	-56(%rsp), %rsi         # 8-byte Reload
-	adcq	-72(%rsp), %rsi         # 8-byte Folded Reload
-	movq	-32(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%rdi, %rdx
-	adcq	%r9, %r8
-	adcq	%r14, %rbp
-	adcq	%r13, %r11
-	adcq	-64(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -48(%rsp)         # 8-byte Spill
-	adcq	-40(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -56(%rsp)         # 8-byte Spill
-	adcq	-8(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	adcq	$0, 48(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 40(%rsp)            # 8-byte Folded Spill
-	movq	-24(%rsp), %rdx         # 8-byte Reload
-	imulq	%r8, %rdx
-	mulxq	%r12, %rax, %r13
-	mulxq	%r10, %rcx, %rdi
-	mulxq	%rbx, %r12, %r14
-	addq	%rcx, %r14
-	mulxq	56(%rsp), %rcx, %r10    # 8-byte Folded Reload
-	adcq	%rdi, %rcx
-	adcq	%rax, %r10
-	mulxq	24(%rsp), %rax, %r9     # 8-byte Folded Reload
-	adcq	%r13, %rax
-	mulxq	(%rsp), %rdi, %r13      # 8-byte Folded Reload
-	adcq	%r9, %rdi
-	mulxq	16(%rsp), %rdx, %rsi    # 8-byte Folded Reload
-	adcq	%r13, %rdx
-	adcq	$0, %rsi
-	addq	%r8, %r12
-	adcq	%rbp, %r14
-	adcq	%r11, %rcx
-	adcq	%r15, %r10
-	adcq	-48(%rsp), %rax         # 8-byte Folded Reload
-	adcq	-56(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	-32(%rsp), %rdx         # 8-byte Folded Reload
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	40(%rsp), %rbx          # 8-byte Reload
-	adcq	$0, %rbx
-	movq	%r14, %rbp
-	subq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rcx, %r13
-	sbbq	-16(%rsp), %r13         # 8-byte Folded Reload
-	movq	%r10, %r8
-	sbbq	56(%rsp), %r8           # 8-byte Folded Reload
-	movq	%rax, %r9
-	sbbq	8(%rsp), %r9            # 8-byte Folded Reload
-	movq	%rdi, %r11
-	sbbq	24(%rsp), %r11          # 8-byte Folded Reload
-	movq	%rdx, %r15
-	sbbq	(%rsp), %r15            # 8-byte Folded Reload
-	movq	%rsi, %r12
-	sbbq	16(%rsp), %r12          # 8-byte Folded Reload
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%rsi, %r12
-	testb	%bl, %bl
-	cmovneq	%r14, %rbp
-	movq	-88(%rsp), %rsi         # 8-byte Reload
-	movq	%rbp, (%rsi)
-	cmovneq	%rcx, %r13
-	movq	%r13, 8(%rsi)
-	cmovneq	%r10, %r8
-	movq	%r8, 16(%rsi)
-	cmovneq	%rax, %r9
-	movq	%r9, 24(%rsi)
-	cmovneq	%rdi, %r11
-	movq	%r11, 32(%rsi)
-	cmovneq	%rdx, %r15
-	movq	%r15, 40(%rsi)
-	movq	%r12, 48(%rsi)
-	addq	$64, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end100:
-	.size	mcl_fp_montRed7Lbmi2, .Lfunc_end100-mcl_fp_montRed7Lbmi2
-
-	.globl	mcl_fp_addPre7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre7Lbmi2,@function
-mcl_fp_addPre7Lbmi2:                    # @mcl_fp_addPre7Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r8
-	movq	48(%rsi), %r14
-	movq	40(%rdx), %r9
-	movq	40(%rsi), %r15
-	movq	32(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %rbx
-	adcq	16(%rsi), %r12
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r12, 16(%rdi)
-	adcq	%r11, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 32(%rdi)
-	adcq	%r9, %r15
-	movq	%r15, 40(%rdi)
-	adcq	%r8, %r14
-	movq	%r14, 48(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end101:
-	.size	mcl_fp_addPre7Lbmi2, .Lfunc_end101-mcl_fp_addPre7Lbmi2
-
-	.globl	mcl_fp_subPre7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre7Lbmi2,@function
-mcl_fp_subPre7Lbmi2:                    # @mcl_fp_subPre7Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r8
-	movq	48(%rsi), %r10
-	movq	40(%rdx), %r9
-	movq	40(%rsi), %r15
-	movq	24(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %r12
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %r12
-	movq	16(%rsi), %rcx
-	sbbq	16(%rdx), %rcx
-	movq	32(%rsi), %rdx
-	movq	24(%rsi), %rsi
-	movq	%rbx, (%rdi)
-	movq	%r12, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r11, %rsi
-	movq	%rsi, 24(%rdi)
-	sbbq	%r14, %rdx
-	movq	%rdx, 32(%rdi)
-	sbbq	%r9, %r15
-	movq	%r15, 40(%rdi)
-	sbbq	%r8, %r10
-	movq	%r10, 48(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end102:
-	.size	mcl_fp_subPre7Lbmi2, .Lfunc_end102-mcl_fp_subPre7Lbmi2
-
-	.globl	mcl_fp_shr1_7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_7Lbmi2,@function
-mcl_fp_shr1_7Lbmi2:                     # @mcl_fp_shr1_7Lbmi2
-# BB#0:
-	movq	48(%rsi), %r8
-	movq	40(%rsi), %r9
-	movq	32(%rsi), %r10
-	movq	24(%rsi), %rax
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rdx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rdx
-	movq	%rdx, (%rdi)
-	shrdq	$1, %rcx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rax, %rcx
-	movq	%rcx, 16(%rdi)
-	shrdq	$1, %r10, %rax
-	movq	%rax, 24(%rdi)
-	shrdq	$1, %r9, %r10
-	movq	%r10, 32(%rdi)
-	shrdq	$1, %r8, %r9
-	movq	%r9, 40(%rdi)
-	shrq	%r8
-	movq	%r8, 48(%rdi)
-	retq
-.Lfunc_end103:
-	.size	mcl_fp_shr1_7Lbmi2, .Lfunc_end103-mcl_fp_shr1_7Lbmi2
-
-	.globl	mcl_fp_add7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add7Lbmi2,@function
-mcl_fp_add7Lbmi2:                       # @mcl_fp_add7Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r14
-	movq	48(%rsi), %r8
-	movq	40(%rdx), %r15
-	movq	40(%rsi), %r9
-	movq	32(%rdx), %r12
-	movq	24(%rdx), %r13
-	movq	16(%rdx), %r10
-	movq	(%rdx), %r11
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %r11
-	adcq	8(%rsi), %rdx
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %rbx
-	adcq	16(%rsi), %r10
-	movq	%r11, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	adcq	%r13, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r12, %rbx
-	movq	%rbx, 32(%rdi)
-	adcq	%r15, %r9
-	movq	%r9, 40(%rdi)
-	adcq	%r14, %r8
-	movq	%r8, 48(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %r11
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r10
-	sbbq	24(%rcx), %rax
-	sbbq	32(%rcx), %rbx
-	sbbq	40(%rcx), %r9
-	sbbq	48(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB104_2
-# BB#1:                                 # %nocarry
-	movq	%r11, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	movq	%rax, 24(%rdi)
-	movq	%rbx, 32(%rdi)
-	movq	%r9, 40(%rdi)
-	movq	%r8, 48(%rdi)
-.LBB104_2:                              # %carry
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end104:
-	.size	mcl_fp_add7Lbmi2, .Lfunc_end104-mcl_fp_add7Lbmi2
-
-	.globl	mcl_fp_addNF7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF7Lbmi2,@function
-mcl_fp_addNF7Lbmi2:                     # @mcl_fp_addNF7Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r9
-	movq	40(%rdx), %rbp
-	movq	32(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	16(%rdx), %r14
-	movq	(%rdx), %r12
-	movq	8(%rdx), %r15
-	addq	(%rsi), %r12
-	adcq	8(%rsi), %r15
-	adcq	16(%rsi), %r14
-	adcq	24(%rsi), %r11
-	adcq	32(%rsi), %r10
-	adcq	40(%rsi), %rbp
-	movq	%rbp, -8(%rsp)          # 8-byte Spill
-	adcq	48(%rsi), %r9
-	movq	%r12, %rsi
-	subq	(%rcx), %rsi
-	movq	%r15, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r14, %rax
-	sbbq	16(%rcx), %rax
-	movq	%r11, %rbx
-	sbbq	24(%rcx), %rbx
-	movq	%r10, %r13
-	sbbq	32(%rcx), %r13
-	sbbq	40(%rcx), %rbp
-	movq	%r9, %r8
-	sbbq	48(%rcx), %r8
-	movq	%r8, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r12, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r15, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r14, %rax
-	movq	%rax, 16(%rdi)
-	cmovsq	%r11, %rbx
-	movq	%rbx, 24(%rdi)
-	cmovsq	%r10, %r13
-	movq	%r13, 32(%rdi)
-	cmovsq	-8(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 40(%rdi)
-	cmovsq	%r9, %r8
-	movq	%r8, 48(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end105:
-	.size	mcl_fp_addNF7Lbmi2, .Lfunc_end105-mcl_fp_addNF7Lbmi2
-
-	.globl	mcl_fp_sub7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub7Lbmi2,@function
-mcl_fp_sub7Lbmi2:                       # @mcl_fp_sub7Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r14
-	movq	48(%rsi), %r8
-	movq	40(%rdx), %r15
-	movq	40(%rsi), %r9
-	movq	32(%rdx), %r12
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r11
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r11
-	movq	16(%rsi), %r13
-	sbbq	16(%rdx), %r13
-	movq	32(%rsi), %r10
-	movq	24(%rsi), %rsi
-	sbbq	24(%rdx), %rsi
-	movq	%rax, (%rdi)
-	movq	%r11, 8(%rdi)
-	movq	%r13, 16(%rdi)
-	movq	%rsi, 24(%rdi)
-	sbbq	%r12, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	%r15, %r9
-	movq	%r9, 40(%rdi)
-	sbbq	%r14, %r8
-	movq	%r8, 48(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	.LBB106_2
-# BB#1:                                 # %carry
-	movq	48(%rcx), %r14
-	movq	40(%rcx), %r15
-	movq	32(%rcx), %r12
-	movq	24(%rcx), %rbx
-	movq	8(%rcx), %rdx
-	movq	16(%rcx), %rbp
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r11, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r13, %rbp
-	movq	%rbp, 16(%rdi)
-	adcq	%rsi, %rbx
-	movq	%rbx, 24(%rdi)
-	adcq	%r10, %r12
-	movq	%r12, 32(%rdi)
-	adcq	%r9, %r15
-	movq	%r15, 40(%rdi)
-	adcq	%r8, %r14
-	movq	%r14, 48(%rdi)
-.LBB106_2:                              # %nocarry
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end106:
-	.size	mcl_fp_sub7Lbmi2, .Lfunc_end106-mcl_fp_sub7Lbmi2
-
-	.globl	mcl_fp_subNF7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF7Lbmi2,@function
-mcl_fp_subNF7Lbmi2:                     # @mcl_fp_subNF7Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rcx, %r8
-	movq	48(%rsi), %r12
-	movq	40(%rsi), %rax
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %r10
-	movq	16(%rsi), %r11
-	movq	(%rsi), %r14
-	movq	8(%rsi), %r15
-	subq	(%rdx), %r14
-	sbbq	8(%rdx), %r15
-	sbbq	16(%rdx), %r11
-	sbbq	24(%rdx), %r10
-	sbbq	32(%rdx), %r9
-	sbbq	40(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	sbbq	48(%rdx), %r12
-	movq	%r12, %rax
-	sarq	$63, %rax
-	movq	%rax, %rsi
-	shldq	$1, %r12, %rsi
-	andq	(%r8), %rsi
-	movq	48(%r8), %r13
-	andq	%rax, %r13
-	movq	40(%r8), %rbx
-	andq	%rax, %rbx
-	movq	32(%r8), %rdx
-	andq	%rax, %rdx
-	movq	24(%r8), %rbp
-	andq	%rax, %rbp
-	movq	16(%r8), %rcx
-	andq	%rax, %rcx
-	andq	8(%r8), %rax
-	addq	%r14, %rsi
-	adcq	%r15, %rax
-	movq	%rsi, (%rdi)
-	movq	%rax, 8(%rdi)
-	adcq	%r11, %rcx
-	movq	%rcx, 16(%rdi)
-	adcq	%r10, %rbp
-	movq	%rbp, 24(%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 32(%rdi)
-	adcq	-8(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, 40(%rdi)
-	adcq	%r12, %r13
-	movq	%r13, 48(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end107:
-	.size	mcl_fp_subNF7Lbmi2, .Lfunc_end107-mcl_fp_subNF7Lbmi2
-
-	.globl	mcl_fpDbl_add7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add7Lbmi2,@function
-mcl_fpDbl_add7Lbmi2:                    # @mcl_fpDbl_add7Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rcx, %r8
-	movq	104(%rdx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	96(%rdx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	88(%rdx), %r11
-	movq	80(%rdx), %r14
-	movq	24(%rsi), %r15
-	movq	32(%rsi), %r12
-	movq	16(%rdx), %r9
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rbx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rbx
-	adcq	16(%rsi), %r9
-	adcq	24(%rdx), %r15
-	adcq	32(%rdx), %r12
-	movq	72(%rdx), %r13
-	movq	64(%rdx), %rbp
-	movq	%rax, (%rdi)
-	movq	56(%rdx), %r10
-	movq	%rbx, 8(%rdi)
-	movq	48(%rdx), %rcx
-	movq	40(%rdx), %rdx
-	movq	%r9, 16(%rdi)
-	movq	104(%rsi), %r9
-	movq	%r15, 24(%rdi)
-	movq	40(%rsi), %rbx
-	adcq	%rdx, %rbx
-	movq	96(%rsi), %r15
-	movq	%r12, 32(%rdi)
-	movq	48(%rsi), %rdx
-	adcq	%rcx, %rdx
-	movq	88(%rsi), %rax
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rcx
-	adcq	%r10, %rcx
-	movq	80(%rsi), %r12
-	movq	%rdx, 48(%rdi)
-	movq	72(%rsi), %rdx
-	movq	64(%rsi), %rsi
-	adcq	%rbp, %rsi
-	adcq	%r13, %rdx
-	adcq	%r14, %r12
-	adcq	%r11, %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	adcq	-24(%rsp), %r15         # 8-byte Folded Reload
-	movq	%r15, -24(%rsp)         # 8-byte Spill
-	adcq	-16(%rsp), %r9          # 8-byte Folded Reload
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	movq	%rcx, %rbx
-	subq	(%r8), %rbx
-	movq	%rsi, %r10
-	sbbq	8(%r8), %r10
-	movq	%rdx, %r11
-	sbbq	16(%r8), %r11
-	movq	%r12, %r14
-	sbbq	24(%r8), %r14
-	movq	-8(%rsp), %r13          # 8-byte Reload
-	sbbq	32(%r8), %r13
-	sbbq	40(%r8), %r15
-	movq	%r9, %rax
-	sbbq	48(%r8), %rax
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	cmovneq	%rcx, %rbx
-	movq	%rbx, 56(%rdi)
-	testb	%bpl, %bpl
-	cmovneq	%rsi, %r10
-	movq	%r10, 64(%rdi)
-	cmovneq	%rdx, %r11
-	movq	%r11, 72(%rdi)
-	cmovneq	%r12, %r14
-	movq	%r14, 80(%rdi)
-	cmovneq	-8(%rsp), %r13          # 8-byte Folded Reload
-	movq	%r13, 88(%rdi)
-	cmovneq	-24(%rsp), %r15         # 8-byte Folded Reload
-	movq	%r15, 96(%rdi)
-	cmovneq	%r9, %rax
-	movq	%rax, 104(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end108:
-	.size	mcl_fpDbl_add7Lbmi2, .Lfunc_end108-mcl_fpDbl_add7Lbmi2
-
-	.globl	mcl_fpDbl_sub7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub7Lbmi2,@function
-mcl_fpDbl_sub7Lbmi2:                    # @mcl_fpDbl_sub7Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rcx, %r8
-	movq	104(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	96(%rdx), %r10
-	movq	88(%rdx), %r14
-	movq	16(%rsi), %rax
-	movq	(%rsi), %r15
-	movq	8(%rsi), %r11
-	xorl	%ecx, %ecx
-	subq	(%rdx), %r15
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %rax
-	movq	24(%rsi), %rbx
-	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %r12
-	sbbq	32(%rdx), %r12
-	movq	80(%rdx), %r13
-	movq	72(%rdx), %rbp
-	movq	%r15, (%rdi)
-	movq	64(%rdx), %r9
-	movq	%r11, 8(%rdi)
-	movq	56(%rdx), %r15
-	movq	%rax, 16(%rdi)
-	movq	48(%rdx), %r11
-	movq	40(%rdx), %rdx
-	movq	%rbx, 24(%rdi)
-	movq	40(%rsi), %rbx
-	sbbq	%rdx, %rbx
-	movq	104(%rsi), %rax
-	movq	%r12, 32(%rdi)
-	movq	48(%rsi), %r12
-	sbbq	%r11, %r12
-	movq	96(%rsi), %r11
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rdx
-	sbbq	%r15, %rdx
-	movq	88(%rsi), %r15
-	movq	%r12, 48(%rdi)
-	movq	64(%rsi), %rbx
-	sbbq	%r9, %rbx
-	movq	80(%rsi), %r12
-	movq	72(%rsi), %r9
-	sbbq	%rbp, %r9
-	sbbq	%r13, %r12
-	sbbq	%r14, %r15
-	sbbq	%r10, %r11
-	sbbq	-8(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movl	$0, %ebp
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	movq	(%r8), %r10
-	cmoveq	%rcx, %r10
-	testb	%bpl, %bpl
-	movq	16(%r8), %rbp
-	cmoveq	%rcx, %rbp
-	movq	8(%r8), %rsi
-	cmoveq	%rcx, %rsi
-	movq	48(%r8), %r14
-	cmoveq	%rcx, %r14
-	movq	40(%r8), %r13
-	cmoveq	%rcx, %r13
-	movq	32(%r8), %rax
-	cmoveq	%rcx, %rax
-	cmovneq	24(%r8), %rcx
-	addq	%rdx, %r10
-	adcq	%rbx, %rsi
-	movq	%r10, 56(%rdi)
-	movq	%rsi, 64(%rdi)
-	adcq	%r9, %rbp
-	movq	%rbp, 72(%rdi)
-	adcq	%r12, %rcx
-	movq	%rcx, 80(%rdi)
-	adcq	%r15, %rax
-	movq	%rax, 88(%rdi)
-	adcq	%r11, %r13
-	movq	%r13, 96(%rdi)
-	adcq	-8(%rsp), %r14          # 8-byte Folded Reload
-	movq	%r14, 104(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end109:
-	.size	mcl_fpDbl_sub7Lbmi2, .Lfunc_end109-mcl_fpDbl_sub7Lbmi2
-
-	.align	16, 0x90
-	.type	.LmulPv512x64,@function
-.LmulPv512x64:                          # @mulPv512x64
-# BB#0:
-	mulxq	(%rsi), %rcx, %rax
-	movq	%rcx, (%rdi)
-	mulxq	8(%rsi), %rcx, %r8
-	addq	%rax, %rcx
-	movq	%rcx, 8(%rdi)
-	mulxq	16(%rsi), %rcx, %r9
-	adcq	%r8, %rcx
-	movq	%rcx, 16(%rdi)
-	mulxq	24(%rsi), %rax, %rcx
-	adcq	%r9, %rax
-	movq	%rax, 24(%rdi)
-	mulxq	32(%rsi), %rax, %r8
-	adcq	%rcx, %rax
-	movq	%rax, 32(%rdi)
-	mulxq	40(%rsi), %rcx, %r9
-	adcq	%r8, %rcx
-	movq	%rcx, 40(%rdi)
-	mulxq	48(%rsi), %rax, %rcx
-	adcq	%r9, %rax
-	movq	%rax, 48(%rdi)
-	mulxq	56(%rsi), %rax, %rdx
-	adcq	%rcx, %rax
-	movq	%rax, 56(%rdi)
-	adcq	$0, %rdx
-	movq	%rdx, 64(%rdi)
-	movq	%rdi, %rax
-	retq
-.Lfunc_end110:
-	.size	.LmulPv512x64, .Lfunc_end110-.LmulPv512x64
-
-	.globl	mcl_fp_mulUnitPre8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre8Lbmi2,@function
-mcl_fp_mulUnitPre8Lbmi2:                # @mcl_fp_mulUnitPre8Lbmi2
-# BB#0:
-	pushq	%rbx
-	subq	$80, %rsp
-	movq	%rdi, %rbx
-	leaq	8(%rsp), %rdi
-	callq	.LmulPv512x64
-	movq	72(%rsp), %r8
-	movq	64(%rsp), %r9
-	movq	56(%rsp), %r10
-	movq	48(%rsp), %r11
-	movq	40(%rsp), %rdi
-	movq	32(%rsp), %rax
-	movq	24(%rsp), %rcx
-	movq	8(%rsp), %rdx
-	movq	16(%rsp), %rsi
-	movq	%rdx, (%rbx)
-	movq	%rsi, 8(%rbx)
-	movq	%rcx, 16(%rbx)
-	movq	%rax, 24(%rbx)
-	movq	%rdi, 32(%rbx)
-	movq	%r11, 40(%rbx)
-	movq	%r10, 48(%rbx)
-	movq	%r9, 56(%rbx)
-	movq	%r8, 64(%rbx)
-	addq	$80, %rsp
-	popq	%rbx
-	retq
-.Lfunc_end111:
-	.size	mcl_fp_mulUnitPre8Lbmi2, .Lfunc_end111-mcl_fp_mulUnitPre8Lbmi2
-
-	.globl	mcl_fpDbl_mulPre8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre8Lbmi2,@function
-mcl_fpDbl_mulPre8Lbmi2:                 # @mcl_fpDbl_mulPre8Lbmi2
-# BB#0:
-	pushq	%rbp
-	movq	%rsp, %rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$200, %rsp
-	movq	%rdx, %rbx
-	movq	%rsi, %r15
-	movq	%rdi, %r14
-	callq	mcl_fpDbl_mulPre4Lbmi2@PLT
-	leaq	64(%r14), %rdi
-	leaq	32(%r15), %rsi
-	leaq	32(%rbx), %rdx
-	callq	mcl_fpDbl_mulPre4Lbmi2@PLT
-	movq	56(%rbx), %r10
-	movq	48(%rbx), %rcx
-	movq	(%rbx), %rdx
-	movq	8(%rbx), %rsi
-	addq	32(%rbx), %rdx
-	adcq	40(%rbx), %rsi
-	adcq	16(%rbx), %rcx
-	adcq	24(%rbx), %r10
-	pushfq
-	popq	%r8
-	xorl	%r9d, %r9d
-	movq	56(%r15), %rdi
-	movq	48(%r15), %r13
-	movq	(%r15), %r12
-	movq	8(%r15), %rbx
-	addq	32(%r15), %r12
-	adcq	40(%r15), %rbx
-	adcq	16(%r15), %r13
-	adcq	24(%r15), %rdi
-	movl	$0, %eax
-	cmovbq	%r10, %rax
-	movq	%rax, -176(%rbp)        # 8-byte Spill
-	movl	$0, %eax
-	cmovbq	%rcx, %rax
-	movq	%rax, -184(%rbp)        # 8-byte Spill
-	movl	$0, %eax
-	cmovbq	%rsi, %rax
-	movq	%rax, -192(%rbp)        # 8-byte Spill
-	movl	$0, %eax
-	cmovbq	%rdx, %rax
-	movq	%rax, -200(%rbp)        # 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%r12, -136(%rbp)
-	movq	%rbx, -128(%rbp)
-	movq	%r13, -120(%rbp)
-	movq	%rdi, -112(%rbp)
-	movq	%rdx, -168(%rbp)
-	movq	%rsi, -160(%rbp)
-	movq	%rcx, -152(%rbp)
-	movq	%r10, -144(%rbp)
-	pushq	%r8
-	popfq
-	cmovaeq	%r9, %rdi
-	movq	%rdi, -216(%rbp)        # 8-byte Spill
-	cmovaeq	%r9, %r13
-	cmovaeq	%r9, %rbx
-	cmovaeq	%r9, %r12
-	sbbq	%rax, %rax
-	movq	%rax, -208(%rbp)        # 8-byte Spill
-	leaq	-104(%rbp), %rdi
-	leaq	-136(%rbp), %rsi
-	leaq	-168(%rbp), %rdx
-	callq	mcl_fpDbl_mulPre4Lbmi2@PLT
-	addq	-200(%rbp), %r12        # 8-byte Folded Reload
-	adcq	-192(%rbp), %rbx        # 8-byte Folded Reload
-	adcq	-184(%rbp), %r13        # 8-byte Folded Reload
-	movq	-216(%rbp), %r10        # 8-byte Reload
-	adcq	-176(%rbp), %r10        # 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	-208(%rbp), %rdx        # 8-byte Reload
-	andl	%edx, %r15d
-	andl	$1, %r15d
-	addq	-72(%rbp), %r12
-	adcq	-64(%rbp), %rbx
-	adcq	-56(%rbp), %r13
-	adcq	-48(%rbp), %r10
-	adcq	%rax, %r15
-	movq	-80(%rbp), %rax
-	movq	-88(%rbp), %rcx
-	movq	-104(%rbp), %rsi
-	movq	-96(%rbp), %rdx
-	subq	(%r14), %rsi
-	sbbq	8(%r14), %rdx
-	sbbq	16(%r14), %rcx
-	sbbq	24(%r14), %rax
-	movq	32(%r14), %rdi
-	movq	%rdi, -184(%rbp)        # 8-byte Spill
-	movq	40(%r14), %r8
-	movq	%r8, -176(%rbp)         # 8-byte Spill
-	sbbq	%rdi, %r12
-	sbbq	%r8, %rbx
-	movq	48(%r14), %rdi
-	movq	%rdi, -192(%rbp)        # 8-byte Spill
-	sbbq	%rdi, %r13
-	movq	56(%r14), %rdi
-	movq	%rdi, -200(%rbp)        # 8-byte Spill
-	sbbq	%rdi, %r10
-	sbbq	$0, %r15
-	movq	64(%r14), %r11
-	subq	%r11, %rsi
-	movq	72(%r14), %rdi
-	movq	%rdi, -208(%rbp)        # 8-byte Spill
-	sbbq	%rdi, %rdx
-	movq	80(%r14), %rdi
-	movq	%rdi, -216(%rbp)        # 8-byte Spill
-	sbbq	%rdi, %rcx
-	movq	88(%r14), %rdi
-	movq	%rdi, -224(%rbp)        # 8-byte Spill
-	sbbq	%rdi, %rax
-	movq	96(%r14), %rdi
-	movq	%rdi, -232(%rbp)        # 8-byte Spill
-	sbbq	%rdi, %r12
-	movq	104(%r14), %rdi
-	sbbq	%rdi, %rbx
-	movq	112(%r14), %r8
-	sbbq	%r8, %r13
-	movq	120(%r14), %r9
-	sbbq	%r9, %r10
-	sbbq	$0, %r15
-	addq	-184(%rbp), %rsi        # 8-byte Folded Reload
-	adcq	-176(%rbp), %rdx        # 8-byte Folded Reload
-	movq	%rsi, 32(%r14)
-	adcq	-192(%rbp), %rcx        # 8-byte Folded Reload
-	movq	%rdx, 40(%r14)
-	adcq	-200(%rbp), %rax        # 8-byte Folded Reload
-	movq	%rcx, 48(%r14)
-	adcq	%r11, %r12
-	movq	%rax, 56(%r14)
-	movq	%r12, 64(%r14)
-	adcq	-208(%rbp), %rbx        # 8-byte Folded Reload
-	movq	%rbx, 72(%r14)
-	adcq	-216(%rbp), %r13        # 8-byte Folded Reload
-	movq	%r13, 80(%r14)
-	adcq	-224(%rbp), %r10        # 8-byte Folded Reload
-	movq	%r10, 88(%r14)
-	adcq	-232(%rbp), %r15        # 8-byte Folded Reload
-	movq	%r15, 96(%r14)
-	adcq	$0, %rdi
-	movq	%rdi, 104(%r14)
-	adcq	$0, %r8
-	movq	%r8, 112(%r14)
-	adcq	$0, %r9
-	movq	%r9, 120(%r14)
-	addq	$200, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end112:
-	.size	mcl_fpDbl_mulPre8Lbmi2, .Lfunc_end112-mcl_fpDbl_mulPre8Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre8Lbmi2,@function
-mcl_fpDbl_sqrPre8Lbmi2:                 # @mcl_fpDbl_sqrPre8Lbmi2
-# BB#0:
-	pushq	%rbp
-	movq	%rsp, %rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$200, %rsp
-	movq	%rsi, %r14
-	movq	%rdi, %rbx
-	movq	%r14, %rdx
-	callq	mcl_fpDbl_mulPre4Lbmi2@PLT
-	leaq	64(%rbx), %rdi
-	leaq	32(%r14), %rsi
-	movq	%rsi, %rdx
-	callq	mcl_fpDbl_mulPre4Lbmi2@PLT
-	movq	(%r14), %r12
-	movq	8(%r14), %r15
-	addq	32(%r14), %r12
-	adcq	40(%r14), %r15
-	pushfq
-	popq	%rax
-	movq	%r12, -136(%rbp)
-	movq	%r12, -168(%rbp)
-	addq	%r12, %r12
-	movq	%r15, -128(%rbp)
-	movq	%r15, -160(%rbp)
-	adcq	%r15, %r15
-	pushfq
-	popq	%rcx
-	movq	56(%r14), %r13
-	movq	48(%r14), %rdx
-	pushq	%rax
-	popfq
-	adcq	16(%r14), %rdx
-	adcq	24(%r14), %r13
-	pushfq
-	popq	%r8
-	pushfq
-	popq	%rsi
-	pushfq
-	popq	%rdi
-	sbbq	%rax, %rax
-	movq	%rax, -184(%rbp)        # 8-byte Spill
-	xorl	%eax, %eax
-	pushq	%rdi
-	popfq
-	cmovaeq	%rax, %r15
-	movq	%r15, -176(%rbp)        # 8-byte Spill
-	cmovaeq	%rax, %r12
-	movq	%rdx, -120(%rbp)
-	movq	%rdx, -152(%rbp)
-	movq	%rdx, %r15
-	pushq	%rcx
-	popfq
-	adcq	%r15, %r15
-	movq	%r13, %r14
-	movq	%r13, -112(%rbp)
-	movq	%r13, -144(%rbp)
-	adcq	%r13, %r13
-	pushq	%rsi
-	popfq
-	cmovaeq	%rax, %r13
-	cmovaeq	%rax, %r15
-	shrq	$63, %r14
-	pushq	%r8
-	popfq
-	cmovaeq	%rax, %r14
-	leaq	-104(%rbp), %rdi
-	leaq	-136(%rbp), %rsi
-	leaq	-168(%rbp), %rdx
-	callq	mcl_fpDbl_mulPre4Lbmi2@PLT
-	movq	-184(%rbp), %rax        # 8-byte Reload
-	andl	$1, %eax
-	addq	-72(%rbp), %r12
-	movq	-176(%rbp), %r8         # 8-byte Reload
-	adcq	-64(%rbp), %r8
-	adcq	-56(%rbp), %r15
-	adcq	-48(%rbp), %r13
-	adcq	%r14, %rax
-	movq	%rax, %rdi
-	movq	-80(%rbp), %rax
-	movq	-88(%rbp), %rcx
-	movq	-104(%rbp), %rsi
-	movq	-96(%rbp), %rdx
-	subq	(%rbx), %rsi
-	sbbq	8(%rbx), %rdx
-	sbbq	16(%rbx), %rcx
-	sbbq	24(%rbx), %rax
-	movq	32(%rbx), %r10
-	movq	%r10, -184(%rbp)        # 8-byte Spill
-	movq	40(%rbx), %r9
-	movq	%r9, -176(%rbp)         # 8-byte Spill
-	sbbq	%r10, %r12
-	sbbq	%r9, %r8
-	movq	%r8, %r10
-	movq	48(%rbx), %r8
-	movq	%r8, -192(%rbp)         # 8-byte Spill
-	sbbq	%r8, %r15
-	movq	56(%rbx), %r8
-	movq	%r8, -200(%rbp)         # 8-byte Spill
-	sbbq	%r8, %r13
-	sbbq	$0, %rdi
-	movq	64(%rbx), %r11
-	subq	%r11, %rsi
-	movq	72(%rbx), %r8
-	movq	%r8, -208(%rbp)         # 8-byte Spill
-	sbbq	%r8, %rdx
-	movq	80(%rbx), %r8
-	movq	%r8, -216(%rbp)         # 8-byte Spill
-	sbbq	%r8, %rcx
-	movq	88(%rbx), %r8
-	movq	%r8, -224(%rbp)         # 8-byte Spill
-	sbbq	%r8, %rax
-	movq	96(%rbx), %r8
-	movq	%r8, -232(%rbp)         # 8-byte Spill
-	sbbq	%r8, %r12
-	movq	104(%rbx), %r14
-	sbbq	%r14, %r10
-	movq	112(%rbx), %r8
-	sbbq	%r8, %r15
-	movq	120(%rbx), %r9
-	sbbq	%r9, %r13
-	sbbq	$0, %rdi
-	addq	-184(%rbp), %rsi        # 8-byte Folded Reload
-	adcq	-176(%rbp), %rdx        # 8-byte Folded Reload
-	movq	%rsi, 32(%rbx)
-	adcq	-192(%rbp), %rcx        # 8-byte Folded Reload
-	movq	%rdx, 40(%rbx)
-	adcq	-200(%rbp), %rax        # 8-byte Folded Reload
-	movq	%rcx, 48(%rbx)
-	adcq	%r11, %r12
-	movq	%rax, 56(%rbx)
-	movq	%r12, 64(%rbx)
-	adcq	-208(%rbp), %r10        # 8-byte Folded Reload
-	movq	%r10, 72(%rbx)
-	adcq	-216(%rbp), %r15        # 8-byte Folded Reload
-	movq	%r15, 80(%rbx)
-	adcq	-224(%rbp), %r13        # 8-byte Folded Reload
-	movq	%r13, 88(%rbx)
-	adcq	-232(%rbp), %rdi        # 8-byte Folded Reload
-	movq	%rdi, 96(%rbx)
-	adcq	$0, %r14
-	movq	%r14, 104(%rbx)
-	adcq	$0, %r8
-	movq	%r8, 112(%rbx)
-	adcq	$0, %r9
-	movq	%r9, 120(%rbx)
-	addq	$200, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end113:
-	.size	mcl_fpDbl_sqrPre8Lbmi2, .Lfunc_end113-mcl_fpDbl_sqrPre8Lbmi2
-
-	.globl	mcl_fp_mont8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont8Lbmi2,@function
-mcl_fp_mont8Lbmi2:                      # @mcl_fp_mont8Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$1256, %rsp             # imm = 0x4E8
-	movq	%rcx, %r13
-	movq	%r13, 40(%rsp)          # 8-byte Spill
-	movq	%rdx, 16(%rsp)          # 8-byte Spill
-	movq	%rsi, 24(%rsp)          # 8-byte Spill
-	movq	%rdi, (%rsp)            # 8-byte Spill
-	movq	-8(%r13), %rbx
-	movq	%rbx, 32(%rsp)          # 8-byte Spill
-	movq	(%rdx), %rdx
-	leaq	1184(%rsp), %rdi
-	callq	.LmulPv512x64
-	movq	1184(%rsp), %r15
-	movq	1192(%rsp), %r14
-	movq	%r15, %rdx
-	imulq	%rbx, %rdx
-	movq	1248(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	1240(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	1232(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	1224(%rsp), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	1216(%rsp), %r12
-	movq	1208(%rsp), %rbx
-	movq	1200(%rsp), %rbp
-	leaq	1112(%rsp), %rdi
-	movq	%r13, %rsi
-	callq	.LmulPv512x64
-	addq	1112(%rsp), %r15
-	adcq	1120(%rsp), %r14
-	adcq	1128(%rsp), %rbp
-	movq	%rbp, 8(%rsp)           # 8-byte Spill
-	adcq	1136(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          # 8-byte Spill
-	adcq	1144(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %r13          # 8-byte Reload
-	adcq	1152(%rsp), %r13
-	movq	88(%rsp), %rbx          # 8-byte Reload
-	adcq	1160(%rsp), %rbx
-	movq	80(%rsp), %rbp          # 8-byte Reload
-	adcq	1168(%rsp), %rbp
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	1176(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	sbbq	%r15, %r15
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1040(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	andl	$1, %r15d
-	addq	1040(%rsp), %r14
-	movq	8(%rsp), %rax           # 8-byte Reload
-	adcq	1048(%rsp), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	48(%rsp), %rax          # 8-byte Reload
-	adcq	1056(%rsp), %rax
-	movq	%rax, %r12
-	movq	64(%rsp), %rax          # 8-byte Reload
-	adcq	1064(%rsp), %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	adcq	1072(%rsp), %r13
-	movq	%r13, 72(%rsp)          # 8-byte Spill
-	adcq	1080(%rsp), %rbx
-	movq	%rbx, 88(%rsp)          # 8-byte Spill
-	adcq	1088(%rsp), %rbp
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	1096(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	adcq	1104(%rsp), %r15
-	movq	%r15, 56(%rsp)          # 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%r14, %rdx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	968(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	andl	$1, %r15d
-	addq	968(%rsp), %r14
-	movq	8(%rsp), %r13           # 8-byte Reload
-	adcq	976(%rsp), %r13
-	adcq	984(%rsp), %r12
-	movq	%r12, 48(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r14          # 8-byte Reload
-	adcq	992(%rsp), %r14
-	movq	72(%rsp), %rbx          # 8-byte Reload
-	adcq	1000(%rsp), %rbx
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	1008(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	adcq	1016(%rsp), %rbp
-	movq	%rbp, %r12
-	movq	96(%rsp), %rbp          # 8-byte Reload
-	adcq	1024(%rsp), %rbp
-	movq	56(%rsp), %rax          # 8-byte Reload
-	adcq	1032(%rsp), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r15
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	896(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	%r13, %rcx
-	addq	896(%rsp), %rcx
-	movq	48(%rsp), %r13          # 8-byte Reload
-	adcq	904(%rsp), %r13
-	adcq	912(%rsp), %r14
-	adcq	920(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	928(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	adcq	936(%rsp), %r12
-	movq	%r12, 80(%rsp)          # 8-byte Spill
-	adcq	944(%rsp), %rbp
-	movq	%rbp, 96(%rsp)          # 8-byte Spill
-	movq	56(%rsp), %r12          # 8-byte Reload
-	adcq	952(%rsp), %r12
-	adcq	960(%rsp), %r15
-	sbbq	%rbx, %rbx
-	movq	%rcx, %rdx
-	movq	%rcx, %rbp
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	824(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	andl	$1, %ebx
-	addq	824(%rsp), %rbp
-	adcq	832(%rsp), %r13
-	movq	%r13, 48(%rsp)          # 8-byte Spill
-	adcq	840(%rsp), %r14
-	movq	%r14, 64(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %r13          # 8-byte Reload
-	adcq	848(%rsp), %r13
-	movq	88(%rsp), %rbp          # 8-byte Reload
-	adcq	856(%rsp), %rbp
-	movq	80(%rsp), %r14          # 8-byte Reload
-	adcq	864(%rsp), %r14
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	872(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	adcq	880(%rsp), %r12
-	adcq	888(%rsp), %r15
-	adcq	$0, %rbx
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	752(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	48(%rsp), %rax          # 8-byte Reload
-	addq	752(%rsp), %rax
-	movq	64(%rsp), %rcx          # 8-byte Reload
-	adcq	760(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          # 8-byte Spill
-	adcq	768(%rsp), %r13
-	movq	%r13, 72(%rsp)          # 8-byte Spill
-	adcq	776(%rsp), %rbp
-	movq	%rbp, 88(%rsp)          # 8-byte Spill
-	adcq	784(%rsp), %r14
-	movq	%r14, 80(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %rbp          # 8-byte Reload
-	adcq	792(%rsp), %rbp
-	adcq	800(%rsp), %r12
-	adcq	808(%rsp), %r15
-	adcq	816(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          # 8-byte Spill
-	sbbq	%r13, %r13
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	680(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	%r13, %rax
-	andl	$1, %eax
-	addq	680(%rsp), %rbx
-	movq	64(%rsp), %r14          # 8-byte Reload
-	adcq	688(%rsp), %r14
-	movq	72(%rsp), %rcx          # 8-byte Reload
-	adcq	696(%rsp), %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %r13          # 8-byte Reload
-	adcq	704(%rsp), %r13
-	movq	80(%rsp), %rbx          # 8-byte Reload
-	adcq	712(%rsp), %rbx
-	adcq	720(%rsp), %rbp
-	movq	%rbp, 96(%rsp)          # 8-byte Spill
-	movq	%r12, %rbp
-	adcq	728(%rsp), %rbp
-	adcq	736(%rsp), %r15
-	movq	48(%rsp), %r12          # 8-byte Reload
-	adcq	744(%rsp), %r12
-	adcq	$0, %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	608(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	%r14, %rax
-	addq	608(%rsp), %rax
-	movq	72(%rsp), %r14          # 8-byte Reload
-	adcq	616(%rsp), %r14
-	adcq	624(%rsp), %r13
-	movq	%r13, 88(%rsp)          # 8-byte Spill
-	adcq	632(%rsp), %rbx
-	movq	%rbx, %r13
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	640(%rsp), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	adcq	648(%rsp), %rbp
-	movq	%rbp, 56(%rsp)          # 8-byte Spill
-	adcq	656(%rsp), %r15
-	adcq	664(%rsp), %r12
-	movq	%r12, 48(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %rcx          # 8-byte Reload
-	adcq	672(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          # 8-byte Spill
-	sbbq	%rbp, %rbp
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	536(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	%rbp, %rax
-	andl	$1, %eax
-	addq	536(%rsp), %rbx
-	adcq	544(%rsp), %r14
-	movq	%r14, 72(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %rbx          # 8-byte Reload
-	adcq	552(%rsp), %rbx
-	adcq	560(%rsp), %r13
-	movq	96(%rsp), %rbp          # 8-byte Reload
-	adcq	568(%rsp), %rbp
-	movq	56(%rsp), %r12          # 8-byte Reload
-	adcq	576(%rsp), %r12
-	adcq	584(%rsp), %r15
-	movq	48(%rsp), %rcx          # 8-byte Reload
-	adcq	592(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r14          # 8-byte Reload
-	adcq	600(%rsp), %r14
-	adcq	$0, %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	40(%rax), %rdx
-	leaq	464(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	72(%rsp), %rax          # 8-byte Reload
-	addq	464(%rsp), %rax
-	adcq	472(%rsp), %rbx
-	adcq	480(%rsp), %r13
-	movq	%r13, 80(%rsp)          # 8-byte Spill
-	adcq	488(%rsp), %rbp
-	movq	%rbp, 96(%rsp)          # 8-byte Spill
-	adcq	496(%rsp), %r12
-	adcq	504(%rsp), %r15
-	movq	%r15, 72(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %r15          # 8-byte Reload
-	adcq	512(%rsp), %r15
-	adcq	520(%rsp), %r14
-	movq	%r14, 64(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %r14          # 8-byte Reload
-	adcq	528(%rsp), %r14
-	sbbq	%r13, %r13
-	movq	%rax, %rdx
-	movq	%rax, %rbp
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	392(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	%r13, %rax
-	andl	$1, %eax
-	addq	392(%rsp), %rbp
-	adcq	400(%rsp), %rbx
-	movq	%rbx, 88(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %rbp          # 8-byte Reload
-	adcq	408(%rsp), %rbp
-	movq	96(%rsp), %rbx          # 8-byte Reload
-	adcq	416(%rsp), %rbx
-	adcq	424(%rsp), %r12
-	movq	72(%rsp), %r13          # 8-byte Reload
-	adcq	432(%rsp), %r13
-	adcq	440(%rsp), %r15
-	movq	%r15, 48(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r15          # 8-byte Reload
-	adcq	448(%rsp), %r15
-	adcq	456(%rsp), %r14
-	adcq	$0, %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	320(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	88(%rsp), %rax          # 8-byte Reload
-	addq	320(%rsp), %rax
-	adcq	328(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	adcq	336(%rsp), %rbx
-	movq	%rbx, 96(%rsp)          # 8-byte Spill
-	movq	%r12, %rbp
-	adcq	344(%rsp), %rbp
-	adcq	352(%rsp), %r13
-	movq	48(%rsp), %r12          # 8-byte Reload
-	adcq	360(%rsp), %r12
-	adcq	368(%rsp), %r15
-	movq	%r15, 64(%rsp)          # 8-byte Spill
-	adcq	376(%rsp), %r14
-	movq	%r14, 88(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %rcx          # 8-byte Reload
-	adcq	384(%rsp), %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	248(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	andl	$1, %r15d
-	addq	248(%rsp), %rbx
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	256(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %r14          # 8-byte Reload
-	adcq	264(%rsp), %r14
-	adcq	272(%rsp), %rbp
-	movq	%rbp, 56(%rsp)          # 8-byte Spill
-	movq	%r13, %rbx
-	adcq	280(%rsp), %rbx
-	movq	%r12, %rbp
-	adcq	288(%rsp), %rbp
-	movq	64(%rsp), %r13          # 8-byte Reload
-	adcq	296(%rsp), %r13
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	304(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	312(%rsp), %r12
-	adcq	$0, %r15
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	176(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	80(%rsp), %rax          # 8-byte Reload
-	addq	176(%rsp), %rax
-	adcq	184(%rsp), %r14
-	movq	%r14, 96(%rsp)          # 8-byte Spill
-	movq	56(%rsp), %rcx          # 8-byte Reload
-	adcq	192(%rsp), %rcx
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	200(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
-	adcq	208(%rsp), %rbp
-	adcq	216(%rsp), %r13
-	movq	%r13, 64(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %r14          # 8-byte Reload
-	adcq	224(%rsp), %r14
-	adcq	232(%rsp), %r12
-	adcq	240(%rsp), %r15
-	sbbq	%rbx, %rbx
-	movq	32(%rsp), %rdx          # 8-byte Reload
-	imulq	%rax, %rdx
-	movq	%rax, %r13
-	leaq	104(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	andl	$1, %ebx
-	addq	104(%rsp), %r13
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	112(%rsp), %rcx
-	movq	56(%rsp), %rdx          # 8-byte Reload
-	adcq	120(%rsp), %rdx
-	movq	72(%rsp), %rsi          # 8-byte Reload
-	adcq	128(%rsp), %rsi
-	movq	%rbp, %rdi
-	adcq	136(%rsp), %rdi
-	movq	%rdi, 48(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r8           # 8-byte Reload
-	adcq	144(%rsp), %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	%r14, %r9
-	adcq	152(%rsp), %r9
-	movq	%r9, 88(%rsp)           # 8-byte Spill
-	adcq	160(%rsp), %r12
-	adcq	168(%rsp), %r15
-	adcq	$0, %rbx
-	movq	%rcx, %rax
-	movq	%rcx, %r11
-	movq	40(%rsp), %rbp          # 8-byte Reload
-	subq	(%rbp), %rax
-	movq	%rdx, %rcx
-	movq	%rdx, %r14
-	sbbq	8(%rbp), %rcx
-	movq	%rsi, %rdx
-	movq	%rsi, %r13
-	sbbq	16(%rbp), %rdx
-	movq	%rdi, %rsi
-	sbbq	24(%rbp), %rsi
-	movq	%r8, %rdi
-	sbbq	32(%rbp), %rdi
-	movq	%r9, %r10
-	sbbq	40(%rbp), %r10
-	movq	%r12, %r8
-	sbbq	48(%rbp), %r8
-	movq	%r15, %r9
-	sbbq	56(%rbp), %r9
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%r15, %r9
-	testb	%bl, %bl
-	cmovneq	%r11, %rax
-	movq	(%rsp), %rbx            # 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovneq	%r14, %rcx
-	movq	%rcx, 8(%rbx)
-	cmovneq	%r13, %rdx
-	movq	%rdx, 16(%rbx)
-	cmovneq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 24(%rbx)
-	cmovneq	64(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rbx)
-	cmovneq	88(%rsp), %r10          # 8-byte Folded Reload
-	movq	%r10, 40(%rbx)
-	cmovneq	%r12, %r8
-	movq	%r8, 48(%rbx)
-	movq	%r9, 56(%rbx)
-	addq	$1256, %rsp             # imm = 0x4E8
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end114:
-	.size	mcl_fp_mont8Lbmi2, .Lfunc_end114-mcl_fp_mont8Lbmi2
-
-	.globl	mcl_fp_montNF8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF8Lbmi2,@function
-mcl_fp_montNF8Lbmi2:                    # @mcl_fp_montNF8Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$1240, %rsp             # imm = 0x4D8
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	movq	%rdx, 16(%rsp)          # 8-byte Spill
-	movq	%rsi, 24(%rsp)          # 8-byte Spill
-	movq	%rdi, (%rsp)            # 8-byte Spill
-	movq	-8(%rcx), %rbx
-	movq	%rbx, 32(%rsp)          # 8-byte Spill
-	movq	(%rdx), %rdx
-	leaq	1168(%rsp), %rdi
-	callq	.LmulPv512x64
-	movq	1168(%rsp), %r15
-	movq	1176(%rsp), %r12
-	movq	%r15, %rdx
-	imulq	%rbx, %rdx
-	movq	1232(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	1224(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	1216(%rsp), %r13
-	movq	1208(%rsp), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	1200(%rsp), %r14
-	movq	1192(%rsp), %rbp
-	movq	1184(%rsp), %rbx
-	leaq	1096(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	1096(%rsp), %r15
-	adcq	1104(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	adcq	1112(%rsp), %rbx
-	adcq	1120(%rsp), %rbp
-	adcq	1128(%rsp), %r14
-	movq	%r14, %r12
-	movq	72(%rsp), %r14          # 8-byte Reload
-	adcq	1136(%rsp), %r14
-	adcq	1144(%rsp), %r13
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	1152(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %rax          # 8-byte Reload
-	adcq	1160(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1024(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	1088(%rsp), %r15
-	movq	64(%rsp), %rax          # 8-byte Reload
-	addq	1024(%rsp), %rax
-	adcq	1032(%rsp), %rbx
-	movq	%rbx, 8(%rsp)           # 8-byte Spill
-	movq	%rbp, %rbx
-	adcq	1040(%rsp), %rbx
-	adcq	1048(%rsp), %r12
-	adcq	1056(%rsp), %r14
-	movq	%r14, 72(%rsp)          # 8-byte Spill
-	movq	%r13, %rbp
-	adcq	1064(%rsp), %rbp
-	movq	80(%rsp), %rcx          # 8-byte Reload
-	adcq	1072(%rsp), %rcx
-	movq	%rcx, 80(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %r14          # 8-byte Reload
-	adcq	1080(%rsp), %r14
-	adcq	$0, %r15
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	952(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	952(%rsp), %r13
-	movq	8(%rsp), %rax           # 8-byte Reload
-	adcq	960(%rsp), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	adcq	968(%rsp), %rbx
-	movq	%rbx, 64(%rsp)          # 8-byte Spill
-	movq	%r12, %rbx
-	adcq	976(%rsp), %rbx
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	984(%rsp), %r12
-	adcq	992(%rsp), %rbp
-	movq	%rbp, 40(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %r13          # 8-byte Reload
-	adcq	1000(%rsp), %r13
-	movq	%r14, %rbp
-	adcq	1008(%rsp), %rbp
-	adcq	1016(%rsp), %r15
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	880(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	944(%rsp), %r14
-	movq	8(%rsp), %rax           # 8-byte Reload
-	addq	880(%rsp), %rax
-	movq	64(%rsp), %rcx          # 8-byte Reload
-	adcq	888(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          # 8-byte Spill
-	adcq	896(%rsp), %rbx
-	adcq	904(%rsp), %r12
-	movq	%r12, 72(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %rcx          # 8-byte Reload
-	adcq	912(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          # 8-byte Spill
-	adcq	920(%rsp), %r13
-	movq	%r13, 80(%rsp)          # 8-byte Spill
-	adcq	928(%rsp), %rbp
-	movq	%rbp, 48(%rsp)          # 8-byte Spill
-	adcq	936(%rsp), %r15
-	adcq	$0, %r14
-	movq	%rax, %rdx
-	movq	%rax, %rbp
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	808(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	808(%rsp), %rbp
-	movq	64(%rsp), %r13          # 8-byte Reload
-	adcq	816(%rsp), %r13
-	movq	%rbx, %r12
-	adcq	824(%rsp), %r12
-	movq	72(%rsp), %rbx          # 8-byte Reload
-	adcq	832(%rsp), %rbx
-	movq	40(%rsp), %rbp          # 8-byte Reload
-	adcq	840(%rsp), %rbp
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	848(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %rax          # 8-byte Reload
-	adcq	856(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	adcq	864(%rsp), %r15
-	adcq	872(%rsp), %r14
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	736(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	800(%rsp), %rax
-	movq	%r13, %rcx
-	addq	736(%rsp), %rcx
-	adcq	744(%rsp), %r12
-	movq	%r12, 40(%rsp)          # 8-byte Spill
-	adcq	752(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
-	adcq	760(%rsp), %rbp
-	movq	%rbp, %r13
-	movq	80(%rsp), %rbp          # 8-byte Reload
-	adcq	768(%rsp), %rbp
-	movq	48(%rsp), %rbx          # 8-byte Reload
-	adcq	776(%rsp), %rbx
-	adcq	784(%rsp), %r15
-	adcq	792(%rsp), %r14
-	adcq	$0, %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	%rcx, %rdx
-	movq	%rcx, %r12
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	664(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	664(%rsp), %r12
-	movq	40(%rsp), %rax          # 8-byte Reload
-	adcq	672(%rsp), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %rax          # 8-byte Reload
-	adcq	680(%rsp), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	adcq	688(%rsp), %r13
-	adcq	696(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	adcq	704(%rsp), %rbx
-	adcq	712(%rsp), %r15
-	adcq	720(%rsp), %r14
-	movq	64(%rsp), %r12          # 8-byte Reload
-	adcq	728(%rsp), %r12
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	592(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	656(%rsp), %rcx
-	movq	40(%rsp), %rax          # 8-byte Reload
-	addq	592(%rsp), %rax
-	movq	72(%rsp), %rbp          # 8-byte Reload
-	adcq	600(%rsp), %rbp
-	adcq	608(%rsp), %r13
-	movq	%r13, 40(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %r13          # 8-byte Reload
-	adcq	616(%rsp), %r13
-	adcq	624(%rsp), %rbx
-	adcq	632(%rsp), %r15
-	adcq	640(%rsp), %r14
-	adcq	648(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, 80(%rsp)          # 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	520(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	520(%rsp), %r12
-	adcq	528(%rsp), %rbp
-	movq	%rbp, 72(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %r12          # 8-byte Reload
-	adcq	536(%rsp), %r12
-	movq	%r13, %rbp
-	adcq	544(%rsp), %rbp
-	adcq	552(%rsp), %rbx
-	adcq	560(%rsp), %r15
-	adcq	568(%rsp), %r14
-	movq	64(%rsp), %r13          # 8-byte Reload
-	adcq	576(%rsp), %r13
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	584(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	40(%rax), %rdx
-	leaq	448(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	512(%rsp), %rcx
-	movq	72(%rsp), %rax          # 8-byte Reload
-	addq	448(%rsp), %rax
-	adcq	456(%rsp), %r12
-	movq	%r12, 40(%rsp)          # 8-byte Spill
-	adcq	464(%rsp), %rbp
-	adcq	472(%rsp), %rbx
-	adcq	480(%rsp), %r15
-	adcq	488(%rsp), %r14
-	adcq	496(%rsp), %r13
-	movq	%r13, 64(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %r13          # 8-byte Reload
-	adcq	504(%rsp), %r13
+	mulxq	-72(%rsp), %rdi, %rcx           # 8-byte Folded Reload
+	adcq	%rsi, %rdi
+	mulxq	-80(%rsp), %rsi, %rbp           # 8-byte Folded Reload
+	adcq	%rcx, %rsi
+	mulxq	-88(%rsp), %r8, %rcx            # 8-byte Folded Reload
+	adcq	%rbp, %r8
 	adcq	$0, %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	376(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	376(%rsp), %r12
-	movq	40(%rsp), %rax          # 8-byte Reload
-	adcq	384(%rsp), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	adcq	392(%rsp), %rbp
-	adcq	400(%rsp), %rbx
-	adcq	408(%rsp), %r15
-	adcq	416(%rsp), %r14
-	movq	64(%rsp), %r12          # 8-byte Reload
-	adcq	424(%rsp), %r12
-	adcq	432(%rsp), %r13
-	movq	72(%rsp), %rax          # 8-byte Reload
-	adcq	440(%rsp), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	304(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	368(%rsp), %rcx
-	movq	40(%rsp), %rax          # 8-byte Reload
-	addq	304(%rsp), %rax
-	adcq	312(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	adcq	320(%rsp), %rbx
-	adcq	328(%rsp), %r15
-	adcq	336(%rsp), %r14
-	adcq	344(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	adcq	352(%rsp), %r13
-	movq	72(%rsp), %rbp          # 8-byte Reload
-	adcq	360(%rsp), %rbp
+	addq	%r11, %r13
+	adcq	%r9, %rax
+	adcq	%r10, %rbx
+	adcq	%r14, %rdi
+	adcq	%r15, %rsi
+	adcq	%r12, %r8
 	adcq	$0, %rcx
-	movq	%rcx, 48(%rsp)          # 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	232(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	232(%rsp), %r12
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	240(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	adcq	248(%rsp), %rbx
-	adcq	256(%rsp), %r15
-	adcq	264(%rsp), %r14
-	movq	64(%rsp), %r12          # 8-byte Reload
-	adcq	272(%rsp), %r12
-	adcq	280(%rsp), %r13
-	adcq	288(%rsp), %rbp
-	movq	%rbp, 72(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %rbp          # 8-byte Reload
-	adcq	296(%rsp), %rbp
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	160(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	224(%rsp), %rcx
-	movq	80(%rsp), %rax          # 8-byte Reload
-	addq	160(%rsp), %rax
-	adcq	168(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          # 8-byte Spill
-	adcq	176(%rsp), %r15
-	adcq	184(%rsp), %r14
-	adcq	192(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	adcq	200(%rsp), %r13
-	movq	72(%rsp), %rbx          # 8-byte Reload
-	adcq	208(%rsp), %rbx
-	adcq	216(%rsp), %rbp
-	movq	%rbp, %r12
+	movq	-104(%rsp), %rdx                # 8-byte Reload
+	imulq	%r13, %rdx
+	mulxq	-16(%rsp), %rbp, %r15           # 8-byte Folded Reload
+	addq	%r13, %rbp
+	mulxq	-24(%rsp), %r11, %rbp           # 8-byte Folded Reload
+	adcq	%rax, %r11
+	mulxq	-32(%rsp), %r9, %rax            # 8-byte Folded Reload
+	adcq	%rbx, %r9
+	mulxq	-40(%rsp), %r10, %rbx           # 8-byte Folded Reload
+	adcq	%rdi, %r10
+	mulxq	-48(%rsp), %r14, %rdi           # 8-byte Folded Reload
+	adcq	%rsi, %r14
+	mulxq	-56(%rsp), %rsi, %rdx           # 8-byte Folded Reload
+	adcq	%r8, %rsi
 	adcq	$0, %rcx
-	movq	%rcx, 80(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rdx          # 8-byte Reload
-	imulq	%rax, %rdx
-	movq	%rax, %rbp
-	leaq	88(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	88(%rsp), %rbp
-	movq	48(%rsp), %r11          # 8-byte Reload
-	adcq	96(%rsp), %r11
-	adcq	104(%rsp), %r15
-	adcq	112(%rsp), %r14
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	adcq	120(%rsp), %rsi
-	movq	%rsi, 64(%rsp)          # 8-byte Spill
-	adcq	128(%rsp), %r13
-	adcq	136(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
-	adcq	144(%rsp), %r12
-	movq	80(%rsp), %r8           # 8-byte Reload
-	adcq	152(%rsp), %r8
-	movq	%r11, %rax
-	movq	56(%rsp), %rbp          # 8-byte Reload
-	subq	(%rbp), %rax
-	movq	%r15, %rcx
-	sbbq	8(%rbp), %rcx
-	movq	%r14, %rdx
-	sbbq	16(%rbp), %rdx
-	sbbq	24(%rbp), %rsi
-	movq	%r13, %rdi
-	sbbq	32(%rbp), %rdi
-	movq	%rbx, %r9
-	sbbq	40(%rbp), %r9
-	movq	%r12, %r10
-	sbbq	48(%rbp), %r10
-	movq	%rbp, %rbx
-	movq	%r8, %rbp
-	sbbq	56(%rbx), %rbp
-	testq	%rbp, %rbp
-	cmovsq	%r11, %rax
-	movq	(%rsp), %rbx            # 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovsq	%r15, %rcx
-	movq	%rcx, 8(%rbx)
-	cmovsq	%r14, %rdx
-	movq	%rdx, 16(%rbx)
-	cmovsq	64(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 24(%rbx)
-	cmovsq	%r13, %rdi
-	movq	%rdi, 32(%rbx)
-	cmovsq	72(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 40(%rbx)
-	cmovsq	%r12, %r10
-	movq	%r10, 48(%rbx)
-	cmovsq	%r8, %rbp
-	movq	%rbp, 56(%rbx)
-	addq	$1240, %rsp             # imm = 0x4D8
+	addq	%r15, %r11
+	adcq	%rbp, %r9
+	adcq	%rax, %r10
+	adcq	%rbx, %r14
+	adcq	%rdi, %rsi
+	adcq	%rdx, %rcx
+	movq	-120(%rsp), %rax                # 8-byte Reload
+	movq	40(%rax), %rdx
+	mulxq	-128(%rsp), %rdi, %rax          # 8-byte Folded Reload
+	mulxq	-112(%rsp), %r13, %rbx          # 8-byte Folded Reload
+	addq	%rdi, %rbx
+	mulxq	-64(%rsp), %rdi, %rbp           # 8-byte Folded Reload
+	adcq	%rax, %rdi
+	mulxq	-72(%rsp), %r8, %rax            # 8-byte Folded Reload
+	adcq	%rbp, %r8
+	mulxq	-80(%rsp), %r15, %rbp           # 8-byte Folded Reload
+	adcq	%rax, %r15
+	mulxq	-88(%rsp), %r12, %rax           # 8-byte Folded Reload
+	adcq	%rbp, %r12
+	adcq	$0, %rax
+	addq	%r11, %r13
+	adcq	%r9, %rbx
+	adcq	%r10, %rdi
+	adcq	%r14, %r8
+	adcq	%rsi, %r15
+	adcq	%rcx, %r12
+	adcq	$0, %rax
+	movq	-104(%rsp), %rdx                # 8-byte Reload
+	imulq	%r13, %rdx
+	movq	-16(%rsp), %r9                  # 8-byte Reload
+	mulxq	%r9, %rcx, %rsi
+	movq	%rsi, -104(%rsp)                # 8-byte Spill
+	addq	%r13, %rcx
+	movq	-24(%rsp), %r10                 # 8-byte Reload
+	mulxq	%r10, %r13, %rcx
+	movq	%rcx, -112(%rsp)                # 8-byte Spill
+	adcq	%rbx, %r13
+	movq	-32(%rsp), %r11                 # 8-byte Reload
+	mulxq	%r11, %rbp, %rcx
+	movq	%rcx, -120(%rsp)                # 8-byte Spill
+	adcq	%rdi, %rbp
+	movq	%rdx, %rcx
+	movq	-40(%rsp), %rsi                 # 8-byte Reload
+	mulxq	%rsi, %rdi, %rdx
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	adcq	%r8, %rdi
+	movq	%rcx, %rdx
+	movq	-48(%rsp), %r14                 # 8-byte Reload
+	mulxq	%r14, %rbx, %r8
+	adcq	%r15, %rbx
+	movq	-56(%rsp), %rcx                 # 8-byte Reload
+	mulxq	%rcx, %r15, %rdx
+	adcq	%r12, %r15
+	adcq	$0, %rax
+	addq	-104(%rsp), %r13                # 8-byte Folded Reload
+	adcq	-112(%rsp), %rbp                # 8-byte Folded Reload
+	adcq	-120(%rsp), %rdi                # 8-byte Folded Reload
+	adcq	-128(%rsp), %rbx                # 8-byte Folded Reload
+	adcq	%r8, %r15
+	adcq	%rdx, %rax
+	movq	%r13, %r8
+	subq	%r9, %r8
+	movq	%rbp, %r9
+	sbbq	%r10, %r9
+	movq	%rdi, %r10
+	sbbq	%r11, %r10
+	movq	%rbx, %r11
+	sbbq	%rsi, %r11
+	movq	%r15, %rsi
+	sbbq	%r14, %rsi
+	movq	%rax, %rdx
+	sbbq	%rcx, %rdx
+	movq	%rdx, %rcx
+	sarq	$63, %rcx
+	cmovsq	%rax, %rdx
+	movq	-8(%rsp), %rax                  # 8-byte Reload
+	movq	%rdx, 40(%rax)
+	cmovsq	%r15, %rsi
+	movq	%rsi, 32(%rax)
+	cmovsq	%rbx, %r11
+	movq	%r11, 24(%rax)
+	cmovsq	%rdi, %r10
+	movq	%r10, 16(%rax)
+	cmovsq	%rbp, %r9
+	movq	%r9, 8(%rax)
+	cmovsq	%r13, %r8
+	movq	%r8, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -10187,374 +3531,214 @@ mcl_fp_montNF8Lbmi2:                    # @mcl_fp_montNF8Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end115:
-	.size	mcl_fp_montNF8Lbmi2, .Lfunc_end115-mcl_fp_montNF8Lbmi2
-
-	.globl	mcl_fp_montRed8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed8Lbmi2,@function
-mcl_fp_montRed8Lbmi2:                   # @mcl_fp_montRed8Lbmi2
-# BB#0:
+.Lfunc_end44:
+	.size	mcl_fp_montNF6Lbmi2, .Lfunc_end44-mcl_fp_montNF6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRed6Lbmi2            # -- Begin function mcl_fp_montRed6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed6Lbmi2,@function
+mcl_fp_montRed6Lbmi2:                   # @mcl_fp_montRed6Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$776, %rsp              # imm = 0x308
-	movq	%rdx, %rax
-	movq	%rax, 112(%rsp)         # 8-byte Spill
-	movq	%rdi, 72(%rsp)          # 8-byte Spill
-	movq	-8(%rax), %rcx
-	movq	%rcx, 128(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r15
-	movq	8(%rsi), %rdx
-	movq	%rdx, 184(%rsp)         # 8-byte Spill
-	movq	%r15, %rdx
-	imulq	%rcx, %rdx
-	movq	120(%rsi), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	movq	112(%rsi), %rcx
-	movq	%rcx, 136(%rsp)         # 8-byte Spill
-	movq	104(%rsi), %rcx
-	movq	%rcx, 120(%rsp)         # 8-byte Spill
-	movq	96(%rsi), %rcx
-	movq	%rcx, 168(%rsp)         # 8-byte Spill
-	movq	88(%rsi), %rcx
-	movq	%rcx, 176(%rsp)         # 8-byte Spill
-	movq	80(%rsi), %rcx
-	movq	%rcx, 160(%rsp)         # 8-byte Spill
-	movq	72(%rsi), %rcx
-	movq	%rcx, 192(%rsp)         # 8-byte Spill
-	movq	64(%rsi), %r13
-	movq	56(%rsi), %rcx
-	movq	%rcx, 144(%rsp)         # 8-byte Spill
-	movq	48(%rsi), %r14
-	movq	40(%rsi), %rcx
-	movq	%rcx, 152(%rsp)         # 8-byte Spill
-	movq	32(%rsi), %r12
-	movq	24(%rsi), %rbx
-	movq	16(%rsi), %rbp
-	movq	%rax, %rcx
-	movq	(%rcx), %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	movq	56(%rcx), %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	48(%rcx), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	movq	40(%rcx), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	32(%rcx), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	24(%rcx), %rax
-	movq	%rax, 32(%rsp)          # 8-byte Spill
-	movq	16(%rcx), %rax
-	movq	%rax, 24(%rsp)          # 8-byte Spill
-	movq	8(%rcx), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	%rcx, %rsi
-	leaq	704(%rsp), %rdi
-	callq	.LmulPv512x64
-	addq	704(%rsp), %r15
-	movq	184(%rsp), %rcx         # 8-byte Reload
-	adcq	712(%rsp), %rcx
-	adcq	720(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	adcq	728(%rsp), %rbx
-	movq	%rbx, 88(%rsp)          # 8-byte Spill
-	adcq	736(%rsp), %r12
-	movq	%r12, 104(%rsp)         # 8-byte Spill
-	movq	152(%rsp), %rax         # 8-byte Reload
-	adcq	744(%rsp), %rax
-	movq	%rax, 152(%rsp)         # 8-byte Spill
-	adcq	752(%rsp), %r14
-	movq	%r14, %r12
-	movq	144(%rsp), %rax         # 8-byte Reload
-	adcq	760(%rsp), %rax
-	movq	%rax, 144(%rsp)         # 8-byte Spill
-	adcq	768(%rsp), %r13
-	movq	%r13, 184(%rsp)         # 8-byte Spill
-	adcq	$0, 192(%rsp)           # 8-byte Folded Spill
-	movq	160(%rsp), %r15         # 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 176(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 168(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 120(%rsp)           # 8-byte Folded Spill
-	movq	136(%rsp), %r13         # 8-byte Reload
-	adcq	$0, %r13
-	movq	96(%rsp), %r14          # 8-byte Reload
-	adcq	$0, %r14
-	sbbq	%rbx, %rbx
-	movq	%rcx, %rbp
-	movq	%rbp, %rdx
-	imulq	128(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	632(%rsp), %rdi
-	movq	112(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv512x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	632(%rsp), %rbp
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	adcq	640(%rsp), %rsi
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	adcq	648(%rsp), %rcx
-	movq	%rcx, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %rcx         # 8-byte Reload
-	adcq	656(%rsp), %rcx
-	movq	%rcx, 104(%rsp)         # 8-byte Spill
-	movq	152(%rsp), %rcx         # 8-byte Reload
-	adcq	664(%rsp), %rcx
-	movq	%rcx, 152(%rsp)         # 8-byte Spill
-	adcq	672(%rsp), %r12
-	movq	144(%rsp), %rcx         # 8-byte Reload
-	adcq	680(%rsp), %rcx
-	movq	%rcx, 144(%rsp)         # 8-byte Spill
-	movq	184(%rsp), %rcx         # 8-byte Reload
-	adcq	688(%rsp), %rcx
-	movq	%rcx, 184(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rcx         # 8-byte Reload
-	adcq	696(%rsp), %rcx
-	movq	%rcx, 192(%rsp)         # 8-byte Spill
-	adcq	$0, %r15
-	movq	%r15, 160(%rsp)         # 8-byte Spill
-	movq	176(%rsp), %rbx         # 8-byte Reload
-	adcq	$0, %rbx
-	movq	168(%rsp), %r15         # 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 120(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %r13
-	movq	%r13, 136(%rsp)         # 8-byte Spill
-	adcq	$0, %r14
-	movq	%r14, 96(%rsp)          # 8-byte Spill
-	movq	%rax, %rbp
-	adcq	$0, %rbp
-	movq	%rsi, %rdx
+	movq	%rdx, %rcx
+	movq	%rsi, %r11
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	-8(%rdx), %rax
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	movq	(%rsi), %rdi
+	movq	%rdi, %rdx
+	imulq	%rax, %rdx
+	movq	40(%rcx), %rsi
+	movq	%rsi, -56(%rsp)                 # 8-byte Spill
+	mulxq	%rsi, %rax, %r12
+	movq	%rax, -88(%rsp)                 # 8-byte Spill
+	movq	32(%rcx), %rsi
+	movq	%rsi, -64(%rsp)                 # 8-byte Spill
+	mulxq	%rsi, %rax, %r13
+	movq	%rax, -48(%rsp)                 # 8-byte Spill
+	movq	24(%rcx), %rsi
+	mulxq	%rsi, %r8, %r15
 	movq	%rsi, %r14
-	imulq	128(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	560(%rsp), %rdi
-	movq	112(%rsp), %r13         # 8-byte Reload
-	movq	%r13, %rsi
-	callq	.LmulPv512x64
-	addq	560(%rsp), %r14
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	adcq	568(%rsp), %rcx
-	movq	104(%rsp), %rax         # 8-byte Reload
-	adcq	576(%rsp), %rax
-	movq	%rax, 104(%rsp)         # 8-byte Spill
-	movq	152(%rsp), %rax         # 8-byte Reload
-	adcq	584(%rsp), %rax
-	movq	%rax, 152(%rsp)         # 8-byte Spill
-	adcq	592(%rsp), %r12
-	movq	%r12, 88(%rsp)          # 8-byte Spill
-	movq	144(%rsp), %r14         # 8-byte Reload
-	adcq	600(%rsp), %r14
-	movq	184(%rsp), %rax         # 8-byte Reload
-	adcq	608(%rsp), %rax
-	movq	%rax, 184(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rax         # 8-byte Reload
-	adcq	616(%rsp), %rax
-	movq	%rax, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %rax         # 8-byte Reload
-	adcq	624(%rsp), %rax
-	movq	%rax, 160(%rsp)         # 8-byte Spill
-	adcq	$0, %rbx
-	movq	%rbx, 176(%rsp)         # 8-byte Spill
-	adcq	$0, %r15
-	movq	%r15, 168(%rsp)         # 8-byte Spill
-	movq	120(%rsp), %rbx         # 8-byte Reload
-	adcq	$0, %rbx
-	movq	136(%rsp), %r15         # 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 96(%rsp)            # 8-byte Folded Spill
-	adcq	$0, %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	movq	%rcx, %rbp
-	movq	%rbp, %rdx
-	movq	128(%rsp), %r12         # 8-byte Reload
-	imulq	%r12, %rdx
-	leaq	488(%rsp), %rdi
-	movq	%r13, %rsi
-	callq	.LmulPv512x64
-	addq	488(%rsp), %rbp
-	movq	104(%rsp), %rax         # 8-byte Reload
-	adcq	496(%rsp), %rax
-	movq	152(%rsp), %rbp         # 8-byte Reload
-	adcq	504(%rsp), %rbp
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	adcq	512(%rsp), %rcx
-	movq	%rcx, 88(%rsp)          # 8-byte Spill
-	adcq	520(%rsp), %r14
-	movq	184(%rsp), %rcx         # 8-byte Reload
-	adcq	528(%rsp), %rcx
-	movq	%rcx, 184(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rcx         # 8-byte Reload
-	adcq	536(%rsp), %rcx
-	movq	%rcx, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %r13         # 8-byte Reload
-	adcq	544(%rsp), %r13
-	movq	176(%rsp), %rcx         # 8-byte Reload
-	adcq	552(%rsp), %rcx
-	movq	%rcx, 176(%rsp)         # 8-byte Spill
-	adcq	$0, 168(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rbx
-	movq	%rbx, 120(%rsp)         # 8-byte Spill
-	movq	%r15, %rbx
-	adcq	$0, %rbx
-	adcq	$0, 96(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 80(%rsp)            # 8-byte Folded Spill
-	movq	%rax, %rdx
-	movq	%rax, %r15
-	imulq	%r12, %rdx
-	leaq	416(%rsp), %rdi
-	movq	112(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	416(%rsp), %r15
-	adcq	424(%rsp), %rbp
-	movq	%rbp, %rax
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	adcq	432(%rsp), %rcx
-	movq	%rcx, 88(%rsp)          # 8-byte Spill
-	movq	%r14, %r12
-	adcq	440(%rsp), %r12
-	movq	184(%rsp), %r14         # 8-byte Reload
-	adcq	448(%rsp), %r14
-	movq	192(%rsp), %rbp         # 8-byte Reload
-	adcq	456(%rsp), %rbp
-	adcq	464(%rsp), %r13
-	movq	176(%rsp), %rcx         # 8-byte Reload
-	adcq	472(%rsp), %rcx
-	movq	%rcx, 176(%rsp)         # 8-byte Spill
-	movq	168(%rsp), %rcx         # 8-byte Reload
-	adcq	480(%rsp), %rcx
-	movq	%rcx, 168(%rsp)         # 8-byte Spill
-	adcq	$0, 120(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rbx
-	movq	%rbx, 136(%rsp)         # 8-byte Spill
-	movq	96(%rsp), %r15          # 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 80(%rsp)            # 8-byte Folded Spill
-	movq	%rax, %rbx
-	movq	%rbx, %rdx
-	imulq	128(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	344(%rsp), %rdi
-	movq	112(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	344(%rsp), %rbx
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	352(%rsp), %rax
-	adcq	360(%rsp), %r12
-	movq	%r12, 144(%rsp)         # 8-byte Spill
-	adcq	368(%rsp), %r14
-	movq	%r14, 184(%rsp)         # 8-byte Spill
-	adcq	376(%rsp), %rbp
-	movq	%rbp, 192(%rsp)         # 8-byte Spill
-	adcq	384(%rsp), %r13
-	movq	%r13, 160(%rsp)         # 8-byte Spill
-	movq	176(%rsp), %r13         # 8-byte Reload
-	adcq	392(%rsp), %r13
-	movq	168(%rsp), %r12         # 8-byte Reload
-	adcq	400(%rsp), %r12
-	movq	120(%rsp), %r14         # 8-byte Reload
-	adcq	408(%rsp), %r14
-	movq	136(%rsp), %rbp         # 8-byte Reload
-	adcq	$0, %rbp
-	movq	%r15, %rbx
-	adcq	$0, %rbx
-	adcq	$0, 80(%rsp)            # 8-byte Folded Spill
-	movq	%rax, %rdx
-	movq	%rax, %r15
-	imulq	128(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	272(%rsp), %rdi
-	movq	112(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	272(%rsp), %r15
-	movq	144(%rsp), %rcx         # 8-byte Reload
-	adcq	280(%rsp), %rcx
-	movq	184(%rsp), %rax         # 8-byte Reload
-	adcq	288(%rsp), %rax
-	movq	%rax, 184(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rax         # 8-byte Reload
-	adcq	296(%rsp), %rax
-	movq	%rax, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %rax         # 8-byte Reload
-	adcq	304(%rsp), %rax
-	movq	%rax, 160(%rsp)         # 8-byte Spill
-	adcq	312(%rsp), %r13
-	movq	%r13, 176(%rsp)         # 8-byte Spill
-	adcq	320(%rsp), %r12
-	movq	%r12, 168(%rsp)         # 8-byte Spill
-	adcq	328(%rsp), %r14
-	movq	%r14, %r13
-	adcq	336(%rsp), %rbp
+	movq	%rsi, -16(%rsp)                 # 8-byte Spill
+	movq	16(%rcx), %rsi
+	movq	%rsi, -72(%rsp)                 # 8-byte Spill
+	mulxq	%rsi, %rbp, %r9
+	movq	(%rcx), %rax
+	movq	8(%rcx), %r10
+	mulxq	%r10, %rcx, %rsi
+	movq	%r10, -32(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %rdx, %rbx
+	movq	%rax, -40(%rsp)                 # 8-byte Spill
+	addq	%rcx, %rbx
+	adcq	%rbp, %rsi
+	adcq	%r8, %r9
+	adcq	-48(%rsp), %r15                 # 8-byte Folded Reload
+	adcq	-88(%rsp), %r13                 # 8-byte Folded Reload
+	adcq	$0, %r12
+	addq	%rdi, %rdx
+	movq	%r11, -24(%rsp)                 # 8-byte Spill
+	adcq	8(%r11), %rbx
+	adcq	16(%r11), %rsi
+	adcq	24(%r11), %r9
+	adcq	32(%r11), %r15
+	adcq	40(%r11), %r13
+	adcq	48(%r11), %r12
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	-80(%rsp), %rdx                 # 8-byte Reload
+	imulq	%rbx, %rdx
+	mulxq	%r14, %rcx, %rdi
+	movq	%rdi, -48(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r14, %rdi
+	mulxq	%r10, %rbp, %rax
+	addq	%rdi, %rbp
+	mulxq	-72(%rsp), %r8, %r10            # 8-byte Folded Reload
+	adcq	%rax, %r8
+	adcq	%rcx, %r10
+	mulxq	-64(%rsp), %rdi, %r11           # 8-byte Folded Reload
+	adcq	-48(%rsp), %rdi                 # 8-byte Folded Reload
+	mulxq	-56(%rsp), %rax, %rcx           # 8-byte Folded Reload
+	adcq	%r11, %rax
+	movzbl	-88(%rsp), %edx                 # 1-byte Folded Reload
+	adcq	%rdx, %rcx
+	addq	%rbx, %r14
+	adcq	%rsi, %rbp
+	adcq	%r9, %r8
+	adcq	%r15, %r10
+	adcq	%r13, %rdi
+	adcq	%r12, %rax
+	movq	-24(%rsp), %rdx                 # 8-byte Reload
+	adcq	56(%rdx), %rcx
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	-80(%rsp), %rdx                 # 8-byte Reload
+	imulq	%rbp, %rdx
+	mulxq	-16(%rsp), %r11, %rsi           # 8-byte Folded Reload
+	movq	%rsi, -48(%rsp)                 # 8-byte Spill
+	mulxq	-40(%rsp), %r15, %rbx           # 8-byte Folded Reload
+	mulxq	-32(%rsp), %rsi, %r13           # 8-byte Folded Reload
+	addq	%rbx, %rsi
+	mulxq	-72(%rsp), %r9, %r12            # 8-byte Folded Reload
+	adcq	%r13, %r9
+	adcq	%r11, %r12
+	mulxq	-64(%rsp), %r11, %r14           # 8-byte Folded Reload
+	adcq	-48(%rsp), %r11                 # 8-byte Folded Reload
+	mulxq	-56(%rsp), %rbx, %r13           # 8-byte Folded Reload
+	adcq	%r14, %rbx
+	movzbl	-88(%rsp), %edx                 # 1-byte Folded Reload
+	adcq	%rdx, %r13
+	addq	%rbp, %r15
+	adcq	%r8, %rsi
+	adcq	%r10, %r9
+	adcq	%rdi, %r12
+	adcq	%rax, %r11
+	adcq	%rcx, %rbx
+	movq	-24(%rsp), %rax                 # 8-byte Reload
+	adcq	64(%rax), %r13
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	-80(%rsp), %rdx                 # 8-byte Reload
+	imulq	%rsi, %rdx
+	mulxq	-16(%rsp), %rbp, %r8            # 8-byte Folded Reload
+	mulxq	-40(%rsp), %r15, %rdi           # 8-byte Folded Reload
+	mulxq	-32(%rsp), %rax, %rcx           # 8-byte Folded Reload
+	addq	%rdi, %rax
+	mulxq	-72(%rsp), %r10, %r14           # 8-byte Folded Reload
+	adcq	%rcx, %r10
+	adcq	%rbp, %r14
+	mulxq	-64(%rsp), %rbp, %rdi           # 8-byte Folded Reload
+	adcq	%r8, %rbp
+	mulxq	-56(%rsp), %rcx, %r8            # 8-byte Folded Reload
+	adcq	%rdi, %rcx
+	movzbl	-88(%rsp), %edx                 # 1-byte Folded Reload
+	adcq	%rdx, %r8
+	addq	%rsi, %r15
+	adcq	%r9, %rax
+	adcq	%r12, %r10
+	adcq	%r11, %r14
+	adcq	%rbx, %rbp
+	adcq	%r13, %rcx
+	movq	-24(%rsp), %rdx                 # 8-byte Reload
+	adcq	72(%rdx), %r8
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	-80(%rsp), %rdx                 # 8-byte Reload
+	imulq	%rax, %rdx
+	mulxq	-16(%rsp), %r15, %r13           # 8-byte Folded Reload
+	mulxq	-40(%rsp), %rbx, %rdi           # 8-byte Folded Reload
+	mulxq	-32(%rsp), %rsi, %r11           # 8-byte Folded Reload
+	addq	%rdi, %rsi
+	mulxq	-72(%rsp), %r9, %r12            # 8-byte Folded Reload
+	adcq	%r11, %r9
+	adcq	%r15, %r12
+	mulxq	-64(%rsp), %r11, %r15           # 8-byte Folded Reload
+	adcq	%r13, %r11
+	mulxq	-56(%rsp), %rdi, %r13           # 8-byte Folded Reload
+	adcq	%r15, %rdi
+	movzbl	-88(%rsp), %edx                 # 1-byte Folded Reload
+	adcq	%rdx, %r13
+	addq	%rax, %rbx
+	adcq	%r10, %rsi
+	adcq	%r14, %r9
+	adcq	%rbp, %r12
+	adcq	%rcx, %r11
+	adcq	%r8, %rdi
+	movq	-24(%rsp), %rax                 # 8-byte Reload
+	adcq	80(%rax), %r13
+	setb	%r14b
+	movq	-80(%rsp), %rdx                 # 8-byte Reload
+	imulq	%rsi, %rdx
+	mulxq	-40(%rsp), %rax, %rcx           # 8-byte Folded Reload
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	mulxq	-32(%rsp), %r8, %rbp            # 8-byte Folded Reload
+	addq	%rcx, %r8
+	mulxq	-72(%rsp), %rbx, %r10           # 8-byte Folded Reload
+	adcq	%rbp, %rbx
+	mulxq	-16(%rsp), %rcx, %r15           # 8-byte Folded Reload
+	adcq	%r10, %rcx
+	mulxq	-64(%rsp), %rbp, %r10           # 8-byte Folded Reload
+	adcq	%r15, %rbp
+	mulxq	-56(%rsp), %rdx, %r15           # 8-byte Folded Reload
+	adcq	%r10, %rdx
+	movzbl	%r14b, %r14d
+	adcq	%r15, %r14
+	addq	%rsi, -80(%rsp)                 # 8-byte Folded Spill
+	adcq	%r9, %r8
+	adcq	%r12, %rbx
+	adcq	%r11, %rcx
+	adcq	%rdi, %rbp
+	adcq	%r13, %rdx
+	movq	-24(%rsp), %rax                 # 8-byte Reload
+	adcq	88(%rax), %r14
+	xorl	%r9d, %r9d
+	movq	%r8, %r10
+	subq	-40(%rsp), %r10                 # 8-byte Folded Reload
+	movq	%rbx, %r11
+	sbbq	-32(%rsp), %r11                 # 8-byte Folded Reload
+	movq	%rcx, %r15
+	sbbq	-72(%rsp), %r15                 # 8-byte Folded Reload
 	movq	%rbp, %r12
-	adcq	$0, %rbx
-	movq	%rbx, %r14
-	movq	80(%rsp), %r15          # 8-byte Reload
-	adcq	$0, %r15
-	movq	128(%rsp), %rdx         # 8-byte Reload
-	movq	%rcx, %rbx
-	imulq	%rbx, %rdx
-	leaq	200(%rsp), %rdi
-	movq	112(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	200(%rsp), %rbx
-	movq	184(%rsp), %rax         # 8-byte Reload
-	adcq	208(%rsp), %rax
-	movq	%rax, 184(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %r8          # 8-byte Reload
-	adcq	216(%rsp), %r8
-	movq	%r8, 192(%rsp)          # 8-byte Spill
-	movq	160(%rsp), %rdx         # 8-byte Reload
-	adcq	224(%rsp), %rdx
-	movq	176(%rsp), %rsi         # 8-byte Reload
-	adcq	232(%rsp), %rsi
-	movq	168(%rsp), %rdi         # 8-byte Reload
-	adcq	240(%rsp), %rdi
-	movq	%r13, %rbp
-	adcq	248(%rsp), %rbp
-	movq	%r12, %rbx
-	adcq	256(%rsp), %rbx
-	movq	%rbx, 136(%rsp)         # 8-byte Spill
-	movq	%r14, %r9
-	adcq	264(%rsp), %r9
-	adcq	$0, %r15
-	movq	%r15, %r10
-	subq	16(%rsp), %rax          # 8-byte Folded Reload
-	movq	%r8, %rcx
-	sbbq	8(%rsp), %rcx           # 8-byte Folded Reload
-	movq	%rdx, %r13
-	sbbq	24(%rsp), %r13          # 8-byte Folded Reload
-	movq	%rsi, %r12
-	sbbq	32(%rsp), %r12          # 8-byte Folded Reload
-	movq	%rdi, %r14
-	sbbq	40(%rsp), %r14          # 8-byte Folded Reload
-	movq	%rbp, %r11
-	sbbq	48(%rsp), %r11          # 8-byte Folded Reload
-	movq	%rbx, %r8
-	sbbq	56(%rsp), %r8           # 8-byte Folded Reload
-	movq	%r9, %r15
-	sbbq	64(%rsp), %r9           # 8-byte Folded Reload
-	sbbq	$0, %r10
-	andl	$1, %r10d
-	cmovneq	%r15, %r9
-	testb	%r10b, %r10b
-	cmovneq	184(%rsp), %rax         # 8-byte Folded Reload
-	movq	72(%rsp), %rbx          # 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovneq	192(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, 8(%rbx)
-	cmovneq	%rdx, %r13
-	movq	%r13, 16(%rbx)
-	cmovneq	%rsi, %r12
-	movq	%r12, 24(%rbx)
-	cmovneq	%rdi, %r14
-	movq	%r14, 32(%rbx)
-	cmovneq	%rbp, %r11
-	movq	%r11, 40(%rbx)
-	cmovneq	136(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r8, 48(%rbx)
-	movq	%r9, 56(%rbx)
-	addq	$776, %rsp              # imm = 0x308
+	sbbq	-16(%rsp), %r12                 # 8-byte Folded Reload
+	movq	%rdx, %rsi
+	sbbq	-64(%rsp), %rsi                 # 8-byte Folded Reload
+	movq	%r14, %rdi
+	sbbq	-56(%rsp), %rdi                 # 8-byte Folded Reload
+	sbbq	%r9, %r9
+	testb	$1, %r9b
+	cmovneq	%r14, %rdi
+	movq	-8(%rsp), %rax                  # 8-byte Reload
+	movq	%rdi, 40(%rax)
+	cmovneq	%rdx, %rsi
+	movq	%rsi, 32(%rax)
+	cmovneq	%rbp, %r12
+	movq	%r12, 24(%rax)
+	cmovneq	%rcx, %r15
+	movq	%r15, 16(%rax)
+	cmovneq	%rbx, %r11
+	movq	%r11, 8(%rax)
+	cmovneq	%r8, %r10
+	movq	%r10, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -10562,102 +3746,214 @@ mcl_fp_montRed8Lbmi2:                   # @mcl_fp_montRed8Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end116:
-	.size	mcl_fp_montRed8Lbmi2, .Lfunc_end116-mcl_fp_montRed8Lbmi2
-
-	.globl	mcl_fp_addPre8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre8Lbmi2,@function
-mcl_fp_addPre8Lbmi2:                    # @mcl_fp_addPre8Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r8
-	movq	56(%rsi), %r15
-	movq	48(%rdx), %r9
-	movq	48(%rsi), %r12
-	movq	40(%rdx), %r10
-	movq	32(%rdx), %r11
-	movq	24(%rdx), %r14
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	40(%rsi), %r13
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %rsi
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rbx, 16(%rdi)
-	adcq	%r14, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r11, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%r10, %r13
-	movq	%r13, 40(%rdi)
-	adcq	%r9, %r12
-	movq	%r12, 48(%rdi)
-	adcq	%r8, %r15
-	movq	%r15, 56(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end117:
-	.size	mcl_fp_addPre8Lbmi2, .Lfunc_end117-mcl_fp_addPre8Lbmi2
-
-	.globl	mcl_fp_subPre8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre8Lbmi2,@function
-mcl_fp_subPre8Lbmi2:                    # @mcl_fp_subPre8Lbmi2
-# BB#0:
+.Lfunc_end45:
+	.size	mcl_fp_montRed6Lbmi2, .Lfunc_end45-mcl_fp_montRed6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRedNF6Lbmi2          # -- Begin function mcl_fp_montRedNF6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF6Lbmi2,@function
+mcl_fp_montRedNF6Lbmi2:                 # @mcl_fp_montRedNF6Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	56(%rdx), %r8
-	movq	56(%rsi), %r15
-	movq	48(%rdx), %r9
-	movq	40(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %r12
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %r12
-	movq	16(%rsi), %rcx
-	sbbq	16(%rdx), %rcx
-	movq	48(%rsi), %r13
-	movq	40(%rsi), %rdx
-	movq	32(%rsi), %rbp
-	movq	24(%rsi), %rsi
-	movq	%rbx, (%rdi)
-	movq	%r12, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r11, %rsi
-	movq	%rsi, 24(%rdi)
-	sbbq	%r14, %rbp
-	movq	%rbp, 32(%rdi)
-	sbbq	%r10, %rdx
-	movq	%rdx, 40(%rdi)
-	sbbq	%r9, %r13
-	movq	%r13, 48(%rdi)
-	sbbq	%r8, %r15
-	movq	%r15, 56(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
+	movq	%rdx, %rcx
+	movq	%rsi, %r11
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	-8(%rdx), %rax
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	movq	(%rsi), %rdi
+	movq	%rdi, %rdx
+	imulq	%rax, %rdx
+	movq	40(%rcx), %rsi
+	movq	%rsi, -48(%rsp)                 # 8-byte Spill
+	mulxq	%rsi, %rax, %r12
+	movq	%rax, -88(%rsp)                 # 8-byte Spill
+	movq	32(%rcx), %rsi
+	movq	%rsi, -56(%rsp)                 # 8-byte Spill
+	mulxq	%rsi, %rax, %r13
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	movq	24(%rcx), %rsi
+	mulxq	%rsi, %r8, %r15
+	movq	%rsi, %r14
+	movq	%rsi, -16(%rsp)                 # 8-byte Spill
+	movq	16(%rcx), %rsi
+	movq	%rsi, -64(%rsp)                 # 8-byte Spill
+	mulxq	%rsi, %rbp, %r9
+	movq	(%rcx), %rax
+	movq	8(%rcx), %r10
+	mulxq	%r10, %rcx, %rsi
+	movq	%r10, -32(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %rdx, %rbx
+	movq	%rax, -40(%rsp)                 # 8-byte Spill
+	addq	%rcx, %rbx
+	adcq	%rbp, %rsi
+	adcq	%r8, %r9
+	adcq	-80(%rsp), %r15                 # 8-byte Folded Reload
+	adcq	-88(%rsp), %r13                 # 8-byte Folded Reload
+	adcq	$0, %r12
+	addq	%rdi, %rdx
+	movq	%r11, -24(%rsp)                 # 8-byte Spill
+	adcq	8(%r11), %rbx
+	adcq	16(%r11), %rsi
+	adcq	24(%r11), %r9
+	adcq	32(%r11), %r15
+	adcq	40(%r11), %r13
+	adcq	48(%r11), %r12
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	-72(%rsp), %rdx                 # 8-byte Reload
+	imulq	%rbx, %rdx
+	mulxq	%r14, %rcx, %rdi
+	movq	%rdi, -80(%rsp)                 # 8-byte Spill
+	mulxq	%rax, %r14, %rdi
+	mulxq	%r10, %rbp, %rax
+	addq	%rdi, %rbp
+	mulxq	-64(%rsp), %r8, %r10            # 8-byte Folded Reload
+	adcq	%rax, %r8
+	adcq	%rcx, %r10
+	mulxq	-56(%rsp), %rdi, %r11           # 8-byte Folded Reload
+	adcq	-80(%rsp), %rdi                 # 8-byte Folded Reload
+	mulxq	-48(%rsp), %rax, %rcx           # 8-byte Folded Reload
+	adcq	%r11, %rax
+	movzbl	-88(%rsp), %edx                 # 1-byte Folded Reload
+	adcq	%rdx, %rcx
+	addq	%rbx, %r14
+	adcq	%rsi, %rbp
+	adcq	%r9, %r8
+	adcq	%r15, %r10
+	adcq	%r13, %rdi
+	adcq	%r12, %rax
+	movq	-24(%rsp), %rdx                 # 8-byte Reload
+	adcq	56(%rdx), %rcx
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	-72(%rsp), %rdx                 # 8-byte Reload
+	imulq	%rbp, %rdx
+	mulxq	-16(%rsp), %r11, %rsi           # 8-byte Folded Reload
+	movq	%rsi, -80(%rsp)                 # 8-byte Spill
+	mulxq	-40(%rsp), %r15, %rbx           # 8-byte Folded Reload
+	mulxq	-32(%rsp), %rsi, %r13           # 8-byte Folded Reload
+	addq	%rbx, %rsi
+	mulxq	-64(%rsp), %r9, %r12            # 8-byte Folded Reload
+	adcq	%r13, %r9
+	adcq	%r11, %r12
+	mulxq	-56(%rsp), %r11, %r14           # 8-byte Folded Reload
+	adcq	-80(%rsp), %r11                 # 8-byte Folded Reload
+	mulxq	-48(%rsp), %rbx, %r13           # 8-byte Folded Reload
+	adcq	%r14, %rbx
+	movzbl	-88(%rsp), %edx                 # 1-byte Folded Reload
+	adcq	%rdx, %r13
+	addq	%rbp, %r15
+	adcq	%r8, %rsi
+	adcq	%r10, %r9
+	adcq	%rdi, %r12
+	adcq	%rax, %r11
+	adcq	%rcx, %rbx
+	movq	-24(%rsp), %rax                 # 8-byte Reload
+	adcq	64(%rax), %r13
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	-72(%rsp), %rdx                 # 8-byte Reload
+	imulq	%rsi, %rdx
+	mulxq	-16(%rsp), %rbp, %r8            # 8-byte Folded Reload
+	mulxq	-40(%rsp), %r15, %rdi           # 8-byte Folded Reload
+	mulxq	-32(%rsp), %rax, %rcx           # 8-byte Folded Reload
+	addq	%rdi, %rax
+	mulxq	-64(%rsp), %r10, %r14           # 8-byte Folded Reload
+	adcq	%rcx, %r10
+	adcq	%rbp, %r14
+	mulxq	-56(%rsp), %rbp, %rdi           # 8-byte Folded Reload
+	adcq	%r8, %rbp
+	mulxq	-48(%rsp), %rcx, %r8            # 8-byte Folded Reload
+	adcq	%rdi, %rcx
+	movzbl	-88(%rsp), %edx                 # 1-byte Folded Reload
+	adcq	%rdx, %r8
+	addq	%rsi, %r15
+	adcq	%r9, %rax
+	adcq	%r12, %r10
+	adcq	%r11, %r14
+	adcq	%rbx, %rbp
+	adcq	%r13, %rcx
+	movq	-24(%rsp), %rdx                 # 8-byte Reload
+	adcq	72(%rdx), %r8
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	-72(%rsp), %rdx                 # 8-byte Reload
+	imulq	%rax, %rdx
+	mulxq	-16(%rsp), %r13, %rsi           # 8-byte Folded Reload
+	movq	%rsi, -80(%rsp)                 # 8-byte Spill
+	mulxq	-40(%rsp), %r15, %rdi           # 8-byte Folded Reload
+	mulxq	-32(%rsp), %rsi, %r11           # 8-byte Folded Reload
+	addq	%rdi, %rsi
+	mulxq	-64(%rsp), %r12, %r9            # 8-byte Folded Reload
+	adcq	%r11, %r12
+	adcq	%r13, %r9
+	mulxq	-56(%rsp), %r13, %rbx           # 8-byte Folded Reload
+	adcq	-80(%rsp), %r13                 # 8-byte Folded Reload
+	mulxq	-48(%rsp), %rdi, %r11           # 8-byte Folded Reload
+	adcq	%rbx, %rdi
+	movzbl	-88(%rsp), %edx                 # 1-byte Folded Reload
+	adcq	%rdx, %r11
+	addq	%rax, %r15
+	adcq	%r10, %rsi
+	adcq	%r14, %r12
+	adcq	%rbp, %r9
+	adcq	%rcx, %r13
+	adcq	%r8, %rdi
+	movq	-24(%rsp), %rax                 # 8-byte Reload
+	adcq	80(%rax), %r11
+	setb	%r14b
+	movq	-72(%rsp), %rdx                 # 8-byte Reload
+	imulq	%rsi, %rdx
+	mulxq	-40(%rsp), %rax, %rcx           # 8-byte Folded Reload
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	mulxq	-32(%rsp), %r8, %rbx            # 8-byte Folded Reload
+	addq	%rcx, %r8
+	mulxq	-64(%rsp), %rcx, %r10           # 8-byte Folded Reload
+	adcq	%rbx, %rcx
+	mulxq	-16(%rsp), %rbp, %r15           # 8-byte Folded Reload
+	adcq	%r10, %rbp
+	mulxq	-56(%rsp), %rbx, %r10           # 8-byte Folded Reload
+	adcq	%r15, %rbx
+	mulxq	-48(%rsp), %rdx, %r15           # 8-byte Folded Reload
+	adcq	%r10, %rdx
+	movzbl	%r14b, %r14d
+	adcq	%r15, %r14
+	addq	%rsi, -72(%rsp)                 # 8-byte Folded Spill
+	adcq	%r12, %r8
+	adcq	%r9, %rcx
+	adcq	%r13, %rbp
+	adcq	%rdi, %rbx
+	adcq	%r11, %rdx
+	movq	-24(%rsp), %rax                 # 8-byte Reload
+	adcq	88(%rax), %r14
+	movq	%r8, %r9
+	subq	-40(%rsp), %r9                  # 8-byte Folded Reload
+	movq	%rcx, %r10
+	sbbq	-32(%rsp), %r10                 # 8-byte Folded Reload
+	movq	%rbp, %r11
+	sbbq	-64(%rsp), %r11                 # 8-byte Folded Reload
+	movq	%rbx, %r15
+	sbbq	-16(%rsp), %r15                 # 8-byte Folded Reload
+	movq	%rdx, %rax
+	sbbq	-56(%rsp), %rax                 # 8-byte Folded Reload
+	movq	%r14, %rdi
+	sbbq	-48(%rsp), %rdi                 # 8-byte Folded Reload
+	movq	%rdi, %rsi
+	sarq	$63, %rsi
+	cmovsq	%r14, %rdi
+	movq	-8(%rsp), %rsi                  # 8-byte Reload
+	movq	%rdi, 40(%rsi)
+	cmovsq	%rdx, %rax
+	movq	%rax, 32(%rsi)
+	cmovsq	%rbx, %r15
+	movq	%r15, 24(%rsi)
+	cmovsq	%rbp, %r11
+	movq	%r11, 16(%rsi)
+	cmovsq	%rcx, %r10
+	movq	%r10, 8(%rsi)
+	cmovsq	%r8, %r9
+	movq	%r9, (%rsi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -10665,444 +3961,374 @@ mcl_fp_subPre8Lbmi2:                    # @mcl_fp_subPre8Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end118:
-	.size	mcl_fp_subPre8Lbmi2, .Lfunc_end118-mcl_fp_subPre8Lbmi2
-
-	.globl	mcl_fp_shr1_8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_8Lbmi2,@function
-mcl_fp_shr1_8Lbmi2:                     # @mcl_fp_shr1_8Lbmi2
-# BB#0:
-	movq	56(%rsi), %r8
-	movq	48(%rsi), %r9
-	movq	40(%rsi), %r10
-	movq	32(%rsi), %r11
-	movq	24(%rsi), %rcx
-	movq	16(%rsi), %rdx
-	movq	(%rsi), %rax
+.Lfunc_end46:
+	.size	mcl_fp_montRedNF6Lbmi2, .Lfunc_end46-mcl_fp_montRedNF6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addPre6Lbmi2             # -- Begin function mcl_fp_addPre6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre6Lbmi2,@function
+mcl_fp_addPre6Lbmi2:                    # @mcl_fp_addPre6Lbmi2
+# %bb.0:
+	movq	40(%rsi), %rax
+	movq	32(%rsi), %rcx
+	movq	24(%rsi), %r8
+	movq	16(%rsi), %r9
+	movq	(%rsi), %r10
 	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rax
-	movq	%rax, (%rdi)
-	shrdq	$1, %rdx, %rsi
+	addq	(%rdx), %r10
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r9
+	adcq	24(%rdx), %r8
+	adcq	32(%rdx), %rcx
+	adcq	40(%rdx), %rax
+	movq	%rax, 40(%rdi)
+	movq	%rcx, 32(%rdi)
+	movq	%r8, 24(%rdi)
+	movq	%r9, 16(%rdi)
 	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rcx, %rdx
-	movq	%rdx, 16(%rdi)
-	shrdq	$1, %r11, %rcx
-	movq	%rcx, 24(%rdi)
-	shrdq	$1, %r10, %r11
-	movq	%r11, 32(%rdi)
-	shrdq	$1, %r9, %r10
-	movq	%r10, 40(%rdi)
-	shrdq	$1, %r8, %r9
-	movq	%r9, 48(%rdi)
-	shrq	%r8
-	movq	%r8, 56(%rdi)
+	movq	%r10, (%rdi)
+	setb	%al
+	movzbl	%al, %eax
 	retq
-.Lfunc_end119:
-	.size	mcl_fp_shr1_8Lbmi2, .Lfunc_end119-mcl_fp_shr1_8Lbmi2
-
-	.globl	mcl_fp_add8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add8Lbmi2,@function
-mcl_fp_add8Lbmi2:                       # @mcl_fp_add8Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r15
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r12
-	movq	48(%rsi), %r9
-	movq	40(%rsi), %r13
-	movq	24(%rsi), %r11
-	movq	32(%rsi), %r10
-	movq	(%rdx), %r14
-	movq	8(%rdx), %rbx
-	addq	(%rsi), %r14
-	adcq	8(%rsi), %rbx
-	movq	16(%rdx), %rax
-	adcq	16(%rsi), %rax
-	adcq	24(%rdx), %r11
-	movq	40(%rdx), %rsi
-	adcq	32(%rdx), %r10
-	movq	%r14, (%rdi)
-	movq	%rbx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r11, 24(%rdi)
-	movq	%r10, 32(%rdi)
-	adcq	%r13, %rsi
+.Lfunc_end47:
+	.size	mcl_fp_addPre6Lbmi2, .Lfunc_end47-mcl_fp_addPre6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subPre6Lbmi2             # -- Begin function mcl_fp_subPre6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre6Lbmi2,@function
+mcl_fp_subPre6Lbmi2:                    # @mcl_fp_subPre6Lbmi2
+# %bb.0:
+	movq	40(%rsi), %rcx
+	movq	32(%rsi), %r8
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r11
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
+	subq	(%rdx), %r11
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %r10
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r8
+	sbbq	40(%rdx), %rcx
+	movq	%rcx, 40(%rdi)
+	movq	%r8, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r11, (%rdi)
+	sbbq	%rax, %rax
+	andl	$1, %eax
+	retq
+.Lfunc_end48:
+	.size	mcl_fp_subPre6Lbmi2, .Lfunc_end48-mcl_fp_subPre6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_shr1_6Lbmi2              # -- Begin function mcl_fp_shr1_6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_6Lbmi2,@function
+mcl_fp_shr1_6Lbmi2:                     # @mcl_fp_shr1_6Lbmi2
+# %bb.0:
+	movq	(%rsi), %r9
+	movq	8(%rsi), %r8
+	movq	16(%rsi), %r10
+	movq	24(%rsi), %rcx
+	movq	32(%rsi), %rax
+	movq	40(%rsi), %rdx
+	movq	%rdx, %rsi
+	shrq	%rsi
 	movq	%rsi, 40(%rdi)
-	adcq	%r12, %r9
-	movq	%r9, 48(%rdi)
-	adcq	%r15, %r8
-	movq	%r8, 56(%rdi)
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	subq	(%rcx), %r14
-	sbbq	8(%rcx), %rbx
-	sbbq	16(%rcx), %rax
-	sbbq	24(%rcx), %r11
-	sbbq	32(%rcx), %r10
-	sbbq	40(%rcx), %rsi
-	sbbq	48(%rcx), %r9
-	sbbq	56(%rcx), %r8
+	shldq	$63, %rax, %rdx
+	movq	%rdx, 32(%rdi)
+	shldq	$63, %rcx, %rax
+	movq	%rax, 24(%rdi)
+	shldq	$63, %r10, %rcx
+	movq	%rcx, 16(%rdi)
+	shldq	$63, %r8, %r10
+	movq	%r10, 8(%rdi)
+	shrdq	$1, %r8, %r9
+	movq	%r9, (%rdi)
+	retq
+.Lfunc_end49:
+	.size	mcl_fp_shr1_6Lbmi2, .Lfunc_end49-mcl_fp_shr1_6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_add6Lbmi2                # -- Begin function mcl_fp_add6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_add6Lbmi2,@function
+mcl_fp_add6Lbmi2:                       # @mcl_fp_add6Lbmi2
+# %bb.0:
+	movq	40(%rsi), %r8
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r10
+	movq	16(%rsi), %r11
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r11
+	adcq	24(%rdx), %r10
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r8
+	movq	%r8, 40(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r10, 24(%rdi)
+	movq	%r11, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rax, (%rdi)
+	setb	%dl
+	movzbl	%dl, %edx
+	subq	(%rcx), %rax
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %r11
+	sbbq	24(%rcx), %r10
+	sbbq	32(%rcx), %r9
+	sbbq	40(%rcx), %r8
 	sbbq	$0, %rdx
 	testb	$1, %dl
-	jne	.LBB120_2
-# BB#1:                                 # %nocarry
-	movq	%r14, (%rdi)
-	movq	%rbx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r11, 24(%rdi)
-	movq	%r10, 32(%rdi)
-	movq	%rsi, 40(%rdi)
-	movq	%r9, 48(%rdi)
-	movq	%r8, 56(%rdi)
-.LBB120_2:                              # %carry
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
+	jne	.LBB50_2
+# %bb.1:                                # %nocarry
+	movq	%rax, (%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r11, 16(%rdi)
+	movq	%r10, 24(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r8, 40(%rdi)
+.LBB50_2:                               # %carry
 	retq
-.Lfunc_end120:
-	.size	mcl_fp_add8Lbmi2, .Lfunc_end120-mcl_fp_add8Lbmi2
-
-	.globl	mcl_fp_addNF8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF8Lbmi2,@function
-mcl_fp_addNF8Lbmi2:                     # @mcl_fp_addNF8Lbmi2
-# BB#0:
-	pushq	%rbp
+.Lfunc_end50:
+	.size	mcl_fp_add6Lbmi2, .Lfunc_end50-mcl_fp_add6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addNF6Lbmi2              # -- Begin function mcl_fp_addNF6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF6Lbmi2,@function
+mcl_fp_addNF6Lbmi2:                     # @mcl_fp_addNF6Lbmi2
+# %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	56(%rdx), %r8
-	movq	48(%rdx), %rbp
-	movq	40(%rdx), %rbx
-	movq	32(%rdx), %rax
-	movq	24(%rdx), %r11
-	movq	16(%rdx), %r15
-	movq	(%rdx), %r13
-	movq	8(%rdx), %r12
-	addq	(%rsi), %r13
-	adcq	8(%rsi), %r12
-	adcq	16(%rsi), %r15
-	adcq	24(%rsi), %r11
-	adcq	32(%rsi), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	%rax, %r10
-	adcq	40(%rsi), %rbx
-	movq	%rbx, -16(%rsp)         # 8-byte Spill
-	movq	%rbx, %r9
-	adcq	48(%rsi), %rbp
-	movq	%rbp, -8(%rsp)          # 8-byte Spill
-	movq	%rbp, %rax
-	adcq	56(%rsi), %r8
-	movq	%r13, %rsi
-	subq	(%rcx), %rsi
-	movq	%r12, %rdx
-	sbbq	8(%rcx), %rdx
+	movq	40(%rdx), %r15
+	movq	32(%rdx), %r11
+	movq	24(%rdx), %r10
+	movq	16(%rdx), %r9
+	movq	(%rdx), %r8
+	movq	8(%rdx), %r14
+	addq	(%rsi), %r8
+	adcq	8(%rsi), %r14
+	adcq	16(%rsi), %r9
+	adcq	24(%rsi), %r10
+	adcq	32(%rsi), %r11
+	adcq	40(%rsi), %r15
+	movq	%r8, %r12
+	subq	(%rcx), %r12
+	movq	%r14, %r13
+	sbbq	8(%rcx), %r13
+	movq	%r9, %rdx
+	sbbq	16(%rcx), %rdx
+	movq	%r10, %rax
+	sbbq	24(%rcx), %rax
+	movq	%r11, %rsi
+	sbbq	32(%rcx), %rsi
 	movq	%r15, %rbx
-	sbbq	16(%rcx), %rbx
-	movq	%r11, %r14
-	sbbq	24(%rcx), %r14
-	movq	%r10, %rbp
-	sbbq	32(%rcx), %rbp
-	movq	%r9, %r10
-	sbbq	40(%rcx), %r10
-	movq	%rax, %r9
-	sbbq	48(%rcx), %r9
-	movq	%r8, %rax
-	sbbq	56(%rcx), %rax
-	testq	%rax, %rax
-	cmovsq	%r13, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r12, %rdx
-	movq	%rdx, 8(%rdi)
+	sbbq	40(%rcx), %rbx
+	movq	%rbx, %rcx
+	sarq	$63, %rcx
 	cmovsq	%r15, %rbx
-	movq	%rbx, 16(%rdi)
-	cmovsq	%r11, %r14
-	movq	%r14, 24(%rdi)
-	cmovsq	-24(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, 32(%rdi)
-	cmovsq	-16(%rsp), %r10         # 8-byte Folded Reload
-	movq	%r10, 40(%rdi)
-	cmovsq	-8(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 48(%rdi)
-	cmovsq	%r8, %rax
-	movq	%rax, 56(%rdi)
+	movq	%rbx, 40(%rdi)
+	cmovsq	%r11, %rsi
+	movq	%rsi, 32(%rdi)
+	cmovsq	%r10, %rax
+	movq	%rax, 24(%rdi)
+	cmovsq	%r9, %rdx
+	movq	%rdx, 16(%rdi)
+	cmovsq	%r14, %r13
+	movq	%r13, 8(%rdi)
+	cmovsq	%r8, %r12
+	movq	%r12, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
-	popq	%rbp
 	retq
-.Lfunc_end121:
-	.size	mcl_fp_addNF8Lbmi2, .Lfunc_end121-mcl_fp_addNF8Lbmi2
-
-	.globl	mcl_fp_sub8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub8Lbmi2,@function
-mcl_fp_sub8Lbmi2:                       # @mcl_fp_sub8Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
+.Lfunc_end51:
+	.size	mcl_fp_addNF6Lbmi2, .Lfunc_end51-mcl_fp_addNF6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_sub6Lbmi2                # -- Begin function mcl_fp_sub6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_sub6Lbmi2,@function
+mcl_fp_sub6Lbmi2:                       # @mcl_fp_sub6Lbmi2
+# %bb.0:
 	pushq	%rbx
-	movq	56(%rdx), %r12
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r13
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r10
+	movq	40(%rsi), %r11
+	movq	32(%rsi), %r10
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %rax
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
 	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r10
-	movq	16(%rsi), %r11
-	sbbq	16(%rdx), %r11
-	movq	24(%rsi), %r15
-	sbbq	24(%rdx), %r15
-	movq	32(%rsi), %r14
-	sbbq	32(%rdx), %r14
-	movq	48(%rsi), %r9
-	movq	40(%rsi), %rsi
-	sbbq	40(%rdx), %rsi
-	movq	%rax, (%rdi)
-	movq	%r10, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	movq	%r15, 24(%rdi)
-	movq	%r14, 32(%rdi)
-	movq	%rsi, 40(%rdi)
-	sbbq	%r13, %r9
-	movq	%r9, 48(%rdi)
-	sbbq	%r12, %r8
-	movq	%r8, 56(%rdi)
-	sbbq	$0, %rbx
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %rax
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r10
+	sbbq	40(%rdx), %r11
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rbx, %rbx
 	testb	$1, %bl
-	je	.LBB122_2
-# BB#1:                                 # %carry
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	movq	8(%rcx), %rax
-	adcq	%r10, %rax
-	movq	%rax, 8(%rdi)
-	movq	16(%rcx), %rax
-	adcq	%r11, %rax
+	jne	.LBB52_2
+# %bb.1:                                # %nocarry
+	popq	%rbx
+	retq
+.LBB52_2:                               # %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %rax
+	adcq	24(%rcx), %r9
+	adcq	32(%rcx), %r10
+	adcq	40(%rcx), %r11
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
 	movq	%rax, 16(%rdi)
-	movq	24(%rcx), %rax
-	adcq	%r15, %rax
-	movq	%rax, 24(%rdi)
-	movq	32(%rcx), %rax
-	adcq	%r14, %rax
-	movq	%rax, 32(%rdi)
-	movq	40(%rcx), %rax
-	adcq	%rsi, %rax
-	movq	%rax, 40(%rdi)
-	movq	48(%rcx), %rax
-	adcq	%r9, %rax
-	movq	%rax, 48(%rdi)
-	movq	56(%rcx), %rax
-	adcq	%r8, %rax
-	movq	%rax, 56(%rdi)
-.LBB122_2:                              # %nocarry
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
 	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
 	retq
-.Lfunc_end122:
-	.size	mcl_fp_sub8Lbmi2, .Lfunc_end122-mcl_fp_sub8Lbmi2
-
-	.globl	mcl_fp_subNF8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF8Lbmi2,@function
-mcl_fp_subNF8Lbmi2:                     # @mcl_fp_subNF8Lbmi2
-# BB#0:
-	pushq	%rbp
+.Lfunc_end52:
+	.size	mcl_fp_sub6Lbmi2, .Lfunc_end52-mcl_fp_sub6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subNF6Lbmi2              # -- Begin function mcl_fp_subNF6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF6Lbmi2,@function
+mcl_fp_subNF6Lbmi2:                     # @mcl_fp_subNF6Lbmi2
+# %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r8
-	movq	%rdi, %r9
-	movq	56(%rsi), %r14
-	movq	48(%rsi), %rax
-	movq	40(%rsi), %rcx
-	movq	32(%rsi), %rdi
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %r15
-	movq	(%rsi), %r13
-	movq	8(%rsi), %r12
-	subq	(%rdx), %r13
-	sbbq	8(%rdx), %r12
-	sbbq	16(%rdx), %r15
-	sbbq	24(%rdx), %r11
-	sbbq	32(%rdx), %rdi
-	movq	%rdi, -24(%rsp)         # 8-byte Spill
-	sbbq	40(%rdx), %rcx
-	movq	%rcx, -16(%rsp)         # 8-byte Spill
-	sbbq	48(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	sbbq	56(%rdx), %r14
-	movq	%r14, %rsi
-	sarq	$63, %rsi
-	movq	56(%r8), %r10
-	andq	%rsi, %r10
-	movq	48(%r8), %rbx
-	andq	%rsi, %rbx
-	movq	40(%r8), %rdi
-	andq	%rsi, %rdi
-	movq	32(%r8), %rbp
-	andq	%rsi, %rbp
-	movq	24(%r8), %rdx
-	andq	%rsi, %rdx
-	movq	16(%r8), %rcx
-	andq	%rsi, %rcx
-	movq	8(%r8), %rax
-	andq	%rsi, %rax
-	andq	(%r8), %rsi
-	addq	%r13, %rsi
-	adcq	%r12, %rax
-	movq	%rsi, (%r9)
-	adcq	%r15, %rcx
-	movq	%rax, 8(%r9)
-	movq	%rcx, 16(%r9)
-	adcq	%r11, %rdx
-	movq	%rdx, 24(%r9)
-	adcq	-24(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, 32(%r9)
-	adcq	-16(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, 40(%r9)
-	adcq	-8(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, 48(%r9)
-	adcq	%r14, %r10
-	movq	%r10, 56(%r9)
+	movq	40(%rsi), %r15
+	movq	32(%rsi), %r8
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r11
+	movq	8(%rsi), %r14
+	subq	(%rdx), %r11
+	sbbq	8(%rdx), %r14
+	sbbq	16(%rdx), %r10
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r8
+	sbbq	40(%rdx), %r15
+	movq	%r15, %rdx
+	sarq	$63, %rdx
+	movq	%rdx, %rbx
+	shldq	$1, %r15, %rbx
+	andq	(%rcx), %rbx
+	movq	40(%rcx), %r12
+	andq	%rdx, %r12
+	movq	32(%rcx), %r13
+	andq	%rdx, %r13
+	movq	24(%rcx), %rsi
+	andq	%rdx, %rsi
+	movq	16(%rcx), %rax
+	andq	%rdx, %rax
+	andq	8(%rcx), %rdx
+	addq	%r11, %rbx
+	movq	%rbx, (%rdi)
+	adcq	%r14, %rdx
+	movq	%rdx, 8(%rdi)
+	adcq	%r10, %rax
+	movq	%rax, 16(%rdi)
+	adcq	%r9, %rsi
+	movq	%rsi, 24(%rdi)
+	adcq	%r8, %r13
+	movq	%r13, 32(%rdi)
+	adcq	%r15, %r12
+	movq	%r12, 40(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
-	popq	%rbp
 	retq
-.Lfunc_end123:
-	.size	mcl_fp_subNF8Lbmi2, .Lfunc_end123-mcl_fp_subNF8Lbmi2
-
-	.globl	mcl_fpDbl_add8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add8Lbmi2,@function
-mcl_fpDbl_add8Lbmi2:                    # @mcl_fpDbl_add8Lbmi2
-# BB#0:
+.Lfunc_end53:
+	.size	mcl_fp_subNF6Lbmi2, .Lfunc_end53-mcl_fp_subNF6Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_add6Lbmi2             # -- Begin function mcl_fpDbl_add6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add6Lbmi2,@function
+mcl_fpDbl_add6Lbmi2:                    # @mcl_fpDbl_add6Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r8
-	movq	120(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	112(%rdx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	104(%rdx), %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	96(%rdx), %r14
-	movq	24(%rsi), %r15
-	movq	32(%rsi), %r11
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rax
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r12
-	adcq	24(%rdx), %r15
-	adcq	32(%rdx), %r11
-	movq	88(%rdx), %rbp
-	movq	80(%rdx), %r13
-	movq	%rbx, (%rdi)
-	movq	72(%rdx), %r10
-	movq	%rax, 8(%rdi)
-	movq	64(%rdx), %r9
-	movq	%r12, 16(%rdi)
-	movq	40(%rdx), %r12
-	movq	%r15, 24(%rdi)
-	movq	40(%rsi), %rbx
-	adcq	%r12, %rbx
-	movq	56(%rdx), %r15
-	movq	48(%rdx), %r12
-	movq	%r11, 32(%rdi)
-	movq	48(%rsi), %rdx
-	adcq	%r12, %rdx
-	movq	120(%rsi), %r12
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rax
-	adcq	%r15, %rax
-	movq	112(%rsi), %rcx
-	movq	%rdx, 48(%rdi)
-	movq	64(%rsi), %rbx
-	adcq	%r9, %rbx
-	movq	104(%rsi), %rdx
-	movq	%rax, 56(%rdi)
-	movq	72(%rsi), %r9
-	adcq	%r10, %r9
-	movq	80(%rsi), %r11
-	adcq	%r13, %r11
-	movq	96(%rsi), %rax
 	movq	88(%rsi), %r15
-	adcq	%rbp, %r15
-	adcq	%r14, %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	%rdx, %rax
-	adcq	-32(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	adcq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	adcq	-8(%rsp), %r12          # 8-byte Folded Reload
-	movq	%r12, -8(%rsp)          # 8-byte Spill
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	movq	%rbx, %rsi
-	subq	(%r8), %rsi
-	movq	%r9, %rdx
-	sbbq	8(%r8), %rdx
-	movq	%r11, %r10
-	sbbq	16(%r8), %r10
-	movq	%r15, %r14
-	sbbq	24(%r8), %r14
-	movq	-16(%rsp), %r13         # 8-byte Reload
-	sbbq	32(%r8), %r13
-	movq	%rax, %r12
-	sbbq	40(%r8), %r12
-	movq	%rcx, %rax
-	sbbq	48(%r8), %rax
-	movq	-8(%rsp), %rcx          # 8-byte Reload
-	sbbq	56(%r8), %rcx
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	cmovneq	%rbx, %rsi
-	movq	%rsi, 64(%rdi)
-	testb	%bpl, %bpl
-	cmovneq	%r9, %rdx
-	movq	%rdx, 72(%rdi)
-	cmovneq	%r11, %r10
-	movq	%r10, 80(%rdi)
-	cmovneq	%r15, %r14
-	movq	%r14, 88(%rdi)
-	cmovneq	-16(%rsp), %r13         # 8-byte Folded Reload
-	movq	%r13, 96(%rdi)
-	cmovneq	-32(%rsp), %r12         # 8-byte Folded Reload
-	movq	%r12, 104(%rdi)
-	cmovneq	-24(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, 112(%rdi)
-	cmovneq	-8(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 120(%rdi)
+	movq	80(%rsi), %r14
+	movq	72(%rsi), %r11
+	movq	64(%rsi), %r10
+	movq	56(%rsi), %r9
+	movq	48(%rsi), %r8
+	movq	40(%rsi), %rax
+	movq	(%rsi), %r12
+	movq	8(%rsi), %r13
+	addq	(%rdx), %r12
+	adcq	8(%rdx), %r13
+	movq	32(%rsi), %rbx
+	movq	24(%rsi), %rbp
+	movq	16(%rsi), %rsi
+	adcq	16(%rdx), %rsi
+	adcq	24(%rdx), %rbp
+	adcq	32(%rdx), %rbx
+	adcq	40(%rdx), %rax
+	adcq	48(%rdx), %r8
+	adcq	56(%rdx), %r9
+	adcq	64(%rdx), %r10
+	adcq	72(%rdx), %r11
+	adcq	80(%rdx), %r14
+	adcq	88(%rdx), %r15
+	movq	%rax, 40(%rdi)
+	movq	%rbx, 32(%rdi)
+	movq	%rbp, 24(%rdi)
+	movq	%rsi, 16(%rdi)
+	movq	%r13, 8(%rdi)
+	movq	%r12, (%rdi)
+	setb	%al
+	movzbl	%al, %r12d
+	movq	%r8, %r13
+	subq	(%rcx), %r13
+	movq	%r9, %rsi
+	sbbq	8(%rcx), %rsi
+	movq	%r10, %rbx
+	sbbq	16(%rcx), %rbx
+	movq	%r11, %rbp
+	sbbq	24(%rcx), %rbp
+	movq	%r14, %rax
+	sbbq	32(%rcx), %rax
+	movq	%r15, %rdx
+	sbbq	40(%rcx), %rdx
+	sbbq	$0, %r12
+	testb	$1, %r12b
+	cmovneq	%r15, %rdx
+	movq	%rdx, 88(%rdi)
+	cmovneq	%r14, %rax
+	movq	%rax, 80(%rdi)
+	cmovneq	%r11, %rbp
+	movq	%rbp, 72(%rdi)
+	cmovneq	%r10, %rbx
+	movq	%rbx, 64(%rdi)
+	cmovneq	%r9, %rsi
+	movq	%rsi, 56(%rdi)
+	cmovneq	%r8, %r13
+	movq	%r13, 48(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -11110,114 +4336,83 @@ mcl_fpDbl_add8Lbmi2:                    # @mcl_fpDbl_add8Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end124:
-	.size	mcl_fpDbl_add8Lbmi2, .Lfunc_end124-mcl_fpDbl_add8Lbmi2
-
-	.globl	mcl_fpDbl_sub8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub8Lbmi2,@function
-mcl_fpDbl_sub8Lbmi2:                    # @mcl_fpDbl_sub8Lbmi2
-# BB#0:
+.Lfunc_end54:
+	.size	mcl_fpDbl_add6Lbmi2, .Lfunc_end54-mcl_fpDbl_add6Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sub6Lbmi2             # -- Begin function mcl_fpDbl_sub6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub6Lbmi2,@function
+mcl_fpDbl_sub6Lbmi2:                    # @mcl_fpDbl_sub6Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r15
-	movq	120(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	112(%rdx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	104(%rdx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %r9
-	movq	(%rsi), %r12
-	movq	8(%rsi), %r14
-	xorl	%r8d, %r8d
-	subq	(%rdx), %r12
-	sbbq	8(%rdx), %r14
-	sbbq	16(%rdx), %r9
-	movq	24(%rsi), %rbx
-	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %r13
-	sbbq	32(%rdx), %r13
-	movq	96(%rdx), %rbp
-	movq	88(%rdx), %r11
-	movq	%r12, (%rdi)
-	movq	80(%rdx), %r12
-	movq	%r14, 8(%rdi)
-	movq	72(%rdx), %r10
-	movq	%r9, 16(%rdi)
-	movq	40(%rdx), %r9
-	movq	%rbx, 24(%rdi)
+	movq	%rcx, %r10
+	movq	88(%rsi), %r15
+	movq	80(%rsi), %r14
+	movq	72(%rsi), %r11
+	movq	64(%rsi), %r9
+	movq	56(%rsi), %r8
+	movq	48(%rsi), %rax
+	movq	%rax, -16(%rsp)                 # 8-byte Spill
+	movq	(%rsi), %rcx
+	movq	8(%rsi), %r13
+	xorl	%eax, %eax
+	subq	(%rdx), %rcx
+	movq	%rcx, -8(%rsp)                  # 8-byte Spill
+	sbbq	8(%rdx), %r13
 	movq	40(%rsi), %rbx
-	sbbq	%r9, %rbx
-	movq	48(%rdx), %r9
-	movq	%r13, 32(%rdi)
-	movq	48(%rsi), %r14
-	sbbq	%r9, %r14
-	movq	64(%rdx), %r13
-	movq	56(%rdx), %r9
+	movq	32(%rsi), %rbp
+	movq	24(%rsi), %rcx
+	movq	16(%rsi), %rsi
+	sbbq	16(%rdx), %rsi
+	sbbq	24(%rdx), %rcx
+	sbbq	32(%rdx), %rbp
+	sbbq	40(%rdx), %rbx
+	movq	-16(%rsp), %r12                 # 8-byte Reload
+	sbbq	48(%rdx), %r12
+	movq	%r12, -16(%rsp)                 # 8-byte Spill
+	sbbq	56(%rdx), %r8
+	sbbq	64(%rdx), %r9
+	sbbq	72(%rdx), %r11
+	sbbq	80(%rdx), %r14
+	sbbq	88(%rdx), %r15
 	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rdx
-	sbbq	%r9, %rdx
-	movq	120(%rsi), %rcx
-	movq	%r14, 48(%rdi)
-	movq	64(%rsi), %rbx
-	sbbq	%r13, %rbx
-	movq	112(%rsi), %rax
-	movq	%rdx, 56(%rdi)
-	movq	72(%rsi), %r9
-	sbbq	%r10, %r9
-	movq	80(%rsi), %r13
-	sbbq	%r12, %r13
-	movq	88(%rsi), %r12
-	sbbq	%r11, %r12
-	movq	104(%rsi), %rdx
-	movq	96(%rsi), %r14
-	sbbq	%rbp, %r14
-	sbbq	-24(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	sbbq	-16(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	sbbq	-8(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, -8(%rsp)          # 8-byte Spill
-	movl	$0, %ebp
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	movq	(%r15), %r11
-	cmoveq	%r8, %r11
-	testb	%bpl, %bpl
-	movq	16(%r15), %rbp
-	cmoveq	%r8, %rbp
-	movq	8(%r15), %rsi
-	cmoveq	%r8, %rsi
-	movq	56(%r15), %r10
-	cmoveq	%r8, %r10
-	movq	48(%r15), %rdx
-	cmoveq	%r8, %rdx
-	movq	40(%r15), %rcx
-	cmoveq	%r8, %rcx
-	movq	32(%r15), %rax
-	cmoveq	%r8, %rax
-	cmovneq	24(%r15), %r8
-	addq	%rbx, %r11
-	adcq	%r9, %rsi
-	movq	%r11, 64(%rdi)
-	adcq	%r13, %rbp
+	movq	%rbp, 32(%rdi)
+	movq	%rcx, 24(%rdi)
+	movq	%rsi, 16(%rdi)
+	movq	%r13, 8(%rdi)
+	movq	-8(%rsp), %rcx                  # 8-byte Reload
+	movq	%rcx, (%rdi)
+	sbbq	%rax, %rax
+	andl	$1, %eax
+	negq	%rax
+	movq	40(%r10), %rcx
+	andq	%rax, %rcx
+	movq	32(%r10), %rdx
+	andq	%rax, %rdx
+	movq	24(%r10), %rsi
+	andq	%rax, %rsi
+	movq	16(%r10), %rbx
+	andq	%rax, %rbx
+	movq	8(%r10), %rbp
+	andq	%rax, %rbp
+	andq	(%r10), %rax
+	addq	-16(%rsp), %rax                 # 8-byte Folded Reload
+	movq	%rax, 48(%rdi)
+	adcq	%r8, %rbp
+	movq	%rbp, 56(%rdi)
+	adcq	%r9, %rbx
+	movq	%rbx, 64(%rdi)
+	adcq	%r11, %rsi
 	movq	%rsi, 72(%rdi)
-	movq	%rbp, 80(%rdi)
-	adcq	%r12, %r8
-	movq	%r8, 88(%rdi)
-	adcq	%r14, %rax
-	movq	%rax, 96(%rdi)
-	adcq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, 104(%rdi)
-	adcq	-16(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, 112(%rdi)
-	adcq	-8(%rsp), %r10          # 8-byte Folded Reload
-	movq	%r10, 120(%rdi)
+	adcq	%r14, %rdx
+	movq	%rdx, 80(%rdi)
+	adcq	%r15, %rcx
+	movq	%rcx, 88(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -11225,388 +4420,286 @@ mcl_fpDbl_sub8Lbmi2:                    # @mcl_fpDbl_sub8Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end125:
-	.size	mcl_fpDbl_sub8Lbmi2, .Lfunc_end125-mcl_fpDbl_sub8Lbmi2
-
-	.align	16, 0x90
-	.type	.LmulPv576x64,@function
-.LmulPv576x64:                          # @mulPv576x64
-# BB#0:
-	mulxq	(%rsi), %rcx, %rax
-	movq	%rcx, (%rdi)
-	mulxq	8(%rsi), %rcx, %r8
-	addq	%rax, %rcx
-	movq	%rcx, 8(%rdi)
-	mulxq	16(%rsi), %rcx, %r9
-	adcq	%r8, %rcx
-	movq	%rcx, 16(%rdi)
-	mulxq	24(%rsi), %rax, %rcx
-	adcq	%r9, %rax
-	movq	%rax, 24(%rdi)
-	mulxq	32(%rsi), %rax, %r8
-	adcq	%rcx, %rax
-	movq	%rax, 32(%rdi)
-	mulxq	40(%rsi), %rcx, %r9
-	adcq	%r8, %rcx
-	movq	%rcx, 40(%rdi)
-	mulxq	48(%rsi), %rax, %rcx
-	adcq	%r9, %rax
-	movq	%rax, 48(%rdi)
-	mulxq	56(%rsi), %rax, %r8
-	adcq	%rcx, %rax
-	movq	%rax, 56(%rdi)
-	mulxq	64(%rsi), %rax, %rcx
-	adcq	%r8, %rax
-	movq	%rax, 64(%rdi)
-	adcq	$0, %rcx
-	movq	%rcx, 72(%rdi)
+.Lfunc_end55:
+	.size	mcl_fpDbl_sub6Lbmi2, .Lfunc_end55-mcl_fpDbl_sub6Lbmi2
+                                        # -- End function
+	.globl	mulPv512x64bmi2                 # -- Begin function mulPv512x64bmi2
+	.p2align	4, 0x90
+	.type	mulPv512x64bmi2,@function
+mulPv512x64bmi2:                        # @mulPv512x64bmi2
+# %bb.0: (%rdi)[0..8] = (%rsi)[0..7] * %rdx (8 limbs times one 64-bit word -> 9 limbs); the destination pointer is left in %rax
 	movq	%rdi, %rax
+	mulxq	(%rsi), %rdi, %rcx              # %rdi = low half, %rcx = high half of %rdx * limb 0
+	movq	%rdi, (%rax)                    # result limb 0
+	mulxq	8(%rsi), %rdi, %r8
+	addq	%rcx, %rdi                      # low(limb 1 product) + high(limb 0 product); starts the carry chain
+	movq	%rdi, 8(%rax)
+	mulxq	16(%rsi), %rdi, %r9             # mulx does not write flags, so the carry survives to the next adcq
+	adcq	%r8, %rdi
+	movq	%rdi, 16(%rax)
+	mulxq	24(%rsi), %rcx, %rdi
+	adcq	%r9, %rcx
+	movq	%rcx, 24(%rax)
+	mulxq	32(%rsi), %rcx, %r8
+	adcq	%rdi, %rcx
+	movq	%rcx, 32(%rax)
+	mulxq	40(%rsi), %rdi, %r9
+	adcq	%r8, %rdi
+	movq	%rdi, 40(%rax)
+	mulxq	48(%rsi), %rcx, %rdi
+	adcq	%r9, %rcx
+	movq	%rcx, 48(%rax)
+	mulxq	56(%rsi), %rcx, %rdx            # last product: the high half overwrites the multiplier in %rdx
+	adcq	%rdi, %rcx
+	movq	%rcx, 56(%rax)
+	adcq	$0, %rdx                        # fold the final carry into the top high half
+	movq	%rdx, 64(%rax)                  # ninth (most significant) result limb
 	retq
-.Lfunc_end126:
-	.size	.LmulPv576x64, .Lfunc_end126-.LmulPv576x64
-
-	.globl	mcl_fp_mulUnitPre9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre9Lbmi2,@function
-mcl_fp_mulUnitPre9Lbmi2:                # @mcl_fp_mulUnitPre9Lbmi2
-# BB#0:
-	pushq	%r14
+.Lfunc_end56:
+	.size	mulPv512x64bmi2, .Lfunc_end56-mulPv512x64bmi2
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre8Lbmi2         # -- Begin function mcl_fp_mulUnitPre8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre8Lbmi2,@function
+mcl_fp_mulUnitPre8Lbmi2:                # @mcl_fp_mulUnitPre8Lbmi2
+# %bb.0: (%rdi)[0..8] = (%rsi)[0..7] * %rdx, computed into a stack buffer by mulPv512x64bmi2 and then copied out
 	pushq	%rbx
-	subq	$88, %rsp
+	subq	$80, %rsp                       # scratch area; the 72-byte product buffer starts at 8(%rsp)
 	movq	%rdi, %rbx
 	leaq	8(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	80(%rsp), %r8
-	movq	72(%rsp), %r9
-	movq	64(%rsp), %r10
-	movq	56(%rsp), %r11
-	movq	48(%rsp), %r14
-	movq	40(%rsp), %rax
-	movq	32(%rsp), %rcx
-	movq	24(%rsp), %rdx
-	movq	8(%rsp), %rsi
-	movq	16(%rsp), %rdi
-	movq	%rsi, (%rbx)
-	movq	%rdi, 8(%rbx)
-	movq	%rdx, 16(%rbx)
-	movq	%rcx, 24(%rbx)
-	movq	%rax, 32(%rbx)
-	movq	%r14, 40(%rbx)
-	movq	%r11, 48(%rbx)
-	movq	%r10, 56(%rbx)
-	movq	%r9, 64(%rbx)
-	movq	%r8, 72(%rbx)
-	addq	$88, %rsp
+	callq	mulPv512x64bmi2@PLT             # %rsi and %rdx reach the helper unchanged from this function's entry
+	movq	8(%rsp), %r8                    # load all nine product limbs from the scratch buffer ...
+	movq	16(%rsp), %r9
+	movq	24(%rsp), %r10
+	movq	32(%rsp), %r11
+	movq	40(%rsp), %rdi
+	movq	48(%rsp), %rax
+	movq	56(%rsp), %rcx
+	movq	64(%rsp), %rdx
+	movq	72(%rsp), %rsi
+	movq	%rsi, 64(%rbx)                  # ... then store them to the destination saved in %rbx
+	movq	%rdx, 56(%rbx)
+	movq	%rcx, 48(%rbx)
+	movq	%rax, 40(%rbx)
+	movq	%rdi, 32(%rbx)
+	movq	%r11, 24(%rbx)
+	movq	%r10, 16(%rbx)
+	movq	%r9, 8(%rbx)
+	movq	%r8, (%rbx)
+	addq	$80, %rsp
 	popq	%rbx
-	popq	%r14
 	retq
-.Lfunc_end127:
-	.size	mcl_fp_mulUnitPre9Lbmi2, .Lfunc_end127-mcl_fp_mulUnitPre9Lbmi2
-
-	.globl	mcl_fpDbl_mulPre9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre9Lbmi2,@function
-mcl_fpDbl_mulPre9Lbmi2:                 # @mcl_fpDbl_mulPre9Lbmi2
-# BB#0:
+.Lfunc_end57:
+	.size	mcl_fp_mulUnitPre8Lbmi2, .Lfunc_end57-mcl_fp_mulUnitPre8Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre8Lbmi2          # -- Begin function mcl_fpDbl_mulPre8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre8Lbmi2,@function
+mcl_fpDbl_mulPre8Lbmi2:                 # @mcl_fpDbl_mulPre8Lbmi2
+# %bb.0: z = x * y with z at %rdi (16 limbs), x at %rsi and y at %rdx (8 limbs each); one mulPv512x64bmi2 row per limb of y, summed limb by limb
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$808, %rsp              # imm = 0x328
+	subq	$648, %rsp                      # imm = 0x288
 	movq	%rdx, %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	%rsi, 72(%rsp)          # 8-byte Spill
-	movq	%rdi, %r12
-	movq	%r12, 80(%rsp)          # 8-byte Spill
-	movq	(%rax), %rdx
-	movq	%rax, %rbx
-	leaq	728(%rsp), %rdi
-	movq	%rsi, %rbp
-	callq	.LmulPv576x64
-	movq	800(%rsp), %r13
-	movq	792(%rsp), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	movq	784(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	776(%rsp), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	768(%rsp), %rax
-	movq	%rax, 32(%rsp)          # 8-byte Spill
-	movq	760(%rsp), %rax
-	movq	%rax, 24(%rsp)          # 8-byte Spill
-	movq	752(%rsp), %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	movq	744(%rsp), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	728(%rsp), %rax
-	movq	736(%rsp), %r14
-	movq	%rax, (%r12)
-	movq	8(%rbx), %rdx
-	leaq	648(%rsp), %rdi
-	movq	%rbp, %rsi
-	callq	.LmulPv576x64
-	movq	720(%rsp), %r8
-	movq	712(%rsp), %rcx
-	movq	704(%rsp), %rdx
-	movq	696(%rsp), %rsi
-	movq	688(%rsp), %rdi
-	movq	680(%rsp), %rbp
-	addq	648(%rsp), %r14
-	movq	672(%rsp), %rax
-	movq	656(%rsp), %rbx
-	movq	664(%rsp), %r15
-	movq	%r14, 8(%r12)
-	adcq	8(%rsp), %rbx           # 8-byte Folded Reload
-	adcq	16(%rsp), %r15          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, %r14
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 16(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 24(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 32(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 40(%rsp)          # 8-byte Spill
-	adcq	%r13, %rcx
-	movq	%rcx, 48(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	64(%rsp), %r13          # 8-byte Reload
-	movq	16(%r13), %rdx
-	leaq	568(%rsp), %rdi
-	movq	72(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	640(%rsp), %r8
-	movq	632(%rsp), %r9
-	movq	624(%rsp), %r10
-	movq	616(%rsp), %rdi
-	movq	608(%rsp), %rbp
-	movq	600(%rsp), %rcx
-	addq	568(%rsp), %rbx
-	movq	592(%rsp), %rdx
-	movq	576(%rsp), %r12
-	movq	584(%rsp), %rsi
-	movq	80(%rsp), %rax          # 8-byte Reload
-	movq	%rbx, 16(%rax)
-	adcq	%r15, %r12
-	adcq	%r14, %rsi
-	movq	%rsi, (%rsp)            # 8-byte Spill
-	adcq	16(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 8(%rsp)           # 8-byte Spill
-	adcq	24(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %r10          # 8-byte Folded Reload
-	movq	%r10, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 48(%rsp)           # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	24(%r13), %rdx
-	leaq	488(%rsp), %rdi
-	movq	72(%rsp), %r15          # 8-byte Reload
-	movq	%r15, %rsi
-	callq	.LmulPv576x64
-	movq	560(%rsp), %r8
-	movq	552(%rsp), %rcx
-	movq	544(%rsp), %rdx
-	movq	536(%rsp), %rsi
-	movq	528(%rsp), %rdi
-	movq	520(%rsp), %rbp
-	addq	488(%rsp), %r12
-	movq	512(%rsp), %rax
-	movq	496(%rsp), %rbx
-	movq	504(%rsp), %r13
-	movq	80(%rsp), %r14          # 8-byte Reload
-	movq	%r12, 24(%r14)
-	adcq	(%rsp), %rbx            # 8-byte Folded Reload
-	adcq	8(%rsp), %r13           # 8-byte Folded Reload
-	adcq	16(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	adcq	24(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 48(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	64(%rsp), %r12          # 8-byte Reload
-	movq	32(%r12), %rdx
-	leaq	408(%rsp), %rdi
+	movq	%rdi, 32(%rsp)                  # 8-byte Spill: z (destination pointer)
+	movq	(%rdx), %rdx                    # y[0]
+	movq	%rax, %r12
+	movq	%rax, 40(%rsp)                  # 8-byte Spill: y pointer
+	leaq	576(%rsp), %rdi                 # row 0 buffer (9 limbs)
+	movq	%rsi, %r15                      # x pointer kept in callee-saved %r15
+	callq	mulPv512x64bmi2@PLT             # row 0 = x * y[0]
+	movq	640(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	movq	632(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	624(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	616(%rsp), %r13
+	movq	608(%rsp), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	movq	600(%rsp), %rbp
+	movq	592(%rsp), %rbx
+	movq	576(%rsp), %rax
+	movq	584(%rsp), %r14
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	movq	%rax, (%rcx)                    # z[0]
+	movq	8(%r12), %rdx                   # y[1]
+	leaq	504(%rsp), %rdi                 # row 1 buffer
 	movq	%r15, %rsi
-	callq	.LmulPv576x64
-	movq	480(%rsp), %r8
-	movq	472(%rsp), %r9
-	movq	464(%rsp), %rdx
-	movq	456(%rsp), %rsi
-	movq	448(%rsp), %rdi
-	movq	440(%rsp), %rbp
-	addq	408(%rsp), %rbx
-	movq	432(%rsp), %rax
-	movq	416(%rsp), %r15
-	movq	424(%rsp), %rcx
-	movq	%rbx, 32(%r14)
-	adcq	%r13, %r15
-	adcq	8(%rsp), %rcx           # 8-byte Folded Reload
-	movq	%rcx, (%rsp)            # 8-byte Spill
-	adcq	16(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	adcq	24(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 48(%rsp)           # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	%r12, %r14
-	movq	40(%r14), %rdx
-	leaq	328(%rsp), %rdi
-	movq	72(%rsp), %r13          # 8-byte Reload
-	movq	%r13, %rsi
-	callq	.LmulPv576x64
-	movq	400(%rsp), %r8
-	movq	392(%rsp), %r9
-	movq	384(%rsp), %rsi
-	movq	376(%rsp), %rdi
-	movq	368(%rsp), %rbx
-	movq	360(%rsp), %rbp
-	addq	328(%rsp), %r15
-	movq	352(%rsp), %rcx
-	movq	336(%rsp), %r12
-	movq	344(%rsp), %rdx
-	movq	80(%rsp), %rax          # 8-byte Reload
-	movq	%r15, 40(%rax)
-	adcq	(%rsp), %r12            # 8-byte Folded Reload
-	adcq	8(%rsp), %rdx           # 8-byte Folded Reload
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	adcq	16(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 8(%rsp)           # 8-byte Spill
-	adcq	24(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 48(%rsp)           # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	48(%r14), %rdx
-	leaq	248(%rsp), %rdi
-	movq	%r13, %rsi
-	movq	%r13, %r15
-	callq	.LmulPv576x64
-	movq	320(%rsp), %r8
-	movq	312(%rsp), %r9
-	movq	304(%rsp), %rsi
-	movq	296(%rsp), %rdi
-	movq	288(%rsp), %rbx
-	movq	280(%rsp), %rbp
-	addq	248(%rsp), %r12
-	movq	272(%rsp), %rcx
-	movq	256(%rsp), %r13
-	movq	264(%rsp), %rdx
-	movq	80(%rsp), %rax          # 8-byte Reload
-	movq	%r12, 48(%rax)
-	adcq	(%rsp), %r13            # 8-byte Folded Reload
-	adcq	8(%rsp), %rdx           # 8-byte Folded Reload
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	adcq	16(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 8(%rsp)           # 8-byte Spill
-	adcq	24(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 48(%rsp)           # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	56(%r14), %rdx
-	leaq	168(%rsp), %rdi
+	movq	%r15, 56(%rsp)                  # 8-byte Spill: x pointer
+	callq	mulPv512x64bmi2@PLT             # row 1 = x * y[1]
+	movq	568(%rsp), %r12                 # top limb of row 1
+	addq	504(%rsp), %r14                 # add row 1 onto the carried-over limbs of the running sum
+	adcq	512(%rsp), %rbx
+	movq	%rbx, 24(%rsp)                  # 8-byte Spill
+	adcq	520(%rsp), %rbp
+	movq	%rbp, 64(%rsp)                  # 8-byte Spill
+	movq	48(%rsp), %rax                  # 8-byte Reload
+	adcq	528(%rsp), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	adcq	536(%rsp), %r13
+	movq	16(%rsp), %rbp                  # 8-byte Reload
+	adcq	544(%rsp), %rbp
+	movq	8(%rsp), %rax                   # 8-byte Reload
+	adcq	552(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rax                    # 8-byte Reload
+	adcq	560(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	movq	%r14, 8(%rax)                   # z[1]
+	adcq	$0, %r12                        # carry out of the row sum goes into the row's top limb
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	movq	16(%rax), %rdx                  # y[2]
+	leaq	432(%rsp), %rdi                 # row 2 buffer
 	movq	%r15, %rsi
-	callq	.LmulPv576x64
-	movq	240(%rsp), %rcx
-	movq	232(%rsp), %rdx
-	movq	224(%rsp), %rsi
-	movq	216(%rsp), %rdi
-	movq	208(%rsp), %rbx
-	addq	168(%rsp), %r13
-	movq	200(%rsp), %r12
-	movq	192(%rsp), %rbp
-	movq	176(%rsp), %r14
-	movq	184(%rsp), %r15
-	movq	80(%rsp), %rax          # 8-byte Reload
-	movq	%r13, 56(%rax)
-	adcq	(%rsp), %r14            # 8-byte Folded Reload
-	adcq	8(%rsp), %r15           # 8-byte Folded Reload
-	adcq	16(%rsp), %rbp          # 8-byte Folded Reload
-	adcq	24(%rsp), %r12          # 8-byte Folded Reload
-	adcq	32(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, %r13
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %rax          # 8-byte Reload
-	movq	64(%rax), %rdx
-	leaq	88(%rsp), %rdi
-	movq	72(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	88(%rsp), %r14
+	callq	mulPv512x64bmi2@PLT             # row 2 = x * y[2]
+	movq	496(%rsp), %r15
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	addq	432(%rsp), %rcx
+	movq	64(%rsp), %rax                  # 8-byte Reload
+	adcq	440(%rsp), %rax
+	movq	%rax, 64(%rsp)                  # 8-byte Spill
+	movq	48(%rsp), %rbx                  # 8-byte Reload
+	adcq	448(%rsp), %rbx
+	adcq	456(%rsp), %r13
+	movq	%r13, 24(%rsp)                  # 8-byte Spill
+	adcq	464(%rsp), %rbp
+	movq	%rbp, 16(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rbp                   # 8-byte Reload
+	adcq	472(%rsp), %rbp
+	movq	(%rsp), %rax                    # 8-byte Reload
+	adcq	480(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	adcq	488(%rsp), %r12
+	movq	32(%rsp), %r14                  # 8-byte Reload
+	movq	%rcx, 16(%r14)                  # z[2]
+	adcq	$0, %r15
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	movq	24(%rax), %rdx                  # y[3]
+	leaq	360(%rsp), %rdi                 # row 3 buffer
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT             # row 3 = x * y[3]
+	movq	424(%rsp), %r13
+	movq	64(%rsp), %rcx                  # 8-byte Reload
+	addq	360(%rsp), %rcx
+	adcq	368(%rsp), %rbx
+	movq	%rbx, 48(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	376(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	384(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	adcq	392(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rbx                    # 8-byte Reload
+	adcq	400(%rsp), %rbx
+	adcq	408(%rsp), %r12
+	adcq	416(%rsp), %r15
+	movq	%rcx, 24(%r14)                  # z[3]
+	adcq	$0, %r13
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	movq	32(%rax), %rdx                  # y[4]
+	leaq	288(%rsp), %rdi                 # row 4 buffer
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT             # row 4 = x * y[4]
+	movq	352(%rsp), %r14
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	addq	288(%rsp), %rcx
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	296(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	304(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rbp                   # 8-byte Reload
+	adcq	312(%rsp), %rbp
+	adcq	320(%rsp), %rbx
+	movq	%rbx, (%rsp)                    # 8-byte Spill
+	adcq	328(%rsp), %r12
+	adcq	336(%rsp), %r15
+	adcq	344(%rsp), %r13
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	movq	%rcx, 32(%rax)                  # z[4]
+	adcq	$0, %r14
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	movq	40(%rax), %rdx                  # y[5]
+	leaq	216(%rsp), %rdi                 # row 5 buffer
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT             # row 5 = x * y[5]
+	movq	280(%rsp), %rbx
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	addq	216(%rsp), %rax
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	224(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	adcq	232(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	adcq	248(%rsp), %r12
+	adcq	256(%rsp), %r15
+	adcq	264(%rsp), %r13
+	adcq	272(%rsp), %r14
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	movq	%rax, 40(%rcx)                  # z[5]
+	adcq	$0, %rbx
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	movq	48(%rax), %rdx                  # y[6]
+	leaq	144(%rsp), %rdi                 # row 6 buffer
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT             # row 6 = x * y[6]
+	movq	208(%rsp), %rbp
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	addq	144(%rsp), %rax
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	152(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	160(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	adcq	168(%rsp), %r12
+	adcq	176(%rsp), %r15
+	adcq	184(%rsp), %r13
+	adcq	192(%rsp), %r14
+	adcq	200(%rsp), %rbx
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	movq	%rax, 48(%rcx)                  # z[6]
+	adcq	$0, %rbp
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	movq	56(%rax), %rdx                  # y[7]
+	leaq	72(%rsp), %rdi                  # row 7 buffer
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT             # row 7 = x * y[7]
+	movq	136(%rsp), %rax                 # top limb of row 7
+	movq	8(%rsp), %rsi                   # 8-byte Reload
+	addq	72(%rsp), %rsi
+	movq	(%rsp), %rdx                    # 8-byte Reload
+	adcq	80(%rsp), %rdx
+	adcq	88(%rsp), %r12
 	adcq	96(%rsp), %r15
-	movq	160(%rsp), %r8
-	adcq	104(%rsp), %rbp
-	movq	152(%rsp), %r9
-	movq	144(%rsp), %rdx
-	movq	136(%rsp), %rsi
-	movq	128(%rsp), %rdi
-	movq	120(%rsp), %rbx
-	movq	112(%rsp), %rax
-	movq	80(%rsp), %rcx          # 8-byte Reload
-	movq	%r14, 64(%rcx)
-	movq	%r15, 72(%rcx)
-	adcq	%r12, %rax
-	movq	%rbp, 80(%rcx)
-	movq	%rax, 88(%rcx)
-	adcq	%r13, %rbx
-	movq	%rbx, 96(%rcx)
-	adcq	32(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 104(%rcx)
-	adcq	40(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 112(%rcx)
-	adcq	48(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 120(%rcx)
-	adcq	56(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 128(%rcx)
-	adcq	$0, %r8
-	movq	%r8, 136(%rcx)
-	addq	$808, %rsp              # imm = 0x328
+	adcq	104(%rsp), %r13
+	adcq	112(%rsp), %r14
+	adcq	120(%rsp), %rbx
+	adcq	128(%rsp), %rbp
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	movq	%rbp, 112(%rcx)                 # z[14]; the stores below do not touch the carry flag
+	movq	%rbx, 104(%rcx)                 # z[13]
+	movq	%r14, 96(%rcx)                  # z[12]
+	movq	%r13, 88(%rcx)                  # z[11]
+	movq	%r15, 80(%rcx)                  # z[10]
+	movq	%r12, 72(%rcx)                  # z[9]
+	movq	%rdx, 64(%rcx)                  # z[8]
+	movq	%rsi, 56(%rcx)                  # z[7]
+	adcq	$0, %rax                        # final carry into the top limb
+	movq	%rax, 120(%rcx)                 # z[15]
+	addq	$648, %rsp                      # imm = 0x288
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -11614,298 +4707,219 @@ mcl_fpDbl_mulPre9Lbmi2:                 # @mcl_fpDbl_mulPre9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end128:
-	.size	mcl_fpDbl_mulPre9Lbmi2, .Lfunc_end128-mcl_fpDbl_mulPre9Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre9Lbmi2,@function
-mcl_fpDbl_sqrPre9Lbmi2:                 # @mcl_fpDbl_sqrPre9Lbmi2
-# BB#0:
+.Lfunc_end58:
+	.size	mcl_fpDbl_mulPre8Lbmi2, .Lfunc_end58-mcl_fpDbl_mulPre8Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre8Lbmi2          # -- Begin function mcl_fpDbl_sqrPre8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre8Lbmi2,@function
+mcl_fpDbl_sqrPre8Lbmi2:                 # @mcl_fpDbl_sqrPre8Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$808, %rsp              # imm = 0x328
+	subq	$648, %rsp                      # imm = 0x288
 	movq	%rsi, %r15
-	movq	%r15, 80(%rsp)          # 8-byte Spill
-	movq	%rdi, %r14
-	movq	%r14, 72(%rsp)          # 8-byte Spill
-	movq	(%r15), %rdx
-	leaq	728(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	800(%rsp), %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	792(%rsp), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	movq	784(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	776(%rsp), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	768(%rsp), %rax
-	movq	%rax, 32(%rsp)          # 8-byte Spill
-	movq	760(%rsp), %rax
-	movq	%rax, 24(%rsp)          # 8-byte Spill
-	movq	752(%rsp), %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	movq	744(%rsp), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	728(%rsp), %rax
-	movq	736(%rsp), %r12
-	movq	%rax, (%r14)
+	movq	%rdi, %r12
+	movq	%rdi, 56(%rsp)                  # 8-byte Spill
+	movq	(%rsi), %rdx
+	leaq	576(%rsp), %rdi
+	callq	mulPv512x64bmi2@PLT
+	movq	640(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	632(%rsp), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	movq	624(%rsp), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	movq	616(%rsp), %rax
+	movq	%rax, 40(%rsp)                  # 8-byte Spill
+	movq	608(%rsp), %r13
+	movq	600(%rsp), %rbp
+	movq	592(%rsp), %rbx
+	movq	576(%rsp), %rax
+	movq	584(%rsp), %r14
+	movq	%rax, (%r12)
 	movq	8(%r15), %rdx
-	leaq	648(%rsp), %rdi
+	leaq	504(%rsp), %rdi
 	movq	%r15, %rsi
-	callq	.LmulPv576x64
-	movq	720(%rsp), %r8
-	movq	712(%rsp), %rcx
-	movq	704(%rsp), %rdx
-	movq	696(%rsp), %rsi
-	movq	688(%rsp), %rdi
-	movq	680(%rsp), %rbp
-	addq	648(%rsp), %r12
-	movq	672(%rsp), %rax
-	movq	656(%rsp), %rbx
-	movq	664(%rsp), %r13
-	movq	%r12, 8(%r14)
-	adcq	8(%rsp), %rbx           # 8-byte Folded Reload
-	adcq	16(%rsp), %r13          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
+	callq	mulPv512x64bmi2@PLT
+	movq	568(%rsp), %rax
+	addq	504(%rsp), %r14
+	adcq	512(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	adcq	520(%rsp), %rbp
+	movq	%rbp, 64(%rsp)                  # 8-byte Spill
+	adcq	528(%rsp), %r13
+	movq	%r13, %rbx
+	movq	40(%rsp), %r13                  # 8-byte Reload
+	adcq	536(%rsp), %r13
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	544(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	adcq	552(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %r12                  # 8-byte Reload
+	adcq	560(%rsp), %r12
+	movq	56(%rsp), %rcx                  # 8-byte Reload
+	movq	%r14, 8(%rcx)
+	adcq	$0, %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
 	movq	16(%r15), %rdx
-	leaq	568(%rsp), %rdi
+	leaq	432(%rsp), %rdi
 	movq	%r15, %rsi
-	callq	.LmulPv576x64
-	movq	640(%rsp), %r8
-	movq	632(%rsp), %rcx
-	movq	624(%rsp), %rdx
-	movq	616(%rsp), %rsi
-	movq	608(%rsp), %rdi
-	movq	600(%rsp), %rbp
-	addq	568(%rsp), %rbx
-	movq	592(%rsp), %rax
-	movq	576(%rsp), %r14
-	movq	584(%rsp), %r12
-	movq	72(%rsp), %r15          # 8-byte Reload
-	movq	%rbx, 16(%r15)
-	adcq	%r13, %r14
-	adcq	16(%rsp), %r12          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	movq	24(%rsi), %rdx
-	leaq	488(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	560(%rsp), %r8
-	movq	552(%rsp), %rcx
-	movq	544(%rsp), %rdx
-	movq	536(%rsp), %rsi
-	movq	528(%rsp), %rdi
-	movq	520(%rsp), %rbp
-	addq	488(%rsp), %r14
-	movq	512(%rsp), %rax
-	movq	496(%rsp), %rbx
-	movq	504(%rsp), %r13
-	movq	%r14, 24(%r15)
-	adcq	%r12, %rbx
-	adcq	16(%rsp), %r13          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	movq	32(%rsi), %rdx
-	leaq	408(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	480(%rsp), %r8
-	movq	472(%rsp), %rcx
-	movq	464(%rsp), %rdx
-	movq	456(%rsp), %rsi
-	movq	448(%rsp), %rdi
-	movq	440(%rsp), %rbp
-	addq	408(%rsp), %rbx
-	movq	432(%rsp), %rax
-	movq	416(%rsp), %r14
-	movq	424(%rsp), %r12
-	movq	%rbx, 32(%r15)
-	adcq	%r13, %r14
-	adcq	16(%rsp), %r12          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	movq	40(%rsi), %rdx
-	leaq	328(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	400(%rsp), %r8
-	movq	392(%rsp), %rcx
-	movq	384(%rsp), %rdx
-	movq	376(%rsp), %rsi
-	movq	368(%rsp), %rdi
-	movq	360(%rsp), %rbp
-	addq	328(%rsp), %r14
-	movq	352(%rsp), %rax
-	movq	336(%rsp), %rbx
-	movq	344(%rsp), %r13
-	movq	%r14, 40(%r15)
-	adcq	%r12, %rbx
-	adcq	16(%rsp), %r13          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	movq	48(%rsi), %rdx
-	leaq	248(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	320(%rsp), %r8
-	movq	312(%rsp), %rcx
-	movq	304(%rsp), %rdx
-	movq	296(%rsp), %rsi
-	movq	288(%rsp), %rdi
+	callq	mulPv512x64bmi2@PLT
+	movq	496(%rsp), %rax
+	movq	8(%rsp), %rdx                   # 8-byte Reload
+	addq	432(%rsp), %rdx
+	movq	64(%rsp), %rcx                  # 8-byte Reload
+	adcq	440(%rsp), %rcx
+	movq	%rcx, 64(%rsp)                  # 8-byte Spill
+	adcq	448(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  # 8-byte Spill
+	adcq	456(%rsp), %r13
+	movq	48(%rsp), %rbx                  # 8-byte Reload
+	adcq	464(%rsp), %rbx
+	movq	32(%rsp), %rbp                  # 8-byte Reload
+	adcq	472(%rsp), %rbp
+	adcq	480(%rsp), %r12
+	movq	%r12, 24(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	488(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	movq	56(%rsp), %r12                  # 8-byte Reload
+	movq	%rdx, 16(%r12)
+	adcq	$0, %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	24(%r15), %rdx
+	leaq	360(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	mulPv512x64bmi2@PLT
+	movq	424(%rsp), %r14
+	movq	64(%rsp), %rax                  # 8-byte Reload
+	addq	360(%rsp), %rax
+	movq	40(%rsp), %rcx                  # 8-byte Reload
+	adcq	368(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	adcq	376(%rsp), %r13
+	adcq	384(%rsp), %rbx
+	movq	%rbx, 48(%rsp)                  # 8-byte Spill
+	adcq	392(%rsp), %rbp
+	movq	%rbp, %rbx
+	movq	24(%rsp), %rbp                  # 8-byte Reload
+	adcq	400(%rsp), %rbp
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	408(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	416(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	movq	%rax, 24(%r12)
+	adcq	$0, %r14
+	movq	32(%r15), %rdx
+	leaq	288(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	mulPv512x64bmi2@PLT
+	movq	352(%rsp), %r12
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	addq	288(%rsp), %rax
+	adcq	296(%rsp), %r13
+	movq	%r13, 40(%rsp)                  # 8-byte Spill
+	movq	48(%rsp), %r13                  # 8-byte Reload
+	adcq	304(%rsp), %r13
+	adcq	312(%rsp), %rbx
+	movq	%rbx, 32(%rsp)                  # 8-byte Spill
+	adcq	320(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rbx                  # 8-byte Reload
+	adcq	328(%rsp), %rbx
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	336(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	adcq	344(%rsp), %r14
+	movq	56(%rsp), %rcx                  # 8-byte Reload
+	movq	%rax, 32(%rcx)
+	adcq	$0, %r12
+	movq	40(%r15), %rdx
+	leaq	216(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	mulPv512x64bmi2@PLT
 	movq	280(%rsp), %rbp
-	addq	248(%rsp), %rbx
-	movq	272(%rsp), %rax
-	movq	256(%rsp), %r12
-	movq	264(%rsp), %r14
-	movq	%rbx, 48(%r15)
-	adcq	%r13, %r12
-	adcq	16(%rsp), %r14          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	movq	56(%rsi), %rdx
-	leaq	168(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	240(%rsp), %r8
-	movq	232(%rsp), %rdx
-	movq	224(%rsp), %rsi
-	movq	216(%rsp), %rdi
-	movq	208(%rsp), %rbx
-	movq	200(%rsp), %rcx
-	addq	168(%rsp), %r12
-	movq	192(%rsp), %r15
-	movq	176(%rsp), %r13
-	movq	184(%rsp), %rbp
-	movq	72(%rsp), %rax          # 8-byte Reload
-	movq	%r12, 56(%rax)
-	adcq	%r14, %r13
-	adcq	16(%rsp), %rbp          # 8-byte Folded Reload
-	adcq	24(%rsp), %r15          # 8-byte Folded Reload
-	adcq	32(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, %r12
-	adcq	40(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, %r14
-	adcq	48(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	movq	64(%rsi), %rdx
-	leaq	88(%rsp), %rdi
-	callq	.LmulPv576x64
-	addq	88(%rsp), %r13
-	adcq	96(%rsp), %rbp
-	movq	160(%rsp), %r8
-	adcq	104(%rsp), %r15
-	movq	152(%rsp), %r9
-	movq	144(%rsp), %rdx
-	movq	136(%rsp), %rsi
-	movq	128(%rsp), %rdi
-	movq	120(%rsp), %rbx
-	movq	112(%rsp), %rax
-	movq	72(%rsp), %rcx          # 8-byte Reload
-	movq	%r13, 64(%rcx)
-	movq	%rbp, 72(%rcx)
-	adcq	%r12, %rax
-	movq	%r15, 80(%rcx)
-	movq	%rax, 88(%rcx)
-	adcq	%r14, %rbx
-	movq	%rbx, 96(%rcx)
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 104(%rcx)
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 112(%rcx)
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 120(%rcx)
-	adcq	64(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 128(%rcx)
-	adcq	$0, %r8
-	movq	%r8, 136(%rcx)
-	addq	$808, %rsp              # imm = 0x328
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	addq	216(%rsp), %rax
+	adcq	224(%rsp), %r13
+	movq	%r13, 48(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	adcq	232(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  # 8-byte Spill
+	adcq	248(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rbx                   # 8-byte Reload
+	adcq	256(%rsp), %rbx
+	adcq	264(%rsp), %r14
+	adcq	272(%rsp), %r12
+	movq	56(%rsp), %rcx                  # 8-byte Reload
+	movq	%rax, 40(%rcx)
+	adcq	$0, %rbp
+	movq	48(%r15), %rdx
+	leaq	144(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	mulPv512x64bmi2@PLT
+	movq	208(%rsp), %r13
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	addq	144(%rsp), %rcx
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	adcq	152(%rsp), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	160(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	168(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	adcq	176(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	adcq	184(%rsp), %r14
+	adcq	192(%rsp), %r12
+	adcq	200(%rsp), %rbp
+	movq	56(%rsp), %rax                  # 8-byte Reload
+	movq	%rcx, 48(%rax)
+	adcq	$0, %r13
+	movq	56(%r15), %rdx
+	leaq	72(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	mulPv512x64bmi2@PLT
+	movq	136(%rsp), %rax
+	movq	32(%rsp), %rsi                  # 8-byte Reload
+	addq	72(%rsp), %rsi
+	movq	24(%rsp), %rdi                  # 8-byte Reload
+	adcq	80(%rsp), %rdi
+	movq	16(%rsp), %rbx                  # 8-byte Reload
+	adcq	88(%rsp), %rbx
+	movq	8(%rsp), %rdx                   # 8-byte Reload
+	adcq	96(%rsp), %rdx
+	adcq	104(%rsp), %r14
+	adcq	112(%rsp), %r12
+	adcq	120(%rsp), %rbp
+	adcq	128(%rsp), %r13
+	movq	56(%rsp), %rcx                  # 8-byte Reload
+	movq	%r13, 112(%rcx)
+	movq	%rbp, 104(%rcx)
+	movq	%r12, 96(%rcx)
+	movq	%r14, 88(%rcx)
+	movq	%rdx, 80(%rcx)
+	movq	%rbx, 72(%rcx)
+	movq	%rdi, 64(%rcx)
+	movq	%rsi, 56(%rcx)
+	adcq	$0, %rax
+	movq	%rax, 120(%rcx)
+	addq	$648, %rsp                      # imm = 0x288
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -11913,559 +4927,859 @@ mcl_fpDbl_sqrPre9Lbmi2:                 # @mcl_fpDbl_sqrPre9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end129:
-	.size	mcl_fpDbl_sqrPre9Lbmi2, .Lfunc_end129-mcl_fpDbl_sqrPre9Lbmi2
-
-	.globl	mcl_fp_mont9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont9Lbmi2,@function
-mcl_fp_mont9Lbmi2:                      # @mcl_fp_mont9Lbmi2
-# BB#0:
+.Lfunc_end59:
+	.size	mcl_fpDbl_sqrPre8Lbmi2, .Lfunc_end59-mcl_fpDbl_sqrPre8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_mont8Lbmi2               # -- Begin function mcl_fp_mont8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mont8Lbmi2,@function
+mcl_fp_mont8Lbmi2:                      # @mcl_fp_mont8Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$1560, %rsp             # imm = 0x618
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	movq	%rdx, 32(%rsp)          # 8-byte Spill
-	movq	%rsi, 24(%rsp)          # 8-byte Spill
-	movq	%rdi, (%rsp)            # 8-byte Spill
+	subq	$1256, %rsp                     # imm = 0x4E8
+	movq	%rcx, %r13
+	movq	%rdx, 80(%rsp)                  # 8-byte Spill
+	movq	%rsi, 88(%rsp)                  # 8-byte Spill
+	movq	%rdi, 96(%rsp)                  # 8-byte Spill
 	movq	-8(%rcx), %rbx
-	movq	%rbx, 16(%rsp)          # 8-byte Spill
+	movq	%rbx, 72(%rsp)                  # 8-byte Spill
+	movq	%rcx, 56(%rsp)                  # 8-byte Spill
 	movq	(%rdx), %rdx
-	leaq	1480(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	1480(%rsp), %r14
-	movq	1488(%rsp), %r15
-	movq	%r14, %rdx
-	imulq	%rbx, %rdx
-	movq	1552(%rsp), %rax
-	movq	%rax, 112(%rsp)         # 8-byte Spill
-	movq	1544(%rsp), %rax
-	movq	%rax, 104(%rsp)         # 8-byte Spill
-	movq	1536(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	1528(%rsp), %r12
-	movq	1520(%rsp), %r13
-	movq	1512(%rsp), %rbx
-	movq	1504(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	1496(%rsp), %rbp
-	leaq	1400(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	1400(%rsp), %r14
-	adcq	1408(%rsp), %r15
-	adcq	1416(%rsp), %rbp
-	movq	%rbp, 96(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	1424(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	adcq	1432(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          # 8-byte Spill
-	adcq	1440(%rsp), %r13
-	movq	%r13, 64(%rsp)          # 8-byte Spill
-	adcq	1448(%rsp), %r12
-	movq	%r12, 48(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %rbx          # 8-byte Reload
-	adcq	1456(%rsp), %rbx
-	movq	104(%rsp), %r14         # 8-byte Reload
-	adcq	1464(%rsp), %r14
-	movq	112(%rsp), %r13         # 8-byte Reload
-	adcq	1472(%rsp), %r13
-	sbbq	%rbp, %rbp
-	movq	32(%rsp), %rax          # 8-byte Reload
+	leaq	1184(%rsp), %rdi
+	callq	mulPv512x64bmi2@PLT
+	movq	1184(%rsp), %r15
+	movq	1192(%rsp), %r12
+	movq	%rbx, %rdx
+	imulq	%r15, %rdx
+	movq	1248(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	1240(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	1232(%rsp), %r14
+	movq	1224(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	movq	1216(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	1208(%rsp), %rbx
+	movq	1200(%rsp), %rbp
+	leaq	1112(%rsp), %rdi
+	movq	%r13, %rsi
+	callq	mulPv512x64bmi2@PLT
+	addq	1112(%rsp), %r15
+	adcq	1120(%rsp), %r12
+	adcq	1128(%rsp), %rbp
+	movq	%rbp, 64(%rsp)                  # 8-byte Spill
+	adcq	1136(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rbp                  # 8-byte Reload
+	adcq	1144(%rsp), %rbp
+	movq	(%rsp), %r15                    # 8-byte Reload
+	adcq	1152(%rsp), %r15
+	adcq	1160(%rsp), %r14
+	movq	%r14, 48(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %r13                  # 8-byte Reload
+	adcq	1168(%rsp), %r13
+	movq	8(%rsp), %rbx                   # 8-byte Reload
+	adcq	1176(%rsp), %rbx
+	setb	%r14b
+	movq	80(%rsp), %rax                  # 8-byte Reload
 	movq	8(%rax), %rdx
-	leaq	1320(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %ebp
-	addq	1320(%rsp), %r15
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	1328(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	1336(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %r12          # 8-byte Reload
-	adcq	1344(%rsp), %r12
-	movq	64(%rsp), %rax          # 8-byte Reload
-	adcq	1352(%rsp), %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %rax          # 8-byte Reload
-	adcq	1360(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	adcq	1368(%rsp), %rbx
-	adcq	1376(%rsp), %r14
-	movq	%r14, 104(%rsp)         # 8-byte Spill
-	adcq	1384(%rsp), %r13
-	movq	%r13, 112(%rsp)         # 8-byte Spill
-	adcq	1392(%rsp), %rbp
-	sbbq	%r14, %r14
-	movq	%r15, %rdx
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	1240(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	%r14, %rax
-	andl	$1, %eax
-	addq	1240(%rsp), %r15
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	1248(%rsp), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %r14          # 8-byte Reload
-	adcq	1256(%rsp), %r14
-	adcq	1264(%rsp), %r12
-	movq	%r12, 40(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r12          # 8-byte Reload
-	adcq	1272(%rsp), %r12
-	movq	48(%rsp), %r13          # 8-byte Reload
-	adcq	1280(%rsp), %r13
-	adcq	1288(%rsp), %rbx
-	movq	%rbx, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %r15         # 8-byte Reload
-	adcq	1296(%rsp), %r15
-	movq	112(%rsp), %rbx         # 8-byte Reload
-	adcq	1304(%rsp), %rbx
-	adcq	1312(%rsp), %rbp
-	adcq	$0, %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rax          # 8-byte Reload
+	leaq	1040(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movzbl	%r14b, %ecx
+	addq	1040(%rsp), %r12
+	movq	64(%rsp), %r14                  # 8-byte Reload
+	adcq	1048(%rsp), %r14
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	adcq	1056(%rsp), %rax
+	movq	%rax, 40(%rsp)                  # 8-byte Spill
+	adcq	1064(%rsp), %rbp
+	adcq	1072(%rsp), %r15
+	movq	%r15, (%rsp)                    # 8-byte Spill
+	movq	48(%rsp), %rax                  # 8-byte Reload
+	adcq	1080(%rsp), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	adcq	1088(%rsp), %r13
+	movq	%r13, 16(%rsp)                  # 8-byte Spill
+	adcq	1096(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	adcq	1104(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	setb	%r15b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r12, %rdx
+	leaq	968(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movzbl	%r15b, %r15d
+	addq	968(%rsp), %r12
+	adcq	976(%rsp), %r14
+	movq	%r14, 64(%rsp)                  # 8-byte Spill
+	movq	40(%rsp), %r13                  # 8-byte Reload
+	adcq	984(%rsp), %r13
+	adcq	992(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  # 8-byte Spill
+	movq	(%rsp), %r12                    # 8-byte Reload
+	adcq	1000(%rsp), %r12
+	movq	48(%rsp), %r14                  # 8-byte Reload
+	adcq	1008(%rsp), %r14
+	movq	16(%rsp), %rbx                  # 8-byte Reload
+	adcq	1016(%rsp), %rbx
+	movq	8(%rsp), %rax                   # 8-byte Reload
+	adcq	1024(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	32(%rsp), %rbp                  # 8-byte Reload
+	adcq	1032(%rsp), %rbp
+	adcq	$0, %r15
+	movq	80(%rsp), %rax                  # 8-byte Reload
 	movq	16(%rax), %rdx
-	leaq	1160(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	96(%rsp), %rax          # 8-byte Reload
-	addq	1160(%rsp), %rax
-	adcq	1168(%rsp), %r14
-	movq	%r14, 80(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %r14          # 8-byte Reload
-	adcq	1176(%rsp), %r14
-	adcq	1184(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	movq	%r13, %r12
-	adcq	1192(%rsp), %r12
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	adcq	1200(%rsp), %rcx
-	movq	%rcx, 88(%rsp)          # 8-byte Spill
-	adcq	1208(%rsp), %r15
+	leaq	896(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	64(%rsp), %rax                  # 8-byte Reload
+	addq	896(%rsp), %rax
+	adcq	904(%rsp), %r13
+	movq	%r13, 40(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %r13                  # 8-byte Reload
+	adcq	912(%rsp), %r13
+	adcq	920(%rsp), %r12
+	adcq	928(%rsp), %r14
+	movq	%r14, 48(%rsp)                  # 8-byte Spill
+	adcq	936(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rbx                   # 8-byte Reload
+	adcq	944(%rsp), %rbx
+	adcq	952(%rsp), %rbp
+	movq	%rbp, 32(%rsp)                  # 8-byte Spill
+	adcq	960(%rsp), %r15
+	setb	%r14b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	824(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movzbl	%r14b, %eax
+	addq	824(%rsp), %rbp
+	movq	40(%rsp), %r14                  # 8-byte Reload
+	adcq	832(%rsp), %r14
+	adcq	840(%rsp), %r13
+	movq	%r13, 24(%rsp)                  # 8-byte Spill
+	adcq	848(%rsp), %r12
+	movq	%r12, (%rsp)                    # 8-byte Spill
+	movq	48(%rsp), %r12                  # 8-byte Reload
+	adcq	856(%rsp), %r12
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	864(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	movq	%rbx, %rbp
+	adcq	872(%rsp), %rbp
+	movq	32(%rsp), %r13                  # 8-byte Reload
+	adcq	880(%rsp), %r13
+	adcq	888(%rsp), %r15
+	movq	%rax, %rbx
+	adcq	$0, %rbx
+	movq	80(%rsp), %rax                  # 8-byte Reload
+	movq	24(%rax), %rdx
+	leaq	752(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	%r14, %rax
+	addq	752(%rsp), %rax
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	adcq	760(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  # 8-byte Spill
+	movq	(%rsp), %r14                    # 8-byte Reload
+	adcq	768(%rsp), %r14
+	adcq	776(%rsp), %r12
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	784(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	adcq	792(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   # 8-byte Spill
+	adcq	800(%rsp), %r13
+	movq	%r13, 32(%rsp)                  # 8-byte Spill
+	adcq	808(%rsp), %r15
 	movq	%r15, %r13
-	adcq	1216(%rsp), %rbx
-	movq	%rbx, 112(%rsp)         # 8-byte Spill
-	adcq	1224(%rsp), %rbp
-	movq	72(%rsp), %rcx          # 8-byte Reload
-	adcq	1232(%rsp), %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%rax, %rdx
+	adcq	816(%rsp), %rbx
+	setb	%r15b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	680(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movzbl	%r15b, %eax
+	addq	680(%rsp), %rbp
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	adcq	688(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  # 8-byte Spill
+	adcq	696(%rsp), %r14
+	movq	%r14, (%rsp)                    # 8-byte Spill
+	adcq	704(%rsp), %r12
+	movq	16(%rsp), %rbp                  # 8-byte Reload
+	adcq	712(%rsp), %rbp
+	movq	8(%rsp), %r14                   # 8-byte Reload
+	adcq	720(%rsp), %r14
+	movq	32(%rsp), %r15                  # 8-byte Reload
+	adcq	728(%rsp), %r15
+	adcq	736(%rsp), %r13
+	movq	%r13, 40(%rsp)                  # 8-byte Spill
+	adcq	744(%rsp), %rbx
+	adcq	$0, %rax
+	movq	%rax, %r13
+	movq	80(%rsp), %rax                  # 8-byte Reload
+	movq	32(%rax), %rdx
+	leaq	608(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	addq	608(%rsp), %rax
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	616(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	adcq	624(%rsp), %r12
+	adcq	632(%rsp), %rbp
+	movq	%rbp, 16(%rsp)                  # 8-byte Spill
+	adcq	640(%rsp), %r14
+	movq	%r14, 8(%rsp)                   # 8-byte Spill
+	adcq	648(%rsp), %r15
+	movq	%r15, 32(%rsp)                  # 8-byte Spill
+	movq	40(%rsp), %rbp                  # 8-byte Reload
+	adcq	656(%rsp), %rbp
+	adcq	664(%rsp), %rbx
+	movq	%rbx, %r15
+	adcq	672(%rsp), %r13
+	setb	%r14b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
 	movq	%rax, %rbx
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	1080(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
+	leaq	536(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movzbl	%r14b, %eax
+	addq	536(%rsp), %rbx
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	544(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	adcq	552(%rsp), %r12
+	movq	%r12, 48(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %r12                  # 8-byte Reload
+	adcq	560(%rsp), %r12
+	movq	8(%rsp), %rbx                   # 8-byte Reload
+	adcq	568(%rsp), %rbx
+	movq	32(%rsp), %r14                  # 8-byte Reload
+	adcq	576(%rsp), %r14
+	adcq	584(%rsp), %rbp
+	adcq	592(%rsp), %r15
+	movq	%r15, 64(%rsp)                  # 8-byte Spill
+	adcq	600(%rsp), %r13
+	movq	%r13, 24(%rsp)                  # 8-byte Spill
+	adcq	$0, %rax
+	movq	%rax, %r13
+	movq	80(%rsp), %rax                  # 8-byte Reload
+	movq	40(%rax), %rdx
+	leaq	464(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	(%rsp), %rax                    # 8-byte Reload
+	addq	464(%rsp), %rax
+	movq	48(%rsp), %r15                  # 8-byte Reload
+	adcq	472(%rsp), %r15
+	adcq	480(%rsp), %r12
+	movq	%r12, 16(%rsp)                  # 8-byte Spill
+	adcq	488(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	adcq	496(%rsp), %r14
+	movq	%r14, %r12
+	adcq	504(%rsp), %rbp
+	movq	64(%rsp), %rcx                  # 8-byte Reload
+	adcq	512(%rsp), %rcx
+	movq	%rcx, 64(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	adcq	520(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  # 8-byte Spill
+	adcq	528(%rsp), %r13
+	movq	%r13, (%rsp)                    # 8-byte Spill
+	setb	%r14b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	392(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movzbl	%r14b, %eax
+	addq	392(%rsp), %rbx
+	adcq	400(%rsp), %r15
+	movq	%r15, 48(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rbx                  # 8-byte Reload
+	adcq	408(%rsp), %rbx
+	movq	8(%rsp), %r14                   # 8-byte Reload
+	adcq	416(%rsp), %r14
+	adcq	424(%rsp), %r12
+	movq	%r12, 32(%rsp)                  # 8-byte Spill
+	adcq	432(%rsp), %rbp
+	movq	%rbp, 40(%rsp)                  # 8-byte Spill
+	movq	64(%rsp), %rbp                  # 8-byte Reload
+	adcq	440(%rsp), %rbp
+	movq	24(%rsp), %r13                  # 8-byte Reload
+	adcq	448(%rsp), %r13
+	movq	(%rsp), %r12                    # 8-byte Reload
+	adcq	456(%rsp), %r12
+	movq	%rax, %r15
+	adcq	$0, %r15
+	movq	80(%rsp), %rax                  # 8-byte Reload
+	movq	48(%rax), %rdx
+	leaq	320(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	leaq	248(%rsp), %rdi
+	movq	48(%rsp), %rax                  # 8-byte Reload
+	addq	320(%rsp), %rax
+	adcq	328(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  # 8-byte Spill
+	adcq	336(%rsp), %r14
+	movq	%r14, 8(%rsp)                   # 8-byte Spill
+	movq	32(%rsp), %rbx                  # 8-byte Reload
+	adcq	344(%rsp), %rbx
+	movq	40(%rsp), %rcx                  # 8-byte Reload
+	adcq	352(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	adcq	360(%rsp), %rbp
+	adcq	368(%rsp), %r13
+	adcq	376(%rsp), %r12
+	movq	%r12, (%rsp)                    # 8-byte Spill
+	adcq	384(%rsp), %r15
+	movq	%r15, 48(%rsp)                  # 8-byte Spill
+	setb	%r12b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %r14
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movzbl	%r12b, %r12d
+	addq	248(%rsp), %r14
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	256(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %r15                   # 8-byte Reload
+	adcq	264(%rsp), %r15
+	adcq	272(%rsp), %rbx
+	movq	%rbx, 32(%rsp)                  # 8-byte Spill
+	movq	40(%rsp), %rbx                  # 8-byte Reload
+	adcq	280(%rsp), %rbx
+	adcq	288(%rsp), %rbp
+	adcq	296(%rsp), %r13
+	movq	(%rsp), %r14                    # 8-byte Reload
+	adcq	304(%rsp), %r14
+	movq	48(%rsp), %rax                  # 8-byte Reload
+	adcq	312(%rsp), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	adcq	$0, %r12
+	movq	80(%rsp), %rax                  # 8-byte Reload
+	movq	56(%rax), %rdx
+	leaq	176(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	addq	176(%rsp), %rax
+	adcq	184(%rsp), %r15
+	movq	%r15, 8(%rsp)                   # 8-byte Spill
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	adcq	192(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	adcq	200(%rsp), %rbx
+	adcq	208(%rsp), %rbp
+	adcq	216(%rsp), %r13
+	movq	%r13, 24(%rsp)                  # 8-byte Spill
+	adcq	224(%rsp), %r14
+	movq	%r14, (%rsp)                    # 8-byte Spill
+	movq	48(%rsp), %r15                  # 8-byte Reload
+	adcq	232(%rsp), %r15
+	adcq	240(%rsp), %r12
+	setb	%r14b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %r13
+	leaq	104(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movzbl	%r14b, %r9d
+	addq	104(%rsp), %r13
+	movq	8(%rsp), %r11                   # 8-byte Reload
+	adcq	112(%rsp), %r11
+	movq	%r11, 8(%rsp)                   # 8-byte Spill
+	movq	32(%rsp), %r10                  # 8-byte Reload
+	adcq	120(%rsp), %r10
+	movq	%r10, 32(%rsp)                  # 8-byte Spill
+	movq	%rbx, %r8
+	adcq	128(%rsp), %r8
+	movq	%r8, 40(%rsp)                   # 8-byte Spill
+	movq	%rbp, %r13
+	adcq	136(%rsp), %r13
+	movq	24(%rsp), %r14                  # 8-byte Reload
+	adcq	144(%rsp), %r14
+	movq	(%rsp), %rsi                    # 8-byte Reload
+	adcq	152(%rsp), %rsi
+	adcq	160(%rsp), %r15
+	adcq	168(%rsp), %r12
+	adcq	$0, %r9
+	movq	56(%rsp), %rcx                  # 8-byte Reload
+	subq	(%rcx), %r11
+	sbbq	8(%rcx), %r10
+	sbbq	16(%rcx), %r8
+	movq	%r13, %rdi
+	sbbq	24(%rcx), %rdi
+	movq	%r14, %rbx
+	sbbq	32(%rcx), %rbx
+	movq	%rsi, %rbp
+	sbbq	40(%rcx), %rbp
 	movq	%r15, %rax
-	andl	$1, %eax
-	addq	1080(%rsp), %rbx
-	movq	80(%rsp), %rcx          # 8-byte Reload
-	adcq	1088(%rsp), %rcx
-	movq	%rcx, 80(%rsp)          # 8-byte Spill
-	movq	%r14, %r15
-	adcq	1096(%rsp), %r15
-	movq	64(%rsp), %r14          # 8-byte Reload
-	adcq	1104(%rsp), %r14
-	movq	%r12, %rbx
-	adcq	1112(%rsp), %rbx
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	adcq	1120(%rsp), %rcx
-	movq	%rcx, 88(%rsp)          # 8-byte Spill
+	sbbq	48(%rcx), %rax
+	movq	%rcx, %rdx
+	movq	%r12, %rcx
+	sbbq	56(%rdx), %rcx
+	sbbq	$0, %r9
+	testb	$1, %r9b
+	cmovneq	%r12, %rcx
+	movq	96(%rsp), %rdx                  # 8-byte Reload
+	movq	%rcx, 56(%rdx)
+	cmovneq	%r15, %rax
+	movq	%rax, 48(%rdx)
+	cmovneq	%rsi, %rbp
+	movq	%rbp, 40(%rdx)
+	cmovneq	%r14, %rbx
+	movq	%rbx, 32(%rdx)
+	cmovneq	%r13, %rdi
+	movq	%rdi, 24(%rdx)
+	cmovneq	40(%rsp), %r8                   # 8-byte Folded Reload
+	movq	%r8, 16(%rdx)
+	cmovneq	32(%rsp), %r10                  # 8-byte Folded Reload
+	movq	%r10, 8(%rdx)
+	cmovneq	8(%rsp), %r11                   # 8-byte Folded Reload
+	movq	%r11, (%rdx)
+	addq	$1256, %rsp                     # imm = 0x4E8
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	retq
+.Lfunc_end60:
+	.size	mcl_fp_mont8Lbmi2, .Lfunc_end60-mcl_fp_mont8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montNF8Lbmi2             # -- Begin function mcl_fp_montNF8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF8Lbmi2,@function
+mcl_fp_montNF8Lbmi2:                    # @mcl_fp_montNF8Lbmi2
+# %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	subq	$1256, %rsp                     # imm = 0x4E8
+	movq	%rcx, %rbp
+	movq	%rdx, 88(%rsp)                  # 8-byte Spill
+	movq	%rsi, 80(%rsp)                  # 8-byte Spill
+	movq	%rdi, 96(%rsp)                  # 8-byte Spill
+	movq	-8(%rcx), %rbx
+	movq	%rbx, 64(%rsp)                  # 8-byte Spill
+	movq	%rcx, 72(%rsp)                  # 8-byte Spill
+	movq	(%rdx), %rdx
+	leaq	1184(%rsp), %rdi
+	callq	mulPv512x64bmi2@PLT
+	movq	1184(%rsp), %r15
+	movq	1192(%rsp), %r12
+	movq	%rbx, %rdx
+	imulq	%r15, %rdx
+	movq	1248(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	1240(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	1232(%rsp), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	movq	1224(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	1216(%rsp), %r14
+	movq	1208(%rsp), %rbx
+	movq	1200(%rsp), %r13
+	leaq	1112(%rsp), %rdi
+	movq	%rbp, %rsi
+	callq	mulPv512x64bmi2@PLT
+	addq	1112(%rsp), %r15
+	adcq	1120(%rsp), %r12
 	adcq	1128(%rsp), %r13
-	movq	%r13, 104(%rsp)         # 8-byte Spill
-	movq	112(%rsp), %r13         # 8-byte Reload
-	adcq	1136(%rsp), %r13
-	adcq	1144(%rsp), %rbp
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	1152(%rsp), %r12
-	adcq	$0, %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	1000(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	80(%rsp), %rax          # 8-byte Reload
-	addq	1000(%rsp), %rax
-	adcq	1008(%rsp), %r15
-	movq	%r15, 40(%rsp)          # 8-byte Spill
-	adcq	1016(%rsp), %r14
+	adcq	1136(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  # 8-byte Spill
 	movq	%r14, %r15
-	adcq	1024(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %r14          # 8-byte Reload
-	adcq	1032(%rsp), %r14
-	movq	104(%rsp), %rcx         # 8-byte Reload
-	adcq	1040(%rsp), %rcx
-	movq	%rcx, 104(%rsp)         # 8-byte Spill
+	adcq	1144(%rsp), %r15
+	movq	16(%rsp), %rbx                  # 8-byte Reload
+	adcq	1152(%rsp), %rbx
+	movq	32(%rsp), %r14                  # 8-byte Reload
+	adcq	1160(%rsp), %r14
+	movq	24(%rsp), %rbp                  # 8-byte Reload
+	adcq	1168(%rsp), %rbp
+	movq	8(%rsp), %rax                   # 8-byte Reload
+	adcq	1176(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	8(%rax), %rdx
+	leaq	1040(%rsp), %rdi
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	1104(%rsp), %rcx
+	addq	1040(%rsp), %r12
 	adcq	1048(%rsp), %r13
-	movq	%r13, 112(%rsp)         # 8-byte Spill
-	adcq	1056(%rsp), %rbp
-	adcq	1064(%rsp), %r12
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	1072(%rsp), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	sbbq	%rbx, %rbx
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	920(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	920(%rsp), %r13
-	movq	40(%rsp), %rcx          # 8-byte Reload
-	adcq	928(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          # 8-byte Spill
-	adcq	936(%rsp), %r15
-	movq	%r15, 64(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %r15          # 8-byte Reload
-	adcq	944(%rsp), %r15
-	movq	%r14, %r13
-	adcq	952(%rsp), %r13
-	movq	104(%rsp), %r14         # 8-byte Reload
-	adcq	960(%rsp), %r14
-	movq	112(%rsp), %rbx         # 8-byte Reload
-	adcq	968(%rsp), %rbx
-	adcq	976(%rsp), %rbp
-	adcq	984(%rsp), %r12
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	992(%rsp), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	840(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	40(%rsp), %rax          # 8-byte Reload
-	addq	840(%rsp), %rax
-	movq	64(%rsp), %rcx          # 8-byte Reload
-	adcq	848(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          # 8-byte Spill
-	adcq	856(%rsp), %r15
-	adcq	864(%rsp), %r13
-	movq	%r13, 88(%rsp)          # 8-byte Spill
-	adcq	872(%rsp), %r14
-	movq	%r14, 104(%rsp)         # 8-byte Spill
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	adcq	1056(%rsp), %rax
+	movq	%rax, 40(%rsp)                  # 8-byte Spill
+	adcq	1064(%rsp), %r15
+	adcq	1072(%rsp), %rbx
+	adcq	1080(%rsp), %r14
+	movq	%r14, 32(%rsp)                  # 8-byte Spill
+	adcq	1088(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %r14                   # 8-byte Reload
+	adcq	1096(%rsp), %r14
+	adcq	$0, %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	movq	64(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r12, %rdx
+	leaq	968(%rsp), %rdi
+	movq	72(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	addq	968(%rsp), %r12
+	adcq	976(%rsp), %r13
+	movq	40(%rsp), %rbp                  # 8-byte Reload
+	adcq	984(%rsp), %rbp
+	adcq	992(%rsp), %r15
+	movq	%r15, 56(%rsp)                  # 8-byte Spill
+	adcq	1000(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %r15                  # 8-byte Reload
+	adcq	1008(%rsp), %r15
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	1016(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	adcq	1024(%rsp), %r14
+	movq	%r14, 8(%rsp)                   # 8-byte Spill
+	movq	48(%rsp), %rbx                  # 8-byte Reload
+	adcq	1032(%rsp), %rbx
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	16(%rax), %rdx
+	leaq	896(%rsp), %rdi
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	960(%rsp), %r12
+	addq	896(%rsp), %r13
+	movq	%rbp, %r14
+	adcq	904(%rsp), %r14
+	movq	56(%rsp), %rax                  # 8-byte Reload
+	adcq	912(%rsp), %rax
+	movq	%rax, 56(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	920(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	adcq	928(%rsp), %r15
+	movq	%r15, 32(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rbp                  # 8-byte Reload
+	adcq	936(%rsp), %rbp
+	movq	8(%rsp), %rax                   # 8-byte Reload
+	adcq	944(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	adcq	952(%rsp), %rbx
+	adcq	$0, %r12
+	movq	64(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r13, %rdx
+	leaq	824(%rsp), %rdi
+	movq	72(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	addq	824(%rsp), %r13
+	adcq	832(%rsp), %r14
+	movq	%r14, 40(%rsp)                  # 8-byte Spill
+	movq	56(%rsp), %r13                  # 8-byte Reload
+	adcq	840(%rsp), %r13
+	movq	16(%rsp), %r15                  # 8-byte Reload
+	adcq	848(%rsp), %r15
+	movq	32(%rsp), %r14                  # 8-byte Reload
+	adcq	856(%rsp), %r14
+	adcq	864(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rbp                   # 8-byte Reload
+	adcq	872(%rsp), %rbp
 	adcq	880(%rsp), %rbx
-	movq	%rbx, 112(%rsp)         # 8-byte Spill
-	adcq	888(%rsp), %rbp
-	adcq	896(%rsp), %r12
-	movq	96(%rsp), %r13          # 8-byte Reload
-	adcq	904(%rsp), %r13
-	movq	80(%rsp), %rcx          # 8-byte Reload
-	adcq	912(%rsp), %rcx
-	movq	%rcx, 80(%rsp)          # 8-byte Spill
-	sbbq	%rbx, %rbx
-	movq	%rax, %rdx
-	movq	%rax, %r14
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	760(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	760(%rsp), %r14
-	movq	64(%rsp), %rcx          # 8-byte Reload
-	adcq	768(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          # 8-byte Spill
+	adcq	888(%rsp), %r12
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	24(%rax), %rdx
+	leaq	752(%rsp), %rdi
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	816(%rsp), %rcx
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	addq	752(%rsp), %rax
+	adcq	760(%rsp), %r13
+	adcq	768(%rsp), %r15
+	movq	%r15, 16(%rsp)                  # 8-byte Spill
+	movq	%r14, %r15
 	adcq	776(%rsp), %r15
-	movq	88(%rsp), %r14          # 8-byte Reload
-	adcq	784(%rsp), %r14
-	movq	104(%rsp), %rcx         # 8-byte Reload
-	adcq	792(%rsp), %rcx
-	movq	%rcx, 104(%rsp)         # 8-byte Spill
-	movq	112(%rsp), %rcx         # 8-byte Reload
-	adcq	800(%rsp), %rcx
-	movq	%rcx, 112(%rsp)         # 8-byte Spill
-	adcq	808(%rsp), %rbp
-	movq	%r12, %rbx
-	adcq	816(%rsp), %rbx
-	movq	%r13, %r12
-	adcq	824(%rsp), %r12
-	movq	80(%rsp), %r13          # 8-byte Reload
-	adcq	832(%rsp), %r13
-	adcq	$0, %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	40(%rax), %rdx
+	movq	24(%rsp), %rdx                  # 8-byte Reload
+	adcq	784(%rsp), %rdx
+	movq	%rdx, 24(%rsp)                  # 8-byte Spill
+	adcq	792(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   # 8-byte Spill
+	adcq	800(%rsp), %rbx
+	adcq	808(%rsp), %r12
+	adcq	$0, %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	movq	64(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
 	leaq	680(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	64(%rsp), %rax          # 8-byte Reload
-	addq	680(%rsp), %rax
-	adcq	688(%rsp), %r15
-	movq	%r15, 48(%rsp)          # 8-byte Spill
+	movq	72(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	addq	680(%rsp), %rbp
+	adcq	688(%rsp), %r13
+	movq	16(%rsp), %r14                  # 8-byte Reload
 	adcq	696(%rsp), %r14
-	movq	%r14, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %rcx         # 8-byte Reload
-	adcq	704(%rsp), %rcx
-	movq	%rcx, 104(%rsp)         # 8-byte Spill
-	movq	112(%rsp), %r15         # 8-byte Reload
+	adcq	704(%rsp), %r15
+	movq	%r15, 32(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %r15                  # 8-byte Reload
 	adcq	712(%rsp), %r15
+	movq	8(%rsp), %rbp                   # 8-byte Reload
 	adcq	720(%rsp), %rbp
 	adcq	728(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
 	adcq	736(%rsp), %r12
-	movq	%r12, 96(%rsp)          # 8-byte Spill
-	adcq	744(%rsp), %r13
-	movq	%r13, 80(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %r13          # 8-byte Reload
-	adcq	752(%rsp), %r13
-	sbbq	%r14, %r14
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	600(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %r14d
-	addq	600(%rsp), %rbx
-	movq	48(%rsp), %rax          # 8-byte Reload
-	adcq	608(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	616(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %rbx         # 8-byte Reload
-	adcq	624(%rsp), %rbx
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	adcq	744(%rsp), %rax
+	movq	%rax, 40(%rsp)                  # 8-byte Spill
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	32(%rax), %rdx
+	leaq	608(%rsp), %rdi
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	672(%rsp), %rcx
+	movq	%r13, %rax
+	addq	608(%rsp), %rax
+	adcq	616(%rsp), %r14
+	movq	%r14, 16(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %r13                  # 8-byte Reload
+	adcq	624(%rsp), %r13
 	adcq	632(%rsp), %r15
-	movq	%r15, 112(%rsp)         # 8-byte Spill
+	movq	%r15, 24(%rsp)                  # 8-byte Spill
 	adcq	640(%rsp), %rbp
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	648(%rsp), %r12
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	656(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %r15          # 8-byte Reload
-	adcq	664(%rsp), %r15
-	adcq	672(%rsp), %r13
-	adcq	$0, %r14
-	movq	%r14, 64(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	520(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	48(%rsp), %rax          # 8-byte Reload
-	addq	520(%rsp), %rax
-	movq	88(%rsp), %r14          # 8-byte Reload
-	adcq	528(%rsp), %r14
-	adcq	536(%rsp), %rbx
-	movq	%rbx, 104(%rsp)         # 8-byte Spill
-	movq	112(%rsp), %rcx         # 8-byte Reload
-	adcq	544(%rsp), %rcx
-	movq	%rcx, 112(%rsp)         # 8-byte Spill
-	adcq	552(%rsp), %rbp
-	adcq	560(%rsp), %r12
-	movq	%r12, 72(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %r12          # 8-byte Reload
-	adcq	568(%rsp), %r12
+	movq	%rbp, 8(%rsp)                   # 8-byte Spill
+	adcq	648(%rsp), %rbx
+	movq	%rbx, %r15
+	adcq	656(%rsp), %r12
+	movq	40(%rsp), %r14                  # 8-byte Reload
+	adcq	664(%rsp), %r14
+	movq	%rcx, %rbp
+	adcq	$0, %rbp
+	movq	64(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	536(%rsp), %rdi
+	movq	72(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	addq	536(%rsp), %rbx
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	544(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	%r13, %rbx
+	adcq	552(%rsp), %rbx
+	movq	24(%rsp), %r13                  # 8-byte Reload
+	adcq	560(%rsp), %r13
+	movq	8(%rsp), %rax                   # 8-byte Reload
+	adcq	568(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
 	adcq	576(%rsp), %r15
-	movq	%r15, 80(%rsp)          # 8-byte Spill
-	adcq	584(%rsp), %r13
-	movq	%r13, 40(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r15          # 8-byte Reload
+	movq	%r15, 48(%rsp)                  # 8-byte Spill
+	adcq	584(%rsp), %r12
+	movq	%r14, %r15
 	adcq	592(%rsp), %r15
-	sbbq	%rbx, %rbx
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	440(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	440(%rsp), %r13
-	adcq	448(%rsp), %r14
-	movq	%r14, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %r14         # 8-byte Reload
-	adcq	456(%rsp), %r14
-	movq	112(%rsp), %rbx         # 8-byte Reload
-	adcq	464(%rsp), %rbx
-	adcq	472(%rsp), %rbp
-	movq	%rbp, 8(%rsp)           # 8-byte Spill
-	movq	72(%rsp), %rcx          # 8-byte Reload
-	adcq	480(%rsp), %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	adcq	488(%rsp), %r12
-	movq	%r12, 96(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %rbp          # 8-byte Reload
-	adcq	496(%rsp), %rbp
-	movq	40(%rsp), %r12          # 8-byte Reload
+	adcq	600(%rsp), %rbp
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	40(%rax), %rdx
+	leaq	464(%rsp), %rdi
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	528(%rsp), %rcx
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	addq	464(%rsp), %rax
+	adcq	472(%rsp), %rbx
+	movq	%rbx, 32(%rsp)                  # 8-byte Spill
+	adcq	480(%rsp), %r13
+	movq	%r13, 24(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %r14                   # 8-byte Reload
+	adcq	488(%rsp), %r14
+	movq	48(%rsp), %r13                  # 8-byte Reload
+	adcq	496(%rsp), %r13
 	adcq	504(%rsp), %r12
+	movq	%r12, 16(%rsp)                  # 8-byte Spill
 	adcq	512(%rsp), %r15
-	movq	%r15, %r13
-	adcq	$0, %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	360(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	88(%rsp), %rax          # 8-byte Reload
-	addq	360(%rsp), %rax
-	adcq	368(%rsp), %r14
-	adcq	376(%rsp), %rbx
-	movq	%rbx, 112(%rsp)         # 8-byte Spill
-	movq	8(%rsp), %rcx           # 8-byte Reload
-	adcq	384(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           # 8-byte Spill
-	movq	72(%rsp), %rbx          # 8-byte Reload
-	adcq	392(%rsp), %rbx
-	movq	96(%rsp), %r15          # 8-byte Reload
-	adcq	400(%rsp), %r15
-	adcq	408(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	adcq	416(%rsp), %r12
-	movq	%r12, %rbp
+	movq	%r15, %r12
+	adcq	520(%rsp), %rbp
+	movq	%rcx, %r15
+	adcq	$0, %r15
+	movq	64(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	392(%rsp), %rdi
+	movq	72(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	addq	392(%rsp), %rbx
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	adcq	400(%rsp), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	408(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	%r14, %rbx
+	adcq	416(%rsp), %rbx
 	adcq	424(%rsp), %r13
-	movq	%r13, 64(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %rcx          # 8-byte Reload
-	adcq	432(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          # 8-byte Spill
-	sbbq	%r13, %r13
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	280(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %r13d
-	addq	280(%rsp), %r12
-	adcq	288(%rsp), %r14
-	movq	%r14, 104(%rsp)         # 8-byte Spill
-	movq	112(%rsp), %rax         # 8-byte Reload
-	adcq	296(%rsp), %rax
-	movq	%rax, 112(%rsp)         # 8-byte Spill
-	movq	8(%rsp), %r14           # 8-byte Reload
-	adcq	304(%rsp), %r14
-	adcq	312(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
-	adcq	320(%rsp), %r15
-	movq	%r15, 96(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %rbx          # 8-byte Reload
-	adcq	328(%rsp), %rbx
-	adcq	336(%rsp), %rbp
-	movq	%rbp, 40(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r12          # 8-byte Reload
-	adcq	344(%rsp), %r12
-	movq	48(%rsp), %rbp          # 8-byte Reload
-	adcq	352(%rsp), %rbp
+	movq	%r13, 48(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %r14                  # 8-byte Reload
+	adcq	432(%rsp), %r14
+	adcq	440(%rsp), %r12
+	adcq	448(%rsp), %rbp
+	movq	%rbp, 56(%rsp)                  # 8-byte Spill
+	adcq	456(%rsp), %r15
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	48(%rax), %rdx
+	leaq	320(%rsp), %rdi
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	leaq	248(%rsp), %rdi
+	movq	384(%rsp), %r13
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	addq	320(%rsp), %rax
+	movq	24(%rsp), %rbp                  # 8-byte Reload
+	adcq	328(%rsp), %rbp
+	adcq	336(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	344(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	adcq	352(%rsp), %r14
+	movq	%r14, 16(%rsp)                  # 8-byte Spill
+	adcq	360(%rsp), %r12
+	movq	%r12, 40(%rsp)                  # 8-byte Spill
+	movq	56(%rsp), %r12                  # 8-byte Reload
+	adcq	368(%rsp), %r12
+	adcq	376(%rsp), %r15
+	movq	%r15, 32(%rsp)                  # 8-byte Spill
 	adcq	$0, %r13
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	64(%rax), %rdx
-	leaq	200(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	104(%rsp), %rax         # 8-byte Reload
-	addq	200(%rsp), %rax
-	movq	112(%rsp), %r15         # 8-byte Reload
-	adcq	208(%rsp), %r15
-	adcq	216(%rsp), %r14
-	movq	%r14, 8(%rsp)           # 8-byte Spill
-	movq	72(%rsp), %r14          # 8-byte Reload
-	adcq	224(%rsp), %r14
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	232(%rsp), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	adcq	240(%rsp), %rbx
-	movq	%rbx, 80(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %rcx          # 8-byte Reload
-	adcq	248(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          # 8-byte Spill
-	adcq	256(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	adcq	264(%rsp), %rbp
-	movq	%rbp, 48(%rsp)          # 8-byte Spill
-	adcq	272(%rsp), %r13
-	sbbq	%rbx, %rbx
-	movq	16(%rsp), %rdx          # 8-byte Reload
+	movq	64(%rsp), %rdx                  # 8-byte Reload
 	imulq	%rax, %rdx
-	movq	%rax, %r12
-	leaq	120(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %ebx
-	addq	120(%rsp), %r12
-	adcq	128(%rsp), %r15
-	movq	8(%rsp), %rbp           # 8-byte Reload
-	adcq	136(%rsp), %rbp
-	movq	%r14, %rcx
-	adcq	144(%rsp), %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %r8           # 8-byte Reload
-	adcq	152(%rsp), %r8
-	movq	%r8, 96(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %r9           # 8-byte Reload
-	adcq	160(%rsp), %r9
-	movq	%r9, 80(%rsp)           # 8-byte Spill
-	movq	40(%rsp), %r10          # 8-byte Reload
-	adcq	168(%rsp), %r10
-	movq	%r10, 40(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %rdi          # 8-byte Reload
-	adcq	176(%rsp), %rdi
-	movq	%rdi, 64(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %r14          # 8-byte Reload
+	movq	%rax, %rbx
+	movq	72(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	leaq	176(%rsp), %rdi
+	addq	248(%rsp), %rbx
+	adcq	256(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %r14                   # 8-byte Reload
+	adcq	264(%rsp), %r14
+	movq	48(%rsp), %rbp                  # 8-byte Reload
+	adcq	272(%rsp), %rbp
+	movq	16(%rsp), %r15                  # 8-byte Reload
+	adcq	280(%rsp), %r15
+	movq	40(%rsp), %rbx                  # 8-byte Reload
+	adcq	288(%rsp), %rbx
+	adcq	296(%rsp), %r12
+	movq	%r12, 56(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	adcq	304(%rsp), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	adcq	312(%rsp), %r13
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	56(%rax), %rdx
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	leaq	104(%rsp), %rdi
+	movq	240(%rsp), %r12
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	addq	176(%rsp), %rax
 	adcq	184(%rsp), %r14
-	adcq	192(%rsp), %r13
-	adcq	$0, %rbx
-	movq	%r15, %rsi
-	movq	%r15, %r12
-	movq	56(%rsp), %rdx          # 8-byte Reload
-	subq	(%rdx), %rsi
-	movq	%rbp, %rax
-	movq	%rbp, %r15
-	sbbq	8(%rdx), %rax
-	movq	%rcx, %rbp
-	sbbq	16(%rdx), %rbp
-	movq	%r8, %rcx
-	sbbq	24(%rdx), %rcx
-	movq	%r9, %r8
-	sbbq	32(%rdx), %r8
-	movq	%r10, %r11
-	sbbq	40(%rdx), %r11
-	movq	%rdi, %r10
-	sbbq	48(%rdx), %r10
-	movq	%r14, %rdi
-	sbbq	56(%rdx), %rdi
-	movq	%r13, %r9
-	sbbq	64(%rdx), %r9
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%r13, %r9
-	testb	%bl, %bl
-	cmovneq	%r12, %rsi
-	movq	(%rsp), %rbx            # 8-byte Reload
-	movq	%rsi, (%rbx)
-	cmovneq	%r15, %rax
-	movq	%rax, 8(%rbx)
-	cmovneq	72(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 16(%rbx)
-	cmovneq	96(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 24(%rbx)
-	cmovneq	80(%rsp), %r8           # 8-byte Folded Reload
-	movq	%r8, 32(%rbx)
-	cmovneq	40(%rsp), %r11          # 8-byte Folded Reload
-	movq	%r11, 40(%rbx)
-	cmovneq	64(%rsp), %r10          # 8-byte Folded Reload
-	movq	%r10, 48(%rbx)
-	cmovneq	%r14, %rdi
-	movq	%rdi, 56(%rbx)
-	movq	%r9, 64(%rbx)
-	addq	$1560, %rsp             # imm = 0x618
+	movq	%r14, 8(%rsp)                   # 8-byte Spill
+	adcq	192(%rsp), %rbp
+	movq	%rbp, 48(%rsp)                  # 8-byte Spill
+	adcq	200(%rsp), %r15
+	movq	%r15, 16(%rsp)                  # 8-byte Spill
+	adcq	208(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  # 8-byte Spill
+	movq	56(%rsp), %rbp                  # 8-byte Reload
+	adcq	216(%rsp), %rbp
+	movq	32(%rsp), %r15                  # 8-byte Reload
+	adcq	224(%rsp), %r15
+	adcq	232(%rsp), %r13
+	adcq	$0, %r12
+	movq	64(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	movq	72(%rsp), %r14                  # 8-byte Reload
+	movq	%r14, %rsi
+	callq	mulPv512x64bmi2@PLT
+	addq	104(%rsp), %rbx
+	movq	8(%rsp), %r8                    # 8-byte Reload
+	adcq	112(%rsp), %r8
+	movq	%r8, 8(%rsp)                    # 8-byte Spill
+	movq	48(%rsp), %r9                   # 8-byte Reload
+	adcq	120(%rsp), %r9
+	movq	%r9, 48(%rsp)                   # 8-byte Spill
+	movq	16(%rsp), %rsi                  # 8-byte Reload
+	adcq	128(%rsp), %rsi
+	movq	40(%rsp), %r11                  # 8-byte Reload
+	adcq	136(%rsp), %r11
+	movq	%rbp, %r10
+	adcq	144(%rsp), %r10
+	adcq	152(%rsp), %r15
+	adcq	160(%rsp), %r13
+	adcq	168(%rsp), %r12
+	movq	%r14, %rax
+	subq	(%r14), %r8
+	sbbq	8(%r14), %r9
+	movq	%rsi, %rdx
+	movq	%rsi, %r14
+	sbbq	16(%rax), %rdx
+	movq	%r11, %rsi
+	sbbq	24(%rax), %rsi
+	movq	%r10, %rdi
+	sbbq	32(%rax), %rdi
+	movq	%r15, %rbp
+	sbbq	40(%rax), %rbp
+	movq	%r13, %rbx
+	sbbq	48(%rax), %rbx
+	movq	%rax, %rcx
+	movq	%r12, %rax
+	sbbq	56(%rcx), %rax
+	cmovsq	%r12, %rax
+	movq	96(%rsp), %rcx                  # 8-byte Reload
+	movq	%rax, 56(%rcx)
+	cmovsq	%r13, %rbx
+	movq	%rbx, 48(%rcx)
+	cmovsq	%r15, %rbp
+	movq	%rbp, 40(%rcx)
+	cmovsq	%r10, %rdi
+	movq	%rdi, 32(%rcx)
+	cmovsq	%r11, %rsi
+	movq	%rsi, 24(%rcx)
+	cmovsq	%r14, %rdx
+	movq	%rdx, 16(%rcx)
+	cmovsq	48(%rsp), %r9                   # 8-byte Folded Reload
+	movq	%r9, 8(%rcx)
+	cmovsq	8(%rsp), %r8                    # 8-byte Folded Reload
+	movq	%r8, (%rcx)
+	addq	$1256, %rsp                     # imm = 0x4E8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -12473,532 +5787,304 @@ mcl_fp_mont9Lbmi2:                      # @mcl_fp_mont9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end130:
-	.size	mcl_fp_mont9Lbmi2, .Lfunc_end130-mcl_fp_mont9Lbmi2
-
-	.globl	mcl_fp_montNF9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF9Lbmi2,@function
-mcl_fp_montNF9Lbmi2:                    # @mcl_fp_montNF9Lbmi2
-# BB#0:
+.Lfunc_end61:
+	.size	mcl_fp_montNF8Lbmi2, .Lfunc_end61-mcl_fp_montNF8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRed8Lbmi2            # -- Begin function mcl_fp_montRed8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed8Lbmi2,@function
+mcl_fp_montRed8Lbmi2:                   # @mcl_fp_montRed8Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
-	pushq	%rbx
-	subq	$1560, %rsp             # imm = 0x618
-	movq	%rcx, 64(%rsp)          # 8-byte Spill
-	movq	%rdx, 16(%rsp)          # 8-byte Spill
-	movq	%rsi, 24(%rsp)          # 8-byte Spill
-	movq	%rdi, (%rsp)            # 8-byte Spill
-	movq	-8(%rcx), %rbx
-	movq	%rbx, 32(%rsp)          # 8-byte Spill
-	movq	(%rdx), %rdx
-	leaq	1480(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	1480(%rsp), %r12
-	movq	1488(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	%r12, %rdx
-	imulq	%rbx, %rdx
-	movq	1552(%rsp), %rax
-	movq	%rax, 112(%rsp)         # 8-byte Spill
-	movq	1544(%rsp), %r13
-	movq	1536(%rsp), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	1528(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	1520(%rsp), %r14
-	movq	1512(%rsp), %r15
-	movq	1504(%rsp), %rbx
-	movq	1496(%rsp), %rbp
-	leaq	1400(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	1400(%rsp), %r12
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	1408(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	adcq	1416(%rsp), %rbp
-	movq	%rbp, 8(%rsp)           # 8-byte Spill
-	adcq	1424(%rsp), %rbx
-	movq	%rbx, 104(%rsp)         # 8-byte Spill
-	adcq	1432(%rsp), %r15
-	movq	%r15, 56(%rsp)          # 8-byte Spill
-	adcq	1440(%rsp), %r14
-	movq	%r14, 40(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %rbx          # 8-byte Reload
-	adcq	1448(%rsp), %rbx
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	1456(%rsp), %r12
-	adcq	1464(%rsp), %r13
-	movq	%r13, 96(%rsp)          # 8-byte Spill
-	movq	112(%rsp), %rbp         # 8-byte Reload
-	adcq	1472(%rsp), %rbp
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1320(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	1392(%rsp), %rax
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	addq	1320(%rsp), %rcx
-	movq	8(%rsp), %r15           # 8-byte Reload
-	adcq	1328(%rsp), %r15
-	movq	104(%rsp), %r14         # 8-byte Reload
-	adcq	1336(%rsp), %r14
-	movq	56(%rsp), %rdx          # 8-byte Reload
-	adcq	1344(%rsp), %rdx
-	movq	%rdx, 56(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %r13          # 8-byte Reload
-	adcq	1352(%rsp), %r13
-	adcq	1360(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          # 8-byte Spill
-	adcq	1368(%rsp), %r12
-	movq	%r12, 72(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %rdx          # 8-byte Reload
-	adcq	1376(%rsp), %rdx
-	movq	%rdx, 96(%rsp)          # 8-byte Spill
-	adcq	1384(%rsp), %rbp
-	movq	%rbp, 112(%rsp)         # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, %rbp
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	1240(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	1240(%rsp), %rbx
-	adcq	1248(%rsp), %r15
-	movq	%r15, 8(%rsp)           # 8-byte Spill
-	adcq	1256(%rsp), %r14
-	movq	%r14, 104(%rsp)         # 8-byte Spill
-	movq	56(%rsp), %r12          # 8-byte Reload
-	adcq	1264(%rsp), %r12
-	adcq	1272(%rsp), %r13
-	movq	%r13, %r14
-	movq	48(%rsp), %r13          # 8-byte Reload
-	adcq	1280(%rsp), %r13
-	movq	72(%rsp), %rbx          # 8-byte Reload
-	adcq	1288(%rsp), %rbx
-	movq	96(%rsp), %r15          # 8-byte Reload
-	adcq	1296(%rsp), %r15
-	movq	112(%rsp), %rax         # 8-byte Reload
-	adcq	1304(%rsp), %rax
-	movq	%rax, 112(%rsp)         # 8-byte Spill
-	adcq	1312(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	1160(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	1232(%rsp), %rax
-	movq	8(%rsp), %rcx           # 8-byte Reload
-	addq	1160(%rsp), %rcx
-	movq	104(%rsp), %rbp         # 8-byte Reload
-	adcq	1168(%rsp), %rbp
-	adcq	1176(%rsp), %r12
-	movq	%r12, 56(%rsp)          # 8-byte Spill
-	adcq	1184(%rsp), %r14
-	adcq	1192(%rsp), %r13
-	movq	%r13, %r12
-	adcq	1200(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
-	adcq	1208(%rsp), %r15
-	movq	%r15, 96(%rsp)          # 8-byte Spill
-	movq	112(%rsp), %rbx         # 8-byte Reload
-	adcq	1216(%rsp), %rbx
-	movq	80(%rsp), %rdx          # 8-byte Reload
-	adcq	1224(%rsp), %rdx
-	movq	%rdx, 80(%rsp)          # 8-byte Spill
-	movq	%rax, %r15
-	adcq	$0, %r15
-	movq	%rcx, %rdx
-	movq	%rcx, %r13
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	1080(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	1080(%rsp), %r13
-	adcq	1088(%rsp), %rbp
-	movq	%rbp, 104(%rsp)         # 8-byte Spill
-	movq	56(%rsp), %r13          # 8-byte Reload
-	adcq	1096(%rsp), %r13
-	adcq	1104(%rsp), %r14
-	adcq	1112(%rsp), %r12
-	movq	%r12, 48(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	1120(%rsp), %r12
-	movq	96(%rsp), %rbp          # 8-byte Reload
-	adcq	1128(%rsp), %rbp
-	adcq	1136(%rsp), %rbx
-	movq	%rbx, 112(%rsp)         # 8-byte Spill
-	movq	80(%rsp), %rbx          # 8-byte Reload
-	adcq	1144(%rsp), %rbx
-	adcq	1152(%rsp), %r15
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	1000(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	1072(%rsp), %rax
-	movq	104(%rsp), %rcx         # 8-byte Reload
-	addq	1000(%rsp), %rcx
-	adcq	1008(%rsp), %r13
-	movq	%r13, 56(%rsp)          # 8-byte Spill
-	adcq	1016(%rsp), %r14
-	movq	%r14, 40(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %r14          # 8-byte Reload
-	adcq	1024(%rsp), %r14
-	adcq	1032(%rsp), %r12
-	adcq	1040(%rsp), %rbp
-	movq	%rbp, 96(%rsp)          # 8-byte Spill
-	movq	112(%rsp), %r13         # 8-byte Reload
-	adcq	1048(%rsp), %r13
-	adcq	1056(%rsp), %rbx
-	movq	%rbx, 80(%rsp)          # 8-byte Spill
-	adcq	1064(%rsp), %r15
-	movq	%r15, 88(%rsp)          # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, 104(%rsp)         # 8-byte Spill
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	920(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	920(%rsp), %rbx
-	movq	56(%rsp), %rax          # 8-byte Reload
-	adcq	928(%rsp), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %rbp          # 8-byte Reload
-	adcq	936(%rsp), %rbp
-	movq	%r14, %rbx
-	adcq	944(%rsp), %rbx
-	adcq	952(%rsp), %r12
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	960(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	adcq	968(%rsp), %r13
-	movq	%r13, %r15
-	movq	80(%rsp), %r13          # 8-byte Reload
-	adcq	976(%rsp), %r13
-	movq	88(%rsp), %r14          # 8-byte Reload
-	adcq	984(%rsp), %r14
-	movq	104(%rsp), %rax         # 8-byte Reload
-	adcq	992(%rsp), %rax
-	movq	%rax, 104(%rsp)         # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	840(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	912(%rsp), %rax
-	movq	56(%rsp), %rcx          # 8-byte Reload
-	addq	840(%rsp), %rcx
-	adcq	848(%rsp), %rbp
-	movq	%rbp, 40(%rsp)          # 8-byte Spill
-	adcq	856(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          # 8-byte Spill
-	adcq	864(%rsp), %r12
-	movq	96(%rsp), %rbp          # 8-byte Reload
-	adcq	872(%rsp), %rbp
-	adcq	880(%rsp), %r15
-	movq	%r15, 112(%rsp)         # 8-byte Spill
-	adcq	888(%rsp), %r13
-	adcq	896(%rsp), %r14
-	movq	%r14, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %rdx         # 8-byte Reload
-	adcq	904(%rsp), %rdx
-	movq	%rdx, 104(%rsp)         # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, %r14
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	760(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	760(%rsp), %rbx
-	movq	40(%rsp), %rax          # 8-byte Reload
-	adcq	768(%rsp), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %r15          # 8-byte Reload
-	adcq	776(%rsp), %r15
-	adcq	784(%rsp), %r12
-	movq	%r12, 72(%rsp)          # 8-byte Spill
-	movq	%rbp, %rbx
-	adcq	792(%rsp), %rbx
-	movq	112(%rsp), %rbp         # 8-byte Reload
-	adcq	800(%rsp), %rbp
-	adcq	808(%rsp), %r13
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	816(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %r12         # 8-byte Reload
-	adcq	824(%rsp), %r12
-	adcq	832(%rsp), %r14
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	40(%rax), %rdx
-	leaq	680(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	752(%rsp), %rcx
-	movq	40(%rsp), %rax          # 8-byte Reload
-	addq	680(%rsp), %rax
-	adcq	688(%rsp), %r15
-	movq	%r15, 48(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %rdx          # 8-byte Reload
-	adcq	696(%rsp), %rdx
-	movq	%rdx, 72(%rsp)          # 8-byte Spill
-	adcq	704(%rsp), %rbx
-	movq	%rbx, 96(%rsp)          # 8-byte Spill
-	adcq	712(%rsp), %rbp
-	movq	%rbp, 112(%rsp)         # 8-byte Spill
-	adcq	720(%rsp), %r13
-	movq	%r13, %r15
-	movq	88(%rsp), %rbx          # 8-byte Reload
-	adcq	728(%rsp), %rbx
-	adcq	736(%rsp), %r12
-	movq	%r12, 104(%rsp)         # 8-byte Spill
-	adcq	744(%rsp), %r14
-	movq	%r14, 40(%rsp)          # 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	600(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	600(%rsp), %r13
-	movq	48(%rsp), %r13          # 8-byte Reload
-	adcq	608(%rsp), %r13
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	616(%rsp), %r12
-	movq	96(%rsp), %rbp          # 8-byte Reload
-	adcq	624(%rsp), %rbp
-	movq	112(%rsp), %rax         # 8-byte Reload
-	adcq	632(%rsp), %rax
-	movq	%rax, 112(%rsp)         # 8-byte Spill
-	adcq	640(%rsp), %r15
-	movq	%r15, 80(%rsp)          # 8-byte Spill
-	adcq	648(%rsp), %rbx
-	movq	%rbx, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %r14         # 8-byte Reload
-	adcq	656(%rsp), %r14
-	movq	40(%rsp), %rbx          # 8-byte Reload
+	pushq	%rbx
+	subq	$728, %rsp                      # imm = 0x2D8
+	movq	%rdi, 144(%rsp)                 # 8-byte Spill
+	movq	56(%rdx), %rax
+	movq	%rax, 136(%rsp)                 # 8-byte Spill
+	movq	48(%rdx), %rax
+	movq	%rax, 128(%rsp)                 # 8-byte Spill
+	movq	40(%rdx), %rax
+	movq	%rax, 120(%rsp)                 # 8-byte Spill
+	movq	32(%rdx), %rax
+	movq	%rax, 112(%rsp)                 # 8-byte Spill
+	movq	24(%rdx), %rax
+	movq	%rax, 104(%rsp)                 # 8-byte Spill
+	movq	16(%rdx), %rax
+	movq	%rax, 96(%rsp)                  # 8-byte Spill
+	movq	8(%rdx), %rax
+	movq	%rax, 88(%rsp)                  # 8-byte Spill
+	movq	%rsi, 72(%rsp)                  # 8-byte Spill
+	movq	56(%rsi), %r12
+	movq	48(%rsi), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	40(%rsi), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	movq	32(%rsi), %r15
+	movq	24(%rsi), %r14
+	movq	16(%rsi), %r13
+	movq	(%rsi), %rbp
+	movq	8(%rsi), %rbx
+	movq	-8(%rdx), %rcx
+	movq	%rcx, 56(%rsp)                  # 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdx, %rsi
+	movq	%rdx, 64(%rsp)                  # 8-byte Spill
+	movq	%rax, 80(%rsp)                  # 8-byte Spill
+	movq	%rbp, %rdx
+	imulq	%rcx, %rdx
+	leaq	656(%rsp), %rdi
+	callq	mulPv512x64bmi2@PLT
+	addq	656(%rsp), %rbp
 	adcq	664(%rsp), %rbx
-	movq	56(%rsp), %r15          # 8-byte Reload
-	adcq	672(%rsp), %r15
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	520(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	592(%rsp), %rcx
-	movq	%r13, %rax
-	addq	520(%rsp), %rax
-	adcq	528(%rsp), %r12
-	movq	%r12, 72(%rsp)          # 8-byte Spill
-	movq	%rbp, %r12
-	adcq	536(%rsp), %r12
-	movq	112(%rsp), %rbp         # 8-byte Reload
-	adcq	544(%rsp), %rbp
-	movq	80(%rsp), %rdx          # 8-byte Reload
-	adcq	552(%rsp), %rdx
-	movq	%rdx, 80(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %rdx          # 8-byte Reload
-	adcq	560(%rsp), %rdx
-	movq	%rdx, 88(%rsp)          # 8-byte Spill
-	adcq	568(%rsp), %r14
-	movq	%r14, 104(%rsp)         # 8-byte Spill
-	adcq	576(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          # 8-byte Spill
-	adcq	584(%rsp), %r15
-	movq	%r15, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, %r13
-	movq	%rax, %rdx
-	movq	%rax, %r14
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
+	adcq	672(%rsp), %r13
+	adcq	680(%rsp), %r14
+	adcq	688(%rsp), %r15
+	movq	32(%rsp), %rbp                  # 8-byte Reload
+	adcq	696(%rsp), %rbp
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	704(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	adcq	712(%rsp), %r12
+	movq	%r12, 24(%rsp)                  # 8-byte Spill
+	movq	72(%rsp), %rax                  # 8-byte Reload
+	movq	64(%rax), %rax
+	adcq	720(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	setb	%r12b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rbx, %rdx
+	leaq	584(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	648(%rsp), %rax
+	addb	$255, %r12b
+	adcq	$0, %rax
+	movq	%rax, %rcx
+	addq	584(%rsp), %rbx
+	adcq	592(%rsp), %r13
+	adcq	600(%rsp), %r14
+	adcq	608(%rsp), %r15
+	adcq	616(%rsp), %rbp
+	movq	%rbp, 32(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	624(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rbp                  # 8-byte Reload
+	adcq	632(%rsp), %rbp
+	movq	(%rsp), %rax                    # 8-byte Reload
+	adcq	640(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	movq	72(%rsp), %r12                  # 8-byte Reload
+	adcq	72(%r12), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	setb	%bl
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r13, %rdx
+	leaq	512(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	576(%rsp), %rax
+	addb	$255, %bl
+	adcq	$0, %rax
+	movq	%rax, %rcx
+	addq	512(%rsp), %r13
+	adcq	520(%rsp), %r14
+	adcq	528(%rsp), %r15
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	adcq	536(%rsp), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	544(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	adcq	552(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  # 8-byte Spill
+	movq	(%rsp), %rbp                    # 8-byte Reload
+	adcq	560(%rsp), %rbp
+	movq	8(%rsp), %rbx                   # 8-byte Reload
+	adcq	568(%rsp), %rbx
+	adcq	80(%r12), %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	setb	%r13b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r14, %rdx
 	leaq	440(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
+	movq	64(%rsp), %r12                  # 8-byte Reload
+	movq	%r12, %rsi
+	callq	mulPv512x64bmi2@PLT
+	movq	504(%rsp), %rax
+	addb	$255, %r13b
+	adcq	$0, %rax
 	addq	440(%rsp), %r14
-	movq	72(%rsp), %rax          # 8-byte Reload
-	adcq	448(%rsp), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	adcq	456(%rsp), %r12
-	adcq	464(%rsp), %rbp
-	movq	%rbp, 112(%rsp)         # 8-byte Spill
-	movq	80(%rsp), %r14          # 8-byte Reload
-	adcq	472(%rsp), %r14
-	movq	88(%rsp), %r15          # 8-byte Reload
-	adcq	480(%rsp), %r15
-	movq	104(%rsp), %rbp         # 8-byte Reload
-	adcq	488(%rsp), %rbp
-	movq	40(%rsp), %rbx          # 8-byte Reload
-	adcq	496(%rsp), %rbx
-	movq	56(%rsp), %rax          # 8-byte Reload
-	adcq	504(%rsp), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	adcq	512(%rsp), %r13
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	360(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	432(%rsp), %rcx
-	movq	72(%rsp), %rax          # 8-byte Reload
-	addq	360(%rsp), %rax
-	adcq	368(%rsp), %r12
-	movq	%r12, 96(%rsp)          # 8-byte Spill
-	movq	112(%rsp), %rdx         # 8-byte Reload
-	adcq	376(%rsp), %rdx
-	movq	%rdx, 112(%rsp)         # 8-byte Spill
-	adcq	384(%rsp), %r14
-	movq	%r14, 80(%rsp)          # 8-byte Spill
-	adcq	392(%rsp), %r15
-	movq	%r15, 88(%rsp)          # 8-byte Spill
-	adcq	400(%rsp), %rbp
-	movq	%rbp, 104(%rsp)         # 8-byte Spill
-	adcq	408(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          # 8-byte Spill
-	movq	56(%rsp), %r14          # 8-byte Reload
-	adcq	416(%rsp), %r14
-	adcq	424(%rsp), %r13
-	movq	%r13, %r15
-	adcq	$0, %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	280(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	280(%rsp), %r12
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	288(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	112(%rsp), %rbp         # 8-byte Reload
-	adcq	296(%rsp), %rbp
-	movq	80(%rsp), %rax          # 8-byte Reload
+	adcq	448(%rsp), %r15
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	adcq	456(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %r13                  # 8-byte Reload
+	adcq	464(%rsp), %r13
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	adcq	472(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  # 8-byte Spill
+	adcq	480(%rsp), %rbp
+	movq	%rbp, (%rsp)                    # 8-byte Spill
+	adcq	488(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	movq	40(%rsp), %rbp                  # 8-byte Reload
+	adcq	496(%rsp), %rbp
+	movq	72(%rsp), %rcx                  # 8-byte Reload
+	adcq	88(%rcx), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	setb	%bl
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r15, %rdx
+	leaq	368(%rsp), %rdi
+	movq	%r12, %rsi
+	callq	mulPv512x64bmi2@PLT
+	movq	432(%rsp), %r14
+	addb	$255, %bl
+	adcq	$0, %r14
+	addq	368(%rsp), %r15
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	adcq	376(%rsp), %rax
+	adcq	384(%rsp), %r13
+	movq	%r13, 16(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rbx                  # 8-byte Reload
+	adcq	392(%rsp), %rbx
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	400(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	408(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	adcq	416(%rsp), %rbp
+	movq	%rbp, 40(%rsp)                  # 8-byte Spill
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	424(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	movq	72(%rsp), %rcx                  # 8-byte Reload
+	adcq	96(%rcx), %r14
+	setb	%r15b
+	movq	56(%rsp), %r13                  # 8-byte Reload
+	movq	%r13, %rdx
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	296(%rsp), %rdi
+	movq	%r12, %rsi
+	callq	mulPv512x64bmi2@PLT
+	movq	360(%rsp), %r12
+	addb	$255, %r15b
+	adcq	$0, %r12
+	addq	296(%rsp), %rbp
+	movq	16(%rsp), %rax                  # 8-byte Reload
 	adcq	304(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %r13          # 8-byte Reload
-	adcq	312(%rsp), %r13
-	movq	104(%rsp), %r12         # 8-byte Reload
-	adcq	320(%rsp), %r12
-	movq	40(%rsp), %rbx          # 8-byte Reload
-	adcq	328(%rsp), %rbx
-	adcq	336(%rsp), %r14
-	movq	%r14, 56(%rsp)          # 8-byte Spill
-	adcq	344(%rsp), %r15
-	movq	%r15, 48(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %r14          # 8-byte Reload
+	adcq	312(%rsp), %rbx
+	movq	%rbx, 24(%rsp)                  # 8-byte Spill
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	320(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	328(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	movq	40(%rsp), %rcx                  # 8-byte Reload
+	adcq	336(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	344(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
 	adcq	352(%rsp), %r14
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	64(%rax), %rdx
-	leaq	200(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	272(%rsp), %rcx
-	movq	96(%rsp), %rax          # 8-byte Reload
-	addq	200(%rsp), %rax
-	adcq	208(%rsp), %rbp
-	movq	%rbp, 112(%rsp)         # 8-byte Spill
-	movq	80(%rsp), %rbp          # 8-byte Reload
-	adcq	216(%rsp), %rbp
-	adcq	224(%rsp), %r13
-	movq	%r13, 88(%rsp)          # 8-byte Spill
-	adcq	232(%rsp), %r12
-	movq	%r12, 104(%rsp)         # 8-byte Spill
-	adcq	240(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          # 8-byte Spill
-	movq	56(%rsp), %r15          # 8-byte Reload
-	adcq	248(%rsp), %r15
-	movq	48(%rsp), %r12          # 8-byte Reload
-	adcq	256(%rsp), %r12
-	adcq	264(%rsp), %r14
-	adcq	$0, %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rdx          # 8-byte Reload
+	movq	72(%rsp), %rbp                  # 8-byte Reload
+	adcq	104(%rbp), %r12
+	setb	%r15b
+	movq	%r13, %rdx
 	imulq	%rax, %rdx
 	movq	%rax, %rbx
-	leaq	120(%rsp), %rdi
-	movq	64(%rsp), %r13          # 8-byte Reload
-	movq	%r13, %rsi
-	callq	.LmulPv576x64
-	addq	120(%rsp), %rbx
-	movq	112(%rsp), %rcx         # 8-byte Reload
-	adcq	128(%rsp), %rcx
-	movq	%rbp, %rdx
-	adcq	136(%rsp), %rdx
-	movq	88(%rsp), %rsi          # 8-byte Reload
-	adcq	144(%rsp), %rsi
-	movq	%rsi, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %rdi         # 8-byte Reload
-	adcq	152(%rsp), %rdi
-	movq	%rdi, 104(%rsp)         # 8-byte Spill
-	movq	40(%rsp), %rbx          # 8-byte Reload
-	adcq	160(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          # 8-byte Spill
-	movq	%r15, %r8
-	adcq	168(%rsp), %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	%r12, %r15
+	leaq	224(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	288(%rsp), %r13
+	addb	$255, %r15b
+	adcq	$0, %r13
+	addq	224(%rsp), %rbx
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	232(%rsp), %rax
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	248(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	movq	40(%rsp), %rcx                  # 8-byte Reload
+	adcq	256(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	264(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	adcq	272(%rsp), %r14
+	adcq	280(%rsp), %r12
+	adcq	112(%rbp), %r13
+	setb	%r15b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	152(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	addb	$255, %r15b
+	movq	216(%rsp), %rdx
+	adcq	$0, %rdx
+	addq	152(%rsp), %rbx
+	movq	(%rsp), %r9                     # 8-byte Reload
+	adcq	160(%rsp), %r9
+	movq	%r9, (%rsp)                     # 8-byte Spill
+	movq	8(%rsp), %r10                   # 8-byte Reload
+	adcq	168(%rsp), %r10
+	movq	%r10, 8(%rsp)                   # 8-byte Spill
+	movq	40(%rsp), %r15                  # 8-byte Reload
 	adcq	176(%rsp), %r15
-	adcq	184(%rsp), %r14
-	movq	96(%rsp), %r9           # 8-byte Reload
-	adcq	192(%rsp), %r9
-	movq	%rcx, %rax
-	movq	%rcx, %r11
-	movq	%r13, %rbp
-	subq	(%rbp), %rax
+	movq	48(%rsp), %r11                  # 8-byte Reload
+	adcq	184(%rsp), %r11
+	adcq	192(%rsp), %r14
+	adcq	200(%rsp), %r12
+	adcq	208(%rsp), %r13
+	adcq	120(%rbp), %rdx
+	xorl	%r8d, %r8d
+	subq	80(%rsp), %r9                   # 8-byte Folded Reload
+	sbbq	88(%rsp), %r10                  # 8-byte Folded Reload
+	movq	%r15, %rdi
+	sbbq	96(%rsp), %rdi                  # 8-byte Folded Reload
+	movq	%r11, %rbp
+	sbbq	104(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	%r14, %rbx
+	sbbq	112(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%r12, %rsi
+	sbbq	120(%rsp), %rsi                 # 8-byte Folded Reload
+	movq	%r13, %rax
+	sbbq	128(%rsp), %rax                 # 8-byte Folded Reload
 	movq	%rdx, %rcx
-	movq	%rdx, %r12
-	sbbq	8(%rbp), %rcx
-	movq	%rsi, %rdx
-	sbbq	16(%rbp), %rdx
-	movq	%rdi, %rsi
-	sbbq	24(%rbp), %rsi
-	movq	%rbx, %rdi
-	sbbq	32(%rbp), %rdi
-	movq	%r8, %r10
-	sbbq	40(%rbp), %r10
-	movq	%r15, %r13
-	sbbq	48(%rbp), %r13
-	movq	%r14, %r8
-	sbbq	56(%rbp), %r8
-	movq	%rbp, %rbx
-	movq	%r9, %rbp
-	sbbq	64(%rbx), %rbp
-	movq	%rbp, %rbx
-	sarq	$63, %rbx
-	cmovsq	%r11, %rax
-	movq	(%rsp), %rbx            # 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovsq	%r12, %rcx
-	movq	%rcx, 8(%rbx)
-	cmovsq	88(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 16(%rbx)
-	cmovsq	104(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, 24(%rbx)
-	cmovsq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rbx)
-	cmovsq	56(%rsp), %r10          # 8-byte Folded Reload
-	movq	%r10, 40(%rbx)
-	cmovsq	%r15, %r13
-	movq	%r13, 48(%rbx)
-	cmovsq	%r14, %r8
-	movq	%r8, 56(%rbx)
-	cmovsq	%r9, %rbp
-	movq	%rbp, 64(%rbx)
-	addq	$1560, %rsp             # imm = 0x618
+	sbbq	136(%rsp), %rcx                 # 8-byte Folded Reload
+	sbbq	%r8, %r8
+	testb	$1, %r8b
+	cmovneq	%rdx, %rcx
+	movq	144(%rsp), %rdx                 # 8-byte Reload
+	movq	%rcx, 56(%rdx)
+	cmovneq	%r13, %rax
+	movq	%rax, 48(%rdx)
+	cmovneq	%r12, %rsi
+	movq	%rsi, 40(%rdx)
+	cmovneq	%r14, %rbx
+	movq	%rbx, 32(%rdx)
+	cmovneq	%r11, %rbp
+	movq	%rbp, 24(%rdx)
+	cmovneq	%r15, %rdi
+	movq	%rdi, 16(%rdx)
+	cmovneq	8(%rsp), %r10                   # 8-byte Folded Reload
+	movq	%r10, 8(%rdx)
+	cmovneq	(%rsp), %r9                     # 8-byte Folded Reload
+	movq	%r9, (%rdx)
+	addq	$728, %rsp                      # imm = 0x2D8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13006,428 +6092,304 @@ mcl_fp_montNF9Lbmi2:                    # @mcl_fp_montNF9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end131:
-	.size	mcl_fp_montNF9Lbmi2, .Lfunc_end131-mcl_fp_montNF9Lbmi2
-
-	.globl	mcl_fp_montRed9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed9Lbmi2,@function
-mcl_fp_montRed9Lbmi2:                   # @mcl_fp_montRed9Lbmi2
-# BB#0:
+.Lfunc_end62:
+	.size	mcl_fp_montRed8Lbmi2, .Lfunc_end62-mcl_fp_montRed8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRedNF8Lbmi2          # -- Begin function mcl_fp_montRedNF8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF8Lbmi2,@function
+mcl_fp_montRedNF8Lbmi2:                 # @mcl_fp_montRedNF8Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$936, %rsp              # imm = 0x3A8
-	movq	%rdx, %rax
-	movq	%rax, 128(%rsp)         # 8-byte Spill
-	movq	%rdi, 80(%rsp)          # 8-byte Spill
-	movq	-8(%rax), %rcx
-	movq	%rcx, 120(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r14
-	movq	8(%rsi), %rdx
-	movq	%rdx, 192(%rsp)         # 8-byte Spill
-	movq	%r14, %rdx
+	subq	$728, %rsp                      # imm = 0x2D8
+	movq	%rdi, 144(%rsp)                 # 8-byte Spill
+	movq	56(%rdx), %rax
+	movq	%rax, 136(%rsp)                 # 8-byte Spill
+	movq	48(%rdx), %rax
+	movq	%rax, 128(%rsp)                 # 8-byte Spill
+	movq	40(%rdx), %rax
+	movq	%rax, 120(%rsp)                 # 8-byte Spill
+	movq	32(%rdx), %rax
+	movq	%rax, 112(%rsp)                 # 8-byte Spill
+	movq	24(%rdx), %rax
+	movq	%rax, 104(%rsp)                 # 8-byte Spill
+	movq	16(%rdx), %rax
+	movq	%rax, 96(%rsp)                  # 8-byte Spill
+	movq	8(%rdx), %rax
+	movq	%rax, 88(%rsp)                  # 8-byte Spill
+	movq	%rsi, 72(%rsp)                  # 8-byte Spill
+	movq	56(%rsi), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	48(%rsi), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	40(%rsi), %r12
+	movq	32(%rsi), %r13
+	movq	24(%rsi), %r15
+	movq	16(%rsi), %r14
+	movq	(%rsi), %rbx
+	movq	8(%rsi), %rbp
+	movq	-8(%rdx), %rcx
+	movq	%rcx, 56(%rsp)                  # 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdx, %rsi
+	movq	%rdx, 64(%rsp)                  # 8-byte Spill
+	movq	%rax, 80(%rsp)                  # 8-byte Spill
+	movq	%rbx, %rdx
 	imulq	%rcx, %rdx
-	movq	136(%rsi), %rcx
-	movq	%rcx, 112(%rsp)         # 8-byte Spill
-	movq	128(%rsi), %rcx
-	movq	%rcx, 152(%rsp)         # 8-byte Spill
-	movq	120(%rsi), %rcx
-	movq	%rcx, 104(%rsp)         # 8-byte Spill
-	movq	112(%rsi), %rcx
-	movq	%rcx, 144(%rsp)         # 8-byte Spill
-	movq	104(%rsi), %rcx
-	movq	%rcx, 184(%rsp)         # 8-byte Spill
-	movq	96(%rsi), %rcx
-	movq	%rcx, 208(%rsp)         # 8-byte Spill
-	movq	88(%rsi), %rcx
-	movq	%rcx, 200(%rsp)         # 8-byte Spill
-	movq	80(%rsi), %rcx
-	movq	%rcx, 160(%rsp)         # 8-byte Spill
-	movq	72(%rsi), %r12
-	movq	64(%rsi), %rcx
-	movq	%rcx, 176(%rsp)         # 8-byte Spill
-	movq	56(%rsi), %rcx
-	movq	%rcx, 168(%rsp)         # 8-byte Spill
-	movq	48(%rsi), %rcx
-	movq	%rcx, 136(%rsp)         # 8-byte Spill
-	movq	40(%rsi), %rbp
-	movq	32(%rsi), %rbx
-	movq	24(%rsi), %r13
-	movq	16(%rsi), %r15
-	movq	%rax, %rcx
-	movq	(%rcx), %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	movq	64(%rcx), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	56(%rcx), %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	48(%rcx), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	movq	40(%rcx), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	32(%rcx), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	24(%rcx), %rax
-	movq	%rax, 32(%rsp)          # 8-byte Spill
-	movq	16(%rcx), %rax
-	movq	%rax, 24(%rsp)          # 8-byte Spill
-	movq	8(%rcx), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	%rcx, %rsi
-	leaq	856(%rsp), %rdi
-	callq	.LmulPv576x64
-	addq	856(%rsp), %r14
-	movq	192(%rsp), %rcx         # 8-byte Reload
-	adcq	864(%rsp), %rcx
-	adcq	872(%rsp), %r15
-	adcq	880(%rsp), %r13
-	adcq	888(%rsp), %rbx
-	movq	%rbx, 88(%rsp)          # 8-byte Spill
-	adcq	896(%rsp), %rbp
-	movq	%rbp, 96(%rsp)          # 8-byte Spill
-	movq	136(%rsp), %rax         # 8-byte Reload
-	adcq	904(%rsp), %rax
-	movq	%rax, 136(%rsp)         # 8-byte Spill
-	movq	168(%rsp), %rax         # 8-byte Reload
-	adcq	912(%rsp), %rax
-	movq	%rax, 168(%rsp)         # 8-byte Spill
-	movq	176(%rsp), %rax         # 8-byte Reload
-	adcq	920(%rsp), %rax
-	movq	%rax, 176(%rsp)         # 8-byte Spill
-	adcq	928(%rsp), %r12
-	movq	%r12, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %rbp         # 8-byte Reload
-	adcq	$0, %rbp
-	adcq	$0, 200(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 208(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 184(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 144(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 104(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 152(%rsp)           # 8-byte Folded Spill
-	movq	112(%rsp), %r14         # 8-byte Reload
-	adcq	$0, %r14
-	sbbq	%r12, %r12
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	776(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %r12d
-	addq	776(%rsp), %rbx
-	adcq	784(%rsp), %r15
-	adcq	792(%rsp), %r13
-	movq	%r13, (%rsp)            # 8-byte Spill
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	800(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	808(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	136(%rsp), %rax         # 8-byte Reload
-	adcq	816(%rsp), %rax
-	movq	%rax, 136(%rsp)         # 8-byte Spill
-	movq	168(%rsp), %rax         # 8-byte Reload
-	adcq	824(%rsp), %rax
-	movq	%rax, 168(%rsp)         # 8-byte Spill
-	movq	176(%rsp), %rax         # 8-byte Reload
-	adcq	832(%rsp), %rax
-	movq	%rax, 176(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rax         # 8-byte Reload
-	adcq	840(%rsp), %rax
-	movq	%rax, 192(%rsp)         # 8-byte Spill
-	adcq	848(%rsp), %rbp
-	movq	%rbp, 160(%rsp)         # 8-byte Spill
-	movq	200(%rsp), %r13         # 8-byte Reload
-	adcq	$0, %r13
-	adcq	$0, 208(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 184(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 144(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 104(%rsp)           # 8-byte Folded Spill
-	movq	152(%rsp), %rbx         # 8-byte Reload
-	adcq	$0, %rbx
-	adcq	$0, %r14
-	movq	%r14, 112(%rsp)         # 8-byte Spill
-	adcq	$0, %r12
-	movq	%r15, %rdx
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	696(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	696(%rsp), %r15
-	movq	(%rsp), %rcx            # 8-byte Reload
-	adcq	704(%rsp), %rcx
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	712(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %rax          # 8-byte Reload
+	leaq	656(%rsp), %rdi
+	callq	mulPv512x64bmi2@PLT
+	addq	656(%rsp), %rbx
+	adcq	664(%rsp), %rbp
+	adcq	672(%rsp), %r14
+	adcq	680(%rsp), %r15
+	adcq	688(%rsp), %r13
+	adcq	696(%rsp), %r12
+	movq	%r12, 48(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	704(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rbx                   # 8-byte Reload
+	adcq	712(%rsp), %rbx
+	movq	72(%rsp), %rax                  # 8-byte Reload
+	movq	64(%rax), %rax
 	adcq	720(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	136(%rsp), %rbp         # 8-byte Reload
-	adcq	728(%rsp), %rbp
-	movq	168(%rsp), %r14         # 8-byte Reload
-	adcq	736(%rsp), %r14
-	movq	176(%rsp), %r15         # 8-byte Reload
-	adcq	744(%rsp), %r15
-	movq	192(%rsp), %rax         # 8-byte Reload
-	adcq	752(%rsp), %rax
-	movq	%rax, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %rax         # 8-byte Reload
-	adcq	760(%rsp), %rax
-	movq	%rax, 160(%rsp)         # 8-byte Spill
-	adcq	768(%rsp), %r13
-	movq	%r13, 200(%rsp)         # 8-byte Spill
-	adcq	$0, 208(%rsp)           # 8-byte Folded Spill
-	movq	184(%rsp), %r13         # 8-byte Reload
-	adcq	$0, %r13
-	adcq	$0, 144(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 104(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rbx
-	movq	%rbx, 152(%rsp)         # 8-byte Spill
-	adcq	$0, 112(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rcx, %rbx
-	movq	%rbx, %rdx
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	616(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	616(%rsp), %rbx
-	movq	88(%rsp), %rax          # 8-byte Reload
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	setb	%r12b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rbp, %rdx
+	leaq	584(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	648(%rsp), %rax
+	addb	$255, %r12b
+	adcq	$0, %rax
+	movq	%rax, %rcx
+	addq	584(%rsp), %rbp
+	adcq	592(%rsp), %r14
+	adcq	600(%rsp), %r15
+	adcq	608(%rsp), %r13
+	movq	48(%rsp), %rax                  # 8-byte Reload
+	adcq	616(%rsp), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rax                  # 8-byte Reload
 	adcq	624(%rsp), %rax
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	632(%rsp), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	adcq	640(%rsp), %rbp
-	movq	%rbp, 136(%rsp)         # 8-byte Spill
-	adcq	648(%rsp), %r14
-	movq	%r14, 168(%rsp)         # 8-byte Spill
-	adcq	656(%rsp), %r15
-	movq	192(%rsp), %r14         # 8-byte Reload
-	adcq	664(%rsp), %r14
-	movq	160(%rsp), %rbp         # 8-byte Reload
-	adcq	672(%rsp), %rbp
-	movq	200(%rsp), %rcx         # 8-byte Reload
-	adcq	680(%rsp), %rcx
-	movq	%rcx, 200(%rsp)         # 8-byte Spill
-	movq	208(%rsp), %rcx         # 8-byte Reload
-	adcq	688(%rsp), %rcx
-	movq	%rcx, 208(%rsp)         # 8-byte Spill
-	adcq	$0, %r13
-	movq	%r13, 184(%rsp)         # 8-byte Spill
-	adcq	$0, 144(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 104(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 152(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 112(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rax, %rbx
-	movq	%rbx, %rdx
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	536(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	536(%rsp), %rbx
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	544(%rsp), %rax
-	movq	136(%rsp), %rcx         # 8-byte Reload
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	adcq	632(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rax                    # 8-byte Reload
+	adcq	640(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	movq	72(%rsp), %rax                  # 8-byte Reload
+	adcq	72(%rax), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	setb	%bl
+	movq	56(%rsp), %rbp                  # 8-byte Reload
+	movq	%rbp, %rdx
+	imulq	%r14, %rdx
+	leaq	512(%rsp), %rdi
+	movq	64(%rsp), %r12                  # 8-byte Reload
+	movq	%r12, %rsi
+	callq	mulPv512x64bmi2@PLT
+	movq	576(%rsp), %rax
+	addb	$255, %bl
+	adcq	$0, %rax
+	addq	512(%rsp), %r14
+	adcq	520(%rsp), %r15
+	adcq	528(%rsp), %r13
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	536(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	adcq	544(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rcx                   # 8-byte Reload
 	adcq	552(%rsp), %rcx
-	movq	%rcx, 136(%rsp)         # 8-byte Spill
-	movq	168(%rsp), %rcx         # 8-byte Reload
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rcx                    # 8-byte Reload
 	adcq	560(%rsp), %rcx
-	movq	%rcx, 168(%rsp)         # 8-byte Spill
-	adcq	568(%rsp), %r15
-	movq	%r15, 176(%rsp)         # 8-byte Spill
-	adcq	576(%rsp), %r14
-	movq	%r14, 192(%rsp)         # 8-byte Spill
-	adcq	584(%rsp), %rbp
-	movq	%rbp, 160(%rsp)         # 8-byte Spill
-	movq	200(%rsp), %r13         # 8-byte Reload
-	adcq	592(%rsp), %r13
-	movq	208(%rsp), %r15         # 8-byte Reload
-	adcq	600(%rsp), %r15
-	movq	184(%rsp), %rbp         # 8-byte Reload
-	adcq	608(%rsp), %rbp
-	movq	144(%rsp), %rbx         # 8-byte Reload
-	adcq	$0, %rbx
-	adcq	$0, 104(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 152(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 112(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rax, %rdx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	568(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
 	movq	%rax, %r14
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	456(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	456(%rsp), %r14
-	movq	136(%rsp), %rax         # 8-byte Reload
-	adcq	464(%rsp), %rax
-	movq	168(%rsp), %rcx         # 8-byte Reload
-	adcq	472(%rsp), %rcx
-	movq	%rcx, 168(%rsp)         # 8-byte Spill
-	movq	176(%rsp), %rcx         # 8-byte Reload
+	movq	72(%rsp), %rax                  # 8-byte Reload
+	adcq	80(%rax), %r14
+	setb	%bl
+	movq	%rbp, %rdx
+	imulq	%r15, %rdx
+	leaq	440(%rsp), %rdi
+	movq	%r12, %rsi
+	callq	mulPv512x64bmi2@PLT
+	movq	504(%rsp), %rax
+	addb	$255, %bl
+	adcq	$0, %rax
+	addq	440(%rsp), %r15
+	adcq	448(%rsp), %r13
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	456(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rbx                  # 8-byte Reload
+	adcq	464(%rsp), %rbx
+	movq	8(%rsp), %rbp                   # 8-byte Reload
+	adcq	472(%rsp), %rbp
+	movq	(%rsp), %rcx                    # 8-byte Reload
 	adcq	480(%rsp), %rcx
-	movq	%rcx, 176(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rcx         # 8-byte Reload
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	movq	16(%rsp), %rcx                  # 8-byte Reload
 	adcq	488(%rsp), %rcx
-	movq	%rcx, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %rcx         # 8-byte Reload
-	adcq	496(%rsp), %rcx
-	movq	%rcx, 160(%rsp)         # 8-byte Spill
-	adcq	504(%rsp), %r13
-	movq	%r13, 200(%rsp)         # 8-byte Spill
-	adcq	512(%rsp), %r15
-	movq	%r15, 208(%rsp)         # 8-byte Spill
-	adcq	520(%rsp), %rbp
-	movq	%rbp, 184(%rsp)         # 8-byte Spill
-	adcq	528(%rsp), %rbx
-	movq	%rbx, 144(%rsp)         # 8-byte Spill
-	movq	104(%rsp), %r14         # 8-byte Reload
-	adcq	$0, %r14
-	movq	152(%rsp), %r13         # 8-byte Reload
-	adcq	$0, %r13
-	movq	112(%rsp), %rbx         # 8-byte Reload
-	adcq	$0, %rbx
-	adcq	$0, %r12
-	movq	%rax, %rdx
-	movq	%rax, %r15
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	376(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	376(%rsp), %r15
-	movq	168(%rsp), %rax         # 8-byte Reload
-	adcq	384(%rsp), %rax
-	movq	176(%rsp), %rcx         # 8-byte Reload
-	adcq	392(%rsp), %rcx
-	movq	%rcx, 176(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rcx         # 8-byte Reload
-	adcq	400(%rsp), %rcx
-	movq	%rcx, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %rbp         # 8-byte Reload
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	adcq	496(%rsp), %r14
+	movq	%r14, 40(%rsp)                  # 8-byte Spill
+	movq	72(%rsp), %r14                  # 8-byte Reload
+	adcq	88(%r14), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	setb	%r12b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r13, %rdx
+	leaq	368(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	432(%rsp), %r15
+	addb	$255, %r12b
+	adcq	$0, %r15
+	addq	368(%rsp), %r13
+	movq	48(%rsp), %r13                  # 8-byte Reload
+	adcq	376(%rsp), %r13
+	adcq	384(%rsp), %rbx
+	movq	%rbx, 24(%rsp)                  # 8-byte Spill
+	adcq	392(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rbx                    # 8-byte Reload
+	adcq	400(%rsp), %rbx
+	movq	16(%rsp), %rbp                  # 8-byte Reload
 	adcq	408(%rsp), %rbp
-	movq	200(%rsp), %rcx         # 8-byte Reload
+	movq	40(%rsp), %rcx                  # 8-byte Reload
 	adcq	416(%rsp), %rcx
-	movq	%rcx, 200(%rsp)         # 8-byte Spill
-	movq	208(%rsp), %rcx         # 8-byte Reload
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %rcx                  # 8-byte Reload
 	adcq	424(%rsp), %rcx
-	movq	%rcx, 208(%rsp)         # 8-byte Spill
-	movq	184(%rsp), %rcx         # 8-byte Reload
-	adcq	432(%rsp), %rcx
-	movq	%rcx, 184(%rsp)         # 8-byte Spill
-	movq	144(%rsp), %r15         # 8-byte Reload
-	adcq	440(%rsp), %r15
-	adcq	448(%rsp), %r14
-	movq	%r14, 104(%rsp)         # 8-byte Spill
-	adcq	$0, %r13
-	movq	%r13, %r14
-	adcq	$0, %rbx
-	movq	%rbx, 112(%rsp)         # 8-byte Spill
-	adcq	$0, %r12
-	movq	%rax, %rbx
-	movq	%rbx, %rdx
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	adcq	96(%r14), %r15
+	setb	%r14b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r13, %rdx
 	leaq	296(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	296(%rsp), %rbx
-	movq	176(%rsp), %rax         # 8-byte Reload
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	360(%rsp), %r12
+	addb	$255, %r14b
+	adcq	$0, %r12
+	addq	296(%rsp), %r13
+	movq	24(%rsp), %rax                  # 8-byte Reload
 	adcq	304(%rsp), %rax
-	movq	192(%rsp), %r13         # 8-byte Reload
-	adcq	312(%rsp), %r13
-	adcq	320(%rsp), %rbp
-	movq	200(%rsp), %rcx         # 8-byte Reload
-	adcq	328(%rsp), %rcx
-	movq	%rcx, 200(%rsp)         # 8-byte Spill
-	movq	208(%rsp), %rcx         # 8-byte Reload
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	312(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	adcq	320(%rsp), %rbx
+	movq	%rbx, (%rsp)                    # 8-byte Spill
+	adcq	328(%rsp), %rbp
+	movq	%rbp, 16(%rsp)                  # 8-byte Spill
+	movq	40(%rsp), %rcx                  # 8-byte Reload
 	adcq	336(%rsp), %rcx
-	movq	%rcx, 208(%rsp)         # 8-byte Spill
-	movq	184(%rsp), %rcx         # 8-byte Reload
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %rcx                  # 8-byte Reload
 	adcq	344(%rsp), %rcx
-	movq	%rcx, 184(%rsp)         # 8-byte Spill
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
 	adcq	352(%rsp), %r15
-	movq	%r15, 144(%rsp)         # 8-byte Spill
-	movq	104(%rsp), %r15         # 8-byte Reload
-	adcq	360(%rsp), %r15
-	adcq	368(%rsp), %r14
-	movq	%r14, 152(%rsp)         # 8-byte Spill
-	movq	112(%rsp), %r14         # 8-byte Reload
-	adcq	$0, %r14
-	adcq	$0, %r12
-	movq	120(%rsp), %rdx         # 8-byte Reload
+	movq	72(%rsp), %rbx                  # 8-byte Reload
+	adcq	104(%rbx), %r12
+	setb	%r13b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
 	imulq	%rax, %rdx
-	movq	%rax, %rbx
-	leaq	216(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	216(%rsp), %rbx
-	movq	%r13, %rsi
-	adcq	224(%rsp), %rsi
-	movq	%rsi, 192(%rsp)         # 8-byte Spill
-	adcq	232(%rsp), %rbp
-	movq	%rbp, 160(%rsp)         # 8-byte Spill
-	movq	200(%rsp), %r9          # 8-byte Reload
-	adcq	240(%rsp), %r9
-	movq	%r9, 200(%rsp)          # 8-byte Spill
-	movq	208(%rsp), %r8          # 8-byte Reload
-	adcq	248(%rsp), %r8
-	movq	%r8, 208(%rsp)          # 8-byte Spill
-	movq	184(%rsp), %rbx         # 8-byte Reload
-	adcq	256(%rsp), %rbx
-	movq	144(%rsp), %rax         # 8-byte Reload
-	adcq	264(%rsp), %rax
-	movq	%r15, %rcx
-	adcq	272(%rsp), %rcx
-	movq	152(%rsp), %rdx         # 8-byte Reload
-	adcq	280(%rsp), %rdx
-	movq	%rdx, 152(%rsp)         # 8-byte Spill
-	adcq	288(%rsp), %r14
-	movq	%r14, %r11
-	adcq	$0, %r12
-	subq	16(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rbp, %rdi
-	sbbq	8(%rsp), %rdi           # 8-byte Folded Reload
-	movq	%r9, %rbp
-	sbbq	24(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%r8, %r13
-	sbbq	32(%rsp), %r13          # 8-byte Folded Reload
-	movq	%rbx, %r15
-	sbbq	40(%rsp), %r15          # 8-byte Folded Reload
-	movq	%rax, %r14
-	sbbq	48(%rsp), %r14          # 8-byte Folded Reload
-	movq	%rcx, %r10
-	sbbq	56(%rsp), %r10          # 8-byte Folded Reload
-	movq	%rdx, %r8
-	sbbq	64(%rsp), %r8           # 8-byte Folded Reload
-	movq	%r11, %r9
-	sbbq	72(%rsp), %r9           # 8-byte Folded Reload
-	sbbq	$0, %r12
-	andl	$1, %r12d
-	cmovneq	%r11, %r9
-	testb	%r12b, %r12b
-	cmovneq	192(%rsp), %rsi         # 8-byte Folded Reload
-	movq	80(%rsp), %rdx          # 8-byte Reload
-	movq	%rsi, (%rdx)
-	cmovneq	160(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, 8(%rdx)
-	cmovneq	200(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, 16(%rdx)
-	cmovneq	208(%rsp), %r13         # 8-byte Folded Reload
-	movq	%r13, 24(%rdx)
-	cmovneq	%rbx, %r15
-	movq	%r15, 32(%rdx)
-	cmovneq	%rax, %r14
-	movq	%r14, 40(%rdx)
-	cmovneq	%rcx, %r10
-	movq	%r10, 48(%rdx)
-	cmovneq	152(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r8, 56(%rdx)
-	movq	%r9, 64(%rdx)
-	addq	$936, %rsp              # imm = 0x3A8
+	movq	%rax, %rbp
+	leaq	224(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	movq	288(%rsp), %r14
+	addb	$255, %r13b
+	adcq	$0, %r14
+	addq	224(%rsp), %rbp
+	movq	8(%rsp), %rax                   # 8-byte Reload
+	adcq	232(%rsp), %rax
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	248(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	movq	40(%rsp), %rcx                  # 8-byte Reload
+	adcq	256(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	adcq	264(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	adcq	272(%rsp), %r15
+	adcq	280(%rsp), %r12
+	adcq	112(%rbx), %r14
+	setb	%r13b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	152(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64bmi2@PLT
+	addb	$255, %r13b
+	movq	216(%rsp), %rdx
+	adcq	$0, %rdx
+	addq	152(%rsp), %rbp
+	movq	(%rsp), %r8                     # 8-byte Reload
+	adcq	160(%rsp), %r8
+	movq	%r8, (%rsp)                     # 8-byte Spill
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	168(%rsp), %rcx
+	movq	40(%rsp), %rdi                  # 8-byte Reload
+	adcq	176(%rsp), %rdi
+	movq	32(%rsp), %r10                  # 8-byte Reload
+	adcq	184(%rsp), %r10
+	adcq	192(%rsp), %r15
+	adcq	200(%rsp), %r12
+	adcq	208(%rsp), %r14
+	adcq	120(%rbx), %rdx
+	subq	80(%rsp), %r8                   # 8-byte Folded Reload
+	movq	%rcx, %r9
+	movq	%rcx, %r11
+	sbbq	88(%rsp), %r9                   # 8-byte Folded Reload
+	movq	%rdi, %rsi
+	movq	%rdi, %r13
+	sbbq	96(%rsp), %rsi                  # 8-byte Folded Reload
+	movq	%r10, %rdi
+	sbbq	104(%rsp), %rdi                 # 8-byte Folded Reload
+	movq	%r15, %rbx
+	sbbq	112(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%r12, %rbp
+	sbbq	120(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	%r14, %rax
+	sbbq	128(%rsp), %rax                 # 8-byte Folded Reload
+	movq	%rdx, %rcx
+	sbbq	136(%rsp), %rcx                 # 8-byte Folded Reload
+	cmovsq	%rdx, %rcx
+	movq	144(%rsp), %rdx                 # 8-byte Reload
+	movq	%rcx, 56(%rdx)
+	cmovsq	%r14, %rax
+	movq	%rax, 48(%rdx)
+	cmovsq	%r12, %rbp
+	movq	%rbp, 40(%rdx)
+	cmovsq	%r15, %rbx
+	movq	%rbx, 32(%rdx)
+	cmovsq	%r10, %rdi
+	movq	%rdi, 24(%rdx)
+	cmovsq	%r13, %rsi
+	movq	%rsi, 16(%rdx)
+	cmovsq	%r11, %r9
+	movq	%r9, 8(%rdx)
+	cmovsq	(%rsp), %r8                     # 8-byte Folded Reload
+	movq	%r8, (%rdx)
+	addq	$728, %rsp                      # imm = 0x2D8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13435,388 +6397,318 @@ mcl_fp_montRed9Lbmi2:                   # @mcl_fp_montRed9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end132:
-	.size	mcl_fp_montRed9Lbmi2, .Lfunc_end132-mcl_fp_montRed9Lbmi2
-
-	.globl	mcl_fp_addPre9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre9Lbmi2,@function
-mcl_fp_addPre9Lbmi2:                    # @mcl_fp_addPre9Lbmi2
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
+.Lfunc_end63:
+	.size	mcl_fp_montRedNF8Lbmi2, .Lfunc_end63-mcl_fp_montRedNF8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addPre8Lbmi2             # -- Begin function mcl_fp_addPre8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre8Lbmi2,@function
+mcl_fp_addPre8Lbmi2:                    # @mcl_fp_addPre8Lbmi2
+# %bb.0:
 	pushq	%rbx
-	movq	64(%rdx), %r8
-	movq	64(%rsi), %r15
-	movq	56(%rsi), %r9
-	movq	48(%rsi), %r10
-	movq	40(%rsi), %r11
-	movq	24(%rsi), %r12
-	movq	32(%rsi), %r14
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rcx
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rcx
-	movq	16(%rdx), %rax
-	adcq	16(%rsi), %rax
-	adcq	24(%rdx), %r12
-	movq	56(%rdx), %r13
-	movq	48(%rdx), %rsi
-	movq	40(%rdx), %rbp
-	movq	32(%rdx), %rdx
+	movq	56(%rsi), %rax
+	movq	48(%rsi), %rcx
+	movq	40(%rsi), %r8
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r10
+	movq	16(%rsi), %r11
+	movq	(%rsi), %rbx
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rbx
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r11
+	adcq	24(%rdx), %r10
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r8
+	adcq	48(%rdx), %rcx
+	adcq	56(%rdx), %rax
+	movq	%rax, 56(%rdi)
+	movq	%rcx, 48(%rdi)
+	movq	%r8, 40(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r10, 24(%rdi)
+	movq	%r11, 16(%rdi)
+	movq	%rsi, 8(%rdi)
 	movq	%rbx, (%rdi)
-	movq	%rcx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r12, 24(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 32(%rdi)
-	adcq	%r11, %rbp
-	movq	%rbp, 40(%rdi)
-	adcq	%r10, %rsi
-	movq	%rsi, 48(%rdi)
-	adcq	%r9, %r13
-	movq	%r13, 56(%rdi)
-	adcq	%r8, %r15
-	movq	%r15, 64(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
+	setb	%al
+	movzbl	%al, %eax
 	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
 	retq
-.Lfunc_end133:
-	.size	mcl_fp_addPre9Lbmi2, .Lfunc_end133-mcl_fp_addPre9Lbmi2
-
-	.globl	mcl_fp_subPre9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre9Lbmi2,@function
-mcl_fp_subPre9Lbmi2:                    # @mcl_fp_subPre9Lbmi2
-# BB#0:
-	movq	32(%rdx), %r8
-	movq	(%rsi), %rcx
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	movq	%rcx, (%rdi)
-	movq	8(%rsi), %rcx
-	sbbq	8(%rdx), %rcx
-	movq	%rcx, 8(%rdi)
-	movq	16(%rsi), %rcx
-	sbbq	16(%rdx), %rcx
-	movq	%rcx, 16(%rdi)
-	movq	24(%rsi), %rcx
-	sbbq	24(%rdx), %rcx
-	movq	%rcx, 24(%rdi)
-	movq	32(%rsi), %rcx
-	sbbq	%r8, %rcx
-	movq	40(%rdx), %r8
-	movq	%rcx, 32(%rdi)
-	movq	40(%rsi), %rcx
-	sbbq	%r8, %rcx
-	movq	48(%rdx), %r8
-	movq	%rcx, 40(%rdi)
-	movq	48(%rsi), %rcx
-	sbbq	%r8, %rcx
-	movq	56(%rdx), %r8
-	movq	%rcx, 48(%rdi)
+.Lfunc_end64:
+	.size	mcl_fp_addPre8Lbmi2, .Lfunc_end64-mcl_fp_addPre8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subPre8Lbmi2             # -- Begin function mcl_fp_subPre8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre8Lbmi2,@function
+mcl_fp_subPre8Lbmi2:                    # @mcl_fp_subPre8Lbmi2
+# %bb.0:
+	pushq	%r14
+	pushq	%rbx
 	movq	56(%rsi), %rcx
-	sbbq	%r8, %rcx
+	movq	48(%rsi), %r8
+	movq	40(%rsi), %r9
+	movq	32(%rsi), %r10
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %r14
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
+	subq	(%rdx), %r14
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %rbx
+	sbbq	24(%rdx), %r11
+	sbbq	32(%rdx), %r10
+	sbbq	40(%rdx), %r9
+	sbbq	48(%rdx), %r8
+	sbbq	56(%rdx), %rcx
 	movq	%rcx, 56(%rdi)
-	movq	64(%rdx), %rcx
-	movq	64(%rsi), %rdx
-	sbbq	%rcx, %rdx
-	movq	%rdx, 64(%rdi)
-	sbbq	$0, %rax
+	movq	%r8, 48(%rdi)
+	movq	%r9, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r11, 24(%rdi)
+	movq	%rbx, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r14, (%rdi)
+	sbbq	%rax, %rax
 	andl	$1, %eax
+	popq	%rbx
+	popq	%r14
 	retq
-.Lfunc_end134:
-	.size	mcl_fp_subPre9Lbmi2, .Lfunc_end134-mcl_fp_subPre9Lbmi2
-
-	.globl	mcl_fp_shr1_9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_9Lbmi2,@function
-mcl_fp_shr1_9Lbmi2:                     # @mcl_fp_shr1_9Lbmi2
-# BB#0:
+.Lfunc_end65:
+	.size	mcl_fp_subPre8Lbmi2, .Lfunc_end65-mcl_fp_subPre8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_shr1_8Lbmi2              # -- Begin function mcl_fp_shr1_8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_8Lbmi2,@function
+mcl_fp_shr1_8Lbmi2:                     # @mcl_fp_shr1_8Lbmi2
+# %bb.0:
 	pushq	%rbx
-	movq	64(%rsi), %r8
-	movq	56(%rsi), %r9
-	movq	48(%rsi), %r10
-	movq	40(%rsi), %r11
-	movq	32(%rsi), %rcx
-	movq	24(%rsi), %rdx
-	movq	16(%rsi), %rax
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rbx
-	movq	%rbx, (%rdi)
-	shrdq	$1, %rax, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rdx, %rax
-	movq	%rax, 16(%rdi)
-	shrdq	$1, %rcx, %rdx
-	movq	%rdx, 24(%rdi)
-	shrdq	$1, %r11, %rcx
-	movq	%rcx, 32(%rdi)
-	shrdq	$1, %r10, %r11
-	movq	%r11, 40(%rdi)
-	shrdq	$1, %r9, %r10
-	movq	%r10, 48(%rdi)
+	movq	(%rsi), %r9
+	movq	8(%rsi), %r8
+	movq	16(%rsi), %r10
+	movq	24(%rsi), %r11
+	movq	32(%rsi), %rax
+	movq	40(%rsi), %rdx
+	movq	48(%rsi), %rcx
+	movq	56(%rsi), %rsi
+	movq	%rsi, %rbx
+	shrq	%rbx
+	movq	%rbx, 56(%rdi)
+	shldq	$63, %rcx, %rsi
+	movq	%rsi, 48(%rdi)
+	shldq	$63, %rdx, %rcx
+	movq	%rcx, 40(%rdi)
+	shldq	$63, %rax, %rdx
+	movq	%rdx, 32(%rdi)
+	shldq	$63, %r11, %rax
+	movq	%rax, 24(%rdi)
+	shldq	$63, %r10, %r11
+	movq	%r11, 16(%rdi)
+	shldq	$63, %r8, %r10
+	movq	%r10, 8(%rdi)
 	shrdq	$1, %r8, %r9
-	movq	%r9, 56(%rdi)
-	shrq	%r8
-	movq	%r8, 64(%rdi)
+	movq	%r9, (%rdi)
 	popq	%rbx
 	retq
-.Lfunc_end135:
-	.size	mcl_fp_shr1_9Lbmi2, .Lfunc_end135-mcl_fp_shr1_9Lbmi2
-
-	.globl	mcl_fp_add9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add9Lbmi2,@function
-mcl_fp_add9Lbmi2:                       # @mcl_fp_add9Lbmi2
-# BB#0:
-	pushq	%r15
+.Lfunc_end66:
+	.size	mcl_fp_shr1_8Lbmi2, .Lfunc_end66-mcl_fp_shr1_8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_add8Lbmi2                # -- Begin function mcl_fp_add8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_add8Lbmi2,@function
+mcl_fp_add8Lbmi2:                       # @mcl_fp_add8Lbmi2
+# %bb.0:
 	pushq	%r14
-	pushq	%r13
-	pushq	%r12
 	pushq	%rbx
-	movq	64(%rdx), %r12
-	movq	64(%rsi), %r8
-	movq	56(%rsi), %r13
+	movq	56(%rsi), %r8
 	movq	48(%rsi), %r9
 	movq	40(%rsi), %r10
-	movq	24(%rsi), %r14
 	movq	32(%rsi), %r11
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %r15
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %r15
-	movq	16(%rdx), %rax
-	adcq	16(%rsi), %rax
+	movq	24(%rsi), %r14
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rbx
 	adcq	24(%rdx), %r14
 	adcq	32(%rdx), %r11
 	adcq	40(%rdx), %r10
-	movq	56(%rdx), %rsi
 	adcq	48(%rdx), %r9
-	movq	%rbx, (%rdi)
-	movq	%r15, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r14, 24(%rdi)
-	movq	%r11, 32(%rdi)
-	movq	%r10, 40(%rdi)
+	adcq	56(%rdx), %r8
+	movq	%r8, 56(%rdi)
 	movq	%r9, 48(%rdi)
-	adcq	%r13, %rsi
-	movq	%rsi, 56(%rdi)
-	adcq	%r12, %r8
-	movq	%r8, 64(%rdi)
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	subq	(%rcx), %rbx
-	sbbq	8(%rcx), %r15
-	sbbq	16(%rcx), %rax
+	movq	%r10, 40(%rdi)
+	movq	%r11, 32(%rdi)
+	movq	%r14, 24(%rdi)
+	movq	%rbx, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rax, (%rdi)
+	setb	%dl
+	movzbl	%dl, %edx
+	subq	(%rcx), %rax
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %rbx
 	sbbq	24(%rcx), %r14
 	sbbq	32(%rcx), %r11
 	sbbq	40(%rcx), %r10
 	sbbq	48(%rcx), %r9
-	sbbq	56(%rcx), %rsi
-	sbbq	64(%rcx), %r8
+	sbbq	56(%rcx), %r8
 	sbbq	$0, %rdx
 	testb	$1, %dl
-	jne	.LBB136_2
-# BB#1:                                 # %nocarry
-	movq	%rbx, (%rdi)
-	movq	%r15, 8(%rdi)
-	movq	%rax, 16(%rdi)
+	jne	.LBB67_2
+# %bb.1:                                # %nocarry
+	movq	%rax, (%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rbx, 16(%rdi)
 	movq	%r14, 24(%rdi)
 	movq	%r11, 32(%rdi)
 	movq	%r10, 40(%rdi)
 	movq	%r9, 48(%rdi)
-	movq	%rsi, 56(%rdi)
-	movq	%r8, 64(%rdi)
-.LBB136_2:                              # %carry
+	movq	%r8, 56(%rdi)
+.LBB67_2:                               # %carry
 	popq	%rbx
-	popq	%r12
-	popq	%r13
 	popq	%r14
-	popq	%r15
 	retq
-.Lfunc_end136:
-	.size	mcl_fp_add9Lbmi2, .Lfunc_end136-mcl_fp_add9Lbmi2
-
-	.globl	mcl_fp_addNF9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF9Lbmi2,@function
-mcl_fp_addNF9Lbmi2:                     # @mcl_fp_addNF9Lbmi2
-# BB#0:
+.Lfunc_end67:
+	.size	mcl_fp_add8Lbmi2, .Lfunc_end67-mcl_fp_add8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addNF8Lbmi2              # -- Begin function mcl_fp_addNF8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF8Lbmi2,@function
+mcl_fp_addNF8Lbmi2:                     # @mcl_fp_addNF8Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, %r8
-	movq	64(%rdx), %r10
-	movq	56(%rdx), %r11
+	movq	56(%rdx), %r8
 	movq	48(%rdx), %r9
-	movq	40(%rdx), %rax
-	movq	32(%rdx), %rdi
-	movq	24(%rdx), %rbp
-	movq	16(%rdx), %r15
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %r13
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %r13
-	adcq	16(%rsi), %r15
-	adcq	24(%rsi), %rbp
-	movq	%rbp, -40(%rsp)         # 8-byte Spill
-	adcq	32(%rsi), %rdi
-	movq	%rdi, -16(%rsp)         # 8-byte Spill
-	adcq	40(%rsi), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
+	movq	40(%rdx), %r10
+	movq	32(%rdx), %r11
+	movq	24(%rdx), %r15
+	movq	16(%rdx), %rbx
+	movq	(%rdx), %rax
+	movq	8(%rdx), %rdx
+	addq	(%rsi), %rax
+	movq	%rax, -8(%rsp)                  # 8-byte Spill
+	adcq	8(%rsi), %rdx
+	movq	%rdx, -16(%rsp)                 # 8-byte Spill
+	adcq	16(%rsi), %rbx
+	movq	%rbx, -24(%rsp)                 # 8-byte Spill
+	adcq	24(%rsi), %r15
+	adcq	32(%rsi), %r11
+	adcq	40(%rsi), %r10
 	adcq	48(%rsi), %r9
-	movq	%r9, -32(%rsp)          # 8-byte Spill
-	movq	%r9, %rdi
-	adcq	56(%rsi), %r11
-	movq	%r11, -24(%rsp)         # 8-byte Spill
-	movq	%r11, %rax
-	adcq	64(%rsi), %r10
-	movq	%r10, %r9
-	movq	%rbx, %rsi
+	adcq	56(%rsi), %r8
+	movq	%rax, %rsi
 	subq	(%rcx), %rsi
-	movq	%r13, %rdx
 	sbbq	8(%rcx), %rdx
-	movq	%r15, %r12
-	sbbq	16(%rcx), %r12
-	sbbq	24(%rcx), %rbp
-	movq	-16(%rsp), %r14         # 8-byte Reload
-	sbbq	32(%rcx), %r14
-	movq	-8(%rsp), %r11          # 8-byte Reload
-	sbbq	40(%rcx), %r11
-	movq	%rdi, %r10
-	sbbq	48(%rcx), %r10
-	movq	%rax, %rdi
-	sbbq	56(%rcx), %rdi
-	movq	%r9, %rax
-	sbbq	64(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%rbx, %rsi
-	movq	%rsi, (%r8)
-	cmovsq	%r13, %rdx
-	movq	%rdx, 8(%r8)
-	cmovsq	%r15, %r12
-	movq	%r12, 16(%r8)
-	cmovsq	-40(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, 24(%r8)
-	cmovsq	-16(%rsp), %r14         # 8-byte Folded Reload
-	movq	%r14, 32(%r8)
-	cmovsq	-8(%rsp), %r11          # 8-byte Folded Reload
-	movq	%r11, 40(%r8)
-	cmovsq	-32(%rsp), %r10         # 8-byte Folded Reload
-	movq	%r10, 48(%r8)
-	cmovsq	-24(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, 56(%r8)
-	cmovsq	%r9, %rax
-	movq	%rax, 64(%r8)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end137:
-	.size	mcl_fp_addNF9Lbmi2, .Lfunc_end137-mcl_fp_addNF9Lbmi2
-
-	.globl	mcl_fp_sub9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub9Lbmi2,@function
-mcl_fp_sub9Lbmi2:                       # @mcl_fp_sub9Lbmi2
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	64(%rdx), %r13
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r9
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r9
-	movq	16(%rsi), %r10
-	sbbq	16(%rdx), %r10
-	movq	24(%rsi), %r11
-	sbbq	24(%rdx), %r11
-	movq	32(%rsi), %r12
-	sbbq	32(%rdx), %r12
-	movq	40(%rsi), %r14
-	sbbq	40(%rdx), %r14
-	movq	48(%rsi), %r15
-	sbbq	48(%rdx), %r15
-	movq	64(%rsi), %r8
-	movq	56(%rsi), %rsi
-	sbbq	56(%rdx), %rsi
-	movq	%rax, (%rdi)
-	movq	%r9, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	movq	%r11, 24(%rdi)
-	movq	%r12, 32(%rdi)
+	sbbq	16(%rcx), %rbx
+	movq	%r15, %rax
+	sbbq	24(%rcx), %rax
+	movq	%r11, %rbp
+	sbbq	32(%rcx), %rbp
+	movq	%r10, %r14
+	sbbq	40(%rcx), %r14
+	movq	%r9, %r12
+	sbbq	48(%rcx), %r12
+	movq	%r8, %r13
+	sbbq	56(%rcx), %r13
+	cmovsq	%r8, %r13
+	movq	%r13, 56(%rdi)
+	cmovsq	%r9, %r12
+	movq	%r12, 48(%rdi)
+	cmovsq	%r10, %r14
 	movq	%r14, 40(%rdi)
-	movq	%r15, 48(%rdi)
-	movq	%rsi, 56(%rdi)
-	sbbq	%r13, %r8
-	movq	%r8, 64(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	.LBB138_2
-# BB#1:                                 # %carry
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	movq	8(%rcx), %rax
-	adcq	%r9, %rax
-	movq	%rax, 8(%rdi)
-	movq	16(%rcx), %rax
-	adcq	%r10, %rax
-	movq	%rax, 16(%rdi)
-	movq	24(%rcx), %rax
-	adcq	%r11, %rax
+	cmovsq	%r11, %rbp
+	movq	%rbp, 32(%rdi)
+	cmovsq	%r15, %rax
 	movq	%rax, 24(%rdi)
-	movq	32(%rcx), %rax
-	adcq	%r12, %rax
-	movq	%rax, 32(%rdi)
-	movq	40(%rcx), %rax
-	adcq	%r14, %rax
-	movq	%rax, 40(%rdi)
-	movq	48(%rcx), %rax
-	adcq	%r15, %rax
-	movq	%rax, 48(%rdi)
-	movq	56(%rcx), %rax
-	adcq	%rsi, %rax
-	movq	%rax, 56(%rdi)
-	movq	64(%rcx), %rax
-	adcq	%r8, %rax
-	movq	%rax, 64(%rdi)
-.LBB138_2:                              # %nocarry
+	cmovsq	-24(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%rbx, 16(%rdi)
+	cmovsq	-16(%rsp), %rdx                 # 8-byte Folded Reload
+	movq	%rdx, 8(%rdi)
+	cmovsq	-8(%rsp), %rsi                  # 8-byte Folded Reload
+	movq	%rsi, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
+	popq	%rbp
+	retq
+.Lfunc_end68:
+	.size	mcl_fp_addNF8Lbmi2, .Lfunc_end68-mcl_fp_addNF8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_sub8Lbmi2                # -- Begin function mcl_fp_sub8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_sub8Lbmi2,@function
+mcl_fp_sub8Lbmi2:                       # @mcl_fp_sub8Lbmi2
+# %bb.0:
+	pushq	%r15
+	pushq	%r14
+	pushq	%rbx
+	movq	56(%rsi), %r14
+	movq	48(%rsi), %rbx
+	movq	40(%rsi), %r11
+	movq	32(%rsi), %r10
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %r15
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %r15
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r10
+	sbbq	40(%rdx), %r11
+	sbbq	48(%rdx), %rbx
+	sbbq	56(%rdx), %r14
+	movq	%r14, 56(%rdi)
+	movq	%rbx, 48(%rdi)
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%r15, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rax, %rax
+	testb	$1, %al
+	je	.LBB69_2
+# %bb.1:                                # %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %r15
+	adcq	24(%rcx), %r9
+	adcq	32(%rcx), %r10
+	adcq	40(%rcx), %r11
+	adcq	48(%rcx), %rbx
+	adcq	56(%rcx), %r14
+	movq	%r14, 56(%rdi)
+	movq	%rbx, 48(%rdi)
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%r15, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+.LBB69_2:                               # %nocarry
+	popq	%rbx
+	popq	%r14
+	popq	%r15
 	retq
-.Lfunc_end138:
-	.size	mcl_fp_sub9Lbmi2, .Lfunc_end138-mcl_fp_sub9Lbmi2
-
-	.globl	mcl_fp_subNF9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF9Lbmi2,@function
-mcl_fp_subNF9Lbmi2:                     # @mcl_fp_subNF9Lbmi2
-# BB#0:
+.Lfunc_end69:
+	.size	mcl_fp_sub8Lbmi2, .Lfunc_end69-mcl_fp_sub8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subNF8Lbmi2              # -- Begin function mcl_fp_subNF8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF8Lbmi2,@function
+mcl_fp_subNF8Lbmi2:                     # @mcl_fp_subNF8Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
@@ -13824,68 +6716,59 @@ mcl_fp_subNF9Lbmi2:                     # @mcl_fp_subNF9Lbmi2
 	pushq	%r12
 	pushq	%rbx
 	movq	%rcx, %r8
-	movq	%rdi, %r10
-	movq	64(%rsi), %r14
-	movq	56(%rsi), %rax
-	movq	48(%rsi), %rcx
-	movq	40(%rsi), %rdi
-	movq	32(%rsi), %rbp
-	movq	24(%rsi), %rbx
+	movq	%rdi, %r9
+	movq	56(%rsi), %r14
+	movq	48(%rsi), %rax
+	movq	40(%rsi), %rcx
+	movq	32(%rsi), %rdi
+	movq	24(%rsi), %r11
 	movq	16(%rsi), %r15
 	movq	(%rsi), %r13
 	movq	8(%rsi), %r12
 	subq	(%rdx), %r13
 	sbbq	8(%rdx), %r12
 	sbbq	16(%rdx), %r15
-	sbbq	24(%rdx), %rbx
-	movq	%rbx, -40(%rsp)         # 8-byte Spill
-	sbbq	32(%rdx), %rbp
-	movq	%rbp, -32(%rsp)         # 8-byte Spill
-	sbbq	40(%rdx), %rdi
-	movq	%rdi, -24(%rsp)         # 8-byte Spill
-	sbbq	48(%rdx), %rcx
-	movq	%rcx, -16(%rsp)         # 8-byte Spill
-	sbbq	56(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	sbbq	64(%rdx), %r14
-	movq	%r14, %rdx
-	sarq	$63, %rdx
-	movq	%rdx, %rbp
-	shldq	$1, %r14, %rbp
-	movq	24(%r8), %rbx
-	andq	%rbp, %rbx
-	movq	8(%r8), %rdi
-	andq	%rbp, %rdi
-	andq	(%r8), %rbp
-	movq	64(%r8), %r11
-	andq	%rdx, %r11
-	rorxq	$63, %rdx, %rax
-	andq	56(%r8), %rdx
-	movq	48(%r8), %r9
-	andq	%rax, %r9
-	movq	40(%r8), %rsi
-	andq	%rax, %rsi
-	movq	32(%r8), %rcx
-	andq	%rax, %rcx
-	andq	16(%r8), %rax
-	addq	%r13, %rbp
-	adcq	%r12, %rdi
-	movq	%rbp, (%r10)
-	adcq	%r15, %rax
-	movq	%rdi, 8(%r10)
-	adcq	-40(%rsp), %rbx         # 8-byte Folded Reload
-	movq	%rax, 16(%r10)
-	movq	%rbx, 24(%r10)
-	adcq	-32(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, 32(%r10)
-	adcq	-24(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, 40(%r10)
-	adcq	-16(%rsp), %r9          # 8-byte Folded Reload
-	movq	%r9, 48(%r10)
-	adcq	-8(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 56(%r10)
-	adcq	%r14, %r11
-	movq	%r11, 64(%r10)
+	sbbq	24(%rdx), %r11
+	sbbq	32(%rdx), %rdi
+	movq	%rdi, -24(%rsp)                 # 8-byte Spill
+	sbbq	40(%rdx), %rcx
+	movq	%rcx, -16(%rsp)                 # 8-byte Spill
+	sbbq	48(%rdx), %rax
+	movq	%rax, -8(%rsp)                  # 8-byte Spill
+	sbbq	56(%rdx), %r14
+	movq	%r14, %rsi
+	sarq	$63, %rsi
+	movq	56(%r8), %r10
+	andq	%rsi, %r10
+	movq	48(%r8), %rbx
+	andq	%rsi, %rbx
+	movq	40(%r8), %rdi
+	andq	%rsi, %rdi
+	movq	32(%r8), %rbp
+	andq	%rsi, %rbp
+	movq	24(%r8), %rdx
+	andq	%rsi, %rdx
+	movq	16(%r8), %rcx
+	andq	%rsi, %rcx
+	movq	8(%r8), %rax
+	andq	%rsi, %rax
+	andq	(%r8), %rsi
+	addq	%r13, %rsi
+	adcq	%r12, %rax
+	movq	%rsi, (%r9)
+	adcq	%r15, %rcx
+	movq	%rax, 8(%r9)
+	movq	%rcx, 16(%r9)
+	adcq	%r11, %rdx
+	movq	%rdx, 24(%r9)
+	adcq	-24(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	%rbp, 32(%r9)
+	adcq	-16(%rsp), %rdi                 # 8-byte Folded Reload
+	movq	%rdi, 40(%r9)
+	adcq	-8(%rsp), %rbx                  # 8-byte Folded Reload
+	movq	%rbx, 48(%r9)
+	adcq	%r14, %r10
+	movq	%r10, 56(%r9)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13893,14 +6776,14 @@ mcl_fp_subNF9Lbmi2:                     # @mcl_fp_subNF9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end139:
-	.size	mcl_fp_subNF9Lbmi2, .Lfunc_end139-mcl_fp_subNF9Lbmi2
-
-	.globl	mcl_fpDbl_add9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add9Lbmi2,@function
-mcl_fpDbl_add9Lbmi2:                    # @mcl_fpDbl_add9Lbmi2
-# BB#0:
+.Lfunc_end70:
+	.size	mcl_fp_subNF8Lbmi2, .Lfunc_end70-mcl_fp_subNF8Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_add8Lbmi2             # -- Begin function mcl_fpDbl_add8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add8Lbmi2,@function
+mcl_fpDbl_add8Lbmi2:                    # @mcl_fpDbl_add8Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
@@ -13908,111 +6791,103 @@ mcl_fpDbl_add9Lbmi2:                    # @mcl_fpDbl_add9Lbmi2
 	pushq	%r12
 	pushq	%rbx
 	movq	%rcx, %r15
-	movq	136(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	128(%rdx), %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	120(%rdx), %r10
-	movq	112(%rdx), %r11
-	movq	24(%rsi), %rcx
-	movq	32(%rsi), %r14
-	movq	16(%rdx), %rbp
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rbx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rbx
-	adcq	16(%rsi), %rbp
-	adcq	24(%rdx), %rcx
-	adcq	32(%rdx), %r14
-	movq	104(%rdx), %r9
-	movq	96(%rdx), %r13
-	movq	%rax, (%rdi)
-	movq	88(%rdx), %r8
-	movq	%rbx, 8(%rdi)
-	movq	80(%rdx), %r12
-	movq	%rbp, 16(%rdi)
-	movq	40(%rdx), %rax
-	movq	%rcx, 24(%rdi)
-	movq	40(%rsi), %rbp
-	adcq	%rax, %rbp
-	movq	48(%rdx), %rcx
-	movq	%r14, 32(%rdi)
-	movq	48(%rsi), %rax
-	adcq	%rcx, %rax
-	movq	56(%rdx), %r14
-	movq	%rbp, 40(%rdi)
-	movq	56(%rsi), %rbp
-	adcq	%r14, %rbp
-	movq	72(%rdx), %rcx
-	movq	64(%rdx), %rdx
-	movq	%rax, 48(%rdi)
+	movq	120(%rsi), %rax
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	movq	112(%rsi), %rax
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	movq	104(%rsi), %rax
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	96(%rsi), %rbx
+	movq	88(%rsi), %rcx
+	movq	80(%rsi), %r8
+	movq	72(%rsi), %r10
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rbp
+	addq	(%rdx), %rax
+	movq	%rax, -8(%rsp)                  # 8-byte Spill
+	adcq	8(%rdx), %rbp
+	movq	%rbp, -16(%rsp)                 # 8-byte Spill
 	movq	64(%rsi), %rax
-	adcq	%rdx, %rax
-	movq	136(%rsi), %rbx
+	movq	56(%rsi), %rbp
+	movq	48(%rsi), %r13
+	movq	40(%rsi), %r14
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %r12
+	adcq	16(%rdx), %r12
+	adcq	24(%rdx), %r11
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r14
+	adcq	48(%rdx), %r13
+	adcq	56(%rdx), %rbp
+	adcq	64(%rdx), %rax
+	movq	%rax, -48(%rsp)                 # 8-byte Spill
+	adcq	72(%rdx), %r10
+	movq	%r8, %rax
+	adcq	80(%rdx), %rax
+	movq	%rax, -24(%rsp)                 # 8-byte Spill
+	adcq	88(%rdx), %rcx
+	movq	%rcx, -32(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rsi
+	adcq	96(%rdx), %rsi
+	movq	%rsi, -40(%rsp)                 # 8-byte Spill
+	movq	-56(%rsp), %r8                  # 8-byte Reload
+	adcq	104(%rdx), %r8
+	movq	%r8, -56(%rsp)                  # 8-byte Spill
+	movq	-64(%rsp), %rbx                 # 8-byte Reload
+	adcq	112(%rdx), %rbx
+	movq	%rbx, -64(%rsp)                 # 8-byte Spill
+	movq	-72(%rsp), %r8                  # 8-byte Reload
+	adcq	120(%rdx), %r8
 	movq	%rbp, 56(%rdi)
-	movq	72(%rsi), %rbp
-	adcq	%rcx, %rbp
-	movq	128(%rsi), %rcx
-	movq	%rax, 64(%rdi)
-	movq	80(%rsi), %rdx
-	adcq	%r12, %rdx
-	movq	88(%rsi), %r12
-	adcq	%r8, %r12
-	movq	96(%rsi), %r14
-	adcq	%r13, %r14
-	movq	%r14, -48(%rsp)         # 8-byte Spill
-	movq	104(%rsi), %rax
-	adcq	%r9, %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	120(%rsi), %rax
-	movq	112(%rsi), %rsi
-	adcq	%r11, %rsi
-	movq	%rsi, -24(%rsp)         # 8-byte Spill
-	adcq	%r10, %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	adcq	-40(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -40(%rsp)         # 8-byte Spill
-	adcq	-8(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, -8(%rsp)          # 8-byte Spill
-	sbbq	%r9, %r9
-	andl	$1, %r9d
-	movq	%rbp, %r10
-	subq	(%r15), %r10
-	movq	%rdx, %r11
-	sbbq	8(%r15), %r11
-	movq	%r12, %rbx
-	sbbq	16(%r15), %rbx
-	sbbq	24(%r15), %r14
-	movq	-32(%rsp), %r13         # 8-byte Reload
-	sbbq	32(%r15), %r13
-	movq	-24(%rsp), %rsi         # 8-byte Reload
-	sbbq	40(%r15), %rsi
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	sbbq	48(%r15), %rax
-	sbbq	56(%r15), %rcx
-	movq	-8(%rsp), %r8           # 8-byte Reload
-	sbbq	64(%r15), %r8
-	sbbq	$0, %r9
-	andl	$1, %r9d
-	cmovneq	%rbp, %r10
-	movq	%r10, 72(%rdi)
-	testb	%r9b, %r9b
-	cmovneq	%rdx, %r11
+	movq	%r13, 48(%rdi)
+	movq	%r14, 40(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r11, 24(%rdi)
+	movq	%r12, 16(%rdi)
+	movq	-16(%rsp), %rdx                 # 8-byte Reload
+	movq	%rdx, 8(%rdi)
+	movq	-8(%rsp), %rdx                  # 8-byte Reload
+	movq	%rdx, (%rdi)
+	setb	-72(%rsp)                       # 1-byte Folded Spill
+	movq	-48(%rsp), %r14                 # 8-byte Reload
+	subq	(%r15), %r14
+	movq	%r10, %r9
+	movq	%r10, %r13
+	sbbq	8(%r15), %r9
+	movq	%rax, %r11
+	sbbq	16(%r15), %r11
+	movq	%rcx, %rbp
+	sbbq	24(%r15), %rbp
+	movq	%rsi, %rbx
+	sbbq	32(%r15), %rbx
+	movq	-56(%rsp), %r12                 # 8-byte Reload
+	movq	%r12, %rax
+	sbbq	40(%r15), %rax
+	movq	-64(%rsp), %r10                 # 8-byte Reload
+	movq	%r10, %rdx
+	sbbq	48(%r15), %rdx
+	movq	%r8, %rsi
+	sbbq	56(%r15), %rsi
+	movzbl	-72(%rsp), %ecx                 # 1-byte Folded Reload
+	sbbq	$0, %rcx
+	testb	$1, %cl
+	cmovneq	%r8, %rsi
+	movq	%rsi, 120(%rdi)
+	cmovneq	%r10, %rdx
+	movq	%rdx, 112(%rdi)
+	cmovneq	%r12, %rax
+	movq	%rax, 104(%rdi)
+	cmovneq	-40(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%rbx, 96(%rdi)
+	cmovneq	-32(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	%rbp, 88(%rdi)
+	cmovneq	-24(%rsp), %r11                 # 8-byte Folded Reload
 	movq	%r11, 80(%rdi)
-	cmovneq	%r12, %rbx
-	movq	%rbx, 88(%rdi)
-	cmovneq	-48(%rsp), %r14         # 8-byte Folded Reload
-	movq	%r14, 96(%rdi)
-	cmovneq	-32(%rsp), %r13         # 8-byte Folded Reload
-	movq	%r13, 104(%rdi)
-	cmovneq	-24(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, 112(%rdi)
-	cmovneq	-16(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, 120(%rdi)
-	cmovneq	-40(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, 128(%rdi)
-	cmovneq	-8(%rsp), %r8           # 8-byte Folded Reload
-	movq	%r8, 136(%rdi)
+	cmovneq	%r13, %r9
+	movq	%r9, 72(%rdi)
+	cmovneq	-48(%rsp), %r14                 # 8-byte Folded Reload
+	movq	%r14, 64(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -14020,127 +6895,112 @@ mcl_fpDbl_add9Lbmi2:                    # @mcl_fpDbl_add9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end140:
-	.size	mcl_fpDbl_add9Lbmi2, .Lfunc_end140-mcl_fpDbl_add9Lbmi2
-
-	.globl	mcl_fpDbl_sub9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub9Lbmi2,@function
-mcl_fpDbl_sub9Lbmi2:                    # @mcl_fpDbl_sub9Lbmi2
-# BB#0:
+.Lfunc_end71:
+	.size	mcl_fpDbl_add8Lbmi2, .Lfunc_end71-mcl_fpDbl_add8Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sub8Lbmi2             # -- Begin function mcl_fpDbl_sub8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub8Lbmi2,@function
+mcl_fpDbl_sub8Lbmi2:                    # @mcl_fpDbl_sub8Lbmi2
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r14
-	movq	136(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	128(%rdx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	120(%rdx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %r11
-	movq	(%rsi), %r12
-	movq	8(%rsi), %r13
-	xorl	%r9d, %r9d
-	subq	(%rdx), %r12
-	sbbq	8(%rdx), %r13
-	sbbq	16(%rdx), %r11
+	movq	%rcx, %r11
+	movq	120(%rsi), %rax
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	movq	112(%rsi), %r12
+	movq	104(%rsi), %r15
+	movq	96(%rsi), %rax
+	movq	%rax, -48(%rsp)                 # 8-byte Spill
+	movq	88(%rsi), %r13
+	movq	80(%rsi), %rax
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	(%rsi), %rcx
+	movq	8(%rsi), %rbp
+	xorl	%eax, %eax
+	subq	(%rdx), %rcx
+	movq	%rcx, -32(%rsp)                 # 8-byte Spill
+	sbbq	8(%rdx), %rbp
+	movq	%rbp, -40(%rsp)                 # 8-byte Spill
+	movq	72(%rsi), %rbp
+	movq	64(%rsi), %rcx
+	movq	56(%rsi), %r8
+	movq	48(%rsi), %r9
+	movq	40(%rsi), %r10
+	movq	32(%rsi), %r14
 	movq	24(%rsi), %rbx
+	movq	16(%rsi), %rsi
+	sbbq	16(%rdx), %rsi
 	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %rbp
-	sbbq	32(%rdx), %rbp
-	movq	112(%rdx), %r10
-	movq	104(%rdx), %rcx
-	movq	%r12, (%rdi)
-	movq	96(%rdx), %rax
-	movq	%r13, 8(%rdi)
-	movq	88(%rdx), %r13
-	movq	%r11, 16(%rdi)
-	movq	40(%rdx), %r11
+	sbbq	32(%rdx), %r14
+	sbbq	40(%rdx), %r10
+	sbbq	48(%rdx), %r9
+	sbbq	56(%rdx), %r8
+	sbbq	64(%rdx), %rcx
+	movq	%rcx, -24(%rsp)                 # 8-byte Spill
+	sbbq	72(%rdx), %rbp
+	movq	%rbp, -16(%rsp)                 # 8-byte Spill
+	movq	-56(%rsp), %rbp                 # 8-byte Reload
+	sbbq	80(%rdx), %rbp
+	movq	%rbp, -56(%rsp)                 # 8-byte Spill
+	sbbq	88(%rdx), %r13
+	movq	%r13, -8(%rsp)                  # 8-byte Spill
+	movq	-48(%rsp), %r13                 # 8-byte Reload
+	sbbq	96(%rdx), %r13
+	movq	%r13, -48(%rsp)                 # 8-byte Spill
+	sbbq	104(%rdx), %r15
+	sbbq	112(%rdx), %r12
+	movq	-64(%rsp), %rcx                 # 8-byte Reload
+	sbbq	120(%rdx), %rcx
+	movq	%rcx, -64(%rsp)                 # 8-byte Spill
+	movq	%r8, 56(%rdi)
+	movq	%r9, 48(%rdi)
+	movq	%r10, 40(%rdi)
+	movq	%r14, 32(%rdi)
 	movq	%rbx, 24(%rdi)
-	movq	40(%rsi), %rbx
-	sbbq	%r11, %rbx
-	movq	48(%rdx), %r11
-	movq	%rbp, 32(%rdi)
-	movq	48(%rsi), %rbp
-	sbbq	%r11, %rbp
-	movq	56(%rdx), %r11
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rbx
-	sbbq	%r11, %rbx
-	movq	64(%rdx), %r11
-	movq	%rbp, 48(%rdi)
-	movq	64(%rsi), %rbp
-	sbbq	%r11, %rbp
-	movq	80(%rdx), %r8
-	movq	72(%rdx), %r11
-	movq	%rbx, 56(%rdi)
-	movq	72(%rsi), %r15
-	sbbq	%r11, %r15
-	movq	136(%rsi), %rdx
-	movq	%rbp, 64(%rdi)
-	movq	80(%rsi), %rbp
-	sbbq	%r8, %rbp
-	movq	88(%rsi), %r12
-	sbbq	%r13, %r12
-	movq	96(%rsi), %r13
-	sbbq	%rax, %r13
-	movq	104(%rsi), %rax
-	sbbq	%rcx, %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	112(%rsi), %rax
-	sbbq	%r10, %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	128(%rsi), %rax
-	movq	120(%rsi), %rcx
-	sbbq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	sbbq	-16(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	sbbq	-8(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movl	$0, %r8d
-	sbbq	$0, %r8
-	andl	$1, %r8d
-	movq	(%r14), %r10
-	cmoveq	%r9, %r10
-	testb	%r8b, %r8b
-	movq	16(%r14), %r8
-	cmoveq	%r9, %r8
-	movq	8(%r14), %rdx
-	cmoveq	%r9, %rdx
-	movq	64(%r14), %rbx
-	cmoveq	%r9, %rbx
-	movq	56(%r14), %r11
-	cmoveq	%r9, %r11
-	movq	48(%r14), %rsi
-	cmoveq	%r9, %rsi
-	movq	40(%r14), %rcx
-	cmoveq	%r9, %rcx
-	movq	32(%r14), %rax
-	cmoveq	%r9, %rax
-	cmovneq	24(%r14), %r9
-	addq	%r15, %r10
-	adcq	%rbp, %rdx
-	movq	%r10, 72(%rdi)
-	adcq	%r12, %r8
-	movq	%rdx, 80(%rdi)
-	adcq	%r13, %r9
-	movq	%r8, 88(%rdi)
-	movq	%r9, 96(%rdi)
-	adcq	-40(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, 104(%rdi)
-	adcq	-32(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, 112(%rdi)
-	adcq	-24(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, 120(%rdi)
-	adcq	-16(%rsp), %r11         # 8-byte Folded Reload
-	movq	%r11, 128(%rdi)
-	adcq	-8(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, 136(%rdi)
+	movq	%rsi, 16(%rdi)
+	movq	-40(%rsp), %rcx                 # 8-byte Reload
+	movq	%rcx, 8(%rdi)
+	movq	-32(%rsp), %rcx                 # 8-byte Reload
+	movq	%rcx, (%rdi)
+	sbbq	%rax, %rax
+	andl	$1, %eax
+	negq	%rax
+	movq	56(%r11), %r8
+	andq	%rax, %r8
+	movq	48(%r11), %r9
+	andq	%rax, %r9
+	movq	40(%r11), %r10
+	andq	%rax, %r10
+	movq	32(%r11), %rbx
+	andq	%rax, %rbx
+	movq	24(%r11), %rdx
+	andq	%rax, %rdx
+	movq	16(%r11), %rsi
+	andq	%rax, %rsi
+	movq	8(%r11), %rbp
+	andq	%rax, %rbp
+	andq	(%r11), %rax
+	addq	-24(%rsp), %rax                 # 8-byte Folded Reload
+	adcq	-16(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	%rax, 64(%rdi)
+	adcq	-56(%rsp), %rsi                 # 8-byte Folded Reload
+	movq	%rbp, 72(%rdi)
+	movq	%rsi, 80(%rdi)
+	adcq	-8(%rsp), %rdx                  # 8-byte Folded Reload
+	movq	%rdx, 88(%rdi)
+	adcq	-48(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%rbx, 96(%rdi)
+	adcq	%r15, %r10
+	movq	%r10, 104(%rdi)
+	adcq	%r12, %r9
+	movq	%r9, 112(%rdi)
+	adcq	-64(%rsp), %r8                  # 8-byte Folded Reload
+	movq	%r8, 120(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -14148,8 +7008,7 @@ mcl_fpDbl_sub9Lbmi2:                    # @mcl_fpDbl_sub9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end141:
-	.size	mcl_fpDbl_sub9Lbmi2, .Lfunc_end141-mcl_fpDbl_sub9Lbmi2
-
-
+.Lfunc_end72:
+	.size	mcl_fpDbl_sub8Lbmi2, .Lfunc_end72-mcl_fpDbl_sub8Lbmi2
+                                        # -- End function
 	.section	".note.GNU-stack","",@progbits
diff --git a/src/asm/x86-64.s b/src/asm/x86-64.s
index aa677d2e..eb892dcb 100644
--- a/src/asm/x86-64.s
+++ b/src/asm/x86-64.s
@@ -1,80 +1,82 @@
 	.text
-	.file	"<stdin>"
-	.globl	makeNIST_P192L
-	.align	16, 0x90
+	.file	"base64.ll"
+	.globl	makeNIST_P192L                  # -- Begin function makeNIST_P192L
+	.p2align	4, 0x90
 	.type	makeNIST_P192L,@function
 makeNIST_P192L:                         # @makeNIST_P192L
-# BB#0:
+# %bb.0:
 	movq	$-1, %rax
 	movq	$-2, %rdx
 	movq	$-1, %rcx
 	retq
 .Lfunc_end0:
 	.size	makeNIST_P192L, .Lfunc_end0-makeNIST_P192L
-
-	.globl	mcl_fpDbl_mod_NIST_P192L
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fpDbl_mod_NIST_P192L        # -- Begin function mcl_fpDbl_mod_NIST_P192L
+	.p2align	4, 0x90
 	.type	mcl_fpDbl_mod_NIST_P192L,@function
 mcl_fpDbl_mod_NIST_P192L:               # @mcl_fpDbl_mod_NIST_P192L
-# BB#0:
+# %bb.0:
 	pushq	%r14
 	pushq	%rbx
-	movq	16(%rsi), %r10
+	movq	16(%rsi), %rbx
 	movq	24(%rsi), %r8
 	movq	40(%rsi), %r9
-	movq	8(%rsi), %rax
-	addq	%r9, %rax
-	adcq	$0, %r10
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
+	movq	8(%rsi), %rdx
+	addq	%r9, %rdx
+	adcq	$0, %rbx
+	setb	%cl
+	movzbl	%cl, %r10d
 	movq	32(%rsi), %r11
 	movq	(%rsi), %r14
 	addq	%r8, %r14
-	adcq	%r11, %rax
-	adcq	%r9, %r10
-	adcq	$0, %rcx
-	addq	%r9, %r14
-	adcq	%r8, %rax
-	adcq	%r11, %r10
-	adcq	$0, %rcx
-	addq	%rcx, %r14
-	adcq	%rax, %rcx
+	adcq	%r11, %rdx
+	adcq	%r9, %rbx
 	adcq	$0, %r10
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r14, %rsi
-	addq	$1, %rsi
-	movq	%rcx, %rdx
-	adcq	$1, %rdx
-	movq	%r10, %rbx
+	addq	%r9, %r14
+	adcq	%r8, %rdx
+	adcq	%r11, %rbx
+	setb	%r8b
+	movq	%r10, %r9
+	adcq	$0, %r9
+	addb	$255, %r8b
+	adcq	%r10, %r14
+	adcq	%rdx, %r9
 	adcq	$0, %rbx
-	adcq	$-1, %rax
-	andl	$1, %eax
-	cmovneq	%r14, %rsi
-	movq	%rsi, (%rdi)
-	testb	%al, %al
-	cmovneq	%rcx, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovneq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
+	setb	%dl
+	movzbl	%dl, %edx
+	movq	%r14, %rcx
+	addq	$1, %rcx
+	movq	%r9, %rsi
+	adcq	$1, %rsi
+	movq	%rbx, %rax
+	adcq	$0, %rax
+	adcq	$-1, %rdx
+	testb	$1, %dl
+	cmovneq	%rbx, %rax
+	movq	%rax, 16(%rdi)
+	cmovneq	%r9, %rsi
+	movq	%rsi, 8(%rdi)
+	cmovneq	%r14, %rcx
+	movq	%rcx, (%rdi)
 	popq	%rbx
 	popq	%r14
 	retq
 .Lfunc_end1:
 	.size	mcl_fpDbl_mod_NIST_P192L, .Lfunc_end1-mcl_fpDbl_mod_NIST_P192L
-
-	.globl	mcl_fp_sqr_NIST_P192L
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fp_sqr_NIST_P192L           # -- Begin function mcl_fp_sqr_NIST_P192L
+	.p2align	4, 0x90
 	.type	mcl_fp_sqr_NIST_P192L,@function
 mcl_fp_sqr_NIST_P192L:                  # @mcl_fp_sqr_NIST_P192L
-# BB#0:
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
 	movq	16(%rsi), %r11
 	movq	(%rsi), %rbx
 	movq	8(%rsi), %rcx
@@ -90,7 +92,7 @@ mcl_fp_sqr_NIST_P192L:                  # @mcl_fp_sqr_NIST_P192L
 	mulq	%rbx
 	movq	%rax, %r13
 	movq	%rdx, %rcx
-	addq	%rcx, %r12
+	addq	%rdx, %r12
 	adcq	%r14, %r15
 	movq	%rdi, %r10
 	adcq	$0, %r10
@@ -121,37 +123,39 @@ mcl_fp_sqr_NIST_P192L:                  # @mcl_fp_sqr_NIST_P192L
 	adcq	$0, %rdx
 	addq	%rdx, %rsi
 	adcq	$0, %rcx
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
+	setb	%bl
+	movzbl	%bl, %edi
 	addq	%r9, %r8
 	adcq	%rax, %rsi
 	adcq	%rdx, %rcx
-	adcq	$0, %rbp
+	adcq	$0, %rdi
 	addq	%rdx, %r8
 	adcq	%r9, %rsi
 	adcq	%rax, %rcx
-	adcq	$0, %rbp
-	addq	%rbp, %r8
-	adcq	%rsi, %rbp
+	setb	%al
+	movq	%rdi, %rdx
+	adcq	$0, %rdx
+	addb	$255, %al
+	adcq	%rdi, %r8
+	adcq	%rsi, %rdx
 	adcq	$0, %rcx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r8, %rdx
-	addq	$1, %rdx
-	movq	%rbp, %rsi
-	adcq	$1, %rsi
-	movq	%rcx, %rdi
-	adcq	$0, %rdi
+	setb	%al
+	movzbl	%al, %eax
+	movq	%r8, %rsi
+	addq	$1, %rsi
+	movq	%rdx, %rdi
+	adcq	$1, %rdi
+	movq	%rcx, %rbp
+	adcq	$0, %rbp
 	adcq	$-1, %rax
-	andl	$1, %eax
-	cmovneq	%r8, %rdx
-	movq	-8(%rsp), %rbx          # 8-byte Reload
-	movq	%rdx, (%rbx)
-	testb	%al, %al
-	cmovneq	%rbp, %rsi
-	movq	%rsi, 8(%rbx)
-	cmovneq	%rcx, %rdi
-	movq	%rdi, 16(%rbx)
+	testb	$1, %al
+	cmovneq	%rcx, %rbp
+	movq	-8(%rsp), %rax                  # 8-byte Reload
+	movq	%rbp, 16(%rax)
+	cmovneq	%rdx, %rdi
+	movq	%rdi, 8(%rax)
+	cmovneq	%r8, %rsi
+	movq	%rsi, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -161,68 +165,70 @@ mcl_fp_sqr_NIST_P192L:                  # @mcl_fp_sqr_NIST_P192L
 	retq
 .Lfunc_end2:
 	.size	mcl_fp_sqr_NIST_P192L, .Lfunc_end2-mcl_fp_sqr_NIST_P192L
-
-	.globl	mcl_fp_mulNIST_P192L
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fp_mulNIST_P192L            # -- Begin function mcl_fp_mulNIST_P192L
+	.p2align	4, 0x90
 	.type	mcl_fp_mulNIST_P192L,@function
 mcl_fp_mulNIST_P192L:                   # @mcl_fp_mulNIST_P192L
-# BB#0:
+# %bb.0:
 	pushq	%r14
 	pushq	%rbx
 	subq	$56, %rsp
 	movq	%rdi, %r14
 	leaq	8(%rsp), %rdi
 	callq	mcl_fpDbl_mulPre3L@PLT
-	movq	24(%rsp), %r9
+	movq	24(%rsp), %rbx
 	movq	32(%rsp), %r8
-	movq	48(%rsp), %rdi
-	movq	16(%rsp), %rbx
-	addq	%rdi, %rbx
-	adcq	$0, %r9
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	movq	40(%rsp), %rsi
-	movq	8(%rsp), %rdx
-	addq	%r8, %rdx
-	adcq	%rsi, %rbx
-	adcq	%rdi, %r9
+	movq	48(%rsp), %rax
+	movq	16(%rsp), %rdi
+	addq	%rax, %rdi
+	adcq	$0, %rbx
+	setb	%cl
+	movzbl	%cl, %esi
+	movq	40(%rsp), %rdx
+	movq	8(%rsp), %r9
+	addq	%r8, %r9
+	adcq	%rdx, %rdi
+	adcq	%rax, %rbx
+	adcq	$0, %rsi
+	addq	%rax, %r9
+	adcq	%r8, %rdi
+	adcq	%rdx, %rbx
+	setb	%dl
+	movq	%rsi, %rcx
 	adcq	$0, %rcx
-	addq	%rdi, %rdx
-	adcq	%r8, %rbx
+	addb	$255, %dl
 	adcq	%rsi, %r9
-	adcq	$0, %rcx
-	addq	%rcx, %rdx
-	adcq	%rbx, %rcx
-	adcq	$0, %r9
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rdx, %rdi
+	adcq	%rdi, %rcx
+	adcq	$0, %rbx
+	setb	%dl
+	movzbl	%dl, %edx
+	movq	%r9, %rdi
 	addq	$1, %rdi
-	movq	%rcx, %rbx
-	adcq	$1, %rbx
-	movq	%r9, %rax
+	movq	%rcx, %rsi
+	adcq	$1, %rsi
+	movq	%rbx, %rax
 	adcq	$0, %rax
-	adcq	$-1, %rsi
-	andl	$1, %esi
-	cmovneq	%rdx, %rdi
-	movq	%rdi, (%r14)
-	testb	%sil, %sil
-	cmovneq	%rcx, %rbx
-	movq	%rbx, 8(%r14)
-	cmovneq	%r9, %rax
+	adcq	$-1, %rdx
+	testb	$1, %dl
+	cmovneq	%rbx, %rax
 	movq	%rax, 16(%r14)
+	cmovneq	%rcx, %rsi
+	movq	%rsi, 8(%r14)
+	cmovneq	%r9, %rdi
+	movq	%rdi, (%r14)
 	addq	$56, %rsp
 	popq	%rbx
 	popq	%r14
 	retq
 .Lfunc_end3:
 	.size	mcl_fp_mulNIST_P192L, .Lfunc_end3-mcl_fp_mulNIST_P192L
-
-	.globl	mcl_fpDbl_mod_NIST_P521L
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fpDbl_mod_NIST_P521L        # -- Begin function mcl_fpDbl_mod_NIST_P521L
+	.p2align	4, 0x90
 	.type	mcl_fpDbl_mod_NIST_P521L,@function
 mcl_fpDbl_mod_NIST_P521L:               # @mcl_fpDbl_mod_NIST_P521L
-# BB#0:
+# %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r12
@@ -246,7 +252,8 @@ mcl_fpDbl_mod_NIST_P521L:               # @mcl_fpDbl_mod_NIST_P521L
 	shldq	$55, %rax, %rcx
 	shrq	$9, %r14
 	shldq	$55, %rbx, %rax
-	andl	$511, %ebx              # imm = 0x1FF
+	movl	%ebx, %edx
+	andl	$511, %edx                      # imm = 0x1FF
 	addq	(%rsi), %rax
 	adcq	8(%rsi), %rcx
 	adcq	16(%rsi), %r12
@@ -255,9 +262,9 @@ mcl_fpDbl_mod_NIST_P521L:               # @mcl_fpDbl_mod_NIST_P521L
 	adcq	40(%rsi), %r10
 	adcq	48(%rsi), %r9
 	adcq	56(%rsi), %r8
-	adcq	%r14, %rbx
-	movq	%rbx, %rsi
-	shrq	$9, %rsi
+	adcq	%r14, %rdx
+	movl	%edx, %esi
+	shrl	$9, %esi
 	andl	$1, %esi
 	addq	%rax, %rsi
 	adcq	$0, %rcx
@@ -267,7 +274,7 @@ mcl_fpDbl_mod_NIST_P521L:               # @mcl_fpDbl_mod_NIST_P521L
 	adcq	$0, %r10
 	adcq	$0, %r9
 	adcq	$0, %r8
-	adcq	$0, %rbx
+	adcq	$0, %rdx
 	movq	%rsi, %rax
 	andq	%r12, %rax
 	andq	%r15, %rax
@@ -275,23 +282,23 @@ mcl_fpDbl_mod_NIST_P521L:               # @mcl_fpDbl_mod_NIST_P521L
 	andq	%r10, %rax
 	andq	%r9, %rax
 	andq	%r8, %rax
-	movq	%rbx, %rdx
-	orq	$-512, %rdx             # imm = 0xFFFFFFFFFFFFFE00
-	andq	%rax, %rdx
-	andq	%rcx, %rdx
-	cmpq	$-1, %rdx
+	movq	%rdx, %rbx
+	orq	$-512, %rbx                     # imm = 0xFE00
+	andq	%rax, %rbx
+	andq	%rcx, %rbx
+	cmpq	$-1, %rbx
 	je	.LBB4_1
-# BB#3:                                 # %nonzero
-	movq	%rsi, (%rdi)
-	movq	%rcx, 8(%rdi)
-	movq	%r12, 16(%rdi)
-	movq	%r15, 24(%rdi)
-	movq	%r11, 32(%rdi)
-	movq	%r10, 40(%rdi)
-	movq	%r9, 48(%rdi)
+# %bb.3:                                # %nonzero
 	movq	%r8, 56(%rdi)
-	andl	$511, %ebx              # imm = 0x1FF
-	movq	%rbx, 64(%rdi)
+	movq	%r9, 48(%rdi)
+	movq	%r10, 40(%rdi)
+	movq	%r11, 32(%rdi)
+	movq	%r15, 24(%rdi)
+	movq	%r12, 16(%rdi)
+	movq	%rcx, 8(%rdi)
+	movq	%rsi, (%rdi)
+	andl	$511, %edx                      # imm = 0x1FF
+	movq	%rdx, 64(%rdi)
 	jmp	.LBB4_2
 .LBB4_1:                                # %zero
 	movq	$0, 64(%rdi)
@@ -311,459 +318,203 @@ mcl_fpDbl_mod_NIST_P521L:               # @mcl_fpDbl_mod_NIST_P521L
 	retq
 .Lfunc_end4:
 	.size	mcl_fpDbl_mod_NIST_P521L, .Lfunc_end4-mcl_fpDbl_mod_NIST_P521L
-
-	.globl	mcl_fp_mulUnitPre1L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre1L,@function
-mcl_fp_mulUnitPre1L:                    # @mcl_fp_mulUnitPre1L
-# BB#0:
+                                        # -- End function
+	.globl	mulPv192x64                     # -- Begin function mulPv192x64
+	.p2align	4, 0x90
+	.type	mulPv192x64,@function
+mulPv192x64:                            # @mulPv192x64
+# %bb.0:
+	movq	%rdx, %rcx
 	movq	%rdx, %rax
 	mulq	(%rsi)
+	movq	%rdx, %r8
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
+	movq	%rcx, %rax
+	mulq	16(%rsi)
+	movq	%rdx, %r9
+	movq	%rax, %r10
+	movq	%rcx, %rax
+	mulq	8(%rsi)
+	addq	%r8, %rax
+	movq	%rax, 8(%rdi)
+	adcq	%r10, %rdx
+	movq	%rdx, 16(%rdi)
+	adcq	$0, %r9
+	movq	%r9, 24(%rdi)
+	movq	%rdi, %rax
 	retq
 .Lfunc_end5:
-	.size	mcl_fp_mulUnitPre1L, .Lfunc_end5-mcl_fp_mulUnitPre1L
-
-	.globl	mcl_fpDbl_mulPre1L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre1L,@function
-mcl_fpDbl_mulPre1L:                     # @mcl_fpDbl_mulPre1L
-# BB#0:
-	movq	(%rdx), %rax
+	.size	mulPv192x64, .Lfunc_end5-mulPv192x64
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre3L             # -- Begin function mcl_fp_mulUnitPre3L
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre3L,@function
+mcl_fp_mulUnitPre3L:                    # @mcl_fp_mulUnitPre3L
+# %bb.0:
+	movq	%rdx, %rcx
+	movq	%rdx, %rax
+	mulq	16(%rsi)
+	movq	%rdx, %r8
+	movq	%rax, %r9
+	movq	%rcx, %rax
+	mulq	8(%rsi)
+	movq	%rdx, %r10
+	movq	%rax, %r11
+	movq	%rcx, %rax
 	mulq	(%rsi)
 	movq	%rax, (%rdi)
+	addq	%r11, %rdx
 	movq	%rdx, 8(%rdi)
+	adcq	%r9, %r10
+	movq	%r10, 16(%rdi)
+	adcq	$0, %r8
+	movq	%r8, 24(%rdi)
 	retq
 .Lfunc_end6:
-	.size	mcl_fpDbl_mulPre1L, .Lfunc_end6-mcl_fpDbl_mulPre1L
-
-	.globl	mcl_fpDbl_sqrPre1L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre1L,@function
-mcl_fpDbl_sqrPre1L:                     # @mcl_fpDbl_sqrPre1L
-# BB#0:
-	movq	(%rsi), %rax
-	mulq	%rax
+	.size	mcl_fp_mulUnitPre3L, .Lfunc_end6-mcl_fp_mulUnitPre3L
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre3L              # -- Begin function mcl_fpDbl_mulPre3L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre3L,@function
+mcl_fpDbl_mulPre3L:                     # @mcl_fpDbl_mulPre3L
+# %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %r11
+	movq	(%rsi), %r8
+	movq	8(%rsi), %r10
+	movq	(%rdx), %rcx
+	movq	%r8, %rax
+	mulq	%rcx
+	movq	%rdx, -8(%rsp)                  # 8-byte Spill
+	movq	16(%rsi), %r12
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	retq
-.Lfunc_end7:
-	.size	mcl_fpDbl_sqrPre1L, .Lfunc_end7-mcl_fpDbl_sqrPre1L
-
-	.globl	mcl_fp_mont1L
-	.align	16, 0x90
-	.type	mcl_fp_mont1L,@function
-mcl_fp_mont1L:                          # @mcl_fp_mont1L
-# BB#0:
-	movq	(%rsi), %rax
-	mulq	(%rdx)
-	movq	%rax, %rsi
-	movq	%rdx, %r8
-	movq	-8(%rcx), %rax
-	imulq	%rsi, %rax
-	movq	(%rcx), %rcx
+	movq	%r12, %rax
 	mulq	%rcx
-	addq	%rsi, %rax
-	adcq	%r8, %rdx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rdx, %rsi
-	subq	%rcx, %rsi
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	%rdx, %rsi
-	movq	%rsi, (%rdi)
-	retq
-.Lfunc_end8:
-	.size	mcl_fp_mont1L, .Lfunc_end8-mcl_fp_mont1L
-
-	.globl	mcl_fp_montNF1L
-	.align	16, 0x90
-	.type	mcl_fp_montNF1L,@function
-mcl_fp_montNF1L:                        # @mcl_fp_montNF1L
-# BB#0:
-	movq	(%rsi), %rax
-	mulq	(%rdx)
-	movq	%rax, %rsi
-	movq	%rdx, %r8
-	movq	-8(%rcx), %rax
-	imulq	%rsi, %rax
-	movq	(%rcx), %rcx
+	movq	%rdx, %r9
+	movq	%rax, -16(%rsp)                 # 8-byte Spill
+	movq	%r10, %rax
 	mulq	%rcx
-	addq	%rsi, %rax
-	adcq	%r8, %rdx
-	movq	%rdx, %rax
-	subq	%rcx, %rax
-	cmovsq	%rdx, %rax
-	movq	%rax, (%rdi)
-	retq
-.Lfunc_end9:
-	.size	mcl_fp_montNF1L, .Lfunc_end9-mcl_fp_montNF1L
-
-	.globl	mcl_fp_montRed1L
-	.align	16, 0x90
-	.type	mcl_fp_montRed1L,@function
-mcl_fp_montRed1L:                       # @mcl_fp_montRed1L
-# BB#0:
-	movq	(%rsi), %rcx
-	movq	-8(%rdx), %rax
-	imulq	%rcx, %rax
-	movq	(%rdx), %r8
+	movq	%rax, %rbx
+	movq	%rdx, %rcx
+	movq	8(%r11), %rsi
+	movq	%rsi, %rax
+	mulq	%r12
+	movq	%rdx, %r13
+	movq	%rax, %rbp
+	movq	%rsi, %rax
+	mulq	%r10
+	movq	%rdx, %r14
+	movq	%rax, %r15
+	movq	%rsi, %rax
+	mulq	%r8
+	addq	%r15, %rdx
+	adcq	%rbp, %r14
+	adcq	$0, %r13
+	addq	-8(%rsp), %rbx                  # 8-byte Folded Reload
+	adcq	-16(%rsp), %rcx                 # 8-byte Folded Reload
+	adcq	$0, %r9
+	addq	%rax, %rbx
+	movq	%rbx, 8(%rdi)
+	adcq	%rdx, %rcx
+	adcq	%r14, %r9
+	adcq	$0, %r13
+	movq	16(%r11), %rsi
+	movq	%rsi, %rax
+	mulq	%r12
+	movq	%rdx, %rbp
+	movq	%rax, %r11
+	movq	%rsi, %rax
+	mulq	%r10
+	movq	%rdx, %rbx
+	movq	%rax, %r10
+	movq	%rsi, %rax
 	mulq	%r8
+	addq	%r10, %rdx
+	adcq	%r11, %rbx
+	adcq	$0, %rbp
 	addq	%rcx, %rax
-	adcq	8(%rsi), %rdx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rdx, %rcx
-	subq	%r8, %rcx
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	%rdx, %rcx
-	movq	%rcx, (%rdi)
-	retq
-.Lfunc_end10:
-	.size	mcl_fp_montRed1L, .Lfunc_end10-mcl_fp_montRed1L
-
-	.globl	mcl_fp_addPre1L
-	.align	16, 0x90
-	.type	mcl_fp_addPre1L,@function
-mcl_fp_addPre1L:                        # @mcl_fp_addPre1L
-# BB#0:
-	movq	(%rdx), %rax
-	addq	(%rsi), %rax
-	movq	%rax, (%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-.Lfunc_end11:
-	.size	mcl_fp_addPre1L, .Lfunc_end11-mcl_fp_addPre1L
-
-	.globl	mcl_fp_subPre1L
-	.align	16, 0x90
-	.type	mcl_fp_subPre1L,@function
-mcl_fp_subPre1L:                        # @mcl_fp_subPre1L
-# BB#0:
-	movq	(%rsi), %rcx
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	movq	%rcx, (%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	retq
-.Lfunc_end12:
-	.size	mcl_fp_subPre1L, .Lfunc_end12-mcl_fp_subPre1L
-
-	.globl	mcl_fp_shr1_1L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_1L,@function
-mcl_fp_shr1_1L:                         # @mcl_fp_shr1_1L
-# BB#0:
-	movq	(%rsi), %rax
-	shrq	%rax
-	movq	%rax, (%rdi)
-	retq
-.Lfunc_end13:
-	.size	mcl_fp_shr1_1L, .Lfunc_end13-mcl_fp_shr1_1L
-
-	.globl	mcl_fp_add1L
-	.align	16, 0x90
-	.type	mcl_fp_add1L,@function
-mcl_fp_add1L:                           # @mcl_fp_add1L
-# BB#0:
-	movq	(%rdx), %rax
-	addq	(%rsi), %rax
-	movq	%rax, (%rdi)
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	subq	(%rcx), %rax
-	sbbq	$0, %rdx
-	testb	$1, %dl
-	jne	.LBB14_2
-# BB#1:                                 # %nocarry
-	movq	%rax, (%rdi)
-.LBB14_2:                               # %carry
-	retq
-.Lfunc_end14:
-	.size	mcl_fp_add1L, .Lfunc_end14-mcl_fp_add1L
-
-	.globl	mcl_fp_addNF1L
-	.align	16, 0x90
-	.type	mcl_fp_addNF1L,@function
-mcl_fp_addNF1L:                         # @mcl_fp_addNF1L
-# BB#0:
-	movq	(%rdx), %rax
-	addq	(%rsi), %rax
-	movq	%rax, %rdx
-	subq	(%rcx), %rdx
-	cmovsq	%rax, %rdx
-	movq	%rdx, (%rdi)
-	retq
-.Lfunc_end15:
-	.size	mcl_fp_addNF1L, .Lfunc_end15-mcl_fp_addNF1L
-
-	.globl	mcl_fp_sub1L
-	.align	16, 0x90
-	.type	mcl_fp_sub1L,@function
-mcl_fp_sub1L:                           # @mcl_fp_sub1L
-# BB#0:
-	movq	(%rsi), %rax
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	movq	%rax, (%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB16_2
-# BB#1:                                 # %nocarry
-	retq
-.LBB16_2:                               # %carry
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	retq
-.Lfunc_end16:
-	.size	mcl_fp_sub1L, .Lfunc_end16-mcl_fp_sub1L
-
-	.globl	mcl_fp_subNF1L
-	.align	16, 0x90
-	.type	mcl_fp_subNF1L,@function
-mcl_fp_subNF1L:                         # @mcl_fp_subNF1L
-# BB#0:
-	movq	(%rsi), %rax
-	subq	(%rdx), %rax
-	movq	%rax, %rdx
-	sarq	$63, %rdx
-	andq	(%rcx), %rdx
-	addq	%rax, %rdx
-	movq	%rdx, (%rdi)
-	retq
-.Lfunc_end17:
-	.size	mcl_fp_subNF1L, .Lfunc_end17-mcl_fp_subNF1L
-
-	.globl	mcl_fpDbl_add1L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add1L,@function
-mcl_fpDbl_add1L:                        # @mcl_fpDbl_add1L
-# BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	movq	%rax, (%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rdx, %rsi
-	subq	(%rcx), %rsi
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	%rdx, %rsi
-	movq	%rsi, 8(%rdi)
-	retq
-.Lfunc_end18:
-	.size	mcl_fpDbl_add1L, .Lfunc_end18-mcl_fpDbl_add1L
-
-	.globl	mcl_fpDbl_sub1L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub1L,@function
-mcl_fpDbl_sub1L:                        # @mcl_fpDbl_sub1L
-# BB#0:
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r8
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r8
-	movq	%rax, (%rdi)
-	movl	$0, %eax
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	(%rcx), %rsi
-	addq	%r8, %rsi
-	movq	%rsi, 8(%rdi)
-	retq
-.Lfunc_end19:
-	.size	mcl_fpDbl_sub1L, .Lfunc_end19-mcl_fpDbl_sub1L
-
-	.globl	mcl_fp_mulUnitPre2L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre2L,@function
-mcl_fp_mulUnitPre2L:                    # @mcl_fp_mulUnitPre2L
-# BB#0:
-	movq	%rdx, %r8
-	movq	%r8, %rax
-	mulq	8(%rsi)
-	movq	%rdx, %rcx
-	movq	%rax, %r9
-	movq	%r8, %rax
-	mulq	(%rsi)
-	movq	%rax, (%rdi)
-	addq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	$0, %rcx
-	movq	%rcx, 16(%rdi)
-	retq
-.Lfunc_end20:
-	.size	mcl_fp_mulUnitPre2L, .Lfunc_end20-mcl_fp_mulUnitPre2L
-
-	.globl	mcl_fpDbl_mulPre2L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre2L,@function
-mcl_fpDbl_mulPre2L:                     # @mcl_fpDbl_mulPre2L
-# BB#0:
-	pushq	%r14
-	pushq	%rbx
-	movq	%rdx, %r10
-	movq	(%rsi), %r8
-	movq	8(%rsi), %r11
-	movq	(%r10), %rcx
-	movq	%r8, %rax
-	mulq	%rcx
-	movq	%rdx, %r9
-	movq	%rax, (%rdi)
-	movq	%r11, %rax
-	mulq	%rcx
-	movq	%rdx, %r14
-	movq	%rax, %rsi
-	addq	%r9, %rsi
-	adcq	$0, %r14
-	movq	8(%r10), %rbx
-	movq	%r11, %rax
-	mulq	%rbx
-	movq	%rdx, %r9
-	movq	%rax, %rcx
-	movq	%r8, %rax
-	mulq	%rbx
-	addq	%rsi, %rax
-	movq	%rax, 8(%rdi)
-	adcq	%r14, %rcx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%rdx, %rcx
-	movq	%rcx, 16(%rdi)
-	adcq	%r9, %rax
-	movq	%rax, 24(%rdi)
+	movq	%rax, 16(%rdi)
+	adcq	%r9, %rdx
+	movq	%rdx, 24(%rdi)
+	adcq	%r13, %rbx
+	movq	%rbx, 32(%rdi)
+	adcq	$0, %rbp
+	movq	%rbp, 40(%rdi)
 	popq	%rbx
+	popq	%r12
+	popq	%r13
 	popq	%r14
+	popq	%r15
+	popq	%rbp
 	retq
-.Lfunc_end21:
-	.size	mcl_fpDbl_mulPre2L, .Lfunc_end21-mcl_fpDbl_mulPre2L
-
-	.globl	mcl_fpDbl_sqrPre2L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre2L,@function
-mcl_fpDbl_sqrPre2L:                     # @mcl_fpDbl_sqrPre2L
-# BB#0:
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %r8
-	movq	%rcx, %rax
-	mulq	%rcx
-	movq	%rdx, %rsi
-	movq	%rax, (%rdi)
-	movq	%r8, %rax
-	mulq	%rcx
-	movq	%rdx, %r9
-	movq	%rax, %r10
-	addq	%r10, %rsi
-	movq	%r9, %rcx
-	adcq	$0, %rcx
-	movq	%r8, %rax
-	mulq	%r8
-	addq	%r10, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%rcx, %rax
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r9, %rax
-	movq	%rax, 16(%rdi)
-	adcq	%rdx, %rcx
-	movq	%rcx, 24(%rdi)
-	retq
-.Lfunc_end22:
-	.size	mcl_fpDbl_sqrPre2L, .Lfunc_end22-mcl_fpDbl_sqrPre2L
-
-	.globl	mcl_fp_mont2L
-	.align	16, 0x90
-	.type	mcl_fp_mont2L,@function
-mcl_fp_mont2L:                          # @mcl_fp_mont2L
-# BB#0:
+.Lfunc_end7:
+	.size	mcl_fpDbl_mulPre3L, .Lfunc_end7-mcl_fpDbl_mulPre3L
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre3L              # -- Begin function mcl_fpDbl_sqrPre3L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre3L,@function
+mcl_fpDbl_sqrPre3L:                     # @mcl_fpDbl_sqrPre3L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	(%rsi), %r8
-	movq	8(%rsi), %r11
-	movq	(%rdx), %rsi
-	movq	8(%rdx), %r9
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r11
+	movq	8(%rsi), %rsi
 	movq	%r11, %rax
+	mulq	%r11
+	movq	%rdx, %rcx
+	movq	%rax, (%rdi)
+	movq	%r10, %rax
 	mulq	%rsi
-	movq	%rdx, %r15
-	movq	%rax, %r10
-	movq	%r8, %rax
-	mulq	%rsi
-	movq	%rax, %r14
-	movq	%rdx, %r13
-	addq	%r10, %r13
-	adcq	$0, %r15
-	movq	-8(%rcx), %r10
-	movq	(%rcx), %rbp
-	movq	%r14, %rsi
-	imulq	%r10, %rsi
-	movq	8(%rcx), %rdi
+	movq	%rdx, %r8
+	movq	%rax, %r9
 	movq	%rsi, %rax
-	mulq	%rdi
-	movq	%rdx, %rcx
+	mulq	%rsi
+	movq	%rdx, %r14
 	movq	%rax, %r12
 	movq	%rsi, %rax
-	mulq	%rbp
-	movq	%rdx, %rbx
-	addq	%r12, %rbx
-	adcq	$0, %rcx
-	addq	%r14, %rax
-	adcq	%r13, %rbx
-	adcq	%r15, %rcx
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	movq	%r9, %rax
 	mulq	%r11
-	movq	%rdx, %r14
-	movq	%rax, %r11
-	movq	%r9, %rax
-	mulq	%r8
-	movq	%rax, %r8
+	movq	%rax, %r15
 	movq	%rdx, %rsi
-	addq	%r11, %rsi
-	adcq	$0, %r14
-	addq	%rbx, %r8
-	adcq	%rcx, %rsi
-	adcq	%r15, %r14
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	imulq	%r8, %r10
+	addq	%rdx, %r12
+	adcq	%r9, %r14
+	movq	%r8, %r13
+	adcq	$0, %r13
 	movq	%r10, %rax
-	mulq	%rdi
-	movq	%rdx, %rcx
-	movq	%rax, %r9
+	mulq	%r11
+	movq	%rax, %r11
+	movq	%rdx, %rbx
+	addq	%r15, %rcx
+	adcq	%rax, %rsi
+	movq	%rdx, %rbp
+	adcq	$0, %rbp
+	addq	%r15, %rcx
+	movq	%rcx, 8(%rdi)
+	adcq	%r12, %rsi
+	adcq	%r14, %rbp
+	adcq	$0, %r13
 	movq	%r10, %rax
-	mulq	%rbp
-	addq	%r9, %rdx
-	adcq	$0, %rcx
-	addq	%r8, %rax
-	adcq	%rsi, %rdx
-	adcq	%r14, %rcx
-	adcq	$0, %rbx
-	movq	%rdx, %rax
-	subq	%rbp, %rax
-	movq	%rcx, %rsi
-	sbbq	%rdi, %rsi
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%rcx, %rsi
-	testb	%bl, %bl
-	cmovneq	%rdx, %rax
-	movq	-8(%rsp), %rcx          # 8-byte Reload
-	movq	%rax, (%rcx)
-	movq	%rsi, 8(%rcx)
+	mulq	%r10
+	addq	%r9, %rbx
+	adcq	%r8, %rax
+	adcq	$0, %rdx
+	addq	%r11, %rsi
+	movq	%rsi, 16(%rdi)
+	adcq	%rbp, %rbx
+	movq	%rbx, 24(%rdi)
+	adcq	%r13, %rax
+	movq	%rax, 32(%rdi)
+	adcq	$0, %rdx
+	movq	%rdx, 40(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -771,87 +522,179 @@ mcl_fp_mont2L:                          # @mcl_fp_mont2L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end23:
-	.size	mcl_fp_mont2L, .Lfunc_end23-mcl_fp_mont2L
-
-	.globl	mcl_fp_montNF2L
-	.align	16, 0x90
-	.type	mcl_fp_montNF2L,@function
-mcl_fp_montNF2L:                        # @mcl_fp_montNF2L
-# BB#0:
+.Lfunc_end8:
+	.size	mcl_fpDbl_sqrPre3L, .Lfunc_end8-mcl_fpDbl_sqrPre3L
+                                        # -- End function
+	.globl	mcl_fp_mont3L                   # -- Begin function mcl_fp_mont3L
+	.p2align	4, 0x90
+	.type	mcl_fp_mont3L,@function
+mcl_fp_mont3L:                          # @mcl_fp_mont3L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	(%rsi), %r8
-	movq	8(%rsi), %r11
-	movq	(%rdx), %rbp
-	movq	8(%rdx), %r9
-	movq	%r8, %rax
-	mulq	%rbp
-	movq	%rax, %rsi
-	movq	%rdx, %r14
-	movq	-8(%rcx), %r10
-	movq	(%rcx), %r15
-	movq	%rsi, %rbx
-	imulq	%r10, %rbx
-	movq	8(%rcx), %rdi
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	16(%rsi), %r10
+	movq	(%rdx), %rdi
+	movq	%rdx, %r11
+	movq	%rdx, -64(%rsp)                 # 8-byte Spill
+	movq	%r10, %rax
+	mulq	%rdi
+	movq	%rax, %rbp
+	movq	%rdx, %r15
+	movq	(%rsi), %rbx
+	movq	%rbx, -16(%rsp)                 # 8-byte Spill
+	movq	8(%rsi), %r14
+	movq	%r14, %rax
+	movq	%r14, -72(%rsp)                 # 8-byte Spill
+	mulq	%rdi
+	movq	%rdx, %r8
+	movq	%rax, %rsi
 	movq	%rbx, %rax
 	mulq	%rdi
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
+	movq	%rax, %r12
+	movq	%rdx, %rdi
+	addq	%rsi, %rdi
+	adcq	%rbp, %r8
+	adcq	$0, %r15
+	movq	-8(%rcx), %rbp
+	movq	%rbp, -32(%rsp)                 # 8-byte Spill
+	imulq	%rax, %rbp
+	movq	16(%rcx), %rdx
+	movq	%rdx, -56(%rsp)                 # 8-byte Spill
+	movq	%rbp, %rax
+	mulq	%rdx
 	movq	%rax, %r13
-	movq	%rbx, %rax
-	mulq	%r15
-	movq	%rdx, %r12
-	movq	%rax, %rbx
-	movq	%r11, %rax
-	mulq	%rbp
-	movq	%rdx, %rcx
-	movq	%rax, %rbp
-	addq	%r14, %rbp
-	adcq	$0, %rcx
-	addq	%rsi, %rbx
-	adcq	%r13, %rbp
-	adcq	$0, %rcx
-	addq	%r12, %rbp
-	adcq	-16(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%r9, %rax
-	mulq	%r11
+	movq	%rdx, %r9
+	movq	(%rcx), %rbx
+	movq	%rbx, -48(%rsp)                 # 8-byte Spill
+	movq	8(%rcx), %rcx
+	movq	%rcx, -40(%rsp)                 # 8-byte Spill
+	movq	%rbp, %rax
+	mulq	%rcx
 	movq	%rdx, %rsi
-	movq	%rax, %r11
-	movq	%r9, %rax
-	mulq	%r8
+	movq	%rax, %rcx
+	movq	%rbp, %rax
+	mulq	%rbx
+	movq	%rdx, %rbp
+	addq	%rcx, %rbp
+	adcq	%r13, %rsi
+	adcq	$0, %r9
+	addq	%r12, %rax
+	adcq	%rdi, %rbp
+	movq	8(%r11), %rcx
+	adcq	%r8, %rsi
+	adcq	%r15, %r9
+	setb	%r11b
+	movq	%rcx, %rax
+	mulq	%r10
+	movq	%rdx, %r15
+	movq	%rax, %rdi
+	movq	%rcx, %rax
+	mulq	%r14
+	movq	%rdx, %r13
 	movq	%rax, %r8
+	movq	%rcx, %rax
+	movq	-16(%rsp), %rcx                 # 8-byte Reload
+	mulq	%rcx
+	movq	%rax, %r12
 	movq	%rdx, %rbx
-	addq	%r11, %rbx
-	adcq	$0, %rsi
-	addq	%rbp, %r8
-	adcq	%rcx, %rbx
-	adcq	$0, %rsi
-	imulq	%r8, %r10
-	movq	%r10, %rax
-	mulq	%rdi
-	movq	%rdx, %rcx
+	addq	%r8, %rbx
+	adcq	%rdi, %r13
+	adcq	$0, %r15
+	addq	%rbp, %r12
+	adcq	%rsi, %rbx
+	movzbl	%r11b, %eax
+	adcq	%r9, %r13
+	adcq	%rax, %r15
+	setb	-73(%rsp)                       # 1-byte Folded Spill
+	movq	-32(%rsp), %rsi                 # 8-byte Reload
+	imulq	%r12, %rsi
+	movq	%rsi, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, -24(%rsp)                 # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rdi
+	movq	%rax, %r11
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
 	movq	%rax, %rbp
-	movq	%r10, %rax
-	mulq	%r15
-	addq	%r8, %rax
+	movq	%rdx, %rsi
+	movq	-64(%rsp), %rax                 # 8-byte Reload
+	movq	16(%rax), %r9
+	movq	%r9, %rax
+	mulq	%r10
+	movq	%rdx, %r8
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	movq	%r9, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	movq	%rdx, %r10
+	addq	%rdi, %rbp
+	adcq	-24(%rsp), %rsi                 # 8-byte Folded Reload
+	adcq	$0, %r14
+	addq	%r12, %r11
 	adcq	%rbx, %rbp
-	adcq	$0, %rsi
-	addq	%rdx, %rbp
-	adcq	%rcx, %rsi
-	movq	%rbp, %rax
-	subq	%r15, %rax
-	movq	%rsi, %rcx
-	sbbq	%rdi, %rcx
-	cmovsq	%rbp, %rax
-	movq	-8(%rsp), %rdx          # 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovsq	%rsi, %rcx
-	movq	%rcx, 8(%rdx)
+	adcq	%r13, %rsi
+	adcq	%r15, %r14
+	movzbl	-73(%rsp), %edi                 # 1-byte Folded Reload
+	adcq	$0, %rdi
+	movq	%r9, %rax
+	mulq	%rcx
+	movq	%rax, %r9
+	movq	%rdx, %rcx
+	addq	-72(%rsp), %rcx                 # 8-byte Folded Reload
+	adcq	-64(%rsp), %r10                 # 8-byte Folded Reload
+	adcq	$0, %r8
+	addq	%rbp, %r9
+	adcq	%rsi, %rcx
+	adcq	%r14, %r10
+	adcq	%rdi, %r8
+	setb	%r11b
+	movq	-32(%rsp), %rsi                 # 8-byte Reload
+	imulq	%r9, %rsi
+	movq	%rsi, %rax
+	movq	-56(%rsp), %r14                 # 8-byte Reload
+	mulq	%r14
+	movq	%rdx, %rbx
+	movq	%rax, %r12
+	movq	%rsi, %rax
+	movq	-40(%rsp), %r15                 # 8-byte Reload
+	mulq	%r15
+	movq	%rdx, %rbp
+	movq	%rax, %rdi
+	movq	%rsi, %rax
+	movq	-48(%rsp), %rsi                 # 8-byte Reload
+	mulq	%rsi
+	addq	%rdi, %rdx
+	adcq	%r12, %rbp
+	adcq	$0, %rbx
+	addq	%r9, %rax
+	adcq	%rcx, %rdx
+	adcq	%r10, %rbp
+	movzbl	%r11b, %eax
+	adcq	%r8, %rbx
+	adcq	$0, %rax
+	movq	%rdx, %rdi
+	subq	%rsi, %rdi
+	movq	%rbp, %rsi
+	sbbq	%r15, %rsi
+	movq	%rbx, %rcx
+	sbbq	%r14, %rcx
+	sbbq	$0, %rax
+	testb	$1, %al
+	cmovneq	%rbx, %rcx
+	movq	-8(%rsp), %rax                  # 8-byte Reload
+	movq	%rcx, 16(%rax)
+	cmovneq	%rbp, %rsi
+	movq	%rsi, 8(%rax)
+	cmovneq	%rdx, %rdi
+	movq	%rdi, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -859,383 +702,861 @@ mcl_fp_montNF2L:                        # @mcl_fp_montNF2L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end24:
-	.size	mcl_fp_montNF2L, .Lfunc_end24-mcl_fp_montNF2L
-
-	.globl	mcl_fp_montRed2L
-	.align	16, 0x90
-	.type	mcl_fp_montRed2L,@function
-mcl_fp_montRed2L:                       # @mcl_fp_montRed2L
-# BB#0:
+.Lfunc_end9:
+	.size	mcl_fp_mont3L, .Lfunc_end9-mcl_fp_mont3L
+                                        # -- End function
+	.globl	mcl_fp_montNF3L                 # -- Begin function mcl_fp_montNF3L
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF3L,@function
+mcl_fp_montNF3L:                        # @mcl_fp_montNF3L
+# %bb.0:
+	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
+	pushq	%r13
+	pushq	%r12
 	pushq	%rbx
-	movq	-8(%rdx), %r9
-	movq	(%rdx), %r11
-	movq	(%rsi), %rbx
-	movq	%rbx, %rcx
-	imulq	%r9, %rcx
-	movq	8(%rdx), %r14
-	movq	%rcx, %rax
-	mulq	%r14
-	movq	%rdx, %r8
+	movq	%rcx, %r8
+	movq	%rdx, %r15
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	16(%rsi), %rax
+	movq	%rax, -48(%rsp)                 # 8-byte Spill
+	movq	(%rdx), %rdi
+	movq	%rdx, -16(%rsp)                 # 8-byte Spill
+	mulq	%rdi
+	movq	%rax, %rcx
+	movq	%rdx, %r13
+	movq	(%rsi), %r12
+	movq	8(%rsi), %rax
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	mulq	%rdi
+	movq	%rdx, %rbx
+	movq	%rax, %rsi
+	movq	%r12, %rax
+	movq	%r12, -24(%rsp)                 # 8-byte Spill
+	mulq	%rdi
 	movq	%rax, %r10
+	movq	%rdx, %rdi
+	addq	%rsi, %rdi
+	adcq	%rcx, %rbx
+	adcq	$0, %r13
+	movq	-8(%r8), %r11
+	movq	%r11, %rbp
+	imulq	%rax, %rbp
+	movq	16(%r8), %rax
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	mulq	%rbp
+	movq	%rax, %r9
+	movq	%rdx, %r14
+	movq	(%r8), %rcx
+	movq	%rcx, -40(%rsp)                 # 8-byte Spill
+	movq	8(%r8), %rax
+	movq	%rax, -32(%rsp)                 # 8-byte Spill
+	mulq	%rbp
+	movq	%rdx, %r8
+	movq	%rax, %rsi
 	movq	%rcx, %rax
-	mulq	%r11
-	movq	%rdx, %rcx
-	addq	%r10, %rcx
-	adcq	$0, %r8
-	movq	24(%rsi), %r15
-	addq	%rbx, %rax
-	adcq	8(%rsi), %rcx
-	adcq	16(%rsi), %r8
-	adcq	$0, %r15
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	imulq	%rcx, %r9
-	movq	%r9, %rax
+	mulq	%rbp
+	addq	%r10, %rax
+	adcq	%rdi, %rsi
+	adcq	%rbx, %r9
+	adcq	$0, %r13
+	addq	%rdx, %rsi
+	movq	8(%r15), %rdi
+	adcq	%r8, %r9
+	adcq	%r14, %r13
+	movq	-48(%rsp), %r14                 # 8-byte Reload
+	movq	%r14, %rax
+	mulq	%rdi
+	movq	%rdx, %rbx
+	movq	%rax, %r8
+	movq	-64(%rsp), %rax                 # 8-byte Reload
+	mulq	%rdi
+	movq	%rdx, %rbp
+	movq	%rax, %rcx
+	movq	%r12, %rax
+	mulq	%rdi
+	movq	%rax, %rdi
+	movq	%rdx, %r10
+	addq	%rcx, %r10
+	adcq	%r8, %rbp
+	adcq	$0, %rbx
+	addq	%rsi, %rdi
+	adcq	%r9, %r10
+	adcq	%r13, %rbp
+	adcq	$0, %rbx
+	movq	%r11, %rsi
+	imulq	%rdi, %rsi
+	movq	%rsi, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r13
+	movq	%rsi, %rax
+	movq	-32(%rsp), %r15                 # 8-byte Reload
+	mulq	%r15
+	movq	%rdx, %r9
+	movq	%rax, %rcx
+	movq	%rsi, %rax
+	movq	-40(%rsp), %r12                 # 8-byte Reload
+	mulq	%r12
+	addq	%rdi, %rax
+	adcq	%r10, %rcx
+	adcq	%rbp, %r13
+	adcq	$0, %rbx
+	addq	%rdx, %rcx
+	adcq	%r9, %r13
+	adcq	%r8, %rbx
+	movq	-16(%rsp), %rax                 # 8-byte Reload
+	movq	16(%rax), %rdi
+	movq	%rdi, %rax
 	mulq	%r14
 	movq	%rdx, %rsi
+	movq	%rax, %r8
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %r9
+	movq	%rdi, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
 	movq	%rax, %r10
-	movq	%r9, %rax
-	mulq	%r11
-	addq	%r10, %rdx
+	movq	%rdx, %rdi
+	addq	%r9, %rdi
+	adcq	%r8, %rbp
 	adcq	$0, %rsi
-	addq	%rcx, %rax
-	adcq	%r8, %rdx
-	adcq	%r15, %rsi
-	adcq	$0, %rbx
-	movq	%rdx, %rax
-	subq	%r11, %rax
-	movq	%rsi, %rcx
-	sbbq	%r14, %rcx
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%rsi, %rcx
-	testb	%bl, %bl
-	cmovneq	%rdx, %rax
-	movq	%rax, (%rdi)
-	movq	%rcx, 8(%rdi)
+	addq	%rcx, %r10
+	adcq	%r13, %rdi
+	adcq	%rbx, %rbp
+	adcq	$0, %rsi
+	imulq	%r10, %r11
+	movq	-56(%rsp), %r14                 # 8-byte Reload
+	movq	%r14, %rax
+	mulq	%r11
+	movq	%rdx, %r8
+	movq	%rax, %rcx
+	movq	%r15, %rax
+	mulq	%r11
+	movq	%rdx, %r9
+	movq	%rax, %rbx
+	movq	%r12, %rax
+	mulq	%r11
+	addq	%r10, %rax
+	adcq	%rdi, %rbx
+	adcq	%rbp, %rcx
+	adcq	$0, %rsi
+	addq	%rdx, %rbx
+	adcq	%r9, %rcx
+	adcq	%r8, %rsi
+	movq	%rbx, %rax
+	subq	%r12, %rax
+	movq	%rcx, %rdx
+	sbbq	%r15, %rdx
+	movq	%rsi, %rbp
+	sbbq	%r14, %rbp
+	movq	%rbp, %rdi
+	sarq	$63, %rdi
+	cmovsq	%rsi, %rbp
+	movq	-8(%rsp), %rsi                  # 8-byte Reload
+	movq	%rbp, 16(%rsi)
+	cmovsq	%rcx, %rdx
+	movq	%rdx, 8(%rsi)
+	cmovsq	%rbx, %rax
+	movq	%rax, (%rsi)
 	popq	%rbx
+	popq	%r12
+	popq	%r13
 	popq	%r14
 	popq	%r15
+	popq	%rbp
 	retq
-.Lfunc_end25:
-	.size	mcl_fp_montRed2L, .Lfunc_end25-mcl_fp_montRed2L
-
-	.globl	mcl_fp_addPre2L
-	.align	16, 0x90
-	.type	mcl_fp_addPre2L,@function
-mcl_fp_addPre2L:                        # @mcl_fp_addPre2L
-# BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rcx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rcx
-	movq	%rax, (%rdi)
-	movq	%rcx, 8(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
+.Lfunc_end10:
+	.size	mcl_fp_montNF3L, .Lfunc_end10-mcl_fp_montNF3L
+                                        # -- End function
+	.globl	mcl_fp_montRed3L                # -- Begin function mcl_fp_montRed3L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed3L,@function
+mcl_fp_montRed3L:                       # @mcl_fp_montRed3L
+# %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %rcx
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	-8(%rdx), %r9
+	movq	(%rdx), %rdi
+	movq	(%rsi), %r14
+	movq	%r14, %rbx
+	imulq	%r9, %rbx
+	movq	16(%rdx), %rbp
+	movq	%rbx, %rax
+	mulq	%rbp
+	movq	%rbp, -16(%rsp)                 # 8-byte Spill
+	movq	%rax, %r11
+	movq	%rdx, %r8
+	movq	8(%rcx), %rcx
+	movq	%rcx, -32(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rax
+	mulq	%rcx
+	movq	%rdx, %r10
+	movq	%rax, %rcx
+	movq	%rbx, %rax
+	mulq	%rdi
+	movq	%rdi, -24(%rsp)                 # 8-byte Spill
+	movq	%rdx, %rbx
+	addq	%rcx, %rbx
+	adcq	%r11, %r10
+	adcq	$0, %r8
+	addq	%r14, %rax
+	adcq	8(%rsi), %rbx
+	adcq	16(%rsi), %r10
+	adcq	24(%rsi), %r8
+	setb	-33(%rsp)                       # 1-byte Folded Spill
+	movq	%r9, %rcx
+	imulq	%rbx, %rcx
+	movq	%rcx, %rax
+	mulq	%rbp
+	movq	%rdx, %r14
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	mulq	%rdi
+	movq	%rdx, %r12
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	movq	-32(%rsp), %rbp                 # 8-byte Reload
+	mulq	%rbp
+	movq	%rdx, %r11
+	movq	%rax, %rcx
+	addq	%r12, %rcx
+	adcq	%r15, %r11
+	movzbl	-33(%rsp), %r15d                # 1-byte Folded Reload
+	adcq	%r14, %r15
+	addq	%rbx, %r13
+	adcq	%r10, %rcx
+	adcq	%r8, %r11
+	adcq	32(%rsi), %r15
+	setb	%dil
+	imulq	%rcx, %r9
+	movq	%r9, %rax
+	movq	-16(%rsp), %r13                 # 8-byte Reload
+	mulq	%r13
+	movq	%rdx, %r12
+	movq	%rax, %r8
+	movq	%r9, %rax
+	movq	-24(%rsp), %rbx                 # 8-byte Reload
+	mulq	%rbx
+	movq	%rdx, %r10
+	movq	%rax, %r14
+	movq	%r9, %rax
+	mulq	%rbp
+	addq	%r10, %rax
+	adcq	%r8, %rdx
+	movzbl	%dil, %edi
+	adcq	%rdi, %r12
+	addq	%rcx, %r14
+	adcq	%r11, %rax
+	adcq	%r15, %rdx
+	adcq	40(%rsi), %r12
+	xorl	%ecx, %ecx
+	movq	%rax, %rsi
+	subq	%rbx, %rsi
+	movq	%rdx, %rdi
+	sbbq	%rbp, %rdi
+	movq	%r12, %rbx
+	sbbq	%r13, %rbx
+	sbbq	%rcx, %rcx
+	testb	$1, %cl
+	cmovneq	%r12, %rbx
+	movq	-8(%rsp), %rcx                  # 8-byte Reload
+	movq	%rbx, 16(%rcx)
+	cmovneq	%rdx, %rdi
+	movq	%rdi, 8(%rcx)
+	cmovneq	%rax, %rsi
+	movq	%rsi, (%rcx)
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
 	retq
-.Lfunc_end26:
-	.size	mcl_fp_addPre2L, .Lfunc_end26-mcl_fp_addPre2L
-
-	.globl	mcl_fp_subPre2L
-	.align	16, 0x90
-	.type	mcl_fp_subPre2L,@function
-mcl_fp_subPre2L:                        # @mcl_fp_subPre2L
-# BB#0:
+.Lfunc_end11:
+	.size	mcl_fp_montRed3L, .Lfunc_end11-mcl_fp_montRed3L
+                                        # -- End function
+	.globl	mcl_fp_montRedNF3L              # -- Begin function mcl_fp_montRedNF3L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF3L,@function
+mcl_fp_montRedNF3L:                     # @mcl_fp_montRedNF3L
+# %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %rcx
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	-8(%rdx), %r9
+	movq	(%rdx), %rbp
+	movq	(%rsi), %r14
+	movq	%r14, %rbx
+	imulq	%r9, %rbx
+	movq	16(%rdx), %rdi
+	movq	%rbx, %rax
+	mulq	%rdi
+	movq	%rdi, %r15
+	movq	%rdi, -16(%rsp)                 # 8-byte Spill
+	movq	%rax, %r11
+	movq	%rdx, %r8
+	movq	8(%rcx), %rdi
+	movq	%rbx, %rax
+	mulq	%rdi
+	movq	%rdx, %r10
+	movq	%rax, %rcx
+	movq	%rbx, %rax
+	mulq	%rbp
+	movq	%rbp, -24(%rsp)                 # 8-byte Spill
+	movq	%rdx, %rbx
+	addq	%rcx, %rbx
+	adcq	%r11, %r10
+	adcq	$0, %r8
+	addq	%r14, %rax
+	adcq	8(%rsi), %rbx
+	adcq	16(%rsi), %r10
+	adcq	24(%rsi), %r8
+	setb	-25(%rsp)                       # 1-byte Folded Spill
+	movq	%r9, %rcx
+	imulq	%rbx, %rcx
+	movq	%rcx, %rax
+	mulq	%r15
+	movq	%rdx, %r14
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	mulq	%rbp
+	movq	%rdx, %r12
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	mulq	%rdi
+	movq	%rdx, %r11
+	movq	%rax, %rcx
+	addq	%r12, %rcx
+	adcq	%r15, %r11
+	movzbl	-25(%rsp), %r15d                # 1-byte Folded Reload
+	adcq	%r14, %r15
+	addq	%rbx, %r13
+	adcq	%r10, %rcx
+	adcq	%r8, %r11
+	adcq	32(%rsi), %r15
+	setb	%bpl
+	imulq	%rcx, %r9
+	movq	%r9, %rax
+	movq	-16(%rsp), %r13                 # 8-byte Reload
+	mulq	%r13
+	movq	%rdx, %r12
+	movq	%rax, %r8
+	movq	%r9, %rax
+	movq	-24(%rsp), %rbx                 # 8-byte Reload
+	mulq	%rbx
+	movq	%rdx, %r10
+	movq	%rax, %r14
+	movq	%r9, %rax
+	mulq	%rdi
+	addq	%r10, %rax
+	adcq	%r8, %rdx
+	movzbl	%bpl, %ebp
+	adcq	%rbp, %r12
+	addq	%rcx, %r14
+	adcq	%r11, %rax
+	adcq	%r15, %rdx
+	adcq	40(%rsi), %r12
+	movq	%rax, %rcx
+	subq	%rbx, %rcx
+	movq	%rdx, %rsi
+	sbbq	%rdi, %rsi
+	movq	%r12, %rbx
+	sbbq	%r13, %rbx
+	movq	%rbx, %rdi
+	sarq	$63, %rdi
+	cmovsq	%r12, %rbx
+	movq	-8(%rsp), %rdi                  # 8-byte Reload
+	movq	%rbx, 16(%rdi)
+	cmovsq	%rdx, %rsi
+	movq	%rsi, 8(%rdi)
+	cmovsq	%rax, %rcx
+	movq	%rcx, (%rdi)
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	retq
+.Lfunc_end12:
+	.size	mcl_fp_montRedNF3L, .Lfunc_end12-mcl_fp_montRedNF3L
+                                        # -- End function
+	.globl	mcl_fp_addPre3L                 # -- Begin function mcl_fp_addPre3L
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre3L,@function
+mcl_fp_addPre3L:                        # @mcl_fp_addPre3L
+# %bb.0:
+	movq	16(%rsi), %rax
 	movq	(%rsi), %rcx
 	movq	8(%rsi), %rsi
+	addq	(%rdx), %rcx
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rax
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rcx, (%rdi)
+	setb	%al
+	movzbl	%al, %eax
+	retq
+.Lfunc_end13:
+	.size	mcl_fp_addPre3L, .Lfunc_end13-mcl_fp_addPre3L
+                                        # -- End function
+	.globl	mcl_fp_subPre3L                 # -- Begin function mcl_fp_subPre3L
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre3L,@function
+mcl_fp_subPre3L:                        # @mcl_fp_subPre3L
+# %bb.0:
+	movq	16(%rsi), %rcx
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
 	xorl	%eax, %eax
-	subq	(%rdx), %rcx
+	subq	(%rdx), %r8
 	sbbq	8(%rdx), %rsi
-	movq	%rcx, (%rdi)
+	sbbq	16(%rdx), %rcx
+	movq	%rcx, 16(%rdi)
 	movq	%rsi, 8(%rdi)
-	sbbq	$0, %rax
+	movq	%r8, (%rdi)
+	sbbq	%rax, %rax
 	andl	$1, %eax
 	retq
-.Lfunc_end27:
-	.size	mcl_fp_subPre2L, .Lfunc_end27-mcl_fp_subPre2L
-
-	.globl	mcl_fp_shr1_2L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_2L,@function
-mcl_fp_shr1_2L:                         # @mcl_fp_shr1_2L
-# BB#0:
+.Lfunc_end14:
+	.size	mcl_fp_subPre3L, .Lfunc_end14-mcl_fp_subPre3L
+                                        # -- End function
+	.globl	mcl_fp_shr1_3L                  # -- Begin function mcl_fp_shr1_3L
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_3L,@function
+mcl_fp_shr1_3L:                         # @mcl_fp_shr1_3L
+# %bb.0:
 	movq	(%rsi), %rax
 	movq	8(%rsi), %rcx
+	movq	16(%rsi), %rdx
+	movq	%rdx, %rsi
+	shrq	%rsi
+	movq	%rsi, 16(%rdi)
+	shldq	$63, %rcx, %rdx
+	movq	%rdx, 8(%rdi)
 	shrdq	$1, %rcx, %rax
 	movq	%rax, (%rdi)
-	shrq	%rcx
-	movq	%rcx, 8(%rdi)
 	retq
-.Lfunc_end28:
-	.size	mcl_fp_shr1_2L, .Lfunc_end28-mcl_fp_shr1_2L
-
-	.globl	mcl_fp_add2L
-	.align	16, 0x90
-	.type	mcl_fp_add2L,@function
-mcl_fp_add2L:                           # @mcl_fp_add2L
-# BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB29_2
-# BB#1:                                 # %nocarry
+.Lfunc_end15:
+	.size	mcl_fp_shr1_3L, .Lfunc_end15-mcl_fp_shr1_3L
+                                        # -- End function
+	.globl	mcl_fp_add3L                    # -- Begin function mcl_fp_add3L
+	.p2align	4, 0x90
+	.type	mcl_fp_add3L,@function
+mcl_fp_add3L:                           # @mcl_fp_add3L
+# %bb.0:
+	movq	16(%rsi), %r8
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r8
+	movq	%r8, 16(%rdi)
+	movq	%rsi, 8(%rdi)
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-.LBB29_2:                               # %carry
+	setb	%dl
+	movzbl	%dl, %edx
+	subq	(%rcx), %rax
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %r8
+	sbbq	$0, %rdx
+	testb	$1, %dl
+	jne	.LBB16_2
+# %bb.1:                                # %nocarry
+	movq	%rax, (%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, 16(%rdi)
+.LBB16_2:                               # %carry
 	retq
-.Lfunc_end29:
-	.size	mcl_fp_add2L, .Lfunc_end29-mcl_fp_add2L
-
-	.globl	mcl_fp_addNF2L
-	.align	16, 0x90
-	.type	mcl_fp_addNF2L,@function
-mcl_fp_addNF2L:                         # @mcl_fp_addNF2L
-# BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %r8
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %r8
-	movq	%rax, %rsi
+.Lfunc_end16:
+	.size	mcl_fp_add3L, .Lfunc_end16-mcl_fp_add3L
+                                        # -- End function
+	.globl	mcl_fp_addNF3L                  # -- Begin function mcl_fp_addNF3L
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF3L,@function
+mcl_fp_addNF3L:                         # @mcl_fp_addNF3L
+# %bb.0:
+	movq	16(%rdx), %r10
+	movq	(%rdx), %r8
+	movq	8(%rdx), %r9
+	addq	(%rsi), %r8
+	adcq	8(%rsi), %r9
+	adcq	16(%rsi), %r10
+	movq	%r8, %rsi
 	subq	(%rcx), %rsi
-	movq	%r8, %rdx
+	movq	%r9, %rdx
 	sbbq	8(%rcx), %rdx
-	testq	%rdx, %rdx
-	cmovsq	%rax, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r8, %rdx
-	movq	%rdx, 8(%rdi)
-	retq
-.Lfunc_end30:
-	.size	mcl_fp_addNF2L, .Lfunc_end30-mcl_fp_addNF2L
-
-	.globl	mcl_fp_sub2L
-	.align	16, 0x90
-	.type	mcl_fp_sub2L,@function
-mcl_fp_sub2L:                           # @mcl_fp_sub2L
-# BB#0:
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r8
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r8
-	movq	%rax, (%rdi)
-	movq	%r8, 8(%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB31_2
-# BB#1:                                 # %nocarry
-	retq
-.LBB31_2:                               # %carry
-	movq	8(%rcx), %rdx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r8, %rdx
+	movq	%r10, %rax
+	sbbq	16(%rcx), %rax
+	movq	%rax, %rcx
+	sarq	$63, %rcx
+	cmovsq	%r10, %rax
+	movq	%rax, 16(%rdi)
+	cmovsq	%r9, %rdx
 	movq	%rdx, 8(%rdi)
+	cmovsq	%r8, %rsi
+	movq	%rsi, (%rdi)
 	retq
-.Lfunc_end31:
-	.size	mcl_fp_sub2L, .Lfunc_end31-mcl_fp_sub2L
-
-	.globl	mcl_fp_subNF2L
-	.align	16, 0x90
-	.type	mcl_fp_subNF2L,@function
-mcl_fp_subNF2L:                         # @mcl_fp_subNF2L
-# BB#0:
+.Lfunc_end17:
+	.size	mcl_fp_addNF3L, .Lfunc_end17-mcl_fp_addNF3L
+                                        # -- End function
+	.globl	mcl_fp_sub3L                    # -- Begin function mcl_fp_sub3L
+	.p2align	4, 0x90
+	.type	mcl_fp_sub3L,@function
+mcl_fp_sub3L:                           # @mcl_fp_sub3L
+# %bb.0:
+	movq	16(%rsi), %rax
 	movq	(%rsi), %r8
 	movq	8(%rsi), %rsi
+	xorl	%r9d, %r9d
 	subq	(%rdx), %r8
 	sbbq	8(%rdx), %rsi
-	movq	%rsi, %rdx
+	sbbq	16(%rdx), %rax
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%r9, %r9
+	testb	$1, %r9b
+	jne	.LBB18_2
+# %bb.1:                                # %nocarry
+	retq
+.LBB18_2:                               # %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %rax
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	retq
+.Lfunc_end18:
+	.size	mcl_fp_sub3L, .Lfunc_end18-mcl_fp_sub3L
+                                        # -- End function
+	.globl	mcl_fp_subNF3L                  # -- Begin function mcl_fp_subNF3L
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF3L,@function
+mcl_fp_subNF3L:                         # @mcl_fp_subNF3L
+# %bb.0:
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r8
+	movq	8(%rsi), %r9
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %r9
+	sbbq	16(%rdx), %r10
+	movq	%r10, %rdx
 	sarq	$63, %rdx
-	movq	8(%rcx), %rax
+	movq	%rdx, %rsi
+	shldq	$1, %r10, %rsi
+	andq	(%rcx), %rsi
+	movq	16(%rcx), %rax
 	andq	%rdx, %rax
-	andq	(%rcx), %rdx
-	addq	%r8, %rdx
-	movq	%rdx, (%rdi)
-	adcq	%rsi, %rax
-	movq	%rax, 8(%rdi)
-	retq
-.Lfunc_end32:
-	.size	mcl_fp_subNF2L, .Lfunc_end32-mcl_fp_subNF2L
-
-	.globl	mcl_fpDbl_add2L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add2L,@function
-mcl_fpDbl_add2L:                        # @mcl_fpDbl_add2L
-# BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rdx), %r10
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r10
-	movq	%rax, (%rdi)
+	andq	8(%rcx), %rdx
+	addq	%r8, %rsi
+	movq	%rsi, (%rdi)
+	adcq	%r9, %rdx
 	movq	%rdx, 8(%rdi)
-	adcq	%r8, %r9
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r10, %rdx
+	adcq	%r10, %rax
+	movq	%rax, 16(%rdi)
+	retq
+.Lfunc_end19:
+	.size	mcl_fp_subNF3L, .Lfunc_end19-mcl_fp_subNF3L
+                                        # -- End function
+	.globl	mcl_fpDbl_add3L                 # -- Begin function mcl_fpDbl_add3L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add3L,@function
+mcl_fpDbl_add3L:                        # @mcl_fpDbl_add3L
+# %bb.0:
+	movq	40(%rsi), %r10
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r8
+	movq	16(%rsi), %rax
+	movq	(%rsi), %r11
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r11
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rax
+	adcq	24(%rdx), %r8
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r10
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r11, (%rdi)
+	setb	%al
+	movzbl	%al, %r11d
+	movq	%r8, %rdx
 	subq	(%rcx), %rdx
 	movq	%r9, %rsi
 	sbbq	8(%rcx), %rsi
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%r10, %rdx
-	movq	%rdx, 16(%rdi)
-	testb	%al, %al
+	movq	%r10, %rax
+	sbbq	16(%rcx), %rax
+	sbbq	$0, %r11
+	testb	$1, %r11b
+	cmovneq	%r10, %rax
+	movq	%rax, 40(%rdi)
 	cmovneq	%r9, %rsi
-	movq	%rsi, 24(%rdi)
+	movq	%rsi, 32(%rdi)
+	cmovneq	%r8, %rdx
+	movq	%rdx, 24(%rdi)
 	retq
-.Lfunc_end33:
-	.size	mcl_fpDbl_add2L, .Lfunc_end33-mcl_fpDbl_add2L
-
-	.globl	mcl_fpDbl_sub2L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub2L,@function
-mcl_fpDbl_sub2L:                        # @mcl_fpDbl_sub2L
-# BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
+.Lfunc_end20:
+	.size	mcl_fpDbl_add3L, .Lfunc_end20-mcl_fpDbl_add3L
+                                        # -- End function
+	.globl	mcl_fpDbl_sub3L                 # -- Begin function mcl_fpDbl_sub3L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub3L,@function
+mcl_fpDbl_sub3L:                        # @mcl_fpDbl_sub3L
+# %bb.0:
+	pushq	%rbx
+	movq	40(%rsi), %r8
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r10
+	movq	16(%rsi), %rax
 	movq	(%rsi), %r11
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
+	movq	8(%rsi), %rbx
+	xorl	%esi, %esi
 	subq	(%rdx), %r11
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %r10
+	sbbq	8(%rdx), %rbx
+	sbbq	16(%rdx), %rax
+	sbbq	24(%rdx), %r10
+	sbbq	32(%rdx), %r9
+	sbbq	40(%rdx), %r8
+	movq	%rax, 16(%rdi)
+	movq	%rbx, 8(%rdi)
 	movq	%r11, (%rdi)
-	movq	%rsi, 8(%rdi)
-	sbbq	%r8, %r9
-	movl	$0, %edx
-	sbbq	$0, %rdx
-	andl	$1, %edx
-	movq	(%rcx), %rsi
-	cmoveq	%rax, %rsi
-	testb	%dl, %dl
-	cmovneq	8(%rcx), %rax
+	sbbq	%rsi, %rsi
+	andl	$1, %esi
+	negq	%rsi
+	movq	16(%rcx), %rax
+	andq	%rsi, %rax
+	movq	8(%rcx), %rdx
+	andq	%rsi, %rdx
+	andq	(%rcx), %rsi
 	addq	%r10, %rsi
-	movq	%rsi, 16(%rdi)
-	adcq	%r9, %rax
-	movq	%rax, 24(%rdi)
+	movq	%rsi, 24(%rdi)
+	adcq	%r9, %rdx
+	movq	%rdx, 32(%rdi)
+	adcq	%r8, %rax
+	movq	%rax, 40(%rdi)
+	popq	%rbx
 	retq
-.Lfunc_end34:
-	.size	mcl_fpDbl_sub2L, .Lfunc_end34-mcl_fpDbl_sub2L
-
-	.globl	mcl_fp_mulUnitPre3L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre3L,@function
-mcl_fp_mulUnitPre3L:                    # @mcl_fp_mulUnitPre3L
-# BB#0:
+.Lfunc_end21:
+	.size	mcl_fpDbl_sub3L, .Lfunc_end21-mcl_fpDbl_sub3L
+                                        # -- End function
+	.globl	mulPv256x64                     # -- Begin function mulPv256x64
+	.p2align	4, 0x90
+	.type	mulPv256x64,@function
+mulPv256x64:                            # @mulPv256x64
+# %bb.0:
+	pushq	%rbx
 	movq	%rdx, %rcx
+	movq	%rdx, %rax
+	mulq	(%rsi)
+	movq	%rdx, %r8
+	movq	%rax, (%rdi)
+	movq	%rcx, %rax
+	mulq	24(%rsi)
+	movq	%rdx, %r9
+	movq	%rax, %r10
 	movq	%rcx, %rax
 	mulq	16(%rsi)
+	movq	%rdx, %r11
+	movq	%rax, %rbx
+	movq	%rcx, %rax
+	mulq	8(%rsi)
+	addq	%r8, %rax
+	movq	%rax, 8(%rdi)
+	adcq	%rbx, %rdx
+	movq	%rdx, 16(%rdi)
+	adcq	%r10, %r11
+	movq	%r11, 24(%rdi)
+	adcq	$0, %r9
+	movq	%r9, 32(%rdi)
+	movq	%rdi, %rax
+	popq	%rbx
+	retq
+.Lfunc_end22:
+	.size	mulPv256x64, .Lfunc_end22-mulPv256x64
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre4L             # -- Begin function mcl_fp_mulUnitPre4L
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre4L,@function
+mcl_fp_mulUnitPre4L:                    # @mcl_fp_mulUnitPre4L
+# %bb.0:
+	pushq	%r14
+	pushq	%rbx
+	movq	%rdx, %rcx
+	movq	%rdx, %rax
+	mulq	24(%rsi)
 	movq	%rdx, %r8
 	movq	%rax, %r9
 	movq	%rcx, %rax
-	mulq	8(%rsi)
+	mulq	16(%rsi)
 	movq	%rdx, %r10
 	movq	%rax, %r11
 	movq	%rcx, %rax
+	mulq	8(%rsi)
+	movq	%rdx, %rbx
+	movq	%rax, %r14
+	movq	%rcx, %rax
 	mulq	(%rsi)
 	movq	%rax, (%rdi)
-	addq	%r11, %rdx
+	addq	%r14, %rdx
 	movq	%rdx, 8(%rdi)
+	adcq	%r11, %rbx
+	movq	%rbx, 16(%rdi)
 	adcq	%r9, %r10
-	movq	%r10, 16(%rdi)
+	movq	%r10, 24(%rdi)
 	adcq	$0, %r8
-	movq	%r8, 24(%rdi)
+	movq	%r8, 32(%rdi)
+	popq	%rbx
+	popq	%r14
 	retq
-.Lfunc_end35:
-	.size	mcl_fp_mulUnitPre3L, .Lfunc_end35-mcl_fp_mulUnitPre3L
-
-	.globl	mcl_fpDbl_mulPre3L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre3L,@function
-mcl_fpDbl_mulPre3L:                     # @mcl_fpDbl_mulPre3L
-# BB#0:
+.Lfunc_end23:
+	.size	mcl_fp_mulUnitPre4L, .Lfunc_end23-mcl_fp_mulUnitPre4L
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre4L              # -- Begin function mcl_fpDbl_mulPre4L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre4L,@function
+mcl_fpDbl_mulPre4L:                     # @mcl_fpDbl_mulPre4L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, %r10
-	movq	(%rsi), %r8
+	movq	%rdx, %rbp
+	movq	(%rsi), %rax
 	movq	8(%rsi), %r9
-	movq	(%r10), %rbx
-	movq	%r8, %rax
+	movq	(%rdx), %rbx
+	movq	%rax, %r8
+	movq	%rax, -8(%rsp)                  # 8-byte Spill
 	mulq	%rbx
-	movq	%rdx, %rcx
-	movq	16(%rsi), %r11
+	movq	%rdx, -80(%rsp)                 # 8-byte Spill
+	movq	16(%rsi), %r10
+	movq	24(%rsi), %r13
 	movq	%rax, (%rdi)
-	movq	%r11, %rax
+	movq	8(%rbp), %rcx
+	movq	%rbp, %r11
+	movq	%rbp, -48(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r13
+	movq	%rdx, -96(%rsp)                 # 8-byte Spill
+	movq	%rax, -24(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r10
+	movq	%rdx, -16(%rsp)                 # 8-byte Spill
+	movq	%rax, -40(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r9
+	movq	%rdx, -32(%rsp)                 # 8-byte Spill
+	movq	%rax, %r14
+	movq	%rcx, %rax
+	mulq	%r8
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	movq	%rax, %r15
+	movq	%r13, %rax
+	movq	%r13, -72(%rsp)                 # 8-byte Spill
 	mulq	%rbx
-	movq	%rdx, %r14
-	movq	%rax, %rsi
+	movq	%rdx, %rsi
+	movq	%rax, %r12
+	movq	%r10, %rax
+	movq	%r10, %r8
+	movq	%r10, -56(%rsp)                 # 8-byte Spill
+	mulq	%rbx
+	movq	%rdx, %rcx
+	movq	%rax, %rbp
 	movq	%r9, %rax
+	movq	%r9, %r10
+	movq	%r9, -64(%rsp)                  # 8-byte Spill
 	mulq	%rbx
+	movq	%rdx, %rbx
+	addq	-80(%rsp), %rax                 # 8-byte Folded Reload
+	adcq	%rbp, %rbx
+	adcq	%r12, %rcx
+	adcq	$0, %rsi
+	addq	%r15, %rax
+	movq	%rax, 8(%rdi)
+	adcq	%r14, %rbx
+	adcq	-40(%rsp), %rcx                 # 8-byte Folded Reload
+	adcq	-24(%rsp), %rsi                 # 8-byte Folded Reload
+	setb	%al
+	addq	-88(%rsp), %rbx                 # 8-byte Folded Reload
+	adcq	-32(%rsp), %rcx                 # 8-byte Folded Reload
+	movzbl	%al, %r14d
+	adcq	-16(%rsp), %rsi                 # 8-byte Folded Reload
+	adcq	-96(%rsp), %r14                 # 8-byte Folded Reload
+	movq	16(%r11), %rbp
+	movq	%rbp, %rax
+	mulq	%r13
 	movq	%rdx, %r15
-	movq	%rax, %rbx
-	addq	%rcx, %rbx
-	adcq	%rsi, %r15
-	adcq	$0, %r14
-	movq	8(%r10), %rcx
-	movq	%r11, %rax
-	mulq	%rcx
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rbp, %rax
+	mulq	%r8
 	movq	%rdx, %r12
-	movq	%rax, %rbp
-	movq	%r9, %rax
-	mulq	%rcx
+	movq	%rax, %r9
+	movq	%rbp, %rax
+	mulq	%r10
 	movq	%rdx, %r13
-	movq	%rax, %rsi
-	movq	%r8, %rax
-	mulq	%rcx
+	movq	%rax, %r10
+	movq	%rbp, %rax
+	movq	-8(%rsp), %r8                   # 8-byte Reload
+	mulq	%r8
+	movq	%rdx, %r11
+	addq	%r10, %r11
+	adcq	%r9, %r13
+	adcq	-96(%rsp), %r12                 # 8-byte Folded Reload
+	adcq	$0, %r15
 	addq	%rbx, %rax
-	movq	%rax, 8(%rdi)
-	adcq	%r15, %rsi
-	adcq	%r14, %rbp
-	sbbq	%r14, %r14
-	andl	$1, %r14d
-	addq	%rdx, %rsi
-	adcq	%r13, %rbp
-	adcq	%r12, %r14
-	movq	16(%r10), %r15
-	movq	%r11, %rax
-	mulq	%r15
-	movq	%rdx, %r10
-	movq	%rax, %rbx
-	movq	%r9, %rax
-	mulq	%r15
-	movq	%rdx, %r9
-	movq	%rax, %rcx
-	movq	%r8, %rax
-	mulq	%r15
-	addq	%rsi, %rax
+	adcq	%rcx, %r11
 	movq	%rax, 16(%rdi)
-	adcq	%rbp, %rcx
+	adcq	%rsi, %r13
+	adcq	%r14, %r12
+	adcq	$0, %r15
+	movq	-48(%rsp), %rax                 # 8-byte Reload
+	movq	24(%rax), %rsi
+	movq	%rsi, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rcx
+	movq	%rax, %r14
+	movq	%rsi, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r9
+	movq	%rsi, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %r10
+	movq	%rsi, %rax
+	mulq	%r8
+	addq	%r10, %rdx
+	adcq	%r9, %rbp
 	adcq	%r14, %rbx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%rdx, %rcx
-	movq	%rcx, 24(%rdi)
-	adcq	%r9, %rbx
-	movq	%rbx, 32(%rdi)
-	adcq	%r10, %rax
-	movq	%rax, 40(%rdi)
+	adcq	$0, %rcx
+	addq	%r11, %rax
+	movq	%rax, 24(%rdi)
+	adcq	%r13, %rdx
+	movq	%rdx, 32(%rdi)
+	adcq	%r12, %rbp
+	movq	%rbp, 40(%rdi)
+	adcq	%r15, %rbx
+	movq	%rbx, 48(%rdi)
+	adcq	$0, %rcx
+	movq	%rcx, 56(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -1243,246 +1564,388 @@ mcl_fpDbl_mulPre3L:                     # @mcl_fpDbl_mulPre3L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end36:
-	.size	mcl_fpDbl_mulPre3L, .Lfunc_end36-mcl_fpDbl_mulPre3L
-
-	.globl	mcl_fpDbl_sqrPre3L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre3L,@function
-mcl_fpDbl_sqrPre3L:                     # @mcl_fpDbl_sqrPre3L
-# BB#0:
+.Lfunc_end24:
+	.size	mcl_fpDbl_mulPre4L, .Lfunc_end24-mcl_fpDbl_mulPre4L
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre4L              # -- Begin function mcl_fpDbl_sqrPre4L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre4L,@function
+mcl_fpDbl_sqrPre4L:                     # @mcl_fpDbl_sqrPre4L
+# %bb.0:
+	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	16(%rsi), %r10
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	movq	%rcx, %rax
-	mulq	%rcx
-	movq	%rdx, %rbx
+	movq	%rdi, %r10
+	movq	24(%rsi), %rbx
+	movq	16(%rsi), %rcx
+	movq	(%rsi), %r11
+	movq	8(%rsi), %r12
+	movq	%r11, %rax
+	mulq	%r11
+	movq	%rdx, %rbp
 	movq	%rax, (%rdi)
-	movq	%r10, %rax
-	mulq	%rcx
-	movq	%rdx, %r8
-	movq	%rax, %r11
-	movq	%rsi, %rax
+	movq	%rbx, %rax
 	mulq	%rcx
+	movq	%rdx, -16(%rsp)                 # 8-byte Spill
+	movq	%rax, -24(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rax
+	movq	%rbx, -8(%rsp)                  # 8-byte Spill
+	mulq	%r11
+	movq	%rdx, -48(%rsp)                 # 8-byte Spill
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r11
+	movq	%rdx, %rsi
+	movq	%rax, %r15
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rax
+	mulq	%r12
 	movq	%rdx, %r14
-	movq	%rax, %r12
-	addq	%r12, %rbx
-	movq	%r14, %r13
-	adcq	%r11, %r13
-	movq	%r8, %rcx
-	adcq	$0, %rcx
-	movq	%r10, %rax
-	mulq	%rsi
+	movq	%rax, %rbx
+	movq	%rax, -32(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r12
 	movq	%rdx, %r9
-	movq	%rax, %r15
-	movq	%rsi, %rax
-	mulq	%rsi
-	movq	%rax, %rsi
-	addq	%r12, %rbx
-	movq	%rbx, 8(%rdi)
-	adcq	%r13, %rsi
-	adcq	%r15, %rcx
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	%r14, %rsi
-	adcq	%rdx, %rcx
-	adcq	%r9, %rbx
-	movq	%r10, %rax
-	mulq	%r10
-	addq	%r11, %rsi
-	movq	%rsi, 16(%rdi)
+	movq	%rax, %rdi
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%rcx
+	movq	%rdx, -40(%rsp)                 # 8-byte Spill
+	movq	%rax, %rcx
+	movq	%r12, %rax
+	mulq	%r12
+	movq	%rdx, %r13
+	movq	%rax, %r8
+	movq	%r12, %rax
+	mulq	%r11
+	addq	%rdx, %r8
+	adcq	%rdi, %r13
+	movq	%r9, %r12
+	adcq	%rbx, %r12
+	movq	%r14, %r11
+	adcq	$0, %r11
+	addq	%rax, %rbp
+	adcq	%r15, %rdx
+	movq	%rsi, %rbx
+	adcq	-72(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	-48(%rsp), %rdi                 # 8-byte Reload
+	movq	%rdi, %r15
+	adcq	$0, %r15
+	addq	%rax, %rbp
+	adcq	%r8, %rdx
+	movq	%rbp, 8(%r10)
+	adcq	%r13, %rbx
+	adcq	%r12, %r15
+	adcq	$0, %r11
+	addq	-64(%rsp), %rsi                 # 8-byte Folded Reload
+	adcq	%r9, %rcx
+	movq	-24(%rsp), %r12                 # 8-byte Reload
+	movq	-40(%rsp), %rax                 # 8-byte Reload
+	adcq	%r12, %rax
+	movq	-16(%rsp), %r8                  # 8-byte Reload
+	movq	%r8, %rbp
+	adcq	$0, %rbp
+	addq	-56(%rsp), %rdx                 # 8-byte Folded Reload
+	movq	%rdx, 16(%r10)
+	adcq	%rbx, %rsi
 	adcq	%r15, %rcx
-	adcq	%rbx, %rax
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	addq	%r8, %rcx
-	movq	%rcx, 24(%rdi)
-	adcq	%r9, %rax
-	movq	%rax, 32(%rdi)
-	adcq	%rdx, %rsi
-	movq	%rsi, 40(%rdi)
+	adcq	%r11, %rax
+	movq	%rax, %r9
+	adcq	$0, %rbp
+	movq	-8(%rsp), %rax                  # 8-byte Reload
+	mulq	%rax
+	addq	-32(%rsp), %rdi                 # 8-byte Folded Reload
+	adcq	%r12, %r14
+	adcq	%r8, %rax
+	adcq	$0, %rdx
+	addq	-72(%rsp), %rsi                 # 8-byte Folded Reload
+	movq	%rsi, 24(%r10)
+	adcq	%rcx, %rdi
+	movq	%rdi, 32(%r10)
+	adcq	%r9, %r14
+	movq	%r14, 40(%r10)
+	adcq	%rbp, %rax
+	movq	%rax, 48(%r10)
+	adcq	$0, %rdx
+	movq	%rdx, 56(%r10)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
+	popq	%rbp
 	retq
-.Lfunc_end37:
-	.size	mcl_fpDbl_sqrPre3L, .Lfunc_end37-mcl_fpDbl_sqrPre3L
-
-	.globl	mcl_fp_mont3L
-	.align	16, 0x90
-	.type	mcl_fp_mont3L,@function
-mcl_fp_mont3L:                          # @mcl_fp_mont3L
-# BB#0:
+.Lfunc_end25:
+	.size	mcl_fpDbl_sqrPre4L, .Lfunc_end25-mcl_fpDbl_sqrPre4L
+                                        # -- End function
+	.globl	mcl_fp_mont4L                   # -- Begin function mcl_fp_mont4L
+	.p2align	4, 0x90
+	.type	mcl_fp_mont4L,@function
+mcl_fp_mont4L:                          # @mcl_fp_mont4L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, %r10
-	movq	%r10, -56(%rsp)         # 8-byte Spill
-	movq	%rdi, -48(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	(%r10), %rdi
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	24(%rsi), %rax
+	movq	%rax, -40(%rsp)                 # 8-byte Spill
+	movq	(%rdx), %rdi
 	mulq	%rdi
-	movq	%rax, %rbp
+	movq	%rax, %r14
 	movq	%rdx, %r8
+	movq	16(%rsi), %rax
+	movq	%rax, -48(%rsp)                 # 8-byte Spill
+	mulq	%rdi
+	movq	%rax, %r12
+	movq	%rdx, %r9
 	movq	(%rsi), %rbx
-	movq	%rbx, -32(%rsp)         # 8-byte Spill
+	movq	%rbx, -56(%rsp)                 # 8-byte Spill
 	movq	8(%rsi), %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
 	mulq	%rdi
-	movq	%rdx, %r15
-	movq	%rax, %rsi
+	movq	%rdx, %r10
+	movq	%rax, %rbp
 	movq	%rbx, %rax
 	mulq	%rdi
-	movq	%rax, %r12
-	movq	%rdx, %r11
-	addq	%rsi, %r11
-	adcq	%rbp, %r15
+	movq	%rax, %r11
+	movq	%rdx, %r15
+	addq	%rbp, %r15
+	adcq	%r12, %r10
+	adcq	%r14, %r9
 	adcq	$0, %r8
-	movq	-8(%rcx), %r14
-	movq	(%rcx), %rdi
-	movq	%rdi, -24(%rsp)         # 8-byte Spill
-	movq	%r12, %rbp
-	imulq	%r14, %rbp
+	movq	-8(%rcx), %rdi
+	movq	%rdi, -80(%rsp)                 # 8-byte Spill
+	imulq	%rax, %rdi
+	movq	24(%rcx), %rdx
+	movq	%rdx, -72(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r12
+	movq	%rdx, %r13
 	movq	16(%rcx), %rdx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	8(%rcx), %rbx
-	movq	%rbx, -8(%rsp)          # 8-byte Spill
-	movq	%rbp, %rax
+	movq	%rdx, -16(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
 	mulq	%rdx
-	movq	%rdx, %rcx
-	movq	%rax, %r13
-	movq	%rbp, %rax
-	mulq	%rbx
-	movq	%rdx, %rsi
-	movq	%rax, %r9
-	movq	%rbp, %rax
-	mulq	%rdi
+	movq	%rax, %r14
+	movq	%rdx, %rbx
+	movq	(%rcx), %rsi
+	movq	%rsi, -24(%rsp)                 # 8-byte Spill
+	movq	8(%rcx), %rcx
+	movq	%rcx, -32(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rcx
 	movq	%rdx, %rbp
-	addq	%r9, %rbp
-	adcq	%r13, %rsi
-	adcq	$0, %rcx
-	addq	%r12, %rax
-	adcq	%r11, %rbp
-	movq	8(%r10), %rbx
-	adcq	%r15, %rsi
-	adcq	%r8, %rcx
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	movq	%rbx, %rax
-	movq	-64(%rsp), %r10         # 8-byte Reload
-	mulq	%r10
-	movq	%rdx, %r15
-	movq	%rax, %r9
-	movq	%rbx, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
+	movq	%rax, %rcx
+	movq	%rdi, %rax
+	mulq	%rsi
+	movq	%rdx, %rdi
+	addq	%rcx, %rdi
+	adcq	%r14, %rbp
+	adcq	%r12, %rbx
+	adcq	$0, %r13
+	addq	%r11, %rax
+	adcq	%r15, %rdi
+	adcq	%r10, %rbp
+	adcq	%r9, %rbx
+	adcq	%r8, %r13
+	setb	-96(%rsp)                       # 1-byte Folded Spill
+	movq	-88(%rsp), %rax                 # 8-byte Reload
+	movq	8(%rax), %rcx
+	movq	%rcx, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r12
-	movq	%rax, %r11
-	movq	%rbx, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
 	movq	%rax, %r8
-	movq	%rdx, %rbx
-	addq	%r11, %rbx
-	adcq	%r9, %r12
-	adcq	$0, %r15
-	addq	%rbp, %r8
-	adcq	%rsi, %rbx
-	adcq	%rcx, %r12
-	adcq	%rdi, %r15
-	sbbq	%r11, %r11
-	andl	$1, %r11d
-	movq	%r8, %rcx
-	imulq	%r14, %rcx
 	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %r9
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, %r11
 	movq	%rcx, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %rsi
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rax, %r10
+	movq	%rdx, %r9
+	addq	%r15, %r9
+	adcq	%r11, %rsi
+	adcq	%r8, %r14
+	adcq	$0, %r12
+	addq	%rdi, %r10
+	adcq	%rbp, %r9
+	adcq	%rbx, %rsi
+	adcq	%r13, %r14
+	movzbl	-96(%rsp), %eax                 # 1-byte Folded Reload
+	adcq	%rax, %r12
+	setb	-96(%rsp)                       # 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 # 8-byte Reload
+	imulq	%r10, %rcx
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %rbp
+	movq	%rcx, %rax
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
 	movq	%rax, %rdi
 	movq	%rcx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	addq	%rdi, %rbp
-	adcq	%r9, %rsi
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r11
+	addq	%rdi, %r11
+	adcq	%rbp, %r8
+	adcq	%r15, %rbx
 	adcq	$0, %r13
-	addq	%r8, %rax
-	adcq	%rbx, %rbp
-	adcq	%r12, %rsi
-	adcq	%r15, %r13
-	adcq	$0, %r11
-	movq	-56(%rsp), %rax         # 8-byte Reload
+	addq	%r10, %rax
+	adcq	%r9, %r11
+	adcq	%rsi, %r8
+	adcq	%r14, %rbx
+	adcq	%r12, %r13
+	movzbl	-96(%rsp), %r14d                # 1-byte Folded Reload
+	adcq	$0, %r14
+	movq	-88(%rsp), %rax                 # 8-byte Reload
 	movq	16(%rax), %rcx
 	movq	%rcx, %rax
-	mulq	%r10
-	movq	%rdx, %r8
-	movq	%rax, %r10
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
 	movq	%rcx, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %rdi
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r15
 	movq	%rcx, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %rsi
+	movq	%rcx, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
 	movq	%rax, %r9
+	movq	%rdx, %rdi
+	addq	%rsi, %rdi
+	adcq	%r15, %rbp
+	adcq	-96(%rsp), %r10                 # 8-byte Folded Reload
+	adcq	$0, %r12
+	addq	%r11, %r9
+	adcq	%r8, %rdi
+	adcq	%rbx, %rbp
+	adcq	%r13, %r10
+	adcq	%r14, %r12
+	setb	%r15b
+	movq	-80(%rsp), %rcx                 # 8-byte Reload
+	imulq	%r9, %rcx
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %r8
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r11
+	movq	%rcx, %rax
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rsi
+	movq	%rax, %r14
+	movq	%rcx, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %rcx
-	addq	%rdi, %rcx
-	adcq	%r10, %r15
+	addq	%r14, %rcx
+	adcq	%r11, %rsi
+	adcq	%r8, %rbx
+	adcq	$0, %r13
+	addq	%r9, %rax
+	adcq	%rdi, %rcx
+	adcq	%rbp, %rsi
+	adcq	%r10, %rbx
+	adcq	%r12, %r13
+	movzbl	%r15b, %r12d
+	adcq	$0, %r12
+	movq	-88(%rsp), %rax                 # 8-byte Reload
+	movq	24(%rax), %rdi
+	movq	%rdi, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r11
+	movq	%rdi, %rax
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r9
+	movq	%rax, %r14
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %r15
+	movq	%rdi, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rax, %r10
+	movq	%rdx, %rdi
+	addq	%r15, %rdi
+	adcq	%r14, %rbp
+	adcq	%r11, %r9
 	adcq	$0, %r8
-	addq	%rbp, %r9
-	adcq	%rsi, %rcx
-	adcq	%r13, %r15
-	adcq	%r11, %r8
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	imulq	%r9, %r14
-	movq	%r14, %rax
-	movq	-16(%rsp), %r12         # 8-byte Reload
+	addq	%rcx, %r10
+	adcq	%rsi, %rdi
+	adcq	%rbx, %rbp
+	adcq	%r13, %r9
+	adcq	%r12, %r8
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 # 8-byte Reload
+	imulq	%r10, %rcx
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	movq	-16(%rsp), %r12                 # 8-byte Reload
 	mulq	%r12
 	movq	%rdx, %rbx
-	movq	%rax, %r10
-	movq	%r14, %rax
-	movq	-8(%rsp), %r13          # 8-byte Reload
-	mulq	%r13
+	movq	%rax, %r14
+	movq	%rcx, %rax
+	movq	-32(%rsp), %r11                 # 8-byte Reload
+	mulq	%r11
 	movq	%rdx, %rsi
-	movq	%rax, %r11
-	movq	%r14, %rax
-	movq	-24(%rsp), %rbp         # 8-byte Reload
-	mulq	%rbp
-	addq	%r11, %rdx
-	adcq	%r10, %rsi
-	adcq	$0, %rbx
-	addq	%r9, %rax
-	adcq	%rcx, %rdx
-	adcq	%r15, %rsi
-	adcq	%r8, %rbx
-	adcq	$0, %rdi
-	movq	%rdx, %rax
-	subq	%rbp, %rax
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	movq	-24(%rsp), %rcx                 # 8-byte Reload
+	mulq	%rcx
+	addq	%r15, %rdx
+	adcq	%r14, %rsi
+	adcq	-80(%rsp), %rbx                 # 8-byte Folded Reload
+	adcq	$0, %r13
+	addq	%r10, %rax
+	adcq	%rdi, %rdx
+	adcq	%rbp, %rsi
+	adcq	%r9, %rbx
+	movzbl	-88(%rsp), %eax                 # 1-byte Folded Reload
+	adcq	%r8, %r13
+	adcq	$0, %rax
+	movq	%rdx, %r8
+	subq	%rcx, %r8
 	movq	%rsi, %rcx
-	sbbq	%r13, %rcx
+	sbbq	%r11, %rcx
 	movq	%rbx, %rbp
 	sbbq	%r12, %rbp
-	sbbq	$0, %rdi
-	andl	$1, %edi
+	movq	%r13, %rdi
+	sbbq	-72(%rsp), %rdi                 # 8-byte Folded Reload
+	sbbq	$0, %rax
+	testb	$1, %al
+	cmovneq	%r13, %rdi
+	movq	-8(%rsp), %rax                  # 8-byte Reload
+	movq	%rdi, 24(%rax)
 	cmovneq	%rbx, %rbp
-	testb	%dil, %dil
-	cmovneq	%rdx, %rax
-	movq	-48(%rsp), %rdx         # 8-byte Reload
-	movq	%rax, (%rdx)
+	movq	%rbp, 16(%rax)
 	cmovneq	%rsi, %rcx
-	movq	%rcx, 8(%rdx)
-	movq	%rbp, 16(%rdx)
+	movq	%rcx, 8(%rax)
+	cmovneq	%rdx, %r8
+	movq	%r8, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -1490,168 +1953,261 @@ mcl_fp_mont3L:                          # @mcl_fp_mont3L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end38:
-	.size	mcl_fp_mont3L, .Lfunc_end38-mcl_fp_mont3L
-
-	.globl	mcl_fp_montNF3L
-	.align	16, 0x90
-	.type	mcl_fp_montNF3L,@function
-mcl_fp_montNF3L:                        # @mcl_fp_montNF3L
-# BB#0:
+.Lfunc_end26:
+	.size	mcl_fp_mont4L, .Lfunc_end26-mcl_fp_mont4L
+                                        # -- End function
+	.globl	mcl_fp_montNF4L                 # -- Begin function mcl_fp_montNF4L
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF4L,@function
+mcl_fp_montNF4L:                        # @mcl_fp_montNF4L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%rdi, -32(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %r10
-	movq	%r10, -40(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rbp
-	movq	%r10, %rax
-	mulq	%rbp
-	movq	%rax, %r14
-	movq	%rdx, %r15
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	24(%rsi), %rax
+	movq	%rax, -48(%rsp)                 # 8-byte Spill
+	movq	(%rdx), %rdi
+	mulq	%rdi
+	movq	%rax, %r8
+	movq	%rdx, %r12
+	movq	16(%rsi), %rax
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	mulq	%rdi
+	movq	%rax, %rbp
+	movq	%rdx, %r9
 	movq	(%rsi), %rbx
-	movq	%rbx, -64(%rsp)         # 8-byte Spill
+	movq	%rbx, -64(%rsp)                 # 8-byte Spill
 	movq	8(%rsi), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	mulq	%rbp
-	movq	%rdx, %rdi
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	mulq	%rdi
+	movq	%rdx, %r15
 	movq	%rax, %rsi
 	movq	%rbx, %rax
-	mulq	%rbp
+	mulq	%rdi
+	movq	%rax, %r10
+	movq	%rdx, %rdi
+	addq	%rsi, %rdi
+	adcq	%rbp, %r15
+	adcq	%r8, %r9
+	adcq	$0, %r12
+	movq	-8(%rcx), %rsi
+	movq	%rsi, -80(%rsp)                 # 8-byte Spill
+	imulq	%rax, %rsi
+	movq	24(%rcx), %rdx
+	movq	%rdx, -32(%rsp)                 # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	%rdx
 	movq	%rax, %r13
-	movq	%rdx, %rbp
-	addq	%rsi, %rbp
-	adcq	%r14, %rdi
-	adcq	$0, %r15
-	movq	-8(%rcx), %r14
-	movq	(%rcx), %r11
-	movq	%r11, -48(%rsp)         # 8-byte Spill
-	movq	%r13, %rbx
-	imulq	%r14, %rbx
+	movq	%rdx, %r11
 	movq	16(%rcx), %rdx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -56(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
+	movq	%rdx, -40(%rsp)                 # 8-byte Spill
+	movq	%rsi, %rax
 	mulq	%rdx
-	movq	%rdx, %r8
-	movq	%rax, %r12
-	movq	%rbx, %rax
-	mulq	%rcx
-	movq	%rdx, %r9
-	movq	%rax, %rcx
-	movq	%rbx, %rax
-	mulq	%r11
-	addq	%r13, %rax
-	adcq	%rbp, %rcx
-	adcq	%rdi, %r12
-	adcq	$0, %r15
-	addq	%rdx, %rcx
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	8(%rax), %rbp
-	adcq	%r9, %r12
-	adcq	%r8, %r15
-	movq	%rbp, %rax
-	mulq	%r10
-	movq	%rdx, %rsi
 	movq	%rax, %r8
-	movq	%rbp, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r9
-	movq	%rbp, %rax
-	movq	-64(%rsp), %r10         # 8-byte Reload
-	mulq	%r10
-	movq	%rax, %r13
-	movq	%rdx, %rbp
-	addq	%r9, %rbp
-	adcq	%r8, %rbx
-	adcq	$0, %rsi
-	addq	%rcx, %r13
-	adcq	%r12, %rbp
-	adcq	%r15, %rbx
-	adcq	$0, %rsi
-	movq	%r13, %rcx
-	imulq	%r14, %rcx
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r15
-	movq	%rcx, %rax
-	movq	-56(%rsp), %rdi         # 8-byte Reload
-	mulq	%rdi
+	movq	%rdx, %r14
+	movq	(%rcx), %rbx
+	movq	%rbx, -16(%rsp)                 # 8-byte Spill
+	movq	8(%rcx), %rcx
+	movq	%rcx, -24(%rsp)                 # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	%rcx
+	movq	%rdx, %rcx
+	movq	%rax, %rbp
+	movq	%rsi, %rax
+	mulq	%rbx
+	addq	%r10, %rax
+	adcq	%rdi, %rbp
+	adcq	%r15, %r8
+	adcq	%r9, %r13
+	adcq	$0, %r12
+	addq	%rdx, %rbp
+	adcq	%rcx, %r8
+	adcq	%r14, %r13
+	adcq	%r11, %r12
+	movq	-88(%rsp), %rax                 # 8-byte Reload
+	movq	8(%rax), %rdi
+	movq	%rdi, %rax
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %rsi
+	movq	%rdi, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r11
+	movq	%rdi, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rcx
+	movq	%rax, %r14
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rax, %rdi
 	movq	%rdx, %r9
+	addq	%r14, %r9
+	adcq	%r11, %rcx
+	adcq	%rsi, %r10
+	adcq	$0, %rbx
+	addq	%rbp, %rdi
+	adcq	%r8, %r9
+	adcq	%r13, %rcx
+	adcq	%r12, %r10
+	adcq	$0, %rbx
+	movq	-80(%rsp), %rsi                 # 8-byte Reload
+	imulq	%rdi, %rsi
+	movq	%rsi, %rax
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
 	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	%r11
-	addq	%r13, %rax
-	adcq	%rbp, %r12
-	adcq	%rbx, %r15
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, %r13
+	movq	%rsi, %rax
+	movq	-24(%rsp), %r15                 # 8-byte Reload
+	mulq	%r15
+	movq	%rdx, %r14
+	movq	%rax, %rbp
+	movq	%rsi, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	addq	%rdi, %rax
+	adcq	%r9, %rbp
+	adcq	%rcx, %r13
+	adcq	%r10, %r12
+	adcq	$0, %rbx
+	addq	%rdx, %rbp
+	adcq	%r14, %r13
+	adcq	%r11, %r12
+	adcq	%r8, %rbx
+	movq	-88(%rsp), %rax                 # 8-byte Reload
+	movq	16(%rax), %rdi
+	movq	%rdi, %rax
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rsi
+	movq	%rax, %r10
+	movq	%rdi, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r11
+	movq	%rdi, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rcx
+	movq	%rax, %r14
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rax, %r9
+	movq	%rdx, %rdi
+	addq	%r14, %rdi
+	adcq	%r11, %rcx
+	adcq	%r10, %r8
 	adcq	$0, %rsi
-	addq	%rdx, %r12
-	adcq	%r9, %r15
-	adcq	%r8, %rsi
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	16(%rax), %rbx
+	addq	%rbp, %r9
+	adcq	%r13, %rdi
+	adcq	%r12, %rcx
+	adcq	%rbx, %r8
+	adcq	$0, %rsi
+	movq	-80(%rsp), %rbx                 # 8-byte Reload
+	imulq	%r9, %rbx
 	movq	%rbx, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r8
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r12
 	movq	%rbx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r9
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, %r13
 	movq	%rbx, %rax
-	mulq	%r10
-	movq	%rax, %r10
-	movq	%rdx, %rbx
-	addq	%r9, %rbx
-	adcq	%r8, %rcx
-	adcq	$0, %rbp
-	addq	%r12, %r10
-	adcq	%r15, %rbx
-	adcq	%rsi, %rcx
-	adcq	$0, %rbp
-	imulq	%r10, %r14
-	movq	%r14, %rax
-	movq	-16(%rsp), %r15         # 8-byte Reload
 	mulq	%r15
+	movq	%rdx, %r14
+	movq	%rax, %rbp
+	movq	%rbx, %rax
+	movq	-16(%rsp), %r15                 # 8-byte Reload
+	mulq	%r15
+	addq	%r9, %rax
+	adcq	%rdi, %rbp
+	adcq	%rcx, %r13
+	adcq	%r8, %r12
+	adcq	$0, %rsi
+	addq	%rdx, %rbp
+	adcq	%r14, %r13
+	adcq	%r11, %r12
+	adcq	%r10, %rsi
+	movq	-88(%rsp), %rax                 # 8-byte Reload
+	movq	24(%rax), %rdi
+	movq	%rdi, %rax
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, %rcx
+	movq	%rdi, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r8
-	movq	%rax, %rsi
-	movq	%r14, %rax
-	movq	%rdi, %r11
-	mulq	%r11
-	movq	%rdx, %r9
-	movq	%rax, %rdi
-	movq	%r14, %rax
-	movq	-48(%rsp), %r14         # 8-byte Reload
-	mulq	%r14
-	addq	%r10, %rax
-	adcq	%rbx, %rdi
-	adcq	%rcx, %rsi
-	adcq	$0, %rbp
-	addq	%rdx, %rdi
-	adcq	%r9, %rsi
-	adcq	%r8, %rbp
+	movq	%rax, %rbx
 	movq	%rdi, %rax
-	subq	%r14, %rax
-	movq	%rsi, %rcx
-	sbbq	%r11, %rcx
-	movq	%rbp, %rbx
-	sbbq	%r15, %rbx
-	movq	%rbx, %rdx
-	sarq	$63, %rdx
-	cmovsq	%rdi, %rax
-	movq	-32(%rsp), %rdx         # 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovsq	%rsi, %rcx
-	movq	%rcx, 8(%rdx)
-	cmovsq	%rbp, %rbx
-	movq	%rbx, 16(%rdx)
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r14
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rax, %r9
+	movq	%rdx, %rdi
+	addq	%r14, %rdi
+	adcq	%rbx, %r10
+	adcq	%rcx, %r8
+	adcq	$0, %r11
+	addq	%rbp, %r9
+	adcq	%r13, %rdi
+	adcq	%r12, %r10
+	adcq	%rsi, %r8
+	adcq	$0, %r11
+	movq	-80(%rsp), %rsi                 # 8-byte Reload
+	imulq	%r9, %rsi
+	movq	%rsi, %rax
+	movq	-32(%rsp), %r12                 # 8-byte Reload
+	mulq	%r12
+	movq	%rdx, -80(%rsp)                 # 8-byte Spill
+	movq	%rax, %r13
+	movq	%rsi, %rax
+	movq	-40(%rsp), %r14                 # 8-byte Reload
+	mulq	%r14
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	movq	%rax, %rbp
+	movq	%rsi, %rax
+	movq	%r15, %rbx
+	mulq	%r15
+	movq	%rdx, %r15
+	movq	%rax, %rcx
+	movq	%rsi, %rax
+	movq	-24(%rsp), %rsi                 # 8-byte Reload
+	mulq	%rsi
+	addq	%r9, %rcx
+	adcq	%rdi, %rax
+	adcq	%r10, %rbp
+	adcq	%r8, %r13
+	adcq	$0, %r11
+	addq	%r15, %rax
+	adcq	%rdx, %rbp
+	adcq	-88(%rsp), %r13                 # 8-byte Folded Reload
+	adcq	-80(%rsp), %r11                 # 8-byte Folded Reload
+	movq	%rax, %rcx
+	subq	%rbx, %rcx
+	movq	%rbp, %rdx
+	sbbq	%rsi, %rdx
+	movq	%r13, %rdi
+	sbbq	%r14, %rdi
+	movq	%r11, %rbx
+	sbbq	%r12, %rbx
+	cmovsq	%r11, %rbx
+	movq	-8(%rsp), %rsi                  # 8-byte Reload
+	movq	%rbx, 24(%rsi)
+	cmovsq	%r13, %rdi
+	movq	%rdi, 16(%rsi)
+	cmovsq	%rbp, %rdx
+	movq	%rdx, 8(%rsi)
+	cmovsq	%rax, %rcx
+	movq	%rcx, (%rsi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -1659,14 +2215,14 @@ mcl_fp_montNF3L:                        # @mcl_fp_montNF3L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end39:
-	.size	mcl_fp_montNF3L, .Lfunc_end39-mcl_fp_montNF3L
-
-	.globl	mcl_fp_montRed3L
-	.align	16, 0x90
-	.type	mcl_fp_montRed3L,@function
-mcl_fp_montRed3L:                       # @mcl_fp_montRed3L
-# BB#0:
+.Lfunc_end27:
+	.size	mcl_fp_montNF4L, .Lfunc_end27-mcl_fp_montNF4L
+                                        # -- End function
+	.globl	mcl_fp_montRed4L                # -- Begin function mcl_fp_montRed4L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed4L,@function
+mcl_fp_montRed4L:                       # @mcl_fp_montRed4L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
@@ -1674,103 +2230,320 @@ mcl_fp_montRed3L:                       # @mcl_fp_montRed3L
 	pushq	%r12
 	pushq	%rbx
 	movq	%rdx, %rcx
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	-8(%rcx), %r9
-	movq	(%rcx), %rdi
-	movq	%rdi, -16(%rsp)         # 8-byte Spill
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	-8(%rdx), %r8
+	movq	(%rdx), %r13
 	movq	(%rsi), %r15
 	movq	%r15, %rbx
-	imulq	%r9, %rbx
+	imulq	%r8, %rbx
+	movq	24(%rdx), %rdi
+	movq	%rbx, %rax
+	mulq	%rdi
+	movq	%rdi, -40(%rsp)                 # 8-byte Spill
+	movq	%rax, %r10
+	movq	%rdx, %r9
 	movq	16(%rcx), %rbp
-	movq	%rbp, -24(%rsp)         # 8-byte Spill
 	movq	%rbx, %rax
 	mulq	%rbp
-	movq	%rax, %r11
-	movq	%rdx, %r8
+	movq	%rbp, -24(%rsp)                 # 8-byte Spill
+	movq	%rax, %r14
+	movq	%rdx, %r11
 	movq	8(%rcx), %rcx
-	movq	%rcx, -32(%rsp)         # 8-byte Spill
+	movq	%rcx, -48(%rsp)                 # 8-byte Spill
 	movq	%rbx, %rax
 	mulq	%rcx
-	movq	%rcx, %r12
-	movq	%rdx, %r10
-	movq	%rax, %r14
+	movq	%rdx, %r12
+	movq	%rax, %rcx
 	movq	%rbx, %rax
-	mulq	%rdi
-	movq	%rdi, %rbx
-	movq	%rdx, %rcx
-	addq	%r14, %rcx
-	adcq	%r11, %r10
-	adcq	$0, %r8
-	movq	40(%rsi), %rdi
-	movq	32(%rsi), %r13
+	mulq	%r13
+	movq	%r13, -32(%rsp)                 # 8-byte Spill
+	movq	%rdx, %rbx
+	addq	%rcx, %rbx
+	adcq	%r14, %r12
+	adcq	%r10, %r11
+	adcq	$0, %r9
 	addq	%r15, %rax
-	adcq	8(%rsi), %rcx
-	adcq	16(%rsi), %r10
-	adcq	24(%rsi), %r8
-	adcq	$0, %r13
-	adcq	$0, %rdi
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	movq	%rcx, %rsi
-	imulq	%r9, %rsi
-	movq	%rsi, %rax
+	adcq	8(%rsi), %rbx
+	adcq	16(%rsi), %r12
+	adcq	24(%rsi), %r11
+	adcq	32(%rsi), %r9
+	movq	%rsi, -16(%rsp)                 # 8-byte Spill
+	setb	-65(%rsp)                       # 1-byte Folded Spill
+	movq	%r8, %rcx
+	imulq	%rbx, %rcx
+	movq	%rcx, %rax
+	mulq	%rdi
+	movq	%rdx, %r10
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
 	mulq	%rbp
-	movq	%rdx, %r11
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	mulq	%r12
 	movq	%rdx, %r14
-	movq	%rax, %r12
-	movq	%rsi, %rax
-	mulq	%rbx
-	movq	%rdx, %rbx
-	addq	%r12, %rbx
-	adcq	%rbp, %r14
-	adcq	$0, %r11
-	addq	%rcx, %rax
-	adcq	%r10, %rbx
-	adcq	%r8, %r14
-	adcq	%r13, %r11
-	adcq	$0, %rdi
-	adcq	$0, %r15
-	imulq	%rbx, %r9
-	movq	%r9, %rax
-	movq	-24(%rsp), %r12         # 8-byte Reload
-	mulq	%r12
-	movq	%rdx, %rbp
-	movq	%rax, %r8
-	movq	%r9, %rax
-	movq	-32(%rsp), %r13         # 8-byte Reload
+	movq	%rax, %rbp
+	movq	%rcx, %rax
 	mulq	%r13
+	movq	%rdx, %r13
+	movq	%rax, %rdi
+	movq	%rcx, %rax
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, %rcx
+	addq	%r13, %rcx
+	adcq	%rbp, %r15
+	adcq	-64(%rsp), %r14                 # 8-byte Folded Reload
+	movzbl	-65(%rsp), %eax                 # 1-byte Folded Reload
+	adcq	%rax, %r10
+	addq	%rbx, %rdi
+	adcq	%r12, %rcx
+	adcq	%r11, %r15
+	adcq	%r9, %r14
+	adcq	40(%rsi), %r10
+	setb	-65(%rsp)                       # 1-byte Folded Spill
+	movq	%r8, %rdi
+	imulq	%rcx, %rdi
+	movq	%rdi, %rax
+	movq	-40(%rsp), %rsi                 # 8-byte Reload
+	mulq	%rsi
+	movq	%rdx, %r9
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %rbp
+	movq	%rdi, %rax
+	movq	-48(%rsp), %rdi                 # 8-byte Reload
+	mulq	%rdi
+	movq	%rdx, %r12
+	movq	%rax, %rbx
+	addq	%r13, %rbx
+	adcq	-56(%rsp), %r12                 # 8-byte Folded Reload
+	adcq	-64(%rsp), %r11                 # 8-byte Folded Reload
+	movzbl	-65(%rsp), %eax                 # 1-byte Folded Reload
+	adcq	%rax, %r9
+	addq	%rcx, %rbp
+	adcq	%r15, %rbx
+	adcq	%r14, %r12
+	adcq	%r10, %r11
+	movq	-16(%rsp), %r15                 # 8-byte Reload
+	adcq	48(%r15), %r9
+	setb	-65(%rsp)                       # 1-byte Folded Spill
+	imulq	%rbx, %r8
+	movq	%r8, %rax
+	mulq	%rsi
+	movq	%rdx, -64(%rsp)                 # 8-byte Spill
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	%r8, %rax
+	movq	-24(%rsp), %r14                 # 8-byte Reload
+	mulq	%r14
+	movq	%rdx, %r13
+	movq	%rax, %rbp
+	movq	%r8, %rax
+	movq	-32(%rsp), %r10                 # 8-byte Reload
+	mulq	%r10
 	movq	%rdx, %rsi
+	movq	%rax, %rcx
+	movq	%r8, %rax
+	mulq	%rdi
+	addq	%rsi, %rax
+	adcq	%rbp, %rdx
+	adcq	-56(%rsp), %r13                 # 8-byte Folded Reload
+	movzbl	-65(%rsp), %edi                 # 1-byte Folded Reload
+	adcq	-64(%rsp), %rdi                 # 8-byte Folded Reload
+	addq	%rbx, %rcx
+	adcq	%r12, %rax
+	adcq	%r11, %rdx
+	adcq	%r9, %r13
+	adcq	56(%r15), %rdi
+	xorl	%r8d, %r8d
+	movq	%rax, %rbp
+	subq	%r10, %rbp
+	movq	%rdx, %rbx
+	sbbq	-48(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%r13, %rcx
+	sbbq	%r14, %rcx
+	movq	%rdi, %rsi
+	sbbq	-40(%rsp), %rsi                 # 8-byte Folded Reload
+	sbbq	%r8, %r8
+	testb	$1, %r8b
+	cmovneq	%rdi, %rsi
+	movq	-8(%rsp), %rdi                  # 8-byte Reload
+	movq	%rsi, 24(%rdi)
+	cmovneq	%r13, %rcx
+	movq	%rcx, 16(%rdi)
+	cmovneq	%rdx, %rbx
+	movq	%rbx, 8(%rdi)
+	cmovneq	%rax, %rbp
+	movq	%rbp, (%rdi)
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	retq
+.Lfunc_end28:
+	.size	mcl_fp_montRed4L, .Lfunc_end28-mcl_fp_montRed4L
+                                        # -- End function
+	.globl	mcl_fp_montRedNF4L              # -- Begin function mcl_fp_montRedNF4L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF4L,@function
+mcl_fp_montRedNF4L:                     # @mcl_fp_montRedNF4L
+# %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %rcx
+	movq	%rdi, -8(%rsp)                  # 8-byte Spill
+	movq	-8(%rdx), %r8
+	movq	(%rdx), %r13
+	movq	(%rsi), %r15
+	movq	%r15, %rbx
+	imulq	%r8, %rbx
+	movq	24(%rdx), %rdi
+	movq	%rbx, %rax
+	mulq	%rdi
+	movq	%rdi, -48(%rsp)                 # 8-byte Spill
 	movq	%rax, %r10
-	movq	%r9, %rax
-	movq	-16(%rsp), %rcx         # 8-byte Reload
+	movq	%rdx, %r9
+	movq	16(%rcx), %rbp
+	movq	%rbx, %rax
+	mulq	%rbp
+	movq	%rbp, -32(%rsp)                 # 8-byte Spill
+	movq	%rax, %r14
+	movq	%rdx, %r11
+	movq	8(%rcx), %rcx
+	movq	%rcx, -24(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rax
 	mulq	%rcx
-	addq	%r10, %rdx
-	adcq	%r8, %rsi
-	adcq	$0, %rbp
-	addq	%rbx, %rax
-	adcq	%r14, %rdx
-	adcq	%r11, %rsi
-	adcq	%rdi, %rbp
-	adcq	$0, %r15
-	movq	%rdx, %rax
-	subq	%rcx, %rax
-	movq	%rsi, %rdi
-	sbbq	%r13, %rdi
-	movq	%rbp, %rcx
-	sbbq	%r12, %rcx
-	sbbq	$0, %r15
-	andl	$1, %r15d
-	cmovneq	%rbp, %rcx
-	testb	%r15b, %r15b
-	cmovneq	%rdx, %rax
-	movq	-8(%rsp), %rdx          # 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovneq	%rsi, %rdi
-	movq	%rdi, 8(%rdx)
-	movq	%rcx, 16(%rdx)
+	movq	%rdx, %r12
+	movq	%rax, %rcx
+	movq	%rbx, %rax
+	mulq	%r13
+	movq	%r13, -40(%rsp)                 # 8-byte Spill
+	movq	%rdx, %rbx
+	addq	%rcx, %rbx
+	adcq	%r14, %r12
+	adcq	%r10, %r11
+	adcq	$0, %r9
+	addq	%r15, %rax
+	adcq	8(%rsi), %rbx
+	adcq	16(%rsi), %r12
+	adcq	24(%rsi), %r11
+	adcq	32(%rsi), %r9
+	movq	%rsi, -16(%rsp)                 # 8-byte Spill
+	setb	-65(%rsp)                       # 1-byte Folded Spill
+	movq	%r8, %rcx
+	imulq	%rbx, %rcx
+	movq	%rcx, %rax
+	mulq	%rdi
+	movq	%rdx, %r10
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%rbp
+	movq	%rdx, %r14
+	movq	%rax, %rbp
+	movq	%rcx, %rax
+	mulq	%r13
+	movq	%rdx, %r13
+	movq	%rax, %rdi
+	movq	%rcx, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, %rcx
+	addq	%r13, %rcx
+	adcq	%rbp, %r15
+	adcq	-64(%rsp), %r14                 # 8-byte Folded Reload
+	movzbl	-65(%rsp), %eax                 # 1-byte Folded Reload
+	adcq	%rax, %r10
+	addq	%rbx, %rdi
+	adcq	%r12, %rcx
+	adcq	%r11, %r15
+	adcq	%r9, %r14
+	adcq	40(%rsi), %r10
+	setb	-65(%rsp)                       # 1-byte Folded Spill
+	movq	%r8, %rdi
+	imulq	%rcx, %rdi
+	movq	%rdi, %rax
+	movq	-48(%rsp), %rsi                 # 8-byte Reload
+	mulq	%rsi
+	movq	%rdx, %r9
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %rbp
+	movq	%rdi, %rax
+	movq	-24(%rsp), %rdi                 # 8-byte Reload
+	mulq	%rdi
+	movq	%rdx, %r12
+	movq	%rax, %rbx
+	addq	%r13, %rbx
+	adcq	-56(%rsp), %r12                 # 8-byte Folded Reload
+	adcq	-64(%rsp), %r11                 # 8-byte Folded Reload
+	movzbl	-65(%rsp), %eax                 # 1-byte Folded Reload
+	adcq	%rax, %r9
+	addq	%rcx, %rbp
+	adcq	%r15, %rbx
+	adcq	%r14, %r12
+	adcq	%r10, %r11
+	movq	-16(%rsp), %r15                 # 8-byte Reload
+	adcq	48(%r15), %r9
+	setb	-65(%rsp)                       # 1-byte Folded Spill
+	imulq	%rbx, %r8
+	movq	%r8, %rax
+	mulq	%rsi
+	movq	%rdx, -64(%rsp)                 # 8-byte Spill
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	%r8, %rax
+	movq	-32(%rsp), %r14                 # 8-byte Reload
+	mulq	%r14
+	movq	%rdx, %r13
+	movq	%rax, %rbp
+	movq	%r8, %rax
+	movq	-40(%rsp), %r10                 # 8-byte Reload
+	mulq	%r10
+	movq	%rdx, %rsi
+	movq	%rax, %rcx
+	movq	%r8, %rax
+	mulq	%rdi
+	movq	%rdi, %r8
+	addq	%rsi, %rax
+	adcq	%rbp, %rdx
+	adcq	-56(%rsp), %r13                 # 8-byte Folded Reload
+	movzbl	-65(%rsp), %edi                 # 1-byte Folded Reload
+	adcq	-64(%rsp), %rdi                 # 8-byte Folded Reload
+	addq	%rbx, %rcx
+	adcq	%r12, %rax
+	adcq	%r11, %rdx
+	adcq	%r9, %r13
+	adcq	56(%r15), %rdi
+	movq	%rax, %rbx
+	subq	%r10, %rbx
+	movq	%rdx, %rbp
+	sbbq	%r8, %rbp
+	movq	%r13, %rcx
+	sbbq	%r14, %rcx
+	movq	%rdi, %rsi
+	sbbq	-48(%rsp), %rsi                 # 8-byte Folded Reload
+	cmovsq	%rdi, %rsi
+	movq	-8(%rsp), %rdi                  # 8-byte Reload
+	movq	%rsi, 24(%rdi)
+	cmovsq	%r13, %rcx
+	movq	%rcx, 16(%rdi)
+	cmovsq	%rdx, %rbp
+	movq	%rbp, 8(%rdi)
+	cmovsq	%rax, %rbx
+	movq	%rbx, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -1778,454 +2551,430 @@ mcl_fp_montRed3L:                       # @mcl_fp_montRed3L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end40:
-	.size	mcl_fp_montRed3L, .Lfunc_end40-mcl_fp_montRed3L
-
-	.globl	mcl_fp_addPre3L
-	.align	16, 0x90
-	.type	mcl_fp_addPre3L,@function
-mcl_fp_addPre3L:                        # @mcl_fp_addPre3L
-# BB#0:
-	movq	16(%rdx), %rax
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rax
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
+.Lfunc_end29:
+	.size	mcl_fp_montRedNF4L, .Lfunc_end29-mcl_fp_montRedNF4L
+                                        # -- End function
+	.globl	mcl_fp_addPre4L                 # -- Begin function mcl_fp_addPre4L
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre4L,@function
+mcl_fp_addPre4L:                        # @mcl_fp_addPre4L
+# %bb.0:
+	movq	24(%rsi), %rax
+	movq	16(%rsi), %rcx
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r8
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rcx
+	adcq	24(%rdx), %rax
+	movq	%rax, 24(%rdi)
+	movq	%rcx, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	setb	%al
+	movzbl	%al, %eax
 	retq
-.Lfunc_end41:
-	.size	mcl_fp_addPre3L, .Lfunc_end41-mcl_fp_addPre3L
-
-	.globl	mcl_fp_subPre3L
-	.align	16, 0x90
-	.type	mcl_fp_subPre3L,@function
-mcl_fp_subPre3L:                        # @mcl_fp_subPre3L
-# BB#0:
+.Lfunc_end30:
+	.size	mcl_fp_addPre4L, .Lfunc_end30-mcl_fp_addPre4L
+                                        # -- End function
+	.globl	mcl_fp_subPre4L                 # -- Begin function mcl_fp_subPre4L
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre4L,@function
+mcl_fp_subPre4L:                        # @mcl_fp_subPre4L
+# %bb.0:
+	movq	24(%rsi), %rcx
 	movq	16(%rsi), %r8
-	movq	(%rsi), %rcx
+	movq	(%rsi), %r9
 	movq	8(%rsi), %rsi
 	xorl	%eax, %eax
-	subq	(%rdx), %rcx
+	subq	(%rdx), %r9
 	sbbq	8(%rdx), %rsi
 	sbbq	16(%rdx), %r8
-	movq	%rcx, (%rdi)
-	movq	%rsi, 8(%rdi)
+	sbbq	24(%rdx), %rcx
+	movq	%rcx, 24(%rdi)
 	movq	%r8, 16(%rdi)
-	sbbq	$0, %rax
+	movq	%rsi, 8(%rdi)
+	movq	%r9, (%rdi)
+	sbbq	%rax, %rax
 	andl	$1, %eax
 	retq
-.Lfunc_end42:
-	.size	mcl_fp_subPre3L, .Lfunc_end42-mcl_fp_subPre3L
-
-	.globl	mcl_fp_shr1_3L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_3L,@function
-mcl_fp_shr1_3L:                         # @mcl_fp_shr1_3L
-# BB#0:
-	movq	16(%rsi), %rax
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rdx
-	shrdq	$1, %rdx, %rcx
-	movq	%rcx, (%rdi)
-	shrdq	$1, %rax, %rdx
+.Lfunc_end31:
+	.size	mcl_fp_subPre4L, .Lfunc_end31-mcl_fp_subPre4L
+                                        # -- End function
+	.globl	mcl_fp_shr1_4L                  # -- Begin function mcl_fp_shr1_4L
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_4L,@function
+mcl_fp_shr1_4L:                         # @mcl_fp_shr1_4L
+# %bb.0:
+	movq	(%rsi), %rax
+	movq	8(%rsi), %r8
+	movq	16(%rsi), %rdx
+	movq	24(%rsi), %rcx
+	movq	%rcx, %rsi
+	shrq	%rsi
+	movq	%rsi, 24(%rdi)
+	shldq	$63, %rdx, %rcx
+	movq	%rcx, 16(%rdi)
+	shldq	$63, %r8, %rdx
 	movq	%rdx, 8(%rdi)
-	shrq	%rax
-	movq	%rax, 16(%rdi)
+	shrdq	$1, %r8, %rax
+	movq	%rax, (%rdi)
 	retq
-.Lfunc_end43:
-	.size	mcl_fp_shr1_3L, .Lfunc_end43-mcl_fp_shr1_3L
-
-	.globl	mcl_fp_add3L
-	.align	16, 0x90
-	.type	mcl_fp_add3L,@function
-mcl_fp_add3L:                           # @mcl_fp_add3L
-# BB#0:
-	movq	16(%rdx), %r8
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r8
+.Lfunc_end32:
+	.size	mcl_fp_shr1_4L, .Lfunc_end32-mcl_fp_shr1_4L
+                                        # -- End function
+	.globl	mcl_fp_add4L                    # -- Begin function mcl_fp_add4L
+	.p2align	4, 0x90
+	.type	mcl_fp_add4L,@function
+mcl_fp_add4L:                           # @mcl_fp_add4L
+# %bb.0:
+	movq	24(%rsi), %r8
+	movq	16(%rsi), %r9
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r9
+	adcq	24(%rdx), %r8
+	movq	%r8, 24(%rdi)
+	movq	%r9, 16(%rdi)
+	movq	%rsi, 8(%rdi)
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r8, 16(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
+	setb	%dl
+	movzbl	%dl, %edx
 	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB44_2
-# BB#1:                                 # %nocarry
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %r9
+	sbbq	24(%rcx), %r8
+	sbbq	$0, %rdx
+	testb	$1, %dl
+	jne	.LBB33_2
+# %bb.1:                                # %nocarry
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r8, 16(%rdi)
-.LBB44_2:                               # %carry
+	movq	%rsi, 8(%rdi)
+	movq	%r9, 16(%rdi)
+	movq	%r8, 24(%rdi)
+.LBB33_2:                               # %carry
 	retq
-.Lfunc_end44:
-	.size	mcl_fp_add3L, .Lfunc_end44-mcl_fp_add3L
-
-	.globl	mcl_fp_addNF3L
-	.align	16, 0x90
-	.type	mcl_fp_addNF3L,@function
-mcl_fp_addNF3L:                         # @mcl_fp_addNF3L
-# BB#0:
+.Lfunc_end33:
+	.size	mcl_fp_add4L, .Lfunc_end33-mcl_fp_add4L
+                                        # -- End function
+	.globl	mcl_fp_addNF4L                  # -- Begin function mcl_fp_addNF4L
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF4L,@function
+mcl_fp_addNF4L:                         # @mcl_fp_addNF4L
+# %bb.0:
+	pushq	%rbx
+	movq	24(%rdx), %r11
 	movq	16(%rdx), %r8
-	movq	(%rdx), %r10
-	movq	8(%rdx), %r9
-	addq	(%rsi), %r10
-	adcq	8(%rsi), %r9
+	movq	(%rdx), %r9
+	movq	8(%rdx), %r10
+	addq	(%rsi), %r9
+	adcq	8(%rsi), %r10
 	adcq	16(%rsi), %r8
-	movq	%r10, %rsi
+	adcq	24(%rsi), %r11
+	movq	%r9, %rsi
 	subq	(%rcx), %rsi
-	movq	%r9, %rdx
+	movq	%r10, %rdx
 	sbbq	8(%rcx), %rdx
 	movq	%r8, %rax
 	sbbq	16(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r10, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
+	movq	%r11, %rbx
+	sbbq	24(%rcx), %rbx
+	cmovsq	%r11, %rbx
+	movq	%rbx, 24(%rdi)
 	cmovsq	%r8, %rax
 	movq	%rax, 16(%rdi)
-	retq
-.Lfunc_end45:
-	.size	mcl_fp_addNF3L, .Lfunc_end45-mcl_fp_addNF3L
-
-	.globl	mcl_fp_sub3L
-	.align	16, 0x90
-	.type	mcl_fp_sub3L,@function
-mcl_fp_sub3L:                           # @mcl_fp_sub3L
-# BB#0:
-	movq	16(%rsi), %r8
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r9
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r9
-	sbbq	16(%rdx), %r8
-	movq	%rax, (%rdi)
-	movq	%r9, 8(%rdi)
-	movq	%r8, 16(%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB46_2
-# BB#1:                                 # %nocarry
-	retq
-.LBB46_2:                               # %carry
-	movq	8(%rcx), %rdx
-	movq	16(%rcx), %rsi
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r9, %rdx
+	cmovsq	%r10, %rdx
 	movq	%rdx, 8(%rdi)
-	adcq	%r8, %rsi
-	movq	%rsi, 16(%rdi)
+	cmovsq	%r9, %rsi
+	movq	%rsi, (%rdi)
+	popq	%rbx
 	retq
-.Lfunc_end46:
-	.size	mcl_fp_sub3L, .Lfunc_end46-mcl_fp_sub3L
-
-	.globl	mcl_fp_subNF3L
-	.align	16, 0x90
-	.type	mcl_fp_subNF3L,@function
-mcl_fp_subNF3L:                         # @mcl_fp_subNF3L
-# BB#0:
+.Lfunc_end34:
+	.size	mcl_fp_addNF4L, .Lfunc_end34-mcl_fp_addNF4L
+                                        # -- End function
+	.globl	mcl_fp_sub4L                    # -- Begin function mcl_fp_sub4L
+	.p2align	4, 0x90
+	.type	mcl_fp_sub4L,@function
+mcl_fp_sub4L:                           # @mcl_fp_sub4L
+# %bb.0:
+	movq	24(%rsi), %r9
 	movq	16(%rsi), %r10
 	movq	(%rsi), %r8
-	movq	8(%rsi), %r9
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
 	subq	(%rdx), %r8
-	sbbq	8(%rdx), %r9
+	sbbq	8(%rdx), %rsi
 	sbbq	16(%rdx), %r10
-	movq	%r10, %rdx
+	sbbq	24(%rdx), %r9
+	movq	%r9, 24(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rax, %rax
+	testb	$1, %al
+	jne	.LBB35_2
+# %bb.1:                                # %nocarry
+	retq
+.LBB35_2:                               # %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %r10
+	adcq	24(%rcx), %r9
+	movq	%r9, 24(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	retq
+.Lfunc_end35:
+	.size	mcl_fp_sub4L, .Lfunc_end35-mcl_fp_sub4L
+                                        # -- End function
+	.globl	mcl_fp_subNF4L                  # -- Begin function mcl_fp_subNF4L
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF4L,@function
+mcl_fp_subNF4L:                         # @mcl_fp_subNF4L
+# %bb.0:
+	pushq	%rbx
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %r8
+	movq	(%rsi), %r9
+	movq	8(%rsi), %r10
+	subq	(%rdx), %r9
+	sbbq	8(%rdx), %r10
+	sbbq	16(%rdx), %r8
+	sbbq	24(%rdx), %r11
+	movq	%r11, %rdx
 	sarq	$63, %rdx
-	movq	%rdx, %rsi
-	shldq	$1, %r10, %rsi
-	andq	(%rcx), %rsi
+	movq	24(%rcx), %rsi
+	andq	%rdx, %rsi
 	movq	16(%rcx), %rax
 	andq	%rdx, %rax
-	andq	8(%rcx), %rdx
-	addq	%r8, %rsi
-	movq	%rsi, (%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r10, %rax
+	movq	8(%rcx), %rbx
+	andq	%rdx, %rbx
+	andq	(%rcx), %rdx
+	addq	%r9, %rdx
+	movq	%rdx, (%rdi)
+	adcq	%r10, %rbx
+	movq	%rbx, 8(%rdi)
+	adcq	%r8, %rax
 	movq	%rax, 16(%rdi)
+	adcq	%r11, %rsi
+	movq	%rsi, 24(%rdi)
+	popq	%rbx
 	retq
-.Lfunc_end47:
-	.size	mcl_fp_subNF3L, .Lfunc_end47-mcl_fp_subNF3L
-
-	.globl	mcl_fpDbl_add3L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add3L,@function
-mcl_fpDbl_add3L:                        # @mcl_fpDbl_add3L
-# BB#0:
-	pushq	%r15
+.Lfunc_end36:
+	.size	mcl_fp_subNF4L, .Lfunc_end36-mcl_fp_subNF4L
+                                        # -- End function
+	.globl	mcl_fpDbl_add4L                 # -- Begin function mcl_fpDbl_add4L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add4L,@function
+mcl_fpDbl_add4L:                        # @mcl_fpDbl_add4L
+# %bb.0:
 	pushq	%r14
 	pushq	%rbx
-	movq	40(%rdx), %r10
-	movq	40(%rsi), %r8
-	movq	32(%rdx), %r11
-	movq	24(%rdx), %r14
-	movq	24(%rsi), %r15
-	movq	32(%rsi), %r9
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
+	movq	56(%rsi), %r11
+	movq	48(%rsi), %r10
+	movq	40(%rsi), %r9
+	movq	32(%rsi), %r8
+	movq	24(%rsi), %rax
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %r14
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r14
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rbx
+	adcq	24(%rdx), %rax
+	adcq	32(%rdx), %r8
+	adcq	40(%rdx), %r9
+	adcq	48(%rdx), %r10
+	adcq	56(%rdx), %r11
+	movq	%rax, 24(%rdi)
 	movq	%rbx, 16(%rdi)
-	adcq	%r14, %r15
-	adcq	%r11, %r9
-	adcq	%r10, %r8
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r15, %rdx
+	movq	%rsi, 8(%rdi)
+	movq	%r14, (%rdi)
+	setb	%al
+	movzbl	%al, %r14d
+	movq	%r8, %rdx
 	subq	(%rcx), %rdx
 	movq	%r9, %rsi
 	sbbq	8(%rcx), %rsi
-	movq	%r8, %rbx
+	movq	%r10, %rbx
 	sbbq	16(%rcx), %rbx
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%r15, %rdx
-	movq	%rdx, 24(%rdi)
-	testb	%al, %al
+	movq	%r11, %rax
+	sbbq	24(%rcx), %rax
+	sbbq	$0, %r14
+	testb	$1, %r14b
+	cmovneq	%r11, %rax
+	movq	%rax, 56(%rdi)
+	cmovneq	%r10, %rbx
+	movq	%rbx, 48(%rdi)
 	cmovneq	%r9, %rsi
-	movq	%rsi, 32(%rdi)
-	cmovneq	%r8, %rbx
-	movq	%rbx, 40(%rdi)
+	movq	%rsi, 40(%rdi)
+	cmovneq	%r8, %rdx
+	movq	%rdx, 32(%rdi)
 	popq	%rbx
 	popq	%r14
-	popq	%r15
 	retq
-.Lfunc_end48:
-	.size	mcl_fpDbl_add3L, .Lfunc_end48-mcl_fpDbl_add3L
-
-	.globl	mcl_fpDbl_sub3L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub3L,@function
-mcl_fpDbl_sub3L:                        # @mcl_fpDbl_sub3L
-# BB#0:
+.Lfunc_end37:
+	.size	mcl_fpDbl_add4L, .Lfunc_end37-mcl_fpDbl_add4L
+                                        # -- End function
+	.globl	mcl_fpDbl_sub4L                 # -- Begin function mcl_fpDbl_sub4L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub4L,@function
+mcl_fpDbl_sub4L:                        # @mcl_fpDbl_sub4L
+# %bb.0:
 	pushq	%r15
 	pushq	%r14
-	pushq	%r12
 	pushq	%rbx
-	movq	40(%rdx), %r10
-	movq	40(%rsi), %r8
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %r14
-	movq	(%rsi), %rbx
+	movq	56(%rsi), %r8
+	movq	48(%rsi), %r9
+	movq	40(%rsi), %r10
+	movq	32(%rsi), %r11
+	movq	24(%rsi), %r15
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %r14
 	movq	8(%rsi), %rax
 	xorl	%esi, %esi
-	subq	(%rdx), %rbx
+	subq	(%rdx), %r14
 	sbbq	8(%rdx), %rax
-	movq	24(%rdx), %r15
-	movq	32(%rdx), %r12
-	sbbq	16(%rdx), %r14
-	movq	%rbx, (%rdi)
+	sbbq	16(%rdx), %rbx
+	sbbq	24(%rdx), %r15
+	sbbq	32(%rdx), %r11
+	sbbq	40(%rdx), %r10
+	sbbq	48(%rdx), %r9
+	sbbq	56(%rdx), %r8
+	movq	%r15, 24(%rdi)
+	movq	%rbx, 16(%rdi)
 	movq	%rax, 8(%rdi)
-	movq	%r14, 16(%rdi)
-	sbbq	%r15, %r11
-	sbbq	%r12, %r9
-	sbbq	%r10, %r8
-	movl	$0, %eax
-	sbbq	$0, %rax
-	andl	$1, %eax
-	movq	(%rcx), %rdx
-	cmoveq	%rsi, %rdx
-	testb	%al, %al
-	movq	16(%rcx), %rax
-	cmoveq	%rsi, %rax
-	cmovneq	8(%rcx), %rsi
-	addq	%r11, %rdx
-	movq	%rdx, 24(%rdi)
-	adcq	%r9, %rsi
+	movq	%r14, (%rdi)
+	sbbq	%rsi, %rsi
+	andl	$1, %esi
+	negq	%rsi
+	movq	24(%rcx), %rax
+	andq	%rsi, %rax
+	movq	16(%rcx), %rdx
+	andq	%rsi, %rdx
+	movq	8(%rcx), %rbx
+	andq	%rsi, %rbx
+	andq	(%rcx), %rsi
+	addq	%r11, %rsi
 	movq	%rsi, 32(%rdi)
+	adcq	%r10, %rbx
+	movq	%rbx, 40(%rdi)
+	adcq	%r9, %rdx
+	movq	%rdx, 48(%rdi)
 	adcq	%r8, %rax
-	movq	%rax, 40(%rdi)
+	movq	%rax, 56(%rdi)
 	popq	%rbx
-	popq	%r12
 	popq	%r14
 	popq	%r15
 	retq
-.Lfunc_end49:
-	.size	mcl_fpDbl_sub3L, .Lfunc_end49-mcl_fpDbl_sub3L
-
-	.globl	mcl_fp_mulUnitPre4L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre4L,@function
-mcl_fp_mulUnitPre4L:                    # @mcl_fp_mulUnitPre4L
-# BB#0:
+.Lfunc_end38:
+	.size	mcl_fpDbl_sub4L, .Lfunc_end38-mcl_fpDbl_sub4L
+                                        # -- End function
+	.globl	mulPv384x64                     # -- Begin function mulPv384x64
+	.p2align	4, 0x90
+	.type	mulPv384x64,@function
+mulPv384x64:                            # @mulPv384x64
+# %bb.0:
+	pushq	%r15
 	pushq	%r14
+	pushq	%r13
+	pushq	%r12
 	pushq	%rbx
 	movq	%rdx, %rcx
+	movq	%rdx, %rax
+	mulq	(%rsi)
+	movq	%rdx, %r9
+	movq	%rax, (%rdi)
 	movq	%rcx, %rax
-	mulq	24(%rsi)
+	mulq	40(%rsi)
 	movq	%rdx, %r8
-	movq	%rax, %r9
+	movq	%rax, %r10
 	movq	%rcx, %rax
-	mulq	16(%rsi)
-	movq	%rdx, %r10
-	movq	%rax, %r11
+	mulq	32(%rsi)
+	movq	%rdx, %r11
+	movq	%rax, %r14
 	movq	%rcx, %rax
-	mulq	8(%rsi)
+	mulq	24(%rsi)
+	movq	%rdx, %r12
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	mulq	16(%rsi)
 	movq	%rdx, %rbx
-	movq	%rax, %r14
+	movq	%rax, %r13
 	movq	%rcx, %rax
-	mulq	(%rsi)
-	movq	%rax, (%rdi)
-	addq	%r14, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r11, %rbx
-	movq	%rbx, 16(%rdi)
-	adcq	%r9, %r10
-	movq	%r10, 24(%rdi)
+	mulq	8(%rsi)
+	addq	%r9, %rax
+	movq	%rax, 8(%rdi)
+	adcq	%r13, %rdx
+	movq	%rdx, 16(%rdi)
+	adcq	%r15, %rbx
+	movq	%rbx, 24(%rdi)
+	adcq	%r14, %r12
+	movq	%r12, 32(%rdi)
+	adcq	%r10, %r11
+	movq	%r11, 40(%rdi)
 	adcq	$0, %r8
-	movq	%r8, 32(%rdi)
+	movq	%r8, 48(%rdi)
+	movq	%rdi, %rax
 	popq	%rbx
+	popq	%r12
+	popq	%r13
 	popq	%r14
+	popq	%r15
 	retq
-.Lfunc_end50:
-	.size	mcl_fp_mulUnitPre4L, .Lfunc_end50-mcl_fp_mulUnitPre4L
-
-	.globl	mcl_fpDbl_mulPre4L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre4L,@function
-mcl_fpDbl_mulPre4L:                     # @mcl_fpDbl_mulPre4L
-# BB#0:
+.Lfunc_end39:
+	.size	mulPv384x64, .Lfunc_end39-mulPv384x64
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre6L             # -- Begin function mcl_fp_mulUnitPre6L
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre6L,@function
+mcl_fp_mulUnitPre6L:                    # @mcl_fp_mulUnitPre6L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	(%rsi), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	8(%rsi), %r8
-	movq	%r8, -64(%rsp)          # 8-byte Spill
-	movq	(%rdx), %rbx
-	movq	%rdx, %rbp
-	mulq	%rbx
-	movq	%rdx, %r15
-	movq	16(%rsi), %rcx
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %r11
-	movq	%rax, (%rdi)
-	movq	%r11, %rax
-	mulq	%rbx
-	movq	%rdx, %r12
-	movq	%rax, %r14
-	movq	%rcx, %rax
-	mulq	%rbx
-	movq	%rdx, %r10
-	movq	%rax, %r9
-	movq	%r8, %rax
-	mulq	%rbx
-	movq	%rdx, %r13
+	movq	%rdx, %rcx
+	movq	%rdx, %rax
+	mulq	40(%rsi)
+	movq	%rdx, %r9
 	movq	%rax, %r8
-	addq	%r15, %r8
-	adcq	%r9, %r13
-	adcq	%r14, %r10
-	adcq	$0, %r12
-	movq	%rbp, %r9
-	movq	8(%r9), %rbp
-	movq	%r11, %rax
-	mulq	%rbp
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, %r15
 	movq	%rcx, %rax
-	mulq	%rbp
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, %rcx
-	movq	-64(%rsp), %r14         # 8-byte Reload
-	movq	%r14, %rax
-	mulq	%rbp
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, %rbx
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	addq	%r8, %rax
-	movq	%rax, 8(%rdi)
-	adcq	%r13, %rbx
-	adcq	%r10, %rcx
-	adcq	%r12, %r15
-	sbbq	%r13, %r13
-	andl	$1, %r13d
-	movq	16(%r9), %rbp
-	movq	%r14, %rax
-	mulq	%rbp
-	movq	%rax, %r12
-	movq	%rdx, %r8
-	addq	-56(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	-40(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-32(%rsp), %r13         # 8-byte Folded Reload
-	movq	%r11, %rax
-	mulq	%rbp
-	movq	%rdx, %r9
+	mulq	32(%rsi)
+	movq	%rdx, %r10
 	movq	%rax, %r11
-	movq	-24(%rsp), %rax         # 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, %r14
-	movq	%rax, %r10
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	mulq	%rbp
-	addq	%rbx, %rax
-	movq	%rax, 16(%rdi)
-	adcq	%r12, %rcx
-	adcq	%r15, %r10
-	adcq	%r13, %r11
-	sbbq	%r13, %r13
-	andl	$1, %r13d
-	addq	%rdx, %rcx
-	adcq	%r8, %r10
-	adcq	%r14, %r11
-	adcq	%r9, %r13
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	movq	24(%rax), %rbx
-	movq	%rbx, %rax
+	movq	%rcx, %rax
 	mulq	24(%rsi)
-	movq	%rdx, %r8
+	movq	%rdx, %r15
 	movq	%rax, %r14
-	movq	%rbx, %rax
+	movq	%rcx, %rax
 	mulq	16(%rsi)
-	movq	%rdx, %r9
+	movq	%rdx, %r13
 	movq	%rax, %r12
-	movq	%rbx, %rax
+	movq	%rcx, %rax
 	mulq	8(%rsi)
-	movq	%rdx, %r15
+	movq	%rdx, %rbx
 	movq	%rax, %rbp
-	movq	%rbx, %rax
+	movq	%rcx, %rax
 	mulq	(%rsi)
-	addq	%rcx, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r10, %rbp
-	adcq	%r11, %r12
-	adcq	%r13, %r14
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%rdx, %rbp
-	movq	%rbp, 32(%rdi)
-	adcq	%r15, %r12
-	movq	%r12, 40(%rdi)
-	adcq	%r9, %r14
-	movq	%r14, 48(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 56(%rdi)
+	movq	%rax, (%rdi)
+	addq	%rbp, %rdx
+	movq	%rdx, 8(%rdi)
+	adcq	%r12, %rbx
+	movq	%rbx, 16(%rdi)
+	adcq	%r14, %r13
+	movq	%r13, 24(%rdi)
+	adcq	%r11, %r15
+	movq	%r15, 32(%rdi)
+	adcq	%r8, %r10
+	movq	%r10, 40(%rdi)
+	adcq	$0, %r9
+	movq	%r9, 48(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -2233,395 +2982,282 @@ mcl_fpDbl_mulPre4L:                     # @mcl_fpDbl_mulPre4L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end51:
-	.size	mcl_fpDbl_mulPre4L, .Lfunc_end51-mcl_fpDbl_mulPre4L
-
-	.globl	mcl_fpDbl_sqrPre4L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre4L,@function
-mcl_fpDbl_sqrPre4L:                     # @mcl_fpDbl_sqrPre4L
-# BB#0:
+.Lfunc_end40:
+	.size	mcl_fp_mulUnitPre6L, .Lfunc_end40-mcl_fp_mulUnitPre6L
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre6L              # -- Begin function mcl_fpDbl_mulPre6L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre6L,@function
+mcl_fpDbl_mulPre6L:                     # @mcl_fpDbl_mulPre6L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rsi, %r10
-	movq	16(%r10), %r9
-	movq	24(%r10), %r11
-	movq	(%r10), %r15
-	movq	8(%r10), %r8
-	movq	%r15, %rax
-	mulq	%r15
-	movq	%rdx, %rbp
+	movq	%rdx, -64(%rsp)                 # 8-byte Spill
+	movq	%rdi, -48(%rsp)                 # 8-byte Spill
+	movq	(%rsi), %rax
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	8(%rsi), %r14
+	movq	(%rdx), %rbx
+	mulq	%rbx
+	movq	%rdx, %r12
+	movq	16(%rsi), %r13
+	movq	24(%rsi), %r8
+	movq	32(%rsi), %r10
+	movq	40(%rsi), %rdx
+	movq	%rdx, -72(%rsp)                 # 8-byte Spill
 	movq	%rax, (%rdi)
-	movq	%r11, %rax
-	mulq	%r8
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	%r9, %rax
-	mulq	%r8
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%r11, %rax
-	mulq	%r15
-	movq	%rdx, %rbx
-	movq	%rax, %rcx
-	movq	%r9, %rax
-	mulq	%r15
+	movq	%rdx, %rax
+	mulq	%rbx
+	movq	%rdx, %rcx
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%r10, %rax
+	mulq	%rbx
+	movq	%rdx, %rbp
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	%r8, %rax
+	mulq	%rbx
+	movq	%rdx, %r11
+	movq	%rax, -88(%rsp)                 # 8-byte Spill
+	movq	%r13, %rax
+	movq	%r13, %r9
+	movq	%r13, -32(%rsp)                 # 8-byte Spill
+	mulq	%rbx
+	movq	%rdx, %r13
+	movq	%rax, %r15
+	movq	%r14, %rax
+	movq	%r14, -40(%rsp)                 # 8-byte Spill
+	mulq	%rbx
 	movq	%rdx, %rsi
-	movq	%rsi, -16(%rsp)         # 8-byte Spill
+	movq	%rax, %rdi
+	addq	%r12, %rdi
+	adcq	%r15, %rsi
+	adcq	-88(%rsp), %r13                 # 8-byte Folded Reload
+	adcq	-112(%rsp), %r11                # 8-byte Folded Reload
+	adcq	-104(%rsp), %rbp                # 8-byte Folded Reload
+	movq	%rbp, -24(%rsp)                 # 8-byte Spill
+	adcq	$0, %rcx
+	movq	%rcx, -80(%rsp)                 # 8-byte Spill
+	movq	-64(%rsp), %rax                 # 8-byte Reload
+	movq	8(%rax), %r15
+	movq	%r15, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                # 8-byte Spill
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	%r15, %rax
+	mulq	%r10
+	movq	%r10, -16(%rsp)                 # 8-byte Spill
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
 	movq	%rax, %r12
-	movq	%r8, %rax
+	movq	%r15, %rax
 	mulq	%r8
-	movq	%rdx, %r13
-	movq	%rax, %r14
-	movq	%r8, %rax
-	mulq	%r15
-	addq	%rax, %rbp
-	movq	%rdx, %r8
-	adcq	%r12, %r8
-	adcq	%rsi, %rcx
-	adcq	$0, %rbx
-	addq	%rax, %rbp
-	movq	%rbp, 8(%rdi)
-	adcq	%r14, %r8
-	movq	-40(%rsp), %rsi         # 8-byte Reload
-	adcq	%rsi, %rcx
-	adcq	-32(%rsp), %rbx         # 8-byte Folded Reload
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	addq	%rdx, %r8
-	adcq	%r13, %rcx
-	movq	-24(%rsp), %r15         # 8-byte Reload
-	adcq	%r15, %rbx
-	adcq	-8(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%r11, %rax
+	movq	%r8, -8(%rsp)                   # 8-byte Spill
+	movq	%rdx, -96(%rsp)                 # 8-byte Spill
+	movq	%rax, %rbp
+	movq	%r15, %rax
 	mulq	%r9
+	movq	%rdx, %r9
+	movq	%rax, %rcx
+	movq	%r15, %rax
+	mulq	%r14
 	movq	%rdx, %r14
-	movq	%rax, %r11
-	movq	%r9, %rax
-	mulq	%r9
-	movq	%rax, %r9
-	addq	%r12, %r8
-	movq	%r8, 16(%rdi)
-	adcq	%rsi, %rcx
-	adcq	%rbx, %r9
-	adcq	%rbp, %r11
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-16(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	%r15, %r9
-	adcq	%rdx, %r11
-	adcq	%r14, %r12
-	movq	24(%r10), %rbp
-	movq	%rbp, %rax
-	mulq	16(%r10)
-	movq	%rdx, %r8
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	8(%r10)
-	movq	%rdx, %r13
 	movq	%rax, %rbx
-	movq	%rbp, %rax
-	mulq	(%r10)
-	movq	%rdx, %r15
-	movq	%rax, %rsi
-	movq	%rbp, %rax
-	mulq	%rbp
-	addq	%rcx, %rsi
-	movq	%rsi, 24(%rdi)
-	adcq	%r9, %rbx
-	adcq	%r11, %r14
-	adcq	%r12, %rax
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r15, %rbx
-	movq	%rbx, 32(%rdi)
-	adcq	%r13, %r14
-	movq	%r14, 40(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 48(%rdi)
-	adcq	%rdx, %rcx
-	movq	%rcx, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end52:
-	.size	mcl_fpDbl_sqrPre4L, .Lfunc_end52-mcl_fpDbl_sqrPre4L
-
-	.globl	mcl_fp_mont4L
-	.align	16, 0x90
-	.type	mcl_fp_mont4L,@function
-mcl_fp_mont4L:                          # @mcl_fp_mont4L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rdi, -88(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rdi
-	mulq	%rdi
-	movq	%rax, %r9
-	movq	%rdx, %rbp
-	movq	16(%rsi), %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r8
-	movq	%rdx, %r10
-	movq	(%rsi), %rbx
-	movq	%rbx, -72(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, %r14
-	movq	%rax, %rsi
-	movq	%rbx, %rax
-	mulq	%rdi
-	movq	%rax, %r11
-	movq	%rdx, %r13
-	addq	%rsi, %r13
-	adcq	%r8, %r14
-	adcq	%r9, %r10
-	adcq	$0, %rbp
-	movq	%rbp, -96(%rsp)         # 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	(%rcx), %r8
-	movq	%r8, -32(%rsp)          # 8-byte Spill
-	movq	%r11, %rdi
-	imulq	%rax, %rdi
-	movq	24(%rcx), %rdx
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	16(%rcx), %rsi
-	movq	%rsi, -16(%rsp)         # 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -40(%rsp)         # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rdx
-	movq	%rdx, %r9
-	movq	%rax, %r12
-	movq	%rdi, %rax
-	mulq	%rsi
-	movq	%rdx, %rbp
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	mulq	%rcx
-	movq	%rdx, %rsi
-	movq	%rax, %r15
-	movq	%rdi, %rax
-	mulq	%r8
-	movq	%rdx, %rcx
-	addq	%r15, %rcx
-	adcq	%rbx, %rsi
-	adcq	%r12, %rbp
-	adcq	$0, %r9
-	addq	%r11, %rax
+	movq	%r15, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	addq	%rdi, %rax
+	movq	-48(%rsp), %rdi                 # 8-byte Reload
+	movq	%rax, 8(%rdi)
+	adcq	%rsi, %rbx
 	adcq	%r13, %rcx
-	adcq	%r14, %rsi
-	adcq	%r10, %rbp
-	adcq	-96(%rsp), %r9          # 8-byte Folded Reload
-	sbbq	%r13, %r13
-	andl	$1, %r13d
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	movq	8(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %r11
-	movq	%rdi, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
+	adcq	%r11, %rbp
+	adcq	-24(%rsp), %r12                 # 8-byte Folded Reload
+	movq	-112(%rsp), %rsi                # 8-byte Reload
+	adcq	-80(%rsp), %rsi                 # 8-byte Folded Reload
+	setb	%al
+	addq	%rdx, %rbx
+	adcq	%r14, %rcx
+	adcq	%r9, %rbp
+	adcq	-96(%rsp), %r12                 # 8-byte Folded Reload
+	adcq	-88(%rsp), %rsi                 # 8-byte Folded Reload
+	movq	%rsi, -112(%rsp)                # 8-byte Spill
+	movzbl	%al, %r9d
+	adcq	-104(%rsp), %r9                 # 8-byte Folded Reload
+	movq	-64(%rsp), %rax                 # 8-byte Reload
+	movq	16(%rax), %rsi
+	movq	%rsi, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                # 8-byte Spill
+	movq	%rax, -88(%rsp)                 # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	%r10
 	movq	%rdx, %r10
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	%r8
+	movq	%rdx, %r8
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, %r13
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r15
 	movq	%rax, %r14
-	movq	%rdi, %rax
-	mulq	-80(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r15
-	movq	%rdi, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r8
+	movq	%rsi, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %rdi
-	addq	%r15, %rdi
-	adcq	%r14, %rbx
-	adcq	%r11, %r10
-	adcq	$0, %r12
-	addq	%rcx, %r8
-	adcq	%rsi, %rdi
-	adcq	%rbp, %rbx
+	addq	%r14, %rdi
+	adcq	%r13, %r15
+	adcq	-80(%rsp), %r11                 # 8-byte Folded Reload
+	adcq	-96(%rsp), %r8                  # 8-byte Folded Reload
+	adcq	-88(%rsp), %r10                 # 8-byte Folded Reload
+	movq	-104(%rsp), %rsi                # 8-byte Reload
+	adcq	$0, %rsi
+	addq	%rbx, %rax
+	movq	-48(%rsp), %rdx                 # 8-byte Reload
+	movq	%rax, 16(%rdx)
+	adcq	%rcx, %rdi
+	adcq	%rbp, %r15
+	adcq	%r12, %r11
+	adcq	-112(%rsp), %r8                 # 8-byte Folded Reload
 	adcq	%r9, %r10
-	adcq	%r13, %r12
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	movq	%r8, %rsi
-	imulq	-24(%rsp), %rsi         # 8-byte Folded Reload
+	adcq	$0, %rsi
+	movq	%rsi, -104(%rsp)                # 8-byte Spill
+	movq	-64(%rsp), %rax                 # 8-byte Reload
+	movq	24(%rax), %rsi
 	movq	%rsi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -96(%rsp)         # 8-byte Spill
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	movq	%rax, -112(%rsp)                # 8-byte Spill
 	movq	%rsi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %rcx
-	movq	%rax, %r14
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
 	movq	%rsi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
+	mulq	-8(%rsp)                        # 8-byte Folded Reload
 	movq	%rdx, %r9
-	movq	%rax, %rbp
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
 	movq	%rsi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	addq	%rbp, %r11
-	adcq	%r14, %r9
-	adcq	-96(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%r8, %rax
-	adcq	%rdi, %r11
-	adcq	%rbx, %r9
-	adcq	%r10, %rcx
-	adcq	%r12, %r13
-	adcq	$0, %r15
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	movq	16(%rax), %rsi
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, %rbp
 	movq	%rsi, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r12
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r10
 	movq	%rax, %rbx
 	movq	%rsi, %rax
-	mulq	-80(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %rdi
-	movq	%rsi, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r8
-	movq	%rdx, %rbp
-	addq	%rdi, %rbp
-	adcq	%rbx, %r14
-	adcq	-96(%rsp), %r10         # 8-byte Folded Reload
-	adcq	$0, %r12
-	addq	%r11, %r8
-	adcq	%r9, %rbp
-	adcq	%rcx, %r14
-	adcq	%r13, %r10
-	adcq	%r15, %r12
-	sbbq	%r13, %r13
-	movq	%r8, %rsi
-	imulq	-24(%rsp), %rsi         # 8-byte Folded Reload
-	andl	$1, %r13d
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r13
+	addq	%rbx, %r13
+	adcq	%rbp, %r12
+	adcq	-80(%rsp), %r14                 # 8-byte Folded Reload
+	adcq	-96(%rsp), %r9                  # 8-byte Folded Reload
+	adcq	-112(%rsp), %rcx                # 8-byte Folded Reload
+	movq	-88(%rsp), %rdx                 # 8-byte Reload
+	adcq	$0, %rdx
+	addq	%rdi, %rax
+	movq	-48(%rsp), %rdi                 # 8-byte Reload
+	movq	%rax, 24(%rdi)
+	adcq	%r15, %r13
+	adcq	%r11, %r12
+	adcq	%r8, %r14
+	adcq	%r10, %r9
+	adcq	-104(%rsp), %rcx                # 8-byte Folded Reload
+	adcq	$0, %rdx
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	movq	-64(%rsp), %rax                 # 8-byte Reload
+	movq	32(%rax), %rsi
 	movq	%rsi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r9
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                # 8-byte Spill
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
 	movq	%rsi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r11
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -112(%rsp)                # 8-byte Spill
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
 	movq	%rsi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
+	mulq	-8(%rsp)                        # 8-byte Folded Reload
 	movq	%rdx, %rbx
-	movq	%rax, %r15
+	movq	%rax, -24(%rsp)                 # 8-byte Spill
 	movq	%rsi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	addq	%r15, %rsi
-	adcq	%r11, %rbx
-	adcq	%r9, %rcx
-	adcq	$0, %rdi
-	addq	%r8, %rax
-	adcq	%rbp, %rsi
-	adcq	%r14, %rbx
-	adcq	%r10, %rcx
-	adcq	%r12, %rdi
-	adcq	$0, %r13
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	movq	24(%rax), %rbp
-	movq	%rbp, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, %rbp
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r10
-	movq	%rax, %r15
-	movq	%rbp, %rax
-	mulq	-80(%rsp)               # 8-byte Folded Reload
+	movq	%rax, %r8
+	movq	%rsi, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r11
-	movq	%rax, %r12
-	movq	%rbp, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r9
-	movq	%rdx, %rbp
-	addq	%r12, %rbp
-	adcq	%r15, %r11
+	addq	%r8, %r11
+	adcq	%rbp, %r10
+	adcq	-24(%rsp), %r15                 # 8-byte Folded Reload
+	adcq	-80(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	-112(%rsp), %rsi                # 8-byte Reload
+	adcq	-96(%rsp), %rsi                 # 8-byte Folded Reload
+	movq	-104(%rsp), %rdx                # 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r13, %rax
+	movq	%rax, 32(%rdi)
+	adcq	%r12, %r11
 	adcq	%r14, %r10
-	adcq	$0, %r8
-	addq	%rsi, %r9
-	adcq	%rbx, %rbp
-	adcq	%rcx, %r11
-	adcq	%rdi, %r10
-	adcq	%r13, %r8
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	-24(%rsp), %rcx         # 8-byte Reload
-	imulq	%r9, %rcx
-	movq	%rcx, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
+	adcq	%r9, %r15
+	adcq	%rcx, %rbx
+	movq	%rbx, -96(%rsp)                 # 8-byte Spill
+	adcq	-88(%rsp), %rsi                 # 8-byte Folded Reload
+	movq	%rsi, -112(%rsp)                # 8-byte Spill
+	adcq	$0, %rdx
+	movq	%rdx, -104(%rsp)                # 8-byte Spill
+	movq	-64(%rsp), %rax                 # 8-byte Reload
+	movq	40(%rax), %rbx
+	movq	%rbx, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rcx
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %r9
+	movq	%rbx, %rax
+	mulq	-8(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %rsi
+	movq	%rax, %r14
+	movq	%rbx, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, %rdi
+	movq	%rbx, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r13
-	movq	%rax, %r15
-	movq	%rcx, %rax
-	movq	-40(%rsp), %r14         # 8-byte Reload
-	mulq	%r14
-	movq	%rdx, %rdi
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	movq	-32(%rsp), %rcx         # 8-byte Reload
-	mulq	%rcx
-	addq	%r12, %rdx
-	adcq	%r15, %rdi
-	adcq	-24(%rsp), %r13         # 8-byte Folded Reload
-	adcq	$0, %rbx
-	addq	%r9, %rax
-	adcq	%rbp, %rdx
-	adcq	%r11, %rdi
-	adcq	%r10, %r13
-	adcq	%r8, %rbx
-	adcq	$0, %rsi
-	movq	%rdx, %rax
-	subq	%rcx, %rax
-	movq	%rdi, %rcx
-	sbbq	%r14, %rcx
-	movq	%r13, %r8
-	sbbq	-16(%rsp), %r8          # 8-byte Folded Reload
-	movq	%rbx, %rbp
-	sbbq	-8(%rsp), %rbp          # 8-byte Folded Reload
-	sbbq	$0, %rsi
-	andl	$1, %esi
-	cmovneq	%rbx, %rbp
-	testb	%sil, %sil
-	cmovneq	%rdx, %rax
-	movq	-88(%rsp), %rdx         # 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovneq	%rdi, %rcx
-	movq	%rcx, 8(%rdx)
-	cmovneq	%r13, %r8
-	movq	%r8, 16(%rdx)
-	movq	%rbp, 24(%rdx)
+	movq	%rax, %r8
+	movq	%rbx, %rax
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	addq	%r12, %r8
+	adcq	%r13, %rax
+	adcq	%r14, %rdx
+	adcq	%r9, %rsi
+	adcq	-72(%rsp), %rbp                 # 8-byte Folded Reload
+	adcq	$0, %rcx
+	addq	%r11, %rdi
+	movq	-48(%rsp), %rbx                 # 8-byte Reload
+	movq	%rdi, 40(%rbx)
+	adcq	%r10, %r8
+	movq	%r8, 48(%rbx)
+	adcq	%r15, %rax
+	movq	%rax, 56(%rbx)
+	adcq	-96(%rsp), %rdx                 # 8-byte Folded Reload
+	movq	%rdx, 64(%rbx)
+	adcq	-112(%rsp), %rsi                # 8-byte Folded Reload
+	movq	%rsi, 72(%rbx)
+	adcq	-104(%rsp), %rbp                # 8-byte Folded Reload
+	movq	%rbp, 80(%rbx)
+	adcq	$0, %rcx
+	movq	%rcx, 88(%rbx)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -2629,447 +3265,826 @@ mcl_fp_mont4L:                          # @mcl_fp_mont4L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end53:
-	.size	mcl_fp_mont4L, .Lfunc_end53-mcl_fp_mont4L
-
-	.globl	mcl_fp_montNF4L
-	.align	16, 0x90
-	.type	mcl_fp_montNF4L,@function
-mcl_fp_montNF4L:                        # @mcl_fp_montNF4L
-# BB#0:
-	pushq	%rbp
+.Lfunc_end41:
+	.size	mcl_fpDbl_mulPre6L, .Lfunc_end41-mcl_fpDbl_mulPre6L
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre6L              # -- Begin function mcl_fpDbl_sqrPre6L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre6L,@function
+mcl_fpDbl_sqrPre6L:                     # @mcl_fpDbl_sqrPre6L
+# %bb.0:
+	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rdi, -88(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rdi
-	mulq	%rdi
+	subq	$168, %rsp
+	movq	%rdi, -128(%rsp)                # 8-byte Spill
+	movq	40(%rsi), %r9
+	movq	(%rsi), %r10
+	movq	8(%rsi), %rcx
+	movq	%r9, %rax
+	mulq	%r10
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	%rdx, 16(%rsp)                  # 8-byte Spill
+	movq	32(%rsi), %r8
+	movq	%r8, %rax
+	mulq	%r10
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	%rdx, (%rsp)                    # 8-byte Spill
+	movq	24(%rsi), %r11
+	movq	%r11, %rax
+	mulq	%r10
+	movq	%rax, -16(%rsp)                 # 8-byte Spill
+	movq	%rdx, -80(%rsp)                 # 8-byte Spill
+	movq	16(%rsi), %r14
+	movq	%r14, %rax
+	mulq	%r10
+	movq	%rdx, 144(%rsp)                 # 8-byte Spill
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	movq	%r9, %rax
+	mulq	%rcx
+	movq	%rdx, -8(%rsp)                  # 8-byte Spill
+	movq	%rax, -24(%rsp)                 # 8-byte Spill
+	movq	%r8, %rax
+	mulq	%rcx
+	movq	%rdx, -32(%rsp)                 # 8-byte Spill
+	movq	%rax, -88(%rsp)                 # 8-byte Spill
+	movq	%r11, %rax
+	mulq	%rcx
+	movq	%rdx, -96(%rsp)                 # 8-byte Spill
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%r14, %rax
+	mulq	%rcx
+	movq	%rdx, %rsi
+	movq	%rdx, 40(%rsp)                  # 8-byte Spill
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	mulq	%rcx
+	movq	%rdx, 112(%rsp)                 # 8-byte Spill
+	movq	%rax, %rbp
+	movq	%rcx, %rax
+	mulq	%r10
+	movq	%rdx, %rbx
 	movq	%rax, %r15
+	movq	%r10, %rax
+	mulq	%r10
+	movq	%rdx, %rcx
+	movq	%rax, (%rdi)
+	movq	%r9, %rax
+	mulq	%r8
+	movq	%rdx, 136(%rsp)                 # 8-byte Spill
+	movq	%rax, 128(%rsp)                 # 8-byte Spill
+	movq	%r9, %rax
+	mulq	%r11
+	movq	%rdx, -40(%rsp)                 # 8-byte Spill
+	movq	%rax, -48(%rsp)                 # 8-byte Spill
+	movq	%r9, %rax
+	mulq	%r14
+	movq	%rdx, -56(%rsp)                 # 8-byte Spill
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	movq	%r9, %rax
+	mulq	%r9
+	movq	%rdx, 160(%rsp)                 # 8-byte Spill
+	movq	%rax, 152(%rsp)                 # 8-byte Spill
+	movq	%r8, %rax
+	mulq	%r11
+	movq	%rdx, 96(%rsp)                  # 8-byte Spill
+	movq	%rax, 88(%rsp)                  # 8-byte Spill
+	movq	%r8, %rax
+	mulq	%r14
+	movq	%rdx, -112(%rsp)                # 8-byte Spill
+	movq	%rax, -120(%rsp)                # 8-byte Spill
+	movq	%r8, %rax
+	mulq	%r8
+	movq	%rdx, 120(%rsp)                 # 8-byte Spill
+	movq	%rax, 104(%rsp)                 # 8-byte Spill
+	movq	%r11, %rax
+	mulq	%r14
+	movq	%rdx, 64(%rsp)                  # 8-byte Spill
+	movq	%rax, 56(%rsp)                  # 8-byte Spill
+	movq	%r11, %rax
+	mulq	%r11
+	movq	%rdx, 80(%rsp)                  # 8-byte Spill
+	movq	%rax, 72(%rsp)                  # 8-byte Spill
+	movq	%r14, %rax
+	mulq	%r14
+	movq	%rax, %r12
+	movq	%rdx, 48(%rsp)                  # 8-byte Spill
+	addq	%rbx, %rbp
+	movq	%rbp, 32(%rsp)                  # 8-byte Spill
+	movq	112(%rsp), %r11                 # 8-byte Reload
+	adcq	%r13, %r11
+	movq	%rsi, %r10
+	adcq	-104(%rsp), %r10                # 8-byte Folded Reload
+	movq	-96(%rsp), %r14                 # 8-byte Reload
+	adcq	-88(%rsp), %r14                 # 8-byte Folded Reload
+	movq	-32(%rsp), %r9                  # 8-byte Reload
+	adcq	-24(%rsp), %r9                  # 8-byte Folded Reload
+	movq	-8(%rsp), %r8                   # 8-byte Reload
+	adcq	$0, %r8
+	movq	%r15, %rdi
+	addq	%r15, %rcx
+	adcq	-72(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	144(%rsp), %r15                 # 8-byte Reload
+	movq	%r15, %rbp
+	adcq	-16(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	-80(%rsp), %rax                 # 8-byte Reload
+	adcq	8(%rsp), %rax                   # 8-byte Folded Reload
+	movq	(%rsp), %rdx                    # 8-byte Reload
+	adcq	24(%rsp), %rdx                  # 8-byte Folded Reload
+	movq	16(%rsp), %rsi                  # 8-byte Reload
+	adcq	$0, %rsi
+	addq	%rdi, %rcx
+	adcq	32(%rsp), %rbx                  # 8-byte Folded Reload
+	movq	-128(%rsp), %rdi                # 8-byte Reload
+	movq	%rcx, 8(%rdi)
+	adcq	%r11, %rbp
+	adcq	%r10, %rax
+	adcq	%r14, %rdx
+	adcq	%r9, %rsi
+	adcq	$0, %r8
+	movq	%r15, %r9
+	addq	%r13, %r9
+	adcq	40(%rsp), %r12                  # 8-byte Folded Reload
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	movq	56(%rsp), %rdi                  # 8-byte Reload
+	adcq	%rdi, %rcx
+	movq	64(%rsp), %r15                  # 8-byte Reload
+	movq	%r15, %r10
+	adcq	-120(%rsp), %r10                # 8-byte Folded Reload
+	movq	-112(%rsp), %r11                # 8-byte Reload
+	adcq	-64(%rsp), %r11                 # 8-byte Folded Reload
+	movq	-56(%rsp), %r13                 # 8-byte Reload
+	adcq	$0, %r13
+	addq	-72(%rsp), %rbx                 # 8-byte Folded Reload
+	adcq	%rbp, %r9
+	movq	-128(%rsp), %rbp                # 8-byte Reload
+	movq	%rbx, 16(%rbp)
+	adcq	%rax, %r12
+	adcq	%rdx, %rcx
+	movq	%rcx, %rbx
+	adcq	%rsi, %r10
+	adcq	%r8, %r11
+	adcq	$0, %r13
+	movq	-80(%rsp), %rsi                 # 8-byte Reload
+	addq	-104(%rsp), %rsi                # 8-byte Folded Reload
+	movq	-96(%rsp), %rax                 # 8-byte Reload
+	adcq	%rdi, %rax
+	movq	72(%rsp), %rdi                  # 8-byte Reload
+	adcq	%r15, %rdi
+	movq	80(%rsp), %rdx                  # 8-byte Reload
+	movq	88(%rsp), %r15                  # 8-byte Reload
+	adcq	%r15, %rdx
+	movq	96(%rsp), %r8                   # 8-byte Reload
+	movq	%r8, %r14
+	adcq	-48(%rsp), %r14                 # 8-byte Folded Reload
+	movq	-40(%rsp), %rcx                 # 8-byte Reload
+	adcq	$0, %rcx
+	addq	-16(%rsp), %r9                  # 8-byte Folded Reload
+	adcq	%r12, %rsi
+	movq	%r9, 24(%rbp)
+	adcq	%rbx, %rax
+	adcq	%r10, %rdi
+	movq	%rdi, %r9
+	adcq	%r11, %rdx
 	movq	%rdx, %r12
-	movq	16(%rsi), %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r8
+	adcq	%r13, %r14
+	adcq	$0, %rcx
+	movq	(%rsp), %rdi                    # 8-byte Reload
+	addq	-88(%rsp), %rdi                 # 8-byte Folded Reload
+	movq	-32(%rsp), %rdx                 # 8-byte Reload
+	adcq	-120(%rsp), %rdx                # 8-byte Folded Reload
+	movq	-112(%rsp), %rbx                # 8-byte Reload
+	adcq	%r15, %rbx
+	movq	104(%rsp), %r13                 # 8-byte Reload
+	adcq	%r8, %r13
+	movq	120(%rsp), %rbp                 # 8-byte Reload
+	movq	128(%rsp), %r11                 # 8-byte Reload
+	adcq	%r11, %rbp
+	movq	136(%rsp), %r15                 # 8-byte Reload
+	movq	%r15, %r10
+	adcq	$0, %r10
+	addq	8(%rsp), %rsi                   # 8-byte Folded Reload
+	adcq	%rax, %rdi
+	movq	-128(%rsp), %r8                 # 8-byte Reload
+	movq	%rsi, 32(%r8)
+	adcq	%r9, %rdx
 	movq	%rdx, %r9
-	movq	(%rsi), %rbp
-	movq	%rbp, -40(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, %rbx
-	movq	%rax, %rsi
-	movq	%rbp, %rax
-	mulq	%rdi
+	adcq	%r12, %rbx
+	movq	%rbx, %r12
+	adcq	%r14, %r13
+	adcq	%rcx, %rbp
+	movq	%rbp, %r14
+	adcq	$0, %r10
+	movq	16(%rsp), %rsi                  # 8-byte Reload
+	addq	-24(%rsp), %rsi                 # 8-byte Folded Reload
+	movq	-8(%rsp), %rdx                  # 8-byte Reload
+	adcq	-64(%rsp), %rdx                 # 8-byte Folded Reload
+	movq	-56(%rsp), %rbp                 # 8-byte Reload
+	adcq	-48(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	-40(%rsp), %rbx                 # 8-byte Reload
+	adcq	%r11, %rbx
+	movq	152(%rsp), %r11                 # 8-byte Reload
+	adcq	%r15, %r11
+	movq	160(%rsp), %rax                 # 8-byte Reload
+	adcq	$0, %rax
+	addq	24(%rsp), %rdi                  # 8-byte Folded Reload
+	movq	%rdi, 40(%r8)
+	adcq	%r9, %rsi
+	movq	%rsi, 48(%r8)
+	adcq	%r12, %rdx
+	movq	%rdx, 56(%r8)
+	movq	%rbp, %rdx
+	adcq	%r13, %rdx
+	movq	%rdx, 64(%r8)
+	movq	%rbx, %rdx
+	adcq	%r14, %rdx
+	movq	%rdx, 72(%r8)
+	movq	%r11, %rdx
+	adcq	%r10, %rdx
+	movq	%rdx, 80(%r8)
+	adcq	$0, %rax
+	movq	%rax, 88(%r8)
+	addq	$168, %rsp
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	retq
+.Lfunc_end42:
+	.size	mcl_fpDbl_sqrPre6L, .Lfunc_end42-mcl_fpDbl_sqrPre6L
+                                        # -- End function
+	.globl	mcl_fp_mont6L                   # -- Begin function mcl_fp_mont6L
+	.p2align	4, 0x90
+	.type	mcl_fp_mont6L,@function
+mcl_fp_mont6L:                          # @mcl_fp_mont6L
+# %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	subq	$48, %rsp
+	movq	%rdx, -48(%rsp)                 # 8-byte Spill
+	movq	%rdi, 40(%rsp)                  # 8-byte Spill
+	movq	40(%rsi), %rax
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	(%rdx), %rbp
+	mulq	%rbp
+	movq	%rax, %r8
+	movq	%rdx, %r10
+	movq	32(%rsi), %rax
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	mulq	%rbp
 	movq	%rax, %r11
+	movq	%rdx, %r13
+	movq	24(%rsi), %rax
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	mulq	%rbp
+	movq	%rax, %r15
 	movq	%rdx, %rdi
-	addq	%rsi, %rdi
-	adcq	%r8, %rbx
-	adcq	%r15, %r9
-	adcq	$0, %r12
-	movq	-8(%rcx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	(%rcx), %r8
-	movq	%r8, -64(%rsp)          # 8-byte Spill
-	movq	%r11, %rsi
-	imulq	%rax, %rsi
+	movq	16(%rsi), %rax
+	movq	%rax, -40(%rsp)                 # 8-byte Spill
+	mulq	%rbp
+	movq	%rax, %r9
+	movq	%rdx, %r14
+	movq	(%rsi), %rbx
+	movq	%rbx, 24(%rsp)                  # 8-byte Spill
+	movq	8(%rsi), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	mulq	%rbp
+	movq	%rdx, %r12
+	movq	%rax, %rsi
+	movq	%rbx, %rax
+	mulq	%rbp
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rdx, %rbp
+	addq	%rsi, %rbp
+	adcq	%r9, %r12
+	adcq	%r15, %r14
+	adcq	%r11, %rdi
+	movq	%rdi, -88(%rsp)                 # 8-byte Spill
+	adcq	%r8, %r13
+	movq	%r13, -128(%rsp)                # 8-byte Spill
+	adcq	$0, %r10
+	movq	%r10, -112(%rsp)                # 8-byte Spill
+	movq	-8(%rcx), %r8
+	movq	%r8, -32(%rsp)                  # 8-byte Spill
+	imulq	%rax, %r8
+	movq	40(%rcx), %rdx
+	movq	%rdx, 8(%rsp)                   # 8-byte Spill
+	movq	%r8, %rax
+	mulq	%rdx
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	32(%rcx), %rdx
+	movq	%rdx, (%rsp)                    # 8-byte Spill
+	movq	%r8, %rax
+	mulq	%rdx
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	movq	%rdx, %r11
 	movq	24(%rcx), %rdx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	16(%rcx), %rbp
-	movq	%rbp, -72(%rsp)         # 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -80(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
+	movq	%rdx, -8(%rsp)                  # 8-byte Spill
+	movq	%r8, %rax
 	mulq	%rdx
+	movq	%rax, %r13
 	movq	%rdx, %r15
+	movq	16(%rcx), %rdx
+	movq	%rdx, -16(%rsp)                 # 8-byte Spill
+	movq	%r8, %rax
+	mulq	%rdx
+	movq	%rax, %r9
+	movq	%rdx, %rsi
+	movq	(%rcx), %rbx
+	movq	%rbx, -24(%rsp)                 # 8-byte Spill
+	movq	8(%rcx), %rcx
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	movq	%r8, %rax
+	mulq	%rcx
+	movq	%rdx, %rdi
+	movq	%rax, %r10
+	movq	%r8, %rax
+	mulq	%rbx
+	movq	%rdx, %rcx
+	addq	%r10, %rcx
+	adcq	%r9, %rdi
+	adcq	%r13, %rsi
+	adcq	-80(%rsp), %r15                 # 8-byte Folded Reload
+	adcq	-104(%rsp), %r11                # 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                # 8-byte Reload
+	adcq	$0, %rdx
+	addq	-96(%rsp), %rax                 # 8-byte Folded Reload
+	adcq	%rbp, %rcx
+	adcq	%r12, %rdi
+	adcq	%r14, %rsi
+	adcq	-88(%rsp), %r15                 # 8-byte Folded Reload
+	adcq	-128(%rsp), %r11                # 8-byte Folded Reload
+	adcq	-112(%rsp), %rdx                # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	setb	-128(%rsp)                      # 1-byte Folded Spill
+	movq	-48(%rsp), %rax                 # 8-byte Reload
+	movq	8(%rax), %rbx
+	movq	%rbx, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -112(%rsp)                # 8-byte Spill
+	movq	%rax, -88(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rbx, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
 	movq	%rax, %r13
+	movq	%rbx, %rax
+	mulq	16(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %r12
+	movq	%rbx, %rax
+	mulq	24(%rsp)                        # 8-byte Folded Reload
+	movq	%rax, %r9
+	movq	%rdx, %rbx
+	addq	%r12, %rbx
+	adcq	%r13, %rbp
+	adcq	-104(%rsp), %r8                 # 8-byte Folded Reload
+	adcq	-96(%rsp), %r14                 # 8-byte Folded Reload
+	adcq	-88(%rsp), %r10                 # 8-byte Folded Reload
+	movq	-112(%rsp), %rdx                # 8-byte Reload
+	adcq	$0, %rdx
+	addq	%rcx, %r9
+	adcq	%rdi, %rbx
+	adcq	%rsi, %rbp
+	adcq	%r15, %r8
+	adcq	%r11, %r14
+	adcq	-120(%rsp), %r10                # 8-byte Folded Reload
+	movzbl	-128(%rsp), %eax                # 1-byte Folded Reload
+	adcq	%rax, %rdx
+	movq	%rdx, -112(%rsp)                # 8-byte Spill
+	setb	-120(%rsp)                      # 1-byte Folded Spill
+	movq	-32(%rsp), %rsi                 # 8-byte Reload
+	imulq	%r9, %rsi
 	movq	%rsi, %rax
-	mulq	%rbp
-	movq	%rdx, %r10
-	movq	%rax, %rbp
+	mulq	8(%rsp)                         # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
 	movq	%rsi, %rax
-	mulq	%rcx
-	movq	%rdx, %r14
-	movq	%rax, %rcx
+	mulq	(%rsp)                          # 8-byte Folded Reload
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	movq	%rax, -104(%rsp)                # 8-byte Spill
 	movq	%rsi, %rax
-	mulq	%r8
-	addq	%r11, %rax
-	adcq	%rdi, %rcx
-	adcq	%rbx, %rbp
-	adcq	%r9, %r13
-	adcq	$0, %r12
-	addq	%rdx, %rcx
-	adcq	%r14, %rbp
-	adcq	%r10, %r13
-	adcq	%r15, %r12
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	movq	8(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r11
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r14
-	movq	%rdi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
+	mulq	-8(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rcx
 	movq	%rax, %rdi
-	movq	%rdx, %r9
-	addq	%r14, %r9
-	adcq	%r11, %r8
+	movq	%rsi, %rax
+	mulq	32(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, %r15
+	movq	%rsi, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r11
+	addq	%r15, %r11
+	adcq	%rdi, %r12
+	adcq	-80(%rsp), %rcx                 # 8-byte Folded Reload
+	adcq	-104(%rsp), %r13                # 8-byte Folded Reload
+	movq	-88(%rsp), %rsi                 # 8-byte Reload
+	adcq	-96(%rsp), %rsi                 # 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                # 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r9, %rax
+	adcq	%rbx, %r11
+	adcq	%rbp, %r12
+	adcq	%r8, %rcx
+	adcq	%r14, %r13
 	adcq	%r10, %rsi
+	movq	%rsi, -88(%rsp)                 # 8-byte Spill
+	adcq	-112(%rsp), %rdx                # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movzbl	-120(%rsp), %ebx                # 1-byte Folded Reload
 	adcq	$0, %rbx
-	addq	%rcx, %rdi
-	adcq	%rbp, %r9
-	adcq	%r13, %r8
-	adcq	%r12, %rsi
-	adcq	$0, %rbx
-	movq	%rdi, %rcx
-	imulq	-8(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
+	movq	-48(%rsp), %rax                 # 8-byte Reload
+	movq	16(%rax), %rsi
+	movq	%rsi, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -112(%rsp)                # 8-byte Spill
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rdi
+	movq	%rax, %r9
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r10
-	movq	%rax, %r12
+	movq	%rax, %r14
+	movq	%rsi, %rax
+	mulq	16(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, %rbp
+	movq	%rsi, %rax
+	mulq	24(%rsp)                        # 8-byte Folded Reload
+	movq	%rax, %rsi
+	movq	%rdx, %r8
+	addq	%rbp, %r8
+	adcq	%r14, %r15
+	adcq	%r9, %r10
+	adcq	-104(%rsp), %rdi                # 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                # 8-byte Reload
+	adcq	-96(%rsp), %rdx                 # 8-byte Folded Reload
+	movq	-112(%rsp), %rax                # 8-byte Reload
+	adcq	$0, %rax
+	addq	%r11, %rsi
+	adcq	%r12, %r8
+	adcq	%rcx, %r15
+	adcq	%r13, %r10
+	adcq	-88(%rsp), %rdi                 # 8-byte Folded Reload
+	adcq	-128(%rsp), %rdx                # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	adcq	%rbx, %rax
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	-32(%rsp), %rcx                 # 8-byte Reload
+	imulq	%rsi, %rcx
 	movq	%rcx, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %r13
+	mulq	8(%rsp)                         # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
 	movq	%rcx, %rax
-	movq	-80(%rsp), %r15         # 8-byte Reload
-	mulq	%r15
+	mulq	(%rsp)                          # 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-8(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r14
-	movq	%rax, %rbp
+	movq	%rax, %r13
 	movq	%rcx, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	addq	%rdi, %rax
-	adcq	%r9, %rbp
-	adcq	%r8, %r13
-	adcq	%rsi, %r12
-	adcq	$0, %rbx
-	addq	%rdx, %rbp
-	adcq	%r14, %r13
-	adcq	%r11, %r12
-	adcq	%r10, %rbx
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	movq	16(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r11
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r14
-	movq	%rdi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
+	mulq	32(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %rbp
 	movq	%rax, %r9
-	movq	%rdx, %rdi
-	addq	%r14, %rdi
-	adcq	%r11, %rcx
-	adcq	%r10, %r8
-	adcq	$0, %rsi
-	addq	%rbp, %r9
-	adcq	%r13, %rdi
-	adcq	%r12, %rcx
-	adcq	%rbx, %r8
-	adcq	$0, %rsi
-	movq	%r9, %rbx
-	imulq	-8(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %r12
-	movq	%rbx, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
+	movq	%rcx, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r11
-	movq	%rax, %r13
-	movq	%rbx, %rax
-	mulq	%r15
-	movq	%rdx, %r14
-	movq	%rax, %rbp
-	movq	%rbx, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	addq	%r9, %rax
-	adcq	%rdi, %rbp
-	adcq	%rcx, %r13
-	adcq	%r8, %r12
+	addq	%r9, %r11
+	adcq	%r13, %rbp
+	adcq	-80(%rsp), %r14                 # 8-byte Folded Reload
+	adcq	-104(%rsp), %rbx                # 8-byte Folded Reload
+	adcq	-96(%rsp), %r12                 # 8-byte Folded Reload
+	movq	-128(%rsp), %rcx                # 8-byte Reload
+	adcq	$0, %rcx
+	addq	%rsi, %rax
+	adcq	%r8, %r11
+	adcq	%r15, %rbp
+	adcq	%r10, %r14
+	adcq	%rdi, %rbx
+	adcq	-120(%rsp), %r12                # 8-byte Folded Reload
+	adcq	-112(%rsp), %rcx                # 8-byte Folded Reload
+	movq	%rcx, -128(%rsp)                # 8-byte Spill
+	movzbl	-88(%rsp), %esi                 # 1-byte Folded Reload
 	adcq	$0, %rsi
-	addq	%rdx, %rbp
-	adcq	%r14, %r13
-	adcq	%r11, %r12
-	adcq	%r10, %rsi
-	movq	-16(%rsp), %rax         # 8-byte Reload
+	movq	-48(%rsp), %rax                 # 8-byte Reload
 	movq	24(%rax), %rdi
 	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %rcx
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -112(%rsp)                # 8-byte Spill
+	movq	%rax, -88(%rsp)                 # 8-byte Spill
 	movq	%rdi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r11
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
 	movq	%rdi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %r14
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, %r10
+	movq	%rdi, %rax
+	mulq	16(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %rcx
+	movq	%rax, %r8
 	movq	%rdi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
+	mulq	24(%rsp)                        # 8-byte Folded Reload
 	movq	%rax, %r9
 	movq	%rdx, %rdi
-	addq	%r14, %rdi
-	adcq	%r11, %r10
-	adcq	%rcx, %r8
-	adcq	$0, %rbx
-	addq	%rbp, %r9
-	adcq	%r13, %rdi
-	adcq	%r12, %r10
-	adcq	%rsi, %r8
-	adcq	$0, %rbx
-	movq	-8(%rsp), %rsi          # 8-byte Reload
+	addq	%r8, %rdi
+	adcq	%r10, %rcx
+	adcq	-104(%rsp), %r15                # 8-byte Folded Reload
+	adcq	-96(%rsp), %r13                 # 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                # 8-byte Reload
+	adcq	-88(%rsp), %rdx                 # 8-byte Folded Reload
+	movq	-112(%rsp), %rax                # 8-byte Reload
+	adcq	$0, %rax
+	addq	%r11, %r9
+	adcq	%rbp, %rdi
+	adcq	%r14, %rcx
+	adcq	%rbx, %r15
+	adcq	%r12, %r13
+	adcq	-128(%rsp), %rdx                # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	adcq	%rsi, %rax
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	-32(%rsp), %rsi                 # 8-byte Reload
 	imulq	%r9, %rsi
 	movq	%rsi, %rax
-	movq	-56(%rsp), %r12         # 8-byte Reload
-	mulq	%r12
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%rax, %r13
+	mulq	8(%rsp)                         # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
 	movq	%rsi, %rax
-	movq	-72(%rsp), %r14         # 8-byte Reload
-	mulq	%r14
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
+	mulq	(%rsp)                          # 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, -104(%rsp)                # 8-byte Spill
 	movq	%rsi, %rax
-	movq	-64(%rsp), %r11         # 8-byte Reload
-	mulq	%r11
-	movq	%rdx, %r15
-	movq	%rax, %rcx
+	mulq	-8(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
 	movq	%rsi, %rax
-	movq	-80(%rsp), %rsi         # 8-byte Reload
-	mulq	%rsi
-	addq	%r9, %rcx
-	adcq	%rdi, %rax
-	adcq	%r10, %rbp
-	adcq	%r8, %r13
-	adcq	$0, %rbx
-	addq	%r15, %rax
-	adcq	%rdx, %rbp
-	adcq	-16(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-8(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rax, %rcx
-	subq	%r11, %rcx
-	movq	%rbp, %rdx
-	sbbq	%rsi, %rdx
-	movq	%r13, %rdi
-	sbbq	%r14, %rdi
-	movq	%rbx, %rsi
-	sbbq	%r12, %rsi
-	cmovsq	%rax, %rcx
-	movq	-88(%rsp), %rax         # 8-byte Reload
-	movq	%rcx, (%rax)
-	cmovsq	%rbp, %rdx
-	movq	%rdx, 8(%rax)
-	cmovsq	%r13, %rdi
-	movq	%rdi, 16(%rax)
-	cmovsq	%rbx, %rsi
-	movq	%rsi, 24(%rax)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end54:
-	.size	mcl_fp_montNF4L, .Lfunc_end54-mcl_fp_montNF4L
-
-	.globl	mcl_fp_montRed4L
-	.align	16, 0x90
-	.type	mcl_fp_montRed4L,@function
-mcl_fp_montRed4L:                       # @mcl_fp_montRed4L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rdi, -56(%rsp)         # 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	(%rcx), %rdi
-	movq	%rdi, -48(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r12
-	movq	%r12, %rbx
-	imulq	%rax, %rbx
-	movq	%rax, %r9
-	movq	24(%rcx), %rdx
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rdx
-	movq	%rax, %r11
-	movq	%rdx, %r8
-	movq	16(%rcx), %rbp
-	movq	%rbp, -32(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rbp
-	movq	%rbp, %r13
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbp
 	movq	%rax, %r14
+	movq	%rsi, %rax
+	mulq	32(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r11
+	movq	%rsi, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r10
-	movq	8(%rcx), %rcx
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
+	addq	%r11, %r10
+	adcq	%r14, %rbx
+	adcq	-80(%rsp), %rbp                 # 8-byte Folded Reload
+	adcq	-104(%rsp), %r12                # 8-byte Folded Reload
+	adcq	-96(%rsp), %r8                  # 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                # 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r9, %rax
+	adcq	%rdi, %r10
+	adcq	%rcx, %rbx
+	adcq	%r15, %rbp
+	adcq	%r13, %r12
+	adcq	-120(%rsp), %r8                 # 8-byte Folded Reload
+	adcq	-112(%rsp), %rdx                # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movzbl	-88(%rsp), %r11d                # 1-byte Folded Reload
+	adcq	$0, %r11
+	movq	-48(%rsp), %rax                 # 8-byte Reload
+	movq	32(%rax), %rcx
+	movq	%rcx, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -112(%rsp)                # 8-byte Spill
+	movq	%rax, -88(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %rsi
+	movq	%rcx, %rax
+	mulq	16(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %rdi
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	mulq	24(%rsp)                        # 8-byte Folded Reload
+	movq	%rax, %r9
+	movq	%rdx, %rcx
+	addq	%r15, %rcx
+	adcq	%rsi, %rdi
+	adcq	-104(%rsp), %r13                # 8-byte Folded Reload
+	adcq	-96(%rsp), %r14                 # 8-byte Folded Reload
+	movq	-120(%rsp), %rax                # 8-byte Reload
+	adcq	-88(%rsp), %rax                 # 8-byte Folded Reload
+	movq	-112(%rsp), %rdx                # 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r10, %r9
+	adcq	%rbx, %rcx
+	adcq	%rbp, %rdi
+	adcq	%r12, %r13
+	adcq	%r8, %r14
+	adcq	-128(%rsp), %rax                # 8-byte Folded Reload
+	movq	%rax, -120(%rsp)                # 8-byte Spill
+	adcq	%r11, %rdx
+	movq	%rdx, -112(%rsp)                # 8-byte Spill
+	setb	-88(%rsp)                       # 1-byte Folded Spill
+	movq	-32(%rsp), %rbx                 # 8-byte Reload
+	imulq	%r9, %rbx
 	movq	%rbx, %rax
-	mulq	%rcx
-	movq	%rcx, %rbp
+	mulq	8(%rsp)                         # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rax
+	mulq	(%rsp)                          # 8-byte Folded Reload
 	movq	%rdx, %r15
-	movq	%rax, %rcx
+	movq	%rax, -104(%rsp)                # 8-byte Spill
 	movq	%rbx, %rax
-	mulq	%rdi
-	movq	%rdx, %rbx
-	addq	%rcx, %rbx
-	adcq	%r14, %r15
-	adcq	%r11, %r10
-	adcq	$0, %r8
-	movq	56(%rsi), %rcx
-	movq	48(%rsi), %rdx
-	addq	%r12, %rax
-	movq	40(%rsi), %rax
-	adcq	8(%rsi), %rbx
-	adcq	16(%rsi), %r15
-	adcq	24(%rsi), %r10
-	adcq	32(%rsi), %r8
-	adcq	$0, %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	adcq	$0, %rdx
+	mulq	-8(%rsp)                        # 8-byte Folded Reload
 	movq	%rdx, %r12
-	adcq	$0, %rcx
-	movq	%rcx, -16(%rsp)         # 8-byte Spill
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	movq	%rbx, %rsi
-	imulq	%r9, %rsi
-	movq	%rsi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	%r13
-	movq	%rdx, %r14
-	movq	%rax, %r9
-	movq	%rsi, %rax
-	mulq	%rbp
-	movq	%rdx, %rcx
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	movq	-48(%rsp), %r13         # 8-byte Reload
-	mulq	%r13
-	movq	%rdx, %rsi
-	addq	%rbp, %rsi
-	adcq	%r9, %rcx
-	adcq	-72(%rsp), %r14         # 8-byte Folded Reload
-	adcq	$0, %r11
-	addq	%rbx, %rax
-	adcq	%r15, %rsi
-	adcq	%r10, %rcx
-	adcq	%r8, %r14
-	adcq	-64(%rsp), %r11         # 8-byte Folded Reload
-	adcq	$0, %r12
-	movq	%r12, -64(%rsp)         # 8-byte Spill
-	movq	-16(%rsp), %rbp         # 8-byte Reload
-	adcq	$0, %rbp
-	adcq	$0, %rdi
-	movq	%rsi, %rbx
-	imulq	-40(%rsp), %rbx         # 8-byte Folded Reload
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
 	movq	%rbx, %rax
-	movq	-8(%rsp), %r12          # 8-byte Reload
-	mulq	%r12
-	movq	%rdx, %r8
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -72(%rsp)         # 8-byte Spill
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %r10
 	movq	%rbx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %r9
+	mulq	32(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %rsi
+	movq	%rax, %r11
 	movq	%rbx, %rax
-	mulq	%r13
-	movq	%rdx, %rbx
-	addq	%r9, %rbx
-	adcq	-72(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-16(%rsp), %r10         # 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	%rsi, %rax
-	adcq	%rcx, %rbx
-	adcq	%r14, %r15
-	adcq	%r11, %r10
-	adcq	-64(%rsp), %r8          # 8-byte Folded Reload
-	adcq	$0, %rbp
-	movq	%rbp, -16(%rsp)         # 8-byte Spill
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
+	addq	%r11, %r8
+	adcq	%r10, %rsi
+	adcq	-80(%rsp), %rbp                 # 8-byte Folded Reload
+	adcq	-104(%rsp), %r12                # 8-byte Folded Reload
+	adcq	-96(%rsp), %r15                 # 8-byte Folded Reload
+	movq	-128(%rsp), %rbx                # 8-byte Reload
+	adcq	$0, %rbx
+	addq	%r9, %rax
+	adcq	%rcx, %r8
+	adcq	%rdi, %rsi
+	adcq	%r13, %rbp
+	adcq	%r14, %r12
+	adcq	-120(%rsp), %r15                # 8-byte Folded Reload
+	movq	%r15, -120(%rsp)                # 8-byte Spill
+	adcq	-112(%rsp), %rbx                # 8-byte Folded Reload
+	movq	%rbx, -128(%rsp)                # 8-byte Spill
+	movzbl	-88(%rsp), %edi                 # 1-byte Folded Reload
 	adcq	$0, %rdi
-	movq	-40(%rsp), %rcx         # 8-byte Reload
-	imulq	%rbx, %rcx
+	movq	-48(%rsp), %rax                 # 8-byte Reload
+	movq	40(%rax), %rcx
 	movq	%rcx, %rax
-	mulq	%r12
-	movq	%rdx, %r13
-	movq	%rax, -40(%rsp)         # 8-byte Spill
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -48(%rsp)                 # 8-byte Spill
+	movq	%rax, -112(%rsp)                # 8-byte Spill
 	movq	%rcx, %rax
-	movq	-32(%rsp), %r14         # 8-byte Reload
-	mulq	%r14
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -56(%rsp)                 # 8-byte Spill
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r11
-	movq	%rax, %r12
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
 	movq	%rcx, %rax
-	movq	%rcx, %r9
-	movq	-24(%rsp), %rsi         # 8-byte Reload
-	mulq	%rsi
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %rbx
+	movq	%rcx, %rax
+	mulq	16(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	mulq	24(%rsp)                        # 8-byte Folded Reload
+	movq	%rax, %r14
+	movq	%rdx, %r9
+	addq	%r15, %r9
+	adcq	%rbx, %r10
+	adcq	-72(%rsp), %r13                 # 8-byte Folded Reload
+	adcq	-64(%rsp), %r11                 # 8-byte Folded Reload
+	movq	-56(%rsp), %rcx                 # 8-byte Reload
+	adcq	-112(%rsp), %rcx                # 8-byte Folded Reload
+	movq	-48(%rsp), %rax                 # 8-byte Reload
+	adcq	$0, %rax
+	addq	%r8, %r14
+	adcq	%rsi, %r9
+	adcq	%rbp, %r10
+	adcq	%r12, %r13
+	adcq	-120(%rsp), %r11                # 8-byte Folded Reload
+	adcq	-128(%rsp), %rcx                # 8-byte Folded Reload
+	movq	%rcx, -56(%rsp)                 # 8-byte Spill
+	adcq	%rdi, %rax
+	movq	%rax, -48(%rsp)                 # 8-byte Spill
+	setb	-64(%rsp)                       # 1-byte Folded Spill
+	movq	-32(%rsp), %r12                 # 8-byte Reload
+	imulq	%r14, %r12
+	movq	%r12, %rax
+	mulq	8(%rsp)                         # 8-byte Folded Reload
+	movq	%rdx, %rsi
+	movq	%rax, -32(%rsp)                 # 8-byte Spill
+	movq	%r12, %rax
+	mulq	(%rsp)                          # 8-byte Folded Reload
 	movq	%rdx, %rbp
-	movq	%rax, %rcx
-	movq	%r9, %rax
-	movq	-48(%rsp), %r9          # 8-byte Reload
-	mulq	%r9
-	addq	%rcx, %rdx
-	adcq	%r12, %rbp
-	adcq	-40(%rsp), %r11         # 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%rbx, %rax
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	movq	%r12, %rax
+	mulq	-8(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %rcx
+	movq	%rax, -40(%rsp)                 # 8-byte Spill
+	movq	%r12, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r15
+	movq	%r12, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %rdi
+	movq	%r12, %rax
+	movq	32(%rsp), %r12                  # 8-byte Reload
+	mulq	%r12
+	addq	%r8, %rax
 	adcq	%r15, %rdx
-	adcq	%r10, %rbp
-	adcq	%r8, %r11
-	adcq	-16(%rsp), %r13         # 8-byte Folded Reload
-	adcq	$0, %rdi
-	movq	%rdx, %rax
-	subq	%r9, %rax
-	movq	%rbp, %rcx
-	sbbq	%rsi, %rcx
-	movq	%r11, %rbx
-	sbbq	%r14, %rbx
-	movq	%r13, %rsi
-	sbbq	-8(%rsp), %rsi          # 8-byte Folded Reload
-	sbbq	$0, %rdi
-	andl	$1, %edi
-	cmovneq	%r13, %rsi
-	testb	%dil, %dil
-	cmovneq	%rdx, %rax
-	movq	-56(%rsp), %rdx         # 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovneq	%rbp, %rcx
-	movq	%rcx, 8(%rdx)
-	cmovneq	%r11, %rbx
-	movq	%rbx, 16(%rdx)
-	movq	%rsi, 24(%rdx)
+	adcq	-40(%rsp), %rbx                 # 8-byte Folded Reload
+	adcq	-72(%rsp), %rcx                 # 8-byte Folded Reload
+	adcq	-32(%rsp), %rbp                 # 8-byte Folded Reload
+	adcq	$0, %rsi
+	addq	%r14, %rdi
+	adcq	%r9, %rax
+	adcq	%r10, %rdx
+	adcq	%r13, %rbx
+	adcq	%r11, %rcx
+	adcq	-56(%rsp), %rbp                 # 8-byte Folded Reload
+	adcq	-48(%rsp), %rsi                 # 8-byte Folded Reload
+	movzbl	-64(%rsp), %r11d                # 1-byte Folded Reload
+	adcq	$0, %r11
+	movq	%rax, %r8
+	subq	-24(%rsp), %r8                  # 8-byte Folded Reload
+	movq	%rdx, %r9
+	sbbq	%r12, %r9
+	movq	%rbx, %r10
+	sbbq	-16(%rsp), %r10                 # 8-byte Folded Reload
+	movq	%rcx, %r14
+	sbbq	-8(%rsp), %r14                  # 8-byte Folded Reload
+	movq	%rbp, %r15
+	sbbq	(%rsp), %r15                    # 8-byte Folded Reload
+	movq	%rsi, %rdi
+	sbbq	8(%rsp), %rdi                   # 8-byte Folded Reload
+	sbbq	$0, %r11
+	testb	$1, %r11b
+	cmovneq	%rsi, %rdi
+	movq	40(%rsp), %rsi                  # 8-byte Reload
+	movq	%rdi, 40(%rsi)
+	cmovneq	%rbp, %r15
+	movq	%r15, 32(%rsi)
+	cmovneq	%rcx, %r14
+	movq	%r14, 24(%rsi)
+	cmovneq	%rbx, %r10
+	movq	%r10, 16(%rsi)
+	cmovneq	%rdx, %r9
+	movq	%r9, 8(%rsi)
+	cmovneq	%rax, %r8
+	movq	%r8, (%rsi)
+	addq	$48, %rsp
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -3077,9170 +4092,538 @@ mcl_fp_montRed4L:                       # @mcl_fp_montRed4L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end55:
-	.size	mcl_fp_montRed4L, .Lfunc_end55-mcl_fp_montRed4L
-
-	.globl	mcl_fp_addPre4L
-	.align	16, 0x90
-	.type	mcl_fp_addPre4L,@function
-mcl_fp_addPre4L:                        # @mcl_fp_addPre4L
-# BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rdx), %rax
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rax
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	adcq	%r8, %r9
-	movq	%r9, 24(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-.Lfunc_end56:
-	.size	mcl_fp_addPre4L, .Lfunc_end56-mcl_fp_addPre4L
-
-	.globl	mcl_fp_subPre4L
-	.align	16, 0x90
-	.type	mcl_fp_subPre4L,@function
-mcl_fp_subPre4L:                        # @mcl_fp_subPre4L
-# BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %r10
-	movq	%rcx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	sbbq	%r8, %r9
-	movq	%r9, 24(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	retq
-.Lfunc_end57:
-	.size	mcl_fp_subPre4L, .Lfunc_end57-mcl_fp_subPre4L
-
-	.globl	mcl_fp_shr1_4L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_4L,@function
-mcl_fp_shr1_4L:                         # @mcl_fp_shr1_4L
-# BB#0:
-	movq	24(%rsi), %rax
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rdx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rdx
-	movq	%rdx, (%rdi)
-	shrdq	$1, %rcx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rax, %rcx
-	movq	%rcx, 16(%rdi)
-	shrq	%rax
-	movq	%rax, 24(%rdi)
-	retq
-.Lfunc_end58:
-	.size	mcl_fp_shr1_4L, .Lfunc_end58-mcl_fp_shr1_4L
-
-	.globl	mcl_fp_add4L
-	.align	16, 0x90
-	.type	mcl_fp_add4L,@function
-mcl_fp_add4L:                           # @mcl_fp_add4L
-# BB#0:
-	movq	24(%rdx), %r10
-	movq	24(%rsi), %r8
-	movq	16(%rdx), %r9
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r9
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r9, 16(%rdi)
-	adcq	%r10, %r8
-	movq	%r8, 24(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r9
-	sbbq	24(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB59_2
-# BB#1:                                 # %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r9, 16(%rdi)
-	movq	%r8, 24(%rdi)
-.LBB59_2:                               # %carry
-	retq
-.Lfunc_end59:
-	.size	mcl_fp_add4L, .Lfunc_end59-mcl_fp_add4L
-
-	.globl	mcl_fp_addNF4L
-	.align	16, 0x90
-	.type	mcl_fp_addNF4L,@function
-mcl_fp_addNF4L:                         # @mcl_fp_addNF4L
-# BB#0:
+.Lfunc_end43:
+	.size	mcl_fp_mont6L, .Lfunc_end43-mcl_fp_mont6L
+                                        # -- End function
+	.globl	mcl_fp_montNF6L                 # -- Begin function mcl_fp_montNF6L
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF6L,@function
+mcl_fp_montNF6L:                        # @mcl_fp_montNF6L
+# %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
 	pushq	%rbx
-	movq	24(%rdx), %r8
-	movq	16(%rdx), %r9
-	movq	(%rdx), %r11
-	movq	8(%rdx), %r10
-	addq	(%rsi), %r11
-	adcq	8(%rsi), %r10
-	adcq	16(%rsi), %r9
-	adcq	24(%rsi), %r8
-	movq	%r11, %rsi
-	subq	(%rcx), %rsi
-	movq	%r10, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r9, %rax
-	sbbq	16(%rcx), %rax
-	movq	%r8, %rbx
-	sbbq	24(%rcx), %rbx
-	testq	%rbx, %rbx
-	cmovsq	%r11, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r10, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r9, %rax
-	movq	%rax, 16(%rdi)
-	cmovsq	%r8, %rbx
-	movq	%rbx, 24(%rdi)
-	popq	%rbx
-	retq
-.Lfunc_end60:
-	.size	mcl_fp_addNF4L, .Lfunc_end60-mcl_fp_addNF4L
-
-	.globl	mcl_fp_sub4L
-	.align	16, 0x90
-	.type	mcl_fp_sub4L,@function
-mcl_fp_sub4L:                           # @mcl_fp_sub4L
-# BB#0:
-	movq	24(%rdx), %r10
-	movq	24(%rsi), %r8
-	movq	16(%rsi), %r9
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r11
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %r9
-	movq	%rax, (%rdi)
-	movq	%r11, 8(%rdi)
-	movq	%r9, 16(%rdi)
-	sbbq	%r10, %r8
-	movq	%r8, 24(%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB61_2
-# BB#1:                                 # %nocarry
-	retq
-.LBB61_2:                               # %carry
-	movq	24(%rcx), %r10
-	movq	8(%rcx), %rsi
-	movq	16(%rcx), %rdx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r11, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 24(%rdi)
-	retq
-.Lfunc_end61:
-	.size	mcl_fp_sub4L, .Lfunc_end61-mcl_fp_sub4L
-
-	.globl	mcl_fp_subNF4L
-	.align	16, 0x90
-	.type	mcl_fp_subNF4L,@function
-mcl_fp_subNF4L:                         # @mcl_fp_subNF4L
-# BB#0:
-	pushq	%rbx
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %r8
-	movq	(%rsi), %r9
-	movq	8(%rsi), %r10
-	subq	(%rdx), %r9
-	sbbq	8(%rdx), %r10
-	sbbq	16(%rdx), %r8
-	sbbq	24(%rdx), %r11
-	movq	%r11, %rdx
-	sarq	$63, %rdx
-	movq	24(%rcx), %rsi
-	andq	%rdx, %rsi
-	movq	16(%rcx), %rax
-	andq	%rdx, %rax
-	movq	8(%rcx), %rbx
-	andq	%rdx, %rbx
-	andq	(%rcx), %rdx
-	addq	%r9, %rdx
-	movq	%rdx, (%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 8(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 16(%rdi)
-	adcq	%r11, %rsi
-	movq	%rsi, 24(%rdi)
-	popq	%rbx
-	retq
-.Lfunc_end62:
-	.size	mcl_fp_subNF4L, .Lfunc_end62-mcl_fp_subNF4L
-
-	.globl	mcl_fpDbl_add4L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add4L,@function
-mcl_fpDbl_add4L:                        # @mcl_fpDbl_add4L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r9
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r10
-	movq	48(%rsi), %r12
-	movq	40(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	24(%rdx), %r15
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	40(%rsi), %r13
-	movq	24(%rsi), %rbp
-	movq	32(%rsi), %rsi
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rbx, 16(%rdi)
-	adcq	%r15, %rbp
-	movq	%rbp, 24(%rdi)
-	adcq	%r14, %rsi
-	adcq	%r11, %r13
-	adcq	%r10, %r12
-	adcq	%r9, %r8
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rsi, %rdx
-	subq	(%rcx), %rdx
-	movq	%r13, %rbp
-	sbbq	8(%rcx), %rbp
-	movq	%r12, %rbx
-	sbbq	16(%rcx), %rbx
-	movq	%r8, %r9
-	sbbq	24(%rcx), %r9
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%rsi, %rdx
-	movq	%rdx, 32(%rdi)
-	testb	%al, %al
-	cmovneq	%r13, %rbp
-	movq	%rbp, 40(%rdi)
-	cmovneq	%r12, %rbx
-	movq	%rbx, 48(%rdi)
-	cmovneq	%r8, %r9
-	movq	%r9, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end63:
-	.size	mcl_fpDbl_add4L, .Lfunc_end63-mcl_fpDbl_add4L
-
-	.globl	mcl_fpDbl_sub4L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub4L,@function
-mcl_fpDbl_sub4L:                        # @mcl_fpDbl_sub4L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r9
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	(%rsi), %rbx
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	movq	%rbx, (%rdi)
-	movq	8(%rsi), %rbx
-	sbbq	8(%rdx), %rbx
-	movq	%rbx, 8(%rdi)
-	movq	16(%rsi), %rbx
-	sbbq	16(%rdx), %rbx
-	movq	%rbx, 16(%rdi)
-	movq	24(%rsi), %rbx
-	sbbq	%r11, %rbx
-	movq	40(%rdx), %r11
-	movq	32(%rdx), %rdx
-	movq	%rbx, 24(%rdi)
-	movq	32(%rsi), %r12
-	sbbq	%rdx, %r12
-	movq	48(%rsi), %r14
-	movq	40(%rsi), %r15
-	sbbq	%r11, %r15
-	sbbq	%r10, %r14
-	sbbq	%r9, %r8
-	movl	$0, %edx
-	sbbq	$0, %rdx
-	andl	$1, %edx
-	movq	(%rcx), %rsi
-	cmoveq	%rax, %rsi
-	testb	%dl, %dl
-	movq	16(%rcx), %rdx
-	cmoveq	%rax, %rdx
-	movq	24(%rcx), %rbx
-	cmoveq	%rax, %rbx
-	cmovneq	8(%rcx), %rax
-	addq	%r12, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%r15, %rax
-	movq	%rax, 40(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 48(%rdi)
-	adcq	%r8, %rbx
-	movq	%rbx, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end64:
-	.size	mcl_fpDbl_sub4L, .Lfunc_end64-mcl_fpDbl_sub4L
-
-	.globl	mcl_fp_mulUnitPre5L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre5L,@function
-mcl_fp_mulUnitPre5L:                    # @mcl_fp_mulUnitPre5L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rcx, %rax
-	mulq	32(%rsi)
-	movq	%rdx, %r8
-	movq	%rax, %r9
-	movq	%rcx, %rax
-	mulq	24(%rsi)
-	movq	%rdx, %r10
-	movq	%rax, %r11
-	movq	%rcx, %rax
-	mulq	16(%rsi)
-	movq	%rdx, %r15
+	subq	$40, %rsp
+	movq	%rdx, -72(%rsp)                 # 8-byte Spill
+	movq	%rdi, 32(%rsp)                  # 8-byte Spill
+	movq	40(%rsi), %rax
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	movq	(%rdx), %rdi
+	mulq	%rdi
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	movq	%rdx, %r12
+	movq	32(%rsi), %rax
+	movq	%rax, -88(%rsp)                 # 8-byte Spill
+	mulq	%rdi
 	movq	%rax, %r14
-	movq	%rcx, %rax
-	mulq	8(%rsi)
-	movq	%rdx, %rbx
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	(%rsi)
-	movq	%rax, (%rdi)
-	addq	%r12, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r14, %rbx
-	movq	%rbx, 16(%rdi)
-	adcq	%r11, %r15
-	movq	%r15, 24(%rdi)
-	adcq	%r9, %r10
-	movq	%r10, 32(%rdi)
-	adcq	$0, %r8
-	movq	%r8, 40(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end65:
-	.size	mcl_fp_mulUnitPre5L, .Lfunc_end65-mcl_fp_mulUnitPre5L
-
-	.globl	mcl_fpDbl_mulPre5L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre5L,@function
-mcl_fpDbl_mulPre5L:                     # @mcl_fpDbl_mulPre5L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rsi, %r9
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	(%r9), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	8(%r9), %rbx
-	movq	%rbx, -48(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rbp
-	movq	%rdx, %r8
-	mulq	%rbp
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	16(%r9), %r13
-	movq	24(%r9), %r14
-	movq	32(%r9), %r15
-	movq	%rax, (%rdi)
-	movq	%r15, %rax
-	mulq	%rbp
 	movq	%rdx, %r10
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%r14, %rax
-	mulq	%rbp
-	movq	%rdx, %r12
+	movq	24(%rsi), %rax
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	mulq	%rdi
+	movq	%rax, %r15
+	movq	%rdx, %r9
+	movq	16(%rsi), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	mulq	%rdi
 	movq	%rax, %r11
-	movq	%r13, %rax
-	mulq	%rbp
-	movq	%rdx, %rcx
-	movq	%rax, %rsi
-	movq	%rbx, %rax
-	mulq	%rbp
+	movq	%rdx, %r8
+	movq	(%rsi), %rbx
+	movq	%rbx, (%rsp)                    # 8-byte Spill
+	movq	8(%rsi), %rax
+	movq	%rax, -8(%rsp)                  # 8-byte Spill
+	mulq	%rdi
 	movq	%rdx, %rbp
-	movq	%rax, %rdi
-	addq	-32(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	adcq	%r11, %rcx
-	adcq	-40(%rsp), %r12         # 8-byte Folded Reload
-	adcq	$0, %r10
-	movq	8(%r8), %r11
-	movq	%r15, %rax
-	mulq	%r11
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
 	movq	%rax, %rsi
-	movq	%r14, %rax
-	mulq	%r11
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, %r15
-	movq	%r13, %rax
-	mulq	%r11
-	movq	%rdx, %r8
+	movq	%rbx, %rax
+	mulq	%rdi
 	movq	%rax, %r13
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	mulq	%r11
-	movq	%rdx, %r14
-	movq	%rax, %rbx
-	movq	-24(%rsp), %rax         # 8-byte Reload
-	mulq	%r11
-	addq	%rdi, %rax
-	movq	-8(%rsp), %rdi          # 8-byte Reload
-	movq	%rax, 8(%rdi)
-	adcq	%rbp, %rbx
-	adcq	%rcx, %r13
-	adcq	%r12, %r15
-	adcq	%r10, %rsi
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%rdx, %rbx
-	adcq	%r14, %r13
-	adcq	%r8, %r15
-	adcq	-40(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-32(%rsp), %rcx         # 8-byte Folded Reload
-	movq	32(%r9), %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	-16(%rsp), %rdi         # 8-byte Reload
-	movq	16(%rdi), %r12
-	mulq	%r12
-	movq	%rax, %r11
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	24(%r9), %rax
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	mulq	%r12
-	movq	%rax, %r10
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	16(%r9), %rax
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	mulq	%r12
-	movq	%rax, %r8
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	(%r9), %r14
-	movq	8(%r9), %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	mulq	%r12
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
-	movq	%r14, %rax
-	mulq	%r12
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	addq	%rbx, %rax
-	movq	-8(%rsp), %rbx          # 8-byte Reload
-	movq	%rax, 16(%rbx)
-	adcq	%r13, %rbp
+	movq	%rdx, %rdi
+	addq	%rsi, %rdi
+	adcq	%r11, %rbp
 	adcq	%r15, %r8
-	adcq	%rsi, %r10
-	adcq	%rcx, %r11
-	sbbq	%rcx, %rcx
-	movq	24(%rdi), %rsi
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	mulq	%rsi
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, %r13
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	mulq	%rsi
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, %r12
-	movq	%r14, %rax
-	mulq	%rsi
-	movq	%rdx, %r15
-	movq	%rax, %rdi
-	movq	-72(%rsp), %rax         # 8-byte Reload
-	mulq	%rsi
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
+	adcq	%r14, %r9
+	adcq	-64(%rsp), %r10                 # 8-byte Folded Reload
+	movq	%r10, -128(%rsp)                # 8-byte Spill
+	adcq	$0, %r12
+	movq	%r12, -112(%rsp)                # 8-byte Spill
+	movq	-8(%rcx), %rbx
+	movq	%rbx, -48(%rsp)                 # 8-byte Spill
+	imulq	%rax, %rbx
+	movq	40(%rcx), %rdx
+	movq	%rdx, -64(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rax
+	mulq	%rdx
 	movq	%rax, %r14
-	movq	-80(%rsp), %rax         # 8-byte Reload
-	mulq	%rsi
-	andl	$1, %ecx
-	addq	-88(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-48(%rsp), %r10         # 8-byte Folded Reload
-	adcq	-32(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	addq	%rdi, %rbp
-	movq	%rbp, 24(%rbx)
-	adcq	%r12, %r8
-	adcq	%rax, %r10
-	adcq	%r14, %r11
-	adcq	%r13, %rcx
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	addq	%r15, %r8
-	adcq	-56(%rsp), %r10         # 8-byte Folded Reload
-	adcq	%rdx, %r11
-	adcq	-72(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	-40(%rsp), %rsi         # 8-byte Folded Reload
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	movq	32(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	32(%r9)
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	32(%rcx), %rdx
+	movq	%rdx, -16(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rax
+	mulq	%rdx
 	movq	%rax, %r15
+	movq	%rdx, -96(%rsp)                 # 8-byte Spill
+	movq	24(%rcx), %rdx
+	movq	%rdx, -24(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rax
+	mulq	%rdx
+	movq	%rax, %r12
+	movq	%rdx, -104(%rsp)                # 8-byte Spill
+	movq	16(%rcx), %rdx
+	movq	%rdx, -40(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rax
+	mulq	%rdx
+	movq	%rax, %r10
+	movq	%rdx, 24(%rsp)                  # 8-byte Spill
+	movq	(%rcx), %rsi
+	movq	%rsi, -32(%rsp)                 # 8-byte Spill
+	movq	8(%rcx), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	movq	%rbx, %rax
+	mulq	%rcx
+	movq	%rdx, %r11
+	movq	%rax, %rcx
+	movq	%rbx, %rax
+	mulq	%rsi
+	addq	%r13, %rax
+	adcq	%rdi, %rcx
+	adcq	%rbp, %r10
+	adcq	%r8, %r12
+	adcq	%r9, %r15
+	adcq	-128(%rsp), %r14                # 8-byte Folded Reload
+	movq	-112(%rsp), %rax                # 8-byte Reload
+	adcq	$0, %rax
+	addq	%rdx, %rcx
+	adcq	%r11, %r10
+	adcq	24(%rsp), %r12                  # 8-byte Folded Reload
+	adcq	-104(%rsp), %r15                # 8-byte Folded Reload
+	adcq	-96(%rsp), %r14                 # 8-byte Folded Reload
+	movq	%r14, -128(%rsp)                # 8-byte Spill
+	adcq	-120(%rsp), %rax                # 8-byte Folded Reload
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	-72(%rsp), %rax                 # 8-byte Reload
+	movq	8(%rax), %rdi
 	movq	%rdi, %rax
-	mulq	24(%r9)
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, %r13
+	mulq	-80(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, -120(%rsp)                # 8-byte Spill
 	movq	%rdi, %rax
-	mulq	16(%r9)
-	movq	%rdx, %r14
-	movq	%rax, %rbx
+	mulq	-88(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r9
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
 	movq	%rdi, %rax
-	mulq	8(%r9)
-	movq	%rdx, %r12
-	movq	%rax, %rbp
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, -104(%rsp)                # 8-byte Spill
 	movq	%rdi, %rax
-	mulq	(%r9)
-	addq	%r8, %rax
-	movq	-8(%rsp), %rdi          # 8-byte Reload
-	movq	%rax, 32(%rdi)
+	mulq	8(%rsp)                         # 8-byte Folded Reload
+	movq	%rdx, %rsi
+	movq	%rax, %r14
+	movq	%rdi, %rax
+	mulq	-8(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r11
+	movq	%rdi, %rax
+	mulq	(%rsp)                          # 8-byte Folded Reload
+	movq	%rax, %rdi
+	movq	%rdx, %rbp
+	addq	%r11, %rbp
+	adcq	%r14, %rbx
+	adcq	-104(%rsp), %rsi                # 8-byte Folded Reload
+	adcq	-96(%rsp), %r13                 # 8-byte Folded Reload
+	adcq	-120(%rsp), %r9                 # 8-byte Folded Reload
+	adcq	$0, %r8
+	addq	%rcx, %rdi
 	adcq	%r10, %rbp
-	adcq	%r11, %rbx
-	adcq	%rcx, %r13
-	adcq	%rsi, %r15
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%rdx, %rbp
-	movq	%rbp, 40(%rdi)
 	adcq	%r12, %rbx
-	movq	%rbx, 48(%rdi)
-	adcq	%r14, %r13
-	movq	%r13, 56(%rdi)
-	adcq	-24(%rsp), %r15         # 8-byte Folded Reload
-	movq	%r15, 64(%rdi)
-	adcq	-16(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end66:
-	.size	mcl_fpDbl_mulPre5L, .Lfunc_end66-mcl_fpDbl_mulPre5L
-
-	.globl	mcl_fpDbl_sqrPre5L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre5L,@function
-mcl_fpDbl_sqrPre5L:                     # @mcl_fpDbl_sqrPre5L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	32(%rsi), %r11
-	movq	(%rsi), %r13
-	movq	8(%rsi), %rbx
+	adcq	%r15, %rsi
+	adcq	-128(%rsp), %r13                # 8-byte Folded Reload
+	adcq	-112(%rsp), %r9                 # 8-byte Folded Reload
+	adcq	$0, %r8
+	movq	-48(%rsp), %r11                 # 8-byte Reload
+	imulq	%rdi, %r11
 	movq	%r11, %rax
-	mulq	%rbx
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %rbp
-	movq	%rbp, %rax
-	mulq	%rbx
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %rcx
-	movq	%rcx, %rax
-	mulq	%rbx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, -56(%rsp)         # 8-byte Spill
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movq	%rax, -112(%rsp)                # 8-byte Spill
 	movq	%r11, %rax
-	mulq	%r13
-	movq	%rdx, %r8
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%r13
-	movq	%rdx, %r9
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	%r13
-	movq	%rdx, %r10
-	movq	%rax, %r12
-	movq	%rbx, %rax
-	mulq	%rbx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -96(%rsp)                 # 8-byte Spill
 	movq	%rax, %r15
-	movq	%rbx, %rax
-	mulq	%r13
-	movq	%rdx, %rbx
-	movq	%rax, %r14
-	movq	%r13, %rax
-	mulq	%r13
-	movq	%rax, (%rdi)
-	addq	%r14, %rdx
-	adcq	%rbx, %r12
-	adcq	%rbp, %r10
-	adcq	-72(%rsp), %r9          # 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	%r14, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r15, %r12
-	adcq	-56(%rsp), %r10         # 8-byte Folded Reload
-	adcq	-48(%rsp), %r9          # 8-byte Folded Reload
-	adcq	-40(%rsp), %r8          # 8-byte Folded Reload
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	addq	%rbx, %r12
-	adcq	-64(%rsp), %r10         # 8-byte Folded Reload
-	adcq	-32(%rsp), %r9          # 8-byte Folded Reload
-	adcq	-24(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-16(%rsp), %rdi         # 8-byte Folded Reload
 	movq	%r11, %rax
-	mulq	%rcx
-	movq	%rax, %r11
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %rbx
-	movq	%rbx, %rax
-	mulq	%rcx
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	%rax, %rcx
+	movq	%r11, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                # 8-byte Spill
+	movq	%rax, %r10
+	movq	%r11, %rax
+	mulq	16(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %r12
 	movq	%rax, %r14
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	(%rsi), %rbp
-	movq	%rbp, -32(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, %r15
-	movq	%rbp, %rax
-	mulq	%rcx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
+	movq	%r11, %rax
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	addq	%rdi, %rax
+	adcq	%rbp, %r14
+	adcq	%rbx, %r10
+	adcq	%rsi, %rcx
+	adcq	%r13, %r15
+	movq	-112(%rsp), %rax                # 8-byte Reload
+	adcq	%r9, %rax
+	adcq	$0, %r8
+	addq	%rdx, %r14
+	adcq	%r12, %r10
+	adcq	-104(%rsp), %rcx                # 8-byte Folded Reload
+	adcq	-120(%rsp), %r15                # 8-byte Folded Reload
+	movq	%r15, -120(%rsp)                # 8-byte Spill
+	adcq	-96(%rsp), %rax                 # 8-byte Folded Reload
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	adcq	-128(%rsp), %r8                 # 8-byte Folded Reload
+	movq	-72(%rsp), %rax                 # 8-byte Reload
+	movq	16(%rax), %rdi
+	movq	%rdi, %rax
+	mulq	-80(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, -128(%rsp)                # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-88(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	8(%rsp)                         # 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-8(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %rsi
+	movq	%rax, %r9
+	movq	%rdi, %rax
+	mulq	(%rsp)                          # 8-byte Folded Reload
 	movq	%rax, %rbp
+	movq	%rdx, %rbx
+	addq	%r9, %rbx
+	adcq	24(%rsp), %rsi                  # 8-byte Folded Reload
+	adcq	-104(%rsp), %r12                # 8-byte Folded Reload
+	adcq	-96(%rsp), %r11                 # 8-byte Folded Reload
+	adcq	-128(%rsp), %r15                # 8-byte Folded Reload
+	adcq	$0, %r13
+	addq	%r14, %rbp
+	adcq	%r10, %rbx
+	adcq	%rcx, %rsi
+	adcq	-120(%rsp), %r12                # 8-byte Folded Reload
+	adcq	-112(%rsp), %r11                # 8-byte Folded Reload
+	adcq	%r8, %r15
+	adcq	$0, %r13
+	movq	-48(%rsp), %rcx                 # 8-byte Reload
+	imulq	%rbp, %rcx
 	movq	%rcx, %rax
-	mulq	%rcx
-	movq	%rax, %r13
-	addq	%r12, %rbp
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	%rbp, 16(%rax)
-	adcq	%r10, %r15
-	adcq	%r9, %r13
-	adcq	%r8, %r14
-	adcq	%rdi, %r11
-	sbbq	%r10, %r10
-	andl	$1, %r10d
-	addq	-56(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-48(%rsp), %r13         # 8-byte Folded Reload
-	adcq	%rdx, %r14
-	adcq	-24(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-16(%rsp), %r10         # 8-byte Folded Reload
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, %r8
-	movq	-32(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rax, %rdi
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %rbp
-	movq	%rbp, -16(%rsp)         # 8-byte Spill
-	movq	32(%rsi), %rcx
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	%rax, -112(%rsp)                # 8-byte Spill
 	movq	%rcx, %rax
-	mulq	%rbx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
 	movq	%rax, %r9
-	movq	%rbp, %rax
-	mulq	%rbx
-	movq	%rdx, %rbp
-	movq	%rax, %r12
-	movq	%rbx, %rax
-	mulq	%rbx
-	movq	%rax, %rbx
-	addq	%r15, %rdi
-	movq	-8(%rsp), %r15          # 8-byte Reload
-	movq	%rdi, 24(%r15)
-	adcq	%r13, %r8
-	adcq	%r14, %r12
-	adcq	%r11, %rbx
-	adcq	%r10, %r9
-	sbbq	%r10, %r10
-	andl	$1, %r10d
-	addq	-40(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-24(%rsp), %r12         # 8-byte Folded Reload
-	adcq	%rbp, %rbx
-	adcq	%rdx, %r9
-	adcq	-32(%rsp), %r10         # 8-byte Folded Reload
 	movq	%rcx, %rax
-	mulq	24(%rsi)
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -96(%rsp)                 # 8-byte Spill
+	movq	%rax, %r10
 	movq	%rcx, %rax
-	mulq	8(%rsi)
-	movq	%rdx, %r14
-	movq	%rax, %rdi
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                # 8-byte Spill
+	movq	%rax, %r14
 	movq	%rcx, %rax
-	mulq	(%rsi)
-	movq	%rdx, %r13
-	movq	%rax, %rsi
+	mulq	16(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %rdi
 	movq	%rcx, %rax
-	mulq	%rcx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, %r11
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	addq	%r8, %rsi
-	movq	%rsi, 32(%r15)
-	adcq	%r12, %rdi
-	adcq	%rbx, %rax
-	adcq	%r9, %rbp
-	adcq	%r10, %r11
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r13, %rdi
-	movq	%rdi, 40(%r15)
-	adcq	%r14, %rax
-	movq	%rax, 48(%r15)
-	adcq	%rdx, %rbp
-	movq	%rbp, 56(%r15)
-	adcq	-24(%rsp), %r11         # 8-byte Folded Reload
-	movq	%r11, 64(%r15)
-	adcq	-32(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, 72(%r15)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end67:
-	.size	mcl_fpDbl_sqrPre5L, .Lfunc_end67-mcl_fpDbl_sqrPre5L
-
-	.globl	mcl_fp_mont5L
-	.align	16, 0x90
-	.type	mcl_fp_mont5L,@function
-mcl_fp_mont5L:                          # @mcl_fp_mont5L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	pushq	%rax
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rdi, -104(%rsp)        # 8-byte Spill
-	movq	32(%rsi), %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rdi
-	mulq	%rdi
-	movq	%rax, %r8
-	movq	%rdx, %r14
-	movq	24(%rsi), %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r9
-	movq	%rdx, %r12
-	movq	16(%rsi), %rax
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r10
-	movq	%rdx, %rbp
-	movq	(%rsi), %rbx
-	movq	%rbx, -80(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, %r11
-	movq	%rax, %rsi
-	movq	%rbx, %rax
-	mulq	%rdi
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	movq	%rdx, %r15
-	addq	%rsi, %r15
-	adcq	%r10, %r11
-	adcq	%r9, %rbp
-	movq	%rbp, -96(%rsp)         # 8-byte Spill
-	adcq	%r8, %r12
-	movq	%r12, -112(%rsp)        # 8-byte Spill
-	adcq	$0, %r14
-	movq	%r14, -120(%rsp)        # 8-byte Spill
-	movq	-8(%rcx), %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
-	imulq	%rdx, %rbp
-	movq	(%rcx), %r9
-	movq	%r9, -32(%rsp)          # 8-byte Spill
-	movq	32(%rcx), %rdx
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	movq	24(%rcx), %rsi
-	movq	%rsi, -8(%rsp)          # 8-byte Spill
-	movq	16(%rcx), %rbx
-	movq	%rbx, -16(%rsp)         # 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rdx, %r14
-	movq	%rax, %r13
-	movq	%rbp, %rax
-	mulq	%rsi
-	movq	%rdx, %rdi
-	movq	%rax, %r10
-	movq	%rbp, %rax
-	mulq	%rbx
-	movq	%rdx, %rbx
-	movq	%rax, %r8
-	movq	%rbp, %rax
-	mulq	%rcx
-	movq	%rdx, %rsi
-	movq	%rax, %r12
-	movq	%rbp, %rax
-	mulq	%r9
-	movq	%rdx, %rbp
-	addq	%r12, %rbp
-	adcq	%r8, %rsi
-	adcq	%r10, %rbx
-	adcq	%r13, %rdi
-	adcq	$0, %r14
-	addq	-128(%rsp), %rax        # 8-byte Folded Reload
-	adcq	%r15, %rbp
-	adcq	%r11, %rsi
-	adcq	-96(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-112(%rsp), %rdi        # 8-byte Folded Reload
-	adcq	-120(%rsp), %r14        # 8-byte Folded Reload
-	sbbq	%r9, %r9
-	andl	$1, %r9d
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	movq	8(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-88(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r10
-	movq	%rcx, %rax
-	mulq	-80(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r15
-	movq	%rdx, %rcx
-	addq	%r10, %rcx
-	adcq	-120(%rsp), %r8         # 8-byte Folded Reload
-	adcq	-112(%rsp), %r12        # 8-byte Folded Reload
-	adcq	-96(%rsp), %r11         # 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%rbp, %r15
-	adcq	%rsi, %rcx
-	adcq	%rbx, %r8
-	adcq	%rdi, %r12
-	adcq	%r14, %r11
-	adcq	%r9, %r13
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%r15, %rsi
-	imulq	-40(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %rdi
-	movq	%rsi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	addq	%rdi, %rbx
-	adcq	-128(%rsp), %r10        # 8-byte Folded Reload
-	adcq	-120(%rsp), %r9         # 8-byte Folded Reload
-	adcq	-112(%rsp), %rbp        # 8-byte Folded Reload
-	adcq	$0, %r14
-	addq	%r15, %rax
-	adcq	%rcx, %rbx
-	adcq	%r8, %r10
-	adcq	%r12, %r9
-	adcq	%r11, %rbp
-	adcq	%r13, %r14
-	adcq	$0, -96(%rsp)           # 8-byte Folded Spill
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	movq	16(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-88(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	-80(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r12
-	movq	%rdx, %r15
-	addq	%r8, %r15
-	adcq	-128(%rsp), %rdi        # 8-byte Folded Reload
-	adcq	-120(%rsp), %rsi        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r11        # 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%rbx, %r12
-	adcq	%r10, %r15
-	adcq	%r9, %rdi
-	adcq	%rbp, %rsi
-	adcq	%r14, %r11
-	adcq	-96(%rsp), %r13         # 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%r12, %rbp
-	imulq	-40(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r10
-	movq	%rbp, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	addq	%r14, %rbp
-	adcq	%r10, %rbx
-	adcq	-120(%rsp), %rcx        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r9         # 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	%r12, %rax
-	adcq	%r15, %rbp
-	adcq	%rdi, %rbx
-	adcq	%rsi, %rcx
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	addq	%rbp, %rax
+	adcq	%rbx, %rdi
+	adcq	%rsi, %r14
+	adcq	%r12, %r10
 	adcq	%r11, %r9
-	adcq	%r13, %r8
-	adcq	$0, -96(%rsp)           # 8-byte Folded Spill
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	movq	24(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %r15
-	movq	%rsi, %rax
-	mulq	-88(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r12
-	movq	%rsi, %rax
-	mulq	-80(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r14
-	movq	%rdx, %rsi
-	addq	%r12, %rsi
-	adcq	%r15, %rdi
-	adcq	-120(%rsp), %r11        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r10        # 8-byte Folded Reload
+	movq	-112(%rsp), %rax                # 8-byte Reload
+	adcq	%r15, %rax
 	adcq	$0, %r13
-	addq	%rbp, %r14
-	adcq	%rbx, %rsi
-	adcq	%rcx, %rdi
-	adcq	%r9, %r11
-	adcq	%r8, %r10
-	adcq	-96(%rsp), %r13         # 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%r14, %rbp
-	imulq	-40(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r8
-	movq	%rbp, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r12
-	movq	%rbp, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	addq	%r12, %rbp
-	adcq	%r8, %rbx
-	adcq	-120(%rsp), %rcx        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r15        # 8-byte Folded Reload
-	adcq	$0, %r9
-	addq	%r14, %rax
-	adcq	%rsi, %rbp
-	adcq	%rdi, %rbx
-	adcq	%r11, %rcx
-	adcq	%r10, %r15
-	adcq	%r13, %r9
-	movq	-96(%rsp), %r14         # 8-byte Reload
-	adcq	$0, %r14
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	movq	32(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-88(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %rdi
-	movq	%rsi, %rax
-	mulq	-80(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r10
-	movq	%rdx, %r8
-	addq	%rdi, %r8
-	adcq	-72(%rsp), %r12         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-56(%rsp), %r13         # 8-byte Folded Reload
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%rbp, %r10
-	adcq	%rbx, %r8
-	adcq	%rcx, %r12
-	adcq	%r15, %r11
-	adcq	%r9, %r13
-	adcq	%r14, %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	sbbq	%rcx, %rcx
-	movq	-40(%rsp), %rsi         # 8-byte Reload
-	imulq	%r10, %rsi
-	movq	%rsi, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r15
-	movq	%rsi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r9
-	movq	%rsi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	addq	%r9, %rdx
-	adcq	%r15, %rdi
-	adcq	-56(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-40(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	$0, %r14
-	andl	$1, %ecx
-	addq	%r10, %rax
-	adcq	%r8, %rdx
-	adcq	%r12, %rdi
-	adcq	%r11, %rbp
-	adcq	%r13, %rbx
-	adcq	-48(%rsp), %r14         # 8-byte Folded Reload
-	adcq	$0, %rcx
-	movq	%rdx, %rax
-	subq	-32(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rdi, %r8
-	sbbq	-24(%rsp), %r8          # 8-byte Folded Reload
-	movq	%rbp, %r9
-	sbbq	-16(%rsp), %r9          # 8-byte Folded Reload
-	movq	%rbx, %r10
-	sbbq	-8(%rsp), %r10          # 8-byte Folded Reload
-	movq	%r14, %r11
-	sbbq	(%rsp), %r11            # 8-byte Folded Reload
-	sbbq	$0, %rcx
-	andl	$1, %ecx
-	cmovneq	%rbx, %r10
-	testb	%cl, %cl
-	cmovneq	%rdx, %rax
-	movq	-104(%rsp), %rcx        # 8-byte Reload
-	movq	%rax, (%rcx)
-	cmovneq	%rdi, %r8
-	movq	%r8, 8(%rcx)
-	cmovneq	%rbp, %r9
-	movq	%r9, 16(%rcx)
-	movq	%r10, 24(%rcx)
-	cmovneq	%r14, %r11
-	movq	%r11, 32(%rcx)
-	addq	$8, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end68:
-	.size	mcl_fp_mont5L, .Lfunc_end68-mcl_fp_mont5L
-
-	.globl	mcl_fp_montNF5L
-	.align	16, 0x90
-	.type	mcl_fp_montNF5L,@function
-mcl_fp_montNF5L:                        # @mcl_fp_montNF5L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rdi, -104(%rsp)        # 8-byte Spill
-	movq	32(%rsi), %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rbx
-	mulq	%rbx
-	movq	%rax, %r15
-	movq	%rdx, %r10
-	movq	24(%rsi), %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	mulq	%rbx
-	movq	%rax, %r13
-	movq	%rdx, %r14
-	movq	16(%rsi), %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	mulq	%rbx
-	movq	%rax, %r8
-	movq	%rdx, %r9
-	movq	(%rsi), %rbp
-	movq	%rbp, -80(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	mulq	%rbx
-	movq	%rdx, %r11
-	movq	%rax, %rdi
-	movq	%rbp, %rax
-	mulq	%rbx
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rdx, %r12
-	addq	%rdi, %r12
-	adcq	%r8, %r11
-	adcq	%r13, %r9
-	adcq	%r15, %r14
-	adcq	$0, %r10
-	movq	-8(%rcx), %rdx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, %rsi
-	imulq	%rdx, %rsi
-	movq	(%rcx), %r8
-	movq	%r8, -96(%rsp)          # 8-byte Spill
-	movq	32(%rcx), %rdx
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	24(%rcx), %rdi
-	movq	%rdi, -16(%rsp)         # 8-byte Spill
-	movq	16(%rcx), %rbx
-	movq	%rbx, -24(%rsp)         # 8-byte Spill
-	movq	8(%rcx), %rbp
-	movq	%rbp, -72(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	%rdx
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %rcx
-	movq	%rsi, %rax
-	mulq	%rdi
-	movq	%rdx, -128(%rsp)        # 8-byte Spill
-	movq	%rax, %rdi
-	movq	%rsi, %rax
-	mulq	%rbx
-	movq	%rdx, %r15
-	movq	%rax, %rbx
-	movq	%rsi, %rax
-	mulq	%rbp
-	movq	%rdx, %r13
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	mulq	%r8
-	addq	-112(%rsp), %rax        # 8-byte Folded Reload
-	adcq	%r12, %rbp
-	adcq	%r11, %rbx
-	adcq	%r9, %rdi
-	adcq	%r14, %rcx
-	adcq	$0, %r10
-	addq	%rdx, %rbp
-	adcq	%r13, %rbx
-	adcq	%r15, %rdi
-	adcq	-128(%rsp), %rcx        # 8-byte Folded Reload
-	adcq	-120(%rsp), %r10        # 8-byte Folded Reload
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	movq	8(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %r8
-	movq	%rsi, %rax
-	mulq	-88(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %r14
-	movq	%rsi, %rax
-	mulq	-80(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %rsi
-	movq	%rdx, %r12
-	addq	%r14, %r12
-	adcq	%r8, %r11
-	adcq	-120(%rsp), %r9         # 8-byte Folded Reload
-	adcq	-112(%rsp), %r15        # 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%rbp, %rsi
-	adcq	%rbx, %r12
-	adcq	%rdi, %r11
-	adcq	%rcx, %r9
-	adcq	%r10, %r15
-	adcq	$0, %r13
-	movq	%rsi, %rdi
-	imulq	-32(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %rbp
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %r14
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r8
-	movq	%rdi, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	-96(%rsp)               # 8-byte Folded Reload
-	addq	%rsi, %rax
-	adcq	%r12, %r10
-	adcq	%r11, %r8
-	adcq	%r9, %r14
-	adcq	%r15, %rbp
-	adcq	$0, %r13
-	addq	%rdx, %r10
-	adcq	%rbx, %r8
-	adcq	%rcx, %r14
-	adcq	-120(%rsp), %rbp        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r13        # 8-byte Folded Reload
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	movq	16(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %rbx
-	movq	%rsi, %rax
-	mulq	-88(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r12
-	movq	%rsi, %rax
-	mulq	-80(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r11
-	movq	%rdx, %rsi
-	addq	%r12, %rsi
-	adcq	%rbx, %rcx
-	adcq	-120(%rsp), %rdi        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r9         # 8-byte Folded Reload
-	adcq	$0, %r15
-	addq	%r10, %r11
-	adcq	%r8, %rsi
-	adcq	%r14, %rcx
-	adcq	%rbp, %rdi
-	adcq	%r13, %r9
-	adcq	$0, %r15
-	movq	%r11, %rbx
-	imulq	-32(%rsp), %rbx         # 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %r13
-	movq	%rbx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %r8
-	movq	%rbx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %r10
-	movq	%rbx, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %rbp
-	movq	%rbx, %rax
-	mulq	-96(%rsp)               # 8-byte Folded Reload
-	addq	%r11, %rax
-	adcq	%rsi, %rbp
-	adcq	%rcx, %r10
-	adcq	%rdi, %r8
-	adcq	%r9, %r13
-	adcq	$0, %r15
-	addq	%rdx, %rbp
-	adcq	%r12, %r10
-	adcq	%r14, %r8
-	adcq	-120(%rsp), %r13        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r15        # 8-byte Folded Reload
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	movq	24(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %rbx
-	movq	%rsi, %rax
-	mulq	-88(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r12
-	movq	%rsi, %rax
-	mulq	-80(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r14
-	movq	%rdx, %rsi
-	addq	%r12, %rsi
-	adcq	%rbx, %rcx
-	adcq	-120(%rsp), %rdi        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r9         # 8-byte Folded Reload
-	adcq	$0, %r11
-	addq	%rbp, %r14
-	adcq	%r10, %rsi
-	adcq	%r8, %rcx
-	adcq	%r13, %rdi
-	adcq	%r15, %r9
-	adcq	$0, %r11
-	movq	%r14, %rbx
-	imulq	-32(%rsp), %rbx         # 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %r13
-	movq	%rbx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %r8
-	movq	%rbx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %r10
-	movq	%rbx, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %rbp
-	movq	%rbx, %rax
-	mulq	-96(%rsp)               # 8-byte Folded Reload
-	addq	%r14, %rax
-	adcq	%rsi, %rbp
-	adcq	%rcx, %r10
-	adcq	%rdi, %r8
-	adcq	%r9, %r13
-	adcq	$0, %r11
-	addq	%rdx, %rbp
-	adcq	%r12, %r10
-	adcq	%r15, %r8
-	adcq	-120(%rsp), %r13        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r11        # 8-byte Folded Reload
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	movq	32(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-88(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %rsi
-	movq	%rcx, %rax
-	mulq	-80(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r12
-	movq	%rdx, %rdi
-	addq	%rsi, %rdi
-	adcq	-56(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-48(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-40(%rsp), %r9          # 8-byte Folded Reload
-	adcq	$0, %rbx
-	addq	%rbp, %r12
-	adcq	%r10, %rdi
-	adcq	%r8, %r15
-	adcq	%r13, %r14
-	adcq	%r11, %r9
-	adcq	$0, %rbx
-	movq	-32(%rsp), %r8          # 8-byte Reload
-	imulq	%r12, %r8
-	movq	%r8, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, %rcx
-	movq	%r8, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
-	movq	%r8, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, %rsi
-	movq	%r8, %rax
-	movq	%r8, %r13
-	movq	-96(%rsp), %r10         # 8-byte Reload
-	mulq	%r10
-	movq	%rdx, %r11
-	movq	%rax, %r8
-	movq	%r13, %rax
-	movq	-72(%rsp), %r13         # 8-byte Reload
-	mulq	%r13
-	addq	%r12, %r8
-	adcq	%rdi, %rax
-	adcq	%r15, %rsi
-	adcq	%r14, %rbp
-	adcq	%r9, %rcx
-	adcq	$0, %rbx
-	addq	%r11, %rax
-	adcq	%rdx, %rsi
-	adcq	-48(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-40(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	-32(%rsp), %rbx         # 8-byte Folded Reload
-	movq	%rax, %r11
-	subq	%r10, %r11
-	movq	%rsi, %r10
-	sbbq	%r13, %r10
-	movq	%rbp, %r8
-	sbbq	-24(%rsp), %r8          # 8-byte Folded Reload
-	movq	%rcx, %r9
-	sbbq	-16(%rsp), %r9          # 8-byte Folded Reload
-	movq	%rbx, %rdx
-	sbbq	-8(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	sarq	$63, %rdi
-	cmovsq	%rax, %r11
-	movq	-104(%rsp), %rax        # 8-byte Reload
-	movq	%r11, (%rax)
-	cmovsq	%rsi, %r10
-	movq	%r10, 8(%rax)
-	cmovsq	%rbp, %r8
-	movq	%r8, 16(%rax)
-	cmovsq	%rcx, %r9
-	movq	%r9, 24(%rax)
-	cmovsq	%rbx, %rdx
-	movq	%rdx, 32(%rax)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end69:
-	.size	mcl_fp_montNF5L, .Lfunc_end69-mcl_fp_montNF5L
-
-	.globl	mcl_fp_montRed5L
-	.align	16, 0x90
-	.type	mcl_fp_montRed5L,@function
-mcl_fp_montRed5L:                       # @mcl_fp_montRed5L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rdi, -80(%rsp)         # 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	(%rcx), %rdi
-	movq	%rdi, -24(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r9
-	movq	%r9, %rbp
-	imulq	%rax, %rbp
-	movq	32(%rcx), %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rax, %r10
-	movq	%rdx, %r13
-	movq	24(%rcx), %rdx
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rax, %r14
-	movq	%rdx, %r11
-	movq	16(%rcx), %rdx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -32(%rsp)         # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rdx, %r15
-	movq	%rax, %r12
-	movq	%rbp, %rax
-	mulq	%rcx
-	movq	%rdx, %r8
-	movq	%rax, %rbx
-	movq	%rbp, %rax
-	mulq	%rdi
-	movq	%rdx, %rcx
-	addq	%rbx, %rcx
-	adcq	%r12, %r8
-	adcq	%r14, %r15
-	adcq	%r10, %r11
-	adcq	$0, %r13
-	addq	%r9, %rax
-	movq	72(%rsi), %rax
-	movq	64(%rsi), %rdx
-	adcq	8(%rsi), %rcx
-	adcq	16(%rsi), %r8
-	adcq	24(%rsi), %r15
-	adcq	32(%rsi), %r11
-	adcq	40(%rsi), %r13
-	movq	%r13, -88(%rsp)         # 8-byte Spill
-	movq	56(%rsi), %rdi
-	movq	48(%rsi), %rsi
-	adcq	$0, %rsi
-	movq	%rsi, -96(%rsp)         # 8-byte Spill
-	adcq	$0, %rdi
-	movq	%rdi, -72(%rsp)         # 8-byte Spill
-	adcq	$0, %rdx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	movq	%rcx, %rsi
-	movq	-64(%rsp), %r9          # 8-byte Reload
-	imulq	%r9, %rsi
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %rbx
-	movq	%rsi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	addq	%rbp, %rsi
-	adcq	%rbx, %r13
-	adcq	-112(%rsp), %r12        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r14        # 8-byte Folded Reload
-	adcq	$0, %r10
-	addq	%rcx, %rax
-	adcq	%r8, %rsi
-	adcq	%r15, %r13
-	adcq	%r11, %r12
-	adcq	-88(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-96(%rsp), %r10         # 8-byte Folded Reload
-	adcq	$0, -72(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -56(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -40(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rdi
-	movq	%rsi, %rcx
-	imulq	%r9, %rcx
-	movq	%rcx, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	addq	%r8, %rbp
-	adcq	-104(%rsp), %rbx        # 8-byte Folded Reload
-	adcq	-96(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-88(%rsp), %r11         # 8-byte Folded Reload
-	adcq	$0, %r9
-	addq	%rsi, %rax
-	adcq	%r13, %rbp
-	adcq	%r12, %rbx
-	adcq	%r14, %r15
-	adcq	%r10, %r11
-	adcq	-72(%rsp), %r9          # 8-byte Folded Reload
-	adcq	$0, -56(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -40(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rdi
-	movq	%rbp, %rcx
-	imulq	-64(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, %rax
-	movq	-48(%rsp), %rsi         # 8-byte Reload
-	mulq	%rsi
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %r10
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	addq	%r8, %rcx
-	adcq	%r10, %r13
-	adcq	-96(%rsp), %r12         # 8-byte Folded Reload
-	adcq	-88(%rsp), %r14         # 8-byte Folded Reload
-	movq	-72(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%rbp, %rax
-	adcq	%rbx, %rcx
-	adcq	%r15, %r13
-	adcq	%r11, %r12
-	adcq	%r9, %r14
-	adcq	-56(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	adcq	$0, -40(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rdi
-	movq	-64(%rsp), %rbx         # 8-byte Reload
-	imulq	%rcx, %rbx
-	movq	%rbx, %rax
-	mulq	%rsi
-	movq	%rdx, %rsi
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r15
-	movq	%rbx, %rax
-	movq	%rbx, %r10
-	movq	-32(%rsp), %r11         # 8-byte Reload
-	mulq	%r11
-	movq	%rdx, %rbx
-	movq	%rax, %r8
-	movq	%r10, %rax
-	movq	-24(%rsp), %r10         # 8-byte Reload
-	mulq	%r10
-	addq	%r8, %rdx
-	adcq	%r15, %rbx
-	adcq	-64(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-56(%rsp), %r9          # 8-byte Folded Reload
-	adcq	$0, %rsi
-	addq	%rcx, %rax
-	adcq	%r13, %rdx
-	adcq	%r12, %rbx
-	adcq	%r14, %rbp
-	adcq	-72(%rsp), %r9          # 8-byte Folded Reload
-	adcq	-40(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	$0, %rdi
-	movq	%rdx, %rax
-	subq	%r10, %rax
-	movq	%rbx, %rcx
-	sbbq	%r11, %rcx
-	movq	%rbp, %r8
-	sbbq	-16(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r9, %r10
-	sbbq	-8(%rsp), %r10          # 8-byte Folded Reload
-	movq	%rsi, %r11
-	sbbq	-48(%rsp), %r11         # 8-byte Folded Reload
-	sbbq	$0, %rdi
-	andl	$1, %edi
-	cmovneq	%rsi, %r11
-	testb	%dil, %dil
-	cmovneq	%rdx, %rax
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovneq	%rbx, %rcx
-	movq	%rcx, 8(%rdx)
-	cmovneq	%rbp, %r8
-	movq	%r8, 16(%rdx)
-	cmovneq	%r9, %r10
-	movq	%r10, 24(%rdx)
-	movq	%r11, 32(%rdx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end70:
-	.size	mcl_fp_montRed5L, .Lfunc_end70-mcl_fp_montRed5L
-
-	.globl	mcl_fp_addPre5L
-	.align	16, 0x90
-	.type	mcl_fp_addPre5L,@function
-mcl_fp_addPre5L:                        # @mcl_fp_addPre5L
-# BB#0:
-	movq	32(%rdx), %r8
-	movq	24(%rdx), %r9
-	movq	24(%rsi), %r11
-	movq	32(%rsi), %r10
-	movq	16(%rdx), %rcx
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rcx
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	adcq	%r9, %r11
-	movq	%r11, 24(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-.Lfunc_end71:
-	.size	mcl_fp_addPre5L, .Lfunc_end71-mcl_fp_addPre5L
-
-	.globl	mcl_fp_subPre5L
-	.align	16, 0x90
-	.type	mcl_fp_subPre5L,@function
-mcl_fp_subPre5L:                        # @mcl_fp_subPre5L
-# BB#0:
-	pushq	%rbx
-	movq	32(%rsi), %r10
-	movq	24(%rdx), %r8
-	movq	32(%rdx), %r9
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %rcx
-	movq	%rbx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r8, %r11
-	movq	%r11, 24(%rdi)
-	sbbq	%r9, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	popq	%rbx
-	retq
-.Lfunc_end72:
-	.size	mcl_fp_subPre5L, .Lfunc_end72-mcl_fp_subPre5L
-
-	.globl	mcl_fp_shr1_5L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_5L,@function
-mcl_fp_shr1_5L:                         # @mcl_fp_shr1_5L
-# BB#0:
-	movq	32(%rsi), %r8
-	movq	24(%rsi), %rcx
-	movq	16(%rsi), %rdx
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rax
-	movq	%rax, (%rdi)
-	shrdq	$1, %rdx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rcx, %rdx
-	movq	%rdx, 16(%rdi)
-	shrdq	$1, %r8, %rcx
-	movq	%rcx, 24(%rdi)
-	shrq	%r8
-	movq	%r8, 32(%rdi)
-	retq
-.Lfunc_end73:
-	.size	mcl_fp_shr1_5L, .Lfunc_end73-mcl_fp_shr1_5L
-
-	.globl	mcl_fp_add5L
-	.align	16, 0x90
-	.type	mcl_fp_add5L,@function
-mcl_fp_add5L:                           # @mcl_fp_add5L
-# BB#0:
-	pushq	%rbx
-	movq	32(%rdx), %r11
-	movq	24(%rdx), %rbx
-	movq	24(%rsi), %r9
-	movq	32(%rsi), %r8
-	movq	16(%rdx), %r10
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r10
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	adcq	%rbx, %r9
-	movq	%r9, 24(%rdi)
-	adcq	%r11, %r8
-	movq	%r8, 32(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r10
-	sbbq	24(%rcx), %r9
-	sbbq	32(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB74_2
-# BB#1:                                 # %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	movq	%r9, 24(%rdi)
-	movq	%r8, 32(%rdi)
-.LBB74_2:                               # %carry
-	popq	%rbx
-	retq
-.Lfunc_end74:
-	.size	mcl_fp_add5L, .Lfunc_end74-mcl_fp_add5L
-
-	.globl	mcl_fp_addNF5L
-	.align	16, 0x90
-	.type	mcl_fp_addNF5L,@function
-mcl_fp_addNF5L:                         # @mcl_fp_addNF5L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	32(%rdx), %r8
-	movq	24(%rdx), %r9
-	movq	16(%rdx), %r10
-	movq	(%rdx), %r14
-	movq	8(%rdx), %r11
-	addq	(%rsi), %r14
-	adcq	8(%rsi), %r11
-	adcq	16(%rsi), %r10
-	adcq	24(%rsi), %r9
-	adcq	32(%rsi), %r8
-	movq	%r14, %rsi
-	subq	(%rcx), %rsi
-	movq	%r11, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r10, %rbx
-	sbbq	16(%rcx), %rbx
-	movq	%r9, %r15
-	sbbq	24(%rcx), %r15
-	movq	%r8, %rax
-	sbbq	32(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r14, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r11, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
-	cmovsq	%r9, %r15
-	movq	%r15, 24(%rdi)
-	cmovsq	%r8, %rax
-	movq	%rax, 32(%rdi)
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end75:
-	.size	mcl_fp_addNF5L, .Lfunc_end75-mcl_fp_addNF5L
-
-	.globl	mcl_fp_sub5L
-	.align	16, 0x90
-	.type	mcl_fp_sub5L,@function
-mcl_fp_sub5L:                           # @mcl_fp_sub5L
-# BB#0:
-	pushq	%r14
-	pushq	%rbx
-	movq	32(%rsi), %r8
-	movq	24(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %r10
-	movq	%rax, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	sbbq	%r11, %r9
-	movq	%r9, 24(%rdi)
-	sbbq	%r14, %r8
-	movq	%r8, 32(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	.LBB76_2
-# BB#1:                                 # %carry
-	movq	32(%rcx), %r11
-	movq	24(%rcx), %r14
-	movq	8(%rcx), %rdx
-	movq	16(%rcx), %rbx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%rsi, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
-	adcq	%r9, %r14
-	movq	%r14, 24(%rdi)
-	adcq	%r8, %r11
-	movq	%r11, 32(%rdi)
-.LBB76_2:                               # %nocarry
-	popq	%rbx
-	popq	%r14
-	retq
-.Lfunc_end76:
-	.size	mcl_fp_sub5L, .Lfunc_end76-mcl_fp_sub5L
-
-	.globl	mcl_fp_subNF5L
-	.align	16, 0x90
-	.type	mcl_fp_subNF5L,@function
-mcl_fp_subNF5L:                         # @mcl_fp_subNF5L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	32(%rsi), %r14
-	movq	24(%rsi), %r8
-	movq	16(%rsi), %r9
-	movq	(%rsi), %r10
-	movq	8(%rsi), %r11
-	subq	(%rdx), %r10
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %r9
-	sbbq	24(%rdx), %r8
-	sbbq	32(%rdx), %r14
-	movq	%r14, %rdx
-	sarq	$63, %rdx
-	movq	%rdx, %rsi
-	shldq	$1, %r14, %rsi
-	movq	8(%rcx), %rbx
-	andq	%rsi, %rbx
-	andq	(%rcx), %rsi
-	movq	32(%rcx), %r15
-	andq	%rdx, %r15
-	movq	24(%rcx), %rax
-	andq	%rdx, %rax
-	rolq	%rdx
-	andq	16(%rcx), %rdx
-	addq	%r10, %rsi
-	movq	%rsi, (%rdi)
-	adcq	%r11, %rbx
-	movq	%rbx, 8(%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r14, %r15
-	movq	%r15, 32(%rdi)
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end77:
-	.size	mcl_fp_subNF5L, .Lfunc_end77-mcl_fp_subNF5L
-
-	.globl	mcl_fpDbl_add5L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add5L,@function
-mcl_fpDbl_add5L:                        # @mcl_fpDbl_add5L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	72(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	64(%rdx), %r11
-	movq	56(%rdx), %r14
-	movq	48(%rdx), %r15
-	movq	24(%rsi), %rbp
-	movq	32(%rsi), %r13
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rax
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r12
-	adcq	24(%rdx), %rbp
-	adcq	32(%rdx), %r13
-	movq	40(%rdx), %r9
-	movq	%rbx, (%rdi)
-	movq	72(%rsi), %r8
-	movq	%rax, 8(%rdi)
-	movq	64(%rsi), %r10
-	movq	%r12, 16(%rdi)
-	movq	56(%rsi), %r12
-	movq	%rbp, 24(%rdi)
-	movq	48(%rsi), %rbp
-	movq	40(%rsi), %rbx
-	movq	%r13, 32(%rdi)
-	adcq	%r9, %rbx
-	adcq	%r15, %rbp
-	adcq	%r14, %r12
-	adcq	%r11, %r10
-	adcq	-8(%rsp), %r8           # 8-byte Folded Reload
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rbx, %rax
-	subq	(%rcx), %rax
-	movq	%rbp, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r12, %r9
-	sbbq	16(%rcx), %r9
-	movq	%r10, %r11
-	sbbq	24(%rcx), %r11
-	movq	%r8, %r14
-	sbbq	32(%rcx), %r14
-	sbbq	$0, %rsi
-	andl	$1, %esi
-	cmovneq	%rbx, %rax
-	movq	%rax, 40(%rdi)
-	testb	%sil, %sil
-	cmovneq	%rbp, %rdx
-	movq	%rdx, 48(%rdi)
-	cmovneq	%r12, %r9
-	movq	%r9, 56(%rdi)
-	cmovneq	%r10, %r11
-	movq	%r11, 64(%rdi)
-	cmovneq	%r8, %r14
-	movq	%r14, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end78:
-	.size	mcl_fpDbl_add5L, .Lfunc_end78-mcl_fpDbl_add5L
-
-	.globl	mcl_fpDbl_sub5L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub5L,@function
-mcl_fpDbl_sub5L:                        # @mcl_fpDbl_sub5L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	72(%rdx), %r9
-	movq	64(%rdx), %r10
-	movq	56(%rdx), %r14
-	movq	16(%rsi), %r8
-	movq	(%rsi), %r15
-	movq	8(%rsi), %r11
-	xorl	%eax, %eax
-	subq	(%rdx), %r15
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %r8
-	movq	24(%rsi), %r12
-	sbbq	24(%rdx), %r12
-	movq	%r15, (%rdi)
-	movq	32(%rsi), %rbx
-	sbbq	32(%rdx), %rbx
-	movq	%r11, 8(%rdi)
-	movq	48(%rdx), %r15
-	movq	40(%rdx), %rdx
-	movq	%r8, 16(%rdi)
-	movq	72(%rsi), %r8
-	movq	%r12, 24(%rdi)
-	movq	64(%rsi), %r11
-	movq	%rbx, 32(%rdi)
-	movq	40(%rsi), %rbp
-	sbbq	%rdx, %rbp
-	movq	56(%rsi), %r12
-	movq	48(%rsi), %r13
-	sbbq	%r15, %r13
-	sbbq	%r14, %r12
-	sbbq	%r10, %r11
-	sbbq	%r9, %r8
-	movl	$0, %edx
-	sbbq	$0, %rdx
-	andl	$1, %edx
-	movq	(%rcx), %rsi
-	cmoveq	%rax, %rsi
-	testb	%dl, %dl
-	movq	16(%rcx), %rdx
-	cmoveq	%rax, %rdx
-	movq	8(%rcx), %rbx
-	cmoveq	%rax, %rbx
-	movq	32(%rcx), %r9
-	cmoveq	%rax, %r9
-	cmovneq	24(%rcx), %rax
-	addq	%rbp, %rsi
-	movq	%rsi, 40(%rdi)
-	adcq	%r13, %rbx
-	movq	%rbx, 48(%rdi)
-	adcq	%r12, %rdx
-	movq	%rdx, 56(%rdi)
-	adcq	%r11, %rax
-	movq	%rax, 64(%rdi)
-	adcq	%r8, %r9
-	movq	%r9, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end79:
-	.size	mcl_fpDbl_sub5L, .Lfunc_end79-mcl_fpDbl_sub5L
-
-	.globl	mcl_fp_mulUnitPre6L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre6L,@function
-mcl_fp_mulUnitPre6L:                    # @mcl_fp_mulUnitPre6L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rcx, %rax
-	mulq	40(%rsi)
-	movq	%rdx, %r9
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	32(%rsi)
-	movq	%rdx, %r10
-	movq	%rax, %r11
-	movq	%rcx, %rax
-	mulq	24(%rsi)
-	movq	%rdx, %r15
-	movq	%rax, %r14
-	movq	%rcx, %rax
-	mulq	16(%rsi)
-	movq	%rdx, %r13
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	8(%rsi)
-	movq	%rdx, %rbx
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	(%rsi)
-	movq	%rax, (%rdi)
-	addq	%rbp, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r12, %rbx
-	movq	%rbx, 16(%rdi)
-	adcq	%r14, %r13
-	movq	%r13, 24(%rdi)
-	adcq	%r11, %r15
-	movq	%r15, 32(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 40(%rdi)
-	adcq	$0, %r9
-	movq	%r9, 48(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end80:
-	.size	mcl_fp_mulUnitPre6L, .Lfunc_end80-mcl_fp_mulUnitPre6L
-
-	.globl	mcl_fpDbl_mulPre6L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre6L,@function
-mcl_fpDbl_mulPre6L:                     # @mcl_fpDbl_mulPre6L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rsi, %r8
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	(%r8), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	8(%r8), %r13
-	movq	%r13, -72(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rbx
-	mulq	%rbx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	16(%r8), %rbp
-	movq	%rbp, -64(%rsp)         # 8-byte Spill
-	movq	24(%r8), %rsi
-	movq	%rsi, -48(%rsp)         # 8-byte Spill
-	movq	32(%r8), %r10
-	movq	40(%r8), %r11
-	movq	%rax, (%rdi)
-	movq	%r11, %rax
-	mulq	%rbx
-	movq	%rdx, %rcx
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%r10, %rax
-	mulq	%rbx
-	movq	%rdx, %r12
-	movq	%rax, %rdi
-	movq	%rsi, %rax
-	mulq	%rbx
-	movq	%rdx, %r9
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	%rbx
-	movq	%rdx, %rbp
-	movq	%rax, %r15
-	movq	%r13, %rax
-	mulq	%rbx
-	movq	%rdx, %r13
-	movq	%rax, %rsi
-	addq	-32(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	%r15, %r13
-	adcq	%r14, %rbp
-	adcq	%rdi, %r9
-	adcq	-40(%rsp), %r12         # 8-byte Folded Reload
-	movq	%r12, %rdi
-	adcq	$0, %rcx
-	movq	%rcx, -56(%rsp)         # 8-byte Spill
-	movq	-16(%rsp), %r15         # 8-byte Reload
-	movq	8(%r15), %rcx
-	movq	%r11, %rax
-	mulq	%rcx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, %r11
-	movq	%r10, %rax
-	mulq	%rcx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, %r12
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, %r14
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, %rbx
-	movq	-72(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, %r10
-	movq	-24(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	addq	%rsi, %rax
-	movq	-8(%rsp), %rcx          # 8-byte Reload
-	movq	%rax, 8(%rcx)
-	adcq	%r13, %r10
-	adcq	%rbp, %rbx
-	adcq	%r9, %r14
-	adcq	%rdi, %r12
-	adcq	-56(%rsp), %r11         # 8-byte Folded Reload
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	addq	%rdx, %r10
-	adcq	-72(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-48(%rsp), %r12         # 8-byte Folded Reload
-	adcq	-40(%rsp), %r11         # 8-byte Folded Reload
-	movq	%r11, -96(%rsp)         # 8-byte Spill
-	adcq	-32(%rsp), %rdi         # 8-byte Folded Reload
-	movq	40(%r8), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	16(%r15), %rcx
-	mulq	%rcx
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	32(%r8), %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r15
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	24(%r8), %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r11
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	16(%r8), %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %rbp
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	(%r8), %rsi
-	movq	%rsi, -72(%rsp)         # 8-byte Spill
-	movq	8(%r8), %rax
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rdx, %r13
-	movq	%rax, %r9
-	movq	%rsi, %rax
-	mulq	%rcx
-	addq	%r10, %rax
-	movq	-8(%rsp), %r10          # 8-byte Reload
-	movq	%rax, 16(%r10)
-	adcq	%rbx, %r9
-	adcq	%r14, %rbp
-	adcq	%r12, %r11
-	adcq	-96(%rsp), %r15         # 8-byte Folded Reload
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	adcq	%rdi, %rax
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%rdx, %r9
-	adcq	%r13, %rbp
-	adcq	-112(%rsp), %r11        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r15        # 8-byte Folded Reload
-	adcq	-88(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	adcq	-32(%rsp), %rcx         # 8-byte Folded Reload
-	movq	-16(%rsp), %rdi         # 8-byte Reload
-	movq	24(%rdi), %rbx
-	movq	-24(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, %r14
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, %r12
-	movq	-80(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, %rsi
-	movq	%rax, %r13
-	movq	-72(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	addq	%r9, %rax
-	movq	%rax, 24(%r10)
-	adcq	%rbp, %r13
-	adcq	%r11, %r12
-	adcq	%r15, %r14
-	movq	-24(%rsp), %rbp         # 8-byte Reload
-	adcq	-40(%rsp), %rbp         # 8-byte Folded Reload
-	movq	-32(%rsp), %rax         # 8-byte Reload
-	adcq	%rcx, %rax
-	sbbq	%r10, %r10
-	andl	$1, %r10d
-	addq	%rdx, %r13
-	adcq	%rsi, %r12
-	adcq	-64(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-56(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, -24(%rsp)         # 8-byte Spill
-	adcq	-48(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	adcq	-88(%rsp), %r10         # 8-byte Folded Reload
-	movq	40(%r8), %rax
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	movq	32(%rdi), %rcx
-	movq	32(%r8), %rbx
-	movq	%rbx, -112(%rsp)        # 8-byte Spill
-	movq	24(%r8), %rsi
-	movq	%rsi, -64(%rsp)         # 8-byte Spill
-	movq	16(%r8), %rdi
-	movq	%rdi, -104(%rsp)        # 8-byte Spill
-	movq	(%r8), %r15
-	movq	8(%r8), %r9
-	mulq	%rcx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, %r11
-	movq	%rbx, %rax
-	mulq	%rcx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, %r8
-	movq	%rsi, %rax
-	mulq	%rcx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	mulq	%rcx
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, %rdi
-	movq	%r9, %rax
-	mulq	%rcx
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
-	movq	%r15, %rax
-	mulq	%rcx
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	addq	%r13, %rax
-	movq	-8(%rsp), %r13          # 8-byte Reload
-	movq	%rax, 32(%r13)
-	adcq	%r12, %rbp
-	adcq	%r14, %rdi
-	adcq	-24(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-32(%rsp), %r8          # 8-byte Folded Reload
-	adcq	%r10, %r11
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	movq	40(%rax), %rcx
-	sbbq	%rsi, %rsi
-	movq	-72(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	movq	-112(%rsp), %rax        # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, %r14
-	movq	%r9, %rax
-	mulq	%rcx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, %r10
-	movq	%r15, %rax
-	mulq	%rcx
-	movq	%rdx, %r12
-	movq	%rax, %r9
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, %r15
-	movq	-104(%rsp), %rax        # 8-byte Reload
-	mulq	%rcx
-	andl	$1, %esi
-	addq	-96(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-88(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	-80(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-56(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-48(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-40(%rsp), %rsi         # 8-byte Folded Reload
-	addq	%r9, %rbp
-	movq	%rbp, 40(%r13)
-	adcq	%r10, %rdi
-	adcq	%rax, %rbx
-	adcq	%r15, %r8
-	adcq	%r14, %r11
-	adcq	-72(%rsp), %rsi         # 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r12, %rdi
-	movq	%rdi, 48(%r13)
-	adcq	-32(%rsp), %rbx         # 8-byte Folded Reload
-	movq	%rbx, 56(%r13)
-	adcq	%rdx, %r8
-	movq	%r8, 64(%r13)
-	adcq	-64(%rsp), %r11         # 8-byte Folded Reload
-	movq	%r11, 72(%r13)
-	adcq	-24(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, 80(%r13)
-	adcq	-16(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, 88(%r13)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end81:
-	.size	mcl_fpDbl_mulPre6L, .Lfunc_end81-mcl_fpDbl_mulPre6L
-
-	.globl	mcl_fpDbl_sqrPre6L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre6L,@function
-mcl_fpDbl_sqrPre6L:                     # @mcl_fpDbl_sqrPre6L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	16(%rsi), %r8
-	movq	%r8, -56(%rsp)          # 8-byte Spill
-	movq	24(%rsi), %r10
-	movq	%r10, -40(%rsp)         # 8-byte Spill
-	movq	32(%rsi), %r9
-	movq	%r9, -32(%rsp)          # 8-byte Spill
-	movq	40(%rsi), %r11
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rbx
-	movq	%rcx, %rax
-	mulq	%rcx
-	movq	%rdx, %rbp
-	movq	%rax, (%rdi)
-	movq	%r11, %rax
-	mulq	%rcx
-	movq	%rdx, %rdi
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	%r9, %rax
-	mulq	%rcx
-	movq	%rdx, %r14
-	movq	%rax, %r9
-	movq	%r10, %rax
-	mulq	%rcx
-	movq	%rdx, %r12
-	movq	%rax, %r10
-	movq	%r8, %rax
-	mulq	%rcx
-	movq	%rdx, %r13
-	movq	%rax, %r15
-	movq	%rbx, %rax
-	mulq	%rcx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, %r8
-	addq	%r8, %rbp
-	adcq	%rdx, %r15
-	adcq	%r10, %r13
-	adcq	%r9, %r12
-	adcq	-16(%rsp), %r14         # 8-byte Folded Reload
-	adcq	$0, %rdi
-	movq	%rdi, -48(%rsp)         # 8-byte Spill
-	movq	%r11, %rax
-	mulq	%rbx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rax, %rcx
-	movq	-32(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, %r9
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, %r10
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, %rdi
-	movq	%rax, %r11
-	movq	%rbx, %rax
-	mulq	%rbx
-	movq	%rax, %rbx
-	addq	%r8, %rbp
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	%rbp, 8(%rax)
-	adcq	%r15, %rbx
-	adcq	%r13, %r11
-	adcq	%r12, %r10
-	adcq	%r14, %r9
-	movq	%rcx, %rax
-	adcq	-48(%rsp), %rax         # 8-byte Folded Reload
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	-24(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	%rdx, %r11
-	adcq	%rdi, %r10
-	adcq	-40(%rsp), %r9          # 8-byte Folded Reload
-	adcq	-32(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	adcq	-16(%rsp), %rcx         # 8-byte Folded Reload
-	movq	40(%rsi), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %rdi
-	mulq	%rdi
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	32(%rsi), %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r12
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %rbp
-	movq	%rbp, %rax
-	mulq	%rdi
-	movq	%rax, %r14
-	movq	%r14, -96(%rsp)         # 8-byte Spill
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r15
-	movq	%r15, -48(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	%rax, %r8
-	movq	%r15, %rax
-	mulq	%rdi
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	%rax, %r15
-	movq	%rdi, %rax
-	mulq	%rdi
-	movq	%rax, %r13
-	addq	%rbx, %r15
-	movq	-8(%rsp), %rbx          # 8-byte Reload
-	movq	%r15, 16(%rbx)
-	adcq	%r11, %r8
-	adcq	%r10, %r13
-	adcq	%r14, %r9
-	adcq	-72(%rsp), %r12         # 8-byte Folded Reload
-	movq	-80(%rsp), %r14         # 8-byte Reload
-	adcq	%rcx, %r14
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	-104(%rsp), %r8         # 8-byte Folded Reload
-	adcq	-88(%rsp), %r13         # 8-byte Folded Reload
-	adcq	%rdx, %r9
-	adcq	-24(%rsp), %r12         # 8-byte Folded Reload
-	adcq	-56(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-40(%rsp), %rcx         # 8-byte Folded Reload
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	-32(%rsp), %rax         # 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, %r10
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, %r11
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	%rax, %rdi
-	movq	%rbp, %rax
-	mulq	%rbp
-	movq	%rax, %r15
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	addq	%r8, %rdi
-	movq	%rdi, 24(%rbx)
-	adcq	%r13, %r11
-	adcq	-96(%rsp), %r9          # 8-byte Folded Reload
-	adcq	%r12, %r15
-	adcq	%r14, %r10
-	movq	-16(%rsp), %r12         # 8-byte Reload
-	adcq	%rcx, %r12
-	sbbq	%rcx, %rcx
-	movq	(%rsi), %r8
-	andl	$1, %ecx
-	movq	8(%rsi), %rbx
-	movq	40(%rsi), %rdi
-	movq	%rbx, %rax
-	mulq	%rdi
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	%r8, %rax
-	mulq	%rdi
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	32(%rsi), %rbp
-	movq	%rbx, %rax
-	mulq	%rbp
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%r8, %rax
-	mulq	%rbp
-	movq	%rax, %r14
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	addq	-88(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-80(%rsp), %r9          # 8-byte Folded Reload
-	adcq	-24(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-104(%rsp), %r10        # 8-byte Folded Reload
-	adcq	-72(%rsp), %r12         # 8-byte Folded Reload
-	movq	%r12, -16(%rsp)         # 8-byte Spill
-	adcq	-56(%rsp), %rcx         # 8-byte Folded Reload
-	movq	24(%rsi), %rbx
-	movq	16(%rsi), %r8
-	movq	%rbx, %rax
-	mulq	%rdi
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rbp
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	%rax, %rsi
-	movq	%r8, %rax
-	mulq	%rdi
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%r8, %rax
-	mulq	%rbp
-	movq	%rdx, %rbx
-	movq	%rax, %r13
-	movq	%rdi, %rax
-	mulq	%rbp
-	movq	%rdx, %r12
-	movq	%rax, %r8
-	movq	%rdi, %rax
-	mulq	%rdi
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, %rdi
-	movq	%rbp, %rax
-	mulq	%rbp
-	addq	%r14, %r11
-	movq	-8(%rsp), %r14          # 8-byte Reload
-	movq	%r11, 32(%r14)
-	adcq	-120(%rsp), %r9         # 8-byte Folded Reload
-	adcq	%r15, %r13
-	adcq	%r10, %rsi
-	adcq	-16(%rsp), %rax         # 8-byte Folded Reload
-	adcq	%r8, %rcx
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	addq	-112(%rsp), %r9         # 8-byte Folded Reload
-	adcq	-96(%rsp), %r13         # 8-byte Folded Reload
-	adcq	%rbx, %rsi
-	adcq	-104(%rsp), %rax        # 8-byte Folded Reload
-	adcq	%rdx, %rcx
-	adcq	%r12, %rbp
-	addq	-64(%rsp), %r9          # 8-byte Folded Reload
-	movq	%r14, %rbx
-	movq	%r9, 40(%rbx)
-	adcq	-48(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-88(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-80(%rsp), %rax         # 8-byte Folded Reload
-	adcq	%r8, %rcx
-	adcq	%rdi, %rbp
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	addq	-40(%rsp), %r13         # 8-byte Folded Reload
-	movq	%r13, 48(%rbx)
-	adcq	-32(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, 56(%rbx)
-	adcq	-72(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, 64(%rbx)
-	adcq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, 72(%rbx)
-	adcq	%r12, %rbp
-	movq	%rbp, 80(%rbx)
-	adcq	-56(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, 88(%rbx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end82:
-	.size	mcl_fpDbl_sqrPre6L, .Lfunc_end82-mcl_fpDbl_sqrPre6L
-
-	.globl	mcl_fp_mont6L
-	.align	16, 0x90
-	.type	mcl_fp_mont6L,@function
-mcl_fp_mont6L:                          # @mcl_fp_mont6L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$56, %rsp
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%rdi, -96(%rsp)         # 8-byte Spill
-	movq	40(%rsi), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rbx
-	mulq	%rbx
-	movq	%rax, %r8
-	movq	%rdx, %r14
-	movq	32(%rsi), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	mulq	%rbx
-	movq	%rax, %r9
-	movq	%rdx, %r15
-	movq	24(%rsi), %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %rbp
-	movq	%rbp, -48(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r12
-	movq	%r12, -32(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rsi
-	movq	%rsi, -40(%rsp)         # 8-byte Spill
-	mulq	%rbx
-	movq	%rdx, %rdi
-	movq	%rax, %r10
-	movq	%rbp, %rax
-	mulq	%rbx
-	movq	%rdx, %rbp
-	movq	%rax, %r11
-	movq	%rsi, %rax
-	mulq	%rbx
-	movq	%rdx, %rsi
-	movq	%rax, %r13
-	movq	%r12, %rax
-	mulq	%rbx
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	addq	%r13, %rdx
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	adcq	%r11, %rsi
-	movq	%rsi, -104(%rsp)        # 8-byte Spill
-	adcq	%r10, %rbp
-	movq	%rbp, -88(%rsp)         # 8-byte Spill
-	adcq	%r9, %rdi
-	movq	%rdi, -80(%rsp)         # 8-byte Spill
-	adcq	%r8, %r15
-	movq	%r15, -72(%rsp)         # 8-byte Spill
-	adcq	$0, %r14
-	movq	%r14, -64(%rsp)         # 8-byte Spill
-	movq	-8(%rcx), %rdx
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	movq	%rax, %rdi
-	imulq	%rdx, %rdi
-	movq	(%rcx), %r9
-	movq	%r9, 8(%rsp)            # 8-byte Spill
-	movq	40(%rcx), %rdx
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	movq	32(%rcx), %rbp
-	movq	%rbp, 32(%rsp)          # 8-byte Spill
-	movq	24(%rcx), %rbx
-	movq	%rbx, 40(%rsp)          # 8-byte Spill
-	movq	16(%rcx), %rsi
-	movq	%rsi, 16(%rsp)          # 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, 24(%rsp)          # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rdx
-	movq	%rdx, %r11
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rbp
-	movq	%rdx, %r13
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	%rbx
-	movq	%rdx, %rbp
-	movq	%rax, %r14
-	movq	%rdi, %rax
-	mulq	%rsi
-	movq	%rdx, %rbx
-	movq	%rax, %r12
-	movq	%rdi, %rax
-	mulq	%rcx
-	movq	%rdx, %r8
-	movq	%rax, %r15
-	movq	%rdi, %rax
-	mulq	%r9
-	movq	%rdx, %r9
-	addq	%r15, %r9
-	adcq	%r12, %r8
-	adcq	%r14, %rbx
-	adcq	%r10, %rbp
-	adcq	-128(%rsp), %r13        # 8-byte Folded Reload
-	adcq	$0, %r11
-	addq	-120(%rsp), %rax        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r9         # 8-byte Folded Reload
-	adcq	-104(%rsp), %r8         # 8-byte Folded Reload
-	adcq	-88(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-80(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-72(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r11         # 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	8(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %rcx
-	movq	%rdi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r12
-	movq	%rdx, %rdi
-	addq	%r10, %rdi
-	adcq	%rcx, %rsi
-	adcq	-112(%rsp), %r15        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r14        # 8-byte Folded Reload
-	movq	-72(%rsp), %rcx         # 8-byte Reload
-	adcq	-88(%rsp), %rcx         # 8-byte Folded Reload
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%r9, %r12
-	adcq	%r8, %rdi
-	adcq	%rbx, %rsi
-	adcq	%rbp, %r15
-	adcq	%r13, %r14
-	adcq	%r11, %rcx
-	movq	%rcx, -72(%rsp)         # 8-byte Spill
-	adcq	-80(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%r12, %rbx
-	imulq	(%rsp), %rbx            # 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r13
-	movq	%rbx, %rax
-	mulq	24(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r11
-	movq	%rbx, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, %r9
-	addq	%r11, %r9
-	adcq	%r13, %rbp
-	adcq	-120(%rsp), %rcx        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r10        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r8         # 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r12, %rax
-	adcq	%rdi, %r9
-	adcq	%rsi, %rbp
-	adcq	%r15, %rcx
-	adcq	%r14, %r10
-	adcq	-72(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-64(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	adcq	$0, -88(%rsp)           # 8-byte Folded Spill
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	16(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %r11
-	movq	%rdi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r15
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r13
-	movq	%rdx, %rdi
-	addq	%r15, %rdi
-	adcq	%r11, %rsi
-	adcq	%rbx, %r12
-	adcq	-112(%rsp), %r14        # 8-byte Folded Reload
-	movq	-72(%rsp), %rdx         # 8-byte Reload
-	adcq	-104(%rsp), %rdx        # 8-byte Folded Reload
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%r9, %r13
-	adcq	%rbp, %rdi
-	adcq	%rcx, %rsi
-	adcq	%r10, %r12
-	adcq	%r8, %r14
-	adcq	-80(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	adcq	-88(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%r13, %rbp
-	imulq	(%rsp), %rbp            # 8-byte Folded Reload
-	movq	%rbp, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r10
-	movq	%rbp, %rax
-	mulq	24(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r8
-	movq	%rbp, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, %r9
-	addq	%r8, %r9
-	adcq	%r10, %rcx
-	adcq	-120(%rsp), %rbx        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r15        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r11        # 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r13, %rax
-	adcq	%rdi, %r9
-	adcq	%rsi, %rcx
-	adcq	%r12, %rbx
-	adcq	%r14, %r15
-	adcq	-72(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-64(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	-88(%rsp), %rbp         # 8-byte Reload
-	adcq	$0, %rbp
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	24(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %r8
-	movq	%rdi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r13
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r12
-	movq	%rdx, %rdi
-	addq	%r13, %rdi
-	adcq	%r8, %rsi
-	adcq	-112(%rsp), %r10        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r14        # 8-byte Folded Reload
-	movq	-72(%rsp), %rdx         # 8-byte Reload
-	adcq	-88(%rsp), %rdx         # 8-byte Folded Reload
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%r9, %r12
-	adcq	%rcx, %rdi
-	adcq	%rbx, %rsi
-	adcq	%r15, %r10
-	adcq	%r11, %r14
-	adcq	-80(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	adcq	%rbp, %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%r12, %rbp
-	imulq	(%rsp), %rbp            # 8-byte Folded Reload
-	movq	%rbp, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r13
-	movq	%rbp, %rax
-	mulq	24(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r9
-	movq	%rbp, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, %r8
-	addq	%r9, %r8
-	adcq	%r13, %rcx
-	adcq	-120(%rsp), %rbx        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r15        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r11        # 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r12, %rax
-	adcq	%rdi, %r8
-	adcq	%rsi, %rcx
-	adcq	%r10, %rbx
-	adcq	%r14, %r15
-	adcq	-72(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-64(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	adcq	$0, -88(%rsp)           # 8-byte Folded Spill
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	32(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r12
-	movq	%rsi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %r10
-	movq	%rsi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r9
-	movq	%rdx, %r13
-	addq	%r10, %r13
-	adcq	%r12, %r14
-	adcq	-120(%rsp), %rdi        # 8-byte Folded Reload
-	adcq	-112(%rsp), %rbp        # 8-byte Folded Reload
-	movq	-72(%rsp), %rdx         # 8-byte Reload
-	adcq	-104(%rsp), %rdx        # 8-byte Folded Reload
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%r8, %r9
-	adcq	%rcx, %r13
-	adcq	%rbx, %r14
-	adcq	%r15, %rdi
-	adcq	%r11, %rbp
-	adcq	-80(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	adcq	-88(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%r9, %rsi
-	imulq	(%rsp), %rsi            # 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %rbx
-	movq	%rsi, %rax
-	mulq	24(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %rcx
-	movq	%rsi, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, %r8
-	addq	%rcx, %r8
-	adcq	%rbx, %r12
-	adcq	-120(%rsp), %r15        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r11        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r10        # 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r9, %rax
-	adcq	%r13, %r8
-	adcq	%r14, %r12
-	adcq	%rdi, %r15
-	adcq	%rbp, %r11
-	adcq	-72(%rsp), %r10         # 8-byte Folded Reload
-	movq	%r10, -72(%rsp)         # 8-byte Spill
-	adcq	-64(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	-88(%rsp), %rbx         # 8-byte Reload
-	adcq	$0, %rbx
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	40(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%rax, %r9
-	movq	%rcx, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %rsi
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r13
-	movq	%rdx, %rbp
-	addq	%rdi, %rbp
-	adcq	%rsi, %r14
-	adcq	%r9, %r10
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	adcq	-24(%rsp), %rax         # 8-byte Folded Reload
-	movq	-16(%rsp), %rdx         # 8-byte Reload
-	adcq	-88(%rsp), %rdx         # 8-byte Folded Reload
-	movq	-64(%rsp), %rsi         # 8-byte Reload
-	adcq	$0, %rsi
-	addq	%r8, %r13
-	movq	%r13, -40(%rsp)         # 8-byte Spill
-	adcq	%r12, %rbp
-	adcq	%r15, %r14
-	movq	%r14, -24(%rsp)         # 8-byte Spill
-	adcq	%r11, %r10
-	movq	%r10, -32(%rsp)         # 8-byte Spill
-	adcq	-72(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	adcq	-80(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	adcq	%rbx, %rsi
-	movq	%rsi, -64(%rsp)         # 8-byte Spill
-	sbbq	%rcx, %rcx
-	movq	(%rsp), %r9             # 8-byte Reload
-	imulq	%r13, %r9
-	andl	$1, %ecx
-	movq	%r9, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, (%rsp)            # 8-byte Spill
-	movq	%r9, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	%r9, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	%r9, %rax
-	movq	8(%rsp), %r13           # 8-byte Reload
-	mulq	%r13
-	movq	%rdx, %r15
-	movq	%rax, %r12
-	movq	%r9, %rax
-	movq	16(%rsp), %r14          # 8-byte Reload
-	mulq	%r14
-	movq	%rdx, %rsi
-	movq	%rax, %r11
-	movq	%r9, %rax
-	movq	24(%rsp), %r10          # 8-byte Reload
-	mulq	%r10
-	addq	%r15, %rax
-	adcq	%r11, %rdx
-	adcq	-56(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	(%rsp), %rbx            # 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	-40(%rsp), %r12         # 8-byte Folded Reload
-	adcq	%rbp, %rax
-	adcq	-24(%rsp), %rdx         # 8-byte Folded Reload
-	adcq	-32(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-8(%rsp), %rdi          # 8-byte Folded Reload
-	adcq	-16(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r8          # 8-byte Folded Reload
-	adcq	$0, %rcx
-	movq	%rax, %rbp
-	subq	%r13, %rbp
-	movq	%rdx, %r9
-	sbbq	%r10, %r9
-	movq	%rsi, %r10
-	sbbq	%r14, %r10
-	movq	%rdi, %r11
-	sbbq	40(%rsp), %r11          # 8-byte Folded Reload
-	movq	%rbx, %r14
-	sbbq	32(%rsp), %r14          # 8-byte Folded Reload
-	movq	%r8, %r15
-	sbbq	48(%rsp), %r15          # 8-byte Folded Reload
-	sbbq	$0, %rcx
-	andl	$1, %ecx
-	cmovneq	%rdi, %r11
-	testb	%cl, %cl
-	cmovneq	%rax, %rbp
-	movq	-96(%rsp), %rax         # 8-byte Reload
-	movq	%rbp, (%rax)
-	cmovneq	%rdx, %r9
-	movq	%r9, 8(%rax)
-	cmovneq	%rsi, %r10
-	movq	%r10, 16(%rax)
-	movq	%r11, 24(%rax)
-	cmovneq	%rbx, %r14
-	movq	%r14, 32(%rax)
-	cmovneq	%r8, %r15
-	movq	%r15, 40(%rax)
-	addq	$56, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end83:
-	.size	mcl_fp_mont6L, .Lfunc_end83-mcl_fp_mont6L
-
-	.globl	mcl_fp_montNF6L
-	.align	16, 0x90
-	.type	mcl_fp_montNF6L,@function
-mcl_fp_montNF6L:                        # @mcl_fp_montNF6L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$40, %rsp
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%rdi, -88(%rsp)         # 8-byte Spill
-	movq	40(%rsi), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rbx
-	mulq	%rbx
-	movq	%rax, 32(%rsp)          # 8-byte Spill
-	movq	%rdx, %r13
-	movq	32(%rsi), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	mulq	%rbx
-	movq	%rax, %r10
-	movq	%rdx, %r9
-	movq	24(%rsi), %rax
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %rbp
-	movq	%rbp, -64(%rsp)         # 8-byte Spill
-	movq	(%rsi), %rdi
-	movq	%rdi, -48(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rsi
-	movq	%rsi, -56(%rsp)         # 8-byte Spill
-	mulq	%rbx
-	movq	%rdx, %r11
-	movq	%rax, %r8
-	movq	%rbp, %rax
-	mulq	%rbx
-	movq	%rdx, %r14
-	movq	%rax, %r15
-	movq	%rsi, %rax
-	mulq	%rbx
-	movq	%rdx, %r12
-	movq	%rax, %rbp
-	movq	%rdi, %rax
-	mulq	%rbx
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rdx, %rbx
-	addq	%rbp, %rbx
-	adcq	%r15, %r12
-	adcq	%r8, %r14
-	adcq	%r10, %r11
-	adcq	32(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, -96(%rsp)          # 8-byte Spill
-	adcq	$0, %r13
-	movq	%r13, -80(%rsp)         # 8-byte Spill
-	movq	-8(%rcx), %rdx
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	movq	%rax, %r9
-	imulq	%rdx, %r9
-	movq	(%rcx), %r8
-	movq	%r8, 8(%rsp)            # 8-byte Spill
-	movq	40(%rcx), %rdx
-	movq	%rdx, 32(%rsp)          # 8-byte Spill
-	movq	32(%rcx), %rsi
-	movq	%rsi, 24(%rsp)          # 8-byte Spill
-	movq	24(%rcx), %rbp
-	movq	%rbp, 16(%rsp)          # 8-byte Spill
-	movq	16(%rcx), %rdi
-	movq	%rdi, -40(%rsp)         # 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -32(%rsp)         # 8-byte Spill
-	movq	%r9, %rax
-	mulq	%rdx
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %r10
-	movq	%r9, %rax
-	mulq	%rsi
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %r15
-	movq	%r9, %rax
-	mulq	%rbp
-	movq	%rdx, -128(%rsp)        # 8-byte Spill
-	movq	%rax, %rsi
-	movq	%r9, %rax
-	mulq	%rdi
-	movq	%rdx, %r13
-	movq	%rax, %rdi
-	movq	%r9, %rax
-	mulq	%rcx
-	movq	%rdx, %rcx
-	movq	%rax, %rbp
-	movq	%r9, %rax
-	mulq	%r8
-	addq	-104(%rsp), %rax        # 8-byte Folded Reload
-	adcq	%rbx, %rbp
-	adcq	%r12, %rdi
-	adcq	%r14, %rsi
-	adcq	%r11, %r15
-	adcq	-96(%rsp), %r10         # 8-byte Folded Reload
-	movq	-80(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%rdx, %rbp
-	adcq	%rcx, %rdi
-	adcq	%r13, %rsi
-	adcq	-128(%rsp), %r15        # 8-byte Folded Reload
-	adcq	-120(%rsp), %r10        # 8-byte Folded Reload
-	adcq	-112(%rsp), %rax        # 8-byte Folded Reload
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	8(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r13
-	movq	%rdx, %rcx
-	addq	%r8, %rcx
-	adcq	-120(%rsp), %rbx        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r12        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r11        # 8-byte Folded Reload
-	adcq	-96(%rsp), %r9          # 8-byte Folded Reload
-	adcq	$0, %r14
-	addq	%rbp, %r13
-	adcq	%rdi, %rcx
-	adcq	%rsi, %rbx
-	adcq	%r15, %r12
-	adcq	%r10, %r11
-	adcq	-80(%rsp), %r9          # 8-byte Folded Reload
-	adcq	$0, %r14
-	movq	%r13, %rsi
-	imulq	(%rsp), %rsi            # 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	movq	%rax, %rdi
-	movq	%rsi, %rax
-	mulq	24(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	%rax, %r8
-	movq	%rsi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %r10
-	movq	%rsi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %r15
-	movq	%rsi, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	addq	%r13, %rax
-	adcq	%rcx, %r15
-	adcq	%rbx, %r10
-	adcq	%r12, %r8
-	adcq	%r11, %rbp
-	adcq	%r9, %rdi
-	adcq	$0, %r14
-	addq	%rdx, %r15
-	adcq	-120(%rsp), %r10        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r8         # 8-byte Folded Reload
-	adcq	-104(%rsp), %rbp        # 8-byte Folded Reload
-	adcq	-80(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, -80(%rsp)         # 8-byte Spill
-	adcq	-96(%rsp), %r14         # 8-byte Folded Reload
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	16(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %rdi
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r9
-	movq	%rdx, %rsi
-	addq	%rdi, %rsi
-	adcq	-120(%rsp), %rbx        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r12        # 8-byte Folded Reload
-	adcq	-104(%rsp), %rcx        # 8-byte Folded Reload
-	adcq	-96(%rsp), %r11         # 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%r15, %r9
-	adcq	%r10, %rsi
-	adcq	%r8, %rbx
-	adcq	%rbp, %r12
-	adcq	-80(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	%r14, %r11
-	adcq	$0, %r13
-	movq	%r9, %r8
-	imulq	(%rsp), %r8             # 8-byte Folded Reload
-	movq	%r8, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	%r8, %rax
-	mulq	24(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %r15
-	movq	%r8, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	movq	%rax, %r10
-	movq	%r8, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %r14
-	movq	%r8, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %rdi
-	movq	%r8, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	addq	%r9, %rax
-	adcq	%rsi, %rdi
-	adcq	%rbx, %r14
-	adcq	%r12, %r10
-	adcq	%rcx, %r15
-	movq	-80(%rsp), %rax         # 8-byte Reload
-	adcq	%r11, %rax
-	adcq	$0, %r13
-	addq	%rdx, %rdi
-	adcq	%rbp, %r14
-	adcq	-120(%rsp), %r10        # 8-byte Folded Reload
-	adcq	-96(%rsp), %r15         # 8-byte Folded Reload
-	movq	%r15, -96(%rsp)         # 8-byte Spill
-	adcq	-112(%rsp), %rax        # 8-byte Folded Reload
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	adcq	-104(%rsp), %r13        # 8-byte Folded Reload
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	24(%rax), %rbp
-	movq	%rbp, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r12
-	movq	%rbp, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r9
-	movq	%rbp, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r8
-	movq	%rdx, %rbp
-	addq	%r9, %rbp
-	adcq	%r12, %rbx
-	adcq	-120(%rsp), %rsi        # 8-byte Folded Reload
-	adcq	-112(%rsp), %rcx        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r11        # 8-byte Folded Reload
-	adcq	$0, %r15
-	addq	%rdi, %r8
-	adcq	%r14, %rbp
-	adcq	%r10, %rbx
-	adcq	-96(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-80(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	%r13, %r11
-	adcq	$0, %r15
-	movq	%r8, %r14
-	imulq	(%rsp), %r14            # 8-byte Folded Reload
-	movq	%r14, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	%rax, %r9
-	movq	%r14, %rax
-	mulq	24(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, %r13
-	movq	%r14, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	movq	%rax, %r10
-	movq	%r14, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %r12
-	movq	%r14, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %rdi
-	movq	%r14, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	addq	%r8, %rax
-	adcq	%rbp, %rdi
-	adcq	%rbx, %r12
-	adcq	%rsi, %r10
-	adcq	%rcx, %r13
-	adcq	%r11, %r9
-	adcq	$0, %r15
-	addq	%rdx, %rdi
-	adcq	-120(%rsp), %r12        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r10        # 8-byte Folded Reload
-	adcq	-96(%rsp), %r13         # 8-byte Folded Reload
-	movq	%r13, -96(%rsp)         # 8-byte Spill
-	adcq	-80(%rsp), %r9          # 8-byte Folded Reload
-	movq	%r9, -80(%rsp)          # 8-byte Spill
-	adcq	-104(%rsp), %r15        # 8-byte Folded Reload
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	32(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r13
-	movq	%rcx, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r11
-	movq	%rdx, %rbp
-	addq	%r13, %rbp
-	adcq	-128(%rsp), %rbx        # 8-byte Folded Reload
-	adcq	-120(%rsp), %rsi        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r8         # 8-byte Folded Reload
-	adcq	-104(%rsp), %r9         # 8-byte Folded Reload
-	adcq	$0, %r14
-	addq	%rdi, %r11
-	adcq	%r12, %rbp
-	adcq	%r10, %rbx
-	adcq	-96(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-80(%rsp), %r8          # 8-byte Folded Reload
-	adcq	%r15, %r9
-	adcq	$0, %r14
-	movq	%r11, %rcx
-	imulq	(%rsp), %rcx            # 8-byte Folded Reload
-	movq	%rcx, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	24(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	movq	%rax, %r10
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	%rax, %r15
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	addq	%r11, %rax
-	adcq	%rbp, %rdi
-	adcq	%rbx, %r15
-	adcq	%rsi, %r10
-	adcq	%r8, %r12
-	movq	-80(%rsp), %rcx         # 8-byte Reload
-	adcq	%r9, %rcx
-	adcq	$0, %r14
-	addq	%rdx, %rdi
-	adcq	%r13, %r15
-	adcq	-104(%rsp), %r10        # 8-byte Folded Reload
-	movq	%r10, -104(%rsp)        # 8-byte Spill
-	adcq	-96(%rsp), %r12         # 8-byte Folded Reload
-	movq	%r12, -96(%rsp)         # 8-byte Spill
-	adcq	-120(%rsp), %rcx        # 8-byte Folded Reload
-	movq	%rcx, -80(%rsp)         # 8-byte Spill
-	adcq	-112(%rsp), %r14        # 8-byte Folded Reload
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	movq	40(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-72(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	-56(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %rsi
-	movq	%rcx, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r11
-	movq	%rdx, %r8
-	addq	%rsi, %r8
-	adcq	%rbp, %r10
-	adcq	-24(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-16(%rsp), %r12         # 8-byte Folded Reload
-	adcq	-8(%rsp), %r9           # 8-byte Folded Reload
-	adcq	$0, %rbx
-	addq	%rdi, %r11
-	adcq	%r15, %r8
-	adcq	-104(%rsp), %r10        # 8-byte Folded Reload
-	adcq	-96(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-80(%rsp), %r12         # 8-byte Folded Reload
-	adcq	%r14, %r9
-	movq	%r9, -16(%rsp)          # 8-byte Spill
-	adcq	$0, %rbx
-	movq	(%rsp), %r9             # 8-byte Reload
-	imulq	%r11, %r9
-	movq	%r9, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	movq	%rax, %rsi
-	movq	%r9, %rax
-	mulq	24(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%rax, %rdi
-	movq	%r9, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
-	movq	%r9, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, %r14
-	movq	%r9, %rax
-	movq	-40(%rsp), %r15         # 8-byte Reload
-	mulq	%r15
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, %rcx
-	movq	%r9, %rax
-	movq	-32(%rsp), %r9          # 8-byte Reload
-	mulq	%r9
-	addq	%r11, %r14
-	adcq	%r8, %rax
-	adcq	%r10, %rcx
-	adcq	%r13, %rbp
-	adcq	%r12, %rdi
-	adcq	-16(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	$0, %rbx
-	addq	-48(%rsp), %rax         # 8-byte Folded Reload
-	adcq	%rdx, %rcx
-	adcq	-56(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-24(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	-8(%rsp), %rsi          # 8-byte Folded Reload
-	adcq	(%rsp), %rbx            # 8-byte Folded Reload
-	movq	%rax, %r14
-	subq	8(%rsp), %r14           # 8-byte Folded Reload
-	movq	%rcx, %r8
-	sbbq	%r9, %r8
-	movq	%rbp, %r9
-	sbbq	%r15, %r9
-	movq	%rdi, %r10
-	sbbq	16(%rsp), %r10          # 8-byte Folded Reload
-	movq	%rsi, %r11
-	sbbq	24(%rsp), %r11          # 8-byte Folded Reload
-	movq	%rbx, %r15
-	sbbq	32(%rsp), %r15          # 8-byte Folded Reload
-	movq	%r15, %rdx
-	sarq	$63, %rdx
-	cmovsq	%rax, %r14
-	movq	-88(%rsp), %rax         # 8-byte Reload
-	movq	%r14, (%rax)
-	cmovsq	%rcx, %r8
-	movq	%r8, 8(%rax)
-	cmovsq	%rbp, %r9
-	movq	%r9, 16(%rax)
-	cmovsq	%rdi, %r10
-	movq	%r10, 24(%rax)
-	cmovsq	%rsi, %r11
-	movq	%r11, 32(%rax)
-	cmovsq	%rbx, %r15
-	movq	%r15, 40(%rax)
-	addq	$40, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end84:
-	.size	mcl_fp_montNF6L, .Lfunc_end84-mcl_fp_montNF6L
-
-	.globl	mcl_fp_montRed6L
-	.align	16, 0x90
-	.type	mcl_fp_montRed6L,@function
-mcl_fp_montRed6L:                       # @mcl_fp_montRed6L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$16, %rsp
-	movq	%rdx, %rcx
-	movq	%rdi, -104(%rsp)        # 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	(%rcx), %r11
-	movq	%r11, -24(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r9
-	movq	%r9, %rbp
-	imulq	%rax, %rbp
-	movq	40(%rcx), %rdx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rax, %r12
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	32(%rcx), %rdx
-	movq	%rdx, 8(%rsp)           # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rax, %r15
-	movq	%rdx, %r8
-	movq	24(%rcx), %rdx
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	movq	16(%rcx), %rdi
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -16(%rsp)         # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rdx, %r10
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	%rdi
-	movq	%rdx, %r13
-	movq	%rax, %rbx
-	movq	%rbp, %rax
-	mulq	%rcx
-	movq	%rdx, %rcx
-	movq	%rax, %rdi
-	movq	%rbp, %rax
-	mulq	%r11
-	movq	%rdx, %rbp
-	addq	%rdi, %rbp
-	adcq	%rbx, %rcx
-	adcq	%r14, %r13
-	adcq	%r15, %r10
-	adcq	%r12, %r8
-	movq	-72(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r9, %rax
-	adcq	8(%rsi), %rbp
-	adcq	16(%rsi), %rcx
-	adcq	24(%rsi), %r13
-	adcq	32(%rsi), %r10
-	adcq	40(%rsi), %r8
-	movq	%r8, -112(%rsp)         # 8-byte Spill
-	adcq	48(%rsi), %rdx
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	88(%rsi), %rax
-	movq	80(%rsi), %rdx
-	movq	72(%rsi), %rdi
-	movq	64(%rsi), %rbx
-	movq	56(%rsi), %r15
-	adcq	$0, %r15
-	adcq	$0, %rbx
-	movq	%rbx, -96(%rsp)         # 8-byte Spill
-	adcq	$0, %rdi
-	movq	%rdi, -64(%rsp)         # 8-byte Spill
-	adcq	$0, %rdx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	movq	%rbp, %rdi
-	imulq	-32(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %r8
-	movq	%rdi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r9
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r11
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	addq	%r11, %rdi
-	adcq	%r9, %rsi
-	adcq	%r8, %rbx
-	adcq	-128(%rsp), %r14        # 8-byte Folded Reload
-	movq	-88(%rsp), %r8          # 8-byte Reload
-	adcq	-120(%rsp), %r8         # 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%rbp, %rax
-	adcq	%rcx, %rdi
-	adcq	%r13, %rsi
-	adcq	%r10, %rbx
-	adcq	-112(%rsp), %r14        # 8-byte Folded Reload
-	adcq	-72(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r8, -88(%rsp)          # 8-byte Spill
-	adcq	%r15, %rdx
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	adcq	$0, -96(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -64(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -56(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -48(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rdi, %rcx
-	imulq	-32(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r10
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r9
-	addq	%r10, %r9
-	adcq	%r8, %rbp
-	adcq	-128(%rsp), %r13        # 8-byte Folded Reload
-	adcq	-120(%rsp), %r11        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r15        # 8-byte Folded Reload
-	movq	-72(%rsp), %rcx         # 8-byte Reload
-	adcq	$0, %rcx
-	addq	%rdi, %rax
-	adcq	%rsi, %r9
-	adcq	%rbx, %rbp
-	adcq	%r14, %r13
-	adcq	-88(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-80(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-96(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -72(%rsp)         # 8-byte Spill
-	adcq	$0, -64(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -56(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -48(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%r9, %rsi
-	imulq	-32(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r10
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %rbx
-	movq	%rsi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	addq	%rbx, %rdi
-	adcq	%r10, %rcx
-	adcq	-120(%rsp), %r8         # 8-byte Folded Reload
-	adcq	-112(%rsp), %r14        # 8-byte Folded Reload
-	movq	-88(%rsp), %rsi         # 8-byte Reload
-	adcq	-96(%rsp), %rsi         # 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r9, %rax
-	adcq	%rbp, %rdi
-	adcq	%r13, %rcx
-	adcq	%r11, %r8
-	adcq	%r15, %r14
-	adcq	-72(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -88(%rsp)         # 8-byte Spill
-	adcq	-64(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	adcq	$0, -56(%rsp)           # 8-byte Folded Spill
-	movq	-48(%rsp), %rbp         # 8-byte Reload
-	adcq	$0, %rbp
-	adcq	$0, %r12
-	movq	%rdi, %rsi
-	imulq	-32(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, %rax
-	movq	-40(%rsp), %r11         # 8-byte Reload
-	mulq	%r11
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %r15
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %rbx
-	movq	%rsi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	addq	%rbx, %rsi
-	adcq	%r15, %r10
-	adcq	-112(%rsp), %r13        # 8-byte Folded Reload
-	adcq	-96(%rsp), %r9          # 8-byte Folded Reload
-	movq	-72(%rsp), %rbx         # 8-byte Reload
-	adcq	-48(%rsp), %rbx         # 8-byte Folded Reload
-	movq	-64(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%rdi, %rax
-	adcq	%rcx, %rsi
-	adcq	%r8, %r10
-	adcq	%r14, %r13
-	adcq	-88(%rsp), %r9          # 8-byte Folded Reload
-	adcq	-80(%rsp), %rbx         # 8-byte Folded Reload
-	movq	%rbx, -72(%rsp)         # 8-byte Spill
-	adcq	-56(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	adcq	$0, %rbp
-	movq	%rbp, -48(%rsp)         # 8-byte Spill
-	adcq	$0, %r12
-	movq	-32(%rsp), %r8          # 8-byte Reload
-	imulq	%rsi, %r8
-	movq	%r8, %rax
-	mulq	%r11
-	movq	%rdx, %rdi
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	%r8, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	%r8, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	%r8, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r11
-	movq	%r8, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r14
-	movq	%r8, %rax
-	movq	-24(%rsp), %r8          # 8-byte Reload
-	mulq	%r8
-	addq	%r14, %rdx
-	adcq	%r11, %rbp
-	adcq	-80(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-56(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	-32(%rsp), %r15         # 8-byte Folded Reload
-	adcq	$0, %rdi
-	addq	%rsi, %rax
-	adcq	%r10, %rdx
-	adcq	%r13, %rbp
-	adcq	%r9, %rbx
-	adcq	-72(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	$0, %r12
-	movq	%rdx, %rax
-	subq	%r8, %rax
-	movq	%rbp, %rsi
-	sbbq	-16(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rbx, %r9
-	sbbq	-8(%rsp), %r9           # 8-byte Folded Reload
-	movq	%rcx, %r10
-	sbbq	(%rsp), %r10            # 8-byte Folded Reload
-	movq	%r15, %r11
-	sbbq	8(%rsp), %r11           # 8-byte Folded Reload
-	movq	%rdi, %r14
-	sbbq	-40(%rsp), %r14         # 8-byte Folded Reload
-	sbbq	$0, %r12
-	andl	$1, %r12d
-	cmovneq	%rdi, %r14
-	testb	%r12b, %r12b
-	cmovneq	%rdx, %rax
-	movq	-104(%rsp), %rdx        # 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovneq	%rbp, %rsi
-	movq	%rsi, 8(%rdx)
-	cmovneq	%rbx, %r9
-	movq	%r9, 16(%rdx)
-	cmovneq	%rcx, %r10
-	movq	%r10, 24(%rdx)
-	cmovneq	%r15, %r11
-	movq	%r11, 32(%rdx)
-	movq	%r14, 40(%rdx)
-	addq	$16, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end85:
-	.size	mcl_fp_montRed6L, .Lfunc_end85-mcl_fp_montRed6L
-
-	.globl	mcl_fp_addPre6L
-	.align	16, 0x90
-	.type	mcl_fp_addPre6L,@function
-mcl_fp_addPre6L:                        # @mcl_fp_addPre6L
-# BB#0:
-	pushq	%r14
-	pushq	%rbx
-	movq	40(%rdx), %r8
-	movq	40(%rsi), %r11
-	movq	32(%rdx), %r9
-	movq	24(%rdx), %r10
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %r14
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rbx, 16(%rdi)
-	adcq	%r10, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r9, %r14
-	movq	%r14, 32(%rdi)
-	adcq	%r8, %r11
-	movq	%r11, 40(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r14
-	retq
-.Lfunc_end86:
-	.size	mcl_fp_addPre6L, .Lfunc_end86-mcl_fp_addPre6L
-
-	.globl	mcl_fp_subPre6L
-	.align	16, 0x90
-	.type	mcl_fp_subPre6L,@function
-mcl_fp_subPre6L:                        # @mcl_fp_subPre6L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	40(%rdx), %r8
-	movq	40(%rsi), %r9
-	movq	32(%rsi), %r10
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %rsi
-	movq	24(%rdx), %r14
-	movq	32(%rdx), %r15
-	sbbq	16(%rdx), %rcx
-	movq	%rbx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r14, %r11
-	movq	%r11, 24(%rdi)
-	sbbq	%r15, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	%r8, %r9
-	movq	%r9, 40(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end87:
-	.size	mcl_fp_subPre6L, .Lfunc_end87-mcl_fp_subPre6L
-
-	.globl	mcl_fp_shr1_6L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_6L,@function
-mcl_fp_shr1_6L:                         # @mcl_fp_shr1_6L
-# BB#0:
-	movq	40(%rsi), %r8
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %rdx
-	movq	16(%rsi), %rax
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rcx
-	movq	%rcx, (%rdi)
-	shrdq	$1, %rax, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rdx, %rax
-	movq	%rax, 16(%rdi)
-	shrdq	$1, %r9, %rdx
-	movq	%rdx, 24(%rdi)
-	shrdq	$1, %r8, %r9
-	movq	%r9, 32(%rdi)
-	shrq	%r8
-	movq	%r8, 40(%rdi)
-	retq
-.Lfunc_end88:
-	.size	mcl_fp_shr1_6L, .Lfunc_end88-mcl_fp_shr1_6L
-
-	.globl	mcl_fp_add6L
-	.align	16, 0x90
-	.type	mcl_fp_add6L,@function
-mcl_fp_add6L:                           # @mcl_fp_add6L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	40(%rdx), %r14
-	movq	40(%rsi), %r8
-	movq	32(%rdx), %r15
-	movq	24(%rdx), %rbx
-	movq	24(%rsi), %r10
-	movq	32(%rsi), %r9
-	movq	16(%rdx), %r11
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r11
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	adcq	%rbx, %r10
-	movq	%r10, 24(%rdi)
-	adcq	%r15, %r9
-	movq	%r9, 32(%rdi)
-	adcq	%r14, %r8
-	movq	%r8, 40(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r11
-	sbbq	24(%rcx), %r10
-	sbbq	32(%rcx), %r9
-	sbbq	40(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB89_2
-# BB#1:                                 # %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	movq	%r10, 24(%rdi)
-	movq	%r9, 32(%rdi)
-	movq	%r8, 40(%rdi)
-.LBB89_2:                               # %carry
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end89:
-	.size	mcl_fp_add6L, .Lfunc_end89-mcl_fp_add6L
-
-	.globl	mcl_fp_addNF6L
-	.align	16, 0x90
-	.type	mcl_fp_addNF6L,@function
-mcl_fp_addNF6L:                         # @mcl_fp_addNF6L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	40(%rdx), %r8
-	movq	32(%rdx), %r9
-	movq	24(%rdx), %r10
-	movq	16(%rdx), %r11
-	movq	(%rdx), %r15
-	movq	8(%rdx), %r14
-	addq	(%rsi), %r15
-	adcq	8(%rsi), %r14
-	adcq	16(%rsi), %r11
-	adcq	24(%rsi), %r10
-	adcq	32(%rsi), %r9
-	adcq	40(%rsi), %r8
-	movq	%r15, %rsi
-	subq	(%rcx), %rsi
-	movq	%r14, %rbx
-	sbbq	8(%rcx), %rbx
-	movq	%r11, %rdx
-	sbbq	16(%rcx), %rdx
-	movq	%r10, %r13
-	sbbq	24(%rcx), %r13
-	movq	%r9, %r12
-	sbbq	32(%rcx), %r12
-	movq	%r8, %rax
-	sbbq	40(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r15, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r14, %rbx
-	movq	%rbx, 8(%rdi)
-	cmovsq	%r11, %rdx
-	movq	%rdx, 16(%rdi)
-	cmovsq	%r10, %r13
-	movq	%r13, 24(%rdi)
-	cmovsq	%r9, %r12
-	movq	%r12, 32(%rdi)
-	cmovsq	%r8, %rax
-	movq	%rax, 40(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end90:
-	.size	mcl_fp_addNF6L, .Lfunc_end90-mcl_fp_addNF6L
-
-	.globl	mcl_fp_sub6L
-	.align	16, 0x90
-	.type	mcl_fp_sub6L,@function
-mcl_fp_sub6L:                           # @mcl_fp_sub6L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	40(%rdx), %r14
-	movq	40(%rsi), %r8
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %r10
-	movq	16(%rsi), %r11
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %rsi
-	movq	24(%rdx), %r15
-	movq	32(%rdx), %r12
-	sbbq	16(%rdx), %r11
-	movq	%rax, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	sbbq	%r15, %r10
-	movq	%r10, 24(%rdi)
-	sbbq	%r12, %r9
-	movq	%r9, 32(%rdi)
-	sbbq	%r14, %r8
-	movq	%r8, 40(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	.LBB91_2
-# BB#1:                                 # %carry
-	movq	40(%rcx), %r14
-	movq	32(%rcx), %r15
-	movq	24(%rcx), %r12
-	movq	8(%rcx), %rbx
-	movq	16(%rcx), %rdx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%rsi, %rbx
-	movq	%rbx, 8(%rdi)
-	adcq	%r11, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r10, %r12
-	movq	%r12, 24(%rdi)
-	adcq	%r9, %r15
-	movq	%r15, 32(%rdi)
-	adcq	%r8, %r14
-	movq	%r14, 40(%rdi)
-.LBB91_2:                               # %nocarry
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end91:
-	.size	mcl_fp_sub6L, .Lfunc_end91-mcl_fp_sub6L
-
-	.globl	mcl_fp_subNF6L
-	.align	16, 0x90
-	.type	mcl_fp_subNF6L,@function
-mcl_fp_subNF6L:                         # @mcl_fp_subNF6L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	40(%rsi), %r15
-	movq	32(%rsi), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
-	movq	(%rsi), %r11
-	movq	8(%rsi), %r14
-	subq	(%rdx), %r11
-	sbbq	8(%rdx), %r14
-	sbbq	16(%rdx), %r10
-	sbbq	24(%rdx), %r9
-	sbbq	32(%rdx), %r8
-	sbbq	40(%rdx), %r15
-	movq	%r15, %rdx
-	sarq	$63, %rdx
-	movq	%rdx, %rbx
-	addq	%rbx, %rbx
-	movq	%rdx, %rsi
-	adcq	%rsi, %rsi
-	andq	8(%rcx), %rsi
-	movq	%r15, %rax
-	shrq	$63, %rax
-	orq	%rbx, %rax
-	andq	(%rcx), %rax
-	movq	40(%rcx), %r12
-	andq	%rdx, %r12
-	movq	32(%rcx), %r13
-	andq	%rdx, %r13
-	movq	24(%rcx), %rbx
-	andq	%rdx, %rbx
-	andq	16(%rcx), %rdx
-	addq	%r11, %rax
-	movq	%rax, (%rdi)
-	adcq	%r14, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r10, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r9, %rbx
-	movq	%rbx, 24(%rdi)
-	adcq	%r8, %r13
-	movq	%r13, 32(%rdi)
-	adcq	%r15, %r12
-	movq	%r12, 40(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end92:
-	.size	mcl_fp_subNF6L, .Lfunc_end92-mcl_fp_subNF6L
-
-	.globl	mcl_fpDbl_add6L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add6L,@function
-mcl_fpDbl_add6L:                        # @mcl_fpDbl_add6L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	88(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	80(%rdx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	72(%rdx), %r14
-	movq	64(%rdx), %r15
-	movq	24(%rsi), %rbp
-	movq	32(%rsi), %r13
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rax
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r12
-	adcq	24(%rdx), %rbp
-	adcq	32(%rdx), %r13
-	movq	56(%rdx), %r11
-	movq	48(%rdx), %r9
-	movq	40(%rdx), %rdx
-	movq	%rbx, (%rdi)
-	movq	88(%rsi), %r8
-	movq	%rax, 8(%rdi)
-	movq	80(%rsi), %r10
-	movq	%r12, 16(%rdi)
-	movq	72(%rsi), %r12
-	movq	%rbp, 24(%rdi)
-	movq	40(%rsi), %rax
-	adcq	%rdx, %rax
-	movq	64(%rsi), %rdx
-	movq	%r13, 32(%rdi)
-	movq	56(%rsi), %r13
-	movq	48(%rsi), %rbp
-	adcq	%r9, %rbp
-	movq	%rax, 40(%rdi)
-	adcq	%r11, %r13
-	adcq	%r15, %rdx
-	adcq	%r14, %r12
-	adcq	-16(%rsp), %r10         # 8-byte Folded Reload
-	adcq	-8(%rsp), %r8           # 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rbp, %rsi
-	subq	(%rcx), %rsi
-	movq	%r13, %rbx
-	sbbq	8(%rcx), %rbx
-	movq	%rdx, %r9
-	sbbq	16(%rcx), %r9
-	movq	%r12, %r11
-	sbbq	24(%rcx), %r11
-	movq	%r10, %r14
-	sbbq	32(%rcx), %r14
-	movq	%r8, %r15
-	sbbq	40(%rcx), %r15
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%rbp, %rsi
-	movq	%rsi, 48(%rdi)
-	testb	%al, %al
-	cmovneq	%r13, %rbx
-	movq	%rbx, 56(%rdi)
-	cmovneq	%rdx, %r9
-	movq	%r9, 64(%rdi)
-	cmovneq	%r12, %r11
-	movq	%r11, 72(%rdi)
-	cmovneq	%r10, %r14
-	movq	%r14, 80(%rdi)
-	cmovneq	%r8, %r15
-	movq	%r15, 88(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end93:
-	.size	mcl_fpDbl_add6L, .Lfunc_end93-mcl_fpDbl_add6L
-
-	.globl	mcl_fpDbl_sub6L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub6L,@function
-mcl_fpDbl_sub6L:                        # @mcl_fpDbl_sub6L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	88(%rdx), %r9
-	movq	80(%rdx), %r10
-	movq	72(%rdx), %r14
-	movq	16(%rsi), %r8
-	movq	(%rsi), %r15
-	movq	8(%rsi), %r11
-	xorl	%eax, %eax
-	subq	(%rdx), %r15
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %r8
-	movq	24(%rsi), %rbx
-	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %r12
-	sbbq	32(%rdx), %r12
-	movq	64(%rdx), %r13
-	movq	%r15, (%rdi)
-	movq	56(%rdx), %rbp
-	movq	%r11, 8(%rdi)
-	movq	48(%rdx), %r15
-	movq	40(%rdx), %rdx
-	movq	%r8, 16(%rdi)
-	movq	88(%rsi), %r8
-	movq	%rbx, 24(%rdi)
-	movq	40(%rsi), %rbx
-	sbbq	%rdx, %rbx
-	movq	80(%rsi), %r11
-	movq	%r12, 32(%rdi)
-	movq	48(%rsi), %rdx
-	sbbq	%r15, %rdx
-	movq	72(%rsi), %r15
-	movq	%rbx, 40(%rdi)
-	movq	64(%rsi), %r12
-	movq	56(%rsi), %rsi
-	sbbq	%rbp, %rsi
-	sbbq	%r13, %r12
-	sbbq	%r14, %r15
-	sbbq	%r10, %r11
-	sbbq	%r9, %r8
-	movl	$0, %ebp
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	movq	(%rcx), %r14
-	cmoveq	%rax, %r14
-	testb	%bpl, %bpl
-	movq	16(%rcx), %r9
-	cmoveq	%rax, %r9
-	movq	8(%rcx), %rbp
-	cmoveq	%rax, %rbp
-	movq	40(%rcx), %r10
-	cmoveq	%rax, %r10
-	movq	32(%rcx), %rbx
-	cmoveq	%rax, %rbx
-	cmovneq	24(%rcx), %rax
-	addq	%rdx, %r14
-	movq	%r14, 48(%rdi)
-	adcq	%rsi, %rbp
-	movq	%rbp, 56(%rdi)
-	adcq	%r12, %r9
-	movq	%r9, 64(%rdi)
-	adcq	%r15, %rax
-	movq	%rax, 72(%rdi)
-	adcq	%r11, %rbx
-	movq	%rbx, 80(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 88(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end94:
-	.size	mcl_fpDbl_sub6L, .Lfunc_end94-mcl_fpDbl_sub6L
-
-	.globl	mcl_fp_mulUnitPre7L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre7L,@function
-mcl_fp_mulUnitPre7L:                    # @mcl_fp_mulUnitPre7L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rcx, %rax
-	mulq	48(%rsi)
-	movq	%rdx, %r10
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	40(%rsi)
-	movq	%rdx, %r11
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	32(%rsi)
-	movq	%rdx, %r15
-	movq	%rax, %r14
-	movq	%rcx, %rax
-	mulq	24(%rsi)
-	movq	%rdx, %r13
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	16(%rsi)
-	movq	%rdx, %rbx
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	8(%rsi)
-	movq	%rdx, %r8
-	movq	%rax, %r9
-	movq	%rcx, %rax
-	mulq	(%rsi)
-	movq	%rax, (%rdi)
-	addq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%rbp, %r8
-	movq	%r8, 16(%rdi)
-	adcq	%r12, %rbx
-	movq	%rbx, 24(%rdi)
-	adcq	%r14, %r13
-	movq	%r13, 32(%rdi)
-	adcq	-16(%rsp), %r15         # 8-byte Folded Reload
-	movq	%r15, 40(%rdi)
-	adcq	-8(%rsp), %r11          # 8-byte Folded Reload
-	movq	%r11, 48(%rdi)
-	adcq	$0, %r10
-	movq	%r10, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end95:
-	.size	mcl_fp_mulUnitPre7L, .Lfunc_end95-mcl_fp_mulUnitPre7L
-
-	.globl	mcl_fpDbl_mulPre7L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre7L,@function
-mcl_fpDbl_mulPre7L:                     # @mcl_fpDbl_mulPre7L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$24, %rsp
-	movq	%rdx, 8(%rsp)           # 8-byte Spill
-	movq	%rsi, %r9
-	movq	%rdi, 16(%rsp)          # 8-byte Spill
-	movq	(%r9), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	8(%r9), %r10
-	movq	%r10, -64(%rsp)         # 8-byte Spill
-	movq	(%rdx), %rsi
-	mulq	%rsi
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	16(%r9), %r11
-	movq	%r11, -72(%rsp)         # 8-byte Spill
-	movq	24(%r9), %rbx
-	movq	%rbx, -56(%rsp)         # 8-byte Spill
-	movq	32(%r9), %rbp
-	movq	%rbp, -24(%rsp)         # 8-byte Spill
-	movq	40(%r9), %rcx
-	movq	%rcx, -16(%rsp)         # 8-byte Spill
-	movq	48(%r9), %r14
-	movq	%rax, (%rdi)
-	movq	%r14, %rax
-	mulq	%rsi
-	movq	%rdx, %rdi
-	movq	%rax, (%rsp)            # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	%rsi
-	movq	%rdx, %rcx
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rsi
-	movq	%rdx, %rbp
-	movq	%rax, %r15
-	movq	%rbx, %rax
-	mulq	%rsi
-	movq	%rdx, %rbx
-	movq	%rax, %r8
-	movq	%r11, %rax
-	mulq	%rsi
-	movq	%rdx, %r12
-	movq	%rax, %r13
-	movq	%r10, %rax
-	mulq	%rsi
-	movq	%rdx, %rsi
-	movq	%rax, %r10
-	addq	-32(%rsp), %r10         # 8-byte Folded Reload
-	adcq	%r13, %rsi
-	adcq	%r8, %r12
-	adcq	%r15, %rbx
-	adcq	-40(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, -48(%rsp)         # 8-byte Spill
-	adcq	(%rsp), %rcx            # 8-byte Folded Reload
-	movq	%rcx, -40(%rsp)         # 8-byte Spill
-	adcq	$0, %rdi
-	movq	%rdi, -32(%rsp)         # 8-byte Spill
-	movq	8(%rsp), %r11           # 8-byte Reload
-	movq	8(%r11), %rcx
-	movq	%r14, %rax
-	mulq	%rcx
-	movq	%rdx, %r14
-	movq	%rax, (%rsp)            # 8-byte Spill
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rax, %r8
-	movq	-24(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, %r13
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, %r15
-	movq	-72(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, %rdi
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	mulq	%rcx
-	addq	%r10, %rax
-	movq	16(%rsp), %r10          # 8-byte Reload
-	movq	%rax, 8(%r10)
-	adcq	%rsi, %rdi
-	adcq	%r12, %rbp
-	adcq	%rbx, %r15
-	adcq	-48(%rsp), %r13         # 8-byte Folded Reload
-	movq	%r8, %rcx
-	adcq	-40(%rsp), %rcx         # 8-byte Folded Reload
-	movq	(%rsp), %rax            # 8-byte Reload
-	adcq	-32(%rsp), %rax         # 8-byte Folded Reload
-	sbbq	%r8, %r8
-	andl	$1, %r8d
-	addq	%rdx, %rdi
-	adcq	-64(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-72(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-56(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -88(%rsp)         # 8-byte Spill
-	adcq	-16(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, (%rsp)            # 8-byte Spill
-	adcq	%r14, %r8
-	movq	48(%r9), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	16(%r11), %rcx
-	mulq	%rcx
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	40(%r9), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	32(%r9), %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r12
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	24(%r9), %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r14
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	16(%r9), %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %rbx
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	(%r9), %rsi
-	movq	8(%r9), %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %r11
-	movq	%rsi, %rax
-	mulq	%rcx
-	addq	%rdi, %rax
-	movq	%rax, 16(%r10)
-	adcq	%rbp, %r11
-	adcq	%r15, %rbx
-	adcq	%r13, %r14
-	adcq	-88(%rsp), %r12         # 8-byte Folded Reload
-	movq	-16(%rsp), %rdi         # 8-byte Reload
-	adcq	(%rsp), %rdi            # 8-byte Folded Reload
-	movq	-96(%rsp), %rax         # 8-byte Reload
-	adcq	%r8, %rax
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%rdx, %r11
-	adcq	-120(%rsp), %rbx        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r14        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r12        # 8-byte Folded Reload
-	adcq	-80(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, -16(%rsp)         # 8-byte Spill
-	adcq	-72(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, %rdi
-	adcq	-40(%rsp), %rcx         # 8-byte Folded Reload
-	movq	8(%rsp), %rax           # 8-byte Reload
-	movq	24(%rax), %rbp
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, (%rsp)            # 8-byte Spill
-	movq	-24(%rsp), %rax         # 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	-32(%rsp), %rax         # 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, %r13
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, %r15
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, %r8
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, %r10
-	movq	%rsi, %rax
-	mulq	%rbp
-	addq	%r11, %rax
-	movq	16(%rsp), %rsi          # 8-byte Reload
-	movq	%rax, 24(%rsi)
-	adcq	%rbx, %r10
-	adcq	%r14, %r8
-	adcq	%r12, %r15
-	adcq	-16(%rsp), %r13         # 8-byte Folded Reload
-	movq	-8(%rsp), %rsi          # 8-byte Reload
-	adcq	%rdi, %rsi
-	movq	(%rsp), %rax            # 8-byte Reload
-	adcq	%rcx, %rax
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	addq	%rdx, %r10
-	adcq	-64(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-56(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-48(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-32(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -8(%rsp)          # 8-byte Spill
-	adcq	-24(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, (%rsp)            # 8-byte Spill
-	adcq	-40(%rsp), %rdi         # 8-byte Folded Reload
-	movq	48(%r9), %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	8(%rsp), %rbx           # 8-byte Reload
-	movq	32(%rbx), %rcx
-	mulq	%rcx
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	40(%r9), %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	movq	32(%r9), %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r12
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	24(%r9), %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %rbp
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	16(%r9), %rax
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r14
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	(%r9), %rsi
-	movq	%rsi, -80(%rsp)         # 8-byte Spill
-	movq	8(%r9), %rax
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	mulq	%rcx
-	movq	%rdx, -128(%rsp)        # 8-byte Spill
-	movq	%rax, %r11
-	movq	%rsi, %rax
-	mulq	%rcx
-	addq	%r10, %rax
-	movq	16(%rsp), %rcx          # 8-byte Reload
-	movq	%rax, 32(%rcx)
-	adcq	%r8, %r11
-	adcq	%r15, %r14
-	adcq	%r13, %rbp
-	adcq	-8(%rsp), %r12          # 8-byte Folded Reload
-	movq	-24(%rsp), %rcx         # 8-byte Reload
-	adcq	(%rsp), %rcx            # 8-byte Folded Reload
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	adcq	%rdi, %rax
-	sbbq	%r13, %r13
-	andl	$1, %r13d
-	addq	%rdx, %r11
-	adcq	-128(%rsp), %r14        # 8-byte Folded Reload
-	adcq	-120(%rsp), %rbp        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r12        # 8-byte Folded Reload
-	adcq	-104(%rsp), %rcx        # 8-byte Folded Reload
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	adcq	-96(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	adcq	-56(%rsp), %r13         # 8-byte Folded Reload
-	movq	40(%rbx), %rcx
-	movq	-32(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	movq	%rax, %rdi
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%rax, %r10
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, %r15
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, %rbx
-	movq	-72(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, %rsi
-	movq	-88(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, %r8
-	movq	-80(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	addq	%r11, %rax
-	movq	16(%rsp), %rcx          # 8-byte Reload
-	movq	%rax, 40(%rcx)
-	adcq	%r14, %r8
-	adcq	%rbp, %rsi
-	adcq	%r12, %rbx
-	adcq	-24(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-16(%rsp), %r10         # 8-byte Folded Reload
-	adcq	%r13, %rdi
-	movq	8(%rsp), %rax           # 8-byte Reload
-	movq	48(%rax), %r11
-	sbbq	%rcx, %rcx
-	movq	%r11, %rax
-	mulq	48(%r9)
-	movq	%rdx, 8(%rsp)           # 8-byte Spill
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	%r11, %rax
-	mulq	40(%r9)
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%r11, %rax
-	mulq	32(%r9)
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, %r13
-	movq	%r11, %rax
-	mulq	24(%r9)
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
-	movq	%r11, %rax
-	mulq	16(%r9)
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, %r14
-	movq	%r11, %rax
-	mulq	8(%r9)
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	%rax, %r12
-	movq	%r11, %rax
-	mulq	(%r9)
-	andl	$1, %ecx
-	addq	-96(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-72(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-40(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-32(%rsp), %r10         # 8-byte Folded Reload
-	adcq	-8(%rsp), %rdi          # 8-byte Folded Reload
-	adcq	(%rsp), %rcx            # 8-byte Folded Reload
-	addq	%rax, %r8
-	movq	16(%rsp), %r9           # 8-byte Reload
-	movq	%r8, 48(%r9)
-	adcq	%r12, %rsi
-	adcq	%r14, %rbx
-	adcq	%rbp, %r15
-	adcq	%r13, %r10
-	adcq	-88(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	-64(%rsp), %rcx         # 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%rdx, %rsi
-	adcq	-104(%rsp), %rbx        # 8-byte Folded Reload
-	movq	%r9, %rdx
-	movq	%rsi, 56(%rdx)
-	movq	%rbx, 64(%rdx)
-	adcq	-80(%rsp), %r15         # 8-byte Folded Reload
-	movq	%r15, 72(%rdx)
-	adcq	-56(%rsp), %r10         # 8-byte Folded Reload
-	movq	%r10, 80(%rdx)
-	adcq	-24(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, 88(%rdx)
-	adcq	-16(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, 96(%rdx)
-	adcq	8(%rsp), %rax           # 8-byte Folded Reload
-	movq	%rax, 104(%rdx)
-	addq	$24, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end96:
-	.size	mcl_fpDbl_mulPre7L, .Lfunc_end96-mcl_fpDbl_mulPre7L
-
-	.globl	mcl_fpDbl_sqrPre7L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre7L,@function
-mcl_fpDbl_sqrPre7L:                     # @mcl_fpDbl_sqrPre7L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$16, %rsp
-	movq	%rdi, 8(%rsp)           # 8-byte Spill
-	movq	16(%rsi), %r11
-	movq	%r11, -64(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %r14
-	movq	%r14, -48(%rsp)         # 8-byte Spill
-	movq	32(%rsi), %r9
-	movq	%r9, -24(%rsp)          # 8-byte Spill
-	movq	40(%rsi), %r10
-	movq	%r10, -16(%rsp)         # 8-byte Spill
-	movq	48(%rsi), %r8
-	movq	(%rsi), %rbp
-	movq	8(%rsi), %rbx
-	movq	%rbp, %rax
-	mulq	%rbp
-	movq	%rdx, %rcx
-	movq	%rax, (%rdi)
-	movq	%r8, %rax
-	mulq	%rbp
-	movq	%rdx, %r15
-	movq	%rax, (%rsp)            # 8-byte Spill
-	movq	%r10, %rax
-	mulq	%rbp
-	movq	%rdx, %rdi
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	%r9, %rax
-	mulq	%rbp
-	movq	%rdx, %r9
-	movq	%rax, %r10
-	movq	%r14, %rax
-	mulq	%rbp
-	movq	%rdx, %r13
-	movq	%rax, %r14
-	movq	%r11, %rax
-	mulq	%rbp
-	movq	%rdx, %r12
-	movq	%rax, %r11
-	movq	%rbx, %rax
-	mulq	%rbp
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	addq	%rax, %rcx
-	adcq	%rdx, %r11
-	adcq	%r14, %r12
-	adcq	%r10, %r13
-	adcq	-32(%rsp), %r9          # 8-byte Folded Reload
-	adcq	(%rsp), %rdi            # 8-byte Folded Reload
-	movq	%rdi, -40(%rsp)         # 8-byte Spill
-	adcq	$0, %r15
-	movq	%r15, -32(%rsp)         # 8-byte Spill
-	movq	%r8, %rax
-	mulq	%rbx
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	movq	%rax, %rdi
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rax, %r15
-	movq	-24(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, %r10
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, %r14
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
-	movq	%rbx, %rax
-	mulq	%rbx
-	movq	%rax, %rbx
-	addq	-56(%rsp), %rcx         # 8-byte Folded Reload
-	movq	8(%rsp), %rax           # 8-byte Reload
-	movq	%rcx, 8(%rax)
-	adcq	%r11, %rbx
-	adcq	%r12, %rbp
-	adcq	%r13, %r14
-	adcq	%r9, %r10
-	adcq	-40(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-32(%rsp), %rdi         # 8-byte Folded Reload
-	sbbq	%r8, %r8
-	andl	$1, %r8d
-	addq	-8(%rsp), %rbx          # 8-byte Folded Reload
-	adcq	%rdx, %rbp
-	adcq	-64(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-48(%rsp), %r10         # 8-byte Folded Reload
-	adcq	-24(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-16(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, -72(%rsp)         # 8-byte Spill
-	adcq	(%rsp), %r8             # 8-byte Folded Reload
-	movq	48(%rsi), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	16(%rsi), %rdi
-	mulq	%rdi
-	movq	%rax, (%rsp)            # 8-byte Spill
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	40(%rsi), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	mulq	%rdi
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	32(%rsi), %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r13
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %rcx
-	movq	%rcx, %rax
-	mulq	%rdi
-	movq	%rax, %r9
-	movq	%r9, -104(%rsp)         # 8-byte Spill
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r12
-	movq	%r12, -48(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	movq	%rax, %r11
-	movq	%r12, %rax
-	mulq	%rdi
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %r12
-	movq	%rdi, %rax
-	mulq	%rdi
-	movq	%rax, %rdi
-	addq	%rbx, %r12
-	movq	8(%rsp), %rax           # 8-byte Reload
-	movq	%r12, 16(%rax)
-	adcq	%rbp, %r11
-	adcq	%r14, %rdi
-	adcq	%r9, %r10
-	adcq	%r15, %r13
-	movq	-88(%rsp), %r14         # 8-byte Reload
-	adcq	-72(%rsp), %r14         # 8-byte Folded Reload
-	movq	(%rsp), %rax            # 8-byte Reload
-	adcq	%r8, %rax
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	-112(%rsp), %r11        # 8-byte Folded Reload
-	adcq	-96(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	%rdx, %r10
-	adcq	-16(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-56(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, (%rsp)            # 8-byte Spill
-	adcq	-40(%rsp), %rbx         # 8-byte Folded Reload
-	movq	-8(%rsp), %rax          # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, %r8
-	movq	-24(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	-32(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, %r9
-	movq	-80(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, %r15
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	%rcx
-	movq	%rax, %r12
-	movq	%rdx, -96(%rsp)         # 8-byte Spill
-	addq	%r11, %rbp
-	movq	8(%rsp), %rax           # 8-byte Reload
-	movq	%rbp, 24(%rax)
-	adcq	%rdi, %r15
-	adcq	-104(%rsp), %r10        # 8-byte Folded Reload
-	adcq	%r13, %r12
-	movq	%r9, %rcx
-	adcq	%r14, %rcx
-	movq	-8(%rsp), %rdi          # 8-byte Reload
-	adcq	(%rsp), %rdi            # 8-byte Folded Reload
-	adcq	%rbx, %r8
-	sbbq	%r14, %r14
-	andl	$1, %r14d
-	movq	(%rsi), %r9
-	movq	8(%rsi), %rbp
-	movq	40(%rsi), %r11
-	movq	%rbp, %rax
-	mulq	%r11
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%r9, %rax
-	mulq	%r11
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	32(%rsi), %rbx
-	movq	%rbp, %rax
-	mulq	%rbx
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%r9, %rax
-	mulq	%rbx
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	addq	-88(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-80(%rsp), %r10         # 8-byte Folded Reload
-	adcq	-16(%rsp), %r12         # 8-byte Folded Reload
-	adcq	-96(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -96(%rsp)         # 8-byte Spill
-	adcq	-72(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, -8(%rsp)          # 8-byte Spill
-	adcq	-64(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-56(%rsp), %r14         # 8-byte Folded Reload
-	movq	48(%rsi), %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	mulq	%rbx
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, %rcx
-	movq	%r11, %rax
-	mulq	%rbx
-	movq	%rax, %rbp
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %rax
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	mulq	%rbx
-	movq	%rax, %rdi
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %rax
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	mulq	%rbx
-	movq	%rdx, -128(%rsp)        # 8-byte Spill
-	movq	%rax, %r9
-	movq	%rbx, %rax
-	mulq	%rbx
-	movq	%rax, %r13
-	addq	-120(%rsp), %r15        # 8-byte Folded Reload
-	movq	8(%rsp), %rax           # 8-byte Reload
-	movq	%r15, 32(%rax)
-	adcq	-112(%rsp), %r10        # 8-byte Folded Reload
-	adcq	%r12, %r9
-	adcq	-96(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	-8(%rsp), %r13          # 8-byte Folded Reload
-	adcq	%rbp, %r8
-	adcq	%r14, %rcx
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	-104(%rsp), %r10        # 8-byte Folded Reload
-	adcq	(%rsp), %r9             # 8-byte Folded Reload
-	adcq	-128(%rsp), %rdi        # 8-byte Folded Reload
-	adcq	-88(%rsp), %r13         # 8-byte Folded Reload
-	adcq	%rdx, %r8
-	adcq	-16(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	-64(%rsp), %rbx         # 8-byte Folded Reload
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	mulq	%r11
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%rax, (%rsp)            # 8-byte Spill
-	movq	-72(%rsp), %rax         # 8-byte Reload
-	mulq	%r11
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, %r15
-	movq	-80(%rsp), %rax         # 8-byte Reload
-	mulq	%r11
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, %r14
-	movq	%r11, %rax
-	mulq	%r11
-	movq	%rax, %r12
-	addq	-48(%rsp), %r10         # 8-byte Folded Reload
-	movq	8(%rsp), %rax           # 8-byte Reload
-	movq	%r10, 40(%rax)
-	adcq	-40(%rsp), %r9          # 8-byte Folded Reload
-	adcq	%rdi, %r14
-	adcq	%r13, %r15
-	adcq	%rbp, %r8
-	adcq	%rcx, %r12
-	movq	(%rsp), %rax            # 8-byte Reload
-	adcq	%rbx, %rax
-	sbbq	%r11, %r11
-	andl	$1, %r11d
-	addq	-32(%rsp), %r9          # 8-byte Folded Reload
-	adcq	-24(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-56(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r8, -32(%rsp)          # 8-byte Spill
-	adcq	-16(%rsp), %r12         # 8-byte Folded Reload
-	adcq	%rdx, %rax
-	movq	%rax, (%rsp)            # 8-byte Spill
-	adcq	-8(%rsp), %r11          # 8-byte Folded Reload
-	movq	48(%rsi), %rcx
-	movq	%rcx, %rax
-	mulq	40(%rsi)
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	32(%rsi)
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rax, %rbx
-	movq	%rcx, %rax
-	mulq	24(%rsi)
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	16(%rsi)
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, %r10
-	movq	%rcx, %rax
-	mulq	8(%rsi)
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	(%rsi)
-	movq	%rdx, %r13
-	movq	%rax, %rsi
-	movq	%rcx, %rax
-	mulq	%rcx
-	addq	%r9, %rsi
-	movq	8(%rsp), %r9            # 8-byte Reload
-	movq	%rsi, 48(%r9)
-	adcq	%r14, %rdi
-	adcq	%r15, %r10
-	adcq	-32(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	%r12, %rbx
-	adcq	(%rsp), %r8             # 8-byte Folded Reload
-	adcq	%r11, %rax
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r13, %rdi
-	adcq	-48(%rsp), %r10         # 8-byte Folded Reload
-	movq	%r9, %rsi
-	movq	%rdi, 56(%rsi)
-	movq	%r10, 64(%rsi)
-	adcq	-40(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, 72(%rsi)
-	adcq	-24(%rsp), %rbx         # 8-byte Folded Reload
-	movq	%rbx, 80(%rsi)
-	adcq	-16(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r8, 88(%rsi)
-	adcq	-8(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 96(%rsi)
-	adcq	%rdx, %rcx
-	movq	%rcx, 104(%rsi)
-	addq	$16, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end97:
-	.size	mcl_fpDbl_sqrPre7L, .Lfunc_end97-mcl_fpDbl_sqrPre7L
-
-	.globl	mcl_fp_mont7L
-	.align	16, 0x90
-	.type	mcl_fp_mont7L,@function
-mcl_fp_mont7L:                          # @mcl_fp_mont7L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$96, %rsp
-	movq	%rdx, 24(%rsp)          # 8-byte Spill
-	movq	%rdi, -96(%rsp)         # 8-byte Spill
-	movq	48(%rsi), %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	movq	(%rdx), %rbx
-	mulq	%rbx
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	%rdx, %r15
-	movq	40(%rsi), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	mulq	%rbx
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	%rdx, %r12
-	movq	32(%rsi), %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %r9
-	movq	%r9, -24(%rsp)          # 8-byte Spill
-	movq	16(%rsi), %r10
-	movq	%r10, -16(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r13
-	movq	%r13, (%rsp)            # 8-byte Spill
-	movq	8(%rsi), %rsi
-	movq	%rsi, -8(%rsp)          # 8-byte Spill
-	mulq	%rbx
-	movq	%rdx, %r14
-	movq	%rax, %r8
-	movq	%r9, %rax
-	mulq	%rbx
-	movq	%rdx, %rdi
-	movq	%rax, %r9
-	movq	%r10, %rax
-	mulq	%rbx
-	movq	%rdx, %rbp
-	movq	%rax, %r10
-	movq	%rsi, %rax
-	mulq	%rbx
-	movq	%rdx, %rsi
-	movq	%rax, %r11
-	movq	%r13, %rax
-	mulq	%rbx
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	addq	%r11, %rdx
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	adcq	%r10, %rsi
-	movq	%rsi, -88(%rsp)         # 8-byte Spill
-	adcq	%r9, %rbp
-	movq	%rbp, -80(%rsp)         # 8-byte Spill
-	adcq	%r8, %rdi
-	movq	%rdi, -72(%rsp)         # 8-byte Spill
-	adcq	80(%rsp), %r14          # 8-byte Folded Reload
-	movq	%r14, -64(%rsp)         # 8-byte Spill
-	adcq	88(%rsp), %r12          # 8-byte Folded Reload
-	movq	%r12, -48(%rsp)         # 8-byte Spill
-	adcq	$0, %r15
-	movq	%r15, -40(%rsp)         # 8-byte Spill
-	movq	-8(%rcx), %rdx
-	movq	%rdx, 32(%rsp)          # 8-byte Spill
-	movq	%rax, %rdi
-	imulq	%rdx, %rdi
-	movq	(%rcx), %r12
-	movq	%r12, 40(%rsp)          # 8-byte Spill
-	movq	48(%rcx), %rdx
-	movq	%rdx, 64(%rsp)          # 8-byte Spill
-	movq	40(%rcx), %r9
-	movq	%r9, 88(%rsp)           # 8-byte Spill
-	movq	32(%rcx), %rbx
-	movq	%rbx, 80(%rsp)          # 8-byte Spill
-	movq	24(%rcx), %rsi
-	movq	%rsi, 72(%rsp)          # 8-byte Spill
-	movq	16(%rcx), %rbp
-	movq	%rbp, 48(%rsp)          # 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rdx
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%r9
-	movq	%rdx, %r14
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rbx
-	movq	%rdx, %r11
-	movq	%rax, %r15
-	movq	%rdi, %rax
-	mulq	%rsi
-	movq	%rdx, %rbx
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	%rbp
-	movq	%rdx, %r8
-	movq	%rax, %r13
-	movq	%rdi, %rax
-	mulq	%rcx
-	movq	%rdx, %rbp
-	movq	%rax, %r9
-	movq	%rdi, %rax
-	mulq	%r12
-	movq	%rdx, %r12
-	addq	%r9, %r12
-	adcq	%r13, %rbp
-	adcq	%r10, %r8
-	adcq	%r15, %rbx
-	adcq	-128(%rsp), %r11        # 8-byte Folded Reload
-	adcq	-120(%rsp), %r14        # 8-byte Folded Reload
-	movq	-56(%rsp), %rcx         # 8-byte Reload
-	adcq	$0, %rcx
-	addq	-112(%rsp), %rax        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r12        # 8-byte Folded Reload
-	adcq	-88(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-80(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-72(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-48(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-40(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -56(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	movq	24(%rsp), %rax          # 8-byte Reload
-	movq	8(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %r13
-	movq	%rdi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	%rdx, %rdi
-	addq	%r10, %rdi
-	adcq	%r13, %r15
-	adcq	-112(%rsp), %r9         # 8-byte Folded Reload
-	movq	%rcx, %rdx
-	adcq	-104(%rsp), %rdx        # 8-byte Folded Reload
-	adcq	-88(%rsp), %rsi         # 8-byte Folded Reload
-	movq	-48(%rsp), %rax         # 8-byte Reload
-	adcq	-80(%rsp), %rax         # 8-byte Folded Reload
-	movq	-40(%rsp), %rcx         # 8-byte Reload
-	adcq	$0, %rcx
-	movq	-64(%rsp), %r10         # 8-byte Reload
-	addq	%r12, %r10
-	movq	%r10, -64(%rsp)         # 8-byte Spill
-	adcq	%rbp, %rdi
-	adcq	%r8, %r15
-	adcq	%rbx, %r9
-	adcq	%r11, %rdx
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	adcq	%r14, %rsi
-	movq	%rsi, -80(%rsp)         # 8-byte Spill
-	adcq	-56(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	adcq	-72(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -40(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	%r10, %rbp
-	imulq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, %rax
-	mulq	64(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	88(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	80(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	72(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rbx
-	movq	%rbp, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r10
-	movq	%rbp, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r11
-	addq	%r14, %r11
-	adcq	%r10, %rsi
-	adcq	%rbx, %rcx
-	adcq	-120(%rsp), %r13        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r12        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r8         # 8-byte Folded Reload
-	movq	-72(%rsp), %rbp         # 8-byte Reload
-	adcq	$0, %rbp
-	addq	-64(%rsp), %rax         # 8-byte Folded Reload
-	adcq	%rdi, %r11
-	adcq	%r15, %rsi
-	adcq	%r9, %rcx
-	adcq	-88(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-80(%rsp), %r12         # 8-byte Folded Reload
-	adcq	-48(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r8, -80(%rsp)          # 8-byte Spill
-	adcq	-40(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, -72(%rsp)         # 8-byte Spill
-	movq	-56(%rsp), %rbp         # 8-byte Reload
-	adcq	$0, %rbp
-	movq	24(%rsp), %rax          # 8-byte Reload
-	movq	16(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %r9
-	movq	%rdi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r14
-	movq	%rdi, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rax, %r10
-	movq	%rdx, %r8
-	addq	%r14, %r8
-	adcq	%r9, %rbx
-	adcq	-120(%rsp), %r15        # 8-byte Folded Reload
-	movq	-64(%rsp), %r9          # 8-byte Reload
-	adcq	-112(%rsp), %r9         # 8-byte Folded Reload
-	movq	-56(%rsp), %rdi         # 8-byte Reload
-	adcq	-104(%rsp), %rdi        # 8-byte Folded Reload
-	movq	-48(%rsp), %rdx         # 8-byte Reload
-	adcq	-88(%rsp), %rdx         # 8-byte Folded Reload
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%r11, %r10
-	adcq	%rsi, %r8
-	adcq	%rcx, %rbx
-	adcq	%r13, %r15
-	adcq	%r12, %r9
-	movq	%r9, -64(%rsp)          # 8-byte Spill
-	adcq	-80(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, -56(%rsp)         # 8-byte Spill
-	adcq	-72(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	adcq	%rbp, %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%r10, %rbp
-	imulq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, %rax
-	mulq	64(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	88(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	80(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	72(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %rcx
-	movq	%rbp, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r13
-	movq	%rbp, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r12
-	movq	%rbp, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r14
-	addq	%r12, %r14
-	adcq	%r13, %rsi
-	adcq	%rcx, %rdi
-	adcq	-120(%rsp), %r9         # 8-byte Folded Reload
-	adcq	-112(%rsp), %r11        # 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	adcq	-104(%rsp), %rdx        # 8-byte Folded Reload
-	movq	-72(%rsp), %rcx         # 8-byte Reload
-	adcq	$0, %rcx
-	addq	%r10, %rax
-	adcq	%r8, %r14
-	adcq	%rbx, %rsi
-	adcq	%r15, %rdi
-	adcq	-64(%rsp), %r9          # 8-byte Folded Reload
-	adcq	-56(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	adcq	-40(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -72(%rsp)         # 8-byte Spill
-	adcq	$0, -88(%rsp)           # 8-byte Folded Spill
-	movq	24(%rsp), %rax          # 8-byte Reload
-	movq	24(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rax, %r10
-	movq	%rdx, %r13
-	addq	%r12, %r13
-	adcq	%r8, %rbp
-	adcq	-120(%rsp), %rbx        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r15        # 8-byte Folded Reload
-	movq	-56(%rsp), %rdx         # 8-byte Reload
-	adcq	-104(%rsp), %rdx        # 8-byte Folded Reload
-	movq	-48(%rsp), %rcx         # 8-byte Reload
-	adcq	-64(%rsp), %rcx         # 8-byte Folded Reload
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%r14, %r10
-	adcq	%rsi, %r13
-	adcq	%rdi, %rbp
-	adcq	%r9, %rbx
-	adcq	%r11, %r15
-	adcq	-80(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	adcq	-72(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -48(%rsp)         # 8-byte Spill
-	adcq	-88(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	%r10, %rsi
-	imulq	32(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	64(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	88(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	80(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	72(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r9
-	movq	%rsi, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r11
-	movq	%rsi, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r14
-	addq	%r11, %r14
-	adcq	%r9, %r8
-	adcq	-120(%rsp), %rcx        # 8-byte Folded Reload
-	adcq	-112(%rsp), %rdi        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r12        # 8-byte Folded Reload
-	movq	-80(%rsp), %rsi         # 8-byte Reload
-	adcq	-88(%rsp), %rsi         # 8-byte Folded Reload
-	movq	-72(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r10, %rax
-	adcq	%r13, %r14
-	adcq	%rbp, %r8
-	adcq	%rbx, %rcx
-	adcq	%r15, %rdi
-	adcq	-56(%rsp), %r12         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -80(%rsp)         # 8-byte Spill
-	adcq	-40(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	-64(%rsp), %r11         # 8-byte Reload
-	adcq	$0, %r11
-	movq	24(%rsp), %rax          # 8-byte Reload
-	movq	32(%rax), %rbp
-	movq	%rbp, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r9
-	movq	%rbp, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %r15
-	movq	%rbp, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rax, %rsi
-	movq	%rdx, %r10
-	addq	%r15, %r10
-	adcq	%r9, %r13
-	adcq	-120(%rsp), %rbx        # 8-byte Folded Reload
-	movq	-64(%rsp), %r15         # 8-byte Reload
-	adcq	-112(%rsp), %r15        # 8-byte Folded Reload
-	movq	-56(%rsp), %rbp         # 8-byte Reload
-	adcq	-104(%rsp), %rbp        # 8-byte Folded Reload
-	movq	-48(%rsp), %rdx         # 8-byte Reload
-	adcq	-88(%rsp), %rdx         # 8-byte Folded Reload
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	movq	%rsi, %r9
-	addq	%r14, %r9
-	adcq	%r8, %r10
-	adcq	%rcx, %r13
-	adcq	%rdi, %rbx
-	adcq	%r12, %r15
-	movq	%r15, -64(%rsp)         # 8-byte Spill
-	adcq	-80(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, -56(%rsp)         # 8-byte Spill
-	adcq	-72(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	adcq	%r11, %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%r9, %rsi
-	movq	%r9, %r11
-	imulq	32(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	64(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	88(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	80(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	72(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %rdi
-	movq	%rsi, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r12
-	movq	%rsi, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r15
-	movq	%rsi, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r14
-	addq	%r15, %r14
-	adcq	%r12, %rcx
-	adcq	%rdi, %rbp
-	adcq	-120(%rsp), %r9         # 8-byte Folded Reload
-	adcq	-112(%rsp), %r8         # 8-byte Folded Reload
-	movq	-80(%rsp), %rsi         # 8-byte Reload
-	adcq	-104(%rsp), %rsi        # 8-byte Folded Reload
-	movq	-72(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r11, %rax
-	adcq	%r10, %r14
-	adcq	%r13, %rcx
-	adcq	%rbx, %rbp
-	adcq	-64(%rsp), %r9          # 8-byte Folded Reload
-	adcq	-56(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-48(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -80(%rsp)         # 8-byte Spill
-	adcq	-40(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	adcq	$0, -88(%rsp)           # 8-byte Folded Spill
-	movq	24(%rsp), %rax          # 8-byte Reload
-	movq	40(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r15
-	movq	%rdi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r13
-	movq	%rdi, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rax, %rdi
-	movq	%rdx, %r11
-	addq	%r13, %r11
-	adcq	%r15, %rsi
-	adcq	%r10, %rbx
-	adcq	-112(%rsp), %r12        # 8-byte Folded Reload
-	movq	-56(%rsp), %r10         # 8-byte Reload
-	adcq	-104(%rsp), %r10        # 8-byte Folded Reload
-	movq	-48(%rsp), %rdx         # 8-byte Reload
-	adcq	-64(%rsp), %rdx         # 8-byte Folded Reload
-	movq	-40(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%r14, %rdi
-	adcq	%rcx, %r11
-	adcq	%rbp, %rsi
-	adcq	%r9, %rbx
-	adcq	%r8, %r12
-	adcq	-80(%rsp), %r10         # 8-byte Folded Reload
-	movq	%r10, -56(%rsp)         # 8-byte Spill
-	adcq	-72(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	adcq	-88(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	%rdi, %rbp
-	imulq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, %rax
-	mulq	64(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	88(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	80(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	72(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %r8
-	movq	%rbp, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %r9
-	movq	%rbp, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r15
-	addq	%r9, %r15
-	adcq	%r8, %r13
-	adcq	-120(%rsp), %r10        # 8-byte Folded Reload
-	adcq	-112(%rsp), %rcx        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r14        # 8-byte Folded Reload
-	movq	-72(%rsp), %rdx         # 8-byte Reload
-	adcq	-88(%rsp), %rdx         # 8-byte Folded Reload
-	movq	-64(%rsp), %r8          # 8-byte Reload
-	adcq	$0, %r8
-	addq	%rdi, %rax
-	adcq	%r11, %r15
-	adcq	%rsi, %r13
-	adcq	%rbx, %r10
-	adcq	%r12, %rcx
-	adcq	-56(%rsp), %r14         # 8-byte Folded Reload
-	movq	%r14, -56(%rsp)         # 8-byte Spill
-	adcq	-48(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	adcq	-40(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r8, -64(%rsp)          # 8-byte Spill
-	movq	-80(%rsp), %rsi         # 8-byte Reload
-	adcq	$0, %rsi
-	movq	24(%rsp), %rax          # 8-byte Reload
-	movq	48(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          # 8-byte Spill
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, 24(%rsp)          # 8-byte Spill
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r12
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %rbp
-	movq	%rdi, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	addq	%rbp, %rdx
-	movq	%rdx, %rbp
-	adcq	%rbx, %r9
-	adcq	%r12, %r14
-	movq	%r8, %rdi
-	adcq	-32(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	8(%rsp), %r11           # 8-byte Folded Reload
-	movq	24(%rsp), %rbx          # 8-byte Reload
-	adcq	-40(%rsp), %rbx         # 8-byte Folded Reload
-	movq	16(%rsp), %r8           # 8-byte Reload
-	adcq	$0, %r8
-	addq	%r15, %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	adcq	%r13, %rbp
-	movq	%rbp, 8(%rsp)           # 8-byte Spill
-	adcq	%r10, %r9
-	movq	%r9, (%rsp)             # 8-byte Spill
-	adcq	%rcx, %r14
-	movq	%r14, -8(%rsp)          # 8-byte Spill
-	adcq	-56(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, -16(%rsp)         # 8-byte Spill
-	adcq	-72(%rsp), %r11         # 8-byte Folded Reload
-	movq	%r11, -24(%rsp)         # 8-byte Spill
-	adcq	-64(%rsp), %rbx         # 8-byte Folded Reload
-	movq	%rbx, 24(%rsp)          # 8-byte Spill
-	adcq	%rsi, %r8
-	movq	%r8, 16(%rsp)           # 8-byte Spill
-	sbbq	%rcx, %rcx
-	movq	32(%rsp), %r10          # 8-byte Reload
-	imulq	%rax, %r10
-	andl	$1, %ecx
-	movq	%r10, %rax
-	mulq	64(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, 32(%rsp)          # 8-byte Spill
-	movq	%r10, %rax
-	mulq	88(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%r10, %rax
-	mulq	80(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	%r10, %rax
-	mulq	72(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	%r10, %rax
-	movq	48(%rsp), %r13          # 8-byte Reload
-	mulq	%r13
-	movq	%rdx, %rbp
-	movq	%rax, %r12
-	movq	%r10, %rax
-	movq	40(%rsp), %r15          # 8-byte Reload
-	mulq	%r15
-	movq	%rdx, %r11
-	movq	%rax, %r8
-	movq	%r10, %rax
-	movq	56(%rsp), %r14          # 8-byte Reload
-	mulq	%r14
-	addq	%r11, %rax
-	adcq	%r12, %rdx
-	adcq	-56(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	-48(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-40(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	32(%rsp), %r9           # 8-byte Folded Reload
-	adcq	$0, %rbx
-	addq	-32(%rsp), %r8          # 8-byte Folded Reload
-	adcq	8(%rsp), %rax           # 8-byte Folded Reload
-	adcq	(%rsp), %rdx            # 8-byte Folded Reload
-	adcq	-8(%rsp), %rbp          # 8-byte Folded Reload
-	adcq	-16(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-24(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	24(%rsp), %r9           # 8-byte Folded Reload
-	adcq	16(%rsp), %rbx          # 8-byte Folded Reload
-	adcq	$0, %rcx
-	movq	%rax, %r8
-	subq	%r15, %r8
-	movq	%rdx, %r10
-	sbbq	%r14, %r10
-	movq	%rbp, %r11
-	sbbq	%r13, %r11
-	movq	%rsi, %r14
-	sbbq	72(%rsp), %r14          # 8-byte Folded Reload
-	movq	%rdi, %r15
-	sbbq	80(%rsp), %r15          # 8-byte Folded Reload
-	movq	%r9, %r12
-	sbbq	88(%rsp), %r12          # 8-byte Folded Reload
-	movq	%rbx, %r13
-	sbbq	64(%rsp), %r13          # 8-byte Folded Reload
-	sbbq	$0, %rcx
-	andl	$1, %ecx
-	cmovneq	%rbx, %r13
-	testb	%cl, %cl
-	cmovneq	%rax, %r8
-	movq	-96(%rsp), %rax         # 8-byte Reload
-	movq	%r8, (%rax)
-	cmovneq	%rdx, %r10
-	movq	%r10, 8(%rax)
-	cmovneq	%rbp, %r11
-	movq	%r11, 16(%rax)
-	cmovneq	%rsi, %r14
-	movq	%r14, 24(%rax)
-	cmovneq	%rdi, %r15
-	movq	%r15, 32(%rax)
-	cmovneq	%r9, %r12
-	movq	%r12, 40(%rax)
-	movq	%r13, 48(%rax)
-	addq	$96, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end98:
-	.size	mcl_fp_mont7L, .Lfunc_end98-mcl_fp_mont7L
-
-	.globl	mcl_fp_montNF7L
-	.align	16, 0x90
-	.type	mcl_fp_montNF7L,@function
-mcl_fp_montNF7L:                        # @mcl_fp_montNF7L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$80, %rsp
-	movq	%rdx, 16(%rsp)          # 8-byte Spill
-	movq	%rdi, -96(%rsp)         # 8-byte Spill
-	movq	48(%rsi), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	(%rdx), %rbp
-	mulq	%rbp
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	%rdx, %r9
-	movq	40(%rsi), %rax
-	movq	%rax, (%rsp)            # 8-byte Spill
-	mulq	%rbp
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	%rdx, %r11
-	movq	32(%rsi), %rax
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	24(%rsi), %r8
-	movq	%r8, -40(%rsp)          # 8-byte Spill
-	movq	16(%rsi), %rbx
-	movq	%rbx, -32(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r10
-	movq	%r10, -16(%rsp)         # 8-byte Spill
-	movq	8(%rsi), %rsi
-	movq	%rsi, -24(%rsp)         # 8-byte Spill
-	mulq	%rbp
-	movq	%rdx, %rdi
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	movq	%r8, %rax
-	mulq	%rbp
-	movq	%rdx, %r14
-	movq	%rax, %r15
-	movq	%rbx, %rax
-	mulq	%rbp
-	movq	%rdx, %rbx
-	movq	%rax, %r13
-	movq	%rsi, %rax
-	mulq	%rbp
-	movq	%rdx, %rsi
-	movq	%rax, %r12
-	movq	%r10, %rax
-	mulq	%rbp
-	movq	%rdx, %r8
-	addq	%r12, %r8
-	adcq	%r13, %rsi
-	movq	%rsi, -104(%rsp)        # 8-byte Spill
-	adcq	%r15, %rbx
-	movq	%rbx, -88(%rsp)         # 8-byte Spill
-	adcq	56(%rsp), %r14          # 8-byte Folded Reload
-	movq	%r14, %r12
-	adcq	64(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, -80(%rsp)         # 8-byte Spill
-	adcq	72(%rsp), %r11          # 8-byte Folded Reload
-	movq	%r11, -56(%rsp)         # 8-byte Spill
-	adcq	$0, %r9
-	movq	%r9, -64(%rsp)          # 8-byte Spill
-	movq	-8(%rcx), %rdx
-	movq	%rdx, 24(%rsp)          # 8-byte Spill
-	movq	%rax, %r9
-	movq	%rax, %r14
-	imulq	%rdx, %r9
-	movq	(%rcx), %r11
-	movq	%r11, 32(%rsp)          # 8-byte Spill
-	movq	48(%rcx), %rdx
-	movq	%rdx, 72(%rsp)          # 8-byte Spill
-	movq	40(%rcx), %r10
-	movq	%r10, 64(%rsp)          # 8-byte Spill
-	movq	32(%rcx), %rbp
-	movq	%rbp, 56(%rsp)          # 8-byte Spill
-	movq	24(%rcx), %rbx
-	movq	%rbx, 48(%rsp)          # 8-byte Spill
-	movq	16(%rcx), %rdi
-	movq	%rdi, 40(%rsp)          # 8-byte Spill
-	movq	8(%rcx), %rsi
-	movq	%rsi, -8(%rsp)          # 8-byte Spill
-	movq	%r9, %rax
-	mulq	%rdx
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	movq	%r9, %rax
-	mulq	%r10
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %r15
-	movq	%r9, %rax
-	mulq	%rbp
-	movq	%rdx, -128(%rsp)        # 8-byte Spill
-	movq	%rax, %r13
-	movq	%r9, %rax
-	mulq	%rbx
-	movq	%rdx, %rbx
-	movq	%rax, %rbp
-	movq	%r9, %rax
-	mulq	%rdi
-	movq	%rdx, %rcx
-	movq	%rax, %rdi
-	movq	%r9, %rax
-	mulq	%rsi
-	movq	%rdx, %r10
-	movq	%rax, %rsi
-	movq	%r9, %rax
-	mulq	%r11
-	addq	%r14, %rax
-	adcq	%r8, %rsi
-	adcq	-104(%rsp), %rdi        # 8-byte Folded Reload
-	adcq	-88(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	%r12, %r13
-	adcq	-80(%rsp), %r15         # 8-byte Folded Reload
-	movq	-72(%rsp), %r8          # 8-byte Reload
-	adcq	-56(%rsp), %r8          # 8-byte Folded Reload
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%rdx, %rsi
-	adcq	%r10, %rdi
-	adcq	%rcx, %rbp
-	adcq	%rbx, %r13
-	adcq	-128(%rsp), %r15        # 8-byte Folded Reload
-	adcq	-120(%rsp), %r8         # 8-byte Folded Reload
-	movq	%r8, -72(%rsp)          # 8-byte Spill
-	adcq	-112(%rsp), %rax        # 8-byte Folded Reload
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	8(%rax), %rbx
-	movq	%rbx, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r14
-	movq	%rbx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r10
-	movq	%rdx, %r12
-	addq	%r14, %r12
-	adcq	-128(%rsp), %rcx        # 8-byte Folded Reload
-	adcq	-120(%rsp), %r9         # 8-byte Folded Reload
-	adcq	-112(%rsp), %r8         # 8-byte Folded Reload
-	adcq	-104(%rsp), %r11        # 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	adcq	-88(%rsp), %rdx         # 8-byte Folded Reload
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%rsi, %r10
-	adcq	%rdi, %r12
-	adcq	%rbp, %rcx
-	adcq	%r13, %r9
-	adcq	%r15, %r8
-	adcq	-72(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-64(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	%r10, %rbx
-	imulq	24(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	72(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	64(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %r14
-	movq	%rbx, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        # 8-byte Spill
-	movq	%rax, %rdi
-	movq	%rbx, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %rbp
-	movq	%rbx, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rsi
-	movq	%rbx, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	addq	%r10, %rax
-	adcq	%r12, %rsi
-	adcq	%rcx, %rbp
-	adcq	%r9, %rdi
-	adcq	%r8, %r14
-	movq	-72(%rsp), %rcx         # 8-byte Reload
-	adcq	%r11, %rcx
-	movq	-64(%rsp), %r8          # 8-byte Reload
-	adcq	-80(%rsp), %r8          # 8-byte Folded Reload
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%rdx, %rsi
-	adcq	%r13, %rbp
-	adcq	%r15, %rdi
-	movq	%rdi, -88(%rsp)         # 8-byte Spill
-	adcq	-128(%rsp), %r14        # 8-byte Folded Reload
-	movq	%r14, -80(%rsp)         # 8-byte Spill
-	adcq	-120(%rsp), %rcx        # 8-byte Folded Reload
-	movq	%rcx, -72(%rsp)         # 8-byte Spill
-	adcq	-112(%rsp), %r8         # 8-byte Folded Reload
-	movq	%r8, -64(%rsp)          # 8-byte Spill
-	adcq	-104(%rsp), %rax        # 8-byte Folded Reload
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	16(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r9
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %r11
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r13
-	movq	%rdx, %rcx
-	addq	%r11, %rcx
-	adcq	%r9, %r15
-	adcq	%r12, %rbx
-	adcq	-120(%rsp), %rdi        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r14        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r10        # 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	%rsi, %r13
-	adcq	%rbp, %rcx
-	adcq	-88(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-80(%rsp), %rbx         # 8-byte Folded Reload
-	adcq	-72(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r14         # 8-byte Folded Reload
-	adcq	-56(%rsp), %r10         # 8-byte Folded Reload
-	adcq	$0, %r8
-	movq	%r13, %r9
-	imulq	24(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, %rax
-	mulq	72(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	%r9, %rax
-	mulq	64(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	movq	%r9, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	%rax, %rbp
-	movq	%r9, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	%rax, %r11
-	movq	%r9, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %r12
-	movq	%r9, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %rsi
-	movq	%r9, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	addq	%r13, %rax
-	adcq	%rcx, %rsi
-	adcq	%r15, %r12
-	adcq	%rbx, %r11
-	adcq	%rdi, %rbp
-	movq	-72(%rsp), %rcx         # 8-byte Reload
-	adcq	%r14, %rcx
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	adcq	%r10, %rax
-	adcq	$0, %r8
-	addq	%rdx, %rsi
-	adcq	-120(%rsp), %r12        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r11        # 8-byte Folded Reload
-	adcq	-104(%rsp), %rbp        # 8-byte Folded Reload
-	adcq	-88(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -72(%rsp)         # 8-byte Spill
-	adcq	-80(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	adcq	-56(%rsp), %r8          # 8-byte Folded Reload
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	24(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r9
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r14
-	movq	%rdx, %rdi
-	addq	%r9, %rdi
-	adcq	%rbx, %rcx
-	adcq	-120(%rsp), %r10        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r13        # 8-byte Folded Reload
-	adcq	-104(%rsp), %r15        # 8-byte Folded Reload
-	movq	-88(%rsp), %rdx         # 8-byte Reload
-	adcq	-80(%rsp), %rdx         # 8-byte Folded Reload
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%rsi, %r14
-	adcq	%r12, %rdi
-	adcq	%r11, %rcx
-	adcq	%rbp, %r10
-	adcq	-72(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r15         # 8-byte Folded Reload
-	adcq	%r8, %rdx
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	%r14, %rsi
-	imulq	24(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	72(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	64(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %r12
-	movq	%rsi, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %r8
-	movq	%rsi, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %rbx
-	movq	%rsi, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	addq	%r14, %rax
-	adcq	%rdi, %rbx
-	adcq	%rcx, %rbp
-	adcq	%r10, %r8
-	adcq	%r13, %r12
-	movq	-80(%rsp), %rsi         # 8-byte Reload
-	adcq	%r15, %rsi
-	movq	-72(%rsp), %rcx         # 8-byte Reload
-	adcq	-88(%rsp), %rcx         # 8-byte Folded Reload
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%rdx, %rbx
-	adcq	%r9, %rbp
-	adcq	%r11, %r8
-	adcq	-120(%rsp), %r12        # 8-byte Folded Reload
-	adcq	-112(%rsp), %rsi        # 8-byte Folded Reload
-	movq	%rsi, -80(%rsp)         # 8-byte Spill
-	adcq	-104(%rsp), %rcx        # 8-byte Folded Reload
-	movq	%rcx, -72(%rsp)         # 8-byte Spill
-	adcq	-64(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	32(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r14
-	movq	%rsi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r15
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r11
-	movq	%rdx, %r10
-	addq	%r15, %r10
-	adcq	%r14, %rdi
-	adcq	-128(%rsp), %rcx        # 8-byte Folded Reload
-	adcq	-120(%rsp), %r9         # 8-byte Folded Reload
-	adcq	-112(%rsp), %r13        # 8-byte Folded Reload
-	movq	-88(%rsp), %rdx         # 8-byte Reload
-	adcq	-104(%rsp), %rdx        # 8-byte Folded Reload
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%rbx, %r11
-	adcq	%rbp, %r10
-	adcq	%r8, %rdi
-	adcq	%r12, %rcx
-	adcq	-80(%rsp), %r9          # 8-byte Folded Reload
-	adcq	-72(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-56(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	%r11, %rsi
-	imulq	24(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	72(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	64(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %r14
-	movq	%rsi, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %r12
-	movq	%rsi, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %rbx
-	movq	%rsi, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	addq	%r11, %rax
-	adcq	%r10, %rbx
-	adcq	%rdi, %rbp
-	adcq	%rcx, %r12
-	adcq	%r9, %r14
-	movq	-72(%rsp), %rdi         # 8-byte Reload
-	adcq	%r13, %rdi
-	movq	-56(%rsp), %rcx         # 8-byte Reload
-	adcq	-88(%rsp), %rcx         # 8-byte Folded Reload
-	movq	-64(%rsp), %rax         # 8-byte Reload
-	adcq	$0, %rax
-	addq	%rdx, %rbx
-	adcq	%r8, %rbp
-	adcq	%r15, %r12
-	adcq	-120(%rsp), %r14        # 8-byte Folded Reload
-	movq	%r14, -88(%rsp)         # 8-byte Spill
-	adcq	-112(%rsp), %rdi        # 8-byte Folded Reload
-	movq	%rdi, -72(%rsp)         # 8-byte Spill
-	adcq	-104(%rsp), %rcx        # 8-byte Folded Reload
-	movq	%rcx, -56(%rsp)         # 8-byte Spill
-	adcq	-80(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	40(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -104(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r15
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r14
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r11
-	movq	%rdx, %r10
-	addq	%r14, %r10
-	adcq	%r15, %r8
-	adcq	-128(%rsp), %rdi        # 8-byte Folded Reload
-	adcq	-120(%rsp), %rsi        # 8-byte Folded Reload
-	adcq	-112(%rsp), %r13        # 8-byte Folded Reload
-	movq	-80(%rsp), %rax         # 8-byte Reload
-	adcq	-104(%rsp), %rax        # 8-byte Folded Reload
-	adcq	$0, %r9
-	addq	%rbx, %r11
-	adcq	%rbp, %r10
-	adcq	%r12, %r8
-	adcq	-88(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	-72(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-56(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-64(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	adcq	$0, %r9
-	movq	%r11, %rbx
-	imulq	24(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	72(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	64(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        # 8-byte Spill
-	movq	%rax, -64(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        # 8-byte Spill
-	movq	%rax, %r12
-	movq	%rbx, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, %r15
-	movq	%rbx, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        # 8-byte Spill
-	movq	%rax, %rbp
-	movq	%rbx, %rax
-	mulq	-8(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %rcx
-	movq	%rbx, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	addq	%r11, %rax
-	adcq	%r10, %rcx
-	adcq	%r8, %rbp
-	adcq	%rdi, %r15
-	adcq	%rsi, %r12
-	movq	-64(%rsp), %rsi         # 8-byte Reload
-	adcq	%r13, %rsi
-	movq	-56(%rsp), %rax         # 8-byte Reload
-	adcq	-80(%rsp), %rax         # 8-byte Folded Reload
-	adcq	$0, %r9
-	addq	%rdx, %rcx
-	adcq	%r14, %rbp
-	adcq	-120(%rsp), %r15        # 8-byte Folded Reload
-	adcq	-72(%rsp), %r12         # 8-byte Folded Reload
-	movq	%r12, -72(%rsp)         # 8-byte Spill
-	adcq	-112(%rsp), %rsi        # 8-byte Folded Reload
-	movq	%rsi, -64(%rsp)         # 8-byte Spill
-	adcq	-104(%rsp), %rax        # 8-byte Folded Reload
-	movq	%rax, -56(%rsp)         # 8-byte Spill
-	adcq	-88(%rsp), %r9          # 8-byte Folded Reload
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	48(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          # 8-byte Spill
-	movq	%rax, (%rsp)            # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, 8(%rsp)           # 8-byte Spill
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-40(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %rsi
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rax, %r12
-	movq	%rdx, %r8
-	addq	%rbx, %r8
-	adcq	%rsi, %r10
-	adcq	-40(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-48(%rsp), %r13         # 8-byte Folded Reload
-	movq	8(%rsp), %rdx           # 8-byte Reload
-	adcq	(%rsp), %rdx            # 8-byte Folded Reload
-	movq	16(%rsp), %rax          # 8-byte Reload
-	adcq	-80(%rsp), %rax         # 8-byte Folded Reload
-	adcq	$0, %r14
-	addq	%rcx, %r12
-	adcq	%rbp, %r8
-	adcq	%r15, %r10
-	adcq	-72(%rsp), %r11         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-56(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, 8(%rsp)           # 8-byte Spill
-	adcq	%r9, %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	adcq	$0, %r14
-	movq	24(%rsp), %rdi          # 8-byte Reload
-	imulq	%r12, %rdi
-	movq	%rdi, %rax
-	mulq	72(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, 24(%rsp)          # 8-byte Spill
-	movq	%rax, %r9
-	movq	%rdi, %rax
-	mulq	64(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	movq	%rax, %rbp
-	movq	%rdi, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rax, %rsi
-	movq	%rdi, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, %rcx
-	movq	%rdi, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, %r15
-	movq	%rdi, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	movq	-8(%rsp), %rdi          # 8-byte Reload
-	mulq	%rdi
-	addq	%r12, %r15
-	adcq	%r8, %rax
-	adcq	%r10, %rbx
-	adcq	%r11, %rcx
-	adcq	%r13, %rsi
-	adcq	8(%rsp), %rbp           # 8-byte Folded Reload
-	adcq	16(%rsp), %r9           # 8-byte Folded Reload
-	adcq	$0, %r14
-	addq	-32(%rsp), %rax         # 8-byte Folded Reload
-	adcq	%rdx, %rbx
-	adcq	-40(%rsp), %rcx         # 8-byte Folded Reload
-	adcq	-24(%rsp), %rsi         # 8-byte Folded Reload
-	adcq	-16(%rsp), %rbp         # 8-byte Folded Reload
-	adcq	(%rsp), %r9             # 8-byte Folded Reload
-	adcq	24(%rsp), %r14          # 8-byte Folded Reload
-	movq	%rax, %r13
-	subq	32(%rsp), %r13          # 8-byte Folded Reload
-	movq	%rbx, %r12
-	sbbq	%rdi, %r12
-	movq	%rcx, %r8
-	sbbq	40(%rsp), %r8           # 8-byte Folded Reload
-	movq	%rsi, %r10
-	sbbq	48(%rsp), %r10          # 8-byte Folded Reload
-	movq	%rbp, %r11
-	sbbq	56(%rsp), %r11          # 8-byte Folded Reload
-	movq	%r9, %r15
-	sbbq	64(%rsp), %r15          # 8-byte Folded Reload
-	movq	%r14, %rdx
-	sbbq	72(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	sarq	$63, %rdi
-	cmovsq	%rax, %r13
-	movq	-96(%rsp), %rax         # 8-byte Reload
-	movq	%r13, (%rax)
-	cmovsq	%rbx, %r12
-	movq	%r12, 8(%rax)
-	cmovsq	%rcx, %r8
-	movq	%r8, 16(%rax)
-	cmovsq	%rsi, %r10
-	movq	%r10, 24(%rax)
-	cmovsq	%rbp, %r11
-	movq	%r11, 32(%rax)
-	cmovsq	%r9, %r15
-	movq	%r15, 40(%rax)
-	cmovsq	%r14, %rdx
-	movq	%rdx, 48(%rax)
-	addq	$80, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end99:
-	.size	mcl_fp_montNF7L, .Lfunc_end99-mcl_fp_montNF7L
-
-	.globl	mcl_fp_montRed7L
-	.align	16, 0x90
-	.type	mcl_fp_montRed7L,@function
-mcl_fp_montRed7L:                       # @mcl_fp_montRed7L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$64, %rsp
-	movq	%rdx, %rcx
-	movq	%rdi, -104(%rsp)        # 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	(%rcx), %rdx
-	movq	%rdx, 32(%rsp)          # 8-byte Spill
-	movq	(%rsi), %rbp
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	imulq	%rax, %rbp
-	movq	48(%rcx), %rdx
-	movq	%rdx, -16(%rsp)         # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	40(%rcx), %rdx
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	movq	32(%rcx), %r10
-	movq	%r10, 56(%rsp)          # 8-byte Spill
-	movq	24(%rcx), %rdi
-	movq	%rdi, 48(%rsp)          # 8-byte Spill
-	movq	16(%rcx), %rbx
-	movq	%rbx, 40(%rsp)          # 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, 16(%rsp)          # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rdx, %r13
-	movq	%rax, %r9
-	movq	%rbp, %rax
-	mulq	%r10
-	movq	%rdx, %r15
-	movq	%rax, %r11
-	movq	%rbp, %rax
-	mulq	%rdi
-	movq	%rdx, %r10
-	movq	%rax, %r8
-	movq	%rbp, %rax
-	mulq	%rbx
-	movq	%rdx, %r14
-	movq	%rax, %rbx
-	movq	%rbp, %rax
-	mulq	%rcx
-	movq	%rdx, %r12
-	movq	%rax, %rdi
-	movq	%rbp, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	addq	%rdi, %rbp
-	adcq	%rbx, %r12
-	adcq	%r8, %r14
-	adcq	%r11, %r10
-	adcq	%r9, %r15
-	adcq	-8(%rsp), %r13          # 8-byte Folded Reload
-	movq	-48(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	24(%rsp), %rax          # 8-byte Folded Reload
-	adcq	8(%rsi), %rbp
-	adcq	16(%rsi), %r12
-	adcq	24(%rsi), %r14
-	adcq	32(%rsi), %r10
-	adcq	40(%rsi), %r15
-	adcq	48(%rsi), %r13
-	movq	%r13, -80(%rsp)         # 8-byte Spill
-	adcq	56(%rsi), %rdx
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	104(%rsi), %r8
-	movq	96(%rsi), %rdx
-	movq	88(%rsi), %rdi
-	movq	80(%rsi), %rbx
-	movq	72(%rsi), %rax
-	movq	64(%rsi), %rsi
-	adcq	$0, %rsi
-	movq	%rsi, -88(%rsp)         # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	adcq	$0, %rbx
-	movq	%rbx, -40(%rsp)         # 8-byte Spill
-	adcq	$0, %rdi
-	movq	%rdi, -32(%rsp)         # 8-byte Spill
-	adcq	$0, %rdx
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, -8(%rsp)           # 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, 24(%rsp)          # 8-byte Spill
-	movq	%rbp, %rdi
-	imulq	8(%rsp), %rdi           # 8-byte Folded Reload
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -128(%rsp)        # 8-byte Spill
-	movq	%rdi, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r11
-	movq	%rdi, %rax
-	mulq	%rcx
-	movq	%rdx, %r8
-	movq	%rax, %rcx
-	movq	%rdi, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r9
-	addq	%rcx, %r9
-	adcq	%r11, %r8
-	adcq	%rbx, %rsi
-	adcq	-128(%rsp), %r13        # 8-byte Folded Reload
-	movq	-72(%rsp), %rdi         # 8-byte Reload
-	adcq	-120(%rsp), %rdi        # 8-byte Folded Reload
-	movq	-64(%rsp), %rdx         # 8-byte Reload
-	adcq	-112(%rsp), %rdx        # 8-byte Folded Reload
-	movq	-56(%rsp), %rcx         # 8-byte Reload
-	adcq	$0, %rcx
-	addq	%rbp, %rax
-	adcq	%r12, %r9
-	adcq	%r14, %r8
-	adcq	%r10, %rsi
-	adcq	%r15, %r13
-	adcq	-80(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, -72(%rsp)         # 8-byte Spill
-	adcq	-48(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	adcq	-88(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -56(%rsp)         # 8-byte Spill
-	adcq	$0, -96(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -40(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -32(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -24(%rsp)           # 8-byte Folded Spill
-	movq	-8(%rsp), %rbx          # 8-byte Reload
-	adcq	$0, %rbx
-	adcq	$0, 24(%rsp)            # 8-byte Folded Spill
-	movq	%r9, %rcx
-	imulq	8(%rsp), %rcx           # 8-byte Folded Reload
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         # 8-byte Spill
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %r11
-	movq	%rcx, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r14
-	movq	%rcx, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r10
-	addq	%r14, %r10
-	adcq	%r12, %rdi
-	adcq	%r11, %rbp
-	adcq	-120(%rsp), %r15        # 8-byte Folded Reload
-	movq	-88(%rsp), %r11         # 8-byte Reload
-	adcq	-112(%rsp), %r11        # 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         # 8-byte Reload
-	adcq	-8(%rsp), %rdx          # 8-byte Folded Reload
-	movq	-48(%rsp), %rcx         # 8-byte Reload
-	adcq	$0, %rcx
-	addq	%r9, %rax
-	adcq	%r8, %r10
-	adcq	%rsi, %rdi
-	adcq	%r13, %rbp
-	adcq	-72(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r11         # 8-byte Folded Reload
-	movq	%r11, -88(%rsp)         # 8-byte Spill
-	adcq	-56(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         # 8-byte Spill
-	adcq	-96(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -48(%rsp)         # 8-byte Spill
-	adcq	$0, -40(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -32(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -24(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rbx
-	movq	%rbx, -8(%rsp)          # 8-byte Spill
-	adcq	$0, 24(%rsp)            # 8-byte Folded Spill
-	movq	%r10, %rbx
-	imulq	8(%rsp), %rbx           # 8-byte Folded Reload
-	movq	%rbx, %rax
-	movq	-16(%rsp), %r12         # 8-byte Reload
-	mulq	%r12
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -112(%rsp)        # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         # 8-byte Spill
-	movq	%rax, -120(%rsp)        # 8-byte Spill
-	movq	%rbx, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r14
-	movq	%rbx, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r13
-	movq	%rbx, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r11
-	movq	%rbx, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r9
-	addq	%r11, %r9
-	adcq	%r13, %rcx
-	adcq	%r14, %rsi
-	adcq	-120(%rsp), %r8         # 8-byte Folded Reload
-	movq	-72(%rsp), %r11         # 8-byte Reload
-	adcq	-112(%rsp), %r11        # 8-byte Folded Reload
-	movq	-64(%rsp), %rbx         # 8-byte Reload
-	adcq	-96(%rsp), %rbx         # 8-byte Folded Reload
-	movq	-56(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r10, %rax
-	adcq	%rdi, %r9
-	adcq	%rbp, %rcx
-	adcq	%r15, %rsi
-	adcq	-88(%rsp), %r8          # 8-byte Folded Reload
-	adcq	-80(%rsp), %r11         # 8-byte Folded Reload
-	movq	%r11, -72(%rsp)         # 8-byte Spill
-	adcq	-48(%rsp), %rbx         # 8-byte Folded Reload
-	movq	%rbx, -64(%rsp)         # 8-byte Spill
-	adcq	-40(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	adcq	$0, -32(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -24(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -8(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 24(%rsp)            # 8-byte Folded Spill
-	movq	%r9, %rbp
-	imulq	8(%rsp), %rbp           # 8-byte Folded Reload
-	movq	%rbp, %rax
-	mulq	%r12
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	(%rsp)                  # 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rbp, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r11
-	movq	%rbp, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r12
-	movq	%rbp, %rax
-	movq	32(%rsp), %rbp          # 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, %r10
-	addq	%r12, %r10
-	adcq	%r11, %rbx
-	adcq	%r14, %rdi
-	adcq	-96(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-88(%rsp), %r15         # 8-byte Folded Reload
-	movq	-48(%rsp), %r11         # 8-byte Reload
-	adcq	-80(%rsp), %r11         # 8-byte Folded Reload
-	movq	-40(%rsp), %rdx         # 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r9, %rax
-	adcq	%rcx, %r10
-	adcq	%rsi, %rbx
-	adcq	%r8, %rdi
-	adcq	-72(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-64(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-56(%rsp), %r11         # 8-byte Folded Reload
-	movq	%r11, -48(%rsp)         # 8-byte Spill
-	adcq	-32(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -40(%rsp)         # 8-byte Spill
-	adcq	$0, -24(%rsp)           # 8-byte Folded Spill
-	adcq	$0, -8(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 24(%rsp)            # 8-byte Folded Spill
-	movq	%r10, %rsi
-	imulq	8(%rsp), %rsi           # 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
-	movq	%rax, -72(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	movq	(%rsp), %r8             # 8-byte Reload
-	mulq	%r8
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	movq	%rax, -80(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         # 8-byte Spill
-	movq	%rax, -88(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -96(%rsp)         # 8-byte Spill
-	movq	%rsi, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %r11
-	movq	%rsi, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %rcx
-	movq	%rsi, %rax
-	mulq	%rbp
-	movq	%rdx, %rbp
-	addq	%rcx, %rbp
-	adcq	%r11, %r14
-	adcq	-96(%rsp), %r9          # 8-byte Folded Reload
-	adcq	-88(%rsp), %r12         # 8-byte Folded Reload
-	movq	-64(%rsp), %rsi         # 8-byte Reload
-	adcq	-80(%rsp), %rsi         # 8-byte Folded Reload
-	movq	-56(%rsp), %rdx         # 8-byte Reload
-	adcq	-72(%rsp), %rdx         # 8-byte Folded Reload
-	movq	-32(%rsp), %rcx         # 8-byte Reload
-	adcq	$0, %rcx
-	addq	%r10, %rax
-	adcq	%rbx, %rbp
-	adcq	%rdi, %r14
-	adcq	%r13, %r9
-	adcq	%r15, %r12
-	adcq	-48(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, -64(%rsp)         # 8-byte Spill
-	adcq	-40(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         # 8-byte Spill
-	adcq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -32(%rsp)         # 8-byte Spill
-	adcq	$0, -8(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 24(%rsp)            # 8-byte Folded Spill
-	movq	8(%rsp), %rcx           # 8-byte Reload
-	imulq	%rbp, %rcx
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               # 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	%r8
-	movq	%rdx, %r13
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	56(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	48(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -48(%rsp)         # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	40(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	16(%rsp)                # 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r11
-	movq	%rcx, %rax
-	mulq	32(%rsp)                # 8-byte Folded Reload
-	addq	%r11, %rdx
-	adcq	%r8, %rbx
-	adcq	-48(%rsp), %rdi         # 8-byte Folded Reload
-	adcq	-40(%rsp), %r10         # 8-byte Folded Reload
-	adcq	-24(%rsp), %r15         # 8-byte Folded Reload
-	adcq	8(%rsp), %r13           # 8-byte Folded Reload
-	adcq	$0, %rsi
-	addq	%rbp, %rax
-	adcq	%r14, %rdx
-	adcq	%r9, %rbx
-	adcq	%r12, %rdi
-	adcq	-64(%rsp), %r10         # 8-byte Folded Reload
-	adcq	-56(%rsp), %r15         # 8-byte Folded Reload
-	adcq	-32(%rsp), %r13         # 8-byte Folded Reload
-	adcq	-8(%rsp), %rsi          # 8-byte Folded Reload
-	movq	24(%rsp), %rcx          # 8-byte Reload
-	adcq	$0, %rcx
-	movq	%rdx, %rax
-	subq	32(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rbx, %rbp
-	sbbq	16(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rdi, %r8
-	sbbq	40(%rsp), %r8           # 8-byte Folded Reload
-	movq	%r10, %r9
-	sbbq	48(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r15, %r11
-	sbbq	56(%rsp), %r11          # 8-byte Folded Reload
-	movq	%r13, %r14
-	sbbq	(%rsp), %r14            # 8-byte Folded Reload
-	movq	%rsi, %r12
-	sbbq	-16(%rsp), %r12         # 8-byte Folded Reload
-	sbbq	$0, %rcx
-	andl	$1, %ecx
-	cmovneq	%rsi, %r12
-	testb	%cl, %cl
-	cmovneq	%rdx, %rax
-	movq	-104(%rsp), %rcx        # 8-byte Reload
-	movq	%rax, (%rcx)
-	cmovneq	%rbx, %rbp
-	movq	%rbp, 8(%rcx)
-	cmovneq	%rdi, %r8
-	movq	%r8, 16(%rcx)
-	cmovneq	%r10, %r9
-	movq	%r9, 24(%rcx)
-	cmovneq	%r15, %r11
-	movq	%r11, 32(%rcx)
-	cmovneq	%r13, %r14
-	movq	%r14, 40(%rcx)
-	movq	%r12, 48(%rcx)
-	addq	$64, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end100:
-	.size	mcl_fp_montRed7L, .Lfunc_end100-mcl_fp_montRed7L
-
-	.globl	mcl_fp_addPre7L
-	.align	16, 0x90
-	.type	mcl_fp_addPre7L,@function
-mcl_fp_addPre7L:                        # @mcl_fp_addPre7L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r8
-	movq	48(%rsi), %r14
-	movq	40(%rdx), %r9
-	movq	40(%rsi), %r15
-	movq	32(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %rbx
-	adcq	16(%rsi), %r12
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r12, 16(%rdi)
-	adcq	%r11, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 32(%rdi)
-	adcq	%r9, %r15
-	movq	%r15, 40(%rdi)
-	adcq	%r8, %r14
-	movq	%r14, 48(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end101:
-	.size	mcl_fp_addPre7L, .Lfunc_end101-mcl_fp_addPre7L
-
-	.globl	mcl_fp_subPre7L
-	.align	16, 0x90
-	.type	mcl_fp_subPre7L,@function
-mcl_fp_subPre7L:                        # @mcl_fp_subPre7L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r8
-	movq	48(%rsi), %r10
-	movq	40(%rdx), %r9
-	movq	40(%rsi), %r15
-	movq	24(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %r12
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %r12
-	movq	16(%rsi), %rcx
-	sbbq	16(%rdx), %rcx
-	movq	32(%rsi), %rdx
-	movq	24(%rsi), %rsi
-	movq	%rbx, (%rdi)
-	movq	%r12, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r11, %rsi
-	movq	%rsi, 24(%rdi)
-	sbbq	%r14, %rdx
-	movq	%rdx, 32(%rdi)
-	sbbq	%r9, %r15
-	movq	%r15, 40(%rdi)
-	sbbq	%r8, %r10
-	movq	%r10, 48(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end102:
-	.size	mcl_fp_subPre7L, .Lfunc_end102-mcl_fp_subPre7L
-
-	.globl	mcl_fp_shr1_7L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_7L,@function
-mcl_fp_shr1_7L:                         # @mcl_fp_shr1_7L
-# BB#0:
-	movq	48(%rsi), %r8
-	movq	40(%rsi), %r9
-	movq	32(%rsi), %r10
-	movq	24(%rsi), %rax
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rdx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rdx
-	movq	%rdx, (%rdi)
-	shrdq	$1, %rcx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rax, %rcx
-	movq	%rcx, 16(%rdi)
-	shrdq	$1, %r10, %rax
-	movq	%rax, 24(%rdi)
-	shrdq	$1, %r9, %r10
-	movq	%r10, 32(%rdi)
-	shrdq	$1, %r8, %r9
-	movq	%r9, 40(%rdi)
-	shrq	%r8
-	movq	%r8, 48(%rdi)
-	retq
-.Lfunc_end103:
-	.size	mcl_fp_shr1_7L, .Lfunc_end103-mcl_fp_shr1_7L
-
-	.globl	mcl_fp_add7L
-	.align	16, 0x90
-	.type	mcl_fp_add7L,@function
-mcl_fp_add7L:                           # @mcl_fp_add7L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r14
-	movq	48(%rsi), %r8
-	movq	40(%rdx), %r15
-	movq	40(%rsi), %r9
-	movq	32(%rdx), %r12
-	movq	24(%rdx), %r13
-	movq	16(%rdx), %r10
-	movq	(%rdx), %r11
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %r11
-	adcq	8(%rsi), %rdx
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %rbx
-	adcq	16(%rsi), %r10
-	movq	%r11, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	adcq	%r13, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r12, %rbx
-	movq	%rbx, 32(%rdi)
-	adcq	%r15, %r9
-	movq	%r9, 40(%rdi)
-	adcq	%r14, %r8
-	movq	%r8, 48(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %r11
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r10
-	sbbq	24(%rcx), %rax
-	sbbq	32(%rcx), %rbx
-	sbbq	40(%rcx), %r9
-	sbbq	48(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	.LBB104_2
-# BB#1:                                 # %nocarry
-	movq	%r11, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	movq	%rax, 24(%rdi)
-	movq	%rbx, 32(%rdi)
-	movq	%r9, 40(%rdi)
-	movq	%r8, 48(%rdi)
-.LBB104_2:                              # %carry
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-.Lfunc_end104:
-	.size	mcl_fp_add7L, .Lfunc_end104-mcl_fp_add7L
-
-	.globl	mcl_fp_addNF7L
-	.align	16, 0x90
-	.type	mcl_fp_addNF7L,@function
-mcl_fp_addNF7L:                         # @mcl_fp_addNF7L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r9
-	movq	40(%rdx), %rbp
-	movq	32(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	16(%rdx), %r14
-	movq	(%rdx), %r12
-	movq	8(%rdx), %r15
-	addq	(%rsi), %r12
-	adcq	8(%rsi), %r15
-	adcq	16(%rsi), %r14
-	adcq	24(%rsi), %r11
-	adcq	32(%rsi), %r10
-	adcq	40(%rsi), %rbp
-	movq	%rbp, -8(%rsp)          # 8-byte Spill
-	adcq	48(%rsi), %r9
-	movq	%r12, %rsi
-	subq	(%rcx), %rsi
-	movq	%r15, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r14, %rax
-	sbbq	16(%rcx), %rax
-	movq	%r11, %rbx
-	sbbq	24(%rcx), %rbx
-	movq	%r10, %r13
-	sbbq	32(%rcx), %r13
-	sbbq	40(%rcx), %rbp
-	movq	%r9, %r8
-	sbbq	48(%rcx), %r8
-	movq	%r8, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r12, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r15, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r14, %rax
-	movq	%rax, 16(%rdi)
-	cmovsq	%r11, %rbx
-	movq	%rbx, 24(%rdi)
-	cmovsq	%r10, %r13
-	movq	%r13, 32(%rdi)
-	cmovsq	-8(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 40(%rdi)
-	cmovsq	%r9, %r8
-	movq	%r8, 48(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end105:
-	.size	mcl_fp_addNF7L, .Lfunc_end105-mcl_fp_addNF7L
-
-	.globl	mcl_fp_sub7L
-	.align	16, 0x90
-	.type	mcl_fp_sub7L,@function
-mcl_fp_sub7L:                           # @mcl_fp_sub7L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r14
-	movq	48(%rsi), %r8
-	movq	40(%rdx), %r15
-	movq	40(%rsi), %r9
-	movq	32(%rdx), %r12
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r11
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r11
-	movq	16(%rsi), %r13
-	sbbq	16(%rdx), %r13
-	movq	32(%rsi), %r10
-	movq	24(%rsi), %rsi
-	sbbq	24(%rdx), %rsi
-	movq	%rax, (%rdi)
-	movq	%r11, 8(%rdi)
-	movq	%r13, 16(%rdi)
-	movq	%rsi, 24(%rdi)
-	sbbq	%r12, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	%r15, %r9
-	movq	%r9, 40(%rdi)
-	sbbq	%r14, %r8
-	movq	%r8, 48(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	.LBB106_2
-# BB#1:                                 # %carry
-	movq	48(%rcx), %r14
-	movq	40(%rcx), %r15
-	movq	32(%rcx), %r12
-	movq	24(%rcx), %rbx
-	movq	8(%rcx), %rdx
-	movq	16(%rcx), %rbp
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r11, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r13, %rbp
-	movq	%rbp, 16(%rdi)
-	adcq	%rsi, %rbx
-	movq	%rbx, 24(%rdi)
-	adcq	%r10, %r12
-	movq	%r12, 32(%rdi)
-	adcq	%r9, %r15
-	movq	%r15, 40(%rdi)
+	addq	%rdx, %rdi
 	adcq	%r8, %r14
-	movq	%r14, 48(%rdi)
-.LBB106_2:                              # %nocarry
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end106:
-	.size	mcl_fp_sub7L, .Lfunc_end106-mcl_fp_sub7L
-
-	.globl	mcl_fp_subNF7L
-	.align	16, 0x90
-	.type	mcl_fp_subNF7L,@function
-mcl_fp_subNF7L:                         # @mcl_fp_subNF7L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rcx, %r8
-	movq	48(%rsi), %r12
-	movq	40(%rsi), %rax
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %r10
-	movq	16(%rsi), %r11
-	movq	(%rsi), %r14
-	movq	8(%rsi), %r15
-	subq	(%rdx), %r14
-	sbbq	8(%rdx), %r15
-	sbbq	16(%rdx), %r11
-	sbbq	24(%rdx), %r10
-	sbbq	32(%rdx), %r9
-	sbbq	40(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	sbbq	48(%rdx), %r12
-	movq	%r12, %rax
-	sarq	$63, %rax
-	movq	%rax, %rsi
-	shldq	$1, %r12, %rsi
-	andq	(%r8), %rsi
-	movq	48(%r8), %r13
-	andq	%rax, %r13
-	movq	40(%r8), %rbx
-	andq	%rax, %rbx
-	movq	32(%r8), %rdx
-	andq	%rax, %rdx
-	movq	24(%r8), %rbp
-	andq	%rax, %rbp
-	movq	16(%r8), %rcx
-	andq	%rax, %rcx
-	andq	8(%r8), %rax
-	addq	%r14, %rsi
-	adcq	%r15, %rax
-	movq	%rsi, (%rdi)
-	movq	%rax, 8(%rdi)
-	adcq	%r11, %rcx
-	movq	%rcx, 16(%rdi)
-	adcq	%r10, %rbp
-	movq	%rbp, 24(%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 32(%rdi)
-	adcq	-8(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, 40(%rdi)
-	adcq	%r12, %r13
-	movq	%r13, 48(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end107:
-	.size	mcl_fp_subNF7L, .Lfunc_end107-mcl_fp_subNF7L
-
-	.globl	mcl_fpDbl_add7L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add7L,@function
-mcl_fpDbl_add7L:                        # @mcl_fpDbl_add7L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rcx, %r8
-	movq	104(%rdx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	96(%rdx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	88(%rdx), %r11
-	movq	80(%rdx), %r14
-	movq	24(%rsi), %r15
-	movq	32(%rsi), %r12
-	movq	16(%rdx), %r9
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rbx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rbx
-	adcq	16(%rsi), %r9
-	adcq	24(%rdx), %r15
-	adcq	32(%rdx), %r12
-	movq	72(%rdx), %r13
-	movq	64(%rdx), %rbp
-	movq	%rax, (%rdi)
-	movq	56(%rdx), %r10
-	movq	%rbx, 8(%rdi)
-	movq	48(%rdx), %rcx
-	movq	40(%rdx), %rdx
-	movq	%r9, 16(%rdi)
-	movq	104(%rsi), %r9
-	movq	%r15, 24(%rdi)
-	movq	40(%rsi), %rbx
-	adcq	%rdx, %rbx
-	movq	96(%rsi), %r15
-	movq	%r12, 32(%rdi)
-	movq	48(%rsi), %rdx
-	adcq	%rcx, %rdx
-	movq	88(%rsi), %rax
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rcx
-	adcq	%r10, %rcx
-	movq	80(%rsi), %r12
-	movq	%rdx, 48(%rdi)
-	movq	72(%rsi), %rdx
-	movq	64(%rsi), %rsi
-	adcq	%rbp, %rsi
-	adcq	%r13, %rdx
-	adcq	%r14, %r12
-	adcq	%r11, %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	adcq	-24(%rsp), %r15         # 8-byte Folded Reload
-	movq	%r15, -24(%rsp)         # 8-byte Spill
-	adcq	-16(%rsp), %r9          # 8-byte Folded Reload
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	movq	%rcx, %rbx
-	subq	(%r8), %rbx
-	movq	%rsi, %r10
-	sbbq	8(%r8), %r10
-	movq	%rdx, %r11
-	sbbq	16(%r8), %r11
-	movq	%r12, %r14
-	sbbq	24(%r8), %r14
-	movq	-8(%rsp), %r13          # 8-byte Reload
-	sbbq	32(%r8), %r13
-	sbbq	40(%r8), %r15
-	movq	%r9, %rax
-	sbbq	48(%r8), %rax
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	cmovneq	%rcx, %rbx
-	movq	%rbx, 56(%rdi)
-	testb	%bpl, %bpl
-	cmovneq	%rsi, %r10
-	movq	%r10, 64(%rdi)
-	cmovneq	%rdx, %r11
-	movq	%r11, 72(%rdi)
-	cmovneq	%r12, %r14
-	movq	%r14, 80(%rdi)
-	cmovneq	-8(%rsp), %r13          # 8-byte Folded Reload
-	movq	%r13, 88(%rdi)
-	cmovneq	-24(%rsp), %r15         # 8-byte Folded Reload
-	movq	%r15, 96(%rdi)
-	cmovneq	%r9, %rax
-	movq	%rax, 104(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end108:
-	.size	mcl_fpDbl_add7L, .Lfunc_end108-mcl_fpDbl_add7L
-
-	.globl	mcl_fpDbl_sub7L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub7L,@function
-mcl_fpDbl_sub7L:                        # @mcl_fpDbl_sub7L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rcx, %r8
-	movq	104(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	96(%rdx), %r10
-	movq	88(%rdx), %r14
-	movq	16(%rsi), %rax
-	movq	(%rsi), %r15
-	movq	8(%rsi), %r11
-	xorl	%ecx, %ecx
-	subq	(%rdx), %r15
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %rax
-	movq	24(%rsi), %rbx
-	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %r12
-	sbbq	32(%rdx), %r12
-	movq	80(%rdx), %r13
-	movq	72(%rdx), %rbp
-	movq	%r15, (%rdi)
-	movq	64(%rdx), %r9
-	movq	%r11, 8(%rdi)
-	movq	56(%rdx), %r15
-	movq	%rax, 16(%rdi)
-	movq	48(%rdx), %r11
-	movq	40(%rdx), %rdx
-	movq	%rbx, 24(%rdi)
-	movq	40(%rsi), %rbx
-	sbbq	%rdx, %rbx
-	movq	104(%rsi), %rax
-	movq	%r12, 32(%rdi)
-	movq	48(%rsi), %r12
-	sbbq	%r11, %r12
-	movq	96(%rsi), %r11
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rdx
-	sbbq	%r15, %rdx
-	movq	88(%rsi), %r15
-	movq	%r12, 48(%rdi)
-	movq	64(%rsi), %rbx
-	sbbq	%r9, %rbx
-	movq	80(%rsi), %r12
-	movq	72(%rsi), %r9
-	sbbq	%rbp, %r9
-	sbbq	%r13, %r12
-	sbbq	%r14, %r15
-	sbbq	%r10, %r11
-	sbbq	-8(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movl	$0, %ebp
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	movq	(%r8), %r10
-	cmoveq	%rcx, %r10
-	testb	%bpl, %bpl
-	movq	16(%r8), %rbp
-	cmoveq	%rcx, %rbp
-	movq	8(%r8), %rsi
-	cmoveq	%rcx, %rsi
-	movq	48(%r8), %r14
-	cmoveq	%rcx, %r14
-	movq	40(%r8), %r13
-	cmoveq	%rcx, %r13
-	movq	32(%r8), %rax
-	cmoveq	%rcx, %rax
-	cmovneq	24(%r8), %rcx
-	addq	%rdx, %r10
-	adcq	%rbx, %rsi
-	movq	%r10, 56(%rdi)
-	movq	%rsi, 64(%rdi)
-	adcq	%r9, %rbp
-	movq	%rbp, 72(%rdi)
-	adcq	%r12, %rcx
-	movq	%rcx, 80(%rdi)
-	adcq	%r15, %rax
-	movq	%rax, 88(%rdi)
-	adcq	%r11, %r13
-	movq	%r13, 96(%rdi)
-	adcq	-8(%rsp), %r14          # 8-byte Folded Reload
-	movq	%r14, 104(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end109:
-	.size	mcl_fpDbl_sub7L, .Lfunc_end109-mcl_fpDbl_sub7L
-
-	.align	16, 0x90
-	.type	.LmulPv512x64,@function
-.LmulPv512x64:                          # @mulPv512x64
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
+	adcq	-104(%rsp), %r10                # 8-byte Folded Reload
+	adcq	-96(%rsp), %r9                  # 8-byte Folded Reload
+	adcq	-128(%rsp), %rax                # 8-byte Folded Reload
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	adcq	-120(%rsp), %r13                # 8-byte Folded Reload
+	movq	-72(%rsp), %rax                 # 8-byte Reload
+	movq	24(%rax), %rbp
+	movq	%rbp, %rax
+	mulq	-80(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, -120(%rsp)                # 8-byte Spill
+	movq	%rbp, %rax
+	mulq	-88(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, -128(%rsp)                # 8-byte Spill
+	movq	%rbp, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %rcx
-	movq	%rcx, %rax
-	mulq	(%rsi)
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	movq	%rax, (%rdi)
-	movq	%rcx, %rax
-	mulq	56(%rsi)
-	movq	%rdx, %r10
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	%rcx, %rax
-	mulq	48(%rsi)
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rbp, %rax
+	mulq	8(%rsp)                         # 8-byte Folded Reload
+	movq	%rdx, %rsi
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rbp, %rax
+	mulq	-8(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r12
+	movq	%rbp, %rax
+	mulq	(%rsp)                          # 8-byte Folded Reload
+	movq	%rax, %r8
+	movq	%rdx, %rbp
+	addq	%r12, %rbp
+	adcq	-104(%rsp), %rbx                # 8-byte Folded Reload
+	adcq	-96(%rsp), %rsi                 # 8-byte Folded Reload
+	adcq	-128(%rsp), %rcx                # 8-byte Folded Reload
+	adcq	-120(%rsp), %r11                # 8-byte Folded Reload
+	adcq	$0, %r15
+	addq	%rdi, %r8
+	adcq	%r14, %rbp
+	adcq	%r10, %rbx
+	adcq	%r9, %rsi
+	adcq	-112(%rsp), %rcx                # 8-byte Folded Reload
+	adcq	%r13, %r11
+	adcq	$0, %r15
+	movq	-48(%rsp), %r13                 # 8-byte Reload
+	imulq	%r8, %r13
+	movq	%r13, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%r13, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movq	%rax, %r9
+	movq	%r13, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -112(%rsp)                # 8-byte Spill
+	movq	%rax, %r10
+	movq	%r13, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                # 8-byte Spill
+	movq	%rax, %r12
+	movq	%r13, %rax
+	mulq	16(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, %rdi
+	movq	%r13, %rax
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	addq	%r8, %rax
+	adcq	%rbp, %rdi
+	adcq	%rbx, %r12
+	adcq	%rsi, %r10
+	movq	%r9, %rax
+	adcq	%rcx, %rax
+	movq	-96(%rsp), %r9                  # 8-byte Reload
+	adcq	%r11, %r9
+	adcq	$0, %r15
+	addq	%rdx, %rdi
+	adcq	%r14, %r12
+	adcq	-104(%rsp), %r10                # 8-byte Folded Reload
+	adcq	-112(%rsp), %rax                # 8-byte Folded Reload
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	adcq	-128(%rsp), %r9                 # 8-byte Folded Reload
+	movq	%r9, %rcx
+	adcq	-120(%rsp), %r15                # 8-byte Folded Reload
+	movq	-72(%rsp), %rax                 # 8-byte Reload
+	movq	32(%rax), %rbp
+	movq	%rbp, %rax
+	mulq	-80(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, -120(%rsp)                # 8-byte Spill
+	movq	%rbp, %rax
+	mulq	-88(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r11
-	movq	%rax, -16(%rsp)         # 8-byte Spill
+	movq	%rax, -128(%rsp)                # 8-byte Spill
+	movq	%rbp, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r9
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rbp, %rax
+	mulq	8(%rsp)                         # 8-byte Folded Reload
+	movq	%rdx, %rsi
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rbp, %rax
+	mulq	-8(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r8
+	movq	%rbp, %rax
+	mulq	(%rsp)                          # 8-byte Folded Reload
+	movq	%rax, %r13
+	movq	%rdx, %rbp
+	addq	%r8, %rbp
+	adcq	-104(%rsp), %rbx                # 8-byte Folded Reload
+	adcq	-96(%rsp), %rsi                 # 8-byte Folded Reload
+	adcq	-128(%rsp), %r9                 # 8-byte Folded Reload
+	adcq	-120(%rsp), %r11                # 8-byte Folded Reload
+	adcq	$0, %r14
+	addq	%rdi, %r13
+	adcq	%r12, %rbp
+	adcq	%r10, %rbx
+	adcq	-112(%rsp), %rsi                # 8-byte Folded Reload
+	adcq	%rcx, %r9
+	adcq	%r15, %r11
+	adcq	$0, %r14
+	movq	-48(%rsp), %rcx                 # 8-byte Reload
+	imulq	%r13, %rcx
 	movq	%rcx, %rax
-	mulq	40(%rsi)
-	movq	%rdx, %r12
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -96(%rsp)                 # 8-byte Spill
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                # 8-byte Spill
 	movq	%rax, %r15
 	movq	%rcx, %rax
-	mulq	32(%rsi)
-	movq	%rdx, %rbx
-	movq	%rax, %r13
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	%rax, %r10
 	movq	%rcx, %rax
-	mulq	24(%rsi)
-	movq	%rdx, %rbp
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
 	movq	%rax, %r8
 	movq	%rcx, %rax
-	mulq	16(%rsi)
-	movq	%rdx, %r9
-	movq	%rax, %r14
+	mulq	16(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, %rdi
 	movq	%rcx, %rax
-	mulq	8(%rsi)
-	addq	-24(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, 8(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r8, %r9
-	movq	%r9, 24(%rdi)
-	adcq	%r13, %rbp
-	movq	%rbp, 32(%rdi)
-	adcq	%r15, %rbx
-	movq	%rbx, 40(%rdi)
-	adcq	-16(%rsp), %r12         # 8-byte Folded Reload
-	movq	%r12, 48(%rdi)
-	adcq	-8(%rsp), %r11          # 8-byte Folded Reload
-	movq	%r11, 56(%rdi)
-	adcq	$0, %r10
-	movq	%r10, 64(%rdi)
-	movq	%rdi, %rax
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end110:
-	.size	.LmulPv512x64, .Lfunc_end110-.LmulPv512x64
-
-	.globl	mcl_fp_mulUnitPre8L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre8L,@function
-mcl_fp_mulUnitPre8L:                    # @mcl_fp_mulUnitPre8L
-# BB#0:
-	pushq	%rbx
-	subq	$80, %rsp
-	movq	%rdi, %rbx
-	leaq	8(%rsp), %rdi
-	callq	.LmulPv512x64
-	movq	72(%rsp), %r8
-	movq	64(%rsp), %r9
-	movq	56(%rsp), %r10
-	movq	48(%rsp), %r11
-	movq	40(%rsp), %rdi
-	movq	32(%rsp), %rax
-	movq	24(%rsp), %rcx
-	movq	8(%rsp), %rdx
-	movq	16(%rsp), %rsi
-	movq	%rdx, (%rbx)
-	movq	%rsi, 8(%rbx)
-	movq	%rcx, 16(%rbx)
-	movq	%rax, 24(%rbx)
-	movq	%rdi, 32(%rbx)
-	movq	%r11, 40(%rbx)
-	movq	%r10, 48(%rbx)
-	movq	%r9, 56(%rbx)
-	movq	%r8, 64(%rbx)
-	addq	$80, %rsp
-	popq	%rbx
-	retq
-.Lfunc_end111:
-	.size	mcl_fp_mulUnitPre8L, .Lfunc_end111-mcl_fp_mulUnitPre8L
-
-	.globl	mcl_fpDbl_mulPre8L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre8L,@function
-mcl_fpDbl_mulPre8L:                     # @mcl_fpDbl_mulPre8L
-# BB#0:
-	pushq	%rbp
-	movq	%rsp, %rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$200, %rsp
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	addq	%r13, %rax
+	adcq	%rbp, %rdi
+	adcq	%rbx, %r8
+	adcq	%rsi, %r10
+	adcq	%r9, %r15
+	movq	-112(%rsp), %rcx                # 8-byte Reload
+	adcq	%r11, %rcx
+	adcq	$0, %r14
+	addq	%rdx, %rdi
+	adcq	%r12, %r8
+	adcq	-128(%rsp), %r10                # 8-byte Folded Reload
+	movq	%r10, -128(%rsp)                # 8-byte Spill
+	adcq	-120(%rsp), %r15                # 8-byte Folded Reload
+	movq	%r15, -120(%rsp)                # 8-byte Spill
+	adcq	-104(%rsp), %rcx                # 8-byte Folded Reload
+	movq	%rcx, -112(%rsp)                # 8-byte Spill
+	adcq	-96(%rsp), %r14                 # 8-byte Folded Reload
+	movq	-72(%rsp), %rax                 # 8-byte Reload
+	movq	40(%rax), %rcx
+	movq	%rcx, %rax
+	mulq	-80(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %rbx
-	movq	%rsi, %r15
-	movq	%rdi, %r14
-	callq	mcl_fpDbl_mulPre4L@PLT
-	leaq	64(%r14), %rdi
-	leaq	32(%r15), %rsi
-	leaq	32(%rbx), %rdx
-	callq	mcl_fpDbl_mulPre4L@PLT
-	movq	56(%rbx), %r10
-	movq	48(%rbx), %rcx
-	movq	(%rbx), %rdx
-	movq	8(%rbx), %rsi
-	addq	32(%rbx), %rdx
-	adcq	40(%rbx), %rsi
-	adcq	16(%rbx), %rcx
-	adcq	24(%rbx), %r10
-	pushfq
-	popq	%r8
-	xorl	%r9d, %r9d
-	movq	56(%r15), %rdi
-	movq	48(%r15), %r13
-	movq	(%r15), %r12
-	movq	8(%r15), %rbx
-	addq	32(%r15), %r12
-	adcq	40(%r15), %rbx
-	adcq	16(%r15), %r13
-	adcq	24(%r15), %rdi
-	movl	$0, %eax
-	cmovbq	%r10, %rax
-	movq	%rax, -176(%rbp)        # 8-byte Spill
-	movl	$0, %eax
-	cmovbq	%rcx, %rax
-	movq	%rax, -184(%rbp)        # 8-byte Spill
-	movl	$0, %eax
-	cmovbq	%rsi, %rax
-	movq	%rax, -192(%rbp)        # 8-byte Spill
-	movl	$0, %eax
-	cmovbq	%rdx, %rax
-	movq	%rax, -200(%rbp)        # 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%r12, -136(%rbp)
-	movq	%rbx, -128(%rbp)
-	movq	%r13, -120(%rbp)
-	movq	%rdi, -112(%rbp)
-	movq	%rdx, -168(%rbp)
-	movq	%rsi, -160(%rbp)
-	movq	%rcx, -152(%rbp)
-	movq	%r10, -144(%rbp)
-	pushq	%r8
-	popfq
-	cmovaeq	%r9, %rdi
-	movq	%rdi, -216(%rbp)        # 8-byte Spill
-	cmovaeq	%r9, %r13
-	cmovaeq	%r9, %rbx
-	cmovaeq	%r9, %r12
-	sbbq	%rax, %rax
-	movq	%rax, -208(%rbp)        # 8-byte Spill
-	leaq	-104(%rbp), %rdi
-	leaq	-136(%rbp), %rsi
-	leaq	-168(%rbp), %rdx
-	callq	mcl_fpDbl_mulPre4L@PLT
-	addq	-200(%rbp), %r12        # 8-byte Folded Reload
-	adcq	-192(%rbp), %rbx        # 8-byte Folded Reload
-	adcq	-184(%rbp), %r13        # 8-byte Folded Reload
-	movq	-216(%rbp), %r10        # 8-byte Reload
-	adcq	-176(%rbp), %r10        # 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	-208(%rbp), %rdx        # 8-byte Reload
-	andl	%edx, %r15d
-	andl	$1, %r15d
-	addq	-72(%rbp), %r12
-	adcq	-64(%rbp), %rbx
-	adcq	-56(%rbp), %r13
-	adcq	-48(%rbp), %r10
-	adcq	%rax, %r15
-	movq	-80(%rbp), %rax
-	movq	-88(%rbp), %rcx
-	movq	-104(%rbp), %rsi
-	movq	-96(%rbp), %rdx
-	subq	(%r14), %rsi
-	sbbq	8(%r14), %rdx
-	sbbq	16(%r14), %rcx
-	sbbq	24(%r14), %rax
-	movq	32(%r14), %rdi
-	movq	%rdi, -184(%rbp)        # 8-byte Spill
-	movq	40(%r14), %r8
-	movq	%r8, -176(%rbp)         # 8-byte Spill
-	sbbq	%rdi, %r12
-	sbbq	%r8, %rbx
-	movq	48(%r14), %rdi
-	movq	%rdi, -192(%rbp)        # 8-byte Spill
-	sbbq	%rdi, %r13
-	movq	56(%r14), %rdi
-	movq	%rdi, -200(%rbp)        # 8-byte Spill
-	sbbq	%rdi, %r10
-	sbbq	$0, %r15
-	movq	64(%r14), %r11
-	subq	%r11, %rsi
-	movq	72(%r14), %rdi
-	movq	%rdi, -208(%rbp)        # 8-byte Spill
-	sbbq	%rdi, %rdx
-	movq	80(%r14), %rdi
-	movq	%rdi, -216(%rbp)        # 8-byte Spill
-	sbbq	%rdi, %rcx
-	movq	88(%r14), %rdi
-	movq	%rdi, -224(%rbp)        # 8-byte Spill
-	sbbq	%rdi, %rax
-	movq	96(%r14), %rdi
-	movq	%rdi, -232(%rbp)        # 8-byte Spill
-	sbbq	%rdi, %r12
-	movq	104(%r14), %rdi
-	sbbq	%rdi, %rbx
-	movq	112(%r14), %r8
-	sbbq	%r8, %r13
-	movq	120(%r14), %r9
-	sbbq	%r9, %r10
-	sbbq	$0, %r15
-	addq	-184(%rbp), %rsi        # 8-byte Folded Reload
-	adcq	-176(%rbp), %rdx        # 8-byte Folded Reload
-	movq	%rsi, 32(%r14)
-	adcq	-192(%rbp), %rcx        # 8-byte Folded Reload
-	movq	%rdx, 40(%r14)
-	adcq	-200(%rbp), %rax        # 8-byte Folded Reload
-	movq	%rcx, 48(%r14)
-	adcq	%r11, %r12
-	movq	%rax, 56(%r14)
-	movq	%r12, 64(%r14)
-	adcq	-208(%rbp), %rbx        # 8-byte Folded Reload
-	movq	%rbx, 72(%r14)
-	adcq	-216(%rbp), %r13        # 8-byte Folded Reload
-	movq	%r13, 80(%r14)
-	adcq	-224(%rbp), %r10        # 8-byte Folded Reload
-	movq	%r10, 88(%r14)
-	adcq	-232(%rbp), %r15        # 8-byte Folded Reload
-	movq	%r15, 96(%r14)
-	adcq	$0, %rdi
-	movq	%rdi, 104(%r14)
-	adcq	$0, %r8
-	movq	%r8, 112(%r14)
-	adcq	$0, %r9
-	movq	%r9, 120(%r14)
-	addq	$200, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end112:
-	.size	mcl_fpDbl_mulPre8L, .Lfunc_end112-mcl_fpDbl_mulPre8L
-
-	.globl	mcl_fpDbl_sqrPre8L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre8L,@function
-mcl_fpDbl_sqrPre8L:                     # @mcl_fpDbl_sqrPre8L
-# BB#0:
-	pushq	%rbp
-	movq	%rsp, %rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$200, %rsp
-	movq	%rsi, %r14
-	movq	%rdi, %rbx
-	movq	%r14, %rdx
-	callq	mcl_fpDbl_mulPre4L@PLT
-	leaq	64(%rbx), %rdi
-	leaq	32(%r14), %rsi
-	movq	%rsi, %rdx
-	callq	mcl_fpDbl_mulPre4L@PLT
-	movq	(%r14), %r12
-	movq	8(%r14), %r15
-	addq	32(%r14), %r12
-	adcq	40(%r14), %r15
-	pushfq
-	popq	%rax
-	movq	%r12, -136(%rbp)
-	movq	%r12, -168(%rbp)
-	addq	%r12, %r12
-	movq	%r15, -128(%rbp)
-	movq	%r15, -160(%rbp)
-	adcq	%r15, %r15
-	pushfq
-	popq	%rcx
-	movq	56(%r14), %r13
-	movq	48(%r14), %rdx
-	pushq	%rax
-	popfq
-	adcq	16(%r14), %rdx
-	adcq	24(%r14), %r13
-	pushfq
-	popq	%r8
-	pushfq
-	popq	%rsi
-	pushfq
-	popq	%rdi
-	sbbq	%rax, %rax
-	movq	%rax, -184(%rbp)        # 8-byte Spill
-	xorl	%eax, %eax
-	pushq	%rdi
-	popfq
-	cmovaeq	%rax, %r15
-	movq	%r15, -176(%rbp)        # 8-byte Spill
-	cmovaeq	%rax, %r12
-	movq	%rdx, -120(%rbp)
-	movq	%rdx, -152(%rbp)
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-88(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-56(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r15
-	pushq	%rcx
-	popfq
-	adcq	%r15, %r15
-	movq	%r13, %r14
-	movq	%r13, -112(%rbp)
-	movq	%r13, -144(%rbp)
-	adcq	%r13, %r13
-	pushq	%rsi
-	popfq
-	cmovaeq	%rax, %r13
-	cmovaeq	%rax, %r15
-	shrq	$63, %r14
-	pushq	%r8
-	popfq
-	cmovaeq	%rax, %r14
-	leaq	-104(%rbp), %rdi
-	leaq	-136(%rbp), %rsi
-	leaq	-168(%rbp), %rdx
-	callq	mcl_fpDbl_mulPre4L@PLT
-	movq	-184(%rbp), %rax        # 8-byte Reload
-	andl	$1, %eax
-	addq	-72(%rbp), %r12
-	movq	-176(%rbp), %r8         # 8-byte Reload
-	adcq	-64(%rbp), %r8
-	adcq	-56(%rbp), %r15
-	adcq	-48(%rbp), %r13
-	adcq	%r14, %rax
-	movq	%rax, %rdi
-	movq	-80(%rbp), %rax
-	movq	-88(%rbp), %rcx
-	movq	-104(%rbp), %rsi
-	movq	-96(%rbp), %rdx
-	subq	(%rbx), %rsi
-	sbbq	8(%rbx), %rdx
-	sbbq	16(%rbx), %rcx
-	sbbq	24(%rbx), %rax
-	movq	32(%rbx), %r10
-	movq	%r10, -184(%rbp)        # 8-byte Spill
-	movq	40(%rbx), %r9
-	movq	%r9, -176(%rbp)         # 8-byte Spill
-	sbbq	%r10, %r12
-	sbbq	%r9, %r8
-	movq	%r8, %r10
-	movq	48(%rbx), %r8
-	movq	%r8, -192(%rbp)         # 8-byte Spill
-	sbbq	%r8, %r15
-	movq	56(%rbx), %r8
-	movq	%r8, -200(%rbp)         # 8-byte Spill
-	sbbq	%r8, %r13
-	sbbq	$0, %rdi
-	movq	64(%rbx), %r11
-	subq	%r11, %rsi
-	movq	72(%rbx), %r8
-	movq	%r8, -208(%rbp)         # 8-byte Spill
-	sbbq	%r8, %rdx
-	movq	80(%rbx), %r8
-	movq	%r8, -216(%rbp)         # 8-byte Spill
-	sbbq	%r8, %rcx
-	movq	88(%rbx), %r8
-	movq	%r8, -224(%rbp)         # 8-byte Spill
-	sbbq	%r8, %rax
-	movq	96(%rbx), %r8
-	movq	%r8, -232(%rbp)         # 8-byte Spill
-	sbbq	%r8, %r12
-	movq	104(%rbx), %r14
-	sbbq	%r14, %r10
-	movq	112(%rbx), %r8
-	sbbq	%r8, %r15
-	movq	120(%rbx), %r9
-	sbbq	%r9, %r13
-	sbbq	$0, %rdi
-	addq	-184(%rbp), %rsi        # 8-byte Folded Reload
-	adcq	-176(%rbp), %rdx        # 8-byte Folded Reload
-	movq	%rsi, 32(%rbx)
-	adcq	-192(%rbp), %rcx        # 8-byte Folded Reload
-	movq	%rdx, 40(%rbx)
-	adcq	-200(%rbp), %rax        # 8-byte Folded Reload
-	movq	%rcx, 48(%rbx)
-	adcq	%r11, %r12
-	movq	%rax, 56(%rbx)
-	movq	%r12, 64(%rbx)
-	adcq	-208(%rbp), %r10        # 8-byte Folded Reload
-	movq	%r10, 72(%rbx)
-	adcq	-216(%rbp), %r15        # 8-byte Folded Reload
-	movq	%r15, 80(%rbx)
-	adcq	-224(%rbp), %r13        # 8-byte Folded Reload
-	movq	%r13, 88(%rbx)
-	adcq	-232(%rbp), %rdi        # 8-byte Folded Reload
-	movq	%rdi, 96(%rbx)
-	adcq	$0, %r14
-	movq	%r14, 104(%rbx)
-	adcq	$0, %r8
-	movq	%r8, 112(%rbx)
-	adcq	$0, %r9
-	movq	%r9, 120(%rbx)
-	addq	$200, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end113:
-	.size	mcl_fpDbl_sqrPre8L, .Lfunc_end113-mcl_fpDbl_sqrPre8L
-
-	.globl	mcl_fp_mont8L
-	.align	16, 0x90
-	.type	mcl_fp_mont8L,@function
-mcl_fp_mont8L:                          # @mcl_fp_mont8L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$1256, %rsp             # imm = 0x4E8
-	movq	%rcx, %r13
-	movq	%r13, 40(%rsp)          # 8-byte Spill
-	movq	%rdx, 16(%rsp)          # 8-byte Spill
-	movq	%rsi, 24(%rsp)          # 8-byte Spill
-	movq	%rdi, (%rsp)            # 8-byte Spill
-	movq	-8(%r13), %rbx
-	movq	%rbx, 32(%rsp)          # 8-byte Spill
-	movq	(%rdx), %rdx
-	leaq	1184(%rsp), %rdi
-	callq	.LmulPv512x64
-	movq	1184(%rsp), %r15
-	movq	1192(%rsp), %r14
-	movq	%r15, %rdx
-	imulq	%rbx, %rdx
-	movq	1248(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	1240(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	1232(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	1224(%rsp), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	1216(%rsp), %r12
-	movq	1208(%rsp), %rbx
-	movq	1200(%rsp), %rbp
-	leaq	1112(%rsp), %rdi
-	movq	%r13, %rsi
-	callq	.LmulPv512x64
-	addq	1112(%rsp), %r15
-	adcq	1120(%rsp), %r14
-	adcq	1128(%rsp), %rbp
-	movq	%rbp, 8(%rsp)           # 8-byte Spill
-	adcq	1136(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          # 8-byte Spill
-	adcq	1144(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %r13          # 8-byte Reload
-	adcq	1152(%rsp), %r13
-	movq	88(%rsp), %rbx          # 8-byte Reload
-	adcq	1160(%rsp), %rbx
-	movq	80(%rsp), %rbp          # 8-byte Reload
-	adcq	1168(%rsp), %rbp
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	1176(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	sbbq	%r15, %r15
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1040(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	andl	$1, %r15d
-	addq	1040(%rsp), %r14
-	movq	8(%rsp), %rax           # 8-byte Reload
-	adcq	1048(%rsp), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	48(%rsp), %rax          # 8-byte Reload
-	adcq	1056(%rsp), %rax
-	movq	%rax, %r12
-	movq	64(%rsp), %rax          # 8-byte Reload
-	adcq	1064(%rsp), %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	adcq	1072(%rsp), %r13
-	movq	%r13, 72(%rsp)          # 8-byte Spill
-	adcq	1080(%rsp), %rbx
-	movq	%rbx, 88(%rsp)          # 8-byte Spill
-	adcq	1088(%rsp), %rbp
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	1096(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	adcq	1104(%rsp), %r15
-	movq	%r15, 56(%rsp)          # 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%r14, %rdx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	968(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	andl	$1, %r15d
-	addq	968(%rsp), %r14
-	movq	8(%rsp), %r13           # 8-byte Reload
-	adcq	976(%rsp), %r13
-	adcq	984(%rsp), %r12
-	movq	%r12, 48(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r14          # 8-byte Reload
-	adcq	992(%rsp), %r14
-	movq	72(%rsp), %rbx          # 8-byte Reload
-	adcq	1000(%rsp), %rbx
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	1008(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	adcq	1016(%rsp), %rbp
-	movq	%rbp, %r12
-	movq	96(%rsp), %rbp          # 8-byte Reload
-	adcq	1024(%rsp), %rbp
-	movq	56(%rsp), %rax          # 8-byte Reload
-	adcq	1032(%rsp), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r15
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	896(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	%r13, %rcx
-	addq	896(%rsp), %rcx
-	movq	48(%rsp), %r13          # 8-byte Reload
-	adcq	904(%rsp), %r13
-	adcq	912(%rsp), %r14
-	adcq	920(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	928(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	adcq	936(%rsp), %r12
-	movq	%r12, 80(%rsp)          # 8-byte Spill
-	adcq	944(%rsp), %rbp
-	movq	%rbp, 96(%rsp)          # 8-byte Spill
-	movq	56(%rsp), %r12          # 8-byte Reload
-	adcq	952(%rsp), %r12
-	adcq	960(%rsp), %r15
-	sbbq	%rbx, %rbx
-	movq	%rcx, %rdx
-	movq	%rcx, %rbp
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	824(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	andl	$1, %ebx
-	addq	824(%rsp), %rbp
-	adcq	832(%rsp), %r13
-	movq	%r13, 48(%rsp)          # 8-byte Spill
-	adcq	840(%rsp), %r14
-	movq	%r14, 64(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %r13          # 8-byte Reload
-	adcq	848(%rsp), %r13
-	movq	88(%rsp), %rbp          # 8-byte Reload
-	adcq	856(%rsp), %rbp
-	movq	80(%rsp), %r14          # 8-byte Reload
-	adcq	864(%rsp), %r14
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	872(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	adcq	880(%rsp), %r12
-	adcq	888(%rsp), %r15
-	adcq	$0, %rbx
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	752(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	48(%rsp), %rax          # 8-byte Reload
-	addq	752(%rsp), %rax
-	movq	64(%rsp), %rcx          # 8-byte Reload
-	adcq	760(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          # 8-byte Spill
-	adcq	768(%rsp), %r13
-	movq	%r13, 72(%rsp)          # 8-byte Spill
-	adcq	776(%rsp), %rbp
-	movq	%rbp, 88(%rsp)          # 8-byte Spill
-	adcq	784(%rsp), %r14
-	movq	%r14, 80(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %rbp          # 8-byte Reload
-	adcq	792(%rsp), %rbp
-	adcq	800(%rsp), %r12
-	adcq	808(%rsp), %r15
-	adcq	816(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          # 8-byte Spill
-	sbbq	%r13, %r13
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	680(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	%r13, %rax
-	andl	$1, %eax
-	addq	680(%rsp), %rbx
-	movq	64(%rsp), %r14          # 8-byte Reload
-	adcq	688(%rsp), %r14
-	movq	72(%rsp), %rcx          # 8-byte Reload
-	adcq	696(%rsp), %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %r13          # 8-byte Reload
-	adcq	704(%rsp), %r13
-	movq	80(%rsp), %rbx          # 8-byte Reload
-	adcq	712(%rsp), %rbx
-	adcq	720(%rsp), %rbp
-	movq	%rbp, 96(%rsp)          # 8-byte Spill
-	movq	%r12, %rbp
-	adcq	728(%rsp), %rbp
-	adcq	736(%rsp), %r15
-	movq	48(%rsp), %r12          # 8-byte Reload
-	adcq	744(%rsp), %r12
-	adcq	$0, %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	608(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
+	movq	%rax, -88(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	8(%rsp)                         # 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, %rbp
+	movq	%rcx, %rax
+	mulq	-8(%rsp)                        # 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %rsi
+	movq	%rcx, %rax
+	mulq	(%rsp)                          # 8-byte Folded Reload
+	movq	%rax, %r10
+	movq	%rdx, %r9
+	addq	%rsi, %r9
+	adcq	%rbp, %r13
+	adcq	-88(%rsp), %r12                 # 8-byte Folded Reload
+	adcq	-80(%rsp), %r15                 # 8-byte Folded Reload
+	adcq	-72(%rsp), %r11                 # 8-byte Folded Reload
+	adcq	$0, %rbx
+	addq	%rdi, %r10
+	adcq	%r8, %r9
+	adcq	-128(%rsp), %r13                # 8-byte Folded Reload
+	adcq	-120(%rsp), %r12                # 8-byte Folded Reload
+	adcq	-112(%rsp), %r15                # 8-byte Folded Reload
+	adcq	%r14, %r11
+	adcq	$0, %rbx
+	movq	-48(%rsp), %r14                 # 8-byte Reload
+	imulq	%r10, %r14
 	movq	%r14, %rax
-	addq	608(%rsp), %rax
-	movq	72(%rsp), %r14          # 8-byte Reload
-	adcq	616(%rsp), %r14
-	adcq	624(%rsp), %r13
-	movq	%r13, 88(%rsp)          # 8-byte Spill
-	adcq	632(%rsp), %rbx
-	movq	%rbx, %r13
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	640(%rsp), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	adcq	648(%rsp), %rbp
-	movq	%rbp, 56(%rsp)          # 8-byte Spill
-	adcq	656(%rsp), %r15
-	adcq	664(%rsp), %r12
-	movq	%r12, 48(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %rcx          # 8-byte Reload
-	adcq	672(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          # 8-byte Spill
-	sbbq	%rbp, %rbp
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	536(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	%rbp, %rax
-	andl	$1, %eax
-	addq	536(%rsp), %rbx
-	adcq	544(%rsp), %r14
-	movq	%r14, 72(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %rbx          # 8-byte Reload
-	adcq	552(%rsp), %rbx
-	adcq	560(%rsp), %r13
-	movq	96(%rsp), %rbp          # 8-byte Reload
-	adcq	568(%rsp), %rbp
-	movq	56(%rsp), %r12          # 8-byte Reload
-	adcq	576(%rsp), %r12
-	adcq	584(%rsp), %r15
-	movq	48(%rsp), %rcx          # 8-byte Reload
-	adcq	592(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r14          # 8-byte Reload
-	adcq	600(%rsp), %r14
-	adcq	$0, %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	40(%rax), %rdx
-	leaq	464(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	72(%rsp), %rax          # 8-byte Reload
-	addq	464(%rsp), %rax
-	adcq	472(%rsp), %rbx
-	adcq	480(%rsp), %r13
-	movq	%r13, 80(%rsp)          # 8-byte Spill
-	adcq	488(%rsp), %rbp
-	movq	%rbp, 96(%rsp)          # 8-byte Spill
-	adcq	496(%rsp), %r12
-	adcq	504(%rsp), %r15
-	movq	%r15, 72(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %r15          # 8-byte Reload
-	adcq	512(%rsp), %r15
-	adcq	520(%rsp), %r14
-	movq	%r14, 64(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %r14          # 8-byte Reload
-	adcq	528(%rsp), %r14
-	sbbq	%r13, %r13
-	movq	%rax, %rdx
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -48(%rsp)                 # 8-byte Spill
+	movq	%rax, %rdi
+	movq	%r14, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -72(%rsp)                 # 8-byte Spill
 	movq	%rax, %rbp
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	392(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	%r13, %rax
-	andl	$1, %eax
-	addq	392(%rsp), %rbp
-	adcq	400(%rsp), %rbx
-	movq	%rbx, 88(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %rbp          # 8-byte Reload
-	adcq	408(%rsp), %rbp
-	movq	96(%rsp), %rbx          # 8-byte Reload
-	adcq	416(%rsp), %rbx
-	adcq	424(%rsp), %r12
-	movq	72(%rsp), %r13          # 8-byte Reload
-	adcq	432(%rsp), %r13
-	adcq	440(%rsp), %r15
-	movq	%r15, 48(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r15          # 8-byte Reload
-	adcq	448(%rsp), %r15
-	adcq	456(%rsp), %r14
-	adcq	$0, %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	320(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	88(%rsp), %rax          # 8-byte Reload
-	addq	320(%rsp), %rax
-	adcq	328(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	adcq	336(%rsp), %rbx
-	movq	%rbx, 96(%rsp)          # 8-byte Spill
-	movq	%r12, %rbp
-	adcq	344(%rsp), %rbp
-	adcq	352(%rsp), %r13
-	movq	48(%rsp), %r12          # 8-byte Reload
-	adcq	360(%rsp), %r12
-	adcq	368(%rsp), %r15
-	movq	%r15, 64(%rsp)          # 8-byte Spill
-	adcq	376(%rsp), %r14
-	movq	%r14, 88(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %rcx          # 8-byte Reload
-	adcq	384(%rsp), %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	248(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	andl	$1, %r15d
-	addq	248(%rsp), %rbx
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	256(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %r14          # 8-byte Reload
-	adcq	264(%rsp), %r14
-	adcq	272(%rsp), %rbp
-	movq	%rbp, 56(%rsp)          # 8-byte Spill
-	movq	%r13, %rbx
-	adcq	280(%rsp), %rbx
-	movq	%r12, %rbp
-	adcq	288(%rsp), %rbp
-	movq	64(%rsp), %r13          # 8-byte Reload
-	adcq	296(%rsp), %r13
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	304(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	312(%rsp), %r12
-	adcq	$0, %r15
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	176(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	80(%rsp), %rax          # 8-byte Reload
-	addq	176(%rsp), %rax
-	adcq	184(%rsp), %r14
-	movq	%r14, 96(%rsp)          # 8-byte Spill
-	movq	56(%rsp), %rcx          # 8-byte Reload
-	adcq	192(%rsp), %rcx
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	200(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
-	adcq	208(%rsp), %rbp
-	adcq	216(%rsp), %r13
-	movq	%r13, 64(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %r14          # 8-byte Reload
-	adcq	224(%rsp), %r14
-	adcq	232(%rsp), %r12
-	adcq	240(%rsp), %r15
-	sbbq	%rbx, %rbx
-	movq	32(%rsp), %rdx          # 8-byte Reload
-	imulq	%rax, %rdx
-	movq	%rax, %r13
-	leaq	104(%rsp), %rdi
-	movq	40(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	andl	$1, %ebx
-	addq	104(%rsp), %r13
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	112(%rsp), %rcx
-	movq	56(%rsp), %rdx          # 8-byte Reload
-	adcq	120(%rsp), %rdx
-	movq	72(%rsp), %rsi          # 8-byte Reload
-	adcq	128(%rsp), %rsi
-	movq	%rbp, %rdi
-	adcq	136(%rsp), %rdi
-	movq	%rdi, 48(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r8           # 8-byte Reload
-	adcq	144(%rsp), %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	%r14, %r9
-	adcq	152(%rsp), %r9
-	movq	%r9, 88(%rsp)           # 8-byte Spill
-	adcq	160(%rsp), %r12
-	adcq	168(%rsp), %r15
+	movq	%r14, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -80(%rsp)                 # 8-byte Spill
+	movq	%rax, %rcx
+	movq	%r14, %rax
+	mulq	-32(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	movq	%rax, %r8
+	movq	%r14, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -56(%rsp)                 # 8-byte Spill
+	movq	%rax, %rsi
+	movq	%r14, %rax
+	movq	16(%rsp), %r14                  # 8-byte Reload
+	mulq	%r14
+	addq	%r10, %r8
+	adcq	%r9, %rax
+	adcq	%r13, %rsi
+	adcq	%r12, %rcx
+	adcq	%r15, %rbp
+	adcq	%r11, %rdi
 	adcq	$0, %rbx
-	movq	%rcx, %rax
-	movq	%rcx, %r11
-	movq	40(%rsp), %rbp          # 8-byte Reload
-	subq	(%rbp), %rax
-	movq	%rdx, %rcx
-	movq	%rdx, %r14
-	sbbq	8(%rbp), %rcx
-	movq	%rsi, %rdx
-	movq	%rsi, %r13
-	sbbq	16(%rbp), %rdx
-	movq	%rdi, %rsi
-	sbbq	24(%rbp), %rsi
-	movq	%r8, %rdi
-	sbbq	32(%rbp), %rdi
-	movq	%r9, %r10
-	sbbq	40(%rbp), %r10
-	movq	%r12, %r8
-	sbbq	48(%rbp), %r8
-	movq	%r15, %r9
-	sbbq	56(%rbp), %r9
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%r15, %r9
-	testb	%bl, %bl
-	cmovneq	%r11, %rax
-	movq	(%rsp), %rbx            # 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovneq	%r14, %rcx
-	movq	%rcx, 8(%rbx)
-	cmovneq	%r13, %rdx
-	movq	%rdx, 16(%rbx)
-	cmovneq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 24(%rbx)
-	cmovneq	64(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rbx)
-	cmovneq	88(%rsp), %r10          # 8-byte Folded Reload
-	movq	%r10, 40(%rbx)
-	cmovneq	%r12, %r8
-	movq	%r8, 48(%rbx)
-	movq	%r9, 56(%rbx)
-	addq	$1256, %rsp             # imm = 0x4E8
+	addq	-88(%rsp), %rax                 # 8-byte Folded Reload
+	adcq	%rdx, %rsi
+	adcq	-56(%rsp), %rcx                 # 8-byte Folded Reload
+	adcq	-80(%rsp), %rbp                 # 8-byte Folded Reload
+	adcq	-72(%rsp), %rdi                 # 8-byte Folded Reload
+	adcq	-48(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%rax, %r8
+	subq	-32(%rsp), %r8                  # 8-byte Folded Reload
+	movq	%rsi, %r9
+	sbbq	%r14, %r9
+	movq	%rcx, %r10
+	sbbq	-40(%rsp), %r10                 # 8-byte Folded Reload
+	movq	%rbp, %r11
+	sbbq	-24(%rsp), %r11                 # 8-byte Folded Reload
+	movq	%rdi, %r14
+	sbbq	-16(%rsp), %r14                 # 8-byte Folded Reload
+	movq	%rbx, %r15
+	sbbq	-64(%rsp), %r15                 # 8-byte Folded Reload
+	movq	%r15, %rdx
+	sarq	$63, %rdx
+	cmovsq	%rbx, %r15
+	movq	32(%rsp), %rdx                  # 8-byte Reload
+	movq	%r15, 40(%rdx)
+	cmovsq	%rdi, %r14
+	movq	%r14, 32(%rdx)
+	cmovsq	%rbp, %r11
+	movq	%r11, 24(%rdx)
+	cmovsq	%rcx, %r10
+	movq	%r10, 16(%rdx)
+	cmovsq	%rsi, %r9
+	movq	%r9, 8(%rdx)
+	cmovsq	%rax, %r8
+	movq	%r8, (%rdx)
+	addq	$40, %rsp
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -12248,397 +4631,335 @@ mcl_fp_mont8L:                          # @mcl_fp_mont8L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end114:
-	.size	mcl_fp_mont8L, .Lfunc_end114-mcl_fp_mont8L
-
-	.globl	mcl_fp_montNF8L
-	.align	16, 0x90
-	.type	mcl_fp_montNF8L,@function
-mcl_fp_montNF8L:                        # @mcl_fp_montNF8L
-# BB#0:
+.Lfunc_end44:
+	.size	mcl_fp_montNF6L, .Lfunc_end44-mcl_fp_montNF6L
+                                        # -- End function
+	.globl	mcl_fp_montRed6L                # -- Begin function mcl_fp_montRed6L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed6L,@function
+mcl_fp_montRed6L:                       # @mcl_fp_montRed6L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$1240, %rsp             # imm = 0x4D8
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	movq	%rdx, 16(%rsp)          # 8-byte Spill
-	movq	%rsi, 24(%rsp)          # 8-byte Spill
-	movq	%rdi, (%rsp)            # 8-byte Spill
-	movq	-8(%rcx), %rbx
-	movq	%rbx, 32(%rsp)          # 8-byte Spill
-	movq	(%rdx), %rdx
-	leaq	1168(%rsp), %rdi
-	callq	.LmulPv512x64
-	movq	1168(%rsp), %r15
-	movq	1176(%rsp), %r12
-	movq	%r15, %rdx
-	imulq	%rbx, %rdx
-	movq	1232(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	1224(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	1216(%rsp), %r13
-	movq	1208(%rsp), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	1200(%rsp), %r14
-	movq	1192(%rsp), %rbp
-	movq	1184(%rsp), %rbx
-	leaq	1096(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	1096(%rsp), %r15
-	adcq	1104(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	adcq	1112(%rsp), %rbx
-	adcq	1120(%rsp), %rbp
-	adcq	1128(%rsp), %r14
-	movq	%r14, %r12
-	movq	72(%rsp), %r14          # 8-byte Reload
-	adcq	1136(%rsp), %r14
-	adcq	1144(%rsp), %r13
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	1152(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %rax          # 8-byte Reload
-	adcq	1160(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1024(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	1088(%rsp), %r15
-	movq	64(%rsp), %rax          # 8-byte Reload
-	addq	1024(%rsp), %rax
-	adcq	1032(%rsp), %rbx
-	movq	%rbx, 8(%rsp)           # 8-byte Spill
-	movq	%rbp, %rbx
-	adcq	1040(%rsp), %rbx
-	adcq	1048(%rsp), %r12
-	adcq	1056(%rsp), %r14
-	movq	%r14, 72(%rsp)          # 8-byte Spill
-	movq	%r13, %rbp
-	adcq	1064(%rsp), %rbp
-	movq	80(%rsp), %rcx          # 8-byte Reload
-	adcq	1072(%rsp), %rcx
-	movq	%rcx, 80(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %r14          # 8-byte Reload
-	adcq	1080(%rsp), %r14
-	adcq	$0, %r15
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	952(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	952(%rsp), %r13
-	movq	8(%rsp), %rax           # 8-byte Reload
-	adcq	960(%rsp), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	adcq	968(%rsp), %rbx
-	movq	%rbx, 64(%rsp)          # 8-byte Spill
-	movq	%r12, %rbx
-	adcq	976(%rsp), %rbx
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	984(%rsp), %r12
-	adcq	992(%rsp), %rbp
-	movq	%rbp, 40(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %r13          # 8-byte Reload
-	adcq	1000(%rsp), %r13
-	movq	%r14, %rbp
-	adcq	1008(%rsp), %rbp
-	adcq	1016(%rsp), %r15
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	880(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	944(%rsp), %r14
-	movq	8(%rsp), %rax           # 8-byte Reload
-	addq	880(%rsp), %rax
-	movq	64(%rsp), %rcx          # 8-byte Reload
-	adcq	888(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          # 8-byte Spill
-	adcq	896(%rsp), %rbx
-	adcq	904(%rsp), %r12
-	movq	%r12, 72(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %rcx          # 8-byte Reload
-	adcq	912(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          # 8-byte Spill
-	adcq	920(%rsp), %r13
-	movq	%r13, 80(%rsp)          # 8-byte Spill
-	adcq	928(%rsp), %rbp
-	movq	%rbp, 48(%rsp)          # 8-byte Spill
-	adcq	936(%rsp), %r15
-	adcq	$0, %r14
-	movq	%rax, %rdx
-	movq	%rax, %rbp
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	808(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	808(%rsp), %rbp
-	movq	64(%rsp), %r13          # 8-byte Reload
-	adcq	816(%rsp), %r13
-	movq	%rbx, %r12
-	adcq	824(%rsp), %r12
-	movq	72(%rsp), %rbx          # 8-byte Reload
-	adcq	832(%rsp), %rbx
-	movq	40(%rsp), %rbp          # 8-byte Reload
-	adcq	840(%rsp), %rbp
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	848(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %rax          # 8-byte Reload
-	adcq	856(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	adcq	864(%rsp), %r15
-	adcq	872(%rsp), %r14
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	736(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	800(%rsp), %rax
-	movq	%r13, %rcx
-	addq	736(%rsp), %rcx
-	adcq	744(%rsp), %r12
-	movq	%r12, 40(%rsp)          # 8-byte Spill
-	adcq	752(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
-	adcq	760(%rsp), %rbp
-	movq	%rbp, %r13
-	movq	80(%rsp), %rbp          # 8-byte Reload
-	adcq	768(%rsp), %rbp
-	movq	48(%rsp), %rbx          # 8-byte Reload
-	adcq	776(%rsp), %rbx
-	adcq	784(%rsp), %r15
-	adcq	792(%rsp), %r14
-	adcq	$0, %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	%rcx, %rdx
-	movq	%rcx, %r12
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	664(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	664(%rsp), %r12
-	movq	40(%rsp), %rax          # 8-byte Reload
-	adcq	672(%rsp), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %rax          # 8-byte Reload
-	adcq	680(%rsp), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	adcq	688(%rsp), %r13
-	adcq	696(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	adcq	704(%rsp), %rbx
-	adcq	712(%rsp), %r15
-	adcq	720(%rsp), %r14
-	movq	64(%rsp), %r12          # 8-byte Reload
-	adcq	728(%rsp), %r12
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	592(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	656(%rsp), %rcx
-	movq	40(%rsp), %rax          # 8-byte Reload
-	addq	592(%rsp), %rax
-	movq	72(%rsp), %rbp          # 8-byte Reload
-	adcq	600(%rsp), %rbp
-	adcq	608(%rsp), %r13
-	movq	%r13, 40(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %r13          # 8-byte Reload
-	adcq	616(%rsp), %r13
-	adcq	624(%rsp), %rbx
-	adcq	632(%rsp), %r15
-	adcq	640(%rsp), %r14
-	adcq	648(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, 80(%rsp)          # 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	520(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	520(%rsp), %r12
-	adcq	528(%rsp), %rbp
-	movq	%rbp, 72(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %r12          # 8-byte Reload
-	adcq	536(%rsp), %r12
-	movq	%r13, %rbp
-	adcq	544(%rsp), %rbp
-	adcq	552(%rsp), %rbx
-	adcq	560(%rsp), %r15
-	adcq	568(%rsp), %r14
-	movq	64(%rsp), %r13          # 8-byte Reload
-	adcq	576(%rsp), %r13
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	584(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	40(%rax), %rdx
-	leaq	448(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	512(%rsp), %rcx
-	movq	72(%rsp), %rax          # 8-byte Reload
-	addq	448(%rsp), %rax
-	adcq	456(%rsp), %r12
-	movq	%r12, 40(%rsp)          # 8-byte Spill
-	adcq	464(%rsp), %rbp
-	adcq	472(%rsp), %rbx
-	adcq	480(%rsp), %r15
-	adcq	488(%rsp), %r14
-	adcq	496(%rsp), %r13
-	movq	%r13, 64(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %r13          # 8-byte Reload
-	adcq	504(%rsp), %r13
-	adcq	$0, %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	376(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	376(%rsp), %r12
-	movq	40(%rsp), %rax          # 8-byte Reload
-	adcq	384(%rsp), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	adcq	392(%rsp), %rbp
-	adcq	400(%rsp), %rbx
-	adcq	408(%rsp), %r15
-	adcq	416(%rsp), %r14
-	movq	64(%rsp), %r12          # 8-byte Reload
-	adcq	424(%rsp), %r12
-	adcq	432(%rsp), %r13
-	movq	72(%rsp), %rax          # 8-byte Reload
-	adcq	440(%rsp), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	304(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	368(%rsp), %rcx
-	movq	40(%rsp), %rax          # 8-byte Reload
-	addq	304(%rsp), %rax
-	adcq	312(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	adcq	320(%rsp), %rbx
-	adcq	328(%rsp), %r15
-	adcq	336(%rsp), %r14
-	adcq	344(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	adcq	352(%rsp), %r13
-	movq	72(%rsp), %rbp          # 8-byte Reload
-	adcq	360(%rsp), %rbp
-	adcq	$0, %rcx
-	movq	%rcx, 48(%rsp)          # 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	232(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	232(%rsp), %r12
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	240(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	adcq	248(%rsp), %rbx
-	adcq	256(%rsp), %r15
-	adcq	264(%rsp), %r14
-	movq	64(%rsp), %r12          # 8-byte Reload
-	adcq	272(%rsp), %r12
-	adcq	280(%rsp), %r13
-	adcq	288(%rsp), %rbp
-	movq	%rbp, 72(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %rbp          # 8-byte Reload
-	adcq	296(%rsp), %rbp
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	160(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	movq	224(%rsp), %rcx
-	movq	80(%rsp), %rax          # 8-byte Reload
-	addq	160(%rsp), %rax
-	adcq	168(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          # 8-byte Spill
-	adcq	176(%rsp), %r15
-	adcq	184(%rsp), %r14
-	adcq	192(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	adcq	200(%rsp), %r13
-	movq	72(%rsp), %rbx          # 8-byte Reload
-	adcq	208(%rsp), %rbx
-	adcq	216(%rsp), %rbp
-	movq	%rbp, %r12
-	adcq	$0, %rcx
-	movq	%rcx, 80(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rdx          # 8-byte Reload
-	imulq	%rax, %rdx
+	pushq	%rax
+	movq	%rdx, %rcx
+	movq	%rdi, (%rsp)                    # 8-byte Spill
+	movq	-8(%rdx), %rax
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	movq	(%rsi), %r9
+	movq	%r9, %rdi
+	imulq	%rax, %rdi
+	movq	40(%rdx), %rdx
+	movq	%rdx, -40(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, -128(%rsp)                # 8-byte Spill
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	32(%rcx), %rdx
+	movq	%rdx, -64(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r10
+	movq	%rdx, %r12
+	movq	24(%rcx), %rdx
+	movq	%rdx, -72(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r14
+	movq	%rdx, %r15
+	movq	16(%rcx), %rdx
+	movq	%rdx, -48(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r11
+	movq	%rdx, %r13
+	movq	(%rcx), %r8
+	movq	8(%rcx), %rcx
+	movq	%rcx, -24(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rcx
+	movq	%rdx, %rbx
 	movq	%rax, %rbp
-	leaq	88(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	88(%rsp), %rbp
-	movq	48(%rsp), %r11          # 8-byte Reload
-	adcq	96(%rsp), %r11
-	adcq	104(%rsp), %r15
-	adcq	112(%rsp), %r14
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	adcq	120(%rsp), %rsi
-	movq	%rsi, 64(%rsp)          # 8-byte Spill
-	adcq	128(%rsp), %r13
-	adcq	136(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
-	adcq	144(%rsp), %r12
-	movq	80(%rsp), %r8           # 8-byte Reload
-	adcq	152(%rsp), %r8
-	movq	%r11, %rax
-	movq	56(%rsp), %rbp          # 8-byte Reload
-	subq	(%rbp), %rax
-	movq	%r15, %rcx
-	sbbq	8(%rbp), %rcx
-	movq	%r14, %rdx
-	sbbq	16(%rbp), %rdx
-	sbbq	24(%rbp), %rsi
-	movq	%r13, %rdi
-	sbbq	32(%rbp), %rdi
-	movq	%rbx, %r9
-	sbbq	40(%rbp), %r9
-	movq	%r12, %r10
-	sbbq	48(%rbp), %r10
-	movq	%rbp, %rbx
-	movq	%r8, %rbp
-	sbbq	56(%rbx), %rbp
-	testq	%rbp, %rbp
-	cmovsq	%r11, %rax
-	movq	(%rsp), %rbx            # 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovsq	%r15, %rcx
-	movq	%rcx, 8(%rbx)
-	cmovsq	%r14, %rdx
-	movq	%rdx, 16(%rbx)
-	cmovsq	64(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 24(%rbx)
-	cmovsq	%r13, %rdi
-	movq	%rdi, 32(%rbx)
-	cmovsq	72(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 40(%rbx)
-	cmovsq	%r12, %r10
-	movq	%r10, 48(%rbx)
-	cmovsq	%r8, %rbp
-	movq	%rbp, 56(%rbx)
-	addq	$1240, %rsp             # imm = 0x4D8
+	movq	%rdi, %rax
+	mulq	%r8
+	movq	%r8, %rdi
+	movq	%r8, -16(%rsp)                  # 8-byte Spill
+	movq	%rdx, %rcx
+	addq	%rbp, %rcx
+	adcq	%r11, %rbx
+	adcq	%r14, %r13
+	adcq	%r10, %r15
+	adcq	-128(%rsp), %r12                # 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                # 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r9, %rax
+	movq	%rsi, -32(%rsp)                 # 8-byte Spill
+	adcq	8(%rsi), %rcx
+	adcq	16(%rsi), %rbx
+	adcq	24(%rsi), %r13
+	adcq	32(%rsi), %r15
+	adcq	40(%rsi), %r12
+	movq	%r12, -88(%rsp)                 # 8-byte Spill
+	adcq	48(%rsi), %rdx
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	setb	-96(%rsp)                       # 1-byte Folded Spill
+	movq	-80(%rsp), %rsi                 # 8-byte Reload
+	imulq	%rcx, %rsi
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r9
+	movq	%rsi, %rax
+	mulq	%rdi
+	movq	%rdx, %r10
+	movq	%rax, %r11
+	movq	%rsi, %rax
+	movq	-24(%rsp), %rsi                 # 8-byte Reload
+	mulq	%rsi
+	movq	%rdx, %rbp
+	movq	%rax, %rdi
+	addq	%r10, %rdi
+	adcq	%r9, %rbp
+	adcq	-56(%rsp), %r8                  # 8-byte Folded Reload
+	adcq	-112(%rsp), %r12                # 8-byte Folded Reload
+	adcq	-104(%rsp), %r14                # 8-byte Folded Reload
+	movzbl	-96(%rsp), %eax                 # 1-byte Folded Reload
+	movq	-128(%rsp), %rdx                # 8-byte Reload
+	adcq	%rax, %rdx
+	addq	%rcx, %r11
+	adcq	%rbx, %rdi
+	adcq	%r13, %rbp
+	adcq	%r15, %r8
+	adcq	-88(%rsp), %r12                 # 8-byte Folded Reload
+	adcq	-120(%rsp), %r14                # 8-byte Folded Reload
+	movq	-32(%rsp), %rax                 # 8-byte Reload
+	adcq	56(%rax), %rdx
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	setb	-120(%rsp)                      # 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 # 8-byte Reload
+	imulq	%rdi, %rcx
+	movq	%rcx, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %rbx
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r9
+	movq	%rcx, %rax
+	mulq	%rsi
+	movq	%rdx, %rcx
+	movq	%rax, %rsi
+	addq	%r10, %rsi
+	adcq	%rbx, %rcx
+	adcq	-112(%rsp), %r13                # 8-byte Folded Reload
+	adcq	-104(%rsp), %r15                # 8-byte Folded Reload
+	adcq	-96(%rsp), %r11                 # 8-byte Folded Reload
+	movzbl	-120(%rsp), %eax                # 1-byte Folded Reload
+	movq	-88(%rsp), %rdx                 # 8-byte Reload
+	adcq	%rax, %rdx
+	addq	%rdi, %r9
+	adcq	%rbp, %rsi
+	adcq	%r8, %rcx
+	adcq	%r12, %r13
+	adcq	%r14, %r15
+	adcq	-128(%rsp), %r11                # 8-byte Folded Reload
+	movq	-32(%rsp), %rax                 # 8-byte Reload
+	adcq	64(%rax), %rdx
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	setb	-128(%rsp)                      # 1-byte Folded Spill
+	movq	-80(%rsp), %rdi                 # 8-byte Reload
+	imulq	%rsi, %rdi
+	movq	%rdi, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -96(%rsp)                 # 8-byte Spill
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	movq	-48(%rsp), %r14                 # 8-byte Reload
+	mulq	%r14
+	movq	%rdx, %rbp
+	movq	%rax, %r9
+	movq	%rdi, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r10
+	movq	%rdi, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rdi
+	movq	%rax, %rbx
+	addq	%r8, %rbx
+	adcq	%r9, %rdi
+	adcq	-56(%rsp), %rbp                 # 8-byte Folded Reload
+	adcq	-112(%rsp), %r12                # 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                # 8-byte Reload
+	adcq	-104(%rsp), %rdx                # 8-byte Folded Reload
+	movzbl	-128(%rsp), %eax                # 1-byte Folded Reload
+	movq	-96(%rsp), %r8                  # 8-byte Reload
+	adcq	%rax, %r8
+	addq	%rsi, %r10
+	adcq	%rcx, %rbx
+	adcq	%r13, %rdi
+	adcq	%r15, %rbp
+	adcq	%r11, %r12
+	adcq	-88(%rsp), %rdx                 # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	-32(%rsp), %rax                 # 8-byte Reload
+	adcq	72(%rax), %r8
+	movq	%r8, -96(%rsp)                  # 8-byte Spill
+	setb	-104(%rsp)                      # 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 # 8-byte Reload
+	imulq	%rbx, %rcx
+	movq	%rcx, %rax
+	movq	-40(%rsp), %r9                  # 8-byte Reload
+	mulq	%r9
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, -8(%rsp)                  # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r14
+	movq	%rdx, %r11
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r14
+	movq	%rcx, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %rsi
+	addq	%r10, %rsi
+	adcq	%r13, %r8
+	adcq	-8(%rsp), %r11                  # 8-byte Folded Reload
+	adcq	-56(%rsp), %r15                 # 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                # 8-byte Reload
+	adcq	-112(%rsp), %rdx                # 8-byte Folded Reload
+	movzbl	-104(%rsp), %eax                # 1-byte Folded Reload
+	movq	-88(%rsp), %rcx                 # 8-byte Reload
+	adcq	%rax, %rcx
+	addq	%rbx, %r14
+	adcq	%rdi, %rsi
+	adcq	%rbp, %r8
+	adcq	%r12, %r11
+	adcq	-120(%rsp), %r15                # 8-byte Folded Reload
+	adcq	-96(%rsp), %rdx                 # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movq	-32(%rsp), %rax                 # 8-byte Reload
+	adcq	80(%rax), %rcx
+	movq	%rcx, -88(%rsp)                 # 8-byte Spill
+	setb	-120(%rsp)                      # 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 # 8-byte Reload
+	imulq	%rsi, %rcx
+	movq	%rcx, %rax
+	mulq	%r9
+	movq	%rdx, -80(%rsp)                 # 8-byte Spill
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rdi
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	%rcx, %rax
+	movq	-16(%rsp), %r13                 # 8-byte Reload
+	mulq	%r13
+	movq	%rdx, %r14
+	movq	%rax, %r9
+	movq	%rcx, %rax
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r10
+	movq	%rcx, %rax
+	movq	-24(%rsp), %r12                 # 8-byte Reload
+	mulq	%r12
+	addq	%r14, %rax
+	adcq	%r10, %rdx
+	adcq	-112(%rsp), %rbx                # 8-byte Folded Reload
+	adcq	-104(%rsp), %rbp                # 8-byte Folded Reload
+	adcq	-96(%rsp), %rdi                 # 8-byte Folded Reload
+	movzbl	-120(%rsp), %r10d               # 1-byte Folded Reload
+	adcq	-80(%rsp), %r10                 # 8-byte Folded Reload
+	addq	%rsi, %r9
+	adcq	%r8, %rax
+	adcq	%r11, %rdx
+	adcq	%r15, %rbx
+	adcq	-128(%rsp), %rbp                # 8-byte Folded Reload
+	adcq	-88(%rsp), %rdi                 # 8-byte Folded Reload
+	movq	-32(%rsp), %rcx                 # 8-byte Reload
+	adcq	88(%rcx), %r10
+	xorl	%r8d, %r8d
+	movq	%rax, %r9
+	subq	%r13, %r9
+	movq	%rdx, %r11
+	sbbq	%r12, %r11
+	movq	%rbx, %r14
+	sbbq	-48(%rsp), %r14                 # 8-byte Folded Reload
+	movq	%rbp, %r15
+	sbbq	-72(%rsp), %r15                 # 8-byte Folded Reload
+	movq	%rdi, %r12
+	sbbq	-64(%rsp), %r12                 # 8-byte Folded Reload
+	movq	%r10, %rcx
+	sbbq	-40(%rsp), %rcx                 # 8-byte Folded Reload
+	sbbq	%r8, %r8
+	testb	$1, %r8b
+	cmovneq	%r10, %rcx
+	movq	(%rsp), %rsi                    # 8-byte Reload
+	movq	%rcx, 40(%rsi)
+	cmovneq	%rdi, %r12
+	movq	%r12, 32(%rsi)
+	cmovneq	%rbp, %r15
+	movq	%r15, 24(%rsi)
+	cmovneq	%rbx, %r14
+	movq	%r14, 16(%rsi)
+	cmovneq	%rdx, %r11
+	movq	%r11, 8(%rsi)
+	cmovneq	%rax, %r9
+	movq	%r9, (%rsi)
+	addq	$8, %rsp
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -12646,374 +4967,334 @@ mcl_fp_montNF8L:                        # @mcl_fp_montNF8L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end115:
-	.size	mcl_fp_montNF8L, .Lfunc_end115-mcl_fp_montNF8L
-
-	.globl	mcl_fp_montRed8L
-	.align	16, 0x90
-	.type	mcl_fp_montRed8L,@function
-mcl_fp_montRed8L:                       # @mcl_fp_montRed8L
-# BB#0:
+.Lfunc_end45:
+	.size	mcl_fp_montRed6L, .Lfunc_end45-mcl_fp_montRed6L
+                                        # -- End function
+	.globl	mcl_fp_montRedNF6L              # -- Begin function mcl_fp_montRedNF6L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF6L,@function
+mcl_fp_montRedNF6L:                     # @mcl_fp_montRedNF6L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$776, %rsp              # imm = 0x308
-	movq	%rdx, %rax
-	movq	%rax, 112(%rsp)         # 8-byte Spill
-	movq	%rdi, 72(%rsp)          # 8-byte Spill
-	movq	-8(%rax), %rcx
-	movq	%rcx, 128(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r15
-	movq	8(%rsi), %rdx
-	movq	%rdx, 184(%rsp)         # 8-byte Spill
-	movq	%r15, %rdx
-	imulq	%rcx, %rdx
-	movq	120(%rsi), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	movq	112(%rsi), %rcx
-	movq	%rcx, 136(%rsp)         # 8-byte Spill
-	movq	104(%rsi), %rcx
-	movq	%rcx, 120(%rsp)         # 8-byte Spill
-	movq	96(%rsi), %rcx
-	movq	%rcx, 168(%rsp)         # 8-byte Spill
-	movq	88(%rsi), %rcx
-	movq	%rcx, 176(%rsp)         # 8-byte Spill
-	movq	80(%rsi), %rcx
-	movq	%rcx, 160(%rsp)         # 8-byte Spill
-	movq	72(%rsi), %rcx
-	movq	%rcx, 192(%rsp)         # 8-byte Spill
-	movq	64(%rsi), %r13
-	movq	56(%rsi), %rcx
-	movq	%rcx, 144(%rsp)         # 8-byte Spill
-	movq	48(%rsi), %r14
-	movq	40(%rsi), %rcx
-	movq	%rcx, 152(%rsp)         # 8-byte Spill
-	movq	32(%rsi), %r12
-	movq	24(%rsi), %rbx
-	movq	16(%rsi), %rbp
-	movq	%rax, %rcx
-	movq	(%rcx), %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	movq	56(%rcx), %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	48(%rcx), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	movq	40(%rcx), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	32(%rcx), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	24(%rcx), %rax
-	movq	%rax, 32(%rsp)          # 8-byte Spill
-	movq	16(%rcx), %rax
-	movq	%rax, 24(%rsp)          # 8-byte Spill
-	movq	8(%rcx), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	%rcx, %rsi
-	leaq	704(%rsp), %rdi
-	callq	.LmulPv512x64
-	addq	704(%rsp), %r15
-	movq	184(%rsp), %rcx         # 8-byte Reload
-	adcq	712(%rsp), %rcx
-	adcq	720(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	adcq	728(%rsp), %rbx
-	movq	%rbx, 88(%rsp)          # 8-byte Spill
-	adcq	736(%rsp), %r12
-	movq	%r12, 104(%rsp)         # 8-byte Spill
-	movq	152(%rsp), %rax         # 8-byte Reload
-	adcq	744(%rsp), %rax
-	movq	%rax, 152(%rsp)         # 8-byte Spill
-	adcq	752(%rsp), %r14
-	movq	%r14, %r12
-	movq	144(%rsp), %rax         # 8-byte Reload
-	adcq	760(%rsp), %rax
-	movq	%rax, 144(%rsp)         # 8-byte Spill
-	adcq	768(%rsp), %r13
-	movq	%r13, 184(%rsp)         # 8-byte Spill
-	adcq	$0, 192(%rsp)           # 8-byte Folded Spill
-	movq	160(%rsp), %r15         # 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 176(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 168(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 120(%rsp)           # 8-byte Folded Spill
-	movq	136(%rsp), %r13         # 8-byte Reload
-	adcq	$0, %r13
-	movq	96(%rsp), %r14          # 8-byte Reload
-	adcq	$0, %r14
-	sbbq	%rbx, %rbx
-	movq	%rcx, %rbp
-	movq	%rbp, %rdx
-	imulq	128(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	632(%rsp), %rdi
-	movq	112(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv512x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	632(%rsp), %rbp
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	adcq	640(%rsp), %rsi
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	adcq	648(%rsp), %rcx
-	movq	%rcx, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %rcx         # 8-byte Reload
-	adcq	656(%rsp), %rcx
-	movq	%rcx, 104(%rsp)         # 8-byte Spill
-	movq	152(%rsp), %rcx         # 8-byte Reload
-	adcq	664(%rsp), %rcx
-	movq	%rcx, 152(%rsp)         # 8-byte Spill
-	adcq	672(%rsp), %r12
-	movq	144(%rsp), %rcx         # 8-byte Reload
-	adcq	680(%rsp), %rcx
-	movq	%rcx, 144(%rsp)         # 8-byte Spill
-	movq	184(%rsp), %rcx         # 8-byte Reload
-	adcq	688(%rsp), %rcx
-	movq	%rcx, 184(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rcx         # 8-byte Reload
-	adcq	696(%rsp), %rcx
-	movq	%rcx, 192(%rsp)         # 8-byte Spill
-	adcq	$0, %r15
-	movq	%r15, 160(%rsp)         # 8-byte Spill
-	movq	176(%rsp), %rbx         # 8-byte Reload
-	adcq	$0, %rbx
-	movq	168(%rsp), %r15         # 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 120(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %r13
-	movq	%r13, 136(%rsp)         # 8-byte Spill
-	adcq	$0, %r14
-	movq	%r14, 96(%rsp)          # 8-byte Spill
+	pushq	%rax
+	movq	%rdx, %rcx
+	movq	%rdi, (%rsp)                    # 8-byte Spill
+	movq	-8(%rdx), %rax
+	movq	%rax, -80(%rsp)                 # 8-byte Spill
+	movq	(%rsi), %r9
+	movq	%r9, %rdi
+	imulq	%rax, %rdi
+	movq	40(%rdx), %rdx
+	movq	%rdx, -40(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, -128(%rsp)                # 8-byte Spill
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	32(%rcx), %rdx
+	movq	%rdx, -64(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r10
+	movq	%rdx, %r12
+	movq	24(%rcx), %rdx
+	movq	%rdx, -72(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r14
+	movq	%rdx, %r15
+	movq	16(%rcx), %rdx
+	movq	%rdx, -48(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r11
+	movq	%rdx, %r13
+	movq	(%rcx), %r8
+	movq	8(%rcx), %rcx
+	movq	%rcx, -24(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rcx
+	movq	%rdx, %rbx
 	movq	%rax, %rbp
-	adcq	$0, %rbp
-	movq	%rsi, %rdx
-	movq	%rsi, %r14
-	imulq	128(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	560(%rsp), %rdi
-	movq	112(%rsp), %r13         # 8-byte Reload
-	movq	%r13, %rsi
-	callq	.LmulPv512x64
-	addq	560(%rsp), %r14
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	adcq	568(%rsp), %rcx
-	movq	104(%rsp), %rax         # 8-byte Reload
-	adcq	576(%rsp), %rax
-	movq	%rax, 104(%rsp)         # 8-byte Spill
-	movq	152(%rsp), %rax         # 8-byte Reload
-	adcq	584(%rsp), %rax
-	movq	%rax, 152(%rsp)         # 8-byte Spill
-	adcq	592(%rsp), %r12
-	movq	%r12, 88(%rsp)          # 8-byte Spill
-	movq	144(%rsp), %r14         # 8-byte Reload
-	adcq	600(%rsp), %r14
-	movq	184(%rsp), %rax         # 8-byte Reload
-	adcq	608(%rsp), %rax
-	movq	%rax, 184(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rax         # 8-byte Reload
-	adcq	616(%rsp), %rax
-	movq	%rax, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %rax         # 8-byte Reload
-	adcq	624(%rsp), %rax
-	movq	%rax, 160(%rsp)         # 8-byte Spill
-	adcq	$0, %rbx
-	movq	%rbx, 176(%rsp)         # 8-byte Spill
-	adcq	$0, %r15
-	movq	%r15, 168(%rsp)         # 8-byte Spill
-	movq	120(%rsp), %rbx         # 8-byte Reload
-	adcq	$0, %rbx
-	movq	136(%rsp), %r15         # 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 96(%rsp)            # 8-byte Folded Spill
-	adcq	$0, %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	movq	%rcx, %rbp
-	movq	%rbp, %rdx
-	movq	128(%rsp), %r12         # 8-byte Reload
-	imulq	%r12, %rdx
-	leaq	488(%rsp), %rdi
-	movq	%r13, %rsi
-	callq	.LmulPv512x64
-	addq	488(%rsp), %rbp
-	movq	104(%rsp), %rax         # 8-byte Reload
-	adcq	496(%rsp), %rax
-	movq	152(%rsp), %rbp         # 8-byte Reload
-	adcq	504(%rsp), %rbp
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	adcq	512(%rsp), %rcx
-	movq	%rcx, 88(%rsp)          # 8-byte Spill
-	adcq	520(%rsp), %r14
-	movq	184(%rsp), %rcx         # 8-byte Reload
-	adcq	528(%rsp), %rcx
-	movq	%rcx, 184(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rcx         # 8-byte Reload
-	adcq	536(%rsp), %rcx
-	movq	%rcx, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %r13         # 8-byte Reload
-	adcq	544(%rsp), %r13
-	movq	176(%rsp), %rcx         # 8-byte Reload
-	adcq	552(%rsp), %rcx
-	movq	%rcx, 176(%rsp)         # 8-byte Spill
-	adcq	$0, 168(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rbx
-	movq	%rbx, 120(%rsp)         # 8-byte Spill
-	movq	%r15, %rbx
-	adcq	$0, %rbx
-	adcq	$0, 96(%rsp)            # 8-byte Folded Spill
-	adcq	$0, 80(%rsp)            # 8-byte Folded Spill
-	movq	%rax, %rdx
-	movq	%rax, %r15
-	imulq	%r12, %rdx
-	leaq	416(%rsp), %rdi
-	movq	112(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	416(%rsp), %r15
-	adcq	424(%rsp), %rbp
-	movq	%rbp, %rax
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	adcq	432(%rsp), %rcx
-	movq	%rcx, 88(%rsp)          # 8-byte Spill
-	movq	%r14, %r12
-	adcq	440(%rsp), %r12
-	movq	184(%rsp), %r14         # 8-byte Reload
-	adcq	448(%rsp), %r14
-	movq	192(%rsp), %rbp         # 8-byte Reload
-	adcq	456(%rsp), %rbp
-	adcq	464(%rsp), %r13
-	movq	176(%rsp), %rcx         # 8-byte Reload
-	adcq	472(%rsp), %rcx
-	movq	%rcx, 176(%rsp)         # 8-byte Spill
-	movq	168(%rsp), %rcx         # 8-byte Reload
-	adcq	480(%rsp), %rcx
-	movq	%rcx, 168(%rsp)         # 8-byte Spill
-	adcq	$0, 120(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rbx
-	movq	%rbx, 136(%rsp)         # 8-byte Spill
-	movq	96(%rsp), %r15          # 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 80(%rsp)            # 8-byte Folded Spill
-	movq	%rax, %rbx
-	movq	%rbx, %rdx
-	imulq	128(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	344(%rsp), %rdi
-	movq	112(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	344(%rsp), %rbx
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	352(%rsp), %rax
-	adcq	360(%rsp), %r12
-	movq	%r12, 144(%rsp)         # 8-byte Spill
-	adcq	368(%rsp), %r14
-	movq	%r14, 184(%rsp)         # 8-byte Spill
-	adcq	376(%rsp), %rbp
-	movq	%rbp, 192(%rsp)         # 8-byte Spill
-	adcq	384(%rsp), %r13
-	movq	%r13, 160(%rsp)         # 8-byte Spill
-	movq	176(%rsp), %r13         # 8-byte Reload
-	adcq	392(%rsp), %r13
-	movq	168(%rsp), %r12         # 8-byte Reload
-	adcq	400(%rsp), %r12
-	movq	120(%rsp), %r14         # 8-byte Reload
-	adcq	408(%rsp), %r14
-	movq	136(%rsp), %rbp         # 8-byte Reload
-	adcq	$0, %rbp
-	movq	%r15, %rbx
-	adcq	$0, %rbx
-	adcq	$0, 80(%rsp)            # 8-byte Folded Spill
-	movq	%rax, %rdx
-	movq	%rax, %r15
-	imulq	128(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	272(%rsp), %rdi
-	movq	112(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	272(%rsp), %r15
-	movq	144(%rsp), %rcx         # 8-byte Reload
-	adcq	280(%rsp), %rcx
-	movq	184(%rsp), %rax         # 8-byte Reload
-	adcq	288(%rsp), %rax
-	movq	%rax, 184(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rax         # 8-byte Reload
-	adcq	296(%rsp), %rax
-	movq	%rax, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %rax         # 8-byte Reload
-	adcq	304(%rsp), %rax
-	movq	%rax, 160(%rsp)         # 8-byte Spill
-	adcq	312(%rsp), %r13
-	movq	%r13, 176(%rsp)         # 8-byte Spill
-	adcq	320(%rsp), %r12
-	movq	%r12, 168(%rsp)         # 8-byte Spill
-	adcq	328(%rsp), %r14
-	movq	%r14, %r13
-	adcq	336(%rsp), %rbp
-	movq	%rbp, %r12
-	adcq	$0, %rbx
-	movq	%rbx, %r14
-	movq	80(%rsp), %r15          # 8-byte Reload
-	adcq	$0, %r15
-	movq	128(%rsp), %rdx         # 8-byte Reload
-	movq	%rcx, %rbx
-	imulq	%rbx, %rdx
-	leaq	200(%rsp), %rdi
-	movq	112(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv512x64
-	addq	200(%rsp), %rbx
-	movq	184(%rsp), %rax         # 8-byte Reload
-	adcq	208(%rsp), %rax
-	movq	%rax, 184(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %r8          # 8-byte Reload
-	adcq	216(%rsp), %r8
-	movq	%r8, 192(%rsp)          # 8-byte Spill
-	movq	160(%rsp), %rdx         # 8-byte Reload
-	adcq	224(%rsp), %rdx
-	movq	176(%rsp), %rsi         # 8-byte Reload
-	adcq	232(%rsp), %rsi
-	movq	168(%rsp), %rdi         # 8-byte Reload
-	adcq	240(%rsp), %rdi
-	movq	%r13, %rbp
-	adcq	248(%rsp), %rbp
-	movq	%r12, %rbx
-	adcq	256(%rsp), %rbx
-	movq	%rbx, 136(%rsp)         # 8-byte Spill
-	movq	%r14, %r9
-	adcq	264(%rsp), %r9
-	adcq	$0, %r15
-	movq	%r15, %r10
-	subq	16(%rsp), %rax          # 8-byte Folded Reload
-	movq	%r8, %rcx
-	sbbq	8(%rsp), %rcx           # 8-byte Folded Reload
+	movq	%rdi, %rax
+	mulq	%r8
+	movq	%r8, %rdi
+	movq	%r8, -16(%rsp)                  # 8-byte Spill
+	movq	%rdx, %rcx
+	addq	%rbp, %rcx
+	adcq	%r11, %rbx
+	adcq	%r14, %r13
+	adcq	%r10, %r15
+	adcq	-128(%rsp), %r12                # 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                # 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r9, %rax
+	movq	%rsi, -32(%rsp)                 # 8-byte Spill
+	adcq	8(%rsi), %rcx
+	adcq	16(%rsi), %rbx
+	adcq	24(%rsi), %r13
+	adcq	32(%rsi), %r15
+	adcq	40(%rsi), %r12
+	movq	%r12, -88(%rsp)                 # 8-byte Spill
+	adcq	48(%rsi), %rdx
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	setb	-96(%rsp)                       # 1-byte Folded Spill
+	movq	-80(%rsp), %rsi                 # 8-byte Reload
+	imulq	%rcx, %rsi
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r9
+	movq	%rsi, %rax
+	mulq	%rdi
+	movq	%rdx, %r10
+	movq	%rax, %r11
+	movq	%rsi, %rax
+	movq	-24(%rsp), %rsi                 # 8-byte Reload
+	mulq	%rsi
+	movq	%rdx, %rbp
+	movq	%rax, %rdi
+	addq	%r10, %rdi
+	adcq	%r9, %rbp
+	adcq	-56(%rsp), %r8                  # 8-byte Folded Reload
+	adcq	-112(%rsp), %r12                # 8-byte Folded Reload
+	adcq	-104(%rsp), %r14                # 8-byte Folded Reload
+	movzbl	-96(%rsp), %eax                 # 1-byte Folded Reload
+	movq	-128(%rsp), %rdx                # 8-byte Reload
+	adcq	%rax, %rdx
+	addq	%rcx, %r11
+	adcq	%rbx, %rdi
+	adcq	%r13, %rbp
+	adcq	%r15, %r8
+	adcq	-88(%rsp), %r12                 # 8-byte Folded Reload
+	adcq	-120(%rsp), %r14                # 8-byte Folded Reload
+	movq	-32(%rsp), %rax                 # 8-byte Reload
+	adcq	56(%rax), %rdx
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	setb	-120(%rsp)                      # 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 # 8-byte Reload
+	imulq	%rdi, %rcx
+	movq	%rcx, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
 	movq	%rdx, %r13
-	sbbq	24(%rsp), %r13          # 8-byte Folded Reload
-	movq	%rsi, %r12
-	sbbq	32(%rsp), %r12          # 8-byte Folded Reload
-	movq	%rdi, %r14
-	sbbq	40(%rsp), %r14          # 8-byte Folded Reload
-	movq	%rbp, %r11
-	sbbq	48(%rsp), %r11          # 8-byte Folded Reload
-	movq	%rbx, %r8
-	sbbq	56(%rsp), %r8           # 8-byte Folded Reload
-	movq	%r9, %r15
-	sbbq	64(%rsp), %r9           # 8-byte Folded Reload
-	sbbq	$0, %r10
-	andl	$1, %r10d
-	cmovneq	%r15, %r9
-	testb	%r10b, %r10b
-	cmovneq	184(%rsp), %rax         # 8-byte Folded Reload
-	movq	72(%rsp), %rbx          # 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovneq	192(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, 8(%rbx)
-	cmovneq	%rdx, %r13
-	movq	%r13, 16(%rbx)
-	cmovneq	%rsi, %r12
-	movq	%r12, 24(%rbx)
-	cmovneq	%rdi, %r14
-	movq	%r14, 32(%rbx)
-	cmovneq	%rbp, %r11
-	movq	%r11, 40(%rbx)
-	cmovneq	136(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r8, 48(%rbx)
-	movq	%r9, 56(%rbx)
-	addq	$776, %rsp              # imm = 0x308
+	movq	%rax, %rbx
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r9
+	movq	%rcx, %rax
+	mulq	%rsi
+	movq	%rdx, %rcx
+	movq	%rax, %rsi
+	addq	%r10, %rsi
+	adcq	%rbx, %rcx
+	adcq	-112(%rsp), %r13                # 8-byte Folded Reload
+	adcq	-104(%rsp), %r15                # 8-byte Folded Reload
+	adcq	-96(%rsp), %r11                 # 8-byte Folded Reload
+	movzbl	-120(%rsp), %eax                # 1-byte Folded Reload
+	movq	-88(%rsp), %rdx                 # 8-byte Reload
+	adcq	%rax, %rdx
+	addq	%rdi, %r9
+	adcq	%rbp, %rsi
+	adcq	%r8, %rcx
+	adcq	%r12, %r13
+	adcq	%r14, %r15
+	adcq	-128(%rsp), %r11                # 8-byte Folded Reload
+	movq	-32(%rsp), %rax                 # 8-byte Reload
+	adcq	64(%rax), %rdx
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	setb	-128(%rsp)                      # 1-byte Folded Spill
+	movq	-80(%rsp), %rdi                 # 8-byte Reload
+	imulq	%rsi, %rdi
+	movq	%rdi, %rax
+	mulq	-40(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -96(%rsp)                 # 8-byte Spill
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	%rdi, %rax
+	movq	-48(%rsp), %r14                 # 8-byte Reload
+	mulq	%r14
+	movq	%rdx, %rbp
+	movq	%rax, %r9
+	movq	%rdi, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r10
+	movq	%rdi, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rdi
+	movq	%rax, %rbx
+	addq	%r8, %rbx
+	adcq	%r9, %rdi
+	adcq	-56(%rsp), %rbp                 # 8-byte Folded Reload
+	adcq	-112(%rsp), %r12                # 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                # 8-byte Reload
+	adcq	-104(%rsp), %rdx                # 8-byte Folded Reload
+	movzbl	-128(%rsp), %eax                # 1-byte Folded Reload
+	movq	-96(%rsp), %r8                  # 8-byte Reload
+	adcq	%rax, %r8
+	addq	%rsi, %r10
+	adcq	%rcx, %rbx
+	adcq	%r13, %rdi
+	adcq	%r15, %rbp
+	adcq	%r11, %r12
+	adcq	-88(%rsp), %rdx                 # 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                # 8-byte Spill
+	movq	-32(%rsp), %rax                 # 8-byte Reload
+	adcq	72(%rax), %r8
+	movq	%r8, -96(%rsp)                  # 8-byte Spill
+	setb	-104(%rsp)                      # 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 # 8-byte Reload
+	imulq	%rbx, %rcx
+	movq	%rcx, %rax
+	movq	-40(%rsp), %r9                  # 8-byte Reload
+	mulq	%r9
+	movq	%rdx, -88(%rsp)                 # 8-byte Spill
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, -8(%rsp)                  # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r14
+	movq	%rdx, %r11
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r14
+	movq	%rcx, %rax
+	mulq	-24(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %rsi
+	addq	%r10, %rsi
+	adcq	%r13, %r8
+	adcq	-8(%rsp), %r11                  # 8-byte Folded Reload
+	adcq	-56(%rsp), %r15                 # 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                # 8-byte Reload
+	adcq	-112(%rsp), %rdx                # 8-byte Folded Reload
+	movzbl	-104(%rsp), %eax                # 1-byte Folded Reload
+	movq	-88(%rsp), %rcx                 # 8-byte Reload
+	adcq	%rax, %rcx
+	addq	%rbx, %r14
+	adcq	%rdi, %rsi
+	adcq	%rbp, %r8
+	adcq	%r12, %r11
+	adcq	-120(%rsp), %r15                # 8-byte Folded Reload
+	adcq	-96(%rsp), %rdx                 # 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                # 8-byte Spill
+	movq	-32(%rsp), %rax                 # 8-byte Reload
+	adcq	80(%rax), %rcx
+	movq	%rcx, -88(%rsp)                 # 8-byte Spill
+	setb	-120(%rsp)                      # 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 # 8-byte Reload
+	imulq	%rsi, %rcx
+	movq	%rcx, %rax
+	mulq	%r9
+	movq	%rdx, -80(%rsp)                 # 8-byte Spill
+	movq	%rax, -96(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rdi
+	movq	%rax, -104(%rsp)                # 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, -112(%rsp)                # 8-byte Spill
+	movq	%rcx, %rax
+	movq	-16(%rsp), %r13                 # 8-byte Reload
+	mulq	%r13
+	movq	%rdx, %r14
+	movq	%rax, %r9
+	movq	%rcx, %rax
+	mulq	-48(%rsp)                       # 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r10
+	movq	%rcx, %rax
+	movq	-24(%rsp), %r12                 # 8-byte Reload
+	mulq	%r12
+	addq	%r14, %rax
+	adcq	%r10, %rdx
+	adcq	-112(%rsp), %rbx                # 8-byte Folded Reload
+	adcq	-104(%rsp), %rbp                # 8-byte Folded Reload
+	adcq	-96(%rsp), %rdi                 # 8-byte Folded Reload
+	movzbl	-120(%rsp), %r10d               # 1-byte Folded Reload
+	adcq	-80(%rsp), %r10                 # 8-byte Folded Reload
+	addq	%rsi, %r9
+	adcq	%r8, %rax
+	adcq	%r11, %rdx
+	adcq	%r15, %rbx
+	adcq	-128(%rsp), %rbp                # 8-byte Folded Reload
+	adcq	-88(%rsp), %rdi                 # 8-byte Folded Reload
+	movq	-32(%rsp), %rcx                 # 8-byte Reload
+	adcq	88(%rcx), %r10
+	movq	%rax, %r8
+	subq	%r13, %r8
+	movq	%rdx, %r9
+	sbbq	%r12, %r9
+	movq	%rbx, %r11
+	sbbq	-48(%rsp), %r11                 # 8-byte Folded Reload
+	movq	%rbp, %r14
+	sbbq	-72(%rsp), %r14                 # 8-byte Folded Reload
+	movq	%rdi, %r15
+	sbbq	-64(%rsp), %r15                 # 8-byte Folded Reload
+	movq	%r10, %rcx
+	sbbq	-40(%rsp), %rcx                 # 8-byte Folded Reload
+	movq	%rcx, %rsi
+	sarq	$63, %rsi
+	cmovsq	%r10, %rcx
+	movq	(%rsp), %rsi                    # 8-byte Reload
+	movq	%rcx, 40(%rsi)
+	cmovsq	%rdi, %r15
+	movq	%r15, 32(%rsi)
+	cmovsq	%rbp, %r14
+	movq	%r14, 24(%rsi)
+	cmovsq	%rbx, %r11
+	movq	%r11, 16(%rsi)
+	cmovsq	%rdx, %r9
+	movq	%r9, 8(%rsi)
+	cmovsq	%rax, %r8
+	movq	%r8, (%rsi)
+	addq	$8, %rsp
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13021,547 +5302,374 @@ mcl_fp_montRed8L:                       # @mcl_fp_montRed8L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end116:
-	.size	mcl_fp_montRed8L, .Lfunc_end116-mcl_fp_montRed8L
-
-	.globl	mcl_fp_addPre8L
-	.align	16, 0x90
-	.type	mcl_fp_addPre8L,@function
-mcl_fp_addPre8L:                        # @mcl_fp_addPre8L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r8
-	movq	56(%rsi), %r15
-	movq	48(%rdx), %r9
-	movq	48(%rsi), %r12
-	movq	40(%rdx), %r10
-	movq	32(%rdx), %r11
-	movq	24(%rdx), %r14
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	40(%rsi), %r13
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %rsi
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rbx, 16(%rdi)
-	adcq	%r14, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r11, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%r10, %r13
-	movq	%r13, 40(%rdi)
-	adcq	%r9, %r12
-	movq	%r12, 48(%rdi)
-	adcq	%r8, %r15
-	movq	%r15, 56(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
+.Lfunc_end46:
+	.size	mcl_fp_montRedNF6L, .Lfunc_end46-mcl_fp_montRedNF6L
+                                        # -- End function
+	.globl	mcl_fp_addPre6L                 # -- Begin function mcl_fp_addPre6L
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre6L,@function
+mcl_fp_addPre6L:                        # @mcl_fp_addPre6L
+# %bb.0:
+	movq	40(%rsi), %rax
+	movq	32(%rsi), %rcx
+	movq	24(%rsi), %r8
+	movq	16(%rsi), %r9
+	movq	(%rsi), %r10
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r10
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r9
+	adcq	24(%rdx), %r8
+	adcq	32(%rdx), %rcx
+	adcq	40(%rdx), %rax
+	movq	%rax, 40(%rdi)
+	movq	%rcx, 32(%rdi)
+	movq	%r8, 24(%rdi)
+	movq	%r9, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r10, (%rdi)
+	setb	%al
+	movzbl	%al, %eax
 	retq
-.Lfunc_end117:
-	.size	mcl_fp_addPre8L, .Lfunc_end117-mcl_fp_addPre8L
-
-	.globl	mcl_fp_subPre8L
-	.align	16, 0x90
-	.type	mcl_fp_subPre8L,@function
-mcl_fp_subPre8L:                        # @mcl_fp_subPre8L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r8
-	movq	56(%rsi), %r15
-	movq	48(%rdx), %r9
-	movq	40(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %r12
+.Lfunc_end47:
+	.size	mcl_fp_addPre6L, .Lfunc_end47-mcl_fp_addPre6L
+                                        # -- End function
+	.globl	mcl_fp_subPre6L                 # -- Begin function mcl_fp_subPre6L
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre6L,@function
+mcl_fp_subPre6L:                        # @mcl_fp_subPre6L
+# %bb.0:
+	movq	40(%rsi), %rcx
+	movq	32(%rsi), %r8
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r11
+	movq	8(%rsi), %rsi
 	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %r12
-	movq	16(%rsi), %rcx
-	sbbq	16(%rdx), %rcx
-	movq	48(%rsi), %r13
-	movq	40(%rsi), %rdx
-	movq	32(%rsi), %rbp
-	movq	24(%rsi), %rsi
-	movq	%rbx, (%rdi)
-	movq	%r12, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r11, %rsi
-	movq	%rsi, 24(%rdi)
-	sbbq	%r14, %rbp
-	movq	%rbp, 32(%rdi)
-	sbbq	%r10, %rdx
-	movq	%rdx, 40(%rdi)
-	sbbq	%r9, %r13
-	movq	%r13, 48(%rdi)
-	sbbq	%r8, %r15
-	movq	%r15, 56(%rdi)
-	sbbq	$0, %rax
+	subq	(%rdx), %r11
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %r10
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r8
+	sbbq	40(%rdx), %rcx
+	movq	%rcx, 40(%rdi)
+	movq	%r8, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r11, (%rdi)
+	sbbq	%rax, %rax
 	andl	$1, %eax
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
 	retq
-.Lfunc_end118:
-	.size	mcl_fp_subPre8L, .Lfunc_end118-mcl_fp_subPre8L
-
-	.globl	mcl_fp_shr1_8L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_8L,@function
-mcl_fp_shr1_8L:                         # @mcl_fp_shr1_8L
-# BB#0:
-	movq	56(%rsi), %r8
-	movq	48(%rsi), %r9
-	movq	40(%rsi), %r10
-	movq	32(%rsi), %r11
+.Lfunc_end48:
+	.size	mcl_fp_subPre6L, .Lfunc_end48-mcl_fp_subPre6L
+                                        # -- End function
+	.globl	mcl_fp_shr1_6L                  # -- Begin function mcl_fp_shr1_6L
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_6L,@function
+mcl_fp_shr1_6L:                         # @mcl_fp_shr1_6L
+# %bb.0:
+	movq	(%rsi), %r9
+	movq	8(%rsi), %r8
+	movq	16(%rsi), %r10
 	movq	24(%rsi), %rcx
-	movq	16(%rsi), %rdx
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rax
-	movq	%rax, (%rdi)
-	shrdq	$1, %rdx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rcx, %rdx
-	movq	%rdx, 16(%rdi)
-	shrdq	$1, %r11, %rcx
-	movq	%rcx, 24(%rdi)
-	shrdq	$1, %r10, %r11
-	movq	%r11, 32(%rdi)
-	shrdq	$1, %r9, %r10
-	movq	%r10, 40(%rdi)
+	movq	32(%rsi), %rax
+	movq	40(%rsi), %rdx
+	movq	%rdx, %rsi
+	shrq	%rsi
+	movq	%rsi, 40(%rdi)
+	shldq	$63, %rax, %rdx
+	movq	%rdx, 32(%rdi)
+	shldq	$63, %rcx, %rax
+	movq	%rax, 24(%rdi)
+	shldq	$63, %r10, %rcx
+	movq	%rcx, 16(%rdi)
+	shldq	$63, %r8, %r10
+	movq	%r10, 8(%rdi)
 	shrdq	$1, %r8, %r9
-	movq	%r9, 48(%rdi)
-	shrq	%r8
-	movq	%r8, 56(%rdi)
+	movq	%r9, (%rdi)
 	retq
-.Lfunc_end119:
-	.size	mcl_fp_shr1_8L, .Lfunc_end119-mcl_fp_shr1_8L
-
-	.globl	mcl_fp_add8L
-	.align	16, 0x90
-	.type	mcl_fp_add8L,@function
-mcl_fp_add8L:                           # @mcl_fp_add8L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r15
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r12
-	movq	48(%rsi), %r9
-	movq	40(%rsi), %r13
-	movq	24(%rsi), %r11
-	movq	32(%rsi), %r10
-	movq	(%rdx), %r14
-	movq	8(%rdx), %rbx
-	addq	(%rsi), %r14
-	adcq	8(%rsi), %rbx
-	movq	16(%rdx), %rax
-	adcq	16(%rsi), %rax
-	adcq	24(%rdx), %r11
-	movq	40(%rdx), %rsi
-	adcq	32(%rdx), %r10
-	movq	%r14, (%rdi)
-	movq	%rbx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r11, 24(%rdi)
-	movq	%r10, 32(%rdi)
-	adcq	%r13, %rsi
-	movq	%rsi, 40(%rdi)
-	adcq	%r12, %r9
-	movq	%r9, 48(%rdi)
-	adcq	%r15, %r8
-	movq	%r8, 56(%rdi)
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	subq	(%rcx), %r14
-	sbbq	8(%rcx), %rbx
-	sbbq	16(%rcx), %rax
-	sbbq	24(%rcx), %r11
-	sbbq	32(%rcx), %r10
-	sbbq	40(%rcx), %rsi
-	sbbq	48(%rcx), %r9
-	sbbq	56(%rcx), %r8
+.Lfunc_end49:
+	.size	mcl_fp_shr1_6L, .Lfunc_end49-mcl_fp_shr1_6L
+                                        # -- End function
+	.globl	mcl_fp_add6L                    # -- Begin function mcl_fp_add6L
+	.p2align	4, 0x90
+	.type	mcl_fp_add6L,@function
+mcl_fp_add6L:                           # @mcl_fp_add6L
+# %bb.0:
+	movq	40(%rsi), %r8
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r10
+	movq	16(%rsi), %r11
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r11
+	adcq	24(%rdx), %r10
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r8
+	movq	%r8, 40(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r10, 24(%rdi)
+	movq	%r11, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rax, (%rdi)
+	setb	%dl
+	movzbl	%dl, %edx
+	subq	(%rcx), %rax
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %r11
+	sbbq	24(%rcx), %r10
+	sbbq	32(%rcx), %r9
+	sbbq	40(%rcx), %r8
 	sbbq	$0, %rdx
 	testb	$1, %dl
-	jne	.LBB120_2
-# BB#1:                                 # %nocarry
-	movq	%r14, (%rdi)
-	movq	%rbx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r11, 24(%rdi)
-	movq	%r10, 32(%rdi)
-	movq	%rsi, 40(%rdi)
-	movq	%r9, 48(%rdi)
-	movq	%r8, 56(%rdi)
-.LBB120_2:                              # %carry
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
+	jne	.LBB50_2
+# %bb.1:                                # %nocarry
+	movq	%rax, (%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r11, 16(%rdi)
+	movq	%r10, 24(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r8, 40(%rdi)
+.LBB50_2:                               # %carry
 	retq
-.Lfunc_end120:
-	.size	mcl_fp_add8L, .Lfunc_end120-mcl_fp_add8L
-
-	.globl	mcl_fp_addNF8L
-	.align	16, 0x90
-	.type	mcl_fp_addNF8L,@function
-mcl_fp_addNF8L:                         # @mcl_fp_addNF8L
-# BB#0:
-	pushq	%rbp
+.Lfunc_end50:
+	.size	mcl_fp_add6L, .Lfunc_end50-mcl_fp_add6L
+                                        # -- End function
+	.globl	mcl_fp_addNF6L                  # -- Begin function mcl_fp_addNF6L
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF6L,@function
+mcl_fp_addNF6L:                         # @mcl_fp_addNF6L
+# %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	56(%rdx), %r8
-	movq	48(%rdx), %rbp
-	movq	40(%rdx), %rbx
-	movq	32(%rdx), %rax
-	movq	24(%rdx), %r11
-	movq	16(%rdx), %r15
-	movq	(%rdx), %r13
-	movq	8(%rdx), %r12
-	addq	(%rsi), %r13
-	adcq	8(%rsi), %r12
-	adcq	16(%rsi), %r15
-	adcq	24(%rsi), %r11
-	adcq	32(%rsi), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	%rax, %r10
-	adcq	40(%rsi), %rbx
-	movq	%rbx, -16(%rsp)         # 8-byte Spill
-	movq	%rbx, %r9
-	adcq	48(%rsi), %rbp
-	movq	%rbp, -8(%rsp)          # 8-byte Spill
-	movq	%rbp, %rax
-	adcq	56(%rsi), %r8
-	movq	%r13, %rsi
-	subq	(%rcx), %rsi
-	movq	%r12, %rdx
-	sbbq	8(%rcx), %rdx
+	movq	40(%rdx), %r15
+	movq	32(%rdx), %r11
+	movq	24(%rdx), %r10
+	movq	16(%rdx), %r9
+	movq	(%rdx), %r8
+	movq	8(%rdx), %r14
+	addq	(%rsi), %r8
+	adcq	8(%rsi), %r14
+	adcq	16(%rsi), %r9
+	adcq	24(%rsi), %r10
+	adcq	32(%rsi), %r11
+	adcq	40(%rsi), %r15
+	movq	%r8, %r12
+	subq	(%rcx), %r12
+	movq	%r14, %r13
+	sbbq	8(%rcx), %r13
+	movq	%r9, %rdx
+	sbbq	16(%rcx), %rdx
+	movq	%r10, %rax
+	sbbq	24(%rcx), %rax
+	movq	%r11, %rsi
+	sbbq	32(%rcx), %rsi
 	movq	%r15, %rbx
-	sbbq	16(%rcx), %rbx
-	movq	%r11, %r14
-	sbbq	24(%rcx), %r14
-	movq	%r10, %rbp
-	sbbq	32(%rcx), %rbp
-	movq	%r9, %r10
-	sbbq	40(%rcx), %r10
-	movq	%rax, %r9
-	sbbq	48(%rcx), %r9
-	movq	%r8, %rax
-	sbbq	56(%rcx), %rax
-	testq	%rax, %rax
-	cmovsq	%r13, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r12, %rdx
-	movq	%rdx, 8(%rdi)
+	sbbq	40(%rcx), %rbx
+	movq	%rbx, %rcx
+	sarq	$63, %rcx
 	cmovsq	%r15, %rbx
-	movq	%rbx, 16(%rdi)
-	cmovsq	%r11, %r14
-	movq	%r14, 24(%rdi)
-	cmovsq	-24(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, 32(%rdi)
-	cmovsq	-16(%rsp), %r10         # 8-byte Folded Reload
-	movq	%r10, 40(%rdi)
-	cmovsq	-8(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 48(%rdi)
-	cmovsq	%r8, %rax
-	movq	%rax, 56(%rdi)
+	movq	%rbx, 40(%rdi)
+	cmovsq	%r11, %rsi
+	movq	%rsi, 32(%rdi)
+	cmovsq	%r10, %rax
+	movq	%rax, 24(%rdi)
+	cmovsq	%r9, %rdx
+	movq	%rdx, 16(%rdi)
+	cmovsq	%r14, %r13
+	movq	%r13, 8(%rdi)
+	cmovsq	%r8, %r12
+	movq	%r12, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
-	popq	%rbp
 	retq
-.Lfunc_end121:
-	.size	mcl_fp_addNF8L, .Lfunc_end121-mcl_fp_addNF8L
-
-	.globl	mcl_fp_sub8L
-	.align	16, 0x90
-	.type	mcl_fp_sub8L,@function
-mcl_fp_sub8L:                           # @mcl_fp_sub8L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
+.Lfunc_end51:
+	.size	mcl_fp_addNF6L, .Lfunc_end51-mcl_fp_addNF6L
+                                        # -- End function
+	.globl	mcl_fp_sub6L                    # -- Begin function mcl_fp_sub6L
+	.p2align	4, 0x90
+	.type	mcl_fp_sub6L,@function
+mcl_fp_sub6L:                           # @mcl_fp_sub6L
+# %bb.0:
 	pushq	%rbx
-	movq	56(%rdx), %r12
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r13
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r10
+	movq	40(%rsi), %r11
+	movq	32(%rsi), %r10
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %rax
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
 	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r10
-	movq	16(%rsi), %r11
-	sbbq	16(%rdx), %r11
-	movq	24(%rsi), %r15
-	sbbq	24(%rdx), %r15
-	movq	32(%rsi), %r14
-	sbbq	32(%rdx), %r14
-	movq	48(%rsi), %r9
-	movq	40(%rsi), %rsi
-	sbbq	40(%rdx), %rsi
-	movq	%rax, (%rdi)
-	movq	%r10, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	movq	%r15, 24(%rdi)
-	movq	%r14, 32(%rdi)
-	movq	%rsi, 40(%rdi)
-	sbbq	%r13, %r9
-	movq	%r9, 48(%rdi)
-	sbbq	%r12, %r8
-	movq	%r8, 56(%rdi)
-	sbbq	$0, %rbx
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %rax
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r10
+	sbbq	40(%rdx), %r11
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rbx, %rbx
 	testb	$1, %bl
-	je	.LBB122_2
-# BB#1:                                 # %carry
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	movq	8(%rcx), %rax
-	adcq	%r10, %rax
-	movq	%rax, 8(%rdi)
-	movq	16(%rcx), %rax
-	adcq	%r11, %rax
+	jne	.LBB52_2
+# %bb.1:                                # %nocarry
+	popq	%rbx
+	retq
+.LBB52_2:                               # %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %rax
+	adcq	24(%rcx), %r9
+	adcq	32(%rcx), %r10
+	adcq	40(%rcx), %r11
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
 	movq	%rax, 16(%rdi)
-	movq	24(%rcx), %rax
-	adcq	%r15, %rax
-	movq	%rax, 24(%rdi)
-	movq	32(%rcx), %rax
-	adcq	%r14, %rax
-	movq	%rax, 32(%rdi)
-	movq	40(%rcx), %rax
-	adcq	%rsi, %rax
-	movq	%rax, 40(%rdi)
-	movq	48(%rcx), %rax
-	adcq	%r9, %rax
-	movq	%rax, 48(%rdi)
-	movq	56(%rcx), %rax
-	adcq	%r8, %rax
-	movq	%rax, 56(%rdi)
-.LBB122_2:                              # %nocarry
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
 	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
 	retq
-.Lfunc_end122:
-	.size	mcl_fp_sub8L, .Lfunc_end122-mcl_fp_sub8L
-
-	.globl	mcl_fp_subNF8L
-	.align	16, 0x90
-	.type	mcl_fp_subNF8L,@function
-mcl_fp_subNF8L:                         # @mcl_fp_subNF8L
-# BB#0:
-	pushq	%rbp
+.Lfunc_end52:
+	.size	mcl_fp_sub6L, .Lfunc_end52-mcl_fp_sub6L
+                                        # -- End function
+	.globl	mcl_fp_subNF6L                  # -- Begin function mcl_fp_subNF6L
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF6L,@function
+mcl_fp_subNF6L:                         # @mcl_fp_subNF6L
+# %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r8
-	movq	%rdi, %r9
-	movq	56(%rsi), %r14
-	movq	48(%rsi), %rax
-	movq	40(%rsi), %rcx
-	movq	32(%rsi), %rdi
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %r15
-	movq	(%rsi), %r13
-	movq	8(%rsi), %r12
-	subq	(%rdx), %r13
-	sbbq	8(%rdx), %r12
-	sbbq	16(%rdx), %r15
-	sbbq	24(%rdx), %r11
-	sbbq	32(%rdx), %rdi
-	movq	%rdi, -24(%rsp)         # 8-byte Spill
-	sbbq	40(%rdx), %rcx
-	movq	%rcx, -16(%rsp)         # 8-byte Spill
-	sbbq	48(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	sbbq	56(%rdx), %r14
-	movq	%r14, %rsi
-	sarq	$63, %rsi
-	movq	56(%r8), %r10
-	andq	%rsi, %r10
-	movq	48(%r8), %rbx
-	andq	%rsi, %rbx
-	movq	40(%r8), %rdi
-	andq	%rsi, %rdi
-	movq	32(%r8), %rbp
-	andq	%rsi, %rbp
-	movq	24(%r8), %rdx
-	andq	%rsi, %rdx
-	movq	16(%r8), %rcx
-	andq	%rsi, %rcx
-	movq	8(%r8), %rax
-	andq	%rsi, %rax
-	andq	(%r8), %rsi
-	addq	%r13, %rsi
-	adcq	%r12, %rax
-	movq	%rsi, (%r9)
-	adcq	%r15, %rcx
-	movq	%rax, 8(%r9)
-	movq	%rcx, 16(%r9)
-	adcq	%r11, %rdx
-	movq	%rdx, 24(%r9)
-	adcq	-24(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, 32(%r9)
-	adcq	-16(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, 40(%r9)
-	adcq	-8(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, 48(%r9)
-	adcq	%r14, %r10
-	movq	%r10, 56(%r9)
+	movq	40(%rsi), %r15
+	movq	32(%rsi), %r8
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r11
+	movq	8(%rsi), %r14
+	subq	(%rdx), %r11
+	sbbq	8(%rdx), %r14
+	sbbq	16(%rdx), %r10
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r8
+	sbbq	40(%rdx), %r15
+	movq	%r15, %rdx
+	sarq	$63, %rdx
+	movq	%rdx, %rbx
+	shldq	$1, %r15, %rbx
+	andq	(%rcx), %rbx
+	movq	40(%rcx), %r12
+	andq	%rdx, %r12
+	movq	32(%rcx), %r13
+	andq	%rdx, %r13
+	movq	24(%rcx), %rsi
+	andq	%rdx, %rsi
+	movq	16(%rcx), %rax
+	andq	%rdx, %rax
+	andq	8(%rcx), %rdx
+	addq	%r11, %rbx
+	movq	%rbx, (%rdi)
+	adcq	%r14, %rdx
+	movq	%rdx, 8(%rdi)
+	adcq	%r10, %rax
+	movq	%rax, 16(%rdi)
+	adcq	%r9, %rsi
+	movq	%rsi, 24(%rdi)
+	adcq	%r8, %r13
+	movq	%r13, 32(%rdi)
+	adcq	%r15, %r12
+	movq	%r12, 40(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
-	popq	%rbp
 	retq
-.Lfunc_end123:
-	.size	mcl_fp_subNF8L, .Lfunc_end123-mcl_fp_subNF8L
-
-	.globl	mcl_fpDbl_add8L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add8L,@function
-mcl_fpDbl_add8L:                        # @mcl_fpDbl_add8L
-# BB#0:
+.Lfunc_end53:
+	.size	mcl_fp_subNF6L, .Lfunc_end53-mcl_fp_subNF6L
+                                        # -- End function
+	.globl	mcl_fpDbl_add6L                 # -- Begin function mcl_fpDbl_add6L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add6L,@function
+mcl_fpDbl_add6L:                        # @mcl_fpDbl_add6L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r8
-	movq	120(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	112(%rdx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	104(%rdx), %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	96(%rdx), %r14
-	movq	24(%rsi), %r15
-	movq	32(%rsi), %r11
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rax
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r12
-	adcq	24(%rdx), %r15
-	adcq	32(%rdx), %r11
-	movq	88(%rdx), %rbp
-	movq	80(%rdx), %r13
-	movq	%rbx, (%rdi)
-	movq	72(%rdx), %r10
-	movq	%rax, 8(%rdi)
-	movq	64(%rdx), %r9
-	movq	%r12, 16(%rdi)
-	movq	40(%rdx), %r12
-	movq	%r15, 24(%rdi)
-	movq	40(%rsi), %rbx
-	adcq	%r12, %rbx
-	movq	56(%rdx), %r15
-	movq	48(%rdx), %r12
-	movq	%r11, 32(%rdi)
-	movq	48(%rsi), %rdx
-	adcq	%r12, %rdx
-	movq	120(%rsi), %r12
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rax
-	adcq	%r15, %rax
-	movq	112(%rsi), %rcx
-	movq	%rdx, 48(%rdi)
-	movq	64(%rsi), %rbx
-	adcq	%r9, %rbx
-	movq	104(%rsi), %rdx
-	movq	%rax, 56(%rdi)
-	movq	72(%rsi), %r9
-	adcq	%r10, %r9
-	movq	80(%rsi), %r11
-	adcq	%r13, %r11
-	movq	96(%rsi), %rax
 	movq	88(%rsi), %r15
-	adcq	%rbp, %r15
-	adcq	%r14, %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	%rdx, %rax
-	adcq	-32(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	adcq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	adcq	-8(%rsp), %r12          # 8-byte Folded Reload
-	movq	%r12, -8(%rsp)          # 8-byte Spill
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	movq	%rbx, %rsi
-	subq	(%r8), %rsi
-	movq	%r9, %rdx
-	sbbq	8(%r8), %rdx
-	movq	%r11, %r10
-	sbbq	16(%r8), %r10
-	movq	%r15, %r14
-	sbbq	24(%r8), %r14
-	movq	-16(%rsp), %r13         # 8-byte Reload
-	sbbq	32(%r8), %r13
-	movq	%rax, %r12
-	sbbq	40(%r8), %r12
-	movq	%rcx, %rax
-	sbbq	48(%r8), %rax
-	movq	-8(%rsp), %rcx          # 8-byte Reload
-	sbbq	56(%r8), %rcx
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	cmovneq	%rbx, %rsi
-	movq	%rsi, 64(%rdi)
-	testb	%bpl, %bpl
-	cmovneq	%r9, %rdx
-	movq	%rdx, 72(%rdi)
-	cmovneq	%r11, %r10
-	movq	%r10, 80(%rdi)
-	cmovneq	%r15, %r14
-	movq	%r14, 88(%rdi)
-	cmovneq	-16(%rsp), %r13         # 8-byte Folded Reload
-	movq	%r13, 96(%rdi)
-	cmovneq	-32(%rsp), %r12         # 8-byte Folded Reload
-	movq	%r12, 104(%rdi)
-	cmovneq	-24(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, 112(%rdi)
-	cmovneq	-8(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 120(%rdi)
+	movq	80(%rsi), %r14
+	movq	72(%rsi), %r11
+	movq	64(%rsi), %r10
+	movq	56(%rsi), %r9
+	movq	48(%rsi), %r8
+	movq	40(%rsi), %rax
+	movq	(%rsi), %r12
+	movq	8(%rsi), %r13
+	addq	(%rdx), %r12
+	adcq	8(%rdx), %r13
+	movq	32(%rsi), %rbx
+	movq	24(%rsi), %rbp
+	movq	16(%rsi), %rsi
+	adcq	16(%rdx), %rsi
+	adcq	24(%rdx), %rbp
+	adcq	32(%rdx), %rbx
+	adcq	40(%rdx), %rax
+	adcq	48(%rdx), %r8
+	adcq	56(%rdx), %r9
+	adcq	64(%rdx), %r10
+	adcq	72(%rdx), %r11
+	adcq	80(%rdx), %r14
+	adcq	88(%rdx), %r15
+	movq	%rax, 40(%rdi)
+	movq	%rbx, 32(%rdi)
+	movq	%rbp, 24(%rdi)
+	movq	%rsi, 16(%rdi)
+	movq	%r13, 8(%rdi)
+	movq	%r12, (%rdi)
+	setb	%al
+	movzbl	%al, %r12d
+	movq	%r8, %r13
+	subq	(%rcx), %r13
+	movq	%r9, %rsi
+	sbbq	8(%rcx), %rsi
+	movq	%r10, %rbx
+	sbbq	16(%rcx), %rbx
+	movq	%r11, %rbp
+	sbbq	24(%rcx), %rbp
+	movq	%r14, %rax
+	sbbq	32(%rcx), %rax
+	movq	%r15, %rdx
+	sbbq	40(%rcx), %rdx
+	sbbq	$0, %r12
+	testb	$1, %r12b
+	cmovneq	%r15, %rdx
+	movq	%rdx, 88(%rdi)
+	cmovneq	%r14, %rax
+	movq	%rax, 80(%rdi)
+	cmovneq	%r11, %rbp
+	movq	%rbp, 72(%rdi)
+	cmovneq	%r10, %rbx
+	movq	%rbx, 64(%rdi)
+	cmovneq	%r9, %rsi
+	movq	%rsi, 56(%rdi)
+	cmovneq	%r8, %r13
+	movq	%r13, 48(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13569,114 +5677,83 @@ mcl_fpDbl_add8L:                        # @mcl_fpDbl_add8L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end124:
-	.size	mcl_fpDbl_add8L, .Lfunc_end124-mcl_fpDbl_add8L
-
-	.globl	mcl_fpDbl_sub8L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub8L,@function
-mcl_fpDbl_sub8L:                        # @mcl_fpDbl_sub8L
-# BB#0:
+.Lfunc_end54:
+	.size	mcl_fpDbl_add6L, .Lfunc_end54-mcl_fpDbl_add6L
+                                        # -- End function
+	.globl	mcl_fpDbl_sub6L                 # -- Begin function mcl_fpDbl_sub6L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub6L,@function
+mcl_fpDbl_sub6L:                        # @mcl_fpDbl_sub6L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r15
-	movq	120(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	112(%rdx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	104(%rdx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %r9
-	movq	(%rsi), %r12
-	movq	8(%rsi), %r14
-	xorl	%r8d, %r8d
-	subq	(%rdx), %r12
-	sbbq	8(%rdx), %r14
-	sbbq	16(%rdx), %r9
-	movq	24(%rsi), %rbx
-	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %r13
-	sbbq	32(%rdx), %r13
-	movq	96(%rdx), %rbp
-	movq	88(%rdx), %r11
-	movq	%r12, (%rdi)
-	movq	80(%rdx), %r12
-	movq	%r14, 8(%rdi)
-	movq	72(%rdx), %r10
-	movq	%r9, 16(%rdi)
-	movq	40(%rdx), %r9
-	movq	%rbx, 24(%rdi)
+	movq	%rcx, %r10
+	movq	88(%rsi), %r15
+	movq	80(%rsi), %r14
+	movq	72(%rsi), %r11
+	movq	64(%rsi), %r9
+	movq	56(%rsi), %r8
+	movq	48(%rsi), %rax
+	movq	%rax, -16(%rsp)                 # 8-byte Spill
+	movq	(%rsi), %rcx
+	movq	8(%rsi), %r13
+	xorl	%eax, %eax
+	subq	(%rdx), %rcx
+	movq	%rcx, -8(%rsp)                  # 8-byte Spill
+	sbbq	8(%rdx), %r13
 	movq	40(%rsi), %rbx
-	sbbq	%r9, %rbx
-	movq	48(%rdx), %r9
-	movq	%r13, 32(%rdi)
-	movq	48(%rsi), %r14
-	sbbq	%r9, %r14
-	movq	64(%rdx), %r13
-	movq	56(%rdx), %r9
+	movq	32(%rsi), %rbp
+	movq	24(%rsi), %rcx
+	movq	16(%rsi), %rsi
+	sbbq	16(%rdx), %rsi
+	sbbq	24(%rdx), %rcx
+	sbbq	32(%rdx), %rbp
+	sbbq	40(%rdx), %rbx
+	movq	-16(%rsp), %r12                 # 8-byte Reload
+	sbbq	48(%rdx), %r12
+	movq	%r12, -16(%rsp)                 # 8-byte Spill
+	sbbq	56(%rdx), %r8
+	sbbq	64(%rdx), %r9
+	sbbq	72(%rdx), %r11
+	sbbq	80(%rdx), %r14
+	sbbq	88(%rdx), %r15
 	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rdx
-	sbbq	%r9, %rdx
-	movq	120(%rsi), %rcx
-	movq	%r14, 48(%rdi)
-	movq	64(%rsi), %rbx
-	sbbq	%r13, %rbx
-	movq	112(%rsi), %rax
-	movq	%rdx, 56(%rdi)
-	movq	72(%rsi), %r9
-	sbbq	%r10, %r9
-	movq	80(%rsi), %r13
-	sbbq	%r12, %r13
-	movq	88(%rsi), %r12
-	sbbq	%r11, %r12
-	movq	104(%rsi), %rdx
-	movq	96(%rsi), %r14
-	sbbq	%rbp, %r14
-	sbbq	-24(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, -24(%rsp)         # 8-byte Spill
-	sbbq	-16(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	sbbq	-8(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, -8(%rsp)          # 8-byte Spill
-	movl	$0, %ebp
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	movq	(%r15), %r11
-	cmoveq	%r8, %r11
-	testb	%bpl, %bpl
-	movq	16(%r15), %rbp
-	cmoveq	%r8, %rbp
-	movq	8(%r15), %rsi
-	cmoveq	%r8, %rsi
-	movq	56(%r15), %r10
-	cmoveq	%r8, %r10
-	movq	48(%r15), %rdx
-	cmoveq	%r8, %rdx
-	movq	40(%r15), %rcx
-	cmoveq	%r8, %rcx
-	movq	32(%r15), %rax
-	cmoveq	%r8, %rax
-	cmovneq	24(%r15), %r8
-	addq	%rbx, %r11
-	adcq	%r9, %rsi
-	movq	%r11, 64(%rdi)
-	adcq	%r13, %rbp
+	movq	%rbp, 32(%rdi)
+	movq	%rcx, 24(%rdi)
+	movq	%rsi, 16(%rdi)
+	movq	%r13, 8(%rdi)
+	movq	-8(%rsp), %rcx                  # 8-byte Reload
+	movq	%rcx, (%rdi)
+	sbbq	%rax, %rax
+	andl	$1, %eax
+	negq	%rax
+	movq	40(%r10), %rcx
+	andq	%rax, %rcx
+	movq	32(%r10), %rdx
+	andq	%rax, %rdx
+	movq	24(%r10), %rsi
+	andq	%rax, %rsi
+	movq	16(%r10), %rbx
+	andq	%rax, %rbx
+	movq	8(%r10), %rbp
+	andq	%rax, %rbp
+	andq	(%r10), %rax
+	addq	-16(%rsp), %rax                 # 8-byte Folded Reload
+	movq	%rax, 48(%rdi)
+	adcq	%r8, %rbp
+	movq	%rbp, 56(%rdi)
+	adcq	%r9, %rbx
+	movq	%rbx, 64(%rdi)
+	adcq	%r11, %rsi
 	movq	%rsi, 72(%rdi)
-	movq	%rbp, 80(%rdi)
-	adcq	%r12, %r8
-	movq	%r8, 88(%rdi)
-	adcq	%r14, %rax
-	movq	%rax, 96(%rdi)
-	adcq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, 104(%rdi)
-	adcq	-16(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, 112(%rdi)
-	adcq	-8(%rsp), %r10          # 8-byte Folded Reload
-	movq	%r10, 120(%rdi)
+	adcq	%r14, %rdx
+	movq	%rdx, 80(%rdi)
+	adcq	%r15, %rcx
+	movq	%rcx, 88(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13684,72 +5761,67 @@ mcl_fpDbl_sub8L:                        # @mcl_fpDbl_sub8L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end125:
-	.size	mcl_fpDbl_sub8L, .Lfunc_end125-mcl_fpDbl_sub8L
-
-	.align	16, 0x90
-	.type	.LmulPv576x64,@function
-.LmulPv576x64:                          # @mulPv576x64
-# BB#0:
+.Lfunc_end55:
+	.size	mcl_fpDbl_sub6L, .Lfunc_end55-mcl_fpDbl_sub6L
+                                        # -- End function
+	.globl	mulPv512x64                     # -- Begin function mulPv512x64
+	.p2align	4, 0x90
+	.type	mulPv512x64,@function
+mulPv512x64:                            # @mulPv512x64
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, %rbx
-	movq	%rbx, %rax
+	movq	%rdx, %rcx
+	movq	%rdx, %rax
 	mulq	(%rsi)
-	movq	%rdx, -32(%rsp)         # 8-byte Spill
+	movq	%rdx, -24(%rsp)                 # 8-byte Spill
 	movq	%rax, (%rdi)
-	movq	%rbx, %rax
-	mulq	64(%rsi)
-	movq	%rdx, %r10
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	%rbx, %rax
+	movq	%rcx, %rax
 	mulq	56(%rsi)
-	movq	%rdx, %r14
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
+	movq	%rdx, %r10
+	movq	%rax, -8(%rsp)                  # 8-byte Spill
+	movq	%rcx, %rax
 	mulq	48(%rsi)
-	movq	%rdx, %r12
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
+	movq	%rdx, %r11
+	movq	%rax, -16(%rsp)                 # 8-byte Spill
+	movq	%rcx, %rax
 	mulq	40(%rsi)
-	movq	%rdx, %rcx
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	%rbx, %rax
+	movq	%rdx, %r12
+	movq	%rax, %r15
+	movq	%rcx, %rax
 	mulq	32(%rsi)
+	movq	%rdx, %rbx
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	mulq	24(%rsi)
 	movq	%rdx, %rbp
 	movq	%rax, %r8
-	movq	%rbx, %rax
-	mulq	24(%rsi)
-	movq	%rdx, %r9
-	movq	%rax, %r11
-	movq	%rbx, %rax
+	movq	%rcx, %rax
 	mulq	16(%rsi)
-	movq	%rdx, %r15
-	movq	%rax, %r13
-	movq	%rbx, %rax
+	movq	%rdx, %r9
+	movq	%rax, %r14
+	movq	%rcx, %rax
 	mulq	8(%rsi)
-	addq	-32(%rsp), %rax         # 8-byte Folded Reload
+	addq	-24(%rsp), %rax                 # 8-byte Folded Reload
 	movq	%rax, 8(%rdi)
-	adcq	%r13, %rdx
+	adcq	%r14, %rdx
 	movq	%rdx, 16(%rdi)
-	adcq	%r11, %r15
-	movq	%r15, 24(%rdi)
 	adcq	%r8, %r9
-	movq	%r9, 32(%rdi)
-	adcq	-40(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, 40(%rdi)
-	adcq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, 48(%rdi)
-	adcq	-16(%rsp), %r12         # 8-byte Folded Reload
-	movq	%r12, 56(%rdi)
-	adcq	-8(%rsp), %r14          # 8-byte Folded Reload
-	movq	%r14, 64(%rdi)
+	movq	%r9, 24(%rdi)
+	adcq	%r13, %rbp
+	movq	%rbp, 32(%rdi)
+	adcq	%r15, %rbx
+	movq	%rbx, 40(%rdi)
+	adcq	-16(%rsp), %r12                 # 8-byte Folded Reload
+	movq	%r12, 48(%rdi)
+	adcq	-8(%rsp), %r11                  # 8-byte Folded Reload
+	movq	%r11, 56(%rdi)
 	adcq	$0, %r10
-	movq	%r10, 72(%rdi)
+	movq	%r10, 64(%rdi)
 	movq	%rdi, %rax
 	popq	%rbx
 	popq	%r12
@@ -13758,351 +5830,471 @@ mcl_fpDbl_sub8L:                        # @mcl_fpDbl_sub8L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end126:
-	.size	.LmulPv576x64, .Lfunc_end126-.LmulPv576x64
-
-	.globl	mcl_fp_mulUnitPre9L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre9L,@function
-mcl_fp_mulUnitPre9L:                    # @mcl_fp_mulUnitPre9L
-# BB#0:
-	pushq	%r14
-	pushq	%rbx
-	subq	$88, %rsp
-	movq	%rdi, %rbx
-	leaq	8(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	80(%rsp), %r8
-	movq	72(%rsp), %r9
-	movq	64(%rsp), %r10
-	movq	56(%rsp), %r11
-	movq	48(%rsp), %r14
-	movq	40(%rsp), %rax
-	movq	32(%rsp), %rcx
-	movq	24(%rsp), %rdx
-	movq	8(%rsp), %rsi
-	movq	16(%rsp), %rdi
-	movq	%rsi, (%rbx)
-	movq	%rdi, 8(%rbx)
-	movq	%rdx, 16(%rbx)
-	movq	%rcx, 24(%rbx)
-	movq	%rax, 32(%rbx)
-	movq	%r14, 40(%rbx)
-	movq	%r11, 48(%rbx)
-	movq	%r10, 56(%rbx)
-	movq	%r9, 64(%rbx)
-	movq	%r8, 72(%rbx)
-	addq	$88, %rsp
-	popq	%rbx
-	popq	%r14
-	retq
-.Lfunc_end127:
-	.size	mcl_fp_mulUnitPre9L, .Lfunc_end127-mcl_fp_mulUnitPre9L
-
-	.globl	mcl_fpDbl_mulPre9L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre9L,@function
-mcl_fpDbl_mulPre9L:                     # @mcl_fpDbl_mulPre9L
-# BB#0:
+.Lfunc_end56:
+	.size	mulPv512x64, .Lfunc_end56-mulPv512x64
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre8L             # -- Begin function mcl_fp_mulUnitPre8L
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre8L,@function
+mcl_fp_mulUnitPre8L:                    # @mcl_fp_mulUnitPre8L
+# %bb.0:
+	pushq	%rbx
+	subq	$80, %rsp
+	movq	%rdi, %rbx
+	leaq	8(%rsp), %rdi
+	callq	mulPv512x64@PLT
+	movq	8(%rsp), %r8
+	movq	16(%rsp), %r9
+	movq	24(%rsp), %r10
+	movq	32(%rsp), %r11
+	movq	40(%rsp), %rdi
+	movq	48(%rsp), %rax
+	movq	56(%rsp), %rcx
+	movq	64(%rsp), %rdx
+	movq	72(%rsp), %rsi
+	movq	%rsi, 64(%rbx)
+	movq	%rdx, 56(%rbx)
+	movq	%rcx, 48(%rbx)
+	movq	%rax, 40(%rbx)
+	movq	%rdi, 32(%rbx)
+	movq	%r11, 24(%rbx)
+	movq	%r10, 16(%rbx)
+	movq	%r9, 8(%rbx)
+	movq	%r8, (%rbx)
+	addq	$80, %rsp
+	popq	%rbx
+	retq
+.Lfunc_end57:
+	.size	mcl_fp_mulUnitPre8L, .Lfunc_end57-mcl_fp_mulUnitPre8L
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre8L              # -- Begin function mcl_fpDbl_mulPre8L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre8L,@function
+mcl_fpDbl_mulPre8L:                     # @mcl_fpDbl_mulPre8L
+# %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	subq	$648, %rsp                      # imm = 0x288
+	movq	%rdx, %rax
+	movq	%rdi, 32(%rsp)                  # 8-byte Spill
+	movq	(%rdx), %rdx
+	movq	%rax, %r12
+	movq	%rax, 40(%rsp)                  # 8-byte Spill
+	leaq	576(%rsp), %rdi
+	movq	%rsi, %r15
+	callq	mulPv512x64@PLT
+	movq	640(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	movq	632(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	624(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	616(%rsp), %r13
+	movq	608(%rsp), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	movq	600(%rsp), %rbp
+	movq	592(%rsp), %rbx
+	movq	576(%rsp), %rax
+	movq	584(%rsp), %r14
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	movq	%rax, (%rcx)
+	movq	8(%r12), %rdx
+	leaq	504(%rsp), %rdi
+	movq	%r15, %rsi
+	movq	%r15, 56(%rsp)                  # 8-byte Spill
+	callq	mulPv512x64@PLT
+	movq	568(%rsp), %r12
+	addq	504(%rsp), %r14
+	adcq	512(%rsp), %rbx
+	movq	%rbx, 24(%rsp)                  # 8-byte Spill
+	adcq	520(%rsp), %rbp
+	movq	%rbp, 64(%rsp)                  # 8-byte Spill
+	movq	48(%rsp), %rax                  # 8-byte Reload
+	adcq	528(%rsp), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	adcq	536(%rsp), %r13
+	movq	16(%rsp), %rbp                  # 8-byte Reload
+	adcq	544(%rsp), %rbp
+	movq	8(%rsp), %rax                   # 8-byte Reload
+	adcq	552(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rax                    # 8-byte Reload
+	adcq	560(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	movq	%r14, 8(%rax)
+	adcq	$0, %r12
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	movq	16(%rax), %rdx
+	leaq	432(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	mulPv512x64@PLT
+	movq	496(%rsp), %r15
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	addq	432(%rsp), %rcx
+	movq	64(%rsp), %rax                  # 8-byte Reload
+	adcq	440(%rsp), %rax
+	movq	%rax, 64(%rsp)                  # 8-byte Spill
+	movq	48(%rsp), %rbx                  # 8-byte Reload
+	adcq	448(%rsp), %rbx
+	adcq	456(%rsp), %r13
+	movq	%r13, 24(%rsp)                  # 8-byte Spill
+	adcq	464(%rsp), %rbp
+	movq	%rbp, 16(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rbp                   # 8-byte Reload
+	adcq	472(%rsp), %rbp
+	movq	(%rsp), %rax                    # 8-byte Reload
+	adcq	480(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	adcq	488(%rsp), %r12
+	movq	32(%rsp), %r14                  # 8-byte Reload
+	movq	%rcx, 16(%r14)
+	adcq	$0, %r15
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	movq	24(%rax), %rdx
+	leaq	360(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	424(%rsp), %r13
+	movq	64(%rsp), %rcx                  # 8-byte Reload
+	addq	360(%rsp), %rcx
+	adcq	368(%rsp), %rbx
+	movq	%rbx, 48(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	376(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	384(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	adcq	392(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rbx                    # 8-byte Reload
+	adcq	400(%rsp), %rbx
+	adcq	408(%rsp), %r12
+	adcq	416(%rsp), %r15
+	movq	%rcx, 24(%r14)
+	adcq	$0, %r13
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	movq	32(%rax), %rdx
+	leaq	288(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	352(%rsp), %r14
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	addq	288(%rsp), %rcx
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	296(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	304(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rbp                   # 8-byte Reload
+	adcq	312(%rsp), %rbp
+	adcq	320(%rsp), %rbx
+	movq	%rbx, (%rsp)                    # 8-byte Spill
+	adcq	328(%rsp), %r12
+	adcq	336(%rsp), %r15
+	adcq	344(%rsp), %r13
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	movq	%rcx, 32(%rax)
+	adcq	$0, %r14
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	movq	40(%rax), %rdx
+	leaq	216(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	280(%rsp), %rbx
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	addq	216(%rsp), %rax
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	224(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	adcq	232(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	adcq	248(%rsp), %r12
+	adcq	256(%rsp), %r15
+	adcq	264(%rsp), %r13
+	adcq	272(%rsp), %r14
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	movq	%rax, 40(%rcx)
+	adcq	$0, %rbx
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	movq	48(%rax), %rdx
+	leaq	144(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	208(%rsp), %rbp
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	addq	144(%rsp), %rax
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	152(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	160(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	adcq	168(%rsp), %r12
+	adcq	176(%rsp), %r15
+	adcq	184(%rsp), %r13
+	adcq	192(%rsp), %r14
+	adcq	200(%rsp), %rbx
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	movq	%rax, 48(%rcx)
+	adcq	$0, %rbp
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	movq	56(%rax), %rdx
+	leaq	72(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	136(%rsp), %rax
+	movq	8(%rsp), %rsi                   # 8-byte Reload
+	addq	72(%rsp), %rsi
+	movq	(%rsp), %rdx                    # 8-byte Reload
+	adcq	80(%rsp), %rdx
+	adcq	88(%rsp), %r12
+	adcq	96(%rsp), %r15
+	adcq	104(%rsp), %r13
+	adcq	112(%rsp), %r14
+	adcq	120(%rsp), %rbx
+	adcq	128(%rsp), %rbp
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	movq	%rbp, 112(%rcx)
+	movq	%rbx, 104(%rcx)
+	movq	%r14, 96(%rcx)
+	movq	%r13, 88(%rcx)
+	movq	%r15, 80(%rcx)
+	movq	%r12, 72(%rcx)
+	movq	%rdx, 64(%rcx)
+	movq	%rsi, 56(%rcx)
+	adcq	$0, %rax
+	movq	%rax, 120(%rcx)
+	addq	$648, %rsp                      # imm = 0x288
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	retq
+.Lfunc_end58:
+	.size	mcl_fpDbl_mulPre8L, .Lfunc_end58-mcl_fpDbl_mulPre8L
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre8L              # -- Begin function mcl_fpDbl_sqrPre8L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre8L,@function
+mcl_fpDbl_sqrPre8L:                     # @mcl_fpDbl_sqrPre8L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$808, %rsp              # imm = 0x328
-	movq	%rdx, %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	%rsi, 72(%rsp)          # 8-byte Spill
+	subq	$648, %rsp                      # imm = 0x288
+	movq	%rsi, %r15
 	movq	%rdi, %r12
-	movq	%r12, 80(%rsp)          # 8-byte Spill
-	movq	(%rax), %rdx
-	movq	%rax, %rbx
-	leaq	728(%rsp), %rdi
-	movq	%rsi, %rbp
-	callq	.LmulPv576x64
-	movq	800(%rsp), %r13
-	movq	792(%rsp), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	movq	784(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	776(%rsp), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	768(%rsp), %rax
-	movq	%rax, 32(%rsp)          # 8-byte Spill
-	movq	760(%rsp), %rax
-	movq	%rax, 24(%rsp)          # 8-byte Spill
-	movq	752(%rsp), %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	movq	744(%rsp), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	728(%rsp), %rax
-	movq	736(%rsp), %r14
+	movq	%rdi, 56(%rsp)                  # 8-byte Spill
+	movq	(%rsi), %rdx
+	leaq	576(%rsp), %rdi
+	callq	mulPv512x64@PLT
+	movq	640(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	632(%rsp), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	movq	624(%rsp), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	movq	616(%rsp), %rax
+	movq	%rax, 40(%rsp)                  # 8-byte Spill
+	movq	608(%rsp), %r13
+	movq	600(%rsp), %rbp
+	movq	592(%rsp), %rbx
+	movq	576(%rsp), %rax
+	movq	584(%rsp), %r14
 	movq	%rax, (%r12)
-	movq	8(%rbx), %rdx
-	leaq	648(%rsp), %rdi
-	movq	%rbp, %rsi
-	callq	.LmulPv576x64
-	movq	720(%rsp), %r8
-	movq	712(%rsp), %rcx
-	movq	704(%rsp), %rdx
-	movq	696(%rsp), %rsi
-	movq	688(%rsp), %rdi
-	movq	680(%rsp), %rbp
-	addq	648(%rsp), %r14
-	movq	672(%rsp), %rax
-	movq	656(%rsp), %rbx
-	movq	664(%rsp), %r15
-	movq	%r14, 8(%r12)
-	adcq	8(%rsp), %rbx           # 8-byte Folded Reload
-	adcq	16(%rsp), %r15          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, %r14
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 16(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 24(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 32(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 40(%rsp)          # 8-byte Spill
-	adcq	%r13, %rcx
-	movq	%rcx, 48(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	64(%rsp), %r13          # 8-byte Reload
-	movq	16(%r13), %rdx
-	leaq	568(%rsp), %rdi
-	movq	72(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	640(%rsp), %r8
-	movq	632(%rsp), %r9
-	movq	624(%rsp), %r10
-	movq	616(%rsp), %rdi
-	movq	608(%rsp), %rbp
-	movq	600(%rsp), %rcx
-	addq	568(%rsp), %rbx
-	movq	592(%rsp), %rdx
-	movq	576(%rsp), %r12
-	movq	584(%rsp), %rsi
-	movq	80(%rsp), %rax          # 8-byte Reload
-	movq	%rbx, 16(%rax)
-	adcq	%r15, %r12
-	adcq	%r14, %rsi
-	movq	%rsi, (%rsp)            # 8-byte Spill
-	adcq	16(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 8(%rsp)           # 8-byte Spill
-	adcq	24(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %r10          # 8-byte Folded Reload
-	movq	%r10, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 48(%rsp)           # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	24(%r13), %rdx
-	leaq	488(%rsp), %rdi
-	movq	72(%rsp), %r15          # 8-byte Reload
+	movq	8(%r15), %rdx
+	leaq	504(%rsp), %rdi
 	movq	%r15, %rsi
-	callq	.LmulPv576x64
-	movq	560(%rsp), %r8
-	movq	552(%rsp), %rcx
-	movq	544(%rsp), %rdx
-	movq	536(%rsp), %rsi
-	movq	528(%rsp), %rdi
-	movq	520(%rsp), %rbp
-	addq	488(%rsp), %r12
-	movq	512(%rsp), %rax
-	movq	496(%rsp), %rbx
-	movq	504(%rsp), %r13
-	movq	80(%rsp), %r14          # 8-byte Reload
-	movq	%r12, 24(%r14)
-	adcq	(%rsp), %rbx            # 8-byte Folded Reload
-	adcq	8(%rsp), %r13           # 8-byte Folded Reload
-	adcq	16(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	adcq	24(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 48(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	64(%rsp), %r12          # 8-byte Reload
-	movq	32(%r12), %rdx
-	leaq	408(%rsp), %rdi
+	callq	mulPv512x64@PLT
+	movq	568(%rsp), %rax
+	addq	504(%rsp), %r14
+	adcq	512(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	adcq	520(%rsp), %rbp
+	movq	%rbp, 64(%rsp)                  # 8-byte Spill
+	adcq	528(%rsp), %r13
+	movq	%r13, %rbx
+	movq	40(%rsp), %r13                  # 8-byte Reload
+	adcq	536(%rsp), %r13
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	544(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	adcq	552(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %r12                  # 8-byte Reload
+	adcq	560(%rsp), %r12
+	movq	56(%rsp), %rcx                  # 8-byte Reload
+	movq	%r14, 8(%rcx)
+	adcq	$0, %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	16(%r15), %rdx
+	leaq	432(%rsp), %rdi
 	movq	%r15, %rsi
-	callq	.LmulPv576x64
-	movq	480(%rsp), %r8
-	movq	472(%rsp), %r9
-	movq	464(%rsp), %rdx
-	movq	456(%rsp), %rsi
-	movq	448(%rsp), %rdi
-	movq	440(%rsp), %rbp
-	addq	408(%rsp), %rbx
-	movq	432(%rsp), %rax
-	movq	416(%rsp), %r15
-	movq	424(%rsp), %rcx
-	movq	%rbx, 32(%r14)
-	adcq	%r13, %r15
-	adcq	8(%rsp), %rcx           # 8-byte Folded Reload
-	movq	%rcx, (%rsp)            # 8-byte Spill
-	adcq	16(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	adcq	24(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 48(%rsp)           # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	%r12, %r14
-	movq	40(%r14), %rdx
-	leaq	328(%rsp), %rdi
-	movq	72(%rsp), %r13          # 8-byte Reload
-	movq	%r13, %rsi
-	callq	.LmulPv576x64
-	movq	400(%rsp), %r8
-	movq	392(%rsp), %r9
-	movq	384(%rsp), %rsi
-	movq	376(%rsp), %rdi
-	movq	368(%rsp), %rbx
-	movq	360(%rsp), %rbp
-	addq	328(%rsp), %r15
-	movq	352(%rsp), %rcx
-	movq	336(%rsp), %r12
-	movq	344(%rsp), %rdx
-	movq	80(%rsp), %rax          # 8-byte Reload
-	movq	%r15, 40(%rax)
-	adcq	(%rsp), %r12            # 8-byte Folded Reload
-	adcq	8(%rsp), %rdx           # 8-byte Folded Reload
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	adcq	16(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 8(%rsp)           # 8-byte Spill
-	adcq	24(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 48(%rsp)           # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	48(%r14), %rdx
-	leaq	248(%rsp), %rdi
-	movq	%r13, %rsi
-	movq	%r13, %r15
-	callq	.LmulPv576x64
-	movq	320(%rsp), %r8
-	movq	312(%rsp), %r9
-	movq	304(%rsp), %rsi
-	movq	296(%rsp), %rdi
-	movq	288(%rsp), %rbx
+	callq	mulPv512x64@PLT
+	movq	496(%rsp), %rax
+	movq	8(%rsp), %rdx                   # 8-byte Reload
+	addq	432(%rsp), %rdx
+	movq	64(%rsp), %rcx                  # 8-byte Reload
+	adcq	440(%rsp), %rcx
+	movq	%rcx, 64(%rsp)                  # 8-byte Spill
+	adcq	448(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  # 8-byte Spill
+	adcq	456(%rsp), %r13
+	movq	48(%rsp), %rbx                  # 8-byte Reload
+	adcq	464(%rsp), %rbx
+	movq	32(%rsp), %rbp                  # 8-byte Reload
+	adcq	472(%rsp), %rbp
+	adcq	480(%rsp), %r12
+	movq	%r12, 24(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	488(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	movq	56(%rsp), %r12                  # 8-byte Reload
+	movq	%rdx, 16(%r12)
+	adcq	$0, %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	24(%r15), %rdx
+	leaq	360(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	mulPv512x64@PLT
+	movq	424(%rsp), %r14
+	movq	64(%rsp), %rax                  # 8-byte Reload
+	addq	360(%rsp), %rax
+	movq	40(%rsp), %rcx                  # 8-byte Reload
+	adcq	368(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	adcq	376(%rsp), %r13
+	adcq	384(%rsp), %rbx
+	movq	%rbx, 48(%rsp)                  # 8-byte Spill
+	adcq	392(%rsp), %rbp
+	movq	%rbp, %rbx
+	movq	24(%rsp), %rbp                  # 8-byte Reload
+	adcq	400(%rsp), %rbp
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	408(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	416(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	movq	%rax, 24(%r12)
+	adcq	$0, %r14
+	movq	32(%r15), %rdx
+	leaq	288(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	mulPv512x64@PLT
+	movq	352(%rsp), %r12
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	addq	288(%rsp), %rax
+	adcq	296(%rsp), %r13
+	movq	%r13, 40(%rsp)                  # 8-byte Spill
+	movq	48(%rsp), %r13                  # 8-byte Reload
+	adcq	304(%rsp), %r13
+	adcq	312(%rsp), %rbx
+	movq	%rbx, 32(%rsp)                  # 8-byte Spill
+	adcq	320(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rbx                  # 8-byte Reload
+	adcq	328(%rsp), %rbx
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	336(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	adcq	344(%rsp), %r14
+	movq	56(%rsp), %rcx                  # 8-byte Reload
+	movq	%rax, 32(%rcx)
+	adcq	$0, %r12
+	movq	40(%r15), %rdx
+	leaq	216(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	mulPv512x64@PLT
 	movq	280(%rsp), %rbp
-	addq	248(%rsp), %r12
-	movq	272(%rsp), %rcx
-	movq	256(%rsp), %r13
-	movq	264(%rsp), %rdx
-	movq	80(%rsp), %rax          # 8-byte Reload
-	movq	%r12, 48(%rax)
-	adcq	(%rsp), %r13            # 8-byte Folded Reload
-	adcq	8(%rsp), %rdx           # 8-byte Folded Reload
-	movq	%rdx, (%rsp)            # 8-byte Spill
-	adcq	16(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 8(%rsp)           # 8-byte Spill
-	adcq	24(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 48(%rsp)           # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	56(%r14), %rdx
-	leaq	168(%rsp), %rdi
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	addq	216(%rsp), %rax
+	adcq	224(%rsp), %r13
+	movq	%r13, 48(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	adcq	232(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  # 8-byte Spill
+	adcq	248(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rbx                   # 8-byte Reload
+	adcq	256(%rsp), %rbx
+	adcq	264(%rsp), %r14
+	adcq	272(%rsp), %r12
+	movq	56(%rsp), %rcx                  # 8-byte Reload
+	movq	%rax, 40(%rcx)
+	adcq	$0, %rbp
+	movq	48(%r15), %rdx
+	leaq	144(%rsp), %rdi
 	movq	%r15, %rsi
-	callq	.LmulPv576x64
-	movq	240(%rsp), %rcx
-	movq	232(%rsp), %rdx
-	movq	224(%rsp), %rsi
-	movq	216(%rsp), %rdi
-	movq	208(%rsp), %rbx
-	addq	168(%rsp), %r13
-	movq	200(%rsp), %r12
-	movq	192(%rsp), %rbp
-	movq	176(%rsp), %r14
-	movq	184(%rsp), %r15
-	movq	80(%rsp), %rax          # 8-byte Reload
-	movq	%r13, 56(%rax)
-	adcq	(%rsp), %r14            # 8-byte Folded Reload
-	adcq	8(%rsp), %r15           # 8-byte Folded Reload
-	adcq	16(%rsp), %rbp          # 8-byte Folded Reload
-	adcq	24(%rsp), %r12          # 8-byte Folded Reload
-	adcq	32(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, %r13
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %rax          # 8-byte Reload
-	movq	64(%rax), %rdx
-	leaq	88(%rsp), %rdi
-	movq	72(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	88(%rsp), %r14
-	adcq	96(%rsp), %r15
-	movq	160(%rsp), %r8
-	adcq	104(%rsp), %rbp
-	movq	152(%rsp), %r9
-	movq	144(%rsp), %rdx
-	movq	136(%rsp), %rsi
-	movq	128(%rsp), %rdi
-	movq	120(%rsp), %rbx
-	movq	112(%rsp), %rax
-	movq	80(%rsp), %rcx          # 8-byte Reload
-	movq	%r14, 64(%rcx)
-	movq	%r15, 72(%rcx)
-	adcq	%r12, %rax
-	movq	%rbp, 80(%rcx)
-	movq	%rax, 88(%rcx)
-	adcq	%r13, %rbx
-	movq	%rbx, 96(%rcx)
-	adcq	32(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 104(%rcx)
-	adcq	40(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 112(%rcx)
-	adcq	48(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 120(%rcx)
-	adcq	56(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 128(%rcx)
-	adcq	$0, %r8
-	movq	%r8, 136(%rcx)
-	addq	$808, %rsp              # imm = 0x328
+	callq	mulPv512x64@PLT
+	movq	208(%rsp), %r13
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	addq	144(%rsp), %rcx
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	adcq	152(%rsp), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	160(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	168(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	adcq	176(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	adcq	184(%rsp), %r14
+	adcq	192(%rsp), %r12
+	adcq	200(%rsp), %rbp
+	movq	56(%rsp), %rax                  # 8-byte Reload
+	movq	%rcx, 48(%rax)
+	adcq	$0, %r13
+	movq	56(%r15), %rdx
+	leaq	72(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	mulPv512x64@PLT
+	movq	136(%rsp), %rax
+	movq	32(%rsp), %rsi                  # 8-byte Reload
+	addq	72(%rsp), %rsi
+	movq	24(%rsp), %rdi                  # 8-byte Reload
+	adcq	80(%rsp), %rdi
+	movq	16(%rsp), %rbx                  # 8-byte Reload
+	adcq	88(%rsp), %rbx
+	movq	8(%rsp), %rdx                   # 8-byte Reload
+	adcq	96(%rsp), %rdx
+	adcq	104(%rsp), %r14
+	adcq	112(%rsp), %r12
+	adcq	120(%rsp), %rbp
+	adcq	128(%rsp), %r13
+	movq	56(%rsp), %rcx                  # 8-byte Reload
+	movq	%r13, 112(%rcx)
+	movq	%rbp, 104(%rcx)
+	movq	%r12, 96(%rcx)
+	movq	%r14, 88(%rcx)
+	movq	%rdx, 80(%rcx)
+	movq	%rbx, 72(%rcx)
+	movq	%rdi, 64(%rcx)
+	movq	%rsi, 56(%rcx)
+	adcq	$0, %rax
+	movq	%rax, 120(%rcx)
+	addq	$648, %rsp                      # imm = 0x288
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -14110,298 +6302,444 @@ mcl_fpDbl_mulPre9L:                     # @mcl_fpDbl_mulPre9L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end128:
-	.size	mcl_fpDbl_mulPre9L, .Lfunc_end128-mcl_fpDbl_mulPre9L
-
-	.globl	mcl_fpDbl_sqrPre9L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre9L,@function
-mcl_fpDbl_sqrPre9L:                     # @mcl_fpDbl_sqrPre9L
-# BB#0:
+.Lfunc_end59:
+	.size	mcl_fpDbl_sqrPre8L, .Lfunc_end59-mcl_fpDbl_sqrPre8L
+                                        # -- End function
+	.globl	mcl_fp_mont8L                   # -- Begin function mcl_fp_mont8L
+	.p2align	4, 0x90
+	.type	mcl_fp_mont8L,@function
+mcl_fp_mont8L:                          # @mcl_fp_mont8L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$808, %rsp              # imm = 0x328
-	movq	%rsi, %r15
-	movq	%r15, 80(%rsp)          # 8-byte Spill
-	movq	%rdi, %r14
-	movq	%r14, 72(%rsp)          # 8-byte Spill
-	movq	(%r15), %rdx
-	leaq	728(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	800(%rsp), %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	792(%rsp), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	movq	784(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	776(%rsp), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	768(%rsp), %rax
-	movq	%rax, 32(%rsp)          # 8-byte Spill
-	movq	760(%rsp), %rax
-	movq	%rax, 24(%rsp)          # 8-byte Spill
-	movq	752(%rsp), %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	movq	744(%rsp), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	728(%rsp), %rax
-	movq	736(%rsp), %r12
-	movq	%rax, (%r14)
-	movq	8(%r15), %rdx
-	leaq	648(%rsp), %rdi
-	movq	%r15, %rsi
-	callq	.LmulPv576x64
-	movq	720(%rsp), %r8
-	movq	712(%rsp), %rcx
-	movq	704(%rsp), %rdx
-	movq	696(%rsp), %rsi
-	movq	688(%rsp), %rdi
-	movq	680(%rsp), %rbp
-	addq	648(%rsp), %r12
-	movq	672(%rsp), %rax
-	movq	656(%rsp), %rbx
-	movq	664(%rsp), %r13
-	movq	%r12, 8(%r14)
-	adcq	8(%rsp), %rbx           # 8-byte Folded Reload
-	adcq	16(%rsp), %r13          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	16(%r15), %rdx
-	leaq	568(%rsp), %rdi
-	movq	%r15, %rsi
-	callq	.LmulPv576x64
-	movq	640(%rsp), %r8
-	movq	632(%rsp), %rcx
-	movq	624(%rsp), %rdx
-	movq	616(%rsp), %rsi
-	movq	608(%rsp), %rdi
-	movq	600(%rsp), %rbp
-	addq	568(%rsp), %rbx
-	movq	592(%rsp), %rax
-	movq	576(%rsp), %r14
-	movq	584(%rsp), %r12
-	movq	72(%rsp), %r15          # 8-byte Reload
-	movq	%rbx, 16(%r15)
-	adcq	%r13, %r14
-	adcq	16(%rsp), %r12          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	movq	24(%rsi), %rdx
-	leaq	488(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	560(%rsp), %r8
-	movq	552(%rsp), %rcx
-	movq	544(%rsp), %rdx
-	movq	536(%rsp), %rsi
-	movq	528(%rsp), %rdi
-	movq	520(%rsp), %rbp
-	addq	488(%rsp), %r14
-	movq	512(%rsp), %rax
-	movq	496(%rsp), %rbx
-	movq	504(%rsp), %r13
-	movq	%r14, 24(%r15)
-	adcq	%r12, %rbx
-	adcq	16(%rsp), %r13          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	movq	32(%rsi), %rdx
-	leaq	408(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	480(%rsp), %r8
-	movq	472(%rsp), %rcx
-	movq	464(%rsp), %rdx
-	movq	456(%rsp), %rsi
-	movq	448(%rsp), %rdi
-	movq	440(%rsp), %rbp
-	addq	408(%rsp), %rbx
-	movq	432(%rsp), %rax
-	movq	416(%rsp), %r14
-	movq	424(%rsp), %r12
-	movq	%rbx, 32(%r15)
-	adcq	%r13, %r14
-	adcq	16(%rsp), %r12          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	movq	40(%rsi), %rdx
-	leaq	328(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	400(%rsp), %r8
-	movq	392(%rsp), %rcx
-	movq	384(%rsp), %rdx
-	movq	376(%rsp), %rsi
-	movq	368(%rsp), %rdi
-	movq	360(%rsp), %rbp
-	addq	328(%rsp), %r14
-	movq	352(%rsp), %rax
-	movq	336(%rsp), %rbx
-	movq	344(%rsp), %r13
-	movq	%r14, 40(%r15)
-	adcq	%r12, %rbx
-	adcq	16(%rsp), %r13          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	movq	48(%rsi), %rdx
+	subq	$1256, %rsp                     # imm = 0x4E8
+	movq	%rcx, %r13
+	movq	%rdx, 80(%rsp)                  # 8-byte Spill
+	movq	%rsi, 88(%rsp)                  # 8-byte Spill
+	movq	%rdi, 96(%rsp)                  # 8-byte Spill
+	movq	-8(%rcx), %rbx
+	movq	%rbx, 72(%rsp)                  # 8-byte Spill
+	movq	%rcx, 56(%rsp)                  # 8-byte Spill
+	movq	(%rdx), %rdx
+	leaq	1184(%rsp), %rdi
+	callq	mulPv512x64@PLT
+	movq	1184(%rsp), %r15
+	movq	1192(%rsp), %r12
+	movq	%rbx, %rdx
+	imulq	%r15, %rdx
+	movq	1248(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	1240(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	1232(%rsp), %r14
+	movq	1224(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	movq	1216(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	1208(%rsp), %rbx
+	movq	1200(%rsp), %rbp
+	leaq	1112(%rsp), %rdi
+	movq	%r13, %rsi
+	callq	mulPv512x64@PLT
+	addq	1112(%rsp), %r15
+	adcq	1120(%rsp), %r12
+	adcq	1128(%rsp), %rbp
+	movq	%rbp, 64(%rsp)                  # 8-byte Spill
+	adcq	1136(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rbp                  # 8-byte Reload
+	adcq	1144(%rsp), %rbp
+	movq	(%rsp), %r15                    # 8-byte Reload
+	adcq	1152(%rsp), %r15
+	adcq	1160(%rsp), %r14
+	movq	%r14, 48(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %r13                  # 8-byte Reload
+	adcq	1168(%rsp), %r13
+	movq	8(%rsp), %rbx                   # 8-byte Reload
+	adcq	1176(%rsp), %rbx
+	setb	%r14b
+	movq	80(%rsp), %rax                  # 8-byte Reload
+	movq	8(%rax), %rdx
+	leaq	1040(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movzbl	%r14b, %ecx
+	addq	1040(%rsp), %r12
+	movq	64(%rsp), %r14                  # 8-byte Reload
+	adcq	1048(%rsp), %r14
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	adcq	1056(%rsp), %rax
+	movq	%rax, 40(%rsp)                  # 8-byte Spill
+	adcq	1064(%rsp), %rbp
+	adcq	1072(%rsp), %r15
+	movq	%r15, (%rsp)                    # 8-byte Spill
+	movq	48(%rsp), %rax                  # 8-byte Reload
+	adcq	1080(%rsp), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	adcq	1088(%rsp), %r13
+	movq	%r13, 16(%rsp)                  # 8-byte Spill
+	adcq	1096(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	adcq	1104(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	setb	%r15b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r12, %rdx
+	leaq	968(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movzbl	%r15b, %r15d
+	addq	968(%rsp), %r12
+	adcq	976(%rsp), %r14
+	movq	%r14, 64(%rsp)                  # 8-byte Spill
+	movq	40(%rsp), %r13                  # 8-byte Reload
+	adcq	984(%rsp), %r13
+	adcq	992(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  # 8-byte Spill
+	movq	(%rsp), %r12                    # 8-byte Reload
+	adcq	1000(%rsp), %r12
+	movq	48(%rsp), %r14                  # 8-byte Reload
+	adcq	1008(%rsp), %r14
+	movq	16(%rsp), %rbx                  # 8-byte Reload
+	adcq	1016(%rsp), %rbx
+	movq	8(%rsp), %rax                   # 8-byte Reload
+	adcq	1024(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	32(%rsp), %rbp                  # 8-byte Reload
+	adcq	1032(%rsp), %rbp
+	adcq	$0, %r15
+	movq	80(%rsp), %rax                  # 8-byte Reload
+	movq	16(%rax), %rdx
+	leaq	896(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	64(%rsp), %rax                  # 8-byte Reload
+	addq	896(%rsp), %rax
+	adcq	904(%rsp), %r13
+	movq	%r13, 40(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %r13                  # 8-byte Reload
+	adcq	912(%rsp), %r13
+	adcq	920(%rsp), %r12
+	adcq	928(%rsp), %r14
+	movq	%r14, 48(%rsp)                  # 8-byte Spill
+	adcq	936(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rbx                   # 8-byte Reload
+	adcq	944(%rsp), %rbx
+	adcq	952(%rsp), %rbp
+	movq	%rbp, 32(%rsp)                  # 8-byte Spill
+	adcq	960(%rsp), %r15
+	setb	%r14b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	824(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movzbl	%r14b, %eax
+	addq	824(%rsp), %rbp
+	movq	40(%rsp), %r14                  # 8-byte Reload
+	adcq	832(%rsp), %r14
+	adcq	840(%rsp), %r13
+	movq	%r13, 24(%rsp)                  # 8-byte Spill
+	adcq	848(%rsp), %r12
+	movq	%r12, (%rsp)                    # 8-byte Spill
+	movq	48(%rsp), %r12                  # 8-byte Reload
+	adcq	856(%rsp), %r12
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	864(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	movq	%rbx, %rbp
+	adcq	872(%rsp), %rbp
+	movq	32(%rsp), %r13                  # 8-byte Reload
+	adcq	880(%rsp), %r13
+	adcq	888(%rsp), %r15
+	movq	%rax, %rbx
+	adcq	$0, %rbx
+	movq	80(%rsp), %rax                  # 8-byte Reload
+	movq	24(%rax), %rdx
+	leaq	752(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	%r14, %rax
+	addq	752(%rsp), %rax
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	adcq	760(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  # 8-byte Spill
+	movq	(%rsp), %r14                    # 8-byte Reload
+	adcq	768(%rsp), %r14
+	adcq	776(%rsp), %r12
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	784(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	adcq	792(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   # 8-byte Spill
+	adcq	800(%rsp), %r13
+	movq	%r13, 32(%rsp)                  # 8-byte Spill
+	adcq	808(%rsp), %r15
+	movq	%r15, %r13
+	adcq	816(%rsp), %rbx
+	setb	%r15b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	680(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movzbl	%r15b, %eax
+	addq	680(%rsp), %rbp
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	adcq	688(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  # 8-byte Spill
+	adcq	696(%rsp), %r14
+	movq	%r14, (%rsp)                    # 8-byte Spill
+	adcq	704(%rsp), %r12
+	movq	16(%rsp), %rbp                  # 8-byte Reload
+	adcq	712(%rsp), %rbp
+	movq	8(%rsp), %r14                   # 8-byte Reload
+	adcq	720(%rsp), %r14
+	movq	32(%rsp), %r15                  # 8-byte Reload
+	adcq	728(%rsp), %r15
+	adcq	736(%rsp), %r13
+	movq	%r13, 40(%rsp)                  # 8-byte Spill
+	adcq	744(%rsp), %rbx
+	adcq	$0, %rax
+	movq	%rax, %r13
+	movq	80(%rsp), %rax                  # 8-byte Reload
+	movq	32(%rax), %rdx
+	leaq	608(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	addq	608(%rsp), %rax
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	616(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	adcq	624(%rsp), %r12
+	adcq	632(%rsp), %rbp
+	movq	%rbp, 16(%rsp)                  # 8-byte Spill
+	adcq	640(%rsp), %r14
+	movq	%r14, 8(%rsp)                   # 8-byte Spill
+	adcq	648(%rsp), %r15
+	movq	%r15, 32(%rsp)                  # 8-byte Spill
+	movq	40(%rsp), %rbp                  # 8-byte Reload
+	adcq	656(%rsp), %rbp
+	adcq	664(%rsp), %rbx
+	movq	%rbx, %r15
+	adcq	672(%rsp), %r13
+	setb	%r14b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	536(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movzbl	%r14b, %eax
+	addq	536(%rsp), %rbx
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	544(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	adcq	552(%rsp), %r12
+	movq	%r12, 48(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %r12                  # 8-byte Reload
+	adcq	560(%rsp), %r12
+	movq	8(%rsp), %rbx                   # 8-byte Reload
+	adcq	568(%rsp), %rbx
+	movq	32(%rsp), %r14                  # 8-byte Reload
+	adcq	576(%rsp), %r14
+	adcq	584(%rsp), %rbp
+	adcq	592(%rsp), %r15
+	movq	%r15, 64(%rsp)                  # 8-byte Spill
+	adcq	600(%rsp), %r13
+	movq	%r13, 24(%rsp)                  # 8-byte Spill
+	adcq	$0, %rax
+	movq	%rax, %r13
+	movq	80(%rsp), %rax                  # 8-byte Reload
+	movq	40(%rax), %rdx
+	leaq	464(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	(%rsp), %rax                    # 8-byte Reload
+	addq	464(%rsp), %rax
+	movq	48(%rsp), %r15                  # 8-byte Reload
+	adcq	472(%rsp), %r15
+	adcq	480(%rsp), %r12
+	movq	%r12, 16(%rsp)                  # 8-byte Spill
+	adcq	488(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	adcq	496(%rsp), %r14
+	movq	%r14, %r12
+	adcq	504(%rsp), %rbp
+	movq	64(%rsp), %rcx                  # 8-byte Reload
+	adcq	512(%rsp), %rcx
+	movq	%rcx, 64(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	adcq	520(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  # 8-byte Spill
+	adcq	528(%rsp), %r13
+	movq	%r13, (%rsp)                    # 8-byte Spill
+	setb	%r14b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	392(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movzbl	%r14b, %eax
+	addq	392(%rsp), %rbx
+	adcq	400(%rsp), %r15
+	movq	%r15, 48(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rbx                  # 8-byte Reload
+	adcq	408(%rsp), %rbx
+	movq	8(%rsp), %r14                   # 8-byte Reload
+	adcq	416(%rsp), %r14
+	adcq	424(%rsp), %r12
+	movq	%r12, 32(%rsp)                  # 8-byte Spill
+	adcq	432(%rsp), %rbp
+	movq	%rbp, 40(%rsp)                  # 8-byte Spill
+	movq	64(%rsp), %rbp                  # 8-byte Reload
+	adcq	440(%rsp), %rbp
+	movq	24(%rsp), %r13                  # 8-byte Reload
+	adcq	448(%rsp), %r13
+	movq	(%rsp), %r12                    # 8-byte Reload
+	adcq	456(%rsp), %r12
+	movq	%rax, %r15
+	adcq	$0, %r15
+	movq	80(%rsp), %rax                  # 8-byte Reload
+	movq	48(%rax), %rdx
+	leaq	320(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
 	leaq	248(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	320(%rsp), %r8
-	movq	312(%rsp), %rcx
-	movq	304(%rsp), %rdx
-	movq	296(%rsp), %rsi
-	movq	288(%rsp), %rdi
-	movq	280(%rsp), %rbp
-	addq	248(%rsp), %rbx
-	movq	272(%rsp), %rax
-	movq	256(%rsp), %r12
-	movq	264(%rsp), %r14
-	movq	%rbx, 48(%r15)
-	adcq	%r13, %r12
-	adcq	16(%rsp), %r14          # 8-byte Folded Reload
-	adcq	24(%rsp), %rax          # 8-byte Folded Reload
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	adcq	32(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          # 8-byte Spill
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          # 8-byte Spill
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	movq	56(%rsi), %rdx
-	leaq	168(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	240(%rsp), %r8
-	movq	232(%rsp), %rdx
-	movq	224(%rsp), %rsi
-	movq	216(%rsp), %rdi
-	movq	208(%rsp), %rbx
-	movq	200(%rsp), %rcx
-	addq	168(%rsp), %r12
-	movq	192(%rsp), %r15
-	movq	176(%rsp), %r13
-	movq	184(%rsp), %rbp
-	movq	72(%rsp), %rax          # 8-byte Reload
-	movq	%r12, 56(%rax)
-	adcq	%r14, %r13
-	adcq	16(%rsp), %rbp          # 8-byte Folded Reload
-	adcq	24(%rsp), %r15          # 8-byte Folded Reload
-	adcq	32(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, %r12
-	adcq	40(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, %r14
-	adcq	48(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 40(%rsp)          # 8-byte Spill
-	adcq	56(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 48(%rsp)          # 8-byte Spill
-	adcq	64(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 64(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %rsi          # 8-byte Reload
-	movq	64(%rsi), %rdx
-	leaq	88(%rsp), %rdi
-	callq	.LmulPv576x64
-	addq	88(%rsp), %r13
-	adcq	96(%rsp), %rbp
-	movq	160(%rsp), %r8
-	adcq	104(%rsp), %r15
-	movq	152(%rsp), %r9
-	movq	144(%rsp), %rdx
-	movq	136(%rsp), %rsi
-	movq	128(%rsp), %rdi
-	movq	120(%rsp), %rbx
-	movq	112(%rsp), %rax
-	movq	72(%rsp), %rcx          # 8-byte Reload
-	movq	%r13, 64(%rcx)
-	movq	%rbp, 72(%rcx)
-	adcq	%r12, %rax
-	movq	%r15, 80(%rcx)
-	movq	%rax, 88(%rcx)
-	adcq	%r14, %rbx
-	movq	%rbx, 96(%rcx)
-	adcq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 104(%rcx)
-	adcq	48(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rsi, 112(%rcx)
-	adcq	56(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 120(%rcx)
-	adcq	64(%rsp), %r9           # 8-byte Folded Reload
-	movq	%r9, 128(%rcx)
-	adcq	$0, %r8
-	movq	%r8, 136(%rcx)
-	addq	$808, %rsp              # imm = 0x328
+	movq	48(%rsp), %rax                  # 8-byte Reload
+	addq	320(%rsp), %rax
+	adcq	328(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  # 8-byte Spill
+	adcq	336(%rsp), %r14
+	movq	%r14, 8(%rsp)                   # 8-byte Spill
+	movq	32(%rsp), %rbx                  # 8-byte Reload
+	adcq	344(%rsp), %rbx
+	movq	40(%rsp), %rcx                  # 8-byte Reload
+	adcq	352(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	adcq	360(%rsp), %rbp
+	adcq	368(%rsp), %r13
+	adcq	376(%rsp), %r12
+	movq	%r12, (%rsp)                    # 8-byte Spill
+	adcq	384(%rsp), %r15
+	movq	%r15, 48(%rsp)                  # 8-byte Spill
+	setb	%r12b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %r14
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movzbl	%r12b, %r12d
+	addq	248(%rsp), %r14
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	256(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %r15                   # 8-byte Reload
+	adcq	264(%rsp), %r15
+	adcq	272(%rsp), %rbx
+	movq	%rbx, 32(%rsp)                  # 8-byte Spill
+	movq	40(%rsp), %rbx                  # 8-byte Reload
+	adcq	280(%rsp), %rbx
+	adcq	288(%rsp), %rbp
+	adcq	296(%rsp), %r13
+	movq	(%rsp), %r14                    # 8-byte Reload
+	adcq	304(%rsp), %r14
+	movq	48(%rsp), %rax                  # 8-byte Reload
+	adcq	312(%rsp), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	adcq	$0, %r12
+	movq	80(%rsp), %rax                  # 8-byte Reload
+	movq	56(%rax), %rdx
+	leaq	176(%rsp), %rdi
+	movq	88(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	addq	176(%rsp), %rax
+	adcq	184(%rsp), %r15
+	movq	%r15, 8(%rsp)                   # 8-byte Spill
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	adcq	192(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	adcq	200(%rsp), %rbx
+	adcq	208(%rsp), %rbp
+	adcq	216(%rsp), %r13
+	movq	%r13, 24(%rsp)                  # 8-byte Spill
+	adcq	224(%rsp), %r14
+	movq	%r14, (%rsp)                    # 8-byte Spill
+	movq	48(%rsp), %r15                  # 8-byte Reload
+	adcq	232(%rsp), %r15
+	adcq	240(%rsp), %r12
+	setb	%r14b
+	movq	72(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %r13
+	leaq	104(%rsp), %rdi
+	movq	56(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movzbl	%r14b, %r9d
+	addq	104(%rsp), %r13
+	movq	8(%rsp), %r11                   # 8-byte Reload
+	adcq	112(%rsp), %r11
+	movq	%r11, 8(%rsp)                   # 8-byte Spill
+	movq	32(%rsp), %r10                  # 8-byte Reload
+	adcq	120(%rsp), %r10
+	movq	%r10, 32(%rsp)                  # 8-byte Spill
+	movq	%rbx, %r8
+	adcq	128(%rsp), %r8
+	movq	%r8, 40(%rsp)                   # 8-byte Spill
+	movq	%rbp, %r13
+	adcq	136(%rsp), %r13
+	movq	24(%rsp), %r14                  # 8-byte Reload
+	adcq	144(%rsp), %r14
+	movq	(%rsp), %rsi                    # 8-byte Reload
+	adcq	152(%rsp), %rsi
+	adcq	160(%rsp), %r15
+	adcq	168(%rsp), %r12
+	adcq	$0, %r9
+	movq	56(%rsp), %rcx                  # 8-byte Reload
+	subq	(%rcx), %r11
+	sbbq	8(%rcx), %r10
+	sbbq	16(%rcx), %r8
+	movq	%r13, %rdi
+	sbbq	24(%rcx), %rdi
+	movq	%r14, %rbx
+	sbbq	32(%rcx), %rbx
+	movq	%rsi, %rbp
+	sbbq	40(%rcx), %rbp
+	movq	%r15, %rax
+	sbbq	48(%rcx), %rax
+	movq	%rcx, %rdx
+	movq	%r12, %rcx
+	sbbq	56(%rdx), %rcx
+	sbbq	$0, %r9
+	testb	$1, %r9b
+	cmovneq	%r12, %rcx
+	movq	96(%rsp), %rdx                  # 8-byte Reload
+	movq	%rcx, 56(%rdx)
+	cmovneq	%r15, %rax
+	movq	%rax, 48(%rdx)
+	cmovneq	%rsi, %rbp
+	movq	%rbp, 40(%rdx)
+	cmovneq	%r14, %rbx
+	movq	%rbx, 32(%rdx)
+	cmovneq	%r13, %rdi
+	movq	%rdi, 24(%rdx)
+	cmovneq	40(%rsp), %r8                   # 8-byte Folded Reload
+	movq	%r8, 16(%rdx)
+	cmovneq	32(%rsp), %r10                  # 8-byte Folded Reload
+	movq	%r10, 8(%rdx)
+	cmovneq	8(%rsp), %r11                   # 8-byte Folded Reload
+	movq	%r11, (%rdx)
+	addq	$1256, %rsp                     # imm = 0x4E8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -14409,559 +6747,414 @@ mcl_fpDbl_sqrPre9L:                     # @mcl_fpDbl_sqrPre9L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end129:
-	.size	mcl_fpDbl_sqrPre9L, .Lfunc_end129-mcl_fpDbl_sqrPre9L
-
-	.globl	mcl_fp_mont9L
-	.align	16, 0x90
-	.type	mcl_fp_mont9L,@function
-mcl_fp_mont9L:                          # @mcl_fp_mont9L
-# BB#0:
+.Lfunc_end60:
+	.size	mcl_fp_mont8L, .Lfunc_end60-mcl_fp_mont8L
+                                        # -- End function
+	.globl	mcl_fp_montNF8L                 # -- Begin function mcl_fp_montNF8L
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF8L,@function
+mcl_fp_montNF8L:                        # @mcl_fp_montNF8L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$1560, %rsp             # imm = 0x618
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	movq	%rdx, 32(%rsp)          # 8-byte Spill
-	movq	%rsi, 24(%rsp)          # 8-byte Spill
-	movq	%rdi, (%rsp)            # 8-byte Spill
+	subq	$1256, %rsp                     # imm = 0x4E8
+	movq	%rcx, %rbp
+	movq	%rdx, 88(%rsp)                  # 8-byte Spill
+	movq	%rsi, 80(%rsp)                  # 8-byte Spill
+	movq	%rdi, 96(%rsp)                  # 8-byte Spill
 	movq	-8(%rcx), %rbx
-	movq	%rbx, 16(%rsp)          # 8-byte Spill
+	movq	%rbx, 64(%rsp)                  # 8-byte Spill
+	movq	%rcx, 72(%rsp)                  # 8-byte Spill
 	movq	(%rdx), %rdx
-	leaq	1480(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	1480(%rsp), %r14
-	movq	1488(%rsp), %r15
-	movq	%r14, %rdx
-	imulq	%rbx, %rdx
-	movq	1552(%rsp), %rax
-	movq	%rax, 112(%rsp)         # 8-byte Spill
-	movq	1544(%rsp), %rax
-	movq	%rax, 104(%rsp)         # 8-byte Spill
-	movq	1536(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	1528(%rsp), %r12
-	movq	1520(%rsp), %r13
-	movq	1512(%rsp), %rbx
-	movq	1504(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	1496(%rsp), %rbp
-	leaq	1400(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	1400(%rsp), %r14
-	adcq	1408(%rsp), %r15
-	adcq	1416(%rsp), %rbp
-	movq	%rbp, 96(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	1424(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	adcq	1432(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          # 8-byte Spill
-	adcq	1440(%rsp), %r13
-	movq	%r13, 64(%rsp)          # 8-byte Spill
-	adcq	1448(%rsp), %r12
-	movq	%r12, 48(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %rbx          # 8-byte Reload
-	adcq	1456(%rsp), %rbx
-	movq	104(%rsp), %r14         # 8-byte Reload
-	adcq	1464(%rsp), %r14
-	movq	112(%rsp), %r13         # 8-byte Reload
-	adcq	1472(%rsp), %r13
-	sbbq	%rbp, %rbp
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1320(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %ebp
-	addq	1320(%rsp), %r15
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	1328(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %rax          # 8-byte Reload
-	adcq	1336(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %r12          # 8-byte Reload
-	adcq	1344(%rsp), %r12
-	movq	64(%rsp), %rax          # 8-byte Reload
-	adcq	1352(%rsp), %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %rax          # 8-byte Reload
-	adcq	1360(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	adcq	1368(%rsp), %rbx
-	adcq	1376(%rsp), %r14
-	movq	%r14, 104(%rsp)         # 8-byte Spill
-	adcq	1384(%rsp), %r13
-	movq	%r13, 112(%rsp)         # 8-byte Spill
-	adcq	1392(%rsp), %rbp
-	sbbq	%r14, %r14
-	movq	%r15, %rdx
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	1240(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	%r14, %rax
-	andl	$1, %eax
-	addq	1240(%rsp), %r15
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	1248(%rsp), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %r14          # 8-byte Reload
-	adcq	1256(%rsp), %r14
-	adcq	1264(%rsp), %r12
-	movq	%r12, 40(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r12          # 8-byte Reload
-	adcq	1272(%rsp), %r12
-	movq	48(%rsp), %r13          # 8-byte Reload
-	adcq	1280(%rsp), %r13
-	adcq	1288(%rsp), %rbx
-	movq	%rbx, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %r15         # 8-byte Reload
-	adcq	1296(%rsp), %r15
-	movq	112(%rsp), %rbx         # 8-byte Reload
-	adcq	1304(%rsp), %rbx
-	adcq	1312(%rsp), %rbp
-	adcq	$0, %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	1160(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	96(%rsp), %rax          # 8-byte Reload
-	addq	1160(%rsp), %rax
-	adcq	1168(%rsp), %r14
-	movq	%r14, 80(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %r14          # 8-byte Reload
-	adcq	1176(%rsp), %r14
-	adcq	1184(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	movq	%r13, %r12
-	adcq	1192(%rsp), %r12
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	adcq	1200(%rsp), %rcx
-	movq	%rcx, 88(%rsp)          # 8-byte Spill
-	adcq	1208(%rsp), %r15
-	movq	%r15, %r13
-	adcq	1216(%rsp), %rbx
-	movq	%rbx, 112(%rsp)         # 8-byte Spill
-	adcq	1224(%rsp), %rbp
-	movq	72(%rsp), %rcx          # 8-byte Reload
-	adcq	1232(%rsp), %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	1080(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	%r15, %rax
-	andl	$1, %eax
-	addq	1080(%rsp), %rbx
-	movq	80(%rsp), %rcx          # 8-byte Reload
-	adcq	1088(%rsp), %rcx
-	movq	%rcx, 80(%rsp)          # 8-byte Spill
-	movq	%r14, %r15
-	adcq	1096(%rsp), %r15
-	movq	64(%rsp), %r14          # 8-byte Reload
-	adcq	1104(%rsp), %r14
-	movq	%r12, %rbx
-	adcq	1112(%rsp), %rbx
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	adcq	1120(%rsp), %rcx
-	movq	%rcx, 88(%rsp)          # 8-byte Spill
+	leaq	1184(%rsp), %rdi
+	callq	mulPv512x64@PLT
+	movq	1184(%rsp), %r15
+	movq	1192(%rsp), %r12
+	movq	%rbx, %rdx
+	imulq	%r15, %rdx
+	movq	1248(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	1240(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	1232(%rsp), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	movq	1224(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	1216(%rsp), %r14
+	movq	1208(%rsp), %rbx
+	movq	1200(%rsp), %r13
+	leaq	1112(%rsp), %rdi
+	movq	%rbp, %rsi
+	callq	mulPv512x64@PLT
+	addq	1112(%rsp), %r15
+	adcq	1120(%rsp), %r12
 	adcq	1128(%rsp), %r13
-	movq	%r13, 104(%rsp)         # 8-byte Spill
-	movq	112(%rsp), %r13         # 8-byte Reload
-	adcq	1136(%rsp), %r13
-	adcq	1144(%rsp), %rbp
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	1152(%rsp), %r12
-	adcq	$0, %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	1000(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	80(%rsp), %rax          # 8-byte Reload
-	addq	1000(%rsp), %rax
-	adcq	1008(%rsp), %r15
-	movq	%r15, 40(%rsp)          # 8-byte Spill
-	adcq	1016(%rsp), %r14
+	adcq	1136(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  # 8-byte Spill
 	movq	%r14, %r15
-	adcq	1024(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %r14          # 8-byte Reload
-	adcq	1032(%rsp), %r14
-	movq	104(%rsp), %rcx         # 8-byte Reload
-	adcq	1040(%rsp), %rcx
-	movq	%rcx, 104(%rsp)         # 8-byte Spill
+	adcq	1144(%rsp), %r15
+	movq	16(%rsp), %rbx                  # 8-byte Reload
+	adcq	1152(%rsp), %rbx
+	movq	32(%rsp), %r14                  # 8-byte Reload
+	adcq	1160(%rsp), %r14
+	movq	24(%rsp), %rbp                  # 8-byte Reload
+	adcq	1168(%rsp), %rbp
+	movq	8(%rsp), %rax                   # 8-byte Reload
+	adcq	1176(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	8(%rax), %rdx
+	leaq	1040(%rsp), %rdi
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	1104(%rsp), %rcx
+	addq	1040(%rsp), %r12
 	adcq	1048(%rsp), %r13
-	movq	%r13, 112(%rsp)         # 8-byte Spill
-	adcq	1056(%rsp), %rbp
-	adcq	1064(%rsp), %r12
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	1072(%rsp), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	sbbq	%rbx, %rbx
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	920(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	920(%rsp), %r13
-	movq	40(%rsp), %rcx          # 8-byte Reload
-	adcq	928(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          # 8-byte Spill
-	adcq	936(%rsp), %r15
-	movq	%r15, 64(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %r15          # 8-byte Reload
-	adcq	944(%rsp), %r15
-	movq	%r14, %r13
-	adcq	952(%rsp), %r13
-	movq	104(%rsp), %r14         # 8-byte Reload
-	adcq	960(%rsp), %r14
-	movq	112(%rsp), %rbx         # 8-byte Reload
-	adcq	968(%rsp), %rbx
-	adcq	976(%rsp), %rbp
-	adcq	984(%rsp), %r12
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	992(%rsp), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	840(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	40(%rsp), %rax          # 8-byte Reload
-	addq	840(%rsp), %rax
-	movq	64(%rsp), %rcx          # 8-byte Reload
-	adcq	848(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          # 8-byte Spill
-	adcq	856(%rsp), %r15
-	adcq	864(%rsp), %r13
-	movq	%r13, 88(%rsp)          # 8-byte Spill
-	adcq	872(%rsp), %r14
-	movq	%r14, 104(%rsp)         # 8-byte Spill
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	adcq	1056(%rsp), %rax
+	movq	%rax, 40(%rsp)                  # 8-byte Spill
+	adcq	1064(%rsp), %r15
+	adcq	1072(%rsp), %rbx
+	adcq	1080(%rsp), %r14
+	movq	%r14, 32(%rsp)                  # 8-byte Spill
+	adcq	1088(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %r14                   # 8-byte Reload
+	adcq	1096(%rsp), %r14
+	adcq	$0, %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	movq	64(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r12, %rdx
+	leaq	968(%rsp), %rdi
+	movq	72(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	addq	968(%rsp), %r12
+	adcq	976(%rsp), %r13
+	movq	40(%rsp), %rbp                  # 8-byte Reload
+	adcq	984(%rsp), %rbp
+	adcq	992(%rsp), %r15
+	movq	%r15, 56(%rsp)                  # 8-byte Spill
+	adcq	1000(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %r15                  # 8-byte Reload
+	adcq	1008(%rsp), %r15
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	1016(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	adcq	1024(%rsp), %r14
+	movq	%r14, 8(%rsp)                   # 8-byte Spill
+	movq	48(%rsp), %rbx                  # 8-byte Reload
+	adcq	1032(%rsp), %rbx
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	16(%rax), %rdx
+	leaq	896(%rsp), %rdi
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	960(%rsp), %r12
+	addq	896(%rsp), %r13
+	movq	%rbp, %r14
+	adcq	904(%rsp), %r14
+	movq	56(%rsp), %rax                  # 8-byte Reload
+	adcq	912(%rsp), %rax
+	movq	%rax, 56(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	920(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	adcq	928(%rsp), %r15
+	movq	%r15, 32(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rbp                  # 8-byte Reload
+	adcq	936(%rsp), %rbp
+	movq	8(%rsp), %rax                   # 8-byte Reload
+	adcq	944(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	adcq	952(%rsp), %rbx
+	adcq	$0, %r12
+	movq	64(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r13, %rdx
+	leaq	824(%rsp), %rdi
+	movq	72(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	addq	824(%rsp), %r13
+	adcq	832(%rsp), %r14
+	movq	%r14, 40(%rsp)                  # 8-byte Spill
+	movq	56(%rsp), %r13                  # 8-byte Reload
+	adcq	840(%rsp), %r13
+	movq	16(%rsp), %r15                  # 8-byte Reload
+	adcq	848(%rsp), %r15
+	movq	32(%rsp), %r14                  # 8-byte Reload
+	adcq	856(%rsp), %r14
+	adcq	864(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rbp                   # 8-byte Reload
+	adcq	872(%rsp), %rbp
 	adcq	880(%rsp), %rbx
-	movq	%rbx, 112(%rsp)         # 8-byte Spill
-	adcq	888(%rsp), %rbp
-	adcq	896(%rsp), %r12
-	movq	96(%rsp), %r13          # 8-byte Reload
-	adcq	904(%rsp), %r13
-	movq	80(%rsp), %rcx          # 8-byte Reload
-	adcq	912(%rsp), %rcx
-	movq	%rcx, 80(%rsp)          # 8-byte Spill
-	sbbq	%rbx, %rbx
-	movq	%rax, %rdx
-	movq	%rax, %r14
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	760(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	760(%rsp), %r14
-	movq	64(%rsp), %rcx          # 8-byte Reload
-	adcq	768(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          # 8-byte Spill
+	adcq	888(%rsp), %r12
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	24(%rax), %rdx
+	leaq	752(%rsp), %rdi
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	816(%rsp), %rcx
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	addq	752(%rsp), %rax
+	adcq	760(%rsp), %r13
+	adcq	768(%rsp), %r15
+	movq	%r15, 16(%rsp)                  # 8-byte Spill
+	movq	%r14, %r15
 	adcq	776(%rsp), %r15
-	movq	88(%rsp), %r14          # 8-byte Reload
-	adcq	784(%rsp), %r14
-	movq	104(%rsp), %rcx         # 8-byte Reload
-	adcq	792(%rsp), %rcx
-	movq	%rcx, 104(%rsp)         # 8-byte Spill
-	movq	112(%rsp), %rcx         # 8-byte Reload
-	adcq	800(%rsp), %rcx
-	movq	%rcx, 112(%rsp)         # 8-byte Spill
-	adcq	808(%rsp), %rbp
-	movq	%r12, %rbx
-	adcq	816(%rsp), %rbx
-	movq	%r13, %r12
-	adcq	824(%rsp), %r12
-	movq	80(%rsp), %r13          # 8-byte Reload
-	adcq	832(%rsp), %r13
-	adcq	$0, %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	40(%rax), %rdx
+	movq	24(%rsp), %rdx                  # 8-byte Reload
+	adcq	784(%rsp), %rdx
+	movq	%rdx, 24(%rsp)                  # 8-byte Spill
+	adcq	792(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   # 8-byte Spill
+	adcq	800(%rsp), %rbx
+	adcq	808(%rsp), %r12
+	adcq	$0, %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	movq	64(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
 	leaq	680(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	64(%rsp), %rax          # 8-byte Reload
-	addq	680(%rsp), %rax
-	adcq	688(%rsp), %r15
-	movq	%r15, 48(%rsp)          # 8-byte Spill
+	movq	72(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	addq	680(%rsp), %rbp
+	adcq	688(%rsp), %r13
+	movq	16(%rsp), %r14                  # 8-byte Reload
 	adcq	696(%rsp), %r14
-	movq	%r14, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %rcx         # 8-byte Reload
-	adcq	704(%rsp), %rcx
-	movq	%rcx, 104(%rsp)         # 8-byte Spill
-	movq	112(%rsp), %r15         # 8-byte Reload
+	adcq	704(%rsp), %r15
+	movq	%r15, 32(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %r15                  # 8-byte Reload
 	adcq	712(%rsp), %r15
+	movq	8(%rsp), %rbp                   # 8-byte Reload
 	adcq	720(%rsp), %rbp
 	adcq	728(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
 	adcq	736(%rsp), %r12
-	movq	%r12, 96(%rsp)          # 8-byte Spill
-	adcq	744(%rsp), %r13
-	movq	%r13, 80(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %r13          # 8-byte Reload
-	adcq	752(%rsp), %r13
-	sbbq	%r14, %r14
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	600(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %r14d
-	addq	600(%rsp), %rbx
-	movq	48(%rsp), %rax          # 8-byte Reload
-	adcq	608(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	616(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %rbx         # 8-byte Reload
-	adcq	624(%rsp), %rbx
+	movq	40(%rsp), %rax                  # 8-byte Reload
+	adcq	744(%rsp), %rax
+	movq	%rax, 40(%rsp)                  # 8-byte Spill
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	32(%rax), %rdx
+	leaq	608(%rsp), %rdi
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	672(%rsp), %rcx
+	movq	%r13, %rax
+	addq	608(%rsp), %rax
+	adcq	616(%rsp), %r14
+	movq	%r14, 16(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %r13                  # 8-byte Reload
+	adcq	624(%rsp), %r13
 	adcq	632(%rsp), %r15
-	movq	%r15, 112(%rsp)         # 8-byte Spill
+	movq	%r15, 24(%rsp)                  # 8-byte Spill
 	adcq	640(%rsp), %rbp
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	648(%rsp), %r12
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	656(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %r15          # 8-byte Reload
-	adcq	664(%rsp), %r15
-	adcq	672(%rsp), %r13
-	adcq	$0, %r14
-	movq	%r14, 64(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	520(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	48(%rsp), %rax          # 8-byte Reload
-	addq	520(%rsp), %rax
-	movq	88(%rsp), %r14          # 8-byte Reload
-	adcq	528(%rsp), %r14
-	adcq	536(%rsp), %rbx
-	movq	%rbx, 104(%rsp)         # 8-byte Spill
-	movq	112(%rsp), %rcx         # 8-byte Reload
-	adcq	544(%rsp), %rcx
-	movq	%rcx, 112(%rsp)         # 8-byte Spill
-	adcq	552(%rsp), %rbp
-	adcq	560(%rsp), %r12
-	movq	%r12, 72(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %r12          # 8-byte Reload
-	adcq	568(%rsp), %r12
+	movq	%rbp, 8(%rsp)                   # 8-byte Spill
+	adcq	648(%rsp), %rbx
+	movq	%rbx, %r15
+	adcq	656(%rsp), %r12
+	movq	40(%rsp), %r14                  # 8-byte Reload
+	adcq	664(%rsp), %r14
+	movq	%rcx, %rbp
+	adcq	$0, %rbp
+	movq	64(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	536(%rsp), %rdi
+	movq	72(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	addq	536(%rsp), %rbx
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	544(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	%r13, %rbx
+	adcq	552(%rsp), %rbx
+	movq	24(%rsp), %r13                  # 8-byte Reload
+	adcq	560(%rsp), %r13
+	movq	8(%rsp), %rax                   # 8-byte Reload
+	adcq	568(%rsp), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
 	adcq	576(%rsp), %r15
-	movq	%r15, 80(%rsp)          # 8-byte Spill
-	adcq	584(%rsp), %r13
-	movq	%r13, 40(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r15          # 8-byte Reload
+	movq	%r15, 48(%rsp)                  # 8-byte Spill
+	adcq	584(%rsp), %r12
+	movq	%r14, %r15
 	adcq	592(%rsp), %r15
-	sbbq	%rbx, %rbx
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	440(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	440(%rsp), %r13
-	adcq	448(%rsp), %r14
-	movq	%r14, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %r14         # 8-byte Reload
-	adcq	456(%rsp), %r14
-	movq	112(%rsp), %rbx         # 8-byte Reload
-	adcq	464(%rsp), %rbx
-	adcq	472(%rsp), %rbp
-	movq	%rbp, 8(%rsp)           # 8-byte Spill
-	movq	72(%rsp), %rcx          # 8-byte Reload
-	adcq	480(%rsp), %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	adcq	488(%rsp), %r12
-	movq	%r12, 96(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %rbp          # 8-byte Reload
-	adcq	496(%rsp), %rbp
-	movq	40(%rsp), %r12          # 8-byte Reload
+	adcq	600(%rsp), %rbp
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	40(%rax), %rdx
+	leaq	464(%rsp), %rdi
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	528(%rsp), %rcx
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	addq	464(%rsp), %rax
+	adcq	472(%rsp), %rbx
+	movq	%rbx, 32(%rsp)                  # 8-byte Spill
+	adcq	480(%rsp), %r13
+	movq	%r13, 24(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %r14                   # 8-byte Reload
+	adcq	488(%rsp), %r14
+	movq	48(%rsp), %r13                  # 8-byte Reload
+	adcq	496(%rsp), %r13
 	adcq	504(%rsp), %r12
+	movq	%r12, 16(%rsp)                  # 8-byte Spill
 	adcq	512(%rsp), %r15
-	movq	%r15, %r13
-	adcq	$0, %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	360(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	88(%rsp), %rax          # 8-byte Reload
-	addq	360(%rsp), %rax
-	adcq	368(%rsp), %r14
-	adcq	376(%rsp), %rbx
-	movq	%rbx, 112(%rsp)         # 8-byte Spill
-	movq	8(%rsp), %rcx           # 8-byte Reload
-	adcq	384(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           # 8-byte Spill
-	movq	72(%rsp), %rbx          # 8-byte Reload
-	adcq	392(%rsp), %rbx
-	movq	96(%rsp), %r15          # 8-byte Reload
-	adcq	400(%rsp), %r15
-	adcq	408(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	adcq	416(%rsp), %r12
-	movq	%r12, %rbp
+	movq	%r15, %r12
+	adcq	520(%rsp), %rbp
+	movq	%rcx, %r15
+	adcq	$0, %r15
+	movq	64(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	392(%rsp), %rdi
+	movq	72(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	addq	392(%rsp), %rbx
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	adcq	400(%rsp), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	408(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	%r14, %rbx
+	adcq	416(%rsp), %rbx
 	adcq	424(%rsp), %r13
-	movq	%r13, 64(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %rcx          # 8-byte Reload
-	adcq	432(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          # 8-byte Spill
-	sbbq	%r13, %r13
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	16(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	280(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %r13d
-	addq	280(%rsp), %r12
-	adcq	288(%rsp), %r14
-	movq	%r14, 104(%rsp)         # 8-byte Spill
-	movq	112(%rsp), %rax         # 8-byte Reload
-	adcq	296(%rsp), %rax
-	movq	%rax, 112(%rsp)         # 8-byte Spill
-	movq	8(%rsp), %r14           # 8-byte Reload
-	adcq	304(%rsp), %r14
-	adcq	312(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
-	adcq	320(%rsp), %r15
-	movq	%r15, 96(%rsp)          # 8-byte Spill
-	movq	80(%rsp), %rbx          # 8-byte Reload
-	adcq	328(%rsp), %rbx
-	adcq	336(%rsp), %rbp
-	movq	%rbp, 40(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %r12          # 8-byte Reload
-	adcq	344(%rsp), %r12
-	movq	48(%rsp), %rbp          # 8-byte Reload
-	adcq	352(%rsp), %rbp
+	movq	%r13, 48(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %r14                  # 8-byte Reload
+	adcq	432(%rsp), %r14
+	adcq	440(%rsp), %r12
+	adcq	448(%rsp), %rbp
+	movq	%rbp, 56(%rsp)                  # 8-byte Spill
+	adcq	456(%rsp), %r15
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	48(%rax), %rdx
+	leaq	320(%rsp), %rdi
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	leaq	248(%rsp), %rdi
+	movq	384(%rsp), %r13
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	addq	320(%rsp), %rax
+	movq	24(%rsp), %rbp                  # 8-byte Reload
+	adcq	328(%rsp), %rbp
+	adcq	336(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	344(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	adcq	352(%rsp), %r14
+	movq	%r14, 16(%rsp)                  # 8-byte Spill
+	adcq	360(%rsp), %r12
+	movq	%r12, 40(%rsp)                  # 8-byte Spill
+	movq	56(%rsp), %r12                  # 8-byte Reload
+	adcq	368(%rsp), %r12
+	adcq	376(%rsp), %r15
+	movq	%r15, 32(%rsp)                  # 8-byte Spill
 	adcq	$0, %r13
-	movq	32(%rsp), %rax          # 8-byte Reload
-	movq	64(%rax), %rdx
-	leaq	200(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	104(%rsp), %rax         # 8-byte Reload
-	addq	200(%rsp), %rax
-	movq	112(%rsp), %r15         # 8-byte Reload
-	adcq	208(%rsp), %r15
-	adcq	216(%rsp), %r14
-	movq	%r14, 8(%rsp)           # 8-byte Spill
-	movq	72(%rsp), %r14          # 8-byte Reload
-	adcq	224(%rsp), %r14
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	232(%rsp), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	adcq	240(%rsp), %rbx
-	movq	%rbx, 80(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %rcx          # 8-byte Reload
-	adcq	248(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          # 8-byte Spill
-	adcq	256(%rsp), %r12
-	movq	%r12, 64(%rsp)          # 8-byte Spill
-	adcq	264(%rsp), %rbp
-	movq	%rbp, 48(%rsp)          # 8-byte Spill
-	adcq	272(%rsp), %r13
-	sbbq	%rbx, %rbx
-	movq	16(%rsp), %rdx          # 8-byte Reload
+	movq	64(%rsp), %rdx                  # 8-byte Reload
 	imulq	%rax, %rdx
-	movq	%rax, %r12
-	leaq	120(%rsp), %rdi
-	movq	56(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %ebx
-	addq	120(%rsp), %r12
-	adcq	128(%rsp), %r15
-	movq	8(%rsp), %rbp           # 8-byte Reload
-	adcq	136(%rsp), %rbp
-	movq	%r14, %rcx
-	adcq	144(%rsp), %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %r8           # 8-byte Reload
-	adcq	152(%rsp), %r8
-	movq	%r8, 96(%rsp)           # 8-byte Spill
-	movq	80(%rsp), %r9           # 8-byte Reload
-	adcq	160(%rsp), %r9
-	movq	%r9, 80(%rsp)           # 8-byte Spill
-	movq	40(%rsp), %r10          # 8-byte Reload
-	adcq	168(%rsp), %r10
-	movq	%r10, 40(%rsp)          # 8-byte Spill
-	movq	64(%rsp), %rdi          # 8-byte Reload
-	adcq	176(%rsp), %rdi
-	movq	%rdi, 64(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %r14          # 8-byte Reload
+	movq	%rax, %rbx
+	movq	72(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	leaq	176(%rsp), %rdi
+	addq	248(%rsp), %rbx
+	adcq	256(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %r14                   # 8-byte Reload
+	adcq	264(%rsp), %r14
+	movq	48(%rsp), %rbp                  # 8-byte Reload
+	adcq	272(%rsp), %rbp
+	movq	16(%rsp), %r15                  # 8-byte Reload
+	adcq	280(%rsp), %r15
+	movq	40(%rsp), %rbx                  # 8-byte Reload
+	adcq	288(%rsp), %rbx
+	adcq	296(%rsp), %r12
+	movq	%r12, 56(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	adcq	304(%rsp), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	adcq	312(%rsp), %r13
+	movq	88(%rsp), %rax                  # 8-byte Reload
+	movq	56(%rax), %rdx
+	movq	80(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	leaq	104(%rsp), %rdi
+	movq	240(%rsp), %r12
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	addq	176(%rsp), %rax
 	adcq	184(%rsp), %r14
-	adcq	192(%rsp), %r13
-	adcq	$0, %rbx
-	movq	%r15, %rsi
-	movq	%r15, %r12
-	movq	56(%rsp), %rdx          # 8-byte Reload
-	subq	(%rdx), %rsi
-	movq	%rbp, %rax
-	movq	%rbp, %r15
-	sbbq	8(%rdx), %rax
-	movq	%rcx, %rbp
-	sbbq	16(%rdx), %rbp
-	movq	%r8, %rcx
-	sbbq	24(%rdx), %rcx
-	movq	%r9, %r8
-	sbbq	32(%rdx), %r8
-	movq	%r10, %r11
-	sbbq	40(%rdx), %r11
-	movq	%rdi, %r10
-	sbbq	48(%rdx), %r10
-	movq	%r14, %rdi
-	sbbq	56(%rdx), %rdi
-	movq	%r13, %r9
-	sbbq	64(%rdx), %r9
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%r13, %r9
-	testb	%bl, %bl
-	cmovneq	%r12, %rsi
-	movq	(%rsp), %rbx            # 8-byte Reload
-	movq	%rsi, (%rbx)
-	cmovneq	%r15, %rax
-	movq	%rax, 8(%rbx)
-	cmovneq	72(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%rbp, 16(%rbx)
-	cmovneq	96(%rsp), %rcx          # 8-byte Folded Reload
-	movq	%rcx, 24(%rbx)
-	cmovneq	80(%rsp), %r8           # 8-byte Folded Reload
-	movq	%r8, 32(%rbx)
-	cmovneq	40(%rsp), %r11          # 8-byte Folded Reload
-	movq	%r11, 40(%rbx)
-	cmovneq	64(%rsp), %r10          # 8-byte Folded Reload
-	movq	%r10, 48(%rbx)
-	cmovneq	%r14, %rdi
-	movq	%rdi, 56(%rbx)
-	movq	%r9, 64(%rbx)
-	addq	$1560, %rsp             # imm = 0x618
+	movq	%r14, 8(%rsp)                   # 8-byte Spill
+	adcq	192(%rsp), %rbp
+	movq	%rbp, 48(%rsp)                  # 8-byte Spill
+	adcq	200(%rsp), %r15
+	movq	%r15, 16(%rsp)                  # 8-byte Spill
+	adcq	208(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  # 8-byte Spill
+	movq	56(%rsp), %rbp                  # 8-byte Reload
+	adcq	216(%rsp), %rbp
+	movq	32(%rsp), %r15                  # 8-byte Reload
+	adcq	224(%rsp), %r15
+	adcq	232(%rsp), %r13
+	adcq	$0, %r12
+	movq	64(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	movq	72(%rsp), %r14                  # 8-byte Reload
+	movq	%r14, %rsi
+	callq	mulPv512x64@PLT
+	addq	104(%rsp), %rbx
+	movq	8(%rsp), %r8                    # 8-byte Reload
+	adcq	112(%rsp), %r8
+	movq	%r8, 8(%rsp)                    # 8-byte Spill
+	movq	48(%rsp), %r9                   # 8-byte Reload
+	adcq	120(%rsp), %r9
+	movq	%r9, 48(%rsp)                   # 8-byte Spill
+	movq	16(%rsp), %rsi                  # 8-byte Reload
+	adcq	128(%rsp), %rsi
+	movq	40(%rsp), %r11                  # 8-byte Reload
+	adcq	136(%rsp), %r11
+	movq	%rbp, %r10
+	adcq	144(%rsp), %r10
+	adcq	152(%rsp), %r15
+	adcq	160(%rsp), %r13
+	adcq	168(%rsp), %r12
+	movq	%r14, %rax
+	subq	(%r14), %r8
+	sbbq	8(%r14), %r9
+	movq	%rsi, %rdx
+	movq	%rsi, %r14
+	sbbq	16(%rax), %rdx
+	movq	%r11, %rsi
+	sbbq	24(%rax), %rsi
+	movq	%r10, %rdi
+	sbbq	32(%rax), %rdi
+	movq	%r15, %rbp
+	sbbq	40(%rax), %rbp
+	movq	%r13, %rbx
+	sbbq	48(%rax), %rbx
+	movq	%rax, %rcx
+	movq	%r12, %rax
+	sbbq	56(%rcx), %rax
+	cmovsq	%r12, %rax
+	movq	96(%rsp), %rcx                  # 8-byte Reload
+	movq	%rax, 56(%rcx)
+	cmovsq	%r13, %rbx
+	movq	%rbx, 48(%rcx)
+	cmovsq	%r15, %rbp
+	movq	%rbp, 40(%rcx)
+	cmovsq	%r10, %rdi
+	movq	%rdi, 32(%rcx)
+	cmovsq	%r11, %rsi
+	movq	%rsi, 24(%rcx)
+	cmovsq	%r14, %rdx
+	movq	%rdx, 16(%rcx)
+	cmovsq	48(%rsp), %r9                   # 8-byte Folded Reload
+	movq	%r9, 8(%rcx)
+	cmovsq	8(%rsp), %r8                    # 8-byte Folded Reload
+	movq	%r8, (%rcx)
+	addq	$1256, %rsp                     # imm = 0x4E8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -14969,532 +7162,304 @@ mcl_fp_mont9L:                          # @mcl_fp_mont9L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end130:
-	.size	mcl_fp_mont9L, .Lfunc_end130-mcl_fp_mont9L
-
-	.globl	mcl_fp_montNF9L
-	.align	16, 0x90
-	.type	mcl_fp_montNF9L,@function
-mcl_fp_montNF9L:                        # @mcl_fp_montNF9L
-# BB#0:
+.Lfunc_end61:
+	.size	mcl_fp_montNF8L, .Lfunc_end61-mcl_fp_montNF8L
+                                        # -- End function
+	.globl	mcl_fp_montRed8L                # -- Begin function mcl_fp_montRed8L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed8L,@function
+mcl_fp_montRed8L:                       # @mcl_fp_montRed8L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$1560, %rsp             # imm = 0x618
-	movq	%rcx, 64(%rsp)          # 8-byte Spill
-	movq	%rdx, 16(%rsp)          # 8-byte Spill
-	movq	%rsi, 24(%rsp)          # 8-byte Spill
-	movq	%rdi, (%rsp)            # 8-byte Spill
-	movq	-8(%rcx), %rbx
-	movq	%rbx, 32(%rsp)          # 8-byte Spill
-	movq	(%rdx), %rdx
-	leaq	1480(%rsp), %rdi
-	callq	.LmulPv576x64
-	movq	1480(%rsp), %r12
-	movq	1488(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	%r12, %rdx
-	imulq	%rbx, %rdx
-	movq	1552(%rsp), %rax
-	movq	%rax, 112(%rsp)         # 8-byte Spill
-	movq	1544(%rsp), %r13
-	movq	1536(%rsp), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	1528(%rsp), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	1520(%rsp), %r14
-	movq	1512(%rsp), %r15
-	movq	1504(%rsp), %rbx
-	movq	1496(%rsp), %rbp
-	leaq	1400(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	1400(%rsp), %r12
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	1408(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	adcq	1416(%rsp), %rbp
-	movq	%rbp, 8(%rsp)           # 8-byte Spill
-	adcq	1424(%rsp), %rbx
-	movq	%rbx, 104(%rsp)         # 8-byte Spill
-	adcq	1432(%rsp), %r15
-	movq	%r15, 56(%rsp)          # 8-byte Spill
-	adcq	1440(%rsp), %r14
-	movq	%r14, 40(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %rbx          # 8-byte Reload
-	adcq	1448(%rsp), %rbx
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	1456(%rsp), %r12
-	adcq	1464(%rsp), %r13
-	movq	%r13, 96(%rsp)          # 8-byte Spill
-	movq	112(%rsp), %rbp         # 8-byte Reload
-	adcq	1472(%rsp), %rbp
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1320(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	1392(%rsp), %rax
-	movq	88(%rsp), %rcx          # 8-byte Reload
-	addq	1320(%rsp), %rcx
-	movq	8(%rsp), %r15           # 8-byte Reload
-	adcq	1328(%rsp), %r15
-	movq	104(%rsp), %r14         # 8-byte Reload
-	adcq	1336(%rsp), %r14
-	movq	56(%rsp), %rdx          # 8-byte Reload
-	adcq	1344(%rsp), %rdx
-	movq	%rdx, 56(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %r13          # 8-byte Reload
-	adcq	1352(%rsp), %r13
-	adcq	1360(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          # 8-byte Spill
-	adcq	1368(%rsp), %r12
-	movq	%r12, 72(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %rdx          # 8-byte Reload
-	adcq	1376(%rsp), %rdx
-	movq	%rdx, 96(%rsp)          # 8-byte Spill
-	adcq	1384(%rsp), %rbp
-	movq	%rbp, 112(%rsp)         # 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, %rbp
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	1240(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	1240(%rsp), %rbx
-	adcq	1248(%rsp), %r15
-	movq	%r15, 8(%rsp)           # 8-byte Spill
-	adcq	1256(%rsp), %r14
-	movq	%r14, 104(%rsp)         # 8-byte Spill
-	movq	56(%rsp), %r12          # 8-byte Reload
-	adcq	1264(%rsp), %r12
-	adcq	1272(%rsp), %r13
-	movq	%r13, %r14
-	movq	48(%rsp), %r13          # 8-byte Reload
-	adcq	1280(%rsp), %r13
-	movq	72(%rsp), %rbx          # 8-byte Reload
-	adcq	1288(%rsp), %rbx
-	movq	96(%rsp), %r15          # 8-byte Reload
-	adcq	1296(%rsp), %r15
-	movq	112(%rsp), %rax         # 8-byte Reload
-	adcq	1304(%rsp), %rax
-	movq	%rax, 112(%rsp)         # 8-byte Spill
-	adcq	1312(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	1160(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	1232(%rsp), %rax
-	movq	8(%rsp), %rcx           # 8-byte Reload
-	addq	1160(%rsp), %rcx
-	movq	104(%rsp), %rbp         # 8-byte Reload
-	adcq	1168(%rsp), %rbp
-	adcq	1176(%rsp), %r12
-	movq	%r12, 56(%rsp)          # 8-byte Spill
-	adcq	1184(%rsp), %r14
-	adcq	1192(%rsp), %r13
-	movq	%r13, %r12
-	adcq	1200(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          # 8-byte Spill
-	adcq	1208(%rsp), %r15
-	movq	%r15, 96(%rsp)          # 8-byte Spill
-	movq	112(%rsp), %rbx         # 8-byte Reload
-	adcq	1216(%rsp), %rbx
-	movq	80(%rsp), %rdx          # 8-byte Reload
-	adcq	1224(%rsp), %rdx
-	movq	%rdx, 80(%rsp)          # 8-byte Spill
-	movq	%rax, %r15
-	adcq	$0, %r15
-	movq	%rcx, %rdx
-	movq	%rcx, %r13
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	1080(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	1080(%rsp), %r13
-	adcq	1088(%rsp), %rbp
-	movq	%rbp, 104(%rsp)         # 8-byte Spill
-	movq	56(%rsp), %r13          # 8-byte Reload
-	adcq	1096(%rsp), %r13
-	adcq	1104(%rsp), %r14
-	adcq	1112(%rsp), %r12
-	movq	%r12, 48(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	1120(%rsp), %r12
-	movq	96(%rsp), %rbp          # 8-byte Reload
-	adcq	1128(%rsp), %rbp
-	adcq	1136(%rsp), %rbx
-	movq	%rbx, 112(%rsp)         # 8-byte Spill
-	movq	80(%rsp), %rbx          # 8-byte Reload
-	adcq	1144(%rsp), %rbx
-	adcq	1152(%rsp), %r15
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	1000(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	1072(%rsp), %rax
-	movq	104(%rsp), %rcx         # 8-byte Reload
-	addq	1000(%rsp), %rcx
-	adcq	1008(%rsp), %r13
-	movq	%r13, 56(%rsp)          # 8-byte Spill
-	adcq	1016(%rsp), %r14
-	movq	%r14, 40(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %r14          # 8-byte Reload
-	adcq	1024(%rsp), %r14
-	adcq	1032(%rsp), %r12
-	adcq	1040(%rsp), %rbp
-	movq	%rbp, 96(%rsp)          # 8-byte Spill
-	movq	112(%rsp), %r13         # 8-byte Reload
-	adcq	1048(%rsp), %r13
-	adcq	1056(%rsp), %rbx
-	movq	%rbx, 80(%rsp)          # 8-byte Spill
-	adcq	1064(%rsp), %r15
-	movq	%r15, 88(%rsp)          # 8-byte Spill
+	subq	$728, %rsp                      # imm = 0x2D8
+	movq	%rdi, 144(%rsp)                 # 8-byte Spill
+	movq	56(%rdx), %rax
+	movq	%rax, 136(%rsp)                 # 8-byte Spill
+	movq	48(%rdx), %rax
+	movq	%rax, 128(%rsp)                 # 8-byte Spill
+	movq	40(%rdx), %rax
+	movq	%rax, 120(%rsp)                 # 8-byte Spill
+	movq	32(%rdx), %rax
+	movq	%rax, 112(%rsp)                 # 8-byte Spill
+	movq	24(%rdx), %rax
+	movq	%rax, 104(%rsp)                 # 8-byte Spill
+	movq	16(%rdx), %rax
+	movq	%rax, 96(%rsp)                  # 8-byte Spill
+	movq	8(%rdx), %rax
+	movq	%rax, 88(%rsp)                  # 8-byte Spill
+	movq	%rsi, 72(%rsp)                  # 8-byte Spill
+	movq	56(%rsi), %r12
+	movq	48(%rsi), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	40(%rsi), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	movq	32(%rsi), %r15
+	movq	24(%rsi), %r14
+	movq	16(%rsi), %r13
+	movq	(%rsi), %rbp
+	movq	8(%rsi), %rbx
+	movq	-8(%rdx), %rcx
+	movq	%rcx, 56(%rsp)                  # 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdx, %rsi
+	movq	%rdx, 64(%rsp)                  # 8-byte Spill
+	movq	%rax, 80(%rsp)                  # 8-byte Spill
+	movq	%rbp, %rdx
+	imulq	%rcx, %rdx
+	leaq	656(%rsp), %rdi
+	callq	mulPv512x64@PLT
+	addq	656(%rsp), %rbp
+	adcq	664(%rsp), %rbx
+	adcq	672(%rsp), %r13
+	adcq	680(%rsp), %r14
+	adcq	688(%rsp), %r15
+	movq	32(%rsp), %rbp                  # 8-byte Reload
+	adcq	696(%rsp), %rbp
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	704(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	adcq	712(%rsp), %r12
+	movq	%r12, 24(%rsp)                  # 8-byte Spill
+	movq	72(%rsp), %rax                  # 8-byte Reload
+	movq	64(%rax), %rax
+	adcq	720(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	setb	%r12b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rbx, %rdx
+	leaq	584(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	648(%rsp), %rax
+	addb	$255, %r12b
 	adcq	$0, %rax
-	movq	%rax, 104(%rsp)         # 8-byte Spill
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	920(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	920(%rsp), %rbx
-	movq	56(%rsp), %rax          # 8-byte Reload
-	adcq	928(%rsp), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	movq	40(%rsp), %rbp          # 8-byte Reload
-	adcq	936(%rsp), %rbp
-	movq	%r14, %rbx
-	adcq	944(%rsp), %rbx
-	adcq	952(%rsp), %r12
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	960(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	adcq	968(%rsp), %r13
-	movq	%r13, %r15
-	movq	80(%rsp), %r13          # 8-byte Reload
-	adcq	976(%rsp), %r13
-	movq	88(%rsp), %r14          # 8-byte Reload
-	adcq	984(%rsp), %r14
-	movq	104(%rsp), %rax         # 8-byte Reload
-	adcq	992(%rsp), %rax
-	movq	%rax, 104(%rsp)         # 8-byte Spill
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	840(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	912(%rsp), %rax
-	movq	56(%rsp), %rcx          # 8-byte Reload
-	addq	840(%rsp), %rcx
-	adcq	848(%rsp), %rbp
-	movq	%rbp, 40(%rsp)          # 8-byte Spill
-	adcq	856(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          # 8-byte Spill
-	adcq	864(%rsp), %r12
-	movq	96(%rsp), %rbp          # 8-byte Reload
-	adcq	872(%rsp), %rbp
-	adcq	880(%rsp), %r15
-	movq	%r15, 112(%rsp)         # 8-byte Spill
-	adcq	888(%rsp), %r13
-	adcq	896(%rsp), %r14
-	movq	%r14, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %rdx         # 8-byte Reload
-	adcq	904(%rsp), %rdx
-	movq	%rdx, 104(%rsp)         # 8-byte Spill
+	movq	%rax, %rcx
+	addq	584(%rsp), %rbx
+	adcq	592(%rsp), %r13
+	adcq	600(%rsp), %r14
+	adcq	608(%rsp), %r15
+	adcq	616(%rsp), %rbp
+	movq	%rbp, 32(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	624(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rbp                  # 8-byte Reload
+	adcq	632(%rsp), %rbp
+	movq	(%rsp), %rax                    # 8-byte Reload
+	adcq	640(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	movq	72(%rsp), %r12                  # 8-byte Reload
+	adcq	72(%r12), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	setb	%bl
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r13, %rdx
+	leaq	512(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	576(%rsp), %rax
+	addb	$255, %bl
 	adcq	$0, %rax
-	movq	%rax, %r14
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	760(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	760(%rsp), %rbx
-	movq	40(%rsp), %rax          # 8-byte Reload
-	adcq	768(%rsp), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	48(%rsp), %r15          # 8-byte Reload
-	adcq	776(%rsp), %r15
-	adcq	784(%rsp), %r12
-	movq	%r12, 72(%rsp)          # 8-byte Spill
-	movq	%rbp, %rbx
-	adcq	792(%rsp), %rbx
-	movq	112(%rsp), %rbp         # 8-byte Reload
-	adcq	800(%rsp), %rbp
-	adcq	808(%rsp), %r13
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	816(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %r12         # 8-byte Reload
-	adcq	824(%rsp), %r12
-	adcq	832(%rsp), %r14
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	40(%rax), %rdx
-	leaq	680(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	752(%rsp), %rcx
-	movq	40(%rsp), %rax          # 8-byte Reload
-	addq	680(%rsp), %rax
-	adcq	688(%rsp), %r15
-	movq	%r15, 48(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %rdx          # 8-byte Reload
-	adcq	696(%rsp), %rdx
-	movq	%rdx, 72(%rsp)          # 8-byte Spill
-	adcq	704(%rsp), %rbx
-	movq	%rbx, 96(%rsp)          # 8-byte Spill
-	adcq	712(%rsp), %rbp
-	movq	%rbp, 112(%rsp)         # 8-byte Spill
-	adcq	720(%rsp), %r13
-	movq	%r13, %r15
-	movq	88(%rsp), %rbx          # 8-byte Reload
-	adcq	728(%rsp), %rbx
-	adcq	736(%rsp), %r12
-	movq	%r12, 104(%rsp)         # 8-byte Spill
-	adcq	744(%rsp), %r14
-	movq	%r14, 40(%rsp)          # 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, 56(%rsp)          # 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	600(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	600(%rsp), %r13
-	movq	48(%rsp), %r13          # 8-byte Reload
-	adcq	608(%rsp), %r13
-	movq	72(%rsp), %r12          # 8-byte Reload
-	adcq	616(%rsp), %r12
-	movq	96(%rsp), %rbp          # 8-byte Reload
-	adcq	624(%rsp), %rbp
-	movq	112(%rsp), %rax         # 8-byte Reload
-	adcq	632(%rsp), %rax
-	movq	%rax, 112(%rsp)         # 8-byte Spill
-	adcq	640(%rsp), %r15
-	movq	%r15, 80(%rsp)          # 8-byte Spill
-	adcq	648(%rsp), %rbx
-	movq	%rbx, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %r14         # 8-byte Reload
-	adcq	656(%rsp), %r14
-	movq	40(%rsp), %rbx          # 8-byte Reload
-	adcq	664(%rsp), %rbx
-	movq	56(%rsp), %r15          # 8-byte Reload
-	adcq	672(%rsp), %r15
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	520(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	592(%rsp), %rcx
-	movq	%r13, %rax
-	addq	520(%rsp), %rax
-	adcq	528(%rsp), %r12
-	movq	%r12, 72(%rsp)          # 8-byte Spill
-	movq	%rbp, %r12
-	adcq	536(%rsp), %r12
-	movq	112(%rsp), %rbp         # 8-byte Reload
-	adcq	544(%rsp), %rbp
-	movq	80(%rsp), %rdx          # 8-byte Reload
-	adcq	552(%rsp), %rdx
-	movq	%rdx, 80(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %rdx          # 8-byte Reload
-	adcq	560(%rsp), %rdx
-	movq	%rdx, 88(%rsp)          # 8-byte Spill
-	adcq	568(%rsp), %r14
-	movq	%r14, 104(%rsp)         # 8-byte Spill
-	adcq	576(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          # 8-byte Spill
-	adcq	584(%rsp), %r15
-	movq	%r15, 56(%rsp)          # 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, %r13
-	movq	%rax, %rdx
-	movq	%rax, %r14
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
+	movq	%rax, %rcx
+	addq	512(%rsp), %r13
+	adcq	520(%rsp), %r14
+	adcq	528(%rsp), %r15
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	adcq	536(%rsp), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %rax                  # 8-byte Reload
+	adcq	544(%rsp), %rax
+	movq	%rax, 16(%rsp)                  # 8-byte Spill
+	adcq	552(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  # 8-byte Spill
+	movq	(%rsp), %rbp                    # 8-byte Reload
+	adcq	560(%rsp), %rbp
+	movq	8(%rsp), %rbx                   # 8-byte Reload
+	adcq	568(%rsp), %rbx
+	adcq	80(%r12), %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	setb	%r13b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r14, %rdx
 	leaq	440(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
+	movq	64(%rsp), %r12                  # 8-byte Reload
+	movq	%r12, %rsi
+	callq	mulPv512x64@PLT
+	movq	504(%rsp), %rax
+	addb	$255, %r13b
+	adcq	$0, %rax
 	addq	440(%rsp), %r14
-	movq	72(%rsp), %rax          # 8-byte Reload
-	adcq	448(%rsp), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	adcq	456(%rsp), %r12
-	adcq	464(%rsp), %rbp
-	movq	%rbp, 112(%rsp)         # 8-byte Spill
-	movq	80(%rsp), %r14          # 8-byte Reload
-	adcq	472(%rsp), %r14
-	movq	88(%rsp), %r15          # 8-byte Reload
-	adcq	480(%rsp), %r15
-	movq	104(%rsp), %rbp         # 8-byte Reload
-	adcq	488(%rsp), %rbp
-	movq	40(%rsp), %rbx          # 8-byte Reload
-	adcq	496(%rsp), %rbx
-	movq	56(%rsp), %rax          # 8-byte Reload
-	adcq	504(%rsp), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	adcq	512(%rsp), %r13
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	360(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	432(%rsp), %rcx
-	movq	72(%rsp), %rax          # 8-byte Reload
-	addq	360(%rsp), %rax
-	adcq	368(%rsp), %r12
-	movq	%r12, 96(%rsp)          # 8-byte Spill
-	movq	112(%rsp), %rdx         # 8-byte Reload
-	adcq	376(%rsp), %rdx
-	movq	%rdx, 112(%rsp)         # 8-byte Spill
-	adcq	384(%rsp), %r14
-	movq	%r14, 80(%rsp)          # 8-byte Spill
-	adcq	392(%rsp), %r15
-	movq	%r15, 88(%rsp)          # 8-byte Spill
-	adcq	400(%rsp), %rbp
-	movq	%rbp, 104(%rsp)         # 8-byte Spill
-	adcq	408(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          # 8-byte Spill
-	movq	56(%rsp), %r14          # 8-byte Reload
-	adcq	416(%rsp), %r14
-	adcq	424(%rsp), %r13
-	movq	%r13, %r15
-	adcq	$0, %rcx
-	movq	%rcx, 72(%rsp)          # 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	32(%rsp), %rdx          # 8-byte Folded Reload
-	leaq	280(%rsp), %rdi
-	movq	64(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	280(%rsp), %r12
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	288(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	112(%rsp), %rbp         # 8-byte Reload
-	adcq	296(%rsp), %rbp
-	movq	80(%rsp), %rax          # 8-byte Reload
+	adcq	448(%rsp), %r15
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	adcq	456(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	movq	16(%rsp), %r13                  # 8-byte Reload
+	adcq	464(%rsp), %r13
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	adcq	472(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  # 8-byte Spill
+	adcq	480(%rsp), %rbp
+	movq	%rbp, (%rsp)                    # 8-byte Spill
+	adcq	488(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	movq	40(%rsp), %rbp                  # 8-byte Reload
+	adcq	496(%rsp), %rbp
+	movq	72(%rsp), %rcx                  # 8-byte Reload
+	adcq	88(%rcx), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	setb	%bl
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r15, %rdx
+	leaq	368(%rsp), %rdi
+	movq	%r12, %rsi
+	callq	mulPv512x64@PLT
+	movq	432(%rsp), %r14
+	addb	$255, %bl
+	adcq	$0, %r14
+	addq	368(%rsp), %r15
+	movq	32(%rsp), %rax                  # 8-byte Reload
+	adcq	376(%rsp), %rax
+	adcq	384(%rsp), %r13
+	movq	%r13, 16(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rbx                  # 8-byte Reload
+	adcq	392(%rsp), %rbx
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	400(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	408(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	adcq	416(%rsp), %rbp
+	movq	%rbp, 40(%rsp)                  # 8-byte Spill
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	424(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	movq	72(%rsp), %rcx                  # 8-byte Reload
+	adcq	96(%rcx), %r14
+	setb	%r15b
+	movq	56(%rsp), %r13                  # 8-byte Reload
+	movq	%r13, %rdx
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	296(%rsp), %rdi
+	movq	%r12, %rsi
+	callq	mulPv512x64@PLT
+	movq	360(%rsp), %r12
+	addb	$255, %r15b
+	adcq	$0, %r12
+	addq	296(%rsp), %rbp
+	movq	16(%rsp), %rax                  # 8-byte Reload
 	adcq	304(%rsp), %rax
-	movq	%rax, 80(%rsp)          # 8-byte Spill
-	movq	88(%rsp), %r13          # 8-byte Reload
-	adcq	312(%rsp), %r13
-	movq	104(%rsp), %r12         # 8-byte Reload
-	adcq	320(%rsp), %r12
-	movq	40(%rsp), %rbx          # 8-byte Reload
-	adcq	328(%rsp), %rbx
-	adcq	336(%rsp), %r14
-	movq	%r14, 56(%rsp)          # 8-byte Spill
-	adcq	344(%rsp), %r15
-	movq	%r15, 48(%rsp)          # 8-byte Spill
-	movq	72(%rsp), %r14          # 8-byte Reload
+	adcq	312(%rsp), %rbx
+	movq	%rbx, 24(%rsp)                  # 8-byte Spill
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	320(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	328(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	movq	40(%rsp), %rcx                  # 8-byte Reload
+	adcq	336(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	344(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
 	adcq	352(%rsp), %r14
-	movq	16(%rsp), %rax          # 8-byte Reload
-	movq	64(%rax), %rdx
-	leaq	200(%rsp), %rdi
-	movq	24(%rsp), %rsi          # 8-byte Reload
-	callq	.LmulPv576x64
-	movq	272(%rsp), %rcx
-	movq	96(%rsp), %rax          # 8-byte Reload
-	addq	200(%rsp), %rax
-	adcq	208(%rsp), %rbp
-	movq	%rbp, 112(%rsp)         # 8-byte Spill
-	movq	80(%rsp), %rbp          # 8-byte Reload
-	adcq	216(%rsp), %rbp
-	adcq	224(%rsp), %r13
-	movq	%r13, 88(%rsp)          # 8-byte Spill
-	adcq	232(%rsp), %r12
-	movq	%r12, 104(%rsp)         # 8-byte Spill
-	adcq	240(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          # 8-byte Spill
-	movq	56(%rsp), %r15          # 8-byte Reload
-	adcq	248(%rsp), %r15
-	movq	48(%rsp), %r12          # 8-byte Reload
-	adcq	256(%rsp), %r12
-	adcq	264(%rsp), %r14
-	adcq	$0, %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	movq	32(%rsp), %rdx          # 8-byte Reload
+	movq	72(%rsp), %rbp                  # 8-byte Reload
+	adcq	104(%rbp), %r12
+	setb	%r15b
+	movq	%r13, %rdx
 	imulq	%rax, %rdx
 	movq	%rax, %rbx
-	leaq	120(%rsp), %rdi
-	movq	64(%rsp), %r13          # 8-byte Reload
-	movq	%r13, %rsi
-	callq	.LmulPv576x64
-	addq	120(%rsp), %rbx
-	movq	112(%rsp), %rcx         # 8-byte Reload
-	adcq	128(%rsp), %rcx
-	movq	%rbp, %rdx
-	adcq	136(%rsp), %rdx
-	movq	88(%rsp), %rsi          # 8-byte Reload
-	adcq	144(%rsp), %rsi
-	movq	%rsi, 88(%rsp)          # 8-byte Spill
-	movq	104(%rsp), %rdi         # 8-byte Reload
-	adcq	152(%rsp), %rdi
-	movq	%rdi, 104(%rsp)         # 8-byte Spill
-	movq	40(%rsp), %rbx          # 8-byte Reload
-	adcq	160(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          # 8-byte Spill
-	movq	%r15, %r8
-	adcq	168(%rsp), %r8
-	movq	%r8, 56(%rsp)           # 8-byte Spill
-	movq	%r12, %r15
+	leaq	224(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	288(%rsp), %r13
+	addb	$255, %r15b
+	adcq	$0, %r13
+	addq	224(%rsp), %rbx
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	232(%rsp), %rax
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	248(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	movq	40(%rsp), %rcx                  # 8-byte Reload
+	adcq	256(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	264(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	adcq	272(%rsp), %r14
+	adcq	280(%rsp), %r12
+	adcq	112(%rbp), %r13
+	setb	%r15b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	152(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	addb	$255, %r15b
+	movq	216(%rsp), %rdx
+	adcq	$0, %rdx
+	addq	152(%rsp), %rbx
+	movq	(%rsp), %r9                     # 8-byte Reload
+	adcq	160(%rsp), %r9
+	movq	%r9, (%rsp)                     # 8-byte Spill
+	movq	8(%rsp), %r10                   # 8-byte Reload
+	adcq	168(%rsp), %r10
+	movq	%r10, 8(%rsp)                   # 8-byte Spill
+	movq	40(%rsp), %r15                  # 8-byte Reload
 	adcq	176(%rsp), %r15
-	adcq	184(%rsp), %r14
-	movq	96(%rsp), %r9           # 8-byte Reload
-	adcq	192(%rsp), %r9
-	movq	%rcx, %rax
-	movq	%rcx, %r11
-	movq	%r13, %rbp
-	subq	(%rbp), %rax
+	movq	48(%rsp), %r11                  # 8-byte Reload
+	adcq	184(%rsp), %r11
+	adcq	192(%rsp), %r14
+	adcq	200(%rsp), %r12
+	adcq	208(%rsp), %r13
+	adcq	120(%rbp), %rdx
+	xorl	%r8d, %r8d
+	subq	80(%rsp), %r9                   # 8-byte Folded Reload
+	sbbq	88(%rsp), %r10                  # 8-byte Folded Reload
+	movq	%r15, %rdi
+	sbbq	96(%rsp), %rdi                  # 8-byte Folded Reload
+	movq	%r11, %rbp
+	sbbq	104(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	%r14, %rbx
+	sbbq	112(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%r12, %rsi
+	sbbq	120(%rsp), %rsi                 # 8-byte Folded Reload
+	movq	%r13, %rax
+	sbbq	128(%rsp), %rax                 # 8-byte Folded Reload
 	movq	%rdx, %rcx
-	movq	%rdx, %r12
-	sbbq	8(%rbp), %rcx
-	movq	%rsi, %rdx
-	sbbq	16(%rbp), %rdx
-	movq	%rdi, %rsi
-	sbbq	24(%rbp), %rsi
-	movq	%rbx, %rdi
-	sbbq	32(%rbp), %rdi
-	movq	%r8, %r10
-	sbbq	40(%rbp), %r10
-	movq	%r15, %r13
-	sbbq	48(%rbp), %r13
-	movq	%r14, %r8
-	sbbq	56(%rbp), %r8
-	movq	%rbp, %rbx
-	movq	%r9, %rbp
-	sbbq	64(%rbx), %rbp
-	movq	%rbp, %rbx
-	sarq	$63, %rbx
-	cmovsq	%r11, %rax
-	movq	(%rsp), %rbx            # 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovsq	%r12, %rcx
-	movq	%rcx, 8(%rbx)
-	cmovsq	88(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, 16(%rbx)
-	cmovsq	104(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, 24(%rbx)
-	cmovsq	40(%rsp), %rdi          # 8-byte Folded Reload
-	movq	%rdi, 32(%rbx)
-	cmovsq	56(%rsp), %r10          # 8-byte Folded Reload
-	movq	%r10, 40(%rbx)
-	cmovsq	%r15, %r13
-	movq	%r13, 48(%rbx)
-	cmovsq	%r14, %r8
-	movq	%r8, 56(%rbx)
-	cmovsq	%r9, %rbp
-	movq	%rbp, 64(%rbx)
-	addq	$1560, %rsp             # imm = 0x618
+	sbbq	136(%rsp), %rcx                 # 8-byte Folded Reload
+	sbbq	%r8, %r8
+	testb	$1, %r8b
+	cmovneq	%rdx, %rcx
+	movq	144(%rsp), %rdx                 # 8-byte Reload
+	movq	%rcx, 56(%rdx)
+	cmovneq	%r13, %rax
+	movq	%rax, 48(%rdx)
+	cmovneq	%r12, %rsi
+	movq	%rsi, 40(%rdx)
+	cmovneq	%r14, %rbx
+	movq	%rbx, 32(%rdx)
+	cmovneq	%r11, %rbp
+	movq	%rbp, 24(%rdx)
+	cmovneq	%r15, %rdi
+	movq	%rdi, 16(%rdx)
+	cmovneq	8(%rsp), %r10                   # 8-byte Folded Reload
+	movq	%r10, 8(%rdx)
+	cmovneq	(%rsp), %r9                     # 8-byte Folded Reload
+	movq	%r9, (%rdx)
+	addq	$728, %rsp                      # imm = 0x2D8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -15502,428 +7467,304 @@ mcl_fp_montNF9L:                        # @mcl_fp_montNF9L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end131:
-	.size	mcl_fp_montNF9L, .Lfunc_end131-mcl_fp_montNF9L
-
-	.globl	mcl_fp_montRed9L
-	.align	16, 0x90
-	.type	mcl_fp_montRed9L,@function
-mcl_fp_montRed9L:                       # @mcl_fp_montRed9L
-# BB#0:
+.Lfunc_end62:
+	.size	mcl_fp_montRed8L, .Lfunc_end62-mcl_fp_montRed8L
+                                        # -- End function
+	.globl	mcl_fp_montRedNF8L              # -- Begin function mcl_fp_montRedNF8L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF8L,@function
+mcl_fp_montRedNF8L:                     # @mcl_fp_montRedNF8L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$936, %rsp              # imm = 0x3A8
-	movq	%rdx, %rax
-	movq	%rax, 128(%rsp)         # 8-byte Spill
-	movq	%rdi, 80(%rsp)          # 8-byte Spill
-	movq	-8(%rax), %rcx
-	movq	%rcx, 120(%rsp)         # 8-byte Spill
-	movq	(%rsi), %r14
-	movq	8(%rsi), %rdx
-	movq	%rdx, 192(%rsp)         # 8-byte Spill
-	movq	%r14, %rdx
+	subq	$728, %rsp                      # imm = 0x2D8
+	movq	%rdi, 144(%rsp)                 # 8-byte Spill
+	movq	56(%rdx), %rax
+	movq	%rax, 136(%rsp)                 # 8-byte Spill
+	movq	48(%rdx), %rax
+	movq	%rax, 128(%rsp)                 # 8-byte Spill
+	movq	40(%rdx), %rax
+	movq	%rax, 120(%rsp)                 # 8-byte Spill
+	movq	32(%rdx), %rax
+	movq	%rax, 112(%rsp)                 # 8-byte Spill
+	movq	24(%rdx), %rax
+	movq	%rax, 104(%rsp)                 # 8-byte Spill
+	movq	16(%rdx), %rax
+	movq	%rax, 96(%rsp)                  # 8-byte Spill
+	movq	8(%rdx), %rax
+	movq	%rax, 88(%rsp)                  # 8-byte Spill
+	movq	%rsi, 72(%rsp)                  # 8-byte Spill
+	movq	56(%rsi), %rax
+	movq	%rax, 8(%rsp)                   # 8-byte Spill
+	movq	48(%rsi), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	40(%rsi), %r12
+	movq	32(%rsi), %r13
+	movq	24(%rsi), %r15
+	movq	16(%rsi), %r14
+	movq	(%rsi), %rbx
+	movq	8(%rsi), %rbp
+	movq	-8(%rdx), %rcx
+	movq	%rcx, 56(%rsp)                  # 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdx, %rsi
+	movq	%rdx, 64(%rsp)                  # 8-byte Spill
+	movq	%rax, 80(%rsp)                  # 8-byte Spill
+	movq	%rbx, %rdx
 	imulq	%rcx, %rdx
-	movq	136(%rsi), %rcx
-	movq	%rcx, 112(%rsp)         # 8-byte Spill
-	movq	128(%rsi), %rcx
-	movq	%rcx, 152(%rsp)         # 8-byte Spill
-	movq	120(%rsi), %rcx
-	movq	%rcx, 104(%rsp)         # 8-byte Spill
-	movq	112(%rsi), %rcx
-	movq	%rcx, 144(%rsp)         # 8-byte Spill
-	movq	104(%rsi), %rcx
-	movq	%rcx, 184(%rsp)         # 8-byte Spill
-	movq	96(%rsi), %rcx
-	movq	%rcx, 208(%rsp)         # 8-byte Spill
-	movq	88(%rsi), %rcx
-	movq	%rcx, 200(%rsp)         # 8-byte Spill
-	movq	80(%rsi), %rcx
-	movq	%rcx, 160(%rsp)         # 8-byte Spill
-	movq	72(%rsi), %r12
-	movq	64(%rsi), %rcx
-	movq	%rcx, 176(%rsp)         # 8-byte Spill
-	movq	56(%rsi), %rcx
-	movq	%rcx, 168(%rsp)         # 8-byte Spill
-	movq	48(%rsi), %rcx
-	movq	%rcx, 136(%rsp)         # 8-byte Spill
-	movq	40(%rsi), %rbp
-	movq	32(%rsi), %rbx
-	movq	24(%rsi), %r13
-	movq	16(%rsi), %r15
-	movq	%rax, %rcx
-	movq	(%rcx), %rax
-	movq	%rax, 16(%rsp)          # 8-byte Spill
-	movq	64(%rcx), %rax
-	movq	%rax, 72(%rsp)          # 8-byte Spill
-	movq	56(%rcx), %rax
-	movq	%rax, 64(%rsp)          # 8-byte Spill
-	movq	48(%rcx), %rax
-	movq	%rax, 56(%rsp)          # 8-byte Spill
-	movq	40(%rcx), %rax
-	movq	%rax, 48(%rsp)          # 8-byte Spill
-	movq	32(%rcx), %rax
-	movq	%rax, 40(%rsp)          # 8-byte Spill
-	movq	24(%rcx), %rax
-	movq	%rax, 32(%rsp)          # 8-byte Spill
-	movq	16(%rcx), %rax
-	movq	%rax, 24(%rsp)          # 8-byte Spill
-	movq	8(%rcx), %rax
-	movq	%rax, 8(%rsp)           # 8-byte Spill
-	movq	%rcx, %rsi
-	leaq	856(%rsp), %rdi
-	callq	.LmulPv576x64
-	addq	856(%rsp), %r14
-	movq	192(%rsp), %rcx         # 8-byte Reload
-	adcq	864(%rsp), %rcx
-	adcq	872(%rsp), %r15
-	adcq	880(%rsp), %r13
-	adcq	888(%rsp), %rbx
-	movq	%rbx, 88(%rsp)          # 8-byte Spill
-	adcq	896(%rsp), %rbp
-	movq	%rbp, 96(%rsp)          # 8-byte Spill
-	movq	136(%rsp), %rax         # 8-byte Reload
-	adcq	904(%rsp), %rax
-	movq	%rax, 136(%rsp)         # 8-byte Spill
-	movq	168(%rsp), %rax         # 8-byte Reload
-	adcq	912(%rsp), %rax
-	movq	%rax, 168(%rsp)         # 8-byte Spill
-	movq	176(%rsp), %rax         # 8-byte Reload
-	adcq	920(%rsp), %rax
-	movq	%rax, 176(%rsp)         # 8-byte Spill
-	adcq	928(%rsp), %r12
-	movq	%r12, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %rbp         # 8-byte Reload
-	adcq	$0, %rbp
-	adcq	$0, 200(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 208(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 184(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 144(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 104(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 152(%rsp)           # 8-byte Folded Spill
-	movq	112(%rsp), %r14         # 8-byte Reload
-	adcq	$0, %r14
-	sbbq	%r12, %r12
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	776(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	andl	$1, %r12d
-	addq	776(%rsp), %rbx
-	adcq	784(%rsp), %r15
-	adcq	792(%rsp), %r13
-	movq	%r13, (%rsp)            # 8-byte Spill
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	800(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	808(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	136(%rsp), %rax         # 8-byte Reload
-	adcq	816(%rsp), %rax
-	movq	%rax, 136(%rsp)         # 8-byte Spill
-	movq	168(%rsp), %rax         # 8-byte Reload
-	adcq	824(%rsp), %rax
-	movq	%rax, 168(%rsp)         # 8-byte Spill
-	movq	176(%rsp), %rax         # 8-byte Reload
-	adcq	832(%rsp), %rax
-	movq	%rax, 176(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rax         # 8-byte Reload
-	adcq	840(%rsp), %rax
-	movq	%rax, 192(%rsp)         # 8-byte Spill
-	adcq	848(%rsp), %rbp
-	movq	%rbp, 160(%rsp)         # 8-byte Spill
-	movq	200(%rsp), %r13         # 8-byte Reload
-	adcq	$0, %r13
-	adcq	$0, 208(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 184(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 144(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 104(%rsp)           # 8-byte Folded Spill
-	movq	152(%rsp), %rbx         # 8-byte Reload
-	adcq	$0, %rbx
-	adcq	$0, %r14
-	movq	%r14, 112(%rsp)         # 8-byte Spill
-	adcq	$0, %r12
-	movq	%r15, %rdx
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	696(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	696(%rsp), %r15
-	movq	(%rsp), %rcx            # 8-byte Reload
-	adcq	704(%rsp), %rcx
-	movq	88(%rsp), %rax          # 8-byte Reload
-	adcq	712(%rsp), %rax
-	movq	%rax, 88(%rsp)          # 8-byte Spill
-	movq	96(%rsp), %rax          # 8-byte Reload
+	leaq	656(%rsp), %rdi
+	callq	mulPv512x64@PLT
+	addq	656(%rsp), %rbx
+	adcq	664(%rsp), %rbp
+	adcq	672(%rsp), %r14
+	adcq	680(%rsp), %r15
+	adcq	688(%rsp), %r13
+	adcq	696(%rsp), %r12
+	movq	%r12, 48(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rax                  # 8-byte Reload
+	adcq	704(%rsp), %rax
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rbx                   # 8-byte Reload
+	adcq	712(%rsp), %rbx
+	movq	72(%rsp), %rax                  # 8-byte Reload
+	movq	64(%rax), %rax
 	adcq	720(%rsp), %rax
-	movq	%rax, 96(%rsp)          # 8-byte Spill
-	movq	136(%rsp), %rbp         # 8-byte Reload
-	adcq	728(%rsp), %rbp
-	movq	168(%rsp), %r14         # 8-byte Reload
-	adcq	736(%rsp), %r14
-	movq	176(%rsp), %r15         # 8-byte Reload
-	adcq	744(%rsp), %r15
-	movq	192(%rsp), %rax         # 8-byte Reload
-	adcq	752(%rsp), %rax
-	movq	%rax, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %rax         # 8-byte Reload
-	adcq	760(%rsp), %rax
-	movq	%rax, 160(%rsp)         # 8-byte Spill
-	adcq	768(%rsp), %r13
-	movq	%r13, 200(%rsp)         # 8-byte Spill
-	adcq	$0, 208(%rsp)           # 8-byte Folded Spill
-	movq	184(%rsp), %r13         # 8-byte Reload
-	adcq	$0, %r13
-	adcq	$0, 144(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 104(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %rbx
-	movq	%rbx, 152(%rsp)         # 8-byte Spill
-	adcq	$0, 112(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rcx, %rbx
-	movq	%rbx, %rdx
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	616(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	616(%rsp), %rbx
-	movq	88(%rsp), %rax          # 8-byte Reload
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	setb	%r12b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rbp, %rdx
+	leaq	584(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	648(%rsp), %rax
+	addb	$255, %r12b
+	adcq	$0, %rax
+	movq	%rax, %rcx
+	addq	584(%rsp), %rbp
+	adcq	592(%rsp), %r14
+	adcq	600(%rsp), %r15
+	adcq	608(%rsp), %r13
+	movq	48(%rsp), %rax                  # 8-byte Reload
+	adcq	616(%rsp), %rax
+	movq	%rax, 48(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rax                  # 8-byte Reload
 	adcq	624(%rsp), %rax
-	movq	96(%rsp), %rcx          # 8-byte Reload
-	adcq	632(%rsp), %rcx
-	movq	%rcx, 96(%rsp)          # 8-byte Spill
-	adcq	640(%rsp), %rbp
-	movq	%rbp, 136(%rsp)         # 8-byte Spill
-	adcq	648(%rsp), %r14
-	movq	%r14, 168(%rsp)         # 8-byte Spill
-	adcq	656(%rsp), %r15
-	movq	192(%rsp), %r14         # 8-byte Reload
-	adcq	664(%rsp), %r14
-	movq	160(%rsp), %rbp         # 8-byte Reload
-	adcq	672(%rsp), %rbp
-	movq	200(%rsp), %rcx         # 8-byte Reload
-	adcq	680(%rsp), %rcx
-	movq	%rcx, 200(%rsp)         # 8-byte Spill
-	movq	208(%rsp), %rcx         # 8-byte Reload
-	adcq	688(%rsp), %rcx
-	movq	%rcx, 208(%rsp)         # 8-byte Spill
-	adcq	$0, %r13
-	movq	%r13, 184(%rsp)         # 8-byte Spill
-	adcq	$0, 144(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 104(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 152(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 112(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rax, %rbx
-	movq	%rbx, %rdx
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	536(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	536(%rsp), %rbx
-	movq	96(%rsp), %rax          # 8-byte Reload
-	adcq	544(%rsp), %rax
-	movq	136(%rsp), %rcx         # 8-byte Reload
+	movq	%rax, 24(%rsp)                  # 8-byte Spill
+	adcq	632(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rax                    # 8-byte Reload
+	adcq	640(%rsp), %rax
+	movq	%rax, (%rsp)                    # 8-byte Spill
+	movq	72(%rsp), %rax                  # 8-byte Reload
+	adcq	72(%rax), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	setb	%bl
+	movq	56(%rsp), %rbp                  # 8-byte Reload
+	movq	%rbp, %rdx
+	imulq	%r14, %rdx
+	leaq	512(%rsp), %rdi
+	movq	64(%rsp), %r12                  # 8-byte Reload
+	movq	%r12, %rsi
+	callq	mulPv512x64@PLT
+	movq	576(%rsp), %rax
+	addb	$255, %bl
+	adcq	$0, %rax
+	addq	512(%rsp), %r14
+	adcq	520(%rsp), %r15
+	adcq	528(%rsp), %r13
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	536(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rcx                  # 8-byte Reload
+	adcq	544(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  # 8-byte Spill
+	movq	8(%rsp), %rcx                   # 8-byte Reload
 	adcq	552(%rsp), %rcx
-	movq	%rcx, 136(%rsp)         # 8-byte Spill
-	movq	168(%rsp), %rcx         # 8-byte Reload
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rcx                    # 8-byte Reload
 	adcq	560(%rsp), %rcx
-	movq	%rcx, 168(%rsp)         # 8-byte Spill
-	adcq	568(%rsp), %r15
-	movq	%r15, 176(%rsp)         # 8-byte Spill
-	adcq	576(%rsp), %r14
-	movq	%r14, 192(%rsp)         # 8-byte Spill
-	adcq	584(%rsp), %rbp
-	movq	%rbp, 160(%rsp)         # 8-byte Spill
-	movq	200(%rsp), %r13         # 8-byte Reload
-	adcq	592(%rsp), %r13
-	movq	208(%rsp), %r15         # 8-byte Reload
-	adcq	600(%rsp), %r15
-	movq	184(%rsp), %rbp         # 8-byte Reload
-	adcq	608(%rsp), %rbp
-	movq	144(%rsp), %rbx         # 8-byte Reload
-	adcq	$0, %rbx
-	adcq	$0, 104(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 152(%rsp)           # 8-byte Folded Spill
-	adcq	$0, 112(%rsp)           # 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rax, %rdx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	568(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
 	movq	%rax, %r14
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	456(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	456(%rsp), %r14
-	movq	136(%rsp), %rax         # 8-byte Reload
-	adcq	464(%rsp), %rax
-	movq	168(%rsp), %rcx         # 8-byte Reload
-	adcq	472(%rsp), %rcx
-	movq	%rcx, 168(%rsp)         # 8-byte Spill
-	movq	176(%rsp), %rcx         # 8-byte Reload
+	movq	72(%rsp), %rax                  # 8-byte Reload
+	adcq	80(%rax), %r14
+	setb	%bl
+	movq	%rbp, %rdx
+	imulq	%r15, %rdx
+	leaq	440(%rsp), %rdi
+	movq	%r12, %rsi
+	callq	mulPv512x64@PLT
+	movq	504(%rsp), %rax
+	addb	$255, %bl
+	adcq	$0, %rax
+	addq	440(%rsp), %r15
+	adcq	448(%rsp), %r13
+	movq	48(%rsp), %rcx                  # 8-byte Reload
+	adcq	456(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  # 8-byte Spill
+	movq	24(%rsp), %rbx                  # 8-byte Reload
+	adcq	464(%rsp), %rbx
+	movq	8(%rsp), %rbp                   # 8-byte Reload
+	adcq	472(%rsp), %rbp
+	movq	(%rsp), %rcx                    # 8-byte Reload
 	adcq	480(%rsp), %rcx
-	movq	%rcx, 176(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rcx         # 8-byte Reload
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	movq	16(%rsp), %rcx                  # 8-byte Reload
 	adcq	488(%rsp), %rcx
-	movq	%rcx, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %rcx         # 8-byte Reload
-	adcq	496(%rsp), %rcx
-	movq	%rcx, 160(%rsp)         # 8-byte Spill
-	adcq	504(%rsp), %r13
-	movq	%r13, 200(%rsp)         # 8-byte Spill
-	adcq	512(%rsp), %r15
-	movq	%r15, 208(%rsp)         # 8-byte Spill
-	adcq	520(%rsp), %rbp
-	movq	%rbp, 184(%rsp)         # 8-byte Spill
-	adcq	528(%rsp), %rbx
-	movq	%rbx, 144(%rsp)         # 8-byte Spill
-	movq	104(%rsp), %r14         # 8-byte Reload
-	adcq	$0, %r14
-	movq	152(%rsp), %r13         # 8-byte Reload
-	adcq	$0, %r13
-	movq	112(%rsp), %rbx         # 8-byte Reload
-	adcq	$0, %rbx
-	adcq	$0, %r12
-	movq	%rax, %rdx
-	movq	%rax, %r15
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
-	leaq	376(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	376(%rsp), %r15
-	movq	168(%rsp), %rax         # 8-byte Reload
-	adcq	384(%rsp), %rax
-	movq	176(%rsp), %rcx         # 8-byte Reload
-	adcq	392(%rsp), %rcx
-	movq	%rcx, 176(%rsp)         # 8-byte Spill
-	movq	192(%rsp), %rcx         # 8-byte Reload
-	adcq	400(%rsp), %rcx
-	movq	%rcx, 192(%rsp)         # 8-byte Spill
-	movq	160(%rsp), %rbp         # 8-byte Reload
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	adcq	496(%rsp), %r14
+	movq	%r14, 40(%rsp)                  # 8-byte Spill
+	movq	72(%rsp), %r14                  # 8-byte Reload
+	adcq	88(%r14), %rax
+	movq	%rax, 32(%rsp)                  # 8-byte Spill
+	setb	%r12b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r13, %rdx
+	leaq	368(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	432(%rsp), %r15
+	addb	$255, %r12b
+	adcq	$0, %r15
+	addq	368(%rsp), %r13
+	movq	48(%rsp), %r13                  # 8-byte Reload
+	adcq	376(%rsp), %r13
+	adcq	384(%rsp), %rbx
+	movq	%rbx, 24(%rsp)                  # 8-byte Spill
+	adcq	392(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   # 8-byte Spill
+	movq	(%rsp), %rbx                    # 8-byte Reload
+	adcq	400(%rsp), %rbx
+	movq	16(%rsp), %rbp                  # 8-byte Reload
 	adcq	408(%rsp), %rbp
-	movq	200(%rsp), %rcx         # 8-byte Reload
+	movq	40(%rsp), %rcx                  # 8-byte Reload
 	adcq	416(%rsp), %rcx
-	movq	%rcx, 200(%rsp)         # 8-byte Spill
-	movq	208(%rsp), %rcx         # 8-byte Reload
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %rcx                  # 8-byte Reload
 	adcq	424(%rsp), %rcx
-	movq	%rcx, 208(%rsp)         # 8-byte Spill
-	movq	184(%rsp), %rcx         # 8-byte Reload
-	adcq	432(%rsp), %rcx
-	movq	%rcx, 184(%rsp)         # 8-byte Spill
-	movq	144(%rsp), %r15         # 8-byte Reload
-	adcq	440(%rsp), %r15
-	adcq	448(%rsp), %r14
-	movq	%r14, 104(%rsp)         # 8-byte Spill
-	adcq	$0, %r13
-	movq	%r13, %r14
-	adcq	$0, %rbx
-	movq	%rbx, 112(%rsp)         # 8-byte Spill
-	adcq	$0, %r12
-	movq	%rax, %rbx
-	movq	%rbx, %rdx
-	imulq	120(%rsp), %rdx         # 8-byte Folded Reload
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	adcq	96(%r14), %r15
+	setb	%r14b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%r13, %rdx
 	leaq	296(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	296(%rsp), %rbx
-	movq	176(%rsp), %rax         # 8-byte Reload
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	360(%rsp), %r12
+	addb	$255, %r14b
+	adcq	$0, %r12
+	addq	296(%rsp), %r13
+	movq	24(%rsp), %rax                  # 8-byte Reload
 	adcq	304(%rsp), %rax
-	movq	192(%rsp), %r13         # 8-byte Reload
-	adcq	312(%rsp), %r13
-	adcq	320(%rsp), %rbp
-	movq	200(%rsp), %rcx         # 8-byte Reload
-	adcq	328(%rsp), %rcx
-	movq	%rcx, 200(%rsp)         # 8-byte Spill
-	movq	208(%rsp), %rcx         # 8-byte Reload
+	movq	8(%rsp), %rcx                   # 8-byte Reload
+	adcq	312(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   # 8-byte Spill
+	adcq	320(%rsp), %rbx
+	movq	%rbx, (%rsp)                    # 8-byte Spill
+	adcq	328(%rsp), %rbp
+	movq	%rbp, 16(%rsp)                  # 8-byte Spill
+	movq	40(%rsp), %rcx                  # 8-byte Reload
 	adcq	336(%rsp), %rcx
-	movq	%rcx, 208(%rsp)         # 8-byte Spill
-	movq	184(%rsp), %rcx         # 8-byte Reload
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %rcx                  # 8-byte Reload
 	adcq	344(%rsp), %rcx
-	movq	%rcx, 184(%rsp)         # 8-byte Spill
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
 	adcq	352(%rsp), %r15
-	movq	%r15, 144(%rsp)         # 8-byte Spill
-	movq	104(%rsp), %r15         # 8-byte Reload
-	adcq	360(%rsp), %r15
-	adcq	368(%rsp), %r14
-	movq	%r14, 152(%rsp)         # 8-byte Spill
-	movq	112(%rsp), %r14         # 8-byte Reload
+	movq	72(%rsp), %rbx                  # 8-byte Reload
+	adcq	104(%rbx), %r12
+	setb	%r13b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	224(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	movq	288(%rsp), %r14
+	addb	$255, %r13b
 	adcq	$0, %r14
-	adcq	$0, %r12
-	movq	120(%rsp), %rdx         # 8-byte Reload
+	addq	224(%rsp), %rbp
+	movq	8(%rsp), %rax                   # 8-byte Reload
+	adcq	232(%rsp), %rax
+	movq	(%rsp), %rcx                    # 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, (%rsp)                    # 8-byte Spill
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	248(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  # 8-byte Spill
+	movq	40(%rsp), %rcx                  # 8-byte Reload
+	adcq	256(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  # 8-byte Spill
+	movq	32(%rsp), %rcx                  # 8-byte Reload
+	adcq	264(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  # 8-byte Spill
+	adcq	272(%rsp), %r15
+	adcq	280(%rsp), %r12
+	adcq	112(%rbx), %r14
+	setb	%r13b
+	movq	56(%rsp), %rdx                  # 8-byte Reload
 	imulq	%rax, %rdx
-	movq	%rax, %rbx
-	leaq	216(%rsp), %rdi
-	movq	128(%rsp), %rsi         # 8-byte Reload
-	callq	.LmulPv576x64
-	addq	216(%rsp), %rbx
-	movq	%r13, %rsi
-	adcq	224(%rsp), %rsi
-	movq	%rsi, 192(%rsp)         # 8-byte Spill
-	adcq	232(%rsp), %rbp
-	movq	%rbp, 160(%rsp)         # 8-byte Spill
-	movq	200(%rsp), %r9          # 8-byte Reload
-	adcq	240(%rsp), %r9
-	movq	%r9, 200(%rsp)          # 8-byte Spill
-	movq	208(%rsp), %r8          # 8-byte Reload
-	adcq	248(%rsp), %r8
-	movq	%r8, 208(%rsp)          # 8-byte Spill
-	movq	184(%rsp), %rbx         # 8-byte Reload
-	adcq	256(%rsp), %rbx
-	movq	144(%rsp), %rax         # 8-byte Reload
-	adcq	264(%rsp), %rax
-	movq	%r15, %rcx
-	adcq	272(%rsp), %rcx
-	movq	152(%rsp), %rdx         # 8-byte Reload
-	adcq	280(%rsp), %rdx
-	movq	%rdx, 152(%rsp)         # 8-byte Spill
-	adcq	288(%rsp), %r14
-	movq	%r14, %r11
-	adcq	$0, %r12
-	subq	16(%rsp), %rsi          # 8-byte Folded Reload
-	movq	%rbp, %rdi
-	sbbq	8(%rsp), %rdi           # 8-byte Folded Reload
-	movq	%r9, %rbp
-	sbbq	24(%rsp), %rbp          # 8-byte Folded Reload
-	movq	%r8, %r13
-	sbbq	32(%rsp), %r13          # 8-byte Folded Reload
-	movq	%rbx, %r15
-	sbbq	40(%rsp), %r15          # 8-byte Folded Reload
-	movq	%rax, %r14
-	sbbq	48(%rsp), %r14          # 8-byte Folded Reload
-	movq	%rcx, %r10
-	sbbq	56(%rsp), %r10          # 8-byte Folded Reload
-	movq	%rdx, %r8
-	sbbq	64(%rsp), %r8           # 8-byte Folded Reload
-	movq	%r11, %r9
-	sbbq	72(%rsp), %r9           # 8-byte Folded Reload
-	sbbq	$0, %r12
-	andl	$1, %r12d
-	cmovneq	%r11, %r9
-	testb	%r12b, %r12b
-	cmovneq	192(%rsp), %rsi         # 8-byte Folded Reload
-	movq	80(%rsp), %rdx          # 8-byte Reload
-	movq	%rsi, (%rdx)
-	cmovneq	160(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, 8(%rdx)
-	cmovneq	200(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, 16(%rdx)
-	cmovneq	208(%rsp), %r13         # 8-byte Folded Reload
-	movq	%r13, 24(%rdx)
-	cmovneq	%rbx, %r15
-	movq	%r15, 32(%rdx)
-	cmovneq	%rax, %r14
-	movq	%r14, 40(%rdx)
-	cmovneq	%rcx, %r10
-	movq	%r10, 48(%rdx)
-	cmovneq	152(%rsp), %r8          # 8-byte Folded Reload
-	movq	%r8, 56(%rdx)
-	movq	%r9, 64(%rdx)
-	addq	$936, %rsp              # imm = 0x3A8
+	movq	%rax, %rbp
+	leaq	152(%rsp), %rdi
+	movq	64(%rsp), %rsi                  # 8-byte Reload
+	callq	mulPv512x64@PLT
+	addb	$255, %r13b
+	movq	216(%rsp), %rdx
+	adcq	$0, %rdx
+	addq	152(%rsp), %rbp
+	movq	(%rsp), %r8                     # 8-byte Reload
+	adcq	160(%rsp), %r8
+	movq	%r8, (%rsp)                     # 8-byte Spill
+	movq	16(%rsp), %rcx                  # 8-byte Reload
+	adcq	168(%rsp), %rcx
+	movq	40(%rsp), %rdi                  # 8-byte Reload
+	adcq	176(%rsp), %rdi
+	movq	32(%rsp), %r10                  # 8-byte Reload
+	adcq	184(%rsp), %r10
+	adcq	192(%rsp), %r15
+	adcq	200(%rsp), %r12
+	adcq	208(%rsp), %r14
+	adcq	120(%rbx), %rdx
+	subq	80(%rsp), %r8                   # 8-byte Folded Reload
+	movq	%rcx, %r9
+	movq	%rcx, %r11
+	sbbq	88(%rsp), %r9                   # 8-byte Folded Reload
+	movq	%rdi, %rsi
+	movq	%rdi, %r13
+	sbbq	96(%rsp), %rsi                  # 8-byte Folded Reload
+	movq	%r10, %rdi
+	sbbq	104(%rsp), %rdi                 # 8-byte Folded Reload
+	movq	%r15, %rbx
+	sbbq	112(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%r12, %rbp
+	sbbq	120(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	%r14, %rax
+	sbbq	128(%rsp), %rax                 # 8-byte Folded Reload
+	movq	%rdx, %rcx
+	sbbq	136(%rsp), %rcx                 # 8-byte Folded Reload
+	cmovsq	%rdx, %rcx
+	movq	144(%rsp), %rdx                 # 8-byte Reload
+	movq	%rcx, 56(%rdx)
+	cmovsq	%r14, %rax
+	movq	%rax, 48(%rdx)
+	cmovsq	%r12, %rbp
+	movq	%rbp, 40(%rdx)
+	cmovsq	%r15, %rbx
+	movq	%rbx, 32(%rdx)
+	cmovsq	%r10, %rdi
+	movq	%rdi, 24(%rdx)
+	cmovsq	%r13, %rsi
+	movq	%rsi, 16(%rdx)
+	cmovsq	%r11, %r9
+	movq	%r9, 8(%rdx)
+	cmovsq	(%rsp), %r8                     # 8-byte Folded Reload
+	movq	%r8, (%rdx)
+	addq	$728, %rsp                      # imm = 0x2D8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -15931,388 +7772,318 @@ mcl_fp_montRed9L:                       # @mcl_fp_montRed9L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end132:
-	.size	mcl_fp_montRed9L, .Lfunc_end132-mcl_fp_montRed9L
-
-	.globl	mcl_fp_addPre9L
-	.align	16, 0x90
-	.type	mcl_fp_addPre9L,@function
-mcl_fp_addPre9L:                        # @mcl_fp_addPre9L
-# BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
+.Lfunc_end63:
+	.size	mcl_fp_montRedNF8L, .Lfunc_end63-mcl_fp_montRedNF8L
+                                        # -- End function
+	.globl	mcl_fp_addPre8L                 # -- Begin function mcl_fp_addPre8L
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre8L,@function
+mcl_fp_addPre8L:                        # @mcl_fp_addPre8L
+# %bb.0:
 	pushq	%rbx
-	movq	64(%rdx), %r8
-	movq	64(%rsi), %r15
-	movq	56(%rsi), %r9
-	movq	48(%rsi), %r10
-	movq	40(%rsi), %r11
-	movq	24(%rsi), %r12
-	movq	32(%rsi), %r14
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rcx
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rcx
-	movq	16(%rdx), %rax
-	adcq	16(%rsi), %rax
-	adcq	24(%rdx), %r12
-	movq	56(%rdx), %r13
-	movq	48(%rdx), %rsi
-	movq	40(%rdx), %rbp
-	movq	32(%rdx), %rdx
+	movq	56(%rsi), %rax
+	movq	48(%rsi), %rcx
+	movq	40(%rsi), %r8
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r10
+	movq	16(%rsi), %r11
+	movq	(%rsi), %rbx
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rbx
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r11
+	adcq	24(%rdx), %r10
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r8
+	adcq	48(%rdx), %rcx
+	adcq	56(%rdx), %rax
+	movq	%rax, 56(%rdi)
+	movq	%rcx, 48(%rdi)
+	movq	%r8, 40(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r10, 24(%rdi)
+	movq	%r11, 16(%rdi)
+	movq	%rsi, 8(%rdi)
 	movq	%rbx, (%rdi)
-	movq	%rcx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r12, 24(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 32(%rdi)
-	adcq	%r11, %rbp
-	movq	%rbp, 40(%rdi)
-	adcq	%r10, %rsi
-	movq	%rsi, 48(%rdi)
-	adcq	%r9, %r13
-	movq	%r13, 56(%rdi)
-	adcq	%r8, %r15
-	movq	%r15, 64(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
+	setb	%al
+	movzbl	%al, %eax
 	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
 	retq
-.Lfunc_end133:
-	.size	mcl_fp_addPre9L, .Lfunc_end133-mcl_fp_addPre9L
-
-	.globl	mcl_fp_subPre9L
-	.align	16, 0x90
-	.type	mcl_fp_subPre9L,@function
-mcl_fp_subPre9L:                        # @mcl_fp_subPre9L
-# BB#0:
-	movq	32(%rdx), %r8
-	movq	(%rsi), %rcx
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	movq	%rcx, (%rdi)
-	movq	8(%rsi), %rcx
-	sbbq	8(%rdx), %rcx
-	movq	%rcx, 8(%rdi)
-	movq	16(%rsi), %rcx
-	sbbq	16(%rdx), %rcx
-	movq	%rcx, 16(%rdi)
-	movq	24(%rsi), %rcx
-	sbbq	24(%rdx), %rcx
-	movq	%rcx, 24(%rdi)
-	movq	32(%rsi), %rcx
-	sbbq	%r8, %rcx
-	movq	40(%rdx), %r8
-	movq	%rcx, 32(%rdi)
-	movq	40(%rsi), %rcx
-	sbbq	%r8, %rcx
-	movq	48(%rdx), %r8
-	movq	%rcx, 40(%rdi)
-	movq	48(%rsi), %rcx
-	sbbq	%r8, %rcx
-	movq	56(%rdx), %r8
-	movq	%rcx, 48(%rdi)
+.Lfunc_end64:
+	.size	mcl_fp_addPre8L, .Lfunc_end64-mcl_fp_addPre8L
+                                        # -- End function
+	.globl	mcl_fp_subPre8L                 # -- Begin function mcl_fp_subPre8L
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre8L,@function
+mcl_fp_subPre8L:                        # @mcl_fp_subPre8L
+# %bb.0:
+	pushq	%r14
+	pushq	%rbx
 	movq	56(%rsi), %rcx
-	sbbq	%r8, %rcx
+	movq	48(%rsi), %r8
+	movq	40(%rsi), %r9
+	movq	32(%rsi), %r10
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %r14
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
+	subq	(%rdx), %r14
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %rbx
+	sbbq	24(%rdx), %r11
+	sbbq	32(%rdx), %r10
+	sbbq	40(%rdx), %r9
+	sbbq	48(%rdx), %r8
+	sbbq	56(%rdx), %rcx
 	movq	%rcx, 56(%rdi)
-	movq	64(%rdx), %rcx
-	movq	64(%rsi), %rdx
-	sbbq	%rcx, %rdx
-	movq	%rdx, 64(%rdi)
-	sbbq	$0, %rax
+	movq	%r8, 48(%rdi)
+	movq	%r9, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r11, 24(%rdi)
+	movq	%rbx, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r14, (%rdi)
+	sbbq	%rax, %rax
 	andl	$1, %eax
+	popq	%rbx
+	popq	%r14
 	retq
-.Lfunc_end134:
-	.size	mcl_fp_subPre9L, .Lfunc_end134-mcl_fp_subPre9L
-
-	.globl	mcl_fp_shr1_9L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_9L,@function
-mcl_fp_shr1_9L:                         # @mcl_fp_shr1_9L
-# BB#0:
+.Lfunc_end65:
+	.size	mcl_fp_subPre8L, .Lfunc_end65-mcl_fp_subPre8L
+                                        # -- End function
+	.globl	mcl_fp_shr1_8L                  # -- Begin function mcl_fp_shr1_8L
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_8L,@function
+mcl_fp_shr1_8L:                         # @mcl_fp_shr1_8L
+# %bb.0:
 	pushq	%rbx
-	movq	64(%rsi), %r8
-	movq	56(%rsi), %r9
-	movq	48(%rsi), %r10
-	movq	40(%rsi), %r11
-	movq	32(%rsi), %rcx
-	movq	24(%rsi), %rdx
-	movq	16(%rsi), %rax
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rbx
-	movq	%rbx, (%rdi)
-	shrdq	$1, %rax, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rdx, %rax
-	movq	%rax, 16(%rdi)
-	shrdq	$1, %rcx, %rdx
-	movq	%rdx, 24(%rdi)
-	shrdq	$1, %r11, %rcx
-	movq	%rcx, 32(%rdi)
-	shrdq	$1, %r10, %r11
-	movq	%r11, 40(%rdi)
-	shrdq	$1, %r9, %r10
-	movq	%r10, 48(%rdi)
+	movq	(%rsi), %r9
+	movq	8(%rsi), %r8
+	movq	16(%rsi), %r10
+	movq	24(%rsi), %r11
+	movq	32(%rsi), %rax
+	movq	40(%rsi), %rdx
+	movq	48(%rsi), %rcx
+	movq	56(%rsi), %rsi
+	movq	%rsi, %rbx
+	shrq	%rbx
+	movq	%rbx, 56(%rdi)
+	shldq	$63, %rcx, %rsi
+	movq	%rsi, 48(%rdi)
+	shldq	$63, %rdx, %rcx
+	movq	%rcx, 40(%rdi)
+	shldq	$63, %rax, %rdx
+	movq	%rdx, 32(%rdi)
+	shldq	$63, %r11, %rax
+	movq	%rax, 24(%rdi)
+	shldq	$63, %r10, %r11
+	movq	%r11, 16(%rdi)
+	shldq	$63, %r8, %r10
+	movq	%r10, 8(%rdi)
 	shrdq	$1, %r8, %r9
-	movq	%r9, 56(%rdi)
-	shrq	%r8
-	movq	%r8, 64(%rdi)
+	movq	%r9, (%rdi)
 	popq	%rbx
 	retq
-.Lfunc_end135:
-	.size	mcl_fp_shr1_9L, .Lfunc_end135-mcl_fp_shr1_9L
-
-	.globl	mcl_fp_add9L
-	.align	16, 0x90
-	.type	mcl_fp_add9L,@function
-mcl_fp_add9L:                           # @mcl_fp_add9L
-# BB#0:
-	pushq	%r15
+.Lfunc_end66:
+	.size	mcl_fp_shr1_8L, .Lfunc_end66-mcl_fp_shr1_8L
+                                        # -- End function
+	.globl	mcl_fp_add8L                    # -- Begin function mcl_fp_add8L
+	.p2align	4, 0x90
+	.type	mcl_fp_add8L,@function
+mcl_fp_add8L:                           # @mcl_fp_add8L
+# %bb.0:
 	pushq	%r14
-	pushq	%r13
-	pushq	%r12
 	pushq	%rbx
-	movq	64(%rdx), %r12
-	movq	64(%rsi), %r8
-	movq	56(%rsi), %r13
+	movq	56(%rsi), %r8
 	movq	48(%rsi), %r9
 	movq	40(%rsi), %r10
-	movq	24(%rsi), %r14
 	movq	32(%rsi), %r11
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %r15
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %r15
-	movq	16(%rdx), %rax
-	adcq	16(%rsi), %rax
+	movq	24(%rsi), %r14
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rbx
 	adcq	24(%rdx), %r14
 	adcq	32(%rdx), %r11
 	adcq	40(%rdx), %r10
-	movq	56(%rdx), %rsi
 	adcq	48(%rdx), %r9
-	movq	%rbx, (%rdi)
-	movq	%r15, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r14, 24(%rdi)
-	movq	%r11, 32(%rdi)
-	movq	%r10, 40(%rdi)
+	adcq	56(%rdx), %r8
+	movq	%r8, 56(%rdi)
 	movq	%r9, 48(%rdi)
-	adcq	%r13, %rsi
-	movq	%rsi, 56(%rdi)
-	adcq	%r12, %r8
-	movq	%r8, 64(%rdi)
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	subq	(%rcx), %rbx
-	sbbq	8(%rcx), %r15
-	sbbq	16(%rcx), %rax
+	movq	%r10, 40(%rdi)
+	movq	%r11, 32(%rdi)
+	movq	%r14, 24(%rdi)
+	movq	%rbx, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rax, (%rdi)
+	setb	%dl
+	movzbl	%dl, %edx
+	subq	(%rcx), %rax
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %rbx
 	sbbq	24(%rcx), %r14
 	sbbq	32(%rcx), %r11
 	sbbq	40(%rcx), %r10
 	sbbq	48(%rcx), %r9
-	sbbq	56(%rcx), %rsi
-	sbbq	64(%rcx), %r8
+	sbbq	56(%rcx), %r8
 	sbbq	$0, %rdx
 	testb	$1, %dl
-	jne	.LBB136_2
-# BB#1:                                 # %nocarry
-	movq	%rbx, (%rdi)
-	movq	%r15, 8(%rdi)
-	movq	%rax, 16(%rdi)
+	jne	.LBB67_2
+# %bb.1:                                # %nocarry
+	movq	%rax, (%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rbx, 16(%rdi)
 	movq	%r14, 24(%rdi)
 	movq	%r11, 32(%rdi)
 	movq	%r10, 40(%rdi)
 	movq	%r9, 48(%rdi)
-	movq	%rsi, 56(%rdi)
-	movq	%r8, 64(%rdi)
-.LBB136_2:                              # %carry
+	movq	%r8, 56(%rdi)
+.LBB67_2:                               # %carry
 	popq	%rbx
-	popq	%r12
-	popq	%r13
 	popq	%r14
-	popq	%r15
 	retq
-.Lfunc_end136:
-	.size	mcl_fp_add9L, .Lfunc_end136-mcl_fp_add9L
-
-	.globl	mcl_fp_addNF9L
-	.align	16, 0x90
-	.type	mcl_fp_addNF9L,@function
-mcl_fp_addNF9L:                         # @mcl_fp_addNF9L
-# BB#0:
+.Lfunc_end67:
+	.size	mcl_fp_add8L, .Lfunc_end67-mcl_fp_add8L
+                                        # -- End function
+	.globl	mcl_fp_addNF8L                  # -- Begin function mcl_fp_addNF8L
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF8L,@function
+mcl_fp_addNF8L:                         # @mcl_fp_addNF8L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, %r8
-	movq	64(%rdx), %r10
-	movq	56(%rdx), %r11
+	movq	56(%rdx), %r8
 	movq	48(%rdx), %r9
-	movq	40(%rdx), %rax
-	movq	32(%rdx), %rdi
-	movq	24(%rdx), %rbp
-	movq	16(%rdx), %r15
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %r13
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %r13
-	adcq	16(%rsi), %r15
-	adcq	24(%rsi), %rbp
-	movq	%rbp, -40(%rsp)         # 8-byte Spill
-	adcq	32(%rsi), %rdi
-	movq	%rdi, -16(%rsp)         # 8-byte Spill
-	adcq	40(%rsi), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
+	movq	40(%rdx), %r10
+	movq	32(%rdx), %r11
+	movq	24(%rdx), %r15
+	movq	16(%rdx), %rbx
+	movq	(%rdx), %rax
+	movq	8(%rdx), %rdx
+	addq	(%rsi), %rax
+	movq	%rax, -8(%rsp)                  # 8-byte Spill
+	adcq	8(%rsi), %rdx
+	movq	%rdx, -16(%rsp)                 # 8-byte Spill
+	adcq	16(%rsi), %rbx
+	movq	%rbx, -24(%rsp)                 # 8-byte Spill
+	adcq	24(%rsi), %r15
+	adcq	32(%rsi), %r11
+	adcq	40(%rsi), %r10
 	adcq	48(%rsi), %r9
-	movq	%r9, -32(%rsp)          # 8-byte Spill
-	movq	%r9, %rdi
-	adcq	56(%rsi), %r11
-	movq	%r11, -24(%rsp)         # 8-byte Spill
-	movq	%r11, %rax
-	adcq	64(%rsi), %r10
-	movq	%r10, %r9
-	movq	%rbx, %rsi
+	adcq	56(%rsi), %r8
+	movq	%rax, %rsi
 	subq	(%rcx), %rsi
-	movq	%r13, %rdx
 	sbbq	8(%rcx), %rdx
-	movq	%r15, %r12
-	sbbq	16(%rcx), %r12
-	sbbq	24(%rcx), %rbp
-	movq	-16(%rsp), %r14         # 8-byte Reload
-	sbbq	32(%rcx), %r14
-	movq	-8(%rsp), %r11          # 8-byte Reload
-	sbbq	40(%rcx), %r11
-	movq	%rdi, %r10
-	sbbq	48(%rcx), %r10
-	movq	%rax, %rdi
-	sbbq	56(%rcx), %rdi
-	movq	%r9, %rax
-	sbbq	64(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%rbx, %rsi
-	movq	%rsi, (%r8)
-	cmovsq	%r13, %rdx
-	movq	%rdx, 8(%r8)
-	cmovsq	%r15, %r12
-	movq	%r12, 16(%r8)
-	cmovsq	-40(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rbp, 24(%r8)
-	cmovsq	-16(%rsp), %r14         # 8-byte Folded Reload
-	movq	%r14, 32(%r8)
-	cmovsq	-8(%rsp), %r11          # 8-byte Folded Reload
-	movq	%r11, 40(%r8)
-	cmovsq	-32(%rsp), %r10         # 8-byte Folded Reload
-	movq	%r10, 48(%r8)
-	cmovsq	-24(%rsp), %rdi         # 8-byte Folded Reload
-	movq	%rdi, 56(%r8)
-	cmovsq	%r9, %rax
-	movq	%rax, 64(%r8)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-.Lfunc_end137:
-	.size	mcl_fp_addNF9L, .Lfunc_end137-mcl_fp_addNF9L
-
-	.globl	mcl_fp_sub9L
-	.align	16, 0x90
-	.type	mcl_fp_sub9L,@function
-mcl_fp_sub9L:                           # @mcl_fp_sub9L
-# BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	64(%rdx), %r13
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r9
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r9
-	movq	16(%rsi), %r10
-	sbbq	16(%rdx), %r10
-	movq	24(%rsi), %r11
-	sbbq	24(%rdx), %r11
-	movq	32(%rsi), %r12
-	sbbq	32(%rdx), %r12
-	movq	40(%rsi), %r14
-	sbbq	40(%rdx), %r14
-	movq	48(%rsi), %r15
-	sbbq	48(%rdx), %r15
-	movq	64(%rsi), %r8
-	movq	56(%rsi), %rsi
-	sbbq	56(%rdx), %rsi
-	movq	%rax, (%rdi)
-	movq	%r9, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	movq	%r11, 24(%rdi)
-	movq	%r12, 32(%rdi)
-	movq	%r14, 40(%rdi)
-	movq	%r15, 48(%rdi)
-	movq	%rsi, 56(%rdi)
-	sbbq	%r13, %r8
-	movq	%r8, 64(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	.LBB138_2
-# BB#1:                                 # %carry
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	movq	8(%rcx), %rax
-	adcq	%r9, %rax
-	movq	%rax, 8(%rdi)
-	movq	16(%rcx), %rax
-	adcq	%r10, %rax
-	movq	%rax, 16(%rdi)
-	movq	24(%rcx), %rax
-	adcq	%r11, %rax
+	sbbq	16(%rcx), %rbx
+	movq	%r15, %rax
+	sbbq	24(%rcx), %rax
+	movq	%r11, %rbp
+	sbbq	32(%rcx), %rbp
+	movq	%r10, %r14
+	sbbq	40(%rcx), %r14
+	movq	%r9, %r12
+	sbbq	48(%rcx), %r12
+	movq	%r8, %r13
+	sbbq	56(%rcx), %r13
+	cmovsq	%r8, %r13
+	movq	%r13, 56(%rdi)
+	cmovsq	%r9, %r12
+	movq	%r12, 48(%rdi)
+	cmovsq	%r10, %r14
+	movq	%r14, 40(%rdi)
+	cmovsq	%r11, %rbp
+	movq	%rbp, 32(%rdi)
+	cmovsq	%r15, %rax
 	movq	%rax, 24(%rdi)
-	movq	32(%rcx), %rax
-	adcq	%r12, %rax
-	movq	%rax, 32(%rdi)
-	movq	40(%rcx), %rax
-	adcq	%r14, %rax
-	movq	%rax, 40(%rdi)
-	movq	48(%rcx), %rax
-	adcq	%r15, %rax
-	movq	%rax, 48(%rdi)
-	movq	56(%rcx), %rax
-	adcq	%rsi, %rax
-	movq	%rax, 56(%rdi)
-	movq	64(%rcx), %rax
-	adcq	%r8, %rax
-	movq	%rax, 64(%rdi)
-.LBB138_2:                              # %nocarry
+	cmovsq	-24(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%rbx, 16(%rdi)
+	cmovsq	-16(%rsp), %rdx                 # 8-byte Folded Reload
+	movq	%rdx, 8(%rdi)
+	cmovsq	-8(%rsp), %rsi                  # 8-byte Folded Reload
+	movq	%rsi, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
+	popq	%rbp
+	retq
+.Lfunc_end68:
+	.size	mcl_fp_addNF8L, .Lfunc_end68-mcl_fp_addNF8L
+                                        # -- End function
+	.globl	mcl_fp_sub8L                    # -- Begin function mcl_fp_sub8L
+	.p2align	4, 0x90
+	.type	mcl_fp_sub8L,@function
+mcl_fp_sub8L:                           # @mcl_fp_sub8L
+# %bb.0:
+	pushq	%r15
+	pushq	%r14
+	pushq	%rbx
+	movq	56(%rsi), %r14
+	movq	48(%rsi), %rbx
+	movq	40(%rsi), %r11
+	movq	32(%rsi), %r10
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %r15
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %r15
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r10
+	sbbq	40(%rdx), %r11
+	sbbq	48(%rdx), %rbx
+	sbbq	56(%rdx), %r14
+	movq	%r14, 56(%rdi)
+	movq	%rbx, 48(%rdi)
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%r15, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rax, %rax
+	testb	$1, %al
+	je	.LBB69_2
+# %bb.1:                                # %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %r15
+	adcq	24(%rcx), %r9
+	adcq	32(%rcx), %r10
+	adcq	40(%rcx), %r11
+	adcq	48(%rcx), %rbx
+	adcq	56(%rcx), %r14
+	movq	%r14, 56(%rdi)
+	movq	%rbx, 48(%rdi)
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%r15, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+.LBB69_2:                               # %nocarry
+	popq	%rbx
+	popq	%r14
+	popq	%r15
 	retq
-.Lfunc_end138:
-	.size	mcl_fp_sub9L, .Lfunc_end138-mcl_fp_sub9L
-
-	.globl	mcl_fp_subNF9L
-	.align	16, 0x90
-	.type	mcl_fp_subNF9L,@function
-mcl_fp_subNF9L:                         # @mcl_fp_subNF9L
-# BB#0:
+.Lfunc_end69:
+	.size	mcl_fp_sub8L, .Lfunc_end69-mcl_fp_sub8L
+                                        # -- End function
+	.globl	mcl_fp_subNF8L                  # -- Begin function mcl_fp_subNF8L
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF8L,@function
+mcl_fp_subNF8L:                         # @mcl_fp_subNF8L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
@@ -16320,69 +8091,59 @@ mcl_fp_subNF9L:                         # @mcl_fp_subNF9L
 	pushq	%r12
 	pushq	%rbx
 	movq	%rcx, %r8
-	movq	%rdi, %r11
-	movq	64(%rsi), %r14
-	movq	56(%rsi), %rax
-	movq	48(%rsi), %rcx
-	movq	40(%rsi), %rdi
-	movq	32(%rsi), %rbp
-	movq	24(%rsi), %rbx
+	movq	%rdi, %r9
+	movq	56(%rsi), %r14
+	movq	48(%rsi), %rax
+	movq	40(%rsi), %rcx
+	movq	32(%rsi), %rdi
+	movq	24(%rsi), %r11
 	movq	16(%rsi), %r15
-	movq	(%rsi), %r12
-	movq	8(%rsi), %r13
-	subq	(%rdx), %r12
-	sbbq	8(%rdx), %r13
+	movq	(%rsi), %r13
+	movq	8(%rsi), %r12
+	subq	(%rdx), %r13
+	sbbq	8(%rdx), %r12
 	sbbq	16(%rdx), %r15
-	sbbq	24(%rdx), %rbx
-	movq	%rbx, -40(%rsp)         # 8-byte Spill
-	sbbq	32(%rdx), %rbp
-	movq	%rbp, -32(%rsp)         # 8-byte Spill
-	sbbq	40(%rdx), %rdi
-	movq	%rdi, -24(%rsp)         # 8-byte Spill
-	sbbq	48(%rdx), %rcx
-	movq	%rcx, -16(%rsp)         # 8-byte Spill
-	sbbq	56(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	sbbq	64(%rdx), %r14
-	movq	%r14, %rax
-	sarq	$63, %rax
-	movq	%rax, %rcx
-	shldq	$1, %r14, %rcx
-	movq	24(%r8), %rbp
-	andq	%rcx, %rbp
-	movq	8(%r8), %rdi
-	andq	%rcx, %rdi
-	andq	(%r8), %rcx
-	movq	64(%r8), %rbx
-	andq	%rax, %rbx
+	sbbq	24(%rdx), %r11
+	sbbq	32(%rdx), %rdi
+	movq	%rdi, -24(%rsp)                 # 8-byte Spill
+	sbbq	40(%rdx), %rcx
+	movq	%rcx, -16(%rsp)                 # 8-byte Spill
+	sbbq	48(%rdx), %rax
+	movq	%rax, -8(%rsp)                  # 8-byte Spill
+	sbbq	56(%rdx), %r14
+	movq	%r14, %rsi
+	sarq	$63, %rsi
 	movq	56(%r8), %r10
-	andq	%rax, %r10
-	rolq	%rax
-	movq	48(%r8), %r9
-	andq	%rax, %r9
-	movq	40(%r8), %rsi
-	andq	%rax, %rsi
-	movq	32(%r8), %rdx
-	andq	%rax, %rdx
-	andq	16(%r8), %rax
-	addq	%r12, %rcx
-	adcq	%r13, %rdi
-	movq	%rcx, (%r11)
-	adcq	%r15, %rax
-	movq	%rdi, 8(%r11)
-	adcq	-40(%rsp), %rbp         # 8-byte Folded Reload
-	movq	%rax, 16(%r11)
-	movq	%rbp, 24(%r11)
-	adcq	-32(%rsp), %rdx         # 8-byte Folded Reload
-	movq	%rdx, 32(%r11)
-	adcq	-24(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, 40(%r11)
-	adcq	-16(%rsp), %r9          # 8-byte Folded Reload
-	movq	%r9, 48(%r11)
-	adcq	-8(%rsp), %r10          # 8-byte Folded Reload
-	movq	%r10, 56(%r11)
-	adcq	%r14, %rbx
-	movq	%rbx, 64(%r11)
+	andq	%rsi, %r10
+	movq	48(%r8), %rbx
+	andq	%rsi, %rbx
+	movq	40(%r8), %rdi
+	andq	%rsi, %rdi
+	movq	32(%r8), %rbp
+	andq	%rsi, %rbp
+	movq	24(%r8), %rdx
+	andq	%rsi, %rdx
+	movq	16(%r8), %rcx
+	andq	%rsi, %rcx
+	movq	8(%r8), %rax
+	andq	%rsi, %rax
+	andq	(%r8), %rsi
+	addq	%r13, %rsi
+	adcq	%r12, %rax
+	movq	%rsi, (%r9)
+	adcq	%r15, %rcx
+	movq	%rax, 8(%r9)
+	movq	%rcx, 16(%r9)
+	adcq	%r11, %rdx
+	movq	%rdx, 24(%r9)
+	adcq	-24(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	%rbp, 32(%r9)
+	adcq	-16(%rsp), %rdi                 # 8-byte Folded Reload
+	movq	%rdi, 40(%r9)
+	adcq	-8(%rsp), %rbx                  # 8-byte Folded Reload
+	movq	%rbx, 48(%r9)
+	adcq	%r14, %r10
+	movq	%r10, 56(%r9)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -16390,14 +8151,14 @@ mcl_fp_subNF9L:                         # @mcl_fp_subNF9L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end139:
-	.size	mcl_fp_subNF9L, .Lfunc_end139-mcl_fp_subNF9L
-
-	.globl	mcl_fpDbl_add9L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add9L,@function
-mcl_fpDbl_add9L:                        # @mcl_fpDbl_add9L
-# BB#0:
+.Lfunc_end70:
+	.size	mcl_fp_subNF8L, .Lfunc_end70-mcl_fp_subNF8L
+                                        # -- End function
+	.globl	mcl_fpDbl_add8L                 # -- Begin function mcl_fpDbl_add8L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add8L,@function
+mcl_fpDbl_add8L:                        # @mcl_fpDbl_add8L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
@@ -16405,111 +8166,103 @@ mcl_fpDbl_add9L:                        # @mcl_fpDbl_add9L
 	pushq	%r12
 	pushq	%rbx
 	movq	%rcx, %r15
-	movq	136(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	128(%rdx), %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	120(%rdx), %r10
-	movq	112(%rdx), %r11
-	movq	24(%rsi), %rcx
-	movq	32(%rsi), %r14
-	movq	16(%rdx), %rbp
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rbx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rbx
-	adcq	16(%rsi), %rbp
-	adcq	24(%rdx), %rcx
-	adcq	32(%rdx), %r14
-	movq	104(%rdx), %r9
-	movq	96(%rdx), %r13
-	movq	%rax, (%rdi)
-	movq	88(%rdx), %r8
-	movq	%rbx, 8(%rdi)
-	movq	80(%rdx), %r12
-	movq	%rbp, 16(%rdi)
-	movq	40(%rdx), %rax
-	movq	%rcx, 24(%rdi)
-	movq	40(%rsi), %rbp
-	adcq	%rax, %rbp
-	movq	48(%rdx), %rcx
-	movq	%r14, 32(%rdi)
-	movq	48(%rsi), %rax
-	adcq	%rcx, %rax
-	movq	56(%rdx), %r14
-	movq	%rbp, 40(%rdi)
-	movq	56(%rsi), %rbp
-	adcq	%r14, %rbp
-	movq	72(%rdx), %rcx
-	movq	64(%rdx), %rdx
-	movq	%rax, 48(%rdi)
+	movq	120(%rsi), %rax
+	movq	%rax, -72(%rsp)                 # 8-byte Spill
+	movq	112(%rsi), %rax
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	movq	104(%rsi), %rax
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	96(%rsi), %rbx
+	movq	88(%rsi), %rcx
+	movq	80(%rsi), %r8
+	movq	72(%rsi), %r10
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rbp
+	addq	(%rdx), %rax
+	movq	%rax, -8(%rsp)                  # 8-byte Spill
+	adcq	8(%rdx), %rbp
+	movq	%rbp, -16(%rsp)                 # 8-byte Spill
 	movq	64(%rsi), %rax
-	adcq	%rdx, %rax
-	movq	136(%rsi), %rbx
+	movq	56(%rsi), %rbp
+	movq	48(%rsi), %r13
+	movq	40(%rsi), %r14
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %r12
+	adcq	16(%rdx), %r12
+	adcq	24(%rdx), %r11
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r14
+	adcq	48(%rdx), %r13
+	adcq	56(%rdx), %rbp
+	adcq	64(%rdx), %rax
+	movq	%rax, -48(%rsp)                 # 8-byte Spill
+	adcq	72(%rdx), %r10
+	movq	%r8, %rax
+	adcq	80(%rdx), %rax
+	movq	%rax, -24(%rsp)                 # 8-byte Spill
+	adcq	88(%rdx), %rcx
+	movq	%rcx, -32(%rsp)                 # 8-byte Spill
+	movq	%rbx, %rsi
+	adcq	96(%rdx), %rsi
+	movq	%rsi, -40(%rsp)                 # 8-byte Spill
+	movq	-56(%rsp), %r8                  # 8-byte Reload
+	adcq	104(%rdx), %r8
+	movq	%r8, -56(%rsp)                  # 8-byte Spill
+	movq	-64(%rsp), %rbx                 # 8-byte Reload
+	adcq	112(%rdx), %rbx
+	movq	%rbx, -64(%rsp)                 # 8-byte Spill
+	movq	-72(%rsp), %r8                  # 8-byte Reload
+	adcq	120(%rdx), %r8
 	movq	%rbp, 56(%rdi)
-	movq	72(%rsi), %rbp
-	adcq	%rcx, %rbp
-	movq	128(%rsi), %rcx
-	movq	%rax, 64(%rdi)
-	movq	80(%rsi), %rdx
-	adcq	%r12, %rdx
-	movq	88(%rsi), %r12
-	adcq	%r8, %r12
-	movq	96(%rsi), %r14
-	adcq	%r13, %r14
-	movq	%r14, -48(%rsp)         # 8-byte Spill
-	movq	104(%rsi), %rax
-	adcq	%r9, %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	120(%rsi), %rax
-	movq	112(%rsi), %rsi
-	adcq	%r11, %rsi
-	movq	%rsi, -24(%rsp)         # 8-byte Spill
-	adcq	%r10, %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	adcq	-40(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -40(%rsp)         # 8-byte Spill
-	adcq	-8(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, -8(%rsp)          # 8-byte Spill
-	sbbq	%r9, %r9
-	andl	$1, %r9d
-	movq	%rbp, %r10
-	subq	(%r15), %r10
-	movq	%rdx, %r11
-	sbbq	8(%r15), %r11
-	movq	%r12, %rbx
-	sbbq	16(%r15), %rbx
-	sbbq	24(%r15), %r14
-	movq	-32(%rsp), %r13         # 8-byte Reload
-	sbbq	32(%r15), %r13
-	movq	-24(%rsp), %rsi         # 8-byte Reload
-	sbbq	40(%r15), %rsi
-	movq	-16(%rsp), %rax         # 8-byte Reload
-	sbbq	48(%r15), %rax
-	sbbq	56(%r15), %rcx
-	movq	-8(%rsp), %r8           # 8-byte Reload
-	sbbq	64(%r15), %r8
-	sbbq	$0, %r9
-	andl	$1, %r9d
-	cmovneq	%rbp, %r10
-	movq	%r10, 72(%rdi)
-	testb	%r9b, %r9b
-	cmovneq	%rdx, %r11
+	movq	%r13, 48(%rdi)
+	movq	%r14, 40(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r11, 24(%rdi)
+	movq	%r12, 16(%rdi)
+	movq	-16(%rsp), %rdx                 # 8-byte Reload
+	movq	%rdx, 8(%rdi)
+	movq	-8(%rsp), %rdx                  # 8-byte Reload
+	movq	%rdx, (%rdi)
+	setb	-72(%rsp)                       # 1-byte Folded Spill
+	movq	-48(%rsp), %r14                 # 8-byte Reload
+	subq	(%r15), %r14
+	movq	%r10, %r9
+	movq	%r10, %r13
+	sbbq	8(%r15), %r9
+	movq	%rax, %r11
+	sbbq	16(%r15), %r11
+	movq	%rcx, %rbp
+	sbbq	24(%r15), %rbp
+	movq	%rsi, %rbx
+	sbbq	32(%r15), %rbx
+	movq	-56(%rsp), %r12                 # 8-byte Reload
+	movq	%r12, %rax
+	sbbq	40(%r15), %rax
+	movq	-64(%rsp), %r10                 # 8-byte Reload
+	movq	%r10, %rdx
+	sbbq	48(%r15), %rdx
+	movq	%r8, %rsi
+	sbbq	56(%r15), %rsi
+	movzbl	-72(%rsp), %ecx                 # 1-byte Folded Reload
+	sbbq	$0, %rcx
+	testb	$1, %cl
+	cmovneq	%r8, %rsi
+	movq	%rsi, 120(%rdi)
+	cmovneq	%r10, %rdx
+	movq	%rdx, 112(%rdi)
+	cmovneq	%r12, %rax
+	movq	%rax, 104(%rdi)
+	cmovneq	-40(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%rbx, 96(%rdi)
+	cmovneq	-32(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	%rbp, 88(%rdi)
+	cmovneq	-24(%rsp), %r11                 # 8-byte Folded Reload
 	movq	%r11, 80(%rdi)
-	cmovneq	%r12, %rbx
-	movq	%rbx, 88(%rdi)
-	cmovneq	-48(%rsp), %r14         # 8-byte Folded Reload
-	movq	%r14, 96(%rdi)
-	cmovneq	-32(%rsp), %r13         # 8-byte Folded Reload
-	movq	%r13, 104(%rdi)
-	cmovneq	-24(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, 112(%rdi)
-	cmovneq	-16(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, 120(%rdi)
-	cmovneq	-40(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, 128(%rdi)
-	cmovneq	-8(%rsp), %r8           # 8-byte Folded Reload
-	movq	%r8, 136(%rdi)
+	cmovneq	%r13, %r9
+	movq	%r9, 72(%rdi)
+	cmovneq	-48(%rsp), %r14                 # 8-byte Folded Reload
+	movq	%r14, 64(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -16517,127 +8270,112 @@ mcl_fpDbl_add9L:                        # @mcl_fpDbl_add9L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end140:
-	.size	mcl_fpDbl_add9L, .Lfunc_end140-mcl_fpDbl_add9L
-
-	.globl	mcl_fpDbl_sub9L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub9L,@function
-mcl_fpDbl_sub9L:                        # @mcl_fpDbl_sub9L
-# BB#0:
+.Lfunc_end71:
+	.size	mcl_fpDbl_add8L, .Lfunc_end71-mcl_fpDbl_add8L
+                                        # -- End function
+	.globl	mcl_fpDbl_sub8L                 # -- Begin function mcl_fpDbl_sub8L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub8L,@function
+mcl_fpDbl_sub8L:                        # @mcl_fpDbl_sub8L
+# %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r14
-	movq	136(%rdx), %rax
-	movq	%rax, -8(%rsp)          # 8-byte Spill
-	movq	128(%rdx), %rax
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	movq	120(%rdx), %rax
-	movq	%rax, -24(%rsp)         # 8-byte Spill
-	movq	16(%rsi), %r11
-	movq	(%rsi), %r12
-	movq	8(%rsi), %r13
-	xorl	%r9d, %r9d
-	subq	(%rdx), %r12
-	sbbq	8(%rdx), %r13
-	sbbq	16(%rdx), %r11
+	movq	%rcx, %r11
+	movq	120(%rsi), %rax
+	movq	%rax, -64(%rsp)                 # 8-byte Spill
+	movq	112(%rsi), %r12
+	movq	104(%rsi), %r15
+	movq	96(%rsi), %rax
+	movq	%rax, -48(%rsp)                 # 8-byte Spill
+	movq	88(%rsi), %r13
+	movq	80(%rsi), %rax
+	movq	%rax, -56(%rsp)                 # 8-byte Spill
+	movq	(%rsi), %rcx
+	movq	8(%rsi), %rbp
+	xorl	%eax, %eax
+	subq	(%rdx), %rcx
+	movq	%rcx, -32(%rsp)                 # 8-byte Spill
+	sbbq	8(%rdx), %rbp
+	movq	%rbp, -40(%rsp)                 # 8-byte Spill
+	movq	72(%rsi), %rbp
+	movq	64(%rsi), %rcx
+	movq	56(%rsi), %r8
+	movq	48(%rsi), %r9
+	movq	40(%rsi), %r10
+	movq	32(%rsi), %r14
 	movq	24(%rsi), %rbx
+	movq	16(%rsi), %rsi
+	sbbq	16(%rdx), %rsi
 	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %rbp
-	sbbq	32(%rdx), %rbp
-	movq	112(%rdx), %r10
-	movq	104(%rdx), %rcx
-	movq	%r12, (%rdi)
-	movq	96(%rdx), %rax
-	movq	%r13, 8(%rdi)
-	movq	88(%rdx), %r13
-	movq	%r11, 16(%rdi)
-	movq	40(%rdx), %r11
+	sbbq	32(%rdx), %r14
+	sbbq	40(%rdx), %r10
+	sbbq	48(%rdx), %r9
+	sbbq	56(%rdx), %r8
+	sbbq	64(%rdx), %rcx
+	movq	%rcx, -24(%rsp)                 # 8-byte Spill
+	sbbq	72(%rdx), %rbp
+	movq	%rbp, -16(%rsp)                 # 8-byte Spill
+	movq	-56(%rsp), %rbp                 # 8-byte Reload
+	sbbq	80(%rdx), %rbp
+	movq	%rbp, -56(%rsp)                 # 8-byte Spill
+	sbbq	88(%rdx), %r13
+	movq	%r13, -8(%rsp)                  # 8-byte Spill
+	movq	-48(%rsp), %r13                 # 8-byte Reload
+	sbbq	96(%rdx), %r13
+	movq	%r13, -48(%rsp)                 # 8-byte Spill
+	sbbq	104(%rdx), %r15
+	sbbq	112(%rdx), %r12
+	movq	-64(%rsp), %rcx                 # 8-byte Reload
+	sbbq	120(%rdx), %rcx
+	movq	%rcx, -64(%rsp)                 # 8-byte Spill
+	movq	%r8, 56(%rdi)
+	movq	%r9, 48(%rdi)
+	movq	%r10, 40(%rdi)
+	movq	%r14, 32(%rdi)
 	movq	%rbx, 24(%rdi)
-	movq	40(%rsi), %rbx
-	sbbq	%r11, %rbx
-	movq	48(%rdx), %r11
-	movq	%rbp, 32(%rdi)
-	movq	48(%rsi), %rbp
-	sbbq	%r11, %rbp
-	movq	56(%rdx), %r11
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rbx
-	sbbq	%r11, %rbx
-	movq	64(%rdx), %r11
-	movq	%rbp, 48(%rdi)
-	movq	64(%rsi), %rbp
-	sbbq	%r11, %rbp
-	movq	80(%rdx), %r8
-	movq	72(%rdx), %r11
-	movq	%rbx, 56(%rdi)
-	movq	72(%rsi), %r15
-	sbbq	%r11, %r15
-	movq	136(%rsi), %rdx
-	movq	%rbp, 64(%rdi)
-	movq	80(%rsi), %rbp
-	sbbq	%r8, %rbp
-	movq	88(%rsi), %r12
-	sbbq	%r13, %r12
-	movq	96(%rsi), %r13
-	sbbq	%rax, %r13
-	movq	104(%rsi), %rax
-	sbbq	%rcx, %rax
-	movq	%rax, -40(%rsp)         # 8-byte Spill
-	movq	112(%rsi), %rax
-	sbbq	%r10, %rax
-	movq	%rax, -32(%rsp)         # 8-byte Spill
-	movq	128(%rsi), %rax
-	movq	120(%rsi), %rcx
-	sbbq	-24(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, -24(%rsp)         # 8-byte Spill
-	sbbq	-16(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, -16(%rsp)         # 8-byte Spill
-	sbbq	-8(%rsp), %rdx          # 8-byte Folded Reload
-	movq	%rdx, -8(%rsp)          # 8-byte Spill
-	movl	$0, %r8d
-	sbbq	$0, %r8
-	andl	$1, %r8d
-	movq	(%r14), %r10
-	cmoveq	%r9, %r10
-	testb	%r8b, %r8b
-	movq	16(%r14), %r8
-	cmoveq	%r9, %r8
-	movq	8(%r14), %rdx
-	cmoveq	%r9, %rdx
-	movq	64(%r14), %rbx
-	cmoveq	%r9, %rbx
-	movq	56(%r14), %r11
-	cmoveq	%r9, %r11
-	movq	48(%r14), %rsi
-	cmoveq	%r9, %rsi
-	movq	40(%r14), %rcx
-	cmoveq	%r9, %rcx
-	movq	32(%r14), %rax
-	cmoveq	%r9, %rax
-	cmovneq	24(%r14), %r9
-	addq	%r15, %r10
-	adcq	%rbp, %rdx
-	movq	%r10, 72(%rdi)
-	adcq	%r12, %r8
-	movq	%rdx, 80(%rdi)
-	adcq	%r13, %r9
-	movq	%r8, 88(%rdi)
-	movq	%r9, 96(%rdi)
-	adcq	-40(%rsp), %rax         # 8-byte Folded Reload
-	movq	%rax, 104(%rdi)
-	adcq	-32(%rsp), %rcx         # 8-byte Folded Reload
-	movq	%rcx, 112(%rdi)
-	adcq	-24(%rsp), %rsi         # 8-byte Folded Reload
-	movq	%rsi, 120(%rdi)
-	adcq	-16(%rsp), %r11         # 8-byte Folded Reload
-	movq	%r11, 128(%rdi)
-	adcq	-8(%rsp), %rbx          # 8-byte Folded Reload
-	movq	%rbx, 136(%rdi)
+	movq	%rsi, 16(%rdi)
+	movq	-40(%rsp), %rcx                 # 8-byte Reload
+	movq	%rcx, 8(%rdi)
+	movq	-32(%rsp), %rcx                 # 8-byte Reload
+	movq	%rcx, (%rdi)
+	sbbq	%rax, %rax
+	andl	$1, %eax
+	negq	%rax
+	movq	56(%r11), %r8
+	andq	%rax, %r8
+	movq	48(%r11), %r9
+	andq	%rax, %r9
+	movq	40(%r11), %r10
+	andq	%rax, %r10
+	movq	32(%r11), %rbx
+	andq	%rax, %rbx
+	movq	24(%r11), %rdx
+	andq	%rax, %rdx
+	movq	16(%r11), %rsi
+	andq	%rax, %rsi
+	movq	8(%r11), %rbp
+	andq	%rax, %rbp
+	andq	(%r11), %rax
+	addq	-24(%rsp), %rax                 # 8-byte Folded Reload
+	adcq	-16(%rsp), %rbp                 # 8-byte Folded Reload
+	movq	%rax, 64(%rdi)
+	adcq	-56(%rsp), %rsi                 # 8-byte Folded Reload
+	movq	%rbp, 72(%rdi)
+	movq	%rsi, 80(%rdi)
+	adcq	-8(%rsp), %rdx                  # 8-byte Folded Reload
+	movq	%rdx, 88(%rdi)
+	adcq	-48(%rsp), %rbx                 # 8-byte Folded Reload
+	movq	%rbx, 96(%rdi)
+	adcq	%r15, %r10
+	movq	%r10, 104(%rdi)
+	adcq	%r12, %r9
+	movq	%r9, 112(%rdi)
+	adcq	-64(%rsp), %r8                  # 8-byte Folded Reload
+	movq	%r8, 120(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -16645,8 +8383,7 @@ mcl_fpDbl_sub9L:                        # @mcl_fpDbl_sub9L
 	popq	%r15
 	popq	%rbp
 	retq
-.Lfunc_end141:
-	.size	mcl_fpDbl_sub9L, .Lfunc_end141-mcl_fpDbl_sub9L
-
-
+.Lfunc_end72:
+	.size	mcl_fpDbl_sub8L, .Lfunc_end72-mcl_fpDbl_sub8L
+                                        # -- End function
 	.section	".note.GNU-stack","",@progbits

From f50e57c3b0d17cfd7121f1305c534cce49fd757e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 14:39:24 +0900
Subject: [PATCH 519/553] update x86-64mac asm

---
 src/asm/x86-64mac.bmi2.s | 19031 ++++++++++---------------------
 src/asm/x86-64mac.s      | 22245 ++++++++++++-------------------------
 2 files changed, 13049 insertions(+), 28227 deletions(-)

diff --git a/src/asm/x86-64mac.bmi2.s b/src/asm/x86-64mac.bmi2.s
index 849c6664..b9d03fa9 100644
--- a/src/asm/x86-64mac.bmi2.s
+++ b/src/asm/x86-64mac.bmi2.s
@@ -1,138 +1,141 @@
 	.section	__TEXT,__text,regular,pure_instructions
-	.macosx_version_min 10, 12
-	.globl	_makeNIST_P192Lbmi2
+	.build_version macos, 11, 0
+	.globl	_makeNIST_P192Lbmi2             ## -- Begin function makeNIST_P192Lbmi2
 	.p2align	4, 0x90
 _makeNIST_P192Lbmi2:                    ## @makeNIST_P192Lbmi2
-## BB#0:
+## %bb.0:
 	movq	$-1, %rax
 	movq	$-2, %rdx
 	movq	$-1, %rcx
 	retq
-
-	.globl	_mcl_fpDbl_mod_NIST_P192Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_mod_NIST_P192Lbmi2   ## -- Begin function mcl_fpDbl_mod_NIST_P192Lbmi2
 	.p2align	4, 0x90
 _mcl_fpDbl_mod_NIST_P192Lbmi2:          ## @mcl_fpDbl_mod_NIST_P192Lbmi2
-## BB#0:
+## %bb.0:
 	pushq	%r14
 	pushq	%rbx
-	movq	16(%rsi), %r10
+	movq	16(%rsi), %rbx
 	movq	24(%rsi), %r8
 	movq	40(%rsi), %r9
-	movq	8(%rsi), %rax
-	addq	%r9, %rax
-	adcq	$0, %r10
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
+	movq	8(%rsi), %rdx
+	addq	%r9, %rdx
+	adcq	$0, %rbx
+	setb	%cl
+	movzbl	%cl, %r10d
 	movq	32(%rsi), %r11
 	movq	(%rsi), %r14
 	addq	%r8, %r14
-	adcq	%r11, %rax
-	adcq	%r9, %r10
-	adcq	$0, %rcx
-	addq	%r9, %r14
-	adcq	%r8, %rax
-	adcq	%r11, %r10
-	adcq	$0, %rcx
-	addq	%rcx, %r14
-	adcq	%rax, %rcx
+	adcq	%r11, %rdx
+	adcq	%r9, %rbx
 	adcq	$0, %r10
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r14, %rsi
-	addq	$1, %rsi
-	movq	%rcx, %rdx
-	adcq	$1, %rdx
-	movq	%r10, %rbx
+	addq	%r9, %r14
+	adcq	%r8, %rdx
+	adcq	%r11, %rbx
+	setb	%r8b
+	movq	%r10, %r9
+	adcq	$0, %r9
+	addb	$255, %r8b
+	adcq	%r10, %r14
+	adcq	%rdx, %r9
 	adcq	$0, %rbx
-	adcq	$-1, %rax
-	andl	$1, %eax
-	cmovneq	%r14, %rsi
-	movq	%rsi, (%rdi)
-	testb	%al, %al
-	cmovneq	%rcx, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovneq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
+	setb	%dl
+	movzbl	%dl, %edx
+	movq	%r14, %rcx
+	addq	$1, %rcx
+	movq	%r9, %rsi
+	adcq	$1, %rsi
+	movq	%rbx, %rax
+	adcq	$0, %rax
+	adcq	$-1, %rdx
+	testb	$1, %dl
+	cmovneq	%rbx, %rax
+	movq	%rax, 16(%rdi)
+	cmovneq	%r9, %rsi
+	movq	%rsi, 8(%rdi)
+	cmovneq	%r14, %rcx
+	movq	%rcx, (%rdi)
 	popq	%rbx
 	popq	%r14
 	retq
-
-	.globl	_mcl_fp_sqr_NIST_P192Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_sqr_NIST_P192Lbmi2      ## -- Begin function mcl_fp_sqr_NIST_P192Lbmi2
 	.p2align	4, 0x90
 _mcl_fp_sqr_NIST_P192Lbmi2:             ## @mcl_fp_sqr_NIST_P192Lbmi2
-## BB#0:
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	16(%rsi), %r8
+	movq	16(%rsi), %r9
 	movq	(%rsi), %rcx
 	movq	8(%rsi), %rsi
-	movq	%r8, %rdx
-	mulxq	%rsi, %r14, %rbx
-	movq	%rbx, -16(%rsp)         ## 8-byte Spill
+	movq	%r9, %rdx
+	mulxq	%rsi, %r11, %r10
 	movq	%rsi, %rdx
-	mulxq	%rsi, %r13, %r15
-	mulxq	%rcx, %r12, %rsi
-	addq	%rsi, %r13
-	adcq	%r14, %r15
+	mulxq	%rsi, %r12, %r14
+	mulxq	%rcx, %r15, %rsi
+	addq	%rsi, %r12
+	adcq	%r11, %r14
+	movq	%r10, %rbx
 	adcq	$0, %rbx
 	movq	%rcx, %rdx
-	mulxq	%rcx, %r9, %rax
-	addq	%r12, %rax
-	movq	%r8, %rdx
-	mulxq	%rcx, %rbp, %r11
-	adcq	%rbp, %rsi
-	movq	%r11, %r10
-	adcq	$0, %r10
-	addq	%r12, %rax
+	mulxq	%rcx, %r8, %rax
+	addq	%r15, %rax
+	movq	%r9, %rdx
+	mulxq	%rcx, %r13, %rcx
 	adcq	%r13, %rsi
-	adcq	%r15, %r10
+	movq	%rcx, %rbp
+	adcq	$0, %rbp
+	addq	%r15, %rax
+	adcq	%r12, %rsi
+	adcq	%r14, %rbp
 	adcq	$0, %rbx
-	mulxq	%r8, %rcx, %rdi
-	addq	%r14, %r11
-	adcq	-16(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	$0, %rdi
-	addq	%rbp, %rsi
-	adcq	%r10, %r11
-	adcq	%rbx, %rcx
-	adcq	$0, %rdi
-	addq	%rdi, %rax
-	adcq	$0, %rsi
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	addq	%r11, %r9
-	adcq	%rcx, %rax
-	adcq	%rdi, %rsi
+	movq	%r9, %rdx
+	mulxq	%r9, %r9, %rdx
+	addq	%r11, %rcx
+	adcq	%r10, %r9
 	adcq	$0, %rdx
-	addq	%rdi, %r9
-	adcq	%r11, %rax
-	adcq	%rcx, %rsi
+	addq	%r13, %rsi
+	adcq	%rbp, %rcx
+	adcq	%rbx, %r9
 	adcq	$0, %rdx
-	addq	%rdx, %r9
-	adcq	%rax, %rdx
+	addq	%rdx, %rax
 	adcq	$0, %rsi
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r9, %rcx
-	addq	$1, %rcx
-	movq	%rdx, %rdi
-	adcq	$1, %rdi
-	movq	%rsi, %rbp
+	setb	%bl
+	movzbl	%bl, %ebx
+	addq	%rcx, %r8
+	adcq	%r9, %rax
+	adcq	%rdx, %rsi
+	adcq	$0, %rbx
+	addq	%rdx, %r8
+	adcq	%rcx, %rax
+	adcq	%r9, %rsi
+	setb	%cl
+	movq	%rbx, %rbp
 	adcq	$0, %rbp
+	addb	$255, %cl
+	adcq	%rbx, %r8
+	adcq	%rax, %rbp
+	adcq	$0, %rsi
+	setb	%al
+	movzbl	%al, %eax
+	movq	%r8, %rcx
+	addq	$1, %rcx
+	movq	%rbp, %rdx
+	adcq	$1, %rdx
+	movq	%rsi, %rbx
+	adcq	$0, %rbx
 	adcq	$-1, %rax
-	andl	$1, %eax
-	cmovneq	%r9, %rcx
-	movq	-8(%rsp), %rbx          ## 8-byte Reload
-	movq	%rcx, (%rbx)
-	testb	%al, %al
-	cmovneq	%rdx, %rdi
-	movq	%rdi, 8(%rbx)
-	cmovneq	%rsi, %rbp
-	movq	%rbp, 16(%rbx)
+	testb	$1, %al
+	cmovneq	%rsi, %rbx
+	movq	%rbx, 16(%rdi)
+	cmovneq	%rbp, %rdx
+	movq	%rdx, 8(%rdi)
+	cmovneq	%r8, %rcx
+	movq	%rcx, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -140,64 +143,66 @@ _mcl_fp_sqr_NIST_P192Lbmi2:             ## @mcl_fp_sqr_NIST_P192Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_mulNIST_P192Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_mulNIST_P192Lbmi2       ## -- Begin function mcl_fp_mulNIST_P192Lbmi2
 	.p2align	4, 0x90
 _mcl_fp_mulNIST_P192Lbmi2:              ## @mcl_fp_mulNIST_P192Lbmi2
-## BB#0:
+## %bb.0:
 	pushq	%r14
 	pushq	%rbx
 	subq	$56, %rsp
 	movq	%rdi, %r14
 	leaq	8(%rsp), %rdi
 	callq	_mcl_fpDbl_mulPre3Lbmi2
-	movq	24(%rsp), %r9
+	movq	24(%rsp), %rbx
 	movq	32(%rsp), %r8
-	movq	48(%rsp), %rdi
-	movq	16(%rsp), %rbx
-	addq	%rdi, %rbx
-	adcq	$0, %r9
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	movq	40(%rsp), %rsi
-	movq	8(%rsp), %rdx
-	addq	%r8, %rdx
-	adcq	%rsi, %rbx
-	adcq	%rdi, %r9
+	movq	48(%rsp), %rax
+	movq	16(%rsp), %rdi
+	addq	%rax, %rdi
+	adcq	$0, %rbx
+	setb	%cl
+	movzbl	%cl, %esi
+	movq	40(%rsp), %rdx
+	movq	8(%rsp), %r9
+	addq	%r8, %r9
+	adcq	%rdx, %rdi
+	adcq	%rax, %rbx
+	adcq	$0, %rsi
+	addq	%rax, %r9
+	adcq	%r8, %rdi
+	adcq	%rdx, %rbx
+	setb	%dl
+	movq	%rsi, %rcx
 	adcq	$0, %rcx
-	addq	%rdi, %rdx
-	adcq	%r8, %rbx
+	addb	$255, %dl
 	adcq	%rsi, %r9
-	adcq	$0, %rcx
-	addq	%rcx, %rdx
-	adcq	%rbx, %rcx
-	adcq	$0, %r9
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rdx, %rdi
+	adcq	%rdi, %rcx
+	adcq	$0, %rbx
+	setb	%dl
+	movzbl	%dl, %edx
+	movq	%r9, %rdi
 	addq	$1, %rdi
-	movq	%rcx, %rbx
-	adcq	$1, %rbx
-	movq	%r9, %rax
+	movq	%rcx, %rsi
+	adcq	$1, %rsi
+	movq	%rbx, %rax
 	adcq	$0, %rax
-	adcq	$-1, %rsi
-	andl	$1, %esi
-	cmovneq	%rdx, %rdi
-	movq	%rdi, (%r14)
-	testb	%sil, %sil
-	cmovneq	%rcx, %rbx
-	movq	%rbx, 8(%r14)
-	cmovneq	%r9, %rax
+	adcq	$-1, %rdx
+	testb	$1, %dl
+	cmovneq	%rbx, %rax
 	movq	%rax, 16(%r14)
+	cmovneq	%rcx, %rsi
+	movq	%rsi, 8(%r14)
+	cmovneq	%r9, %rdi
+	movq	%rdi, (%r14)
 	addq	$56, %rsp
 	popq	%rbx
 	popq	%r14
 	retq
-
-	.globl	_mcl_fpDbl_mod_NIST_P521Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_mod_NIST_P521Lbmi2   ## -- Begin function mcl_fpDbl_mod_NIST_P521Lbmi2
 	.p2align	4, 0x90
 _mcl_fpDbl_mod_NIST_P521Lbmi2:          ## @mcl_fpDbl_mod_NIST_P521Lbmi2
-## BB#0:
+## %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r12
@@ -221,8 +226,8 @@ _mcl_fpDbl_mod_NIST_P521Lbmi2:          ## @mcl_fpDbl_mod_NIST_P521Lbmi2
 	shldq	$55, %rax, %rcx
 	shrq	$9, %r14
 	shldq	$55, %rbx, %rax
-                                        ## kill: %EBX<def> %EBX<kill> %RBX<kill> %RBX<def>
-	andl	$511, %ebx              ## imm = 0x1FF
+	movl	%ebx, %edx
+	andl	$511, %edx                      ## imm = 0x1FF
 	addq	(%rsi), %rax
 	adcq	8(%rsi), %rcx
 	adcq	16(%rsi), %r12
@@ -231,8 +236,8 @@ _mcl_fpDbl_mod_NIST_P521Lbmi2:          ## @mcl_fpDbl_mod_NIST_P521Lbmi2
 	adcq	40(%rsi), %r10
 	adcq	48(%rsi), %r9
 	adcq	56(%rsi), %r8
-	adcq	%r14, %rbx
-	movl	%ebx, %esi
+	adcq	%r14, %rdx
+	movl	%edx, %esi
 	shrl	$9, %esi
 	andl	$1, %esi
 	addq	%rax, %rsi
@@ -243,7 +248,7 @@ _mcl_fpDbl_mod_NIST_P521Lbmi2:          ## @mcl_fpDbl_mod_NIST_P521Lbmi2
 	adcq	$0, %r10
 	adcq	$0, %r9
 	adcq	$0, %r8
-	adcq	$0, %rbx
+	adcq	$0, %rdx
 	movq	%rsi, %rax
 	andq	%r12, %rax
 	andq	%r15, %rax
@@ -251,23 +256,23 @@ _mcl_fpDbl_mod_NIST_P521Lbmi2:          ## @mcl_fpDbl_mod_NIST_P521Lbmi2
 	andq	%r10, %rax
 	andq	%r9, %rax
 	andq	%r8, %rax
-	movq	%rbx, %rdx
-	orq	$-512, %rdx             ## imm = 0xFE00
-	andq	%rax, %rdx
-	andq	%rcx, %rdx
-	cmpq	$-1, %rdx
+	movq	%rdx, %rbx
+	orq	$-512, %rbx                     ## imm = 0xFE00
+	andq	%rax, %rbx
+	andq	%rcx, %rbx
+	cmpq	$-1, %rbx
 	je	LBB4_1
-## BB#3:                                ## %nonzero
-	movq	%rsi, (%rdi)
-	movq	%rcx, 8(%rdi)
-	movq	%r12, 16(%rdi)
-	movq	%r15, 24(%rdi)
-	movq	%r11, 32(%rdi)
-	movq	%r10, 40(%rdi)
-	movq	%r9, 48(%rdi)
+## %bb.3:                               ## %nonzero
 	movq	%r8, 56(%rdi)
-	andl	$511, %ebx              ## imm = 0x1FF
-	movq	%rbx, 64(%rdi)
+	movq	%r9, 48(%rdi)
+	movq	%r10, 40(%rdi)
+	movq	%r11, 32(%rdi)
+	movq	%r15, 24(%rdi)
+	movq	%r12, 16(%rdi)
+	movq	%rcx, 8(%rdi)
+	movq	%rsi, (%rdi)
+	andl	$511, %edx                      ## imm = 0x1FF
+	movq	%rdx, 64(%rdi)
 	jmp	LBB4_2
 LBB4_1:                                 ## %zero
 	movq	$0, 64(%rdi)
@@ -285,367 +290,92 @@ LBB4_2:                                 ## %zero
 	popq	%r14
 	popq	%r15
 	retq
-
-	.globl	_mcl_fp_mulUnitPre1Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_mulUnitPre1Lbmi2:               ## @mcl_fp_mulUnitPre1Lbmi2
-## BB#0:
-	mulxq	(%rsi), %rcx, %rax
-	movq	%rcx, (%rdi)
-	movq	%rax, 8(%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_mulPre1Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_mulPre1Lbmi2:                ## @mcl_fpDbl_mulPre1Lbmi2
-## BB#0:
-	movq	(%rdx), %rdx
-	mulxq	(%rsi), %rcx, %rax
-	movq	%rcx, (%rdi)
-	movq	%rax, 8(%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_sqrPre1Lbmi2
+                                        ## -- End function
+	.globl	_mulPv192x64bmi2                ## -- Begin function mulPv192x64bmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre1Lbmi2:                ## @mcl_fpDbl_sqrPre1Lbmi2
-## BB#0:
-	movq	(%rsi), %rdx
-	mulxq	%rdx, %rcx, %rax
-	movq	%rcx, (%rdi)
-	movq	%rax, 8(%rdi)
+_mulPv192x64bmi2:                       ## @mulPv192x64bmi2
+## %bb.0:
+	movq	%rdi, %rax
+	mulxq	(%rsi), %rdi, %rcx
+	movq	%rdi, (%rax)
+	mulxq	8(%rsi), %rdi, %r8
+	addq	%rcx, %rdi
+	movq	%rdi, 8(%rax)
+	mulxq	16(%rsi), %rcx, %rdx
+	adcq	%r8, %rcx
+	movq	%rcx, 16(%rax)
+	adcq	$0, %rdx
+	movq	%rdx, 24(%rax)
 	retq
-
-	.globl	_mcl_fp_mont1Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_mulUnitPre3Lbmi2        ## -- Begin function mcl_fp_mulUnitPre3Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_mont1Lbmi2:                     ## @mcl_fp_mont1Lbmi2
-## BB#0:
-	movq	%rdx, %rax
-	movq	(%rsi), %rdx
-	mulxq	(%rax), %rsi, %r8
-	movq	-8(%rcx), %rdx
-	imulq	%rsi, %rdx
-	movq	(%rcx), %rcx
-	mulxq	%rcx, %rdx, %rax
-	addq	%rsi, %rdx
+_mcl_fp_mulUnitPre3Lbmi2:               ## @mcl_fp_mulUnitPre3Lbmi2
+## %bb.0:
+	mulxq	16(%rsi), %r8, %rcx
+	mulxq	8(%rsi), %r9, %rax
+	mulxq	(%rsi), %rdx, %rsi
+	movq	%rdx, (%rdi)
+	addq	%r9, %rsi
+	movq	%rsi, 8(%rdi)
 	adcq	%r8, %rax
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	movq	%rax, %rsi
-	subq	%rcx, %rsi
-	sbbq	$0, %rdx
-	testb	$1, %dl
-	cmovneq	%rax, %rsi
-	movq	%rsi, (%rdi)
+	movq	%rax, 16(%rdi)
+	adcq	$0, %rcx
+	movq	%rcx, 24(%rdi)
 	retq
-
-	.globl	_mcl_fp_montNF1Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_mulPre3Lbmi2         ## -- Begin function mcl_fpDbl_mulPre3Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_montNF1Lbmi2:                   ## @mcl_fp_montNF1Lbmi2
-## BB#0:
-	movq	%rdx, %rax
-	movq	(%rsi), %rdx
-	mulxq	(%rax), %rsi, %r8
-	movq	-8(%rcx), %rdx
-	imulq	%rsi, %rdx
-	movq	(%rcx), %rcx
-	mulxq	%rcx, %rdx, %rax
-	addq	%rsi, %rdx
-	adcq	%r8, %rax
-	movq	%rax, %rdx
-	subq	%rcx, %rdx
-	cmovsq	%rax, %rdx
+_mcl_fpDbl_mulPre3Lbmi2:                ## @mcl_fpDbl_mulPre3Lbmi2
+## %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %r10
+	movq	(%rsi), %r8
+	movq	8(%rsi), %r9
+	movq	(%rdx), %r13
+	movq	%r8, %rdx
+	mulxq	%r13, %rdx, %rax
+	movq	16(%rsi), %r12
 	movq	%rdx, (%rdi)
-	retq
-
-	.globl	_mcl_fp_montRed1Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_montRed1Lbmi2:                  ## @mcl_fp_montRed1Lbmi2
-## BB#0:
-	movq	(%rsi), %rcx
-	movq	-8(%rdx), %rax
-	imulq	%rcx, %rax
-	movq	(%rdx), %r8
-	movq	%rax, %rdx
-	mulxq	%r8, %rax, %rdx
-	addq	%rcx, %rax
-	adcq	8(%rsi), %rdx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rdx, %rcx
-	subq	%r8, %rcx
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	%rdx, %rcx
-	movq	%rcx, (%rdi)
-	retq
-
-	.globl	_mcl_fp_addPre1Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_addPre1Lbmi2:                   ## @mcl_fp_addPre1Lbmi2
-## BB#0:
-	movq	(%rdx), %rax
-	addq	(%rsi), %rax
-	movq	%rax, (%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_subPre1Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_subPre1Lbmi2:                   ## @mcl_fp_subPre1Lbmi2
-## BB#0:
-	movq	(%rsi), %rcx
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	movq	%rcx, (%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_shr1_1Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_shr1_1Lbmi2:                    ## @mcl_fp_shr1_1Lbmi2
-## BB#0:
-	movq	(%rsi), %rax
-	shrq	%rax
-	movq	%rax, (%rdi)
-	retq
-
-	.globl	_mcl_fp_add1Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_add1Lbmi2:                      ## @mcl_fp_add1Lbmi2
-## BB#0:
-	movq	(%rdx), %rax
-	addq	(%rsi), %rax
-	movq	%rax, (%rdi)
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	subq	(%rcx), %rax
-	sbbq	$0, %rdx
-	testb	$1, %dl
-	jne	LBB14_2
-## BB#1:                                ## %nocarry
-	movq	%rax, (%rdi)
-LBB14_2:                                ## %carry
-	retq
-
-	.globl	_mcl_fp_addNF1Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_addNF1Lbmi2:                    ## @mcl_fp_addNF1Lbmi2
-## BB#0:
-	movq	(%rdx), %rax
-	addq	(%rsi), %rax
-	movq	%rax, %rdx
-	subq	(%rcx), %rdx
-	cmovsq	%rax, %rdx
-	movq	%rdx, (%rdi)
-	retq
-
-	.globl	_mcl_fp_sub1Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_sub1Lbmi2:                      ## @mcl_fp_sub1Lbmi2
-## BB#0:
-	movq	(%rsi), %rax
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	movq	%rax, (%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB16_2
-## BB#1:                                ## %nocarry
-	retq
-LBB16_2:                                ## %carry
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	retq
-
-	.globl	_mcl_fp_subNF1Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_subNF1Lbmi2:                    ## @mcl_fp_subNF1Lbmi2
-## BB#0:
-	movq	(%rsi), %rax
-	subq	(%rdx), %rax
-	movq	%rax, %rdx
-	sarq	$63, %rdx
-	andq	(%rcx), %rdx
-	addq	%rax, %rdx
-	movq	%rdx, (%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_add1Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_add1Lbmi2:                   ## @mcl_fpDbl_add1Lbmi2
-## BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	movq	%rax, (%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rdx, %rsi
-	subq	(%rcx), %rsi
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	%rdx, %rsi
-	movq	%rsi, 8(%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_sub1Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_sub1Lbmi2:                   ## @mcl_fpDbl_sub1Lbmi2
-## BB#0:
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r8
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r8
-	movq	%rax, (%rdi)
-	movl	$0, %eax
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	(%rcx), %rsi
-	addq	%r8, %rsi
-	movq	%rsi, 8(%rdi)
-	retq
-
-	.globl	_mcl_fp_mulUnitPre2Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_mulUnitPre2Lbmi2:               ## @mcl_fp_mulUnitPre2Lbmi2
-## BB#0:
-	mulxq	8(%rsi), %rax, %rcx
-	mulxq	(%rsi), %rdx, %rsi
-	movq	%rdx, (%rdi)
-	addq	%rax, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	$0, %rcx
-	movq	%rcx, 16(%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_mulPre2Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_mulPre2Lbmi2:                ## @mcl_fpDbl_mulPre2Lbmi2
-## BB#0:
-	movq	%rdx, %r10
-	movq	(%rsi), %r11
-	movq	8(%rsi), %r8
-	movq	(%r10), %rsi
-	movq	%r11, %rdx
-	mulxq	%rsi, %rdx, %r9
-	movq	%rdx, (%rdi)
-	movq	%r8, %rdx
-	mulxq	%rsi, %rsi, %rax
-	addq	%r9, %rsi
-	adcq	$0, %rax
-	movq	8(%r10), %rcx
-	movq	%r11, %rdx
-	mulxq	%rcx, %rdx, %r9
-	addq	%rsi, %rdx
-	movq	%rdx, 8(%rdi)
-	movq	%r8, %rdx
-	mulxq	%rcx, %rdx, %rcx
-	adcq	%rax, %rdx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r9, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%rcx, %rax
-	movq	%rax, 24(%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_sqrPre2Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre2Lbmi2:                ## @mcl_fpDbl_sqrPre2Lbmi2
-## BB#0:
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rcx
-	movq	%rax, %rdx
-	mulxq	%rax, %rdx, %rsi
-	movq	%rdx, (%rdi)
-	movq	%rcx, %rdx
-	mulxq	%rax, %rdx, %r8
-	addq	%rdx, %rsi
-	movq	%r8, %rax
-	adcq	$0, %rax
-	addq	%rdx, %rsi
-	movq	%rsi, 8(%rdi)
-	movq	%rcx, %rdx
-	mulxq	%rcx, %rdx, %rcx
-	adcq	%rax, %rdx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r8, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%rcx, %rax
-	movq	%rax, 24(%rdi)
-	retq
-
-	.globl	_mcl_fp_mont2Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_mont2Lbmi2:                     ## @mcl_fp_mont2Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	(%rsi), %r8
-	movq	8(%rsi), %r9
-	movq	(%rdx), %rax
-	movq	8(%rdx), %r11
+	movq	8(%r10), %rdx
+	mulxq	%r9, %rsi, %r15
+	mulxq	%r8, %r14, %rbp
+	addq	%rsi, %rbp
+	mulxq	%r12, %r11, %rsi
+	adcq	%r15, %r11
+	adcq	$0, %rsi
 	movq	%r9, %rdx
-	mulxq	%rax, %r10, %r13
-	movq	%r8, %rdx
-	mulxq	%rax, %r14, %rsi
-	addq	%r10, %rsi
+	mulxq	%r13, %rcx, %r15
+	addq	%rax, %rcx
+	movq	%r12, %rdx
+	mulxq	%r13, %rbx, %r13
+	adcq	%r15, %rbx
 	adcq	$0, %r13
-	movq	-8(%rcx), %rbp
-	movq	(%rcx), %r10
-	movq	%r14, %rdx
-	imulq	%rbp, %rdx
-	movq	8(%rcx), %r15
-	mulxq	%r15, %r12, %rcx
-	mulxq	%r10, %rdx, %rbx
-	addq	%r12, %rbx
-	adcq	$0, %rcx
-	addq	%r14, %rdx
-	adcq	%rsi, %rbx
-	adcq	%r13, %rcx
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%r11, %rdx
-	mulxq	%r9, %r9, %r14
-	movq	%r11, %rdx
-	mulxq	%r8, %r8, %rax
-	addq	%r9, %rax
-	adcq	$0, %r14
-	addq	%rbx, %r8
-	adcq	%rcx, %rax
-	adcq	%rsi, %r14
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	imulq	%r8, %rbp
-	movq	%rbp, %rdx
-	mulxq	%r15, %rcx, %rbx
-	mulxq	%r10, %rdx, %rbp
-	addq	%rcx, %rbp
-	adcq	$0, %rbx
-	addq	%r8, %rdx
-	adcq	%rax, %rbp
-	adcq	%r14, %rbx
-	adcq	$0, %rsi
-	movq	%rbp, %rax
-	subq	%r10, %rax
-	movq	%rbx, %rcx
-	sbbq	%r15, %rcx
-	sbbq	$0, %rsi
-	andl	$1, %esi
-	cmovneq	%rbx, %rcx
-	testb	%sil, %sil
-	cmovneq	%rbp, %rax
-	movq	%rax, (%rdi)
+	addq	%r14, %rcx
 	movq	%rcx, 8(%rdi)
+	adcq	%rbp, %rbx
+	adcq	%r11, %r13
+	adcq	$0, %rsi
+	movq	16(%r10), %rdx
+	mulxq	%r12, %r10, %rbp
+	mulxq	%r9, %r9, %rcx
+	mulxq	%r8, %rdx, %rax
+	addq	%r9, %rax
+	adcq	%r10, %rcx
+	adcq	$0, %rbp
+	addq	%rbx, %rdx
+	movq	%rdx, 16(%rdi)
+	adcq	%r13, %rax
+	movq	%rax, 24(%rdi)
+	adcq	%rsi, %rcx
+	movq	%rcx, 32(%rdi)
+	adcq	$0, %rbp
+	movq	%rbp, 40(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -653,5324 +383,185 @@ _mcl_fp_mont2Lbmi2:                     ## @mcl_fp_mont2Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montNF2Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sqrPre3Lbmi2         ## -- Begin function mcl_fpDbl_sqrPre3Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_montNF2Lbmi2:                   ## @mcl_fp_montNF2Lbmi2
-## BB#0:
-	pushq	%rbp
+_mcl_fpDbl_sqrPre3Lbmi2:                ## @mcl_fpDbl_sqrPre3Lbmi2
+## %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	(%rsi), %r8
-	movq	8(%rsi), %r9
-	movq	(%rdx), %rax
-	movq	8(%rdx), %r11
-	movq	%r9, %rdx
-	mulxq	%rax, %r10, %rsi
-	movq	%r8, %rdx
-	mulxq	%rax, %r15, %r13
-	addq	%r10, %r13
-	adcq	$0, %rsi
-	movq	-8(%rcx), %rbp
-	movq	(%rcx), %r10
-	movq	%r15, %rdx
-	imulq	%rbp, %rdx
-	movq	8(%rcx), %r14
-	mulxq	%r10, %rcx, %r12
-	addq	%r15, %rcx
-	mulxq	%r14, %rbx, %rcx
-	adcq	%r13, %rbx
-	adcq	$0, %rsi
-	addq	%r12, %rbx
-	adcq	%rcx, %rsi
-	movq	%r11, %rdx
-	mulxq	%r9, %r9, %rcx
-	movq	%r11, %rdx
-	mulxq	%r8, %r8, %rax
-	addq	%r9, %rax
-	adcq	$0, %rcx
-	addq	%rbx, %r8
-	adcq	%rsi, %rax
-	adcq	$0, %rcx
-	imulq	%r8, %rbp
-	movq	%rbp, %rdx
-	mulxq	%r14, %rbx, %rsi
-	mulxq	%r10, %rbp, %rdx
-	addq	%r8, %rbp
-	adcq	%rax, %rbx
-	adcq	$0, %rcx
-	addq	%rdx, %rbx
-	adcq	%rsi, %rcx
-	movq	%rbx, %rax
-	subq	%r10, %rax
+	movq	16(%rsi), %r8
+	movq	(%rsi), %rcx
+	movq	8(%rsi), %rsi
 	movq	%rcx, %rdx
-	sbbq	%r14, %rdx
-	cmovsq	%rbx, %rax
-	movq	%rax, (%rdi)
-	cmovsq	%rcx, %rdx
-	movq	%rdx, 8(%rdi)
+	mulxq	%rcx, %rdx, %rax
+	movq	%rdx, (%rdi)
+	movq	%r8, %rdx
+	mulxq	%rsi, %r10, %r9
+	movq	%rsi, %rdx
+	mulxq	%rsi, %r11, %r15
+	mulxq	%rcx, %r14, %rsi
+	addq	%rsi, %r11
+	adcq	%r10, %r15
+	movq	%r9, %r13
+	adcq	$0, %r13
+	addq	%r14, %rax
+	movq	%r8, %rdx
+	mulxq	%rcx, %r12, %rcx
+	adcq	%r12, %rsi
+	movq	%rcx, %rbx
+	adcq	$0, %rbx
+	addq	%r14, %rax
+	movq	%rax, 8(%rdi)
+	adcq	%r11, %rsi
+	adcq	%r15, %rbx
+	adcq	$0, %r13
+	movq	%r8, %rdx
+	mulxq	%r8, %rax, %rdx
+	addq	%r10, %rcx
+	adcq	%r9, %rax
+	adcq	$0, %rdx
+	addq	%r12, %rsi
+	movq	%rsi, 16(%rdi)
+	adcq	%rbx, %rcx
+	movq	%rcx, 24(%rdi)
+	adcq	%r13, %rax
+	movq	%rax, 32(%rdi)
+	adcq	$0, %rdx
+	movq	%rdx, 40(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
-	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montRed2Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_mont3Lbmi2              ## -- Begin function mcl_fp_mont3Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_montRed2Lbmi2:                  ## @mcl_fp_montRed2Lbmi2
-## BB#0:
+_mcl_fp_mont3Lbmi2:                     ## @mcl_fp_mont3Lbmi2
+## %bb.0:
+	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
+	pushq	%r13
+	pushq	%r12
 	pushq	%rbx
-	movq	-8(%rdx), %r15
-	movq	(%rdx), %r8
-	movq	(%rsi), %r10
-	movq	%r10, %rcx
-	imulq	%r15, %rcx
-	movq	8(%rdx), %r9
-	movq	%rcx, %rdx
-	mulxq	%r9, %r11, %r14
-	mulxq	%r8, %rcx, %rax
-	addq	%r11, %rax
-	adcq	$0, %r14
-	movq	24(%rsi), %r11
-	addq	%r10, %rcx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r14
-	adcq	$0, %r11
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	imulq	%rax, %r15
-	movq	%r15, %rdx
-	mulxq	%r9, %r10, %rbx
-	mulxq	%r8, %rsi, %rdx
-	addq	%r10, %rdx
-	adcq	$0, %rbx
-	addq	%rax, %rsi
-	adcq	%r14, %rdx
-	adcq	%r11, %rbx
-	adcq	$0, %rcx
-	movq	%rdx, %rax
-	subq	%r8, %rax
-	movq	%rbx, %rsi
-	sbbq	%r9, %rsi
-	sbbq	$0, %rcx
-	andl	$1, %ecx
-	cmovneq	%rbx, %rsi
-	testb	%cl, %cl
-	cmovneq	%rdx, %rax
-	movq	%rax, (%rdi)
-	movq	%rsi, 8(%rdi)
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_addPre2Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_addPre2Lbmi2:                   ## @mcl_fp_addPre2Lbmi2
-## BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rcx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rcx
-	movq	%rax, (%rdi)
-	movq	%rcx, 8(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_subPre2Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_subPre2Lbmi2:                   ## @mcl_fp_subPre2Lbmi2
-## BB#0:
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	sbbq	8(%rdx), %rsi
-	movq	%rcx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_shr1_2Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_shr1_2Lbmi2:                    ## @mcl_fp_shr1_2Lbmi2
-## BB#0:
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rcx
-	shrdq	$1, %rcx, %rax
-	movq	%rax, (%rdi)
-	shrq	%rcx
-	movq	%rcx, 8(%rdi)
-	retq
-
-	.globl	_mcl_fp_add2Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_add2Lbmi2:                      ## @mcl_fp_add2Lbmi2
-## BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB29_2
-## BB#1:                                ## %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-LBB29_2:                                ## %carry
-	retq
-
-	.globl	_mcl_fp_addNF2Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_addNF2Lbmi2:                    ## @mcl_fp_addNF2Lbmi2
-## BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %r8
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %r8
-	movq	%rax, %rsi
-	subq	(%rcx), %rsi
-	movq	%r8, %rdx
-	sbbq	8(%rcx), %rdx
-	testq	%rdx, %rdx
-	cmovsq	%rax, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r8, %rdx
-	movq	%rdx, 8(%rdi)
-	retq
-
-	.globl	_mcl_fp_sub2Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_sub2Lbmi2:                      ## @mcl_fp_sub2Lbmi2
-## BB#0:
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r8
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r8
-	movq	%rax, (%rdi)
-	movq	%r8, 8(%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB31_2
-## BB#1:                                ## %nocarry
-	retq
-LBB31_2:                                ## %carry
-	movq	8(%rcx), %rdx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r8, %rdx
-	movq	%rdx, 8(%rdi)
-	retq
-
-	.globl	_mcl_fp_subNF2Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_subNF2Lbmi2:                    ## @mcl_fp_subNF2Lbmi2
-## BB#0:
-	movq	(%rsi), %r8
-	movq	8(%rsi), %rsi
-	subq	(%rdx), %r8
-	sbbq	8(%rdx), %rsi
-	movq	%rsi, %rdx
-	sarq	$63, %rdx
-	movq	8(%rcx), %rax
-	andq	%rdx, %rax
-	andq	(%rcx), %rdx
-	addq	%r8, %rdx
-	movq	%rdx, (%rdi)
-	adcq	%rsi, %rax
-	movq	%rax, 8(%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_add2Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_add2Lbmi2:                   ## @mcl_fpDbl_add2Lbmi2
-## BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rdx), %r10
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r10
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	adcq	%r8, %r9
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r10, %rdx
-	subq	(%rcx), %rdx
-	movq	%r9, %rsi
-	sbbq	8(%rcx), %rsi
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%r10, %rdx
-	movq	%rdx, 16(%rdi)
-	testb	%al, %al
-	cmovneq	%r9, %rsi
-	movq	%rsi, 24(%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_sub2Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_sub2Lbmi2:                   ## @mcl_fpDbl_sub2Lbmi2
-## BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
-	movq	(%rsi), %r11
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %r11
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %r10
-	movq	%r11, (%rdi)
-	movq	%rsi, 8(%rdi)
-	sbbq	%r8, %r9
-	movl	$0, %edx
-	sbbq	$0, %rdx
-	andl	$1, %edx
-	movq	(%rcx), %rsi
-	cmoveq	%rax, %rsi
-	testb	%dl, %dl
-	cmovneq	8(%rcx), %rax
-	addq	%r10, %rsi
-	movq	%rsi, 16(%rdi)
-	adcq	%r9, %rax
-	movq	%rax, 24(%rdi)
-	retq
-
-	.globl	_mcl_fp_mulUnitPre3Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_mulUnitPre3Lbmi2:               ## @mcl_fp_mulUnitPre3Lbmi2
-## BB#0:
-	mulxq	16(%rsi), %r8, %rcx
-	mulxq	8(%rsi), %r9, %rax
-	mulxq	(%rsi), %rdx, %rsi
-	movq	%rdx, (%rdi)
-	addq	%r9, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 16(%rdi)
-	adcq	$0, %rcx
-	movq	%rcx, 24(%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_mulPre3Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_mulPre3Lbmi2:                ## @mcl_fpDbl_mulPre3Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	%rdx, %r9
-	movq	(%rsi), %r10
-	movq	8(%rsi), %r8
-	movq	(%r9), %rax
-	movq	%r10, %rdx
-	mulxq	%rax, %rdx, %r14
-	movq	16(%rsi), %r11
-	movq	%rdx, (%rdi)
-	movq	%r11, %rdx
-	mulxq	%rax, %rsi, %rbx
-	movq	%r8, %rdx
-	mulxq	%rax, %rax, %rcx
-	addq	%r14, %rax
-	adcq	%rsi, %rcx
-	adcq	$0, %rbx
-	movq	8(%r9), %rsi
-	movq	%r10, %rdx
-	mulxq	%rsi, %rdx, %r14
-	addq	%rax, %rdx
-	movq	%rdx, 8(%rdi)
-	movq	%r11, %rdx
-	mulxq	%rsi, %rax, %r15
-	movq	%r8, %rdx
-	mulxq	%rsi, %rsi, %rdx
-	adcq	%rcx, %rsi
-	adcq	%rbx, %rax
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r14, %rsi
-	adcq	%rdx, %rax
-	adcq	%r15, %rcx
-	movq	16(%r9), %rbx
-	movq	%r10, %rdx
-	mulxq	%rbx, %rdx, %r9
-	addq	%rsi, %rdx
-	movq	%rdx, 16(%rdi)
-	movq	%r11, %rdx
-	mulxq	%rbx, %rsi, %r10
-	movq	%r8, %rdx
-	mulxq	%rbx, %rbx, %rdx
-	adcq	%rax, %rbx
-	adcq	%rcx, %rsi
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r9, %rbx
-	movq	%rbx, 24(%rdi)
-	adcq	%rdx, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%r10, %rax
-	movq	%rax, 40(%rdi)
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fpDbl_sqrPre3Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre3Lbmi2:                ## @mcl_fpDbl_sqrPre3Lbmi2
-## BB#0:
-	pushq	%r14
-	pushq	%rbx
-	movq	16(%rsi), %r10
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	movq	%rcx, %rdx
-	mulxq	%rcx, %rdx, %rax
-	movq	%rdx, (%rdi)
-	movq	%r10, %rdx
-	mulxq	%rcx, %r11, %r8
-	movq	%rsi, %rdx
-	mulxq	%rcx, %rdx, %r14
-	addq	%rdx, %rax
-	movq	%r14, %rbx
-	adcq	%r11, %rbx
-	movq	%r8, %rcx
-	adcq	$0, %rcx
-	addq	%rdx, %rax
-	movq	%rax, 8(%rdi)
-	movq	%r10, %rdx
-	mulxq	%rsi, %rax, %r9
-	movq	%rsi, %rdx
-	mulxq	%rsi, %rsi, %rdx
-	adcq	%rbx, %rsi
-	adcq	%rax, %rcx
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	%r14, %rsi
-	adcq	%rdx, %rcx
-	adcq	%r9, %rbx
-	addq	%r11, %rsi
-	movq	%rsi, 16(%rdi)
-	movq	%r10, %rdx
-	mulxq	%r10, %rsi, %rdx
-	adcq	%rax, %rcx
-	adcq	%rbx, %rsi
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r8, %rcx
-	movq	%rcx, 24(%rdi)
-	adcq	%r9, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%rdx, %rax
-	movq	%rax, 40(%rdi)
-	popq	%rbx
-	popq	%r14
-	retq
-
-	.globl	_mcl_fp_mont3Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_mont3Lbmi2:                     ## @mcl_fp_mont3Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %r14
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	16(%rsi), %r12
-	movq	(%r14), %rax
-	movq	%r14, -16(%rsp)         ## 8-byte Spill
-	movq	%r12, %rdx
-	movq	%r12, -24(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r11, %rbp
-	movq	(%rsi), %r15
-	movq	8(%rsi), %rdx
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rbx, %r8
-	movq	%r15, %rdx
-	movq	%r15, -32(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r9, %rdi
-	addq	%rbx, %rdi
-	adcq	%r11, %r8
-	adcq	$0, %rbp
-	movq	-8(%rcx), %r13
-	movq	%r9, %rdx
-	imulq	%r13, %rdx
-	movq	8(%rcx), %rax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r11, %r10
-	movq	(%rcx), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rsi, %rbx
-	addq	%r11, %rbx
-	movq	16(%rcx), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rcx, %rax
-	adcq	%r10, %rcx
-	adcq	$0, %rax
-	addq	%r9, %rsi
-	adcq	%rdi, %rbx
-	movq	8(%r14), %rdx
-	adcq	%r8, %rcx
-	adcq	%rbp, %rax
-	sbbq	%r9, %r9
-	andl	$1, %r9d
-	mulxq	%r12, %r11, %rdi
-	movq	-48(%rsp), %r12         ## 8-byte Reload
-	mulxq	%r12, %r10, %rsi
-	mulxq	%r15, %r8, %rbp
-	addq	%r10, %rbp
-	adcq	%r11, %rsi
-	adcq	$0, %rdi
-	addq	%rbx, %r8
-	adcq	%rcx, %rbp
-	adcq	%rax, %rsi
-	adcq	%r9, %rdi
-	sbbq	%r11, %r11
-	andl	$1, %r11d
-	movq	%r8, %rdx
-	imulq	%r13, %rdx
-	movq	-40(%rsp), %r14         ## 8-byte Reload
-	mulxq	%r14, %r9, %rcx
-	mulxq	-56(%rsp), %r10, %rax   ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %rdx, %rbx   ## 8-byte Folded Reload
-	addq	%r10, %rbx
-	adcq	%r9, %rax
-	adcq	$0, %rcx
-	addq	%r8, %rdx
-	adcq	%rbp, %rbx
-	adcq	%rsi, %rax
-	adcq	%rdi, %rcx
-	adcq	$0, %r11
-	movq	-16(%rsp), %rdx         ## 8-byte Reload
-	movq	16(%rdx), %rdx
-	mulxq	-24(%rsp), %r9, %rsi    ## 8-byte Folded Reload
-	mulxq	%r12, %r10, %r15
-	mulxq	-32(%rsp), %r8, %rdi    ## 8-byte Folded Reload
-	addq	%r10, %rdi
-	adcq	%r9, %r15
-	adcq	$0, %rsi
-	addq	%rbx, %r8
-	adcq	%rax, %rdi
-	adcq	%rcx, %r15
-	adcq	%r11, %rsi
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	imulq	%r8, %r13
-	movq	%r13, %rdx
-	mulxq	%r14, %r9, %rbp
-	movq	%r14, %r12
-	movq	-56(%rsp), %r14         ## 8-byte Reload
-	mulxq	%r14, %r10, %rax
-	movq	-64(%rsp), %rcx         ## 8-byte Reload
-	mulxq	%rcx, %r11, %rdx
-	addq	%r10, %rdx
-	adcq	%r9, %rax
-	adcq	$0, %rbp
-	addq	%r8, %r11
-	adcq	%rdi, %rdx
-	adcq	%r15, %rax
-	adcq	%rsi, %rbp
-	adcq	$0, %rbx
-	movq	%rdx, %rsi
-	subq	%rcx, %rsi
-	movq	%rax, %rdi
-	sbbq	%r14, %rdi
-	movq	%rbp, %rcx
-	sbbq	%r12, %rcx
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%rbp, %rcx
-	testb	%bl, %bl
-	cmovneq	%rdx, %rsi
-	movq	-8(%rsp), %rdx          ## 8-byte Reload
-	movq	%rsi, (%rdx)
-	cmovneq	%rax, %rdi
-	movq	%rdi, 8(%rdx)
-	movq	%rcx, 16(%rdx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montNF3Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_montNF3Lbmi2:                   ## @mcl_fp_montNF3Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rcx, %r8
-	movq	%rdx, %r10
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rdi
-	movq	%rdi, -32(%rsp)         ## 8-byte Spill
-	movq	(%r10), %rax
-	movq	%r10, -16(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rdx
-	mulxq	%rax, %rbx, %r14
-	movq	%rcx, %rdx
-	movq	%rcx, -24(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r15, %r12
-	movq	16(%rsi), %r11
-	addq	%rbx, %r12
-	movq	%r11, %rdx
-	mulxq	%rax, %rsi, %rbx
-	adcq	%r14, %rsi
-	adcq	$0, %rbx
-	movq	-8(%r8), %r9
-	movq	(%r8), %r14
-	movq	%r15, %rdx
-	imulq	%r9, %rdx
-	mulxq	%r14, %rbp, %r13
-	addq	%r15, %rbp
-	movq	8(%r8), %r15
-	mulxq	%r15, %rdi, %rbp
-	adcq	%r12, %rdi
-	movq	16(%r8), %r12
-	mulxq	%r12, %rax, %r8
-	adcq	%rsi, %rax
-	adcq	$0, %rbx
-	addq	%r13, %rdi
-	movq	8(%r10), %rdx
-	adcq	%rbp, %rax
-	adcq	%r8, %rbx
-	movq	-32(%rsp), %r10         ## 8-byte Reload
-	mulxq	%r10, %rsi, %r8
-	mulxq	%rcx, %r13, %rbp
-	addq	%rsi, %rbp
-	mulxq	%r11, %rcx, %rsi
-	adcq	%r8, %rcx
-	adcq	$0, %rsi
-	addq	%rdi, %r13
-	adcq	%rax, %rbp
-	adcq	%rbx, %rcx
-	adcq	$0, %rsi
-	movq	%r13, %rdx
-	imulq	%r9, %rdx
-	mulxq	%r14, %rdi, %rbx
-	addq	%r13, %rdi
-	mulxq	%r15, %rax, %rdi
-	adcq	%rbp, %rax
-	mulxq	%r12, %rbp, %rdx
-	adcq	%rcx, %rbp
-	adcq	$0, %rsi
-	addq	%rbx, %rax
-	adcq	%rdi, %rbp
-	adcq	%rdx, %rsi
-	movq	-16(%rsp), %rcx         ## 8-byte Reload
-	movq	16(%rcx), %rdx
-	mulxq	%r10, %rbx, %r8
-	mulxq	-24(%rsp), %r10, %rdi   ## 8-byte Folded Reload
-	addq	%rbx, %rdi
-	mulxq	%r11, %rcx, %rbx
-	adcq	%r8, %rcx
-	adcq	$0, %rbx
-	addq	%rax, %r10
-	adcq	%rbp, %rdi
-	adcq	%rsi, %rcx
-	adcq	$0, %rbx
-	imulq	%r10, %r9
-	movq	%r9, %rdx
-	mulxq	%r14, %rdx, %r8
-	addq	%r10, %rdx
-	movq	%r9, %rdx
-	mulxq	%r12, %rbp, %rsi
-	mulxq	%r15, %rax, %rdx
-	adcq	%rdi, %rax
-	adcq	%rcx, %rbp
-	adcq	$0, %rbx
-	addq	%r8, %rax
-	adcq	%rdx, %rbp
-	adcq	%rsi, %rbx
-	movq	%rax, %rcx
-	subq	%r14, %rcx
-	movq	%rbp, %rdx
-	sbbq	%r15, %rdx
-	movq	%rbx, %rsi
-	sbbq	%r12, %rsi
-	movq	%rsi, %rdi
-	sarq	$63, %rdi
-	cmovsq	%rax, %rcx
-	movq	-8(%rsp), %rax          ## 8-byte Reload
-	movq	%rcx, (%rax)
-	cmovsq	%rbp, %rdx
-	movq	%rdx, 8(%rax)
-	cmovsq	%rbx, %rsi
-	movq	%rsi, 16(%rax)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montRed3Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_montRed3Lbmi2:                  ## @mcl_fp_montRed3Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	-8(%rcx), %r15
-	movq	(%rcx), %r9
-	movq	(%rsi), %rbx
-	movq	%rbx, %rdx
-	imulq	%r15, %rdx
-	movq	16(%rcx), %rax
-	mulxq	%rax, %r14, %r11
-	movq	%rax, %rbp
-	movq	%rbp, -16(%rsp)         ## 8-byte Spill
-	movq	8(%rcx), %r10
-	mulxq	%r10, %rax, %r13
-	mulxq	%r9, %rdx, %rcx
-	addq	%rax, %rcx
-	adcq	%r14, %r13
-	adcq	$0, %r11
-	movq	40(%rsi), %r14
-	movq	32(%rsi), %r12
-	addq	%rbx, %rdx
-	adcq	8(%rsi), %rcx
-	adcq	16(%rsi), %r13
-	adcq	24(%rsi), %r11
-	adcq	$0, %r12
-	adcq	$0, %r14
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rcx, %rdx
-	imulq	%r15, %rdx
-	mulxq	%rbp, %rbp, %rdi
-	mulxq	%r10, %r8, %rbx
-	mulxq	%r9, %rdx, %rax
-	addq	%r8, %rax
-	adcq	%rbp, %rbx
-	adcq	$0, %rdi
-	addq	%rcx, %rdx
-	adcq	%r13, %rax
-	adcq	%r11, %rbx
-	adcq	%r12, %rdi
-	adcq	$0, %r14
-	adcq	$0, %rsi
-	imulq	%rax, %r15
-	movq	%r15, %rdx
-	movq	-16(%rsp), %r13         ## 8-byte Reload
-	mulxq	%r13, %r8, %rcx
-	movq	%r15, %rdx
-	mulxq	%r10, %r11, %r12
-	mulxq	%r9, %r15, %rdx
-	addq	%r11, %rdx
-	adcq	%r8, %r12
-	adcq	$0, %rcx
-	addq	%rax, %r15
-	adcq	%rbx, %rdx
-	adcq	%rdi, %r12
-	adcq	%r14, %rcx
-	adcq	$0, %rsi
-	movq	%rdx, %rax
-	subq	%r9, %rax
-	movq	%r12, %rdi
-	sbbq	%r10, %rdi
-	movq	%rcx, %rbp
-	sbbq	%r13, %rbp
-	sbbq	$0, %rsi
-	andl	$1, %esi
-	cmovneq	%rcx, %rbp
-	testb	%sil, %sil
-	cmovneq	%rdx, %rax
-	movq	-8(%rsp), %rcx          ## 8-byte Reload
-	movq	%rax, (%rcx)
-	cmovneq	%r12, %rdi
-	movq	%rdi, 8(%rcx)
-	movq	%rbp, 16(%rcx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_addPre3Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_addPre3Lbmi2:                   ## @mcl_fp_addPre3Lbmi2
-## BB#0:
-	movq	16(%rdx), %rax
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rax
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_subPre3Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_subPre3Lbmi2:                   ## @mcl_fp_subPre3Lbmi2
-## BB#0:
-	movq	16(%rsi), %r8
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %r8
-	movq	%rcx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%r8, 16(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_shr1_3Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_shr1_3Lbmi2:                    ## @mcl_fp_shr1_3Lbmi2
-## BB#0:
-	movq	16(%rsi), %rax
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rdx
-	shrdq	$1, %rdx, %rcx
-	movq	%rcx, (%rdi)
-	shrdq	$1, %rax, %rdx
-	movq	%rdx, 8(%rdi)
-	shrq	%rax
-	movq	%rax, 16(%rdi)
-	retq
-
-	.globl	_mcl_fp_add3Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_add3Lbmi2:                      ## @mcl_fp_add3Lbmi2
-## BB#0:
-	movq	16(%rdx), %r8
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r8
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r8, 16(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB44_2
-## BB#1:                                ## %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r8, 16(%rdi)
-LBB44_2:                                ## %carry
-	retq
-
-	.globl	_mcl_fp_addNF3Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_addNF3Lbmi2:                    ## @mcl_fp_addNF3Lbmi2
-## BB#0:
-	movq	16(%rdx), %r8
-	movq	(%rdx), %r10
-	movq	8(%rdx), %r9
-	addq	(%rsi), %r10
-	adcq	8(%rsi), %r9
-	adcq	16(%rsi), %r8
-	movq	%r10, %rsi
-	subq	(%rcx), %rsi
-	movq	%r9, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r8, %rax
-	sbbq	16(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r10, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r8, %rax
-	movq	%rax, 16(%rdi)
-	retq
-
-	.globl	_mcl_fp_sub3Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_sub3Lbmi2:                      ## @mcl_fp_sub3Lbmi2
-## BB#0:
-	movq	16(%rsi), %r8
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r9
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r9
-	sbbq	16(%rdx), %r8
-	movq	%rax, (%rdi)
-	movq	%r9, 8(%rdi)
-	movq	%r8, 16(%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB46_2
-## BB#1:                                ## %nocarry
-	retq
-LBB46_2:                                ## %carry
-	movq	8(%rcx), %rdx
-	movq	16(%rcx), %rsi
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r8, %rsi
-	movq	%rsi, 16(%rdi)
-	retq
-
-	.globl	_mcl_fp_subNF3Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_subNF3Lbmi2:                    ## @mcl_fp_subNF3Lbmi2
-## BB#0:
-	movq	16(%rsi), %r10
-	movq	(%rsi), %r8
-	movq	8(%rsi), %r9
-	subq	(%rdx), %r8
-	sbbq	8(%rdx), %r9
-	sbbq	16(%rdx), %r10
-	movq	%r10, %rdx
-	sarq	$63, %rdx
-	movq	%rdx, %rsi
-	shldq	$1, %r10, %rsi
-	andq	(%rcx), %rsi
-	movq	16(%rcx), %rax
-	andq	%rdx, %rax
-	andq	8(%rcx), %rdx
-	addq	%r8, %rsi
-	movq	%rsi, (%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r10, %rax
-	movq	%rax, 16(%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_add3Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_add3Lbmi2:                   ## @mcl_fpDbl_add3Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	40(%rdx), %r10
-	movq	40(%rsi), %r8
-	movq	32(%rdx), %r11
-	movq	24(%rdx), %r14
-	movq	24(%rsi), %r15
-	movq	32(%rsi), %r9
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rbx, 16(%rdi)
-	adcq	%r14, %r15
-	adcq	%r11, %r9
-	adcq	%r10, %r8
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r15, %rdx
-	subq	(%rcx), %rdx
-	movq	%r9, %rsi
-	sbbq	8(%rcx), %rsi
-	movq	%r8, %rbx
-	sbbq	16(%rcx), %rbx
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%r15, %rdx
-	movq	%rdx, 24(%rdi)
-	testb	%al, %al
-	cmovneq	%r9, %rsi
-	movq	%rsi, 32(%rdi)
-	cmovneq	%r8, %rbx
-	movq	%rbx, 40(%rdi)
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fpDbl_sub3Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_sub3Lbmi2:                   ## @mcl_fpDbl_sub3Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	40(%rdx), %r10
-	movq	40(%rsi), %r8
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %r14
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %rax
-	xorl	%esi, %esi
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %rax
-	movq	24(%rdx), %r15
-	movq	32(%rdx), %r12
-	sbbq	16(%rdx), %r14
-	movq	%rbx, (%rdi)
-	movq	%rax, 8(%rdi)
-	movq	%r14, 16(%rdi)
-	sbbq	%r15, %r11
-	sbbq	%r12, %r9
-	sbbq	%r10, %r8
-	movl	$0, %eax
-	sbbq	$0, %rax
-	andl	$1, %eax
-	movq	(%rcx), %rdx
-	cmoveq	%rsi, %rdx
-	testb	%al, %al
-	movq	16(%rcx), %rax
-	cmoveq	%rsi, %rax
-	cmovneq	8(%rcx), %rsi
-	addq	%r11, %rdx
-	movq	%rdx, 24(%rdi)
-	adcq	%r9, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 40(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_mulUnitPre4Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_mulUnitPre4Lbmi2:               ## @mcl_fp_mulUnitPre4Lbmi2
-## BB#0:
-	mulxq	24(%rsi), %r8, %r11
-	mulxq	16(%rsi), %r9, %rax
-	mulxq	8(%rsi), %r10, %rcx
-	mulxq	(%rsi), %rdx, %rsi
-	movq	%rdx, (%rdi)
-	addq	%r10, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r9, %rcx
-	movq	%rcx, 16(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 24(%rdi)
-	adcq	$0, %r11
-	movq	%r11, 32(%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_mulPre4Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_mulPre4Lbmi2:                ## @mcl_fpDbl_mulPre4Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	(%rsi), %r14
-	movq	8(%rsi), %r10
-	movq	(%rdx), %rcx
-	movq	%rdx, %rbp
-	movq	%r14, %rdx
-	mulxq	%rcx, %rdx, %r15
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %r9
-	movq	%rdx, (%rdi)
-	movq	%r10, %rdx
-	mulxq	%rcx, %rbx, %r12
-	addq	%r15, %rbx
-	movq	%r9, %rdx
-	mulxq	%rcx, %r13, %r15
-	adcq	%r12, %r13
-	movq	%r11, %rdx
-	mulxq	%rcx, %rcx, %r12
-	adcq	%r15, %rcx
-	adcq	$0, %r12
-	movq	8(%rbp), %rax
-	movq	%r14, %rdx
-	mulxq	%rax, %r8, %rdx
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	addq	%rbx, %r8
-	movq	%r10, %rdx
-	mulxq	%rax, %r15, %rdx
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	adcq	%r13, %r15
-	movq	%r9, %rdx
-	mulxq	%rax, %rbx, %r13
-	adcq	%rcx, %rbx
-	movq	%r11, %rdx
-	mulxq	%rax, %rcx, %rax
-	adcq	%r12, %rcx
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-8(%rsp), %r15          ## 8-byte Folded Reload
-	adcq	-16(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	%r13, %rcx
-	movq	%r8, 8(%rdi)
-	adcq	%rax, %r12
-	movq	%rbp, %r13
-	movq	16(%r13), %rax
-	movq	%r14, %rdx
-	mulxq	%rax, %rdx, %r8
-	addq	%r15, %rdx
-	movq	%rdx, 16(%rdi)
-	movq	%r10, %rdx
-	mulxq	%rax, %rbp, %r10
-	adcq	%rbx, %rbp
-	movq	%r11, %rdx
-	mulxq	%rax, %r14, %r11
-	movq	%r9, %rdx
-	mulxq	%rax, %r15, %rdx
-	adcq	%rcx, %r15
-	adcq	%r12, %r14
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r8, %rbp
-	adcq	%r10, %r15
-	adcq	%rdx, %r14
-	adcq	%r11, %rcx
-	movq	24(%r13), %rdx
-	mulxq	24(%rsi), %rbx, %r8
-	mulxq	(%rsi), %rax, %r9
-	addq	%rbp, %rax
-	movq	%rax, 24(%rdi)
-	mulxq	16(%rsi), %rbp, %rax
-	mulxq	8(%rsi), %rsi, %rdx
-	adcq	%r15, %rsi
-	adcq	%r14, %rbp
-	adcq	%rcx, %rbx
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r9, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%rdx, %rbp
-	movq	%rbp, 40(%rdi)
-	adcq	%rax, %rbx
-	movq	%rbx, 48(%rdi)
-	adcq	%r8, %rcx
-	movq	%rcx, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sqrPre4Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre4Lbmi2:                ## @mcl_fpDbl_sqrPre4Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	24(%rsi), %r8
-	movq	16(%rsi), %r9
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rax
-	movq	%rcx, %rdx
-	mulxq	%rcx, %rdx, %r11
-	movq	%rdx, (%rdi)
-	movq	%r9, %rdx
-	mulxq	%rcx, %rbp, %r10
-	movq	%rbp, -16(%rsp)         ## 8-byte Spill
-	movq	%r10, -8(%rsp)          ## 8-byte Spill
-	movq	%rax, %rdx
-	mulxq	%rcx, %r12, %r15
-	addq	%r12, %r11
-	movq	%r15, %rbx
-	adcq	%rbp, %rbx
-	movq	%r8, %rdx
-	mulxq	%rcx, %rcx, %r13
-	adcq	%r10, %rcx
-	adcq	$0, %r13
-	addq	%r12, %r11
-	movq	%rax, %rdx
-	mulxq	%rax, %rbp, %r12
-	adcq	%rbx, %rbp
-	movq	%r8, %rdx
-	mulxq	%rax, %r10, %rbx
-	movq	%r9, %rdx
-	mulxq	%rax, %r14, %rdx
-	adcq	%r14, %rcx
-	adcq	%r13, %r10
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r15, %rbp
-	adcq	%r12, %rcx
-	adcq	%rdx, %r10
-	movq	%rdx, %r12
-	adcq	%rbx, %rax
-	movq	%r11, 8(%rdi)
-	addq	-16(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rbp, 16(%rdi)
-	movq	%r8, %rdx
-	mulxq	%r9, %r11, %r8
-	movq	%r9, %rdx
-	mulxq	%r9, %r15, %rdx
-	adcq	%r14, %rcx
-	adcq	%r10, %r15
-	adcq	%rax, %r11
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	-8(%rsp), %rcx          ## 8-byte Folded Reload
-	adcq	%r12, %r15
-	adcq	%rdx, %r11
-	adcq	%r8, %rax
-	movq	24(%rsi), %rdx
-	mulxq	16(%rsi), %rbx, %r8
-	mulxq	8(%rsi), %rbp, %r9
-	mulxq	(%rsi), %rsi, %r10
-	addq	%rcx, %rsi
-	movq	%rsi, 24(%rdi)
-	adcq	%r15, %rbp
-	adcq	%r11, %rbx
-	mulxq	%rdx, %rdx, %rcx
-	adcq	%rax, %rdx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%r10, %rbp
-	movq	%rbp, 32(%rdi)
-	adcq	%r9, %rbx
-	movq	%rbx, 40(%rdi)
-	adcq	%r8, %rdx
-	movq	%rdx, 48(%rdi)
-	adcq	%rcx, %rax
-	movq	%rax, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_mont4Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_mont4Lbmi2:                     ## @mcl_fp_mont4Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %r13
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	24(%rsi), %rdi
-	movq	%rdi, -32(%rsp)         ## 8-byte Spill
-	movq	(%r13), %rax
-	movq	%r13, -16(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rdx
-	mulxq	%rax, %rdi, %r11
-	movq	16(%rsi), %rdx
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rbx, %r10
-	movq	(%rsi), %rbp
-	movq	%rbp, -48(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %rdx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rsi, %r12
-	movq	%rbp, %rdx
-	mulxq	%rax, %r14, %r8
-	addq	%rsi, %r8
-	adcq	%rbx, %r12
-	adcq	%rdi, %r10
-	adcq	$0, %r11
-	movq	-8(%rcx), %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%r14, %rdx
-	imulq	%rax, %rdx
-	movq	24(%rcx), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r15, %rax
-	movq	16(%rcx), %rsi
-	movq	%rsi, -80(%rsp)         ## 8-byte Spill
-	mulxq	%rsi, %r9, %rsi
-	movq	(%rcx), %rbp
-	movq	%rbp, -24(%rsp)         ## 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rcx, %rdi, %rcx
-	mulxq	%rbp, %rdx, %rbx
-	addq	%rdi, %rbx
-	adcq	%r9, %rcx
-	adcq	%r15, %rsi
-	adcq	$0, %rax
-	addq	%r14, %rdx
-	adcq	%r8, %rbx
-	adcq	%r12, %rcx
-	adcq	%r10, %rsi
-	adcq	%r11, %rax
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	movq	8(%r13), %rdx
-	mulxq	-32(%rsp), %r12, %r10   ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %r15, %r11   ## 8-byte Folded Reload
-	mulxq	-56(%rsp), %r14, %rbp   ## 8-byte Folded Reload
-	mulxq	-48(%rsp), %r8, %r9     ## 8-byte Folded Reload
-	addq	%r14, %r9
-	adcq	%r15, %rbp
-	adcq	%r12, %r11
-	adcq	$0, %r10
-	addq	%rbx, %r8
-	adcq	%rcx, %r9
-	adcq	%rsi, %rbp
-	adcq	%rax, %r11
-	adcq	%rdi, %r10
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	movq	%r8, %rdx
-	imulq	-88(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %r14, %rcx   ## 8-byte Folded Reload
-	mulxq	-80(%rsp), %r15, %rsi   ## 8-byte Folded Reload
-	mulxq	-72(%rsp), %r12, %rax   ## 8-byte Folded Reload
-	movq	-24(%rsp), %r13         ## 8-byte Reload
-	mulxq	%r13, %rdx, %rdi
-	addq	%r12, %rdi
-	adcq	%r15, %rax
-	adcq	%r14, %rsi
-	adcq	$0, %rcx
-	addq	%r8, %rdx
-	adcq	%r9, %rdi
-	adcq	%rbp, %rax
-	adcq	%r11, %rsi
-	adcq	%r10, %rcx
-	adcq	$0, %rbx
-	movq	-16(%rsp), %rdx         ## 8-byte Reload
-	movq	16(%rdx), %rdx
-	mulxq	-32(%rsp), %r14, %r11   ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %r15, %rbp   ## 8-byte Folded Reload
-	mulxq	-56(%rsp), %r12, %r8    ## 8-byte Folded Reload
-	mulxq	-48(%rsp), %r9, %r10    ## 8-byte Folded Reload
-	addq	%r12, %r10
-	adcq	%r15, %r8
-	adcq	%r14, %rbp
-	adcq	$0, %r11
-	addq	%rdi, %r9
-	adcq	%rax, %r10
-	adcq	%rsi, %r8
-	adcq	%rcx, %rbp
-	adcq	%rbx, %r11
-	sbbq	%rax, %rax
-	movq	%r9, %rdx
-	imulq	-88(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	-72(%rsp), %rcx, %rsi   ## 8-byte Folded Reload
-	mulxq	%r13, %r14, %rdi
-	addq	%rcx, %rdi
-	mulxq	-80(%rsp), %rcx, %r15   ## 8-byte Folded Reload
-	adcq	%rsi, %rcx
-	movq	-64(%rsp), %r13         ## 8-byte Reload
-	mulxq	%r13, %rbx, %rsi
-	adcq	%r15, %rbx
-	adcq	$0, %rsi
-	andl	$1, %eax
-	addq	%r9, %r14
-	adcq	%r10, %rdi
-	adcq	%r8, %rcx
-	adcq	%rbp, %rbx
-	adcq	%r11, %rsi
-	adcq	$0, %rax
-	movq	-16(%rsp), %rdx         ## 8-byte Reload
-	movq	24(%rdx), %rdx
-	mulxq	-32(%rsp), %r11, %r8    ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %r15, %r9    ## 8-byte Folded Reload
-	mulxq	-56(%rsp), %r12, %r14   ## 8-byte Folded Reload
-	mulxq	-48(%rsp), %r10, %rbp   ## 8-byte Folded Reload
-	addq	%r12, %rbp
-	adcq	%r15, %r14
-	adcq	%r11, %r9
-	adcq	$0, %r8
-	addq	%rdi, %r10
-	adcq	%rcx, %rbp
-	adcq	%rbx, %r14
-	adcq	%rsi, %r9
-	adcq	%rax, %r8
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	imulq	%r10, %rdx
-	mulxq	%r13, %rcx, %rdi
-	movq	%rcx, -88(%rsp)         ## 8-byte Spill
-	mulxq	-80(%rsp), %r15, %rsi   ## 8-byte Folded Reload
-	movq	-72(%rsp), %rbx         ## 8-byte Reload
-	mulxq	%rbx, %r12, %rcx
-	movq	-24(%rsp), %r11         ## 8-byte Reload
-	mulxq	%r11, %rdx, %r13
-	addq	%r12, %r13
-	adcq	%r15, %rcx
-	adcq	-88(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	$0, %rdi
-	addq	%r10, %rdx
-	adcq	%rbp, %r13
-	adcq	%r14, %rcx
-	adcq	%r9, %rsi
-	adcq	%r8, %rdi
-	adcq	$0, %rax
-	movq	%r13, %rdx
-	subq	%r11, %rdx
-	movq	%rcx, %rbp
-	sbbq	%rbx, %rbp
-	movq	%rsi, %r8
-	sbbq	-80(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%rdi, %rbx
-	sbbq	-64(%rsp), %rbx         ## 8-byte Folded Reload
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%rdi, %rbx
-	testb	%al, %al
-	cmovneq	%r13, %rdx
-	movq	-8(%rsp), %rax          ## 8-byte Reload
-	movq	%rdx, (%rax)
-	cmovneq	%rcx, %rbp
-	movq	%rbp, 8(%rax)
-	cmovneq	%rsi, %r8
-	movq	%r8, 16(%rax)
-	movq	%rbx, 24(%rax)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montNF4Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_montNF4Lbmi2:                   ## @mcl_fp_montNF4Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	(%rsi), %rdi
-	movq	%rdi, -56(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %rbp
-	movq	%rbp, -64(%rsp)         ## 8-byte Spill
-	movq	(%rdx), %rax
-	movq	%rdx, %r15
-	movq	%r15, -24(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rdx
-	mulxq	%rax, %rbp, %r9
-	movq	%rdi, %rdx
-	mulxq	%rax, %r12, %rbx
-	movq	16(%rsi), %rdx
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	addq	%rbp, %rbx
-	mulxq	%rax, %r14, %rbp
-	adcq	%r9, %r14
-	movq	24(%rsi), %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r8, %rdi
-	adcq	%rbp, %r8
-	adcq	$0, %rdi
-	movq	-8(%rcx), %r13
-	movq	(%rcx), %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	movq	%r12, %rdx
-	imulq	%r13, %rdx
-	mulxq	%rax, %rax, %r11
-	addq	%r12, %rax
-	movq	8(%rcx), %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rbp, %r10
-	adcq	%rbx, %rbp
-	movq	16(%rcx), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rsi, %rbx
-	adcq	%r14, %rsi
-	movq	24(%rcx), %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rcx, %rdx
-	adcq	%r8, %rcx
-	adcq	$0, %rdi
-	addq	%r11, %rbp
-	adcq	%r10, %rsi
-	adcq	%rbx, %rcx
-	adcq	%rdx, %rdi
-	movq	8(%r15), %rdx
-	movq	-64(%rsp), %r12         ## 8-byte Reload
-	mulxq	%r12, %rbx, %r9
-	movq	-56(%rsp), %r15         ## 8-byte Reload
-	mulxq	%r15, %r10, %r11
-	addq	%rbx, %r11
-	mulxq	-40(%rsp), %rax, %r8    ## 8-byte Folded Reload
-	adcq	%r9, %rax
-	mulxq	-80(%rsp), %r9, %rbx    ## 8-byte Folded Reload
-	adcq	%r8, %r9
-	adcq	$0, %rbx
-	addq	%rbp, %r10
-	adcq	%rsi, %r11
-	adcq	%rcx, %rax
-	adcq	%rdi, %r9
-	adcq	$0, %rbx
-	movq	%r10, %rdx
-	imulq	%r13, %rdx
-	movq	-48(%rsp), %r14         ## 8-byte Reload
-	mulxq	%r14, %rcx, %r8
-	addq	%r10, %rcx
-	mulxq	-16(%rsp), %r10, %rdi   ## 8-byte Folded Reload
-	adcq	%r11, %r10
-	mulxq	-32(%rsp), %rcx, %rsi   ## 8-byte Folded Reload
-	adcq	%rax, %rcx
-	mulxq	-72(%rsp), %rax, %rdx   ## 8-byte Folded Reload
-	adcq	%r9, %rax
-	adcq	$0, %rbx
-	addq	%r8, %r10
-	adcq	%rdi, %rcx
-	adcq	%rsi, %rax
-	adcq	%rdx, %rbx
-	movq	-24(%rsp), %rdx         ## 8-byte Reload
-	movq	16(%rdx), %rdx
-	mulxq	%r12, %rsi, %r8
-	mulxq	%r15, %r11, %rbp
-	addq	%rsi, %rbp
-	movq	-40(%rsp), %r12         ## 8-byte Reload
-	mulxq	%r12, %rdi, %r9
-	adcq	%r8, %rdi
-	mulxq	-80(%rsp), %r8, %rsi    ## 8-byte Folded Reload
-	adcq	%r9, %r8
-	adcq	$0, %rsi
-	addq	%r10, %r11
-	adcq	%rcx, %rbp
-	adcq	%rax, %rdi
-	adcq	%rbx, %r8
-	adcq	$0, %rsi
-	movq	%r11, %rdx
-	imulq	%r13, %rdx
-	mulxq	%r14, %rax, %r10
-	addq	%r11, %rax
-	movq	-16(%rsp), %r14         ## 8-byte Reload
-	mulxq	%r14, %r9, %rbx
-	adcq	%rbp, %r9
-	movq	-32(%rsp), %r15         ## 8-byte Reload
-	mulxq	%r15, %rax, %rbp
-	adcq	%rdi, %rax
-	mulxq	-72(%rsp), %rcx, %rdx   ## 8-byte Folded Reload
-	adcq	%r8, %rcx
-	adcq	$0, %rsi
-	addq	%r10, %r9
-	adcq	%rbx, %rax
-	adcq	%rbp, %rcx
-	adcq	%rdx, %rsi
-	movq	-24(%rsp), %rdx         ## 8-byte Reload
-	movq	24(%rdx), %rdx
-	mulxq	-64(%rsp), %rbx, %r8    ## 8-byte Folded Reload
-	mulxq	-56(%rsp), %r11, %rbp   ## 8-byte Folded Reload
-	addq	%rbx, %rbp
-	mulxq	%r12, %rdi, %r10
-	adcq	%r8, %rdi
-	mulxq	-80(%rsp), %r8, %rbx    ## 8-byte Folded Reload
-	adcq	%r10, %r8
-	adcq	$0, %rbx
-	addq	%r9, %r11
-	adcq	%rax, %rbp
-	adcq	%rcx, %rdi
-	adcq	%rsi, %r8
-	adcq	$0, %rbx
-	imulq	%r11, %r13
-	movq	%r13, %rdx
-	movq	-48(%rsp), %r12         ## 8-byte Reload
-	mulxq	%r12, %rcx, %r9
-	addq	%r11, %rcx
-	mulxq	%r14, %r11, %r10
-	adcq	%rbp, %r11
-	movq	%r15, %rsi
-	mulxq	%rsi, %rax, %rcx
-	adcq	%rdi, %rax
-	movq	-72(%rsp), %rbp         ## 8-byte Reload
-	mulxq	%rbp, %r15, %rdx
-	adcq	%r8, %r15
-	adcq	$0, %rbx
-	addq	%r9, %r11
-	adcq	%r10, %rax
-	adcq	%rcx, %r15
-	adcq	%rdx, %rbx
-	movq	%r11, %rcx
-	subq	%r12, %rcx
-	movq	%rax, %rdx
-	sbbq	%r14, %rdx
-	movq	%r15, %rdi
-	sbbq	%rsi, %rdi
-	movq	%rbx, %rsi
-	sbbq	%rbp, %rsi
-	cmovsq	%r11, %rcx
-	movq	-8(%rsp), %rbp          ## 8-byte Reload
-	movq	%rcx, (%rbp)
-	cmovsq	%rax, %rdx
-	movq	%rdx, 8(%rbp)
-	cmovsq	%r15, %rdi
-	movq	%rdi, 16(%rbp)
-	cmovsq	%rbx, %rsi
-	movq	%rsi, 24(%rbp)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montRed4Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_montRed4Lbmi2:                  ## @mcl_fp_montRed4Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	-8(%rcx), %r13
-	movq	(%rcx), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %r10
-	movq	%r10, %rdx
-	imulq	%r13, %rdx
-	movq	24(%rcx), %rdi
-	mulxq	%rdi, %r9, %r15
-	movq	%rdi, %r14
-	movq	%r14, -40(%rsp)         ## 8-byte Spill
-	movq	16(%rcx), %rdi
-	movq	%rdi, -48(%rsp)         ## 8-byte Spill
-	mulxq	%rdi, %rdi, %rbx
-	movq	8(%rcx), %rcx
-	movq	%rcx, -56(%rsp)         ## 8-byte Spill
-	mulxq	%rcx, %rcx, %r8
-	mulxq	%rax, %rdx, %rbp
-	addq	%rcx, %rbp
-	adcq	%rdi, %r8
-	adcq	%r9, %rbx
-	adcq	$0, %r15
-	movq	56(%rsi), %r11
-	movq	48(%rsi), %rcx
-	addq	%r10, %rdx
-	movq	40(%rsi), %r12
-	adcq	8(%rsi), %rbp
-	adcq	16(%rsi), %r8
-	adcq	24(%rsi), %rbx
-	adcq	32(%rsi), %r15
-	adcq	$0, %r12
-	adcq	$0, %rcx
-	movq	%rcx, -64(%rsp)         ## 8-byte Spill
-	adcq	$0, %r11
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rbp, %rdx
-	imulq	%r13, %rdx
-	mulxq	%r14, %rax, %r9
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	mulxq	-48(%rsp), %r14, %rdi   ## 8-byte Folded Reload
-	mulxq	-56(%rsp), %r10, %rcx   ## 8-byte Folded Reload
-	mulxq	-32(%rsp), %rdx, %rax   ## 8-byte Folded Reload
-	addq	%r10, %rax
-	adcq	%r14, %rcx
-	adcq	-72(%rsp), %rdi         ## 8-byte Folded Reload
-	adcq	$0, %r9
-	addq	%rbp, %rdx
-	adcq	%r8, %rax
-	adcq	%rbx, %rcx
-	adcq	%r15, %rdi
-	adcq	%r12, %r9
-	adcq	$0, -64(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %r11
-	movq	%r11, -72(%rsp)         ## 8-byte Spill
-	adcq	$0, %rsi
-	movq	%rax, %rdx
-	imulq	%r13, %rdx
-	movq	-40(%rsp), %r15         ## 8-byte Reload
-	mulxq	%r15, %rbp, %r8
-	movq	%rbp, -16(%rsp)         ## 8-byte Spill
-	movq	-48(%rsp), %r11         ## 8-byte Reload
-	mulxq	%r11, %rbx, %r10
-	movq	%rbx, -24(%rsp)         ## 8-byte Spill
-	mulxq	-56(%rsp), %r12, %rbp   ## 8-byte Folded Reload
-	movq	-32(%rsp), %r14         ## 8-byte Reload
-	mulxq	%r14, %rdx, %rbx
-	addq	%r12, %rbx
-	adcq	-24(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-16(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	%rax, %rdx
-	adcq	%rcx, %rbx
-	adcq	%rdi, %rbp
-	adcq	%r9, %r10
-	adcq	-64(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	$0, -72(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %rsi
-	imulq	%rbx, %r13
-	movq	%r13, %rdx
-	mulxq	%r15, %rax, %rdi
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%r13, %rdx
-	mulxq	%r11, %r9, %rax
-	movq	-56(%rsp), %r11         ## 8-byte Reload
-	mulxq	%r11, %r12, %rcx
-	mulxq	%r14, %r15, %r13
-	addq	%r12, %r13
-	adcq	%r9, %rcx
-	adcq	-64(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	$0, %rdi
-	addq	%rbx, %r15
-	adcq	%rbp, %r13
-	adcq	%r10, %rcx
-	adcq	%r8, %rax
-	adcq	-72(%rsp), %rdi         ## 8-byte Folded Reload
-	adcq	$0, %rsi
-	movq	%r13, %rdx
-	subq	%r14, %rdx
-	movq	%rcx, %rbp
-	sbbq	%r11, %rbp
-	movq	%rax, %r8
-	sbbq	-48(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%rdi, %rbx
-	sbbq	-40(%rsp), %rbx         ## 8-byte Folded Reload
-	sbbq	$0, %rsi
-	andl	$1, %esi
-	cmovneq	%rdi, %rbx
-	testb	%sil, %sil
-	cmovneq	%r13, %rdx
-	movq	-8(%rsp), %rsi          ## 8-byte Reload
-	movq	%rdx, (%rsi)
-	cmovneq	%rcx, %rbp
-	movq	%rbp, 8(%rsi)
-	cmovneq	%rax, %r8
-	movq	%r8, 16(%rsi)
-	movq	%rbx, 24(%rsi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_addPre4Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_addPre4Lbmi2:                   ## @mcl_fp_addPre4Lbmi2
-## BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rdx), %rax
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rax
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	adcq	%r8, %r9
-	movq	%r9, 24(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_subPre4Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_subPre4Lbmi2:                   ## @mcl_fp_subPre4Lbmi2
-## BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %r10
-	movq	%rcx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	sbbq	%r8, %r9
-	movq	%r9, 24(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_shr1_4Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_shr1_4Lbmi2:                    ## @mcl_fp_shr1_4Lbmi2
-## BB#0:
-	movq	24(%rsi), %rax
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rdx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rdx
-	movq	%rdx, (%rdi)
-	shrdq	$1, %rcx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rax, %rcx
-	movq	%rcx, 16(%rdi)
-	shrq	%rax
-	movq	%rax, 24(%rdi)
-	retq
-
-	.globl	_mcl_fp_add4Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_add4Lbmi2:                      ## @mcl_fp_add4Lbmi2
-## BB#0:
-	movq	24(%rdx), %r10
-	movq	24(%rsi), %r8
-	movq	16(%rdx), %r9
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r9
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r9, 16(%rdi)
-	adcq	%r10, %r8
-	movq	%r8, 24(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r9
-	sbbq	24(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB59_2
-## BB#1:                                ## %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r9, 16(%rdi)
-	movq	%r8, 24(%rdi)
-LBB59_2:                                ## %carry
-	retq
-
-	.globl	_mcl_fp_addNF4Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_addNF4Lbmi2:                    ## @mcl_fp_addNF4Lbmi2
-## BB#0:
-	pushq	%rbx
-	movq	24(%rdx), %r8
-	movq	16(%rdx), %r9
-	movq	(%rdx), %r11
-	movq	8(%rdx), %r10
-	addq	(%rsi), %r11
-	adcq	8(%rsi), %r10
-	adcq	16(%rsi), %r9
-	adcq	24(%rsi), %r8
-	movq	%r11, %rsi
-	subq	(%rcx), %rsi
-	movq	%r10, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r9, %rax
-	sbbq	16(%rcx), %rax
-	movq	%r8, %rbx
-	sbbq	24(%rcx), %rbx
-	testq	%rbx, %rbx
-	cmovsq	%r11, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r10, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r9, %rax
-	movq	%rax, 16(%rdi)
-	cmovsq	%r8, %rbx
-	movq	%rbx, 24(%rdi)
-	popq	%rbx
-	retq
-
-	.globl	_mcl_fp_sub4Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_sub4Lbmi2:                      ## @mcl_fp_sub4Lbmi2
-## BB#0:
-	movq	24(%rdx), %r10
-	movq	24(%rsi), %r8
-	movq	16(%rsi), %r9
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r11
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %r9
-	movq	%rax, (%rdi)
-	movq	%r11, 8(%rdi)
-	movq	%r9, 16(%rdi)
-	sbbq	%r10, %r8
-	movq	%r8, 24(%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB61_2
-## BB#1:                                ## %nocarry
-	retq
-LBB61_2:                                ## %carry
-	movq	24(%rcx), %r10
-	movq	8(%rcx), %rsi
-	movq	16(%rcx), %rdx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r11, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 24(%rdi)
-	retq
-
-	.globl	_mcl_fp_subNF4Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_subNF4Lbmi2:                    ## @mcl_fp_subNF4Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movdqu	(%rdx), %xmm0
-	movdqu	16(%rdx), %xmm1
-	pshufd	$78, %xmm1, %xmm2       ## xmm2 = xmm1[2,3,0,1]
-	movd	%xmm2, %r8
-	movdqu	(%rsi), %xmm2
-	movdqu	16(%rsi), %xmm3
-	pshufd	$78, %xmm3, %xmm4       ## xmm4 = xmm3[2,3,0,1]
-	movd	%xmm4, %r15
-	movd	%xmm1, %r9
-	movd	%xmm3, %r11
-	pshufd	$78, %xmm0, %xmm1       ## xmm1 = xmm0[2,3,0,1]
-	movd	%xmm1, %r10
-	pshufd	$78, %xmm2, %xmm1       ## xmm1 = xmm2[2,3,0,1]
-	movd	%xmm1, %r14
-	movd	%xmm0, %rdx
-	movd	%xmm2, %r12
-	subq	%rdx, %r12
-	sbbq	%r10, %r14
-	sbbq	%r9, %r11
-	sbbq	%r8, %r15
-	movq	%r15, %rdx
-	sarq	$63, %rdx
-	movq	24(%rcx), %rsi
-	andq	%rdx, %rsi
-	movq	16(%rcx), %rax
-	andq	%rdx, %rax
-	movq	8(%rcx), %rbx
-	andq	%rdx, %rbx
-	andq	(%rcx), %rdx
-	addq	%r12, %rdx
-	movq	%rdx, (%rdi)
-	adcq	%r14, %rbx
-	movq	%rbx, 8(%rdi)
-	adcq	%r11, %rax
-	movq	%rax, 16(%rdi)
-	adcq	%r15, %rsi
-	movq	%rsi, 24(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fpDbl_add4Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_add4Lbmi2:                   ## @mcl_fpDbl_add4Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r9
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r10
-	movq	48(%rsi), %r12
-	movq	40(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	24(%rdx), %r15
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	40(%rsi), %r13
-	movq	24(%rsi), %rbp
-	movq	32(%rsi), %rsi
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rbx, 16(%rdi)
-	adcq	%r15, %rbp
-	movq	%rbp, 24(%rdi)
-	adcq	%r14, %rsi
-	adcq	%r11, %r13
-	adcq	%r10, %r12
-	adcq	%r9, %r8
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rsi, %rdx
-	subq	(%rcx), %rdx
-	movq	%r13, %rbp
-	sbbq	8(%rcx), %rbp
-	movq	%r12, %rbx
-	sbbq	16(%rcx), %rbx
-	movq	%r8, %r9
-	sbbq	24(%rcx), %r9
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%rsi, %rdx
-	movq	%rdx, 32(%rdi)
-	testb	%al, %al
-	cmovneq	%r13, %rbp
-	movq	%rbp, 40(%rdi)
-	cmovneq	%r12, %rbx
-	movq	%rbx, 48(%rdi)
-	cmovneq	%r8, %r9
-	movq	%r9, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sub4Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_sub4Lbmi2:                   ## @mcl_fpDbl_sub4Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r9
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	(%rsi), %rbx
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	movq	%rbx, (%rdi)
-	movq	8(%rsi), %rbx
-	sbbq	8(%rdx), %rbx
-	movq	%rbx, 8(%rdi)
-	movq	16(%rsi), %rbx
-	sbbq	16(%rdx), %rbx
-	movq	%rbx, 16(%rdi)
-	movq	24(%rsi), %rbx
-	sbbq	%r11, %rbx
-	movq	40(%rdx), %r11
-	movq	32(%rdx), %rdx
-	movq	%rbx, 24(%rdi)
-	movq	32(%rsi), %r12
-	sbbq	%rdx, %r12
-	movq	48(%rsi), %r14
-	movq	40(%rsi), %r15
-	sbbq	%r11, %r15
-	sbbq	%r10, %r14
-	sbbq	%r9, %r8
-	movl	$0, %edx
-	sbbq	$0, %rdx
-	andl	$1, %edx
-	movq	(%rcx), %rsi
-	cmoveq	%rax, %rsi
-	testb	%dl, %dl
-	movq	16(%rcx), %rdx
-	cmoveq	%rax, %rdx
-	movq	24(%rcx), %rbx
-	cmoveq	%rax, %rbx
-	cmovneq	8(%rcx), %rax
-	addq	%r12, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%r15, %rax
-	movq	%rax, 40(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 48(%rdi)
-	adcq	%r8, %rbx
-	movq	%rbx, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_mulUnitPre5Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_mulUnitPre5Lbmi2:               ## @mcl_fp_mulUnitPre5Lbmi2
-## BB#0:
-	pushq	%r14
-	pushq	%rbx
-	mulxq	32(%rsi), %r8, %r11
-	mulxq	24(%rsi), %r9, %rax
-	mulxq	16(%rsi), %r10, %rcx
-	mulxq	8(%rsi), %r14, %rbx
-	mulxq	(%rsi), %rdx, %rsi
-	movq	%rdx, (%rdi)
-	addq	%r14, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
-	adcq	%r9, %rcx
-	movq	%rcx, 24(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 32(%rdi)
-	adcq	$0, %r11
-	movq	%r11, 40(%rdi)
-	popq	%rbx
-	popq	%r14
-	retq
-
-	.globl	_mcl_fpDbl_mulPre5Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_mulPre5Lbmi2:                ## @mcl_fpDbl_mulPre5Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	movq	%rdi, -40(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %r11
-	movq	8(%rsi), %r10
-	movq	(%rdx), %rcx
-	movq	%r10, %rdx
-	mulxq	%rcx, %rax, %r14
-	movq	%r11, %rdx
-	mulxq	%rcx, %rdx, %rbx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	24(%rsi), %rbp
-	movq	%rbp, -48(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %r15
-	addq	%rax, %rbx
-	movq	%r15, %rdx
-	mulxq	%rcx, %rax, %r13
-	adcq	%r14, %rax
-	movq	%rbp, %rdx
-	mulxq	%rcx, %r8, %r12
-	adcq	%r13, %r8
-	movq	32(%rsi), %r14
-	movq	%r14, %rdx
-	mulxq	%rcx, %r9, %r13
-	adcq	%r12, %r9
-	movq	-56(%rsp), %rcx         ## 8-byte Reload
-	movq	%rcx, (%rdi)
-	adcq	$0, %r13
-	movq	-24(%rsp), %rdi         ## 8-byte Reload
-	movq	8(%rdi), %rbp
-	movq	%r11, %rdx
-	mulxq	%rbp, %r12, %r11
-	addq	%rbx, %r12
-	movq	%r10, %rdx
-	mulxq	%rbp, %rbx, %rcx
-	movq	%rcx, -56(%rsp)         ## 8-byte Spill
-	adcq	%rax, %rbx
-	movq	%r15, %rdx
-	mulxq	%rbp, %rcx, %r10
-	adcq	%r8, %rcx
-	movq	-48(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rbp, %rax, %r8
-	adcq	%r9, %rax
-	movq	%r14, %rdx
-	mulxq	%rbp, %r15, %rdx
-	adcq	%r13, %r15
-	sbbq	%r14, %r14
-	andl	$1, %r14d
-	addq	%r11, %rbx
-	movq	-40(%rsp), %rbp         ## 8-byte Reload
-	movq	%r12, 8(%rbp)
-	adcq	-56(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	%r10, %rax
-	adcq	%r8, %r15
-	adcq	%rdx, %r14
-	movq	(%rsi), %rdx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %r8
-	movq	%r8, -48(%rsp)          ## 8-byte Spill
-	movq	16(%rdi), %rbp
-	mulxq	%rbp, %r12, %rdx
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	addq	%rbx, %r12
-	movq	%r8, %rdx
-	mulxq	%rbp, %rbx, %rdx
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	adcq	%rcx, %rbx
-	movq	16(%rsi), %r11
-	movq	%r11, %rdx
-	mulxq	%rbp, %rcx, %rdx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	adcq	%rax, %rcx
-	movq	24(%rsi), %r13
-	movq	%r13, %rdx
-	mulxq	%rbp, %r9, %r10
-	adcq	%r15, %r9
-	movq	32(%rsi), %r15
-	movq	%r15, %rdx
-	mulxq	%rbp, %r8, %rdx
-	adcq	%r14, %r8
-	sbbq	%r14, %r14
-	andl	$1, %r14d
-	addq	-8(%rsp), %rbx          ## 8-byte Folded Reload
-	adcq	-16(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	-32(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	%r10, %r8
-	adcq	%rdx, %r14
-	movq	-40(%rsp), %r10         ## 8-byte Reload
-	movq	%r12, 16(%r10)
-	movq	%rdi, %rbp
-	movq	24(%rbp), %rax
-	movq	-56(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %r12, %rdi
-	addq	%rbx, %r12
-	movq	-48(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %rbx, %rdx
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	adcq	%rcx, %rbx
-	movq	%r11, %rdx
-	mulxq	%rax, %rcx, %r11
-	adcq	%r9, %rcx
-	movq	%r13, %rdx
-	mulxq	%rax, %r13, %r9
-	adcq	%r8, %r13
-	movq	%r15, %rdx
-	mulxq	%rax, %r8, %rdx
-	adcq	%r14, %r8
-	sbbq	%r14, %r14
-	andl	$1, %r14d
-	addq	%rdi, %rbx
-	movq	%r12, 24(%r10)
-	movq	%r10, %rdi
-	adcq	-48(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	%r11, %r13
-	adcq	%r9, %r8
-	adcq	%rdx, %r14
-	movq	32(%rbp), %rdx
-	mulxq	8(%rsi), %rax, %r9
-	mulxq	(%rsi), %rbp, %r10
-	addq	%rbx, %rbp
-	adcq	%rcx, %rax
-	mulxq	16(%rsi), %rbx, %r11
-	adcq	%r13, %rbx
-	movq	%rbp, 32(%rdi)
-	mulxq	32(%rsi), %rcx, %r15
-	mulxq	24(%rsi), %rsi, %rdx
-	adcq	%r8, %rsi
-	adcq	%r14, %rcx
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	addq	%r10, %rax
-	movq	%rax, 40(%rdi)
-	adcq	%r9, %rbx
-	movq	%rbx, 48(%rdi)
-	adcq	%r11, %rsi
-	movq	%rsi, 56(%rdi)
-	adcq	%rdx, %rcx
-	movq	%rcx, 64(%rdi)
-	adcq	%r15, %rbp
-	movq	%rbp, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sqrPre5Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre5Lbmi2:                ## @mcl_fpDbl_sqrPre5Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	16(%rsi), %r11
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rcx
-	movq	%r11, %rdx
-	mulxq	%rax, %rbx, %r15
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %r13
-	movq	%rcx, %rdx
-	mulxq	%rax, %r12, %rbp
-	movq	%rbp, -16(%rsp)         ## 8-byte Spill
-	movq	%rax, %rdx
-	mulxq	%rax, %rdx, %r14
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	addq	%r12, %r14
-	adcq	%rbp, %rbx
-	movq	%r13, %rdx
-	mulxq	%rax, %r8, %r10
-	adcq	%r15, %r8
-	movq	%r9, %rdx
-	movq	%r9, -8(%rsp)           ## 8-byte Spill
-	mulxq	%rax, %rbp, %r15
-	adcq	%r10, %rbp
-	movq	-24(%rsp), %rax         ## 8-byte Reload
-	movq	%rax, (%rdi)
-	adcq	$0, %r15
-	addq	%r12, %r14
-	movq	%rcx, %rdx
-	mulxq	%rcx, %rax, %rdx
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	adcq	%rbx, %rax
-	movq	%r11, %rdx
-	mulxq	%rcx, %rbx, %r10
-	adcq	%r8, %rbx
-	movq	%r13, %rdx
-	mulxq	%rcx, %r13, %r8
-	adcq	%rbp, %r13
-	movq	%r9, %rdx
-	mulxq	%rcx, %r12, %rcx
-	adcq	%r15, %r12
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	addq	-16(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%r14, 8(%rdi)
-	adcq	-24(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	%r10, %r13
-	adcq	%r8, %r12
-	adcq	%rcx, %r15
-	movq	(%rsi), %r9
-	movq	8(%rsi), %r10
-	movq	%r9, %rdx
-	mulxq	%r11, %rbp, %rcx
-	movq	%rcx, -16(%rsp)         ## 8-byte Spill
-	addq	%rax, %rbp
-	movq	%r10, %rdx
-	mulxq	%r11, %rax, %r8
-	adcq	%rbx, %rax
-	movq	%r11, %rdx
-	mulxq	%r11, %r14, %rcx
-	movq	%rcx, -24(%rsp)         ## 8-byte Spill
-	adcq	%r13, %r14
-	movq	24(%rsi), %rcx
-	movq	%rcx, %rdx
-	mulxq	%r11, %rbx, %r13
-	adcq	%r12, %rbx
-	movq	-8(%rsp), %rdx          ## 8-byte Reload
-	mulxq	%r11, %r12, %rdx
-	adcq	%r15, %r12
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	addq	-16(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	%r8, %r14
-	movq	%rbp, 16(%rdi)
-	adcq	-24(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	%r13, %r12
-	adcq	%rdx, %r15
-	movq	%r10, %rdx
-	mulxq	%rcx, %r10, %rdx
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	movq	%r9, %rdx
-	mulxq	%rcx, %r13, %rdx
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	addq	%rax, %r13
-	movq	16(%rsi), %r8
-	movq	32(%rsi), %rax
-	adcq	%r14, %r10
-	movq	%r8, %rdx
-	mulxq	%rcx, %r9, %r14
-	adcq	%rbx, %r9
-	movq	%rcx, %rdx
-	mulxq	%rcx, %r11, %rbp
-	adcq	%r12, %r11
-	movq	%rax, %rdx
-	mulxq	%rcx, %r12, %rdx
-	adcq	%r15, %r12
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	-16(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%r13, 24(%rdi)
-	adcq	-8(%rsp), %r9           ## 8-byte Folded Reload
-	adcq	%r14, %r11
-	adcq	%rbp, %r12
-	adcq	%rdx, %rbx
-	movq	%rax, %rdx
-	mulxq	24(%rsi), %rbp, %r14
-	mulxq	(%rsi), %rdx, %r15
-	addq	%r10, %rdx
-	movq	%rdx, 32(%rdi)
-	movq	%rax, %rdx
-	mulxq	8(%rsi), %rsi, %r10
-	adcq	%r9, %rsi
-	movq	%r8, %rdx
-	mulxq	%rax, %rcx, %r8
-	adcq	%r11, %rcx
-	adcq	%r12, %rbp
-	movq	%rax, %rdx
-	mulxq	%rax, %rdx, %rax
-	adcq	%rbx, %rdx
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	%r15, %rsi
-	movq	%rsi, 40(%rdi)
-	adcq	%r10, %rcx
-	movq	%rcx, 48(%rdi)
-	adcq	%r8, %rbp
-	movq	%rbp, 56(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 64(%rdi)
-	adcq	%rax, %rbx
-	movq	%rbx, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_mont5Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_mont5Lbmi2:                     ## @mcl_fp_mont5Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	32(%rsi), %rdi
-	movq	%rdi, -104(%rsp)        ## 8-byte Spill
-	movq	(%rdx), %rax
-	movq	%rdi, %rdx
-	mulxq	%rax, %r10, %rbx
-	movq	24(%rsi), %rdx
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r12, %r14
-	movq	16(%rsi), %rdx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r13, %r11
-	movq	(%rsi), %rbp
-	movq	%rbp, -40(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %rdx
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rdi, %r9
-	movq	%rbp, %rdx
-	mulxq	%rax, %r15, %r8
-	addq	%rdi, %r8
-	adcq	%r13, %r9
-	adcq	%r12, %r11
-	adcq	%r10, %r14
-	adcq	$0, %rbx
-	movq	%rbx, -112(%rsp)        ## 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	%r15, %rdx
-	imulq	%rax, %rdx
-	movq	32(%rcx), %rax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rax, %r12
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	24(%rcx), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r13, %r10
-	movq	8(%rcx), %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rdi, %rbp
-	movq	(%rcx), %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rsi, %rbx
-	addq	%rdi, %rbx
-	movq	16(%rcx), %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rdi, %rcx
-	adcq	%rbp, %rdi
-	adcq	%r13, %rcx
-	adcq	-120(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	$0, %r12
-	addq	%r15, %rsi
-	adcq	%r8, %rbx
-	adcq	%r9, %rdi
-	adcq	%r11, %rcx
-	adcq	%r14, %r10
-	adcq	-112(%rsp), %r12        ## 8-byte Folded Reload
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	movq	8(%rax), %rdx
-	mulxq	-104(%rsp), %rax, %r14  ## 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	mulxq	-24(%rsp), %rax, %r15   ## 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	mulxq	-32(%rsp), %r13, %r9    ## 8-byte Folded Reload
-	mulxq	-48(%rsp), %r8, %rsi    ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %r11, %rax   ## 8-byte Folded Reload
-	addq	%r8, %rax
-	adcq	%r13, %rsi
-	adcq	-120(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	$0, %r14
-	addq	%rbx, %r11
-	adcq	%rdi, %rax
-	adcq	%rcx, %rsi
-	adcq	%r10, %r9
-	adcq	%r12, %r15
-	adcq	%rbp, %r14
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	movq	%r11, %rdx
-	imulq	-16(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	-56(%rsp), %rcx, %r10   ## 8-byte Folded Reload
-	movq	%rcx, -112(%rsp)        ## 8-byte Spill
-	mulxq	-64(%rsp), %rcx, %rdi   ## 8-byte Folded Reload
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	mulxq	-88(%rsp), %r13, %rcx   ## 8-byte Folded Reload
-	mulxq	-72(%rsp), %r8, %rbx    ## 8-byte Folded Reload
-	mulxq	-80(%rsp), %rdx, %rbp   ## 8-byte Folded Reload
-	addq	%r8, %rbp
-	adcq	%r13, %rbx
-	adcq	-120(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rdi        ## 8-byte Folded Reload
-	adcq	$0, %r10
-	addq	%r11, %rdx
-	adcq	%rax, %rbp
-	adcq	%rsi, %rbx
-	adcq	%r9, %rcx
-	adcq	%r15, %rdi
-	adcq	%r14, %r10
-	adcq	$0, %r12
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	movq	16(%rax), %rdx
-	mulxq	-104(%rsp), %rax, %r15  ## 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	mulxq	-24(%rsp), %rax, %r11   ## 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	mulxq	-32(%rsp), %r13, %r9    ## 8-byte Folded Reload
-	mulxq	-48(%rsp), %rsi, %r8    ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %r14, %rax   ## 8-byte Folded Reload
-	addq	%rsi, %rax
-	adcq	%r13, %r8
-	adcq	-120(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	$0, %r15
-	addq	%rbp, %r14
-	adcq	%rbx, %rax
-	adcq	%rcx, %r8
-	adcq	%rdi, %r9
-	adcq	%r10, %r11
-	adcq	%r12, %r15
-	sbbq	%r13, %r13
-	andl	$1, %r13d
-	movq	%r14, %rdx
-	imulq	-16(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	-56(%rsp), %rcx, %r12   ## 8-byte Folded Reload
-	movq	%rcx, -112(%rsp)        ## 8-byte Spill
-	mulxq	-64(%rsp), %rcx, %r10   ## 8-byte Folded Reload
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	mulxq	-88(%rsp), %rdi, %rsi   ## 8-byte Folded Reload
-	mulxq	-72(%rsp), %rcx, %rbx   ## 8-byte Folded Reload
-	mulxq	-80(%rsp), %rdx, %rbp   ## 8-byte Folded Reload
-	addq	%rcx, %rbp
-	adcq	%rdi, %rbx
-	adcq	-120(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	$0, %r12
-	addq	%r14, %rdx
-	adcq	%rax, %rbp
-	adcq	%r8, %rbx
-	adcq	%r9, %rsi
-	adcq	%r11, %r10
-	adcq	%r15, %r12
-	adcq	$0, %r13
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	movq	24(%rax), %rdx
-	mulxq	-104(%rsp), %rcx, %rax  ## 8-byte Folded Reload
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	mulxq	-24(%rsp), %r11, %r14   ## 8-byte Folded Reload
-	mulxq	-32(%rsp), %r8, %r9     ## 8-byte Folded Reload
-	mulxq	-48(%rsp), %rax, %rdi   ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %r15, %rcx   ## 8-byte Folded Reload
-	addq	%rax, %rcx
-	adcq	%r8, %rdi
-	adcq	%r11, %r9
-	adcq	-120(%rsp), %r14        ## 8-byte Folded Reload
-	movq	-112(%rsp), %rax        ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rbp, %r15
-	adcq	%rbx, %rcx
-	adcq	%rsi, %rdi
-	adcq	%r10, %r9
-	adcq	%r12, %r14
-	adcq	%r13, %rax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	movq	%r15, %rdx
-	imulq	-16(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	-56(%rsp), %rax, %rbp   ## 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	mulxq	-64(%rsp), %r13, %r10   ## 8-byte Folded Reload
-	mulxq	-88(%rsp), %rbx, %r8    ## 8-byte Folded Reload
-	mulxq	-72(%rsp), %rsi, %r11   ## 8-byte Folded Reload
-	mulxq	-80(%rsp), %rdx, %rax   ## 8-byte Folded Reload
-	addq	%rsi, %rax
-	adcq	%rbx, %r11
-	adcq	%r13, %r8
-	adcq	-120(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	$0, %rbp
-	addq	%r15, %rdx
-	adcq	%rcx, %rax
-	adcq	%rdi, %r11
-	adcq	%r9, %r8
-	adcq	%r14, %r10
-	adcq	-112(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	$0, %r12
-	movq	-96(%rsp), %rcx         ## 8-byte Reload
-	movq	32(%rcx), %rdx
-	mulxq	-104(%rsp), %rcx, %r14  ## 8-byte Folded Reload
-	movq	%rcx, -96(%rsp)         ## 8-byte Spill
-	mulxq	-24(%rsp), %rcx, %rbx   ## 8-byte Folded Reload
-	movq	%rcx, -104(%rsp)        ## 8-byte Spill
-	mulxq	-32(%rsp), %rsi, %r15   ## 8-byte Folded Reload
-	mulxq	-48(%rsp), %rcx, %r9    ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %r13, %rdi   ## 8-byte Folded Reload
-	addq	%rcx, %rdi
-	adcq	%rsi, %r9
-	adcq	-104(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	$0, %r14
-	addq	%rax, %r13
-	adcq	%r11, %rdi
-	adcq	%r8, %r9
-	adcq	%r10, %r15
-	adcq	%rbp, %rbx
-	adcq	%r12, %r14
-	sbbq	%rax, %rax
-	movq	-16(%rsp), %rdx         ## 8-byte Reload
-	imulq	%r13, %rdx
-	mulxq	-80(%rsp), %r10, %rcx   ## 8-byte Folded Reload
-	mulxq	-72(%rsp), %r8, %rsi    ## 8-byte Folded Reload
-	addq	%rcx, %r8
-	mulxq	-88(%rsp), %rbp, %r11   ## 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	mulxq	-64(%rsp), %rcx, %r12   ## 8-byte Folded Reload
-	adcq	%r11, %rcx
-	mulxq	-56(%rsp), %rsi, %r11   ## 8-byte Folded Reload
-	adcq	%r12, %rsi
-	adcq	$0, %r11
-	andl	$1, %eax
-	addq	%r13, %r10
-	adcq	%rdi, %r8
-	adcq	%r9, %rbp
-	adcq	%r15, %rcx
-	adcq	%rbx, %rsi
-	adcq	%r14, %r11
-	adcq	$0, %rax
-	movq	%r8, %rdi
-	subq	-80(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rbp, %rbx
-	sbbq	-72(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rcx, %r9
-	sbbq	-88(%rsp), %r9          ## 8-byte Folded Reload
-	movq	%rsi, %rdx
-	sbbq	-64(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%r11, %r10
-	sbbq	-56(%rsp), %r10         ## 8-byte Folded Reload
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%rsi, %rdx
-	testb	%al, %al
-	cmovneq	%r8, %rdi
-	movq	-8(%rsp), %rax          ## 8-byte Reload
-	movq	%rdi, (%rax)
-	cmovneq	%rbp, %rbx
-	movq	%rbx, 8(%rax)
-	cmovneq	%rcx, %r9
-	movq	%r9, 16(%rax)
-	movq	%rdx, 24(%rax)
-	cmovneq	%r11, %r10
-	movq	%r10, 32(%rax)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montNF5Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_montNF5Lbmi2:                   ## @mcl_fp_montNF5Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	(%rsi), %r13
-	movq	8(%rsi), %rbp
-	movq	%rbp, -104(%rsp)        ## 8-byte Spill
-	movq	(%rdx), %rax
-	movq	%rbp, %rdx
-	mulxq	%rax, %rbp, %r9
-	movq	%r13, %rdx
-	movq	%r13, -24(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r8, %r10
-	movq	16(%rsi), %rdx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	addq	%rbp, %r10
-	mulxq	%rax, %rbp, %rbx
-	adcq	%r9, %rbp
-	movq	24(%rsi), %rdx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r15, %r9
-	adcq	%rbx, %r15
-	movq	32(%rsi), %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rax, %r11
-	adcq	%r9, %rax
-	adcq	$0, %r11
-	movq	-8(%rcx), %rsi
-	movq	%rsi, -32(%rsp)         ## 8-byte Spill
-	movq	%r8, %rdx
-	imulq	%rsi, %rdx
-	movq	(%rcx), %rsi
-	movq	%rsi, -48(%rsp)         ## 8-byte Spill
-	mulxq	%rsi, %rbx, %r14
-	addq	%r8, %rbx
-	movq	8(%rcx), %rsi
-	movq	%rsi, -40(%rsp)         ## 8-byte Spill
-	mulxq	%rsi, %rbx, %r12
-	adcq	%r10, %rbx
-	movq	16(%rcx), %rsi
-	movq	%rsi, -16(%rsp)         ## 8-byte Spill
-	mulxq	%rsi, %r10, %rdi
-	adcq	%rbp, %r10
-	movq	24(%rcx), %rsi
-	movq	%rsi, -88(%rsp)         ## 8-byte Spill
-	mulxq	%rsi, %r9, %rbp
-	adcq	%r15, %r9
-	movq	32(%rcx), %rcx
-	movq	%rcx, -56(%rsp)         ## 8-byte Spill
-	mulxq	%rcx, %r8, %rcx
-	adcq	%rax, %r8
-	adcq	$0, %r11
-	addq	%r14, %rbx
-	adcq	%r12, %r10
-	adcq	%rdi, %r9
-	adcq	%rbp, %r8
-	adcq	%rcx, %r11
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	movq	8(%rax), %rdx
-	mulxq	-104(%rsp), %rcx, %rsi  ## 8-byte Folded Reload
-	mulxq	%r13, %r14, %rax
-	addq	%rcx, %rax
-	mulxq	-64(%rsp), %rcx, %rdi   ## 8-byte Folded Reload
-	adcq	%rsi, %rcx
-	mulxq	-72(%rsp), %rsi, %r15   ## 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-80(%rsp), %rdi, %rbp   ## 8-byte Folded Reload
-	adcq	%r15, %rdi
-	adcq	$0, %rbp
-	addq	%rbx, %r14
-	adcq	%r10, %rax
-	adcq	%r9, %rcx
-	adcq	%r8, %rsi
-	adcq	%r11, %rdi
-	adcq	$0, %rbp
-	movq	%r14, %rdx
-	movq	-32(%rsp), %r12         ## 8-byte Reload
-	imulq	%r12, %rdx
-	mulxq	-48(%rsp), %rbx, %r15   ## 8-byte Folded Reload
-	addq	%r14, %rbx
-	movq	-40(%rsp), %r13         ## 8-byte Reload
-	mulxq	%r13, %r8, %rbx
-	adcq	%rax, %r8
-	mulxq	-16(%rsp), %r9, %rax    ## 8-byte Folded Reload
-	adcq	%rcx, %r9
-	mulxq	-88(%rsp), %r10, %rcx   ## 8-byte Folded Reload
-	adcq	%rsi, %r10
-	mulxq	-56(%rsp), %r11, %rdx   ## 8-byte Folded Reload
-	adcq	%rdi, %r11
-	adcq	$0, %rbp
-	addq	%r15, %r8
-	adcq	%rbx, %r9
-	adcq	%rax, %r10
-	adcq	%rcx, %r11
-	adcq	%rdx, %rbp
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	movq	16(%rax), %rdx
-	mulxq	-104(%rsp), %rcx, %rax  ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %r14, %rsi   ## 8-byte Folded Reload
-	addq	%rcx, %rsi
-	mulxq	-64(%rsp), %rbx, %rcx   ## 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	-72(%rsp), %rdi, %r15   ## 8-byte Folded Reload
-	adcq	%rcx, %rdi
-	mulxq	-80(%rsp), %rcx, %rax   ## 8-byte Folded Reload
-	adcq	%r15, %rcx
-	adcq	$0, %rax
-	addq	%r8, %r14
-	adcq	%r9, %rsi
-	adcq	%r10, %rbx
-	adcq	%r11, %rdi
-	adcq	%rbp, %rcx
-	adcq	$0, %rax
-	movq	%r14, %rdx
-	imulq	%r12, %rdx
-	movq	-48(%rsp), %r12         ## 8-byte Reload
-	mulxq	%r12, %rbp, %r15
-	addq	%r14, %rbp
-	mulxq	%r13, %r8, %rbp
-	adcq	%rsi, %r8
-	movq	-16(%rsp), %r13         ## 8-byte Reload
-	mulxq	%r13, %r9, %rsi
-	adcq	%rbx, %r9
-	mulxq	-88(%rsp), %r10, %rbx   ## 8-byte Folded Reload
-	adcq	%rdi, %r10
-	mulxq	-56(%rsp), %r11, %rdx   ## 8-byte Folded Reload
-	adcq	%rcx, %r11
-	adcq	$0, %rax
-	addq	%r15, %r8
-	adcq	%rbp, %r9
-	adcq	%rsi, %r10
-	adcq	%rbx, %r11
-	adcq	%rdx, %rax
-	movq	-96(%rsp), %rcx         ## 8-byte Reload
-	movq	24(%rcx), %rdx
-	mulxq	-104(%rsp), %rdi, %rsi  ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %r14, %rcx   ## 8-byte Folded Reload
-	addq	%rdi, %rcx
-	mulxq	-64(%rsp), %rbx, %rdi   ## 8-byte Folded Reload
-	adcq	%rsi, %rbx
-	mulxq	-72(%rsp), %rsi, %r15   ## 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-80(%rsp), %rdi, %rbp   ## 8-byte Folded Reload
-	adcq	%r15, %rdi
-	adcq	$0, %rbp
-	addq	%r8, %r14
-	adcq	%r9, %rcx
-	adcq	%r10, %rbx
-	adcq	%r11, %rsi
-	adcq	%rax, %rdi
-	adcq	$0, %rbp
-	movq	%r14, %rdx
-	imulq	-32(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	%r12, %rax, %r11
-	addq	%r14, %rax
-	mulxq	-40(%rsp), %r8, %r14    ## 8-byte Folded Reload
-	adcq	%rcx, %r8
-	mulxq	%r13, %r9, %rax
-	adcq	%rbx, %r9
-	movq	-88(%rsp), %r12         ## 8-byte Reload
-	mulxq	%r12, %r10, %rbx
-	adcq	%rsi, %r10
-	mulxq	-56(%rsp), %rcx, %rdx   ## 8-byte Folded Reload
-	adcq	%rdi, %rcx
-	adcq	$0, %rbp
-	addq	%r11, %r8
-	adcq	%r14, %r9
-	adcq	%rax, %r10
-	adcq	%rbx, %rcx
-	adcq	%rdx, %rbp
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	movq	32(%rax), %rdx
-	mulxq	-104(%rsp), %rdi, %rbx  ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %r14, %rsi   ## 8-byte Folded Reload
-	addq	%rdi, %rsi
-	mulxq	-64(%rsp), %rdi, %rax   ## 8-byte Folded Reload
-	adcq	%rbx, %rdi
-	mulxq	-72(%rsp), %rbx, %r15   ## 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	-80(%rsp), %r11, %rax   ## 8-byte Folded Reload
-	adcq	%r15, %r11
-	adcq	$0, %rax
-	addq	%r8, %r14
-	adcq	%r9, %rsi
-	adcq	%r10, %rdi
-	adcq	%rcx, %rbx
-	adcq	%rbp, %r11
-	adcq	$0, %rax
-	movq	-32(%rsp), %rdx         ## 8-byte Reload
-	imulq	%r14, %rdx
-	movq	-48(%rsp), %r10         ## 8-byte Reload
-	mulxq	%r10, %rcx, %rbp
-	movq	%rbp, -96(%rsp)         ## 8-byte Spill
-	addq	%r14, %rcx
-	movq	-40(%rsp), %r9          ## 8-byte Reload
-	mulxq	%r9, %r14, %rcx
-	movq	%rcx, -104(%rsp)        ## 8-byte Spill
-	adcq	%rsi, %r14
-	movq	%r13, %r8
-	mulxq	%r8, %r15, %r13
-	adcq	%rdi, %r15
-	mulxq	%r12, %rbp, %rcx
-	adcq	%rbx, %rbp
-	movq	-56(%rsp), %rbx         ## 8-byte Reload
-	mulxq	%rbx, %r12, %rdx
-	adcq	%r11, %r12
-	adcq	$0, %rax
-	addq	-96(%rsp), %r14         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	%r13, %rbp
-	adcq	%rcx, %r12
-	adcq	%rdx, %rax
-	movq	%r14, %rcx
-	subq	%r10, %rcx
-	movq	%r15, %rsi
-	sbbq	%r9, %rsi
-	movq	%rbp, %rdi
-	sbbq	%r8, %rdi
-	movq	%r12, %r8
-	sbbq	-88(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%rax, %rdx
-	sbbq	%rbx, %rdx
-	movq	%rdx, %rbx
-	sarq	$63, %rbx
-	cmovsq	%r14, %rcx
-	movq	-8(%rsp), %rbx          ## 8-byte Reload
-	movq	%rcx, (%rbx)
-	cmovsq	%r15, %rsi
-	movq	%rsi, 8(%rbx)
-	cmovsq	%rbp, %rdi
-	movq	%rdi, 16(%rbx)
-	cmovsq	%r12, %r8
-	movq	%r8, 24(%rbx)
-	cmovsq	%rax, %rdx
-	movq	%rdx, 32(%rbx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montRed5Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_montRed5Lbmi2:                  ## @mcl_fp_montRed5Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	(%rsi), %r15
-	movq	%r15, %rdx
-	imulq	%rax, %rdx
-	movq	32(%rcx), %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r8, %r14
-	movq	24(%rcx), %r12
-	mulxq	%r12, %r10, %r13
-	movq	%r12, -56(%rsp)         ## 8-byte Spill
-	movq	16(%rcx), %r9
-	mulxq	%r9, %rdi, %rbp
-	movq	%r9, -64(%rsp)          ## 8-byte Spill
-	movq	(%rcx), %rbx
-	movq	%rbx, -40(%rsp)         ## 8-byte Spill
-	movq	8(%rcx), %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rax, %r11
-	mulxq	%rbx, %rdx, %rcx
-	addq	%rax, %rcx
-	adcq	%rdi, %r11
-	adcq	%r10, %rbp
-	adcq	%r8, %r13
-	adcq	$0, %r14
-	addq	%r15, %rdx
-	movq	72(%rsi), %rax
-	movq	64(%rsi), %rdx
-	adcq	8(%rsi), %rcx
-	adcq	16(%rsi), %r11
-	adcq	24(%rsi), %rbp
-	adcq	32(%rsi), %r13
-	adcq	40(%rsi), %r14
-	movq	%r14, -112(%rsp)        ## 8-byte Spill
-	movq	56(%rsi), %rdi
-	movq	48(%rsi), %rsi
-	adcq	$0, %rsi
-	movq	%rsi, -32(%rsp)         ## 8-byte Spill
-	adcq	$0, %rdi
-	movq	%rdi, -88(%rsp)         ## 8-byte Spill
-	adcq	$0, %rdx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rcx, %rdx
-	movq	-104(%rsp), %r14        ## 8-byte Reload
-	imulq	%r14, %rdx
-	mulxq	-72(%rsp), %rax, %r15   ## 8-byte Folded Reload
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	mulxq	%r12, %rax, %r10
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	mulxq	%r9, %rbx, %r8
-	movq	-80(%rsp), %r12         ## 8-byte Reload
-	mulxq	%r12, %r9, %rdi
-	mulxq	-40(%rsp), %rdx, %rax   ## 8-byte Folded Reload
-	addq	%r9, %rax
-	adcq	%rbx, %rdi
-	adcq	-24(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	-16(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	$0, %r15
-	addq	%rcx, %rdx
-	adcq	%r11, %rax
-	adcq	%rbp, %rdi
-	adcq	%r13, %r8
-	adcq	-112(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	-32(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	$0, -88(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -96(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -48(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %rsi
-	movq	%rax, %rdx
-	imulq	%r14, %rdx
-	mulxq	-72(%rsp), %rcx, %r13   ## 8-byte Folded Reload
-	movq	%rcx, -112(%rsp)        ## 8-byte Spill
-	mulxq	-56(%rsp), %rcx, %r14   ## 8-byte Folded Reload
-	movq	%rcx, -32(%rsp)         ## 8-byte Spill
-	mulxq	-64(%rsp), %r11, %rbx   ## 8-byte Folded Reload
-	mulxq	%r12, %r9, %rbp
-	mulxq	-40(%rsp), %rdx, %rcx   ## 8-byte Folded Reload
-	addq	%r9, %rcx
-	adcq	%r11, %rbp
-	adcq	-32(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%rax, %rdx
-	adcq	%rdi, %rcx
-	adcq	%r8, %rbp
-	adcq	%r10, %rbx
-	adcq	%r15, %r14
-	adcq	-88(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	$0, -96(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -48(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %rsi
-	movq	%rcx, %rdx
-	imulq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	-72(%rsp), %r9          ## 8-byte Reload
-	mulxq	%r9, %rax, %r12
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	mulxq	-56(%rsp), %rax, %r10   ## 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	mulxq	-64(%rsp), %r8, %r11    ## 8-byte Folded Reload
-	mulxq	-80(%rsp), %rdi, %r15   ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %rdx, %rax   ## 8-byte Folded Reload
-	addq	%rdi, %rax
-	adcq	%r8, %r15
-	adcq	-112(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	$0, %r12
-	addq	%rcx, %rdx
-	adcq	%rbp, %rax
-	adcq	%rbx, %r15
-	adcq	%r14, %r11
-	adcq	%r13, %r10
-	adcq	-96(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	$0, -48(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %rsi
-	movq	-104(%rsp), %rdx        ## 8-byte Reload
-	imulq	%rax, %rdx
-	mulxq	%r9, %rdi, %rcx
-	movq	%rdi, -96(%rsp)         ## 8-byte Spill
-	mulxq	-56(%rsp), %rbp, %rdi   ## 8-byte Folded Reload
-	movq	%rbp, -104(%rsp)        ## 8-byte Spill
-	mulxq	-64(%rsp), %r13, %rbp   ## 8-byte Folded Reload
-	movq	-40(%rsp), %r14         ## 8-byte Reload
-	mulxq	%r14, %r8, %r9
-	mulxq	-80(%rsp), %rbx, %rdx   ## 8-byte Folded Reload
-	addq	%r9, %rbx
-	adcq	%r13, %rdx
-	adcq	-104(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rdi         ## 8-byte Folded Reload
-	adcq	$0, %rcx
-	addq	%rax, %r8
-	adcq	%r15, %rbx
-	adcq	%r11, %rdx
-	adcq	%r10, %rbp
-	adcq	%r12, %rdi
-	adcq	-48(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	$0, %rsi
-	movq	%rbx, %rax
-	subq	%r14, %rax
-	movq	%rdx, %r8
-	sbbq	-80(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%rbp, %r9
-	sbbq	-64(%rsp), %r9          ## 8-byte Folded Reload
-	movq	%rdi, %r10
-	sbbq	-56(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%rcx, %r11
-	sbbq	-72(%rsp), %r11         ## 8-byte Folded Reload
-	sbbq	$0, %rsi
-	andl	$1, %esi
-	cmovneq	%rcx, %r11
-	testb	%sil, %sil
-	cmovneq	%rbx, %rax
-	movq	-8(%rsp), %rcx          ## 8-byte Reload
-	movq	%rax, (%rcx)
-	cmovneq	%rdx, %r8
-	movq	%r8, 8(%rcx)
-	cmovneq	%rbp, %r9
-	movq	%r9, 16(%rcx)
-	cmovneq	%rdi, %r10
-	movq	%r10, 24(%rcx)
-	movq	%r11, 32(%rcx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_addPre5Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_addPre5Lbmi2:                   ## @mcl_fp_addPre5Lbmi2
-## BB#0:
-	movq	32(%rdx), %r8
-	movq	24(%rdx), %r9
-	movq	24(%rsi), %r11
-	movq	32(%rsi), %r10
-	movq	16(%rdx), %rcx
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rcx
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	adcq	%r9, %r11
-	movq	%r11, 24(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_subPre5Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_subPre5Lbmi2:                   ## @mcl_fp_subPre5Lbmi2
-## BB#0:
-	pushq	%rbx
-	movq	32(%rsi), %r10
-	movq	24(%rdx), %r8
-	movq	32(%rdx), %r9
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %rcx
-	movq	%rbx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r8, %r11
-	movq	%r11, 24(%rdi)
-	sbbq	%r9, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	popq	%rbx
-	retq
-
-	.globl	_mcl_fp_shr1_5Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_shr1_5Lbmi2:                    ## @mcl_fp_shr1_5Lbmi2
-## BB#0:
-	movq	32(%rsi), %r8
-	movq	24(%rsi), %rcx
-	movq	16(%rsi), %rdx
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rax
-	movq	%rax, (%rdi)
-	shrdq	$1, %rdx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rcx, %rdx
-	movq	%rdx, 16(%rdi)
-	shrdq	$1, %r8, %rcx
-	movq	%rcx, 24(%rdi)
-	shrq	%r8
-	movq	%r8, 32(%rdi)
-	retq
-
-	.globl	_mcl_fp_add5Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_add5Lbmi2:                      ## @mcl_fp_add5Lbmi2
-## BB#0:
-	pushq	%rbx
-	movq	32(%rdx), %r11
-	movq	24(%rdx), %rbx
-	movq	24(%rsi), %r9
-	movq	32(%rsi), %r8
-	movq	16(%rdx), %r10
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r10
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	adcq	%rbx, %r9
-	movq	%r9, 24(%rdi)
-	adcq	%r11, %r8
-	movq	%r8, 32(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r10
-	sbbq	24(%rcx), %r9
-	sbbq	32(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB74_2
-## BB#1:                                ## %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	movq	%r9, 24(%rdi)
-	movq	%r8, 32(%rdi)
-LBB74_2:                                ## %carry
-	popq	%rbx
-	retq
-
-	.globl	_mcl_fp_addNF5Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_addNF5Lbmi2:                    ## @mcl_fp_addNF5Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	32(%rdx), %r8
-	movq	24(%rdx), %r9
-	movq	16(%rdx), %r10
-	movq	(%rdx), %r14
-	movq	8(%rdx), %r11
-	addq	(%rsi), %r14
-	adcq	8(%rsi), %r11
-	adcq	16(%rsi), %r10
-	adcq	24(%rsi), %r9
-	adcq	32(%rsi), %r8
-	movq	%r14, %rsi
-	subq	(%rcx), %rsi
-	movq	%r11, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r10, %rbx
-	sbbq	16(%rcx), %rbx
-	movq	%r9, %r15
-	sbbq	24(%rcx), %r15
-	movq	%r8, %rax
-	sbbq	32(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r14, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r11, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
-	cmovsq	%r9, %r15
-	movq	%r15, 24(%rdi)
-	cmovsq	%r8, %rax
-	movq	%rax, 32(%rdi)
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_sub5Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_sub5Lbmi2:                      ## @mcl_fp_sub5Lbmi2
-## BB#0:
-	pushq	%r14
-	pushq	%rbx
-	movq	32(%rsi), %r8
-	movq	24(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %r10
-	movq	%rax, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	sbbq	%r11, %r9
-	movq	%r9, 24(%rdi)
-	sbbq	%r14, %r8
-	movq	%r8, 32(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	LBB76_2
-## BB#1:                                ## %carry
-	movq	32(%rcx), %r11
-	movq	24(%rcx), %r14
-	movq	8(%rcx), %rdx
-	movq	16(%rcx), %rbx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%rsi, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
-	adcq	%r9, %r14
-	movq	%r14, 24(%rdi)
-	adcq	%r8, %r11
-	movq	%r11, 32(%rdi)
-LBB76_2:                                ## %nocarry
-	popq	%rbx
-	popq	%r14
-	retq
-
-	.globl	_mcl_fp_subNF5Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_subNF5Lbmi2:                    ## @mcl_fp_subNF5Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	32(%rsi), %r12
-	movdqu	(%rdx), %xmm0
-	movdqu	16(%rdx), %xmm1
-	pshufd	$78, %xmm1, %xmm2       ## xmm2 = xmm1[2,3,0,1]
-	movd	%xmm2, %r9
-	movdqu	(%rsi), %xmm2
-	movdqu	16(%rsi), %xmm3
-	pshufd	$78, %xmm3, %xmm4       ## xmm4 = xmm3[2,3,0,1]
-	movd	%xmm4, %r8
-	movd	%xmm1, %r10
-	movd	%xmm3, %r14
-	pshufd	$78, %xmm0, %xmm1       ## xmm1 = xmm0[2,3,0,1]
-	movd	%xmm1, %r11
-	pshufd	$78, %xmm2, %xmm1       ## xmm1 = xmm2[2,3,0,1]
-	movd	%xmm1, %r15
-	movd	%xmm0, %rsi
-	movd	%xmm2, %r13
-	subq	%rsi, %r13
-	sbbq	%r11, %r15
-	sbbq	%r10, %r14
-	sbbq	%r9, %r8
-	sbbq	32(%rdx), %r12
-	movq	%r12, %rdx
-	sarq	$63, %rdx
-	movq	%rdx, %rsi
-	shldq	$1, %r12, %rsi
-	movq	8(%rcx), %rax
-	andq	%rsi, %rax
-	andq	(%rcx), %rsi
-	movq	32(%rcx), %r9
-	andq	%rdx, %r9
-	rorxq	$63, %rdx, %rbx
-	andq	24(%rcx), %rdx
-	andq	16(%rcx), %rbx
-	addq	%r13, %rsi
-	movq	%rsi, (%rdi)
-	adcq	%r15, %rax
-	movq	%rax, 8(%rdi)
-	adcq	%r14, %rbx
-	movq	%rbx, 16(%rdi)
-	adcq	%r8, %rdx
-	movq	%rdx, 24(%rdi)
-	adcq	%r12, %r9
-	movq	%r9, 32(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fpDbl_add5Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_add5Lbmi2:                   ## @mcl_fpDbl_add5Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	72(%rdx), %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	64(%rdx), %r11
-	movq	56(%rdx), %r14
-	movq	48(%rdx), %r15
-	movq	24(%rsi), %rbp
-	movq	32(%rsi), %r13
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rax
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r12
-	adcq	24(%rdx), %rbp
-	adcq	32(%rdx), %r13
-	movq	40(%rdx), %r9
-	movq	%rbx, (%rdi)
-	movq	72(%rsi), %r8
-	movq	%rax, 8(%rdi)
-	movq	64(%rsi), %r10
-	movq	%r12, 16(%rdi)
-	movq	56(%rsi), %r12
-	movq	%rbp, 24(%rdi)
-	movq	48(%rsi), %rbp
-	movq	40(%rsi), %rbx
-	movq	%r13, 32(%rdi)
-	adcq	%r9, %rbx
-	adcq	%r15, %rbp
-	adcq	%r14, %r12
-	adcq	%r11, %r10
-	adcq	-8(%rsp), %r8           ## 8-byte Folded Reload
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rbx, %rax
-	subq	(%rcx), %rax
-	movq	%rbp, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r12, %r9
-	sbbq	16(%rcx), %r9
-	movq	%r10, %r11
-	sbbq	24(%rcx), %r11
-	movq	%r8, %r14
-	sbbq	32(%rcx), %r14
-	sbbq	$0, %rsi
-	andl	$1, %esi
-	cmovneq	%rbx, %rax
-	movq	%rax, 40(%rdi)
-	testb	%sil, %sil
-	cmovneq	%rbp, %rdx
-	movq	%rdx, 48(%rdi)
-	cmovneq	%r12, %r9
-	movq	%r9, 56(%rdi)
-	cmovneq	%r10, %r11
-	movq	%r11, 64(%rdi)
-	cmovneq	%r8, %r14
-	movq	%r14, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sub5Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_sub5Lbmi2:                   ## @mcl_fpDbl_sub5Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	72(%rdx), %r9
-	movq	64(%rdx), %r10
-	movq	56(%rdx), %r14
-	movq	16(%rsi), %r8
-	movq	(%rsi), %r15
-	movq	8(%rsi), %r11
-	xorl	%eax, %eax
-	subq	(%rdx), %r15
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %r8
-	movq	24(%rsi), %r12
-	sbbq	24(%rdx), %r12
-	movq	%r15, (%rdi)
-	movq	32(%rsi), %rbx
-	sbbq	32(%rdx), %rbx
-	movq	%r11, 8(%rdi)
-	movq	48(%rdx), %r15
-	movq	40(%rdx), %rdx
-	movq	%r8, 16(%rdi)
-	movq	72(%rsi), %r8
-	movq	%r12, 24(%rdi)
-	movq	64(%rsi), %r11
-	movq	%rbx, 32(%rdi)
-	movq	40(%rsi), %rbp
-	sbbq	%rdx, %rbp
-	movq	56(%rsi), %r12
-	movq	48(%rsi), %r13
-	sbbq	%r15, %r13
-	sbbq	%r14, %r12
-	sbbq	%r10, %r11
-	sbbq	%r9, %r8
-	movl	$0, %edx
-	sbbq	$0, %rdx
-	andl	$1, %edx
-	movq	(%rcx), %rsi
-	cmoveq	%rax, %rsi
-	testb	%dl, %dl
-	movq	16(%rcx), %rdx
-	cmoveq	%rax, %rdx
-	movq	8(%rcx), %rbx
-	cmoveq	%rax, %rbx
-	movq	32(%rcx), %r9
-	cmoveq	%rax, %r9
-	cmovneq	24(%rcx), %rax
-	addq	%rbp, %rsi
-	movq	%rsi, 40(%rdi)
-	adcq	%r13, %rbx
-	movq	%rbx, 48(%rdi)
-	adcq	%r12, %rdx
-	movq	%rdx, 56(%rdi)
-	adcq	%r11, %rax
-	movq	%rax, 64(%rdi)
-	adcq	%r8, %r9
-	movq	%r9, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_mulUnitPre6Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_mulUnitPre6Lbmi2:               ## @mcl_fp_mulUnitPre6Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	mulxq	40(%rsi), %r8, %r11
-	mulxq	32(%rsi), %r9, %r12
-	mulxq	24(%rsi), %r10, %rcx
-	mulxq	16(%rsi), %r14, %rbx
-	mulxq	8(%rsi), %r15, %rax
-	mulxq	(%rsi), %rdx, %rsi
-	movq	%rdx, (%rdi)
-	addq	%r15, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r14, %rax
-	movq	%rax, 16(%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 24(%rdi)
-	adcq	%r9, %rcx
-	movq	%rcx, 32(%rdi)
-	adcq	%r8, %r12
-	movq	%r12, 40(%rdi)
-	adcq	$0, %r11
-	movq	%r11, 48(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fpDbl_mulPre6Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_mulPre6Lbmi2:                ## @mcl_fpDbl_mulPre6Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %r11
-	movq	%rdi, -48(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %r15
-	movq	8(%rsi), %rcx
-	movq	%rcx, -80(%rsp)         ## 8-byte Spill
-	movq	(%r11), %rax
-	movq	%r11, -56(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rdx
-	mulxq	%rax, %rcx, %r14
-	movq	%r15, %rdx
-	mulxq	%rax, %rdx, %rbp
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	24(%rsi), %rbx
-	movq	%rbx, -88(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %rdx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	addq	%rcx, %rbp
-	mulxq	%rax, %rcx, %r12
-	adcq	%r14, %rcx
-	movq	%rbx, %rdx
-	mulxq	%rax, %rbx, %r14
-	adcq	%r12, %rbx
-	movq	32(%rsi), %r12
-	movq	%r12, %rdx
-	mulxq	%rax, %r8, %r13
-	adcq	%r14, %r8
-	movq	40(%rsi), %r14
-	movq	%r14, %rdx
-	mulxq	%rax, %r9, %r10
-	adcq	%r13, %r9
-	movq	-72(%rsp), %rax         ## 8-byte Reload
-	movq	%rax, (%rdi)
-	adcq	$0, %r10
-	movq	8(%r11), %rdi
-	movq	%r15, %rdx
-	mulxq	%rdi, %r13, %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	addq	%rbp, %r13
-	movq	-80(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %rbp, %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	adcq	%rcx, %rbp
-	movq	-64(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %rax, %r11
-	adcq	%rbx, %rax
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %rbx, %rcx
-	movq	%rcx, -88(%rsp)         ## 8-byte Spill
-	adcq	%r8, %rbx
-	movq	%r12, %rdx
-	mulxq	%rdi, %rcx, %r8
-	adcq	%r9, %rcx
-	movq	%r14, %rdx
-	mulxq	%rdi, %r12, %rdx
-	adcq	%r10, %r12
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	addq	-72(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	%r11, %rbx
-	movq	-48(%rsp), %rdi         ## 8-byte Reload
-	movq	%r13, 8(%rdi)
-	adcq	-88(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	%r8, %r12
-	adcq	%rdx, %r15
-	movq	(%rsi), %rdx
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %r8
-	movq	%r8, -80(%rsp)          ## 8-byte Spill
-	movq	-56(%rsp), %r14         ## 8-byte Reload
-	movq	16(%r14), %rdi
-	mulxq	%rdi, %r13, %rdx
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	addq	%rbp, %r13
-	movq	%r8, %rdx
-	mulxq	%rdi, %r8, %rdx
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	adcq	%rax, %r8
-	movq	16(%rsi), %rdx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	mulxq	%rdi, %r11, %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	adcq	%rbx, %r11
-	movq	24(%rsi), %rdx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rdi, %rax, %rbx
-	adcq	%rcx, %rax
-	movq	32(%rsi), %rdx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	mulxq	%rdi, %r10, %rcx
-	adcq	%r12, %r10
-	movq	40(%rsi), %rdx
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	mulxq	%rdi, %r9, %rdx
-	adcq	%r15, %r9
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	addq	-8(%rsp), %r8           ## 8-byte Folded Reload
-	adcq	-16(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	-24(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	%rbx, %r10
-	adcq	%rcx, %r9
-	adcq	%rdx, %rbp
-	movq	-48(%rsp), %rcx         ## 8-byte Reload
-	movq	%r13, 16(%rcx)
-	movq	24(%r14), %rdi
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %r12, %rcx
-	movq	%rcx, -88(%rsp)         ## 8-byte Spill
-	addq	%r8, %r12
-	movq	-80(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %rbx, %rcx
-	movq	%rcx, -80(%rsp)         ## 8-byte Spill
-	adcq	%r11, %rbx
-	movq	-64(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %rcx, %r11
-	adcq	%rax, %rcx
-	movq	-72(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %r14, %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	adcq	%r10, %r14
-	movq	-32(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %r8, %rax
-	adcq	%r9, %r8
-	movq	-40(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %r13, %rdx
-	adcq	%rbp, %r13
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	addq	-88(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	%r11, %r14
-	movq	-48(%rsp), %rdi         ## 8-byte Reload
-	movq	%r12, 24(%rdi)
-	adcq	-64(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	%rax, %r13
-	adcq	%rdx, %r15
-	movq	(%rsi), %rdx
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %rbp
-	movq	%rbp, -80(%rsp)         ## 8-byte Spill
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	movq	32(%rax), %rdi
-	mulxq	%rdi, %r12, %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	addq	%rbx, %r12
-	movq	%rbp, %rdx
-	mulxq	%rdi, %rbx, %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	adcq	%rcx, %rbx
-	movq	16(%rsi), %r11
-	movq	%r11, %rdx
-	mulxq	%rdi, %rax, %rcx
-	movq	%rcx, -32(%rsp)         ## 8-byte Spill
-	adcq	%r14, %rax
-	movq	24(%rsi), %r14
-	movq	%r14, %rdx
-	mulxq	%rdi, %rbp, %rcx
-	movq	%rcx, -40(%rsp)         ## 8-byte Spill
-	adcq	%r8, %rbp
-	movq	32(%rsi), %r8
-	movq	%r8, %rdx
-	mulxq	%rdi, %rcx, %r10
-	adcq	%r13, %rcx
-	movq	40(%rsi), %r13
-	movq	%r13, %rdx
-	mulxq	%rdi, %r9, %rdx
-	adcq	%r15, %r9
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	addq	-64(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	-72(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	-32(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-40(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	%r10, %r9
-	adcq	%rdx, %rsi
-	movq	-48(%rsp), %r10         ## 8-byte Reload
-	movq	%r12, 32(%r10)
-	movq	-56(%rsp), %rdx         ## 8-byte Reload
-	movq	40(%rdx), %rdi
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %r15, %rdx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	addq	%rbx, %r15
-	movq	-80(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %rbx, %r12
-	adcq	%rax, %rbx
-	movq	%r11, %rdx
-	mulxq	%rdi, %rax, %r11
-	adcq	%rbp, %rax
-	movq	%r14, %rdx
-	mulxq	%rdi, %rbp, %r14
-	adcq	%rcx, %rbp
-	movq	%r8, %rdx
-	mulxq	%rdi, %rcx, %r8
-	adcq	%r9, %rcx
-	movq	%r13, %rdx
-	mulxq	%rdi, %rdi, %r9
-	adcq	%rsi, %rdi
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	addq	-56(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%r15, 40(%r10)
-	movq	%rbx, 48(%r10)
-	adcq	%r12, %rax
-	movq	%rax, 56(%r10)
-	adcq	%r11, %rbp
-	movq	%rbp, 64(%r10)
-	adcq	%r14, %rcx
-	movq	%rcx, 72(%r10)
-	adcq	%r8, %rdi
-	movq	%rdi, 80(%r10)
-	adcq	%r9, %rsi
-	movq	%rsi, 88(%r10)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sqrPre6Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre6Lbmi2:                ## @mcl_fpDbl_sqrPre6Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdi, %r9
-	movq	16(%rsi), %rdx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rax
-	mulxq	%rcx, %r10, %r8
-	movq	24(%rsi), %rbp
-	movq	%rbp, -48(%rsp)         ## 8-byte Spill
-	movq	%rax, %rdx
-	mulxq	%rcx, %r11, %rbx
-	movq	%rbx, -40(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rdx
-	mulxq	%rcx, %rdx, %r14
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	addq	%r11, %r14
-	adcq	%rbx, %r10
-	movq	%rbp, %rdx
-	mulxq	%rcx, %r15, %rbp
-	adcq	%r8, %r15
-	movq	32(%rsi), %rbx
-	movq	%rbx, %rdx
-	mulxq	%rcx, %r8, %r13
-	adcq	%rbp, %r8
-	movq	40(%rsi), %rdi
-	movq	%rdi, %rdx
-	mulxq	%rcx, %rcx, %r12
-	adcq	%r13, %rcx
-	movq	%r9, -24(%rsp)          ## 8-byte Spill
-	movq	-56(%rsp), %rdx         ## 8-byte Reload
-	movq	%rdx, (%r9)
-	adcq	$0, %r12
-	addq	%r11, %r14
-	movq	%rax, %rdx
-	mulxq	%rax, %rbp, %rdx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	adcq	%r10, %rbp
-	movq	-64(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %r13, %r10
-	adcq	%r15, %r13
-	movq	-48(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %r15, %rdx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	adcq	%r8, %r15
-	movq	%rbx, %rdx
-	mulxq	%rax, %rbx, %r8
-	adcq	%rcx, %rbx
-	movq	%rdi, %rdx
-	mulxq	%rax, %r11, %rax
-	adcq	%r12, %r11
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-40(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-56(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%r14, 8(%r9)
-	adcq	%r10, %r15
-	adcq	-64(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	%r8, %r11
-	adcq	%rax, %r12
-	movq	(%rsi), %rdx
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %rdi
-	movq	%rdi, -64(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %rcx
-	mulxq	%rcx, %rax, %rdx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	addq	%rbp, %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rdx
-	mulxq	%rcx, %rbp, %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	adcq	%r13, %rbp
-	movq	%rcx, %rdx
-	mulxq	%rcx, %r13, %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	adcq	%r15, %r13
-	movq	24(%rsi), %rax
-	movq	%rax, %rdx
-	mulxq	%rcx, %r8, %rdi
-	movq	%rdi, -56(%rsp)         ## 8-byte Spill
-	adcq	%r8, %rbx
-	movq	32(%rsi), %r10
-	movq	%r10, %rdx
-	mulxq	%rcx, %r14, %r15
-	adcq	%r11, %r14
-	movq	40(%rsi), %r11
-	movq	%r11, %rdx
-	mulxq	%rcx, %r9, %rdx
-	adcq	%r12, %r9
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	-32(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-8(%rsp), %r13          ## 8-byte Folded Reload
-	adcq	-16(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	%rdi, %r14
-	adcq	%r15, %r9
-	adcq	%rdx, %rcx
-	movq	-48(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %rdi, %rdx
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	addq	%rbp, %rdi
-	movq	-64(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %r15, %rbp
-	adcq	%r13, %r15
-	adcq	%r8, %rbx
-	movq	%rax, %rdx
-	mulxq	%rax, %r8, %rdx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	adcq	%r14, %r8
-	movq	%r10, %rdx
-	mulxq	%rax, %r12, %r10
-	adcq	%r9, %r12
-	movq	%r11, %rdx
-	mulxq	%rax, %r13, %rax
-	adcq	%rcx, %r13
-	sbbq	%r9, %r9
-	andl	$1, %r9d
-	addq	-48(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	%rbp, %rbx
-	movq	-24(%rsp), %rdx         ## 8-byte Reload
-	movq	-40(%rsp), %rbp         ## 8-byte Reload
-	movq	%rbp, 16(%rdx)
-	movq	%rdi, 24(%rdx)
-	adcq	-56(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	-64(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	%r10, %r13
-	adcq	%rax, %r9
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rdi
-	movq	%rdi, -64(%rsp)         ## 8-byte Spill
-	movq	32(%rsi), %rax
-	movq	%rcx, %rdx
-	mulxq	%rax, %rdx, %rbp
-	movq	%rbp, -56(%rsp)         ## 8-byte Spill
-	addq	%r15, %rdx
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rdx
-	mulxq	%rax, %r15, %rdx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	adcq	%rbx, %r15
-	movq	16(%rsi), %r10
-	movq	%r10, %rdx
-	mulxq	%rax, %r14, %rbx
-	adcq	%r8, %r14
-	movq	24(%rsi), %r8
-	movq	%r8, %rdx
-	mulxq	%rax, %rbp, %rdi
-	adcq	%r12, %rbp
-	movq	%rax, %rdx
-	mulxq	%rax, %r11, %r12
-	adcq	%r13, %r11
-	movq	40(%rsi), %rsi
-	movq	%rsi, %rdx
-	mulxq	%rax, %r13, %rdx
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	adcq	%r13, %r9
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	-56(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-32(%rsp), %r14         ## 8-byte Folded Reload
-	adcq	%rbx, %rbp
-	adcq	%rdi, %r11
-	adcq	%r12, %r9
-	adcq	%rdx, %rax
-	movq	%rcx, %rdx
-	mulxq	%rsi, %r12, %rcx
-	addq	%r15, %r12
-	movq	-64(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rsi, %rdi, %r15
-	adcq	%r14, %rdi
-	movq	%r10, %rdx
-	mulxq	%rsi, %rbx, %r10
-	adcq	%rbp, %rbx
-	movq	%r8, %rdx
-	mulxq	%rsi, %rbp, %r8
-	adcq	%r11, %rbp
-	adcq	%r13, %r9
-	movq	%rsi, %rdx
-	mulxq	%rsi, %rsi, %r11
-	adcq	%rax, %rsi
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%rcx, %rdi
-	movq	-24(%rsp), %rdx         ## 8-byte Reload
-	movq	-40(%rsp), %rcx         ## 8-byte Reload
-	movq	%rcx, 32(%rdx)
-	movq	%r12, 40(%rdx)
-	movq	%rdi, 48(%rdx)
-	adcq	%r15, %rbx
-	movq	%rbx, 56(%rdx)
-	adcq	%r10, %rbp
-	movq	%rbp, 64(%rdx)
-	adcq	%r8, %r9
-	movq	%r9, 72(%rdx)
-	adcq	-48(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, 80(%rdx)
-	adcq	%r11, %rax
-	movq	%rax, 88(%rdx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_mont6Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_mont6Lbmi2:                     ## @mcl_fp_mont6Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$32, %rsp
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rdi, 24(%rsp)          ## 8-byte Spill
-	movq	40(%rsi), %rdi
-	movq	%rdi, -96(%rsp)         ## 8-byte Spill
-	movq	(%rdx), %rax
-	movq	%rdi, %rdx
-	mulxq	%rax, %r11, %rbx
-	movq	32(%rsi), %rdx
-	movq	%rdx, (%rsp)            ## 8-byte Spill
-	mulxq	%rax, %r14, %r12
-	movq	24(%rsi), %rdx
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	mulxq	%rax, %r15, %r13
-	movq	16(%rsi), %rdx
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r8, %r10
-	movq	(%rsi), %rbp
-	movq	%rbp, -24(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %rdx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rdi, %r9
-	movq	%rbp, %rdx
-	mulxq	%rax, %rdx, %rbp
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	addq	%rdi, %rbp
-	adcq	%r8, %r9
-	adcq	%r15, %r10
-	adcq	%r14, %r13
-	adcq	%r11, %r12
-	adcq	$0, %rbx
-	movq	%rbx, -120(%rsp)        ## 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	imulq	%rax, %rdx
-	movq	40(%rcx), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rax, %r15
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	16(%rcx), %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r8, %rax
-	movq	8(%rcx), %rsi
-	movq	%rsi, -56(%rsp)         ## 8-byte Spill
-	mulxq	%rsi, %rbx, %r11
-	movq	(%rcx), %rsi
-	movq	%rsi, -64(%rsp)         ## 8-byte Spill
-	mulxq	%rsi, %rsi, %r14
-	addq	%rbx, %r14
-	adcq	%r8, %r11
-	movq	24(%rcx), %rdi
-	movq	%rdi, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rdi, %rdi, %r8
-	adcq	%rax, %rdi
-	movq	32(%rcx), %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rbx, %rax
-	adcq	%r8, %rbx
-	adcq	-112(%rsp), %rax        ## 8-byte Folded Reload
-	adcq	$0, %r15
-	addq	-128(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	%rbp, %r14
-	adcq	%r9, %r11
-	adcq	%r10, %rdi
-	adcq	%r13, %rbx
-	adcq	%r12, %rax
-	adcq	-120(%rsp), %r15        ## 8-byte Folded Reload
-	sbbq	%r10, %r10
-	andl	$1, %r10d
-	movq	-88(%rsp), %rcx         ## 8-byte Reload
-	movq	8(%rcx), %rdx
-	mulxq	-96(%rsp), %rsi, %rcx   ## 8-byte Folded Reload
-	movq	%rsi, -112(%rsp)        ## 8-byte Spill
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	mulxq	(%rsp), %rcx, %r13      ## 8-byte Folded Reload
-	movq	%rcx, -104(%rsp)        ## 8-byte Spill
-	mulxq	-8(%rsp), %r12, %rcx    ## 8-byte Folded Reload
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	mulxq	-32(%rsp), %rbp, %rcx   ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %rsi, %r9    ## 8-byte Folded Reload
-	addq	%rbp, %r9
-	mulxq	-16(%rsp), %rbp, %r8    ## 8-byte Folded Reload
-	adcq	%rcx, %rbp
-	adcq	%r12, %r8
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r13        ## 8-byte Folded Reload
-	movq	-120(%rsp), %rcx        ## 8-byte Reload
-	adcq	$0, %rcx
-	addq	%r14, %rsi
-	adcq	%r11, %r9
-	adcq	%rdi, %rbp
-	adcq	%rbx, %r8
-	adcq	%rax, %rdx
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	adcq	%r15, %r13
-	adcq	%r10, %rcx
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rbx
-	movq	%rbx, %rdx
-	imulq	8(%rsp), %rdx           ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %rax, %r12   ## 8-byte Folded Reload
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	mulxq	-80(%rsp), %r14, %r11   ## 8-byte Folded Reload
-	mulxq	-56(%rsp), %rcx, %rax   ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %rdi, %rsi   ## 8-byte Folded Reload
-	addq	%rcx, %rsi
-	mulxq	-48(%rsp), %rcx, %r10   ## 8-byte Folded Reload
-	adcq	%rax, %rcx
-	mulxq	-72(%rsp), %rax, %r15   ## 8-byte Folded Reload
-	adcq	%r10, %rax
-	adcq	%r14, %r15
-	adcq	-104(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	$0, %r12
-	addq	%rbx, %rdi
-	adcq	%r9, %rsi
-	adcq	%rbp, %rcx
-	adcq	%r8, %rax
-	adcq	-128(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	%r13, %r11
-	adcq	-120(%rsp), %r12        ## 8-byte Folded Reload
-	movq	-112(%rsp), %r10        ## 8-byte Reload
-	adcq	$0, %r10
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	movq	16(%rdx), %rdx
-	mulxq	-96(%rsp), %rbp, %rdi   ## 8-byte Folded Reload
-	movq	%rbp, -112(%rsp)        ## 8-byte Spill
-	movq	%rdi, -120(%rsp)        ## 8-byte Spill
-	mulxq	(%rsp), %rdi, %rbp      ## 8-byte Folded Reload
-	movq	%rdi, -104(%rsp)        ## 8-byte Spill
-	mulxq	-8(%rsp), %rdi, %r13    ## 8-byte Folded Reload
-	movq	%rdi, 16(%rsp)          ## 8-byte Spill
-	mulxq	-32(%rsp), %rdi, %r14   ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %rbx, %r9    ## 8-byte Folded Reload
-	movq	%rbx, -128(%rsp)        ## 8-byte Spill
-	addq	%rdi, %r9
-	mulxq	-16(%rsp), %rbx, %r8    ## 8-byte Folded Reload
-	adcq	%r14, %rbx
-	adcq	16(%rsp), %r8           ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rbp        ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	movq	-128(%rsp), %rdi        ## 8-byte Reload
-	addq	%rsi, %rdi
-	movq	%rdi, -128(%rsp)        ## 8-byte Spill
-	adcq	%rcx, %r9
-	adcq	%rax, %rbx
-	adcq	%r15, %r8
-	adcq	%r11, %r13
-	adcq	%r12, %rbp
-	adcq	%r10, %rdx
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rdx
-	imulq	8(%rsp), %rdx           ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %rax, %r11   ## 8-byte Folded Reload
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	mulxq	-80(%rsp), %r15, %r12   ## 8-byte Folded Reload
-	mulxq	-56(%rsp), %rax, %rcx   ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %rdi, %r14   ## 8-byte Folded Reload
-	addq	%rax, %r14
-	mulxq	-48(%rsp), %rax, %r10   ## 8-byte Folded Reload
-	adcq	%rcx, %rax
-	mulxq	-72(%rsp), %rsi, %rcx   ## 8-byte Folded Reload
-	adcq	%r10, %rsi
-	adcq	%r15, %rcx
-	adcq	-104(%rsp), %r12        ## 8-byte Folded Reload
-	adcq	$0, %r11
-	addq	-128(%rsp), %rdi        ## 8-byte Folded Reload
-	adcq	%r9, %r14
-	adcq	%rbx, %rax
-	adcq	%r8, %rsi
-	adcq	%r13, %rcx
-	adcq	%rbp, %r12
-	adcq	-120(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	$0, -112(%rsp)          ## 8-byte Folded Spill
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	movq	24(%rdx), %rdx
-	mulxq	-96(%rsp), %rbp, %rdi   ## 8-byte Folded Reload
-	movq	%rbp, -128(%rsp)        ## 8-byte Spill
-	movq	%rdi, -120(%rsp)        ## 8-byte Spill
-	mulxq	(%rsp), %rdi, %r15      ## 8-byte Folded Reload
-	movq	%rdi, -104(%rsp)        ## 8-byte Spill
-	mulxq	-8(%rsp), %r10, %rbp    ## 8-byte Folded Reload
-	mulxq	-32(%rsp), %rbx, %r9    ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %r13, %rdi   ## 8-byte Folded Reload
-	addq	%rbx, %rdi
-	mulxq	-16(%rsp), %rbx, %r8    ## 8-byte Folded Reload
-	adcq	%r9, %rbx
-	adcq	%r10, %r8
-	adcq	-104(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	-128(%rsp), %r15        ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r14, %r13
-	adcq	%rax, %rdi
-	adcq	%rsi, %rbx
-	adcq	%rcx, %r8
-	adcq	%r12, %rbp
-	adcq	%r11, %r15
-	adcq	-112(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%r13, %rdx
-	imulq	8(%rsp), %rdx           ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %rax, %r10   ## 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	mulxq	-80(%rsp), %rax, %r12   ## 8-byte Folded Reload
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	mulxq	-56(%rsp), %rax, %r11   ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %rcx, %rsi   ## 8-byte Folded Reload
-	addq	%rax, %rsi
-	mulxq	-48(%rsp), %r14, %r9    ## 8-byte Folded Reload
-	adcq	%r11, %r14
-	mulxq	-72(%rsp), %rax, %r11   ## 8-byte Folded Reload
-	adcq	%r9, %rax
-	adcq	-104(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r12        ## 8-byte Folded Reload
-	adcq	$0, %r10
-	addq	%r13, %rcx
-	adcq	%rdi, %rsi
-	adcq	%rbx, %r14
-	adcq	%r8, %rax
-	adcq	%rbp, %r11
-	adcq	%r15, %r12
-	adcq	-120(%rsp), %r10        ## 8-byte Folded Reload
-	movq	-128(%rsp), %r15        ## 8-byte Reload
-	adcq	$0, %r15
-	movq	-88(%rsp), %rcx         ## 8-byte Reload
-	movq	32(%rcx), %rdx
-	mulxq	-96(%rsp), %rdi, %rcx   ## 8-byte Folded Reload
-	movq	%rdi, -112(%rsp)        ## 8-byte Spill
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	mulxq	(%rsp), %rdi, %rcx      ## 8-byte Folded Reload
-	movq	%rdi, 16(%rsp)          ## 8-byte Spill
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	mulxq	-8(%rsp), %r13, %rbp    ## 8-byte Folded Reload
-	mulxq	-32(%rsp), %rdi, %rcx   ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %rbx, %r8    ## 8-byte Folded Reload
-	movq	%rbx, -104(%rsp)        ## 8-byte Spill
-	addq	%rdi, %r8
-	mulxq	-16(%rsp), %rbx, %r9    ## 8-byte Folded Reload
-	adcq	%rcx, %rbx
-	adcq	%r13, %r9
-	adcq	16(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	-112(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	-120(%rsp), %rcx        ## 8-byte Reload
-	adcq	$0, %rcx
-	movq	-104(%rsp), %rdi        ## 8-byte Reload
-	addq	%rsi, %rdi
-	movq	%rdi, -104(%rsp)        ## 8-byte Spill
-	adcq	%r14, %r8
-	adcq	%rax, %rbx
-	adcq	%r11, %r9
-	adcq	%r12, %rbp
-	adcq	%r10, %rdx
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	adcq	%r15, %rcx
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, %r13
-	movq	%rdi, %rdx
-	imulq	8(%rsp), %rdx           ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %r14, %rax   ## 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	mulxq	-80(%rsp), %r12, %r15   ## 8-byte Folded Reload
-	mulxq	-56(%rsp), %rcx, %rax   ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %rdi, %rsi   ## 8-byte Folded Reload
-	addq	%rcx, %rsi
-	mulxq	-48(%rsp), %r11, %r10   ## 8-byte Folded Reload
-	adcq	%rax, %r11
-	mulxq	-72(%rsp), %rax, %rcx   ## 8-byte Folded Reload
-	adcq	%r10, %rax
-	adcq	%r12, %rcx
-	adcq	%r14, %r15
-	movq	-112(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	-104(%rsp), %rdi        ## 8-byte Folded Reload
-	adcq	%r8, %rsi
-	adcq	%rbx, %r11
-	adcq	%r9, %rax
-	adcq	%rbp, %rcx
-	adcq	-128(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	adcq	$0, %r13
-	movq	%r13, -120(%rsp)        ## 8-byte Spill
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	movq	40(%rdx), %rdx
-	mulxq	-96(%rsp), %rbp, %rdi   ## 8-byte Folded Reload
-	movq	%rbp, -128(%rsp)        ## 8-byte Spill
-	movq	%rdi, -88(%rsp)         ## 8-byte Spill
-	mulxq	(%rsp), %rbx, %rdi      ## 8-byte Folded Reload
-	movq	%rdi, -96(%rsp)         ## 8-byte Spill
-	mulxq	-8(%rsp), %r10, %rbp    ## 8-byte Folded Reload
-	mulxq	-16(%rsp), %r8, %r12    ## 8-byte Folded Reload
-	mulxq	-32(%rsp), %rdi, %r14   ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %r13, %r9    ## 8-byte Folded Reload
-	addq	%rdi, %r9
-	adcq	%r8, %r14
-	adcq	%r10, %r12
-	adcq	%rbx, %rbp
-	movq	-96(%rsp), %rdi         ## 8-byte Reload
-	adcq	-128(%rsp), %rdi        ## 8-byte Folded Reload
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%rsi, %r13
-	adcq	%r11, %r9
-	adcq	%rax, %r14
-	adcq	%rcx, %r12
-	adcq	%r15, %rbp
-	adcq	-112(%rsp), %rdi        ## 8-byte Folded Reload
-	movq	%rdi, -96(%rsp)         ## 8-byte Spill
-	adcq	-120(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	sbbq	%rcx, %rcx
-	movq	8(%rsp), %rdx           ## 8-byte Reload
-	imulq	%r13, %rdx
-	mulxq	-64(%rsp), %r8, %rax    ## 8-byte Folded Reload
-	mulxq	-56(%rsp), %r10, %rdi   ## 8-byte Folded Reload
-	addq	%rax, %r10
-	mulxq	-48(%rsp), %rsi, %rax   ## 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-72(%rsp), %rbx, %r11   ## 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	-80(%rsp), %rdi, %r15   ## 8-byte Folded Reload
-	adcq	%r11, %rdi
-	mulxq	-40(%rsp), %rax, %r11   ## 8-byte Folded Reload
-	adcq	%r15, %rax
-	adcq	$0, %r11
-	andl	$1, %ecx
-	addq	%r13, %r8
-	adcq	%r9, %r10
-	adcq	%r14, %rsi
-	adcq	%r12, %rbx
-	adcq	%rbp, %rdi
-	adcq	-96(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	$0, %rcx
-	movq	%r10, %rbp
-	subq	-64(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rsi, %rdx
-	sbbq	-56(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rbx, %r8
-	sbbq	-48(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%rdi, %r9
-	sbbq	-72(%rsp), %r9          ## 8-byte Folded Reload
-	movq	%rax, %r14
-	sbbq	-80(%rsp), %r14         ## 8-byte Folded Reload
-	movq	%r11, %r15
-	sbbq	-40(%rsp), %r15         ## 8-byte Folded Reload
-	sbbq	$0, %rcx
-	andl	$1, %ecx
-	cmovneq	%rdi, %r9
-	testb	%cl, %cl
-	cmovneq	%r10, %rbp
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	movq	%rbp, (%rcx)
-	cmovneq	%rsi, %rdx
-	movq	%rdx, 8(%rcx)
-	cmovneq	%rbx, %r8
-	movq	%r8, 16(%rcx)
-	movq	%r9, 24(%rcx)
-	cmovneq	%rax, %r14
-	movq	%r14, 32(%rcx)
-	cmovneq	%r11, %r15
-	movq	%r15, 40(%rcx)
-	addq	$32, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montNF6Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_montNF6Lbmi2:                   ## @mcl_fp_montNF6Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	(%rsi), %rax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	8(%rsi), %rdi
-	movq	%rdi, -128(%rsp)        ## 8-byte Spill
-	movq	(%rdx), %rbp
-	movq	%rdi, %rdx
-	mulxq	%rbp, %rdi, %rbx
-	movq	%rax, %rdx
-	mulxq	%rbp, %r9, %r14
-	movq	16(%rsi), %rdx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	addq	%rdi, %r14
-	mulxq	%rbp, %rdi, %r8
-	adcq	%rbx, %rdi
-	movq	24(%rsi), %rdx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	mulxq	%rbp, %rbx, %r10
-	adcq	%r8, %rbx
-	movq	32(%rsi), %rdx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rbp, %r8, %r11
-	adcq	%r10, %r8
-	movq	40(%rsi), %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	mulxq	%rbp, %rsi, %r15
-	adcq	%r11, %rsi
-	adcq	$0, %r15
-	movq	-8(%rcx), %rax
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%r9, %rdx
-	imulq	%rax, %rdx
-	movq	(%rcx), %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rbp, %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	addq	%r9, %rbp
-	movq	8(%rcx), %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r12, %r9
-	adcq	%r14, %r12
-	movq	16(%rcx), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r14, %rax
-	adcq	%rdi, %r14
-	movq	24(%rcx), %rdi
-	movq	%rdi, -32(%rsp)         ## 8-byte Spill
-	mulxq	%rdi, %r13, %rdi
-	adcq	%rbx, %r13
-	movq	32(%rcx), %rbp
-	movq	%rbp, -40(%rsp)         ## 8-byte Spill
-	mulxq	%rbp, %r11, %rbx
-	adcq	%r8, %r11
-	movq	40(%rcx), %rcx
-	movq	%rcx, -48(%rsp)         ## 8-byte Spill
-	mulxq	%rcx, %r10, %rcx
-	adcq	%rsi, %r10
-	adcq	$0, %r15
-	addq	-88(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	%r9, %r14
-	adcq	%rax, %r13
-	adcq	%rdi, %r11
-	adcq	%rbx, %r10
-	adcq	%rcx, %r15
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	movq	8(%rax), %rdx
-	mulxq	-128(%rsp), %rcx, %rsi  ## 8-byte Folded Reload
-	mulxq	-112(%rsp), %rbx, %rax  ## 8-byte Folded Reload
-	addq	%rcx, %rax
-	mulxq	-56(%rsp), %rcx, %rdi   ## 8-byte Folded Reload
-	adcq	%rsi, %rcx
-	mulxq	-64(%rsp), %rsi, %r8    ## 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-72(%rsp), %rdi, %rbp   ## 8-byte Folded Reload
-	movq	%rbp, -88(%rsp)         ## 8-byte Spill
-	adcq	%r8, %rdi
-	mulxq	-80(%rsp), %r8, %r9     ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	$0, %r9
-	addq	%r12, %rbx
-	adcq	%r14, %rax
-	adcq	%r13, %rcx
-	adcq	%r11, %rsi
-	adcq	%r10, %rdi
-	adcq	%r15, %r8
-	adcq	$0, %r9
-	movq	%rbx, %rdx
-	imulq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	mulxq	-96(%rsp), %rbp, %r13   ## 8-byte Folded Reload
-	addq	%rbx, %rbp
-	mulxq	-16(%rsp), %r11, %rbx   ## 8-byte Folded Reload
-	adcq	%rax, %r11
-	mulxq	-24(%rsp), %r14, %rax   ## 8-byte Folded Reload
-	adcq	%rcx, %r14
-	mulxq	-32(%rsp), %r10, %rcx   ## 8-byte Folded Reload
-	adcq	%rsi, %r10
-	mulxq	-40(%rsp), %r15, %rsi   ## 8-byte Folded Reload
-	adcq	%rdi, %r15
-	mulxq	-48(%rsp), %r12, %rdx   ## 8-byte Folded Reload
-	adcq	%r8, %r12
-	adcq	$0, %r9
-	addq	%r13, %r11
-	adcq	%rbx, %r14
-	adcq	%rax, %r10
-	adcq	%rcx, %r15
-	adcq	%rsi, %r12
-	adcq	%rdx, %r9
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	movq	16(%rax), %rdx
-	mulxq	-128(%rsp), %rcx, %rax  ## 8-byte Folded Reload
-	mulxq	-112(%rsp), %r13, %rdi  ## 8-byte Folded Reload
-	addq	%rcx, %rdi
-	mulxq	-56(%rsp), %rbx, %rcx   ## 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	-64(%rsp), %rsi, %rbp   ## 8-byte Folded Reload
-	adcq	%rcx, %rsi
-	mulxq	-72(%rsp), %rax, %rcx   ## 8-byte Folded Reload
-	movq	%rcx, -88(%rsp)         ## 8-byte Spill
-	adcq	%rbp, %rax
-	mulxq	-80(%rsp), %r8, %rcx    ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	$0, %rcx
-	addq	%r11, %r13
-	adcq	%r14, %rdi
-	adcq	%r10, %rbx
-	adcq	%r15, %rsi
-	adcq	%r12, %rax
-	adcq	%r9, %r8
-	adcq	$0, %rcx
-	movq	%r13, %rdx
-	imulq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	mulxq	-96(%rsp), %rbp, %r12   ## 8-byte Folded Reload
-	addq	%r13, %rbp
-	mulxq	-16(%rsp), %r11, %rbp   ## 8-byte Folded Reload
-	adcq	%rdi, %r11
-	mulxq	-24(%rsp), %r9, %rdi    ## 8-byte Folded Reload
-	adcq	%rbx, %r9
-	mulxq	-32(%rsp), %r10, %rbx   ## 8-byte Folded Reload
-	adcq	%rsi, %r10
-	mulxq	-40(%rsp), %r14, %rsi   ## 8-byte Folded Reload
-	adcq	%rax, %r14
-	mulxq	-48(%rsp), %r15, %rax   ## 8-byte Folded Reload
-	adcq	%r8, %r15
-	adcq	$0, %rcx
-	addq	%r12, %r11
-	adcq	%rbp, %r9
-	adcq	%rdi, %r10
-	adcq	%rbx, %r14
-	adcq	%rsi, %r15
-	adcq	%rax, %rcx
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	movq	24(%rax), %rdx
-	mulxq	-128(%rsp), %rsi, %rax  ## 8-byte Folded Reload
-	mulxq	-112(%rsp), %r13, %rbx  ## 8-byte Folded Reload
-	addq	%rsi, %rbx
-	mulxq	-56(%rsp), %rdi, %rbp   ## 8-byte Folded Reload
-	adcq	%rax, %rdi
-	mulxq	-64(%rsp), %rsi, %r8    ## 8-byte Folded Reload
-	adcq	%rbp, %rsi
-	mulxq	-72(%rsp), %rax, %rbp   ## 8-byte Folded Reload
-	adcq	%r8, %rax
-	mulxq	-80(%rsp), %r8, %r12    ## 8-byte Folded Reload
-	adcq	%rbp, %r8
-	adcq	$0, %r12
-	addq	%r11, %r13
-	adcq	%r9, %rbx
-	adcq	%r10, %rdi
-	adcq	%r14, %rsi
-	adcq	%r15, %rax
-	adcq	%rcx, %r8
-	adcq	$0, %r12
-	movq	%r13, %rdx
-	imulq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	mulxq	-96(%rsp), %rbp, %rcx   ## 8-byte Folded Reload
-	addq	%r13, %rbp
-	mulxq	-16(%rsp), %r11, %rbp   ## 8-byte Folded Reload
-	adcq	%rbx, %r11
-	mulxq	-24(%rsp), %r9, %rbx    ## 8-byte Folded Reload
-	adcq	%rdi, %r9
-	mulxq	-32(%rsp), %r10, %rdi   ## 8-byte Folded Reload
-	adcq	%rsi, %r10
-	mulxq	-40(%rsp), %r14, %rsi   ## 8-byte Folded Reload
-	adcq	%rax, %r14
-	mulxq	-48(%rsp), %r15, %rax   ## 8-byte Folded Reload
-	adcq	%r8, %r15
-	adcq	$0, %r12
-	addq	%rcx, %r11
-	adcq	%rbp, %r9
-	adcq	%rbx, %r10
-	adcq	%rdi, %r14
-	adcq	%rsi, %r15
-	adcq	%rax, %r12
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	movq	32(%rax), %rdx
-	mulxq	-128(%rsp), %rsi, %rcx  ## 8-byte Folded Reload
-	mulxq	-112(%rsp), %r13, %rax  ## 8-byte Folded Reload
-	addq	%rsi, %rax
-	mulxq	-56(%rsp), %rbx, %rsi   ## 8-byte Folded Reload
-	adcq	%rcx, %rbx
-	mulxq	-64(%rsp), %rdi, %rcx   ## 8-byte Folded Reload
-	adcq	%rsi, %rdi
-	mulxq	-72(%rsp), %rsi, %rbp   ## 8-byte Folded Reload
-	adcq	%rcx, %rsi
-	mulxq	-80(%rsp), %r8, %rcx    ## 8-byte Folded Reload
-	adcq	%rbp, %r8
-	adcq	$0, %rcx
-	addq	%r11, %r13
-	adcq	%r9, %rax
-	adcq	%r10, %rbx
-	adcq	%r14, %rdi
-	adcq	%r15, %rsi
-	adcq	%r12, %r8
-	adcq	$0, %rcx
-	movq	%r13, %rdx
-	imulq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	mulxq	-96(%rsp), %rbp, %r9    ## 8-byte Folded Reload
-	addq	%r13, %rbp
-	mulxq	-16(%rsp), %r13, %rbp   ## 8-byte Folded Reload
-	adcq	%rax, %r13
-	mulxq	-24(%rsp), %r11, %rax   ## 8-byte Folded Reload
-	adcq	%rbx, %r11
-	mulxq	-32(%rsp), %r10, %rbx   ## 8-byte Folded Reload
-	adcq	%rdi, %r10
-	mulxq	-40(%rsp), %r14, %rdi   ## 8-byte Folded Reload
-	adcq	%rsi, %r14
-	mulxq	-48(%rsp), %rsi, %rdx   ## 8-byte Folded Reload
-	adcq	%r8, %rsi
-	adcq	$0, %rcx
-	addq	%r9, %r13
-	adcq	%rbp, %r11
-	adcq	%rax, %r10
-	adcq	%rbx, %r14
-	adcq	%rdi, %rsi
-	adcq	%rdx, %rcx
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	movq	40(%rax), %rdx
-	mulxq	-128(%rsp), %rdi, %rax  ## 8-byte Folded Reload
-	mulxq	-112(%rsp), %r8, %rbx   ## 8-byte Folded Reload
-	addq	%rdi, %rbx
-	mulxq	-56(%rsp), %rdi, %rbp   ## 8-byte Folded Reload
-	adcq	%rax, %rdi
-	mulxq	-64(%rsp), %r15, %rax   ## 8-byte Folded Reload
-	adcq	%rbp, %r15
-	mulxq	-72(%rsp), %r12, %rbp   ## 8-byte Folded Reload
-	adcq	%rax, %r12
-	mulxq	-80(%rsp), %r9, %rax    ## 8-byte Folded Reload
-	adcq	%rbp, %r9
-	adcq	$0, %rax
-	addq	%r13, %r8
-	adcq	%r11, %rbx
-	adcq	%r10, %rdi
-	adcq	%r14, %r15
-	adcq	%rsi, %r12
-	adcq	%rcx, %r9
-	adcq	$0, %rax
-	movq	-104(%rsp), %rdx        ## 8-byte Reload
-	imulq	%r8, %rdx
-	mulxq	-96(%rsp), %rcx, %rsi   ## 8-byte Folded Reload
-	movq	%rsi, -104(%rsp)        ## 8-byte Spill
-	addq	%r8, %rcx
-	movq	-16(%rsp), %r11         ## 8-byte Reload
-	mulxq	%r11, %r8, %rcx
-	movq	%rcx, -112(%rsp)        ## 8-byte Spill
-	adcq	%rbx, %r8
-	movq	-24(%rsp), %r10         ## 8-byte Reload
-	mulxq	%r10, %rsi, %rcx
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	adcq	%rdi, %rsi
-	movq	-32(%rsp), %r13         ## 8-byte Reload
-	mulxq	%r13, %rdi, %rcx
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	adcq	%r15, %rdi
-	movq	-40(%rsp), %rcx         ## 8-byte Reload
-	mulxq	%rcx, %r15, %rbx
-	adcq	%r12, %r15
-	movq	-48(%rsp), %r14         ## 8-byte Reload
-	mulxq	%r14, %r12, %rbp
-	adcq	%r9, %r12
-	adcq	$0, %rax
-	addq	-104(%rsp), %r8         ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rdi        ## 8-byte Folded Reload
-	adcq	-128(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	%rbx, %r12
-	adcq	%rbp, %rax
-	movq	%r8, %rbp
-	subq	-96(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rsi, %rbx
-	sbbq	%r11, %rbx
-	movq	%rdi, %r11
-	sbbq	%r10, %r11
-	movq	%r15, %r10
-	sbbq	%r13, %r10
-	movq	%r12, %r9
-	sbbq	%rcx, %r9
-	movq	%rax, %rcx
-	sbbq	%r14, %rcx
-	movq	%rcx, %rdx
-	sarq	$63, %rdx
-	cmovsq	%r8, %rbp
-	movq	-8(%rsp), %rdx          ## 8-byte Reload
-	movq	%rbp, (%rdx)
-	cmovsq	%rsi, %rbx
-	movq	%rbx, 8(%rdx)
-	cmovsq	%rdi, %r11
-	movq	%r11, 16(%rdx)
-	cmovsq	%r15, %r10
-	movq	%r10, 24(%rdx)
-	cmovsq	%r12, %r9
-	movq	%r9, 32(%rdx)
-	cmovsq	%rax, %rcx
-	movq	%rcx, 40(%rdx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montRed6Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_montRed6Lbmi2:                  ## @mcl_fp_montRed6Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	pushq	%rax
-	movq	%rdx, %rcx
-	movq	%rdi, (%rsp)            ## 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	(%rsi), %r9
-	movq	%r9, %rdx
-	imulq	%rax, %rdx
-	movq	40(%rcx), %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r12, %r13
-	movq	32(%rcx), %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r10, %r8
-	movq	24(%rcx), %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r14, %r15
-	movq	16(%rcx), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rbp, %r11
-	movq	(%rcx), %rdi
-	movq	%rdi, -40(%rsp)         ## 8-byte Spill
-	movq	8(%rcx), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rax, %rbx
-	mulxq	%rdi, %rdx, %rcx
-	addq	%rax, %rcx
-	adcq	%rbp, %rbx
-	adcq	%r14, %r11
-	adcq	%r10, %r15
-	adcq	%r12, %r8
-	adcq	$0, %r13
-	addq	%r9, %rdx
-	adcq	8(%rsi), %rcx
-	adcq	16(%rsi), %rbx
-	adcq	24(%rsi), %r11
-	adcq	32(%rsi), %r15
-	adcq	40(%rsi), %r8
-	movq	%r8, -112(%rsp)         ## 8-byte Spill
-	adcq	48(%rsi), %r13
-	movq	%r13, -104(%rsp)        ## 8-byte Spill
-	movq	88(%rsi), %r8
-	movq	80(%rsi), %rdx
-	movq	72(%rsi), %rdi
-	movq	64(%rsi), %rax
-	movq	56(%rsi), %r14
-	adcq	$0, %r14
-	adcq	$0, %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	adcq	$0, %rdi
-	movq	%rdi, -96(%rsp)         ## 8-byte Spill
-	adcq	$0, %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, -24(%rsp)          ## 8-byte Spill
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	movq	%rcx, %rdx
-	imulq	-8(%rsp), %rdx          ## 8-byte Folded Reload
-	mulxq	-72(%rsp), %rsi, %rax   ## 8-byte Folded Reload
-	movq	%rsi, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	mulxq	-16(%rsp), %rax, %r13   ## 8-byte Folded Reload
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	mulxq	-48(%rsp), %rbp, %r10   ## 8-byte Folded Reload
-	mulxq	-32(%rsp), %r9, %r8     ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %rsi, %rdi   ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %rdx, %rax   ## 8-byte Folded Reload
-	addq	%rsi, %rax
-	adcq	%r9, %rdi
-	adcq	%rbp, %r8
-	adcq	-56(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r13        ## 8-byte Folded Reload
-	movq	-128(%rsp), %rsi        ## 8-byte Reload
-	adcq	$0, %rsi
-	addq	%rcx, %rdx
-	adcq	%rbx, %rax
-	adcq	%r11, %rdi
-	adcq	%r15, %r8
-	adcq	-112(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	%r14, %rsi
-	movq	%rsi, -128(%rsp)        ## 8-byte Spill
-	adcq	$0, -88(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -96(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -80(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -24(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rax, %rdx
-	imulq	-8(%rsp), %rdx          ## 8-byte Folded Reload
-	mulxq	-72(%rsp), %rsi, %rcx   ## 8-byte Folded Reload
-	movq	%rsi, -112(%rsp)        ## 8-byte Spill
-	movq	%rcx, -104(%rsp)        ## 8-byte Spill
-	movq	-16(%rsp), %rbx         ## 8-byte Reload
-	mulxq	%rbx, %rcx, %r14
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	mulxq	-48(%rsp), %rcx, %r15   ## 8-byte Folded Reload
-	movq	%rcx, -56(%rsp)         ## 8-byte Spill
-	mulxq	-32(%rsp), %r11, %rbp   ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %rsi, %r9    ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %rdx, %rcx   ## 8-byte Folded Reload
-	addq	%rsi, %rcx
-	adcq	%r11, %r9
-	adcq	-56(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r14        ## 8-byte Folded Reload
-	movq	-104(%rsp), %rsi        ## 8-byte Reload
-	adcq	$0, %rsi
-	addq	%rax, %rdx
-	adcq	%rdi, %rcx
-	adcq	%r8, %r9
-	adcq	%r10, %rbp
-	adcq	%r13, %r15
-	adcq	-128(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	-88(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, -104(%rsp)        ## 8-byte Spill
-	adcq	$0, -96(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -80(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -24(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rcx, %rdx
-	movq	-8(%rsp), %r13          ## 8-byte Reload
-	imulq	%r13, %rdx
-	mulxq	-72(%rsp), %rsi, %rax   ## 8-byte Folded Reload
-	movq	%rsi, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	mulxq	%rbx, %rsi, %rax
-	movq	%rsi, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	-48(%rsp), %r11         ## 8-byte Reload
-	mulxq	%r11, %rax, %rbx
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	mulxq	-32(%rsp), %r10, %r8    ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %rsi, %rdi   ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %rdx, %rax   ## 8-byte Folded Reload
-	addq	%rsi, %rax
-	adcq	%r10, %rdi
-	adcq	-56(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rbx        ## 8-byte Folded Reload
-	movq	-88(%rsp), %r10         ## 8-byte Reload
-	adcq	-112(%rsp), %r10        ## 8-byte Folded Reload
-	movq	-128(%rsp), %rsi        ## 8-byte Reload
-	adcq	$0, %rsi
-	addq	%rcx, %rdx
-	adcq	%r9, %rax
-	adcq	%rbp, %rdi
-	adcq	%r15, %r8
-	adcq	%r14, %rbx
-	adcq	-104(%rsp), %r10        ## 8-byte Folded Reload
-	movq	%r10, -88(%rsp)         ## 8-byte Spill
-	adcq	-96(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, -128(%rsp)        ## 8-byte Spill
-	adcq	$0, -80(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -24(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rax, %rdx
-	imulq	%r13, %rdx
-	mulxq	-72(%rsp), %rsi, %rcx   ## 8-byte Folded Reload
-	movq	%rsi, -104(%rsp)        ## 8-byte Spill
-	movq	%rcx, -96(%rsp)         ## 8-byte Spill
-	mulxq	-16(%rsp), %rsi, %rcx   ## 8-byte Folded Reload
-	movq	%rsi, -112(%rsp)        ## 8-byte Spill
-	mulxq	%r11, %rsi, %r13
-	movq	%rsi, -120(%rsp)        ## 8-byte Spill
-	movq	-32(%rsp), %r10         ## 8-byte Reload
-	mulxq	%r10, %r15, %r14
-	mulxq	-64(%rsp), %rsi, %r9    ## 8-byte Folded Reload
-	movq	-40(%rsp), %r11         ## 8-byte Reload
-	mulxq	%r11, %rdx, %rbp
-	addq	%rsi, %rbp
-	adcq	%r15, %r9
-	adcq	-120(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	-96(%rsp), %rsi         ## 8-byte Reload
-	adcq	$0, %rsi
-	addq	%rax, %rdx
-	adcq	%rdi, %rbp
-	adcq	%r8, %r9
-	adcq	%rbx, %r14
-	adcq	-88(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	-128(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	adcq	-80(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, -96(%rsp)         ## 8-byte Spill
-	adcq	$0, -24(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	-8(%rsp), %rdx          ## 8-byte Reload
-	imulq	%rbp, %rdx
-	mulxq	-72(%rsp), %rax, %rsi   ## 8-byte Folded Reload
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	mulxq	%r10, %rax, %r15
-	mulxq	%r11, %r10, %rdi
-	mulxq	-64(%rsp), %rbx, %r8    ## 8-byte Folded Reload
-	addq	%rdi, %rbx
-	adcq	%rax, %r8
-	mulxq	-48(%rsp), %rax, %rdi   ## 8-byte Folded Reload
-	adcq	%r15, %rax
-	movq	-16(%rsp), %r15         ## 8-byte Reload
-	mulxq	%r15, %rdx, %r11
-	adcq	%rdi, %rdx
-	adcq	-80(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	$0, %rsi
-	addq	%rbp, %r10
-	adcq	%r9, %rbx
-	adcq	%r14, %r8
-	adcq	%r13, %rax
-	adcq	-128(%rsp), %rdx        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	-24(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	$0, %r12
-	movq	%rbx, %rcx
-	subq	-40(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%r8, %rdi
-	sbbq	-64(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rax, %rbp
-	sbbq	-32(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	sbbq	-48(%rsp), %r9          ## 8-byte Folded Reload
-	movq	%r11, %r10
-	sbbq	%r15, %r10
-	movq	%rsi, %r15
-	sbbq	-72(%rsp), %r15         ## 8-byte Folded Reload
-	sbbq	$0, %r12
-	andl	$1, %r12d
-	cmovneq	%rsi, %r15
-	testb	%r12b, %r12b
-	cmovneq	%rbx, %rcx
-	movq	(%rsp), %rsi            ## 8-byte Reload
-	movq	%rcx, (%rsi)
-	cmovneq	%r8, %rdi
-	movq	%rdi, 8(%rsi)
-	cmovneq	%rax, %rbp
-	movq	%rbp, 16(%rsi)
-	cmovneq	%rdx, %r9
-	movq	%r9, 24(%rsi)
-	cmovneq	%r11, %r10
-	movq	%r10, 32(%rsi)
-	movq	%r15, 40(%rsi)
-	addq	$8, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_addPre6Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_addPre6Lbmi2:                   ## @mcl_fp_addPre6Lbmi2
-## BB#0:
-	pushq	%r14
-	pushq	%rbx
-	movq	40(%rdx), %r8
-	movq	40(%rsi), %r11
-	movq	32(%rdx), %r9
-	movq	24(%rdx), %r10
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %r14
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rbx, 16(%rdi)
-	adcq	%r10, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r9, %r14
-	movq	%r14, 32(%rdi)
-	adcq	%r8, %r11
-	movq	%r11, 40(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r14
-	retq
-
-	.globl	_mcl_fp_subPre6Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_subPre6Lbmi2:                   ## @mcl_fp_subPre6Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	40(%rdx), %r8
-	movq	40(%rsi), %r9
-	movq	32(%rsi), %r10
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %rsi
-	movq	24(%rdx), %r14
-	movq	32(%rdx), %r15
-	sbbq	16(%rdx), %rcx
-	movq	%rbx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r14, %r11
-	movq	%r11, 24(%rdi)
-	sbbq	%r15, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	%r8, %r9
-	movq	%r9, 40(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_shr1_6Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_shr1_6Lbmi2:                    ## @mcl_fp_shr1_6Lbmi2
-## BB#0:
-	movq	40(%rsi), %r8
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %rdx
-	movq	16(%rsi), %rax
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rcx
-	movq	%rcx, (%rdi)
-	shrdq	$1, %rax, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rdx, %rax
-	movq	%rax, 16(%rdi)
-	shrdq	$1, %r9, %rdx
-	movq	%rdx, 24(%rdi)
-	shrdq	$1, %r8, %r9
-	movq	%r9, 32(%rdi)
-	shrq	%r8
-	movq	%r8, 40(%rdi)
-	retq
-
-	.globl	_mcl_fp_add6Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_add6Lbmi2:                      ## @mcl_fp_add6Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	40(%rdx), %r14
-	movq	40(%rsi), %r8
-	movq	32(%rdx), %r15
-	movq	24(%rdx), %rbx
-	movq	24(%rsi), %r10
-	movq	32(%rsi), %r9
-	movq	16(%rdx), %r11
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r11
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	adcq	%rbx, %r10
-	movq	%r10, 24(%rdi)
-	adcq	%r15, %r9
-	movq	%r9, 32(%rdi)
-	adcq	%r14, %r8
-	movq	%r8, 40(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r11
-	sbbq	24(%rcx), %r10
-	sbbq	32(%rcx), %r9
-	sbbq	40(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB89_2
-## BB#1:                                ## %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	movq	%r10, 24(%rdi)
-	movq	%r9, 32(%rdi)
-	movq	%r8, 40(%rdi)
-LBB89_2:                                ## %carry
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_addNF6Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_addNF6Lbmi2:                    ## @mcl_fp_addNF6Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	40(%rdx), %r8
-	movq	32(%rdx), %r9
-	movq	24(%rdx), %r10
-	movq	16(%rdx), %r11
-	movq	(%rdx), %r15
-	movq	8(%rdx), %r14
-	addq	(%rsi), %r15
-	adcq	8(%rsi), %r14
-	adcq	16(%rsi), %r11
-	adcq	24(%rsi), %r10
-	adcq	32(%rsi), %r9
-	adcq	40(%rsi), %r8
-	movq	%r15, %rsi
-	subq	(%rcx), %rsi
-	movq	%r14, %rbx
-	sbbq	8(%rcx), %rbx
-	movq	%r11, %rdx
-	sbbq	16(%rcx), %rdx
-	movq	%r10, %r13
-	sbbq	24(%rcx), %r13
-	movq	%r9, %r12
-	sbbq	32(%rcx), %r12
-	movq	%r8, %rax
-	sbbq	40(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r15, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r14, %rbx
-	movq	%rbx, 8(%rdi)
-	cmovsq	%r11, %rdx
-	movq	%rdx, 16(%rdi)
-	cmovsq	%r10, %r13
-	movq	%r13, 24(%rdi)
-	cmovsq	%r9, %r12
-	movq	%r12, 32(%rdi)
-	cmovsq	%r8, %rax
-	movq	%rax, 40(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_sub6Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_sub6Lbmi2:                      ## @mcl_fp_sub6Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	40(%rdx), %r14
-	movq	40(%rsi), %r8
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %r10
-	movq	16(%rsi), %r11
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %rsi
-	movq	24(%rdx), %r15
-	movq	32(%rdx), %r12
-	sbbq	16(%rdx), %r11
-	movq	%rax, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	sbbq	%r15, %r10
-	movq	%r10, 24(%rdi)
-	sbbq	%r12, %r9
-	movq	%r9, 32(%rdi)
-	sbbq	%r14, %r8
-	movq	%r8, 40(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	LBB91_2
-## BB#1:                                ## %carry
-	movq	40(%rcx), %r14
-	movq	32(%rcx), %r15
-	movq	24(%rcx), %r12
-	movq	8(%rcx), %rbx
-	movq	16(%rcx), %rdx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
+	movq	%rdx, %r14
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	16(%rsi), %rdi
+	movq	%rdi, -48(%rsp)                 ## 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdx, -16(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rdx
+	mulxq	%rax, %r11, %rbx
+	movq	(%rsi), %rdi
+	movq	%rdi, -56(%rsp)                 ## 8-byte Spill
+	movq	8(%rsi), %rdx
+	movq	%rdx, -64(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r15, %rbp
+	movq	%rdi, %rdx
+	mulxq	%rax, %r9, %r8
+	addq	%r15, %r8
+	adcq	%r11, %rbp
+	adcq	$0, %rbx
+	movq	-8(%rcx), %r13
+	movq	%r13, %rdx
+	imulq	%r9, %rdx
+	movq	8(%rcx), %rax
+	movq	%rax, -32(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r12, %r10
+	movq	(%rcx), %rax
+	movq	%rax, -40(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r11, %rax
+	addq	%r12, %rax
+	movq	16(%rcx), %rdi
+	mulxq	%rdi, %rcx, %rsi
+	movq	%rdi, %r15
+	movq	%rdi, -24(%rsp)                 ## 8-byte Spill
+	adcq	%r10, %rcx
+	adcq	$0, %rsi
+	addq	%r9, %r11
+	adcq	%r8, %rax
+	movq	8(%r14), %rdx
+	adcq	%rbp, %rcx
+	adcq	%rbx, %rsi
+	movq	-48(%rsp), %r14                 ## 8-byte Reload
+	mulxq	%r14, %r9, %r8
+	mulxq	-64(%rsp), %rbp, %rbx           ## 8-byte Folded Reload
+	mulxq	-56(%rsp), %r10, %rdi           ## 8-byte Folded Reload
+	setb	%dl
+	addq	%rbp, %rdi
+	adcq	%r9, %rbx
+	adcq	$0, %r8
+	addq	%rax, %r10
+	adcq	%rcx, %rdi
+	movzbl	%dl, %eax
 	adcq	%rsi, %rbx
-	movq	%rbx, 8(%rdi)
-	adcq	%r11, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r10, %r12
-	movq	%r12, 24(%rdi)
-	adcq	%r9, %r15
-	movq	%r15, 32(%rdi)
+	adcq	%rax, %r8
+	setb	%r11b
+	movq	%r13, %rdx
+	imulq	%r10, %rdx
+	mulxq	%r15, %r9, %rcx
+	movq	-32(%rsp), %r12                 ## 8-byte Reload
+	mulxq	%r12, %rsi, %rbp
+	movq	-40(%rsp), %r15                 ## 8-byte Reload
+	mulxq	%r15, %rdx, %rax
+	addq	%rsi, %rax
+	adcq	%r9, %rbp
+	adcq	$0, %rcx
+	addq	%r10, %rdx
+	adcq	%rdi, %rax
+	movzbl	%r11b, %r9d
+	adcq	%rbx, %rbp
+	adcq	%r8, %rcx
+	adcq	$0, %r9
+	movq	-16(%rsp), %rdx                 ## 8-byte Reload
+	movq	16(%rdx), %rdx
+	mulxq	%r14, %r8, %rsi
+	mulxq	-64(%rsp), %r10, %r14           ## 8-byte Folded Reload
+	mulxq	-56(%rsp), %r11, %rdi           ## 8-byte Folded Reload
+	addq	%r10, %rdi
 	adcq	%r8, %r14
-	movq	%r14, 40(%rdi)
-LBB91_2:                                ## %nocarry
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_subNF6Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_subNF6Lbmi2:                    ## @mcl_fp_subNF6Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movdqu	(%rdx), %xmm0
-	movdqu	16(%rdx), %xmm1
-	movdqu	32(%rdx), %xmm2
-	pshufd	$78, %xmm2, %xmm3       ## xmm3 = xmm2[2,3,0,1]
-	movd	%xmm3, %r10
-	movdqu	(%rsi), %xmm3
-	movdqu	16(%rsi), %xmm4
-	movdqu	32(%rsi), %xmm5
-	pshufd	$78, %xmm5, %xmm6       ## xmm6 = xmm5[2,3,0,1]
-	movd	%xmm6, %rax
-	movd	%xmm2, %r11
-	movd	%xmm5, %r8
-	pshufd	$78, %xmm1, %xmm2       ## xmm2 = xmm1[2,3,0,1]
-	movd	%xmm2, %r14
-	pshufd	$78, %xmm4, %xmm2       ## xmm2 = xmm4[2,3,0,1]
-	movd	%xmm2, %r9
-	movd	%xmm1, %r15
-	movd	%xmm4, %r12
-	pshufd	$78, %xmm0, %xmm1       ## xmm1 = xmm0[2,3,0,1]
-	movd	%xmm1, %r13
-	pshufd	$78, %xmm3, %xmm1       ## xmm1 = xmm3[2,3,0,1]
-	movd	%xmm1, %rbp
-	movd	%xmm0, %rdx
-	movd	%xmm3, %rbx
-	subq	%rdx, %rbx
-	sbbq	%r13, %rbp
-	sbbq	%r15, %r12
-	sbbq	%r14, %r9
-	sbbq	%r11, %r8
-	sbbq	%r10, %rax
-	movq	%rax, %rdx
-	sarq	$63, %rdx
-	movq	%rdx, %rsi
-	shldq	$1, %rax, %rsi
-	andq	(%rcx), %rsi
-	movq	40(%rcx), %r10
-	andq	%rdx, %r10
-	movq	32(%rcx), %r11
-	andq	%rdx, %r11
-	movq	24(%rcx), %r14
-	andq	%rdx, %r14
-	rorxq	$63, %rdx, %r15
-	andq	16(%rcx), %rdx
-	andq	8(%rcx), %r15
-	addq	%rbx, %rsi
-	movq	%rsi, (%rdi)
-	adcq	%rbp, %r15
-	movq	%r15, 8(%rdi)
-	adcq	%r12, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r9, %r14
-	movq	%r14, 24(%rdi)
-	adcq	%r8, %r11
-	movq	%r11, 32(%rdi)
-	adcq	%rax, %r10
-	movq	%r10, 40(%rdi)
+	adcq	$0, %rsi
+	addq	%rax, %r11
+	adcq	%rbp, %rdi
+	adcq	%rcx, %r14
+	adcq	%r9, %rsi
+	setb	%r8b
+	imulq	%r11, %r13
+	movq	%r13, %rdx
+	mulxq	%r15, %rax, %rbp
+	movq	%r12, %r10
+	mulxq	%r12, %rcx, %r9
+	addq	%rbp, %rcx
+	movq	-24(%rsp), %r12                 ## 8-byte Reload
+	mulxq	%r12, %rdx, %rbx
+	adcq	%r9, %rdx
+	adcq	$0, %rbx
+	addq	%r11, %rax
+	adcq	%rdi, %rcx
+	adcq	%r14, %rdx
+	movzbl	%r8b, %eax
+	adcq	%rsi, %rbx
+	adcq	$0, %rax
+	movq	%rcx, %rsi
+	subq	%r15, %rsi
+	movq	%rdx, %rdi
+	sbbq	%r10, %rdi
+	movq	%rbx, %rbp
+	sbbq	%r12, %rbp
+	sbbq	$0, %rax
+	testb	$1, %al
+	cmovneq	%rbx, %rbp
+	movq	-8(%rsp), %rax                  ## 8-byte Reload
+	movq	%rbp, 16(%rax)
+	cmovneq	%rdx, %rdi
+	movq	%rdi, 8(%rax)
+	cmovneq	%rcx, %rsi
+	movq	%rsi, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -5978,85 +569,123 @@ _mcl_fp_subNF6Lbmi2:                    ## @mcl_fp_subNF6Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_add6Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_montNF3Lbmi2            ## -- Begin function mcl_fp_montNF3Lbmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_add6Lbmi2:                   ## @mcl_fpDbl_add6Lbmi2
-## BB#0:
+_mcl_fp_montNF3Lbmi2:                   ## @mcl_fp_montNF3Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	88(%rdx), %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	80(%rdx), %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	72(%rdx), %r14
-	movq	64(%rdx), %r15
-	movq	24(%rsi), %rbp
-	movq	32(%rsi), %r13
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rax
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r12
-	adcq	24(%rdx), %rbp
-	adcq	32(%rdx), %r13
-	movq	56(%rdx), %r11
-	movq	48(%rdx), %r9
-	movq	40(%rdx), %rdx
-	movq	%rbx, (%rdi)
-	movq	88(%rsi), %r8
-	movq	%rax, 8(%rdi)
-	movq	80(%rsi), %r10
-	movq	%r12, 16(%rdi)
-	movq	72(%rsi), %r12
-	movq	%rbp, 24(%rdi)
-	movq	40(%rsi), %rax
-	adcq	%rdx, %rax
-	movq	64(%rsi), %rdx
-	movq	%r13, 32(%rdi)
-	movq	56(%rsi), %r13
-	movq	48(%rsi), %rbp
-	adcq	%r9, %rbp
-	movq	%rax, 40(%rdi)
-	adcq	%r11, %r13
-	adcq	%r15, %rdx
-	adcq	%r14, %r12
-	adcq	-16(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-8(%rsp), %r8           ## 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rbp, %rsi
-	subq	(%rcx), %rsi
-	movq	%r13, %rbx
-	sbbq	8(%rcx), %rbx
-	movq	%rdx, %r9
-	sbbq	16(%rcx), %r9
-	movq	%r12, %r11
-	sbbq	24(%rcx), %r11
-	movq	%r10, %r14
-	sbbq	32(%rcx), %r14
-	movq	%r8, %r15
-	sbbq	40(%rcx), %r15
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%rbp, %rsi
-	movq	%rsi, 48(%rdi)
-	testb	%al, %al
-	cmovneq	%r13, %rbx
-	movq	%rbx, 56(%rdi)
-	cmovneq	%rdx, %r9
-	movq	%r9, 64(%rdi)
-	cmovneq	%r12, %r11
-	movq	%r11, 72(%rdi)
-	cmovneq	%r10, %r14
-	movq	%r14, 80(%rdi)
-	cmovneq	%r8, %r15
-	movq	%r15, 88(%rdi)
+	movq	%rdx, %r10
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	(%rsi), %r11
+	movq	8(%rsi), %rbp
+	movq	%rbp, -32(%rsp)                 ## 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdx, -16(%rsp)                 ## 8-byte Spill
+	movq	%rbp, %rdx
+	mulxq	%rax, %rbx, %r14
+	movq	%r11, %rdx
+	movq	%r11, -24(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r15, %r12
+	movq	16(%rsi), %rdx
+	movq	%rdx, -40(%rsp)                 ## 8-byte Spill
+	addq	%rbx, %r12
+	mulxq	%rax, %rsi, %rbx
+	adcq	%r14, %rsi
+	adcq	$0, %rbx
+	movq	-8(%rcx), %r13
+	movq	(%rcx), %r14
+	movq	%r13, %rax
+	imulq	%r15, %rax
+	movq	%r14, %rdx
+	mulxq	%rax, %rdx, %rbp
+	addq	%r15, %rdx
+	movq	8(%rcx), %r15
+	movq	%r15, %rdx
+	mulxq	%rax, %rdi, %r9
+	adcq	%r12, %rdi
+	movq	16(%rcx), %r12
+	movq	%r12, %rdx
+	mulxq	%rax, %r8, %rax
+	adcq	%rsi, %r8
+	adcq	$0, %rbx
+	addq	%rbp, %rdi
+	movq	8(%r10), %rcx
+	adcq	%r9, %r8
+	adcq	%rax, %rbx
+	movq	-32(%rsp), %r10                 ## 8-byte Reload
+	movq	%r10, %rdx
+	mulxq	%rcx, %rsi, %r9
+	movq	%r11, %rdx
+	mulxq	%rcx, %rbp, %rax
+	addq	%rsi, %rax
+	movq	-40(%rsp), %r11                 ## 8-byte Reload
+	movq	%r11, %rdx
+	mulxq	%rcx, %rsi, %rcx
+	adcq	%r9, %rsi
+	adcq	$0, %rcx
+	addq	%rdi, %rbp
+	adcq	%r8, %rax
+	adcq	%rbx, %rsi
+	adcq	$0, %rcx
+	movq	%r13, %rdx
+	imulq	%rbp, %rdx
+	mulxq	%r14, %rbx, %r8
+	addq	%rbp, %rbx
+	mulxq	%r15, %rdi, %rbx
+	adcq	%rax, %rdi
+	mulxq	%r12, %rbp, %rax
+	adcq	%rsi, %rbp
+	adcq	$0, %rcx
+	addq	%r8, %rdi
+	adcq	%rbx, %rbp
+	adcq	%rax, %rcx
+	movq	-16(%rsp), %rax                 ## 8-byte Reload
+	movq	16(%rax), %rdx
+	mulxq	%r10, %rbx, %r8
+	mulxq	-24(%rsp), %r9, %rsi            ## 8-byte Folded Reload
+	addq	%rbx, %rsi
+	mulxq	%r11, %rax, %rbx
+	adcq	%r8, %rax
+	adcq	$0, %rbx
+	addq	%rdi, %r9
+	adcq	%rbp, %rsi
+	adcq	%rcx, %rax
+	adcq	$0, %rbx
+	imulq	%r9, %r13
+	movq	%r14, %rdx
+	mulxq	%r13, %rdx, %r8
+	addq	%r9, %rdx
+	movq	%r12, %rdx
+	mulxq	%r13, %rbp, %rdi
+	movq	%r15, %rdx
+	mulxq	%r13, %rcx, %rdx
+	adcq	%rsi, %rcx
+	adcq	%rax, %rbp
+	adcq	$0, %rbx
+	addq	%r8, %rcx
+	adcq	%rdx, %rbp
+	adcq	%rdi, %rbx
+	movq	%rcx, %rax
+	subq	%r14, %rax
+	movq	%rbp, %rdx
+	sbbq	%r15, %rdx
+	movq	%rbx, %rsi
+	sbbq	%r12, %rsi
+	movq	%rsi, %rdi
+	sarq	$63, %rdi
+	cmovsq	%rbx, %rsi
+	movq	-8(%rsp), %rdi                  ## 8-byte Reload
+	movq	%rsi, 16(%rdi)
+	cmovsq	%rbp, %rdx
+	movq	%rdx, 8(%rdi)
+	cmovsq	%rcx, %rax
+	movq	%rax, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -6064,82 +693,80 @@ _mcl_fpDbl_add6Lbmi2:                   ## @mcl_fpDbl_add6Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_sub6Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_montRed3Lbmi2           ## -- Begin function mcl_fp_montRed3Lbmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_sub6Lbmi2:                   ## @mcl_fpDbl_sub6Lbmi2
-## BB#0:
+_mcl_fp_montRed3Lbmi2:                  ## @mcl_fp_montRed3Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	88(%rdx), %r9
-	movq	80(%rdx), %r10
-	movq	72(%rdx), %r14
-	movq	16(%rsi), %r8
-	movq	(%rsi), %r15
-	movq	8(%rsi), %r11
-	xorl	%eax, %eax
-	subq	(%rdx), %r15
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %r8
-	movq	24(%rsi), %rbx
-	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %r12
-	sbbq	32(%rdx), %r12
-	movq	64(%rdx), %r13
-	movq	%r15, (%rdi)
-	movq	56(%rdx), %rbp
-	movq	%r11, 8(%rdi)
-	movq	48(%rdx), %r15
-	movq	40(%rdx), %rdx
-	movq	%r8, 16(%rdi)
-	movq	88(%rsi), %r8
-	movq	%rbx, 24(%rdi)
-	movq	40(%rsi), %rbx
-	sbbq	%rdx, %rbx
-	movq	80(%rsi), %r11
-	movq	%r12, 32(%rdi)
-	movq	48(%rsi), %rdx
-	sbbq	%r15, %rdx
-	movq	72(%rsi), %r15
-	movq	%rbx, 40(%rdi)
-	movq	64(%rsi), %r12
-	movq	56(%rsi), %rsi
-	sbbq	%rbp, %rsi
-	sbbq	%r13, %r12
-	sbbq	%r14, %r15
-	sbbq	%r10, %r11
-	sbbq	%r9, %r8
-	movl	$0, %ebp
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	movq	(%rcx), %r14
-	cmoveq	%rax, %r14
-	testb	%bpl, %bpl
+	movq	%rdx, %rcx
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	-8(%rdx), %r14
+	movq	(%rdx), %r8
+	movq	(%rsi), %rax
+	movq	%rax, %rdx
+	imulq	%r14, %rdx
 	movq	16(%rcx), %r9
-	cmoveq	%rax, %r9
-	movq	8(%rcx), %rbp
-	cmoveq	%rax, %rbp
-	movq	40(%rcx), %r10
-	cmoveq	%rax, %r10
-	movq	32(%rcx), %rbx
-	cmoveq	%rax, %rbx
-	cmovneq	24(%rcx), %rax
-	addq	%rdx, %r14
-	movq	%r14, 48(%rdi)
-	adcq	%rsi, %rbp
-	movq	%rbp, 56(%rdi)
-	adcq	%r12, %r9
-	movq	%r9, 64(%rdi)
-	adcq	%r15, %rax
-	movq	%rax, 72(%rdi)
-	adcq	%r11, %rbx
-	movq	%rbx, 80(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 88(%rdi)
+	mulxq	%r9, %r15, %r10
+	movq	8(%rcx), %r11
+	mulxq	%r11, %rbx, %r12
+	mulxq	%r8, %rdx, %rcx
+	addq	%rbx, %rcx
+	adcq	%r15, %r12
+	adcq	$0, %r10
+	addq	%rax, %rdx
+	adcq	8(%rsi), %rcx
+	adcq	16(%rsi), %r12
+	adcq	24(%rsi), %r10
+	setb	%r13b
+	movq	%r14, %rdx
+	imulq	%rcx, %rdx
+	mulxq	%r8, %rbp, %rax
+	mulxq	%r11, %rbx, %rdi
+	addq	%rax, %rbx
+	mulxq	%r9, %r15, %rdx
+	adcq	%rdi, %r15
+	movzbl	%r13b, %edi
+	adcq	%rdx, %rdi
+	addq	%rcx, %rbp
+	adcq	%r12, %rbx
+	adcq	%r10, %r15
+	adcq	32(%rsi), %rdi
+	setb	%r10b
+	imulq	%rbx, %r14
+	movq	%r14, %rdx
+	mulxq	%r8, %r13, %rbp
+	mulxq	%r11, %rcx, %r12
+	addq	%rbp, %rcx
+	mulxq	%r9, %rbp, %r14
+	adcq	%r12, %rbp
+	movzbl	%r10b, %eax
+	adcq	%r14, %rax
+	addq	%rbx, %r13
+	adcq	%r15, %rcx
+	adcq	%rdi, %rbp
+	adcq	40(%rsi), %rax
+	xorl	%ebx, %ebx
+	movq	%rcx, %rsi
+	subq	%r8, %rsi
+	movq	%rbp, %rdi
+	sbbq	%r11, %rdi
+	movq	%rax, %rdx
+	sbbq	%r9, %rdx
+	sbbq	%rbx, %rbx
+	testb	$1, %bl
+	cmovneq	%rax, %rdx
+	movq	-8(%rsp), %rax                  ## 8-byte Reload
+	movq	%rdx, 16(%rax)
+	cmovneq	%rbp, %rdi
+	movq	%rdi, 8(%rax)
+	cmovneq	%rcx, %rsi
+	movq	%rsi, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -6147,612 +774,539 @@ _mcl_fpDbl_sub6Lbmi2:                   ## @mcl_fpDbl_sub6Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_mulUnitPre7Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_montRedNF3Lbmi2         ## -- Begin function mcl_fp_montRedNF3Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_mulUnitPre7Lbmi2:               ## @mcl_fp_mulUnitPre7Lbmi2
-## BB#0:
+_mcl_fp_montRedNF3Lbmi2:                ## @mcl_fp_montRedNF3Lbmi2
+## %bb.0:
+	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	mulxq	48(%rsi), %r8, %r11
-	mulxq	40(%rsi), %r9, %r13
-	mulxq	32(%rsi), %r10, %rcx
-	mulxq	8(%rsi), %r12, %r14
-	mulxq	(%rsi), %r15, %rbx
-	addq	%r12, %rbx
-	mulxq	24(%rsi), %r12, %rax
-	mulxq	16(%rsi), %rdx, %rsi
-	movq	%r15, (%rdi)
+	movq	%rdx, %rcx
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	-8(%rdx), %r14
+	movq	(%rdx), %r8
+	movq	(%rsi), %rbx
+	movq	%rbx, %rdx
+	imulq	%r14, %rdx
+	movq	16(%rcx), %r9
+	mulxq	%r9, %r12, %r10
+	movq	8(%rcx), %r11
+	mulxq	%r11, %rcx, %r15
+	mulxq	%r8, %rdx, %rax
+	addq	%rcx, %rax
+	adcq	%r12, %r15
+	adcq	$0, %r10
+	addq	%rbx, %rdx
+	adcq	8(%rsi), %rax
+	adcq	16(%rsi), %r15
+	adcq	24(%rsi), %r10
+	setb	%r13b
+	movq	%r14, %rdx
+	imulq	%rax, %rdx
+	mulxq	%r8, %rbp, %rcx
+	mulxq	%r11, %rbx, %rdi
+	addq	%rcx, %rbx
+	mulxq	%r9, %r12, %rdx
+	adcq	%rdi, %r12
+	movzbl	%r13b, %ecx
+	adcq	%rdx, %rcx
+	addq	%rax, %rbp
+	adcq	%r15, %rbx
+	adcq	%r10, %r12
+	adcq	32(%rsi), %rcx
+	setb	%r10b
+	imulq	%rbx, %r14
+	movq	%r14, %rdx
+	mulxq	%r8, %r13, %rdi
+	mulxq	%r11, %rax, %r15
+	addq	%rdi, %rax
+	mulxq	%r9, %rdi, %r14
+	adcq	%r15, %rdi
+	movzbl	%r10b, %r10d
+	adcq	%r14, %r10
+	addq	%rbx, %r13
+	adcq	%r12, %rax
+	adcq	%rcx, %rdi
+	adcq	40(%rsi), %r10
+	movq	%rax, %rcx
+	subq	%r8, %rcx
+	movq	%rdi, %rsi
+	sbbq	%r11, %rsi
+	movq	%r10, %rbp
+	sbbq	%r9, %rbp
+	movq	%rbp, %rdx
+	sarq	$63, %rdx
+	cmovsq	%r10, %rbp
+	movq	-8(%rsp), %rdx                  ## 8-byte Reload
+	movq	%rbp, 16(%rdx)
+	cmovsq	%rdi, %rsi
+	movq	%rsi, 8(%rdx)
+	cmovsq	%rax, %rcx
+	movq	%rcx, (%rdx)
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_addPre3Lbmi2            ## -- Begin function mcl_fp_addPre3Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_addPre3Lbmi2:                   ## @mcl_fp_addPre3Lbmi2
+## %bb.0:
+	movq	16(%rsi), %rax
+	movq	(%rsi), %rcx
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rcx
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rax
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rcx, (%rdi)
+	setb	%al
+	movzbl	%al, %eax
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_subPre3Lbmi2            ## -- Begin function mcl_fp_subPre3Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_subPre3Lbmi2:                   ## @mcl_fp_subPre3Lbmi2
+## %bb.0:
+	movq	16(%rsi), %rcx
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %rcx
+	movq	%rcx, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rax, %rax
+	andl	$1, %eax
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_shr1_3Lbmi2             ## -- Begin function mcl_fp_shr1_3Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_shr1_3Lbmi2:                    ## @mcl_fp_shr1_3Lbmi2
+## %bb.0:
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rcx
+	movq	16(%rsi), %rdx
+	movq	%rdx, %rsi
+	shrq	%rsi
+	movq	%rsi, 16(%rdi)
+	shldq	$63, %rcx, %rdx
+	movq	%rdx, 8(%rdi)
+	shrdq	$1, %rcx, %rax
+	movq	%rax, (%rdi)
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_add3Lbmi2               ## -- Begin function mcl_fp_add3Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_add3Lbmi2:                      ## @mcl_fp_add3Lbmi2
+## %bb.0:
+	movq	16(%rsi), %r8
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r8
+	movq	%r8, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rax, (%rdi)
+	setb	%dl
+	movzbl	%dl, %edx
+	subq	(%rcx), %rax
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %r8
+	sbbq	$0, %rdx
+	testb	$1, %dl
+	jne	LBB16_2
+## %bb.1:                               ## %nocarry
+	movq	%rax, (%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, 16(%rdi)
+LBB16_2:                                ## %carry
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_addNF3Lbmi2             ## -- Begin function mcl_fp_addNF3Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_addNF3Lbmi2:                    ## @mcl_fp_addNF3Lbmi2
+## %bb.0:
+	movq	16(%rdx), %r10
+	movq	(%rdx), %r8
+	movq	8(%rdx), %r9
+	addq	(%rsi), %r8
+	adcq	8(%rsi), %r9
+	adcq	16(%rsi), %r10
+	movq	%r8, %rsi
+	subq	(%rcx), %rsi
+	movq	%r9, %rdx
+	sbbq	8(%rcx), %rdx
+	movq	%r10, %rax
+	sbbq	16(%rcx), %rax
+	movq	%rax, %rcx
+	sarq	$63, %rcx
+	cmovsq	%r10, %rax
+	movq	%rax, 16(%rdi)
+	cmovsq	%r9, %rdx
+	movq	%rdx, 8(%rdi)
+	cmovsq	%r8, %rsi
+	movq	%rsi, (%rdi)
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_sub3Lbmi2               ## -- Begin function mcl_fp_sub3Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_sub3Lbmi2:                      ## @mcl_fp_sub3Lbmi2
+## %bb.0:
+	movq	16(%rsi), %rax
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
+	xorl	%r9d, %r9d
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %rax
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%r9, %r9
+	testb	$1, %r9b
+	jne	LBB18_2
+## %bb.1:                               ## %nocarry
+	retq
+LBB18_2:                                ## %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %rax
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_subNF3Lbmi2             ## -- Begin function mcl_fp_subNF3Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_subNF3Lbmi2:                    ## @mcl_fp_subNF3Lbmi2
+## %bb.0:
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r8
+	movq	8(%rsi), %r9
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %r9
+	sbbq	16(%rdx), %r10
+	movq	%r10, %rdx
+	sarq	$63, %rdx
+	movq	%rdx, %rsi
+	shldq	$1, %r10, %rsi
+	andq	(%rcx), %rsi
+	movq	16(%rcx), %rax
+	andq	%rdx, %rax
+	andq	8(%rcx), %rdx
+	addq	%r8, %rsi
+	movq	%rsi, (%rdi)
+	adcq	%r9, %rdx
+	movq	%rdx, 8(%rdi)
+	adcq	%r10, %rax
+	movq	%rax, 16(%rdi)
+	retq
+                                        ## -- End function
+	.globl	_mcl_fpDbl_add3Lbmi2            ## -- Begin function mcl_fpDbl_add3Lbmi2
+	.p2align	4, 0x90
+_mcl_fpDbl_add3Lbmi2:                   ## @mcl_fpDbl_add3Lbmi2
+## %bb.0:
+	movq	40(%rsi), %r10
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r8
+	movq	16(%rsi), %rax
+	movq	(%rsi), %r11
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r11
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rax
+	adcq	24(%rdx), %r8
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r10
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r11, (%rdi)
+	setb	%al
+	movzbl	%al, %r11d
+	movq	%r8, %rdx
+	subq	(%rcx), %rdx
+	movq	%r9, %rsi
+	sbbq	8(%rcx), %rsi
+	movq	%r10, %rax
+	sbbq	16(%rcx), %rax
+	sbbq	$0, %r11
+	testb	$1, %r11b
+	cmovneq	%r10, %rax
+	movq	%rax, 40(%rdi)
+	cmovneq	%r9, %rsi
+	movq	%rsi, 32(%rdi)
+	cmovneq	%r8, %rdx
+	movq	%rdx, 24(%rdi)
+	retq
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sub3Lbmi2            ## -- Begin function mcl_fpDbl_sub3Lbmi2
+	.p2align	4, 0x90
+_mcl_fpDbl_sub3Lbmi2:                   ## @mcl_fpDbl_sub3Lbmi2
+## %bb.0:
+	pushq	%rbx
+	movq	40(%rsi), %r8
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r10
+	movq	16(%rsi), %rax
+	movq	(%rsi), %r11
+	movq	8(%rsi), %rbx
+	xorl	%esi, %esi
+	subq	(%rdx), %r11
+	sbbq	8(%rdx), %rbx
+	sbbq	16(%rdx), %rax
+	sbbq	24(%rdx), %r10
+	sbbq	32(%rdx), %r9
+	sbbq	40(%rdx), %r8
+	movq	%rax, 16(%rdi)
 	movq	%rbx, 8(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r12, %rsi
+	movq	%r11, (%rdi)
+	sbbq	%rsi, %rsi
+	andl	$1, %esi
+	negq	%rsi
+	movq	16(%rcx), %rax
+	andq	%rsi, %rax
+	movq	8(%rcx), %rdx
+	andq	%rsi, %rdx
+	andq	(%rcx), %rsi
+	addq	%r10, %rsi
 	movq	%rsi, 24(%rdi)
-	adcq	%r10, %rax
-	movq	%rax, 32(%rdi)
-	adcq	%r9, %rcx
-	movq	%rcx, 40(%rdi)
-	adcq	%r8, %r13
-	movq	%r13, 48(%rdi)
-	adcq	$0, %r11
-	movq	%r11, 56(%rdi)
+	adcq	%r9, %rdx
+	movq	%rdx, 32(%rdi)
+	adcq	%r8, %rax
+	movq	%rax, 40(%rdi)
 	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
 	retq
-
-	.globl	_mcl_fpDbl_mulPre7Lbmi2
+                                        ## -- End function
+	.globl	_mulPv256x64bmi2                ## -- Begin function mulPv256x64bmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_mulPre7Lbmi2:                ## @mcl_fpDbl_mulPre7Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %r14
-	movq	%rsi, %r8
-	movq	%rdi, %r13
-	movq	%r13, -48(%rsp)         ## 8-byte Spill
-	movq	(%r8), %rcx
-	movq	%rcx, -72(%rsp)         ## 8-byte Spill
-	movq	8(%r8), %rax
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	(%r14), %rsi
-	movq	%r14, -64(%rsp)         ## 8-byte Spill
-	movq	%rax, %rdx
-	mulxq	%rsi, %rbp, %rax
-	movq	%rcx, %rdx
-	mulxq	%rsi, %rdx, %rcx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	24(%r8), %rdi
-	movq	%rdi, -88(%rsp)         ## 8-byte Spill
-	movq	16(%r8), %rdx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	addq	%rbp, %rcx
-	mulxq	%rsi, %rbx, %rbp
-	adcq	%rax, %rbx
-	movq	%rdi, %rdx
-	mulxq	%rsi, %r12, %rax
-	adcq	%rbp, %r12
-	movq	32(%r8), %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	mulxq	%rsi, %r9, %rbp
-	adcq	%rax, %r9
-	movq	40(%r8), %rdi
-	movq	%rdi, %rdx
-	mulxq	%rsi, %r10, %rax
-	adcq	%rbp, %r10
-	movq	48(%r8), %r15
-	movq	%r15, %rdx
-	mulxq	%rsi, %rsi, %r11
-	adcq	%rax, %rsi
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	movq	%rax, (%r13)
+_mulPv256x64bmi2:                       ## @mulPv256x64bmi2
+## %bb.0:
+	movq	%rdi, %rax
+	mulxq	(%rsi), %rdi, %rcx
+	movq	%rdi, (%rax)
+	mulxq	8(%rsi), %rdi, %r8
+	addq	%rcx, %rdi
+	movq	%rdi, 8(%rax)
+	mulxq	16(%rsi), %rdi, %rcx
+	adcq	%r8, %rdi
+	movq	%rdi, 16(%rax)
+	mulxq	24(%rsi), %rdx, %rsi
+	adcq	%rcx, %rdx
+	movq	%rdx, 24(%rax)
+	adcq	$0, %rsi
+	movq	%rsi, 32(%rax)
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_mulUnitPre4Lbmi2        ## -- Begin function mcl_fp_mulUnitPre4Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_mulUnitPre4Lbmi2:               ## @mcl_fp_mulUnitPre4Lbmi2
+## %bb.0:
+	mulxq	24(%rsi), %r8, %r11
+	mulxq	16(%rsi), %r9, %rax
+	mulxq	8(%rsi), %r10, %rcx
+	mulxq	(%rsi), %rdx, %rsi
+	movq	%rdx, (%rdi)
+	addq	%r10, %rsi
+	movq	%rsi, 8(%rdi)
+	adcq	%r9, %rcx
+	movq	%rcx, 16(%rdi)
+	adcq	%r8, %rax
+	movq	%rax, 24(%rdi)
 	adcq	$0, %r11
-	movq	8(%r14), %r13
-	movq	-72(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%r13, %r14, %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	addq	%rcx, %r14
-	movq	-104(%rsp), %rdx        ## 8-byte Reload
-	mulxq	%r13, %rcx, %rax
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	adcq	%rbx, %rcx
-	movq	-96(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%r13, %rbx, %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	adcq	%r12, %rbx
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%r13, %rbp, %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	adcq	%r9, %rbp
-	movq	-80(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%r13, %rax, %r9
-	adcq	%r10, %rax
-	movq	%rdi, %rdx
-	mulxq	%r13, %r10, %rdi
-	adcq	%rsi, %r10
-	movq	%r15, %rdx
-	mulxq	%r13, %r13, %rdx
-	adcq	%r11, %r13
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-72(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rbx        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	%r9, %r10
-	movq	-48(%rsp), %rsi         ## 8-byte Reload
-	movq	%r14, 8(%rsi)
-	adcq	%rdi, %r13
-	adcq	%rdx, %r12
-	movq	(%r8), %rsi
-	movq	%rsi, -88(%rsp)         ## 8-byte Spill
-	movq	8(%r8), %r11
-	movq	%r11, -104(%rsp)        ## 8-byte Spill
-	movq	-64(%rsp), %rdx         ## 8-byte Reload
-	movq	16(%rdx), %rdi
-	movq	%rsi, %rdx
-	mulxq	%rdi, %r9, %rdx
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	addq	%rcx, %r9
-	movq	%r11, %rdx
-	mulxq	%rdi, %r14, %rcx
-	movq	%rcx, -16(%rsp)         ## 8-byte Spill
-	adcq	%rbx, %r14
-	movq	16(%r8), %rdx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	mulxq	%rdi, %rsi, %rcx
-	movq	%rcx, -24(%rsp)         ## 8-byte Spill
-	adcq	%rbp, %rsi
-	movq	24(%r8), %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	mulxq	%rdi, %rbp, %rcx
-	movq	%rcx, -32(%rsp)         ## 8-byte Spill
-	adcq	%rax, %rbp
-	movq	32(%r8), %rdx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rdi, %r11, %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	adcq	%r10, %r11
-	movq	40(%r8), %rdx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	mulxq	%rdi, %r15, %rax
-	adcq	%r13, %r15
-	movq	48(%r8), %r13
-	movq	%r13, %rdx
-	mulxq	%rdi, %rcx, %rdx
-	adcq	%r12, %rcx
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	-8(%rsp), %r14          ## 8-byte Folded Reload
-	adcq	-16(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-24(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-32(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	-40(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	%rax, %rcx
-	adcq	%rdx, %rbx
-	movq	-48(%rsp), %rax         ## 8-byte Reload
-	movq	%r9, 16(%rax)
-	movq	-64(%rsp), %rax         ## 8-byte Reload
-	movq	24(%rax), %rdi
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %r9, %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	addq	%r14, %r9
-	movq	-104(%rsp), %rdx        ## 8-byte Reload
-	mulxq	%rdi, %rax, %rdx
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	adcq	%rsi, %rax
-	movq	-96(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %r14, %rdx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	adcq	%rbp, %r14
-	movq	-80(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %r10, %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	adcq	%r11, %r10
-	movq	-72(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %rbp, %rsi
-	adcq	%r15, %rbp
-	movq	-56(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %r11, %r15
-	adcq	%rcx, %r11
-	movq	%r13, %rdx
-	mulxq	%rdi, %r13, %rcx
-	adcq	%rbx, %r13
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-88(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	%rsi, %r11
-	movq	-48(%rsp), %rdi         ## 8-byte Reload
-	movq	%r9, 24(%rdi)
-	adcq	%r15, %r13
-	adcq	%rcx, %r12
-	movq	(%r8), %rdx
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	8(%r8), %rbx
-	movq	%rbx, -104(%rsp)        ## 8-byte Spill
-	movq	-64(%rsp), %rcx         ## 8-byte Reload
-	movq	32(%rcx), %rcx
-	mulxq	%rcx, %rsi, %rdx
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	addq	%rax, %rsi
-	movq	%rbx, %rdx
-	mulxq	%rcx, %r9, %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	adcq	%r14, %r9
-	movq	16(%r8), %rdx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	mulxq	%rcx, %rax, %rdx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	adcq	%r10, %rax
-	movq	24(%r8), %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	mulxq	%rcx, %r15, %rdx
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	adcq	%rbp, %r15
-	movq	32(%r8), %rdx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rcx, %r10, %rbp
-	adcq	%r11, %r10
-	movq	40(%r8), %rdx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	mulxq	%rcx, %r11, %rbx
-	adcq	%r13, %r11
-	movq	48(%r8), %rdx
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	mulxq	%rcx, %r14, %rcx
-	adcq	%r12, %r14
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-16(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	-24(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	-32(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-40(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	%rbp, %r11
-	adcq	%rbx, %r14
-	adcq	%rcx, %r12
-	movq	%rsi, 32(%rdi)
-	movq	-64(%rsp), %rsi         ## 8-byte Reload
-	movq	40(%rsi), %rdi
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %r13, %rcx
-	movq	%rcx, -88(%rsp)         ## 8-byte Spill
-	addq	%r9, %r13
-	movq	-104(%rsp), %rdx        ## 8-byte Reload
-	mulxq	%rdi, %rcx, %rdx
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	adcq	%rax, %rcx
-	movq	-96(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %rax, %rdx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	adcq	%r15, %rax
-	movq	-80(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %rbx, %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	adcq	%r10, %rbx
-	movq	-72(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %rbp, %r15
-	adcq	%r11, %rbp
-	movq	-56(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rdi, %r9, %r11
-	adcq	%r14, %r9
-	movq	-8(%rsp), %rdx          ## 8-byte Reload
-	mulxq	%rdi, %r10, %rdx
-	adcq	%r12, %r10
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	addq	-88(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rax        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	%r15, %r9
-	movq	-48(%rsp), %r14         ## 8-byte Reload
-	movq	%r13, 40(%r14)
-	adcq	%r11, %r10
-	adcq	%rdx, %rdi
-	movq	48(%rsi), %rdx
-	mulxq	(%r8), %r11, %rsi
-	movq	%rsi, -64(%rsp)         ## 8-byte Spill
-	addq	%rcx, %r11
-	mulxq	8(%r8), %rsi, %r15
-	adcq	%rax, %rsi
-	mulxq	16(%r8), %rcx, %rax
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	adcq	%rbx, %rcx
-	mulxq	24(%r8), %rbx, %r12
-	adcq	%rbp, %rbx
-	mulxq	32(%r8), %rbp, %r13
-	adcq	%r9, %rbp
-	mulxq	40(%r8), %rax, %r9
-	adcq	%r10, %rax
-	mulxq	48(%r8), %rdx, %r8
-	adcq	%rdi, %rdx
-	sbbq	%r10, %r10
-	andl	$1, %r10d
-	addq	-64(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	%r15, %rcx
-	movq	%r11, 48(%r14)
-	movq	%rsi, 56(%r14)
-	movq	%rcx, 64(%r14)
-	adcq	-104(%rsp), %rbx        ## 8-byte Folded Reload
-	movq	%rbx, 72(%r14)
-	adcq	%r12, %rbp
-	movq	%rbp, 80(%r14)
-	adcq	%r13, %rax
-	movq	%rax, 88(%r14)
-	adcq	%r9, %rdx
-	movq	%rdx, 96(%r14)
-	adcq	%r8, %r10
-	movq	%r10, 104(%r14)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
+	movq	%r11, 32(%rdi)
 	retq
-
-	.globl	_mcl_fpDbl_sqrPre7Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_mulPre4Lbmi2         ## -- Begin function mcl_fpDbl_mulPre4Lbmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre7Lbmi2:                ## @mcl_fpDbl_sqrPre7Lbmi2
-## BB#0:
+_mcl_fpDbl_mulPre4Lbmi2:                ## @mcl_fpDbl_mulPre4Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, -40(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %rdx
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rax
-	mulxq	%rcx, %r8, %r10
-	movq	24(%rsi), %rbx
-	movq	%rbx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, %rdx
-	mulxq	%rcx, %r12, %rbp
-	movq	%rbp, -48(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rdx
-	mulxq	%rcx, %rdx, %rdi
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	addq	%r12, %rdi
-	adcq	%rbp, %r8
-	movq	%rbx, %rdx
-	mulxq	%rcx, %rbp, %r9
-	adcq	%r10, %rbp
-	movq	32(%rsi), %rdx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rcx, %r11, %r14
-	adcq	%r9, %r11
-	movq	40(%rsi), %rdx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	mulxq	%rcx, %r10, %r15
-	adcq	%r14, %r10
-	movq	48(%rsi), %r14
-	movq	%r14, %rdx
-	mulxq	%rcx, %rcx, %r13
-	adcq	%r15, %rcx
-	movq	-40(%rsp), %rdx         ## 8-byte Reload
-	movq	-80(%rsp), %rbx         ## 8-byte Reload
-	movq	%rbx, (%rdx)
-	adcq	$0, %r13
-	addq	%r12, %rdi
-	movq	%rax, %rdx
-	mulxq	%rax, %r12, %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	adcq	%r8, %r12
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %r8, %rdx
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	adcq	%rbp, %r8
-	movq	-96(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %r9, %rbp
-	adcq	%r11, %r9
-	movq	-72(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %r15, %rdx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	adcq	%r10, %r15
-	movq	-56(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %r11, %rbx
-	adcq	%rcx, %r11
-	movq	%r14, %rdx
-	mulxq	%rax, %r14, %rax
-	adcq	%r13, %r14
-	sbbq	%r13, %r13
-	andl	$1, %r13d
-	addq	-48(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	%rbp, %r15
-	movq	-40(%rsp), %rcx         ## 8-byte Reload
-	movq	%rdi, 8(%rcx)
-	adcq	-96(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	%rbx, %r14
-	adcq	%rax, %r13
-	movq	(%rsi), %rdx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %rcx
-	movq	%rcx, -88(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %rbx
-	mulxq	%rbx, %rax, %rdx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	addq	%r12, %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rdx
-	mulxq	%rbx, %r10, %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	adcq	%r8, %r10
-	movq	%rbx, %rdx
-	mulxq	%rbx, %r12, %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	adcq	%r9, %r12
-	movq	24(%rsi), %rax
-	movq	%rax, %rdx
-	mulxq	%rbx, %r8, %rdi
-	movq	%rdi, -56(%rsp)         ## 8-byte Spill
-	adcq	%r8, %r15
-	movq	32(%rsi), %rdx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rbx, %rcx, %rdx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	adcq	%r11, %rcx
-	movq	40(%rsi), %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	mulxq	%rbx, %rbp, %r11
-	adcq	%r14, %rbp
-	movq	48(%rsi), %r14
-	movq	%r14, %rdx
-	mulxq	%rbx, %r9, %rdx
-	adcq	%r13, %r9
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	-64(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-16(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-24(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	%rdi, %rcx
-	adcq	-32(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	%r11, %r9
-	adcq	%rdx, %rbx
-	movq	-96(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %rdi, %rdx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	addq	%r10, %rdi
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %r11, %rdx
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	adcq	%r12, %r11
-	adcq	%r8, %r15
-	movq	%rax, %rdx
-	mulxq	%rax, %r8, %rdx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	adcq	%rcx, %r8
-	movq	-72(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %r13, %rcx
-	movq	%rcx, -72(%rsp)         ## 8-byte Spill
-	adcq	%rbp, %r13
-	movq	-80(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %r12, %rbp
-	adcq	%r9, %r12
+	movq	%rdi, %r9
+	movq	(%rsi), %r14
+	movq	8(%rsi), %rbp
+	movq	(%rdx), %rax
+	movq	%rdx, %rbx
+	movq	%rdx, -16(%rsp)                 ## 8-byte Spill
 	movq	%r14, %rdx
-	mulxq	%rax, %rcx, %rax
-	adcq	%rbx, %rcx
-	sbbq	%r10, %r10
-	andl	$1, %r10d
-	addq	-96(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-56(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	-64(%rsp), %r13         ## 8-byte Folded Reload
-	movq	-40(%rsp), %rdx         ## 8-byte Reload
-	movq	-48(%rsp), %rbx         ## 8-byte Reload
-	movq	%rbx, 16(%rdx)
-	movq	%rdi, 24(%rdx)
-	adcq	-72(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	%rbp, %rcx
-	adcq	%rax, %r10
-	movq	(%rsi), %rdx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %rdi
-	movq	%rdi, -88(%rsp)         ## 8-byte Spill
-	movq	32(%rsi), %rbx
-	mulxq	%rbx, %rax, %rdx
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	addq	%r11, %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
+	mulxq	%rax, %rcx, %r10
+	movq	16(%rsi), %rdi
+	movq	24(%rsi), %r11
+	movq	%rcx, (%r9)
+	movq	%r11, %rdx
+	mulxq	%rax, %r12, %r15
+	movq	%rbp, %rdx
+	mulxq	%rax, %rsi, %r8
+	addq	%r10, %rsi
 	movq	%rdi, %rdx
-	mulxq	%rbx, %r9, %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	adcq	%r15, %r9
-	movq	16(%rsi), %rdx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rbx, %r15, %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	adcq	%r8, %r15
-	movq	24(%rsi), %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	mulxq	%rbx, %r8, %rbp
-	adcq	%r13, %r8
-	movq	%rbx, %rdx
-	mulxq	%rbx, %r13, %r14
-	adcq	%r12, %r13
-	movq	40(%rsi), %rax
-	movq	%rax, %rdx
-	mulxq	%rbx, %rdx, %rdi
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	movq	%rdi, -56(%rsp)         ## 8-byte Spill
-	adcq	%rdx, %rcx
-	movq	48(%rsi), %rdx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	mulxq	%rbx, %r11, %rdx
-	adcq	%r10, %r11
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-24(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	-32(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-8(%rsp), %r8           ## 8-byte Folded Reload
-	adcq	%rbp, %r13
-	adcq	%r14, %rcx
-	adcq	%rdi, %r11
-	adcq	%rdx, %r12
-	movq	-96(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %r14, %rdi
-	addq	%r9, %r14
-	movq	-88(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %rbx, %rdx
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	adcq	%r15, %rbx
-	movq	-72(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %rbp, %rdx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	adcq	%r8, %rbp
-	movq	-80(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %r10, %r15
-	adcq	%r13, %r10
-	adcq	-16(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rax, %rdx
-	mulxq	%rax, %r9, %r13
-	adcq	%r11, %r9
-	movq	-64(%rsp), %rdx         ## 8-byte Reload
-	mulxq	%rax, %rax, %r11
-	adcq	%r12, %rax
-	sbbq	%r8, %r8
-	andl	$1, %r8d
-	addq	%rdi, %rbx
-	adcq	-88(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r10         ## 8-byte Folded Reload
+	movq	%rdi, %r10
+	mulxq	%rax, %rax, %rcx
+	adcq	%r8, %rax
+	adcq	%r12, %rcx
+	adcq	$0, %r15
+	movq	8(%rbx), %rdx
+	mulxq	%r14, %r13, %r8
+	movq	%r14, -8(%rsp)                  ## 8-byte Spill
+	addq	%rsi, %r13
+	mulxq	%rbp, %rbx, %r12
+	adcq	%rax, %rbx
+	mulxq	%rdi, %rsi, %rax
+	adcq	%rcx, %rsi
+	mulxq	%r11, %rcx, %rdx
 	adcq	%r15, %rcx
-	movq	-40(%rsp), %rdi         ## 8-byte Reload
-	movq	-48(%rsp), %rdx         ## 8-byte Reload
-	movq	%rdx, 32(%rdi)
-	movq	%r14, 40(%rdi)
-	adcq	-56(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	%r13, %rax
-	adcq	%r11, %r8
-	movq	48(%rsi), %rdx
-	mulxq	(%rsi), %r12, %r11
-	addq	%rbx, %r12
-	mulxq	8(%rsi), %rbx, %r14
-	adcq	%rbp, %rbx
-	mulxq	16(%rsi), %rbp, %r15
-	adcq	%r10, %rbp
-	mulxq	24(%rsi), %rdi, %r10
-	adcq	%rcx, %rdi
-	mulxq	32(%rsi), %rcx, %r13
-	adcq	%r9, %rcx
-	mulxq	40(%rsi), %rsi, %r9
-	adcq	%rax, %rsi
-	mulxq	%rdx, %rdx, %rax
+	setb	%r15b
+	addq	%r8, %rbx
+	adcq	%r12, %rsi
+	movq	%r13, 8(%r9)
+	movzbl	%r15b, %r8d
+	adcq	%rax, %rcx
+	adcq	%rdx, %r8
+	movq	-16(%rsp), %rax                 ## 8-byte Reload
+	movq	16(%rax), %rdx
+	mulxq	%rbp, %rdi, %r15
+	mulxq	%r14, %rax, %r12
+	addq	%rdi, %r12
+	mulxq	%r10, %r13, %r14
+	adcq	%r15, %r13
+	mulxq	%r11, %rdi, %r15
+	adcq	%r14, %rdi
+	adcq	$0, %r15
+	addq	%rbx, %rax
+	adcq	%rsi, %r12
+	movq	%rax, 16(%r9)
+	adcq	%rcx, %r13
+	adcq	%r8, %rdi
+	adcq	$0, %r15
+	movq	-16(%rsp), %rax                 ## 8-byte Reload
+	movq	24(%rax), %rdx
+	mulxq	%rbp, %rcx, %r8
+	mulxq	-8(%rsp), %rsi, %rbp            ## 8-byte Folded Reload
+	addq	%rcx, %rbp
+	mulxq	%r11, %rcx, %rbx
+	mulxq	%r10, %rdx, %rax
 	adcq	%r8, %rdx
-	sbbq	%r8, %r8
-	andl	$1, %r8d
-	addq	%r11, %rbx
+	adcq	%rcx, %rax
+	adcq	$0, %rbx
+	addq	%r12, %rsi
+	movq	%rsi, 24(%r9)
+	adcq	%r13, %rbp
+	movq	%rbp, 32(%r9)
+	adcq	%rdi, %rdx
+	movq	%rdx, 40(%r9)
+	adcq	%r15, %rax
+	movq	%rax, 48(%r9)
+	adcq	$0, %rbx
+	movq	%rbx, 56(%r9)
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	retq
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sqrPre4Lbmi2         ## -- Begin function mcl_fpDbl_sqrPre4Lbmi2
+	.p2align	4, 0x90
+_mcl_fpDbl_sqrPre4Lbmi2:                ## @mcl_fpDbl_sqrPre4Lbmi2
+## %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	24(%rsi), %r8
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rcx
+	movq	%r8, %rdx
+	movq	%r8, -64(%rsp)                  ## 8-byte Spill
+	mulxq	%rcx, %r14, %r9
+	movq	%r14, -8(%rsp)                  ## 8-byte Spill
+	movq	16(%rsi), %r12
+	movq	%r12, %rdx
+	mulxq	%rcx, %rbp, %rsi
+	movq	%rbp, -40(%rsp)                 ## 8-byte Spill
+	movq	%rsi, -24(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rdx
+	mulxq	%rcx, %r10, %r11
+	mulxq	%rax, %r15, %rbx
+	movq	%r15, -56(%rsp)                 ## 8-byte Spill
+	addq	%rbx, %r10
+	adcq	%rbp, %r11
+	movq	%rsi, %rbp
 	adcq	%r14, %rbp
-	movq	-40(%rsp), %r11         ## 8-byte Reload
-	movq	%r12, 48(%r11)
-	movq	%rbx, 56(%r11)
-	movq	%rbp, 64(%r11)
-	adcq	%r15, %rdi
-	movq	%rdi, 72(%r11)
-	adcq	%r10, %rcx
-	movq	%rcx, 80(%r11)
-	adcq	%r13, %rsi
-	movq	%rsi, 88(%r11)
-	adcq	%r9, %rdx
-	movq	%rdx, 96(%r11)
+	movq	%r9, %r14
+	adcq	$0, %r14
+	movq	%rax, %rdx
+	mulxq	%rax, %rcx, %rsi
+	movq	%rcx, -48(%rsp)                 ## 8-byte Spill
+	addq	%r15, %rsi
+	movq	%r12, %rdx
+	mulxq	%rax, %rdx, %rcx
+	movq	%rdx, -32(%rsp)                 ## 8-byte Spill
+	adcq	%rdx, %rbx
+	movq	%r8, %rdx
+	mulxq	%rax, %rax, %r15
+	movq	%rax, -16(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %r8
 	adcq	%rax, %r8
-	movq	%r8, 104(%r11)
+	movq	%r15, %r13
+	adcq	$0, %r13
+	addq	-56(%rsp), %rsi                 ## 8-byte Folded Reload
+	adcq	%r10, %rbx
+	adcq	%r11, %r8
+	adcq	%rbp, %r13
+	adcq	$0, %r14
+	addq	-40(%rsp), %rcx                 ## 8-byte Folded Reload
+	movq	%r12, %rdx
+	mulxq	%r12, %rbp, %r11
+	adcq	-24(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	-48(%rsp), %rax                 ## 8-byte Reload
+	movq	%rax, (%rdi)
+	movq	-64(%rsp), %rdx                 ## 8-byte Reload
+	mulxq	%r12, %rdx, %r10
+	adcq	%rdx, %r11
+	movq	%r10, %rax
+	adcq	$0, %rax
+	addq	-32(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%rsi, 8(%rdi)
+	adcq	%r8, %rcx
+	movq	%rbx, 16(%rdi)
+	adcq	%r13, %rbp
+	adcq	%r14, %r11
+	adcq	$0, %rax
+	addq	-8(%rsp), %r15                  ## 8-byte Folded Reload
+	adcq	%rdx, %r9
+	movq	-64(%rsp), %rdx                 ## 8-byte Reload
+	mulxq	%rdx, %rdx, %rsi
+	adcq	%r10, %rdx
+	adcq	$0, %rsi
+	addq	-16(%rsp), %rcx                 ## 8-byte Folded Reload
+	movq	%rcx, 24(%rdi)
+	adcq	%rbp, %r15
+	movq	%r15, 32(%rdi)
+	adcq	%r11, %r9
+	movq	%r9, 40(%rdi)
+	adcq	%rax, %rdx
+	movq	%rdx, 48(%rdi)
+	adcq	$0, %rsi
+	movq	%rsi, 56(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -6760,529 +1314,185 @@ _mcl_fpDbl_sqrPre7Lbmi2:                ## @mcl_fpDbl_sqrPre7Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_mont7Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_mont4Lbmi2              ## -- Begin function mcl_fp_mont4Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_mont7Lbmi2:                     ## @mcl_fp_mont7Lbmi2
-## BB#0:
+_mcl_fp_mont4Lbmi2:                     ## @mcl_fp_mont4Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$56, %rsp
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rdi, 48(%rsp)          ## 8-byte Spill
-	movq	48(%rsi), %rdi
-	movq	%rdi, -64(%rsp)         ## 8-byte Spill
+	movq	%rdx, -32(%rsp)                 ## 8-byte Spill
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	24(%rsi), %rdi
+	movq	%rdi, -48(%rsp)                 ## 8-byte Spill
 	movq	(%rdx), %rax
 	movq	%rdi, %rdx
-	mulxq	%rax, %rdx, %r13
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	40(%rsi), %rdx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rdx, %r8
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	movq	32(%rsi), %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r10, %rdi
-	movq	24(%rsi), %rdx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r14, %rbp
+	mulxq	%rax, %r14, %r11
 	movq	16(%rsi), %rdx
-	movq	%rdx, 32(%rsp)          ## 8-byte Spill
-	mulxq	%rax, %r12, %r15
-	movq	(%rsi), %rbx
-	movq	%rbx, 24(%rsp)          ## 8-byte Spill
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %rbx, %r10
+	movq	(%rsi), %r12
 	movq	8(%rsi), %rdx
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	mulxq	%rax, %rsi, %r11
-	movq	%rbx, %rdx
-	mulxq	%rax, %rdx, %r9
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	addq	%rsi, %r9
-	adcq	%r12, %r11
-	adcq	%r14, %r15
-	adcq	%r10, %rbp
-	movq	%rbp, -112(%rsp)        ## 8-byte Spill
-	adcq	-48(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, -104(%rsp)        ## 8-byte Spill
-	adcq	-40(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r8, -128(%rsp)         ## 8-byte Spill
-	adcq	$0, %r13
-	movq	%r13, -120(%rsp)        ## 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	imulq	%rax, %rdx
-	movq	32(%rcx), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rbx, %r13
-	movq	16(%rcx), %rsi
-	movq	%rsi, -48(%rsp)         ## 8-byte Spill
-	mulxq	%rsi, %r14, %rbp
-	movq	8(%rcx), %rsi
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	mulxq	%rsi, %rsi, %rax
-	movq	(%rcx), %rdi
-	movq	%rdi, (%rsp)            ## 8-byte Spill
-	mulxq	%rdi, %r8, %r12
-	addq	%rsi, %r12
-	adcq	%r14, %rax
-	movq	%rax, %rdi
-	movq	24(%rcx), %rsi
-	movq	%rsi, -8(%rsp)          ## 8-byte Spill
-	mulxq	%rsi, %r10, %r14
-	adcq	%rbp, %r10
-	adcq	%rbx, %r14
-	movq	40(%rcx), %rsi
-	movq	%rsi, -16(%rsp)         ## 8-byte Spill
-	mulxq	%rsi, %rbp, %rsi
-	adcq	%r13, %rbp
-	movq	48(%rcx), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rax, %rbx
-	adcq	%rsi, %rax
+	movq	%rdx, -40(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %rdi, %r8
+	movq	%r12, %rdx
+	movq	%r12, -16(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r15, %r13
+	addq	%rdi, %r13
+	adcq	%rbx, %r8
+	adcq	%r14, %r10
+	adcq	$0, %r11
+	movq	-8(%rcx), %rdx
+	movq	%rdx, -64(%rsp)                 ## 8-byte Spill
+	imulq	%r15, %rdx
+	movq	24(%rcx), %rax
+	movq	%rax, -24(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r14, %rbx
+	movq	16(%rcx), %rax
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r9, %rdi
+	movq	(%rcx), %rbp
+	movq	%rbp, -72(%rsp)                 ## 8-byte Spill
+	movq	8(%rcx), %rax
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %rsi, %rcx
+	mulxq	%rbp, %rdx, %rax
+	addq	%rsi, %rax
+	adcq	%r9, %rcx
+	adcq	%r14, %rdi
 	adcq	$0, %rbx
-	addq	-96(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	%r9, %r12
-	adcq	%r11, %rdi
-	movq	%rdi, -96(%rsp)         ## 8-byte Spill
-	adcq	%r15, %r10
-	adcq	-112(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	-128(%rsp), %rax        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rbx        ## 8-byte Folded Reload
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	-56(%rsp), %rcx         ## 8-byte Reload
-	movq	8(%rcx), %rdx
-	mulxq	-64(%rsp), %rdi, %rcx   ## 8-byte Folded Reload
-	movq	%rdi, -104(%rsp)        ## 8-byte Spill
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	mulxq	-72(%rsp), %rdi, %rcx   ## 8-byte Folded Reload
-	movq	%rdi, -88(%rsp)         ## 8-byte Spill
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	mulxq	16(%rsp), %r9, %r8      ## 8-byte Folded Reload
-	mulxq	24(%rsp), %rdi, %r11    ## 8-byte Folded Reload
-	movq	%rdi, -112(%rsp)        ## 8-byte Spill
-	addq	%r9, %r11
-	mulxq	32(%rsp), %rcx, %r9     ## 8-byte Folded Reload
+	addq	%r15, %rdx
+	adcq	%r13, %rax
 	adcq	%r8, %rcx
-	movq	%rcx, %rdi
-	mulxq	-32(%rsp), %r13, %rcx   ## 8-byte Folded Reload
-	adcq	%r9, %r13
-	mulxq	-80(%rsp), %r8, %r15    ## 8-byte Folded Reload
-	adcq	%rcx, %r8
-	adcq	-88(%rsp), %r15         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	-120(%rsp), %rcx        ## 8-byte Reload
-	adcq	$0, %rcx
-	movq	-112(%rsp), %r9         ## 8-byte Reload
-	addq	%r12, %r9
-	movq	%r9, -112(%rsp)         ## 8-byte Spill
-	movq	%r11, %r12
-	adcq	-96(%rsp), %r12         ## 8-byte Folded Reload
 	adcq	%r10, %rdi
-	movq	%rdi, -88(%rsp)         ## 8-byte Spill
-	adcq	%r14, %r13
-	adcq	%rbp, %r8
-	adcq	%rax, %r15
-	adcq	%rbx, %rdx
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	adcq	%rsi, %rcx
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%r9, %rdx
-	imulq	40(%rsp), %rdx          ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %r10, %rax   ## 8-byte Folded Reload
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	mulxq	-8(%rsp), %rcx, %rax    ## 8-byte Folded Reload
-	mulxq	8(%rsp), %rdi, %rbx     ## 8-byte Folded Reload
-	mulxq	(%rsp), %r14, %r9       ## 8-byte Folded Reload
-	addq	%rdi, %r9
-	mulxq	-48(%rsp), %rbp, %r11   ## 8-byte Folded Reload
+	adcq	%r11, %rbx
+	movq	-32(%rsp), %r13                 ## 8-byte Reload
+	movq	8(%r13), %rdx
+	mulxq	-48(%rsp), %r11, %r10           ## 8-byte Folded Reload
+	mulxq	-88(%rsp), %r14, %rbp           ## 8-byte Folded Reload
+	mulxq	-40(%rsp), %r15, %r8            ## 8-byte Folded Reload
+	mulxq	%r12, %r9, %rsi
+	setb	%dl
+	addq	%r15, %rsi
+	adcq	%r14, %r8
+	adcq	%r11, %rbp
+	adcq	$0, %r10
+	addq	%rax, %r9
+	adcq	%rcx, %rsi
+	adcq	%rdi, %r8
 	adcq	%rbx, %rbp
-	adcq	%rcx, %r11
-	mulxq	-40(%rsp), %rbx, %rsi   ## 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	-16(%rsp), %rax, %rcx   ## 8-byte Folded Reload
-	adcq	%rsi, %rax
-	adcq	%r10, %rcx
-	movq	-104(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	-112(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	%r12, %r9
-	adcq	-88(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	%r13, %r11
-	adcq	%r8, %rbx
-	adcq	%r15, %rax
-	adcq	-128(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	adcq	$0, -96(%rsp)           ## 8-byte Folded Spill
-	movq	-56(%rsp), %rdx         ## 8-byte Reload
-	movq	16(%rdx), %rdx
-	mulxq	-64(%rsp), %rdi, %rsi   ## 8-byte Folded Reload
-	movq	%rdi, -112(%rsp)        ## 8-byte Spill
-	movq	%rsi, -120(%rsp)        ## 8-byte Spill
-	mulxq	-72(%rsp), %rdi, %rsi   ## 8-byte Folded Reload
-	movq	%rdi, -88(%rsp)         ## 8-byte Spill
-	movq	%rsi, -128(%rsp)        ## 8-byte Spill
-	mulxq	32(%rsp), %rdi, %r10    ## 8-byte Folded Reload
-	mulxq	16(%rsp), %rsi, %r13    ## 8-byte Folded Reload
-	mulxq	24(%rsp), %r8, %r15     ## 8-byte Folded Reload
-	addq	%rsi, %r15
-	adcq	%rdi, %r13
-	mulxq	-32(%rsp), %r12, %rsi   ## 8-byte Folded Reload
-	adcq	%r10, %r12
-	mulxq	-80(%rsp), %r10, %r14   ## 8-byte Folded Reload
-	adcq	%rsi, %r10
-	adcq	-88(%rsp), %r14         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rsi        ## 8-byte Reload
-	adcq	-112(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r9, %r8
-	movq	%r8, -112(%rsp)         ## 8-byte Spill
-	adcq	%rbp, %r15
-	adcq	%r11, %r13
-	adcq	%rbx, %r12
+	movzbl	%dl, %eax
 	adcq	%rax, %r10
-	adcq	%rcx, %r14
-	adcq	-104(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	%rsi, -128(%rsp)        ## 8-byte Spill
-	adcq	-96(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, %rbx
-	movq	%r8, %rdx
-	imulq	40(%rsp), %rdx          ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %rcx, %rax   ## 8-byte Folded Reload
-	movq	%rcx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	mulxq	-8(%rsp), %rcx, %rax    ## 8-byte Folded Reload
-	mulxq	8(%rsp), %rbp, %rsi     ## 8-byte Folded Reload
-	mulxq	(%rsp), %r11, %r8       ## 8-byte Folded Reload
-	addq	%rbp, %r8
-	mulxq	-48(%rsp), %rbp, %r9    ## 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	adcq	%rcx, %r9
-	mulxq	-40(%rsp), %rsi, %rdi   ## 8-byte Folded Reload
-	adcq	%rax, %rsi
-	mulxq	-16(%rsp), %rax, %rcx   ## 8-byte Folded Reload
-	adcq	%rdi, %rax
-	adcq	-96(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	-104(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	-112(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	%r15, %r8
-	adcq	%r13, %rbp
-	adcq	%r12, %r9
-	adcq	%r10, %rsi
-	adcq	%r14, %rax
-	adcq	-128(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
+	setb	-89(%rsp)                       ## 1-byte Folded Spill
+	movq	-64(%rsp), %rdx                 ## 8-byte Reload
+	imulq	%r9, %rdx
+	movq	-24(%rsp), %r12                 ## 8-byte Reload
+	mulxq	%r12, %r14, %rbx
+	mulxq	-80(%rsp), %r15, %rcx           ## 8-byte Folded Reload
+	mulxq	-56(%rsp), %r11, %rdi           ## 8-byte Folded Reload
+	mulxq	-72(%rsp), %rdx, %rax           ## 8-byte Folded Reload
+	addq	%r11, %rax
+	adcq	%r15, %rdi
+	adcq	%r14, %rcx
 	adcq	$0, %rbx
-	movq	%rbx, -128(%rsp)        ## 8-byte Spill
-	movq	-56(%rsp), %rdx         ## 8-byte Reload
-	movq	24(%rdx), %rdx
-	mulxq	-64(%rsp), %rbx, %rdi   ## 8-byte Folded Reload
-	movq	%rbx, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, -120(%rsp)        ## 8-byte Spill
-	mulxq	-72(%rsp), %rdi, %r13   ## 8-byte Folded Reload
-	movq	%rdi, -88(%rsp)         ## 8-byte Spill
-	mulxq	32(%rsp), %r10, %r11    ## 8-byte Folded Reload
-	mulxq	16(%rsp), %rdi, %r15    ## 8-byte Folded Reload
-	mulxq	24(%rsp), %rbx, %r12    ## 8-byte Folded Reload
-	movq	%rbx, -112(%rsp)        ## 8-byte Spill
-	addq	%rdi, %r12
-	adcq	%r10, %r15
-	mulxq	-32(%rsp), %rbx, %rdi   ## 8-byte Folded Reload
-	adcq	%r11, %rbx
-	mulxq	-80(%rsp), %r10, %r14   ## 8-byte Folded Reload
-	adcq	%rdi, %r10
-	adcq	-88(%rsp), %r14         ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r13         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	movq	-112(%rsp), %rdi        ## 8-byte Reload
-	addq	%r8, %rdi
-	movq	%rdi, -112(%rsp)        ## 8-byte Spill
-	adcq	%rbp, %r12
-	adcq	%r9, %r15
-	adcq	%rsi, %rbx
-	adcq	%rax, %r10
-	adcq	%rcx, %r14
-	adcq	-104(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-128(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rdx
-	imulq	40(%rsp), %rdx          ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %rcx, %rax   ## 8-byte Folded Reload
-	movq	%rcx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	mulxq	-8(%rsp), %rcx, %rax    ## 8-byte Folded Reload
-	mulxq	8(%rsp), %rbp, %rsi     ## 8-byte Folded Reload
-	mulxq	(%rsp), %r11, %r8       ## 8-byte Folded Reload
-	addq	%rbp, %r8
-	mulxq	-48(%rsp), %rbp, %r9    ## 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	adcq	%rcx, %r9
-	mulxq	-40(%rsp), %rsi, %rdi   ## 8-byte Folded Reload
-	adcq	%rax, %rsi
-	mulxq	-16(%rsp), %rax, %rcx   ## 8-byte Folded Reload
-	adcq	%rdi, %rax
-	adcq	-96(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	-112(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	%r12, %r8
-	adcq	%r15, %rbp
-	adcq	%rbx, %r9
-	adcq	%r10, %rsi
-	adcq	%r14, %rax
-	adcq	%r13, %rcx
-	adcq	-120(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	adcq	$0, -104(%rsp)          ## 8-byte Folded Spill
-	movq	-56(%rsp), %rdx         ## 8-byte Reload
-	movq	32(%rdx), %rdx
-	mulxq	-64(%rsp), %rbx, %rdi   ## 8-byte Folded Reload
-	movq	%rbx, -112(%rsp)        ## 8-byte Spill
-	movq	%rdi, -120(%rsp)        ## 8-byte Spill
-	mulxq	-72(%rsp), %rdi, %r11   ## 8-byte Folded Reload
-	movq	%rdi, -96(%rsp)         ## 8-byte Spill
-	mulxq	32(%rsp), %r10, %r13    ## 8-byte Folded Reload
-	mulxq	16(%rsp), %rdi, %r15    ## 8-byte Folded Reload
-	mulxq	24(%rsp), %rbx, %r12    ## 8-byte Folded Reload
-	addq	%rdi, %r12
-	adcq	%r10, %r15
-	mulxq	-32(%rsp), %r10, %rdi   ## 8-byte Folded Reload
-	adcq	%r13, %r10
-	mulxq	-80(%rsp), %r13, %r14   ## 8-byte Folded Reload
-	adcq	%rdi, %r13
-	adcq	-96(%rsp), %r14         ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r11        ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r8, %rbx
-	movq	%rbx, -96(%rsp)         ## 8-byte Spill
-	adcq	%rbp, %r12
-	adcq	%r9, %r15
-	adcq	%rsi, %r10
-	adcq	%rax, %r13
-	adcq	%rcx, %r14
-	adcq	-128(%rsp), %r11        ## 8-byte Folded Reload
-	movq	%r11, -88(%rsp)         ## 8-byte Spill
-	adcq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rbx, %rdx
-	imulq	40(%rsp), %rdx          ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %rcx, %rax   ## 8-byte Folded Reload
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	mulxq	-8(%rsp), %rcx, %rax    ## 8-byte Folded Reload
-	mulxq	8(%rsp), %rbp, %rsi     ## 8-byte Folded Reload
-	mulxq	(%rsp), %r9, %r11       ## 8-byte Folded Reload
-	addq	%rbp, %r11
-	mulxq	-48(%rsp), %rbp, %r8    ## 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	adcq	%rcx, %r8
-	mulxq	-40(%rsp), %rsi, %rdi   ## 8-byte Folded Reload
-	adcq	%rax, %rsi
-	mulxq	-16(%rsp), %rax, %rcx   ## 8-byte Folded Reload
-	adcq	%rdi, %rax
-	adcq	-128(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	-104(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	-96(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	%r12, %r11
-	adcq	%r15, %rbp
-	adcq	%r10, %r8
-	adcq	%r13, %rsi
-	adcq	%r14, %rax
-	adcq	-88(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	adcq	$0, -112(%rsp)          ## 8-byte Folded Spill
-	movq	-56(%rsp), %rdx         ## 8-byte Reload
-	movq	40(%rdx), %rdx
-	mulxq	-64(%rsp), %rbx, %rdi   ## 8-byte Folded Reload
-	movq	%rbx, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, -120(%rsp)        ## 8-byte Spill
-	mulxq	-72(%rsp), %rbx, %rdi   ## 8-byte Folded Reload
-	movq	%rbx, -88(%rsp)         ## 8-byte Spill
-	movq	%rdi, -128(%rsp)        ## 8-byte Spill
-	mulxq	32(%rsp), %rbx, %r10    ## 8-byte Folded Reload
-	mulxq	16(%rsp), %rdi, %r13    ## 8-byte Folded Reload
-	mulxq	24(%rsp), %r9, %r12     ## 8-byte Folded Reload
-	addq	%rdi, %r12
-	adcq	%rbx, %r13
-	mulxq	-32(%rsp), %r15, %rdi   ## 8-byte Folded Reload
-	adcq	%r10, %r15
-	mulxq	-80(%rsp), %r10, %r14   ## 8-byte Folded Reload
-	adcq	%rdi, %r10
-	adcq	-88(%rsp), %r14         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdi        ## 8-byte Reload
-	adcq	-96(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r11, %r9
-	movq	%r9, -96(%rsp)          ## 8-byte Spill
-	adcq	%rbp, %r12
-	adcq	%r8, %r13
-	adcq	%rsi, %r15
-	adcq	%rax, %r10
-	adcq	%rcx, %r14
-	adcq	-104(%rsp), %rdi        ## 8-byte Folded Reload
-	movq	%rdi, -128(%rsp)        ## 8-byte Spill
-	adcq	-112(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%r9, %rdx
-	imulq	40(%rsp), %rdx          ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %rcx, %rax   ## 8-byte Folded Reload
-	movq	%rcx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	mulxq	-8(%rsp), %rcx, %rax    ## 8-byte Folded Reload
-	mulxq	8(%rsp), %rdi, %rsi     ## 8-byte Folded Reload
-	mulxq	(%rsp), %r11, %rbx      ## 8-byte Folded Reload
-	addq	%rdi, %rbx
-	mulxq	-48(%rsp), %r8, %r9     ## 8-byte Folded Reload
-	adcq	%rsi, %r8
-	adcq	%rcx, %r9
-	mulxq	-40(%rsp), %rdi, %rbp   ## 8-byte Folded Reload
-	adcq	%rax, %rdi
-	mulxq	-16(%rsp), %rcx, %rsi   ## 8-byte Folded Reload
+	addq	%r9, %rdx
+	adcq	%rsi, %rax
+	adcq	%r8, %rdi
 	adcq	%rbp, %rcx
-	adcq	-88(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	-104(%rsp), %rax        ## 8-byte Reload
-	adcq	$0, %rax
-	addq	-96(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	%r12, %rbx
-	adcq	%r13, %r8
-	adcq	%r15, %r9
-	adcq	%r10, %rdi
+	adcq	%r10, %rbx
+	movzbl	-89(%rsp), %r11d                ## 1-byte Folded Reload
+	adcq	$0, %r11
+	movq	16(%r13), %rdx
+	mulxq	-48(%rsp), %r14, %r8            ## 8-byte Folded Reload
+	mulxq	-88(%rsp), %r15, %r10           ## 8-byte Folded Reload
+	mulxq	-40(%rsp), %r13, %rbp           ## 8-byte Folded Reload
+	mulxq	-16(%rsp), %r9, %rsi            ## 8-byte Folded Reload
+	addq	%r13, %rsi
+	adcq	%r15, %rbp
+	adcq	%r14, %r10
+	adcq	$0, %r8
+	addq	%rax, %r9
+	adcq	%rdi, %rsi
+	adcq	%rcx, %rbp
+	adcq	%rbx, %r10
+	adcq	%r11, %r8
+	setb	%r11b
+	movq	-64(%rsp), %rdx                 ## 8-byte Reload
+	imulq	%r9, %rdx
+	mulxq	%r12, %r14, %rbx
+	mulxq	-80(%rsp), %r15, %rcx           ## 8-byte Folded Reload
+	movq	-56(%rsp), %r12                 ## 8-byte Reload
+	mulxq	%r12, %r13, %rdi
+	mulxq	-72(%rsp), %rdx, %rax           ## 8-byte Folded Reload
+	addq	%r13, %rax
+	adcq	%r15, %rdi
 	adcq	%r14, %rcx
-	adcq	-128(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rax        ## 8-byte Folded Reload
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	-112(%rsp), %r12        ## 8-byte Reload
-	adcq	$0, %r12
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	movq	48(%rax), %rdx
-	mulxq	-64(%rsp), %rbp, %rax   ## 8-byte Folded Reload
-	movq	%rbp, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	mulxq	-72(%rsp), %rbp, %rax   ## 8-byte Folded Reload
-	movq	%rbp, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	mulxq	-80(%rsp), %rbp, %rax   ## 8-byte Folded Reload
-	movq	%rbp, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	mulxq	-32(%rsp), %r13, %rbp   ## 8-byte Folded Reload
-	mulxq	32(%rsp), %r14, %r15    ## 8-byte Folded Reload
-	mulxq	16(%rsp), %rax, %r11    ## 8-byte Folded Reload
-	mulxq	24(%rsp), %rdx, %r10    ## 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
+	adcq	$0, %rbx
+	addq	%r9, %rdx
+	adcq	%rsi, %rax
+	adcq	%rbp, %rdi
+	adcq	%r10, %rcx
+	adcq	%r8, %rbx
+	movzbl	%r11b, %r11d
+	adcq	$0, %r11
+	movq	-32(%rsp), %rdx                 ## 8-byte Reload
+	movq	24(%rdx), %rdx
+	mulxq	-48(%rsp), %r14, %r8            ## 8-byte Folded Reload
+	mulxq	-88(%rsp), %r15, %r9            ## 8-byte Folded Reload
+	mulxq	-40(%rsp), %r13, %rbp           ## 8-byte Folded Reload
+	mulxq	-16(%rsp), %r10, %rsi           ## 8-byte Folded Reload
+	addq	%r13, %rsi
+	adcq	%r15, %rbp
+	adcq	%r14, %r9
+	adcq	$0, %r8
 	addq	%rax, %r10
-	adcq	%r14, %r11
-	adcq	%r13, %r15
-	adcq	-112(%rsp), %rbp        ## 8-byte Folded Reload
-	movq	-72(%rsp), %r14         ## 8-byte Reload
-	adcq	-128(%rsp), %r14        ## 8-byte Folded Reload
-	movq	-64(%rsp), %rdx         ## 8-byte Reload
-	adcq	-120(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	adcq	$0, %rax
-	movq	-80(%rsp), %r13         ## 8-byte Reload
-	addq	%rbx, %r13
-	movq	%r13, -80(%rsp)         ## 8-byte Spill
-	adcq	%r8, %r10
-	adcq	%r9, %r11
-	adcq	%rdi, %r15
+	adcq	%rdi, %rsi
 	adcq	%rcx, %rbp
-	movq	%rbp, -32(%rsp)         ## 8-byte Spill
-	adcq	%rsi, %r14
-	movq	%r14, -72(%rsp)         ## 8-byte Spill
-	adcq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	adcq	%r12, %rax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	sbbq	%rdi, %rdi
-	movq	40(%rsp), %rdx          ## 8-byte Reload
-	imulq	%r13, %rdx
-	mulxq	-8(%rsp), %rbp, %rsi    ## 8-byte Folded Reload
-	mulxq	8(%rsp), %rcx, %rbx     ## 8-byte Folded Reload
-	mulxq	(%rsp), %r13, %rax      ## 8-byte Folded Reload
-	addq	%rcx, %rax
-	mulxq	-48(%rsp), %rcx, %r9    ## 8-byte Folded Reload
-	adcq	%rbx, %rcx
-	adcq	%rbp, %r9
-	mulxq	-40(%rsp), %rbp, %rbx   ## 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	mulxq	-16(%rsp), %rsi, %r14   ## 8-byte Folded Reload
-	adcq	%rbx, %rsi
-	mulxq	-24(%rsp), %rdx, %rbx   ## 8-byte Folded Reload
-	adcq	%r14, %rdx
-	adcq	$0, %rbx
-	andl	$1, %edi
-	addq	-80(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	%r10, %rax
-	adcq	%r11, %rcx
-	adcq	%r15, %r9
-	adcq	-32(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-72(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-64(%rsp), %rdx         ## 8-byte Folded Reload
-	adcq	-56(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	$0, %rdi
-	movq	%rax, %r8
-	subq	(%rsp), %r8             ## 8-byte Folded Reload
-	movq	%rcx, %r10
-	sbbq	8(%rsp), %r10           ## 8-byte Folded Reload
-	movq	%r9, %r11
-	sbbq	-48(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%rbp, %r14
-	sbbq	-8(%rsp), %r14          ## 8-byte Folded Reload
-	movq	%rsi, %r15
-	sbbq	-40(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	sbbq	-16(%rsp), %r12         ## 8-byte Folded Reload
-	movq	%rbx, %r13
-	sbbq	-24(%rsp), %r13         ## 8-byte Folded Reload
-	sbbq	$0, %rdi
-	andl	$1, %edi
-	cmovneq	%rbx, %r13
-	testb	%dil, %dil
-	cmovneq	%rax, %r8
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	%r8, (%rax)
-	cmovneq	%rcx, %r10
-	movq	%r10, 8(%rax)
-	cmovneq	%r9, %r11
-	movq	%r11, 16(%rax)
-	cmovneq	%rbp, %r14
-	movq	%r14, 24(%rax)
-	cmovneq	%rsi, %r15
-	movq	%r15, 32(%rax)
-	cmovneq	%rdx, %r12
-	movq	%r12, 40(%rax)
-	movq	%r13, 48(%rax)
-	addq	$56, %rsp
+	adcq	%rbx, %r9
+	adcq	%r11, %r8
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	-64(%rsp), %rdx                 ## 8-byte Reload
+	imulq	%r10, %rdx
+	movq	-72(%rsp), %rcx                 ## 8-byte Reload
+	mulxq	%rcx, %rdi, %rax
+	mulxq	%r12, %r13, %r14
+	addq	%rax, %r13
+	mulxq	-80(%rsp), %rbx, %r15           ## 8-byte Folded Reload
+	adcq	%r14, %rbx
+	movq	-24(%rsp), %r11                 ## 8-byte Reload
+	mulxq	%r11, %r14, %r12
+	adcq	%r15, %r14
+	adcq	$0, %r12
+	addq	%r10, %rdi
+	adcq	%rsi, %r13
+	adcq	%rbp, %rbx
+	adcq	%r9, %r14
+	movzbl	-88(%rsp), %esi                 ## 1-byte Folded Reload
+	adcq	%r8, %r12
+	adcq	$0, %rsi
+	movq	%r13, %rdi
+	subq	%rcx, %rdi
+	movq	%rbx, %rcx
+	sbbq	-56(%rsp), %rcx                 ## 8-byte Folded Reload
+	movq	%r14, %rax
+	sbbq	-80(%rsp), %rax                 ## 8-byte Folded Reload
+	movq	%r12, %rdx
+	sbbq	%r11, %rdx
+	sbbq	$0, %rsi
+	testb	$1, %sil
+	cmovneq	%r12, %rdx
+	movq	-8(%rsp), %rsi                  ## 8-byte Reload
+	movq	%rdx, 24(%rsi)
+	cmovneq	%r14, %rax
+	movq	%rax, 16(%rsi)
+	cmovneq	%rbx, %rcx
+	movq	%rcx, 8(%rsi)
+	cmovneq	%r13, %rdi
+	movq	%rdi, (%rsi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -7290,445 +1500,177 @@ _mcl_fp_mont7Lbmi2:                     ## @mcl_fp_mont7Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montNF7Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_montNF4Lbmi2            ## -- Begin function mcl_fp_montNF4Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_montNF7Lbmi2:                   ## @mcl_fp_montNF7Lbmi2
-## BB#0:
+_mcl_fp_montNF4Lbmi2:                   ## @mcl_fp_montNF4Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$40, %rsp
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rdi, 32(%rsp)          ## 8-byte Spill
-	movq	(%rsi), %rax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	8(%rsi), %rdi
-	movq	%rdi, -96(%rsp)         ## 8-byte Spill
-	movq	(%rdx), %rbp
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	(%rsi), %rdi
+	movq	%rdi, -56(%rsp)                 ## 8-byte Spill
+	movq	8(%rsi), %rbp
+	movq	%rbp, -64(%rsp)                 ## 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdx, %r15
+	movq	%rdx, -24(%rsp)                 ## 8-byte Spill
+	movq	%rbp, %rdx
+	mulxq	%rax, %rbp, %r9
 	movq	%rdi, %rdx
-	mulxq	%rbp, %rdi, %rbx
-	movq	%rax, %rdx
-	mulxq	%rbp, %r8, %r14
+	mulxq	%rax, %r12, %rbx
 	movq	16(%rsi), %rdx
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	addq	%rdi, %r14
-	mulxq	%rbp, %r15, %rax
-	adcq	%rbx, %r15
+	movq	%rdx, -40(%rsp)                 ## 8-byte Spill
+	addq	%rbp, %rbx
+	mulxq	%rax, %r14, %rbp
+	adcq	%r9, %r14
 	movq	24(%rsi), %rdx
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	mulxq	%rbp, %rbx, %rdi
-	adcq	%rax, %rbx
-	movq	32(%rsi), %rdx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	mulxq	%rbp, %r11, %rax
-	adcq	%rdi, %r11
-	movq	40(%rsi), %rdx
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	mulxq	%rbp, %r9, %rdi
-	adcq	%rax, %r9
-	movq	48(%rsi), %rdx
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	mulxq	%rbp, %r10, %rbp
-	adcq	%rdi, %r10
-	adcq	$0, %rbp
-	movq	-8(%rcx), %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%r8, %rdx
-	imulq	%rax, %rdx
+	movq	%rdx, -80(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r8, %rdi
+	adcq	%rbp, %r8
+	adcq	$0, %rdi
+	movq	-8(%rcx), %r13
 	movq	(%rcx), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rax, %rsi
-	movq	%rsi, -128(%rsp)        ## 8-byte Spill
-	addq	%r8, %rax
+	movq	%rax, -48(%rsp)                 ## 8-byte Spill
+	movq	%r13, %rdx
+	imulq	%r12, %rdx
+	mulxq	%rax, %rax, %r11
+	addq	%r12, %rax
 	movq	8(%rcx), %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r8, %rsi
-	movq	%rsi, -120(%rsp)        ## 8-byte Spill
-	adcq	%r14, %r8
+	movq	%rax, -16(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %rbp, %r10
+	adcq	%rbx, %rbp
 	movq	16(%rcx), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	mulxq	%rax, %rsi, %r13
-	adcq	%r15, %rsi
+	movq	%rax, -32(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %rsi, %rbx
+	adcq	%r14, %rsi
 	movq	24(%rcx), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	mulxq	%rax, %r12, %rax
-	adcq	%rbx, %r12
-	movq	32(%rcx), %rdi
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	mulxq	%rdi, %r15, %rbx
-	adcq	%r11, %r15
-	movq	40(%rcx), %rdi
-	movq	%rdi, -16(%rsp)         ## 8-byte Spill
-	mulxq	%rdi, %r14, %rdi
-	adcq	%r9, %r14
-	movq	48(%rcx), %rcx
-	movq	%rcx, -56(%rsp)         ## 8-byte Spill
-	mulxq	%rcx, %r11, %rcx
-	adcq	%r10, %r11
-	adcq	$0, %rbp
-	addq	-128(%rsp), %r8         ## 8-byte Folded Reload
-	movq	%r8, -128(%rsp)         ## 8-byte Spill
-	adcq	-120(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	%rsi, -120(%rsp)        ## 8-byte Spill
-	adcq	%r13, %r12
-	adcq	%rax, %r15
-	adcq	%rbx, %r14
-	adcq	%rdi, %r11
-	adcq	%rcx, %rbp
-	movq	-88(%rsp), %rax         ## 8-byte Reload
-	movq	8(%rax), %rdx
-	mulxq	-96(%rsp), %rcx, %rsi   ## 8-byte Folded Reload
-	mulxq	-112(%rsp), %r13, %rax  ## 8-byte Folded Reload
-	addq	%rcx, %rax
-	mulxq	-104(%rsp), %rcx, %rdi  ## 8-byte Folded Reload
-	adcq	%rsi, %rcx
-	mulxq	-24(%rsp), %rsi, %r8    ## 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-32(%rsp), %rdi, %r9    ## 8-byte Folded Reload
-	adcq	%r8, %rdi
-	mulxq	-40(%rsp), %r8, %rbx    ## 8-byte Folded Reload
-	adcq	%r9, %r8
-	mulxq	-48(%rsp), %r9, %r10    ## 8-byte Folded Reload
-	adcq	%rbx, %r9
-	adcq	$0, %r10
-	addq	-128(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rax        ## 8-byte Folded Reload
-	adcq	%r12, %rcx
-	adcq	%r15, %rsi
-	adcq	%r14, %rdi
-	adcq	%r11, %r8
-	adcq	%rbp, %r9
-	adcq	$0, %r10
-	movq	%r13, %rdx
-	imulq	-80(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %rbp, %rbx   ## 8-byte Folded Reload
-	movq	%rbx, -128(%rsp)        ## 8-byte Spill
-	addq	%r13, %rbp
-	mulxq	-72(%rsp), %rbp, %r14   ## 8-byte Folded Reload
-	adcq	%rax, %rbp
-	mulxq	8(%rsp), %rax, %r11     ## 8-byte Folded Reload
-	adcq	%rcx, %rax
-	mulxq	(%rsp), %r12, %rcx      ## 8-byte Folded Reload
-	adcq	%rsi, %r12
-	mulxq	-8(%rsp), %r15, %rbx    ## 8-byte Folded Reload
-	adcq	%rdi, %r15
-	mulxq	-16(%rsp), %r13, %rdi   ## 8-byte Folded Reload
-	adcq	%r8, %r13
-	mulxq	-56(%rsp), %rsi, %rdx   ## 8-byte Folded Reload
-	adcq	%r9, %rsi
-	adcq	$0, %r10
-	addq	-128(%rsp), %rbp        ## 8-byte Folded Reload
-	movq	%rbp, -128(%rsp)        ## 8-byte Spill
-	adcq	%r14, %rax
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	adcq	%r11, %r12
-	adcq	%rcx, %r15
-	adcq	%rbx, %r13
-	adcq	%rdi, %rsi
-	adcq	%rdx, %r10
-	movq	-88(%rsp), %rax         ## 8-byte Reload
-	movq	16(%rax), %rdx
-	mulxq	-96(%rsp), %rcx, %rax   ## 8-byte Folded Reload
-	mulxq	-112(%rsp), %r14, %rdi  ## 8-byte Folded Reload
-	addq	%rcx, %rdi
-	mulxq	-104(%rsp), %rbp, %rcx  ## 8-byte Folded Reload
-	adcq	%rax, %rbp
-	mulxq	-24(%rsp), %rbx, %r8    ## 8-byte Folded Reload
-	adcq	%rcx, %rbx
-	mulxq	-32(%rsp), %rax, %r9    ## 8-byte Folded Reload
-	adcq	%r8, %rax
-	mulxq	-40(%rsp), %r8, %rcx    ## 8-byte Folded Reload
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	adcq	%r9, %r8
-	mulxq	-48(%rsp), %r9, %r11    ## 8-byte Folded Reload
-	adcq	16(%rsp), %r9           ## 8-byte Folded Reload
-	adcq	$0, %r11
-	addq	-128(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rdi        ## 8-byte Folded Reload
-	adcq	%r12, %rbp
-	adcq	%r15, %rbx
-	adcq	%r13, %rax
-	adcq	%rsi, %r8
-	adcq	%r10, %r9
-	adcq	$0, %r11
-	movq	%r14, %rdx
-	imulq	-80(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %rsi, %rcx   ## 8-byte Folded Reload
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	addq	%r14, %rsi
-	mulxq	-72(%rsp), %rsi, %r13   ## 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	8(%rsp), %rdi, %r15     ## 8-byte Folded Reload
-	adcq	%rbp, %rdi
-	mulxq	(%rsp), %rcx, %rbp      ## 8-byte Folded Reload
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %rcx, %rdx
+	adcq	%r8, %rcx
+	adcq	$0, %rdi
+	addq	%r11, %rbp
+	adcq	%r10, %rsi
 	adcq	%rbx, %rcx
-	mulxq	-8(%rsp), %r14, %rbx    ## 8-byte Folded Reload
-	adcq	%rax, %r14
-	mulxq	-16(%rsp), %r12, %rax   ## 8-byte Folded Reload
-	adcq	%r8, %r12
-	mulxq	-56(%rsp), %r10, %rdx   ## 8-byte Folded Reload
-	adcq	%r9, %r10
-	adcq	$0, %r11
-	addq	-128(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	%rsi, -128(%rsp)        ## 8-byte Spill
-	adcq	%r13, %rdi
-	movq	%rdi, -120(%rsp)        ## 8-byte Spill
-	adcq	%r15, %rcx
-	adcq	%rbp, %r14
-	adcq	%rbx, %r12
-	adcq	%rax, %r10
-	adcq	%rdx, %r11
-	movq	-88(%rsp), %rax         ## 8-byte Reload
-	movq	24(%rax), %rdx
-	mulxq	-96(%rsp), %rsi, %rax   ## 8-byte Folded Reload
-	mulxq	-112(%rsp), %r15, %rbp  ## 8-byte Folded Reload
-	addq	%rsi, %rbp
-	mulxq	-104(%rsp), %rbx, %rdi  ## 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	-24(%rsp), %rsi, %rax   ## 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-32(%rsp), %rdi, %r9    ## 8-byte Folded Reload
-	adcq	%rax, %rdi
-	mulxq	-40(%rsp), %r8, %rax    ## 8-byte Folded Reload
-	adcq	%r9, %r8
-	mulxq	-48(%rsp), %r9, %r13    ## 8-byte Folded Reload
-	adcq	%rax, %r9
-	adcq	$0, %r13
-	addq	-128(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	%rcx, %rbx
-	adcq	%r14, %rsi
-	adcq	%r12, %rdi
-	adcq	%r10, %r8
-	adcq	%r11, %r9
-	adcq	$0, %r13
-	movq	%r15, %rdx
-	imulq	-80(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %rcx, %rax   ## 8-byte Folded Reload
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	addq	%r15, %rcx
-	mulxq	-72(%rsp), %rcx, %r11   ## 8-byte Folded Reload
-	adcq	%rbp, %rcx
-	mulxq	8(%rsp), %rbp, %r10     ## 8-byte Folded Reload
-	adcq	%rbx, %rbp
-	mulxq	(%rsp), %rax, %rbx      ## 8-byte Folded Reload
+	adcq	%rdx, %rdi
+	movq	8(%r15), %rdx
+	movq	-64(%rsp), %r12                 ## 8-byte Reload
+	mulxq	%r12, %rbx, %r9
+	movq	-56(%rsp), %r15                 ## 8-byte Reload
+	mulxq	%r15, %r10, %r11
+	addq	%rbx, %r11
+	mulxq	-40(%rsp), %rax, %r8            ## 8-byte Folded Reload
+	adcq	%r9, %rax
+	mulxq	-80(%rsp), %r9, %rbx            ## 8-byte Folded Reload
+	adcq	%r8, %r9
+	adcq	$0, %rbx
+	addq	%rbp, %r10
+	adcq	%rsi, %r11
+	adcq	%rcx, %rax
+	adcq	%rdi, %r9
+	adcq	$0, %rbx
+	movq	%r13, %rdx
+	imulq	%r10, %rdx
+	movq	-48(%rsp), %r14                 ## 8-byte Reload
+	mulxq	%r14, %rcx, %r8
+	addq	%r10, %rcx
+	mulxq	-16(%rsp), %r10, %rdi           ## 8-byte Folded Reload
+	adcq	%r11, %r10
+	mulxq	-32(%rsp), %rcx, %rsi           ## 8-byte Folded Reload
+	adcq	%rax, %rcx
+	mulxq	-72(%rsp), %rax, %rdx           ## 8-byte Folded Reload
+	adcq	%r9, %rax
+	adcq	$0, %rbx
+	addq	%r8, %r10
+	adcq	%rdi, %rcx
 	adcq	%rsi, %rax
-	mulxq	-8(%rsp), %r14, %rsi    ## 8-byte Folded Reload
-	adcq	%rdi, %r14
-	mulxq	-16(%rsp), %r15, %rdi   ## 8-byte Folded Reload
-	adcq	%r8, %r15
-	mulxq	-56(%rsp), %r12, %rdx   ## 8-byte Folded Reload
-	adcq	%r9, %r12
-	adcq	$0, %r13
-	addq	-128(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	%r11, %rbp
-	movq	%rbp, -128(%rsp)        ## 8-byte Spill
-	adcq	%r10, %rax
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	adcq	%rbx, %r14
-	adcq	%rsi, %r15
-	adcq	%rdi, %r12
-	adcq	%rdx, %r13
-	movq	-88(%rsp), %rax         ## 8-byte Reload
-	movq	32(%rax), %rdx
-	mulxq	-96(%rsp), %rsi, %rdi   ## 8-byte Folded Reload
-	mulxq	-112(%rsp), %r11, %r8   ## 8-byte Folded Reload
-	addq	%rsi, %r8
-	mulxq	-104(%rsp), %rbx, %rsi  ## 8-byte Folded Reload
-	adcq	%rdi, %rbx
-	mulxq	-24(%rsp), %rbp, %rdi   ## 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	mulxq	-32(%rsp), %rsi, %r9    ## 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-40(%rsp), %rdi, %rax   ## 8-byte Folded Reload
-	adcq	%r9, %rdi
-	mulxq	-48(%rsp), %r9, %r10    ## 8-byte Folded Reload
-	adcq	%rax, %r9
-	adcq	$0, %r10
-	addq	%rcx, %r11
-	adcq	-128(%rsp), %r8         ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rbx        ## 8-byte Folded Reload
-	adcq	%r14, %rbp
-	adcq	%r15, %rsi
-	adcq	%r12, %rdi
-	adcq	%r13, %r9
-	adcq	$0, %r10
-	movq	%r11, %rdx
-	imulq	-80(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %rcx, %rax   ## 8-byte Folded Reload
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	addq	%r11, %rcx
-	mulxq	-72(%rsp), %rcx, %r13   ## 8-byte Folded Reload
+	adcq	%rdx, %rbx
+	movq	-24(%rsp), %rdx                 ## 8-byte Reload
+	movq	16(%rdx), %rdx
+	mulxq	%r12, %rsi, %r8
+	mulxq	%r15, %r11, %rbp
+	addq	%rsi, %rbp
+	movq	-40(%rsp), %r12                 ## 8-byte Reload
+	mulxq	%r12, %rdi, %r9
+	adcq	%r8, %rdi
+	mulxq	-80(%rsp), %r8, %rsi            ## 8-byte Folded Reload
+	adcq	%r9, %r8
+	adcq	$0, %rsi
+	addq	%r10, %r11
+	adcq	%rcx, %rbp
+	adcq	%rax, %rdi
+	adcq	%rbx, %r8
+	adcq	$0, %rsi
+	movq	%r13, %rdx
+	imulq	%r11, %rdx
+	mulxq	%r14, %rax, %r10
+	addq	%r11, %rax
+	movq	-16(%rsp), %r14                 ## 8-byte Reload
+	mulxq	%r14, %r9, %rbx
+	adcq	%rbp, %r9
+	movq	-32(%rsp), %r15                 ## 8-byte Reload
+	mulxq	%r15, %rax, %rbp
+	adcq	%rdi, %rax
+	mulxq	-72(%rsp), %rcx, %rdx           ## 8-byte Folded Reload
 	adcq	%r8, %rcx
-	mulxq	8(%rsp), %rax, %r8      ## 8-byte Folded Reload
+	adcq	$0, %rsi
+	addq	%r10, %r9
 	adcq	%rbx, %rax
-	mulxq	(%rsp), %rbx, %r11      ## 8-byte Folded Reload
-	adcq	%rbp, %rbx
-	mulxq	-8(%rsp), %r14, %rbp    ## 8-byte Folded Reload
-	adcq	%rsi, %r14
-	mulxq	-16(%rsp), %r15, %rsi   ## 8-byte Folded Reload
-	adcq	%rdi, %r15
-	mulxq	-56(%rsp), %r12, %rdx   ## 8-byte Folded Reload
-	adcq	%r9, %r12
+	adcq	%rbp, %rcx
+	adcq	%rdx, %rsi
+	movq	-24(%rsp), %rdx                 ## 8-byte Reload
+	movq	24(%rdx), %rdx
+	mulxq	-64(%rsp), %rbx, %r8            ## 8-byte Folded Reload
+	mulxq	-56(%rsp), %r11, %rbp           ## 8-byte Folded Reload
+	addq	%rbx, %rbp
+	mulxq	%r12, %rdi, %rbx
+	adcq	%r8, %rdi
+	mulxq	-80(%rsp), %r8, %r10            ## 8-byte Folded Reload
+	adcq	%rbx, %r8
 	adcq	$0, %r10
-	addq	-128(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	%r13, %rax
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	adcq	%r8, %rbx
-	movq	%rbx, -120(%rsp)        ## 8-byte Spill
-	adcq	%r11, %r14
-	adcq	%rbp, %r15
-	adcq	%rsi, %r12
-	adcq	%rdx, %r10
-	movq	-88(%rsp), %rax         ## 8-byte Reload
-	movq	40(%rax), %rdx
-	mulxq	-96(%rsp), %rsi, %rax   ## 8-byte Folded Reload
-	mulxq	-112(%rsp), %r11, %rbp  ## 8-byte Folded Reload
-	addq	%rsi, %rbp
-	mulxq	-104(%rsp), %rbx, %rdi  ## 8-byte Folded Reload
-	adcq	%rax, %rbx
-	mulxq	-24(%rsp), %rsi, %rax   ## 8-byte Folded Reload
-	adcq	%rdi, %rsi
-	mulxq	-32(%rsp), %rdi, %r9    ## 8-byte Folded Reload
-	adcq	%rax, %rdi
-	mulxq	-40(%rsp), %r8, %rax    ## 8-byte Folded Reload
-	adcq	%r9, %r8
-	mulxq	-48(%rsp), %r9, %r13    ## 8-byte Folded Reload
-	adcq	%rax, %r9
-	adcq	$0, %r13
-	addq	%rcx, %r11
-	adcq	-128(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rbx        ## 8-byte Folded Reload
-	adcq	%r14, %rsi
-	adcq	%r15, %rdi
-	adcq	%r12, %r8
-	adcq	%r10, %r9
-	adcq	$0, %r13
-	movq	%r11, %rdx
-	imulq	-80(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	-64(%rsp), %rcx, %rax   ## 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
+	addq	%r9, %r11
+	adcq	%rax, %rbp
+	adcq	%rcx, %rdi
+	adcq	%rsi, %r8
+	adcq	$0, %r10
+	imulq	%r11, %r13
+	movq	%r13, %rdx
+	movq	-48(%rsp), %rbx                 ## 8-byte Reload
+	mulxq	%rbx, %rcx, %r9
 	addq	%r11, %rcx
-	mulxq	-72(%rsp), %rcx, %rax   ## 8-byte Folded Reload
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	adcq	%rbp, %rcx
-	mulxq	8(%rsp), %rax, %rbp     ## 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	adcq	%rbx, %rax
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	mulxq	(%rsp), %r14, %rbp      ## 8-byte Folded Reload
-	adcq	%rsi, %r14
-	mulxq	-8(%rsp), %r11, %r12    ## 8-byte Folded Reload
-	adcq	%rdi, %r11
-	mulxq	-16(%rsp), %r10, %rbx   ## 8-byte Folded Reload
-	adcq	%r8, %r10
-	mulxq	-56(%rsp), %rdi, %rax   ## 8-byte Folded Reload
-	adcq	%r9, %rdi
-	adcq	$0, %r13
-	addq	-120(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	movq	-128(%rsp), %rcx        ## 8-byte Reload
-	adcq	16(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	adcq	24(%rsp), %r14          ## 8-byte Folded Reload
+	mulxq	%r14, %r11, %r12
 	adcq	%rbp, %r11
-	adcq	%r12, %r10
-	adcq	%rbx, %rdi
-	adcq	%rax, %r13
-	movq	-88(%rsp), %rax         ## 8-byte Reload
-	movq	48(%rax), %rdx
-	mulxq	-96(%rsp), %rbp, %r9    ## 8-byte Folded Reload
-	mulxq	-112(%rsp), %r8, %rax   ## 8-byte Folded Reload
-	addq	%rbp, %rax
-	mulxq	-104(%rsp), %rbx, %rcx  ## 8-byte Folded Reload
-	adcq	%r9, %rbx
-	mulxq	-24(%rsp), %rbp, %r9    ## 8-byte Folded Reload
+	mulxq	%r15, %rax, %rcx
+	adcq	%rdi, %rax
+	movq	-72(%rsp), %rsi                 ## 8-byte Reload
+	mulxq	%rsi, %rbp, %rdx
+	adcq	%r8, %rbp
+	adcq	$0, %r10
+	addq	%r9, %r11
+	adcq	%r12, %rax
 	adcq	%rcx, %rbp
-	mulxq	-32(%rsp), %rcx, %r12   ## 8-byte Folded Reload
-	adcq	%r9, %rcx
-	mulxq	-40(%rsp), %r15, %rsi   ## 8-byte Folded Reload
-	movq	%rsi, -112(%rsp)        ## 8-byte Spill
-	adcq	%r12, %r15
-	mulxq	-48(%rsp), %r12, %r9    ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r12        ## 8-byte Folded Reload
-	adcq	$0, %r9
-	addq	-120(%rsp), %r8         ## 8-byte Folded Reload
-	adcq	-128(%rsp), %rax        ## 8-byte Folded Reload
-	adcq	%r14, %rbx
-	adcq	%r11, %rbp
-	adcq	%r10, %rcx
-	adcq	%rdi, %r15
-	adcq	%r13, %r12
-	adcq	$0, %r9
-	movq	-80(%rsp), %rdx         ## 8-byte Reload
-	imulq	%r8, %rdx
-	mulxq	-64(%rsp), %rdi, %rsi   ## 8-byte Folded Reload
-	movq	%rsi, -80(%rsp)         ## 8-byte Spill
-	addq	%r8, %rdi
-	mulxq	-72(%rsp), %r8, %rsi    ## 8-byte Folded Reload
-	movq	%rsi, -112(%rsp)        ## 8-byte Spill
-	adcq	%rax, %r8
-	movq	8(%rsp), %r11           ## 8-byte Reload
-	mulxq	%r11, %rsi, %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	adcq	%rbx, %rsi
-	movq	(%rsp), %r14            ## 8-byte Reload
-	mulxq	%r14, %rdi, %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	adcq	%rbp, %rdi
-	movq	-8(%rsp), %rbp          ## 8-byte Reload
-	mulxq	%rbp, %rax, %rbx
-	movq	%rbx, -104(%rsp)        ## 8-byte Spill
-	adcq	%rcx, %rax
-	movq	-16(%rsp), %rbx         ## 8-byte Reload
-	mulxq	%rbx, %rcx, %r13
-	adcq	%r15, %rcx
-	mulxq	-56(%rsp), %rdx, %r15   ## 8-byte Folded Reload
-	adcq	%r12, %rdx
-	adcq	$0, %r9
-	addq	-80(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-88(%rsp), %rdi         ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	%r13, %rdx
-	adcq	%r15, %r9
-	movq	%r8, %r13
-	subq	-64(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%rsi, %r12
-	sbbq	-72(%rsp), %r12         ## 8-byte Folded Reload
-	movq	%rdi, %r10
-	sbbq	%r11, %r10
-	movq	%rax, %r11
-	sbbq	%r14, %r11
-	movq	%rcx, %r14
-	sbbq	%rbp, %r14
-	movq	%rdx, %r15
-	sbbq	%rbx, %r15
-	movq	%r9, %rbp
-	sbbq	-56(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rbp, %rbx
-	sarq	$63, %rbx
-	cmovsq	%r8, %r13
-	movq	32(%rsp), %rbx          ## 8-byte Reload
-	movq	%r13, (%rbx)
-	cmovsq	%rsi, %r12
-	movq	%r12, 8(%rbx)
-	cmovsq	%rdi, %r10
-	movq	%r10, 16(%rbx)
-	cmovsq	%rax, %r11
-	movq	%r11, 24(%rbx)
-	cmovsq	%rcx, %r14
-	movq	%r14, 32(%rbx)
-	cmovsq	%rdx, %r15
-	movq	%r15, 40(%rbx)
-	cmovsq	%r9, %rbp
-	movq	%rbp, 48(%rbx)
-	addq	$40, %rsp
+	adcq	%rdx, %r10
+	movq	%r11, %rcx
+	subq	%rbx, %rcx
+	movq	%rax, %rdx
+	sbbq	%r14, %rdx
+	movq	%rbp, %rdi
+	sbbq	%r15, %rdi
+	movq	%r10, %rbx
+	sbbq	%rsi, %rbx
+	cmovsq	%r10, %rbx
+	movq	-8(%rsp), %rsi                  ## 8-byte Reload
+	movq	%rbx, 24(%rsi)
+	cmovsq	%rbp, %rdi
+	movq	%rdi, 16(%rsi)
+	cmovsq	%rax, %rdx
+	movq	%rdx, 8(%rsi)
+	cmovsq	%r11, %rcx
+	movq	%rcx, (%rsi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -7736,342 +1678,125 @@ _mcl_fp_montNF7Lbmi2:                   ## @mcl_fp_montNF7Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montRed7Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_montRed4Lbmi2           ## -- Begin function mcl_fp_montRed4Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_montRed7Lbmi2:                  ## @mcl_fp_montRed7Lbmi2
-## BB#0:
+_mcl_fp_montRed4Lbmi2:                  ## @mcl_fp_montRed4Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$56, %rsp
 	movq	%rdx, %rcx
-	movq	%rdi, 48(%rsp)          ## 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %r13
-	movq	%r13, %rdx
-	imulq	%rax, %rdx
-	movq	48(%rcx), %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rdi, %rax
-	movq	%rdi, -64(%rsp)         ## 8-byte Spill
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	40(%rcx), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r10, %rax
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	32(%rcx), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %r14, %r8
-	movq	24(%rcx), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	mulxq	%rax, %r12, %r15
-	movq	16(%rcx), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	mulxq	%rax, %rbp, %rbx
-	movq	(%rcx), %rdi
-	movq	%rdi, -48(%rsp)         ## 8-byte Spill
-	movq	8(%rcx), %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	mulxq	%rax, %rax, %r11
-	mulxq	%rdi, %rdx, %r9
-	addq	%rax, %r9
-	adcq	%rbp, %r11
-	adcq	%r12, %rbx
-	adcq	%r14, %r15
-	adcq	%r10, %r8
-	movq	-128(%rsp), %rcx        ## 8-byte Reload
-	adcq	-64(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%r13, %rdx
-	adcq	8(%rsi), %r9
-	adcq	16(%rsi), %r11
-	adcq	24(%rsi), %rbx
-	adcq	32(%rsi), %r15
-	adcq	40(%rsi), %r8
-	movq	%r8, -112(%rsp)         ## 8-byte Spill
-	adcq	48(%rsi), %rcx
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	adcq	56(%rsi), %rax
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	104(%rsi), %r8
-	movq	96(%rsi), %rdx
-	movq	88(%rsi), %rdi
-	movq	80(%rsi), %rbp
-	movq	72(%rsi), %rax
-	movq	64(%rsi), %rcx
-	adcq	$0, %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	adcq	$0, %rbp
-	movq	%rbp, -56(%rsp)         ## 8-byte Spill
-	adcq	$0, %rdi
-	movq	%rdi, -80(%rsp)         ## 8-byte Spill
-	adcq	$0, %rdx
-	movq	%rdx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, -64(%rsp)          ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	%r9, %rdx
-	imulq	-72(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-16(%rsp), %r13         ## 8-byte Reload
-	mulxq	%r13, %rcx, %rax
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	mulxq	-24(%rsp), %rcx, %rax   ## 8-byte Folded Reload
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	mulxq	-32(%rsp), %r14, %r12   ## 8-byte Folded Reload
-	mulxq	16(%rsp), %r8, %rax     ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %rsi, %r10   ## 8-byte Folded Reload
-	mulxq	-8(%rsp), %rcx, %rdi    ## 8-byte Folded Reload
-	mulxq	-48(%rsp), %rdx, %rbp   ## 8-byte Folded Reload
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	-8(%rdx), %r15
+	movq	(%rdx), %rdi
+	movq	%rdi, -64(%rsp)                 ## 8-byte Spill
+	movq	(%rsi), %rax
+	movq	%rax, %rdx
+	imulq	%r15, %rdx
+	movq	24(%rcx), %rbp
+	mulxq	%rbp, %r12, %r11
+	movq	%rbp, %r8
+	movq	%rbp, -40(%rsp)                 ## 8-byte Spill
+	movq	16(%rcx), %r9
+	mulxq	%r9, %r10, %r13
+	movq	8(%rcx), %rcx
+	movq	%rcx, -56(%rsp)                 ## 8-byte Spill
+	mulxq	%rcx, %rcx, %rbx
+	mulxq	%rdi, %rdx, %rbp
 	addq	%rcx, %rbp
-	adcq	%rsi, %rdi
-	adcq	%r8, %r10
-	adcq	%r14, %rax
-	movq	%rax, %rcx
-	adcq	40(%rsp), %r12          ## 8-byte Folded Reload
-	movq	-104(%rsp), %rsi        ## 8-byte Reload
-	adcq	32(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%r9, %rdx
-	adcq	%r11, %rbp
-	adcq	%rbx, %rdi
-	adcq	%r15, %r10
-	adcq	-112(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	%rcx, -112(%rsp)        ## 8-byte Spill
-	adcq	-128(%rsp), %r12        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	%rsi, -104(%rsp)        ## 8-byte Spill
-	adcq	8(%rsp), %rax           ## 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	adcq	$0, -88(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -56(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -80(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, 24(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, -64(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, (%rsp)              ## 8-byte Folded Spill
-	movq	%rbp, %rdx
-	imulq	-72(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	%r13, %rcx, %rax
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	-24(%rsp), %r15         ## 8-byte Reload
-	mulxq	%r15, %rcx, %rax
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	mulxq	-32(%rsp), %r11, %r13   ## 8-byte Folded Reload
-	mulxq	16(%rsp), %r9, %r14     ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %rsi, %r8    ## 8-byte Folded Reload
-	mulxq	-8(%rsp), %rax, %rbx    ## 8-byte Folded Reload
-	mulxq	-48(%rsp), %rdx, %rcx   ## 8-byte Folded Reload
-	addq	%rax, %rcx
-	adcq	%rsi, %rbx
-	adcq	%r9, %r8
-	adcq	%r11, %r14
-	adcq	32(%rsp), %r13          ## 8-byte Folded Reload
-	movq	-128(%rsp), %rsi        ## 8-byte Reload
-	adcq	8(%rsp), %rsi           ## 8-byte Folded Reload
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rbp, %rdx
-	adcq	%rdi, %rcx
 	adcq	%r10, %rbx
-	adcq	-112(%rsp), %r8         ## 8-byte Folded Reload
-	adcq	%r12, %r14
-	adcq	-104(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, -128(%rsp)        ## 8-byte Spill
-	adcq	-88(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	adcq	$0, -56(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -80(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, 24(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, -64(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, (%rsp)              ## 8-byte Folded Spill
-	movq	%rcx, %rdx
-	imulq	-72(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	-16(%rsp), %rsi, %rax   ## 8-byte Folded Reload
-	movq	%rsi, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	mulxq	%r15, %rsi, %rax
-	movq	%rsi, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	-32(%rsp), %r15         ## 8-byte Reload
-	mulxq	%r15, %rax, %r12
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	mulxq	16(%rsp), %r9, %rbp     ## 8-byte Folded Reload
-	mulxq	-40(%rsp), %rdi, %r10   ## 8-byte Folded Reload
-	mulxq	-8(%rsp), %rsi, %r11    ## 8-byte Folded Reload
-	mulxq	-48(%rsp), %rdx, %rax   ## 8-byte Folded Reload
-	addq	%rsi, %rax
-	adcq	%rdi, %r11
-	adcq	%r9, %r10
-	adcq	8(%rsp), %rbp           ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r12        ## 8-byte Folded Reload
-	movq	-104(%rsp), %rdi        ## 8-byte Reload
-	adcq	-88(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	-96(%rsp), %rsi         ## 8-byte Reload
-	adcq	$0, %rsi
-	addq	%rcx, %rdx
-	adcq	%rbx, %rax
-	adcq	%r8, %r11
-	adcq	%r14, %r10
-	adcq	%r13, %rbp
-	adcq	-128(%rsp), %r12        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rdi        ## 8-byte Folded Reload
-	movq	%rdi, -104(%rsp)        ## 8-byte Spill
-	adcq	-56(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, -96(%rsp)         ## 8-byte Spill
-	adcq	$0, -80(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, 24(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, -64(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, (%rsp)              ## 8-byte Folded Spill
-	movq	%rax, %rdx
-	imulq	-72(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	-16(%rsp), %rsi, %rcx   ## 8-byte Folded Reload
-	movq	%rsi, -128(%rsp)        ## 8-byte Spill
-	movq	%rcx, -56(%rsp)         ## 8-byte Spill
-	mulxq	-24(%rsp), %rsi, %rcx   ## 8-byte Folded Reload
-	movq	%rsi, -88(%rsp)         ## 8-byte Spill
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	mulxq	%r15, %rcx, %r13
-	movq	%rcx, -112(%rsp)        ## 8-byte Spill
-	movq	16(%rsp), %r15          ## 8-byte Reload
-	mulxq	%r15, %r9, %r14
-	mulxq	-40(%rsp), %rdi, %rbx   ## 8-byte Folded Reload
-	mulxq	-8(%rsp), %rsi, %r8     ## 8-byte Folded Reload
-	mulxq	-48(%rsp), %rdx, %rcx   ## 8-byte Folded Reload
-	addq	%rsi, %rcx
-	adcq	%rdi, %r8
-	adcq	%r9, %rbx
-	adcq	-112(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r13         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdi        ## 8-byte Reload
-	adcq	-128(%rsp), %rdi        ## 8-byte Folded Reload
-	movq	-56(%rsp), %rsi         ## 8-byte Reload
-	adcq	$0, %rsi
+	adcq	%r12, %r13
+	adcq	$0, %r11
 	addq	%rax, %rdx
-	adcq	%r11, %rcx
-	adcq	%r10, %r8
-	adcq	%rbp, %rbx
-	adcq	%r12, %r14
-	adcq	-104(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, -120(%rsp)        ## 8-byte Spill
-	adcq	-80(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, -56(%rsp)         ## 8-byte Spill
-	adcq	$0, 24(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, -64(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, (%rsp)              ## 8-byte Folded Spill
-	movq	%rcx, %rdx
-	imulq	-72(%rsp), %rdx         ## 8-byte Folded Reload
-	mulxq	-16(%rsp), %rsi, %rax   ## 8-byte Folded Reload
-	movq	%rsi, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	mulxq	-24(%rsp), %rsi, %rax   ## 8-byte Folded Reload
-	movq	%rsi, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	mulxq	-32(%rsp), %rax, %r12   ## 8-byte Folded Reload
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%r15, %r11
-	mulxq	%r11, %rax, %r15
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	mulxq	-40(%rsp), %rdi, %rbp   ## 8-byte Folded Reload
-	movq	-8(%rsp), %r9           ## 8-byte Reload
-	mulxq	%r9, %rax, %r10
-	mulxq	-48(%rsp), %rdx, %rsi   ## 8-byte Folded Reload
-	addq	%rax, %rsi
-	adcq	%rdi, %r10
-	adcq	-112(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r12        ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdi        ## 8-byte Reload
-	adcq	-96(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	-80(%rsp), %rax         ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rcx, %rdx
-	adcq	%r8, %rsi
-	adcq	%rbx, %r10
-	adcq	%r14, %rbp
-	adcq	%r13, %r15
-	adcq	-120(%rsp), %r12        ## 8-byte Folded Reload
-	adcq	-56(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, -128(%rsp)        ## 8-byte Spill
-	adcq	24(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	adcq	$0, -64(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, (%rsp)              ## 8-byte Folded Spill
-	movq	-72(%rsp), %rdx         ## 8-byte Reload
-	imulq	%rsi, %rdx
-	mulxq	%r11, %rcx, %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	mulxq	%r9, %rbx, %rdi
-	mulxq	-48(%rsp), %r11, %r14   ## 8-byte Folded Reload
-	addq	%rbx, %r14
-	mulxq	-40(%rsp), %rbx, %r13   ## 8-byte Folded Reload
-	adcq	%rdi, %rbx
-	adcq	%rcx, %r13
-	mulxq	-32(%rsp), %r8, %rdi    ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r8          ## 8-byte Folded Reload
-	mulxq	-24(%rsp), %rcx, %r9    ## 8-byte Folded Reload
-	adcq	%rdi, %rcx
-	mulxq	-16(%rsp), %rdx, %rdi   ## 8-byte Folded Reload
-	adcq	%r9, %rdx
-	adcq	$0, %rdi
-	addq	%rsi, %r11
-	adcq	%r10, %r14
-	adcq	%rbp, %rbx
-	adcq	%r15, %r13
-	adcq	%r12, %r8
-	adcq	-128(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rdx         ## 8-byte Folded Reload
-	adcq	-64(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	$0, %rax
-	movq	%r14, %rsi
-	subq	-48(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rbx, %rbp
-	sbbq	-8(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%r13, %r9
-	sbbq	-40(%rsp), %r9          ## 8-byte Folded Reload
-	movq	%r8, %r10
-	sbbq	16(%rsp), %r10          ## 8-byte Folded Reload
-	movq	%rcx, %r11
-	sbbq	-32(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	sbbq	-24(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%rdi, %r12
-	sbbq	-16(%rsp), %r12         ## 8-byte Folded Reload
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%rdi, %r12
-	testb	%al, %al
-	cmovneq	%r14, %rsi
-	movq	48(%rsp), %rdi          ## 8-byte Reload
-	movq	%rsi, (%rdi)
-	cmovneq	%rbx, %rbp
-	movq	%rbp, 8(%rdi)
-	cmovneq	%r13, %r9
-	movq	%r9, 16(%rdi)
-	cmovneq	%r8, %r10
-	movq	%r10, 24(%rdi)
-	cmovneq	%rcx, %r11
-	movq	%r11, 32(%rdi)
-	cmovneq	%rdx, %r15
-	movq	%r15, 40(%rdi)
-	movq	%r12, 48(%rdi)
-	addq	$56, %rsp
+	movq	%rsi, -48(%rsp)                 ## 8-byte Spill
+	adcq	8(%rsi), %rbp
+	adcq	16(%rsi), %rbx
+	adcq	24(%rsi), %r13
+	adcq	32(%rsi), %r11
+	setb	-65(%rsp)                       ## 1-byte Folded Spill
+	movq	%r15, %rdx
+	imulq	%rbp, %rdx
+	mulxq	%r8, %r14, %r12
+	movq	%r9, -16(%rsp)                  ## 8-byte Spill
+	mulxq	%r9, %r10, %rsi
+	mulxq	-64(%rsp), %rdi, %r8            ## 8-byte Folded Reload
+	mulxq	-56(%rsp), %rax, %rcx           ## 8-byte Folded Reload
+	addq	%r8, %rax
+	adcq	%r10, %rcx
+	adcq	%r14, %rsi
+	movzbl	-65(%rsp), %edx                 ## 1-byte Folded Reload
+	adcq	%rdx, %r12
+	addq	%rbp, %rdi
+	adcq	%rbx, %rax
+	adcq	%r13, %rcx
+	adcq	%r11, %rsi
+	movq	-48(%rsp), %r10                 ## 8-byte Reload
+	adcq	40(%r10), %r12
+	setb	-65(%rsp)                       ## 1-byte Folded Spill
+	movq	%r15, %rdx
+	imulq	%rax, %rdx
+	mulxq	-40(%rsp), %rdi, %r11           ## 8-byte Folded Reload
+	movq	%rdi, -24(%rsp)                 ## 8-byte Spill
+	mulxq	%r9, %rdi, %r13
+	movq	%rdi, -32(%rsp)                 ## 8-byte Spill
+	movq	-64(%rsp), %r8                  ## 8-byte Reload
+	mulxq	%r8, %rdi, %r14
+	movq	-56(%rsp), %r9                  ## 8-byte Reload
+	mulxq	%r9, %rbp, %rbx
+	addq	%r14, %rbp
+	adcq	-32(%rsp), %rbx                 ## 8-byte Folded Reload
+	adcq	-24(%rsp), %r13                 ## 8-byte Folded Reload
+	movzbl	-65(%rsp), %edx                 ## 1-byte Folded Reload
+	adcq	%rdx, %r11
+	addq	%rax, %rdi
+	adcq	%rcx, %rbp
+	adcq	%rsi, %rbx
+	adcq	%r12, %r13
+	adcq	48(%r10), %r11
+	setb	%dil
+	imulq	%rbp, %r15
+	movq	%r15, %rdx
+	mulxq	%r8, %rcx, %rax
+	mulxq	%r9, %r12, %rsi
+	addq	%rax, %r12
+	movq	-16(%rsp), %r8                  ## 8-byte Reload
+	mulxq	%r8, %rax, %r9
+	adcq	%rsi, %rax
+	movq	-40(%rsp), %r10                 ## 8-byte Reload
+	mulxq	%r10, %r15, %r14
+	adcq	%r9, %r15
+	movzbl	%dil, %edi
+	adcq	%r14, %rdi
+	addq	%rbp, %rcx
+	adcq	%rbx, %r12
+	adcq	%r13, %rax
+	adcq	%r11, %r15
+	movq	-48(%rsp), %rcx                 ## 8-byte Reload
+	adcq	56(%rcx), %rdi
+	xorl	%ebx, %ebx
+	movq	%r12, %rcx
+	subq	-64(%rsp), %rcx                 ## 8-byte Folded Reload
+	movq	%rax, %rbp
+	sbbq	-56(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	%r15, %rdx
+	sbbq	%r8, %rdx
+	movq	%rdi, %rsi
+	sbbq	%r10, %rsi
+	sbbq	%rbx, %rbx
+	testb	$1, %bl
+	cmovneq	%rdi, %rsi
+	movq	-8(%rsp), %rdi                  ## 8-byte Reload
+	movq	%rsi, 24(%rdi)
+	cmovneq	%r15, %rdx
+	movq	%rdx, 16(%rdi)
+	cmovneq	%rax, %rbp
+	movq	%rbp, 8(%rdi)
+	cmovneq	%r12, %rcx
+	movq	%rcx, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -8079,966 +1804,674 @@ _mcl_fp_montRed7Lbmi2:                  ## @mcl_fp_montRed7Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_addPre7Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_montRedNF4Lbmi2         ## -- Begin function mcl_fp_montRedNF4Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_addPre7Lbmi2:                   ## @mcl_fp_addPre7Lbmi2
-## BB#0:
+_mcl_fp_montRedNF4Lbmi2:                ## @mcl_fp_montRedNF4Lbmi2
+## %bb.0:
+	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
+	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	48(%rdx), %r8
-	movq	48(%rsi), %r14
-	movq	40(%rdx), %r9
-	movq	40(%rsi), %r15
-	movq	32(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %rbx
-	adcq	16(%rsi), %r12
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r12, 16(%rdi)
-	adcq	%r11, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 32(%rdi)
+	movq	%rdx, %rcx
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	-8(%rdx), %r15
+	movq	(%rdx), %rdi
+	movq	(%rsi), %rax
+	movq	%rax, %rdx
+	imulq	%r15, %rdx
+	movq	24(%rcx), %rbp
+	mulxq	%rbp, %r12, %r11
+	movq	%rbp, %r14
+	movq	%rbp, -32(%rsp)                 ## 8-byte Spill
+	movq	16(%rcx), %r8
+	mulxq	%r8, %r9, %r13
+	movq	%r8, -40(%rsp)                  ## 8-byte Spill
+	movq	8(%rcx), %rcx
+	movq	%rcx, -64(%rsp)                 ## 8-byte Spill
+	mulxq	%rcx, %rbp, %rbx
+	mulxq	%rdi, %rdx, %rcx
+	movq	%rdi, -56(%rsp)                 ## 8-byte Spill
+	addq	%rbp, %rcx
+	adcq	%r9, %rbx
+	adcq	%r12, %r13
+	adcq	$0, %r11
+	addq	%rax, %rdx
+	movq	%rsi, -48(%rsp)                 ## 8-byte Spill
+	adcq	8(%rsi), %rcx
+	adcq	16(%rsi), %rbx
+	adcq	24(%rsi), %r13
+	adcq	32(%rsi), %r11
+	setb	%r10b
+	movq	%r15, %rdx
+	imulq	%rcx, %rdx
+	mulxq	%r14, %r14, %r12
+	mulxq	%r8, %r9, %rbp
+	mulxq	%rdi, %rdi, %r8
+	mulxq	-64(%rsp), %rax, %rsi           ## 8-byte Folded Reload
+	addq	%r8, %rax
+	adcq	%r9, %rsi
+	adcq	%r14, %rbp
+	movzbl	%r10b, %edx
+	adcq	%rdx, %r12
+	addq	%rcx, %rdi
+	adcq	%rbx, %rax
+	adcq	%r13, %rsi
+	adcq	%r11, %rbp
+	movq	-48(%rsp), %r10                 ## 8-byte Reload
+	adcq	40(%r10), %r12
+	setb	-65(%rsp)                       ## 1-byte Folded Spill
+	movq	%r15, %rdx
+	imulq	%rax, %rdx
+	mulxq	-32(%rsp), %rcx, %r11           ## 8-byte Folded Reload
+	movq	%rcx, -16(%rsp)                 ## 8-byte Spill
+	mulxq	-40(%rsp), %rcx, %r13           ## 8-byte Folded Reload
+	movq	%rcx, -24(%rsp)                 ## 8-byte Spill
+	movq	-56(%rsp), %r9                  ## 8-byte Reload
+	mulxq	%r9, %rdi, %r14
+	movq	-64(%rsp), %r8                  ## 8-byte Reload
+	mulxq	%r8, %rbx, %rcx
+	addq	%r14, %rbx
+	adcq	-24(%rsp), %rcx                 ## 8-byte Folded Reload
+	adcq	-16(%rsp), %r13                 ## 8-byte Folded Reload
+	movzbl	-65(%rsp), %edx                 ## 1-byte Folded Reload
+	adcq	%rdx, %r11
+	addq	%rax, %rdi
+	adcq	%rsi, %rbx
+	adcq	%rbp, %rcx
+	adcq	%r12, %r13
+	adcq	48(%r10), %r11
+	setb	%al
+	imulq	%rbx, %r15
+	movq	%r15, %rdx
+	mulxq	%r9, %rsi, %rbp
+	mulxq	%r8, %r12, %rdi
+	addq	%rbp, %r12
+	movq	-40(%rsp), %r8                  ## 8-byte Reload
+	mulxq	%r8, %rbp, %r9
+	adcq	%rdi, %rbp
+	movq	-32(%rsp), %r10                 ## 8-byte Reload
+	mulxq	%r10, %r15, %r14
 	adcq	%r9, %r15
-	movq	%r15, 40(%rdi)
-	adcq	%r8, %r14
-	movq	%r14, 48(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
+	movzbl	%al, %eax
+	adcq	%r14, %rax
+	addq	%rbx, %rsi
+	adcq	%rcx, %r12
+	adcq	%r13, %rbp
+	adcq	%r11, %r15
+	movq	-48(%rsp), %rcx                 ## 8-byte Reload
+	adcq	56(%rcx), %rax
+	movq	%r12, %rcx
+	subq	-56(%rsp), %rcx                 ## 8-byte Folded Reload
+	movq	%rbp, %rsi
+	sbbq	-64(%rsp), %rsi                 ## 8-byte Folded Reload
+	movq	%r15, %rdi
+	sbbq	%r8, %rdi
+	movq	%rax, %rdx
+	sbbq	%r10, %rdx
+	cmovsq	%rax, %rdx
+	movq	-8(%rsp), %rax                  ## 8-byte Reload
+	movq	%rdx, 24(%rax)
+	cmovsq	%r15, %rdi
+	movq	%rdi, 16(%rax)
+	cmovsq	%rbp, %rsi
+	movq	%rsi, 8(%rax)
+	cmovsq	%r12, %rcx
+	movq	%rcx, (%rax)
 	popq	%rbx
 	popq	%r12
+	popq	%r13
 	popq	%r14
 	popq	%r15
+	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_subPre7Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_addPre4Lbmi2            ## -- Begin function mcl_fp_addPre4Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_subPre7Lbmi2:                   ## @mcl_fp_subPre7Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r8
-	movq	48(%rsi), %r10
-	movq	40(%rdx), %r9
-	movq	40(%rsi), %r15
-	movq	24(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %r12
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %r12
+_mcl_fp_addPre4Lbmi2:                   ## @mcl_fp_addPre4Lbmi2
+## %bb.0:
+	movq	24(%rsi), %rax
 	movq	16(%rsi), %rcx
-	sbbq	16(%rdx), %rcx
-	movq	32(%rsi), %rdx
-	movq	24(%rsi), %rsi
-	movq	%rbx, (%rdi)
-	movq	%r12, 8(%rdi)
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r8
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rcx
+	adcq	24(%rdx), %rax
+	movq	%rax, 24(%rdi)
 	movq	%rcx, 16(%rdi)
-	sbbq	%r11, %rsi
-	movq	%rsi, 24(%rdi)
-	sbbq	%r14, %rdx
-	movq	%rdx, 32(%rdi)
-	sbbq	%r9, %r15
-	movq	%r15, 40(%rdi)
-	sbbq	%r8, %r10
-	movq	%r10, 48(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	setb	%al
+	movzbl	%al, %eax
 	retq
-
-	.globl	_mcl_fp_shr1_7Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_subPre4Lbmi2            ## -- Begin function mcl_fp_subPre4Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_shr1_7Lbmi2:                    ## @mcl_fp_shr1_7Lbmi2
-## BB#0:
-	movq	48(%rsi), %r8
-	movq	40(%rsi), %r9
-	movq	32(%rsi), %r10
-	movq	24(%rsi), %rax
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rdx
+_mcl_fp_subPre4Lbmi2:                   ## @mcl_fp_subPre4Lbmi2
+## %bb.0:
+	movq	24(%rsi), %rcx
+	movq	16(%rsi), %r8
+	movq	(%rsi), %r9
 	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rdx
-	movq	%rdx, (%rdi)
-	shrdq	$1, %rcx, %rsi
+	xorl	%eax, %eax
+	subq	(%rdx), %r9
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %r8
+	sbbq	24(%rdx), %rcx
+	movq	%rcx, 24(%rdi)
+	movq	%r8, 16(%rdi)
 	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rax, %rcx
-	movq	%rcx, 16(%rdi)
-	shrdq	$1, %r10, %rax
-	movq	%rax, 24(%rdi)
-	shrdq	$1, %r9, %r10
-	movq	%r10, 32(%rdi)
-	shrdq	$1, %r8, %r9
-	movq	%r9, 40(%rdi)
-	shrq	%r8
-	movq	%r8, 48(%rdi)
+	movq	%r9, (%rdi)
+	sbbq	%rax, %rax
+	andl	$1, %eax
 	retq
-
-	.globl	_mcl_fp_add7Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_shr1_4Lbmi2             ## -- Begin function mcl_fp_shr1_4Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_add7Lbmi2:                      ## @mcl_fp_add7Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r14
-	movq	48(%rsi), %r8
-	movq	40(%rdx), %r15
-	movq	40(%rsi), %r9
-	movq	32(%rdx), %r12
-	movq	24(%rdx), %r13
-	movq	16(%rdx), %r10
-	movq	(%rdx), %r11
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %r11
-	adcq	8(%rsi), %rdx
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %rbx
-	adcq	16(%rsi), %r10
-	movq	%r11, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	adcq	%r13, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r12, %rbx
-	movq	%rbx, 32(%rdi)
-	adcq	%r15, %r9
-	movq	%r9, 40(%rdi)
-	adcq	%r14, %r8
-	movq	%r8, 48(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %r11
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r10
-	sbbq	24(%rcx), %rax
-	sbbq	32(%rcx), %rbx
-	sbbq	40(%rcx), %r9
-	sbbq	48(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB104_2
-## BB#1:                                ## %nocarry
-	movq	%r11, (%rdi)
+_mcl_fp_shr1_4Lbmi2:                    ## @mcl_fp_shr1_4Lbmi2
+## %bb.0:
+	movq	(%rsi), %rax
+	movq	8(%rsi), %r8
+	movq	16(%rsi), %rdx
+	movq	24(%rsi), %rcx
+	movq	%rcx, %rsi
+	shrq	%rsi
+	movq	%rsi, 24(%rdi)
+	shldq	$63, %rdx, %rcx
+	movq	%rcx, 16(%rdi)
+	shldq	$63, %r8, %rdx
 	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	movq	%rax, 24(%rdi)
-	movq	%rbx, 32(%rdi)
-	movq	%r9, 40(%rdi)
-	movq	%r8, 48(%rdi)
-LBB104_2:                               ## %carry
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
+	shrdq	$1, %r8, %rax
+	movq	%rax, (%rdi)
 	retq
-
-	.globl	_mcl_fp_addNF7Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_add4Lbmi2               ## -- Begin function mcl_fp_add4Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_addNF7Lbmi2:                    ## @mcl_fp_addNF7Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
+_mcl_fp_add4Lbmi2:                      ## @mcl_fp_add4Lbmi2
+## %bb.0:
+	movq	24(%rsi), %r8
+	movq	16(%rsi), %r9
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r9
+	adcq	24(%rdx), %r8
+	movq	%r8, 24(%rdi)
+	movq	%r9, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rax, (%rdi)
+	setb	%dl
+	movzbl	%dl, %edx
+	subq	(%rcx), %rax
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %r9
+	sbbq	24(%rcx), %r8
+	sbbq	$0, %rdx
+	testb	$1, %dl
+	jne	LBB33_2
+## %bb.1:                               ## %nocarry
+	movq	%rax, (%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r9, 16(%rdi)
+	movq	%r8, 24(%rdi)
+LBB33_2:                                ## %carry
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_addNF4Lbmi2             ## -- Begin function mcl_fp_addNF4Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_addNF4Lbmi2:                    ## @mcl_fp_addNF4Lbmi2
+## %bb.0:
 	pushq	%rbx
-	movq	48(%rdx), %r9
-	movq	40(%rdx), %rbp
-	movq	32(%rdx), %r10
 	movq	24(%rdx), %r11
-	movq	16(%rdx), %r14
-	movq	(%rdx), %r12
-	movq	8(%rdx), %r15
-	addq	(%rsi), %r12
-	adcq	8(%rsi), %r15
-	adcq	16(%rsi), %r14
+	movq	16(%rdx), %r8
+	movq	(%rdx), %r9
+	movq	8(%rdx), %r10
+	addq	(%rsi), %r9
+	adcq	8(%rsi), %r10
+	adcq	16(%rsi), %r8
 	adcq	24(%rsi), %r11
-	adcq	32(%rsi), %r10
-	adcq	40(%rsi), %rbp
-	movq	%rbp, -8(%rsp)          ## 8-byte Spill
-	adcq	48(%rsi), %r9
-	movq	%r12, %rsi
+	movq	%r9, %rsi
 	subq	(%rcx), %rsi
-	movq	%r15, %rdx
+	movq	%r10, %rdx
 	sbbq	8(%rcx), %rdx
-	movq	%r14, %rax
+	movq	%r8, %rax
 	sbbq	16(%rcx), %rax
 	movq	%r11, %rbx
 	sbbq	24(%rcx), %rbx
-	movq	%r10, %r13
-	sbbq	32(%rcx), %r13
-	sbbq	40(%rcx), %rbp
-	movq	%r9, %r8
-	sbbq	48(%rcx), %r8
-	movq	%r8, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r12, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r15, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r14, %rax
-	movq	%rax, 16(%rdi)
 	cmovsq	%r11, %rbx
 	movq	%rbx, 24(%rdi)
-	cmovsq	%r10, %r13
-	movq	%r13, 32(%rdi)
-	cmovsq	-8(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 40(%rdi)
-	cmovsq	%r9, %r8
-	movq	%r8, 48(%rdi)
+	cmovsq	%r8, %rax
+	movq	%rax, 16(%rdi)
+	cmovsq	%r10, %rdx
+	movq	%rdx, 8(%rdi)
+	cmovsq	%r9, %rsi
+	movq	%rsi, (%rdi)
 	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_sub7Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_sub4Lbmi2               ## -- Begin function mcl_fp_sub4Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_sub4Lbmi2:                      ## @mcl_fp_sub4Lbmi2
+## %bb.0:
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %r10
+	sbbq	24(%rdx), %r9
+	movq	%r9, 24(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rax, %rax
+	testb	$1, %al
+	jne	LBB35_2
+## %bb.1:                               ## %nocarry
+	retq
+LBB35_2:                                ## %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %r10
+	adcq	24(%rcx), %r9
+	movq	%r9, 24(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_subNF4Lbmi2             ## -- Begin function mcl_fp_subNF4Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_sub7Lbmi2:                      ## @mcl_fp_sub7Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
+_mcl_fp_subNF4Lbmi2:                    ## @mcl_fp_subNF4Lbmi2
+## %bb.0:
 	pushq	%rbx
-	movq	48(%rdx), %r14
-	movq	48(%rsi), %r8
-	movq	40(%rdx), %r15
-	movq	40(%rsi), %r9
-	movq	32(%rdx), %r12
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r11
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r11
-	movq	16(%rsi), %r13
-	sbbq	16(%rdx), %r13
-	movq	32(%rsi), %r10
-	movq	24(%rsi), %rsi
-	sbbq	24(%rdx), %rsi
-	movq	%rax, (%rdi)
-	movq	%r11, 8(%rdi)
-	movq	%r13, 16(%rdi)
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %r8
+	movq	(%rsi), %r9
+	movq	8(%rsi), %r10
+	subq	(%rdx), %r9
+	sbbq	8(%rdx), %r10
+	sbbq	16(%rdx), %r8
+	sbbq	24(%rdx), %r11
+	movq	%r11, %rdx
+	sarq	$63, %rdx
+	movq	24(%rcx), %rsi
+	andq	%rdx, %rsi
+	movq	16(%rcx), %rax
+	andq	%rdx, %rax
+	movq	8(%rcx), %rbx
+	andq	%rdx, %rbx
+	andq	(%rcx), %rdx
+	addq	%r9, %rdx
+	movq	%rdx, (%rdi)
+	adcq	%r10, %rbx
+	movq	%rbx, 8(%rdi)
+	adcq	%r8, %rax
+	movq	%rax, 16(%rdi)
+	adcq	%r11, %rsi
 	movq	%rsi, 24(%rdi)
-	sbbq	%r12, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	%r15, %r9
-	movq	%r9, 40(%rdi)
-	sbbq	%r14, %r8
-	movq	%r8, 48(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	LBB106_2
-## BB#1:                                ## %carry
-	movq	48(%rcx), %r14
-	movq	40(%rcx), %r15
-	movq	32(%rcx), %r12
-	movq	24(%rcx), %rbx
-	movq	8(%rcx), %rdx
-	movq	16(%rcx), %rbp
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r11, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r13, %rbp
-	movq	%rbp, 16(%rdi)
-	adcq	%rsi, %rbx
-	movq	%rbx, 24(%rdi)
-	adcq	%r10, %r12
-	movq	%r12, 32(%rdi)
-	adcq	%r9, %r15
-	movq	%r15, 40(%rdi)
-	adcq	%r8, %r14
-	movq	%r14, 48(%rdi)
-LBB106_2:                               ## %nocarry
 	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_subNF7Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_add4Lbmi2            ## -- Begin function mcl_fpDbl_add4Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_subNF7Lbmi2:                    ## @mcl_fp_subNF7Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
+_mcl_fpDbl_add4Lbmi2:                   ## @mcl_fpDbl_add4Lbmi2
+## %bb.0:
 	pushq	%r14
-	pushq	%r13
-	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r8
-	movq	48(%rsi), %r11
-	movdqu	(%rdx), %xmm0
-	movdqu	16(%rdx), %xmm1
-	movdqu	32(%rdx), %xmm2
-	pshufd	$78, %xmm2, %xmm3       ## xmm3 = xmm2[2,3,0,1]
-	movd	%xmm3, %r14
-	movdqu	(%rsi), %xmm3
-	movdqu	16(%rsi), %xmm4
-	movdqu	32(%rsi), %xmm5
-	pshufd	$78, %xmm5, %xmm6       ## xmm6 = xmm5[2,3,0,1]
-	movd	%xmm6, %rcx
-	movd	%xmm2, %r15
-	movd	%xmm5, %r9
-	pshufd	$78, %xmm1, %xmm2       ## xmm2 = xmm1[2,3,0,1]
-	movd	%xmm2, %r12
-	pshufd	$78, %xmm4, %xmm2       ## xmm2 = xmm4[2,3,0,1]
-	movd	%xmm2, %r10
-	movd	%xmm1, %r13
-	pshufd	$78, %xmm0, %xmm1       ## xmm1 = xmm0[2,3,0,1]
-	movd	%xmm1, %rax
-	pshufd	$78, %xmm3, %xmm1       ## xmm1 = xmm3[2,3,0,1]
-	movd	%xmm0, %rbx
-	movd	%xmm3, %rsi
-	subq	%rbx, %rsi
-	movd	%xmm1, %rbx
-	sbbq	%rax, %rbx
-	movd	%xmm4, %rbp
-	sbbq	%r13, %rbp
-	sbbq	%r12, %r10
-	sbbq	%r15, %r9
-	sbbq	%r14, %rcx
-	movq	%rcx, -8(%rsp)          ## 8-byte Spill
-	sbbq	48(%rdx), %r11
+	movq	56(%rsi), %r11
+	movq	48(%rsi), %r10
+	movq	40(%rsi), %r9
+	movq	32(%rsi), %r8
+	movq	24(%rsi), %rax
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %r14
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r14
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rbx
+	adcq	24(%rdx), %rax
+	adcq	32(%rdx), %r8
+	adcq	40(%rdx), %r9
+	adcq	48(%rdx), %r10
+	adcq	56(%rdx), %r11
+	movq	%rax, 24(%rdi)
+	movq	%rbx, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r14, (%rdi)
+	setb	%al
+	movzbl	%al, %r14d
+	movq	%r8, %rdx
+	subq	(%rcx), %rdx
+	movq	%r9, %rsi
+	sbbq	8(%rcx), %rsi
+	movq	%r10, %rbx
+	sbbq	16(%rcx), %rbx
 	movq	%r11, %rax
-	sarq	$63, %rax
-	movq	%rax, %rdx
-	shldq	$1, %r11, %rdx
-	andq	(%r8), %rdx
-	movq	48(%r8), %r14
-	andq	%rax, %r14
-	movq	40(%r8), %r15
-	andq	%rax, %r15
-	movq	32(%r8), %r12
-	andq	%rax, %r12
-	movq	24(%r8), %r13
-	andq	%rax, %r13
-	movq	16(%r8), %rcx
-	andq	%rax, %rcx
-	andq	8(%r8), %rax
-	addq	%rsi, %rdx
-	adcq	%rbx, %rax
-	movq	%rdx, (%rdi)
-	movq	%rax, 8(%rdi)
-	adcq	%rbp, %rcx
-	movq	%rcx, 16(%rdi)
-	adcq	%r10, %r13
-	movq	%r13, 24(%rdi)
-	adcq	%r9, %r12
-	movq	%r12, 32(%rdi)
-	adcq	-8(%rsp), %r15          ## 8-byte Folded Reload
-	movq	%r15, 40(%rdi)
-	adcq	%r11, %r14
-	movq	%r14, 48(%rdi)
+	sbbq	24(%rcx), %rax
+	sbbq	$0, %r14
+	testb	$1, %r14b
+	cmovneq	%r11, %rax
+	movq	%rax, 56(%rdi)
+	cmovneq	%r10, %rbx
+	movq	%rbx, 48(%rdi)
+	cmovneq	%r9, %rsi
+	movq	%rsi, 40(%rdi)
+	cmovneq	%r8, %rdx
+	movq	%rdx, 32(%rdi)
 	popq	%rbx
-	popq	%r12
-	popq	%r13
 	popq	%r14
-	popq	%r15
-	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_add7Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sub4Lbmi2            ## -- Begin function mcl_fpDbl_sub4Lbmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_add7Lbmi2:                   ## @mcl_fpDbl_add7Lbmi2
-## BB#0:
-	pushq	%rbp
+_mcl_fpDbl_sub4Lbmi2:                   ## @mcl_fpDbl_sub4Lbmi2
+## %bb.0:
 	pushq	%r15
 	pushq	%r14
-	pushq	%r13
-	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r8
-	movq	104(%rdx), %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	96(%rdx), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	88(%rdx), %r11
-	movq	80(%rdx), %r14
+	movq	56(%rsi), %r8
+	movq	48(%rsi), %r9
+	movq	40(%rsi), %r10
+	movq	32(%rsi), %r11
 	movq	24(%rsi), %r15
-	movq	32(%rsi), %r12
-	movq	16(%rdx), %r9
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rbx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rbx
-	adcq	16(%rsi), %r9
-	adcq	24(%rdx), %r15
-	adcq	32(%rdx), %r12
-	movq	72(%rdx), %r13
-	movq	64(%rdx), %rbp
-	movq	%rax, (%rdi)
-	movq	56(%rdx), %r10
-	movq	%rbx, 8(%rdi)
-	movq	48(%rdx), %rcx
-	movq	40(%rdx), %rdx
-	movq	%r9, 16(%rdi)
-	movq	104(%rsi), %r9
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %r14
+	movq	8(%rsi), %rax
+	xorl	%esi, %esi
+	subq	(%rdx), %r14
+	sbbq	8(%rdx), %rax
+	sbbq	16(%rdx), %rbx
+	sbbq	24(%rdx), %r15
+	sbbq	32(%rdx), %r11
+	sbbq	40(%rdx), %r10
+	sbbq	48(%rdx), %r9
+	sbbq	56(%rdx), %r8
 	movq	%r15, 24(%rdi)
-	movq	40(%rsi), %rbx
-	adcq	%rdx, %rbx
-	movq	96(%rsi), %r15
-	movq	%r12, 32(%rdi)
-	movq	48(%rsi), %rdx
-	adcq	%rcx, %rdx
-	movq	88(%rsi), %rax
+	movq	%rbx, 16(%rdi)
+	movq	%rax, 8(%rdi)
+	movq	%r14, (%rdi)
+	sbbq	%rsi, %rsi
+	andl	$1, %esi
+	negq	%rsi
+	movq	24(%rcx), %rax
+	andq	%rsi, %rax
+	movq	16(%rcx), %rdx
+	andq	%rsi, %rdx
+	movq	8(%rcx), %rbx
+	andq	%rsi, %rbx
+	andq	(%rcx), %rsi
+	addq	%r11, %rsi
+	movq	%rsi, 32(%rdi)
+	adcq	%r10, %rbx
 	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rcx
-	adcq	%r10, %rcx
-	movq	80(%rsi), %r12
+	adcq	%r9, %rdx
 	movq	%rdx, 48(%rdi)
-	movq	72(%rsi), %rdx
-	movq	64(%rsi), %rsi
-	adcq	%rbp, %rsi
-	adcq	%r13, %rdx
-	adcq	%r14, %r12
-	adcq	%r11, %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	adcq	-24(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%r15, -24(%rsp)         ## 8-byte Spill
-	adcq	-8(%rsp), %r9           ## 8-byte Folded Reload
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	movq	%rcx, %rbx
-	subq	(%r8), %rbx
-	movq	%rsi, %r10
-	sbbq	8(%r8), %r10
-	movq	%rdx, %r11
-	sbbq	16(%r8), %r11
-	movq	%r12, %r14
-	sbbq	24(%r8), %r14
-	movq	-16(%rsp), %r13         ## 8-byte Reload
-	sbbq	32(%r8), %r13
-	sbbq	40(%r8), %r15
-	movq	%r9, %rax
-	sbbq	48(%r8), %rax
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	cmovneq	%rcx, %rbx
-	movq	%rbx, 56(%rdi)
-	testb	%bpl, %bpl
-	cmovneq	%rsi, %r10
-	movq	%r10, 64(%rdi)
-	cmovneq	%rdx, %r11
-	movq	%r11, 72(%rdi)
-	cmovneq	%r12, %r14
-	movq	%r14, 80(%rdi)
-	cmovneq	-16(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%r13, 88(%rdi)
-	cmovneq	-24(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%r15, 96(%rdi)
-	cmovneq	%r9, %rax
-	movq	%rax, 104(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sub7Lbmi2
-	.p2align	4, 0x90
-_mcl_fpDbl_sub7Lbmi2:                   ## @mcl_fpDbl_sub7Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rcx, %r8
-	movq	104(%rdx), %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	96(%rdx), %r10
-	movq	88(%rdx), %r14
-	movq	16(%rsi), %rax
-	movq	(%rsi), %r15
-	movq	8(%rsi), %r11
-	xorl	%ecx, %ecx
-	subq	(%rdx), %r15
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %rax
-	movq	24(%rsi), %rbx
-	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %r12
-	sbbq	32(%rdx), %r12
-	movq	80(%rdx), %r13
-	movq	72(%rdx), %rbp
-	movq	%r15, (%rdi)
-	movq	64(%rdx), %r9
-	movq	%r11, 8(%rdi)
-	movq	56(%rdx), %r15
-	movq	%rax, 16(%rdi)
-	movq	48(%rdx), %r11
-	movq	40(%rdx), %rdx
-	movq	%rbx, 24(%rdi)
-	movq	40(%rsi), %rbx
-	sbbq	%rdx, %rbx
-	movq	104(%rsi), %rax
-	movq	%r12, 32(%rdi)
-	movq	48(%rsi), %r12
-	sbbq	%r11, %r12
-	movq	96(%rsi), %r11
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rdx
-	sbbq	%r15, %rdx
-	movq	88(%rsi), %r15
-	movq	%r12, 48(%rdi)
-	movq	64(%rsi), %rbx
-	sbbq	%r9, %rbx
-	movq	80(%rsi), %r12
-	movq	72(%rsi), %r9
-	sbbq	%rbp, %r9
-	sbbq	%r13, %r12
-	sbbq	%r14, %r15
-	sbbq	%r10, %r11
-	sbbq	-8(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movl	$0, %ebp
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	movq	(%r8), %r10
-	cmoveq	%rcx, %r10
-	testb	%bpl, %bpl
-	movq	16(%r8), %rbp
-	cmoveq	%rcx, %rbp
-	movq	8(%r8), %rsi
-	cmoveq	%rcx, %rsi
-	movq	48(%r8), %r14
-	cmoveq	%rcx, %r14
-	movq	40(%r8), %r13
-	cmoveq	%rcx, %r13
-	movq	32(%r8), %rax
-	cmoveq	%rcx, %rax
-	cmovneq	24(%r8), %rcx
-	addq	%rdx, %r10
-	adcq	%rbx, %rsi
-	movq	%r10, 56(%rdi)
-	movq	%rsi, 64(%rdi)
-	adcq	%r9, %rbp
-	movq	%rbp, 72(%rdi)
-	adcq	%r12, %rcx
-	movq	%rcx, 80(%rdi)
-	adcq	%r15, %rax
-	movq	%rax, 88(%rdi)
-	adcq	%r11, %r13
-	movq	%r13, 96(%rdi)
-	adcq	-8(%rsp), %r14          ## 8-byte Folded Reload
-	movq	%r14, 104(%rdi)
+	adcq	%r8, %rax
+	movq	%rax, 56(%rdi)
 	popq	%rbx
-	popq	%r12
-	popq	%r13
 	popq	%r14
 	popq	%r15
-	popq	%rbp
 	retq
-
+                                        ## -- End function
+	.globl	_mulPv384x64bmi2                ## -- Begin function mulPv384x64bmi2
 	.p2align	4, 0x90
-l_mulPv512x64:                          ## @mulPv512x64
-## BB#0:
-	mulxq	(%rsi), %rcx, %rax
-	movq	%rcx, (%rdi)
-	mulxq	8(%rsi), %rcx, %r8
-	addq	%rax, %rcx
-	movq	%rcx, 8(%rdi)
-	mulxq	16(%rsi), %rcx, %r9
-	adcq	%r8, %rcx
-	movq	%rcx, 16(%rdi)
-	mulxq	24(%rsi), %rax, %rcx
-	adcq	%r9, %rax
-	movq	%rax, 24(%rdi)
-	mulxq	32(%rsi), %rax, %r8
-	adcq	%rcx, %rax
-	movq	%rax, 32(%rdi)
-	mulxq	40(%rsi), %rcx, %r9
+_mulPv384x64bmi2:                       ## @mulPv384x64bmi2
+## %bb.0:
+	movq	%rdi, %rax
+	mulxq	(%rsi), %rdi, %rcx
+	movq	%rdi, (%rax)
+	mulxq	8(%rsi), %rdi, %r8
+	addq	%rcx, %rdi
+	movq	%rdi, 8(%rax)
+	mulxq	16(%rsi), %rdi, %r9
+	adcq	%r8, %rdi
+	movq	%rdi, 16(%rax)
+	mulxq	24(%rsi), %rcx, %rdi
+	adcq	%r9, %rcx
+	movq	%rcx, 24(%rax)
+	mulxq	32(%rsi), %rcx, %r8
+	adcq	%rdi, %rcx
+	movq	%rcx, 32(%rax)
+	mulxq	40(%rsi), %rcx, %rdx
 	adcq	%r8, %rcx
-	movq	%rcx, 40(%rdi)
-	mulxq	48(%rsi), %rax, %rcx
-	adcq	%r9, %rax
-	movq	%rax, 48(%rdi)
-	mulxq	56(%rsi), %rax, %rdx
-	adcq	%rcx, %rax
-	movq	%rax, 56(%rdi)
+	movq	%rcx, 40(%rax)
 	adcq	$0, %rdx
-	movq	%rdx, 64(%rdi)
-	movq	%rdi, %rax
-	retq
-
-	.globl	_mcl_fp_mulUnitPre8Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_mulUnitPre8Lbmi2:               ## @mcl_fp_mulUnitPre8Lbmi2
-## BB#0:
-	pushq	%rbx
-	subq	$80, %rsp
-	movq	%rdi, %rbx
-	leaq	8(%rsp), %rdi
-	callq	l_mulPv512x64
-	movq	72(%rsp), %r8
-	movq	64(%rsp), %r9
-	movq	56(%rsp), %r10
-	movq	48(%rsp), %r11
-	movq	40(%rsp), %rdi
-	movq	32(%rsp), %rax
-	movq	24(%rsp), %rcx
-	movq	8(%rsp), %rdx
-	movq	16(%rsp), %rsi
-	movq	%rdx, (%rbx)
-	movq	%rsi, 8(%rbx)
-	movq	%rcx, 16(%rbx)
-	movq	%rax, 24(%rbx)
-	movq	%rdi, 32(%rbx)
-	movq	%r11, 40(%rbx)
-	movq	%r10, 48(%rbx)
-	movq	%r9, 56(%rbx)
-	movq	%r8, 64(%rbx)
-	addq	$80, %rsp
-	popq	%rbx
+	movq	%rdx, 48(%rax)
 	retq
-
-	.globl	_mcl_fpDbl_mulPre8Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_mulUnitPre6Lbmi2        ## -- Begin function mcl_fp_mulUnitPre6Lbmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_mulPre8Lbmi2:                ## @mcl_fpDbl_mulPre8Lbmi2
-## BB#0:
-	pushq	%rbp
-	movq	%rsp, %rbp
+_mcl_fp_mulUnitPre6Lbmi2:               ## @mcl_fp_mulUnitPre6Lbmi2
+## %bb.0:
 	pushq	%r15
 	pushq	%r14
-	pushq	%r13
 	pushq	%r12
-	pushq	%rbx
-	subq	$200, %rsp
-	movq	%rdx, %r15
-	movq	%rsi, %rbx
-	movq	%rdi, %r14
-	callq	_mcl_fpDbl_mulPre4Lbmi2
-	leaq	64(%r14), %rdi
-	leaq	32(%rbx), %rsi
-	leaq	32(%r15), %rdx
-	callq	_mcl_fpDbl_mulPre4Lbmi2
-	movq	56(%rbx), %r10
-	movq	48(%rbx), %rdx
-	movq	(%rbx), %rsi
-	movq	8(%rbx), %rdi
-	addq	32(%rbx), %rsi
-	adcq	40(%rbx), %rdi
-	adcq	16(%rbx), %rdx
-	adcq	24(%rbx), %r10
-	pushfq
-	popq	%r8
-	xorl	%r9d, %r9d
-	movq	56(%r15), %rcx
-	movq	48(%r15), %r13
-	movq	(%r15), %r12
-	movq	8(%r15), %rbx
-	addq	32(%r15), %r12
-	adcq	40(%r15), %rbx
-	adcq	16(%r15), %r13
-	adcq	24(%r15), %rcx
-	movl	$0, %eax
-	cmovbq	%r10, %rax
-	movq	%rax, -88(%rbp)         ## 8-byte Spill
-	movl	$0, %eax
-	cmovbq	%rdx, %rax
-	movq	%rax, -80(%rbp)         ## 8-byte Spill
-	movl	$0, %eax
-	cmovbq	%rdi, %rax
-	movq	%rax, -72(%rbp)         ## 8-byte Spill
-	movl	$0, %eax
-	cmovbq	%rsi, %rax
-	movq	%rax, -64(%rbp)         ## 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%rsi, -168(%rbp)
-	movq	%rdi, -160(%rbp)
-	movq	%rdx, -152(%rbp)
-	movq	%r10, -144(%rbp)
-	movq	%r12, -136(%rbp)
-	movq	%rbx, -128(%rbp)
-	movq	%r13, -120(%rbp)
-	movq	%rcx, -112(%rbp)
-	pushq	%r8
-	popfq
-	cmovaeq	%r9, %rcx
-	movq	%rcx, -48(%rbp)         ## 8-byte Spill
-	cmovaeq	%r9, %r13
-	cmovaeq	%r9, %rbx
-	cmovaeq	%r9, %r12
-	sbbq	%rax, %rax
-	movq	%rax, -56(%rbp)         ## 8-byte Spill
-	leaq	-232(%rbp), %rdi
-	leaq	-168(%rbp), %rsi
-	leaq	-136(%rbp), %rdx
-	callq	_mcl_fpDbl_mulPre4Lbmi2
-	addq	-64(%rbp), %r12         ## 8-byte Folded Reload
-	adcq	-72(%rbp), %rbx         ## 8-byte Folded Reload
-	adcq	-80(%rbp), %r13         ## 8-byte Folded Reload
-	movq	-48(%rbp), %r10         ## 8-byte Reload
-	adcq	-88(%rbp), %r10         ## 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	-56(%rbp), %rdx         ## 8-byte Reload
-	andl	%edx, %r15d
-	andl	$1, %r15d
-	addq	-200(%rbp), %r12
-	adcq	-192(%rbp), %rbx
-	adcq	-184(%rbp), %r13
-	adcq	-176(%rbp), %r10
-	adcq	%rax, %r15
-	movq	-208(%rbp), %rax
-	movq	-216(%rbp), %rcx
-	movq	-232(%rbp), %rsi
-	movq	-224(%rbp), %rdx
-	subq	(%r14), %rsi
-	sbbq	8(%r14), %rdx
-	sbbq	16(%r14), %rcx
-	sbbq	24(%r14), %rax
-	movq	32(%r14), %rdi
-	movq	%rdi, -80(%rbp)         ## 8-byte Spill
-	movq	40(%r14), %r8
-	movq	%r8, -88(%rbp)          ## 8-byte Spill
-	sbbq	%rdi, %r12
-	sbbq	%r8, %rbx
-	movq	48(%r14), %rdi
-	movq	%rdi, -72(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %r13
-	movq	56(%r14), %rdi
-	movq	%rdi, -64(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %r10
-	sbbq	$0, %r15
-	movq	64(%r14), %r11
-	subq	%r11, %rsi
-	movq	72(%r14), %rdi
-	movq	%rdi, -56(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %rdx
-	movq	80(%r14), %rdi
-	movq	%rdi, -48(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %rcx
-	movq	88(%r14), %rdi
-	movq	%rdi, -104(%rbp)        ## 8-byte Spill
-	sbbq	%rdi, %rax
-	movq	96(%r14), %rdi
-	movq	%rdi, -96(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %r12
-	movq	104(%r14), %rdi
-	sbbq	%rdi, %rbx
-	movq	112(%r14), %r8
-	sbbq	%r8, %r13
-	movq	120(%r14), %r9
-	sbbq	%r9, %r10
-	sbbq	$0, %r15
-	addq	-80(%rbp), %rsi         ## 8-byte Folded Reload
-	adcq	-88(%rbp), %rdx         ## 8-byte Folded Reload
-	movq	%rsi, 32(%r14)
-	adcq	-72(%rbp), %rcx         ## 8-byte Folded Reload
-	movq	%rdx, 40(%r14)
-	adcq	-64(%rbp), %rax         ## 8-byte Folded Reload
-	movq	%rcx, 48(%r14)
-	adcq	%r11, %r12
-	movq	%rax, 56(%r14)
-	movq	%r12, 64(%r14)
-	adcq	-56(%rbp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, 72(%r14)
-	adcq	-48(%rbp), %r13         ## 8-byte Folded Reload
-	movq	%r13, 80(%r14)
-	adcq	-104(%rbp), %r10        ## 8-byte Folded Reload
-	movq	%r10, 88(%r14)
-	adcq	-96(%rbp), %r15         ## 8-byte Folded Reload
-	movq	%r15, 96(%r14)
-	adcq	$0, %rdi
-	movq	%rdi, 104(%r14)
-	adcq	$0, %r8
-	movq	%r8, 112(%r14)
-	adcq	$0, %r9
-	movq	%r9, 120(%r14)
-	addq	$200, %rsp
+	pushq	%rbx
+	mulxq	40(%rsi), %r8, %r11
+	mulxq	32(%rsi), %r9, %r12
+	mulxq	24(%rsi), %r10, %rcx
+	mulxq	16(%rsi), %r14, %rbx
+	mulxq	8(%rsi), %r15, %rax
+	mulxq	(%rsi), %rdx, %rsi
+	movq	%rdx, (%rdi)
+	addq	%r15, %rsi
+	movq	%rsi, 8(%rdi)
+	adcq	%r14, %rax
+	movq	%rax, 16(%rdi)
+	adcq	%r10, %rbx
+	movq	%rbx, 24(%rdi)
+	adcq	%r9, %rcx
+	movq	%rcx, 32(%rdi)
+	adcq	%r8, %r12
+	movq	%r12, 40(%rdi)
+	adcq	$0, %r11
+	movq	%r11, 48(%rdi)
 	popq	%rbx
 	popq	%r12
-	popq	%r13
 	popq	%r14
 	popq	%r15
-	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_sqrPre8Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_mulPre6Lbmi2         ## -- Begin function mcl_fpDbl_mulPre6Lbmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre8Lbmi2:                ## @mcl_fpDbl_sqrPre8Lbmi2
-## BB#0:
+_mcl_fpDbl_mulPre6Lbmi2:                ## @mcl_fpDbl_mulPre6Lbmi2
+## %bb.0:
 	pushq	%rbp
-	movq	%rsp, %rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$200, %rsp
-	movq	%rsi, %rbx
-	movq	%rdi, %r14
+	movq	(%rsi), %r9
+	movq	8(%rsi), %r13
+	movq	(%rdx), %rcx
+	movq	%rdx, %r12
+	movq	%rdx, -40(%rsp)                 ## 8-byte Spill
+	movq	%r9, %rdx
+	movq	%r9, -24(%rsp)                  ## 8-byte Spill
+	mulxq	%rcx, %r8, %rax
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	16(%rsi), %rax
+	movq	%rax, -32(%rsp)                 ## 8-byte Spill
+	movq	24(%rsi), %rbx
+	movq	%rbx, -80(%rsp)                 ## 8-byte Spill
+	movq	32(%rsi), %rbp
+	movq	%rbp, -72(%rsp)                 ## 8-byte Spill
+	movq	40(%rsi), %rdx
+	movq	%r8, (%rdi)
+	movq	%rdi, %r15
+	movq	%rdi, -16(%rsp)                 ## 8-byte Spill
+	movq	%rdx, %r8
+	movq	%rdx, -48(%rsp)                 ## 8-byte Spill
+	mulxq	%rcx, %rdx, %rsi
+	movq	%rdx, -104(%rsp)                ## 8-byte Spill
+	movq	%rbp, %rdx
+	mulxq	%rcx, %r10, %r14
 	movq	%rbx, %rdx
-	callq	_mcl_fpDbl_mulPre4Lbmi2
-	leaq	64(%r14), %rdi
-	leaq	32(%rbx), %rsi
-	movq	%rsi, %rdx
-	callq	_mcl_fpDbl_mulPre4Lbmi2
-	movq	56(%rbx), %r15
-	movq	48(%rbx), %rax
-	movq	(%rbx), %rcx
-	movq	8(%rbx), %rdx
-	addq	32(%rbx), %rcx
-	adcq	40(%rbx), %rdx
-	adcq	16(%rbx), %rax
-	adcq	24(%rbx), %r15
-	pushfq
-	popq	%r8
-	pushfq
-	popq	%r9
-	pushfq
-	popq	%r10
-	pushfq
-	popq	%rdi
-	pushfq
-	popq	%rbx
-	sbbq	%rsi, %rsi
-	movq	%rsi, -56(%rbp)         ## 8-byte Spill
-	leaq	(%rcx,%rcx), %rsi
-	xorl	%r11d, %r11d
-	pushq	%rbx
-	popfq
-	cmovaeq	%r11, %rsi
-	movq	%rsi, -48(%rbp)         ## 8-byte Spill
-	movq	%rdx, %r13
-	shldq	$1, %rcx, %r13
-	pushq	%rdi
-	popfq
-	cmovaeq	%r11, %r13
-	movq	%rax, %r12
-	shldq	$1, %rdx, %r12
-	pushq	%r10
-	popfq
-	cmovaeq	%r11, %r12
-	movq	%r15, %rbx
-	movq	%rcx, -168(%rbp)
-	movq	%rdx, -160(%rbp)
-	movq	%rax, -152(%rbp)
-	movq	%r15, -144(%rbp)
-	movq	%rcx, -136(%rbp)
-	movq	%rdx, -128(%rbp)
-	movq	%rax, -120(%rbp)
-	movq	%r15, -112(%rbp)
-	shldq	$1, %rax, %r15
-	pushq	%r9
-	popfq
-	cmovaeq	%r11, %r15
-	shrq	$63, %rbx
-	pushq	%r8
-	popfq
-	cmovaeq	%r11, %rbx
-	leaq	-232(%rbp), %rdi
-	leaq	-168(%rbp), %rsi
-	leaq	-136(%rbp), %rdx
-	callq	_mcl_fpDbl_mulPre4Lbmi2
-	movq	-56(%rbp), %rax         ## 8-byte Reload
-	andl	$1, %eax
-	movq	-48(%rbp), %r10         ## 8-byte Reload
-	addq	-200(%rbp), %r10
-	adcq	-192(%rbp), %r13
-	adcq	-184(%rbp), %r12
-	adcq	-176(%rbp), %r15
-	adcq	%rbx, %rax
-	movq	%rax, %rbx
-	movq	-208(%rbp), %rax
-	movq	-216(%rbp), %rcx
-	movq	-232(%rbp), %rsi
-	movq	-224(%rbp), %rdx
-	subq	(%r14), %rsi
-	sbbq	8(%r14), %rdx
-	sbbq	16(%r14), %rcx
-	sbbq	24(%r14), %rax
-	movq	32(%r14), %r9
-	movq	%r9, -56(%rbp)          ## 8-byte Spill
-	movq	40(%r14), %r8
-	movq	%r8, -48(%rbp)          ## 8-byte Spill
-	sbbq	%r9, %r10
-	sbbq	%r8, %r13
-	movq	48(%r14), %rdi
-	movq	%rdi, -104(%rbp)        ## 8-byte Spill
-	sbbq	%rdi, %r12
-	movq	56(%r14), %rdi
-	movq	%rdi, -96(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %r15
-	sbbq	$0, %rbx
-	movq	64(%r14), %r11
-	subq	%r11, %rsi
-	movq	72(%r14), %rdi
-	movq	%rdi, -88(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %rdx
-	movq	80(%r14), %rdi
-	movq	%rdi, -80(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %rcx
-	movq	88(%r14), %rdi
-	movq	%rdi, -72(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %rax
-	movq	96(%r14), %rdi
-	movq	%rdi, -64(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %r10
-	movq	104(%r14), %rdi
-	sbbq	%rdi, %r13
-	movq	112(%r14), %r8
-	sbbq	%r8, %r12
-	movq	120(%r14), %r9
-	sbbq	%r9, %r15
-	sbbq	$0, %rbx
-	addq	-56(%rbp), %rsi         ## 8-byte Folded Reload
-	adcq	-48(%rbp), %rdx         ## 8-byte Folded Reload
-	movq	%rsi, 32(%r14)
-	adcq	-104(%rbp), %rcx        ## 8-byte Folded Reload
-	movq	%rdx, 40(%r14)
-	adcq	-96(%rbp), %rax         ## 8-byte Folded Reload
-	movq	%rcx, 48(%r14)
-	adcq	%r11, %r10
-	movq	%rax, 56(%r14)
-	movq	%r10, 64(%r14)
-	adcq	-88(%rbp), %r13         ## 8-byte Folded Reload
-	movq	%r13, 72(%r14)
-	adcq	-80(%rbp), %r12         ## 8-byte Folded Reload
-	movq	%r12, 80(%r14)
-	adcq	-72(%rbp), %r15         ## 8-byte Folded Reload
-	movq	%r15, 88(%r14)
-	movq	%rbx, %rax
-	adcq	-64(%rbp), %rax         ## 8-byte Folded Reload
-	movq	%rax, 96(%r14)
-	adcq	$0, %rdi
-	movq	%rdi, 104(%r14)
-	adcq	$0, %r8
-	movq	%r8, 112(%r14)
+	mulxq	%rcx, %r11, %rdi
+	movq	%rax, %rdx
+	mulxq	%rcx, %rbx, %rax
+	movq	%r13, %rdx
+	movq	%r13, -64(%rsp)                 ## 8-byte Spill
+	mulxq	%rcx, %rcx, %rbp
+	addq	-112(%rsp), %rcx                ## 8-byte Folded Reload
+	adcq	%rbx, %rbp
+	adcq	%r11, %rax
+	adcq	%r10, %rdi
+	adcq	-104(%rsp), %r14                ## 8-byte Folded Reload
+	adcq	$0, %rsi
+	movq	%rsi, -96(%rsp)                 ## 8-byte Spill
+	movq	8(%r12), %rdx
+	mulxq	%r9, %rbx, %rsi
+	movq	%rsi, -88(%rsp)                 ## 8-byte Spill
+	addq	%rcx, %rbx
+	movq	%rbx, 8(%r15)
+	mulxq	%r8, %r10, %rcx
+	movq	%rcx, -104(%rsp)                ## 8-byte Spill
+	movq	-72(%rsp), %rcx                 ## 8-byte Reload
+	mulxq	%rcx, %r9, %rbx
+	movq	%rbx, -112(%rsp)                ## 8-byte Spill
+	movq	-80(%rsp), %r12                 ## 8-byte Reload
+	mulxq	%r12, %r11, %rsi
+	mulxq	-32(%rsp), %r8, %r15            ## 8-byte Folded Reload
+	mulxq	%r13, %rbx, %rdx
+	adcq	%rbp, %rbx
+	adcq	%rax, %r8
+	adcq	%rdi, %r11
+	adcq	%r14, %r9
+	adcq	-96(%rsp), %r10                 ## 8-byte Folded Reload
+	setb	%al
+	addq	-88(%rsp), %rbx                 ## 8-byte Folded Reload
+	adcq	%rdx, %r8
+	adcq	%r15, %r11
+	adcq	%rsi, %r9
+	adcq	-112(%rsp), %r10                ## 8-byte Folded Reload
+	movzbl	%al, %r13d
+	adcq	-104(%rsp), %r13                ## 8-byte Folded Reload
+	movq	-40(%rsp), %r15                 ## 8-byte Reload
+	movq	16(%r15), %rdx
+	mulxq	-48(%rsp), %rsi, %rax           ## 8-byte Folded Reload
+	movq	%rsi, -104(%rsp)                ## 8-byte Spill
+	movq	%rax, -88(%rsp)                 ## 8-byte Spill
+	mulxq	%rcx, %rax, %r14
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	mulxq	%r12, %rax, %rbp
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	mulxq	-64(%rsp), %rcx, %r12           ## 8-byte Folded Reload
+	mulxq	-24(%rsp), %rax, %rsi           ## 8-byte Folded Reload
+	addq	%rcx, %rsi
+	mulxq	-32(%rsp), %rcx, %rdi           ## 8-byte Folded Reload
+	adcq	%r12, %rcx
+	adcq	-96(%rsp), %rdi                 ## 8-byte Folded Reload
+	adcq	-112(%rsp), %rbp                ## 8-byte Folded Reload
+	adcq	-104(%rsp), %r14                ## 8-byte Folded Reload
+	movq	-88(%rsp), %r12                 ## 8-byte Reload
+	adcq	$0, %r12
+	addq	%rbx, %rax
+	movq	-16(%rsp), %rdx                 ## 8-byte Reload
+	movq	%rax, 16(%rdx)
+	adcq	%r8, %rsi
+	adcq	%r11, %rcx
+	adcq	%r9, %rdi
+	adcq	%r10, %rbp
+	adcq	%r13, %r14
+	adcq	$0, %r12
+	movq	%r12, -88(%rsp)                 ## 8-byte Spill
+	movq	24(%r15), %rdx
+	movq	-48(%rsp), %r15                 ## 8-byte Reload
+	mulxq	%r15, %rbx, %rax
+	movq	%rbx, -96(%rsp)                 ## 8-byte Spill
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	mulxq	-72(%rsp), %rbx, %rax           ## 8-byte Folded Reload
+	movq	%rbx, -56(%rsp)                 ## 8-byte Spill
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	mulxq	-80(%rsp), %rax, %r11           ## 8-byte Folded Reload
+	movq	%rax, -8(%rsp)                  ## 8-byte Spill
+	mulxq	-64(%rsp), %r8, %r12            ## 8-byte Folded Reload
+	mulxq	-24(%rsp), %rax, %rbx           ## 8-byte Folded Reload
+	addq	%r8, %rbx
+	movq	-32(%rsp), %r13                 ## 8-byte Reload
+	mulxq	%r13, %r9, %r10
+	adcq	%r12, %r9
+	adcq	-8(%rsp), %r10                  ## 8-byte Folded Reload
+	adcq	-56(%rsp), %r11                 ## 8-byte Folded Reload
+	movq	-112(%rsp), %r8                 ## 8-byte Reload
+	adcq	-96(%rsp), %r8                  ## 8-byte Folded Reload
+	movq	-104(%rsp), %r12                ## 8-byte Reload
+	adcq	$0, %r12
+	addq	%rsi, %rax
+	movq	-16(%rsp), %rdx                 ## 8-byte Reload
+	movq	%rax, 24(%rdx)
+	adcq	%rcx, %rbx
+	adcq	%rdi, %r9
+	adcq	%rbp, %r10
+	adcq	%r14, %r11
+	adcq	-88(%rsp), %r8                  ## 8-byte Folded Reload
+	movq	%r8, -112(%rsp)                 ## 8-byte Spill
+	adcq	$0, %r12
+	movq	%r12, -104(%rsp)                ## 8-byte Spill
+	movq	-40(%rsp), %rax                 ## 8-byte Reload
+	movq	32(%rax), %rdx
+	mulxq	%r15, %rcx, %rax
+	movq	%rcx, -88(%rsp)                 ## 8-byte Spill
+	mulxq	-72(%rsp), %rcx, %r14           ## 8-byte Folded Reload
+	movq	%rcx, -96(%rsp)                 ## 8-byte Spill
+	mulxq	-80(%rsp), %rcx, %rbp           ## 8-byte Folded Reload
+	movq	%rcx, -56(%rsp)                 ## 8-byte Spill
+	mulxq	-64(%rsp), %rdi, %r15           ## 8-byte Folded Reload
+	movq	-24(%rsp), %r12                 ## 8-byte Reload
+	mulxq	%r12, %rcx, %rsi
+	addq	%rdi, %rsi
+	mulxq	%r13, %rdi, %r8
+	adcq	%r15, %rdi
+	adcq	-56(%rsp), %r8                  ## 8-byte Folded Reload
+	adcq	-96(%rsp), %rbp                 ## 8-byte Folded Reload
+	adcq	-88(%rsp), %r14                 ## 8-byte Folded Reload
+	adcq	$0, %rax
+	addq	%rbx, %rcx
+	movq	-16(%rsp), %r15                 ## 8-byte Reload
+	movq	%rcx, 32(%r15)
+	adcq	%r9, %rsi
+	adcq	%r10, %rdi
+	adcq	%r11, %r8
+	adcq	-112(%rsp), %rbp                ## 8-byte Folded Reload
+	movq	-40(%rsp), %rcx                 ## 8-byte Reload
+	movq	40(%rcx), %rdx
+	adcq	-104(%rsp), %r14                ## 8-byte Folded Reload
+	mulxq	-64(%rsp), %rbx, %r9            ## 8-byte Folded Reload
+	mulxq	%r12, %rcx, %r11
+	adcq	$0, %rax
+	addq	%rbx, %r11
+	mulxq	%r13, %r12, %r10
+	adcq	%r9, %r12
+	mulxq	-80(%rsp), %r13, %r9            ## 8-byte Folded Reload
+	adcq	%r10, %r13
+	mulxq	-72(%rsp), %rbx, %r10           ## 8-byte Folded Reload
+	adcq	%r9, %rbx
+	mulxq	-48(%rsp), %rdx, %r9            ## 8-byte Folded Reload
+	adcq	%r10, %rdx
+	adcq	$0, %r9
+	addq	%rcx, %rsi
+	movq	%rsi, 40(%r15)
+	adcq	%rdi, %r11
+	movq	%r11, 48(%r15)
+	adcq	%r8, %r12
+	movq	%r12, 56(%r15)
+	adcq	%rbp, %r13
+	movq	%r13, 64(%r15)
+	adcq	%r14, %rbx
+	movq	%rbx, 72(%r15)
+	adcq	%rax, %rdx
+	movq	%rdx, 80(%r15)
 	adcq	$0, %r9
-	movq	%r9, 120(%r14)
-	addq	$200, %rsp
+	movq	%r9, 88(%r15)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -9046,452 +2479,202 @@ _mcl_fpDbl_sqrPre8Lbmi2:                ## @mcl_fpDbl_sqrPre8Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_mont8Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sqrPre6Lbmi2         ## -- Begin function mcl_fpDbl_sqrPre6Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_mont8Lbmi2:                     ## @mcl_fp_mont8Lbmi2
-## BB#0:
+_mcl_fpDbl_sqrPre6Lbmi2:                ## @mcl_fpDbl_sqrPre6Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$1256, %rsp             ## imm = 0x4E8
-	movq	%rcx, %r13
-	movq	%rdx, 64(%rsp)          ## 8-byte Spill
-	movq	%rsi, 72(%rsp)          ## 8-byte Spill
-	movq	%rdi, 96(%rsp)          ## 8-byte Spill
-	movq	-8(%r13), %rbx
-	movq	%rbx, 80(%rsp)          ## 8-byte Spill
-	movq	%r13, 56(%rsp)          ## 8-byte Spill
-	movq	(%rdx), %rdx
-	leaq	1184(%rsp), %rdi
-	callq	l_mulPv512x64
-	movq	1184(%rsp), %r15
-	movq	1192(%rsp), %r14
-	movq	%r15, %rdx
-	imulq	%rbx, %rdx
-	movq	1248(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	1240(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	1232(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	1224(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	1216(%rsp), %r12
-	movq	1208(%rsp), %rbx
-	movq	1200(%rsp), %rbp
-	leaq	1112(%rsp), %rdi
-	movq	%r13, %rsi
-	callq	l_mulPv512x64
-	addq	1112(%rsp), %r15
-	adcq	1120(%rsp), %r14
-	adcq	1128(%rsp), %rbp
-	movq	%rbp, 88(%rsp)          ## 8-byte Spill
-	adcq	1136(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	adcq	1144(%rsp), %r12
-	movq	%r12, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %r13          ## 8-byte Reload
-	adcq	1152(%rsp), %r13
-	movq	(%rsp), %rbx            ## 8-byte Reload
-	adcq	1160(%rsp), %rbx
-	movq	40(%rsp), %rbp          ## 8-byte Reload
-	adcq	1168(%rsp), %rbp
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	1176(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	sbbq	%r15, %r15
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1040(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	andl	$1, %r15d
-	addq	1040(%rsp), %r14
-	movq	88(%rsp), %rax          ## 8-byte Reload
-	adcq	1048(%rsp), %rax
-	movq	%rax, 88(%rsp)          ## 8-byte Spill
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	1056(%rsp), %rax
-	movq	%rax, %r12
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	1064(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	adcq	1072(%rsp), %r13
-	movq	%r13, 16(%rsp)          ## 8-byte Spill
-	adcq	1080(%rsp), %rbx
-	movq	%rbx, (%rsp)            ## 8-byte Spill
-	adcq	1088(%rsp), %rbp
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	1096(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	1104(%rsp), %r15
-	movq	%r15, 48(%rsp)          ## 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%r14, %rdx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	968(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	andl	$1, %r15d
-	addq	968(%rsp), %r14
-	movq	88(%rsp), %r13          ## 8-byte Reload
-	adcq	976(%rsp), %r13
-	adcq	984(%rsp), %r12
-	movq	%r12, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r14           ## 8-byte Reload
-	adcq	992(%rsp), %r14
-	movq	16(%rsp), %rbx          ## 8-byte Reload
-	adcq	1000(%rsp), %rbx
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	1008(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	adcq	1016(%rsp), %rbp
-	movq	%rbp, %r12
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	1024(%rsp), %rbp
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	adcq	1032(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	adcq	$0, %r15
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	896(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	%r13, %rcx
-	addq	896(%rsp), %rcx
-	movq	32(%rsp), %r13          ## 8-byte Reload
-	adcq	904(%rsp), %r13
-	adcq	912(%rsp), %r14
-	adcq	920(%rsp), %rbx
-	movq	%rbx, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	928(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	adcq	936(%rsp), %r12
-	movq	%r12, 40(%rsp)          ## 8-byte Spill
-	adcq	944(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %r12          ## 8-byte Reload
-	adcq	952(%rsp), %r12
-	adcq	960(%rsp), %r15
-	sbbq	%rbx, %rbx
+	subq	$168, %rsp
+	movq	%rdi, -48(%rsp)                 ## 8-byte Spill
+	movq	40(%rsi), %rdx
+	movq	32(%rsi), %rcx
+	mulxq	%rcx, %rax, %rdi
+	movq	%rdi, -104(%rsp)                ## 8-byte Spill
+	movq	%rax, -128(%rsp)                ## 8-byte Spill
+	movq	24(%rsi), %rax
+	mulxq	%rax, %r14, %r13
+	movq	%r14, -112(%rsp)                ## 8-byte Spill
+	movq	%r13, -64(%rsp)                 ## 8-byte Spill
+	movq	16(%rsi), %r10
+	mulxq	%r10, %r8, %r11
+	movq	%r8, 24(%rsp)                   ## 8-byte Spill
+	movq	%r11, -88(%rsp)                 ## 8-byte Spill
+	movq	(%rsi), %rdi
+	movq	%rdi, -96(%rsp)                 ## 8-byte Spill
+	movq	8(%rsi), %r15
+	mulxq	%r15, %r9, %r12
+	movq	%r9, 40(%rsp)                   ## 8-byte Spill
+	mulxq	%rdi, %rsi, %rbx
+	movq	%rsi, -56(%rsp)                 ## 8-byte Spill
+	mulxq	%rdx, %rbp, %rdx
+	movq	%rbx, %rdi
+	addq	%r9, %rdi
+	movq	%rdi, 120(%rsp)                 ## 8-byte Spill
+	movq	%r12, %rdi
+	adcq	%r8, %rdi
+	movq	%rdi, 128(%rsp)                 ## 8-byte Spill
+	movq	%r11, %rdi
+	adcq	%r14, %rdi
+	movq	%rdi, 136(%rsp)                 ## 8-byte Spill
+	adcq	-128(%rsp), %r13                ## 8-byte Folded Reload
+	movq	%r13, 144(%rsp)                 ## 8-byte Spill
+	movq	-104(%rsp), %r9                 ## 8-byte Reload
+	adcq	%r9, %rbp
+	movq	%rbp, 152(%rsp)                 ## 8-byte Spill
+	adcq	$0, %rdx
+	movq	%rdx, 160(%rsp)                 ## 8-byte Spill
 	movq	%rcx, %rdx
-	movq	%rcx, %rbp
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	824(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	andl	$1, %ebx
-	addq	824(%rsp), %rbp
-	adcq	832(%rsp), %r13
-	movq	%r13, 32(%rsp)          ## 8-byte Spill
-	adcq	840(%rsp), %r14
-	movq	%r14, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %r13          ## 8-byte Reload
-	adcq	848(%rsp), %r13
-	movq	(%rsp), %rbp            ## 8-byte Reload
-	adcq	856(%rsp), %rbp
-	movq	40(%rsp), %r14          ## 8-byte Reload
-	adcq	864(%rsp), %r14
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	872(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	880(%rsp), %r12
-	adcq	888(%rsp), %r15
-	adcq	$0, %rbx
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	752(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	addq	752(%rsp), %rax
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	760(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	adcq	768(%rsp), %r13
-	movq	%r13, 16(%rsp)          ## 8-byte Spill
-	adcq	776(%rsp), %rbp
-	movq	%rbp, (%rsp)            ## 8-byte Spill
-	adcq	784(%rsp), %r14
-	movq	%r14, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	792(%rsp), %rbp
-	adcq	800(%rsp), %r12
-	adcq	808(%rsp), %r15
-	adcq	816(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	sbbq	%r13, %r13
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	680(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	%r13, %rax
-	andl	$1, %eax
-	addq	680(%rsp), %rbx
-	movq	8(%rsp), %r14           ## 8-byte Reload
-	adcq	688(%rsp), %r14
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	696(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %r13            ## 8-byte Reload
-	adcq	704(%rsp), %r13
-	movq	40(%rsp), %rbx          ## 8-byte Reload
-	adcq	712(%rsp), %rbx
-	adcq	720(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	%r12, %rbp
-	adcq	728(%rsp), %rbp
-	adcq	736(%rsp), %r15
-	movq	32(%rsp), %r12          ## 8-byte Reload
-	adcq	744(%rsp), %r12
-	adcq	$0, %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	608(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	%r14, %rax
-	addq	608(%rsp), %rax
-	movq	16(%rsp), %r14          ## 8-byte Reload
-	adcq	616(%rsp), %r14
-	adcq	624(%rsp), %r13
-	movq	%r13, (%rsp)            ## 8-byte Spill
-	adcq	632(%rsp), %rbx
-	movq	%rbx, %r13
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	640(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	648(%rsp), %rbp
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	656(%rsp), %r15
-	adcq	664(%rsp), %r12
-	movq	%r12, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	672(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	sbbq	%rbp, %rbp
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	536(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	%rbp, %rax
-	andl	$1, %eax
-	addq	536(%rsp), %rbx
-	adcq	544(%rsp), %r14
-	movq	%r14, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rbx            ## 8-byte Reload
-	adcq	552(%rsp), %rbx
-	adcq	560(%rsp), %r13
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	568(%rsp), %rbp
-	movq	48(%rsp), %r12          ## 8-byte Reload
-	adcq	576(%rsp), %r12
-	adcq	584(%rsp), %r15
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	592(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r14           ## 8-byte Reload
-	adcq	600(%rsp), %r14
-	adcq	$0, %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	40(%rax), %rdx
-	leaq	464(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	addq	464(%rsp), %rax
-	adcq	472(%rsp), %rbx
-	adcq	480(%rsp), %r13
-	movq	%r13, 40(%rsp)          ## 8-byte Spill
-	adcq	488(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	adcq	496(%rsp), %r12
-	adcq	504(%rsp), %r15
-	movq	%r15, 16(%rsp)          ## 8-byte Spill
-	movq	32(%rsp), %r15          ## 8-byte Reload
-	adcq	512(%rsp), %r15
-	adcq	520(%rsp), %r14
-	movq	%r14, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %r14            ## 8-byte Reload
-	adcq	528(%rsp), %r14
-	sbbq	%r13, %r13
+	mulxq	%rax, %rdx, %r14
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	%rcx, %rdx
+	mulxq	%r10, %r13, %r11
+	movq	%r13, -16(%rsp)                 ## 8-byte Spill
+	movq	%r11, -80(%rsp)                 ## 8-byte Spill
+	mulxq	%r15, %rsi, %rdi
+	movq	%rsi, 16(%rsp)                  ## 8-byte Spill
+	movq	%rdi, -72(%rsp)                 ## 8-byte Spill
+	mulxq	-96(%rsp), %rdx, %r8            ## 8-byte Folded Reload
+	movq	%rdx, 32(%rsp)                  ## 8-byte Spill
+	movq	%rcx, %rdx
+	mulxq	%rcx, %rdx, %rcx
+	movq	%r8, %rbp
+	addq	%rsi, %rbp
+	movq	%rbp, 96(%rsp)                  ## 8-byte Spill
+	adcq	%r13, %rdi
+	movq	%rdi, 88(%rsp)                  ## 8-byte Spill
+	adcq	-120(%rsp), %r11                ## 8-byte Folded Reload
+	movq	%r11, 80(%rsp)                  ## 8-byte Spill
+	adcq	%r14, %rdx
+	movq	%rdx, 104(%rsp)                 ## 8-byte Spill
+	adcq	-128(%rsp), %rcx                ## 8-byte Folded Reload
+	movq	%rcx, 112(%rsp)                 ## 8-byte Spill
+	adcq	$0, %r9
+	movq	%r9, -104(%rsp)                 ## 8-byte Spill
 	movq	%rax, %rdx
-	movq	%rax, %rbp
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	392(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	%r13, %rax
-	andl	$1, %eax
-	addq	392(%rsp), %rbp
-	adcq	400(%rsp), %rbx
-	movq	%rbx, (%rsp)            ## 8-byte Spill
-	movq	40(%rsp), %rbp          ## 8-byte Reload
-	adcq	408(%rsp), %rbp
-	movq	24(%rsp), %rbx          ## 8-byte Reload
-	adcq	416(%rsp), %rbx
-	adcq	424(%rsp), %r12
-	movq	16(%rsp), %r13          ## 8-byte Reload
-	adcq	432(%rsp), %r13
-	adcq	440(%rsp), %r15
-	movq	%r15, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r15           ## 8-byte Reload
-	adcq	448(%rsp), %r15
-	adcq	456(%rsp), %r14
-	adcq	$0, %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	320(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	(%rsp), %rax            ## 8-byte Reload
-	addq	320(%rsp), %rax
-	adcq	328(%rsp), %rbp
-	movq	%rbp, 40(%rsp)          ## 8-byte Spill
-	adcq	336(%rsp), %rbx
-	movq	%rbx, 24(%rsp)          ## 8-byte Spill
-	movq	%r12, %rbp
-	adcq	344(%rsp), %rbp
-	adcq	352(%rsp), %r13
-	movq	32(%rsp), %r12          ## 8-byte Reload
-	adcq	360(%rsp), %r12
-	adcq	368(%rsp), %r15
-	movq	%r15, 8(%rsp)           ## 8-byte Spill
-	adcq	376(%rsp), %r14
-	movq	%r14, (%rsp)            ## 8-byte Spill
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	384(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	sbbq	%r15, %r15
+	mulxq	%r10, %rdi, %r13
+	mulxq	%r15, %rbp, %rcx
+	movq	%rbp, -24(%rsp)                 ## 8-byte Spill
+	movq	%rcx, -128(%rsp)                ## 8-byte Spill
+	movq	-96(%rsp), %r11                 ## 8-byte Reload
+	mulxq	%r11, %rdx, %r9
+	movq	%rdx, -8(%rsp)                  ## 8-byte Spill
 	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	248(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	andl	$1, %r15d
-	addq	248(%rsp), %rbx
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	256(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %r14          ## 8-byte Reload
-	adcq	264(%rsp), %r14
-	adcq	272(%rsp), %rbp
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	movq	%r13, %rbx
-	adcq	280(%rsp), %rbx
-	movq	%r12, %rbp
-	adcq	288(%rsp), %rbp
-	movq	8(%rsp), %r13           ## 8-byte Reload
-	adcq	296(%rsp), %r13
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	304(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	16(%rsp), %r12          ## 8-byte Reload
-	adcq	312(%rsp), %r12
-	adcq	$0, %r15
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	176(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	addq	176(%rsp), %rax
-	adcq	184(%rsp), %r14
-	movq	%r14, 24(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rcx          ## 8-byte Reload
-	adcq	192(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	adcq	200(%rsp), %rbx
-	movq	%rbx, 16(%rsp)          ## 8-byte Spill
-	adcq	208(%rsp), %rbp
-	adcq	216(%rsp), %r13
-	movq	%r13, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %r14            ## 8-byte Reload
-	adcq	224(%rsp), %r14
-	adcq	232(%rsp), %r12
-	adcq	240(%rsp), %r15
-	sbbq	%rbx, %rbx
-	movq	80(%rsp), %rdx          ## 8-byte Reload
-	imulq	%rax, %rdx
-	movq	%rax, %r13
-	leaq	104(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	andl	$1, %ebx
-	addq	104(%rsp), %r13
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	112(%rsp), %rcx
-	movq	48(%rsp), %rdx          ## 8-byte Reload
-	adcq	120(%rsp), %rdx
-	movq	16(%rsp), %rsi          ## 8-byte Reload
-	adcq	128(%rsp), %rsi
-	movq	%rbp, %rdi
-	adcq	136(%rsp), %rdi
-	movq	%rdi, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r8            ## 8-byte Reload
-	adcq	144(%rsp), %r8
-	movq	%r8, 8(%rsp)            ## 8-byte Spill
-	movq	%r14, %r9
-	adcq	152(%rsp), %r9
-	movq	%r9, (%rsp)             ## 8-byte Spill
-	adcq	160(%rsp), %r12
-	adcq	168(%rsp), %r15
+	mulxq	%rax, %rdx, %rax
+	movq	%r9, %rsi
+	addq	%rbp, %rsi
+	movq	%rsi, 56(%rsp)                  ## 8-byte Spill
+	adcq	%rdi, %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	adcq	%r13, %rdx
+	movq	%rdx, 64(%rsp)                  ## 8-byte Spill
+	movq	%r13, %rbp
+	adcq	-120(%rsp), %rax                ## 8-byte Folded Reload
+	movq	%rax, 72(%rsp)                  ## 8-byte Spill
+	adcq	-112(%rsp), %r14                ## 8-byte Folded Reload
+	movq	%r14, -120(%rsp)                ## 8-byte Spill
+	adcq	$0, -64(%rsp)                   ## 8-byte Folded Spill
+	movq	%r10, %rdx
+	mulxq	%r15, %r13, %rsi
+	mulxq	%r11, %rcx, %rax
+	movq	%rcx, -32(%rsp)                 ## 8-byte Spill
+	mulxq	%r10, %rcx, %r10
+	movq	%rax, %rdx
+	addq	%r13, %rdx
+	movq	%rdx, (%rsp)                    ## 8-byte Spill
+	adcq	%rsi, %rcx
+	movq	%rcx, -40(%rsp)                 ## 8-byte Spill
+	adcq	%rdi, %r10
+	movq	%r10, 8(%rsp)                   ## 8-byte Spill
+	adcq	-16(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	%rbp, -112(%rsp)                ## 8-byte Spill
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	adcq	%rcx, -80(%rsp)                 ## 8-byte Folded Spill
+	adcq	$0, -88(%rsp)                   ## 8-byte Folded Spill
+	movq	%r15, %rdx
+	mulxq	%r15, %r14, %rdi
+	mulxq	%r11, %r10, %rcx
+	addq	%rcx, %r14
+	adcq	%r13, %rdi
+	adcq	-24(%rsp), %rsi                 ## 8-byte Folded Reload
+	movq	16(%rsp), %rdx                  ## 8-byte Reload
+	adcq	%rdx, -128(%rsp)                ## 8-byte Folded Spill
+	movq	40(%rsp), %rdx                  ## 8-byte Reload
+	adcq	%rdx, -72(%rsp)                 ## 8-byte Folded Spill
+	movq	%r11, %rdx
+	mulxq	%r11, %rdx, %r11
+	movq	-48(%rsp), %rbp                 ## 8-byte Reload
+	movq	%rdx, (%rbp)
+	adcq	$0, %r12
+	addq	%r10, %r11
+	movq	-32(%rsp), %rdx                 ## 8-byte Reload
+	adcq	%rdx, %rcx
+	movq	-8(%rsp), %r15                  ## 8-byte Reload
+	adcq	%r15, %rax
+	movq	32(%rsp), %rbp                  ## 8-byte Reload
+	adcq	%rbp, %r9
+	adcq	-56(%rsp), %r8                  ## 8-byte Folded Reload
 	adcq	$0, %rbx
-	movq	%rcx, %rax
-	movq	%rcx, %r11
-	movq	56(%rsp), %rbp          ## 8-byte Reload
-	subq	(%rbp), %rax
-	movq	%rdx, %rcx
-	movq	%rdx, %r14
-	sbbq	8(%rbp), %rcx
-	movq	%rsi, %rdx
-	movq	%rsi, %r13
-	sbbq	16(%rbp), %rdx
-	movq	%rdi, %rsi
-	sbbq	24(%rbp), %rsi
-	movq	%r8, %rdi
-	sbbq	32(%rbp), %rdi
-	movq	%r9, %r10
-	sbbq	40(%rbp), %r10
-	movq	%r12, %r8
-	sbbq	48(%rbp), %r8
-	movq	%r15, %r9
-	sbbq	56(%rbp), %r9
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%r15, %r9
-	testb	%bl, %bl
-	cmovneq	%r11, %rax
-	movq	96(%rsp), %rbx          ## 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovneq	%r14, %rcx
-	movq	%rcx, 8(%rbx)
-	cmovneq	%r13, %rdx
-	movq	%rdx, 16(%rbx)
-	cmovneq	32(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 24(%rbx)
-	cmovneq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 32(%rbx)
-	cmovneq	(%rsp), %r10            ## 8-byte Folded Reload
-	movq	%r10, 40(%rbx)
-	cmovneq	%r12, %r8
-	movq	%r8, 48(%rbx)
-	movq	%r9, 56(%rbx)
-	addq	$1256, %rsp             ## imm = 0x4E8
+	addq	%r10, %r11
+	adcq	%r14, %rcx
+	adcq	%rdi, %rax
+	adcq	%rsi, %r9
+	adcq	-128(%rsp), %r8                 ## 8-byte Folded Reload
+	adcq	-72(%rsp), %rbx                 ## 8-byte Folded Reload
+	adcq	$0, %r12
+	addq	%rdx, %rcx
+	adcq	(%rsp), %rax                    ## 8-byte Folded Reload
+	adcq	-40(%rsp), %r9                  ## 8-byte Folded Reload
+	adcq	8(%rsp), %r8                    ## 8-byte Folded Reload
+	adcq	-112(%rsp), %rbx                ## 8-byte Folded Reload
+	adcq	-80(%rsp), %r12                 ## 8-byte Folded Reload
+	movq	-88(%rsp), %rsi                 ## 8-byte Reload
+	adcq	$0, %rsi
+	addq	%r15, %rax
+	adcq	56(%rsp), %r9                   ## 8-byte Folded Reload
+	adcq	48(%rsp), %r8                   ## 8-byte Folded Reload
+	adcq	64(%rsp), %rbx                  ## 8-byte Folded Reload
+	adcq	72(%rsp), %r12                  ## 8-byte Folded Reload
+	adcq	-120(%rsp), %rsi                ## 8-byte Folded Reload
+	movq	-64(%rsp), %rdi                 ## 8-byte Reload
+	adcq	$0, %rdi
+	addq	%rbp, %r9
+	adcq	96(%rsp), %r8                   ## 8-byte Folded Reload
+	adcq	88(%rsp), %rbx                  ## 8-byte Folded Reload
+	adcq	80(%rsp), %r12                  ## 8-byte Folded Reload
+	adcq	104(%rsp), %rsi                 ## 8-byte Folded Reload
+	adcq	112(%rsp), %rdi                 ## 8-byte Folded Reload
+	movq	-104(%rsp), %rdx                ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	-56(%rsp), %r8                  ## 8-byte Folded Reload
+	movq	-48(%rsp), %rbp                 ## 8-byte Reload
+	movq	%r11, 8(%rbp)
+	movq	%rcx, 16(%rbp)
+	movq	%rax, 24(%rbp)
+	movq	%r9, 32(%rbp)
+	movq	%r8, 40(%rbp)
+	adcq	120(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%rbx, 48(%rbp)
+	adcq	128(%rsp), %r12                 ## 8-byte Folded Reload
+	movq	%r12, 56(%rbp)
+	movq	%rsi, %rax
+	adcq	136(%rsp), %rax                 ## 8-byte Folded Reload
+	movq	%rax, 64(%rbp)
+	movq	%rdi, %rax
+	adcq	144(%rsp), %rax                 ## 8-byte Folded Reload
+	movq	%rax, 72(%rbp)
+	movq	%rdx, %rax
+	adcq	152(%rsp), %rax                 ## 8-byte Folded Reload
+	movq	%rax, 80(%rbp)
+	movq	160(%rsp), %rax                 ## 8-byte Reload
+	adcq	$0, %rax
+	movq	%rax, 88(%rbp)
+	addq	$168, %rsp
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -9499,394 +2682,382 @@ _mcl_fp_mont8Lbmi2:                     ## @mcl_fp_mont8Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montNF8Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_mont6Lbmi2              ## -- Begin function mcl_fp_mont6Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_montNF8Lbmi2:                   ## @mcl_fp_montNF8Lbmi2
-## BB#0:
+_mcl_fp_mont6Lbmi2:                     ## @mcl_fp_mont6Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$1240, %rsp             ## imm = 0x4D8
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	%rdx, 48(%rsp)          ## 8-byte Spill
-	movq	%rsi, 56(%rsp)          ## 8-byte Spill
-	movq	%rdi, 80(%rsp)          ## 8-byte Spill
-	movq	-8(%rcx), %rbx
-	movq	%rbx, 64(%rsp)          ## 8-byte Spill
-	movq	(%rdx), %rdx
-	leaq	1168(%rsp), %rdi
-	callq	l_mulPv512x64
-	movq	1168(%rsp), %r15
-	movq	1176(%rsp), %r12
-	movq	%r15, %rdx
-	imulq	%rbx, %rdx
-	movq	1232(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	1224(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	1216(%rsp), %r13
-	movq	1208(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	1200(%rsp), %r14
-	movq	1192(%rsp), %rbp
-	movq	1184(%rsp), %rbx
-	leaq	1096(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	1096(%rsp), %r15
-	adcq	1104(%rsp), %r12
-	movq	%r12, 16(%rsp)          ## 8-byte Spill
-	adcq	1112(%rsp), %rbx
-	adcq	1120(%rsp), %rbp
-	adcq	1128(%rsp), %r14
-	movq	%r14, %r12
-	movq	8(%rsp), %r14           ## 8-byte Reload
-	adcq	1136(%rsp), %r14
-	adcq	1144(%rsp), %r13
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	1152(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	1160(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1024(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	1088(%rsp), %r15
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	addq	1024(%rsp), %rax
-	adcq	1032(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          ## 8-byte Spill
-	movq	%rbp, %rbx
-	adcq	1040(%rsp), %rbx
-	adcq	1048(%rsp), %r12
-	adcq	1056(%rsp), %r14
-	movq	%r14, 8(%rsp)           ## 8-byte Spill
-	movq	%r13, %rbp
-	adcq	1064(%rsp), %rbp
-	movq	(%rsp), %rcx            ## 8-byte Reload
-	adcq	1072(%rsp), %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %r14          ## 8-byte Reload
-	adcq	1080(%rsp), %r14
-	adcq	$0, %r15
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	64(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	952(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	952(%rsp), %r13
-	movq	72(%rsp), %rax          ## 8-byte Reload
-	adcq	960(%rsp), %rax
-	movq	%rax, 72(%rsp)          ## 8-byte Spill
-	adcq	968(%rsp), %rbx
-	movq	%rbx, 16(%rsp)          ## 8-byte Spill
-	movq	%r12, %rbx
-	adcq	976(%rsp), %rbx
-	movq	8(%rsp), %r12           ## 8-byte Reload
-	adcq	984(%rsp), %r12
-	adcq	992(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %r13            ## 8-byte Reload
-	adcq	1000(%rsp), %r13
-	movq	%r14, %rbp
-	adcq	1008(%rsp), %rbp
-	adcq	1016(%rsp), %r15
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	880(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	944(%rsp), %r14
-	movq	72(%rsp), %rax          ## 8-byte Reload
-	addq	880(%rsp), %rax
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	888(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	adcq	896(%rsp), %rbx
-	adcq	904(%rsp), %r12
-	movq	%r12, 8(%rsp)           ## 8-byte Spill
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	912(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	920(%rsp), %r13
-	movq	%r13, (%rsp)            ## 8-byte Spill
-	adcq	928(%rsp), %rbp
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	936(%rsp), %r15
+	subq	$32, %rsp
+	movq	%rdx, -80(%rsp)                 ## 8-byte Spill
+	movq	%rdi, 24(%rsp)                  ## 8-byte Spill
+	movq	40(%rsi), %rdi
+	movq	%rdi, -88(%rsp)                 ## 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdi, %rdx
+	mulxq	%rax, %r8, %rbx
+	movq	32(%rsi), %rdx
+	movq	%rdx, -96(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r11, %rdi
+	movq	24(%rsi), %rdx
+	movq	%rdx, -72(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r13, %r12
+	movq	16(%rsi), %rdx
+	movq	%rdx, -8(%rsp)                  ## 8-byte Spill
+	mulxq	%rax, %r14, %r15
+	movq	(%rsi), %rbp
+	movq	%rbp, -16(%rsp)                 ## 8-byte Spill
+	movq	8(%rsi), %rdx
+	movq	%rdx, -24(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %rsi, %r10
+	movq	%rbp, %rdx
+	mulxq	%rax, %rax, %r9
+	movq	%rax, -120(%rsp)                ## 8-byte Spill
+	addq	%rsi, %r9
+	adcq	%r14, %r10
+	adcq	%r13, %r15
+	adcq	%r11, %r12
+	adcq	%r8, %rdi
+	movq	%rdi, -112(%rsp)                ## 8-byte Spill
+	adcq	$0, %rbx
+	movq	%rbx, -128(%rsp)                ## 8-byte Spill
+	movq	-8(%rcx), %rdx
+	movq	%rdx, 8(%rsp)                   ## 8-byte Spill
+	imulq	%rax, %rdx
+	movq	40(%rcx), %rax
+	movq	%rax, -32(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r13, %rbp
+	movq	16(%rcx), %rax
+	movq	%rax, -40(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r8, %r14
+	movq	8(%rcx), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	mulxq	%rax, %rax, %r11
+	movq	(%rcx), %rsi
+	movq	%rsi, -48(%rsp)                 ## 8-byte Spill
+	mulxq	%rsi, %rsi, %rdi
+	addq	%rax, %rdi
+	adcq	%r8, %r11
+	movq	24(%rcx), %rax
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %rbx, %r8
+	adcq	%r14, %rbx
+	movq	32(%rcx), %rax
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %rcx, %rax
+	adcq	%r8, %rcx
+	adcq	%r13, %rax
+	adcq	$0, %rbp
+	addq	-120(%rsp), %rsi                ## 8-byte Folded Reload
+	adcq	%r9, %rdi
+	adcq	%r10, %r11
+	adcq	%r15, %rbx
+	adcq	%r12, %rcx
+	adcq	-112(%rsp), %rax                ## 8-byte Folded Reload
+	adcq	-128(%rsp), %rbp                ## 8-byte Folded Reload
+	movq	%rbp, -104(%rsp)                ## 8-byte Spill
+	movq	-80(%rsp), %rdx                 ## 8-byte Reload
+	movq	8(%rdx), %rdx
+	mulxq	-88(%rsp), %rbp, %rsi           ## 8-byte Folded Reload
+	movq	%rbp, -120(%rsp)                ## 8-byte Spill
+	movq	%rsi, -128(%rsp)                ## 8-byte Spill
+	mulxq	-96(%rsp), %rbp, %r15           ## 8-byte Folded Reload
+	mulxq	-72(%rsp), %rsi, %r14           ## 8-byte Folded Reload
+	movq	%rsi, 16(%rsp)                  ## 8-byte Spill
+	mulxq	-24(%rsp), %rsi, %r8            ## 8-byte Folded Reload
+	mulxq	-16(%rsp), %r12, %r10           ## 8-byte Folded Reload
+	setb	-112(%rsp)                      ## 1-byte Folded Spill
+	addq	%rsi, %r10
+	mulxq	-8(%rsp), %r9, %r13             ## 8-byte Folded Reload
+	adcq	%r8, %r9
+	adcq	16(%rsp), %r13                  ## 8-byte Folded Reload
+	adcq	%rbp, %r14
+	adcq	-120(%rsp), %r15                ## 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	%rdi, %r12
+	adcq	%r11, %r10
+	adcq	%rbx, %r9
+	adcq	%rcx, %r13
+	adcq	%rax, %r14
+	adcq	-104(%rsp), %r15                ## 8-byte Folded Reload
+	movzbl	-112(%rsp), %eax                ## 1-byte Folded Reload
+	adcq	%rax, %rdx
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	setb	-112(%rsp)                      ## 1-byte Folded Spill
+	movq	8(%rsp), %rdx                   ## 8-byte Reload
+	imulq	%r12, %rdx
+	mulxq	-32(%rsp), %rax, %rbp           ## 8-byte Folded Reload
+	movq	%rax, -120(%rsp)                ## 8-byte Spill
+	mulxq	-64(%rsp), %rax, %r11           ## 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	mulxq	(%rsp), %rdi, %rsi              ## 8-byte Folded Reload
+	mulxq	-48(%rsp), %rcx, %r8            ## 8-byte Folded Reload
+	addq	%rdi, %r8
+	mulxq	-40(%rsp), %rbx, %rax           ## 8-byte Folded Reload
+	adcq	%rsi, %rbx
+	mulxq	-56(%rsp), %rsi, %rdi           ## 8-byte Folded Reload
+	adcq	%rax, %rsi
+	adcq	-104(%rsp), %rdi                ## 8-byte Folded Reload
+	adcq	-120(%rsp), %r11                ## 8-byte Folded Reload
+	adcq	$0, %rbp
+	addq	%r12, %rcx
+	adcq	%r10, %r8
+	adcq	%r9, %rbx
+	adcq	%r13, %rsi
+	adcq	%r14, %rdi
+	adcq	%r15, %r11
+	adcq	-128(%rsp), %rbp                ## 8-byte Folded Reload
+	movzbl	-112(%rsp), %r10d               ## 1-byte Folded Reload
+	adcq	$0, %r10
+	movq	-80(%rsp), %rcx                 ## 8-byte Reload
+	movq	16(%rcx), %rdx
+	mulxq	-88(%rsp), %rcx, %rax           ## 8-byte Folded Reload
+	movq	%rcx, -112(%rsp)                ## 8-byte Spill
+	movq	%rax, -128(%rsp)                ## 8-byte Spill
+	mulxq	-96(%rsp), %rax, %r13           ## 8-byte Folded Reload
+	movq	%rax, -120(%rsp)                ## 8-byte Spill
+	mulxq	-72(%rsp), %rax, %r15           ## 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	mulxq	-24(%rsp), %rcx, %r14           ## 8-byte Folded Reload
+	mulxq	-16(%rsp), %rax, %r9            ## 8-byte Folded Reload
+	addq	%rcx, %r9
+	mulxq	-8(%rsp), %rcx, %r12            ## 8-byte Folded Reload
+	adcq	%r14, %rcx
+	adcq	-104(%rsp), %r12                ## 8-byte Folded Reload
+	adcq	-120(%rsp), %r15                ## 8-byte Folded Reload
+	adcq	-112(%rsp), %r13                ## 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r8, %rax
+	movq	%rax, %r14
+	adcq	%rbx, %r9
+	adcq	%rsi, %rcx
+	adcq	%rdi, %r12
+	adcq	%r11, %r15
+	adcq	%rbp, %r13
+	movq	%r13, -120(%rsp)                ## 8-byte Spill
+	adcq	%r10, %rdx
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	setb	-112(%rsp)                      ## 1-byte Folded Spill
+	movq	8(%rsp), %rdx                   ## 8-byte Reload
+	imulq	%rax, %rdx
+	mulxq	-32(%rsp), %rax, %r13           ## 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	mulxq	-64(%rsp), %r10, %r11           ## 8-byte Folded Reload
+	mulxq	(%rsp), %rbx, %rdi              ## 8-byte Folded Reload
+	mulxq	-48(%rsp), %r8, %rsi            ## 8-byte Folded Reload
+	addq	%rbx, %rsi
+	mulxq	-40(%rsp), %rbx, %rax           ## 8-byte Folded Reload
+	adcq	%rdi, %rbx
+	mulxq	-56(%rsp), %rbp, %rdi           ## 8-byte Folded Reload
+	adcq	%rax, %rbp
+	adcq	%r10, %rdi
+	adcq	-104(%rsp), %r11                ## 8-byte Folded Reload
+	adcq	$0, %r13
+	addq	%r14, %r8
+	adcq	%r9, %rsi
+	adcq	%rcx, %rbx
+	adcq	%r12, %rbp
+	adcq	%r15, %rdi
+	adcq	-120(%rsp), %r11                ## 8-byte Folded Reload
+	adcq	-128(%rsp), %r13                ## 8-byte Folded Reload
+	movzbl	-112(%rsp), %r9d                ## 1-byte Folded Reload
+	adcq	$0, %r9
+	movq	-80(%rsp), %rcx                 ## 8-byte Reload
+	movq	24(%rcx), %rdx
+	mulxq	-88(%rsp), %rcx, %rax           ## 8-byte Folded Reload
+	movq	%rcx, -112(%rsp)                ## 8-byte Spill
+	movq	%rax, -128(%rsp)                ## 8-byte Spill
+	mulxq	-96(%rsp), %rax, %r14           ## 8-byte Folded Reload
+	movq	%rax, -120(%rsp)                ## 8-byte Spill
+	mulxq	-72(%rsp), %rax, %r15           ## 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	mulxq	-24(%rsp), %rcx, %r10           ## 8-byte Folded Reload
+	mulxq	-16(%rsp), %rax, %r8            ## 8-byte Folded Reload
+	addq	%rcx, %r8
+	mulxq	-8(%rsp), %rcx, %r12            ## 8-byte Folded Reload
+	adcq	%r10, %rcx
+	adcq	-104(%rsp), %r12                ## 8-byte Folded Reload
+	adcq	-120(%rsp), %r15                ## 8-byte Folded Reload
+	adcq	-112(%rsp), %r14                ## 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	%rsi, %rax
+	movq	%rax, %r10
+	adcq	%rbx, %r8
+	adcq	%rbp, %rcx
+	adcq	%rdi, %r12
+	adcq	%r11, %r15
+	adcq	%r13, %r14
+	movq	%r14, -120(%rsp)                ## 8-byte Spill
+	adcq	%r9, %rdx
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	setb	-112(%rsp)                      ## 1-byte Folded Spill
+	movq	8(%rsp), %rdx                   ## 8-byte Reload
+	imulq	%rax, %rdx
+	mulxq	-32(%rsp), %rax, %r14           ## 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	mulxq	-64(%rsp), %r13, %r11           ## 8-byte Folded Reload
+	mulxq	(%rsp), %rbx, %rsi              ## 8-byte Folded Reload
+	mulxq	-48(%rsp), %rax, %rdi           ## 8-byte Folded Reload
+	addq	%rbx, %rdi
+	mulxq	-40(%rsp), %rbx, %r9            ## 8-byte Folded Reload
+	adcq	%rsi, %rbx
+	mulxq	-56(%rsp), %rbp, %rsi           ## 8-byte Folded Reload
+	adcq	%r9, %rbp
+	adcq	%r13, %rsi
+	adcq	-104(%rsp), %r11                ## 8-byte Folded Reload
 	adcq	$0, %r14
-	movq	%rax, %rdx
-	movq	%rax, %rbp
-	imulq	64(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	808(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	808(%rsp), %rbp
-	movq	16(%rsp), %r13          ## 8-byte Reload
-	adcq	816(%rsp), %r13
-	movq	%rbx, %r12
-	adcq	824(%rsp), %r12
-	movq	8(%rsp), %rbx           ## 8-byte Reload
-	adcq	832(%rsp), %rbx
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	840(%rsp), %rbp
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	848(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	856(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	adcq	864(%rsp), %r15
-	adcq	872(%rsp), %r14
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	736(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	800(%rsp), %rax
-	movq	%r13, %rcx
-	addq	736(%rsp), %rcx
-	adcq	744(%rsp), %r12
-	movq	%r12, 24(%rsp)          ## 8-byte Spill
-	adcq	752(%rsp), %rbx
-	movq	%rbx, 8(%rsp)           ## 8-byte Spill
-	adcq	760(%rsp), %rbp
-	movq	%rbp, %r13
-	movq	(%rsp), %rbp            ## 8-byte Reload
-	adcq	768(%rsp), %rbp
-	movq	32(%rsp), %rbx          ## 8-byte Reload
-	adcq	776(%rsp), %rbx
-	adcq	784(%rsp), %r15
-	adcq	792(%rsp), %r14
-	adcq	$0, %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	%rcx, %rdx
-	movq	%rcx, %r12
-	imulq	64(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	664(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	664(%rsp), %r12
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	672(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	680(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	adcq	688(%rsp), %r13
-	adcq	696(%rsp), %rbp
-	movq	%rbp, (%rsp)            ## 8-byte Spill
-	adcq	704(%rsp), %rbx
-	adcq	712(%rsp), %r15
-	adcq	720(%rsp), %r14
-	movq	16(%rsp), %r12          ## 8-byte Reload
-	adcq	728(%rsp), %r12
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	592(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	656(%rsp), %rcx
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	addq	592(%rsp), %rax
-	movq	8(%rsp), %rbp           ## 8-byte Reload
-	adcq	600(%rsp), %rbp
-	adcq	608(%rsp), %r13
-	movq	%r13, 24(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %r13            ## 8-byte Reload
-	adcq	616(%rsp), %r13
-	adcq	624(%rsp), %rbx
-	adcq	632(%rsp), %r15
-	adcq	640(%rsp), %r14
-	adcq	648(%rsp), %r12
-	movq	%r12, 16(%rsp)          ## 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	64(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	520(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	520(%rsp), %r12
-	adcq	528(%rsp), %rbp
-	movq	%rbp, 8(%rsp)           ## 8-byte Spill
-	movq	24(%rsp), %r12          ## 8-byte Reload
-	adcq	536(%rsp), %r12
-	movq	%r13, %rbp
-	adcq	544(%rsp), %rbp
-	adcq	552(%rsp), %rbx
-	adcq	560(%rsp), %r15
-	adcq	568(%rsp), %r14
-	movq	16(%rsp), %r13          ## 8-byte Reload
-	adcq	576(%rsp), %r13
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	584(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	40(%rax), %rdx
-	leaq	448(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	512(%rsp), %rcx
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	addq	448(%rsp), %rax
-	adcq	456(%rsp), %r12
-	movq	%r12, 24(%rsp)          ## 8-byte Spill
-	adcq	464(%rsp), %rbp
-	adcq	472(%rsp), %rbx
-	adcq	480(%rsp), %r15
-	adcq	488(%rsp), %r14
-	adcq	496(%rsp), %r13
-	movq	%r13, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %r13            ## 8-byte Reload
-	adcq	504(%rsp), %r13
-	adcq	$0, %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	64(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	376(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	376(%rsp), %r12
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	384(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	392(%rsp), %rbp
-	adcq	400(%rsp), %rbx
-	adcq	408(%rsp), %r15
-	adcq	416(%rsp), %r14
-	movq	16(%rsp), %r12          ## 8-byte Reload
-	adcq	424(%rsp), %r12
-	adcq	432(%rsp), %r13
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	440(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	304(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	368(%rsp), %rcx
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	addq	304(%rsp), %rax
-	adcq	312(%rsp), %rbp
-	movq	%rbp, (%rsp)            ## 8-byte Spill
-	adcq	320(%rsp), %rbx
-	adcq	328(%rsp), %r15
-	adcq	336(%rsp), %r14
-	adcq	344(%rsp), %r12
-	movq	%r12, 16(%rsp)          ## 8-byte Spill
-	adcq	352(%rsp), %r13
-	movq	8(%rsp), %rbp           ## 8-byte Reload
-	adcq	360(%rsp), %rbp
-	adcq	$0, %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	64(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	232(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	232(%rsp), %r12
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	240(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	adcq	248(%rsp), %rbx
-	adcq	256(%rsp), %r15
-	adcq	264(%rsp), %r14
-	movq	16(%rsp), %r12          ## 8-byte Reload
-	adcq	272(%rsp), %r12
-	adcq	280(%rsp), %r13
-	adcq	288(%rsp), %rbp
-	movq	%rbp, 8(%rsp)           ## 8-byte Spill
-	movq	32(%rsp), %rbp          ## 8-byte Reload
-	adcq	296(%rsp), %rbp
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	160(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	224(%rsp), %rcx
-	movq	(%rsp), %rax            ## 8-byte Reload
-	addq	160(%rsp), %rax
-	adcq	168(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	adcq	176(%rsp), %r15
-	adcq	184(%rsp), %r14
-	adcq	192(%rsp), %r12
-	movq	%r12, 16(%rsp)          ## 8-byte Spill
-	adcq	200(%rsp), %r13
-	movq	8(%rsp), %rbx           ## 8-byte Reload
-	adcq	208(%rsp), %rbx
-	adcq	216(%rsp), %rbp
-	movq	%rbp, %r12
-	adcq	$0, %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	movq	64(%rsp), %rdx          ## 8-byte Reload
+	addq	%r10, %rax
+	adcq	%r8, %rdi
+	adcq	%rcx, %rbx
+	adcq	%r12, %rbp
+	adcq	%r15, %rsi
+	adcq	-120(%rsp), %r11                ## 8-byte Folded Reload
+	adcq	-128(%rsp), %r14                ## 8-byte Folded Reload
+	movzbl	-112(%rsp), %r9d                ## 1-byte Folded Reload
+	adcq	$0, %r9
+	movq	-80(%rsp), %rcx                 ## 8-byte Reload
+	movq	32(%rcx), %rdx
+	mulxq	-88(%rsp), %rcx, %rax           ## 8-byte Folded Reload
+	movq	%rcx, -112(%rsp)                ## 8-byte Spill
+	movq	%rax, -128(%rsp)                ## 8-byte Spill
+	mulxq	-96(%rsp), %rax, %r15           ## 8-byte Folded Reload
+	movq	%rax, -120(%rsp)                ## 8-byte Spill
+	mulxq	-72(%rsp), %rax, %r12           ## 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	mulxq	-24(%rsp), %rcx, %r10           ## 8-byte Folded Reload
+	mulxq	-16(%rsp), %rax, %r13           ## 8-byte Folded Reload
+	addq	%rcx, %r13
+	mulxq	-8(%rsp), %rcx, %r8             ## 8-byte Folded Reload
+	adcq	%r10, %rcx
+	adcq	-104(%rsp), %r8                 ## 8-byte Folded Reload
+	adcq	-120(%rsp), %r12                ## 8-byte Folded Reload
+	adcq	-112(%rsp), %r15                ## 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	%rdi, %rax
+	movq	%rax, -120(%rsp)                ## 8-byte Spill
+	adcq	%rbx, %r13
+	adcq	%rbp, %rcx
+	adcq	%rsi, %r8
+	adcq	%r11, %r12
+	adcq	%r14, %r15
+	adcq	%r9, %rdx
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	setb	-112(%rsp)                      ## 1-byte Folded Spill
+	movq	8(%rsp), %rdx                   ## 8-byte Reload
 	imulq	%rax, %rdx
-	movq	%rax, %rbp
-	leaq	88(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	88(%rsp), %rbp
-	movq	32(%rsp), %r11          ## 8-byte Reload
-	adcq	96(%rsp), %r11
-	adcq	104(%rsp), %r15
-	adcq	112(%rsp), %r14
-	movq	16(%rsp), %rsi          ## 8-byte Reload
-	adcq	120(%rsp), %rsi
-	movq	%rsi, 16(%rsp)          ## 8-byte Spill
-	adcq	128(%rsp), %r13
-	adcq	136(%rsp), %rbx
-	movq	%rbx, 8(%rsp)           ## 8-byte Spill
-	adcq	144(%rsp), %r12
-	movq	(%rsp), %r8             ## 8-byte Reload
-	adcq	152(%rsp), %r8
-	movq	%r11, %rax
-	movq	40(%rsp), %rbp          ## 8-byte Reload
-	subq	(%rbp), %rax
-	movq	%r15, %rcx
-	sbbq	8(%rbp), %rcx
-	movq	%r14, %rdx
-	sbbq	16(%rbp), %rdx
-	sbbq	24(%rbp), %rsi
-	movq	%r13, %rdi
-	sbbq	32(%rbp), %rdi
-	movq	%rbx, %r9
-	sbbq	40(%rbp), %r9
-	movq	%r12, %r10
-	sbbq	48(%rbp), %r10
-	movq	%rbp, %rbx
-	movq	%r8, %rbp
-	sbbq	56(%rbx), %rbp
-	testq	%rbp, %rbp
-	cmovsq	%r11, %rax
-	movq	80(%rsp), %rbx          ## 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovsq	%r15, %rcx
-	movq	%rcx, 8(%rbx)
-	cmovsq	%r14, %rdx
-	movq	%rdx, 16(%rbx)
-	cmovsq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 24(%rbx)
-	cmovsq	%r13, %rdi
-	movq	%rdi, 32(%rbx)
-	cmovsq	8(%rsp), %r9            ## 8-byte Folded Reload
-	movq	%r9, 40(%rbx)
-	cmovsq	%r12, %r10
-	movq	%r10, 48(%rbx)
-	cmovsq	%r8, %rbp
-	movq	%rbp, 56(%rbx)
-	addq	$1240, %rsp             ## imm = 0x4D8
+	mulxq	-32(%rsp), %rax, %r14           ## 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	mulxq	-64(%rsp), %r9, %r10            ## 8-byte Folded Reload
+	mulxq	(%rsp), %rbx, %rsi              ## 8-byte Folded Reload
+	mulxq	-48(%rsp), %rax, %r11           ## 8-byte Folded Reload
+	addq	%rbx, %r11
+	mulxq	-40(%rsp), %rbx, %rdi           ## 8-byte Folded Reload
+	adcq	%rsi, %rbx
+	mulxq	-56(%rsp), %rbp, %rsi           ## 8-byte Folded Reload
+	adcq	%rdi, %rbp
+	adcq	%r9, %rsi
+	adcq	-104(%rsp), %r10                ## 8-byte Folded Reload
+	adcq	$0, %r14
+	addq	-120(%rsp), %rax                ## 8-byte Folded Reload
+	adcq	%r13, %r11
+	adcq	%rcx, %rbx
+	adcq	%r8, %rbp
+	adcq	%r12, %rsi
+	adcq	%r15, %r10
+	adcq	-128(%rsp), %r14                ## 8-byte Folded Reload
+	movq	%r14, -128(%rsp)                ## 8-byte Spill
+	movzbl	-112(%rsp), %edi                ## 1-byte Folded Reload
+	adcq	$0, %rdi
+	movq	-80(%rsp), %rax                 ## 8-byte Reload
+	movq	40(%rax), %rdx
+	mulxq	-88(%rsp), %rcx, %rax           ## 8-byte Folded Reload
+	movq	%rcx, -88(%rsp)                 ## 8-byte Spill
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	mulxq	-96(%rsp), %rax, %r8            ## 8-byte Folded Reload
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	mulxq	-72(%rsp), %rax, %r15           ## 8-byte Folded Reload
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	mulxq	-8(%rsp), %r14, %r12            ## 8-byte Folded Reload
+	mulxq	-24(%rsp), %rcx, %r13           ## 8-byte Folded Reload
+	mulxq	-16(%rsp), %r9, %rax            ## 8-byte Folded Reload
+	addq	%rcx, %rax
+	adcq	%r14, %r13
+	adcq	-72(%rsp), %r12                 ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r15                 ## 8-byte Folded Reload
+	adcq	-88(%rsp), %r8                  ## 8-byte Folded Reload
+	movq	-80(%rsp), %rcx                 ## 8-byte Reload
+	adcq	$0, %rcx
+	addq	%r11, %r9
+	adcq	%rbx, %rax
+	adcq	%rbp, %r13
+	adcq	%rsi, %r12
+	adcq	%r10, %r15
+	adcq	-128(%rsp), %r8                 ## 8-byte Folded Reload
+	movq	%r8, -96(%rsp)                  ## 8-byte Spill
+	adcq	%rdi, %rcx
+	movq	%rcx, -80(%rsp)                 ## 8-byte Spill
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	8(%rsp), %rdx                   ## 8-byte Reload
+	imulq	%r9, %rdx
+	mulxq	-48(%rsp), %r11, %rsi           ## 8-byte Folded Reload
+	movq	(%rsp), %r10                    ## 8-byte Reload
+	mulxq	%r10, %rcx, %rbx
+	addq	%rsi, %rcx
+	mulxq	-40(%rsp), %rdi, %rbp           ## 8-byte Folded Reload
+	adcq	%rbx, %rdi
+	mulxq	-56(%rsp), %rsi, %rbx           ## 8-byte Folded Reload
+	adcq	%rbp, %rsi
+	mulxq	-64(%rsp), %rbp, %r14           ## 8-byte Folded Reload
+	adcq	%rbx, %rbp
+	mulxq	-32(%rsp), %rdx, %rbx           ## 8-byte Folded Reload
+	adcq	%r14, %rdx
+	adcq	$0, %rbx
+	addq	%r9, %r11
+	adcq	%rax, %rcx
+	adcq	%r13, %rdi
+	adcq	%r12, %rsi
+	adcq	%r15, %rbp
+	adcq	-96(%rsp), %rdx                 ## 8-byte Folded Reload
+	adcq	-80(%rsp), %rbx                 ## 8-byte Folded Reload
+	movzbl	-88(%rsp), %r11d                ## 1-byte Folded Reload
+	adcq	$0, %r11
+	movq	%rcx, %r8
+	subq	-48(%rsp), %r8                  ## 8-byte Folded Reload
+	movq	%rdi, %r9
+	sbbq	%r10, %r9
+	movq	%rsi, %r10
+	sbbq	-40(%rsp), %r10                 ## 8-byte Folded Reload
+	movq	%rbp, %r14
+	sbbq	-56(%rsp), %r14                 ## 8-byte Folded Reload
+	movq	%rdx, %r15
+	sbbq	-64(%rsp), %r15                 ## 8-byte Folded Reload
+	movq	%rbx, %rax
+	sbbq	-32(%rsp), %rax                 ## 8-byte Folded Reload
+	sbbq	$0, %r11
+	testb	$1, %r11b
+	cmovneq	%rbx, %rax
+	movq	24(%rsp), %rbx                  ## 8-byte Reload
+	movq	%rax, 40(%rbx)
+	cmovneq	%rdx, %r15
+	movq	%r15, 32(%rbx)
+	cmovneq	%rbp, %r14
+	movq	%r14, 24(%rbx)
+	cmovneq	%rsi, %r10
+	movq	%r10, 16(%rbx)
+	cmovneq	%rdi, %r9
+	movq	%r9, 8(%rbx)
+	cmovneq	%rcx, %r8
+	movq	%r8, (%rbx)
+	addq	$32, %rsp
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -9894,371 +3065,332 @@ _mcl_fp_montNF8Lbmi2:                   ## @mcl_fp_montNF8Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montRed8Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_montNF6Lbmi2            ## -- Begin function mcl_fp_montNF6Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_montRed8Lbmi2:                  ## @mcl_fp_montRed8Lbmi2
-## BB#0:
+_mcl_fp_montNF6Lbmi2:                   ## @mcl_fp_montNF6Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$776, %rsp              ## imm = 0x308
-	movq	%rdx, %rax
-	movq	%rdi, 192(%rsp)         ## 8-byte Spill
-	movq	-8(%rax), %rcx
-	movq	%rcx, 104(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %r15
-	movq	8(%rsi), %rdx
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	movq	%r15, %rdx
-	imulq	%rcx, %rdx
-	movq	120(%rsi), %rcx
-	movq	%rcx, 112(%rsp)         ## 8-byte Spill
-	movq	112(%rsi), %rcx
-	movq	%rcx, 56(%rsp)          ## 8-byte Spill
-	movq	104(%rsi), %rcx
-	movq	%rcx, 96(%rsp)          ## 8-byte Spill
-	movq	96(%rsi), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	movq	88(%rsi), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	movq	80(%rsi), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	72(%rsi), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	movq	64(%rsi), %r13
-	movq	56(%rsi), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	movq	48(%rsi), %r14
-	movq	40(%rsi), %rcx
-	movq	%rcx, 72(%rsp)          ## 8-byte Spill
-	movq	32(%rsi), %r12
-	movq	24(%rsi), %rbx
-	movq	16(%rsi), %rbp
-	movq	%rax, %rcx
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	(%rsi), %rax
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	8(%rsi), %rdi
+	movq	%rdi, -128(%rsp)                ## 8-byte Spill
+	movq	(%rdx), %rbp
+	movq	%rdi, %rdx
+	mulxq	%rbp, %rdi, %rbx
+	movq	%rax, %rdx
+	mulxq	%rbp, %r9, %r14
+	movq	16(%rsi), %rdx
+	movq	%rdx, -64(%rsp)                 ## 8-byte Spill
+	addq	%rdi, %r14
+	mulxq	%rbp, %rdi, %r8
+	adcq	%rbx, %rdi
+	movq	24(%rsi), %rdx
+	movq	%rdx, -72(%rsp)                 ## 8-byte Spill
+	mulxq	%rbp, %rbx, %r10
+	adcq	%r8, %rbx
+	movq	32(%rsi), %rdx
+	movq	%rdx, -80(%rsp)                 ## 8-byte Spill
+	mulxq	%rbp, %r8, %r11
+	adcq	%r10, %r8
+	movq	40(%rsi), %rdx
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	mulxq	%rbp, %rsi, %r15
+	adcq	%r11, %rsi
+	adcq	$0, %r15
+	movq	-8(%rcx), %rdx
+	movq	%rdx, -104(%rsp)                ## 8-byte Spill
+	imulq	%r9, %rdx
 	movq	(%rcx), %rax
-	movq	%rax, 136(%rsp)         ## 8-byte Spill
-	movq	56(%rcx), %rax
-	movq	%rax, 184(%rsp)         ## 8-byte Spill
-	movq	48(%rcx), %rax
-	movq	%rax, 176(%rsp)         ## 8-byte Spill
-	movq	40(%rcx), %rax
-	movq	%rax, 168(%rsp)         ## 8-byte Spill
-	movq	32(%rcx), %rax
-	movq	%rax, 160(%rsp)         ## 8-byte Spill
-	movq	24(%rcx), %rax
-	movq	%rax, 152(%rsp)         ## 8-byte Spill
-	movq	16(%rcx), %rax
-	movq	%rax, 144(%rsp)         ## 8-byte Spill
+	movq	%rax, -16(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %rbp, %rax
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	addq	%r9, %rbp
 	movq	8(%rcx), %rax
-	movq	%rax, 128(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rsi
-	movq	%rsi, 88(%rsp)          ## 8-byte Spill
-	leaq	704(%rsp), %rdi
-	callq	l_mulPv512x64
-	addq	704(%rsp), %r15
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	712(%rsp), %rcx
-	adcq	720(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          ## 8-byte Spill
-	adcq	728(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	adcq	736(%rsp), %r12
-	movq	%r12, 120(%rsp)         ## 8-byte Spill
-	movq	72(%rsp), %rax          ## 8-byte Reload
-	adcq	744(%rsp), %rax
-	movq	%rax, 72(%rsp)          ## 8-byte Spill
-	adcq	752(%rsp), %r14
-	movq	%r14, %r12
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	adcq	760(%rsp), %rax
-	movq	%rax, 64(%rsp)          ## 8-byte Spill
-	adcq	768(%rsp), %r13
-	movq	%r13, 8(%rsp)           ## 8-byte Spill
-	adcq	$0, 16(%rsp)            ## 8-byte Folded Spill
-	movq	40(%rsp), %r15          ## 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 24(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 48(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 96(%rsp)            ## 8-byte Folded Spill
-	movq	56(%rsp), %r13          ## 8-byte Reload
-	adcq	$0, %r13
-	movq	112(%rsp), %r14         ## 8-byte Reload
-	adcq	$0, %r14
-	sbbq	%rbx, %rbx
-	movq	%rcx, %rbp
-	movq	%rbp, %rdx
-	imulq	104(%rsp), %rdx         ## 8-byte Folded Reload
-	leaq	632(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	632(%rsp), %rbp
-	movq	80(%rsp), %rsi          ## 8-byte Reload
-	adcq	640(%rsp), %rsi
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	648(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	120(%rsp), %rcx         ## 8-byte Reload
-	adcq	656(%rsp), %rcx
-	movq	%rcx, 120(%rsp)         ## 8-byte Spill
-	movq	72(%rsp), %rcx          ## 8-byte Reload
-	adcq	664(%rsp), %rcx
-	movq	%rcx, 72(%rsp)          ## 8-byte Spill
-	adcq	672(%rsp), %r12
-	movq	64(%rsp), %rcx          ## 8-byte Reload
-	adcq	680(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	688(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	696(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	adcq	$0, %r15
-	movq	%r15, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rbx          ## 8-byte Reload
-	adcq	$0, %rbx
-	movq	48(%rsp), %r15          ## 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 96(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %r13
-	movq	%r13, 56(%rsp)          ## 8-byte Spill
-	adcq	$0, %r14
-	movq	%r14, 112(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbp
-	adcq	$0, %rbp
-	movq	%rsi, %rdx
-	movq	%rsi, %r14
-	imulq	104(%rsp), %rdx         ## 8-byte Folded Reload
-	leaq	560(%rsp), %rdi
-	movq	88(%rsp), %r13          ## 8-byte Reload
-	movq	%r13, %rsi
-	callq	l_mulPv512x64
-	addq	560(%rsp), %r14
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	568(%rsp), %rcx
-	movq	120(%rsp), %rax         ## 8-byte Reload
-	adcq	576(%rsp), %rax
-	movq	%rax, 120(%rsp)         ## 8-byte Spill
-	movq	72(%rsp), %rax          ## 8-byte Reload
-	adcq	584(%rsp), %rax
-	movq	%rax, 72(%rsp)          ## 8-byte Spill
-	adcq	592(%rsp), %r12
-	movq	%r12, 32(%rsp)          ## 8-byte Spill
-	movq	64(%rsp), %r14          ## 8-byte Reload
-	adcq	600(%rsp), %r14
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	608(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	adcq	616(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	624(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	$0, %rbx
-	movq	%rbx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r15
-	movq	%r15, 48(%rsp)          ## 8-byte Spill
-	movq	96(%rsp), %rbx          ## 8-byte Reload
-	adcq	$0, %rbx
-	movq	56(%rsp), %r15          ## 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 112(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %rbp
-	movq	%rbp, 80(%rsp)          ## 8-byte Spill
-	movq	%rcx, %rbp
-	movq	%rbp, %rdx
-	movq	104(%rsp), %r12         ## 8-byte Reload
-	imulq	%r12, %rdx
-	leaq	488(%rsp), %rdi
-	movq	%r13, %rsi
-	callq	l_mulPv512x64
-	addq	488(%rsp), %rbp
-	movq	120(%rsp), %rax         ## 8-byte Reload
-	adcq	496(%rsp), %rax
-	movq	72(%rsp), %rbp          ## 8-byte Reload
-	adcq	504(%rsp), %rbp
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	512(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	adcq	520(%rsp), %r14
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	528(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	536(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %r13          ## 8-byte Reload
-	adcq	544(%rsp), %r13
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	552(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, 48(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %rbx
-	movq	%rbx, 96(%rsp)          ## 8-byte Spill
-	movq	%r15, %rbx
-	adcq	$0, %rbx
-	adcq	$0, 112(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	movq	%rax, %rdx
-	movq	%rax, %r15
-	imulq	%r12, %rdx
-	leaq	416(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	416(%rsp), %r15
-	adcq	424(%rsp), %rbp
-	movq	%rbp, %rax
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	432(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	%r14, %r12
-	adcq	440(%rsp), %r12
-	movq	8(%rsp), %r14           ## 8-byte Reload
-	adcq	448(%rsp), %r14
-	movq	16(%rsp), %rbp          ## 8-byte Reload
-	adcq	456(%rsp), %rbp
-	adcq	464(%rsp), %r13
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	472(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rcx          ## 8-byte Reload
-	adcq	480(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	adcq	$0, 96(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %rbx
-	movq	%rbx, 56(%rsp)          ## 8-byte Spill
-	movq	112(%rsp), %r15         ## 8-byte Reload
+	movq	%rax, -24(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r13, %r9
+	adcq	%r14, %r13
+	movq	16(%rcx), %rax
+	movq	%rax, -32(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r12, %rax
+	adcq	%rdi, %r12
+	movq	24(%rcx), %rdi
+	movq	%rdi, -40(%rsp)                 ## 8-byte Spill
+	mulxq	%rdi, %r14, %rdi
+	adcq	%rbx, %r14
+	movq	32(%rcx), %rbp
+	movq	%rbp, -48(%rsp)                 ## 8-byte Spill
+	mulxq	%rbp, %r11, %rbx
+	adcq	%r8, %r11
+	movq	40(%rcx), %rcx
+	movq	%rcx, -56(%rsp)                 ## 8-byte Spill
+	mulxq	%rcx, %r10, %rcx
+	adcq	%rsi, %r10
 	adcq	$0, %r15
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	movq	%rax, %rbx
-	movq	%rbx, %rdx
-	imulq	104(%rsp), %rdx         ## 8-byte Folded Reload
-	leaq	344(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	344(%rsp), %rbx
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	352(%rsp), %rax
-	adcq	360(%rsp), %r12
-	movq	%r12, 64(%rsp)          ## 8-byte Spill
-	adcq	368(%rsp), %r14
-	movq	%r14, 8(%rsp)           ## 8-byte Spill
-	adcq	376(%rsp), %rbp
-	movq	%rbp, 16(%rsp)          ## 8-byte Spill
-	adcq	384(%rsp), %r13
-	movq	%r13, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %r13          ## 8-byte Reload
-	adcq	392(%rsp), %r13
-	movq	48(%rsp), %r12          ## 8-byte Reload
-	adcq	400(%rsp), %r12
-	movq	96(%rsp), %r14          ## 8-byte Reload
-	adcq	408(%rsp), %r14
-	movq	56(%rsp), %rbp          ## 8-byte Reload
-	adcq	$0, %rbp
-	movq	%r15, %rbx
-	adcq	$0, %rbx
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
+	addq	-96(%rsp), %r13                 ## 8-byte Folded Reload
+	adcq	%r9, %r12
+	adcq	%rax, %r14
+	adcq	%rdi, %r11
+	adcq	%rbx, %r10
+	adcq	%rcx, %r15
+	movq	-120(%rsp), %rax                ## 8-byte Reload
+	movq	8(%rax), %rdx
+	mulxq	-128(%rsp), %rcx, %rsi          ## 8-byte Folded Reload
+	mulxq	-112(%rsp), %rbx, %rax          ## 8-byte Folded Reload
+	addq	%rcx, %rax
+	mulxq	-64(%rsp), %rcx, %rdi           ## 8-byte Folded Reload
+	adcq	%rsi, %rcx
+	mulxq	-72(%rsp), %rsi, %r8            ## 8-byte Folded Reload
+	adcq	%rdi, %rsi
+	mulxq	-80(%rsp), %rdi, %rbp           ## 8-byte Folded Reload
+	movq	%rbp, -96(%rsp)                 ## 8-byte Spill
+	adcq	%r8, %rdi
+	mulxq	-88(%rsp), %r8, %r9             ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r8                  ## 8-byte Folded Reload
+	adcq	$0, %r9
+	addq	%r13, %rbx
+	adcq	%r12, %rax
+	adcq	%r14, %rcx
+	adcq	%r11, %rsi
+	adcq	%r10, %rdi
+	adcq	%r15, %r8
+	adcq	$0, %r9
+	movq	-104(%rsp), %rdx                ## 8-byte Reload
+	imulq	%rbx, %rdx
+	mulxq	-16(%rsp), %rbp, %r13           ## 8-byte Folded Reload
+	addq	%rbx, %rbp
+	mulxq	-24(%rsp), %r11, %rbx           ## 8-byte Folded Reload
+	adcq	%rax, %r11
+	mulxq	-32(%rsp), %r14, %rax           ## 8-byte Folded Reload
+	adcq	%rcx, %r14
+	mulxq	-40(%rsp), %r10, %rcx           ## 8-byte Folded Reload
+	adcq	%rsi, %r10
+	mulxq	-48(%rsp), %r15, %rsi           ## 8-byte Folded Reload
+	adcq	%rdi, %r15
+	mulxq	-56(%rsp), %r12, %rdx           ## 8-byte Folded Reload
+	adcq	%r8, %r12
+	adcq	$0, %r9
+	addq	%r13, %r11
+	adcq	%rbx, %r14
+	adcq	%rax, %r10
+	adcq	%rcx, %r15
+	adcq	%rsi, %r12
+	adcq	%rdx, %r9
+	movq	-120(%rsp), %rax                ## 8-byte Reload
+	movq	16(%rax), %rdx
+	mulxq	-128(%rsp), %rcx, %rax          ## 8-byte Folded Reload
+	mulxq	-112(%rsp), %r13, %rdi          ## 8-byte Folded Reload
+	addq	%rcx, %rdi
+	mulxq	-64(%rsp), %rbx, %rcx           ## 8-byte Folded Reload
+	adcq	%rax, %rbx
+	mulxq	-72(%rsp), %rsi, %rbp           ## 8-byte Folded Reload
+	adcq	%rcx, %rsi
+	mulxq	-80(%rsp), %rax, %rcx           ## 8-byte Folded Reload
+	movq	%rcx, -96(%rsp)                 ## 8-byte Spill
+	adcq	%rbp, %rax
+	mulxq	-88(%rsp), %r8, %rcx            ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r8                  ## 8-byte Folded Reload
+	adcq	$0, %rcx
+	addq	%r11, %r13
+	adcq	%r14, %rdi
+	adcq	%r10, %rbx
+	adcq	%r15, %rsi
+	adcq	%r12, %rax
+	adcq	%r9, %r8
+	adcq	$0, %rcx
+	movq	-104(%rsp), %rdx                ## 8-byte Reload
+	imulq	%r13, %rdx
+	mulxq	-16(%rsp), %rbp, %r12           ## 8-byte Folded Reload
+	addq	%r13, %rbp
+	mulxq	-24(%rsp), %r11, %rbp           ## 8-byte Folded Reload
+	adcq	%rdi, %r11
+	mulxq	-32(%rsp), %r9, %rdi            ## 8-byte Folded Reload
+	adcq	%rbx, %r9
+	mulxq	-40(%rsp), %r10, %rbx           ## 8-byte Folded Reload
+	adcq	%rsi, %r10
+	mulxq	-48(%rsp), %r14, %rsi           ## 8-byte Folded Reload
+	adcq	%rax, %r14
+	mulxq	-56(%rsp), %r15, %rax           ## 8-byte Folded Reload
+	adcq	%r8, %r15
+	adcq	$0, %rcx
+	addq	%r12, %r11
+	adcq	%rbp, %r9
+	adcq	%rdi, %r10
+	adcq	%rbx, %r14
+	adcq	%rsi, %r15
+	adcq	%rax, %rcx
+	movq	-120(%rsp), %rax                ## 8-byte Reload
+	movq	24(%rax), %rdx
+	mulxq	-128(%rsp), %rsi, %rax          ## 8-byte Folded Reload
+	mulxq	-112(%rsp), %r13, %rbx          ## 8-byte Folded Reload
+	addq	%rsi, %rbx
+	mulxq	-64(%rsp), %rdi, %rbp           ## 8-byte Folded Reload
+	adcq	%rax, %rdi
+	mulxq	-72(%rsp), %rsi, %r8            ## 8-byte Folded Reload
+	adcq	%rbp, %rsi
+	mulxq	-80(%rsp), %rax, %rbp           ## 8-byte Folded Reload
+	adcq	%r8, %rax
+	mulxq	-88(%rsp), %r8, %r12            ## 8-byte Folded Reload
+	adcq	%rbp, %r8
+	adcq	$0, %r12
+	addq	%r11, %r13
+	adcq	%r9, %rbx
+	adcq	%r10, %rdi
+	adcq	%r14, %rsi
+	adcq	%r15, %rax
+	adcq	%rcx, %r8
+	adcq	$0, %r12
+	movq	-104(%rsp), %rdx                ## 8-byte Reload
+	imulq	%r13, %rdx
+	mulxq	-16(%rsp), %rbp, %rcx           ## 8-byte Folded Reload
+	addq	%r13, %rbp
+	mulxq	-24(%rsp), %r11, %rbp           ## 8-byte Folded Reload
+	adcq	%rbx, %r11
+	mulxq	-32(%rsp), %r9, %rbx            ## 8-byte Folded Reload
+	adcq	%rdi, %r9
+	mulxq	-40(%rsp), %r10, %rdi           ## 8-byte Folded Reload
+	adcq	%rsi, %r10
+	mulxq	-48(%rsp), %r14, %rsi           ## 8-byte Folded Reload
+	adcq	%rax, %r14
+	mulxq	-56(%rsp), %r15, %rax           ## 8-byte Folded Reload
+	adcq	%r8, %r15
+	adcq	$0, %r12
+	addq	%rcx, %r11
+	adcq	%rbp, %r9
+	adcq	%rbx, %r10
+	adcq	%rdi, %r14
+	adcq	%rsi, %r15
+	adcq	%rax, %r12
+	movq	-120(%rsp), %rax                ## 8-byte Reload
+	movq	32(%rax), %rdx
+	mulxq	-128(%rsp), %rsi, %rcx          ## 8-byte Folded Reload
+	mulxq	-112(%rsp), %r13, %rax          ## 8-byte Folded Reload
+	addq	%rsi, %rax
+	mulxq	-64(%rsp), %rbx, %rsi           ## 8-byte Folded Reload
+	adcq	%rcx, %rbx
+	mulxq	-72(%rsp), %rdi, %rcx           ## 8-byte Folded Reload
+	adcq	%rsi, %rdi
+	mulxq	-80(%rsp), %rsi, %rbp           ## 8-byte Folded Reload
+	adcq	%rcx, %rsi
+	mulxq	-88(%rsp), %r8, %rcx            ## 8-byte Folded Reload
+	adcq	%rbp, %r8
+	adcq	$0, %rcx
+	addq	%r11, %r13
+	adcq	%r9, %rax
+	adcq	%r10, %rbx
+	adcq	%r14, %rdi
+	adcq	%r15, %rsi
+	adcq	%r12, %r8
+	adcq	$0, %rcx
+	movq	-104(%rsp), %rdx                ## 8-byte Reload
+	imulq	%r13, %rdx
+	mulxq	-16(%rsp), %rbp, %r15           ## 8-byte Folded Reload
+	addq	%r13, %rbp
+	mulxq	-24(%rsp), %r11, %rbp           ## 8-byte Folded Reload
+	adcq	%rax, %r11
+	mulxq	-32(%rsp), %r9, %rax            ## 8-byte Folded Reload
+	adcq	%rbx, %r9
+	mulxq	-40(%rsp), %r10, %rbx           ## 8-byte Folded Reload
+	adcq	%rdi, %r10
+	mulxq	-48(%rsp), %r14, %rdi           ## 8-byte Folded Reload
+	adcq	%rsi, %r14
+	mulxq	-56(%rsp), %rsi, %rdx           ## 8-byte Folded Reload
+	adcq	%r8, %rsi
+	adcq	$0, %rcx
+	addq	%r15, %r11
+	adcq	%rbp, %r9
+	adcq	%rax, %r10
+	adcq	%rbx, %r14
+	adcq	%rdi, %rsi
+	adcq	%rdx, %rcx
+	movq	-120(%rsp), %rax                ## 8-byte Reload
+	movq	40(%rax), %rdx
+	mulxq	-128(%rsp), %rdi, %rax          ## 8-byte Folded Reload
+	mulxq	-112(%rsp), %r13, %rbx          ## 8-byte Folded Reload
+	addq	%rdi, %rbx
+	mulxq	-64(%rsp), %rdi, %rbp           ## 8-byte Folded Reload
+	adcq	%rax, %rdi
+	mulxq	-72(%rsp), %r8, %rax            ## 8-byte Folded Reload
+	adcq	%rbp, %r8
+	mulxq	-80(%rsp), %r15, %rbp           ## 8-byte Folded Reload
+	adcq	%rax, %r15
+	mulxq	-88(%rsp), %r12, %rax           ## 8-byte Folded Reload
+	adcq	%rbp, %r12
+	adcq	$0, %rax
+	addq	%r11, %r13
+	adcq	%r9, %rbx
+	adcq	%r10, %rdi
+	adcq	%r14, %r8
+	adcq	%rsi, %r15
+	adcq	%rcx, %r12
+	adcq	$0, %rax
+	movq	-104(%rsp), %rdx                ## 8-byte Reload
+	imulq	%r13, %rdx
+	movq	-16(%rsp), %r9                  ## 8-byte Reload
+	mulxq	%r9, %rcx, %rsi
+	movq	%rsi, -104(%rsp)                ## 8-byte Spill
+	addq	%r13, %rcx
+	movq	-24(%rsp), %r10                 ## 8-byte Reload
+	mulxq	%r10, %r13, %rcx
+	movq	%rcx, -112(%rsp)                ## 8-byte Spill
+	adcq	%rbx, %r13
+	movq	-32(%rsp), %r11                 ## 8-byte Reload
+	mulxq	%r11, %rbp, %rcx
+	movq	%rcx, -120(%rsp)                ## 8-byte Spill
+	adcq	%rdi, %rbp
+	movq	%rdx, %rcx
+	movq	-40(%rsp), %rsi                 ## 8-byte Reload
+	mulxq	%rsi, %rdi, %rdx
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	adcq	%r8, %rdi
+	movq	%rcx, %rdx
+	movq	-48(%rsp), %r14                 ## 8-byte Reload
+	mulxq	%r14, %rbx, %r8
+	adcq	%r15, %rbx
+	movq	-56(%rsp), %rcx                 ## 8-byte Reload
+	mulxq	%rcx, %r15, %rdx
+	adcq	%r12, %r15
+	adcq	$0, %rax
+	addq	-104(%rsp), %r13                ## 8-byte Folded Reload
+	adcq	-112(%rsp), %rbp                ## 8-byte Folded Reload
+	adcq	-120(%rsp), %rdi                ## 8-byte Folded Reload
+	adcq	-128(%rsp), %rbx                ## 8-byte Folded Reload
+	adcq	%r8, %r15
+	adcq	%rdx, %rax
+	movq	%r13, %r8
+	subq	%r9, %r8
+	movq	%rbp, %r9
+	sbbq	%r10, %r9
+	movq	%rdi, %r10
+	sbbq	%r11, %r10
+	movq	%rbx, %r11
+	sbbq	%rsi, %r11
+	movq	%r15, %rsi
+	sbbq	%r14, %rsi
 	movq	%rax, %rdx
-	movq	%rax, %r15
-	imulq	104(%rsp), %rdx         ## 8-byte Folded Reload
-	leaq	272(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	272(%rsp), %r15
-	movq	64(%rsp), %rcx          ## 8-byte Reload
-	adcq	280(%rsp), %rcx
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	288(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	adcq	296(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	304(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	312(%rsp), %r13
-	movq	%r13, 24(%rsp)          ## 8-byte Spill
-	adcq	320(%rsp), %r12
-	movq	%r12, 48(%rsp)          ## 8-byte Spill
-	adcq	328(%rsp), %r14
-	movq	%r14, %r13
-	adcq	336(%rsp), %rbp
-	movq	%rbp, %r12
-	adcq	$0, %rbx
-	movq	%rbx, %r14
-	movq	80(%rsp), %r15          ## 8-byte Reload
-	adcq	$0, %r15
-	movq	104(%rsp), %rdx         ## 8-byte Reload
-	movq	%rcx, %rbx
-	imulq	%rbx, %rdx
-	leaq	200(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	200(%rsp), %rbx
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	208(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %r8           ## 8-byte Reload
-	adcq	216(%rsp), %r8
-	movq	%r8, 16(%rsp)           ## 8-byte Spill
-	movq	40(%rsp), %rdx          ## 8-byte Reload
-	adcq	224(%rsp), %rdx
-	movq	24(%rsp), %rsi          ## 8-byte Reload
-	adcq	232(%rsp), %rsi
-	movq	48(%rsp), %rdi          ## 8-byte Reload
-	adcq	240(%rsp), %rdi
-	movq	%r13, %rbp
-	adcq	248(%rsp), %rbp
-	movq	%r12, %rbx
-	adcq	256(%rsp), %rbx
-	movq	%rbx, 56(%rsp)          ## 8-byte Spill
-	movq	%r14, %r9
-	adcq	264(%rsp), %r9
-	adcq	$0, %r15
-	movq	%r15, %r10
-	subq	136(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%r8, %rcx
-	sbbq	128(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	sbbq	144(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%rsi, %r12
-	sbbq	152(%rsp), %r12         ## 8-byte Folded Reload
-	movq	%rdi, %r14
-	sbbq	160(%rsp), %r14         ## 8-byte Folded Reload
-	movq	%rbp, %r11
-	sbbq	168(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%rbx, %r8
-	sbbq	176(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r9, %r15
-	sbbq	184(%rsp), %r9          ## 8-byte Folded Reload
-	sbbq	$0, %r10
-	andl	$1, %r10d
-	cmovneq	%r15, %r9
-	testb	%r10b, %r10b
-	cmovneq	8(%rsp), %rax           ## 8-byte Folded Reload
-	movq	192(%rsp), %rbx         ## 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovneq	16(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 8(%rbx)
-	cmovneq	%rdx, %r13
-	movq	%r13, 16(%rbx)
-	cmovneq	%rsi, %r12
-	movq	%r12, 24(%rbx)
-	cmovneq	%rdi, %r14
-	movq	%r14, 32(%rbx)
-	cmovneq	%rbp, %r11
-	movq	%r11, 40(%rbx)
-	cmovneq	56(%rsp), %r8           ## 8-byte Folded Reload
-	movq	%r8, 48(%rbx)
-	movq	%r9, 56(%rbx)
-	addq	$776, %rsp              ## imm = 0x308
+	sbbq	%rcx, %rdx
+	movq	%rdx, %rcx
+	sarq	$63, %rcx
+	cmovsq	%rax, %rdx
+	movq	-8(%rsp), %rax                  ## 8-byte Reload
+	movq	%rdx, 40(%rax)
+	cmovsq	%r15, %rsi
+	movq	%rsi, 32(%rax)
+	cmovsq	%rbx, %r11
+	movq	%r11, 24(%rax)
+	cmovsq	%rdi, %r10
+	movq	%r10, 16(%rax)
+	cmovsq	%rbp, %r9
+	movq	%r9, 8(%rax)
+	cmovsq	%r13, %r8
+	movq	%r8, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -10266,265 +3398,423 @@ _mcl_fp_montRed8Lbmi2:                  ## @mcl_fp_montRed8Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_addPre8Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_addPre8Lbmi2:                   ## @mcl_fp_addPre8Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r8
-	movq	56(%rsi), %r15
-	movq	48(%rdx), %r9
-	movq	48(%rsi), %r12
-	movq	40(%rdx), %r10
-	movq	32(%rdx), %r11
-	movq	24(%rdx), %r14
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	40(%rsi), %r13
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %rsi
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rbx, 16(%rdi)
-	adcq	%r14, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r11, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%r10, %r13
-	movq	%r13, 40(%rdi)
-	adcq	%r9, %r12
-	movq	%r12, 48(%rdi)
-	adcq	%r8, %r15
-	movq	%r15, 56(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_subPre8Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_montRed6Lbmi2           ## -- Begin function mcl_fp_montRed6Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_subPre8Lbmi2:                   ## @mcl_fp_subPre8Lbmi2
-## BB#0:
+_mcl_fp_montRed6Lbmi2:                  ## @mcl_fp_montRed6Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	56(%rdx), %r8
-	movq	56(%rsi), %r15
-	movq	48(%rdx), %r9
-	movq	40(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %r12
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %r12
-	movq	16(%rsi), %rcx
-	sbbq	16(%rdx), %rcx
-	movq	48(%rsi), %r13
-	movq	40(%rsi), %rdx
-	movq	32(%rsi), %rbp
-	movq	24(%rsi), %rsi
-	movq	%rbx, (%rdi)
-	movq	%r12, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r11, %rsi
-	movq	%rsi, 24(%rdi)
-	sbbq	%r14, %rbp
-	movq	%rbp, 32(%rdi)
-	sbbq	%r10, %rdx
-	movq	%rdx, 40(%rdi)
-	sbbq	%r9, %r13
-	movq	%r13, 48(%rdi)
-	sbbq	%r8, %r15
-	movq	%r15, 56(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_shr1_8Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_shr1_8Lbmi2:                    ## @mcl_fp_shr1_8Lbmi2
-## BB#0:
-	movq	56(%rsi), %r8
-	movq	48(%rsi), %r9
-	movq	40(%rsi), %r10
-	movq	32(%rsi), %r11
-	movq	24(%rsi), %rcx
-	movq	16(%rsi), %rdx
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rax
-	movq	%rax, (%rdi)
-	shrdq	$1, %rdx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rcx, %rdx
-	movq	%rdx, 16(%rdi)
-	shrdq	$1, %r11, %rcx
-	movq	%rcx, 24(%rdi)
-	shrdq	$1, %r10, %r11
-	movq	%r11, 32(%rdi)
-	shrdq	$1, %r9, %r10
-	movq	%r10, 40(%rdi)
-	shrdq	$1, %r8, %r9
-	movq	%r9, 48(%rdi)
-	shrq	%r8
-	movq	%r8, 56(%rdi)
-	retq
-
-	.globl	_mcl_fp_add8Lbmi2
-	.p2align	4, 0x90
-_mcl_fp_add8Lbmi2:                      ## @mcl_fp_add8Lbmi2
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r15
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r12
-	movq	48(%rsi), %r9
-	movq	40(%rsi), %r13
-	movq	24(%rsi), %r11
-	movq	32(%rsi), %r10
-	movq	(%rdx), %r14
-	movq	8(%rdx), %rbx
-	addq	(%rsi), %r14
-	adcq	8(%rsi), %rbx
-	movq	16(%rdx), %rax
-	adcq	16(%rsi), %rax
-	adcq	24(%rdx), %r11
-	movq	40(%rdx), %rsi
-	adcq	32(%rdx), %r10
-	movq	%r14, (%rdi)
-	movq	%rbx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r11, 24(%rdi)
-	movq	%r10, 32(%rdi)
-	adcq	%r13, %rsi
-	movq	%rsi, 40(%rdi)
-	adcq	%r12, %r9
-	movq	%r9, 48(%rdi)
-	adcq	%r15, %r8
-	movq	%r8, 56(%rdi)
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	subq	(%rcx), %r14
-	sbbq	8(%rcx), %rbx
-	sbbq	16(%rcx), %rax
-	sbbq	24(%rcx), %r11
-	sbbq	32(%rcx), %r10
-	sbbq	40(%rcx), %rsi
-	sbbq	48(%rcx), %r9
-	sbbq	56(%rcx), %r8
-	sbbq	$0, %rdx
-	testb	$1, %dl
-	jne	LBB120_2
-## BB#1:                                ## %nocarry
-	movq	%r14, (%rdi)
-	movq	%rbx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r11, 24(%rdi)
-	movq	%r10, 32(%rdi)
-	movq	%rsi, 40(%rdi)
-	movq	%r9, 48(%rdi)
-	movq	%r8, 56(%rdi)
-LBB120_2:                               ## %carry
+	movq	%rdx, %rcx
+	movq	%rsi, %r11
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	-8(%rdx), %rax
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	movq	(%rsi), %rdi
+	movq	%rdi, %rdx
+	imulq	%rax, %rdx
+	movq	40(%rcx), %rsi
+	movq	%rsi, -56(%rsp)                 ## 8-byte Spill
+	mulxq	%rsi, %rax, %r12
+	movq	%rax, -88(%rsp)                 ## 8-byte Spill
+	movq	32(%rcx), %rsi
+	movq	%rsi, -64(%rsp)                 ## 8-byte Spill
+	mulxq	%rsi, %rax, %r13
+	movq	%rax, -48(%rsp)                 ## 8-byte Spill
+	movq	24(%rcx), %rsi
+	mulxq	%rsi, %r8, %r15
+	movq	%rsi, %r14
+	movq	%rsi, -16(%rsp)                 ## 8-byte Spill
+	movq	16(%rcx), %rsi
+	movq	%rsi, -72(%rsp)                 ## 8-byte Spill
+	mulxq	%rsi, %rbp, %r9
+	movq	(%rcx), %rax
+	movq	8(%rcx), %r10
+	mulxq	%r10, %rcx, %rsi
+	movq	%r10, -32(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %rdx, %rbx
+	movq	%rax, -40(%rsp)                 ## 8-byte Spill
+	addq	%rcx, %rbx
+	adcq	%rbp, %rsi
+	adcq	%r8, %r9
+	adcq	-48(%rsp), %r15                 ## 8-byte Folded Reload
+	adcq	-88(%rsp), %r13                 ## 8-byte Folded Reload
+	adcq	$0, %r12
+	addq	%rdi, %rdx
+	movq	%r11, -24(%rsp)                 ## 8-byte Spill
+	adcq	8(%r11), %rbx
+	adcq	16(%r11), %rsi
+	adcq	24(%r11), %r9
+	adcq	32(%r11), %r15
+	adcq	40(%r11), %r13
+	adcq	48(%r11), %r12
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	-80(%rsp), %rdx                 ## 8-byte Reload
+	imulq	%rbx, %rdx
+	mulxq	%r14, %rcx, %rdi
+	movq	%rdi, -48(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r14, %rdi
+	mulxq	%r10, %rbp, %rax
+	addq	%rdi, %rbp
+	mulxq	-72(%rsp), %r8, %r10            ## 8-byte Folded Reload
+	adcq	%rax, %r8
+	adcq	%rcx, %r10
+	mulxq	-64(%rsp), %rdi, %r11           ## 8-byte Folded Reload
+	adcq	-48(%rsp), %rdi                 ## 8-byte Folded Reload
+	mulxq	-56(%rsp), %rax, %rcx           ## 8-byte Folded Reload
+	adcq	%r11, %rax
+	movzbl	-88(%rsp), %edx                 ## 1-byte Folded Reload
+	adcq	%rdx, %rcx
+	addq	%rbx, %r14
+	adcq	%rsi, %rbp
+	adcq	%r9, %r8
+	adcq	%r15, %r10
+	adcq	%r13, %rdi
+	adcq	%r12, %rax
+	movq	-24(%rsp), %rdx                 ## 8-byte Reload
+	adcq	56(%rdx), %rcx
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	-80(%rsp), %rdx                 ## 8-byte Reload
+	imulq	%rbp, %rdx
+	mulxq	-16(%rsp), %r11, %rsi           ## 8-byte Folded Reload
+	movq	%rsi, -48(%rsp)                 ## 8-byte Spill
+	mulxq	-40(%rsp), %r15, %rbx           ## 8-byte Folded Reload
+	mulxq	-32(%rsp), %rsi, %r13           ## 8-byte Folded Reload
+	addq	%rbx, %rsi
+	mulxq	-72(%rsp), %r9, %r12            ## 8-byte Folded Reload
+	adcq	%r13, %r9
+	adcq	%r11, %r12
+	mulxq	-64(%rsp), %r11, %r14           ## 8-byte Folded Reload
+	adcq	-48(%rsp), %r11                 ## 8-byte Folded Reload
+	mulxq	-56(%rsp), %rbx, %r13           ## 8-byte Folded Reload
+	adcq	%r14, %rbx
+	movzbl	-88(%rsp), %edx                 ## 1-byte Folded Reload
+	adcq	%rdx, %r13
+	addq	%rbp, %r15
+	adcq	%r8, %rsi
+	adcq	%r10, %r9
+	adcq	%rdi, %r12
+	adcq	%rax, %r11
+	adcq	%rcx, %rbx
+	movq	-24(%rsp), %rax                 ## 8-byte Reload
+	adcq	64(%rax), %r13
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	-80(%rsp), %rdx                 ## 8-byte Reload
+	imulq	%rsi, %rdx
+	mulxq	-16(%rsp), %rbp, %r8            ## 8-byte Folded Reload
+	mulxq	-40(%rsp), %r15, %rdi           ## 8-byte Folded Reload
+	mulxq	-32(%rsp), %rax, %rcx           ## 8-byte Folded Reload
+	addq	%rdi, %rax
+	mulxq	-72(%rsp), %r10, %r14           ## 8-byte Folded Reload
+	adcq	%rcx, %r10
+	adcq	%rbp, %r14
+	mulxq	-64(%rsp), %rbp, %rdi           ## 8-byte Folded Reload
+	adcq	%r8, %rbp
+	mulxq	-56(%rsp), %rcx, %r8            ## 8-byte Folded Reload
+	adcq	%rdi, %rcx
+	movzbl	-88(%rsp), %edx                 ## 1-byte Folded Reload
+	adcq	%rdx, %r8
+	addq	%rsi, %r15
+	adcq	%r9, %rax
+	adcq	%r12, %r10
+	adcq	%r11, %r14
+	adcq	%rbx, %rbp
+	adcq	%r13, %rcx
+	movq	-24(%rsp), %rdx                 ## 8-byte Reload
+	adcq	72(%rdx), %r8
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	-80(%rsp), %rdx                 ## 8-byte Reload
+	imulq	%rax, %rdx
+	mulxq	-16(%rsp), %r15, %r13           ## 8-byte Folded Reload
+	mulxq	-40(%rsp), %rbx, %rdi           ## 8-byte Folded Reload
+	mulxq	-32(%rsp), %rsi, %r11           ## 8-byte Folded Reload
+	addq	%rdi, %rsi
+	mulxq	-72(%rsp), %r9, %r12            ## 8-byte Folded Reload
+	adcq	%r11, %r9
+	adcq	%r15, %r12
+	mulxq	-64(%rsp), %r11, %r15           ## 8-byte Folded Reload
+	adcq	%r13, %r11
+	mulxq	-56(%rsp), %rdi, %r13           ## 8-byte Folded Reload
+	adcq	%r15, %rdi
+	movzbl	-88(%rsp), %edx                 ## 1-byte Folded Reload
+	adcq	%rdx, %r13
+	addq	%rax, %rbx
+	adcq	%r10, %rsi
+	adcq	%r14, %r9
+	adcq	%rbp, %r12
+	adcq	%rcx, %r11
+	adcq	%r8, %rdi
+	movq	-24(%rsp), %rax                 ## 8-byte Reload
+	adcq	80(%rax), %r13
+	setb	%r14b
+	movq	-80(%rsp), %rdx                 ## 8-byte Reload
+	imulq	%rsi, %rdx
+	mulxq	-40(%rsp), %rax, %rcx           ## 8-byte Folded Reload
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	mulxq	-32(%rsp), %r8, %rbp            ## 8-byte Folded Reload
+	addq	%rcx, %r8
+	mulxq	-72(%rsp), %rbx, %r10           ## 8-byte Folded Reload
+	adcq	%rbp, %rbx
+	mulxq	-16(%rsp), %rcx, %r15           ## 8-byte Folded Reload
+	adcq	%r10, %rcx
+	mulxq	-64(%rsp), %rbp, %r10           ## 8-byte Folded Reload
+	adcq	%r15, %rbp
+	mulxq	-56(%rsp), %rdx, %r15           ## 8-byte Folded Reload
+	adcq	%r10, %rdx
+	movzbl	%r14b, %r14d
+	adcq	%r15, %r14
+	addq	%rsi, -80(%rsp)                 ## 8-byte Folded Spill
+	adcq	%r9, %r8
+	adcq	%r12, %rbx
+	adcq	%r11, %rcx
+	adcq	%rdi, %rbp
+	adcq	%r13, %rdx
+	movq	-24(%rsp), %rax                 ## 8-byte Reload
+	adcq	88(%rax), %r14
+	xorl	%r9d, %r9d
+	movq	%r8, %r10
+	subq	-40(%rsp), %r10                 ## 8-byte Folded Reload
+	movq	%rbx, %r11
+	sbbq	-32(%rsp), %r11                 ## 8-byte Folded Reload
+	movq	%rcx, %r15
+	sbbq	-72(%rsp), %r15                 ## 8-byte Folded Reload
+	movq	%rbp, %r12
+	sbbq	-16(%rsp), %r12                 ## 8-byte Folded Reload
+	movq	%rdx, %rsi
+	sbbq	-64(%rsp), %rsi                 ## 8-byte Folded Reload
+	movq	%r14, %rdi
+	sbbq	-56(%rsp), %rdi                 ## 8-byte Folded Reload
+	sbbq	%r9, %r9
+	testb	$1, %r9b
+	cmovneq	%r14, %rdi
+	movq	-8(%rsp), %rax                  ## 8-byte Reload
+	movq	%rdi, 40(%rax)
+	cmovneq	%rdx, %rsi
+	movq	%rsi, 32(%rax)
+	cmovneq	%rbp, %r12
+	movq	%r12, 24(%rax)
+	cmovneq	%rcx, %r15
+	movq	%r15, 16(%rax)
+	cmovneq	%rbx, %r11
+	movq	%r11, 8(%rax)
+	cmovneq	%r8, %r10
+	movq	%r10, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
+	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_addNF8Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_montRedNF6Lbmi2         ## -- Begin function mcl_fp_montRedNF6Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_addNF8Lbmi2:                    ## @mcl_fp_addNF8Lbmi2
-## BB#0:
+_mcl_fp_montRedNF6Lbmi2:                ## @mcl_fp_montRedNF6Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	56(%rdx), %r8
-	movq	48(%rdx), %rbp
-	movq	40(%rdx), %rbx
-	movq	32(%rdx), %rax
-	movq	24(%rdx), %r11
-	movq	16(%rdx), %r15
-	movq	(%rdx), %r13
-	movq	8(%rdx), %r12
-	addq	(%rsi), %r13
-	adcq	8(%rsi), %r12
-	adcq	16(%rsi), %r15
-	adcq	24(%rsi), %r11
-	adcq	32(%rsi), %rax
-	movq	%rax, %r10
-	movq	%r10, -24(%rsp)         ## 8-byte Spill
-	adcq	40(%rsi), %rbx
-	movq	%rbx, %r9
-	movq	%r9, -16(%rsp)          ## 8-byte Spill
-	adcq	48(%rsi), %rbp
-	movq	%rbp, %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	adcq	56(%rsi), %r8
-	movq	%r13, %rsi
-	subq	(%rcx), %rsi
-	movq	%r12, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r15, %rbx
-	sbbq	16(%rcx), %rbx
-	movq	%r11, %r14
-	sbbq	24(%rcx), %r14
-	movq	%r10, %rbp
-	sbbq	32(%rcx), %rbp
-	movq	%r9, %r10
-	sbbq	40(%rcx), %r10
-	movq	%rax, %r9
-	sbbq	48(%rcx), %r9
-	movq	%r8, %rax
-	sbbq	56(%rcx), %rax
-	testq	%rax, %rax
-	cmovsq	%r13, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r12, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r15, %rbx
-	movq	%rbx, 16(%rdi)
-	cmovsq	%r11, %r14
-	movq	%r14, 24(%rdi)
-	cmovsq	-24(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rbp, 32(%rdi)
-	cmovsq	-16(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%r10, 40(%rdi)
-	cmovsq	-8(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 48(%rdi)
-	cmovsq	%r8, %rax
-	movq	%rax, 56(%rdi)
+	movq	%rdx, %rcx
+	movq	%rsi, %r11
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	-8(%rdx), %rax
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	movq	(%rsi), %rdi
+	movq	%rdi, %rdx
+	imulq	%rax, %rdx
+	movq	40(%rcx), %rsi
+	movq	%rsi, -48(%rsp)                 ## 8-byte Spill
+	mulxq	%rsi, %rax, %r12
+	movq	%rax, -88(%rsp)                 ## 8-byte Spill
+	movq	32(%rcx), %rsi
+	movq	%rsi, -56(%rsp)                 ## 8-byte Spill
+	mulxq	%rsi, %rax, %r13
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	movq	24(%rcx), %rsi
+	mulxq	%rsi, %r8, %r15
+	movq	%rsi, %r14
+	movq	%rsi, -16(%rsp)                 ## 8-byte Spill
+	movq	16(%rcx), %rsi
+	movq	%rsi, -64(%rsp)                 ## 8-byte Spill
+	mulxq	%rsi, %rbp, %r9
+	movq	(%rcx), %rax
+	movq	8(%rcx), %r10
+	mulxq	%r10, %rcx, %rsi
+	movq	%r10, -32(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %rdx, %rbx
+	movq	%rax, -40(%rsp)                 ## 8-byte Spill
+	addq	%rcx, %rbx
+	adcq	%rbp, %rsi
+	adcq	%r8, %r9
+	adcq	-80(%rsp), %r15                 ## 8-byte Folded Reload
+	adcq	-88(%rsp), %r13                 ## 8-byte Folded Reload
+	adcq	$0, %r12
+	addq	%rdi, %rdx
+	movq	%r11, -24(%rsp)                 ## 8-byte Spill
+	adcq	8(%r11), %rbx
+	adcq	16(%r11), %rsi
+	adcq	24(%r11), %r9
+	adcq	32(%r11), %r15
+	adcq	40(%r11), %r13
+	adcq	48(%r11), %r12
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	-72(%rsp), %rdx                 ## 8-byte Reload
+	imulq	%rbx, %rdx
+	mulxq	%r14, %rcx, %rdi
+	movq	%rdi, -80(%rsp)                 ## 8-byte Spill
+	mulxq	%rax, %r14, %rdi
+	mulxq	%r10, %rbp, %rax
+	addq	%rdi, %rbp
+	mulxq	-64(%rsp), %r8, %r10            ## 8-byte Folded Reload
+	adcq	%rax, %r8
+	adcq	%rcx, %r10
+	mulxq	-56(%rsp), %rdi, %r11           ## 8-byte Folded Reload
+	adcq	-80(%rsp), %rdi                 ## 8-byte Folded Reload
+	mulxq	-48(%rsp), %rax, %rcx           ## 8-byte Folded Reload
+	adcq	%r11, %rax
+	movzbl	-88(%rsp), %edx                 ## 1-byte Folded Reload
+	adcq	%rdx, %rcx
+	addq	%rbx, %r14
+	adcq	%rsi, %rbp
+	adcq	%r9, %r8
+	adcq	%r15, %r10
+	adcq	%r13, %rdi
+	adcq	%r12, %rax
+	movq	-24(%rsp), %rdx                 ## 8-byte Reload
+	adcq	56(%rdx), %rcx
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	-72(%rsp), %rdx                 ## 8-byte Reload
+	imulq	%rbp, %rdx
+	mulxq	-16(%rsp), %r11, %rsi           ## 8-byte Folded Reload
+	movq	%rsi, -80(%rsp)                 ## 8-byte Spill
+	mulxq	-40(%rsp), %r15, %rbx           ## 8-byte Folded Reload
+	mulxq	-32(%rsp), %rsi, %r13           ## 8-byte Folded Reload
+	addq	%rbx, %rsi
+	mulxq	-64(%rsp), %r9, %r12            ## 8-byte Folded Reload
+	adcq	%r13, %r9
+	adcq	%r11, %r12
+	mulxq	-56(%rsp), %r11, %r14           ## 8-byte Folded Reload
+	adcq	-80(%rsp), %r11                 ## 8-byte Folded Reload
+	mulxq	-48(%rsp), %rbx, %r13           ## 8-byte Folded Reload
+	adcq	%r14, %rbx
+	movzbl	-88(%rsp), %edx                 ## 1-byte Folded Reload
+	adcq	%rdx, %r13
+	addq	%rbp, %r15
+	adcq	%r8, %rsi
+	adcq	%r10, %r9
+	adcq	%rdi, %r12
+	adcq	%rax, %r11
+	adcq	%rcx, %rbx
+	movq	-24(%rsp), %rax                 ## 8-byte Reload
+	adcq	64(%rax), %r13
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	-72(%rsp), %rdx                 ## 8-byte Reload
+	imulq	%rsi, %rdx
+	mulxq	-16(%rsp), %rbp, %r8            ## 8-byte Folded Reload
+	mulxq	-40(%rsp), %r15, %rdi           ## 8-byte Folded Reload
+	mulxq	-32(%rsp), %rax, %rcx           ## 8-byte Folded Reload
+	addq	%rdi, %rax
+	mulxq	-64(%rsp), %r10, %r14           ## 8-byte Folded Reload
+	adcq	%rcx, %r10
+	adcq	%rbp, %r14
+	mulxq	-56(%rsp), %rbp, %rdi           ## 8-byte Folded Reload
+	adcq	%r8, %rbp
+	mulxq	-48(%rsp), %rcx, %r8            ## 8-byte Folded Reload
+	adcq	%rdi, %rcx
+	movzbl	-88(%rsp), %edx                 ## 1-byte Folded Reload
+	adcq	%rdx, %r8
+	addq	%rsi, %r15
+	adcq	%r9, %rax
+	adcq	%r12, %r10
+	adcq	%r11, %r14
+	adcq	%rbx, %rbp
+	adcq	%r13, %rcx
+	movq	-24(%rsp), %rdx                 ## 8-byte Reload
+	adcq	72(%rdx), %r8
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	-72(%rsp), %rdx                 ## 8-byte Reload
+	imulq	%rax, %rdx
+	mulxq	-16(%rsp), %r13, %rsi           ## 8-byte Folded Reload
+	movq	%rsi, -80(%rsp)                 ## 8-byte Spill
+	mulxq	-40(%rsp), %r15, %rdi           ## 8-byte Folded Reload
+	mulxq	-32(%rsp), %rsi, %r11           ## 8-byte Folded Reload
+	addq	%rdi, %rsi
+	mulxq	-64(%rsp), %r12, %r9            ## 8-byte Folded Reload
+	adcq	%r11, %r12
+	adcq	%r13, %r9
+	mulxq	-56(%rsp), %r13, %rbx           ## 8-byte Folded Reload
+	adcq	-80(%rsp), %r13                 ## 8-byte Folded Reload
+	mulxq	-48(%rsp), %rdi, %r11           ## 8-byte Folded Reload
+	adcq	%rbx, %rdi
+	movzbl	-88(%rsp), %edx                 ## 1-byte Folded Reload
+	adcq	%rdx, %r11
+	addq	%rax, %r15
+	adcq	%r10, %rsi
+	adcq	%r14, %r12
+	adcq	%rbp, %r9
+	adcq	%rcx, %r13
+	adcq	%r8, %rdi
+	movq	-24(%rsp), %rax                 ## 8-byte Reload
+	adcq	80(%rax), %r11
+	setb	%r14b
+	movq	-72(%rsp), %rdx                 ## 8-byte Reload
+	imulq	%rsi, %rdx
+	mulxq	-40(%rsp), %rax, %rcx           ## 8-byte Folded Reload
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	mulxq	-32(%rsp), %r8, %rbx            ## 8-byte Folded Reload
+	addq	%rcx, %r8
+	mulxq	-64(%rsp), %rcx, %r10           ## 8-byte Folded Reload
+	adcq	%rbx, %rcx
+	mulxq	-16(%rsp), %rbp, %r15           ## 8-byte Folded Reload
+	adcq	%r10, %rbp
+	mulxq	-56(%rsp), %rbx, %r10           ## 8-byte Folded Reload
+	adcq	%r15, %rbx
+	mulxq	-48(%rsp), %rdx, %r15           ## 8-byte Folded Reload
+	adcq	%r10, %rdx
+	movzbl	%r14b, %r14d
+	adcq	%r15, %r14
+	addq	%rsi, -72(%rsp)                 ## 8-byte Folded Spill
+	adcq	%r12, %r8
+	adcq	%r9, %rcx
+	adcq	%r13, %rbp
+	adcq	%rdi, %rbx
+	adcq	%r11, %rdx
+	movq	-24(%rsp), %rax                 ## 8-byte Reload
+	adcq	88(%rax), %r14
+	movq	%r8, %r9
+	subq	-40(%rsp), %r9                  ## 8-byte Folded Reload
+	movq	%rcx, %r10
+	sbbq	-32(%rsp), %r10                 ## 8-byte Folded Reload
+	movq	%rbp, %r11
+	sbbq	-64(%rsp), %r11                 ## 8-byte Folded Reload
+	movq	%rbx, %r15
+	sbbq	-16(%rsp), %r15                 ## 8-byte Folded Reload
+	movq	%rdx, %rax
+	sbbq	-56(%rsp), %rax                 ## 8-byte Folded Reload
+	movq	%r14, %rdi
+	sbbq	-48(%rsp), %rdi                 ## 8-byte Folded Reload
+	movq	%rdi, %rsi
+	sarq	$63, %rsi
+	cmovsq	%r14, %rdi
+	movq	-8(%rsp), %rsi                  ## 8-byte Reload
+	movq	%rdi, 40(%rsi)
+	cmovsq	%rdx, %rax
+	movq	%rax, 32(%rsi)
+	cmovsq	%rbx, %r15
+	movq	%r15, 24(%rsi)
+	cmovsq	%rbp, %r11
+	movq	%r11, 16(%rsi)
+	cmovsq	%rcx, %r10
+	movq	%r10, 8(%rsi)
+	cmovsq	%r8, %r9
+	movq	%r9, (%rsi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -10532,281 +3822,350 @@ _mcl_fp_addNF8Lbmi2:                    ## @mcl_fp_addNF8Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_sub8Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_addPre6Lbmi2            ## -- Begin function mcl_fp_addPre6Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_sub8Lbmi2:                      ## @mcl_fp_sub8Lbmi2
-## BB#0:
+_mcl_fp_addPre6Lbmi2:                   ## @mcl_fp_addPre6Lbmi2: (%rdi)[0..5] = (%rsi)[0..5] + (%rdx)[0..5] with no reduction; eax = carry-out (0 or 1)
+## %bb.0:
+	movq	40(%rsi), %rax                  ## load the six 64-bit limbs of the first operand
+	movq	32(%rsi), %rcx
+	movq	24(%rsi), %r8
+	movq	16(%rsi), %r9
+	movq	(%rsi), %r10
+	movq	8(%rsi), %rsi                   ## limb 1 replaces the pointer, so it is loaded last
+	addq	(%rdx), %r10                    ## limb 0 starts the carry chain
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r9
+	adcq	24(%rdx), %r8
+	adcq	32(%rdx), %rcx
+	adcq	40(%rdx), %rax                  ## CF now holds the carry out of limb 5
+	movq	%rax, 40(%rdi)                  ## stores are plain movq, which leaves CF intact
+	movq	%rcx, 32(%rdi)
+	movq	%r8, 24(%rdi)
+	movq	%r9, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r10, (%rdi)
+	setb	%al                             ## al = CF from the final adcq
+	movzbl	%al, %eax                       ## zero-extend the carry into eax
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_subPre6Lbmi2            ## -- Begin function mcl_fp_subPre6Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_subPre6Lbmi2:                   ## @mcl_fp_subPre6Lbmi2: (%rdi)[0..5] = (%rsi)[0..5] - (%rdx)[0..5] with no reduction; eax = borrow-out (0 or 1)
+## %bb.0:
+	movq	40(%rsi), %rcx                  ## load the six 64-bit limbs of the minuend
+	movq	32(%rsi), %r8
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r11
+	movq	8(%rsi), %rsi                   ## limb 1 replaces the pointer, so it is loaded last
+	xorl	%eax, %eax                      ## rax = 0; placed before the borrow chain because xorl rewrites the flags
+	subq	(%rdx), %r11                    ## limb 0 starts the borrow chain
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %r10
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r8
+	sbbq	40(%rdx), %rcx                  ## CF now holds the borrow out of limb 5
+	movq	%rcx, 40(%rdi)                  ## stores are plain movq, which leaves CF intact
+	movq	%r8, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r11, (%rdi)
+	sbbq	%rax, %rax                      ## rax = -CF: all ones if the subtraction borrowed, else 0
+	andl	$1, %eax                        ## reduce to 0/1; the 32-bit write also clears the upper half of rax
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_shr1_6Lbmi2             ## -- Begin function mcl_fp_shr1_6Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_shr1_6Lbmi2:                    ## @mcl_fp_shr1_6Lbmi2: (%rdi)[0..5] = (%rsi)[0..5] >> 1, a one-bit logical right shift across six 64-bit limbs
+## %bb.0:
+	movq	(%rsi), %r9                     ## r9 = limb 0
+	movq	8(%rsi), %r8                    ## r8 = limb 1
+	movq	16(%rsi), %r10                  ## r10 = limb 2
+	movq	24(%rsi), %rcx                  ## rcx = limb 3
+	movq	32(%rsi), %rax                  ## rax = limb 4
+	movq	40(%rsi), %rdx                  ## rdx = limb 5
+	movq	%rdx, %rsi                      ## copy limb 5; the source pointer is no longer needed
+	shrq	%rsi                            ## top limb: a zero is shifted into bit 63
+	movq	%rsi, 40(%rdi)
+	shldq	$63, %rax, %rdx                 ## rdx = (limb5 << 63) | (limb4 >> 1)
+	movq	%rdx, 32(%rdi)
+	shldq	$63, %rcx, %rax                 ## rax = (limb4 << 63) | (limb3 >> 1)
+	movq	%rax, 24(%rdi)
+	shldq	$63, %r10, %rcx                 ## rcx = (limb3 << 63) | (limb2 >> 1)
+	movq	%rcx, 16(%rdi)
+	shldq	$63, %r8, %r10                  ## r10 = (limb2 << 63) | (limb1 >> 1)
+	movq	%r10, 8(%rdi)
+	shrdq	$1, %r8, %r9                    ## r9 = (limb0 >> 1) | (limb1 << 63)
+	movq	%r9, (%rdi)
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_add6Lbmi2               ## -- Begin function mcl_fp_add6Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_add6Lbmi2:                      ## @mcl_fp_add6Lbmi2: (%rdi) = (%rsi) + (%rdx) over six limbs, then overwritten by sum - (%rcx) on the %nocarry path; (%rcx) is presumably the modulus -- confirm against the C prototype
+## %bb.0:
+	movq	40(%rsi), %r8                   ## load the six limbs of the first operand
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r10
+	movq	16(%rsi), %r11
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi                   ## limb 1 replaces the pointer, so it is loaded last
+	addq	(%rdx), %rax                    ## limb 0 starts the carry chain
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r11
+	adcq	24(%rdx), %r10
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r8
+	movq	%r8, 40(%rdi)                   ## store the raw sum first; it is the result when the branch below is taken
+	movq	%r9, 32(%rdi)
+	movq	%r10, 24(%rdi)
+	movq	%r11, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rax, (%rdi)
+	setb	%dl                             ## dl = carry out of the addition (movq stores left CF intact)
+	movzbl	%dl, %edx                       ## rdx = carry as 0/1; the second-operand pointer is dead here
+	subq	(%rcx), %rax                    ## trial subtraction of the six limbs at (%rcx)
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %r11
+	sbbq	24(%rcx), %r10
+	sbbq	32(%rcx), %r9
+	sbbq	40(%rcx), %r8
+	sbbq	$0, %rdx                        ## rdx = carry - borrow of the trial subtraction
+	testb	$1, %dl                         ## bit 0 of rdx is carry XOR borrow
+	jne	LBB50_2                         ## bit set: keep the raw sum that is already stored
+## %bb.1:                               ## %nocarry
+	movq	%rax, (%rdi)                    ## overwrite the stored sum with sum - (%rcx)
+	movq	%rsi, 8(%rdi)
+	movq	%r11, 16(%rdi)
+	movq	%r10, 24(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r8, 40(%rdi)
+LBB50_2:                                ## %carry
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_addNF6Lbmi2             ## -- Begin function mcl_fp_addNF6Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_addNF6Lbmi2:                    ## @mcl_fp_addNF6Lbmi2: s = (%rsi) + (%rdx), t = s - (%rcx) over six limbs; stores s to (%rdi) if the top limb of t is negative, otherwise t. Branch-free; the carry out of the addition is not used
+## %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	56(%rdx), %r12
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r13
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r10
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r10
-	movq	16(%rsi), %r11
-	sbbq	16(%rdx), %r11
-	movq	24(%rsi), %r15
-	sbbq	24(%rdx), %r15
-	movq	32(%rsi), %r14
-	sbbq	32(%rdx), %r14
-	movq	48(%rsi), %r9
-	movq	40(%rsi), %rsi
-	sbbq	40(%rdx), %rsi
-	movq	%rax, (%rdi)
-	movq	%r10, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	movq	%r15, 24(%rdi)
-	movq	%r14, 32(%rdi)
-	movq	%rsi, 40(%rdi)
-	sbbq	%r13, %r9
-	movq	%r9, 48(%rdi)
-	sbbq	%r12, %r8
-	movq	%r8, 56(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	LBB122_2
-## BB#1:                                ## %carry
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	movq	8(%rcx), %rax
-	adcq	%r10, %rax
-	movq	%rax, 8(%rdi)
-	movq	16(%rcx), %rax
-	adcq	%r11, %rax
-	movq	%rax, 16(%rdi)
-	movq	24(%rcx), %rax
-	adcq	%r15, %rax
+	movq	40(%rdx), %r15                  ## load the six limbs addressed by rdx
+	movq	32(%rdx), %r11
+	movq	24(%rdx), %r10
+	movq	16(%rdx), %r9
+	movq	(%rdx), %r8
+	movq	8(%rdx), %r14
+	addq	(%rsi), %r8                     ## s = (%rdx) + (%rsi); limb 0 starts the carry chain
+	adcq	8(%rsi), %r14
+	adcq	16(%rsi), %r9
+	adcq	24(%rsi), %r10
+	adcq	32(%rsi), %r11
+	adcq	40(%rsi), %r15                  ## the carry out of this limb is not read afterwards
+	movq	%r8, %r12                       ## t = s - (%rcx), computed in copies so s stays available
+	subq	(%rcx), %r12
+	movq	%r14, %r13
+	sbbq	8(%rcx), %r13
+	movq	%r9, %rdx
+	sbbq	16(%rcx), %rdx
+	movq	%r10, %rax
+	sbbq	24(%rcx), %rax
+	movq	%r11, %rsi
+	sbbq	32(%rcx), %rsi
+	movq	%r15, %rbx
+	sbbq	40(%rcx), %rbx
+	movq	%rbx, %rcx                      ## copy the top limb of t; the (%rcx) pointer is dead here
+	sarq	$63, %rcx                       ## sets SF to the sign of t's top limb; rcx is not read again
+	cmovsq	%r15, %rbx                      ## t negative -> select the limb of s (same test for every limb)
+	movq	%rbx, 40(%rdi)                  ## movq and cmov leave SF intact for the following selects
+	cmovsq	%r11, %rsi
+	movq	%rsi, 32(%rdi)
+	cmovsq	%r10, %rax
 	movq	%rax, 24(%rdi)
-	movq	32(%rcx), %rax
-	adcq	%r14, %rax
-	movq	%rax, 32(%rdi)
-	movq	40(%rcx), %rax
-	adcq	%rsi, %rax
-	movq	%rax, 40(%rdi)
-	movq	48(%rcx), %rax
-	adcq	%r9, %rax
-	movq	%rax, 48(%rdi)
-	movq	56(%rcx), %rax
-	adcq	%r8, %rax
-	movq	%rax, 56(%rdi)
-LBB122_2:                               ## %nocarry
+	cmovsq	%r9, %rdx
+	movq	%rdx, 16(%rdi)
+	cmovsq	%r14, %r13
+	movq	%r13, 8(%rdi)
+	cmovsq	%r8, %r12
+	movq	%r12, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
 	retq
-
-	.globl	_mcl_fp_subNF8Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_sub6Lbmi2               ## -- Begin function mcl_fp_sub6Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_subNF8Lbmi2:                    ## @mcl_fp_subNF8Lbmi2
-## BB#0:
-	pushq	%rbp
+_mcl_fp_sub6Lbmi2:                      ## @mcl_fp_sub6Lbmi2: (%rdi) = (%rsi) - (%rdx) over six limbs; if that borrows, the six limbs at (%rcx) are added back. (%rcx) is presumably the modulus -- confirm against the C prototype
+## %bb.0:
+	pushq	%rbx
+	movq	40(%rsi), %r11                  ## load the six limbs of the minuend
+	movq	32(%rsi), %r10
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %rax
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi                   ## limb 1 replaces the pointer, so it is loaded last
+	xorl	%ebx, %ebx                      ## rbx = 0; placed before the borrow chain because xorl rewrites the flags
+	subq	(%rdx), %r8                     ## limb 0 starts the borrow chain
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %rax
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r10
+	sbbq	40(%rdx), %r11
+	movq	%r11, 40(%rdi)                  ## store the raw difference; it is final on the %nocarry path
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rbx, %rbx                      ## rbx = -CF: all ones if the subtraction borrowed, else 0
+	testb	$1, %bl
+	jne	LBB52_2                         ## borrowed -> go add (%rcx) back
+## %bb.1:                               ## %nocarry
+	popq	%rbx
+	retq
+LBB52_2:                                ## %carry
+	addq	(%rcx), %r8                     ## add the limbs at (%rcx) to the wrapped difference; the final carry is discarded
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %rax
+	adcq	24(%rcx), %r9
+	adcq	32(%rcx), %r10
+	adcq	40(%rcx), %r11
+	movq	%r11, 40(%rdi)                  ## overwrite the previously stored raw difference
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	popq	%rbx
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_subNF6Lbmi2             ## -- Begin function mcl_fp_subNF6Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_subNF6Lbmi2:                    ## @mcl_fp_subNF6Lbmi2: d = (%rsi) - (%rdx) over six limbs; (%rdi) = d + ((%rcx) AND mask), where mask is all ones when d's top limb is negative and 0 otherwise. Branch-free
+## %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r8
-	movq	%rdi, %r9
-	movdqu	(%rdx), %xmm0
-	movdqu	16(%rdx), %xmm1
-	movdqu	32(%rdx), %xmm2
-	movdqu	48(%rdx), %xmm3
-	pshufd	$78, %xmm3, %xmm4       ## xmm4 = xmm3[2,3,0,1]
-	movd	%xmm4, %r12
-	movdqu	(%rsi), %xmm4
-	movdqu	16(%rsi), %xmm5
-	movdqu	32(%rsi), %xmm8
-	movdqu	48(%rsi), %xmm7
-	pshufd	$78, %xmm7, %xmm6       ## xmm6 = xmm7[2,3,0,1]
-	movd	%xmm6, %rcx
-	movd	%xmm3, %r13
-	movd	%xmm7, %rdi
-	pshufd	$78, %xmm2, %xmm3       ## xmm3 = xmm2[2,3,0,1]
-	movd	%xmm3, %rbp
-	pshufd	$78, %xmm8, %xmm3       ## xmm3 = xmm8[2,3,0,1]
-	movd	%xmm3, %rdx
-	movd	%xmm2, %rsi
-	pshufd	$78, %xmm1, %xmm2       ## xmm2 = xmm1[2,3,0,1]
-	movd	%xmm2, %r11
-	pshufd	$78, %xmm5, %xmm2       ## xmm2 = xmm5[2,3,0,1]
-	movd	%xmm1, %r15
-	pshufd	$78, %xmm0, %xmm1       ## xmm1 = xmm0[2,3,0,1]
-	movd	%xmm1, %rbx
-	pshufd	$78, %xmm4, %xmm1       ## xmm1 = xmm4[2,3,0,1]
-	movd	%xmm0, %rax
-	movd	%xmm4, %r14
-	subq	%rax, %r14
-	movd	%xmm1, %r10
-	sbbq	%rbx, %r10
-	movd	%xmm5, %rbx
-	sbbq	%r15, %rbx
-	movd	%xmm2, %r15
-	sbbq	%r11, %r15
-	movd	%xmm8, %r11
-	sbbq	%rsi, %r11
-	sbbq	%rbp, %rdx
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	sbbq	%r13, %rdi
-	movq	%rdi, -16(%rsp)         ## 8-byte Spill
-	sbbq	%r12, %rcx
-	movq	%rcx, -8(%rsp)          ## 8-byte Spill
-	movq	%rcx, %rbp
-	sarq	$63, %rbp
-	movq	56(%r8), %r12
-	andq	%rbp, %r12
-	movq	48(%r8), %r13
-	andq	%rbp, %r13
-	movq	40(%r8), %rdi
-	andq	%rbp, %rdi
-	movq	32(%r8), %rsi
-	andq	%rbp, %rsi
-	movq	24(%r8), %rdx
-	andq	%rbp, %rdx
-	movq	16(%r8), %rcx
-	andq	%rbp, %rcx
-	movq	8(%r8), %rax
-	andq	%rbp, %rax
-	andq	(%r8), %rbp
-	addq	%r14, %rbp
+	movq	40(%rsi), %r15                  ## load the six limbs of the minuend
+	movq	32(%rsi), %r8
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r11
+	movq	8(%rsi), %r14
+	subq	(%rdx), %r11                    ## d = (%rsi) - (%rdx); limb 0 starts the borrow chain
+	sbbq	8(%rdx), %r14
+	sbbq	16(%rdx), %r10
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r8
+	sbbq	40(%rdx), %r15
+	movq	%r15, %rdx                      ## copy d's top limb; the subtrahend pointer is dead here
+	sarq	$63, %rdx                       ## rdx = mask: all ones if d's top limb is negative, else 0
+	movq	%rdx, %rbx
+	shldq	$1, %r15, %rbx                  ## rbx = (mask << 1) | sign bit of d, which equals mask again
+	andq	(%rcx), %rbx                    ## rbx = limb 0 of (%rcx), masked
+	movq	40(%rcx), %r12
+	andq	%rdx, %r12                      ## masked limb 5
+	movq	32(%rcx), %r13
+	andq	%rdx, %r13                      ## masked limb 4
+	movq	24(%rcx), %rsi
+	andq	%rdx, %rsi                      ## masked limb 3
+	movq	16(%rcx), %rax
+	andq	%rdx, %rax                      ## masked limb 2
+	andq	8(%rcx), %rdx                   ## the mask register is consumed last and becomes masked limb 1
+	addq	%r11, %rbx                      ## (%rdi) = d + masked (%rcx), limb by limb
+	movq	%rbx, (%rdi)
+	adcq	%r14, %rdx
+	movq	%rdx, 8(%rdi)
 	adcq	%r10, %rax
-	movq	%rbp, (%r9)
-	adcq	%rbx, %rcx
-	movq	%rax, 8(%r9)
-	movq	%rcx, 16(%r9)
-	adcq	%r15, %rdx
-	movq	%rdx, 24(%r9)
-	adcq	%r11, %rsi
-	movq	%rsi, 32(%r9)
-	adcq	-24(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, 40(%r9)
-	adcq	-16(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%r13, 48(%r9)
-	adcq	-8(%rsp), %r12          ## 8-byte Folded Reload
-	movq	%r12, 56(%r9)
+	movq	%rax, 16(%rdi)
+	adcq	%r9, %rsi
+	movq	%rsi, 24(%rdi)
+	adcq	%r8, %r13
+	movq	%r13, 32(%rdi)
+	adcq	%r15, %r12                      ## the carry out of the top limb is discarded
+	movq	%r12, 40(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
-	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_add8Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_add6Lbmi2            ## -- Begin function mcl_fpDbl_add6Lbmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_add8Lbmi2:                   ## @mcl_fpDbl_add8Lbmi2
-## BB#0:
+_mcl_fpDbl_add6Lbmi2:                   ## @mcl_fpDbl_add6Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r8
-	movq	120(%rdx), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	112(%rdx), %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	104(%rdx), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	96(%rdx), %r14
-	movq	24(%rsi), %r15
-	movq	32(%rsi), %r11
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rax
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r12
-	adcq	24(%rdx), %r15
-	adcq	32(%rdx), %r11
-	movq	88(%rdx), %rbp
-	movq	80(%rdx), %r13
-	movq	%rbx, (%rdi)
-	movq	72(%rdx), %r10
-	movq	%rax, 8(%rdi)
-	movq	64(%rdx), %r9
-	movq	%r12, 16(%rdi)
-	movq	40(%rdx), %r12
-	movq	%r15, 24(%rdi)
-	movq	40(%rsi), %rbx
-	adcq	%r12, %rbx
-	movq	56(%rdx), %r15
-	movq	48(%rdx), %r12
-	movq	%r11, 32(%rdi)
-	movq	48(%rsi), %rdx
-	adcq	%r12, %rdx
-	movq	120(%rsi), %r12
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rax
-	adcq	%r15, %rax
-	movq	112(%rsi), %rcx
-	movq	%rdx, 48(%rdi)
-	movq	64(%rsi), %rbx
-	adcq	%r9, %rbx
-	movq	104(%rsi), %rdx
-	movq	%rax, 56(%rdi)
-	movq	72(%rsi), %r9
-	adcq	%r10, %r9
-	movq	80(%rsi), %r11
-	adcq	%r13, %r11
-	movq	96(%rsi), %rax
 	movq	88(%rsi), %r15
-	adcq	%rbp, %r15
-	adcq	%r14, %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rdx, %rax
-	adcq	-24(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	adcq	-16(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -16(%rsp)         ## 8-byte Spill
-	adcq	-32(%rsp), %r12         ## 8-byte Folded Reload
-	movq	%r12, -32(%rsp)         ## 8-byte Spill
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	movq	%rbx, %rsi
-	subq	(%r8), %rsi
-	movq	%r9, %rdx
-	sbbq	8(%r8), %rdx
-	movq	%r11, %r10
-	sbbq	16(%r8), %r10
-	movq	%r15, %r14
-	sbbq	24(%r8), %r14
-	movq	-8(%rsp), %r13          ## 8-byte Reload
-	sbbq	32(%r8), %r13
-	movq	%rax, %r12
-	sbbq	40(%r8), %r12
-	movq	%rcx, %rax
-	sbbq	48(%r8), %rax
-	movq	-32(%rsp), %rcx         ## 8-byte Reload
-	sbbq	56(%r8), %rcx
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	cmovneq	%rbx, %rsi
-	movq	%rsi, 64(%rdi)
-	testb	%bpl, %bpl
-	cmovneq	%r9, %rdx
-	movq	%rdx, 72(%rdi)
-	cmovneq	%r11, %r10
-	movq	%r10, 80(%rdi)
-	cmovneq	%r15, %r14
-	movq	%r14, 88(%rdi)
-	cmovneq	-8(%rsp), %r13          ## 8-byte Folded Reload
-	movq	%r13, 96(%rdi)
-	cmovneq	-24(%rsp), %r12         ## 8-byte Folded Reload
-	movq	%r12, 104(%rdi)
-	cmovneq	-16(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, 112(%rdi)
-	cmovneq	-32(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, 120(%rdi)
+	movq	80(%rsi), %r14
+	movq	72(%rsi), %r11
+	movq	64(%rsi), %r10
+	movq	56(%rsi), %r9
+	movq	48(%rsi), %r8
+	movq	40(%rsi), %rax
+	movq	(%rsi), %r12
+	movq	8(%rsi), %r13
+	addq	(%rdx), %r12
+	adcq	8(%rdx), %r13
+	movq	32(%rsi), %rbx
+	movq	24(%rsi), %rbp
+	movq	16(%rsi), %rsi
+	adcq	16(%rdx), %rsi
+	adcq	24(%rdx), %rbp
+	adcq	32(%rdx), %rbx
+	adcq	40(%rdx), %rax
+	adcq	48(%rdx), %r8
+	adcq	56(%rdx), %r9
+	adcq	64(%rdx), %r10
+	adcq	72(%rdx), %r11
+	adcq	80(%rdx), %r14
+	adcq	88(%rdx), %r15
+	movq	%rax, 40(%rdi)
+	movq	%rbx, 32(%rdi)
+	movq	%rbp, 24(%rdi)
+	movq	%rsi, 16(%rdi)
+	movq	%r13, 8(%rdi)
+	movq	%r12, (%rdi)
+	setb	%al
+	movzbl	%al, %r12d
+	movq	%r8, %r13
+	subq	(%rcx), %r13
+	movq	%r9, %rsi
+	sbbq	8(%rcx), %rsi
+	movq	%r10, %rbx
+	sbbq	16(%rcx), %rbx
+	movq	%r11, %rbp
+	sbbq	24(%rcx), %rbp
+	movq	%r14, %rax
+	sbbq	32(%rcx), %rax
+	movq	%r15, %rdx
+	sbbq	40(%rcx), %rdx
+	sbbq	$0, %r12
+	testb	$1, %r12b
+	cmovneq	%r15, %rdx
+	movq	%rdx, 88(%rdi)
+	cmovneq	%r14, %rax
+	movq	%rax, 80(%rdi)
+	cmovneq	%r11, %rbp
+	movq	%rbp, 72(%rdi)
+	cmovneq	%r10, %rbx
+	movq	%rbx, 64(%rdi)
+	cmovneq	%r9, %rsi
+	movq	%rsi, 56(%rdi)
+	cmovneq	%r8, %r13
+	movq	%r13, 48(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -10814,111 +4173,80 @@ _mcl_fpDbl_add8Lbmi2:                   ## @mcl_fpDbl_add8Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_sub8Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sub6Lbmi2            ## -- Begin function mcl_fpDbl_sub6Lbmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_sub8Lbmi2:                   ## @mcl_fpDbl_sub8Lbmi2
-## BB#0:
+_mcl_fpDbl_sub6Lbmi2:                   ## @mcl_fpDbl_sub6Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r15
-	movq	120(%rdx), %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	112(%rdx), %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	104(%rdx), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %r9
-	movq	(%rsi), %r12
-	movq	8(%rsi), %r14
-	xorl	%r8d, %r8d
-	subq	(%rdx), %r12
-	sbbq	8(%rdx), %r14
-	sbbq	16(%rdx), %r9
-	movq	24(%rsi), %rbx
-	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %r13
-	sbbq	32(%rdx), %r13
-	movq	96(%rdx), %rbp
-	movq	88(%rdx), %r11
-	movq	%r12, (%rdi)
-	movq	80(%rdx), %r12
-	movq	%r14, 8(%rdi)
-	movq	72(%rdx), %r10
-	movq	%r9, 16(%rdi)
-	movq	40(%rdx), %r9
-	movq	%rbx, 24(%rdi)
+	movq	%rcx, %r10
+	movq	88(%rsi), %r15
+	movq	80(%rsi), %r14
+	movq	72(%rsi), %r11
+	movq	64(%rsi), %r9
+	movq	56(%rsi), %r8
+	movq	48(%rsi), %rax
+	movq	%rax, -16(%rsp)                 ## 8-byte Spill
+	movq	(%rsi), %rcx
+	movq	8(%rsi), %r13
+	xorl	%eax, %eax
+	subq	(%rdx), %rcx
+	movq	%rcx, -8(%rsp)                  ## 8-byte Spill
+	sbbq	8(%rdx), %r13
 	movq	40(%rsi), %rbx
-	sbbq	%r9, %rbx
-	movq	48(%rdx), %r9
-	movq	%r13, 32(%rdi)
-	movq	48(%rsi), %r14
-	sbbq	%r9, %r14
-	movq	64(%rdx), %r13
-	movq	56(%rdx), %r9
+	movq	32(%rsi), %rbp
+	movq	24(%rsi), %rcx
+	movq	16(%rsi), %rsi
+	sbbq	16(%rdx), %rsi
+	sbbq	24(%rdx), %rcx
+	sbbq	32(%rdx), %rbp
+	sbbq	40(%rdx), %rbx
+	movq	-16(%rsp), %r12                 ## 8-byte Reload
+	sbbq	48(%rdx), %r12
+	movq	%r12, -16(%rsp)                 ## 8-byte Spill
+	sbbq	56(%rdx), %r8
+	sbbq	64(%rdx), %r9
+	sbbq	72(%rdx), %r11
+	sbbq	80(%rdx), %r14
+	sbbq	88(%rdx), %r15
 	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rdx
-	sbbq	%r9, %rdx
-	movq	120(%rsi), %rcx
-	movq	%r14, 48(%rdi)
-	movq	64(%rsi), %rbx
-	sbbq	%r13, %rbx
-	movq	112(%rsi), %rax
-	movq	%rdx, 56(%rdi)
-	movq	72(%rsi), %r9
-	sbbq	%r10, %r9
-	movq	80(%rsi), %r13
-	sbbq	%r12, %r13
-	movq	88(%rsi), %r12
-	sbbq	%r11, %r12
-	movq	104(%rsi), %rdx
-	movq	96(%rsi), %r14
-	sbbq	%rbp, %r14
-	sbbq	-24(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	sbbq	-16(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	sbbq	-8(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, -8(%rsp)          ## 8-byte Spill
-	movl	$0, %ebp
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	movq	(%r15), %r11
-	cmoveq	%r8, %r11
-	testb	%bpl, %bpl
-	movq	16(%r15), %rbp
-	cmoveq	%r8, %rbp
-	movq	8(%r15), %rsi
-	cmoveq	%r8, %rsi
-	movq	56(%r15), %r10
-	cmoveq	%r8, %r10
-	movq	48(%r15), %rdx
-	cmoveq	%r8, %rdx
-	movq	40(%r15), %rcx
-	cmoveq	%r8, %rcx
-	movq	32(%r15), %rax
-	cmoveq	%r8, %rax
-	cmovneq	24(%r15), %r8
-	addq	%rbx, %r11
-	adcq	%r9, %rsi
-	movq	%r11, 64(%rdi)
-	adcq	%r13, %rbp
+	movq	%rbp, 32(%rdi)
+	movq	%rcx, 24(%rdi)
+	movq	%rsi, 16(%rdi)
+	movq	%r13, 8(%rdi)
+	movq	-8(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rcx, (%rdi)
+	sbbq	%rax, %rax
+	andl	$1, %eax
+	negq	%rax
+	movq	40(%r10), %rcx
+	andq	%rax, %rcx
+	movq	32(%r10), %rdx
+	andq	%rax, %rdx
+	movq	24(%r10), %rsi
+	andq	%rax, %rsi
+	movq	16(%r10), %rbx
+	andq	%rax, %rbx
+	movq	8(%r10), %rbp
+	andq	%rax, %rbp
+	andq	(%r10), %rax
+	addq	-16(%rsp), %rax                 ## 8-byte Folded Reload
+	movq	%rax, 48(%rdi)
+	adcq	%r8, %rbp
+	movq	%rbp, 56(%rdi)
+	adcq	%r9, %rbx
+	movq	%rbx, 64(%rdi)
+	adcq	%r11, %rsi
 	movq	%rsi, 72(%rdi)
-	movq	%rbp, 80(%rdi)
-	adcq	%r12, %r8
-	movq	%r8, 88(%rdi)
-	adcq	%r14, %rax
-	movq	%rax, 96(%rdi)
-	adcq	-24(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, 104(%rdi)
-	adcq	-16(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, 112(%rdi)
-	adcq	-8(%rsp), %r10          ## 8-byte Folded Reload
-	movq	%r10, 120(%rdi)
+	adcq	%r14, %rdx
+	movq	%rdx, 80(%rdi)
+	adcq	%r15, %rcx
+	movq	%rcx, 88(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -10926,379 +4254,277 @@ _mcl_fpDbl_sub8Lbmi2:                   ## @mcl_fpDbl_sub8Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
+                                        ## -- End function
+	.globl	_mulPv512x64bmi2                ## -- Begin function mulPv512x64bmi2
 	.p2align	4, 0x90
-l_mulPv576x64:                          ## @mulPv576x64
-## BB#0:
-	mulxq	(%rsi), %rcx, %rax
-	movq	%rcx, (%rdi)
-	mulxq	8(%rsi), %rcx, %r8
-	addq	%rax, %rcx
-	movq	%rcx, 8(%rdi)
-	mulxq	16(%rsi), %rcx, %r9
-	adcq	%r8, %rcx
-	movq	%rcx, 16(%rdi)
-	mulxq	24(%rsi), %rax, %rcx
-	adcq	%r9, %rax
-	movq	%rax, 24(%rdi)
-	mulxq	32(%rsi), %rax, %r8
-	adcq	%rcx, %rax
-	movq	%rax, 32(%rdi)
-	mulxq	40(%rsi), %rcx, %r9
-	adcq	%r8, %rcx
-	movq	%rcx, 40(%rdi)
-	mulxq	48(%rsi), %rax, %rcx
-	adcq	%r9, %rax
-	movq	%rax, 48(%rdi)
-	mulxq	56(%rsi), %rax, %r8
-	adcq	%rcx, %rax
-	movq	%rax, 56(%rdi)
-	mulxq	64(%rsi), %rax, %rcx
-	adcq	%r8, %rax
-	movq	%rax, 64(%rdi)
-	adcq	$0, %rcx
-	movq	%rcx, 72(%rdi)
+_mulPv512x64bmi2:                       ## @mulPv512x64bmi2
+## %bb.0:
 	movq	%rdi, %rax
+	mulxq	(%rsi), %rdi, %rcx
+	movq	%rdi, (%rax)
+	mulxq	8(%rsi), %rdi, %r8
+	addq	%rcx, %rdi
+	movq	%rdi, 8(%rax)
+	mulxq	16(%rsi), %rdi, %r9
+	adcq	%r8, %rdi
+	movq	%rdi, 16(%rax)
+	mulxq	24(%rsi), %rcx, %rdi
+	adcq	%r9, %rcx
+	movq	%rcx, 24(%rax)
+	mulxq	32(%rsi), %rcx, %r8
+	adcq	%rdi, %rcx
+	movq	%rcx, 32(%rax)
+	mulxq	40(%rsi), %rdi, %r9
+	adcq	%r8, %rdi
+	movq	%rdi, 40(%rax)
+	mulxq	48(%rsi), %rcx, %rdi
+	adcq	%r9, %rcx
+	movq	%rcx, 48(%rax)
+	mulxq	56(%rsi), %rcx, %rdx
+	adcq	%rdi, %rcx
+	movq	%rcx, 56(%rax)
+	adcq	$0, %rdx
+	movq	%rdx, 64(%rax)
 	retq
-
-	.globl	_mcl_fp_mulUnitPre9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_mulUnitPre8Lbmi2        ## -- Begin function mcl_fp_mulUnitPre8Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_mulUnitPre9Lbmi2:               ## @mcl_fp_mulUnitPre9Lbmi2
-## BB#0:
-	pushq	%r14
+_mcl_fp_mulUnitPre8Lbmi2:               ## @mcl_fp_mulUnitPre8Lbmi2
+## %bb.0:
 	pushq	%rbx
-	subq	$88, %rsp
+	subq	$80, %rsp
 	movq	%rdi, %rbx
 	leaq	8(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	80(%rsp), %r8
-	movq	72(%rsp), %r9
-	movq	64(%rsp), %r10
-	movq	56(%rsp), %r11
-	movq	48(%rsp), %r14
-	movq	40(%rsp), %rax
-	movq	32(%rsp), %rcx
-	movq	24(%rsp), %rdx
-	movq	8(%rsp), %rsi
-	movq	16(%rsp), %rdi
-	movq	%rsi, (%rbx)
-	movq	%rdi, 8(%rbx)
-	movq	%rdx, 16(%rbx)
-	movq	%rcx, 24(%rbx)
-	movq	%rax, 32(%rbx)
-	movq	%r14, 40(%rbx)
-	movq	%r11, 48(%rbx)
-	movq	%r10, 56(%rbx)
-	movq	%r9, 64(%rbx)
-	movq	%r8, 72(%rbx)
-	addq	$88, %rsp
+	callq	_mulPv512x64bmi2
+	movq	8(%rsp), %r8
+	movq	16(%rsp), %r9
+	movq	24(%rsp), %r10
+	movq	32(%rsp), %r11
+	movq	40(%rsp), %rdi
+	movq	48(%rsp), %rax
+	movq	56(%rsp), %rcx
+	movq	64(%rsp), %rdx
+	movq	72(%rsp), %rsi
+	movq	%rsi, 64(%rbx)
+	movq	%rdx, 56(%rbx)
+	movq	%rcx, 48(%rbx)
+	movq	%rax, 40(%rbx)
+	movq	%rdi, 32(%rbx)
+	movq	%r11, 24(%rbx)
+	movq	%r10, 16(%rbx)
+	movq	%r9, 8(%rbx)
+	movq	%r8, (%rbx)
+	addq	$80, %rsp
 	popq	%rbx
-	popq	%r14
 	retq
-
-	.globl	_mcl_fpDbl_mulPre9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_mulPre8Lbmi2         ## -- Begin function mcl_fpDbl_mulPre8Lbmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_mulPre9Lbmi2:                ## @mcl_fpDbl_mulPre9Lbmi2
-## BB#0:
+_mcl_fpDbl_mulPre8Lbmi2:                ## @mcl_fpDbl_mulPre8Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$808, %rsp              ## imm = 0x328
+	subq	$648, %rsp                      ## imm = 0x288
 	movq	%rdx, %rax
-	movq	%rdi, %r12
-	movq	(%rax), %rdx
-	movq	%rax, %rbx
-	movq	%rbx, 80(%rsp)          ## 8-byte Spill
-	leaq	728(%rsp), %rdi
-	movq	%rsi, %rbp
-	movq	%rbp, 72(%rsp)          ## 8-byte Spill
-	callq	l_mulPv576x64
-	movq	800(%rsp), %r13
-	movq	792(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	movq	784(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	776(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	768(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	760(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	752(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	744(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	728(%rsp), %rax
-	movq	736(%rsp), %r14
-	movq	%rax, (%r12)
-	movq	%r12, 64(%rsp)          ## 8-byte Spill
-	movq	8(%rbx), %rdx
-	leaq	648(%rsp), %rdi
-	movq	%rbp, %rsi
-	callq	l_mulPv576x64
-	movq	720(%rsp), %r8
-	movq	712(%rsp), %rcx
-	movq	704(%rsp), %rdx
-	movq	696(%rsp), %rsi
-	movq	688(%rsp), %rdi
-	movq	680(%rsp), %rbp
-	addq	648(%rsp), %r14
-	movq	672(%rsp), %rax
-	movq	656(%rsp), %rbx
-	movq	664(%rsp), %r15
-	movq	%r14, 8(%r12)
-	adcq	24(%rsp), %rbx          ## 8-byte Folded Reload
-	adcq	32(%rsp), %r15          ## 8-byte Folded Reload
-	adcq	40(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, %r14
-	adcq	(%rsp), %rbp            ## 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          ## 8-byte Spill
-	adcq	48(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, (%rsp)            ## 8-byte Spill
-	adcq	%r13, %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 16(%rsp)           ## 8-byte Spill
-	movq	80(%rsp), %r13          ## 8-byte Reload
-	movq	16(%r13), %rdx
-	leaq	568(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	640(%rsp), %r8
-	movq	632(%rsp), %r9
-	movq	624(%rsp), %r10
-	movq	616(%rsp), %rdi
-	movq	608(%rsp), %rbp
-	movq	600(%rsp), %rcx
-	addq	568(%rsp), %rbx
-	movq	592(%rsp), %rdx
-	movq	576(%rsp), %r12
-	movq	584(%rsp), %rsi
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	%rbx, 16(%rax)
-	adcq	%r15, %r12
-	adcq	%r14, %rsi
-	movq	%rsi, 48(%rsp)          ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	40(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	(%rsp), %rdi            ## 8-byte Folded Reload
-	movq	%rdi, 40(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %r10           ## 8-byte Folded Reload
-	movq	%r10, (%rsp)            ## 8-byte Spill
-	adcq	16(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 8(%rsp)            ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 16(%rsp)           ## 8-byte Spill
-	movq	24(%r13), %rdx
-	leaq	488(%rsp), %rdi
-	movq	72(%rsp), %r15          ## 8-byte Reload
-	movq	%r15, %rsi
-	callq	l_mulPv576x64
-	movq	560(%rsp), %r8
-	movq	552(%rsp), %rcx
-	movq	544(%rsp), %rdx
-	movq	536(%rsp), %rsi
-	movq	528(%rsp), %rdi
-	movq	520(%rsp), %rbp
-	addq	488(%rsp), %r12
-	movq	512(%rsp), %rax
-	movq	496(%rsp), %rbx
-	movq	504(%rsp), %r13
-	movq	64(%rsp), %r14          ## 8-byte Reload
-	movq	%r12, 24(%r14)
-	adcq	48(%rsp), %rbx          ## 8-byte Folded Reload
-	adcq	56(%rsp), %r13          ## 8-byte Folded Reload
-	adcq	24(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	40(%rsp), %rdi          ## 8-byte Folded Reload
-	movq	%rdi, 40(%rsp)          ## 8-byte Spill
-	adcq	(%rsp), %rsi            ## 8-byte Folded Reload
-	movq	%rsi, (%rsp)            ## 8-byte Spill
-	adcq	8(%rsp), %rdx           ## 8-byte Folded Reload
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	adcq	16(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 48(%rsp)           ## 8-byte Spill
-	movq	80(%rsp), %r12          ## 8-byte Reload
-	movq	32(%r12), %rdx
-	leaq	408(%rsp), %rdi
+	movq	%rdi, 32(%rsp)                  ## 8-byte Spill
+	movq	(%rdx), %rdx
+	movq	%rax, %r12
+	movq	%rax, 40(%rsp)                  ## 8-byte Spill
+	leaq	576(%rsp), %rdi
+	movq	%rsi, %r15
+	callq	_mulPv512x64bmi2
+	movq	640(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	movq	632(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	624(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	616(%rsp), %r13
+	movq	608(%rsp), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	movq	600(%rsp), %rbp
+	movq	592(%rsp), %rbx
+	movq	576(%rsp), %rax
+	movq	584(%rsp), %r14
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rax, (%rcx)
+	movq	8(%r12), %rdx
+	leaq	504(%rsp), %rdi
 	movq	%r15, %rsi
-	callq	l_mulPv576x64
-	movq	480(%rsp), %r8
-	movq	472(%rsp), %r9
-	movq	464(%rsp), %rdx
-	movq	456(%rsp), %rsi
-	movq	448(%rsp), %rdi
-	movq	440(%rsp), %rbp
-	addq	408(%rsp), %rbx
-	movq	432(%rsp), %rax
-	movq	416(%rsp), %r15
-	movq	424(%rsp), %rcx
-	movq	%rbx, 32(%r14)
-	adcq	%r13, %r15
-	adcq	24(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	40(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	(%rsp), %rdi            ## 8-byte Folded Reload
-	movq	%rdi, 40(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rsi           ## 8-byte Folded Reload
-	movq	%rsi, (%rsp)            ## 8-byte Spill
-	adcq	16(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	adcq	48(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 16(%rsp)           ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 48(%rsp)           ## 8-byte Spill
-	movq	%r12, %r14
-	movq	40(%r14), %rdx
-	leaq	328(%rsp), %rdi
-	movq	72(%rsp), %r13          ## 8-byte Reload
-	movq	%r13, %rsi
-	callq	l_mulPv576x64
-	movq	400(%rsp), %r8
-	movq	392(%rsp), %r9
-	movq	384(%rsp), %rsi
-	movq	376(%rsp), %rdi
-	movq	368(%rsp), %rbx
-	movq	360(%rsp), %rbp
-	addq	328(%rsp), %r15
-	movq	352(%rsp), %rcx
-	movq	336(%rsp), %r12
-	movq	344(%rsp), %rdx
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	%r15, 40(%rax)
-	adcq	56(%rsp), %r12          ## 8-byte Folded Reload
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	40(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	(%rsp), %rbx            ## 8-byte Folded Reload
-	movq	%rbx, 40(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, (%rsp)            ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	48(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 16(%rsp)           ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 48(%rsp)           ## 8-byte Spill
-	movq	48(%r14), %rdx
-	leaq	248(%rsp), %rdi
-	movq	%r13, %rsi
-	movq	%r13, %r15
-	callq	l_mulPv576x64
-	movq	320(%rsp), %r8
-	movq	312(%rsp), %r9
-	movq	304(%rsp), %rsi
-	movq	296(%rsp), %rdi
-	movq	288(%rsp), %rbx
-	movq	280(%rsp), %rbp
-	addq	248(%rsp), %r12
-	movq	272(%rsp), %rcx
-	movq	256(%rsp), %r13
-	movq	264(%rsp), %rdx
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	%r12, 48(%rax)
-	adcq	56(%rsp), %r13          ## 8-byte Folded Reload
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	40(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	(%rsp), %rbx            ## 8-byte Folded Reload
-	movq	%rbx, 40(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, (%rsp)            ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	48(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 16(%rsp)           ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 48(%rsp)           ## 8-byte Spill
-	movq	56(%r14), %rdx
-	leaq	168(%rsp), %rdi
+	movq	%r15, 56(%rsp)                  ## 8-byte Spill
+	callq	_mulPv512x64bmi2
+	movq	568(%rsp), %r12
+	addq	504(%rsp), %r14
+	adcq	512(%rsp), %rbx
+	movq	%rbx, 24(%rsp)                  ## 8-byte Spill
+	adcq	520(%rsp), %rbp
+	movq	%rbp, 64(%rsp)                  ## 8-byte Spill
+	movq	48(%rsp), %rax                  ## 8-byte Reload
+	adcq	528(%rsp), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	adcq	536(%rsp), %r13
+	movq	16(%rsp), %rbp                  ## 8-byte Reload
+	adcq	544(%rsp), %rbp
+	movq	8(%rsp), %rax                   ## 8-byte Reload
+	adcq	552(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rax                    ## 8-byte Reload
+	adcq	560(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	movq	%r14, 8(%rax)
+	adcq	$0, %r12
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	movq	16(%rax), %rdx
+	leaq	432(%rsp), %rdi
 	movq	%r15, %rsi
-	callq	l_mulPv576x64
-	movq	240(%rsp), %rcx
-	movq	232(%rsp), %rdx
-	movq	224(%rsp), %rsi
-	movq	216(%rsp), %rdi
-	movq	208(%rsp), %rbx
-	addq	168(%rsp), %r13
-	movq	200(%rsp), %r12
-	movq	192(%rsp), %rbp
-	movq	176(%rsp), %r14
-	movq	184(%rsp), %r15
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	%r13, 56(%rax)
-	adcq	56(%rsp), %r14          ## 8-byte Folded Reload
-	adcq	24(%rsp), %r15          ## 8-byte Folded Reload
-	adcq	32(%rsp), %rbp          ## 8-byte Folded Reload
-	adcq	40(%rsp), %r12          ## 8-byte Folded Reload
-	adcq	(%rsp), %rbx            ## 8-byte Folded Reload
-	movq	%rbx, %r13
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, (%rsp)            ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	48(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	64(%rax), %rdx
-	leaq	88(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	88(%rsp), %r14
+	callq	_mulPv512x64bmi2
+	movq	496(%rsp), %r15
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	addq	432(%rsp), %rcx
+	movq	64(%rsp), %rax                  ## 8-byte Reload
+	adcq	440(%rsp), %rax
+	movq	%rax, 64(%rsp)                  ## 8-byte Spill
+	movq	48(%rsp), %rbx                  ## 8-byte Reload
+	adcq	448(%rsp), %rbx
+	adcq	456(%rsp), %r13
+	movq	%r13, 24(%rsp)                  ## 8-byte Spill
+	adcq	464(%rsp), %rbp
+	movq	%rbp, 16(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rbp                   ## 8-byte Reload
+	adcq	472(%rsp), %rbp
+	movq	(%rsp), %rax                    ## 8-byte Reload
+	adcq	480(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	adcq	488(%rsp), %r12
+	movq	32(%rsp), %r14                  ## 8-byte Reload
+	movq	%rcx, 16(%r14)
+	adcq	$0, %r15
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	movq	24(%rax), %rdx
+	leaq	360(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	424(%rsp), %r13
+	movq	64(%rsp), %rcx                  ## 8-byte Reload
+	addq	360(%rsp), %rcx
+	adcq	368(%rsp), %rbx
+	movq	%rbx, 48(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	376(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	384(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	adcq	392(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rbx                    ## 8-byte Reload
+	adcq	400(%rsp), %rbx
+	adcq	408(%rsp), %r12
+	adcq	416(%rsp), %r15
+	movq	%rcx, 24(%r14)
+	adcq	$0, %r13
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	movq	32(%rax), %rdx
+	leaq	288(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	352(%rsp), %r14
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	addq	288(%rsp), %rcx
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	296(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	304(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rbp                   ## 8-byte Reload
+	adcq	312(%rsp), %rbp
+	adcq	320(%rsp), %rbx
+	movq	%rbx, (%rsp)                    ## 8-byte Spill
+	adcq	328(%rsp), %r12
+	adcq	336(%rsp), %r15
+	adcq	344(%rsp), %r13
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	movq	%rcx, 32(%rax)
+	adcq	$0, %r14
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	movq	40(%rax), %rdx
+	leaq	216(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	280(%rsp), %rbx
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	addq	216(%rsp), %rax
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	224(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	adcq	232(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	adcq	248(%rsp), %r12
+	adcq	256(%rsp), %r15
+	adcq	264(%rsp), %r13
+	adcq	272(%rsp), %r14
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rax, 40(%rcx)
+	adcq	$0, %rbx
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	movq	48(%rax), %rdx
+	leaq	144(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	208(%rsp), %rbp
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	addq	144(%rsp), %rax
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	152(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	160(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	adcq	168(%rsp), %r12
+	adcq	176(%rsp), %r15
+	adcq	184(%rsp), %r13
+	adcq	192(%rsp), %r14
+	adcq	200(%rsp), %rbx
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rax, 48(%rcx)
+	adcq	$0, %rbp
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	movq	56(%rax), %rdx
+	leaq	72(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	136(%rsp), %rax
+	movq	8(%rsp), %rsi                   ## 8-byte Reload
+	addq	72(%rsp), %rsi
+	movq	(%rsp), %rdx                    ## 8-byte Reload
+	adcq	80(%rsp), %rdx
+	adcq	88(%rsp), %r12
 	adcq	96(%rsp), %r15
-	movq	160(%rsp), %r8
-	adcq	104(%rsp), %rbp
-	movq	152(%rsp), %r9
-	movq	144(%rsp), %rdx
-	movq	136(%rsp), %rsi
-	movq	128(%rsp), %rdi
-	movq	120(%rsp), %rbx
-	movq	112(%rsp), %rax
-	movq	64(%rsp), %rcx          ## 8-byte Reload
-	movq	%r14, 64(%rcx)
-	movq	%r15, 72(%rcx)
-	adcq	%r12, %rax
-	movq	%rbp, 80(%rcx)
-	movq	%rax, 88(%rcx)
-	adcq	%r13, %rbx
-	movq	%rbx, 96(%rcx)
-	adcq	(%rsp), %rdi            ## 8-byte Folded Reload
-	movq	%rdi, 104(%rcx)
-	adcq	8(%rsp), %rsi           ## 8-byte Folded Reload
-	movq	%rsi, 112(%rcx)
-	adcq	16(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 120(%rcx)
-	adcq	48(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 128(%rcx)
-	adcq	$0, %r8
-	movq	%r8, 136(%rcx)
-	addq	$808, %rsp              ## imm = 0x328
+	adcq	104(%rsp), %r13
+	adcq	112(%rsp), %r14
+	adcq	120(%rsp), %rbx
+	adcq	128(%rsp), %rbp
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rbp, 112(%rcx)
+	movq	%rbx, 104(%rcx)
+	movq	%r14, 96(%rcx)
+	movq	%r13, 88(%rcx)
+	movq	%r15, 80(%rcx)
+	movq	%r12, 72(%rcx)
+	movq	%rdx, 64(%rcx)
+	movq	%rsi, 56(%rcx)
+	adcq	$0, %rax
+	movq	%rax, 120(%rcx)
+	addq	$648, %rsp                      ## imm = 0x288
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -11306,295 +4532,658 @@ _mcl_fpDbl_mulPre9Lbmi2:                ## @mcl_fpDbl_mulPre9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_sqrPre9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sqrPre8Lbmi2         ## -- Begin function mcl_fpDbl_sqrPre8Lbmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre9Lbmi2:                ## @mcl_fpDbl_sqrPre9Lbmi2
-## BB#0:
+_mcl_fpDbl_sqrPre8Lbmi2:                ## @mcl_fpDbl_sqrPre8Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$808, %rsp              ## imm = 0x328
+	subq	$648, %rsp                      ## imm = 0x288
 	movq	%rsi, %r15
-	movq	%rdi, %r14
-	movq	(%r15), %rdx
-	leaq	728(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	800(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	792(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	784(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	776(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	768(%rsp), %rax
-	movq	%rax, 56(%rsp)          ## 8-byte Spill
-	movq	760(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	movq	752(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	744(%rsp), %rax
-	movq	%rax, 80(%rsp)          ## 8-byte Spill
-	movq	728(%rsp), %rax
-	movq	736(%rsp), %r12
-	movq	%rax, (%r14)
-	movq	%r14, 72(%rsp)          ## 8-byte Spill
+	movq	%rdi, %r12
+	movq	%rdi, 56(%rsp)                  ## 8-byte Spill
+	movq	(%rsi), %rdx
+	leaq	576(%rsp), %rdi
+	callq	_mulPv512x64bmi2
+	movq	640(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	632(%rsp), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	movq	624(%rsp), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	movq	616(%rsp), %rax
+	movq	%rax, 40(%rsp)                  ## 8-byte Spill
+	movq	608(%rsp), %r13
+	movq	600(%rsp), %rbp
+	movq	592(%rsp), %rbx
+	movq	576(%rsp), %rax
+	movq	584(%rsp), %r14
+	movq	%rax, (%r12)
 	movq	8(%r15), %rdx
-	leaq	648(%rsp), %rdi
+	leaq	504(%rsp), %rdi
 	movq	%r15, %rsi
-	callq	l_mulPv576x64
-	movq	720(%rsp), %r8
-	movq	712(%rsp), %rcx
-	movq	704(%rsp), %rdx
-	movq	696(%rsp), %rsi
-	movq	688(%rsp), %rdi
-	movq	680(%rsp), %rbp
-	addq	648(%rsp), %r12
-	movq	672(%rsp), %rax
-	movq	656(%rsp), %rbx
-	movq	664(%rsp), %r13
-	movq	%r12, 8(%r14)
-	adcq	80(%rsp), %rbx          ## 8-byte Folded Reload
-	adcq	40(%rsp), %r13          ## 8-byte Folded Reload
-	adcq	48(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	56(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 56(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	%r15, 64(%rsp)          ## 8-byte Spill
+	callq	_mulPv512x64bmi2
+	movq	568(%rsp), %rax
+	addq	504(%rsp), %r14
+	adcq	512(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	adcq	520(%rsp), %rbp
+	movq	%rbp, 64(%rsp)                  ## 8-byte Spill
+	adcq	528(%rsp), %r13
+	movq	%r13, %rbx
+	movq	40(%rsp), %r13                  ## 8-byte Reload
+	adcq	536(%rsp), %r13
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	544(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	adcq	552(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %r12                  ## 8-byte Reload
+	adcq	560(%rsp), %r12
+	movq	56(%rsp), %rcx                  ## 8-byte Reload
+	movq	%r14, 8(%rcx)
+	adcq	$0, %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
 	movq	16(%r15), %rdx
-	leaq	568(%rsp), %rdi
+	leaq	432(%rsp), %rdi
 	movq	%r15, %rsi
-	callq	l_mulPv576x64
-	movq	640(%rsp), %r8
-	movq	632(%rsp), %rcx
-	movq	624(%rsp), %rdx
-	movq	616(%rsp), %rsi
-	movq	608(%rsp), %rdi
-	movq	600(%rsp), %rbp
-	addq	568(%rsp), %rbx
-	movq	592(%rsp), %rax
-	movq	576(%rsp), %r14
-	movq	584(%rsp), %r12
-	movq	72(%rsp), %r15          ## 8-byte Reload
-	movq	%rbx, 16(%r15)
-	adcq	%r13, %r14
-	adcq	40(%rsp), %r12          ## 8-byte Folded Reload
-	adcq	48(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	56(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 56(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rsi          ## 8-byte Reload
-	movq	24(%rsi), %rdx
-	leaq	488(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	560(%rsp), %r8
-	movq	552(%rsp), %rcx
-	movq	544(%rsp), %rdx
-	movq	536(%rsp), %rsi
-	movq	528(%rsp), %rdi
-	movq	520(%rsp), %rbp
-	addq	488(%rsp), %r14
-	movq	512(%rsp), %rax
-	movq	496(%rsp), %rbx
-	movq	504(%rsp), %r13
-	movq	%r14, 24(%r15)
-	adcq	%r12, %rbx
-	adcq	40(%rsp), %r13          ## 8-byte Folded Reload
-	adcq	48(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	56(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 56(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rsi          ## 8-byte Reload
-	movq	32(%rsi), %rdx
-	leaq	408(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	480(%rsp), %r8
-	movq	472(%rsp), %rcx
-	movq	464(%rsp), %rdx
-	movq	456(%rsp), %rsi
-	movq	448(%rsp), %rdi
-	movq	440(%rsp), %rbp
-	addq	408(%rsp), %rbx
-	movq	432(%rsp), %rax
-	movq	416(%rsp), %r14
-	movq	424(%rsp), %r12
-	movq	%rbx, 32(%r15)
-	adcq	%r13, %r14
-	adcq	40(%rsp), %r12          ## 8-byte Folded Reload
-	adcq	48(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	56(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 56(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rsi          ## 8-byte Reload
-	movq	40(%rsi), %rdx
-	leaq	328(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	400(%rsp), %r8
-	movq	392(%rsp), %rcx
-	movq	384(%rsp), %rdx
-	movq	376(%rsp), %rsi
-	movq	368(%rsp), %rdi
-	movq	360(%rsp), %rbp
-	addq	328(%rsp), %r14
-	movq	352(%rsp), %rax
-	movq	336(%rsp), %rbx
-	movq	344(%rsp), %r13
-	movq	%r14, 40(%r15)
-	adcq	%r12, %rbx
-	adcq	40(%rsp), %r13          ## 8-byte Folded Reload
-	adcq	48(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	56(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 56(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rsi          ## 8-byte Reload
-	movq	48(%rsi), %rdx
-	leaq	248(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	320(%rsp), %r8
-	movq	312(%rsp), %rcx
-	movq	304(%rsp), %rdx
-	movq	296(%rsp), %rsi
-	movq	288(%rsp), %rdi
+	callq	_mulPv512x64bmi2
+	movq	496(%rsp), %rax
+	movq	8(%rsp), %rdx                   ## 8-byte Reload
+	addq	432(%rsp), %rdx
+	movq	64(%rsp), %rcx                  ## 8-byte Reload
+	adcq	440(%rsp), %rcx
+	movq	%rcx, 64(%rsp)                  ## 8-byte Spill
+	adcq	448(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  ## 8-byte Spill
+	adcq	456(%rsp), %r13
+	movq	48(%rsp), %rbx                  ## 8-byte Reload
+	adcq	464(%rsp), %rbx
+	movq	32(%rsp), %rbp                  ## 8-byte Reload
+	adcq	472(%rsp), %rbp
+	adcq	480(%rsp), %r12
+	movq	%r12, 24(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	488(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	movq	56(%rsp), %r12                  ## 8-byte Reload
+	movq	%rdx, 16(%r12)
+	adcq	$0, %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	24(%r15), %rdx
+	leaq	360(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	_mulPv512x64bmi2
+	movq	424(%rsp), %r14
+	movq	64(%rsp), %rax                  ## 8-byte Reload
+	addq	360(%rsp), %rax
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
+	adcq	368(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	adcq	376(%rsp), %r13
+	adcq	384(%rsp), %rbx
+	movq	%rbx, 48(%rsp)                  ## 8-byte Spill
+	adcq	392(%rsp), %rbp
+	movq	%rbp, %rbx
+	movq	24(%rsp), %rbp                  ## 8-byte Reload
+	adcq	400(%rsp), %rbp
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	408(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	416(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	movq	%rax, 24(%r12)
+	adcq	$0, %r14
+	movq	32(%r15), %rdx
+	leaq	288(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	_mulPv512x64bmi2
+	movq	352(%rsp), %r12
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	addq	288(%rsp), %rax
+	adcq	296(%rsp), %r13
+	movq	%r13, 40(%rsp)                  ## 8-byte Spill
+	movq	48(%rsp), %r13                  ## 8-byte Reload
+	adcq	304(%rsp), %r13
+	adcq	312(%rsp), %rbx
+	movq	%rbx, 32(%rsp)                  ## 8-byte Spill
+	adcq	320(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rbx                  ## 8-byte Reload
+	adcq	328(%rsp), %rbx
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	336(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	adcq	344(%rsp), %r14
+	movq	56(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rax, 32(%rcx)
+	adcq	$0, %r12
+	movq	40(%r15), %rdx
+	leaq	216(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	_mulPv512x64bmi2
 	movq	280(%rsp), %rbp
-	addq	248(%rsp), %rbx
-	movq	272(%rsp), %rax
-	movq	256(%rsp), %r12
-	movq	264(%rsp), %r14
-	movq	%rbx, 48(%r15)
-	adcq	%r13, %r12
-	adcq	40(%rsp), %r14          ## 8-byte Folded Reload
-	adcq	48(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	56(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 56(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rsi          ## 8-byte Reload
-	movq	56(%rsi), %rdx
-	leaq	168(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	240(%rsp), %r8
-	movq	232(%rsp), %rdx
-	movq	224(%rsp), %rsi
-	movq	216(%rsp), %rdi
-	movq	208(%rsp), %rbx
-	movq	200(%rsp), %rcx
-	addq	168(%rsp), %r12
-	movq	192(%rsp), %r15
-	movq	176(%rsp), %r13
-	movq	184(%rsp), %rbp
-	movq	72(%rsp), %rax          ## 8-byte Reload
-	movq	%r12, 56(%rax)
-	adcq	%r14, %r13
-	adcq	40(%rsp), %rbp          ## 8-byte Folded Reload
-	adcq	48(%rsp), %r15          ## 8-byte Folded Reload
-	adcq	56(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, %r12
-	adcq	8(%rsp), %rbx           ## 8-byte Folded Reload
-	movq	%rbx, %r14
-	adcq	16(%rsp), %rdi          ## 8-byte Folded Reload
-	movq	%rdi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rsi          ## 8-byte Reload
-	movq	64(%rsi), %rdx
-	leaq	88(%rsp), %rdi
-	callq	l_mulPv576x64
-	addq	88(%rsp), %r13
-	adcq	96(%rsp), %rbp
-	movq	160(%rsp), %r8
-	adcq	104(%rsp), %r15
-	movq	152(%rsp), %r9
-	movq	144(%rsp), %rdx
-	movq	136(%rsp), %rsi
-	movq	128(%rsp), %rdi
-	movq	120(%rsp), %rbx
-	movq	112(%rsp), %rax
-	movq	72(%rsp), %rcx          ## 8-byte Reload
-	movq	%r13, 64(%rcx)
-	movq	%rbp, 72(%rcx)
-	adcq	%r12, %rax
-	movq	%r15, 80(%rcx)
-	movq	%rax, 88(%rcx)
-	adcq	%r14, %rbx
-	movq	%rbx, 96(%rcx)
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 104(%rcx)
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 112(%rcx)
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 120(%rcx)
-	adcq	32(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 128(%rcx)
-	adcq	$0, %r8
-	movq	%r8, 136(%rcx)
-	addq	$808, %rsp              ## imm = 0x328
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	addq	216(%rsp), %rax
+	adcq	224(%rsp), %r13
+	movq	%r13, 48(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	adcq	232(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  ## 8-byte Spill
+	adcq	248(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rbx                   ## 8-byte Reload
+	adcq	256(%rsp), %rbx
+	adcq	264(%rsp), %r14
+	adcq	272(%rsp), %r12
+	movq	56(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rax, 40(%rcx)
+	adcq	$0, %rbp
+	movq	48(%r15), %rdx
+	leaq	144(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	_mulPv512x64bmi2
+	movq	208(%rsp), %r13
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	addq	144(%rsp), %rcx
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	adcq	152(%rsp), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	160(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	168(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	adcq	176(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	adcq	184(%rsp), %r14
+	adcq	192(%rsp), %r12
+	adcq	200(%rsp), %rbp
+	movq	56(%rsp), %rax                  ## 8-byte Reload
+	movq	%rcx, 48(%rax)
+	adcq	$0, %r13
+	movq	56(%r15), %rdx
+	leaq	72(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	_mulPv512x64bmi2
+	movq	136(%rsp), %rax
+	movq	32(%rsp), %rsi                  ## 8-byte Reload
+	addq	72(%rsp), %rsi
+	movq	24(%rsp), %rdi                  ## 8-byte Reload
+	adcq	80(%rsp), %rdi
+	movq	16(%rsp), %rbx                  ## 8-byte Reload
+	adcq	88(%rsp), %rbx
+	movq	8(%rsp), %rdx                   ## 8-byte Reload
+	adcq	96(%rsp), %rdx
+	adcq	104(%rsp), %r14
+	adcq	112(%rsp), %r12
+	adcq	120(%rsp), %rbp
+	adcq	128(%rsp), %r13
+	movq	56(%rsp), %rcx                  ## 8-byte Reload
+	movq	%r13, 112(%rcx)
+	movq	%rbp, 104(%rcx)
+	movq	%r12, 96(%rcx)
+	movq	%r14, 88(%rcx)
+	movq	%rdx, 80(%rcx)
+	movq	%rbx, 72(%rcx)
+	movq	%rdi, 64(%rcx)
+	movq	%rsi, 56(%rcx)
+	adcq	$0, %rax
+	movq	%rax, 120(%rcx)
+	addq	$648, %rsp                      ## imm = 0x288
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_mont8Lbmi2              ## -- Begin function mcl_fp_mont8Lbmi2
+	.p2align	4, 0x90
+_mcl_fp_mont8Lbmi2:                     ## @mcl_fp_mont8Lbmi2
+## %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	subq	$1256, %rsp                     ## imm = 0x4E8
+	movq	%rcx, %r13
+	movq	%rdx, 80(%rsp)                  ## 8-byte Spill
+	movq	%rsi, 88(%rsp)                  ## 8-byte Spill
+	movq	%rdi, 96(%rsp)                  ## 8-byte Spill
+	movq	-8(%rcx), %rbx
+	movq	%rbx, 72(%rsp)                  ## 8-byte Spill
+	movq	%rcx, 56(%rsp)                  ## 8-byte Spill
+	movq	(%rdx), %rdx
+	leaq	1184(%rsp), %rdi
+	callq	_mulPv512x64bmi2
+	movq	1184(%rsp), %r15
+	movq	1192(%rsp), %r12
+	movq	%rbx, %rdx
+	imulq	%r15, %rdx
+	movq	1248(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	1240(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	1232(%rsp), %r14
+	movq	1224(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	movq	1216(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	1208(%rsp), %rbx
+	movq	1200(%rsp), %rbp
+	leaq	1112(%rsp), %rdi
+	movq	%r13, %rsi
+	callq	_mulPv512x64bmi2
+	addq	1112(%rsp), %r15
+	adcq	1120(%rsp), %r12
+	adcq	1128(%rsp), %rbp
+	movq	%rbp, 64(%rsp)                  ## 8-byte Spill
+	adcq	1136(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rbp                  ## 8-byte Reload
+	adcq	1144(%rsp), %rbp
+	movq	(%rsp), %r15                    ## 8-byte Reload
+	adcq	1152(%rsp), %r15
+	adcq	1160(%rsp), %r14
+	movq	%r14, 48(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %r13                  ## 8-byte Reload
+	adcq	1168(%rsp), %r13
+	movq	8(%rsp), %rbx                   ## 8-byte Reload
+	adcq	1176(%rsp), %rbx
+	setb	%r14b
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	8(%rax), %rdx
+	leaq	1040(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movzbl	%r14b, %ecx
+	addq	1040(%rsp), %r12
+	movq	64(%rsp), %r14                  ## 8-byte Reload
+	adcq	1048(%rsp), %r14
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	adcq	1056(%rsp), %rax
+	movq	%rax, 40(%rsp)                  ## 8-byte Spill
+	adcq	1064(%rsp), %rbp
+	adcq	1072(%rsp), %r15
+	movq	%r15, (%rsp)                    ## 8-byte Spill
+	movq	48(%rsp), %rax                  ## 8-byte Reload
+	adcq	1080(%rsp), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	adcq	1088(%rsp), %r13
+	movq	%r13, 16(%rsp)                  ## 8-byte Spill
+	adcq	1096(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	adcq	1104(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	setb	%r15b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r12, %rdx
+	leaq	968(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movzbl	%r15b, %r15d
+	addq	968(%rsp), %r12
+	adcq	976(%rsp), %r14
+	movq	%r14, 64(%rsp)                  ## 8-byte Spill
+	movq	40(%rsp), %r13                  ## 8-byte Reload
+	adcq	984(%rsp), %r13
+	adcq	992(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  ## 8-byte Spill
+	movq	(%rsp), %r12                    ## 8-byte Reload
+	adcq	1000(%rsp), %r12
+	movq	48(%rsp), %r14                  ## 8-byte Reload
+	adcq	1008(%rsp), %r14
+	movq	16(%rsp), %rbx                  ## 8-byte Reload
+	adcq	1016(%rsp), %rbx
+	movq	8(%rsp), %rax                   ## 8-byte Reload
+	adcq	1024(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	32(%rsp), %rbp                  ## 8-byte Reload
+	adcq	1032(%rsp), %rbp
+	adcq	$0, %r15
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	16(%rax), %rdx
+	leaq	896(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	64(%rsp), %rax                  ## 8-byte Reload
+	addq	896(%rsp), %rax
+	adcq	904(%rsp), %r13
+	movq	%r13, 40(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %r13                  ## 8-byte Reload
+	adcq	912(%rsp), %r13
+	adcq	920(%rsp), %r12
+	adcq	928(%rsp), %r14
+	movq	%r14, 48(%rsp)                  ## 8-byte Spill
+	adcq	936(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rbx                   ## 8-byte Reload
+	adcq	944(%rsp), %rbx
+	adcq	952(%rsp), %rbp
+	movq	%rbp, 32(%rsp)                  ## 8-byte Spill
+	adcq	960(%rsp), %r15
+	setb	%r14b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	824(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movzbl	%r14b, %eax
+	addq	824(%rsp), %rbp
+	movq	40(%rsp), %r14                  ## 8-byte Reload
+	adcq	832(%rsp), %r14
+	adcq	840(%rsp), %r13
+	movq	%r13, 24(%rsp)                  ## 8-byte Spill
+	adcq	848(%rsp), %r12
+	movq	%r12, (%rsp)                    ## 8-byte Spill
+	movq	48(%rsp), %r12                  ## 8-byte Reload
+	adcq	856(%rsp), %r12
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	864(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	movq	%rbx, %rbp
+	adcq	872(%rsp), %rbp
+	movq	32(%rsp), %r13                  ## 8-byte Reload
+	adcq	880(%rsp), %r13
+	adcq	888(%rsp), %r15
+	movq	%rax, %rbx
+	adcq	$0, %rbx
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	24(%rax), %rdx
+	leaq	752(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	%r14, %rax
+	addq	752(%rsp), %rax
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	adcq	760(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  ## 8-byte Spill
+	movq	(%rsp), %r14                    ## 8-byte Reload
+	adcq	768(%rsp), %r14
+	adcq	776(%rsp), %r12
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	784(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	adcq	792(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   ## 8-byte Spill
+	adcq	800(%rsp), %r13
+	movq	%r13, 32(%rsp)                  ## 8-byte Spill
+	adcq	808(%rsp), %r15
+	movq	%r15, %r13
+	adcq	816(%rsp), %rbx
+	setb	%r15b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	680(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movzbl	%r15b, %eax
+	addq	680(%rsp), %rbp
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	adcq	688(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  ## 8-byte Spill
+	adcq	696(%rsp), %r14
+	movq	%r14, (%rsp)                    ## 8-byte Spill
+	adcq	704(%rsp), %r12
+	movq	16(%rsp), %rbp                  ## 8-byte Reload
+	adcq	712(%rsp), %rbp
+	movq	8(%rsp), %r14                   ## 8-byte Reload
+	adcq	720(%rsp), %r14
+	movq	32(%rsp), %r15                  ## 8-byte Reload
+	adcq	728(%rsp), %r15
+	adcq	736(%rsp), %r13
+	movq	%r13, 40(%rsp)                  ## 8-byte Spill
+	adcq	744(%rsp), %rbx
+	adcq	$0, %rax
+	movq	%rax, %r13
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	32(%rax), %rdx
+	leaq	608(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	addq	608(%rsp), %rax
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	616(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	adcq	624(%rsp), %r12
+	adcq	632(%rsp), %rbp
+	movq	%rbp, 16(%rsp)                  ## 8-byte Spill
+	adcq	640(%rsp), %r14
+	movq	%r14, 8(%rsp)                   ## 8-byte Spill
+	adcq	648(%rsp), %r15
+	movq	%r15, 32(%rsp)                  ## 8-byte Spill
+	movq	40(%rsp), %rbp                  ## 8-byte Reload
+	adcq	656(%rsp), %rbp
+	adcq	664(%rsp), %rbx
+	movq	%rbx, %r15
+	adcq	672(%rsp), %r13
+	setb	%r14b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	536(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movzbl	%r14b, %eax
+	addq	536(%rsp), %rbx
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	544(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	adcq	552(%rsp), %r12
+	movq	%r12, 48(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %r12                  ## 8-byte Reload
+	adcq	560(%rsp), %r12
+	movq	8(%rsp), %rbx                   ## 8-byte Reload
+	adcq	568(%rsp), %rbx
+	movq	32(%rsp), %r14                  ## 8-byte Reload
+	adcq	576(%rsp), %r14
+	adcq	584(%rsp), %rbp
+	adcq	592(%rsp), %r15
+	movq	%r15, 64(%rsp)                  ## 8-byte Spill
+	adcq	600(%rsp), %r13
+	movq	%r13, 24(%rsp)                  ## 8-byte Spill
+	adcq	$0, %rax
+	movq	%rax, %r13
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	40(%rax), %rdx
+	leaq	464(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	(%rsp), %rax                    ## 8-byte Reload
+	addq	464(%rsp), %rax
+	movq	48(%rsp), %r15                  ## 8-byte Reload
+	adcq	472(%rsp), %r15
+	adcq	480(%rsp), %r12
+	movq	%r12, 16(%rsp)                  ## 8-byte Spill
+	adcq	488(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	adcq	496(%rsp), %r14
+	movq	%r14, %r12
+	adcq	504(%rsp), %rbp
+	movq	64(%rsp), %rcx                  ## 8-byte Reload
+	adcq	512(%rsp), %rcx
+	movq	%rcx, 64(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	adcq	520(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  ## 8-byte Spill
+	adcq	528(%rsp), %r13
+	movq	%r13, (%rsp)                    ## 8-byte Spill
+	setb	%r14b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	392(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movzbl	%r14b, %eax
+	addq	392(%rsp), %rbx
+	adcq	400(%rsp), %r15
+	movq	%r15, 48(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rbx                  ## 8-byte Reload
+	adcq	408(%rsp), %rbx
+	movq	8(%rsp), %r14                   ## 8-byte Reload
+	adcq	416(%rsp), %r14
+	adcq	424(%rsp), %r12
+	movq	%r12, 32(%rsp)                  ## 8-byte Spill
+	adcq	432(%rsp), %rbp
+	movq	%rbp, 40(%rsp)                  ## 8-byte Spill
+	movq	64(%rsp), %rbp                  ## 8-byte Reload
+	adcq	440(%rsp), %rbp
+	movq	24(%rsp), %r13                  ## 8-byte Reload
+	adcq	448(%rsp), %r13
+	movq	(%rsp), %r12                    ## 8-byte Reload
+	adcq	456(%rsp), %r12
+	movq	%rax, %r15
+	adcq	$0, %r15
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	48(%rax), %rdx
+	leaq	320(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	leaq	248(%rsp), %rdi
+	movq	48(%rsp), %rax                  ## 8-byte Reload
+	addq	320(%rsp), %rax
+	adcq	328(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  ## 8-byte Spill
+	adcq	336(%rsp), %r14
+	movq	%r14, 8(%rsp)                   ## 8-byte Spill
+	movq	32(%rsp), %rbx                  ## 8-byte Reload
+	adcq	344(%rsp), %rbx
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
+	adcq	352(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	adcq	360(%rsp), %rbp
+	adcq	368(%rsp), %r13
+	adcq	376(%rsp), %r12
+	movq	%r12, (%rsp)                    ## 8-byte Spill
+	adcq	384(%rsp), %r15
+	movq	%r15, 48(%rsp)                  ## 8-byte Spill
+	setb	%r12b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %r14
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movzbl	%r12b, %r12d
+	addq	248(%rsp), %r14
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	256(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %r15                   ## 8-byte Reload
+	adcq	264(%rsp), %r15
+	adcq	272(%rsp), %rbx
+	movq	%rbx, 32(%rsp)                  ## 8-byte Spill
+	movq	40(%rsp), %rbx                  ## 8-byte Reload
+	adcq	280(%rsp), %rbx
+	adcq	288(%rsp), %rbp
+	adcq	296(%rsp), %r13
+	movq	(%rsp), %r14                    ## 8-byte Reload
+	adcq	304(%rsp), %r14
+	movq	48(%rsp), %rax                  ## 8-byte Reload
+	adcq	312(%rsp), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	adcq	$0, %r12
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	56(%rax), %rdx
+	leaq	176(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	addq	176(%rsp), %rax
+	adcq	184(%rsp), %r15
+	movq	%r15, 8(%rsp)                   ## 8-byte Spill
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	adcq	192(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	adcq	200(%rsp), %rbx
+	adcq	208(%rsp), %rbp
+	adcq	216(%rsp), %r13
+	movq	%r13, 24(%rsp)                  ## 8-byte Spill
+	adcq	224(%rsp), %r14
+	movq	%r14, (%rsp)                    ## 8-byte Spill
+	movq	48(%rsp), %r15                  ## 8-byte Reload
+	adcq	232(%rsp), %r15
+	adcq	240(%rsp), %r12
+	setb	%r14b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %r13
+	leaq	104(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movzbl	%r14b, %r9d
+	addq	104(%rsp), %r13
+	movq	8(%rsp), %r11                   ## 8-byte Reload
+	adcq	112(%rsp), %r11
+	movq	%r11, 8(%rsp)                   ## 8-byte Spill
+	movq	32(%rsp), %r10                  ## 8-byte Reload
+	adcq	120(%rsp), %r10
+	movq	%r10, 32(%rsp)                  ## 8-byte Spill
+	movq	%rbx, %r8
+	adcq	128(%rsp), %r8
+	movq	%r8, 40(%rsp)                   ## 8-byte Spill
+	movq	%rbp, %r13
+	adcq	136(%rsp), %r13
+	movq	24(%rsp), %r14                  ## 8-byte Reload
+	adcq	144(%rsp), %r14
+	movq	(%rsp), %rsi                    ## 8-byte Reload
+	adcq	152(%rsp), %rsi
+	adcq	160(%rsp), %r15
+	adcq	168(%rsp), %r12
+	adcq	$0, %r9
+	movq	56(%rsp), %rcx                  ## 8-byte Reload
+	subq	(%rcx), %r11
+	sbbq	8(%rcx), %r10
+	sbbq	16(%rcx), %r8
+	movq	%r13, %rdi
+	sbbq	24(%rcx), %rdi
+	movq	%r14, %rbx
+	sbbq	32(%rcx), %rbx
+	movq	%rsi, %rbp
+	sbbq	40(%rcx), %rbp
+	movq	%r15, %rax
+	sbbq	48(%rcx), %rax
+	movq	%rcx, %rdx
+	movq	%r12, %rcx
+	sbbq	56(%rdx), %rcx
+	sbbq	$0, %r9
+	testb	$1, %r9b
+	cmovneq	%r12, %rcx
+	movq	96(%rsp), %rdx                  ## 8-byte Reload
+	movq	%rcx, 56(%rdx)
+	cmovneq	%r15, %rax
+	movq	%rax, 48(%rdx)
+	cmovneq	%rsi, %rbp
+	movq	%rbp, 40(%rdx)
+	cmovneq	%r14, %rbx
+	movq	%rbx, 32(%rdx)
+	cmovneq	%r13, %rdi
+	movq	%rdi, 24(%rdx)
+	cmovneq	40(%rsp), %r8                   ## 8-byte Folded Reload
+	movq	%r8, 16(%rdx)
+	cmovneq	32(%rsp), %r10                  ## 8-byte Folded Reload
+	movq	%r10, 8(%rdx)
+	cmovneq	8(%rsp), %r11                   ## 8-byte Folded Reload
+	movq	%r11, (%rdx)
+	addq	$1256, %rsp                     ## imm = 0x4E8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -11602,556 +5191,411 @@ _mcl_fpDbl_sqrPre9Lbmi2:                ## @mcl_fpDbl_sqrPre9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_mont9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_montNF8Lbmi2            ## -- Begin function mcl_fp_montNF8Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_mont9Lbmi2:                     ## @mcl_fp_mont9Lbmi2
-## BB#0:
+_mcl_fp_montNF8Lbmi2:                   ## @mcl_fp_montNF8Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$1560, %rsp             ## imm = 0x618
-	movq	%rcx, 72(%rsp)          ## 8-byte Spill
-	movq	%rdx, 96(%rsp)          ## 8-byte Spill
-	movq	%rsi, 88(%rsp)          ## 8-byte Spill
-	movq	%rdi, 112(%rsp)         ## 8-byte Spill
+	subq	$1256, %rsp                     ## imm = 0x4E8
+	movq	%rcx, %rbp
+	movq	%rdx, 88(%rsp)                  ## 8-byte Spill
+	movq	%rsi, 80(%rsp)                  ## 8-byte Spill
+	movq	%rdi, 96(%rsp)                  ## 8-byte Spill
 	movq	-8(%rcx), %rbx
-	movq	%rbx, 80(%rsp)          ## 8-byte Spill
+	movq	%rbx, 64(%rsp)                  ## 8-byte Spill
+	movq	%rcx, 72(%rsp)                  ## 8-byte Spill
 	movq	(%rdx), %rdx
-	leaq	1480(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	1480(%rsp), %r14
-	movq	1488(%rsp), %r15
-	movq	%r14, %rdx
-	imulq	%rbx, %rdx
-	movq	1552(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	1544(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	1536(%rsp), %rax
-	movq	%rax, 56(%rsp)          ## 8-byte Spill
-	movq	1528(%rsp), %r12
-	movq	1520(%rsp), %r13
-	movq	1512(%rsp), %rbx
-	movq	1504(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	1496(%rsp), %rbp
-	leaq	1400(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	1400(%rsp), %r14
-	adcq	1408(%rsp), %r15
-	adcq	1416(%rsp), %rbp
-	movq	%rbp, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	1424(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	adcq	1432(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	adcq	1440(%rsp), %r13
-	movq	%r13, 16(%rsp)          ## 8-byte Spill
-	adcq	1448(%rsp), %r12
-	movq	%r12, 48(%rsp)          ## 8-byte Spill
-	movq	56(%rsp), %rbx          ## 8-byte Reload
-	adcq	1456(%rsp), %rbx
-	movq	40(%rsp), %r14          ## 8-byte Reload
-	adcq	1464(%rsp), %r14
-	movq	24(%rsp), %r13          ## 8-byte Reload
-	adcq	1472(%rsp), %r13
-	sbbq	%rbp, %rbp
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1320(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %ebp
-	addq	1320(%rsp), %r15
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	1328(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	1336(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %r12          ## 8-byte Reload
-	adcq	1344(%rsp), %r12
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	adcq	1352(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	adcq	1360(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	adcq	1368(%rsp), %rbx
-	adcq	1376(%rsp), %r14
-	movq	%r14, 40(%rsp)          ## 8-byte Spill
-	adcq	1384(%rsp), %r13
-	movq	%r13, 24(%rsp)          ## 8-byte Spill
-	adcq	1392(%rsp), %rbp
-	sbbq	%r14, %r14
-	movq	%r15, %rdx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	1240(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	%r14, %rax
-	andl	$1, %eax
-	addq	1240(%rsp), %r15
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	1248(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %r14            ## 8-byte Reload
-	adcq	1256(%rsp), %r14
-	adcq	1264(%rsp), %r12
-	movq	%r12, 32(%rsp)          ## 8-byte Spill
-	movq	16(%rsp), %r12          ## 8-byte Reload
-	adcq	1272(%rsp), %r12
-	movq	48(%rsp), %r13          ## 8-byte Reload
-	adcq	1280(%rsp), %r13
-	adcq	1288(%rsp), %rbx
-	movq	%rbx, 56(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %r15          ## 8-byte Reload
-	adcq	1296(%rsp), %r15
-	movq	24(%rsp), %rbx          ## 8-byte Reload
-	adcq	1304(%rsp), %rbx
-	adcq	1312(%rsp), %rbp
-	adcq	$0, %rax
-	movq	%rax, 64(%rsp)          ## 8-byte Spill
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	1160(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	addq	1160(%rsp), %rax
-	adcq	1168(%rsp), %r14
-	movq	%r14, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %r14          ## 8-byte Reload
-	adcq	1176(%rsp), %r14
-	adcq	1184(%rsp), %r12
-	movq	%r12, 16(%rsp)          ## 8-byte Spill
-	movq	%r13, %r12
-	adcq	1192(%rsp), %r12
-	movq	56(%rsp), %rcx          ## 8-byte Reload
-	adcq	1200(%rsp), %rcx
-	movq	%rcx, 56(%rsp)          ## 8-byte Spill
-	adcq	1208(%rsp), %r15
-	movq	%r15, %r13
-	adcq	1216(%rsp), %rbx
-	movq	%rbx, 24(%rsp)          ## 8-byte Spill
-	adcq	1224(%rsp), %rbp
-	movq	64(%rsp), %rcx          ## 8-byte Reload
-	adcq	1232(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	1080(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	%r15, %rax
-	andl	$1, %eax
-	addq	1080(%rsp), %rbx
-	movq	(%rsp), %rcx            ## 8-byte Reload
-	adcq	1088(%rsp), %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	movq	%r14, %r15
-	adcq	1096(%rsp), %r15
-	movq	16(%rsp), %r14          ## 8-byte Reload
-	adcq	1104(%rsp), %r14
-	movq	%r12, %rbx
-	adcq	1112(%rsp), %rbx
-	movq	56(%rsp), %rcx          ## 8-byte Reload
-	adcq	1120(%rsp), %rcx
-	movq	%rcx, 56(%rsp)          ## 8-byte Spill
+	leaq	1184(%rsp), %rdi
+	callq	_mulPv512x64bmi2
+	movq	1184(%rsp), %r15
+	movq	1192(%rsp), %r12
+	movq	%rbx, %rdx
+	imulq	%r15, %rdx
+	movq	1248(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	1240(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	1232(%rsp), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	movq	1224(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	1216(%rsp), %r14
+	movq	1208(%rsp), %rbx
+	movq	1200(%rsp), %r13
+	leaq	1112(%rsp), %rdi
+	movq	%rbp, %rsi
+	callq	_mulPv512x64bmi2
+	addq	1112(%rsp), %r15
+	adcq	1120(%rsp), %r12
 	adcq	1128(%rsp), %r13
-	movq	%r13, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %r13          ## 8-byte Reload
-	adcq	1136(%rsp), %r13
-	adcq	1144(%rsp), %rbp
-	movq	64(%rsp), %r12          ## 8-byte Reload
-	adcq	1152(%rsp), %r12
-	adcq	$0, %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	1000(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	(%rsp), %rax            ## 8-byte Reload
-	addq	1000(%rsp), %rax
-	adcq	1008(%rsp), %r15
-	movq	%r15, 32(%rsp)          ## 8-byte Spill
-	adcq	1016(%rsp), %r14
+	adcq	1136(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  ## 8-byte Spill
 	movq	%r14, %r15
-	adcq	1024(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          ## 8-byte Spill
-	movq	56(%rsp), %r14          ## 8-byte Reload
-	adcq	1032(%rsp), %r14
-	movq	40(%rsp), %rcx          ## 8-byte Reload
-	adcq	1040(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
+	adcq	1144(%rsp), %r15
+	movq	16(%rsp), %rbx                  ## 8-byte Reload
+	adcq	1152(%rsp), %rbx
+	movq	32(%rsp), %r14                  ## 8-byte Reload
+	adcq	1160(%rsp), %r14
+	movq	24(%rsp), %rbp                  ## 8-byte Reload
+	adcq	1168(%rsp), %rbp
+	movq	8(%rsp), %rax                   ## 8-byte Reload
+	adcq	1176(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	8(%rax), %rdx
+	leaq	1040(%rsp), %rdi
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	1104(%rsp), %rcx
+	addq	1040(%rsp), %r12
 	adcq	1048(%rsp), %r13
-	movq	%r13, 24(%rsp)          ## 8-byte Spill
-	adcq	1056(%rsp), %rbp
-	adcq	1064(%rsp), %r12
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	1072(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	sbbq	%rbx, %rbx
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	920(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	920(%rsp), %r13
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	928(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	adcq	936(%rsp), %r15
-	movq	%r15, 16(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %r15          ## 8-byte Reload
-	adcq	944(%rsp), %r15
-	movq	%r14, %r13
-	adcq	952(%rsp), %r13
-	movq	40(%rsp), %r14          ## 8-byte Reload
-	adcq	960(%rsp), %r14
-	movq	24(%rsp), %rbx          ## 8-byte Reload
-	adcq	968(%rsp), %rbx
-	adcq	976(%rsp), %rbp
-	adcq	984(%rsp), %r12
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	992(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	840(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	addq	840(%rsp), %rax
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	848(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	adcq	856(%rsp), %r15
-	adcq	864(%rsp), %r13
-	movq	%r13, 56(%rsp)          ## 8-byte Spill
-	adcq	872(%rsp), %r14
-	movq	%r14, 40(%rsp)          ## 8-byte Spill
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	adcq	1056(%rsp), %rax
+	movq	%rax, 40(%rsp)                  ## 8-byte Spill
+	adcq	1064(%rsp), %r15
+	adcq	1072(%rsp), %rbx
+	adcq	1080(%rsp), %r14
+	movq	%r14, 32(%rsp)                  ## 8-byte Spill
+	adcq	1088(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %r14                   ## 8-byte Reload
+	adcq	1096(%rsp), %r14
+	adcq	$0, %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r12, %rdx
+	leaq	968(%rsp), %rdi
+	movq	72(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	addq	968(%rsp), %r12
+	adcq	976(%rsp), %r13
+	movq	40(%rsp), %rbp                  ## 8-byte Reload
+	adcq	984(%rsp), %rbp
+	adcq	992(%rsp), %r15
+	movq	%r15, 56(%rsp)                  ## 8-byte Spill
+	adcq	1000(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %r15                  ## 8-byte Reload
+	adcq	1008(%rsp), %r15
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	1016(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	adcq	1024(%rsp), %r14
+	movq	%r14, 8(%rsp)                   ## 8-byte Spill
+	movq	48(%rsp), %rbx                  ## 8-byte Reload
+	adcq	1032(%rsp), %rbx
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	16(%rax), %rdx
+	leaq	896(%rsp), %rdi
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	960(%rsp), %r12
+	addq	896(%rsp), %r13
+	movq	%rbp, %r14
+	adcq	904(%rsp), %r14
+	movq	56(%rsp), %rax                  ## 8-byte Reload
+	adcq	912(%rsp), %rax
+	movq	%rax, 56(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	920(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	adcq	928(%rsp), %r15
+	movq	%r15, 32(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rbp                  ## 8-byte Reload
+	adcq	936(%rsp), %rbp
+	movq	8(%rsp), %rax                   ## 8-byte Reload
+	adcq	944(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	adcq	952(%rsp), %rbx
+	adcq	$0, %r12
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r13, %rdx
+	leaq	824(%rsp), %rdi
+	movq	72(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	addq	824(%rsp), %r13
+	adcq	832(%rsp), %r14
+	movq	%r14, 40(%rsp)                  ## 8-byte Spill
+	movq	56(%rsp), %r13                  ## 8-byte Reload
+	adcq	840(%rsp), %r13
+	movq	16(%rsp), %r15                  ## 8-byte Reload
+	adcq	848(%rsp), %r15
+	movq	32(%rsp), %r14                  ## 8-byte Reload
+	adcq	856(%rsp), %r14
+	adcq	864(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rbp                   ## 8-byte Reload
+	adcq	872(%rsp), %rbp
 	adcq	880(%rsp), %rbx
-	movq	%rbx, 24(%rsp)          ## 8-byte Spill
-	adcq	888(%rsp), %rbp
-	adcq	896(%rsp), %r12
-	movq	8(%rsp), %r13           ## 8-byte Reload
-	adcq	904(%rsp), %r13
-	movq	(%rsp), %rcx            ## 8-byte Reload
-	adcq	912(%rsp), %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	sbbq	%rbx, %rbx
-	movq	%rax, %rdx
-	movq	%rax, %r14
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	760(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	760(%rsp), %r14
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	768(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
+	adcq	888(%rsp), %r12
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	24(%rax), %rdx
+	leaq	752(%rsp), %rdi
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	816(%rsp), %rcx
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	addq	752(%rsp), %rax
+	adcq	760(%rsp), %r13
+	adcq	768(%rsp), %r15
+	movq	%r15, 16(%rsp)                  ## 8-byte Spill
+	movq	%r14, %r15
 	adcq	776(%rsp), %r15
-	movq	56(%rsp), %r14          ## 8-byte Reload
-	adcq	784(%rsp), %r14
-	movq	40(%rsp), %rcx          ## 8-byte Reload
-	adcq	792(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	800(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	808(%rsp), %rbp
-	movq	%r12, %rbx
-	adcq	816(%rsp), %rbx
-	movq	%r13, %r12
-	adcq	824(%rsp), %r12
-	movq	(%rsp), %r13            ## 8-byte Reload
-	adcq	832(%rsp), %r13
-	adcq	$0, %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	40(%rax), %rdx
+	movq	24(%rsp), %rdx                  ## 8-byte Reload
+	adcq	784(%rsp), %rdx
+	movq	%rdx, 24(%rsp)                  ## 8-byte Spill
+	adcq	792(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   ## 8-byte Spill
+	adcq	800(%rsp), %rbx
+	adcq	808(%rsp), %r12
+	adcq	$0, %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
 	leaq	680(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	addq	680(%rsp), %rax
-	adcq	688(%rsp), %r15
-	movq	%r15, 48(%rsp)          ## 8-byte Spill
+	movq	72(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	addq	680(%rsp), %rbp
+	adcq	688(%rsp), %r13
+	movq	16(%rsp), %r14                  ## 8-byte Reload
 	adcq	696(%rsp), %r14
-	movq	%r14, 56(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rcx          ## 8-byte Reload
-	adcq	704(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %r15          ## 8-byte Reload
+	adcq	704(%rsp), %r15
+	movq	%r15, 32(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %r15                  ## 8-byte Reload
 	adcq	712(%rsp), %r15
+	movq	8(%rsp), %rbp                   ## 8-byte Reload
 	adcq	720(%rsp), %rbp
 	adcq	728(%rsp), %rbx
-	movq	%rbx, 64(%rsp)          ## 8-byte Spill
 	adcq	736(%rsp), %r12
-	movq	%r12, 8(%rsp)           ## 8-byte Spill
-	adcq	744(%rsp), %r13
-	movq	%r13, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %r13          ## 8-byte Reload
-	adcq	752(%rsp), %r13
-	sbbq	%r14, %r14
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	600(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %r14d
-	addq	600(%rsp), %rbx
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	adcq	608(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	movq	56(%rsp), %rax          ## 8-byte Reload
-	adcq	616(%rsp), %rax
-	movq	%rax, 56(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rbx          ## 8-byte Reload
-	adcq	624(%rsp), %rbx
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	adcq	744(%rsp), %rax
+	movq	%rax, 40(%rsp)                  ## 8-byte Spill
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	32(%rax), %rdx
+	leaq	608(%rsp), %rdi
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	672(%rsp), %rcx
+	movq	%r13, %rax
+	addq	608(%rsp), %rax
+	adcq	616(%rsp), %r14
+	movq	%r14, 16(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %r13                  ## 8-byte Reload
+	adcq	624(%rsp), %r13
 	adcq	632(%rsp), %r15
-	movq	%r15, 24(%rsp)          ## 8-byte Spill
+	movq	%r15, 24(%rsp)                  ## 8-byte Spill
 	adcq	640(%rsp), %rbp
-	movq	64(%rsp), %r12          ## 8-byte Reload
-	adcq	648(%rsp), %r12
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	656(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %r15            ## 8-byte Reload
-	adcq	664(%rsp), %r15
-	adcq	672(%rsp), %r13
-	adcq	$0, %r14
-	movq	%r14, 16(%rsp)          ## 8-byte Spill
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	520(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	addq	520(%rsp), %rax
-	movq	56(%rsp), %r14          ## 8-byte Reload
-	adcq	528(%rsp), %r14
-	adcq	536(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	544(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	552(%rsp), %rbp
-	adcq	560(%rsp), %r12
-	movq	%r12, 64(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r12           ## 8-byte Reload
-	adcq	568(%rsp), %r12
+	movq	%rbp, 8(%rsp)                   ## 8-byte Spill
+	adcq	648(%rsp), %rbx
+	movq	%rbx, %r15
+	adcq	656(%rsp), %r12
+	movq	40(%rsp), %r14                  ## 8-byte Reload
+	adcq	664(%rsp), %r14
+	movq	%rcx, %rbp
+	adcq	$0, %rbp
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	536(%rsp), %rdi
+	movq	72(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	addq	536(%rsp), %rbx
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	544(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	%r13, %rbx
+	adcq	552(%rsp), %rbx
+	movq	24(%rsp), %r13                  ## 8-byte Reload
+	adcq	560(%rsp), %r13
+	movq	8(%rsp), %rax                   ## 8-byte Reload
+	adcq	568(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
 	adcq	576(%rsp), %r15
-	movq	%r15, (%rsp)            ## 8-byte Spill
-	adcq	584(%rsp), %r13
-	movq	%r13, 32(%rsp)          ## 8-byte Spill
-	movq	16(%rsp), %r15          ## 8-byte Reload
+	movq	%r15, 48(%rsp)                  ## 8-byte Spill
+	adcq	584(%rsp), %r12
+	movq	%r14, %r15
 	adcq	592(%rsp), %r15
-	sbbq	%rbx, %rbx
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	440(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	440(%rsp), %r13
-	adcq	448(%rsp), %r14
-	movq	%r14, 56(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %r14          ## 8-byte Reload
-	adcq	456(%rsp), %r14
-	movq	24(%rsp), %rbx          ## 8-byte Reload
-	adcq	464(%rsp), %rbx
-	adcq	472(%rsp), %rbp
-	movq	%rbp, 104(%rsp)         ## 8-byte Spill
-	movq	64(%rsp), %rcx          ## 8-byte Reload
-	adcq	480(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	adcq	488(%rsp), %r12
-	movq	%r12, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %rbp            ## 8-byte Reload
-	adcq	496(%rsp), %rbp
-	movq	32(%rsp), %r12          ## 8-byte Reload
+	adcq	600(%rsp), %rbp
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	40(%rax), %rdx
+	leaq	464(%rsp), %rdi
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	528(%rsp), %rcx
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	addq	464(%rsp), %rax
+	adcq	472(%rsp), %rbx
+	movq	%rbx, 32(%rsp)                  ## 8-byte Spill
+	adcq	480(%rsp), %r13
+	movq	%r13, 24(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %r14                   ## 8-byte Reload
+	adcq	488(%rsp), %r14
+	movq	48(%rsp), %r13                  ## 8-byte Reload
+	adcq	496(%rsp), %r13
 	adcq	504(%rsp), %r12
+	movq	%r12, 16(%rsp)                  ## 8-byte Spill
 	adcq	512(%rsp), %r15
-	movq	%r15, %r13
-	adcq	$0, %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	360(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	56(%rsp), %rax          ## 8-byte Reload
-	addq	360(%rsp), %rax
-	adcq	368(%rsp), %r14
-	adcq	376(%rsp), %rbx
-	movq	%rbx, 24(%rsp)          ## 8-byte Spill
-	movq	104(%rsp), %rcx         ## 8-byte Reload
-	adcq	384(%rsp), %rcx
-	movq	%rcx, 104(%rsp)         ## 8-byte Spill
-	movq	64(%rsp), %rbx          ## 8-byte Reload
-	adcq	392(%rsp), %rbx
-	movq	8(%rsp), %r15           ## 8-byte Reload
-	adcq	400(%rsp), %r15
-	adcq	408(%rsp), %rbp
-	movq	%rbp, (%rsp)            ## 8-byte Spill
-	adcq	416(%rsp), %r12
-	movq	%r12, %rbp
+	movq	%r15, %r12
+	adcq	520(%rsp), %rbp
+	movq	%rcx, %r15
+	adcq	$0, %r15
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	392(%rsp), %rdi
+	movq	72(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	addq	392(%rsp), %rbx
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	adcq	400(%rsp), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	408(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	%r14, %rbx
+	adcq	416(%rsp), %rbx
 	adcq	424(%rsp), %r13
-	movq	%r13, 16(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rcx          ## 8-byte Reload
-	adcq	432(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	sbbq	%r13, %r13
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	280(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %r13d
-	addq	280(%rsp), %r12
-	adcq	288(%rsp), %r14
-	movq	%r14, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	296(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	104(%rsp), %r14         ## 8-byte Reload
-	adcq	304(%rsp), %r14
-	adcq	312(%rsp), %rbx
-	movq	%rbx, 64(%rsp)          ## 8-byte Spill
-	adcq	320(%rsp), %r15
-	movq	%r15, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %rbx            ## 8-byte Reload
-	adcq	328(%rsp), %rbx
-	adcq	336(%rsp), %rbp
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	movq	16(%rsp), %r12          ## 8-byte Reload
-	adcq	344(%rsp), %r12
-	movq	48(%rsp), %rbp          ## 8-byte Reload
-	adcq	352(%rsp), %rbp
+	movq	%r13, 48(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %r14                  ## 8-byte Reload
+	adcq	432(%rsp), %r14
+	adcq	440(%rsp), %r12
+	adcq	448(%rsp), %rbp
+	movq	%rbp, 56(%rsp)                  ## 8-byte Spill
+	adcq	456(%rsp), %r15
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	48(%rax), %rdx
+	leaq	320(%rsp), %rdi
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	leaq	248(%rsp), %rdi
+	movq	384(%rsp), %r13
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	addq	320(%rsp), %rax
+	movq	24(%rsp), %rbp                  ## 8-byte Reload
+	adcq	328(%rsp), %rbp
+	adcq	336(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	344(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	adcq	352(%rsp), %r14
+	movq	%r14, 16(%rsp)                  ## 8-byte Spill
+	adcq	360(%rsp), %r12
+	movq	%r12, 40(%rsp)                  ## 8-byte Spill
+	movq	56(%rsp), %r12                  ## 8-byte Reload
+	adcq	368(%rsp), %r12
+	adcq	376(%rsp), %r15
+	movq	%r15, 32(%rsp)                  ## 8-byte Spill
 	adcq	$0, %r13
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	64(%rax), %rdx
-	leaq	200(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	addq	200(%rsp), %rax
-	movq	24(%rsp), %r15          ## 8-byte Reload
-	adcq	208(%rsp), %r15
-	adcq	216(%rsp), %r14
-	movq	%r14, 104(%rsp)         ## 8-byte Spill
-	movq	64(%rsp), %r14          ## 8-byte Reload
-	adcq	224(%rsp), %r14
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	232(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	adcq	240(%rsp), %rbx
-	movq	%rbx, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	248(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	adcq	256(%rsp), %r12
-	movq	%r12, 16(%rsp)          ## 8-byte Spill
-	adcq	264(%rsp), %rbp
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	272(%rsp), %r13
-	sbbq	%rbx, %rbx
-	movq	80(%rsp), %rdx          ## 8-byte Reload
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
 	imulq	%rax, %rdx
-	movq	%rax, %r12
-	leaq	120(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %ebx
-	addq	120(%rsp), %r12
-	adcq	128(%rsp), %r15
-	movq	104(%rsp), %rbp         ## 8-byte Reload
-	adcq	136(%rsp), %rbp
-	movq	%r14, %rcx
-	adcq	144(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r8            ## 8-byte Reload
-	adcq	152(%rsp), %r8
-	movq	%r8, 8(%rsp)            ## 8-byte Spill
-	movq	(%rsp), %r9             ## 8-byte Reload
-	adcq	160(%rsp), %r9
-	movq	%r9, (%rsp)             ## 8-byte Spill
-	movq	32(%rsp), %r10          ## 8-byte Reload
-	adcq	168(%rsp), %r10
-	movq	%r10, 32(%rsp)          ## 8-byte Spill
-	movq	16(%rsp), %rdi          ## 8-byte Reload
-	adcq	176(%rsp), %rdi
-	movq	%rdi, 16(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %r14          ## 8-byte Reload
+	movq	%rax, %rbx
+	movq	72(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	leaq	176(%rsp), %rdi
+	addq	248(%rsp), %rbx
+	adcq	256(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %r14                   ## 8-byte Reload
+	adcq	264(%rsp), %r14
+	movq	48(%rsp), %rbp                  ## 8-byte Reload
+	adcq	272(%rsp), %rbp
+	movq	16(%rsp), %r15                  ## 8-byte Reload
+	adcq	280(%rsp), %r15
+	movq	40(%rsp), %rbx                  ## 8-byte Reload
+	adcq	288(%rsp), %rbx
+	adcq	296(%rsp), %r12
+	movq	%r12, 56(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	adcq	304(%rsp), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	adcq	312(%rsp), %r13
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	56(%rax), %rdx
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	leaq	104(%rsp), %rdi
+	movq	240(%rsp), %r12
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	addq	176(%rsp), %rax
 	adcq	184(%rsp), %r14
-	adcq	192(%rsp), %r13
-	adcq	$0, %rbx
-	movq	%r15, %rsi
-	movq	%r15, %r12
-	movq	72(%rsp), %rdx          ## 8-byte Reload
-	subq	(%rdx), %rsi
-	movq	%rbp, %rax
-	movq	%rbp, %r15
-	sbbq	8(%rdx), %rax
-	movq	%rcx, %rbp
-	sbbq	16(%rdx), %rbp
-	movq	%r8, %rcx
-	sbbq	24(%rdx), %rcx
-	movq	%r9, %r8
-	sbbq	32(%rdx), %r8
-	movq	%r10, %r11
-	sbbq	40(%rdx), %r11
-	movq	%rdi, %r10
-	sbbq	48(%rdx), %r10
-	movq	%r14, %rdi
-	sbbq	56(%rdx), %rdi
-	movq	%r13, %r9
-	sbbq	64(%rdx), %r9
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%r13, %r9
-	testb	%bl, %bl
-	cmovneq	%r12, %rsi
-	movq	112(%rsp), %rbx         ## 8-byte Reload
-	movq	%rsi, (%rbx)
-	cmovneq	%r15, %rax
-	movq	%rax, 8(%rbx)
-	cmovneq	64(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 16(%rbx)
-	cmovneq	8(%rsp), %rcx           ## 8-byte Folded Reload
-	movq	%rcx, 24(%rbx)
-	cmovneq	(%rsp), %r8             ## 8-byte Folded Reload
-	movq	%r8, 32(%rbx)
-	cmovneq	32(%rsp), %r11          ## 8-byte Folded Reload
-	movq	%r11, 40(%rbx)
-	cmovneq	16(%rsp), %r10          ## 8-byte Folded Reload
-	movq	%r10, 48(%rbx)
-	cmovneq	%r14, %rdi
-	movq	%rdi, 56(%rbx)
-	movq	%r9, 64(%rbx)
-	addq	$1560, %rsp             ## imm = 0x618
+	movq	%r14, 8(%rsp)                   ## 8-byte Spill
+	adcq	192(%rsp), %rbp
+	movq	%rbp, 48(%rsp)                  ## 8-byte Spill
+	adcq	200(%rsp), %r15
+	movq	%r15, 16(%rsp)                  ## 8-byte Spill
+	adcq	208(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  ## 8-byte Spill
+	movq	56(%rsp), %rbp                  ## 8-byte Reload
+	adcq	216(%rsp), %rbp
+	movq	32(%rsp), %r15                  ## 8-byte Reload
+	adcq	224(%rsp), %r15
+	adcq	232(%rsp), %r13
+	adcq	$0, %r12
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	movq	72(%rsp), %r14                  ## 8-byte Reload
+	movq	%r14, %rsi
+	callq	_mulPv512x64bmi2
+	addq	104(%rsp), %rbx
+	movq	8(%rsp), %r8                    ## 8-byte Reload
+	adcq	112(%rsp), %r8
+	movq	%r8, 8(%rsp)                    ## 8-byte Spill
+	movq	48(%rsp), %r9                   ## 8-byte Reload
+	adcq	120(%rsp), %r9
+	movq	%r9, 48(%rsp)                   ## 8-byte Spill
+	movq	16(%rsp), %rsi                  ## 8-byte Reload
+	adcq	128(%rsp), %rsi
+	movq	40(%rsp), %r11                  ## 8-byte Reload
+	adcq	136(%rsp), %r11
+	movq	%rbp, %r10
+	adcq	144(%rsp), %r10
+	adcq	152(%rsp), %r15
+	adcq	160(%rsp), %r13
+	adcq	168(%rsp), %r12
+	movq	%r14, %rax
+	subq	(%r14), %r8
+	sbbq	8(%r14), %r9
+	movq	%rsi, %rdx
+	movq	%rsi, %r14
+	sbbq	16(%rax), %rdx
+	movq	%r11, %rsi
+	sbbq	24(%rax), %rsi
+	movq	%r10, %rdi
+	sbbq	32(%rax), %rdi
+	movq	%r15, %rbp
+	sbbq	40(%rax), %rbp
+	movq	%r13, %rbx
+	sbbq	48(%rax), %rbx
+	movq	%rax, %rcx
+	movq	%r12, %rax
+	sbbq	56(%rcx), %rax
+	cmovsq	%r12, %rax
+	movq	96(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rax, 56(%rcx)
+	cmovsq	%r13, %rbx
+	movq	%rbx, 48(%rcx)
+	cmovsq	%r15, %rbp
+	movq	%rbp, 40(%rcx)
+	cmovsq	%r10, %rdi
+	movq	%rdi, 32(%rcx)
+	cmovsq	%r11, %rsi
+	movq	%rsi, 24(%rcx)
+	cmovsq	%r14, %rdx
+	movq	%rdx, 16(%rcx)
+	cmovsq	48(%rsp), %r9                   ## 8-byte Folded Reload
+	movq	%r9, 8(%rcx)
+	cmovsq	8(%rsp), %r8                    ## 8-byte Folded Reload
+	movq	%r8, (%rcx)
+	addq	$1256, %rsp                     ## imm = 0x4E8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -12159,529 +5603,301 @@ _mcl_fp_mont9Lbmi2:                     ## @mcl_fp_mont9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montNF9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_montRed8Lbmi2           ## -- Begin function mcl_fp_montRed8Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_montNF9Lbmi2:                   ## @mcl_fp_montNF9Lbmi2
-## BB#0:
+_mcl_fp_montRed8Lbmi2:                  ## @mcl_fp_montRed8Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$1560, %rsp             ## imm = 0x618
-	movq	%rcx, 72(%rsp)          ## 8-byte Spill
-	movq	%rdx, 80(%rsp)          ## 8-byte Spill
-	movq	%rsi, 88(%rsp)          ## 8-byte Spill
-	movq	%rdi, 112(%rsp)         ## 8-byte Spill
-	movq	-8(%rcx), %rbx
-	movq	%rbx, 96(%rsp)          ## 8-byte Spill
-	movq	(%rdx), %rdx
-	leaq	1480(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	1480(%rsp), %r12
-	movq	1488(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	%r12, %rdx
+	subq	$728, %rsp                      ## imm = 0x2D8
+	movq	%rdi, 144(%rsp)                 ## 8-byte Spill
+	movq	56(%rdx), %rax
+	movq	%rax, 136(%rsp)                 ## 8-byte Spill
+	movq	48(%rdx), %rax
+	movq	%rax, 128(%rsp)                 ## 8-byte Spill
+	movq	40(%rdx), %rax
+	movq	%rax, 120(%rsp)                 ## 8-byte Spill
+	movq	32(%rdx), %rax
+	movq	%rax, 112(%rsp)                 ## 8-byte Spill
+	movq	24(%rdx), %rax
+	movq	%rax, 104(%rsp)                 ## 8-byte Spill
+	movq	16(%rdx), %rax
+	movq	%rax, 96(%rsp)                  ## 8-byte Spill
+	movq	8(%rdx), %rax
+	movq	%rax, 88(%rsp)                  ## 8-byte Spill
+	movq	%rsi, 72(%rsp)                  ## 8-byte Spill
+	movq	56(%rsi), %r12
+	movq	48(%rsi), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	40(%rsi), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	movq	32(%rsi), %r15
+	movq	24(%rsi), %r14
+	movq	16(%rsi), %r13
+	movq	(%rsi), %rbp
+	movq	8(%rsi), %rbx
+	movq	-8(%rdx), %rcx
+	movq	%rcx, 56(%rsp)                  ## 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdx, %rsi
+	movq	%rdx, 64(%rsp)                  ## 8-byte Spill
+	movq	%rax, 80(%rsp)                  ## 8-byte Spill
+	movq	%rbp, %rdx
+	imulq	%rcx, %rdx
+	leaq	656(%rsp), %rdi
+	callq	_mulPv512x64bmi2
+	addq	656(%rsp), %rbp
+	adcq	664(%rsp), %rbx
+	adcq	672(%rsp), %r13
+	adcq	680(%rsp), %r14
+	adcq	688(%rsp), %r15
+	movq	32(%rsp), %rbp                  ## 8-byte Reload
+	adcq	696(%rsp), %rbp
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	704(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	adcq	712(%rsp), %r12
+	movq	%r12, 24(%rsp)                  ## 8-byte Spill
+	movq	72(%rsp), %rax                  ## 8-byte Reload
+	movq	64(%rax), %rax
+	adcq	720(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	setb	%r12b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
 	imulq	%rbx, %rdx
-	movq	1552(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	1544(%rsp), %r13
-	movq	1536(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	movq	1528(%rsp), %rax
-	movq	%rax, 64(%rsp)          ## 8-byte Spill
-	movq	1520(%rsp), %r14
-	movq	1512(%rsp), %r15
-	movq	1504(%rsp), %rbx
-	movq	1496(%rsp), %rbp
-	leaq	1400(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	1400(%rsp), %r12
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	adcq	1408(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	adcq	1416(%rsp), %rbp
-	movq	%rbp, 104(%rsp)         ## 8-byte Spill
-	adcq	1424(%rsp), %rbx
-	movq	%rbx, (%rsp)            ## 8-byte Spill
-	adcq	1432(%rsp), %r15
-	movq	%r15, 8(%rsp)           ## 8-byte Spill
-	adcq	1440(%rsp), %r14
-	movq	%r14, 32(%rsp)          ## 8-byte Spill
-	movq	64(%rsp), %rbx          ## 8-byte Reload
-	adcq	1448(%rsp), %rbx
-	movq	48(%rsp), %r12          ## 8-byte Reload
-	adcq	1456(%rsp), %r12
-	adcq	1464(%rsp), %r13
-	movq	%r13, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	1472(%rsp), %rbp
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1320(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	1392(%rsp), %rax
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	addq	1320(%rsp), %rcx
-	movq	104(%rsp), %r15         ## 8-byte Reload
-	adcq	1328(%rsp), %r15
-	movq	(%rsp), %r14            ## 8-byte Reload
-	adcq	1336(%rsp), %r14
-	movq	8(%rsp), %rdx           ## 8-byte Reload
-	adcq	1344(%rsp), %rdx
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	movq	32(%rsp), %r13          ## 8-byte Reload
-	adcq	1352(%rsp), %r13
-	adcq	1360(%rsp), %rbx
-	movq	%rbx, 64(%rsp)          ## 8-byte Spill
-	adcq	1368(%rsp), %r12
-	movq	%r12, 48(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rdx          ## 8-byte Reload
-	adcq	1376(%rsp), %rdx
-	movq	%rdx, 40(%rsp)          ## 8-byte Spill
-	adcq	1384(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, %rbp
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	1240(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	1240(%rsp), %rbx
-	adcq	1248(%rsp), %r15
-	movq	%r15, 104(%rsp)         ## 8-byte Spill
-	adcq	1256(%rsp), %r14
-	movq	%r14, (%rsp)            ## 8-byte Spill
-	movq	8(%rsp), %r12           ## 8-byte Reload
-	adcq	1264(%rsp), %r12
-	adcq	1272(%rsp), %r13
-	movq	%r13, %r14
-	movq	64(%rsp), %r13          ## 8-byte Reload
-	adcq	1280(%rsp), %r13
-	movq	48(%rsp), %rbx          ## 8-byte Reload
-	adcq	1288(%rsp), %rbx
-	movq	40(%rsp), %r15          ## 8-byte Reload
-	adcq	1296(%rsp), %r15
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	1304(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	1312(%rsp), %rbp
-	movq	%rbp, 56(%rsp)          ## 8-byte Spill
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	1160(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	1232(%rsp), %rax
-	movq	104(%rsp), %rcx         ## 8-byte Reload
-	addq	1160(%rsp), %rcx
-	movq	(%rsp), %rbp            ## 8-byte Reload
-	adcq	1168(%rsp), %rbp
-	adcq	1176(%rsp), %r12
-	movq	%r12, 8(%rsp)           ## 8-byte Spill
-	adcq	1184(%rsp), %r14
-	adcq	1192(%rsp), %r13
-	movq	%r13, %r12
-	adcq	1200(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          ## 8-byte Spill
-	adcq	1208(%rsp), %r15
-	movq	%r15, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rbx          ## 8-byte Reload
-	adcq	1216(%rsp), %rbx
-	movq	56(%rsp), %rdx          ## 8-byte Reload
-	adcq	1224(%rsp), %rdx
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	movq	%rax, %r15
-	adcq	$0, %r15
-	movq	%rcx, %rdx
-	movq	%rcx, %r13
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	1080(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	1080(%rsp), %r13
-	adcq	1088(%rsp), %rbp
-	movq	%rbp, (%rsp)            ## 8-byte Spill
-	movq	8(%rsp), %r13           ## 8-byte Reload
-	adcq	1096(%rsp), %r13
-	adcq	1104(%rsp), %r14
-	adcq	1112(%rsp), %r12
-	movq	%r12, 64(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %r12          ## 8-byte Reload
-	adcq	1120(%rsp), %r12
-	movq	40(%rsp), %rbp          ## 8-byte Reload
-	adcq	1128(%rsp), %rbp
-	adcq	1136(%rsp), %rbx
-	movq	%rbx, 24(%rsp)          ## 8-byte Spill
-	movq	56(%rsp), %rbx          ## 8-byte Reload
-	adcq	1144(%rsp), %rbx
-	adcq	1152(%rsp), %r15
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	1000(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	1072(%rsp), %rax
-	movq	(%rsp), %rcx            ## 8-byte Reload
-	addq	1000(%rsp), %rcx
-	adcq	1008(%rsp), %r13
-	movq	%r13, 8(%rsp)           ## 8-byte Spill
-	adcq	1016(%rsp), %r14
-	movq	%r14, 32(%rsp)          ## 8-byte Spill
-	movq	64(%rsp), %r14          ## 8-byte Reload
-	adcq	1024(%rsp), %r14
-	adcq	1032(%rsp), %r12
-	adcq	1040(%rsp), %rbp
-	movq	%rbp, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %r13          ## 8-byte Reload
-	adcq	1048(%rsp), %r13
-	adcq	1056(%rsp), %rbx
-	movq	%rbx, 56(%rsp)          ## 8-byte Spill
-	adcq	1064(%rsp), %r15
-	movq	%r15, 16(%rsp)          ## 8-byte Spill
+	leaq	584(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	648(%rsp), %rax
+	addb	$255, %r12b
 	adcq	$0, %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	920(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	920(%rsp), %rbx
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	928(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	32(%rsp), %rbp          ## 8-byte Reload
-	adcq	936(%rsp), %rbp
-	movq	%r14, %rbx
-	adcq	944(%rsp), %rbx
-	adcq	952(%rsp), %r12
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	960(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	968(%rsp), %r13
-	movq	%r13, %r15
-	movq	56(%rsp), %r13          ## 8-byte Reload
-	adcq	976(%rsp), %r13
-	movq	16(%rsp), %r14          ## 8-byte Reload
-	adcq	984(%rsp), %r14
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	992(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	840(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	912(%rsp), %rax
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	addq	840(%rsp), %rcx
-	adcq	848(%rsp), %rbp
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	856(%rsp), %rbx
-	movq	%rbx, 64(%rsp)          ## 8-byte Spill
-	adcq	864(%rsp), %r12
-	movq	40(%rsp), %rbp          ## 8-byte Reload
-	adcq	872(%rsp), %rbp
-	adcq	880(%rsp), %r15
-	movq	%r15, 24(%rsp)          ## 8-byte Spill
-	adcq	888(%rsp), %r13
-	adcq	896(%rsp), %r14
-	movq	%r14, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rdx            ## 8-byte Reload
-	adcq	904(%rsp), %rdx
-	movq	%rdx, (%rsp)            ## 8-byte Spill
+	movq	%rax, %rcx
+	addq	584(%rsp), %rbx
+	adcq	592(%rsp), %r13
+	adcq	600(%rsp), %r14
+	adcq	608(%rsp), %r15
+	adcq	616(%rsp), %rbp
+	movq	%rbp, 32(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	624(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rbp                  ## 8-byte Reload
+	adcq	632(%rsp), %rbp
+	movq	(%rsp), %rax                    ## 8-byte Reload
+	adcq	640(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	movq	72(%rsp), %r12                  ## 8-byte Reload
+	adcq	72(%r12), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	setb	%bl
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r13, %rdx
+	leaq	512(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	576(%rsp), %rax
+	addb	$255, %bl
 	adcq	$0, %rax
-	movq	%rax, %r14
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	760(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	760(%rsp), %rbx
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	768(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	64(%rsp), %r15          ## 8-byte Reload
-	adcq	776(%rsp), %r15
-	adcq	784(%rsp), %r12
-	movq	%r12, 48(%rsp)          ## 8-byte Spill
-	movq	%rbp, %rbx
-	adcq	792(%rsp), %rbx
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	800(%rsp), %rbp
-	adcq	808(%rsp), %r13
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	adcq	816(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %r12            ## 8-byte Reload
-	adcq	824(%rsp), %r12
-	adcq	832(%rsp), %r14
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	40(%rax), %rdx
-	leaq	680(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	752(%rsp), %rcx
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	addq	680(%rsp), %rax
-	adcq	688(%rsp), %r15
-	movq	%r15, 64(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rdx          ## 8-byte Reload
-	adcq	696(%rsp), %rdx
-	movq	%rdx, 48(%rsp)          ## 8-byte Spill
-	adcq	704(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          ## 8-byte Spill
-	adcq	712(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	adcq	720(%rsp), %r13
-	movq	%r13, %r15
-	movq	16(%rsp), %rbx          ## 8-byte Reload
-	adcq	728(%rsp), %rbx
-	adcq	736(%rsp), %r12
-	movq	%r12, (%rsp)            ## 8-byte Spill
-	adcq	744(%rsp), %r14
-	movq	%r14, 32(%rsp)          ## 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	600(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	600(%rsp), %r13
-	movq	64(%rsp), %r13          ## 8-byte Reload
-	adcq	608(%rsp), %r13
-	movq	48(%rsp), %r12          ## 8-byte Reload
-	adcq	616(%rsp), %r12
-	movq	40(%rsp), %rbp          ## 8-byte Reload
-	adcq	624(%rsp), %rbp
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	632(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	640(%rsp), %r15
-	movq	%r15, 56(%rsp)          ## 8-byte Spill
-	adcq	648(%rsp), %rbx
-	movq	%rbx, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %r14            ## 8-byte Reload
-	adcq	656(%rsp), %r14
-	movq	32(%rsp), %rbx          ## 8-byte Reload
-	adcq	664(%rsp), %rbx
-	movq	8(%rsp), %r15           ## 8-byte Reload
-	adcq	672(%rsp), %r15
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	520(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	592(%rsp), %rcx
-	movq	%r13, %rax
-	addq	520(%rsp), %rax
-	adcq	528(%rsp), %r12
-	movq	%r12, 48(%rsp)          ## 8-byte Spill
-	movq	%rbp, %r12
-	adcq	536(%rsp), %r12
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	544(%rsp), %rbp
-	movq	56(%rsp), %rdx          ## 8-byte Reload
-	adcq	552(%rsp), %rdx
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	movq	16(%rsp), %rdx          ## 8-byte Reload
-	adcq	560(%rsp), %rdx
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	568(%rsp), %r14
-	movq	%r14, (%rsp)            ## 8-byte Spill
-	adcq	576(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	adcq	584(%rsp), %r15
-	movq	%r15, 8(%rsp)           ## 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, %r13
-	movq	%rax, %rdx
-	movq	%rax, %r14
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
+	movq	%rax, %rcx
+	addq	512(%rsp), %r13
+	adcq	520(%rsp), %r14
+	adcq	528(%rsp), %r15
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	adcq	536(%rsp), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	544(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	adcq	552(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  ## 8-byte Spill
+	movq	(%rsp), %rbp                    ## 8-byte Reload
+	adcq	560(%rsp), %rbp
+	movq	8(%rsp), %rbx                   ## 8-byte Reload
+	adcq	568(%rsp), %rbx
+	adcq	80(%r12), %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	setb	%r13b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r14, %rdx
 	leaq	440(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
+	movq	64(%rsp), %r12                  ## 8-byte Reload
+	movq	%r12, %rsi
+	callq	_mulPv512x64bmi2
+	movq	504(%rsp), %rax
+	addb	$255, %r13b
+	adcq	$0, %rax
 	addq	440(%rsp), %r14
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	adcq	448(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	adcq	456(%rsp), %r12
-	adcq	464(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	56(%rsp), %r14          ## 8-byte Reload
-	adcq	472(%rsp), %r14
-	movq	16(%rsp), %r15          ## 8-byte Reload
-	adcq	480(%rsp), %r15
-	movq	(%rsp), %rbp            ## 8-byte Reload
-	adcq	488(%rsp), %rbp
-	movq	32(%rsp), %rbx          ## 8-byte Reload
-	adcq	496(%rsp), %rbx
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	504(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	adcq	512(%rsp), %r13
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	360(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	432(%rsp), %rcx
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	addq	360(%rsp), %rax
-	adcq	368(%rsp), %r12
-	movq	%r12, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rdx          ## 8-byte Reload
-	adcq	376(%rsp), %rdx
-	movq	%rdx, 24(%rsp)          ## 8-byte Spill
-	adcq	384(%rsp), %r14
-	movq	%r14, 56(%rsp)          ## 8-byte Spill
-	adcq	392(%rsp), %r15
-	movq	%r15, 16(%rsp)          ## 8-byte Spill
-	adcq	400(%rsp), %rbp
-	movq	%rbp, (%rsp)            ## 8-byte Spill
-	adcq	408(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r14           ## 8-byte Reload
-	adcq	416(%rsp), %r14
-	adcq	424(%rsp), %r13
-	movq	%r13, %r15
-	adcq	$0, %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	280(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	280(%rsp), %r12
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	288(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	296(%rsp), %rbp
-	movq	56(%rsp), %rax          ## 8-byte Reload
+	adcq	448(%rsp), %r15
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	adcq	456(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %r13                  ## 8-byte Reload
+	adcq	464(%rsp), %r13
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	adcq	472(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  ## 8-byte Spill
+	adcq	480(%rsp), %rbp
+	movq	%rbp, (%rsp)                    ## 8-byte Spill
+	adcq	488(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	movq	40(%rsp), %rbp                  ## 8-byte Reload
+	adcq	496(%rsp), %rbp
+	movq	72(%rsp), %rcx                  ## 8-byte Reload
+	adcq	88(%rcx), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	setb	%bl
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r15, %rdx
+	leaq	368(%rsp), %rdi
+	movq	%r12, %rsi
+	callq	_mulPv512x64bmi2
+	movq	432(%rsp), %r14
+	addb	$255, %bl
+	adcq	$0, %r14
+	addq	368(%rsp), %r15
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	adcq	376(%rsp), %rax
+	adcq	384(%rsp), %r13
+	movq	%r13, 16(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rbx                  ## 8-byte Reload
+	adcq	392(%rsp), %rbx
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	400(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	408(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	adcq	416(%rsp), %rbp
+	movq	%rbp, 40(%rsp)                  ## 8-byte Spill
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	424(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	movq	72(%rsp), %rcx                  ## 8-byte Reload
+	adcq	96(%rcx), %r14
+	setb	%r15b
+	movq	56(%rsp), %r13                  ## 8-byte Reload
+	movq	%r13, %rdx
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	296(%rsp), %rdi
+	movq	%r12, %rsi
+	callq	_mulPv512x64bmi2
+	movq	360(%rsp), %r12
+	addb	$255, %r15b
+	adcq	$0, %r12
+	addq	296(%rsp), %rbp
+	movq	16(%rsp), %rax                  ## 8-byte Reload
 	adcq	304(%rsp), %rax
-	movq	%rax, 56(%rsp)          ## 8-byte Spill
-	movq	16(%rsp), %r13          ## 8-byte Reload
-	adcq	312(%rsp), %r13
-	movq	(%rsp), %r12            ## 8-byte Reload
-	adcq	320(%rsp), %r12
-	movq	32(%rsp), %rbx          ## 8-byte Reload
-	adcq	328(%rsp), %rbx
-	adcq	336(%rsp), %r14
-	movq	%r14, 8(%rsp)           ## 8-byte Spill
-	adcq	344(%rsp), %r15
-	movq	%r15, 64(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %r14          ## 8-byte Reload
+	adcq	312(%rsp), %rbx
+	movq	%rbx, 24(%rsp)                  ## 8-byte Spill
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	320(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	328(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
+	adcq	336(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	344(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
 	adcq	352(%rsp), %r14
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	64(%rax), %rdx
-	leaq	200(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	272(%rsp), %rcx
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	addq	200(%rsp), %rax
-	adcq	208(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	56(%rsp), %rbp          ## 8-byte Reload
-	adcq	216(%rsp), %rbp
-	adcq	224(%rsp), %r13
-	movq	%r13, 16(%rsp)          ## 8-byte Spill
-	adcq	232(%rsp), %r12
-	movq	%r12, (%rsp)            ## 8-byte Spill
-	adcq	240(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r15           ## 8-byte Reload
-	adcq	248(%rsp), %r15
-	movq	64(%rsp), %r12          ## 8-byte Reload
-	adcq	256(%rsp), %r12
-	adcq	264(%rsp), %r14
-	adcq	$0, %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	96(%rsp), %rdx          ## 8-byte Reload
+	movq	72(%rsp), %rbp                  ## 8-byte Reload
+	adcq	104(%rbp), %r12
+	setb	%r15b
+	movq	%r13, %rdx
 	imulq	%rax, %rdx
 	movq	%rax, %rbx
-	leaq	120(%rsp), %rdi
-	movq	72(%rsp), %r13          ## 8-byte Reload
-	movq	%r13, %rsi
-	callq	l_mulPv576x64
-	addq	120(%rsp), %rbx
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	128(%rsp), %rcx
-	movq	%rbp, %rdx
-	adcq	136(%rsp), %rdx
-	movq	16(%rsp), %rsi          ## 8-byte Reload
-	adcq	144(%rsp), %rsi
-	movq	%rsi, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rdi            ## 8-byte Reload
-	adcq	152(%rsp), %rdi
-	movq	%rdi, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %rbx          ## 8-byte Reload
-	adcq	160(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	movq	%r15, %r8
-	adcq	168(%rsp), %r8
-	movq	%r8, 8(%rsp)            ## 8-byte Spill
-	movq	%r12, %r15
+	leaq	224(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	288(%rsp), %r13
+	addb	$255, %r15b
+	adcq	$0, %r13
+	addq	224(%rsp), %rbx
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	232(%rsp), %rax
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	248(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
+	adcq	256(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	264(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	adcq	272(%rsp), %r14
+	adcq	280(%rsp), %r12
+	adcq	112(%rbp), %r13
+	setb	%r15b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	152(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	addb	$255, %r15b
+	movq	216(%rsp), %rdx
+	adcq	$0, %rdx
+	addq	152(%rsp), %rbx
+	movq	(%rsp), %r9                     ## 8-byte Reload
+	adcq	160(%rsp), %r9
+	movq	%r9, (%rsp)                     ## 8-byte Spill
+	movq	8(%rsp), %r10                   ## 8-byte Reload
+	adcq	168(%rsp), %r10
+	movq	%r10, 8(%rsp)                   ## 8-byte Spill
+	movq	40(%rsp), %r15                  ## 8-byte Reload
 	adcq	176(%rsp), %r15
-	adcq	184(%rsp), %r14
-	movq	40(%rsp), %r9           ## 8-byte Reload
-	adcq	192(%rsp), %r9
-	movq	%rcx, %rax
-	movq	%rcx, %r11
-	movq	%r13, %rbp
-	subq	(%rbp), %rax
+	movq	48(%rsp), %r11                  ## 8-byte Reload
+	adcq	184(%rsp), %r11
+	adcq	192(%rsp), %r14
+	adcq	200(%rsp), %r12
+	adcq	208(%rsp), %r13
+	adcq	120(%rbp), %rdx
+	xorl	%r8d, %r8d
+	subq	80(%rsp), %r9                   ## 8-byte Folded Reload
+	sbbq	88(%rsp), %r10                  ## 8-byte Folded Reload
+	movq	%r15, %rdi
+	sbbq	96(%rsp), %rdi                  ## 8-byte Folded Reload
+	movq	%r11, %rbp
+	sbbq	104(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	%r14, %rbx
+	sbbq	112(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%r12, %rsi
+	sbbq	120(%rsp), %rsi                 ## 8-byte Folded Reload
+	movq	%r13, %rax
+	sbbq	128(%rsp), %rax                 ## 8-byte Folded Reload
 	movq	%rdx, %rcx
-	movq	%rdx, %r12
-	sbbq	8(%rbp), %rcx
-	movq	%rsi, %rdx
-	sbbq	16(%rbp), %rdx
-	movq	%rdi, %rsi
-	sbbq	24(%rbp), %rsi
-	movq	%rbx, %rdi
-	sbbq	32(%rbp), %rdi
-	movq	%r8, %r10
-	sbbq	40(%rbp), %r10
-	movq	%r15, %r13
-	sbbq	48(%rbp), %r13
-	movq	%r14, %r8
-	sbbq	56(%rbp), %r8
-	movq	%rbp, %rbx
-	movq	%r9, %rbp
-	sbbq	64(%rbx), %rbp
-	movq	%rbp, %rbx
-	sarq	$63, %rbx
-	cmovsq	%r11, %rax
-	movq	112(%rsp), %rbx         ## 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovsq	%r12, %rcx
-	movq	%rcx, 8(%rbx)
-	cmovsq	16(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rbx)
-	cmovsq	(%rsp), %rsi            ## 8-byte Folded Reload
-	movq	%rsi, 24(%rbx)
-	cmovsq	32(%rsp), %rdi          ## 8-byte Folded Reload
-	movq	%rdi, 32(%rbx)
-	cmovsq	8(%rsp), %r10           ## 8-byte Folded Reload
-	movq	%r10, 40(%rbx)
-	cmovsq	%r15, %r13
-	movq	%r13, 48(%rbx)
-	cmovsq	%r14, %r8
-	movq	%r8, 56(%rbx)
-	cmovsq	%r9, %rbp
-	movq	%rbp, 64(%rbx)
-	addq	$1560, %rsp             ## imm = 0x618
+	sbbq	136(%rsp), %rcx                 ## 8-byte Folded Reload
+	sbbq	%r8, %r8
+	testb	$1, %r8b
+	cmovneq	%rdx, %rcx
+	movq	144(%rsp), %rdx                 ## 8-byte Reload
+	movq	%rcx, 56(%rdx)
+	cmovneq	%r13, %rax
+	movq	%rax, 48(%rdx)
+	cmovneq	%r12, %rsi
+	movq	%rsi, 40(%rdx)
+	cmovneq	%r14, %rbx
+	movq	%rbx, 32(%rdx)
+	cmovneq	%r11, %rbp
+	movq	%rbp, 24(%rdx)
+	cmovneq	%r15, %rdi
+	movq	%rdi, 16(%rdx)
+	cmovneq	8(%rsp), %r10                   ## 8-byte Folded Reload
+	movq	%r10, 8(%rdx)
+	cmovneq	(%rsp), %r9                     ## 8-byte Folded Reload
+	movq	%r9, (%rdx)
+	addq	$728, %rsp                      ## imm = 0x2D8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -12689,425 +5905,301 @@ _mcl_fp_montNF9Lbmi2:                   ## @mcl_fp_montNF9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montRed9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_montRedNF8Lbmi2         ## -- Begin function mcl_fp_montRedNF8Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_montRed9Lbmi2:                  ## @mcl_fp_montRed9Lbmi2
-## BB#0:
+_mcl_fp_montRedNF8Lbmi2:                ## @mcl_fp_montRedNF8Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$936, %rsp              ## imm = 0x3A8
-	movq	%rdx, %rax
-	movq	%rdi, 208(%rsp)         ## 8-byte Spill
-	movq	-8(%rax), %rcx
-	movq	%rcx, 96(%rsp)          ## 8-byte Spill
-	movq	(%rsi), %r14
-	movq	8(%rsi), %rdx
-	movq	%rdx, (%rsp)            ## 8-byte Spill
-	movq	%r14, %rdx
+	subq	$728, %rsp                      ## imm = 0x2D8
+	movq	%rdi, 144(%rsp)                 ## 8-byte Spill
+	movq	56(%rdx), %rax
+	movq	%rax, 136(%rsp)                 ## 8-byte Spill
+	movq	48(%rdx), %rax
+	movq	%rax, 128(%rsp)                 ## 8-byte Spill
+	movq	40(%rdx), %rax
+	movq	%rax, 120(%rsp)                 ## 8-byte Spill
+	movq	32(%rdx), %rax
+	movq	%rax, 112(%rsp)                 ## 8-byte Spill
+	movq	24(%rdx), %rax
+	movq	%rax, 104(%rsp)                 ## 8-byte Spill
+	movq	16(%rdx), %rax
+	movq	%rax, 96(%rsp)                  ## 8-byte Spill
+	movq	8(%rdx), %rax
+	movq	%rax, 88(%rsp)                  ## 8-byte Spill
+	movq	%rsi, 72(%rsp)                  ## 8-byte Spill
+	movq	56(%rsi), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	48(%rsi), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	40(%rsi), %r12
+	movq	32(%rsi), %r13
+	movq	24(%rsi), %r15
+	movq	16(%rsi), %r14
+	movq	(%rsi), %rbx
+	movq	8(%rsi), %rbp
+	movq	-8(%rdx), %rcx
+	movq	%rcx, 56(%rsp)                  ## 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdx, %rsi
+	movq	%rdx, 64(%rsp)                  ## 8-byte Spill
+	movq	%rax, 80(%rsp)                  ## 8-byte Spill
+	movq	%rbx, %rdx
 	imulq	%rcx, %rdx
-	movq	136(%rsi), %rcx
-	movq	%rcx, 88(%rsp)          ## 8-byte Spill
-	movq	128(%rsi), %rcx
-	movq	%rcx, 56(%rsp)          ## 8-byte Spill
-	movq	120(%rsi), %rcx
-	movq	%rcx, 80(%rsp)          ## 8-byte Spill
-	movq	112(%rsi), %rcx
-	movq	%rcx, 72(%rsp)          ## 8-byte Spill
-	movq	104(%rsi), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	movq	96(%rsi), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	movq	88(%rsi), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	80(%rsi), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	movq	72(%rsi), %r12
-	movq	64(%rsi), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	56(%rsi), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	48(%rsi), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	movq	40(%rsi), %rbp
-	movq	32(%rsi), %rbx
-	movq	24(%rsi), %r13
-	movq	16(%rsi), %r15
-	movq	%rax, %rcx
-	movq	(%rcx), %rax
-	movq	%rax, 144(%rsp)         ## 8-byte Spill
-	movq	64(%rcx), %rax
-	movq	%rax, 200(%rsp)         ## 8-byte Spill
-	movq	56(%rcx), %rax
-	movq	%rax, 192(%rsp)         ## 8-byte Spill
-	movq	48(%rcx), %rax
-	movq	%rax, 184(%rsp)         ## 8-byte Spill
-	movq	40(%rcx), %rax
-	movq	%rax, 176(%rsp)         ## 8-byte Spill
-	movq	32(%rcx), %rax
-	movq	%rax, 168(%rsp)         ## 8-byte Spill
-	movq	24(%rcx), %rax
-	movq	%rax, 160(%rsp)         ## 8-byte Spill
-	movq	16(%rcx), %rax
-	movq	%rax, 152(%rsp)         ## 8-byte Spill
-	movq	8(%rcx), %rax
-	movq	%rax, 136(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rsi
-	movq	%rsi, 104(%rsp)         ## 8-byte Spill
-	leaq	856(%rsp), %rdi
-	callq	l_mulPv576x64
-	addq	856(%rsp), %r14
-	movq	(%rsp), %rcx            ## 8-byte Reload
-	adcq	864(%rsp), %rcx
-	adcq	872(%rsp), %r15
-	adcq	880(%rsp), %r13
-	adcq	888(%rsp), %rbx
-	movq	%rbx, 120(%rsp)         ## 8-byte Spill
-	adcq	896(%rsp), %rbp
-	movq	%rbp, 112(%rsp)         ## 8-byte Spill
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	adcq	904(%rsp), %rax
-	movq	%rax, 64(%rsp)          ## 8-byte Spill
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	912(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	920(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	928(%rsp), %r12
-	movq	%r12, (%rsp)            ## 8-byte Spill
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	$0, %rbp
-	adcq	$0, 8(%rsp)             ## 8-byte Folded Spill
-	adcq	$0, 16(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 48(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 72(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 56(%rsp)            ## 8-byte Folded Spill
-	movq	88(%rsp), %r14          ## 8-byte Reload
-	adcq	$0, %r14
-	sbbq	%r12, %r12
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	776(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %r12d
-	addq	776(%rsp), %rbx
-	adcq	784(%rsp), %r15
-	adcq	792(%rsp), %r13
-	movq	%r13, 128(%rsp)         ## 8-byte Spill
-	movq	120(%rsp), %rax         ## 8-byte Reload
-	adcq	800(%rsp), %rax
-	movq	%rax, 120(%rsp)         ## 8-byte Spill
-	movq	112(%rsp), %rax         ## 8-byte Reload
-	adcq	808(%rsp), %rax
-	movq	%rax, 112(%rsp)         ## 8-byte Spill
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	adcq	816(%rsp), %rax
-	movq	%rax, 64(%rsp)          ## 8-byte Spill
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	824(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	832(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	840(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	adcq	848(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r13           ## 8-byte Reload
-	adcq	$0, %r13
-	adcq	$0, 16(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 48(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 72(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	movq	56(%rsp), %rbx          ## 8-byte Reload
-	adcq	$0, %rbx
-	adcq	$0, %r14
-	movq	%r14, 88(%rsp)          ## 8-byte Spill
-	adcq	$0, %r12
-	movq	%r15, %rdx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	696(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	696(%rsp), %r15
-	movq	128(%rsp), %rcx         ## 8-byte Reload
-	adcq	704(%rsp), %rcx
-	movq	120(%rsp), %rax         ## 8-byte Reload
-	adcq	712(%rsp), %rax
-	movq	%rax, 120(%rsp)         ## 8-byte Spill
-	movq	112(%rsp), %rax         ## 8-byte Reload
+	leaq	656(%rsp), %rdi
+	callq	_mulPv512x64bmi2
+	addq	656(%rsp), %rbx
+	adcq	664(%rsp), %rbp
+	adcq	672(%rsp), %r14
+	adcq	680(%rsp), %r15
+	adcq	688(%rsp), %r13
+	adcq	696(%rsp), %r12
+	movq	%r12, 48(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	704(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rbx                   ## 8-byte Reload
+	adcq	712(%rsp), %rbx
+	movq	72(%rsp), %rax                  ## 8-byte Reload
+	movq	64(%rax), %rax
 	adcq	720(%rsp), %rax
-	movq	%rax, 112(%rsp)         ## 8-byte Spill
-	movq	64(%rsp), %rbp          ## 8-byte Reload
-	adcq	728(%rsp), %rbp
-	movq	32(%rsp), %r14          ## 8-byte Reload
-	adcq	736(%rsp), %r14
-	movq	40(%rsp), %r15          ## 8-byte Reload
-	adcq	744(%rsp), %r15
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	752(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	760(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	768(%rsp), %r13
-	movq	%r13, 8(%rsp)           ## 8-byte Spill
-	adcq	$0, 16(%rsp)            ## 8-byte Folded Spill
-	movq	48(%rsp), %r13          ## 8-byte Reload
-	adcq	$0, %r13
-	adcq	$0, 72(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %rbx
-	movq	%rbx, 56(%rsp)          ## 8-byte Spill
-	adcq	$0, 88(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rcx, %rbx
-	movq	%rbx, %rdx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	616(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	616(%rsp), %rbx
-	movq	120(%rsp), %rax         ## 8-byte Reload
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	setb	%r12b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rbp, %rdx
+	leaq	584(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	648(%rsp), %rax
+	addb	$255, %r12b
+	adcq	$0, %rax
+	movq	%rax, %rcx
+	addq	584(%rsp), %rbp
+	adcq	592(%rsp), %r14
+	adcq	600(%rsp), %r15
+	adcq	608(%rsp), %r13
+	movq	48(%rsp), %rax                  ## 8-byte Reload
+	adcq	616(%rsp), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rax                  ## 8-byte Reload
 	adcq	624(%rsp), %rax
-	movq	112(%rsp), %rcx         ## 8-byte Reload
-	adcq	632(%rsp), %rcx
-	movq	%rcx, 112(%rsp)         ## 8-byte Spill
-	adcq	640(%rsp), %rbp
-	movq	%rbp, 64(%rsp)          ## 8-byte Spill
-	adcq	648(%rsp), %r14
-	movq	%r14, 32(%rsp)          ## 8-byte Spill
-	adcq	656(%rsp), %r15
-	movq	(%rsp), %r14            ## 8-byte Reload
-	adcq	664(%rsp), %r14
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	672(%rsp), %rbp
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	680(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	688(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	adcq	$0, %r13
-	movq	%r13, 48(%rsp)          ## 8-byte Spill
-	adcq	$0, 72(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 56(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 88(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rax, %rbx
-	movq	%rbx, %rdx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	536(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	536(%rsp), %rbx
-	movq	112(%rsp), %rax         ## 8-byte Reload
-	adcq	544(%rsp), %rax
-	movq	64(%rsp), %rcx          ## 8-byte Reload
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	adcq	632(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rax                    ## 8-byte Reload
+	adcq	640(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	movq	72(%rsp), %rax                  ## 8-byte Reload
+	adcq	72(%rax), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	setb	%bl
+	movq	56(%rsp), %rbp                  ## 8-byte Reload
+	movq	%rbp, %rdx
+	imulq	%r14, %rdx
+	leaq	512(%rsp), %rdi
+	movq	64(%rsp), %r12                  ## 8-byte Reload
+	movq	%r12, %rsi
+	callq	_mulPv512x64bmi2
+	movq	576(%rsp), %rax
+	addb	$255, %bl
+	adcq	$0, %rax
+	addq	512(%rsp), %r14
+	adcq	520(%rsp), %r15
+	adcq	528(%rsp), %r13
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	536(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	adcq	544(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
 	adcq	552(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	movq	32(%rsp), %rcx          ## 8-byte Reload
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rcx                    ## 8-byte Reload
 	adcq	560(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	adcq	568(%rsp), %r15
-	movq	%r15, 40(%rsp)          ## 8-byte Spill
-	adcq	576(%rsp), %r14
-	movq	%r14, (%rsp)            ## 8-byte Spill
-	adcq	584(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r13           ## 8-byte Reload
-	adcq	592(%rsp), %r13
-	movq	16(%rsp), %r15          ## 8-byte Reload
-	adcq	600(%rsp), %r15
-	movq	48(%rsp), %rbp          ## 8-byte Reload
-	adcq	608(%rsp), %rbp
-	movq	72(%rsp), %rbx          ## 8-byte Reload
-	adcq	$0, %rbx
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 56(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 88(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rax, %rdx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	568(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
 	movq	%rax, %r14
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	456(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	456(%rsp), %r14
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	adcq	464(%rsp), %rax
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	472(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rcx          ## 8-byte Reload
+	movq	72(%rsp), %rax                  ## 8-byte Reload
+	adcq	80(%rax), %r14
+	setb	%bl
+	movq	%rbp, %rdx
+	imulq	%r15, %rdx
+	leaq	440(%rsp), %rdi
+	movq	%r12, %rsi
+	callq	_mulPv512x64bmi2
+	movq	504(%rsp), %rax
+	addb	$255, %bl
+	adcq	$0, %rax
+	addq	440(%rsp), %r15
+	adcq	448(%rsp), %r13
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	456(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rbx                  ## 8-byte Reload
+	adcq	464(%rsp), %rbx
+	movq	8(%rsp), %rbp                   ## 8-byte Reload
+	adcq	472(%rsp), %rbp
+	movq	(%rsp), %rcx                    ## 8-byte Reload
 	adcq	480(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rcx            ## 8-byte Reload
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
 	adcq	488(%rsp), %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	496(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	504(%rsp), %r13
-	movq	%r13, 8(%rsp)           ## 8-byte Spill
-	adcq	512(%rsp), %r15
-	movq	%r15, 16(%rsp)          ## 8-byte Spill
-	adcq	520(%rsp), %rbp
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	528(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          ## 8-byte Spill
-	movq	80(%rsp), %r14          ## 8-byte Reload
-	adcq	$0, %r14
-	movq	56(%rsp), %r13          ## 8-byte Reload
-	adcq	$0, %r13
-	movq	88(%rsp), %rbx          ## 8-byte Reload
-	adcq	$0, %rbx
-	adcq	$0, %r12
-	movq	%rax, %rdx
-	movq	%rax, %r15
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	376(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	376(%rsp), %r15
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	384(%rsp), %rax
-	movq	40(%rsp), %rcx          ## 8-byte Reload
-	adcq	392(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rcx            ## 8-byte Reload
-	adcq	400(%rsp), %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	movq	24(%rsp), %rbp          ## 8-byte Reload
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	adcq	496(%rsp), %r14
+	movq	%r14, 40(%rsp)                  ## 8-byte Spill
+	movq	72(%rsp), %r14                  ## 8-byte Reload
+	adcq	88(%r14), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	setb	%r12b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r13, %rdx
+	leaq	368(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	432(%rsp), %r15
+	addb	$255, %r12b
+	adcq	$0, %r15
+	addq	368(%rsp), %r13
+	movq	48(%rsp), %r13                  ## 8-byte Reload
+	adcq	376(%rsp), %r13
+	adcq	384(%rsp), %rbx
+	movq	%rbx, 24(%rsp)                  ## 8-byte Spill
+	adcq	392(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rbx                    ## 8-byte Reload
+	adcq	400(%rsp), %rbx
+	movq	16(%rsp), %rbp                  ## 8-byte Reload
 	adcq	408(%rsp), %rbp
-	movq	8(%rsp), %rcx           ## 8-byte Reload
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
 	adcq	416(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rcx          ## 8-byte Reload
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
 	adcq	424(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rcx          ## 8-byte Reload
-	adcq	432(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	movq	72(%rsp), %r15          ## 8-byte Reload
-	adcq	440(%rsp), %r15
-	adcq	448(%rsp), %r14
-	movq	%r14, 80(%rsp)          ## 8-byte Spill
-	adcq	$0, %r13
-	movq	%r13, %r14
-	adcq	$0, %rbx
-	movq	%rbx, 88(%rsp)          ## 8-byte Spill
-	adcq	$0, %r12
-	movq	%rax, %rbx
-	movq	%rbx, %rdx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	adcq	96(%r14), %r15
+	setb	%r14b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r13, %rdx
 	leaq	296(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	296(%rsp), %rbx
-	movq	40(%rsp), %rax          ## 8-byte Reload
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	360(%rsp), %r12
+	addb	$255, %r14b
+	adcq	$0, %r12
+	addq	296(%rsp), %r13
+	movq	24(%rsp), %rax                  ## 8-byte Reload
 	adcq	304(%rsp), %rax
-	movq	(%rsp), %r13            ## 8-byte Reload
-	adcq	312(%rsp), %r13
-	adcq	320(%rsp), %rbp
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	328(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rcx          ## 8-byte Reload
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	312(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	adcq	320(%rsp), %rbx
+	movq	%rbx, (%rsp)                    ## 8-byte Spill
+	adcq	328(%rsp), %rbp
+	movq	%rbp, 16(%rsp)                  ## 8-byte Spill
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
 	adcq	336(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rcx          ## 8-byte Reload
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
 	adcq	344(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
 	adcq	352(%rsp), %r15
-	movq	%r15, 72(%rsp)          ## 8-byte Spill
-	movq	80(%rsp), %r15          ## 8-byte Reload
-	adcq	360(%rsp), %r15
-	adcq	368(%rsp), %r14
-	movq	%r14, 56(%rsp)          ## 8-byte Spill
-	movq	88(%rsp), %r14          ## 8-byte Reload
+	movq	72(%rsp), %rbx                  ## 8-byte Reload
+	adcq	104(%rbx), %r12
+	setb	%r13b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	224(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	movq	288(%rsp), %r14
+	addb	$255, %r13b
 	adcq	$0, %r14
-	adcq	$0, %r12
-	movq	96(%rsp), %rdx          ## 8-byte Reload
+	addq	224(%rsp), %rbp
+	movq	8(%rsp), %rax                   ## 8-byte Reload
+	adcq	232(%rsp), %rax
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	248(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
+	adcq	256(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	adcq	264(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	adcq	272(%rsp), %r15
+	adcq	280(%rsp), %r12
+	adcq	112(%rbx), %r14
+	setb	%r13b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
 	imulq	%rax, %rdx
-	movq	%rax, %rbx
-	leaq	216(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	216(%rsp), %rbx
-	movq	%r13, %rsi
-	adcq	224(%rsp), %rsi
-	movq	%rsi, (%rsp)            ## 8-byte Spill
-	adcq	232(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r9            ## 8-byte Reload
-	adcq	240(%rsp), %r9
-	movq	%r9, 8(%rsp)            ## 8-byte Spill
-	movq	16(%rsp), %r8           ## 8-byte Reload
-	adcq	248(%rsp), %r8
-	movq	%r8, 16(%rsp)           ## 8-byte Spill
-	movq	48(%rsp), %rbx          ## 8-byte Reload
-	adcq	256(%rsp), %rbx
-	movq	72(%rsp), %rax          ## 8-byte Reload
-	adcq	264(%rsp), %rax
-	movq	%r15, %rcx
-	adcq	272(%rsp), %rcx
-	movq	56(%rsp), %rdx          ## 8-byte Reload
-	adcq	280(%rsp), %rdx
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	adcq	288(%rsp), %r14
-	movq	%r14, %r11
-	adcq	$0, %r12
-	subq	144(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rbp, %rdi
-	sbbq	136(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%r9, %rbp
-	sbbq	152(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%r8, %r13
-	sbbq	160(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%rbx, %r15
-	sbbq	168(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%rax, %r14
-	sbbq	176(%rsp), %r14         ## 8-byte Folded Reload
-	movq	%rcx, %r10
-	sbbq	184(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	sbbq	192(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r11, %r9
-	sbbq	200(%rsp), %r9          ## 8-byte Folded Reload
-	sbbq	$0, %r12
-	andl	$1, %r12d
-	cmovneq	%r11, %r9
-	testb	%r12b, %r12b
-	cmovneq	(%rsp), %rsi            ## 8-byte Folded Reload
-	movq	208(%rsp), %rdx         ## 8-byte Reload
-	movq	%rsi, (%rdx)
-	cmovneq	24(%rsp), %rdi          ## 8-byte Folded Reload
-	movq	%rdi, 8(%rdx)
-	cmovneq	8(%rsp), %rbp           ## 8-byte Folded Reload
-	movq	%rbp, 16(%rdx)
-	cmovneq	16(%rsp), %r13          ## 8-byte Folded Reload
-	movq	%r13, 24(%rdx)
-	cmovneq	%rbx, %r15
-	movq	%r15, 32(%rdx)
-	cmovneq	%rax, %r14
-	movq	%r14, 40(%rdx)
-	cmovneq	%rcx, %r10
-	movq	%r10, 48(%rdx)
-	cmovneq	56(%rsp), %r8           ## 8-byte Folded Reload
-	movq	%r8, 56(%rdx)
-	movq	%r9, 64(%rdx)
-	addq	$936, %rsp              ## imm = 0x3A8
+	movq	%rax, %rbp
+	leaq	152(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64bmi2
+	addb	$255, %r13b
+	movq	216(%rsp), %rdx
+	adcq	$0, %rdx
+	addq	152(%rsp), %rbp
+	movq	(%rsp), %r8                     ## 8-byte Reload
+	adcq	160(%rsp), %r8
+	movq	%r8, (%rsp)                     ## 8-byte Spill
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	168(%rsp), %rcx
+	movq	40(%rsp), %rdi                  ## 8-byte Reload
+	adcq	176(%rsp), %rdi
+	movq	32(%rsp), %r10                  ## 8-byte Reload
+	adcq	184(%rsp), %r10
+	adcq	192(%rsp), %r15
+	adcq	200(%rsp), %r12
+	adcq	208(%rsp), %r14
+	adcq	120(%rbx), %rdx
+	subq	80(%rsp), %r8                   ## 8-byte Folded Reload
+	movq	%rcx, %r9
+	movq	%rcx, %r11
+	sbbq	88(%rsp), %r9                   ## 8-byte Folded Reload
+	movq	%rdi, %rsi
+	movq	%rdi, %r13
+	sbbq	96(%rsp), %rsi                  ## 8-byte Folded Reload
+	movq	%r10, %rdi
+	sbbq	104(%rsp), %rdi                 ## 8-byte Folded Reload
+	movq	%r15, %rbx
+	sbbq	112(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%r12, %rbp
+	sbbq	120(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	%r14, %rax
+	sbbq	128(%rsp), %rax                 ## 8-byte Folded Reload
+	movq	%rdx, %rcx
+	sbbq	136(%rsp), %rcx                 ## 8-byte Folded Reload
+	cmovsq	%rdx, %rcx
+	movq	144(%rsp), %rdx                 ## 8-byte Reload
+	movq	%rcx, 56(%rdx)
+	cmovsq	%r14, %rax
+	movq	%rax, 48(%rdx)
+	cmovsq	%r12, %rbp
+	movq	%rbp, 40(%rdx)
+	cmovsq	%r15, %rbx
+	movq	%rbx, 32(%rdx)
+	cmovsq	%r10, %rdi
+	movq	%rdi, 24(%rdx)
+	cmovsq	%r13, %rsi
+	movq	%rsi, 16(%rdx)
+	cmovsq	%r11, %r9
+	movq	%r9, 8(%rdx)
+	cmovsq	(%rsp), %r8                     ## 8-byte Folded Reload
+	movq	%r8, (%rdx)
+	addq	$728, %rsp                      ## imm = 0x2D8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13115,279 +6207,227 @@ _mcl_fp_montRed9Lbmi2:                  ## @mcl_fp_montRed9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_addPre9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_addPre8Lbmi2            ## -- Begin function mcl_fp_addPre8Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_addPre9Lbmi2:                   ## @mcl_fp_addPre9Lbmi2
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
+_mcl_fp_addPre8Lbmi2:                   ## @mcl_fp_addPre8Lbmi2
+## %bb.0:
 	pushq	%rbx
-	movq	64(%rdx), %r8
-	movq	64(%rsi), %r15
-	movq	56(%rsi), %r9
-	movq	48(%rsi), %r10
-	movq	40(%rsi), %r11
-	movq	24(%rsi), %r12
-	movq	32(%rsi), %r14
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rcx
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rcx
-	movq	16(%rdx), %rax
-	adcq	16(%rsi), %rax
-	adcq	24(%rdx), %r12
-	movq	56(%rdx), %r13
-	movq	48(%rdx), %rsi
-	movq	40(%rdx), %rbp
-	movq	32(%rdx), %rdx
+	movq	56(%rsi), %rax
+	movq	48(%rsi), %rcx
+	movq	40(%rsi), %r8
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r10
+	movq	16(%rsi), %r11
+	movq	(%rsi), %rbx
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rbx
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r11
+	adcq	24(%rdx), %r10
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r8
+	adcq	48(%rdx), %rcx
+	adcq	56(%rdx), %rax
+	movq	%rax, 56(%rdi)
+	movq	%rcx, 48(%rdi)
+	movq	%r8, 40(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r10, 24(%rdi)
+	movq	%r11, 16(%rdi)
+	movq	%rsi, 8(%rdi)
 	movq	%rbx, (%rdi)
-	movq	%rcx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r12, 24(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 32(%rdi)
-	adcq	%r11, %rbp
-	movq	%rbp, 40(%rdi)
-	adcq	%r10, %rsi
-	movq	%rsi, 48(%rdi)
-	adcq	%r9, %r13
-	movq	%r13, 56(%rdi)
-	adcq	%r8, %r15
-	movq	%r15, 64(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
+	setb	%al
+	movzbl	%al, %eax
 	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_subPre9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_subPre8Lbmi2            ## -- Begin function mcl_fp_subPre8Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_subPre9Lbmi2:                   ## @mcl_fp_subPre9Lbmi2
-## BB#0:
-	movq	32(%rdx), %r8
-	movq	(%rsi), %rcx
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	movq	%rcx, (%rdi)
-	movq	8(%rsi), %rcx
-	sbbq	8(%rdx), %rcx
-	movq	%rcx, 8(%rdi)
-	movq	16(%rsi), %rcx
-	sbbq	16(%rdx), %rcx
-	movq	%rcx, 16(%rdi)
-	movq	24(%rsi), %rcx
-	sbbq	24(%rdx), %rcx
-	movq	%rcx, 24(%rdi)
-	movq	32(%rsi), %rcx
-	sbbq	%r8, %rcx
-	movq	40(%rdx), %r8
-	movq	%rcx, 32(%rdi)
-	movq	40(%rsi), %rcx
-	sbbq	%r8, %rcx
-	movq	48(%rdx), %r8
-	movq	%rcx, 40(%rdi)
-	movq	48(%rsi), %rcx
-	sbbq	%r8, %rcx
-	movq	56(%rdx), %r8
-	movq	%rcx, 48(%rdi)
+_mcl_fp_subPre8Lbmi2:                   ## @mcl_fp_subPre8Lbmi2
+## %bb.0:
+	pushq	%r14
+	pushq	%rbx
 	movq	56(%rsi), %rcx
-	sbbq	%r8, %rcx
+	movq	48(%rsi), %r8
+	movq	40(%rsi), %r9
+	movq	32(%rsi), %r10
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %r14
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
+	subq	(%rdx), %r14
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %rbx
+	sbbq	24(%rdx), %r11
+	sbbq	32(%rdx), %r10
+	sbbq	40(%rdx), %r9
+	sbbq	48(%rdx), %r8
+	sbbq	56(%rdx), %rcx
 	movq	%rcx, 56(%rdi)
-	movq	64(%rdx), %rcx
-	movq	64(%rsi), %rdx
-	sbbq	%rcx, %rdx
-	movq	%rdx, 64(%rdi)
-	sbbq	$0, %rax
+	movq	%r8, 48(%rdi)
+	movq	%r9, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r11, 24(%rdi)
+	movq	%rbx, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r14, (%rdi)
+	sbbq	%rax, %rax
 	andl	$1, %eax
+	popq	%rbx
+	popq	%r14
 	retq
-
-	.globl	_mcl_fp_shr1_9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_shr1_8Lbmi2             ## -- Begin function mcl_fp_shr1_8Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_shr1_9Lbmi2:                    ## @mcl_fp_shr1_9Lbmi2
-## BB#0:
+_mcl_fp_shr1_8Lbmi2:                    ## @mcl_fp_shr1_8Lbmi2
+## %bb.0:
 	pushq	%rbx
-	movq	64(%rsi), %r8
-	movq	56(%rsi), %r9
-	movq	48(%rsi), %r10
-	movq	40(%rsi), %r11
-	movq	32(%rsi), %rcx
-	movq	24(%rsi), %rdx
-	movq	16(%rsi), %rax
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rbx
-	movq	%rbx, (%rdi)
-	shrdq	$1, %rax, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rdx, %rax
-	movq	%rax, 16(%rdi)
-	shrdq	$1, %rcx, %rdx
-	movq	%rdx, 24(%rdi)
-	shrdq	$1, %r11, %rcx
-	movq	%rcx, 32(%rdi)
-	shrdq	$1, %r10, %r11
-	movq	%r11, 40(%rdi)
-	shrdq	$1, %r9, %r10
-	movq	%r10, 48(%rdi)
+	movq	(%rsi), %r9
+	movq	8(%rsi), %r8
+	movq	16(%rsi), %r10
+	movq	24(%rsi), %r11
+	movq	32(%rsi), %rax
+	movq	40(%rsi), %rdx
+	movq	48(%rsi), %rcx
+	movq	56(%rsi), %rsi
+	movq	%rsi, %rbx
+	shrq	%rbx
+	movq	%rbx, 56(%rdi)
+	shldq	$63, %rcx, %rsi
+	movq	%rsi, 48(%rdi)
+	shldq	$63, %rdx, %rcx
+	movq	%rcx, 40(%rdi)
+	shldq	$63, %rax, %rdx
+	movq	%rdx, 32(%rdi)
+	shldq	$63, %r11, %rax
+	movq	%rax, 24(%rdi)
+	shldq	$63, %r10, %r11
+	movq	%r11, 16(%rdi)
+	shldq	$63, %r8, %r10
+	movq	%r10, 8(%rdi)
 	shrdq	$1, %r8, %r9
-	movq	%r9, 56(%rdi)
-	shrq	%r8
-	movq	%r8, 64(%rdi)
+	movq	%r9, (%rdi)
 	popq	%rbx
 	retq
-
-	.globl	_mcl_fp_add9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_add8Lbmi2               ## -- Begin function mcl_fp_add8Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_add9Lbmi2:                      ## @mcl_fp_add9Lbmi2
-## BB#0:
-	pushq	%r15
+_mcl_fp_add8Lbmi2:                      ## @mcl_fp_add8Lbmi2
+## %bb.0:
 	pushq	%r14
-	pushq	%r13
-	pushq	%r12
 	pushq	%rbx
-	movq	64(%rdx), %r12
-	movq	64(%rsi), %r8
-	movq	56(%rsi), %r13
+	movq	56(%rsi), %r8
 	movq	48(%rsi), %r9
 	movq	40(%rsi), %r10
-	movq	24(%rsi), %r14
 	movq	32(%rsi), %r11
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %r15
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %r15
-	movq	16(%rdx), %rax
-	adcq	16(%rsi), %rax
+	movq	24(%rsi), %r14
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rbx
 	adcq	24(%rdx), %r14
 	adcq	32(%rdx), %r11
 	adcq	40(%rdx), %r10
-	movq	56(%rdx), %rsi
 	adcq	48(%rdx), %r9
-	movq	%rbx, (%rdi)
-	movq	%r15, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r14, 24(%rdi)
-	movq	%r11, 32(%rdi)
-	movq	%r10, 40(%rdi)
+	adcq	56(%rdx), %r8
+	movq	%r8, 56(%rdi)
 	movq	%r9, 48(%rdi)
-	adcq	%r13, %rsi
-	movq	%rsi, 56(%rdi)
-	adcq	%r12, %r8
-	movq	%r8, 64(%rdi)
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	subq	(%rcx), %rbx
-	sbbq	8(%rcx), %r15
-	sbbq	16(%rcx), %rax
+	movq	%r10, 40(%rdi)
+	movq	%r11, 32(%rdi)
+	movq	%r14, 24(%rdi)
+	movq	%rbx, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rax, (%rdi)
+	setb	%dl
+	movzbl	%dl, %edx
+	subq	(%rcx), %rax
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %rbx
 	sbbq	24(%rcx), %r14
 	sbbq	32(%rcx), %r11
 	sbbq	40(%rcx), %r10
 	sbbq	48(%rcx), %r9
-	sbbq	56(%rcx), %rsi
-	sbbq	64(%rcx), %r8
+	sbbq	56(%rcx), %r8
 	sbbq	$0, %rdx
 	testb	$1, %dl
-	jne	LBB136_2
-## BB#1:                                ## %nocarry
-	movq	%rbx, (%rdi)
-	movq	%r15, 8(%rdi)
-	movq	%rax, 16(%rdi)
+	jne	LBB67_2
+## %bb.1:                               ## %nocarry
+	movq	%rax, (%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rbx, 16(%rdi)
 	movq	%r14, 24(%rdi)
 	movq	%r11, 32(%rdi)
 	movq	%r10, 40(%rdi)
 	movq	%r9, 48(%rdi)
-	movq	%rsi, 56(%rdi)
-	movq	%r8, 64(%rdi)
-LBB136_2:                               ## %carry
+	movq	%r8, 56(%rdi)
+LBB67_2:                                ## %carry
 	popq	%rbx
-	popq	%r12
-	popq	%r13
 	popq	%r14
-	popq	%r15
 	retq
-
-	.globl	_mcl_fp_addNF9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_addNF8Lbmi2             ## -- Begin function mcl_fp_addNF8Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_addNF9Lbmi2:                    ## @mcl_fp_addNF9Lbmi2
-## BB#0:
+_mcl_fp_addNF8Lbmi2:                    ## @mcl_fp_addNF8Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, %r8
-	movq	64(%rdx), %r10
-	movq	56(%rdx), %r11
+	movq	56(%rdx), %r8
 	movq	48(%rdx), %r9
-	movq	40(%rdx), %rax
-	movq	32(%rdx), %rdi
-	movq	24(%rdx), %rbp
-	movq	16(%rdx), %r15
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %r13
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %r13
-	adcq	16(%rsi), %r15
-	adcq	24(%rsi), %rbp
-	movq	%rbp, -24(%rsp)         ## 8-byte Spill
-	adcq	32(%rsi), %rdi
-	movq	%rdi, -40(%rsp)         ## 8-byte Spill
-	adcq	40(%rsi), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
+	movq	40(%rdx), %r10
+	movq	32(%rdx), %r11
+	movq	24(%rdx), %r15
+	movq	16(%rdx), %rbx
+	movq	(%rdx), %rax
+	movq	8(%rdx), %rdx
+	addq	(%rsi), %rax
+	movq	%rax, -8(%rsp)                  ## 8-byte Spill
+	adcq	8(%rsi), %rdx
+	movq	%rdx, -16(%rsp)                 ## 8-byte Spill
+	adcq	16(%rsi), %rbx
+	movq	%rbx, -24(%rsp)                 ## 8-byte Spill
+	adcq	24(%rsi), %r15
+	adcq	32(%rsi), %r11
+	adcq	40(%rsi), %r10
 	adcq	48(%rsi), %r9
-	movq	%r9, %rdi
-	movq	%rdi, -16(%rsp)         ## 8-byte Spill
-	adcq	56(%rsi), %r11
-	movq	%r11, %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	adcq	64(%rsi), %r10
-	movq	%r10, %r9
-	movq	%rbx, %rsi
+	adcq	56(%rsi), %r8
+	movq	%rax, %rsi
 	subq	(%rcx), %rsi
-	movq	%r13, %rdx
 	sbbq	8(%rcx), %rdx
-	movq	%r15, %r12
-	sbbq	16(%rcx), %r12
-	sbbq	24(%rcx), %rbp
-	movq	-40(%rsp), %r14         ## 8-byte Reload
-	sbbq	32(%rcx), %r14
-	movq	-32(%rsp), %r11         ## 8-byte Reload
-	sbbq	40(%rcx), %r11
-	movq	%rdi, %r10
-	sbbq	48(%rcx), %r10
-	movq	%rax, %rdi
-	sbbq	56(%rcx), %rdi
-	movq	%r9, %rax
-	sbbq	64(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%rbx, %rsi
-	movq	%rsi, (%r8)
-	cmovsq	%r13, %rdx
-	movq	%rdx, 8(%r8)
-	cmovsq	%r15, %r12
-	movq	%r12, 16(%r8)
-	cmovsq	-24(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rbp, 24(%r8)
-	cmovsq	-40(%rsp), %r14         ## 8-byte Folded Reload
-	movq	%r14, 32(%r8)
-	cmovsq	-32(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%r11, 40(%r8)
-	cmovsq	-16(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%r10, 48(%r8)
-	cmovsq	-8(%rsp), %rdi          ## 8-byte Folded Reload
-	movq	%rdi, 56(%r8)
-	cmovsq	%r9, %rax
-	movq	%rax, 64(%r8)
+	sbbq	16(%rcx), %rbx
+	movq	%r15, %rax
+	sbbq	24(%rcx), %rax
+	movq	%r11, %rbp
+	sbbq	32(%rcx), %rbp
+	movq	%r10, %r14
+	sbbq	40(%rcx), %r14
+	movq	%r9, %r12
+	sbbq	48(%rcx), %r12
+	movq	%r8, %r13
+	sbbq	56(%rcx), %r13
+	cmovsq	%r8, %r13
+	movq	%r13, 56(%rdi)
+	cmovsq	%r9, %r12
+	movq	%r12, 48(%rdi)
+	cmovsq	%r10, %r14
+	movq	%r14, 40(%rdi)
+	cmovsq	%r11, %rbp
+	movq	%rbp, 32(%rdi)
+	cmovsq	%r15, %rax
+	movq	%rax, 24(%rdi)
+	cmovsq	-24(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%rbx, 16(%rdi)
+	cmovsq	-16(%rsp), %rdx                 ## 8-byte Folded Reload
+	movq	%rdx, 8(%rdi)
+	cmovsq	-8(%rsp), %rsi                  ## 8-byte Folded Reload
+	movq	%rsi, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13395,180 +6435,129 @@ _mcl_fp_addNF9Lbmi2:                    ## @mcl_fp_addNF9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_sub9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_sub8Lbmi2               ## -- Begin function mcl_fp_sub8Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_sub9Lbmi2:                      ## @mcl_fp_sub9Lbmi2
-## BB#0:
+_mcl_fp_sub8Lbmi2:                      ## @mcl_fp_sub8Lbmi2
+## %bb.0:
 	pushq	%r15
 	pushq	%r14
-	pushq	%r13
-	pushq	%r12
 	pushq	%rbx
-	movq	64(%rdx), %r13
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r9
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r9
-	movq	16(%rsi), %r10
-	sbbq	16(%rdx), %r10
-	movq	24(%rsi), %r11
-	sbbq	24(%rdx), %r11
-	movq	32(%rsi), %r12
-	sbbq	32(%rdx), %r12
-	movq	40(%rsi), %r14
-	sbbq	40(%rdx), %r14
-	movq	48(%rsi), %r15
-	sbbq	48(%rdx), %r15
-	movq	64(%rsi), %r8
-	movq	56(%rsi), %rsi
-	sbbq	56(%rdx), %rsi
-	movq	%rax, (%rdi)
-	movq	%r9, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	movq	%r11, 24(%rdi)
-	movq	%r12, 32(%rdi)
-	movq	%r14, 40(%rdi)
-	movq	%r15, 48(%rdi)
-	movq	%rsi, 56(%rdi)
-	sbbq	%r13, %r8
-	movq	%r8, 64(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	LBB138_2
-## BB#1:                                ## %carry
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	movq	8(%rcx), %rax
-	adcq	%r9, %rax
-	movq	%rax, 8(%rdi)
-	movq	16(%rcx), %rax
-	adcq	%r10, %rax
-	movq	%rax, 16(%rdi)
-	movq	24(%rcx), %rax
-	adcq	%r11, %rax
-	movq	%rax, 24(%rdi)
-	movq	32(%rcx), %rax
-	adcq	%r12, %rax
-	movq	%rax, 32(%rdi)
-	movq	40(%rcx), %rax
-	adcq	%r14, %rax
-	movq	%rax, 40(%rdi)
-	movq	48(%rcx), %rax
-	adcq	%r15, %rax
-	movq	%rax, 48(%rdi)
-	movq	56(%rcx), %rax
-	adcq	%rsi, %rax
-	movq	%rax, 56(%rdi)
-	movq	64(%rcx), %rax
-	adcq	%r8, %rax
-	movq	%rax, 64(%rdi)
-LBB138_2:                               ## %nocarry
+	movq	56(%rsi), %r14
+	movq	48(%rsi), %rbx
+	movq	40(%rsi), %r11
+	movq	32(%rsi), %r10
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %r15
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %r15
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r10
+	sbbq	40(%rdx), %r11
+	sbbq	48(%rdx), %rbx
+	sbbq	56(%rdx), %r14
+	movq	%r14, 56(%rdi)
+	movq	%rbx, 48(%rdi)
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%r15, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rax, %rax
+	testb	$1, %al
+	je	LBB69_2
+## %bb.1:                               ## %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %r15
+	adcq	24(%rcx), %r9
+	adcq	32(%rcx), %r10
+	adcq	40(%rcx), %r11
+	adcq	48(%rcx), %rbx
+	adcq	56(%rcx), %r14
+	movq	%r14, 56(%rdi)
+	movq	%rbx, 48(%rdi)
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%r15, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+LBB69_2:                                ## %nocarry
 	popq	%rbx
-	popq	%r12
-	popq	%r13
 	popq	%r14
 	popq	%r15
 	retq
-
-	.globl	_mcl_fp_subNF9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fp_subNF8Lbmi2             ## -- Begin function mcl_fp_subNF8Lbmi2
 	.p2align	4, 0x90
-_mcl_fp_subNF9Lbmi2:                    ## @mcl_fp_subNF9Lbmi2
-## BB#0:
+_mcl_fp_subNF8Lbmi2:                    ## @mcl_fp_subNF8Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r10
-	movq	%rdi, %rbx
-	movq	64(%rsi), %r11
-	movdqu	(%rdx), %xmm1
-	movdqu	16(%rdx), %xmm2
-	movdqu	32(%rdx), %xmm3
-	movdqu	48(%rdx), %xmm4
-	pshufd	$78, %xmm4, %xmm0       ## xmm0 = xmm4[2,3,0,1]
-	movd	%xmm0, %r8
-	movdqu	(%rsi), %xmm5
-	movdqu	16(%rsi), %xmm6
-	movdqu	32(%rsi), %xmm7
-	movdqu	48(%rsi), %xmm8
-	pshufd	$78, %xmm8, %xmm0       ## xmm0 = xmm8[2,3,0,1]
-	movd	%xmm0, %rax
-	movd	%xmm4, %r9
-	pshufd	$78, %xmm3, %xmm0       ## xmm0 = xmm3[2,3,0,1]
-	movd	%xmm0, %rdi
-	pshufd	$78, %xmm7, %xmm0       ## xmm0 = xmm7[2,3,0,1]
-	movd	%xmm3, %rcx
-	pshufd	$78, %xmm2, %xmm3       ## xmm3 = xmm2[2,3,0,1]
-	movd	%xmm3, %rbp
-	pshufd	$78, %xmm6, %xmm3       ## xmm3 = xmm6[2,3,0,1]
-	movd	%xmm2, %r13
-	pshufd	$78, %xmm1, %xmm2       ## xmm2 = xmm1[2,3,0,1]
-	movd	%xmm2, %r12
-	pshufd	$78, %xmm5, %xmm2       ## xmm2 = xmm5[2,3,0,1]
-	movd	%xmm1, %rsi
-	movd	%xmm5, %r15
-	subq	%rsi, %r15
-	movd	%xmm2, %r14
-	sbbq	%r12, %r14
-	movd	%xmm6, %r12
-	sbbq	%r13, %r12
-	movd	%xmm3, %r13
-	sbbq	%rbp, %r13
-	movd	%xmm7, %rsi
-	sbbq	%rcx, %rsi
-	movq	%rsi, -16(%rsp)         ## 8-byte Spill
-	movd	%xmm0, %rcx
-	sbbq	%rdi, %rcx
-	movq	%rcx, -24(%rsp)         ## 8-byte Spill
-	movd	%xmm8, %rcx
-	sbbq	%r9, %rcx
-	movq	%rcx, -32(%rsp)         ## 8-byte Spill
-	sbbq	%r8, %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	sbbq	64(%rdx), %r11
-	movq	%r11, -40(%rsp)         ## 8-byte Spill
-	movq	%r11, %rdx
-	sarq	$63, %rdx
-	movq	%rdx, %rbp
-	shldq	$1, %r11, %rbp
-	movq	24(%r10), %r9
-	andq	%rbp, %r9
-	movq	8(%r10), %rdi
-	andq	%rbp, %rdi
-	andq	(%r10), %rbp
-	movq	64(%r10), %r11
-	andq	%rdx, %r11
-	rorxq	$63, %rdx, %rax
-	andq	56(%r10), %rdx
-	movq	48(%r10), %r8
-	andq	%rax, %r8
-	movq	40(%r10), %rsi
-	andq	%rax, %rsi
-	movq	32(%r10), %rcx
-	andq	%rax, %rcx
-	andq	16(%r10), %rax
-	addq	%r15, %rbp
-	adcq	%r14, %rdi
-	movq	%rbp, (%rbx)
+	movq	%rcx, %r8
+	movq	%rdi, %r9
+	movq	56(%rsi), %r14
+	movq	48(%rsi), %rax
+	movq	40(%rsi), %rcx
+	movq	32(%rsi), %rdi
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %r15
+	movq	(%rsi), %r13
+	movq	8(%rsi), %r12
+	subq	(%rdx), %r13
+	sbbq	8(%rdx), %r12
+	sbbq	16(%rdx), %r15
+	sbbq	24(%rdx), %r11
+	sbbq	32(%rdx), %rdi
+	movq	%rdi, -24(%rsp)                 ## 8-byte Spill
+	sbbq	40(%rdx), %rcx
+	movq	%rcx, -16(%rsp)                 ## 8-byte Spill
+	sbbq	48(%rdx), %rax
+	movq	%rax, -8(%rsp)                  ## 8-byte Spill
+	sbbq	56(%rdx), %r14
+	movq	%r14, %rsi
+	sarq	$63, %rsi
+	movq	56(%r8), %r10
+	andq	%rsi, %r10
+	movq	48(%r8), %rbx
+	andq	%rsi, %rbx
+	movq	40(%r8), %rdi
+	andq	%rsi, %rdi
+	movq	32(%r8), %rbp
+	andq	%rsi, %rbp
+	movq	24(%r8), %rdx
+	andq	%rsi, %rdx
+	movq	16(%r8), %rcx
+	andq	%rsi, %rcx
+	movq	8(%r8), %rax
+	andq	%rsi, %rax
+	andq	(%r8), %rsi
+	addq	%r13, %rsi
 	adcq	%r12, %rax
-	movq	%rdi, 8(%rbx)
-	adcq	%r13, %r9
-	movq	%rax, 16(%rbx)
-	movq	%r9, 24(%rbx)
-	adcq	-16(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, 32(%rbx)
-	adcq	-24(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, 40(%rbx)
-	adcq	-32(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r8, 48(%rbx)
-	adcq	-8(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 56(%rbx)
-	adcq	-40(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%r11, 64(%rbx)
+	movq	%rsi, (%r9)
+	adcq	%r15, %rcx
+	movq	%rax, 8(%r9)
+	movq	%rcx, 16(%r9)
+	adcq	%r11, %rdx
+	movq	%rdx, 24(%r9)
+	adcq	-24(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	%rbp, 32(%r9)
+	adcq	-16(%rsp), %rdi                 ## 8-byte Folded Reload
+	movq	%rdi, 40(%r9)
+	adcq	-8(%rsp), %rbx                  ## 8-byte Folded Reload
+	movq	%rbx, 48(%r9)
+	adcq	%r14, %r10
+	movq	%r10, 56(%r9)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13576,11 +6565,11 @@ _mcl_fp_subNF9Lbmi2:                    ## @mcl_fp_subNF9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_add9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_add8Lbmi2            ## -- Begin function mcl_fpDbl_add8Lbmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_add9Lbmi2:                   ## @mcl_fpDbl_add9Lbmi2
-## BB#0:
+_mcl_fpDbl_add8Lbmi2:                   ## @mcl_fpDbl_add8Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
@@ -13588,111 +6577,103 @@ _mcl_fpDbl_add9Lbmi2:                   ## @mcl_fpDbl_add9Lbmi2
 	pushq	%r12
 	pushq	%rbx
 	movq	%rcx, %r15
-	movq	136(%rdx), %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	movq	128(%rdx), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	movq	120(%rdx), %r10
-	movq	112(%rdx), %r11
-	movq	24(%rsi), %rcx
-	movq	32(%rsi), %r14
-	movq	16(%rdx), %rbp
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rbx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rbx
-	adcq	16(%rsi), %rbp
-	adcq	24(%rdx), %rcx
-	adcq	32(%rdx), %r14
-	movq	104(%rdx), %r9
-	movq	96(%rdx), %r13
-	movq	%rax, (%rdi)
-	movq	88(%rdx), %r8
-	movq	%rbx, 8(%rdi)
-	movq	80(%rdx), %r12
-	movq	%rbp, 16(%rdi)
-	movq	40(%rdx), %rax
-	movq	%rcx, 24(%rdi)
-	movq	40(%rsi), %rbp
-	adcq	%rax, %rbp
-	movq	48(%rdx), %rcx
-	movq	%r14, 32(%rdi)
-	movq	48(%rsi), %rax
-	adcq	%rcx, %rax
-	movq	56(%rdx), %r14
-	movq	%rbp, 40(%rdi)
-	movq	56(%rsi), %rbp
-	adcq	%r14, %rbp
-	movq	72(%rdx), %rcx
-	movq	64(%rdx), %rdx
-	movq	%rax, 48(%rdi)
+	movq	120(%rsi), %rax
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	movq	112(%rsi), %rax
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	movq	104(%rsi), %rax
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	96(%rsi), %rbx
+	movq	88(%rsi), %rcx
+	movq	80(%rsi), %r8
+	movq	72(%rsi), %r10
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rbp
+	addq	(%rdx), %rax
+	movq	%rax, -8(%rsp)                  ## 8-byte Spill
+	adcq	8(%rdx), %rbp
+	movq	%rbp, -16(%rsp)                 ## 8-byte Spill
 	movq	64(%rsi), %rax
-	adcq	%rdx, %rax
-	movq	136(%rsi), %rbx
+	movq	56(%rsi), %rbp
+	movq	48(%rsi), %r13
+	movq	40(%rsi), %r14
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %r12
+	adcq	16(%rdx), %r12
+	adcq	24(%rdx), %r11
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r14
+	adcq	48(%rdx), %r13
+	adcq	56(%rdx), %rbp
+	adcq	64(%rdx), %rax
+	movq	%rax, -48(%rsp)                 ## 8-byte Spill
+	adcq	72(%rdx), %r10
+	movq	%r8, %rax
+	adcq	80(%rdx), %rax
+	movq	%rax, -24(%rsp)                 ## 8-byte Spill
+	adcq	88(%rdx), %rcx
+	movq	%rcx, -32(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rsi
+	adcq	96(%rdx), %rsi
+	movq	%rsi, -40(%rsp)                 ## 8-byte Spill
+	movq	-56(%rsp), %r8                  ## 8-byte Reload
+	adcq	104(%rdx), %r8
+	movq	%r8, -56(%rsp)                  ## 8-byte Spill
+	movq	-64(%rsp), %rbx                 ## 8-byte Reload
+	adcq	112(%rdx), %rbx
+	movq	%rbx, -64(%rsp)                 ## 8-byte Spill
+	movq	-72(%rsp), %r8                  ## 8-byte Reload
+	adcq	120(%rdx), %r8
 	movq	%rbp, 56(%rdi)
-	movq	72(%rsi), %rbp
-	adcq	%rcx, %rbp
-	movq	128(%rsi), %rcx
-	movq	%rax, 64(%rdi)
-	movq	80(%rsi), %rdx
-	adcq	%r12, %rdx
-	movq	88(%rsi), %r12
-	adcq	%r8, %r12
-	movq	96(%rsi), %r14
-	adcq	%r13, %r14
-	movq	%r14, -8(%rsp)          ## 8-byte Spill
-	movq	104(%rsi), %rax
-	adcq	%r9, %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	120(%rsi), %rax
-	movq	112(%rsi), %rsi
-	adcq	%r11, %rsi
-	movq	%rsi, -24(%rsp)         ## 8-byte Spill
-	adcq	%r10, %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	adcq	-40(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -40(%rsp)         ## 8-byte Spill
-	adcq	-48(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, -48(%rsp)         ## 8-byte Spill
-	sbbq	%r9, %r9
-	andl	$1, %r9d
-	movq	%rbp, %r10
-	subq	(%r15), %r10
-	movq	%rdx, %r11
-	sbbq	8(%r15), %r11
-	movq	%r12, %rbx
-	sbbq	16(%r15), %rbx
-	sbbq	24(%r15), %r14
-	movq	-32(%rsp), %r13         ## 8-byte Reload
-	sbbq	32(%r15), %r13
-	movq	-24(%rsp), %rsi         ## 8-byte Reload
-	sbbq	40(%r15), %rsi
-	movq	-16(%rsp), %rax         ## 8-byte Reload
-	sbbq	48(%r15), %rax
-	sbbq	56(%r15), %rcx
-	movq	-48(%rsp), %r8          ## 8-byte Reload
-	sbbq	64(%r15), %r8
-	sbbq	$0, %r9
-	andl	$1, %r9d
-	cmovneq	%rbp, %r10
-	movq	%r10, 72(%rdi)
-	testb	%r9b, %r9b
-	cmovneq	%rdx, %r11
+	movq	%r13, 48(%rdi)
+	movq	%r14, 40(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r11, 24(%rdi)
+	movq	%r12, 16(%rdi)
+	movq	-16(%rsp), %rdx                 ## 8-byte Reload
+	movq	%rdx, 8(%rdi)
+	movq	-8(%rsp), %rdx                  ## 8-byte Reload
+	movq	%rdx, (%rdi)
+	setb	-72(%rsp)                       ## 1-byte Folded Spill
+	movq	-48(%rsp), %r14                 ## 8-byte Reload
+	subq	(%r15), %r14
+	movq	%r10, %r9
+	movq	%r10, %r13
+	sbbq	8(%r15), %r9
+	movq	%rax, %r11
+	sbbq	16(%r15), %r11
+	movq	%rcx, %rbp
+	sbbq	24(%r15), %rbp
+	movq	%rsi, %rbx
+	sbbq	32(%r15), %rbx
+	movq	-56(%rsp), %r12                 ## 8-byte Reload
+	movq	%r12, %rax
+	sbbq	40(%r15), %rax
+	movq	-64(%rsp), %r10                 ## 8-byte Reload
+	movq	%r10, %rdx
+	sbbq	48(%r15), %rdx
+	movq	%r8, %rsi
+	sbbq	56(%r15), %rsi
+	movzbl	-72(%rsp), %ecx                 ## 1-byte Folded Reload
+	sbbq	$0, %rcx
+	testb	$1, %cl
+	cmovneq	%r8, %rsi
+	movq	%rsi, 120(%rdi)
+	cmovneq	%r10, %rdx
+	movq	%rdx, 112(%rdi)
+	cmovneq	%r12, %rax
+	movq	%rax, 104(%rdi)
+	cmovneq	-40(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%rbx, 96(%rdi)
+	cmovneq	-32(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	%rbp, 88(%rdi)
+	cmovneq	-24(%rsp), %r11                 ## 8-byte Folded Reload
 	movq	%r11, 80(%rdi)
-	cmovneq	%r12, %rbx
-	movq	%rbx, 88(%rdi)
-	cmovneq	-8(%rsp), %r14          ## 8-byte Folded Reload
-	movq	%r14, 96(%rdi)
-	cmovneq	-32(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%r13, 104(%rdi)
-	cmovneq	-24(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, 112(%rdi)
-	cmovneq	-16(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, 120(%rdi)
-	cmovneq	-40(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, 128(%rdi)
-	cmovneq	-48(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r8, 136(%rdi)
+	cmovneq	%r13, %r9
+	movq	%r9, 72(%rdi)
+	cmovneq	-48(%rsp), %r14                 ## 8-byte Folded Reload
+	movq	%r14, 64(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13700,124 +6681,109 @@ _mcl_fpDbl_add9Lbmi2:                   ## @mcl_fpDbl_add9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_sub9Lbmi2
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sub8Lbmi2            ## -- Begin function mcl_fpDbl_sub8Lbmi2
 	.p2align	4, 0x90
-_mcl_fpDbl_sub9Lbmi2:                   ## @mcl_fpDbl_sub9Lbmi2
-## BB#0:
+_mcl_fpDbl_sub8Lbmi2:                   ## @mcl_fpDbl_sub8Lbmi2
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r14
-	movq	136(%rdx), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	128(%rdx), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	120(%rdx), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %r11
-	movq	(%rsi), %r12
-	movq	8(%rsi), %r13
-	xorl	%r9d, %r9d
-	subq	(%rdx), %r12
-	sbbq	8(%rdx), %r13
-	sbbq	16(%rdx), %r11
+	movq	%rcx, %r11
+	movq	120(%rsi), %rax
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	movq	112(%rsi), %r12
+	movq	104(%rsi), %r15
+	movq	96(%rsi), %rax
+	movq	%rax, -48(%rsp)                 ## 8-byte Spill
+	movq	88(%rsi), %r13
+	movq	80(%rsi), %rax
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	(%rsi), %rcx
+	movq	8(%rsi), %rbp
+	xorl	%eax, %eax
+	subq	(%rdx), %rcx
+	movq	%rcx, -32(%rsp)                 ## 8-byte Spill
+	sbbq	8(%rdx), %rbp
+	movq	%rbp, -40(%rsp)                 ## 8-byte Spill
+	movq	72(%rsi), %rbp
+	movq	64(%rsi), %rcx
+	movq	56(%rsi), %r8
+	movq	48(%rsi), %r9
+	movq	40(%rsi), %r10
+	movq	32(%rsi), %r14
 	movq	24(%rsi), %rbx
+	movq	16(%rsi), %rsi
+	sbbq	16(%rdx), %rsi
 	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %rbp
-	sbbq	32(%rdx), %rbp
-	movq	112(%rdx), %r10
-	movq	104(%rdx), %rcx
-	movq	%r12, (%rdi)
-	movq	96(%rdx), %rax
-	movq	%r13, 8(%rdi)
-	movq	88(%rdx), %r13
-	movq	%r11, 16(%rdi)
-	movq	40(%rdx), %r11
+	sbbq	32(%rdx), %r14
+	sbbq	40(%rdx), %r10
+	sbbq	48(%rdx), %r9
+	sbbq	56(%rdx), %r8
+	sbbq	64(%rdx), %rcx
+	movq	%rcx, -24(%rsp)                 ## 8-byte Spill
+	sbbq	72(%rdx), %rbp
+	movq	%rbp, -16(%rsp)                 ## 8-byte Spill
+	movq	-56(%rsp), %rbp                 ## 8-byte Reload
+	sbbq	80(%rdx), %rbp
+	movq	%rbp, -56(%rsp)                 ## 8-byte Spill
+	sbbq	88(%rdx), %r13
+	movq	%r13, -8(%rsp)                  ## 8-byte Spill
+	movq	-48(%rsp), %r13                 ## 8-byte Reload
+	sbbq	96(%rdx), %r13
+	movq	%r13, -48(%rsp)                 ## 8-byte Spill
+	sbbq	104(%rdx), %r15
+	sbbq	112(%rdx), %r12
+	movq	-64(%rsp), %rcx                 ## 8-byte Reload
+	sbbq	120(%rdx), %rcx
+	movq	%rcx, -64(%rsp)                 ## 8-byte Spill
+	movq	%r8, 56(%rdi)
+	movq	%r9, 48(%rdi)
+	movq	%r10, 40(%rdi)
+	movq	%r14, 32(%rdi)
 	movq	%rbx, 24(%rdi)
-	movq	40(%rsi), %rbx
-	sbbq	%r11, %rbx
-	movq	48(%rdx), %r11
-	movq	%rbp, 32(%rdi)
-	movq	48(%rsi), %rbp
-	sbbq	%r11, %rbp
-	movq	56(%rdx), %r11
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rbx
-	sbbq	%r11, %rbx
-	movq	64(%rdx), %r11
-	movq	%rbp, 48(%rdi)
-	movq	64(%rsi), %rbp
-	sbbq	%r11, %rbp
-	movq	80(%rdx), %r8
-	movq	72(%rdx), %r11
-	movq	%rbx, 56(%rdi)
-	movq	72(%rsi), %r15
-	sbbq	%r11, %r15
-	movq	136(%rsi), %rdx
-	movq	%rbp, 64(%rdi)
-	movq	80(%rsi), %rbp
-	sbbq	%r8, %rbp
-	movq	88(%rsi), %r12
-	sbbq	%r13, %r12
-	movq	96(%rsi), %r13
-	sbbq	%rax, %r13
-	movq	104(%rsi), %rax
-	sbbq	%rcx, %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	112(%rsi), %rax
-	sbbq	%r10, %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	128(%rsi), %rax
-	movq	120(%rsi), %rcx
-	sbbq	-40(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -40(%rsp)         ## 8-byte Spill
-	sbbq	-32(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	sbbq	-24(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	movl	$0, %r8d
-	sbbq	$0, %r8
-	andl	$1, %r8d
-	movq	(%r14), %r10
-	cmoveq	%r9, %r10
-	testb	%r8b, %r8b
-	movq	16(%r14), %r8
-	cmoveq	%r9, %r8
-	movq	8(%r14), %rdx
-	cmoveq	%r9, %rdx
-	movq	64(%r14), %rbx
-	cmoveq	%r9, %rbx
-	movq	56(%r14), %r11
-	cmoveq	%r9, %r11
-	movq	48(%r14), %rsi
-	cmoveq	%r9, %rsi
-	movq	40(%r14), %rcx
-	cmoveq	%r9, %rcx
-	movq	32(%r14), %rax
-	cmoveq	%r9, %rax
-	cmovneq	24(%r14), %r9
-	addq	%r15, %r10
-	adcq	%rbp, %rdx
-	movq	%r10, 72(%rdi)
-	adcq	%r12, %r8
-	movq	%rdx, 80(%rdi)
-	adcq	%r13, %r9
-	movq	%r8, 88(%rdi)
-	movq	%r9, 96(%rdi)
-	adcq	-16(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, 104(%rdi)
-	adcq	-8(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 112(%rdi)
-	adcq	-40(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, 120(%rdi)
-	adcq	-32(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%r11, 128(%rdi)
-	adcq	-24(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, 136(%rdi)
+	movq	%rsi, 16(%rdi)
+	movq	-40(%rsp), %rcx                 ## 8-byte Reload
+	movq	%rcx, 8(%rdi)
+	movq	-32(%rsp), %rcx                 ## 8-byte Reload
+	movq	%rcx, (%rdi)
+	sbbq	%rax, %rax
+	andl	$1, %eax
+	negq	%rax
+	movq	56(%r11), %r8
+	andq	%rax, %r8
+	movq	48(%r11), %r9
+	andq	%rax, %r9
+	movq	40(%r11), %r10
+	andq	%rax, %r10
+	movq	32(%r11), %rbx
+	andq	%rax, %rbx
+	movq	24(%r11), %rdx
+	andq	%rax, %rdx
+	movq	16(%r11), %rsi
+	andq	%rax, %rsi
+	movq	8(%r11), %rbp
+	andq	%rax, %rbp
+	andq	(%r11), %rax
+	addq	-24(%rsp), %rax                 ## 8-byte Folded Reload
+	adcq	-16(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	%rax, 64(%rdi)
+	adcq	-56(%rsp), %rsi                 ## 8-byte Folded Reload
+	movq	%rbp, 72(%rdi)
+	movq	%rsi, 80(%rdi)
+	adcq	-8(%rsp), %rdx                  ## 8-byte Folded Reload
+	movq	%rdx, 88(%rdi)
+	adcq	-48(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%rbx, 96(%rdi)
+	adcq	%r15, %r10
+	movq	%r10, 104(%rdi)
+	adcq	%r12, %r9
+	movq	%r9, 112(%rdi)
+	adcq	-64(%rsp), %r8                  ## 8-byte Folded Reload
+	movq	%r8, 120(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13825,6 +6791,5 @@ _mcl_fpDbl_sub9Lbmi2:                   ## @mcl_fpDbl_sub9Lbmi2
 	popq	%r15
 	popq	%rbp
 	retq
-
-
+                                        ## -- End function
 .subsections_via_symbols
diff --git a/src/asm/x86-64mac.s b/src/asm/x86-64mac.s
index 0dc7014a..f1a38798 100644
--- a/src/asm/x86-64mac.s
+++ b/src/asm/x86-64mac.s
@@ -1,73 +1,75 @@
 	.section	__TEXT,__text,regular,pure_instructions
-	.macosx_version_min 10, 12
-	.globl	_makeNIST_P192L
+	.build_version macos, 11, 0
+	.globl	_makeNIST_P192L                 ## -- Begin function makeNIST_P192L
 	.p2align	4, 0x90
 _makeNIST_P192L:                        ## @makeNIST_P192L
-## BB#0:
+## %bb.0:
 	movq	$-1, %rax
 	movq	$-2, %rdx
 	movq	$-1, %rcx
 	retq
-
-	.globl	_mcl_fpDbl_mod_NIST_P192L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_mod_NIST_P192L       ## -- Begin function mcl_fpDbl_mod_NIST_P192L
 	.p2align	4, 0x90
 _mcl_fpDbl_mod_NIST_P192L:              ## @mcl_fpDbl_mod_NIST_P192L
-## BB#0:
+## %bb.0:
 	pushq	%r14
 	pushq	%rbx
-	movq	16(%rsi), %r10
+	movq	16(%rsi), %rbx
 	movq	24(%rsi), %r8
 	movq	40(%rsi), %r9
-	movq	8(%rsi), %rax
-	addq	%r9, %rax
-	adcq	$0, %r10
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
+	movq	8(%rsi), %rdx
+	addq	%r9, %rdx
+	adcq	$0, %rbx
+	setb	%cl
+	movzbl	%cl, %r10d
 	movq	32(%rsi), %r11
 	movq	(%rsi), %r14
 	addq	%r8, %r14
-	adcq	%r11, %rax
-	adcq	%r9, %r10
-	adcq	$0, %rcx
-	addq	%r9, %r14
-	adcq	%r8, %rax
-	adcq	%r11, %r10
-	adcq	$0, %rcx
-	addq	%rcx, %r14
-	adcq	%rax, %rcx
+	adcq	%r11, %rdx
+	adcq	%r9, %rbx
 	adcq	$0, %r10
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r14, %rsi
-	addq	$1, %rsi
-	movq	%rcx, %rdx
-	adcq	$1, %rdx
-	movq	%r10, %rbx
+	addq	%r9, %r14
+	adcq	%r8, %rdx
+	adcq	%r11, %rbx
+	setb	%r8b
+	movq	%r10, %r9
+	adcq	$0, %r9
+	addb	$255, %r8b
+	adcq	%r10, %r14
+	adcq	%rdx, %r9
 	adcq	$0, %rbx
-	adcq	$-1, %rax
-	andl	$1, %eax
-	cmovneq	%r14, %rsi
-	movq	%rsi, (%rdi)
-	testb	%al, %al
-	cmovneq	%rcx, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovneq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
+	setb	%dl
+	movzbl	%dl, %edx
+	movq	%r14, %rcx
+	addq	$1, %rcx
+	movq	%r9, %rsi
+	adcq	$1, %rsi
+	movq	%rbx, %rax
+	adcq	$0, %rax
+	adcq	$-1, %rdx
+	testb	$1, %dl
+	cmovneq	%rbx, %rax
+	movq	%rax, 16(%rdi)
+	cmovneq	%r9, %rsi
+	movq	%rsi, 8(%rdi)
+	cmovneq	%r14, %rcx
+	movq	%rcx, (%rdi)
 	popq	%rbx
 	popq	%r14
 	retq
-
-	.globl	_mcl_fp_sqr_NIST_P192L
+                                        ## -- End function
+	.globl	_mcl_fp_sqr_NIST_P192L          ## -- Begin function mcl_fp_sqr_NIST_P192L
 	.p2align	4, 0x90
 _mcl_fp_sqr_NIST_P192L:                 ## @mcl_fp_sqr_NIST_P192L
-## BB#0:
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
 	movq	16(%rsi), %r11
 	movq	(%rsi), %rbx
 	movq	8(%rsi), %rcx
@@ -83,7 +85,7 @@ _mcl_fp_sqr_NIST_P192L:                 ## @mcl_fp_sqr_NIST_P192L
 	mulq	%rbx
 	movq	%rax, %r13
 	movq	%rdx, %rcx
-	addq	%rcx, %r12
+	addq	%rdx, %r12
 	adcq	%r14, %r15
 	movq	%rdi, %r10
 	adcq	$0, %r10
@@ -114,37 +116,39 @@ _mcl_fp_sqr_NIST_P192L:                 ## @mcl_fp_sqr_NIST_P192L
 	adcq	$0, %rdx
 	addq	%rdx, %rsi
 	adcq	$0, %rcx
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
+	setb	%bl
+	movzbl	%bl, %edi
 	addq	%r9, %r8
 	adcq	%rax, %rsi
 	adcq	%rdx, %rcx
-	adcq	$0, %rbp
+	adcq	$0, %rdi
 	addq	%rdx, %r8
 	adcq	%r9, %rsi
 	adcq	%rax, %rcx
-	adcq	$0, %rbp
-	addq	%rbp, %r8
-	adcq	%rsi, %rbp
+	setb	%al
+	movq	%rdi, %rdx
+	adcq	$0, %rdx
+	addb	$255, %al
+	adcq	%rdi, %r8
+	adcq	%rsi, %rdx
 	adcq	$0, %rcx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r8, %rdx
-	addq	$1, %rdx
-	movq	%rbp, %rsi
-	adcq	$1, %rsi
-	movq	%rcx, %rdi
-	adcq	$0, %rdi
+	setb	%al
+	movzbl	%al, %eax
+	movq	%r8, %rsi
+	addq	$1, %rsi
+	movq	%rdx, %rdi
+	adcq	$1, %rdi
+	movq	%rcx, %rbp
+	adcq	$0, %rbp
 	adcq	$-1, %rax
-	andl	$1, %eax
-	cmovneq	%r8, %rdx
-	movq	-8(%rsp), %rbx          ## 8-byte Reload
-	movq	%rdx, (%rbx)
-	testb	%al, %al
-	cmovneq	%rbp, %rsi
-	movq	%rsi, 8(%rbx)
-	cmovneq	%rcx, %rdi
-	movq	%rdi, 16(%rbx)
+	testb	$1, %al
+	cmovneq	%rcx, %rbp
+	movq	-8(%rsp), %rax                  ## 8-byte Reload
+	movq	%rbp, 16(%rax)
+	cmovneq	%rdx, %rdi
+	movq	%rdi, 8(%rax)
+	cmovneq	%r8, %rsi
+	movq	%rsi, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -152,64 +156,66 @@ _mcl_fp_sqr_NIST_P192L:                 ## @mcl_fp_sqr_NIST_P192L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_mulNIST_P192L
+                                        ## -- End function
+	.globl	_mcl_fp_mulNIST_P192L           ## -- Begin function mcl_fp_mulNIST_P192L
 	.p2align	4, 0x90
 _mcl_fp_mulNIST_P192L:                  ## @mcl_fp_mulNIST_P192L
-## BB#0:
+## %bb.0:
 	pushq	%r14
 	pushq	%rbx
 	subq	$56, %rsp
 	movq	%rdi, %r14
 	leaq	8(%rsp), %rdi
 	callq	_mcl_fpDbl_mulPre3L
-	movq	24(%rsp), %r9
+	movq	24(%rsp), %rbx
 	movq	32(%rsp), %r8
-	movq	48(%rsp), %rdi
-	movq	16(%rsp), %rbx
-	addq	%rdi, %rbx
-	adcq	$0, %r9
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	movq	40(%rsp), %rsi
-	movq	8(%rsp), %rdx
-	addq	%r8, %rdx
-	adcq	%rsi, %rbx
-	adcq	%rdi, %r9
+	movq	48(%rsp), %rax
+	movq	16(%rsp), %rdi
+	addq	%rax, %rdi
+	adcq	$0, %rbx
+	setb	%cl
+	movzbl	%cl, %esi
+	movq	40(%rsp), %rdx
+	movq	8(%rsp), %r9
+	addq	%r8, %r9
+	adcq	%rdx, %rdi
+	adcq	%rax, %rbx
+	adcq	$0, %rsi
+	addq	%rax, %r9
+	adcq	%r8, %rdi
+	adcq	%rdx, %rbx
+	setb	%dl
+	movq	%rsi, %rcx
 	adcq	$0, %rcx
-	addq	%rdi, %rdx
-	adcq	%r8, %rbx
+	addb	$255, %dl
 	adcq	%rsi, %r9
-	adcq	$0, %rcx
-	addq	%rcx, %rdx
-	adcq	%rbx, %rcx
-	adcq	$0, %r9
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rdx, %rdi
+	adcq	%rdi, %rcx
+	adcq	$0, %rbx
+	setb	%dl
+	movzbl	%dl, %edx
+	movq	%r9, %rdi
 	addq	$1, %rdi
-	movq	%rcx, %rbx
-	adcq	$1, %rbx
-	movq	%r9, %rax
+	movq	%rcx, %rsi
+	adcq	$1, %rsi
+	movq	%rbx, %rax
 	adcq	$0, %rax
-	adcq	$-1, %rsi
-	andl	$1, %esi
-	cmovneq	%rdx, %rdi
-	movq	%rdi, (%r14)
-	testb	%sil, %sil
-	cmovneq	%rcx, %rbx
-	movq	%rbx, 8(%r14)
-	cmovneq	%r9, %rax
+	adcq	$-1, %rdx
+	testb	$1, %dl
+	cmovneq	%rbx, %rax
 	movq	%rax, 16(%r14)
+	cmovneq	%rcx, %rsi
+	movq	%rsi, 8(%r14)
+	cmovneq	%r9, %rdi
+	movq	%rdi, (%r14)
 	addq	$56, %rsp
 	popq	%rbx
 	popq	%r14
 	retq
-
-	.globl	_mcl_fpDbl_mod_NIST_P521L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_mod_NIST_P521L       ## -- Begin function mcl_fpDbl_mod_NIST_P521L
 	.p2align	4, 0x90
 _mcl_fpDbl_mod_NIST_P521L:              ## @mcl_fpDbl_mod_NIST_P521L
-## BB#0:
+## %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r12
@@ -233,8 +239,8 @@ _mcl_fpDbl_mod_NIST_P521L:              ## @mcl_fpDbl_mod_NIST_P521L
 	shldq	$55, %rax, %rcx
 	shrq	$9, %r14
 	shldq	$55, %rbx, %rax
-                                        ## kill: %EBX<def> %EBX<kill> %RBX<kill> %RBX<def>
-	andl	$511, %ebx              ## imm = 0x1FF
+	movl	%ebx, %edx
+	andl	$511, %edx                      ## imm = 0x1FF
 	addq	(%rsi), %rax
 	adcq	8(%rsi), %rcx
 	adcq	16(%rsi), %r12
@@ -243,8 +249,8 @@ _mcl_fpDbl_mod_NIST_P521L:              ## @mcl_fpDbl_mod_NIST_P521L
 	adcq	40(%rsi), %r10
 	adcq	48(%rsi), %r9
 	adcq	56(%rsi), %r8
-	adcq	%r14, %rbx
-	movl	%ebx, %esi
+	adcq	%r14, %rdx
+	movl	%edx, %esi
 	shrl	$9, %esi
 	andl	$1, %esi
 	addq	%rax, %rsi
@@ -255,7 +261,7 @@ _mcl_fpDbl_mod_NIST_P521L:              ## @mcl_fpDbl_mod_NIST_P521L
 	adcq	$0, %r10
 	adcq	$0, %r9
 	adcq	$0, %r8
-	adcq	$0, %rbx
+	adcq	$0, %rdx
 	movq	%rsi, %rax
 	andq	%r12, %rax
 	andq	%r15, %rax
@@ -263,23 +269,23 @@ _mcl_fpDbl_mod_NIST_P521L:              ## @mcl_fpDbl_mod_NIST_P521L
 	andq	%r10, %rax
 	andq	%r9, %rax
 	andq	%r8, %rax
-	movq	%rbx, %rdx
-	orq	$-512, %rdx             ## imm = 0xFE00
-	andq	%rax, %rdx
-	andq	%rcx, %rdx
-	cmpq	$-1, %rdx
+	movq	%rdx, %rbx
+	orq	$-512, %rbx                     ## imm = 0xFE00
+	andq	%rax, %rbx
+	andq	%rcx, %rbx
+	cmpq	$-1, %rbx
 	je	LBB4_1
-## BB#3:                                ## %nonzero
-	movq	%rsi, (%rdi)
-	movq	%rcx, 8(%rdi)
-	movq	%r12, 16(%rdi)
-	movq	%r15, 24(%rdi)
-	movq	%r11, 32(%rdi)
-	movq	%r10, 40(%rdi)
-	movq	%r9, 48(%rdi)
+## %bb.3:                               ## %nonzero
 	movq	%r8, 56(%rdi)
-	andl	$511, %ebx              ## imm = 0x1FF
-	movq	%rbx, 64(%rdi)
+	movq	%r9, 48(%rdi)
+	movq	%r10, 40(%rdi)
+	movq	%r11, 32(%rdi)
+	movq	%r15, 24(%rdi)
+	movq	%r12, 16(%rdi)
+	movq	%rcx, 8(%rdi)
+	movq	%rsi, (%rdi)
+	andl	$511, %edx                      ## imm = 0x1FF
+	movq	%rdx, 64(%rdi)
 	jmp	LBB4_2
 LBB4_1:                                 ## %zero
 	movq	$0, 64(%rdi)
@@ -297,404 +303,193 @@ LBB4_2:                                 ## %zero
 	popq	%r14
 	popq	%r15
 	retq
-
-	.globl	_mcl_fp_mulUnitPre1L
+                                        ## -- End function
+	.globl	_mulPv192x64                    ## -- Begin function mulPv192x64
 	.p2align	4, 0x90
-_mcl_fp_mulUnitPre1L:                   ## @mcl_fp_mulUnitPre1L
-## BB#0:
+_mulPv192x64:                           ## @mulPv192x64
+## %bb.0:
+	movq	%rdx, %rcx
 	movq	%rdx, %rax
 	mulq	(%rsi)
+	movq	%rdx, %r8
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
+	movq	%rcx, %rax
+	mulq	16(%rsi)
+	movq	%rdx, %r9
+	movq	%rax, %r10
+	movq	%rcx, %rax
+	mulq	8(%rsi)
+	addq	%r8, %rax
+	movq	%rax, 8(%rdi)
+	adcq	%r10, %rdx
+	movq	%rdx, 16(%rdi)
+	adcq	$0, %r9
+	movq	%r9, 24(%rdi)
+	movq	%rdi, %rax
 	retq
-
-	.globl	_mcl_fpDbl_mulPre1L
+                                        ## -- End function
+	.globl	_mcl_fp_mulUnitPre3L            ## -- Begin function mcl_fp_mulUnitPre3L
 	.p2align	4, 0x90
-_mcl_fpDbl_mulPre1L:                    ## @mcl_fpDbl_mulPre1L
-## BB#0:
-	movq	(%rdx), %rax
+_mcl_fp_mulUnitPre3L:                   ## @mcl_fp_mulUnitPre3L
+## %bb.0:
+	movq	%rdx, %rcx
+	movq	%rdx, %rax
+	mulq	16(%rsi)
+	movq	%rdx, %r8
+	movq	%rax, %r9
+	movq	%rcx, %rax
+	mulq	8(%rsi)
+	movq	%rdx, %r10
+	movq	%rax, %r11
+	movq	%rcx, %rax
 	mulq	(%rsi)
 	movq	%rax, (%rdi)
+	addq	%r11, %rdx
 	movq	%rdx, 8(%rdi)
+	adcq	%r9, %r10
+	movq	%r10, 16(%rdi)
+	adcq	$0, %r8
+	movq	%r8, 24(%rdi)
 	retq
-
-	.globl	_mcl_fpDbl_sqrPre1L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_mulPre3L             ## -- Begin function mcl_fpDbl_mulPre3L
 	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre1L:                    ## @mcl_fpDbl_sqrPre1L
-## BB#0:
-	movq	(%rsi), %rax
-	mulq	%rax
+_mcl_fpDbl_mulPre3L:                    ## @mcl_fpDbl_mulPre3L
+## %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %r11
+	movq	(%rsi), %r8
+	movq	8(%rsi), %r10
+	movq	(%rdx), %rcx
+	movq	%r8, %rax
+	mulq	%rcx
+	movq	%rdx, -8(%rsp)                  ## 8-byte Spill
+	movq	16(%rsi), %r12
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	retq
-
-	.globl	_mcl_fp_mont1L
-	.p2align	4, 0x90
-_mcl_fp_mont1L:                         ## @mcl_fp_mont1L
-## BB#0:
-	movq	(%rsi), %rax
-	mulq	(%rdx)
-	movq	%rax, %rsi
-	movq	%rdx, %r8
-	movq	-8(%rcx), %rax
-	imulq	%rsi, %rax
-	movq	(%rcx), %rcx
+	movq	%r12, %rax
 	mulq	%rcx
-	addq	%rsi, %rax
-	adcq	%r8, %rdx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rdx, %rsi
-	subq	%rcx, %rsi
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	%rdx, %rsi
-	movq	%rsi, (%rdi)
-	retq
-
-	.globl	_mcl_fp_montNF1L
-	.p2align	4, 0x90
-_mcl_fp_montNF1L:                       ## @mcl_fp_montNF1L
-## BB#0:
-	movq	(%rsi), %rax
-	mulq	(%rdx)
-	movq	%rax, %rsi
-	movq	%rdx, %r8
-	movq	-8(%rcx), %rax
-	imulq	%rsi, %rax
-	movq	(%rcx), %rcx
+	movq	%rdx, %r9
+	movq	%rax, -16(%rsp)                 ## 8-byte Spill
+	movq	%r10, %rax
 	mulq	%rcx
-	addq	%rsi, %rax
-	adcq	%r8, %rdx
-	movq	%rdx, %rax
-	subq	%rcx, %rax
-	cmovsq	%rdx, %rax
-	movq	%rax, (%rdi)
-	retq
-
-	.globl	_mcl_fp_montRed1L
-	.p2align	4, 0x90
-_mcl_fp_montRed1L:                      ## @mcl_fp_montRed1L
-## BB#0:
-	movq	(%rsi), %rcx
-	movq	-8(%rdx), %rax
-	imulq	%rcx, %rax
-	movq	(%rdx), %r8
+	movq	%rax, %rbx
+	movq	%rdx, %rcx
+	movq	8(%r11), %rsi
+	movq	%rsi, %rax
+	mulq	%r12
+	movq	%rdx, %r13
+	movq	%rax, %rbp
+	movq	%rsi, %rax
+	mulq	%r10
+	movq	%rdx, %r14
+	movq	%rax, %r15
+	movq	%rsi, %rax
+	mulq	%r8
+	addq	%r15, %rdx
+	adcq	%rbp, %r14
+	adcq	$0, %r13
+	addq	-8(%rsp), %rbx                  ## 8-byte Folded Reload
+	adcq	-16(%rsp), %rcx                 ## 8-byte Folded Reload
+	adcq	$0, %r9
+	addq	%rax, %rbx
+	movq	%rbx, 8(%rdi)
+	adcq	%rdx, %rcx
+	adcq	%r14, %r9
+	adcq	$0, %r13
+	movq	16(%r11), %rsi
+	movq	%rsi, %rax
+	mulq	%r12
+	movq	%rdx, %rbp
+	movq	%rax, %r11
+	movq	%rsi, %rax
+	mulq	%r10
+	movq	%rdx, %rbx
+	movq	%rax, %r10
+	movq	%rsi, %rax
 	mulq	%r8
+	addq	%r10, %rdx
+	adcq	%r11, %rbx
+	adcq	$0, %rbp
 	addq	%rcx, %rax
-	adcq	8(%rsi), %rdx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rdx, %rcx
-	subq	%r8, %rcx
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	%rdx, %rcx
-	movq	%rcx, (%rdi)
-	retq
-
-	.globl	_mcl_fp_addPre1L
-	.p2align	4, 0x90
-_mcl_fp_addPre1L:                       ## @mcl_fp_addPre1L
-## BB#0:
-	movq	(%rdx), %rax
-	addq	(%rsi), %rax
-	movq	%rax, (%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_subPre1L
-	.p2align	4, 0x90
-_mcl_fp_subPre1L:                       ## @mcl_fp_subPre1L
-## BB#0:
-	movq	(%rsi), %rcx
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	movq	%rcx, (%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_shr1_1L
-	.p2align	4, 0x90
-_mcl_fp_shr1_1L:                        ## @mcl_fp_shr1_1L
-## BB#0:
-	movq	(%rsi), %rax
-	shrq	%rax
-	movq	%rax, (%rdi)
-	retq
-
-	.globl	_mcl_fp_add1L
-	.p2align	4, 0x90
-_mcl_fp_add1L:                          ## @mcl_fp_add1L
-## BB#0:
-	movq	(%rdx), %rax
-	addq	(%rsi), %rax
-	movq	%rax, (%rdi)
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	subq	(%rcx), %rax
-	sbbq	$0, %rdx
-	testb	$1, %dl
-	jne	LBB14_2
-## BB#1:                                ## %nocarry
-	movq	%rax, (%rdi)
-LBB14_2:                                ## %carry
-	retq
-
-	.globl	_mcl_fp_addNF1L
-	.p2align	4, 0x90
-_mcl_fp_addNF1L:                        ## @mcl_fp_addNF1L
-## BB#0:
-	movq	(%rdx), %rax
-	addq	(%rsi), %rax
-	movq	%rax, %rdx
-	subq	(%rcx), %rdx
-	cmovsq	%rax, %rdx
-	movq	%rdx, (%rdi)
-	retq
-
-	.globl	_mcl_fp_sub1L
-	.p2align	4, 0x90
-_mcl_fp_sub1L:                          ## @mcl_fp_sub1L
-## BB#0:
-	movq	(%rsi), %rax
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	movq	%rax, (%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB16_2
-## BB#1:                                ## %nocarry
-	retq
-LBB16_2:                                ## %carry
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	retq
-
-	.globl	_mcl_fp_subNF1L
-	.p2align	4, 0x90
-_mcl_fp_subNF1L:                        ## @mcl_fp_subNF1L
-## BB#0:
-	movq	(%rsi), %rax
-	subq	(%rdx), %rax
-	movq	%rax, %rdx
-	sarq	$63, %rdx
-	andq	(%rcx), %rdx
-	addq	%rax, %rdx
-	movq	%rdx, (%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_add1L
-	.p2align	4, 0x90
-_mcl_fpDbl_add1L:                       ## @mcl_fpDbl_add1L
-## BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	movq	%rax, (%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rdx, %rsi
-	subq	(%rcx), %rsi
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	%rdx, %rsi
-	movq	%rsi, 8(%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_sub1L
-	.p2align	4, 0x90
-_mcl_fpDbl_sub1L:                       ## @mcl_fpDbl_sub1L
-## BB#0:
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r8
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r8
-	movq	%rax, (%rdi)
-	movl	$0, %eax
-	sbbq	$0, %rax
-	testb	$1, %al
-	cmovneq	(%rcx), %rsi
-	addq	%r8, %rsi
-	movq	%rsi, 8(%rdi)
-	retq
-
-	.globl	_mcl_fp_mulUnitPre2L
-	.p2align	4, 0x90
-_mcl_fp_mulUnitPre2L:                   ## @mcl_fp_mulUnitPre2L
-## BB#0:
-	movq	%rdx, %r8
-	movq	%r8, %rax
-	mulq	8(%rsi)
-	movq	%rdx, %rcx
-	movq	%rax, %r9
-	movq	%r8, %rax
-	mulq	(%rsi)
-	movq	%rax, (%rdi)
-	addq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	$0, %rcx
-	movq	%rcx, 16(%rdi)
-	retq
-
-	.globl	_mcl_fpDbl_mulPre2L
-	.p2align	4, 0x90
-_mcl_fpDbl_mulPre2L:                    ## @mcl_fpDbl_mulPre2L
-## BB#0:
-	pushq	%r14
-	pushq	%rbx
-	movq	%rdx, %r10
-	movq	(%rsi), %r8
-	movq	8(%rsi), %r11
-	movq	(%r10), %rcx
-	movq	%r8, %rax
-	mulq	%rcx
-	movq	%rdx, %r9
-	movq	%rax, (%rdi)
-	movq	%r11, %rax
-	mulq	%rcx
-	movq	%rdx, %r14
-	movq	%rax, %rsi
-	addq	%r9, %rsi
-	adcq	$0, %r14
-	movq	8(%r10), %rbx
-	movq	%r11, %rax
-	mulq	%rbx
-	movq	%rdx, %r9
-	movq	%rax, %rcx
-	movq	%r8, %rax
-	mulq	%rbx
-	addq	%rsi, %rax
-	movq	%rax, 8(%rdi)
-	adcq	%r14, %rcx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%rdx, %rcx
-	movq	%rcx, 16(%rdi)
-	adcq	%r9, %rax
-	movq	%rax, 24(%rdi)
+	movq	%rax, 16(%rdi)
+	adcq	%r9, %rdx
+	movq	%rdx, 24(%rdi)
+	adcq	%r13, %rbx
+	movq	%rbx, 32(%rdi)
+	adcq	$0, %rbp
+	movq	%rbp, 40(%rdi)
 	popq	%rbx
+	popq	%r12
+	popq	%r13
 	popq	%r14
+	popq	%r15
+	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_sqrPre2L
-	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre2L:                    ## @mcl_fpDbl_sqrPre2L
-## BB#0:
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %r8
-	movq	%rcx, %rax
-	mulq	%rcx
-	movq	%rdx, %rsi
-	movq	%rax, (%rdi)
-	movq	%r8, %rax
-	mulq	%rcx
-	movq	%rdx, %r9
-	movq	%rax, %r10
-	addq	%r10, %rsi
-	movq	%r9, %rcx
-	adcq	$0, %rcx
-	movq	%r8, %rax
-	mulq	%r8
-	addq	%r10, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%rcx, %rax
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r9, %rax
-	movq	%rax, 16(%rdi)
-	adcq	%rdx, %rcx
-	movq	%rcx, 24(%rdi)
-	retq
-
-	.globl	_mcl_fp_mont2L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sqrPre3L             ## -- Begin function mcl_fpDbl_sqrPre3L
 	.p2align	4, 0x90
-_mcl_fp_mont2L:                         ## @mcl_fp_mont2L
-## BB#0:
+_mcl_fpDbl_sqrPre3L:                    ## @mcl_fpDbl_sqrPre3L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	(%rsi), %r8
-	movq	8(%rsi), %r11
-	movq	(%rdx), %rsi
-	movq	8(%rdx), %r9
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r11
+	movq	8(%rsi), %rsi
 	movq	%r11, %rax
+	mulq	%r11
+	movq	%rdx, %rcx
+	movq	%rax, (%rdi)
+	movq	%r10, %rax
 	mulq	%rsi
-	movq	%rdx, %r15
-	movq	%rax, %r10
-	movq	%r8, %rax
-	mulq	%rsi
-	movq	%rax, %r14
-	movq	%rdx, %r13
-	addq	%r10, %r13
-	adcq	$0, %r15
-	movq	-8(%rcx), %r10
-	movq	(%rcx), %rbp
-	movq	%r14, %rsi
-	imulq	%r10, %rsi
-	movq	8(%rcx), %rdi
+	movq	%rdx, %r8
+	movq	%rax, %r9
 	movq	%rsi, %rax
-	mulq	%rdi
-	movq	%rdx, %rcx
+	mulq	%rsi
+	movq	%rdx, %r14
 	movq	%rax, %r12
 	movq	%rsi, %rax
-	mulq	%rbp
-	movq	%rdx, %rbx
-	addq	%r12, %rbx
-	adcq	$0, %rcx
-	addq	%r14, %rax
-	adcq	%r13, %rbx
-	adcq	%r15, %rcx
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	movq	%r9, %rax
 	mulq	%r11
-	movq	%rdx, %r14
-	movq	%rax, %r11
-	movq	%r9, %rax
-	mulq	%r8
-	movq	%rax, %r8
+	movq	%rax, %r15
 	movq	%rdx, %rsi
-	addq	%r11, %rsi
-	adcq	$0, %r14
-	addq	%rbx, %r8
-	adcq	%rcx, %rsi
-	adcq	%r15, %r14
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	imulq	%r8, %r10
+	addq	%rdx, %r12
+	adcq	%r9, %r14
+	movq	%r8, %r13
+	adcq	$0, %r13
 	movq	%r10, %rax
-	mulq	%rdi
-	movq	%rdx, %rcx
-	movq	%rax, %r9
+	mulq	%r11
+	movq	%rax, %r11
+	movq	%rdx, %rbx
+	addq	%r15, %rcx
+	adcq	%rax, %rsi
+	movq	%rdx, %rbp
+	adcq	$0, %rbp
+	addq	%r15, %rcx
+	movq	%rcx, 8(%rdi)
+	adcq	%r12, %rsi
+	adcq	%r14, %rbp
+	adcq	$0, %r13
 	movq	%r10, %rax
-	mulq	%rbp
-	addq	%r9, %rdx
-	adcq	$0, %rcx
-	addq	%r8, %rax
-	adcq	%rsi, %rdx
-	adcq	%r14, %rcx
-	adcq	$0, %rbx
-	movq	%rdx, %rax
-	subq	%rbp, %rax
-	movq	%rcx, %rsi
-	sbbq	%rdi, %rsi
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%rcx, %rsi
-	testb	%bl, %bl
-	cmovneq	%rdx, %rax
-	movq	-8(%rsp), %rcx          ## 8-byte Reload
-	movq	%rax, (%rcx)
-	movq	%rsi, 8(%rcx)
+	mulq	%r10
+	addq	%r9, %rbx
+	adcq	%r8, %rax
+	adcq	$0, %rdx
+	addq	%r11, %rsi
+	movq	%rsi, 16(%rdi)
+	adcq	%rbp, %rbx
+	movq	%rbx, 24(%rdi)
+	adcq	%r13, %rax
+	movq	%rax, 32(%rdi)
+	adcq	$0, %rdx
+	movq	%rdx, 40(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -702,84 +497,176 @@ _mcl_fp_mont2L:                         ## @mcl_fp_mont2L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montNF2L
+                                        ## -- End function
+	.globl	_mcl_fp_mont3L                  ## -- Begin function mcl_fp_mont3L
 	.p2align	4, 0x90
-_mcl_fp_montNF2L:                       ## @mcl_fp_montNF2L
-## BB#0:
+_mcl_fp_mont3L:                         ## @mcl_fp_mont3L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	(%rsi), %r8
-	movq	8(%rsi), %r11
-	movq	(%rdx), %rbp
-	movq	8(%rdx), %r9
-	movq	%r8, %rax
-	mulq	%rbp
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	16(%rsi), %r10
+	movq	(%rdx), %rdi
+	movq	%rdx, %r11
+	movq	%rdx, -64(%rsp)                 ## 8-byte Spill
+	movq	%r10, %rax
+	mulq	%rdi
+	movq	%rax, %rbp
+	movq	%rdx, %r15
+	movq	(%rsi), %rbx
+	movq	%rbx, -16(%rsp)                 ## 8-byte Spill
+	movq	8(%rsi), %r14
+	movq	%r14, %rax
+	movq	%r14, -72(%rsp)                 ## 8-byte Spill
+	mulq	%rdi
+	movq	%rdx, %r8
 	movq	%rax, %rsi
-	movq	%rdx, %r14
-	movq	-8(%rcx), %r10
-	movq	(%rcx), %r15
-	movq	%rsi, %rbx
-	imulq	%r10, %rbx
-	movq	8(%rcx), %rdi
 	movq	%rbx, %rax
 	mulq	%rdi
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
+	movq	%rax, %r12
+	movq	%rdx, %rdi
+	addq	%rsi, %rdi
+	adcq	%rbp, %r8
+	adcq	$0, %r15
+	movq	-8(%rcx), %rbp
+	movq	%rbp, -32(%rsp)                 ## 8-byte Spill
+	imulq	%rax, %rbp
+	movq	16(%rcx), %rdx
+	movq	%rdx, -56(%rsp)                 ## 8-byte Spill
+	movq	%rbp, %rax
+	mulq	%rdx
 	movq	%rax, %r13
-	movq	%rbx, %rax
-	mulq	%r15
-	movq	%rdx, %r12
-	movq	%rax, %rbx
-	movq	%r11, %rax
-	mulq	%rbp
-	movq	%rdx, %rcx
-	movq	%rax, %rbp
-	addq	%r14, %rbp
-	adcq	$0, %rcx
-	addq	%rsi, %rbx
-	adcq	%r13, %rbp
-	adcq	$0, %rcx
-	addq	%r12, %rbp
-	adcq	-16(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%r9, %rax
-	mulq	%r11
+	movq	%rdx, %r9
+	movq	(%rcx), %rbx
+	movq	%rbx, -48(%rsp)                 ## 8-byte Spill
+	movq	8(%rcx), %rcx
+	movq	%rcx, -40(%rsp)                 ## 8-byte Spill
+	movq	%rbp, %rax
+	mulq	%rcx
 	movq	%rdx, %rsi
-	movq	%rax, %r11
-	movq	%r9, %rax
-	mulq	%r8
+	movq	%rax, %rcx
+	movq	%rbp, %rax
+	mulq	%rbx
+	movq	%rdx, %rbp
+	addq	%rcx, %rbp
+	adcq	%r13, %rsi
+	adcq	$0, %r9
+	addq	%r12, %rax
+	adcq	%rdi, %rbp
+	movq	8(%r11), %rcx
+	adcq	%r8, %rsi
+	adcq	%r15, %r9
+	setb	%r11b
+	movq	%rcx, %rax
+	mulq	%r10
+	movq	%rdx, %r15
+	movq	%rax, %rdi
+	movq	%rcx, %rax
+	mulq	%r14
+	movq	%rdx, %r13
 	movq	%rax, %r8
+	movq	%rcx, %rax
+	movq	-16(%rsp), %rcx                 ## 8-byte Reload
+	mulq	%rcx
+	movq	%rax, %r12
 	movq	%rdx, %rbx
-	addq	%r11, %rbx
-	adcq	$0, %rsi
-	addq	%rbp, %r8
-	adcq	%rcx, %rbx
-	adcq	$0, %rsi
-	imulq	%r8, %r10
-	movq	%r10, %rax
-	mulq	%rdi
-	movq	%rdx, %rcx
+	addq	%r8, %rbx
+	adcq	%rdi, %r13
+	adcq	$0, %r15
+	addq	%rbp, %r12
+	adcq	%rsi, %rbx
+	movzbl	%r11b, %eax
+	adcq	%r9, %r13
+	adcq	%rax, %r15
+	setb	-73(%rsp)                       ## 1-byte Folded Spill
+	movq	-32(%rsp), %rsi                 ## 8-byte Reload
+	imulq	%r12, %rsi
+	movq	%rsi, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, -24(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rdi
+	movq	%rax, %r11
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
 	movq	%rax, %rbp
-	movq	%r10, %rax
-	mulq	%r15
-	addq	%r8, %rax
+	movq	%rdx, %rsi
+	movq	-64(%rsp), %rax                 ## 8-byte Reload
+	movq	16(%rax), %r9
+	movq	%r9, %rax
+	mulq	%r10
+	movq	%rdx, %r8
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	movq	%r9, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	movq	%rdx, %r10
+	addq	%rdi, %rbp
+	adcq	-24(%rsp), %rsi                 ## 8-byte Folded Reload
+	adcq	$0, %r14
+	addq	%r12, %r11
 	adcq	%rbx, %rbp
-	adcq	$0, %rsi
-	addq	%rdx, %rbp
-	adcq	%rcx, %rsi
-	movq	%rbp, %rax
-	subq	%r15, %rax
-	movq	%rsi, %rcx
-	sbbq	%rdi, %rcx
-	cmovsq	%rbp, %rax
-	movq	-8(%rsp), %rdx          ## 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovsq	%rsi, %rcx
-	movq	%rcx, 8(%rdx)
+	adcq	%r13, %rsi
+	adcq	%r15, %r14
+	movzbl	-73(%rsp), %edi                 ## 1-byte Folded Reload
+	adcq	$0, %rdi
+	movq	%r9, %rax
+	mulq	%rcx
+	movq	%rax, %r9
+	movq	%rdx, %rcx
+	addq	-72(%rsp), %rcx                 ## 8-byte Folded Reload
+	adcq	-64(%rsp), %r10                 ## 8-byte Folded Reload
+	adcq	$0, %r8
+	addq	%rbp, %r9
+	adcq	%rsi, %rcx
+	adcq	%r14, %r10
+	adcq	%rdi, %r8
+	setb	%r11b
+	movq	-32(%rsp), %rsi                 ## 8-byte Reload
+	imulq	%r9, %rsi
+	movq	%rsi, %rax
+	movq	-56(%rsp), %r14                 ## 8-byte Reload
+	mulq	%r14
+	movq	%rdx, %rbx
+	movq	%rax, %r12
+	movq	%rsi, %rax
+	movq	-40(%rsp), %r15                 ## 8-byte Reload
+	mulq	%r15
+	movq	%rdx, %rbp
+	movq	%rax, %rdi
+	movq	%rsi, %rax
+	movq	-48(%rsp), %rsi                 ## 8-byte Reload
+	mulq	%rsi
+	addq	%rdi, %rdx
+	adcq	%r12, %rbp
+	adcq	$0, %rbx
+	addq	%r9, %rax
+	adcq	%rcx, %rdx
+	adcq	%r10, %rbp
+	movzbl	%r11b, %eax
+	adcq	%r8, %rbx
+	adcq	$0, %rax
+	movq	%rdx, %rdi
+	subq	%rsi, %rdi
+	movq	%rbp, %rsi
+	sbbq	%r15, %rsi
+	movq	%rbx, %rcx
+	sbbq	%r14, %rcx
+	sbbq	$0, %rax
+	testb	$1, %al
+	cmovneq	%rbx, %rcx
+	movq	-8(%rsp), %rax                  ## 8-byte Reload
+	movq	%rcx, 16(%rax)
+	cmovneq	%rbp, %rsi
+	movq	%rsi, 8(%rax)
+	cmovneq	%rdx, %rdi
+	movq	%rdi, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -787,347 +674,816 @@ _mcl_fp_montNF2L:                       ## @mcl_fp_montNF2L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montRed2L
+                                        ## -- End function
+	.globl	_mcl_fp_montNF3L                ## -- Begin function mcl_fp_montNF3L
 	.p2align	4, 0x90
-_mcl_fp_montRed2L:                      ## @mcl_fp_montRed2L
-## BB#0:
+_mcl_fp_montNF3L:                       ## @mcl_fp_montNF3L
+## %bb.0:
+	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
+	pushq	%r13
+	pushq	%r12
 	pushq	%rbx
-	movq	-8(%rdx), %r9
-	movq	(%rdx), %r11
-	movq	(%rsi), %rbx
-	movq	%rbx, %rcx
-	imulq	%r9, %rcx
-	movq	8(%rdx), %r14
-	movq	%rcx, %rax
-	mulq	%r14
-	movq	%rdx, %r8
+	movq	%rcx, %r8
+	movq	%rdx, %r15
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	16(%rsi), %rax
+	movq	%rax, -48(%rsp)                 ## 8-byte Spill
+	movq	(%rdx), %rdi
+	movq	%rdx, -16(%rsp)                 ## 8-byte Spill
+	mulq	%rdi
+	movq	%rax, %rcx
+	movq	%rdx, %r13
+	movq	(%rsi), %r12
+	movq	8(%rsi), %rax
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	mulq	%rdi
+	movq	%rdx, %rbx
+	movq	%rax, %rsi
+	movq	%r12, %rax
+	movq	%r12, -24(%rsp)                 ## 8-byte Spill
+	mulq	%rdi
 	movq	%rax, %r10
+	movq	%rdx, %rdi
+	addq	%rsi, %rdi
+	adcq	%rcx, %rbx
+	adcq	$0, %r13
+	movq	-8(%r8), %r11
+	movq	%r11, %rbp
+	imulq	%rax, %rbp
+	movq	16(%r8), %rax
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	mulq	%rbp
+	movq	%rax, %r9
+	movq	%rdx, %r14
+	movq	(%r8), %rcx
+	movq	%rcx, -40(%rsp)                 ## 8-byte Spill
+	movq	8(%r8), %rax
+	movq	%rax, -32(%rsp)                 ## 8-byte Spill
+	mulq	%rbp
+	movq	%rdx, %r8
+	movq	%rax, %rsi
 	movq	%rcx, %rax
-	mulq	%r11
-	movq	%rdx, %rcx
-	addq	%r10, %rcx
-	adcq	$0, %r8
-	movq	24(%rsi), %r15
-	addq	%rbx, %rax
-	adcq	8(%rsi), %rcx
-	adcq	16(%rsi), %r8
-	adcq	$0, %r15
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	imulq	%rcx, %r9
-	movq	%r9, %rax
+	mulq	%rbp
+	addq	%r10, %rax
+	adcq	%rdi, %rsi
+	adcq	%rbx, %r9
+	adcq	$0, %r13
+	addq	%rdx, %rsi
+	movq	8(%r15), %rdi
+	adcq	%r8, %r9
+	adcq	%r14, %r13
+	movq	-48(%rsp), %r14                 ## 8-byte Reload
+	movq	%r14, %rax
+	mulq	%rdi
+	movq	%rdx, %rbx
+	movq	%rax, %r8
+	movq	-64(%rsp), %rax                 ## 8-byte Reload
+	mulq	%rdi
+	movq	%rdx, %rbp
+	movq	%rax, %rcx
+	movq	%r12, %rax
+	mulq	%rdi
+	movq	%rax, %rdi
+	movq	%rdx, %r10
+	addq	%rcx, %r10
+	adcq	%r8, %rbp
+	adcq	$0, %rbx
+	addq	%rsi, %rdi
+	adcq	%r9, %r10
+	adcq	%r13, %rbp
+	adcq	$0, %rbx
+	movq	%r11, %rsi
+	imulq	%rdi, %rsi
+	movq	%rsi, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r13
+	movq	%rsi, %rax
+	movq	-32(%rsp), %r15                 ## 8-byte Reload
+	mulq	%r15
+	movq	%rdx, %r9
+	movq	%rax, %rcx
+	movq	%rsi, %rax
+	movq	-40(%rsp), %r12                 ## 8-byte Reload
+	mulq	%r12
+	addq	%rdi, %rax
+	adcq	%r10, %rcx
+	adcq	%rbp, %r13
+	adcq	$0, %rbx
+	addq	%rdx, %rcx
+	adcq	%r9, %r13
+	adcq	%r8, %rbx
+	movq	-16(%rsp), %rax                 ## 8-byte Reload
+	movq	16(%rax), %rdi
+	movq	%rdi, %rax
 	mulq	%r14
 	movq	%rdx, %rsi
+	movq	%rax, %r8
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %r9
+	movq	%rdi, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
 	movq	%rax, %r10
-	movq	%r9, %rax
+	movq	%rdx, %rdi
+	addq	%r9, %rdi
+	adcq	%r8, %rbp
+	adcq	$0, %rsi
+	addq	%rcx, %r10
+	adcq	%r13, %rdi
+	adcq	%rbx, %rbp
+	adcq	$0, %rsi
+	imulq	%r10, %r11
+	movq	-56(%rsp), %r14                 ## 8-byte Reload
+	movq	%r14, %rax
 	mulq	%r11
-	addq	%r10, %rdx
+	movq	%rdx, %r8
+	movq	%rax, %rcx
+	movq	%r15, %rax
+	mulq	%r11
+	movq	%rdx, %r9
+	movq	%rax, %rbx
+	movq	%r12, %rax
+	mulq	%r11
+	addq	%r10, %rax
+	adcq	%rdi, %rbx
+	adcq	%rbp, %rcx
 	adcq	$0, %rsi
-	addq	%rcx, %rax
-	adcq	%r8, %rdx
-	adcq	%r15, %rsi
-	adcq	$0, %rbx
-	movq	%rdx, %rax
-	subq	%r11, %rax
-	movq	%rsi, %rcx
-	sbbq	%r14, %rcx
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%rsi, %rcx
-	testb	%bl, %bl
-	cmovneq	%rdx, %rax
-	movq	%rax, (%rdi)
-	movq	%rcx, 8(%rdi)
+	addq	%rdx, %rbx
+	adcq	%r9, %rcx
+	adcq	%r8, %rsi
+	movq	%rbx, %rax
+	subq	%r12, %rax
+	movq	%rcx, %rdx
+	sbbq	%r15, %rdx
+	movq	%rsi, %rbp
+	sbbq	%r14, %rbp
+	movq	%rbp, %rdi
+	sarq	$63, %rdi
+	cmovsq	%rsi, %rbp
+	movq	-8(%rsp), %rsi                  ## 8-byte Reload
+	movq	%rbp, 16(%rsi)
+	cmovsq	%rcx, %rdx
+	movq	%rdx, 8(%rsi)
+	cmovsq	%rbx, %rax
+	movq	%rax, (%rsi)
 	popq	%rbx
+	popq	%r12
+	popq	%r13
 	popq	%r14
 	popq	%r15
+	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_addPre2L
+                                        ## -- End function
+	.globl	_mcl_fp_montRed3L               ## -- Begin function mcl_fp_montRed3L
 	.p2align	4, 0x90
-_mcl_fp_addPre2L:                       ## @mcl_fp_addPre2L
-## BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rcx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rcx
-	movq	%rax, (%rdi)
-	movq	%rcx, 8(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
+_mcl_fp_montRed3L:                      ## @mcl_fp_montRed3L
+## %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %rcx
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	-8(%rdx), %r9
+	movq	(%rdx), %rdi
+	movq	(%rsi), %r14
+	movq	%r14, %rbx
+	imulq	%r9, %rbx
+	movq	16(%rdx), %rbp
+	movq	%rbx, %rax
+	mulq	%rbp
+	movq	%rbp, -16(%rsp)                 ## 8-byte Spill
+	movq	%rax, %r11
+	movq	%rdx, %r8
+	movq	8(%rcx), %rcx
+	movq	%rcx, -32(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rax
+	mulq	%rcx
+	movq	%rdx, %r10
+	movq	%rax, %rcx
+	movq	%rbx, %rax
+	mulq	%rdi
+	movq	%rdi, -24(%rsp)                 ## 8-byte Spill
+	movq	%rdx, %rbx
+	addq	%rcx, %rbx
+	adcq	%r11, %r10
+	adcq	$0, %r8
+	addq	%r14, %rax
+	adcq	8(%rsi), %rbx
+	adcq	16(%rsi), %r10
+	adcq	24(%rsi), %r8
+	setb	-33(%rsp)                       ## 1-byte Folded Spill
+	movq	%r9, %rcx
+	imulq	%rbx, %rcx
+	movq	%rcx, %rax
+	mulq	%rbp
+	movq	%rdx, %r14
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	mulq	%rdi
+	movq	%rdx, %r12
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	movq	-32(%rsp), %rbp                 ## 8-byte Reload
+	mulq	%rbp
+	movq	%rdx, %r11
+	movq	%rax, %rcx
+	addq	%r12, %rcx
+	adcq	%r15, %r11
+	movzbl	-33(%rsp), %r15d                ## 1-byte Folded Reload
+	adcq	%r14, %r15
+	addq	%rbx, %r13
+	adcq	%r10, %rcx
+	adcq	%r8, %r11
+	adcq	32(%rsi), %r15
+	setb	%dil
+	imulq	%rcx, %r9
+	movq	%r9, %rax
+	movq	-16(%rsp), %r13                 ## 8-byte Reload
+	mulq	%r13
+	movq	%rdx, %r12
+	movq	%rax, %r8
+	movq	%r9, %rax
+	movq	-24(%rsp), %rbx                 ## 8-byte Reload
+	mulq	%rbx
+	movq	%rdx, %r10
+	movq	%rax, %r14
+	movq	%r9, %rax
+	mulq	%rbp
+	addq	%r10, %rax
+	adcq	%r8, %rdx
+	movzbl	%dil, %edi
+	adcq	%rdi, %r12
+	addq	%rcx, %r14
+	adcq	%r11, %rax
+	adcq	%r15, %rdx
+	adcq	40(%rsi), %r12
+	xorl	%ecx, %ecx
+	movq	%rax, %rsi
+	subq	%rbx, %rsi
+	movq	%rdx, %rdi
+	sbbq	%rbp, %rdi
+	movq	%r12, %rbx
+	sbbq	%r13, %rbx
+	sbbq	%rcx, %rcx
+	testb	$1, %cl
+	cmovneq	%r12, %rbx
+	movq	-8(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rbx, 16(%rcx)
+	cmovneq	%rdx, %rdi
+	movq	%rdi, 8(%rcx)
+	cmovneq	%rax, %rsi
+	movq	%rsi, (%rcx)
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_montRedNF3L             ## -- Begin function mcl_fp_montRedNF3L
+	.p2align	4, 0x90
+_mcl_fp_montRedNF3L:                    ## @mcl_fp_montRedNF3L
+## %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %rcx
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	-8(%rdx), %r9
+	movq	(%rdx), %rbp
+	movq	(%rsi), %r14
+	movq	%r14, %rbx
+	imulq	%r9, %rbx
+	movq	16(%rdx), %rdi
+	movq	%rbx, %rax
+	mulq	%rdi
+	movq	%rdi, %r15
+	movq	%rdi, -16(%rsp)                 ## 8-byte Spill
+	movq	%rax, %r11
+	movq	%rdx, %r8
+	movq	8(%rcx), %rdi
+	movq	%rbx, %rax
+	mulq	%rdi
+	movq	%rdx, %r10
+	movq	%rax, %rcx
+	movq	%rbx, %rax
+	mulq	%rbp
+	movq	%rbp, -24(%rsp)                 ## 8-byte Spill
+	movq	%rdx, %rbx
+	addq	%rcx, %rbx
+	adcq	%r11, %r10
+	adcq	$0, %r8
+	addq	%r14, %rax
+	adcq	8(%rsi), %rbx
+	adcq	16(%rsi), %r10
+	adcq	24(%rsi), %r8
+	setb	-25(%rsp)                       ## 1-byte Folded Spill
+	movq	%r9, %rcx
+	imulq	%rbx, %rcx
+	movq	%rcx, %rax
+	mulq	%r15
+	movq	%rdx, %r14
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	mulq	%rbp
+	movq	%rdx, %r12
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	mulq	%rdi
+	movq	%rdx, %r11
+	movq	%rax, %rcx
+	addq	%r12, %rcx
+	adcq	%r15, %r11
+	movzbl	-25(%rsp), %r15d                ## 1-byte Folded Reload
+	adcq	%r14, %r15
+	addq	%rbx, %r13
+	adcq	%r10, %rcx
+	adcq	%r8, %r11
+	adcq	32(%rsi), %r15
+	setb	%bpl
+	imulq	%rcx, %r9
+	movq	%r9, %rax
+	movq	-16(%rsp), %r13                 ## 8-byte Reload
+	mulq	%r13
+	movq	%rdx, %r12
+	movq	%rax, %r8
+	movq	%r9, %rax
+	movq	-24(%rsp), %rbx                 ## 8-byte Reload
+	mulq	%rbx
+	movq	%rdx, %r10
+	movq	%rax, %r14
+	movq	%r9, %rax
+	mulq	%rdi
+	addq	%r10, %rax
+	adcq	%r8, %rdx
+	movzbl	%bpl, %ebp
+	adcq	%rbp, %r12
+	addq	%rcx, %r14
+	adcq	%r11, %rax
+	adcq	%r15, %rdx
+	adcq	40(%rsi), %r12
+	movq	%rax, %rcx
+	subq	%rbx, %rcx
+	movq	%rdx, %rsi
+	sbbq	%rdi, %rsi
+	movq	%r12, %rbx
+	sbbq	%r13, %rbx
+	movq	%rbx, %rdi
+	sarq	$63, %rdi
+	cmovsq	%r12, %rbx
+	movq	-8(%rsp), %rdi                  ## 8-byte Reload
+	movq	%rbx, 16(%rdi)
+	cmovsq	%rdx, %rsi
+	movq	%rsi, 8(%rdi)
+	cmovsq	%rax, %rcx
+	movq	%rcx, (%rdi)
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_subPre2L
+                                        ## -- End function
+	.globl	_mcl_fp_addPre3L                ## -- Begin function mcl_fp_addPre3L
 	.p2align	4, 0x90
-_mcl_fp_subPre2L:                       ## @mcl_fp_subPre2L
-## BB#0:
+_mcl_fp_addPre3L:                       ## @mcl_fp_addPre3L
+## %bb.0:
+	movq	16(%rsi), %rax
 	movq	(%rsi), %rcx
 	movq	8(%rsi), %rsi
+	addq	(%rdx), %rcx
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rax
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rcx, (%rdi)
+	setb	%al
+	movzbl	%al, %eax
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_subPre3L                ## -- Begin function mcl_fp_subPre3L
+	.p2align	4, 0x90
+_mcl_fp_subPre3L:                       ## @mcl_fp_subPre3L
+## %bb.0:
+	movq	16(%rsi), %rcx
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
 	xorl	%eax, %eax
-	subq	(%rdx), %rcx
+	subq	(%rdx), %r8
 	sbbq	8(%rdx), %rsi
-	movq	%rcx, (%rdi)
+	sbbq	16(%rdx), %rcx
+	movq	%rcx, 16(%rdi)
 	movq	%rsi, 8(%rdi)
-	sbbq	$0, %rax
+	movq	%r8, (%rdi)
+	sbbq	%rax, %rax
 	andl	$1, %eax
 	retq
-
-	.globl	_mcl_fp_shr1_2L
+                                        ## -- End function
+	.globl	_mcl_fp_shr1_3L                 ## -- Begin function mcl_fp_shr1_3L
 	.p2align	4, 0x90
-_mcl_fp_shr1_2L:                        ## @mcl_fp_shr1_2L
-## BB#0:
+_mcl_fp_shr1_3L:                        ## @mcl_fp_shr1_3L
+## %bb.0:
 	movq	(%rsi), %rax
 	movq	8(%rsi), %rcx
+	movq	16(%rsi), %rdx
+	movq	%rdx, %rsi
+	shrq	%rsi
+	movq	%rsi, 16(%rdi)
+	shldq	$63, %rcx, %rdx
+	movq	%rdx, 8(%rdi)
 	shrdq	$1, %rcx, %rax
 	movq	%rax, (%rdi)
-	shrq	%rcx
-	movq	%rcx, 8(%rdi)
 	retq
-
-	.globl	_mcl_fp_add2L
+                                        ## -- End function
+	.globl	_mcl_fp_add3L                   ## -- Begin function mcl_fp_add3L
 	.p2align	4, 0x90
-_mcl_fp_add2L:                          ## @mcl_fp_add2L
-## BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
+_mcl_fp_add3L:                          ## @mcl_fp_add3L
+## %bb.0:
+	movq	16(%rsi), %r8
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r8
+	movq	%r8, 16(%rdi)
+	movq	%rsi, 8(%rdi)
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
+	setb	%dl
+	movzbl	%dl, %edx
 	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB29_2
-## BB#1:                                ## %nocarry
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %r8
+	sbbq	$0, %rdx
+	testb	$1, %dl
+	jne	LBB16_2
+## %bb.1:                               ## %nocarry
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-LBB29_2:                                ## %carry
+	movq	%rsi, 8(%rdi)
+	movq	%r8, 16(%rdi)
+LBB16_2:                                ## %carry
 	retq
-
-	.globl	_mcl_fp_addNF2L
+                                        ## -- End function
+	.globl	_mcl_fp_addNF3L                 ## -- Begin function mcl_fp_addNF3L
 	.p2align	4, 0x90
-_mcl_fp_addNF2L:                        ## @mcl_fp_addNF2L
-## BB#0:
-	movq	(%rdx), %rax
-	movq	8(%rdx), %r8
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %r8
-	movq	%rax, %rsi
+_mcl_fp_addNF3L:                        ## @mcl_fp_addNF3L
+## %bb.0:
+	movq	16(%rdx), %r10
+	movq	(%rdx), %r8
+	movq	8(%rdx), %r9
+	addq	(%rsi), %r8
+	adcq	8(%rsi), %r9
+	adcq	16(%rsi), %r10
+	movq	%r8, %rsi
 	subq	(%rcx), %rsi
-	movq	%r8, %rdx
+	movq	%r9, %rdx
 	sbbq	8(%rcx), %rdx
-	testq	%rdx, %rdx
-	cmovsq	%rax, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r8, %rdx
-	movq	%rdx, 8(%rdi)
-	retq
-
-	.globl	_mcl_fp_sub2L
-	.p2align	4, 0x90
-_mcl_fp_sub2L:                          ## @mcl_fp_sub2L
-## BB#0:
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r8
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r8
-	movq	%rax, (%rdi)
-	movq	%r8, 8(%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB31_2
-## BB#1:                                ## %nocarry
-	retq
-LBB31_2:                                ## %carry
-	movq	8(%rcx), %rdx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r8, %rdx
+	movq	%r10, %rax
+	sbbq	16(%rcx), %rax
+	movq	%rax, %rcx
+	sarq	$63, %rcx
+	cmovsq	%r10, %rax
+	movq	%rax, 16(%rdi)
+	cmovsq	%r9, %rdx
 	movq	%rdx, 8(%rdi)
+	cmovsq	%r8, %rsi
+	movq	%rsi, (%rdi)
 	retq
-
-	.globl	_mcl_fp_subNF2L
+                                        ## -- End function
+	.globl	_mcl_fp_sub3L                   ## -- Begin function mcl_fp_sub3L
 	.p2align	4, 0x90
-_mcl_fp_subNF2L:                        ## @mcl_fp_subNF2L
-## BB#0:
+_mcl_fp_sub3L:                          ## @mcl_fp_sub3L
+## %bb.0:
+	movq	16(%rsi), %rax
 	movq	(%rsi), %r8
 	movq	8(%rsi), %rsi
+	xorl	%r9d, %r9d
 	subq	(%rdx), %r8
 	sbbq	8(%rdx), %rsi
-	movq	%rsi, %rdx
+	sbbq	16(%rdx), %rax
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%r9, %r9
+	testb	$1, %r9b
+	jne	LBB18_2
+## %bb.1:                               ## %nocarry
+	retq
+LBB18_2:                                ## %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %rax
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_subNF3L                 ## -- Begin function mcl_fp_subNF3L
+	.p2align	4, 0x90
+_mcl_fp_subNF3L:                        ## @mcl_fp_subNF3L
+## %bb.0:
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r8
+	movq	8(%rsi), %r9
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %r9
+	sbbq	16(%rdx), %r10
+	movq	%r10, %rdx
 	sarq	$63, %rdx
-	movq	8(%rcx), %rax
+	movq	%rdx, %rsi
+	shldq	$1, %r10, %rsi
+	andq	(%rcx), %rsi
+	movq	16(%rcx), %rax
 	andq	%rdx, %rax
-	andq	(%rcx), %rdx
-	addq	%r8, %rdx
-	movq	%rdx, (%rdi)
-	adcq	%rsi, %rax
-	movq	%rax, 8(%rdi)
+	andq	8(%rcx), %rdx
+	addq	%r8, %rsi
+	movq	%rsi, (%rdi)
+	adcq	%r9, %rdx
+	movq	%rdx, 8(%rdi)
+	adcq	%r10, %rax
+	movq	%rax, 16(%rdi)
 	retq
-
-	.globl	_mcl_fpDbl_add2L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_add3L                ## -- Begin function mcl_fpDbl_add3L
 	.p2align	4, 0x90
-_mcl_fpDbl_add2L:                       ## @mcl_fpDbl_add2L
-## BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rdx), %r10
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r10
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	adcq	%r8, %r9
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r10, %rdx
+_mcl_fpDbl_add3L:                       ## @mcl_fpDbl_add3L
+## %bb.0:
+	movq	40(%rsi), %r10
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r8
+	movq	16(%rsi), %rax
+	movq	(%rsi), %r11
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r11
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rax
+	adcq	24(%rdx), %r8
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r10
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r11, (%rdi)
+	setb	%al
+	movzbl	%al, %r11d
+	movq	%r8, %rdx
 	subq	(%rcx), %rdx
 	movq	%r9, %rsi
 	sbbq	8(%rcx), %rsi
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%r10, %rdx
-	movq	%rdx, 16(%rdi)
-	testb	%al, %al
+	movq	%r10, %rax
+	sbbq	16(%rcx), %rax
+	sbbq	$0, %r11
+	testb	$1, %r11b
+	cmovneq	%r10, %rax
+	movq	%rax, 40(%rdi)
 	cmovneq	%r9, %rsi
-	movq	%rsi, 24(%rdi)
+	movq	%rsi, 32(%rdi)
+	cmovneq	%r8, %rdx
+	movq	%rdx, 24(%rdi)
 	retq
-
-	.globl	_mcl_fpDbl_sub2L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sub3L                ## -- Begin function mcl_fpDbl_sub3L
 	.p2align	4, 0x90
-_mcl_fpDbl_sub2L:                       ## @mcl_fpDbl_sub2L
-## BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
+_mcl_fpDbl_sub3L:                       ## @mcl_fpDbl_sub3L
+## %bb.0:
+	pushq	%rbx
+	movq	40(%rsi), %r8
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r10
+	movq	16(%rsi), %rax
 	movq	(%rsi), %r11
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
+	movq	8(%rsi), %rbx
+	xorl	%esi, %esi
 	subq	(%rdx), %r11
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %r10
+	sbbq	8(%rdx), %rbx
+	sbbq	16(%rdx), %rax
+	sbbq	24(%rdx), %r10
+	sbbq	32(%rdx), %r9
+	sbbq	40(%rdx), %r8
+	movq	%rax, 16(%rdi)
+	movq	%rbx, 8(%rdi)
 	movq	%r11, (%rdi)
-	movq	%rsi, 8(%rdi)
-	sbbq	%r8, %r9
-	movl	$0, %edx
-	sbbq	$0, %rdx
-	andl	$1, %edx
-	movq	(%rcx), %rsi
-	cmoveq	%rax, %rsi
-	testb	%dl, %dl
-	cmovneq	8(%rcx), %rax
+	sbbq	%rsi, %rsi
+	andl	$1, %esi
+	negq	%rsi
+	movq	16(%rcx), %rax
+	andq	%rsi, %rax
+	movq	8(%rcx), %rdx
+	andq	%rsi, %rdx
+	andq	(%rcx), %rsi
 	addq	%r10, %rsi
-	movq	%rsi, 16(%rdi)
-	adcq	%r9, %rax
-	movq	%rax, 24(%rdi)
+	movq	%rsi, 24(%rdi)
+	adcq	%r9, %rdx
+	movq	%rdx, 32(%rdi)
+	adcq	%r8, %rax
+	movq	%rax, 40(%rdi)
+	popq	%rbx
 	retq
-
-	.globl	_mcl_fp_mulUnitPre3L
+                                        ## -- End function
+	.globl	_mulPv256x64                    ## -- Begin function mulPv256x64
 	.p2align	4, 0x90
-_mcl_fp_mulUnitPre3L:                   ## @mcl_fp_mulUnitPre3L
-## BB#0:
+_mulPv256x64:                           ## @mulPv256x64
+## %bb.0:
+	pushq	%rbx
 	movq	%rdx, %rcx
+	movq	%rdx, %rax
+	mulq	(%rsi)
+	movq	%rdx, %r8
+	movq	%rax, (%rdi)
+	movq	%rcx, %rax
+	mulq	24(%rsi)
+	movq	%rdx, %r9
+	movq	%rax, %r10
 	movq	%rcx, %rax
 	mulq	16(%rsi)
+	movq	%rdx, %r11
+	movq	%rax, %rbx
+	movq	%rcx, %rax
+	mulq	8(%rsi)
+	addq	%r8, %rax
+	movq	%rax, 8(%rdi)
+	adcq	%rbx, %rdx
+	movq	%rdx, 16(%rdi)
+	adcq	%r10, %r11
+	movq	%r11, 24(%rdi)
+	adcq	$0, %r9
+	movq	%r9, 32(%rdi)
+	movq	%rdi, %rax
+	popq	%rbx
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_mulUnitPre4L            ## -- Begin function mcl_fp_mulUnitPre4L
+	.p2align	4, 0x90
+_mcl_fp_mulUnitPre4L:                   ## @mcl_fp_mulUnitPre4L
+## %bb.0:
+	pushq	%r14
+	pushq	%rbx
+	movq	%rdx, %rcx
+	movq	%rdx, %rax
+	mulq	24(%rsi)
 	movq	%rdx, %r8
 	movq	%rax, %r9
 	movq	%rcx, %rax
-	mulq	8(%rsi)
+	mulq	16(%rsi)
 	movq	%rdx, %r10
 	movq	%rax, %r11
 	movq	%rcx, %rax
+	mulq	8(%rsi)
+	movq	%rdx, %rbx
+	movq	%rax, %r14
+	movq	%rcx, %rax
 	mulq	(%rsi)
 	movq	%rax, (%rdi)
-	addq	%r11, %rdx
+	addq	%r14, %rdx
 	movq	%rdx, 8(%rdi)
+	adcq	%r11, %rbx
+	movq	%rbx, 16(%rdi)
 	adcq	%r9, %r10
-	movq	%r10, 16(%rdi)
+	movq	%r10, 24(%rdi)
 	adcq	$0, %r8
-	movq	%r8, 24(%rdi)
+	movq	%r8, 32(%rdi)
+	popq	%rbx
+	popq	%r14
 	retq
-
-	.globl	_mcl_fpDbl_mulPre3L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_mulPre4L             ## -- Begin function mcl_fpDbl_mulPre4L
 	.p2align	4, 0x90
-_mcl_fpDbl_mulPre3L:                    ## @mcl_fpDbl_mulPre3L
-## BB#0:
+_mcl_fpDbl_mulPre4L:                    ## @mcl_fpDbl_mulPre4L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, %r10
-	movq	(%rsi), %r8
+	movq	%rdx, %rbp
+	movq	(%rsi), %rax
 	movq	8(%rsi), %r9
-	movq	(%r10), %rbx
-	movq	%r8, %rax
+	movq	(%rdx), %rbx
+	movq	%rax, %r8
+	movq	%rax, -8(%rsp)                  ## 8-byte Spill
 	mulq	%rbx
-	movq	%rdx, %rcx
-	movq	16(%rsi), %r11
+	movq	%rdx, -80(%rsp)                 ## 8-byte Spill
+	movq	16(%rsi), %r10
+	movq	24(%rsi), %r13
 	movq	%rax, (%rdi)
-	movq	%r11, %rax
+	movq	8(%rbp), %rcx
+	movq	%rbp, %r11
+	movq	%rbp, -48(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r13
+	movq	%rdx, -96(%rsp)                 ## 8-byte Spill
+	movq	%rax, -24(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r10
+	movq	%rdx, -16(%rsp)                 ## 8-byte Spill
+	movq	%rax, -40(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r9
+	movq	%rdx, -32(%rsp)                 ## 8-byte Spill
+	movq	%rax, %r14
+	movq	%rcx, %rax
+	mulq	%r8
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	movq	%rax, %r15
+	movq	%r13, %rax
+	movq	%r13, -72(%rsp)                 ## 8-byte Spill
 	mulq	%rbx
-	movq	%rdx, %r14
-	movq	%rax, %rsi
+	movq	%rdx, %rsi
+	movq	%rax, %r12
+	movq	%r10, %rax
+	movq	%r10, %r8
+	movq	%r10, -56(%rsp)                 ## 8-byte Spill
+	mulq	%rbx
+	movq	%rdx, %rcx
+	movq	%rax, %rbp
 	movq	%r9, %rax
+	movq	%r9, %r10
+	movq	%r9, -64(%rsp)                  ## 8-byte Spill
 	mulq	%rbx
+	movq	%rdx, %rbx
+	addq	-80(%rsp), %rax                 ## 8-byte Folded Reload
+	adcq	%rbp, %rbx
+	adcq	%r12, %rcx
+	adcq	$0, %rsi
+	addq	%r15, %rax
+	movq	%rax, 8(%rdi)
+	adcq	%r14, %rbx
+	adcq	-40(%rsp), %rcx                 ## 8-byte Folded Reload
+	adcq	-24(%rsp), %rsi                 ## 8-byte Folded Reload
+	setb	%al
+	addq	-88(%rsp), %rbx                 ## 8-byte Folded Reload
+	adcq	-32(%rsp), %rcx                 ## 8-byte Folded Reload
+	movzbl	%al, %r14d
+	adcq	-16(%rsp), %rsi                 ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r14                 ## 8-byte Folded Reload
+	movq	16(%r11), %rbp
+	movq	%rbp, %rax
+	mulq	%r13
 	movq	%rdx, %r15
-	movq	%rax, %rbx
-	addq	%rcx, %rbx
-	adcq	%rsi, %r15
-	adcq	$0, %r14
-	movq	8(%r10), %rcx
-	movq	%r11, %rax
-	mulq	%rcx
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rbp, %rax
+	mulq	%r8
 	movq	%rdx, %r12
-	movq	%rax, %rbp
-	movq	%r9, %rax
-	mulq	%rcx
+	movq	%rax, %r9
+	movq	%rbp, %rax
+	mulq	%r10
 	movq	%rdx, %r13
-	movq	%rax, %rsi
-	movq	%r8, %rax
-	mulq	%rcx
+	movq	%rax, %r10
+	movq	%rbp, %rax
+	movq	-8(%rsp), %r8                   ## 8-byte Reload
+	mulq	%r8
+	movq	%rdx, %r11
+	addq	%r10, %r11
+	adcq	%r9, %r13
+	adcq	-96(%rsp), %r12                 ## 8-byte Folded Reload
+	adcq	$0, %r15
 	addq	%rbx, %rax
-	movq	%rax, 8(%rdi)
-	adcq	%r15, %rsi
-	adcq	%r14, %rbp
-	sbbq	%r14, %r14
-	andl	$1, %r14d
-	addq	%rdx, %rsi
-	adcq	%r13, %rbp
-	adcq	%r12, %r14
-	movq	16(%r10), %r15
-	movq	%r11, %rax
-	mulq	%r15
-	movq	%rdx, %r10
-	movq	%rax, %rbx
-	movq	%r9, %rax
-	mulq	%r15
-	movq	%rdx, %r9
-	movq	%rax, %rcx
-	movq	%r8, %rax
-	mulq	%r15
-	addq	%rsi, %rax
+	adcq	%rcx, %r11
 	movq	%rax, 16(%rdi)
-	adcq	%rbp, %rcx
+	adcq	%rsi, %r13
+	adcq	%r14, %r12
+	adcq	$0, %r15
+	movq	-48(%rsp), %rax                 ## 8-byte Reload
+	movq	24(%rax), %rsi
+	movq	%rsi, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rcx
+	movq	%rax, %r14
+	movq	%rsi, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r9
+	movq	%rsi, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %r10
+	movq	%rsi, %rax
+	mulq	%r8
+	addq	%r10, %rdx
+	adcq	%r9, %rbp
 	adcq	%r14, %rbx
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%rdx, %rcx
-	movq	%rcx, 24(%rdi)
-	adcq	%r9, %rbx
-	movq	%rbx, 32(%rdi)
-	adcq	%r10, %rax
-	movq	%rax, 40(%rdi)
+	adcq	$0, %rcx
+	addq	%r11, %rax
+	movq	%rax, 24(%rdi)
+	adcq	%r13, %rdx
+	movq	%rdx, 32(%rdi)
+	adcq	%r12, %rbp
+	movq	%rbp, 40(%rdi)
+	adcq	%r15, %rbx
+	movq	%rbx, 48(%rdi)
+	adcq	$0, %rcx
+	movq	%rcx, 56(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -1135,241 +1491,382 @@ _mcl_fpDbl_mulPre3L:                    ## @mcl_fpDbl_mulPre3L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_sqrPre3L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sqrPre4L             ## -- Begin function mcl_fpDbl_sqrPre4L
 	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre3L:                    ## @mcl_fpDbl_sqrPre3L
-## BB#0:
+_mcl_fpDbl_sqrPre4L:                    ## @mcl_fpDbl_sqrPre4L
+## %bb.0:
+	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	16(%rsi), %r10
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	movq	%rcx, %rax
-	mulq	%rcx
-	movq	%rdx, %rbx
+	movq	%rdi, %r10
+	movq	24(%rsi), %rbx
+	movq	16(%rsi), %rcx
+	movq	(%rsi), %r11
+	movq	8(%rsi), %r12
+	movq	%r11, %rax
+	mulq	%r11
+	movq	%rdx, %rbp
 	movq	%rax, (%rdi)
-	movq	%r10, %rax
-	mulq	%rcx
-	movq	%rdx, %r8
-	movq	%rax, %r11
-	movq	%rsi, %rax
+	movq	%rbx, %rax
 	mulq	%rcx
+	movq	%rdx, -16(%rsp)                 ## 8-byte Spill
+	movq	%rax, -24(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rax
+	movq	%rbx, -8(%rsp)                  ## 8-byte Spill
+	mulq	%r11
+	movq	%rdx, -48(%rsp)                 ## 8-byte Spill
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r11
+	movq	%rdx, %rsi
+	movq	%rax, %r15
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rax
+	mulq	%r12
 	movq	%rdx, %r14
-	movq	%rax, %r12
-	addq	%r12, %rbx
-	movq	%r14, %r13
-	adcq	%r11, %r13
-	movq	%r8, %rcx
-	adcq	$0, %rcx
-	movq	%r10, %rax
-	mulq	%rsi
+	movq	%rax, %rbx
+	movq	%rax, -32(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r12
 	movq	%rdx, %r9
-	movq	%rax, %r15
-	movq	%rsi, %rax
-	mulq	%rsi
-	movq	%rax, %rsi
-	addq	%r12, %rbx
-	movq	%rbx, 8(%rdi)
-	adcq	%r13, %rsi
-	adcq	%r15, %rcx
-	sbbq	%rbx, %rbx
-	andl	$1, %ebx
-	addq	%r14, %rsi
-	adcq	%rdx, %rcx
-	adcq	%r9, %rbx
-	movq	%r10, %rax
-	mulq	%r10
-	addq	%r11, %rsi
-	movq	%rsi, 16(%rdi)
+	movq	%rax, %rdi
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%rcx
+	movq	%rdx, -40(%rsp)                 ## 8-byte Spill
+	movq	%rax, %rcx
+	movq	%r12, %rax
+	mulq	%r12
+	movq	%rdx, %r13
+	movq	%rax, %r8
+	movq	%r12, %rax
+	mulq	%r11
+	addq	%rdx, %r8
+	adcq	%rdi, %r13
+	movq	%r9, %r12
+	adcq	%rbx, %r12
+	movq	%r14, %r11
+	adcq	$0, %r11
+	addq	%rax, %rbp
+	adcq	%r15, %rdx
+	movq	%rsi, %rbx
+	adcq	-72(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	-48(%rsp), %rdi                 ## 8-byte Reload
+	movq	%rdi, %r15
+	adcq	$0, %r15
+	addq	%rax, %rbp
+	adcq	%r8, %rdx
+	movq	%rbp, 8(%r10)
+	adcq	%r13, %rbx
+	adcq	%r12, %r15
+	adcq	$0, %r11
+	addq	-64(%rsp), %rsi                 ## 8-byte Folded Reload
+	adcq	%r9, %rcx
+	movq	-24(%rsp), %r12                 ## 8-byte Reload
+	movq	-40(%rsp), %rax                 ## 8-byte Reload
+	adcq	%r12, %rax
+	movq	-16(%rsp), %r8                  ## 8-byte Reload
+	movq	%r8, %rbp
+	adcq	$0, %rbp
+	addq	-56(%rsp), %rdx                 ## 8-byte Folded Reload
+	movq	%rdx, 16(%r10)
+	adcq	%rbx, %rsi
 	adcq	%r15, %rcx
-	adcq	%rbx, %rax
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	addq	%r8, %rcx
-	movq	%rcx, 24(%rdi)
-	adcq	%r9, %rax
-	movq	%rax, 32(%rdi)
-	adcq	%rdx, %rsi
-	movq	%rsi, 40(%rdi)
+	adcq	%r11, %rax
+	movq	%rax, %r9
+	adcq	$0, %rbp
+	movq	-8(%rsp), %rax                  ## 8-byte Reload
+	mulq	%rax
+	addq	-32(%rsp), %rdi                 ## 8-byte Folded Reload
+	adcq	%r12, %r14
+	adcq	%r8, %rax
+	adcq	$0, %rdx
+	addq	-72(%rsp), %rsi                 ## 8-byte Folded Reload
+	movq	%rsi, 24(%r10)
+	adcq	%rcx, %rdi
+	movq	%rdi, 32(%r10)
+	adcq	%r9, %r14
+	movq	%r14, 40(%r10)
+	adcq	%rbp, %rax
+	movq	%rax, 48(%r10)
+	adcq	$0, %rdx
+	movq	%rdx, 56(%r10)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
+	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_mont3L
+                                        ## -- End function
+	.globl	_mcl_fp_mont4L                  ## -- Begin function mcl_fp_mont4L
 	.p2align	4, 0x90
-_mcl_fp_mont3L:                         ## @mcl_fp_mont3L
-## BB#0:
+_mcl_fp_mont4L:                         ## @mcl_fp_mont4L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	16(%rsi), %r10
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	24(%rsi), %rax
+	movq	%rax, -40(%rsp)                 ## 8-byte Spill
 	movq	(%rdx), %rdi
-	movq	%rdx, %r11
-	movq	%r11, -16(%rsp)         ## 8-byte Spill
-	movq	%r10, %rax
-	movq	%r10, -24(%rsp)         ## 8-byte Spill
 	mulq	%rdi
-	movq	%rax, %rbx
-	movq	%rdx, %r15
-	movq	(%rsi), %rbp
-	movq	%rbp, -64(%rsp)         ## 8-byte Spill
+	movq	%rax, %r14
+	movq	%rdx, %r8
+	movq	16(%rsi), %rax
+	movq	%rax, -48(%rsp)                 ## 8-byte Spill
+	mulq	%rdi
+	movq	%rax, %r12
+	movq	%rdx, %r9
+	movq	(%rsi), %rbx
+	movq	%rbx, -56(%rsp)                 ## 8-byte Spill
 	movq	8(%rsi), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
 	mulq	%rdi
-	movq	%rdx, %r12
-	movq	%rax, %rsi
-	movq	%rbp, %rax
+	movq	%rdx, %r10
+	movq	%rax, %rbp
+	movq	%rbx, %rax
 	mulq	%rdi
-	movq	%rax, %r8
+	movq	%rax, %r11
+	movq	%rdx, %r15
+	addq	%rbp, %r15
+	adcq	%r12, %r10
+	adcq	%r14, %r9
+	adcq	$0, %r8
+	movq	-8(%rcx), %rdi
+	movq	%rdi, -80(%rsp)                 ## 8-byte Spill
+	imulq	%rax, %rdi
+	movq	24(%rcx), %rdx
+	movq	%rdx, -72(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r12
 	movq	%rdx, %r13
-	addq	%rsi, %r13
-	adcq	%rbx, %r12
-	adcq	$0, %r15
-	movq	-8(%rcx), %r14
-	movq	%r8, %rbp
-	imulq	%r14, %rbp
 	movq	16(%rcx), %rdx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
+	movq	%rdx, -16(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
 	mulq	%rdx
-	movq	%rax, %r9
+	movq	%rax, %r14
 	movq	%rdx, %rbx
-	movq	(%rcx), %rdi
-	movq	%rdi, -40(%rsp)         ## 8-byte Spill
+	movq	(%rcx), %rsi
+	movq	%rsi, -24(%rsp)                 ## 8-byte Spill
 	movq	8(%rcx), %rcx
-	movq	%rcx, -48(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
+	movq	%rcx, -32(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
 	mulq	%rcx
-	movq	%rdx, %rsi
-	movq	%rax, %rcx
-	movq	%rbp, %rax
-	mulq	%rdi
 	movq	%rdx, %rbp
-	addq	%rcx, %rbp
-	adcq	%r9, %rsi
-	adcq	$0, %rbx
-	addq	%r8, %rax
-	adcq	%r13, %rbp
-	movq	8(%r11), %rcx
-	adcq	%r12, %rsi
-	adcq	%r15, %rbx
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
+	movq	%rax, %rcx
+	movq	%rdi, %rax
+	mulq	%rsi
+	movq	%rdx, %rdi
+	addq	%rcx, %rdi
+	adcq	%r14, %rbp
+	adcq	%r12, %rbx
+	adcq	$0, %r13
+	addq	%r11, %rax
+	adcq	%r15, %rdi
+	adcq	%r10, %rbp
+	adcq	%r9, %rbx
+	adcq	%r8, %r13
+	setb	-96(%rsp)                       ## 1-byte Folded Spill
+	movq	-88(%rsp), %rax                 ## 8-byte Reload
+	movq	8(%rax), %rcx
 	movq	%rcx, %rax
-	mulq	%r10
-	movq	%rdx, %r15
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r12
 	movq	%rax, %r8
 	movq	%rcx, %rax
-	movq	-32(%rsp), %r10         ## 8-byte Reload
-	mulq	%r10
-	movq	%rdx, %r12
-	movq	%rax, %r9
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, %r11
 	movq	%rcx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %r13
-	movq	%rdx, %rcx
-	addq	%r9, %rcx
-	adcq	%r8, %r12
-	adcq	$0, %r15
-	addq	%rbp, %r13
-	adcq	%rsi, %rcx
-	adcq	%rbx, %r12
-	adcq	%rdi, %r15
-	sbbq	%r11, %r11
-	andl	$1, %r11d
-	movq	%r13, %rdi
-	imulq	%r14, %rdi
-	movq	%rdi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r8
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %rsi
-	movq	%rax, %r9
-	movq	%rdi, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	addq	%r9, %rbp
-	adcq	%r8, %rsi
-	adcq	$0, %rbx
-	addq	%r13, %rax
-	adcq	%rcx, %rbp
-	adcq	%r12, %rsi
-	adcq	%r15, %rbx
-	adcq	$0, %r11
-	movq	-16(%rsp), %rax         ## 8-byte Reload
-	movq	16(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r8
 	movq	%rax, %r15
 	movq	%rcx, %rax
-	mulq	%r10
-	movq	%rdx, %r10
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %r9
-	movq	%rdx, %rcx
-	addq	%rdi, %rcx
-	adcq	%r15, %r10
-	adcq	$0, %r8
-	addq	%rbp, %r9
-	adcq	%rsi, %rcx
-	adcq	%rbx, %r10
-	adcq	%r11, %r8
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	imulq	%r9, %r14
-	movq	%r14, %rax
-	movq	-56(%rsp), %r15         ## 8-byte Reload
-	mulq	%r15
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rax, %r10
+	movq	%rdx, %r9
+	addq	%r15, %r9
+	adcq	%r11, %rsi
+	adcq	%r8, %r14
+	adcq	$0, %r12
+	addq	%rdi, %r10
+	adcq	%rbp, %r9
+	adcq	%rbx, %rsi
+	adcq	%r13, %r14
+	movzbl	-96(%rsp), %eax                 ## 1-byte Folded Reload
+	adcq	%rax, %r12
+	setb	-96(%rsp)                       ## 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 ## 8-byte Reload
+	imulq	%r10, %rcx
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %rbp
+	movq	%rcx, %rax
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %rdi
+	movq	%rcx, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	addq	%rdi, %r11
+	adcq	%rbp, %r8
+	adcq	%r15, %rbx
+	adcq	$0, %r13
+	addq	%r10, %rax
+	adcq	%r9, %r11
+	adcq	%rsi, %r8
+	adcq	%r14, %rbx
+	adcq	%r12, %r13
+	movzbl	-96(%rsp), %r14d                ## 1-byte Folded Reload
+	adcq	$0, %r14
+	movq	-88(%rsp), %rax                 ## 8-byte Reload
+	movq	16(%rax), %rcx
+	movq	%rcx, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %rsi
+	movq	%rcx, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rax, %r9
+	movq	%rdx, %rdi
+	addq	%rsi, %rdi
+	adcq	%r15, %rbp
+	adcq	-96(%rsp), %r10                 ## 8-byte Folded Reload
+	adcq	$0, %r12
+	addq	%r11, %r9
+	adcq	%r8, %rdi
+	adcq	%rbx, %rbp
+	adcq	%r13, %r10
+	adcq	%r14, %r12
+	setb	%r15b
+	movq	-80(%rsp), %rcx                 ## 8-byte Reload
+	imulq	%r9, %rcx
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %r8
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %rbx
 	movq	%rax, %r11
-	movq	%r14, %rax
-	movq	-48(%rsp), %r12         ## 8-byte Reload
-	mulq	%r12
+	movq	%rcx, %rax
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %rsi
-	movq	%rax, %r13
-	movq	%r14, %rax
-	movq	-40(%rsp), %rbp         ## 8-byte Reload
-	mulq	%rbp
-	addq	%r13, %rdx
+	movq	%rax, %r14
+	movq	%rcx, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rcx
+	addq	%r14, %rcx
 	adcq	%r11, %rsi
-	adcq	$0, %rbx
-	addq	%r9, %rax
-	adcq	%rcx, %rdx
-	adcq	%r10, %rsi
 	adcq	%r8, %rbx
-	adcq	$0, %rdi
-	movq	%rdx, %rax
-	subq	%rbp, %rax
+	adcq	$0, %r13
+	addq	%r9, %rax
+	adcq	%rdi, %rcx
+	adcq	%rbp, %rsi
+	adcq	%r10, %rbx
+	adcq	%r12, %r13
+	movzbl	%r15b, %r12d
+	adcq	$0, %r12
+	movq	-88(%rsp), %rax                 ## 8-byte Reload
+	movq	24(%rax), %rdi
+	movq	%rdi, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r11
+	movq	%rdi, %rax
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r9
+	movq	%rax, %r14
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %r15
+	movq	%rdi, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rax, %r10
+	movq	%rdx, %rdi
+	addq	%r15, %rdi
+	adcq	%r14, %rbp
+	adcq	%r11, %r9
+	adcq	$0, %r8
+	addq	%rcx, %r10
+	adcq	%rsi, %rdi
+	adcq	%rbx, %rbp
+	adcq	%r13, %r9
+	adcq	%r12, %r8
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 ## 8-byte Reload
+	imulq	%r10, %rcx
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	movq	-16(%rsp), %r12                 ## 8-byte Reload
+	mulq	%r12
+	movq	%rdx, %rbx
+	movq	%rax, %r14
+	movq	%rcx, %rax
+	movq	-32(%rsp), %r11                 ## 8-byte Reload
+	mulq	%r11
+	movq	%rdx, %rsi
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	movq	-24(%rsp), %rcx                 ## 8-byte Reload
+	mulq	%rcx
+	addq	%r15, %rdx
+	adcq	%r14, %rsi
+	adcq	-80(%rsp), %rbx                 ## 8-byte Folded Reload
+	adcq	$0, %r13
+	addq	%r10, %rax
+	adcq	%rdi, %rdx
+	adcq	%rbp, %rsi
+	adcq	%r9, %rbx
+	movzbl	-88(%rsp), %eax                 ## 1-byte Folded Reload
+	adcq	%r8, %r13
+	adcq	$0, %rax
+	movq	%rdx, %r8
+	subq	%rcx, %r8
 	movq	%rsi, %rcx
-	sbbq	%r12, %rcx
+	sbbq	%r11, %rcx
 	movq	%rbx, %rbp
-	sbbq	%r15, %rbp
-	sbbq	$0, %rdi
-	andl	$1, %edi
+	sbbq	%r12, %rbp
+	movq	%r13, %rdi
+	sbbq	-72(%rsp), %rdi                 ## 8-byte Folded Reload
+	sbbq	$0, %rax
+	testb	$1, %al
+	cmovneq	%r13, %rdi
+	movq	-8(%rsp), %rax                  ## 8-byte Reload
+	movq	%rdi, 24(%rax)
 	cmovneq	%rbx, %rbp
-	testb	%dil, %dil
-	cmovneq	%rdx, %rax
-	movq	-8(%rsp), %rdx          ## 8-byte Reload
-	movq	%rax, (%rdx)
+	movq	%rbp, 16(%rax)
 	cmovneq	%rsi, %rcx
-	movq	%rcx, 8(%rdx)
-	movq	%rbp, 16(%rdx)
+	movq	%rcx, 8(%rax)
+	cmovneq	%rdx, %r8
+	movq	%r8, (%rax)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -1377,165 +1874,258 @@ _mcl_fp_mont3L:                         ## @mcl_fp_mont3L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montNF3L
+                                        ## -- End function
+	.globl	_mcl_fp_montNF4L                ## -- Begin function mcl_fp_montNF4L
 	.p2align	4, 0x90
-_mcl_fp_montNF3L:                       ## @mcl_fp_montNF3L
-## BB#0:
+_mcl_fp_montNF4L:                       ## @mcl_fp_montNF4L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, %r10
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	16(%rsi), %r11
-	movq	(%r10), %rbp
-	movq	%r10, -16(%rsp)         ## 8-byte Spill
-	movq	%r11, %rax
-	movq	%r11, -24(%rsp)         ## 8-byte Spill
-	mulq	%rbp
-	movq	%rax, %r14
-	movq	%rdx, %r15
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	24(%rsi), %rax
+	movq	%rax, -48(%rsp)                 ## 8-byte Spill
+	movq	(%rdx), %rdi
+	mulq	%rdi
+	movq	%rax, %r8
+	movq	%rdx, %r12
+	movq	16(%rsi), %rax
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	mulq	%rdi
+	movq	%rax, %rbp
+	movq	%rdx, %r9
 	movq	(%rsi), %rbx
-	movq	%rbx, -48(%rsp)         ## 8-byte Spill
+	movq	%rbx, -64(%rsp)                 ## 8-byte Spill
 	movq	8(%rsi), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	mulq	%rbp
-	movq	%rdx, %rdi
-	movq	%rax, %r8
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	mulq	%rdi
+	movq	%rdx, %r15
+	movq	%rax, %rsi
 	movq	%rbx, %rax
-	mulq	%rbp
+	mulq	%rdi
+	movq	%rax, %r10
+	movq	%rdx, %rdi
+	addq	%rsi, %rdi
+	adcq	%rbp, %r15
+	adcq	%r8, %r9
+	adcq	$0, %r12
+	movq	-8(%rcx), %rsi
+	movq	%rsi, -80(%rsp)                 ## 8-byte Spill
+	imulq	%rax, %rsi
+	movq	24(%rcx), %rdx
+	movq	%rdx, -32(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	%rdx
 	movq	%rax, %r13
-	movq	%rdx, %rbp
-	addq	%r8, %rbp
-	adcq	%r14, %rdi
-	adcq	$0, %r15
-	movq	-8(%rcx), %r14
-	movq	%r13, %rbx
-	imulq	%r14, %rbx
+	movq	%rdx, %r11
 	movq	16(%rcx), %rdx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
+	movq	%rdx, -40(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
 	mulq	%rdx
-	movq	%rax, %r12
-	movq	%rdx, %r8
-	movq	(%rcx), %rsi
-	movq	%rsi, -32(%rsp)         ## 8-byte Spill
+	movq	%rax, %r8
+	movq	%rdx, %r14
+	movq	(%rcx), %rbx
+	movq	%rbx, -16(%rsp)                 ## 8-byte Spill
 	movq	8(%rcx), %rcx
-	movq	%rcx, -40(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
+	movq	%rcx, -24(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
 	mulq	%rcx
-	movq	%rdx, %r9
-	movq	%rax, %rcx
-	movq	%rbx, %rax
-	mulq	%rsi
-	addq	%r13, %rax
-	adcq	%rbp, %rcx
-	adcq	%rdi, %r12
-	adcq	$0, %r15
-	addq	%rdx, %rcx
-	movq	8(%r10), %rbp
-	adcq	%r9, %r12
-	adcq	%r8, %r15
-	movq	%rbp, %rax
-	mulq	%r11
-	movq	%rdx, %rsi
-	movq	%rax, %r8
-	movq	%rbp, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
+	movq	%rdx, %rcx
+	movq	%rax, %rbp
+	movq	%rsi, %rax
+	mulq	%rbx
+	addq	%r10, %rax
+	adcq	%rdi, %rbp
+	adcq	%r15, %r8
+	adcq	%r9, %r13
+	adcq	$0, %r12
+	addq	%rdx, %rbp
+	adcq	%rcx, %r8
+	adcq	%r14, %r13
+	adcq	%r11, %r12
+	movq	-88(%rsp), %rax                 ## 8-byte Reload
+	movq	8(%rax), %rdi
+	movq	%rdi, %rax
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %rbx
-	movq	%rax, %r9
-	movq	%rbp, %rax
-	movq	-48(%rsp), %r10         ## 8-byte Reload
-	mulq	%r10
-	movq	%rax, %r13
-	movq	%rdx, %rbp
-	addq	%r9, %rbp
-	adcq	%r8, %rbx
-	adcq	$0, %rsi
-	addq	%rcx, %r13
-	adcq	%r12, %rbp
-	adcq	%r15, %rbx
-	adcq	$0, %rsi
-	movq	%r13, %rcx
-	imulq	%r14, %rcx
-	movq	%rcx, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r15
-	movq	%rcx, %rax
-	movq	-40(%rsp), %rdi         ## 8-byte Reload
-	mulq	%rdi
+	movq	%rax, %rsi
+	movq	%rdi, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r11
+	movq	%rdi, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rcx
+	movq	%rax, %r14
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rax, %rdi
 	movq	%rdx, %r9
+	addq	%r14, %r9
+	adcq	%r11, %rcx
+	adcq	%rsi, %r10
+	adcq	$0, %rbx
+	addq	%rbp, %rdi
+	adcq	%r8, %r9
+	adcq	%r13, %rcx
+	adcq	%r12, %r10
+	adcq	$0, %rbx
+	movq	-80(%rsp), %rsi                 ## 8-byte Reload
+	imulq	%rdi, %rsi
+	movq	%rsi, %rax
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
 	movq	%rax, %r12
-	movq	%rcx, %rax
-	movq	-32(%rsp), %r11         ## 8-byte Reload
-	mulq	%r11
-	addq	%r13, %rax
-	adcq	%rbp, %r12
-	adcq	%rbx, %r15
-	adcq	$0, %rsi
-	addq	%rdx, %r12
-	adcq	%r9, %r15
-	adcq	%r8, %rsi
-	movq	-16(%rsp), %rax         ## 8-byte Reload
-	movq	16(%rax), %rbx
-	movq	%rbx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r8
-	movq	%rbx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, %r13
+	movq	%rsi, %rax
+	movq	-24(%rsp), %r15                 ## 8-byte Reload
+	mulq	%r15
+	movq	%rdx, %r14
+	movq	%rax, %rbp
+	movq	%rsi, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	addq	%rdi, %rax
+	adcq	%r9, %rbp
+	adcq	%rcx, %r13
+	adcq	%r10, %r12
+	adcq	$0, %rbx
+	addq	%rdx, %rbp
+	adcq	%r14, %r13
+	adcq	%r11, %r12
+	adcq	%r8, %rbx
+	movq	-88(%rsp), %rax                 ## 8-byte Reload
+	movq	16(%rax), %rdi
+	movq	%rdi, %rax
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rsi
+	movq	%rax, %r10
+	movq	%rdi, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r11
+	movq	%rdi, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %rcx
+	movq	%rax, %r14
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
 	movq	%rax, %r9
+	movq	%rdx, %rdi
+	addq	%r14, %rdi
+	adcq	%r11, %rcx
+	adcq	%r10, %r8
+	adcq	$0, %rsi
+	addq	%rbp, %r9
+	adcq	%r13, %rdi
+	adcq	%r12, %rcx
+	adcq	%rbx, %r8
+	adcq	$0, %rsi
+	movq	-80(%rsp), %rbx                 ## 8-byte Reload
+	imulq	%r9, %rbx
 	movq	%rbx, %rax
-	mulq	%r10
-	movq	%rax, %r10
-	movq	%rdx, %rbx
-	addq	%r9, %rbx
-	adcq	%r8, %rcx
-	adcq	$0, %rbp
-	addq	%r12, %r10
-	adcq	%r15, %rbx
-	adcq	%rsi, %rcx
-	adcq	$0, %rbp
-	imulq	%r10, %r14
-	movq	%r14, %rax
-	movq	-56(%rsp), %r15         ## 8-byte Reload
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r12
+	movq	%rbx, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, %r13
+	movq	%rbx, %rax
+	mulq	%r15
+	movq	%rdx, %r14
+	movq	%rax, %rbp
+	movq	%rbx, %rax
+	movq	-16(%rsp), %r15                 ## 8-byte Reload
 	mulq	%r15
+	addq	%r9, %rax
+	adcq	%rdi, %rbp
+	adcq	%rcx, %r13
+	adcq	%r8, %r12
+	adcq	$0, %rsi
+	addq	%rdx, %rbp
+	adcq	%r14, %r13
+	adcq	%r11, %r12
+	adcq	%r10, %rsi
+	movq	-88(%rsp), %rax                 ## 8-byte Reload
+	movq	24(%rax), %rdi
+	movq	%rdi, %rax
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, %rcx
+	movq	%rdi, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %r8
-	movq	%rax, %rsi
-	movq	%r14, %rax
-	movq	%rdi, %r12
-	mulq	%r12
-	movq	%rdx, %r9
-	movq	%rax, %rdi
-	movq	%r14, %rax
-	mulq	%r11
-	addq	%r10, %rax
-	adcq	%rbx, %rdi
-	adcq	%rcx, %rsi
-	adcq	$0, %rbp
-	addq	%rdx, %rdi
-	adcq	%r9, %rsi
-	adcq	%r8, %rbp
+	movq	%rax, %rbx
 	movq	%rdi, %rax
-	subq	%r11, %rax
-	movq	%rsi, %rcx
-	sbbq	%r12, %rcx
-	movq	%rbp, %rbx
-	sbbq	%r15, %rbx
-	movq	%rbx, %rdx
-	sarq	$63, %rdx
-	cmovsq	%rdi, %rax
-	movq	-8(%rsp), %rdx          ## 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovsq	%rsi, %rcx
-	movq	%rcx, 8(%rdx)
-	cmovsq	%rbp, %rbx
-	movq	%rbx, 16(%rdx)
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r14
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rax, %r9
+	movq	%rdx, %rdi
+	addq	%r14, %rdi
+	adcq	%rbx, %r10
+	adcq	%rcx, %r8
+	adcq	$0, %r11
+	addq	%rbp, %r9
+	adcq	%r13, %rdi
+	adcq	%r12, %r10
+	adcq	%rsi, %r8
+	adcq	$0, %r11
+	movq	-80(%rsp), %rsi                 ## 8-byte Reload
+	imulq	%r9, %rsi
+	movq	%rsi, %rax
+	movq	-32(%rsp), %r12                 ## 8-byte Reload
+	mulq	%r12
+	movq	%rdx, -80(%rsp)                 ## 8-byte Spill
+	movq	%rax, %r13
+	movq	%rsi, %rax
+	movq	-40(%rsp), %r14                 ## 8-byte Reload
+	mulq	%r14
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	movq	%rax, %rbp
+	movq	%rsi, %rax
+	movq	%r15, %rbx
+	mulq	%r15
+	movq	%rdx, %r15
+	movq	%rax, %rcx
+	movq	%rsi, %rax
+	movq	-24(%rsp), %rsi                 ## 8-byte Reload
+	mulq	%rsi
+	addq	%r9, %rcx
+	adcq	%rdi, %rax
+	adcq	%r10, %rbp
+	adcq	%r8, %r13
+	adcq	$0, %r11
+	addq	%r15, %rax
+	adcq	%rdx, %rbp
+	adcq	-88(%rsp), %r13                 ## 8-byte Folded Reload
+	adcq	-80(%rsp), %r11                 ## 8-byte Folded Reload
+	movq	%rax, %rcx
+	subq	%rbx, %rcx
+	movq	%rbp, %rdx
+	sbbq	%rsi, %rdx
+	movq	%r13, %rdi
+	sbbq	%r14, %rdi
+	movq	%r11, %rbx
+	sbbq	%r12, %rbx
+	cmovsq	%r11, %rbx
+	movq	-8(%rsp), %rsi                  ## 8-byte Reload
+	movq	%rbx, 24(%rsi)
+	cmovsq	%r13, %rdi
+	movq	%rdi, 16(%rsi)
+	cmovsq	%rbp, %rdx
+	movq	%rdx, 8(%rsi)
+	cmovsq	%rax, %rcx
+	movq	%rcx, (%rsi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -1543,11 +2133,11 @@ _mcl_fp_montNF3L:                       ## @mcl_fp_montNF3L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montRed3L
+                                        ## -- End function
+	.globl	_mcl_fp_montRed4L               ## -- Begin function mcl_fp_montRed4L
 	.p2align	4, 0x90
-_mcl_fp_montRed3L:                      ## @mcl_fp_montRed3L
-## BB#0:
+_mcl_fp_montRed4L:                      ## @mcl_fp_montRed4L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
@@ -1555,103 +2145,153 @@ _mcl_fp_montRed3L:                      ## @mcl_fp_montRed3L
 	pushq	%r12
 	pushq	%rbx
 	movq	%rdx, %rcx
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	-8(%rcx), %r9
-	movq	(%rcx), %rdi
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	-8(%rdx), %r8
+	movq	(%rdx), %r13
 	movq	(%rsi), %r15
 	movq	%r15, %rbx
-	imulq	%r9, %rbx
+	imulq	%r8, %rbx
+	movq	24(%rdx), %rdi
+	movq	%rbx, %rax
+	mulq	%rdi
+	movq	%rdi, -40(%rsp)                 ## 8-byte Spill
+	movq	%rax, %r10
+	movq	%rdx, %r9
 	movq	16(%rcx), %rbp
 	movq	%rbx, %rax
 	mulq	%rbp
-	movq	%rbp, -24(%rsp)         ## 8-byte Spill
-	movq	%rax, %r11
-	movq	%rdx, %r8
+	movq	%rbp, -24(%rsp)                 ## 8-byte Spill
+	movq	%rax, %r14
+	movq	%rdx, %r11
 	movq	8(%rcx), %rcx
+	movq	%rcx, -48(%rsp)                 ## 8-byte Spill
 	movq	%rbx, %rax
 	mulq	%rcx
-	movq	%rcx, %r12
-	movq	%r12, -32(%rsp)         ## 8-byte Spill
-	movq	%rdx, %r10
-	movq	%rax, %r14
+	movq	%rdx, %r12
+	movq	%rax, %rcx
 	movq	%rbx, %rax
-	mulq	%rdi
-	movq	%rdi, %rbx
-	movq	%rbx, -16(%rsp)         ## 8-byte Spill
-	movq	%rdx, %rcx
-	addq	%r14, %rcx
-	adcq	%r11, %r10
-	adcq	$0, %r8
-	movq	40(%rsi), %rdi
-	movq	32(%rsi), %r13
+	mulq	%r13
+	movq	%r13, -32(%rsp)                 ## 8-byte Spill
+	movq	%rdx, %rbx
+	addq	%rcx, %rbx
+	adcq	%r14, %r12
+	adcq	%r10, %r11
+	adcq	$0, %r9
 	addq	%r15, %rax
-	adcq	8(%rsi), %rcx
-	adcq	16(%rsi), %r10
-	adcq	24(%rsi), %r8
-	adcq	$0, %r13
-	adcq	$0, %rdi
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	movq	%rcx, %rsi
-	imulq	%r9, %rsi
-	movq	%rsi, %rax
+	adcq	8(%rsi), %rbx
+	adcq	16(%rsi), %r12
+	adcq	24(%rsi), %r11
+	adcq	32(%rsi), %r9
+	movq	%rsi, -16(%rsp)                 ## 8-byte Spill
+	setb	-65(%rsp)                       ## 1-byte Folded Spill
+	movq	%r8, %rcx
+	imulq	%rbx, %rcx
+	movq	%rcx, %rax
+	mulq	%rdi
+	movq	%rdx, %r10
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
 	mulq	%rbp
-	movq	%rdx, %r11
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	mulq	%r12
 	movq	%rdx, %r14
-	movq	%rax, %r12
-	movq	%rsi, %rax
-	mulq	%rbx
-	movq	%rdx, %rbx
-	addq	%r12, %rbx
-	adcq	%rbp, %r14
-	adcq	$0, %r11
-	addq	%rcx, %rax
-	adcq	%r10, %rbx
-	adcq	%r8, %r14
-	adcq	%r13, %r11
-	adcq	$0, %rdi
-	adcq	$0, %r15
-	imulq	%rbx, %r9
-	movq	%r9, %rax
-	movq	-24(%rsp), %r12         ## 8-byte Reload
-	mulq	%r12
-	movq	%rdx, %rbp
-	movq	%rax, %r8
-	movq	%r9, %rax
-	movq	-32(%rsp), %r13         ## 8-byte Reload
+	movq	%rax, %rbp
+	movq	%rcx, %rax
 	mulq	%r13
+	movq	%rdx, %r13
+	movq	%rax, %rdi
+	movq	%rcx, %rax
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, %rcx
+	addq	%r13, %rcx
+	adcq	%rbp, %r15
+	adcq	-64(%rsp), %r14                 ## 8-byte Folded Reload
+	movzbl	-65(%rsp), %eax                 ## 1-byte Folded Reload
+	adcq	%rax, %r10
+	addq	%rbx, %rdi
+	adcq	%r12, %rcx
+	adcq	%r11, %r15
+	adcq	%r9, %r14
+	adcq	40(%rsi), %r10
+	setb	-65(%rsp)                       ## 1-byte Folded Spill
+	movq	%r8, %rdi
+	imulq	%rcx, %rdi
+	movq	%rdi, %rax
+	movq	-40(%rsp), %rsi                 ## 8-byte Reload
+	mulq	%rsi
+	movq	%rdx, %r9
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %rbp
+	movq	%rdi, %rax
+	movq	-48(%rsp), %rdi                 ## 8-byte Reload
+	mulq	%rdi
+	movq	%rdx, %r12
+	movq	%rax, %rbx
+	addq	%r13, %rbx
+	adcq	-56(%rsp), %r12                 ## 8-byte Folded Reload
+	adcq	-64(%rsp), %r11                 ## 8-byte Folded Reload
+	movzbl	-65(%rsp), %eax                 ## 1-byte Folded Reload
+	adcq	%rax, %r9
+	addq	%rcx, %rbp
+	adcq	%r15, %rbx
+	adcq	%r14, %r12
+	adcq	%r10, %r11
+	movq	-16(%rsp), %r15                 ## 8-byte Reload
+	adcq	48(%r15), %r9
+	setb	-65(%rsp)                       ## 1-byte Folded Spill
+	imulq	%rbx, %r8
+	movq	%r8, %rax
+	mulq	%rsi
+	movq	%rdx, -64(%rsp)                 ## 8-byte Spill
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	%r8, %rax
+	movq	-24(%rsp), %r14                 ## 8-byte Reload
+	mulq	%r14
+	movq	%rdx, %r13
+	movq	%rax, %rbp
+	movq	%r8, %rax
+	movq	-32(%rsp), %r10                 ## 8-byte Reload
+	mulq	%r10
 	movq	%rdx, %rsi
-	movq	%rax, %r10
-	movq	%r9, %rax
-	movq	-16(%rsp), %rcx         ## 8-byte Reload
-	mulq	%rcx
-	addq	%r10, %rdx
-	adcq	%r8, %rsi
-	adcq	$0, %rbp
-	addq	%rbx, %rax
-	adcq	%r14, %rdx
-	adcq	%r11, %rsi
-	adcq	%rdi, %rbp
-	adcq	$0, %r15
-	movq	%rdx, %rax
-	subq	%rcx, %rax
-	movq	%rsi, %rdi
-	sbbq	%r13, %rdi
-	movq	%rbp, %rcx
-	sbbq	%r12, %rcx
-	sbbq	$0, %r15
-	andl	$1, %r15d
-	cmovneq	%rbp, %rcx
-	testb	%r15b, %r15b
-	cmovneq	%rdx, %rax
-	movq	-8(%rsp), %rdx          ## 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovneq	%rsi, %rdi
-	movq	%rdi, 8(%rdx)
-	movq	%rcx, 16(%rdx)
+	movq	%rax, %rcx
+	movq	%r8, %rax
+	mulq	%rdi
+	addq	%rsi, %rax
+	adcq	%rbp, %rdx
+	adcq	-56(%rsp), %r13                 ## 8-byte Folded Reload
+	movzbl	-65(%rsp), %edi                 ## 1-byte Folded Reload
+	adcq	-64(%rsp), %rdi                 ## 8-byte Folded Reload
+	addq	%rbx, %rcx
+	adcq	%r12, %rax
+	adcq	%r11, %rdx
+	adcq	%r9, %r13
+	adcq	56(%r15), %rdi
+	xorl	%r8d, %r8d
+	movq	%rax, %rbp
+	subq	%r10, %rbp
+	movq	%rdx, %rbx
+	sbbq	-48(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%r13, %rcx
+	sbbq	%r14, %rcx
+	movq	%rdi, %rsi
+	sbbq	-40(%rsp), %rsi                 ## 8-byte Folded Reload
+	sbbq	%r8, %r8
+	testb	$1, %r8b
+	cmovneq	%rdi, %rsi
+	movq	-8(%rsp), %rdi                  ## 8-byte Reload
+	movq	%rsi, 24(%rdi)
+	cmovneq	%r13, %rcx
+	movq	%rcx, 16(%rdi)
+	cmovneq	%rdx, %rbx
+	movq	%rbx, 8(%rdi)
+	cmovneq	%rax, %rbp
+	movq	%rbp, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -1659,421 +2299,841 @@ _mcl_fp_montRed3L:                      ## @mcl_fp_montRed3L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_addPre3L
+                                        ## -- End function
+	.globl	_mcl_fp_montRedNF4L             ## -- Begin function mcl_fp_montRedNF4L
 	.p2align	4, 0x90
-_mcl_fp_addPre3L:                       ## @mcl_fp_addPre3L
-## BB#0:
-	movq	16(%rdx), %rax
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rax
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
+_mcl_fp_montRedNF4L:                    ## @mcl_fp_montRedNF4L
+## %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %rcx
+	movq	%rdi, -8(%rsp)                  ## 8-byte Spill
+	movq	-8(%rdx), %r8
+	movq	(%rdx), %r13
+	movq	(%rsi), %r15
+	movq	%r15, %rbx
+	imulq	%r8, %rbx
+	movq	24(%rdx), %rdi
+	movq	%rbx, %rax
+	mulq	%rdi
+	movq	%rdi, -48(%rsp)                 ## 8-byte Spill
+	movq	%rax, %r10
+	movq	%rdx, %r9
+	movq	16(%rcx), %rbp
+	movq	%rbx, %rax
+	mulq	%rbp
+	movq	%rbp, -32(%rsp)                 ## 8-byte Spill
+	movq	%rax, %r14
+	movq	%rdx, %r11
+	movq	8(%rcx), %rcx
+	movq	%rcx, -24(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rax
+	mulq	%rcx
+	movq	%rdx, %r12
+	movq	%rax, %rcx
+	movq	%rbx, %rax
+	mulq	%r13
+	movq	%r13, -40(%rsp)                 ## 8-byte Spill
+	movq	%rdx, %rbx
+	addq	%rcx, %rbx
+	adcq	%r14, %r12
+	adcq	%r10, %r11
+	adcq	$0, %r9
+	addq	%r15, %rax
+	adcq	8(%rsi), %rbx
+	adcq	16(%rsi), %r12
+	adcq	24(%rsi), %r11
+	adcq	32(%rsi), %r9
+	movq	%rsi, -16(%rsp)                 ## 8-byte Spill
+	setb	-65(%rsp)                       ## 1-byte Folded Spill
+	movq	%r8, %rcx
+	imulq	%rbx, %rcx
+	movq	%rcx, %rax
+	mulq	%rdi
+	movq	%rdx, %r10
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%rbp
+	movq	%rdx, %r14
+	movq	%rax, %rbp
+	movq	%rcx, %rax
+	mulq	%r13
+	movq	%rdx, %r13
+	movq	%rax, %rdi
+	movq	%rcx, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, %rcx
+	addq	%r13, %rcx
+	adcq	%rbp, %r15
+	adcq	-64(%rsp), %r14                 ## 8-byte Folded Reload
+	movzbl	-65(%rsp), %eax                 ## 1-byte Folded Reload
+	adcq	%rax, %r10
+	addq	%rbx, %rdi
+	adcq	%r12, %rcx
+	adcq	%r11, %r15
+	adcq	%r9, %r14
+	adcq	40(%rsi), %r10
+	setb	-65(%rsp)                       ## 1-byte Folded Spill
+	movq	%r8, %rdi
+	imulq	%rcx, %rdi
+	movq	%rdi, %rax
+	movq	-48(%rsp), %rsi                 ## 8-byte Reload
+	mulq	%rsi
+	movq	%rdx, %r9
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %rbp
+	movq	%rdi, %rax
+	movq	-24(%rsp), %rdi                 ## 8-byte Reload
+	mulq	%rdi
+	movq	%rdx, %r12
+	movq	%rax, %rbx
+	addq	%r13, %rbx
+	adcq	-56(%rsp), %r12                 ## 8-byte Folded Reload
+	adcq	-64(%rsp), %r11                 ## 8-byte Folded Reload
+	movzbl	-65(%rsp), %eax                 ## 1-byte Folded Reload
+	adcq	%rax, %r9
+	addq	%rcx, %rbp
+	adcq	%r15, %rbx
+	adcq	%r14, %r12
+	adcq	%r10, %r11
+	movq	-16(%rsp), %r15                 ## 8-byte Reload
+	adcq	48(%r15), %r9
+	setb	-65(%rsp)                       ## 1-byte Folded Spill
+	imulq	%rbx, %r8
+	movq	%r8, %rax
+	mulq	%rsi
+	movq	%rdx, -64(%rsp)                 ## 8-byte Spill
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	%r8, %rax
+	movq	-32(%rsp), %r14                 ## 8-byte Reload
+	mulq	%r14
+	movq	%rdx, %r13
+	movq	%rax, %rbp
+	movq	%r8, %rax
+	movq	-40(%rsp), %r10                 ## 8-byte Reload
+	mulq	%r10
+	movq	%rdx, %rsi
+	movq	%rax, %rcx
+	movq	%r8, %rax
+	mulq	%rdi
+	movq	%rdi, %r8
+	addq	%rsi, %rax
+	adcq	%rbp, %rdx
+	adcq	-56(%rsp), %r13                 ## 8-byte Folded Reload
+	movzbl	-65(%rsp), %edi                 ## 1-byte Folded Reload
+	adcq	-64(%rsp), %rdi                 ## 8-byte Folded Reload
+	addq	%rbx, %rcx
+	adcq	%r12, %rax
+	adcq	%r11, %rdx
+	adcq	%r9, %r13
+	adcq	56(%r15), %rdi
+	movq	%rax, %rbx
+	subq	%r10, %rbx
+	movq	%rdx, %rbp
+	sbbq	%r8, %rbp
+	movq	%r13, %rcx
+	sbbq	%r14, %rcx
+	movq	%rdi, %rsi
+	sbbq	-48(%rsp), %rsi                 ## 8-byte Folded Reload
+	cmovsq	%rdi, %rsi
+	movq	-8(%rsp), %rdi                  ## 8-byte Reload
+	movq	%rsi, 24(%rdi)
+	cmovsq	%r13, %rcx
+	movq	%rcx, 16(%rdi)
+	cmovsq	%rdx, %rbp
+	movq	%rbp, 8(%rdi)
+	cmovsq	%rax, %rbx
+	movq	%rbx, (%rdi)
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_subPre3L
+                                        ## -- End function
+	.globl	_mcl_fp_addPre4L                ## -- Begin function mcl_fp_addPre4L
 	.p2align	4, 0x90
-_mcl_fp_subPre3L:                       ## @mcl_fp_subPre3L
-## BB#0:
+_mcl_fp_addPre4L:                       ## @mcl_fp_addPre4L
+## %bb.0:
+	movq	24(%rsi), %rax
+	movq	16(%rsi), %rcx
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r8
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rcx
+	adcq	24(%rdx), %rax
+	movq	%rax, 24(%rdi)
+	movq	%rcx, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	setb	%al
+	movzbl	%al, %eax
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_subPre4L                ## -- Begin function mcl_fp_subPre4L
+	.p2align	4, 0x90
+_mcl_fp_subPre4L:                       ## @mcl_fp_subPre4L
+## %bb.0:
+	movq	24(%rsi), %rcx
 	movq	16(%rsi), %r8
-	movq	(%rsi), %rcx
+	movq	(%rsi), %r9
 	movq	8(%rsi), %rsi
 	xorl	%eax, %eax
-	subq	(%rdx), %rcx
+	subq	(%rdx), %r9
 	sbbq	8(%rdx), %rsi
 	sbbq	16(%rdx), %r8
-	movq	%rcx, (%rdi)
-	movq	%rsi, 8(%rdi)
+	sbbq	24(%rdx), %rcx
+	movq	%rcx, 24(%rdi)
 	movq	%r8, 16(%rdi)
-	sbbq	$0, %rax
+	movq	%rsi, 8(%rdi)
+	movq	%r9, (%rdi)
+	sbbq	%rax, %rax
 	andl	$1, %eax
 	retq
-
-	.globl	_mcl_fp_shr1_3L
+                                        ## -- End function
+	.globl	_mcl_fp_shr1_4L                 ## -- Begin function mcl_fp_shr1_4L
 	.p2align	4, 0x90
-_mcl_fp_shr1_3L:                        ## @mcl_fp_shr1_3L
-## BB#0:
-	movq	16(%rsi), %rax
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rdx
-	shrdq	$1, %rdx, %rcx
-	movq	%rcx, (%rdi)
-	shrdq	$1, %rax, %rdx
+_mcl_fp_shr1_4L:                        ## @mcl_fp_shr1_4L
+## %bb.0:
+	movq	(%rsi), %rax
+	movq	8(%rsi), %r8
+	movq	16(%rsi), %rdx
+	movq	24(%rsi), %rcx
+	movq	%rcx, %rsi
+	shrq	%rsi
+	movq	%rsi, 24(%rdi)
+	shldq	$63, %rdx, %rcx
+	movq	%rcx, 16(%rdi)
+	shldq	$63, %r8, %rdx
 	movq	%rdx, 8(%rdi)
-	shrq	%rax
-	movq	%rax, 16(%rdi)
+	shrdq	$1, %r8, %rax
+	movq	%rax, (%rdi)
 	retq
-
-	.globl	_mcl_fp_add3L
+                                        ## -- End function
+	.globl	_mcl_fp_add4L                   ## -- Begin function mcl_fp_add4L
 	.p2align	4, 0x90
-_mcl_fp_add3L:                          ## @mcl_fp_add3L
-## BB#0:
-	movq	16(%rdx), %r8
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r8
+_mcl_fp_add4L:                          ## @mcl_fp_add4L
+## %bb.0:
+	movq	24(%rsi), %r8
+	movq	16(%rsi), %r9
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r9
+	adcq	24(%rdx), %r8
+	movq	%r8, 24(%rdi)
+	movq	%r9, 16(%rdi)
+	movq	%rsi, 8(%rdi)
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r8, 16(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
+	setb	%dl
+	movzbl	%dl, %edx
 	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB44_2
-## BB#1:                                ## %nocarry
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %r9
+	sbbq	24(%rcx), %r8
+	sbbq	$0, %rdx
+	testb	$1, %dl
+	jne	LBB33_2
+## %bb.1:                               ## %nocarry
 	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r8, 16(%rdi)
-LBB44_2:                                ## %carry
+	movq	%rsi, 8(%rdi)
+	movq	%r9, 16(%rdi)
+	movq	%r8, 24(%rdi)
+LBB33_2:                                ## %carry
 	retq
-
-	.globl	_mcl_fp_addNF3L
+                                        ## -- End function
+	.globl	_mcl_fp_addNF4L                 ## -- Begin function mcl_fp_addNF4L
 	.p2align	4, 0x90
-_mcl_fp_addNF3L:                        ## @mcl_fp_addNF3L
-## BB#0:
+_mcl_fp_addNF4L:                        ## @mcl_fp_addNF4L
+## %bb.0:
+	pushq	%rbx
+	movq	24(%rdx), %r11
 	movq	16(%rdx), %r8
-	movq	(%rdx), %r10
-	movq	8(%rdx), %r9
-	addq	(%rsi), %r10
-	adcq	8(%rsi), %r9
+	movq	(%rdx), %r9
+	movq	8(%rdx), %r10
+	addq	(%rsi), %r9
+	adcq	8(%rsi), %r10
 	adcq	16(%rsi), %r8
-	movq	%r10, %rsi
+	adcq	24(%rsi), %r11
+	movq	%r9, %rsi
 	subq	(%rcx), %rsi
-	movq	%r9, %rdx
+	movq	%r10, %rdx
 	sbbq	8(%rcx), %rdx
 	movq	%r8, %rax
 	sbbq	16(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r10, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
+	movq	%r11, %rbx
+	sbbq	24(%rcx), %rbx
+	cmovsq	%r11, %rbx
+	movq	%rbx, 24(%rdi)
 	cmovsq	%r8, %rax
 	movq	%rax, 16(%rdi)
+	cmovsq	%r10, %rdx
+	movq	%rdx, 8(%rdi)
+	cmovsq	%r9, %rsi
+	movq	%rsi, (%rdi)
+	popq	%rbx
 	retq
-
-	.globl	_mcl_fp_sub3L
+                                        ## -- End function
+	.globl	_mcl_fp_sub4L                   ## -- Begin function mcl_fp_sub4L
 	.p2align	4, 0x90
-_mcl_fp_sub3L:                          ## @mcl_fp_sub3L
-## BB#0:
-	movq	16(%rsi), %r8
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r9
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r9
-	sbbq	16(%rdx), %r8
-	movq	%rax, (%rdi)
-	movq	%r9, 8(%rdi)
-	movq	%r8, 16(%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB46_2
-## BB#1:                                ## %nocarry
-	retq
-LBB46_2:                                ## %carry
-	movq	8(%rcx), %rdx
-	movq	16(%rcx), %rsi
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r8, %rsi
-	movq	%rsi, 16(%rdi)
-	retq
-
-	.globl	_mcl_fp_subNF3L
-	.p2align	4, 0x90
-_mcl_fp_subNF3L:                        ## @mcl_fp_subNF3L
-## BB#0:
+_mcl_fp_sub4L:                          ## @mcl_fp_sub4L
+## %bb.0:
+	movq	24(%rsi), %r9
 	movq	16(%rsi), %r10
 	movq	(%rsi), %r8
-	movq	8(%rsi), %r9
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
 	subq	(%rdx), %r8
-	sbbq	8(%rdx), %r9
+	sbbq	8(%rdx), %rsi
 	sbbq	16(%rdx), %r10
-	movq	%r10, %rdx
+	sbbq	24(%rdx), %r9
+	movq	%r9, 24(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rax, %rax
+	testb	$1, %al
+	jne	LBB35_2
+## %bb.1:                               ## %nocarry
+	retq
+LBB35_2:                                ## %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %r10
+	adcq	24(%rcx), %r9
+	movq	%r9, 24(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_subNF4L                 ## -- Begin function mcl_fp_subNF4L
+	.p2align	4, 0x90
+_mcl_fp_subNF4L:                        ## @mcl_fp_subNF4L
+## %bb.0:
+	pushq	%rbx
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %r8
+	movq	(%rsi), %r9
+	movq	8(%rsi), %r10
+	subq	(%rdx), %r9
+	sbbq	8(%rdx), %r10
+	sbbq	16(%rdx), %r8
+	sbbq	24(%rdx), %r11
+	movq	%r11, %rdx
 	sarq	$63, %rdx
-	movq	%rdx, %rsi
-	shldq	$1, %r10, %rsi
-	andq	(%rcx), %rsi
+	movq	24(%rcx), %rsi
+	andq	%rdx, %rsi
 	movq	16(%rcx), %rax
 	andq	%rdx, %rax
-	andq	8(%rcx), %rdx
-	addq	%r8, %rsi
-	movq	%rsi, (%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r10, %rax
+	movq	8(%rcx), %rbx
+	andq	%rdx, %rbx
+	andq	(%rcx), %rdx
+	addq	%r9, %rdx
+	movq	%rdx, (%rdi)
+	adcq	%r10, %rbx
+	movq	%rbx, 8(%rdi)
+	adcq	%r8, %rax
 	movq	%rax, 16(%rdi)
+	adcq	%r11, %rsi
+	movq	%rsi, 24(%rdi)
+	popq	%rbx
 	retq
-
-	.globl	_mcl_fpDbl_add3L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_add4L                ## -- Begin function mcl_fpDbl_add4L
 	.p2align	4, 0x90
-_mcl_fpDbl_add3L:                       ## @mcl_fpDbl_add3L
-## BB#0:
-	pushq	%r15
+_mcl_fpDbl_add4L:                       ## @mcl_fpDbl_add4L
+## %bb.0:
 	pushq	%r14
 	pushq	%rbx
-	movq	40(%rdx), %r10
-	movq	40(%rsi), %r8
-	movq	32(%rdx), %r11
-	movq	24(%rdx), %r14
-	movq	24(%rsi), %r15
-	movq	32(%rsi), %r9
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
+	movq	56(%rsi), %r11
+	movq	48(%rsi), %r10
+	movq	40(%rsi), %r9
+	movq	32(%rsi), %r8
+	movq	24(%rsi), %rax
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %r14
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r14
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rbx
+	adcq	24(%rdx), %rax
+	adcq	32(%rdx), %r8
+	adcq	40(%rdx), %r9
+	adcq	48(%rdx), %r10
+	adcq	56(%rdx), %r11
+	movq	%rax, 24(%rdi)
 	movq	%rbx, 16(%rdi)
-	adcq	%r14, %r15
-	adcq	%r11, %r9
-	adcq	%r10, %r8
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%r15, %rdx
+	movq	%rsi, 8(%rdi)
+	movq	%r14, (%rdi)
+	setb	%al
+	movzbl	%al, %r14d
+	movq	%r8, %rdx
 	subq	(%rcx), %rdx
 	movq	%r9, %rsi
 	sbbq	8(%rcx), %rsi
-	movq	%r8, %rbx
+	movq	%r10, %rbx
 	sbbq	16(%rcx), %rbx
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%r15, %rdx
-	movq	%rdx, 24(%rdi)
-	testb	%al, %al
+	movq	%r11, %rax
+	sbbq	24(%rcx), %rax
+	sbbq	$0, %r14
+	testb	$1, %r14b
+	cmovneq	%r11, %rax
+	movq	%rax, 56(%rdi)
+	cmovneq	%r10, %rbx
+	movq	%rbx, 48(%rdi)
 	cmovneq	%r9, %rsi
-	movq	%rsi, 32(%rdi)
-	cmovneq	%r8, %rbx
-	movq	%rbx, 40(%rdi)
+	movq	%rsi, 40(%rdi)
+	cmovneq	%r8, %rdx
+	movq	%rdx, 32(%rdi)
 	popq	%rbx
 	popq	%r14
-	popq	%r15
 	retq
-
-	.globl	_mcl_fpDbl_sub3L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sub4L                ## -- Begin function mcl_fpDbl_sub4L
 	.p2align	4, 0x90
-_mcl_fpDbl_sub3L:                       ## @mcl_fpDbl_sub3L
-## BB#0:
+_mcl_fpDbl_sub4L:                       ## @mcl_fpDbl_sub4L
+## %bb.0:
 	pushq	%r15
 	pushq	%r14
-	pushq	%r12
 	pushq	%rbx
-	movq	40(%rdx), %r10
-	movq	40(%rsi), %r8
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %r14
-	movq	(%rsi), %rbx
+	movq	56(%rsi), %r8
+	movq	48(%rsi), %r9
+	movq	40(%rsi), %r10
+	movq	32(%rsi), %r11
+	movq	24(%rsi), %r15
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %r14
 	movq	8(%rsi), %rax
 	xorl	%esi, %esi
-	subq	(%rdx), %rbx
+	subq	(%rdx), %r14
 	sbbq	8(%rdx), %rax
-	movq	24(%rdx), %r15
-	movq	32(%rdx), %r12
-	sbbq	16(%rdx), %r14
-	movq	%rbx, (%rdi)
+	sbbq	16(%rdx), %rbx
+	sbbq	24(%rdx), %r15
+	sbbq	32(%rdx), %r11
+	sbbq	40(%rdx), %r10
+	sbbq	48(%rdx), %r9
+	sbbq	56(%rdx), %r8
+	movq	%r15, 24(%rdi)
+	movq	%rbx, 16(%rdi)
 	movq	%rax, 8(%rdi)
-	movq	%r14, 16(%rdi)
-	sbbq	%r15, %r11
-	sbbq	%r12, %r9
-	sbbq	%r10, %r8
-	movl	$0, %eax
-	sbbq	$0, %rax
-	andl	$1, %eax
-	movq	(%rcx), %rdx
-	cmoveq	%rsi, %rdx
-	testb	%al, %al
-	movq	16(%rcx), %rax
-	cmoveq	%rsi, %rax
-	cmovneq	8(%rcx), %rsi
-	addq	%r11, %rdx
-	movq	%rdx, 24(%rdi)
-	adcq	%r9, %rsi
+	movq	%r14, (%rdi)
+	sbbq	%rsi, %rsi
+	andl	$1, %esi
+	negq	%rsi
+	movq	24(%rcx), %rax
+	andq	%rsi, %rax
+	movq	16(%rcx), %rdx
+	andq	%rsi, %rdx
+	movq	8(%rcx), %rbx
+	andq	%rsi, %rbx
+	andq	(%rcx), %rsi
+	addq	%r11, %rsi
 	movq	%rsi, 32(%rdi)
+	adcq	%r10, %rbx
+	movq	%rbx, 40(%rdi)
+	adcq	%r9, %rdx
+	movq	%rdx, 48(%rdi)
 	adcq	%r8, %rax
-	movq	%rax, 40(%rdi)
+	movq	%rax, 56(%rdi)
 	popq	%rbx
-	popq	%r12
 	popq	%r14
 	popq	%r15
 	retq
-
-	.globl	_mcl_fp_mulUnitPre4L
+                                        ## -- End function
+	.globl	_mulPv384x64                    ## -- Begin function mulPv384x64
 	.p2align	4, 0x90
-_mcl_fp_mulUnitPre4L:                   ## @mcl_fp_mulUnitPre4L
-## BB#0:
+_mulPv384x64:                           ## @mulPv384x64
+## %bb.0:
+	pushq	%r15
 	pushq	%r14
+	pushq	%r13
+	pushq	%r12
 	pushq	%rbx
 	movq	%rdx, %rcx
+	movq	%rdx, %rax
+	mulq	(%rsi)
+	movq	%rdx, %r9
+	movq	%rax, (%rdi)
 	movq	%rcx, %rax
-	mulq	24(%rsi)
+	mulq	40(%rsi)
 	movq	%rdx, %r8
-	movq	%rax, %r9
+	movq	%rax, %r10
+	movq	%rcx, %rax
+	mulq	32(%rsi)
+	movq	%rdx, %r11
+	movq	%rax, %r14
+	movq	%rcx, %rax
+	mulq	24(%rsi)
+	movq	%rdx, %r12
+	movq	%rax, %r15
 	movq	%rcx, %rax
 	mulq	16(%rsi)
+	movq	%rdx, %rbx
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	mulq	8(%rsi)
+	addq	%r9, %rax
+	movq	%rax, 8(%rdi)
+	adcq	%r13, %rdx
+	movq	%rdx, 16(%rdi)
+	adcq	%r15, %rbx
+	movq	%rbx, 24(%rdi)
+	adcq	%r14, %r12
+	movq	%r12, 32(%rdi)
+	adcq	%r10, %r11
+	movq	%r11, 40(%rdi)
+	adcq	$0, %r8
+	movq	%r8, 48(%rdi)
+	movq	%rdi, %rax
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_mulUnitPre6L            ## -- Begin function mcl_fp_mulUnitPre6L
+	.p2align	4, 0x90
+_mcl_fp_mulUnitPre6L:                   ## @mcl_fp_mulUnitPre6L
+## %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	movq	%rdx, %rcx
+	movq	%rdx, %rax
+	mulq	40(%rsi)
+	movq	%rdx, %r9
+	movq	%rax, %r8
+	movq	%rcx, %rax
+	mulq	32(%rsi)
 	movq	%rdx, %r10
 	movq	%rax, %r11
 	movq	%rcx, %rax
+	mulq	24(%rsi)
+	movq	%rdx, %r15
+	movq	%rax, %r14
+	movq	%rcx, %rax
+	mulq	16(%rsi)
+	movq	%rdx, %r13
+	movq	%rax, %r12
+	movq	%rcx, %rax
 	mulq	8(%rsi)
 	movq	%rdx, %rbx
-	movq	%rax, %r14
+	movq	%rax, %rbp
 	movq	%rcx, %rax
 	mulq	(%rsi)
 	movq	%rax, (%rdi)
-	addq	%r14, %rdx
+	addq	%rbp, %rdx
 	movq	%rdx, 8(%rdi)
-	adcq	%r11, %rbx
+	adcq	%r12, %rbx
 	movq	%rbx, 16(%rdi)
-	adcq	%r9, %r10
-	movq	%r10, 24(%rdi)
-	adcq	$0, %r8
-	movq	%r8, 32(%rdi)
+	adcq	%r14, %r13
+	movq	%r13, 24(%rdi)
+	adcq	%r11, %r15
+	movq	%r15, 32(%rdi)
+	adcq	%r8, %r10
+	movq	%r10, 40(%rdi)
+	adcq	$0, %r9
+	movq	%r9, 48(%rdi)
 	popq	%rbx
+	popq	%r12
+	popq	%r13
 	popq	%r14
+	popq	%r15
+	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_mulPre4L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_mulPre6L             ## -- Begin function mcl_fpDbl_mulPre6L
 	.p2align	4, 0x90
-_mcl_fpDbl_mulPre4L:                    ## @mcl_fpDbl_mulPre4L
-## BB#0:
+_mcl_fpDbl_mulPre6L:                    ## @mcl_fpDbl_mulPre6L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
+	movq	%rdx, -64(%rsp)                 ## 8-byte Spill
+	movq	%rdi, -48(%rsp)                 ## 8-byte Spill
 	movq	(%rsi), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %r8
-	movq	%r8, -56(%rsp)          ## 8-byte Spill
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	8(%rsi), %r14
 	movq	(%rdx), %rbx
-	movq	%rdx, %rbp
-	mulq	%rbx
-	movq	%rdx, %r15
-	movq	16(%rsi), %rcx
-	movq	24(%rsi), %r11
-	movq	%rax, (%rdi)
-	movq	%r11, %rax
 	mulq	%rbx
 	movq	%rdx, %r12
-	movq	%rax, %r14
-	movq	%rcx, %rax
-	movq	%rcx, -16(%rsp)         ## 8-byte Spill
-	mulq	%rbx
-	movq	%rdx, %r10
-	movq	%rax, %r9
+	movq	16(%rsi), %r13
+	movq	24(%rsi), %r8
+	movq	32(%rsi), %r10
+	movq	40(%rsi), %rdx
+	movq	%rdx, -72(%rsp)                 ## 8-byte Spill
+	movq	%rax, (%rdi)
+	movq	%rdx, %rax
+	mulq	%rbx
+	movq	%rdx, %rcx
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%r10, %rax
+	mulq	%rbx
+	movq	%rdx, %rbp
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
 	movq	%r8, %rax
 	mulq	%rbx
+	movq	%rdx, %r11
+	movq	%rax, -88(%rsp)                 ## 8-byte Spill
+	movq	%r13, %rax
+	movq	%r13, %r9
+	movq	%r13, -32(%rsp)                 ## 8-byte Spill
+	mulq	%rbx
 	movq	%rdx, %r13
-	movq	%rax, %r8
-	addq	%r15, %r8
-	adcq	%r9, %r13
-	adcq	%r14, %r10
-	adcq	$0, %r12
-	movq	%rbp, %r9
-	movq	%r9, -8(%rsp)           ## 8-byte Spill
-	movq	8(%r9), %rbp
-	movq	%r11, %rax
-	mulq	%rbp
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
 	movq	%rax, %r15
-	movq	%rcx, %rax
-	mulq	%rbp
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	movq	%rax, %rcx
-	movq	-56(%rsp), %r14         ## 8-byte Reload
-	movq	%r14, %rax
-	mulq	%rbp
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbx
-	movq	-64(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	addq	%r8, %rax
-	movq	%rax, 8(%rdi)
-	adcq	%r13, %rbx
-	adcq	%r10, %rcx
-	adcq	%r12, %r15
-	sbbq	%r13, %r13
-	movq	16(%r9), %rbp
 	movq	%r14, %rax
-	mulq	%rbp
+	movq	%r14, -40(%rsp)                 ## 8-byte Spill
+	mulq	%rbx
+	movq	%rdx, %rsi
+	movq	%rax, %rdi
+	addq	%r12, %rdi
+	adcq	%r15, %rsi
+	adcq	-88(%rsp), %r13                 ## 8-byte Folded Reload
+	adcq	-112(%rsp), %r11                ## 8-byte Folded Reload
+	adcq	-104(%rsp), %rbp                ## 8-byte Folded Reload
+	movq	%rbp, -24(%rsp)                 ## 8-byte Spill
+	adcq	$0, %rcx
+	movq	%rcx, -80(%rsp)                 ## 8-byte Spill
+	movq	-64(%rsp), %rax                 ## 8-byte Reload
+	movq	8(%rax), %r15
+	movq	%r15, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                ## 8-byte Spill
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	%r15, %rax
+	mulq	%r10
+	movq	%r10, -16(%rsp)                 ## 8-byte Spill
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
 	movq	%rax, %r12
+	movq	%r15, %rax
+	mulq	%r8
+	movq	%r8, -8(%rsp)                   ## 8-byte Spill
+	movq	%rdx, -96(%rsp)                 ## 8-byte Spill
+	movq	%rax, %rbp
+	movq	%r15, %rax
+	mulq	%r9
+	movq	%rdx, %r9
+	movq	%rax, %rcx
+	movq	%r15, %rax
+	mulq	%r14
 	movq	%rdx, %r14
-	andl	$1, %r13d
-	addq	-48(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	-40(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	-32(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-24(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%r11, %rax
-	mulq	%rbp
+	movq	%rax, %rbx
+	movq	%r15, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	addq	%rdi, %rax
+	movq	-48(%rsp), %rdi                 ## 8-byte Reload
+	movq	%rax, 8(%rdi)
+	adcq	%rsi, %rbx
+	adcq	%r13, %rcx
+	adcq	%r11, %rbp
+	adcq	-24(%rsp), %r12                 ## 8-byte Folded Reload
+	movq	-112(%rsp), %rsi                ## 8-byte Reload
+	adcq	-80(%rsp), %rsi                 ## 8-byte Folded Reload
+	setb	%al
+	addq	%rdx, %rbx
+	adcq	%r14, %rcx
+	adcq	%r9, %rbp
+	adcq	-96(%rsp), %r12                 ## 8-byte Folded Reload
+	adcq	-88(%rsp), %rsi                 ## 8-byte Folded Reload
+	movq	%rsi, -112(%rsp)                ## 8-byte Spill
+	movzbl	%al, %r9d
+	adcq	-104(%rsp), %r9                 ## 8-byte Folded Reload
+	movq	-64(%rsp), %rax                 ## 8-byte Reload
+	movq	16(%rax), %rsi
+	movq	%rsi, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                ## 8-byte Spill
+	movq	%rax, -88(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	%r10
+	movq	%rdx, %r10
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	%r8
 	movq	%rdx, %r8
-	movq	%rax, %r11
-	movq	-16(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, %r9
-	movq	%rax, %r10
-	movq	-64(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, %r13
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, %r14
+	movq	%rsi, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rdi
+	addq	%r14, %rdi
+	adcq	%r13, %r15
+	adcq	-80(%rsp), %r11                 ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r8                  ## 8-byte Folded Reload
+	adcq	-88(%rsp), %r10                 ## 8-byte Folded Reload
+	movq	-104(%rsp), %rsi                ## 8-byte Reload
+	adcq	$0, %rsi
 	addq	%rbx, %rax
-	movq	%rax, 16(%rdi)
-	adcq	%r12, %rcx
-	adcq	%r15, %r10
-	adcq	%r13, %r11
-	sbbq	%r13, %r13
-	andl	$1, %r13d
-	addq	%rdx, %rcx
+	movq	-48(%rsp), %rdx                 ## 8-byte Reload
+	movq	%rax, 16(%rdx)
+	adcq	%rcx, %rdi
+	adcq	%rbp, %r15
+	adcq	%r12, %r11
+	adcq	-112(%rsp), %r8                 ## 8-byte Folded Reload
+	adcq	%r9, %r10
+	adcq	$0, %rsi
+	movq	%rsi, -104(%rsp)                ## 8-byte Spill
+	movq	-64(%rsp), %rax                 ## 8-byte Reload
+	movq	24(%rax), %rsi
+	movq	%rsi, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rcx
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-8(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %r9
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, %rbp
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, %rbx
+	movq	%rsi, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	addq	%rbx, %r13
+	adcq	%rbp, %r12
+	adcq	-80(%rsp), %r14                 ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r9                  ## 8-byte Folded Reload
+	adcq	-112(%rsp), %rcx                ## 8-byte Folded Reload
+	movq	-88(%rsp), %rdx                 ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	%rdi, %rax
+	movq	-48(%rsp), %rdi                 ## 8-byte Reload
+	movq	%rax, 24(%rdi)
+	adcq	%r15, %r13
+	adcq	%r11, %r12
+	adcq	%r8, %r14
+	adcq	%r10, %r9
+	adcq	-104(%rsp), %rcx                ## 8-byte Folded Reload
+	adcq	$0, %rdx
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	movq	-64(%rsp), %rax                 ## 8-byte Reload
+	movq	32(%rax), %rsi
+	movq	%rsi, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                ## 8-byte Spill
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -112(%rsp)                ## 8-byte Spill
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-8(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, -24(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, %rbp
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r8
+	movq	%rsi, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	addq	%r8, %r11
+	adcq	%rbp, %r10
+	adcq	-24(%rsp), %r15                 ## 8-byte Folded Reload
+	adcq	-80(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	-112(%rsp), %rsi                ## 8-byte Reload
+	adcq	-96(%rsp), %rsi                 ## 8-byte Folded Reload
+	movq	-104(%rsp), %rdx                ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r13, %rax
+	movq	%rax, 32(%rdi)
+	adcq	%r12, %r11
 	adcq	%r14, %r10
-	adcq	%r9, %r11
-	adcq	%r8, %r13
-	movq	-8(%rsp), %rax          ## 8-byte Reload
-	movq	24(%rax), %rbx
+	adcq	%r9, %r15
+	adcq	%rcx, %rbx
+	movq	%rbx, -96(%rsp)                 ## 8-byte Spill
+	adcq	-88(%rsp), %rsi                 ## 8-byte Folded Reload
+	movq	%rsi, -112(%rsp)                ## 8-byte Spill
+	adcq	$0, %rdx
+	movq	%rdx, -104(%rsp)                ## 8-byte Spill
+	movq	-64(%rsp), %rax                 ## 8-byte Reload
+	movq	40(%rax), %rbx
 	movq	%rbx, %rax
-	mulq	24(%rsi)
-	movq	%rdx, %r8
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rcx
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %r9
+	movq	%rbx, %rax
+	mulq	-8(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %rsi
 	movq	%rax, %r14
 	movq	%rbx, %rax
-	mulq	16(%rsi)
-	movq	%rdx, %r9
-	movq	%rax, %r12
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, %rdi
 	movq	%rbx, %rax
-	mulq	8(%rsi)
-	movq	%rdx, %r15
-	movq	%rax, %rbp
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %r8
 	movq	%rbx, %rax
-	mulq	(%rsi)
-	addq	%rcx, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r10, %rbp
-	adcq	%r11, %r12
-	adcq	%r13, %r14
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%rdx, %rbp
-	movq	%rbp, 32(%rdi)
-	adcq	%r15, %r12
-	movq	%r12, 40(%rdi)
-	adcq	%r9, %r14
-	movq	%r14, 48(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 56(%rdi)
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
+	addq	%r12, %r8
+	adcq	%r13, %rax
+	adcq	%r14, %rdx
+	adcq	%r9, %rsi
+	adcq	-72(%rsp), %rbp                 ## 8-byte Folded Reload
+	adcq	$0, %rcx
+	addq	%r11, %rdi
+	movq	-48(%rsp), %rbx                 ## 8-byte Reload
+	movq	%rdi, 40(%rbx)
+	adcq	%r10, %r8
+	movq	%r8, 48(%rbx)
+	adcq	%r15, %rax
+	movq	%rax, 56(%rbx)
+	adcq	-96(%rsp), %rdx                 ## 8-byte Folded Reload
+	movq	%rdx, 64(%rbx)
+	adcq	-112(%rsp), %rsi                ## 8-byte Folded Reload
+	movq	%rsi, 72(%rbx)
+	adcq	-104(%rsp), %rbp                ## 8-byte Folded Reload
+	movq	%rbp, 80(%rbx)
+	adcq	$0, %rcx
+	movq	%rcx, 88(%rbx)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -2081,115 +3141,246 @@ _mcl_fpDbl_mulPre4L:                    ## @mcl_fpDbl_mulPre4L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_sqrPre4L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sqrPre6L             ## -- Begin function mcl_fpDbl_sqrPre6L
 	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre4L:                    ## @mcl_fpDbl_sqrPre4L
-## BB#0:
+_mcl_fpDbl_sqrPre6L:                    ## @mcl_fpDbl_sqrPre6L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rsi, %r10
-	movq	16(%r10), %r9
-	movq	24(%r10), %r11
-	movq	(%r10), %r15
-	movq	8(%r10), %r8
-	movq	%r15, %rax
-	mulq	%r15
-	movq	%rdx, %rbp
-	movq	%rax, (%rdi)
+	subq	$168, %rsp
+	movq	%rdi, -128(%rsp)                ## 8-byte Spill
+	movq	40(%rsi), %r9
+	movq	(%rsi), %r10
+	movq	8(%rsi), %rcx
+	movq	%r9, %rax
+	mulq	%r10
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	%rdx, 16(%rsp)                  ## 8-byte Spill
+	movq	32(%rsi), %r8
+	movq	%r8, %rax
+	mulq	%r10
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	%rdx, (%rsp)                    ## 8-byte Spill
+	movq	24(%rsi), %r11
 	movq	%r11, %rax
-	mulq	%r8
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
+	mulq	%r10
+	movq	%rax, -16(%rsp)                 ## 8-byte Spill
+	movq	%rdx, -80(%rsp)                 ## 8-byte Spill
+	movq	16(%rsi), %r14
+	movq	%r14, %rax
+	mulq	%r10
+	movq	%rdx, 144(%rsp)                 ## 8-byte Spill
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
 	movq	%r9, %rax
-	mulq	%r8
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
+	mulq	%rcx
+	movq	%rdx, -8(%rsp)                  ## 8-byte Spill
+	movq	%rax, -24(%rsp)                 ## 8-byte Spill
+	movq	%r8, %rax
+	mulq	%rcx
+	movq	%rdx, -32(%rsp)                 ## 8-byte Spill
+	movq	%rax, -88(%rsp)                 ## 8-byte Spill
 	movq	%r11, %rax
-	mulq	%r15
+	mulq	%rcx
+	movq	%rdx, -96(%rsp)                 ## 8-byte Spill
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%r14, %rax
+	mulq	%rcx
+	movq	%rdx, %rsi
+	movq	%rdx, 40(%rsp)                  ## 8-byte Spill
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	mulq	%rcx
+	movq	%rdx, 112(%rsp)                 ## 8-byte Spill
+	movq	%rax, %rbp
+	movq	%rcx, %rax
+	mulq	%r10
 	movq	%rdx, %rbx
-	movq	%rax, %rcx
+	movq	%rax, %r15
+	movq	%r10, %rax
+	mulq	%r10
+	movq	%rdx, %rcx
+	movq	%rax, (%rdi)
 	movq	%r9, %rax
-	mulq	%r15
-	movq	%rdx, %rsi
-	movq	%rsi, -16(%rsp)         ## 8-byte Spill
-	movq	%rax, %r12
-	movq	%r8, %rax
 	mulq	%r8
-	movq	%rdx, %r13
-	movq	%rax, %r14
-	movq	%r8, %rax
-	mulq	%r15
-	addq	%rax, %rbp
-	movq	%rdx, %r8
-	adcq	%r12, %r8
-	adcq	%rsi, %rcx
-	adcq	$0, %rbx
-	addq	%rax, %rbp
-	movq	%rbp, 8(%rdi)
-	adcq	%r14, %r8
-	movq	-40(%rsp), %rsi         ## 8-byte Reload
-	adcq	%rsi, %rcx
-	adcq	-32(%rsp), %rbx         ## 8-byte Folded Reload
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	addq	%rdx, %r8
-	adcq	%r13, %rcx
-	movq	-24(%rsp), %r15         ## 8-byte Reload
-	adcq	%r15, %rbx
-	adcq	-8(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%r11, %rax
-	mulq	%r9
-	movq	%rdx, %r14
-	movq	%rax, %r11
+	movq	%rdx, 136(%rsp)                 ## 8-byte Spill
+	movq	%rax, 128(%rsp)                 ## 8-byte Spill
 	movq	%r9, %rax
-	mulq	%r9
-	movq	%rax, %r9
-	addq	%r12, %r8
-	movq	%r8, 16(%rdi)
-	adcq	%rsi, %rcx
-	adcq	%rbx, %r9
-	adcq	%rbp, %r11
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-16(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	%r15, %r9
-	adcq	%rdx, %r11
-	adcq	%r14, %r12
-	movq	24(%r10), %rbp
-	movq	%rbp, %rax
-	mulq	16(%r10)
-	movq	%rdx, %r8
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	8(%r10)
-	movq	%rdx, %r13
-	movq	%rax, %rbx
-	movq	%rbp, %rax
-	mulq	(%r10)
-	movq	%rdx, %r15
-	movq	%rax, %rsi
-	movq	%rbp, %rax
-	mulq	%rbp
-	addq	%rcx, %rsi
-	movq	%rsi, 24(%rdi)
-	adcq	%r9, %rbx
-	adcq	%r11, %r14
-	adcq	%r12, %rax
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r15, %rbx
-	movq	%rbx, 32(%rdi)
-	adcq	%r13, %r14
-	movq	%r14, 40(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 48(%rdi)
+	mulq	%r11
+	movq	%rdx, -40(%rsp)                 ## 8-byte Spill
+	movq	%rax, -48(%rsp)                 ## 8-byte Spill
+	movq	%r9, %rax
+	mulq	%r14
+	movq	%rdx, -56(%rsp)                 ## 8-byte Spill
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	movq	%r9, %rax
+	mulq	%r9
+	movq	%rdx, 160(%rsp)                 ## 8-byte Spill
+	movq	%rax, 152(%rsp)                 ## 8-byte Spill
+	movq	%r8, %rax
+	mulq	%r11
+	movq	%rdx, 96(%rsp)                  ## 8-byte Spill
+	movq	%rax, 88(%rsp)                  ## 8-byte Spill
+	movq	%r8, %rax
+	mulq	%r14
+	movq	%rdx, -112(%rsp)                ## 8-byte Spill
+	movq	%rax, -120(%rsp)                ## 8-byte Spill
+	movq	%r8, %rax
+	mulq	%r8
+	movq	%rdx, 120(%rsp)                 ## 8-byte Spill
+	movq	%rax, 104(%rsp)                 ## 8-byte Spill
+	movq	%r11, %rax
+	mulq	%r14
+	movq	%rdx, 64(%rsp)                  ## 8-byte Spill
+	movq	%rax, 56(%rsp)                  ## 8-byte Spill
+	movq	%r11, %rax
+	mulq	%r11
+	movq	%rdx, 80(%rsp)                  ## 8-byte Spill
+	movq	%rax, 72(%rsp)                  ## 8-byte Spill
+	movq	%r14, %rax
+	mulq	%r14
+	movq	%rax, %r12
+	movq	%rdx, 48(%rsp)                  ## 8-byte Spill
+	addq	%rbx, %rbp
+	movq	%rbp, 32(%rsp)                  ## 8-byte Spill
+	movq	112(%rsp), %r11                 ## 8-byte Reload
+	adcq	%r13, %r11
+	movq	%rsi, %r10
+	adcq	-104(%rsp), %r10                ## 8-byte Folded Reload
+	movq	-96(%rsp), %r14                 ## 8-byte Reload
+	adcq	-88(%rsp), %r14                 ## 8-byte Folded Reload
+	movq	-32(%rsp), %r9                  ## 8-byte Reload
+	adcq	-24(%rsp), %r9                  ## 8-byte Folded Reload
+	movq	-8(%rsp), %r8                   ## 8-byte Reload
+	adcq	$0, %r8
+	movq	%r15, %rdi
+	addq	%r15, %rcx
+	adcq	-72(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	144(%rsp), %r15                 ## 8-byte Reload
+	movq	%r15, %rbp
+	adcq	-16(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	-80(%rsp), %rax                 ## 8-byte Reload
+	adcq	8(%rsp), %rax                   ## 8-byte Folded Reload
+	movq	(%rsp), %rdx                    ## 8-byte Reload
+	adcq	24(%rsp), %rdx                  ## 8-byte Folded Reload
+	movq	16(%rsp), %rsi                  ## 8-byte Reload
+	adcq	$0, %rsi
+	addq	%rdi, %rcx
+	adcq	32(%rsp), %rbx                  ## 8-byte Folded Reload
+	movq	-128(%rsp), %rdi                ## 8-byte Reload
+	movq	%rcx, 8(%rdi)
+	adcq	%r11, %rbp
+	adcq	%r10, %rax
+	adcq	%r14, %rdx
+	adcq	%r9, %rsi
+	adcq	$0, %r8
+	movq	%r15, %r9
+	addq	%r13, %r9
+	adcq	40(%rsp), %r12                  ## 8-byte Folded Reload
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	movq	56(%rsp), %rdi                  ## 8-byte Reload
+	adcq	%rdi, %rcx
+	movq	64(%rsp), %r15                  ## 8-byte Reload
+	movq	%r15, %r10
+	adcq	-120(%rsp), %r10                ## 8-byte Folded Reload
+	movq	-112(%rsp), %r11                ## 8-byte Reload
+	adcq	-64(%rsp), %r11                 ## 8-byte Folded Reload
+	movq	-56(%rsp), %r13                 ## 8-byte Reload
+	adcq	$0, %r13
+	addq	-72(%rsp), %rbx                 ## 8-byte Folded Reload
+	adcq	%rbp, %r9
+	movq	-128(%rsp), %rbp                ## 8-byte Reload
+	movq	%rbx, 16(%rbp)
+	adcq	%rax, %r12
 	adcq	%rdx, %rcx
-	movq	%rcx, 56(%rdi)
+	movq	%rcx, %rbx
+	adcq	%rsi, %r10
+	adcq	%r8, %r11
+	adcq	$0, %r13
+	movq	-80(%rsp), %rsi                 ## 8-byte Reload
+	addq	-104(%rsp), %rsi                ## 8-byte Folded Reload
+	movq	-96(%rsp), %rax                 ## 8-byte Reload
+	adcq	%rdi, %rax
+	movq	72(%rsp), %rdi                  ## 8-byte Reload
+	adcq	%r15, %rdi
+	movq	80(%rsp), %rdx                  ## 8-byte Reload
+	movq	88(%rsp), %r15                  ## 8-byte Reload
+	adcq	%r15, %rdx
+	movq	96(%rsp), %r8                   ## 8-byte Reload
+	movq	%r8, %r14
+	adcq	-48(%rsp), %r14                 ## 8-byte Folded Reload
+	movq	-40(%rsp), %rcx                 ## 8-byte Reload
+	adcq	$0, %rcx
+	addq	-16(%rsp), %r9                  ## 8-byte Folded Reload
+	adcq	%r12, %rsi
+	movq	%r9, 24(%rbp)
+	adcq	%rbx, %rax
+	adcq	%r10, %rdi
+	movq	%rdi, %r9
+	adcq	%r11, %rdx
+	movq	%rdx, %r12
+	adcq	%r13, %r14
+	adcq	$0, %rcx
+	movq	(%rsp), %rdi                    ## 8-byte Reload
+	addq	-88(%rsp), %rdi                 ## 8-byte Folded Reload
+	movq	-32(%rsp), %rdx                 ## 8-byte Reload
+	adcq	-120(%rsp), %rdx                ## 8-byte Folded Reload
+	movq	-112(%rsp), %rbx                ## 8-byte Reload
+	adcq	%r15, %rbx
+	movq	104(%rsp), %r13                 ## 8-byte Reload
+	adcq	%r8, %r13
+	movq	120(%rsp), %rbp                 ## 8-byte Reload
+	movq	128(%rsp), %r11                 ## 8-byte Reload
+	adcq	%r11, %rbp
+	movq	136(%rsp), %r15                 ## 8-byte Reload
+	movq	%r15, %r10
+	adcq	$0, %r10
+	addq	8(%rsp), %rsi                   ## 8-byte Folded Reload
+	adcq	%rax, %rdi
+	movq	-128(%rsp), %r8                 ## 8-byte Reload
+	movq	%rsi, 32(%r8)
+	adcq	%r9, %rdx
+	movq	%rdx, %r9
+	adcq	%r12, %rbx
+	movq	%rbx, %r12
+	adcq	%r14, %r13
+	adcq	%rcx, %rbp
+	movq	%rbp, %r14
+	adcq	$0, %r10
+	movq	16(%rsp), %rsi                  ## 8-byte Reload
+	addq	-24(%rsp), %rsi                 ## 8-byte Folded Reload
+	movq	-8(%rsp), %rdx                  ## 8-byte Reload
+	adcq	-64(%rsp), %rdx                 ## 8-byte Folded Reload
+	movq	-56(%rsp), %rbp                 ## 8-byte Reload
+	adcq	-48(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	-40(%rsp), %rbx                 ## 8-byte Reload
+	adcq	%r11, %rbx
+	movq	152(%rsp), %r11                 ## 8-byte Reload
+	adcq	%r15, %r11
+	movq	160(%rsp), %rax                 ## 8-byte Reload
+	adcq	$0, %rax
+	addq	24(%rsp), %rdi                  ## 8-byte Folded Reload
+	movq	%rdi, 40(%r8)
+	adcq	%r9, %rsi
+	movq	%rsi, 48(%r8)
+	adcq	%r12, %rdx
+	movq	%rdx, 56(%r8)
+	movq	%rbp, %rdx
+	adcq	%r13, %rdx
+	movq	%rdx, 64(%r8)
+	movq	%rbx, %rdx
+	adcq	%r14, %rdx
+	movq	%rdx, 72(%r8)
+	movq	%r11, %rdx
+	adcq	%r10, %rdx
+	movq	%rdx, 80(%r8)
+	adcq	$0, %rax
+	movq	%rax, 88(%r8)
+	addq	$168, %rsp
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -2197,273 +3388,573 @@ _mcl_fpDbl_sqrPre4L:                    ## @mcl_fpDbl_sqrPre4L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_mont4L
+                                        ## -- End function
+	.globl	_mcl_fp_mont6L                  ## -- Begin function mcl_fp_mont6L
 	.p2align	4, 0x90
-_mcl_fp_mont4L:                         ## @mcl_fp_mont4L
-## BB#0:
+_mcl_fp_mont6L:                         ## @mcl_fp_mont6L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	24(%rsi), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
+	subq	$48, %rsp
+	movq	%rdx, -48(%rsp)                 ## 8-byte Spill
+	movq	%rdi, 40(%rsp)                  ## 8-byte Spill
+	movq	40(%rsi), %rax
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
 	movq	(%rdx), %rbp
 	mulq	%rbp
-	movq	%rax, %r9
-	movq	%rdx, %r8
+	movq	%rax, %r8
+	movq	%rdx, %r10
+	movq	32(%rsi), %rax
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	mulq	%rbp
+	movq	%rax, %r11
+	movq	%rdx, %r13
+	movq	24(%rsi), %rax
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	mulq	%rbp
+	movq	%rax, %r15
+	movq	%rdx, %rdi
 	movq	16(%rsi), %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
+	movq	%rax, -40(%rsp)                 ## 8-byte Spill
 	mulq	%rbp
-	movq	%rax, %rbx
-	movq	%rdx, %r11
-	movq	(%rsi), %rdi
-	movq	%rdi, -56(%rsp)         ## 8-byte Spill
+	movq	%rax, %r9
+	movq	%rdx, %r14
+	movq	(%rsi), %rbx
+	movq	%rbx, 24(%rsp)                  ## 8-byte Spill
 	movq	8(%rsi), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
 	mulq	%rbp
 	movq	%rdx, %r12
 	movq	%rax, %rsi
-	movq	%rdi, %rax
+	movq	%rbx, %rax
 	mulq	%rbp
-	movq	%rax, %r13
-	movq	%rdx, %r15
-	addq	%rsi, %r15
-	adcq	%rbx, %r12
-	adcq	%r9, %r11
-	adcq	$0, %r8
-	movq	-8(%rcx), %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%r13, %rsi
-	imulq	%rax, %rsi
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rdx, %rbp
+	addq	%rsi, %rbp
+	adcq	%r9, %r12
+	adcq	%r15, %r14
+	adcq	%r11, %rdi
+	movq	%rdi, -88(%rsp)                 ## 8-byte Spill
+	adcq	%r8, %r13
+	movq	%r13, -128(%rsp)                ## 8-byte Spill
+	adcq	$0, %r10
+	movq	%r10, -112(%rsp)                ## 8-byte Spill
+	movq	-8(%rcx), %r8
+	movq	%r8, -32(%rsp)                  ## 8-byte Spill
+	imulq	%rax, %r8
+	movq	40(%rcx), %rdx
+	movq	%rdx, 8(%rsp)                   ## 8-byte Spill
+	movq	%r8, %rax
+	mulq	%rdx
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	32(%rcx), %rdx
+	movq	%rdx, (%rsp)                    ## 8-byte Spill
+	movq	%r8, %rax
+	mulq	%rdx
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	movq	%rdx, %r11
 	movq	24(%rcx), %rdx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
+	movq	%rdx, -8(%rsp)                  ## 8-byte Spill
+	movq	%r8, %rax
 	mulq	%rdx
-	movq	%rax, %r10
-	movq	%rdx, %r9
+	movq	%rax, %r13
+	movq	%rdx, %r15
 	movq	16(%rcx), %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
+	movq	%rdx, -16(%rsp)                 ## 8-byte Spill
+	movq	%r8, %rax
 	mulq	%rdx
-	movq	%rax, %r14
-	movq	%rdx, %rbx
-	movq	(%rcx), %rbp
-	movq	%rbp, -24(%rsp)         ## 8-byte Spill
+	movq	%rax, %r9
+	movq	%rdx, %rsi
+	movq	(%rcx), %rbx
+	movq	%rbx, -24(%rsp)                 ## 8-byte Spill
 	movq	8(%rcx), %rcx
-	movq	%rcx, -32(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	movq	%r8, %rax
 	mulq	%rcx
 	movq	%rdx, %rdi
-	movq	%rax, %rcx
-	movq	%rsi, %rax
-	mulq	%rbp
-	movq	%rdx, %rsi
-	addq	%rcx, %rsi
-	adcq	%r14, %rdi
-	adcq	%r10, %rbx
-	adcq	$0, %r9
-	addq	%r13, %rax
-	adcq	%r15, %rsi
-	adcq	%r12, %rdi
-	adcq	%r11, %rbx
-	adcq	%r8, %r9
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	movq	8(%rax), %rbp
-	movq	%rbp, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
 	movq	%rax, %r10
-	movq	%rbp, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %r11
-	movq	%rbp, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
+	movq	%r8, %rax
+	mulq	%rbx
 	movq	%rdx, %rcx
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %r8
+	addq	%r10, %rcx
+	adcq	%r9, %rdi
+	adcq	%r13, %rsi
+	adcq	-80(%rsp), %r15                 ## 8-byte Folded Reload
+	adcq	-104(%rsp), %r11                ## 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	-96(%rsp), %rax                 ## 8-byte Folded Reload
+	adcq	%rbp, %rcx
+	adcq	%r12, %rdi
+	adcq	%r14, %rsi
+	adcq	-88(%rsp), %r15                 ## 8-byte Folded Reload
+	adcq	-128(%rsp), %r11                ## 8-byte Folded Reload
+	adcq	-112(%rsp), %rdx                ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	setb	-128(%rsp)                      ## 1-byte Folded Spill
+	movq	-48(%rsp), %rax                 ## 8-byte Reload
+	movq	8(%rax), %rbx
+	movq	%rbx, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -112(%rsp)                ## 8-byte Spill
+	movq	%rax, -88(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rbx, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r13
+	movq	%rbx, %rax
+	mulq	16(%rsp)                        ## 8-byte Folded Reload
 	movq	%rdx, %rbp
-	addq	%r14, %rbp
-	adcq	%r11, %rcx
-	adcq	%r10, %r13
-	adcq	$0, %r12
-	addq	%rsi, %r8
-	adcq	%rdi, %rbp
-	adcq	%rbx, %rcx
-	adcq	%r9, %r13
-	adcq	%r15, %r12
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	movq	%r8, %rsi
-	imulq	-88(%rsp), %rsi         ## 8-byte Folded Reload
+	movq	%rax, %r12
+	movq	%rbx, %rax
+	mulq	24(%rsp)                        ## 8-byte Folded Reload
+	movq	%rax, %r9
+	movq	%rdx, %rbx
+	addq	%r12, %rbx
+	adcq	%r13, %rbp
+	adcq	-104(%rsp), %r8                 ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r14                 ## 8-byte Folded Reload
+	adcq	-88(%rsp), %r10                 ## 8-byte Folded Reload
+	movq	-112(%rsp), %rdx                ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	%rcx, %r9
+	adcq	%rdi, %rbx
+	adcq	%rsi, %rbp
+	adcq	%r15, %r8
+	adcq	%r11, %r14
+	adcq	-120(%rsp), %r10                ## 8-byte Folded Reload
+	movzbl	-128(%rsp), %eax                ## 1-byte Folded Reload
+	adcq	%rax, %rdx
+	movq	%rdx, -112(%rsp)                ## 8-byte Spill
+	setb	-120(%rsp)                      ## 1-byte Folded Spill
+	movq	-32(%rsp), %rsi                 ## 8-byte Reload
+	imulq	%r9, %rsi
 	movq	%rsi, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %r10
+	mulq	8(%rsp)                         ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
 	movq	%rsi, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r11
+	mulq	(%rsp)                          ## 8-byte Folded Reload
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-8(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rcx
+	movq	%rax, %rdi
+	movq	%rsi, %rax
+	mulq	32(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, %r15
+	movq	%rsi, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	addq	%r15, %r11
+	adcq	%rdi, %r12
+	adcq	-80(%rsp), %rcx                 ## 8-byte Folded Reload
+	adcq	-104(%rsp), %r13                ## 8-byte Folded Reload
+	movq	-88(%rsp), %rsi                 ## 8-byte Reload
+	adcq	-96(%rsp), %rsi                 ## 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r9, %rax
+	adcq	%rbx, %r11
+	adcq	%rbp, %r12
+	adcq	%r8, %rcx
+	adcq	%r14, %r13
+	adcq	%r10, %rsi
+	movq	%rsi, -88(%rsp)                 ## 8-byte Spill
+	adcq	-112(%rsp), %rdx                ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movzbl	-120(%rsp), %ebx                ## 1-byte Folded Reload
+	adcq	$0, %rbx
+	movq	-48(%rsp), %rax                 ## 8-byte Reload
+	movq	16(%rax), %rsi
+	movq	%rsi, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -112(%rsp)                ## 8-byte Spill
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
 	movq	%rsi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %rdi
+	movq	%rax, %r9
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r10
 	movq	%rax, %r14
 	movq	%rsi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	addq	%r14, %rsi
-	adcq	%r11, %rdi
-	adcq	%r10, %rbx
-	adcq	$0, %r9
-	addq	%r8, %rax
-	adcq	%rbp, %rsi
-	adcq	%rcx, %rdi
-	adcq	%r13, %rbx
-	adcq	%r12, %r9
-	adcq	$0, %r15
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	movq	16(%rax), %rbp
-	movq	%rbp, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
+	mulq	16(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, %rbp
+	movq	%rsi, %rax
+	mulq	24(%rsp)                        ## 8-byte Folded Reload
+	movq	%rax, %rsi
+	movq	%rdx, %r8
+	addq	%rbp, %r8
+	adcq	%r14, %r15
+	adcq	%r9, %r10
+	adcq	-104(%rsp), %rdi                ## 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                ## 8-byte Reload
+	adcq	-96(%rsp), %rdx                 ## 8-byte Folded Reload
+	movq	-112(%rsp), %rax                ## 8-byte Reload
+	adcq	$0, %rax
+	addq	%r11, %rsi
+	adcq	%r12, %r8
+	adcq	%rcx, %r15
+	adcq	%r13, %r10
+	adcq	-88(%rsp), %rdi                 ## 8-byte Folded Reload
+	adcq	-128(%rsp), %rdx                ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	adcq	%rbx, %rax
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	-32(%rsp), %rcx                 ## 8-byte Reload
+	imulq	%rsi, %rcx
+	movq	%rcx, %rax
+	mulq	8(%rsp)                         ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	(%rsp)                          ## 8-byte Folded Reload
 	movq	%rdx, %r12
-	movq	%rax, %r10
-	movq	%rbp, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-8(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	mulq	32(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %r9
+	movq	%rcx, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	addq	%r9, %r11
+	adcq	%r13, %rbp
+	adcq	-80(%rsp), %r14                 ## 8-byte Folded Reload
+	adcq	-104(%rsp), %rbx                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r12                 ## 8-byte Folded Reload
+	movq	-128(%rsp), %rcx                ## 8-byte Reload
+	adcq	$0, %rcx
+	addq	%rsi, %rax
+	adcq	%r8, %r11
+	adcq	%r15, %rbp
+	adcq	%r10, %r14
+	adcq	%rdi, %rbx
+	adcq	-120(%rsp), %r12                ## 8-byte Folded Reload
+	adcq	-112(%rsp), %rcx                ## 8-byte Folded Reload
+	movq	%rcx, -128(%rsp)                ## 8-byte Spill
+	movzbl	-88(%rsp), %esi                 ## 1-byte Folded Reload
+	adcq	$0, %rsi
+	movq	-48(%rsp), %rax                 ## 8-byte Reload
+	movq	24(%rax), %rdi
+	movq	%rdi, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -112(%rsp)                ## 8-byte Spill
+	movq	%rax, -88(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %r13
-	movq	%rax, %r11
-	movq	%rbp, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, %r10
+	movq	%rdi, %rax
+	mulq	16(%rsp)                        ## 8-byte Folded Reload
 	movq	%rdx, %rcx
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %rbp
+	movq	%rax, %r8
+	movq	%rdi, %rax
+	mulq	24(%rsp)                        ## 8-byte Folded Reload
+	movq	%rax, %r9
+	movq	%rdx, %rdi
+	addq	%r8, %rdi
+	adcq	%r10, %rcx
+	adcq	-104(%rsp), %r15                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r13                 ## 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                ## 8-byte Reload
+	adcq	-88(%rsp), %rdx                 ## 8-byte Folded Reload
+	movq	-112(%rsp), %rax                ## 8-byte Reload
+	adcq	$0, %rax
+	addq	%r11, %r9
+	adcq	%rbp, %rdi
+	adcq	%r14, %rcx
+	adcq	%rbx, %r15
+	adcq	%r12, %r13
+	adcq	-128(%rsp), %rdx                ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	adcq	%rsi, %rax
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	-32(%rsp), %rsi                 ## 8-byte Reload
+	imulq	%r9, %rsi
+	movq	%rsi, %rax
+	mulq	8(%rsp)                         ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	(%rsp)                          ## 8-byte Folded Reload
 	movq	%rdx, %r8
-	addq	%r14, %r8
-	adcq	%r11, %rcx
-	adcq	%r10, %r13
-	adcq	$0, %r12
-	addq	%rsi, %rbp
-	adcq	%rdi, %r8
-	adcq	%rbx, %rcx
-	adcq	%r9, %r13
-	adcq	%r15, %r12
-	sbbq	%r14, %r14
-	movq	%rbp, %rsi
-	imulq	-88(%rsp), %rsi         ## 8-byte Folded Reload
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
 	movq	%rsi, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
+	mulq	-8(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
 	movq	%rsi, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r10
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %r14
 	movq	%rsi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %r15
+	mulq	32(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r11
 	movq	%rsi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	andl	$1, %r14d
-	addq	%r15, %r11
-	adcq	%r10, %r9
-	adcq	-16(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	$0, %rdi
-	addq	%rbp, %rax
-	adcq	%r8, %r11
-	adcq	%rcx, %r9
-	adcq	%r13, %rbx
-	adcq	%r12, %rdi
-	adcq	$0, %r14
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	movq	24(%rax), %rcx
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r10
+	addq	%r11, %r10
+	adcq	%r14, %rbx
+	adcq	-80(%rsp), %rbp                 ## 8-byte Folded Reload
+	adcq	-104(%rsp), %r12                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r8                  ## 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r9, %rax
+	adcq	%rdi, %r10
+	adcq	%rcx, %rbx
+	adcq	%r15, %rbp
+	adcq	%r13, %r12
+	adcq	-120(%rsp), %r8                 ## 8-byte Folded Reload
+	adcq	-112(%rsp), %rdx                ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movzbl	-88(%rsp), %r11d                ## 1-byte Folded Reload
+	adcq	$0, %r11
+	movq	-48(%rsp), %rax                 ## 8-byte Reload
+	movq	32(%rax), %rcx
 	movq	%rcx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -112(%rsp)                ## 8-byte Spill
+	movq	%rax, -88(%rsp)                 ## 8-byte Spill
 	movq	%rcx, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %rsi
+	movq	%rcx, %rax
+	mulq	16(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %rdi
 	movq	%rax, %r15
 	movq	%rcx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
+	mulq	24(%rsp)                        ## 8-byte Folded Reload
+	movq	%rax, %r9
+	movq	%rdx, %rcx
+	addq	%r15, %rcx
+	adcq	%rsi, %rdi
+	adcq	-104(%rsp), %r13                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r14                 ## 8-byte Folded Reload
+	movq	-120(%rsp), %rax                ## 8-byte Reload
+	adcq	-88(%rsp), %rax                 ## 8-byte Folded Reload
+	movq	-112(%rsp), %rdx                ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r10, %r9
+	adcq	%rbx, %rcx
+	adcq	%rbp, %rdi
+	adcq	%r12, %r13
+	adcq	%r8, %r14
+	adcq	-128(%rsp), %rax                ## 8-byte Folded Reload
+	movq	%rax, -120(%rsp)                ## 8-byte Spill
+	adcq	%r11, %rdx
+	movq	%rdx, -112(%rsp)                ## 8-byte Spill
+	setb	-88(%rsp)                       ## 1-byte Folded Spill
+	movq	-32(%rsp), %rbx                 ## 8-byte Reload
+	imulq	%r9, %rbx
+	movq	%rbx, %rax
+	mulq	8(%rsp)                         ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rax
+	mulq	(%rsp)                          ## 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rbx, %rax
+	mulq	-8(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, %r10
+	movq	%rbx, %rax
+	mulq	32(%rsp)                        ## 8-byte Folded Reload
 	movq	%rdx, %rsi
-	movq	%rax, %r13
+	movq	%rax, %r11
+	movq	%rbx, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
+	addq	%r11, %r8
+	adcq	%r10, %rsi
+	adcq	-80(%rsp), %rbp                 ## 8-byte Folded Reload
+	adcq	-104(%rsp), %r12                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r15                 ## 8-byte Folded Reload
+	movq	-128(%rsp), %rbx                ## 8-byte Reload
+	adcq	$0, %rbx
+	addq	%r9, %rax
+	adcq	%rcx, %r8
+	adcq	%rdi, %rsi
+	adcq	%r13, %rbp
+	adcq	%r14, %r12
+	adcq	-120(%rsp), %r15                ## 8-byte Folded Reload
+	movq	%r15, -120(%rsp)                ## 8-byte Spill
+	adcq	-112(%rsp), %rbx                ## 8-byte Folded Reload
+	movq	%rbx, -128(%rsp)                ## 8-byte Spill
+	movzbl	-88(%rsp), %edi                 ## 1-byte Folded Reload
+	adcq	$0, %rdi
+	movq	-48(%rsp), %rax                 ## 8-byte Reload
+	movq	40(%rax), %rcx
 	movq	%rcx, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %r10
-	movq	%rdx, %rbp
-	addq	%r13, %rbp
-	adcq	%r15, %rsi
-	adcq	-96(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	%r11, %r10
-	adcq	%r9, %rbp
-	adcq	%rbx, %rsi
-	adcq	%rdi, %r12
-	adcq	%r14, %r8
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	movq	-88(%rsp), %rcx         ## 8-byte Reload
-	imulq	%r10, %rcx
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -48(%rsp)                 ## 8-byte Spill
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
 	movq	%rcx, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -56(%rsp)                 ## 8-byte Spill
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
 	movq	%rcx, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r15
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r13
 	movq	%rax, %rbx
 	movq	%rcx, %rax
-	movq	%rcx, %r9
-	movq	-32(%rsp), %r11         ## 8-byte Reload
-	mulq	%r11
-	movq	%rdx, %rcx
+	mulq	16(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	mulq	24(%rsp)                        ## 8-byte Folded Reload
 	movq	%rax, %r14
-	movq	%r9, %rax
-	movq	-24(%rsp), %r9          ## 8-byte Reload
-	mulq	%r9
-	addq	%r14, %rdx
-	adcq	%rbx, %rcx
-	adcq	-88(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%r10, %rax
-	adcq	%rbp, %rdx
-	adcq	%rsi, %rcx
-	adcq	%r12, %r15
-	adcq	%r8, %r13
-	adcq	$0, %rdi
-	movq	%rdx, %rax
-	subq	%r9, %rax
-	movq	%rcx, %rsi
-	sbbq	%r11, %rsi
-	movq	%r15, %rbp
-	sbbq	-80(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%r13, %rbx
-	sbbq	-72(%rsp), %rbx         ## 8-byte Folded Reload
-	sbbq	$0, %rdi
-	andl	$1, %edi
-	cmovneq	%r13, %rbx
-	testb	%dil, %dil
-	cmovneq	%rdx, %rax
-	movq	-8(%rsp), %rdx          ## 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovneq	%rcx, %rsi
-	movq	%rsi, 8(%rdx)
-	cmovneq	%r15, %rbp
-	movq	%rbp, 16(%rdx)
-	movq	%rbx, 24(%rdx)
+	movq	%rdx, %r9
+	addq	%r15, %r9
+	adcq	%rbx, %r10
+	adcq	-72(%rsp), %r13                 ## 8-byte Folded Reload
+	adcq	-64(%rsp), %r11                 ## 8-byte Folded Reload
+	movq	-56(%rsp), %rcx                 ## 8-byte Reload
+	adcq	-112(%rsp), %rcx                ## 8-byte Folded Reload
+	movq	-48(%rsp), %rax                 ## 8-byte Reload
+	adcq	$0, %rax
+	addq	%r8, %r14
+	adcq	%rsi, %r9
+	adcq	%rbp, %r10
+	adcq	%r12, %r13
+	adcq	-120(%rsp), %r11                ## 8-byte Folded Reload
+	adcq	-128(%rsp), %rcx                ## 8-byte Folded Reload
+	movq	%rcx, -56(%rsp)                 ## 8-byte Spill
+	adcq	%rdi, %rax
+	movq	%rax, -48(%rsp)                 ## 8-byte Spill
+	setb	-64(%rsp)                       ## 1-byte Folded Spill
+	movq	-32(%rsp), %r12                 ## 8-byte Reload
+	imulq	%r14, %r12
+	movq	%r12, %rax
+	mulq	8(%rsp)                         ## 8-byte Folded Reload
+	movq	%rdx, %rsi
+	movq	%rax, -32(%rsp)                 ## 8-byte Spill
+	movq	%r12, %rax
+	mulq	(%rsp)                          ## 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	movq	%r12, %rax
+	mulq	-8(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %rcx
+	movq	%rax, -40(%rsp)                 ## 8-byte Spill
+	movq	%r12, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r15
+	movq	%r12, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %rdi
+	movq	%r12, %rax
+	movq	32(%rsp), %r12                  ## 8-byte Reload
+	mulq	%r12
+	addq	%r8, %rax
+	adcq	%r15, %rdx
+	adcq	-40(%rsp), %rbx                 ## 8-byte Folded Reload
+	adcq	-72(%rsp), %rcx                 ## 8-byte Folded Reload
+	adcq	-32(%rsp), %rbp                 ## 8-byte Folded Reload
+	adcq	$0, %rsi
+	addq	%r14, %rdi
+	adcq	%r9, %rax
+	adcq	%r10, %rdx
+	adcq	%r13, %rbx
+	adcq	%r11, %rcx
+	adcq	-56(%rsp), %rbp                 ## 8-byte Folded Reload
+	adcq	-48(%rsp), %rsi                 ## 8-byte Folded Reload
+	movzbl	-64(%rsp), %r11d                ## 1-byte Folded Reload
+	adcq	$0, %r11
+	movq	%rax, %r8
+	subq	-24(%rsp), %r8                  ## 8-byte Folded Reload
+	movq	%rdx, %r9
+	sbbq	%r12, %r9
+	movq	%rbx, %r10
+	sbbq	-16(%rsp), %r10                 ## 8-byte Folded Reload
+	movq	%rcx, %r14
+	sbbq	-8(%rsp), %r14                  ## 8-byte Folded Reload
+	movq	%rbp, %r15
+	sbbq	(%rsp), %r15                    ## 8-byte Folded Reload
+	movq	%rsi, %rdi
+	sbbq	8(%rsp), %rdi                   ## 8-byte Folded Reload
+	sbbq	$0, %r11
+	testb	$1, %r11b
+	cmovneq	%rsi, %rdi
+	movq	40(%rsp), %rsi                  ## 8-byte Reload
+	movq	%rdi, 40(%rsi)
+	cmovneq	%rbp, %r15
+	movq	%r15, 32(%rsi)
+	cmovneq	%rcx, %r14
+	movq	%r14, 24(%rsi)
+	cmovneq	%rbx, %r10
+	movq	%r10, 16(%rsi)
+	cmovneq	%rdx, %r9
+	movq	%r9, 8(%rsi)
+	cmovneq	%rax, %r8
+	movq	%r8, (%rsi)
+	addq	$48, %rsp
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -2471,442 +3962,535 @@ _mcl_fp_mont4L:                         ## @mcl_fp_mont4L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montNF4L
+                                        ## -- End function
+	.globl	_mcl_fp_montNF6L                ## -- Begin function mcl_fp_montNF6L
 	.p2align	4, 0x90
-_mcl_fp_montNF4L:                       ## @mcl_fp_montNF4L
-## BB#0:
+_mcl_fp_montNF6L:                       ## @mcl_fp_montNF6L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, %r15
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	24(%rsi), %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	(%r15), %rdi
-	movq	%r15, -24(%rsp)         ## 8-byte Spill
+	subq	$40, %rsp
+	movq	%rdx, -72(%rsp)                 ## 8-byte Spill
+	movq	%rdi, 32(%rsp)                  ## 8-byte Spill
+	movq	40(%rsi), %rax
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	movq	(%rdx), %rdi
 	mulq	%rdi
-	movq	%rax, %r8
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
 	movq	%rdx, %r12
-	movq	16(%rsi), %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
+	movq	32(%rsi), %rax
+	movq	%rax, -88(%rsp)                 ## 8-byte Spill
 	mulq	%rdi
 	movq	%rax, %r14
 	movq	%rdx, %r10
-	movq	(%rsi), %rbp
-	movq	%rbp, -56(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
+	movq	24(%rsi), %rax
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
 	mulq	%rdi
-	movq	%rdx, %rbx
-	movq	%rax, %rsi
-	movq	%rbp, %rax
+	movq	%rax, %r15
+	movq	%rdx, %r9
+	movq	16(%rsi), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
 	mulq	%rdi
 	movq	%rax, %r11
-	movq	%rdx, %r9
-	addq	%rsi, %r9
-	adcq	%r14, %rbx
-	adcq	%r8, %r10
+	movq	%rdx, %r8
+	movq	(%rsi), %rbx
+	movq	%rbx, (%rsp)                    ## 8-byte Spill
+	movq	8(%rsi), %rax
+	movq	%rax, -8(%rsp)                  ## 8-byte Spill
+	mulq	%rdi
+	movq	%rdx, %rbp
+	movq	%rax, %rsi
+	movq	%rbx, %rax
+	mulq	%rdi
+	movq	%rax, %r13
+	movq	%rdx, %rdi
+	addq	%rsi, %rdi
+	adcq	%r11, %rbp
+	adcq	%r15, %r8
+	adcq	%r14, %r9
+	adcq	-64(%rsp), %r10                 ## 8-byte Folded Reload
+	movq	%r10, -128(%rsp)                ## 8-byte Spill
 	adcq	$0, %r12
-	movq	-8(%rcx), %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%r11, %rsi
-	imulq	%rax, %rsi
+	movq	%r12, -112(%rsp)                ## 8-byte Spill
+	movq	-8(%rcx), %rbx
+	movq	%rbx, -48(%rsp)                 ## 8-byte Spill
+	imulq	%rax, %rbx
+	movq	40(%rcx), %rdx
+	movq	%rdx, -64(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rax
+	mulq	%rdx
+	movq	%rax, %r14
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	32(%rcx), %rdx
+	movq	%rdx, -16(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rax
+	mulq	%rdx
+	movq	%rax, %r15
+	movq	%rdx, -96(%rsp)                 ## 8-byte Spill
 	movq	24(%rcx), %rdx
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
+	movq	%rdx, -24(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rax
 	mulq	%rdx
-	movq	%rax, %r13
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
+	movq	%rax, %r12
+	movq	%rdx, -104(%rsp)                ## 8-byte Spill
 	movq	16(%rcx), %rdx
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
+	movq	%rdx, -40(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rax
 	mulq	%rdx
-	movq	%rax, %r8
-	movq	%rdx, %r14
-	movq	(%rcx), %rdi
-	movq	%rdi, -72(%rsp)         ## 8-byte Spill
+	movq	%rax, %r10
+	movq	%rdx, 24(%rsp)                  ## 8-byte Spill
+	movq	(%rcx), %rsi
+	movq	%rsi, -32(%rsp)                 ## 8-byte Spill
 	movq	8(%rcx), %rcx
-	movq	%rcx, -32(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	movq	%rbx, %rax
 	mulq	%rcx
-	movq	%rdx, %rcx
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	mulq	%rdi
-	addq	%r11, %rax
-	adcq	%r9, %rbp
-	adcq	%rbx, %r8
-	adcq	%r10, %r13
-	adcq	$0, %r12
-	addq	%rdx, %rbp
-	adcq	%rcx, %r8
-	adcq	%r14, %r13
-	adcq	-16(%rsp), %r12         ## 8-byte Folded Reload
-	movq	8(%r15), %rdi
+	movq	%rdx, %r11
+	movq	%rax, %rcx
+	movq	%rbx, %rax
+	mulq	%rsi
+	addq	%r13, %rax
+	adcq	%rdi, %rcx
+	adcq	%rbp, %r10
+	adcq	%r8, %r12
+	adcq	%r9, %r15
+	adcq	-128(%rsp), %r14                ## 8-byte Folded Reload
+	movq	-112(%rsp), %rax                ## 8-byte Reload
+	adcq	$0, %rax
+	addq	%rdx, %rcx
+	adcq	%r11, %r10
+	adcq	24(%rsp), %r12                  ## 8-byte Folded Reload
+	adcq	-104(%rsp), %r15                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r14                 ## 8-byte Folded Reload
+	movq	%r14, -128(%rsp)                ## 8-byte Spill
+	adcq	-120(%rsp), %rax                ## 8-byte Folded Reload
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	-72(%rsp), %rax                 ## 8-byte Reload
+	movq	8(%rax), %rdi
 	movq	%rdi, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %rsi
+	mulq	-80(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, -120(%rsp)                ## 8-byte Spill
 	movq	%rdi, %rax
-	mulq	-96(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %r11
+	mulq	-88(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r9
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
 	movq	%rdi, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rcx
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	8(%rsp)                         ## 8-byte Folded Reload
+	movq	%rdx, %rsi
 	movq	%rax, %r14
 	movq	%rdi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
+	mulq	-8(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r11
+	movq	%rdi, %rax
+	mulq	(%rsp)                          ## 8-byte Folded Reload
 	movq	%rax, %rdi
-	movq	%rdx, %r9
-	addq	%r14, %r9
-	adcq	%r11, %rcx
-	adcq	%rsi, %r10
-	adcq	$0, %rbx
-	addq	%rbp, %rdi
-	adcq	%r8, %r9
-	adcq	%r13, %rcx
-	adcq	%r12, %r10
-	adcq	$0, %rbx
-	movq	%rdi, %rsi
-	imulq	-80(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r12
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %r13
-	movq	%rsi, %rax
-	movq	-32(%rsp), %r15         ## 8-byte Reload
-	mulq	%r15
-	movq	%rdx, %r14
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
+	movq	%rdx, %rbp
+	addq	%r11, %rbp
+	adcq	%r14, %rbx
+	adcq	-104(%rsp), %rsi                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r13                 ## 8-byte Folded Reload
+	adcq	-120(%rsp), %r9                 ## 8-byte Folded Reload
+	adcq	$0, %r8
+	addq	%rcx, %rdi
+	adcq	%r10, %rbp
+	adcq	%r12, %rbx
+	adcq	%r15, %rsi
+	adcq	-128(%rsp), %r13                ## 8-byte Folded Reload
+	adcq	-112(%rsp), %r9                 ## 8-byte Folded Reload
+	adcq	$0, %r8
+	movq	-48(%rsp), %r11                 ## 8-byte Reload
+	imulq	%rdi, %r11
+	movq	%r11, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	%r11, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -96(%rsp)                 ## 8-byte Spill
+	movq	%rax, %r15
+	movq	%r11, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	%rax, %rcx
+	movq	%r11, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                ## 8-byte Spill
+	movq	%rax, %r10
+	movq	%r11, %rax
+	mulq	16(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, %r14
+	movq	%r11, %rax
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
 	addq	%rdi, %rax
-	adcq	%r9, %rbp
-	adcq	%rcx, %r13
-	adcq	%r10, %r12
-	adcq	$0, %rbx
-	addq	%rdx, %rbp
-	adcq	%r14, %r13
-	adcq	%r11, %r12
-	adcq	%r8, %rbx
-	movq	-24(%rsp), %rax         ## 8-byte Reload
+	adcq	%rbp, %r14
+	adcq	%rbx, %r10
+	adcq	%rsi, %rcx
+	adcq	%r13, %r15
+	movq	-112(%rsp), %rax                ## 8-byte Reload
+	adcq	%r9, %rax
+	adcq	$0, %r8
+	addq	%rdx, %r14
+	adcq	%r12, %r10
+	adcq	-104(%rsp), %rcx                ## 8-byte Folded Reload
+	adcq	-120(%rsp), %r15                ## 8-byte Folded Reload
+	movq	%r15, -120(%rsp)                ## 8-byte Spill
+	adcq	-96(%rsp), %rax                 ## 8-byte Folded Reload
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	adcq	-128(%rsp), %r8                 ## 8-byte Folded Reload
+	movq	-72(%rsp), %rax                 ## 8-byte Reload
 	movq	16(%rax), %rdi
 	movq	%rdi, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	-96(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r11
+	mulq	-80(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, -128(%rsp)                ## 8-byte Spill
 	movq	%rdi, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r14
+	mulq	-88(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
 	movq	%rdi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %r9
-	movq	%rdx, %rdi
-	addq	%r14, %rdi
-	adcq	%r11, %rcx
-	adcq	%r10, %r8
-	adcq	$0, %rsi
-	addq	%rbp, %r9
-	adcq	%r13, %rdi
-	adcq	%r12, %rcx
-	adcq	%rbx, %r8
-	adcq	$0, %rsi
-	movq	%r9, %rbx
-	imulq	-80(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %r12
-	movq	%rbx, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %r11
-	movq	%rax, %r13
-	movq	%rbx, %rax
-	mulq	%r15
-	movq	%rdx, %r14
-	movq	%rax, %rbp
-	movq	%rbx, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	addq	%r9, %rax
-	adcq	%rdi, %rbp
-	adcq	%rcx, %r13
-	adcq	%r8, %r12
-	adcq	$0, %rsi
-	addq	%rdx, %rbp
-	adcq	%r14, %r13
-	adcq	%r11, %r12
-	adcq	%r10, %rsi
-	movq	-24(%rsp), %rax         ## 8-byte Reload
-	movq	24(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %rcx
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
 	movq	%rdi, %rax
-	mulq	-96(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r11
+	mulq	8(%rsp)                         ## 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
 	movq	%rdi, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %r14
+	mulq	-8(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %rsi
+	movq	%rax, %r9
 	movq	%rdi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
+	mulq	(%rsp)                          ## 8-byte Folded Reload
+	movq	%rax, %rbp
+	movq	%rdx, %rbx
+	addq	%r9, %rbx
+	adcq	24(%rsp), %rsi                  ## 8-byte Folded Reload
+	adcq	-104(%rsp), %r12                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r11                 ## 8-byte Folded Reload
+	adcq	-128(%rsp), %r15                ## 8-byte Folded Reload
+	adcq	$0, %r13
+	addq	%r14, %rbp
+	adcq	%r10, %rbx
+	adcq	%rcx, %rsi
+	adcq	-120(%rsp), %r12                ## 8-byte Folded Reload
+	adcq	-112(%rsp), %r11                ## 8-byte Folded Reload
+	adcq	%r8, %r15
+	adcq	$0, %r13
+	movq	-48(%rsp), %rcx                 ## 8-byte Reload
+	imulq	%rbp, %rcx
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
 	movq	%rax, %r9
-	movq	%rdx, %rdi
-	addq	%r14, %rdi
-	adcq	%r11, %r10
-	adcq	%rcx, %r8
-	adcq	$0, %rbx
-	addq	%rbp, %r9
-	adcq	%r13, %rdi
-	adcq	%r12, %r10
-	adcq	%rsi, %r8
-	adcq	$0, %rbx
-	movq	-80(%rsp), %rcx         ## 8-byte Reload
-	imulq	%r9, %rcx
 	movq	%rcx, %rax
-	movq	-40(%rsp), %r12         ## 8-byte Reload
-	mulq	%r12
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %r13
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -96(%rsp)                 ## 8-byte Spill
+	movq	%rax, %r10
 	movq	%rcx, %rax
-	movq	-48(%rsp), %r11         ## 8-byte Reload
-	mulq	%r11
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbp
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                ## 8-byte Spill
+	movq	%rax, %r14
 	movq	%rcx, %rax
-	movq	%rcx, %r15
-	movq	-72(%rsp), %rsi         ## 8-byte Reload
-	mulq	%rsi
-	movq	%rdx, %r14
-	movq	%rax, %rcx
-	movq	%r15, %rax
-	movq	-32(%rsp), %r15         ## 8-byte Reload
-	mulq	%r15
-	addq	%r9, %rcx
-	adcq	%rdi, %rax
-	adcq	%r10, %rbp
-	adcq	%r8, %r13
-	adcq	$0, %rbx
-	addq	%r14, %rax
-	adcq	%rdx, %rbp
-	adcq	-96(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rax, %rcx
-	subq	%rsi, %rcx
-	movq	%rbp, %rdx
-	sbbq	%r15, %rdx
-	movq	%r13, %rdi
-	sbbq	%r11, %rdi
-	movq	%rbx, %rsi
-	sbbq	%r12, %rsi
-	cmovsq	%rax, %rcx
-	movq	-8(%rsp), %rax          ## 8-byte Reload
-	movq	%rcx, (%rax)
-	cmovsq	%rbp, %rdx
-	movq	%rdx, 8(%rax)
-	cmovsq	%r13, %rdi
-	movq	%rdi, 16(%rax)
-	cmovsq	%rbx, %rsi
-	movq	%rsi, 24(%rax)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montRed4L
-	.p2align	4, 0x90
-_mcl_fp_montRed4L:                      ## @mcl_fp_montRed4L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	(%rcx), %rdi
-	movq	%rdi, -32(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %r12
-	movq	%r12, %rbx
-	imulq	%rax, %rbx
-	movq	%rax, %r9
-	movq	%r9, -64(%rsp)          ## 8-byte Spill
-	movq	24(%rcx), %rdx
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rdx
-	movq	%rax, %r11
+	mulq	16(%rsp)                        ## 8-byte Folded Reload
 	movq	%rdx, %r8
-	movq	16(%rcx), %rbp
-	movq	%rbx, %rax
-	mulq	%rbp
-	movq	%rbp, %r13
-	movq	%r13, -24(%rsp)         ## 8-byte Spill
-	movq	%rax, %r14
-	movq	%rdx, %r10
-	movq	8(%rcx), %rcx
-	movq	%rbx, %rax
-	mulq	%rcx
-	movq	%rcx, %rbp
-	movq	%rbp, -16(%rsp)         ## 8-byte Spill
+	movq	%rax, %rdi
+	movq	%rcx, %rax
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
+	addq	%rbp, %rax
+	adcq	%rbx, %rdi
+	adcq	%rsi, %r14
+	adcq	%r12, %r10
+	adcq	%r11, %r9
+	movq	-112(%rsp), %rax                ## 8-byte Reload
+	adcq	%r15, %rax
+	adcq	$0, %r13
+	addq	%rdx, %rdi
+	adcq	%r8, %r14
+	adcq	-104(%rsp), %r10                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r9                  ## 8-byte Folded Reload
+	adcq	-128(%rsp), %rax                ## 8-byte Folded Reload
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	adcq	-120(%rsp), %r13                ## 8-byte Folded Reload
+	movq	-72(%rsp), %rax                 ## 8-byte Reload
+	movq	24(%rax), %rbp
+	movq	%rbp, %rax
+	mulq	-80(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %r15
-	movq	%rax, %rcx
-	movq	%rbx, %rax
-	mulq	%rdi
-	movq	%rdx, %rbx
-	addq	%rcx, %rbx
-	adcq	%r14, %r15
-	adcq	%r11, %r10
-	adcq	$0, %r8
-	movq	56(%rsi), %rcx
-	movq	48(%rsi), %rdx
-	addq	%r12, %rax
-	movq	40(%rsi), %rax
-	adcq	8(%rsi), %rbx
-	adcq	16(%rsi), %r15
-	adcq	24(%rsi), %r10
-	adcq	32(%rsi), %r8
-	adcq	$0, %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	adcq	$0, %rdx
-	movq	%rdx, %r12
-	adcq	$0, %rcx
-	movq	%rcx, -72(%rsp)         ## 8-byte Spill
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	movq	%rbx, %rsi
-	imulq	%r9, %rsi
-	movq	%rsi, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
+	movq	%rax, -120(%rsp)                ## 8-byte Spill
+	movq	%rbp, %rax
+	mulq	-88(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %r11
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	%r13
-	movq	%rdx, %r14
-	movq	%rax, %r9
-	movq	%rsi, %rax
-	mulq	%rbp
+	movq	%rax, -128(%rsp)                ## 8-byte Spill
+	movq	%rbp, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %rcx
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	movq	-32(%rsp), %r13         ## 8-byte Reload
-	mulq	%r13
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rbp, %rax
+	mulq	8(%rsp)                         ## 8-byte Folded Reload
 	movq	%rdx, %rsi
-	addq	%rbp, %rsi
-	adcq	%r9, %rcx
-	adcq	-56(%rsp), %r14         ## 8-byte Folded Reload
-	adcq	$0, %r11
-	addq	%rbx, %rax
-	adcq	%r15, %rsi
-	adcq	%r10, %rcx
-	adcq	%r8, %r14
-	adcq	-48(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	$0, %r12
-	movq	%r12, -48(%rsp)         ## 8-byte Spill
-	movq	-72(%rsp), %rbp         ## 8-byte Reload
-	adcq	$0, %rbp
-	adcq	$0, %rdi
-	movq	%rsi, %rbx
-	imulq	-64(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, %rax
-	movq	-40(%rsp), %r12         ## 8-byte Reload
-	mulq	%r12
-	movq	%rdx, %r8
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r15
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rbp, %rax
+	mulq	-8(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r12
+	movq	%rbp, %rax
+	mulq	(%rsp)                          ## 8-byte Folded Reload
+	movq	%rax, %r8
+	movq	%rdx, %rbp
+	addq	%r12, %rbp
+	adcq	-104(%rsp), %rbx                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %rsi                 ## 8-byte Folded Reload
+	adcq	-128(%rsp), %rcx                ## 8-byte Folded Reload
+	adcq	-120(%rsp), %r11                ## 8-byte Folded Reload
+	adcq	$0, %r15
+	addq	%rdi, %r8
+	adcq	%r14, %rbp
+	adcq	%r10, %rbx
+	adcq	%r9, %rsi
+	adcq	-112(%rsp), %rcx                ## 8-byte Folded Reload
+	adcq	%r13, %r11
+	adcq	$0, %r15
+	movq	-48(%rsp), %r13                 ## 8-byte Reload
+	imulq	%r8, %r13
+	movq	%r13, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%r13, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
 	movq	%rax, %r9
-	movq	%rbx, %rax
-	mulq	%r13
+	movq	%r13, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -112(%rsp)                ## 8-byte Spill
+	movq	%rax, %r10
+	movq	%r13, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                ## 8-byte Spill
+	movq	%rax, %r12
+	movq	%r13, %rax
+	mulq	16(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, %rdi
+	movq	%r13, %rax
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
+	addq	%r8, %rax
+	adcq	%rbp, %rdi
+	adcq	%rbx, %r12
+	adcq	%rsi, %r10
+	movq	%r9, %rax
+	adcq	%rcx, %rax
+	movq	-96(%rsp), %r9                  ## 8-byte Reload
+	adcq	%r11, %r9
+	adcq	$0, %r15
+	addq	%rdx, %rdi
+	adcq	%r14, %r12
+	adcq	-104(%rsp), %r10                ## 8-byte Folded Reload
+	adcq	-112(%rsp), %rax                ## 8-byte Folded Reload
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	adcq	-128(%rsp), %r9                 ## 8-byte Folded Reload
+	movq	%r9, %rcx
+	adcq	-120(%rsp), %r15                ## 8-byte Folded Reload
+	movq	-72(%rsp), %rax                 ## 8-byte Reload
+	movq	32(%rax), %rbp
+	movq	%rbp, %rax
+	mulq	-80(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, -120(%rsp)                ## 8-byte Spill
+	movq	%rbp, %rax
+	mulq	-88(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, -128(%rsp)                ## 8-byte Spill
+	movq	%rbp, %rax
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r9
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rbp, %rax
+	mulq	8(%rsp)                         ## 8-byte Folded Reload
+	movq	%rdx, %rsi
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rbp, %rax
+	mulq	-8(%rsp)                        ## 8-byte Folded Reload
 	movq	%rdx, %rbx
-	addq	%r9, %rbx
-	adcq	-56(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	%rsi, %rax
-	adcq	%rcx, %rbx
-	adcq	%r14, %r15
-	adcq	%r11, %r10
-	adcq	-48(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	$0, %rbp
-	movq	%rbp, -72(%rsp)         ## 8-byte Spill
-	adcq	$0, %rdi
-	movq	-64(%rsp), %rcx         ## 8-byte Reload
-	imulq	%rbx, %rcx
+	movq	%rax, %r8
+	movq	%rbp, %rax
+	mulq	(%rsp)                          ## 8-byte Folded Reload
+	movq	%rax, %r13
+	movq	%rdx, %rbp
+	addq	%r8, %rbp
+	adcq	-104(%rsp), %rbx                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %rsi                 ## 8-byte Folded Reload
+	adcq	-128(%rsp), %r9                 ## 8-byte Folded Reload
+	adcq	-120(%rsp), %r11                ## 8-byte Folded Reload
+	adcq	$0, %r14
+	addq	%rdi, %r13
+	adcq	%r12, %rbp
+	adcq	%r10, %rbx
+	adcq	-112(%rsp), %rsi                ## 8-byte Folded Reload
+	adcq	%rcx, %r9
+	adcq	%r15, %r11
+	adcq	$0, %r14
+	movq	-48(%rsp), %rcx                 ## 8-byte Reload
+	imulq	%r13, %rcx
 	movq	%rcx, %rax
-	mulq	%r12
-	movq	%rdx, %r13
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -96(%rsp)                 ## 8-byte Spill
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
 	movq	%rcx, %rax
-	movq	-24(%rsp), %r14         ## 8-byte Reload
-	mulq	%r14
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -104(%rsp)                ## 8-byte Spill
+	movq	%rax, %r15
+	movq	%rcx, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	%rax, %r10
+	movq	%rcx, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movq	%rax, %r8
+	movq	%rcx, %rax
+	mulq	16(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, %rdi
+	movq	%rcx, %rax
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
+	addq	%r13, %rax
+	adcq	%rbp, %rdi
+	adcq	%rbx, %r8
+	adcq	%rsi, %r10
+	adcq	%r9, %r15
+	movq	-112(%rsp), %rcx                ## 8-byte Reload
+	adcq	%r11, %rcx
+	adcq	$0, %r14
+	addq	%rdx, %rdi
+	adcq	%r12, %r8
+	adcq	-128(%rsp), %r10                ## 8-byte Folded Reload
+	movq	%r10, -128(%rsp)                ## 8-byte Spill
+	adcq	-120(%rsp), %r15                ## 8-byte Folded Reload
+	movq	%r15, -120(%rsp)                ## 8-byte Spill
+	adcq	-104(%rsp), %rcx                ## 8-byte Folded Reload
+	movq	%rcx, -112(%rsp)                ## 8-byte Spill
+	adcq	-96(%rsp), %r14                 ## 8-byte Folded Reload
+	movq	-72(%rsp), %rax                 ## 8-byte Reload
+	movq	40(%rax), %rcx
+	movq	%rcx, %rax
+	mulq	-80(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-88(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %r11
-	movq	%rax, %r12
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
 	movq	%rcx, %rax
-	movq	%rcx, %r9
-	movq	-16(%rsp), %rsi         ## 8-byte Reload
-	mulq	%rsi
-	movq	%rdx, %rbp
+	mulq	-56(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, -88(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	8(%rsp)                         ## 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, %rbp
+	movq	%rcx, %rax
+	mulq	-8(%rsp)                        ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %rsi
+	movq	%rcx, %rax
+	mulq	(%rsp)                          ## 8-byte Folded Reload
+	movq	%rax, %r10
+	movq	%rdx, %r9
+	addq	%rsi, %r9
+	adcq	%rbp, %r13
+	adcq	-88(%rsp), %r12                 ## 8-byte Folded Reload
+	adcq	-80(%rsp), %r15                 ## 8-byte Folded Reload
+	adcq	-72(%rsp), %r11                 ## 8-byte Folded Reload
+	adcq	$0, %rbx
+	addq	%rdi, %r10
+	adcq	%r8, %r9
+	adcq	-128(%rsp), %r13                ## 8-byte Folded Reload
+	adcq	-120(%rsp), %r12                ## 8-byte Folded Reload
+	adcq	-112(%rsp), %r15                ## 8-byte Folded Reload
+	adcq	%r14, %r11
+	adcq	$0, %rbx
+	movq	-48(%rsp), %r14                 ## 8-byte Reload
+	imulq	%r10, %r14
+	movq	%r14, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -48(%rsp)                 ## 8-byte Spill
+	movq	%rax, %rdi
+	movq	%r14, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -72(%rsp)                 ## 8-byte Spill
+	movq	%rax, %rbp
+	movq	%r14, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -80(%rsp)                 ## 8-byte Spill
 	movq	%rax, %rcx
-	movq	%r9, %rax
-	movq	-32(%rsp), %r9          ## 8-byte Reload
-	mulq	%r9
-	addq	%rcx, %rdx
-	adcq	%r12, %rbp
-	adcq	-64(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%rbx, %rax
-	adcq	%r15, %rdx
-	adcq	%r10, %rbp
-	adcq	%r8, %r11
-	adcq	-72(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	$0, %rdi
-	movq	%rdx, %rax
-	subq	%r9, %rax
-	movq	%rbp, %rcx
-	sbbq	%rsi, %rcx
-	movq	%r11, %rbx
-	sbbq	%r14, %rbx
-	movq	%r13, %rsi
-	sbbq	-40(%rsp), %rsi         ## 8-byte Folded Reload
-	sbbq	$0, %rdi
-	andl	$1, %edi
-	cmovneq	%r13, %rsi
-	testb	%dil, %dil
-	cmovneq	%rdx, %rax
-	movq	-8(%rsp), %rdx          ## 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovneq	%rbp, %rcx
-	movq	%rcx, 8(%rdx)
-	cmovneq	%r11, %rbx
-	movq	%rbx, 16(%rdx)
-	movq	%rsi, 24(%rdx)
+	movq	%r14, %rax
+	mulq	-32(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	movq	%rax, %r8
+	movq	%r14, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -56(%rsp)                 ## 8-byte Spill
+	movq	%rax, %rsi
+	movq	%r14, %rax
+	movq	16(%rsp), %r14                  ## 8-byte Reload
+	mulq	%r14
+	addq	%r10, %r8
+	adcq	%r9, %rax
+	adcq	%r13, %rsi
+	adcq	%r12, %rcx
+	adcq	%r15, %rbp
+	adcq	%r11, %rdi
+	adcq	$0, %rbx
+	addq	-88(%rsp), %rax                 ## 8-byte Folded Reload
+	adcq	%rdx, %rsi
+	adcq	-56(%rsp), %rcx                 ## 8-byte Folded Reload
+	adcq	-80(%rsp), %rbp                 ## 8-byte Folded Reload
+	adcq	-72(%rsp), %rdi                 ## 8-byte Folded Reload
+	adcq	-48(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%rax, %r8
+	subq	-32(%rsp), %r8                  ## 8-byte Folded Reload
+	movq	%rsi, %r9
+	sbbq	%r14, %r9
+	movq	%rcx, %r10
+	sbbq	-40(%rsp), %r10                 ## 8-byte Folded Reload
+	movq	%rbp, %r11
+	sbbq	-24(%rsp), %r11                 ## 8-byte Folded Reload
+	movq	%rdi, %r14
+	sbbq	-16(%rsp), %r14                 ## 8-byte Folded Reload
+	movq	%rbx, %r15
+	sbbq	-64(%rsp), %r15                 ## 8-byte Folded Reload
+	movq	%r15, %rdx
+	sarq	$63, %rdx
+	cmovsq	%rbx, %r15
+	movq	32(%rsp), %rdx                  ## 8-byte Reload
+	movq	%r15, 40(%rdx)
+	cmovsq	%rdi, %r14
+	movq	%r14, 32(%rdx)
+	cmovsq	%rbp, %r11
+	movq	%r11, 24(%rdx)
+	cmovsq	%rcx, %r10
+	movq	%r10, 16(%rdx)
+	cmovsq	%rsi, %r9
+	movq	%r9, 8(%rdx)
+	cmovsq	%rax, %r8
+	movq	%r8, (%rdx)
+	addq	$40, %rsp
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -2914,9794 +4498,332 @@ _mcl_fp_montRed4L:                      ## @mcl_fp_montRed4L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_addPre4L
-	.p2align	4, 0x90
-_mcl_fp_addPre4L:                       ## @mcl_fp_addPre4L
-## BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rdx), %rax
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rax
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	adcq	%r8, %r9
-	movq	%r9, 24(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_subPre4L
+                                        ## -- End function
+	.globl	_mcl_fp_montRed6L               ## -- Begin function mcl_fp_montRed6L
 	.p2align	4, 0x90
-_mcl_fp_subPre4L:                       ## @mcl_fp_subPre4L
-## BB#0:
-	movq	24(%rdx), %r8
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %r10
-	movq	%rcx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	sbbq	%r8, %r9
-	movq	%r9, 24(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_shr1_4L
-	.p2align	4, 0x90
-_mcl_fp_shr1_4L:                        ## @mcl_fp_shr1_4L
-## BB#0:
-	movq	24(%rsi), %rax
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rdx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rdx
-	movq	%rdx, (%rdi)
-	shrdq	$1, %rcx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rax, %rcx
-	movq	%rcx, 16(%rdi)
-	shrq	%rax
-	movq	%rax, 24(%rdi)
-	retq
-
-	.globl	_mcl_fp_add4L
-	.p2align	4, 0x90
-_mcl_fp_add4L:                          ## @mcl_fp_add4L
-## BB#0:
-	movq	24(%rdx), %r10
-	movq	24(%rsi), %r8
-	movq	16(%rdx), %r9
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r9
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r9, 16(%rdi)
-	adcq	%r10, %r8
-	movq	%r8, 24(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r9
-	sbbq	24(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB59_2
-## BB#1:                                ## %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r9, 16(%rdi)
-	movq	%r8, 24(%rdi)
-LBB59_2:                                ## %carry
-	retq
-
-	.globl	_mcl_fp_addNF4L
-	.p2align	4, 0x90
-_mcl_fp_addNF4L:                        ## @mcl_fp_addNF4L
-## BB#0:
-	pushq	%rbx
-	movq	24(%rdx), %r8
-	movq	16(%rdx), %r9
-	movq	(%rdx), %r11
-	movq	8(%rdx), %r10
-	addq	(%rsi), %r11
-	adcq	8(%rsi), %r10
-	adcq	16(%rsi), %r9
-	adcq	24(%rsi), %r8
-	movq	%r11, %rsi
-	subq	(%rcx), %rsi
-	movq	%r10, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r9, %rax
-	sbbq	16(%rcx), %rax
-	movq	%r8, %rbx
-	sbbq	24(%rcx), %rbx
-	testq	%rbx, %rbx
-	cmovsq	%r11, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r10, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r9, %rax
-	movq	%rax, 16(%rdi)
-	cmovsq	%r8, %rbx
-	movq	%rbx, 24(%rdi)
-	popq	%rbx
-	retq
-
-	.globl	_mcl_fp_sub4L
-	.p2align	4, 0x90
-_mcl_fp_sub4L:                          ## @mcl_fp_sub4L
-## BB#0:
-	movq	24(%rdx), %r10
-	movq	24(%rsi), %r8
-	movq	16(%rsi), %r9
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r11
-	xorl	%esi, %esi
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %r9
-	movq	%rax, (%rdi)
-	movq	%r11, 8(%rdi)
-	movq	%r9, 16(%rdi)
-	sbbq	%r10, %r8
-	movq	%r8, 24(%rdi)
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB61_2
-## BB#1:                                ## %nocarry
-	retq
-LBB61_2:                                ## %carry
-	movq	24(%rcx), %r10
-	movq	8(%rcx), %rsi
-	movq	16(%rcx), %rdx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r11, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 24(%rdi)
-	retq
-
-	.globl	_mcl_fp_subNF4L
-	.p2align	4, 0x90
-_mcl_fp_subNF4L:                        ## @mcl_fp_subNF4L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movdqu	(%rdx), %xmm0
-	movdqu	16(%rdx), %xmm1
-	pshufd	$78, %xmm1, %xmm2       ## xmm2 = xmm1[2,3,0,1]
-	movd	%xmm2, %r8
-	movdqu	(%rsi), %xmm2
-	movdqu	16(%rsi), %xmm3
-	pshufd	$78, %xmm3, %xmm4       ## xmm4 = xmm3[2,3,0,1]
-	movd	%xmm4, %r15
-	movd	%xmm1, %r9
-	movd	%xmm3, %r11
-	pshufd	$78, %xmm0, %xmm1       ## xmm1 = xmm0[2,3,0,1]
-	movd	%xmm1, %r10
-	pshufd	$78, %xmm2, %xmm1       ## xmm1 = xmm2[2,3,0,1]
-	movd	%xmm1, %r14
-	movd	%xmm0, %rdx
-	movd	%xmm2, %r12
-	subq	%rdx, %r12
-	sbbq	%r10, %r14
-	sbbq	%r9, %r11
-	sbbq	%r8, %r15
-	movq	%r15, %rdx
-	sarq	$63, %rdx
-	movq	24(%rcx), %rsi
-	andq	%rdx, %rsi
-	movq	16(%rcx), %rax
-	andq	%rdx, %rax
-	movq	8(%rcx), %rbx
-	andq	%rdx, %rbx
-	andq	(%rcx), %rdx
-	addq	%r12, %rdx
-	movq	%rdx, (%rdi)
-	adcq	%r14, %rbx
-	movq	%rbx, 8(%rdi)
-	adcq	%r11, %rax
-	movq	%rax, 16(%rdi)
-	adcq	%r15, %rsi
-	movq	%rsi, 24(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fpDbl_add4L
-	.p2align	4, 0x90
-_mcl_fpDbl_add4L:                       ## @mcl_fpDbl_add4L
-## BB#0:
+_mcl_fp_montRed6L:                      ## @mcl_fp_montRed6L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	56(%rdx), %r9
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r10
-	movq	48(%rsi), %r12
-	movq	40(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	24(%rdx), %r15
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	40(%rsi), %r13
-	movq	24(%rsi), %rbp
-	movq	32(%rsi), %rsi
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rbx, 16(%rdi)
-	adcq	%r15, %rbp
-	movq	%rbp, 24(%rdi)
-	adcq	%r14, %rsi
-	adcq	%r11, %r13
-	adcq	%r10, %r12
-	adcq	%r9, %r8
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rsi, %rdx
-	subq	(%rcx), %rdx
-	movq	%r13, %rbp
-	sbbq	8(%rcx), %rbp
-	movq	%r12, %rbx
-	sbbq	16(%rcx), %rbx
-	movq	%r8, %r9
-	sbbq	24(%rcx), %r9
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%rsi, %rdx
-	movq	%rdx, 32(%rdi)
-	testb	%al, %al
-	cmovneq	%r13, %rbp
-	movq	%rbp, 40(%rdi)
-	cmovneq	%r12, %rbx
-	movq	%rbx, 48(%rdi)
-	cmovneq	%r8, %r9
-	movq	%r9, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sub4L
-	.p2align	4, 0x90
-_mcl_fpDbl_sub4L:                       ## @mcl_fpDbl_sub4L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r9
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	(%rsi), %rbx
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	movq	%rbx, (%rdi)
-	movq	8(%rsi), %rbx
-	sbbq	8(%rdx), %rbx
-	movq	%rbx, 8(%rdi)
-	movq	16(%rsi), %rbx
-	sbbq	16(%rdx), %rbx
-	movq	%rbx, 16(%rdi)
-	movq	24(%rsi), %rbx
-	sbbq	%r11, %rbx
-	movq	40(%rdx), %r11
-	movq	32(%rdx), %rdx
-	movq	%rbx, 24(%rdi)
-	movq	32(%rsi), %r12
-	sbbq	%rdx, %r12
-	movq	48(%rsi), %r14
-	movq	40(%rsi), %r15
-	sbbq	%r11, %r15
-	sbbq	%r10, %r14
-	sbbq	%r9, %r8
-	movl	$0, %edx
-	sbbq	$0, %rdx
-	andl	$1, %edx
-	movq	(%rcx), %rsi
-	cmoveq	%rax, %rsi
-	testb	%dl, %dl
+	pushq	%rax
+	movq	%rdx, %rcx
+	movq	%rdi, (%rsp)                    ## 8-byte Spill
+	movq	-8(%rdx), %rax
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	movq	(%rsi), %r9
+	movq	%r9, %rdi
+	imulq	%rax, %rdi
+	movq	40(%rdx), %rdx
+	movq	%rdx, -40(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, -128(%rsp)                ## 8-byte Spill
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	32(%rcx), %rdx
+	movq	%rdx, -64(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r10
+	movq	%rdx, %r12
+	movq	24(%rcx), %rdx
+	movq	%rdx, -72(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r14
+	movq	%rdx, %r15
 	movq	16(%rcx), %rdx
-	cmoveq	%rax, %rdx
-	movq	24(%rcx), %rbx
-	cmoveq	%rax, %rbx
-	cmovneq	8(%rcx), %rax
-	addq	%r12, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%r15, %rax
-	movq	%rax, 40(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 48(%rdi)
-	adcq	%r8, %rbx
-	movq	%rbx, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_mulUnitPre5L
-	.p2align	4, 0x90
-_mcl_fp_mulUnitPre5L:                   ## @mcl_fp_mulUnitPre5L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
+	movq	%rdx, -48(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r11
+	movq	%rdx, %r13
+	movq	(%rcx), %r8
+	movq	8(%rcx), %rcx
+	movq	%rcx, -24(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rcx
+	movq	%rdx, %rbx
+	movq	%rax, %rbp
+	movq	%rdi, %rax
+	mulq	%r8
+	movq	%r8, %rdi
+	movq	%r8, -16(%rsp)                  ## 8-byte Spill
 	movq	%rdx, %rcx
-	movq	%rcx, %rax
-	mulq	32(%rsi)
+	addq	%rbp, %rcx
+	adcq	%r11, %rbx
+	adcq	%r14, %r13
+	adcq	%r10, %r15
+	adcq	-128(%rsp), %r12                ## 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r9, %rax
+	movq	%rsi, -32(%rsp)                 ## 8-byte Spill
+	adcq	8(%rsi), %rcx
+	adcq	16(%rsi), %rbx
+	adcq	24(%rsi), %r13
+	adcq	32(%rsi), %r15
+	adcq	40(%rsi), %r12
+	movq	%r12, -88(%rsp)                 ## 8-byte Spill
+	adcq	48(%rsi), %rdx
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	setb	-96(%rsp)                       ## 1-byte Folded Spill
+	movq	-80(%rsp), %rsi                 ## 8-byte Reload
+	imulq	%rcx, %rsi
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %r8
 	movq	%rax, %r9
-	movq	%rcx, %rax
-	mulq	24(%rsi)
+	movq	%rsi, %rax
+	mulq	%rdi
 	movq	%rdx, %r10
 	movq	%rax, %r11
+	movq	%rsi, %rax
+	movq	-24(%rsp), %rsi                 ## 8-byte Reload
+	mulq	%rsi
+	movq	%rdx, %rbp
+	movq	%rax, %rdi
+	addq	%r10, %rdi
+	adcq	%r9, %rbp
+	adcq	-56(%rsp), %r8                  ## 8-byte Folded Reload
+	adcq	-112(%rsp), %r12                ## 8-byte Folded Reload
+	adcq	-104(%rsp), %r14                ## 8-byte Folded Reload
+	movzbl	-96(%rsp), %eax                 ## 1-byte Folded Reload
+	movq	-128(%rsp), %rdx                ## 8-byte Reload
+	adcq	%rax, %rdx
+	addq	%rcx, %r11
+	adcq	%rbx, %rdi
+	adcq	%r13, %rbp
+	adcq	%r15, %r8
+	adcq	-88(%rsp), %r12                 ## 8-byte Folded Reload
+	adcq	-120(%rsp), %r14                ## 8-byte Folded Reload
+	movq	-32(%rsp), %rax                 ## 8-byte Reload
+	adcq	56(%rax), %rdx
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	setb	-120(%rsp)                      ## 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 ## 8-byte Reload
+	imulq	%rdi, %rcx
+	movq	%rcx, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
 	movq	%rcx, %rax
-	mulq	16(%rsi)
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %r15
-	movq	%rax, %r14
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
 	movq	%rcx, %rax
-	mulq	8(%rsi)
-	movq	%rdx, %rbx
-	movq	%rax, %r12
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %rbx
 	movq	%rcx, %rax
-	mulq	(%rsi)
-	movq	%rax, (%rdi)
-	addq	%r12, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r14, %rbx
-	movq	%rbx, 16(%rdi)
-	adcq	%r11, %r15
-	movq	%r15, 24(%rdi)
-	adcq	%r9, %r10
-	movq	%r10, 32(%rdi)
-	adcq	$0, %r8
-	movq	%r8, 40(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fpDbl_mulPre5L
-	.p2align	4, 0x90
-_mcl_fpDbl_mulPre5L:                    ## @mcl_fpDbl_mulPre5L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rsi, %r9
-	movq	%rdi, -48(%rsp)         ## 8-byte Spill
-	movq	(%r9), %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	(%rdx), %rbp
-	mulq	%rbp
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	16(%r9), %r13
-	movq	24(%r9), %r15
-	movq	32(%r9), %rbx
-	movq	%rax, (%rdi)
-	movq	%rbx, %rax
-	mulq	%rbp
-	movq	%rdx, %r11
-	movq	%rax, %r10
-	movq	%r15, %rax
-	mulq	%rbp
-	movq	%rdx, %r14
-	movq	%rax, %rdi
-	movq	%r13, %rax
-	mulq	%rbp
-	movq	%rax, %rsi
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r9
+	movq	%rcx, %rax
+	mulq	%rsi
 	movq	%rdx, %rcx
-	movq	8(%r9), %r8
-	movq	%r8, %rax
-	mulq	%rbp
-	movq	%rdx, %rbp
-	movq	%rax, %r12
-	addq	-88(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	%rsi, %rbp
-	adcq	%rdi, %rcx
-	adcq	%r10, %r14
-	adcq	$0, %r11
-	movq	-72(%rsp), %r10         ## 8-byte Reload
-	movq	8(%r10), %rdi
-	movq	%rbx, %rax
-	mulq	%rdi
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
 	movq	%rax, %rsi
-	movq	%r15, %rax
-	mulq	%rdi
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, %r15
-	movq	%r13, %rax
-	mulq	%rdi
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rax, %r13
-	movq	%r8, %rax
-	mulq	%rdi
-	movq	%rdx, %r8
-	movq	%rax, %rbx
-	movq	-80(%rsp), %rax         ## 8-byte Reload
-	mulq	%rdi
-	addq	%r12, %rax
-	movq	-48(%rsp), %rdi         ## 8-byte Reload
-	movq	%rax, 8(%rdi)
-	adcq	%rbp, %rbx
-	adcq	%rcx, %r13
+	addq	%r10, %rsi
+	adcq	%rbx, %rcx
+	adcq	-112(%rsp), %r13                ## 8-byte Folded Reload
+	adcq	-104(%rsp), %r15                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r11                 ## 8-byte Folded Reload
+	movzbl	-120(%rsp), %eax                ## 1-byte Folded Reload
+	movq	-88(%rsp), %rdx                 ## 8-byte Reload
+	adcq	%rax, %rdx
+	addq	%rdi, %r9
+	adcq	%rbp, %rsi
+	adcq	%r8, %rcx
+	adcq	%r12, %r13
 	adcq	%r14, %r15
-	adcq	%r11, %rsi
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%rdx, %rbx
-	adcq	%r8, %r13
-	adcq	-56(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	32(%r9), %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	16(%r10), %r12
-	mulq	%r12
-	movq	%rax, %r11
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	24(%r9), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	mulq	%r12
-	movq	%rax, %r10
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	16(%r9), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	mulq	%r12
-	movq	%rax, %r8
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	8(%r9), %rdi
-	movq	%rdi, %rax
-	mulq	%r12
-	movq	%rax, %rbp
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	movq	(%r9), %r14
-	movq	%r14, %rax
-	mulq	%r12
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	addq	%rbx, %rax
-	movq	-48(%rsp), %rbx         ## 8-byte Reload
-	movq	%rax, 16(%rbx)
-	adcq	%r13, %rbp
-	adcq	%r15, %r8
-	adcq	%rsi, %r10
-	adcq	%rcx, %r11
-	sbbq	%rsi, %rsi
-	movq	-72(%rsp), %r12         ## 8-byte Reload
-	movq	24(%r12), %rcx
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rcx
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	movq	%rax, %r15
-	movq	%r14, %rax
-	mulq	%rcx
-	movq	%rdx, %r13
-	movq	%rax, %rdi
-	movq	-64(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	%rax, %r14
-	movq	-32(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	andl	$1, %esi
-	addq	-40(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-16(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	-56(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rsi         ## 8-byte Folded Reload
-	addq	%rdi, %rbp
-	movq	%rbp, 24(%rbx)
-	adcq	%r15, %r8
-	adcq	%rax, %r10
-	adcq	%r14, %r11
-	adcq	-24(%rsp), %rsi         ## 8-byte Folded Reload
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r13, %r8
-	adcq	-8(%rsp), %r10          ## 8-byte Folded Reload
-	adcq	%rdx, %r11
-	adcq	-64(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	32(%r12), %rdi
-	movq	%rdi, %rax
-	mulq	32(%r9)
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, %r15
+	adcq	-128(%rsp), %r11                ## 8-byte Folded Reload
+	movq	-32(%rsp), %rax                 ## 8-byte Reload
+	adcq	64(%rax), %rdx
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	setb	-128(%rsp)                      ## 1-byte Folded Spill
+	movq	-80(%rsp), %rdi                 ## 8-byte Reload
+	imulq	%rsi, %rdi
 	movq	%rdi, %rax
-	mulq	24(%r9)
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, %r13
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -96(%rsp)                 ## 8-byte Spill
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
 	movq	%rdi, %rax
-	mulq	16(%r9)
-	movq	%rdx, %r14
-	movq	%rax, %rbx
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
 	movq	%rdi, %rax
-	mulq	8(%r9)
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
 	movq	%rdx, %r12
-	movq	%rax, %rbp
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
 	movq	%rdi, %rax
-	mulq	(%r9)
-	addq	%r8, %rax
-	movq	-48(%rsp), %rdi         ## 8-byte Reload
-	movq	%rax, 32(%rdi)
-	adcq	%r10, %rbp
-	adcq	%r11, %rbx
-	adcq	%rsi, %r13
-	adcq	%rcx, %r15
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%rdx, %rbp
-	movq	%rbp, 40(%rdi)
-	adcq	%r12, %rbx
-	movq	%rbx, 48(%rdi)
-	adcq	%r14, %r13
-	movq	%r13, 56(%rdi)
-	adcq	-80(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%r15, 64(%rdi)
-	adcq	-72(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sqrPre5L
-	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre5L:                    ## @mcl_fpDbl_sqrPre5L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	32(%rsi), %r11
-	movq	(%rsi), %rbp
-	movq	8(%rsi), %r13
-	movq	%r11, %rax
-	mulq	%r13
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	24(%rsi), %rbx
-	movq	%rbx, %rax
-	mulq	%r13
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %rcx
-	movq	%rcx, %rax
-	mulq	%r13
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	%r11, %rax
-	mulq	%rbp
-	movq	%rdx, %r8
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rbp
-	movq	%rdx, %r9
-	movq	%rax, %r15
-	movq	%rcx, %rax
-	mulq	%rbp
-	movq	%rdx, %r10
-	movq	%rax, %r12
-	movq	%r13, %rax
-	mulq	%r13
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	movq	%rax, %r14
-	movq	%r13, %rax
-	mulq	%rbp
-	movq	%rdx, %r13
-	movq	%rax, %rbx
-	movq	%rbp, %rax
-	mulq	%rbp
-	movq	%rdi, -24(%rsp)         ## 8-byte Spill
-	movq	%rax, (%rdi)
-	addq	%rbx, %rdx
-	adcq	%r13, %r12
-	adcq	%r15, %r10
-	adcq	-16(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	%rbx, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r14, %r12
-	adcq	-32(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-64(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	-56(%rsp), %r8          ## 8-byte Folded Reload
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	addq	%r13, %r12
-	adcq	-8(%rsp), %r10          ## 8-byte Folded Reload
-	adcq	-48(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	-40(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%r11, %rax
-	mulq	%rcx
-	movq	%rax, %r11
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	24(%rsi), %rbx
-	movq	%rbx, %rax
-	mulq	%rcx
-	movq	%rax, %r14
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r15
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	%rcx
-	movq	%rax, %r13
-	addq	%r12, %rdi
-	movq	-24(%rsp), %r12         ## 8-byte Reload
-	movq	%rdi, 16(%r12)
-	adcq	%r10, %r15
-	adcq	%r9, %r13
-	adcq	%r8, %r14
-	adcq	%rbp, %r11
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	addq	-32(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-64(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	%rdx, %r14
-	adcq	-72(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	-40(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, %r8
-	movq	-48(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbx
-	movq	%rax, %rbp
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	32(%rsi), %rcx
-	movq	%rcx, %rax
-	mulq	%rbx
+	movq	-48(%rsp), %r14                 ## 8-byte Reload
+	mulq	%r14
+	movq	%rdx, %rbp
 	movq	%rax, %r9
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	mulq	%rbx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	%rax, %r10
-	movq	%rbx, %rax
-	mulq	%rbx
-	movq	%rax, %rbx
-	addq	%r15, %rbp
-	movq	%rbp, 24(%r12)
-	adcq	%r13, %r8
-	adcq	%r14, %r10
-	adcq	%r11, %rbx
-	adcq	%rdi, %r9
-	sbbq	%r12, %r12
-	andl	$1, %r12d
-	addq	-56(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-64(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	%rdx, %r9
-	adcq	-48(%rsp), %r12         ## 8-byte Folded Reload
-	movq	%rcx, %rax
-	mulq	24(%rsi)
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	8(%rsi)
-	movq	%rdx, %r14
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	(%rsi)
-	movq	%rdx, %r13
-	movq	%rax, %rsi
-	movq	%rcx, %rax
-	mulq	%rcx
-	movq	%rdx, %r15
-	movq	%rax, %r11
-	movq	-40(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	addq	%r8, %rsi
-	movq	-24(%rsp), %r8          ## 8-byte Reload
-	movq	%rsi, 32(%r8)
-	adcq	%r10, %rdi
-	adcq	%rbx, %rax
-	adcq	%r9, %rbp
-	adcq	%r12, %r11
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r13, %rdi
-	movq	%r8, %rsi
-	movq	%rdi, 40(%rsi)
-	adcq	%r14, %rax
-	movq	%rax, 48(%rsi)
-	adcq	%rdx, %rbp
-	movq	%rbp, 56(%rsi)
-	adcq	-72(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%r11, 64(%rsi)
-	adcq	%r15, %rcx
-	movq	%rcx, 72(%rsi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_mont5L
-	.p2align	4, 0x90
-_mcl_fp_mont5L:                         ## @mcl_fp_mont5L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	pushq	%rax
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, (%rsp)            ## 8-byte Spill
-	movq	32(%rsi), %rax
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	(%rdx), %rdi
-	mulq	%rdi
-	movq	%rax, %r8
-	movq	%rdx, %r15
-	movq	24(%rsi), %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r10
-	movq	%rdx, %rbx
-	movq	16(%rsi), %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r11
-	movq	%rdx, %r14
-	movq	(%rsi), %rbp
-	movq	%rbp, -24(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, %r12
-	movq	%rax, %rsi
-	movq	%rbp, %rax
-	mulq	%rdi
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rdx, %r9
-	addq	%rsi, %r9
-	adcq	%r11, %r12
-	adcq	%r10, %r14
-	adcq	%r8, %rbx
-	movq	%rbx, -120(%rsp)        ## 8-byte Spill
-	adcq	$0, %r15
-	movq	%r15, -112(%rsp)        ## 8-byte Spill
-	movq	-8(%rcx), %rdx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbp
-	imulq	%rdx, %rbp
-	movq	32(%rcx), %rdx
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rdx, %r8
-	movq	24(%rcx), %rdx
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rax, %r13
-	movq	%rdx, %rsi
-	movq	16(%rcx), %rdx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rax, %r11
-	movq	%rdx, %rbx
-	movq	(%rcx), %rdi
-	movq	%rdi, -16(%rsp)         ## 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -64(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rcx
-	movq	%rdx, %r10
-	movq	%rax, %r15
-	movq	%rbp, %rax
-	mulq	%rdi
-	movq	%rdx, %rcx
-	addq	%r15, %rcx
-	adcq	%r11, %r10
-	adcq	%r13, %rbx
-	adcq	-8(%rsp), %rsi          ## 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	-128(%rsp), %rax        ## 8-byte Folded Reload
-	adcq	%r9, %rcx
-	adcq	%r12, %r10
-	adcq	%r14, %rbx
-	adcq	-120(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r8         ## 8-byte Folded Reload
-	sbbq	%r15, %r15
-	andl	$1, %r15d
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	movq	8(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-104(%rsp)              ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %r12
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %rdi
-	movq	%rdx, %r11
-	addq	%r12, %r11
-	adcq	-128(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%rcx, %rdi
-	adcq	%r10, %r11
-	adcq	%rbx, %r9
-	adcq	%rsi, %rbp
-	adcq	%r8, %r14
-	adcq	%r15, %r13
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rbx
-	imulq	-72(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r15
-	movq	%rbx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r12
-	movq	%rbx, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	addq	%r12, %rbx
-	adcq	%r15, %rcx
-	adcq	-128(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r8         ## 8-byte Folded Reload
-	adcq	$0, %r10
-	addq	%rdi, %rax
-	adcq	%r11, %rbx
-	adcq	%r9, %rcx
-	adcq	%rbp, %rsi
-	adcq	%r14, %r8
-	adcq	%r13, %r10
-	adcq	$0, -112(%rsp)          ## 8-byte Folded Spill
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	movq	16(%rax), %rbp
-	movq	%rbp, %rax
-	mulq	-104(%rsp)              ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r12
-	movq	%rbp, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %r15
-	movq	%rdx, %rbp
-	addq	%r12, %rbp
-	adcq	%r14, %rdi
-	adcq	-128(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%rbx, %r15
-	adcq	%rcx, %rbp
-	adcq	%rsi, %rdi
-	adcq	%r8, %r11
-	adcq	%r10, %r9
-	adcq	-112(%rsp), %r13        ## 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%r15, %rsi
-	imulq	-72(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r8
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	addq	%r8, %r12
-	adcq	-8(%rsp), %rbx          ## 8-byte Folded Reload
-	adcq	-128(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	$0, %r10
-	addq	%r15, %rax
-	adcq	%rbp, %r12
-	adcq	%rdi, %rbx
-	adcq	%r11, %rcx
-	adcq	%r9, %r14
-	adcq	%r13, %r10
-	adcq	$0, -112(%rsp)          ## 8-byte Folded Spill
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	movq	24(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	-104(%rsp)              ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r15
-	movq	%rsi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r13
-	movq	%rsi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %r11
-	movq	%rdx, %rsi
-	addq	%r13, %rsi
-	adcq	%r15, %rdi
-	adcq	-128(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	%r12, %r11
-	adcq	%rbx, %rsi
-	adcq	%rcx, %rdi
-	adcq	%r14, %rbp
-	adcq	%r10, %r9
-	adcq	-112(%rsp), %r8         ## 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%r11, %rbx
-	imulq	-72(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %r14
-	movq	%rbx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r12
-	movq	%rbx, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	addq	%r12, %rbx
-	adcq	%r14, %rcx
-	adcq	-128(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%r11, %rax
-	adcq	%rsi, %rbx
-	adcq	%rdi, %rcx
-	adcq	%rbp, %r15
-	adcq	%r9, %r10
-	adcq	%r8, %r13
-	movq	-112(%rsp), %r8         ## 8-byte Reload
-	adcq	$0, %r8
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	movq	32(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	-104(%rsp)              ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %rdi
-	movq	%rsi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %r14
-	movq	%rdx, %rbp
-	addq	%rdi, %rbp
-	adcq	-88(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r9         ## 8-byte Folded Reload
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rbx, %r14
-	adcq	%rcx, %rbp
-	adcq	%r15, %r12
-	adcq	%r10, %r11
-	adcq	%r13, %r9
-	adcq	%r8, %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	sbbq	%rcx, %rcx
-	movq	-72(%rsp), %rdi         ## 8-byte Reload
-	imulq	%r14, %rdi
-	movq	%rdi, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r13
-	movq	%rdi, %rax
-	movq	%rdi, %r15
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r10
-	movq	%r15, %rax
-	movq	-16(%rsp), %r15         ## 8-byte Reload
-	mulq	%r15
-	addq	%r10, %rdx
-	adcq	%r13, %rdi
-	adcq	-104(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-72(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	$0, %r8
-	andl	$1, %ecx
-	addq	%r14, %rax
-	adcq	%rbp, %rdx
-	adcq	%r12, %rdi
-	adcq	%r11, %rsi
-	adcq	%r9, %rbx
-	adcq	-96(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	$0, %rcx
-	movq	%rdx, %rax
-	subq	%r15, %rax
-	movq	%rdi, %rbp
-	sbbq	-64(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rsi, %r9
-	sbbq	-56(%rsp), %r9          ## 8-byte Folded Reload
-	movq	%rbx, %r10
-	sbbq	-48(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%r8, %r11
-	sbbq	-40(%rsp), %r11         ## 8-byte Folded Reload
-	sbbq	$0, %rcx
-	andl	$1, %ecx
-	cmovneq	%rbx, %r10
-	testb	%cl, %cl
-	cmovneq	%rdx, %rax
-	movq	(%rsp), %rcx            ## 8-byte Reload
-	movq	%rax, (%rcx)
-	cmovneq	%rdi, %rbp
-	movq	%rbp, 8(%rcx)
-	cmovneq	%rsi, %r9
-	movq	%r9, 16(%rcx)
-	movq	%r10, 24(%rcx)
-	cmovneq	%r8, %r11
-	movq	%r11, 32(%rcx)
-	addq	$8, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montNF5L
-	.p2align	4, 0x90
-_mcl_fp_montNF5L:                       ## @mcl_fp_montNF5L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	32(%rsi), %rax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	(%rdx), %rbp
-	mulq	%rbp
-	movq	%rax, %r8
-	movq	%rdx, %r13
-	movq	24(%rsi), %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	mulq	%rbp
-	movq	%rax, %r10
-	movq	%rdx, %r11
-	movq	16(%rsi), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	mulq	%rbp
-	movq	%rax, %r15
-	movq	%rdx, %r9
-	movq	(%rsi), %rdi
-	movq	%rdi, -48(%rsp)         ## 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	mulq	%rbp
-	movq	%rdx, %r12
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	mulq	%rbp
-	movq	%rax, %r14
-	movq	%rdx, %rbp
-	addq	%rbx, %rbp
-	adcq	%r15, %r12
-	adcq	%r10, %r9
-	adcq	%r8, %r11
-	adcq	$0, %r13
-	movq	-8(%rcx), %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%r14, %rsi
-	imulq	%rax, %rsi
-	movq	32(%rcx), %rdx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	%rdx
-	movq	%rax, %r10
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	24(%rcx), %rdx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	%rdx
-	movq	%rax, %rbx
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	16(%rcx), %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	%rdx
-	movq	%rax, %r8
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	movq	(%rcx), %rdi
-	movq	%rdi, -40(%rsp)         ## 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -24(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	%rcx
-	movq	%rdx, %r15
-	movq	%rax, %rcx
-	movq	%rsi, %rax
-	mulq	%rdi
-	addq	%r14, %rax
-	adcq	%rbp, %rcx
-	adcq	%r12, %r8
-	adcq	%r9, %rbx
-	adcq	%r11, %r10
-	adcq	$0, %r13
-	addq	%rdx, %rcx
-	adcq	%r15, %r8
-	adcq	-16(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	-128(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r13        ## 8-byte Folded Reload
-	movq	-104(%rsp), %rax        ## 8-byte Reload
-	movq	8(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	-112(%rsp)              ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-96(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %rdi
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %r14
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %rsi
-	movq	%rdx, %r15
-	addq	%r14, %r15
-	adcq	%rdi, %r11
-	adcq	-128(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	$0, %r12
-	addq	%rcx, %rsi
-	adcq	%r8, %r15
-	adcq	%rbx, %r11
-	adcq	%r10, %r9
-	adcq	%r13, %rbp
-	adcq	$0, %r12
-	movq	%rsi, %rdi
-	imulq	-88(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, %r13
-	movq	%rdi, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r8
-	movq	%rdi, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r14
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	addq	%rsi, %rax
-	adcq	%r15, %r10
-	adcq	%r11, %r14
-	adcq	%r9, %r8
-	adcq	%rbp, %r13
-	adcq	$0, %r12
-	addq	%rdx, %r10
-	adcq	%rbx, %r14
-	adcq	%rcx, %r8
-	adcq	-128(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r12        ## 8-byte Folded Reload
-	movq	-104(%rsp), %rax        ## 8-byte Reload
-	movq	16(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	-112(%rsp)              ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-96(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %rbx
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %r11
-	movq	%rdx, %rsi
-	addq	%rbp, %rsi
-	adcq	%rbx, %rcx
-	adcq	-128(%rsp), %rdi        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	$0, %r15
-	addq	%r10, %r11
-	adcq	%r14, %rsi
-	adcq	%r8, %rcx
-	adcq	%r13, %rdi
-	adcq	%r12, %r9
-	adcq	$0, %r15
-	movq	%r11, %rbx
-	imulq	-88(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, %r13
-	movq	%rbx, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r8
-	movq	%rbx, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %r10
-	movq	%rbx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %rbp
-	movq	%rbx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	addq	%r11, %rax
-	adcq	%rsi, %rbp
-	adcq	%rcx, %r10
-	adcq	%rdi, %r8
-	adcq	%r9, %r13
-	adcq	$0, %r15
-	addq	%rdx, %rbp
-	adcq	%r12, %r10
-	adcq	%r14, %r8
-	adcq	-128(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r15        ## 8-byte Folded Reload
-	movq	-104(%rsp), %rax        ## 8-byte Reload
-	movq	24(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	-112(%rsp)              ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-96(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %rbx
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r12
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %r14
-	movq	%rdx, %rsi
-	addq	%r12, %rsi
-	adcq	%rbx, %rcx
-	adcq	-128(%rsp), %rdi        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	$0, %r11
-	addq	%rbp, %r14
-	adcq	%r10, %rsi
-	adcq	%r8, %rcx
-	adcq	%r13, %rdi
-	adcq	%r15, %r9
-	adcq	$0, %r11
-	movq	%r14, %rbx
-	imulq	-88(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, %r13
-	movq	%rbx, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r8
-	movq	%rbx, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %r10
-	movq	%rbx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %rbp
-	movq	%rbx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	addq	%r14, %rax
-	adcq	%rsi, %rbp
-	adcq	%rcx, %r10
-	adcq	%rdi, %r8
-	adcq	%r9, %r13
-	adcq	$0, %r11
-	addq	%rdx, %rbp
-	adcq	%r12, %r10
-	adcq	%r15, %r8
-	adcq	-128(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r11        ## 8-byte Folded Reload
-	movq	-104(%rsp), %rax        ## 8-byte Reload
-	movq	32(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-112(%rsp)              ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-96(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %rsi
-	movq	%rcx, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rax, %r12
-	movq	%rdx, %rdi
-	addq	%rsi, %rdi
-	adcq	-96(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	$0, %rbx
-	addq	%rbp, %r12
-	adcq	%r10, %rdi
-	adcq	%r8, %r15
-	adcq	%r13, %r14
-	adcq	%r11, %r9
-	adcq	$0, %rbx
-	movq	-88(%rsp), %r8          ## 8-byte Reload
-	imulq	%r12, %r8
-	movq	%r8, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %rcx
-	movq	%r8, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, %rbp
-	movq	%r8, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, %rsi
-	movq	%r8, %rax
-	movq	%r8, %r13
-	movq	-40(%rsp), %r10         ## 8-byte Reload
-	mulq	%r10
-	movq	%rdx, %r11
-	movq	%rax, %r8
-	movq	%r13, %rax
-	movq	-24(%rsp), %r13         ## 8-byte Reload
-	mulq	%r13
-	addq	%r12, %r8
-	adcq	%rdi, %rax
-	adcq	%r15, %rsi
-	adcq	%r14, %rbp
-	adcq	%r9, %rcx
-	adcq	$0, %rbx
-	addq	%r11, %rax
-	adcq	%rdx, %rsi
-	adcq	-112(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	-88(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rax, %r11
-	subq	%r10, %r11
-	movq	%rsi, %r10
-	sbbq	%r13, %r10
-	movq	%rbp, %r8
-	sbbq	-80(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%rcx, %r9
-	sbbq	-72(%rsp), %r9          ## 8-byte Folded Reload
-	movq	%rbx, %rdx
-	sbbq	-64(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	sarq	$63, %rdi
-	cmovsq	%rax, %r11
-	movq	-8(%rsp), %rax          ## 8-byte Reload
-	movq	%r11, (%rax)
-	cmovsq	%rsi, %r10
-	movq	%r10, 8(%rax)
-	cmovsq	%rbp, %r8
-	movq	%r8, 16(%rax)
-	cmovsq	%rcx, %r9
-	movq	%r9, 24(%rax)
-	cmovsq	%rbx, %rdx
-	movq	%rdx, 32(%rax)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montRed5L
-	.p2align	4, 0x90
-_mcl_fp_montRed5L:                      ## @mcl_fp_montRed5L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	(%rsi), %r9
-	movq	%r9, %rdi
-	imulq	%rax, %rdi
-	movq	32(%rcx), %rdx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rdx
-	movq	%rax, %r8
-	movq	%rdx, %r13
-	movq	24(%rcx), %rdx
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rdx
-	movq	%rax, %r11
-	movq	%rdx, %r10
-	movq	16(%rcx), %rdx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rdx
-	movq	%rax, %r14
-	movq	%rdx, %r15
-	movq	(%rcx), %rbp
-	movq	%rbp, -40(%rsp)         ## 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -72(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rcx
-	movq	%rdx, %r12
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	mulq	%rbp
-	movq	%rdx, %rcx
-	addq	%rbx, %rcx
-	adcq	%r14, %r12
-	adcq	%r11, %r15
-	adcq	%r8, %r10
-	adcq	$0, %r13
-	addq	%r9, %rax
-	movq	72(%rsi), %rax
-	movq	64(%rsi), %rdx
-	adcq	8(%rsi), %rcx
-	adcq	16(%rsi), %r12
-	adcq	24(%rsi), %r15
-	adcq	32(%rsi), %r10
-	adcq	40(%rsi), %r13
-	movq	%r13, -112(%rsp)        ## 8-byte Spill
-	movq	56(%rsi), %rdi
-	movq	48(%rsi), %rsi
-	adcq	$0, %rsi
-	movq	%rsi, -24(%rsp)         ## 8-byte Spill
-	adcq	$0, %rdi
-	movq	%rdi, -64(%rsp)         ## 8-byte Spill
-	adcq	$0, %rdx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	sbbq	%r8, %r8
-	andl	$1, %r8d
-	movq	%rcx, %rsi
-	movq	-104(%rsp), %r9         ## 8-byte Reload
-	imulq	%r9, %rsi
-	movq	%rsi, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rdi
-	movq	%rsi, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	addq	%rbp, %rsi
-	adcq	%rdi, %rbx
-	adcq	-16(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	-32(%rsp), %r14         ## 8-byte Folded Reload
-	adcq	$0, %r11
-	addq	%rcx, %rax
-	adcq	%r12, %rsi
-	adcq	%r15, %rbx
-	adcq	%r10, %r13
-	adcq	-112(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	-24(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	$0, -64(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -96(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -48(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %r8
-	movq	%rsi, %rcx
-	imulq	%r9, %rcx
-	movq	%rcx, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	movq	-56(%rsp), %r9          ## 8-byte Reload
-	mulq	%r9
-	movq	%rdx, %r15
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	addq	%rdi, %rcx
-	adcq	-32(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-24(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	$0, %rbp
-	addq	%rsi, %rax
-	adcq	%rbx, %rcx
-	adcq	%r13, %r12
-	adcq	%r14, %r15
-	adcq	%r11, %r10
-	adcq	-64(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	$0, -96(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -48(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %r8
-	movq	%rcx, %rsi
-	imulq	-104(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	%r9
-	movq	%rdx, %r13
-	movq	%rax, %rbx
-	movq	%rsi, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %rdi
-	movq	%rsi, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	addq	%rdi, %rsi
-	adcq	%rbx, %r9
-	adcq	-112(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-64(%rsp), %r14         ## 8-byte Folded Reload
-	adcq	$0, %r11
-	addq	%rcx, %rax
-	adcq	%r12, %rsi
-	adcq	%r15, %r9
-	adcq	%r10, %r13
-	adcq	%rbp, %r14
-	adcq	-96(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	$0, -48(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %r8
-	movq	-104(%rsp), %rdi        ## 8-byte Reload
-	imulq	%rsi, %rdi
-	movq	%rdi, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r15
-	movq	%rdi, %rax
-	movq	%rdi, %r10
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r12
-	movq	%r10, %rax
-	movq	-40(%rsp), %r10         ## 8-byte Reload
-	mulq	%r10
-	addq	%r12, %rdx
-	adcq	%r15, %rdi
-	adcq	-104(%rsp), %rbx        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	$0, %rbp
-	addq	%rsi, %rax
-	adcq	%r9, %rdx
-	adcq	%r13, %rdi
-	adcq	%r14, %rbx
-	adcq	%r11, %rcx
-	adcq	-48(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	$0, %r8
-	movq	%rdx, %rax
-	subq	%r10, %rax
-	movq	%rdi, %rsi
-	sbbq	-72(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rbx, %r9
-	sbbq	-56(%rsp), %r9          ## 8-byte Folded Reload
-	movq	%rcx, %r10
-	sbbq	-88(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%rbp, %r11
-	sbbq	-80(%rsp), %r11         ## 8-byte Folded Reload
-	sbbq	$0, %r8
-	andl	$1, %r8d
-	cmovneq	%rbp, %r11
-	testb	%r8b, %r8b
-	cmovneq	%rdx, %rax
-	movq	-8(%rsp), %rdx          ## 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovneq	%rdi, %rsi
-	movq	%rsi, 8(%rdx)
-	cmovneq	%rbx, %r9
-	movq	%r9, 16(%rdx)
-	cmovneq	%rcx, %r10
-	movq	%r10, 24(%rdx)
-	movq	%r11, 32(%rdx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_addPre5L
-	.p2align	4, 0x90
-_mcl_fp_addPre5L:                       ## @mcl_fp_addPre5L
-## BB#0:
-	movq	32(%rdx), %r8
-	movq	24(%rdx), %r9
-	movq	24(%rsi), %r11
-	movq	32(%rsi), %r10
-	movq	16(%rdx), %rcx
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rcx
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	adcq	%r9, %r11
-	movq	%r11, 24(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	retq
-
-	.globl	_mcl_fp_subPre5L
-	.p2align	4, 0x90
-_mcl_fp_subPre5L:                       ## @mcl_fp_subPre5L
-## BB#0:
-	pushq	%rbx
-	movq	32(%rsi), %r10
-	movq	24(%rdx), %r8
-	movq	32(%rdx), %r9
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %rcx
-	movq	%rbx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r8, %r11
-	movq	%r11, 24(%rdi)
-	sbbq	%r9, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	popq	%rbx
-	retq
-
-	.globl	_mcl_fp_shr1_5L
-	.p2align	4, 0x90
-_mcl_fp_shr1_5L:                        ## @mcl_fp_shr1_5L
-## BB#0:
-	movq	32(%rsi), %r8
-	movq	24(%rsi), %rcx
-	movq	16(%rsi), %rdx
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rax
-	movq	%rax, (%rdi)
-	shrdq	$1, %rdx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rcx, %rdx
-	movq	%rdx, 16(%rdi)
-	shrdq	$1, %r8, %rcx
-	movq	%rcx, 24(%rdi)
-	shrq	%r8
-	movq	%r8, 32(%rdi)
-	retq
-
-	.globl	_mcl_fp_add5L
-	.p2align	4, 0x90
-_mcl_fp_add5L:                          ## @mcl_fp_add5L
-## BB#0:
-	pushq	%rbx
-	movq	32(%rdx), %r11
-	movq	24(%rdx), %rbx
-	movq	24(%rsi), %r9
-	movq	32(%rsi), %r8
-	movq	16(%rdx), %r10
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r10
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	adcq	%rbx, %r9
-	movq	%r9, 24(%rdi)
-	adcq	%r11, %r8
-	movq	%r8, 32(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r10
-	sbbq	24(%rcx), %r9
-	sbbq	32(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB74_2
-## BB#1:                                ## %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	movq	%r9, 24(%rdi)
-	movq	%r8, 32(%rdi)
-LBB74_2:                                ## %carry
-	popq	%rbx
-	retq
-
-	.globl	_mcl_fp_addNF5L
-	.p2align	4, 0x90
-_mcl_fp_addNF5L:                        ## @mcl_fp_addNF5L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	32(%rdx), %r8
-	movq	24(%rdx), %r9
-	movq	16(%rdx), %r10
-	movq	(%rdx), %r14
-	movq	8(%rdx), %r11
-	addq	(%rsi), %r14
-	adcq	8(%rsi), %r11
-	adcq	16(%rsi), %r10
-	adcq	24(%rsi), %r9
-	adcq	32(%rsi), %r8
-	movq	%r14, %rsi
-	subq	(%rcx), %rsi
-	movq	%r11, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r10, %rbx
-	sbbq	16(%rcx), %rbx
-	movq	%r9, %r15
-	sbbq	24(%rcx), %r15
-	movq	%r8, %rax
-	sbbq	32(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r14, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r11, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
-	cmovsq	%r9, %r15
-	movq	%r15, 24(%rdi)
-	cmovsq	%r8, %rax
-	movq	%rax, 32(%rdi)
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_sub5L
-	.p2align	4, 0x90
-_mcl_fp_sub5L:                          ## @mcl_fp_sub5L
-## BB#0:
-	pushq	%r14
-	pushq	%rbx
-	movq	32(%rsi), %r8
-	movq	24(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	24(%rsi), %r9
-	movq	16(%rsi), %r10
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %rsi
-	sbbq	16(%rdx), %r10
-	movq	%rax, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	sbbq	%r11, %r9
-	movq	%r9, 24(%rdi)
-	sbbq	%r14, %r8
-	movq	%r8, 32(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	LBB76_2
-## BB#1:                                ## %carry
-	movq	32(%rcx), %r11
-	movq	24(%rcx), %r14
-	movq	8(%rcx), %rdx
-	movq	16(%rcx), %rbx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%rsi, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 16(%rdi)
-	adcq	%r9, %r14
-	movq	%r14, 24(%rdi)
-	adcq	%r8, %r11
-	movq	%r11, 32(%rdi)
-LBB76_2:                                ## %nocarry
-	popq	%rbx
-	popq	%r14
-	retq
-
-	.globl	_mcl_fp_subNF5L
-	.p2align	4, 0x90
-_mcl_fp_subNF5L:                        ## @mcl_fp_subNF5L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	32(%rsi), %r13
-	movdqu	(%rdx), %xmm0
-	movdqu	16(%rdx), %xmm1
-	pshufd	$78, %xmm1, %xmm2       ## xmm2 = xmm1[2,3,0,1]
-	movd	%xmm2, %r10
-	movdqu	(%rsi), %xmm2
-	movdqu	16(%rsi), %xmm3
-	pshufd	$78, %xmm3, %xmm4       ## xmm4 = xmm3[2,3,0,1]
-	movd	%xmm4, %r8
-	movd	%xmm1, %r11
-	movd	%xmm3, %r9
-	pshufd	$78, %xmm0, %xmm1       ## xmm1 = xmm0[2,3,0,1]
-	movd	%xmm1, %r14
-	pshufd	$78, %xmm2, %xmm1       ## xmm1 = xmm2[2,3,0,1]
-	movd	%xmm1, %r15
-	movd	%xmm0, %rbx
-	movd	%xmm2, %r12
-	subq	%rbx, %r12
-	sbbq	%r14, %r15
-	sbbq	%r11, %r9
-	sbbq	%r10, %r8
-	sbbq	32(%rdx), %r13
-	movq	%r13, %rdx
-	sarq	$63, %rdx
-	movq	%rdx, %rbx
-	shldq	$1, %r13, %rbx
-	movq	8(%rcx), %rsi
-	andq	%rbx, %rsi
-	andq	(%rcx), %rbx
-	movq	32(%rcx), %r10
-	andq	%rdx, %r10
-	movq	24(%rcx), %rax
-	andq	%rdx, %rax
-	rolq	%rdx
-	andq	16(%rcx), %rdx
-	addq	%r12, %rbx
-	movq	%rbx, (%rdi)
-	adcq	%r15, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r9, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r8, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r13, %r10
-	movq	%r10, 32(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fpDbl_add5L
-	.p2align	4, 0x90
-_mcl_fpDbl_add5L:                       ## @mcl_fpDbl_add5L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	72(%rdx), %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	64(%rdx), %r11
-	movq	56(%rdx), %r14
-	movq	48(%rdx), %r15
-	movq	24(%rsi), %rbp
-	movq	32(%rsi), %r13
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rax
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r12
-	adcq	24(%rdx), %rbp
-	adcq	32(%rdx), %r13
-	movq	40(%rdx), %r9
-	movq	%rbx, (%rdi)
-	movq	72(%rsi), %r8
-	movq	%rax, 8(%rdi)
-	movq	64(%rsi), %r10
-	movq	%r12, 16(%rdi)
-	movq	56(%rsi), %r12
-	movq	%rbp, 24(%rdi)
-	movq	48(%rsi), %rbp
-	movq	40(%rsi), %rbx
-	movq	%r13, 32(%rdi)
-	adcq	%r9, %rbx
-	adcq	%r15, %rbp
-	adcq	%r14, %r12
-	adcq	%r11, %r10
-	adcq	-8(%rsp), %r8           ## 8-byte Folded Reload
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	%rbx, %rax
-	subq	(%rcx), %rax
-	movq	%rbp, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r12, %r9
-	sbbq	16(%rcx), %r9
-	movq	%r10, %r11
-	sbbq	24(%rcx), %r11
-	movq	%r8, %r14
-	sbbq	32(%rcx), %r14
-	sbbq	$0, %rsi
-	andl	$1, %esi
-	cmovneq	%rbx, %rax
-	movq	%rax, 40(%rdi)
-	testb	%sil, %sil
-	cmovneq	%rbp, %rdx
-	movq	%rdx, 48(%rdi)
-	cmovneq	%r12, %r9
-	movq	%r9, 56(%rdi)
-	cmovneq	%r10, %r11
-	movq	%r11, 64(%rdi)
-	cmovneq	%r8, %r14
-	movq	%r14, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sub5L
-	.p2align	4, 0x90
-_mcl_fpDbl_sub5L:                       ## @mcl_fpDbl_sub5L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	72(%rdx), %r9
-	movq	64(%rdx), %r10
-	movq	56(%rdx), %r14
-	movq	16(%rsi), %r8
-	movq	(%rsi), %r15
-	movq	8(%rsi), %r11
-	xorl	%eax, %eax
-	subq	(%rdx), %r15
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %r8
-	movq	24(%rsi), %r12
-	sbbq	24(%rdx), %r12
-	movq	%r15, (%rdi)
-	movq	32(%rsi), %rbx
-	sbbq	32(%rdx), %rbx
-	movq	%r11, 8(%rdi)
-	movq	48(%rdx), %r15
-	movq	40(%rdx), %rdx
-	movq	%r8, 16(%rdi)
-	movq	72(%rsi), %r8
-	movq	%r12, 24(%rdi)
-	movq	64(%rsi), %r11
-	movq	%rbx, 32(%rdi)
-	movq	40(%rsi), %rbp
-	sbbq	%rdx, %rbp
-	movq	56(%rsi), %r12
-	movq	48(%rsi), %r13
-	sbbq	%r15, %r13
-	sbbq	%r14, %r12
-	sbbq	%r10, %r11
-	sbbq	%r9, %r8
-	movl	$0, %edx
-	sbbq	$0, %rdx
-	andl	$1, %edx
-	movq	(%rcx), %rsi
-	cmoveq	%rax, %rsi
-	testb	%dl, %dl
-	movq	16(%rcx), %rdx
-	cmoveq	%rax, %rdx
-	movq	8(%rcx), %rbx
-	cmoveq	%rax, %rbx
-	movq	32(%rcx), %r9
-	cmoveq	%rax, %r9
-	cmovneq	24(%rcx), %rax
-	addq	%rbp, %rsi
-	movq	%rsi, 40(%rdi)
-	adcq	%r13, %rbx
-	movq	%rbx, 48(%rdi)
-	adcq	%r12, %rdx
-	movq	%rdx, 56(%rdi)
-	adcq	%r11, %rax
-	movq	%rax, 64(%rdi)
-	adcq	%r8, %r9
-	movq	%r9, 72(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_mulUnitPre6L
-	.p2align	4, 0x90
-_mcl_fp_mulUnitPre6L:                   ## @mcl_fp_mulUnitPre6L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rcx, %rax
-	mulq	40(%rsi)
-	movq	%rdx, %r9
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	32(%rsi)
-	movq	%rdx, %r10
-	movq	%rax, %r11
-	movq	%rcx, %rax
-	mulq	24(%rsi)
-	movq	%rdx, %r15
-	movq	%rax, %r14
-	movq	%rcx, %rax
-	mulq	16(%rsi)
-	movq	%rdx, %r13
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	8(%rsi)
-	movq	%rdx, %rbx
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	(%rsi)
-	movq	%rax, (%rdi)
-	addq	%rbp, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r12, %rbx
-	movq	%rbx, 16(%rdi)
-	adcq	%r14, %r13
-	movq	%r13, 24(%rdi)
-	adcq	%r11, %r15
-	movq	%r15, 32(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 40(%rdi)
-	adcq	$0, %r9
-	movq	%r9, 48(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_mulPre6L
-	.p2align	4, 0x90
-_mcl_fpDbl_mulPre6L:                    ## @mcl_fpDbl_mulPre6L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	%rsi, %r12
-	movq	%rdi, -16(%rsp)         ## 8-byte Spill
-	movq	(%r12), %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	(%rdx), %rsi
-	mulq	%rsi
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	24(%r12), %rbp
-	movq	%rbp, -104(%rsp)        ## 8-byte Spill
-	movq	32(%r12), %rbx
-	movq	40(%r12), %r11
-	movq	%rax, (%rdi)
-	movq	%r11, %rax
-	mulq	%rsi
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rsi
-	movq	%rdx, %rcx
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rsi
-	movq	%rax, %r9
-	movq	%rdx, %rdi
-	movq	16(%r12), %r8
-	movq	%r8, %rax
-	mulq	%rsi
-	movq	%rax, %r14
-	movq	%rdx, %rbp
-	movq	8(%r12), %r10
-	movq	%r10, %rax
-	mulq	%rsi
-	movq	%rdx, %r15
-	movq	%rax, %r13
-	addq	-88(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	%r14, %r15
-	adcq	%r9, %rbp
-	adcq	-112(%rsp), %rdi        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -112(%rsp)        ## 8-byte Spill
-	movq	-120(%rsp), %rsi        ## 8-byte Reload
-	adcq	$0, %rsi
-	movq	-64(%rsp), %r9          ## 8-byte Reload
-	movq	8(%r9), %rcx
-	movq	%r11, %rax
-	mulq	%rcx
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rcx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, %r11
-	movq	-104(%rsp), %rax        ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, %r14
-	movq	%r8, %rax
-	mulq	%rcx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, %r8
-	movq	%r10, %rax
-	mulq	%rcx
-	movq	%rdx, %r10
-	movq	%rax, %rbx
-	movq	-72(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	addq	%r13, %rax
-	movq	-16(%rsp), %r13         ## 8-byte Reload
-	movq	%rax, 8(%r13)
-	adcq	%r15, %rbx
-	adcq	%rbp, %r8
-	adcq	%rdi, %r14
-	adcq	-112(%rsp), %r11        ## 8-byte Folded Reload
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	adcq	%rsi, %rax
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	addq	%rdx, %rbx
-	adcq	%r10, %r8
-	adcq	-80(%rsp), %r14         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	adcq	-88(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	40(%r12), %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	16(%r9), %rcx
-	mulq	%rcx
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	32(%r12), %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r10
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	24(%r12), %rax
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r9
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	movq	16(%r12), %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %rbp
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	8(%r12), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %rdi
-	movq	%rdx, %r15
-	movq	(%r12), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	addq	%rbx, %rax
-	movq	%rax, 16(%r13)
-	adcq	%r8, %rdi
-	adcq	%r14, %rbp
-	adcq	%r11, %r9
-	adcq	-120(%rsp), %r10        ## 8-byte Folded Reload
-	movq	-72(%rsp), %rax         ## 8-byte Reload
-	adcq	%rsi, %rax
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%rdx, %rdi
-	adcq	%r15, %rbp
-	adcq	-56(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	-48(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-40(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	adcq	-112(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	-64(%rsp), %rbx         ## 8-byte Reload
-	movq	24(%rbx), %rsi
-	movq	-88(%rsp), %rax         ## 8-byte Reload
-	mulq	%rsi
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	mulq	%rsi
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, %r14
-	movq	-104(%rsp), %rax        ## 8-byte Reload
-	mulq	%rsi
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, %r15
-	movq	-80(%rsp), %rax         ## 8-byte Reload
-	mulq	%rsi
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, %r13
-	movq	-32(%rsp), %rax         ## 8-byte Reload
-	mulq	%rsi
-	movq	%rdx, %r8
-	movq	%rax, %r11
-	movq	-24(%rsp), %rax         ## 8-byte Reload
-	mulq	%rsi
-	addq	%rdi, %rax
-	movq	-16(%rsp), %rsi         ## 8-byte Reload
-	movq	%rax, 24(%rsi)
-	adcq	%rbp, %r11
-	adcq	%r9, %r13
-	adcq	%r10, %r15
-	adcq	-72(%rsp), %r14         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	adcq	%rcx, %rax
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%rdx, %r11
-	adcq	%r8, %r13
-	adcq	-112(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	adcq	-88(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	40(%r12), %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	32(%rbx), %rdi
-	mulq	%rdi
-	movq	%rax, %r9
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	32(%r12), %rax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r10
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	24(%r12), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r8
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	16(%r12), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %rbx
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	(%r12), %rbp
-	movq	8(%r12), %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, %r12
-	movq	%rbp, %rax
-	mulq	%rdi
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	addq	%r11, %rax
-	movq	%rax, 32(%rsi)
-	adcq	%r13, %r12
-	adcq	%r15, %rbx
-	adcq	%r14, %r8
-	adcq	-120(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	%rcx, %r9
-	movq	-64(%rsp), %rax         ## 8-byte Reload
-	movq	40(%rax), %rcx
-	sbbq	%rsi, %rsi
-	movq	-80(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, %r15
-	movq	-8(%rsp), %rax          ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, %r11
-	movq	%rbp, %rax
-	mulq	%rcx
-	movq	%rdx, %rbp
-	movq	%rax, %rdi
-	movq	-32(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, %r13
-	movq	%rax, %r14
-	movq	-40(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	andl	$1, %esi
-	addq	-48(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rbx        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r8         ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	-72(%rsp), %rsi         ## 8-byte Folded Reload
-	addq	%rdi, %r12
-	movq	-16(%rsp), %rcx         ## 8-byte Reload
-	movq	%r12, 40(%rcx)
-	adcq	%r11, %rbx
-	adcq	%rax, %r8
-	adcq	%r14, %r10
-	adcq	%r15, %r9
-	adcq	-24(%rsp), %rsi         ## 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%rbp, %rbx
-	movq	%rbx, 48(%rcx)
-	adcq	-80(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r8, 56(%rcx)
-	adcq	%rdx, %r10
-	movq	%r10, 64(%rcx)
-	adcq	%r13, %r9
-	movq	%r9, 72(%rcx)
-	adcq	-120(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	%rsi, 80(%rcx)
-	adcq	-64(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, 88(%rcx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sqrPre6L
-	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre6L:                    ## @mcl_fpDbl_sqrPre6L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdi, -48(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %r8
-	movq	%r8, -120(%rsp)         ## 8-byte Spill
-	movq	24(%rsi), %r11
-	movq	%r11, -112(%rsp)        ## 8-byte Spill
-	movq	32(%rsi), %r12
-	movq	40(%rsi), %r9
-	movq	(%rsi), %rcx
-	movq	%rcx, %rax
-	mulq	%rcx
-	movq	%rdx, %rbp
-	movq	%rax, (%rdi)
-	movq	%r9, %rax
-	mulq	%rcx
-	movq	%rdx, %rbx
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%r12, %rax
-	mulq	%rcx
-	movq	%rdx, %r10
-	movq	%rax, %r13
-	movq	%r11, %rax
-	mulq	%rcx
-	movq	%rdx, %rdi
-	movq	%rax, %r15
-	movq	%r8, %rax
-	mulq	%rcx
-	movq	%rax, %r11
-	movq	%rdx, %r14
-	movq	8(%rsi), %r8
-	movq	%r8, %rax
-	mulq	%rcx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rax, %rcx
-	addq	%rcx, %rbp
-	adcq	%rdx, %r11
-	adcq	%r15, %r14
-	adcq	%r13, %rdi
-	adcq	-128(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	$0, %rbx
-	movq	%rbx, -72(%rsp)         ## 8-byte Spill
-	movq	%r9, %rax
-	mulq	%r8
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r13
-	movq	%r12, %rax
-	mulq	%r8
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	%rax, %r9
-	movq	-112(%rsp), %rax        ## 8-byte Reload
-	mulq	%r8
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, %r15
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	mulq	%r8
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, %r12
-	movq	%r8, %rax
-	mulq	%r8
-	movq	%rax, %rbx
-	addq	%rcx, %rbp
-	movq	-48(%rsp), %rax         ## 8-byte Reload
-	movq	%rbp, 8(%rax)
-	adcq	%r11, %rbx
-	adcq	%r14, %r12
-	adcq	%rdi, %r15
-	adcq	%r10, %r9
-	movq	%r13, %rax
-	adcq	-72(%rsp), %rax         ## 8-byte Folded Reload
-	sbbq	%r13, %r13
-	andl	$1, %r13d
-	addq	-56(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	%rdx, %r12
-	adcq	-120(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	-64(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	adcq	-128(%rsp), %r13        ## 8-byte Folded Reload
-	movq	40(%rsi), %rax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %rdi
-	mulq	%rdi
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	32(%rsi), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r11
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	24(%rsi), %rbp
-	movq	%rbp, %rax
-	mulq	%rdi
-	movq	%rax, %r8
-	movq	%r8, -24(%rsp)          ## 8-byte Spill
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r10
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, %r14
-	movq	%rdi, %rax
-	mulq	%rdi
-	movq	%rax, %rcx
-	addq	%rbx, %r14
-	movq	-48(%rsp), %rax         ## 8-byte Reload
-	movq	%r14, 16(%rax)
-	adcq	%r12, %r10
-	adcq	%r15, %rcx
-	adcq	%r8, %r9
-	adcq	-88(%rsp), %r11         ## 8-byte Folded Reload
-	movq	-96(%rsp), %r8          ## 8-byte Reload
-	adcq	%r13, %r8
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	addq	-104(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	-32(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	%rdx, %r9
-	adcq	-128(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rdi        ## 8-byte Folded Reload
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, %r12
-	movq	-64(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %r15
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, %r14
-	movq	-72(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbx
-	movq	%rbp, %rax
-	mulq	%rbp
-	movq	%rax, %r13
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	addq	%r10, %rbx
-	movq	-48(%rsp), %rax         ## 8-byte Reload
-	movq	%rbx, 24(%rax)
-	adcq	%rcx, %r14
-	adcq	-24(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	%r11, %r13
-	adcq	%r8, %r15
-	adcq	%rdi, %r12
-	sbbq	%rcx, %rcx
-	movq	8(%rsi), %rbp
-	movq	40(%rsi), %rbx
-	movq	%rbp, %rax
-	mulq	%rbx
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %rdi
-	movq	%rdi, %rax
-	mulq	%rbx
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	32(%rsi), %r10
-	movq	%rbp, %rax
-	mulq	%r10
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%r10
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	andl	$1, %ecx
-	addq	-40(%rsp), %r14         ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	-128(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	24(%rsi), %rdi
-	movq	%rdi, %rax
-	mulq	%rbx
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%r10
-	movq	%rax, %rbp
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %rsi
-	movq	%rsi, %rax
-	mulq	%rbx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	%r10
-	movq	%rdx, %r11
-	movq	%rax, %rsi
-	movq	%rbx, %rax
-	mulq	%r10
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	movq	%rax, %rdi
-	movq	%rbx, %rax
-	mulq	%rbx
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbx
-	movq	%r10, %rax
-	mulq	%r10
-	movq	%rdx, %r8
-	addq	-8(%rsp), %r14          ## 8-byte Folded Reload
-	movq	-48(%rsp), %rdx         ## 8-byte Reload
-	movq	%r14, 32(%rdx)
-	adcq	-32(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	%r13, %rsi
-	adcq	%r15, %rbp
-	adcq	%r12, %rax
-	adcq	%rdi, %rcx
-	sbbq	%r10, %r10
-	andl	$1, %r10d
-	addq	-24(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	-120(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	%r11, %rbp
-	adcq	-40(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	%r8, %rcx
-	movq	-16(%rsp), %r8          ## 8-byte Reload
-	adcq	%r8, %r10
-	addq	-72(%rsp), %r9          ## 8-byte Folded Reload
-	movq	%r9, 40(%rdx)
-	adcq	-112(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	%rdi, %rcx
-	adcq	%rbx, %r10
-	sbbq	%rdi, %rdi
-	andl	$1, %edi
-	addq	-64(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, 48(%rdx)
-	adcq	-56(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rbp, 56(%rdx)
-	adcq	-80(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, 64(%rdx)
-	adcq	-128(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	%rcx, 72(%rdx)
-	adcq	%r8, %r10
-	movq	%r10, 80(%rdx)
-	adcq	-88(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, 88(%rdx)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_mont6L
-	.p2align	4, 0x90
-_mcl_fp_mont6L:                         ## @mcl_fp_mont6L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$48, %rsp
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rdi, 40(%rsp)          ## 8-byte Spill
-	movq	40(%rsi), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	(%rdx), %rdi
-	mulq	%rdi
-	movq	%rax, %r10
-	movq	%rdx, %r11
-	movq	32(%rsi), %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r14
-	movq	%rdx, %r15
-	movq	24(%rsi), %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r8
-	movq	%rdx, %rbx
-	movq	16(%rsi), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r9
-	movq	%rdx, %r12
-	movq	(%rsi), %rbp
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, %r13
-	movq	%rax, %rsi
-	movq	%rbp, %rax
-	mulq	%rdi
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rdx, %rdi
-	addq	%rsi, %rdi
-	adcq	%r9, %r13
-	adcq	%r8, %r12
-	adcq	%r14, %rbx
-	movq	%rbx, -88(%rsp)         ## 8-byte Spill
-	adcq	%r10, %r15
-	movq	%r15, -120(%rsp)        ## 8-byte Spill
-	adcq	$0, %r11
-	movq	%r11, -112(%rsp)        ## 8-byte Spill
-	movq	-8(%rcx), %rdx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbx
-	imulq	%rdx, %rbx
-	movq	40(%rcx), %rdx
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rdx
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	32(%rcx), %rdx
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rdx
-	movq	%rax, %r9
-	movq	%rdx, %r14
-	movq	24(%rcx), %rdx
-	movq	%rdx, (%rsp)            ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rdx
-	movq	%rax, %r8
-	movq	%rdx, %r15
-	movq	16(%rcx), %rdx
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rdx
-	movq	%rax, %r10
-	movq	%rdx, %r11
-	movq	(%rcx), %rsi
-	movq	%rsi, -24(%rsp)         ## 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -16(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rcx
-	movq	%rdx, %rbp
-	movq	%rax, %rcx
-	movq	%rbx, %rax
-	mulq	%rsi
-	movq	%rdx, %rbx
-	addq	%rcx, %rbx
-	adcq	%r10, %rbp
-	adcq	%r8, %r11
-	adcq	%r9, %r15
-	adcq	-104(%rsp), %r14        ## 8-byte Folded Reload
-	movq	-128(%rsp), %rcx        ## 8-byte Reload
-	adcq	$0, %rcx
-	addq	-96(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	%rdi, %rbx
-	adcq	%r13, %rbp
-	adcq	%r12, %r11
-	adcq	-88(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	movq	8(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %r9
-	movq	%rdi, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r12
-	movq	%rdx, %rdi
-	addq	%r10, %rdi
-	adcq	%r9, %rcx
-	adcq	-104(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r8          ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdx        ## 8-byte Reload
-	adcq	-88(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-112(%rsp), %rax        ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rbx, %r12
-	adcq	%rbp, %rdi
-	adcq	%r11, %rcx
-	adcq	%r15, %r13
-	adcq	%r14, %r8
-	adcq	-128(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	adcq	%rsi, %rax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%r12, %rbx
-	imulq	-32(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r10
-	movq	%rbx, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r11
-	movq	%rbx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	addq	%r11, %r9
-	adcq	%r10, %rbp
-	adcq	-48(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r14         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r12, %rax
-	adcq	%rdi, %r9
-	adcq	%rcx, %rbp
-	adcq	%r13, %rsi
-	adcq	%r8, %r15
-	adcq	-120(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	adcq	$0, -88(%rsp)           ## 8-byte Folded Spill
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	movq	16(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r10
-	movq	%rcx, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r13
-	movq	%rdx, %rcx
-	addq	%r10, %rcx
-	adcq	%r8, %rbx
-	adcq	%rdi, %r12
-	adcq	-104(%rsp), %r11        ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdx        ## 8-byte Reload
-	adcq	-96(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-112(%rsp), %rax        ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%r9, %r13
-	adcq	%rbp, %rcx
-	adcq	%rsi, %rbx
-	adcq	%r15, %r12
-	adcq	%r14, %r11
-	adcq	-128(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	adcq	-88(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%r13, %rdi
-	imulq	-32(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r15
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	addq	%r10, %r8
-	adcq	%r15, %rbp
-	adcq	-48(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r14         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r13, %rax
-	adcq	%rcx, %r8
-	adcq	%rbx, %rbp
-	adcq	%r12, %rsi
-	adcq	%r11, %r9
-	adcq	-120(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	adcq	$0, -88(%rsp)           ## 8-byte Folded Spill
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	movq	24(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %r10
-	movq	%rcx, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r13
-	movq	%rdx, %rcx
-	addq	%r12, %rcx
-	adcq	%r10, %rbx
-	adcq	%rdi, %r15
-	adcq	-104(%rsp), %r11        ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdx        ## 8-byte Reload
-	adcq	-96(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-112(%rsp), %rax        ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%r8, %r13
-	adcq	%rbp, %rcx
-	adcq	%rsi, %rbx
-	adcq	%r9, %r15
-	adcq	%r14, %r11
-	adcq	-128(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	adcq	-88(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%r13, %rsi
-	imulq	-32(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r10
-	movq	%rsi, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %r8
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %r9
-	movq	%rsi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	addq	%r9, %rsi
-	adcq	%r8, %r12
-	adcq	%r10, %r14
-	adcq	-104(%rsp), %rdi        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r13, %rax
-	adcq	%rcx, %rsi
-	adcq	%rbx, %r12
-	adcq	%r15, %r14
-	adcq	%r11, %rdi
-	adcq	-120(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	adcq	$0, -88(%rsp)           ## 8-byte Folded Spill
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	movq	32(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %r11
-	movq	%rcx, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %r9
-	movq	%rcx, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r8
-	movq	%rdx, %r13
-	addq	%r9, %r13
-	adcq	%r11, %r15
-	adcq	-48(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rbx        ## 8-byte Folded Reload
-	movq	-120(%rsp), %rcx        ## 8-byte Reload
-	adcq	-96(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	-112(%rsp), %rax        ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rsi, %r8
-	adcq	%r12, %r13
-	adcq	%r14, %r15
-	adcq	%rdi, %r10
-	adcq	%rbp, %rbx
-	adcq	-128(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	adcq	-88(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%r8, %rcx
-	imulq	-32(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	addq	%r12, %r14
-	adcq	%rdi, %rbp
-	adcq	-48(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r9          ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r8, %rax
-	adcq	%r13, %r14
-	adcq	%r15, %rbp
-	adcq	%r10, %rsi
-	adcq	%rbx, %r11
-	adcq	-120(%rsp), %r9         ## 8-byte Folded Reload
-	movq	%r9, -120(%rsp)         ## 8-byte Spill
-	adcq	-112(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	-88(%rsp), %rdi         ## 8-byte Reload
-	adcq	$0, %rdi
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	movq	40(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-72(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rbx
-	movq	%rcx, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %r9
-	movq	%rcx, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r15
-	movq	%rdx, %r8
-	addq	%r9, %r8
-	adcq	%rbx, %r10
-	adcq	-80(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r12         ## 8-byte Folded Reload
-	movq	-64(%rsp), %rax         ## 8-byte Reload
-	adcq	-112(%rsp), %rax        ## 8-byte Folded Reload
-	movq	-56(%rsp), %rdx         ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r14, %r15
-	adcq	%rbp, %r8
-	adcq	%rsi, %r10
-	adcq	%r11, %r13
-	adcq	-120(%rsp), %r12        ## 8-byte Folded Reload
-	movq	%r12, -72(%rsp)         ## 8-byte Spill
-	adcq	-128(%rsp), %rax        ## 8-byte Folded Reload
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	adcq	%rdi, %rdx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	sbbq	%rcx, %rcx
-	movq	-32(%rsp), %rdi         ## 8-byte Reload
-	imulq	%r15, %rdi
-	movq	%rdi, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %r9
-	movq	%rdi, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r11
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	andl	$1, %ecx
-	addq	%r14, %rax
-	adcq	%r11, %rdx
-	adcq	-40(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-32(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	$0, %rbp
-	addq	%r15, %r9
-	adcq	%r8, %rax
-	adcq	%r10, %rdx
-	adcq	%r13, %rbx
-	adcq	-72(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-64(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-56(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	$0, %rcx
-	movq	%rax, %r8
-	subq	-24(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	sbbq	-16(%rsp), %r9          ## 8-byte Folded Reload
-	movq	%rbx, %r10
-	sbbq	-8(%rsp), %r10          ## 8-byte Folded Reload
-	movq	%rsi, %r11
-	sbbq	(%rsp), %r11            ## 8-byte Folded Reload
-	movq	%r12, %r14
-	sbbq	8(%rsp), %r14           ## 8-byte Folded Reload
-	movq	%rbp, %r15
-	sbbq	16(%rsp), %r15          ## 8-byte Folded Reload
-	sbbq	$0, %rcx
-	andl	$1, %ecx
-	cmovneq	%rsi, %r11
-	testb	%cl, %cl
-	cmovneq	%rax, %r8
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	movq	%r8, (%rax)
-	cmovneq	%rdx, %r9
-	movq	%r9, 8(%rax)
-	cmovneq	%rbx, %r10
-	movq	%r10, 16(%rax)
-	movq	%r11, 24(%rax)
-	cmovneq	%r12, %r14
-	movq	%r14, 32(%rax)
-	cmovneq	%rbp, %r15
-	movq	%r15, 40(%rax)
-	addq	$48, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montNF6L
-	.p2align	4, 0x90
-_mcl_fp_montNF6L:                       ## @mcl_fp_montNF6L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$40, %rsp
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rdi, 32(%rsp)          ## 8-byte Spill
-	movq	40(%rsi), %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	(%rdx), %rdi
-	mulq	%rdi
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rdx, %r12
-	movq	32(%rsi), %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r14
-	movq	%rdx, %r10
-	movq	24(%rsi), %rax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r15
-	movq	%rdx, %r9
-	movq	16(%rsi), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r11
-	movq	%rdx, %r8
-	movq	(%rsi), %rbx
-	movq	%rbx, 8(%rsp)           ## 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, %rbp
-	movq	%rax, %rsi
-	movq	%rbx, %rax
-	mulq	%rdi
-	movq	%rax, %r13
-	movq	%rdx, %rdi
-	addq	%rsi, %rdi
-	adcq	%r11, %rbp
-	adcq	%r15, %r8
-	adcq	%r14, %r9
-	adcq	-64(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%r10, -128(%rsp)        ## 8-byte Spill
-	adcq	$0, %r12
-	movq	%r12, -112(%rsp)        ## 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	movq	%r13, %rbx
-	imulq	%rax, %rbx
-	movq	40(%rcx), %rdx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rdx
-	movq	%rax, %r14
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	32(%rcx), %rdx
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rdx
-	movq	%rax, %r15
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	24(%rcx), %rdx
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rdx
-	movq	%rax, %r12
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	16(%rcx), %rdx
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rdx
-	movq	%rax, %r10
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	movq	(%rcx), %rsi
-	movq	%rsi, -32(%rsp)         ## 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	%rcx
-	movq	%rdx, %r11
-	movq	%rax, %rcx
-	movq	%rbx, %rax
-	mulq	%rsi
-	addq	%r13, %rax
-	adcq	%rdi, %rcx
-	adcq	%rbp, %r10
-	adcq	%r8, %r12
-	adcq	%r9, %r15
-	adcq	-128(%rsp), %r14        ## 8-byte Folded Reload
-	movq	-112(%rsp), %rax        ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rdx, %rcx
-	adcq	%r11, %r10
-	adcq	-8(%rsp), %r12          ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r14         ## 8-byte Folded Reload
-	movq	%r14, -128(%rsp)        ## 8-byte Spill
-	adcq	-120(%rsp), %rax        ## 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	-72(%rsp), %rax         ## 8-byte Reload
-	movq	8(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r14
-	movq	%rdi, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r11
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rax, %rdi
-	movq	%rdx, %rbp
-	addq	%r11, %rbp
-	adcq	%r14, %rbx
-	adcq	-104(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	$0, %r8
-	addq	%rcx, %rdi
-	adcq	%r10, %rbp
-	adcq	%r12, %rbx
-	adcq	%r15, %rsi
-	adcq	-128(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	$0, %r8
-	movq	%rdi, %r11
-	imulq	-48(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%r11, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%r11, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, %r15
-	movq	%r11, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, %rcx
-	movq	%r11, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, %r10
-	movq	%r11, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %r14
-	movq	%r11, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	addq	%rdi, %rax
-	adcq	%rbp, %r14
-	adcq	%rbx, %r10
-	adcq	%rsi, %rcx
-	adcq	%r13, %r15
-	movq	-112(%rsp), %rax        ## 8-byte Reload
-	adcq	%r9, %rax
-	adcq	$0, %r8
-	addq	%rdx, %r14
-	adcq	%r12, %r10
-	adcq	-104(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r15        ## 8-byte Folded Reload
-	movq	%r15, -120(%rsp)        ## 8-byte Spill
-	adcq	-96(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	adcq	-128(%rsp), %r8         ## 8-byte Folded Reload
-	movq	-72(%rsp), %rax         ## 8-byte Reload
-	movq	16(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r9
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rax, %rbp
-	movq	%rdx, %rbx
-	addq	%r9, %rbx
-	adcq	-8(%rsp), %rsi          ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r12        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	-128(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	$0, %r13
-	addq	%r14, %rbp
-	adcq	%r10, %rbx
-	adcq	%rcx, %rsi
-	adcq	-120(%rsp), %r12        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	%r8, %r15
-	adcq	$0, %r13
-	movq	%rbp, %rcx
-	imulq	-48(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r9
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, %r10
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, %r14
-	movq	%rcx, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	addq	%rbp, %rax
-	adcq	%rbx, %rdi
-	adcq	%rsi, %r14
-	adcq	%r12, %r10
-	adcq	%r11, %r9
-	movq	-112(%rsp), %rax        ## 8-byte Reload
-	adcq	%r15, %rax
-	adcq	$0, %r13
-	addq	%rdx, %rdi
-	adcq	%r8, %r14
-	adcq	-104(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	-128(%rsp), %rax        ## 8-byte Folded Reload
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	adcq	-120(%rsp), %r13        ## 8-byte Folded Reload
-	movq	-72(%rsp), %rax         ## 8-byte Reload
-	movq	24(%rax), %rbp
-	movq	%rbp, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r12
-	movq	%rbp, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rax, %r8
-	movq	%rdx, %rbp
-	addq	%r12, %rbp
-	adcq	-104(%rsp), %rbx        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-128(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	$0, %r15
-	addq	%rdi, %r8
-	adcq	%r14, %rbp
-	adcq	%r10, %rbx
-	adcq	%r9, %rsi
-	adcq	-112(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	%r13, %r11
-	adcq	$0, %r15
-	movq	%r8, %r14
-	imulq	-48(%rsp), %r14         ## 8-byte Folded Reload
-	movq	%r14, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r9
-	movq	%r14, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, %r13
-	movq	%r14, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, %r10
-	movq	%r14, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, %r12
-	movq	%r14, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, %rdi
-	movq	%r14, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	addq	%r8, %rax
-	adcq	%rbp, %rdi
-	adcq	%rbx, %r12
-	adcq	%rsi, %r10
-	adcq	%rcx, %r13
-	adcq	%r11, %r9
-	adcq	$0, %r15
-	addq	%rdx, %rdi
-	adcq	-104(%rsp), %r12        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r13        ## 8-byte Folded Reload
-	movq	%r13, -120(%rsp)        ## 8-byte Spill
-	adcq	-112(%rsp), %r9         ## 8-byte Folded Reload
-	movq	%r9, -112(%rsp)         ## 8-byte Spill
-	adcq	-128(%rsp), %r15        ## 8-byte Folded Reload
-	movq	-72(%rsp), %rax         ## 8-byte Reload
-	movq	32(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r13
-	movq	%rcx, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rax, %r11
-	movq	%rdx, %rbp
-	addq	%r13, %rbp
-	adcq	-8(%rsp), %rbx          ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	-128(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	$0, %r14
-	addq	%rdi, %r11
-	adcq	%r12, %rbp
-	adcq	%r10, %rbx
-	adcq	-120(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r8         ## 8-byte Folded Reload
-	adcq	%r15, %r9
-	adcq	$0, %r14
-	movq	%r11, %rcx
-	imulq	-48(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, %r10
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r15
-	movq	%rcx, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	addq	%r11, %rax
-	adcq	%rbp, %rdi
-	adcq	%rbx, %r15
-	adcq	%rsi, %r10
-	adcq	%r8, %r12
-	movq	-112(%rsp), %rcx        ## 8-byte Reload
-	adcq	%r9, %rcx
-	adcq	$0, %r14
-	addq	%rdx, %rdi
-	adcq	%r13, %r15
-	adcq	-128(%rsp), %r10        ## 8-byte Folded Reload
-	movq	%r10, -128(%rsp)        ## 8-byte Spill
-	adcq	-120(%rsp), %r12        ## 8-byte Folded Reload
-	movq	%r12, -120(%rsp)        ## 8-byte Spill
-	adcq	-104(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	%rcx, -112(%rsp)        ## 8-byte Spill
-	adcq	-96(%rsp), %r14         ## 8-byte Folded Reload
-	movq	-72(%rsp), %rax         ## 8-byte Reload
-	movq	40(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-80(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-88(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %rsi
-	movq	%rcx, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rax, %r9
-	movq	%rdx, %r8
-	addq	%rsi, %r8
-	adcq	%rbp, %r10
-	adcq	-88(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	$0, %rbx
-	addq	%rdi, %r9
-	adcq	%r15, %r8
-	adcq	-128(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r12        ## 8-byte Folded Reload
-	adcq	%r14, %r11
-	adcq	$0, %rbx
-	movq	-48(%rsp), %rcx         ## 8-byte Reload
-	imulq	%r9, %rcx
-	movq	%rcx, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	movq	%rax, %rsi
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %r14
-	movq	%rcx, %rax
-	movq	%rcx, %r15
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rax, %rcx
-	movq	%r15, %rax
-	movq	24(%rsp), %r15          ## 8-byte Reload
-	mulq	%r15
-	addq	%r9, %r14
-	adcq	%r8, %rax
-	adcq	%r10, %rcx
-	adcq	%r13, %rbp
-	adcq	%r12, %rdi
-	adcq	%r11, %rsi
-	adcq	$0, %rbx
-	addq	-88(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	%rdx, %rcx
-	adcq	-56(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rdi         ## 8-byte Folded Reload
-	adcq	-72(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-48(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rax, %r14
-	subq	-32(%rsp), %r14         ## 8-byte Folded Reload
-	movq	%rcx, %r8
-	sbbq	%r15, %r8
-	movq	%rbp, %r9
-	sbbq	-40(%rsp), %r9          ## 8-byte Folded Reload
-	movq	%rdi, %r10
-	sbbq	-24(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%rsi, %r11
-	sbbq	-16(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%rbx, %r15
-	sbbq	-64(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%r15, %rdx
-	sarq	$63, %rdx
-	cmovsq	%rax, %r14
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	movq	%r14, (%rax)
-	cmovsq	%rcx, %r8
-	movq	%r8, 8(%rax)
-	cmovsq	%rbp, %r9
-	movq	%r9, 16(%rax)
-	cmovsq	%rdi, %r10
-	movq	%r10, 24(%rax)
-	cmovsq	%rsi, %r11
-	movq	%r11, 32(%rax)
-	cmovsq	%rbx, %r15
-	movq	%r15, 40(%rax)
-	addq	$40, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montRed6L
-	.p2align	4, 0x90
-_mcl_fp_montRed6L:                      ## @mcl_fp_montRed6L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$24, %rsp
-	movq	%rdx, %rbp
-	movq	%rdi, 16(%rsp)          ## 8-byte Spill
-	movq	-8(%rbp), %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %r10
-	movq	%r10, %rdi
-	imulq	%rax, %rdi
-	movq	40(%rbp), %rcx
-	movq	%rcx, -24(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rcx
-	movq	%rax, %r14
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	32(%rbp), %rcx
-	movq	%rcx, -40(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rcx
-	movq	%rax, %r15
-	movq	%rdx, %r9
-	movq	24(%rbp), %rcx
-	movq	%rcx, -48(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rcx
-	movq	%rax, %r12
-	movq	%rdx, %r11
-	movq	16(%rbp), %rcx
-	movq	%rcx, -56(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rcx
-	movq	%rax, %rcx
-	movq	%rdx, %r13
-	movq	(%rbp), %rbx
-	movq	8(%rbp), %rdx
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rdx
-	movq	%rdx, %r8
-	movq	%rax, %rbp
-	movq	%rdi, %rax
-	mulq	%rbx
-	movq	%rbx, %rdi
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	%rdx, %rbx
-	addq	%rbp, %rbx
-	adcq	%rcx, %r8
-	adcq	%r12, %r13
-	adcq	%r15, %r11
-	adcq	%r14, %r9
-	movq	-128(%rsp), %rcx        ## 8-byte Reload
-	adcq	$0, %rcx
-	addq	%r10, %rax
-	adcq	8(%rsi), %rbx
-	adcq	16(%rsi), %r8
-	adcq	24(%rsi), %r13
-	adcq	32(%rsi), %r11
-	adcq	40(%rsi), %r9
-	movq	%r9, -120(%rsp)         ## 8-byte Spill
-	adcq	48(%rsi), %rcx
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	movq	88(%rsi), %rax
-	movq	80(%rsi), %rcx
-	movq	72(%rsi), %rdx
-	movq	64(%rsi), %rbp
-	movq	56(%rsi), %rsi
-	adcq	$0, %rsi
-	movq	%rsi, -104(%rsp)        ## 8-byte Spill
-	adcq	$0, %rbp
-	movq	%rbp, -72(%rsp)         ## 8-byte Spill
-	adcq	$0, %rdx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, -64(%rsp)         ## 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	sbbq	%r14, %r14
-	andl	$1, %r14d
-	movq	%rbx, %rsi
-	imulq	-80(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r10
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r9
-	movq	%rsi, %rax
-	mulq	%rdi
-	movq	%rdx, %rdi
-	addq	%r9, %rdi
-	adcq	%r10, %rbp
-	adcq	8(%rsp), %rcx           ## 8-byte Folded Reload
-	adcq	(%rsp), %r12            ## 8-byte Folded Reload
-	adcq	-32(%rsp), %r15         ## 8-byte Folded Reload
-	movq	-112(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%rbx, %rax
-	adcq	%r8, %rdi
-	adcq	%r13, %rbp
-	adcq	%r11, %rcx
-	adcq	-120(%rsp), %r12        ## 8-byte Folded Reload
-	adcq	-128(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	adcq	$0, -72(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -96(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -64(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -88(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %r14
-	movq	%rdi, %rbx
-	imulq	-80(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r9
-	movq	%rbx, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r11
-	movq	%rbx, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	addq	%r11, %r10
-	adcq	%r9, %r8
-	adcq	(%rsp), %rsi            ## 8-byte Folded Reload
-	adcq	-32(%rsp), %r13         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rbx        ## 8-byte Reload
-	adcq	-104(%rsp), %rbx        ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%rdi, %rax
-	adcq	%rbp, %r10
-	adcq	%rcx, %r8
-	adcq	%r12, %rsi
-	adcq	%r15, %r13
-	adcq	-112(%rsp), %rbx        ## 8-byte Folded Reload
-	movq	%rbx, -120(%rsp)        ## 8-byte Spill
-	adcq	-72(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	adcq	$0, -96(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -64(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -88(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %r14
-	movq	%r10, %rcx
-	imulq	-80(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, %rax
-	movq	-24(%rsp), %rbp         ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %rbx
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r9
-	movq	%rcx, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	addq	%r9, %rcx
-	adcq	%rbx, %rdi
-	adcq	-32(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r11         ## 8-byte Folded Reload
-	movq	-112(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r10, %rax
-	adcq	%r8, %rcx
-	adcq	%rsi, %rdi
-	adcq	%r13, %r12
-	adcq	-120(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-128(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	adcq	$0, -64(%rsp)           ## 8-byte Folded Spill
-	movq	-88(%rsp), %r8          ## 8-byte Reload
-	adcq	$0, %r8
-	adcq	$0, %r14
-	movq	%rcx, %rsi
-	imulq	-80(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	%rbp
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %r10
-	movq	%rsi, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	addq	%r10, %rbx
-	adcq	%rbp, %r9
-	adcq	-104(%rsp), %r13        ## 8-byte Folded Reload
-	movq	-120(%rsp), %rbp        ## 8-byte Reload
-	adcq	-72(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rsi        ## 8-byte Reload
-	adcq	-88(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	-96(%rsp), %rdx         ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%rcx, %rax
-	adcq	%rdi, %rbx
-	adcq	%r12, %r9
-	adcq	%r15, %r13
-	adcq	%r11, %rbp
-	movq	%rbp, -120(%rsp)        ## 8-byte Spill
-	adcq	-112(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	%rsi, -128(%rsp)        ## 8-byte Spill
-	adcq	-64(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, -88(%rsp)          ## 8-byte Spill
-	adcq	$0, %r14
-	movq	-80(%rsp), %r8          ## 8-byte Reload
-	imulq	%rbx, %r8
-	movq	%r8, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%r8, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%r8, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%r8, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r10
-	movq	%r8, %rax
-	movq	-16(%rsp), %r12         ## 8-byte Reload
-	mulq	%r12
-	movq	%rdx, %rcx
-	movq	%rax, %r15
-	movq	%r8, %rax
-	movq	-8(%rsp), %r8           ## 8-byte Reload
-	mulq	%r8
-	addq	%r15, %rdx
-	adcq	%r10, %rcx
-	adcq	-112(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-64(%rsp), %rdi         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	$0, %rbp
-	addq	%rbx, %rax
-	adcq	%r9, %rdx
-	adcq	%r13, %rcx
-	adcq	-120(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-128(%rsp), %rdi        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	$0, %r14
-	movq	%rdx, %rax
-	subq	%r8, %rax
-	movq	%rcx, %rbx
-	sbbq	%r12, %rbx
-	movq	%rsi, %r8
-	sbbq	-56(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%rdi, %r9
-	sbbq	-48(%rsp), %r9          ## 8-byte Folded Reload
-	movq	%r11, %r10
-	sbbq	-40(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%rbp, %r15
-	sbbq	-24(%rsp), %r15         ## 8-byte Folded Reload
-	sbbq	$0, %r14
-	andl	$1, %r14d
-	cmovneq	%rbp, %r15
-	testb	%r14b, %r14b
-	cmovneq	%rdx, %rax
-	movq	16(%rsp), %rdx          ## 8-byte Reload
-	movq	%rax, (%rdx)
-	cmovneq	%rcx, %rbx
-	movq	%rbx, 8(%rdx)
-	cmovneq	%rsi, %r8
-	movq	%r8, 16(%rdx)
-	cmovneq	%rdi, %r9
-	movq	%r9, 24(%rdx)
-	cmovneq	%r11, %r10
-	movq	%r10, 32(%rdx)
-	movq	%r15, 40(%rdx)
-	addq	$24, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_addPre6L
-	.p2align	4, 0x90
-_mcl_fp_addPre6L:                       ## @mcl_fp_addPre6L
-## BB#0:
-	pushq	%r14
-	pushq	%rbx
-	movq	40(%rdx), %r8
-	movq	40(%rsi), %r11
-	movq	32(%rdx), %r9
-	movq	24(%rdx), %r10
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %r14
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %rbx
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rbx, 16(%rdi)
-	adcq	%r10, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r9, %r14
-	movq	%r14, 32(%rdi)
-	adcq	%r8, %r11
-	movq	%r11, 40(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r14
-	retq
-
-	.globl	_mcl_fp_subPre6L
-	.p2align	4, 0x90
-_mcl_fp_subPre6L:                       ## @mcl_fp_subPre6L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	40(%rdx), %r8
-	movq	40(%rsi), %r9
-	movq	32(%rsi), %r10
-	movq	24(%rsi), %r11
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %rsi
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %rsi
-	movq	24(%rdx), %r14
-	movq	32(%rdx), %r15
-	sbbq	16(%rdx), %rcx
-	movq	%rbx, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r14, %r11
-	movq	%r11, 24(%rdi)
-	sbbq	%r15, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	%r8, %r9
-	movq	%r9, 40(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_shr1_6L
-	.p2align	4, 0x90
-_mcl_fp_shr1_6L:                        ## @mcl_fp_shr1_6L
-## BB#0:
-	movq	40(%rsi), %r8
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %rdx
-	movq	16(%rsi), %rax
-	movq	(%rsi), %rcx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rcx
-	movq	%rcx, (%rdi)
-	shrdq	$1, %rax, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rdx, %rax
-	movq	%rax, 16(%rdi)
-	shrdq	$1, %r9, %rdx
-	movq	%rdx, 24(%rdi)
-	shrdq	$1, %r8, %r9
-	movq	%r9, 32(%rdi)
-	shrq	%r8
-	movq	%r8, 40(%rdi)
-	retq
-
-	.globl	_mcl_fp_add6L
-	.p2align	4, 0x90
-_mcl_fp_add6L:                          ## @mcl_fp_add6L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%rbx
-	movq	40(%rdx), %r14
-	movq	40(%rsi), %r8
-	movq	32(%rdx), %r15
-	movq	24(%rdx), %rbx
-	movq	24(%rsi), %r10
-	movq	32(%rsi), %r9
-	movq	16(%rdx), %r11
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rdx
-	adcq	16(%rsi), %r11
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	adcq	%rbx, %r10
-	movq	%r10, 24(%rdi)
-	adcq	%r15, %r9
-	movq	%r9, 32(%rdi)
-	adcq	%r14, %r8
-	movq	%r8, 40(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %rax
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r11
-	sbbq	24(%rcx), %r10
-	sbbq	32(%rcx), %r9
-	sbbq	40(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB89_2
-## BB#1:                                ## %nocarry
-	movq	%rax, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	movq	%r10, 24(%rdi)
-	movq	%r9, 32(%rdi)
-	movq	%r8, 40(%rdi)
-LBB89_2:                                ## %carry
-	popq	%rbx
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_addNF6L
-	.p2align	4, 0x90
-_mcl_fp_addNF6L:                        ## @mcl_fp_addNF6L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	40(%rdx), %r8
-	movq	32(%rdx), %r9
-	movq	24(%rdx), %r10
-	movq	16(%rdx), %r11
-	movq	(%rdx), %r15
-	movq	8(%rdx), %r14
-	addq	(%rsi), %r15
-	adcq	8(%rsi), %r14
-	adcq	16(%rsi), %r11
-	adcq	24(%rsi), %r10
-	adcq	32(%rsi), %r9
-	adcq	40(%rsi), %r8
-	movq	%r15, %rsi
-	subq	(%rcx), %rsi
-	movq	%r14, %rbx
-	sbbq	8(%rcx), %rbx
-	movq	%r11, %rdx
-	sbbq	16(%rcx), %rdx
-	movq	%r10, %r13
-	sbbq	24(%rcx), %r13
-	movq	%r9, %r12
-	sbbq	32(%rcx), %r12
-	movq	%r8, %rax
-	sbbq	40(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r15, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r14, %rbx
-	movq	%rbx, 8(%rdi)
-	cmovsq	%r11, %rdx
-	movq	%rdx, 16(%rdi)
-	cmovsq	%r10, %r13
-	movq	%r13, 24(%rdi)
-	cmovsq	%r9, %r12
-	movq	%r12, 32(%rdi)
-	cmovsq	%r8, %rax
-	movq	%rax, 40(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_sub6L
-	.p2align	4, 0x90
-_mcl_fp_sub6L:                          ## @mcl_fp_sub6L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	40(%rdx), %r14
-	movq	40(%rsi), %r8
-	movq	32(%rsi), %r9
-	movq	24(%rsi), %r10
-	movq	16(%rsi), %r11
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %rsi
-	movq	24(%rdx), %r15
-	movq	32(%rdx), %r12
-	sbbq	16(%rdx), %r11
-	movq	%rax, (%rdi)
-	movq	%rsi, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	sbbq	%r15, %r10
-	movq	%r10, 24(%rdi)
-	sbbq	%r12, %r9
-	movq	%r9, 32(%rdi)
-	sbbq	%r14, %r8
-	movq	%r8, 40(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	LBB91_2
-## BB#1:                                ## %carry
-	movq	40(%rcx), %r14
-	movq	32(%rcx), %r15
-	movq	24(%rcx), %r12
-	movq	8(%rcx), %rbx
-	movq	16(%rcx), %rdx
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%rsi, %rbx
-	movq	%rbx, 8(%rdi)
-	adcq	%r11, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r10, %r12
-	movq	%r12, 24(%rdi)
-	adcq	%r9, %r15
-	movq	%r15, 32(%rdi)
-	adcq	%r8, %r14
-	movq	%r14, 40(%rdi)
-LBB91_2:                                ## %nocarry
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_subNF6L
-	.p2align	4, 0x90
-_mcl_fp_subNF6L:                        ## @mcl_fp_subNF6L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movdqu	(%rdx), %xmm0
-	movdqu	16(%rdx), %xmm1
-	movdqu	32(%rdx), %xmm2
-	pshufd	$78, %xmm2, %xmm3       ## xmm3 = xmm2[2,3,0,1]
-	movd	%xmm3, %r11
-	movdqu	(%rsi), %xmm3
-	movdqu	16(%rsi), %xmm4
-	movdqu	32(%rsi), %xmm5
-	pshufd	$78, %xmm5, %xmm6       ## xmm6 = xmm5[2,3,0,1]
-	movd	%xmm6, %rax
-	movd	%xmm2, %r14
-	movd	%xmm5, %r8
-	pshufd	$78, %xmm1, %xmm2       ## xmm2 = xmm1[2,3,0,1]
-	movd	%xmm2, %r15
-	pshufd	$78, %xmm4, %xmm2       ## xmm2 = xmm4[2,3,0,1]
-	movd	%xmm2, %r9
-	movd	%xmm1, %r12
-	movd	%xmm4, %r10
-	pshufd	$78, %xmm0, %xmm1       ## xmm1 = xmm0[2,3,0,1]
-	movd	%xmm1, %rbx
-	pshufd	$78, %xmm3, %xmm1       ## xmm1 = xmm3[2,3,0,1]
-	movd	%xmm1, %r13
-	movd	%xmm0, %rsi
-	movd	%xmm3, %rbp
-	subq	%rsi, %rbp
-	sbbq	%rbx, %r13
-	sbbq	%r12, %r10
-	sbbq	%r15, %r9
-	sbbq	%r14, %r8
-	sbbq	%r11, %rax
-	movq	%rax, %rsi
-	sarq	$63, %rsi
-	movq	%rsi, %rbx
-	shldq	$1, %rax, %rbx
-	andq	(%rcx), %rbx
-	movq	40(%rcx), %r11
-	andq	%rsi, %r11
-	movq	32(%rcx), %r14
-	andq	%rsi, %r14
-	movq	24(%rcx), %r15
-	andq	%rsi, %r15
-	movq	16(%rcx), %rdx
-	andq	%rsi, %rdx
-	rolq	%rsi
-	andq	8(%rcx), %rsi
-	addq	%rbp, %rbx
-	movq	%rbx, (%rdi)
-	adcq	%r13, %rsi
-	movq	%rsi, 8(%rdi)
-	adcq	%r10, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r9, %r15
-	movq	%r15, 24(%rdi)
-	adcq	%r8, %r14
-	movq	%r14, 32(%rdi)
-	adcq	%rax, %r11
-	movq	%r11, 40(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_add6L
-	.p2align	4, 0x90
-_mcl_fpDbl_add6L:                       ## @mcl_fpDbl_add6L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	88(%rdx), %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	80(%rdx), %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	72(%rdx), %r14
-	movq	64(%rdx), %r15
-	movq	24(%rsi), %rbp
-	movq	32(%rsi), %r13
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rax
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r12
-	adcq	24(%rdx), %rbp
-	adcq	32(%rdx), %r13
-	movq	56(%rdx), %r11
-	movq	48(%rdx), %r9
-	movq	40(%rdx), %rdx
-	movq	%rbx, (%rdi)
-	movq	88(%rsi), %r8
-	movq	%rax, 8(%rdi)
-	movq	80(%rsi), %r10
-	movq	%r12, 16(%rdi)
-	movq	72(%rsi), %r12
-	movq	%rbp, 24(%rdi)
-	movq	40(%rsi), %rax
-	adcq	%rdx, %rax
-	movq	64(%rsi), %rdx
-	movq	%r13, 32(%rdi)
-	movq	56(%rsi), %r13
-	movq	48(%rsi), %rbp
-	adcq	%r9, %rbp
-	movq	%rax, 40(%rdi)
-	adcq	%r11, %r13
-	adcq	%r15, %rdx
-	adcq	%r14, %r12
-	adcq	-16(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-8(%rsp), %r8           ## 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rbp, %rsi
-	subq	(%rcx), %rsi
-	movq	%r13, %rbx
-	sbbq	8(%rcx), %rbx
-	movq	%rdx, %r9
-	sbbq	16(%rcx), %r9
-	movq	%r12, %r11
-	sbbq	24(%rcx), %r11
-	movq	%r10, %r14
-	sbbq	32(%rcx), %r14
-	movq	%r8, %r15
-	sbbq	40(%rcx), %r15
-	sbbq	$0, %rax
-	andl	$1, %eax
-	cmovneq	%rbp, %rsi
-	movq	%rsi, 48(%rdi)
-	testb	%al, %al
-	cmovneq	%r13, %rbx
-	movq	%rbx, 56(%rdi)
-	cmovneq	%rdx, %r9
-	movq	%r9, 64(%rdi)
-	cmovneq	%r12, %r11
-	movq	%r11, 72(%rdi)
-	cmovneq	%r10, %r14
-	movq	%r14, 80(%rdi)
-	cmovneq	%r8, %r15
-	movq	%r15, 88(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sub6L
-	.p2align	4, 0x90
-_mcl_fpDbl_sub6L:                       ## @mcl_fpDbl_sub6L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	88(%rdx), %r9
-	movq	80(%rdx), %r10
-	movq	72(%rdx), %r14
-	movq	16(%rsi), %r8
-	movq	(%rsi), %r15
-	movq	8(%rsi), %r11
-	xorl	%eax, %eax
-	subq	(%rdx), %r15
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %r8
-	movq	24(%rsi), %rbx
-	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %r12
-	sbbq	32(%rdx), %r12
-	movq	64(%rdx), %r13
-	movq	%r15, (%rdi)
-	movq	56(%rdx), %rbp
-	movq	%r11, 8(%rdi)
-	movq	48(%rdx), %r15
-	movq	40(%rdx), %rdx
-	movq	%r8, 16(%rdi)
-	movq	88(%rsi), %r8
-	movq	%rbx, 24(%rdi)
-	movq	40(%rsi), %rbx
-	sbbq	%rdx, %rbx
-	movq	80(%rsi), %r11
-	movq	%r12, 32(%rdi)
-	movq	48(%rsi), %rdx
-	sbbq	%r15, %rdx
-	movq	72(%rsi), %r15
-	movq	%rbx, 40(%rdi)
-	movq	64(%rsi), %r12
-	movq	56(%rsi), %rsi
-	sbbq	%rbp, %rsi
-	sbbq	%r13, %r12
-	sbbq	%r14, %r15
-	sbbq	%r10, %r11
-	sbbq	%r9, %r8
-	movl	$0, %ebp
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	movq	(%rcx), %r14
-	cmoveq	%rax, %r14
-	testb	%bpl, %bpl
-	movq	16(%rcx), %r9
-	cmoveq	%rax, %r9
-	movq	8(%rcx), %rbp
-	cmoveq	%rax, %rbp
-	movq	40(%rcx), %r10
-	cmoveq	%rax, %r10
-	movq	32(%rcx), %rbx
-	cmoveq	%rax, %rbx
-	cmovneq	24(%rcx), %rax
-	addq	%rdx, %r14
-	movq	%r14, 48(%rdi)
-	adcq	%rsi, %rbp
-	movq	%rbp, 56(%rdi)
-	adcq	%r12, %r9
-	movq	%r9, 64(%rdi)
-	adcq	%r15, %rax
-	movq	%rax, 72(%rdi)
-	adcq	%r11, %rbx
-	movq	%rbx, 80(%rdi)
-	adcq	%r8, %r10
-	movq	%r10, 88(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_mulUnitPre7L
-	.p2align	4, 0x90
-_mcl_fp_mulUnitPre7L:                   ## @mcl_fp_mulUnitPre7L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rcx, %rax
-	mulq	48(%rsi)
-	movq	%rdx, %r10
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	40(%rsi)
-	movq	%rdx, %r11
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	32(%rsi)
-	movq	%rdx, %r15
-	movq	%rax, %r14
-	movq	%rcx, %rax
-	mulq	24(%rsi)
-	movq	%rdx, %r13
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	16(%rsi)
-	movq	%rdx, %rbx
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	8(%rsi)
-	movq	%rdx, %r8
-	movq	%rax, %r9
-	movq	%rcx, %rax
-	mulq	(%rsi)
-	movq	%rax, (%rdi)
-	addq	%r9, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%rbp, %r8
-	movq	%r8, 16(%rdi)
-	adcq	%r12, %rbx
-	movq	%rbx, 24(%rdi)
-	adcq	%r14, %r13
-	movq	%r13, 32(%rdi)
-	adcq	-16(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%r15, 40(%rdi)
-	adcq	-8(%rsp), %r11          ## 8-byte Folded Reload
-	movq	%r11, 48(%rdi)
-	adcq	$0, %r10
-	movq	%r10, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_mulPre7L
-	.p2align	4, 0x90
-_mcl_fpDbl_mulPre7L:                    ## @mcl_fpDbl_mulPre7L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$16, %rsp
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rsi, %r9
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	(%r9), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	(%rdx), %rsi
-	mulq	%rsi
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	32(%r9), %rbp
-	movq	%rbp, -88(%rsp)         ## 8-byte Spill
-	movq	40(%r9), %rcx
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	movq	48(%r9), %r14
-	movq	%rax, (%rdi)
-	movq	%r14, %rax
-	mulq	%rsi
-	movq	%rdx, %rdi
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	%rsi
-	movq	%rdx, %rcx
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rsi
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rdx, %rbp
-	movq	24(%r9), %r8
-	movq	%r8, %rax
-	mulq	%rsi
-	movq	%rax, %r15
-	movq	%rdx, %rbx
-	movq	16(%r9), %rax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	mulq	%rsi
-	movq	%rax, %r13
-	movq	%rdx, %r12
-	movq	8(%r9), %r11
-	movq	%r11, %rax
-	mulq	%rsi
-	movq	%rdx, %rsi
-	movq	%rax, %r10
-	addq	-120(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	%r13, %rsi
-	adcq	%r15, %r12
-	adcq	-104(%rsp), %rbx        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rbp, -72(%rsp)         ## 8-byte Spill
-	adcq	-80(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -104(%rsp)        ## 8-byte Spill
-	adcq	$0, %rdi
-	movq	%rdi, -96(%rsp)         ## 8-byte Spill
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	movq	8(%rax), %rcx
-	movq	%r14, %rax
-	mulq	%rcx
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	-128(%rsp), %rax        ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r13
-	movq	-88(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %r15
-	movq	%r8, %rax
-	mulq	%rcx
-	movq	%rdx, %r8
-	movq	%rax, %r14
-	movq	-112(%rsp), %rax        ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, %rbp
-	movq	%r11, %rax
-	mulq	%rcx
-	movq	%rdx, %r11
-	movq	%rax, %rdi
-	movq	-64(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	addq	%r10, %rax
-	movq	-8(%rsp), %r10          ## 8-byte Reload
-	movq	%rax, 8(%r10)
-	adcq	%rsi, %rdi
-	adcq	%r12, %rbp
-	adcq	%rbx, %r14
-	adcq	-72(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r13        ## 8-byte Folded Reload
-	movq	-80(%rsp), %rax         ## 8-byte Reload
-	adcq	-96(%rsp), %rax         ## 8-byte Folded Reload
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	addq	%rdx, %rdi
-	adcq	%r11, %rbp
-	adcq	-112(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	%r8, %r15
-	adcq	-88(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	-128(%rsp), %rax        ## 8-byte Folded Reload
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	adcq	-120(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	48(%r9), %rdx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	movq	16(%rax), %rcx
-	movq	%rdx, %rax
-	mulq	%rcx
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	40(%r9), %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	movq	32(%r9), %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r12
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	24(%r9), %rax
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %rbx
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	movq	16(%r9), %rax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r8
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	movq	8(%r9), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r11
-	movq	%rdx, (%rsp)            ## 8-byte Spill
-	movq	(%r9), %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	addq	%rdi, %rax
-	movq	%rax, 16(%r10)
-	adcq	%rbp, %r11
-	adcq	%r14, %r8
-	adcq	%r15, %rbx
-	adcq	%r13, %r12
-	movq	-128(%rsp), %rdi        ## 8-byte Reload
-	adcq	-80(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	adcq	%rsi, %rax
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%rdx, %r11
-	adcq	(%rsp), %r8             ## 8-byte Folded Reload
-	adcq	8(%rsp), %rbx           ## 8-byte Folded Reload
-	adcq	-48(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-40(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, -128(%rsp)        ## 8-byte Spill
-	adcq	-32(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	adcq	-104(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	movq	24(%rax), %rbp
-	movq	-64(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	-88(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, %r13
-	movq	-72(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, %r15
-	movq	-112(%rsp), %rax        ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, %rdi
-	movq	-24(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, %r14
-	movq	%rax, %r10
-	movq	-16(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
-	addq	%r11, %rax
-	movq	-8(%rsp), %rsi          ## 8-byte Reload
-	movq	%rax, 24(%rsi)
-	adcq	%r8, %r10
-	adcq	%rbx, %rdi
-	adcq	%r12, %r15
-	adcq	-128(%rsp), %r13        ## 8-byte Folded Reload
-	movq	-64(%rsp), %rbp         ## 8-byte Reload
-	adcq	-120(%rsp), %rbp        ## 8-byte Folded Reload
-	movq	-80(%rsp), %rax         ## 8-byte Reload
-	adcq	%rcx, %rax
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	addq	%rdx, %r10
-	adcq	%r14, %rdi
-	adcq	-112(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rbp, -64(%rsp)         ## 8-byte Spill
-	adcq	-88(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	adcq	-104(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	48(%r9), %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	-56(%rsp), %rbx         ## 8-byte Reload
-	movq	32(%rbx), %rcx
-	mulq	%rcx
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	40(%r9), %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	32(%r9), %rax
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r12
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	movq	24(%r9), %rax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %rbp
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	movq	16(%r9), %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r14
-	movq	%rdx, (%rsp)            ## 8-byte Spill
-	movq	8(%r9), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	movq	%rax, %r11
-	movq	%rdx, %r8
-	movq	(%r9), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	mulq	%rcx
-	addq	%r10, %rax
-	movq	-8(%rsp), %rcx          ## 8-byte Reload
-	movq	%rax, 32(%rcx)
-	adcq	%rdi, %r11
-	adcq	%r15, %r14
-	adcq	%r13, %rbp
-	adcq	-64(%rsp), %r12         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rcx        ## 8-byte Reload
-	adcq	-80(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	adcq	%rsi, %rax
-	sbbq	%r13, %r13
-	andl	$1, %r13d
-	addq	%rdx, %r11
-	adcq	%r8, %r14
-	adcq	(%rsp), %rbp            ## 8-byte Folded Reload
-	adcq	8(%rsp), %r12           ## 8-byte Folded Reload
-	adcq	-48(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	adcq	-40(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	adcq	-72(%rsp), %r13         ## 8-byte Folded Reload
-	movq	40(%rbx), %rcx
-	movq	-88(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, %rdi
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	%rax, %r10
-	movq	-104(%rsp), %rax        ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %r15
-	movq	-112(%rsp), %rax        ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbx
-	movq	-16(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, %rsi
-	movq	-32(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	movq	%rax, %r8
-	movq	-24(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	addq	%r11, %rax
-	movq	-8(%rsp), %rcx          ## 8-byte Reload
-	movq	%rax, 40(%rcx)
-	adcq	%r14, %r8
-	adcq	%rbp, %rsi
-	adcq	%r12, %rbx
-	adcq	-128(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	%r13, %rdi
-	movq	-56(%rsp), %rax         ## 8-byte Reload
-	movq	48(%rax), %r11
-	sbbq	%rcx, %rcx
-	movq	%r11, %rax
-	mulq	48(%r9)
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%r11, %rax
-	mulq	40(%r9)
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	%r11, %rax
-	mulq	32(%r9)
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r13
-	movq	%r11, %rax
-	mulq	24(%r9)
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbp
-	movq	%r11, %rax
-	mulq	16(%r9)
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	movq	%rax, %r14
-	movq	%r11, %rax
-	mulq	8(%r9)
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	movq	%rax, %r12
-	movq	%r11, %rax
-	mulq	(%r9)
-	andl	$1, %ecx
-	addq	-40(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	-16(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rbx        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-64(%rsp), %rdi         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rcx         ## 8-byte Folded Reload
-	addq	%rax, %r8
-	movq	-8(%rsp), %r9           ## 8-byte Reload
-	movq	%r8, 48(%r9)
-	adcq	%r12, %rsi
-	adcq	%r14, %rbx
-	adcq	%rbp, %r15
-	adcq	%r13, %r10
-	adcq	-32(%rsp), %rdi         ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rcx        ## 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	addq	%rdx, %rsi
-	adcq	-48(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%r9, %rdx
-	movq	%rsi, 56(%rdx)
-	movq	%rbx, 64(%rdx)
-	adcq	-24(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%r15, 72(%rdx)
-	adcq	-72(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%r10, 80(%rdx)
-	adcq	-128(%rsp), %rdi        ## 8-byte Folded Reload
-	movq	%rdi, 88(%rdx)
-	adcq	-120(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	%rcx, 96(%rdx)
-	adcq	-56(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, 104(%rdx)
-	addq	$16, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sqrPre7L
-	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre7L:                    ## @mcl_fpDbl_sqrPre7L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$16, %rsp
-	movq	%rsi, %r9
-	movq	%rdi, -24(%rsp)         ## 8-byte Spill
-	movq	24(%r9), %r10
-	movq	%r10, -128(%rsp)        ## 8-byte Spill
-	movq	32(%r9), %r14
-	movq	%r14, -88(%rsp)         ## 8-byte Spill
-	movq	40(%r9), %rsi
-	movq	%rsi, -80(%rsp)         ## 8-byte Spill
-	movq	48(%r9), %rbp
-	movq	%rbp, -120(%rsp)        ## 8-byte Spill
-	movq	(%r9), %rbx
-	movq	%rbx, %rax
-	mulq	%rbx
-	movq	%rdx, %rcx
-	movq	%rax, (%rdi)
-	movq	%rbp, %rax
-	mulq	%rbx
-	movq	%rdx, %r11
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	%rbx
-	movq	%rdx, %r8
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%r14, %rax
-	mulq	%rbx
-	movq	%rdx, %r13
-	movq	%rax, %rsi
-	movq	%r10, %rax
-	mulq	%rbx
-	movq	%rax, %r14
-	movq	%rdx, %rdi
-	movq	16(%r9), %r15
-	movq	%r15, %rax
-	mulq	%rbx
-	movq	%rax, %r10
-	movq	%rdx, %r12
-	movq	8(%r9), %rbp
-	movq	%rbp, %rax
-	mulq	%rbx
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	addq	%rax, %rcx
-	adcq	%rdx, %r10
-	adcq	%r14, %r12
-	adcq	%rsi, %rdi
-	adcq	-104(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r8, -104(%rsp)         ## 8-byte Spill
-	adcq	$0, %r11
-	movq	%r11, -96(%rsp)         ## 8-byte Spill
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, %r8
-	movq	-80(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, %rsi
-	movq	-88(%rsp), %rax         ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %r11
-	movq	-128(%rsp), %rax        ## 8-byte Reload
-	mulq	%rbp
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r14
-	movq	%r15, %rax
-	mulq	%rbp
-	movq	%rdx, %r15
-	movq	%rax, %rbx
-	movq	%rbp, %rax
-	mulq	%rbp
-	movq	%rax, %rbp
-	addq	-72(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	-24(%rsp), %rax         ## 8-byte Reload
-	movq	%rcx, 8(%rax)
-	adcq	%r10, %rbp
-	adcq	%r12, %rbx
-	adcq	%rdi, %r14
-	adcq	%r13, %r11
-	movq	%rsi, %rax
-	adcq	-104(%rsp), %rax        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r8          ## 8-byte Folded Reload
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	addq	-112(%rsp), %rbp        ## 8-byte Folded Reload
-	adcq	%rdx, %rbx
-	adcq	%r15, %r14
-	adcq	-128(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	-88(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	adcq	-80(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r8, -40(%rsp)          ## 8-byte Spill
-	adcq	-120(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	48(%r9), %rax
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	16(%r9), %rdi
-	mulq	%rdi
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	40(%r9), %rax
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	32(%r9), %rax
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r13
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	movq	24(%r9), %rcx
-	movq	%rcx, %rax
-	mulq	%rdi
-	movq	%rax, %r10
-	movq	%r10, -8(%rsp)          ## 8-byte Spill
-	movq	%rdx, %r12
-	movq	%r12, -72(%rsp)         ## 8-byte Spill
-	movq	8(%r9), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r15
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	(%r9), %rax
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	movq	%rax, %r8
-	movq	%rdi, %rax
-	mulq	%rdi
-	movq	%rax, %rdi
-	addq	%rbp, %r8
-	movq	-24(%rsp), %rax         ## 8-byte Reload
-	movq	%r8, 16(%rax)
-	adcq	%rbx, %r15
-	adcq	%r14, %rdi
-	adcq	%r10, %r11
-	adcq	-48(%rsp), %r13         ## 8-byte Folded Reload
-	movq	-56(%rsp), %r10         ## 8-byte Reload
-	adcq	-40(%rsp), %r10         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	adcq	%rsi, %rax
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	addq	-16(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-64(%rsp), %rdi         ## 8-byte Folded Reload
-	adcq	%rdx, %r11
-	adcq	%r12, %r13
-	adcq	-32(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-128(%rsp), %rax        ## 8-byte Folded Reload
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	adcq	-96(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	-112(%rsp), %rax        ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r14
-	movq	-80(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	-88(%rsp), %rax         ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	%rax, %r8
-	movq	(%rsp), %rax            ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	movq	%rax, %r12
-	movq	-104(%rsp), %rax        ## 8-byte Reload
-	mulq	%rcx
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbx
-	movq	%rcx, %rax
-	mulq	%rcx
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	addq	%r15, %rbx
-	movq	-24(%rsp), %rcx         ## 8-byte Reload
-	movq	%rbx, 24(%rcx)
-	adcq	%rdi, %r12
-	adcq	-8(%rsp), %r11          ## 8-byte Folded Reload
-	adcq	%r13, %rax
-	movq	%rax, %r15
-	movq	%r8, %rsi
-	adcq	%r10, %rsi
-	movq	-112(%rsp), %rbx        ## 8-byte Reload
-	adcq	-120(%rsp), %rbx        ## 8-byte Folded Reload
-	adcq	%rbp, %r14
-	sbbq	%r8, %r8
-	movq	8(%r9), %rcx
-	movq	40(%r9), %r13
-	movq	%rcx, %rax
-	mulq	%r13
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	(%r9), %rbp
-	movq	%rbp, %rax
-	mulq	%r13
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	32(%r9), %rdi
-	movq	%rcx, %rax
-	mulq	%rdi
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdi
-	movq	%rax, %rbp
-	movq	%rdx, (%rsp)            ## 8-byte Spill
-	andl	$1, %r8d
-	addq	-64(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-48(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%r15, -64(%rsp)         ## 8-byte Spill
-	adcq	-56(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, -56(%rsp)         ## 8-byte Spill
-	adcq	-40(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, -112(%rsp)        ## 8-byte Spill
-	adcq	-32(%rsp), %r14         ## 8-byte Folded Reload
-	adcq	-128(%rsp), %r8         ## 8-byte Folded Reload
-	movq	48(%r9), %rax
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, %rcx
-	movq	%r13, %rax
-	mulq	%rdi
-	movq	%rax, %rsi
-	movq	%rsi, -48(%rsp)         ## 8-byte Spill
-	movq	%rdx, %rbx
-	movq	24(%r9), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r15
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	movq	16(%r9), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	%rdi
-	movq	%rax, %rdi
-	addq	%rbp, %r12
-	movq	-24(%rsp), %rbp         ## 8-byte Reload
-	movq	%r12, 32(%rbp)
-	adcq	-8(%rsp), %r11          ## 8-byte Folded Reload
-	adcq	-64(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-56(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rdi        ## 8-byte Folded Reload
-	adcq	%rsi, %r14
-	adcq	%r8, %rcx
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	addq	(%rsp), %r11            ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r10        ## 8-byte Folded Reload
-	adcq	8(%rsp), %r15           ## 8-byte Folded Reload
-	adcq	-16(%rsp), %rdi         ## 8-byte Folded Reload
-	adcq	%rdx, %r14
-	adcq	%rbx, %rcx
-	adcq	-72(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rax        ## 8-byte Reload
-	mulq	%r13
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	-32(%rsp), %rax         ## 8-byte Reload
-	mulq	%r13
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r8
-	movq	-40(%rsp), %rax         ## 8-byte Reload
-	mulq	%r13
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, %r12
-	movq	%r13, %rax
-	mulq	%r13
-	movq	%rax, %r13
-	addq	-104(%rsp), %r11        ## 8-byte Folded Reload
-	movq	%r11, 40(%rbp)
-	adcq	-96(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	%r15, %r12
-	adcq	%rdi, %r8
-	movq	%r14, %rax
-	adcq	-48(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	%rcx, %r13
-	movq	-120(%rsp), %rcx        ## 8-byte Reload
-	adcq	%rsi, %rcx
-	sbbq	%r14, %r14
-	andl	$1, %r14d
-	addq	-88(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r8, -104(%rsp)         ## 8-byte Spill
-	adcq	-128(%rsp), %rax        ## 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	adcq	%rbx, %r13
-	adcq	%rdx, %rcx
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	adcq	-112(%rsp), %r14        ## 8-byte Folded Reload
-	movq	48(%r9), %rcx
-	movq	%rcx, %rax
-	mulq	40(%r9)
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	32(%r9)
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbx
-	movq	%rcx, %rax
-	mulq	24(%r9)
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	16(%r9)
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r11
-	movq	%rcx, %rax
-	mulq	8(%r9)
-	movq	%rdx, %r15
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	(%r9)
-	movq	%rdx, %r9
-	movq	%rax, %rsi
-	movq	%rcx, %rax
-	mulq	%rcx
-	addq	%r10, %rsi
-	movq	-24(%rsp), %r10         ## 8-byte Reload
-	movq	%rsi, 48(%r10)
-	adcq	%r12, %rdi
-	adcq	-104(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	%r13, %rbx
-	adcq	-120(%rsp), %r8         ## 8-byte Folded Reload
-	adcq	%r14, %rax
-	sbbq	%rcx, %rcx
-	andl	$1, %ecx
-	addq	%r9, %rdi
-	adcq	%r15, %r11
-	movq	%r10, %rsi
-	movq	%rdi, 56(%rsi)
-	movq	%r11, 64(%rsi)
-	adcq	-128(%rsp), %rbp        ## 8-byte Folded Reload
-	movq	%rbp, 72(%rsi)
-	adcq	-88(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, 80(%rsi)
-	adcq	-80(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r8, 88(%rsi)
-	adcq	-112(%rsp), %rax        ## 8-byte Folded Reload
-	movq	%rax, 96(%rsi)
-	adcq	%rdx, %rcx
-	movq	%rcx, 104(%rsi)
-	addq	$16, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_mont7L
-	.p2align	4, 0x90
-_mcl_fp_mont7L:                         ## @mcl_fp_mont7L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$88, %rsp
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	movq	%rdi, 80(%rsp)          ## 8-byte Spill
-	movq	48(%rsi), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	(%rdx), %rdi
-	mulq	%rdi
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	%rdx, %r12
-	movq	40(%rsi), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	%rdx, %r8
-	movq	32(%rsi), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rdx, %r9
-	movq	24(%rsi), %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r14
-	movq	%rdx, %r11
-	movq	16(%rsi), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	mulq	%rdi
-	movq	%rax, %r15
-	movq	%rdx, %rbx
-	movq	(%rsi), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	mulq	%rdi
-	movq	%rdx, %r13
-	movq	%rax, %rsi
-	movq	%rbp, %rax
-	mulq	%rdi
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rdx, %r10
-	addq	%rsi, %r10
-	adcq	%r15, %r13
-	adcq	%r14, %rbx
-	movq	%rbx, -72(%rsp)         ## 8-byte Spill
-	adcq	-8(%rsp), %r11          ## 8-byte Folded Reload
-	movq	%r11, -56(%rsp)         ## 8-byte Spill
-	adcq	(%rsp), %r9             ## 8-byte Folded Reload
-	movq	%r9, -112(%rsp)         ## 8-byte Spill
-	adcq	8(%rsp), %r8            ## 8-byte Folded Reload
-	movq	%r8, -104(%rsp)         ## 8-byte Spill
-	adcq	$0, %r12
-	movq	%r12, -96(%rsp)         ## 8-byte Spill
-	movq	-8(%rcx), %rdx
-	movq	%rdx, 40(%rsp)          ## 8-byte Spill
-	movq	%rax, %rdi
-	imulq	%rdx, %rdi
-	movq	48(%rcx), %rdx
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rdx
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	40(%rcx), %rdx
-	movq	%rdx, (%rsp)            ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rdx
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	32(%rcx), %rdx
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rdx
-	movq	%rax, %r14
-	movq	%rdx, %r9
-	movq	24(%rcx), %rdx
-	movq	%rdx, 64(%rsp)          ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rdx
-	movq	%rax, %r8
-	movq	%rdx, %rbx
-	movq	16(%rcx), %rdx
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rdx
-	movq	%rax, %r15
-	movq	%rdx, %rbp
-	movq	(%rcx), %rsi
-	movq	%rsi, 48(%rsp)          ## 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, 72(%rsp)          ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	%rcx
-	movq	%rdx, %rcx
-	movq	%rax, %r12
-	movq	%rdi, %rax
-	mulq	%rsi
-	movq	%rdx, %r11
-	addq	%r12, %r11
-	adcq	%r15, %rcx
-	adcq	%r8, %rbp
-	adcq	%r14, %rbx
-	adcq	-64(%rsp), %r9          ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	-88(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdi        ## 8-byte Reload
-	adcq	$0, %rdi
-	addq	-80(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	%r10, %r11
-	adcq	%r13, %rcx
-	adcq	-72(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-56(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r9         ## 8-byte Folded Reload
-	movq	%r9, -56(%rsp)          ## 8-byte Spill
-	adcq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	adcq	-96(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, -120(%rsp)        ## 8-byte Spill
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	movq	-16(%rsp), %rax         ## 8-byte Reload
-	movq	8(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %r12
-	movq	%rdi, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %r9
-	movq	%rdi, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r8
-	movq	%rdx, %r14
-	addq	%r9, %r14
-	adcq	%r12, %r13
-	adcq	-64(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r10         ## 8-byte Folded Reload
-	movq	-112(%rsp), %rdi        ## 8-byte Reload
-	adcq	-80(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	-104(%rsp), %rdx        ## 8-byte Reload
-	adcq	-72(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%r11, %r8
-	adcq	%rcx, %r14
-	adcq	%rbp, %r13
-	adcq	%rbx, %r15
-	adcq	-56(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	-128(%rsp), %rdi        ## 8-byte Folded Reload
-	movq	%rdi, -112(%rsp)        ## 8-byte Spill
-	adcq	-120(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	adcq	%rsi, %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	movq	%r8, %rcx
-	imulq	40(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	64(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %rbx
-	movq	%rcx, %rax
-	mulq	56(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	72(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	48(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	addq	%rbp, %rcx
-	adcq	%rdi, %rsi
-	adcq	%rbx, %r9
-	adcq	-88(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r11         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdi        ## 8-byte Reload
-	adcq	-72(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r8, %rax
-	adcq	%r14, %rcx
-	adcq	%r13, %rsi
-	adcq	%r15, %r9
-	adcq	%r10, %r12
-	adcq	-112(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rdi        ## 8-byte Folded Reload
-	movq	%rdi, -128(%rsp)        ## 8-byte Spill
-	adcq	-96(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	adcq	$0, -56(%rsp)           ## 8-byte Folded Spill
-	movq	-16(%rsp), %rax         ## 8-byte Reload
-	movq	16(%rax), %rbx
-	movq	%rbx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r8
-	movq	%rbx, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r15
-	movq	%rbx, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r14
-	movq	%rdx, %r10
-	addq	%r15, %r10
-	adcq	%r8, %rdi
-	adcq	-64(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r13         ## 8-byte Folded Reload
-	movq	-112(%rsp), %rbx        ## 8-byte Reload
-	adcq	-80(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	-104(%rsp), %rdx        ## 8-byte Reload
-	adcq	-72(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rcx, %r14
-	adcq	%rsi, %r10
-	adcq	%r9, %rdi
-	adcq	%r12, %rbp
-	adcq	%r11, %r13
-	adcq	-128(%rsp), %rbx        ## 8-byte Folded Reload
-	movq	%rbx, -112(%rsp)        ## 8-byte Spill
-	adcq	-120(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	adcq	-56(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	movq	%r14, %rbx
-	imulq	40(%rsp), %rbx          ## 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	64(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	56(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r15
-	movq	%rbx, %rax
-	mulq	72(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r12
-	movq	%rbx, %rax
-	mulq	48(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	addq	%r12, %r11
-	adcq	%r15, %r8
-	adcq	-64(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r9          ## 8-byte Folded Reload
-	movq	-128(%rsp), %rbx        ## 8-byte Reload
-	adcq	-72(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r14, %rax
-	adcq	%r10, %r11
-	adcq	%rdi, %r8
-	adcq	%rbp, %rsi
-	adcq	%r13, %rcx
-	adcq	-112(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rbx        ## 8-byte Folded Reload
-	movq	%rbx, -128(%rsp)        ## 8-byte Spill
-	adcq	-96(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	adcq	$0, -56(%rsp)           ## 8-byte Folded Spill
-	movq	-16(%rsp), %rax         ## 8-byte Reload
-	movq	24(%rax), %rbx
-	movq	%rbx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r14
-	movq	%rbx, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r15
-	movq	%rbx, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r10
-	movq	%rdx, %r13
-	addq	%r15, %r13
-	adcq	%r14, %rdi
-	adcq	-64(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r12         ## 8-byte Folded Reload
-	movq	-112(%rsp), %rbx        ## 8-byte Reload
-	adcq	-80(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	-104(%rsp), %rdx        ## 8-byte Reload
-	adcq	-72(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%r11, %r10
-	adcq	%r8, %r13
-	adcq	%rsi, %rdi
-	adcq	%rcx, %rbp
-	adcq	%r9, %r12
-	adcq	-128(%rsp), %rbx        ## 8-byte Folded Reload
-	movq	%rbx, -112(%rsp)        ## 8-byte Spill
-	adcq	-120(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	adcq	-56(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	movq	%r10, %rbx
-	imulq	40(%rsp), %rbx          ## 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	64(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	56(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r14
-	movq	%rbx, %rax
-	mulq	72(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r15
-	movq	%rbx, %rax
-	mulq	48(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	addq	%r15, %r11
-	adcq	%r14, %r8
-	adcq	-64(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r9          ## 8-byte Folded Reload
-	movq	-128(%rsp), %rbx        ## 8-byte Reload
-	adcq	-72(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r10, %rax
-	adcq	%r13, %r11
-	adcq	%rdi, %r8
-	adcq	%rbp, %rsi
-	adcq	%r12, %rcx
-	adcq	-112(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rbx        ## 8-byte Folded Reload
-	movq	%rbx, -128(%rsp)        ## 8-byte Spill
-	adcq	-96(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	adcq	$0, -56(%rsp)           ## 8-byte Folded Spill
-	movq	-16(%rsp), %rax         ## 8-byte Reload
-	movq	32(%rax), %rbx
-	movq	%rbx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r13
-	movq	%rbx, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r14
-	movq	%rbx, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r10
-	movq	%rdx, %r12
-	addq	%r14, %r12
-	adcq	%r13, %rdi
-	adcq	-64(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r15         ## 8-byte Folded Reload
-	movq	-112(%rsp), %rbx        ## 8-byte Reload
-	adcq	-80(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	-104(%rsp), %rdx        ## 8-byte Reload
-	adcq	-72(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%r11, %r10
-	adcq	%r8, %r12
-	adcq	%rsi, %rdi
-	adcq	%rcx, %rbp
-	adcq	%r9, %r15
-	adcq	-128(%rsp), %rbx        ## 8-byte Folded Reload
-	movq	%rbx, -112(%rsp)        ## 8-byte Spill
-	adcq	-120(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	adcq	-56(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%r10, %rcx
-	imulq	40(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	64(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	56(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r14
-	movq	%rcx, %rax
-	mulq	72(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	48(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	addq	%r8, %r11
-	adcq	%r14, %rbx
-	adcq	-64(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r13         ## 8-byte Folded Reload
-	movq	-56(%rsp), %rdx         ## 8-byte Reload
-	adcq	-72(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rcx        ## 8-byte Reload
-	adcq	$0, %rcx
-	addq	%r10, %rax
-	adcq	%r12, %r11
-	adcq	%rdi, %rbx
-	adcq	%rbp, %rsi
-	adcq	%r15, %r9
-	adcq	-112(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	adcq	-96(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	movq	-120(%rsp), %r15        ## 8-byte Reload
-	adcq	$0, %r15
-	movq	-16(%rsp), %rax         ## 8-byte Reload
-	movq	40(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r14
-	movq	%rcx, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r10
-	movq	%rdx, %r8
-	addq	%r14, %r8
-	adcq	%r12, %rdi
-	adcq	-64(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	-120(%rsp), %r14        ## 8-byte Reload
-	adcq	-88(%rsp), %r14         ## 8-byte Folded Reload
-	movq	-112(%rsp), %rdx        ## 8-byte Reload
-	adcq	-80(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-104(%rsp), %rcx        ## 8-byte Reload
-	adcq	-72(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%r11, %r10
-	adcq	%rbx, %r8
-	adcq	%rsi, %rdi
-	adcq	%r9, %rbp
-	adcq	%r13, %r14
-	movq	%r14, -120(%rsp)        ## 8-byte Spill
-	adcq	-56(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	adcq	-128(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	%rcx, -104(%rsp)        ## 8-byte Spill
-	adcq	%r15, %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	movq	%r10, %rcx
-	imulq	40(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	64(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	56(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %r13
-	movq	%rcx, %rax
-	mulq	72(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r9
-	movq	%rcx, %rax
-	mulq	48(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	addq	%r9, %r11
-	adcq	%r13, %rbx
-	adcq	-64(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r14         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r10, %rax
-	adcq	%r8, %r11
-	adcq	%rdi, %rbx
-	adcq	%rbp, %r15
-	adcq	-120(%rsp), %r12        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	%rsi, -112(%rsp)        ## 8-byte Spill
-	adcq	-104(%rsp), %r14        ## 8-byte Folded Reload
-	movq	%r14, -104(%rsp)        ## 8-byte Spill
-	adcq	-96(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	-56(%rsp), %r8          ## 8-byte Reload
-	adcq	$0, %r8
-	movq	-16(%rsp), %rax         ## 8-byte Reload
-	movq	48(%rax), %rcx
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-40(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rdi
-	movq	%rcx, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %rbp
-	movq	%rcx, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %rsi
-	movq	%rdx, %r10
-	addq	%rbp, %r10
-	adcq	%rdi, %r14
-	adcq	-48(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	-40(%rsp), %r9          ## 8-byte Folded Reload
-	movq	-32(%rsp), %rcx         ## 8-byte Reload
-	adcq	-120(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	-24(%rsp), %rax         ## 8-byte Reload
-	adcq	-96(%rsp), %rax         ## 8-byte Folded Reload
-	movq	-16(%rsp), %rdi         ## 8-byte Reload
-	adcq	$0, %rdi
-	addq	%r11, %rsi
-	movq	%rsi, -48(%rsp)         ## 8-byte Spill
-	adcq	%rbx, %r10
-	adcq	%r15, %r14
-	adcq	%r12, %r13
-	adcq	-112(%rsp), %r9         ## 8-byte Folded Reload
-	movq	%r9, -40(%rsp)          ## 8-byte Spill
-	adcq	-104(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	%rcx, -32(%rsp)         ## 8-byte Spill
-	adcq	-128(%rsp), %rax        ## 8-byte Folded Reload
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	adcq	%r8, %rdi
-	movq	%rdi, -16(%rsp)         ## 8-byte Spill
-	sbbq	%rcx, %rcx
-	movq	40(%rsp), %r8           ## 8-byte Reload
-	imulq	%rsi, %r8
-	movq	%r8, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	%r8, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	%r8, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	%r8, %rax
-	mulq	64(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	%r8, %rax
-	mulq	56(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r11
-	movq	%r8, %rax
-	movq	%r8, %r12
-	mulq	48(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, %r8
-	movq	%r12, %rax
-	movq	72(%rsp), %r12          ## 8-byte Reload
-	mulq	%r12
-	andl	$1, %ecx
-	addq	%r15, %rax
-	adcq	%r11, %rdx
-	adcq	16(%rsp), %rbp          ## 8-byte Folded Reload
-	adcq	24(%rsp), %rbx          ## 8-byte Folded Reload
-	adcq	32(%rsp), %rsi          ## 8-byte Folded Reload
-	adcq	40(%rsp), %r9           ## 8-byte Folded Reload
-	adcq	$0, %rdi
-	addq	-48(%rsp), %r8          ## 8-byte Folded Reload
-	adcq	%r10, %rax
-	adcq	%r14, %rdx
-	adcq	%r13, %rbp
-	adcq	-40(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	-32(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-24(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	-16(%rsp), %rdi         ## 8-byte Folded Reload
-	adcq	$0, %rcx
-	movq	%rax, %r8
-	subq	48(%rsp), %r8           ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	sbbq	%r12, %r10
-	movq	%rbp, %r11
-	sbbq	56(%rsp), %r11          ## 8-byte Folded Reload
-	movq	%rbx, %r14
-	sbbq	64(%rsp), %r14          ## 8-byte Folded Reload
-	movq	%rsi, %r15
-	sbbq	-8(%rsp), %r15          ## 8-byte Folded Reload
-	movq	%r9, %r12
-	sbbq	(%rsp), %r12            ## 8-byte Folded Reload
-	movq	%rdi, %r13
-	sbbq	8(%rsp), %r13           ## 8-byte Folded Reload
-	sbbq	$0, %rcx
-	andl	$1, %ecx
-	cmovneq	%rdi, %r13
-	testb	%cl, %cl
-	cmovneq	%rax, %r8
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	%r8, (%rax)
-	cmovneq	%rdx, %r10
-	movq	%r10, 8(%rax)
-	cmovneq	%rbp, %r11
-	movq	%r11, 16(%rax)
-	cmovneq	%rbx, %r14
-	movq	%r14, 24(%rax)
-	cmovneq	%rsi, %r15
-	movq	%r15, 32(%rax)
-	cmovneq	%r9, %r12
-	movq	%r12, 40(%rax)
-	movq	%r13, 48(%rax)
-	addq	$88, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montNF7L
-	.p2align	4, 0x90
-_mcl_fp_montNF7L:                       ## @mcl_fp_montNF7L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$80, %rsp
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	%rdi, 72(%rsp)          ## 8-byte Spill
-	movq	48(%rsi), %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	movq	(%rdx), %rbx
-	mulq	%rbx
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	%rdx, %r12
-	movq	40(%rsi), %rax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	mulq	%rbx
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	%rdx, %r8
-	movq	32(%rsi), %rax
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	mulq	%rbx
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	%rdx, %rbp
-	movq	24(%rsi), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	mulq	%rbx
-	movq	%rax, %r10
-	movq	%rdx, %r15
-	movq	16(%rsi), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	mulq	%rbx
-	movq	%rax, %r9
-	movq	%rdx, %r14
-	movq	(%rsi), %rdi
-	movq	%rdi, -8(%rsp)          ## 8-byte Spill
-	movq	8(%rsi), %rax
-	movq	%rax, 64(%rsp)          ## 8-byte Spill
-	mulq	%rbx
-	movq	%rdx, %r13
-	movq	%rax, %r11
-	movq	%rdi, %rax
-	mulq	%rbx
-	movq	%rdx, %rsi
-	addq	%r11, %rsi
-	adcq	%r9, %r13
-	adcq	%r10, %r14
-	adcq	-32(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-24(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rbp, -128(%rsp)        ## 8-byte Spill
-	adcq	-16(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r8, -120(%rsp)         ## 8-byte Spill
-	adcq	$0, %r12
-	movq	%r12, -104(%rsp)        ## 8-byte Spill
-	movq	-8(%rcx), %rdx
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	movq	%rax, %r10
-	movq	%rax, %r8
-	imulq	%rdx, %r10
-	movq	48(%rcx), %rdx
-	movq	%rdx, 32(%rsp)          ## 8-byte Spill
-	movq	%r10, %rax
-	mulq	%rdx
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	40(%rcx), %rdx
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	movq	%r10, %rax
-	mulq	%rdx
-	movq	%rax, %r11
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	32(%rcx), %rdx
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	movq	%r10, %rax
-	mulq	%rdx
-	movq	%rax, %rbp
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	24(%rcx), %rdx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	movq	%r10, %rax
-	mulq	%rdx
-	movq	%rax, %r12
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	16(%rcx), %rdx
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	movq	%r10, %rax
-	mulq	%rdx
-	movq	%rax, %rbx
-	movq	%rdx, 24(%rsp)          ## 8-byte Spill
-	movq	(%rcx), %rdi
-	movq	%rdi, 40(%rsp)          ## 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	movq	%r10, %rax
-	mulq	%rcx
-	movq	%rdx, %r9
-	movq	%rax, %rcx
-	movq	%r10, %rax
-	mulq	%rdi
-	addq	%r8, %rax
-	adcq	%rsi, %rcx
-	adcq	%r13, %rbx
-	adcq	%r14, %r12
-	adcq	%r15, %rbp
-	adcq	-128(%rsp), %r11        ## 8-byte Folded Reload
-	movq	-112(%rsp), %rdi        ## 8-byte Reload
-	adcq	-120(%rsp), %rdi        ## 8-byte Folded Reload
-	movq	-104(%rsp), %rax        ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rdx, %rcx
-	adcq	%r9, %rbx
-	adcq	24(%rsp), %r12          ## 8-byte Folded Reload
-	adcq	-88(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%r11, -120(%rsp)        ## 8-byte Spill
-	adcq	-72(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, -112(%rsp)        ## 8-byte Spill
-	adcq	-96(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	-40(%rsp), %rax         ## 8-byte Reload
-	movq	8(%rax), %rsi
-	movq	%rsi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %rdi
-	movq	%rsi, %rax
-	mulq	64(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r11
-	movq	%rsi, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r10
-	movq	%rdx, %r15
-	addq	%r11, %r15
-	adcq	%rdi, %r8
-	adcq	24(%rsp), %r9           ## 8-byte Folded Reload
-	adcq	-88(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r14         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	-72(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rcx, %r10
-	adcq	%rbx, %r15
-	adcq	%r12, %r8
-	adcq	%rbp, %r9
-	adcq	-120(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r14        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%r10, %rsi
-	imulq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	movq	%rsi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbx
-	movq	%rsi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %r11
-	movq	%rsi, %rax
-	mulq	56(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r12
-	movq	%rsi, %rax
-	mulq	48(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %rbp
-	movq	%rsi, %rax
-	mulq	40(%rsp)                ## 8-byte Folded Reload
-	addq	%r10, %rax
-	adcq	%r15, %rbp
-	adcq	%r8, %r12
-	adcq	%r9, %r11
-	adcq	%r13, %rbx
-	movq	-120(%rsp), %r8         ## 8-byte Reload
-	adcq	%r14, %r8
-	movq	-112(%rsp), %rsi        ## 8-byte Reload
-	adcq	-128(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rdx, %rbp
-	adcq	%rdi, %r12
-	adcq	%rcx, %r11
-	adcq	-88(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r8, -120(%rsp)         ## 8-byte Spill
-	adcq	-72(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, -112(%rsp)        ## 8-byte Spill
-	adcq	-104(%rsp), %rax        ## 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	-40(%rsp), %rax         ## 8-byte Reload
-	movq	16(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -104(%rsp)        ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r14
-	movq	%rdi, %rax
-	mulq	64(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r13
-	movq	%rdi, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r10
-	movq	%rdx, %r15
-	addq	%r13, %r15
-	adcq	%r14, %rcx
-	adcq	24(%rsp), %r8           ## 8-byte Folded Reload
-	adcq	-88(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r9          ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	-72(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-104(%rsp), %rax        ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rbp, %r10
-	adcq	%r12, %r15
-	adcq	%r11, %rcx
-	adcq	%rbx, %r8
-	adcq	-120(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r9         ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%r10, %rdi
-	imulq	16(%rsp), %rdi          ## 8-byte Folded Reload
-	movq	%rdi, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %r11
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, %r12
-	movq	%rdi, %rax
-	mulq	56(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rbp
-	movq	%rdi, %rax
-	mulq	48(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	mulq	40(%rsp)                ## 8-byte Folded Reload
-	addq	%r10, %rax
-	adcq	%r15, %rbx
-	adcq	%rcx, %rbp
-	adcq	%r8, %r12
-	adcq	%rsi, %r11
-	movq	-112(%rsp), %rcx        ## 8-byte Reload
-	adcq	%r9, %rcx
-	movq	-96(%rsp), %rsi         ## 8-byte Reload
-	adcq	-128(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	-104(%rsp), %rax        ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rdx, %rbx
-	adcq	%r14, %rbp
-	adcq	%r13, %r12
-	adcq	-120(%rsp), %r11        ## 8-byte Folded Reload
-	movq	%r11, -120(%rsp)        ## 8-byte Spill
-	adcq	-88(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -112(%rsp)        ## 8-byte Spill
-	adcq	-80(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, -96(%rsp)         ## 8-byte Spill
-	adcq	-72(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	-40(%rsp), %rax         ## 8-byte Reload
-	movq	24(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r13
-	movq	%rdi, %rax
-	mulq	64(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r14
-	movq	%rdi, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r10
-	movq	%rdx, %rdi
-	addq	%r14, %rdi
-	adcq	%r13, %r8
-	adcq	-88(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-128(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	$0, %r9
-	addq	%rbx, %r10
-	adcq	%rbp, %rdi
-	adcq	%r12, %r8
-	adcq	-120(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r11        ## 8-byte Folded Reload
-	movq	%r11, -112(%rsp)        ## 8-byte Spill
-	adcq	$0, %r9
-	movq	%r10, %rbp
-	imulq	16(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, %r12
-	movq	%rbp, %rax
-	mulq	56(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %r11
-	movq	%rbp, %rax
-	mulq	48(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %rbx
-	movq	%rbp, %rax
-	mulq	40(%rsp)                ## 8-byte Folded Reload
-	addq	%r10, %rax
-	adcq	%rdi, %rbx
-	adcq	%r8, %r11
-	adcq	%rcx, %r12
-	adcq	%rsi, %r14
-	movq	-104(%rsp), %rcx        ## 8-byte Reload
-	adcq	%r15, %rcx
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	adcq	-112(%rsp), %rax        ## 8-byte Folded Reload
-	adcq	$0, %r9
-	addq	%rdx, %rbx
-	adcq	%r13, %r11
-	adcq	-88(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r14         ## 8-byte Folded Reload
-	movq	%r14, -112(%rsp)        ## 8-byte Spill
-	adcq	-72(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -104(%rsp)        ## 8-byte Spill
-	adcq	-128(%rsp), %rax        ## 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	adcq	-120(%rsp), %r9         ## 8-byte Folded Reload
-	movq	-40(%rsp), %rax         ## 8-byte Reload
-	movq	32(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %rbp
-	movq	%rdi, %rax
-	mulq	64(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r14
-	movq	%rdi, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %rdi
-	movq	%rdx, %r13
-	addq	%r14, %r13
-	adcq	%rbp, %r8
-	adcq	-88(%rsp), %rcx         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r10         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rax        ## 8-byte Reload
-	adcq	-128(%rsp), %rax        ## 8-byte Folded Reload
-	adcq	$0, %r15
-	addq	%rbx, %rdi
-	adcq	%r11, %r13
-	adcq	%r12, %r8
-	adcq	-112(%rsp), %rcx        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r10         ## 8-byte Folded Reload
-	adcq	%r9, %rax
-	movq	%rax, -120(%rsp)        ## 8-byte Spill
-	adcq	$0, %r15
-	movq	%rdi, %rbp
-	imulq	16(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, %r9
-	movq	%rbp, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, %r12
-	movq	%rbp, %rax
-	mulq	56(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	48(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %rbx
-	movq	%rbp, %rax
-	mulq	40(%rsp)                ## 8-byte Folded Reload
-	addq	%rdi, %rax
-	adcq	%r13, %rbx
-	adcq	%r8, %r14
-	adcq	%rcx, %r12
-	adcq	%rsi, %r9
-	movq	-112(%rsp), %rcx        ## 8-byte Reload
-	adcq	%r10, %rcx
-	movq	-104(%rsp), %rax        ## 8-byte Reload
-	adcq	-120(%rsp), %rax        ## 8-byte Folded Reload
-	adcq	$0, %r15
-	addq	%rdx, %rbx
-	adcq	%r11, %r14
-	adcq	-88(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-128(%rsp), %r9         ## 8-byte Folded Reload
-	movq	%r9, -128(%rsp)         ## 8-byte Spill
-	adcq	-80(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -112(%rsp)        ## 8-byte Spill
-	adcq	-72(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	adcq	-96(%rsp), %r15         ## 8-byte Folded Reload
-	movq	-40(%rsp), %rax         ## 8-byte Reload
-	movq	40(%rax), %rbp
-	movq	%rbp, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %rcx
-	movq	%rbp, %rax
-	mulq	64(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	movq	%rax, %r9
-	movq	%rbp, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r11
-	movq	%rdx, %r10
-	addq	%r9, %r10
-	adcq	%rcx, %r8
-	adcq	24(%rsp), %rdi          ## 8-byte Folded Reload
-	adcq	-88(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-80(%rsp), %r13         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rcx        ## 8-byte Reload
-	adcq	-72(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rbx, %r11
-	adcq	%r14, %r10
-	adcq	%r12, %r8
-	adcq	-128(%rsp), %rdi        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	%r15, %rcx
-	movq	%rcx, -120(%rsp)        ## 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%r11, %rbx
-	imulq	16(%rsp), %rbx          ## 8-byte Folded Reload
-	movq	%rbx, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	movq	%rax, -112(%rsp)        ## 8-byte Spill
-	movq	%rbx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, %r9
-	movq	%rbx, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, %r15
-	movq	%rbx, %rax
-	mulq	56(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, %rbp
-	movq	%rbx, %rax
-	mulq	48(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %rcx
-	movq	%rbx, %rax
-	mulq	40(%rsp)                ## 8-byte Folded Reload
-	addq	%r11, %rax
-	adcq	%r10, %rcx
-	adcq	%r8, %rbp
-	adcq	%rdi, %r15
-	adcq	%rsi, %r9
-	movq	-112(%rsp), %rbx        ## 8-byte Reload
-	adcq	%r13, %rbx
-	movq	-104(%rsp), %rsi        ## 8-byte Reload
-	adcq	-120(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	-96(%rsp), %rax         ## 8-byte Reload
-	adcq	$0, %rax
-	addq	%rdx, %rcx
-	adcq	%r12, %rbp
-	adcq	%r14, %r15
-	adcq	-88(%rsp), %r9          ## 8-byte Folded Reload
-	movq	%r9, -120(%rsp)         ## 8-byte Spill
-	adcq	-80(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, -112(%rsp)        ## 8-byte Spill
-	adcq	-72(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, -104(%rsp)        ## 8-byte Spill
-	adcq	-128(%rsp), %rax        ## 8-byte Folded Reload
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	-40(%rsp), %rax         ## 8-byte Reload
-	movq	48(%rax), %rdi
-	movq	%rdi, %rax
-	mulq	-48(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-56(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-64(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	movq	%rax, -64(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %r9
-	movq	%rdi, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	mulq	64(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %rsi
-	movq	%rdi, %rax
-	mulq	-8(%rsp)                ## 8-byte Folded Reload
-	movq	%rax, %r12
-	movq	%rdx, %r8
-	addq	%rsi, %r8
-	adcq	%rbx, %r10
-	adcq	%r9, %r11
-	adcq	-64(%rsp), %r13         ## 8-byte Folded Reload
-	movq	-48(%rsp), %rdx         ## 8-byte Reload
-	adcq	-56(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	-40(%rsp), %rax         ## 8-byte Reload
-	adcq	-128(%rsp), %rax        ## 8-byte Folded Reload
-	adcq	$0, %r14
-	addq	%rcx, %r12
-	adcq	%rbp, %r8
-	adcq	%r15, %r10
-	adcq	-120(%rsp), %r11        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r13        ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -48(%rsp)         ## 8-byte Spill
-	adcq	-96(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	adcq	$0, %r14
-	movq	16(%rsp), %rdi          ## 8-byte Reload
-	imulq	%r12, %rdi
-	movq	%rdi, %rax
-	mulq	32(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	movq	%rax, %r9
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -56(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbp
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	%rax, %rsi
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	movq	%rax, %rcx
-	movq	%rdi, %rax
-	mulq	40(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, (%rsp)            ## 8-byte Spill
-	movq	%rax, %r15
-	movq	%rdi, %rax
-	mulq	56(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -8(%rsp)          ## 8-byte Spill
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	mulq	48(%rsp)                ## 8-byte Folded Reload
-	addq	%r12, %r15
-	adcq	%r8, %rax
-	adcq	%r10, %rbx
-	adcq	%r11, %rcx
-	adcq	%r13, %rsi
-	adcq	-48(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-40(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	$0, %r14
-	addq	(%rsp), %rax            ## 8-byte Folded Reload
-	adcq	%rdx, %rbx
-	adcq	-8(%rsp), %rcx          ## 8-byte Folded Reload
-	adcq	8(%rsp), %rsi           ## 8-byte Folded Reload
-	adcq	-64(%rsp), %rbp         ## 8-byte Folded Reload
-	adcq	-56(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	16(%rsp), %r14          ## 8-byte Folded Reload
-	movq	%rax, %r13
-	subq	40(%rsp), %r13          ## 8-byte Folded Reload
-	movq	%rbx, %r12
-	sbbq	48(%rsp), %r12          ## 8-byte Folded Reload
-	movq	%rcx, %r8
-	sbbq	56(%rsp), %r8           ## 8-byte Folded Reload
-	movq	%rsi, %r10
-	sbbq	-32(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%rbp, %r11
-	sbbq	-24(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%r9, %r15
-	sbbq	-16(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%r14, %rdx
-	sbbq	32(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	sarq	$63, %rdi
-	cmovsq	%rax, %r13
-	movq	72(%rsp), %rax          ## 8-byte Reload
-	movq	%r13, (%rax)
-	cmovsq	%rbx, %r12
-	movq	%r12, 8(%rax)
-	cmovsq	%rcx, %r8
-	movq	%r8, 16(%rax)
-	cmovsq	%rsi, %r10
-	movq	%r10, 24(%rax)
-	cmovsq	%rbp, %r11
-	movq	%r11, 32(%rax)
-	cmovsq	%r9, %r15
-	movq	%r15, 40(%rax)
-	cmovsq	%r14, %rdx
-	movq	%rdx, 48(%rax)
-	addq	$80, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montRed7L
-	.p2align	4, 0x90
-_mcl_fp_montRed7L:                      ## @mcl_fp_montRed7L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$72, %rsp
-	movq	%rdx, %rcx
-	movq	%rdi, 64(%rsp)          ## 8-byte Spill
-	movq	-8(%rcx), %rax
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %rbp
-	movq	%rbp, -48(%rsp)         ## 8-byte Spill
-	imulq	%rax, %rbp
-	movq	48(%rcx), %rdx
-	movq	%rdx, (%rsp)            ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	40(%rcx), %rdx
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rdx, %r15
-	movq	32(%rcx), %rdx
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rax, %r14
-	movq	%rdx, %r11
-	movq	24(%rcx), %rdx
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rax, %r13
-	movq	%rdx, %r10
-	movq	16(%rcx), %rdx
-	movq	%rdx, -16(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rdx
-	movq	%rax, %r9
-	movq	%rdx, %r12
-	movq	(%rcx), %rdi
-	movq	%rdi, 24(%rsp)          ## 8-byte Spill
-	movq	8(%rcx), %rcx
-	movq	%rcx, -24(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%rcx
-	movq	%rdx, %rcx
-	movq	%rax, %rbx
-	movq	%rbp, %rax
-	mulq	%rdi
-	movq	%rdx, %r8
-	addq	%rbx, %r8
-	adcq	%r9, %rcx
-	adcq	%r13, %r12
-	adcq	%r14, %r10
-	adcq	-72(%rsp), %r11         ## 8-byte Folded Reload
-	adcq	-104(%rsp), %r15        ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	-48(%rsp), %rax         ## 8-byte Folded Reload
-	adcq	8(%rsi), %r8
-	adcq	16(%rsi), %rcx
-	adcq	24(%rsi), %r12
-	adcq	32(%rsi), %r10
-	movq	%r10, 40(%rsp)          ## 8-byte Spill
-	adcq	40(%rsi), %r11
-	movq	%r11, -40(%rsp)         ## 8-byte Spill
-	adcq	48(%rsi), %r15
-	movq	%r15, -96(%rsp)         ## 8-byte Spill
-	adcq	56(%rsi), %rdx
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	104(%rsi), %rax
-	movq	96(%rsi), %rdx
-	movq	88(%rsi), %rdi
-	movq	80(%rsi), %rbp
-	movq	72(%rsi), %rbx
-	movq	64(%rsi), %r9
-	adcq	$0, %r9
-	adcq	$0, %rbx
-	movq	%rbx, -8(%rsp)          ## 8-byte Spill
-	adcq	$0, %rbp
-	movq	%rbp, -80(%rsp)         ## 8-byte Spill
-	adcq	$0, %rdi
-	movq	%rdi, -64(%rsp)         ## 8-byte Spill
-	adcq	$0, %rdx
-	movq	%rdx, -72(%rsp)         ## 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	movq	%r8, %rdi
-	imulq	-56(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	movq	%rdi, %rax
-	movq	16(%rsp), %r13          ## 8-byte Reload
-	mulq	%r13
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, 56(%rsp)          ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %rsi
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %r15
-	movq	%rdi, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r14
-	addq	%r15, %r14
-	adcq	%rsi, %r11
-	adcq	%r10, %rbp
-	adcq	56(%rsp), %rbx          ## 8-byte Folded Reload
-	movq	-88(%rsp), %rdi         ## 8-byte Reload
-	adcq	48(%rsp), %rdi          ## 8-byte Folded Reload
-	movq	-120(%rsp), %rsi        ## 8-byte Reload
-	adcq	32(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	-112(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r8, %rax
-	adcq	%rcx, %r14
-	adcq	%r12, %r11
-	adcq	40(%rsp), %rbp          ## 8-byte Folded Reload
-	adcq	-40(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	-96(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, -88(%rsp)         ## 8-byte Spill
-	adcq	-128(%rsp), %rsi        ## 8-byte Folded Reload
-	movq	%rsi, -120(%rsp)        ## 8-byte Spill
-	adcq	%r9, %rdx
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	adcq	$0, -8(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, -80(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -64(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -72(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -104(%rsp)          ## 8-byte Folded Spill
-	adcq	$0, -48(%rsp)           ## 8-byte Folded Spill
-	movq	%r14, %rcx
-	imulq	-56(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	%rcx, %rax
-	movq	8(%rsp), %r15           ## 8-byte Reload
-	mulq	%r15
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	%r13
-	movq	%rdx, -40(%rsp)         ## 8-byte Spill
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	movq	%rax, %r12
-	movq	%rcx, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, %r13
-	movq	%rcx, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	addq	%r13, %r10
-	adcq	%r12, %r9
-	adcq	%r8, %rdi
-	adcq	48(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	-40(%rsp), %r8          ## 8-byte Reload
-	adcq	32(%rsp), %r8           ## 8-byte Folded Reload
-	movq	-96(%rsp), %rdx         ## 8-byte Reload
-	adcq	40(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	-128(%rsp), %rcx        ## 8-byte Reload
-	adcq	$0, %rcx
-	addq	%r14, %rax
-	adcq	%r11, %r10
-	adcq	%rbp, %r9
-	adcq	%rbx, %rdi
-	adcq	-88(%rsp), %rsi         ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r8         ## 8-byte Folded Reload
-	movq	%r8, -40(%rsp)          ## 8-byte Spill
-	adcq	-112(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	%rdx, -96(%rsp)         ## 8-byte Spill
-	adcq	-8(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, -128(%rsp)        ## 8-byte Spill
-	adcq	$0, -80(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -64(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -72(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -104(%rsp)          ## 8-byte Folded Spill
-	adcq	$0, -48(%rsp)           ## 8-byte Folded Spill
-	movq	%r10, %rbp
-	imulq	-56(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rbp, %rax
-	movq	(%rsp), %r8             ## 8-byte Reload
-	mulq	%r8
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	%r15
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -88(%rsp)         ## 8-byte Spill
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %r13
-	movq	%rbp, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, %r14
-	movq	%rbp, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, %r15
-	movq	%rbp, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	addq	%r15, %r11
-	adcq	%r14, %rbx
-	adcq	%r13, %rcx
-	adcq	32(%rsp), %r12          ## 8-byte Folded Reload
-	movq	-88(%rsp), %r14         ## 8-byte Reload
-	adcq	40(%rsp), %r14          ## 8-byte Folded Reload
-	movq	-120(%rsp), %rbp        ## 8-byte Reload
-	adcq	-8(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	-112(%rsp), %rdx        ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r10, %rax
-	adcq	%r9, %r11
-	adcq	%rdi, %rbx
-	adcq	%rsi, %rcx
-	adcq	-40(%rsp), %r12         ## 8-byte Folded Reload
-	adcq	-96(%rsp), %r14         ## 8-byte Folded Reload
-	movq	%r14, -88(%rsp)         ## 8-byte Spill
-	adcq	-128(%rsp), %rbp        ## 8-byte Folded Reload
-	movq	%rbp, -120(%rsp)        ## 8-byte Spill
-	adcq	-80(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	adcq	$0, -64(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -72(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, -104(%rsp)          ## 8-byte Folded Spill
-	adcq	$0, -48(%rsp)           ## 8-byte Folded Spill
-	movq	%r11, %rdi
-	imulq	-56(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, %rax
-	mulq	%r8
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, -128(%rsp)        ## 8-byte Spill
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	movq	%rax, %r14
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r10
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r8
-	movq	%rdi, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	addq	%r8, %r9
-	adcq	%r10, %rbp
-	adcq	%r14, %rsi
-	adcq	-8(%rsp), %r13          ## 8-byte Folded Reload
-	adcq	-40(%rsp), %r15         ## 8-byte Folded Reload
-	movq	-128(%rsp), %rdi        ## 8-byte Reload
-	adcq	-96(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	-80(%rsp), %rdx         ## 8-byte Reload
-	adcq	$0, %rdx
-	addq	%r11, %rax
-	adcq	%rbx, %r9
-	adcq	%rcx, %rbp
-	adcq	%r12, %rsi
-	adcq	-88(%rsp), %r13         ## 8-byte Folded Reload
-	adcq	-120(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %rdi        ## 8-byte Folded Reload
-	movq	%rdi, -128(%rsp)        ## 8-byte Spill
-	adcq	-64(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -80(%rsp)         ## 8-byte Spill
-	adcq	$0, -72(%rsp)           ## 8-byte Folded Spill
-	movq	-104(%rsp), %r14        ## 8-byte Reload
-	adcq	$0, %r14
-	adcq	$0, -48(%rsp)           ## 8-byte Folded Spill
-	movq	%r9, %rdi
-	imulq	-56(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, -64(%rsp)         ## 8-byte Spill
-	movq	%rax, -104(%rsp)        ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	movq	%rax, -88(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, -120(%rsp)        ## 8-byte Spill
-	movq	%rax, -96(%rsp)         ## 8-byte Spill
-	movq	%rdi, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r12
-	movq	%rax, %rbx
-	movq	%rdi, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r11
-	movq	%rax, %rcx
-	movq	%rdi, %rax
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %r10
-	movq	%rax, %r8
-	movq	%rdi, %rax
-	mulq	24(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %rdi
-	addq	%r8, %rdi
-	adcq	%rcx, %r10
-	adcq	%rbx, %r11
-	adcq	-96(%rsp), %r12         ## 8-byte Folded Reload
-	movq	-120(%rsp), %rbx        ## 8-byte Reload
-	adcq	-88(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	-112(%rsp), %rdx        ## 8-byte Reload
-	adcq	-104(%rsp), %rdx        ## 8-byte Folded Reload
-	movq	-64(%rsp), %rcx         ## 8-byte Reload
-	adcq	$0, %rcx
-	addq	%r9, %rax
-	adcq	%rbp, %rdi
-	adcq	%rsi, %r10
-	adcq	%r13, %r11
-	adcq	%r15, %r12
-	adcq	-128(%rsp), %rbx        ## 8-byte Folded Reload
-	movq	%rbx, -120(%rsp)        ## 8-byte Spill
-	adcq	-80(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -112(%rsp)        ## 8-byte Spill
-	adcq	-72(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -64(%rsp)         ## 8-byte Spill
-	adcq	$0, %r14
-	movq	%r14, -104(%rsp)        ## 8-byte Spill
-	adcq	$0, -48(%rsp)           ## 8-byte Folded Spill
-	movq	-56(%rsp), %rbp         ## 8-byte Reload
-	imulq	%rdi, %rbp
-	movq	%rbp, %rax
-	mulq	(%rsp)                  ## 8-byte Folded Reload
-	movq	%rdx, %rcx
-	movq	%rax, -56(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	8(%rsp)                 ## 8-byte Folded Reload
-	movq	%rdx, %r9
-	movq	%rax, -72(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	16(%rsp)                ## 8-byte Folded Reload
-	movq	%rdx, %r15
-	movq	%rax, -80(%rsp)         ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-32(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbx
-	movq	%rax, -128(%rsp)        ## 8-byte Spill
-	movq	%rbp, %rax
-	mulq	-16(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rsi
-	movq	%rax, %r13
-	movq	%rbp, %rax
-	movq	%rbp, %r14
-	mulq	-24(%rsp)               ## 8-byte Folded Reload
-	movq	%rdx, %rbp
-	movq	%rax, %r8
-	movq	%r14, %rax
-	movq	24(%rsp), %r14          ## 8-byte Reload
-	mulq	%r14
-	addq	%r8, %rdx
-	adcq	%r13, %rbp
-	adcq	-128(%rsp), %rsi        ## 8-byte Folded Reload
-	adcq	-80(%rsp), %rbx         ## 8-byte Folded Reload
-	adcq	-72(%rsp), %r15         ## 8-byte Folded Reload
-	adcq	-56(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	$0, %rcx
-	addq	%rdi, %rax
-	adcq	%r10, %rdx
-	adcq	%r11, %rbp
-	adcq	%r12, %rsi
-	adcq	-120(%rsp), %rbx        ## 8-byte Folded Reload
-	adcq	-112(%rsp), %r15        ## 8-byte Folded Reload
-	adcq	-64(%rsp), %r9          ## 8-byte Folded Reload
-	adcq	-104(%rsp), %rcx        ## 8-byte Folded Reload
-	movq	-48(%rsp), %rdi         ## 8-byte Reload
-	adcq	$0, %rdi
-	movq	%rdx, %rax
-	subq	%r14, %rax
-	movq	%rbp, %r13
-	sbbq	-24(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%rsi, %r8
-	sbbq	-16(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%rbx, %r10
-	sbbq	-32(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%r15, %r11
-	sbbq	16(%rsp), %r11          ## 8-byte Folded Reload
-	movq	%r9, %r14
-	sbbq	8(%rsp), %r14           ## 8-byte Folded Reload
-	movq	%rcx, %r12
-	sbbq	(%rsp), %r12            ## 8-byte Folded Reload
-	sbbq	$0, %rdi
-	andl	$1, %edi
-	cmovneq	%rcx, %r12
-	testb	%dil, %dil
-	cmovneq	%rdx, %rax
-	movq	64(%rsp), %rcx          ## 8-byte Reload
-	movq	%rax, (%rcx)
-	cmovneq	%rbp, %r13
-	movq	%r13, 8(%rcx)
-	cmovneq	%rsi, %r8
-	movq	%r8, 16(%rcx)
-	cmovneq	%rbx, %r10
-	movq	%r10, 24(%rcx)
-	cmovneq	%r15, %r11
-	movq	%r11, 32(%rcx)
-	cmovneq	%r9, %r14
-	movq	%r14, 40(%rcx)
-	movq	%r12, 48(%rcx)
-	addq	$72, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_addPre7L
-	.p2align	4, 0x90
-_mcl_fp_addPre7L:                       ## @mcl_fp_addPre7L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r8
-	movq	48(%rsi), %r14
-	movq	40(%rdx), %r9
-	movq	40(%rsi), %r15
-	movq	32(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %rbx
-	adcq	16(%rsi), %r12
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r12, 16(%rdi)
-	adcq	%r11, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r10, %rbx
-	movq	%rbx, 32(%rdi)
-	adcq	%r9, %r15
-	movq	%r15, 40(%rdi)
-	adcq	%r8, %r14
-	movq	%r14, 48(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_subPre7L
-	.p2align	4, 0x90
-_mcl_fp_subPre7L:                       ## @mcl_fp_subPre7L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r8
-	movq	48(%rsi), %r10
-	movq	40(%rdx), %r9
-	movq	40(%rsi), %r15
-	movq	24(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %r12
-	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %r12
-	movq	16(%rsi), %rcx
-	sbbq	16(%rdx), %rcx
-	movq	32(%rsi), %rdx
-	movq	24(%rsi), %rsi
-	movq	%rbx, (%rdi)
-	movq	%r12, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r11, %rsi
-	movq	%rsi, 24(%rdi)
-	sbbq	%r14, %rdx
-	movq	%rdx, 32(%rdi)
-	sbbq	%r9, %r15
-	movq	%r15, 40(%rdi)
-	sbbq	%r8, %r10
-	movq	%r10, 48(%rdi)
-	sbbq	$0, %rax
-	andl	$1, %eax
-	popq	%rbx
-	popq	%r12
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_shr1_7L
-	.p2align	4, 0x90
-_mcl_fp_shr1_7L:                        ## @mcl_fp_shr1_7L
-## BB#0:
-	movq	48(%rsi), %r8
-	movq	40(%rsi), %r9
-	movq	32(%rsi), %r10
-	movq	24(%rsi), %rax
-	movq	16(%rsi), %rcx
-	movq	(%rsi), %rdx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rdx
-	movq	%rdx, (%rdi)
-	shrdq	$1, %rcx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rax, %rcx
-	movq	%rcx, 16(%rdi)
-	shrdq	$1, %r10, %rax
-	movq	%rax, 24(%rdi)
-	shrdq	$1, %r9, %r10
-	movq	%r10, 32(%rdi)
-	shrdq	$1, %r8, %r9
-	movq	%r9, 40(%rdi)
-	shrq	%r8
-	movq	%r8, 48(%rdi)
-	retq
-
-	.globl	_mcl_fp_add7L
-	.p2align	4, 0x90
-_mcl_fp_add7L:                          ## @mcl_fp_add7L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r14
-	movq	48(%rsi), %r8
-	movq	40(%rdx), %r15
-	movq	40(%rsi), %r9
-	movq	32(%rdx), %r12
-	movq	24(%rdx), %r13
-	movq	16(%rdx), %r10
-	movq	(%rdx), %r11
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %r11
-	adcq	8(%rsi), %rdx
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %rbx
-	adcq	16(%rsi), %r10
-	movq	%r11, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	adcq	%r13, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r12, %rbx
-	movq	%rbx, 32(%rdi)
-	adcq	%r15, %r9
-	movq	%r9, 40(%rdi)
-	adcq	%r14, %r8
-	movq	%r8, 48(%rdi)
-	sbbq	%rsi, %rsi
-	andl	$1, %esi
-	subq	(%rcx), %r11
-	sbbq	8(%rcx), %rdx
-	sbbq	16(%rcx), %r10
-	sbbq	24(%rcx), %rax
-	sbbq	32(%rcx), %rbx
-	sbbq	40(%rcx), %r9
-	sbbq	48(%rcx), %r8
-	sbbq	$0, %rsi
-	testb	$1, %sil
-	jne	LBB104_2
-## BB#1:                                ## %nocarry
-	movq	%r11, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	movq	%rax, 24(%rdi)
-	movq	%rbx, 32(%rdi)
-	movq	%r9, 40(%rdi)
-	movq	%r8, 48(%rdi)
-LBB104_2:                               ## %carry
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	retq
-
-	.globl	_mcl_fp_addNF7L
-	.p2align	4, 0x90
-_mcl_fp_addNF7L:                        ## @mcl_fp_addNF7L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r9
-	movq	40(%rdx), %rbp
-	movq	32(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	16(%rdx), %r14
-	movq	(%rdx), %r12
-	movq	8(%rdx), %r15
-	addq	(%rsi), %r12
-	adcq	8(%rsi), %r15
-	adcq	16(%rsi), %r14
-	adcq	24(%rsi), %r11
-	adcq	32(%rsi), %r10
-	adcq	40(%rsi), %rbp
-	movq	%rbp, -8(%rsp)          ## 8-byte Spill
-	adcq	48(%rsi), %r9
-	movq	%r12, %rsi
-	subq	(%rcx), %rsi
-	movq	%r15, %rdx
-	sbbq	8(%rcx), %rdx
-	movq	%r14, %rax
-	sbbq	16(%rcx), %rax
-	movq	%r11, %rbx
-	sbbq	24(%rcx), %rbx
-	movq	%r10, %r13
-	sbbq	32(%rcx), %r13
-	sbbq	40(%rcx), %rbp
-	movq	%r9, %r8
-	sbbq	48(%rcx), %r8
-	movq	%r8, %rcx
-	sarq	$63, %rcx
-	cmovsq	%r12, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r15, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r14, %rax
-	movq	%rax, 16(%rdi)
-	cmovsq	%r11, %rbx
-	movq	%rbx, 24(%rdi)
-	cmovsq	%r10, %r13
-	movq	%r13, 32(%rdi)
-	cmovsq	-8(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 40(%rdi)
-	cmovsq	%r9, %r8
-	movq	%r8, 48(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_sub7L
-	.p2align	4, 0x90
-_mcl_fp_sub7L:                          ## @mcl_fp_sub7L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	48(%rdx), %r14
-	movq	48(%rsi), %r8
-	movq	40(%rdx), %r15
-	movq	40(%rsi), %r9
-	movq	32(%rdx), %r12
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r11
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r11
-	movq	16(%rsi), %r13
-	sbbq	16(%rdx), %r13
-	movq	32(%rsi), %r10
-	movq	24(%rsi), %rsi
-	sbbq	24(%rdx), %rsi
-	movq	%rax, (%rdi)
-	movq	%r11, 8(%rdi)
-	movq	%r13, 16(%rdi)
-	movq	%rsi, 24(%rdi)
-	sbbq	%r12, %r10
-	movq	%r10, 32(%rdi)
-	sbbq	%r15, %r9
-	movq	%r9, 40(%rdi)
-	sbbq	%r14, %r8
-	movq	%r8, 48(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	LBB106_2
-## BB#1:                                ## %carry
-	movq	48(%rcx), %r14
-	movq	40(%rcx), %r15
-	movq	32(%rcx), %r12
-	movq	24(%rcx), %rbx
-	movq	8(%rcx), %rdx
-	movq	16(%rcx), %rbp
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	adcq	%r11, %rdx
-	movq	%rdx, 8(%rdi)
-	adcq	%r13, %rbp
-	movq	%rbp, 16(%rdi)
-	adcq	%rsi, %rbx
-	movq	%rbx, 24(%rdi)
-	adcq	%r10, %r12
-	movq	%r12, 32(%rdi)
-	adcq	%r9, %r15
-	movq	%r15, 40(%rdi)
-	adcq	%r8, %r14
-	movq	%r14, 48(%rdi)
-LBB106_2:                               ## %nocarry
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_subNF7L
-	.p2align	4, 0x90
-_mcl_fp_subNF7L:                        ## @mcl_fp_subNF7L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rcx, %r8
-	movq	48(%rsi), %r11
-	movdqu	(%rdx), %xmm0
-	movdqu	16(%rdx), %xmm1
-	movdqu	32(%rdx), %xmm2
-	pshufd	$78, %xmm2, %xmm3       ## xmm3 = xmm2[2,3,0,1]
-	movd	%xmm3, %r14
-	movdqu	(%rsi), %xmm3
-	movdqu	16(%rsi), %xmm4
-	movdqu	32(%rsi), %xmm5
-	pshufd	$78, %xmm5, %xmm6       ## xmm6 = xmm5[2,3,0,1]
-	movd	%xmm6, %rcx
-	movd	%xmm2, %r15
-	movd	%xmm5, %r9
-	pshufd	$78, %xmm1, %xmm2       ## xmm2 = xmm1[2,3,0,1]
-	movd	%xmm2, %r12
-	pshufd	$78, %xmm4, %xmm2       ## xmm2 = xmm4[2,3,0,1]
-	movd	%xmm2, %r10
-	movd	%xmm1, %r13
-	pshufd	$78, %xmm0, %xmm1       ## xmm1 = xmm0[2,3,0,1]
-	movd	%xmm1, %rax
-	pshufd	$78, %xmm3, %xmm1       ## xmm1 = xmm3[2,3,0,1]
-	movd	%xmm0, %rbx
-	movd	%xmm3, %rsi
-	subq	%rbx, %rsi
-	movd	%xmm1, %rbx
-	sbbq	%rax, %rbx
-	movd	%xmm4, %rbp
-	sbbq	%r13, %rbp
-	sbbq	%r12, %r10
-	sbbq	%r15, %r9
-	sbbq	%r14, %rcx
-	movq	%rcx, -8(%rsp)          ## 8-byte Spill
-	sbbq	48(%rdx), %r11
-	movq	%r11, %rax
-	sarq	$63, %rax
-	movq	%rax, %rdx
-	shldq	$1, %r11, %rdx
-	andq	(%r8), %rdx
-	movq	48(%r8), %r14
-	andq	%rax, %r14
-	movq	40(%r8), %r15
-	andq	%rax, %r15
-	movq	32(%r8), %r12
-	andq	%rax, %r12
-	movq	24(%r8), %r13
-	andq	%rax, %r13
-	movq	16(%r8), %rcx
-	andq	%rax, %rcx
-	andq	8(%r8), %rax
-	addq	%rsi, %rdx
-	adcq	%rbx, %rax
-	movq	%rdx, (%rdi)
-	movq	%rax, 8(%rdi)
-	adcq	%rbp, %rcx
-	movq	%rcx, 16(%rdi)
-	adcq	%r10, %r13
-	movq	%r13, 24(%rdi)
-	adcq	%r9, %r12
-	movq	%r12, 32(%rdi)
-	adcq	-8(%rsp), %r15          ## 8-byte Folded Reload
-	movq	%r15, 40(%rdi)
-	adcq	%r11, %r14
-	movq	%r14, 48(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_add7L
-	.p2align	4, 0x90
-_mcl_fpDbl_add7L:                       ## @mcl_fpDbl_add7L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rcx, %r8
-	movq	104(%rdx), %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	96(%rdx), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	88(%rdx), %r11
-	movq	80(%rdx), %r14
-	movq	24(%rsi), %r15
-	movq	32(%rsi), %r12
-	movq	16(%rdx), %r9
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rbx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rbx
-	adcq	16(%rsi), %r9
-	adcq	24(%rdx), %r15
-	adcq	32(%rdx), %r12
-	movq	72(%rdx), %r13
-	movq	64(%rdx), %rbp
-	movq	%rax, (%rdi)
-	movq	56(%rdx), %r10
-	movq	%rbx, 8(%rdi)
-	movq	48(%rdx), %rcx
-	movq	40(%rdx), %rdx
-	movq	%r9, 16(%rdi)
-	movq	104(%rsi), %r9
-	movq	%r15, 24(%rdi)
-	movq	40(%rsi), %rbx
-	adcq	%rdx, %rbx
-	movq	96(%rsi), %r15
-	movq	%r12, 32(%rdi)
-	movq	48(%rsi), %rdx
-	adcq	%rcx, %rdx
-	movq	88(%rsi), %rax
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rcx
-	adcq	%r10, %rcx
-	movq	80(%rsi), %r12
-	movq	%rdx, 48(%rdi)
-	movq	72(%rsi), %rdx
-	movq	64(%rsi), %rsi
-	adcq	%rbp, %rsi
-	adcq	%r13, %rdx
-	adcq	%r14, %r12
-	adcq	%r11, %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	adcq	-24(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%r15, -24(%rsp)         ## 8-byte Spill
-	adcq	-8(%rsp), %r9           ## 8-byte Folded Reload
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	movq	%rcx, %rbx
-	subq	(%r8), %rbx
-	movq	%rsi, %r10
-	sbbq	8(%r8), %r10
-	movq	%rdx, %r11
-	sbbq	16(%r8), %r11
-	movq	%r12, %r14
-	sbbq	24(%r8), %r14
-	movq	-16(%rsp), %r13         ## 8-byte Reload
-	sbbq	32(%r8), %r13
-	sbbq	40(%r8), %r15
-	movq	%r9, %rax
-	sbbq	48(%r8), %rax
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	cmovneq	%rcx, %rbx
-	movq	%rbx, 56(%rdi)
-	testb	%bpl, %bpl
-	cmovneq	%rsi, %r10
-	movq	%r10, 64(%rdi)
-	cmovneq	%rdx, %r11
-	movq	%r11, 72(%rdi)
-	cmovneq	%r12, %r14
-	movq	%r14, 80(%rdi)
-	cmovneq	-16(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%r13, 88(%rdi)
-	cmovneq	-24(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%r15, 96(%rdi)
-	cmovneq	%r9, %rax
-	movq	%rax, 104(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sub7L
-	.p2align	4, 0x90
-_mcl_fpDbl_sub7L:                       ## @mcl_fpDbl_sub7L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rcx, %r8
-	movq	104(%rdx), %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	96(%rdx), %r10
-	movq	88(%rdx), %r14
-	movq	16(%rsi), %rax
-	movq	(%rsi), %r15
-	movq	8(%rsi), %r11
-	xorl	%ecx, %ecx
-	subq	(%rdx), %r15
-	sbbq	8(%rdx), %r11
-	sbbq	16(%rdx), %rax
-	movq	24(%rsi), %rbx
-	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %r12
-	sbbq	32(%rdx), %r12
-	movq	80(%rdx), %r13
-	movq	72(%rdx), %rbp
-	movq	%r15, (%rdi)
-	movq	64(%rdx), %r9
-	movq	%r11, 8(%rdi)
-	movq	56(%rdx), %r15
-	movq	%rax, 16(%rdi)
-	movq	48(%rdx), %r11
-	movq	40(%rdx), %rdx
-	movq	%rbx, 24(%rdi)
-	movq	40(%rsi), %rbx
-	sbbq	%rdx, %rbx
-	movq	104(%rsi), %rax
-	movq	%r12, 32(%rdi)
-	movq	48(%rsi), %r12
-	sbbq	%r11, %r12
-	movq	96(%rsi), %r11
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rdx
-	sbbq	%r15, %rdx
-	movq	88(%rsi), %r15
-	movq	%r12, 48(%rdi)
-	movq	64(%rsi), %rbx
-	sbbq	%r9, %rbx
-	movq	80(%rsi), %r12
-	movq	72(%rsi), %r9
-	sbbq	%rbp, %r9
-	sbbq	%r13, %r12
-	sbbq	%r14, %r15
-	sbbq	%r10, %r11
-	sbbq	-8(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movl	$0, %ebp
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	movq	(%r8), %r10
-	cmoveq	%rcx, %r10
-	testb	%bpl, %bpl
-	movq	16(%r8), %rbp
-	cmoveq	%rcx, %rbp
-	movq	8(%r8), %rsi
-	cmoveq	%rcx, %rsi
-	movq	48(%r8), %r14
-	cmoveq	%rcx, %r14
-	movq	40(%r8), %r13
-	cmoveq	%rcx, %r13
-	movq	32(%r8), %rax
-	cmoveq	%rcx, %rax
-	cmovneq	24(%r8), %rcx
-	addq	%rdx, %r10
-	adcq	%rbx, %rsi
-	movq	%r10, 56(%rdi)
-	movq	%rsi, 64(%rdi)
-	adcq	%r9, %rbp
-	movq	%rbp, 72(%rdi)
-	adcq	%r12, %rcx
-	movq	%rcx, 80(%rdi)
-	adcq	%r15, %rax
-	movq	%rax, 88(%rdi)
-	adcq	%r11, %r13
-	movq	%r13, 96(%rdi)
-	adcq	-8(%rsp), %r14          ## 8-byte Folded Reload
-	movq	%r14, 104(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.p2align	4, 0x90
-l_mulPv512x64:                          ## @mulPv512x64
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	%rdx, %rcx
-	movq	%rcx, %rax
-	mulq	(%rsi)
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	movq	%rax, (%rdi)
-	movq	%rcx, %rax
-	mulq	56(%rsi)
-	movq	%rdx, %r10
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	48(%rsi)
-	movq	%rdx, %r11
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rax
-	mulq	40(%rsi)
-	movq	%rdx, %r12
-	movq	%rax, %r15
-	movq	%rcx, %rax
-	mulq	32(%rsi)
-	movq	%rdx, %rbx
-	movq	%rax, %r13
-	movq	%rcx, %rax
-	mulq	24(%rsi)
-	movq	%rdx, %rbp
-	movq	%rax, %r8
-	movq	%rcx, %rax
-	mulq	16(%rsi)
-	movq	%rdx, %r9
-	movq	%rax, %r14
-	movq	%rcx, %rax
-	mulq	8(%rsi)
-	addq	-24(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, 8(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 16(%rdi)
-	adcq	%r8, %r9
-	movq	%r9, 24(%rdi)
-	adcq	%r13, %rbp
-	movq	%rbp, 32(%rdi)
-	adcq	%r15, %rbx
-	movq	%rbx, 40(%rdi)
-	adcq	-16(%rsp), %r12         ## 8-byte Folded Reload
-	movq	%r12, 48(%rdi)
-	adcq	-8(%rsp), %r11          ## 8-byte Folded Reload
-	movq	%r11, 56(%rdi)
-	adcq	$0, %r10
-	movq	%r10, 64(%rdi)
-	movq	%rdi, %rax
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_mulUnitPre8L
-	.p2align	4, 0x90
-_mcl_fp_mulUnitPre8L:                   ## @mcl_fp_mulUnitPre8L
-## BB#0:
-	pushq	%rbx
-	subq	$80, %rsp
-	movq	%rdi, %rbx
-	leaq	8(%rsp), %rdi
-	callq	l_mulPv512x64
-	movq	72(%rsp), %r8
-	movq	64(%rsp), %r9
-	movq	56(%rsp), %r10
-	movq	48(%rsp), %r11
-	movq	40(%rsp), %rdi
-	movq	32(%rsp), %rax
-	movq	24(%rsp), %rcx
-	movq	8(%rsp), %rdx
-	movq	16(%rsp), %rsi
-	movq	%rdx, (%rbx)
-	movq	%rsi, 8(%rbx)
-	movq	%rcx, 16(%rbx)
-	movq	%rax, 24(%rbx)
-	movq	%rdi, 32(%rbx)
-	movq	%r11, 40(%rbx)
-	movq	%r10, 48(%rbx)
-	movq	%r9, 56(%rbx)
-	movq	%r8, 64(%rbx)
-	addq	$80, %rsp
-	popq	%rbx
-	retq
-
-	.globl	_mcl_fpDbl_mulPre8L
-	.p2align	4, 0x90
-_mcl_fpDbl_mulPre8L:                    ## @mcl_fpDbl_mulPre8L
-## BB#0:
-	pushq	%rbp
-	movq	%rsp, %rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$200, %rsp
-	movq	%rdx, %r15
-	movq	%rsi, %rbx
-	movq	%rdi, %r14
-	callq	_mcl_fpDbl_mulPre4L
-	leaq	64(%r14), %rdi
-	leaq	32(%rbx), %rsi
-	leaq	32(%r15), %rdx
-	callq	_mcl_fpDbl_mulPre4L
-	movq	56(%rbx), %r10
-	movq	48(%rbx), %rdx
-	movq	(%rbx), %rsi
-	movq	8(%rbx), %rdi
-	addq	32(%rbx), %rsi
-	adcq	40(%rbx), %rdi
-	adcq	16(%rbx), %rdx
-	adcq	24(%rbx), %r10
-	pushfq
-	popq	%r8
-	xorl	%r9d, %r9d
-	movq	56(%r15), %rcx
-	movq	48(%r15), %r13
-	movq	(%r15), %r12
-	movq	8(%r15), %rbx
-	addq	32(%r15), %r12
-	adcq	40(%r15), %rbx
-	adcq	16(%r15), %r13
-	adcq	24(%r15), %rcx
-	movl	$0, %eax
-	cmovbq	%r10, %rax
-	movq	%rax, -88(%rbp)         ## 8-byte Spill
-	movl	$0, %eax
-	cmovbq	%rdx, %rax
-	movq	%rax, -80(%rbp)         ## 8-byte Spill
-	movl	$0, %eax
-	cmovbq	%rdi, %rax
-	movq	%rax, -72(%rbp)         ## 8-byte Spill
-	movl	$0, %eax
-	cmovbq	%rsi, %rax
-	movq	%rax, -64(%rbp)         ## 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%rsi, -168(%rbp)
-	movq	%rdi, -160(%rbp)
-	movq	%rdx, -152(%rbp)
-	movq	%r10, -144(%rbp)
-	movq	%r12, -136(%rbp)
-	movq	%rbx, -128(%rbp)
-	movq	%r13, -120(%rbp)
-	movq	%rcx, -112(%rbp)
-	pushq	%r8
-	popfq
-	cmovaeq	%r9, %rcx
-	movq	%rcx, -48(%rbp)         ## 8-byte Spill
-	cmovaeq	%r9, %r13
-	cmovaeq	%r9, %rbx
-	cmovaeq	%r9, %r12
-	sbbq	%rax, %rax
-	movq	%rax, -56(%rbp)         ## 8-byte Spill
-	leaq	-232(%rbp), %rdi
-	leaq	-168(%rbp), %rsi
-	leaq	-136(%rbp), %rdx
-	callq	_mcl_fpDbl_mulPre4L
-	addq	-64(%rbp), %r12         ## 8-byte Folded Reload
-	adcq	-72(%rbp), %rbx         ## 8-byte Folded Reload
-	adcq	-80(%rbp), %r13         ## 8-byte Folded Reload
-	movq	-48(%rbp), %r10         ## 8-byte Reload
-	adcq	-88(%rbp), %r10         ## 8-byte Folded Reload
-	sbbq	%rax, %rax
-	andl	$1, %eax
-	movq	-56(%rbp), %rdx         ## 8-byte Reload
-	andl	%edx, %r15d
-	andl	$1, %r15d
-	addq	-200(%rbp), %r12
-	adcq	-192(%rbp), %rbx
-	adcq	-184(%rbp), %r13
-	adcq	-176(%rbp), %r10
-	adcq	%rax, %r15
-	movq	-208(%rbp), %rax
-	movq	-216(%rbp), %rcx
-	movq	-232(%rbp), %rsi
-	movq	-224(%rbp), %rdx
-	subq	(%r14), %rsi
-	sbbq	8(%r14), %rdx
-	sbbq	16(%r14), %rcx
-	sbbq	24(%r14), %rax
-	movq	32(%r14), %rdi
-	movq	%rdi, -80(%rbp)         ## 8-byte Spill
-	movq	40(%r14), %r8
-	movq	%r8, -88(%rbp)          ## 8-byte Spill
-	sbbq	%rdi, %r12
-	sbbq	%r8, %rbx
-	movq	48(%r14), %rdi
-	movq	%rdi, -72(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %r13
-	movq	56(%r14), %rdi
-	movq	%rdi, -64(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %r10
-	sbbq	$0, %r15
-	movq	64(%r14), %r11
-	subq	%r11, %rsi
-	movq	72(%r14), %rdi
-	movq	%rdi, -56(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %rdx
-	movq	80(%r14), %rdi
-	movq	%rdi, -48(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %rcx
-	movq	88(%r14), %rdi
-	movq	%rdi, -104(%rbp)        ## 8-byte Spill
-	sbbq	%rdi, %rax
-	movq	96(%r14), %rdi
-	movq	%rdi, -96(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %r12
-	movq	104(%r14), %rdi
-	sbbq	%rdi, %rbx
-	movq	112(%r14), %r8
-	sbbq	%r8, %r13
-	movq	120(%r14), %r9
-	sbbq	%r9, %r10
-	sbbq	$0, %r15
-	addq	-80(%rbp), %rsi         ## 8-byte Folded Reload
-	adcq	-88(%rbp), %rdx         ## 8-byte Folded Reload
-	movq	%rsi, 32(%r14)
-	adcq	-72(%rbp), %rcx         ## 8-byte Folded Reload
-	movq	%rdx, 40(%r14)
-	adcq	-64(%rbp), %rax         ## 8-byte Folded Reload
-	movq	%rcx, 48(%r14)
-	adcq	%r11, %r12
-	movq	%rax, 56(%r14)
-	movq	%r12, 64(%r14)
-	adcq	-56(%rbp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, 72(%r14)
-	adcq	-48(%rbp), %r13         ## 8-byte Folded Reload
-	movq	%r13, 80(%r14)
-	adcq	-104(%rbp), %r10        ## 8-byte Folded Reload
-	movq	%r10, 88(%r14)
-	adcq	-96(%rbp), %r15         ## 8-byte Folded Reload
-	movq	%r15, 96(%r14)
-	adcq	$0, %rdi
-	movq	%rdi, 104(%r14)
-	adcq	$0, %r8
-	movq	%r8, 112(%r14)
-	adcq	$0, %r9
-	movq	%r9, 120(%r14)
-	addq	$200, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fpDbl_sqrPre8L
-	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre8L:                    ## @mcl_fpDbl_sqrPre8L
-## BB#0:
-	pushq	%rbp
-	movq	%rsp, %rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$200, %rsp
-	movq	%rsi, %rbx
-	movq	%rdi, %r14
-	movq	%rbx, %rdx
-	callq	_mcl_fpDbl_mulPre4L
-	leaq	64(%r14), %rdi
-	leaq	32(%rbx), %rsi
-	movq	%rsi, %rdx
-	callq	_mcl_fpDbl_mulPre4L
-	movq	56(%rbx), %r15
-	movq	48(%rbx), %rax
-	movq	(%rbx), %rcx
-	movq	8(%rbx), %rdx
-	addq	32(%rbx), %rcx
-	adcq	40(%rbx), %rdx
-	adcq	16(%rbx), %rax
-	adcq	24(%rbx), %r15
-	pushfq
-	popq	%r8
-	pushfq
-	popq	%r9
-	pushfq
-	popq	%r10
-	pushfq
-	popq	%rdi
-	pushfq
-	popq	%rbx
-	sbbq	%rsi, %rsi
-	movq	%rsi, -56(%rbp)         ## 8-byte Spill
-	leaq	(%rcx,%rcx), %rsi
-	xorl	%r11d, %r11d
-	pushq	%rbx
-	popfq
-	cmovaeq	%r11, %rsi
-	movq	%rsi, -48(%rbp)         ## 8-byte Spill
-	movq	%rdx, %r13
-	shldq	$1, %rcx, %r13
-	pushq	%rdi
-	popfq
-	cmovaeq	%r11, %r13
-	movq	%rax, %r12
-	shldq	$1, %rdx, %r12
-	pushq	%r10
-	popfq
-	cmovaeq	%r11, %r12
-	movq	%r15, %rbx
-	movq	%rcx, -168(%rbp)
-	movq	%rdx, -160(%rbp)
-	movq	%rax, -152(%rbp)
-	movq	%r15, -144(%rbp)
-	movq	%rcx, -136(%rbp)
-	movq	%rdx, -128(%rbp)
-	movq	%rax, -120(%rbp)
-	movq	%r15, -112(%rbp)
-	shldq	$1, %rax, %r15
-	pushq	%r9
-	popfq
-	cmovaeq	%r11, %r15
-	shrq	$63, %rbx
-	pushq	%r8
-	popfq
-	cmovaeq	%r11, %rbx
-	leaq	-232(%rbp), %rdi
-	leaq	-168(%rbp), %rsi
-	leaq	-136(%rbp), %rdx
-	callq	_mcl_fpDbl_mulPre4L
-	movq	-56(%rbp), %rax         ## 8-byte Reload
-	andl	$1, %eax
-	movq	-48(%rbp), %r10         ## 8-byte Reload
-	addq	-200(%rbp), %r10
-	adcq	-192(%rbp), %r13
-	adcq	-184(%rbp), %r12
-	adcq	-176(%rbp), %r15
-	adcq	%rbx, %rax
-	movq	%rax, %rbx
-	movq	-208(%rbp), %rax
-	movq	-216(%rbp), %rcx
-	movq	-232(%rbp), %rsi
-	movq	-224(%rbp), %rdx
-	subq	(%r14), %rsi
-	sbbq	8(%r14), %rdx
-	sbbq	16(%r14), %rcx
-	sbbq	24(%r14), %rax
-	movq	32(%r14), %r9
-	movq	%r9, -56(%rbp)          ## 8-byte Spill
-	movq	40(%r14), %r8
-	movq	%r8, -48(%rbp)          ## 8-byte Spill
-	sbbq	%r9, %r10
-	sbbq	%r8, %r13
-	movq	48(%r14), %rdi
-	movq	%rdi, -104(%rbp)        ## 8-byte Spill
-	sbbq	%rdi, %r12
-	movq	56(%r14), %rdi
-	movq	%rdi, -96(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %r15
-	sbbq	$0, %rbx
-	movq	64(%r14), %r11
-	subq	%r11, %rsi
-	movq	72(%r14), %rdi
-	movq	%rdi, -88(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %rdx
-	movq	80(%r14), %rdi
-	movq	%rdi, -80(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %rcx
-	movq	88(%r14), %rdi
-	movq	%rdi, -72(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %rax
-	movq	96(%r14), %rdi
-	movq	%rdi, -64(%rbp)         ## 8-byte Spill
-	sbbq	%rdi, %r10
-	movq	104(%r14), %rdi
-	sbbq	%rdi, %r13
-	movq	112(%r14), %r8
-	sbbq	%r8, %r12
-	movq	120(%r14), %r9
-	sbbq	%r9, %r15
-	sbbq	$0, %rbx
-	addq	-56(%rbp), %rsi         ## 8-byte Folded Reload
-	adcq	-48(%rbp), %rdx         ## 8-byte Folded Reload
-	movq	%rsi, 32(%r14)
-	adcq	-104(%rbp), %rcx        ## 8-byte Folded Reload
-	movq	%rdx, 40(%r14)
-	adcq	-96(%rbp), %rax         ## 8-byte Folded Reload
-	movq	%rcx, 48(%r14)
-	adcq	%r11, %r10
-	movq	%rax, 56(%r14)
-	movq	%r10, 64(%r14)
-	adcq	-88(%rbp), %r13         ## 8-byte Folded Reload
-	movq	%r13, 72(%r14)
-	adcq	-80(%rbp), %r12         ## 8-byte Folded Reload
-	movq	%r12, 80(%r14)
-	adcq	-72(%rbp), %r15         ## 8-byte Folded Reload
-	movq	%r15, 88(%r14)
-	movq	%rbx, %rax
-	adcq	-64(%rbp), %rax         ## 8-byte Folded Reload
-	movq	%rax, 96(%r14)
-	adcq	$0, %rdi
-	movq	%rdi, 104(%r14)
-	adcq	$0, %r8
-	movq	%r8, 112(%r14)
-	adcq	$0, %r9
-	movq	%r9, 120(%r14)
-	addq	$200, %rsp
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_mont8L
-	.p2align	4, 0x90
-_mcl_fp_mont8L:                         ## @mcl_fp_mont8L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$1256, %rsp             ## imm = 0x4E8
-	movq	%rcx, %r13
-	movq	%rdx, 64(%rsp)          ## 8-byte Spill
-	movq	%rsi, 72(%rsp)          ## 8-byte Spill
-	movq	%rdi, 96(%rsp)          ## 8-byte Spill
-	movq	-8(%r13), %rbx
-	movq	%rbx, 80(%rsp)          ## 8-byte Spill
-	movq	%r13, 56(%rsp)          ## 8-byte Spill
-	movq	(%rdx), %rdx
-	leaq	1184(%rsp), %rdi
-	callq	l_mulPv512x64
-	movq	1184(%rsp), %r15
-	movq	1192(%rsp), %r14
-	movq	%r15, %rdx
-	imulq	%rbx, %rdx
-	movq	1248(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	1240(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	1232(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	1224(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	1216(%rsp), %r12
-	movq	1208(%rsp), %rbx
-	movq	1200(%rsp), %rbp
-	leaq	1112(%rsp), %rdi
-	movq	%r13, %rsi
-	callq	l_mulPv512x64
-	addq	1112(%rsp), %r15
-	adcq	1120(%rsp), %r14
-	adcq	1128(%rsp), %rbp
-	movq	%rbp, 88(%rsp)          ## 8-byte Spill
-	adcq	1136(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	adcq	1144(%rsp), %r12
-	movq	%r12, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %r13          ## 8-byte Reload
-	adcq	1152(%rsp), %r13
-	movq	(%rsp), %rbx            ## 8-byte Reload
-	adcq	1160(%rsp), %rbx
-	movq	40(%rsp), %rbp          ## 8-byte Reload
-	adcq	1168(%rsp), %rbp
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	1176(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	sbbq	%r15, %r15
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1040(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	andl	$1, %r15d
-	addq	1040(%rsp), %r14
-	movq	88(%rsp), %rax          ## 8-byte Reload
-	adcq	1048(%rsp), %rax
-	movq	%rax, 88(%rsp)          ## 8-byte Spill
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	1056(%rsp), %rax
-	movq	%rax, %r12
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	1064(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	adcq	1072(%rsp), %r13
-	movq	%r13, 16(%rsp)          ## 8-byte Spill
-	adcq	1080(%rsp), %rbx
-	movq	%rbx, (%rsp)            ## 8-byte Spill
-	adcq	1088(%rsp), %rbp
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	1096(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	1104(%rsp), %r15
-	movq	%r15, 48(%rsp)          ## 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%r14, %rdx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	968(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	andl	$1, %r15d
-	addq	968(%rsp), %r14
-	movq	88(%rsp), %r13          ## 8-byte Reload
-	adcq	976(%rsp), %r13
-	adcq	984(%rsp), %r12
-	movq	%r12, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r14           ## 8-byte Reload
-	adcq	992(%rsp), %r14
-	movq	16(%rsp), %rbx          ## 8-byte Reload
-	adcq	1000(%rsp), %rbx
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	1008(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	adcq	1016(%rsp), %rbp
-	movq	%rbp, %r12
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	1024(%rsp), %rbp
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	adcq	1032(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	adcq	$0, %r15
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	896(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	%r13, %rcx
-	addq	896(%rsp), %rcx
-	movq	32(%rsp), %r13          ## 8-byte Reload
-	adcq	904(%rsp), %r13
-	adcq	912(%rsp), %r14
-	adcq	920(%rsp), %rbx
-	movq	%rbx, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	928(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	adcq	936(%rsp), %r12
-	movq	%r12, 40(%rsp)          ## 8-byte Spill
-	adcq	944(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %r12          ## 8-byte Reload
-	adcq	952(%rsp), %r12
-	adcq	960(%rsp), %r15
-	sbbq	%rbx, %rbx
-	movq	%rcx, %rdx
-	movq	%rcx, %rbp
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	824(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	andl	$1, %ebx
-	addq	824(%rsp), %rbp
-	adcq	832(%rsp), %r13
-	movq	%r13, 32(%rsp)          ## 8-byte Spill
-	adcq	840(%rsp), %r14
-	movq	%r14, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %r13          ## 8-byte Reload
-	adcq	848(%rsp), %r13
-	movq	(%rsp), %rbp            ## 8-byte Reload
-	adcq	856(%rsp), %rbp
-	movq	40(%rsp), %r14          ## 8-byte Reload
-	adcq	864(%rsp), %r14
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	872(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	880(%rsp), %r12
-	adcq	888(%rsp), %r15
-	adcq	$0, %rbx
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	752(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	addq	752(%rsp), %rax
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	760(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	adcq	768(%rsp), %r13
-	movq	%r13, 16(%rsp)          ## 8-byte Spill
-	adcq	776(%rsp), %rbp
-	movq	%rbp, (%rsp)            ## 8-byte Spill
-	adcq	784(%rsp), %r14
-	movq	%r14, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	792(%rsp), %rbp
-	adcq	800(%rsp), %r12
-	adcq	808(%rsp), %r15
-	adcq	816(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	sbbq	%r13, %r13
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	680(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	%r13, %rax
-	andl	$1, %eax
-	addq	680(%rsp), %rbx
-	movq	8(%rsp), %r14           ## 8-byte Reload
-	adcq	688(%rsp), %r14
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	696(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %r13            ## 8-byte Reload
-	adcq	704(%rsp), %r13
-	movq	40(%rsp), %rbx          ## 8-byte Reload
-	adcq	712(%rsp), %rbx
-	adcq	720(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	%r12, %rbp
-	adcq	728(%rsp), %rbp
-	adcq	736(%rsp), %r15
-	movq	32(%rsp), %r12          ## 8-byte Reload
-	adcq	744(%rsp), %r12
-	adcq	$0, %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	608(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	%r14, %rax
-	addq	608(%rsp), %rax
-	movq	16(%rsp), %r14          ## 8-byte Reload
-	adcq	616(%rsp), %r14
-	adcq	624(%rsp), %r13
-	movq	%r13, (%rsp)            ## 8-byte Spill
-	adcq	632(%rsp), %rbx
-	movq	%rbx, %r13
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	640(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	648(%rsp), %rbp
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	656(%rsp), %r15
-	adcq	664(%rsp), %r12
-	movq	%r12, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	672(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	sbbq	%rbp, %rbp
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	536(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	%rbp, %rax
-	andl	$1, %eax
-	addq	536(%rsp), %rbx
-	adcq	544(%rsp), %r14
-	movq	%r14, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rbx            ## 8-byte Reload
-	adcq	552(%rsp), %rbx
-	adcq	560(%rsp), %r13
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	568(%rsp), %rbp
-	movq	48(%rsp), %r12          ## 8-byte Reload
-	adcq	576(%rsp), %r12
-	adcq	584(%rsp), %r15
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	592(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r14           ## 8-byte Reload
-	adcq	600(%rsp), %r14
-	adcq	$0, %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	40(%rax), %rdx
-	leaq	464(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	addq	464(%rsp), %rax
-	adcq	472(%rsp), %rbx
-	adcq	480(%rsp), %r13
-	movq	%r13, 40(%rsp)          ## 8-byte Spill
-	adcq	488(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	adcq	496(%rsp), %r12
-	adcq	504(%rsp), %r15
-	movq	%r15, 16(%rsp)          ## 8-byte Spill
-	movq	32(%rsp), %r15          ## 8-byte Reload
-	adcq	512(%rsp), %r15
-	adcq	520(%rsp), %r14
-	movq	%r14, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %r14            ## 8-byte Reload
-	adcq	528(%rsp), %r14
-	sbbq	%r13, %r13
-	movq	%rax, %rdx
-	movq	%rax, %rbp
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	392(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	%r13, %rax
-	andl	$1, %eax
-	addq	392(%rsp), %rbp
-	adcq	400(%rsp), %rbx
-	movq	%rbx, (%rsp)            ## 8-byte Spill
-	movq	40(%rsp), %rbp          ## 8-byte Reload
-	adcq	408(%rsp), %rbp
-	movq	24(%rsp), %rbx          ## 8-byte Reload
-	adcq	416(%rsp), %rbx
-	adcq	424(%rsp), %r12
-	movq	16(%rsp), %r13          ## 8-byte Reload
-	adcq	432(%rsp), %r13
-	adcq	440(%rsp), %r15
-	movq	%r15, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r15           ## 8-byte Reload
-	adcq	448(%rsp), %r15
-	adcq	456(%rsp), %r14
-	adcq	$0, %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	320(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	(%rsp), %rax            ## 8-byte Reload
-	addq	320(%rsp), %rax
-	adcq	328(%rsp), %rbp
-	movq	%rbp, 40(%rsp)          ## 8-byte Spill
-	adcq	336(%rsp), %rbx
-	movq	%rbx, 24(%rsp)          ## 8-byte Spill
-	movq	%r12, %rbp
-	adcq	344(%rsp), %rbp
-	adcq	352(%rsp), %r13
-	movq	32(%rsp), %r12          ## 8-byte Reload
-	adcq	360(%rsp), %r12
-	adcq	368(%rsp), %r15
-	movq	%r15, 8(%rsp)           ## 8-byte Spill
-	adcq	376(%rsp), %r14
-	movq	%r14, (%rsp)            ## 8-byte Spill
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	384(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	248(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	andl	$1, %r15d
-	addq	248(%rsp), %rbx
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	256(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %r14          ## 8-byte Reload
-	adcq	264(%rsp), %r14
-	adcq	272(%rsp), %rbp
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	movq	%r13, %rbx
-	adcq	280(%rsp), %rbx
-	movq	%r12, %rbp
-	adcq	288(%rsp), %rbp
-	movq	8(%rsp), %r13           ## 8-byte Reload
-	adcq	296(%rsp), %r13
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	304(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	16(%rsp), %r12          ## 8-byte Reload
-	adcq	312(%rsp), %r12
-	adcq	$0, %r15
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	176(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	addq	176(%rsp), %rax
-	adcq	184(%rsp), %r14
-	movq	%r14, 24(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rcx          ## 8-byte Reload
-	adcq	192(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	adcq	200(%rsp), %rbx
-	movq	%rbx, 16(%rsp)          ## 8-byte Spill
-	adcq	208(%rsp), %rbp
-	adcq	216(%rsp), %r13
-	movq	%r13, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %r14            ## 8-byte Reload
-	adcq	224(%rsp), %r14
-	adcq	232(%rsp), %r12
-	adcq	240(%rsp), %r15
-	sbbq	%rbx, %rbx
-	movq	80(%rsp), %rdx          ## 8-byte Reload
-	imulq	%rax, %rdx
-	movq	%rax, %r13
-	leaq	104(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	andl	$1, %ebx
-	addq	104(%rsp), %r13
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	112(%rsp), %rcx
-	movq	48(%rsp), %rdx          ## 8-byte Reload
-	adcq	120(%rsp), %rdx
-	movq	16(%rsp), %rsi          ## 8-byte Reload
-	adcq	128(%rsp), %rsi
-	movq	%rbp, %rdi
-	adcq	136(%rsp), %rdi
-	movq	%rdi, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r8            ## 8-byte Reload
-	adcq	144(%rsp), %r8
-	movq	%r8, 8(%rsp)            ## 8-byte Spill
-	movq	%r14, %r9
-	adcq	152(%rsp), %r9
-	movq	%r9, (%rsp)             ## 8-byte Spill
-	adcq	160(%rsp), %r12
-	adcq	168(%rsp), %r15
-	adcq	$0, %rbx
-	movq	%rcx, %rax
-	movq	%rcx, %r11
-	movq	56(%rsp), %rbp          ## 8-byte Reload
-	subq	(%rbp), %rax
-	movq	%rdx, %rcx
-	movq	%rdx, %r14
-	sbbq	8(%rbp), %rcx
-	movq	%rsi, %rdx
-	movq	%rsi, %r13
-	sbbq	16(%rbp), %rdx
-	movq	%rdi, %rsi
-	sbbq	24(%rbp), %rsi
-	movq	%r8, %rdi
-	sbbq	32(%rbp), %rdi
-	movq	%r9, %r10
-	sbbq	40(%rbp), %r10
-	movq	%r12, %r8
-	sbbq	48(%rbp), %r8
-	movq	%r15, %r9
-	sbbq	56(%rbp), %r9
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%r15, %r9
-	testb	%bl, %bl
-	cmovneq	%r11, %rax
-	movq	96(%rsp), %rbx          ## 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovneq	%r14, %rcx
-	movq	%rcx, 8(%rbx)
-	cmovneq	%r13, %rdx
-	movq	%rdx, 16(%rbx)
-	cmovneq	32(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 24(%rbx)
-	cmovneq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 32(%rbx)
-	cmovneq	(%rsp), %r10            ## 8-byte Folded Reload
-	movq	%r10, 40(%rbx)
-	cmovneq	%r12, %r8
-	movq	%r8, 48(%rbx)
-	movq	%r9, 56(%rbx)
-	addq	$1256, %rsp             ## imm = 0x4E8
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montNF8L
-	.p2align	4, 0x90
-_mcl_fp_montNF8L:                       ## @mcl_fp_montNF8L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$1240, %rsp             ## imm = 0x4D8
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	%rdx, 48(%rsp)          ## 8-byte Spill
-	movq	%rsi, 56(%rsp)          ## 8-byte Spill
-	movq	%rdi, 80(%rsp)          ## 8-byte Spill
-	movq	-8(%rcx), %rbx
-	movq	%rbx, 64(%rsp)          ## 8-byte Spill
-	movq	(%rdx), %rdx
-	leaq	1168(%rsp), %rdi
-	callq	l_mulPv512x64
-	movq	1168(%rsp), %r15
-	movq	1176(%rsp), %r12
-	movq	%r15, %rdx
-	imulq	%rbx, %rdx
-	movq	1232(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	1224(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	1216(%rsp), %r13
-	movq	1208(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	1200(%rsp), %r14
-	movq	1192(%rsp), %rbp
-	movq	1184(%rsp), %rbx
-	leaq	1096(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	1096(%rsp), %r15
-	adcq	1104(%rsp), %r12
-	movq	%r12, 16(%rsp)          ## 8-byte Spill
-	adcq	1112(%rsp), %rbx
-	adcq	1120(%rsp), %rbp
-	adcq	1128(%rsp), %r14
-	movq	%r14, %r12
-	movq	8(%rsp), %r14           ## 8-byte Reload
-	adcq	1136(%rsp), %r14
-	adcq	1144(%rsp), %r13
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	1152(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	1160(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1024(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	1088(%rsp), %r15
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	addq	1024(%rsp), %rax
-	adcq	1032(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          ## 8-byte Spill
-	movq	%rbp, %rbx
-	adcq	1040(%rsp), %rbx
-	adcq	1048(%rsp), %r12
-	adcq	1056(%rsp), %r14
-	movq	%r14, 8(%rsp)           ## 8-byte Spill
-	movq	%r13, %rbp
-	adcq	1064(%rsp), %rbp
-	movq	(%rsp), %rcx            ## 8-byte Reload
-	adcq	1072(%rsp), %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %r14          ## 8-byte Reload
-	adcq	1080(%rsp), %r14
-	adcq	$0, %r15
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	64(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	952(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	952(%rsp), %r13
-	movq	72(%rsp), %rax          ## 8-byte Reload
-	adcq	960(%rsp), %rax
-	movq	%rax, 72(%rsp)          ## 8-byte Spill
-	adcq	968(%rsp), %rbx
-	movq	%rbx, 16(%rsp)          ## 8-byte Spill
-	movq	%r12, %rbx
-	adcq	976(%rsp), %rbx
-	movq	8(%rsp), %r12           ## 8-byte Reload
-	adcq	984(%rsp), %r12
-	adcq	992(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %r13            ## 8-byte Reload
-	adcq	1000(%rsp), %r13
-	movq	%r14, %rbp
-	adcq	1008(%rsp), %rbp
-	adcq	1016(%rsp), %r15
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	880(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	944(%rsp), %r14
-	movq	72(%rsp), %rax          ## 8-byte Reload
-	addq	880(%rsp), %rax
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	888(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	adcq	896(%rsp), %rbx
-	adcq	904(%rsp), %r12
-	movq	%r12, 8(%rsp)           ## 8-byte Spill
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	912(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	920(%rsp), %r13
-	movq	%r13, (%rsp)            ## 8-byte Spill
-	adcq	928(%rsp), %rbp
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	936(%rsp), %r15
-	adcq	$0, %r14
-	movq	%rax, %rdx
-	movq	%rax, %rbp
-	imulq	64(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	808(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	808(%rsp), %rbp
-	movq	16(%rsp), %r13          ## 8-byte Reload
-	adcq	816(%rsp), %r13
-	movq	%rbx, %r12
-	adcq	824(%rsp), %r12
-	movq	8(%rsp), %rbx           ## 8-byte Reload
-	adcq	832(%rsp), %rbx
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	840(%rsp), %rbp
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	848(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	856(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	adcq	864(%rsp), %r15
-	adcq	872(%rsp), %r14
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	736(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	800(%rsp), %rax
-	movq	%r13, %rcx
-	addq	736(%rsp), %rcx
-	adcq	744(%rsp), %r12
-	movq	%r12, 24(%rsp)          ## 8-byte Spill
-	adcq	752(%rsp), %rbx
-	movq	%rbx, 8(%rsp)           ## 8-byte Spill
-	adcq	760(%rsp), %rbp
-	movq	%rbp, %r13
-	movq	(%rsp), %rbp            ## 8-byte Reload
-	adcq	768(%rsp), %rbp
-	movq	32(%rsp), %rbx          ## 8-byte Reload
-	adcq	776(%rsp), %rbx
-	adcq	784(%rsp), %r15
-	adcq	792(%rsp), %r14
-	adcq	$0, %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	%rcx, %rdx
-	movq	%rcx, %r12
-	imulq	64(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	664(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	664(%rsp), %r12
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	672(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	680(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	adcq	688(%rsp), %r13
-	adcq	696(%rsp), %rbp
-	movq	%rbp, (%rsp)            ## 8-byte Spill
-	adcq	704(%rsp), %rbx
-	adcq	712(%rsp), %r15
-	adcq	720(%rsp), %r14
-	movq	16(%rsp), %r12          ## 8-byte Reload
-	adcq	728(%rsp), %r12
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	592(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	656(%rsp), %rcx
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	addq	592(%rsp), %rax
-	movq	8(%rsp), %rbp           ## 8-byte Reload
-	adcq	600(%rsp), %rbp
-	adcq	608(%rsp), %r13
-	movq	%r13, 24(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %r13            ## 8-byte Reload
-	adcq	616(%rsp), %r13
-	adcq	624(%rsp), %rbx
-	adcq	632(%rsp), %r15
-	adcq	640(%rsp), %r14
-	adcq	648(%rsp), %r12
-	movq	%r12, 16(%rsp)          ## 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	64(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	520(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	520(%rsp), %r12
-	adcq	528(%rsp), %rbp
-	movq	%rbp, 8(%rsp)           ## 8-byte Spill
-	movq	24(%rsp), %r12          ## 8-byte Reload
-	adcq	536(%rsp), %r12
-	movq	%r13, %rbp
-	adcq	544(%rsp), %rbp
-	adcq	552(%rsp), %rbx
-	adcq	560(%rsp), %r15
-	adcq	568(%rsp), %r14
-	movq	16(%rsp), %r13          ## 8-byte Reload
-	adcq	576(%rsp), %r13
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	584(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	40(%rax), %rdx
-	leaq	448(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	512(%rsp), %rcx
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	addq	448(%rsp), %rax
-	adcq	456(%rsp), %r12
-	movq	%r12, 24(%rsp)          ## 8-byte Spill
-	adcq	464(%rsp), %rbp
-	adcq	472(%rsp), %rbx
-	adcq	480(%rsp), %r15
-	adcq	488(%rsp), %r14
-	adcq	496(%rsp), %r13
-	movq	%r13, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %r13            ## 8-byte Reload
-	adcq	504(%rsp), %r13
-	adcq	$0, %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	64(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	376(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	376(%rsp), %r12
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	384(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	392(%rsp), %rbp
-	adcq	400(%rsp), %rbx
-	adcq	408(%rsp), %r15
-	adcq	416(%rsp), %r14
-	movq	16(%rsp), %r12          ## 8-byte Reload
-	adcq	424(%rsp), %r12
-	adcq	432(%rsp), %r13
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	440(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	304(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	368(%rsp), %rcx
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	addq	304(%rsp), %rax
-	adcq	312(%rsp), %rbp
-	movq	%rbp, (%rsp)            ## 8-byte Spill
-	adcq	320(%rsp), %rbx
-	adcq	328(%rsp), %r15
-	adcq	336(%rsp), %r14
-	adcq	344(%rsp), %r12
-	movq	%r12, 16(%rsp)          ## 8-byte Spill
-	adcq	352(%rsp), %r13
-	movq	8(%rsp), %rbp           ## 8-byte Reload
-	adcq	360(%rsp), %rbp
-	adcq	$0, %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	64(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	232(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	232(%rsp), %r12
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	240(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	adcq	248(%rsp), %rbx
-	adcq	256(%rsp), %r15
-	adcq	264(%rsp), %r14
-	movq	16(%rsp), %r12          ## 8-byte Reload
-	adcq	272(%rsp), %r12
-	adcq	280(%rsp), %r13
-	adcq	288(%rsp), %rbp
-	movq	%rbp, 8(%rsp)           ## 8-byte Spill
-	movq	32(%rsp), %rbp          ## 8-byte Reload
-	adcq	296(%rsp), %rbp
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	160(%rsp), %rdi
-	movq	56(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	movq	224(%rsp), %rcx
-	movq	(%rsp), %rax            ## 8-byte Reload
-	addq	160(%rsp), %rax
-	adcq	168(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	adcq	176(%rsp), %r15
-	adcq	184(%rsp), %r14
-	adcq	192(%rsp), %r12
-	movq	%r12, 16(%rsp)          ## 8-byte Spill
-	adcq	200(%rsp), %r13
-	movq	8(%rsp), %rbx           ## 8-byte Reload
-	adcq	208(%rsp), %rbx
-	adcq	216(%rsp), %rbp
-	movq	%rbp, %r12
-	adcq	$0, %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	movq	64(%rsp), %rdx          ## 8-byte Reload
-	imulq	%rax, %rdx
-	movq	%rax, %rbp
-	leaq	88(%rsp), %rdi
-	movq	40(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	88(%rsp), %rbp
-	movq	32(%rsp), %r11          ## 8-byte Reload
-	adcq	96(%rsp), %r11
-	adcq	104(%rsp), %r15
-	adcq	112(%rsp), %r14
-	movq	16(%rsp), %rsi          ## 8-byte Reload
-	adcq	120(%rsp), %rsi
-	movq	%rsi, 16(%rsp)          ## 8-byte Spill
-	adcq	128(%rsp), %r13
-	adcq	136(%rsp), %rbx
-	movq	%rbx, 8(%rsp)           ## 8-byte Spill
-	adcq	144(%rsp), %r12
-	movq	(%rsp), %r8             ## 8-byte Reload
-	adcq	152(%rsp), %r8
-	movq	%r11, %rax
-	movq	40(%rsp), %rbp          ## 8-byte Reload
-	subq	(%rbp), %rax
-	movq	%r15, %rcx
-	sbbq	8(%rbp), %rcx
-	movq	%r14, %rdx
-	sbbq	16(%rbp), %rdx
-	sbbq	24(%rbp), %rsi
-	movq	%r13, %rdi
-	sbbq	32(%rbp), %rdi
-	movq	%rbx, %r9
-	sbbq	40(%rbp), %r9
-	movq	%r12, %r10
-	sbbq	48(%rbp), %r10
-	movq	%rbp, %rbx
-	movq	%r8, %rbp
-	sbbq	56(%rbx), %rbp
-	testq	%rbp, %rbp
-	cmovsq	%r11, %rax
-	movq	80(%rsp), %rbx          ## 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovsq	%r15, %rcx
-	movq	%rcx, 8(%rbx)
-	cmovsq	%r14, %rdx
-	movq	%rdx, 16(%rbx)
-	cmovsq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 24(%rbx)
-	cmovsq	%r13, %rdi
-	movq	%rdi, 32(%rbx)
-	cmovsq	8(%rsp), %r9            ## 8-byte Folded Reload
-	movq	%r9, 40(%rbx)
-	cmovsq	%r12, %r10
-	movq	%r10, 48(%rbx)
-	cmovsq	%r8, %rbp
-	movq	%rbp, 56(%rbx)
-	addq	$1240, %rsp             ## imm = 0x4D8
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_montRed8L
-	.p2align	4, 0x90
-_mcl_fp_montRed8L:                      ## @mcl_fp_montRed8L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	subq	$776, %rsp              ## imm = 0x308
-	movq	%rdx, %rax
-	movq	%rdi, 192(%rsp)         ## 8-byte Spill
-	movq	-8(%rax), %rcx
-	movq	%rcx, 104(%rsp)         ## 8-byte Spill
-	movq	(%rsi), %r15
-	movq	8(%rsi), %rdx
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	movq	%r15, %rdx
-	imulq	%rcx, %rdx
-	movq	120(%rsi), %rcx
-	movq	%rcx, 112(%rsp)         ## 8-byte Spill
-	movq	112(%rsi), %rcx
-	movq	%rcx, 56(%rsp)          ## 8-byte Spill
-	movq	104(%rsi), %rcx
-	movq	%rcx, 96(%rsp)          ## 8-byte Spill
-	movq	96(%rsi), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	movq	88(%rsi), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	movq	80(%rsi), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	72(%rsi), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	movq	64(%rsi), %r13
-	movq	56(%rsi), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	movq	48(%rsi), %r14
-	movq	40(%rsi), %rcx
-	movq	%rcx, 72(%rsp)          ## 8-byte Spill
-	movq	32(%rsi), %r12
-	movq	24(%rsi), %rbx
-	movq	16(%rsi), %rbp
-	movq	%rax, %rcx
-	movq	(%rcx), %rax
-	movq	%rax, 136(%rsp)         ## 8-byte Spill
-	movq	56(%rcx), %rax
-	movq	%rax, 184(%rsp)         ## 8-byte Spill
-	movq	48(%rcx), %rax
-	movq	%rax, 176(%rsp)         ## 8-byte Spill
-	movq	40(%rcx), %rax
-	movq	%rax, 168(%rsp)         ## 8-byte Spill
-	movq	32(%rcx), %rax
-	movq	%rax, 160(%rsp)         ## 8-byte Spill
-	movq	24(%rcx), %rax
-	movq	%rax, 152(%rsp)         ## 8-byte Spill
-	movq	16(%rcx), %rax
-	movq	%rax, 144(%rsp)         ## 8-byte Spill
-	movq	8(%rcx), %rax
-	movq	%rax, 128(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rsi
-	movq	%rsi, 88(%rsp)          ## 8-byte Spill
-	leaq	704(%rsp), %rdi
-	callq	l_mulPv512x64
-	addq	704(%rsp), %r15
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	712(%rsp), %rcx
-	adcq	720(%rsp), %rbp
-	movq	%rbp, 80(%rsp)          ## 8-byte Spill
-	adcq	728(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	adcq	736(%rsp), %r12
-	movq	%r12, 120(%rsp)         ## 8-byte Spill
-	movq	72(%rsp), %rax          ## 8-byte Reload
-	adcq	744(%rsp), %rax
-	movq	%rax, 72(%rsp)          ## 8-byte Spill
-	adcq	752(%rsp), %r14
-	movq	%r14, %r12
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	adcq	760(%rsp), %rax
-	movq	%rax, 64(%rsp)          ## 8-byte Spill
-	adcq	768(%rsp), %r13
-	movq	%r13, 8(%rsp)           ## 8-byte Spill
-	adcq	$0, 16(%rsp)            ## 8-byte Folded Spill
-	movq	40(%rsp), %r15          ## 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 24(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 48(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 96(%rsp)            ## 8-byte Folded Spill
-	movq	56(%rsp), %r13          ## 8-byte Reload
-	adcq	$0, %r13
-	movq	112(%rsp), %r14         ## 8-byte Reload
-	adcq	$0, %r14
-	sbbq	%rbx, %rbx
-	movq	%rcx, %rbp
-	movq	%rbp, %rdx
-	imulq	104(%rsp), %rdx         ## 8-byte Folded Reload
-	leaq	632(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	632(%rsp), %rbp
-	movq	80(%rsp), %rsi          ## 8-byte Reload
-	adcq	640(%rsp), %rsi
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	648(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	120(%rsp), %rcx         ## 8-byte Reload
-	adcq	656(%rsp), %rcx
-	movq	%rcx, 120(%rsp)         ## 8-byte Spill
-	movq	72(%rsp), %rcx          ## 8-byte Reload
-	adcq	664(%rsp), %rcx
-	movq	%rcx, 72(%rsp)          ## 8-byte Spill
-	adcq	672(%rsp), %r12
-	movq	64(%rsp), %rcx          ## 8-byte Reload
-	adcq	680(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	688(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	696(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	adcq	$0, %r15
-	movq	%r15, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rbx          ## 8-byte Reload
-	adcq	$0, %rbx
-	movq	48(%rsp), %r15          ## 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 96(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %r13
-	movq	%r13, 56(%rsp)          ## 8-byte Spill
-	adcq	$0, %r14
-	movq	%r14, 112(%rsp)         ## 8-byte Spill
-	movq	%rax, %rbp
-	adcq	$0, %rbp
-	movq	%rsi, %rdx
-	movq	%rsi, %r14
-	imulq	104(%rsp), %rdx         ## 8-byte Folded Reload
-	leaq	560(%rsp), %rdi
-	movq	88(%rsp), %r13          ## 8-byte Reload
-	movq	%r13, %rsi
-	callq	l_mulPv512x64
-	addq	560(%rsp), %r14
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	568(%rsp), %rcx
-	movq	120(%rsp), %rax         ## 8-byte Reload
-	adcq	576(%rsp), %rax
-	movq	%rax, 120(%rsp)         ## 8-byte Spill
-	movq	72(%rsp), %rax          ## 8-byte Reload
-	adcq	584(%rsp), %rax
-	movq	%rax, 72(%rsp)          ## 8-byte Spill
-	adcq	592(%rsp), %r12
-	movq	%r12, 32(%rsp)          ## 8-byte Spill
-	movq	64(%rsp), %r14          ## 8-byte Reload
-	adcq	600(%rsp), %r14
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	608(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	adcq	616(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	624(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	$0, %rbx
-	movq	%rbx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r15
-	movq	%r15, 48(%rsp)          ## 8-byte Spill
-	movq	96(%rsp), %rbx          ## 8-byte Reload
-	adcq	$0, %rbx
-	movq	56(%rsp), %r15          ## 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 112(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, %rbp
-	movq	%rbp, 80(%rsp)          ## 8-byte Spill
-	movq	%rcx, %rbp
-	movq	%rbp, %rdx
-	movq	104(%rsp), %r12         ## 8-byte Reload
-	imulq	%r12, %rdx
-	leaq	488(%rsp), %rdi
-	movq	%r13, %rsi
-	callq	l_mulPv512x64
-	addq	488(%rsp), %rbp
-	movq	120(%rsp), %rax         ## 8-byte Reload
-	adcq	496(%rsp), %rax
-	movq	72(%rsp), %rbp          ## 8-byte Reload
-	adcq	504(%rsp), %rbp
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	512(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	adcq	520(%rsp), %r14
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	528(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	536(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %r13          ## 8-byte Reload
-	adcq	544(%rsp), %r13
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	552(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, 48(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %rbx
-	movq	%rbx, 96(%rsp)          ## 8-byte Spill
-	movq	%r15, %rbx
-	adcq	$0, %rbx
-	adcq	$0, 112(%rsp)           ## 8-byte Folded Spill
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	movq	%rax, %rdx
-	movq	%rax, %r15
-	imulq	%r12, %rdx
-	leaq	416(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	416(%rsp), %r15
-	adcq	424(%rsp), %rbp
-	movq	%rbp, %rax
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	432(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	%r14, %r12
-	adcq	440(%rsp), %r12
-	movq	8(%rsp), %r14           ## 8-byte Reload
-	adcq	448(%rsp), %r14
-	movq	16(%rsp), %rbp          ## 8-byte Reload
-	adcq	456(%rsp), %rbp
-	adcq	464(%rsp), %r13
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	472(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rcx          ## 8-byte Reload
-	adcq	480(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	adcq	$0, 96(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %rbx
-	movq	%rbx, 56(%rsp)          ## 8-byte Spill
-	movq	112(%rsp), %r15         ## 8-byte Reload
-	adcq	$0, %r15
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	movq	%rax, %rbx
-	movq	%rbx, %rdx
-	imulq	104(%rsp), %rdx         ## 8-byte Folded Reload
-	leaq	344(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	344(%rsp), %rbx
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	352(%rsp), %rax
-	adcq	360(%rsp), %r12
-	movq	%r12, 64(%rsp)          ## 8-byte Spill
-	adcq	368(%rsp), %r14
-	movq	%r14, 8(%rsp)           ## 8-byte Spill
-	adcq	376(%rsp), %rbp
-	movq	%rbp, 16(%rsp)          ## 8-byte Spill
-	adcq	384(%rsp), %r13
-	movq	%r13, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %r13          ## 8-byte Reload
-	adcq	392(%rsp), %r13
-	movq	48(%rsp), %r12          ## 8-byte Reload
-	adcq	400(%rsp), %r12
-	movq	96(%rsp), %r14          ## 8-byte Reload
-	adcq	408(%rsp), %r14
-	movq	56(%rsp), %rbp          ## 8-byte Reload
-	adcq	$0, %rbp
-	movq	%r15, %rbx
-	adcq	$0, %rbx
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	movq	%rax, %rdx
-	movq	%rax, %r15
-	imulq	104(%rsp), %rdx         ## 8-byte Folded Reload
-	leaq	272(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	272(%rsp), %r15
-	movq	64(%rsp), %rcx          ## 8-byte Reload
-	adcq	280(%rsp), %rcx
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	288(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	adcq	296(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	304(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	312(%rsp), %r13
-	movq	%r13, 24(%rsp)          ## 8-byte Spill
-	adcq	320(%rsp), %r12
-	movq	%r12, 48(%rsp)          ## 8-byte Spill
-	adcq	328(%rsp), %r14
-	movq	%r14, %r13
-	adcq	336(%rsp), %rbp
-	movq	%rbp, %r12
-	adcq	$0, %rbx
+	movq	%rdi, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r10
+	movq	%rdi, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rdi
+	movq	%rax, %rbx
+	addq	%r8, %rbx
+	adcq	%r9, %rdi
+	adcq	-56(%rsp), %rbp                 ## 8-byte Folded Reload
+	adcq	-112(%rsp), %r12                ## 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                ## 8-byte Reload
+	adcq	-104(%rsp), %rdx                ## 8-byte Folded Reload
+	movzbl	-128(%rsp), %eax                ## 1-byte Folded Reload
+	movq	-96(%rsp), %r8                  ## 8-byte Reload
+	adcq	%rax, %r8
+	addq	%rsi, %r10
+	adcq	%rcx, %rbx
+	adcq	%r13, %rdi
+	adcq	%r15, %rbp
+	adcq	%r11, %r12
+	adcq	-88(%rsp), %rdx                 ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	-32(%rsp), %rax                 ## 8-byte Reload
+	adcq	72(%rax), %r8
+	movq	%r8, -96(%rsp)                  ## 8-byte Spill
+	setb	-104(%rsp)                      ## 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 ## 8-byte Reload
+	imulq	%rbx, %rcx
+	movq	%rcx, %rax
+	movq	-40(%rsp), %r9                  ## 8-byte Reload
+	mulq	%r9
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, -8(%rsp)                  ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r14
+	movq	%rdx, %r11
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r14
+	movq	%rcx, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %rsi
+	addq	%r10, %rsi
+	adcq	%r13, %r8
+	adcq	-8(%rsp), %r11                  ## 8-byte Folded Reload
+	adcq	-56(%rsp), %r15                 ## 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                ## 8-byte Reload
+	adcq	-112(%rsp), %rdx                ## 8-byte Folded Reload
+	movzbl	-104(%rsp), %eax                ## 1-byte Folded Reload
+	movq	-88(%rsp), %rcx                 ## 8-byte Reload
+	adcq	%rax, %rcx
+	addq	%rbx, %r14
+	adcq	%rdi, %rsi
+	adcq	%rbp, %r8
+	adcq	%r12, %r11
+	adcq	-120(%rsp), %r15                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %rdx                 ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movq	-32(%rsp), %rax                 ## 8-byte Reload
+	adcq	80(%rax), %rcx
+	movq	%rcx, -88(%rsp)                 ## 8-byte Spill
+	setb	-120(%rsp)                      ## 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 ## 8-byte Reload
+	imulq	%rsi, %rcx
+	movq	%rcx, %rax
+	mulq	%r9
+	movq	%rdx, -80(%rsp)                 ## 8-byte Spill
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rdi
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	%rcx, %rax
+	movq	-16(%rsp), %r13                 ## 8-byte Reload
+	mulq	%r13
+	movq	%rdx, %r14
+	movq	%rax, %r9
+	movq	%rcx, %rax
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r10
+	movq	%rcx, %rax
+	movq	-24(%rsp), %r12                 ## 8-byte Reload
+	mulq	%r12
+	addq	%r14, %rax
+	adcq	%r10, %rdx
+	adcq	-112(%rsp), %rbx                ## 8-byte Folded Reload
+	adcq	-104(%rsp), %rbp                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %rdi                 ## 8-byte Folded Reload
+	movzbl	-120(%rsp), %r10d               ## 1-byte Folded Reload
+	adcq	-80(%rsp), %r10                 ## 8-byte Folded Reload
+	addq	%rsi, %r9
+	adcq	%r8, %rax
+	adcq	%r11, %rdx
+	adcq	%r15, %rbx
+	adcq	-128(%rsp), %rbp                ## 8-byte Folded Reload
+	adcq	-88(%rsp), %rdi                 ## 8-byte Folded Reload
+	movq	-32(%rsp), %rcx                 ## 8-byte Reload
+	adcq	88(%rcx), %r10
+	xorl	%r8d, %r8d
+	movq	%rax, %r9
+	subq	%r13, %r9
+	movq	%rdx, %r11
+	sbbq	%r12, %r11
 	movq	%rbx, %r14
-	movq	80(%rsp), %r15          ## 8-byte Reload
-	adcq	$0, %r15
-	movq	104(%rsp), %rdx         ## 8-byte Reload
-	movq	%rcx, %rbx
-	imulq	%rbx, %rdx
-	leaq	200(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv512x64
-	addq	200(%rsp), %rbx
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	208(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %r8           ## 8-byte Reload
-	adcq	216(%rsp), %r8
-	movq	%r8, 16(%rsp)           ## 8-byte Spill
-	movq	40(%rsp), %rdx          ## 8-byte Reload
-	adcq	224(%rsp), %rdx
-	movq	24(%rsp), %rsi          ## 8-byte Reload
-	adcq	232(%rsp), %rsi
-	movq	48(%rsp), %rdi          ## 8-byte Reload
-	adcq	240(%rsp), %rdi
-	movq	%r13, %rbp
-	adcq	248(%rsp), %rbp
-	movq	%r12, %rbx
-	adcq	256(%rsp), %rbx
-	movq	%rbx, 56(%rsp)          ## 8-byte Spill
-	movq	%r14, %r9
-	adcq	264(%rsp), %r9
-	adcq	$0, %r15
-	movq	%r15, %r10
-	subq	136(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%r8, %rcx
-	sbbq	128(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rdx, %r13
-	sbbq	144(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%rsi, %r12
-	sbbq	152(%rsp), %r12         ## 8-byte Folded Reload
-	movq	%rdi, %r14
-	sbbq	160(%rsp), %r14         ## 8-byte Folded Reload
-	movq	%rbp, %r11
-	sbbq	168(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%rbx, %r8
-	sbbq	176(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r9, %r15
-	sbbq	184(%rsp), %r9          ## 8-byte Folded Reload
-	sbbq	$0, %r10
-	andl	$1, %r10d
-	cmovneq	%r15, %r9
-	testb	%r10b, %r10b
-	cmovneq	8(%rsp), %rax           ## 8-byte Folded Reload
-	movq	192(%rsp), %rbx         ## 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovneq	16(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 8(%rbx)
-	cmovneq	%rdx, %r13
-	movq	%r13, 16(%rbx)
-	cmovneq	%rsi, %r12
-	movq	%r12, 24(%rbx)
-	cmovneq	%rdi, %r14
-	movq	%r14, 32(%rbx)
-	cmovneq	%rbp, %r11
-	movq	%r11, 40(%rbx)
-	cmovneq	56(%rsp), %r8           ## 8-byte Folded Reload
-	movq	%r8, 48(%rbx)
-	movq	%r9, 56(%rbx)
-	addq	$776, %rsp              ## imm = 0x308
+	sbbq	-48(%rsp), %r14                 ## 8-byte Folded Reload
+	movq	%rbp, %r15
+	sbbq	-72(%rsp), %r15                 ## 8-byte Folded Reload
+	movq	%rdi, %r12
+	sbbq	-64(%rsp), %r12                 ## 8-byte Folded Reload
+	movq	%r10, %rcx
+	sbbq	-40(%rsp), %rcx                 ## 8-byte Folded Reload
+	sbbq	%r8, %r8
+	testb	$1, %r8b
+	cmovneq	%r10, %rcx
+	movq	(%rsp), %rsi                    ## 8-byte Reload
+	movq	%rcx, 40(%rsi)
+	cmovneq	%rdi, %r12
+	movq	%r12, 32(%rsi)
+	cmovneq	%rbp, %r15
+	movq	%r15, 24(%rsi)
+	cmovneq	%rbx, %r14
+	movq	%r14, 16(%rsi)
+	cmovneq	%rdx, %r11
+	movq	%r11, 8(%rsi)
+	cmovneq	%rax, %r9
+	movq	%r9, (%rsi)
+	addq	$8, %rsp
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -12709,547 +4831,682 @@ _mcl_fp_montRed8L:                      ## @mcl_fp_montRed8L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_addPre8L
+                                        ## -- End function
+	.globl	_mcl_fp_montRedNF6L             ## -- Begin function mcl_fp_montRedNF6L
 	.p2align	4, 0x90
-_mcl_fp_addPre8L:                       ## @mcl_fp_addPre8L
-## BB#0:
+_mcl_fp_montRedNF6L:                    ## @mcl_fp_montRedNF6L
+## %bb.0:
+	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	56(%rdx), %r8
-	movq	56(%rsi), %r15
-	movq	48(%rdx), %r9
-	movq	48(%rsi), %r12
-	movq	40(%rdx), %r10
-	movq	32(%rdx), %r11
-	movq	24(%rdx), %r14
-	movq	16(%rdx), %rbx
-	movq	(%rdx), %rcx
-	movq	8(%rdx), %rdx
-	addq	(%rsi), %rcx
-	adcq	8(%rsi), %rdx
+	pushq	%rax
+	movq	%rdx, %rcx
+	movq	%rdi, (%rsp)                    ## 8-byte Spill
+	movq	-8(%rdx), %rax
+	movq	%rax, -80(%rsp)                 ## 8-byte Spill
+	movq	(%rsi), %r9
+	movq	%r9, %rdi
+	imulq	%rax, %rdi
+	movq	40(%rdx), %rdx
+	movq	%rdx, -40(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, -128(%rsp)                ## 8-byte Spill
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	32(%rcx), %rdx
+	movq	%rdx, -64(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r10
+	movq	%rdx, %r12
+	movq	24(%rcx), %rdx
+	movq	%rdx, -72(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r14
+	movq	%rdx, %r15
+	movq	16(%rcx), %rdx
+	movq	%rdx, -48(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rdx
+	movq	%rax, %r11
+	movq	%rdx, %r13
+	movq	(%rcx), %r8
+	movq	8(%rcx), %rcx
+	movq	%rcx, -24(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	%rcx
+	movq	%rdx, %rbx
+	movq	%rax, %rbp
+	movq	%rdi, %rax
+	mulq	%r8
+	movq	%r8, %rdi
+	movq	%r8, -16(%rsp)                  ## 8-byte Spill
+	movq	%rdx, %rcx
+	addq	%rbp, %rcx
+	adcq	%r11, %rbx
+	adcq	%r14, %r13
+	adcq	%r10, %r15
+	adcq	-128(%rsp), %r12                ## 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                ## 8-byte Reload
+	adcq	$0, %rdx
+	addq	%r9, %rax
+	movq	%rsi, -32(%rsp)                 ## 8-byte Spill
+	adcq	8(%rsi), %rcx
 	adcq	16(%rsi), %rbx
-	movq	40(%rsi), %r13
-	movq	24(%rsi), %rax
-	movq	32(%rsi), %rsi
-	movq	%rcx, (%rdi)
-	movq	%rdx, 8(%rdi)
-	movq	%rbx, 16(%rdi)
-	adcq	%r14, %rax
-	movq	%rax, 24(%rdi)
-	adcq	%r11, %rsi
-	movq	%rsi, 32(%rdi)
-	adcq	%r10, %r13
-	movq	%r13, 40(%rdi)
-	adcq	%r9, %r12
-	movq	%r12, 48(%rdi)
-	adcq	%r8, %r15
-	movq	%r15, 56(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
+	adcq	24(%rsi), %r13
+	adcq	32(%rsi), %r15
+	adcq	40(%rsi), %r12
+	movq	%r12, -88(%rsp)                 ## 8-byte Spill
+	adcq	48(%rsi), %rdx
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	setb	-96(%rsp)                       ## 1-byte Folded Spill
+	movq	-80(%rsp), %rsi                 ## 8-byte Reload
+	imulq	%rcx, %rsi
+	movq	%rsi, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r14
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	%rsi, %rax
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r9
+	movq	%rsi, %rax
+	mulq	%rdi
+	movq	%rdx, %r10
+	movq	%rax, %r11
+	movq	%rsi, %rax
+	movq	-24(%rsp), %rsi                 ## 8-byte Reload
+	mulq	%rsi
+	movq	%rdx, %rbp
+	movq	%rax, %rdi
+	addq	%r10, %rdi
+	adcq	%r9, %rbp
+	adcq	-56(%rsp), %r8                  ## 8-byte Folded Reload
+	adcq	-112(%rsp), %r12                ## 8-byte Folded Reload
+	adcq	-104(%rsp), %r14                ## 8-byte Folded Reload
+	movzbl	-96(%rsp), %eax                 ## 1-byte Folded Reload
+	movq	-128(%rsp), %rdx                ## 8-byte Reload
+	adcq	%rax, %rdx
+	addq	%rcx, %r11
+	adcq	%rbx, %rdi
+	adcq	%r13, %rbp
+	adcq	%r15, %r8
+	adcq	-88(%rsp), %r12                 ## 8-byte Folded Reload
+	adcq	-120(%rsp), %r14                ## 8-byte Folded Reload
+	movq	-32(%rsp), %rax                 ## 8-byte Reload
+	adcq	56(%rax), %rdx
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	setb	-120(%rsp)                      ## 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 ## 8-byte Reload
+	imulq	%rdi, %rcx
+	movq	%rcx, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r11
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r13
+	movq	%rax, %rbx
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r9
+	movq	%rcx, %rax
+	mulq	%rsi
+	movq	%rdx, %rcx
+	movq	%rax, %rsi
+	addq	%r10, %rsi
+	adcq	%rbx, %rcx
+	adcq	-112(%rsp), %r13                ## 8-byte Folded Reload
+	adcq	-104(%rsp), %r15                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %r11                 ## 8-byte Folded Reload
+	movzbl	-120(%rsp), %eax                ## 1-byte Folded Reload
+	movq	-88(%rsp), %rdx                 ## 8-byte Reload
+	adcq	%rax, %rdx
+	addq	%rdi, %r9
+	adcq	%rbp, %rsi
+	adcq	%r8, %rcx
+	adcq	%r12, %r13
+	adcq	%r14, %r15
+	adcq	-128(%rsp), %r11                ## 8-byte Folded Reload
+	movq	-32(%rsp), %rax                 ## 8-byte Reload
+	adcq	64(%rax), %rdx
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	setb	-128(%rsp)                      ## 1-byte Folded Spill
+	movq	-80(%rsp), %rdi                 ## 8-byte Reload
+	imulq	%rsi, %rdi
+	movq	%rdi, %rax
+	mulq	-40(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -96(%rsp)                 ## 8-byte Spill
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	%rdi, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r12
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	%rdi, %rax
+	movq	-48(%rsp), %r14                 ## 8-byte Reload
+	mulq	%r14
+	movq	%rdx, %rbp
+	movq	%rax, %r9
+	movq	%rdi, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %r10
+	movq	%rdi, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rdi
+	movq	%rax, %rbx
+	addq	%r8, %rbx
+	adcq	%r9, %rdi
+	adcq	-56(%rsp), %rbp                 ## 8-byte Folded Reload
+	adcq	-112(%rsp), %r12                ## 8-byte Folded Reload
+	movq	-120(%rsp), %rdx                ## 8-byte Reload
+	adcq	-104(%rsp), %rdx                ## 8-byte Folded Reload
+	movzbl	-128(%rsp), %eax                ## 1-byte Folded Reload
+	movq	-96(%rsp), %r8                  ## 8-byte Reload
+	adcq	%rax, %r8
+	addq	%rsi, %r10
+	adcq	%rcx, %rbx
+	adcq	%r13, %rdi
+	adcq	%r15, %rbp
+	adcq	%r11, %r12
+	adcq	-88(%rsp), %rdx                 ## 8-byte Folded Reload
+	movq	%rdx, -120(%rsp)                ## 8-byte Spill
+	movq	-32(%rsp), %rax                 ## 8-byte Reload
+	adcq	72(%rax), %r8
+	movq	%r8, -96(%rsp)                  ## 8-byte Spill
+	setb	-104(%rsp)                      ## 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 ## 8-byte Reload
+	imulq	%rbx, %rcx
+	movq	%rcx, %rax
+	movq	-40(%rsp), %r9                  ## 8-byte Reload
+	mulq	%r9
+	movq	%rdx, -88(%rsp)                 ## 8-byte Spill
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r15
+	movq	%rax, -8(%rsp)                  ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	%r14
+	movq	%rdx, %r11
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	mulq	-16(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r10
+	movq	%rax, %r14
+	movq	%rcx, %rax
+	mulq	-24(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %r8
+	movq	%rax, %rsi
+	addq	%r10, %rsi
+	adcq	%r13, %r8
+	adcq	-8(%rsp), %r11                  ## 8-byte Folded Reload
+	adcq	-56(%rsp), %r15                 ## 8-byte Folded Reload
+	movq	-128(%rsp), %rdx                ## 8-byte Reload
+	adcq	-112(%rsp), %rdx                ## 8-byte Folded Reload
+	movzbl	-104(%rsp), %eax                ## 1-byte Folded Reload
+	movq	-88(%rsp), %rcx                 ## 8-byte Reload
+	adcq	%rax, %rcx
+	addq	%rbx, %r14
+	adcq	%rdi, %rsi
+	adcq	%rbp, %r8
+	adcq	%r12, %r11
+	adcq	-120(%rsp), %r15                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %rdx                 ## 8-byte Folded Reload
+	movq	%rdx, -128(%rsp)                ## 8-byte Spill
+	movq	-32(%rsp), %rax                 ## 8-byte Reload
+	adcq	80(%rax), %rcx
+	movq	%rcx, -88(%rsp)                 ## 8-byte Spill
+	setb	-120(%rsp)                      ## 1-byte Folded Spill
+	movq	-80(%rsp), %rcx                 ## 8-byte Reload
+	imulq	%rsi, %rcx
+	movq	%rcx, %rax
+	mulq	%r9
+	movq	%rdx, -80(%rsp)                 ## 8-byte Spill
+	movq	%rax, -96(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-64(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rdi
+	movq	%rax, -104(%rsp)                ## 8-byte Spill
+	movq	%rcx, %rax
+	mulq	-72(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbp
+	movq	%rax, -112(%rsp)                ## 8-byte Spill
+	movq	%rcx, %rax
+	movq	-16(%rsp), %r13                 ## 8-byte Reload
+	mulq	%r13
+	movq	%rdx, %r14
+	movq	%rax, %r9
+	movq	%rcx, %rax
+	mulq	-48(%rsp)                       ## 8-byte Folded Reload
+	movq	%rdx, %rbx
+	movq	%rax, %r10
+	movq	%rcx, %rax
+	movq	-24(%rsp), %r12                 ## 8-byte Reload
+	mulq	%r12
+	addq	%r14, %rax
+	adcq	%r10, %rdx
+	adcq	-112(%rsp), %rbx                ## 8-byte Folded Reload
+	adcq	-104(%rsp), %rbp                ## 8-byte Folded Reload
+	adcq	-96(%rsp), %rdi                 ## 8-byte Folded Reload
+	movzbl	-120(%rsp), %r10d               ## 1-byte Folded Reload
+	adcq	-80(%rsp), %r10                 ## 8-byte Folded Reload
+	addq	%rsi, %r9
+	adcq	%r8, %rax
+	adcq	%r11, %rdx
+	adcq	%r15, %rbx
+	adcq	-128(%rsp), %rbp                ## 8-byte Folded Reload
+	adcq	-88(%rsp), %rdi                 ## 8-byte Folded Reload
+	movq	-32(%rsp), %rcx                 ## 8-byte Reload
+	adcq	88(%rcx), %r10
+	movq	%rax, %r8
+	subq	%r13, %r8
+	movq	%rdx, %r9
+	sbbq	%r12, %r9
+	movq	%rbx, %r11
+	sbbq	-48(%rsp), %r11                 ## 8-byte Folded Reload
+	movq	%rbp, %r14
+	sbbq	-72(%rsp), %r14                 ## 8-byte Folded Reload
+	movq	%rdi, %r15
+	sbbq	-64(%rsp), %r15                 ## 8-byte Folded Reload
+	movq	%r10, %rcx
+	sbbq	-40(%rsp), %rcx                 ## 8-byte Folded Reload
+	movq	%rcx, %rsi
+	sarq	$63, %rsi
+	cmovsq	%r10, %rcx
+	movq	(%rsp), %rsi                    ## 8-byte Reload
+	movq	%rcx, 40(%rsi)
+	cmovsq	%rdi, %r15
+	movq	%r15, 32(%rsi)
+	cmovsq	%rbp, %r14
+	movq	%r14, 24(%rsi)
+	cmovsq	%rbx, %r11
+	movq	%r11, 16(%rsi)
+	cmovsq	%rdx, %r9
+	movq	%r9, 8(%rsi)
+	cmovsq	%rax, %r8
+	movq	%r8, (%rsi)
+	addq	$8, %rsp
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
+	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_subPre8L
+                                        ## -- End function
+	.globl	_mcl_fp_addPre6L                ## -- Begin function mcl_fp_addPre6L
 	.p2align	4, 0x90
-_mcl_fp_subPre8L:                       ## @mcl_fp_subPre8L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r8
-	movq	56(%rsi), %r15
-	movq	48(%rdx), %r9
-	movq	40(%rdx), %r10
-	movq	24(%rdx), %r11
-	movq	32(%rdx), %r14
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %r12
+_mcl_fp_addPre6L:                       ## @mcl_fp_addPre6L
+## %bb.0:
+	movq	40(%rsi), %rax
+	movq	32(%rsi), %rcx
+	movq	24(%rsi), %r8
+	movq	16(%rsi), %r9
+	movq	(%rsi), %r10
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %r10
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r9
+	adcq	24(%rdx), %r8
+	adcq	32(%rdx), %rcx
+	adcq	40(%rdx), %rax
+	movq	%rax, 40(%rdi)
+	movq	%rcx, 32(%rdi)
+	movq	%r8, 24(%rdi)
+	movq	%r9, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r10, (%rdi)
+	setb	%al
+	movzbl	%al, %eax
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_subPre6L                ## -- Begin function mcl_fp_subPre6L
+	.p2align	4, 0x90
+_mcl_fp_subPre6L:                       ## @mcl_fp_subPre6L
+## %bb.0:
+	movq	40(%rsi), %rcx
+	movq	32(%rsi), %r8
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r11
+	movq	8(%rsi), %rsi
 	xorl	%eax, %eax
-	subq	(%rdx), %rbx
-	sbbq	8(%rdx), %r12
-	movq	16(%rsi), %rcx
-	sbbq	16(%rdx), %rcx
-	movq	48(%rsi), %r13
-	movq	40(%rsi), %rdx
-	movq	32(%rsi), %rbp
-	movq	24(%rsi), %rsi
-	movq	%rbx, (%rdi)
-	movq	%r12, 8(%rdi)
-	movq	%rcx, 16(%rdi)
-	sbbq	%r11, %rsi
-	movq	%rsi, 24(%rdi)
-	sbbq	%r14, %rbp
-	movq	%rbp, 32(%rdi)
-	sbbq	%r10, %rdx
-	movq	%rdx, 40(%rdi)
-	sbbq	%r9, %r13
-	movq	%r13, 48(%rdi)
-	sbbq	%r8, %r15
-	movq	%r15, 56(%rdi)
-	sbbq	$0, %rax
+	subq	(%rdx), %r11
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %r10
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r8
+	sbbq	40(%rdx), %rcx
+	movq	%rcx, 40(%rdi)
+	movq	%r8, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r11, (%rdi)
+	sbbq	%rax, %rax
 	andl	$1, %eax
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_shr1_8L
+                                        ## -- End function
+	.globl	_mcl_fp_shr1_6L                 ## -- Begin function mcl_fp_shr1_6L
 	.p2align	4, 0x90
-_mcl_fp_shr1_8L:                        ## @mcl_fp_shr1_8L
-## BB#0:
-	movq	56(%rsi), %r8
-	movq	48(%rsi), %r9
-	movq	40(%rsi), %r10
-	movq	32(%rsi), %r11
+_mcl_fp_shr1_6L:                        ## @mcl_fp_shr1_6L
+## %bb.0:
+	movq	(%rsi), %r9
+	movq	8(%rsi), %r8
+	movq	16(%rsi), %r10
 	movq	24(%rsi), %rcx
-	movq	16(%rsi), %rdx
-	movq	(%rsi), %rax
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rax
-	movq	%rax, (%rdi)
-	shrdq	$1, %rdx, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rcx, %rdx
-	movq	%rdx, 16(%rdi)
-	shrdq	$1, %r11, %rcx
-	movq	%rcx, 24(%rdi)
-	shrdq	$1, %r10, %r11
-	movq	%r11, 32(%rdi)
-	shrdq	$1, %r9, %r10
-	movq	%r10, 40(%rdi)
+	movq	32(%rsi), %rax
+	movq	40(%rsi), %rdx
+	movq	%rdx, %rsi
+	shrq	%rsi
+	movq	%rsi, 40(%rdi)
+	shldq	$63, %rax, %rdx
+	movq	%rdx, 32(%rdi)
+	shldq	$63, %rcx, %rax
+	movq	%rax, 24(%rdi)
+	shldq	$63, %r10, %rcx
+	movq	%rcx, 16(%rdi)
+	shldq	$63, %r8, %r10
+	movq	%r10, 8(%rdi)
 	shrdq	$1, %r8, %r9
-	movq	%r9, 48(%rdi)
-	shrq	%r8
-	movq	%r8, 56(%rdi)
+	movq	%r9, (%rdi)
 	retq
-
-	.globl	_mcl_fp_add8L
+                                        ## -- End function
+	.globl	_mcl_fp_add6L                   ## -- Begin function mcl_fp_add6L
 	.p2align	4, 0x90
-_mcl_fp_add8L:                          ## @mcl_fp_add8L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r15
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r12
-	movq	48(%rsi), %r9
-	movq	40(%rsi), %r13
-	movq	24(%rsi), %r11
-	movq	32(%rsi), %r10
-	movq	(%rdx), %r14
-	movq	8(%rdx), %rbx
-	addq	(%rsi), %r14
-	adcq	8(%rsi), %rbx
-	movq	16(%rdx), %rax
-	adcq	16(%rsi), %rax
-	adcq	24(%rdx), %r11
-	movq	40(%rdx), %rsi
-	adcq	32(%rdx), %r10
-	movq	%r14, (%rdi)
-	movq	%rbx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r11, 24(%rdi)
-	movq	%r10, 32(%rdi)
-	adcq	%r13, %rsi
-	movq	%rsi, 40(%rdi)
-	adcq	%r12, %r9
-	movq	%r9, 48(%rdi)
-	adcq	%r15, %r8
-	movq	%r8, 56(%rdi)
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	subq	(%rcx), %r14
-	sbbq	8(%rcx), %rbx
-	sbbq	16(%rcx), %rax
-	sbbq	24(%rcx), %r11
-	sbbq	32(%rcx), %r10
-	sbbq	40(%rcx), %rsi
-	sbbq	48(%rcx), %r9
-	sbbq	56(%rcx), %r8
+_mcl_fp_add6L:                          ## @mcl_fp_add6L
+## %bb.0:
+	movq	40(%rsi), %r8
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r10
+	movq	16(%rsi), %r11
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r11
+	adcq	24(%rdx), %r10
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r8
+	movq	%r8, 40(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r10, 24(%rdi)
+	movq	%r11, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rax, (%rdi)
+	setb	%dl
+	movzbl	%dl, %edx
+	subq	(%rcx), %rax
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %r11
+	sbbq	24(%rcx), %r10
+	sbbq	32(%rcx), %r9
+	sbbq	40(%rcx), %r8
 	sbbq	$0, %rdx
 	testb	$1, %dl
-	jne	LBB120_2
-## BB#1:                                ## %nocarry
-	movq	%r14, (%rdi)
-	movq	%rbx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r11, 24(%rdi)
-	movq	%r10, 32(%rdi)
-	movq	%rsi, 40(%rdi)
-	movq	%r9, 48(%rdi)
-	movq	%r8, 56(%rdi)
-LBB120_2:                               ## %carry
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
+	jne	LBB50_2
+## %bb.1:                               ## %nocarry
+	movq	%rax, (%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r11, 16(%rdi)
+	movq	%r10, 24(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r8, 40(%rdi)
+LBB50_2:                                ## %carry
 	retq
-
-	.globl	_mcl_fp_addNF8L
+                                        ## -- End function
+	.globl	_mcl_fp_addNF6L                 ## -- Begin function mcl_fp_addNF6L
 	.p2align	4, 0x90
-_mcl_fp_addNF8L:                        ## @mcl_fp_addNF8L
-## BB#0:
-	pushq	%rbp
+_mcl_fp_addNF6L:                        ## @mcl_fp_addNF6L
+## %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	56(%rdx), %r8
-	movq	48(%rdx), %rbp
-	movq	40(%rdx), %rbx
-	movq	32(%rdx), %rax
-	movq	24(%rdx), %r11
-	movq	16(%rdx), %r15
-	movq	(%rdx), %r13
-	movq	8(%rdx), %r12
-	addq	(%rsi), %r13
-	adcq	8(%rsi), %r12
-	adcq	16(%rsi), %r15
-	adcq	24(%rsi), %r11
-	adcq	32(%rsi), %rax
-	movq	%rax, %r10
-	movq	%r10, -24(%rsp)         ## 8-byte Spill
-	adcq	40(%rsi), %rbx
-	movq	%rbx, %r9
-	movq	%r9, -16(%rsp)          ## 8-byte Spill
-	adcq	48(%rsi), %rbp
-	movq	%rbp, %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	adcq	56(%rsi), %r8
-	movq	%r13, %rsi
-	subq	(%rcx), %rsi
-	movq	%r12, %rdx
-	sbbq	8(%rcx), %rdx
+	movq	40(%rdx), %r15
+	movq	32(%rdx), %r11
+	movq	24(%rdx), %r10
+	movq	16(%rdx), %r9
+	movq	(%rdx), %r8
+	movq	8(%rdx), %r14
+	addq	(%rsi), %r8
+	adcq	8(%rsi), %r14
+	adcq	16(%rsi), %r9
+	adcq	24(%rsi), %r10
+	adcq	32(%rsi), %r11
+	adcq	40(%rsi), %r15
+	movq	%r8, %r12
+	subq	(%rcx), %r12
+	movq	%r14, %r13
+	sbbq	8(%rcx), %r13
+	movq	%r9, %rdx
+	sbbq	16(%rcx), %rdx
+	movq	%r10, %rax
+	sbbq	24(%rcx), %rax
+	movq	%r11, %rsi
+	sbbq	32(%rcx), %rsi
 	movq	%r15, %rbx
-	sbbq	16(%rcx), %rbx
-	movq	%r11, %r14
-	sbbq	24(%rcx), %r14
-	movq	%r10, %rbp
-	sbbq	32(%rcx), %rbp
-	movq	%r9, %r10
-	sbbq	40(%rcx), %r10
-	movq	%rax, %r9
-	sbbq	48(%rcx), %r9
-	movq	%r8, %rax
-	sbbq	56(%rcx), %rax
-	testq	%rax, %rax
-	cmovsq	%r13, %rsi
-	movq	%rsi, (%rdi)
-	cmovsq	%r12, %rdx
-	movq	%rdx, 8(%rdi)
-	cmovsq	%r15, %rbx
-	movq	%rbx, 16(%rdi)
-	cmovsq	%r11, %r14
-	movq	%r14, 24(%rdi)
-	cmovsq	-24(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rbp, 32(%rdi)
-	cmovsq	-16(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%r10, 40(%rdi)
-	cmovsq	-8(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 48(%rdi)
-	cmovsq	%r8, %rax
-	movq	%rax, 56(%rdi)
-	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
-	retq
-
-	.globl	_mcl_fp_sub8L
-	.p2align	4, 0x90
-_mcl_fp_sub8L:                          ## @mcl_fp_sub8L
-## BB#0:
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
-	pushq	%rbx
-	movq	56(%rdx), %r12
-	movq	56(%rsi), %r8
-	movq	48(%rdx), %r13
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r10
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r10
-	movq	16(%rsi), %r11
-	sbbq	16(%rdx), %r11
-	movq	24(%rsi), %r15
-	sbbq	24(%rdx), %r15
-	movq	32(%rsi), %r14
-	sbbq	32(%rdx), %r14
-	movq	48(%rsi), %r9
-	movq	40(%rsi), %rsi
-	sbbq	40(%rdx), %rsi
-	movq	%rax, (%rdi)
-	movq	%r10, 8(%rdi)
-	movq	%r11, 16(%rdi)
-	movq	%r15, 24(%rdi)
-	movq	%r14, 32(%rdi)
-	movq	%rsi, 40(%rdi)
-	sbbq	%r13, %r9
-	movq	%r9, 48(%rdi)
-	sbbq	%r12, %r8
-	movq	%r8, 56(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	LBB122_2
-## BB#1:                                ## %carry
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	movq	8(%rcx), %rax
-	adcq	%r10, %rax
-	movq	%rax, 8(%rdi)
-	movq	16(%rcx), %rax
-	adcq	%r11, %rax
-	movq	%rax, 16(%rdi)
-	movq	24(%rcx), %rax
-	adcq	%r15, %rax
+	sbbq	40(%rcx), %rbx
+	movq	%rbx, %rcx
+	sarq	$63, %rcx
+	cmovsq	%r15, %rbx
+	movq	%rbx, 40(%rdi)
+	cmovsq	%r11, %rsi
+	movq	%rsi, 32(%rdi)
+	cmovsq	%r10, %rax
 	movq	%rax, 24(%rdi)
-	movq	32(%rcx), %rax
-	adcq	%r14, %rax
-	movq	%rax, 32(%rdi)
-	movq	40(%rcx), %rax
-	adcq	%rsi, %rax
-	movq	%rax, 40(%rdi)
-	movq	48(%rcx), %rax
-	adcq	%r9, %rax
-	movq	%rax, 48(%rdi)
-	movq	56(%rcx), %rax
-	adcq	%r8, %rax
-	movq	%rax, 56(%rdi)
-LBB122_2:                               ## %nocarry
+	cmovsq	%r9, %rdx
+	movq	%rdx, 16(%rdi)
+	cmovsq	%r14, %r13
+	movq	%r13, 8(%rdi)
+	cmovsq	%r8, %r12
+	movq	%r12, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
 	retq
-
-	.globl	_mcl_fp_subNF8L
+                                        ## -- End function
+	.globl	_mcl_fp_sub6L                   ## -- Begin function mcl_fp_sub6L
 	.p2align	4, 0x90
-_mcl_fp_subNF8L:                        ## @mcl_fp_subNF8L
-## BB#0:
-	pushq	%rbp
+_mcl_fp_sub6L:                          ## @mcl_fp_sub6L
+## %bb.0:
+	pushq	%rbx
+	movq	40(%rsi), %r11
+	movq	32(%rsi), %r10
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %rax
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
+	xorl	%ebx, %ebx
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %rax
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r10
+	sbbq	40(%rdx), %r11
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rbx, %rbx
+	testb	$1, %bl
+	jne	LBB52_2
+## %bb.1:                               ## %nocarry
+	popq	%rbx
+	retq
+LBB52_2:                                ## %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %rax
+	adcq	24(%rcx), %r9
+	adcq	32(%rcx), %r10
+	adcq	40(%rcx), %r11
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%rax, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	popq	%rbx
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_subNF6L                 ## -- Begin function mcl_fp_subNF6L
+	.p2align	4, 0x90
+_mcl_fp_subNF6L:                        ## @mcl_fp_subNF6L
+## %bb.0:
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r8
-	movq	%rdi, %r9
-	movdqu	(%rdx), %xmm0
-	movdqu	16(%rdx), %xmm1
-	movdqu	32(%rdx), %xmm2
-	movdqu	48(%rdx), %xmm3
-	pshufd	$78, %xmm3, %xmm4       ## xmm4 = xmm3[2,3,0,1]
-	movd	%xmm4, %r12
-	movdqu	(%rsi), %xmm4
-	movdqu	16(%rsi), %xmm5
-	movdqu	32(%rsi), %xmm8
-	movdqu	48(%rsi), %xmm7
-	pshufd	$78, %xmm7, %xmm6       ## xmm6 = xmm7[2,3,0,1]
-	movd	%xmm6, %rcx
-	movd	%xmm3, %r13
-	movd	%xmm7, %rdi
-	pshufd	$78, %xmm2, %xmm3       ## xmm3 = xmm2[2,3,0,1]
-	movd	%xmm3, %rbp
-	pshufd	$78, %xmm8, %xmm3       ## xmm3 = xmm8[2,3,0,1]
-	movd	%xmm3, %rdx
-	movd	%xmm2, %rsi
-	pshufd	$78, %xmm1, %xmm2       ## xmm2 = xmm1[2,3,0,1]
-	movd	%xmm2, %r11
-	pshufd	$78, %xmm5, %xmm2       ## xmm2 = xmm5[2,3,0,1]
-	movd	%xmm1, %r15
-	pshufd	$78, %xmm0, %xmm1       ## xmm1 = xmm0[2,3,0,1]
-	movd	%xmm1, %rbx
-	pshufd	$78, %xmm4, %xmm1       ## xmm1 = xmm4[2,3,0,1]
-	movd	%xmm0, %rax
-	movd	%xmm4, %r14
-	subq	%rax, %r14
-	movd	%xmm1, %r10
-	sbbq	%rbx, %r10
-	movd	%xmm5, %rbx
-	sbbq	%r15, %rbx
-	movd	%xmm2, %r15
-	sbbq	%r11, %r15
-	movd	%xmm8, %r11
-	sbbq	%rsi, %r11
-	sbbq	%rbp, %rdx
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	sbbq	%r13, %rdi
-	movq	%rdi, -16(%rsp)         ## 8-byte Spill
-	sbbq	%r12, %rcx
-	movq	%rcx, -8(%rsp)          ## 8-byte Spill
-	movq	%rcx, %rbp
-	sarq	$63, %rbp
-	movq	56(%r8), %r12
-	andq	%rbp, %r12
-	movq	48(%r8), %r13
-	andq	%rbp, %r13
-	movq	40(%r8), %rdi
-	andq	%rbp, %rdi
-	movq	32(%r8), %rsi
-	andq	%rbp, %rsi
-	movq	24(%r8), %rdx
-	andq	%rbp, %rdx
-	movq	16(%r8), %rcx
-	andq	%rbp, %rcx
-	movq	8(%r8), %rax
-	andq	%rbp, %rax
-	andq	(%r8), %rbp
-	addq	%r14, %rbp
+	movq	40(%rsi), %r15
+	movq	32(%rsi), %r8
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %r10
+	movq	(%rsi), %r11
+	movq	8(%rsi), %r14
+	subq	(%rdx), %r11
+	sbbq	8(%rdx), %r14
+	sbbq	16(%rdx), %r10
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r8
+	sbbq	40(%rdx), %r15
+	movq	%r15, %rdx
+	sarq	$63, %rdx
+	movq	%rdx, %rbx
+	shldq	$1, %r15, %rbx
+	andq	(%rcx), %rbx
+	movq	40(%rcx), %r12
+	andq	%rdx, %r12
+	movq	32(%rcx), %r13
+	andq	%rdx, %r13
+	movq	24(%rcx), %rsi
+	andq	%rdx, %rsi
+	movq	16(%rcx), %rax
+	andq	%rdx, %rax
+	andq	8(%rcx), %rdx
+	addq	%r11, %rbx
+	movq	%rbx, (%rdi)
+	adcq	%r14, %rdx
+	movq	%rdx, 8(%rdi)
 	adcq	%r10, %rax
-	movq	%rbp, (%r9)
-	adcq	%rbx, %rcx
-	movq	%rax, 8(%r9)
-	movq	%rcx, 16(%r9)
-	adcq	%r15, %rdx
-	movq	%rdx, 24(%r9)
-	adcq	%r11, %rsi
-	movq	%rsi, 32(%r9)
-	adcq	-24(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%rdi, 40(%r9)
-	adcq	-16(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%r13, 48(%r9)
-	adcq	-8(%rsp), %r12          ## 8-byte Folded Reload
-	movq	%r12, 56(%r9)
+	movq	%rax, 16(%rdi)
+	adcq	%r9, %rsi
+	movq	%rsi, 24(%rdi)
+	adcq	%r8, %r13
+	movq	%r13, 32(%rdi)
+	adcq	%r15, %r12
+	movq	%r12, 40(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
 	popq	%r14
 	popq	%r15
-	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_add8L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_add6L                ## -- Begin function mcl_fpDbl_add6L
 	.p2align	4, 0x90
-_mcl_fpDbl_add8L:                       ## @mcl_fpDbl_add8L
-## BB#0:
+_mcl_fpDbl_add6L:                       ## @mcl_fpDbl_add6L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r8
-	movq	120(%rdx), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	112(%rdx), %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	104(%rdx), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	96(%rdx), %r14
-	movq	24(%rsi), %r15
-	movq	32(%rsi), %r11
-	movq	16(%rdx), %r12
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rax
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rax
-	adcq	16(%rsi), %r12
-	adcq	24(%rdx), %r15
-	adcq	32(%rdx), %r11
-	movq	88(%rdx), %rbp
-	movq	80(%rdx), %r13
-	movq	%rbx, (%rdi)
-	movq	72(%rdx), %r10
-	movq	%rax, 8(%rdi)
-	movq	64(%rdx), %r9
-	movq	%r12, 16(%rdi)
-	movq	40(%rdx), %r12
-	movq	%r15, 24(%rdi)
-	movq	40(%rsi), %rbx
-	adcq	%r12, %rbx
-	movq	56(%rdx), %r15
-	movq	48(%rdx), %r12
-	movq	%r11, 32(%rdi)
-	movq	48(%rsi), %rdx
-	adcq	%r12, %rdx
-	movq	120(%rsi), %r12
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rax
-	adcq	%r15, %rax
-	movq	112(%rsi), %rcx
-	movq	%rdx, 48(%rdi)
-	movq	64(%rsi), %rbx
-	adcq	%r9, %rbx
-	movq	104(%rsi), %rdx
-	movq	%rax, 56(%rdi)
-	movq	72(%rsi), %r9
-	adcq	%r10, %r9
-	movq	80(%rsi), %r11
-	adcq	%r13, %r11
-	movq	96(%rsi), %rax
 	movq	88(%rsi), %r15
-	adcq	%rbp, %r15
-	adcq	%r14, %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rdx, %rax
-	adcq	-24(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	adcq	-16(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -16(%rsp)         ## 8-byte Spill
-	adcq	-32(%rsp), %r12         ## 8-byte Folded Reload
-	movq	%r12, -32(%rsp)         ## 8-byte Spill
-	sbbq	%rbp, %rbp
-	andl	$1, %ebp
-	movq	%rbx, %rsi
-	subq	(%r8), %rsi
-	movq	%r9, %rdx
-	sbbq	8(%r8), %rdx
-	movq	%r11, %r10
-	sbbq	16(%r8), %r10
-	movq	%r15, %r14
-	sbbq	24(%r8), %r14
-	movq	-8(%rsp), %r13          ## 8-byte Reload
-	sbbq	32(%r8), %r13
-	movq	%rax, %r12
-	sbbq	40(%r8), %r12
-	movq	%rcx, %rax
-	sbbq	48(%r8), %rax
-	movq	-32(%rsp), %rcx         ## 8-byte Reload
-	sbbq	56(%r8), %rcx
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	cmovneq	%rbx, %rsi
-	movq	%rsi, 64(%rdi)
-	testb	%bpl, %bpl
-	cmovneq	%r9, %rdx
-	movq	%rdx, 72(%rdi)
-	cmovneq	%r11, %r10
-	movq	%r10, 80(%rdi)
-	cmovneq	%r15, %r14
-	movq	%r14, 88(%rdi)
-	cmovneq	-8(%rsp), %r13          ## 8-byte Folded Reload
-	movq	%r13, 96(%rdi)
-	cmovneq	-24(%rsp), %r12         ## 8-byte Folded Reload
-	movq	%r12, 104(%rdi)
-	cmovneq	-16(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, 112(%rdi)
-	cmovneq	-32(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, 120(%rdi)
+	movq	80(%rsi), %r14
+	movq	72(%rsi), %r11
+	movq	64(%rsi), %r10
+	movq	56(%rsi), %r9
+	movq	48(%rsi), %r8
+	movq	40(%rsi), %rax
+	movq	(%rsi), %r12
+	movq	8(%rsi), %r13
+	addq	(%rdx), %r12
+	adcq	8(%rdx), %r13
+	movq	32(%rsi), %rbx
+	movq	24(%rsi), %rbp
+	movq	16(%rsi), %rsi
+	adcq	16(%rdx), %rsi
+	adcq	24(%rdx), %rbp
+	adcq	32(%rdx), %rbx
+	adcq	40(%rdx), %rax
+	adcq	48(%rdx), %r8
+	adcq	56(%rdx), %r9
+	adcq	64(%rdx), %r10
+	adcq	72(%rdx), %r11
+	adcq	80(%rdx), %r14
+	adcq	88(%rdx), %r15
+	movq	%rax, 40(%rdi)
+	movq	%rbx, 32(%rdi)
+	movq	%rbp, 24(%rdi)
+	movq	%rsi, 16(%rdi)
+	movq	%r13, 8(%rdi)
+	movq	%r12, (%rdi)
+	setb	%al
+	movzbl	%al, %r12d
+	movq	%r8, %r13
+	subq	(%rcx), %r13
+	movq	%r9, %rsi
+	sbbq	8(%rcx), %rsi
+	movq	%r10, %rbx
+	sbbq	16(%rcx), %rbx
+	movq	%r11, %rbp
+	sbbq	24(%rcx), %rbp
+	movq	%r14, %rax
+	sbbq	32(%rcx), %rax
+	movq	%r15, %rdx
+	sbbq	40(%rcx), %rdx
+	sbbq	$0, %r12
+	testb	$1, %r12b
+	cmovneq	%r15, %rdx
+	movq	%rdx, 88(%rdi)
+	cmovneq	%r14, %rax
+	movq	%rax, 80(%rdi)
+	cmovneq	%r11, %rbp
+	movq	%rbp, 72(%rdi)
+	cmovneq	%r10, %rbx
+	movq	%rbx, 64(%rdi)
+	cmovneq	%r9, %rsi
+	movq	%rsi, 56(%rdi)
+	cmovneq	%r8, %r13
+	movq	%r13, 48(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13257,111 +5514,80 @@ _mcl_fpDbl_add8L:                       ## @mcl_fpDbl_add8L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_sub8L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sub6L                ## -- Begin function mcl_fpDbl_sub6L
 	.p2align	4, 0x90
-_mcl_fpDbl_sub8L:                       ## @mcl_fpDbl_sub8L
-## BB#0:
+_mcl_fpDbl_sub6L:                       ## @mcl_fpDbl_sub6L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r15
-	movq	120(%rdx), %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	112(%rdx), %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	104(%rdx), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %r9
-	movq	(%rsi), %r12
-	movq	8(%rsi), %r14
-	xorl	%r8d, %r8d
-	subq	(%rdx), %r12
-	sbbq	8(%rdx), %r14
-	sbbq	16(%rdx), %r9
-	movq	24(%rsi), %rbx
-	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %r13
-	sbbq	32(%rdx), %r13
-	movq	96(%rdx), %rbp
-	movq	88(%rdx), %r11
-	movq	%r12, (%rdi)
-	movq	80(%rdx), %r12
-	movq	%r14, 8(%rdi)
-	movq	72(%rdx), %r10
-	movq	%r9, 16(%rdi)
-	movq	40(%rdx), %r9
-	movq	%rbx, 24(%rdi)
+	movq	%rcx, %r10
+	movq	88(%rsi), %r15
+	movq	80(%rsi), %r14
+	movq	72(%rsi), %r11
+	movq	64(%rsi), %r9
+	movq	56(%rsi), %r8
+	movq	48(%rsi), %rax
+	movq	%rax, -16(%rsp)                 ## 8-byte Spill
+	movq	(%rsi), %rcx
+	movq	8(%rsi), %r13
+	xorl	%eax, %eax
+	subq	(%rdx), %rcx
+	movq	%rcx, -8(%rsp)                  ## 8-byte Spill
+	sbbq	8(%rdx), %r13
 	movq	40(%rsi), %rbx
-	sbbq	%r9, %rbx
-	movq	48(%rdx), %r9
-	movq	%r13, 32(%rdi)
-	movq	48(%rsi), %r14
-	sbbq	%r9, %r14
-	movq	64(%rdx), %r13
-	movq	56(%rdx), %r9
+	movq	32(%rsi), %rbp
+	movq	24(%rsi), %rcx
+	movq	16(%rsi), %rsi
+	sbbq	16(%rdx), %rsi
+	sbbq	24(%rdx), %rcx
+	sbbq	32(%rdx), %rbp
+	sbbq	40(%rdx), %rbx
+	movq	-16(%rsp), %r12                 ## 8-byte Reload
+	sbbq	48(%rdx), %r12
+	movq	%r12, -16(%rsp)                 ## 8-byte Spill
+	sbbq	56(%rdx), %r8
+	sbbq	64(%rdx), %r9
+	sbbq	72(%rdx), %r11
+	sbbq	80(%rdx), %r14
+	sbbq	88(%rdx), %r15
 	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rdx
-	sbbq	%r9, %rdx
-	movq	120(%rsi), %rcx
-	movq	%r14, 48(%rdi)
-	movq	64(%rsi), %rbx
-	sbbq	%r13, %rbx
-	movq	112(%rsi), %rax
-	movq	%rdx, 56(%rdi)
-	movq	72(%rsi), %r9
-	sbbq	%r10, %r9
-	movq	80(%rsi), %r13
-	sbbq	%r12, %r13
-	movq	88(%rsi), %r12
-	sbbq	%r11, %r12
-	movq	104(%rsi), %rdx
-	movq	96(%rsi), %r14
-	sbbq	%rbp, %r14
-	sbbq	-24(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	sbbq	-16(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	sbbq	-8(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, -8(%rsp)          ## 8-byte Spill
-	movl	$0, %ebp
-	sbbq	$0, %rbp
-	andl	$1, %ebp
-	movq	(%r15), %r11
-	cmoveq	%r8, %r11
-	testb	%bpl, %bpl
-	movq	16(%r15), %rbp
-	cmoveq	%r8, %rbp
-	movq	8(%r15), %rsi
-	cmoveq	%r8, %rsi
-	movq	56(%r15), %r10
-	cmoveq	%r8, %r10
-	movq	48(%r15), %rdx
-	cmoveq	%r8, %rdx
-	movq	40(%r15), %rcx
-	cmoveq	%r8, %rcx
-	movq	32(%r15), %rax
-	cmoveq	%r8, %rax
-	cmovneq	24(%r15), %r8
-	addq	%rbx, %r11
-	adcq	%r9, %rsi
-	movq	%r11, 64(%rdi)
-	adcq	%r13, %rbp
+	movq	%rbp, 32(%rdi)
+	movq	%rcx, 24(%rdi)
+	movq	%rsi, 16(%rdi)
+	movq	%r13, 8(%rdi)
+	movq	-8(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rcx, (%rdi)
+	sbbq	%rax, %rax
+	andl	$1, %eax
+	negq	%rax
+	movq	40(%r10), %rcx
+	andq	%rax, %rcx
+	movq	32(%r10), %rdx
+	andq	%rax, %rdx
+	movq	24(%r10), %rsi
+	andq	%rax, %rsi
+	movq	16(%r10), %rbx
+	andq	%rax, %rbx
+	movq	8(%r10), %rbp
+	andq	%rax, %rbp
+	andq	(%r10), %rax
+	addq	-16(%rsp), %rax                 ## 8-byte Folded Reload
+	movq	%rax, 48(%rdi)
+	adcq	%r8, %rbp
+	movq	%rbp, 56(%rdi)
+	adcq	%r9, %rbx
+	movq	%rbx, 64(%rdi)
+	adcq	%r11, %rsi
 	movq	%rsi, 72(%rdi)
-	movq	%rbp, 80(%rdi)
-	adcq	%r12, %r8
-	movq	%r8, 88(%rdi)
-	adcq	%r14, %rax
-	movq	%rax, 96(%rdi)
-	adcq	-24(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, 104(%rdi)
-	adcq	-16(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, 112(%rdi)
-	adcq	-8(%rsp), %r10          ## 8-byte Folded Reload
-	movq	%r10, 120(%rdi)
+	adcq	%r14, %rdx
+	movq	%rdx, 80(%rdi)
+	adcq	%r15, %rcx
+	movq	%rcx, 88(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13369,69 +5595,64 @@ _mcl_fpDbl_sub8L:                       ## @mcl_fpDbl_sub8L
 	popq	%r15
 	popq	%rbp
 	retq
-
+                                        ## -- End function
+	.globl	_mulPv512x64                    ## -- Begin function mulPv512x64
 	.p2align	4, 0x90
-l_mulPv576x64:                          ## @mulPv576x64
-## BB#0:
+_mulPv512x64:                           ## @mulPv512x64
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdx, %rbx
-	movq	%rbx, %rax
+	movq	%rdx, %rcx
+	movq	%rdx, %rax
 	mulq	(%rsi)
-	movq	%rdx, -32(%rsp)         ## 8-byte Spill
+	movq	%rdx, -24(%rsp)                 ## 8-byte Spill
 	movq	%rax, (%rdi)
-	movq	%rbx, %rax
-	mulq	64(%rsi)
-	movq	%rdx, %r10
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	%rbx, %rax
+	movq	%rcx, %rax
 	mulq	56(%rsi)
-	movq	%rdx, %r14
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
+	movq	%rdx, %r10
+	movq	%rax, -8(%rsp)                  ## 8-byte Spill
+	movq	%rcx, %rax
 	mulq	48(%rsi)
-	movq	%rdx, %r12
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
+	movq	%rdx, %r11
+	movq	%rax, -16(%rsp)                 ## 8-byte Spill
+	movq	%rcx, %rax
 	mulq	40(%rsi)
-	movq	%rdx, %rcx
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	movq	%rbx, %rax
+	movq	%rdx, %r12
+	movq	%rax, %r15
+	movq	%rcx, %rax
 	mulq	32(%rsi)
+	movq	%rdx, %rbx
+	movq	%rax, %r13
+	movq	%rcx, %rax
+	mulq	24(%rsi)
 	movq	%rdx, %rbp
 	movq	%rax, %r8
-	movq	%rbx, %rax
-	mulq	24(%rsi)
-	movq	%rdx, %r9
-	movq	%rax, %r11
-	movq	%rbx, %rax
+	movq	%rcx, %rax
 	mulq	16(%rsi)
-	movq	%rdx, %r15
-	movq	%rax, %r13
-	movq	%rbx, %rax
+	movq	%rdx, %r9
+	movq	%rax, %r14
+	movq	%rcx, %rax
 	mulq	8(%rsi)
-	addq	-32(%rsp), %rax         ## 8-byte Folded Reload
+	addq	-24(%rsp), %rax                 ## 8-byte Folded Reload
 	movq	%rax, 8(%rdi)
-	adcq	%r13, %rdx
+	adcq	%r14, %rdx
 	movq	%rdx, 16(%rdi)
-	adcq	%r11, %r15
-	movq	%r15, 24(%rdi)
 	adcq	%r8, %r9
-	movq	%r9, 32(%rdi)
-	adcq	-40(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rbp, 40(%rdi)
-	adcq	-24(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, 48(%rdi)
-	adcq	-16(%rsp), %r12         ## 8-byte Folded Reload
-	movq	%r12, 56(%rdi)
-	adcq	-8(%rsp), %r14          ## 8-byte Folded Reload
-	movq	%r14, 64(%rdi)
+	movq	%r9, 24(%rdi)
+	adcq	%r13, %rbp
+	movq	%rbp, 32(%rdi)
+	adcq	%r15, %rbx
+	movq	%rbx, 40(%rdi)
+	adcq	-16(%rsp), %r12                 ## 8-byte Folded Reload
+	movq	%r12, 48(%rdi)
+	adcq	-8(%rsp), %r11                  ## 8-byte Folded Reload
+	movq	%r11, 56(%rdi)
 	adcq	$0, %r10
-	movq	%r10, 72(%rdi)
+	movq	%r10, 64(%rdi)
 	movq	%rdi, %rax
 	popq	%rbx
 	popq	%r12
@@ -13440,345 +5661,245 @@ l_mulPv576x64:                          ## @mulPv576x64
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_mulUnitPre9L
+                                        ## -- End function
+	.globl	_mcl_fp_mulUnitPre8L            ## -- Begin function mcl_fp_mulUnitPre8L
 	.p2align	4, 0x90
-_mcl_fp_mulUnitPre9L:                   ## @mcl_fp_mulUnitPre9L
-## BB#0:
-	pushq	%r14
+_mcl_fp_mulUnitPre8L:                   ## @mcl_fp_mulUnitPre8L
+## %bb.0:
 	pushq	%rbx
-	subq	$88, %rsp
+	subq	$80, %rsp
 	movq	%rdi, %rbx
 	leaq	8(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	80(%rsp), %r8
-	movq	72(%rsp), %r9
-	movq	64(%rsp), %r10
-	movq	56(%rsp), %r11
-	movq	48(%rsp), %r14
-	movq	40(%rsp), %rax
-	movq	32(%rsp), %rcx
-	movq	24(%rsp), %rdx
-	movq	8(%rsp), %rsi
-	movq	16(%rsp), %rdi
-	movq	%rsi, (%rbx)
-	movq	%rdi, 8(%rbx)
-	movq	%rdx, 16(%rbx)
-	movq	%rcx, 24(%rbx)
-	movq	%rax, 32(%rbx)
-	movq	%r14, 40(%rbx)
-	movq	%r11, 48(%rbx)
-	movq	%r10, 56(%rbx)
-	movq	%r9, 64(%rbx)
-	movq	%r8, 72(%rbx)
-	addq	$88, %rsp
+	callq	_mulPv512x64
+	movq	8(%rsp), %r8
+	movq	16(%rsp), %r9
+	movq	24(%rsp), %r10
+	movq	32(%rsp), %r11
+	movq	40(%rsp), %rdi
+	movq	48(%rsp), %rax
+	movq	56(%rsp), %rcx
+	movq	64(%rsp), %rdx
+	movq	72(%rsp), %rsi
+	movq	%rsi, 64(%rbx)
+	movq	%rdx, 56(%rbx)
+	movq	%rcx, 48(%rbx)
+	movq	%rax, 40(%rbx)
+	movq	%rdi, 32(%rbx)
+	movq	%r11, 24(%rbx)
+	movq	%r10, 16(%rbx)
+	movq	%r9, 8(%rbx)
+	movq	%r8, (%rbx)
+	addq	$80, %rsp
 	popq	%rbx
-	popq	%r14
 	retq
-
-	.globl	_mcl_fpDbl_mulPre9L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_mulPre8L             ## -- Begin function mcl_fpDbl_mulPre8L
 	.p2align	4, 0x90
-_mcl_fpDbl_mulPre9L:                    ## @mcl_fpDbl_mulPre9L
-## BB#0:
+_mcl_fpDbl_mulPre8L:                    ## @mcl_fpDbl_mulPre8L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$808, %rsp              ## imm = 0x328
+	subq	$648, %rsp                      ## imm = 0x288
 	movq	%rdx, %rax
-	movq	%rdi, %r12
-	movq	(%rax), %rdx
-	movq	%rax, %rbx
-	movq	%rbx, 80(%rsp)          ## 8-byte Spill
-	leaq	728(%rsp), %rdi
-	movq	%rsi, %rbp
-	movq	%rbp, 72(%rsp)          ## 8-byte Spill
-	callq	l_mulPv576x64
-	movq	800(%rsp), %r13
-	movq	792(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	movq	784(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	776(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	768(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	760(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	752(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	744(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	728(%rsp), %rax
-	movq	736(%rsp), %r14
-	movq	%rax, (%r12)
-	movq	%r12, 64(%rsp)          ## 8-byte Spill
-	movq	8(%rbx), %rdx
-	leaq	648(%rsp), %rdi
-	movq	%rbp, %rsi
-	callq	l_mulPv576x64
-	movq	720(%rsp), %r8
-	movq	712(%rsp), %rcx
-	movq	704(%rsp), %rdx
-	movq	696(%rsp), %rsi
-	movq	688(%rsp), %rdi
-	movq	680(%rsp), %rbp
-	addq	648(%rsp), %r14
-	movq	672(%rsp), %rax
-	movq	656(%rsp), %rbx
-	movq	664(%rsp), %r15
-	movq	%r14, 8(%r12)
-	adcq	24(%rsp), %rbx          ## 8-byte Folded Reload
-	adcq	32(%rsp), %r15          ## 8-byte Folded Reload
-	adcq	40(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, %r14
-	adcq	(%rsp), %rbp            ## 8-byte Folded Reload
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 32(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 40(%rsp)          ## 8-byte Spill
-	adcq	48(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, (%rsp)            ## 8-byte Spill
-	adcq	%r13, %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 16(%rsp)           ## 8-byte Spill
-	movq	80(%rsp), %r13          ## 8-byte Reload
-	movq	16(%r13), %rdx
-	leaq	568(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	640(%rsp), %r8
-	movq	632(%rsp), %r9
-	movq	624(%rsp), %r10
-	movq	616(%rsp), %rdi
-	movq	608(%rsp), %rbp
-	movq	600(%rsp), %rcx
-	addq	568(%rsp), %rbx
-	movq	592(%rsp), %rdx
-	movq	576(%rsp), %r12
-	movq	584(%rsp), %rsi
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	%rbx, 16(%rax)
-	adcq	%r15, %r12
-	adcq	%r14, %rsi
-	movq	%rsi, 48(%rsp)          ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	40(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	(%rsp), %rdi            ## 8-byte Folded Reload
-	movq	%rdi, 40(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %r10           ## 8-byte Folded Reload
-	movq	%r10, (%rsp)            ## 8-byte Spill
-	adcq	16(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 8(%rsp)            ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 16(%rsp)           ## 8-byte Spill
-	movq	24(%r13), %rdx
-	leaq	488(%rsp), %rdi
-	movq	72(%rsp), %r15          ## 8-byte Reload
-	movq	%r15, %rsi
-	callq	l_mulPv576x64
-	movq	560(%rsp), %r8
-	movq	552(%rsp), %rcx
-	movq	544(%rsp), %rdx
-	movq	536(%rsp), %rsi
-	movq	528(%rsp), %rdi
-	movq	520(%rsp), %rbp
-	addq	488(%rsp), %r12
-	movq	512(%rsp), %rax
-	movq	496(%rsp), %rbx
-	movq	504(%rsp), %r13
-	movq	64(%rsp), %r14          ## 8-byte Reload
-	movq	%r12, 24(%r14)
-	adcq	48(%rsp), %rbx          ## 8-byte Folded Reload
-	adcq	56(%rsp), %r13          ## 8-byte Folded Reload
-	adcq	24(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	40(%rsp), %rdi          ## 8-byte Folded Reload
-	movq	%rdi, 40(%rsp)          ## 8-byte Spill
-	adcq	(%rsp), %rsi            ## 8-byte Folded Reload
-	movq	%rsi, (%rsp)            ## 8-byte Spill
-	adcq	8(%rsp), %rdx           ## 8-byte Folded Reload
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	adcq	16(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 48(%rsp)           ## 8-byte Spill
-	movq	80(%rsp), %r12          ## 8-byte Reload
-	movq	32(%r12), %rdx
-	leaq	408(%rsp), %rdi
+	movq	%rdi, 32(%rsp)                  ## 8-byte Spill
+	movq	(%rdx), %rdx
+	movq	%rax, %r12
+	movq	%rax, 40(%rsp)                  ## 8-byte Spill
+	leaq	576(%rsp), %rdi
+	movq	%rsi, %r15
+	callq	_mulPv512x64
+	movq	640(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	movq	632(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	624(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	616(%rsp), %r13
+	movq	608(%rsp), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	movq	600(%rsp), %rbp
+	movq	592(%rsp), %rbx
+	movq	576(%rsp), %rax
+	movq	584(%rsp), %r14
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rax, (%rcx)
+	movq	8(%r12), %rdx
+	leaq	504(%rsp), %rdi
 	movq	%r15, %rsi
-	callq	l_mulPv576x64
-	movq	480(%rsp), %r8
-	movq	472(%rsp), %r9
-	movq	464(%rsp), %rdx
-	movq	456(%rsp), %rsi
-	movq	448(%rsp), %rdi
-	movq	440(%rsp), %rbp
-	addq	408(%rsp), %rbx
-	movq	432(%rsp), %rax
-	movq	416(%rsp), %r15
-	movq	424(%rsp), %rcx
-	movq	%rbx, 32(%r14)
-	adcq	%r13, %r15
-	adcq	24(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 56(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	40(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	(%rsp), %rdi            ## 8-byte Folded Reload
-	movq	%rdi, 40(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rsi           ## 8-byte Folded Reload
-	movq	%rsi, (%rsp)            ## 8-byte Spill
-	adcq	16(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	adcq	48(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 16(%rsp)           ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 48(%rsp)           ## 8-byte Spill
-	movq	%r12, %r14
-	movq	40(%r14), %rdx
-	leaq	328(%rsp), %rdi
-	movq	72(%rsp), %r13          ## 8-byte Reload
-	movq	%r13, %rsi
-	callq	l_mulPv576x64
-	movq	400(%rsp), %r8
-	movq	392(%rsp), %r9
-	movq	384(%rsp), %rsi
-	movq	376(%rsp), %rdi
-	movq	368(%rsp), %rbx
-	movq	360(%rsp), %rbp
-	addq	328(%rsp), %r15
-	movq	352(%rsp), %rcx
-	movq	336(%rsp), %r12
-	movq	344(%rsp), %rdx
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	%r15, 40(%rax)
-	adcq	56(%rsp), %r12          ## 8-byte Folded Reload
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	40(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	(%rsp), %rbx            ## 8-byte Folded Reload
-	movq	%rbx, 40(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, (%rsp)            ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	48(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 16(%rsp)           ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 48(%rsp)           ## 8-byte Spill
-	movq	48(%r14), %rdx
-	leaq	248(%rsp), %rdi
-	movq	%r13, %rsi
-	movq	%r13, %r15
-	callq	l_mulPv576x64
-	movq	320(%rsp), %r8
-	movq	312(%rsp), %r9
-	movq	304(%rsp), %rsi
-	movq	296(%rsp), %rdi
-	movq	288(%rsp), %rbx
-	movq	280(%rsp), %rbp
-	addq	248(%rsp), %r12
-	movq	272(%rsp), %rcx
-	movq	256(%rsp), %r13
-	movq	264(%rsp), %rdx
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	%r12, 48(%rax)
-	adcq	56(%rsp), %r13          ## 8-byte Folded Reload
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	40(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	(%rsp), %rbx            ## 8-byte Folded Reload
-	movq	%rbx, 40(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, (%rsp)            ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	48(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 16(%rsp)           ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 48(%rsp)           ## 8-byte Spill
-	movq	56(%r14), %rdx
-	leaq	168(%rsp), %rdi
+	movq	%r15, 56(%rsp)                  ## 8-byte Spill
+	callq	_mulPv512x64
+	movq	568(%rsp), %r12
+	addq	504(%rsp), %r14
+	adcq	512(%rsp), %rbx
+	movq	%rbx, 24(%rsp)                  ## 8-byte Spill
+	adcq	520(%rsp), %rbp
+	movq	%rbp, 64(%rsp)                  ## 8-byte Spill
+	movq	48(%rsp), %rax                  ## 8-byte Reload
+	adcq	528(%rsp), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	adcq	536(%rsp), %r13
+	movq	16(%rsp), %rbp                  ## 8-byte Reload
+	adcq	544(%rsp), %rbp
+	movq	8(%rsp), %rax                   ## 8-byte Reload
+	adcq	552(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rax                    ## 8-byte Reload
+	adcq	560(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	movq	%r14, 8(%rax)
+	adcq	$0, %r12
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	movq	16(%rax), %rdx
+	leaq	432(%rsp), %rdi
 	movq	%r15, %rsi
-	callq	l_mulPv576x64
-	movq	240(%rsp), %rcx
-	movq	232(%rsp), %rdx
-	movq	224(%rsp), %rsi
-	movq	216(%rsp), %rdi
-	movq	208(%rsp), %rbx
-	addq	168(%rsp), %r13
-	movq	200(%rsp), %r12
-	movq	192(%rsp), %rbp
-	movq	176(%rsp), %r14
-	movq	184(%rsp), %r15
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	movq	%r13, 56(%rax)
-	adcq	56(%rsp), %r14          ## 8-byte Folded Reload
-	adcq	24(%rsp), %r15          ## 8-byte Folded Reload
-	adcq	32(%rsp), %rbp          ## 8-byte Folded Reload
-	adcq	40(%rsp), %r12          ## 8-byte Folded Reload
-	adcq	(%rsp), %rbx            ## 8-byte Folded Reload
-	movq	%rbx, %r13
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, (%rsp)            ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	48(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	64(%rax), %rdx
-	leaq	88(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	88(%rsp), %r14
+	callq	_mulPv512x64
+	movq	496(%rsp), %r15
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	addq	432(%rsp), %rcx
+	movq	64(%rsp), %rax                  ## 8-byte Reload
+	adcq	440(%rsp), %rax
+	movq	%rax, 64(%rsp)                  ## 8-byte Spill
+	movq	48(%rsp), %rbx                  ## 8-byte Reload
+	adcq	448(%rsp), %rbx
+	adcq	456(%rsp), %r13
+	movq	%r13, 24(%rsp)                  ## 8-byte Spill
+	adcq	464(%rsp), %rbp
+	movq	%rbp, 16(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rbp                   ## 8-byte Reload
+	adcq	472(%rsp), %rbp
+	movq	(%rsp), %rax                    ## 8-byte Reload
+	adcq	480(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	adcq	488(%rsp), %r12
+	movq	32(%rsp), %r14                  ## 8-byte Reload
+	movq	%rcx, 16(%r14)
+	adcq	$0, %r15
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	movq	24(%rax), %rdx
+	leaq	360(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	424(%rsp), %r13
+	movq	64(%rsp), %rcx                  ## 8-byte Reload
+	addq	360(%rsp), %rcx
+	adcq	368(%rsp), %rbx
+	movq	%rbx, 48(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	376(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	384(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	adcq	392(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rbx                    ## 8-byte Reload
+	adcq	400(%rsp), %rbx
+	adcq	408(%rsp), %r12
+	adcq	416(%rsp), %r15
+	movq	%rcx, 24(%r14)
+	adcq	$0, %r13
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	movq	32(%rax), %rdx
+	leaq	288(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	352(%rsp), %r14
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	addq	288(%rsp), %rcx
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	296(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	304(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rbp                   ## 8-byte Reload
+	adcq	312(%rsp), %rbp
+	adcq	320(%rsp), %rbx
+	movq	%rbx, (%rsp)                    ## 8-byte Spill
+	adcq	328(%rsp), %r12
+	adcq	336(%rsp), %r15
+	adcq	344(%rsp), %r13
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	movq	%rcx, 32(%rax)
+	adcq	$0, %r14
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	movq	40(%rax), %rdx
+	leaq	216(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	280(%rsp), %rbx
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	addq	216(%rsp), %rax
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	224(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	adcq	232(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	adcq	248(%rsp), %r12
+	adcq	256(%rsp), %r15
+	adcq	264(%rsp), %r13
+	adcq	272(%rsp), %r14
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rax, 40(%rcx)
+	adcq	$0, %rbx
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	movq	48(%rax), %rdx
+	leaq	144(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	208(%rsp), %rbp
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	addq	144(%rsp), %rax
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	152(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	160(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	adcq	168(%rsp), %r12
+	adcq	176(%rsp), %r15
+	adcq	184(%rsp), %r13
+	adcq	192(%rsp), %r14
+	adcq	200(%rsp), %rbx
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rax, 48(%rcx)
+	adcq	$0, %rbp
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	movq	56(%rax), %rdx
+	leaq	72(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	136(%rsp), %rax
+	movq	8(%rsp), %rsi                   ## 8-byte Reload
+	addq	72(%rsp), %rsi
+	movq	(%rsp), %rdx                    ## 8-byte Reload
+	adcq	80(%rsp), %rdx
+	adcq	88(%rsp), %r12
 	adcq	96(%rsp), %r15
-	movq	160(%rsp), %r8
-	adcq	104(%rsp), %rbp
-	movq	152(%rsp), %r9
-	movq	144(%rsp), %rdx
-	movq	136(%rsp), %rsi
-	movq	128(%rsp), %rdi
-	movq	120(%rsp), %rbx
-	movq	112(%rsp), %rax
-	movq	64(%rsp), %rcx          ## 8-byte Reload
-	movq	%r14, 64(%rcx)
-	movq	%r15, 72(%rcx)
-	adcq	%r12, %rax
-	movq	%rbp, 80(%rcx)
-	movq	%rax, 88(%rcx)
-	adcq	%r13, %rbx
-	movq	%rbx, 96(%rcx)
-	adcq	(%rsp), %rdi            ## 8-byte Folded Reload
-	movq	%rdi, 104(%rcx)
-	adcq	8(%rsp), %rsi           ## 8-byte Folded Reload
-	movq	%rsi, 112(%rcx)
-	adcq	16(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 120(%rcx)
-	adcq	48(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 128(%rcx)
-	adcq	$0, %r8
-	movq	%r8, 136(%rcx)
-	addq	$808, %rsp              ## imm = 0x328
+	adcq	104(%rsp), %r13
+	adcq	112(%rsp), %r14
+	adcq	120(%rsp), %rbx
+	adcq	128(%rsp), %rbp
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rbp, 112(%rcx)
+	movq	%rbx, 104(%rcx)
+	movq	%r14, 96(%rcx)
+	movq	%r13, 88(%rcx)
+	movq	%r15, 80(%rcx)
+	movq	%r12, 72(%rcx)
+	movq	%rdx, 64(%rcx)
+	movq	%rsi, 56(%rcx)
+	adcq	$0, %rax
+	movq	%rax, 120(%rcx)
+	addq	$648, %rsp                      ## imm = 0x288
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -13786,295 +5907,658 @@ _mcl_fpDbl_mulPre9L:                    ## @mcl_fpDbl_mulPre9L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_sqrPre9L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sqrPre8L             ## -- Begin function mcl_fpDbl_sqrPre8L
 	.p2align	4, 0x90
-_mcl_fpDbl_sqrPre9L:                    ## @mcl_fpDbl_sqrPre9L
-## BB#0:
+_mcl_fpDbl_sqrPre8L:                    ## @mcl_fpDbl_sqrPre8L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$808, %rsp              ## imm = 0x328
+	subq	$648, %rsp                      ## imm = 0x288
 	movq	%rsi, %r15
-	movq	%rdi, %r14
-	movq	(%r15), %rdx
-	leaq	728(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	800(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	792(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	784(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	776(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	768(%rsp), %rax
-	movq	%rax, 56(%rsp)          ## 8-byte Spill
-	movq	760(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	movq	752(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	744(%rsp), %rax
-	movq	%rax, 80(%rsp)          ## 8-byte Spill
-	movq	728(%rsp), %rax
-	movq	736(%rsp), %r12
-	movq	%rax, (%r14)
-	movq	%r14, 72(%rsp)          ## 8-byte Spill
+	movq	%rdi, %r12
+	movq	%rdi, 56(%rsp)                  ## 8-byte Spill
+	movq	(%rsi), %rdx
+	leaq	576(%rsp), %rdi
+	callq	_mulPv512x64
+	movq	640(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	632(%rsp), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	movq	624(%rsp), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	movq	616(%rsp), %rax
+	movq	%rax, 40(%rsp)                  ## 8-byte Spill
+	movq	608(%rsp), %r13
+	movq	600(%rsp), %rbp
+	movq	592(%rsp), %rbx
+	movq	576(%rsp), %rax
+	movq	584(%rsp), %r14
+	movq	%rax, (%r12)
 	movq	8(%r15), %rdx
-	leaq	648(%rsp), %rdi
+	leaq	504(%rsp), %rdi
 	movq	%r15, %rsi
-	callq	l_mulPv576x64
-	movq	720(%rsp), %r8
-	movq	712(%rsp), %rcx
-	movq	704(%rsp), %rdx
-	movq	696(%rsp), %rsi
-	movq	688(%rsp), %rdi
-	movq	680(%rsp), %rbp
-	addq	648(%rsp), %r12
-	movq	672(%rsp), %rax
-	movq	656(%rsp), %rbx
-	movq	664(%rsp), %r13
-	movq	%r12, 8(%r14)
-	adcq	80(%rsp), %rbx          ## 8-byte Folded Reload
-	adcq	40(%rsp), %r13          ## 8-byte Folded Reload
-	adcq	48(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	56(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 56(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	%r15, 64(%rsp)          ## 8-byte Spill
+	callq	_mulPv512x64
+	movq	568(%rsp), %rax
+	addq	504(%rsp), %r14
+	adcq	512(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	adcq	520(%rsp), %rbp
+	movq	%rbp, 64(%rsp)                  ## 8-byte Spill
+	adcq	528(%rsp), %r13
+	movq	%r13, %rbx
+	movq	40(%rsp), %r13                  ## 8-byte Reload
+	adcq	536(%rsp), %r13
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	544(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	adcq	552(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %r12                  ## 8-byte Reload
+	adcq	560(%rsp), %r12
+	movq	56(%rsp), %rcx                  ## 8-byte Reload
+	movq	%r14, 8(%rcx)
+	adcq	$0, %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
 	movq	16(%r15), %rdx
-	leaq	568(%rsp), %rdi
+	leaq	432(%rsp), %rdi
 	movq	%r15, %rsi
-	callq	l_mulPv576x64
-	movq	640(%rsp), %r8
-	movq	632(%rsp), %rcx
-	movq	624(%rsp), %rdx
-	movq	616(%rsp), %rsi
-	movq	608(%rsp), %rdi
-	movq	600(%rsp), %rbp
-	addq	568(%rsp), %rbx
-	movq	592(%rsp), %rax
-	movq	576(%rsp), %r14
-	movq	584(%rsp), %r12
-	movq	72(%rsp), %r15          ## 8-byte Reload
-	movq	%rbx, 16(%r15)
-	adcq	%r13, %r14
-	adcq	40(%rsp), %r12          ## 8-byte Folded Reload
-	adcq	48(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	56(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 56(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rsi          ## 8-byte Reload
-	movq	24(%rsi), %rdx
-	leaq	488(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	560(%rsp), %r8
-	movq	552(%rsp), %rcx
-	movq	544(%rsp), %rdx
-	movq	536(%rsp), %rsi
-	movq	528(%rsp), %rdi
-	movq	520(%rsp), %rbp
-	addq	488(%rsp), %r14
-	movq	512(%rsp), %rax
-	movq	496(%rsp), %rbx
-	movq	504(%rsp), %r13
-	movq	%r14, 24(%r15)
-	adcq	%r12, %rbx
-	adcq	40(%rsp), %r13          ## 8-byte Folded Reload
-	adcq	48(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	56(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 56(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rsi          ## 8-byte Reload
-	movq	32(%rsi), %rdx
-	leaq	408(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	480(%rsp), %r8
-	movq	472(%rsp), %rcx
-	movq	464(%rsp), %rdx
-	movq	456(%rsp), %rsi
-	movq	448(%rsp), %rdi
-	movq	440(%rsp), %rbp
-	addq	408(%rsp), %rbx
-	movq	432(%rsp), %rax
-	movq	416(%rsp), %r14
-	movq	424(%rsp), %r12
-	movq	%rbx, 32(%r15)
-	adcq	%r13, %r14
-	adcq	40(%rsp), %r12          ## 8-byte Folded Reload
-	adcq	48(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	56(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 56(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rsi          ## 8-byte Reload
-	movq	40(%rsi), %rdx
-	leaq	328(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	400(%rsp), %r8
-	movq	392(%rsp), %rcx
-	movq	384(%rsp), %rdx
-	movq	376(%rsp), %rsi
-	movq	368(%rsp), %rdi
-	movq	360(%rsp), %rbp
-	addq	328(%rsp), %r14
-	movq	352(%rsp), %rax
-	movq	336(%rsp), %rbx
-	movq	344(%rsp), %r13
-	movq	%r14, 40(%r15)
-	adcq	%r12, %rbx
-	adcq	40(%rsp), %r13          ## 8-byte Folded Reload
-	adcq	48(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	56(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 56(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rsi          ## 8-byte Reload
-	movq	48(%rsi), %rdx
+	callq	_mulPv512x64
+	movq	496(%rsp), %rax
+	movq	8(%rsp), %rdx                   ## 8-byte Reload
+	addq	432(%rsp), %rdx
+	movq	64(%rsp), %rcx                  ## 8-byte Reload
+	adcq	440(%rsp), %rcx
+	movq	%rcx, 64(%rsp)                  ## 8-byte Spill
+	adcq	448(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  ## 8-byte Spill
+	adcq	456(%rsp), %r13
+	movq	48(%rsp), %rbx                  ## 8-byte Reload
+	adcq	464(%rsp), %rbx
+	movq	32(%rsp), %rbp                  ## 8-byte Reload
+	adcq	472(%rsp), %rbp
+	adcq	480(%rsp), %r12
+	movq	%r12, 24(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	488(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	movq	56(%rsp), %r12                  ## 8-byte Reload
+	movq	%rdx, 16(%r12)
+	adcq	$0, %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	24(%r15), %rdx
+	leaq	360(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	_mulPv512x64
+	movq	424(%rsp), %r14
+	movq	64(%rsp), %rax                  ## 8-byte Reload
+	addq	360(%rsp), %rax
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
+	adcq	368(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	adcq	376(%rsp), %r13
+	adcq	384(%rsp), %rbx
+	movq	%rbx, 48(%rsp)                  ## 8-byte Spill
+	adcq	392(%rsp), %rbp
+	movq	%rbp, %rbx
+	movq	24(%rsp), %rbp                  ## 8-byte Reload
+	adcq	400(%rsp), %rbp
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	408(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	416(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	movq	%rax, 24(%r12)
+	adcq	$0, %r14
+	movq	32(%r15), %rdx
+	leaq	288(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	_mulPv512x64
+	movq	352(%rsp), %r12
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	addq	288(%rsp), %rax
+	adcq	296(%rsp), %r13
+	movq	%r13, 40(%rsp)                  ## 8-byte Spill
+	movq	48(%rsp), %r13                  ## 8-byte Reload
+	adcq	304(%rsp), %r13
+	adcq	312(%rsp), %rbx
+	movq	%rbx, 32(%rsp)                  ## 8-byte Spill
+	adcq	320(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rbx                  ## 8-byte Reload
+	adcq	328(%rsp), %rbx
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	336(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	adcq	344(%rsp), %r14
+	movq	56(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rax, 32(%rcx)
+	adcq	$0, %r12
+	movq	40(%r15), %rdx
+	leaq	216(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	_mulPv512x64
+	movq	280(%rsp), %rbp
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	addq	216(%rsp), %rax
+	adcq	224(%rsp), %r13
+	movq	%r13, 48(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	adcq	232(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  ## 8-byte Spill
+	adcq	248(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rbx                   ## 8-byte Reload
+	adcq	256(%rsp), %rbx
+	adcq	264(%rsp), %r14
+	adcq	272(%rsp), %r12
+	movq	56(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rax, 40(%rcx)
+	adcq	$0, %rbp
+	movq	48(%r15), %rdx
+	leaq	144(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	_mulPv512x64
+	movq	208(%rsp), %r13
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	addq	144(%rsp), %rcx
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	adcq	152(%rsp), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	160(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	168(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	adcq	176(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	adcq	184(%rsp), %r14
+	adcq	192(%rsp), %r12
+	adcq	200(%rsp), %rbp
+	movq	56(%rsp), %rax                  ## 8-byte Reload
+	movq	%rcx, 48(%rax)
+	adcq	$0, %r13
+	movq	56(%r15), %rdx
+	leaq	72(%rsp), %rdi
+	movq	%r15, %rsi
+	callq	_mulPv512x64
+	movq	136(%rsp), %rax
+	movq	32(%rsp), %rsi                  ## 8-byte Reload
+	addq	72(%rsp), %rsi
+	movq	24(%rsp), %rdi                  ## 8-byte Reload
+	adcq	80(%rsp), %rdi
+	movq	16(%rsp), %rbx                  ## 8-byte Reload
+	adcq	88(%rsp), %rbx
+	movq	8(%rsp), %rdx                   ## 8-byte Reload
+	adcq	96(%rsp), %rdx
+	adcq	104(%rsp), %r14
+	adcq	112(%rsp), %r12
+	adcq	120(%rsp), %rbp
+	adcq	128(%rsp), %r13
+	movq	56(%rsp), %rcx                  ## 8-byte Reload
+	movq	%r13, 112(%rcx)
+	movq	%rbp, 104(%rcx)
+	movq	%r12, 96(%rcx)
+	movq	%r14, 88(%rcx)
+	movq	%rdx, 80(%rcx)
+	movq	%rbx, 72(%rcx)
+	movq	%rdi, 64(%rcx)
+	movq	%rsi, 56(%rcx)
+	adcq	$0, %rax
+	movq	%rax, 120(%rcx)
+	addq	$648, %rsp                      ## imm = 0x288
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	retq
+                                        ## -- End function
+	.globl	_mcl_fp_mont8L                  ## -- Begin function mcl_fp_mont8L
+	.p2align	4, 0x90
+_mcl_fp_mont8L:                         ## @mcl_fp_mont8L
+## %bb.0:
+	pushq	%rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	subq	$1256, %rsp                     ## imm = 0x4E8
+	movq	%rcx, %r13
+	movq	%rdx, 80(%rsp)                  ## 8-byte Spill
+	movq	%rsi, 88(%rsp)                  ## 8-byte Spill
+	movq	%rdi, 96(%rsp)                  ## 8-byte Spill
+	movq	-8(%rcx), %rbx
+	movq	%rbx, 72(%rsp)                  ## 8-byte Spill
+	movq	%rcx, 56(%rsp)                  ## 8-byte Spill
+	movq	(%rdx), %rdx
+	leaq	1184(%rsp), %rdi
+	callq	_mulPv512x64
+	movq	1184(%rsp), %r15
+	movq	1192(%rsp), %r12
+	movq	%rbx, %rdx
+	imulq	%r15, %rdx
+	movq	1248(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	1240(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	1232(%rsp), %r14
+	movq	1224(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	movq	1216(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	1208(%rsp), %rbx
+	movq	1200(%rsp), %rbp
+	leaq	1112(%rsp), %rdi
+	movq	%r13, %rsi
+	callq	_mulPv512x64
+	addq	1112(%rsp), %r15
+	adcq	1120(%rsp), %r12
+	adcq	1128(%rsp), %rbp
+	movq	%rbp, 64(%rsp)                  ## 8-byte Spill
+	adcq	1136(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rbp                  ## 8-byte Reload
+	adcq	1144(%rsp), %rbp
+	movq	(%rsp), %r15                    ## 8-byte Reload
+	adcq	1152(%rsp), %r15
+	adcq	1160(%rsp), %r14
+	movq	%r14, 48(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %r13                  ## 8-byte Reload
+	adcq	1168(%rsp), %r13
+	movq	8(%rsp), %rbx                   ## 8-byte Reload
+	adcq	1176(%rsp), %rbx
+	setb	%r14b
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	8(%rax), %rdx
+	leaq	1040(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movzbl	%r14b, %ecx
+	addq	1040(%rsp), %r12
+	movq	64(%rsp), %r14                  ## 8-byte Reload
+	adcq	1048(%rsp), %r14
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	adcq	1056(%rsp), %rax
+	movq	%rax, 40(%rsp)                  ## 8-byte Spill
+	adcq	1064(%rsp), %rbp
+	adcq	1072(%rsp), %r15
+	movq	%r15, (%rsp)                    ## 8-byte Spill
+	movq	48(%rsp), %rax                  ## 8-byte Reload
+	adcq	1080(%rsp), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	adcq	1088(%rsp), %r13
+	movq	%r13, 16(%rsp)                  ## 8-byte Spill
+	adcq	1096(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	adcq	1104(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	setb	%r15b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r12, %rdx
+	leaq	968(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movzbl	%r15b, %r15d
+	addq	968(%rsp), %r12
+	adcq	976(%rsp), %r14
+	movq	%r14, 64(%rsp)                  ## 8-byte Spill
+	movq	40(%rsp), %r13                  ## 8-byte Reload
+	adcq	984(%rsp), %r13
+	adcq	992(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  ## 8-byte Spill
+	movq	(%rsp), %r12                    ## 8-byte Reload
+	adcq	1000(%rsp), %r12
+	movq	48(%rsp), %r14                  ## 8-byte Reload
+	adcq	1008(%rsp), %r14
+	movq	16(%rsp), %rbx                  ## 8-byte Reload
+	adcq	1016(%rsp), %rbx
+	movq	8(%rsp), %rax                   ## 8-byte Reload
+	adcq	1024(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	32(%rsp), %rbp                  ## 8-byte Reload
+	adcq	1032(%rsp), %rbp
+	adcq	$0, %r15
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	16(%rax), %rdx
+	leaq	896(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	64(%rsp), %rax                  ## 8-byte Reload
+	addq	896(%rsp), %rax
+	adcq	904(%rsp), %r13
+	movq	%r13, 40(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %r13                  ## 8-byte Reload
+	adcq	912(%rsp), %r13
+	adcq	920(%rsp), %r12
+	adcq	928(%rsp), %r14
+	movq	%r14, 48(%rsp)                  ## 8-byte Spill
+	adcq	936(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rbx                   ## 8-byte Reload
+	adcq	944(%rsp), %rbx
+	adcq	952(%rsp), %rbp
+	movq	%rbp, 32(%rsp)                  ## 8-byte Spill
+	adcq	960(%rsp), %r15
+	setb	%r14b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	824(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movzbl	%r14b, %eax
+	addq	824(%rsp), %rbp
+	movq	40(%rsp), %r14                  ## 8-byte Reload
+	adcq	832(%rsp), %r14
+	adcq	840(%rsp), %r13
+	movq	%r13, 24(%rsp)                  ## 8-byte Spill
+	adcq	848(%rsp), %r12
+	movq	%r12, (%rsp)                    ## 8-byte Spill
+	movq	48(%rsp), %r12                  ## 8-byte Reload
+	adcq	856(%rsp), %r12
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	864(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	movq	%rbx, %rbp
+	adcq	872(%rsp), %rbp
+	movq	32(%rsp), %r13                  ## 8-byte Reload
+	adcq	880(%rsp), %r13
+	adcq	888(%rsp), %r15
+	movq	%rax, %rbx
+	adcq	$0, %rbx
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	24(%rax), %rdx
+	leaq	752(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	%r14, %rax
+	addq	752(%rsp), %rax
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	adcq	760(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  ## 8-byte Spill
+	movq	(%rsp), %r14                    ## 8-byte Reload
+	adcq	768(%rsp), %r14
+	adcq	776(%rsp), %r12
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	784(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	adcq	792(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   ## 8-byte Spill
+	adcq	800(%rsp), %r13
+	movq	%r13, 32(%rsp)                  ## 8-byte Spill
+	adcq	808(%rsp), %r15
+	movq	%r15, %r13
+	adcq	816(%rsp), %rbx
+	setb	%r15b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	680(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movzbl	%r15b, %eax
+	addq	680(%rsp), %rbp
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	adcq	688(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  ## 8-byte Spill
+	adcq	696(%rsp), %r14
+	movq	%r14, (%rsp)                    ## 8-byte Spill
+	adcq	704(%rsp), %r12
+	movq	16(%rsp), %rbp                  ## 8-byte Reload
+	adcq	712(%rsp), %rbp
+	movq	8(%rsp), %r14                   ## 8-byte Reload
+	adcq	720(%rsp), %r14
+	movq	32(%rsp), %r15                  ## 8-byte Reload
+	adcq	728(%rsp), %r15
+	adcq	736(%rsp), %r13
+	movq	%r13, 40(%rsp)                  ## 8-byte Spill
+	adcq	744(%rsp), %rbx
+	adcq	$0, %rax
+	movq	%rax, %r13
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	32(%rax), %rdx
+	leaq	608(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	addq	608(%rsp), %rax
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	616(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	adcq	624(%rsp), %r12
+	adcq	632(%rsp), %rbp
+	movq	%rbp, 16(%rsp)                  ## 8-byte Spill
+	adcq	640(%rsp), %r14
+	movq	%r14, 8(%rsp)                   ## 8-byte Spill
+	adcq	648(%rsp), %r15
+	movq	%r15, 32(%rsp)                  ## 8-byte Spill
+	movq	40(%rsp), %rbp                  ## 8-byte Reload
+	adcq	656(%rsp), %rbp
+	adcq	664(%rsp), %rbx
+	movq	%rbx, %r15
+	adcq	672(%rsp), %r13
+	setb	%r14b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	536(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movzbl	%r14b, %eax
+	addq	536(%rsp), %rbx
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	544(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	adcq	552(%rsp), %r12
+	movq	%r12, 48(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %r12                  ## 8-byte Reload
+	adcq	560(%rsp), %r12
+	movq	8(%rsp), %rbx                   ## 8-byte Reload
+	adcq	568(%rsp), %rbx
+	movq	32(%rsp), %r14                  ## 8-byte Reload
+	adcq	576(%rsp), %r14
+	adcq	584(%rsp), %rbp
+	adcq	592(%rsp), %r15
+	movq	%r15, 64(%rsp)                  ## 8-byte Spill
+	adcq	600(%rsp), %r13
+	movq	%r13, 24(%rsp)                  ## 8-byte Spill
+	adcq	$0, %rax
+	movq	%rax, %r13
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	40(%rax), %rdx
+	leaq	464(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	(%rsp), %rax                    ## 8-byte Reload
+	addq	464(%rsp), %rax
+	movq	48(%rsp), %r15                  ## 8-byte Reload
+	adcq	472(%rsp), %r15
+	adcq	480(%rsp), %r12
+	movq	%r12, 16(%rsp)                  ## 8-byte Spill
+	adcq	488(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	adcq	496(%rsp), %r14
+	movq	%r14, %r12
+	adcq	504(%rsp), %rbp
+	movq	64(%rsp), %rcx                  ## 8-byte Reload
+	adcq	512(%rsp), %rcx
+	movq	%rcx, 64(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	adcq	520(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  ## 8-byte Spill
+	adcq	528(%rsp), %r13
+	movq	%r13, (%rsp)                    ## 8-byte Spill
+	setb	%r14b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	392(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movzbl	%r14b, %eax
+	addq	392(%rsp), %rbx
+	adcq	400(%rsp), %r15
+	movq	%r15, 48(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rbx                  ## 8-byte Reload
+	adcq	408(%rsp), %rbx
+	movq	8(%rsp), %r14                   ## 8-byte Reload
+	adcq	416(%rsp), %r14
+	adcq	424(%rsp), %r12
+	movq	%r12, 32(%rsp)                  ## 8-byte Spill
+	adcq	432(%rsp), %rbp
+	movq	%rbp, 40(%rsp)                  ## 8-byte Spill
+	movq	64(%rsp), %rbp                  ## 8-byte Reload
+	adcq	440(%rsp), %rbp
+	movq	24(%rsp), %r13                  ## 8-byte Reload
+	adcq	448(%rsp), %r13
+	movq	(%rsp), %r12                    ## 8-byte Reload
+	adcq	456(%rsp), %r12
+	movq	%rax, %r15
+	adcq	$0, %r15
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	48(%rax), %rdx
+	leaq	320(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
 	leaq	248(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	320(%rsp), %r8
-	movq	312(%rsp), %rcx
-	movq	304(%rsp), %rdx
-	movq	296(%rsp), %rsi
-	movq	288(%rsp), %rdi
-	movq	280(%rsp), %rbp
-	addq	248(%rsp), %rbx
-	movq	272(%rsp), %rax
-	movq	256(%rsp), %r12
-	movq	264(%rsp), %r14
-	movq	%rbx, 48(%r15)
-	adcq	%r13, %r12
-	adcq	40(%rsp), %r14          ## 8-byte Folded Reload
-	adcq	48(%rsp), %rax          ## 8-byte Folded Reload
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	56(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 56(%rsp)          ## 8-byte Spill
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rsi          ## 8-byte Reload
-	movq	56(%rsi), %rdx
-	leaq	168(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	240(%rsp), %r8
-	movq	232(%rsp), %rdx
-	movq	224(%rsp), %rsi
-	movq	216(%rsp), %rdi
-	movq	208(%rsp), %rbx
-	movq	200(%rsp), %rcx
-	addq	168(%rsp), %r12
-	movq	192(%rsp), %r15
-	movq	176(%rsp), %r13
-	movq	184(%rsp), %rbp
-	movq	72(%rsp), %rax          ## 8-byte Reload
-	movq	%r12, 56(%rax)
-	adcq	%r14, %r13
-	adcq	40(%rsp), %rbp          ## 8-byte Folded Reload
-	adcq	48(%rsp), %r15          ## 8-byte Folded Reload
-	adcq	56(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, %r12
-	adcq	8(%rsp), %rbx           ## 8-byte Folded Reload
-	movq	%rbx, %r14
-	adcq	16(%rsp), %rdi          ## 8-byte Folded Reload
-	movq	%rdi, 8(%rsp)           ## 8-byte Spill
-	adcq	24(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 16(%rsp)          ## 8-byte Spill
-	adcq	32(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %r8
-	movq	%r8, 32(%rsp)           ## 8-byte Spill
-	movq	64(%rsp), %rsi          ## 8-byte Reload
-	movq	64(%rsi), %rdx
-	leaq	88(%rsp), %rdi
-	callq	l_mulPv576x64
-	addq	88(%rsp), %r13
-	adcq	96(%rsp), %rbp
-	movq	160(%rsp), %r8
-	adcq	104(%rsp), %r15
-	movq	152(%rsp), %r9
-	movq	144(%rsp), %rdx
-	movq	136(%rsp), %rsi
-	movq	128(%rsp), %rdi
-	movq	120(%rsp), %rbx
-	movq	112(%rsp), %rax
-	movq	72(%rsp), %rcx          ## 8-byte Reload
-	movq	%r13, 64(%rcx)
-	movq	%rbp, 72(%rcx)
-	adcq	%r12, %rax
-	movq	%r15, 80(%rcx)
-	movq	%rax, 88(%rcx)
-	adcq	%r14, %rbx
-	movq	%rbx, 96(%rcx)
-	adcq	8(%rsp), %rdi           ## 8-byte Folded Reload
-	movq	%rdi, 104(%rcx)
-	adcq	16(%rsp), %rsi          ## 8-byte Folded Reload
-	movq	%rsi, 112(%rcx)
-	adcq	24(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 120(%rcx)
-	adcq	32(%rsp), %r9           ## 8-byte Folded Reload
-	movq	%r9, 128(%rcx)
-	adcq	$0, %r8
-	movq	%r8, 136(%rcx)
-	addq	$808, %rsp              ## imm = 0x328
+	movq	48(%rsp), %rax                  ## 8-byte Reload
+	addq	320(%rsp), %rax
+	adcq	328(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  ## 8-byte Spill
+	adcq	336(%rsp), %r14
+	movq	%r14, 8(%rsp)                   ## 8-byte Spill
+	movq	32(%rsp), %rbx                  ## 8-byte Reload
+	adcq	344(%rsp), %rbx
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
+	adcq	352(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	adcq	360(%rsp), %rbp
+	adcq	368(%rsp), %r13
+	adcq	376(%rsp), %r12
+	movq	%r12, (%rsp)                    ## 8-byte Spill
+	adcq	384(%rsp), %r15
+	movq	%r15, 48(%rsp)                  ## 8-byte Spill
+	setb	%r12b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %r14
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movzbl	%r12b, %r12d
+	addq	248(%rsp), %r14
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	256(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %r15                   ## 8-byte Reload
+	adcq	264(%rsp), %r15
+	adcq	272(%rsp), %rbx
+	movq	%rbx, 32(%rsp)                  ## 8-byte Spill
+	movq	40(%rsp), %rbx                  ## 8-byte Reload
+	adcq	280(%rsp), %rbx
+	adcq	288(%rsp), %rbp
+	adcq	296(%rsp), %r13
+	movq	(%rsp), %r14                    ## 8-byte Reload
+	adcq	304(%rsp), %r14
+	movq	48(%rsp), %rax                  ## 8-byte Reload
+	adcq	312(%rsp), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	adcq	$0, %r12
+	movq	80(%rsp), %rax                  ## 8-byte Reload
+	movq	56(%rax), %rdx
+	leaq	176(%rsp), %rdi
+	movq	88(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	addq	176(%rsp), %rax
+	adcq	184(%rsp), %r15
+	movq	%r15, 8(%rsp)                   ## 8-byte Spill
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	adcq	192(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	adcq	200(%rsp), %rbx
+	adcq	208(%rsp), %rbp
+	adcq	216(%rsp), %r13
+	movq	%r13, 24(%rsp)                  ## 8-byte Spill
+	adcq	224(%rsp), %r14
+	movq	%r14, (%rsp)                    ## 8-byte Spill
+	movq	48(%rsp), %r15                  ## 8-byte Reload
+	adcq	232(%rsp), %r15
+	adcq	240(%rsp), %r12
+	setb	%r14b
+	movq	72(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %r13
+	leaq	104(%rsp), %rdi
+	movq	56(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movzbl	%r14b, %r9d
+	addq	104(%rsp), %r13
+	movq	8(%rsp), %r11                   ## 8-byte Reload
+	adcq	112(%rsp), %r11
+	movq	%r11, 8(%rsp)                   ## 8-byte Spill
+	movq	32(%rsp), %r10                  ## 8-byte Reload
+	adcq	120(%rsp), %r10
+	movq	%r10, 32(%rsp)                  ## 8-byte Spill
+	movq	%rbx, %r8
+	adcq	128(%rsp), %r8
+	movq	%r8, 40(%rsp)                   ## 8-byte Spill
+	movq	%rbp, %r13
+	adcq	136(%rsp), %r13
+	movq	24(%rsp), %r14                  ## 8-byte Reload
+	adcq	144(%rsp), %r14
+	movq	(%rsp), %rsi                    ## 8-byte Reload
+	adcq	152(%rsp), %rsi
+	adcq	160(%rsp), %r15
+	adcq	168(%rsp), %r12
+	adcq	$0, %r9
+	movq	56(%rsp), %rcx                  ## 8-byte Reload
+	subq	(%rcx), %r11
+	sbbq	8(%rcx), %r10
+	sbbq	16(%rcx), %r8
+	movq	%r13, %rdi
+	sbbq	24(%rcx), %rdi
+	movq	%r14, %rbx
+	sbbq	32(%rcx), %rbx
+	movq	%rsi, %rbp
+	sbbq	40(%rcx), %rbp
+	movq	%r15, %rax
+	sbbq	48(%rcx), %rax
+	movq	%rcx, %rdx
+	movq	%r12, %rcx
+	sbbq	56(%rdx), %rcx
+	sbbq	$0, %r9
+	testb	$1, %r9b
+	cmovneq	%r12, %rcx
+	movq	96(%rsp), %rdx                  ## 8-byte Reload
+	movq	%rcx, 56(%rdx)
+	cmovneq	%r15, %rax
+	movq	%rax, 48(%rdx)
+	cmovneq	%rsi, %rbp
+	movq	%rbp, 40(%rdx)
+	cmovneq	%r14, %rbx
+	movq	%rbx, 32(%rdx)
+	cmovneq	%r13, %rdi
+	movq	%rdi, 24(%rdx)
+	cmovneq	40(%rsp), %r8                   ## 8-byte Folded Reload
+	movq	%r8, 16(%rdx)
+	cmovneq	32(%rsp), %r10                  ## 8-byte Folded Reload
+	movq	%r10, 8(%rdx)
+	cmovneq	8(%rsp), %r11                   ## 8-byte Folded Reload
+	movq	%r11, (%rdx)
+	addq	$1256, %rsp                     ## imm = 0x4E8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -14082,556 +6566,411 @@ _mcl_fpDbl_sqrPre9L:                    ## @mcl_fpDbl_sqrPre9L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_mont9L
+                                        ## -- End function
+	.globl	_mcl_fp_montNF8L                ## -- Begin function mcl_fp_montNF8L
 	.p2align	4, 0x90
-_mcl_fp_mont9L:                         ## @mcl_fp_mont9L
-## BB#0:
+_mcl_fp_montNF8L:                       ## @mcl_fp_montNF8L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$1560, %rsp             ## imm = 0x618
-	movq	%rcx, 72(%rsp)          ## 8-byte Spill
-	movq	%rdx, 96(%rsp)          ## 8-byte Spill
-	movq	%rsi, 88(%rsp)          ## 8-byte Spill
-	movq	%rdi, 112(%rsp)         ## 8-byte Spill
+	subq	$1256, %rsp                     ## imm = 0x4E8
+	movq	%rcx, %rbp
+	movq	%rdx, 88(%rsp)                  ## 8-byte Spill
+	movq	%rsi, 80(%rsp)                  ## 8-byte Spill
+	movq	%rdi, 96(%rsp)                  ## 8-byte Spill
 	movq	-8(%rcx), %rbx
-	movq	%rbx, 80(%rsp)          ## 8-byte Spill
+	movq	%rbx, 64(%rsp)                  ## 8-byte Spill
+	movq	%rcx, 72(%rsp)                  ## 8-byte Spill
 	movq	(%rdx), %rdx
-	leaq	1480(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	1480(%rsp), %r14
-	movq	1488(%rsp), %r15
-	movq	%r14, %rdx
-	imulq	%rbx, %rdx
-	movq	1552(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	1544(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	1536(%rsp), %rax
-	movq	%rax, 56(%rsp)          ## 8-byte Spill
-	movq	1528(%rsp), %r12
-	movq	1520(%rsp), %r13
-	movq	1512(%rsp), %rbx
-	movq	1504(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	1496(%rsp), %rbp
-	leaq	1400(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	1400(%rsp), %r14
-	adcq	1408(%rsp), %r15
-	adcq	1416(%rsp), %rbp
-	movq	%rbp, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	1424(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	adcq	1432(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	adcq	1440(%rsp), %r13
-	movq	%r13, 16(%rsp)          ## 8-byte Spill
-	adcq	1448(%rsp), %r12
-	movq	%r12, 48(%rsp)          ## 8-byte Spill
-	movq	56(%rsp), %rbx          ## 8-byte Reload
-	adcq	1456(%rsp), %rbx
-	movq	40(%rsp), %r14          ## 8-byte Reload
-	adcq	1464(%rsp), %r14
-	movq	24(%rsp), %r13          ## 8-byte Reload
-	adcq	1472(%rsp), %r13
-	sbbq	%rbp, %rbp
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1320(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %ebp
-	addq	1320(%rsp), %r15
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	1328(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	1336(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %r12          ## 8-byte Reload
-	adcq	1344(%rsp), %r12
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	adcq	1352(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	adcq	1360(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	adcq	1368(%rsp), %rbx
-	adcq	1376(%rsp), %r14
-	movq	%r14, 40(%rsp)          ## 8-byte Spill
-	adcq	1384(%rsp), %r13
-	movq	%r13, 24(%rsp)          ## 8-byte Spill
-	adcq	1392(%rsp), %rbp
-	sbbq	%r14, %r14
-	movq	%r15, %rdx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	1240(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	%r14, %rax
-	andl	$1, %eax
-	addq	1240(%rsp), %r15
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	1248(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %r14            ## 8-byte Reload
-	adcq	1256(%rsp), %r14
-	adcq	1264(%rsp), %r12
-	movq	%r12, 32(%rsp)          ## 8-byte Spill
-	movq	16(%rsp), %r12          ## 8-byte Reload
-	adcq	1272(%rsp), %r12
-	movq	48(%rsp), %r13          ## 8-byte Reload
-	adcq	1280(%rsp), %r13
-	adcq	1288(%rsp), %rbx
-	movq	%rbx, 56(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %r15          ## 8-byte Reload
-	adcq	1296(%rsp), %r15
-	movq	24(%rsp), %rbx          ## 8-byte Reload
-	adcq	1304(%rsp), %rbx
-	adcq	1312(%rsp), %rbp
-	adcq	$0, %rax
-	movq	%rax, 64(%rsp)          ## 8-byte Spill
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	1160(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	addq	1160(%rsp), %rax
-	adcq	1168(%rsp), %r14
-	movq	%r14, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %r14          ## 8-byte Reload
-	adcq	1176(%rsp), %r14
-	adcq	1184(%rsp), %r12
-	movq	%r12, 16(%rsp)          ## 8-byte Spill
-	movq	%r13, %r12
-	adcq	1192(%rsp), %r12
-	movq	56(%rsp), %rcx          ## 8-byte Reload
-	adcq	1200(%rsp), %rcx
-	movq	%rcx, 56(%rsp)          ## 8-byte Spill
-	adcq	1208(%rsp), %r15
-	movq	%r15, %r13
-	adcq	1216(%rsp), %rbx
-	movq	%rbx, 24(%rsp)          ## 8-byte Spill
-	adcq	1224(%rsp), %rbp
-	movq	64(%rsp), %rcx          ## 8-byte Reload
-	adcq	1232(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	sbbq	%r15, %r15
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	1080(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	%r15, %rax
-	andl	$1, %eax
-	addq	1080(%rsp), %rbx
-	movq	(%rsp), %rcx            ## 8-byte Reload
-	adcq	1088(%rsp), %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	movq	%r14, %r15
-	adcq	1096(%rsp), %r15
-	movq	16(%rsp), %r14          ## 8-byte Reload
-	adcq	1104(%rsp), %r14
-	movq	%r12, %rbx
-	adcq	1112(%rsp), %rbx
-	movq	56(%rsp), %rcx          ## 8-byte Reload
-	adcq	1120(%rsp), %rcx
-	movq	%rcx, 56(%rsp)          ## 8-byte Spill
+	leaq	1184(%rsp), %rdi
+	callq	_mulPv512x64
+	movq	1184(%rsp), %r15
+	movq	1192(%rsp), %r12
+	movq	%rbx, %rdx
+	imulq	%r15, %rdx
+	movq	1248(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	1240(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	1232(%rsp), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	movq	1224(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	1216(%rsp), %r14
+	movq	1208(%rsp), %rbx
+	movq	1200(%rsp), %r13
+	leaq	1112(%rsp), %rdi
+	movq	%rbp, %rsi
+	callq	_mulPv512x64
+	addq	1112(%rsp), %r15
+	adcq	1120(%rsp), %r12
 	adcq	1128(%rsp), %r13
-	movq	%r13, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %r13          ## 8-byte Reload
-	adcq	1136(%rsp), %r13
-	adcq	1144(%rsp), %rbp
-	movq	64(%rsp), %r12          ## 8-byte Reload
-	adcq	1152(%rsp), %r12
-	adcq	$0, %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	1000(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	(%rsp), %rax            ## 8-byte Reload
-	addq	1000(%rsp), %rax
-	adcq	1008(%rsp), %r15
-	movq	%r15, 32(%rsp)          ## 8-byte Spill
-	adcq	1016(%rsp), %r14
+	adcq	1136(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  ## 8-byte Spill
 	movq	%r14, %r15
-	adcq	1024(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          ## 8-byte Spill
-	movq	56(%rsp), %r14          ## 8-byte Reload
-	adcq	1032(%rsp), %r14
-	movq	40(%rsp), %rcx          ## 8-byte Reload
-	adcq	1040(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
+	adcq	1144(%rsp), %r15
+	movq	16(%rsp), %rbx                  ## 8-byte Reload
+	adcq	1152(%rsp), %rbx
+	movq	32(%rsp), %r14                  ## 8-byte Reload
+	adcq	1160(%rsp), %r14
+	movq	24(%rsp), %rbp                  ## 8-byte Reload
+	adcq	1168(%rsp), %rbp
+	movq	8(%rsp), %rax                   ## 8-byte Reload
+	adcq	1176(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	8(%rax), %rdx
+	leaq	1040(%rsp), %rdi
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	1104(%rsp), %rcx
+	addq	1040(%rsp), %r12
 	adcq	1048(%rsp), %r13
-	movq	%r13, 24(%rsp)          ## 8-byte Spill
-	adcq	1056(%rsp), %rbp
-	adcq	1064(%rsp), %r12
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	1072(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	sbbq	%rbx, %rbx
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	920(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	920(%rsp), %r13
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	928(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	adcq	936(%rsp), %r15
-	movq	%r15, 16(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %r15          ## 8-byte Reload
-	adcq	944(%rsp), %r15
-	movq	%r14, %r13
-	adcq	952(%rsp), %r13
-	movq	40(%rsp), %r14          ## 8-byte Reload
-	adcq	960(%rsp), %r14
-	movq	24(%rsp), %rbx          ## 8-byte Reload
-	adcq	968(%rsp), %rbx
-	adcq	976(%rsp), %rbp
-	adcq	984(%rsp), %r12
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	992(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	840(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	addq	840(%rsp), %rax
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	848(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	adcq	856(%rsp), %r15
-	adcq	864(%rsp), %r13
-	movq	%r13, 56(%rsp)          ## 8-byte Spill
-	adcq	872(%rsp), %r14
-	movq	%r14, 40(%rsp)          ## 8-byte Spill
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	adcq	1056(%rsp), %rax
+	movq	%rax, 40(%rsp)                  ## 8-byte Spill
+	adcq	1064(%rsp), %r15
+	adcq	1072(%rsp), %rbx
+	adcq	1080(%rsp), %r14
+	movq	%r14, 32(%rsp)                  ## 8-byte Spill
+	adcq	1088(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %r14                   ## 8-byte Reload
+	adcq	1096(%rsp), %r14
+	adcq	$0, %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r12, %rdx
+	leaq	968(%rsp), %rdi
+	movq	72(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	addq	968(%rsp), %r12
+	adcq	976(%rsp), %r13
+	movq	40(%rsp), %rbp                  ## 8-byte Reload
+	adcq	984(%rsp), %rbp
+	adcq	992(%rsp), %r15
+	movq	%r15, 56(%rsp)                  ## 8-byte Spill
+	adcq	1000(%rsp), %rbx
+	movq	%rbx, 16(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %r15                  ## 8-byte Reload
+	adcq	1008(%rsp), %r15
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	1016(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	adcq	1024(%rsp), %r14
+	movq	%r14, 8(%rsp)                   ## 8-byte Spill
+	movq	48(%rsp), %rbx                  ## 8-byte Reload
+	adcq	1032(%rsp), %rbx
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	16(%rax), %rdx
+	leaq	896(%rsp), %rdi
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	960(%rsp), %r12
+	addq	896(%rsp), %r13
+	movq	%rbp, %r14
+	adcq	904(%rsp), %r14
+	movq	56(%rsp), %rax                  ## 8-byte Reload
+	adcq	912(%rsp), %rax
+	movq	%rax, 56(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	920(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	adcq	928(%rsp), %r15
+	movq	%r15, 32(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rbp                  ## 8-byte Reload
+	adcq	936(%rsp), %rbp
+	movq	8(%rsp), %rax                   ## 8-byte Reload
+	adcq	944(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	adcq	952(%rsp), %rbx
+	adcq	$0, %r12
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r13, %rdx
+	leaq	824(%rsp), %rdi
+	movq	72(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	addq	824(%rsp), %r13
+	adcq	832(%rsp), %r14
+	movq	%r14, 40(%rsp)                  ## 8-byte Spill
+	movq	56(%rsp), %r13                  ## 8-byte Reload
+	adcq	840(%rsp), %r13
+	movq	16(%rsp), %r15                  ## 8-byte Reload
+	adcq	848(%rsp), %r15
+	movq	32(%rsp), %r14                  ## 8-byte Reload
+	adcq	856(%rsp), %r14
+	adcq	864(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rbp                   ## 8-byte Reload
+	adcq	872(%rsp), %rbp
 	adcq	880(%rsp), %rbx
-	movq	%rbx, 24(%rsp)          ## 8-byte Spill
-	adcq	888(%rsp), %rbp
-	adcq	896(%rsp), %r12
-	movq	8(%rsp), %r13           ## 8-byte Reload
-	adcq	904(%rsp), %r13
-	movq	(%rsp), %rcx            ## 8-byte Reload
-	adcq	912(%rsp), %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	sbbq	%rbx, %rbx
-	movq	%rax, %rdx
-	movq	%rax, %r14
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	760(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	760(%rsp), %r14
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	768(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
+	adcq	888(%rsp), %r12
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	24(%rax), %rdx
+	leaq	752(%rsp), %rdi
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	816(%rsp), %rcx
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	addq	752(%rsp), %rax
+	adcq	760(%rsp), %r13
+	adcq	768(%rsp), %r15
+	movq	%r15, 16(%rsp)                  ## 8-byte Spill
+	movq	%r14, %r15
 	adcq	776(%rsp), %r15
-	movq	56(%rsp), %r14          ## 8-byte Reload
-	adcq	784(%rsp), %r14
-	movq	40(%rsp), %rcx          ## 8-byte Reload
-	adcq	792(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	800(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	808(%rsp), %rbp
-	movq	%r12, %rbx
-	adcq	816(%rsp), %rbx
-	movq	%r13, %r12
-	adcq	824(%rsp), %r12
-	movq	(%rsp), %r13            ## 8-byte Reload
-	adcq	832(%rsp), %r13
-	adcq	$0, %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	40(%rax), %rdx
+	movq	24(%rsp), %rdx                  ## 8-byte Reload
+	adcq	784(%rsp), %rdx
+	movq	%rdx, 24(%rsp)                  ## 8-byte Spill
+	adcq	792(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   ## 8-byte Spill
+	adcq	800(%rsp), %rbx
+	adcq	808(%rsp), %r12
+	adcq	$0, %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
 	leaq	680(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	addq	680(%rsp), %rax
-	adcq	688(%rsp), %r15
-	movq	%r15, 48(%rsp)          ## 8-byte Spill
+	movq	72(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	addq	680(%rsp), %rbp
+	adcq	688(%rsp), %r13
+	movq	16(%rsp), %r14                  ## 8-byte Reload
 	adcq	696(%rsp), %r14
-	movq	%r14, 56(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rcx          ## 8-byte Reload
-	adcq	704(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %r15          ## 8-byte Reload
+	adcq	704(%rsp), %r15
+	movq	%r15, 32(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %r15                  ## 8-byte Reload
 	adcq	712(%rsp), %r15
+	movq	8(%rsp), %rbp                   ## 8-byte Reload
 	adcq	720(%rsp), %rbp
 	adcq	728(%rsp), %rbx
-	movq	%rbx, 64(%rsp)          ## 8-byte Spill
 	adcq	736(%rsp), %r12
-	movq	%r12, 8(%rsp)           ## 8-byte Spill
-	adcq	744(%rsp), %r13
-	movq	%r13, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %r13          ## 8-byte Reload
-	adcq	752(%rsp), %r13
-	sbbq	%r14, %r14
-	movq	%rax, %rdx
-	movq	%rax, %rbx
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	600(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %r14d
-	addq	600(%rsp), %rbx
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	adcq	608(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	movq	56(%rsp), %rax          ## 8-byte Reload
-	adcq	616(%rsp), %rax
-	movq	%rax, 56(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rbx          ## 8-byte Reload
-	adcq	624(%rsp), %rbx
+	movq	40(%rsp), %rax                  ## 8-byte Reload
+	adcq	744(%rsp), %rax
+	movq	%rax, 40(%rsp)                  ## 8-byte Spill
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	32(%rax), %rdx
+	leaq	608(%rsp), %rdi
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	672(%rsp), %rcx
+	movq	%r13, %rax
+	addq	608(%rsp), %rax
+	adcq	616(%rsp), %r14
+	movq	%r14, 16(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %r13                  ## 8-byte Reload
+	adcq	624(%rsp), %r13
 	adcq	632(%rsp), %r15
-	movq	%r15, 24(%rsp)          ## 8-byte Spill
+	movq	%r15, 24(%rsp)                  ## 8-byte Spill
 	adcq	640(%rsp), %rbp
-	movq	64(%rsp), %r12          ## 8-byte Reload
-	adcq	648(%rsp), %r12
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	656(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %r15            ## 8-byte Reload
-	adcq	664(%rsp), %r15
-	adcq	672(%rsp), %r13
-	adcq	$0, %r14
-	movq	%r14, 16(%rsp)          ## 8-byte Spill
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	520(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	addq	520(%rsp), %rax
-	movq	56(%rsp), %r14          ## 8-byte Reload
-	adcq	528(%rsp), %r14
-	adcq	536(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	544(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	552(%rsp), %rbp
-	adcq	560(%rsp), %r12
-	movq	%r12, 64(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r12           ## 8-byte Reload
-	adcq	568(%rsp), %r12
+	movq	%rbp, 8(%rsp)                   ## 8-byte Spill
+	adcq	648(%rsp), %rbx
+	movq	%rbx, %r15
+	adcq	656(%rsp), %r12
+	movq	40(%rsp), %r14                  ## 8-byte Reload
+	adcq	664(%rsp), %r14
+	movq	%rcx, %rbp
+	adcq	$0, %rbp
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	536(%rsp), %rdi
+	movq	72(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	addq	536(%rsp), %rbx
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	544(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	%r13, %rbx
+	adcq	552(%rsp), %rbx
+	movq	24(%rsp), %r13                  ## 8-byte Reload
+	adcq	560(%rsp), %r13
+	movq	8(%rsp), %rax                   ## 8-byte Reload
+	adcq	568(%rsp), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
 	adcq	576(%rsp), %r15
-	movq	%r15, (%rsp)            ## 8-byte Spill
-	adcq	584(%rsp), %r13
-	movq	%r13, 32(%rsp)          ## 8-byte Spill
-	movq	16(%rsp), %r15          ## 8-byte Reload
+	movq	%r15, 48(%rsp)                  ## 8-byte Spill
+	adcq	584(%rsp), %r12
+	movq	%r14, %r15
 	adcq	592(%rsp), %r15
-	sbbq	%rbx, %rbx
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	440(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %ebx
-	movq	%rbx, %rax
-	addq	440(%rsp), %r13
-	adcq	448(%rsp), %r14
-	movq	%r14, 56(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %r14          ## 8-byte Reload
-	adcq	456(%rsp), %r14
-	movq	24(%rsp), %rbx          ## 8-byte Reload
-	adcq	464(%rsp), %rbx
-	adcq	472(%rsp), %rbp
-	movq	%rbp, 104(%rsp)         ## 8-byte Spill
-	movq	64(%rsp), %rcx          ## 8-byte Reload
-	adcq	480(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	adcq	488(%rsp), %r12
-	movq	%r12, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %rbp            ## 8-byte Reload
-	adcq	496(%rsp), %rbp
-	movq	32(%rsp), %r12          ## 8-byte Reload
+	adcq	600(%rsp), %rbp
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	40(%rax), %rdx
+	leaq	464(%rsp), %rdi
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	528(%rsp), %rcx
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	addq	464(%rsp), %rax
+	adcq	472(%rsp), %rbx
+	movq	%rbx, 32(%rsp)                  ## 8-byte Spill
+	adcq	480(%rsp), %r13
+	movq	%r13, 24(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %r14                   ## 8-byte Reload
+	adcq	488(%rsp), %r14
+	movq	48(%rsp), %r13                  ## 8-byte Reload
+	adcq	496(%rsp), %r13
 	adcq	504(%rsp), %r12
+	movq	%r12, 16(%rsp)                  ## 8-byte Spill
 	adcq	512(%rsp), %r15
-	movq	%r15, %r13
-	adcq	$0, %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	360(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	56(%rsp), %rax          ## 8-byte Reload
-	addq	360(%rsp), %rax
-	adcq	368(%rsp), %r14
-	adcq	376(%rsp), %rbx
-	movq	%rbx, 24(%rsp)          ## 8-byte Spill
-	movq	104(%rsp), %rcx         ## 8-byte Reload
-	adcq	384(%rsp), %rcx
-	movq	%rcx, 104(%rsp)         ## 8-byte Spill
-	movq	64(%rsp), %rbx          ## 8-byte Reload
-	adcq	392(%rsp), %rbx
-	movq	8(%rsp), %r15           ## 8-byte Reload
-	adcq	400(%rsp), %r15
-	adcq	408(%rsp), %rbp
-	movq	%rbp, (%rsp)            ## 8-byte Spill
-	adcq	416(%rsp), %r12
-	movq	%r12, %rbp
+	movq	%r15, %r12
+	adcq	520(%rsp), %rbp
+	movq	%rcx, %r15
+	adcq	$0, %r15
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	392(%rsp), %rdi
+	movq	72(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	addq	392(%rsp), %rbx
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	adcq	400(%rsp), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	408(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	%r14, %rbx
+	adcq	416(%rsp), %rbx
 	adcq	424(%rsp), %r13
-	movq	%r13, 16(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rcx          ## 8-byte Reload
-	adcq	432(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	sbbq	%r13, %r13
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	80(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	280(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %r13d
-	addq	280(%rsp), %r12
-	adcq	288(%rsp), %r14
-	movq	%r14, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	296(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	104(%rsp), %r14         ## 8-byte Reload
-	adcq	304(%rsp), %r14
-	adcq	312(%rsp), %rbx
-	movq	%rbx, 64(%rsp)          ## 8-byte Spill
-	adcq	320(%rsp), %r15
-	movq	%r15, 8(%rsp)           ## 8-byte Spill
-	movq	(%rsp), %rbx            ## 8-byte Reload
-	adcq	328(%rsp), %rbx
-	adcq	336(%rsp), %rbp
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	movq	16(%rsp), %r12          ## 8-byte Reload
-	adcq	344(%rsp), %r12
-	movq	48(%rsp), %rbp          ## 8-byte Reload
-	adcq	352(%rsp), %rbp
+	movq	%r13, 48(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %r14                  ## 8-byte Reload
+	adcq	432(%rsp), %r14
+	adcq	440(%rsp), %r12
+	adcq	448(%rsp), %rbp
+	movq	%rbp, 56(%rsp)                  ## 8-byte Spill
+	adcq	456(%rsp), %r15
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	48(%rax), %rdx
+	leaq	320(%rsp), %rdi
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	leaq	248(%rsp), %rdi
+	movq	384(%rsp), %r13
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	addq	320(%rsp), %rax
+	movq	24(%rsp), %rbp                  ## 8-byte Reload
+	adcq	328(%rsp), %rbp
+	adcq	336(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	344(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	adcq	352(%rsp), %r14
+	movq	%r14, 16(%rsp)                  ## 8-byte Spill
+	adcq	360(%rsp), %r12
+	movq	%r12, 40(%rsp)                  ## 8-byte Spill
+	movq	56(%rsp), %r12                  ## 8-byte Reload
+	adcq	368(%rsp), %r12
+	adcq	376(%rsp), %r15
+	movq	%r15, 32(%rsp)                  ## 8-byte Spill
 	adcq	$0, %r13
-	movq	96(%rsp), %rax          ## 8-byte Reload
-	movq	64(%rax), %rdx
-	leaq	200(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	addq	200(%rsp), %rax
-	movq	24(%rsp), %r15          ## 8-byte Reload
-	adcq	208(%rsp), %r15
-	adcq	216(%rsp), %r14
-	movq	%r14, 104(%rsp)         ## 8-byte Spill
-	movq	64(%rsp), %r14          ## 8-byte Reload
-	adcq	224(%rsp), %r14
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	232(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	adcq	240(%rsp), %rbx
-	movq	%rbx, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	248(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	adcq	256(%rsp), %r12
-	movq	%r12, 16(%rsp)          ## 8-byte Spill
-	adcq	264(%rsp), %rbp
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	272(%rsp), %r13
-	sbbq	%rbx, %rbx
-	movq	80(%rsp), %rdx          ## 8-byte Reload
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
 	imulq	%rax, %rdx
-	movq	%rax, %r12
-	leaq	120(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %ebx
-	addq	120(%rsp), %r12
-	adcq	128(%rsp), %r15
-	movq	104(%rsp), %rbp         ## 8-byte Reload
-	adcq	136(%rsp), %rbp
-	movq	%r14, %rcx
-	adcq	144(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r8            ## 8-byte Reload
-	adcq	152(%rsp), %r8
-	movq	%r8, 8(%rsp)            ## 8-byte Spill
-	movq	(%rsp), %r9             ## 8-byte Reload
-	adcq	160(%rsp), %r9
-	movq	%r9, (%rsp)             ## 8-byte Spill
-	movq	32(%rsp), %r10          ## 8-byte Reload
-	adcq	168(%rsp), %r10
-	movq	%r10, 32(%rsp)          ## 8-byte Spill
-	movq	16(%rsp), %rdi          ## 8-byte Reload
-	adcq	176(%rsp), %rdi
-	movq	%rdi, 16(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %r14          ## 8-byte Reload
-	adcq	184(%rsp), %r14
-	adcq	192(%rsp), %r13
-	adcq	$0, %rbx
-	movq	%r15, %rsi
-	movq	%r15, %r12
-	movq	72(%rsp), %rdx          ## 8-byte Reload
-	subq	(%rdx), %rsi
-	movq	%rbp, %rax
-	movq	%rbp, %r15
-	sbbq	8(%rdx), %rax
-	movq	%rcx, %rbp
-	sbbq	16(%rdx), %rbp
-	movq	%r8, %rcx
-	sbbq	24(%rdx), %rcx
-	movq	%r9, %r8
-	sbbq	32(%rdx), %r8
-	movq	%r10, %r11
-	sbbq	40(%rdx), %r11
-	movq	%rdi, %r10
-	sbbq	48(%rdx), %r10
-	movq	%r14, %rdi
-	sbbq	56(%rdx), %rdi
-	movq	%r13, %r9
-	sbbq	64(%rdx), %r9
-	sbbq	$0, %rbx
-	andl	$1, %ebx
-	cmovneq	%r13, %r9
-	testb	%bl, %bl
-	cmovneq	%r12, %rsi
-	movq	112(%rsp), %rbx         ## 8-byte Reload
-	movq	%rsi, (%rbx)
-	cmovneq	%r15, %rax
-	movq	%rax, 8(%rbx)
-	cmovneq	64(%rsp), %rbp          ## 8-byte Folded Reload
-	movq	%rbp, 16(%rbx)
-	cmovneq	8(%rsp), %rcx           ## 8-byte Folded Reload
-	movq	%rcx, 24(%rbx)
-	cmovneq	(%rsp), %r8             ## 8-byte Folded Reload
-	movq	%r8, 32(%rbx)
-	cmovneq	32(%rsp), %r11          ## 8-byte Folded Reload
-	movq	%r11, 40(%rbx)
-	cmovneq	16(%rsp), %r10          ## 8-byte Folded Reload
-	movq	%r10, 48(%rbx)
-	cmovneq	%r14, %rdi
-	movq	%rdi, 56(%rbx)
-	movq	%r9, 64(%rbx)
-	addq	$1560, %rsp             ## imm = 0x618
+	movq	%rax, %rbx
+	movq	72(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	leaq	176(%rsp), %rdi
+	addq	248(%rsp), %rbx
+	adcq	256(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %r14                   ## 8-byte Reload
+	adcq	264(%rsp), %r14
+	movq	48(%rsp), %rbp                  ## 8-byte Reload
+	adcq	272(%rsp), %rbp
+	movq	16(%rsp), %r15                  ## 8-byte Reload
+	adcq	280(%rsp), %r15
+	movq	40(%rsp), %rbx                  ## 8-byte Reload
+	adcq	288(%rsp), %rbx
+	adcq	296(%rsp), %r12
+	movq	%r12, 56(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	adcq	304(%rsp), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	adcq	312(%rsp), %r13
+	movq	88(%rsp), %rax                  ## 8-byte Reload
+	movq	56(%rax), %rdx
+	movq	80(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	leaq	104(%rsp), %rdi
+	movq	240(%rsp), %r12
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	addq	176(%rsp), %rax
+	adcq	184(%rsp), %r14
+	movq	%r14, 8(%rsp)                   ## 8-byte Spill
+	adcq	192(%rsp), %rbp
+	movq	%rbp, 48(%rsp)                  ## 8-byte Spill
+	adcq	200(%rsp), %r15
+	movq	%r15, 16(%rsp)                  ## 8-byte Spill
+	adcq	208(%rsp), %rbx
+	movq	%rbx, 40(%rsp)                  ## 8-byte Spill
+	movq	56(%rsp), %rbp                  ## 8-byte Reload
+	adcq	216(%rsp), %rbp
+	movq	32(%rsp), %r15                  ## 8-byte Reload
+	adcq	224(%rsp), %r15
+	adcq	232(%rsp), %r13
+	adcq	$0, %r12
+	movq	64(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	movq	72(%rsp), %r14                  ## 8-byte Reload
+	movq	%r14, %rsi
+	callq	_mulPv512x64
+	addq	104(%rsp), %rbx
+	movq	8(%rsp), %r8                    ## 8-byte Reload
+	adcq	112(%rsp), %r8
+	movq	%r8, 8(%rsp)                    ## 8-byte Spill
+	movq	48(%rsp), %r9                   ## 8-byte Reload
+	adcq	120(%rsp), %r9
+	movq	%r9, 48(%rsp)                   ## 8-byte Spill
+	movq	16(%rsp), %rsi                  ## 8-byte Reload
+	adcq	128(%rsp), %rsi
+	movq	40(%rsp), %r11                  ## 8-byte Reload
+	adcq	136(%rsp), %r11
+	movq	%rbp, %r10
+	adcq	144(%rsp), %r10
+	adcq	152(%rsp), %r15
+	adcq	160(%rsp), %r13
+	adcq	168(%rsp), %r12
+	movq	%r14, %rax
+	subq	(%r14), %r8
+	sbbq	8(%r14), %r9
+	movq	%rsi, %rdx
+	movq	%rsi, %r14
+	sbbq	16(%rax), %rdx
+	movq	%r11, %rsi
+	sbbq	24(%rax), %rsi
+	movq	%r10, %rdi
+	sbbq	32(%rax), %rdi
+	movq	%r15, %rbp
+	sbbq	40(%rax), %rbp
+	movq	%r13, %rbx
+	sbbq	48(%rax), %rbx
+	movq	%rax, %rcx
+	movq	%r12, %rax
+	sbbq	56(%rcx), %rax
+	cmovsq	%r12, %rax
+	movq	96(%rsp), %rcx                  ## 8-byte Reload
+	movq	%rax, 56(%rcx)
+	cmovsq	%r13, %rbx
+	movq	%rbx, 48(%rcx)
+	cmovsq	%r15, %rbp
+	movq	%rbp, 40(%rcx)
+	cmovsq	%r10, %rdi
+	movq	%rdi, 32(%rcx)
+	cmovsq	%r11, %rsi
+	movq	%rsi, 24(%rcx)
+	cmovsq	%r14, %rdx
+	movq	%rdx, 16(%rcx)
+	cmovsq	48(%rsp), %r9                   ## 8-byte Folded Reload
+	movq	%r9, 8(%rcx)
+	cmovsq	8(%rsp), %r8                    ## 8-byte Folded Reload
+	movq	%r8, (%rcx)
+	addq	$1256, %rsp                     ## imm = 0x4E8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -14639,529 +6978,301 @@ _mcl_fp_mont9L:                         ## @mcl_fp_mont9L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montNF9L
+                                        ## -- End function
+	.globl	_mcl_fp_montRed8L               ## -- Begin function mcl_fp_montRed8L
 	.p2align	4, 0x90
-_mcl_fp_montNF9L:                       ## @mcl_fp_montNF9L
-## BB#0:
+_mcl_fp_montRed8L:                      ## @mcl_fp_montRed8L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$1560, %rsp             ## imm = 0x618
-	movq	%rcx, 72(%rsp)          ## 8-byte Spill
-	movq	%rdx, 80(%rsp)          ## 8-byte Spill
-	movq	%rsi, 88(%rsp)          ## 8-byte Spill
-	movq	%rdi, 112(%rsp)         ## 8-byte Spill
-	movq	-8(%rcx), %rbx
-	movq	%rbx, 96(%rsp)          ## 8-byte Spill
-	movq	(%rdx), %rdx
-	leaq	1480(%rsp), %rdi
-	callq	l_mulPv576x64
-	movq	1480(%rsp), %r12
-	movq	1488(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	%r12, %rdx
+	subq	$728, %rsp                      ## imm = 0x2D8
+	movq	%rdi, 144(%rsp)                 ## 8-byte Spill
+	movq	56(%rdx), %rax
+	movq	%rax, 136(%rsp)                 ## 8-byte Spill
+	movq	48(%rdx), %rax
+	movq	%rax, 128(%rsp)                 ## 8-byte Spill
+	movq	40(%rdx), %rax
+	movq	%rax, 120(%rsp)                 ## 8-byte Spill
+	movq	32(%rdx), %rax
+	movq	%rax, 112(%rsp)                 ## 8-byte Spill
+	movq	24(%rdx), %rax
+	movq	%rax, 104(%rsp)                 ## 8-byte Spill
+	movq	16(%rdx), %rax
+	movq	%rax, 96(%rsp)                  ## 8-byte Spill
+	movq	8(%rdx), %rax
+	movq	%rax, 88(%rsp)                  ## 8-byte Spill
+	movq	%rsi, 72(%rsp)                  ## 8-byte Spill
+	movq	56(%rsi), %r12
+	movq	48(%rsi), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	40(%rsi), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	movq	32(%rsi), %r15
+	movq	24(%rsi), %r14
+	movq	16(%rsi), %r13
+	movq	(%rsi), %rbp
+	movq	8(%rsi), %rbx
+	movq	-8(%rdx), %rcx
+	movq	%rcx, 56(%rsp)                  ## 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdx, %rsi
+	movq	%rdx, 64(%rsp)                  ## 8-byte Spill
+	movq	%rax, 80(%rsp)                  ## 8-byte Spill
+	movq	%rbp, %rdx
+	imulq	%rcx, %rdx
+	leaq	656(%rsp), %rdi
+	callq	_mulPv512x64
+	addq	656(%rsp), %rbp
+	adcq	664(%rsp), %rbx
+	adcq	672(%rsp), %r13
+	adcq	680(%rsp), %r14
+	adcq	688(%rsp), %r15
+	movq	32(%rsp), %rbp                  ## 8-byte Reload
+	adcq	696(%rsp), %rbp
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	704(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	adcq	712(%rsp), %r12
+	movq	%r12, 24(%rsp)                  ## 8-byte Spill
+	movq	72(%rsp), %rax                  ## 8-byte Reload
+	movq	64(%rax), %rax
+	adcq	720(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	setb	%r12b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
 	imulq	%rbx, %rdx
-	movq	1552(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	movq	1544(%rsp), %r13
-	movq	1536(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	movq	1528(%rsp), %rax
-	movq	%rax, 64(%rsp)          ## 8-byte Spill
-	movq	1520(%rsp), %r14
-	movq	1512(%rsp), %r15
-	movq	1504(%rsp), %rbx
-	movq	1496(%rsp), %rbp
-	leaq	1400(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	1400(%rsp), %r12
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	adcq	1408(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	adcq	1416(%rsp), %rbp
-	movq	%rbp, 104(%rsp)         ## 8-byte Spill
-	adcq	1424(%rsp), %rbx
-	movq	%rbx, (%rsp)            ## 8-byte Spill
-	adcq	1432(%rsp), %r15
-	movq	%r15, 8(%rsp)           ## 8-byte Spill
-	adcq	1440(%rsp), %r14
-	movq	%r14, 32(%rsp)          ## 8-byte Spill
-	movq	64(%rsp), %rbx          ## 8-byte Reload
-	adcq	1448(%rsp), %rbx
-	movq	48(%rsp), %r12          ## 8-byte Reload
-	adcq	1456(%rsp), %r12
-	adcq	1464(%rsp), %r13
-	movq	%r13, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	1472(%rsp), %rbp
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	8(%rax), %rdx
-	leaq	1320(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	1392(%rsp), %rax
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	addq	1320(%rsp), %rcx
-	movq	104(%rsp), %r15         ## 8-byte Reload
-	adcq	1328(%rsp), %r15
-	movq	(%rsp), %r14            ## 8-byte Reload
-	adcq	1336(%rsp), %r14
-	movq	8(%rsp), %rdx           ## 8-byte Reload
-	adcq	1344(%rsp), %rdx
-	movq	%rdx, 8(%rsp)           ## 8-byte Spill
-	movq	32(%rsp), %r13          ## 8-byte Reload
-	adcq	1352(%rsp), %r13
-	adcq	1360(%rsp), %rbx
-	movq	%rbx, 64(%rsp)          ## 8-byte Spill
-	adcq	1368(%rsp), %r12
-	movq	%r12, 48(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rdx          ## 8-byte Reload
-	adcq	1376(%rsp), %rdx
-	movq	%rdx, 40(%rsp)          ## 8-byte Spill
-	adcq	1384(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	adcq	$0, %rax
-	movq	%rax, %rbp
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	1240(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	1240(%rsp), %rbx
-	adcq	1248(%rsp), %r15
-	movq	%r15, 104(%rsp)         ## 8-byte Spill
-	adcq	1256(%rsp), %r14
-	movq	%r14, (%rsp)            ## 8-byte Spill
-	movq	8(%rsp), %r12           ## 8-byte Reload
-	adcq	1264(%rsp), %r12
-	adcq	1272(%rsp), %r13
-	movq	%r13, %r14
-	movq	64(%rsp), %r13          ## 8-byte Reload
-	adcq	1280(%rsp), %r13
-	movq	48(%rsp), %rbx          ## 8-byte Reload
-	adcq	1288(%rsp), %rbx
-	movq	40(%rsp), %r15          ## 8-byte Reload
-	adcq	1296(%rsp), %r15
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	1304(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	1312(%rsp), %rbp
-	movq	%rbp, 56(%rsp)          ## 8-byte Spill
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	16(%rax), %rdx
-	leaq	1160(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	1232(%rsp), %rax
-	movq	104(%rsp), %rcx         ## 8-byte Reload
-	addq	1160(%rsp), %rcx
-	movq	(%rsp), %rbp            ## 8-byte Reload
-	adcq	1168(%rsp), %rbp
-	adcq	1176(%rsp), %r12
-	movq	%r12, 8(%rsp)           ## 8-byte Spill
-	adcq	1184(%rsp), %r14
-	adcq	1192(%rsp), %r13
-	movq	%r13, %r12
-	adcq	1200(%rsp), %rbx
-	movq	%rbx, 48(%rsp)          ## 8-byte Spill
-	adcq	1208(%rsp), %r15
-	movq	%r15, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rbx          ## 8-byte Reload
-	adcq	1216(%rsp), %rbx
-	movq	56(%rsp), %rdx          ## 8-byte Reload
-	adcq	1224(%rsp), %rdx
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	movq	%rax, %r15
-	adcq	$0, %r15
-	movq	%rcx, %rdx
-	movq	%rcx, %r13
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	1080(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	1080(%rsp), %r13
-	adcq	1088(%rsp), %rbp
-	movq	%rbp, (%rsp)            ## 8-byte Spill
-	movq	8(%rsp), %r13           ## 8-byte Reload
-	adcq	1096(%rsp), %r13
-	adcq	1104(%rsp), %r14
-	adcq	1112(%rsp), %r12
-	movq	%r12, 64(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %r12          ## 8-byte Reload
-	adcq	1120(%rsp), %r12
-	movq	40(%rsp), %rbp          ## 8-byte Reload
-	adcq	1128(%rsp), %rbp
-	adcq	1136(%rsp), %rbx
-	movq	%rbx, 24(%rsp)          ## 8-byte Spill
-	movq	56(%rsp), %rbx          ## 8-byte Reload
-	adcq	1144(%rsp), %rbx
-	adcq	1152(%rsp), %r15
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	24(%rax), %rdx
-	leaq	1000(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	1072(%rsp), %rax
-	movq	(%rsp), %rcx            ## 8-byte Reload
-	addq	1000(%rsp), %rcx
-	adcq	1008(%rsp), %r13
-	movq	%r13, 8(%rsp)           ## 8-byte Spill
-	adcq	1016(%rsp), %r14
-	movq	%r14, 32(%rsp)          ## 8-byte Spill
-	movq	64(%rsp), %r14          ## 8-byte Reload
-	adcq	1024(%rsp), %r14
-	adcq	1032(%rsp), %r12
-	adcq	1040(%rsp), %rbp
-	movq	%rbp, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %r13          ## 8-byte Reload
-	adcq	1048(%rsp), %r13
-	adcq	1056(%rsp), %rbx
-	movq	%rbx, 56(%rsp)          ## 8-byte Spill
-	adcq	1064(%rsp), %r15
-	movq	%r15, 16(%rsp)          ## 8-byte Spill
+	leaq	584(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	648(%rsp), %rax
+	addb	$255, %r12b
 	adcq	$0, %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	920(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	920(%rsp), %rbx
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	928(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	movq	32(%rsp), %rbp          ## 8-byte Reload
-	adcq	936(%rsp), %rbp
-	movq	%r14, %rbx
-	adcq	944(%rsp), %rbx
-	adcq	952(%rsp), %r12
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	960(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	968(%rsp), %r13
-	movq	%r13, %r15
-	movq	56(%rsp), %r13          ## 8-byte Reload
-	adcq	976(%rsp), %r13
-	movq	16(%rsp), %r14          ## 8-byte Reload
-	adcq	984(%rsp), %r14
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	992(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	32(%rax), %rdx
-	leaq	840(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	912(%rsp), %rax
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	addq	840(%rsp), %rcx
-	adcq	848(%rsp), %rbp
-	movq	%rbp, 32(%rsp)          ## 8-byte Spill
-	adcq	856(%rsp), %rbx
-	movq	%rbx, 64(%rsp)          ## 8-byte Spill
-	adcq	864(%rsp), %r12
-	movq	40(%rsp), %rbp          ## 8-byte Reload
-	adcq	872(%rsp), %rbp
-	adcq	880(%rsp), %r15
-	movq	%r15, 24(%rsp)          ## 8-byte Spill
-	adcq	888(%rsp), %r13
-	adcq	896(%rsp), %r14
-	movq	%r14, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rdx            ## 8-byte Reload
-	adcq	904(%rsp), %rdx
-	movq	%rdx, (%rsp)            ## 8-byte Spill
+	movq	%rax, %rcx
+	addq	584(%rsp), %rbx
+	adcq	592(%rsp), %r13
+	adcq	600(%rsp), %r14
+	adcq	608(%rsp), %r15
+	adcq	616(%rsp), %rbp
+	movq	%rbp, 32(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	624(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rbp                  ## 8-byte Reload
+	adcq	632(%rsp), %rbp
+	movq	(%rsp), %rax                    ## 8-byte Reload
+	adcq	640(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	movq	72(%rsp), %r12                  ## 8-byte Reload
+	adcq	72(%r12), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	setb	%bl
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r13, %rdx
+	leaq	512(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	576(%rsp), %rax
+	addb	$255, %bl
 	adcq	$0, %rax
-	movq	%rax, %r14
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	760(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	760(%rsp), %rbx
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	768(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	64(%rsp), %r15          ## 8-byte Reload
-	adcq	776(%rsp), %r15
-	adcq	784(%rsp), %r12
-	movq	%r12, 48(%rsp)          ## 8-byte Spill
-	movq	%rbp, %rbx
-	adcq	792(%rsp), %rbx
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	800(%rsp), %rbp
-	adcq	808(%rsp), %r13
-	movq	16(%rsp), %rax          ## 8-byte Reload
-	adcq	816(%rsp), %rax
-	movq	%rax, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %r12            ## 8-byte Reload
-	adcq	824(%rsp), %r12
-	adcq	832(%rsp), %r14
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	40(%rax), %rdx
-	leaq	680(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	752(%rsp), %rcx
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	addq	680(%rsp), %rax
-	adcq	688(%rsp), %r15
-	movq	%r15, 64(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rdx          ## 8-byte Reload
-	adcq	696(%rsp), %rdx
-	movq	%rdx, 48(%rsp)          ## 8-byte Spill
-	adcq	704(%rsp), %rbx
-	movq	%rbx, 40(%rsp)          ## 8-byte Spill
-	adcq	712(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	adcq	720(%rsp), %r13
-	movq	%r13, %r15
-	movq	16(%rsp), %rbx          ## 8-byte Reload
-	adcq	728(%rsp), %rbx
-	adcq	736(%rsp), %r12
-	movq	%r12, (%rsp)            ## 8-byte Spill
-	adcq	744(%rsp), %r14
-	movq	%r14, 32(%rsp)          ## 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r13
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	600(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	600(%rsp), %r13
-	movq	64(%rsp), %r13          ## 8-byte Reload
-	adcq	608(%rsp), %r13
-	movq	48(%rsp), %r12          ## 8-byte Reload
-	adcq	616(%rsp), %r12
-	movq	40(%rsp), %rbp          ## 8-byte Reload
-	adcq	624(%rsp), %rbp
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	632(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	640(%rsp), %r15
-	movq	%r15, 56(%rsp)          ## 8-byte Spill
-	adcq	648(%rsp), %rbx
-	movq	%rbx, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %r14            ## 8-byte Reload
-	adcq	656(%rsp), %r14
-	movq	32(%rsp), %rbx          ## 8-byte Reload
-	adcq	664(%rsp), %rbx
-	movq	8(%rsp), %r15           ## 8-byte Reload
-	adcq	672(%rsp), %r15
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	48(%rax), %rdx
-	leaq	520(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	592(%rsp), %rcx
-	movq	%r13, %rax
-	addq	520(%rsp), %rax
-	adcq	528(%rsp), %r12
-	movq	%r12, 48(%rsp)          ## 8-byte Spill
-	movq	%rbp, %r12
-	adcq	536(%rsp), %r12
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	544(%rsp), %rbp
-	movq	56(%rsp), %rdx          ## 8-byte Reload
-	adcq	552(%rsp), %rdx
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	movq	16(%rsp), %rdx          ## 8-byte Reload
-	adcq	560(%rsp), %rdx
-	movq	%rdx, 16(%rsp)          ## 8-byte Spill
-	adcq	568(%rsp), %r14
-	movq	%r14, (%rsp)            ## 8-byte Spill
-	adcq	576(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	adcq	584(%rsp), %r15
-	movq	%r15, 8(%rsp)           ## 8-byte Spill
-	adcq	$0, %rcx
-	movq	%rcx, %r13
-	movq	%rax, %rdx
-	movq	%rax, %r14
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
+	movq	%rax, %rcx
+	addq	512(%rsp), %r13
+	adcq	520(%rsp), %r14
+	adcq	528(%rsp), %r15
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	adcq	536(%rsp), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %rax                  ## 8-byte Reload
+	adcq	544(%rsp), %rax
+	movq	%rax, 16(%rsp)                  ## 8-byte Spill
+	adcq	552(%rsp), %rbp
+	movq	%rbp, 24(%rsp)                  ## 8-byte Spill
+	movq	(%rsp), %rbp                    ## 8-byte Reload
+	adcq	560(%rsp), %rbp
+	movq	8(%rsp), %rbx                   ## 8-byte Reload
+	adcq	568(%rsp), %rbx
+	adcq	80(%r12), %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	setb	%r13b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r14, %rdx
 	leaq	440(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
+	movq	64(%rsp), %r12                  ## 8-byte Reload
+	movq	%r12, %rsi
+	callq	_mulPv512x64
+	movq	504(%rsp), %rax
+	addb	$255, %r13b
+	adcq	$0, %rax
 	addq	440(%rsp), %r14
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	adcq	448(%rsp), %rax
-	movq	%rax, 48(%rsp)          ## 8-byte Spill
-	adcq	456(%rsp), %r12
-	adcq	464(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	56(%rsp), %r14          ## 8-byte Reload
-	adcq	472(%rsp), %r14
-	movq	16(%rsp), %r15          ## 8-byte Reload
-	adcq	480(%rsp), %r15
-	movq	(%rsp), %rbp            ## 8-byte Reload
-	adcq	488(%rsp), %rbp
-	movq	32(%rsp), %rbx          ## 8-byte Reload
-	adcq	496(%rsp), %rbx
-	movq	8(%rsp), %rax           ## 8-byte Reload
-	adcq	504(%rsp), %rax
-	movq	%rax, 8(%rsp)           ## 8-byte Spill
-	adcq	512(%rsp), %r13
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	56(%rax), %rdx
-	leaq	360(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	432(%rsp), %rcx
-	movq	48(%rsp), %rax          ## 8-byte Reload
-	addq	360(%rsp), %rax
-	adcq	368(%rsp), %r12
-	movq	%r12, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rdx          ## 8-byte Reload
-	adcq	376(%rsp), %rdx
-	movq	%rdx, 24(%rsp)          ## 8-byte Spill
-	adcq	384(%rsp), %r14
-	movq	%r14, 56(%rsp)          ## 8-byte Spill
-	adcq	392(%rsp), %r15
-	movq	%r15, 16(%rsp)          ## 8-byte Spill
-	adcq	400(%rsp), %rbp
-	movq	%rbp, (%rsp)            ## 8-byte Spill
-	adcq	408(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r14           ## 8-byte Reload
-	adcq	416(%rsp), %r14
-	adcq	424(%rsp), %r13
-	movq	%r13, %r15
-	adcq	$0, %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	movq	%rax, %rdx
-	movq	%rax, %r12
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	280(%rsp), %rdi
-	movq	72(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	280(%rsp), %r12
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	288(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	296(%rsp), %rbp
-	movq	56(%rsp), %rax          ## 8-byte Reload
+	adcq	448(%rsp), %r15
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	adcq	456(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	movq	16(%rsp), %r13                  ## 8-byte Reload
+	adcq	464(%rsp), %r13
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	adcq	472(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  ## 8-byte Spill
+	adcq	480(%rsp), %rbp
+	movq	%rbp, (%rsp)                    ## 8-byte Spill
+	adcq	488(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	movq	40(%rsp), %rbp                  ## 8-byte Reload
+	adcq	496(%rsp), %rbp
+	movq	72(%rsp), %rcx                  ## 8-byte Reload
+	adcq	88(%rcx), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	setb	%bl
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r15, %rdx
+	leaq	368(%rsp), %rdi
+	movq	%r12, %rsi
+	callq	_mulPv512x64
+	movq	432(%rsp), %r14
+	addb	$255, %bl
+	adcq	$0, %r14
+	addq	368(%rsp), %r15
+	movq	32(%rsp), %rax                  ## 8-byte Reload
+	adcq	376(%rsp), %rax
+	adcq	384(%rsp), %r13
+	movq	%r13, 16(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rbx                  ## 8-byte Reload
+	adcq	392(%rsp), %rbx
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	400(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	408(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	adcq	416(%rsp), %rbp
+	movq	%rbp, 40(%rsp)                  ## 8-byte Spill
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	424(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	movq	72(%rsp), %rcx                  ## 8-byte Reload
+	adcq	96(%rcx), %r14
+	setb	%r15b
+	movq	56(%rsp), %r13                  ## 8-byte Reload
+	movq	%r13, %rdx
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	296(%rsp), %rdi
+	movq	%r12, %rsi
+	callq	_mulPv512x64
+	movq	360(%rsp), %r12
+	addb	$255, %r15b
+	adcq	$0, %r12
+	addq	296(%rsp), %rbp
+	movq	16(%rsp), %rax                  ## 8-byte Reload
 	adcq	304(%rsp), %rax
-	movq	%rax, 56(%rsp)          ## 8-byte Spill
-	movq	16(%rsp), %r13          ## 8-byte Reload
-	adcq	312(%rsp), %r13
-	movq	(%rsp), %r12            ## 8-byte Reload
-	adcq	320(%rsp), %r12
-	movq	32(%rsp), %rbx          ## 8-byte Reload
-	adcq	328(%rsp), %rbx
-	adcq	336(%rsp), %r14
-	movq	%r14, 8(%rsp)           ## 8-byte Spill
-	adcq	344(%rsp), %r15
-	movq	%r15, 64(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %r14          ## 8-byte Reload
+	adcq	312(%rsp), %rbx
+	movq	%rbx, 24(%rsp)                  ## 8-byte Spill
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	320(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	328(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
+	adcq	336(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	344(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
 	adcq	352(%rsp), %r14
-	movq	80(%rsp), %rax          ## 8-byte Reload
-	movq	64(%rax), %rdx
-	leaq	200(%rsp), %rdi
-	movq	88(%rsp), %rsi          ## 8-byte Reload
-	callq	l_mulPv576x64
-	movq	272(%rsp), %rcx
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	addq	200(%rsp), %rax
-	adcq	208(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	56(%rsp), %rbp          ## 8-byte Reload
-	adcq	216(%rsp), %rbp
-	adcq	224(%rsp), %r13
-	movq	%r13, 16(%rsp)          ## 8-byte Spill
-	adcq	232(%rsp), %r12
-	movq	%r12, (%rsp)            ## 8-byte Spill
-	adcq	240(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r15           ## 8-byte Reload
-	adcq	248(%rsp), %r15
-	movq	64(%rsp), %r12          ## 8-byte Reload
-	adcq	256(%rsp), %r12
-	adcq	264(%rsp), %r14
-	adcq	$0, %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	96(%rsp), %rdx          ## 8-byte Reload
+	movq	72(%rsp), %rbp                  ## 8-byte Reload
+	adcq	104(%rbp), %r12
+	setb	%r15b
+	movq	%r13, %rdx
 	imulq	%rax, %rdx
 	movq	%rax, %rbx
-	leaq	120(%rsp), %rdi
-	movq	72(%rsp), %r13          ## 8-byte Reload
-	movq	%r13, %rsi
-	callq	l_mulPv576x64
-	addq	120(%rsp), %rbx
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	128(%rsp), %rcx
-	movq	%rbp, %rdx
-	adcq	136(%rsp), %rdx
-	movq	16(%rsp), %rsi          ## 8-byte Reload
-	adcq	144(%rsp), %rsi
-	movq	%rsi, 16(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rdi            ## 8-byte Reload
-	adcq	152(%rsp), %rdi
-	movq	%rdi, (%rsp)            ## 8-byte Spill
-	movq	32(%rsp), %rbx          ## 8-byte Reload
-	adcq	160(%rsp), %rbx
-	movq	%rbx, 32(%rsp)          ## 8-byte Spill
-	movq	%r15, %r8
-	adcq	168(%rsp), %r8
-	movq	%r8, 8(%rsp)            ## 8-byte Spill
-	movq	%r12, %r15
+	leaq	224(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	288(%rsp), %r13
+	addb	$255, %r15b
+	adcq	$0, %r13
+	addq	224(%rsp), %rbx
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	232(%rsp), %rax
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	248(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
+	adcq	256(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	264(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	adcq	272(%rsp), %r14
+	adcq	280(%rsp), %r12
+	adcq	112(%rbp), %r13
+	setb	%r15b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbx
+	leaq	152(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	addb	$255, %r15b
+	movq	216(%rsp), %rdx
+	adcq	$0, %rdx
+	addq	152(%rsp), %rbx
+	movq	(%rsp), %r9                     ## 8-byte Reload
+	adcq	160(%rsp), %r9
+	movq	%r9, (%rsp)                     ## 8-byte Spill
+	movq	8(%rsp), %r10                   ## 8-byte Reload
+	adcq	168(%rsp), %r10
+	movq	%r10, 8(%rsp)                   ## 8-byte Spill
+	movq	40(%rsp), %r15                  ## 8-byte Reload
 	adcq	176(%rsp), %r15
-	adcq	184(%rsp), %r14
-	movq	40(%rsp), %r9           ## 8-byte Reload
-	adcq	192(%rsp), %r9
-	movq	%rcx, %rax
-	movq	%rcx, %r11
-	movq	%r13, %rbp
-	subq	(%rbp), %rax
+	movq	48(%rsp), %r11                  ## 8-byte Reload
+	adcq	184(%rsp), %r11
+	adcq	192(%rsp), %r14
+	adcq	200(%rsp), %r12
+	adcq	208(%rsp), %r13
+	adcq	120(%rbp), %rdx
+	xorl	%r8d, %r8d
+	subq	80(%rsp), %r9                   ## 8-byte Folded Reload
+	sbbq	88(%rsp), %r10                  ## 8-byte Folded Reload
+	movq	%r15, %rdi
+	sbbq	96(%rsp), %rdi                  ## 8-byte Folded Reload
+	movq	%r11, %rbp
+	sbbq	104(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	%r14, %rbx
+	sbbq	112(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%r12, %rsi
+	sbbq	120(%rsp), %rsi                 ## 8-byte Folded Reload
+	movq	%r13, %rax
+	sbbq	128(%rsp), %rax                 ## 8-byte Folded Reload
 	movq	%rdx, %rcx
-	movq	%rdx, %r12
-	sbbq	8(%rbp), %rcx
-	movq	%rsi, %rdx
-	sbbq	16(%rbp), %rdx
-	movq	%rdi, %rsi
-	sbbq	24(%rbp), %rsi
-	movq	%rbx, %rdi
-	sbbq	32(%rbp), %rdi
-	movq	%r8, %r10
-	sbbq	40(%rbp), %r10
-	movq	%r15, %r13
-	sbbq	48(%rbp), %r13
-	movq	%r14, %r8
-	sbbq	56(%rbp), %r8
-	movq	%rbp, %rbx
-	movq	%r9, %rbp
-	sbbq	64(%rbx), %rbp
-	movq	%rbp, %rbx
-	sarq	$63, %rbx
-	cmovsq	%r11, %rax
-	movq	112(%rsp), %rbx         ## 8-byte Reload
-	movq	%rax, (%rbx)
-	cmovsq	%r12, %rcx
-	movq	%rcx, 8(%rbx)
-	cmovsq	16(%rsp), %rdx          ## 8-byte Folded Reload
-	movq	%rdx, 16(%rbx)
-	cmovsq	(%rsp), %rsi            ## 8-byte Folded Reload
-	movq	%rsi, 24(%rbx)
-	cmovsq	32(%rsp), %rdi          ## 8-byte Folded Reload
-	movq	%rdi, 32(%rbx)
-	cmovsq	8(%rsp), %r10           ## 8-byte Folded Reload
-	movq	%r10, 40(%rbx)
-	cmovsq	%r15, %r13
-	movq	%r13, 48(%rbx)
-	cmovsq	%r14, %r8
-	movq	%r8, 56(%rbx)
-	cmovsq	%r9, %rbp
-	movq	%rbp, 64(%rbx)
-	addq	$1560, %rsp             ## imm = 0x618
+	sbbq	136(%rsp), %rcx                 ## 8-byte Folded Reload
+	sbbq	%r8, %r8
+	testb	$1, %r8b
+	cmovneq	%rdx, %rcx
+	movq	144(%rsp), %rdx                 ## 8-byte Reload
+	movq	%rcx, 56(%rdx)
+	cmovneq	%r13, %rax
+	movq	%rax, 48(%rdx)
+	cmovneq	%r12, %rsi
+	movq	%rsi, 40(%rdx)
+	cmovneq	%r14, %rbx
+	movq	%rbx, 32(%rdx)
+	cmovneq	%r11, %rbp
+	movq	%rbp, 24(%rdx)
+	cmovneq	%r15, %rdi
+	movq	%rdi, 16(%rdx)
+	cmovneq	8(%rsp), %r10                   ## 8-byte Folded Reload
+	movq	%r10, 8(%rdx)
+	cmovneq	(%rsp), %r9                     ## 8-byte Folded Reload
+	movq	%r9, (%rdx)
+	addq	$728, %rsp                      ## imm = 0x2D8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -15169,425 +7280,301 @@ _mcl_fp_montNF9L:                       ## @mcl_fp_montNF9L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_montRed9L
+                                        ## -- End function
+	.globl	_mcl_fp_montRedNF8L             ## -- Begin function mcl_fp_montRedNF8L
 	.p2align	4, 0x90
-_mcl_fp_montRed9L:                      ## @mcl_fp_montRed9L
-## BB#0:
+_mcl_fp_montRedNF8L:                    ## @mcl_fp_montRedNF8L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	subq	$936, %rsp              ## imm = 0x3A8
-	movq	%rdx, %rax
-	movq	%rdi, 208(%rsp)         ## 8-byte Spill
-	movq	-8(%rax), %rcx
-	movq	%rcx, 96(%rsp)          ## 8-byte Spill
-	movq	(%rsi), %r14
-	movq	8(%rsi), %rdx
-	movq	%rdx, (%rsp)            ## 8-byte Spill
-	movq	%r14, %rdx
+	subq	$728, %rsp                      ## imm = 0x2D8
+	movq	%rdi, 144(%rsp)                 ## 8-byte Spill
+	movq	56(%rdx), %rax
+	movq	%rax, 136(%rsp)                 ## 8-byte Spill
+	movq	48(%rdx), %rax
+	movq	%rax, 128(%rsp)                 ## 8-byte Spill
+	movq	40(%rdx), %rax
+	movq	%rax, 120(%rsp)                 ## 8-byte Spill
+	movq	32(%rdx), %rax
+	movq	%rax, 112(%rsp)                 ## 8-byte Spill
+	movq	24(%rdx), %rax
+	movq	%rax, 104(%rsp)                 ## 8-byte Spill
+	movq	16(%rdx), %rax
+	movq	%rax, 96(%rsp)                  ## 8-byte Spill
+	movq	8(%rdx), %rax
+	movq	%rax, 88(%rsp)                  ## 8-byte Spill
+	movq	%rsi, 72(%rsp)                  ## 8-byte Spill
+	movq	56(%rsi), %rax
+	movq	%rax, 8(%rsp)                   ## 8-byte Spill
+	movq	48(%rsi), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	40(%rsi), %r12
+	movq	32(%rsi), %r13
+	movq	24(%rsi), %r15
+	movq	16(%rsi), %r14
+	movq	(%rsi), %rbx
+	movq	8(%rsi), %rbp
+	movq	-8(%rdx), %rcx
+	movq	%rcx, 56(%rsp)                  ## 8-byte Spill
+	movq	(%rdx), %rax
+	movq	%rdx, %rsi
+	movq	%rdx, 64(%rsp)                  ## 8-byte Spill
+	movq	%rax, 80(%rsp)                  ## 8-byte Spill
+	movq	%rbx, %rdx
 	imulq	%rcx, %rdx
-	movq	136(%rsi), %rcx
-	movq	%rcx, 88(%rsp)          ## 8-byte Spill
-	movq	128(%rsi), %rcx
-	movq	%rcx, 56(%rsp)          ## 8-byte Spill
-	movq	120(%rsi), %rcx
-	movq	%rcx, 80(%rsp)          ## 8-byte Spill
-	movq	112(%rsi), %rcx
-	movq	%rcx, 72(%rsp)          ## 8-byte Spill
-	movq	104(%rsi), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	movq	96(%rsi), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	movq	88(%rsi), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	80(%rsi), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	movq	72(%rsi), %r12
-	movq	64(%rsi), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	56(%rsi), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	48(%rsi), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	movq	40(%rsi), %rbp
-	movq	32(%rsi), %rbx
-	movq	24(%rsi), %r13
-	movq	16(%rsi), %r15
-	movq	%rax, %rcx
-	movq	(%rcx), %rax
-	movq	%rax, 144(%rsp)         ## 8-byte Spill
-	movq	64(%rcx), %rax
-	movq	%rax, 200(%rsp)         ## 8-byte Spill
-	movq	56(%rcx), %rax
-	movq	%rax, 192(%rsp)         ## 8-byte Spill
-	movq	48(%rcx), %rax
-	movq	%rax, 184(%rsp)         ## 8-byte Spill
-	movq	40(%rcx), %rax
-	movq	%rax, 176(%rsp)         ## 8-byte Spill
-	movq	32(%rcx), %rax
-	movq	%rax, 168(%rsp)         ## 8-byte Spill
-	movq	24(%rcx), %rax
-	movq	%rax, 160(%rsp)         ## 8-byte Spill
-	movq	16(%rcx), %rax
-	movq	%rax, 152(%rsp)         ## 8-byte Spill
-	movq	8(%rcx), %rax
-	movq	%rax, 136(%rsp)         ## 8-byte Spill
-	movq	%rcx, %rsi
-	movq	%rsi, 104(%rsp)         ## 8-byte Spill
-	leaq	856(%rsp), %rdi
-	callq	l_mulPv576x64
-	addq	856(%rsp), %r14
-	movq	(%rsp), %rcx            ## 8-byte Reload
-	adcq	864(%rsp), %rcx
-	adcq	872(%rsp), %r15
-	adcq	880(%rsp), %r13
-	adcq	888(%rsp), %rbx
-	movq	%rbx, 120(%rsp)         ## 8-byte Spill
-	adcq	896(%rsp), %rbp
-	movq	%rbp, 112(%rsp)         ## 8-byte Spill
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	adcq	904(%rsp), %rax
-	movq	%rax, 64(%rsp)          ## 8-byte Spill
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	912(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	920(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	adcq	928(%rsp), %r12
-	movq	%r12, (%rsp)            ## 8-byte Spill
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	$0, %rbp
-	adcq	$0, 8(%rsp)             ## 8-byte Folded Spill
-	adcq	$0, 16(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 48(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 72(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 56(%rsp)            ## 8-byte Folded Spill
-	movq	88(%rsp), %r14          ## 8-byte Reload
-	adcq	$0, %r14
-	sbbq	%r12, %r12
-	movq	%rcx, %rdx
-	movq	%rcx, %rbx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	776(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	andl	$1, %r12d
-	addq	776(%rsp), %rbx
-	adcq	784(%rsp), %r15
-	adcq	792(%rsp), %r13
-	movq	%r13, 128(%rsp)         ## 8-byte Spill
-	movq	120(%rsp), %rax         ## 8-byte Reload
-	adcq	800(%rsp), %rax
-	movq	%rax, 120(%rsp)         ## 8-byte Spill
-	movq	112(%rsp), %rax         ## 8-byte Reload
-	adcq	808(%rsp), %rax
-	movq	%rax, 112(%rsp)         ## 8-byte Spill
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	adcq	816(%rsp), %rax
-	movq	%rax, 64(%rsp)          ## 8-byte Spill
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	824(%rsp), %rax
-	movq	%rax, 32(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rax          ## 8-byte Reload
-	adcq	832(%rsp), %rax
-	movq	%rax, 40(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	840(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	adcq	848(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r13           ## 8-byte Reload
-	adcq	$0, %r13
-	adcq	$0, 16(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 48(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 72(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	movq	56(%rsp), %rbx          ## 8-byte Reload
-	adcq	$0, %rbx
-	adcq	$0, %r14
-	movq	%r14, 88(%rsp)          ## 8-byte Spill
-	adcq	$0, %r12
-	movq	%r15, %rdx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	696(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	696(%rsp), %r15
-	movq	128(%rsp), %rcx         ## 8-byte Reload
-	adcq	704(%rsp), %rcx
-	movq	120(%rsp), %rax         ## 8-byte Reload
-	adcq	712(%rsp), %rax
-	movq	%rax, 120(%rsp)         ## 8-byte Spill
-	movq	112(%rsp), %rax         ## 8-byte Reload
+	leaq	656(%rsp), %rdi
+	callq	_mulPv512x64
+	addq	656(%rsp), %rbx
+	adcq	664(%rsp), %rbp
+	adcq	672(%rsp), %r14
+	adcq	680(%rsp), %r15
+	adcq	688(%rsp), %r13
+	adcq	696(%rsp), %r12
+	movq	%r12, 48(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	704(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rbx                   ## 8-byte Reload
+	adcq	712(%rsp), %rbx
+	movq	72(%rsp), %rax                  ## 8-byte Reload
+	movq	64(%rax), %rax
 	adcq	720(%rsp), %rax
-	movq	%rax, 112(%rsp)         ## 8-byte Spill
-	movq	64(%rsp), %rbp          ## 8-byte Reload
-	adcq	728(%rsp), %rbp
-	movq	32(%rsp), %r14          ## 8-byte Reload
-	adcq	736(%rsp), %r14
-	movq	40(%rsp), %r15          ## 8-byte Reload
-	adcq	744(%rsp), %r15
-	movq	(%rsp), %rax            ## 8-byte Reload
-	adcq	752(%rsp), %rax
-	movq	%rax, (%rsp)            ## 8-byte Spill
-	movq	24(%rsp), %rax          ## 8-byte Reload
-	adcq	760(%rsp), %rax
-	movq	%rax, 24(%rsp)          ## 8-byte Spill
-	adcq	768(%rsp), %r13
-	movq	%r13, 8(%rsp)           ## 8-byte Spill
-	adcq	$0, 16(%rsp)            ## 8-byte Folded Spill
-	movq	48(%rsp), %r13          ## 8-byte Reload
-	adcq	$0, %r13
-	adcq	$0, 72(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %rbx
-	movq	%rbx, 56(%rsp)          ## 8-byte Spill
-	adcq	$0, 88(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rcx, %rbx
-	movq	%rbx, %rdx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	616(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	616(%rsp), %rbx
-	movq	120(%rsp), %rax         ## 8-byte Reload
-	adcq	624(%rsp), %rax
-	movq	112(%rsp), %rcx         ## 8-byte Reload
-	adcq	632(%rsp), %rcx
-	movq	%rcx, 112(%rsp)         ## 8-byte Spill
-	adcq	640(%rsp), %rbp
-	movq	%rbp, 64(%rsp)          ## 8-byte Spill
-	adcq	648(%rsp), %r14
-	movq	%r14, 32(%rsp)          ## 8-byte Spill
-	adcq	656(%rsp), %r15
-	movq	(%rsp), %r14            ## 8-byte Reload
-	adcq	664(%rsp), %r14
-	movq	24(%rsp), %rbp          ## 8-byte Reload
-	adcq	672(%rsp), %rbp
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	680(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rcx          ## 8-byte Reload
-	adcq	688(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	adcq	$0, %r13
-	movq	%r13, 48(%rsp)          ## 8-byte Spill
-	adcq	$0, 72(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 56(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 88(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rax, %rbx
-	movq	%rbx, %rdx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	536(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	536(%rsp), %rbx
-	movq	112(%rsp), %rax         ## 8-byte Reload
-	adcq	544(%rsp), %rax
-	movq	64(%rsp), %rcx          ## 8-byte Reload
-	adcq	552(%rsp), %rcx
-	movq	%rcx, 64(%rsp)          ## 8-byte Spill
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	560(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	adcq	568(%rsp), %r15
-	movq	%r15, 40(%rsp)          ## 8-byte Spill
-	adcq	576(%rsp), %r14
-	movq	%r14, (%rsp)            ## 8-byte Spill
-	adcq	584(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r13           ## 8-byte Reload
-	adcq	592(%rsp), %r13
-	movq	16(%rsp), %r15          ## 8-byte Reload
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	setb	%r12b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rbp, %rdx
+	leaq	584(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	648(%rsp), %rax
+	addb	$255, %r12b
+	adcq	$0, %rax
+	movq	%rax, %rcx
+	addq	584(%rsp), %rbp
+	adcq	592(%rsp), %r14
 	adcq	600(%rsp), %r15
-	movq	48(%rsp), %rbp          ## 8-byte Reload
-	adcq	608(%rsp), %rbp
-	movq	72(%rsp), %rbx          ## 8-byte Reload
-	adcq	$0, %rbx
-	adcq	$0, 80(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 56(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, 88(%rsp)            ## 8-byte Folded Spill
-	adcq	$0, %r12
-	movq	%rax, %rdx
+	adcq	608(%rsp), %r13
+	movq	48(%rsp), %rax                  ## 8-byte Reload
+	adcq	616(%rsp), %rax
+	movq	%rax, 48(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rax                  ## 8-byte Reload
+	adcq	624(%rsp), %rax
+	movq	%rax, 24(%rsp)                  ## 8-byte Spill
+	adcq	632(%rsp), %rbx
+	movq	%rbx, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rax                    ## 8-byte Reload
+	adcq	640(%rsp), %rax
+	movq	%rax, (%rsp)                    ## 8-byte Spill
+	movq	72(%rsp), %rax                  ## 8-byte Reload
+	adcq	72(%rax), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	setb	%bl
+	movq	56(%rsp), %rbp                  ## 8-byte Reload
+	movq	%rbp, %rdx
+	imulq	%r14, %rdx
+	leaq	512(%rsp), %rdi
+	movq	64(%rsp), %r12                  ## 8-byte Reload
+	movq	%r12, %rsi
+	callq	_mulPv512x64
+	movq	576(%rsp), %rax
+	addb	$255, %bl
+	adcq	$0, %rax
+	addq	512(%rsp), %r14
+	adcq	520(%rsp), %r15
+	adcq	528(%rsp), %r13
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	536(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rcx                  ## 8-byte Reload
+	adcq	544(%rsp), %rcx
+	movq	%rcx, 24(%rsp)                  ## 8-byte Spill
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	552(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	560(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	568(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
 	movq	%rax, %r14
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	456(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	456(%rsp), %r14
-	movq	64(%rsp), %rax          ## 8-byte Reload
-	adcq	464(%rsp), %rax
-	movq	32(%rsp), %rcx          ## 8-byte Reload
-	adcq	472(%rsp), %rcx
-	movq	%rcx, 32(%rsp)          ## 8-byte Spill
-	movq	40(%rsp), %rcx          ## 8-byte Reload
+	movq	72(%rsp), %rax                  ## 8-byte Reload
+	adcq	80(%rax), %r14
+	setb	%bl
+	movq	%rbp, %rdx
+	imulq	%r15, %rdx
+	leaq	440(%rsp), %rdi
+	movq	%r12, %rsi
+	callq	_mulPv512x64
+	movq	504(%rsp), %rax
+	addb	$255, %bl
+	adcq	$0, %rax
+	addq	440(%rsp), %r15
+	adcq	448(%rsp), %r13
+	movq	48(%rsp), %rcx                  ## 8-byte Reload
+	adcq	456(%rsp), %rcx
+	movq	%rcx, 48(%rsp)                  ## 8-byte Spill
+	movq	24(%rsp), %rbx                  ## 8-byte Reload
+	adcq	464(%rsp), %rbx
+	movq	8(%rsp), %rbp                   ## 8-byte Reload
+	adcq	472(%rsp), %rbp
+	movq	(%rsp), %rcx                    ## 8-byte Reload
 	adcq	480(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rcx            ## 8-byte Reload
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
 	adcq	488(%rsp), %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	movq	24(%rsp), %rcx          ## 8-byte Reload
-	adcq	496(%rsp), %rcx
-	movq	%rcx, 24(%rsp)          ## 8-byte Spill
-	adcq	504(%rsp), %r13
-	movq	%r13, 8(%rsp)           ## 8-byte Spill
-	adcq	512(%rsp), %r15
-	movq	%r15, 16(%rsp)          ## 8-byte Spill
-	adcq	520(%rsp), %rbp
-	movq	%rbp, 48(%rsp)          ## 8-byte Spill
-	adcq	528(%rsp), %rbx
-	movq	%rbx, 72(%rsp)          ## 8-byte Spill
-	movq	80(%rsp), %r14          ## 8-byte Reload
-	adcq	$0, %r14
-	movq	56(%rsp), %r13          ## 8-byte Reload
-	adcq	$0, %r13
-	movq	88(%rsp), %rbx          ## 8-byte Reload
-	adcq	$0, %rbx
-	adcq	$0, %r12
-	movq	%rax, %rdx
-	movq	%rax, %r15
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
-	leaq	376(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	376(%rsp), %r15
-	movq	32(%rsp), %rax          ## 8-byte Reload
-	adcq	384(%rsp), %rax
-	movq	40(%rsp), %rcx          ## 8-byte Reload
-	adcq	392(%rsp), %rcx
-	movq	%rcx, 40(%rsp)          ## 8-byte Spill
-	movq	(%rsp), %rcx            ## 8-byte Reload
-	adcq	400(%rsp), %rcx
-	movq	%rcx, (%rsp)            ## 8-byte Spill
-	movq	24(%rsp), %rbp          ## 8-byte Reload
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	adcq	496(%rsp), %r14
+	movq	%r14, 40(%rsp)                  ## 8-byte Spill
+	movq	72(%rsp), %r14                  ## 8-byte Reload
+	adcq	88(%r14), %rax
+	movq	%rax, 32(%rsp)                  ## 8-byte Spill
+	setb	%r12b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r13, %rdx
+	leaq	368(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	432(%rsp), %r15
+	addb	$255, %r12b
+	adcq	$0, %r15
+	addq	368(%rsp), %r13
+	movq	48(%rsp), %r13                  ## 8-byte Reload
+	adcq	376(%rsp), %r13
+	adcq	384(%rsp), %rbx
+	movq	%rbx, 24(%rsp)                  ## 8-byte Spill
+	adcq	392(%rsp), %rbp
+	movq	%rbp, 8(%rsp)                   ## 8-byte Spill
+	movq	(%rsp), %rbx                    ## 8-byte Reload
+	adcq	400(%rsp), %rbx
+	movq	16(%rsp), %rbp                  ## 8-byte Reload
 	adcq	408(%rsp), %rbp
-	movq	8(%rsp), %rcx           ## 8-byte Reload
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
 	adcq	416(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rcx          ## 8-byte Reload
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
 	adcq	424(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rcx          ## 8-byte Reload
-	adcq	432(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
-	movq	72(%rsp), %r15          ## 8-byte Reload
-	adcq	440(%rsp), %r15
-	adcq	448(%rsp), %r14
-	movq	%r14, 80(%rsp)          ## 8-byte Spill
-	adcq	$0, %r13
-	movq	%r13, %r14
-	adcq	$0, %rbx
-	movq	%rbx, 88(%rsp)          ## 8-byte Spill
-	adcq	$0, %r12
-	movq	%rax, %rbx
-	movq	%rbx, %rdx
-	imulq	96(%rsp), %rdx          ## 8-byte Folded Reload
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	adcq	96(%r14), %r15
+	setb	%r14b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%r13, %rdx
 	leaq	296(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	296(%rsp), %rbx
-	movq	40(%rsp), %rax          ## 8-byte Reload
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	360(%rsp), %r12
+	addb	$255, %r14b
+	adcq	$0, %r12
+	addq	296(%rsp), %r13
+	movq	24(%rsp), %rax                  ## 8-byte Reload
 	adcq	304(%rsp), %rax
-	movq	(%rsp), %r13            ## 8-byte Reload
-	adcq	312(%rsp), %r13
-	adcq	320(%rsp), %rbp
-	movq	8(%rsp), %rcx           ## 8-byte Reload
-	adcq	328(%rsp), %rcx
-	movq	%rcx, 8(%rsp)           ## 8-byte Spill
-	movq	16(%rsp), %rcx          ## 8-byte Reload
+	movq	8(%rsp), %rcx                   ## 8-byte Reload
+	adcq	312(%rsp), %rcx
+	movq	%rcx, 8(%rsp)                   ## 8-byte Spill
+	adcq	320(%rsp), %rbx
+	movq	%rbx, (%rsp)                    ## 8-byte Spill
+	adcq	328(%rsp), %rbp
+	movq	%rbp, 16(%rsp)                  ## 8-byte Spill
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
 	adcq	336(%rsp), %rcx
-	movq	%rcx, 16(%rsp)          ## 8-byte Spill
-	movq	48(%rsp), %rcx          ## 8-byte Reload
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
 	adcq	344(%rsp), %rcx
-	movq	%rcx, 48(%rsp)          ## 8-byte Spill
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
 	adcq	352(%rsp), %r15
-	movq	%r15, 72(%rsp)          ## 8-byte Spill
-	movq	80(%rsp), %r15          ## 8-byte Reload
-	adcq	360(%rsp), %r15
-	adcq	368(%rsp), %r14
-	movq	%r14, 56(%rsp)          ## 8-byte Spill
-	movq	88(%rsp), %r14          ## 8-byte Reload
+	movq	72(%rsp), %rbx                  ## 8-byte Reload
+	adcq	104(%rbx), %r12
+	setb	%r13b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
+	imulq	%rax, %rdx
+	movq	%rax, %rbp
+	leaq	224(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	movq	288(%rsp), %r14
+	addb	$255, %r13b
 	adcq	$0, %r14
-	adcq	$0, %r12
-	movq	96(%rsp), %rdx          ## 8-byte Reload
+	addq	224(%rsp), %rbp
+	movq	8(%rsp), %rax                   ## 8-byte Reload
+	adcq	232(%rsp), %rax
+	movq	(%rsp), %rcx                    ## 8-byte Reload
+	adcq	240(%rsp), %rcx
+	movq	%rcx, (%rsp)                    ## 8-byte Spill
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	248(%rsp), %rcx
+	movq	%rcx, 16(%rsp)                  ## 8-byte Spill
+	movq	40(%rsp), %rcx                  ## 8-byte Reload
+	adcq	256(%rsp), %rcx
+	movq	%rcx, 40(%rsp)                  ## 8-byte Spill
+	movq	32(%rsp), %rcx                  ## 8-byte Reload
+	adcq	264(%rsp), %rcx
+	movq	%rcx, 32(%rsp)                  ## 8-byte Spill
+	adcq	272(%rsp), %r15
+	adcq	280(%rsp), %r12
+	adcq	112(%rbx), %r14
+	setb	%r13b
+	movq	56(%rsp), %rdx                  ## 8-byte Reload
 	imulq	%rax, %rdx
-	movq	%rax, %rbx
-	leaq	216(%rsp), %rdi
-	movq	104(%rsp), %rsi         ## 8-byte Reload
-	callq	l_mulPv576x64
-	addq	216(%rsp), %rbx
-	movq	%r13, %rsi
-	adcq	224(%rsp), %rsi
-	movq	%rsi, (%rsp)            ## 8-byte Spill
-	adcq	232(%rsp), %rbp
-	movq	%rbp, 24(%rsp)          ## 8-byte Spill
-	movq	8(%rsp), %r9            ## 8-byte Reload
-	adcq	240(%rsp), %r9
-	movq	%r9, 8(%rsp)            ## 8-byte Spill
-	movq	16(%rsp), %r8           ## 8-byte Reload
-	adcq	248(%rsp), %r8
-	movq	%r8, 16(%rsp)           ## 8-byte Spill
-	movq	48(%rsp), %rbx          ## 8-byte Reload
-	adcq	256(%rsp), %rbx
-	movq	72(%rsp), %rax          ## 8-byte Reload
-	adcq	264(%rsp), %rax
-	movq	%r15, %rcx
-	adcq	272(%rsp), %rcx
-	movq	56(%rsp), %rdx          ## 8-byte Reload
-	adcq	280(%rsp), %rdx
-	movq	%rdx, 56(%rsp)          ## 8-byte Spill
-	adcq	288(%rsp), %r14
-	movq	%r14, %r11
-	adcq	$0, %r12
-	subq	144(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rbp, %rdi
-	sbbq	136(%rsp), %rdi         ## 8-byte Folded Reload
-	movq	%r9, %rbp
-	sbbq	152(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%r8, %r13
-	sbbq	160(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%rbx, %r15
-	sbbq	168(%rsp), %r15         ## 8-byte Folded Reload
-	movq	%rax, %r14
-	sbbq	176(%rsp), %r14         ## 8-byte Folded Reload
-	movq	%rcx, %r10
-	sbbq	184(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%rdx, %r8
-	sbbq	192(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r11, %r9
-	sbbq	200(%rsp), %r9          ## 8-byte Folded Reload
-	sbbq	$0, %r12
-	andl	$1, %r12d
-	cmovneq	%r11, %r9
-	testb	%r12b, %r12b
-	cmovneq	(%rsp), %rsi            ## 8-byte Folded Reload
-	movq	208(%rsp), %rdx         ## 8-byte Reload
-	movq	%rsi, (%rdx)
-	cmovneq	24(%rsp), %rdi          ## 8-byte Folded Reload
-	movq	%rdi, 8(%rdx)
-	cmovneq	8(%rsp), %rbp           ## 8-byte Folded Reload
-	movq	%rbp, 16(%rdx)
-	cmovneq	16(%rsp), %r13          ## 8-byte Folded Reload
-	movq	%r13, 24(%rdx)
-	cmovneq	%rbx, %r15
-	movq	%r15, 32(%rdx)
-	cmovneq	%rax, %r14
-	movq	%r14, 40(%rdx)
-	cmovneq	%rcx, %r10
-	movq	%r10, 48(%rdx)
-	cmovneq	56(%rsp), %r8           ## 8-byte Folded Reload
-	movq	%r8, 56(%rdx)
-	movq	%r9, 64(%rdx)
-	addq	$936, %rsp              ## imm = 0x3A8
+	movq	%rax, %rbp
+	leaq	152(%rsp), %rdi
+	movq	64(%rsp), %rsi                  ## 8-byte Reload
+	callq	_mulPv512x64
+	addb	$255, %r13b
+	movq	216(%rsp), %rdx
+	adcq	$0, %rdx
+	addq	152(%rsp), %rbp
+	movq	(%rsp), %r8                     ## 8-byte Reload
+	adcq	160(%rsp), %r8
+	movq	%r8, (%rsp)                     ## 8-byte Spill
+	movq	16(%rsp), %rcx                  ## 8-byte Reload
+	adcq	168(%rsp), %rcx
+	movq	40(%rsp), %rdi                  ## 8-byte Reload
+	adcq	176(%rsp), %rdi
+	movq	32(%rsp), %r10                  ## 8-byte Reload
+	adcq	184(%rsp), %r10
+	adcq	192(%rsp), %r15
+	adcq	200(%rsp), %r12
+	adcq	208(%rsp), %r14
+	adcq	120(%rbx), %rdx
+	subq	80(%rsp), %r8                   ## 8-byte Folded Reload
+	movq	%rcx, %r9
+	movq	%rcx, %r11
+	sbbq	88(%rsp), %r9                   ## 8-byte Folded Reload
+	movq	%rdi, %rsi
+	movq	%rdi, %r13
+	sbbq	96(%rsp), %rsi                  ## 8-byte Folded Reload
+	movq	%r10, %rdi
+	sbbq	104(%rsp), %rdi                 ## 8-byte Folded Reload
+	movq	%r15, %rbx
+	sbbq	112(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%r12, %rbp
+	sbbq	120(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	%r14, %rax
+	sbbq	128(%rsp), %rax                 ## 8-byte Folded Reload
+	movq	%rdx, %rcx
+	sbbq	136(%rsp), %rcx                 ## 8-byte Folded Reload
+	cmovsq	%rdx, %rcx
+	movq	144(%rsp), %rdx                 ## 8-byte Reload
+	movq	%rcx, 56(%rdx)
+	cmovsq	%r14, %rax
+	movq	%rax, 48(%rdx)
+	cmovsq	%r12, %rbp
+	movq	%rbp, 40(%rdx)
+	cmovsq	%r15, %rbx
+	movq	%rbx, 32(%rdx)
+	cmovsq	%r10, %rdi
+	movq	%rdi, 24(%rdx)
+	cmovsq	%r13, %rsi
+	movq	%rsi, 16(%rdx)
+	cmovsq	%r11, %r9
+	movq	%r9, 8(%rdx)
+	cmovsq	(%rsp), %r8                     ## 8-byte Folded Reload
+	movq	%r8, (%rdx)
+	addq	$728, %rsp                      ## imm = 0x2D8
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -15595,279 +7582,227 @@ _mcl_fp_montRed9L:                      ## @mcl_fp_montRed9L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_addPre9L
+                                        ## -- End function
+	.globl	_mcl_fp_addPre8L                ## -- Begin function mcl_fp_addPre8L
 	.p2align	4, 0x90
-_mcl_fp_addPre9L:                       ## @mcl_fp_addPre9L
-## BB#0:
-	pushq	%rbp
-	pushq	%r15
-	pushq	%r14
-	pushq	%r13
-	pushq	%r12
+_mcl_fp_addPre8L:                       ## @mcl_fp_addPre8L
+## %bb.0:
 	pushq	%rbx
-	movq	64(%rdx), %r8
-	movq	64(%rsi), %r15
-	movq	56(%rsi), %r9
-	movq	48(%rsi), %r10
-	movq	40(%rsi), %r11
-	movq	24(%rsi), %r12
-	movq	32(%rsi), %r14
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %rcx
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %rcx
-	movq	16(%rdx), %rax
-	adcq	16(%rsi), %rax
-	adcq	24(%rdx), %r12
-	movq	56(%rdx), %r13
-	movq	48(%rdx), %rsi
-	movq	40(%rdx), %rbp
-	movq	32(%rdx), %rdx
+	movq	56(%rsi), %rax
+	movq	48(%rsi), %rcx
+	movq	40(%rsi), %r8
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r10
+	movq	16(%rsi), %r11
+	movq	(%rsi), %rbx
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rbx
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %r11
+	adcq	24(%rdx), %r10
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r8
+	adcq	48(%rdx), %rcx
+	adcq	56(%rdx), %rax
+	movq	%rax, 56(%rdi)
+	movq	%rcx, 48(%rdi)
+	movq	%r8, 40(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r10, 24(%rdi)
+	movq	%r11, 16(%rdi)
+	movq	%rsi, 8(%rdi)
 	movq	%rbx, (%rdi)
-	movq	%rcx, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r12, 24(%rdi)
-	adcq	%r14, %rdx
-	movq	%rdx, 32(%rdi)
-	adcq	%r11, %rbp
-	movq	%rbp, 40(%rdi)
-	adcq	%r10, %rsi
-	movq	%rsi, 48(%rdi)
-	adcq	%r9, %r13
-	movq	%r13, 56(%rdi)
-	adcq	%r8, %r15
-	movq	%r15, 64(%rdi)
-	sbbq	%rax, %rax
-	andl	$1, %eax
+	setb	%al
+	movzbl	%al, %eax
 	popq	%rbx
-	popq	%r12
-	popq	%r13
-	popq	%r14
-	popq	%r15
-	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_subPre9L
+                                        ## -- End function
+	.globl	_mcl_fp_subPre8L                ## -- Begin function mcl_fp_subPre8L
 	.p2align	4, 0x90
-_mcl_fp_subPre9L:                       ## @mcl_fp_subPre9L
-## BB#0:
-	movq	32(%rdx), %r8
-	movq	(%rsi), %rcx
-	xorl	%eax, %eax
-	subq	(%rdx), %rcx
-	movq	%rcx, (%rdi)
-	movq	8(%rsi), %rcx
-	sbbq	8(%rdx), %rcx
-	movq	%rcx, 8(%rdi)
-	movq	16(%rsi), %rcx
-	sbbq	16(%rdx), %rcx
-	movq	%rcx, 16(%rdi)
-	movq	24(%rsi), %rcx
-	sbbq	24(%rdx), %rcx
-	movq	%rcx, 24(%rdi)
-	movq	32(%rsi), %rcx
-	sbbq	%r8, %rcx
-	movq	40(%rdx), %r8
-	movq	%rcx, 32(%rdi)
-	movq	40(%rsi), %rcx
-	sbbq	%r8, %rcx
-	movq	48(%rdx), %r8
-	movq	%rcx, 40(%rdi)
-	movq	48(%rsi), %rcx
-	sbbq	%r8, %rcx
-	movq	56(%rdx), %r8
-	movq	%rcx, 48(%rdi)
+_mcl_fp_subPre8L:                       ## @mcl_fp_subPre8L
+## %bb.0:
+	pushq	%r14
+	pushq	%rbx
 	movq	56(%rsi), %rcx
-	sbbq	%r8, %rcx
+	movq	48(%rsi), %r8
+	movq	40(%rsi), %r9
+	movq	32(%rsi), %r10
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %r14
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
+	subq	(%rdx), %r14
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %rbx
+	sbbq	24(%rdx), %r11
+	sbbq	32(%rdx), %r10
+	sbbq	40(%rdx), %r9
+	sbbq	48(%rdx), %r8
+	sbbq	56(%rdx), %rcx
 	movq	%rcx, 56(%rdi)
-	movq	64(%rdx), %rcx
-	movq	64(%rsi), %rdx
-	sbbq	%rcx, %rdx
-	movq	%rdx, 64(%rdi)
-	sbbq	$0, %rax
+	movq	%r8, 48(%rdi)
+	movq	%r9, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r11, 24(%rdi)
+	movq	%rbx, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r14, (%rdi)
+	sbbq	%rax, %rax
 	andl	$1, %eax
+	popq	%rbx
+	popq	%r14
 	retq
-
-	.globl	_mcl_fp_shr1_9L
+                                        ## -- End function
+	.globl	_mcl_fp_shr1_8L                 ## -- Begin function mcl_fp_shr1_8L
 	.p2align	4, 0x90
-_mcl_fp_shr1_9L:                        ## @mcl_fp_shr1_9L
-## BB#0:
+_mcl_fp_shr1_8L:                        ## @mcl_fp_shr1_8L
+## %bb.0:
 	pushq	%rbx
-	movq	64(%rsi), %r8
-	movq	56(%rsi), %r9
-	movq	48(%rsi), %r10
-	movq	40(%rsi), %r11
-	movq	32(%rsi), %rcx
-	movq	24(%rsi), %rdx
-	movq	16(%rsi), %rax
-	movq	(%rsi), %rbx
-	movq	8(%rsi), %rsi
-	shrdq	$1, %rsi, %rbx
-	movq	%rbx, (%rdi)
-	shrdq	$1, %rax, %rsi
-	movq	%rsi, 8(%rdi)
-	shrdq	$1, %rdx, %rax
-	movq	%rax, 16(%rdi)
-	shrdq	$1, %rcx, %rdx
-	movq	%rdx, 24(%rdi)
-	shrdq	$1, %r11, %rcx
-	movq	%rcx, 32(%rdi)
-	shrdq	$1, %r10, %r11
-	movq	%r11, 40(%rdi)
-	shrdq	$1, %r9, %r10
-	movq	%r10, 48(%rdi)
+	movq	(%rsi), %r9
+	movq	8(%rsi), %r8
+	movq	16(%rsi), %r10
+	movq	24(%rsi), %r11
+	movq	32(%rsi), %rax
+	movq	40(%rsi), %rdx
+	movq	48(%rsi), %rcx
+	movq	56(%rsi), %rsi
+	movq	%rsi, %rbx
+	shrq	%rbx
+	movq	%rbx, 56(%rdi)
+	shldq	$63, %rcx, %rsi
+	movq	%rsi, 48(%rdi)
+	shldq	$63, %rdx, %rcx
+	movq	%rcx, 40(%rdi)
+	shldq	$63, %rax, %rdx
+	movq	%rdx, 32(%rdi)
+	shldq	$63, %r11, %rax
+	movq	%rax, 24(%rdi)
+	shldq	$63, %r10, %r11
+	movq	%r11, 16(%rdi)
+	shldq	$63, %r8, %r10
+	movq	%r10, 8(%rdi)
 	shrdq	$1, %r8, %r9
-	movq	%r9, 56(%rdi)
-	shrq	%r8
-	movq	%r8, 64(%rdi)
+	movq	%r9, (%rdi)
 	popq	%rbx
 	retq
-
-	.globl	_mcl_fp_add9L
+                                        ## -- End function
+	.globl	_mcl_fp_add8L                   ## -- Begin function mcl_fp_add8L
 	.p2align	4, 0x90
-_mcl_fp_add9L:                          ## @mcl_fp_add9L
-## BB#0:
-	pushq	%r15
+_mcl_fp_add8L:                          ## @mcl_fp_add8L
+## %bb.0:
 	pushq	%r14
-	pushq	%r13
-	pushq	%r12
 	pushq	%rbx
-	movq	64(%rdx), %r12
-	movq	64(%rsi), %r8
-	movq	56(%rsi), %r13
+	movq	56(%rsi), %r8
 	movq	48(%rsi), %r9
 	movq	40(%rsi), %r10
-	movq	24(%rsi), %r14
 	movq	32(%rsi), %r11
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %r15
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %r15
-	movq	16(%rdx), %rax
-	adcq	16(%rsi), %rax
+	movq	24(%rsi), %r14
+	movq	16(%rsi), %rbx
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rsi
+	addq	(%rdx), %rax
+	adcq	8(%rdx), %rsi
+	adcq	16(%rdx), %rbx
 	adcq	24(%rdx), %r14
 	adcq	32(%rdx), %r11
 	adcq	40(%rdx), %r10
-	movq	56(%rdx), %rsi
 	adcq	48(%rdx), %r9
-	movq	%rbx, (%rdi)
-	movq	%r15, 8(%rdi)
-	movq	%rax, 16(%rdi)
-	movq	%r14, 24(%rdi)
-	movq	%r11, 32(%rdi)
-	movq	%r10, 40(%rdi)
+	adcq	56(%rdx), %r8
+	movq	%r8, 56(%rdi)
 	movq	%r9, 48(%rdi)
-	adcq	%r13, %rsi
-	movq	%rsi, 56(%rdi)
-	adcq	%r12, %r8
-	movq	%r8, 64(%rdi)
-	sbbq	%rdx, %rdx
-	andl	$1, %edx
-	subq	(%rcx), %rbx
-	sbbq	8(%rcx), %r15
-	sbbq	16(%rcx), %rax
+	movq	%r10, 40(%rdi)
+	movq	%r11, 32(%rdi)
+	movq	%r14, 24(%rdi)
+	movq	%rbx, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rax, (%rdi)
+	setb	%dl
+	movzbl	%dl, %edx
+	subq	(%rcx), %rax
+	sbbq	8(%rcx), %rsi
+	sbbq	16(%rcx), %rbx
 	sbbq	24(%rcx), %r14
 	sbbq	32(%rcx), %r11
 	sbbq	40(%rcx), %r10
 	sbbq	48(%rcx), %r9
-	sbbq	56(%rcx), %rsi
-	sbbq	64(%rcx), %r8
+	sbbq	56(%rcx), %r8
 	sbbq	$0, %rdx
 	testb	$1, %dl
-	jne	LBB136_2
-## BB#1:                                ## %nocarry
-	movq	%rbx, (%rdi)
-	movq	%r15, 8(%rdi)
-	movq	%rax, 16(%rdi)
+	jne	LBB67_2
+## %bb.1:                               ## %nocarry
+	movq	%rax, (%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%rbx, 16(%rdi)
 	movq	%r14, 24(%rdi)
 	movq	%r11, 32(%rdi)
 	movq	%r10, 40(%rdi)
 	movq	%r9, 48(%rdi)
-	movq	%rsi, 56(%rdi)
-	movq	%r8, 64(%rdi)
-LBB136_2:                               ## %carry
+	movq	%r8, 56(%rdi)
+LBB67_2:                                ## %carry
 	popq	%rbx
-	popq	%r12
-	popq	%r13
 	popq	%r14
-	popq	%r15
 	retq
-
-	.globl	_mcl_fp_addNF9L
+                                        ## -- End function
+	.globl	_mcl_fp_addNF8L                 ## -- Begin function mcl_fp_addNF8L
 	.p2align	4, 0x90
-_mcl_fp_addNF9L:                        ## @mcl_fp_addNF9L
-## BB#0:
+_mcl_fp_addNF8L:                        ## @mcl_fp_addNF8L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rdi, %r8
-	movq	64(%rdx), %r10
-	movq	56(%rdx), %r11
+	movq	56(%rdx), %r8
 	movq	48(%rdx), %r9
-	movq	40(%rdx), %rax
-	movq	32(%rdx), %rdi
-	movq	24(%rdx), %rbp
-	movq	16(%rdx), %r15
-	movq	(%rdx), %rbx
-	movq	8(%rdx), %r13
-	addq	(%rsi), %rbx
-	adcq	8(%rsi), %r13
-	adcq	16(%rsi), %r15
-	adcq	24(%rsi), %rbp
-	movq	%rbp, -24(%rsp)         ## 8-byte Spill
-	adcq	32(%rsi), %rdi
-	movq	%rdi, -40(%rsp)         ## 8-byte Spill
-	adcq	40(%rsi), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
+	movq	40(%rdx), %r10
+	movq	32(%rdx), %r11
+	movq	24(%rdx), %r15
+	movq	16(%rdx), %rbx
+	movq	(%rdx), %rax
+	movq	8(%rdx), %rdx
+	addq	(%rsi), %rax
+	movq	%rax, -8(%rsp)                  ## 8-byte Spill
+	adcq	8(%rsi), %rdx
+	movq	%rdx, -16(%rsp)                 ## 8-byte Spill
+	adcq	16(%rsi), %rbx
+	movq	%rbx, -24(%rsp)                 ## 8-byte Spill
+	adcq	24(%rsi), %r15
+	adcq	32(%rsi), %r11
+	adcq	40(%rsi), %r10
 	adcq	48(%rsi), %r9
-	movq	%r9, %rdi
-	movq	%rdi, -16(%rsp)         ## 8-byte Spill
-	adcq	56(%rsi), %r11
-	movq	%r11, %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	adcq	64(%rsi), %r10
-	movq	%r10, %r9
-	movq	%rbx, %rsi
+	adcq	56(%rsi), %r8
+	movq	%rax, %rsi
 	subq	(%rcx), %rsi
-	movq	%r13, %rdx
 	sbbq	8(%rcx), %rdx
-	movq	%r15, %r12
-	sbbq	16(%rcx), %r12
-	sbbq	24(%rcx), %rbp
-	movq	-40(%rsp), %r14         ## 8-byte Reload
-	sbbq	32(%rcx), %r14
-	movq	-32(%rsp), %r11         ## 8-byte Reload
-	sbbq	40(%rcx), %r11
-	movq	%rdi, %r10
-	sbbq	48(%rcx), %r10
-	movq	%rax, %rdi
-	sbbq	56(%rcx), %rdi
-	movq	%r9, %rax
-	sbbq	64(%rcx), %rax
-	movq	%rax, %rcx
-	sarq	$63, %rcx
-	cmovsq	%rbx, %rsi
-	movq	%rsi, (%r8)
-	cmovsq	%r13, %rdx
-	movq	%rdx, 8(%r8)
-	cmovsq	%r15, %r12
-	movq	%r12, 16(%r8)
-	cmovsq	-24(%rsp), %rbp         ## 8-byte Folded Reload
-	movq	%rbp, 24(%r8)
-	cmovsq	-40(%rsp), %r14         ## 8-byte Folded Reload
-	movq	%r14, 32(%r8)
-	cmovsq	-32(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%r11, 40(%r8)
-	cmovsq	-16(%rsp), %r10         ## 8-byte Folded Reload
-	movq	%r10, 48(%r8)
-	cmovsq	-8(%rsp), %rdi          ## 8-byte Folded Reload
-	movq	%rdi, 56(%r8)
-	cmovsq	%r9, %rax
-	movq	%rax, 64(%r8)
+	sbbq	16(%rcx), %rbx
+	movq	%r15, %rax
+	sbbq	24(%rcx), %rax
+	movq	%r11, %rbp
+	sbbq	32(%rcx), %rbp
+	movq	%r10, %r14
+	sbbq	40(%rcx), %r14
+	movq	%r9, %r12
+	sbbq	48(%rcx), %r12
+	movq	%r8, %r13
+	sbbq	56(%rcx), %r13
+	cmovsq	%r8, %r13
+	movq	%r13, 56(%rdi)
+	cmovsq	%r9, %r12
+	movq	%r12, 48(%rdi)
+	cmovsq	%r10, %r14
+	movq	%r14, 40(%rdi)
+	cmovsq	%r11, %rbp
+	movq	%rbp, 32(%rdi)
+	cmovsq	%r15, %rax
+	movq	%rax, 24(%rdi)
+	cmovsq	-24(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%rbx, 16(%rdi)
+	cmovsq	-16(%rsp), %rdx                 ## 8-byte Folded Reload
+	movq	%rdx, 8(%rdi)
+	cmovsq	-8(%rsp), %rsi                  ## 8-byte Folded Reload
+	movq	%rsi, (%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -15875,183 +7810,129 @@ _mcl_fp_addNF9L:                        ## @mcl_fp_addNF9L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fp_sub9L
+                                        ## -- End function
+	.globl	_mcl_fp_sub8L                   ## -- Begin function mcl_fp_sub8L
 	.p2align	4, 0x90
-_mcl_fp_sub9L:                          ## @mcl_fp_sub9L
-## BB#0:
+_mcl_fp_sub8L:                          ## @mcl_fp_sub8L
+## %bb.0:
 	pushq	%r15
 	pushq	%r14
-	pushq	%r13
-	pushq	%r12
 	pushq	%rbx
-	movq	64(%rdx), %r13
-	movq	(%rsi), %rax
-	movq	8(%rsi), %r9
-	xorl	%ebx, %ebx
-	subq	(%rdx), %rax
-	sbbq	8(%rdx), %r9
-	movq	16(%rsi), %r10
-	sbbq	16(%rdx), %r10
-	movq	24(%rsi), %r11
-	sbbq	24(%rdx), %r11
-	movq	32(%rsi), %r12
-	sbbq	32(%rdx), %r12
-	movq	40(%rsi), %r14
-	sbbq	40(%rdx), %r14
-	movq	48(%rsi), %r15
-	sbbq	48(%rdx), %r15
-	movq	64(%rsi), %r8
-	movq	56(%rsi), %rsi
-	sbbq	56(%rdx), %rsi
-	movq	%rax, (%rdi)
-	movq	%r9, 8(%rdi)
-	movq	%r10, 16(%rdi)
-	movq	%r11, 24(%rdi)
-	movq	%r12, 32(%rdi)
-	movq	%r14, 40(%rdi)
-	movq	%r15, 48(%rdi)
-	movq	%rsi, 56(%rdi)
-	sbbq	%r13, %r8
-	movq	%r8, 64(%rdi)
-	sbbq	$0, %rbx
-	testb	$1, %bl
-	je	LBB138_2
-## BB#1:                                ## %carry
-	addq	(%rcx), %rax
-	movq	%rax, (%rdi)
-	movq	8(%rcx), %rax
-	adcq	%r9, %rax
-	movq	%rax, 8(%rdi)
-	movq	16(%rcx), %rax
-	adcq	%r10, %rax
-	movq	%rax, 16(%rdi)
-	movq	24(%rcx), %rax
-	adcq	%r11, %rax
-	movq	%rax, 24(%rdi)
-	movq	32(%rcx), %rax
-	adcq	%r12, %rax
-	movq	%rax, 32(%rdi)
-	movq	40(%rcx), %rax
-	adcq	%r14, %rax
-	movq	%rax, 40(%rdi)
-	movq	48(%rcx), %rax
-	adcq	%r15, %rax
-	movq	%rax, 48(%rdi)
-	movq	56(%rcx), %rax
-	adcq	%rsi, %rax
-	movq	%rax, 56(%rdi)
-	movq	64(%rcx), %rax
-	adcq	%r8, %rax
-	movq	%rax, 64(%rdi)
-LBB138_2:                               ## %nocarry
+	movq	56(%rsi), %r14
+	movq	48(%rsi), %rbx
+	movq	40(%rsi), %r11
+	movq	32(%rsi), %r10
+	movq	24(%rsi), %r9
+	movq	16(%rsi), %r15
+	movq	(%rsi), %r8
+	movq	8(%rsi), %rsi
+	xorl	%eax, %eax
+	subq	(%rdx), %r8
+	sbbq	8(%rdx), %rsi
+	sbbq	16(%rdx), %r15
+	sbbq	24(%rdx), %r9
+	sbbq	32(%rdx), %r10
+	sbbq	40(%rdx), %r11
+	sbbq	48(%rdx), %rbx
+	sbbq	56(%rdx), %r14
+	movq	%r14, 56(%rdi)
+	movq	%rbx, 48(%rdi)
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%r15, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+	sbbq	%rax, %rax
+	testb	$1, %al
+	je	LBB69_2
+## %bb.1:                               ## %carry
+	addq	(%rcx), %r8
+	adcq	8(%rcx), %rsi
+	adcq	16(%rcx), %r15
+	adcq	24(%rcx), %r9
+	adcq	32(%rcx), %r10
+	adcq	40(%rcx), %r11
+	adcq	48(%rcx), %rbx
+	adcq	56(%rcx), %r14
+	movq	%r14, 56(%rdi)
+	movq	%rbx, 48(%rdi)
+	movq	%r11, 40(%rdi)
+	movq	%r10, 32(%rdi)
+	movq	%r9, 24(%rdi)
+	movq	%r15, 16(%rdi)
+	movq	%rsi, 8(%rdi)
+	movq	%r8, (%rdi)
+LBB69_2:                                ## %nocarry
 	popq	%rbx
-	popq	%r12
-	popq	%r13
 	popq	%r14
 	popq	%r15
 	retq
-
-	.globl	_mcl_fp_subNF9L
+                                        ## -- End function
+	.globl	_mcl_fp_subNF8L                 ## -- Begin function mcl_fp_subNF8L
 	.p2align	4, 0x90
-_mcl_fp_subNF9L:                        ## @mcl_fp_subNF9L
-## BB#0:
+_mcl_fp_subNF8L:                        ## @mcl_fp_subNF8L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r11
-	movq	%rdi, %rbx
-	movq	64(%rsi), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	movdqu	(%rdx), %xmm1
-	movdqu	16(%rdx), %xmm2
-	movdqu	32(%rdx), %xmm3
-	movdqu	48(%rdx), %xmm4
-	pshufd	$78, %xmm4, %xmm0       ## xmm0 = xmm4[2,3,0,1]
-	movd	%xmm0, %r12
-	movdqu	(%rsi), %xmm5
-	movdqu	16(%rsi), %xmm6
-	movdqu	32(%rsi), %xmm7
-	movdqu	48(%rsi), %xmm8
-	pshufd	$78, %xmm8, %xmm0       ## xmm0 = xmm8[2,3,0,1]
-	movd	%xmm0, %rax
-	movd	%xmm4, %r10
-	pshufd	$78, %xmm3, %xmm0       ## xmm0 = xmm3[2,3,0,1]
-	movd	%xmm0, %r9
-	pshufd	$78, %xmm7, %xmm0       ## xmm0 = xmm7[2,3,0,1]
-	movd	%xmm3, %r8
-	pshufd	$78, %xmm2, %xmm3       ## xmm3 = xmm2[2,3,0,1]
-	movd	%xmm3, %rcx
-	pshufd	$78, %xmm6, %xmm3       ## xmm3 = xmm6[2,3,0,1]
-	movd	%xmm2, %rbp
-	pshufd	$78, %xmm1, %xmm2       ## xmm2 = xmm1[2,3,0,1]
-	movd	%xmm2, %rsi
-	pshufd	$78, %xmm5, %xmm2       ## xmm2 = xmm5[2,3,0,1]
-	movd	%xmm1, %rdi
-	movd	%xmm5, %r15
-	subq	%rdi, %r15
-	movd	%xmm2, %r14
-	sbbq	%rsi, %r14
-	movd	%xmm6, %r13
-	sbbq	%rbp, %r13
-	movd	%xmm3, %rbp
-	sbbq	%rcx, %rbp
-	movd	%xmm7, %rcx
-	sbbq	%r8, %rcx
-	movq	%rcx, -16(%rsp)         ## 8-byte Spill
-	movd	%xmm0, %rcx
-	sbbq	%r9, %rcx
-	movq	%rcx, -24(%rsp)         ## 8-byte Spill
-	movd	%xmm8, %rcx
-	sbbq	%r10, %rcx
-	movq	%rcx, -32(%rsp)         ## 8-byte Spill
-	sbbq	%r12, %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	-40(%rsp), %rsi         ## 8-byte Reload
-	sbbq	64(%rdx), %rsi
-	movq	%rsi, -40(%rsp)         ## 8-byte Spill
-	movq	%rsi, %rax
-	sarq	$63, %rax
-	movq	%rax, %rcx
-	shldq	$1, %rsi, %rcx
-	movq	24(%r11), %r9
-	andq	%rcx, %r9
-	movq	8(%r11), %rdi
-	andq	%rcx, %rdi
-	andq	(%r11), %rcx
-	movq	64(%r11), %r12
-	andq	%rax, %r12
-	movq	56(%r11), %r10
-	andq	%rax, %r10
-	rolq	%rax
-	movq	48(%r11), %r8
-	andq	%rax, %r8
-	movq	40(%r11), %rsi
-	andq	%rax, %rsi
-	movq	32(%r11), %rdx
-	andq	%rax, %rdx
-	andq	16(%r11), %rax
-	addq	%r15, %rcx
-	adcq	%r14, %rdi
-	movq	%rcx, (%rbx)
-	adcq	%r13, %rax
-	movq	%rdi, 8(%rbx)
-	adcq	%rbp, %r9
-	movq	%rax, 16(%rbx)
-	movq	%r9, 24(%rbx)
-	adcq	-16(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, 32(%rbx)
-	adcq	-24(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, 40(%rbx)
-	adcq	-32(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r8, 48(%rbx)
-	adcq	-8(%rsp), %r10          ## 8-byte Folded Reload
-	movq	%r10, 56(%rbx)
-	adcq	-40(%rsp), %r12         ## 8-byte Folded Reload
-	movq	%r12, 64(%rbx)
+	movq	%rcx, %r8
+	movq	%rdi, %r9
+	movq	56(%rsi), %r14
+	movq	48(%rsi), %rax
+	movq	40(%rsi), %rcx
+	movq	32(%rsi), %rdi
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %r15
+	movq	(%rsi), %r13
+	movq	8(%rsi), %r12
+	subq	(%rdx), %r13
+	sbbq	8(%rdx), %r12
+	sbbq	16(%rdx), %r15
+	sbbq	24(%rdx), %r11
+	sbbq	32(%rdx), %rdi
+	movq	%rdi, -24(%rsp)                 ## 8-byte Spill
+	sbbq	40(%rdx), %rcx
+	movq	%rcx, -16(%rsp)                 ## 8-byte Spill
+	sbbq	48(%rdx), %rax
+	movq	%rax, -8(%rsp)                  ## 8-byte Spill
+	sbbq	56(%rdx), %r14
+	movq	%r14, %rsi
+	sarq	$63, %rsi
+	movq	56(%r8), %r10
+	andq	%rsi, %r10
+	movq	48(%r8), %rbx
+	andq	%rsi, %rbx
+	movq	40(%r8), %rdi
+	andq	%rsi, %rdi
+	movq	32(%r8), %rbp
+	andq	%rsi, %rbp
+	movq	24(%r8), %rdx
+	andq	%rsi, %rdx
+	movq	16(%r8), %rcx
+	andq	%rsi, %rcx
+	movq	8(%r8), %rax
+	andq	%rsi, %rax
+	andq	(%r8), %rsi
+	addq	%r13, %rsi
+	adcq	%r12, %rax
+	movq	%rsi, (%r9)
+	adcq	%r15, %rcx
+	movq	%rax, 8(%r9)
+	movq	%rcx, 16(%r9)
+	adcq	%r11, %rdx
+	movq	%rdx, 24(%r9)
+	adcq	-24(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	%rbp, 32(%r9)
+	adcq	-16(%rsp), %rdi                 ## 8-byte Folded Reload
+	movq	%rdi, 40(%r9)
+	adcq	-8(%rsp), %rbx                  ## 8-byte Folded Reload
+	movq	%rbx, 48(%r9)
+	adcq	%r14, %r10
+	movq	%r10, 56(%r9)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -16059,11 +7940,11 @@ _mcl_fp_subNF9L:                        ## @mcl_fp_subNF9L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_add9L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_add8L                ## -- Begin function mcl_fpDbl_add8L
 	.p2align	4, 0x90
-_mcl_fpDbl_add9L:                       ## @mcl_fpDbl_add9L
-## BB#0:
+_mcl_fpDbl_add8L:                       ## @mcl_fpDbl_add8L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
@@ -16071,111 +7952,103 @@ _mcl_fpDbl_add9L:                       ## @mcl_fpDbl_add9L
 	pushq	%r12
 	pushq	%rbx
 	movq	%rcx, %r15
-	movq	136(%rdx), %rax
-	movq	%rax, -48(%rsp)         ## 8-byte Spill
-	movq	128(%rdx), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	movq	120(%rdx), %r10
-	movq	112(%rdx), %r11
-	movq	24(%rsi), %rcx
-	movq	32(%rsi), %r14
-	movq	16(%rdx), %rbp
-	movq	(%rdx), %rax
-	movq	8(%rdx), %rbx
-	addq	(%rsi), %rax
-	adcq	8(%rsi), %rbx
-	adcq	16(%rsi), %rbp
-	adcq	24(%rdx), %rcx
-	adcq	32(%rdx), %r14
-	movq	104(%rdx), %r9
-	movq	96(%rdx), %r13
-	movq	%rax, (%rdi)
-	movq	88(%rdx), %r8
-	movq	%rbx, 8(%rdi)
-	movq	80(%rdx), %r12
-	movq	%rbp, 16(%rdi)
-	movq	40(%rdx), %rax
-	movq	%rcx, 24(%rdi)
-	movq	40(%rsi), %rbp
-	adcq	%rax, %rbp
-	movq	48(%rdx), %rcx
-	movq	%r14, 32(%rdi)
-	movq	48(%rsi), %rax
-	adcq	%rcx, %rax
-	movq	56(%rdx), %r14
-	movq	%rbp, 40(%rdi)
-	movq	56(%rsi), %rbp
-	adcq	%r14, %rbp
-	movq	72(%rdx), %rcx
-	movq	64(%rdx), %rdx
-	movq	%rax, 48(%rdi)
+	movq	120(%rsi), %rax
+	movq	%rax, -72(%rsp)                 ## 8-byte Spill
+	movq	112(%rsi), %rax
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	movq	104(%rsi), %rax
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	96(%rsi), %rbx
+	movq	88(%rsi), %rcx
+	movq	80(%rsi), %r8
+	movq	72(%rsi), %r10
+	movq	(%rsi), %rax
+	movq	8(%rsi), %rbp
+	addq	(%rdx), %rax
+	movq	%rax, -8(%rsp)                  ## 8-byte Spill
+	adcq	8(%rdx), %rbp
+	movq	%rbp, -16(%rsp)                 ## 8-byte Spill
 	movq	64(%rsi), %rax
-	adcq	%rdx, %rax
-	movq	136(%rsi), %rbx
+	movq	56(%rsi), %rbp
+	movq	48(%rsi), %r13
+	movq	40(%rsi), %r14
+	movq	32(%rsi), %r9
+	movq	24(%rsi), %r11
+	movq	16(%rsi), %r12
+	adcq	16(%rdx), %r12
+	adcq	24(%rdx), %r11
+	adcq	32(%rdx), %r9
+	adcq	40(%rdx), %r14
+	adcq	48(%rdx), %r13
+	adcq	56(%rdx), %rbp
+	adcq	64(%rdx), %rax
+	movq	%rax, -48(%rsp)                 ## 8-byte Spill
+	adcq	72(%rdx), %r10
+	movq	%r8, %rax
+	adcq	80(%rdx), %rax
+	movq	%rax, -24(%rsp)                 ## 8-byte Spill
+	adcq	88(%rdx), %rcx
+	movq	%rcx, -32(%rsp)                 ## 8-byte Spill
+	movq	%rbx, %rsi
+	adcq	96(%rdx), %rsi
+	movq	%rsi, -40(%rsp)                 ## 8-byte Spill
+	movq	-56(%rsp), %r8                  ## 8-byte Reload
+	adcq	104(%rdx), %r8
+	movq	%r8, -56(%rsp)                  ## 8-byte Spill
+	movq	-64(%rsp), %rbx                 ## 8-byte Reload
+	adcq	112(%rdx), %rbx
+	movq	%rbx, -64(%rsp)                 ## 8-byte Spill
+	movq	-72(%rsp), %r8                  ## 8-byte Reload
+	adcq	120(%rdx), %r8
 	movq	%rbp, 56(%rdi)
-	movq	72(%rsi), %rbp
-	adcq	%rcx, %rbp
-	movq	128(%rsi), %rcx
-	movq	%rax, 64(%rdi)
-	movq	80(%rsi), %rdx
-	adcq	%r12, %rdx
-	movq	88(%rsi), %r12
-	adcq	%r8, %r12
-	movq	96(%rsi), %r14
-	adcq	%r13, %r14
-	movq	%r14, -8(%rsp)          ## 8-byte Spill
-	movq	104(%rsi), %rax
-	adcq	%r9, %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	120(%rsi), %rax
-	movq	112(%rsi), %rsi
-	adcq	%r11, %rsi
-	movq	%rsi, -24(%rsp)         ## 8-byte Spill
-	adcq	%r10, %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	adcq	-40(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -40(%rsp)         ## 8-byte Spill
-	adcq	-48(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, -48(%rsp)         ## 8-byte Spill
-	sbbq	%r9, %r9
-	andl	$1, %r9d
-	movq	%rbp, %r10
-	subq	(%r15), %r10
-	movq	%rdx, %r11
-	sbbq	8(%r15), %r11
-	movq	%r12, %rbx
-	sbbq	16(%r15), %rbx
-	sbbq	24(%r15), %r14
-	movq	-32(%rsp), %r13         ## 8-byte Reload
-	sbbq	32(%r15), %r13
-	movq	-24(%rsp), %rsi         ## 8-byte Reload
-	sbbq	40(%r15), %rsi
-	movq	-16(%rsp), %rax         ## 8-byte Reload
-	sbbq	48(%r15), %rax
-	sbbq	56(%r15), %rcx
-	movq	-48(%rsp), %r8          ## 8-byte Reload
-	sbbq	64(%r15), %r8
-	sbbq	$0, %r9
-	andl	$1, %r9d
-	cmovneq	%rbp, %r10
-	movq	%r10, 72(%rdi)
-	testb	%r9b, %r9b
-	cmovneq	%rdx, %r11
+	movq	%r13, 48(%rdi)
+	movq	%r14, 40(%rdi)
+	movq	%r9, 32(%rdi)
+	movq	%r11, 24(%rdi)
+	movq	%r12, 16(%rdi)
+	movq	-16(%rsp), %rdx                 ## 8-byte Reload
+	movq	%rdx, 8(%rdi)
+	movq	-8(%rsp), %rdx                  ## 8-byte Reload
+	movq	%rdx, (%rdi)
+	setb	-72(%rsp)                       ## 1-byte Folded Spill
+	movq	-48(%rsp), %r14                 ## 8-byte Reload
+	subq	(%r15), %r14
+	movq	%r10, %r9
+	movq	%r10, %r13
+	sbbq	8(%r15), %r9
+	movq	%rax, %r11
+	sbbq	16(%r15), %r11
+	movq	%rcx, %rbp
+	sbbq	24(%r15), %rbp
+	movq	%rsi, %rbx
+	sbbq	32(%r15), %rbx
+	movq	-56(%rsp), %r12                 ## 8-byte Reload
+	movq	%r12, %rax
+	sbbq	40(%r15), %rax
+	movq	-64(%rsp), %r10                 ## 8-byte Reload
+	movq	%r10, %rdx
+	sbbq	48(%r15), %rdx
+	movq	%r8, %rsi
+	sbbq	56(%r15), %rsi
+	movzbl	-72(%rsp), %ecx                 ## 1-byte Folded Reload
+	sbbq	$0, %rcx
+	testb	$1, %cl
+	cmovneq	%r8, %rsi
+	movq	%rsi, 120(%rdi)
+	cmovneq	%r10, %rdx
+	movq	%rdx, 112(%rdi)
+	cmovneq	%r12, %rax
+	movq	%rax, 104(%rdi)
+	cmovneq	-40(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%rbx, 96(%rdi)
+	cmovneq	-32(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	%rbp, 88(%rdi)
+	cmovneq	-24(%rsp), %r11                 ## 8-byte Folded Reload
 	movq	%r11, 80(%rdi)
-	cmovneq	%r12, %rbx
-	movq	%rbx, 88(%rdi)
-	cmovneq	-8(%rsp), %r14          ## 8-byte Folded Reload
-	movq	%r14, 96(%rdi)
-	cmovneq	-32(%rsp), %r13         ## 8-byte Folded Reload
-	movq	%r13, 104(%rdi)
-	cmovneq	-24(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, 112(%rdi)
-	cmovneq	-16(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, 120(%rdi)
-	cmovneq	-40(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, 128(%rdi)
-	cmovneq	-48(%rsp), %r8          ## 8-byte Folded Reload
-	movq	%r8, 136(%rdi)
+	cmovneq	%r13, %r9
+	movq	%r9, 72(%rdi)
+	cmovneq	-48(%rsp), %r14                 ## 8-byte Folded Reload
+	movq	%r14, 64(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -16183,124 +8056,109 @@ _mcl_fpDbl_add9L:                       ## @mcl_fpDbl_add9L
 	popq	%r15
 	popq	%rbp
 	retq
-
-	.globl	_mcl_fpDbl_sub9L
+                                        ## -- End function
+	.globl	_mcl_fpDbl_sub8L                ## -- Begin function mcl_fpDbl_sub8L
 	.p2align	4, 0x90
-_mcl_fpDbl_sub9L:                       ## @mcl_fpDbl_sub9L
-## BB#0:
+_mcl_fpDbl_sub8L:                       ## @mcl_fpDbl_sub8L
+## %bb.0:
 	pushq	%rbp
 	pushq	%r15
 	pushq	%r14
 	pushq	%r13
 	pushq	%r12
 	pushq	%rbx
-	movq	%rcx, %r14
-	movq	136(%rdx), %rax
-	movq	%rax, -24(%rsp)         ## 8-byte Spill
-	movq	128(%rdx), %rax
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	movq	120(%rdx), %rax
-	movq	%rax, -40(%rsp)         ## 8-byte Spill
-	movq	16(%rsi), %r11
-	movq	(%rsi), %r12
-	movq	8(%rsi), %r13
-	xorl	%r9d, %r9d
-	subq	(%rdx), %r12
-	sbbq	8(%rdx), %r13
-	sbbq	16(%rdx), %r11
+	movq	%rcx, %r11
+	movq	120(%rsi), %rax
+	movq	%rax, -64(%rsp)                 ## 8-byte Spill
+	movq	112(%rsi), %r12
+	movq	104(%rsi), %r15
+	movq	96(%rsi), %rax
+	movq	%rax, -48(%rsp)                 ## 8-byte Spill
+	movq	88(%rsi), %r13
+	movq	80(%rsi), %rax
+	movq	%rax, -56(%rsp)                 ## 8-byte Spill
+	movq	(%rsi), %rcx
+	movq	8(%rsi), %rbp
+	xorl	%eax, %eax
+	subq	(%rdx), %rcx
+	movq	%rcx, -32(%rsp)                 ## 8-byte Spill
+	sbbq	8(%rdx), %rbp
+	movq	%rbp, -40(%rsp)                 ## 8-byte Spill
+	movq	72(%rsi), %rbp
+	movq	64(%rsi), %rcx
+	movq	56(%rsi), %r8
+	movq	48(%rsi), %r9
+	movq	40(%rsi), %r10
+	movq	32(%rsi), %r14
 	movq	24(%rsi), %rbx
+	movq	16(%rsi), %rsi
+	sbbq	16(%rdx), %rsi
 	sbbq	24(%rdx), %rbx
-	movq	32(%rsi), %rbp
-	sbbq	32(%rdx), %rbp
-	movq	112(%rdx), %r10
-	movq	104(%rdx), %rcx
-	movq	%r12, (%rdi)
-	movq	96(%rdx), %rax
-	movq	%r13, 8(%rdi)
-	movq	88(%rdx), %r13
-	movq	%r11, 16(%rdi)
-	movq	40(%rdx), %r11
+	sbbq	32(%rdx), %r14
+	sbbq	40(%rdx), %r10
+	sbbq	48(%rdx), %r9
+	sbbq	56(%rdx), %r8
+	sbbq	64(%rdx), %rcx
+	movq	%rcx, -24(%rsp)                 ## 8-byte Spill
+	sbbq	72(%rdx), %rbp
+	movq	%rbp, -16(%rsp)                 ## 8-byte Spill
+	movq	-56(%rsp), %rbp                 ## 8-byte Reload
+	sbbq	80(%rdx), %rbp
+	movq	%rbp, -56(%rsp)                 ## 8-byte Spill
+	sbbq	88(%rdx), %r13
+	movq	%r13, -8(%rsp)                  ## 8-byte Spill
+	movq	-48(%rsp), %r13                 ## 8-byte Reload
+	sbbq	96(%rdx), %r13
+	movq	%r13, -48(%rsp)                 ## 8-byte Spill
+	sbbq	104(%rdx), %r15
+	sbbq	112(%rdx), %r12
+	movq	-64(%rsp), %rcx                 ## 8-byte Reload
+	sbbq	120(%rdx), %rcx
+	movq	%rcx, -64(%rsp)                 ## 8-byte Spill
+	movq	%r8, 56(%rdi)
+	movq	%r9, 48(%rdi)
+	movq	%r10, 40(%rdi)
+	movq	%r14, 32(%rdi)
 	movq	%rbx, 24(%rdi)
-	movq	40(%rsi), %rbx
-	sbbq	%r11, %rbx
-	movq	48(%rdx), %r11
-	movq	%rbp, 32(%rdi)
-	movq	48(%rsi), %rbp
-	sbbq	%r11, %rbp
-	movq	56(%rdx), %r11
-	movq	%rbx, 40(%rdi)
-	movq	56(%rsi), %rbx
-	sbbq	%r11, %rbx
-	movq	64(%rdx), %r11
-	movq	%rbp, 48(%rdi)
-	movq	64(%rsi), %rbp
-	sbbq	%r11, %rbp
-	movq	80(%rdx), %r8
-	movq	72(%rdx), %r11
-	movq	%rbx, 56(%rdi)
-	movq	72(%rsi), %r15
-	sbbq	%r11, %r15
-	movq	136(%rsi), %rdx
-	movq	%rbp, 64(%rdi)
-	movq	80(%rsi), %rbp
-	sbbq	%r8, %rbp
-	movq	88(%rsi), %r12
-	sbbq	%r13, %r12
-	movq	96(%rsi), %r13
-	sbbq	%rax, %r13
-	movq	104(%rsi), %rax
-	sbbq	%rcx, %rax
-	movq	%rax, -16(%rsp)         ## 8-byte Spill
-	movq	112(%rsi), %rax
-	sbbq	%r10, %rax
-	movq	%rax, -8(%rsp)          ## 8-byte Spill
-	movq	128(%rsi), %rax
-	movq	120(%rsi), %rcx
-	sbbq	-40(%rsp), %rcx         ## 8-byte Folded Reload
-	movq	%rcx, -40(%rsp)         ## 8-byte Spill
-	sbbq	-32(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, -32(%rsp)         ## 8-byte Spill
-	sbbq	-24(%rsp), %rdx         ## 8-byte Folded Reload
-	movq	%rdx, -24(%rsp)         ## 8-byte Spill
-	movl	$0, %r8d
-	sbbq	$0, %r8
-	andl	$1, %r8d
-	movq	(%r14), %r10
-	cmoveq	%r9, %r10
-	testb	%r8b, %r8b
-	movq	16(%r14), %r8
-	cmoveq	%r9, %r8
-	movq	8(%r14), %rdx
-	cmoveq	%r9, %rdx
-	movq	64(%r14), %rbx
-	cmoveq	%r9, %rbx
-	movq	56(%r14), %r11
-	cmoveq	%r9, %r11
-	movq	48(%r14), %rsi
-	cmoveq	%r9, %rsi
-	movq	40(%r14), %rcx
-	cmoveq	%r9, %rcx
-	movq	32(%r14), %rax
-	cmoveq	%r9, %rax
-	cmovneq	24(%r14), %r9
-	addq	%r15, %r10
-	adcq	%rbp, %rdx
-	movq	%r10, 72(%rdi)
-	adcq	%r12, %r8
-	movq	%rdx, 80(%rdi)
-	adcq	%r13, %r9
-	movq	%r8, 88(%rdi)
-	movq	%r9, 96(%rdi)
-	adcq	-16(%rsp), %rax         ## 8-byte Folded Reload
-	movq	%rax, 104(%rdi)
-	adcq	-8(%rsp), %rcx          ## 8-byte Folded Reload
-	movq	%rcx, 112(%rdi)
-	adcq	-40(%rsp), %rsi         ## 8-byte Folded Reload
-	movq	%rsi, 120(%rdi)
-	adcq	-32(%rsp), %r11         ## 8-byte Folded Reload
-	movq	%r11, 128(%rdi)
-	adcq	-24(%rsp), %rbx         ## 8-byte Folded Reload
-	movq	%rbx, 136(%rdi)
+	movq	%rsi, 16(%rdi)
+	movq	-40(%rsp), %rcx                 ## 8-byte Reload
+	movq	%rcx, 8(%rdi)
+	movq	-32(%rsp), %rcx                 ## 8-byte Reload
+	movq	%rcx, (%rdi)
+	sbbq	%rax, %rax
+	andl	$1, %eax
+	negq	%rax
+	movq	56(%r11), %r8
+	andq	%rax, %r8
+	movq	48(%r11), %r9
+	andq	%rax, %r9
+	movq	40(%r11), %r10
+	andq	%rax, %r10
+	movq	32(%r11), %rbx
+	andq	%rax, %rbx
+	movq	24(%r11), %rdx
+	andq	%rax, %rdx
+	movq	16(%r11), %rsi
+	andq	%rax, %rsi
+	movq	8(%r11), %rbp
+	andq	%rax, %rbp
+	andq	(%r11), %rax
+	addq	-24(%rsp), %rax                 ## 8-byte Folded Reload
+	adcq	-16(%rsp), %rbp                 ## 8-byte Folded Reload
+	movq	%rax, 64(%rdi)
+	adcq	-56(%rsp), %rsi                 ## 8-byte Folded Reload
+	movq	%rbp, 72(%rdi)
+	movq	%rsi, 80(%rdi)
+	adcq	-8(%rsp), %rdx                  ## 8-byte Folded Reload
+	movq	%rdx, 88(%rdi)
+	adcq	-48(%rsp), %rbx                 ## 8-byte Folded Reload
+	movq	%rbx, 96(%rdi)
+	adcq	%r15, %r10
+	movq	%r10, 104(%rdi)
+	adcq	%r12, %r9
+	movq	%r9, 112(%rdi)
+	adcq	-64(%rsp), %r8                  ## 8-byte Folded Reload
+	movq	%r8, 120(%rdi)
 	popq	%rbx
 	popq	%r12
 	popq	%r13
@@ -16308,6 +8166,5 @@ _mcl_fpDbl_sub9L:                       ## @mcl_fpDbl_sub9L
 	popq	%r15
 	popq	%rbp
 	retq
-
-
+                                        ## -- End function
 .subsections_via_symbols

From 6d8a7d98300e25212c6ccc969c1e9e0a4b46d739 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 15:14:37 +0900
Subject: [PATCH 520/553] enable montRedNF

---
 src/low_func_llvm.hpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/low_func_llvm.hpp b/src/low_func_llvm.hpp
index c305ed16..e5567fed 100644
--- a/src/low_func_llvm.hpp
+++ b/src/low_func_llvm.hpp
@@ -25,7 +25,6 @@ template<>const void3u MulPreCore<n, tag>::f = &mcl_fpDbl_mulPre ## n ## suf; \
 template<>const void2u SqrPreCore<n, tag>::f = &mcl_fpDbl_sqrPre ## n ## suf;
 #endif
 
-// QQQ : set mcl_fp_montRedNF after updating asm
 #define MCL_DEF_LLVM_FUNC2(n, tag, suf) \
 template<>const u3u AddPre<n, tag>::f = &mcl_fp_addPre ## n ## suf; \
 template<>const u3u SubPre<n, tag>::f = &mcl_fp_subPre ## n ## suf; \
@@ -39,7 +38,7 @@ template<>const void4u Sub<n, false, tag>::f = &mcl_fp_subNF ## n ## suf; \
 template<>const void4u Mont<n, true, tag>::f = &mcl_fp_mont ## n ## suf; \
 template<>const void4u Mont<n, false, tag>::f = &mcl_fp_montNF ## n ## suf; \
 template<>const void3u MontRed<n, true, tag>::f = &mcl_fp_montRed ## n ## suf; \
-template<>const void3u MontRed<n, false, tag>::f = &mcl_fp_montRed ## n ## suf; \
+template<>const void3u MontRed<n, false, tag>::f = &mcl_fp_montRedNF ## n ## suf; \
 template<>const void4u DblAdd<n, tag>::f = &mcl_fpDbl_add ## n ## suf; \
 template<>const void4u DblSub<n, tag>::f = &mcl_fpDbl_sub ## n ## suf; \
 

From 36e08dc5b400325ffd4460080dcfd62fd20698bb Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 15:58:53 +0900
Subject: [PATCH 521/553] asm/aarch64 generated by clang-4.0.0

---
 src/asm/aarch64.s | 18428 ++++++++++++++------------------------------
 1 file changed, 5886 insertions(+), 12542 deletions(-)

diff --git a/src/asm/aarch64.s b/src/asm/aarch64.s
index a49a36e3..5e327a4c 100644
--- a/src/asm/aarch64.s
+++ b/src/asm/aarch64.s
@@ -1,180 +1,175 @@
 	.text
 	.file	"<stdin>"
 	.globl	makeNIST_P192L
-	.align	2
+	.p2align	2
 	.type	makeNIST_P192L,@function
 makeNIST_P192L:                         // @makeNIST_P192L
 // BB#0:
-	movn	x0, #0
+	mov	x0, #-1
 	orr	x1, xzr, #0xfffffffffffffffe
-	movn	x2, #0
+	mov	x2, #-1
 	ret
 .Lfunc_end0:
 	.size	makeNIST_P192L, .Lfunc_end0-makeNIST_P192L
 
 	.globl	mcl_fpDbl_mod_NIST_P192L
-	.align	2
+	.p2align	2
 	.type	mcl_fpDbl_mod_NIST_P192L,@function
 mcl_fpDbl_mod_NIST_P192L:               // @mcl_fpDbl_mod_NIST_P192L
 // BB#0:
-	ldp	x8, x9, [x1, #16]
+	ldp		x8, x9, [x1]
 	ldp	x10, x11, [x1, #32]
-	ldp	 x12, x13, [x1]
+	ldp	x12, x13, [x1, #16]
 	orr	w14, wzr, #0x1
-	adds	 x13, x11, x13
-	adcs	x8, x8, xzr
+	adds		x9, x11, x9
+	adcs	x12, x12, xzr
 	adcs	x15, xzr, xzr
-	adds	 x12, x12, x9
-	adcs	x13, x13, x10
-	adcs	x8, x8, x11
+	adds		x8, x8, x13
+	adcs	x9, x9, x10
+	adcs	x12, x12, x11
 	adcs	x15, x15, xzr
-	adds	 x11, x12, x11
-	movn	x12, #0
-	adcs	x9, x13, x9
-	adcs	x8, x8, x10
-	adcs	x10, x15, xzr
-	adds	 x11, x10, x11
-	adcs	x9, x10, x9
-	adcs	x8, x8, xzr
-	adcs	x10, xzr, xzr
-	adds	x13, x11, #1            // =1
+	adds		x8, x8, x11
+	adcs	x9, x9, x13
+	adcs	x10, x12, x10
+	adcs	x12, x15, xzr
+	adds		x8, x12, x8
+	adcs	x9, x12, x9
+	adcs	x10, x10, xzr
+	adcs	x12, xzr, xzr
+	adds	x13, x8, #1             // =1
 	adcs	x14, x9, x14
-	adcs	x15, x8, xzr
-	adcs	x10, x10, x12
-	tst	 x10, #0x1
-	csel	x10, x11, x13, ne
+	mov	x11, #-1
+	adcs	x15, x10, xzr
+	adcs	x11, x12, x11
+	tst	 x11, #0x1
+	csel	x8, x8, x13, ne
 	csel	x9, x9, x14, ne
-	csel	x8, x8, x15, ne
-	stp	 x10, x9, [x0]
-	str	x8, [x0, #16]
+	csel	x10, x10, x15, ne
+	stp		x8, x9, [x0]
+	str	x10, [x0, #16]
 	ret
 .Lfunc_end1:
 	.size	mcl_fpDbl_mod_NIST_P192L, .Lfunc_end1-mcl_fpDbl_mod_NIST_P192L
 
 	.globl	mcl_fp_sqr_NIST_P192L
-	.align	2
+	.p2align	2
 	.type	mcl_fp_sqr_NIST_P192L,@function
 mcl_fp_sqr_NIST_P192L:                  // @mcl_fp_sqr_NIST_P192L
 // BB#0:
-	ldp	 x8, x9, [x1]
-	ldr	x10, [x1, #16]
-	orr	w11, wzr, #0x1
-	umulh	x12, x8, x8
-	mul	 x13, x9, x8
-	mul	 x14, x10, x8
-	umulh	x15, x9, x8
-	adds	 x12, x12, x13
-	umulh	x16, x10, x8
-	adcs	x17, x15, x14
-	adcs	x18, x16, xzr
-	mul	 x1, x9, x9
-	mul	 x2, x10, x9
-	adds	 x15, x15, x1
-	umulh	x1, x9, x9
-	umulh	x9, x10, x9
-	adcs	x1, x1, x2
-	adcs	x3, x9, xzr
-	adds	 x12, x13, x12
-	adcs	x13, x15, x17
-	adcs	x15, x1, x18
-	movn	x17, #0
+	ldp	x10, x8, [x1, #8]
+	ldr		x9, [x1]
+	umulh	x16, x8, x10
+	mul		x14, x10, x9
+	umulh	x15, x9, x9
+	mul		x12, x8, x9
+	umulh	x13, x10, x9
+	adds		x15, x15, x14
+	umulh	x11, x8, x9
+	adcs	x1, x13, x12
+	mul		x17, x8, x10
 	umulh	x18, x10, x10
-	mul	 x10, x10, x10
-	mul	 x8, x8, x8
-	adcs	x1, x3, xzr
-	adds	 x16, x16, x2
-	adcs	x9, x9, x10
-	adcs	x10, x18, xzr
-	adds	 x13, x14, x13
-	adcs	x14, x16, x15
-	adcs	x9, x9, x1
+	mul		x10, x10, x10
+	adcs	x2, x11, xzr
+	adds		x10, x13, x10
+	adcs	x13, x18, x17
+	adcs	x18, x16, xzr
+	adds		x14, x14, x15
+	adcs	x10, x10, x1
+	adcs	x13, x13, x2
+	adcs	x18, x18, xzr
+	umulh	x2, x8, x8
+	mul		x8, x8, x8
+	adds		x11, x11, x17
+	adcs	x8, x16, x8
+	adcs	x16, x2, xzr
+	adds		x10, x12, x10
+	adcs	x11, x11, x13
+	adcs	x8, x8, x18
+	adcs	x12, x16, xzr
+	adds		x13, x14, x12
 	adcs	x10, x10, xzr
-	adds	 x12, x12, x10
-	adcs	x13, x13, xzr
-	adcs	x15, xzr, xzr
-	adds	 x8, x8, x14
-	adcs	x12, x12, x9
-	adcs	x13, x13, x10
-	adcs	x15, x15, xzr
-	adds	 x8, x8, x10
-	adcs	x10, x12, x14
-	adcs	x9, x13, x9
-	adcs	x12, x15, xzr
-	adds	 x8, x12, x8
-	adcs	x10, x12, x10
-	adcs	x9, x9, xzr
-	adcs	x12, xzr, xzr
-	adds	x13, x8, #1             // =1
-	adcs	x11, x10, x11
-	adcs	x14, x9, xzr
-	adcs	x12, x12, x17
-	tst	 x12, #0x1
-	csel	x8, x8, x13, ne
-	csel	x10, x10, x11, ne
-	csel	x9, x9, x14, ne
-	stp	 x8, x10, [x0]
-	str	x9, [x0, #16]
+	mul		x9, x9, x9
+	adcs	x14, xzr, xzr
+	adds		x9, x9, x11
+	adcs	x13, x13, x8
+	adcs	x10, x10, x12
+	adcs	x14, x14, xzr
+	adds		x9, x9, x12
+	adcs	x11, x13, x11
+	adcs	x8, x10, x8
+	adcs	x10, x14, xzr
+	adds		x9, x10, x9
+	adcs	x10, x10, x11
+	adcs	x8, x8, xzr
+	adcs	x11, xzr, xzr
+	orr	w15, wzr, #0x1
+	adds	x12, x9, #1             // =1
+	adcs	x13, x10, x15
+	mov	x1, #-1
+	adcs	x14, x8, xzr
+	adcs	x11, x11, x1
+	tst	 x11, #0x1
+	csel	x9, x9, x12, ne
+	csel	x10, x10, x13, ne
+	csel	x8, x8, x14, ne
+	stp		x9, x10, [x0]
+	str	x8, [x0, #16]
 	ret
 .Lfunc_end2:
 	.size	mcl_fp_sqr_NIST_P192L, .Lfunc_end2-mcl_fp_sqr_NIST_P192L
 
 	.globl	mcl_fp_mulNIST_P192L
-	.align	2
+	.p2align	2
 	.type	mcl_fp_mulNIST_P192L,@function
 mcl_fp_mulNIST_P192L:                   // @mcl_fp_mulNIST_P192L
 // BB#0:
-	stp	x20, x19, [sp, #-32]!
-	stp	x29, x30, [sp, #16]
-	add	x29, sp, #16            // =16
-	sub	sp, sp, #48             // =48
+	sub	sp, sp, #64             // =64
+	stp	x19, x30, [sp, #48]     // 8-byte Folded Spill
 	mov	 x19, x0
 	mov	 x0, sp
 	bl	mcl_fpDbl_mulPre3L
 	ldp	x9, x8, [sp, #8]
 	ldp	x11, x10, [sp, #32]
 	ldr	x12, [sp, #24]
-	ldr	 x13, [sp]
+	ldr		x13, [sp]
 	orr	w14, wzr, #0x1
-	adds	 x9, x10, x9
+	adds		x9, x10, x9
 	adcs	x8, x8, xzr
 	adcs	x15, xzr, xzr
-	adds	 x13, x13, x12
+	adds		x13, x13, x12
 	adcs	x9, x9, x11
 	adcs	x8, x8, x10
 	adcs	x15, x15, xzr
-	adds	 x10, x13, x10
-	movn	x13, #0
+	adds		x10, x13, x10
 	adcs	x9, x9, x12
 	adcs	x8, x8, x11
 	adcs	x11, x15, xzr
-	adds	 x10, x11, x10
+	adds		x10, x11, x10
 	adcs	x9, x11, x9
 	adcs	x8, x8, xzr
 	adcs	x11, xzr, xzr
 	adds	x12, x10, #1            // =1
 	adcs	x14, x9, x14
+	mov	x13, #-1
 	adcs	x15, x8, xzr
 	adcs	x11, x11, x13
 	tst	 x11, #0x1
 	csel	x10, x10, x12, ne
 	csel	x9, x9, x14, ne
 	csel	x8, x8, x15, ne
-	stp	 x10, x9, [x19]
+	stp		x10, x9, [x19]
 	str	x8, [x19, #16]
-	sub	sp, x29, #16            // =16
-	ldp	x29, x30, [sp, #16]
-	ldp	x20, x19, [sp], #32
+	ldp	x19, x30, [sp, #48]     // 8-byte Folded Reload
+	add	sp, sp, #64             // =64
 	ret
 .Lfunc_end3:
 	.size	mcl_fp_mulNIST_P192L, .Lfunc_end3-mcl_fp_mulNIST_P192L
 
 	.globl	mcl_fpDbl_mod_NIST_P521L
-	.align	2
+	.p2align	2
 	.type	mcl_fpDbl_mod_NIST_P521L,@function
 mcl_fpDbl_mod_NIST_P521L:               // @mcl_fpDbl_mod_NIST_P521L
 // BB#0:
-	stp	x29, x30, [sp, #-16]!
-	mov	 x29, sp
 	ldp	x8, x9, [x1, #112]
 	ldr	x10, [x1, #128]
 	ldp	x11, x12, [x1, #96]
@@ -183,7 +178,7 @@ mcl_fpDbl_mod_NIST_P521L:               // @mcl_fpDbl_mod_NIST_P521L
 	ldp	x17, x18, [x1, #48]
 	ldp	x2, x3, [x1, #32]
 	ldp	x4, x5, [x1, #16]
-	ldp	 x6, x1, [x1]
+	ldp		x6, x1, [x1]
 	extr	x7, x10, x9, #9
 	extr	x9, x9, x8, #9
 	extr	x8, x8, x12, #9
@@ -192,13006 +187,6355 @@ mcl_fpDbl_mod_NIST_P521L:               // @mcl_fpDbl_mod_NIST_P521L
 	extr	x14, x14, x13, #9
 	extr	x13, x13, x16, #9
 	extr	x16, x16, x15, #9
-	and	x15, x15, #0x1ff
-	lsr	x10, x10, #9
-	adds	 x16, x16, x6
+	adds		x16, x16, x6
 	adcs	x13, x13, x1
 	adcs	x14, x14, x4
 	adcs	x11, x11, x5
 	adcs	x12, x12, x2
 	adcs	x1, x8, x3
 	adcs	x17, x9, x17
+	and	x15, x15, #0x1ff
+	lsr	x10, x10, #9
 	adcs	x18, x7, x18
 	adcs	x2, x10, x15
 	ubfx	x8, x2, #9, #1
-	adds	 x8, x8, x16
+	adds		x8, x8, x16
 	adcs	x9, x13, xzr
-	and	 x13, x9, x8
+	and		x13, x9, x8
 	adcs	x10, x14, xzr
-	and	 x13, x13, x10
+	and		x13, x13, x10
 	adcs	x11, x11, xzr
-	and	 x13, x13, x11
+	and		x13, x13, x11
 	adcs	x12, x12, xzr
-	and	 x14, x13, x12
+	and		x14, x13, x12
 	adcs	x13, x1, xzr
-	and	 x15, x14, x13
+	and		x15, x14, x13
 	adcs	x14, x17, xzr
-	and	 x16, x15, x14
+	and		x16, x15, x14
 	adcs	x15, x18, xzr
-	and	 x17, x16, x15
+	and		x17, x16, x15
 	adcs	x16, x2, xzr
 	orr	x18, x16, #0xfffffffffffffe00
-	and	 x17, x17, x18
-	cmn	 x17, #1                // =1
+	and		x17, x17, x18
+	cmn		x17, #1         // =1
 	b.eq	.LBB4_2
 // BB#1:                                // %nonzero
-	stp	 x8, x9, [x0]
+	stp		x8, x9, [x0]
+	and	x8, x16, #0x1ff
 	stp	x10, x11, [x0, #16]
 	stp	x12, x13, [x0, #32]
 	stp	x14, x15, [x0, #48]
-	and	x8, x16, #0x1ff
 	str	x8, [x0, #64]
-	ldp	x29, x30, [sp], #16
 	ret
 .LBB4_2:                                // %zero
+	str	x30, [sp, #-16]!        // 8-byte Folded Spill
+	mov	w2, #72
 	mov	 w1, wzr
-	movz	w2, #0x48
 	bl	memset
-	ldp	x29, x30, [sp], #16
+	ldr	x30, [sp], #16          // 8-byte Folded Reload
 	ret
 .Lfunc_end4:
 	.size	mcl_fpDbl_mod_NIST_P521L, .Lfunc_end4-mcl_fpDbl_mod_NIST_P521L
 
-	.globl	mcl_fp_mulUnitPre1L
-	.align	2
-	.type	mcl_fp_mulUnitPre1L,@function
-mcl_fp_mulUnitPre1L:                    // @mcl_fp_mulUnitPre1L
-// BB#0:
-	ldr	 x8, [x1]
-	mul	 x9, x8, x2
-	umulh	x8, x8, x2
-	stp	 x9, x8, [x0]
+	.globl	mulPv192x64
+	.p2align	2
+	.type	mulPv192x64,@function
+mulPv192x64:                            // @mulPv192x64
+// BB#0:
+	ldp	x9, x8, [x0, #8]
+	ldr		x10, [x0]
+	umulh	x11, x8, x1
+	mul		x12, x8, x1
+	umulh	x13, x9, x1
+	mul		x8, x9, x1
+	umulh	x9, x10, x1
+	adds		x8, x9, x8
+	adcs	x2, x13, x12
+	adcs	x3, x11, xzr
+	mul		x0, x10, x1
+	mov	 x1, x8
 	ret
 .Lfunc_end5:
-	.size	mcl_fp_mulUnitPre1L, .Lfunc_end5-mcl_fp_mulUnitPre1L
+	.size	mulPv192x64, .Lfunc_end5-mulPv192x64
 
-	.globl	mcl_fpDbl_mulPre1L
-	.align	2
-	.type	mcl_fpDbl_mulPre1L,@function
-mcl_fpDbl_mulPre1L:                     // @mcl_fpDbl_mulPre1L
+	.globl	mcl_fp_mulUnitPre3L
+	.p2align	2
+	.type	mcl_fp_mulUnitPre3L,@function
+mcl_fp_mulUnitPre3L:                    // @mcl_fp_mulUnitPre3L
 // BB#0:
-	ldr	 x8, [x1]
-	ldr	 x9, [x2]
-	mul	 x10, x9, x8
-	umulh	x8, x9, x8
-	stp	 x10, x8, [x0]
+	ldp		x8, x9, [x1]
+	ldr	x10, [x1, #16]
+	mul		x11, x8, x2
+	mul		x12, x9, x2
+	umulh	x8, x8, x2
+	mul		x13, x10, x2
+	umulh	x9, x9, x2
+	adds		x8, x8, x12
+	umulh	x10, x10, x2
+	stp		x11, x8, [x0]
+	adcs	x8, x9, x13
+	adcs	x9, x10, xzr
+	stp	x8, x9, [x0, #16]
 	ret
 .Lfunc_end6:
-	.size	mcl_fpDbl_mulPre1L, .Lfunc_end6-mcl_fpDbl_mulPre1L
+	.size	mcl_fp_mulUnitPre3L, .Lfunc_end6-mcl_fp_mulUnitPre3L
 
-	.globl	mcl_fpDbl_sqrPre1L
-	.align	2
-	.type	mcl_fpDbl_sqrPre1L,@function
-mcl_fpDbl_sqrPre1L:                     // @mcl_fpDbl_sqrPre1L
+	.globl	mcl_fpDbl_mulPre3L
+	.p2align	2
+	.type	mcl_fpDbl_mulPre3L,@function
+mcl_fpDbl_mulPre3L:                     // @mcl_fpDbl_mulPre3L
 // BB#0:
-	ldr	 x8, [x1]
-	mul	 x9, x8, x8
-	umulh	x8, x8, x8
-	stp	 x9, x8, [x0]
+	str	x19, [sp, #-16]!        // 8-byte Folded Spill
+	ldp		x8, x9, [x1]
+	ldp		x10, x12, [x2]
+	ldr	x11, [x1, #16]
+	ldr	x13, [x2, #16]
+	mul		x14, x8, x10
+	umulh	x15, x11, x10
+	mul		x16, x11, x10
+	umulh	x17, x9, x10
+	mul		x18, x9, x10
+	umulh	x10, x8, x10
+	adds		x10, x10, x18
+	mul		x1, x8, x12
+	mul		x2, x11, x12
+	mul		x3, x9, x12
+	umulh	x4, x11, x12
+	umulh	x5, x9, x12
+	umulh	x12, x8, x12
+	mul		x6, x8, x13
+	mul		x7, x11, x13
+	mul		x19, x9, x13
+	umulh	x8, x8, x13
+	umulh	x9, x9, x13
+	umulh	x11, x11, x13
+	adcs	x13, x17, x16
+	adcs	x15, x15, xzr
+	adds		x10, x1, x10
+	stp		x14, x10, [x0]
+	adcs	x10, x3, x13
+	adcs	x13, x2, x15
+	adcs	x14, xzr, xzr
+	adds		x10, x10, x12
+	adcs	x12, x13, x5
+	adcs	x13, x14, x4
+	adds		x10, x10, x6
+	adcs	x12, x12, x19
+	adcs	x13, x13, x7
+	adcs	x14, xzr, xzr
+	adds		x8, x12, x8
+	stp	x10, x8, [x0, #16]
+	adcs	x8, x13, x9
+	adcs	x9, x14, x11
+	stp	x8, x9, [x0, #32]
+	ldr	x19, [sp], #16          // 8-byte Folded Reload
 	ret
 .Lfunc_end7:
-	.size	mcl_fpDbl_sqrPre1L, .Lfunc_end7-mcl_fpDbl_sqrPre1L
+	.size	mcl_fpDbl_mulPre3L, .Lfunc_end7-mcl_fpDbl_mulPre3L
 
-	.globl	mcl_fp_mont1L
-	.align	2
-	.type	mcl_fp_mont1L,@function
-mcl_fp_mont1L:                          // @mcl_fp_mont1L
+	.globl	mcl_fpDbl_sqrPre3L
+	.p2align	2
+	.type	mcl_fpDbl_sqrPre3L,@function
+mcl_fpDbl_sqrPre3L:                     // @mcl_fpDbl_sqrPre3L
 // BB#0:
-	ldr	 x8, [x2]
-	ldr	 x9, [x1]
-	ldur	x10, [x3, #-8]
-	ldr	 x11, [x3]
+	ldp		x8, x10, [x1]
+	ldr	x9, [x1, #16]
+	mul		x11, x8, x8
 	umulh	x12, x9, x8
-	mul	 x8, x9, x8
-	mul	 x9, x8, x10
-	umulh	x10, x9, x11
-	mul	 x9, x9, x11
-	cmn	 x9, x8
-	adcs	x8, x10, x12
-	adcs	x9, xzr, xzr
-	subs	 x10, x8, x11
-	sbcs	x9, x9, xzr
-	tst	 x9, #0x1
-	csel	x8, x8, x10, ne
-	str	 x8, [x0]
+	mul		x13, x9, x8
+	umulh	x14, x10, x8
+	mul		x15, x10, x8
+	umulh	x8, x8, x8
+	adds		x8, x8, x15
+	adcs	x17, x14, x13
+	adcs	x18, x12, xzr
+	adds		x8, x8, x15
+	mul		x15, x10, x10
+	mul		x16, x9, x10
+	stp		x11, x8, [x0]
+	adcs	x11, x17, x15
+	adcs	x15, x18, x16
+	adcs	x17, xzr, xzr
+	umulh	x8, x9, x10
+	umulh	x10, x10, x10
+	adds		x11, x11, x14
+	adcs	x10, x15, x10
+	adcs	x15, x17, x8
+	adds		x11, x11, x13
+	mul		x14, x9, x9
+	adcs	x10, x10, x16
+	adcs	x13, x15, x14
+	adcs	x14, xzr, xzr
+	adds		x10, x10, x12
+	umulh	x9, x9, x9
+	adcs	x8, x13, x8
+	adcs	x9, x14, x9
+	stp	x11, x10, [x0, #16]
+	stp	x8, x9, [x0, #32]
 	ret
 .Lfunc_end8:
-	.size	mcl_fp_mont1L, .Lfunc_end8-mcl_fp_mont1L
+	.size	mcl_fpDbl_sqrPre3L, .Lfunc_end8-mcl_fpDbl_sqrPre3L
 
-	.globl	mcl_fp_montNF1L
-	.align	2
-	.type	mcl_fp_montNF1L,@function
-mcl_fp_montNF1L:                        // @mcl_fp_montNF1L
+	.globl	mcl_fp_mont3L
+	.p2align	2
+	.type	mcl_fp_mont3L,@function
+mcl_fp_mont3L:                          // @mcl_fp_mont3L
 // BB#0:
-	ldr	 x8, [x2]
-	ldr	 x9, [x1]
-	ldur	x10, [x3, #-8]
-	ldr	 x11, [x3]
-	umulh	x12, x9, x8
-	mul	 x8, x9, x8
-	mul	 x9, x8, x10
-	umulh	x10, x9, x11
-	mul	 x9, x9, x11
-	cmn	 x9, x8
-	adcs	x8, x10, x12
-	sub	 x9, x8, x11
-	cmp	 x9, #0                 // =0
-	csel	x8, x8, x9, lt
-	str	 x8, [x0]
+	str	x23, [sp, #-48]!        // 8-byte Folded Spill
+	stp	x22, x21, [sp, #16]     // 8-byte Folded Spill
+	stp	x20, x19, [sp, #32]     // 8-byte Folded Spill
+	ldp		x15, x16, [x2]
+	ldp	x13, x14, [x1, #8]
+	ldr		x12, [x1]
+	ldp	x11, x10, [x3, #-8]
+	ldp	x9, x8, [x3, #8]
+	mul		x3, x13, x15
+	umulh	x4, x12, x15
+	ldr	x17, [x2, #16]
+	umulh	x18, x14, x15
+	mul		x1, x14, x15
+	umulh	x2, x13, x15
+	mul		x15, x12, x15
+	adds		x3, x4, x3
+	mul		x4, x15, x11
+	adcs	x1, x2, x1
+	mul		x22, x4, x9
+	umulh	x23, x4, x10
+	adcs	x18, x18, xzr
+	mul		x2, x4, x8
+	adds		x22, x23, x22
+	umulh	x23, x4, x9
+	adcs	x2, x23, x2
+	umulh	x23, x4, x8
+	mul		x4, x4, x10
+	adcs	x23, x23, xzr
+	cmn		x4, x15
+	umulh	x5, x16, x14
+	mul		x6, x16, x14
+	umulh	x7, x16, x13
+	mul		x19, x16, x13
+	umulh	x20, x16, x12
+	mul		x16, x16, x12
+	umulh	x21, x17, x14
+	mul		x14, x17, x14
+	umulh	x15, x17, x13
+	mul		x13, x17, x13
+	umulh	x4, x17, x12
+	mul		x12, x17, x12
+	adcs	x17, x22, x3
+	adcs	x1, x2, x1
+	adcs	x18, x23, x18
+	adcs	x2, xzr, xzr
+	adds		x3, x20, x19
+	adcs	x6, x7, x6
+	adcs	x5, x5, xzr
+	adds		x16, x17, x16
+	adcs	x17, x1, x3
+	adcs	x18, x18, x6
+	mul		x1, x16, x11
+	adcs	x2, x2, x5
+	mul		x6, x1, x9
+	umulh	x7, x1, x10
+	adcs	x5, xzr, xzr
+	mul		x3, x1, x8
+	adds		x6, x7, x6
+	umulh	x7, x1, x9
+	adcs	x3, x7, x3
+	umulh	x7, x1, x8
+	mul		x1, x1, x10
+	adcs	x7, x7, xzr
+	cmn		x1, x16
+	adcs	x16, x6, x17
+	adcs	x17, x3, x18
+	adcs	x18, x7, x2
+	adcs	x1, x5, xzr
+	adds		x13, x4, x13
+	adcs	x14, x15, x14
+	adcs	x15, x21, xzr
+	adds		x12, x16, x12
+	adcs	x13, x17, x13
+	adcs	x14, x18, x14
+	mul		x11, x12, x11
+	adcs	x15, x1, x15
+	mul		x2, x11, x9
+	umulh	x3, x11, x10
+	adcs	x1, xzr, xzr
+	mul		x17, x11, x8
+	umulh	x18, x11, x9
+	adds		x2, x3, x2
+	umulh	x16, x11, x8
+	adcs	x17, x18, x17
+	mul		x11, x11, x10
+	adcs	x16, x16, xzr
+	cmn		x11, x12
+	adcs	x11, x2, x13
+	adcs	x12, x17, x14
+	adcs	x13, x16, x15
+	adcs	x14, x1, xzr
+	subs		x10, x11, x10
+	sbcs	x9, x12, x9
+	sbcs	x8, x13, x8
+	sbcs	x14, x14, xzr
+	tst	 x14, #0x1
+	csel	x10, x11, x10, ne
+	csel	x9, x12, x9, ne
+	csel	x8, x13, x8, ne
+	stp		x10, x9, [x0]
+	str	x8, [x0, #16]
+	ldp	x20, x19, [sp, #32]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #16]     // 8-byte Folded Reload
+	ldr	x23, [sp], #48          // 8-byte Folded Reload
 	ret
 .Lfunc_end9:
-	.size	mcl_fp_montNF1L, .Lfunc_end9-mcl_fp_montNF1L
+	.size	mcl_fp_mont3L, .Lfunc_end9-mcl_fp_mont3L
 
-	.globl	mcl_fp_montRed1L
-	.align	2
-	.type	mcl_fp_montRed1L,@function
-mcl_fp_montRed1L:                       // @mcl_fp_montRed1L
+	.globl	mcl_fp_montNF3L
+	.p2align	2
+	.type	mcl_fp_montNF3L,@function
+mcl_fp_montNF3L:                        // @mcl_fp_montNF3L
 // BB#0:
-	ldur	x8, [x2, #-8]
-	ldp	 x9, x11, [x1]
-	ldr	 x10, [x2]
-	mul	 x8, x9, x8
-	umulh	x12, x8, x10
-	mul	 x8, x8, x10
-	cmn	 x9, x8
-	adcs	x8, x11, x12
-	adcs	x9, xzr, xzr
-	subs	 x10, x8, x10
-	sbcs	x9, x9, xzr
-	tst	 x9, #0x1
-	csel	x8, x8, x10, ne
-	str	 x8, [x0]
+	str	x21, [sp, #-32]!        // 8-byte Folded Spill
+	stp	x20, x19, [sp, #16]     // 8-byte Folded Spill
+	ldp		x14, x16, [x2]
+	ldp	x15, x13, [x1, #8]
+	ldr		x12, [x1]
+	ldp	x11, x10, [x3, #-8]
+	ldp	x9, x8, [x3, #8]
+	ldr	x17, [x2, #16]
+	mul		x3, x15, x14
+	umulh	x4, x12, x14
+	umulh	x18, x13, x14
+	mul		x1, x13, x14
+	umulh	x2, x15, x14
+	mul		x14, x12, x14
+	adds		x3, x4, x3
+	mul		x4, x14, x11
+	adcs	x1, x2, x1
+	mul		x2, x4, x10
+	adcs	x18, x18, xzr
+	umulh	x5, x16, x13
+	mul		x6, x16, x13
+	umulh	x7, x16, x15
+	mul		x19, x16, x15
+	umulh	x20, x16, x12
+	mul		x16, x16, x12
+	umulh	x21, x17, x13
+	mul		x13, x17, x13
+	cmn		x2, x14
+	umulh	x14, x17, x15
+	mul		x15, x17, x15
+	umulh	x2, x17, x12
+	mul		x12, x17, x12
+	mul		x17, x4, x9
+	adcs	x17, x17, x3
+	mul		x3, x4, x8
+	adcs	x1, x3, x1
+	umulh	x3, x4, x10
+	adcs	x18, x18, xzr
+	adds		x17, x17, x3
+	umulh	x3, x4, x9
+	adcs	x1, x1, x3
+	umulh	x3, x4, x8
+	adcs	x18, x18, x3
+	adds		x3, x20, x19
+	adcs	x4, x7, x6
+	adcs	x5, x5, xzr
+	adds		x16, x16, x17
+	adcs	x17, x3, x1
+	mul		x1, x16, x11
+	adcs	x18, x4, x18
+	mul		x4, x1, x10
+	adcs	x5, x5, xzr
+	cmn		x4, x16
+	mul		x16, x1, x9
+	mul		x3, x1, x8
+	adcs	x16, x16, x17
+	adcs	x18, x3, x18
+	umulh	x4, x1, x8
+	umulh	x17, x1, x9
+	umulh	x1, x1, x10
+	adcs	x3, x5, xzr
+	adds		x16, x16, x1
+	adcs	x17, x18, x17
+	adcs	x18, x3, x4
+	adds		x15, x2, x15
+	adcs	x13, x14, x13
+	adcs	x14, x21, xzr
+	adds		x12, x12, x16
+	adcs	x15, x15, x17
+	mul		x11, x12, x11
+	adcs	x13, x13, x18
+	mul		x18, x11, x10
+	adcs	x14, x14, xzr
+	mul		x17, x11, x9
+	cmn		x18, x12
+	mul		x16, x11, x8
+	adcs	x12, x17, x15
+	adcs	x13, x16, x13
+	umulh	x1, x11, x8
+	umulh	x2, x11, x9
+	umulh	x11, x11, x10
+	adcs	x14, x14, xzr
+	adds		x11, x12, x11
+	adcs	x12, x13, x2
+	adcs	x13, x14, x1
+	subs		x10, x11, x10
+	sbcs	x9, x12, x9
+	sbcs	x8, x13, x8
+	asr	x14, x8, #63
+	cmp		x14, #0         // =0
+	csel	x10, x11, x10, lt
+	csel	x9, x12, x9, lt
+	csel	x8, x13, x8, lt
+	stp		x10, x9, [x0]
+	str	x8, [x0, #16]
+	ldp	x20, x19, [sp, #16]     // 8-byte Folded Reload
+	ldr	x21, [sp], #32          // 8-byte Folded Reload
 	ret
 .Lfunc_end10:
-	.size	mcl_fp_montRed1L, .Lfunc_end10-mcl_fp_montRed1L
+	.size	mcl_fp_montNF3L, .Lfunc_end10-mcl_fp_montNF3L
 
-	.globl	mcl_fp_addPre1L
-	.align	2
-	.type	mcl_fp_addPre1L,@function
-mcl_fp_addPre1L:                        // @mcl_fp_addPre1L
+	.globl	mcl_fp_montRed3L
+	.p2align	2
+	.type	mcl_fp_montRed3L,@function
+mcl_fp_montRed3L:                       // @mcl_fp_montRed3L
 // BB#0:
-	ldr	 x8, [x1]
-	ldr	 x9, [x2]
-	adds	 x9, x9, x8
-	adcs	x8, xzr, xzr
-	str	 x9, [x0]
-	mov	 x0, x8
-	ret
-.Lfunc_end11:
-	.size	mcl_fp_addPre1L, .Lfunc_end11-mcl_fp_addPre1L
+	ldp	x9, x10, [x2, #-8]
+	ldp		x14, x15, [x1]
+	ldp	x11, x8, [x2, #8]
+	ldp	x12, x13, [x1, #16]
+	ldp	x16, x17, [x1, #32]
+	mul		x18, x14, x9
+	mul		x3, x18, x11
+	umulh	x4, x18, x10
+	mul		x2, x18, x8
+	adds		x3, x4, x3
+	umulh	x4, x18, x11
+	umulh	x1, x18, x8
+	adcs	x2, x4, x2
+	mul		x18, x18, x10
+	adcs	x1, x1, xzr
+	cmn		x18, x14
+	adcs	x14, x3, x15
+	adcs	x12, x2, x12
+	mul		x15, x14, x9
+	adcs	x13, x1, x13
+	umulh	x2, x15, x10
+	mul		x3, x15, x11
+	adcs	x4, xzr, xzr
+	umulh	x1, x15, x11
+	adds		x2, x3, x2
+	mul		x3, x15, x8
+	umulh	x18, x15, x8
+	adcs	x1, x3, x1
+	mul		x15, x15, x10
+	adcs	x18, x4, x18
+	cmn		x15, x14
+	adcs	x12, x2, x12
+	adcs	x13, x1, x13
+	mul		x9, x12, x9
+	adcs	x15, x18, x16
+	umulh	x1, x9, x10
+	mul		x2, x9, x11
+	adcs	x3, xzr, xzr
+	umulh	x16, x9, x11
+	mul		x18, x9, x8
+	adds		x1, x2, x1
+	umulh	x14, x9, x8
+	adcs	x16, x18, x16
+	mul		x9, x9, x10
+	adcs	x14, x3, x14
+	cmn		x9, x12
+	adcs	x9, x1, x13
+	adcs	x12, x16, x15
+	adcs	x13, x14, x17
+	subs		x10, x9, x10
+	sbcs	x11, x12, x11
+	sbcs	x8, x13, x8
+	ngcs	 x14, xzr
+	tst	 x14, #0x1
+	csel	x9, x9, x10, ne
+	csel	x10, x12, x11, ne
+	csel	x8, x13, x8, ne
+	stp		x9, x10, [x0]
+	str	x8, [x0, #16]
+	ret
+.Lfunc_end11:
+	.size	mcl_fp_montRed3L, .Lfunc_end11-mcl_fp_montRed3L
 
-	.globl	mcl_fp_subPre1L
-	.align	2
-	.type	mcl_fp_subPre1L,@function
-mcl_fp_subPre1L:                        // @mcl_fp_subPre1L
+	.globl	mcl_fp_montRedNF3L
+	.p2align	2
+	.type	mcl_fp_montRedNF3L,@function
+mcl_fp_montRedNF3L:                     // @mcl_fp_montRedNF3L
 // BB#0:
-	ldr	 x8, [x2]
-	ldr	 x9, [x1]
-	subs	 x9, x9, x8
-	ngcs	 x8, xzr
-	and	x8, x8, #0x1
-	str	 x9, [x0]
-	mov	 x0, x8
+	ldp	x9, x10, [x2, #-8]
+	ldp		x14, x15, [x1]
+	ldp	x11, x8, [x2, #8]
+	ldp	x12, x13, [x1, #16]
+	ldp	x16, x17, [x1, #32]
+	mul		x18, x14, x9
+	mul		x3, x18, x11
+	umulh	x4, x18, x10
+	mul		x2, x18, x8
+	adds		x3, x4, x3
+	umulh	x4, x18, x11
+	umulh	x1, x18, x8
+	adcs	x2, x4, x2
+	mul		x18, x18, x10
+	adcs	x1, x1, xzr
+	cmn		x18, x14
+	adcs	x14, x3, x15
+	adcs	x12, x2, x12
+	mul		x15, x14, x9
+	adcs	x13, x1, x13
+	umulh	x2, x15, x10
+	mul		x3, x15, x11
+	adcs	x4, xzr, xzr
+	umulh	x1, x15, x11
+	adds		x2, x3, x2
+	mul		x3, x15, x8
+	umulh	x18, x15, x8
+	adcs	x1, x3, x1
+	mul		x15, x15, x10
+	adcs	x18, x4, x18
+	cmn		x15, x14
+	adcs	x12, x2, x12
+	adcs	x13, x1, x13
+	mul		x9, x12, x9
+	adcs	x15, x18, x16
+	umulh	x1, x9, x10
+	mul		x2, x9, x11
+	adcs	x3, xzr, xzr
+	umulh	x16, x9, x11
+	mul		x18, x9, x8
+	adds		x1, x2, x1
+	umulh	x14, x9, x8
+	adcs	x16, x18, x16
+	mul		x9, x9, x10
+	adcs	x14, x3, x14
+	cmn		x9, x12
+	adcs	x9, x1, x13
+	adcs	x12, x16, x15
+	adcs	x13, x14, x17
+	subs		x10, x9, x10
+	sbcs	x11, x12, x11
+	sbcs	x8, x13, x8
+	asr	x14, x8, #63
+	cmp		x14, #0         // =0
+	csel	x9, x9, x10, lt
+	csel	x10, x12, x11, lt
+	csel	x8, x13, x8, lt
+	stp		x9, x10, [x0]
+	str	x8, [x0, #16]
 	ret
 .Lfunc_end12:
-	.size	mcl_fp_subPre1L, .Lfunc_end12-mcl_fp_subPre1L
+	.size	mcl_fp_montRedNF3L, .Lfunc_end12-mcl_fp_montRedNF3L
 
-	.globl	mcl_fp_shr1_1L
-	.align	2
-	.type	mcl_fp_shr1_1L,@function
-mcl_fp_shr1_1L:                         // @mcl_fp_shr1_1L
+	.globl	mcl_fp_addPre3L
+	.p2align	2
+	.type	mcl_fp_addPre3L,@function
+mcl_fp_addPre3L:                        // @mcl_fp_addPre3L
 // BB#0:
-	ldr	 x8, [x1]
-	lsr	x8, x8, #1
-	str	 x8, [x0]
+	ldp		x8, x9, [x2]
+	ldp		x10, x11, [x1]
+	ldr	x12, [x2, #16]
+	ldr	x13, [x1, #16]
+	adds		x8, x8, x10
+	adcs	x9, x9, x11
+	stp		x8, x9, [x0]
+	adcs	x9, x12, x13
+	adcs	x8, xzr, xzr
+	str	x9, [x0, #16]
+	mov	 x0, x8
 	ret
 .Lfunc_end13:
-	.size	mcl_fp_shr1_1L, .Lfunc_end13-mcl_fp_shr1_1L
+	.size	mcl_fp_addPre3L, .Lfunc_end13-mcl_fp_addPre3L
 
-	.globl	mcl_fp_add1L
-	.align	2
-	.type	mcl_fp_add1L,@function
-mcl_fp_add1L:                           // @mcl_fp_add1L
+	.globl	mcl_fp_subPre3L
+	.p2align	2
+	.type	mcl_fp_subPre3L,@function
+mcl_fp_subPre3L:                        // @mcl_fp_subPre3L
 // BB#0:
-	ldr	 x8, [x1]
-	ldr	 x9, [x2]
-	ldr	 x10, [x3]
-	adds	 x8, x9, x8
-	str	 x8, [x0]
-	adcs	x9, xzr, xzr
-	subs	 x8, x8, x10
-	sbcs	x9, x9, xzr
-	and	w9, w9, #0x1
-	tbnz	w9, #0, .LBB14_2
-// BB#1:                                // %nocarry
-	str	 x8, [x0]
-.LBB14_2:                               // %carry
+	ldp		x8, x9, [x2]
+	ldp		x10, x11, [x1]
+	ldr	x12, [x2, #16]
+	ldr	x13, [x1, #16]
+	subs		x8, x10, x8
+	sbcs	x9, x11, x9
+	stp		x8, x9, [x0]
+	sbcs	x9, x13, x12
+	ngcs	 x8, xzr
+	and	x8, x8, #0x1
+	str	x9, [x0, #16]
+	mov	 x0, x8
 	ret
 .Lfunc_end14:
-	.size	mcl_fp_add1L, .Lfunc_end14-mcl_fp_add1L
+	.size	mcl_fp_subPre3L, .Lfunc_end14-mcl_fp_subPre3L
 
-	.globl	mcl_fp_addNF1L
-	.align	2
-	.type	mcl_fp_addNF1L,@function
-mcl_fp_addNF1L:                         // @mcl_fp_addNF1L
+	.globl	mcl_fp_shr1_3L
+	.p2align	2
+	.type	mcl_fp_shr1_3L,@function
+mcl_fp_shr1_3L:                         // @mcl_fp_shr1_3L
 // BB#0:
-	ldr	 x8, [x1]
-	ldr	 x9, [x2]
-	ldr	 x10, [x3]
-	add	 x8, x9, x8
-	sub	 x9, x8, x10
-	cmp	 x9, #0                 // =0
-	csel	x8, x8, x9, lt
-	str	 x8, [x0]
+	ldp		x8, x9, [x1]
+	ldr	x10, [x1, #16]
+	extr	x8, x9, x8, #1
+	extr	x9, x10, x9, #1
+	lsr	x10, x10, #1
+	stp		x8, x9, [x0]
+	str	x10, [x0, #16]
 	ret
 .Lfunc_end15:
-	.size	mcl_fp_addNF1L, .Lfunc_end15-mcl_fp_addNF1L
+	.size	mcl_fp_shr1_3L, .Lfunc_end15-mcl_fp_shr1_3L
 
-	.globl	mcl_fp_sub1L
-	.align	2
-	.type	mcl_fp_sub1L,@function
-mcl_fp_sub1L:                           // @mcl_fp_sub1L
+	.globl	mcl_fp_add3L
+	.p2align	2
+	.type	mcl_fp_add3L,@function
+mcl_fp_add3L:                           // @mcl_fp_add3L
 // BB#0:
-	ldr	 x8, [x2]
-	ldr	 x9, [x1]
-	subs	 x8, x9, x8
-	str	 x8, [x0]
-	ngcs	 x9, xzr
-	and	w9, w9, #0x1
-	tbnz	w9, #0, .LBB16_2
+	ldp		x8, x9, [x2]
+	ldp		x10, x11, [x1]
+	ldr	x12, [x2, #16]
+	ldr	x13, [x1, #16]
+	adds		x8, x8, x10
+	adcs	x9, x9, x11
+	ldp		x10, x11, [x3]
+	adcs	x12, x12, x13
+	ldr	x13, [x3, #16]
+	adcs	x14, xzr, xzr
+	subs		x10, x8, x10
+	stp		x8, x9, [x0]
+	sbcs	x9, x9, x11
+	sbcs	x8, x12, x13
+	sbcs	x11, x14, xzr
+	str	x12, [x0, #16]
+	tbnz	w11, #0, .LBB16_2
 // BB#1:                                // %nocarry
-	ret
+	stp		x10, x9, [x0]
+	str	x8, [x0, #16]
 .LBB16_2:                               // %carry
-	ldr	 x9, [x3]
-	add	 x8, x9, x8
-	str	 x8, [x0]
 	ret
 .Lfunc_end16:
-	.size	mcl_fp_sub1L, .Lfunc_end16-mcl_fp_sub1L
+	.size	mcl_fp_add3L, .Lfunc_end16-mcl_fp_add3L
 
-	.globl	mcl_fp_subNF1L
-	.align	2
-	.type	mcl_fp_subNF1L,@function
-mcl_fp_subNF1L:                         // @mcl_fp_subNF1L
+	.globl	mcl_fp_addNF3L
+	.p2align	2
+	.type	mcl_fp_addNF3L,@function
+mcl_fp_addNF3L:                         // @mcl_fp_addNF3L
 // BB#0:
-	ldr	 x8, [x2]
-	ldr	 x9, [x1]
-	ldr	 x10, [x3]
-	sub	 x8, x9, x8
-	and	x9, x10, x8, asr #63
-	add	 x8, x9, x8
-	str	 x8, [x0]
+	ldp		x8, x9, [x1]
+	ldp		x10, x11, [x2]
+	ldr	x12, [x1, #16]
+	ldr	x13, [x2, #16]
+	ldr	x14, [x3, #16]
+	adds		x8, x10, x8
+	adcs	x9, x11, x9
+	ldp		x10, x11, [x3]
+	adcs	x12, x13, x12
+	subs		x10, x8, x10
+	sbcs	x11, x9, x11
+	sbcs	x13, x12, x14
+	asr	x14, x13, #63
+	cmp		x14, #0         // =0
+	csel	x8, x8, x10, lt
+	csel	x9, x9, x11, lt
+	csel	x10, x12, x13, lt
+	stp		x8, x9, [x0]
+	str	x10, [x0, #16]
 	ret
 .Lfunc_end17:
-	.size	mcl_fp_subNF1L, .Lfunc_end17-mcl_fp_subNF1L
+	.size	mcl_fp_addNF3L, .Lfunc_end17-mcl_fp_addNF3L
 
-	.globl	mcl_fpDbl_add1L
-	.align	2
-	.type	mcl_fpDbl_add1L,@function
-mcl_fpDbl_add1L:                        // @mcl_fpDbl_add1L
+	.globl	mcl_fp_sub3L
+	.p2align	2
+	.type	mcl_fp_sub3L,@function
+mcl_fp_sub3L:                           // @mcl_fp_sub3L
 // BB#0:
-	ldp	 x8, x11, [x1]
-	ldp	 x9, x10, [x2]
-	ldr	 x12, [x3]
-	adds	 x8, x9, x8
-	str	 x8, [x0]
-	adcs	x8, x10, x11
-	adcs	x9, xzr, xzr
-	subs	 x10, x8, x12
-	sbcs	x9, x9, xzr
-	tst	 x9, #0x1
-	csel	x8, x8, x10, ne
-	str	x8, [x0, #8]
+	ldp		x8, x9, [x2]
+	ldp		x10, x11, [x1]
+	ldr	x12, [x2, #16]
+	ldr	x13, [x1, #16]
+	subs		x10, x10, x8
+	sbcs	x8, x11, x9
+	sbcs	x9, x13, x12
+	ngcs	 x11, xzr
+	stp		x10, x8, [x0]
+	str	x9, [x0, #16]
+	tbnz	w11, #0, .LBB18_2
+// BB#1:                                // %nocarry
+	ret
+.LBB18_2:                               // %carry
+	ldp		x11, x12, [x3]
+	ldr	x13, [x3, #16]
+	adds		x10, x11, x10
+	adcs	x8, x12, x8
+	stp		x10, x8, [x0]
+	adcs	x8, x13, x9
+	str	x8, [x0, #16]
 	ret
 .Lfunc_end18:
-	.size	mcl_fpDbl_add1L, .Lfunc_end18-mcl_fpDbl_add1L
+	.size	mcl_fp_sub3L, .Lfunc_end18-mcl_fp_sub3L
 
-	.globl	mcl_fpDbl_sub1L
-	.align	2
-	.type	mcl_fpDbl_sub1L,@function
-mcl_fpDbl_sub1L:                        // @mcl_fpDbl_sub1L
+	.globl	mcl_fp_subNF3L
+	.p2align	2
+	.type	mcl_fp_subNF3L,@function
+mcl_fp_subNF3L:                         // @mcl_fp_subNF3L
 // BB#0:
-	ldp	 x8, x11, [x1]
-	ldp	 x9, x10, [x2]
-	ldr	 x12, [x3]
-	subs	 x8, x8, x9
-	str	 x8, [x0]
-	sbcs	x8, x11, x10
-	ngcs	 x9, xzr
-	tst	 x9, #0x1
-	csel	x9, x12, xzr, ne
-	add	 x8, x9, x8
-	str	x8, [x0, #8]
+	ldp		x8, x9, [x2]
+	ldp		x10, x11, [x1]
+	ldr	x12, [x2, #16]
+	ldr	x13, [x1, #16]
+	ldr	x14, [x3, #16]
+	subs		x8, x10, x8
+	sbcs	x9, x11, x9
+	ldp		x10, x11, [x3]
+	sbcs	x12, x13, x12
+	asr	x13, x12, #63
+	and		x14, x13, x14
+	and		x11, x13, x11
+	extr	x13, x13, x12, #63
+	and		x10, x13, x10
+	adds		x8, x10, x8
+	adcs	x9, x11, x9
+	stp		x8, x9, [x0]
+	adcs	x8, x14, x12
+	str	x8, [x0, #16]
 	ret
 .Lfunc_end19:
-	.size	mcl_fpDbl_sub1L, .Lfunc_end19-mcl_fpDbl_sub1L
+	.size	mcl_fp_subNF3L, .Lfunc_end19-mcl_fp_subNF3L
 
-	.globl	mcl_fp_mulUnitPre2L
-	.align	2
-	.type	mcl_fp_mulUnitPre2L,@function
-mcl_fp_mulUnitPre2L:                    // @mcl_fp_mulUnitPre2L
+	.globl	mcl_fpDbl_add3L
+	.p2align	2
+	.type	mcl_fpDbl_add3L,@function
+mcl_fpDbl_add3L:                        // @mcl_fpDbl_add3L
 // BB#0:
-	ldp	 x8, x9, [x1]
-	mul	 x10, x8, x2
-	mul	 x11, x9, x2
-	umulh	x8, x8, x2
-	umulh	x9, x9, x2
-	adds	 x8, x8, x11
-	stp	 x10, x8, [x0]
-	adcs	x8, x9, xzr
-	str	x8, [x0, #16]
+	ldp		x14, x15, [x2]
+	ldp		x16, x17, [x1]
+	ldp	x10, x11, [x1, #32]
+	ldp	x12, x13, [x2, #16]
+	ldp	x18, x1, [x1, #16]
+	ldp	x8, x9, [x2, #32]
+	adds		x14, x14, x16
+	adcs	x15, x15, x17
+	adcs	x12, x12, x18
+	ldr		x17, [x3]
+	adcs	x13, x13, x1
+	ldp	x2, x16, [x3, #8]
+	adcs	x8, x8, x10
+	adcs	x9, x9, x11
+	adcs	x10, xzr, xzr
+	subs		x11, x13, x17
+	stp		x14, x15, [x0]
+	sbcs	x14, x8, x2
+	sbcs	x15, x9, x16
+	sbcs	x10, x10, xzr
+	tst	 x10, #0x1
+	csel	x10, x13, x11, ne
+	csel	x8, x8, x14, ne
+	csel	x9, x9, x15, ne
+	stp	x12, x10, [x0, #16]
+	stp	x8, x9, [x0, #32]
 	ret
 .Lfunc_end20:
-	.size	mcl_fp_mulUnitPre2L, .Lfunc_end20-mcl_fp_mulUnitPre2L
+	.size	mcl_fpDbl_add3L, .Lfunc_end20-mcl_fpDbl_add3L
 
-	.globl	mcl_fpDbl_mulPre2L
-	.align	2
-	.type	mcl_fpDbl_mulPre2L,@function
-mcl_fpDbl_mulPre2L:                     // @mcl_fpDbl_mulPre2L
+	.globl	mcl_fpDbl_sub3L
+	.p2align	2
+	.type	mcl_fpDbl_sub3L,@function
+mcl_fpDbl_sub3L:                        // @mcl_fpDbl_sub3L
 // BB#0:
-	ldp	 x8, x11, [x2]
-	ldp	 x9, x10, [x1]
-	mul	 x12, x9, x8
-	umulh	x13, x10, x8
-	mul	 x14, x10, x8
-	umulh	x8, x9, x8
-	mul	 x15, x9, x11
-	mul	 x16, x10, x11
-	umulh	x9, x9, x11
-	umulh	x10, x10, x11
-	adds	 x8, x8, x14
-	adcs	x11, x13, xzr
-	adds	 x8, x8, x15
-	stp	 x12, x8, [x0]
-	adcs	x8, x11, x16
-	adcs	x11, xzr, xzr
-	adds	 x8, x8, x9
-	str	x8, [x0, #16]
-	adcs	x8, x11, x10
-	str	x8, [x0, #24]
+	ldp		x14, x15, [x2]
+	ldp		x16, x17, [x1]
+	ldp	x10, x11, [x1, #32]
+	ldp	x12, x13, [x2, #16]
+	ldp	x18, x1, [x1, #16]
+	ldp	x8, x9, [x2, #32]
+	subs		x14, x16, x14
+	sbcs	x15, x17, x15
+	sbcs	x12, x18, x12
+	sbcs	x13, x1, x13
+	ldr		x17, [x3]
+	sbcs	x8, x10, x8
+	ldp	x2, x16, [x3, #8]
+	sbcs	x9, x11, x9
+	ngcs	 x10, xzr
+	tst	 x10, #0x1
+	stp		x14, x15, [x0]
+	csel	x14, x17, xzr, ne
+	csel	x10, x16, xzr, ne
+	csel	x11, x2, xzr, ne
+	adds		x13, x14, x13
+	adcs	x8, x11, x8
+	adcs	x9, x10, x9
+	stp	x12, x13, [x0, #16]
+	stp	x8, x9, [x0, #32]
 	ret
 .Lfunc_end21:
-	.size	mcl_fpDbl_mulPre2L, .Lfunc_end21-mcl_fpDbl_mulPre2L
-
-	.globl	mcl_fpDbl_sqrPre2L
-	.align	2
-	.type	mcl_fpDbl_sqrPre2L,@function
-mcl_fpDbl_sqrPre2L:                     // @mcl_fpDbl_sqrPre2L
-// BB#0:
-	ldp	 x8, x9, [x1]
-	mul	 x10, x8, x8
-	umulh	x11, x9, x8
-	mul	 x12, x9, x8
-	umulh	x8, x8, x8
-	umulh	x13, x9, x9
-	mul	 x9, x9, x9
-	str	 x10, [x0]
-	adds	 x8, x8, x12
-	adcs	x10, x11, xzr
-	adds	 x9, x11, x9
-	adcs	x11, x13, xzr
-	adds	 x8, x12, x8
-	str	x8, [x0, #8]
-	adcs	x8, x9, x10
-	str	x8, [x0, #16]
-	adcs	x8, x11, xzr
-	str	x8, [x0, #24]
+	.size	mcl_fpDbl_sub3L, .Lfunc_end21-mcl_fpDbl_sub3L
+
+	.globl	mulPv256x64
+	.p2align	2
+	.type	mulPv256x64,@function
+mulPv256x64:                            // @mulPv256x64
+// BB#0:
+	ldp	x9, x8, [x0, #16]
+	ldp		x10, x11, [x0]
+	umulh	x12, x8, x1
+	mul		x13, x8, x1
+	umulh	x15, x11, x1
+	mul		x8, x11, x1
+	umulh	x11, x10, x1
+	umulh	x14, x9, x1
+	mul		x9, x9, x1
+	adds		x8, x11, x8
+	adcs	x2, x15, x9
+	adcs	x3, x14, x13
+	adcs	x4, x12, xzr
+	mul		x0, x10, x1
+	mov	 x1, x8
 	ret
 .Lfunc_end22:
-	.size	mcl_fpDbl_sqrPre2L, .Lfunc_end22-mcl_fpDbl_sqrPre2L
+	.size	mulPv256x64, .Lfunc_end22-mulPv256x64
 
-	.globl	mcl_fp_mont2L
-	.align	2
-	.type	mcl_fp_mont2L,@function
-mcl_fp_mont2L:                          // @mcl_fp_mont2L
+	.globl	mcl_fp_mulUnitPre4L
+	.p2align	2
+	.type	mcl_fp_mulUnitPre4L,@function
+mcl_fp_mulUnitPre4L:                    // @mcl_fp_mulUnitPre4L
 // BB#0:
-	ldp	 x8, x14, [x2]
-	ldp	 x9, x10, [x1]
-	ldur	x11, [x3, #-8]
-	ldp	 x12, x13, [x3]
-	umulh	x15, x10, x8
-	mul	 x16, x10, x8
-	umulh	x17, x9, x8
-	mul	 x8, x9, x8
-	umulh	x18, x14, x10
-	mul	 x10, x14, x10
-	umulh	x1, x14, x9
-	mul	 x9, x14, x9
-	adds	 x14, x17, x16
-	mul	 x16, x8, x11
-	adcs	x15, x15, xzr
-	mul	 x17, x16, x13
-	umulh	x2, x16, x12
-	adds	 x17, x2, x17
-	umulh	x2, x16, x13
-	mul	 x16, x16, x12
-	adcs	x2, x2, xzr
-	cmn	 x16, x8
-	adcs	x8, x17, x14
-	adcs	x14, x2, x15
-	adcs	x15, xzr, xzr
-	adds	 x10, x1, x10
-	adcs	x16, x18, xzr
-	adds	 x8, x8, x9
-	adcs	x9, x14, x10
-	mul	 x10, x8, x11
-	adcs	x11, x15, x16
-	umulh	x14, x10, x13
-	mul	 x15, x10, x13
-	umulh	x16, x10, x12
-	mul	 x10, x10, x12
-	adcs	x17, xzr, xzr
-	adds	 x15, x16, x15
-	adcs	x14, x14, xzr
-	cmn	 x10, x8
-	adcs	x8, x15, x9
-	adcs	x9, x14, x11
-	adcs	x10, x17, xzr
-	subs	 x11, x8, x12
-	sbcs	x12, x9, x13
-	sbcs	x10, x10, xzr
-	tst	 x10, #0x1
-	csel	x8, x8, x11, ne
-	csel	x9, x9, x12, ne
-	stp	 x8, x9, [x0]
+	ldp		x8, x9, [x1]
+	ldp	x10, x11, [x1, #16]
+	mul		x12, x8, x2
+	mul		x13, x9, x2
+	umulh	x8, x8, x2
+	mul		x14, x10, x2
+	umulh	x9, x9, x2
+	adds		x8, x8, x13
+	mul		x15, x11, x2
+	umulh	x10, x10, x2
+	stp		x12, x8, [x0]
+	adcs	x8, x9, x14
+	umulh	x11, x11, x2
+	adcs	x9, x10, x15
+	stp	x8, x9, [x0, #16]
+	adcs	x8, x11, xzr
+	str	x8, [x0, #32]
 	ret
 .Lfunc_end23:
-	.size	mcl_fp_mont2L, .Lfunc_end23-mcl_fp_mont2L
+	.size	mcl_fp_mulUnitPre4L, .Lfunc_end23-mcl_fp_mulUnitPre4L
 
-	.globl	mcl_fp_montNF2L
-	.align	2
-	.type	mcl_fp_montNF2L,@function
-mcl_fp_montNF2L:                        // @mcl_fp_montNF2L
+	.globl	mcl_fpDbl_mulPre4L
+	.p2align	2
+	.type	mcl_fpDbl_mulPre4L,@function
+mcl_fpDbl_mulPre4L:                     // @mcl_fpDbl_mulPre4L
 // BB#0:
-	ldp	 x8, x14, [x2]
-	ldp	 x9, x10, [x1]
-	ldur	x11, [x3, #-8]
-	ldp	 x12, x13, [x3]
-	umulh	x15, x10, x8
-	mul	 x16, x10, x8
-	umulh	x17, x9, x8
-	mul	 x8, x9, x8
-	umulh	x18, x14, x10
-	mul	 x10, x14, x10
-	umulh	x1, x14, x9
-	mul	 x9, x14, x9
-	adds	 x14, x17, x16
-	mul	 x16, x8, x11
-	adcs	x15, x15, xzr
-	mul	 x17, x16, x12
-	cmn	 x17, x8
-	mul	 x8, x16, x13
-	umulh	x17, x16, x13
-	umulh	x16, x16, x12
-	adcs	x8, x8, x14
-	adcs	x14, x15, xzr
-	adds	 x8, x8, x16
-	adcs	x14, x14, x17
-	adds	 x10, x1, x10
-	adcs	x15, x18, xzr
-	adds	 x8, x9, x8
-	adcs	x9, x10, x14
-	mul	 x10, x8, x11
-	adcs	x11, x15, xzr
-	mul	 x14, x10, x13
-	mul	 x15, x10, x12
-	umulh	x16, x10, x13
-	umulh	x10, x10, x12
-	cmn	 x15, x8
-	adcs	x8, x14, x9
-	adcs	x9, x11, xzr
-	adds	 x8, x8, x10
-	adcs	x9, x9, x16
-	subs	 x10, x8, x12
-	sbcs	x11, x9, x13
-	cmp	 x11, #0                // =0
-	csel	x8, x8, x10, lt
-	csel	x9, x9, x11, lt
-	stp	 x8, x9, [x0]
+	sub	sp, sp, #128            // =128
+	ldp		x8, x10, [x1]
+	ldp		x15, x16, [x2]
+	ldp	x12, x14, [x1, #16]
+	ldp	x17, x18, [x2, #16]
+	ldp		x9, x11, [x1]
+	ldp	x13, x1, [x1, #16]
+	mul		x2, x8, x15
+	stp	x20, x19, [sp, #96]     // 8-byte Folded Spill
+	str	x2, [sp, #24]           // 8-byte Folded Spill
+	umulh	x2, x14, x15
+	mul		x4, x14, x15
+	umulh	x5, x12, x15
+	mul		x6, x12, x15
+	umulh	x7, x10, x15
+	mul		x19, x10, x15
+	umulh	x15, x8, x15
+	stp	x28, x27, [sp, #32]     // 8-byte Folded Spill
+	stp	x24, x23, [sp, #64]     // 8-byte Folded Spill
+	stp	x22, x21, [sp, #80]     // 8-byte Folded Spill
+	mul		x21, x14, x16
+	umulh	x24, x14, x16
+	mul		x28, x14, x17
+	umulh	x14, x14, x17
+	adds		x15, x15, x19
+	stp	x26, x25, [sp, #48]     // 8-byte Folded Spill
+	stp	x29, x30, [sp, #112]    // 8-byte Folded Spill
+	mul		x20, x8, x16
+	mul		x22, x12, x16
+	mul		x23, x10, x16
+	umulh	x25, x12, x16
+	umulh	x26, x10, x16
+	umulh	x16, x8, x16
+	mul		x27, x8, x17
+	mul		x29, x12, x17
+	mul		x30, x10, x17
+	stp	x2, x14, [sp, #8]       // 8-byte Folded Spill
+	umulh	x3, x12, x17
+	umulh	x2, x10, x17
+	umulh	x14, x8, x17
+	mul		x17, x9, x18
+	umulh	x12, x9, x18
+	mul		x10, x11, x18
+	umulh	x11, x11, x18
+	mul		x9, x13, x18
+	umulh	x13, x13, x18
+	mul		x8, x1, x18
+	umulh	x18, x1, x18
+	adcs	x1, x7, x6
+	adcs	x4, x5, x4
+	ldr	x5, [sp, #8]            // 8-byte Folded Reload
+	ldr	x6, [sp, #24]           // 8-byte Folded Reload
+	adcs	x5, x5, xzr
+	adds		x15, x20, x15
+	stp		x6, x15, [x0]
+	adcs	x15, x23, x1
+	adcs	x1, x22, x4
+	adcs	x4, x21, x5
+	adcs	x5, xzr, xzr
+	adds		x15, x15, x16
+	adcs	x16, x1, x26
+	adcs	x1, x4, x25
+	adcs	x4, x5, x24
+	adds		x15, x27, x15
+	adcs	x16, x30, x16
+	adcs	x1, x29, x1
+	adcs	x4, x28, x4
+	adcs	x5, xzr, xzr
+	adds		x14, x16, x14
+	adcs	x16, x1, x2
+	ldr	x2, [sp, #16]           // 8-byte Folded Reload
+	adcs	x1, x4, x3
+	ldp	x29, x30, [sp, #112]    // 8-byte Folded Reload
+	ldp	x20, x19, [sp, #96]     // 8-byte Folded Reload
+	adcs	x2, x5, x2
+	adds		x14, x17, x14
+	adcs	x10, x10, x16
+	adcs	x9, x9, x1
+	adcs	x8, x8, x2
+	stp	x15, x14, [x0, #16]
+	adcs	x14, xzr, xzr
+	adds		x10, x10, x12
+	adcs	x9, x9, x11
+	ldp	x22, x21, [sp, #80]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #64]     // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #48]     // 8-byte Folded Reload
+	ldp	x28, x27, [sp, #32]     // 8-byte Folded Reload
+	adcs	x8, x8, x13
+	stp	x10, x9, [x0, #32]
+	adcs	x9, x14, x18
+	stp	x8, x9, [x0, #48]
+	add	sp, sp, #128            // =128
 	ret
 .Lfunc_end24:
-	.size	mcl_fp_montNF2L, .Lfunc_end24-mcl_fp_montNF2L
+	.size	mcl_fpDbl_mulPre4L, .Lfunc_end24-mcl_fpDbl_mulPre4L
 
-	.globl	mcl_fp_montRed2L
-	.align	2
-	.type	mcl_fp_montRed2L,@function
-mcl_fp_montRed2L:                       // @mcl_fp_montRed2L
+	.globl	mcl_fpDbl_sqrPre4L
+	.p2align	2
+	.type	mcl_fpDbl_sqrPre4L,@function
+mcl_fpDbl_sqrPre4L:                     // @mcl_fpDbl_sqrPre4L
 // BB#0:
-	ldur	x8, [x2, #-8]
-	ldp	 x9, x14, [x1]
-	ldp	 x10, x11, [x2]
-	ldp	x12, x13, [x1, #16]
-	mul	 x15, x9, x8
-	mul	 x16, x15, x11
-	umulh	x17, x15, x10
-	adds	 x16, x17, x16
-	umulh	x17, x15, x11
-	mul	 x15, x15, x10
-	adcs	x17, x17, xzr
-	cmn	 x9, x15
-	adcs	x9, x14, x16
-	adcs	x12, x12, x17
-	mul	 x8, x9, x8
-	adcs	x13, x13, xzr
-	umulh	x14, x8, x11
-	mul	 x15, x8, x11
-	umulh	x16, x8, x10
-	mul	 x8, x8, x10
-	adcs	x17, xzr, xzr
-	adds	 x15, x16, x15
-	adcs	x14, x14, xzr
-	cmn	 x8, x9
-	adcs	x8, x15, x12
-	adcs	x9, x14, x13
-	adcs	x12, x17, xzr
-	subs	 x10, x8, x10
-	sbcs	x11, x9, x11
-	sbcs	x12, x12, xzr
-	tst	 x12, #0x1
-	csel	x8, x8, x10, ne
-	csel	x9, x9, x11, ne
-	stp	 x8, x9, [x0]
+	ldp		x8, x9, [x1]
+	ldp		x10, x13, [x1]
+	ldp	x11, x12, [x1, #16]
+	ldr	x14, [x1, #16]
+	ldr	x1, [x1, #24]
+	mul		x15, x10, x10
+	umulh	x16, x12, x10
+	mul		x17, x12, x10
+	umulh	x18, x14, x10
+	mul		x2, x14, x10
+	umulh	x3, x9, x10
+	mul		x4, x9, x10
+	umulh	x10, x10, x10
+	adds		x10, x10, x4
+	adcs	x5, x3, x2
+	adcs	x17, x18, x17
+	adcs	x16, x16, xzr
+	adds		x10, x4, x10
+	stp		x15, x10, [x0]
+	mul		x10, x9, x9
+	adcs	x10, x10, x5
+	mul		x15, x14, x9
+	mul		x4, x12, x9
+	adcs	x17, x15, x17
+	adcs	x16, x4, x16
+	adcs	x4, xzr, xzr
+	adds		x10, x10, x3
+	umulh	x3, x9, x9
+	adcs	x17, x17, x3
+	umulh	x3, x12, x9
+	umulh	x9, x14, x9
+	adcs	x16, x16, x9
+	adcs	x3, x4, x3
+	adds		x10, x2, x10
+	adcs	x15, x15, x17
+	mul		x17, x14, x14
+	mul		x2, x12, x14
+	adcs	x16, x17, x16
+	adcs	x2, x2, x3
+	adcs	x3, xzr, xzr
+	adds		x15, x15, x18
+	umulh	x12, x12, x14
+	umulh	x14, x14, x14
+	adcs	x9, x16, x9
+	adcs	x14, x2, x14
+	mul		x17, x8, x1
+	adcs	x12, x3, x12
+	mul		x16, x13, x1
+	adds		x15, x17, x15
+	mul		x18, x11, x1
+	adcs	x9, x16, x9
+	mul		x2, x1, x1
+	stp	x10, x15, [x0, #16]
+	adcs	x10, x18, x14
+	adcs	x12, x2, x12
+	umulh	x8, x8, x1
+	adcs	x14, xzr, xzr
+	umulh	x13, x13, x1
+	adds		x8, x9, x8
+	umulh	x11, x11, x1
+	adcs	x9, x10, x13
+	umulh	x1, x1, x1
+	stp	x8, x9, [x0, #32]
+	adcs	x8, x12, x11
+	adcs	x9, x14, x1
+	stp	x8, x9, [x0, #48]
 	ret
 .Lfunc_end25:
-	.size	mcl_fp_montRed2L, .Lfunc_end25-mcl_fp_montRed2L
+	.size	mcl_fpDbl_sqrPre4L, .Lfunc_end25-mcl_fpDbl_sqrPre4L
 
-	.globl	mcl_fp_addPre2L
-	.align	2
-	.type	mcl_fp_addPre2L,@function
-mcl_fp_addPre2L:                        // @mcl_fp_addPre2L
+	.globl	mcl_fp_mont4L
+	.p2align	2
+	.type	mcl_fp_mont4L,@function
+mcl_fp_mont4L:                          // @mcl_fp_mont4L
 // BB#0:
-	ldp	 x8, x11, [x1]
-	ldp	 x9, x10, [x2]
-	adds	 x8, x9, x8
-	str	 x8, [x0]
-	adcs	x9, x10, x11
-	adcs	x8, xzr, xzr
-	str	x9, [x0, #8]
-	mov	 x0, x8
+	sub	sp, sp, #112            // =112
+	stp	x28, x27, [sp, #16]     // 8-byte Folded Spill
+	stp	x26, x25, [sp, #32]     // 8-byte Folded Spill
+	stp	x24, x23, [sp, #48]     // 8-byte Folded Spill
+	stp	x22, x21, [sp, #64]     // 8-byte Folded Spill
+	stp	x20, x19, [sp, #80]     // 8-byte Folded Spill
+	stp	x29, x30, [sp, #96]     // 8-byte Folded Spill
+	str	x0, [sp, #8]            // 8-byte Folded Spill
+	ldp		x14, x15, [x1]
+	ldp		x17, x18, [x2]
+	ldp	x13, x16, [x1, #16]
+	ldp	x0, x11, [x3, #-8]
+	ldr	x10, [x3, #8]
+	mul		x19, x15, x17
+	umulh	x20, x14, x17
+	ldp	x9, x8, [x3, #16]
+	mul		x6, x13, x17
+	umulh	x7, x15, x17
+	adds		x19, x20, x19
+	umulh	x3, x16, x17
+	mul		x4, x16, x17
+	umulh	x5, x13, x17
+	mul		x17, x14, x17
+	adcs	x6, x7, x6
+	mul		x20, x17, x0
+	adcs	x4, x5, x4
+	mul		x30, x20, x10
+	umulh	x5, x20, x11
+	adcs	x3, x3, xzr
+	mul		x29, x20, x9
+	adds		x5, x5, x30
+	umulh	x30, x20, x10
+	mul		x7, x20, x8
+	adcs	x29, x30, x29
+	umulh	x30, x20, x9
+	adcs	x7, x30, x7
+	umulh	x30, x20, x8
+	mul		x20, x20, x11
+	adcs	x30, x30, xzr
+	cmn		x20, x17
+	adcs	x5, x5, x19
+	adcs	x6, x29, x6
+	adcs	x4, x7, x4
+	adcs	x3, x30, x3
+	mul		x26, x18, x15
+	umulh	x27, x18, x14
+	adcs	x30, xzr, xzr
+	mul		x24, x18, x13
+	umulh	x25, x18, x15
+	adds		x26, x27, x26
+	mul		x22, x18, x16
+	umulh	x23, x18, x13
+	adcs	x24, x25, x24
+	umulh	x21, x18, x16
+	adcs	x22, x23, x22
+	mul		x18, x18, x14
+	adcs	x21, x21, xzr
+	adds		x18, x5, x18
+	adcs	x5, x6, x26
+	adcs	x4, x4, x24
+	adcs	x3, x3, x22
+	mul		x6, x18, x0
+	adcs	x21, x30, x21
+	mul		x26, x6, x10
+	umulh	x22, x6, x11
+	adcs	x30, xzr, xzr
+	mul		x24, x6, x9
+	adds		x22, x22, x26
+	umulh	x26, x6, x10
+	ldp	x1, x2, [x2, #16]
+	mul		x23, x6, x8
+	adcs	x24, x26, x24
+	umulh	x26, x6, x9
+	adcs	x23, x26, x23
+	umulh	x26, x6, x8
+	mul		x6, x6, x11
+	adcs	x26, x26, xzr
+	cmn		x6, x18
+	umulh	x28, x1, x16
+	mul		x17, x1, x16
+	umulh	x20, x1, x13
+	mul		x19, x1, x13
+	umulh	x29, x1, x15
+	mul		x7, x1, x15
+	umulh	x27, x1, x14
+	mul		x1, x1, x14
+	umulh	x25, x2, x16
+	mul		x16, x2, x16
+	umulh	x18, x2, x13
+	mul		x13, x2, x13
+	umulh	x6, x2, x15
+	mul		x15, x2, x15
+	umulh	x12, x2, x14
+	mul		x14, x2, x14
+	adcs	x2, x22, x5
+	adcs	x4, x24, x4
+	adcs	x3, x23, x3
+	adcs	x5, x26, x21
+	adcs	x21, x30, xzr
+	adds		x7, x27, x7
+	adcs	x19, x29, x19
+	adcs	x17, x20, x17
+	adcs	x20, x28, xzr
+	adds		x1, x2, x1
+	adcs	x2, x4, x7
+	adcs	x3, x3, x19
+	adcs	x17, x5, x17
+	mul		x4, x1, x0
+	adcs	x20, x21, x20
+	mul		x22, x4, x10
+	umulh	x5, x4, x11
+	adcs	x21, xzr, xzr
+	mul		x19, x4, x9
+	adds		x5, x5, x22
+	umulh	x22, x4, x10
+	mul		x7, x4, x8
+	adcs	x19, x22, x19
+	umulh	x22, x4, x9
+	adcs	x7, x22, x7
+	umulh	x22, x4, x8
+	mul		x4, x4, x11
+	adcs	x22, x22, xzr
+	cmn		x4, x1
+	adcs	x1, x5, x2
+	adcs	x2, x19, x3
+	adcs	x17, x7, x17
+	adcs	x3, x22, x20
+	adcs	x4, x21, xzr
+	adds		x12, x12, x15
+	adcs	x13, x6, x13
+	adcs	x15, x18, x16
+	adcs	x16, x25, xzr
+	adds		x14, x1, x14
+	adcs	x12, x2, x12
+	adcs	x13, x17, x13
+	adcs	x15, x3, x15
+	mul		x18, x14, x0
+	adcs	x16, x4, x16
+	mul		x6, x18, x10
+	umulh	x7, x18, x11
+	adcs	x3, xzr, xzr
+	mul		x2, x18, x9
+	umulh	x5, x18, x10
+	adds		x4, x7, x6
+	mul		x0, x18, x8
+	umulh	x1, x18, x9
+	adcs	x2, x5, x2
+	umulh	x17, x18, x8
+	adcs	x0, x1, x0
+	mul		x18, x18, x11
+	adcs	x17, x17, xzr
+	cmn		x18, x14
+	adcs	x12, x4, x12
+	adcs	x13, x2, x13
+	adcs	x14, x0, x15
+	adcs	x15, x17, x16
+	adcs	x16, x3, xzr
+	subs		x11, x12, x11
+	sbcs	x10, x13, x10
+	sbcs	x9, x14, x9
+	sbcs	x8, x15, x8
+	sbcs	x16, x16, xzr
+	tst	 x16, #0x1
+	csel	x11, x12, x11, ne
+	ldr	x12, [sp, #8]           // 8-byte Folded Reload
+	csel	x10, x13, x10, ne
+	csel	x9, x14, x9, ne
+	csel	x8, x15, x8, ne
+	stp		x11, x10, [x12]
+	stp	x9, x8, [x12, #16]
+	ldp	x29, x30, [sp, #96]     // 8-byte Folded Reload
+	ldp	x20, x19, [sp, #80]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #64]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #48]     // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #32]     // 8-byte Folded Reload
+	ldp	x28, x27, [sp, #16]     // 8-byte Folded Reload
+	add	sp, sp, #112            // =112
 	ret
 .Lfunc_end26:
-	.size	mcl_fp_addPre2L, .Lfunc_end26-mcl_fp_addPre2L
+	.size	mcl_fp_mont4L, .Lfunc_end26-mcl_fp_mont4L
 
-	.globl	mcl_fp_subPre2L
-	.align	2
-	.type	mcl_fp_subPre2L,@function
-mcl_fp_subPre2L:                        // @mcl_fp_subPre2L
+	.globl	mcl_fp_montNF4L
+	.p2align	2
+	.type	mcl_fp_montNF4L,@function
+mcl_fp_montNF4L:                        // @mcl_fp_montNF4L
 // BB#0:
-	ldp	 x8, x11, [x1]
-	ldp	 x9, x10, [x2]
-	subs	 x8, x8, x9
-	str	 x8, [x0]
-	sbcs	x9, x11, x10
-	ngcs	 x8, xzr
-	and	x8, x8, #0x1
-	str	x9, [x0, #8]
-	mov	 x0, x8
+	str	x27, [sp, #-80]!        // 8-byte Folded Spill
+	stp	x26, x25, [sp, #16]     // 8-byte Folded Spill
+	stp	x24, x23, [sp, #32]     // 8-byte Folded Spill
+	stp	x22, x21, [sp, #48]     // 8-byte Folded Spill
+	stp	x20, x19, [sp, #64]     // 8-byte Folded Spill
+	ldp		x13, x16, [x1]
+	ldp		x17, x18, [x2]
+	ldp	x14, x15, [x1, #16]
+	ldp	x12, x11, [x3, #-8]
+	ldr	x10, [x3, #8]
+	mul		x19, x16, x17
+	umulh	x20, x13, x17
+	mul		x6, x14, x17
+	umulh	x7, x16, x17
+	adds		x19, x20, x19
+	ldp	x9, x8, [x3, #16]
+	umulh	x3, x15, x17
+	mul		x4, x15, x17
+	umulh	x5, x14, x17
+	mul		x17, x13, x17
+	adcs	x6, x7, x6
+	mul		x7, x17, x12
+	adcs	x4, x5, x4
+	mul		x5, x7, x11
+	adcs	x3, x3, xzr
+	cmn		x5, x17
+	mul		x5, x7, x10
+	adcs	x5, x5, x19
+	mul		x19, x7, x9
+	adcs	x6, x19, x6
+	mul		x19, x7, x8
+	adcs	x4, x19, x4
+	umulh	x19, x7, x11
+	adcs	x3, x3, xzr
+	adds		x5, x5, x19
+	umulh	x19, x7, x10
+	adcs	x6, x6, x19
+	umulh	x19, x7, x9
+	adcs	x4, x4, x19
+	umulh	x7, x7, x8
+	mul		x26, x18, x16
+	umulh	x27, x18, x13
+	adcs	x3, x3, x7
+	mul		x24, x18, x14
+	umulh	x25, x18, x16
+	adds		x26, x27, x26
+	mul		x22, x18, x15
+	umulh	x23, x18, x14
+	adcs	x24, x25, x24
+	umulh	x21, x18, x15
+	adcs	x22, x23, x22
+	mul		x18, x18, x13
+	adcs	x21, x21, xzr
+	adds		x18, x18, x5
+	ldp	x1, x2, [x2, #16]
+	adcs	x6, x26, x6
+	adcs	x4, x24, x4
+	mul		x24, x18, x12
+	adcs	x3, x22, x3
+	mul		x22, x24, x11
+	adcs	x21, x21, xzr
+	umulh	x20, x1, x15
+	mul		x17, x1, x15
+	umulh	x19, x1, x14
+	mul		x7, x1, x14
+	umulh	x27, x1, x16
+	mul		x25, x1, x16
+	umulh	x23, x1, x13
+	mul		x1, x1, x13
+	umulh	x5, x2, x15
+	mul		x15, x2, x15
+	umulh	x26, x2, x14
+	mul		x14, x2, x14
+	cmn		x22, x18
+	umulh	x18, x2, x16
+	mul		x16, x2, x16
+	umulh	x22, x2, x13
+	mul		x13, x2, x13
+	mul		x2, x24, x10
+	adcs	x2, x2, x6
+	mul		x6, x24, x9
+	adcs	x4, x6, x4
+	mul		x6, x24, x8
+	adcs	x3, x6, x3
+	umulh	x6, x24, x11
+	adcs	x21, x21, xzr
+	adds		x2, x2, x6
+	umulh	x6, x24, x10
+	adcs	x4, x4, x6
+	umulh	x6, x24, x9
+	adcs	x3, x3, x6
+	umulh	x6, x24, x8
+	adcs	x6, x21, x6
+	adds		x21, x23, x25
+	adcs	x7, x27, x7
+	adcs	x17, x19, x17
+	adcs	x19, x20, xzr
+	adds		x1, x1, x2
+	adcs	x2, x21, x4
+	adcs	x3, x7, x3
+	mul		x4, x1, x12
+	adcs	x17, x17, x6
+	mul		x6, x4, x11
+	adcs	x19, x19, xzr
+	cmn		x6, x1
+	mul		x1, x4, x10
+	mul		x20, x4, x9
+	adcs	x1, x1, x2
+	mul		x7, x4, x8
+	adcs	x3, x20, x3
+	adcs	x17, x7, x17
+	umulh	x6, x4, x8
+	umulh	x2, x4, x9
+	umulh	x20, x4, x10
+	umulh	x4, x4, x11
+	adcs	x7, x19, xzr
+	adds		x1, x1, x4
+	adcs	x3, x3, x20
+	adcs	x17, x17, x2
+	adcs	x2, x7, x6
+	adds		x16, x22, x16
+	adcs	x14, x18, x14
+	adcs	x15, x26, x15
+	adcs	x18, x5, xzr
+	adds		x13, x13, x1
+	adcs	x16, x16, x3
+	adcs	x14, x14, x17
+	mul		x12, x13, x12
+	adcs	x15, x15, x2
+	mul		x4, x12, x11
+	adcs	x18, x18, xzr
+	mul		x3, x12, x10
+	cmn		x4, x13
+	mul		x1, x12, x9
+	adcs	x13, x3, x16
+	mul		x17, x12, x8
+	adcs	x14, x1, x14
+	adcs	x15, x17, x15
+	umulh	x5, x12, x8
+	umulh	x6, x12, x9
+	umulh	x7, x12, x10
+	umulh	x12, x12, x11
+	adcs	x16, x18, xzr
+	adds		x12, x13, x12
+	adcs	x13, x14, x7
+	adcs	x14, x15, x6
+	adcs	x15, x16, x5
+	subs		x11, x12, x11
+	sbcs	x10, x13, x10
+	sbcs	x9, x14, x9
+	sbcs	x8, x15, x8
+	cmp		x8, #0          // =0
+	csel	x11, x12, x11, lt
+	csel	x10, x13, x10, lt
+	csel	x9, x14, x9, lt
+	csel	x8, x15, x8, lt
+	stp		x11, x10, [x0]
+	stp	x9, x8, [x0, #16]
+	ldp	x20, x19, [sp, #64]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #48]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #32]     // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #16]     // 8-byte Folded Reload
+	ldr	x27, [sp], #80          // 8-byte Folded Reload
 	ret
 .Lfunc_end27:
-	.size	mcl_fp_subPre2L, .Lfunc_end27-mcl_fp_subPre2L
+	.size	mcl_fp_montNF4L, .Lfunc_end27-mcl_fp_montNF4L
 
-	.globl	mcl_fp_shr1_2L
-	.align	2
-	.type	mcl_fp_shr1_2L,@function
-mcl_fp_shr1_2L:                         // @mcl_fp_shr1_2L
+	.globl	mcl_fp_montRed4L
+	.p2align	2
+	.type	mcl_fp_montRed4L,@function
+mcl_fp_montRed4L:                       // @mcl_fp_montRed4L
 // BB#0:
-	ldp	 x8, x9, [x1]
-	extr	x8, x9, x8, #1
-	lsr	x9, x9, #1
-	stp	 x8, x9, [x0]
+	str	x19, [sp, #-16]!        // 8-byte Folded Spill
+	ldp	x10, x12, [x2, #-8]
+	ldp		x16, x17, [x1]
+	ldr	x11, [x2, #8]
+	ldp	x9, x8, [x2, #16]
+	ldp	x15, x14, [x1, #16]
+	mul		x3, x16, x10
+	mul		x7, x3, x11
+	umulh	x19, x3, x12
+	mul		x6, x3, x9
+	adds		x7, x19, x7
+	umulh	x19, x3, x11
+	mul		x5, x3, x8
+	adcs	x6, x19, x6
+	umulh	x19, x3, x9
+	umulh	x4, x3, x8
+	adcs	x5, x19, x5
+	mul		x3, x3, x12
+	adcs	x4, x4, xzr
+	ldp	x13, x18, [x1, #32]
+	cmn		x3, x16
+	adcs	x16, x7, x17
+	adcs	x15, x6, x15
+	adcs	x14, x5, x14
+	mul		x17, x16, x10
+	adcs	x13, x4, x13
+	umulh	x7, x17, x12
+	mul		x19, x17, x11
+	adcs	x4, xzr, xzr
+	umulh	x6, x17, x11
+	adds		x7, x19, x7
+	mul		x19, x17, x9
+	umulh	x5, x17, x9
+	adcs	x6, x19, x6
+	mul		x19, x17, x8
+	umulh	x3, x17, x8
+	adcs	x5, x19, x5
+	mul		x17, x17, x12
+	adcs	x3, x4, x3
+	cmn		x17, x16
+	adcs	x15, x7, x15
+	adcs	x14, x6, x14
+	adcs	x13, x5, x13
+	mul		x16, x15, x10
+	adcs	x18, x3, x18
+	umulh	x7, x16, x12
+	mul		x19, x16, x11
+	adcs	x3, xzr, xzr
+	umulh	x6, x16, x11
+	adds		x7, x19, x7
+	mul		x19, x16, x9
+	umulh	x4, x16, x9
+	mul		x5, x16, x8
+	adcs	x6, x19, x6
+	umulh	x17, x16, x8
+	adcs	x4, x5, x4
+	mul		x16, x16, x12
+	adcs	x17, x3, x17
+	ldp	x2, x1, [x1, #48]
+	cmn		x16, x15
+	adcs	x14, x7, x14
+	adcs	x13, x6, x13
+	adcs	x16, x4, x18
+	mul		x10, x14, x10
+	adcs	x17, x17, x2
+	umulh	x6, x10, x12
+	mul		x7, x10, x11
+	adcs	x2, xzr, xzr
+	umulh	x4, x10, x11
+	mul		x5, x10, x9
+	adds		x6, x7, x6
+	umulh	x18, x10, x9
+	mul		x3, x10, x8
+	adcs	x4, x5, x4
+	umulh	x15, x10, x8
+	adcs	x18, x3, x18
+	mul		x10, x10, x12
+	adcs	x15, x2, x15
+	cmn		x10, x14
+	adcs	x10, x6, x13
+	adcs	x13, x4, x16
+	adcs	x14, x18, x17
+	adcs	x15, x15, x1
+	subs		x12, x10, x12
+	sbcs	x11, x13, x11
+	sbcs	x9, x14, x9
+	sbcs	x8, x15, x8
+	ngcs	 x16, xzr
+	tst	 x16, #0x1
+	csel	x10, x10, x12, ne
+	csel	x11, x13, x11, ne
+	csel	x9, x14, x9, ne
+	csel	x8, x15, x8, ne
+	stp		x10, x11, [x0]
+	stp	x9, x8, [x0, #16]
+	ldr	x19, [sp], #16          // 8-byte Folded Reload
 	ret
 .Lfunc_end28:
-	.size	mcl_fp_shr1_2L, .Lfunc_end28-mcl_fp_shr1_2L
+	.size	mcl_fp_montRed4L, .Lfunc_end28-mcl_fp_montRed4L
 
-	.globl	mcl_fp_add2L
-	.align	2
-	.type	mcl_fp_add2L,@function
-mcl_fp_add2L:                           // @mcl_fp_add2L
+	.globl	mcl_fp_montRedNF4L
+	.p2align	2
+	.type	mcl_fp_montRedNF4L,@function
+mcl_fp_montRedNF4L:                     // @mcl_fp_montRedNF4L
 // BB#0:
-	ldp	 x8, x11, [x1]
-	ldp	 x9, x10, [x2]
-	adds	 x8, x9, x8
-	ldp	 x9, x12, [x3]
-	adcs	x10, x10, x11
-	stp	 x8, x10, [x0]
-	adcs	x11, xzr, xzr
-	subs	 x9, x8, x9
-	sbcs	x8, x10, x12
-	sbcs	x10, x11, xzr
-	and	w10, w10, #0x1
-	tbnz	w10, #0, .LBB29_2
-// BB#1:                                // %nocarry
-	stp	 x9, x8, [x0]
-.LBB29_2:                               // %carry
+	str	x19, [sp, #-16]!        // 8-byte Folded Spill
+	ldp	x10, x12, [x2, #-8]
+	ldp		x16, x17, [x1]
+	ldr	x11, [x2, #8]
+	ldp	x9, x8, [x2, #16]
+	ldp	x15, x14, [x1, #16]
+	mul		x3, x16, x10
+	mul		x7, x3, x11
+	umulh	x19, x3, x12
+	mul		x6, x3, x9
+	adds		x7, x19, x7
+	umulh	x19, x3, x11
+	mul		x5, x3, x8
+	adcs	x6, x19, x6
+	umulh	x19, x3, x9
+	umulh	x4, x3, x8
+	adcs	x5, x19, x5
+	mul		x3, x3, x12
+	adcs	x4, x4, xzr
+	ldp	x13, x18, [x1, #32]
+	cmn		x3, x16
+	adcs	x16, x7, x17
+	adcs	x15, x6, x15
+	adcs	x14, x5, x14
+	mul		x17, x16, x10
+	adcs	x13, x4, x13
+	umulh	x7, x17, x12
+	mul		x19, x17, x11
+	adcs	x4, xzr, xzr
+	umulh	x6, x17, x11
+	adds		x7, x19, x7
+	mul		x19, x17, x9
+	umulh	x5, x17, x9
+	adcs	x6, x19, x6
+	mul		x19, x17, x8
+	umulh	x3, x17, x8
+	adcs	x5, x19, x5
+	mul		x17, x17, x12
+	adcs	x3, x4, x3
+	cmn		x17, x16
+	adcs	x15, x7, x15
+	adcs	x14, x6, x14
+	adcs	x13, x5, x13
+	mul		x16, x15, x10
+	adcs	x18, x3, x18
+	umulh	x7, x16, x12
+	mul		x19, x16, x11
+	adcs	x3, xzr, xzr
+	umulh	x6, x16, x11
+	adds		x7, x19, x7
+	mul		x19, x16, x9
+	umulh	x4, x16, x9
+	mul		x5, x16, x8
+	adcs	x6, x19, x6
+	umulh	x17, x16, x8
+	adcs	x4, x5, x4
+	mul		x16, x16, x12
+	adcs	x17, x3, x17
+	ldp	x2, x1, [x1, #48]
+	cmn		x16, x15
+	adcs	x14, x7, x14
+	adcs	x13, x6, x13
+	adcs	x16, x4, x18
+	mul		x10, x14, x10
+	adcs	x17, x17, x2
+	umulh	x6, x10, x12
+	mul		x7, x10, x11
+	adcs	x2, xzr, xzr
+	umulh	x4, x10, x11
+	mul		x5, x10, x9
+	adds		x6, x7, x6
+	umulh	x18, x10, x9
+	mul		x3, x10, x8
+	adcs	x4, x5, x4
+	umulh	x15, x10, x8
+	adcs	x18, x3, x18
+	mul		x10, x10, x12
+	adcs	x15, x2, x15
+	cmn		x10, x14
+	adcs	x10, x6, x13
+	adcs	x13, x4, x16
+	adcs	x14, x18, x17
+	adcs	x15, x15, x1
+	subs		x12, x10, x12
+	sbcs	x11, x13, x11
+	sbcs	x9, x14, x9
+	sbcs	x8, x15, x8
+	cmp		x8, #0          // =0
+	csel	x10, x10, x12, lt
+	csel	x11, x13, x11, lt
+	csel	x9, x14, x9, lt
+	csel	x8, x15, x8, lt
+	stp		x10, x11, [x0]
+	stp	x9, x8, [x0, #16]
+	ldr	x19, [sp], #16          // 8-byte Folded Reload
 	ret
 .Lfunc_end29:
-	.size	mcl_fp_add2L, .Lfunc_end29-mcl_fp_add2L
+	.size	mcl_fp_montRedNF4L, .Lfunc_end29-mcl_fp_montRedNF4L
 
-	.globl	mcl_fp_addNF2L
-	.align	2
-	.type	mcl_fp_addNF2L,@function
-mcl_fp_addNF2L:                         // @mcl_fp_addNF2L
+	.globl	mcl_fp_addPre4L
+	.p2align	2
+	.type	mcl_fp_addPre4L,@function
+mcl_fp_addPre4L:                        // @mcl_fp_addPre4L
 // BB#0:
-	ldp	 x8, x9, [x1]
-	ldp	 x10, x11, [x2]
-	ldp	 x12, x13, [x3]
-	adds	 x8, x10, x8
-	adcs	x9, x11, x9
-	subs	 x10, x8, x12
-	sbcs	x11, x9, x13
-	cmp	 x11, #0                // =0
-	csel	x8, x8, x10, lt
-	csel	x9, x9, x11, lt
-	stp	 x8, x9, [x0]
+	ldp		x10, x11, [x2]
+	ldp		x12, x13, [x1]
+	ldp	x8, x9, [x2, #16]
+	ldp	x14, x15, [x1, #16]
+	adds		x10, x10, x12
+	adcs	x11, x11, x13
+	stp		x10, x11, [x0]
+	adcs	x10, x8, x14
+	adcs	x9, x9, x15
+	adcs	x8, xzr, xzr
+	stp	x10, x9, [x0, #16]
+	mov	 x0, x8
 	ret
 .Lfunc_end30:
-	.size	mcl_fp_addNF2L, .Lfunc_end30-mcl_fp_addNF2L
+	.size	mcl_fp_addPre4L, .Lfunc_end30-mcl_fp_addPre4L
 
-	.globl	mcl_fp_sub2L
-	.align	2
-	.type	mcl_fp_sub2L,@function
-mcl_fp_sub2L:                           // @mcl_fp_sub2L
+	.globl	mcl_fp_subPre4L
+	.p2align	2
+	.type	mcl_fp_subPre4L,@function
+mcl_fp_subPre4L:                        // @mcl_fp_subPre4L
 // BB#0:
-	ldp	 x8, x11, [x1]
-	ldp	 x9, x10, [x2]
-	subs	 x9, x8, x9
-	sbcs	x8, x11, x10
-	stp	 x9, x8, [x0]
-	ngcs	 x10, xzr
-	and	w10, w10, #0x1
-	tbnz	w10, #0, .LBB31_2
-// BB#1:                                // %nocarry
-	ret
-.LBB31_2:                               // %carry
-	ldp	 x10, x11, [x3]
-	adds	 x9, x10, x9
-	adcs	x8, x11, x8
-	stp	 x9, x8, [x0]
+	ldp		x10, x11, [x2]
+	ldp		x12, x13, [x1]
+	ldp	x8, x9, [x2, #16]
+	ldp	x14, x15, [x1, #16]
+	subs		x10, x12, x10
+	sbcs	x11, x13, x11
+	stp		x10, x11, [x0]
+	sbcs	x10, x14, x8
+	sbcs	x9, x15, x9
+	ngcs	 x8, xzr
+	and	x8, x8, #0x1
+	stp	x10, x9, [x0, #16]
+	mov	 x0, x8
 	ret
 .Lfunc_end31:
-	.size	mcl_fp_sub2L, .Lfunc_end31-mcl_fp_sub2L
+	.size	mcl_fp_subPre4L, .Lfunc_end31-mcl_fp_subPre4L
 
-	.globl	mcl_fp_subNF2L
-	.align	2
-	.type	mcl_fp_subNF2L,@function
-mcl_fp_subNF2L:                         // @mcl_fp_subNF2L
+	.globl	mcl_fp_shr1_4L
+	.p2align	2
+	.type	mcl_fp_shr1_4L,@function
+mcl_fp_shr1_4L:                         // @mcl_fp_shr1_4L
 // BB#0:
-	ldp	 x8, x11, [x1]
-	ldp	 x9, x10, [x2]
-	subs	 x8, x8, x9
-	ldp	 x9, x12, [x3]
-	sbcs	x10, x11, x10
-	asr	x11, x10, #63
-	and	 x9, x11, x9
-	and	 x11, x11, x12
-	adds	 x8, x9, x8
-	str	 x8, [x0]
-	adcs	x8, x11, x10
-	str	x8, [x0, #8]
+	ldp		x8, x9, [x1]
+	ldp	x10, x11, [x1, #16]
+	extr	x8, x9, x8, #1
+	extr	x9, x10, x9, #1
+	extr	x10, x11, x10, #1
+	lsr	x11, x11, #1
+	stp		x8, x9, [x0]
+	stp	x10, x11, [x0, #16]
 	ret
 .Lfunc_end32:
-	.size	mcl_fp_subNF2L, .Lfunc_end32-mcl_fp_subNF2L
+	.size	mcl_fp_shr1_4L, .Lfunc_end32-mcl_fp_shr1_4L
 
-	.globl	mcl_fpDbl_add2L
-	.align	2
-	.type	mcl_fpDbl_add2L,@function
-mcl_fpDbl_add2L:                        // @mcl_fpDbl_add2L
+	.globl	mcl_fp_add4L
+	.p2align	2
+	.type	mcl_fp_add4L,@function
+mcl_fp_add4L:                           // @mcl_fp_add4L
 // BB#0:
+	ldp		x10, x11, [x2]
+	ldp		x12, x13, [x1]
 	ldp	x8, x9, [x2, #16]
-	ldp	 x10, x15, [x1]
-	ldp	 x11, x14, [x2]
-	ldp	x12, x13, [x1, #16]
-	adds	 x10, x11, x10
-	ldp	 x11, x16, [x3]
-	str	 x10, [x0]
-	adcs	x10, x14, x15
-	str	x10, [x0, #8]
-	adcs	x8, x8, x12
-	adcs	x9, x9, x13
-	adcs	x10, xzr, xzr
-	subs	 x11, x8, x11
-	sbcs	x12, x9, x16
-	sbcs	x10, x10, xzr
-	tst	 x10, #0x1
-	csel	x8, x8, x11, ne
-	csel	x9, x9, x12, ne
-	stp	x8, x9, [x0, #16]
+	ldp	x14, x15, [x1, #16]
+	adds		x10, x10, x12
+	adcs	x12, x11, x13
+	adcs	x14, x8, x14
+	ldp		x8, x17, [x3]
+	ldp	x13, x16, [x3, #16]
+	adcs	x15, x9, x15
+	adcs	x18, xzr, xzr
+	subs		x11, x10, x8
+	stp		x10, x12, [x0]
+	sbcs	x10, x12, x17
+	sbcs	x9, x14, x13
+	sbcs	x8, x15, x16
+	sbcs	x12, x18, xzr
+	stp	x14, x15, [x0, #16]
+	tbnz	w12, #0, .LBB33_2
+// BB#1:                                // %nocarry
+	stp		x11, x10, [x0]
+	stp	x9, x8, [x0, #16]
+.LBB33_2:                               // %carry
 	ret
 .Lfunc_end33:
-	.size	mcl_fpDbl_add2L, .Lfunc_end33-mcl_fpDbl_add2L
+	.size	mcl_fp_add4L, .Lfunc_end33-mcl_fp_add4L
 
-	.globl	mcl_fpDbl_sub2L
-	.align	2
-	.type	mcl_fpDbl_sub2L,@function
-mcl_fpDbl_sub2L:                        // @mcl_fpDbl_sub2L
+	.globl	mcl_fp_addNF4L
+	.p2align	2
+	.type	mcl_fp_addNF4L,@function
+mcl_fp_addNF4L:                         // @mcl_fp_addNF4L
 // BB#0:
-	ldp	x8, x9, [x2, #16]
-	ldp	 x10, x14, [x2]
-	ldp	 x11, x15, [x1]
-	ldp	x12, x13, [x1, #16]
-	subs	 x10, x11, x10
-	ldp	 x11, x16, [x3]
-	str	 x10, [x0]
-	sbcs	x10, x15, x14
-	str	x10, [x0, #8]
-	sbcs	x8, x12, x8
-	sbcs	x9, x13, x9
-	ngcs	 x10, xzr
-	tst	 x10, #0x1
-	csel	x10, x16, xzr, ne
-	csel	x11, x11, xzr, ne
-	adds	 x8, x11, x8
-	str	x8, [x0, #16]
-	adcs	x8, x10, x9
-	str	x8, [x0, #24]
+	ldp		x10, x11, [x1]
+	ldp		x12, x13, [x2]
+	ldp	x8, x9, [x1, #16]
+	ldp	x14, x15, [x2, #16]
+	adds		x10, x12, x10
+	adcs	x11, x13, x11
+	ldp		x12, x13, [x3]
+	adcs	x8, x14, x8
+	ldp	x14, x16, [x3, #16]
+	adcs	x9, x15, x9
+	subs		x12, x10, x12
+	sbcs	x13, x11, x13
+	sbcs	x14, x8, x14
+	sbcs	x15, x9, x16
+	cmp		x15, #0         // =0
+	csel	x10, x10, x12, lt
+	csel	x11, x11, x13, lt
+	csel	x8, x8, x14, lt
+	csel	x9, x9, x15, lt
+	stp		x10, x11, [x0]
+	stp	x8, x9, [x0, #16]
 	ret
 .Lfunc_end34:
-	.size	mcl_fpDbl_sub2L, .Lfunc_end34-mcl_fpDbl_sub2L
+	.size	mcl_fp_addNF4L, .Lfunc_end34-mcl_fp_addNF4L
 
-	.globl	mcl_fp_mulUnitPre3L
-	.align	2
-	.type	mcl_fp_mulUnitPre3L,@function
-mcl_fp_mulUnitPre3L:                    // @mcl_fp_mulUnitPre3L
+	.globl	mcl_fp_sub4L
+	.p2align	2
+	.type	mcl_fp_sub4L,@function
+mcl_fp_sub4L:                           // @mcl_fp_sub4L
 // BB#0:
-	ldp	 x8, x9, [x1]
-	ldr	x10, [x1, #16]
-	mul	 x11, x8, x2
-	mul	 x12, x9, x2
-	umulh	x8, x8, x2
-	mul	 x13, x10, x2
-	umulh	x9, x9, x2
-	umulh	x10, x10, x2
-	adds	 x8, x8, x12
-	stp	 x11, x8, [x0]
-	adcs	x8, x9, x13
-	str	x8, [x0, #16]
-	adcs	x8, x10, xzr
-	str	x8, [x0, #24]
+	ldp		x8, x9, [x2]
+	ldp		x12, x13, [x1]
+	ldp	x10, x11, [x2, #16]
+	ldp	x14, x15, [x1, #16]
+	subs		x8, x12, x8
+	sbcs	x9, x13, x9
+	sbcs	x10, x14, x10
+	sbcs	x11, x15, x11
+	ngcs	 x12, xzr
+	stp		x8, x9, [x0]
+	stp	x10, x11, [x0, #16]
+	tbnz	w12, #0, .LBB35_2
+// BB#1:                                // %nocarry
+	ret
+.LBB35_2:                               // %carry
+	ldp		x12, x13, [x3]
+	ldp	x14, x15, [x3, #16]
+	adds		x8, x12, x8
+	adcs	x9, x13, x9
+	stp		x8, x9, [x0]
+	adcs	x8, x14, x10
+	adcs	x9, x15, x11
+	stp	x8, x9, [x0, #16]
 	ret
 .Lfunc_end35:
-	.size	mcl_fp_mulUnitPre3L, .Lfunc_end35-mcl_fp_mulUnitPre3L
+	.size	mcl_fp_sub4L, .Lfunc_end35-mcl_fp_sub4L
 
-	.globl	mcl_fpDbl_mulPre3L
-	.align	2
-	.type	mcl_fpDbl_mulPre3L,@function
-mcl_fpDbl_mulPre3L:                     // @mcl_fpDbl_mulPre3L
+	.globl	mcl_fp_subNF4L
+	.p2align	2
+	.type	mcl_fp_subNF4L,@function
+mcl_fp_subNF4L:                         // @mcl_fp_subNF4L
 // BB#0:
-	stp	x20, x19, [sp, #-16]!
-	ldp	 x8, x9, [x1]
-	ldp	 x10, x12, [x2]
-	ldr	x11, [x1, #16]
-	ldr	x13, [x2, #16]
-	mul	 x14, x8, x10
-	umulh	x15, x11, x10
-	mul	 x16, x11, x10
-	umulh	x17, x9, x10
-	mul	 x18, x9, x10
-	umulh	x10, x8, x10
-	mul	 x1, x8, x12
-	mul	 x2, x11, x12
-	mul	 x3, x9, x12
-	umulh	x4, x11, x12
-	umulh	x5, x9, x12
-	umulh	x12, x8, x12
-	mul	 x6, x8, x13
-	mul	 x7, x11, x13
-	mul	 x19, x9, x13
-	umulh	x8, x8, x13
-	umulh	x9, x9, x13
-	umulh	x11, x11, x13
-	str	 x14, [x0]
-	adds	 x10, x10, x18
-	adcs	x13, x17, x16
-	adcs	x14, x15, xzr
-	adds	 x10, x10, x1
-	str	x10, [x0, #8]
-	adcs	x10, x13, x3
-	adcs	x13, x14, x2
-	adcs	x14, xzr, xzr
-	adds	 x10, x10, x12
-	adcs	x12, x13, x5
-	adcs	x13, x14, x4
-	adds	 x10, x10, x6
-	str	x10, [x0, #16]
-	adcs	x10, x12, x19
-	adcs	x12, x13, x7
-	adcs	x13, xzr, xzr
-	adds	 x8, x10, x8
-	str	x8, [x0, #24]
-	adcs	x8, x12, x9
-	str	x8, [x0, #32]
-	adcs	x8, x13, x11
-	str	x8, [x0, #40]
-	ldp	x20, x19, [sp], #16
+	ldp		x10, x11, [x2]
+	ldp		x12, x13, [x1]
+	ldp	x8, x9, [x2, #16]
+	ldp	x14, x15, [x1, #16]
+	subs		x10, x12, x10
+	sbcs	x11, x13, x11
+	sbcs	x8, x14, x8
+	ldp		x14, x16, [x3]
+	ldp	x12, x13, [x3, #16]
+	sbcs	x9, x15, x9
+	asr	x15, x9, #63
+	and		x14, x15, x14
+	and		x16, x15, x16
+	adds		x10, x14, x10
+	and		x12, x15, x12
+	adcs	x11, x16, x11
+	and		x13, x15, x13
+	adcs	x8, x12, x8
+	adcs	x9, x13, x9
+	stp		x10, x11, [x0]
+	stp	x8, x9, [x0, #16]
 	ret
 .Lfunc_end36:
-	.size	mcl_fpDbl_mulPre3L, .Lfunc_end36-mcl_fpDbl_mulPre3L
+	.size	mcl_fp_subNF4L, .Lfunc_end36-mcl_fp_subNF4L
 
-	.globl	mcl_fpDbl_sqrPre3L
-	.align	2
-	.type	mcl_fpDbl_sqrPre3L,@function
-mcl_fpDbl_sqrPre3L:                     // @mcl_fpDbl_sqrPre3L
+	.globl	mcl_fpDbl_add4L
+	.p2align	2
+	.type	mcl_fpDbl_add4L,@function
+mcl_fpDbl_add4L:                        // @mcl_fpDbl_add4L
 // BB#0:
-	ldp	 x8, x10, [x1]
-	ldr	x9, [x1, #16]
-	mul	 x11, x8, x8
-	umulh	x12, x9, x8
-	mul	 x13, x9, x8
-	umulh	x14, x10, x8
-	mul	 x15, x10, x8
-	umulh	x8, x8, x8
-	mul	 x16, x9, x10
-	str	 x11, [x0]
-	adds	 x8, x8, x15
-	adcs	x11, x14, x13
-	adcs	x17, x12, xzr
-	adds	 x8, x8, x15
-	mul	 x15, x10, x10
-	str	x8, [x0, #8]
-	umulh	x8, x9, x10
-	umulh	x10, x10, x10
-	adcs	x11, x11, x15
-	adcs	x15, x17, x16
-	adcs	x17, xzr, xzr
-	adds	 x11, x11, x14
-	umulh	x14, x9, x9
-	mul	 x9, x9, x9
-	adcs	x10, x15, x10
-	adcs	x15, x17, x8
-	adds	 x12, x12, x16
-	adcs	x8, x8, x9
-	adcs	x9, x14, xzr
-	adds	 x11, x13, x11
-	adcs	x10, x12, x10
-	stp	x11, x10, [x0, #16]
-	adcs	x8, x8, x15
-	str	x8, [x0, #32]
-	adcs	x8, x9, xzr
-	str	x8, [x0, #40]
+	ldp	x8, x9, [x2, #48]
+	ldp	x10, x11, [x1, #48]
+	ldp	x12, x13, [x2, #32]
+	ldp	x14, x15, [x1, #32]
+	ldp	x16, x17, [x2, #16]
+	ldp		x4, x2, [x2]
+	ldp	x5, x6, [x1, #16]
+	ldp		x18, x1, [x1]
+	adds		x18, x4, x18
+	adcs	x1, x2, x1
+	adcs	x16, x16, x5
+	adcs	x17, x17, x6
+	adcs	x12, x12, x14
+	ldp	x4, x7, [x3, #16]
+	ldp		x2, x3, [x3]
+	adcs	x13, x13, x15
+	adcs	x8, x8, x10
+	adcs	x9, x9, x11
+	adcs	x10, xzr, xzr
+	subs		x11, x12, x2
+	sbcs	x14, x13, x3
+	sbcs	x15, x8, x4
+	stp	x16, x17, [x0, #16]
+	sbcs	x16, x9, x7
+	sbcs	x10, x10, xzr
+	tst	 x10, #0x1
+	csel	x10, x12, x11, ne
+	csel	x11, x13, x14, ne
+	csel	x8, x8, x15, ne
+	csel	x9, x9, x16, ne
+	stp		x18, x1, [x0]
+	stp	x10, x11, [x0, #32]
+	stp	x8, x9, [x0, #48]
 	ret
 .Lfunc_end37:
-	.size	mcl_fpDbl_sqrPre3L, .Lfunc_end37-mcl_fpDbl_sqrPre3L
+	.size	mcl_fpDbl_add4L, .Lfunc_end37-mcl_fpDbl_add4L
 
-	.globl	mcl_fp_mont3L
-	.align	2
-	.type	mcl_fp_mont3L,@function
-mcl_fp_mont3L:                          // @mcl_fp_mont3L
+	.globl	mcl_fpDbl_sub4L
+	.p2align	2
+	.type	mcl_fpDbl_sub4L,@function
+mcl_fpDbl_sub4L:                        // @mcl_fpDbl_sub4L
 // BB#0:
-	stp	x24, x23, [sp, #-48]!
-	stp	x22, x21, [sp, #16]
-	stp	x20, x19, [sp, #32]
-	ldp	 x15, x16, [x2]
-	ldp	x13, x14, [x1, #8]
-	ldr	 x12, [x1]
-	ldur	x11, [x3, #-8]
-	ldp	x9, x8, [x3, #8]
-	ldr	 x10, [x3]
-	ldr	x17, [x2, #16]
-	umulh	x18, x14, x15
-	mul	 x1, x14, x15
-	umulh	x2, x13, x15
-	mul	 x3, x13, x15
-	umulh	x4, x12, x15
-	mul	 x15, x12, x15
-	umulh	x5, x16, x14
-	mul	 x6, x16, x14
-	umulh	x7, x16, x13
-	mul	 x19, x16, x13
-	umulh	x20, x16, x12
-	mul	 x16, x16, x12
-	umulh	x21, x17, x14
-	mul	 x14, x17, x14
-	adds	 x3, x4, x3
-	mul	 x4, x15, x11
-	adcs	x1, x2, x1
-	mul	 x2, x4, x8
-	mul	 x22, x4, x9
-	umulh	x23, x4, x10
-	adcs	x18, x18, xzr
-	adds	 x22, x23, x22
-	umulh	x23, x4, x9
-	adcs	x2, x23, x2
-	umulh	x23, x4, x8
-	mul	 x4, x4, x10
-	adcs	x23, x23, xzr
-	cmn	 x4, x15
-	umulh	x15, x17, x13
-	mul	 x13, x17, x13
-	umulh	x4, x17, x12
-	mul	 x12, x17, x12
-	adcs	x17, x22, x3
-	adcs	x1, x2, x1
-	adcs	x18, x23, x18
-	adcs	x2, xzr, xzr
-	adds	 x3, x20, x19
-	adcs	x6, x7, x6
-	adcs	x5, x5, xzr
-	adds	 x16, x17, x16
-	adcs	x17, x1, x3
-	mul	 x1, x16, x11
-	adcs	x18, x18, x6
-	mul	 x3, x1, x8
-	mul	 x6, x1, x9
-	umulh	x7, x1, x10
-	adcs	x2, x2, x5
-	adcs	x5, xzr, xzr
-	adds	 x6, x7, x6
-	umulh	x7, x1, x9
-	adcs	x3, x7, x3
-	umulh	x7, x1, x8
-	mul	 x1, x1, x10
-	adcs	x7, x7, xzr
-	cmn	 x1, x16
-	adcs	x16, x6, x17
-	adcs	x17, x3, x18
-	adcs	x18, x7, x2
-	adcs	x1, x5, xzr
-	adds	 x13, x4, x13
-	adcs	x14, x15, x14
-	adcs	x15, x21, xzr
-	adds	 x12, x16, x12
-	adcs	x13, x17, x13
-	mul	 x11, x12, x11
-	adcs	x14, x18, x14
-	umulh	x16, x11, x8
-	mul	 x17, x11, x8
-	umulh	x18, x11, x9
-	mul	 x2, x11, x9
-	umulh	x3, x11, x10
-	mul	 x11, x11, x10
-	adcs	x15, x1, x15
-	adcs	x1, xzr, xzr
-	adds	 x2, x3, x2
-	adcs	x17, x18, x17
-	adcs	x16, x16, xzr
-	cmn	 x11, x12
-	adcs	x11, x2, x13
-	adcs	x12, x17, x14
-	adcs	x13, x16, x15
-	adcs	x14, x1, xzr
-	subs	 x10, x11, x10
-	sbcs	x9, x12, x9
-	sbcs	x8, x13, x8
-	sbcs	x14, x14, xzr
-	tst	 x14, #0x1
-	csel	x10, x11, x10, ne
-	csel	x9, x12, x9, ne
-	csel	x8, x13, x8, ne
-	stp	 x10, x9, [x0]
-	str	x8, [x0, #16]
-	ldp	x20, x19, [sp, #32]
-	ldp	x22, x21, [sp, #16]
-	ldp	x24, x23, [sp], #48
+	ldp	x8, x9, [x2, #48]
+	ldp	x10, x11, [x1, #48]
+	ldp	x12, x13, [x2, #32]
+	ldp	x14, x15, [x1, #32]
+	ldp	x16, x17, [x2, #16]
+	ldp		x18, x2, [x2]
+	ldp	x5, x6, [x1, #16]
+	ldp		x4, x1, [x1]
+	subs		x18, x4, x18
+	sbcs	x1, x1, x2
+	sbcs	x16, x5, x16
+	sbcs	x17, x6, x17
+	sbcs	x12, x14, x12
+	sbcs	x13, x15, x13
+	ldp	x4, x7, [x3, #16]
+	ldp		x2, x3, [x3]
+	sbcs	x8, x10, x8
+	sbcs	x9, x11, x9
+	ngcs	 x10, xzr
+	tst	 x10, #0x1
+	csel	x15, x2, xzr, ne
+	csel	x10, x7, xzr, ne
+	csel	x11, x4, xzr, ne
+	csel	x14, x3, xzr, ne
+	adds		x12, x15, x12
+	adcs	x13, x14, x13
+	adcs	x8, x11, x8
+	adcs	x9, x10, x9
+	stp		x18, x1, [x0]
+	stp	x16, x17, [x0, #16]
+	stp	x12, x13, [x0, #32]
+	stp	x8, x9, [x0, #48]
 	ret
 .Lfunc_end38:
-	.size	mcl_fp_mont3L, .Lfunc_end38-mcl_fp_mont3L
-
-	.globl	mcl_fp_montNF3L
-	.align	2
-	.type	mcl_fp_montNF3L,@function
-mcl_fp_montNF3L:                        // @mcl_fp_montNF3L
-// BB#0:
-	stp	x22, x21, [sp, #-32]!
-	stp	x20, x19, [sp, #16]
-	ldp	 x14, x16, [x2]
-	ldp	x15, x13, [x1, #8]
-	ldr	 x12, [x1]
-	ldur	x11, [x3, #-8]
-	ldp	x9, x8, [x3, #8]
-	ldr	 x10, [x3]
-	ldr	x17, [x2, #16]
-	umulh	x18, x13, x14
-	mul	 x1, x13, x14
-	umulh	x2, x15, x14
-	mul	 x3, x15, x14
-	umulh	x4, x12, x14
-	mul	 x14, x12, x14
-	umulh	x5, x16, x13
-	mul	 x6, x16, x13
-	umulh	x7, x16, x15
-	mul	 x19, x16, x15
-	umulh	x20, x16, x12
-	mul	 x16, x16, x12
-	umulh	x21, x17, x13
-	mul	 x13, x17, x13
-	adds	 x3, x4, x3
-	mul	 x4, x14, x11
-	adcs	x1, x2, x1
-	mul	 x2, x4, x10
-	adcs	x18, x18, xzr
-	cmn	 x2, x14
-	umulh	x14, x17, x15
-	mul	 x15, x17, x15
-	umulh	x2, x17, x12
-	mul	 x12, x17, x12
-	mul	 x17, x4, x9
-	adcs	x17, x17, x3
-	mul	 x3, x4, x8
-	adcs	x1, x3, x1
-	umulh	x3, x4, x10
-	adcs	x18, x18, xzr
-	adds	 x17, x17, x3
-	umulh	x3, x4, x9
-	adcs	x1, x1, x3
-	umulh	x3, x4, x8
-	adcs	x18, x18, x3
-	adds	 x3, x20, x19
-	adcs	x4, x7, x6
-	adcs	x5, x5, xzr
-	adds	 x16, x16, x17
-	adcs	x17, x3, x1
-	mul	 x1, x16, x11
-	adcs	x18, x4, x18
-	mul	 x3, x1, x8
-	mul	 x4, x1, x10
-	adcs	x5, x5, xzr
-	cmn	 x4, x16
-	mul	 x16, x1, x9
-	umulh	x4, x1, x8
-	adcs	x16, x16, x17
-	umulh	x17, x1, x9
-	umulh	x1, x1, x10
-	adcs	x18, x3, x18
-	adcs	x3, x5, xzr
-	adds	 x16, x16, x1
-	adcs	x17, x18, x17
-	adcs	x18, x3, x4
-	adds	 x15, x2, x15
-	adcs	x13, x14, x13
-	adcs	x14, x21, xzr
-	adds	 x12, x12, x16
-	adcs	x15, x15, x17
-	mul	 x11, x12, x11
-	adcs	x13, x13, x18
-	mul	 x16, x11, x8
-	mul	 x17, x11, x9
-	mul	 x18, x11, x10
-	umulh	x1, x11, x8
-	umulh	x2, x11, x9
-	umulh	x11, x11, x10
-	adcs	x14, x14, xzr
-	cmn	 x18, x12
-	adcs	x12, x17, x15
-	adcs	x13, x16, x13
-	adcs	x14, x14, xzr
-	adds	 x11, x12, x11
-	adcs	x12, x13, x2
-	adcs	x13, x14, x1
-	subs	 x10, x11, x10
-	sbcs	x9, x12, x9
-	sbcs	x8, x13, x8
-	asr	x14, x8, #63
-	cmp	 x14, #0                // =0
-	csel	x10, x11, x10, lt
-	csel	x9, x12, x9, lt
-	csel	x8, x13, x8, lt
-	stp	 x10, x9, [x0]
-	str	x8, [x0, #16]
-	ldp	x20, x19, [sp, #16]
-	ldp	x22, x21, [sp], #32
+	.size	mcl_fpDbl_sub4L, .Lfunc_end38-mcl_fpDbl_sub4L
+
+	.globl	mulPv384x64
+	.p2align	2
+	.type	mulPv384x64,@function
+mulPv384x64:                            // @mulPv384x64
+// BB#0:
+	ldp	x8, x9, [x0, #32]
+	ldp		x12, x13, [x0]
+	ldp	x10, x11, [x0, #16]
+	umulh	x15, x8, x1
+	mul		x16, x8, x1
+	umulh	x0, x13, x1
+	mul		x8, x13, x1
+	umulh	x13, x12, x1
+	umulh	x18, x10, x1
+	mul		x10, x10, x1
+	adds		x8, x13, x8
+	umulh	x17, x11, x1
+	mul		x11, x11, x1
+	adcs	x2, x0, x10
+	adcs	x3, x18, x11
+	umulh	x14, x9, x1
+	mul		x9, x9, x1
+	adcs	x4, x17, x16
+	adcs	x5, x15, x9
+	adcs	x6, x14, xzr
+	mul		x0, x12, x1
+	mov	 x1, x8
 	ret
 .Lfunc_end39:
-	.size	mcl_fp_montNF3L, .Lfunc_end39-mcl_fp_montNF3L
+	.size	mulPv384x64, .Lfunc_end39-mulPv384x64
 
-	.globl	mcl_fp_montRed3L
-	.align	2
-	.type	mcl_fp_montRed3L,@function
-mcl_fp_montRed3L:                       // @mcl_fp_montRed3L
+	.globl	mcl_fp_mulUnitPre6L
+	.p2align	2
+	.type	mcl_fp_mulUnitPre6L,@function
+mcl_fp_mulUnitPre6L:                    // @mcl_fp_mulUnitPre6L
 // BB#0:
-	ldur	x8, [x2, #-8]
-	ldp	 x9, x17, [x1]
-	ldp	x12, x10, [x2, #8]
-	ldr	 x11, [x2]
-	ldp	x13, x14, [x1, #32]
-	ldp	x15, x16, [x1, #16]
-	mul	 x18, x9, x8
-	umulh	x1, x18, x10
-	mul	 x2, x18, x10
-	umulh	x3, x18, x12
-	mul	 x4, x18, x12
-	umulh	x5, x18, x11
-	mul	 x18, x18, x11
-	adds	 x4, x5, x4
-	adcs	x2, x3, x2
-	adcs	x1, x1, xzr
-	cmn	 x9, x18
-	adcs	x9, x17, x4
-	adcs	x15, x15, x2
-	mul	 x17, x9, x8
-	adcs	x16, x16, x1
-	umulh	x18, x17, x10
-	mul	 x1, x17, x10
-	umulh	x2, x17, x12
-	mul	 x3, x17, x12
-	umulh	x4, x17, x11
-	mul	 x17, x17, x11
-	adcs	x13, x13, xzr
-	adcs	x14, x14, xzr
-	adcs	x5, xzr, xzr
-	adds	 x3, x4, x3
-	adcs	x1, x2, x1
-	adcs	x18, x18, xzr
-	cmn	 x17, x9
-	adcs	x9, x3, x15
-	adcs	x15, x1, x16
-	mul	 x8, x9, x8
-	adcs	x13, x18, x13
-	umulh	x16, x8, x10
-	mul	 x17, x8, x10
-	umulh	x18, x8, x12
-	mul	 x1, x8, x12
-	umulh	x2, x8, x11
-	mul	 x8, x8, x11
-	adcs	x14, x14, xzr
-	adcs	x3, x5, xzr
-	adds	 x1, x2, x1
-	adcs	x17, x18, x17
-	adcs	x16, x16, xzr
-	cmn	 x8, x9
-	adcs	x8, x1, x15
-	adcs	x9, x17, x13
-	adcs	x13, x16, x14
-	adcs	x14, x3, xzr
-	subs	 x11, x8, x11
-	sbcs	x12, x9, x12
-	sbcs	x10, x13, x10
-	sbcs	x14, x14, xzr
-	tst	 x14, #0x1
-	csel	x8, x8, x11, ne
-	csel	x9, x9, x12, ne
-	csel	x10, x13, x10, ne
-	stp	 x8, x9, [x0]
-	str	x10, [x0, #16]
+	ldp		x10, x11, [x1]
+	ldp	x12, x13, [x1, #16]
+	ldp	x8, x9, [x1, #32]
+	mul		x14, x10, x2
+	mul		x15, x11, x2
+	umulh	x10, x10, x2
+	mul		x16, x12, x2
+	umulh	x11, x11, x2
+	adds		x10, x10, x15
+	mul		x17, x13, x2
+	umulh	x12, x12, x2
+	stp		x14, x10, [x0]
+	adcs	x10, x11, x16
+	mul		x18, x8, x2
+	umulh	x13, x13, x2
+	adcs	x11, x12, x17
+	mul		x1, x9, x2
+	umulh	x8, x8, x2
+	stp	x10, x11, [x0, #16]
+	adcs	x10, x13, x18
+	umulh	x9, x9, x2
+	adcs	x8, x8, x1
+	stp	x10, x8, [x0, #32]
+	adcs	x8, x9, xzr
+	str	x8, [x0, #48]
 	ret
 .Lfunc_end40:
-	.size	mcl_fp_montRed3L, .Lfunc_end40-mcl_fp_montRed3L
+	.size	mcl_fp_mulUnitPre6L, .Lfunc_end40-mcl_fp_mulUnitPre6L
 
-	.globl	mcl_fp_addPre3L
-	.align	2
-	.type	mcl_fp_addPre3L,@function
-mcl_fp_addPre3L:                        // @mcl_fp_addPre3L
+	.globl	mcl_fpDbl_mulPre6L
+	.p2align	2
+	.type	mcl_fpDbl_mulPre6L,@function
+mcl_fpDbl_mulPre6L:                     // @mcl_fpDbl_mulPre6L
 // BB#0:
-	ldp	x11, x8, [x2, #8]
-	ldp	 x9, x12, [x1]
-	ldr	 x10, [x2]
-	ldr	x13, [x1, #16]
-	adds	 x9, x10, x9
-	str	 x9, [x0]
-	adcs	x9, x11, x12
-	str	x9, [x0, #8]
-	adcs	x9, x8, x13
+	sub	sp, sp, #496            // =496
+	ldp		x8, x18, [x1]
+	ldp		x14, x15, [x2]
+	ldp	x12, x13, [x1, #32]
+	ldp	x10, x17, [x1, #16]
+	ldp		x16, x11, [x1]
+	mul		x3, x8, x14
+	str	x3, [sp, #392]          // 8-byte Folded Spill
+	umulh	x3, x13, x14
+	str	x3, [sp, #384]          // 8-byte Folded Spill
+	mul		x3, x13, x14
+	str	x3, [sp, #376]          // 8-byte Folded Spill
+	umulh	x3, x12, x14
+	str	x3, [sp, #360]          // 8-byte Folded Spill
+	mul		x3, x12, x14
+	str	x3, [sp, #336]          // 8-byte Folded Spill
+	umulh	x3, x17, x14
+	str	x3, [sp, #328]          // 8-byte Folded Spill
+	mul		x3, x17, x14
+	str	x3, [sp, #304]          // 8-byte Folded Spill
+	umulh	x3, x10, x14
+	str	x3, [sp, #280]          // 8-byte Folded Spill
+	mul		x3, x10, x14
+	str	x3, [sp, #248]          // 8-byte Folded Spill
+	umulh	x3, x18, x14
+	str	x3, [sp, #240]          // 8-byte Folded Spill
+	mul		x3, x18, x14
+	umulh	x14, x8, x14
+	stp	x14, x3, [sp, #208]     // 8-byte Folded Spill
+	mul		x14, x8, x15
+	str	x14, [sp, #272]         // 8-byte Folded Spill
+	mul		x14, x13, x15
+	str	x14, [sp, #352]         // 8-byte Folded Spill
+	mul		x14, x12, x15
+	str	x14, [sp, #320]         // 8-byte Folded Spill
+	mul		x14, x17, x15
+	str	x14, [sp, #296]         // 8-byte Folded Spill
+	mul		x14, x10, x15
+	umulh	x12, x12, x15
+	str	x14, [sp, #264]         // 8-byte Folded Spill
+	mul		x14, x18, x15
+	str	x12, [sp, #344]         // 8-byte Folded Spill
+	umulh	x12, x17, x15
+	umulh	x10, x10, x15
+	umulh	x8, x8, x15
+	str	x12, [sp, #312]         // 8-byte Folded Spill
+	str	x10, [sp, #288]         // 8-byte Folded Spill
+	umulh	x10, x18, x15
+	stp	x8, x14, [sp, #224]     // 8-byte Folded Spill
+	ldp	x12, x8, [x2, #16]
+	umulh	x13, x13, x15
+	str	x10, [sp, #256]         // 8-byte Folded Spill
+	ldp	x10, x15, [x1, #24]
+	ldr	x9, [x1, #16]
+	mul		x14, x16, x12
+	str	x13, [sp, #368]         // 8-byte Folded Spill
+	ldr	x13, [x1, #40]
+	str	x14, [sp, #144]         // 8-byte Folded Spill
+	mul		x14, x15, x12
+	str	x14, [sp, #176]         // 8-byte Folded Spill
+	mul		x14, x10, x12
+	str	x14, [sp, #160]         // 8-byte Folded Spill
+	mul		x14, x9, x12
+	str	x14, [sp, #136]         // 8-byte Folded Spill
+	mul		x14, x11, x12
+	str	x14, [sp, #112]         // 8-byte Folded Spill
+	umulh	x14, x13, x12
+	str	x14, [sp, #200]         // 8-byte Folded Spill
+	umulh	x14, x15, x12
+	str	x14, [sp, #184]         // 8-byte Folded Spill
+	umulh	x14, x10, x12
+	str	x14, [sp, #168]         // 8-byte Folded Spill
+	umulh	x14, x9, x12
+	stp	x29, x30, [sp, #480]    // 8-byte Folded Spill
+	mul		x29, x13, x12
+	str	x14, [sp, #152]         // 8-byte Folded Spill
+	umulh	x14, x11, x12
+	umulh	x12, x16, x12
+	str	x12, [sp, #104]         // 8-byte Folded Spill
+	mul		x12, x13, x8
+	str	x12, [sp, #192]         // 8-byte Folded Spill
+	umulh	x12, x13, x8
+	stp	x12, x14, [sp, #120]    // 8-byte Folded Spill
+	mul		x12, x15, x8
+	str	x12, [sp, #80]          // 8-byte Folded Spill
+	umulh	x12, x15, x8
+	str	x12, [sp, #96]          // 8-byte Folded Spill
+	mul		x12, x10, x8
+	umulh	x10, x10, x8
+	str	x10, [sp, #88]          // 8-byte Folded Spill
+	mul		x10, x9, x8
+	umulh	x9, x9, x8
+	stp	x12, x9, [sp, #64]      // 8-byte Folded Spill
+	mul		x9, x11, x8
+	str	x9, [sp, #32]           // 8-byte Folded Spill
+	umulh	x9, x11, x8
+	stp	x10, x9, [sp, #48]      // 8-byte Folded Spill
+	mul		x9, x16, x8
+	umulh	x8, x16, x8
+	str	x9, [sp, #24]           // 8-byte Folded Spill
+	str	x8, [sp, #40]           // 8-byte Folded Spill
+	ldp		x16, x9, [x1]
+	ldp	x10, x11, [x1, #16]
+	ldp	x8, x2, [x2, #32]
+	ldp	x1, x12, [x1, #32]
+	stp	x28, x27, [sp, #400]    // 8-byte Folded Spill
+	stp	x26, x25, [sp, #416]    // 8-byte Folded Spill
+	stp	x24, x23, [sp, #432]    // 8-byte Folded Spill
+	umulh	x13, x12, x8
+	stp	x22, x21, [sp, #448]    // 8-byte Folded Spill
+	stp	x20, x19, [sp, #464]    // 8-byte Folded Spill
+	mul		x23, x16, x8
+	mul		x30, x12, x8
+	mul		x27, x1, x8
+	mul		x24, x11, x8
+	mul		x22, x10, x8
+	mul		x19, x9, x8
+	str	x13, [sp, #8]           // 8-byte Folded Spill
+	umulh	x13, x1, x8
+	umulh	x28, x11, x8
+	umulh	x25, x10, x8
+	umulh	x21, x9, x8
+	umulh	x20, x16, x8
+	mul		x5, x9, x2
+	umulh	x6, x9, x2
+	ldp	x9, x8, [sp, #208]      // 8-byte Folded Reload
+	mul		x26, x16, x2
+	umulh	x7, x16, x2
+	mul		x3, x10, x2
+	umulh	x4, x10, x2
+	mul		x17, x11, x2
+	umulh	x18, x11, x2
+	mul		x15, x1, x2
+	umulh	x1, x1, x2
+	mul		x14, x12, x2
+	umulh	x16, x12, x2
+	adds		x2, x9, x8
+	ldp	x9, x8, [sp, #240]      // 8-byte Folded Reload
+	str	x13, [sp, #16]          // 8-byte Folded Spill
+	ldp	x13, x10, [sp, #272]    // 8-byte Folded Reload
+	ldr	x12, [sp, #360]         // 8-byte Folded Reload
+	adcs	x8, x9, x8
+	ldr	x9, [sp, #304]          // 8-byte Folded Reload
+	adcs	x9, x10, x9
+	ldp	x11, x10, [sp, #328]    // 8-byte Folded Reload
+	adcs	x10, x11, x10
+	ldr	x11, [sp, #376]         // 8-byte Folded Reload
+	adcs	x11, x12, x11
+	ldr	x12, [sp, #384]         // 8-byte Folded Reload
+	adcs	x12, x12, xzr
+	adds		x2, x13, x2
+	ldr	x13, [sp, #392]         // 8-byte Folded Reload
+	stp		x13, x2, [x0]
+	ldr	x13, [sp, #232]         // 8-byte Folded Reload
+	adcs	x8, x13, x8
+	ldr	x13, [sp, #264]         // 8-byte Folded Reload
+	adcs	x9, x13, x9
+	ldr	x13, [sp, #296]         // 8-byte Folded Reload
+	adcs	x10, x13, x10
+	ldr	x13, [sp, #320]         // 8-byte Folded Reload
+	adcs	x11, x13, x11
+	ldr	x13, [sp, #352]         // 8-byte Folded Reload
+	adcs	x12, x13, x12
+	ldr	x13, [sp, #224]         // 8-byte Folded Reload
+	adcs	x2, xzr, xzr
+	adds		x8, x8, x13
+	ldr	x13, [sp, #256]         // 8-byte Folded Reload
+	adcs	x9, x9, x13
+	ldr	x13, [sp, #288]         // 8-byte Folded Reload
+	adcs	x10, x10, x13
+	ldr	x13, [sp, #312]         // 8-byte Folded Reload
+	adcs	x11, x11, x13
+	ldr	x13, [sp, #344]         // 8-byte Folded Reload
+	adcs	x12, x12, x13
+	ldr	x13, [sp, #368]         // 8-byte Folded Reload
+	adcs	x2, x2, x13
+	ldr	x13, [sp, #144]         // 8-byte Folded Reload
+	adds		x13, x13, x8
+	ldr	x8, [sp, #112]          // 8-byte Folded Reload
+	adcs	x9, x8, x9
+	ldr	x8, [sp, #136]          // 8-byte Folded Reload
+	adcs	x10, x8, x10
+	ldr	x8, [sp, #160]          // 8-byte Folded Reload
+	adcs	x11, x8, x11
+	ldr	x8, [sp, #176]          // 8-byte Folded Reload
+	adcs	x12, x8, x12
+	adcs	x2, x29, x2
+	ldr	x29, [sp, #104]         // 8-byte Folded Reload
 	adcs	x8, xzr, xzr
-	str	x9, [x0, #16]
-	mov	 x0, x8
-	ret
-.Lfunc_end41:
-	.size	mcl_fp_addPre3L, .Lfunc_end41-mcl_fp_addPre3L
-
-	.globl	mcl_fp_subPre3L
-	.align	2
-	.type	mcl_fp_subPre3L,@function
-mcl_fp_subPre3L:                        // @mcl_fp_subPre3L
-// BB#0:
-	ldp	x11, x8, [x2, #8]
-	ldp	 x9, x12, [x1]
-	ldr	 x10, [x2]
-	ldr	x13, [x1, #16]
-	subs	 x9, x9, x10
-	str	 x9, [x0]
-	sbcs	x9, x12, x11
-	str	x9, [x0, #8]
-	sbcs	x9, x13, x8
-	ngcs	 x8, xzr
-	and	x8, x8, #0x1
-	str	x9, [x0, #16]
-	mov	 x0, x8
-	ret
-.Lfunc_end42:
-	.size	mcl_fp_subPre3L, .Lfunc_end42-mcl_fp_subPre3L
-
-	.globl	mcl_fp_shr1_3L
-	.align	2
-	.type	mcl_fp_shr1_3L,@function
-mcl_fp_shr1_3L:                         // @mcl_fp_shr1_3L
-// BB#0:
-	ldp	 x8, x9, [x1]
-	ldr	x10, [x1, #16]
-	extr	x8, x9, x8, #1
-	extr	x9, x10, x9, #1
-	lsr	x10, x10, #1
-	stp	 x8, x9, [x0]
-	str	x10, [x0, #16]
-	ret
-.Lfunc_end43:
-	.size	mcl_fp_shr1_3L, .Lfunc_end43-mcl_fp_shr1_3L
-
-	.globl	mcl_fp_add3L
-	.align	2
-	.type	mcl_fp_add3L,@function
-mcl_fp_add3L:                           // @mcl_fp_add3L
-// BB#0:
-	ldp	x11, x8, [x2, #8]
-	ldp	 x9, x12, [x1]
-	ldr	 x10, [x2]
-	ldr	x13, [x1, #16]
-	adds	 x9, x10, x9
+	adds		x9, x9, x29
+	ldr	x29, [sp, #128]         // 8-byte Folded Reload
+	adcs	x10, x10, x29
+	ldr	x29, [sp, #152]         // 8-byte Folded Reload
+	adcs	x11, x11, x29
+	ldr	x29, [sp, #168]         // 8-byte Folded Reload
+	adcs	x12, x12, x29
+	ldr	x29, [sp, #184]         // 8-byte Folded Reload
+	adcs	x2, x2, x29
+	ldr	x29, [sp, #200]         // 8-byte Folded Reload
+	adcs	x8, x8, x29
+	ldr	x29, [sp, #24]          // 8-byte Folded Reload
+	adds		x9, x29, x9
+	stp	x13, x9, [x0, #16]
+	ldr	x9, [sp, #32]           // 8-byte Folded Reload
+	ldr	x13, [sp, #192]         // 8-byte Folded Reload
+	adcs	x9, x9, x10
+	ldr	x10, [sp, #48]          // 8-byte Folded Reload
+	adcs	x10, x10, x11
+	ldr	x11, [sp, #64]          // 8-byte Folded Reload
 	adcs	x11, x11, x12
-	ldr	 x10, [x3]
-	ldp	x12, x14, [x3, #8]
-	stp	 x9, x11, [x0]
-	adcs	x8, x8, x13
-	str	x8, [x0, #16]
+	ldr	x12, [sp, #80]          // 8-byte Folded Reload
+	adcs	x12, x12, x2
+	ldr	x2, [sp, #40]           // 8-byte Folded Reload
+	adcs	x8, x13, x8
 	adcs	x13, xzr, xzr
-	subs	 x10, x9, x10
-	sbcs	x9, x11, x12
-	sbcs	x8, x8, x14
-	sbcs	x11, x13, xzr
-	and	w11, w11, #0x1
-	tbnz	w11, #0, .LBB44_2
-// BB#1:                                // %nocarry
-	stp	 x10, x9, [x0]
-	str	x8, [x0, #16]
-.LBB44_2:                               // %carry
-	ret
-.Lfunc_end44:
-	.size	mcl_fp_add3L, .Lfunc_end44-mcl_fp_add3L
-
-	.globl	mcl_fp_addNF3L
-	.align	2
-	.type	mcl_fp_addNF3L,@function
-mcl_fp_addNF3L:                         // @mcl_fp_addNF3L
-// BB#0:
-	ldp	 x8, x9, [x1]
-	ldp	 x10, x11, [x2]
-	ldr	x12, [x1, #16]
-	ldr	x13, [x2, #16]
-	adds	 x8, x10, x8
-	adcs	x9, x11, x9
-	ldp	 x10, x11, [x3]
-	ldr	x14, [x3, #16]
-	adcs	x12, x13, x12
-	subs	 x10, x8, x10
-	sbcs	x11, x9, x11
-	sbcs	x13, x12, x14
-	asr	x14, x13, #63
-	cmp	 x14, #0                // =0
-	csel	x8, x8, x10, lt
-	csel	x9, x9, x11, lt
-	csel	x10, x12, x13, lt
-	stp	 x8, x9, [x0]
-	str	x10, [x0, #16]
-	ret
-.Lfunc_end45:
-	.size	mcl_fp_addNF3L, .Lfunc_end45-mcl_fp_addNF3L
-
-	.globl	mcl_fp_sub3L
-	.align	2
-	.type	mcl_fp_sub3L,@function
-mcl_fp_sub3L:                           // @mcl_fp_sub3L
-// BB#0:
-	ldp	x11, x10, [x2, #8]
-	ldp	 x8, x12, [x1]
-	ldr	 x9, [x2]
-	ldr	x13, [x1, #16]
-	subs	 x8, x8, x9
-	sbcs	x9, x12, x11
-	stp	 x8, x9, [x0]
-	sbcs	x10, x13, x10
-	str	x10, [x0, #16]
-	ngcs	 x11, xzr
-	and	w11, w11, #0x1
-	tbnz	w11, #0, .LBB46_2
-// BB#1:                                // %nocarry
-	ret
-.LBB46_2:                               // %carry
-	ldp	x13, x11, [x3, #8]
-	ldr	 x12, [x3]
-	adds	 x8, x12, x8
-	adcs	x9, x13, x9
-	adcs	x10, x11, x10
-	stp	 x8, x9, [x0]
-	str	x10, [x0, #16]
-	ret
-.Lfunc_end46:
-	.size	mcl_fp_sub3L, .Lfunc_end46-mcl_fp_sub3L
-
-	.globl	mcl_fp_subNF3L
-	.align	2
-	.type	mcl_fp_subNF3L,@function
-mcl_fp_subNF3L:                         // @mcl_fp_subNF3L
-// BB#0:
-	ldp	 x8, x9, [x2]
-	ldp	 x10, x11, [x1]
-	ldr	x12, [x2, #16]
-	ldr	x13, [x1, #16]
-	subs	 x8, x10, x8
-	sbcs	x9, x11, x9
-	ldp	 x10, x11, [x3]
-	ldr	x14, [x3, #16]
-	sbcs	x12, x13, x12
-	asr	x13, x12, #63
-	and	 x11, x13, x11
-	and	 x14, x13, x14
-	extr	x13, x13, x12, #63
-	and	 x10, x13, x10
-	adds	 x8, x10, x8
-	str	 x8, [x0]
-	adcs	x8, x11, x9
-	str	x8, [x0, #8]
-	adcs	x8, x14, x12
-	str	x8, [x0, #16]
-	ret
-.Lfunc_end47:
-	.size	mcl_fp_subNF3L, .Lfunc_end47-mcl_fp_subNF3L
-
-	.globl	mcl_fpDbl_add3L
-	.align	2
-	.type	mcl_fpDbl_add3L,@function
-mcl_fpDbl_add3L:                        // @mcl_fpDbl_add3L
-// BB#0:
-	ldp	x8, x9, [x2, #32]
-	ldp	x10, x11, [x1, #32]
-	ldp	x12, x13, [x2, #16]
-	ldp	 x15, x18, [x2]
-	ldp	x16, x17, [x1, #16]
-	ldp	 x14, x1, [x1]
-	adds	 x14, x15, x14
-	ldr	x15, [x3, #16]
-	str	 x14, [x0]
-	ldp	 x14, x2, [x3]
-	adcs	x18, x18, x1
-	adcs	x12, x12, x16
-	stp	x18, x12, [x0, #8]
-	adcs	x12, x13, x17
-	adcs	x8, x8, x10
-	adcs	x9, x9, x11
-	adcs	x10, xzr, xzr
-	subs	 x11, x12, x14
-	sbcs	x13, x8, x2
-	sbcs	x14, x9, x15
-	sbcs	x10, x10, xzr
-	tst	 x10, #0x1
-	csel	x10, x12, x11, ne
-	csel	x8, x8, x13, ne
-	csel	x9, x9, x14, ne
-	stp	x10, x8, [x0, #24]
-	str	x9, [x0, #40]
-	ret
-.Lfunc_end48:
-	.size	mcl_fpDbl_add3L, .Lfunc_end48-mcl_fpDbl_add3L
-
-	.globl	mcl_fpDbl_sub3L
-	.align	2
-	.type	mcl_fpDbl_sub3L,@function
-mcl_fpDbl_sub3L:                        // @mcl_fpDbl_sub3L
-// BB#0:
-	ldp	x8, x9, [x2, #32]
-	ldp	x10, x11, [x1, #32]
-	ldp	x12, x13, [x2, #16]
-	ldp	 x14, x18, [x2]
-	ldp	x16, x17, [x1, #16]
-	ldp	 x15, x1, [x1]
-	subs	 x14, x15, x14
-	ldr	x15, [x3, #16]
-	str	 x14, [x0]
-	ldp	 x14, x2, [x3]
-	sbcs	x18, x1, x18
-	sbcs	x12, x16, x12
-	stp	x18, x12, [x0, #8]
-	sbcs	x12, x17, x13
-	sbcs	x8, x10, x8
-	sbcs	x9, x11, x9
-	ngcs	 x10, xzr
-	tst	 x10, #0x1
-	csel	x10, x15, xzr, ne
-	csel	x11, x2, xzr, ne
-	csel	x13, x14, xzr, ne
-	adds	 x12, x13, x12
-	adcs	x8, x11, x8
-	stp	x12, x8, [x0, #24]
-	adcs	x8, x10, x9
-	str	x8, [x0, #40]
-	ret
-.Lfunc_end49:
-	.size	mcl_fpDbl_sub3L, .Lfunc_end49-mcl_fpDbl_sub3L
-
-	.globl	mcl_fp_mulUnitPre4L
-	.align	2
-	.type	mcl_fp_mulUnitPre4L,@function
-mcl_fp_mulUnitPre4L:                    // @mcl_fp_mulUnitPre4L
-// BB#0:
-	ldp	 x8, x9, [x1]
-	ldp	x10, x11, [x1, #16]
-	mul	 x12, x8, x2
-	mul	 x13, x9, x2
-	umulh	x8, x8, x2
-	mul	 x14, x10, x2
-	umulh	x9, x9, x2
-	mul	 x15, x11, x2
-	umulh	x10, x10, x2
-	umulh	x11, x11, x2
-	adds	 x8, x8, x13
-	stp	 x12, x8, [x0]
-	adcs	x8, x9, x14
-	str	x8, [x0, #16]
-	adcs	x8, x10, x15
-	str	x8, [x0, #24]
-	adcs	x8, x11, xzr
-	str	x8, [x0, #32]
+	adds		x9, x9, x2
+	ldr	x2, [sp, #56]           // 8-byte Folded Reload
+	adcs	x10, x10, x2
+	ldr	x2, [sp, #72]           // 8-byte Folded Reload
+	adcs	x11, x11, x2
+	ldr	x2, [sp, #88]           // 8-byte Folded Reload
+	adcs	x12, x12, x2
+	ldr	x2, [sp, #96]           // 8-byte Folded Reload
+	adcs	x8, x8, x2
+	ldr	x2, [sp, #120]          // 8-byte Folded Reload
+	adcs	x13, x13, x2
+	adds		x9, x23, x9
+	adcs	x10, x19, x10
+	adcs	x11, x22, x11
+	adcs	x12, x24, x12
+	adcs	x8, x27, x8
+	adcs	x13, x30, x13
+	adcs	x2, xzr, xzr
+	adds		x10, x10, x20
+	ldr	x19, [sp, #16]          // 8-byte Folded Reload
+	adcs	x11, x11, x21
+	adcs	x12, x12, x25
+	adcs	x8, x8, x28
+	adcs	x13, x13, x19
+	ldr	x19, [sp, #8]           // 8-byte Folded Reload
+	ldp	x29, x30, [sp, #480]    // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #448]    // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #432]    // 8-byte Folded Reload
+	adcs	x2, x2, x19
+	adds		x10, x26, x10
+	stp	x9, x10, [x0, #32]
+	adcs	x9, x5, x11
+	adcs	x10, x3, x12
+	adcs	x8, x17, x8
+	adcs	x11, x15, x13
+	adcs	x12, x14, x2
+	adcs	x13, xzr, xzr
+	adds		x9, x9, x7
+	adcs	x10, x10, x6
+	adcs	x8, x8, x4
+	stp	x9, x10, [x0, #48]
+	adcs	x9, x11, x18
+	ldp	x20, x19, [sp, #464]    // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #416]    // 8-byte Folded Reload
+	ldp	x28, x27, [sp, #400]    // 8-byte Folded Reload
+	stp	x8, x9, [x0, #64]
+	adcs	x8, x12, x1
+	adcs	x9, x13, x16
+	stp	x8, x9, [x0, #80]
+	add	sp, sp, #496            // =496
 	ret
-.Lfunc_end50:
-	.size	mcl_fp_mulUnitPre4L, .Lfunc_end50-mcl_fp_mulUnitPre4L
+.Lfunc_end41:
+	.size	mcl_fpDbl_mulPre6L, .Lfunc_end41-mcl_fpDbl_mulPre6L
 
-	.globl	mcl_fpDbl_mulPre4L
-	.align	2
-	.type	mcl_fpDbl_mulPre4L,@function
-mcl_fpDbl_mulPre4L:                     // @mcl_fpDbl_mulPre4L
+	.globl	mcl_fpDbl_sqrPre6L
+	.p2align	2
+	.type	mcl_fpDbl_sqrPre6L,@function
+mcl_fpDbl_sqrPre6L:                     // @mcl_fpDbl_sqrPre6L
 // BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	sub	sp, sp, #32             // =32
-	ldp	 x8, x10, [x1]
-	ldp	 x9, x11, [x1]
-	ldp	x12, x14, [x1, #16]
-	ldp	x13, x1, [x1, #16]
-	ldp	 x15, x16, [x2]
-	ldp	x17, x18, [x2, #16]
-	mul	 x2, x8, x15
-	umulh	x3, x14, x15
-	mul	 x4, x14, x15
-	umulh	x5, x12, x15
-	mul	 x6, x12, x15
-	umulh	x7, x10, x15
-	mul	 x19, x10, x15
-	umulh	x15, x8, x15
-	mul	 x20, x8, x16
-	mul	 x21, x14, x16
-	mul	 x22, x12, x16
-	mul	 x23, x10, x16
-	umulh	x24, x14, x16
-	umulh	x25, x12, x16
-	umulh	x26, x10, x16
-	umulh	x16, x8, x16
-	mul	 x27, x8, x17
-	mul	 x28, x14, x17
-	mul	 x29, x12, x17
-	mul	 x30, x10, x17
-	umulh	x14, x14, x17
-	stp	x3, x14, [sp, #16]
-	umulh	x12, x12, x17
-	str	x12, [sp, #8]           // 8-byte Folded Spill
-	umulh	x3, x10, x17
-	umulh	x14, x8, x17
-	mul	 x17, x9, x18
-	umulh	x12, x9, x18
-	mul	 x10, x11, x18
-	umulh	x11, x11, x18
-	mul	 x9, x13, x18
-	umulh	x13, x13, x18
-	mul	 x8, x1, x18
-	umulh	x18, x1, x18
-	str	 x2, [x0]
-	adds	 x15, x15, x19
-	adcs	x1, x7, x6
-	adcs	x2, x5, x4
-	ldr	x4, [sp, #16]           // 8-byte Folded Reload
-	adcs	x4, x4, xzr
-	adds	 x15, x20, x15
-	str	x15, [x0, #8]
-	adcs	x15, x23, x1
-	adcs	x1, x22, x2
-	adcs	x2, x21, x4
-	adcs	x4, xzr, xzr
-	adds	 x15, x15, x16
-	adcs	x16, x1, x26
-	adcs	x1, x2, x25
-	adcs	x2, x4, x24
-	adds	 x15, x15, x27
-	str	x15, [x0, #16]
-	adcs	x15, x16, x30
-	adcs	x16, x1, x29
-	adcs	x1, x2, x28
+	str	x23, [sp, #-48]!        // 8-byte Folded Spill
+	ldp		x9, x12, [x1]
+	ldp		x18, x4, [x1]
+	ldp	x11, x13, [x1, #8]
+	ldp	x10, x14, [x1, #16]
+	ldp	x8, x15, [x1, #24]
+	stp	x22, x21, [sp, #16]     // 8-byte Folded Spill
+	ldp	x16, x17, [x1, #32]
+	mul		x22, x12, x18
+	umulh	x23, x18, x18
+	stp	x20, x19, [sp, #32]     // 8-byte Folded Spill
+	mul		x20, x13, x18
+	umulh	x21, x12, x18
+	adds		x23, x23, x22
+	mul		x7, x14, x18
+	umulh	x19, x13, x18
+	adcs	x20, x21, x20
+	mul		x6, x15, x18
+	adcs	x7, x19, x7
+	umulh	x19, x14, x18
+	mul		x5, x17, x18
+	adcs	x6, x19, x6
+	umulh	x19, x15, x18
+	adcs	x5, x19, x5
+	umulh	x19, x17, x18
+	adcs	x19, x19, xzr
+	adds		x22, x22, x23
+	mul		x18, x18, x18
+	stp		x18, x22, [x0]
+	mul		x18, x12, x12
+	adcs	x18, x18, x20
+	mul		x20, x13, x12
+	adcs	x7, x20, x7
+	mul		x20, x14, x12
+	adcs	x6, x20, x6
+	mul		x20, x15, x12
+	adcs	x5, x20, x5
+	mul		x20, x17, x12
+	adcs	x19, x20, x19
+	adcs	x20, xzr, xzr
+	adds		x18, x18, x21
+	umulh	x17, x17, x12
+	umulh	x15, x15, x12
+	umulh	x14, x14, x12
+	umulh	x13, x13, x12
+	umulh	x12, x12, x12
+	ldr		x3, [x1]
+	ldp	x2, x23, [x1, #16]
+	adcs	x12, x7, x12
+	adcs	x13, x6, x13
+	adcs	x14, x5, x14
+	adcs	x15, x19, x15
+	ldp	x21, x7, [x1, #32]
+	mul		x6, x3, x2
+	adcs	x17, x20, x17
+	adds		x18, x6, x18
+	mul		x6, x4, x2
+	adcs	x12, x6, x12
+	mul		x6, x2, x2
+	mul		x20, x23, x2
+	adcs	x13, x6, x13
+	mul		x19, x21, x2
+	adcs	x14, x20, x14
+	mul		x5, x7, x2
+	adcs	x15, x19, x15
+	adcs	x17, x5, x17
+	umulh	x19, x3, x2
+	adcs	x5, xzr, xzr
+	adds		x12, x12, x19
+	umulh	x19, x4, x2
+	adcs	x13, x13, x19
+	umulh	x19, x2, x2
+	adcs	x14, x14, x19
+	umulh	x19, x23, x2
+	umulh	x6, x21, x2
+	adcs	x15, x15, x19
+	adcs	x17, x17, x6
+	umulh	x2, x7, x2
+	adcs	x2, x5, x2
+	mul		x5, x3, x23
+	adds		x12, x5, x12
+	stp	x18, x12, [x0, #16]
+	mul		x12, x4, x23
+	adcs	x12, x12, x13
+	adcs	x14, x20, x14
+	mul		x18, x23, x23
+	adcs	x15, x18, x15
+	mul		x18, x21, x23
+	mul		x13, x7, x23
+	adcs	x17, x18, x17
+	adcs	x13, x13, x2
+	umulh	x3, x3, x23
 	adcs	x2, xzr, xzr
-	adds	 x14, x15, x14
-	adcs	x15, x16, x3
-	ldr	x16, [sp, #8]           // 8-byte Folded Reload
-	adcs	x16, x1, x16
-	ldr	x1, [sp, #24]           // 8-byte Folded Reload
-	adcs	x1, x2, x1
-	adds	 x14, x14, x17
-	str	x14, [x0, #24]
-	adcs	x10, x15, x10
-	adcs	x9, x16, x9
-	adcs	x8, x1, x8
-	adcs	x14, xzr, xzr
-	adds	 x10, x10, x12
-	adcs	x9, x9, x11
-	stp	x10, x9, [x0, #32]
-	adcs	x8, x8, x13
-	str	x8, [x0, #48]
-	adcs	x8, x14, x18
-	str	x8, [x0, #56]
-	add	sp, sp, #32             // =32
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
+	umulh	x4, x4, x23
+	adds		x12, x12, x3
+	adcs	x14, x14, x4
+	umulh	x5, x7, x23
+	umulh	x7, x23, x23
+	adcs	x15, x15, x19
+	umulh	x6, x21, x23
+	adcs	x17, x17, x7
+	adcs	x13, x13, x6
+	mul		x18, x9, x16
+	adcs	x2, x2, x5
+	ldr	x1, [x1, #40]
+	mul		x7, x11, x16
+	adds		x12, x18, x12
+	mul		x19, x10, x16
+	adcs	x14, x7, x14
+	mul		x4, x8, x16
+	adcs	x15, x19, x15
+	mul		x6, x16, x16
+	adcs	x17, x4, x17
+	mul		x3, x1, x16
+	adcs	x13, x6, x13
+	adcs	x2, x3, x2
+	umulh	x4, x9, x16
+	adcs	x6, xzr, xzr
+	umulh	x19, x11, x16
+	adds		x14, x14, x4
+	umulh	x7, x10, x16
+	adcs	x15, x15, x19
+	umulh	x18, x8, x16
+	adcs	x17, x17, x7
+	umulh	x5, x1, x16
+	umulh	x16, x16, x16
+	adcs	x13, x13, x18
+	adcs	x16, x2, x16
+	mul		x4, x9, x1
+	adcs	x6, x6, x5
+	mul		x18, x11, x1
+	adds		x14, x4, x14
+	mul		x7, x10, x1
+	stp	x12, x14, [x0, #32]
+	adcs	x12, x18, x15
+	mul		x19, x8, x1
+	adcs	x14, x7, x17
+	adcs	x13, x19, x13
+	mul		x2, x1, x1
+	adcs	x15, x3, x16
+	adcs	x16, x2, x6
+	umulh	x9, x9, x1
+	adcs	x17, xzr, xzr
+	umulh	x11, x11, x1
+	adds		x9, x12, x9
+	umulh	x10, x10, x1
+	adcs	x11, x14, x11
+	umulh	x8, x8, x1
+	stp	x9, x11, [x0, #48]
+	adcs	x9, x13, x10
+	adcs	x8, x15, x8
+	ldp	x20, x19, [sp, #32]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #16]     // 8-byte Folded Reload
+	umulh	x1, x1, x1
+	stp	x9, x8, [x0, #64]
+	adcs	x8, x16, x5
+	adcs	x9, x17, x1
+	stp	x8, x9, [x0, #80]
+	ldr	x23, [sp], #48          // 8-byte Folded Reload
 	ret
-.Lfunc_end51:
-	.size	mcl_fpDbl_mulPre4L, .Lfunc_end51-mcl_fpDbl_mulPre4L
+.Lfunc_end42:
+	.size	mcl_fpDbl_sqrPre6L, .Lfunc_end42-mcl_fpDbl_sqrPre6L
 
-	.globl	mcl_fpDbl_sqrPre4L
-	.align	2
-	.type	mcl_fpDbl_sqrPre4L,@function
-mcl_fpDbl_sqrPre4L:                     // @mcl_fpDbl_sqrPre4L
-// BB#0:
-	ldp	 x8, x9, [x1]
-	ldp	 x10, x13, [x1]
-	ldp	x11, x12, [x1, #16]
-	ldr	x14, [x1, #16]
-	mul	 x15, x10, x10
-	umulh	x16, x12, x10
-	mul	 x17, x12, x10
-	umulh	x18, x14, x10
-	mul	 x2, x14, x10
-	umulh	x3, x9, x10
-	mul	 x4, x9, x10
-	umulh	x10, x10, x10
-	str	 x15, [x0]
-	adds	 x10, x10, x4
-	adcs	x15, x3, x2
-	adcs	x17, x18, x17
-	adcs	x16, x16, xzr
-	adds	 x10, x10, x4
-	mul	 x4, x12, x9
-	str	x10, [x0, #8]
-	mul	 x10, x9, x9
-	adcs	x10, x15, x10
-	mul	 x15, x14, x9
-	adcs	x17, x17, x15
-	adcs	x16, x16, x4
-	adcs	x4, xzr, xzr
-	adds	 x10, x10, x3
-	umulh	x3, x9, x9
-	adcs	x17, x17, x3
-	umulh	x3, x12, x9
-	umulh	x9, x14, x9
-	adcs	x16, x16, x9
-	adcs	x3, x4, x3
-	ldr	x1, [x1, #24]
-	adds	 x10, x10, x2
-	mul	 x2, x12, x14
-	str	x10, [x0, #16]
-	mul	 x10, x14, x14
-	umulh	x12, x12, x14
-	umulh	x14, x14, x14
-	adcs	x15, x17, x15
-	mul	 x17, x8, x1
-	adcs	x10, x16, x10
-	mul	 x16, x11, x1
-	adcs	x2, x3, x2
-	adcs	x3, xzr, xzr
-	adds	 x15, x15, x18
-	mul	 x18, x13, x1
-	adcs	x9, x10, x9
-	mul	 x10, x1, x1
-	umulh	x8, x8, x1
-	umulh	x13, x13, x1
-	umulh	x11, x11, x1
-	umulh	x1, x1, x1
-	adcs	x14, x2, x14
-	adcs	x12, x3, x12
-	adds	 x15, x15, x17
-	adcs	x9, x9, x18
-	adcs	x14, x14, x16
-	adcs	x10, x12, x10
-	adcs	x12, xzr, xzr
-	adds	 x8, x9, x8
-	stp	x15, x8, [x0, #24]
-	adcs	x8, x14, x13
-	str	x8, [x0, #40]
-	adcs	x8, x10, x11
-	str	x8, [x0, #48]
-	adcs	x8, x12, x1
-	str	x8, [x0, #56]
-	ret
-.Lfunc_end52:
-	.size	mcl_fpDbl_sqrPre4L, .Lfunc_end52-mcl_fpDbl_sqrPre4L
-
-	.globl	mcl_fp_mont4L
-	.align	2
-	.type	mcl_fp_mont4L,@function
-mcl_fp_mont4L:                          // @mcl_fp_mont4L
+	.globl	mcl_fp_mont6L
+	.p2align	2
+	.type	mcl_fp_mont6L,@function
+mcl_fp_mont6L:                          // @mcl_fp_mont6L
 // BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	sub	sp, sp, #16             // =16
-	str	x0, [sp, #8]            // 8-byte Folded Spill
-	ldp	x13, x16, [x1, #16]
-	ldp	 x14, x15, [x1]
-	ldur	x0, [x3, #-8]
-	ldp	x9, x8, [x3, #16]
-	ldp	 x11, x10, [x3]
-	ldp	 x17, x18, [x2]
-	ldp	x1, x2, [x2, #16]
-	umulh	x3, x16, x17
-	mul	 x4, x16, x17
-	umulh	x5, x13, x17
-	mul	 x6, x13, x17
-	umulh	x7, x15, x17
-	mul	 x19, x15, x17
-	umulh	x20, x14, x17
-	mul	 x17, x14, x17
-	umulh	x21, x18, x16
-	mul	 x22, x18, x16
-	umulh	x23, x18, x13
-	mul	 x24, x18, x13
-	umulh	x25, x18, x15
-	mul	 x26, x18, x15
-	umulh	x27, x18, x14
-	mul	 x18, x18, x14
-	umulh	x28, x1, x16
-	adds	 x19, x20, x19
-	mul	 x20, x17, x0
-	adcs	x6, x7, x6
-	mul	 x7, x20, x8
-	mul	 x29, x20, x9
-	mul	 x30, x20, x10
-	adcs	x4, x5, x4
-	umulh	x5, x20, x11
-	adcs	x3, x3, xzr
-	adds	 x5, x5, x30
-	umulh	x30, x20, x10
-	adcs	x29, x30, x29
-	umulh	x30, x20, x9
-	adcs	x7, x30, x7
-	umulh	x30, x20, x8
-	mul	 x20, x20, x11
-	adcs	x30, x30, xzr
-	cmn	 x20, x17
-	mul	 x17, x1, x16
-	umulh	x20, x1, x13
-	adcs	x5, x5, x19
-	mul	 x19, x1, x13
-	adcs	x6, x29, x6
-	umulh	x29, x1, x15
-	adcs	x4, x7, x4
-	mul	 x7, x1, x15
-	adcs	x3, x30, x3
-	adcs	x30, xzr, xzr
-	adds	 x26, x27, x26
-	umulh	x27, x1, x14
-	mul	 x1, x1, x14
+	sub	sp, sp, #144            // =144
+	stp	x28, x27, [sp, #48]     // 8-byte Folded Spill
+	stp	x26, x25, [sp, #64]     // 8-byte Folded Spill
+	stp	x24, x23, [sp, #80]     // 8-byte Folded Spill
+	stp	x22, x21, [sp, #96]     // 8-byte Folded Spill
+	stp	x20, x19, [sp, #112]    // 8-byte Folded Spill
+	stp	x29, x30, [sp, #128]    // 8-byte Folded Spill
+	str	x0, [sp, #24]           // 8-byte Folded Spill
+	ldr		x5, [x2]
+	str	x2, [sp, #32]           // 8-byte Folded Spill
+	ldp	x0, x4, [x1, #32]
+	ldp	x16, x18, [x1, #16]
+	ldp		x10, x1, [x1]
+	ldp	x8, x14, [x3, #-8]
+	ldr	x15, [x3, #8]
+	mul		x24, x16, x5
+	mul		x26, x1, x5
+	umulh	x27, x10, x5
+	umulh	x25, x1, x5
+	adds		x26, x27, x26
+	mul		x22, x18, x5
+	umulh	x23, x16, x5
 	adcs	x24, x25, x24
-	umulh	x25, x2, x16
-	mul	 x16, x2, x16
+	ldp	x13, x17, [x3, #16]
+	mul		x20, x0, x5
+	umulh	x21, x18, x5
 	adcs	x22, x23, x22
-	adcs	x21, x21, xzr
-	adds	 x18, x5, x18
-	adcs	x5, x6, x26
-	mul	 x6, x18, x0
-	adcs	x4, x4, x24
-	mul	 x23, x6, x8
-	mul	 x24, x6, x9
-	mul	 x26, x6, x10
-	adcs	x3, x3, x22
-	umulh	x22, x6, x11
-	adcs	x21, x30, x21
-	adcs	x30, xzr, xzr
-	adds	 x22, x22, x26
-	umulh	x26, x6, x10
-	adcs	x24, x26, x24
-	umulh	x26, x6, x9
-	adcs	x23, x26, x23
-	umulh	x26, x6, x8
-	mul	 x6, x6, x11
-	adcs	x26, x26, xzr
-	cmn	 x6, x18
-	umulh	x18, x2, x13
-	mul	 x13, x2, x13
-	umulh	x6, x2, x15
-	mul	 x15, x2, x15
-	umulh	x12, x2, x14
-	mul	 x14, x2, x14
-	adcs	x2, x22, x5
-	adcs	x4, x24, x4
-	adcs	x3, x23, x3
-	adcs	x5, x26, x21
-	adcs	x21, x30, xzr
-	adds	 x7, x27, x7
-	adcs	x19, x29, x19
-	adcs	x17, x20, x17
-	adcs	x20, x28, xzr
-	adds	 x1, x2, x1
-	adcs	x2, x4, x7
-	mul	 x4, x1, x0
-	adcs	x3, x3, x19
-	mul	 x7, x4, x8
-	mul	 x19, x4, x9
-	mul	 x22, x4, x10
-	adcs	x17, x5, x17
-	umulh	x5, x4, x11
+	umulh	x6, x4, x5
+	mul		x7, x4, x5
+	umulh	x19, x0, x5
+	mul		x5, x10, x5
 	adcs	x20, x21, x20
-	adcs	x21, xzr, xzr
-	adds	 x5, x5, x22
-	umulh	x22, x4, x10
-	adcs	x19, x22, x19
-	umulh	x22, x4, x9
-	adcs	x7, x22, x7
-	umulh	x22, x4, x8
-	mul	 x4, x4, x11
-	adcs	x22, x22, xzr
-	cmn	 x4, x1
-	adcs	x1, x5, x2
-	adcs	x2, x19, x3
-	adcs	x17, x7, x17
-	adcs	x3, x22, x20
-	adcs	x4, x21, xzr
-	adds	 x12, x12, x15
-	adcs	x13, x6, x13
-	adcs	x15, x18, x16
-	adcs	x16, x25, xzr
-	adds	 x14, x1, x14
-	adcs	x12, x2, x12
-	mul	 x18, x14, x0
-	adcs	x13, x17, x13
-	umulh	x17, x18, x8
-	mul	 x0, x18, x8
-	umulh	x1, x18, x9
-	mul	 x2, x18, x9
-	umulh	x5, x18, x10
-	mul	 x6, x18, x10
-	umulh	x7, x18, x11
-	mul	 x18, x18, x11
-	adcs	x15, x3, x15
-	adcs	x16, x4, x16
-	adcs	x3, xzr, xzr
-	adds	 x4, x7, x6
-	adcs	x2, x5, x2
-	adcs	x0, x1, x0
-	adcs	x17, x17, xzr
-	cmn	 x18, x14
-	adcs	x12, x4, x12
-	adcs	x13, x2, x13
-	adcs	x14, x0, x15
-	adcs	x15, x17, x16
-	adcs	x16, x3, xzr
-	subs	 x11, x12, x11
-	sbcs	x10, x13, x10
-	sbcs	x9, x14, x9
-	sbcs	x8, x15, x8
-	sbcs	x16, x16, xzr
-	tst	 x16, #0x1
-	csel	x11, x12, x11, ne
-	csel	x10, x13, x10, ne
-	csel	x9, x14, x9, ne
-	csel	x8, x15, x8, ne
-	ldr	x12, [sp, #8]           // 8-byte Folded Reload
-	stp	 x11, x10, [x12]
-	stp	x9, x8, [x12, #16]
-	add	sp, sp, #16             // =16
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end53:
-	.size	mcl_fp_mont4L, .Lfunc_end53-mcl_fp_mont4L
-
-	.globl	mcl_fp_montNF4L
-	.align	2
-	.type	mcl_fp_montNF4L,@function
-mcl_fp_montNF4L:                        // @mcl_fp_montNF4L
-// BB#0:
-	stp	x28, x27, [sp, #-80]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	ldp	x14, x15, [x1, #16]
-	ldp	 x13, x16, [x1]
-	ldur	x12, [x3, #-8]
-	ldp	x9, x8, [x3, #16]
-	ldp	 x11, x10, [x3]
-	ldp	 x17, x18, [x2]
-	ldp	x1, x2, [x2, #16]
-	umulh	x3, x15, x17
-	mul	 x4, x15, x17
-	umulh	x5, x14, x17
-	mul	 x6, x14, x17
-	umulh	x7, x16, x17
-	mul	 x19, x16, x17
-	umulh	x20, x13, x17
-	mul	 x17, x13, x17
-	umulh	x21, x18, x15
-	mul	 x22, x18, x15
-	umulh	x23, x18, x14
-	mul	 x24, x18, x14
-	umulh	x25, x18, x16
-	mul	 x26, x18, x16
-	umulh	x27, x18, x13
-	mul	 x18, x18, x13
-	adds	 x19, x20, x19
-	umulh	x20, x1, x15
-	adcs	x6, x7, x6
-	mul	 x7, x17, x12
-	adcs	x4, x5, x4
-	mul	 x5, x7, x11
-	adcs	x3, x3, xzr
-	cmn	 x5, x17
-	mul	 x17, x1, x15
-	mul	 x5, x7, x10
-	adcs	x5, x5, x19
-	mul	 x19, x7, x9
-	adcs	x6, x19, x6
-	mul	 x19, x7, x8
-	adcs	x4, x19, x4
-	umulh	x19, x7, x11
-	adcs	x3, x3, xzr
-	adds	 x5, x5, x19
-	umulh	x19, x7, x10
-	adcs	x6, x6, x19
-	umulh	x19, x7, x9
-	adcs	x4, x4, x19
-	umulh	x19, x1, x14
-	umulh	x7, x7, x8
-	adcs	x3, x3, x7
-	mul	 x7, x1, x14
-	adds	 x26, x27, x26
-	umulh	x27, x1, x16
-	adcs	x24, x25, x24
-	mul	 x25, x1, x16
+	ldp	x11, x12, [x3, #32]
+	mul		x27, x5, x8
+	adcs	x7, x19, x7
+	mul		x21, x27, x15
+	umulh	x19, x27, x14
+	adcs	x6, x6, xzr
+	mul		x23, x27, x13
+	adds		x19, x19, x21
+	umulh	x21, x27, x15
+	mul		x30, x27, x17
+	adcs	x21, x21, x23
+	umulh	x23, x27, x13
+	mul		x29, x27, x11
+	adcs	x23, x23, x30
+	umulh	x30, x27, x17
+	mul		x25, x27, x12
+	adcs	x29, x30, x29
+	umulh	x30, x27, x11
+	adcs	x25, x30, x25
+	umulh	x30, x27, x12
+	mul		x27, x27, x14
+	adcs	x30, x30, xzr
+	cmn		x27, x5
+	adcs	x19, x19, x26
+	adcs	x21, x21, x24
+	ldr	x3, [x2, #8]
 	adcs	x22, x23, x22
-	umulh	x23, x1, x13
-	mul	 x1, x1, x13
-	adcs	x21, x21, xzr
-	adds	 x18, x18, x5
-	umulh	x5, x2, x15
-	mul	 x15, x2, x15
-	adcs	x6, x26, x6
-	umulh	x26, x2, x14
-	mul	 x14, x2, x14
-	adcs	x4, x24, x4
-	mul	 x24, x18, x12
-	adcs	x3, x22, x3
-	mul	 x22, x24, x11
-	adcs	x21, x21, xzr
-	cmn	 x22, x18
-	umulh	x18, x2, x16
-	mul	 x16, x2, x16
-	umulh	x22, x2, x13
-	mul	 x13, x2, x13
-	mul	 x2, x24, x10
-	adcs	x2, x2, x6
-	mul	 x6, x24, x9
-	adcs	x4, x6, x4
-	mul	 x6, x24, x8
-	adcs	x3, x6, x3
-	umulh	x6, x24, x11
-	adcs	x21, x21, xzr
-	adds	 x2, x2, x6
-	umulh	x6, x24, x10
-	adcs	x4, x4, x6
-	umulh	x6, x24, x9
-	adcs	x3, x3, x6
-	umulh	x6, x24, x8
-	adcs	x6, x21, x6
-	adds	 x21, x23, x25
-	adcs	x7, x27, x7
-	adcs	x17, x19, x17
-	adcs	x19, x20, xzr
-	adds	 x1, x1, x2
-	adcs	x2, x21, x4
-	mul	 x4, x1, x12
-	adcs	x3, x7, x3
-	mul	 x7, x4, x8
-	mul	 x20, x4, x9
-	adcs	x17, x17, x6
-	mul	 x6, x4, x11
-	adcs	x19, x19, xzr
-	cmn	 x6, x1
-	mul	 x1, x4, x10
-	umulh	x6, x4, x8
-	adcs	x1, x1, x2
-	umulh	x2, x4, x9
-	adcs	x3, x20, x3
-	umulh	x20, x4, x10
-	umulh	x4, x4, x11
-	adcs	x17, x7, x17
-	adcs	x7, x19, xzr
-	adds	 x1, x1, x4
-	adcs	x3, x3, x20
-	adcs	x17, x17, x2
-	adcs	x2, x7, x6
-	adds	 x16, x22, x16
-	adcs	x14, x18, x14
-	adcs	x15, x26, x15
-	adcs	x18, x5, xzr
-	adds	 x13, x13, x1
-	adcs	x16, x16, x3
-	mul	 x12, x13, x12
-	adcs	x14, x14, x17
-	mul	 x17, x12, x8
-	mul	 x1, x12, x9
-	mul	 x3, x12, x10
-	mul	 x4, x12, x11
-	umulh	x5, x12, x8
-	umulh	x6, x12, x9
-	umulh	x7, x12, x10
-	umulh	x12, x12, x11
-	adcs	x15, x15, x2
-	adcs	x18, x18, xzr
-	cmn	 x4, x13
-	adcs	x13, x3, x16
-	adcs	x14, x1, x14
-	adcs	x15, x17, x15
-	adcs	x16, x18, xzr
-	adds	 x12, x13, x12
-	adcs	x13, x14, x7
-	adcs	x14, x15, x6
-	adcs	x15, x16, x5
-	subs	 x11, x12, x11
-	sbcs	x10, x13, x10
-	sbcs	x9, x14, x9
-	sbcs	x8, x15, x8
-	cmp	 x8, #0                 // =0
-	csel	x11, x12, x11, lt
-	csel	x10, x13, x10, lt
-	csel	x9, x14, x9, lt
-	csel	x8, x15, x8, lt
-	stp	 x11, x10, [x0]
-	stp	x9, x8, [x0, #16]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #80
+	adcs	x20, x29, x20
+	adcs	x7, x25, x7
+	adcs	x30, x30, x6
+	mul		x29, x3, x1
+	umulh	x25, x3, x10
+	adcs	x6, xzr, xzr
+	mul		x23, x3, x16
+	adds		x25, x25, x29
+	umulh	x29, x3, x1
+	mul		x24, x3, x18
+	adcs	x23, x29, x23
+	umulh	x29, x3, x16
+	mul		x26, x3, x0
+	adcs	x24, x29, x24
+	umulh	x29, x3, x18
+	mul		x5, x3, x4
+	umulh	x27, x3, x0
+	adcs	x26, x29, x26
+	umulh	x28, x3, x4
+	adcs	x27, x27, x5
+	mul		x3, x3, x10
+	adcs	x29, x28, xzr
+	adds		x3, x19, x3
+	adcs	x5, x21, x25
+	adcs	x28, x22, x23
+	adcs	x19, x20, x24
+	adcs	x20, x7, x26
+	mov	 x2, x8
+	adcs	x30, x30, x27
+	mul		x21, x3, x2
+	adcs	x6, x6, x29
+	mul		x24, x21, x15
+	umulh	x26, x21, x14
+	adcs	x7, xzr, xzr
+	mul		x8, x21, x13
+	adds		x24, x26, x24
+	umulh	x26, x21, x15
+	mul		x9, x21, x17
+	adcs	x29, x26, x8
+	umulh	x8, x21, x13
+	mul		x25, x21, x11
+	adcs	x26, x8, x9
+	umulh	x8, x21, x17
+	mul		x23, x21, x12
+	adcs	x27, x8, x25
+	umulh	x8, x21, x11
+	umulh	x22, x21, x12
+	adcs	x8, x8, x23
+	mul		x9, x21, x14
+	ldr	x25, [sp, #32]          // 8-byte Folded Reload
+	adcs	x21, x22, xzr
+	cmn		x9, x3
+	adcs	x5, x24, x5
+	str	x2, [sp, #40]           // 8-byte Folded Spill
+	adcs	x24, x29, x28
+	ldp	x23, x3, [x25, #16]
+	adcs	x19, x26, x19
+	adcs	x20, x27, x20
+	adcs	x8, x8, x30
+	adcs	x21, x21, x6
+	mul		x28, x23, x1
+	umulh	x6, x23, x10
+	adcs	x7, x7, xzr
+	mul		x27, x23, x16
+	adds		x6, x6, x28
+	umulh	x28, x23, x1
+	mul		x26, x23, x18
+	adcs	x27, x28, x27
+	umulh	x28, x23, x16
+	mul		x25, x23, x0
+	adcs	x26, x28, x26
+	umulh	x28, x23, x18
+	mul		x22, x23, x4
+	adcs	x25, x28, x25
+	umulh	x28, x23, x0
+	umulh	x9, x23, x4
+	adcs	x22, x28, x22
+	mul		x23, x23, x10
+	adcs	x9, x9, xzr
+	adds		x23, x5, x23
+	adcs	x5, x24, x6
+	adcs	x6, x19, x27
+	adcs	x19, x20, x26
+	adcs	x20, x8, x25
+	adcs	x21, x21, x22
+	mul		x29, x23, x2
+	adcs	x22, x7, x9
+	mul		x8, x29, x15
+	umulh	x24, x29, x14
+	adcs	x7, xzr, xzr
+	mul		x26, x29, x13
+	adds		x24, x24, x8
+	umulh	x8, x29, x15
+	mul		x30, x29, x17
+	adcs	x25, x8, x26
+	umulh	x8, x29, x13
+	mul		x27, x29, x11
+	adcs	x26, x8, x30
+	umulh	x8, x29, x17
+	mul		x28, x29, x12
+	adcs	x27, x8, x27
+	umulh	x8, x29, x11
+	adcs	x28, x8, x28
+	umulh	x8, x29, x12
+	mul		x9, x29, x14
+	adcs	x29, x8, xzr
+	cmn		x9, x23
+	adcs	x2, x24, x5
+	adcs	x6, x25, x6
+	adcs	x19, x26, x19
+	adcs	x20, x27, x20
+	adcs	x21, x28, x21
+	adcs	x22, x29, x22
+	mov	 x9, x10
+	mul		x27, x3, x1
+	umulh	x28, x3, x9
+	adcs	x7, x7, xzr
+	mul		x26, x3, x16
+	adds		x27, x28, x27
+	umulh	x28, x3, x1
+	mul		x25, x3, x18
+	adcs	x26, x28, x26
+	umulh	x28, x3, x16
+	mul		x24, x3, x0
+	adcs	x25, x28, x25
+	umulh	x28, x3, x18
+	mul		x5, x3, x4
+	adcs	x24, x28, x24
+	umulh	x28, x3, x0
+	umulh	x30, x3, x4
+	adcs	x5, x28, x5
+	mul		x3, x3, x9
+	adcs	x29, x30, xzr
+	adds		x2, x2, x3
+	adcs	x3, x6, x27
+	ldr	x10, [sp, #40]          // 8-byte Folded Reload
+	adcs	x19, x19, x26
+	adcs	x20, x20, x25
+	adcs	x21, x21, x24
+	adcs	x5, x22, x5
+	mul		x6, x2, x10
+	mov	 x30, x17
+	mov	 x17, x15
+	adcs	x29, x7, x29
+	mul		x24, x6, x17
+	umulh	x22, x6, x14
+	adcs	x7, xzr, xzr
+	mul		x25, x6, x13
+	adds		x22, x22, x24
+	umulh	x24, x6, x17
+	mul		x28, x6, x30
+	adcs	x24, x24, x25
+	umulh	x25, x6, x13
+	mul		x27, x6, x11
+	adcs	x25, x25, x28
+	umulh	x28, x6, x30
+	mul		x26, x6, x12
+	adcs	x27, x28, x27
+	umulh	x28, x6, x11
+	adcs	x26, x28, x26
+	umulh	x28, x6, x12
+	ldr	x8, [sp, #32]           // 8-byte Folded Reload
+	mul		x6, x6, x14
+	adcs	x28, x28, xzr
+	cmn		x6, x2
+	adcs	x3, x22, x3
+	adcs	x19, x24, x19
+	ldp	x23, x8, [x8, #32]
+	adcs	x20, x25, x20
+	adcs	x21, x27, x21
+	adcs	x5, x26, x5
+	adcs	x29, x28, x29
+	mul		x26, x23, x1
+	umulh	x28, x23, x9
+	adcs	x7, x7, xzr
+	mul		x27, x23, x16
+	adds		x26, x28, x26
+	umulh	x28, x23, x1
+	mul		x25, x23, x18
+	adcs	x27, x28, x27
+	umulh	x28, x23, x16
+	mul		x24, x23, x0
+	adcs	x25, x28, x25
+	umulh	x28, x23, x18
+	mul		x6, x23, x4
+	umulh	x22, x23, x0
+	adcs	x24, x28, x24
+	umulh	x2, x23, x4
+	adcs	x6, x22, x6
+	mul		x23, x23, x9
+	adcs	x2, x2, xzr
+	adds		x3, x3, x23
+	adcs	x19, x19, x26
+	adcs	x20, x20, x27
+	adcs	x21, x21, x25
+	umulh	x28, x8, x4
+	adcs	x5, x5, x24
+	str	x28, [sp, #32]          // 8-byte Folded Spill
+	mul		x28, x8, x4
+	adcs	x4, x29, x6
+	mul		x22, x3, x10
+	adcs	x2, x7, x2
+	mov	 x15, x13
+	mul		x24, x22, x17
+	umulh	x6, x22, x14
+	adcs	x7, xzr, xzr
+	mov	 x13, x30
+	mul		x25, x22, x15
+	adds		x6, x6, x24
+	umulh	x24, x22, x17
+	mul		x27, x22, x13
+	adcs	x24, x24, x25
+	umulh	x25, x22, x15
+	mul		x26, x22, x11
+	adcs	x25, x25, x27
+	umulh	x27, x22, x13
+	mul		x23, x22, x12
+	adcs	x26, x27, x26
+	umulh	x27, x22, x11
+	adcs	x23, x27, x23
+	umulh	x27, x22, x12
+	mul		x22, x22, x14
+	adcs	x27, x27, xzr
+	cmn		x22, x3
+	adcs	x6, x6, x19
+	adcs	x19, x24, x20
+	adcs	x20, x25, x21
+	adcs	x5, x26, x5
+	umulh	x3, x8, x0
+	mul		x0, x8, x0
+	umulh	x22, x8, x18
+	mul		x18, x8, x18
+	umulh	x29, x8, x16
+	mul		x16, x8, x16
+	umulh	x30, x8, x1
+	mul		x1, x8, x1
+	umulh	x10, x8, x9
+	mul		x8, x8, x9
+	adcs	x9, x23, x4
+	adcs	x2, x27, x2
+	adcs	x7, x7, xzr
+	stp	x9, x12, [sp, #8]       // 8-byte Folded Spill
+	adds		x9, x10, x1
+	adcs	x16, x30, x16
+	ldr	x10, [sp, #32]          // 8-byte Folded Reload
+	adcs	x18, x29, x18
+	adcs	x0, x22, x0
+	adcs	x1, x3, x28
+	adcs	x3, x10, xzr
+	ldr	x10, [sp, #40]          // 8-byte Folded Reload
+	adds		x8, x6, x8
+	adcs	x9, x19, x9
+	adcs	x16, x20, x16
+	mul		x4, x8, x10
+	ldr	x10, [sp, #8]           // 8-byte Folded Reload
+	adcs	x18, x5, x18
+	mul		x27, x4, x17
+	umulh	x28, x4, x14
+	adcs	x10, x10, x0
+	adcs	x0, x2, x1
+	adcs	x1, x7, x3
+	adcs	x2, xzr, xzr
+	mul		x25, x4, x15
+	umulh	x26, x4, x17
+	adds		x3, x28, x27
+	mov	 x30, x11
+	mul		x23, x4, x13
+	umulh	x24, x4, x15
+	adcs	x5, x26, x25
+	mul		x21, x4, x30
+	umulh	x22, x4, x13
+	adcs	x7, x24, x23
+	mul		x19, x4, x12
+	umulh	x20, x4, x30
+	adcs	x21, x22, x21
+	umulh	x6, x4, x12
+	adcs	x19, x20, x19
+	mul		x4, x4, x14
+	adcs	x6, x6, xzr
+	cmn		x4, x8
+	adcs	x8, x3, x9
+	adcs	x9, x5, x16
+	adcs	x16, x7, x18
+	adcs	x10, x21, x10
+	adcs	x18, x19, x0
+	adcs	x0, x6, x1
+	adcs	x1, x2, xzr
+	mov	 x29, x13
+	subs		x13, x8, x14
+	sbcs	x12, x9, x17
+	ldr	x17, [sp, #16]          // 8-byte Folded Reload
+	sbcs	x11, x16, x15
+	sbcs	x14, x10, x29
+	sbcs	x15, x18, x30
+	sbcs	x17, x0, x17
+	sbcs	x1, x1, xzr
+	tst	 x1, #0x1
+	csel	x10, x10, x14, ne
+	ldr	x14, [sp, #24]          // 8-byte Folded Reload
+	csel	x8, x8, x13, ne
+	csel	x9, x9, x12, ne
+	csel	x11, x16, x11, ne
+	csel	x12, x18, x15, ne
+	csel	x13, x0, x17, ne
+	stp		x8, x9, [x14]
+	stp	x11, x10, [x14, #16]
+	stp	x12, x13, [x14, #32]
+	ldp	x29, x30, [sp, #128]    // 8-byte Folded Reload
+	ldp	x20, x19, [sp, #112]    // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #96]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #80]     // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #64]     // 8-byte Folded Reload
+	ldp	x28, x27, [sp, #48]     // 8-byte Folded Reload
+	add	sp, sp, #144            // =144
 	ret
-.Lfunc_end54:
-	.size	mcl_fp_montNF4L, .Lfunc_end54-mcl_fp_montNF4L
+.Lfunc_end43:
+	.size	mcl_fp_mont6L, .Lfunc_end43-mcl_fp_mont6L
 
-	.globl	mcl_fp_montRed4L
-	.align	2
-	.type	mcl_fp_montRed4L,@function
-mcl_fp_montRed4L:                       // @mcl_fp_montRed4L
+	.globl	mcl_fp_montNF6L
+	.p2align	2
+	.type	mcl_fp_montNF6L,@function
+mcl_fp_montNF6L:                        // @mcl_fp_montNF6L
 // BB#0:
-	stp	x22, x21, [sp, #-32]!
-	stp	x20, x19, [sp, #16]
-	ldur	x12, [x2, #-8]
-	ldp	x9, x8, [x2, #16]
-	ldp	 x11, x10, [x2]
-	ldp	x14, x15, [x1, #48]
-	ldp	x16, x17, [x1, #32]
-	ldp	x18, x2, [x1, #16]
-	ldp	 x13, x1, [x1]
-	mul	 x3, x13, x12
-	umulh	x4, x3, x8
-	mul	 x5, x3, x8
-	umulh	x6, x3, x9
-	mul	 x7, x3, x9
-	umulh	x19, x3, x10
-	mul	 x20, x3, x10
-	umulh	x21, x3, x11
-	mul	 x3, x3, x11
-	adds	 x20, x21, x20
-	adcs	x7, x19, x7
-	adcs	x5, x6, x5
-	adcs	x4, x4, xzr
-	cmn	 x13, x3
-	adcs	x13, x1, x20
-	adcs	x18, x18, x7
-	mul	 x1, x13, x12
-	adcs	x2, x2, x5
-	umulh	x3, x1, x8
-	mul	 x5, x1, x8
-	umulh	x6, x1, x9
-	mul	 x7, x1, x9
-	umulh	x19, x1, x10
-	mul	 x20, x1, x10
-	umulh	x21, x1, x11
-	mul	 x1, x1, x11
-	adcs	x16, x16, x4
-	adcs	x17, x17, xzr
-	adcs	x14, x14, xzr
-	adcs	x15, x15, xzr
-	adcs	x4, xzr, xzr
-	adds	 x20, x21, x20
-	adcs	x7, x19, x7
-	adcs	x5, x6, x5
-	adcs	x3, x3, xzr
-	cmn	 x1, x13
-	adcs	x13, x20, x18
-	adcs	x18, x7, x2
-	mul	 x1, x13, x12
-	adcs	x16, x5, x16
-	umulh	x2, x1, x8
-	mul	 x5, x1, x8
-	umulh	x6, x1, x9
-	mul	 x7, x1, x9
-	umulh	x19, x1, x10
-	mul	 x20, x1, x10
-	umulh	x21, x1, x11
-	mul	 x1, x1, x11
-	adcs	x17, x3, x17
-	adcs	x14, x14, xzr
-	adcs	x15, x15, xzr
-	adcs	x3, x4, xzr
-	adds	 x4, x21, x20
-	adcs	x7, x19, x7
-	adcs	x5, x6, x5
-	adcs	x2, x2, xzr
-	cmn	 x1, x13
-	adcs	x13, x4, x18
-	adcs	x16, x7, x16
-	mul	 x12, x13, x12
-	adcs	x17, x5, x17
-	umulh	x18, x12, x8
-	mul	 x1, x12, x8
-	umulh	x4, x12, x9
-	mul	 x5, x12, x9
-	umulh	x6, x12, x10
-	mul	 x7, x12, x10
-	umulh	x19, x12, x11
-	mul	 x12, x12, x11
-	adcs	x14, x2, x14
-	adcs	x15, x15, xzr
-	adcs	x2, x3, xzr
-	adds	 x3, x19, x7
-	adcs	x5, x6, x5
-	adcs	x1, x4, x1
-	adcs	x18, x18, xzr
-	cmn	 x12, x13
-	adcs	x12, x3, x16
-	adcs	x13, x5, x17
-	adcs	x14, x1, x14
-	adcs	x15, x18, x15
-	adcs	x16, x2, xzr
-	subs	 x11, x12, x11
-	sbcs	x10, x13, x10
-	sbcs	x9, x14, x9
-	sbcs	x8, x15, x8
-	sbcs	x16, x16, xzr
-	tst	 x16, #0x1
-	csel	x11, x12, x11, ne
-	csel	x10, x13, x10, ne
-	csel	x9, x14, x9, ne
-	csel	x8, x15, x8, ne
-	stp	 x11, x10, [x0]
-	stp	x9, x8, [x0, #16]
-	ldp	x20, x19, [sp, #16]
-	ldp	x22, x21, [sp], #32
-	ret
-.Lfunc_end55:
-	.size	mcl_fp_montRed4L, .Lfunc_end55-mcl_fp_montRed4L
-
-	.globl	mcl_fp_addPre4L
-	.align	2
-	.type	mcl_fp_addPre4L,@function
-mcl_fp_addPre4L:                        // @mcl_fp_addPre4L
-// BB#0:
-	ldp	x8, x9, [x2, #16]
-	ldp	 x10, x11, [x2]
-	ldp	 x12, x13, [x1]
-	ldp	x14, x15, [x1, #16]
-	adds	 x10, x10, x12
-	str	 x10, [x0]
-	adcs	x10, x11, x13
-	adcs	x8, x8, x14
-	stp	x10, x8, [x0, #8]
-	adcs	x9, x9, x15
-	adcs	x8, xzr, xzr
-	str	x9, [x0, #24]
-	mov	 x0, x8
-	ret
-.Lfunc_end56:
-	.size	mcl_fp_addPre4L, .Lfunc_end56-mcl_fp_addPre4L
-
-	.globl	mcl_fp_subPre4L
-	.align	2
-	.type	mcl_fp_subPre4L,@function
-mcl_fp_subPre4L:                        // @mcl_fp_subPre4L
-// BB#0:
-	ldp	x8, x9, [x2, #16]
-	ldp	 x10, x11, [x2]
-	ldp	 x12, x13, [x1]
-	ldp	x14, x15, [x1, #16]
-	subs	 x10, x12, x10
-	str	 x10, [x0]
-	sbcs	x10, x13, x11
-	sbcs	x8, x14, x8
-	stp	x10, x8, [x0, #8]
-	sbcs	x9, x15, x9
-	ngcs	 x8, xzr
-	and	x8, x8, #0x1
-	str	x9, [x0, #24]
-	mov	 x0, x8
-	ret
-.Lfunc_end57:
-	.size	mcl_fp_subPre4L, .Lfunc_end57-mcl_fp_subPre4L
-
-	.globl	mcl_fp_shr1_4L
-	.align	2
-	.type	mcl_fp_shr1_4L,@function
-mcl_fp_shr1_4L:                         // @mcl_fp_shr1_4L
-// BB#0:
-	ldp	 x8, x9, [x1]
-	ldp	x10, x11, [x1, #16]
-	extr	x8, x9, x8, #1
-	extr	x9, x10, x9, #1
-	extr	x10, x11, x10, #1
-	lsr	x11, x11, #1
-	stp	 x8, x9, [x0]
-	stp	x10, x11, [x0, #16]
-	ret
-.Lfunc_end58:
-	.size	mcl_fp_shr1_4L, .Lfunc_end58-mcl_fp_shr1_4L
-
-	.globl	mcl_fp_add4L
-	.align	2
-	.type	mcl_fp_add4L,@function
-mcl_fp_add4L:                           // @mcl_fp_add4L
-// BB#0:
-	ldp	x8, x9, [x2, #16]
-	ldp	 x10, x11, [x2]
-	ldp	 x12, x13, [x1]
-	ldp	x14, x15, [x1, #16]
-	adds	 x10, x10, x12
-	adcs	x12, x11, x13
-	ldp	 x11, x13, [x3]
-	stp	 x10, x12, [x0]
-	adcs	x8, x8, x14
-	adcs	x14, x9, x15
-	stp	x8, x14, [x0, #16]
-	adcs	x15, xzr, xzr
-	ldp	x9, x16, [x3, #16]
-	subs	 x11, x10, x11
-	sbcs	x10, x12, x13
-	sbcs	x9, x8, x9
-	sbcs	x8, x14, x16
-	sbcs	x12, x15, xzr
-	and	w12, w12, #0x1
-	tbnz	w12, #0, .LBB59_2
-// BB#1:                                // %nocarry
-	stp	 x11, x10, [x0]
-	stp	x9, x8, [x0, #16]
-.LBB59_2:                               // %carry
-	ret
-.Lfunc_end59:
-	.size	mcl_fp_add4L, .Lfunc_end59-mcl_fp_add4L
-
-	.globl	mcl_fp_addNF4L
-	.align	2
-	.type	mcl_fp_addNF4L,@function
-mcl_fp_addNF4L:                         // @mcl_fp_addNF4L
-// BB#0:
-	ldp	x8, x9, [x1, #16]
-	ldp	 x10, x11, [x1]
-	ldp	 x12, x13, [x2]
-	ldp	x14, x15, [x2, #16]
-	adds	 x10, x12, x10
-	adcs	x11, x13, x11
-	ldp	 x12, x13, [x3]
-	adcs	x8, x14, x8
-	ldp	x14, x16, [x3, #16]
-	adcs	x9, x15, x9
-	subs	 x12, x10, x12
-	sbcs	x13, x11, x13
-	sbcs	x14, x8, x14
-	sbcs	x15, x9, x16
-	cmp	 x15, #0                // =0
-	csel	x10, x10, x12, lt
-	csel	x11, x11, x13, lt
-	csel	x8, x8, x14, lt
-	csel	x9, x9, x15, lt
-	stp	 x10, x11, [x0]
-	stp	x8, x9, [x0, #16]
-	ret
-.Lfunc_end60:
-	.size	mcl_fp_addNF4L, .Lfunc_end60-mcl_fp_addNF4L
-
-	.globl	mcl_fp_sub4L
-	.align	2
-	.type	mcl_fp_sub4L,@function
-mcl_fp_sub4L:                           // @mcl_fp_sub4L
-// BB#0:
-	ldp	x10, x11, [x2, #16]
-	ldp	 x8, x9, [x2]
-	ldp	 x12, x13, [x1]
-	ldp	x14, x15, [x1, #16]
-	subs	 x8, x12, x8
-	sbcs	x9, x13, x9
-	stp	 x8, x9, [x0]
-	sbcs	x10, x14, x10
-	sbcs	x11, x15, x11
-	stp	x10, x11, [x0, #16]
-	ngcs	 x12, xzr
-	and	w12, w12, #0x1
-	tbnz	w12, #0, .LBB61_2
-// BB#1:                                // %nocarry
-	ret
-.LBB61_2:                               // %carry
-	ldp	x12, x13, [x3, #16]
-	ldp	 x14, x15, [x3]
-	adds	 x8, x14, x8
-	adcs	x9, x15, x9
-	adcs	x10, x12, x10
-	adcs	x11, x13, x11
-	stp	 x8, x9, [x0]
-	stp	x10, x11, [x0, #16]
-	ret
-.Lfunc_end61:
-	.size	mcl_fp_sub4L, .Lfunc_end61-mcl_fp_sub4L
-
-	.globl	mcl_fp_subNF4L
-	.align	2
-	.type	mcl_fp_subNF4L,@function
-mcl_fp_subNF4L:                         // @mcl_fp_subNF4L
-// BB#0:
-	ldp	x8, x9, [x2, #16]
-	ldp	 x10, x11, [x2]
-	ldp	 x12, x13, [x1]
-	ldp	x14, x15, [x1, #16]
-	subs	 x10, x12, x10
-	sbcs	x11, x13, x11
-	ldp	x12, x13, [x3, #16]
-	sbcs	x8, x14, x8
-	ldp	 x14, x16, [x3]
-	sbcs	x9, x15, x9
-	asr	x15, x9, #63
-	and	 x14, x15, x14
-	and	 x16, x15, x16
-	and	 x12, x15, x12
-	and	 x13, x15, x13
-	adds	 x10, x14, x10
-	str	 x10, [x0]
-	adcs	x10, x16, x11
-	adcs	x8, x12, x8
-	stp	x10, x8, [x0, #8]
-	adcs	x8, x13, x9
-	str	x8, [x0, #24]
-	ret
-.Lfunc_end62:
-	.size	mcl_fp_subNF4L, .Lfunc_end62-mcl_fp_subNF4L
-
-	.globl	mcl_fpDbl_add4L
-	.align	2
-	.type	mcl_fpDbl_add4L,@function
-mcl_fpDbl_add4L:                        // @mcl_fpDbl_add4L
-// BB#0:
-	ldp	x8, x9, [x2, #48]
-	ldp	x10, x11, [x1, #48]
-	ldp	x12, x13, [x2, #32]
-	ldp	x14, x15, [x1, #32]
-	ldp	x16, x17, [x2, #16]
-	ldp	 x4, x2, [x2]
-	ldp	x5, x6, [x1, #16]
-	ldp	 x18, x1, [x1]
-	adds	 x18, x4, x18
-	str	 x18, [x0]
-	ldp	x18, x4, [x3, #16]
-	adcs	x1, x2, x1
-	ldp	 x2, x3, [x3]
-	adcs	x16, x16, x5
-	stp	x1, x16, [x0, #8]
-	adcs	x16, x17, x6
-	str	x16, [x0, #24]
-	adcs	x12, x12, x14
-	adcs	x13, x13, x15
-	adcs	x8, x8, x10
-	adcs	x9, x9, x11
-	adcs	x10, xzr, xzr
-	subs	 x11, x12, x2
-	sbcs	x14, x13, x3
-	sbcs	x15, x8, x18
-	sbcs	x16, x9, x4
-	sbcs	x10, x10, xzr
-	tst	 x10, #0x1
-	csel	x10, x12, x11, ne
-	csel	x11, x13, x14, ne
-	csel	x8, x8, x15, ne
-	csel	x9, x9, x16, ne
-	stp	x10, x11, [x0, #32]
-	stp	x8, x9, [x0, #48]
-	ret
-.Lfunc_end63:
-	.size	mcl_fpDbl_add4L, .Lfunc_end63-mcl_fpDbl_add4L
-
-	.globl	mcl_fpDbl_sub4L
-	.align	2
-	.type	mcl_fpDbl_sub4L,@function
-mcl_fpDbl_sub4L:                        // @mcl_fpDbl_sub4L
-// BB#0:
-	ldp	x8, x9, [x2, #48]
-	ldp	x10, x11, [x1, #48]
-	ldp	x12, x13, [x2, #32]
-	ldp	x14, x15, [x1, #32]
-	ldp	x16, x17, [x2, #16]
-	ldp	 x18, x2, [x2]
-	ldp	x5, x6, [x1, #16]
-	ldp	 x4, x1, [x1]
-	subs	 x18, x4, x18
-	str	 x18, [x0]
-	ldp	x18, x4, [x3, #16]
-	sbcs	x1, x1, x2
-	ldp	 x2, x3, [x3]
-	sbcs	x16, x5, x16
-	stp	x1, x16, [x0, #8]
-	sbcs	x16, x6, x17
-	sbcs	x12, x14, x12
-	sbcs	x13, x15, x13
-	sbcs	x8, x10, x8
-	sbcs	x9, x11, x9
-	ngcs	 x10, xzr
-	tst	 x10, #0x1
-	csel	x10, x4, xzr, ne
-	csel	x11, x18, xzr, ne
-	csel	x14, x3, xzr, ne
-	csel	x15, x2, xzr, ne
-	adds	 x12, x15, x12
-	stp	x16, x12, [x0, #24]
-	adcs	x12, x14, x13
-	adcs	x8, x11, x8
-	stp	x12, x8, [x0, #40]
-	adcs	x8, x10, x9
-	str	x8, [x0, #56]
-	ret
-.Lfunc_end64:
-	.size	mcl_fpDbl_sub4L, .Lfunc_end64-mcl_fpDbl_sub4L
-
-	.globl	mcl_fp_mulUnitPre5L
-	.align	2
-	.type	mcl_fp_mulUnitPre5L,@function
-mcl_fp_mulUnitPre5L:                    // @mcl_fp_mulUnitPre5L
-// BB#0:
-	ldp	x12, x8, [x1, #24]
-	ldp	 x9, x10, [x1]
-	ldr	x11, [x1, #16]
-	mul	 x13, x9, x2
-	mul	 x14, x10, x2
-	umulh	x9, x9, x2
-	mul	 x15, x11, x2
-	umulh	x10, x10, x2
-	mul	 x16, x12, x2
-	umulh	x11, x11, x2
-	mul	 x17, x8, x2
-	umulh	x12, x12, x2
-	umulh	x8, x8, x2
-	adds	 x9, x9, x14
-	stp	 x13, x9, [x0]
-	adcs	x9, x10, x15
-	str	x9, [x0, #16]
-	adcs	x9, x11, x16
-	str	x9, [x0, #24]
-	adcs	x9, x12, x17
-	adcs	x8, x8, xzr
-	stp	x9, x8, [x0, #32]
-	ret
-.Lfunc_end65:
-	.size	mcl_fp_mulUnitPre5L, .Lfunc_end65-mcl_fp_mulUnitPre5L
-
-	.globl	mcl_fpDbl_mulPre5L
-	.align	2
-	.type	mcl_fpDbl_mulPre5L,@function
-mcl_fpDbl_mulPre5L:                     // @mcl_fpDbl_mulPre5L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	sub	sp, sp, #176            // =176
-	ldp	 x8, x10, [x1]
-	ldp	 x9, x15, [x1]
-	ldp	x11, x12, [x1, #24]
-	ldp	 x13, x14, [x2]
-	ldp	x16, x18, [x1, #16]
-	ldr	x17, [x1, #16]
-	ldr	x3, [x1, #32]
-	ldp	x4, x5, [x2, #16]
-	mul	 x6, x8, x13
-	str	x6, [sp, #72]           // 8-byte Folded Spill
-	umulh	x6, x12, x13
-	str	x6, [sp, #168]          // 8-byte Folded Spill
-	mul	 x6, x12, x13
-	str	x6, [sp, #152]          // 8-byte Folded Spill
-	umulh	x6, x11, x13
-	str	x6, [sp, #112]          // 8-byte Folded Spill
-	mul	 x6, x11, x13
-	str	x6, [sp, #64]           // 8-byte Folded Spill
-	umulh	x6, x17, x13
-	mul	 x23, x17, x13
-	umulh	x24, x10, x13
-	mul	 x25, x10, x13
-	umulh	x7, x8, x13
-	mul	 x26, x8, x14
-	mul	 x13, x12, x14
-	str	x13, [sp, #104]         // 8-byte Folded Spill
-	mul	 x13, x11, x14
-	stp	x13, x6, [sp, #40]
-	mul	 x29, x17, x14
-	mul	 x30, x10, x14
-	umulh	x12, x12, x14
-	umulh	x11, x11, x14
-	str	x11, [sp, #96]          // 8-byte Folded Spill
-	umulh	x11, x17, x14
-	umulh	x27, x10, x14
-	umulh	x20, x8, x14
-	mul	 x8, x9, x4
-	stp	x8, x11, [sp, #24]
-	mul	 x8, x3, x4
-	stp	x8, x12, [sp, #136]
-	mul	 x8, x18, x4
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	mul	 x8, x16, x4
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	mul	 x28, x15, x4
-	umulh	x8, x3, x4
-	str	x8, [sp, #160]          // 8-byte Folded Spill
-	umulh	x8, x18, x4
-	str	x8, [sp, #128]          // 8-byte Folded Spill
-	umulh	x8, x16, x4
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	umulh	x8, x15, x4
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	umulh	x22, x9, x4
-	mul	 x8, x3, x5
-	str	x8, [sp, #120]          // 8-byte Folded Spill
-	umulh	x8, x3, x5
-	str	x8, [sp, #56]           // 8-byte Folded Spill
-	mul	 x6, x18, x5
-	umulh	x21, x18, x5
-	mul	 x3, x16, x5
-	umulh	x19, x16, x5
-	mul	 x17, x15, x5
-	umulh	x4, x15, x5
-	mul	 x16, x9, x5
-	umulh	x18, x9, x5
-	ldr	x2, [x2, #32]
-	ldp	x10, x5, [x1, #16]
-	ldp	 x8, x9, [x1]
-	ldr	x1, [x1, #32]
-	mul	 x15, x8, x2
-	umulh	x14, x8, x2
-	mul	 x12, x9, x2
-	umulh	x13, x9, x2
-	mul	 x11, x10, x2
-	umulh	x10, x10, x2
-	mul	 x9, x5, x2
-	umulh	x5, x5, x2
-	mul	 x8, x1, x2
-	umulh	x1, x1, x2
-	ldr	x2, [sp, #72]           // 8-byte Folded Reload
-	str	 x2, [x0]
-	adds	 x2, x7, x25
-	adcs	x7, x24, x23
-	ldr	x23, [sp, #64]          // 8-byte Folded Reload
-	ldr	x24, [sp, #48]          // 8-byte Folded Reload
-	adcs	x23, x24, x23
-	ldr	x24, [sp, #152]         // 8-byte Folded Reload
-	ldr	x25, [sp, #112]         // 8-byte Folded Reload
-	adcs	x24, x25, x24
-	ldr	x25, [sp, #168]         // 8-byte Folded Reload
-	adcs	x25, x25, xzr
-	adds	 x2, x26, x2
-	str	x2, [x0, #8]
-	adcs	x2, x30, x7
-	adcs	x7, x29, x23
-	ldr	x23, [sp, #40]          // 8-byte Folded Reload
-	adcs	x23, x23, x24
-	ldr	x24, [sp, #104]         // 8-byte Folded Reload
-	adcs	x24, x24, x25
-	adcs	x25, xzr, xzr
-	adds	 x2, x2, x20
-	adcs	x7, x7, x27
-	ldr	x20, [sp, #32]          // 8-byte Folded Reload
-	adcs	x20, x23, x20
-	ldr	x23, [sp, #96]          // 8-byte Folded Reload
-	adcs	x23, x24, x23
-	ldr	x24, [sp, #144]         // 8-byte Folded Reload
-	adcs	x24, x25, x24
-	ldr	x25, [sp, #24]          // 8-byte Folded Reload
-	adds	 x2, x25, x2
-	str	x2, [x0, #16]
-	adcs	x2, x28, x7
-	ldr	x7, [sp, #16]           // 8-byte Folded Reload
-	adcs	x7, x7, x20
-	ldr	x20, [sp, #88]          // 8-byte Folded Reload
-	adcs	x20, x20, x23
-	ldr	x23, [sp, #136]         // 8-byte Folded Reload
-	adcs	x23, x23, x24
-	adcs	x24, xzr, xzr
-	adds	 x2, x2, x22
-	ldr	x22, [sp, #8]           // 8-byte Folded Reload
-	adcs	x7, x7, x22
-	ldr	x22, [sp, #80]          // 8-byte Folded Reload
-	adcs	x20, x20, x22
-	ldr	x22, [sp, #128]         // 8-byte Folded Reload
-	adcs	x22, x23, x22
-	ldr	x23, [sp, #160]         // 8-byte Folded Reload
+	sub	sp, sp, #208            // =208
+	stp	x28, x27, [sp, #112]    // 8-byte Folded Spill
+	stp	x26, x25, [sp, #128]    // 8-byte Folded Spill
+	stp	x24, x23, [sp, #144]    // 8-byte Folded Spill
+	stp	x22, x21, [sp, #160]    // 8-byte Folded Spill
+	stp	x20, x19, [sp, #176]    // 8-byte Folded Spill
+	stp	x29, x30, [sp, #192]    // 8-byte Folded Spill
+	str	x0, [sp, #96]           // 8-byte Folded Spill
+	ldr	x9, [x3, #32]
+	ldp	x16, x12, [x1, #32]
+	ldp	x13, x11, [x1, #16]
+	ldp		x17, x0, [x1]
+	ldur	x18, [x3, #-8]
+	str	x9, [sp, #104]          // 8-byte Folded Spill
+	ldr	x14, [x3, #40]
+	ldp	x4, x10, [x3, #16]
+	ldp		x15, x9, [x3]
+	ldp		x5, x3, [x2]
+	mov	 x1, x13
+	mov	 x13, x0
+	mov	 x8, x17
+	mul		x29, x13, x5
+	umulh	x30, x8, x5
+	mul		x27, x1, x5
+	umulh	x28, x13, x5
+	adds		x29, x30, x29
+	mul		x25, x11, x5
+	umulh	x26, x1, x5
+	adcs	x27, x28, x27
+	mul		x23, x16, x5
+	umulh	x24, x11, x5
+	adcs	x25, x26, x25
+	umulh	x20, x12, x5
+	mul		x21, x12, x5
+	umulh	x22, x16, x5
+	mul		x5, x8, x5
 	adcs	x23, x24, x23
-	adds	 x16, x16, x2
-	str	x16, [x0, #24]
-	adcs	x16, x17, x7
-	adcs	x17, x3, x20
-	adcs	x2, x6, x22
-	ldr	x3, [sp, #120]          // 8-byte Folded Reload
-	adcs	x3, x3, x23
-	adcs	x6, xzr, xzr
-	adds	 x16, x16, x18
-	adcs	x17, x17, x4
-	adcs	x18, x2, x19
-	adcs	x2, x3, x21
-	ldr	x3, [sp, #56]           // 8-byte Folded Reload
-	adcs	x3, x6, x3
-	adds	 x15, x15, x16
-	str	x15, [x0, #32]
-	adcs	x12, x12, x17
-	adcs	x11, x11, x18
-	adcs	x9, x9, x2
-	adcs	x8, x8, x3
-	adcs	x15, xzr, xzr
-	adds	 x12, x12, x14
-	adcs	x11, x11, x13
-	stp	x12, x11, [x0, #40]
-	adcs	x9, x9, x10
-	adcs	x8, x8, x5
-	stp	x9, x8, [x0, #56]
-	adcs	x8, x15, x1
-	str	x8, [x0, #72]
-	add	sp, sp, #176            // =176
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end66:
-	.size	mcl_fpDbl_mulPre5L, .Lfunc_end66-mcl_fpDbl_mulPre5L
-
-	.globl	mcl_fpDbl_sqrPre5L
-	.align	2
-	.type	mcl_fpDbl_sqrPre5L,@function
-mcl_fpDbl_sqrPre5L:                     // @mcl_fpDbl_sqrPre5L
-// BB#0:
-	ldp	 x8, x9, [x1]
-	ldp	x10, x11, [x1, #16]
-	ldp	 x12, x15, [x1]
-	ldp	x13, x14, [x1, #24]
-	ldr	x16, [x1, #16]
-	mul	 x17, x12, x12
-	mul	 x18, x14, x12
-	mul	 x2, x11, x12
-	umulh	x3, x16, x12
-	mul	 x4, x16, x12
-	umulh	x5, x9, x12
-	mul	 x6, x9, x12
-	str	 x17, [x0]
-	umulh	x17, x12, x12
-	adds	 x17, x17, x6
-	adcs	x4, x5, x4
-	adcs	x2, x3, x2
-	umulh	x3, x11, x12
-	adcs	x18, x3, x18
-	umulh	x12, x14, x12
-	adcs	x12, x12, xzr
-	adds	 x17, x6, x17
-	ldr	 x3, [x1]
-	str	x17, [x0, #8]
-	mul	 x17, x9, x9
-	adcs	x17, x17, x4
-	mul	 x4, x16, x9
-	adcs	x2, x4, x2
-	mul	 x4, x11, x9
-	adcs	x18, x4, x18
-	mul	 x4, x14, x9
-	adcs	x12, x4, x12
-	adcs	x4, xzr, xzr
-	adds	 x17, x17, x5
-	umulh	x5, x9, x9
-	adcs	x2, x2, x5
-	umulh	x5, x16, x9
-	adcs	x18, x18, x5
-	ldr	x5, [x1, #8]
-	umulh	x11, x11, x9
-	adcs	x11, x12, x11
-	ldr	x12, [x1, #24]
-	umulh	x9, x14, x9
-	adcs	x9, x4, x9
-	mul	 x4, x3, x16
-	adds	 x17, x4, x17
-	mul	 x4, x14, x16
-	str	x17, [x0, #16]
-	mul	 x17, x5, x16
-	adcs	x17, x17, x2
-	mul	 x2, x16, x16
-	adcs	x18, x2, x18
-	mul	 x2, x12, x16
-	adcs	x11, x2, x11
-	umulh	x2, x3, x16
-	adcs	x9, x4, x9
-	adcs	x4, xzr, xzr
-	adds	 x17, x17, x2
-	umulh	x2, x5, x16
-	adcs	x18, x18, x2
-	umulh	x2, x16, x16
-	adcs	x11, x11, x2
-	umulh	x14, x14, x16
-	umulh	x16, x12, x16
-	adcs	x9, x9, x16
-	ldr	x16, [x1, #32]
-	adcs	x14, x4, x14
-	mul	 x1, x3, x12
-	adds	 x17, x1, x17
-	mul	 x1, x16, x12
-	str	x17, [x0, #24]
-	mul	 x17, x5, x12
-	adcs	x17, x17, x18
-	mul	 x18, x10, x12
-	adcs	x11, x18, x11
-	mul	 x18, x12, x12
-	adcs	x9, x18, x9
-	umulh	x18, x16, x12
-	umulh	x2, x3, x12
-	adcs	x14, x1, x14
-	adcs	x1, xzr, xzr
-	adds	 x17, x17, x2
-	umulh	x2, x10, x12
-	umulh	x3, x5, x12
-	umulh	x12, x12, x12
-	adcs	x11, x11, x3
-	mul	 x3, x8, x16
-	adcs	x9, x9, x2
-	mul	 x2, x13, x16
-	adcs	x12, x14, x12
-	mul	 x14, x10, x16
-	adcs	x18, x1, x18
-	mul	 x1, x15, x16
-	adds	 x17, x17, x3
-	mul	 x3, x16, x16
-	umulh	x8, x8, x16
-	umulh	x15, x15, x16
-	umulh	x10, x10, x16
-	umulh	x13, x13, x16
-	umulh	x16, x16, x16
-	str	x17, [x0, #32]
-	adcs	x11, x11, x1
-	adcs	x9, x9, x14
-	adcs	x12, x12, x2
-	adcs	x14, x18, x3
-	adcs	x17, xzr, xzr
-	adds	 x8, x11, x8
-	str	x8, [x0, #40]
-	adcs	x8, x9, x15
-	str	x8, [x0, #48]
-	adcs	x8, x12, x10
-	str	x8, [x0, #56]
-	adcs	x8, x14, x13
-	str	x8, [x0, #64]
-	adcs	x8, x17, x16
-	str	x8, [x0, #72]
-	ret
-.Lfunc_end67:
-	.size	mcl_fpDbl_sqrPre5L, .Lfunc_end67-mcl_fpDbl_sqrPre5L
-
-	.globl	mcl_fp_mont5L
-	.align	2
-	.type	mcl_fp_mont5L,@function
-mcl_fp_mont5L:                          // @mcl_fp_mont5L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	sub	sp, sp, #80             // =80
-	str	x0, [sp, #72]           // 8-byte Folded Spill
-	ldp	x16, x10, [x1, #24]
-	ldp	x18, x0, [x1, #8]
-	ldr	 x17, [x1]
-	ldur	x9, [x3, #-8]
-	str	x9, [sp, #16]           // 8-byte Folded Spill
-	ldp	x11, x8, [x3, #24]
-	ldp	x14, x12, [x3, #8]
-	ldr	 x13, [x3]
-	ldp	 x3, x1, [x2]
-	ldp	x4, x5, [x2, #16]
-	ldr	x2, [x2, #32]
-	umulh	x6, x10, x3
-	mul	 x7, x10, x3
-	umulh	x19, x16, x3
-	mul	 x20, x16, x3
-	umulh	x21, x0, x3
-	mul	 x22, x0, x3
-	umulh	x23, x18, x3
-	mul	 x24, x18, x3
-	umulh	x25, x17, x3
-	mul	 x3, x17, x3
-	umulh	x26, x1, x10
-	mul	 x27, x1, x10
-	umulh	x28, x1, x16
-	adds	 x24, x25, x24
-	mul	 x25, x3, x9
-	adcs	x22, x23, x22
-	mul	 x23, x25, x8
-	mul	 x29, x25, x11
-	mul	 x30, x25, x12
-	adcs	x20, x21, x20
-	mul	 x21, x25, x14
-	adcs	x7, x19, x7
-	umulh	x19, x25, x13
-	adcs	x6, x6, xzr
-	adds	 x19, x19, x21
-	umulh	x21, x25, x14
-	adcs	x21, x21, x30
-	umulh	x30, x25, x12
-	adcs	x29, x30, x29
-	umulh	x30, x25, x11
-	adcs	x23, x30, x23
-	umulh	x30, x25, x8
-	mul	 x25, x25, x13
+	mul		x24, x5, x18
+	adcs	x21, x22, x21
+	mul		x22, x24, x15
+	adcs	x20, x20, xzr
+	mov	 x0, x9
+	ldr	x9, [sp, #104]          // 8-byte Folded Reload
+	cmn		x22, x5
+	mul		x22, x24, x0
+	adcs	x22, x22, x29
+	mul		x29, x24, x4
+	adcs	x17, x29, x27
+	mul		x29, x24, x10
+	adcs	x25, x29, x25
+	mul		x29, x24, x9
+	adcs	x23, x29, x23
+	mul		x29, x24, x14
+	adcs	x21, x29, x21
+	umulh	x29, x24, x15
+	adcs	x20, x20, xzr
+	adds		x22, x22, x29
+	umulh	x29, x24, x0
+	ldp	x6, x7, [x2, #16]
+	ldp	x19, x2, [x2, #32]
+	str	x15, [sp, #8]           // 8-byte Folded Spill
+	adcs	x15, x17, x29
+	umulh	x29, x24, x4
+	adcs	x25, x25, x29
+	umulh	x29, x24, x10
+	adcs	x23, x23, x29
+	umulh	x29, x24, x9
+	adcs	x21, x21, x29
+	umulh	x24, x24, x14
+	mul		x29, x3, x13
+	adcs	x20, x20, x24
+	umulh	x24, x3, x8
+	mul		x5, x3, x1
+	adds		x24, x24, x29
+	umulh	x29, x3, x13
+	mul		x26, x3, x11
+	adcs	x5, x29, x5
+	umulh	x29, x3, x1
+	mul		x28, x3, x16
+	adcs	x26, x29, x26
+	umulh	x29, x3, x11
+	mul		x30, x3, x12
+	adcs	x28, x29, x28
+	umulh	x29, x3, x16
+	adcs	x29, x29, x30
+	umulh	x30, x3, x12
+	mul		x3, x3, x8
 	adcs	x30, x30, xzr
-	cmn	 x25, x3
-	mul	 x3, x1, x16
-	umulh	x25, x1, x0
-	adcs	x19, x19, x24
-	mul	 x24, x1, x0
-	adcs	x21, x21, x22
-	umulh	x22, x1, x18
+	adds		x3, x3, x22
+	adcs	x24, x24, x15
+	adcs	x5, x5, x25
+	adcs	x23, x26, x23
+	mov	 x17, x4
+	adcs	x21, x28, x21
+	mul		x28, x3, x18
+	mov	 x4, x18
+	ldr	x18, [sp, #8]           // 8-byte Folded Reload
 	adcs	x20, x29, x20
-	mul	 x29, x1, x18
-	adcs	x7, x23, x7
-	umulh	x23, x1, x17
-	mul	 x1, x1, x17
-	adcs	x6, x30, x6
-	adcs	x30, xzr, xzr
-	adds	 x23, x23, x29
-	umulh	x29, x4, x10
-	adcs	x22, x22, x24
-	mul	 x24, x4, x10
-	adcs	x3, x25, x3
-	umulh	x25, x4, x16
-	adcs	x27, x28, x27
-	adcs	x26, x26, xzr
-	adds	 x1, x19, x1
-	adcs	x19, x21, x23
-	mul	 x21, x1, x9
-	adcs	x20, x20, x22
-	mul	 x22, x21, x8
-	mul	 x23, x21, x11
-	mul	 x28, x21, x12
-	adcs	x3, x7, x3
-	mul	 x7, x21, x14
-	adcs	x6, x6, x27
-	umulh	x27, x21, x13
-	adcs	x26, x30, x26
-	adcs	x30, xzr, xzr
-	adds	 x7, x27, x7
-	umulh	x27, x21, x14
-	adcs	x27, x27, x28
-	umulh	x28, x21, x12
-	adcs	x23, x28, x23
-	umulh	x28, x21, x11
-	adcs	x22, x28, x22
-	umulh	x28, x21, x8
-	mul	 x21, x21, x13
-	adcs	x28, x28, xzr
-	cmn	 x21, x1
-	mul	 x1, x4, x16
-	umulh	x21, x4, x0
-	adcs	x7, x7, x19
-	mul	 x19, x4, x0
-	adcs	x20, x27, x20
-	umulh	x27, x4, x18
-	adcs	x3, x23, x3
-	mul	 x23, x4, x18
-	adcs	x6, x22, x6
-	umulh	x22, x4, x17
-	mul	 x4, x4, x17
-	adcs	x26, x28, x26
-	umulh	x15, x5, x10
-	str	x15, [sp, #64]          // 8-byte Folded Spill
 	adcs	x30, x30, xzr
-	adds	 x22, x22, x23
-	mul	 x15, x5, x10
-	str	x15, [sp, #56]          // 8-byte Folded Spill
-	adcs	x19, x27, x19
-	umulh	x15, x5, x16
-	str	x15, [sp, #40]          // 8-byte Folded Spill
-	adcs	x1, x21, x1
-	mul	 x15, x5, x16
-	str	x15, [sp, #32]          // 8-byte Folded Spill
-	adcs	x24, x25, x24
-	adcs	x25, x29, xzr
-	adds	 x4, x7, x4
-	adcs	x7, x20, x22
-	mul	 x20, x4, x9
-	adcs	x3, x3, x19
-	mul	 x19, x20, x8
-	mul	 x22, x20, x11
-	mov	 x15, x12
-	mul	 x29, x20, x15
-	adcs	x1, x6, x1
-	mov	 x21, x14
-	mul	 x6, x20, x21
-	adcs	x24, x26, x24
-	mov	 x9, x13
-	umulh	x26, x20, x9
+	mul		x26, x6, x11
+	mul		x29, x28, x18
+	cmn		x29, x3
+	mul		x29, x28, x0
+	adcs	x24, x29, x24
+	mul		x29, x28, x17
+	adcs	x5, x29, x5
+	mul		x29, x28, x10
+	adcs	x23, x29, x23
+	mul		x29, x28, x9
+	adcs	x21, x29, x21
+	mul		x29, x28, x14
+	adcs	x20, x29, x20
+	umulh	x29, x28, x18
+	adcs	x30, x30, xzr
+	adds		x24, x24, x29
+	umulh	x29, x28, x0
+	adcs	x5, x5, x29
+	umulh	x29, x28, x17
+	adcs	x23, x23, x29
+	umulh	x29, x28, x10
+	adcs	x21, x21, x29
+	umulh	x29, x28, x9
+	adcs	x20, x20, x29
+	umulh	x28, x28, x14
+	mul		x29, x6, x13
+	adcs	x28, x30, x28
+	umulh	x30, x6, x8
+	mul		x3, x6, x1
+	adds		x29, x30, x29
+	umulh	x30, x6, x13
+	adcs	x3, x30, x3
+	umulh	x30, x6, x1
+	mul		x25, x6, x16
+	adcs	x26, x30, x26
+	umulh	x30, x6, x11
+	mul		x27, x6, x12
 	adcs	x25, x30, x25
-	adcs	x30, xzr, xzr
-	adds	 x6, x26, x6
-	umulh	x26, x20, x21
-	adcs	x26, x26, x29
-	umulh	x29, x20, x15
-	adcs	x22, x29, x22
-	umulh	x29, x20, x11
-	mov	 x13, x11
-	adcs	x19, x29, x19
-	umulh	x29, x20, x8
-	mov	 x12, x8
-	mul	 x20, x20, x9
-	mov	 x14, x9
-	adcs	x29, x29, xzr
-	cmn	 x20, x4
-	umulh	x4, x5, x0
-	mul	 x20, x5, x0
-	umulh	x11, x5, x18
-	mul	 x9, x5, x18
-	umulh	x8, x5, x17
-	mul	 x5, x5, x17
-	umulh	x23, x2, x10
-	str	x23, [sp, #48]          // 8-byte Folded Spill
-	mul	 x10, x2, x10
-	str	x10, [sp, #24]          // 8-byte Folded Spill
-	umulh	x10, x2, x16
-	str	x10, [sp, #8]           // 8-byte Folded Spill
-	mul	 x28, x2, x16
-	umulh	x27, x2, x0
-	mul	 x23, x2, x0
-	umulh	x16, x2, x18
-	mul	 x18, x2, x18
-	umulh	x0, x2, x17
-	mul	 x17, x2, x17
-	adcs	x2, x6, x7
-	adcs	x3, x26, x3
-	adcs	x1, x22, x1
-	adcs	x6, x19, x24
-	adcs	x7, x29, x25
-	adcs	x19, x30, xzr
-	adds	 x8, x8, x9
-	adcs	x9, x11, x20
-	ldr	x10, [sp, #32]          // 8-byte Folded Reload
-	adcs	x10, x4, x10
-	ldr	x11, [sp, #56]          // 8-byte Folded Reload
-	ldr	x4, [sp, #40]           // 8-byte Folded Reload
-	adcs	x4, x4, x11
-	ldr	x11, [sp, #64]          // 8-byte Folded Reload
-	adcs	x20, x11, xzr
-	adds	 x2, x2, x5
-	adcs	x8, x3, x8
-	ldr	x24, [sp, #16]          // 8-byte Folded Reload
-	mul	 x3, x2, x24
-	adcs	x9, x1, x9
-	mul	 x1, x3, x12
-	mul	 x5, x3, x13
-	mul	 x22, x3, x15
-	adcs	x10, x6, x10
-	mul	 x6, x3, x21
-	adcs	x4, x7, x4
-	umulh	x7, x3, x14
-	adcs	x19, x19, x20
-	adcs	x20, xzr, xzr
-	adds	 x6, x7, x6
-	umulh	x7, x3, x21
-	adcs	x7, x7, x22
-	umulh	x22, x3, x15
-	mov	 x25, x15
-	adcs	x5, x22, x5
-	umulh	x22, x3, x13
-	adcs	x1, x22, x1
-	umulh	x22, x3, x12
-	mul	 x3, x3, x14
+	umulh	x30, x6, x16
+	umulh	x22, x6, x12
+	adcs	x27, x30, x27
+	mul		x6, x6, x8
 	adcs	x22, x22, xzr
-	cmn	 x3, x2
-	adcs	x8, x6, x8
-	adcs	x9, x7, x9
-	adcs	x10, x5, x10
-	adcs	x1, x1, x4
-	adcs	x2, x22, x19
-	adcs	x3, x20, xzr
-	adds	 x11, x0, x18
-	adcs	x15, x16, x23
-	adcs	x16, x27, x28
-	ldr	x18, [sp, #24]          // 8-byte Folded Reload
-	ldr	x0, [sp, #8]            // 8-byte Folded Reload
-	adcs	x18, x0, x18
-	ldr	x0, [sp, #48]           // 8-byte Folded Reload
-	adcs	x4, x0, xzr
-	adds	 x8, x8, x17
-	adcs	x9, x9, x11
-	mul	 x11, x8, x24
-	adcs	x10, x10, x15
-	umulh	x15, x11, x12
-	mul	 x17, x11, x12
-	umulh	x5, x11, x13
-	mul	 x6, x11, x13
-	mov	 x0, x13
-	mov	 x20, x25
-	umulh	x7, x11, x20
-	mul	 x19, x11, x20
-	mov	 x23, x20
-	mov	 x13, x21
-	umulh	x20, x11, x13
-	mul	 x21, x11, x13
-	umulh	x22, x11, x14
-	mul	 x11, x11, x14
-	adcs	x16, x1, x16
-	adcs	x18, x2, x18
-	adcs	x1, x3, x4
-	adcs	x2, xzr, xzr
-	adds	 x3, x22, x21
-	adcs	x4, x20, x19
-	adcs	x6, x7, x6
-	adcs	x17, x5, x17
-	adcs	x15, x15, xzr
-	cmn	 x11, x8
-	adcs	x8, x3, x9
-	adcs	x9, x4, x10
-	adcs	x10, x6, x16
-	adcs	x11, x17, x18
-	adcs	x15, x15, x1
-	adcs	x16, x2, xzr
-	subs	 x1, x8, x14
-	sbcs	x13, x9, x13
-	sbcs	x14, x10, x23
-	sbcs	x17, x11, x0
-	sbcs	x18, x15, x12
-	sbcs	x16, x16, xzr
-	tst	 x16, #0x1
-	csel	x8, x8, x1, ne
-	csel	x9, x9, x13, ne
-	csel	x10, x10, x14, ne
-	csel	x11, x11, x17, ne
-	csel	x12, x15, x18, ne
-	ldr	x13, [sp, #72]          // 8-byte Folded Reload
-	stp	 x8, x9, [x13]
-	stp	x10, x11, [x13, #16]
-	str	x12, [x13, #32]
-	add	sp, sp, #80             // =80
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end68:
-	.size	mcl_fp_mont5L, .Lfunc_end68-mcl_fp_mont5L
-
-	.globl	mcl_fp_montNF5L
-	.align	2
-	.type	mcl_fp_montNF5L,@function
-mcl_fp_montNF5L:                        // @mcl_fp_montNF5L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	sub	sp, sp, #32             // =32
-	str	x0, [sp, #24]           // 8-byte Folded Spill
-	ldp	x16, x14, [x1, #24]
-	ldp	x18, x15, [x1, #8]
-	ldr	 x17, [x1]
-	ldur	x13, [x3, #-8]
-	ldp	x9, x8, [x3, #24]
-	ldp	x11, x10, [x3, #8]
-	ldr	 x12, [x3]
-	ldp	 x1, x3, [x2]
-	ldp	x4, x5, [x2, #16]
-	ldr	x2, [x2, #32]
-	umulh	x6, x14, x1
-	mul	 x7, x14, x1
-	umulh	x19, x16, x1
-	mul	 x20, x16, x1
-	umulh	x21, x15, x1
-	mul	 x22, x15, x1
-	umulh	x23, x18, x1
-	mul	 x24, x18, x1
-	umulh	x25, x17, x1
-	mul	 x1, x17, x1
-	umulh	x26, x3, x14
-	mul	 x27, x3, x14
-	umulh	x28, x3, x16
-	mul	 x29, x3, x16
-	umulh	x30, x3, x15
-	adds	 x24, x25, x24
-	mul	 x25, x3, x15
-	adcs	x22, x23, x22
-	umulh	x23, x3, x18
-	adcs	x20, x21, x20
-	mul	 x21, x1, x13
-	adcs	x7, x19, x7
-	mul	 x19, x21, x12
-	adcs	x6, x6, xzr
-	cmn	 x19, x1
-	mul	 x1, x3, x18
-	mul	 x19, x21, x11
-	adcs	x19, x19, x24
-	mul	 x24, x21, x10
-	adcs	x22, x24, x22
-	mul	 x24, x21, x9
-	adcs	x20, x24, x20
-	mul	 x24, x21, x8
-	adcs	x7, x24, x7
-	umulh	x24, x21, x12
-	adcs	x6, x6, xzr
-	adds	 x19, x19, x24
-	umulh	x24, x21, x11
-	adcs	x22, x22, x24
-	umulh	x24, x21, x10
-	adcs	x20, x20, x24
-	umulh	x24, x21, x9
-	adcs	x7, x7, x24
-	umulh	x24, x3, x17
-	mul	 x3, x3, x17
-	umulh	x21, x21, x8
-	adcs	x6, x6, x21
-	umulh	x21, x4, x14
-	adds	 x1, x24, x1
-	mul	 x24, x4, x14
-	adcs	x23, x23, x25
-	umulh	x25, x4, x16
-	adcs	x29, x30, x29
-	mul	 x30, x4, x16
+	adds		x6, x6, x24
+	adcs	x5, x29, x5
+	adcs	x3, x3, x23
+	adcs	x21, x26, x21
+	adcs	x20, x25, x20
+	mul		x25, x6, x4
+	adcs	x27, x27, x28
+	mul		x28, x25, x18
+	adcs	x22, x22, xzr
+	cmn		x28, x6
+	mul		x28, x25, x0
+	adcs	x5, x28, x5
+	mul		x28, x25, x17
+	adcs	x3, x28, x3
+	mul		x28, x25, x10
+	adcs	x21, x28, x21
+	mul		x28, x25, x9
+	adcs	x20, x28, x20
+	mul		x28, x25, x14
 	adcs	x27, x28, x27
-	umulh	x28, x4, x15
-	adcs	x26, x26, xzr
-	adds	 x3, x3, x19
-	mul	 x19, x4, x15
-	adcs	x1, x1, x22
-	umulh	x22, x4, x18
-	adcs	x20, x23, x20
-	mul	 x23, x4, x18
-	adcs	x7, x29, x7
-	mul	 x29, x3, x13
-	adcs	x6, x27, x6
-	mul	 x27, x29, x12
-	adcs	x26, x26, xzr
-	cmn	 x27, x3
-	umulh	x3, x4, x17
-	mul	 x4, x4, x17
-	mul	 x27, x29, x11
-	adcs	x1, x27, x1
-	mul	 x27, x29, x10
-	adcs	x20, x27, x20
-	mul	 x27, x29, x9
-	adcs	x7, x27, x7
-	mul	 x27, x29, x8
-	adcs	x6, x27, x6
-	umulh	x27, x29, x12
-	adcs	x26, x26, xzr
-	adds	 x1, x1, x27
-	umulh	x27, x29, x11
-	adcs	x20, x20, x27
-	umulh	x27, x29, x10
-	adcs	x7, x7, x27
-	umulh	x27, x29, x9
-	adcs	x6, x6, x27
-	umulh	x27, x5, x14
-	umulh	x29, x29, x8
-	adcs	x26, x26, x29
-	mul	 x29, x5, x14
-	adds	 x3, x3, x23
-	umulh	x23, x5, x16
-	adcs	x19, x22, x19
-	mul	 x22, x5, x16
-	adcs	x28, x28, x30
-	umulh	x30, x5, x15
-	adcs	x24, x25, x24
-	mul	 x25, x5, x15
-	adcs	x21, x21, xzr
-	adds	 x1, x4, x1
-	umulh	x4, x5, x18
-	adcs	x3, x3, x20
-	mul	 x20, x5, x18
-	adcs	x7, x19, x7
-	umulh	x19, x5, x17
-	mul	 x5, x5, x17
+	umulh	x28, x25, x18
+	adcs	x22, x22, xzr
+	adds		x5, x5, x28
+	umulh	x28, x25, x0
+	adcs	x3, x3, x28
+	umulh	x28, x25, x17
+	adcs	x21, x21, x28
+	umulh	x28, x25, x10
+	adcs	x20, x20, x28
+	umulh	x28, x25, x9
+	adcs	x27, x27, x28
+	umulh	x25, x25, x14
+	mul		x28, x7, x13
+	adcs	x22, x22, x25
+	umulh	x25, x7, x8
+	mul		x6, x7, x1
+	adds		x25, x25, x28
+	umulh	x28, x7, x13
+	mul		x26, x7, x11
 	adcs	x6, x28, x6
-	mul	 x28, x1, x13
-	adcs	x24, x24, x26
-	mul	 x26, x28, x12
-	adcs	x21, x21, xzr
-	cmn	 x26, x1
-	umulh	x0, x2, x14
-	mul	 x14, x2, x14
-	stp	x14, x0, [sp, #8]
-	umulh	x26, x2, x16
-	mul	 x1, x2, x16
-	umulh	x0, x2, x15
-	mul	 x16, x2, x15
-	umulh	x15, x2, x18
-	mul	 x18, x2, x18
-	umulh	x14, x2, x17
-	mul	 x17, x2, x17
-	mul	 x2, x28, x11
-	adcs	x2, x2, x3
-	mul	 x3, x28, x10
-	adcs	x3, x3, x7
-	mul	 x7, x28, x9
-	adcs	x6, x7, x6
-	mul	 x7, x28, x8
-	adcs	x7, x7, x24
-	adcs	x21, x21, xzr
-	umulh	x24, x28, x12
-	adds	 x2, x2, x24
-	umulh	x24, x28, x11
-	adcs	x3, x3, x24
-	umulh	x24, x28, x10
-	adcs	x6, x6, x24
-	umulh	x24, x28, x9
-	adcs	x7, x7, x24
-	umulh	x24, x28, x8
-	adcs	x21, x21, x24
-	adds	 x19, x19, x20
-	adcs	x4, x4, x25
-	adcs	x20, x30, x22
-	adcs	x22, x23, x29
-	adcs	x23, x27, xzr
-	adds	 x2, x5, x2
-	adcs	x3, x19, x3
-	mov	 x24, x13
-	mul	 x5, x2, x24
-	adcs	x4, x4, x6
-	mul	 x6, x5, x8
-	mul	 x19, x5, x9
-	adcs	x7, x20, x7
-	mul	 x20, x5, x10
-	adcs	x21, x22, x21
-	mul	 x22, x5, x12
-	adcs	x23, x23, xzr
-	cmn	 x22, x2
-	mul	 x2, x5, x11
-	umulh	x22, x5, x8
-	adcs	x2, x2, x3
-	umulh	x3, x5, x9
-	adcs	x4, x20, x4
-	umulh	x20, x5, x10
-	adcs	x7, x19, x7
-	umulh	x19, x5, x11
-	umulh	x5, x5, x12
+	umulh	x28, x7, x1
+	mul		x23, x7, x16
+	adcs	x26, x28, x26
+	umulh	x28, x7, x11
+	mul		x24, x7, x12
+	umulh	x29, x7, x16
+	adcs	x23, x28, x23
+	umulh	x30, x7, x12
+	adcs	x24, x29, x24
+	mul		x7, x7, x8
+	adcs	x30, x30, xzr
+	adds		x5, x7, x5
+	adcs	x3, x25, x3
+	umulh	x9, x19, x12
 	adcs	x6, x6, x21
-	adcs	x21, x23, xzr
-	adds	 x2, x2, x5
-	adcs	x4, x4, x19
-	adcs	x5, x7, x20
-	adcs	x3, x6, x3
-	adcs	x6, x21, x22
-	adds	 x13, x14, x18
-	adcs	x14, x15, x16
-	adcs	x15, x0, x1
-	ldp	x16, x18, [sp, #8]
-	adcs	x16, x26, x16
-	adcs	x18, x18, xzr
-	adds	 x17, x17, x2
-	adcs	x13, x13, x4
-	mul	 x0, x17, x24
-	adcs	x14, x14, x5
-	mul	 x1, x0, x8
-	mul	 x2, x0, x9
-	mul	 x4, x0, x10
-	mul	 x5, x0, x11
-	mul	 x7, x0, x12
-	umulh	x19, x0, x8
-	umulh	x20, x0, x9
-	umulh	x21, x0, x10
-	umulh	x22, x0, x11
-	umulh	x0, x0, x12
-	adcs	x15, x15, x3
-	adcs	x16, x16, x6
-	adcs	x18, x18, xzr
-	cmn	 x7, x17
-	adcs	x13, x5, x13
-	adcs	x14, x4, x14
-	adcs	x15, x2, x15
-	adcs	x16, x1, x16
-	adcs	x17, x18, xzr
-	adds	 x13, x13, x0
-	adcs	x14, x14, x22
-	adcs	x15, x15, x21
-	adcs	x16, x16, x20
-	adcs	x17, x17, x19
-	subs	 x12, x13, x12
-	sbcs	x11, x14, x11
-	sbcs	x10, x15, x10
-	sbcs	x9, x16, x9
-	sbcs	x8, x17, x8
-	asr	x18, x8, #63
-	cmp	 x18, #0                // =0
-	csel	x12, x13, x12, lt
-	csel	x11, x14, x11, lt
-	csel	x10, x15, x10, lt
-	csel	x9, x16, x9, lt
-	csel	x8, x17, x8, lt
-	ldr	x13, [sp, #24]          // 8-byte Folded Reload
-	stp	 x12, x11, [x13]
-	stp	x10, x9, [x13, #16]
-	str	x8, [x13, #32]
-	add	sp, sp, #32             // =32
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end69:
-	.size	mcl_fp_montNF5L, .Lfunc_end69-mcl_fp_montNF5L
-
-	.globl	mcl_fp_montRed5L
-	.align	2
-	.type	mcl_fp_montRed5L,@function
-mcl_fp_montRed5L:                       // @mcl_fp_montRed5L
-// BB#0:
-	stp	x26, x25, [sp, #-64]!
-	stp	x24, x23, [sp, #16]
-	stp	x22, x21, [sp, #32]
-	stp	x20, x19, [sp, #48]
-	ldur	x13, [x2, #-8]
-	ldp	x9, x8, [x2, #24]
-	ldp	x11, x10, [x2, #8]
-	ldr	 x12, [x2]
-	ldp	x15, x16, [x1, #64]
-	ldp	x17, x18, [x1, #48]
-	ldp	x2, x3, [x1, #32]
-	ldp	x4, x5, [x1, #16]
-	ldp	 x14, x1, [x1]
-	mul	 x6, x14, x13
-	umulh	x7, x6, x8
-	mul	 x19, x6, x8
-	umulh	x20, x6, x9
-	mul	 x21, x6, x9
-	umulh	x22, x6, x10
-	mul	 x23, x6, x10
-	umulh	x24, x6, x11
-	mul	 x25, x6, x11
-	umulh	x26, x6, x12
-	mul	 x6, x6, x12
-	adds	 x25, x26, x25
-	adcs	x23, x24, x23
-	adcs	x21, x22, x21
-	adcs	x19, x20, x19
-	adcs	x7, x7, xzr
-	cmn	 x14, x6
-	adcs	x14, x1, x25
-	adcs	x1, x4, x23
-	mul	 x4, x14, x13
-	adcs	x5, x5, x21
-	umulh	x6, x4, x8
-	mul	 x20, x4, x8
-	umulh	x21, x4, x9
-	mul	 x22, x4, x9
-	umulh	x23, x4, x10
-	mul	 x24, x4, x10
-	umulh	x25, x4, x11
-	mul	 x26, x4, x11
-	adcs	x2, x2, x19
-	umulh	x19, x4, x12
-	mul	 x4, x4, x12
-	adcs	x3, x3, x7
-	adcs	x17, x17, xzr
-	adcs	x18, x18, xzr
-	adcs	x15, x15, xzr
-	adcs	x16, x16, xzr
-	adcs	x7, xzr, xzr
-	adds	 x19, x19, x26
-	adcs	x24, x25, x24
-	adcs	x22, x23, x22
-	adcs	x20, x21, x20
-	adcs	x6, x6, xzr
-	cmn	 x4, x14
-	adcs	x14, x19, x1
-	adcs	x1, x24, x5
-	mul	 x4, x14, x13
-	adcs	x2, x22, x2
-	umulh	x5, x4, x8
-	mul	 x19, x4, x8
-	umulh	x21, x4, x9
-	mul	 x22, x4, x9
-	umulh	x23, x4, x10
-	mul	 x24, x4, x10
-	umulh	x25, x4, x11
-	mul	 x26, x4, x11
-	adcs	x3, x20, x3
-	umulh	x20, x4, x12
-	mul	 x4, x4, x12
-	adcs	x17, x6, x17
-	adcs	x18, x18, xzr
-	adcs	x15, x15, xzr
-	adcs	x16, x16, xzr
-	adcs	x6, x7, xzr
-	adds	 x7, x20, x26
-	adcs	x20, x25, x24
+	umulh	x29, x2, x12
+	str	x9, [sp, #16]           // 8-byte Folded Spill
+	mul		x9, x19, x12
+	adcs	x20, x26, x20
+	str	x29, [sp, #88]          // 8-byte Folded Spill
+	mul		x29, x2, x12
+	umulh	x12, x2, x16
+	adcs	x23, x23, x27
+	str	x12, [sp, #80]          // 8-byte Folded Spill
+	mul		x12, x2, x16
+	umulh	x21, x19, x11
+	mul		x26, x19, x11
+	mul		x27, x5, x4
+	adcs	x22, x24, x22
+	mov	 x28, x1
+	str	x12, [sp, #72]          // 8-byte Folded Spill
+	umulh	x12, x2, x11
+	mul		x11, x2, x11
+	mul		x24, x27, x18
+	adcs	x30, x30, xzr
+	stp	x11, x12, [sp, #56]     // 8-byte Folded Spill
+	umulh	x11, x2, x28
+	str	x9, [sp, #32]           // 8-byte Folded Spill
+	umulh	x7, x19, x16
+	mul		x25, x19, x16
+	cmn		x24, x5
+	mul		x5, x19, x28
+	mul		x24, x19, x13
+	umulh	x1, x19, x8
+	umulh	x9, x19, x13
+	umulh	x15, x19, x28
+	mul		x19, x19, x8
+	str	x11, [sp, #48]          // 8-byte Folded Spill
+	mul		x11, x2, x28
+	umulh	x16, x2, x8
+	mul		x28, x2, x8
+	ldr	x8, [sp, #104]          // 8-byte Folded Reload
+	str	x11, [sp, #40]          // 8-byte Folded Spill
+	umulh	x11, x2, x13
+	mul		x13, x2, x13
+	mul		x2, x27, x0
+	adcs	x2, x2, x3
+	mul		x3, x27, x17
+	adcs	x3, x3, x6
+	mul		x6, x27, x10
+	adcs	x6, x6, x20
+	mul		x20, x27, x8
+	adcs	x20, x20, x23
+	mul		x23, x27, x14
 	adcs	x22, x23, x22
-	adcs	x19, x21, x19
-	adcs	x5, x5, xzr
-	cmn	 x4, x14
-	adcs	x14, x7, x1
-	adcs	x1, x20, x2
-	mul	 x2, x14, x13
-	adcs	x3, x22, x3
-	umulh	x4, x2, x8
-	mul	 x7, x2, x8
-	umulh	x20, x2, x9
-	mul	 x21, x2, x9
-	umulh	x22, x2, x10
-	mul	 x23, x2, x10
-	umulh	x24, x2, x11
-	mul	 x25, x2, x11
-	umulh	x26, x2, x12
-	mul	 x2, x2, x12
-	adcs	x17, x19, x17
-	adcs	x18, x5, x18
-	adcs	x15, x15, xzr
-	adcs	x16, x16, xzr
-	adcs	x5, x6, xzr
-	adds	 x6, x26, x25
-	adcs	x19, x24, x23
-	adcs	x21, x22, x21
-	adcs	x7, x20, x7
-	adcs	x4, x4, xzr
-	cmn	 x2, x14
-	adcs	x14, x6, x1
-	adcs	x1, x19, x3
-	mul	 x13, x14, x13
-	adcs	x17, x21, x17
-	umulh	x2, x13, x8
-	mul	 x3, x13, x8
-	umulh	x6, x13, x9
-	mul	 x19, x13, x9
-	umulh	x20, x13, x10
-	mul	 x21, x13, x10
-	umulh	x22, x13, x11
-	mul	 x23, x13, x11
-	umulh	x24, x13, x12
-	mul	 x13, x13, x12
-	adcs	x18, x7, x18
-	adcs	x15, x4, x15
-	adcs	x16, x16, xzr
-	adcs	x4, x5, xzr
-	adds	 x5, x24, x23
-	adcs	x7, x22, x21
-	adcs	x19, x20, x19
-	adcs	x3, x6, x3
-	adcs	x2, x2, xzr
-	cmn	 x13, x14
-	adcs	x13, x5, x1
-	adcs	x14, x7, x17
-	adcs	x17, x19, x18
-	adcs	x15, x3, x15
-	adcs	x16, x2, x16
-	adcs	x18, x4, xzr
-	subs	 x12, x13, x12
-	sbcs	x11, x14, x11
-	sbcs	x10, x17, x10
-	sbcs	x9, x15, x9
-	sbcs	x8, x16, x8
-	sbcs	x18, x18, xzr
-	tst	 x18, #0x1
-	csel	x12, x13, x12, ne
-	csel	x11, x14, x11, ne
-	csel	x10, x17, x10, ne
-	csel	x9, x15, x9, ne
-	csel	x8, x16, x8, ne
-	stp	 x12, x11, [x0]
-	stp	x10, x9, [x0, #16]
-	str	x8, [x0, #32]
-	ldp	x20, x19, [sp, #48]
-	ldp	x22, x21, [sp, #32]
-	ldp	x24, x23, [sp, #16]
-	ldp	x26, x25, [sp], #64
-	ret
-.Lfunc_end70:
-	.size	mcl_fp_montRed5L, .Lfunc_end70-mcl_fp_montRed5L
-
-	.globl	mcl_fp_addPre5L
-	.align	2
-	.type	mcl_fp_addPre5L,@function
-mcl_fp_addPre5L:                        // @mcl_fp_addPre5L
-// BB#0:
-	ldp	x11, x8, [x2, #24]
-	ldp	x17, x9, [x1, #24]
-	ldp	x13, x10, [x2, #8]
-	ldr	 x12, [x2]
-	ldp	 x14, x15, [x1]
-	ldr	x16, [x1, #16]
-	adds	 x12, x12, x14
-	str	 x12, [x0]
-	adcs	x12, x13, x15
-	adcs	x10, x10, x16
-	stp	x12, x10, [x0, #8]
-	adcs	x10, x11, x17
-	adcs	x9, x8, x9
-	adcs	x8, xzr, xzr
-	stp	x10, x9, [x0, #24]
-	mov	 x0, x8
-	ret
-.Lfunc_end71:
-	.size	mcl_fp_addPre5L, .Lfunc_end71-mcl_fp_addPre5L
-
-	.globl	mcl_fp_subPre5L
-	.align	2
-	.type	mcl_fp_subPre5L,@function
-mcl_fp_subPre5L:                        // @mcl_fp_subPre5L
-// BB#0:
-	ldp	x11, x8, [x2, #24]
-	ldp	x17, x9, [x1, #24]
-	ldp	x13, x10, [x2, #8]
-	ldr	 x12, [x2]
-	ldp	 x14, x15, [x1]
-	ldr	x16, [x1, #16]
-	subs	 x12, x14, x12
-	str	 x12, [x0]
-	sbcs	x12, x15, x13
-	sbcs	x10, x16, x10
-	stp	x12, x10, [x0, #8]
-	sbcs	x10, x17, x11
-	sbcs	x9, x9, x8
-	ngcs	 x8, xzr
-	and	x8, x8, #0x1
-	stp	x10, x9, [x0, #24]
-	mov	 x0, x8
-	ret
-.Lfunc_end72:
-	.size	mcl_fp_subPre5L, .Lfunc_end72-mcl_fp_subPre5L
-
-	.globl	mcl_fp_shr1_5L
-	.align	2
-	.type	mcl_fp_shr1_5L,@function
-mcl_fp_shr1_5L:                         // @mcl_fp_shr1_5L
-// BB#0:
-	ldp	 x8, x9, [x1]
-	ldp	x10, x11, [x1, #16]
-	ldr	x12, [x1, #32]
-	extr	x8, x9, x8, #1
-	extr	x9, x10, x9, #1
-	extr	x10, x11, x10, #1
-	extr	x11, x12, x11, #1
-	lsr	x12, x12, #1
-	stp	 x8, x9, [x0]
-	stp	x10, x11, [x0, #16]
-	str	x12, [x0, #32]
-	ret
-.Lfunc_end73:
-	.size	mcl_fp_shr1_5L, .Lfunc_end73-mcl_fp_shr1_5L
-
-	.globl	mcl_fp_add5L
-	.align	2
-	.type	mcl_fp_add5L,@function
-mcl_fp_add5L:                           // @mcl_fp_add5L
-// BB#0:
-	ldp	x11, x8, [x2, #24]
-	ldp	x17, x9, [x1, #24]
-	ldp	x13, x10, [x2, #8]
-	ldr	 x12, [x2]
-	ldp	 x14, x15, [x1]
-	ldr	x16, [x1, #16]
-	adds	 x12, x12, x14
-	ldr	x14, [x3, #32]
-	adcs	x13, x13, x15
-	adcs	x10, x10, x16
-	ldp	 x15, x16, [x3]
-	stp	 x12, x13, [x0]
-	adcs	x17, x11, x17
-	stp	x10, x17, [x0, #16]
-	adcs	x8, x8, x9
-	str	x8, [x0, #32]
-	adcs	x18, xzr, xzr
-	ldp	x9, x1, [x3, #16]
-	subs	 x12, x12, x15
-	sbcs	x11, x13, x16
-	sbcs	x10, x10, x9
-	sbcs	x9, x17, x1
-	sbcs	x8, x8, x14
-	sbcs	x13, x18, xzr
-	and	w13, w13, #0x1
-	tbnz	w13, #0, .LBB74_2
-// BB#1:                                // %nocarry
-	stp	 x12, x11, [x0]
-	stp	x10, x9, [x0, #16]
-	str	x8, [x0, #32]
-.LBB74_2:                               // %carry
-	ret
-.Lfunc_end74:
-	.size	mcl_fp_add5L, .Lfunc_end74-mcl_fp_add5L
-
-	.globl	mcl_fp_addNF5L
-	.align	2
-	.type	mcl_fp_addNF5L,@function
-mcl_fp_addNF5L:                         // @mcl_fp_addNF5L
-// BB#0:
-	ldp	x11, x8, [x1, #24]
-	ldp	x17, x9, [x2, #24]
-	ldp	x13, x10, [x1, #8]
-	ldr	 x12, [x1]
-	ldp	 x14, x15, [x2]
-	ldr	x16, [x2, #16]
-	adds	 x12, x14, x12
-	ldp	x18, x14, [x3, #24]
-	adcs	x13, x15, x13
-	adcs	x10, x16, x10
-	ldp	 x15, x16, [x3]
-	adcs	x11, x17, x11
-	ldr	x17, [x3, #16]
-	adcs	x8, x9, x8
-	subs	 x9, x12, x15
-	sbcs	x15, x13, x16
-	sbcs	x16, x10, x17
-	sbcs	x17, x11, x18
-	sbcs	x14, x8, x14
-	asr	x18, x14, #63
-	cmp	 x18, #0                // =0
-	csel	x9, x12, x9, lt
-	csel	x12, x13, x15, lt
-	csel	x10, x10, x16, lt
-	csel	x11, x11, x17, lt
-	csel	x8, x8, x14, lt
-	stp	 x9, x12, [x0]
-	stp	x10, x11, [x0, #16]
-	str	x8, [x0, #32]
-	ret
-.Lfunc_end75:
-	.size	mcl_fp_addNF5L, .Lfunc_end75-mcl_fp_addNF5L
-
-	.globl	mcl_fp_sub5L
-	.align	2
-	.type	mcl_fp_sub5L,@function
-mcl_fp_sub5L:                           // @mcl_fp_sub5L
-// BB#0:
-	ldp	x11, x12, [x2, #24]
-	ldp	x17, x13, [x1, #24]
-	ldp	x9, x10, [x2, #8]
-	ldr	 x8, [x2]
-	ldp	 x14, x15, [x1]
-	ldr	x16, [x1, #16]
-	subs	 x8, x14, x8
-	sbcs	x9, x15, x9
-	stp	 x8, x9, [x0]
-	sbcs	x10, x16, x10
-	sbcs	x11, x17, x11
-	stp	x10, x11, [x0, #16]
-	sbcs	x12, x13, x12
-	str	x12, [x0, #32]
-	ngcs	 x13, xzr
-	and	w13, w13, #0x1
-	tbnz	w13, #0, .LBB76_2
-// BB#1:                                // %nocarry
-	ret
-.LBB76_2:                               // %carry
-	ldp	x17, x13, [x3, #24]
-	ldp	 x14, x15, [x3]
-	ldr	x16, [x3, #16]
-	adds	 x8, x14, x8
+	adcs	x23, x30, xzr
+	umulh	x30, x27, x18
+	adds		x2, x2, x30
+	umulh	x30, x27, x0
+	adcs	x3, x3, x30
+	umulh	x30, x27, x17
+	adcs	x6, x6, x30
+	umulh	x30, x27, x10
+	adcs	x20, x20, x30
+	umulh	x30, x27, x8
+	adcs	x22, x22, x30
+	mov	 x30, x14
+	umulh	x27, x27, x30
+	adcs	x23, x23, x27
+	str	x11, [sp, #24]          // 8-byte Folded Spill
+	mov	 x11, x8
+	adds		x8, x1, x24
+	adcs	x9, x9, x5
+	adcs	x14, x15, x26
+	ldr	x15, [sp, #32]          // 8-byte Folded Reload
+	adcs	x5, x21, x25
+	mov	 x24, x4
+	mov	 x12, x17
+	adcs	x7, x7, x15
+	ldr	x15, [sp, #16]          // 8-byte Folded Reload
+	mov	 x27, x11
+	adcs	x21, x15, xzr
+	adds		x2, x19, x2
+	adcs	x8, x8, x3
+	adcs	x9, x9, x6
+	adcs	x14, x14, x20
+	adcs	x5, x5, x22
+	mul		x3, x2, x24
+	adcs	x7, x7, x23
+	mul		x20, x3, x18
+	adcs	x21, x21, xzr
+	cmn		x20, x2
+	mul		x20, x3, x0
+	adcs	x8, x20, x8
+	mul		x20, x3, x12
+	mul		x2, x3, x10
+	adcs	x9, x20, x9
+	mul		x19, x3, x11
+	adcs	x14, x2, x14
+	mul		x6, x3, x30
+	adcs	x5, x19, x5
+	adcs	x6, x6, x7
+	umulh	x2, x3, x11
+	mov	 x11, x10
+	umulh	x7, x3, x18
+	adcs	x21, x21, xzr
+	umulh	x20, x3, x30
+	umulh	x19, x3, x11
+	adds		x8, x8, x7
+	umulh	x7, x3, x12
+	umulh	x3, x3, x0
+	adcs	x9, x9, x3
+	adcs	x10, x14, x7
+	adcs	x3, x5, x19
+	adcs	x2, x6, x2
+	adcs	x5, x21, x20
+	adds		x15, x16, x13
+	ldr	x13, [sp, #40]          // 8-byte Folded Reload
+	ldr	x14, [sp, #24]          // 8-byte Folded Reload
+	adcs	x16, x14, x13
+	ldp	x14, x13, [sp, #48]     // 8-byte Folded Reload
+	adcs	x17, x14, x13
+	ldp	x14, x13, [sp, #64]     // 8-byte Folded Reload
+	adcs	x1, x14, x13
+	ldr	x13, [sp, #80]          // 8-byte Folded Reload
+	adcs	x4, x13, x29
+	ldr	x13, [sp, #88]          // 8-byte Folded Reload
+	adcs	x6, x13, xzr
+	adds		x8, x28, x8
 	adcs	x9, x15, x9
 	adcs	x10, x16, x10
-	adcs	x11, x17, x11
-	adcs	x12, x13, x12
-	stp	 x8, x9, [x0]
-	stp	x10, x11, [x0, #16]
-	str	x12, [x0, #32]
-	ret
-.Lfunc_end76:
-	.size	mcl_fp_sub5L, .Lfunc_end76-mcl_fp_sub5L
-
-	.globl	mcl_fp_subNF5L
-	.align	2
-	.type	mcl_fp_subNF5L,@function
-mcl_fp_subNF5L:                         // @mcl_fp_subNF5L
-// BB#0:
-	ldp	x11, x8, [x2, #24]
-	ldp	x17, x9, [x1, #24]
-	ldp	x13, x10, [x2, #8]
-	ldr	 x12, [x2]
-	ldp	 x14, x15, [x1]
-	ldr	x16, [x1, #16]
-	subs	 x12, x14, x12
-	sbcs	x13, x15, x13
-	ldp	x1, x14, [x3, #8]
-	ldp	x15, x18, [x3, #24]
-	sbcs	x10, x16, x10
-	ldr	 x16, [x3]
-	sbcs	x11, x17, x11
-	sbcs	x8, x9, x8
-	asr	x9, x8, #63
-	extr	x17, x9, x8, #63
-	and	 x16, x17, x16
-	and	x14, x14, x9, ror #63
-	and	 x15, x9, x15
-	and	 x17, x9, x18
-	ror	 x9, x9, #63
-	and	 x9, x9, x1
-	adds	 x12, x16, x12
-	adcs	x9, x9, x13
-	stp	 x12, x9, [x0]
-	adcs	x9, x14, x10
-	str	x9, [x0, #16]
-	adcs	x9, x15, x11
-	adcs	x8, x17, x8
-	stp	x9, x8, [x0, #24]
-	ret
-.Lfunc_end77:
-	.size	mcl_fp_subNF5L, .Lfunc_end77-mcl_fp_subNF5L
-
-	.globl	mcl_fpDbl_add5L
-	.align	2
-	.type	mcl_fpDbl_add5L,@function
-mcl_fpDbl_add5L:                        // @mcl_fpDbl_add5L
-// BB#0:
-	stp	x22, x21, [sp, #-32]!
-	stp	x20, x19, [sp, #16]
-	ldp	x8, x9, [x2, #64]
-	ldp	x10, x11, [x1, #64]
-	ldp	x12, x13, [x2, #48]
-	ldp	x14, x15, [x1, #48]
-	ldp	x16, x17, [x2, #32]
-	ldp	x18, x4, [x1, #32]
-	ldp	x5, x6, [x2, #16]
-	ldp	 x19, x2, [x2]
-	ldp	x20, x21, [x1, #16]
-	ldp	 x7, x1, [x1]
-	adds	 x7, x19, x7
-	ldr	x19, [x3, #32]
-	str	 x7, [x0]
-	adcs	x1, x2, x1
-	ldp	x2, x7, [x3, #16]
-	str	x1, [x0, #8]
-	ldp	 x1, x3, [x3]
-	adcs	x5, x5, x20
-	str	x5, [x0, #16]
-	adcs	x5, x6, x21
-	adcs	x16, x16, x18
-	stp	x5, x16, [x0, #24]
-	adcs	x16, x17, x4
-	adcs	x12, x12, x14
-	adcs	x13, x13, x15
-	adcs	x8, x8, x10
-	adcs	x9, x9, x11
-	adcs	x10, xzr, xzr
-	subs	 x11, x16, x1
-	sbcs	x14, x12, x3
-	sbcs	x15, x13, x2
-	sbcs	x17, x8, x7
-	sbcs	x18, x9, x19
-	sbcs	x10, x10, xzr
-	tst	 x10, #0x1
-	csel	x10, x16, x11, ne
-	csel	x11, x12, x14, ne
-	csel	x12, x13, x15, ne
-	csel	x8, x8, x17, ne
-	csel	x9, x9, x18, ne
-	stp	x10, x11, [x0, #40]
-	stp	x12, x8, [x0, #56]
-	str	x9, [x0, #72]
-	ldp	x20, x19, [sp, #16]
-	ldp	x22, x21, [sp], #32
-	ret
-.Lfunc_end78:
-	.size	mcl_fpDbl_add5L, .Lfunc_end78-mcl_fpDbl_add5L
-
-	.globl	mcl_fpDbl_sub5L
-	.align	2
-	.type	mcl_fpDbl_sub5L,@function
-mcl_fpDbl_sub5L:                        // @mcl_fpDbl_sub5L
-// BB#0:
-	stp	x22, x21, [sp, #-32]!
-	stp	x20, x19, [sp, #16]
-	ldp	x8, x9, [x2, #64]
-	ldp	x10, x11, [x1, #64]
-	ldp	x12, x13, [x2, #48]
-	ldp	x14, x15, [x1, #48]
-	ldp	x16, x17, [x2, #32]
-	ldp	x18, x4, [x1, #32]
-	ldp	x5, x6, [x2, #16]
-	ldp	 x7, x2, [x2]
-	ldp	x20, x21, [x1, #16]
-	ldp	 x19, x1, [x1]
-	subs	 x7, x19, x7
-	ldr	x19, [x3, #32]
-	str	 x7, [x0]
-	sbcs	x1, x1, x2
-	ldp	x2, x7, [x3, #16]
-	str	x1, [x0, #8]
-	ldp	 x1, x3, [x3]
-	sbcs	x5, x20, x5
-	str	x5, [x0, #16]
-	sbcs	x5, x21, x6
-	sbcs	x16, x18, x16
-	stp	x5, x16, [x0, #24]
-	sbcs	x16, x4, x17
-	sbcs	x12, x14, x12
-	sbcs	x13, x15, x13
-	sbcs	x8, x10, x8
-	sbcs	x9, x11, x9
-	ngcs	 x10, xzr
-	tst	 x10, #0x1
-	csel	x10, x19, xzr, ne
-	csel	x11, x7, xzr, ne
-	csel	x14, x2, xzr, ne
-	csel	x15, x3, xzr, ne
-	csel	x17, x1, xzr, ne
-	adds	 x16, x17, x16
-	adcs	x12, x15, x12
-	stp	x16, x12, [x0, #40]
-	adcs	x12, x14, x13
-	adcs	x8, x11, x8
-	stp	x12, x8, [x0, #56]
-	adcs	x8, x10, x9
-	str	x8, [x0, #72]
-	ldp	x20, x19, [sp, #16]
-	ldp	x22, x21, [sp], #32
-	ret
-.Lfunc_end79:
-	.size	mcl_fpDbl_sub5L, .Lfunc_end79-mcl_fpDbl_sub5L
-
-	.globl	mcl_fp_mulUnitPre6L
-	.align	2
-	.type	mcl_fp_mulUnitPre6L,@function
-mcl_fp_mulUnitPre6L:                    // @mcl_fp_mulUnitPre6L
-// BB#0:
-	ldp	x8, x9, [x1, #32]
-	ldp	 x10, x11, [x1]
-	ldp	x12, x13, [x1, #16]
-	mul	 x14, x10, x2
-	mul	 x15, x11, x2
-	umulh	x10, x10, x2
-	mul	 x16, x12, x2
-	umulh	x11, x11, x2
-	mul	 x17, x13, x2
-	umulh	x12, x12, x2
-	mul	 x18, x8, x2
-	umulh	x13, x13, x2
-	mul	 x1, x9, x2
-	umulh	x8, x8, x2
-	umulh	x9, x9, x2
-	adds	 x10, x10, x15
-	stp	 x14, x10, [x0]
-	adcs	x10, x11, x16
-	str	x10, [x0, #16]
-	adcs	x10, x12, x17
-	str	x10, [x0, #24]
-	adcs	x10, x13, x18
-	adcs	x8, x8, x1
-	stp	x10, x8, [x0, #32]
-	adcs	x8, x9, xzr
-	str	x8, [x0, #48]
-	ret
-.Lfunc_end80:
-	.size	mcl_fp_mulUnitPre6L, .Lfunc_end80-mcl_fp_mulUnitPre6L
-
-	.globl	mcl_fpDbl_mulPre6L
-	.align	2
-	.type	mcl_fpDbl_mulPre6L,@function
-mcl_fpDbl_mulPre6L:                     // @mcl_fpDbl_mulPre6L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	sub	sp, sp, #400            // =400
-	ldp	 x8, x9, [x1]
-	ldp	 x11, x13, [x1]
-	ldp	x10, x17, [x1, #16]
-	ldp	x12, x14, [x1, #32]
-	ldp	 x15, x16, [x2]
-	ldr	x3, [x1, #32]
-	mul	 x30, x8, x15
-	umulh	x18, x14, x15
-	str	x18, [sp, #392]         // 8-byte Folded Spill
-	mul	 x18, x14, x15
-	str	x18, [sp, #384]         // 8-byte Folded Spill
-	umulh	x18, x12, x15
-	str	x18, [sp, #376]         // 8-byte Folded Spill
-	mul	 x18, x12, x15
-	str	x18, [sp, #360]         // 8-byte Folded Spill
-	umulh	x18, x17, x15
-	str	x18, [sp, #336]         // 8-byte Folded Spill
-	mul	 x18, x17, x15
-	str	x18, [sp, #312]         // 8-byte Folded Spill
-	umulh	x18, x10, x15
-	str	x18, [sp, #304]         // 8-byte Folded Spill
-	mul	 x18, x10, x15
-	str	x18, [sp, #272]         // 8-byte Folded Spill
-	umulh	x18, x9, x15
-	str	x18, [sp, #248]         // 8-byte Folded Spill
-	mul	 x18, x9, x15
-	umulh	x15, x8, x15
-	stp	x15, x18, [sp, #216]
-	mul	 x15, x8, x16
-	str	x15, [sp, #280]         // 8-byte Folded Spill
-	mul	 x15, x14, x16
-	str	x15, [sp, #352]         // 8-byte Folded Spill
-	mul	 x15, x12, x16
-	str	x15, [sp, #328]         // 8-byte Folded Spill
-	mul	 x15, x17, x16
-	str	x15, [sp, #296]         // 8-byte Folded Spill
-	mul	 x15, x10, x16
-	str	x15, [sp, #264]         // 8-byte Folded Spill
-	mul	 x15, x9, x16
-	umulh	x14, x14, x16
-	str	x14, [sp, #368]         // 8-byte Folded Spill
-	umulh	x12, x12, x16
-	str	x12, [sp, #344]         // 8-byte Folded Spill
-	umulh	x12, x17, x16
-	str	x12, [sp, #320]         // 8-byte Folded Spill
-	umulh	x10, x10, x16
-	str	x10, [sp, #288]         // 8-byte Folded Spill
-	umulh	x9, x9, x16
-	str	x9, [sp, #256]          // 8-byte Folded Spill
-	umulh	x8, x8, x16
-	stp	x8, x15, [sp, #232]
-	ldp	x12, x8, [x2, #16]
-	ldr	x9, [x1, #40]
-	ldp	x15, x10, [x1, #16]
-	mul	 x14, x11, x12
-	str	x14, [sp, #144]         // 8-byte Folded Spill
-	mul	 x14, x9, x12
-	str	x14, [sp, #200]         // 8-byte Folded Spill
-	mul	 x14, x3, x12
-	str	x14, [sp, #176]         // 8-byte Folded Spill
-	mul	 x14, x10, x12
-	str	x14, [sp, #160]         // 8-byte Folded Spill
-	mul	 x14, x15, x12
-	str	x14, [sp, #128]         // 8-byte Folded Spill
-	mul	 x14, x13, x12
-	str	x14, [sp, #112]         // 8-byte Folded Spill
-	umulh	x14, x9, x12
-	str	x14, [sp, #208]         // 8-byte Folded Spill
-	umulh	x14, x3, x12
-	str	x14, [sp, #192]         // 8-byte Folded Spill
-	umulh	x14, x10, x12
-	str	x14, [sp, #168]         // 8-byte Folded Spill
-	umulh	x14, x15, x12
-	str	x14, [sp, #152]         // 8-byte Folded Spill
-	umulh	x14, x13, x12
-	str	x14, [sp, #120]         // 8-byte Folded Spill
-	umulh	x12, x11, x12
-	str	x12, [sp, #104]         // 8-byte Folded Spill
-	mul	 x12, x9, x8
-	str	x12, [sp, #184]         // 8-byte Folded Spill
-	umulh	x9, x9, x8
-	str	x9, [sp, #136]          // 8-byte Folded Spill
-	mul	 x9, x3, x8
-	str	x9, [sp, #80]           // 8-byte Folded Spill
-	umulh	x9, x3, x8
-	str	x9, [sp, #96]           // 8-byte Folded Spill
-	mul	 x9, x10, x8
-	str	x9, [sp, #64]           // 8-byte Folded Spill
-	umulh	x9, x10, x8
-	str	x9, [sp, #88]           // 8-byte Folded Spill
-	mul	 x9, x15, x8
-	str	x9, [sp, #48]           // 8-byte Folded Spill
-	umulh	x9, x15, x8
-	str	x9, [sp, #72]           // 8-byte Folded Spill
-	mul	 x9, x13, x8
-	str	x9, [sp, #32]           // 8-byte Folded Spill
-	umulh	x9, x13, x8
-	str	x9, [sp, #56]           // 8-byte Folded Spill
-	mul	 x9, x11, x8
-	str	x9, [sp, #24]           // 8-byte Folded Spill
-	umulh	x8, x11, x8
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldp	x12, x13, [x1, #32]
-	ldp	 x9, x10, [x1]
-	ldp	x11, x1, [x1, #16]
-	ldp	x8, x2, [x2, #32]
-	mul	 x22, x9, x8
-	mul	 x28, x13, x8
-	mul	 x27, x12, x8
-	mul	 x24, x1, x8
-	mul	 x20, x11, x8
-	mul	 x19, x10, x8
-	umulh	x14, x13, x8
-	str	x14, [sp, #16]          // 8-byte Folded Spill
-	umulh	x29, x12, x8
-	umulh	x26, x1, x8
-	umulh	x23, x11, x8
-	umulh	x21, x10, x8
-	umulh	x7, x9, x8
-	mul	 x25, x9, x2
-	umulh	x6, x9, x2
-	mul	 x4, x10, x2
-	umulh	x5, x10, x2
-	mul	 x18, x11, x2
-	umulh	x3, x11, x2
-	mul	 x16, x1, x2
-	umulh	x1, x1, x2
-	mul	 x15, x12, x2
-	umulh	x17, x12, x2
-	mul	 x14, x13, x2
-	umulh	x13, x13, x2
-	str	 x30, [x0]
-	ldp	x9, x8, [sp, #216]
-	adds	 x2, x9, x8
-	ldp	x8, x30, [sp, #272]
-	ldr	x9, [sp, #248]          // 8-byte Folded Reload
-	adcs	x8, x9, x8
-	ldp	x10, x9, [sp, #304]
-	adcs	x9, x10, x9
-	ldr	x10, [sp, #360]         // 8-byte Folded Reload
-	ldr	x11, [sp, #336]         // 8-byte Folded Reload
-	adcs	x10, x11, x10
-	ldp	x12, x11, [sp, #376]
-	adcs	x11, x12, x11
-	ldr	x12, [sp, #392]         // 8-byte Folded Reload
-	adcs	x12, x12, xzr
-	adds	 x2, x30, x2
-	str	x2, [x0, #8]
-	ldp	x30, x2, [sp, #232]
-	adcs	x8, x2, x8
-	ldr	x2, [sp, #264]          // 8-byte Folded Reload
-	adcs	x9, x2, x9
-	ldr	x2, [sp, #296]          // 8-byte Folded Reload
-	adcs	x10, x2, x10
-	ldr	x2, [sp, #328]          // 8-byte Folded Reload
-	adcs	x11, x2, x11
-	ldr	x2, [sp, #352]          // 8-byte Folded Reload
-	adcs	x12, x2, x12
-	adcs	x2, xzr, xzr
-	adds	 x8, x8, x30
-	ldr	x30, [sp, #256]         // 8-byte Folded Reload
-	adcs	x9, x9, x30
-	ldr	x30, [sp, #288]         // 8-byte Folded Reload
-	adcs	x10, x10, x30
-	ldr	x30, [sp, #320]         // 8-byte Folded Reload
-	adcs	x11, x11, x30
-	ldr	x30, [sp, #344]         // 8-byte Folded Reload
-	adcs	x12, x12, x30
-	ldr	x30, [sp, #368]         // 8-byte Folded Reload
-	adcs	x2, x2, x30
-	ldr	x30, [sp, #144]         // 8-byte Folded Reload
-	adds	 x8, x30, x8
-	str	x8, [x0, #16]
-	ldp	x30, x8, [sp, #104]
-	adcs	x8, x8, x9
-	ldr	x9, [sp, #128]          // 8-byte Folded Reload
-	adcs	x9, x9, x10
-	ldr	x10, [sp, #160]         // 8-byte Folded Reload
-	adcs	x10, x10, x11
-	ldr	x11, [sp, #176]         // 8-byte Folded Reload
-	adcs	x11, x11, x12
-	ldr	x12, [sp, #200]         // 8-byte Folded Reload
-	adcs	x12, x12, x2
-	adcs	x2, xzr, xzr
-	adds	 x8, x8, x30
-	ldr	x30, [sp, #120]         // 8-byte Folded Reload
-	adcs	x9, x9, x30
-	ldr	x30, [sp, #152]         // 8-byte Folded Reload
-	adcs	x10, x10, x30
-	ldr	x30, [sp, #168]         // 8-byte Folded Reload
-	adcs	x11, x11, x30
-	ldr	x30, [sp, #192]         // 8-byte Folded Reload
-	adcs	x12, x12, x30
-	ldr	x30, [sp, #208]         // 8-byte Folded Reload
-	adcs	x2, x2, x30
-	ldr	x30, [sp, #24]          // 8-byte Folded Reload
-	adds	 x8, x30, x8
-	str	x8, [x0, #24]
-	ldp	x8, x30, [sp, #32]
-	adcs	x8, x8, x9
-	ldr	x9, [sp, #48]           // 8-byte Folded Reload
-	adcs	x9, x9, x10
-	ldr	x10, [sp, #64]          // 8-byte Folded Reload
-	adcs	x10, x10, x11
-	ldr	x11, [sp, #80]          // 8-byte Folded Reload
-	adcs	x11, x11, x12
-	ldr	x12, [sp, #184]         // 8-byte Folded Reload
-	adcs	x12, x12, x2
-	adcs	x2, xzr, xzr
-	adds	 x8, x8, x30
-	ldr	x30, [sp, #56]          // 8-byte Folded Reload
-	adcs	x9, x9, x30
-	ldr	x30, [sp, #72]          // 8-byte Folded Reload
-	adcs	x10, x10, x30
-	ldr	x30, [sp, #88]          // 8-byte Folded Reload
-	adcs	x11, x11, x30
-	ldr	x30, [sp, #96]          // 8-byte Folded Reload
-	adcs	x12, x12, x30
-	ldr	x30, [sp, #136]         // 8-byte Folded Reload
-	adcs	x2, x2, x30
-	adds	 x8, x22, x8
-	str	x8, [x0, #32]
-	adcs	x8, x19, x9
-	adcs	x9, x20, x10
-	adcs	x10, x24, x11
-	adcs	x11, x27, x12
-	adcs	x12, x28, x2
-	adcs	x2, xzr, xzr
-	adds	 x8, x8, x7
-	adcs	x9, x9, x21
-	adcs	x10, x10, x23
-	adcs	x11, x11, x26
-	adcs	x12, x12, x29
-	ldr	x7, [sp, #16]           // 8-byte Folded Reload
-	adcs	x2, x2, x7
-	adds	 x8, x25, x8
-	str	x8, [x0, #40]
-	adcs	x8, x4, x9
-	adcs	x9, x18, x10
-	adcs	x10, x16, x11
-	adcs	x11, x15, x12
-	adcs	x12, x14, x2
-	adcs	x14, xzr, xzr
-	adds	 x8, x8, x6
-	str	x8, [x0, #48]
-	adcs	x8, x9, x5
-	str	x8, [x0, #56]
-	adcs	x8, x10, x3
-	str	x8, [x0, #64]
-	adcs	x8, x11, x1
-	str	x8, [x0, #72]
-	adcs	x8, x12, x17
-	str	x8, [x0, #80]
-	adcs	x8, x14, x13
-	str	x8, [x0, #88]
-	add	sp, sp, #400            // =400
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end81:
-	.size	mcl_fpDbl_mulPre6L, .Lfunc_end81-mcl_fpDbl_mulPre6L
-
-	.globl	mcl_fpDbl_sqrPre6L
-	.align	2
-	.type	mcl_fpDbl_sqrPre6L,@function
-mcl_fpDbl_sqrPre6L:                     // @mcl_fpDbl_sqrPre6L
-// BB#0:
-	stp	x20, x19, [sp, #-16]!
-	ldp	x8, x9, [x1, #8]
-	ldp	x15, x10, [x1, #32]
-	ldp	 x11, x13, [x1]
-	ldr	 x12, [x1]
-	ldp	x17, x14, [x1, #32]
-	ldr	x16, [x1, #24]
-	mul	 x18, x11, x11
-	umulh	x2, x10, x11
-	mul	 x3, x15, x11
-	mul	 x4, x16, x11
-	umulh	x5, x9, x11
-	mul	 x6, x9, x11
-	umulh	x7, x8, x11
-	mul	 x19, x8, x11
-	str	 x18, [x0]
-	umulh	x18, x11, x11
-	adds	 x18, x18, x19
-	adcs	x6, x7, x6
-	adcs	x4, x5, x4
-	umulh	x5, x16, x11
-	adcs	x3, x5, x3
-	mul	 x5, x10, x11
-	umulh	x11, x15, x11
-	adcs	x11, x11, x5
-	adcs	x2, x2, xzr
-	adds	 x18, x19, x18
-	ldp	x5, x19, [x1, #16]
-	str	x18, [x0, #8]
-	mul	 x18, x8, x8
-	adcs	x18, x18, x6
-	mul	 x6, x9, x8
-	adcs	x4, x6, x4
-	mul	 x6, x16, x8
-	adcs	x3, x6, x3
-	mul	 x6, x15, x8
-	adcs	x11, x6, x11
-	mul	 x6, x10, x8
-	adcs	x2, x6, x2
-	adcs	x6, xzr, xzr
-	adds	 x18, x18, x7
-	ldr	x7, [x1, #32]
-	umulh	x10, x10, x8
-	umulh	x15, x15, x8
-	umulh	x16, x16, x8
-	umulh	x9, x9, x8
-	umulh	x8, x8, x8
-	adcs	x8, x4, x8
-	adcs	x9, x3, x9
-	ldp	 x3, x4, [x1]
-	adcs	x11, x11, x16
-	mul	 x16, x12, x5
-	adcs	x15, x2, x15
-	mul	 x2, x14, x5
-	adcs	x10, x6, x10
-	mul	 x6, x7, x5
-	adds	 x16, x16, x18
-	mul	 x18, x19, x5
-	str	x16, [x0, #16]
-	mul	 x16, x13, x5
-	adcs	x8, x16, x8
-	mul	 x16, x5, x5
-	adcs	x9, x16, x9
-	umulh	x16, x7, x5
-	adcs	x11, x18, x11
-	adcs	x15, x6, x15
-	umulh	x6, x12, x5
-	adcs	x10, x2, x10
-	adcs	x2, xzr, xzr
-	adds	 x8, x8, x6
-	umulh	x6, x13, x5
-	adcs	x9, x9, x6
-	umulh	x6, x5, x5
-	adcs	x11, x11, x6
-	umulh	x6, x19, x5
-	adcs	x15, x15, x6
-	adcs	x10, x10, x16
-	umulh	x5, x14, x5
-	adcs	x2, x2, x5
-	mul	 x5, x12, x19
-	adds	 x8, x5, x8
-	ldp	x16, x5, [x1, #16]
-	ldr	x1, [x1, #40]
-	str	x8, [x0, #24]
-	mul	 x8, x13, x19
-	adcs	x8, x8, x9
-	mul	 x9, x14, x19
-	adcs	x11, x18, x11
-	mul	 x18, x19, x19
-	adcs	x15, x18, x15
-	mul	 x18, x7, x19
-	umulh	x14, x14, x19
-	umulh	x7, x7, x19
-	umulh	x13, x13, x19
-	umulh	x12, x12, x19
-	umulh	x19, x19, x19
-	adcs	x10, x18, x10
-	mul	 x18, x3, x17
-	adcs	x9, x9, x2
-	adcs	x2, xzr, xzr
-	adds	 x8, x8, x12
-	mul	 x12, x1, x17
-	adcs	x11, x11, x13
-	mul	 x13, x5, x17
-	adcs	x15, x15, x6
-	mul	 x6, x16, x17
-	adcs	x10, x10, x19
-	mul	 x19, x4, x17
-	adcs	x9, x9, x7
-	mul	 x7, x17, x17
-	adcs	x14, x2, x14
-	umulh	x2, x1, x17
-	adds	 x8, x18, x8
-	umulh	x18, x5, x17
-	str	x8, [x0, #32]
-	umulh	x8, x16, x17
-	adcs	x11, x19, x11
-	umulh	x19, x4, x17
-	adcs	x15, x6, x15
-	umulh	x6, x3, x17
-	umulh	x17, x17, x17
-	adcs	x10, x13, x10
-	mul	 x13, x3, x1
-	adcs	x9, x7, x9
-	adcs	x14, x12, x14
-	adcs	x7, xzr, xzr
-	adds	 x11, x11, x6
-	mul	 x6, x5, x1
-	adcs	x15, x15, x19
-	mul	 x19, x16, x1
-	adcs	x8, x10, x8
-	mul	 x10, x4, x1
-	adcs	x9, x9, x18
-	mul	 x18, x1, x1
-	umulh	x3, x3, x1
-	umulh	x4, x4, x1
-	umulh	x16, x16, x1
-	umulh	x5, x5, x1
-	umulh	x1, x1, x1
-	adcs	x14, x14, x17
-	adcs	x17, x7, x2
-	adds	 x11, x13, x11
-	str	x11, [x0, #40]
-	adcs	x10, x10, x15
-	adcs	x8, x19, x8
-	adcs	x9, x6, x9
-	adcs	x11, x12, x14
-	adcs	x12, x18, x17
-	adcs	x13, xzr, xzr
-	adds	 x10, x10, x3
-	adcs	x8, x8, x4
-	stp	x10, x8, [x0, #48]
-	adcs	x8, x9, x16
-	str	x8, [x0, #64]
-	adcs	x8, x11, x5
-	str	x8, [x0, #72]
-	adcs	x8, x12, x2
-	str	x8, [x0, #80]
-	adcs	x8, x13, x1
-	str	x8, [x0, #88]
-	ldp	x20, x19, [sp], #16
-	ret
-.Lfunc_end82:
-	.size	mcl_fpDbl_sqrPre6L, .Lfunc_end82-mcl_fpDbl_sqrPre6L
-
-	.globl	mcl_fp_mont6L
-	.align	2
-	.type	mcl_fp_mont6L,@function
-mcl_fp_mont6L:                          // @mcl_fp_mont6L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	sub	sp, sp, #48             // =48
-	str	x0, [sp, #24]           // 8-byte Folded Spill
-	ldr	 x5, [x2]
-	ldp	x0, x4, [x1, #32]
-	ldp	x16, x18, [x1, #16]
-	ldp	 x10, x1, [x1]
-	ldur	x12, [x3, #-8]
-	str	x12, [sp, #40]          // 8-byte Folded Spill
-	ldp	x11, x8, [x3, #32]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldp	x13, x17, [x3, #16]
-	ldp	 x14, x15, [x3]
-	ldr	x3, [x2, #8]
-	umulh	x6, x4, x5
-	mul	 x7, x4, x5
-	umulh	x19, x0, x5
-	mul	 x20, x0, x5
-	umulh	x21, x18, x5
-	mul	 x22, x18, x5
-	umulh	x23, x16, x5
-	mul	 x24, x16, x5
-	umulh	x25, x1, x5
-	mul	 x26, x1, x5
-	umulh	x27, x10, x5
-	mul	 x5, x10, x5
-	umulh	x28, x3, x4
-	adds	 x26, x27, x26
-	mul	 x27, x5, x12
-	adcs	x24, x25, x24
-	mul	 x25, x27, x8
-	mul	 x29, x27, x11
-	mul	 x30, x27, x17
-	adcs	x22, x23, x22
-	mul	 x23, x27, x13
-	adcs	x20, x21, x20
-	mul	 x21, x27, x15
-	adcs	x7, x19, x7
-	umulh	x19, x27, x14
-	adcs	x6, x6, xzr
-	adds	 x19, x19, x21
-	umulh	x21, x27, x15
-	adcs	x21, x21, x23
-	umulh	x23, x27, x13
-	adcs	x23, x23, x30
-	umulh	x30, x27, x17
-	adcs	x29, x30, x29
-	umulh	x30, x27, x11
-	adcs	x25, x30, x25
-	umulh	x30, x27, x8
-	mul	 x27, x27, x14
-	adcs	x30, x30, xzr
-	cmn	 x27, x5
-	mul	 x5, x3, x4
-	umulh	x27, x3, x0
-	adcs	x19, x19, x26
-	mul	 x26, x3, x0
-	adcs	x21, x21, x24
-	mul	 x24, x3, x18
-	adcs	x22, x23, x22
-	mul	 x23, x3, x16
-	adcs	x20, x29, x20
-	mul	 x29, x3, x1
-	adcs	x7, x25, x7
-	umulh	x25, x3, x10
-	adcs	x30, x30, x6
-	adcs	x6, xzr, xzr
-	adds	 x25, x25, x29
-	umulh	x29, x3, x1
-	adcs	x23, x29, x23
-	umulh	x29, x3, x16
-	adcs	x24, x29, x24
-	umulh	x29, x3, x18
-	mul	 x3, x3, x10
-	adcs	x26, x29, x26
-	adcs	x27, x27, x5
-	adcs	x29, x28, xzr
-	adds	 x3, x19, x3
-	adcs	x5, x21, x25
-	mul	 x21, x3, x12
-	adcs	x28, x22, x23
-	umulh	x22, x21, x8
-	mul	 x23, x21, x8
-	mul	 x25, x21, x11
-	mul	 x9, x21, x17
-	adcs	x19, x20, x24
-	mul	 x8, x21, x13
-	adcs	x20, x7, x26
-	mul	 x24, x21, x15
-	adcs	x30, x30, x27
-	umulh	x26, x21, x14
-	adcs	x6, x6, x29
-	adcs	x7, xzr, xzr
-	adds	 x24, x26, x24
-	umulh	x26, x21, x15
-	adcs	x29, x26, x8
-	umulh	x8, x21, x13
-	adcs	x26, x8, x9
-	umulh	x8, x21, x17
-	adcs	x27, x8, x25
-	umulh	x8, x21, x11
-	mul	 x9, x21, x14
-	adcs	x8, x8, x23
-	adcs	x21, x22, xzr
-	cmn	 x9, x3
-	ldp	x23, x3, [x2, #16]
-	umulh	x9, x23, x4
-	adcs	x5, x24, x5
-	mul	 x22, x23, x4
-	adcs	x24, x29, x28
-	mul	 x25, x23, x0
-	adcs	x19, x26, x19
-	mul	 x26, x23, x18
-	adcs	x20, x27, x20
-	mul	 x27, x23, x16
-	adcs	x8, x8, x30
-	mul	 x28, x23, x1
-	adcs	x21, x21, x6
-	umulh	x6, x23, x10
-	adcs	x7, x7, xzr
-	adds	 x6, x6, x28
-	umulh	x28, x23, x1
-	adcs	x27, x28, x27
-	umulh	x28, x23, x16
-	adcs	x26, x28, x26
-	umulh	x28, x23, x18
-	adcs	x25, x28, x25
-	umulh	x28, x23, x0
-	mul	 x23, x23, x10
-	adcs	x22, x28, x22
-	adcs	x9, x9, xzr
-	adds	 x23, x5, x23
-	adcs	x5, x24, x6
-	mul	 x29, x23, x12
-	adcs	x6, x19, x27
-	ldr	x12, [sp, #32]          // 8-byte Folded Reload
-	mul	 x28, x29, x12
-	mul	 x27, x29, x11
-	mul	 x30, x29, x17
-	adcs	x19, x20, x26
-	mul	 x26, x29, x13
-	adcs	x20, x8, x25
-	mul	 x8, x29, x15
-	adcs	x21, x21, x22
-	umulh	x24, x29, x14
-	adcs	x22, x7, x9
-	adcs	x7, xzr, xzr
-	adds	 x24, x24, x8
-	umulh	x8, x29, x15
-	adcs	x25, x8, x26
-	umulh	x8, x29, x13
-	adcs	x26, x8, x30
-	umulh	x8, x29, x17
-	adcs	x27, x8, x27
-	umulh	x8, x29, x11
-	adcs	x28, x8, x28
-	umulh	x8, x29, x12
-	mul	 x9, x29, x14
-	adcs	x29, x8, xzr
-	cmn	 x9, x23
-	ldp	x23, x8, [x2, #32]
-	umulh	x30, x3, x4
-	adcs	x2, x24, x5
-	mul	 x5, x3, x4
-	adcs	x6, x25, x6
-	mul	 x24, x3, x0
-	adcs	x19, x26, x19
-	mul	 x25, x3, x18
-	adcs	x20, x27, x20
-	mul	 x26, x3, x16
-	adcs	x21, x28, x21
-	mul	 x27, x3, x1
-	adcs	x22, x29, x22
-	mov	 x9, x10
-	umulh	x28, x3, x9
-	adcs	x7, x7, xzr
-	adds	 x27, x28, x27
-	umulh	x28, x3, x1
-	adcs	x26, x28, x26
-	umulh	x28, x3, x16
-	adcs	x25, x28, x25
-	umulh	x28, x3, x18
-	adcs	x24, x28, x24
-	umulh	x28, x3, x0
-	mul	 x3, x3, x9
-	adcs	x5, x28, x5
-	adcs	x29, x30, xzr
-	adds	 x2, x2, x3
-	adcs	x3, x6, x27
-	ldr	x10, [sp, #40]          // 8-byte Folded Reload
-	mul	 x6, x2, x10
-	adcs	x19, x19, x26
-	mul	 x26, x6, x12
-	mul	 x27, x6, x11
-	mov	 x30, x17
-	mul	 x28, x6, x30
-	adcs	x20, x20, x25
-	mul	 x25, x6, x13
-	adcs	x21, x21, x24
-	mov	 x17, x15
-	mul	 x24, x6, x17
-	adcs	x5, x22, x5
-	umulh	x22, x6, x14
-	adcs	x29, x7, x29
-	adcs	x7, xzr, xzr
-	adds	 x22, x22, x24
-	umulh	x24, x6, x17
-	adcs	x24, x24, x25
-	umulh	x25, x6, x13
-	mov	 x15, x13
-	adcs	x25, x25, x28
-	umulh	x28, x6, x30
-	mov	 x13, x30
-	adcs	x27, x28, x27
-	umulh	x28, x6, x11
-	adcs	x26, x28, x26
-	umulh	x28, x6, x12
-	mul	 x6, x6, x14
-	adcs	x28, x28, xzr
-	cmn	 x6, x2
-	umulh	x2, x23, x4
-	mul	 x6, x23, x4
-	adcs	x3, x22, x3
-	umulh	x22, x23, x0
-	adcs	x19, x24, x19
-	mul	 x24, x23, x0
-	adcs	x20, x25, x20
-	mul	 x25, x23, x18
-	adcs	x21, x27, x21
-	mul	 x27, x23, x16
-	adcs	x5, x26, x5
-	mul	 x26, x23, x1
-	adcs	x29, x28, x29
-	umulh	x28, x23, x9
-	adcs	x7, x7, xzr
-	adds	 x26, x28, x26
-	umulh	x28, x23, x1
-	adcs	x27, x28, x27
-	umulh	x28, x23, x16
-	adcs	x25, x28, x25
-	umulh	x28, x23, x18
-	mul	 x23, x23, x9
-	adcs	x24, x28, x24
-	umulh	x28, x8, x4
-	str	x28, [sp, #16]          // 8-byte Folded Spill
-	mul	 x28, x8, x4
-	adcs	x6, x22, x6
-	adcs	x2, x2, xzr
-	adds	 x3, x3, x23
-	adcs	x19, x19, x26
-	mul	 x22, x3, x10
-	adcs	x20, x20, x27
-	mul	 x23, x22, x12
-	mul	 x26, x22, x11
-	mul	 x27, x22, x13
-	adcs	x21, x21, x25
-	mul	 x25, x22, x15
-	adcs	x5, x5, x24
-	mul	 x24, x22, x17
-	adcs	x4, x29, x6
-	umulh	x6, x22, x14
-	adcs	x2, x7, x2
-	adcs	x7, xzr, xzr
-	adds	 x6, x6, x24
-	umulh	x24, x22, x17
-	adcs	x24, x24, x25
-	umulh	x25, x22, x15
-	adcs	x25, x25, x27
-	umulh	x27, x22, x13
-	adcs	x26, x27, x26
-	umulh	x27, x22, x11
-	adcs	x23, x27, x23
-	umulh	x27, x22, x12
-	mul	 x22, x22, x14
-	adcs	x27, x27, xzr
-	cmn	 x22, x3
-	umulh	x3, x8, x0
-	mul	 x0, x8, x0
-	umulh	x22, x8, x18
-	mul	 x18, x8, x18
-	umulh	x29, x8, x16
-	mul	 x16, x8, x16
-	umulh	x30, x8, x1
-	mul	 x1, x8, x1
-	umulh	x10, x8, x9
-	mul	 x8, x8, x9
-	adcs	x6, x6, x19
-	adcs	x19, x24, x20
-	adcs	x20, x25, x21
-	adcs	x5, x26, x5
-	adcs	x9, x23, x4
-	str	x9, [sp, #8]            // 8-byte Folded Spill
-	adcs	x2, x27, x2
-	adcs	x7, x7, xzr
-	adds	 x9, x10, x1
-	adcs	x16, x30, x16
-	adcs	x18, x29, x18
-	adcs	x0, x22, x0
-	adcs	x1, x3, x28
-	ldr	x10, [sp, #16]          // 8-byte Folded Reload
-	adcs	x3, x10, xzr
-	adds	 x8, x6, x8
-	adcs	x9, x19, x9
-	ldr	x10, [sp, #40]          // 8-byte Folded Reload
-	mul	 x4, x8, x10
-	adcs	x16, x20, x16
-	umulh	x6, x4, x12
-	mul	 x19, x4, x12
-	mov	 x30, x11
-	umulh	x20, x4, x30
-	mul	 x21, x4, x30
-	umulh	x22, x4, x13
-	mul	 x23, x4, x13
-	mov	 x29, x13
-	umulh	x24, x4, x15
-	mul	 x25, x4, x15
-	umulh	x26, x4, x17
-	mul	 x27, x4, x17
-	umulh	x28, x4, x14
-	mul	 x4, x4, x14
-	adcs	x18, x5, x18
-	ldr	x10, [sp, #8]           // 8-byte Folded Reload
-	adcs	x10, x10, x0
-	adcs	x0, x2, x1
-	adcs	x1, x7, x3
-	adcs	x2, xzr, xzr
-	adds	 x3, x28, x27
-	adcs	x5, x26, x25
-	adcs	x7, x24, x23
-	adcs	x21, x22, x21
-	adcs	x19, x20, x19
-	adcs	x6, x6, xzr
-	cmn	 x4, x8
-	adcs	x8, x3, x9
-	adcs	x9, x5, x16
-	adcs	x16, x7, x18
-	adcs	x10, x21, x10
-	adcs	x18, x19, x0
-	adcs	x0, x6, x1
-	adcs	x1, x2, xzr
-	subs	 x13, x8, x14
-	sbcs	x12, x9, x17
-	sbcs	x11, x16, x15
-	sbcs	x14, x10, x29
-	sbcs	x15, x18, x30
-	ldr	x17, [sp, #32]          // 8-byte Folded Reload
-	sbcs	x17, x0, x17
-	sbcs	x1, x1, xzr
-	tst	 x1, #0x1
-	csel	x8, x8, x13, ne
-	csel	x9, x9, x12, ne
-	csel	x11, x16, x11, ne
-	csel	x10, x10, x14, ne
-	csel	x12, x18, x15, ne
-	csel	x13, x0, x17, ne
-	ldr	x14, [sp, #24]          // 8-byte Folded Reload
-	stp	 x8, x9, [x14]
-	stp	x11, x10, [x14, #16]
-	stp	x12, x13, [x14, #32]
-	add	sp, sp, #48             // =48
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end83:
-	.size	mcl_fp_mont6L, .Lfunc_end83-mcl_fp_mont6L
-
-	.globl	mcl_fp_montNF6L
-	.align	2
-	.type	mcl_fp_montNF6L,@function
-mcl_fp_montNF6L:                        // @mcl_fp_montNF6L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	sub	sp, sp, #112            // =112
-	str	x0, [sp, #96]           // 8-byte Folded Spill
-	ldp	x16, x12, [x1, #32]
-	ldp	x13, x11, [x1, #16]
-	ldp	 x17, x0, [x1]
-	ldur	x18, [x3, #-8]
-	ldr	x9, [x3, #32]
-	str	x9, [sp, #104]          // 8-byte Folded Spill
-	ldr	x14, [x3, #40]
-	ldp	x4, x10, [x3, #16]
-	ldr	 x15, [x3]
-	str	x15, [sp, #8]           // 8-byte Folded Spill
-	ldr	x9, [x3, #8]
-	ldp	 x5, x3, [x2]
-	ldp	x6, x7, [x2, #16]
-	ldp	x19, x2, [x2, #32]
-	umulh	x20, x12, x5
-	mul	 x21, x12, x5
-	umulh	x22, x16, x5
-	mul	 x23, x16, x5
-	umulh	x24, x11, x5
-	mul	 x25, x11, x5
-	mov	 x1, x13
-	umulh	x26, x1, x5
-	mul	 x27, x1, x5
-	mov	 x13, x0
-	umulh	x28, x13, x5
-	mul	 x29, x13, x5
-	mov	 x8, x17
-	umulh	x30, x8, x5
-	mul	 x5, x8, x5
-	adds	 x29, x30, x29
-	mul	 x30, x3, x12
-	adcs	x27, x28, x27
-	mul	 x28, x3, x16
-	adcs	x25, x26, x25
-	mul	 x26, x3, x11
-	adcs	x23, x24, x23
-	mul	 x24, x5, x18
-	adcs	x21, x22, x21
-	mul	 x22, x24, x15
-	adcs	x20, x20, xzr
-	cmn	 x22, x5
-	mul	 x5, x3, x1
-	mov	 x0, x9
-	mul	 x22, x24, x0
-	adcs	x22, x22, x29
-	mul	 x29, x24, x4
-	adcs	x17, x29, x27
-	mul	 x29, x24, x10
-	adcs	x25, x29, x25
-	ldr	x9, [sp, #104]          // 8-byte Folded Reload
-	mul	 x29, x24, x9
-	adcs	x23, x29, x23
-	mul	 x29, x24, x14
-	adcs	x21, x29, x21
-	umulh	x29, x24, x15
-	adcs	x20, x20, xzr
-	adds	 x22, x22, x29
-	umulh	x29, x24, x0
-	adcs	x15, x17, x29
-	umulh	x29, x24, x4
-	mov	 x17, x4
-	adcs	x25, x25, x29
-	umulh	x29, x24, x10
-	adcs	x23, x23, x29
-	umulh	x29, x24, x9
-	adcs	x21, x21, x29
-	mul	 x29, x3, x13
-	umulh	x24, x24, x14
-	adcs	x20, x20, x24
-	umulh	x24, x3, x8
-	adds	 x24, x24, x29
-	umulh	x29, x3, x13
-	adcs	x5, x29, x5
-	umulh	x29, x3, x1
-	adcs	x26, x29, x26
-	umulh	x29, x3, x11
-	adcs	x28, x29, x28
-	umulh	x29, x3, x16
-	adcs	x29, x29, x30
-	umulh	x30, x3, x12
-	mul	 x3, x3, x8
-	adcs	x30, x30, xzr
-	adds	 x3, x3, x22
-	umulh	x22, x6, x12
-	adcs	x24, x24, x15
-	mul	 x27, x6, x12
-	adcs	x5, x5, x25
-	mul	 x25, x6, x16
-	adcs	x23, x26, x23
-	mul	 x26, x6, x11
-	adcs	x21, x28, x21
-	mul	 x28, x3, x18
-	mov	 x4, x18
-	adcs	x20, x29, x20
-	ldr	x18, [sp, #8]           // 8-byte Folded Reload
-	mul	 x29, x28, x18
-	adcs	x30, x30, xzr
-	cmn	 x29, x3
-	mul	 x3, x6, x1
-	mul	 x29, x28, x0
-	adcs	x24, x29, x24
-	mul	 x29, x28, x17
-	adcs	x5, x29, x5
-	mul	 x29, x28, x10
-	adcs	x23, x29, x23
-	mul	 x29, x28, x9
-	adcs	x21, x29, x21
-	mul	 x29, x28, x14
-	adcs	x20, x29, x20
-	umulh	x29, x28, x18
-	adcs	x30, x30, xzr
-	adds	 x24, x24, x29
-	umulh	x29, x28, x0
-	adcs	x5, x5, x29
-	umulh	x29, x28, x17
-	adcs	x23, x23, x29
-	umulh	x29, x28, x10
-	adcs	x21, x21, x29
-	umulh	x29, x28, x9
-	adcs	x20, x20, x29
-	mul	 x29, x6, x13
-	umulh	x28, x28, x14
-	adcs	x28, x30, x28
-	umulh	x30, x6, x8
-	adds	 x29, x30, x29
-	umulh	x30, x6, x13
-	adcs	x3, x30, x3
-	umulh	x30, x6, x1
-	adcs	x26, x30, x26
-	umulh	x30, x6, x11
-	adcs	x25, x30, x25
-	umulh	x30, x6, x16
-	mul	 x6, x6, x8
-	adcs	x27, x30, x27
-	umulh	x30, x7, x12
-	adcs	x22, x22, xzr
-	adds	 x6, x6, x24
-	mul	 x24, x7, x12
-	adcs	x5, x29, x5
-	umulh	x29, x7, x16
-	adcs	x3, x3, x23
-	mul	 x23, x7, x16
-	adcs	x21, x26, x21
-	mul	 x26, x7, x11
-	adcs	x20, x25, x20
-	mul	 x25, x6, x4
-	adcs	x27, x27, x28
-	mul	 x28, x25, x18
-	adcs	x22, x22, xzr
-	cmn	 x28, x6
-	mul	 x6, x7, x1
-	mul	 x28, x25, x0
-	adcs	x5, x28, x5
-	mul	 x28, x25, x17
-	adcs	x3, x28, x3
-	mul	 x28, x25, x10
-	adcs	x21, x28, x21
-	mul	 x28, x25, x9
-	adcs	x20, x28, x20
-	mul	 x28, x25, x14
-	adcs	x27, x28, x27
-	umulh	x28, x25, x18
-	adcs	x22, x22, xzr
-	adds	 x5, x5, x28
-	umulh	x28, x25, x0
-	adcs	x3, x3, x28
-	umulh	x28, x25, x17
-	adcs	x21, x21, x28
-	umulh	x28, x25, x10
-	adcs	x20, x20, x28
-	umulh	x28, x25, x9
-	adcs	x27, x27, x28
-	mul	 x28, x7, x13
-	umulh	x25, x25, x14
-	adcs	x22, x22, x25
-	umulh	x25, x7, x8
-	adds	 x25, x25, x28
-	umulh	x28, x7, x13
-	adcs	x6, x28, x6
-	umulh	x28, x7, x1
-	adcs	x26, x28, x26
-	umulh	x28, x7, x11
-	mul	 x7, x7, x8
-	adcs	x23, x28, x23
-	umulh	x9, x19, x12
-	str	x9, [sp, #16]           // 8-byte Folded Spill
-	adcs	x24, x29, x24
-	mul	 x9, x19, x12
-	str	x9, [sp, #32]           // 8-byte Folded Spill
-	adcs	x30, x30, xzr
-	adds	 x5, x7, x5
-	umulh	x7, x19, x16
-	adcs	x3, x25, x3
-	mul	 x25, x19, x16
-	adcs	x6, x6, x21
-	umulh	x21, x19, x11
-	adcs	x20, x26, x20
-	mul	 x26, x19, x11
-	adcs	x23, x23, x27
-	mul	 x27, x5, x4
-	adcs	x22, x24, x22
-	mul	 x24, x27, x18
-	adcs	x30, x30, xzr
-	cmn	 x24, x5
-	mov	 x28, x1
-	mul	 x5, x19, x28
-	mul	 x24, x19, x13
-	umulh	x1, x19, x8
-	umulh	x9, x19, x13
-	umulh	x15, x19, x28
-	mul	 x19, x19, x8
-	umulh	x29, x2, x12
-	str	x29, [sp, #88]          // 8-byte Folded Spill
-	mul	 x29, x2, x12
-	umulh	x12, x2, x16
-	str	x12, [sp, #80]          // 8-byte Folded Spill
-	mul	 x12, x2, x16
-	str	x12, [sp, #72]          // 8-byte Folded Spill
-	umulh	x12, x2, x11
-	mul	 x11, x2, x11
-	stp	x11, x12, [sp, #56]
-	umulh	x11, x2, x28
-	str	x11, [sp, #48]          // 8-byte Folded Spill
-	mul	 x11, x2, x28
-	str	x11, [sp, #40]          // 8-byte Folded Spill
-	umulh	x11, x2, x13
-	str	x11, [sp, #24]          // 8-byte Folded Spill
-	mul	 x13, x2, x13
-	umulh	x16, x2, x8
-	mul	 x28, x2, x8
-	mul	 x2, x27, x0
-	adcs	x2, x2, x3
-	mul	 x3, x27, x17
-	adcs	x3, x3, x6
-	mul	 x6, x27, x10
-	adcs	x6, x6, x20
-	ldr	x8, [sp, #104]          // 8-byte Folded Reload
-	mul	 x20, x27, x8
-	adcs	x20, x20, x23
-	mul	 x23, x27, x14
-	adcs	x22, x23, x22
-	adcs	x23, x30, xzr
-	umulh	x30, x27, x18
-	adds	 x2, x2, x30
-	umulh	x30, x27, x0
-	adcs	x3, x3, x30
-	umulh	x30, x27, x17
-	mov	 x12, x17
-	adcs	x6, x6, x30
-	umulh	x30, x27, x10
-	adcs	x20, x20, x30
-	umulh	x30, x27, x8
-	mov	 x11, x8
-	adcs	x22, x22, x30
-	mov	 x30, x14
-	umulh	x27, x27, x30
-	adcs	x23, x23, x27
-	adds	 x8, x1, x24
-	adcs	x9, x9, x5
-	adcs	x14, x15, x26
-	adcs	x5, x21, x25
-	ldr	x15, [sp, #32]          // 8-byte Folded Reload
-	adcs	x7, x7, x15
-	ldr	x15, [sp, #16]          // 8-byte Folded Reload
-	adcs	x21, x15, xzr
-	adds	 x2, x19, x2
-	adcs	x8, x8, x3
-	adcs	x9, x9, x6
-	mov	 x24, x4
-	mul	 x3, x2, x24
-	adcs	x14, x14, x20
-	mul	 x6, x3, x30
-	adcs	x5, x5, x22
-	mul	 x19, x3, x11
-	adcs	x7, x7, x23
-	mul	 x20, x3, x18
-	adcs	x21, x21, xzr
-	cmn	 x20, x2
-	mul	 x2, x3, x10
-	mul	 x20, x3, x0
-	adcs	x8, x20, x8
-	mul	 x20, x3, x12
-	adcs	x9, x20, x9
-	umulh	x20, x3, x30
-	adcs	x14, x2, x14
-	umulh	x2, x3, x11
-	mov	 x27, x11
-	adcs	x5, x19, x5
-	mov	 x11, x10
-	umulh	x19, x3, x11
-	adcs	x6, x6, x7
-	umulh	x7, x3, x18
-	adcs	x21, x21, xzr
-	adds	 x8, x8, x7
-	umulh	x7, x3, x12
-	umulh	x3, x3, x0
-	adcs	x9, x9, x3
-	adcs	x10, x14, x7
-	adcs	x3, x5, x19
-	adcs	x2, x6, x2
-	adcs	x5, x21, x20
-	adds	 x15, x16, x13
-	ldr	x13, [sp, #40]          // 8-byte Folded Reload
-	ldr	x14, [sp, #24]          // 8-byte Folded Reload
-	adcs	x16, x14, x13
-	ldp	x14, x13, [sp, #48]
-	adcs	x17, x14, x13
-	ldp	x14, x13, [sp, #64]
-	adcs	x1, x14, x13
-	ldr	x13, [sp, #80]          // 8-byte Folded Reload
-	adcs	x4, x13, x29
-	ldr	x13, [sp, #88]          // 8-byte Folded Reload
-	adcs	x6, x13, xzr
-	adds	 x8, x28, x8
-	adcs	x9, x15, x9
-	mul	 x15, x8, x24
-	adcs	x10, x16, x10
-	mul	 x16, x15, x30
-	mul	 x14, x15, x27
-	mul	 x7, x15, x11
-	mul	 x19, x15, x12
-	mul	 x20, x15, x0
-	mul	 x21, x15, x18
-	umulh	x22, x15, x30
-	umulh	x23, x15, x27
-	umulh	x24, x15, x11
-	mov	 x28, x11
-	umulh	x25, x15, x12
-	umulh	x26, x15, x0
-	umulh	x15, x15, x18
-	adcs	x17, x17, x3
-	adcs	x1, x1, x2
-	adcs	x2, x4, x5
-	adcs	x3, x6, xzr
-	cmn	 x21, x8
-	adcs	x8, x20, x9
-	adcs	x9, x19, x10
-	adcs	x10, x7, x17
-	adcs	x17, x14, x1
-	adcs	x16, x16, x2
-	adcs	x11, x3, xzr
-	adds	 x8, x8, x15
-	adcs	x9, x9, x26
-	adcs	x10, x10, x25
-	adcs	x15, x17, x24
-	adcs	x16, x16, x23
-	adcs	x17, x11, x22
-	subs	 x3, x8, x18
-	sbcs	x2, x9, x0
-	sbcs	x11, x10, x12
-	sbcs	x14, x15, x28
-	sbcs	x18, x16, x27
-	sbcs	x0, x17, x30
-	asr	x1, x0, #63
-	cmp	 x1, #0                 // =0
-	csel	x8, x8, x3, lt
-	csel	x9, x9, x2, lt
-	csel	x10, x10, x11, lt
-	csel	x11, x15, x14, lt
-	csel	x12, x16, x18, lt
-	csel	x13, x17, x0, lt
-	ldr	x14, [sp, #96]          // 8-byte Folded Reload
-	stp	 x8, x9, [x14]
-	stp	x10, x11, [x14, #16]
-	stp	x12, x13, [x14, #32]
-	add	sp, sp, #112            // =112
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end84:
-	.size	mcl_fp_montNF6L, .Lfunc_end84-mcl_fp_montNF6L
-
-	.globl	mcl_fp_montRed6L
-	.align	2
-	.type	mcl_fp_montRed6L,@function
-mcl_fp_montRed6L:                       // @mcl_fp_montRed6L
-// BB#0:
-	stp	x26, x25, [sp, #-64]!
-	stp	x24, x23, [sp, #16]
-	stp	x22, x21, [sp, #32]
-	stp	x20, x19, [sp, #48]
-	ldur	x14, [x2, #-8]
-	ldp	x9, x8, [x2, #32]
-	ldp	x11, x10, [x2, #16]
-	ldp	 x13, x12, [x2]
-	ldp	x16, x17, [x1, #80]
-	ldp	x18, x2, [x1, #64]
-	ldp	x3, x4, [x1, #48]
-	ldp	x5, x6, [x1, #32]
-	ldp	x7, x19, [x1, #16]
-	ldp	 x15, x1, [x1]
-	mul	 x20, x15, x14
-	mul	 x21, x20, x8
-	mul	 x22, x20, x9
-	mul	 x23, x20, x10
-	mul	 x24, x20, x11
-	mul	 x25, x20, x12
-	umulh	x26, x20, x13
-	adds	 x25, x26, x25
-	umulh	x26, x20, x12
-	adcs	x24, x26, x24
-	umulh	x26, x20, x11
-	adcs	x23, x26, x23
-	umulh	x26, x20, x10
-	adcs	x22, x26, x22
-	umulh	x26, x20, x9
-	adcs	x21, x26, x21
-	umulh	x26, x20, x8
-	mul	 x20, x20, x13
-	adcs	x26, x26, xzr
-	cmn	 x15, x20
-	adcs	x15, x1, x25
-	adcs	x1, x7, x24
-	mul	 x7, x15, x14
-	adcs	x19, x19, x23
-	mul	 x20, x7, x8
-	mul	 x23, x7, x9
-	mul	 x24, x7, x10
-	mul	 x25, x7, x11
-	adcs	x5, x5, x22
-	mul	 x22, x7, x12
-	adcs	x6, x6, x21
-	umulh	x21, x7, x13
-	adcs	x3, x3, x26
-	adcs	x4, x4, xzr
-	adcs	x18, x18, xzr
-	adcs	x2, x2, xzr
-	adcs	x16, x16, xzr
-	adcs	x17, x17, xzr
-	adcs	x26, xzr, xzr
-	adds	 x21, x21, x22
-	umulh	x22, x7, x12
-	adcs	x22, x22, x25
-	umulh	x25, x7, x11
-	adcs	x24, x25, x24
-	umulh	x25, x7, x10
-	adcs	x23, x25, x23
-	umulh	x25, x7, x9
-	adcs	x20, x25, x20
-	umulh	x25, x7, x8
-	mul	 x7, x7, x13
-	adcs	x25, x25, xzr
-	cmn	 x7, x15
-	adcs	x15, x21, x1
-	adcs	x1, x22, x19
-	mul	 x7, x15, x14
-	adcs	x5, x24, x5
-	mul	 x19, x7, x8
-	mul	 x21, x7, x9
-	mul	 x22, x7, x10
-	adcs	x6, x23, x6
-	mul	 x23, x7, x11
-	adcs	x3, x20, x3
-	mul	 x20, x7, x12
-	adcs	x4, x25, x4
-	umulh	x24, x7, x13
-	adcs	x18, x18, xzr
-	adcs	x2, x2, xzr
-	adcs	x16, x16, xzr
-	adcs	x17, x17, xzr
-	adcs	x25, x26, xzr
-	adds	 x20, x24, x20
-	umulh	x24, x7, x12
-	adcs	x23, x24, x23
-	umulh	x24, x7, x11
-	adcs	x22, x24, x22
-	umulh	x24, x7, x10
-	adcs	x21, x24, x21
-	umulh	x24, x7, x9
-	adcs	x19, x24, x19
-	umulh	x24, x7, x8
-	mul	 x7, x7, x13
-	adcs	x24, x24, xzr
-	cmn	 x7, x15
-	adcs	x15, x20, x1
-	adcs	x1, x23, x5
-	mul	 x5, x15, x14
-	adcs	x6, x22, x6
-	mul	 x7, x5, x8
-	mul	 x20, x5, x9
-	mul	 x22, x5, x10
-	adcs	x3, x21, x3
-	mul	 x21, x5, x11
-	adcs	x4, x19, x4
-	mul	 x19, x5, x12
-	adcs	x18, x24, x18
-	umulh	x23, x5, x13
-	adcs	x2, x2, xzr
-	adcs	x16, x16, xzr
-	adcs	x17, x17, xzr
-	adcs	x24, x25, xzr
-	adds	 x19, x23, x19
-	umulh	x23, x5, x12
-	adcs	x21, x23, x21
-	umulh	x23, x5, x11
-	adcs	x22, x23, x22
-	umulh	x23, x5, x10
-	adcs	x20, x23, x20
-	umulh	x23, x5, x9
-	adcs	x7, x23, x7
-	umulh	x23, x5, x8
-	mul	 x5, x5, x13
-	adcs	x23, x23, xzr
-	cmn	 x5, x15
-	adcs	x15, x19, x1
-	adcs	x1, x21, x6
-	mul	 x5, x15, x14
-	adcs	x3, x22, x3
-	mul	 x6, x5, x8
-	mul	 x19, x5, x9
-	mul	 x21, x5, x10
-	adcs	x4, x20, x4
-	mul	 x20, x5, x11
-	adcs	x18, x7, x18
-	mul	 x7, x5, x12
-	adcs	x2, x23, x2
-	umulh	x22, x5, x13
-	adcs	x16, x16, xzr
-	adcs	x17, x17, xzr
-	adcs	x23, x24, xzr
-	adds	 x7, x22, x7
-	umulh	x22, x5, x12
-	adcs	x20, x22, x20
-	umulh	x22, x5, x11
-	adcs	x21, x22, x21
-	umulh	x22, x5, x10
-	adcs	x19, x22, x19
-	umulh	x22, x5, x9
-	adcs	x6, x22, x6
-	umulh	x22, x5, x8
-	mul	 x5, x5, x13
-	adcs	x22, x22, xzr
-	cmn	 x5, x15
-	adcs	x15, x7, x1
-	adcs	x1, x20, x3
-	mul	 x14, x15, x14
-	adcs	x3, x21, x4
-	mul	 x4, x14, x8
-	mul	 x5, x14, x9
-	mul	 x7, x14, x10
-	adcs	x18, x19, x18
-	mul	 x19, x14, x11
-	adcs	x2, x6, x2
-	mul	 x6, x14, x12
-	adcs	x16, x22, x16
-	umulh	x20, x14, x13
-	adcs	x17, x17, xzr
-	adcs	x21, x23, xzr
-	adds	 x6, x20, x6
-	umulh	x20, x14, x12
-	adcs	x19, x20, x19
-	umulh	x20, x14, x11
-	adcs	x7, x20, x7
-	umulh	x20, x14, x10
-	adcs	x5, x20, x5
-	umulh	x20, x14, x9
-	adcs	x4, x20, x4
-	umulh	x20, x14, x8
-	mul	 x14, x14, x13
-	adcs	x20, x20, xzr
-	cmn	 x14, x15
-	adcs	x14, x6, x1
-	adcs	x15, x19, x3
-	adcs	x18, x7, x18
-	adcs	x1, x5, x2
-	adcs	x16, x4, x16
-	adcs	x17, x20, x17
-	adcs	x2, x21, xzr
-	subs	 x13, x14, x13
-	sbcs	x12, x15, x12
-	sbcs	x11, x18, x11
-	sbcs	x10, x1, x10
-	sbcs	x9, x16, x9
-	sbcs	x8, x17, x8
-	sbcs	x2, x2, xzr
-	tst	 x2, #0x1
-	csel	x13, x14, x13, ne
-	csel	x12, x15, x12, ne
-	csel	x11, x18, x11, ne
-	csel	x10, x1, x10, ne
-	csel	x9, x16, x9, ne
-	csel	x8, x17, x8, ne
-	stp	 x13, x12, [x0]
-	stp	x11, x10, [x0, #16]
-	stp	x9, x8, [x0, #32]
-	ldp	x20, x19, [sp, #48]
-	ldp	x22, x21, [sp, #32]
-	ldp	x24, x23, [sp, #16]
-	ldp	x26, x25, [sp], #64
-	ret
-.Lfunc_end85:
-	.size	mcl_fp_montRed6L, .Lfunc_end85-mcl_fp_montRed6L
-
-	.globl	mcl_fp_addPre6L
-	.align	2
-	.type	mcl_fp_addPre6L,@function
-mcl_fp_addPre6L:                        // @mcl_fp_addPre6L
-// BB#0:
-	ldp	x8, x9, [x2, #32]
-	ldp	x10, x11, [x1, #32]
-	ldp	x12, x13, [x2, #16]
-	ldp	 x14, x15, [x2]
-	ldp	 x16, x17, [x1]
-	ldp	x18, x1, [x1, #16]
-	adds	 x14, x14, x16
-	str	 x14, [x0]
-	adcs	x14, x15, x17
-	adcs	x12, x12, x18
-	stp	x14, x12, [x0, #8]
-	adcs	x12, x13, x1
-	adcs	x8, x8, x10
-	stp	x12, x8, [x0, #24]
-	adcs	x9, x9, x11
-	adcs	x8, xzr, xzr
-	str	x9, [x0, #40]
-	mov	 x0, x8
-	ret
-.Lfunc_end86:
-	.size	mcl_fp_addPre6L, .Lfunc_end86-mcl_fp_addPre6L
-
-	.globl	mcl_fp_subPre6L
-	.align	2
-	.type	mcl_fp_subPre6L,@function
-mcl_fp_subPre6L:                        // @mcl_fp_subPre6L
-// BB#0:
-	ldp	x8, x9, [x2, #32]
-	ldp	x10, x11, [x1, #32]
-	ldp	x12, x13, [x2, #16]
-	ldp	 x14, x15, [x2]
-	ldp	 x16, x17, [x1]
-	ldp	x18, x1, [x1, #16]
-	subs	 x14, x16, x14
-	str	 x14, [x0]
-	sbcs	x14, x17, x15
-	sbcs	x12, x18, x12
-	stp	x14, x12, [x0, #8]
-	sbcs	x12, x1, x13
-	sbcs	x8, x10, x8
-	stp	x12, x8, [x0, #24]
-	sbcs	x9, x11, x9
-	ngcs	 x8, xzr
-	and	x8, x8, #0x1
-	str	x9, [x0, #40]
-	mov	 x0, x8
-	ret
-.Lfunc_end87:
-	.size	mcl_fp_subPre6L, .Lfunc_end87-mcl_fp_subPre6L
-
-	.globl	mcl_fp_shr1_6L
-	.align	2
-	.type	mcl_fp_shr1_6L,@function
-mcl_fp_shr1_6L:                         // @mcl_fp_shr1_6L
-// BB#0:
-	ldp	 x8, x9, [x1]
-	ldp	x10, x11, [x1, #16]
-	ldp	x12, x13, [x1, #32]
-	extr	x8, x9, x8, #1
-	extr	x9, x10, x9, #1
-	extr	x10, x11, x10, #1
-	extr	x11, x12, x11, #1
-	extr	x12, x13, x12, #1
-	lsr	x13, x13, #1
-	stp	 x8, x9, [x0]
-	stp	x10, x11, [x0, #16]
-	stp	x12, x13, [x0, #32]
-	ret
-.Lfunc_end88:
-	.size	mcl_fp_shr1_6L, .Lfunc_end88-mcl_fp_shr1_6L
-
-	.globl	mcl_fp_add6L
-	.align	2
-	.type	mcl_fp_add6L,@function
-mcl_fp_add6L:                           // @mcl_fp_add6L
-// BB#0:
-	ldp	x8, x9, [x2, #32]
-	ldp	x10, x11, [x1, #32]
-	ldp	x12, x13, [x2, #16]
-	ldp	 x14, x15, [x2]
-	ldp	 x16, x17, [x1]
-	ldp	x18, x1, [x1, #16]
-	adds	 x14, x14, x16
-	adcs	x15, x15, x17
-	ldp	x16, x17, [x3, #32]
-	adcs	x18, x12, x18
-	adcs	x1, x13, x1
-	ldp	 x12, x2, [x3]
-	stp	 x14, x15, [x0]
-	stp	x18, x1, [x0, #16]
-	adcs	x8, x8, x10
-	adcs	x4, x9, x11
-	stp	x8, x4, [x0, #32]
-	adcs	x5, xzr, xzr
-	ldp	x9, x10, [x3, #16]
-	subs	 x13, x14, x12
-	sbcs	x12, x15, x2
-	sbcs	x11, x18, x9
-	sbcs	x10, x1, x10
-	sbcs	x9, x8, x16
-	sbcs	x8, x4, x17
-	sbcs	x14, x5, xzr
-	and	w14, w14, #0x1
-	tbnz	w14, #0, .LBB89_2
-// BB#1:                                // %nocarry
-	stp	 x13, x12, [x0]
-	stp	x11, x10, [x0, #16]
-	stp	x9, x8, [x0, #32]
-.LBB89_2:                               // %carry
-	ret
-.Lfunc_end89:
-	.size	mcl_fp_add6L, .Lfunc_end89-mcl_fp_add6L
-
-	.globl	mcl_fp_addNF6L
-	.align	2
-	.type	mcl_fp_addNF6L,@function
-mcl_fp_addNF6L:                         // @mcl_fp_addNF6L
-// BB#0:
-	ldp	x8, x9, [x1, #32]
-	ldp	x10, x11, [x2, #32]
-	ldp	x12, x13, [x1, #16]
-	ldp	 x14, x15, [x1]
-	ldp	 x16, x17, [x2]
-	ldp	x18, x1, [x2, #16]
-	adds	 x14, x16, x14
-	adcs	x15, x17, x15
-	ldp	x16, x17, [x3, #32]
-	adcs	x12, x18, x12
-	adcs	x13, x1, x13
-	ldp	 x18, x1, [x3]
-	adcs	x8, x10, x8
-	ldp	x10, x2, [x3, #16]
-	adcs	x9, x11, x9
-	subs	 x11, x14, x18
-	sbcs	x18, x15, x1
-	sbcs	x10, x12, x10
-	sbcs	x1, x13, x2
-	sbcs	x16, x8, x16
-	sbcs	x17, x9, x17
-	asr	x2, x17, #63
-	cmp	 x2, #0                 // =0
-	csel	x11, x14, x11, lt
-	csel	x14, x15, x18, lt
-	csel	x10, x12, x10, lt
-	csel	x12, x13, x1, lt
-	csel	x8, x8, x16, lt
-	csel	x9, x9, x17, lt
-	stp	 x11, x14, [x0]
-	stp	x10, x12, [x0, #16]
-	stp	x8, x9, [x0, #32]
-	ret
-.Lfunc_end90:
-	.size	mcl_fp_addNF6L, .Lfunc_end90-mcl_fp_addNF6L
-
-	.globl	mcl_fp_sub6L
-	.align	2
-	.type	mcl_fp_sub6L,@function
-mcl_fp_sub6L:                           // @mcl_fp_sub6L
-// BB#0:
-	ldp	x12, x13, [x2, #32]
-	ldp	x14, x15, [x1, #32]
-	ldp	x10, x11, [x2, #16]
-	ldp	 x8, x9, [x2]
-	ldp	 x16, x17, [x1]
-	ldp	x18, x1, [x1, #16]
-	subs	 x8, x16, x8
-	sbcs	x9, x17, x9
-	stp	 x8, x9, [x0]
-	sbcs	x10, x18, x10
-	sbcs	x11, x1, x11
-	stp	x10, x11, [x0, #16]
-	sbcs	x12, x14, x12
-	sbcs	x13, x15, x13
-	stp	x12, x13, [x0, #32]
-	ngcs	 x14, xzr
-	and	w14, w14, #0x1
-	tbnz	w14, #0, .LBB91_2
-// BB#1:                                // %nocarry
-	ret
-.LBB91_2:                               // %carry
-	ldp	x14, x15, [x3, #32]
-	ldp	 x16, x17, [x3]
-	ldp	x18, x1, [x3, #16]
-	adds	 x8, x16, x8
-	adcs	x9, x17, x9
-	adcs	x10, x18, x10
-	adcs	x11, x1, x11
-	adcs	x12, x14, x12
-	adcs	x13, x15, x13
-	stp	 x8, x9, [x0]
-	stp	x10, x11, [x0, #16]
-	stp	x12, x13, [x0, #32]
-	ret
-.Lfunc_end91:
-	.size	mcl_fp_sub6L, .Lfunc_end91-mcl_fp_sub6L
-
-	.globl	mcl_fp_subNF6L
-	.align	2
-	.type	mcl_fp_subNF6L,@function
-mcl_fp_subNF6L:                         // @mcl_fp_subNF6L
-// BB#0:
-	ldp	x8, x9, [x2, #32]
-	ldp	x10, x11, [x1, #32]
-	ldp	x12, x13, [x2, #16]
-	ldp	 x14, x18, [x2]
-	ldp	x16, x17, [x1, #16]
-	ldp	 x15, x1, [x1]
-	subs	 x14, x15, x14
-	ldp	x15, x2, [x3, #32]
-	sbcs	x18, x1, x18
-	sbcs	x12, x16, x12
-	ldp	x16, x1, [x3, #16]
-	sbcs	x13, x17, x13
-	ldp	 x17, x3, [x3]
-	sbcs	x8, x10, x8
-	sbcs	x9, x11, x9
-	asr	x10, x9, #63
-	adds	 x11, x10, x10
-	and	 x16, x10, x16
-	and	 x1, x10, x1
-	and	 x15, x10, x15
-	and	 x2, x10, x2
-	adcs	x10, x10, x10
-	orr	x11, x11, x9, lsr #63
-	and	 x11, x11, x17
-	and	 x10, x10, x3
-	adds	 x11, x11, x14
-	adcs	x10, x10, x18
-	stp	 x11, x10, [x0]
-	adcs	x10, x16, x12
-	str	x10, [x0, #16]
-	adcs	x10, x1, x13
-	adcs	x8, x15, x8
-	stp	x10, x8, [x0, #24]
-	adcs	x8, x2, x9
-	str	x8, [x0, #40]
-	ret
-.Lfunc_end92:
-	.size	mcl_fp_subNF6L, .Lfunc_end92-mcl_fp_subNF6L
-
-	.globl	mcl_fpDbl_add6L
-	.align	2
-	.type	mcl_fpDbl_add6L,@function
-mcl_fpDbl_add6L:                        // @mcl_fpDbl_add6L
-// BB#0:
-	stp	x26, x25, [sp, #-64]!
-	stp	x24, x23, [sp, #16]
-	stp	x22, x21, [sp, #32]
-	stp	x20, x19, [sp, #48]
-	ldp	x8, x9, [x2, #80]
-	ldp	x10, x11, [x1, #80]
-	ldp	x12, x13, [x2, #64]
-	ldp	x14, x15, [x1, #64]
-	ldp	x16, x17, [x2, #48]
-	ldp	x18, x4, [x1, #48]
-	ldp	x5, x6, [x2, #32]
-	ldp	x7, x19, [x1, #32]
-	ldp	x20, x21, [x2, #16]
-	ldp	 x23, x2, [x2]
-	ldp	x24, x25, [x1, #16]
-	ldp	 x22, x1, [x1]
-	adds	 x22, x23, x22
-	str	 x22, [x0]
-	ldp	x22, x23, [x3, #32]
-	adcs	x1, x2, x1
-	str	x1, [x0, #8]
-	ldp	x1, x2, [x3, #16]
-	adcs	x20, x20, x24
-	ldp	 x24, x3, [x3]
-	str	x20, [x0, #16]
-	adcs	x20, x21, x25
-	adcs	x5, x5, x7
-	stp	x20, x5, [x0, #24]
-	adcs	x5, x6, x19
-	str	x5, [x0, #40]
-	adcs	x16, x16, x18
-	adcs	x17, x17, x4
-	adcs	x12, x12, x14
-	adcs	x13, x13, x15
-	adcs	x8, x8, x10
-	adcs	x9, x9, x11
-	adcs	x10, xzr, xzr
-	subs	 x11, x16, x24
-	sbcs	x14, x17, x3
-	sbcs	x15, x12, x1
-	sbcs	x18, x13, x2
-	sbcs	x1, x8, x22
-	sbcs	x2, x9, x23
-	sbcs	x10, x10, xzr
-	tst	 x10, #0x1
-	csel	x10, x16, x11, ne
-	csel	x11, x17, x14, ne
-	csel	x12, x12, x15, ne
-	csel	x13, x13, x18, ne
-	csel	x8, x8, x1, ne
-	csel	x9, x9, x2, ne
-	stp	x10, x11, [x0, #48]
-	stp	x12, x13, [x0, #64]
-	stp	x8, x9, [x0, #80]
-	ldp	x20, x19, [sp, #48]
-	ldp	x22, x21, [sp, #32]
-	ldp	x24, x23, [sp, #16]
-	ldp	x26, x25, [sp], #64
-	ret
-.Lfunc_end93:
-	.size	mcl_fpDbl_add6L, .Lfunc_end93-mcl_fpDbl_add6L
-
-	.globl	mcl_fpDbl_sub6L
-	.align	2
-	.type	mcl_fpDbl_sub6L,@function
-mcl_fpDbl_sub6L:                        // @mcl_fpDbl_sub6L
-// BB#0:
-	stp	x26, x25, [sp, #-64]!
-	stp	x24, x23, [sp, #16]
-	stp	x22, x21, [sp, #32]
-	stp	x20, x19, [sp, #48]
-	ldp	x8, x9, [x2, #80]
-	ldp	x10, x11, [x1, #80]
-	ldp	x12, x13, [x2, #64]
-	ldp	x14, x15, [x1, #64]
-	ldp	x16, x17, [x2, #48]
-	ldp	x18, x4, [x1, #48]
-	ldp	x5, x6, [x2, #32]
-	ldp	x7, x19, [x1, #32]
-	ldp	x20, x21, [x2, #16]
-	ldp	 x22, x2, [x2]
-	ldp	x24, x25, [x1, #16]
-	ldp	 x23, x1, [x1]
-	subs	 x22, x23, x22
-	str	 x22, [x0]
-	ldp	x22, x23, [x3, #32]
-	sbcs	x1, x1, x2
-	str	x1, [x0, #8]
-	ldp	x1, x2, [x3, #16]
-	sbcs	x20, x24, x20
-	ldp	 x24, x3, [x3]
-	str	x20, [x0, #16]
-	sbcs	x20, x25, x21
-	sbcs	x5, x7, x5
-	stp	x20, x5, [x0, #24]
-	sbcs	x5, x19, x6
-	sbcs	x16, x18, x16
-	sbcs	x17, x4, x17
-	sbcs	x12, x14, x12
-	sbcs	x13, x15, x13
-	sbcs	x8, x10, x8
-	sbcs	x9, x11, x9
-	ngcs	 x10, xzr
-	tst	 x10, #0x1
-	csel	x10, x23, xzr, ne
-	csel	x11, x22, xzr, ne
-	csel	x14, x2, xzr, ne
-	csel	x15, x1, xzr, ne
-	csel	x18, x3, xzr, ne
-	csel	x1, x24, xzr, ne
-	adds	 x16, x1, x16
-	stp	x5, x16, [x0, #40]
-	adcs	x16, x18, x17
-	adcs	x12, x15, x12
-	stp	x16, x12, [x0, #56]
-	adcs	x12, x14, x13
-	adcs	x8, x11, x8
-	stp	x12, x8, [x0, #72]
-	adcs	x8, x10, x9
-	str	x8, [x0, #88]
-	ldp	x20, x19, [sp, #48]
-	ldp	x22, x21, [sp, #32]
-	ldp	x24, x23, [sp, #16]
-	ldp	x26, x25, [sp], #64
-	ret
-.Lfunc_end94:
-	.size	mcl_fpDbl_sub6L, .Lfunc_end94-mcl_fpDbl_sub6L
-
-	.globl	mcl_fp_mulUnitPre7L
-	.align	2
-	.type	mcl_fp_mulUnitPre7L,@function
-mcl_fp_mulUnitPre7L:                    // @mcl_fp_mulUnitPre7L
-// BB#0:
-	ldp	x10, x8, [x1, #40]
-	ldp	x14, x9, [x1, #24]
-	ldp	 x11, x12, [x1]
-	ldr	x13, [x1, #16]
-	mul	 x15, x11, x2
-	mul	 x16, x12, x2
-	umulh	x11, x11, x2
-	mul	 x17, x13, x2
-	umulh	x12, x12, x2
-	mul	 x18, x14, x2
-	umulh	x13, x13, x2
-	mul	 x1, x9, x2
-	umulh	x14, x14, x2
-	mul	 x3, x10, x2
-	umulh	x9, x9, x2
-	mul	 x4, x8, x2
-	umulh	x10, x10, x2
-	umulh	x8, x8, x2
-	adds	 x11, x11, x16
-	stp	 x15, x11, [x0]
-	adcs	x11, x12, x17
-	str	x11, [x0, #16]
-	adcs	x11, x13, x18
-	str	x11, [x0, #24]
-	adcs	x11, x14, x1
-	adcs	x9, x9, x3
-	stp	x11, x9, [x0, #32]
-	adcs	x9, x10, x4
-	adcs	x8, x8, xzr
-	stp	x9, x8, [x0, #48]
-	ret
-.Lfunc_end95:
-	.size	mcl_fp_mulUnitPre7L, .Lfunc_end95-mcl_fp_mulUnitPre7L
-
-	.globl	mcl_fpDbl_mulPre7L
-	.align	2
-	.type	mcl_fpDbl_mulPre7L,@function
-mcl_fpDbl_mulPre7L:                     // @mcl_fpDbl_mulPre7L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	sub	sp, sp, #624            // =624
-	ldp	 x8, x9, [x1]
-	ldp	x10, x11, [x1, #24]
-	ldp	x12, x13, [x1, #40]
-	ldp	 x14, x15, [x2]
-	ldp	x16, x18, [x1, #16]
-	mul	 x17, x8, x14
-	str	x17, [sp, #528]         // 8-byte Folded Spill
-	umulh	x17, x13, x14
-	str	x17, [sp, #616]         // 8-byte Folded Spill
-	mul	 x17, x13, x14
-	str	x17, [sp, #608]         // 8-byte Folded Spill
-	umulh	x17, x12, x14
-	str	x17, [sp, #592]         // 8-byte Folded Spill
-	mul	 x17, x12, x14
-	str	x17, [sp, #568]         // 8-byte Folded Spill
-	umulh	x17, x11, x14
-	str	x17, [sp, #552]         // 8-byte Folded Spill
-	mul	 x17, x11, x14
-	str	x17, [sp, #512]         // 8-byte Folded Spill
-	umulh	x17, x10, x14
-	str	x17, [sp, #496]         // 8-byte Folded Spill
-	mul	 x17, x10, x14
-	str	x17, [sp, #456]         // 8-byte Folded Spill
-	umulh	x17, x16, x14
-	str	x17, [sp, #424]         // 8-byte Folded Spill
-	mul	 x17, x16, x14
-	str	x17, [sp, #368]         // 8-byte Folded Spill
-	umulh	x17, x9, x14
-	str	x17, [sp, #352]         // 8-byte Folded Spill
-	mul	 x17, x9, x14
-	str	x17, [sp, #304]         // 8-byte Folded Spill
-	umulh	x14, x8, x14
-	str	x14, [sp, #272]         // 8-byte Folded Spill
-	mul	 x14, x13, x15
-	str	x14, [sp, #560]         // 8-byte Folded Spill
-	mul	 x14, x12, x15
-	str	x14, [sp, #520]         // 8-byte Folded Spill
-	mul	 x14, x11, x15
-	str	x14, [sp, #488]         // 8-byte Folded Spill
-	mul	 x14, x10, x15
-	str	x14, [sp, #448]         // 8-byte Folded Spill
-	mul	 x14, x16, x15
-	umulh	x13, x13, x15
-	str	x13, [sp, #600]         // 8-byte Folded Spill
-	umulh	x12, x12, x15
-	str	x12, [sp, #576]         // 8-byte Folded Spill
-	umulh	x11, x11, x15
-	str	x11, [sp, #544]         // 8-byte Folded Spill
-	umulh	x10, x10, x15
-	str	x10, [sp, #504]         // 8-byte Folded Spill
-	umulh	x10, x16, x15
-	str	x10, [sp, #472]         // 8-byte Folded Spill
-	mul	 x10, x9, x15
-	str	x10, [sp, #208]         // 8-byte Folded Spill
-	umulh	x9, x9, x15
-	stp	x9, x14, [sp, #400]
-	mul	 x9, x8, x15
-	str	x9, [sp, #96]           // 8-byte Folded Spill
-	umulh	x8, x8, x15
-	str	x8, [sp, #320]          // 8-byte Folded Spill
-	ldp	 x9, x11, [x1]
-	ldp	x10, x17, [x2, #16]
-	ldp	x12, x13, [x1, #16]
-	ldp	x14, x16, [x1, #32]
-	ldr	x15, [x1, #48]
-	mul	 x8, x9, x10
-	str	x8, [sp, #248]          // 8-byte Folded Spill
-	mul	 x8, x15, x10
-	str	x8, [sp, #392]          // 8-byte Folded Spill
-	mul	 x8, x16, x10
-	str	x8, [sp, #344]          // 8-byte Folded Spill
-	mul	 x8, x14, x10
-	str	x8, [sp, #296]          // 8-byte Folded Spill
-	mul	 x8, x13, x10
-	str	x8, [sp, #240]          // 8-byte Folded Spill
-	mul	 x8, x12, x10
-	str	x8, [sp, #192]          // 8-byte Folded Spill
-	mul	 x8, x11, x10
-	str	x8, [sp, #136]          // 8-byte Folded Spill
-	umulh	x8, x15, x10
-	str	x8, [sp, #440]          // 8-byte Folded Spill
-	umulh	x8, x16, x10
-	str	x8, [sp, #384]          // 8-byte Folded Spill
-	umulh	x8, x14, x10
-	str	x8, [sp, #336]          // 8-byte Folded Spill
-	umulh	x8, x13, x10
-	str	x8, [sp, #288]          // 8-byte Folded Spill
-	umulh	x8, x12, x10
-	str	x8, [sp, #232]          // 8-byte Folded Spill
-	umulh	x8, x11, x10
-	str	x8, [sp, #184]          // 8-byte Folded Spill
-	umulh	x8, x9, x10
-	str	x8, [sp, #128]          // 8-byte Folded Spill
-	mul	 x8, x15, x17
-	str	x8, [sp, #464]          // 8-byte Folded Spill
-	umulh	x8, x15, x17
-	str	x8, [sp, #584]          // 8-byte Folded Spill
-	mul	 x8, x16, x17
-	str	x8, [sp, #376]          // 8-byte Folded Spill
-	umulh	x8, x16, x17
-	str	x8, [sp, #536]          // 8-byte Folded Spill
-	mul	 x8, x14, x17
-	str	x8, [sp, #312]          // 8-byte Folded Spill
-	umulh	x8, x14, x17
-	str	x8, [sp, #480]          // 8-byte Folded Spill
-	mul	 x8, x13, x17
-	str	x8, [sp, #224]          // 8-byte Folded Spill
-	umulh	x8, x13, x17
-	str	x8, [sp, #416]          // 8-byte Folded Spill
-	mul	 x8, x12, x17
-	str	x8, [sp, #144]          // 8-byte Folded Spill
-	umulh	x8, x12, x17
-	str	x8, [sp, #328]          // 8-byte Folded Spill
-	mul	 x8, x11, x17
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	umulh	x8, x11, x17
-	str	x8, [sp, #264]          // 8-byte Folded Spill
-	mul	 x28, x9, x17
-	umulh	x8, x9, x17
-	str	x8, [sp, #176]          // 8-byte Folded Spill
-	ldp	x14, x12, [x1, #24]
-	ldp	 x10, x9, [x1]
-	ldr	x7, [x1, #16]
-	ldp	x30, x5, [x1, #40]
-	ldp	x27, x8, [x2, #32]
-	ldr	x13, [x1, #48]
-	mul	 x11, x10, x27
-	str	x11, [sp, #48]          // 8-byte Folded Spill
-	mul	 x11, x5, x27
-	str	x11, [sp, #168]         // 8-byte Folded Spill
-	mul	 x11, x30, x27
-	str	x11, [sp, #120]         // 8-byte Folded Spill
-	mul	 x11, x12, x27
-	str	x11, [sp, #72]          // 8-byte Folded Spill
-	mul	 x11, x14, x27
-	str	x11, [sp, #40]          // 8-byte Folded Spill
-	mul	 x11, x7, x27
-	str	x11, [sp, #16]          // 8-byte Folded Spill
-	mul	 x24, x9, x27
-	umulh	x11, x5, x27
-	str	x11, [sp, #216]         // 8-byte Folded Spill
-	umulh	x11, x30, x27
-	str	x11, [sp, #160]         // 8-byte Folded Spill
-	umulh	x11, x12, x27
-	str	x11, [sp, #112]         // 8-byte Folded Spill
-	umulh	x11, x14, x27
-	str	x11, [sp, #64]          // 8-byte Folded Spill
-	umulh	x11, x7, x27
-	str	x11, [sp, #32]          // 8-byte Folded Spill
-	umulh	x29, x9, x27
-	umulh	x23, x10, x27
-	mul	 x11, x5, x8
-	str	x11, [sp, #256]         // 8-byte Folded Spill
-	umulh	x11, x5, x8
-	str	x11, [sp, #432]         // 8-byte Folded Spill
-	mul	 x11, x30, x8
-	str	x11, [sp, #152]         // 8-byte Folded Spill
-	umulh	x11, x30, x8
-	str	x11, [sp, #360]         // 8-byte Folded Spill
-	mul	 x11, x12, x8
-	str	x11, [sp, #88]          // 8-byte Folded Spill
-	umulh	x11, x12, x8
-	str	x11, [sp, #280]         // 8-byte Folded Spill
-	mul	 x11, x14, x8
-	str	x11, [sp, #24]          // 8-byte Folded Spill
-	umulh	x11, x14, x8
-	str	x11, [sp, #200]         // 8-byte Folded Spill
-	mul	 x25, x7, x8
-	umulh	x11, x7, x8
-	str	x11, [sp, #104]         // 8-byte Folded Spill
-	mul	 x22, x9, x8
-	umulh	x9, x9, x8
-	str	x9, [sp, #56]           // 8-byte Folded Spill
-	mul	 x20, x10, x8
-	umulh	x26, x10, x8
-	ldr	x10, [x2, #48]
-	ldp	 x2, x8, [x1]
-	ldr	x9, [x1, #16]
-	ldp	x11, x1, [x1, #32]
-	mul	 x27, x2, x10
-	umulh	x21, x2, x10
-	mul	 x5, x8, x10
-	umulh	x19, x8, x10
-	mul	 x3, x9, x10
-	umulh	x7, x9, x10
-	mul	 x2, x18, x10
-	umulh	x6, x18, x10
-	mul	 x17, x11, x10
-	umulh	x4, x11, x10
-	mul	 x16, x1, x10
-	umulh	x1, x1, x10
-	mul	 x15, x13, x10
-	umulh	x18, x13, x10
-	ldr	x8, [sp, #528]          // 8-byte Folded Reload
-	str	 x8, [x0]
-	ldr	x8, [sp, #304]          // 8-byte Folded Reload
-	ldr	x9, [sp, #272]          // 8-byte Folded Reload
-	adds	 x13, x9, x8
-	ldr	x8, [sp, #368]          // 8-byte Folded Reload
-	ldr	x9, [sp, #352]          // 8-byte Folded Reload
-	adcs	x8, x9, x8
-	ldr	x9, [sp, #456]          // 8-byte Folded Reload
-	ldr	x10, [sp, #424]         // 8-byte Folded Reload
-	adcs	x9, x10, x9
-	ldr	x10, [sp, #512]         // 8-byte Folded Reload
-	ldr	x11, [sp, #496]         // 8-byte Folded Reload
-	adcs	x10, x11, x10
-	ldr	x11, [sp, #568]         // 8-byte Folded Reload
-	ldr	x12, [sp, #552]         // 8-byte Folded Reload
-	adcs	x11, x12, x11
-	ldr	x12, [sp, #608]         // 8-byte Folded Reload
-	ldr	x14, [sp, #592]         // 8-byte Folded Reload
-	adcs	x12, x14, x12
-	ldr	x14, [sp, #616]         // 8-byte Folded Reload
-	adcs	x14, x14, xzr
-	ldr	x30, [sp, #96]          // 8-byte Folded Reload
-	adds	 x13, x30, x13
-	str	x13, [x0, #8]
-	ldr	x13, [sp, #208]         // 8-byte Folded Reload
-	adcs	x8, x13, x8
-	ldr	x13, [sp, #408]         // 8-byte Folded Reload
-	adcs	x9, x13, x9
-	ldr	x13, [sp, #448]         // 8-byte Folded Reload
-	adcs	x10, x13, x10
-	ldr	x13, [sp, #488]         // 8-byte Folded Reload
-	adcs	x11, x13, x11
-	ldr	x13, [sp, #520]         // 8-byte Folded Reload
-	adcs	x12, x13, x12
-	ldr	x13, [sp, #560]         // 8-byte Folded Reload
-	adcs	x13, x13, x14
-	adcs	x14, xzr, xzr
-	ldr	x30, [sp, #320]         // 8-byte Folded Reload
-	adds	 x8, x8, x30
-	ldr	x30, [sp, #400]         // 8-byte Folded Reload
-	adcs	x9, x9, x30
-	ldr	x30, [sp, #472]         // 8-byte Folded Reload
-	adcs	x10, x10, x30
-	ldr	x30, [sp, #504]         // 8-byte Folded Reload
-	adcs	x11, x11, x30
-	ldr	x30, [sp, #544]         // 8-byte Folded Reload
-	adcs	x12, x12, x30
-	ldr	x30, [sp, #576]         // 8-byte Folded Reload
-	adcs	x13, x13, x30
-	ldr	x30, [sp, #600]         // 8-byte Folded Reload
-	adcs	x14, x14, x30
-	ldr	x30, [sp, #248]         // 8-byte Folded Reload
-	adds	 x8, x30, x8
-	str	x8, [x0, #16]
-	ldp	x30, x8, [sp, #128]
-	adcs	x8, x8, x9
-	ldr	x9, [sp, #192]          // 8-byte Folded Reload
-	adcs	x9, x9, x10
-	ldr	x10, [sp, #240]         // 8-byte Folded Reload
-	adcs	x10, x10, x11
-	ldr	x11, [sp, #296]         // 8-byte Folded Reload
-	adcs	x11, x11, x12
-	ldr	x12, [sp, #344]         // 8-byte Folded Reload
-	adcs	x12, x12, x13
-	ldr	x13, [sp, #392]         // 8-byte Folded Reload
-	adcs	x13, x13, x14
-	adcs	x14, xzr, xzr
-	adds	 x8, x8, x30
-	ldr	x30, [sp, #184]         // 8-byte Folded Reload
-	adcs	x9, x9, x30
-	ldr	x30, [sp, #232]         // 8-byte Folded Reload
-	adcs	x10, x10, x30
-	ldr	x30, [sp, #288]         // 8-byte Folded Reload
-	adcs	x11, x11, x30
-	ldr	x30, [sp, #336]         // 8-byte Folded Reload
-	adcs	x12, x12, x30
-	ldr	x30, [sp, #384]         // 8-byte Folded Reload
-	adcs	x13, x13, x30
-	ldr	x30, [sp, #440]         // 8-byte Folded Reload
-	adcs	x14, x14, x30
-	adds	 x8, x28, x8
-	str	x8, [x0, #24]
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x8, x8, x9
-	ldr	x9, [sp, #144]          // 8-byte Folded Reload
-	adcs	x9, x9, x10
-	ldr	x10, [sp, #224]         // 8-byte Folded Reload
-	adcs	x10, x10, x11
-	ldr	x11, [sp, #312]         // 8-byte Folded Reload
-	adcs	x11, x11, x12
-	ldr	x12, [sp, #376]         // 8-byte Folded Reload
-	adcs	x12, x12, x13
-	ldr	x13, [sp, #464]         // 8-byte Folded Reload
-	adcs	x13, x13, x14
-	adcs	x14, xzr, xzr
-	ldr	x28, [sp, #176]         // 8-byte Folded Reload
-	adds	 x8, x8, x28
-	ldr	x28, [sp, #264]         // 8-byte Folded Reload
-	adcs	x9, x9, x28
-	ldr	x28, [sp, #328]         // 8-byte Folded Reload
-	adcs	x10, x10, x28
-	ldr	x28, [sp, #416]         // 8-byte Folded Reload
-	adcs	x11, x11, x28
-	ldr	x28, [sp, #480]         // 8-byte Folded Reload
-	adcs	x12, x12, x28
-	ldr	x28, [sp, #536]         // 8-byte Folded Reload
-	adcs	x13, x13, x28
-	ldr	x28, [sp, #584]         // 8-byte Folded Reload
-	adcs	x14, x14, x28
-	ldr	x28, [sp, #48]          // 8-byte Folded Reload
-	adds	 x8, x28, x8
-	str	x8, [x0, #32]
-	adcs	x8, x24, x9
-	ldr	x9, [sp, #16]           // 8-byte Folded Reload
-	adcs	x9, x9, x10
-	ldr	x10, [sp, #40]          // 8-byte Folded Reload
-	adcs	x10, x10, x11
-	ldr	x11, [sp, #72]          // 8-byte Folded Reload
-	adcs	x11, x11, x12
-	ldr	x12, [sp, #120]         // 8-byte Folded Reload
-	adcs	x12, x12, x13
-	ldr	x13, [sp, #168]         // 8-byte Folded Reload
-	adcs	x13, x13, x14
-	adcs	x14, xzr, xzr
-	adds	 x8, x8, x23
-	adcs	x9, x9, x29
-	ldr	x23, [sp, #32]          // 8-byte Folded Reload
-	adcs	x10, x10, x23
-	ldr	x23, [sp, #64]          // 8-byte Folded Reload
-	adcs	x11, x11, x23
-	ldr	x23, [sp, #112]         // 8-byte Folded Reload
-	adcs	x12, x12, x23
-	ldr	x23, [sp, #160]         // 8-byte Folded Reload
-	adcs	x13, x13, x23
-	ldr	x23, [sp, #216]         // 8-byte Folded Reload
-	adcs	x14, x14, x23
-	adds	 x8, x20, x8
-	str	x8, [x0, #40]
-	adcs	x8, x22, x9
-	adcs	x9, x25, x10
-	ldr	x10, [sp, #24]          // 8-byte Folded Reload
-	adcs	x10, x10, x11
-	ldr	x11, [sp, #88]          // 8-byte Folded Reload
-	adcs	x11, x11, x12
-	ldr	x12, [sp, #152]         // 8-byte Folded Reload
-	adcs	x12, x12, x13
-	ldr	x13, [sp, #256]         // 8-byte Folded Reload
-	adcs	x13, x13, x14
-	adcs	x14, xzr, xzr
-	adds	 x8, x8, x26
-	ldr	x20, [sp, #56]          // 8-byte Folded Reload
-	adcs	x9, x9, x20
-	ldr	x20, [sp, #104]         // 8-byte Folded Reload
-	adcs	x10, x10, x20
-	ldr	x20, [sp, #200]         // 8-byte Folded Reload
-	adcs	x11, x11, x20
-	ldr	x20, [sp, #280]         // 8-byte Folded Reload
-	adcs	x12, x12, x20
-	ldr	x20, [sp, #360]         // 8-byte Folded Reload
-	adcs	x13, x13, x20
-	ldr	x20, [sp, #432]         // 8-byte Folded Reload
-	adcs	x14, x14, x20
-	adds	 x8, x27, x8
-	str	x8, [x0, #48]
-	adcs	x8, x5, x9
-	adcs	x9, x3, x10
-	adcs	x10, x2, x11
-	adcs	x11, x17, x12
-	adcs	x12, x16, x13
-	adcs	x13, x15, x14
-	adcs	x14, xzr, xzr
-	adds	 x8, x8, x21
-	str	x8, [x0, #56]
-	adcs	x8, x9, x19
-	str	x8, [x0, #64]
-	adcs	x8, x10, x7
-	str	x8, [x0, #72]
-	adcs	x8, x11, x6
-	str	x8, [x0, #80]
-	adcs	x8, x12, x4
-	str	x8, [x0, #88]
-	adcs	x8, x13, x1
-	str	x8, [x0, #96]
-	adcs	x8, x14, x18
-	str	x8, [x0, #104]
-	add	sp, sp, #624            // =624
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end96:
-	.size	mcl_fpDbl_mulPre7L, .Lfunc_end96-mcl_fpDbl_mulPre7L
-
-	.globl	mcl_fpDbl_sqrPre7L
-	.align	2
-	.type	mcl_fpDbl_sqrPre7L,@function
-mcl_fpDbl_sqrPre7L:                     // @mcl_fpDbl_sqrPre7L
-// BB#0:
-	stp	x24, x23, [sp, #-48]!
-	stp	x22, x21, [sp, #16]
-	stp	x20, x19, [sp, #32]
-	ldp	 x11, x8, [x1]
-	ldp	x9, x10, [x1, #40]
-	ldp	x15, x12, [x1, #16]
-	ldp	x16, x3, [x1, #16]
-	ldp	x13, x14, [x1, #32]
-	ldp	x18, x17, [x1, #32]
-	ldr	x2, [x1, #32]
-	mul	 x4, x11, x11
-	umulh	x5, x10, x11
-	mul	 x6, x9, x11
-	mul	 x7, x18, x11
-	mul	 x19, x3, x11
-	umulh	x20, x16, x11
-	mul	 x21, x16, x11
-	umulh	x22, x8, x11
-	mul	 x23, x8, x11
-	str	 x4, [x0]
-	umulh	x4, x11, x11
-	adds	 x4, x4, x23
-	adcs	x21, x22, x21
-	adcs	x19, x20, x19
-	umulh	x20, x3, x11
-	adcs	x7, x20, x7
-	umulh	x20, x18, x11
-	adcs	x6, x20, x6
-	mul	 x20, x10, x11
-	umulh	x11, x9, x11
-	adcs	x20, x11, x20
-	adcs	x5, x5, xzr
-	adds	 x4, x23, x4
-	ldp	x11, x23, [x1, #40]
-	str	x4, [x0, #8]
-	mul	 x4, x8, x8
-	adcs	x4, x4, x21
-	mul	 x21, x16, x8
-	adcs	x19, x21, x19
-	mul	 x21, x3, x8
-	adcs	x7, x21, x7
-	mul	 x21, x18, x8
-	adcs	x6, x21, x6
-	mul	 x21, x9, x8
-	adcs	x20, x21, x20
-	mul	 x21, x10, x8
-	umulh	x10, x10, x8
-	umulh	x9, x9, x8
-	umulh	x18, x18, x8
-	umulh	x3, x3, x8
-	umulh	x16, x16, x8
-	umulh	x8, x8, x8
-	adcs	x5, x21, x5
-	adcs	x21, xzr, xzr
-	adds	 x4, x4, x22
-	adcs	x8, x19, x8
-	ldp	 x19, x22, [x1]
-	adcs	x16, x7, x16
-	adcs	x3, x6, x3
-	ldp	x6, x7, [x1, #8]
-	adcs	x18, x20, x18
-	mul	 x20, x19, x15
-	adcs	x9, x5, x9
-	mul	 x5, x23, x15
-	adcs	x10, x21, x10
-	mul	 x21, x14, x15
-	adds	 x4, x20, x4
-	mul	 x20, x13, x15
-	str	x4, [x0, #16]
-	mul	 x4, x6, x15
-	adcs	x8, x4, x8
-	mul	 x4, x15, x15
-	adcs	x16, x4, x16
-	mul	 x4, x12, x15
-	adcs	x3, x4, x3
-	adcs	x18, x20, x18
-	umulh	x20, x13, x15
-	adcs	x9, x21, x9
-	umulh	x21, x19, x15
-	adcs	x10, x5, x10
-	adcs	x5, xzr, xzr
-	adds	 x8, x8, x21
-	umulh	x21, x6, x15
-	adcs	x16, x16, x21
-	umulh	x21, x15, x15
-	adcs	x3, x3, x21
-	umulh	x21, x12, x15
-	adcs	x18, x18, x21
-	adcs	x9, x9, x20
-	umulh	x20, x14, x15
-	adcs	x10, x10, x20
-	umulh	x15, x23, x15
-	adcs	x15, x5, x15
-	mul	 x5, x19, x12
-	adds	 x8, x5, x8
-	ldr	x5, [x1, #32]
-	str	x8, [x0, #24]
-	mul	 x8, x6, x12
-	adcs	x8, x8, x16
-	ldr	 x16, [x1]
-	adcs	x3, x4, x3
-	mul	 x4, x12, x12
-	adcs	x18, x4, x18
-	mul	 x4, x13, x12
-	adcs	x9, x4, x9
-	mul	 x4, x14, x12
-	adcs	x10, x4, x10
-	mul	 x4, x23, x12
-	umulh	x19, x19, x12
-	adcs	x15, x4, x15
-	adcs	x4, xzr, xzr
-	adds	 x8, x8, x19
-	ldr	x19, [x1, #24]
-	umulh	x6, x6, x12
-	adcs	x3, x3, x6
-	ldr	x6, [x1, #48]
-	adcs	x18, x18, x21
-	ldr	x20, [x1, #48]
-	umulh	x21, x23, x12
-	umulh	x14, x14, x12
-	umulh	x13, x13, x12
-	umulh	x12, x12, x12
-	adcs	x9, x9, x12
-	adcs	x10, x10, x13
-	ldp	 x12, x13, [x1]
-	adcs	x14, x15, x14
-	mul	 x15, x16, x5
-	adcs	x4, x4, x21
-	mul	 x21, x6, x5
-	adds	 x8, x15, x8
-	mul	 x15, x17, x5
-	str	x8, [x0, #32]
-	mul	 x8, x22, x5
-	adcs	x8, x8, x3
-	mul	 x3, x7, x5
-	adcs	x18, x3, x18
-	mul	 x3, x19, x5
-	adcs	x9, x3, x9
-	mul	 x3, x5, x5
-	adcs	x10, x3, x10
-	umulh	x3, x16, x5
-	adcs	x14, x15, x14
-	adcs	x4, x21, x4
-	adcs	x21, xzr, xzr
-	adds	 x8, x8, x3
-	umulh	x3, x22, x5
-	adcs	x18, x18, x3
-	umulh	x3, x7, x5
-	adcs	x9, x9, x3
-	umulh	x3, x19, x5
-	adcs	x10, x10, x3
-	umulh	x3, x5, x5
-	adcs	x14, x14, x3
-	umulh	x3, x6, x5
-	umulh	x5, x17, x5
-	adcs	x4, x4, x5
-	adcs	x3, x21, x3
-	mul	 x21, x16, x17
-	adds	 x8, x21, x8
-	ldp	x21, x1, [x1, #16]
-	str	x8, [x0, #40]
-	mul	 x8, x22, x17
-	adcs	x8, x8, x18
-	mul	 x18, x7, x17
-	adcs	x9, x18, x9
-	mul	 x18, x19, x17
-	adcs	x10, x18, x10
-	mul	 x18, x6, x17
-	adcs	x14, x15, x14
-	mul	 x15, x17, x17
-	umulh	x6, x6, x17
-	umulh	x19, x19, x17
-	umulh	x7, x7, x17
-	umulh	x22, x22, x17
-	umulh	x16, x16, x17
-	umulh	x17, x17, x17
-	adcs	x15, x15, x4
-	mul	 x4, x12, x20
-	adcs	x18, x18, x3
-	adcs	x3, xzr, xzr
-	adds	 x8, x8, x16
-	mul	 x16, x11, x20
-	adcs	x9, x9, x22
-	mul	 x22, x2, x20
-	adcs	x10, x10, x7
-	mul	 x7, x1, x20
-	adcs	x14, x14, x19
-	mul	 x19, x21, x20
-	adcs	x15, x15, x5
-	mul	 x5, x13, x20
-	adcs	x17, x18, x17
-	mul	 x18, x20, x20
-	umulh	x12, x12, x20
-	umulh	x13, x13, x20
-	umulh	x21, x21, x20
-	umulh	x1, x1, x20
-	umulh	x2, x2, x20
-	umulh	x11, x11, x20
-	umulh	x20, x20, x20
-	adcs	x3, x3, x6
-	adds	 x8, x4, x8
-	str	x8, [x0, #48]
-	adcs	x8, x5, x9
-	adcs	x9, x19, x10
-	adcs	x10, x7, x14
-	adcs	x14, x22, x15
-	adcs	x15, x16, x17
-	adcs	x16, x18, x3
-	adcs	x17, xzr, xzr
-	adds	 x8, x8, x12
-	str	x8, [x0, #56]
-	adcs	x8, x9, x13
-	str	x8, [x0, #64]
-	adcs	x8, x10, x21
-	str	x8, [x0, #72]
-	adcs	x8, x14, x1
-	str	x8, [x0, #80]
-	adcs	x8, x15, x2
-	str	x8, [x0, #88]
-	adcs	x8, x16, x11
-	str	x8, [x0, #96]
-	adcs	x8, x17, x20
-	str	x8, [x0, #104]
-	ldp	x20, x19, [sp, #32]
-	ldp	x22, x21, [sp, #16]
-	ldp	x24, x23, [sp], #48
-	ret
-.Lfunc_end97:
-	.size	mcl_fpDbl_sqrPre7L, .Lfunc_end97-mcl_fpDbl_sqrPre7L
-
-	.globl	mcl_fp_mont7L
-	.align	2
-	.type	mcl_fp_mont7L,@function
-mcl_fp_mont7L:                          // @mcl_fp_mont7L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	sub	sp, sp, #144            // =144
-	str	x2, [sp, #112]          // 8-byte Folded Spill
-	str	x0, [sp, #64]           // 8-byte Folded Spill
-	ldr	 x6, [x2]
-	ldr	x15, [x1, #48]
-	str	x15, [sp, #96]          // 8-byte Folded Spill
-	ldr	x0, [x1, #32]
-	str	x0, [sp, #56]           // 8-byte Folded Spill
-	ldr	x18, [x1, #40]
-	ldp	x11, x13, [x1, #16]
-	ldp	 x17, x5, [x1]
-	str	x5, [sp, #88]           // 8-byte Folded Spill
-	ldur	x12, [x3, #-8]
-	str	x12, [sp, #128]         // 8-byte Folded Spill
-	ldr	x1, [x3, #32]
-	str	x1, [sp, #104]          // 8-byte Folded Spill
-	ldr	x9, [x3, #40]
-	str	x9, [sp, #80]           // 8-byte Folded Spill
-	ldr	x8, [x3, #16]
-	str	x8, [sp, #136]          // 8-byte Folded Spill
-	ldr	x10, [x3, #24]
-	str	x10, [sp, #120]         // 8-byte Folded Spill
-	ldr	 x14, [x3]
-	str	x14, [sp, #24]          // 8-byte Folded Spill
-	ldr	x4, [x3, #8]
-	str	x4, [sp, #72]           // 8-byte Folded Spill
-	ldr	x7, [x2, #8]
-	umulh	x19, x15, x6
-	mul	 x20, x15, x6
-	umulh	x21, x18, x6
-	mul	 x22, x18, x6
-	mov	 x15, x0
-	umulh	x23, x15, x6
-	mul	 x24, x15, x6
-	mov	 x16, x13
-	umulh	x25, x16, x6
-	mul	 x26, x16, x6
-	mov	 x13, x11
-	umulh	x27, x13, x6
-	mul	 x28, x13, x6
-	mul	 x29, x5, x6
-	mov	 x11, x17
-	umulh	x30, x11, x6
-	adds	 x29, x30, x29
-	umulh	x30, x5, x6
-	mul	 x6, x11, x6
-	adcs	x28, x30, x28
-	mul	 x30, x6, x12
-	adcs	x26, x27, x26
-	mul	 x27, x30, x10
-	adcs	x24, x25, x24
-	mul	 x25, x30, x8
-	adcs	x22, x23, x22
-	mul	 x23, x30, x4
-	adcs	x20, x21, x20
-	umulh	x21, x30, x14
-	adcs	x19, x19, xzr
-	adds	 x21, x21, x23
-	umulh	x23, x30, x4
-	adcs	x23, x23, x25
-	umulh	x25, x30, x8
-	adcs	x25, x25, x27
-	mul	 x27, x30, x1
-	umulh	x17, x30, x10
-	adcs	x17, x17, x27
-	ldr	x3, [x3, #48]
-	str	x3, [sp, #48]           // 8-byte Folded Spill
-	mul	 x27, x30, x9
-	umulh	x0, x30, x1
-	adcs	x0, x0, x27
-	mul	 x27, x30, x3
-	umulh	x2, x30, x9
-	adcs	x2, x2, x27
-	umulh	x27, x30, x3
-	mul	 x30, x30, x14
-	adcs	x27, x27, xzr
-	cmn	 x30, x6
-	adcs	x6, x21, x29
-	adcs	x21, x23, x28
-	mul	 x23, x7, x15
-	adcs	x25, x25, x26
-	mul	 x26, x7, x16
-	adcs	x17, x17, x24
-	mul	 x24, x7, x13
-	adcs	x0, x0, x22
-	mul	 x22, x7, x5
-	adcs	x2, x2, x20
-	umulh	x20, x7, x11
-	adcs	x19, x27, x19
-	adcs	x27, xzr, xzr
-	adds	 x20, x20, x22
-	umulh	x22, x7, x5
-	adcs	x22, x22, x24
-	umulh	x24, x7, x13
-	mov	 x5, x13
-	adcs	x24, x24, x26
-	umulh	x26, x7, x16
-	adcs	x23, x26, x23
-	mul	 x26, x7, x18
-	umulh	x28, x7, x15
-	adcs	x26, x28, x26
-	ldr	x15, [sp, #96]          // 8-byte Folded Reload
-	mul	 x28, x7, x15
-	umulh	x29, x7, x18
-	adcs	x28, x29, x28
-	umulh	x29, x7, x15
-	mul	 x7, x7, x11
-	adcs	x29, x29, xzr
-	adds	 x30, x6, x7
-	adcs	x6, x21, x20
-	adcs	x25, x25, x22
-	mul	 x22, x30, x12
-	adcs	x24, x17, x24
-	mul	 x17, x22, x10
-	adcs	x0, x0, x23
-	mul	 x23, x22, x8
-	adcs	x7, x2, x26
-	mul	 x2, x22, x4
-	adcs	x20, x19, x28
-	umulh	x26, x22, x14
-	adcs	x21, x27, x29
-	adcs	x19, xzr, xzr
-	adds	 x2, x26, x2
-	umulh	x26, x22, x4
-	adcs	x23, x26, x23
-	umulh	x26, x22, x8
-	adcs	x17, x26, x17
-	mul	 x26, x22, x1
-	umulh	x27, x22, x10
-	adcs	x26, x27, x26
-	mul	 x27, x22, x9
-	umulh	x28, x22, x1
-	adcs	x27, x28, x27
-	mul	 x28, x22, x3
-	umulh	x29, x22, x9
-	adcs	x28, x29, x28
-	umulh	x29, x22, x3
-	mul	 x22, x22, x14
-	mov	 x10, x14
-	adcs	x29, x29, xzr
-	cmn	 x22, x30
-	adcs	x22, x2, x6
-	adcs	x23, x23, x25
-	ldr	x8, [sp, #112]          // 8-byte Folded Reload
-	adcs	x24, x17, x24
-	ldp	x25, x17, [x8, #16]
-	adcs	x0, x26, x0
-	mul	 x2, x25, x16
-	adcs	x6, x27, x7
-	mul	 x7, x25, x5
-	adcs	x20, x28, x20
-	ldp	x15, x8, [sp, #88]
-	mul	 x26, x25, x15
-	adcs	x21, x29, x21
-	mov	 x12, x11
-	umulh	x27, x25, x12
-	adcs	x19, x19, xzr
-	adds	 x26, x27, x26
-	umulh	x27, x25, x15
-	adcs	x7, x27, x7
-	umulh	x27, x25, x5
-	mov	 x9, x5
-	adcs	x2, x27, x2
-	ldr	x11, [sp, #56]          // 8-byte Folded Reload
-	mul	 x27, x25, x11
-	umulh	x28, x25, x16
-	mov	 x13, x16
-	adcs	x27, x28, x27
-	mul	 x28, x25, x18
-	umulh	x29, x25, x11
-	adcs	x28, x29, x28
-	mul	 x29, x25, x8
-	umulh	x30, x25, x18
-	adcs	x29, x30, x29
-	umulh	x30, x25, x8
-	mov	 x14, x8
-	mul	 x25, x25, x12
-	mov	 x5, x12
-	adcs	x30, x30, xzr
-	adds	 x22, x22, x25
-	adcs	x23, x23, x26
-	adcs	x7, x24, x7
-	adcs	x0, x0, x2
-	ldp	x8, x12, [sp, #128]
-	mul	 x2, x22, x8
-	adcs	x6, x6, x27
-	mul	 x24, x2, x12
-	adcs	x20, x20, x28
-	mul	 x25, x2, x4
-	adcs	x21, x21, x29
-	mov	 x1, x10
-	umulh	x26, x2, x1
-	adcs	x19, x19, x30
-	adcs	x27, xzr, xzr
-	adds	 x25, x26, x25
-	umulh	x26, x2, x4
-	adcs	x24, x26, x24
-	ldr	x10, [sp, #120]         // 8-byte Folded Reload
-	mul	 x26, x2, x10
-	umulh	x28, x2, x12
-	adcs	x26, x28, x26
-	ldr	x12, [sp, #104]         // 8-byte Folded Reload
-	mul	 x28, x2, x12
-	umulh	x29, x2, x10
-	adcs	x28, x29, x28
-	ldr	x10, [sp, #80]          // 8-byte Folded Reload
-	mul	 x29, x2, x10
-	umulh	x30, x2, x12
-	adcs	x29, x30, x29
-	mul	 x30, x2, x3
-	umulh	x12, x2, x10
-	adcs	x12, x12, x30
-	umulh	x30, x2, x3
-	mul	 x2, x2, x1
-	adcs	x30, x30, xzr
-	cmn	 x2, x22
-	adcs	x2, x25, x23
-	adcs	x7, x24, x7
-	adcs	x0, x26, x0
-	mul	 x22, x17, x11
-	adcs	x6, x28, x6
-	mul	 x23, x17, x13
-	adcs	x20, x29, x20
-	mul	 x24, x17, x9
-	adcs	x12, x12, x21
-	mul	 x21, x17, x15
-	adcs	x19, x30, x19
-	umulh	x25, x17, x5
-	adcs	x26, x27, xzr
-	adds	 x21, x25, x21
-	umulh	x25, x17, x15
-	adcs	x24, x25, x24
-	umulh	x25, x17, x9
-	mov	 x16, x9
-	adcs	x23, x25, x23
-	umulh	x25, x17, x13
-	adcs	x22, x25, x22
-	mul	 x25, x17, x18
-	umulh	x27, x17, x11
-	adcs	x25, x27, x25
-	mov	 x9, x14
-	mul	 x27, x17, x9
-	umulh	x28, x17, x18
-	adcs	x27, x28, x27
-	umulh	x28, x17, x9
-	mul	 x17, x17, x5
-	mov	 x15, x5
-	adcs	x28, x28, xzr
-	adds	 x17, x2, x17
-	adcs	x2, x7, x21
-	adcs	x0, x0, x24
-	mul	 x24, x17, x8
-	adcs	x29, x6, x23
-	ldr	x9, [sp, #120]          // 8-byte Folded Reload
-	mul	 x23, x24, x9
-	adcs	x6, x20, x22
-	ldr	x8, [sp, #136]          // 8-byte Folded Reload
-	mul	 x22, x24, x8
-	adcs	x7, x12, x25
-	mul	 x12, x24, x4
-	adcs	x20, x19, x27
-	umulh	x25, x24, x1
-	adcs	x21, x26, x28
-	adcs	x19, xzr, xzr
-	adds	 x12, x25, x12
-	umulh	x25, x24, x4
-	adcs	x25, x25, x22
-	umulh	x22, x24, x8
-	adcs	x26, x22, x23
-	ldr	x5, [sp, #104]          // 8-byte Folded Reload
-	mul	 x22, x24, x5
-	umulh	x23, x24, x9
-	adcs	x27, x23, x22
-	mov	 x9, x10
-	mul	 x22, x24, x9
-	umulh	x23, x24, x5
-	adcs	x28, x23, x22
-	mul	 x22, x24, x3
-	umulh	x23, x24, x9
-	adcs	x30, x23, x22
-	umulh	x22, x24, x3
-	mul	 x23, x24, x1
-	mov	 x3, x1
-	adcs	x24, x22, xzr
-	cmn	 x23, x17
-	adcs	x22, x12, x2
-	adcs	x23, x25, x0
-	ldr	x10, [sp, #112]         // 8-byte Folded Reload
-	ldp	x12, x0, [x10, #32]
-	adcs	x17, x26, x29
-	adcs	x2, x27, x6
-	mul	 x6, x12, x13
-	adcs	x7, x28, x7
-	mov	 x10, x16
-	mul	 x25, x12, x10
-	adcs	x20, x30, x20
-	ldr	x16, [sp, #88]          // 8-byte Folded Reload
-	mul	 x26, x12, x16
-	adcs	x21, x24, x21
-	umulh	x24, x12, x15
-	adcs	x1, x19, xzr
-	adds	 x24, x24, x26
-	umulh	x26, x12, x16
-	adcs	x25, x26, x25
-	umulh	x26, x12, x10
-	adcs	x6, x26, x6
-	mul	 x26, x12, x11
-	umulh	x27, x12, x13
-	adcs	x26, x27, x26
-	mul	 x27, x12, x18
-	umulh	x28, x12, x11
-	adcs	x27, x28, x27
-	mul	 x28, x12, x14
-	umulh	x29, x12, x18
-	adcs	x28, x29, x28
-	umulh	x29, x12, x14
-	mul	 x12, x12, x15
-	adcs	x29, x29, xzr
-	adds	 x12, x22, x12
-	adcs	x22, x23, x24
-	adcs	x17, x17, x25
-	adcs	x2, x2, x6
-	ldr	x19, [sp, #128]         // 8-byte Folded Reload
-	mul	 x6, x12, x19
-	adcs	x7, x7, x26
-	mov	 x30, x8
-	mul	 x23, x6, x30
-	adcs	x20, x20, x27
-	mul	 x24, x6, x4
-	adcs	x21, x21, x28
-	mov	 x8, x3
-	umulh	x25, x6, x8
-	adcs	x1, x1, x29
-	adcs	x26, xzr, xzr
-	adds	 x24, x25, x24
-	umulh	x25, x6, x4
-	adcs	x23, x25, x23
-	ldr	x4, [sp, #120]          // 8-byte Folded Reload
-	mul	 x25, x6, x4
-	umulh	x27, x6, x30
-	adcs	x25, x27, x25
-	mul	 x27, x6, x5
-	umulh	x28, x6, x4
-	adcs	x27, x28, x27
-	mov	 x3, x9
-	mul	 x28, x6, x3
-	umulh	x29, x6, x5
-	adcs	x28, x29, x28
-	ldr	x9, [sp, #48]           // 8-byte Folded Reload
-	mul	 x29, x6, x9
-	umulh	x30, x6, x3
-	adcs	x29, x30, x29
-	umulh	x30, x6, x9
-	mov	 x3, x9
-	mul	 x6, x6, x8
-	mov	 x5, x8
-	adcs	x30, x30, xzr
-	cmn	 x6, x12
-	adcs	x12, x24, x22
-	adcs	x17, x23, x17
-	adcs	x2, x25, x2
-	mul	 x6, x0, x11
-	adcs	x7, x27, x7
-	mul	 x22, x0, x13
-	adcs	x20, x28, x20
-	mul	 x23, x0, x10
-	adcs	x21, x29, x21
-	mul	 x24, x0, x16
-	adcs	x29, x30, x1
-	mov	 x1, x15
-	umulh	x25, x0, x1
-	adcs	x26, x26, xzr
-	adds	 x24, x25, x24
-	umulh	x25, x0, x16
-	adcs	x23, x25, x23
-	umulh	x25, x0, x10
-	adcs	x22, x25, x22
-	umulh	x25, x0, x13
-	adcs	x6, x25, x6
-	mul	 x25, x0, x18
-	umulh	x27, x0, x11
-	adcs	x25, x27, x25
-	mov	 x9, x14
-	mul	 x27, x0, x9
-	umulh	x28, x0, x18
-	adcs	x27, x28, x27
-	umulh	x28, x0, x9
-	mul	 x0, x0, x1
-	adcs	x28, x28, xzr
-	adds	 x12, x12, x0
-	adcs	x8, x17, x24
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	adcs	x8, x2, x23
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	mul	 x2, x12, x19
-	adcs	x7, x7, x22
-	mul	 x22, x2, x4
-	adcs	x8, x20, x6
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x8, [sp, #136]          // 8-byte Folded Reload
-	mul	 x20, x2, x8
-	adcs	x21, x21, x25
-	ldr	x9, [sp, #72]           // 8-byte Folded Reload
-	mul	 x23, x2, x9
-	adcs	x19, x29, x27
-	mov	 x15, x5
-	umulh	x24, x2, x15
-	adcs	x17, x26, x28
-	str	x17, [sp, #8]           // 8-byte Folded Spill
-	adcs	x26, xzr, xzr
-	adds	 x23, x24, x23
-	umulh	x24, x2, x9
-	adcs	x20, x24, x20
-	umulh	x24, x2, x8
-	adcs	x22, x24, x22
-	ldp	x25, x8, [sp, #104]
-	mul	 x24, x2, x25
-	umulh	x27, x2, x4
-	adcs	x6, x27, x24
-	ldr	x5, [sp, #80]           // 8-byte Folded Reload
-	mul	 x27, x2, x5
-	umulh	x28, x2, x25
-	adcs	x27, x28, x27
-	mul	 x28, x2, x3
-	umulh	x29, x2, x5
-	adcs	x28, x29, x28
-	ldr	x29, [x8, #48]
-	mul	 x30, x2, x15
-	umulh	x2, x2, x3
-	adcs	x2, x2, xzr
-	cmn	 x30, x12
-	umulh	x24, x29, x14
-	mul	 x30, x29, x14
-	umulh	x0, x29, x18
-	mul	 x18, x29, x18
-	umulh	x17, x29, x11
-	mul	 x15, x29, x11
-	umulh	x14, x29, x13
-	mul	 x13, x29, x13
-	umulh	x12, x29, x10
-	mul	 x11, x29, x10
-	mul	 x10, x29, x16
-	umulh	x9, x29, x16
-	umulh	x8, x29, x1
-	mul	 x29, x29, x1
-	ldr	x16, [sp, #40]          // 8-byte Folded Reload
-	adcs	x23, x23, x16
-	ldr	x16, [sp, #32]          // 8-byte Folded Reload
-	adcs	x20, x20, x16
-	adcs	x7, x22, x7
-	ldr	x16, [sp, #16]          // 8-byte Folded Reload
-	adcs	x6, x6, x16
-	adcs	x21, x27, x21
-	adcs	x19, x28, x19
-	ldr	x16, [sp, #8]           // 8-byte Folded Reload
-	adcs	x2, x2, x16
-	adcs	x22, x26, xzr
-	adds	 x8, x8, x10
-	adcs	x9, x9, x11
-	adcs	x10, x12, x13
-	adcs	x11, x14, x15
-	adcs	x12, x17, x18
-	adcs	x13, x0, x30
-	adcs	x14, x24, xzr
-	adds	 x15, x23, x29
-	adcs	x8, x20, x8
-	ldr	x16, [sp, #128]         // 8-byte Folded Reload
-	mul	 x16, x15, x16
-	adcs	x9, x7, x9
-	mul	 x17, x16, x3
-	mul	 x18, x16, x5
-	mul	 x0, x16, x25
-	adcs	x10, x6, x10
-	mul	 x6, x16, x4
-	adcs	x11, x21, x11
-	ldr	x21, [sp, #136]         // 8-byte Folded Reload
-	mul	 x7, x16, x21
-	adcs	x12, x19, x12
-	ldr	x23, [sp, #72]          // 8-byte Folded Reload
-	mul	 x19, x16, x23
-	adcs	x13, x2, x13
-	ldr	x24, [sp, #24]          // 8-byte Folded Reload
-	umulh	x2, x16, x24
-	adcs	x14, x22, x14
-	adcs	x20, xzr, xzr
-	adds	 x2, x2, x19
-	umulh	x19, x16, x23
-	adcs	x7, x19, x7
-	umulh	x19, x16, x21
-	adcs	x6, x19, x6
-	umulh	x19, x16, x4
-	adcs	x0, x19, x0
-	umulh	x19, x16, x25
-	adcs	x18, x19, x18
-	umulh	x19, x16, x5
-	adcs	x17, x19, x17
-	umulh	x19, x16, x3
-	mul	 x16, x16, x24
-	adcs	x19, x19, xzr
-	cmn	 x16, x15
-	adcs	x8, x2, x8
-	adcs	x9, x7, x9
-	adcs	x10, x6, x10
-	adcs	x11, x0, x11
-	adcs	x12, x18, x12
-	adcs	x13, x17, x13
-	adcs	x14, x19, x14
-	adcs	x15, x20, xzr
-	subs	 x16, x8, x24
-	sbcs	x17, x9, x23
-	sbcs	x18, x10, x21
-	sbcs	x0, x11, x4
-	sbcs	x1, x12, x25
-	sbcs	x2, x13, x5
-	sbcs	x3, x14, x3
-	sbcs	x15, x15, xzr
-	tst	 x15, #0x1
-	csel	x8, x8, x16, ne
-	csel	x9, x9, x17, ne
-	csel	x10, x10, x18, ne
-	csel	x11, x11, x0, ne
-	csel	x12, x12, x1, ne
-	csel	x13, x13, x2, ne
-	csel	x14, x14, x3, ne
-	ldr	x15, [sp, #64]          // 8-byte Folded Reload
-	stp	 x8, x9, [x15]
-	stp	x10, x11, [x15, #16]
-	stp	x12, x13, [x15, #32]
-	str	x14, [x15, #48]
-	add	sp, sp, #144            // =144
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end98:
-	.size	mcl_fp_mont7L, .Lfunc_end98-mcl_fp_mont7L
-
-	.globl	mcl_fp_montNF7L
-	.align	2
-	.type	mcl_fp_montNF7L,@function
-mcl_fp_montNF7L:                        // @mcl_fp_montNF7L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	sub	sp, sp, #32             // =32
-	stp	x0, x2, [sp, #8]
-	ldr	 x7, [x2]
-	ldp	x5, x16, [x1, #40]
-	ldp	x6, x17, [x1, #24]
-	ldr	 x4, [x1]
-	ldp	x1, x18, [x1, #8]
-	ldur	x8, [x3, #-8]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldp	x15, x0, [x3, #40]
-	ldp	x11, x10, [x3, #24]
-	ldp	x13, x12, [x3, #8]
-	ldr	 x14, [x3]
-	ldr	x25, [x2, #8]
-	umulh	x3, x16, x7
-	mul	 x19, x16, x7
-	umulh	x20, x5, x7
-	mul	 x21, x5, x7
-	umulh	x22, x17, x7
-	mul	 x23, x17, x7
-	umulh	x24, x6, x7
-	mul	 x26, x6, x7
-	umulh	x27, x18, x7
-	mul	 x28, x18, x7
-	mul	 x29, x1, x7
-	umulh	x30, x4, x7
-	adds	 x29, x30, x29
-	umulh	x30, x1, x7
-	mul	 x7, x4, x7
-	adcs	x28, x30, x28
-	mul	 x30, x25, x5
-	adcs	x26, x27, x26
-	mul	 x27, x25, x17
-	adcs	x23, x24, x23
-	mul	 x24, x25, x6
-	adcs	x21, x22, x21
-	mul	 x22, x7, x8
-	adcs	x19, x20, x19
-	mul	 x20, x22, x14
-	adcs	x3, x3, xzr
-	cmn	 x20, x7
-	mul	 x9, x25, x18
-	mul	 x7, x22, x13
-	adcs	x7, x7, x29
-	mul	 x20, x22, x12
-	adcs	x20, x20, x28
-	mul	 x28, x22, x11
-	adcs	x26, x28, x26
-	mul	 x28, x22, x10
-	adcs	x23, x28, x23
-	mul	 x28, x22, x15
-	adcs	x21, x28, x21
-	mul	 x28, x22, x0
-	adcs	x19, x28, x19
-	umulh	x28, x22, x14
-	adcs	x29, x3, xzr
-	adds	 x28, x7, x28
-	umulh	x3, x22, x13
-	adcs	x8, x20, x3
-	umulh	x3, x22, x12
-	adcs	x26, x26, x3
-	umulh	x3, x22, x11
-	adcs	x3, x23, x3
-	umulh	x7, x22, x10
-	adcs	x7, x21, x7
-	umulh	x20, x22, x15
-	adcs	x19, x19, x20
-	mul	 x21, x25, x1
-	umulh	x20, x22, x0
-	adcs	x20, x29, x20
-	umulh	x22, x25, x4
-	adds	 x29, x22, x21
-	umulh	x21, x25, x1
-	adcs	x23, x21, x9
-	umulh	x9, x25, x18
-	adcs	x21, x9, x24
-	umulh	x9, x25, x6
-	adcs	x22, x9, x27
-	umulh	x9, x25, x17
-	adcs	x30, x9, x30
-	mul	 x9, x25, x16
-	umulh	x24, x25, x5
-	adcs	x24, x24, x9
-	umulh	x9, x25, x16
-	mul	 x25, x25, x4
-	adcs	x9, x9, xzr
-	adds	 x27, x25, x28
-	adcs	x25, x29, x8
-	ldp	x28, x8, [x2, #16]
-	adcs	x29, x23, x26
-	adcs	x3, x21, x3
-	mul	 x21, x28, x17
-	adcs	x7, x22, x7
-	mul	 x22, x28, x6
-	adcs	x19, x30, x19
-	ldr	x2, [sp, #24]           // 8-byte Folded Reload
-	mul	 x23, x27, x2
-	adcs	x20, x24, x20
-	mul	 x24, x23, x14
-	adcs	x9, x9, xzr
-	cmn	 x24, x27
-	mul	 x24, x28, x18
-	mul	 x26, x23, x13
-	adcs	x25, x26, x25
-	mul	 x26, x23, x12
-	adcs	x26, x26, x29
-	mul	 x27, x23, x11
-	adcs	x3, x27, x3
-	mul	 x27, x23, x10
-	adcs	x7, x27, x7
-	mul	 x27, x23, x15
-	adcs	x19, x27, x19
-	mul	 x27, x23, x0
-	adcs	x20, x27, x20
-	umulh	x27, x23, x14
-	adcs	x9, x9, xzr
-	adds	 x25, x25, x27
-	umulh	x27, x23, x13
-	adcs	x26, x26, x27
-	umulh	x27, x23, x12
-	adcs	x3, x3, x27
-	umulh	x27, x23, x11
-	adcs	x7, x7, x27
-	umulh	x27, x23, x10
-	adcs	x19, x19, x27
-	umulh	x27, x23, x15
-	adcs	x20, x20, x27
-	mul	 x27, x28, x1
-	umulh	x23, x23, x0
-	adcs	x9, x9, x23
-	umulh	x23, x28, x4
-	adds	 x23, x23, x27
-	umulh	x27, x28, x1
-	adcs	x24, x27, x24
-	umulh	x27, x28, x18
-	adcs	x22, x27, x22
-	umulh	x27, x28, x6
-	adcs	x21, x27, x21
-	mul	 x27, x28, x5
-	umulh	x29, x28, x17
-	adcs	x27, x29, x27
-	mul	 x29, x28, x16
-	umulh	x30, x28, x5
-	adcs	x29, x30, x29
-	umulh	x30, x28, x16
-	mul	 x28, x28, x4
-	adcs	x30, x30, xzr
-	adds	 x25, x28, x25
-	adcs	x23, x23, x26
-	adcs	x3, x24, x3
-	mul	 x26, x8, x5
-	adcs	x7, x22, x7
-	mul	 x22, x8, x17
-	adcs	x19, x21, x19
-	mul	 x24, x8, x6
-	adcs	x20, x27, x20
-	mul	 x21, x25, x2
-	adcs	x9, x29, x9
-	mul	 x27, x21, x14
-	adcs	x28, x30, xzr
-	cmn	 x27, x25
-	mul	 x25, x8, x18
-	mul	 x27, x21, x13
-	adcs	x23, x27, x23
-	mul	 x27, x21, x12
-	adcs	x3, x27, x3
-	mul	 x27, x21, x11
-	adcs	x7, x27, x7
-	mul	 x27, x21, x10
-	adcs	x19, x27, x19
-	mul	 x27, x21, x15
-	adcs	x20, x27, x20
-	mul	 x27, x21, x0
-	adcs	x9, x27, x9
-	umulh	x27, x21, x14
-	adcs	x28, x28, xzr
-	adds	 x27, x23, x27
-	umulh	x23, x21, x13
-	adcs	x3, x3, x23
-	umulh	x23, x21, x12
-	adcs	x30, x7, x23
-	umulh	x7, x21, x11
-	adcs	x7, x19, x7
-	umulh	x19, x21, x10
-	adcs	x19, x20, x19
-	umulh	x20, x21, x15
-	adcs	x20, x9, x20
-	mul	 x9, x8, x1
-	umulh	x21, x21, x0
-	adcs	x21, x28, x21
-	umulh	x23, x8, x4
-	adds	 x9, x23, x9
-	umulh	x23, x8, x1
-	adcs	x28, x23, x25
-	umulh	x23, x8, x18
-	adcs	x23, x23, x24
-	umulh	x24, x8, x6
-	adcs	x24, x24, x22
-	umulh	x22, x8, x17
-	adcs	x25, x22, x26
-	mul	 x22, x8, x16
-	umulh	x26, x8, x5
-	adcs	x26, x26, x22
-	umulh	x22, x8, x16
-	mul	 x29, x8, x4
-	adcs	x2, x22, xzr
-	adds	 x29, x29, x27
-	adcs	x27, x9, x3
-	ldr	x8, [sp, #16]           // 8-byte Folded Reload
-	ldp	x22, x3, [x8, #32]
-	adcs	x9, x28, x30
-	adcs	x7, x23, x7
-	mul	 x23, x22, x17
-	adcs	x19, x24, x19
-	mul	 x24, x22, x6
-	adcs	x20, x25, x20
-	ldr	x8, [sp, #24]           // 8-byte Folded Reload
-	mul	 x25, x29, x8
-	adcs	x21, x26, x21
-	mul	 x26, x25, x14
-	adcs	x2, x2, xzr
-	cmn	 x26, x29
-	mul	 x26, x22, x18
-	mul	 x28, x25, x13
-	adcs	x27, x28, x27
-	mul	 x28, x25, x12
-	adcs	x9, x28, x9
-	mul	 x28, x25, x11
-	adcs	x7, x28, x7
-	mul	 x28, x25, x10
-	adcs	x19, x28, x19
-	mul	 x28, x25, x15
-	adcs	x20, x28, x20
-	mul	 x28, x25, x0
-	adcs	x21, x28, x21
-	umulh	x28, x25, x14
-	adcs	x2, x2, xzr
-	adds	 x27, x27, x28
-	umulh	x28, x25, x13
-	adcs	x9, x9, x28
-	umulh	x28, x25, x12
-	adcs	x7, x7, x28
-	umulh	x28, x25, x11
-	adcs	x19, x19, x28
-	umulh	x28, x25, x10
-	adcs	x20, x20, x28
-	umulh	x28, x25, x15
-	adcs	x21, x21, x28
-	mul	 x28, x22, x1
-	umulh	x25, x25, x0
-	adcs	x2, x2, x25
-	umulh	x25, x22, x4
-	adds	 x25, x25, x28
-	umulh	x28, x22, x1
-	adcs	x26, x28, x26
-	umulh	x28, x22, x18
-	adcs	x24, x28, x24
-	umulh	x28, x22, x6
-	adcs	x23, x28, x23
-	mul	 x28, x22, x5
-	umulh	x29, x22, x17
-	adcs	x28, x29, x28
-	mul	 x29, x22, x16
-	umulh	x30, x22, x5
-	adcs	x29, x30, x29
-	umulh	x30, x22, x16
-	mul	 x22, x22, x4
-	adcs	x30, x30, xzr
-	adds	 x22, x22, x27
-	adcs	x9, x25, x9
-	adcs	x7, x26, x7
-	mul	 x25, x3, x5
-	adcs	x19, x24, x19
-	mul	 x24, x3, x17
-	adcs	x20, x23, x20
-	mul	 x23, x3, x6
-	adcs	x21, x28, x21
-	mul	 x26, x22, x8
-	adcs	x8, x29, x2
-	mul	 x27, x26, x14
-	adcs	x28, x30, xzr
-	cmn	 x27, x22
-	mul	 x22, x3, x18
-	mul	 x27, x26, x13
-	adcs	x9, x27, x9
-	mul	 x27, x26, x12
-	adcs	x7, x27, x7
-	mul	 x27, x26, x11
-	adcs	x19, x27, x19
-	mul	 x27, x26, x10
-	adcs	x20, x27, x20
-	mul	 x27, x26, x15
-	adcs	x21, x27, x21
-	mul	 x27, x26, x0
-	adcs	x8, x27, x8
-	umulh	x27, x26, x14
-	adcs	x28, x28, xzr
-	adds	 x9, x9, x27
-	umulh	x27, x26, x13
-	adcs	x7, x7, x27
-	umulh	x27, x26, x12
-	adcs	x19, x19, x27
-	umulh	x27, x26, x11
-	adcs	x20, x20, x27
-	umulh	x27, x26, x10
-	adcs	x21, x21, x27
-	umulh	x27, x26, x15
-	adcs	x8, x8, x27
-	mul	 x27, x3, x1
-	umulh	x26, x26, x0
-	adcs	x26, x28, x26
-	umulh	x28, x3, x4
-	adds	 x27, x28, x27
-	umulh	x28, x3, x1
-	adcs	x22, x28, x22
-	umulh	x28, x3, x18
-	adcs	x23, x28, x23
-	umulh	x28, x3, x6
-	adcs	x24, x28, x24
-	umulh	x28, x3, x17
-	adcs	x25, x28, x25
-	mul	 x28, x3, x16
-	umulh	x29, x3, x5
-	adcs	x28, x29, x28
-	ldp	x2, x30, [sp, #16]
-	ldr	x2, [x2, #48]
-	umulh	x29, x3, x16
-	mul	 x3, x3, x4
-	adcs	x29, x29, xzr
-	adds	 x9, x3, x9
-	adcs	x3, x27, x7
-	umulh	x7, x2, x16
-	mul	 x16, x2, x16
-	adcs	x19, x22, x19
-	umulh	x22, x2, x5
-	mul	 x5, x2, x5
-	adcs	x20, x23, x20
-	umulh	x23, x2, x17
-	mul	 x17, x2, x17
-	adcs	x21, x24, x21
-	umulh	x24, x2, x6
-	mul	 x6, x2, x6
-	adcs	x8, x25, x8
-	mul	 x25, x9, x30
-	adcs	x26, x28, x26
-	mul	 x27, x25, x14
-	adcs	x28, x29, xzr
-	cmn	 x27, x9
-	umulh	x9, x2, x18
-	mul	 x18, x2, x18
-	umulh	x27, x2, x1
-	mul	 x1, x2, x1
-	umulh	x29, x2, x4
-	mul	 x2, x2, x4
-	mul	 x4, x25, x13
-	adcs	x3, x4, x3
-	mul	 x4, x25, x12
-	adcs	x4, x4, x19
-	mul	 x19, x25, x11
-	adcs	x19, x19, x20
-	mul	 x20, x25, x10
-	adcs	x20, x20, x21
-	mul	 x21, x25, x15
-	adcs	x8, x21, x8
-	mul	 x21, x25, x0
-	adcs	x21, x21, x26
-	adcs	x26, x28, xzr
-	umulh	x28, x25, x14
-	adds	 x3, x3, x28
-	umulh	x28, x25, x13
-	adcs	x4, x4, x28
-	umulh	x28, x25, x12
-	adcs	x19, x19, x28
-	umulh	x28, x25, x11
-	adcs	x20, x20, x28
-	umulh	x28, x25, x10
-	adcs	x8, x8, x28
-	umulh	x28, x25, x15
-	adcs	x21, x21, x28
-	umulh	x25, x25, x0
-	adcs	x25, x26, x25
-	adds	 x1, x29, x1
-	adcs	x18, x27, x18
-	adcs	x9, x9, x6
-	adcs	x17, x24, x17
-	adcs	x5, x23, x5
-	adcs	x16, x22, x16
-	adcs	x6, x7, xzr
-	adds	 x2, x2, x3
-	adcs	x1, x1, x4
-	adcs	x18, x18, x19
-	adcs	x9, x9, x20
-	adcs	x8, x17, x8
-	adcs	x17, x5, x21
-	mul	 x3, x2, x30
-	adcs	x16, x16, x25
-	mul	 x4, x3, x14
-	adcs	x5, x6, xzr
-	cmn	 x4, x2
-	mul	 x2, x3, x13
-	adcs	x1, x2, x1
-	mul	 x2, x3, x12
-	adcs	x18, x2, x18
-	mul	 x2, x3, x11
-	adcs	x9, x2, x9
-	mul	 x2, x3, x10
-	adcs	x8, x2, x8
-	mul	 x2, x3, x15
-	adcs	x17, x2, x17
-	mul	 x2, x3, x0
-	adcs	x16, x2, x16
-	umulh	x2, x3, x14
-	adcs	x4, x5, xzr
-	adds	 x1, x1, x2
-	umulh	x2, x3, x13
-	adcs	x18, x18, x2
-	umulh	x2, x3, x12
-	adcs	x9, x9, x2
-	umulh	x2, x3, x11
-	adcs	x8, x8, x2
-	umulh	x2, x3, x10
-	adcs	x17, x17, x2
-	umulh	x2, x3, x15
-	adcs	x16, x16, x2
-	umulh	x2, x3, x0
-	adcs	x2, x4, x2
-	subs	 x14, x1, x14
-	sbcs	x13, x18, x13
-	sbcs	x12, x9, x12
-	sbcs	x11, x8, x11
-	sbcs	x10, x17, x10
-	sbcs	x15, x16, x15
-	sbcs	x0, x2, x0
-	asr	x3, x0, #63
-	cmp	 x3, #0                 // =0
-	csel	x14, x1, x14, lt
-	csel	x13, x18, x13, lt
-	csel	x9, x9, x12, lt
-	csel	x8, x8, x11, lt
-	csel	x10, x17, x10, lt
-	csel	x11, x16, x15, lt
-	csel	x12, x2, x0, lt
-	ldr	x15, [sp, #8]           // 8-byte Folded Reload
-	stp	 x14, x13, [x15]
-	stp	x9, x8, [x15, #16]
-	stp	x10, x11, [x15, #32]
-	str	x12, [x15, #48]
-	add	sp, sp, #32             // =32
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end99:
-	.size	mcl_fp_montNF7L, .Lfunc_end99-mcl_fp_montNF7L
-
-	.globl	mcl_fp_montRed7L
-	.align	2
-	.type	mcl_fp_montRed7L,@function
-mcl_fp_montRed7L:                       // @mcl_fp_montRed7L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	ldur	x15, [x2, #-8]
-	ldp	x9, x8, [x2, #40]
-	ldp	x11, x10, [x2, #24]
-	ldp	x13, x12, [x2, #8]
-	ldr	 x14, [x2]
-	ldp	x17, x18, [x1, #96]
-	ldp	x2, x3, [x1, #80]
-	ldp	x4, x5, [x1, #64]
-	ldp	x6, x7, [x1, #48]
-	ldp	x19, x20, [x1, #32]
-	ldp	x21, x22, [x1, #16]
-	ldp	 x16, x1, [x1]
-	mul	 x23, x16, x15
-	mul	 x24, x23, x8
-	mul	 x25, x23, x9
-	mul	 x26, x23, x10
-	mul	 x27, x23, x11
-	mul	 x28, x23, x12
-	mul	 x29, x23, x13
-	umulh	x30, x23, x14
-	adds	 x29, x30, x29
-	umulh	x30, x23, x13
-	adcs	x28, x30, x28
-	umulh	x30, x23, x12
-	adcs	x27, x30, x27
-	umulh	x30, x23, x11
-	adcs	x26, x30, x26
-	umulh	x30, x23, x10
-	adcs	x25, x30, x25
-	umulh	x30, x23, x9
-	adcs	x24, x30, x24
-	umulh	x30, x23, x8
-	mul	 x23, x23, x14
-	adcs	x30, x30, xzr
-	cmn	 x16, x23
-	adcs	x16, x1, x29
-	adcs	x1, x21, x28
-	mul	 x21, x16, x15
-	adcs	x22, x22, x27
-	mul	 x23, x21, x8
-	mul	 x27, x21, x9
-	mul	 x28, x21, x10
-	mul	 x29, x21, x11
-	adcs	x19, x19, x26
-	mul	 x26, x21, x12
-	adcs	x20, x20, x25
-	mul	 x25, x21, x13
-	adcs	x6, x6, x24
-	umulh	x24, x21, x14
-	adcs	x7, x7, x30
-	adcs	x4, x4, xzr
-	adcs	x5, x5, xzr
-	adcs	x2, x2, xzr
-	adcs	x3, x3, xzr
-	adcs	x17, x17, xzr
-	adcs	x18, x18, xzr
-	adcs	x30, xzr, xzr
-	adds	 x24, x24, x25
-	umulh	x25, x21, x13
-	adcs	x25, x25, x26
-	umulh	x26, x21, x12
-	adcs	x26, x26, x29
-	umulh	x29, x21, x11
-	adcs	x28, x29, x28
-	umulh	x29, x21, x10
-	adcs	x27, x29, x27
-	umulh	x29, x21, x9
-	adcs	x23, x29, x23
-	umulh	x29, x21, x8
-	mul	 x21, x21, x14
-	adcs	x29, x29, xzr
-	cmn	 x21, x16
-	adcs	x16, x24, x1
-	adcs	x1, x25, x22
-	mul	 x21, x16, x15
-	adcs	x19, x26, x19
-	mul	 x22, x21, x8
-	mul	 x24, x21, x9
-	mul	 x25, x21, x10
-	adcs	x20, x28, x20
-	mul	 x26, x21, x11
-	adcs	x6, x27, x6
-	mul	 x27, x21, x12
-	adcs	x7, x23, x7
-	mul	 x23, x21, x13
-	adcs	x4, x29, x4
-	umulh	x28, x21, x14
-	adcs	x5, x5, xzr
-	adcs	x2, x2, xzr
-	adcs	x3, x3, xzr
-	adcs	x17, x17, xzr
-	adcs	x18, x18, xzr
-	adcs	x29, x30, xzr
-	adds	 x23, x28, x23
-	umulh	x28, x21, x13
-	adcs	x27, x28, x27
-	umulh	x28, x21, x12
-	adcs	x26, x28, x26
-	umulh	x28, x21, x11
-	adcs	x25, x28, x25
-	umulh	x28, x21, x10
-	adcs	x24, x28, x24
-	umulh	x28, x21, x9
-	adcs	x22, x28, x22
-	umulh	x28, x21, x8
-	mul	 x21, x21, x14
-	adcs	x28, x28, xzr
-	cmn	 x21, x16
-	adcs	x16, x23, x1
-	adcs	x1, x27, x19
-	mul	 x19, x16, x15
-	adcs	x20, x26, x20
-	mul	 x21, x19, x8
-	mul	 x23, x19, x9
-	mul	 x26, x19, x10
-	adcs	x6, x25, x6
-	mul	 x25, x19, x11
-	adcs	x7, x24, x7
-	mul	 x24, x19, x12
-	adcs	x4, x22, x4
-	mul	 x22, x19, x13
-	adcs	x5, x28, x5
-	umulh	x27, x19, x14
-	adcs	x2, x2, xzr
-	adcs	x3, x3, xzr
-	adcs	x17, x17, xzr
-	adcs	x18, x18, xzr
-	adcs	x28, x29, xzr
-	adds	 x22, x27, x22
-	umulh	x27, x19, x13
-	adcs	x24, x27, x24
-	umulh	x27, x19, x12
-	adcs	x25, x27, x25
-	umulh	x27, x19, x11
-	adcs	x26, x27, x26
-	umulh	x27, x19, x10
-	adcs	x23, x27, x23
-	umulh	x27, x19, x9
-	adcs	x21, x27, x21
-	umulh	x27, x19, x8
-	mul	 x19, x19, x14
-	adcs	x27, x27, xzr
-	cmn	 x19, x16
-	adcs	x16, x22, x1
-	adcs	x1, x24, x20
-	mul	 x19, x16, x15
-	adcs	x6, x25, x6
-	mul	 x20, x19, x8
-	mul	 x22, x19, x9
-	mul	 x24, x19, x10
-	adcs	x7, x26, x7
-	mul	 x25, x19, x11
-	adcs	x4, x23, x4
-	mul	 x23, x19, x12
-	adcs	x5, x21, x5
-	mul	 x21, x19, x13
-	adcs	x2, x27, x2
-	umulh	x26, x19, x14
-	adcs	x3, x3, xzr
-	adcs	x17, x17, xzr
-	adcs	x18, x18, xzr
-	adcs	x27, x28, xzr
-	adds	 x21, x26, x21
-	umulh	x26, x19, x13
-	adcs	x23, x26, x23
-	umulh	x26, x19, x12
-	adcs	x25, x26, x25
-	umulh	x26, x19, x11
-	adcs	x24, x26, x24
-	umulh	x26, x19, x10
-	adcs	x22, x26, x22
-	umulh	x26, x19, x9
-	adcs	x20, x26, x20
-	umulh	x26, x19, x8
-	mul	 x19, x19, x14
-	adcs	x26, x26, xzr
-	cmn	 x19, x16
-	adcs	x16, x21, x1
-	adcs	x1, x23, x6
-	mul	 x6, x16, x15
-	adcs	x7, x25, x7
-	mul	 x19, x6, x8
-	mul	 x21, x6, x9
-	mul	 x23, x6, x10
-	adcs	x4, x24, x4
-	mul	 x24, x6, x11
-	adcs	x5, x22, x5
-	mul	 x22, x6, x12
-	adcs	x2, x20, x2
-	mul	 x20, x6, x13
-	adcs	x3, x26, x3
-	umulh	x25, x6, x14
-	adcs	x17, x17, xzr
-	adcs	x18, x18, xzr
-	adcs	x26, x27, xzr
-	adds	 x20, x25, x20
-	umulh	x25, x6, x13
-	adcs	x22, x25, x22
-	umulh	x25, x6, x12
-	adcs	x24, x25, x24
-	umulh	x25, x6, x11
-	adcs	x23, x25, x23
-	umulh	x25, x6, x10
-	adcs	x21, x25, x21
-	umulh	x25, x6, x9
-	adcs	x19, x25, x19
-	umulh	x25, x6, x8
-	mul	 x6, x6, x14
-	adcs	x25, x25, xzr
-	cmn	 x6, x16
-	adcs	x16, x20, x1
-	adcs	x1, x22, x7
-	mul	 x15, x16, x15
-	adcs	x4, x24, x4
-	mul	 x6, x15, x8
-	mul	 x7, x15, x9
-	mul	 x20, x15, x10
-	adcs	x5, x23, x5
-	mul	 x22, x15, x11
-	adcs	x2, x21, x2
-	mul	 x21, x15, x12
-	adcs	x3, x19, x3
-	mul	 x19, x15, x13
-	adcs	x17, x25, x17
-	umulh	x23, x15, x14
-	adcs	x18, x18, xzr
-	adcs	x24, x26, xzr
-	adds	 x19, x23, x19
-	umulh	x23, x15, x13
-	adcs	x21, x23, x21
-	umulh	x23, x15, x12
-	adcs	x22, x23, x22
-	umulh	x23, x15, x11
-	adcs	x20, x23, x20
-	umulh	x23, x15, x10
-	adcs	x7, x23, x7
-	umulh	x23, x15, x9
-	adcs	x6, x23, x6
-	umulh	x23, x15, x8
-	mul	 x15, x15, x14
-	adcs	x23, x23, xzr
-	cmn	 x15, x16
-	adcs	x15, x19, x1
-	adcs	x16, x21, x4
-	adcs	x1, x22, x5
-	adcs	x2, x20, x2
-	adcs	x3, x7, x3
-	adcs	x17, x6, x17
-	adcs	x18, x23, x18
-	adcs	x4, x24, xzr
-	subs	 x14, x15, x14
-	sbcs	x13, x16, x13
-	sbcs	x12, x1, x12
-	sbcs	x11, x2, x11
-	sbcs	x10, x3, x10
-	sbcs	x9, x17, x9
-	sbcs	x8, x18, x8
-	sbcs	x4, x4, xzr
-	tst	 x4, #0x1
-	csel	x14, x15, x14, ne
-	csel	x13, x16, x13, ne
-	csel	x12, x1, x12, ne
-	csel	x11, x2, x11, ne
-	csel	x10, x3, x10, ne
-	csel	x9, x17, x9, ne
-	csel	x8, x18, x8, ne
-	stp	 x14, x13, [x0]
-	stp	x12, x11, [x0, #16]
-	stp	x10, x9, [x0, #32]
-	str	x8, [x0, #48]
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end100:
-	.size	mcl_fp_montRed7L, .Lfunc_end100-mcl_fp_montRed7L
-
-	.globl	mcl_fp_addPre7L
-	.align	2
-	.type	mcl_fp_addPre7L,@function
-mcl_fp_addPre7L:                        // @mcl_fp_addPre7L
-// BB#0:
-	ldp	x11, x8, [x2, #40]
-	ldp	x13, x9, [x1, #40]
-	ldp	x15, x10, [x2, #24]
-	ldp	x17, x14, [x2, #8]
-	ldr	 x16, [x2]
-	ldp	 x18, x2, [x1]
-	ldr	x3, [x1, #16]
-	ldp	x1, x12, [x1, #24]
-	adds	 x16, x16, x18
-	str	 x16, [x0]
-	adcs	x16, x17, x2
-	adcs	x14, x14, x3
-	stp	x16, x14, [x0, #8]
-	adcs	x14, x15, x1
-	adcs	x10, x10, x12
-	stp	x14, x10, [x0, #24]
-	adcs	x10, x11, x13
-	adcs	x9, x8, x9
-	adcs	x8, xzr, xzr
-	stp	x10, x9, [x0, #40]
-	mov	 x0, x8
-	ret
-.Lfunc_end101:
-	.size	mcl_fp_addPre7L, .Lfunc_end101-mcl_fp_addPre7L
-
-	.globl	mcl_fp_subPre7L
-	.align	2
-	.type	mcl_fp_subPre7L,@function
-mcl_fp_subPre7L:                        // @mcl_fp_subPre7L
-// BB#0:
-	ldp	x11, x8, [x2, #40]
-	ldp	x13, x9, [x1, #40]
-	ldp	x15, x10, [x2, #24]
-	ldp	x17, x14, [x2, #8]
-	ldr	 x16, [x2]
-	ldp	 x18, x2, [x1]
-	ldr	x3, [x1, #16]
-	ldp	x1, x12, [x1, #24]
-	subs	 x16, x18, x16
-	str	 x16, [x0]
-	sbcs	x16, x2, x17
-	sbcs	x14, x3, x14
-	stp	x16, x14, [x0, #8]
-	sbcs	x14, x1, x15
-	sbcs	x10, x12, x10
-	stp	x14, x10, [x0, #24]
-	sbcs	x10, x13, x11
-	sbcs	x9, x9, x8
-	ngcs	 x8, xzr
-	and	x8, x8, #0x1
-	stp	x10, x9, [x0, #40]
-	mov	 x0, x8
-	ret
-.Lfunc_end102:
-	.size	mcl_fp_subPre7L, .Lfunc_end102-mcl_fp_subPre7L
-
-	.globl	mcl_fp_shr1_7L
-	.align	2
-	.type	mcl_fp_shr1_7L,@function
-mcl_fp_shr1_7L:                         // @mcl_fp_shr1_7L
-// BB#0:
-	ldp	 x8, x9, [x1]
-	ldp	x14, x10, [x1, #40]
-	ldp	x11, x12, [x1, #16]
-	ldr	x13, [x1, #32]
-	extr	x8, x9, x8, #1
-	extr	x9, x11, x9, #1
-	extr	x11, x12, x11, #1
-	extr	x12, x13, x12, #1
-	extr	x13, x14, x13, #1
-	extr	x14, x10, x14, #1
-	lsr	x10, x10, #1
-	stp	 x8, x9, [x0]
-	stp	x11, x12, [x0, #16]
-	stp	x13, x14, [x0, #32]
-	str	x10, [x0, #48]
-	ret
-.Lfunc_end103:
-	.size	mcl_fp_shr1_7L, .Lfunc_end103-mcl_fp_shr1_7L
-
-	.globl	mcl_fp_add7L
-	.align	2
-	.type	mcl_fp_add7L,@function
-mcl_fp_add7L:                           // @mcl_fp_add7L
-// BB#0:
-	ldp	x11, x8, [x2, #40]
-	ldp	x13, x9, [x1, #40]
-	ldp	x15, x10, [x2, #24]
-	ldp	x17, x14, [x2, #8]
-	ldr	 x16, [x2]
-	ldp	 x18, x2, [x1]
-	ldr	x4, [x1, #16]
-	ldp	x1, x12, [x1, #24]
-	adds	 x16, x16, x18
-	ldp	x5, x18, [x3, #40]
-	adcs	x17, x17, x2
-	adcs	x2, x14, x4
-	ldr	x4, [x3, #32]
-	adcs	x15, x15, x1
-	adcs	x10, x10, x12
-	ldp	 x12, x1, [x3]
-	stp	 x16, x17, [x0]
-	stp	x2, x15, [x0, #16]
-	adcs	x6, x11, x13
-	stp	x10, x6, [x0, #32]
-	adcs	x8, x8, x9
-	str	x8, [x0, #48]
-	adcs	x7, xzr, xzr
-	ldp	x9, x11, [x3, #16]
-	subs	 x14, x16, x12
-	sbcs	x13, x17, x1
-	sbcs	x12, x2, x9
-	sbcs	x11, x15, x11
-	sbcs	x10, x10, x4
-	sbcs	x9, x6, x5
-	sbcs	x8, x8, x18
-	sbcs	x15, x7, xzr
-	and	w15, w15, #0x1
-	tbnz	w15, #0, .LBB104_2
-// BB#1:                                // %nocarry
-	stp	 x14, x13, [x0]
-	stp	x12, x11, [x0, #16]
-	stp	x10, x9, [x0, #32]
-	str	x8, [x0, #48]
-.LBB104_2:                              // %carry
-	ret
-.Lfunc_end104:
-	.size	mcl_fp_add7L, .Lfunc_end104-mcl_fp_add7L
-
-	.globl	mcl_fp_addNF7L
-	.align	2
-	.type	mcl_fp_addNF7L,@function
-mcl_fp_addNF7L:                         // @mcl_fp_addNF7L
-// BB#0:
-	ldp	x11, x8, [x1, #40]
-	ldp	x13, x9, [x2, #40]
-	ldp	x15, x10, [x1, #24]
-	ldp	x17, x14, [x1, #8]
-	ldr	 x16, [x1]
-	ldp	 x18, x1, [x2]
-	ldr	x4, [x2, #16]
-	ldp	x2, x12, [x2, #24]
-	adds	 x16, x18, x16
-	adcs	x17, x1, x17
-	adcs	x14, x4, x14
-	ldp	x4, x18, [x3, #40]
-	adcs	x15, x2, x15
-	adcs	x10, x12, x10
-	ldp	 x12, x2, [x3]
-	adcs	x11, x13, x11
-	ldr	x13, [x3, #16]
-	ldp	x3, x1, [x3, #24]
-	adcs	x8, x9, x8
-	subs	 x9, x16, x12
-	sbcs	x12, x17, x2
-	sbcs	x13, x14, x13
-	sbcs	x2, x15, x3
-	sbcs	x1, x10, x1
-	sbcs	x3, x11, x4
-	sbcs	x18, x8, x18
-	asr	x4, x18, #63
-	cmp	 x4, #0                 // =0
-	csel	x9, x16, x9, lt
-	csel	x12, x17, x12, lt
-	csel	x13, x14, x13, lt
-	csel	x14, x15, x2, lt
-	csel	x10, x10, x1, lt
-	csel	x11, x11, x3, lt
-	csel	x8, x8, x18, lt
-	stp	 x9, x12, [x0]
-	stp	x13, x14, [x0, #16]
-	stp	x10, x11, [x0, #32]
-	str	x8, [x0, #48]
-	ret
-.Lfunc_end105:
-	.size	mcl_fp_addNF7L, .Lfunc_end105-mcl_fp_addNF7L
-
-	.globl	mcl_fp_sub7L
-	.align	2
-	.type	mcl_fp_sub7L,@function
-mcl_fp_sub7L:                           // @mcl_fp_sub7L
-// BB#0:
-	ldp	x13, x14, [x2, #40]
-	ldp	x17, x15, [x1, #40]
-	ldp	x11, x12, [x2, #24]
-	ldp	x9, x10, [x2, #8]
-	ldr	 x8, [x2]
-	ldp	 x18, x2, [x1]
-	ldr	x4, [x1, #16]
-	ldp	x1, x16, [x1, #24]
-	subs	 x8, x18, x8
-	sbcs	x9, x2, x9
-	stp	 x8, x9, [x0]
-	sbcs	x10, x4, x10
-	sbcs	x11, x1, x11
-	stp	x10, x11, [x0, #16]
-	sbcs	x12, x16, x12
-	sbcs	x13, x17, x13
-	stp	x12, x13, [x0, #32]
-	sbcs	x14, x15, x14
-	str	x14, [x0, #48]
-	ngcs	 x15, xzr
-	and	w15, w15, #0x1
-	tbnz	w15, #0, .LBB106_2
-// BB#1:                                // %nocarry
-	ret
-.LBB106_2:                              // %carry
-	ldp	 x16, x17, [x3]
-	ldp	x18, x1, [x3, #16]
-	ldr	x2, [x3, #32]
-	ldp	x3, x15, [x3, #40]
-	adds	 x8, x16, x8
-	adcs	x9, x17, x9
-	adcs	x10, x18, x10
-	adcs	x11, x1, x11
-	adcs	x12, x2, x12
-	adcs	x13, x3, x13
-	adcs	x14, x15, x14
-	stp	 x8, x9, [x0]
-	stp	x10, x11, [x0, #16]
-	stp	x12, x13, [x0, #32]
-	str	x14, [x0, #48]
-	ret
-.Lfunc_end106:
-	.size	mcl_fp_sub7L, .Lfunc_end106-mcl_fp_sub7L
-
-	.globl	mcl_fp_subNF7L
-	.align	2
-	.type	mcl_fp_subNF7L,@function
-mcl_fp_subNF7L:                         // @mcl_fp_subNF7L
-// BB#0:
-	ldp	x11, x8, [x2, #40]
-	ldp	x13, x9, [x1, #40]
-	ldp	x15, x10, [x2, #24]
-	ldp	x17, x14, [x2, #8]
-	ldr	 x16, [x2]
-	ldp	 x18, x2, [x1]
-	ldr	x4, [x1, #16]
-	ldp	x1, x12, [x1, #24]
-	subs	 x16, x18, x16
-	sbcs	x17, x2, x17
-	sbcs	x14, x4, x14
-	ldp	x4, x18, [x3, #40]
-	sbcs	x15, x1, x15
-	sbcs	x10, x12, x10
-	ldp	 x12, x1, [x3]
-	sbcs	x11, x13, x11
-	ldr	x13, [x3, #16]
-	ldp	x3, x2, [x3, #24]
-	sbcs	x8, x9, x8
-	asr	x9, x8, #63
-	and	 x1, x9, x1
-	and	 x13, x9, x13
-	and	 x3, x9, x3
-	and	 x2, x9, x2
-	and	 x4, x9, x4
-	and	 x18, x9, x18
-	extr	x9, x9, x8, #63
-	and	 x9, x9, x12
-	adds	 x9, x9, x16
-	str	 x9, [x0]
-	adcs	x9, x1, x17
-	str	x9, [x0, #8]
-	adcs	x9, x13, x14
-	str	x9, [x0, #16]
-	adcs	x9, x3, x15
-	str	x9, [x0, #24]
-	adcs	x9, x2, x10
-	str	x9, [x0, #32]
-	adcs	x9, x4, x11
-	adcs	x8, x18, x8
-	stp	x9, x8, [x0, #40]
-	ret
-.Lfunc_end107:
-	.size	mcl_fp_subNF7L, .Lfunc_end107-mcl_fp_subNF7L
-
-	.globl	mcl_fpDbl_add7L
-	.align	2
-	.type	mcl_fpDbl_add7L,@function
-mcl_fpDbl_add7L:                        // @mcl_fpDbl_add7L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	ldp	x8, x9, [x2, #96]
-	ldp	x10, x11, [x1, #96]
-	ldp	x12, x13, [x2, #80]
-	ldp	x14, x15, [x1, #80]
-	ldp	x16, x17, [x2, #64]
-	ldp	x18, x4, [x1, #64]
-	ldp	x5, x6, [x2, #48]
-	ldp	x7, x19, [x1, #48]
-	ldp	x20, x21, [x2, #32]
-	ldp	x22, x23, [x1, #32]
-	ldp	x24, x25, [x2, #16]
-	ldp	 x27, x2, [x2]
-	ldp	x28, x29, [x1, #16]
-	ldp	 x26, x1, [x1]
-	adds	 x26, x27, x26
-	ldr	x27, [x3, #48]
-	str	 x26, [x0]
-	adcs	x1, x2, x1
-	ldp	x2, x26, [x3, #32]
-	str	x1, [x0, #8]
-	adcs	x1, x24, x28
-	ldp	x24, x28, [x3, #16]
-	str	x1, [x0, #16]
-	ldp	 x1, x3, [x3]
-	adcs	x25, x25, x29
-	adcs	x20, x20, x22
-	stp	x25, x20, [x0, #24]
-	adcs	x20, x21, x23
-	adcs	x5, x5, x7
-	stp	x20, x5, [x0, #40]
-	adcs	x5, x6, x19
-	adcs	x16, x16, x18
-	adcs	x17, x17, x4
-	adcs	x12, x12, x14
-	adcs	x13, x13, x15
-	adcs	x8, x8, x10
-	adcs	x9, x9, x11
-	adcs	x10, xzr, xzr
-	subs	 x11, x5, x1
-	sbcs	x14, x16, x3
-	sbcs	x15, x17, x24
-	sbcs	x18, x12, x28
-	sbcs	x1, x13, x2
-	sbcs	x2, x8, x26
-	sbcs	x3, x9, x27
-	sbcs	x10, x10, xzr
-	tst	 x10, #0x1
-	csel	x10, x5, x11, ne
-	csel	x11, x16, x14, ne
-	csel	x14, x17, x15, ne
-	csel	x12, x12, x18, ne
-	csel	x13, x13, x1, ne
-	csel	x8, x8, x2, ne
-	csel	x9, x9, x3, ne
-	stp	x10, x11, [x0, #56]
-	stp	x14, x12, [x0, #72]
-	stp	x13, x8, [x0, #88]
-	str	x9, [x0, #104]
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end108:
-	.size	mcl_fpDbl_add7L, .Lfunc_end108-mcl_fpDbl_add7L
-
-	.globl	mcl_fpDbl_sub7L
-	.align	2
-	.type	mcl_fpDbl_sub7L,@function
-mcl_fpDbl_sub7L:                        // @mcl_fpDbl_sub7L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	ldp	x9, x8, [x2, #96]
-	ldp	x11, x10, [x1, #96]
-	ldp	x12, x13, [x2, #80]
-	ldp	x14, x15, [x1, #80]
-	ldp	x16, x17, [x2, #64]
-	ldp	x18, x4, [x1, #64]
-	ldp	x5, x6, [x2, #48]
-	ldp	x7, x19, [x1, #48]
-	ldp	x20, x21, [x2, #32]
-	ldp	x22, x23, [x1, #32]
-	ldp	x24, x25, [x2, #16]
-	ldp	 x26, x2, [x2]
-	ldp	x28, x29, [x1, #16]
-	ldp	 x27, x1, [x1]
-	subs	 x26, x27, x26
-	ldr	x27, [x3, #48]
-	str	 x26, [x0]
-	sbcs	x1, x1, x2
-	ldp	x2, x26, [x3, #32]
-	str	x1, [x0, #8]
-	sbcs	x1, x28, x24
-	ldp	x24, x28, [x3, #16]
-	str	x1, [x0, #16]
-	ldp	 x1, x3, [x3]
-	sbcs	x25, x29, x25
-	sbcs	x20, x22, x20
-	stp	x25, x20, [x0, #24]
-	sbcs	x20, x23, x21
-	sbcs	x5, x7, x5
-	stp	x20, x5, [x0, #40]
-	sbcs	x5, x19, x6
-	sbcs	x16, x18, x16
-	sbcs	x17, x4, x17
-	sbcs	x12, x14, x12
-	sbcs	x13, x15, x13
-	sbcs	x9, x11, x9
-	sbcs	x8, x10, x8
-	ngcs	 x10, xzr
-	tst	 x10, #0x1
-	csel	x10, x27, xzr, ne
-	csel	x11, x26, xzr, ne
-	csel	x14, x2, xzr, ne
-	csel	x15, x28, xzr, ne
-	csel	x18, x24, xzr, ne
-	csel	x2, x3, xzr, ne
-	csel	x1, x1, xzr, ne
-	adds	 x1, x1, x5
-	adcs	x16, x2, x16
-	stp	x1, x16, [x0, #56]
-	adcs	x16, x18, x17
-	adcs	x12, x15, x12
-	stp	x16, x12, [x0, #72]
-	adcs	x12, x14, x13
-	adcs	x9, x11, x9
-	stp	x12, x9, [x0, #88]
-	adcs	x8, x10, x8
-	str	x8, [x0, #104]
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end109:
-	.size	mcl_fpDbl_sub7L, .Lfunc_end109-mcl_fpDbl_sub7L
-
-	.align	2
-	.type	.LmulPv512x64,@function
-.LmulPv512x64:                          // @mulPv512x64
-// BB#0:
-	ldr	 x9, [x0]
-	mul	 x10, x9, x1
-	str	 x10, [x8]
-	ldr	x10, [x0, #8]
-	umulh	x9, x9, x1
-	mul	 x11, x10, x1
-	adds	 x9, x9, x11
-	str	x9, [x8, #8]
-	ldr	x9, [x0, #16]
-	umulh	x10, x10, x1
-	mul	 x11, x9, x1
-	adcs	x10, x10, x11
-	str	x10, [x8, #16]
-	ldr	x10, [x0, #24]
-	umulh	x9, x9, x1
-	mul	 x11, x10, x1
-	adcs	x9, x9, x11
-	str	x9, [x8, #24]
-	ldr	x9, [x0, #32]
-	umulh	x10, x10, x1
-	mul	 x11, x9, x1
-	adcs	x10, x10, x11
-	str	x10, [x8, #32]
-	ldr	x10, [x0, #40]
-	umulh	x9, x9, x1
-	mul	 x11, x10, x1
-	adcs	x9, x9, x11
-	str	x9, [x8, #40]
-	ldr	x9, [x0, #48]
-	umulh	x10, x10, x1
-	mul	 x11, x9, x1
-	adcs	x10, x10, x11
-	str	x10, [x8, #48]
-	ldr	x10, [x0, #56]
-	umulh	x9, x9, x1
-	mul	 x11, x10, x1
-	umulh	x10, x10, x1
-	adcs	x9, x9, x11
-	str	x9, [x8, #56]
-	adcs	x9, x10, xzr
-	str	x9, [x8, #64]
-	ret
-.Lfunc_end110:
-	.size	.LmulPv512x64, .Lfunc_end110-.LmulPv512x64
-
-	.globl	mcl_fp_mulUnitPre8L
-	.align	2
-	.type	mcl_fp_mulUnitPre8L,@function
-mcl_fp_mulUnitPre8L:                    // @mcl_fp_mulUnitPre8L
-// BB#0:
-	stp	x20, x19, [sp, #-32]!
-	stp	x29, x30, [sp, #16]
-	add	x29, sp, #16            // =16
-	sub	sp, sp, #80             // =80
-	mov	 x19, x0
-	mov	 x8, sp
-	mov	 x0, x1
-	mov	 x1, x2
-	bl	.LmulPv512x64
-	ldp	x9, x8, [sp, #56]
-	ldp	x11, x10, [sp, #40]
-	ldp	x16, x12, [sp, #24]
-	ldp	 x13, x14, [sp]
-	ldr	x15, [sp, #16]
-	stp	 x13, x14, [x19]
-	stp	x15, x16, [x19, #16]
-	stp	x12, x11, [x19, #32]
-	stp	x10, x9, [x19, #48]
-	str	x8, [x19, #64]
-	sub	sp, x29, #16            // =16
-	ldp	x29, x30, [sp, #16]
-	ldp	x20, x19, [sp], #32
-	ret
-.Lfunc_end111:
-	.size	mcl_fp_mulUnitPre8L, .Lfunc_end111-mcl_fp_mulUnitPre8L
-
-	.globl	mcl_fpDbl_mulPre8L
-	.align	2
-	.type	mcl_fpDbl_mulPre8L,@function
-mcl_fpDbl_mulPre8L:                     // @mcl_fpDbl_mulPre8L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	add	x29, sp, #80            // =80
-	sub	sp, sp, #144            // =144
-	mov	 x20, x2
-	mov	 x21, x1
-	mov	 x19, x0
-	bl	mcl_fpDbl_mulPre4L
-	add	x0, x19, #64            // =64
-	add	x1, x21, #32            // =32
-	add	x2, x20, #32            // =32
-	bl	mcl_fpDbl_mulPre4L
-	ldp	x8, x9, [x20, #48]
-	ldp	x10, x11, [x20, #32]
-	ldp	 x12, x13, [x20]
-	ldp	x14, x15, [x20, #16]
-	adds	 x18, x12, x10
-	str	x18, [sp, #8]           // 8-byte Folded Spill
-	ldp	x10, x12, [x21, #16]
-	ldp	x16, x17, [x21, #48]
-	adcs	x22, x13, x11
-	ldp	 x11, x13, [x21]
-	adcs	x23, x14, x8
-	ldp	x8, x14, [x21, #32]
-	stp	x18, x22, [sp, #16]
-	adcs	x21, x15, x9
-	stp	x23, x21, [sp, #32]
-	adcs	x24, xzr, xzr
-	adds	 x25, x11, x8
-	adcs	x26, x13, x14
-	stp	x25, x26, [sp, #48]
-	adcs	x27, x10, x16
-	adcs	x28, x12, x17
-	stp	x27, x28, [sp, #64]
-	adcs	x20, xzr, xzr
-	add	x0, sp, #80             // =80
-	add	x1, sp, #48             // =48
-	add	x2, sp, #16             // =16
-	bl	mcl_fpDbl_mulPre4L
-	cmp	 x24, #0                // =0
-	csel	x8, x28, xzr, ne
-	and	 x9, x24, x20
-	ldp	x11, x10, [sp, #128]
-	ldp	x13, x12, [sp, #112]
-	ldp	x14, x15, [x19, #48]
-	ldp	x16, x17, [x19, #32]
-	ldp	x18, x0, [x19, #16]
-	csel	x1, x27, xzr, ne
-	csel	x2, x26, xzr, ne
-	csel	x3, x25, xzr, ne
-	cmp	 x20, #0                // =0
-	ldp	 x4, x5, [x19]
-	csel	x6, x21, xzr, ne
-	csel	x7, x23, xzr, ne
-	csel	x20, x22, xzr, ne
-	ldr	x21, [sp, #8]           // 8-byte Folded Reload
-	csel	x21, x21, xzr, ne
-	adds	 x3, x21, x3
-	adcs	x2, x20, x2
-	ldp	x20, x21, [sp, #96]
-	adcs	x1, x7, x1
-	adcs	x8, x6, x8
-	adcs	x6, xzr, xzr
-	adds	 x13, x3, x13
-	ldp	x3, x7, [sp, #80]
-	adcs	x12, x2, x12
-	adcs	x11, x1, x11
-	ldp	x1, x2, [x19, #112]
-	adcs	x8, x8, x10
-	adcs	x9, x6, x9
-	ldp	x10, x6, [x19, #96]
-	subs	 x3, x3, x4
-	sbcs	x4, x7, x5
-	ldp	x5, x7, [x19, #80]
-	sbcs	x18, x20, x18
-	sbcs	x0, x21, x0
-	ldp	x20, x21, [x19, #64]
-	sbcs	x13, x13, x16
-	sbcs	x12, x12, x17
-	sbcs	x11, x11, x14
-	sbcs	x8, x8, x15
-	sbcs	x9, x9, xzr
-	subs	 x3, x3, x20
-	sbcs	x4, x4, x21
-	sbcs	x18, x18, x5
-	sbcs	x0, x0, x7
-	sbcs	x13, x13, x10
-	sbcs	x12, x12, x6
-	sbcs	x11, x11, x1
-	sbcs	x8, x8, x2
-	sbcs	x9, x9, xzr
-	adds	 x16, x16, x3
-	str	x16, [x19, #32]
-	adcs	x16, x17, x4
-	adcs	x14, x14, x18
-	stp	x16, x14, [x19, #40]
-	adcs	x14, x15, x0
-	adcs	x13, x20, x13
-	stp	x14, x13, [x19, #56]
-	adcs	x12, x21, x12
-	adcs	x11, x5, x11
-	stp	x12, x11, [x19, #72]
-	adcs	x8, x7, x8
-	str	x8, [x19, #88]
-	adcs	x8, x10, x9
-	str	x8, [x19, #96]
-	adcs	x8, x6, xzr
-	str	x8, [x19, #104]
-	adcs	x8, x1, xzr
-	str	x8, [x19, #112]
-	adcs	x8, x2, xzr
-	str	x8, [x19, #120]
-	sub	sp, x29, #80            // =80
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end112:
-	.size	mcl_fpDbl_mulPre8L, .Lfunc_end112-mcl_fpDbl_mulPre8L
-
-	.globl	mcl_fpDbl_sqrPre8L
-	.align	2
-	.type	mcl_fpDbl_sqrPre8L,@function
-mcl_fpDbl_sqrPre8L:                     // @mcl_fpDbl_sqrPre8L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	add	x29, sp, #80            // =80
-	sub	sp, sp, #128            // =128
-	mov	 x20, x1
-	mov	 x19, x0
-	mov	 x2, x20
-	bl	mcl_fpDbl_mulPre4L
-	add	x0, x19, #64            // =64
-	add	x1, x20, #32            // =32
-	mov	 x2, x1
-	bl	mcl_fpDbl_mulPre4L
-	ldp	x8, x9, [x20, #16]
-	ldp	x10, x11, [x20, #32]
-	ldp	 x12, x13, [x20]
-	ldp	x14, x15, [x20, #48]
-	adds	 x22, x12, x10
-	adcs	x23, x13, x11
-	adcs	x20, x8, x14
-	adcs	x21, x9, x15
-	stp	x22, x23, [sp, #32]
-	stp	 x22, x23, [sp]
-	stp	x20, x21, [sp, #48]
-	stp	x20, x21, [sp, #16]
-	adcs	x24, xzr, xzr
-	add	x0, sp, #64             // =64
-	add	x1, sp, #32             // =32
-	mov	 x2, sp
-	bl	mcl_fpDbl_mulPre4L
-	ldp	x8, x9, [x19, #48]
-	ldp	 x10, x11, [x19]
-	ldp	x12, x13, [sp, #64]
-	ldp	x14, x15, [x19, #16]
-	ldp	x16, x17, [sp, #80]
-	ldp	x18, x0, [x19, #32]
-	subs	 x10, x12, x10
-	ldp	x1, x12, [sp, #96]
-	sbcs	x11, x13, x11
-	sbcs	x14, x16, x14
-	ldp	x13, x16, [sp, #112]
-	sbcs	x15, x17, x15
-	sbcs	x17, x1, x18
-	ldp	x1, x2, [x19, #64]
-	ldp	x3, x4, [x19, #80]
-	ldp	x5, x6, [x19, #96]
-	ldp	x7, x25, [x19, #112]
-	lsr	x26, x21, #63
-	sbcs	x12, x12, x0
-	sbcs	x13, x13, x8
-	sbcs	x16, x16, x9
-	sbcs	x27, x24, xzr
-	subs	 x10, x10, x1
-	sbcs	x11, x11, x2
-	sbcs	x14, x14, x3
-	sbcs	x15, x15, x4
-	sbcs	x17, x17, x5
-	sbcs	x12, x12, x6
-	sbcs	x13, x13, x7
-	sbcs	x16, x16, x25
-	sbcs	x27, x27, xzr
-	adds	 x22, x22, x22
-	adcs	x23, x23, x23
-	adcs	x20, x20, x20
-	adcs	x21, x21, x21
-	cmp	 x24, #0                // =0
-	csel	x24, x26, xzr, ne
-	csel	x21, x21, xzr, ne
-	csel	x20, x20, xzr, ne
-	csel	x23, x23, xzr, ne
-	csel	x22, x22, xzr, ne
-	adds	 x17, x17, x22
-	adcs	x12, x12, x23
-	adcs	x13, x13, x20
-	adcs	x16, x16, x21
-	adcs	x20, x27, x24
-	adds	 x10, x10, x18
-	str	x10, [x19, #32]
-	adcs	x10, x11, x0
-	adcs	x8, x14, x8
-	stp	x10, x8, [x19, #40]
-	adcs	x8, x15, x9
-	str	x8, [x19, #56]
-	adcs	x8, x17, x1
-	str	x8, [x19, #64]
-	adcs	x8, x12, x2
-	str	x8, [x19, #72]
-	adcs	x8, x13, x3
-	str	x8, [x19, #80]
-	adcs	x8, x16, x4
-	str	x8, [x19, #88]
-	adcs	x8, x20, x5
-	str	x8, [x19, #96]
-	adcs	x8, x6, xzr
-	str	x8, [x19, #104]
-	adcs	x8, x7, xzr
-	str	x8, [x19, #112]
-	adcs	x8, x25, xzr
-	str	x8, [x19, #120]
-	sub	sp, x29, #80            // =80
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end113:
-	.size	mcl_fpDbl_sqrPre8L, .Lfunc_end113-mcl_fpDbl_sqrPre8L
-
-	.globl	mcl_fp_mont8L
-	.align	2
-	.type	mcl_fp_mont8L,@function
-mcl_fp_mont8L:                          // @mcl_fp_mont8L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	add	x29, sp, #80            // =80
-	sub	sp, sp, #1424           // =1424
-	mov	 x20, x3
-	mov	 x26, x2
-	str	x26, [sp, #120]         // 8-byte Folded Spill
-	ldur	x19, [x20, #-8]
-	str	x19, [sp, #136]         // 8-byte Folded Spill
-	ldr	 x9, [x26]
-	mov	 x27, x1
-	str	x27, [sp, #128]         // 8-byte Folded Spill
-	str	x0, [sp, #112]          // 8-byte Folded Spill
-	sub	x8, x29, #160           // =160
-	mov	 x0, x27
-	mov	 x1, x9
-	bl	.LmulPv512x64
-	ldur	x24, [x29, #-160]
-	ldur	x8, [x29, #-96]
-	str	x8, [sp, #104]          // 8-byte Folded Spill
-	ldur	x8, [x29, #-104]
-	str	x8, [sp, #96]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-112]
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-120]
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-128]
-	str	x8, [sp, #72]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-136]
-	str	x8, [sp, #64]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-144]
-	str	x8, [sp, #56]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-152]
-	str	x8, [sp, #48]           // 8-byte Folded Spill
-	mul	 x1, x24, x19
-	sub	x8, x29, #240           // =240
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldur	x8, [x29, #-176]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-184]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-192]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldp	x19, x28, [x29, #-208]
-	ldp	x21, x23, [x29, #-224]
-	ldp	x25, x22, [x29, #-240]
-	ldr	x1, [x26, #8]
-	add	x8, sp, #1184           // =1184
-	mov	 x0, x27
-	bl	.LmulPv512x64
-	cmn	 x25, x24
-	ldr	x8, [sp, #1248]
-	ldr	x9, [sp, #1240]
-	ldp	x10, x12, [sp, #48]
-	adcs	x10, x22, x10
-	ldr	x11, [sp, #1232]
-	adcs	x12, x21, x12
-	ldr	x13, [sp, #1224]
-	ldp	x14, x16, [sp, #64]
-	adcs	x14, x23, x14
-	ldr	x15, [sp, #1216]
-	adcs	x16, x19, x16
-	ldr	x17, [sp, #1208]
-	ldp	x18, x1, [sp, #80]
-	adcs	x18, x28, x18
-	ldr	x0, [sp, #1200]
-	ldp	x2, x4, [sp, #24]
-	adcs	x1, x2, x1
-	ldr	x2, [sp, #1184]
-	ldp	x3, x5, [sp, #96]
-	adcs	x3, x4, x3
-	ldr	x4, [sp, #1192]
-	ldr	x6, [sp, #40]           // 8-byte Folded Reload
-	adcs	x5, x6, x5
-	adcs	x6, xzr, xzr
-	adds	 x19, x10, x2
-	adcs	x10, x12, x4
-	str	x10, [sp, #40]          // 8-byte Folded Spill
-	adcs	x10, x14, x0
-	str	x10, [sp, #88]          // 8-byte Folded Spill
-	adcs	x10, x16, x17
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x18, x15
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x1, x13
-	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x3, x11
-	adcs	x9, x5, x9
-	adcs	x8, x6, x8
-	stp	x8, x9, [sp, #96]
-	adcs	x8, xzr, xzr
-	stp	x8, x10, [sp, #48]
-	ldr	x22, [sp, #136]         // 8-byte Folded Reload
-	mul	 x1, x19, x22
-	add	x8, sp, #1104           // =1104
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldr	x8, [sp, #1168]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1160]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1152]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1144]
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	ldr	x25, [sp, #1136]
-	ldr	x26, [sp, #1128]
-	ldr	x27, [sp, #1120]
-	ldr	x21, [sp, #1112]
-	ldr	x28, [sp, #1104]
-	ldp	x24, x23, [sp, #120]
-	ldr	x1, [x24, #16]
-	add	x8, sp, #1024           // =1024
-	mov	 x0, x23
-	bl	.LmulPv512x64
-	cmn	 x19, x28
-	ldr	x8, [sp, #1088]
-	ldr	x9, [sp, #1080]
-	ldr	x10, [sp, #40]          // 8-byte Folded Reload
-	adcs	x10, x10, x21
-	ldr	x11, [sp, #1072]
-	ldp	x14, x12, [sp, #80]
-	adcs	x12, x12, x27
-	ldr	x13, [sp, #1064]
-	adcs	x14, x14, x26
-	ldr	x15, [sp, #1056]
-	ldp	x18, x16, [sp, #64]
-	adcs	x16, x16, x25
-	ldr	x17, [sp, #1048]
-	ldp	x0, x2, [sp, #8]
-	adcs	x18, x18, x0
-	ldr	x0, [sp, #1040]
-	ldr	x1, [sp, #56]           // 8-byte Folded Reload
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #1024]
-	ldp	x5, x3, [sp, #96]
-	ldp	x4, x6, [sp, #24]
-	adcs	x3, x3, x4
-	ldr	x4, [sp, #1032]
-	adcs	x5, x5, x6
-	ldr	x6, [sp, #48]           // 8-byte Folded Reload
-	adcs	x6, x6, xzr
-	adds	 x19, x10, x2
-	adcs	x10, x12, x4
-	str	x10, [sp, #40]          // 8-byte Folded Spill
-	adcs	x10, x14, x0
-	str	x10, [sp, #88]          // 8-byte Folded Spill
-	adcs	x10, x16, x17
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x18, x15
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x1, x13
-	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x3, x11
-	adcs	x9, x5, x9
-	adcs	x8, x6, x8
-	stp	x8, x9, [sp, #96]
-	adcs	x8, xzr, xzr
-	stp	x8, x10, [sp, #48]
-	mul	 x1, x19, x22
-	add	x8, sp, #944            // =944
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldr	x8, [sp, #1008]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1000]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #992]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x8, [sp, #984]
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	ldr	x25, [sp, #976]
-	ldr	x26, [sp, #968]
-	ldr	x27, [sp, #960]
-	ldr	x21, [sp, #952]
-	ldr	x28, [sp, #944]
-	mov	 x22, x24
-	ldr	x1, [x22, #24]
-	add	x8, sp, #864            // =864
-	mov	 x0, x23
-	bl	.LmulPv512x64
-	cmn	 x19, x28
-	ldr	x8, [sp, #928]
-	ldr	x9, [sp, #920]
-	ldr	x10, [sp, #40]          // 8-byte Folded Reload
-	adcs	x10, x10, x21
-	ldr	x11, [sp, #912]
-	ldp	x14, x12, [sp, #80]
-	adcs	x12, x12, x27
-	ldr	x13, [sp, #904]
-	adcs	x14, x14, x26
-	ldr	x15, [sp, #896]
-	ldp	x18, x16, [sp, #64]
-	adcs	x16, x16, x25
-	ldr	x17, [sp, #888]
-	ldp	x0, x2, [sp, #8]
-	adcs	x18, x18, x0
-	ldr	x0, [sp, #880]
-	ldr	x1, [sp, #56]           // 8-byte Folded Reload
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #864]
-	ldp	x5, x3, [sp, #96]
-	ldp	x4, x6, [sp, #24]
-	adcs	x3, x3, x4
-	ldr	x4, [sp, #872]
-	adcs	x5, x5, x6
-	ldr	x6, [sp, #48]           // 8-byte Folded Reload
-	adcs	x6, x6, xzr
-	adds	 x19, x10, x2
-	adcs	x10, x12, x4
-	str	x10, [sp, #40]          // 8-byte Folded Spill
-	adcs	x10, x14, x0
-	str	x10, [sp, #88]          // 8-byte Folded Spill
-	adcs	x10, x16, x17
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x18, x15
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x1, x13
-	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x3, x11
-	adcs	x9, x5, x9
-	adcs	x8, x6, x8
-	stp	x8, x9, [sp, #96]
-	adcs	x8, xzr, xzr
-	stp	x8, x10, [sp, #48]
-	ldr	x23, [sp, #136]         // 8-byte Folded Reload
-	mul	 x1, x19, x23
-	add	x8, sp, #784            // =784
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldr	x8, [sp, #848]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #840]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #832]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x24, [sp, #824]
-	ldr	x25, [sp, #816]
-	ldr	x26, [sp, #808]
-	ldr	x27, [sp, #800]
-	ldr	x21, [sp, #792]
-	ldr	x28, [sp, #784]
-	ldr	x1, [x22, #32]
-	add	x8, sp, #704            // =704
-	ldr	x22, [sp, #128]         // 8-byte Folded Reload
-	mov	 x0, x22
-	bl	.LmulPv512x64
-	cmn	 x19, x28
-	ldr	x8, [sp, #768]
-	ldr	x9, [sp, #760]
-	ldr	x10, [sp, #40]          // 8-byte Folded Reload
-	adcs	x10, x10, x21
-	ldr	x11, [sp, #752]
-	ldp	x14, x12, [sp, #80]
-	adcs	x12, x12, x27
-	ldr	x13, [sp, #744]
-	adcs	x14, x14, x26
-	ldr	x15, [sp, #736]
-	ldp	x18, x16, [sp, #64]
-	adcs	x16, x16, x25
-	ldr	x17, [sp, #728]
-	adcs	x18, x18, x24
-	ldr	x0, [sp, #720]
-	ldr	x1, [sp, #56]           // 8-byte Folded Reload
-	ldp	x2, x4, [sp, #16]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #704]
-	ldp	x5, x3, [sp, #96]
-	adcs	x3, x3, x4
-	ldr	x4, [sp, #712]
-	ldr	x6, [sp, #32]           // 8-byte Folded Reload
-	adcs	x5, x5, x6
-	ldr	x6, [sp, #48]           // 8-byte Folded Reload
-	adcs	x6, x6, xzr
-	adds	 x19, x10, x2
-	adcs	x10, x12, x4
-	str	x10, [sp, #40]          // 8-byte Folded Spill
-	adcs	x10, x14, x0
-	str	x10, [sp, #88]          // 8-byte Folded Spill
-	adcs	x10, x16, x17
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x18, x15
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x1, x13
-	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x3, x11
-	adcs	x9, x5, x9
-	adcs	x8, x6, x8
-	stp	x8, x9, [sp, #96]
-	adcs	x8, xzr, xzr
-	stp	x8, x10, [sp, #48]
-	mul	 x1, x19, x23
-	add	x8, sp, #624            // =624
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldr	x8, [sp, #688]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #680]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #672]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x24, [sp, #664]
-	ldr	x25, [sp, #656]
-	ldr	x26, [sp, #648]
-	ldr	x27, [sp, #640]
-	ldr	x21, [sp, #632]
-	ldr	x28, [sp, #624]
-	ldr	x23, [sp, #120]         // 8-byte Folded Reload
-	ldr	x1, [x23, #40]
-	add	x8, sp, #544            // =544
-	mov	 x0, x22
-	bl	.LmulPv512x64
-	cmn	 x19, x28
-	ldr	x8, [sp, #608]
-	ldr	x9, [sp, #600]
-	ldr	x10, [sp, #40]          // 8-byte Folded Reload
-	adcs	x10, x10, x21
-	ldr	x11, [sp, #592]
-	ldp	x14, x12, [sp, #80]
-	adcs	x12, x12, x27
-	ldr	x13, [sp, #584]
-	adcs	x14, x14, x26
-	ldr	x15, [sp, #576]
-	ldp	x18, x16, [sp, #64]
-	adcs	x16, x16, x25
-	ldr	x17, [sp, #568]
-	adcs	x18, x18, x24
-	ldr	x0, [sp, #560]
-	ldr	x1, [sp, #56]           // 8-byte Folded Reload
-	ldp	x2, x4, [sp, #16]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #544]
-	ldp	x5, x3, [sp, #96]
-	adcs	x3, x3, x4
-	ldr	x4, [sp, #552]
-	ldr	x6, [sp, #32]           // 8-byte Folded Reload
-	adcs	x5, x5, x6
-	ldr	x6, [sp, #48]           // 8-byte Folded Reload
-	adcs	x6, x6, xzr
-	adds	 x19, x10, x2
-	adcs	x10, x12, x4
-	str	x10, [sp, #40]          // 8-byte Folded Spill
-	adcs	x10, x14, x0
-	str	x10, [sp, #88]          // 8-byte Folded Spill
-	adcs	x10, x16, x17
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x18, x15
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x1, x13
-	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x3, x11
-	adcs	x9, x5, x9
-	adcs	x8, x6, x8
-	stp	x8, x9, [sp, #96]
-	adcs	x8, xzr, xzr
-	stp	x8, x10, [sp, #48]
-	ldr	x22, [sp, #136]         // 8-byte Folded Reload
-	mul	 x1, x19, x22
-	add	x8, sp, #464            // =464
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldr	x8, [sp, #528]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #520]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #512]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldp	x25, x24, [sp, #496]
-	ldp	x27, x26, [sp, #480]
-	ldp	x28, x21, [sp, #464]
-	ldr	x1, [x23, #48]
-	add	x8, sp, #384            // =384
-	ldr	x23, [sp, #128]         // 8-byte Folded Reload
-	mov	 x0, x23
-	bl	.LmulPv512x64
-	cmn	 x19, x28
-	ldp	x9, x8, [sp, #440]
-	ldr	x10, [sp, #40]          // 8-byte Folded Reload
-	adcs	x10, x10, x21
-	ldp	x13, x11, [sp, #424]
-	ldp	x14, x12, [sp, #80]
-	adcs	x12, x12, x27
-	adcs	x14, x14, x26
-	ldp	x17, x15, [sp, #408]
-	ldp	x18, x16, [sp, #64]
-	adcs	x16, x16, x25
-	adcs	x18, x18, x24
-	ldr	x1, [sp, #56]           // 8-byte Folded Reload
-	ldp	x2, x4, [sp, #16]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #384]
-	ldp	x5, x3, [sp, #96]
-	adcs	x3, x3, x4
-	ldp	x4, x0, [sp, #392]
-	ldr	x6, [sp, #32]           // 8-byte Folded Reload
-	adcs	x5, x5, x6
-	ldr	x6, [sp, #48]           // 8-byte Folded Reload
-	adcs	x6, x6, xzr
-	adds	 x19, x10, x2
-	adcs	x10, x12, x4
-	str	x10, [sp, #40]          // 8-byte Folded Spill
-	adcs	x10, x14, x0
-	str	x10, [sp, #88]          // 8-byte Folded Spill
-	adcs	x10, x16, x17
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x18, x15
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x1, x13
-	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x3, x11
-	adcs	x9, x5, x9
-	adcs	x8, x6, x8
-	stp	x8, x9, [sp, #96]
-	adcs	x8, xzr, xzr
-	stp	x8, x10, [sp, #48]
-	mul	 x1, x19, x22
-	add	x8, sp, #304            // =304
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldr	x8, [sp, #368]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldp	x22, x8, [sp, #352]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldp	x25, x24, [sp, #336]
-	ldp	x27, x26, [sp, #320]
-	ldp	x28, x21, [sp, #304]
-	ldr	x8, [sp, #120]          // 8-byte Folded Reload
-	ldr	x1, [x8, #56]
-	add	x8, sp, #224            // =224
-	mov	 x0, x23
-	bl	.LmulPv512x64
-	cmn	 x19, x28
-	ldp	x9, x8, [sp, #280]
-	ldr	x10, [sp, #40]          // 8-byte Folded Reload
-	adcs	x10, x10, x21
-	ldp	x13, x11, [sp, #264]
-	ldp	x14, x12, [sp, #80]
-	adcs	x12, x12, x27
-	adcs	x14, x14, x26
-	ldp	x17, x15, [sp, #248]
-	ldp	x18, x16, [sp, #64]
-	adcs	x16, x16, x25
-	adcs	x18, x18, x24
-	ldr	x1, [sp, #56]           // 8-byte Folded Reload
-	adcs	x1, x1, x22
-	ldr	x2, [sp, #224]
-	ldp	x5, x3, [sp, #96]
-	ldp	x4, x6, [sp, #24]
-	adcs	x3, x3, x4
-	ldp	x4, x0, [sp, #232]
-	adcs	x5, x5, x6
-	ldr	x6, [sp, #48]           // 8-byte Folded Reload
-	adcs	x6, x6, xzr
-	adds	 x19, x10, x2
-	adcs	x21, x12, x4
-	adcs	x22, x14, x0
-	adcs	x23, x16, x17
-	adcs	x24, x18, x15
-	adcs	x25, x1, x13
-	adcs	x10, x3, x11
-	str	x10, [sp, #128]         // 8-byte Folded Spill
-	adcs	x27, x5, x9
-	adcs	x28, x6, x8
-	adcs	x26, xzr, xzr
-	ldr	x8, [sp, #136]          // 8-byte Folded Reload
-	mul	 x1, x19, x8
-	add	x8, sp, #144            // =144
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldp	x15, x8, [sp, #200]
-	ldp	x9, x10, [sp, #144]
-	ldp	x11, x12, [sp, #160]
-	cmn	 x19, x9
-	ldp	x13, x9, [sp, #176]
-	adcs	x10, x21, x10
-	ldr	x14, [sp, #192]
-	adcs	x11, x22, x11
-	adcs	x12, x23, x12
-	adcs	x13, x24, x13
-	adcs	x9, x25, x9
-	ldp	x16, x17, [x20, #48]
-	ldp	x18, x0, [x20, #32]
-	ldp	x1, x2, [x20, #16]
-	ldp	 x3, x4, [x20]
-	ldr	x5, [sp, #128]          // 8-byte Folded Reload
-	adcs	x14, x5, x14
-	adcs	x15, x27, x15
-	adcs	x8, x28, x8
-	adcs	x5, x26, xzr
-	subs	 x3, x10, x3
-	sbcs	x4, x11, x4
-	sbcs	x1, x12, x1
-	sbcs	x2, x13, x2
-	sbcs	x18, x9, x18
-	sbcs	x0, x14, x0
-	sbcs	x16, x15, x16
-	sbcs	x17, x8, x17
-	sbcs	x5, x5, xzr
-	tst	 x5, #0x1
-	csel	x10, x10, x3, ne
-	csel	x11, x11, x4, ne
-	csel	x12, x12, x1, ne
-	csel	x13, x13, x2, ne
-	csel	x9, x9, x18, ne
-	csel	x14, x14, x0, ne
-	csel	x15, x15, x16, ne
-	csel	x8, x8, x17, ne
-	ldr	x16, [sp, #112]         // 8-byte Folded Reload
-	stp	 x10, x11, [x16]
-	stp	x12, x13, [x16, #16]
-	stp	x9, x14, [x16, #32]
-	stp	x15, x8, [x16, #48]
-	sub	sp, x29, #80            // =80
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
-	ret
-.Lfunc_end114:
-	.size	mcl_fp_mont8L, .Lfunc_end114-mcl_fp_mont8L
-
-	.globl	mcl_fp_montNF8L
-	.align	2
-	.type	mcl_fp_montNF8L,@function
-mcl_fp_montNF8L:                        // @mcl_fp_montNF8L
-// BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	add	x29, sp, #80            // =80
-	sub	sp, sp, #1424           // =1424
-	mov	 x20, x3
-	mov	 x26, x2
-	str	x26, [sp, #128]         // 8-byte Folded Spill
-	ldur	x19, [x20, #-8]
-	str	x19, [sp, #136]         // 8-byte Folded Spill
-	ldr	 x9, [x26]
-	mov	 x27, x1
-	stp	x0, x27, [sp, #112]
-	sub	x8, x29, #160           // =160
-	mov	 x0, x27
-	mov	 x1, x9
-	bl	.LmulPv512x64
-	ldur	x24, [x29, #-160]
-	ldur	x8, [x29, #-96]
-	str	x8, [sp, #104]          // 8-byte Folded Spill
-	ldur	x8, [x29, #-104]
-	str	x8, [sp, #96]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-112]
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-120]
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-128]
-	str	x8, [sp, #72]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-136]
-	str	x8, [sp, #64]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-144]
-	str	x8, [sp, #56]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-152]
-	str	x8, [sp, #48]           // 8-byte Folded Spill
-	mul	 x1, x24, x19
-	sub	x8, x29, #240           // =240
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldur	x8, [x29, #-176]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-184]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-192]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldp	x19, x28, [x29, #-208]
-	ldp	x21, x23, [x29, #-224]
-	ldp	x25, x22, [x29, #-240]
-	ldr	x1, [x26, #8]
-	add	x8, sp, #1184           // =1184
-	mov	 x0, x27
-	bl	.LmulPv512x64
-	cmn	 x25, x24
-	ldr	x8, [sp, #1248]
-	ldr	x9, [sp, #1240]
-	ldp	x10, x12, [sp, #48]
-	adcs	x10, x22, x10
-	ldr	x11, [sp, #1232]
-	adcs	x12, x21, x12
-	ldr	x13, [sp, #1224]
-	ldp	x14, x16, [sp, #64]
-	adcs	x14, x23, x14
-	ldr	x15, [sp, #1216]
-	adcs	x16, x19, x16
-	ldr	x17, [sp, #1208]
-	ldp	x18, x1, [sp, #80]
-	adcs	x18, x28, x18
-	ldr	x0, [sp, #1192]
-	ldp	x2, x4, [sp, #24]
-	adcs	x1, x2, x1
-	ldr	x2, [sp, #1184]
-	ldp	x3, x5, [sp, #96]
-	adcs	x3, x4, x3
-	ldr	x4, [sp, #1200]
-	ldr	x6, [sp, #40]           // 8-byte Folded Reload
-	adcs	x5, x6, x5
-	adds	 x19, x10, x2
-	adcs	x10, x12, x0
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x14, x4
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x16, x17
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x18, x15
-	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x1, x13
-	str	x10, [sp, #56]          // 8-byte Folded Spill
-	adcs	x10, x3, x11
-	adcs	x9, x5, x9
-	stp	x9, x10, [sp, #96]
-	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x27, [sp, #136]         // 8-byte Folded Reload
-	mul	 x1, x19, x27
-	add	x8, sp, #1104           // =1104
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldr	x8, [sp, #1168]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1160]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1152]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1144]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x23, [sp, #1136]
-	ldr	x24, [sp, #1128]
-	ldr	x25, [sp, #1120]
-	ldr	x21, [sp, #1112]
-	ldr	x26, [sp, #1104]
-	ldp	x22, x28, [sp, #120]
-	ldr	x1, [x28, #16]
-	add	x8, sp, #1024           // =1024
-	mov	 x0, x22
-	bl	.LmulPv512x64
-	cmn	 x19, x26
-	ldr	x8, [sp, #1088]
-	ldr	x9, [sp, #1080]
-	ldp	x10, x18, [sp, #48]
-	adcs	x10, x10, x21
-	ldr	x11, [sp, #1072]
-	ldp	x14, x12, [sp, #72]
-	adcs	x12, x12, x25
-	ldr	x13, [sp, #1064]
-	adcs	x14, x14, x24
-	ldr	x15, [sp, #1056]
-	ldr	x16, [sp, #64]          // 8-byte Folded Reload
-	adcs	x16, x16, x23
-	ldr	x17, [sp, #1048]
-	ldp	x0, x2, [sp, #16]
-	adcs	x18, x18, x0
-	ldr	x0, [sp, #1032]
-	ldp	x3, x1, [sp, #96]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #1024]
-	ldp	x4, x6, [sp, #32]
-	adcs	x3, x3, x4
-	ldr	x4, [sp, #1040]
-	ldr	x5, [sp, #88]           // 8-byte Folded Reload
-	adcs	x5, x5, x6
-	adds	 x19, x10, x2
-	adcs	x10, x12, x0
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x14, x4
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x16, x17
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x18, x15
-	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x1, x13
-	str	x10, [sp, #56]          // 8-byte Folded Spill
-	adcs	x10, x3, x11
-	adcs	x9, x5, x9
-	stp	x9, x10, [sp, #96]
-	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	mul	 x1, x19, x27
-	add	x8, sp, #944            // =944
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldr	x8, [sp, #1008]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1000]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #992]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #984]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x23, [sp, #976]
-	ldr	x24, [sp, #968]
-	ldr	x25, [sp, #960]
-	ldr	x21, [sp, #952]
-	ldr	x26, [sp, #944]
-	ldr	x1, [x28, #24]
-	add	x8, sp, #864            // =864
-	mov	 x27, x22
-	mov	 x0, x27
-	bl	.LmulPv512x64
-	cmn	 x19, x26
-	ldr	x8, [sp, #928]
-	ldr	x9, [sp, #920]
-	ldp	x10, x18, [sp, #48]
-	adcs	x10, x10, x21
-	ldr	x11, [sp, #912]
-	ldp	x14, x12, [sp, #72]
-	adcs	x12, x12, x25
-	ldr	x13, [sp, #904]
-	adcs	x14, x14, x24
-	ldr	x15, [sp, #896]
-	ldr	x16, [sp, #64]          // 8-byte Folded Reload
-	adcs	x16, x16, x23
-	ldr	x17, [sp, #888]
-	ldp	x0, x2, [sp, #16]
-	adcs	x18, x18, x0
-	ldr	x0, [sp, #872]
-	ldp	x3, x1, [sp, #96]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #864]
-	ldp	x4, x6, [sp, #32]
-	adcs	x3, x3, x4
-	ldr	x4, [sp, #880]
-	ldr	x5, [sp, #88]           // 8-byte Folded Reload
-	adcs	x5, x5, x6
-	adds	 x19, x10, x2
-	adcs	x10, x12, x0
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x14, x4
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x16, x17
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x18, x15
-	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x1, x13
-	str	x10, [sp, #56]          // 8-byte Folded Spill
-	adcs	x10, x3, x11
-	adcs	x9, x5, x9
-	stp	x9, x10, [sp, #96]
-	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x28, [sp, #136]         // 8-byte Folded Reload
-	mul	 x1, x19, x28
-	add	x8, sp, #784            // =784
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldr	x8, [sp, #848]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldr	x8, [sp, #840]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #832]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #824]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x23, [sp, #816]
-	ldr	x24, [sp, #808]
-	ldr	x25, [sp, #800]
-	ldr	x21, [sp, #792]
-	ldr	x26, [sp, #784]
-	ldr	x22, [sp, #128]         // 8-byte Folded Reload
-	ldr	x1, [x22, #32]
-	add	x8, sp, #704            // =704
-	mov	 x0, x27
-	bl	.LmulPv512x64
-	cmn	 x19, x26
-	ldr	x8, [sp, #768]
-	ldr	x9, [sp, #760]
-	ldp	x10, x18, [sp, #48]
-	adcs	x10, x10, x21
-	ldr	x11, [sp, #752]
-	ldp	x14, x12, [sp, #72]
-	adcs	x12, x12, x25
-	ldr	x13, [sp, #744]
-	adcs	x14, x14, x24
-	ldr	x15, [sp, #736]
-	ldr	x16, [sp, #64]          // 8-byte Folded Reload
-	adcs	x16, x16, x23
-	ldr	x17, [sp, #728]
-	ldp	x0, x2, [sp, #16]
-	adcs	x18, x18, x0
-	ldr	x0, [sp, #712]
-	ldp	x3, x1, [sp, #96]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #704]
-	ldp	x4, x6, [sp, #32]
-	adcs	x3, x3, x4
-	ldr	x4, [sp, #720]
-	ldr	x5, [sp, #88]           // 8-byte Folded Reload
-	adcs	x5, x5, x6
-	adds	 x19, x10, x2
-	adcs	x10, x12, x0
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x14, x4
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x16, x17
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x18, x15
-	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x1, x13
-	str	x10, [sp, #56]          // 8-byte Folded Spill
-	adcs	x10, x3, x11
-	adcs	x9, x5, x9
-	stp	x9, x10, [sp, #96]
-	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	mul	 x1, x19, x28
-	add	x8, sp, #624            // =624
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldr	x8, [sp, #688]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldr	x8, [sp, #680]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #672]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #664]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x23, [sp, #656]
-	ldr	x24, [sp, #648]
-	ldr	x25, [sp, #640]
-	ldr	x21, [sp, #632]
-	ldr	x26, [sp, #624]
-	mov	 x27, x22
-	ldr	x1, [x27, #40]
-	add	x8, sp, #544            // =544
-	ldr	x28, [sp, #120]         // 8-byte Folded Reload
-	mov	 x0, x28
-	bl	.LmulPv512x64
-	cmn	 x19, x26
-	ldr	x8, [sp, #608]
-	ldr	x9, [sp, #600]
-	ldp	x10, x18, [sp, #48]
-	adcs	x10, x10, x21
-	ldr	x11, [sp, #592]
-	ldp	x14, x12, [sp, #72]
-	adcs	x12, x12, x25
-	ldr	x13, [sp, #584]
-	adcs	x14, x14, x24
-	ldr	x15, [sp, #576]
-	ldr	x16, [sp, #64]          // 8-byte Folded Reload
-	adcs	x16, x16, x23
-	ldr	x17, [sp, #568]
-	ldp	x0, x2, [sp, #16]
-	adcs	x18, x18, x0
-	ldr	x0, [sp, #552]
-	ldp	x3, x1, [sp, #96]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #544]
-	ldp	x4, x6, [sp, #32]
-	adcs	x3, x3, x4
-	ldr	x4, [sp, #560]
-	ldr	x5, [sp, #88]           // 8-byte Folded Reload
-	adcs	x5, x5, x6
-	adds	 x19, x10, x2
-	adcs	x10, x12, x0
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x14, x4
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x16, x17
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x18, x15
-	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x1, x13
-	str	x10, [sp, #56]          // 8-byte Folded Spill
-	adcs	x10, x3, x11
-	adcs	x9, x5, x9
-	stp	x9, x10, [sp, #96]
-	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x22, [sp, #136]         // 8-byte Folded Reload
-	mul	 x1, x19, x22
-	add	x8, sp, #464            // =464
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldr	x8, [sp, #528]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldr	x8, [sp, #520]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #512]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldp	x23, x8, [sp, #496]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldp	x25, x24, [sp, #480]
-	ldp	x26, x21, [sp, #464]
-	ldr	x1, [x27, #48]
-	add	x8, sp, #384            // =384
-	mov	 x0, x28
-	bl	.LmulPv512x64
-	cmn	 x19, x26
-	ldp	x9, x8, [sp, #440]
-	ldp	x10, x18, [sp, #48]
-	adcs	x10, x10, x21
-	ldp	x13, x11, [sp, #424]
-	ldp	x14, x12, [sp, #72]
-	adcs	x12, x12, x25
-	adcs	x14, x14, x24
-	ldp	x17, x15, [sp, #408]
-	ldr	x16, [sp, #64]          // 8-byte Folded Reload
-	adcs	x16, x16, x23
-	ldp	x0, x2, [sp, #16]
-	adcs	x18, x18, x0
-	ldp	x3, x1, [sp, #96]
+	adcs	x17, x17, x3
 	adcs	x1, x1, x2
-	ldp	x2, x0, [sp, #384]
-	ldp	x4, x6, [sp, #32]
-	adcs	x3, x3, x4
-	ldr	x4, [sp, #400]
-	ldr	x5, [sp, #88]           // 8-byte Folded Reload
-	adcs	x5, x5, x6
-	adds	 x19, x10, x2
-	adcs	x10, x12, x0
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x14, x4
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x16, x17
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x18, x15
-	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x1, x13
-	str	x10, [sp, #56]          // 8-byte Folded Spill
-	adcs	x10, x3, x11
-	adcs	x9, x5, x9
-	stp	x9, x10, [sp, #96]
-	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	mul	 x1, x19, x22
-	add	x8, sp, #304            // =304
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldp	x27, x8, [sp, #360]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldp	x22, x28, [sp, #344]
-	ldp	x24, x23, [sp, #328]
-	ldp	x21, x25, [sp, #312]
-	ldr	x26, [sp, #304]
-	ldp	x0, x8, [sp, #120]
-	ldr	x1, [x8, #56]
-	add	x8, sp, #224            // =224
-	bl	.LmulPv512x64
-	cmn	 x19, x26
-	ldp	x9, x8, [sp, #280]
-	ldp	x10, x18, [sp, #48]
-	adcs	x10, x10, x21
-	ldp	x13, x11, [sp, #264]
-	ldp	x14, x12, [sp, #72]
-	adcs	x12, x12, x25
-	adcs	x14, x14, x24
-	ldp	x17, x15, [sp, #248]
-	ldr	x16, [sp, #64]          // 8-byte Folded Reload
+	mul		x15, x8, x24
+	adcs	x2, x4, x5
+	mul		x21, x15, x18
+	adcs	x3, x6, xzr
+	mul		x20, x15, x0
+	cmn		x21, x8
+	mul		x19, x15, x12
+	adcs	x8, x20, x9
+	mul		x7, x15, x11
+	adcs	x9, x19, x10
+	mul		x14, x15, x27
+	adcs	x10, x7, x17
+	mul		x16, x15, x30
+	adcs	x17, x14, x1
+	adcs	x16, x16, x2
+	umulh	x22, x15, x30
+	umulh	x23, x15, x27
+	umulh	x24, x15, x11
+	mov	 x28, x11
+	umulh	x25, x15, x12
+	umulh	x26, x15, x0
+	umulh	x15, x15, x18
+	adcs	x11, x3, xzr
+	adds		x8, x8, x15
+	adcs	x9, x9, x26
+	adcs	x10, x10, x25
+	adcs	x15, x17, x24
 	adcs	x16, x16, x23
-	adcs	x18, x18, x22
-	ldp	x2, x0, [sp, #224]
-	ldp	x3, x1, [sp, #96]
-	adcs	x1, x1, x28
-	adcs	x3, x3, x27
-	ldr	x4, [sp, #240]
-	ldr	x5, [sp, #88]           // 8-byte Folded Reload
-	ldr	x6, [sp, #40]           // 8-byte Folded Reload
-	adcs	x5, x5, x6
-	adds	 x19, x10, x2
-	adcs	x21, x12, x0
-	adcs	x22, x14, x4
-	adcs	x23, x16, x17
-	adcs	x24, x18, x15
-	adcs	x25, x1, x13
-	adcs	x26, x3, x11
-	adcs	x27, x5, x9
-	adcs	x28, x8, xzr
-	ldr	x8, [sp, #136]          // 8-byte Folded Reload
-	mul	 x1, x19, x8
-	add	x8, sp, #144            // =144
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldp	x15, x8, [sp, #200]
-	ldp	x9, x10, [sp, #144]
-	ldp	x11, x12, [sp, #160]
-	cmn	 x19, x9
-	ldp	x13, x9, [sp, #176]
-	adcs	x10, x21, x10
-	ldr	x14, [sp, #192]
-	adcs	x11, x22, x11
-	adcs	x12, x23, x12
-	adcs	x13, x24, x13
-	adcs	x9, x25, x9
-	ldp	x16, x17, [x20, #48]
-	ldp	x18, x0, [x20, #32]
-	ldp	x1, x2, [x20, #16]
-	ldp	 x3, x4, [x20]
-	adcs	x14, x26, x14
-	adcs	x15, x27, x15
-	adcs	x8, x28, x8
-	subs	 x3, x10, x3
-	sbcs	x4, x11, x4
-	sbcs	x1, x12, x1
-	sbcs	x2, x13, x2
-	sbcs	x18, x9, x18
-	sbcs	x0, x14, x0
-	sbcs	x16, x15, x16
-	sbcs	x17, x8, x17
-	cmp	 x17, #0                // =0
-	csel	x10, x10, x3, lt
-	csel	x11, x11, x4, lt
-	csel	x12, x12, x1, lt
-	csel	x13, x13, x2, lt
-	csel	x9, x9, x18, lt
-	csel	x14, x14, x0, lt
-	csel	x15, x15, x16, lt
-	csel	x8, x8, x17, lt
-	ldr	x16, [sp, #112]         // 8-byte Folded Reload
-	stp	 x10, x11, [x16]
-	stp	x12, x13, [x16, #16]
-	stp	x9, x14, [x16, #32]
-	stp	x15, x8, [x16, #48]
-	sub	sp, x29, #80            // =80
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
+	adcs	x17, x11, x22
+	subs		x3, x8, x18
+	sbcs	x2, x9, x0
+	sbcs	x11, x10, x12
+	sbcs	x14, x15, x28
+	sbcs	x18, x16, x27
+	sbcs	x0, x17, x30
+	asr	x1, x0, #63
+	cmp		x1, #0          // =0
+	csel	x10, x10, x11, lt
+	csel	x11, x15, x14, lt
+	ldr	x14, [sp, #96]          // 8-byte Folded Reload
+	csel	x8, x8, x3, lt
+	csel	x9, x9, x2, lt
+	csel	x12, x16, x18, lt
+	csel	x13, x17, x0, lt
+	stp		x8, x9, [x14]
+	stp	x10, x11, [x14, #16]
+	stp	x12, x13, [x14, #32]
+	ldp	x29, x30, [sp, #192]    // 8-byte Folded Reload
+	ldp	x20, x19, [sp, #176]    // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #160]    // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #144]    // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #128]    // 8-byte Folded Reload
+	ldp	x28, x27, [sp, #112]    // 8-byte Folded Reload
+	add	sp, sp, #208            // =208
 	ret
-.Lfunc_end115:
-	.size	mcl_fp_montNF8L, .Lfunc_end115-mcl_fp_montNF8L
+.Lfunc_end44:
+	.size	mcl_fp_montNF6L, .Lfunc_end44-mcl_fp_montNF6L
 
-	.globl	mcl_fp_montRed8L
-	.align	2
-	.type	mcl_fp_montRed8L,@function
-mcl_fp_montRed8L:                       // @mcl_fp_montRed8L
+	.globl	mcl_fp_montRed6L
+	.p2align	2
+	.type	mcl_fp_montRed6L,@function
+mcl_fp_montRed6L:                       // @mcl_fp_montRed6L
 // BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	add	x29, sp, #80            // =80
-	sub	sp, sp, #800            // =800
-	mov	 x20, x2
-	ldur	x9, [x20, #-8]
-	str	x9, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [x20, #48]
-	str	x8, [sp, #144]          // 8-byte Folded Spill
-	ldr	x8, [x20, #56]
-	str	x8, [sp, #152]          // 8-byte Folded Spill
-	ldr	x8, [x20, #32]
-	str	x8, [sp, #120]          // 8-byte Folded Spill
-	ldr	x8, [x20, #40]
-	str	x8, [sp, #128]          // 8-byte Folded Spill
-	ldr	x8, [x20, #16]
-	str	x8, [sp, #104]          // 8-byte Folded Spill
-	ldr	x8, [x20, #24]
-	str	x8, [sp, #112]          // 8-byte Folded Spill
-	ldr	 x8, [x20]
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x8, [x20, #8]
-	str	x8, [sp, #96]           // 8-byte Folded Spill
-	ldr	x8, [x1, #112]
-	str	x8, [sp, #72]           // 8-byte Folded Spill
-	ldr	x8, [x1, #120]
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldr	x8, [x1, #96]
-	str	x8, [sp, #56]           // 8-byte Folded Spill
-	ldr	x8, [x1, #104]
-	str	x8, [sp, #64]           // 8-byte Folded Spill
-	ldr	x8, [x1, #80]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldr	x8, [x1, #88]
-	str	x8, [sp, #48]           // 8-byte Folded Spill
-	ldp	x28, x8, [x1, #64]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldp	x22, x25, [x1, #48]
-	ldp	x24, x19, [x1, #32]
-	ldp	x27, x26, [x1, #16]
-	ldp	 x21, x23, [x1]
-	str	x0, [sp, #136]          // 8-byte Folded Spill
-	mul	 x1, x21, x9
-	sub	x8, x29, #160           // =160
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldp	x9, x8, [x29, #-104]
-	ldp	x11, x10, [x29, #-120]
-	ldp	x16, x12, [x29, #-136]
-	ldp	x13, x14, [x29, #-160]
-	ldur	x15, [x29, #-144]
-	cmn	 x21, x13
-	adcs	x21, x23, x14
-	adcs	x13, x27, x15
-	adcs	x26, x26, x16
-	adcs	x24, x24, x12
-	adcs	x11, x19, x11
-	stp	x11, x13, [sp, #8]
-	adcs	x22, x22, x10
-	adcs	x25, x25, x9
-	adcs	x27, x28, x8
-	ldr	x8, [sp, #24]           // 8-byte Folded Reload
-	adcs	x28, x8, xzr
-	ldp	x19, x8, [sp, #32]
-	adcs	x23, x8, xzr
-	ldr	x8, [sp, #48]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #48]           // 8-byte Folded Spill
-	ldr	x8, [sp, #56]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #56]           // 8-byte Folded Spill
-	ldr	x8, [sp, #64]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #64]           // 8-byte Folded Spill
-	ldr	x8, [sp, #72]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #72]           // 8-byte Folded Spill
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	adcs	x8, xzr, xzr
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	mul	 x1, x21, x19
-	sub	x8, x29, #240           // =240
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldp	x9, x8, [x29, #-184]
-	ldp	x11, x10, [x29, #-200]
-	ldp	x16, x12, [x29, #-216]
-	ldp	x13, x14, [x29, #-240]
-	ldur	x15, [x29, #-224]
-	cmn	 x21, x13
-	ldr	x13, [sp, #16]          // 8-byte Folded Reload
-	adcs	x21, x13, x14
-	adcs	x13, x26, x15
-	str	x13, [sp, #24]          // 8-byte Folded Spill
-	adcs	x24, x24, x16
-	ldr	x13, [sp, #8]           // 8-byte Folded Reload
-	adcs	x12, x13, x12
-	str	x12, [sp, #16]          // 8-byte Folded Spill
-	adcs	x22, x22, x11
-	adcs	x25, x25, x10
-	adcs	x27, x27, x9
-	adcs	x28, x28, x8
-	adcs	x23, x23, xzr
-	ldr	x8, [sp, #48]           // 8-byte Folded Reload
-	adcs	x26, x8, xzr
-	ldr	x8, [sp, #56]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #56]           // 8-byte Folded Spill
-	ldr	x8, [sp, #64]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #64]           // 8-byte Folded Spill
-	ldr	x8, [sp, #72]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #72]           // 8-byte Folded Spill
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldr	x8, [sp, #40]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #48]           // 8-byte Folded Spill
-	mul	 x1, x21, x19
-	add	x8, sp, #560            // =560
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldr	x8, [sp, #624]
-	ldr	x9, [sp, #616]
-	ldr	x10, [sp, #608]
-	ldr	x11, [sp, #600]
-	ldr	x12, [sp, #592]
-	ldr	x13, [sp, #560]
-	ldr	x14, [sp, #568]
-	ldr	x15, [sp, #576]
-	ldr	x16, [sp, #584]
-	cmn	 x21, x13
-	ldr	x13, [sp, #24]          // 8-byte Folded Reload
-	adcs	x21, x13, x14
-	adcs	x13, x24, x15
-	str	x13, [sp, #40]          // 8-byte Folded Spill
-	ldr	x13, [sp, #16]          // 8-byte Folded Reload
-	adcs	x13, x13, x16
-	str	x13, [sp, #24]          // 8-byte Folded Spill
-	adcs	x22, x22, x12
-	adcs	x25, x25, x11
-	adcs	x27, x27, x10
-	adcs	x28, x28, x9
-	adcs	x23, x23, x8
-	adcs	x26, x26, xzr
-	ldr	x8, [sp, #56]           // 8-byte Folded Reload
-	adcs	x24, x8, xzr
-	ldr	x8, [sp, #64]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #64]           // 8-byte Folded Spill
-	ldr	x8, [sp, #72]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #72]           // 8-byte Folded Spill
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldr	x8, [sp, #48]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #56]           // 8-byte Folded Spill
-	mul	 x1, x21, x19
-	add	x8, sp, #480            // =480
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldr	x8, [sp, #544]
-	ldr	x9, [sp, #536]
-	ldr	x10, [sp, #528]
-	ldr	x11, [sp, #520]
-	ldr	x12, [sp, #512]
-	ldp	x13, x14, [sp, #480]
-	ldp	x15, x16, [sp, #496]
-	cmn	 x21, x13
-	ldr	x13, [sp, #40]          // 8-byte Folded Reload
-	adcs	x21, x13, x14
-	ldr	x13, [sp, #24]          // 8-byte Folded Reload
-	adcs	x13, x13, x15
-	adcs	x22, x22, x16
-	adcs	x25, x25, x12
-	adcs	x27, x27, x11
-	adcs	x28, x28, x10
-	adcs	x23, x23, x9
-	adcs	x26, x26, x8
-	adcs	x24, x24, xzr
-	ldr	x8, [sp, #64]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #64]           // 8-byte Folded Spill
-	ldr	x8, [sp, #72]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #72]           // 8-byte Folded Spill
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldr	x8, [sp, #56]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	stp	x13, x8, [sp, #48]
-	mul	 x1, x21, x19
-	add	x8, sp, #400            // =400
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldp	x9, x8, [sp, #456]
-	ldp	x11, x10, [sp, #440]
-	ldp	x16, x12, [sp, #424]
-	ldp	x13, x14, [sp, #400]
-	ldr	x15, [sp, #416]
-	cmn	 x21, x13
-	ldr	x13, [sp, #48]          // 8-byte Folded Reload
-	adcs	x21, x13, x14
-	adcs	x13, x22, x15
-	str	x13, [sp, #48]          // 8-byte Folded Spill
-	adcs	x25, x25, x16
-	adcs	x27, x27, x12
-	adcs	x28, x28, x11
-	adcs	x23, x23, x10
-	adcs	x26, x26, x9
-	adcs	x24, x24, x8
-	ldr	x8, [sp, #64]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #64]           // 8-byte Folded Spill
-	ldr	x8, [sp, #72]           // 8-byte Folded Reload
-	adcs	x22, x8, xzr
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldr	x8, [sp, #56]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #72]           // 8-byte Folded Spill
-	mul	 x1, x21, x19
-	add	x8, sp, #320            // =320
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldp	x9, x8, [sp, #376]
-	ldp	x11, x10, [sp, #360]
-	ldp	x16, x12, [sp, #344]
-	ldp	x13, x14, [sp, #320]
-	ldr	x15, [sp, #336]
-	cmn	 x21, x13
-	ldr	x13, [sp, #48]          // 8-byte Folded Reload
-	adcs	x21, x13, x14
-	adcs	x13, x25, x15
-	adcs	x27, x27, x16
-	adcs	x28, x28, x12
-	adcs	x23, x23, x11
-	adcs	x26, x26, x10
-	adcs	x24, x24, x9
-	ldr	x9, [sp, #64]           // 8-byte Folded Reload
-	adcs	x8, x9, x8
-	stp	x13, x8, [sp, #56]
-	adcs	x22, x22, xzr
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x25, x8, xzr
-	ldr	x8, [sp, #72]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	mul	 x1, x21, x19
-	add	x8, sp, #240            // =240
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldp	x9, x8, [sp, #296]
-	ldp	x11, x10, [sp, #280]
-	ldp	x16, x12, [sp, #264]
-	ldp	x13, x14, [sp, #240]
-	ldr	x15, [sp, #256]
-	cmn	 x21, x13
-	ldr	x13, [sp, #56]          // 8-byte Folded Reload
-	adcs	x21, x13, x14
-	adcs	x13, x27, x15
-	adcs	x28, x28, x16
-	adcs	x23, x23, x12
-	adcs	x26, x26, x11
-	adcs	x24, x24, x10
-	ldr	x10, [sp, #64]          // 8-byte Folded Reload
-	adcs	x9, x10, x9
-	stp	x9, x13, [sp, #64]
-	adcs	x22, x22, x8
-	adcs	x25, x25, xzr
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x27, x8, xzr
-	mul	 x1, x21, x19
-	add	x8, sp, #160            // =160
-	mov	 x0, x20
-	bl	.LmulPv512x64
-	ldp	x9, x8, [sp, #216]
-	ldp	x11, x10, [sp, #200]
-	ldp	x16, x12, [sp, #184]
-	ldp	x13, x14, [sp, #160]
-	ldr	x15, [sp, #176]
-	cmn	 x21, x13
-	ldr	x13, [sp, #72]          // 8-byte Folded Reload
-	adcs	x13, x13, x14
-	adcs	x14, x28, x15
-	adcs	x15, x23, x16
-	adcs	x12, x26, x12
-	adcs	x11, x24, x11
-	ldr	x16, [sp, #64]          // 8-byte Folded Reload
-	adcs	x10, x16, x10
-	adcs	x9, x22, x9
-	adcs	x8, x25, x8
-	adcs	x16, x27, xzr
-	ldp	x17, x18, [sp, #88]
-	subs	 x17, x13, x17
-	sbcs	x18, x14, x18
-	ldp	x0, x1, [sp, #104]
-	sbcs	x0, x15, x0
-	sbcs	x1, x12, x1
-	ldp	x2, x3, [sp, #120]
-	sbcs	x2, x11, x2
-	sbcs	x3, x10, x3
-	ldp	x4, x5, [sp, #144]
-	sbcs	x4, x9, x4
-	sbcs	x5, x8, x5
-	sbcs	x16, x16, xzr
-	tst	 x16, #0x1
-	csel	x13, x13, x17, ne
-	csel	x14, x14, x18, ne
-	csel	x15, x15, x0, ne
-	csel	x12, x12, x1, ne
-	csel	x11, x11, x2, ne
-	csel	x10, x10, x3, ne
-	csel	x9, x9, x4, ne
-	csel	x8, x8, x5, ne
-	ldr	x16, [sp, #136]         // 8-byte Folded Reload
-	stp	 x13, x14, [x16]
-	stp	x15, x12, [x16, #16]
-	stp	x11, x10, [x16, #32]
-	stp	x9, x8, [x16, #48]
-	sub	sp, x29, #80            // =80
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
+	str	x27, [sp, #-80]!        // 8-byte Folded Spill
+	ldp	x12, x14, [x2, #-8]
+	ldp		x3, x4, [x1]
+	ldr	x13, [x2, #8]
+	ldp	x11, x10, [x2, #16]
+	stp	x20, x19, [sp, #64]     // 8-byte Folded Spill
+	ldp	x9, x8, [x2, #32]
+	mul		x20, x3, x12
+	stp	x26, x25, [sp, #16]     // 8-byte Folded Spill
+	mul		x26, x20, x13
+	umulh	x27, x20, x14
+	mul		x25, x20, x11
+	adds		x26, x27, x26
+	umulh	x27, x20, x13
+	stp	x24, x23, [sp, #32]     // 8-byte Folded Spill
+	mul		x24, x20, x10
+	adcs	x25, x27, x25
+	umulh	x27, x20, x11
+	mul		x23, x20, x9
+	adcs	x24, x27, x24
+	umulh	x27, x20, x10
+	stp	x22, x21, [sp, #48]     // 8-byte Folded Spill
+	mul		x22, x20, x8
+	adcs	x23, x27, x23
+	umulh	x27, x20, x9
+	ldp	x2, x18, [x1, #16]
+	umulh	x21, x20, x8
+	adcs	x22, x27, x22
+	mul		x20, x20, x14
+	adcs	x21, x21, xzr
+	ldp	x16, x15, [x1, #32]
+	cmn		x20, x3
+	adcs	x3, x26, x4
+	ldp	x17, x5, [x1, #48]
+	adcs	x2, x25, x2
+	adcs	x18, x24, x18
+	adcs	x16, x23, x16
+	adcs	x15, x22, x15
+	mul		x4, x3, x12
+	adcs	x17, x21, x17
+	umulh	x23, x4, x14
+	mul		x22, x4, x13
+	adcs	x21, xzr, xzr
+	umulh	x27, x4, x13
+	adds		x22, x22, x23
+	mul		x23, x4, x11
+	umulh	x26, x4, x11
+	adcs	x23, x23, x27
+	mul		x27, x4, x10
+	umulh	x25, x4, x10
+	adcs	x26, x27, x26
+	mul		x27, x4, x9
+	umulh	x24, x4, x9
+	adcs	x25, x27, x25
+	mul		x27, x4, x8
+	umulh	x20, x4, x8
+	adcs	x24, x27, x24
+	mul		x4, x4, x14
+	adcs	x20, x21, x20
+	cmn		x4, x3
+	adcs	x2, x22, x2
+	adcs	x18, x23, x18
+	adcs	x16, x26, x16
+	adcs	x15, x25, x15
+	adcs	x17, x24, x17
+	mul		x3, x2, x12
+	adcs	x5, x20, x5
+	umulh	x25, x3, x14
+	mul		x24, x3, x13
+	adcs	x20, xzr, xzr
+	umulh	x27, x3, x13
+	adds		x24, x24, x25
+	mul		x25, x3, x11
+	umulh	x26, x3, x11
+	adcs	x25, x25, x27
+	mul		x27, x3, x10
+	umulh	x23, x3, x10
+	adcs	x26, x27, x26
+	mul		x27, x3, x9
+	umulh	x21, x3, x9
+	mul		x22, x3, x8
+	adcs	x23, x27, x23
+	umulh	x4, x3, x8
+	adcs	x21, x22, x21
+	mul		x3, x3, x14
+	adcs	x4, x20, x4
+	cmn		x3, x2
+	adcs	x18, x24, x18
+	ldp	x6, x7, [x1, #64]
+	adcs	x16, x25, x16
+	adcs	x15, x26, x15
+	adcs	x17, x23, x17
+	adcs	x5, x21, x5
+	mul		x2, x18, x12
+	adcs	x4, x4, x6
+	umulh	x23, x2, x14
+	mul		x21, x2, x13
+	adcs	x6, xzr, xzr
+	umulh	x27, x2, x13
+	adds		x21, x21, x23
+	mul		x23, x2, x11
+	umulh	x26, x2, x11
+	adcs	x23, x23, x27
+	mul		x27, x2, x10
+	umulh	x24, x2, x10
+	mul		x25, x2, x9
+	adcs	x26, x27, x26
+	umulh	x20, x2, x9
+	mul		x22, x2, x8
+	adcs	x24, x25, x24
+	umulh	x3, x2, x8
+	adcs	x20, x22, x20
+	mul		x2, x2, x14
+	adcs	x3, x6, x3
+	cmn		x2, x18
+	adcs	x16, x21, x16
+	adcs	x15, x23, x15
+	adcs	x17, x26, x17
+	adcs	x5, x24, x5
+	adcs	x4, x20, x4
+	mul		x18, x16, x12
+	adcs	x3, x3, x7
+	umulh	x24, x18, x14
+	mul		x20, x18, x13
+	adcs	x7, xzr, xzr
+	umulh	x27, x18, x13
+	adds		x20, x20, x24
+	mul		x24, x18, x11
+	umulh	x25, x18, x11
+	mul		x26, x18, x10
+	adcs	x24, x24, x27
+	umulh	x22, x18, x10
+	mul		x23, x18, x9
+	adcs	x25, x26, x25
+	umulh	x6, x18, x9
+	mul		x21, x18, x8
+	adcs	x22, x23, x22
+	umulh	x2, x18, x8
+	adcs	x6, x21, x6
+	mul		x18, x18, x14
+	adcs	x2, x7, x2
+	cmn		x18, x16
+	adcs	x15, x20, x15
+	ldp	x19, x1, [x1, #80]
+	adcs	x16, x24, x17
+	adcs	x18, x25, x5
+	adcs	x4, x22, x4
+	adcs	x3, x6, x3
+	mul		x12, x15, x12
+	adcs	x2, x2, x19
+	umulh	x27, x12, x14
+	mul		x22, x12, x13
+	adcs	x6, xzr, xzr
+	umulh	x25, x12, x13
+	mul		x26, x12, x11
+	adds		x19, x22, x27
+	umulh	x23, x12, x11
+	mul		x24, x12, x10
+	adcs	x22, x26, x25
+	umulh	x20, x12, x10
+	mul		x21, x12, x9
+	adcs	x23, x24, x23
+	umulh	x5, x12, x9
+	mul		x7, x12, x8
+	adcs	x20, x21, x20
+	umulh	x17, x12, x8
+	adcs	x5, x7, x5
+	mul		x12, x12, x14
+	adcs	x17, x6, x17
+	cmn		x12, x15
+	adcs	x12, x19, x16
+	adcs	x15, x22, x18
+	adcs	x16, x23, x4
+	adcs	x18, x20, x3
+	adcs	x2, x5, x2
+	adcs	x17, x17, x1
+	subs		x14, x12, x14
+	sbcs	x13, x15, x13
+	sbcs	x11, x16, x11
+	sbcs	x10, x18, x10
+	sbcs	x9, x2, x9
+	sbcs	x8, x17, x8
+	ngcs	 x1, xzr
+	ldp	x20, x19, [sp, #64]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #48]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #32]     // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #16]     // 8-byte Folded Reload
+	tst	 x1, #0x1
+	csel	x12, x12, x14, ne
+	csel	x13, x15, x13, ne
+	csel	x11, x16, x11, ne
+	csel	x10, x18, x10, ne
+	csel	x9, x2, x9, ne
+	csel	x8, x17, x8, ne
+	stp		x12, x13, [x0]
+	stp	x11, x10, [x0, #16]
+	stp	x9, x8, [x0, #32]
+	ldr	x27, [sp], #80          // 8-byte Folded Reload
 	ret
-.Lfunc_end116:
-	.size	mcl_fp_montRed8L, .Lfunc_end116-mcl_fp_montRed8L
+.Lfunc_end45:
+	.size	mcl_fp_montRed6L, .Lfunc_end45-mcl_fp_montRed6L
 
-	.globl	mcl_fp_addPre8L
-	.align	2
-	.type	mcl_fp_addPre8L,@function
-mcl_fp_addPre8L:                        // @mcl_fp_addPre8L
+	.globl	mcl_fp_montRedNF6L
+	.p2align	2
+	.type	mcl_fp_montRedNF6L,@function
+mcl_fp_montRedNF6L:                     // @mcl_fp_montRedNF6L
 // BB#0:
-	ldp	x8, x9, [x2, #48]
-	ldp	x10, x11, [x1, #48]
-	ldp	x12, x13, [x2, #32]
-	ldp	x14, x15, [x1, #32]
-	ldp	x16, x17, [x2, #16]
-	ldp	 x18, x2, [x2]
-	ldp	 x3, x4, [x1]
-	ldp	x5, x1, [x1, #16]
-	adds	 x18, x18, x3
-	str	 x18, [x0]
-	adcs	x18, x2, x4
-	adcs	x16, x16, x5
-	stp	x18, x16, [x0, #8]
-	adcs	x16, x17, x1
-	adcs	x12, x12, x14
-	stp	x16, x12, [x0, #24]
-	adcs	x12, x13, x15
-	adcs	x8, x8, x10
-	stp	x12, x8, [x0, #40]
+	str	x27, [sp, #-80]!        // 8-byte Folded Spill
+	ldp	x12, x14, [x2, #-8]
+	ldp		x3, x4, [x1]
+	ldr	x13, [x2, #8]
+	ldp	x11, x10, [x2, #16]
+	stp	x20, x19, [sp, #64]     // 8-byte Folded Spill
+	ldp	x9, x8, [x2, #32]
+	mul		x20, x3, x12
+	stp	x26, x25, [sp, #16]     // 8-byte Folded Spill
+	mul		x26, x20, x13
+	umulh	x27, x20, x14
+	mul		x25, x20, x11
+	adds		x26, x27, x26
+	umulh	x27, x20, x13
+	stp	x24, x23, [sp, #32]     // 8-byte Folded Spill
+	mul		x24, x20, x10
+	adcs	x25, x27, x25
+	umulh	x27, x20, x11
+	mul		x23, x20, x9
+	adcs	x24, x27, x24
+	umulh	x27, x20, x10
+	stp	x22, x21, [sp, #48]     // 8-byte Folded Spill
+	mul		x22, x20, x8
+	adcs	x23, x27, x23
+	umulh	x27, x20, x9
+	ldp	x2, x18, [x1, #16]
+	umulh	x21, x20, x8
+	adcs	x22, x27, x22
+	mul		x20, x20, x14
+	adcs	x21, x21, xzr
+	ldp	x16, x15, [x1, #32]
+	cmn		x20, x3
+	adcs	x3, x26, x4
+	ldp	x17, x5, [x1, #48]
+	adcs	x2, x25, x2
+	adcs	x18, x24, x18
+	adcs	x16, x23, x16
+	adcs	x15, x22, x15
+	mul		x4, x3, x12
+	adcs	x17, x21, x17
+	umulh	x23, x4, x14
+	mul		x22, x4, x13
+	adcs	x21, xzr, xzr
+	umulh	x27, x4, x13
+	adds		x22, x22, x23
+	mul		x23, x4, x11
+	umulh	x26, x4, x11
+	adcs	x23, x23, x27
+	mul		x27, x4, x10
+	umulh	x25, x4, x10
+	adcs	x26, x27, x26
+	mul		x27, x4, x9
+	umulh	x24, x4, x9
+	adcs	x25, x27, x25
+	mul		x27, x4, x8
+	umulh	x20, x4, x8
+	adcs	x24, x27, x24
+	mul		x4, x4, x14
+	adcs	x20, x21, x20
+	cmn		x4, x3
+	adcs	x2, x22, x2
+	adcs	x18, x23, x18
+	adcs	x16, x26, x16
+	adcs	x15, x25, x15
+	adcs	x17, x24, x17
+	mul		x3, x2, x12
+	adcs	x5, x20, x5
+	umulh	x25, x3, x14
+	mul		x24, x3, x13
+	adcs	x20, xzr, xzr
+	umulh	x27, x3, x13
+	adds		x24, x24, x25
+	mul		x25, x3, x11
+	umulh	x26, x3, x11
+	adcs	x25, x25, x27
+	mul		x27, x3, x10
+	umulh	x23, x3, x10
+	adcs	x26, x27, x26
+	mul		x27, x3, x9
+	umulh	x21, x3, x9
+	mul		x22, x3, x8
+	adcs	x23, x27, x23
+	umulh	x4, x3, x8
+	adcs	x21, x22, x21
+	mul		x3, x3, x14
+	adcs	x4, x20, x4
+	cmn		x3, x2
+	adcs	x18, x24, x18
+	ldp	x6, x7, [x1, #64]
+	adcs	x16, x25, x16
+	adcs	x15, x26, x15
+	adcs	x17, x23, x17
+	adcs	x5, x21, x5
+	mul		x2, x18, x12
+	adcs	x4, x4, x6
+	umulh	x23, x2, x14
+	mul		x21, x2, x13
+	adcs	x6, xzr, xzr
+	umulh	x27, x2, x13
+	adds		x21, x21, x23
+	mul		x23, x2, x11
+	umulh	x26, x2, x11
+	adcs	x23, x23, x27
+	mul		x27, x2, x10
+	umulh	x24, x2, x10
+	mul		x25, x2, x9
+	adcs	x26, x27, x26
+	umulh	x20, x2, x9
+	mul		x22, x2, x8
+	adcs	x24, x25, x24
+	umulh	x3, x2, x8
+	adcs	x20, x22, x20
+	mul		x2, x2, x14
+	adcs	x3, x6, x3
+	cmn		x2, x18
+	adcs	x16, x21, x16
+	adcs	x15, x23, x15
+	adcs	x17, x26, x17
+	adcs	x5, x24, x5
+	adcs	x4, x20, x4
+	mul		x18, x16, x12
+	adcs	x3, x3, x7
+	umulh	x24, x18, x14
+	mul		x20, x18, x13
+	adcs	x7, xzr, xzr
+	umulh	x27, x18, x13
+	adds		x20, x20, x24
+	mul		x24, x18, x11
+	umulh	x25, x18, x11
+	mul		x26, x18, x10
+	adcs	x24, x24, x27
+	umulh	x22, x18, x10
+	mul		x23, x18, x9
+	adcs	x25, x26, x25
+	umulh	x6, x18, x9
+	mul		x21, x18, x8
+	adcs	x22, x23, x22
+	umulh	x2, x18, x8
+	adcs	x6, x21, x6
+	mul		x18, x18, x14
+	adcs	x2, x7, x2
+	cmn		x18, x16
+	adcs	x15, x20, x15
+	ldp	x19, x1, [x1, #80]
+	adcs	x16, x24, x17
+	adcs	x18, x25, x5
+	adcs	x4, x22, x4
+	adcs	x3, x6, x3
+	mul		x12, x15, x12
+	adcs	x2, x2, x19
+	umulh	x27, x12, x14
+	mul		x22, x12, x13
+	adcs	x6, xzr, xzr
+	umulh	x25, x12, x13
+	mul		x26, x12, x11
+	adds		x19, x22, x27
+	umulh	x23, x12, x11
+	mul		x24, x12, x10
+	adcs	x22, x26, x25
+	umulh	x20, x12, x10
+	mul		x21, x12, x9
+	adcs	x23, x24, x23
+	umulh	x5, x12, x9
+	mul		x7, x12, x8
+	adcs	x20, x21, x20
+	umulh	x17, x12, x8
+	adcs	x5, x7, x5
+	mul		x12, x12, x14
+	adcs	x17, x6, x17
+	cmn		x12, x15
+	adcs	x12, x19, x16
+	adcs	x15, x22, x18
+	adcs	x16, x23, x4
+	adcs	x18, x20, x3
+	adcs	x2, x5, x2
+	adcs	x17, x17, x1
+	subs		x14, x12, x14
+	sbcs	x13, x15, x13
+	sbcs	x11, x16, x11
+	sbcs	x10, x18, x10
+	sbcs	x9, x2, x9
+	sbcs	x8, x17, x8
+	asr	x1, x8, #63
+	ldp	x20, x19, [sp, #64]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #48]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #32]     // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #16]     // 8-byte Folded Reload
+	cmp		x1, #0          // =0
+	csel	x12, x12, x14, lt
+	csel	x13, x15, x13, lt
+	csel	x11, x16, x11, lt
+	csel	x10, x18, x10, lt
+	csel	x9, x2, x9, lt
+	csel	x8, x17, x8, lt
+	stp		x12, x13, [x0]
+	stp	x11, x10, [x0, #16]
+	stp	x9, x8, [x0, #32]
+	ldr	x27, [sp], #80          // 8-byte Folded Reload
+	ret
+.Lfunc_end46:
+	.size	mcl_fp_montRedNF6L, .Lfunc_end46-mcl_fp_montRedNF6L
+
+	.globl	mcl_fp_addPre6L
+	.p2align	2
+	.type	mcl_fp_addPre6L,@function
+mcl_fp_addPre6L:                        // @mcl_fp_addPre6L
+// BB#0:
+	ldp		x14, x15, [x2]
+	ldp		x16, x17, [x1]
+	ldp	x10, x11, [x1, #32]
+	ldp	x12, x13, [x2, #16]
+	ldp	x18, x1, [x1, #16]
+	ldp	x8, x9, [x2, #32]
+	adds		x14, x14, x16
+	adcs	x15, x15, x17
+	adcs	x12, x12, x18
+	adcs	x13, x13, x1
+	adcs	x10, x8, x10
 	adcs	x9, x9, x11
 	adcs	x8, xzr, xzr
-	str	x9, [x0, #56]
+	stp		x14, x15, [x0]
+	stp	x12, x13, [x0, #16]
+	stp	x10, x9, [x0, #32]
 	mov	 x0, x8
 	ret
-.Lfunc_end117:
-	.size	mcl_fp_addPre8L, .Lfunc_end117-mcl_fp_addPre8L
+.Lfunc_end47:
+	.size	mcl_fp_addPre6L, .Lfunc_end47-mcl_fp_addPre6L
 
-	.globl	mcl_fp_subPre8L
-	.align	2
-	.type	mcl_fp_subPre8L,@function
-mcl_fp_subPre8L:                        // @mcl_fp_subPre8L
+	.globl	mcl_fp_subPre6L
+	.p2align	2
+	.type	mcl_fp_subPre6L,@function
+mcl_fp_subPre6L:                        // @mcl_fp_subPre6L
 // BB#0:
-	ldp	x8, x9, [x2, #48]
-	ldp	x10, x11, [x1, #48]
-	ldp	x12, x13, [x2, #32]
-	ldp	x14, x15, [x1, #32]
-	ldp	x16, x17, [x2, #16]
-	ldp	 x18, x2, [x2]
-	ldp	 x3, x4, [x1]
-	ldp	x5, x1, [x1, #16]
-	subs	 x18, x3, x18
-	str	 x18, [x0]
-	sbcs	x18, x4, x2
-	sbcs	x16, x5, x16
-	stp	x18, x16, [x0, #8]
-	sbcs	x16, x1, x17
-	sbcs	x12, x14, x12
-	stp	x16, x12, [x0, #24]
-	sbcs	x12, x15, x13
-	sbcs	x8, x10, x8
-	stp	x12, x8, [x0, #40]
+	ldp		x14, x15, [x2]
+	ldp		x16, x17, [x1]
+	ldp	x10, x11, [x1, #32]
+	ldp	x12, x13, [x2, #16]
+	ldp	x18, x1, [x1, #16]
+	ldp	x8, x9, [x2, #32]
+	subs		x14, x16, x14
+	sbcs	x15, x17, x15
+	sbcs	x12, x18, x12
+	sbcs	x13, x1, x13
+	sbcs	x10, x10, x8
 	sbcs	x9, x11, x9
 	ngcs	 x8, xzr
 	and	x8, x8, #0x1
-	str	x9, [x0, #56]
+	stp		x14, x15, [x0]
+	stp	x12, x13, [x0, #16]
+	stp	x10, x9, [x0, #32]
 	mov	 x0, x8
 	ret
-.Lfunc_end118:
-	.size	mcl_fp_subPre8L, .Lfunc_end118-mcl_fp_subPre8L
+.Lfunc_end48:
+	.size	mcl_fp_subPre6L, .Lfunc_end48-mcl_fp_subPre6L
 
-	.globl	mcl_fp_shr1_8L
-	.align	2
-	.type	mcl_fp_shr1_8L,@function
-mcl_fp_shr1_8L:                         // @mcl_fp_shr1_8L
+	.globl	mcl_fp_shr1_6L
+	.p2align	2
+	.type	mcl_fp_shr1_6L,@function
+mcl_fp_shr1_6L:                         // @mcl_fp_shr1_6L
 // BB#0:
-	ldp	 x8, x9, [x1]
-	ldp	x10, x11, [x1, #48]
-	ldp	x12, x13, [x1, #16]
-	ldp	x14, x15, [x1, #32]
+	ldp		x8, x9, [x1]
+	ldp	x10, x11, [x1, #16]
+	ldp	x12, x13, [x1, #32]
 	extr	x8, x9, x8, #1
-	extr	x9, x12, x9, #1
-	extr	x12, x13, x12, #1
-	extr	x13, x14, x13, #1
-	extr	x14, x15, x14, #1
-	extr	x15, x10, x15, #1
+	extr	x9, x10, x9, #1
 	extr	x10, x11, x10, #1
-	lsr	x11, x11, #1
-	stp	 x8, x9, [x0]
-	stp	x12, x13, [x0, #16]
-	stp	x14, x15, [x0, #32]
-	stp	x10, x11, [x0, #48]
+	extr	x11, x12, x11, #1
+	extr	x12, x13, x12, #1
+	lsr	x13, x13, #1
+	stp		x8, x9, [x0]
+	stp	x10, x11, [x0, #16]
+	stp	x12, x13, [x0, #32]
 	ret
-.Lfunc_end119:
-	.size	mcl_fp_shr1_8L, .Lfunc_end119-mcl_fp_shr1_8L
+.Lfunc_end49:
+	.size	mcl_fp_shr1_6L, .Lfunc_end49-mcl_fp_shr1_6L
 
-	.globl	mcl_fp_add8L
-	.align	2
-	.type	mcl_fp_add8L,@function
-mcl_fp_add8L:                           // @mcl_fp_add8L
+	.globl	mcl_fp_add6L
+	.p2align	2
+	.type	mcl_fp_add6L,@function
+mcl_fp_add6L:                           // @mcl_fp_add6L
 // BB#0:
-	stp	x22, x21, [sp, #-32]!
-	stp	x20, x19, [sp, #16]
-	ldp	x8, x9, [x2, #48]
-	ldp	x10, x11, [x1, #48]
-	ldp	x12, x13, [x2, #32]
-	ldp	x14, x15, [x1, #32]
-	ldp	x16, x17, [x2, #16]
-	ldp	 x18, x2, [x2]
-	ldp	 x4, x5, [x1]
-	ldp	x6, x1, [x1, #16]
-	adds	 x18, x18, x4
-	adcs	x2, x2, x5
-	ldp	x4, x5, [x3, #48]
-	adcs	x16, x16, x6
-	adcs	x17, x17, x1
-	ldp	x1, x6, [x3, #32]
-	adcs	x7, x12, x14
-	adcs	x19, x13, x15
-	ldp	 x12, x13, [x3]
-	stp	 x18, x2, [x0]
-	stp	x16, x17, [x0, #16]
-	stp	x7, x19, [x0, #32]
-	adcs	x8, x8, x10
-	adcs	x20, x9, x11
-	stp	x8, x20, [x0, #48]
-	adcs	x21, xzr, xzr
-	ldp	x9, x10, [x3, #16]
-	subs	 x15, x18, x12
-	sbcs	x14, x2, x13
-	sbcs	x13, x16, x9
-	sbcs	x12, x17, x10
-	sbcs	x11, x7, x1
-	sbcs	x10, x19, x6
-	sbcs	x9, x8, x4
-	sbcs	x8, x20, x5
-	sbcs	x16, x21, xzr
-	and	w16, w16, #0x1
-	tbnz	w16, #0, .LBB120_2
+	ldp		x14, x15, [x2]
+	ldp		x16, x17, [x1]
+	ldp	x10, x11, [x1, #32]
+	ldp	x12, x13, [x2, #16]
+	ldp	x18, x1, [x1, #16]
+	ldp	x8, x9, [x2, #32]
+	adds		x14, x14, x16
+	adcs	x15, x15, x17
+	adcs	x18, x12, x18
+	adcs	x1, x13, x1
+	adcs	x5, x8, x10
+	ldp		x8, x10, [x3]
+	ldp	x16, x17, [x3, #32]
+	ldp	x2, x4, [x3, #16]
+	adcs	x3, x9, x11
+	adcs	x6, xzr, xzr
+	subs		x13, x14, x8
+	sbcs	x12, x15, x10
+	sbcs	x11, x18, x2
+	sbcs	x10, x1, x4
+	sbcs	x9, x5, x16
+	sbcs	x8, x3, x17
+	stp		x14, x15, [x0]
+	sbcs	x14, x6, xzr
+	stp	x18, x1, [x0, #16]
+	stp	x5, x3, [x0, #32]
+	tbnz	w14, #0, .LBB50_2
 // BB#1:                                // %nocarry
-	stp	 x15, x14, [x0]
-	stp	x13, x12, [x0, #16]
-	stp	x11, x10, [x0, #32]
-	stp	x9, x8, [x0, #48]
-.LBB120_2:                              // %carry
-	ldp	x20, x19, [sp, #16]
-	ldp	x22, x21, [sp], #32
+	stp		x13, x12, [x0]
+	stp	x11, x10, [x0, #16]
+	stp	x9, x8, [x0, #32]
+.LBB50_2:                               // %carry
 	ret
-.Lfunc_end120:
-	.size	mcl_fp_add8L, .Lfunc_end120-mcl_fp_add8L
+.Lfunc_end50:
+	.size	mcl_fp_add6L, .Lfunc_end50-mcl_fp_add6L
 
-	.globl	mcl_fp_addNF8L
-	.align	2
-	.type	mcl_fp_addNF8L,@function
-mcl_fp_addNF8L:                         // @mcl_fp_addNF8L
+	.globl	mcl_fp_addNF6L
+	.p2align	2
+	.type	mcl_fp_addNF6L,@function
+mcl_fp_addNF6L:                         // @mcl_fp_addNF6L
 // BB#0:
-	ldp	x8, x9, [x1, #48]
-	ldp	x10, x11, [x2, #48]
-	ldp	x12, x13, [x1, #32]
-	ldp	x14, x15, [x2, #32]
-	ldp	x16, x17, [x1, #16]
-	ldp	 x18, x1, [x1]
-	ldp	 x4, x5, [x2]
-	ldp	x6, x2, [x2, #16]
-	adds	 x18, x4, x18
-	adcs	x1, x5, x1
-	ldp	x4, x5, [x3, #48]
-	adcs	x16, x6, x16
-	adcs	x17, x2, x17
-	ldp	x2, x6, [x3, #32]
-	adcs	x12, x14, x12
-	adcs	x13, x15, x13
-	ldp	 x14, x15, [x3]
+	ldp		x14, x15, [x1]
+	ldp		x16, x17, [x2]
+	ldp	x8, x9, [x1, #32]
+	ldp	x12, x13, [x1, #16]
+	ldp	x18, x1, [x2, #16]
+	adds		x14, x16, x14
+	ldp	x10, x11, [x2, #32]
+	adcs	x15, x17, x15
+	adcs	x12, x18, x12
+	adcs	x13, x1, x13
+	ldp		x18, x1, [x3]
 	adcs	x8, x10, x8
-	ldp	x10, x3, [x3, #16]
+	ldp	x10, x2, [x3, #16]
 	adcs	x9, x11, x9
-	subs	 x11, x18, x14
-	sbcs	x14, x1, x15
-	sbcs	x10, x16, x10
-	sbcs	x15, x17, x3
-	sbcs	x2, x12, x2
-	sbcs	x3, x13, x6
-	sbcs	x4, x8, x4
-	sbcs	x5, x9, x5
-	cmp	 x5, #0                 // =0
-	csel	x11, x18, x11, lt
-	csel	x14, x1, x14, lt
-	csel	x10, x16, x10, lt
-	csel	x15, x17, x15, lt
-	csel	x12, x12, x2, lt
-	csel	x13, x13, x3, lt
-	csel	x8, x8, x4, lt
-	csel	x9, x9, x5, lt
-	stp	 x11, x14, [x0]
-	stp	x10, x15, [x0, #16]
-	stp	x12, x13, [x0, #32]
-	stp	x8, x9, [x0, #48]
+	ldp	x16, x17, [x3, #32]
+	subs		x11, x14, x18
+	sbcs	x18, x15, x1
+	sbcs	x10, x12, x10
+	sbcs	x1, x13, x2
+	sbcs	x16, x8, x16
+	sbcs	x17, x9, x17
+	asr	x2, x17, #63
+	cmp		x2, #0          // =0
+	csel	x11, x14, x11, lt
+	csel	x14, x15, x18, lt
+	csel	x10, x12, x10, lt
+	csel	x12, x13, x1, lt
+	csel	x8, x8, x16, lt
+	csel	x9, x9, x17, lt
+	stp		x11, x14, [x0]
+	stp	x10, x12, [x0, #16]
+	stp	x8, x9, [x0, #32]
 	ret
-.Lfunc_end121:
-	.size	mcl_fp_addNF8L, .Lfunc_end121-mcl_fp_addNF8L
+.Lfunc_end51:
+	.size	mcl_fp_addNF6L, .Lfunc_end51-mcl_fp_addNF6L
 
-	.globl	mcl_fp_sub8L
-	.align	2
-	.type	mcl_fp_sub8L,@function
-mcl_fp_sub8L:                           // @mcl_fp_sub8L
+	.globl	mcl_fp_sub6L
+	.p2align	2
+	.type	mcl_fp_sub6L,@function
+mcl_fp_sub6L:                           // @mcl_fp_sub6L
 // BB#0:
-	ldp	x14, x15, [x2, #48]
-	ldp	x16, x17, [x1, #48]
-	ldp	x12, x13, [x2, #32]
-	ldp	x18, x4, [x1, #32]
+	ldp		x8, x9, [x2]
+	ldp		x16, x17, [x1]
+	ldp	x14, x15, [x1, #32]
 	ldp	x10, x11, [x2, #16]
-	ldp	 x8, x9, [x2]
-	ldp	 x2, x5, [x1]
-	ldp	x6, x1, [x1, #16]
-	subs	 x8, x2, x8
-	sbcs	x9, x5, x9
-	stp	 x8, x9, [x0]
-	sbcs	x10, x6, x10
+	ldp	x18, x1, [x1, #16]
+	ldp	x12, x13, [x2, #32]
+	subs		x8, x16, x8
+	sbcs	x9, x17, x9
+	sbcs	x10, x18, x10
 	sbcs	x11, x1, x11
+	sbcs	x12, x14, x12
+	sbcs	x13, x15, x13
+	ngcs	 x14, xzr
+	stp		x8, x9, [x0]
 	stp	x10, x11, [x0, #16]
-	sbcs	x12, x18, x12
-	sbcs	x13, x4, x13
 	stp	x12, x13, [x0, #32]
-	sbcs	x14, x16, x14
-	sbcs	x15, x17, x15
-	stp	x14, x15, [x0, #48]
-	ngcs	 x16, xzr
-	and	w16, w16, #0x1
-	tbnz	w16, #0, .LBB122_2
+	tbnz	w14, #0, .LBB52_2
 // BB#1:                                // %nocarry
 	ret
-.LBB122_2:                              // %carry
-	ldp	x16, x17, [x3, #48]
-	ldp	 x18, x1, [x3]
-	ldp	x2, x4, [x3, #16]
-	ldp	x5, x3, [x3, #32]
-	adds	 x8, x18, x8
-	adcs	x9, x1, x9
-	adcs	x10, x2, x10
-	adcs	x11, x4, x11
-	adcs	x12, x5, x12
-	adcs	x13, x3, x13
-	adcs	x14, x16, x14
-	adcs	x15, x17, x15
-	stp	 x8, x9, [x0]
-	stp	x10, x11, [x0, #16]
-	stp	x12, x13, [x0, #32]
-	stp	x14, x15, [x0, #48]
+.LBB52_2:                               // %carry
+	ldp		x16, x17, [x3]
+	ldp	x18, x1, [x3, #16]
+	ldp	x14, x15, [x3, #32]
+	adds		x8, x16, x8
+	adcs	x9, x17, x9
+	stp		x8, x9, [x0]
+	adcs	x8, x18, x10
+	adcs	x9, x1, x11
+	stp	x8, x9, [x0, #16]
+	adcs	x8, x14, x12
+	adcs	x9, x15, x13
+	stp	x8, x9, [x0, #32]
 	ret
-.Lfunc_end122:
-	.size	mcl_fp_sub8L, .Lfunc_end122-mcl_fp_sub8L
+.Lfunc_end52:
+	.size	mcl_fp_sub6L, .Lfunc_end52-mcl_fp_sub6L
 
-	.globl	mcl_fp_subNF8L
-	.align	2
-	.type	mcl_fp_subNF8L,@function
-mcl_fp_subNF8L:                         // @mcl_fp_subNF8L
+	.globl	mcl_fp_subNF6L
+	.p2align	2
+	.type	mcl_fp_subNF6L,@function
+mcl_fp_subNF6L:                         // @mcl_fp_subNF6L
 // BB#0:
-	ldp	x8, x9, [x2, #48]
-	ldp	x10, x11, [x1, #48]
-	ldp	x12, x13, [x2, #32]
-	ldp	x14, x15, [x1, #32]
-	ldp	x16, x17, [x2, #16]
-	ldp	 x18, x2, [x2]
-	ldp	 x4, x5, [x1]
-	ldp	x6, x1, [x1, #16]
-	subs	 x18, x4, x18
-	sbcs	x2, x5, x2
-	ldp	x4, x5, [x3, #48]
-	sbcs	x16, x6, x16
-	sbcs	x17, x1, x17
-	ldp	x1, x6, [x3, #32]
-	sbcs	x12, x14, x12
-	sbcs	x13, x15, x13
-	ldp	x14, x15, [x3, #16]
+	ldp		x14, x15, [x2]
+	ldp		x16, x17, [x1]
+	ldp	x10, x11, [x1, #32]
+	ldp	x12, x13, [x2, #16]
+	ldp	x18, x1, [x1, #16]
+	ldp	x8, x9, [x2, #32]
+	subs		x14, x16, x14
+	sbcs	x15, x17, x15
+	sbcs	x12, x18, x12
+	sbcs	x13, x1, x13
+	ldp	x16, x17, [x3, #32]
+	ldp		x18, x1, [x3]
 	sbcs	x8, x10, x8
-	ldp	 x10, x3, [x3]
+	ldp	x10, x2, [x3, #16]
 	sbcs	x9, x11, x9
 	asr	x11, x9, #63
-	and	 x10, x11, x10
-	and	 x3, x11, x3
-	and	 x14, x11, x14
-	and	 x15, x11, x15
-	and	 x1, x11, x1
-	and	 x6, x11, x6
-	and	 x4, x11, x4
-	and	 x11, x11, x5
-	adds	 x10, x10, x18
-	str	 x10, [x0]
-	adcs	x10, x3, x2
-	str	x10, [x0, #8]
-	adcs	x10, x14, x16
-	str	x10, [x0, #16]
-	adcs	x10, x15, x17
-	str	x10, [x0, #24]
-	adcs	x10, x1, x12
-	str	x10, [x0, #32]
-	adcs	x10, x6, x13
-	adcs	x8, x4, x8
-	stp	x10, x8, [x0, #40]
-	adcs	x8, x11, x9
-	str	x8, [x0, #56]
+	and	x1, x1, x11, ror #63
+	and		x10, x11, x10
+	and		x2, x11, x2
+	and		x16, x11, x16
+	and		x17, x11, x17
+	extr	x11, x11, x9, #63
+	and		x11, x11, x18
+	adds		x11, x11, x14
+	adcs	x14, x1, x15
+	adcs	x10, x10, x12
+	stp		x11, x14, [x0]
+	adcs	x11, x2, x13
+	adcs	x8, x16, x8
+	adcs	x9, x17, x9
+	stp	x10, x11, [x0, #16]
+	stp	x8, x9, [x0, #32]
 	ret
-.Lfunc_end123:
-	.size	mcl_fp_subNF8L, .Lfunc_end123-mcl_fp_subNF8L
+.Lfunc_end53:
+	.size	mcl_fp_subNF6L, .Lfunc_end53-mcl_fp_subNF6L
 
-	.globl	mcl_fpDbl_add8L
-	.align	2
-	.type	mcl_fpDbl_add8L,@function
-mcl_fpDbl_add8L:                        // @mcl_fpDbl_add8L
+	.globl	mcl_fpDbl_add6L
+	.p2align	2
+	.type	mcl_fpDbl_add6L,@function
+mcl_fpDbl_add6L:                        // @mcl_fpDbl_add6L
 // BB#0:
-	ldp	x8, x9, [x2, #112]
-	ldp	x10, x11, [x1, #112]
-	ldp	x12, x13, [x2, #96]
-	ldp	x14, x15, [x1, #96]
-	ldp	 x16, x5, [x2]
-	ldp	 x17, x6, [x1]
-	ldp	x18, x4, [x2, #80]
-	adds	 x16, x16, x17
-	ldr	x17, [x1, #16]
-	str	 x16, [x0]
-	adcs	x16, x5, x6
-	ldp	x5, x6, [x2, #16]
-	str	x16, [x0, #8]
-	adcs	x17, x5, x17
-	ldp	x16, x5, [x1, #24]
-	str	x17, [x0, #16]
-	adcs	x16, x6, x16
-	ldp	x17, x6, [x2, #32]
-	str	x16, [x0, #24]
-	adcs	x17, x17, x5
-	ldp	x16, x5, [x1, #40]
-	str	x17, [x0, #32]
-	adcs	x16, x6, x16
-	ldp	x17, x6, [x2, #48]
-	str	x16, [x0, #40]
-	ldr	x16, [x1, #56]
-	adcs	x17, x17, x5
-	ldp	x5, x2, [x2, #64]
-	str	x17, [x0, #48]
-	adcs	x16, x6, x16
-	ldp	x17, x6, [x1, #64]
-	str	x16, [x0, #56]
-	ldp	x16, x1, [x1, #80]
-	adcs	x17, x5, x17
-	adcs	x2, x2, x6
-	ldp	x5, x6, [x3, #48]
-	adcs	x16, x18, x16
-	adcs	x18, x4, x1
-	ldp	x1, x4, [x3, #32]
+	str	x25, [sp, #-64]!        // 8-byte Folded Spill
+	stp	x24, x23, [sp, #16]     // 8-byte Folded Spill
+	stp	x22, x21, [sp, #32]     // 8-byte Folded Spill
+	stp	x20, x19, [sp, #48]     // 8-byte Folded Spill
+	ldp	x8, x9, [x2, #80]
+	ldp	x12, x13, [x2, #64]
+	ldp	x16, x17, [x2, #48]
+	ldp	x5, x6, [x2, #32]
+	ldp	x20, x21, [x2, #16]
+	ldp		x22, x2, [x2]
+	ldp		x23, x24, [x1]
+	ldp	x10, x11, [x1, #80]
+	ldp	x14, x15, [x1, #64]
+	ldp	x18, x4, [x1, #48]
+	ldp	x7, x19, [x1, #32]
+	ldp	x25, x1, [x1, #16]
+	adds		x22, x22, x23
+	adcs	x2, x2, x24
+	ldp	x23, x24, [x3, #32]
+	adcs	x20, x20, x25
+	adcs	x1, x21, x1
+	stp	x20, x1, [x0, #16]
+	adcs	x1, x5, x7
+	adcs	x5, x6, x19
+	adcs	x16, x16, x18
+	adcs	x17, x17, x4
 	adcs	x12, x12, x14
+	stp		x22, x2, [x0]
+	ldp	x2, x22, [x3, #16]
+	ldp		x25, x3, [x3]
 	adcs	x13, x13, x15
-	ldp	x14, x15, [x3, #16]
 	adcs	x8, x8, x10
-	ldp	 x10, x3, [x3]
 	adcs	x9, x9, x11
-	adcs	x11, xzr, xzr
-	subs	 x10, x17, x10
-	sbcs	x3, x2, x3
-	sbcs	x14, x16, x14
-	sbcs	x15, x18, x15
-	sbcs	x1, x12, x1
-	sbcs	x4, x13, x4
-	sbcs	x5, x8, x5
-	sbcs	x6, x9, x6
-	sbcs	x11, x11, xzr
-	tst	 x11, #0x1
-	csel	x10, x17, x10, ne
-	csel	x11, x2, x3, ne
-	csel	x14, x16, x14, ne
-	csel	x15, x18, x15, ne
-	csel	x12, x12, x1, ne
-	csel	x13, x13, x4, ne
-	csel	x8, x8, x5, ne
-	csel	x9, x9, x6, ne
-	stp	x10, x11, [x0, #64]
-	stp	x14, x15, [x0, #80]
-	stp	x12, x13, [x0, #96]
-	stp	x8, x9, [x0, #112]
+	adcs	x10, xzr, xzr
+	subs		x11, x16, x25
+	sbcs	x14, x17, x3
+	sbcs	x15, x12, x2
+	sbcs	x18, x13, x22
+	stp	x1, x5, [x0, #32]
+	sbcs	x1, x8, x23
+	sbcs	x2, x9, x24
+	sbcs	x10, x10, xzr
+	ldp	x20, x19, [sp, #48]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #32]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #16]     // 8-byte Folded Reload
+	tst	 x10, #0x1
+	csel	x10, x16, x11, ne
+	csel	x11, x17, x14, ne
+	csel	x12, x12, x15, ne
+	csel	x13, x13, x18, ne
+	csel	x8, x8, x1, ne
+	csel	x9, x9, x2, ne
+	stp	x10, x11, [x0, #48]
+	stp	x12, x13, [x0, #64]
+	stp	x8, x9, [x0, #80]
+	ldr	x25, [sp], #64          // 8-byte Folded Reload
 	ret
-.Lfunc_end124:
-	.size	mcl_fpDbl_add8L, .Lfunc_end124-mcl_fpDbl_add8L
+.Lfunc_end54:
+	.size	mcl_fpDbl_add6L, .Lfunc_end54-mcl_fpDbl_add6L
 
-	.globl	mcl_fpDbl_sub8L
-	.align	2
-	.type	mcl_fpDbl_sub8L,@function
-mcl_fpDbl_sub8L:                        // @mcl_fpDbl_sub8L
+	.globl	mcl_fpDbl_sub6L
+	.p2align	2
+	.type	mcl_fpDbl_sub6L,@function
+mcl_fpDbl_sub6L:                        // @mcl_fpDbl_sub6L
 // BB#0:
-	ldp	x10, x8, [x2, #112]
-	ldp	x11, x9, [x1, #112]
-	ldp	x12, x13, [x2, #96]
-	ldp	x14, x15, [x1, #96]
-	ldp	 x16, x5, [x1]
-	ldp	 x17, x4, [x2]
-	ldr	x18, [x1, #80]
-	subs	 x16, x16, x17
-	ldr	x17, [x1, #16]
-	str	 x16, [x0]
-	sbcs	x16, x5, x4
-	ldp	x4, x5, [x2, #16]
-	str	x16, [x0, #8]
-	sbcs	x17, x17, x4
-	ldp	x16, x4, [x1, #24]
-	str	x17, [x0, #16]
-	sbcs	x16, x16, x5
-	ldp	x17, x5, [x2, #32]
-	str	x16, [x0, #24]
-	sbcs	x17, x4, x17
-	ldp	x16, x4, [x1, #40]
-	str	x17, [x0, #32]
-	sbcs	x16, x16, x5
-	ldp	x17, x5, [x2, #48]
-	str	x16, [x0, #40]
-	sbcs	x17, x4, x17
-	ldp	x16, x4, [x1, #56]
-	str	x17, [x0, #48]
-	sbcs	x16, x16, x5
-	ldp	x17, x5, [x2, #64]
-	str	x16, [x0, #56]
-	ldr	x16, [x1, #72]
+	str	x25, [sp, #-64]!        // 8-byte Folded Spill
+	stp	x24, x23, [sp, #16]     // 8-byte Folded Spill
+	stp	x22, x21, [sp, #32]     // 8-byte Folded Spill
+	stp	x20, x19, [sp, #48]     // 8-byte Folded Spill
+	ldp	x8, x9, [x2, #80]
+	ldp	x12, x13, [x2, #64]
+	ldp	x16, x17, [x2, #48]
+	ldp	x5, x6, [x2, #32]
+	ldp	x20, x21, [x2, #16]
+	ldp		x22, x2, [x2]
+	ldp		x23, x24, [x1]
+	ldp	x10, x11, [x1, #80]
+	ldp	x14, x15, [x1, #64]
+	ldp	x18, x4, [x1, #48]
+	ldp	x7, x19, [x1, #32]
+	ldp	x25, x1, [x1, #16]
+	subs		x22, x23, x22
+	sbcs	x2, x24, x2
+	ldp	x23, x24, [x3, #32]
+	sbcs	x20, x25, x20
+	sbcs	x1, x1, x21
+	stp	x20, x1, [x0, #16]
+	sbcs	x1, x7, x5
+	sbcs	x5, x19, x6
+	sbcs	x16, x18, x16
 	sbcs	x17, x4, x17
-	ldp	x4, x2, [x2, #80]
-	ldr	x1, [x1, #88]
-	sbcs	x16, x16, x5
-	sbcs	x18, x18, x4
-	ldp	x4, x5, [x3, #48]
-	sbcs	x1, x1, x2
 	sbcs	x12, x14, x12
-	ldp	x14, x2, [x3, #32]
 	sbcs	x13, x15, x13
-	sbcs	x10, x11, x10
-	ldp	x11, x15, [x3, #16]
-	sbcs	x8, x9, x8
-	ngcs	 x9, xzr
-	tst	 x9, #0x1
-	ldp	 x9, x3, [x3]
-	csel	x5, x5, xzr, ne
-	csel	x4, x4, xzr, ne
-	csel	x2, x2, xzr, ne
-	csel	x14, x14, xzr, ne
-	csel	x15, x15, xzr, ne
-	csel	x11, x11, xzr, ne
-	csel	x3, x3, xzr, ne
-	csel	x9, x9, xzr, ne
-	adds	 x9, x9, x17
-	str	x9, [x0, #64]
-	adcs	x9, x3, x16
-	str	x9, [x0, #72]
-	adcs	x9, x11, x18
-	str	x9, [x0, #80]
-	adcs	x9, x15, x1
-	str	x9, [x0, #88]
-	adcs	x9, x14, x12
-	str	x9, [x0, #96]
-	adcs	x9, x2, x13
-	str	x9, [x0, #104]
-	adcs	x9, x4, x10
-	adcs	x8, x5, x8
-	stp	x9, x8, [x0, #112]
+	stp		x22, x2, [x0]
+	ldp	x2, x22, [x3, #16]
+	ldp		x25, x3, [x3]
+	sbcs	x8, x10, x8
+	sbcs	x9, x11, x9
+	ngcs	 x10, xzr
+	tst	 x10, #0x1
+	stp	x1, x5, [x0, #32]
+	csel	x1, x25, xzr, ne
+	csel	x10, x24, xzr, ne
+	csel	x11, x23, xzr, ne
+	csel	x14, x22, xzr, ne
+	csel	x15, x2, xzr, ne
+	csel	x18, x3, xzr, ne
+	adds		x16, x1, x16
+	adcs	x17, x18, x17
+	adcs	x12, x15, x12
+	adcs	x13, x14, x13
+	ldp	x20, x19, [sp, #48]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #32]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #16]     // 8-byte Folded Reload
+	adcs	x8, x11, x8
+	adcs	x9, x10, x9
+	stp	x16, x17, [x0, #48]
+	stp	x12, x13, [x0, #64]
+	stp	x8, x9, [x0, #80]
+	ldr	x25, [sp], #64          // 8-byte Folded Reload
 	ret
-.Lfunc_end125:
-	.size	mcl_fpDbl_sub8L, .Lfunc_end125-mcl_fpDbl_sub8L
+.Lfunc_end55:
+	.size	mcl_fpDbl_sub6L, .Lfunc_end55-mcl_fpDbl_sub6L
 
-	.align	2
-	.type	.LmulPv576x64,@function
-.LmulPv576x64:                          // @mulPv576x64
+	.globl	mulPv512x64
+	.p2align	2
+	.type	mulPv512x64,@function
+mulPv512x64:                            // @mulPv512x64
 // BB#0:
-	ldr	 x9, [x0]
-	mul	 x10, x9, x1
-	str	 x10, [x8]
+	ldr		x9, [x0]
+	mul		x10, x9, x1
+	str		x10, [x8]
 	ldr	x10, [x0, #8]
 	umulh	x9, x9, x1
-	mul	 x11, x10, x1
-	adds	 x9, x9, x11
+	mul		x11, x10, x1
+	adds		x9, x9, x11
 	str	x9, [x8, #8]
 	ldr	x9, [x0, #16]
 	umulh	x10, x10, x1
-	mul	 x11, x9, x1
+	mul		x11, x9, x1
 	adcs	x10, x10, x11
 	str	x10, [x8, #16]
 	ldr	x10, [x0, #24]
 	umulh	x9, x9, x1
-	mul	 x11, x10, x1
+	mul		x11, x10, x1
 	adcs	x9, x9, x11
 	str	x9, [x8, #24]
 	ldr	x9, [x0, #32]
 	umulh	x10, x10, x1
-	mul	 x11, x9, x1
+	mul		x11, x9, x1
 	adcs	x10, x10, x11
 	str	x10, [x8, #32]
 	ldr	x10, [x0, #40]
 	umulh	x9, x9, x1
-	mul	 x11, x10, x1
+	mul		x11, x10, x1
 	adcs	x9, x9, x11
 	str	x9, [x8, #40]
 	ldr	x9, [x0, #48]
 	umulh	x10, x10, x1
-	mul	 x11, x9, x1
+	mul		x11, x9, x1
 	adcs	x10, x10, x11
 	str	x10, [x8, #48]
 	ldr	x10, [x0, #56]
 	umulh	x9, x9, x1
-	mul	 x11, x10, x1
-	adcs	x9, x9, x11
-	str	x9, [x8, #56]
-	ldr	x9, [x0, #64]
+	mul		x11, x10, x1
 	umulh	x10, x10, x1
-	mul	 x11, x9, x1
-	umulh	x9, x9, x1
-	adcs	x10, x10, x11
-	adcs	x9, x9, xzr
-	stp	x10, x9, [x8, #64]
+	adcs	x9, x9, x11
+	adcs	x10, x10, xzr
+	stp	x9, x10, [x8, #56]
 	ret
-.Lfunc_end126:
-	.size	.LmulPv576x64, .Lfunc_end126-.LmulPv576x64
+.Lfunc_end56:
+	.size	mulPv512x64, .Lfunc_end56-mulPv512x64
 
-	.globl	mcl_fp_mulUnitPre9L
-	.align	2
-	.type	mcl_fp_mulUnitPre9L,@function
-mcl_fp_mulUnitPre9L:                    // @mcl_fp_mulUnitPre9L
+	.globl	mcl_fp_mulUnitPre8L
+	.p2align	2
+	.type	mcl_fp_mulUnitPre8L,@function
+mcl_fp_mulUnitPre8L:                    // @mcl_fp_mulUnitPre8L
 // BB#0:
-	stp	x20, x19, [sp, #-32]!
-	stp	x29, x30, [sp, #16]
-	add	x29, sp, #16            // =16
-	sub	sp, sp, #80             // =80
+	sub	sp, sp, #96             // =96
+	stp	x19, x30, [sp, #80]     // 8-byte Folded Spill
 	mov	 x19, x0
 	mov	 x8, sp
 	mov	 x0, x1
 	mov	 x1, x2
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #64]
-	ldp	x11, x10, [sp, #48]
-	ldp	x13, x12, [sp, #32]
-	ldp	 x14, x15, [sp]
-	ldp	x16, x17, [sp, #16]
-	stp	 x14, x15, [x19]
-	stp	x16, x17, [x19, #16]
-	stp	x13, x12, [x19, #32]
-	stp	x11, x10, [x19, #48]
-	stp	x9, x8, [x19, #64]
-	sub	sp, x29, #16            // =16
-	ldp	x29, x30, [sp, #16]
-	ldp	x20, x19, [sp], #32
+	bl	mulPv512x64
+	ldp	x9, x8, [sp, #56]
+	ldp	x11, x10, [sp, #40]
+	ldp	x16, x12, [sp, #24]
+	ldp		x13, x14, [sp]
+	ldr	x15, [sp, #16]
+	stp	x10, x9, [x19, #48]
+	stp	x12, x11, [x19, #32]
+	stp		x13, x14, [x19]
+	stp	x15, x16, [x19, #16]
+	str	x8, [x19, #64]
+	ldp	x19, x30, [sp, #80]     // 8-byte Folded Reload
+	add	sp, sp, #96             // =96
 	ret
-.Lfunc_end127:
-	.size	mcl_fp_mulUnitPre9L, .Lfunc_end127-mcl_fp_mulUnitPre9L
+.Lfunc_end57:
+	.size	mcl_fp_mulUnitPre8L, .Lfunc_end57-mcl_fp_mulUnitPre8L
 
-	.globl	mcl_fpDbl_mulPre9L
-	.align	2
-	.type	mcl_fpDbl_mulPre9L,@function
-mcl_fpDbl_mulPre9L:                     // @mcl_fpDbl_mulPre9L
+	.globl	mcl_fpDbl_mulPre8L
+	.p2align	2
+	.type	mcl_fpDbl_mulPre8L,@function
+mcl_fpDbl_mulPre8L:                     // @mcl_fpDbl_mulPre8L
 // BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	add	x29, sp, #80            // =80
-	sub	sp, sp, #752            // =752
+	stp	x28, x27, [sp, #-96]!   // 8-byte Folded Spill
+	stp	x26, x25, [sp, #16]     // 8-byte Folded Spill
+	stp	x24, x23, [sp, #32]     // 8-byte Folded Spill
+	stp	x22, x21, [sp, #48]     // 8-byte Folded Spill
+	stp	x20, x19, [sp, #64]     // 8-byte Folded Spill
+	stp	x29, x30, [sp, #80]     // 8-byte Folded Spill
+	sub	sp, sp, #656            // =656
 	mov	 x21, x2
-	ldr	 x9, [x21]
 	mov	 x20, x1
+	ldr		x1, [x21]
 	mov	 x19, x0
-	sub	x8, x29, #160           // =160
+	add	x8, sp, #576            // =576
 	mov	 x0, x20
-	mov	 x1, x9
-	bl	.LmulPv576x64
-	ldur	x8, [x29, #-88]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-96]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldp	x25, x24, [x29, #-112]
-	ldp	x27, x26, [x29, #-128]
-	ldp	x22, x28, [x29, #-144]
-	ldp	x8, x23, [x29, #-160]
+	bl	mulPv512x64
+	ldr	x8, [sp, #576]
 	ldr	x1, [x21, #8]
-	str	 x8, [x19]
-	sub	x8, x29, #240           // =240
+	ldr	x22, [sp, #640]
+	ldr	x23, [sp, #632]
+	ldr	x24, [sp, #624]
+	ldr	x25, [sp, #616]
+	ldr	x26, [sp, #608]
+	ldr	x27, [sp, #600]
+	ldr	x28, [sp, #592]
+	ldr	x29, [sp, #584]
+	str		x8, [x19]
+	add	x8, sp, #496            // =496
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [x29, #-176]
-	ldp	x11, x10, [x29, #-192]
-	ldp	x13, x12, [x29, #-208]
-	ldp	x14, x16, [x29, #-240]
-	ldp	x17, x15, [x29, #-224]
-	adds	 x14, x14, x23
-	str	x14, [x19, #8]
-	adcs	x22, x16, x22
-	adcs	x23, x17, x28
-	adcs	x27, x15, x27
-	adcs	x26, x13, x26
+	bl	mulPv512x64
+	ldp	x13, x15, [sp, #496]
+	ldr	x16, [sp, #512]
+	ldr	x14, [sp, #520]
+	ldr	x12, [sp, #528]
+	adds		x13, x13, x29
+	ldr	x11, [sp, #536]
+	adcs	x28, x15, x28
+	ldr	x10, [sp, #544]
+	adcs	x27, x16, x27
+	ldr	x9, [sp, #552]
+	adcs	x26, x14, x26
+	ldr	x8, [sp, #560]
 	adcs	x25, x12, x25
 	adcs	x24, x11, x24
 	ldr	x1, [x21, #16]
-	ldr	x11, [sp, #16]          // 8-byte Folded Reload
-	adcs	x28, x10, x11
-	ldr	x10, [sp, #24]          // 8-byte Folded Reload
-	adcs	x9, x9, x10
-	adcs	x8, x8, xzr
-	stp	x8, x9, [sp, #16]
-	add	x8, sp, #512            // =512
+	adcs	x23, x10, x23
+	adcs	x22, x9, x22
+	adcs	x29, x8, xzr
+	add	x8, sp, #416            // =416
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #584]
-	ldr	x9, [sp, #576]
-	ldr	x10, [sp, #568]
-	ldr	x11, [sp, #560]
-	ldr	x12, [sp, #552]
-	ldr	x13, [sp, #544]
-	ldr	x14, [sp, #512]
-	ldr	x15, [sp, #536]
-	ldr	x16, [sp, #520]
-	ldr	x17, [sp, #528]
-	adds	 x14, x22, x14
-	str	x14, [x19, #16]
-	adcs	x22, x23, x16
-	adcs	x23, x27, x17
-	adcs	x26, x26, x15
-	adcs	x25, x25, x13
-	adcs	x24, x24, x12
-	adcs	x27, x28, x11
+	str	x13, [x19, #8]
+	bl	mulPv512x64
+	ldp	x13, x15, [sp, #416]
+	ldr	x16, [sp, #432]
+	ldp	x14, x12, [sp, #440]
+	ldp	x11, x10, [sp, #456]
+	adds		x13, x13, x28
+	adcs	x27, x15, x27
+	adcs	x26, x16, x26
+	ldp	x9, x8, [sp, #472]
+	adcs	x25, x14, x25
+	adcs	x24, x12, x24
+	adcs	x23, x11, x23
 	ldr	x1, [x21, #24]
-	ldr	x11, [sp, #24]          // 8-byte Folded Reload
-	adcs	x28, x11, x10
-	ldr	x10, [sp, #16]          // 8-byte Folded Reload
-	adcs	x9, x10, x9
-	adcs	x8, x8, xzr
-	stp	x8, x9, [sp, #16]
-	add	x8, sp, #432            // =432
+	adcs	x22, x10, x22
+	adcs	x28, x9, x29
+	adcs	x29, x8, xzr
+	add	x8, sp, #336            // =336
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #496]
-	ldp	x11, x10, [sp, #480]
-	ldp	x13, x12, [sp, #464]
-	ldp	x14, x16, [sp, #432]
-	ldp	x17, x15, [sp, #448]
-	adds	 x14, x22, x14
-	str	x14, [x19, #24]
-	adcs	x22, x23, x16
-	adcs	x23, x26, x17
-	adcs	x25, x25, x15
-	adcs	x24, x24, x13
-	adcs	x26, x27, x12
-	adcs	x27, x28, x11
+	str	x13, [x19, #16]
+	bl	mulPv512x64
+	ldp	x13, x15, [sp, #336]
+	ldr	x16, [sp, #352]
+	ldp	x14, x12, [sp, #360]
+	ldp	x11, x10, [sp, #376]
+	adds		x13, x13, x27
+	adcs	x26, x15, x26
+	adcs	x25, x16, x25
+	ldp	x9, x8, [sp, #392]
+	adcs	x24, x14, x24
+	adcs	x23, x12, x23
+	adcs	x22, x11, x22
 	ldr	x1, [x21, #32]
-	ldr	x11, [sp, #24]          // 8-byte Folded Reload
-	adcs	x28, x11, x10
-	ldr	x10, [sp, #16]          // 8-byte Folded Reload
-	adcs	x9, x10, x9
-	adcs	x8, x8, xzr
-	stp	x8, x9, [sp, #16]
-	add	x8, sp, #352            // =352
+	adcs	x27, x10, x28
+	adcs	x28, x9, x29
+	adcs	x29, x8, xzr
+	add	x8, sp, #256            // =256
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #416]
-	ldp	x11, x10, [sp, #400]
-	ldp	x13, x12, [sp, #384]
-	ldp	x14, x16, [sp, #352]
-	ldp	x17, x15, [sp, #368]
-	adds	 x14, x22, x14
-	str	x14, [x19, #32]
-	adcs	x22, x23, x16
-	adcs	x23, x25, x17
-	adcs	x24, x24, x15
-	adcs	x25, x26, x13
-	adcs	x26, x27, x12
-	adcs	x27, x28, x11
+	str	x13, [x19, #24]
+	bl	mulPv512x64
+	ldp	x13, x15, [sp, #256]
+	ldr	x16, [sp, #272]
+	ldp	x14, x12, [sp, #280]
+	ldp	x11, x10, [sp, #296]
+	adds		x13, x13, x26
+	adcs	x25, x15, x25
+	adcs	x24, x16, x24
+	ldp	x9, x8, [sp, #312]
+	adcs	x23, x14, x23
+	adcs	x22, x12, x22
+	adcs	x26, x11, x27
 	ldr	x1, [x21, #40]
-	ldr	x11, [sp, #24]          // 8-byte Folded Reload
-	adcs	x28, x11, x10
-	ldr	x10, [sp, #16]          // 8-byte Folded Reload
-	adcs	x9, x10, x9
-	adcs	x8, x8, xzr
-	stp	x8, x9, [sp, #16]
-	add	x8, sp, #272            // =272
+	adcs	x27, x10, x28
+	adcs	x28, x9, x29
+	adcs	x29, x8, xzr
+	add	x8, sp, #176            // =176
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #336]
-	ldp	x11, x10, [sp, #320]
-	ldp	x13, x12, [sp, #304]
-	ldp	x14, x16, [sp, #272]
-	ldp	x17, x15, [sp, #288]
-	adds	 x14, x22, x14
-	str	x14, [x19, #40]
-	adcs	x22, x23, x16
-	adcs	x23, x24, x17
-	adcs	x24, x25, x15
-	adcs	x25, x26, x13
-	adcs	x26, x27, x12
-	adcs	x27, x28, x11
+	str	x13, [x19, #32]
+	bl	mulPv512x64
+	ldp	x13, x15, [sp, #176]
+	ldr	x16, [sp, #192]
+	ldp	x14, x12, [sp, #200]
+	ldp	x11, x10, [sp, #216]
+	adds		x13, x13, x25
+	adcs	x24, x15, x24
+	adcs	x23, x16, x23
+	ldp	x9, x8, [sp, #232]
+	adcs	x22, x14, x22
+	adcs	x25, x12, x26
+	adcs	x26, x11, x27
 	ldr	x1, [x21, #48]
-	ldr	x11, [sp, #24]          // 8-byte Folded Reload
-	adcs	x28, x11, x10
-	ldr	x10, [sp, #16]          // 8-byte Folded Reload
-	adcs	x9, x10, x9
-	adcs	x8, x8, xzr
-	stp	x8, x9, [sp, #16]
-	add	x8, sp, #192            // =192
+	adcs	x27, x10, x28
+	adcs	x28, x9, x29
+	adcs	x29, x8, xzr
+	add	x8, sp, #96             // =96
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #256]
-	ldp	x11, x10, [sp, #240]
-	ldp	x13, x12, [sp, #224]
-	ldp	x14, x16, [sp, #192]
-	ldp	x17, x15, [sp, #208]
-	adds	 x14, x22, x14
-	str	x14, [x19, #48]
-	adcs	x22, x23, x16
-	adcs	x23, x24, x17
-	adcs	x24, x25, x15
-	adcs	x25, x26, x13
-	adcs	x26, x27, x12
-	adcs	x27, x28, x11
+	str	x13, [x19, #40]
+	bl	mulPv512x64
+	ldp	x13, x15, [sp, #96]
+	ldr	x16, [sp, #112]
+	ldp	x14, x12, [sp, #120]
+	ldp	x11, x10, [sp, #136]
+	adds		x13, x13, x24
+	adcs	x23, x15, x23
+	adcs	x22, x16, x22
+	ldp	x9, x8, [sp, #152]
+	adcs	x24, x14, x25
+	adcs	x25, x12, x26
+	adcs	x26, x11, x27
 	ldr	x1, [x21, #56]
-	ldr	x11, [sp, #24]          // 8-byte Folded Reload
-	adcs	x28, x11, x10
-	ldr	x10, [sp, #16]          // 8-byte Folded Reload
-	adcs	x9, x10, x9
-	adcs	x8, x8, xzr
-	stp	x8, x9, [sp, #16]
-	add	x8, sp, #112            // =112
-	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #176]
-	ldp	x11, x10, [sp, #160]
-	ldp	x13, x12, [sp, #144]
-	ldp	x14, x16, [sp, #112]
-	ldp	x17, x15, [sp, #128]
-	adds	 x14, x22, x14
-	str	x14, [x19, #56]
-	adcs	x22, x23, x16
-	adcs	x23, x24, x17
-	adcs	x24, x25, x15
-	adcs	x25, x26, x13
-	adcs	x26, x27, x12
-	adcs	x27, x28, x11
-	ldr	x1, [x21, #64]
-	ldr	x11, [sp, #24]          // 8-byte Folded Reload
-	adcs	x21, x11, x10
-	ldr	x10, [sp, #16]          // 8-byte Folded Reload
-	adcs	x28, x10, x9
-	adcs	x8, x8, xzr
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	add	x8, sp, #32             // =32
+	adcs	x21, x10, x28
+	adcs	x27, x9, x29
+	adcs	x28, x8, xzr
+	add	x8, sp, #16             // =16
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #96]
-	ldp	x11, x10, [sp, #80]
-	ldp	x13, x12, [sp, #64]
-	ldp	x14, x16, [sp, #32]
-	ldp	x17, x15, [sp, #48]
-	adds	 x14, x22, x14
-	str	x14, [x19, #64]
-	adcs	x14, x23, x16
-	str	x14, [x19, #72]
-	adcs	x14, x24, x17
-	str	x14, [x19, #80]
-	adcs	x14, x25, x15
-	adcs	x13, x26, x13
-	stp	x14, x13, [x19, #88]
-	adcs	x12, x27, x12
-	adcs	x11, x21, x11
-	stp	x12, x11, [x19, #104]
-	adcs	x10, x28, x10
-	str	x10, [x19, #120]
-	ldr	x10, [sp, #24]          // 8-byte Folded Reload
-	adcs	x9, x10, x9
+	str	x13, [x19, #48]
+	bl	mulPv512x64
+	ldp	x13, x14, [sp, #16]
+	ldr	x16, [sp, #32]
+	ldp	x15, x12, [sp, #40]
+	ldp	x11, x10, [sp, #56]
+	adds		x13, x13, x23
+	adcs	x14, x14, x22
+	ldp	x9, x8, [sp, #72]
+	stp	x13, x14, [x19, #56]
+	adcs	x13, x16, x24
+	adcs	x14, x15, x25
+	adcs	x12, x12, x26
+	adcs	x11, x11, x21
+	adcs	x10, x10, x27
+	adcs	x9, x9, x28
 	adcs	x8, x8, xzr
-	stp	x9, x8, [x19, #128]
-	sub	sp, x29, #80            // =80
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
+	stp	x13, x14, [x19, #72]
+	stp	x12, x11, [x19, #88]
+	stp	x10, x9, [x19, #104]
+	str	x8, [x19, #120]
+	add	sp, sp, #656            // =656
+	ldp	x29, x30, [sp, #80]     // 8-byte Folded Reload
+	ldp	x20, x19, [sp, #64]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #48]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #32]     // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #16]     // 8-byte Folded Reload
+	ldp	x28, x27, [sp], #96     // 8-byte Folded Reload
 	ret
-.Lfunc_end128:
-	.size	mcl_fpDbl_mulPre9L, .Lfunc_end128-mcl_fpDbl_mulPre9L
+.Lfunc_end58:
+	.size	mcl_fpDbl_mulPre8L, .Lfunc_end58-mcl_fpDbl_mulPre8L
 
-	.globl	mcl_fpDbl_sqrPre9L
-	.align	2
-	.type	mcl_fpDbl_sqrPre9L,@function
-mcl_fpDbl_sqrPre9L:                     // @mcl_fpDbl_sqrPre9L
+	.globl	mcl_fpDbl_sqrPre8L
+	.p2align	2
+	.type	mcl_fpDbl_sqrPre8L,@function
+mcl_fpDbl_sqrPre8L:                     // @mcl_fpDbl_sqrPre8L
 // BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	add	x29, sp, #80            // =80
-	sub	sp, sp, #736            // =736
+	stp	x28, x27, [sp, #-96]!   // 8-byte Folded Spill
+	stp	x26, x25, [sp, #16]     // 8-byte Folded Spill
+	stp	x24, x23, [sp, #32]     // 8-byte Folded Spill
+	stp	x22, x21, [sp, #48]     // 8-byte Folded Spill
+	stp	x20, x19, [sp, #64]     // 8-byte Folded Spill
+	stp	x29, x30, [sp, #80]     // 8-byte Folded Spill
+	sub	sp, sp, #640            // =640
 	mov	 x20, x1
-	ldr	 x1, [x20]
+	ldr		x1, [x20]
 	mov	 x19, x0
-	sub	x8, x29, #160           // =160
+	add	x8, sp, #560            // =560
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldur	x8, [x29, #-88]
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	ldp	x23, x22, [x29, #-104]
-	ldp	x25, x24, [x29, #-120]
-	ldp	x27, x26, [x29, #-136]
-	ldp	x21, x28, [x29, #-152]
-	ldur	x8, [x29, #-160]
+	bl	mulPv512x64
+	ldr	x8, [sp, #560]
 	ldr	x1, [x20, #8]
-	str	 x8, [x19]
-	sub	x8, x29, #240           // =240
+	ldr	x21, [sp, #624]
+	ldr	x22, [sp, #616]
+	ldr	x23, [sp, #608]
+	ldr	x24, [sp, #600]
+	ldr	x25, [sp, #592]
+	ldr	x26, [sp, #584]
+	ldr	x27, [sp, #576]
+	ldr	x28, [sp, #568]
+	str		x8, [x19]
+	add	x8, sp, #480            // =480
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [x29, #-176]
-	ldp	x11, x10, [x29, #-192]
-	ldp	x13, x12, [x29, #-208]
-	ldp	x14, x16, [x29, #-240]
-	ldp	x17, x15, [x29, #-224]
-	adds	 x14, x14, x21
-	str	x14, [x19, #8]
-	adcs	x21, x16, x28
-	adcs	x27, x17, x27
-	adcs	x26, x15, x26
-	adcs	x25, x13, x25
+	bl	mulPv512x64
+	ldp	x13, x15, [sp, #480]
+	ldr	x16, [sp, #496]
+	ldp	x14, x12, [sp, #504]
+	ldr	x11, [sp, #520]
+	adds		x13, x13, x28
+	adcs	x27, x15, x27
+	ldr	x10, [sp, #528]
+	adcs	x26, x16, x26
+	ldr	x9, [sp, #536]
+	adcs	x25, x14, x25
+	ldr	x8, [sp, #544]
 	adcs	x24, x12, x24
 	adcs	x23, x11, x23
 	ldr	x1, [x20, #16]
 	adcs	x22, x10, x22
-	ldr	x10, [sp, #8]           // 8-byte Folded Reload
-	adcs	x28, x9, x10
-	adcs	x8, x8, xzr
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	add	x8, sp, #496            // =496
+	adcs	x21, x9, x21
+	adcs	x28, x8, xzr
+	add	x8, sp, #400            // =400
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #568]
-	ldr	x9, [sp, #560]
-	ldr	x10, [sp, #552]
-	ldr	x11, [sp, #544]
-	ldr	x12, [sp, #536]
-	ldr	x13, [sp, #528]
-	ldp	x14, x16, [sp, #496]
-	ldr	x15, [sp, #520]
-	ldr	x17, [sp, #512]
-	adds	 x14, x21, x14
-	str	x14, [x19, #16]
-	adcs	x21, x27, x16
-	adcs	x26, x26, x17
-	adcs	x25, x25, x15
-	adcs	x24, x24, x13
-	adcs	x23, x23, x12
-	adcs	x22, x22, x11
+	str	x13, [x19, #8]
+	bl	mulPv512x64
+	ldp	x13, x15, [sp, #400]
+	ldr	x16, [sp, #416]
+	ldp	x14, x12, [sp, #424]
+	ldp	x11, x10, [sp, #440]
+	adds		x13, x13, x27
+	adcs	x26, x15, x26
+	adcs	x25, x16, x25
+	ldp	x9, x8, [sp, #456]
+	adcs	x24, x14, x24
+	adcs	x23, x12, x23
+	adcs	x22, x11, x22
 	ldr	x1, [x20, #24]
-	adcs	x27, x28, x10
-	ldr	x10, [sp, #8]           // 8-byte Folded Reload
-	adcs	x28, x10, x9
-	adcs	x8, x8, xzr
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	add	x8, sp, #416            // =416
+	adcs	x21, x10, x21
+	adcs	x27, x9, x28
+	adcs	x28, x8, xzr
+	add	x8, sp, #320            // =320
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #480]
-	ldp	x11, x10, [sp, #464]
-	ldp	x13, x12, [sp, #448]
-	ldp	x14, x16, [sp, #416]
-	ldp	x17, x15, [sp, #432]
-	adds	 x14, x21, x14
-	str	x14, [x19, #24]
-	adcs	x21, x26, x16
-	adcs	x25, x25, x17
-	adcs	x24, x24, x15
-	adcs	x23, x23, x13
-	adcs	x22, x22, x12
-	adcs	x26, x27, x11
+	str	x13, [x19, #16]
+	bl	mulPv512x64
+	ldp	x13, x15, [sp, #320]
+	ldr	x16, [sp, #336]
+	ldp	x14, x12, [sp, #344]
+	ldp	x11, x10, [sp, #360]
+	adds		x13, x13, x26
+	adcs	x25, x15, x25
+	adcs	x24, x16, x24
+	ldp	x9, x8, [sp, #376]
+	adcs	x23, x14, x23
+	adcs	x22, x12, x22
+	adcs	x21, x11, x21
 	ldr	x1, [x20, #32]
-	adcs	x27, x28, x10
-	ldr	x10, [sp, #8]           // 8-byte Folded Reload
-	adcs	x28, x10, x9
-	adcs	x8, x8, xzr
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	add	x8, sp, #336            // =336
+	adcs	x26, x10, x27
+	adcs	x27, x9, x28
+	adcs	x28, x8, xzr
+	add	x8, sp, #240            // =240
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #400]
-	ldp	x11, x10, [sp, #384]
-	ldp	x13, x12, [sp, #368]
-	ldp	x14, x16, [sp, #336]
-	ldp	x17, x15, [sp, #352]
-	adds	 x14, x21, x14
-	str	x14, [x19, #32]
-	adcs	x21, x25, x16
-	adcs	x24, x24, x17
-	adcs	x23, x23, x15
-	adcs	x22, x22, x13
-	adcs	x25, x26, x12
-	adcs	x26, x27, x11
+	str	x13, [x19, #24]
+	bl	mulPv512x64
+	ldp	x13, x15, [sp, #240]
+	ldr	x16, [sp, #256]
+	ldp	x14, x12, [sp, #264]
+	ldp	x11, x10, [sp, #280]
+	adds		x13, x13, x25
+	adcs	x24, x15, x24
+	adcs	x23, x16, x23
+	ldp	x9, x8, [sp, #296]
+	adcs	x22, x14, x22
+	adcs	x21, x12, x21
+	adcs	x25, x11, x26
 	ldr	x1, [x20, #40]
-	adcs	x27, x28, x10
-	ldr	x10, [sp, #8]           // 8-byte Folded Reload
-	adcs	x28, x10, x9
-	adcs	x8, x8, xzr
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	add	x8, sp, #256            // =256
+	adcs	x26, x10, x27
+	adcs	x27, x9, x28
+	adcs	x28, x8, xzr
+	add	x8, sp, #160            // =160
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #320]
-	ldp	x11, x10, [sp, #304]
-	ldp	x13, x12, [sp, #288]
-	ldp	x14, x16, [sp, #256]
-	ldp	x17, x15, [sp, #272]
-	adds	 x14, x21, x14
-	str	x14, [x19, #40]
-	adcs	x21, x24, x16
-	adcs	x23, x23, x17
-	adcs	x22, x22, x15
-	adcs	x24, x25, x13
-	adcs	x25, x26, x12
-	adcs	x26, x27, x11
+	str	x13, [x19, #32]
+	bl	mulPv512x64
+	ldp	x13, x15, [sp, #160]
+	ldr	x16, [sp, #176]
+	ldp	x14, x12, [sp, #184]
+	ldp	x11, x10, [sp, #200]
+	adds		x13, x13, x24
+	adcs	x23, x15, x23
+	adcs	x22, x16, x22
+	ldp	x9, x8, [sp, #216]
+	adcs	x21, x14, x21
+	adcs	x24, x12, x25
+	adcs	x25, x11, x26
 	ldr	x1, [x20, #48]
-	adcs	x27, x28, x10
-	ldr	x10, [sp, #8]           // 8-byte Folded Reload
-	adcs	x28, x10, x9
-	adcs	x8, x8, xzr
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	add	x8, sp, #176            // =176
+	adcs	x26, x10, x27
+	adcs	x27, x9, x28
+	adcs	x28, x8, xzr
+	add	x8, sp, #80             // =80
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #240]
-	ldp	x11, x10, [sp, #224]
-	ldp	x13, x12, [sp, #208]
-	ldp	x14, x16, [sp, #176]
-	ldp	x17, x15, [sp, #192]
-	adds	 x14, x21, x14
-	str	x14, [x19, #48]
-	adcs	x21, x23, x16
-	adcs	x22, x22, x17
-	adcs	x23, x24, x15
-	adcs	x24, x25, x13
-	adcs	x25, x26, x12
-	adcs	x26, x27, x11
+	str	x13, [x19, #40]
+	bl	mulPv512x64
+	ldp	x13, x15, [sp, #80]
+	ldr	x16, [sp, #96]
+	ldp	x14, x12, [sp, #104]
+	ldp	x11, x10, [sp, #120]
+	adds		x13, x13, x23
+	adcs	x22, x15, x22
+	adcs	x21, x16, x21
+	ldp	x9, x8, [sp, #136]
+	adcs	x23, x14, x24
+	adcs	x24, x12, x25
+	adcs	x25, x11, x26
 	ldr	x1, [x20, #56]
-	adcs	x27, x28, x10
-	ldr	x10, [sp, #8]           // 8-byte Folded Reload
-	adcs	x28, x10, x9
-	adcs	x8, x8, xzr
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	add	x8, sp, #96             // =96
-	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #160]
-	ldp	x11, x10, [sp, #144]
-	ldp	x13, x12, [sp, #128]
-	ldp	x14, x16, [sp, #96]
-	ldp	x17, x15, [sp, #112]
-	adds	 x14, x21, x14
-	str	x14, [x19, #56]
-	adcs	x21, x22, x16
-	adcs	x22, x23, x17
-	adcs	x23, x24, x15
-	adcs	x24, x25, x13
-	adcs	x25, x26, x12
-	adcs	x26, x27, x11
-	ldr	x1, [x20, #64]
-	adcs	x27, x28, x10
-	ldr	x10, [sp, #8]           // 8-byte Folded Reload
-	adcs	x28, x10, x9
-	adcs	x8, x8, xzr
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	add	x8, sp, #16             // =16
+	adcs	x26, x10, x27
+	adcs	x27, x9, x28
+	adcs	x28, x8, xzr
+	mov	 x8, sp
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #80]
-	ldp	x11, x10, [sp, #64]
-	ldp	x13, x12, [sp, #48]
-	ldp	x14, x16, [sp, #16]
-	ldp	x17, x15, [sp, #32]
-	adds	 x14, x21, x14
-	str	x14, [x19, #64]
-	adcs	x14, x22, x16
-	str	x14, [x19, #72]
-	adcs	x14, x23, x17
-	str	x14, [x19, #80]
-	adcs	x14, x24, x15
-	adcs	x13, x25, x13
-	stp	x14, x13, [x19, #88]
-	adcs	x12, x26, x12
-	adcs	x11, x27, x11
-	stp	x12, x11, [x19, #104]
-	adcs	x10, x28, x10
-	str	x10, [x19, #120]
-	ldr	x10, [sp, #8]           // 8-byte Folded Reload
-	adcs	x9, x10, x9
+	str	x13, [x19, #48]
+	bl	mulPv512x64
+	ldp		x13, x14, [sp]
+	ldr	x16, [sp, #16]
+	ldp	x15, x12, [sp, #24]
+	ldp	x11, x10, [sp, #40]
+	adds		x13, x13, x22
+	adcs	x14, x14, x21
+	ldp	x9, x8, [sp, #56]
+	stp	x13, x14, [x19, #56]
+	adcs	x13, x16, x23
+	adcs	x14, x15, x24
+	adcs	x12, x12, x25
+	adcs	x11, x11, x26
+	adcs	x10, x10, x27
+	adcs	x9, x9, x28
 	adcs	x8, x8, xzr
-	stp	x9, x8, [x19, #128]
-	sub	sp, x29, #80            // =80
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
+	stp	x13, x14, [x19, #72]
+	stp	x12, x11, [x19, #88]
+	stp	x10, x9, [x19, #104]
+	str	x8, [x19, #120]
+	add	sp, sp, #640            // =640
+	ldp	x29, x30, [sp, #80]     // 8-byte Folded Reload
+	ldp	x20, x19, [sp, #64]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #48]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #32]     // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #16]     // 8-byte Folded Reload
+	ldp	x28, x27, [sp], #96     // 8-byte Folded Reload
 	ret
-.Lfunc_end129:
-	.size	mcl_fpDbl_sqrPre9L, .Lfunc_end129-mcl_fpDbl_sqrPre9L
+.Lfunc_end59:
+	.size	mcl_fpDbl_sqrPre8L, .Lfunc_end59-mcl_fpDbl_sqrPre8L
 
-	.globl	mcl_fp_mont9L
-	.align	2
-	.type	mcl_fp_mont9L,@function
-mcl_fp_mont9L:                          // @mcl_fp_mont9L
+	.globl	mcl_fp_mont8L
+	.p2align	2
+	.type	mcl_fp_mont8L,@function
+mcl_fp_mont8L:                          // @mcl_fp_mont8L
 // BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	add	x29, sp, #80            // =80
-	sub	sp, sp, #1600           // =1600
+	stp	x28, x27, [sp, #-96]!   // 8-byte Folded Spill
+	stp	x26, x25, [sp, #16]     // 8-byte Folded Spill
+	stp	x24, x23, [sp, #32]     // 8-byte Folded Spill
+	stp	x22, x21, [sp, #48]     // 8-byte Folded Spill
+	stp	x20, x19, [sp, #64]     // 8-byte Folded Spill
+	stp	x29, x30, [sp, #80]     // 8-byte Folded Spill
+	sub	sp, sp, #1424           // =1424
 	mov	 x20, x3
-	mov	 x28, x2
-	str	x28, [sp, #136]         // 8-byte Folded Spill
 	ldur	x19, [x20, #-8]
-	str	x19, [sp, #144]         // 8-byte Folded Spill
-	ldr	 x9, [x28]
-	mov	 x23, x1
-	str	x23, [sp, #152]         // 8-byte Folded Spill
-	str	x0, [sp, #128]          // 8-byte Folded Spill
-	sub	x8, x29, #160           // =160
-	mov	 x0, x23
-	mov	 x1, x9
-	bl	.LmulPv576x64
-	ldur	x24, [x29, #-160]
-	ldur	x8, [x29, #-88]
-	str	x8, [sp, #120]          // 8-byte Folded Spill
-	ldur	x8, [x29, #-96]
-	str	x8, [sp, #112]          // 8-byte Folded Spill
-	ldur	x8, [x29, #-104]
+	mov	 x22, x2
+	mov	 x25, x1
+	add	x8, sp, #1344           // =1344
+	str	x19, [sp, #136]         // 8-byte Folded Spill
+	ldr		x1, [x22]
+	str	x0, [sp, #112]          // 8-byte Folded Spill
+	mov	 x0, x25
+	str	x25, [sp, #128]         // 8-byte Folded Spill
+	bl	mulPv512x64
+	ldr	x8, [sp, #1408]
+	ldr	x24, [sp, #1344]
+	mov	 x0, x20
 	str	x8, [sp, #104]          // 8-byte Folded Spill
-	ldur	x8, [x29, #-112]
+	ldr	x8, [sp, #1400]
+	mul		x1, x24, x19
 	str	x8, [sp, #96]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-120]
+	ldr	x8, [sp, #1392]
 	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-128]
+	ldr	x8, [sp, #1384]
 	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-136]
+	ldr	x8, [sp, #1376]
 	str	x8, [sp, #72]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-144]
+	ldr	x8, [sp, #1368]
 	str	x8, [sp, #64]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-152]
-	str	x8, [sp, #48]           // 8-byte Folded Spill
-	mul	 x1, x24, x19
-	sub	x8, x29, #240           // =240
-	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldur	x8, [x29, #-168]
+	ldr	x8, [sp, #1360]
 	str	x8, [sp, #56]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-176]
+	ldr	x8, [sp, #1352]
+	str	x8, [sp, #48]           // 8-byte Folded Spill
+	add	x8, sp, #1264           // =1264
+	bl	mulPv512x64
+	ldr	x8, [sp, #1328]
+	ldr	x26, [sp, #1312]
+	ldr	x27, [sp, #1304]
+	ldr	x28, [sp, #1296]
 	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-184]
+	ldr	x8, [sp, #1320]
+	ldr	x29, [sp, #1288]
+	ldr	x19, [sp, #1280]
+	ldr	x23, [sp, #1272]
 	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-192]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldp	x21, x19, [x29, #-208]
-	ldp	x26, x22, [x29, #-224]
-	ldp	x27, x25, [x29, #-240]
-	ldr	x1, [x28, #8]
-	add	x8, sp, #1360           // =1360
-	mov	 x0, x23
-	bl	.LmulPv576x64
-	cmn	 x27, x24
-	ldr	x8, [sp, #1432]
-	ldr	x9, [sp, #1424]
-	ldr	x10, [sp, #48]          // 8-byte Folded Reload
-	adcs	x10, x25, x10
-	ldr	x11, [sp, #1416]
-	ldp	x12, x14, [sp, #64]
-	adcs	x12, x26, x12
-	ldr	x13, [sp, #1408]
-	adcs	x14, x22, x14
-	ldr	x15, [sp, #1400]
-	ldp	x16, x18, [sp, #80]
-	adcs	x16, x21, x16
-	ldr	x17, [sp, #1392]
-	adcs	x18, x19, x18
-	ldr	x0, [sp, #1384]
-	ldp	x1, x3, [sp, #96]
-	ldp	x2, x4, [sp, #24]
-	adcs	x1, x2, x1
-	ldr	x2, [sp, #1376]
+	ldr	x1, [x22, #8]
+	ldr	x21, [sp, #1264]
+	add	x8, sp, #1184           // =1184
+	mov	 x0, x25
+	str	x22, [sp, #120]         // 8-byte Folded Spill
+	bl	mulPv512x64
+	ldp	x10, x12, [sp, #48]     // 8-byte Folded Reload
+	ldp	x14, x16, [sp, #64]     // 8-byte Folded Reload
+	cmn		x21, x24
+	ldp	x18, x1, [sp, #80]      // 8-byte Folded Reload
+	adcs	x10, x23, x10
+	adcs	x12, x19, x12
+	adcs	x14, x29, x14
+	ldp	x3, x5, [sp, #96]       // 8-byte Folded Reload
+	ldp	x4, x6, [sp, #32]       // 8-byte Folded Reload
+	adcs	x16, x28, x16
+	adcs	x18, x27, x18
+	adcs	x1, x26, x1
+	ldr	x2, [sp, #1184]
 	adcs	x3, x4, x3
-	ldr	x4, [sp, #1360]
-	ldp	x5, x7, [sp, #112]
-	ldr	x6, [sp, #40]           // 8-byte Folded Reload
+	ldr	x4, [sp, #1192]
+	ldr	x0, [sp, #1200]
 	adcs	x5, x6, x5
-	ldr	x6, [sp, #1368]
-	ldr	x19, [sp, #56]          // 8-byte Folded Reload
-	adcs	x7, x19, x7
-	adcs	x19, xzr, xzr
-	adds	 x21, x10, x4
-	adcs	x10, x12, x6
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x14, x2
-	str	x10, [sp, #104]         // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #96]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
-	str	x10, [sp, #88]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	adcs	x8, x19, x8
-	stp	x8, x9, [sp, #112]
-	adcs	x8, xzr, xzr
-	stp	x8, x10, [sp, #56]
-	ldr	x24, [sp, #144]         // 8-byte Folded Reload
-	mul	 x1, x21, x24
-	add	x8, sp, #1280           // =1280
-	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #1352]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1344]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1336]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1328]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x26, [sp, #1320]
-	ldr	x27, [sp, #1312]
-	ldr	x28, [sp, #1304]
-	ldr	x22, [sp, #1296]
-	ldr	x19, [sp, #1288]
-	ldr	x23, [sp, #1280]
-	ldr	x25, [sp, #136]         // 8-byte Folded Reload
-	ldr	x1, [x25, #16]
-	add	x8, sp, #1200           // =1200
-	ldr	x0, [sp, #152]          // 8-byte Folded Reload
-	bl	.LmulPv576x64
-	cmn	 x21, x23
-	ldr	x8, [sp, #1272]
-	ldr	x9, [sp, #1264]
-	ldr	x10, [sp, #48]          // 8-byte Folded Reload
-	adcs	x10, x10, x19
-	ldr	x11, [sp, #1256]
-	ldp	x14, x12, [sp, #96]
-	adcs	x12, x12, x22
-	ldr	x13, [sp, #1248]
-	adcs	x14, x14, x28
-	ldr	x15, [sp, #1240]
-	ldp	x18, x16, [sp, #80]
-	adcs	x16, x16, x27
-	ldr	x17, [sp, #1232]
-	adcs	x18, x18, x26
-	ldr	x0, [sp, #1224]
-	ldp	x3, x1, [sp, #64]
-	ldp	x2, x4, [sp, #16]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #1216]
-	adcs	x3, x3, x4
-	ldr	x4, [sp, #1200]
-	ldp	x7, x5, [sp, #112]
-	ldp	x6, x19, [sp, #32]
-	adcs	x5, x5, x6
-	ldr	x6, [sp, #1208]
-	adcs	x7, x7, x19
-	ldr	x19, [sp, #56]          // 8-byte Folded Reload
-	adcs	x19, x19, xzr
-	adds	 x21, x10, x4
-	adcs	x10, x12, x6
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x14, x2
-	str	x10, [sp, #104]         // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #96]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
+	ldr	x17, [sp, #1208]
+	adcs	x6, xzr, xzr
+	ldr	x15, [sp, #1216]
+	adds		x19, x10, x2
+	ldr	x13, [sp, #1224]
+	adcs	x10, x12, x4
+	ldr	x11, [sp, #1232]
+	str	x10, [sp, #40]          // 8-byte Folded Spill
+	adcs	x10, x14, x0
+	ldr	x9, [sp, #1240]
 	str	x10, [sp, #88]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
+	adcs	x10, x16, x17
+	ldr	x8, [sp, #1248]
 	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
+	adcs	x10, x18, x15
 	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	adcs	x8, x19, x8
-	stp	x8, x9, [sp, #112]
+	adcs	x10, x1, x13
+	str	x10, [sp, #64]          // 8-byte Folded Spill
+	adcs	x10, x3, x11
+	ldr	x27, [sp, #136]         // 8-byte Folded Reload
+	adcs	x9, x5, x9
+	adcs	x8, x6, x8
+	stp	x8, x9, [sp, #96]       // 8-byte Folded Spill
 	adcs	x8, xzr, xzr
-	stp	x8, x10, [sp, #56]
-	mul	 x1, x21, x24
-	add	x8, sp, #1120           // =1120
+	stp	x8, x10, [sp, #48]      // 8-byte Folded Spill
+	mul		x1, x19, x27
+	add	x8, sp, #1104           // =1104
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #1192]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1184]
+	bl	mulPv512x64
+	ldr	x8, [sp, #1168]
+	ldr	x28, [sp, #128]         // 8-byte Folded Reload
+	ldr	x23, [sp, #1144]
+	ldr	x24, [sp, #1136]
 	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1176]
+	ldr	x8, [sp, #1160]
+	ldr	x25, [sp, #1128]
+	ldr	x26, [sp, #1120]
+	ldr	x29, [sp, #1112]
 	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1168]
+	ldr	x8, [sp, #1152]
+	ldr	x21, [sp, #1104]
+	mov	 x0, x28
 	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x26, [sp, #1160]
-	ldr	x27, [sp, #1152]
-	ldr	x28, [sp, #1144]
-	ldr	x22, [sp, #1136]
-	ldr	x19, [sp, #1128]
-	ldr	x23, [sp, #1120]
-	ldr	x1, [x25, #24]
-	add	x8, sp, #1040           // =1040
-	ldr	x24, [sp, #152]         // 8-byte Folded Reload
-	mov	 x0, x24
-	bl	.LmulPv576x64
-	cmn	 x21, x23
-	ldr	x8, [sp, #1112]
-	ldr	x9, [sp, #1104]
-	ldr	x10, [sp, #48]          // 8-byte Folded Reload
-	adcs	x10, x10, x19
-	ldr	x11, [sp, #1096]
-	ldp	x14, x12, [sp, #96]
-	adcs	x12, x12, x22
-	ldr	x13, [sp, #1088]
-	adcs	x14, x14, x28
-	ldr	x15, [sp, #1080]
-	ldp	x18, x16, [sp, #80]
-	adcs	x16, x16, x27
-	ldr	x17, [sp, #1072]
-	adcs	x18, x18, x26
-	ldr	x0, [sp, #1064]
-	ldp	x3, x1, [sp, #64]
-	ldp	x2, x4, [sp, #16]
+	ldr	x1, [x22, #16]
+	add	x8, sp, #1024           // =1024
+	bl	mulPv512x64
+	ldr	x10, [sp, #40]          // 8-byte Folded Reload
+	ldp	x14, x12, [sp, #80]     // 8-byte Folded Reload
+	cmn		x19, x21
+	ldp	x18, x16, [sp, #64]     // 8-byte Folded Reload
+	adcs	x10, x10, x29
+	adcs	x12, x12, x26
+	ldr	x1, [sp, #56]           // 8-byte Folded Reload
+	ldp	x2, x4, [sp, #16]       // 8-byte Folded Reload
+	adcs	x14, x14, x25
+	ldp	x5, x3, [sp, #96]       // 8-byte Folded Reload
+	adcs	x16, x16, x24
+	ldr	x6, [sp, #32]           // 8-byte Folded Reload
+	adcs	x18, x18, x23
 	adcs	x1, x1, x2
-	ldr	x2, [sp, #1056]
 	adcs	x3, x3, x4
-	ldr	x4, [sp, #1040]
-	ldp	x7, x5, [sp, #112]
-	ldp	x6, x19, [sp, #32]
 	adcs	x5, x5, x6
-	ldr	x6, [sp, #1048]
-	adcs	x7, x7, x19
-	ldr	x19, [sp, #56]          // 8-byte Folded Reload
-	adcs	x19, x19, xzr
-	adds	 x21, x10, x4
-	adcs	x10, x12, x6
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x14, x2
-	str	x10, [sp, #104]         // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #96]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
+	ldr	x6, [sp, #48]           // 8-byte Folded Reload
+	ldr	x2, [sp, #1024]
+	ldr	x4, [sp, #1032]
+	ldr	x0, [sp, #1040]
+	ldr	x17, [sp, #1048]
+	adcs	x6, x6, xzr
+	ldr	x15, [sp, #1056]
+	adds		x19, x10, x2
+	ldr	x13, [sp, #1064]
+	adcs	x10, x12, x4
+	ldr	x11, [sp, #1072]
+	str	x10, [sp, #40]          // 8-byte Folded Spill
+	adcs	x10, x14, x0
+	ldr	x9, [sp, #1080]
 	str	x10, [sp, #88]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
+	adcs	x10, x16, x17
+	ldr	x8, [sp, #1088]
 	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
+	adcs	x10, x18, x15
 	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	adcs	x8, x19, x8
-	stp	x8, x9, [sp, #112]
+	adcs	x10, x1, x13
+	str	x10, [sp, #64]          // 8-byte Folded Spill
+	adcs	x10, x3, x11
+	adcs	x9, x5, x9
+	adcs	x8, x6, x8
+	stp	x8, x9, [sp, #96]       // 8-byte Folded Spill
 	adcs	x8, xzr, xzr
-	stp	x8, x10, [sp, #56]
-	ldr	x8, [sp, #144]          // 8-byte Folded Reload
-	mul	 x1, x21, x8
-	add	x8, sp, #960            // =960
+	stp	x8, x10, [sp, #48]      // 8-byte Folded Spill
+	mul		x1, x19, x27
+	add	x8, sp, #944            // =944
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #1032]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1024]
+	bl	mulPv512x64
+	ldr	x8, [sp, #1008]
+	ldr	x27, [sp, #120]         // 8-byte Folded Reload
+	ldr	x22, [sp, #992]
+	ldr	x23, [sp, #984]
 	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1016]
+	ldr	x8, [sp, #1000]
+	ldr	x24, [sp, #976]
+	ldr	x25, [sp, #968]
+	ldr	x26, [sp, #960]
 	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1008]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x26, [sp, #1000]
-	ldr	x27, [sp, #992]
-	ldr	x28, [sp, #984]
-	ldr	x22, [sp, #976]
-	ldr	x19, [sp, #968]
-	ldr	x23, [sp, #960]
-	ldr	x1, [x25, #32]
-	add	x8, sp, #880            // =880
-	mov	 x0, x24
-	bl	.LmulPv576x64
-	cmn	 x21, x23
-	ldr	x8, [sp, #952]
-	ldr	x9, [sp, #944]
-	ldr	x10, [sp, #48]          // 8-byte Folded Reload
-	adcs	x10, x10, x19
-	ldr	x11, [sp, #936]
-	ldp	x14, x12, [sp, #96]
-	adcs	x12, x12, x22
-	ldr	x13, [sp, #928]
-	adcs	x14, x14, x28
-	ldr	x15, [sp, #920]
-	ldp	x18, x16, [sp, #80]
-	adcs	x16, x16, x27
-	ldr	x17, [sp, #912]
-	adcs	x18, x18, x26
-	ldr	x0, [sp, #904]
-	ldp	x3, x1, [sp, #64]
-	ldp	x2, x4, [sp, #16]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #896]
+	ldr	x1, [x27, #24]
+	ldr	x29, [sp, #952]
+	ldr	x21, [sp, #944]
+	add	x8, sp, #864            // =864
+	mov	 x0, x28
+	bl	mulPv512x64
+	ldr	x10, [sp, #40]          // 8-byte Folded Reload
+	ldp	x14, x12, [sp, #80]     // 8-byte Folded Reload
+	cmn		x19, x21
+	ldp	x18, x16, [sp, #64]     // 8-byte Folded Reload
+	adcs	x10, x10, x29
+	adcs	x12, x12, x26
+	ldr	x1, [sp, #56]           // 8-byte Folded Reload
+	adcs	x14, x14, x25
+	ldp	x5, x3, [sp, #96]       // 8-byte Folded Reload
+	ldp	x4, x6, [sp, #24]       // 8-byte Folded Reload
+	adcs	x16, x16, x24
+	adcs	x18, x18, x23
+	adcs	x1, x1, x22
 	adcs	x3, x3, x4
-	ldr	x4, [sp, #880]
-	ldp	x7, x5, [sp, #112]
-	ldp	x6, x19, [sp, #32]
 	adcs	x5, x5, x6
-	ldr	x6, [sp, #888]
-	adcs	x7, x7, x19
-	ldr	x19, [sp, #56]          // 8-byte Folded Reload
-	adcs	x19, x19, xzr
-	adds	 x21, x10, x4
-	adcs	x10, x12, x6
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x14, x2
-	str	x10, [sp, #104]         // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #96]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
+	ldr	x6, [sp, #48]           // 8-byte Folded Reload
+	ldr	x2, [sp, #864]
+	ldr	x4, [sp, #872]
+	ldr	x0, [sp, #880]
+	ldr	x17, [sp, #888]
+	adcs	x6, x6, xzr
+	ldr	x15, [sp, #896]
+	adds		x19, x10, x2
+	ldr	x13, [sp, #904]
+	adcs	x10, x12, x4
+	ldr	x11, [sp, #912]
+	str	x10, [sp, #40]          // 8-byte Folded Spill
+	adcs	x10, x14, x0
+	ldr	x9, [sp, #920]
 	str	x10, [sp, #88]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
+	adcs	x10, x16, x17
+	ldr	x8, [sp, #928]
 	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
+	adcs	x10, x18, x15
 	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	adcs	x8, x19, x8
-	stp	x8, x9, [sp, #112]
+	adcs	x10, x1, x13
+	str	x10, [sp, #64]          // 8-byte Folded Spill
+	adcs	x10, x3, x11
+	ldr	x28, [sp, #136]         // 8-byte Folded Reload
+	adcs	x9, x5, x9
+	adcs	x8, x6, x8
+	stp	x8, x9, [sp, #96]       // 8-byte Folded Spill
 	adcs	x8, xzr, xzr
-	stp	x8, x10, [sp, #56]
-	ldr	x25, [sp, #144]         // 8-byte Folded Reload
-	mul	 x1, x21, x25
-	add	x8, sp, #800            // =800
+	stp	x8, x10, [sp, #48]      // 8-byte Folded Spill
+	mul		x1, x19, x28
+	add	x8, sp, #784            // =784
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #872]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldr	x8, [sp, #864]
+	bl	mulPv512x64
+	ldr	x8, [sp, #848]
+	ldr	x22, [sp, #832]
+	ldr	x23, [sp, #824]
+	ldr	x24, [sp, #816]
 	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #856]
+	ldr	x8, [sp, #840]
+	ldr	x25, [sp, #808]
+	ldr	x26, [sp, #800]
+	ldr	x29, [sp, #792]
 	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #848]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x26, [sp, #840]
-	ldr	x27, [sp, #832]
-	ldr	x28, [sp, #824]
-	ldr	x22, [sp, #816]
-	ldr	x19, [sp, #808]
-	ldr	x23, [sp, #800]
-	ldr	x24, [sp, #136]         // 8-byte Folded Reload
-	ldr	x1, [x24, #40]
-	add	x8, sp, #720            // =720
-	ldr	x0, [sp, #152]          // 8-byte Folded Reload
-	bl	.LmulPv576x64
-	cmn	 x21, x23
-	ldr	x8, [sp, #792]
-	ldr	x9, [sp, #784]
-	ldr	x10, [sp, #48]          // 8-byte Folded Reload
-	adcs	x10, x10, x19
-	ldr	x11, [sp, #776]
-	ldp	x14, x12, [sp, #96]
-	adcs	x12, x12, x22
-	ldr	x13, [sp, #768]
-	adcs	x14, x14, x28
-	ldr	x15, [sp, #760]
-	ldp	x18, x16, [sp, #80]
-	adcs	x16, x16, x27
-	ldr	x17, [sp, #752]
-	adcs	x18, x18, x26
-	ldr	x0, [sp, #744]
-	ldp	x3, x1, [sp, #64]
-	ldp	x2, x4, [sp, #16]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #736]
+	ldr	x1, [x27, #32]
+	ldr	x27, [sp, #128]         // 8-byte Folded Reload
+	ldr	x21, [sp, #784]
+	add	x8, sp, #704            // =704
+	mov	 x0, x27
+	bl	mulPv512x64
+	ldr	x10, [sp, #40]          // 8-byte Folded Reload
+	ldp	x14, x12, [sp, #80]     // 8-byte Folded Reload
+	cmn		x19, x21
+	ldp	x18, x16, [sp, #64]     // 8-byte Folded Reload
+	adcs	x10, x10, x29
+	adcs	x12, x12, x26
+	ldr	x1, [sp, #56]           // 8-byte Folded Reload
+	adcs	x14, x14, x25
+	ldp	x5, x3, [sp, #96]       // 8-byte Folded Reload
+	ldp	x4, x6, [sp, #24]       // 8-byte Folded Reload
+	adcs	x16, x16, x24
+	adcs	x18, x18, x23
+	adcs	x1, x1, x22
 	adcs	x3, x3, x4
-	ldr	x4, [sp, #720]
-	ldp	x7, x5, [sp, #112]
-	ldp	x6, x19, [sp, #32]
 	adcs	x5, x5, x6
-	ldr	x6, [sp, #728]
-	adcs	x7, x7, x19
-	ldr	x19, [sp, #56]          // 8-byte Folded Reload
-	adcs	x19, x19, xzr
-	adds	 x21, x10, x4
-	adcs	x10, x12, x6
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x14, x2
-	str	x10, [sp, #104]         // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #96]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
+	ldr	x6, [sp, #48]           // 8-byte Folded Reload
+	ldr	x2, [sp, #704]
+	ldr	x4, [sp, #712]
+	ldr	x0, [sp, #720]
+	ldr	x17, [sp, #728]
+	adcs	x6, x6, xzr
+	ldr	x15, [sp, #736]
+	adds		x19, x10, x2
+	ldr	x13, [sp, #744]
+	adcs	x10, x12, x4
+	ldr	x11, [sp, #752]
+	str	x10, [sp, #40]          // 8-byte Folded Spill
+	adcs	x10, x14, x0
+	ldr	x9, [sp, #760]
 	str	x10, [sp, #88]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
+	adcs	x10, x16, x17
+	ldr	x8, [sp, #768]
 	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
+	adcs	x10, x18, x15
 	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	adcs	x8, x19, x8
-	stp	x8, x9, [sp, #112]
+	adcs	x10, x1, x13
+	str	x10, [sp, #64]          // 8-byte Folded Spill
+	adcs	x10, x3, x11
+	adcs	x9, x5, x9
+	adcs	x8, x6, x8
+	stp	x8, x9, [sp, #96]       // 8-byte Folded Spill
 	adcs	x8, xzr, xzr
-	stp	x8, x10, [sp, #56]
-	mul	 x1, x21, x25
-	add	x8, sp, #640            // =640
+	stp	x8, x10, [sp, #48]      // 8-byte Folded Spill
+	mul		x1, x19, x28
+	add	x8, sp, #624            // =624
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #712]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldr	x8, [sp, #704]
+	bl	mulPv512x64
+	ldr	x8, [sp, #688]
+	ldr	x28, [sp, #120]         // 8-byte Folded Reload
+	ldr	x22, [sp, #672]
+	ldr	x23, [sp, #664]
 	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #696]
+	ldr	x8, [sp, #680]
+	ldr	x24, [sp, #656]
+	ldr	x25, [sp, #648]
+	ldr	x26, [sp, #640]
 	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #688]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x26, [sp, #680]
-	ldr	x27, [sp, #672]
-	ldr	x28, [sp, #664]
-	ldr	x22, [sp, #656]
-	ldr	x19, [sp, #648]
-	ldr	x23, [sp, #640]
-	ldr	x1, [x24, #48]
-	add	x8, sp, #560            // =560
-	ldr	x25, [sp, #152]         // 8-byte Folded Reload
-	mov	 x0, x25
-	bl	.LmulPv576x64
-	cmn	 x21, x23
-	ldr	x8, [sp, #632]
-	ldr	x9, [sp, #624]
-	ldr	x10, [sp, #48]          // 8-byte Folded Reload
-	adcs	x10, x10, x19
-	ldr	x11, [sp, #616]
-	ldp	x14, x12, [sp, #96]
-	adcs	x12, x12, x22
-	ldr	x13, [sp, #608]
-	adcs	x14, x14, x28
-	ldr	x15, [sp, #600]
-	ldp	x18, x16, [sp, #80]
-	adcs	x16, x16, x27
-	ldr	x17, [sp, #592]
-	adcs	x18, x18, x26
-	ldr	x0, [sp, #584]
-	ldp	x3, x1, [sp, #64]
-	ldp	x2, x4, [sp, #16]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #576]
+	ldr	x1, [x28, #40]
+	ldr	x29, [sp, #632]
+	ldr	x21, [sp, #624]
+	add	x8, sp, #544            // =544
+	mov	 x0, x27
+	bl	mulPv512x64
+	ldr	x10, [sp, #40]          // 8-byte Folded Reload
+	ldp	x14, x12, [sp, #80]     // 8-byte Folded Reload
+	cmn		x19, x21
+	ldp	x18, x16, [sp, #64]     // 8-byte Folded Reload
+	adcs	x10, x10, x29
+	adcs	x12, x12, x26
+	ldr	x1, [sp, #56]           // 8-byte Folded Reload
+	adcs	x14, x14, x25
+	ldp	x5, x3, [sp, #96]       // 8-byte Folded Reload
+	ldp	x4, x6, [sp, #24]       // 8-byte Folded Reload
+	adcs	x16, x16, x24
+	adcs	x18, x18, x23
+	adcs	x1, x1, x22
 	adcs	x3, x3, x4
-	ldr	x4, [sp, #560]
-	ldp	x7, x5, [sp, #112]
-	ldp	x6, x19, [sp, #32]
 	adcs	x5, x5, x6
-	ldr	x6, [sp, #568]
-	adcs	x7, x7, x19
-	ldr	x19, [sp, #56]          // 8-byte Folded Reload
-	adcs	x19, x19, xzr
-	adds	 x21, x10, x4
-	adcs	x10, x12, x6
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x14, x2
-	str	x10, [sp, #104]         // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #96]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
+	ldr	x6, [sp, #48]           // 8-byte Folded Reload
+	ldr	x2, [sp, #544]
+	ldr	x4, [sp, #552]
+	ldr	x0, [sp, #560]
+	ldr	x17, [sp, #568]
+	adcs	x6, x6, xzr
+	ldr	x15, [sp, #576]
+	adds		x19, x10, x2
+	ldr	x13, [sp, #584]
+	adcs	x10, x12, x4
+	ldr	x11, [sp, #592]
+	str	x10, [sp, #40]          // 8-byte Folded Spill
+	adcs	x10, x14, x0
+	ldr	x9, [sp, #600]
 	str	x10, [sp, #88]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
+	adcs	x10, x16, x17
+	ldr	x8, [sp, #608]
 	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
+	adcs	x10, x18, x15
 	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	adcs	x8, x19, x8
-	stp	x8, x9, [sp, #112]
+	adcs	x10, x1, x13
+	str	x10, [sp, #64]          // 8-byte Folded Spill
+	adcs	x10, x3, x11
+	ldr	x27, [sp, #136]         // 8-byte Folded Reload
+	adcs	x9, x5, x9
+	adcs	x8, x6, x8
+	stp	x8, x9, [sp, #96]       // 8-byte Folded Spill
 	adcs	x8, xzr, xzr
-	stp	x8, x10, [sp, #56]
-	ldr	x24, [sp, #144]         // 8-byte Folded Reload
-	mul	 x1, x21, x24
-	add	x8, sp, #480            // =480
+	stp	x8, x10, [sp, #48]      // 8-byte Folded Spill
+	mul		x1, x19, x27
+	add	x8, sp, #464            // =464
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #552]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldr	x8, [sp, #544]
+	bl	mulPv512x64
+	ldr	x8, [sp, #528]
+	ldp	x23, x22, [sp, #504]
+	ldp	x25, x24, [sp, #488]
+	ldp	x29, x26, [sp, #472]
 	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #536]
+	ldr	x8, [sp, #520]
+	ldr	x21, [sp, #464]
 	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #528]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x26, [sp, #520]
-	ldr	x27, [sp, #512]
-	ldp	x22, x28, [sp, #496]
-	ldp	x23, x19, [sp, #480]
-	ldr	x8, [sp, #136]          // 8-byte Folded Reload
-	ldr	x1, [x8, #56]
-	add	x8, sp, #400            // =400
-	mov	 x0, x25
-	bl	.LmulPv576x64
-	cmn	 x21, x23
-	ldp	x9, x8, [sp, #464]
-	ldr	x10, [sp, #48]          // 8-byte Folded Reload
-	adcs	x10, x10, x19
-	ldp	x13, x11, [sp, #448]
-	ldp	x14, x12, [sp, #96]
-	adcs	x12, x12, x22
-	adcs	x14, x14, x28
-	ldp	x17, x15, [sp, #432]
-	ldp	x18, x16, [sp, #80]
-	adcs	x16, x16, x27
-	adcs	x18, x18, x26
-	ldp	x3, x1, [sp, #64]
-	ldp	x2, x4, [sp, #16]
-	adcs	x1, x1, x2
-	ldp	x2, x0, [sp, #416]
+	ldr	x1, [x28, #48]
+	ldr	x28, [sp, #128]         // 8-byte Folded Reload
+	add	x8, sp, #384            // =384
+	mov	 x0, x28
+	bl	mulPv512x64
+	ldr	x10, [sp, #40]          // 8-byte Folded Reload
+	ldp	x14, x12, [sp, #80]     // 8-byte Folded Reload
+	cmn		x19, x21
+	ldp	x18, x16, [sp, #64]     // 8-byte Folded Reload
+	adcs	x10, x10, x29
+	adcs	x12, x12, x26
+	ldr	x1, [sp, #56]           // 8-byte Folded Reload
+	adcs	x14, x14, x25
+	ldp	x5, x3, [sp, #96]       // 8-byte Folded Reload
+	ldp	x4, x6, [sp, #24]       // 8-byte Folded Reload
+	adcs	x16, x16, x24
+	adcs	x18, x18, x23
+	adcs	x1, x1, x22
 	adcs	x3, x3, x4
-	ldp	x7, x5, [sp, #112]
-	ldp	x6, x19, [sp, #32]
 	adcs	x5, x5, x6
-	ldp	x4, x6, [sp, #400]
-	adcs	x7, x7, x19
-	ldr	x19, [sp, #56]          // 8-byte Folded Reload
-	adcs	x19, x19, xzr
-	adds	 x21, x10, x4
-	adcs	x10, x12, x6
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x14, x2
-	str	x10, [sp, #104]         // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #96]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
+	ldr	x6, [sp, #48]           // 8-byte Folded Reload
+	ldr	x2, [sp, #384]
+	ldp	x4, x0, [sp, #392]
+	ldp	x17, x15, [sp, #408]
+	adcs	x6, x6, xzr
+	adds		x19, x10, x2
+	ldp	x13, x11, [sp, #424]
+	adcs	x10, x12, x4
+	str	x10, [sp, #40]          // 8-byte Folded Spill
+	adcs	x10, x14, x0
+	ldp	x9, x8, [sp, #440]
 	str	x10, [sp, #88]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
+	adcs	x10, x16, x17
 	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
+	adcs	x10, x18, x15
 	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	adcs	x8, x19, x8
-	stp	x8, x9, [sp, #112]
+	adcs	x10, x1, x13
+	str	x10, [sp, #64]          // 8-byte Folded Spill
+	adcs	x10, x3, x11
+	adcs	x9, x5, x9
+	adcs	x8, x6, x8
+	stp	x8, x9, [sp, #96]       // 8-byte Folded Spill
 	adcs	x8, xzr, xzr
-	stp	x8, x10, [sp, #56]
-	mul	 x1, x21, x24
-	add	x8, sp, #320            // =320
+	stp	x8, x10, [sp, #48]      // 8-byte Folded Spill
+	mul		x1, x19, x27
+	add	x8, sp, #304            // =304
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #392]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldp	x24, x8, [sp, #376]
+	bl	mulPv512x64
+	ldp	x27, x8, [sp, #360]
+	ldp	x23, x22, [sp, #344]
+	ldp	x25, x24, [sp, #328]
+	ldp	x29, x26, [sp, #312]
 	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldp	x26, x25, [sp, #360]
-	ldp	x28, x27, [sp, #344]
-	ldp	x19, x22, [sp, #328]
-	ldr	x23, [sp, #320]
-	ldr	x8, [sp, #136]          // 8-byte Folded Reload
-	ldr	x1, [x8, #64]
-	add	x8, sp, #240            // =240
-	ldr	x0, [sp, #152]          // 8-byte Folded Reload
-	bl	.LmulPv576x64
-	cmn	 x21, x23
-	ldp	x9, x8, [sp, #304]
-	ldr	x10, [sp, #48]          // 8-byte Folded Reload
-	adcs	x10, x10, x19
-	ldp	x13, x11, [sp, #288]
-	ldp	x14, x12, [sp, #96]
-	adcs	x12, x12, x22
-	adcs	x14, x14, x28
-	ldp	x17, x15, [sp, #272]
-	ldp	x18, x16, [sp, #80]
-	adcs	x16, x16, x27
-	adcs	x18, x18, x26
-	ldp	x2, x0, [sp, #256]
-	ldp	x3, x1, [sp, #64]
-	adcs	x1, x1, x25
-	adcs	x3, x3, x24
-	ldp	x7, x5, [sp, #112]
-	ldp	x6, x19, [sp, #32]
+	ldr	x8, [sp, #120]          // 8-byte Folded Reload
+	ldr	x21, [sp, #304]
+	mov	 x0, x28
+	ldr	x1, [x8, #56]
+	add	x8, sp, #224            // =224
+	bl	mulPv512x64
+	ldr	x10, [sp, #40]          // 8-byte Folded Reload
+	ldp	x14, x12, [sp, #80]     // 8-byte Folded Reload
+	cmn		x19, x21
+	ldp	x18, x16, [sp, #64]     // 8-byte Folded Reload
+	adcs	x10, x10, x29
+	adcs	x12, x12, x26
+	ldr	x1, [sp, #56]           // 8-byte Folded Reload
+	adcs	x14, x14, x25
+	ldp	x5, x3, [sp, #96]       // 8-byte Folded Reload
+	adcs	x16, x16, x24
+	ldr	x6, [sp, #32]           // 8-byte Folded Reload
+	adcs	x18, x18, x23
+	adcs	x1, x1, x22
+	adcs	x3, x3, x27
 	adcs	x5, x5, x6
-	ldp	x4, x6, [sp, #240]
-	adcs	x7, x7, x19
-	ldr	x19, [sp, #56]          // 8-byte Folded Reload
-	adcs	x19, x19, xzr
-	adds	 x21, x10, x4
-	adcs	x22, x12, x6
-	adcs	x23, x14, x2
-	adcs	x24, x16, x0
-	adcs	x25, x18, x17
-	adcs	x26, x1, x15
-	adcs	x27, x3, x13
-	adcs	x10, x5, x11
-	str	x10, [sp, #152]         // 8-byte Folded Spill
-	adcs	x9, x7, x9
-	str	x9, [sp, #136]          // 8-byte Folded Spill
-	adcs	x19, x19, x8
-	adcs	x28, xzr, xzr
-	ldr	x8, [sp, #144]          // 8-byte Folded Reload
-	mul	 x1, x21, x8
-	add	x8, sp, #160            // =160
+	ldr	x6, [sp, #48]           // 8-byte Folded Reload
+	ldr	x2, [sp, #224]
+	ldp	x4, x0, [sp, #232]
+	ldp	x17, x15, [sp, #248]
+	adcs	x6, x6, xzr
+	adds		x19, x10, x2
+	ldp	x13, x11, [sp, #264]
+	adcs	x21, x12, x4
+	adcs	x22, x14, x0
+	ldp	x9, x8, [sp, #280]
+	adcs	x23, x16, x17
+	adcs	x24, x18, x15
+	adcs	x25, x1, x13
+	adcs	x26, x3, x11
+	adcs	x27, x5, x9
+	adcs	x28, x6, x8
+	ldr	x8, [sp, #136]          // 8-byte Folded Reload
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x16, x8, [sp, #224]
-	ldp	x9, x10, [sp, #160]
-	ldp	x11, x12, [sp, #176]
-	cmn	 x21, x9
-	ldp	x13, x9, [sp, #192]
-	adcs	x10, x22, x10
-	ldp	x14, x15, [sp, #208]
-	adcs	x11, x23, x11
+	adcs	x29, xzr, xzr
+	mul		x1, x19, x8
+	add	x8, sp, #144            // =144
+	bl	mulPv512x64
+	ldp	x13, x16, [sp, #144]
+	ldr	x15, [sp, #160]
+	ldp	x14, x12, [sp, #168]
+	ldp	x11, x10, [sp, #184]
+	cmn		x19, x13
+	adcs	x16, x21, x16
+	adcs	x15, x22, x15
+	ldp	x9, x8, [sp, #200]
+	adcs	x14, x23, x14
 	adcs	x12, x24, x12
-	adcs	x13, x25, x13
-	adcs	x9, x26, x9
-	adcs	x14, x27, x14
-	ldp	x0, x17, [x20, #56]
-	ldp	x2, x18, [x20, #40]
-	ldp	x4, x1, [x20, #24]
-	ldp	x6, x3, [x20, #8]
-	ldr	 x5, [x20]
-	ldr	x7, [sp, #152]          // 8-byte Folded Reload
-	adcs	x15, x7, x15
-	ldr	x7, [sp, #136]          // 8-byte Folded Reload
-	adcs	x16, x7, x16
-	adcs	x8, x19, x8
-	adcs	x7, x28, xzr
-	subs	 x5, x10, x5
-	sbcs	x6, x11, x6
-	sbcs	x3, x12, x3
-	sbcs	x4, x13, x4
-	sbcs	x1, x9, x1
-	sbcs	x2, x14, x2
-	sbcs	x18, x15, x18
-	sbcs	x0, x16, x0
+	adcs	x11, x25, x11
+	ldp		x3, x4, [x20]
+	adcs	x10, x26, x10
+	adcs	x9, x27, x9
+	ldp	x1, x2, [x20, #16]
+	adcs	x8, x28, x8
+	adcs	x5, x29, xzr
+	ldp	x18, x0, [x20, #32]
+	subs		x3, x16, x3
+	sbcs	x4, x15, x4
+	ldp	x13, x17, [x20, #48]
+	sbcs	x1, x14, x1
+	sbcs	x2, x12, x2
+	sbcs	x18, x11, x18
+	sbcs	x0, x10, x0
+	sbcs	x13, x9, x13
 	sbcs	x17, x8, x17
-	sbcs	x7, x7, xzr
-	tst	 x7, #0x1
-	csel	x10, x10, x5, ne
-	csel	x11, x11, x6, ne
-	csel	x12, x12, x3, ne
-	csel	x13, x13, x4, ne
-	csel	x9, x9, x1, ne
-	csel	x14, x14, x2, ne
-	csel	x15, x15, x18, ne
-	csel	x16, x16, x0, ne
+	sbcs	x5, x5, xzr
+	tst	 x5, #0x1
+	csel	x9, x9, x13, ne
+	ldr	x13, [sp, #112]         // 8-byte Folded Reload
+	csel	x16, x16, x3, ne
+	csel	x15, x15, x4, ne
+	csel	x14, x14, x1, ne
+	csel	x12, x12, x2, ne
+	csel	x11, x11, x18, ne
+	csel	x10, x10, x0, ne
 	csel	x8, x8, x17, ne
-	ldr	x17, [sp, #128]         // 8-byte Folded Reload
-	stp	 x10, x11, [x17]
-	stp	x12, x13, [x17, #16]
-	stp	x9, x14, [x17, #32]
-	stp	x15, x16, [x17, #48]
-	str	x8, [x17, #64]
-	sub	sp, x29, #80            // =80
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
+	stp		x16, x15, [x13]
+	stp	x14, x12, [x13, #16]
+	stp	x11, x10, [x13, #32]
+	stp	x9, x8, [x13, #48]
+	add	sp, sp, #1424           // =1424
+	ldp	x29, x30, [sp, #80]     // 8-byte Folded Reload
+	ldp	x20, x19, [sp, #64]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #48]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #32]     // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #16]     // 8-byte Folded Reload
+	ldp	x28, x27, [sp], #96     // 8-byte Folded Reload
 	ret
-.Lfunc_end130:
-	.size	mcl_fp_mont9L, .Lfunc_end130-mcl_fp_mont9L
+.Lfunc_end60:
+	.size	mcl_fp_mont8L, .Lfunc_end60-mcl_fp_mont8L
 
-	.globl	mcl_fp_montNF9L
-	.align	2
-	.type	mcl_fp_montNF9L,@function
-mcl_fp_montNF9L:                        // @mcl_fp_montNF9L
+	.globl	mcl_fp_montNF8L
+	.p2align	2
+	.type	mcl_fp_montNF8L,@function
+mcl_fp_montNF8L:                        // @mcl_fp_montNF8L
 // BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	add	x29, sp, #80            // =80
-	sub	sp, sp, #1584           // =1584
+	stp	x28, x27, [sp, #-96]!   // 8-byte Folded Spill
+	stp	x26, x25, [sp, #16]     // 8-byte Folded Spill
+	stp	x24, x23, [sp, #32]     // 8-byte Folded Spill
+	stp	x22, x21, [sp, #48]     // 8-byte Folded Spill
+	stp	x20, x19, [sp, #64]     // 8-byte Folded Spill
+	stp	x29, x30, [sp, #80]     // 8-byte Folded Spill
+	sub	sp, sp, #1408           // =1408
 	mov	 x20, x3
-	mov	 x28, x2
-	str	x28, [sp, #120]         // 8-byte Folded Spill
 	ldur	x19, [x20, #-8]
-	str	x19, [sp, #128]         // 8-byte Folded Spill
-	ldr	 x9, [x28]
-	mov	 x23, x1
-	str	x23, [sp, #136]         // 8-byte Folded Spill
-	str	x0, [sp, #112]          // 8-byte Folded Spill
-	sub	x8, x29, #160           // =160
-	mov	 x0, x23
-	mov	 x1, x9
-	bl	.LmulPv576x64
-	ldur	x24, [x29, #-160]
-	ldur	x8, [x29, #-88]
-	str	x8, [sp, #104]          // 8-byte Folded Spill
-	ldur	x8, [x29, #-96]
-	str	x8, [sp, #96]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-104]
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-112]
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-120]
-	str	x8, [sp, #72]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-128]
-	str	x8, [sp, #64]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-136]
-	str	x8, [sp, #56]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-144]
-	str	x8, [sp, #48]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-152]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	mul	 x1, x24, x19
-	sub	x8, x29, #240           // =240
+	mov	 x26, x2
+	mov	 x22, x1
+	add	x8, sp, #1328           // =1328
+	str	x19, [sp, #120]         // 8-byte Folded Spill
+	ldr		x1, [x26]
+	str	x0, [sp, #96]           // 8-byte Folded Spill
+	mov	 x0, x22
+	str	x22, [sp, #112]         // 8-byte Folded Spill
+	bl	mulPv512x64
+	ldr	x8, [sp, #1392]
+	ldr	x24, [sp, #1328]
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldur	x8, [x29, #-168]
-	str	x8, [sp, #40]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-176]
-	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-184]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldur	x8, [x29, #-192]
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	ldp	x21, x19, [x29, #-208]
-	ldp	x26, x22, [x29, #-224]
-	ldp	x27, x25, [x29, #-240]
-	ldr	x1, [x28, #8]
-	add	x8, sp, #1344           // =1344
-	mov	 x0, x23
-	bl	.LmulPv576x64
-	cmn	 x27, x24
-	ldr	x8, [sp, #1416]
-	ldr	x9, [sp, #1408]
-	ldr	x10, [sp, #32]          // 8-byte Folded Reload
-	adcs	x10, x25, x10
-	ldr	x11, [sp, #1400]
-	ldp	x12, x14, [sp, #48]
-	adcs	x12, x26, x12
-	ldr	x13, [sp, #1392]
-	adcs	x14, x22, x14
-	ldr	x15, [sp, #1384]
-	ldp	x16, x18, [sp, #64]
-	adcs	x16, x21, x16
-	ldr	x17, [sp, #1376]
-	adcs	x18, x19, x18
-	ldr	x0, [sp, #1368]
-	ldp	x1, x3, [sp, #80]
-	ldp	x2, x4, [sp, #8]
-	adcs	x1, x2, x1
-	ldr	x2, [sp, #1352]
-	adcs	x3, x4, x3
-	ldr	x4, [sp, #1344]
-	ldp	x5, x7, [sp, #96]
-	ldr	x6, [sp, #24]           // 8-byte Folded Reload
-	adcs	x5, x6, x5
-	ldr	x6, [sp, #1360]
-	ldr	x19, [sp, #40]          // 8-byte Folded Reload
-	adcs	x7, x19, x7
-	adds	 x19, x10, x4
-	adcs	x10, x12, x2
-	str	x10, [sp, #40]          // 8-byte Folded Spill
-	adcs	x10, x14, x6
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
-	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
-	str	x10, [sp, #56]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
-	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	stp	x9, x10, [sp, #96]
-	adcs	x8, x8, xzr
 	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x22, [sp, #128]         // 8-byte Folded Reload
-	mul	 x1, x19, x22
-	add	x8, sp, #1264           // =1264
-	mov	 x0, x20
-	bl	.LmulPv576x64
+	ldr	x8, [sp, #1384]
+	mul		x1, x24, x19
+	str	x8, [sp, #80]           // 8-byte Folded Spill
+	ldr	x8, [sp, #1376]
+	str	x8, [sp, #72]           // 8-byte Folded Spill
+	ldr	x8, [sp, #1368]
+	str	x8, [sp, #64]           // 8-byte Folded Spill
+	ldr	x8, [sp, #1360]
+	str	x8, [sp, #56]           // 8-byte Folded Spill
+	ldr	x8, [sp, #1352]
+	str	x8, [sp, #48]           // 8-byte Folded Spill
+	ldr	x8, [sp, #1344]
+	str	x8, [sp, #40]           // 8-byte Folded Spill
 	ldr	x8, [sp, #1336]
 	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1328]
+	add	x8, sp, #1248           // =1248
+	bl	mulPv512x64
+	ldr	x8, [sp, #1312]
+	ldr	x27, [sp, #1288]
+	ldr	x28, [sp, #1280]
+	ldr	x29, [sp, #1272]
 	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1320]
+	ldr	x8, [sp, #1304]
+	ldr	x19, [sp, #1264]
+	ldr	x23, [sp, #1256]
+	ldr	x21, [sp, #1248]
 	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1312]
+	ldr	x8, [sp, #1296]
+	mov	 x0, x22
 	str	x8, [sp, #8]            // 8-byte Folded Spill
-	ldr	x24, [sp, #1304]
-	ldr	x25, [sp, #1296]
-	ldr	x26, [sp, #1288]
-	ldr	x21, [sp, #1280]
-	ldr	x27, [sp, #1272]
-	ldr	x28, [sp, #1264]
-	ldr	x23, [sp, #120]         // 8-byte Folded Reload
-	ldr	x1, [x23, #16]
-	add	x8, sp, #1184           // =1184
-	ldr	x0, [sp, #136]          // 8-byte Folded Reload
-	bl	.LmulPv576x64
-	cmn	 x19, x28
-	ldr	x8, [sp, #1256]
-	ldr	x9, [sp, #1248]
-	ldp	x10, x1, [sp, #40]
-	adcs	x10, x10, x27
-	ldr	x11, [sp, #1240]
-	ldp	x14, x12, [sp, #72]
-	adcs	x12, x12, x21
-	ldr	x13, [sp, #1232]
-	adcs	x14, x14, x26
-	ldr	x15, [sp, #1224]
-	ldp	x18, x16, [sp, #56]
-	adcs	x16, x16, x25
-	ldr	x17, [sp, #1216]
-	adcs	x18, x18, x24
-	ldr	x0, [sp, #1208]
-	ldp	x2, x4, [sp, #8]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #1192]
-	ldp	x5, x3, [sp, #96]
-	adcs	x3, x3, x4
+	ldr	x1, [x26, #8]
+	add	x8, sp, #1168           // =1168
+	str	x26, [sp, #104]         // 8-byte Folded Spill
+	bl	mulPv512x64
+	ldp	x10, x12, [sp, #32]     // 8-byte Folded Reload
+	ldp	x14, x16, [sp, #48]     // 8-byte Folded Reload
+	cmn		x21, x24
+	ldp	x18, x1, [sp, #64]      // 8-byte Folded Reload
+	adcs	x10, x23, x10
+	adcs	x12, x19, x12
+	ldp	x2, x4, [sp, #8]        // 8-byte Folded Reload
+	adcs	x14, x29, x14
+	adcs	x16, x28, x16
+	ldp	x3, x5, [sp, #80]       // 8-byte Folded Reload
+	adcs	x18, x27, x18
+	ldr	x6, [sp, #24]           // 8-byte Folded Reload
+	adcs	x1, x2, x1
+	ldr	x2, [sp, #1168]
+	ldr	x0, [sp, #1176]
+	adcs	x3, x4, x3
 	ldr	x4, [sp, #1184]
-	ldp	x6, x19, [sp, #24]
-	adcs	x5, x5, x6
-	ldr	x6, [sp, #1200]
-	ldr	x7, [sp, #88]           // 8-byte Folded Reload
-	adcs	x7, x7, x19
-	adds	 x19, x10, x4
-	adcs	x10, x12, x2
-	str	x10, [sp, #40]          // 8-byte Folded Spill
-	adcs	x10, x14, x6
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
+	ldr	x17, [sp, #1192]
+	adcs	x5, x6, x5
+	ldr	x15, [sp, #1200]
+	adds		x19, x10, x2
+	ldr	x13, [sp, #1208]
+	adcs	x10, x12, x0
+	ldr	x11, [sp, #1216]
+	str	x10, [sp, #32]          // 8-byte Folded Spill
+	adcs	x10, x14, x4
+	ldr	x9, [sp, #1224]
 	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
+	adcs	x10, x16, x17
+	ldr	x8, [sp, #1232]
 	str	x10, [sp, #56]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
+	adcs	x10, x18, x15
 	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	stp	x9, x10, [sp, #96]
+	adcs	x10, x1, x13
+	ldr	x29, [sp, #120]         // 8-byte Folded Reload
+	str	x10, [sp, #40]          // 8-byte Folded Spill
+	adcs	x10, x3, x11
+	adcs	x9, x5, x9
 	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	mul	 x1, x19, x22
-	add	x8, sp, #1104           // =1104
+	str	x8, [sp, #72]           // 8-byte Folded Spill
+	mul		x1, x19, x29
+	add	x8, sp, #1088           // =1088
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #1176]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1168]
+	stp	x9, x10, [sp, #80]      // 8-byte Folded Spill
+	bl	mulPv512x64
+	ldr	x8, [sp, #1152]
+	ldr	x27, [sp, #1136]
+	ldr	x28, [sp, #1128]
+	ldr	x22, [sp, #1120]
 	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1160]
+	ldr	x8, [sp, #1144]
+	ldr	x23, [sp, #1112]
+	ldr	x24, [sp, #1104]
+	ldr	x25, [sp, #1096]
 	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1152]
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	ldr	x24, [sp, #1144]
-	ldr	x25, [sp, #1136]
-	ldr	x26, [sp, #1128]
-	ldr	x21, [sp, #1120]
-	ldr	x27, [sp, #1112]
-	ldr	x28, [sp, #1104]
-	ldr	x1, [x23, #24]
-	add	x8, sp, #1024           // =1024
-	ldr	x22, [sp, #136]         // 8-byte Folded Reload
-	mov	 x0, x22
-	bl	.LmulPv576x64
-	cmn	 x19, x28
-	ldr	x8, [sp, #1096]
-	ldr	x9, [sp, #1088]
-	ldp	x10, x1, [sp, #40]
-	adcs	x10, x10, x27
-	ldr	x11, [sp, #1080]
-	ldp	x14, x12, [sp, #72]
-	adcs	x12, x12, x21
-	ldr	x13, [sp, #1072]
-	adcs	x14, x14, x26
-	ldr	x15, [sp, #1064]
-	ldp	x18, x16, [sp, #56]
-	adcs	x16, x16, x25
-	ldr	x17, [sp, #1056]
-	adcs	x18, x18, x24
-	ldr	x0, [sp, #1048]
-	ldp	x2, x4, [sp, #8]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #1032]
-	ldp	x5, x3, [sp, #96]
+	ldr	x1, [x26, #16]
+	ldr	x26, [sp, #112]         // 8-byte Folded Reload
+	ldr	x21, [sp, #1088]
+	add	x8, sp, #1008           // =1008
+	mov	 x0, x26
+	bl	mulPv512x64
+	ldp	x10, x18, [sp, #32]     // 8-byte Folded Reload
+	ldp	x14, x12, [sp, #56]     // 8-byte Folded Reload
+	cmn		x19, x21
+	ldr	x16, [sp, #48]          // 8-byte Folded Reload
+	adcs	x10, x10, x25
+	adcs	x12, x12, x24
+	ldp	x3, x1, [sp, #80]       // 8-byte Folded Reload
+	adcs	x14, x14, x23
+	ldp	x4, x6, [sp, #16]       // 8-byte Folded Reload
+	adcs	x16, x16, x22
+	ldr	x5, [sp, #72]           // 8-byte Folded Reload
+	adcs	x18, x18, x28
+	ldr	x2, [sp, #1008]
+	ldr	x0, [sp, #1016]
+	adcs	x1, x1, x27
 	adcs	x3, x3, x4
 	ldr	x4, [sp, #1024]
-	ldp	x6, x19, [sp, #24]
+	ldr	x17, [sp, #1032]
 	adcs	x5, x5, x6
-	ldr	x6, [sp, #1040]
-	ldr	x7, [sp, #88]           // 8-byte Folded Reload
-	adcs	x7, x7, x19
-	adds	 x19, x10, x4
-	adcs	x10, x12, x2
-	str	x10, [sp, #40]          // 8-byte Folded Spill
-	adcs	x10, x14, x6
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
+	ldr	x15, [sp, #1040]
+	adds		x19, x10, x2
+	ldr	x13, [sp, #1048]
+	adcs	x10, x12, x0
+	ldr	x11, [sp, #1056]
+	str	x10, [sp, #32]          // 8-byte Folded Spill
+	adcs	x10, x14, x4
+	ldr	x9, [sp, #1064]
 	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
+	adcs	x10, x16, x17
+	ldr	x8, [sp, #1072]
 	str	x10, [sp, #56]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
+	adcs	x10, x18, x15
 	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	stp	x9, x10, [sp, #96]
+	adcs	x10, x1, x13
+	str	x10, [sp, #40]          // 8-byte Folded Spill
+	adcs	x10, x3, x11
+	adcs	x9, x5, x9
 	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x8, [sp, #128]          // 8-byte Folded Reload
-	mul	 x1, x19, x8
-	add	x8, sp, #944            // =944
+	str	x8, [sp, #72]           // 8-byte Folded Spill
+	mul		x1, x19, x29
+	add	x8, sp, #928            // =928
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #1016]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1008]
+	stp	x9, x10, [sp, #80]      // 8-byte Folded Spill
+	bl	mulPv512x64
+	ldr	x8, [sp, #992]
+	ldr	x29, [sp, #104]         // 8-byte Folded Reload
+	ldr	x27, [sp, #976]
+	ldr	x28, [sp, #968]
 	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #1000]
+	ldr	x8, [sp, #984]
+	ldr	x22, [sp, #960]
+	ldr	x23, [sp, #952]
+	ldr	x24, [sp, #944]
 	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x8, [sp, #992]
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	ldr	x24, [sp, #984]
-	ldr	x25, [sp, #976]
-	ldr	x26, [sp, #968]
-	ldr	x21, [sp, #960]
-	ldr	x27, [sp, #952]
-	ldr	x28, [sp, #944]
-	ldr	x1, [x23, #32]
-	add	x8, sp, #864            // =864
-	mov	 x0, x22
-	bl	.LmulPv576x64
-	cmn	 x19, x28
-	ldr	x8, [sp, #936]
-	ldr	x9, [sp, #928]
-	ldp	x10, x1, [sp, #40]
-	adcs	x10, x10, x27
-	ldr	x11, [sp, #920]
-	ldp	x14, x12, [sp, #72]
-	adcs	x12, x12, x21
-	ldr	x13, [sp, #912]
-	adcs	x14, x14, x26
-	ldr	x15, [sp, #904]
-	ldp	x18, x16, [sp, #56]
-	adcs	x16, x16, x25
-	ldr	x17, [sp, #896]
-	adcs	x18, x18, x24
-	ldr	x0, [sp, #888]
-	ldp	x2, x4, [sp, #8]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #872]
-	ldp	x5, x3, [sp, #96]
+	ldr	x1, [x29, #24]
+	ldr	x25, [sp, #936]
+	ldr	x21, [sp, #928]
+	add	x8, sp, #848            // =848
+	mov	 x0, x26
+	bl	mulPv512x64
+	ldp	x10, x18, [sp, #32]     // 8-byte Folded Reload
+	ldp	x14, x12, [sp, #56]     // 8-byte Folded Reload
+	cmn		x19, x21
+	ldr	x16, [sp, #48]          // 8-byte Folded Reload
+	adcs	x10, x10, x25
+	adcs	x12, x12, x24
+	ldp	x3, x1, [sp, #80]       // 8-byte Folded Reload
+	adcs	x14, x14, x23
+	ldp	x4, x6, [sp, #16]       // 8-byte Folded Reload
+	adcs	x16, x16, x22
+	ldr	x5, [sp, #72]           // 8-byte Folded Reload
+	adcs	x18, x18, x28
+	ldr	x2, [sp, #848]
+	ldr	x0, [sp, #856]
+	adcs	x1, x1, x27
 	adcs	x3, x3, x4
 	ldr	x4, [sp, #864]
-	ldp	x6, x19, [sp, #24]
+	ldr	x17, [sp, #872]
 	adcs	x5, x5, x6
-	ldr	x6, [sp, #880]
-	ldr	x7, [sp, #88]           // 8-byte Folded Reload
-	adcs	x7, x7, x19
-	adds	 x19, x10, x4
-	adcs	x10, x12, x2
-	str	x10, [sp, #40]          // 8-byte Folded Spill
-	adcs	x10, x14, x6
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
+	ldr	x15, [sp, #880]
+	adds		x19, x10, x2
+	ldr	x13, [sp, #888]
+	adcs	x10, x12, x0
+	ldr	x11, [sp, #896]
+	str	x10, [sp, #32]          // 8-byte Folded Spill
+	adcs	x10, x14, x4
+	ldr	x9, [sp, #904]
 	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
+	adcs	x10, x16, x17
+	ldr	x8, [sp, #912]
 	str	x10, [sp, #56]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
+	adcs	x10, x18, x15
 	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	stp	x9, x10, [sp, #96]
+	adcs	x10, x1, x13
+	ldr	x26, [sp, #120]         // 8-byte Folded Reload
+	str	x10, [sp, #40]          // 8-byte Folded Spill
+	adcs	x10, x3, x11
+	adcs	x9, x5, x9
 	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x23, [sp, #128]         // 8-byte Folded Reload
-	mul	 x1, x19, x23
-	add	x8, sp, #784            // =784
+	str	x8, [sp, #72]           // 8-byte Folded Spill
+	mul		x1, x19, x26
+	add	x8, sp, #768            // =768
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #856]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #848]
+	stp	x9, x10, [sp, #80]      // 8-byte Folded Spill
+	bl	mulPv512x64
+	ldr	x8, [sp, #832]
+	ldr	x27, [sp, #816]
+	ldr	x28, [sp, #808]
+	ldr	x22, [sp, #800]
 	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #840]
+	ldr	x8, [sp, #824]
+	ldr	x23, [sp, #792]
+	ldr	x24, [sp, #784]
+	ldr	x25, [sp, #776]
 	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x8, [sp, #832]
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	ldr	x24, [sp, #824]
-	ldr	x25, [sp, #816]
-	ldr	x26, [sp, #808]
-	ldr	x21, [sp, #800]
-	ldr	x27, [sp, #792]
-	ldr	x28, [sp, #784]
-	ldr	x22, [sp, #120]         // 8-byte Folded Reload
-	ldr	x1, [x22, #40]
-	add	x8, sp, #704            // =704
-	ldr	x0, [sp, #136]          // 8-byte Folded Reload
-	bl	.LmulPv576x64
-	cmn	 x19, x28
-	ldr	x8, [sp, #776]
-	ldr	x9, [sp, #768]
-	ldp	x10, x1, [sp, #40]
-	adcs	x10, x10, x27
-	ldr	x11, [sp, #760]
-	ldp	x14, x12, [sp, #72]
-	adcs	x12, x12, x21
-	ldr	x13, [sp, #752]
-	adcs	x14, x14, x26
-	ldr	x15, [sp, #744]
-	ldp	x18, x16, [sp, #56]
-	adcs	x16, x16, x25
-	ldr	x17, [sp, #736]
-	adcs	x18, x18, x24
-	ldr	x0, [sp, #728]
-	ldp	x2, x4, [sp, #8]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #712]
-	ldp	x5, x3, [sp, #96]
+	ldr	x1, [x29, #32]
+	ldr	x29, [sp, #112]         // 8-byte Folded Reload
+	ldr	x21, [sp, #768]
+	add	x8, sp, #688            // =688
+	mov	 x0, x29
+	bl	mulPv512x64
+	ldp	x10, x18, [sp, #32]     // 8-byte Folded Reload
+	ldp	x14, x12, [sp, #56]     // 8-byte Folded Reload
+	cmn		x19, x21
+	ldr	x16, [sp, #48]          // 8-byte Folded Reload
+	adcs	x10, x10, x25
+	adcs	x12, x12, x24
+	ldp	x3, x1, [sp, #80]       // 8-byte Folded Reload
+	adcs	x14, x14, x23
+	ldp	x4, x6, [sp, #16]       // 8-byte Folded Reload
+	adcs	x16, x16, x22
+	ldr	x5, [sp, #72]           // 8-byte Folded Reload
+	adcs	x18, x18, x28
+	ldr	x2, [sp, #688]
+	ldr	x0, [sp, #696]
+	adcs	x1, x1, x27
 	adcs	x3, x3, x4
 	ldr	x4, [sp, #704]
-	ldp	x6, x19, [sp, #24]
+	ldr	x17, [sp, #712]
 	adcs	x5, x5, x6
-	ldr	x6, [sp, #720]
-	ldr	x7, [sp, #88]           // 8-byte Folded Reload
-	adcs	x7, x7, x19
-	adds	 x19, x10, x4
-	adcs	x10, x12, x2
-	str	x10, [sp, #40]          // 8-byte Folded Spill
-	adcs	x10, x14, x6
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
+	ldr	x15, [sp, #720]
+	adds		x19, x10, x2
+	ldr	x13, [sp, #728]
+	adcs	x10, x12, x0
+	ldr	x11, [sp, #736]
+	str	x10, [sp, #32]          // 8-byte Folded Spill
+	adcs	x10, x14, x4
+	ldr	x9, [sp, #744]
 	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
+	adcs	x10, x16, x17
+	ldr	x8, [sp, #752]
 	str	x10, [sp, #56]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
+	adcs	x10, x18, x15
 	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	stp	x9, x10, [sp, #96]
+	adcs	x10, x1, x13
+	str	x10, [sp, #40]          // 8-byte Folded Spill
+	adcs	x10, x3, x11
+	adcs	x9, x5, x9
 	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	mul	 x1, x19, x23
-	add	x8, sp, #624            // =624
+	str	x8, [sp, #72]           // 8-byte Folded Spill
+	mul		x1, x19, x26
+	add	x8, sp, #608            // =608
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #696]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #688]
+	stp	x9, x10, [sp, #80]      // 8-byte Folded Spill
+	bl	mulPv512x64
+	ldr	x8, [sp, #672]
+	ldr	x26, [sp, #104]         // 8-byte Folded Reload
+	ldr	x27, [sp, #656]
+	ldr	x28, [sp, #648]
 	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #680]
+	ldr	x8, [sp, #664]
+	ldr	x22, [sp, #640]
+	ldr	x23, [sp, #632]
+	ldr	x24, [sp, #624]
 	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x8, [sp, #672]
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	ldr	x24, [sp, #664]
-	ldr	x25, [sp, #656]
-	ldr	x26, [sp, #648]
-	ldr	x21, [sp, #640]
-	ldr	x27, [sp, #632]
-	ldr	x28, [sp, #624]
-	ldr	x1, [x22, #48]
-	add	x8, sp, #544            // =544
-	ldr	x23, [sp, #136]         // 8-byte Folded Reload
-	mov	 x0, x23
-	bl	.LmulPv576x64
-	cmn	 x19, x28
-	ldr	x8, [sp, #616]
-	ldr	x9, [sp, #608]
-	ldp	x10, x1, [sp, #40]
-	adcs	x10, x10, x27
-	ldr	x11, [sp, #600]
-	ldp	x14, x12, [sp, #72]
-	adcs	x12, x12, x21
-	ldr	x13, [sp, #592]
-	adcs	x14, x14, x26
-	ldr	x15, [sp, #584]
-	ldp	x18, x16, [sp, #56]
-	adcs	x16, x16, x25
-	ldr	x17, [sp, #576]
-	adcs	x18, x18, x24
-	ldr	x0, [sp, #568]
-	ldp	x2, x4, [sp, #8]
-	adcs	x1, x1, x2
-	ldr	x2, [sp, #552]
-	ldp	x5, x3, [sp, #96]
+	ldr	x1, [x26, #40]
+	ldr	x25, [sp, #616]
+	ldr	x21, [sp, #608]
+	add	x8, sp, #528            // =528
+	mov	 x0, x29
+	bl	mulPv512x64
+	ldp	x10, x18, [sp, #32]     // 8-byte Folded Reload
+	ldp	x14, x12, [sp, #56]     // 8-byte Folded Reload
+	cmn		x19, x21
+	ldr	x16, [sp, #48]          // 8-byte Folded Reload
+	adcs	x10, x10, x25
+	adcs	x12, x12, x24
+	ldp	x3, x1, [sp, #80]       // 8-byte Folded Reload
+	adcs	x14, x14, x23
+	ldp	x4, x6, [sp, #16]       // 8-byte Folded Reload
+	adcs	x16, x16, x22
+	ldr	x5, [sp, #72]           // 8-byte Folded Reload
+	adcs	x18, x18, x28
+	ldr	x2, [sp, #528]
+	ldr	x0, [sp, #536]
+	adcs	x1, x1, x27
 	adcs	x3, x3, x4
 	ldr	x4, [sp, #544]
-	ldp	x6, x19, [sp, #24]
+	ldr	x17, [sp, #552]
 	adcs	x5, x5, x6
-	ldr	x6, [sp, #560]
-	ldr	x7, [sp, #88]           // 8-byte Folded Reload
-	adcs	x7, x7, x19
-	adds	 x19, x10, x4
-	adcs	x10, x12, x2
-	str	x10, [sp, #40]          // 8-byte Folded Spill
-	adcs	x10, x14, x6
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
+	ldr	x15, [sp, #560]
+	adds		x19, x10, x2
+	ldr	x13, [sp, #568]
+	adcs	x10, x12, x0
+	ldr	x11, [sp, #576]
+	str	x10, [sp, #32]          // 8-byte Folded Spill
+	adcs	x10, x14, x4
+	ldr	x9, [sp, #584]
 	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
+	adcs	x10, x16, x17
+	ldr	x8, [sp, #592]
 	str	x10, [sp, #56]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
+	adcs	x10, x18, x15
 	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	stp	x9, x10, [sp, #96]
+	adcs	x10, x1, x13
+	ldr	x29, [sp, #120]         // 8-byte Folded Reload
+	str	x10, [sp, #40]          // 8-byte Folded Spill
+	adcs	x10, x3, x11
+	adcs	x9, x5, x9
 	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x22, [sp, #128]         // 8-byte Folded Reload
-	mul	 x1, x19, x22
-	add	x8, sp, #464            // =464
+	str	x8, [sp, #72]           // 8-byte Folded Spill
+	mul		x1, x19, x29
+	add	x8, sp, #448            // =448
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #536]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldr	x8, [sp, #528]
+	stp	x9, x10, [sp, #80]      // 8-byte Folded Spill
+	bl	mulPv512x64
+	ldr	x8, [sp, #512]
+	ldp	x22, x28, [sp, #480]
+	ldp	x24, x23, [sp, #464]
+	ldp	x21, x25, [sp, #448]
 	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldr	x8, [sp, #520]
+	ldp	x27, x8, [sp, #496]
 	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldr	x8, [sp, #512]
-	str	x8, [sp, #8]            // 8-byte Folded Spill
-	ldp	x25, x24, [sp, #496]
-	ldp	x21, x26, [sp, #480]
-	ldp	x28, x27, [sp, #464]
-	ldr	x8, [sp, #120]          // 8-byte Folded Reload
-	ldr	x1, [x8, #56]
-	add	x8, sp, #384            // =384
-	mov	 x0, x23
-	bl	.LmulPv576x64
-	cmn	 x19, x28
-	ldp	x9, x8, [sp, #448]
-	ldp	x10, x1, [sp, #40]
-	adcs	x10, x10, x27
-	ldp	x13, x11, [sp, #432]
-	ldp	x14, x12, [sp, #72]
-	adcs	x12, x12, x21
-	adcs	x14, x14, x26
-	ldp	x17, x15, [sp, #416]
-	ldp	x18, x16, [sp, #56]
-	adcs	x16, x16, x25
-	adcs	x18, x18, x24
-	ldp	x2, x4, [sp, #8]
-	adcs	x1, x1, x2
-	ldp	x5, x3, [sp, #96]
+	ldr	x1, [x26, #48]
+	ldr	x26, [sp, #112]         // 8-byte Folded Reload
+	add	x8, sp, #368            // =368
+	mov	 x0, x26
+	bl	mulPv512x64
+	ldp	x10, x18, [sp, #32]     // 8-byte Folded Reload
+	ldp	x14, x12, [sp, #56]     // 8-byte Folded Reload
+	cmn		x19, x21
+	ldr	x16, [sp, #48]          // 8-byte Folded Reload
+	adcs	x10, x10, x25
+	adcs	x12, x12, x24
+	ldp	x3, x1, [sp, #80]       // 8-byte Folded Reload
+	adcs	x14, x14, x23
+	ldp	x4, x6, [sp, #16]       // 8-byte Folded Reload
+	adcs	x16, x16, x22
+	ldr	x5, [sp, #72]           // 8-byte Folded Reload
+	adcs	x18, x18, x28
+	ldp	x2, x0, [sp, #368]
+	adcs	x1, x1, x27
 	adcs	x3, x3, x4
-	ldp	x4, x2, [sp, #384]
-	ldp	x6, x19, [sp, #24]
+	ldr	x4, [sp, #384]
+	ldp	x17, x15, [sp, #392]
 	adcs	x5, x5, x6
-	ldp	x6, x0, [sp, #400]
-	ldr	x7, [sp, #88]           // 8-byte Folded Reload
-	adcs	x7, x7, x19
-	adds	 x19, x10, x4
-	adcs	x10, x12, x2
-	str	x10, [sp, #40]          // 8-byte Folded Spill
-	adcs	x10, x14, x6
-	str	x10, [sp, #80]          // 8-byte Folded Spill
-	adcs	x10, x16, x0
-	str	x10, [sp, #72]          // 8-byte Folded Spill
-	adcs	x10, x18, x17
+	adds		x19, x10, x2
+	ldp	x13, x11, [sp, #408]
+	adcs	x10, x12, x0
+	str	x10, [sp, #32]          // 8-byte Folded Spill
+	adcs	x10, x14, x4
+	ldp	x9, x8, [sp, #424]
 	str	x10, [sp, #64]          // 8-byte Folded Spill
-	adcs	x10, x1, x15
+	adcs	x10, x16, x17
 	str	x10, [sp, #56]          // 8-byte Folded Spill
-	adcs	x10, x3, x13
+	adcs	x10, x18, x15
 	str	x10, [sp, #48]          // 8-byte Folded Spill
-	adcs	x10, x5, x11
-	adcs	x9, x7, x9
-	stp	x9, x10, [sp, #96]
+	adcs	x10, x1, x13
+	str	x10, [sp, #40]          // 8-byte Folded Spill
+	adcs	x10, x3, x11
+	adcs	x9, x5, x9
 	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	mul	 x1, x19, x22
-	add	x8, sp, #304            // =304
+	str	x8, [sp, #72]           // 8-byte Folded Spill
+	mul		x1, x19, x29
+	add	x8, sp, #288            // =288
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #376]
-	str	x8, [sp, #32]           // 8-byte Folded Spill
-	ldp	x22, x8, [sp, #360]
+	stp	x9, x10, [sp, #80]      // 8-byte Folded Spill
+	bl	mulPv512x64
+	ldp	x29, x8, [sp, #344]
+	ldp	x28, x27, [sp, #328]
+	ldp	x23, x22, [sp, #312]
+	ldp	x25, x24, [sp, #296]
 	str	x8, [sp, #24]           // 8-byte Folded Spill
-	ldp	x24, x23, [sp, #344]
-	ldp	x26, x25, [sp, #328]
-	ldp	x27, x21, [sp, #312]
-	ldr	x28, [sp, #304]
-	ldr	x8, [sp, #120]          // 8-byte Folded Reload
-	ldr	x1, [x8, #64]
-	add	x8, sp, #224            // =224
-	ldr	x0, [sp, #136]          // 8-byte Folded Reload
-	bl	.LmulPv576x64
-	cmn	 x19, x28
-	ldp	x9, x8, [sp, #288]
-	ldp	x10, x1, [sp, #40]
-	adcs	x10, x10, x27
-	ldp	x13, x11, [sp, #272]
-	ldp	x14, x12, [sp, #72]
-	adcs	x12, x12, x21
-	adcs	x14, x14, x26
-	ldp	x17, x15, [sp, #256]
-	ldp	x18, x16, [sp, #56]
-	adcs	x16, x16, x25
-	adcs	x18, x18, x24
-	adcs	x1, x1, x23
-	ldp	x4, x2, [sp, #224]
-	ldp	x5, x3, [sp, #96]
-	adcs	x3, x3, x22
-	ldp	x6, x19, [sp, #24]
+	ldr	x8, [sp, #104]          // 8-byte Folded Reload
+	ldr	x21, [sp, #288]
+	mov	 x0, x26
+	ldr	x1, [x8, #56]
+	add	x8, sp, #208            // =208
+	bl	mulPv512x64
+	ldp	x10, x18, [sp, #32]     // 8-byte Folded Reload
+	ldp	x14, x12, [sp, #56]     // 8-byte Folded Reload
+	cmn		x19, x21
+	ldr	x16, [sp, #48]          // 8-byte Folded Reload
+	adcs	x10, x10, x25
+	adcs	x12, x12, x24
+	ldp	x3, x1, [sp, #80]       // 8-byte Folded Reload
+	adcs	x14, x14, x23
+	adcs	x16, x16, x22
+	ldr	x5, [sp, #72]           // 8-byte Folded Reload
+	ldr	x6, [sp, #24]           // 8-byte Folded Reload
+	adcs	x18, x18, x28
+	ldp	x2, x0, [sp, #208]
+	adcs	x1, x1, x27
+	adcs	x3, x3, x29
+	ldr	x4, [sp, #224]
+	ldp	x17, x15, [sp, #232]
 	adcs	x5, x5, x6
-	ldp	x6, x0, [sp, #240]
-	ldr	x7, [sp, #88]           // 8-byte Folded Reload
-	adcs	x7, x7, x19
-	adds	 x19, x10, x4
-	adcs	x21, x12, x2
-	adcs	x22, x14, x6
-	adcs	x23, x16, x0
-	adcs	x24, x18, x17
-	adcs	x25, x1, x15
-	adcs	x26, x3, x13
-	adcs	x10, x5, x11
-	str	x10, [sp, #136]         // 8-byte Folded Spill
-	adcs	x28, x7, x9
-	adcs	x27, x8, xzr
-	ldr	x8, [sp, #128]          // 8-byte Folded Reload
-	mul	 x1, x19, x8
-	add	x8, sp, #144            // =144
+	adds		x19, x10, x2
+	ldp	x13, x11, [sp, #248]
+	adcs	x21, x12, x0
+	adcs	x22, x14, x4
+	ldp	x9, x8, [sp, #264]
+	adcs	x23, x16, x17
+	adcs	x24, x18, x15
+	adcs	x25, x1, x13
+	adcs	x26, x3, x11
+	adcs	x27, x5, x9
+	adcs	x28, x8, xzr
+	ldr	x8, [sp, #120]          // 8-byte Folded Reload
 	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x16, x8, [sp, #208]
-	ldp	x9, x10, [sp, #144]
-	ldp	x11, x12, [sp, #160]
-	cmn	 x19, x9
-	ldp	x13, x9, [sp, #176]
-	adcs	x10, x21, x10
-	ldp	x14, x15, [sp, #192]
-	adcs	x11, x22, x11
-	adcs	x12, x23, x12
-	adcs	x13, x24, x13
-	adcs	x9, x25, x9
-	adcs	x14, x26, x14
-	ldp	x0, x17, [x20, #56]
-	ldp	x2, x18, [x20, #40]
-	ldp	x4, x1, [x20, #24]
-	ldp	x6, x3, [x20, #8]
-	ldr	 x5, [x20]
-	ldr	x7, [sp, #136]          // 8-byte Folded Reload
-	adcs	x15, x7, x15
-	adcs	x16, x28, x16
-	adcs	x8, x27, x8
-	subs	 x5, x10, x5
-	sbcs	x6, x11, x6
-	sbcs	x3, x12, x3
-	sbcs	x4, x13, x4
-	sbcs	x1, x9, x1
-	sbcs	x2, x14, x2
-	sbcs	x18, x15, x18
-	sbcs	x0, x16, x0
+	mul		x1, x19, x8
+	add	x8, sp, #128            // =128
+	bl	mulPv512x64
+	ldp	x13, x16, [sp, #128]
+	ldr	x15, [sp, #144]
+	ldp	x14, x12, [sp, #152]
+	ldp	x11, x10, [sp, #168]
+	cmn		x19, x13
+	adcs	x16, x21, x16
+	adcs	x15, x22, x15
+	ldp	x9, x8, [sp, #184]
+	adcs	x14, x23, x14
+	adcs	x12, x24, x12
+	adcs	x11, x25, x11
+	ldp		x3, x4, [x20]
+	adcs	x10, x26, x10
+	ldp	x1, x2, [x20, #16]
+	adcs	x9, x27, x9
+	adcs	x8, x28, x8
+	ldp	x18, x0, [x20, #32]
+	subs		x3, x16, x3
+	sbcs	x4, x15, x4
+	ldp	x13, x17, [x20, #48]
+	sbcs	x1, x14, x1
+	sbcs	x2, x12, x2
+	sbcs	x18, x11, x18
+	sbcs	x0, x10, x0
+	sbcs	x13, x9, x13
 	sbcs	x17, x8, x17
-	asr	x7, x17, #63
-	cmp	 x7, #0                 // =0
-	csel	x10, x10, x5, lt
-	csel	x11, x11, x6, lt
-	csel	x12, x12, x3, lt
-	csel	x13, x13, x4, lt
-	csel	x9, x9, x1, lt
-	csel	x14, x14, x2, lt
-	csel	x15, x15, x18, lt
-	csel	x16, x16, x0, lt
+	cmp		x17, #0         // =0
+	csel	x9, x9, x13, lt
+	ldr	x13, [sp, #96]          // 8-byte Folded Reload
+	csel	x16, x16, x3, lt
+	csel	x15, x15, x4, lt
+	csel	x14, x14, x1, lt
+	csel	x12, x12, x2, lt
+	csel	x11, x11, x18, lt
+	csel	x10, x10, x0, lt
 	csel	x8, x8, x17, lt
-	ldr	x17, [sp, #112]         // 8-byte Folded Reload
-	stp	 x10, x11, [x17]
-	stp	x12, x13, [x17, #16]
-	stp	x9, x14, [x17, #32]
-	stp	x15, x16, [x17, #48]
-	str	x8, [x17, #64]
-	sub	sp, x29, #80            // =80
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
+	stp		x16, x15, [x13]
+	stp	x14, x12, [x13, #16]
+	stp	x11, x10, [x13, #32]
+	stp	x9, x8, [x13, #48]
+	add	sp, sp, #1408           // =1408
+	ldp	x29, x30, [sp, #80]     // 8-byte Folded Reload
+	ldp	x20, x19, [sp, #64]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #48]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #32]     // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #16]     // 8-byte Folded Reload
+	ldp	x28, x27, [sp], #96     // 8-byte Folded Reload
 	ret
-.Lfunc_end131:
-	.size	mcl_fp_montNF9L, .Lfunc_end131-mcl_fp_montNF9L
+.Lfunc_end61:
+	.size	mcl_fp_montNF8L, .Lfunc_end61-mcl_fp_montNF8L
 
-	.globl	mcl_fp_montRed9L
-	.align	2
-	.type	mcl_fp_montRed9L,@function
-mcl_fp_montRed9L:                       // @mcl_fp_montRed9L
+	.globl	mcl_fp_montRed8L
+	.p2align	2
+	.type	mcl_fp_montRed8L,@function
+mcl_fp_montRed8L:                       // @mcl_fp_montRed8L
 // BB#0:
-	stp	x28, x27, [sp, #-96]!
-	stp	x26, x25, [sp, #16]
-	stp	x24, x23, [sp, #32]
-	stp	x22, x21, [sp, #48]
-	stp	x20, x19, [sp, #64]
-	stp	x29, x30, [sp, #80]
-	add	x29, sp, #80            // =80
-	sub	sp, sp, #912            // =912
-	mov	 x20, x2
-	ldur	x9, [x20, #-8]
-	str	x9, [sp, #40]           // 8-byte Folded Spill
-	ldr	x8, [x20, #64]
-	str	x8, [sp, #184]          // 8-byte Folded Spill
-	ldr	x8, [x20, #48]
-	str	x8, [sp, #168]          // 8-byte Folded Spill
-	ldr	x8, [x20, #56]
-	str	x8, [sp, #176]          // 8-byte Folded Spill
-	ldr	x8, [x20, #32]
-	str	x8, [sp, #144]          // 8-byte Folded Spill
-	ldr	x8, [x20, #40]
-	str	x8, [sp, #152]          // 8-byte Folded Spill
-	ldr	x8, [x20, #16]
-	str	x8, [sp, #128]          // 8-byte Folded Spill
-	ldr	x8, [x20, #24]
-	str	x8, [sp, #136]          // 8-byte Folded Spill
-	ldr	 x8, [x20]
-	str	x8, [sp, #112]          // 8-byte Folded Spill
-	ldr	x8, [x20, #8]
-	str	x8, [sp, #120]          // 8-byte Folded Spill
-	ldr	x8, [x1, #128]
-	str	x8, [sp, #96]           // 8-byte Folded Spill
-	ldr	x8, [x1, #136]
-	str	x8, [sp, #104]          // 8-byte Folded Spill
-	ldr	x8, [x1, #112]
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldr	x8, [x1, #120]
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x8, [x1, #96]
-	str	x8, [sp, #64]           // 8-byte Folded Spill
-	ldr	x8, [x1, #104]
-	str	x8, [sp, #72]           // 8-byte Folded Spill
-	ldr	x8, [x1, #80]
-	str	x8, [sp, #48]           // 8-byte Folded Spill
-	ldr	x8, [x1, #88]
-	str	x8, [sp, #56]           // 8-byte Folded Spill
-	ldp	x23, x8, [x1, #64]
-	str	x8, [sp, #16]           // 8-byte Folded Spill
-	ldp	x25, x19, [x1, #48]
-	ldp	x28, x27, [x1, #32]
-	ldp	x22, x24, [x1, #16]
-	ldp	 x21, x26, [x1]
-	str	x0, [sp, #160]          // 8-byte Folded Spill
-	mul	 x1, x21, x9
-	sub	x8, x29, #160           // =160
-	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [x29, #-96]
-	ldp	x11, x10, [x29, #-112]
-	ldp	x13, x12, [x29, #-128]
-	ldp	x14, x15, [x29, #-160]
-	ldp	x16, x17, [x29, #-144]
-	cmn	 x21, x14
-	adcs	x21, x26, x15
-	adcs	x14, x22, x16
-	adcs	x24, x24, x17
-	adcs	x26, x28, x13
-	adcs	x27, x27, x12
-	adcs	x25, x25, x11
-	adcs	x10, x19, x10
-	stp	x10, x14, [sp, #24]
-	adcs	x23, x23, x9
-	ldr	x9, [sp, #16]           // 8-byte Folded Reload
-	adcs	x28, x9, x8
-	ldr	x8, [sp, #48]           // 8-byte Folded Reload
-	adcs	x22, x8, xzr
-	ldr	x8, [sp, #56]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #56]           // 8-byte Folded Spill
-	ldr	x8, [sp, #64]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #64]           // 8-byte Folded Spill
-	ldr	x8, [sp, #72]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #72]           // 8-byte Folded Spill
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldr	x8, [sp, #88]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x8, [sp, #96]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #96]           // 8-byte Folded Spill
-	ldr	x8, [sp, #104]          // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #104]          // 8-byte Folded Spill
-	adcs	x8, xzr, xzr
-	str	x8, [sp, #48]           // 8-byte Folded Spill
-	ldr	x19, [sp, #40]          // 8-byte Folded Reload
-	mul	 x1, x21, x19
-	sub	x8, x29, #240           // =240
-	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [x29, #-176]
-	ldp	x11, x10, [x29, #-192]
-	ldp	x13, x12, [x29, #-208]
-	ldp	x14, x15, [x29, #-240]
-	ldp	x16, x17, [x29, #-224]
-	cmn	 x21, x14
-	ldr	x14, [sp, #32]          // 8-byte Folded Reload
-	adcs	x21, x14, x15
-	adcs	x14, x24, x16
-	adcs	x26, x26, x17
-	adcs	x27, x27, x13
-	adcs	x25, x25, x12
-	ldr	x12, [sp, #24]          // 8-byte Folded Reload
-	adcs	x11, x12, x11
-	stp	x11, x14, [sp, #24]
-	adcs	x23, x23, x10
-	adcs	x28, x28, x9
-	adcs	x22, x22, x8
-	ldr	x8, [sp, #56]           // 8-byte Folded Reload
-	adcs	x24, x8, xzr
-	ldr	x8, [sp, #64]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #64]           // 8-byte Folded Spill
-	ldr	x8, [sp, #72]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
+	stp	x28, x27, [sp, #-96]!   // 8-byte Folded Spill
+	stp	x26, x25, [sp, #16]     // 8-byte Folded Spill
+	stp	x24, x23, [sp, #32]     // 8-byte Folded Spill
+	stp	x22, x21, [sp, #48]     // 8-byte Folded Spill
+	stp	x20, x19, [sp, #64]     // 8-byte Folded Spill
+	stp	x29, x30, [sp, #80]     // 8-byte Folded Spill
+	sub	sp, sp, #736            // =736
+	mov	 x21, x2
+	ldr	x8, [x21, #48]
+	mov	 x20, x1
+	ldur	x23, [x21, #-8]
+	ldp		x29, x19, [x20]
 	str	x8, [sp, #72]           // 8-byte Folded Spill
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
+	ldr	x8, [x21, #56]
+	ldp	x22, x24, [x20, #48]
+	ldp	x25, x26, [x20, #32]
+	ldp	x27, x28, [x20, #16]
 	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldr	x8, [sp, #88]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x8, [sp, #96]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #96]           // 8-byte Folded Spill
-	ldr	x8, [sp, #104]          // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #104]          // 8-byte Folded Spill
-	ldr	x8, [sp, #48]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
+	ldr	x8, [x21, #32]
+	str	x0, [sp, #88]           // 8-byte Folded Spill
+	mul		x1, x29, x23
+	mov	 x0, x21
 	str	x8, [sp, #56]           // 8-byte Folded Spill
-	mul	 x1, x21, x19
-	add	x8, sp, #672            // =672
-	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #744]
-	ldr	x9, [sp, #736]
-	ldr	x10, [sp, #728]
-	ldr	x11, [sp, #720]
-	ldr	x12, [sp, #712]
-	ldr	x13, [sp, #704]
-	ldr	x14, [sp, #672]
-	ldr	x15, [sp, #680]
-	ldr	x16, [sp, #688]
-	ldr	x17, [sp, #696]
-	cmn	 x21, x14
-	ldr	x14, [sp, #32]          // 8-byte Folded Reload
-	adcs	x21, x14, x15
-	adcs	x14, x26, x16
-	str	x14, [sp, #48]          // 8-byte Folded Spill
-	adcs	x27, x27, x17
+	ldr	x8, [x21, #40]
+	str	x8, [sp, #64]           // 8-byte Folded Spill
+	ldr	x8, [x21, #16]
+	str	x8, [sp, #40]           // 8-byte Folded Spill
+	ldr	x8, [x21, #24]
+	str	x8, [sp, #48]           // 8-byte Folded Spill
+	ldr		x8, [x21]
+	str	x8, [sp, #24]           // 8-byte Folded Spill
+	ldr	x8, [x21, #8]
+	str	x8, [sp, #32]           // 8-byte Folded Spill
+	add	x8, sp, #656            // =656
+	bl	mulPv512x64
+	ldr	x14, [sp, #656]
+	ldr	x15, [sp, #664]
+	ldr	x16, [sp, #672]
+	ldr	x13, [sp, #680]
+	ldr	x12, [sp, #688]
+	cmn		x29, x14
+	ldr	x11, [sp, #696]
+	adcs	x19, x19, x15
+	ldr	x10, [sp, #704]
+	adcs	x27, x27, x16
+	ldr	x9, [sp, #712]
+	adcs	x28, x28, x13
+	ldr	x8, [sp, #720]
+	ldr	x17, [x20, #64]
+	adcs	x25, x25, x12
+	adcs	x26, x26, x11
+	adcs	x10, x22, x10
+	adcs	x24, x24, x9
+	adcs	x29, x17, x8
+	mul		x1, x19, x23
+	add	x8, sp, #576            // =576
+	mov	 x0, x21
+	str	x10, [sp, #16]          // 8-byte Folded Spill
+	adcs	x22, xzr, xzr
+	bl	mulPv512x64
+	ldr	x15, [sp, #576]
+	ldr	x16, [sp, #584]
+	ldr	x8, [sp, #640]
+	ldr	x14, [sp, #592]
+	ldr	x13, [sp, #600]
+	ldr	x12, [sp, #608]
+	cmn		x19, x15
+	adcs	x19, x27, x16
+	add		x8, x22, x8
+	adcs	x22, x28, x14
 	adcs	x25, x25, x13
-	ldr	x13, [sp, #24]          // 8-byte Folded Reload
+	ldr	x11, [sp, #616]
+	adcs	x26, x26, x12
+	ldr	x12, [sp, #16]          // 8-byte Folded Reload
+	ldr	x10, [sp, #624]
+	ldr	x9, [sp, #632]
+	ldr	x17, [x20, #72]
+	adcs	x11, x12, x11
+	adcs	x24, x24, x10
+	adcs	x27, x29, x9
+	adcs	x28, x17, x8
+	mul		x1, x19, x23
+	add	x8, sp, #496            // =496
+	mov	 x0, x21
+	str	x11, [sp, #16]          // 8-byte Folded Spill
+	adcs	x29, xzr, xzr
+	bl	mulPv512x64
+	ldr	x15, [sp, #496]
+	ldp	x16, x14, [sp, #504]
+	ldr	x13, [sp, #520]
+	ldr	x12, [sp, #528]
+	cmn		x19, x15
+	adcs	x19, x22, x16
+	adcs	x22, x25, x14
+	adcs	x25, x26, x13
+	ldr	x13, [sp, #16]          // 8-byte Folded Reload
+	ldr	x11, [sp, #536]
+	ldr	x10, [sp, #544]
+	ldr	x8, [sp, #560]
+	ldr	x9, [sp, #552]
+	ldr	x17, [x20, #80]
 	adcs	x12, x13, x12
-	str	x12, [sp, #32]          // 8-byte Folded Spill
-	adcs	x23, x23, x11
-	adcs	x28, x28, x10
-	adcs	x22, x22, x9
-	adcs	x24, x24, x8
-	ldr	x8, [sp, #64]           // 8-byte Folded Reload
-	adcs	x26, x8, xzr
-	ldr	x8, [sp, #72]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
+	adcs	x24, x24, x11
+	adcs	x26, x27, x10
+	add		x8, x29, x8
+	adcs	x27, x28, x9
+	adcs	x28, x17, x8
+	mul		x1, x19, x23
+	add	x8, sp, #416            // =416
+	mov	 x0, x21
+	str	x12, [sp, #16]          // 8-byte Folded Spill
+	adcs	x29, xzr, xzr
+	bl	mulPv512x64
+	ldr	x15, [sp, #416]
+	ldp	x16, x14, [sp, #424]
+	ldp	x13, x12, [sp, #440]
+	ldp	x11, x10, [sp, #456]
+	cmn		x19, x15
+	adcs	x19, x22, x16
+	adcs	x22, x25, x14
+	ldr	x14, [sp, #16]          // 8-byte Folded Reload
+	ldp	x9, x8, [sp, #472]
+	ldr	x17, [x20, #88]
+	mul		x1, x19, x23
+	adcs	x13, x14, x13
+	adcs	x24, x24, x12
+	adcs	x25, x26, x11
+	adcs	x26, x27, x10
+	add		x8, x29, x8
+	adcs	x27, x28, x9
+	adcs	x28, x17, x8
+	add	x8, sp, #336            // =336
+	mov	 x0, x21
+	str	x13, [sp, #16]          // 8-byte Folded Spill
+	adcs	x29, xzr, xzr
+	bl	mulPv512x64
+	ldr	x15, [sp, #336]
+	ldp	x16, x14, [sp, #344]
+	ldp	x13, x12, [sp, #360]
+	ldp	x11, x10, [sp, #376]
+	cmn		x19, x15
+	ldr	x15, [sp, #16]          // 8-byte Folded Reload
+	adcs	x19, x22, x16
+	ldp	x9, x8, [sp, #392]
+	ldr	x17, [x20, #96]
+	adcs	x22, x15, x14
+	adcs	x13, x24, x13
+	adcs	x24, x25, x12
+	adcs	x25, x26, x11
+	adcs	x26, x27, x10
+	add		x8, x29, x8
+	adcs	x27, x28, x9
+	adcs	x28, x17, x8
+	mul		x1, x19, x23
+	add	x8, sp, #256            // =256
+	mov	 x0, x21
+	str	x13, [sp, #16]          // 8-byte Folded Spill
+	adcs	x29, xzr, xzr
+	bl	mulPv512x64
+	ldr	x15, [sp, #256]
+	ldp	x16, x14, [sp, #264]
+	ldp	x13, x12, [sp, #280]
+	ldp	x11, x10, [sp, #296]
+	cmn		x19, x15
+	ldr	x15, [sp, #16]          // 8-byte Folded Reload
+	adcs	x19, x22, x16
+	ldp	x9, x8, [sp, #312]
+	ldr	x17, [x20, #104]
+	adcs	x22, x15, x14
+	adcs	x13, x24, x13
+	adcs	x24, x25, x12
+	adcs	x25, x26, x11
+	adcs	x26, x27, x10
+	add		x8, x29, x8
+	adcs	x27, x28, x9
+	adcs	x28, x17, x8
+	mul		x1, x19, x23
+	add	x8, sp, #176            // =176
+	mov	 x0, x21
+	str	x13, [sp, #16]          // 8-byte Folded Spill
+	adcs	x29, xzr, xzr
+	bl	mulPv512x64
+	ldr	x15, [sp, #176]
+	ldp	x16, x14, [sp, #184]
+	ldp	x13, x12, [sp, #200]
+	ldp	x11, x10, [sp, #216]
+	cmn		x19, x15
+	ldr	x15, [sp, #16]          // 8-byte Folded Reload
+	adcs	x19, x22, x16
+	ldp	x9, x8, [sp, #232]
+	ldr	x17, [x20, #112]
+	adcs	x22, x15, x14
+	adcs	x13, x24, x13
+	adcs	x24, x25, x12
+	adcs	x25, x26, x11
+	adcs	x26, x27, x10
+	add		x8, x29, x8
+	adcs	x27, x28, x9
+	adcs	x28, x17, x8
+	mul		x1, x19, x23
+	add	x8, sp, #96             // =96
+	mov	 x0, x21
+	str	x13, [sp, #16]          // 8-byte Folded Spill
+	adcs	x29, xzr, xzr
+	bl	mulPv512x64
+	ldp	x15, x14, [sp, #96]
+	ldr	x16, [sp, #112]
+	ldp	x13, x12, [sp, #120]
+	ldp	x11, x10, [sp, #136]
+	cmn		x19, x15
+	ldr	x15, [sp, #16]          // 8-byte Folded Reload
+	adcs	x14, x22, x14
+	ldp	x9, x8, [sp, #152]
+	ldr	x17, [x20, #120]
+	adcs	x15, x15, x16
+	adcs	x13, x24, x13
+	adcs	x12, x25, x12
+	adcs	x11, x26, x11
+	adcs	x10, x27, x10
+	add		x8, x29, x8
+	adcs	x9, x28, x9
+	adcs	x8, x17, x8
+	ldp	x16, x17, [sp, #24]     // 8-byte Folded Reload
+	ldp	x18, x0, [sp, #40]      // 8-byte Folded Reload
+	ldp	x1, x2, [sp, #56]       // 8-byte Folded Reload
+	ldp	x3, x4, [sp, #72]       // 8-byte Folded Reload
+	subs		x16, x14, x16
+	sbcs	x17, x15, x17
+	sbcs	x18, x13, x18
+	sbcs	x0, x12, x0
+	sbcs	x1, x11, x1
+	sbcs	x2, x10, x2
+	sbcs	x3, x9, x3
+	sbcs	x4, x8, x4
+	ngcs	 x5, xzr
+	tst	 x5, #0x1
+	csel	x14, x14, x16, ne
+	ldr	x16, [sp, #88]          // 8-byte Folded Reload
+	csel	x15, x15, x17, ne
+	csel	x13, x13, x18, ne
+	csel	x12, x12, x0, ne
+	csel	x11, x11, x1, ne
+	csel	x10, x10, x2, ne
+	csel	x9, x9, x3, ne
+	csel	x8, x8, x4, ne
+	stp		x14, x15, [x16]
+	stp	x13, x12, [x16, #16]
+	stp	x11, x10, [x16, #32]
+	stp	x9, x8, [x16, #48]
+	add	sp, sp, #736            // =736
+	ldp	x29, x30, [sp, #80]     // 8-byte Folded Reload
+	ldp	x20, x19, [sp, #64]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #48]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #32]     // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #16]     // 8-byte Folded Reload
+	ldp	x28, x27, [sp], #96     // 8-byte Folded Reload
+	ret
+.Lfunc_end62:
+	.size	mcl_fp_montRed8L, .Lfunc_end62-mcl_fp_montRed8L
+
+	.globl	mcl_fp_montRedNF8L
+	.p2align	2
+	.type	mcl_fp_montRedNF8L,@function
+mcl_fp_montRedNF8L:                     // @mcl_fp_montRedNF8L
+// BB#0:
+	stp	x28, x27, [sp, #-96]!   // 8-byte Folded Spill
+	stp	x26, x25, [sp, #16]     // 8-byte Folded Spill
+	stp	x24, x23, [sp, #32]     // 8-byte Folded Spill
+	stp	x22, x21, [sp, #48]     // 8-byte Folded Spill
+	stp	x20, x19, [sp, #64]     // 8-byte Folded Spill
+	stp	x29, x30, [sp, #80]     // 8-byte Folded Spill
+	sub	sp, sp, #736            // =736
+	mov	 x21, x2
+	ldr	x8, [x21, #48]
+	mov	 x20, x1
+	ldur	x23, [x21, #-8]
+	ldp		x29, x19, [x20]
 	str	x8, [sp, #72]           // 8-byte Folded Spill
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
+	ldr	x8, [x21, #56]
+	ldp	x22, x24, [x20, #48]
+	ldp	x25, x26, [x20, #32]
+	ldp	x27, x28, [x20, #16]
 	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldr	x8, [sp, #88]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x8, [sp, #96]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #96]           // 8-byte Folded Spill
-	ldr	x8, [sp, #104]          // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #104]          // 8-byte Folded Spill
-	ldr	x8, [sp, #56]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
+	ldr	x8, [x21, #32]
+	str	x0, [sp, #88]           // 8-byte Folded Spill
+	mul		x1, x29, x23
+	mov	 x0, x21
+	str	x8, [sp, #56]           // 8-byte Folded Spill
+	ldr	x8, [x21, #40]
 	str	x8, [sp, #64]           // 8-byte Folded Spill
-	mul	 x1, x21, x19
-	add	x8, sp, #592            // =592
-	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #664]
-	ldr	x9, [sp, #656]
-	ldr	x10, [sp, #648]
-	ldr	x11, [sp, #640]
-	ldr	x12, [sp, #632]
-	ldr	x13, [sp, #624]
-	ldr	x14, [sp, #592]
-	ldr	x15, [sp, #600]
-	ldr	x16, [sp, #608]
-	ldr	x17, [sp, #616]
-	cmn	 x21, x14
-	ldr	x14, [sp, #48]          // 8-byte Folded Reload
-	adcs	x21, x14, x15
-	adcs	x14, x27, x16
-	str	x14, [sp, #56]          // 8-byte Folded Spill
-	adcs	x25, x25, x17
-	ldr	x14, [sp, #32]          // 8-byte Folded Reload
-	adcs	x13, x14, x13
-	str	x13, [sp, #48]          // 8-byte Folded Spill
-	adcs	x23, x23, x12
-	adcs	x28, x28, x11
-	adcs	x22, x22, x10
+	ldr	x8, [x21, #16]
+	str	x8, [sp, #40]           // 8-byte Folded Spill
+	ldr	x8, [x21, #24]
+	str	x8, [sp, #48]           // 8-byte Folded Spill
+	ldr		x8, [x21]
+	str	x8, [sp, #24]           // 8-byte Folded Spill
+	ldr	x8, [x21, #8]
+	str	x8, [sp, #32]           // 8-byte Folded Spill
+	add	x8, sp, #656            // =656
+	bl	mulPv512x64
+	ldr	x14, [sp, #656]
+	ldr	x15, [sp, #664]
+	ldr	x16, [sp, #672]
+	ldr	x13, [sp, #680]
+	ldr	x12, [sp, #688]
+	cmn		x29, x14
+	ldr	x11, [sp, #696]
+	adcs	x19, x19, x15
+	ldr	x10, [sp, #704]
+	adcs	x27, x27, x16
+	ldr	x9, [sp, #712]
+	adcs	x28, x28, x13
+	ldr	x8, [sp, #720]
+	ldr	x17, [x20, #64]
+	adcs	x25, x25, x12
+	adcs	x26, x26, x11
+	adcs	x10, x22, x10
 	adcs	x24, x24, x9
-	adcs	x26, x26, x8
-	ldr	x8, [sp, #72]           // 8-byte Folded Reload
-	adcs	x27, x8, xzr
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	ldr	x8, [sp, #88]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x8, [sp, #96]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #96]           // 8-byte Folded Spill
-	ldr	x8, [sp, #104]          // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #104]          // 8-byte Folded Spill
-	ldr	x8, [sp, #64]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #72]           // 8-byte Folded Spill
-	mul	 x1, x21, x19
-	add	x8, sp, #512            // =512
-	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldr	x8, [sp, #584]
-	ldr	x9, [sp, #576]
-	ldr	x10, [sp, #568]
-	ldr	x11, [sp, #560]
-	ldr	x12, [sp, #552]
-	ldr	x13, [sp, #544]
-	ldr	x14, [sp, #512]
-	ldr	x15, [sp, #520]
-	ldr	x16, [sp, #528]
-	ldr	x17, [sp, #536]
-	cmn	 x21, x14
-	ldr	x14, [sp, #56]          // 8-byte Folded Reload
-	adcs	x21, x14, x15
-	adcs	x14, x25, x16
-	str	x14, [sp, #64]          // 8-byte Folded Spill
-	ldr	x14, [sp, #48]          // 8-byte Folded Reload
-	adcs	x14, x14, x17
-	str	x14, [sp, #56]          // 8-byte Folded Spill
-	adcs	x23, x23, x13
-	adcs	x28, x28, x12
-	adcs	x22, x22, x11
+	adcs	x29, x17, x8
+	mul		x1, x19, x23
+	add	x8, sp, #576            // =576
+	mov	 x0, x21
+	str	x10, [sp, #16]          // 8-byte Folded Spill
+	adcs	x22, xzr, xzr
+	bl	mulPv512x64
+	ldr	x15, [sp, #576]
+	ldr	x16, [sp, #584]
+	ldr	x8, [sp, #640]
+	ldr	x14, [sp, #592]
+	ldr	x13, [sp, #600]
+	ldr	x12, [sp, #608]
+	cmn		x19, x15
+	adcs	x19, x27, x16
+	add		x8, x22, x8
+	adcs	x22, x28, x14
+	adcs	x25, x25, x13
+	ldr	x11, [sp, #616]
+	adcs	x26, x26, x12
+	ldr	x12, [sp, #16]          // 8-byte Folded Reload
+	ldr	x10, [sp, #624]
+	ldr	x9, [sp, #632]
+	ldr	x17, [x20, #72]
+	adcs	x11, x12, x11
 	adcs	x24, x24, x10
-	adcs	x26, x26, x9
-	adcs	x27, x27, x8
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x25, x8, xzr
-	ldr	x8, [sp, #88]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x8, [sp, #96]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #96]           // 8-byte Folded Spill
-	ldr	x8, [sp, #104]          // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #104]          // 8-byte Folded Spill
-	ldr	x8, [sp, #72]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #80]           // 8-byte Folded Spill
-	mul	 x1, x21, x19
-	add	x8, sp, #432            // =432
-	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #496]
-	ldp	x11, x10, [sp, #480]
-	ldp	x13, x12, [sp, #464]
-	ldp	x14, x15, [sp, #432]
-	ldp	x16, x17, [sp, #448]
-	cmn	 x21, x14
-	ldr	x14, [sp, #64]          // 8-byte Folded Reload
-	adcs	x21, x14, x15
-	ldr	x14, [sp, #56]          // 8-byte Folded Reload
-	adcs	x14, x14, x16
-	adcs	x23, x23, x17
-	adcs	x28, x28, x13
-	adcs	x22, x22, x12
+	adcs	x27, x29, x9
+	adcs	x28, x17, x8
+	mul		x1, x19, x23
+	add	x8, sp, #496            // =496
+	mov	 x0, x21
+	str	x11, [sp, #16]          // 8-byte Folded Spill
+	adcs	x29, xzr, xzr
+	bl	mulPv512x64
+	ldr	x15, [sp, #496]
+	ldp	x16, x14, [sp, #504]
+	ldr	x13, [sp, #520]
+	ldr	x12, [sp, #528]
+	cmn		x19, x15
+	adcs	x19, x22, x16
+	adcs	x22, x25, x14
+	adcs	x25, x26, x13
+	ldr	x13, [sp, #16]          // 8-byte Folded Reload
+	ldr	x11, [sp, #536]
+	ldr	x10, [sp, #544]
+	ldr	x8, [sp, #560]
+	ldr	x9, [sp, #552]
+	ldr	x17, [x20, #80]
+	adcs	x12, x13, x12
 	adcs	x24, x24, x11
-	adcs	x26, x26, x10
-	adcs	x27, x27, x9
-	adcs	x25, x25, x8
-	ldr	x8, [sp, #88]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x8, [sp, #96]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #96]           // 8-byte Folded Spill
-	ldr	x8, [sp, #104]          // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #104]          // 8-byte Folded Spill
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	stp	x14, x8, [sp, #72]
-	mul	 x1, x21, x19
-	add	x8, sp, #352            // =352
-	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #416]
-	ldp	x11, x10, [sp, #400]
-	ldp	x13, x12, [sp, #384]
-	ldp	x14, x15, [sp, #352]
-	ldp	x16, x17, [sp, #368]
-	cmn	 x21, x14
-	ldr	x14, [sp, #72]          // 8-byte Folded Reload
-	adcs	x21, x14, x15
-	adcs	x14, x23, x16
-	str	x14, [sp, #72]          // 8-byte Folded Spill
-	adcs	x28, x28, x17
-	adcs	x22, x22, x13
+	adcs	x26, x27, x10
+	add		x8, x29, x8
+	adcs	x27, x28, x9
+	adcs	x28, x17, x8
+	mul		x1, x19, x23
+	add	x8, sp, #416            // =416
+	mov	 x0, x21
+	str	x12, [sp, #16]          // 8-byte Folded Spill
+	adcs	x29, xzr, xzr
+	bl	mulPv512x64
+	ldr	x15, [sp, #416]
+	ldp	x16, x14, [sp, #424]
+	ldp	x13, x12, [sp, #440]
+	ldp	x11, x10, [sp, #456]
+	cmn		x19, x15
+	adcs	x19, x22, x16
+	adcs	x22, x25, x14
+	ldr	x14, [sp, #16]          // 8-byte Folded Reload
+	ldp	x9, x8, [sp, #472]
+	ldr	x17, [x20, #88]
+	mul		x1, x19, x23
+	adcs	x13, x14, x13
 	adcs	x24, x24, x12
-	adcs	x26, x26, x11
-	adcs	x27, x27, x10
-	adcs	x25, x25, x9
-	ldr	x9, [sp, #88]           // 8-byte Folded Reload
-	adcs	x8, x9, x8
-	str	x8, [sp, #88]           // 8-byte Folded Spill
-	ldr	x8, [sp, #96]           // 8-byte Folded Reload
-	adcs	x23, x8, xzr
-	ldr	x8, [sp, #104]          // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #104]          // 8-byte Folded Spill
-	ldr	x8, [sp, #80]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #96]           // 8-byte Folded Spill
-	mul	 x1, x21, x19
-	add	x8, sp, #272            // =272
-	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #336]
-	ldp	x11, x10, [sp, #320]
-	ldp	x13, x12, [sp, #304]
-	ldp	x14, x15, [sp, #272]
-	ldp	x16, x17, [sp, #288]
-	cmn	 x21, x14
-	ldr	x14, [sp, #72]          // 8-byte Folded Reload
-	adcs	x21, x14, x15
-	adcs	x14, x28, x16
-	adcs	x22, x22, x17
-	adcs	x24, x24, x13
-	adcs	x26, x26, x12
-	adcs	x27, x27, x11
-	adcs	x25, x25, x10
-	ldr	x10, [sp, #88]          // 8-byte Folded Reload
-	adcs	x9, x10, x9
-	stp	x14, x9, [sp, #80]
-	adcs	x23, x23, x8
-	ldr	x8, [sp, #104]          // 8-byte Folded Reload
-	adcs	x28, x8, xzr
-	ldr	x8, [sp, #96]           // 8-byte Folded Reload
-	adcs	x8, x8, xzr
-	str	x8, [sp, #104]          // 8-byte Folded Spill
-	mul	 x1, x21, x19
-	add	x8, sp, #192            // =192
-	mov	 x0, x20
-	bl	.LmulPv576x64
-	ldp	x9, x8, [sp, #256]
-	ldp	x11, x10, [sp, #240]
-	ldp	x13, x12, [sp, #224]
-	ldp	x14, x15, [sp, #192]
-	ldp	x16, x17, [sp, #208]
-	cmn	 x21, x14
-	ldr	x14, [sp, #80]          // 8-byte Folded Reload
-	adcs	x14, x14, x15
-	adcs	x15, x22, x16
-	adcs	x16, x24, x17
-	adcs	x13, x26, x13
-	adcs	x12, x27, x12
-	adcs	x11, x25, x11
-	ldr	x17, [sp, #88]          // 8-byte Folded Reload
-	adcs	x10, x17, x10
-	adcs	x9, x23, x9
-	adcs	x8, x28, x8
-	ldp	x17, x18, [sp, #104]
-	adcs	x17, x17, xzr
-	subs	 x18, x14, x18
-	ldp	x0, x1, [sp, #120]
-	sbcs	x0, x15, x0
-	sbcs	x1, x16, x1
-	ldp	x2, x3, [sp, #136]
-	sbcs	x2, x13, x2
-	sbcs	x3, x12, x3
-	ldr	x4, [sp, #152]          // 8-byte Folded Reload
-	sbcs	x4, x11, x4
-	ldp	x5, x6, [sp, #168]
-	sbcs	x5, x10, x5
-	sbcs	x6, x9, x6
-	ldr	x7, [sp, #184]          // 8-byte Folded Reload
-	sbcs	x7, x8, x7
-	sbcs	x17, x17, xzr
-	tst	 x17, #0x1
-	csel	x14, x14, x18, ne
-	csel	x15, x15, x0, ne
-	csel	x16, x16, x1, ne
-	csel	x13, x13, x2, ne
-	csel	x12, x12, x3, ne
-	csel	x11, x11, x4, ne
-	csel	x10, x10, x5, ne
-	csel	x9, x9, x6, ne
-	csel	x8, x8, x7, ne
-	ldr	x17, [sp, #160]         // 8-byte Folded Reload
-	stp	 x14, x15, [x17]
-	stp	x16, x13, [x17, #16]
-	stp	x12, x11, [x17, #32]
-	stp	x10, x9, [x17, #48]
-	str	x8, [x17, #64]
-	sub	sp, x29, #80            // =80
-	ldp	x29, x30, [sp, #80]
-	ldp	x20, x19, [sp, #64]
-	ldp	x22, x21, [sp, #48]
-	ldp	x24, x23, [sp, #32]
-	ldp	x26, x25, [sp, #16]
-	ldp	x28, x27, [sp], #96
+	adcs	x25, x26, x11
+	adcs	x26, x27, x10
+	add		x8, x29, x8
+	adcs	x27, x28, x9
+	adcs	x28, x17, x8
+	add	x8, sp, #336            // =336
+	mov	 x0, x21
+	str	x13, [sp, #16]          // 8-byte Folded Spill
+	adcs	x29, xzr, xzr
+	bl	mulPv512x64
+	ldr	x15, [sp, #336]
+	ldp	x16, x14, [sp, #344]
+	ldp	x13, x12, [sp, #360]
+	ldp	x11, x10, [sp, #376]
+	cmn		x19, x15
+	ldr	x15, [sp, #16]          // 8-byte Folded Reload
+	adcs	x19, x22, x16
+	ldp	x9, x8, [sp, #392]
+	ldr	x17, [x20, #96]
+	adcs	x22, x15, x14
+	adcs	x13, x24, x13
+	adcs	x24, x25, x12
+	adcs	x25, x26, x11
+	adcs	x26, x27, x10
+	add		x8, x29, x8
+	adcs	x27, x28, x9
+	adcs	x28, x17, x8
+	mul		x1, x19, x23
+	add	x8, sp, #256            // =256
+	mov	 x0, x21
+	str	x13, [sp, #16]          // 8-byte Folded Spill
+	adcs	x29, xzr, xzr
+	bl	mulPv512x64
+	ldr	x15, [sp, #256]
+	ldp	x16, x14, [sp, #264]
+	ldp	x13, x12, [sp, #280]
+	ldp	x11, x10, [sp, #296]
+	cmn		x19, x15
+	ldr	x15, [sp, #16]          // 8-byte Folded Reload
+	adcs	x19, x22, x16
+	ldp	x9, x8, [sp, #312]
+	ldr	x17, [x20, #104]
+	adcs	x22, x15, x14
+	adcs	x13, x24, x13
+	adcs	x24, x25, x12
+	adcs	x25, x26, x11
+	adcs	x26, x27, x10
+	add		x8, x29, x8
+	adcs	x27, x28, x9
+	adcs	x28, x17, x8
+	mul		x1, x19, x23
+	add	x8, sp, #176            // =176
+	mov	 x0, x21
+	str	x13, [sp, #16]          // 8-byte Folded Spill
+	adcs	x29, xzr, xzr
+	bl	mulPv512x64
+	ldr	x15, [sp, #176]
+	ldp	x16, x14, [sp, #184]
+	ldp	x13, x12, [sp, #200]
+	ldp	x11, x10, [sp, #216]
+	cmn		x19, x15
+	ldr	x15, [sp, #16]          // 8-byte Folded Reload
+	adcs	x19, x22, x16
+	ldp	x9, x8, [sp, #232]
+	ldr	x17, [x20, #112]
+	adcs	x22, x15, x14
+	adcs	x13, x24, x13
+	adcs	x24, x25, x12
+	adcs	x25, x26, x11
+	adcs	x26, x27, x10
+	add		x8, x29, x8
+	adcs	x27, x28, x9
+	adcs	x28, x17, x8
+	mul		x1, x19, x23
+	add	x8, sp, #96             // =96
+	mov	 x0, x21
+	str	x13, [sp, #16]          // 8-byte Folded Spill
+	adcs	x29, xzr, xzr
+	bl	mulPv512x64
+	ldp	x15, x14, [sp, #96]
+	ldr	x16, [sp, #112]
+	ldp	x13, x12, [sp, #120]
+	ldp	x11, x10, [sp, #136]
+	cmn		x19, x15
+	ldr	x15, [sp, #16]          // 8-byte Folded Reload
+	adcs	x14, x22, x14
+	ldp	x9, x8, [sp, #152]
+	ldr	x17, [x20, #120]
+	adcs	x15, x15, x16
+	adcs	x13, x24, x13
+	adcs	x12, x25, x12
+	adcs	x11, x26, x11
+	adcs	x10, x27, x10
+	add		x8, x29, x8
+	adcs	x9, x28, x9
+	adcs	x8, x17, x8
+	ldp	x16, x17, [sp, #24]     // 8-byte Folded Reload
+	ldp	x18, x0, [sp, #40]      // 8-byte Folded Reload
+	ldp	x1, x2, [sp, #56]       // 8-byte Folded Reload
+	ldp	x3, x4, [sp, #72]       // 8-byte Folded Reload
+	subs		x16, x14, x16
+	sbcs	x17, x15, x17
+	sbcs	x18, x13, x18
+	sbcs	x0, x12, x0
+	sbcs	x1, x11, x1
+	sbcs	x2, x10, x2
+	sbcs	x3, x9, x3
+	sbcs	x4, x8, x4
+	cmp		x4, #0          // =0
+	csel	x14, x14, x16, lt
+	ldr	x16, [sp, #88]          // 8-byte Folded Reload
+	csel	x15, x15, x17, lt
+	csel	x13, x13, x18, lt
+	csel	x12, x12, x0, lt
+	csel	x11, x11, x1, lt
+	csel	x10, x10, x2, lt
+	csel	x9, x9, x3, lt
+	csel	x8, x8, x4, lt
+	stp		x14, x15, [x16]
+	stp	x13, x12, [x16, #16]
+	stp	x11, x10, [x16, #32]
+	stp	x9, x8, [x16, #48]
+	add	sp, sp, #736            // =736
+	ldp	x29, x30, [sp, #80]     // 8-byte Folded Reload
+	ldp	x20, x19, [sp, #64]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp, #48]     // 8-byte Folded Reload
+	ldp	x24, x23, [sp, #32]     // 8-byte Folded Reload
+	ldp	x26, x25, [sp, #16]     // 8-byte Folded Reload
+	ldp	x28, x27, [sp], #96     // 8-byte Folded Reload
 	ret
-.Lfunc_end132:
-	.size	mcl_fp_montRed9L, .Lfunc_end132-mcl_fp_montRed9L
+.Lfunc_end63:
+	.size	mcl_fp_montRedNF8L, .Lfunc_end63-mcl_fp_montRedNF8L
 
-	.globl	mcl_fp_addPre9L
-	.align	2
-	.type	mcl_fp_addPre9L,@function
-mcl_fp_addPre9L:                        // @mcl_fp_addPre9L
+	.globl	mcl_fp_addPre8L
+	.p2align	2
+	.type	mcl_fp_addPre8L,@function
+mcl_fp_addPre8L:                        // @mcl_fp_addPre8L
 // BB#0:
-	ldp	x11, x8, [x2, #56]
-	ldp	x13, x9, [x1, #56]
-	ldp	x15, x10, [x2, #40]
-	ldp	x17, x12, [x1, #40]
-	ldp	x3, x14, [x2, #24]
-	ldr	 x4, [x2]
-	ldp	x2, x18, [x2, #8]
-	ldp	 x5, x6, [x1]
-	ldr	x7, [x1, #16]
-	ldp	x1, x16, [x1, #24]
-	adds	 x4, x4, x5
-	adcs	x2, x2, x6
-	stp	 x4, x2, [x0]
-	adcs	x18, x18, x7
-	str	x18, [x0, #16]
-	adcs	x18, x3, x1
-	adcs	x14, x14, x16
-	stp	x18, x14, [x0, #24]
-	adcs	x14, x15, x17
-	adcs	x10, x10, x12
-	stp	x14, x10, [x0, #40]
-	adcs	x10, x11, x13
-	adcs	x9, x8, x9
+	ldp	x8, x9, [x2, #48]
+	ldp	x12, x13, [x2, #32]
+	ldp	x16, x17, [x2, #16]
+	ldp		x18, x2, [x2]
+	ldp		x3, x4, [x1]
+	ldp	x10, x11, [x1, #48]
+	ldp	x14, x15, [x1, #32]
+	ldp	x5, x1, [x1, #16]
+	adds		x18, x18, x3
+	adcs	x2, x2, x4
+	stp		x18, x2, [x0]
+	adcs	x16, x16, x5
+	adcs	x17, x17, x1
+	adcs	x12, x12, x14
+	adcs	x13, x13, x15
+	adcs	x10, x8, x10
+	adcs	x9, x9, x11
 	adcs	x8, xzr, xzr
-	stp	x10, x9, [x0, #56]
+	stp	x16, x17, [x0, #16]
+	stp	x12, x13, [x0, #32]
+	stp	x10, x9, [x0, #48]
 	mov	 x0, x8
 	ret
-.Lfunc_end133:
-	.size	mcl_fp_addPre9L, .Lfunc_end133-mcl_fp_addPre9L
+.Lfunc_end64:
+	.size	mcl_fp_addPre8L, .Lfunc_end64-mcl_fp_addPre8L
 
-	.globl	mcl_fp_subPre9L
-	.align	2
-	.type	mcl_fp_subPre9L,@function
-mcl_fp_subPre9L:                        // @mcl_fp_subPre9L
+	.globl	mcl_fp_subPre8L
+	.p2align	2
+	.type	mcl_fp_subPre8L,@function
+mcl_fp_subPre8L:                        // @mcl_fp_subPre8L
 // BB#0:
-	ldp	x11, x8, [x2, #56]
-	ldp	x13, x9, [x1, #56]
-	ldp	x15, x10, [x2, #40]
-	ldp	x17, x12, [x1, #40]
-	ldp	x3, x14, [x2, #24]
-	ldr	 x4, [x2]
-	ldp	x2, x18, [x2, #8]
-	ldp	 x5, x6, [x1]
-	ldr	x7, [x1, #16]
-	ldp	x1, x16, [x1, #24]
-	subs	 x4, x5, x4
-	sbcs	x2, x6, x2
-	stp	 x4, x2, [x0]
-	sbcs	x18, x7, x18
-	str	x18, [x0, #16]
-	sbcs	x18, x1, x3
-	sbcs	x14, x16, x14
-	stp	x18, x14, [x0, #24]
-	sbcs	x14, x17, x15
-	sbcs	x10, x12, x10
-	stp	x14, x10, [x0, #40]
-	sbcs	x10, x13, x11
-	sbcs	x9, x9, x8
+	ldp	x8, x9, [x2, #48]
+	ldp	x12, x13, [x2, #32]
+	ldp	x16, x17, [x2, #16]
+	ldp		x18, x2, [x2]
+	ldp		x3, x4, [x1]
+	ldp	x10, x11, [x1, #48]
+	ldp	x14, x15, [x1, #32]
+	ldp	x5, x1, [x1, #16]
+	subs		x18, x3, x18
+	sbcs	x2, x4, x2
+	stp		x18, x2, [x0]
+	sbcs	x16, x5, x16
+	sbcs	x17, x1, x17
+	sbcs	x12, x14, x12
+	sbcs	x13, x15, x13
+	sbcs	x10, x10, x8
+	sbcs	x9, x11, x9
 	ngcs	 x8, xzr
 	and	x8, x8, #0x1
-	stp	x10, x9, [x0, #56]
+	stp	x16, x17, [x0, #16]
+	stp	x12, x13, [x0, #32]
+	stp	x10, x9, [x0, #48]
 	mov	 x0, x8
 	ret
-.Lfunc_end134:
-	.size	mcl_fp_subPre9L, .Lfunc_end134-mcl_fp_subPre9L
+.Lfunc_end65:
+	.size	mcl_fp_subPre8L, .Lfunc_end65-mcl_fp_subPre8L
 
-	.globl	mcl_fp_shr1_9L
-	.align	2
-	.type	mcl_fp_shr1_9L,@function
-mcl_fp_shr1_9L:                         // @mcl_fp_shr1_9L
+	.globl	mcl_fp_shr1_8L
+	.p2align	2
+	.type	mcl_fp_shr1_8L,@function
+mcl_fp_shr1_8L:                         // @mcl_fp_shr1_8L
 // BB#0:
-	ldp	 x8, x9, [x1]
-	ldp	x12, x10, [x1, #56]
-	ldp	x16, x11, [x1, #40]
-	ldp	x13, x14, [x1, #16]
-	ldr	x15, [x1, #32]
+	ldp		x8, x9, [x1]
+	ldp	x10, x11, [x1, #48]
+	ldp	x12, x13, [x1, #16]
+	ldp	x14, x15, [x1, #32]
 	extr	x8, x9, x8, #1
-	extr	x9, x13, x9, #1
+	extr	x9, x12, x9, #1
+	extr	x12, x13, x12, #1
 	extr	x13, x14, x13, #1
 	extr	x14, x15, x14, #1
-	extr	x15, x16, x15, #1
-	extr	x16, x11, x16, #1
-	extr	x11, x12, x11, #1
-	extr	x12, x10, x12, #1
-	lsr	x10, x10, #1
-	stp	 x8, x9, [x0]
-	stp	x13, x14, [x0, #16]
-	stp	x15, x16, [x0, #32]
-	stp	x11, x12, [x0, #48]
-	str	x10, [x0, #64]
+	extr	x15, x10, x15, #1
+	extr	x10, x11, x10, #1
+	lsr	x11, x11, #1
+	stp		x8, x9, [x0]
+	stp	x12, x13, [x0, #16]
+	stp	x14, x15, [x0, #32]
+	stp	x10, x11, [x0, #48]
 	ret
-.Lfunc_end135:
-	.size	mcl_fp_shr1_9L, .Lfunc_end135-mcl_fp_shr1_9L
+.Lfunc_end66:
+	.size	mcl_fp_shr1_8L, .Lfunc_end66-mcl_fp_shr1_8L
 
-	.globl	mcl_fp_add9L
-	.align	2
-	.type	mcl_fp_add9L,@function
-mcl_fp_add9L:                           // @mcl_fp_add9L
-// BB#0:
-	stp	x24, x23, [sp, #-48]!
-	stp	x22, x21, [sp, #16]
-	stp	x20, x19, [sp, #32]
-	ldp	x11, x8, [x2, #56]
-	ldp	x13, x9, [x1, #56]
-	ldp	x15, x10, [x2, #40]
-	ldp	x17, x12, [x1, #40]
-	ldp	x4, x14, [x2, #24]
-	ldr	 x5, [x2]
-	ldp	x2, x18, [x2, #8]
-	ldp	 x6, x7, [x1]
-	ldr	x19, [x1, #16]
-	ldp	x1, x16, [x1, #24]
-	adds	 x5, x5, x6
-	adcs	x2, x2, x7
-	adcs	x18, x18, x19
-	ldp	x21, x7, [x3, #40]
-	ldp	x19, x6, [x3, #56]
-	adcs	x1, x4, x1
-	adcs	x4, x14, x16
-	ldr	x20, [x3, #32]
-	adcs	x17, x15, x17
-	adcs	x10, x10, x12
-	ldp	 x12, x14, [x3]
-	stp	 x5, x2, [x0]
-	stp	x18, x1, [x0, #16]
-	stp	x4, x17, [x0, #32]
-	adcs	x22, x11, x13
-	stp	x10, x22, [x0, #48]
-	adcs	x8, x8, x9
-	str	x8, [x0, #64]
-	adcs	x23, xzr, xzr
-	ldp	x9, x11, [x3, #16]
-	subs	 x16, x5, x12
-	sbcs	x15, x2, x14
-	sbcs	x14, x18, x9
-	sbcs	x13, x1, x11
-	sbcs	x12, x4, x20
-	sbcs	x11, x17, x21
-	sbcs	x10, x10, x7
-	sbcs	x9, x22, x19
-	sbcs	x8, x8, x6
-	sbcs	x17, x23, xzr
-	and	w17, w17, #0x1
-	tbnz	w17, #0, .LBB136_2
+	.globl	mcl_fp_add8L
+	.p2align	2
+	.type	mcl_fp_add8L,@function
+mcl_fp_add8L:                           // @mcl_fp_add8L
+// BB#0:
+	stp	x22, x21, [sp, #-32]!   // 8-byte Folded Spill
+	ldp	x8, x9, [x2, #48]
+	ldp	x12, x13, [x2, #32]
+	ldp	x16, x17, [x2, #16]
+	ldp		x18, x2, [x2]
+	ldp		x4, x5, [x1]
+	ldp	x10, x11, [x1, #48]
+	ldp	x14, x15, [x1, #32]
+	ldp	x6, x1, [x1, #16]
+	adds		x18, x18, x4
+	adcs	x2, x2, x5
+	stp	x20, x19, [sp, #16]     // 8-byte Folded Spill
+	adcs	x16, x16, x6
+	adcs	x17, x17, x1
+	adcs	x7, x12, x14
+	adcs	x19, x13, x15
+	adcs	x21, x8, x10
+	ldp		x8, x10, [x3]
+	ldp	x4, x5, [x3, #48]
+	ldp	x1, x6, [x3, #32]
+	ldp	x12, x20, [x3, #16]
+	adcs	x3, x9, x11
+	adcs	x22, xzr, xzr
+	subs		x15, x18, x8
+	sbcs	x14, x2, x10
+	sbcs	x13, x16, x12
+	sbcs	x12, x17, x20
+	sbcs	x11, x7, x1
+	sbcs	x10, x19, x6
+	sbcs	x9, x21, x4
+	sbcs	x8, x3, x5
+	stp	x16, x17, [x0, #16]
+	sbcs	x16, x22, xzr
+	stp		x18, x2, [x0]
+	stp	x7, x19, [x0, #32]
+	stp	x21, x3, [x0, #48]
+	tbnz	w16, #0, .LBB67_2
 // BB#1:                                // %nocarry
-	stp	 x16, x15, [x0]
-	stp	x14, x13, [x0, #16]
-	stp	x12, x11, [x0, #32]
-	stp	x10, x9, [x0, #48]
-	str	x8, [x0, #64]
-.LBB136_2:                              // %carry
-	ldp	x20, x19, [sp, #32]
-	ldp	x22, x21, [sp, #16]
-	ldp	x24, x23, [sp], #48
+	stp		x15, x14, [x0]
+	stp	x13, x12, [x0, #16]
+	stp	x11, x10, [x0, #32]
+	stp	x9, x8, [x0, #48]
+.LBB67_2:                               // %carry
+	ldp	x20, x19, [sp, #16]     // 8-byte Folded Reload
+	ldp	x22, x21, [sp], #32     // 8-byte Folded Reload
 	ret
-.Lfunc_end136:
-	.size	mcl_fp_add9L, .Lfunc_end136-mcl_fp_add9L
+.Lfunc_end67:
+	.size	mcl_fp_add8L, .Lfunc_end67-mcl_fp_add8L
 
-	.globl	mcl_fp_addNF9L
-	.align	2
-	.type	mcl_fp_addNF9L,@function
-mcl_fp_addNF9L:                         // @mcl_fp_addNF9L
+	.globl	mcl_fp_addNF8L
+	.p2align	2
+	.type	mcl_fp_addNF8L,@function
+mcl_fp_addNF8L:                         // @mcl_fp_addNF8L
 // BB#0:
-	stp	x20, x19, [sp, #-16]!
-	ldp	x11, x8, [x1, #56]
-	ldp	x13, x9, [x2, #56]
-	ldp	x15, x10, [x1, #40]
-	ldp	x17, x12, [x2, #40]
-	ldp	x4, x14, [x1, #24]
-	ldr	 x5, [x1]
-	ldp	x1, x18, [x1, #8]
-	ldp	 x6, x7, [x2]
-	ldr	x19, [x2, #16]
-	ldp	x2, x16, [x2, #24]
-	adds	 x5, x6, x5
-	adcs	x1, x7, x1
-	adcs	x18, x19, x18
-	ldp	x19, x6, [x3, #56]
-	adcs	x2, x2, x4
-	adcs	x14, x16, x14
-	ldp	x4, x7, [x3, #40]
-	adcs	x15, x17, x15
-	adcs	x10, x12, x10
-	ldp	 x12, x17, [x3]
-	adcs	x11, x13, x11
-	ldr	x13, [x3, #16]
-	ldp	x3, x16, [x3, #24]
-	adcs	x8, x9, x8
-	subs	 x9, x5, x12
-	sbcs	x12, x1, x17
-	sbcs	x13, x18, x13
-	sbcs	x17, x2, x3
-	sbcs	x16, x14, x16
-	sbcs	x3, x15, x4
-	sbcs	x4, x10, x7
-	sbcs	x7, x11, x19
-	sbcs	x6, x8, x6
-	asr	x19, x6, #63
-	cmp	 x19, #0                // =0
-	csel	x9, x5, x9, lt
-	csel	x12, x1, x12, lt
-	csel	x13, x18, x13, lt
-	csel	x17, x2, x17, lt
-	csel	x14, x14, x16, lt
-	csel	x15, x15, x3, lt
-	csel	x10, x10, x4, lt
-	csel	x11, x11, x7, lt
-	csel	x8, x8, x6, lt
-	stp	 x9, x12, [x0]
-	stp	x13, x17, [x0, #16]
-	stp	x14, x15, [x0, #32]
-	stp	x10, x11, [x0, #48]
-	str	x8, [x0, #64]
-	ldp	x20, x19, [sp], #16
+	ldp	x8, x9, [x1, #48]
+	ldp	x12, x13, [x1, #32]
+	ldp	x16, x17, [x1, #16]
+	ldp		x18, x1, [x1]
+	ldp		x4, x5, [x2]
+	ldp	x10, x11, [x2, #48]
+	ldp	x14, x15, [x2, #32]
+	ldp	x6, x2, [x2, #16]
+	adds		x18, x4, x18
+	adcs	x1, x5, x1
+	ldp	x4, x5, [x3, #48]
+	adcs	x16, x6, x16
+	adcs	x17, x2, x17
+	adcs	x12, x14, x12
+	adcs	x13, x15, x13
+	ldp		x14, x15, [x3]
+	ldp	x2, x6, [x3, #32]
+	adcs	x8, x10, x8
+	ldp	x10, x3, [x3, #16]
+	adcs	x9, x11, x9
+	subs		x11, x18, x14
+	sbcs	x14, x1, x15
+	sbcs	x10, x16, x10
+	sbcs	x15, x17, x3
+	sbcs	x2, x12, x2
+	sbcs	x3, x13, x6
+	sbcs	x4, x8, x4
+	sbcs	x5, x9, x5
+	cmp		x5, #0          // =0
+	csel	x11, x18, x11, lt
+	csel	x14, x1, x14, lt
+	csel	x10, x16, x10, lt
+	csel	x15, x17, x15, lt
+	csel	x12, x12, x2, lt
+	csel	x13, x13, x3, lt
+	csel	x8, x8, x4, lt
+	csel	x9, x9, x5, lt
+	stp		x11, x14, [x0]
+	stp	x10, x15, [x0, #16]
+	stp	x12, x13, [x0, #32]
+	stp	x8, x9, [x0, #48]
 	ret
-.Lfunc_end137:
-	.size	mcl_fp_addNF9L, .Lfunc_end137-mcl_fp_addNF9L
+.Lfunc_end68:
+	.size	mcl_fp_addNF8L, .Lfunc_end68-mcl_fp_addNF8L
 
-	.globl	mcl_fp_sub9L
-	.align	2
-	.type	mcl_fp_sub9L,@function
-mcl_fp_sub9L:                           // @mcl_fp_sub9L
+	.globl	mcl_fp_sub8L
+	.p2align	2
+	.type	mcl_fp_sub8L,@function
+mcl_fp_sub8L:                           // @mcl_fp_sub8L
 // BB#0:
-	stp	x20, x19, [sp, #-16]!
-	ldp	x15, x16, [x2, #56]
-	ldp	x4, x17, [x1, #56]
-	ldp	x13, x14, [x2, #40]
-	ldp	x6, x18, [x1, #40]
-	ldp	x11, x12, [x2, #24]
-	ldp	x9, x10, [x2, #8]
-	ldr	 x8, [x2]
-	ldp	 x2, x7, [x1]
-	ldr	x19, [x1, #16]
-	ldp	x1, x5, [x1, #24]
-	subs	 x8, x2, x8
-	sbcs	x9, x7, x9
-	stp	 x8, x9, [x0]
-	sbcs	x10, x19, x10
+	ldp	x14, x15, [x2, #48]
+	ldp	x12, x13, [x2, #32]
+	ldp	x10, x11, [x2, #16]
+	ldp		x8, x9, [x2]
+	ldp		x2, x5, [x1]
+	ldp	x16, x17, [x1, #48]
+	ldp	x18, x4, [x1, #32]
+	ldp	x6, x1, [x1, #16]
+	subs		x8, x2, x8
+	sbcs	x9, x5, x9
+	stp		x8, x9, [x0]
+	sbcs	x10, x6, x10
 	sbcs	x11, x1, x11
+	sbcs	x12, x18, x12
+	sbcs	x13, x4, x13
+	sbcs	x14, x16, x14
+	sbcs	x15, x17, x15
+	ngcs	 x16, xzr
 	stp	x10, x11, [x0, #16]
-	sbcs	x12, x5, x12
-	sbcs	x13, x6, x13
 	stp	x12, x13, [x0, #32]
-	sbcs	x14, x18, x14
-	sbcs	x15, x4, x15
 	stp	x14, x15, [x0, #48]
-	sbcs	x16, x17, x16
-	str	x16, [x0, #64]
-	ngcs	 x17, xzr
-	and	w17, w17, #0x1
-	tbnz	w17, #0, .LBB138_2
+	tbnz	w16, #0, .LBB69_2
 // BB#1:                                // %nocarry
-	ldp	x20, x19, [sp], #16
 	ret
-.LBB138_2:                              // %carry
-	ldp	 x18, x1, [x3]
-	ldp	x2, x4, [x3, #16]
-	ldp	x5, x6, [x3, #32]
-	adds	 x8, x18, x8
-	adcs	x9, x1, x9
-	ldr	x18, [x3, #48]
-	ldp	x1, x17, [x3, #56]
-	adcs	x10, x2, x10
-	adcs	x11, x4, x11
-	adcs	x12, x5, x12
-	adcs	x13, x6, x13
-	adcs	x14, x18, x14
-	adcs	x15, x1, x15
-	adcs	x16, x17, x16
-	stp	 x8, x9, [x0]
-	stp	x10, x11, [x0, #16]
-	stp	x12, x13, [x0, #32]
-	stp	x14, x15, [x0, #48]
-	str	x16, [x0, #64]
-	ldp	x20, x19, [sp], #16
+.LBB69_2:                               // %carry
+	ldp		x2, x4, [x3]
+	ldp	x16, x17, [x3, #48]
+	ldp	x18, x1, [x3, #32]
+	ldp	x5, x3, [x3, #16]
+	adds		x8, x2, x8
+	adcs	x9, x4, x9
+	stp		x8, x9, [x0]
+	adcs	x8, x5, x10
+	adcs	x9, x3, x11
+	stp	x8, x9, [x0, #16]
+	adcs	x8, x18, x12
+	adcs	x9, x1, x13
+	stp	x8, x9, [x0, #32]
+	adcs	x8, x16, x14
+	adcs	x9, x17, x15
+	stp	x8, x9, [x0, #48]
 	ret
-.Lfunc_end138:
-	.size	mcl_fp_sub9L, .Lfunc_end138-mcl_fp_sub9L
+.Lfunc_end69:
+	.size	mcl_fp_sub8L, .Lfunc_end69-mcl_fp_sub8L
 
-	.globl	mcl_fp_subNF9L
-	.align	2
-	.type	mcl_fp_subNF9L,@function
-mcl_fp_subNF9L:                         // @mcl_fp_subNF9L
+	.globl	mcl_fp_subNF8L
+	.p2align	2
+	.type	mcl_fp_subNF8L,@function
+mcl_fp_subNF8L:                         // @mcl_fp_subNF8L
 // BB#0:
-	stp	x20, x19, [sp, #-16]!
-	ldp	x11, x8, [x2, #56]
-	ldp	x13, x9, [x1, #56]
-	ldp	x15, x10, [x2, #40]
-	ldp	x17, x12, [x1, #40]
-	ldp	x4, x14, [x2, #24]
-	ldr	 x5, [x2]
-	ldp	x2, x18, [x2, #8]
-	ldp	 x6, x7, [x1]
-	ldr	x19, [x1, #16]
-	ldp	x1, x16, [x1, #24]
-	subs	 x5, x6, x5
-	sbcs	x2, x7, x2
-	sbcs	x18, x19, x18
-	ldp	x19, x6, [x3, #56]
-	sbcs	x1, x1, x4
+	add	x9, x1, #24             // =24
+	ld1	{ v0.d }[0], [x9]
+	add	x8, x1, #16             // =16
+	add	x10, x2, #24            // =24
+	add	x9, x2, #16             // =16
+	mov	 x11, x2
+	ld1	{ v1.d }[0], [x8]
+	ld1	{ v2.d }[0], [x9]
+	ld1	{ v0.d }[1], [x10]
+	ld1	{ v3.d }[0], [x11], #8
+	ins	v1.d[1], v0.d[0]
+	ldr		x8, [x11]
+	mov	 x11, x1
+	ext	v4.16b, v0.16b, v0.16b, #8
+	fmov	x9, d0
+	ld1	{ v0.d }[0], [x11], #8
+	ins	v3.d[1], x8
+	fmov	x12, d3
+	ldp	x13, x14, [x2, #48]
+	ldr		x11, [x11]
+	ldp	x15, x16, [x1, #48]
+	ldp	x17, x18, [x2, #32]
+	ldp	x2, x1, [x1, #32]
+	ins	v0.d[1], x11
+	fmov	x6, d0
+	ins	v2.d[1], v4.d[0]
+	subs		x12, x6, x12
+	fmov	x10, d1
+	sbcs	x8, x11, x8
+	fmov	x11, d2
+	fmov	x6, d4
+	sbcs	x10, x10, x11
+	sbcs	x9, x9, x6
+	sbcs	x17, x2, x17
+	sbcs	x18, x1, x18
+	ldp	x4, x5, [x3, #48]
+	ldp	x11, x6, [x3, #32]
+	ldp	x1, x2, [x3, #16]
+	sbcs	x13, x15, x13
+	ldp		x15, x3, [x3]
 	sbcs	x14, x16, x14
-	ldp	x4, x7, [x3, #40]
-	sbcs	x15, x17, x15
-	sbcs	x10, x12, x10
-	ldp	 x12, x17, [x3]
-	sbcs	x11, x13, x11
-	sbcs	x8, x9, x8
-	asr	x9, x8, #63
-	extr	x13, x9, x8, #63
-	and	 x12, x13, x12
-	ldr	x13, [x3, #16]
-	ldp	x3, x16, [x3, #24]
-	and	 x19, x9, x19
-	and	 x6, x9, x6
-	ror	 x9, x9, #63
-	and	 x17, x9, x17
-	and	 x13, x9, x13
-	and	 x3, x9, x3
-	and	 x16, x9, x16
-	and	 x4, x9, x4
-	and	 x9, x9, x7
-	adds	 x12, x12, x5
-	str	 x12, [x0]
-	adcs	x12, x17, x2
-	str	x12, [x0, #8]
-	adcs	x12, x13, x18
-	str	x12, [x0, #16]
-	adcs	x12, x3, x1
-	str	x12, [x0, #24]
-	adcs	x12, x16, x14
-	str	x12, [x0, #32]
-	adcs	x12, x4, x15
-	adcs	x9, x9, x10
-	stp	x12, x9, [x0, #40]
-	adcs	x9, x19, x11
-	adcs	x8, x6, x8
-	stp	x9, x8, [x0, #56]
-	ldp	x20, x19, [sp], #16
+	asr	x16, x14, #63
+	and		x1, x16, x1
+	and		x15, x16, x15
+	and		x3, x16, x3
+	adds		x12, x15, x12
+	adcs	x8, x3, x8
+	and		x2, x16, x2
+	stp		x12, x8, [x0]
+	adcs	x8, x1, x10
+	and		x11, x16, x11
+	adcs	x9, x2, x9
+	and		x6, x16, x6
+	stp	x8, x9, [x0, #16]
+	adcs	x8, x11, x17
+	and		x4, x16, x4
+	adcs	x9, x6, x18
+	and		x16, x16, x5
+	stp	x8, x9, [x0, #32]
+	adcs	x8, x4, x13
+	adcs	x9, x16, x14
+	stp	x8, x9, [x0, #48]
 	ret
-.Lfunc_end139:
-	.size	mcl_fp_subNF9L, .Lfunc_end139-mcl_fp_subNF9L
+.Lfunc_end70:
+	.size	mcl_fp_subNF8L, .Lfunc_end70-mcl_fp_subNF8L
 
-	.globl	mcl_fpDbl_add9L
-	.align	2
-	.type	mcl_fpDbl_add9L,@function
-mcl_fpDbl_add9L:                        // @mcl_fpDbl_add9L
+	.globl	mcl_fpDbl_add8L
+	.p2align	2
+	.type	mcl_fpDbl_add8L,@function
+mcl_fpDbl_add8L:                        // @mcl_fpDbl_add8L
 // BB#0:
-	stp	x20, x19, [sp, #-16]!
-	ldp	x10, x8, [x2, #128]
-	ldp	x11, x9, [x1, #128]
-	ldp	x12, x13, [x2, #112]
-	ldp	x14, x15, [x1, #112]
-	ldp	x16, x17, [x2, #96]
-	ldp	 x18, x4, [x2]
-	ldp	 x5, x6, [x1]
-	ldp	x7, x19, [x2, #16]
-	adds	 x18, x18, x5
-	adcs	x4, x4, x6
-	ldp	x5, x6, [x1, #16]
-	str	 x18, [x0]
-	adcs	x18, x7, x5
-	ldp	x5, x7, [x1, #96]
-	str	x4, [x0, #8]
-	ldr	x4, [x1, #32]
-	str	x18, [x0, #16]
-	adcs	x18, x19, x6
-	ldp	x6, x19, [x2, #32]
-	str	x18, [x0, #24]
-	adcs	x4, x6, x4
-	ldp	x18, x6, [x1, #40]
-	str	x4, [x0, #32]
-	adcs	x18, x19, x18
-	ldp	x4, x19, [x2, #48]
-	str	x18, [x0, #40]
-	adcs	x4, x4, x6
-	ldp	x18, x6, [x1, #56]
-	str	x4, [x0, #48]
-	adcs	x18, x19, x18
-	ldp	x4, x19, [x2, #64]
-	str	x18, [x0, #56]
-	ldr	x18, [x1, #72]
-	adcs	x4, x4, x6
-	ldp	x6, x2, [x2, #80]
-	str	x4, [x0, #64]
-	ldp	x4, x1, [x1, #80]
-	adcs	x18, x19, x18
-	adcs	x4, x6, x4
-	adcs	x1, x2, x1
-	ldp	x6, x19, [x3, #56]
-	adcs	x16, x16, x5
-	adcs	x17, x17, x7
-	ldp	x7, x2, [x3, #40]
+	ldp		x16, x5, [x2]
+	ldp		x17, x6, [x1]
+	ldp	x8, x9, [x2, #112]
+	ldp	x12, x13, [x2, #96]
+	ldp	x18, x4, [x2, #80]
+	adds		x16, x16, x17
+	ldr	x17, [x1, #16]
+	str		x16, [x0]
+	adcs	x16, x5, x6
+	ldp	x5, x6, [x2, #16]
+	str	x16, [x0, #8]
+	ldp	x10, x11, [x1, #112]
+	ldp	x14, x15, [x1, #96]
+	adcs	x17, x5, x17
+	ldp	x16, x5, [x1, #24]
+	str	x17, [x0, #16]
+	adcs	x16, x6, x16
+	ldp	x17, x6, [x2, #32]
+	str	x16, [x0, #24]
+	adcs	x17, x17, x5
+	ldp	x16, x5, [x1, #40]
+	str	x17, [x0, #32]
+	adcs	x16, x6, x16
+	ldp	x17, x6, [x2, #48]
+	str	x16, [x0, #40]
+	ldr	x16, [x1, #56]
+	adcs	x17, x17, x5
+	ldp	x5, x2, [x2, #64]
+	str	x17, [x0, #48]
+	adcs	x16, x6, x16
+	ldp	x17, x6, [x1, #64]
+	str	x16, [x0, #56]
+	ldp	x16, x1, [x1, #80]
+	adcs	x17, x5, x17
+	adcs	x2, x2, x6
+	adcs	x16, x18, x16
+	adcs	x18, x4, x1
 	adcs	x12, x12, x14
 	adcs	x13, x13, x15
-	ldp	x15, x5, [x3, #24]
-	adcs	x10, x10, x11
-	ldr	 x11, [x3]
-	ldp	x3, x14, [x3, #8]
-	adcs	x8, x8, x9
-	adcs	x9, xzr, xzr
-	subs	 x11, x18, x11
-	sbcs	x3, x4, x3
-	sbcs	x14, x1, x14
-	sbcs	x15, x16, x15
-	sbcs	x5, x17, x5
-	sbcs	x7, x12, x7
-	sbcs	x2, x13, x2
-	sbcs	x6, x10, x6
-	sbcs	x19, x8, x19
-	sbcs	x9, x9, xzr
-	tst	 x9, #0x1
-	csel	x9, x18, x11, ne
-	csel	x11, x4, x3, ne
-	csel	x14, x1, x14, ne
-	csel	x15, x16, x15, ne
-	csel	x16, x17, x5, ne
-	csel	x12, x12, x7, ne
-	csel	x13, x13, x2, ne
-	csel	x10, x10, x6, ne
-	csel	x8, x8, x19, ne
-	stp	x9, x11, [x0, #72]
-	stp	x14, x15, [x0, #88]
-	stp	x16, x12, [x0, #104]
-	stp	x13, x10, [x0, #120]
-	str	x8, [x0, #136]
-	ldp	x20, x19, [sp], #16
+	ldp	x5, x6, [x3, #48]
+	ldp	x1, x4, [x3, #32]
+	ldp	x14, x15, [x3, #16]
+	adcs	x8, x8, x10
+	ldp		x10, x3, [x3]
+	adcs	x9, x9, x11
+	adcs	x11, xzr, xzr
+	subs		x10, x17, x10
+	sbcs	x3, x2, x3
+	sbcs	x14, x16, x14
+	sbcs	x15, x18, x15
+	sbcs	x1, x12, x1
+	sbcs	x4, x13, x4
+	sbcs	x5, x8, x5
+	sbcs	x6, x9, x6
+	sbcs	x11, x11, xzr
+	tst	 x11, #0x1
+	csel	x10, x17, x10, ne
+	csel	x11, x2, x3, ne
+	csel	x14, x16, x14, ne
+	csel	x15, x18, x15, ne
+	csel	x12, x12, x1, ne
+	csel	x13, x13, x4, ne
+	csel	x8, x8, x5, ne
+	csel	x9, x9, x6, ne
+	stp	x10, x11, [x0, #64]
+	stp	x14, x15, [x0, #80]
+	stp	x12, x13, [x0, #96]
+	stp	x8, x9, [x0, #112]
 	ret
-.Lfunc_end140:
-	.size	mcl_fpDbl_add9L, .Lfunc_end140-mcl_fpDbl_add9L
+.Lfunc_end71:
+	.size	mcl_fpDbl_add8L, .Lfunc_end71-mcl_fpDbl_add8L
 
-	.globl	mcl_fpDbl_sub9L
-	.align	2
-	.type	mcl_fpDbl_sub9L,@function
-mcl_fpDbl_sub9L:                        // @mcl_fpDbl_sub9L
+	.globl	mcl_fpDbl_sub8L
+	.p2align	2
+	.type	mcl_fpDbl_sub8L,@function
+mcl_fpDbl_sub8L:                        // @mcl_fpDbl_sub8L
 // BB#0:
-	ldp	x10, x8, [x2, #128]
-	ldp	x11, x9, [x1, #128]
-	ldp	x14, x12, [x2, #112]
-	ldp	x15, x13, [x1, #112]
-	ldp	 x16, x17, [x2]
-	ldp	 x18, x4, [x1]
-	ldp	x5, x6, [x2, #96]
-	ldr	x7, [x1, #16]
-	subs	 x16, x18, x16
-	sbcs	x17, x4, x17
-	ldp	x18, x4, [x2, #16]
-	str	 x16, [x0]
-	ldr	x16, [x1, #24]
-	sbcs	x18, x7, x18
-	str	x17, [x0, #8]
-	ldp	x17, x7, [x2, #32]
-	str	x18, [x0, #16]
-	sbcs	x16, x16, x4
-	ldp	x18, x4, [x1, #32]
+	ldp		x16, x5, [x1]
+	ldp		x17, x4, [x2]
+	ldp	x10, x8, [x2, #112]
+	ldp	x12, x13, [x2, #96]
+	ldr	x18, [x1, #80]
+	subs		x16, x16, x17
+	ldr	x17, [x1, #16]
+	str		x16, [x0]
+	sbcs	x16, x5, x4
+	ldp	x4, x5, [x2, #16]
+	str	x16, [x0, #8]
+	ldp	x11, x9, [x1, #112]
+	ldp	x14, x15, [x1, #96]
+	sbcs	x17, x17, x4
+	ldp	x16, x4, [x1, #24]
+	str	x17, [x0, #16]
+	sbcs	x16, x16, x5
+	ldp	x17, x5, [x2, #32]
 	str	x16, [x0, #24]
-	sbcs	x16, x18, x17
-	ldp	x17, x18, [x2, #48]
-	str	x16, [x0, #32]
-	sbcs	x4, x4, x7
-	ldp	x16, x7, [x1, #48]
-	str	x4, [x0, #40]
-	sbcs	x16, x16, x17
-	ldp	x17, x4, [x2, #80]
-	str	x16, [x0, #48]
-	ldr	x16, [x1, #64]
-	sbcs	x18, x7, x18
-	ldp	x7, x2, [x2, #64]
-	str	x18, [x0, #56]
-	ldr	x18, [x1, #72]
-	sbcs	x16, x16, x7
-	str	x16, [x0, #64]
-	ldp	x16, x7, [x1, #80]
-	sbcs	x18, x18, x2
-	ldp	x2, x1, [x1, #96]
-	sbcs	x16, x16, x17
-	sbcs	x4, x7, x4
-	sbcs	x2, x2, x5
-	ldp	x7, x17, [x3, #56]
-	sbcs	x1, x1, x6
-	sbcs	x14, x15, x14
-	ldp	x6, x5, [x3, #40]
-	sbcs	x12, x13, x12
+	sbcs	x17, x4, x17
+	ldp	x16, x4, [x1, #40]
+	str	x17, [x0, #32]
+	sbcs	x16, x16, x5
+	ldp	x17, x5, [x2, #48]
+	str	x16, [x0, #40]
+	sbcs	x17, x4, x17
+	ldp	x16, x4, [x1, #56]
+	str	x17, [x0, #48]
+	sbcs	x16, x16, x5
+	ldp	x17, x5, [x2, #64]
+	str	x16, [x0, #56]
+	ldr	x16, [x1, #72]
+	ldr	x1, [x1, #88]
+	sbcs	x17, x4, x17
+	ldp	x4, x2, [x2, #80]
+	sbcs	x16, x16, x5
+	sbcs	x18, x18, x4
+	sbcs	x1, x1, x2
+	sbcs	x12, x14, x12
+	sbcs	x13, x15, x13
 	sbcs	x10, x11, x10
-	ldp	x13, x15, [x3, #24]
 	sbcs	x8, x9, x8
 	ngcs	 x9, xzr
+	ldp	x4, x5, [x3, #48]
+	ldp	x14, x2, [x3, #32]
+	ldp	x11, x15, [x3, #16]
 	tst	 x9, #0x1
-	ldr	 x9, [x3]
-	ldp	x3, x11, [x3, #8]
-	csel	x17, x17, xzr, ne
-	csel	x7, x7, xzr, ne
+	ldp		x9, x3, [x3]
 	csel	x5, x5, xzr, ne
-	csel	x6, x6, xzr, ne
+	csel	x4, x4, xzr, ne
+	csel	x2, x2, xzr, ne
+	csel	x9, x9, xzr, ne
+	csel	x14, x14, xzr, ne
 	csel	x15, x15, xzr, ne
-	csel	x13, x13, xzr, ne
 	csel	x11, x11, xzr, ne
 	csel	x3, x3, xzr, ne
-	csel	x9, x9, xzr, ne
-	adds	 x9, x9, x18
-	str	x9, [x0, #72]
-	adcs	x9, x3, x16
-	str	x9, [x0, #80]
-	adcs	x9, x11, x4
-	str	x9, [x0, #88]
-	adcs	x9, x13, x2
-	str	x9, [x0, #96]
-	adcs	x9, x15, x1
-	str	x9, [x0, #104]
-	adcs	x9, x6, x14
-	str	x9, [x0, #112]
-	adcs	x9, x5, x12
-	str	x9, [x0, #120]
-	adcs	x9, x7, x10
-	adcs	x8, x17, x8
-	stp	x9, x8, [x0, #128]
+	adds		x9, x9, x17
+	adcs	x16, x3, x16
+	stp	x9, x16, [x0, #64]
+	adcs	x9, x11, x18
+	adcs	x11, x15, x1
+	stp	x9, x11, [x0, #80]
+	adcs	x9, x14, x12
+	adcs	x11, x2, x13
+	stp	x9, x11, [x0, #96]
+	adcs	x9, x4, x10
+	adcs	x8, x5, x8
+	stp	x9, x8, [x0, #112]
 	ret
-.Lfunc_end141:
-	.size	mcl_fpDbl_sub9L, .Lfunc_end141-mcl_fpDbl_sub9L
+.Lfunc_end72:
+	.size	mcl_fpDbl_sub8L, .Lfunc_end72-mcl_fpDbl_sub8L
 
 
 	.section	".note.GNU-stack","",@progbits

From 266d20173d61f1280345670a8b3e68ee95848700 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 15:59:26 +0900
Subject: [PATCH 522/553] llvm_test supports 32bit

---
 test/llvm_test.cpp | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/test/llvm_test.cpp b/test/llvm_test.cpp
index ab0d8216..876a2353 100644
--- a/test/llvm_test.cpp
+++ b/test/llvm_test.cpp
@@ -1,3 +1,26 @@
+/*
+32bit raspi
+N=6
+mulPre 511.30nsec
+sqrPre 598.33nsec
+mod    769.64nsec
+mont     1.283usec
+N=8
+mulPre   1.463usec
+sqrPre   1.422usec
+mod      1.972usec
+mont     2.962usec
+N=12
+mulPre   2.229usec
+sqrPre   2.056usec
+mod      3.811usec
+mont     6.802usec
+N=16
+mulPre   4.955usec
+sqrPre   4.706usec
+mod      6.817usec
+mont    12.916usec
+*/
 #include <stdio.h>
 #include <stdint.h>
 #include <cybozu/inttype.hpp>
@@ -42,10 +65,12 @@ template<>void sqrPre<n>(Unit *z, const Unit *x) { mcl_fpDbl_sqrPre ## n ## suf(
 template<>void mod<n>(Unit *z, const Unit *x, const Unit *p) { mcl_fp_montRedNF ## n ## suf(z, x, p); } \
 template<>void mont<n>(Unit *z, const Unit *x, const Unit *y, const Unit *p) { mcl_fp_montNF ## n ## suf(z, x, y, p); }
 
+#if CYBOZU_OS_BIT == 64
 MCL_FP_DEF_FUNC_SUB(4, L)
 MCL_FP_DEF_FUNC_SUB(5, L)
+#endif
 MCL_FP_DEF_FUNC_SUB(6, L)
-MCL_FP_DEF_FUNC_SUB(7, L)
+//MCL_FP_DEF_FUNC_SUB(7, L)
 MCL_FP_DEF_FUNC_SUB(8, L)
 #if CYBOZU_OS_BIT == 32
 MCL_FP_DEF_FUNC_SUB(12, L)
@@ -70,7 +95,11 @@ void bench(Unit *x, Unit *y, const Unit *p)
 {
 	printf("N=%zd\n", N);
 	Unit xx[N * 2], yy[N * 2];
+#if CYBOZU_OS_BIT == 64
 	const int C = 10000;
+#else
+	const int C = 1000;
+#endif
 	CYBOZU_BENCH_C("mulPre", C, mulPre<N>, xx, x, y);
 	CYBOZU_BENCH_C("sqrPre", C, sqrPre<N>, yy, x);
 	CYBOZU_BENCH_C("mod   ", C, mod<N>, yy, xx, p);
@@ -86,10 +115,12 @@ int main()
 	setRand(x, maxN, rg);
 	setRand(y, maxN, rg);
 	setRand(p, maxN + 1, rg);
+#if CYBOZU_OS_BIT == 64
 	bench<4>(x, y, p + 1);
 	bench<5>(x, y, p + 1);
+#endif
 	bench<6>(x, y, p + 1);
-	bench<7>(x, y, p + 1);
+//	bench<7>(x, y, p + 1);
 	bench<8>(x, y, p + 1);
 #if CYBOZU_OS_BIT == 32
 	bench<12>(x, y, p + 1);

From 8047bfe6aa797877c9ab341f6cdbc7637d6322ca Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 16:01:22 +0900
Subject: [PATCH 523/553] arm.s generated by clang-11

---
 src/asm/arm.s | 109779 ++++++++++++-----------------------------------
 1 file changed, 27128 insertions(+), 82651 deletions(-)

diff --git a/src/asm/arm.s b/src/asm/arm.s
index 2df9bfb9..78cc32db 100644
--- a/src/asm/arm.s
+++ b/src/asm/arm.s
@@ -3,25 +3,26 @@
 	.eabi_attribute	67, "2.09"	@ Tag_conformance
 	.eabi_attribute	6, 1	@ Tag_CPU_arch
 	.eabi_attribute	8, 1	@ Tag_ARM_ISA_use
+	.eabi_attribute	34, 1	@ Tag_CPU_unaligned_access
 	.eabi_attribute	15, 1	@ Tag_ABI_PCS_RW_data
 	.eabi_attribute	16, 1	@ Tag_ABI_PCS_RO_data
 	.eabi_attribute	17, 2	@ Tag_ABI_PCS_GOT_use
 	.eabi_attribute	20, 1	@ Tag_ABI_FP_denormal
 	.eabi_attribute	21, 1	@ Tag_ABI_FP_exceptions
 	.eabi_attribute	23, 3	@ Tag_ABI_FP_number_model
-	.eabi_attribute	34, 1	@ Tag_CPU_unaligned_access
 	.eabi_attribute	24, 1	@ Tag_ABI_align_needed
 	.eabi_attribute	25, 1	@ Tag_ABI_align_preserved
 	.eabi_attribute	28, 1	@ Tag_ABI_VFP_args
 	.eabi_attribute	38, 1	@ Tag_ABI_FP_16bit_format
 	.eabi_attribute	14, 0	@ Tag_ABI_PCS_R9_use
-	.file	"<stdin>"
-	.globl	makeNIST_P192L
-	.align	2
+	.file	"base32.ll"
+	.globl	makeNIST_P192L                  @ -- Begin function makeNIST_P192L
+	.p2align	2
 	.type	makeNIST_P192L,%function
-makeNIST_P192L:                         @ @makeNIST_P192L
+	.code	32                              @ @makeNIST_P192L
+makeNIST_P192L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	mvn	r1, #0
 	mvn	r2, #1
 	str	r1, [r0]
@@ -34,174 +35,168 @@ makeNIST_P192L:                         @ @makeNIST_P192L
 	.size	makeNIST_P192L, .Lfunc_end0-makeNIST_P192L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_mod_NIST_P192L
-	.align	2
+                                        @ -- End function
+	.globl	mcl_fpDbl_mod_NIST_P192L        @ -- Begin function mcl_fpDbl_mod_NIST_P192L
+	.p2align	2
 	.type	mcl_fpDbl_mod_NIST_P192L,%function
-mcl_fpDbl_mod_NIST_P192L:               @ @mcl_fpDbl_mod_NIST_P192L
+	.code	32                              @ @mcl_fpDbl_mod_NIST_P192L
+mcl_fpDbl_mod_NIST_P192L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#8
-	sub	sp, sp, #8
-	add	lr, r1, #24
+	add	lr, r1, #28
+	add	r9, r1, #8
+	ldm	r1, {r10, r12}
+	ldr	r4, [r1, #24]
 	ldr	r2, [r1, #40]
 	ldr	r3, [r1, #44]
-	ldr	r7, [r1, #16]
-	ldr	r8, [r1, #20]
-	ldm	lr, {r4, r5, r6, lr}
-	ldm	r1, {r1, r9, r10, r12}
-	adds	r11, r4, r1
-	adcs	r9, r5, r9
-	adcs	r10, r6, r10
-	adcs	r1, lr, r12
-	str	r1, [sp, #4]            @ 4-byte Spill
-	adcs	r1, r2, r7
-	mov	r7, #0
-	str	r1, [sp]                @ 4-byte Spill
-	adcs	r8, r3, r8
-	mov	r1, #0
-	adcs	r1, r1, #0
-	adc	r12, r7, #0
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	adds	r11, r11, r2
+	adds	r10, r10, r4
+	ldm	lr, {r1, r5, lr}
+	ldm	r9, {r6, r7, r8, r9}
+	adcs	r11, r12, r1
+	adcs	r12, r6, r5
+	mov	r6, #0
+	adcs	r7, r7, lr
+	adcs	r8, r8, r2
 	adcs	r9, r9, r3
-	adcs	r4, r10, r4
-	adcs	r5, r7, r5
-	ldr	r7, [sp]                @ 4-byte Reload
-	adcs	r6, r7, r6
-	adcs	r7, r8, lr
-	adcs	r1, r1, #0
-	adc	r12, r12, #0
-	adds	lr, r4, r2
-	adcs	r3, r5, r3
+	adc	r6, r6, #0
+	adds	r10, r10, r2
+	adcs	r11, r11, r3
+	adcs	r4, r12, r4
+	adcs	r12, r7, r1
+	mov	r1, #0
+	adcs	r5, r8, r5
+	adcs	r7, r9, lr
 	adcs	r6, r6, #0
+	adc	r1, r1, #0
+	adds	r2, r4, r2
+	adcs	lr, r12, r3
+	adcs	r3, r5, #0
 	adcs	r7, r7, #0
-	adcs	r1, r1, #0
-	adc	r5, r12, #0
-	adds	r12, r1, r11
-	adcs	r11, r5, r9
+	mrs	r5, apsr
+	adcs	r4, r6, #0
+	adc	r1, r1, #0
+	msr	APSR_nzcvq, r5
+	adcs	r12, r10, r6
+	adcs	r8, r1, r11
+	adcs	r9, r4, r2
 	adcs	r10, r1, lr
 	mov	r1, #0
-	adcs	r8, r5, r3
-	adcs	lr, r6, #0
-	adcs	r2, r7, #0
-	adc	r9, r1, #0
-	adds	r7, r12, #1
-	str	r2, [sp, #4]            @ 4-byte Spill
-	adcs	r6, r11, #0
-	adcs	r3, r10, #1
+	adcs	r11, r3, #0
+	adcs	r7, r7, #0
+	adc	r6, r1, #0
+	adds	lr, r12, #1
 	adcs	r5, r8, #0
-	adcs	r1, lr, #0
-	adcs	r2, r2, #0
-	sbc	r4, r9, #0
-	ands	r4, r4, #1
-	movne	r7, r12
-	movne	r6, r11
-	movne	r3, r10
-	cmp	r4, #0
+	adcs	r2, r9, #1
+	adcs	r4, r10, #0
+	adcs	r1, r11, #0
+	adcs	r3, r7, #0
+	sbc	r6, r6, #0
+	ands	r6, r6, #1
+	movne	r3, r7
+	movne	r1, r11
+	movne	r4, r10
+	cmp	r6, #0
+	movne	r2, r9
 	movne	r5, r8
-	movne	r1, lr
-	str	r7, [r0]
-	str	r6, [r0, #4]
-	str	r3, [r0, #8]
-	str	r5, [r0, #12]
+	movne	lr, r12
+	str	r3, [r0, #20]
 	str	r1, [r0, #16]
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	movne	r2, r1
-	str	r2, [r0, #20]
-	add	sp, sp, #8
+	str	r4, [r0, #12]
+	str	r2, [r0, #8]
+	str	r5, [r0, #4]
+	str	lr, [r0]
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
 .Lfunc_end1:
 	.size	mcl_fpDbl_mod_NIST_P192L, .Lfunc_end1-mcl_fpDbl_mod_NIST_P192L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_sqr_NIST_P192L
-	.align	2
+                                        @ -- End function
+	.globl	mcl_fp_sqr_NIST_P192L           @ -- Begin function mcl_fp_sqr_NIST_P192L
+	.p2align	2
 	.type	mcl_fp_sqr_NIST_P192L,%function
-mcl_fp_sqr_NIST_P192L:                  @ @mcl_fp_sqr_NIST_P192L
+	.code	32                              @ @mcl_fp_sqr_NIST_P192L
+mcl_fp_sqr_NIST_P192L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	.pad	#60
 	sub	sp, sp, #60
 	mov	r8, r0
 	add	r0, sp, #12
-	bl	mcl_fpDbl_sqrPre6L(PLT)
-	add	r12, sp, #12
-	ldr	lr, [sp, #48]
-	ldr	r2, [sp, #44]
-	ldr	r3, [sp, #40]
+	bl	mcl_fpDbl_sqrPre6L
+	add	r5, sp, #16
+	ldr	r3, [sp, #12]
+	ldr	r6, [sp, #36]
+	ldr	r2, [sp, #40]
+	ldm	r5, {r0, r1, r5}
+	adds	r10, r3, r6
+	ldr	lr, [sp, #44]
+	adcs	r4, r0, r2
+	ldr	r12, [sp, #48]
+	adcs	r9, r1, lr
+	ldr	r7, [sp, #52]
+	ldr	r0, [sp, #28]
+	adcs	r11, r5, r12
+	ldr	r1, [sp, #56]
+	ldr	r3, [sp, #32]
+	adcs	r5, r0, r7
+	mov	r0, #0
+	adcs	r3, r3, r1
+	adc	r0, r0, #0
+	adds	r10, r10, r7
+	adcs	r4, r4, r1
+	str	r4, [sp, #4]                    @ 4-byte Spill
+	adcs	r6, r9, r6
 	mov	r4, #0
-	ldm	r12, {r0, r1, r5, r6, r12}
-	ldr	r7, [sp, #36]
-	adds	r0, r7, r0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	adcs	r0, r3, r1
-	mov	r1, #0
-	adcs	r10, r2, r5
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #52]
-	ldr	r5, [sp, #32]
-	adcs	r11, lr, r6
-	ldr	r6, [sp, #56]
-	adcs	r9, r0, r12
-	adcs	r5, r6, r5
-	adcs	r1, r1, #0
-	adc	r12, r4, #0
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	adds	r4, r4, r0
-	str	r4, [sp, #8]            @ 4-byte Spill
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	adcs	r4, r4, r6
-	adcs	r7, r10, r7
-	adcs	r3, r11, r3
-	adcs	r2, r9, r2
+	adcs	r2, r11, r2
 	adcs	r5, r5, lr
-	adcs	r1, r1, #0
-	adc	r12, r12, #0
-	adds	lr, r7, r0
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r3, r3, r6
-	adcs	r2, r2, #0
-	adcs	r7, r5, #0
-	adcs	r1, r1, #0
-	adc	r6, r12, #0
-	adds	r5, r1, r0
-	mov	r0, #0
-	adcs	r11, r6, r4
+	adcs	r3, r3, r12
+	adcs	r0, r0, #0
+	adc	r12, r4, #0
+	adds	r7, r6, r7
+	adcs	lr, r2, r1
+	mov	r4, #0
+	adcs	r2, r5, #0
+	adcs	r3, r3, #0
+	mrs	r6, apsr
+	adcs	r5, r0, #0
+	adc	r1, r12, #0
+	msr	APSR_nzcvq, r6
+	ldr	r6, [sp, #4]                    @ 4-byte Reload
+	adcs	r0, r10, r0
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	adcs	r12, r1, r6
+	adcs	r9, r5, r7
 	adcs	r10, r1, lr
-	adcs	r12, r6, r3
-	adcs	lr, r2, #0
-	adcs	r4, r7, #0
-	adc	r9, r0, #0
-	adds	r7, r5, #1
-	str	r4, [sp, #8]            @ 4-byte Spill
-	adcs	r2, r11, #0
-	adcs	r3, r10, #1
+	adcs	r11, r2, #0
+	adcs	r3, r3, #0
+	adc	lr, r4, #0
+	adds	r0, r0, #1
 	adcs	r6, r12, #0
-	adcs	r1, lr, #0
-	adcs	r0, r4, #0
-	sbc	r4, r9, #0
+	adcs	r7, r9, #1
+	adcs	r5, r10, #0
+	adcs	r1, r11, #0
+	adcs	r2, r3, #0
+	sbc	r4, lr, #0
 	ands	r4, r4, #1
-	movne	r7, r5
-	movne	r2, r11
-	movne	r3, r10
+	movne	r1, r11
+	movne	r2, r3
+	str	r1, [r8, #16]
+	movne	r5, r10
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
 	cmp	r4, #0
+	movne	r7, r9
 	movne	r6, r12
-	movne	r1, lr
-	str	r7, [r8]
-	str	r2, [r8, #4]
-	str	r3, [r8, #8]
-	str	r6, [r8, #12]
-	str	r1, [r8, #16]
-	ldr	r1, [sp, #8]            @ 4-byte Reload
+	str	r2, [r8, #20]
 	movne	r0, r1
-	str	r0, [r8, #20]
+	str	r5, [r8, #12]
+	str	r7, [r8, #8]
+	str	r6, [r8, #4]
+	str	r0, [r8]
 	add	sp, sp, #60
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
@@ -209,90 +204,90 @@ mcl_fp_sqr_NIST_P192L:                  @ @mcl_fp_sqr_NIST_P192L
 	.size	mcl_fp_sqr_NIST_P192L, .Lfunc_end2-mcl_fp_sqr_NIST_P192L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_mulNIST_P192L
-	.align	2
+                                        @ -- End function
+	.globl	mcl_fp_mulNIST_P192L            @ -- Begin function mcl_fp_mulNIST_P192L
+	.p2align	2
 	.type	mcl_fp_mulNIST_P192L,%function
-mcl_fp_mulNIST_P192L:                   @ @mcl_fp_mulNIST_P192L
+	.code	32                              @ @mcl_fp_mulNIST_P192L
+mcl_fp_mulNIST_P192L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	.pad	#60
 	sub	sp, sp, #60
 	mov	r8, r0
 	add	r0, sp, #12
-	bl	mcl_fpDbl_mulPre6L(PLT)
-	add	r12, sp, #12
-	ldr	lr, [sp, #48]
-	ldr	r2, [sp, #44]
-	ldr	r3, [sp, #40]
+	bl	mcl_fpDbl_mulPre6L
+	add	r5, sp, #16
+	ldr	r3, [sp, #12]
+	ldr	r6, [sp, #36]
+	ldr	r2, [sp, #40]
+	ldm	r5, {r0, r1, r5}
+	adds	r10, r3, r6
+	ldr	lr, [sp, #44]
+	adcs	r4, r0, r2
+	ldr	r12, [sp, #48]
+	adcs	r9, r1, lr
+	ldr	r7, [sp, #52]
+	ldr	r0, [sp, #28]
+	adcs	r11, r5, r12
+	ldr	r1, [sp, #56]
+	ldr	r3, [sp, #32]
+	adcs	r5, r0, r7
+	mov	r0, #0
+	adcs	r3, r3, r1
+	adc	r0, r0, #0
+	adds	r10, r10, r7
+	adcs	r4, r4, r1
+	str	r4, [sp, #4]                    @ 4-byte Spill
+	adcs	r6, r9, r6
 	mov	r4, #0
-	ldm	r12, {r0, r1, r5, r6, r12}
-	ldr	r7, [sp, #36]
-	adds	r0, r7, r0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	adcs	r0, r3, r1
-	mov	r1, #0
-	adcs	r10, r2, r5
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #52]
-	ldr	r5, [sp, #32]
-	adcs	r11, lr, r6
-	ldr	r6, [sp, #56]
-	adcs	r9, r0, r12
-	adcs	r5, r6, r5
-	adcs	r1, r1, #0
-	adc	r12, r4, #0
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	adds	r4, r4, r0
-	str	r4, [sp, #8]            @ 4-byte Spill
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	adcs	r4, r4, r6
-	adcs	r7, r10, r7
-	adcs	r3, r11, r3
-	adcs	r2, r9, r2
+	adcs	r2, r11, r2
 	adcs	r5, r5, lr
-	adcs	r1, r1, #0
-	adc	r12, r12, #0
-	adds	lr, r7, r0
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r3, r3, r6
-	adcs	r2, r2, #0
-	adcs	r7, r5, #0
-	adcs	r1, r1, #0
-	adc	r6, r12, #0
-	adds	r5, r1, r0
-	mov	r0, #0
-	adcs	r11, r6, r4
+	adcs	r3, r3, r12
+	adcs	r0, r0, #0
+	adc	r12, r4, #0
+	adds	r7, r6, r7
+	adcs	lr, r2, r1
+	mov	r4, #0
+	adcs	r2, r5, #0
+	adcs	r3, r3, #0
+	mrs	r6, apsr
+	adcs	r5, r0, #0
+	adc	r1, r12, #0
+	msr	APSR_nzcvq, r6
+	ldr	r6, [sp, #4]                    @ 4-byte Reload
+	adcs	r0, r10, r0
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	adcs	r12, r1, r6
+	adcs	r9, r5, r7
 	adcs	r10, r1, lr
-	adcs	r12, r6, r3
-	adcs	lr, r2, #0
-	adcs	r4, r7, #0
-	adc	r9, r0, #0
-	adds	r7, r5, #1
-	str	r4, [sp, #8]            @ 4-byte Spill
-	adcs	r2, r11, #0
-	adcs	r3, r10, #1
+	adcs	r11, r2, #0
+	adcs	r3, r3, #0
+	adc	lr, r4, #0
+	adds	r0, r0, #1
 	adcs	r6, r12, #0
-	adcs	r1, lr, #0
-	adcs	r0, r4, #0
-	sbc	r4, r9, #0
+	adcs	r7, r9, #1
+	adcs	r5, r10, #0
+	adcs	r1, r11, #0
+	adcs	r2, r3, #0
+	sbc	r4, lr, #0
 	ands	r4, r4, #1
-	movne	r7, r5
-	movne	r2, r11
-	movne	r3, r10
+	movne	r1, r11
+	movne	r2, r3
+	str	r1, [r8, #16]
+	movne	r5, r10
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
 	cmp	r4, #0
+	movne	r7, r9
 	movne	r6, r12
-	movne	r1, lr
-	str	r7, [r8]
-	str	r2, [r8, #4]
-	str	r3, [r8, #8]
-	str	r6, [r8, #12]
-	str	r1, [r8, #16]
-	ldr	r1, [sp, #8]            @ 4-byte Reload
+	str	r2, [r8, #20]
 	movne	r0, r1
-	str	r0, [r8, #20]
+	str	r5, [r8, #12]
+	str	r7, [r8, #8]
+	str	r6, [r8, #4]
+	str	r0, [r8]
 	add	sp, sp, #60
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
@@ -300,83890 +295,28372 @@ mcl_fp_mulNIST_P192L:                   @ @mcl_fp_mulNIST_P192L
 	.size	mcl_fp_mulNIST_P192L, .Lfunc_end3-mcl_fp_mulNIST_P192L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_mod_NIST_P521L
-	.align	2
+                                        @ -- End function
+	.globl	mcl_fpDbl_mod_NIST_P521L        @ -- Begin function mcl_fpDbl_mod_NIST_P521L
+	.p2align	2
 	.type	mcl_fpDbl_mod_NIST_P521L,%function
-mcl_fpDbl_mod_NIST_P521L:               @ @mcl_fpDbl_mod_NIST_P521L
+	.code	32                              @ @mcl_fpDbl_mod_NIST_P521L
+mcl_fpDbl_mod_NIST_P521L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#44
-	sub	sp, sp, #44
-	ldr	r6, [r1, #64]
-	mov	r5, #255
-	ldr	r3, [r1, #72]
-	ldr	r2, [r1, #76]
-	mov	r9, r0
-	orr	r5, r5, #256
-	and	r5, r6, r5
-	lsr	r6, r6, #9
-	lsr	r7, r3, #9
-	str	r5, [sp, #40]           @ 4-byte Spill
-	ldr	r5, [r1, #68]
-	orr	r12, r7, r2, lsl #23
+	.pad	#60
+	sub	sp, sp, #60
+	mov	r3, #255
+	ldr	r2, [r1, #64]
+	orr	r3, r3, #256
+	ldr	r7, [r1, #72]
+	and	r11, r2, r3
+	ldr	r3, [r1, #68]
 	lsr	r2, r2, #9
-	lsr	r4, r5, #9
-	orr	r6, r6, r5, lsl #23
-	ldr	r5, [r1]
-	orr	r3, r4, r3, lsl #23
-	ldmib	r1, {r4, r7, lr}
-	adds	r5, r6, r5
-	ldr	r6, [r1, #36]
-	str	r5, [sp, #36]           @ 4-byte Spill
+	ldr	r5, [r1, #4]
+	ldr	r4, [r1, #8]
+	lsr	r6, r3, #9
+	orr	r2, r2, r3, lsl #23
+	orr	r6, r6, r7, lsl #23
+	lsr	r3, r7, #9
+	ldr	r7, [r1]
+	ldr	lr, [r1, #12]
+	adds	r12, r7, r2
+	ldr	r2, [r1, #76]
+	adcs	r8, r5, r6
 	ldr	r5, [r1, #80]
-	adcs	r3, r3, r4
-	str	r3, [sp, #32]           @ 4-byte Spill
-	adcs	r7, r12, r7
+	ldr	r6, [r1, #24]
+	orr	r3, r3, r2, lsl #23
+	lsr	r2, r2, #9
+	adcs	r9, r4, r3
 	ldr	r3, [r1, #84]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r1, #88]
 	orr	r2, r2, r5, lsl #23
-	lsr	r5, r5, #9
-	adcs	r12, r2, lr
+	lsr	r7, r5, #9
+	adcs	r4, lr, r2
 	ldr	r2, [r1, #16]
-	orr	r4, r5, r3, lsl #23
+	orr	r7, r7, r3, lsl #23
 	lsr	r3, r3, #9
-	orr	r3, r3, r7, lsl #23
-	lsr	r5, r7, #9
-	ldr	r7, [r1, #40]
-	adcs	r2, r4, r2
-	ldr	r4, [r1, #24]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldr	r2, [r1, #20]
-	adcs	r2, r3, r2
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [r1, #92]
-	orr	r3, r5, r2, lsl #23
+	adcs	lr, r2, r7
+	ldr	r7, [r1, #88]
 	ldr	r5, [r1, #28]
-	lsr	r2, r2, #9
-	adcs	lr, r3, r4
-	ldr	r3, [r1, #96]
-	ldr	r4, [r1, #44]
-	orr	r2, r2, r3, lsl #23
-	adcs	r2, r2, r5
+	orr	r2, r3, r7, lsl #23
+	ldr	r3, [r1, #20]
+	adcs	r10, r3, r2
+	lsr	r3, r7, #9
+	ldr	r7, [r1, #92]
+	orr	r3, r3, r7, lsl #23
+	lsr	r7, r7, #9
+	adcs	r2, r6, r3
+	ldr	r6, [r1, #96]
+	orr	r7, r7, r6, lsl #23
+	adcs	r3, r5, r7
+	lsr	r7, r6, #9
+	ldr	r6, [r1, #100]
 	ldr	r5, [r1, #32]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	lsr	r2, r3, #9
-	ldr	r3, [r1, #100]
-	orr	r2, r2, r3, lsl #23
-	adcs	r2, r2, r5
+	orr	r7, r7, r6, lsl #23
+	adcs	r5, r5, r7
+	lsr	r7, r6, #9
+	ldr	r6, [r1, #104]
+	str	r5, [sp, #56]                   @ 4-byte Spill
+	ldr	r5, [r1, #36]
+	orr	r7, r7, r6, lsl #23
+	adcs	r5, r5, r7
+	lsr	r7, r6, #9
+	ldr	r6, [r1, #108]
+	str	r5, [sp, #52]                   @ 4-byte Spill
+	ldr	r5, [r1, #40]
+	orr	r7, r7, r6, lsl #23
+	adcs	r5, r5, r7
+	lsr	r7, r6, #9
+	ldr	r6, [r1, #112]
+	str	r5, [sp, #48]                   @ 4-byte Spill
+	ldr	r5, [r1, #44]
+	orr	r7, r7, r6, lsl #23
+	adcs	r5, r5, r7
+	lsr	r7, r6, #9
+	ldr	r6, [r1, #116]
+	str	r5, [sp, #44]                   @ 4-byte Spill
 	ldr	r5, [r1, #48]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	lsr	r2, r3, #9
-	ldr	r3, [r1, #104]
-	orr	r2, r2, r3, lsl #23
-	adcs	r0, r2, r6
-	lsr	r2, r3, #9
-	ldr	r3, [r1, #108]
-	ldr	r6, [r1, #52]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	orr	r2, r2, r3, lsl #23
-	adcs	r7, r2, r7
-	lsr	r2, r3, #9
-	ldr	r3, [r1, #112]
-	orr	r2, r2, r3, lsl #23
-	lsr	r3, r3, #9
-	adcs	r2, r2, r4
-	ldr	r4, [r1, #116]
-	orr	r3, r3, r4, lsl #23
-	lsr	r4, r4, #9
-	adcs	r3, r3, r5
-	ldr	r5, [r1, #120]
-	orr	r4, r4, r5, lsl #23
-	adcs	r11, r4, r6
-	lsr	r4, r5, #9
-	ldr	r5, [r1, #124]
-	ldr	r6, [r1, #56]
-	orr	r4, r4, r5, lsl #23
-	adcs	r10, r4, r6
-	lsr	r4, r5, #9
-	ldr	r5, [r1, #128]
+	orr	r7, r7, r6, lsl #23
+	adcs	r5, r5, r7
+	lsr	r7, r6, #9
+	ldr	r6, [r1, #120]
+	str	r5, [sp, #40]                   @ 4-byte Spill
+	ldr	r5, [r1, #52]
+	orr	r7, r7, r6, lsl #23
+	adcs	r5, r5, r7
+	lsr	r7, r6, #9
+	ldr	r6, [r1, #124]
+	str	r5, [sp, #36]                   @ 4-byte Spill
+	ldr	r5, [r1, #56]
+	orr	r7, r7, r6, lsl #23
+	adcs	r5, r5, r7
+	lsr	r7, r6, #9
+	ldr	r6, [r1, #128]
 	ldr	r1, [r1, #60]
-	orr	r4, r4, r5, lsl #23
-	adcs	r8, r4, r1
-	ldr	r4, [sp, #40]           @ 4-byte Reload
-	lsr	r1, r5, #9
-	ldr	r5, [sp, #36]           @ 4-byte Reload
-	adc	r1, r1, r4
-	mov	r4, #1
-	and	r4, r4, r1, lsr #9
-	adds	r5, r4, r5
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	str	r5, [sp, #40]           @ 4-byte Spill
+	str	r5, [sp, #32]                   @ 4-byte Spill
+	orr	r7, r7, r6, lsl #23
+	adcs	r7, r1, r7
+	mov	r1, #1
+	adc	r11, r11, r6, lsr #9
+	and	r1, r1, r11, lsr #9
+	adds	r1, r1, r12
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	adcs	r6, r8, #0
+	str	r6, [sp, #24]                   @ 4-byte Spill
+	adcs	r5, r9, #0
+	and	r1, r6, r1
 	adcs	r6, r4, #0
-	ldr	r4, [sp, #28]           @ 4-byte Reload
-	str	r6, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r4, #0
-	and	r4, r6, r5
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	and	r4, r4, r0
-	adcs	r0, r12, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	and	r6, r4, r0
-	adcs	r0, r5, #0
-	and	r4, r6, r0
-	ldr	r6, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adcs	r0, r6, #0
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	and	r5, r4, r0
-	adcs	r0, lr, #0
-	and	r5, r5, r0
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	lr, r6, #0
-	and	r6, r5, lr
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	adcs	r5, r5, #0
-	and	r12, r6, r5
-	adcs	r6, r0, #0
-	adcs	r7, r7, #0
-	and	r4, r12, r6
+	and	r1, r1, r5
+	str	r6, [sp, #16]                   @ 4-byte Spill
+	and	r1, r1, r6
+	adcs	r6, lr, #0
+	str	r6, [sp, #12]                   @ 4-byte Spill
+	and	r1, r1, r6
+	adcs	r6, r10, #0
 	adcs	r2, r2, #0
-	and	r4, r4, r7
-	adcs	r3, r3, #0
-	and	r4, r4, r2
-	adcs	r0, r11, #0
-	and	r4, r4, r3
-	adcs	r10, r10, #0
-	and	r4, r4, r0
-	adcs	r11, r8, #0
-	and	r4, r4, r10
-	adc	r8, r1, #0
-	ldr	r1, .LCPI4_0
-	and	r4, r4, r11
-	orr	r1, r8, r1
-	and	r1, r4, r1
-	cmn	r1, #1
+	and	r1, r1, r6
+	str	r2, [sp, #4]                    @ 4-byte Spill
+	and	r2, r1, r2
+	adcs	r1, r3, #0
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	and	r2, r2, r1
+	str	r5, [sp, #20]                   @ 4-byte Spill
+	adcs	r9, r3, #0
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	and	r2, r2, r9
+	str	r6, [sp, #8]                    @ 4-byte Spill
+	adcs	r8, r3, #0
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	and	r2, r2, r8
+	adcs	lr, r3, #0
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	and	r2, r2, lr
+	adcs	r4, r3, #0
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	and	r2, r2, r4
+	adcs	r5, r3, #0
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
+	and	r2, r2, r5
+	adcs	r10, r3, #0
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	and	r2, r2, r10
+	adcs	r6, r3, #0
+	ldr	r3, .LCPI4_0
+	and	r2, r2, r6
+	adcs	r7, r7, #0
+	and	r12, r2, r7
+	adc	r2, r11, #0
+	orr	r3, r2, r3
+	and	r3, r3, r12
+	cmn	r3, #1
 	beq	.LBB4_2
-@ BB#1:                                 @ %nonzero
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r1, [r9]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r1, [r9, #4]
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r1, [r9, #8]
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r1, [r9, #12]
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r1, [r9, #16]
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r1, [r9, #20]
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	str	r1, [r9, #24]
-	add	r1, r9, #32
-	str	lr, [r9, #28]
-	stm	r1, {r5, r6, r7}
-	add	r1, r9, #52
-	str	r2, [r9, #44]
-	str	r3, [r9, #48]
-	stm	r1, {r0, r10, r11}
-	mov	r1, #255
-	orr	r1, r1, #256
-	and	r1, r8, r1
-	str	r1, [r9, #64]
-	b	.LBB4_3
+@ %bb.1:                                @ %nonzero
+	mov	r3, #255
+	str	r9, [r0, #32]
+	orr	r3, r3, #256
+	str	r8, [r0, #36]
+	and	r2, r2, r3
+	str	r2, [r0, #64]
+	add	r2, r0, #44
+	str	lr, [r0, #40]
+	stm	r2, {r4, r5, r10}
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	str	r2, [r0]
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	str	r2, [r0, #4]
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	str	r2, [r0, #8]
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	str	r2, [r0, #12]
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	str	r2, [r0, #16]
+	ldr	r2, [sp, #8]                    @ 4-byte Reload
+	str	r2, [r0, #20]
+	ldr	r2, [sp, #4]                    @ 4-byte Reload
+	str	r6, [r0, #56]
+	str	r7, [r0, #60]
+	str	r2, [r0, #24]
+	str	r1, [r0, #28]
+	add	sp, sp, #60
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
 .LBB4_2:                                @ %zero
-	mov	r0, r9
 	mov	r1, #0
 	mov	r2, #68
-	bl	memset(PLT)
-.LBB4_3:                                @ %zero
-	add	sp, sp, #44
+	bl	memset
+	add	sp, sp, #60
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-	.align	2
-@ BB#4:
+	.p2align	2
+@ %bb.3:
 .LCPI4_0:
-	.long	4294966784              @ 0xfffffe00
+	.long	4294966784                      @ 0xfffffe00
 .Lfunc_end4:
 	.size	mcl_fpDbl_mod_NIST_P521L, .Lfunc_end4-mcl_fpDbl_mod_NIST_P521L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_mulUnitPre1L
-	.align	2
-	.type	mcl_fp_mulUnitPre1L,%function
-mcl_fp_mulUnitPre1L:                    @ @mcl_fp_mulUnitPre1L
+                                        @ -- End function
+	.globl	mulPv192x32                     @ -- Begin function mulPv192x32
+	.p2align	2
+	.type	mulPv192x32,%function
+	.code	32                              @ @mulPv192x32
+mulPv192x32:
 	.fnstart
-@ BB#0:
-	ldr	r1, [r1]
-	umull	r3, r12, r1, r2
-	stm	r0, {r3, r12}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r11, lr}
+	ldr	r12, [r1]
+	ldmib	r1, {r3, lr}
+	umull	r7, r6, r12, r2
+	ldr	r4, [r1, #12]
+	umull	r5, r8, r3, r2
+	str	r7, [r0]
+	umull	r9, r12, r4, r2
+	adds	r7, r6, r5
+	umull	r5, r4, lr, r2
+	adcs	r7, r8, r5
+	umlal	r6, r5, r3, r2
+	adcs	r7, r4, r9
+	str	r7, [r0, #12]
+	str	r5, [r0, #8]
+	str	r6, [r0, #4]
+	ldr	r3, [r1, #16]
+	umull	r7, r6, r3, r2
+	adcs	r3, r12, r7
+	str	r3, [r0, #16]
+	ldr	r1, [r1, #20]
+	umull	r3, r7, r1, r2
+	adcs	r1, r6, r3
+	str	r1, [r0, #20]
+	adc	r1, r7, #0
+	str	r1, [r0, #24]
+	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
 	mov	pc, lr
 .Lfunc_end5:
-	.size	mcl_fp_mulUnitPre1L, .Lfunc_end5-mcl_fp_mulUnitPre1L
+	.size	mulPv192x32, .Lfunc_end5-mulPv192x32
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_mulPre1L
-	.align	2
-	.type	mcl_fpDbl_mulPre1L,%function
-mcl_fpDbl_mulPre1L:                     @ @mcl_fpDbl_mulPre1L
+                                        @ -- End function
+	.globl	mcl_fp_mulUnitPre6L             @ -- Begin function mcl_fp_mulUnitPre6L
+	.p2align	2
+	.type	mcl_fp_mulUnitPre6L,%function
+	.code	32                              @ @mcl_fp_mulUnitPre6L
+mcl_fp_mulUnitPre6L:
 	.fnstart
-@ BB#0:
-	ldr	r1, [r1]
-	ldr	r2, [r2]
-	umull	r3, r12, r2, r1
-	stm	r0, {r3, r12}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r11, lr}
+	ldr	r12, [r1]
+	ldmib	r1, {r3, lr}
+	umull	r7, r6, r12, r2
+	ldr	r4, [r1, #12]
+	umull	r5, r8, r3, r2
+	str	r7, [r0]
+	umull	r9, r12, r4, r2
+	adds	r7, r6, r5
+	umull	r5, r4, lr, r2
+	adcs	r7, r8, r5
+	umlal	r6, r5, r3, r2
+	ldr	r3, [r1, #16]
+	adcs	r7, r4, r9
+	str	r7, [r0, #12]
+	str	r6, [r0, #4]
+	umull	r7, r6, r3, r2
+	ldr	r1, [r1, #20]
+	str	r5, [r0, #8]
+	adcs	r3, r12, r7
+	str	r3, [r0, #16]
+	umull	r3, r7, r1, r2
+	adcs	r1, r6, r3
+	str	r1, [r0, #20]
+	adc	r1, r7, #0
+	str	r1, [r0, #24]
+	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
 	mov	pc, lr
 .Lfunc_end6:
-	.size	mcl_fpDbl_mulPre1L, .Lfunc_end6-mcl_fpDbl_mulPre1L
+	.size	mcl_fp_mulUnitPre6L, .Lfunc_end6-mcl_fp_mulUnitPre6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_sqrPre1L
-	.align	2
-	.type	mcl_fpDbl_sqrPre1L,%function
-mcl_fpDbl_sqrPre1L:                     @ @mcl_fpDbl_sqrPre1L
+                                        @ -- End function
+	.globl	mcl_fpDbl_mulPre6L              @ -- Begin function mcl_fpDbl_mulPre6L
+	.p2align	2
+	.type	mcl_fpDbl_mulPre6L,%function
+	.code	32                              @ @mcl_fpDbl_mulPre6L
+mcl_fpDbl_mulPre6L:
 	.fnstart
-@ BB#0:
-	ldr	r1, [r1]
-	umull	r2, r3, r1, r1
-	stm	r0, {r2, r3}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#148
+	sub	sp, sp, #148
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mov	r3, r2
+	ldm	r2, {r0, r10}
+	ldmib	r1, {r4, r12}
+	ldr	lr, [r1]
+	ldr	r2, [r2, #8]
+	umull	r5, r9, r4, r0
+	str	r2, [sp, #124]                  @ 4-byte Spill
+	ldr	r2, [r3, #12]
+	str	r2, [sp, #104]                  @ 4-byte Spill
+	umull	r2, r11, lr, r0
+	ldr	r6, [r1, #12]
+	str	r12, [sp, #144]                 @ 4-byte Spill
+	str	r6, [sp, #28]                   @ 4-byte Spill
+	str	r2, [sp, #136]                  @ 4-byte Spill
+	adds	r5, r11, r5
+	str	lr, [sp, #8]                    @ 4-byte Spill
+	umull	r2, r5, r12, r0
+	ldr	r12, [r1, #20]
+	str	r12, [sp, #20]                  @ 4-byte Spill
+	adcs	r7, r9, r2
+	umlal	r11, r2, r4, r0
+	umull	r7, r9, r6, r0
+	adcs	r7, r5, r7
+	str	r7, [sp, #132]                  @ 4-byte Spill
+	ldr	r7, [r1, #16]
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	umull	r5, r8, r7, r0
+	adcs	r6, r9, r5
+	umull	r1, r9, r12, r0
+	str	r6, [sp, #128]                  @ 4-byte Spill
+	adcs	r1, r8, r1
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	umull	r5, r1, r10, lr
+	adc	r0, r9, #0
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r8, [sp, #28]                   @ 4-byte Reload
+	adds	r0, r11, r5
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	umull	r5, r0, r10, r4
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	adcs	r0, r2, r5
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
+	ldr	r6, [sp, #100]                  @ 4-byte Reload
+	umull	r5, r2, r10, r0
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	str	r2, [sp, #112]                  @ 4-byte Spill
+	adcs	r5, r0, r5
+	umull	r9, r0, r10, r8
+	ldr	r2, [sp, #108]                  @ 4-byte Reload
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	adcs	r9, r0, r9
+	umull	r11, r0, r10, r7
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r11, r0, r11
+	umull	lr, r0, r10, r12
+	ldr	r10, [sp, #8]                   @ 4-byte Reload
+	adcs	lr, r2, lr
+	mov	r2, #0
+	adc	r2, r2, #0
+	adds	r1, r6, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	mov	r6, r8
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	adcs	r1, r5, r1
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	adcs	r1, r9, r1
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	adcs	r1, r11, r1
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldr	r11, [sp, #144]                 @ 4-byte Reload
+	adcs	r1, lr, r1
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	adc	r0, r2, r0
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [r3, #20]
+	umull	r2, r9, r0, r10
+	umull	r1, lr, r0, r4
+	str	r2, [sp, #120]                  @ 4-byte Spill
+	adds	r1, r9, r1
+	umull	r5, r1, r0, r11
+	adcs	r2, lr, r5
+	umlal	r9, r5, r0, r4
+	umull	r2, lr, r0, r8
+	str	r5, [sp, #108]                  @ 4-byte Spill
+	str	r9, [sp, #100]                  @ 4-byte Spill
+	mov	r9, r10
+	adcs	r1, r1, r2
+	str	r1, [sp, #132]                  @ 4-byte Spill
+	umull	r1, r2, r0, r7
+	adcs	r1, lr, r1
+	str	r1, [sp, #128]                  @ 4-byte Spill
+	umull	r1, r5, r0, r12
+	adcs	r0, r2, r1
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	adc	r0, r5, #0
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [r3, #16]
+	umull	r5, r2, r0, r10
+	umull	r1, r3, r0, r4
+	str	r5, [sp, #72]                   @ 4-byte Spill
+	adds	r1, r2, r1
+	umull	r5, r1, r0, r11
+	adcs	r3, r3, r5
+	umlal	r2, r5, r0, r4
+	umull	r3, lr, r0, r8
+	str	r2, [sp, #48]                   @ 4-byte Spill
+	str	r5, [sp, #52]                   @ 4-byte Spill
+	adcs	r1, r1, r3
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	umull	r1, r3, r0, r7
+	ldr	r7, [sp, #124]                  @ 4-byte Reload
+	adcs	r1, lr, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	umull	r1, r2, r0, r12
+	adcs	r0, r3, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	adc	r0, r2, #0
+	ldr	r2, [sp, #104]                  @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	umull	r1, r8, r2, r10
+	mov	r10, r6
+	umull	r0, r3, r2, r4
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	adds	r0, r8, r0
+	umull	lr, r0, r2, r11
+	adcs	r3, r3, lr
+	umlal	r8, lr, r2, r4
+	umull	r3, r1, r2, r6
+	str	r1, [sp, #12]                   @ 4-byte Spill
+	adcs	r0, r0, r3
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	mov	r0, r2
+	umull	r6, r2, r7, r11
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	umull	r2, r12, r7, r9
+	mov	r3, r6
+	str	r2, [sp, #24]                   @ 4-byte Spill
+	umull	r1, r2, r7, r10
+	mov	r5, r12
+	stmib	sp, {r1, r2}                    @ 8-byte Folded Spill
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	umull	r11, r1, r7, r4
+	umlal	r5, r3, r7, r4
+	str	r1, [sp]                        @ 4-byte Spill
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	umull	r4, r10, r0, r2
+	adcs	r1, r1, r4
+	str	r1, [sp, #144]                  @ 4-byte Spill
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	umull	r4, r9, r0, r1
+	adcs	r0, r10, r4
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	adc	r0, r9, #0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	adds	r0, r12, r11
+	ldr	r11, [sp, #36]                  @ 4-byte Reload
+	ldr	r0, [sp]                        @ 4-byte Reload
+	adcs	r0, r0, r6
+	umull	r0, r10, r7, r1
+	umull	r4, r1, r7, r2
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	ldr	r7, [sp, #4]                    @ 4-byte Reload
+	adcs	r9, r2, r7
+	ldr	r2, [sp, #8]                    @ 4-byte Reload
+	ldr	r7, [sp, #96]                   @ 4-byte Reload
+	adcs	r4, r2, r4
+	ldr	r2, [sp, #136]                  @ 4-byte Reload
+	adcs	r1, r1, r0
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	str	r2, [r11]
+	adc	r2, r10, #0
+	adds	r10, r0, r7
+	ldr	r7, [sp, #92]                   @ 4-byte Reload
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r5, r5, r7
+	ldr	r7, [sp, #88]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	ldr	r7, [sp, #84]                   @ 4-byte Reload
+	adcs	r7, r9, r7
+	adcs	r4, r4, r0
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r6, r1, r0
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adc	r2, r2, #0
+	adds	r9, r0, r5
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r3, r8, r3
+	ldr	r5, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, lr, r7
+	adcs	r7, r0, r4
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
+	adcs	r0, r0, r6
+	ldr	r6, [sp, #104]                  @ 4-byte Reload
+	adcs	r2, r6, r2
+	ldr	r6, [sp, #28]                   @ 4-byte Reload
+	adc	r4, r6, #0
+	adds	r3, r5, r3
+	ldr	r5, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r5, r1
+	ldr	r5, [sp, #52]                   @ 4-byte Reload
+	adcs	r7, r5, r7
+	ldr	r5, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r5, r0
+	ldr	r5, [sp, #64]                   @ 4-byte Reload
+	adcs	r12, r5, r2
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
+	adcs	r4, r2, r4
+	ldr	r2, [sp, #56]                   @ 4-byte Reload
+	adc	r5, r2, #0
+	ldr	r2, [sp, #120]                  @ 4-byte Reload
+	adds	r1, r2, r1
+	ldr	r2, [sp, #100]                  @ 4-byte Reload
+	adcs	r7, r2, r7
+	ldr	r2, [sp, #108]                  @ 4-byte Reload
+	adcs	r0, r2, r0
+	ldr	r2, [sp, #140]                  @ 4-byte Reload
+	stmib	r11, {r2, r10}
+	str	r0, [r11, #28]
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	str	r1, [r11, #20]
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	ldr	r2, [sp, #116]                  @ 4-byte Reload
+	add	r12, r11, #32
+	adcs	r1, r1, r4
+	str	r3, [r11, #16]
+	ldr	r3, [sp, #112]                  @ 4-byte Reload
+	adcs	r2, r2, r5
+	str	r9, [r11, #12]
+	str	r7, [r11, #24]
+	adc	r3, r3, #0
+	stm	r12, {r0, r1, r2, r3}
+	add	sp, sp, #148
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
 .Lfunc_end7:
-	.size	mcl_fpDbl_sqrPre1L, .Lfunc_end7-mcl_fpDbl_sqrPre1L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mont1L
-	.align	2
-	.type	mcl_fp_mont1L,%function
-mcl_fp_mont1L:                          @ @mcl_fp_mont1L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, lr}
-	push	{r4, r5, r6, lr}
-	ldr	r12, [r2]
-	ldr	r1, [r1]
-	mov	r6, #0
-	umull	lr, r2, r1, r12
-	ldr	r12, [r3, #-4]
-	ldr	r3, [r3]
-	mul	r1, lr, r12
-	umull	r12, r4, r1, r3
-	adds	r5, r12, lr
-	adcs	r5, r4, r2
-	umlal	lr, r2, r1, r3
-	adc	r6, r6, #0
-	subs	r1, r2, r3
-	sbc	r3, r6, #0
-	tst	r3, #1
-	movne	r1, r2
-	str	r1, [r0]
-	pop	{r4, r5, r6, lr}
-	mov	pc, lr
-.Lfunc_end8:
-	.size	mcl_fp_mont1L, .Lfunc_end8-mcl_fp_mont1L
+	.size	mcl_fpDbl_mulPre6L, .Lfunc_end7-mcl_fpDbl_mulPre6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_montNF1L
-	.align	2
-	.type	mcl_fp_montNF1L,%function
-mcl_fp_montNF1L:                        @ @mcl_fp_montNF1L
-	.fnstart
-@ BB#0:
-	.save	{r11, lr}
-	push	{r11, lr}
-	ldr	r12, [r2]
-	ldr	r1, [r1]
-	umull	lr, r2, r1, r12
-	ldr	r12, [r3, #-4]
-	ldr	r3, [r3]
-	mul	r1, lr, r12
-	umlal	lr, r2, r1, r3
-	sub	r1, r2, r3
-	cmp	r1, #0
-	movge	r2, r1
-	str	r2, [r0]
-	pop	{r11, lr}
-	mov	pc, lr
-.Lfunc_end9:
-	.size	mcl_fp_montNF1L, .Lfunc_end9-mcl_fp_montNF1L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montRed1L
-	.align	2
-	.type	mcl_fp_montRed1L,%function
-mcl_fp_montRed1L:                       @ @mcl_fp_montRed1L
+                                        @ -- End function
+	.globl	mcl_fpDbl_sqrPre6L              @ -- Begin function mcl_fpDbl_sqrPre6L
+	.p2align	2
+	.type	mcl_fpDbl_sqrPre6L,%function
+	.code	32                              @ @mcl_fpDbl_sqrPre6L
+mcl_fpDbl_sqrPre6L:
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, lr}
-	push	{r4, r5, r6, lr}
-	ldr	r12, [r2, #-4]
-	ldr	r3, [r1]
-	ldr	r2, [r2]
-	ldr	r1, [r1, #4]
-	mov	r6, #0
-	mul	lr, r3, r12
-	umull	r12, r4, lr, r2
-	adds	r5, r3, r12
-	adcs	r5, r1, r4
-	umlal	r3, r1, lr, r2
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#152
+	sub	sp, sp, #152
+	mov	r5, r0
+	ldr	r9, [r1]
+	ldmib	r1, {r0, lr}
+	ldr	r2, [r1, #20]
+	ldr	r12, [r1, #12]
+	ldr	r3, [r1, #16]
+	umull	r6, r7, r2, r0
+	umull	r4, r1, r2, r9
+	str	r6, [sp, #60]                   @ 4-byte Spill
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	str	r4, [sp, #132]                  @ 4-byte Spill
+	adds	r4, r1, r6
+	str	r1, [sp, #124]                  @ 4-byte Spill
+	umull	r4, r1, r2, lr
+	str	r4, [sp, #148]                  @ 4-byte Spill
+	adcs	r4, r7, r4
+	umull	r7, r10, r2, r3
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	umull	r4, r6, r2, r12
+	str	r4, [sp, #140]                  @ 4-byte Spill
+	adcs	r4, r1, r4
+	str	r4, [sp, #112]                  @ 4-byte Spill
+	adcs	r4, r6, r7
+	str	r4, [sp, #108]                  @ 4-byte Spill
+	umull	r4, r1, r2, r2
+	str	r6, [sp, #116]                  @ 4-byte Spill
+	umull	r6, r8, r3, r0
+	adcs	r4, r10, r4
+	str	r4, [sp, #104]                  @ 4-byte Spill
+	adc	r4, r1, #0
+	str	r4, [sp, #100]                  @ 4-byte Spill
+	umull	r4, r1, r3, r9
+	str	r6, [sp, #36]                   @ 4-byte Spill
+	str	r8, [sp, #24]                   @ 4-byte Spill
+	str	r4, [sp, #128]                  @ 4-byte Spill
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	adds	r4, r1, r6
+	umull	r1, r6, r3, lr
+	str	r1, [sp, #136]                  @ 4-byte Spill
+	adcs	r4, r8, r1
+	umull	r8, r11, r3, r3
+	str	r6, [sp, #48]                   @ 4-byte Spill
+	umull	r4, r1, r3, r12
+	adcs	r6, r6, r4
+	str	r6, [sp, #96]                   @ 4-byte Spill
+	adcs	r6, r1, r8
+	str	r6, [sp, #92]                   @ 4-byte Spill
+	adcs	r6, r11, r7
+	str	r6, [sp, #88]                   @ 4-byte Spill
+	adc	r6, r10, #0
+	umull	r8, r10, r12, r0
+	str	r6, [sp, #84]                   @ 4-byte Spill
+	umull	r7, r6, r12, r9
+	str	r8, [sp, #32]                   @ 4-byte Spill
+	str	r10, [sp, #28]                  @ 4-byte Spill
+	str	r7, [sp, #40]                   @ 4-byte Spill
+	str	r6, [sp, #16]                   @ 4-byte Spill
+	adds	r7, r6, r8
+	umull	r6, r8, r12, lr
+	str	r6, [sp, #144]                  @ 4-byte Spill
+	adcs	r7, r10, r6
+	mov	r10, r6
+	umull	r7, r6, r12, r12
+	adcs	r7, r8, r7
+	str	r7, [sp, #80]                   @ 4-byte Spill
+	adcs	r4, r6, r4
+	str	r4, [sp, #76]                   @ 4-byte Spill
+	ldr	r4, [sp, #140]                  @ 4-byte Reload
+	adcs	r1, r1, r4
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	umull	r4, r7, lr, r0
+	adc	r1, r1, #0
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	umull	r6, r1, lr, r9
+	str	r7, [sp]                        @ 4-byte Spill
+	str	r6, [sp, #116]                  @ 4-byte Spill
+	adds	r6, r1, r4
+	str	r1, [sp, #8]                    @ 4-byte Spill
+	umull	r1, r6, lr, lr
+	str	r1, [sp, #140]                  @ 4-byte Spill
+	adcs	r11, r7, r1
+	ldr	r11, [sp, #136]                 @ 4-byte Reload
+	adcs	r1, r6, r10
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	adcs	r1, r8, r11
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #148]                  @ 4-byte Reload
+	umull	r8, r10, r0, r9
+	ldr	r6, [sp, #48]                   @ 4-byte Reload
+	adcs	r6, r6, r1
+	str	r6, [sp, #48]                   @ 4-byte Spill
+	ldr	r6, [sp, #44]                   @ 4-byte Reload
+	str	r8, [sp, #12]                   @ 4-byte Spill
 	adc	r6, r6, #0
-	subs	r2, r1, r2
-	sbc	r3, r6, #0
-	tst	r3, #1
-	movne	r2, r1
-	str	r2, [r0]
-	pop	{r4, r5, r6, lr}
-	mov	pc, lr
-.Lfunc_end10:
-	.size	mcl_fp_montRed1L, .Lfunc_end10-mcl_fp_montRed1L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addPre1L
-	.align	2
-	.type	mcl_fp_addPre1L,%function
-mcl_fp_addPre1L:                        @ @mcl_fp_addPre1L
-	.fnstart
-@ BB#0:
-	ldr	r1, [r1]
-	ldr	r2, [r2]
-	adds	r1, r2, r1
-	str	r1, [r0]
-	mov	r0, #0
+	str	r6, [sp, #44]                   @ 4-byte Spill
+	umull	r7, r6, r0, r0
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	str	r6, [sp, #20]                   @ 4-byte Spill
+	ldr	r6, [sp, #4]                    @ 4-byte Reload
+	ldr	r7, [sp]                        @ 4-byte Reload
+	adds	r6, r10, r6
+	ldr	r6, [sp, #20]                   @ 4-byte Reload
+	adcs	r6, r6, r4
+	ldr	r6, [sp, #32]                   @ 4-byte Reload
+	adcs	r6, r7, r6
+	str	r6, [sp, #32]                   @ 4-byte Spill
+	ldr	r6, [sp, #36]                   @ 4-byte Reload
+	ldr	r7, [sp, #28]                   @ 4-byte Reload
+	adcs	r6, r7, r6
+	str	r6, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #60]                   @ 4-byte Reload
+	ldr	r7, [sp, #24]                   @ 4-byte Reload
+	adcs	r6, r7, r6
+	str	r6, [sp, #24]                   @ 4-byte Spill
+	ldr	r6, [sp, #124]                  @ 4-byte Reload
+	mov	r7, r10
+	umlal	r7, r4, r0, r0
+	umlal	r6, r1, r2, r0
+	str	r1, [sp, #148]                  @ 4-byte Spill
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	str	r6, [sp, #60]                   @ 4-byte Spill
+	umlal	r1, r11, r3, r0
+	ldr	r3, [sp, #8]                    @ 4-byte Reload
+	mov	r8, r3
+	str	r11, [sp, #136]                 @ 4-byte Spill
+	ldr	r11, [sp, #16]                  @ 4-byte Reload
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #144]                  @ 4-byte Reload
+	mov	r2, r11
+	umlal	r2, r1, r12, r0
+	ldr	r12, [sp, #116]                 @ 4-byte Reload
+	str	r1, [sp, #144]                  @ 4-byte Spill
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	str	r2, [sp, #20]                   @ 4-byte Spill
+	umlal	r8, r1, lr, r0
+	ldr	lr, [sp, #12]                   @ 4-byte Reload
+	str	r1, [sp, #140]                  @ 4-byte Spill
+	umull	r1, r2, r9, r9
+	str	r1, [sp, #4]                    @ 4-byte Spill
+	mov	r1, r12
+	mov	r6, r2
+	umlal	r6, r1, r0, r9
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adc	r0, r0, #0
+	adds	r2, r2, lr
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	adcs	r2, r10, r12
+	ldr	r12, [sp, #40]                  @ 4-byte Reload
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	adcs	r9, r3, r12
+	ldr	r2, [sp, #120]                  @ 4-byte Reload
+	adcs	r10, r11, r0
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	adcs	r3, r2, r0
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adc	r0, r0, #0
+	adds	r11, lr, r6
+	adcs	r2, r7, r1
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	str	r1, [r5]
+	adcs	r1, r4, r9
+	ldr	r4, [sp, #32]                   @ 4-byte Reload
+	ldr	r6, [sp, #28]                   @ 4-byte Reload
+	adcs	r7, r4, r10
+	str	r11, [r5, #4]
+	adcs	r6, r6, r3
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r3, r0
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	adc	r4, r3, #0
+	ldr	r3, [sp, #116]                  @ 4-byte Reload
+	adds	r9, r3, r2
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	adcs	lr, r8, r1
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	str	r9, [r5, #8]
+	adcs	r3, r1, r7
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	adcs	r7, r1, r6
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	ldr	r6, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r4
+	ldr	r4, [sp, #44]                   @ 4-byte Reload
+	adc	r4, r4, #0
+	adds	r8, r12, lr
+	adcs	r3, r2, r3
+	ldr	r2, [sp, #144]                  @ 4-byte Reload
+	str	r8, [r5, #12]
+	adcs	r7, r2, r7
+	ldr	r2, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r2, r0
+	ldr	r2, [sp, #76]                   @ 4-byte Reload
+	adcs	r2, r2, r1
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	adcs	r4, r1, r4
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	adc	r1, r1, #0
+	adds	r3, r6, r3
+	ldr	r6, [sp, #36]                   @ 4-byte Reload
+	str	r3, [r5, #16]
+	adcs	r7, r6, r7
+	ldr	r6, [sp, #136]                  @ 4-byte Reload
+	ldr	r3, [sp, #108]                  @ 4-byte Reload
+	adcs	r0, r6, r0
+	ldr	r6, [sp, #96]                   @ 4-byte Reload
+	adcs	r2, r6, r2
+	ldr	r6, [sp, #92]                   @ 4-byte Reload
+	adcs	r4, r6, r4
+	ldr	r6, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r6, r1
+	ldr	r6, [sp, #84]                   @ 4-byte Reload
+	adc	r12, r6, #0
+	ldr	r6, [sp, #132]                  @ 4-byte Reload
+	adds	r7, r6, r7
+	ldr	r6, [sp, #60]                   @ 4-byte Reload
+	str	r7, [r5, #20]
+	adcs	r0, r6, r0
+	ldr	r6, [sp, #148]                  @ 4-byte Reload
+	str	r0, [r5, #24]
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r2, r6, r2
+	str	r2, [r5, #28]
+	adcs	r0, r0, r4
+	adcs	r1, r3, r1
+	ldr	r3, [sp, #104]                  @ 4-byte Reload
+	adcs	r2, r3, r12
+	ldr	r3, [sp, #100]                  @ 4-byte Reload
+	add	r12, r5, #32
+	adc	r3, r3, #0
+	stm	r12, {r0, r1, r2, r3}
+	add	sp, sp, #152
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end11:
-	.size	mcl_fp_addPre1L, .Lfunc_end11-mcl_fp_addPre1L
+.Lfunc_end8:
+	.size	mcl_fpDbl_sqrPre6L, .Lfunc_end8-mcl_fpDbl_sqrPre6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_subPre1L
-	.align	2
-	.type	mcl_fp_subPre1L,%function
-mcl_fp_subPre1L:                        @ @mcl_fp_subPre1L
+                                        @ -- End function
+	.globl	mcl_fp_mont6L                   @ -- Begin function mcl_fp_mont6L
+	.p2align	2
+	.type	mcl_fp_mont6L,%function
+	.code	32                              @ @mcl_fp_mont6L
+mcl_fp_mont6L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#112
+	sub	sp, sp, #112
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	mov	r0, r2
+	ldr	r7, [r0, #8]
+	ldr	lr, [r0, #4]
+	ldr	r0, [r0, #12]
+	str	r2, [sp, #56]                   @ 4-byte Spill
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [r1]
 	ldr	r2, [r2]
-	ldr	r1, [r1]
-	subs	r1, r1, r2
-	str	r1, [r0]
-	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	mov	pc, lr
-.Lfunc_end12:
-	.size	mcl_fp_subPre1L, .Lfunc_end12-mcl_fp_subPre1L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_1L
-	.align	2
-	.type	mcl_fp_shr1_1L,%function
-mcl_fp_shr1_1L:                         @ @mcl_fp_shr1_1L
-	.fnstart
-@ BB#0:
-	ldr	r1, [r1]
-	lsr	r1, r1, #1
-	str	r1, [r0]
-	mov	pc, lr
-.Lfunc_end13:
-	.size	mcl_fp_shr1_1L, .Lfunc_end13-mcl_fp_shr1_1L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_add1L
-	.align	2
-	.type	mcl_fp_add1L,%function
-mcl_fp_add1L:                           @ @mcl_fp_add1L
-	.fnstart
-@ BB#0:
-	ldr	r1, [r1]
-	ldr	r2, [r2]
-	ldr	r3, [r3]
-	adds	r1, r2, r1
-	mov	r2, #0
-	str	r1, [r0]
+	str	r7, [sp, #44]                   @ 4-byte Spill
+	ldr	r7, [r1, #4]
+	umull	r4, r6, r0, r2
+	str	r7, [sp, #104]                  @ 4-byte Spill
+	ldr	r7, [r3, #-4]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r5, [r3, #8]
+	str	r4, [sp, #40]                   @ 4-byte Spill
+	ldr	r9, [r3]
+	mul	r0, r7, r4
+	str	r5, [sp, #108]                  @ 4-byte Spill
+	ldr	r8, [r3, #4]
+	str	r7, [sp, #84]                   @ 4-byte Spill
+	ldr	r7, [r1, #20]
+	ldr	r12, [r1, #8]
+	ldr	r11, [r1, #12]
+	umull	r10, r4, r0, r5
+	ldr	r1, [r1, #16]
+	str	r9, [sp, #80]                   @ 4-byte Spill
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	str	r7, [sp, #72]                   @ 4-byte Spill
+	str	r4, [sp, #32]                   @ 4-byte Spill
+	umull	r4, r5, r0, r9
+	str	r10, [sp, #12]                  @ 4-byte Spill
+	str	r11, [sp, #96]                  @ 4-byte Spill
+	str	r12, [sp, #68]                  @ 4-byte Spill
+	str	r5, [sp, #8]                    @ 4-byte Spill
+	umlal	r5, r10, r0, r8
+	str	r4, [sp, #36]                   @ 4-byte Spill
+	str	r8, [sp, #88]                   @ 4-byte Spill
+	str	r5, [sp, #4]                    @ 4-byte Spill
+	umull	r5, r4, r7, r2
+	ldr	r7, [sp, #104]                  @ 4-byte Reload
+	str	r5, [sp, #92]                   @ 4-byte Spill
+	str	r4, [sp, #100]                  @ 4-byte Spill
+	umull	r5, r4, r1, r2
+	umull	r9, r1, r11, r2
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	umull	r11, r1, r7, r2
+	adds	r7, r6, r11
+	umull	r7, r11, r12, r2
+	ldr	r12, [r3, #16]
+	adcs	r1, r1, r7
+	adcs	r1, r11, r9
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	ldr	r11, [r3, #12]
+	adcs	r1, r1, r5
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	str	r11, [sp, #92]                  @ 4-byte Spill
+	adcs	r1, r4, r1
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r4, [sp, #8]                    @ 4-byte Reload
+	adc	r1, r1, #0
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldr	r1, [r3, #20]
+	umull	r3, r9, r0, r8
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	str	r12, [sp, #100]                 @ 4-byte Spill
+	umull	r8, r5, r0, r1
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adds	r3, r4, r3
+	adcs	r1, r9, r1
+	ldr	r4, [sp, #32]                   @ 4-byte Reload
+	umull	r1, r3, r0, r11
+	adcs	r11, r4, r1
+	umull	r9, r1, r0, r12
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	umlal	r6, r7, r0, r2
+	ldr	r2, [sp, #40]                   @ 4-byte Reload
+	adcs	r12, r3, r9
+	adcs	r9, r1, r8
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adc	r3, r5, #0
+	mov	r8, #0
+	adds	r2, r1, r2
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	mov	r2, r0
+	adcs	r1, r1, r6
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	adcs	r1, r10, r7
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	ldr	r6, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r11, r1
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	ldr	r11, [sp, #72]                  @ 4-byte Reload
+	adcs	r1, r12, r1
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r1, r9, r1
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	umull	r4, r9, lr, r11
+	adcs	r1, r3, r1
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	adc	r1, r8, #0
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	umull	r3, r5, lr, r0
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	umull	r12, r7, lr, r1
+	umull	r10, r1, lr, r0
+	adds	r3, r1, r3
+	umull	r0, r3, lr, r6
+	adcs	r5, r5, r0
+	umlal	r1, r0, lr, r2
+	adcs	r8, r3, r12
+	ldr	r12, [sp, #60]                  @ 4-byte Reload
+	ldr	r2, [sp, #40]                   @ 4-byte Reload
+	umull	r3, r5, lr, r12
+	adcs	r3, r7, r3
+	adcs	r5, r5, r4
+	adc	r6, r9, #0
+	adds	r7, r2, r10
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	ldr	r10, [sp, #76]                  @ 4-byte Reload
+	adcs	r9, r2, r1
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	ldr	r5, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mul	r0, r1, r7
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	umull	r6, r8, r0, r1
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	umull	r3, r2, r0, r1
+	mov	r1, r6
+	str	r3, [sp, #16]                   @ 4-byte Spill
+	umull	lr, r3, r0, r5
+	mov	r4, r2
+	umlal	r4, r1, r0, r5
+	ldr	r5, [sp, #92]                   @ 4-byte Reload
+	adds	r2, r2, lr
+	umull	r2, lr, r0, r5
+	adcs	r3, r3, r6
+	adcs	r8, r8, r2
+	ldr	r2, [sp, #100]                  @ 4-byte Reload
+	umull	r3, r6, r0, r2
+	umull	r5, r2, r0, r10
+	adcs	r3, lr, r3
+	adcs	r0, r6, r5
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	adc	r2, r2, #0
+	adds	r7, r7, r6
+	ldr	r6, [sp, #68]                   @ 4-byte Reload
+	adcs	r7, r9, r4
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	ldr	r7, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r7, r1
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	ldr	r7, [sp, #104]                  @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	ldr	r8, [sp, #64]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	umull	r2, r1, r3, r11
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	str	r1, [sp, #12]                   @ 4-byte Spill
+	umull	r9, r1, r3, r12
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	str	r2, [sp, #8]                    @ 4-byte Spill
+	umull	r11, r0, r3, r8
+	str	r1, [sp]                        @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	umull	r2, r12, r3, r7
+	str	r11, [sp, #4]                   @ 4-byte Spill
+	umull	r4, r5, r3, r1
+	adds	r2, r0, r2
+	umull	r1, lr, r3, r6
+	adcs	r2, r12, r1
+	umlal	r0, r1, r3, r7
+	adcs	r2, lr, r4
+	adcs	r12, r5, r9
+	ldr	r5, [sp, #8]                    @ 4-byte Reload
+	ldr	r4, [sp]                        @ 4-byte Reload
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	adcs	r4, r4, r5
+	ldr	r5, [sp, #12]                   @ 4-byte Reload
+	ldr	r7, [sp, #4]                    @ 4-byte Reload
+	adc	r5, r5, #0
+	adds	r9, r3, r7
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	ldr	r7, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r3, r0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mul	r0, r1, r9
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	umull	r2, r3, r0, r1
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	str	r3, [sp, #12]                   @ 4-byte Spill
+	umull	r3, r4, r0, r1
+	mov	r1, r2
+	str	r3, [sp, #16]                   @ 4-byte Spill
+	umull	r11, r3, r0, r10
+	mov	r5, r4
+	umlal	r5, r1, r0, r7
+	str	r3, [sp]                        @ 4-byte Spill
+	ldr	r3, [sp, #100]                  @ 4-byte Reload
+	umull	r12, lr, r0, r3
+	umull	r10, r3, r0, r7
+	ldr	r7, [sp, #92]                   @ 4-byte Reload
+	adds	r4, r4, r10
+	umull	r4, r10, r0, r7
+	adcs	r0, r3, r2
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	adcs	r0, r10, r12
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	ldr	r0, [sp]                        @ 4-byte Reload
+	adcs	r3, lr, r11
+	ldr	r12, [sp, #104]                 @ 4-byte Reload
+	adc	r10, r0, #0
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adds	r7, r9, r0
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r7, [sp, #96]                   @ 4-byte Reload
+	umull	r4, r11, r0, r12
+	umull	r9, r2, r0, r7
+	umull	r7, lr, r0, r6
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	umull	r2, r6, r0, r8
+	ldr	r8, [sp, #60]                   @ 4-byte Reload
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	adcs	r5, r2, r5
+	ldr	r2, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	ldr	r2, [sp, #8]                    @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	ldr	r2, [sp, #4]                    @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r10
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adc	r10, r1, #0
+	adds	r4, r6, r4
+	adcs	r4, r11, r7
+	umlal	r6, r7, r0, r12
+	adcs	lr, lr, r9
+	ldr	r11, [sp, #80]                  @ 4-byte Reload
+	umull	r1, r4, r0, r8
+	adcs	r9, r2, r1
+	umull	r2, r1, r0, r3
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	r2, r4, r2
+	adc	r1, r1, #0
+	adds	r4, r5, r0
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r7, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r9, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	adcs	r0, r10, r1
+	ldr	r10, [sp, #84]                  @ 4-byte Reload
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	mul	r0, r10, r4
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	umull	r2, r12, r0, r1
+	umull	r1, r6, r0, r11
+	umull	lr, r3, r0, r7
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	mov	r1, r2
+	mov	r5, r6
+	umlal	r5, r1, r0, r7
+	adds	r6, r6, lr
+	adcs	r2, r3, r2
+	umull	r6, lr, r0, r9
+	ldr	r2, [sp, #100]                  @ 4-byte Reload
+	adcs	r12, r12, r6
+	umull	r3, r6, r0, r2
+	adcs	lr, lr, r3
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	umull	r7, r2, r0, r3
+	ldr	r3, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r6, r7
+	adc	r2, r2, #0
+	adds	r7, r4, r3
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	adcs	r3, r3, r5
+	str	r3, [sp, #48]                   @ 4-byte Spill
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r1, r12
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r1, r1, lr
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r4, [r0, #16]
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	umull	r12, lr, r4, r8
+	umull	r3, r2, r4, r0
+	umull	r8, r0, r4, r7
+	ldr	r7, [sp, #96]                   @ 4-byte Reload
+	umull	r5, r6, r4, r1
+	adds	r5, r0, r5
+	adcs	r5, r6, r3
+	umlal	r0, r3, r4, r1
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	umull	r5, r6, r4, r7
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	adcs	r2, r2, r5
+	adcs	r12, r6, r12
+	umull	r5, r6, r4, r7
+	adcs	r5, lr, r5
+	adc	r6, r6, #0
+	adds	r7, r1, r8
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r5, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	mul	r0, r10, r7
+	umull	r8, r2, r0, r11
+	ldr	r11, [sp, #64]                  @ 4-byte Reload
+	umull	lr, r3, r0, r5
+	umull	r6, r12, r0, r1
+	mov	r4, r2
+	adds	r2, r2, lr
+	umull	r2, lr, r0, r9
+	adcs	r3, r3, r6
+	mov	r1, r6
+	umlal	r4, r1, r0, r5
+	adcs	r12, r12, r2
+	ldr	r2, [sp, #100]                  @ 4-byte Reload
+	umull	r3, r6, r0, r2
+	adcs	lr, lr, r3
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	umull	r5, r2, r0, r3
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r6, r5
 	adc	r2, r2, #0
-	subs	r1, r1, r3
+	adds	r7, r7, r8
+	adcs	r3, r3, r4
+	str	r3, [sp, #48]                   @ 4-byte Spill
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	ldr	r3, [sp, #104]                  @ 4-byte Reload
+	adcs	r1, r1, r12
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r1, r1, lr
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r4, [r0, #20]
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	umull	r2, r12, r4, r3
+	umull	r9, r1, r4, r0
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	umull	r7, r8, r4, r0
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	umull	r5, r6, r4, r0
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	umull	r1, lr, r4, r0
+	umull	r10, r0, r4, r11
+	ldr	r11, [sp, #100]                 @ 4-byte Reload
+	adds	r2, r0, r2
+	adcs	r2, r12, r1
+	umlal	r0, r1, r4, r3
+	adcs	r2, lr, r5
+	adcs	r5, r6, r7
+	ldr	r6, [sp, #56]                   @ 4-byte Reload
+	adcs	r7, r8, r9
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	adc	r6, r6, #0
+	ldr	r12, [sp, #88]                  @ 4-byte Reload
+	adds	r8, r3, r10
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	ldr	lr, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r3, r0
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r5, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	mul	r0, r1, r8
+	umull	r3, r4, r0, r7
+	umull	r1, r2, r0, r12
+	str	r3, [sp, #84]                   @ 4-byte Spill
+	ldr	r3, [sp, #108]                  @ 4-byte Reload
+	adds	r1, r4, r1
+	umull	r6, r1, r0, r3
+	adcs	r2, r2, r6
+	umlal	r4, r6, r0, r12
+	umull	r2, r3, r0, r5
+	adcs	r10, r1, r2
+	umull	r2, r1, r0, r11
+	adcs	r9, r3, r2
+	umull	r3, r2, r0, lr
+	adcs	r1, r1, r3
+	adc	r0, r2, #0
+	ldr	r2, [sp, #84]                   @ 4-byte Reload
+	adds	r2, r8, r2
+	ldr	r2, [sp, #104]                  @ 4-byte Reload
+	adcs	r4, r2, r4
+	ldr	r2, [sp, #96]                   @ 4-byte Reload
+	str	r4, [sp, #104]                  @ 4-byte Spill
+	adcs	r5, r2, r6
+	ldr	r2, [sp, #72]                   @ 4-byte Reload
+	adcs	r3, r2, r10
+	ldr	r2, [sp, #68]                   @ 4-byte Reload
+	adcs	r8, r2, r9
+	ldr	r2, [sp, #64]                   @ 4-byte Reload
+	adcs	r9, r2, r1
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	adc	r2, r1, #0
+	subs	r10, r4, r7
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	sbcs	r7, r5, r12
+	mov	r12, r5
+	sbcs	r5, r3, r1
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	sbcs	r6, r8, r1
+	sbcs	r1, r9, r11
+	sbcs	r4, r0, lr
 	sbc	r2, r2, #0
-	tst	r2, #1
-	streq	r1, [r0]
-	mov	pc, lr
-.Lfunc_end14:
-	.size	mcl_fp_add1L, .Lfunc_end14-mcl_fp_add1L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF1L
-	.align	2
-	.type	mcl_fp_addNF1L,%function
-mcl_fp_addNF1L:                         @ @mcl_fp_addNF1L
-	.fnstart
-@ BB#0:
-	ldr	r1, [r1]
-	ldr	r2, [r2]
-	add	r1, r2, r1
-	ldr	r2, [r3]
-	sub	r2, r1, r2
+	ands	r2, r2, #1
+	movne	r4, r0
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	movne	r1, r9
+	movne	r6, r8
 	cmp	r2, #0
-	movlt	r2, r1
-	str	r2, [r0]
-	mov	pc, lr
-.Lfunc_end15:
-	.size	mcl_fp_addNF1L, .Lfunc_end15-mcl_fp_addNF1L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub1L
-	.align	2
-	.type	mcl_fp_sub1L,%function
-mcl_fp_sub1L:                           @ @mcl_fp_sub1L
-	.fnstart
-@ BB#0:
-	ldr	r2, [r2]
-	ldr	r1, [r1]
-	subs	r1, r1, r2
-	mov	r2, #0
-	sbc	r2, r2, #0
-	str	r1, [r0]
-	tst	r2, #1
-	ldrne	r2, [r3]
-	addne	r1, r2, r1
-	strne	r1, [r0]
-	movne	pc, lr
-	mov	pc, lr
-.Lfunc_end16:
-	.size	mcl_fp_sub1L, .Lfunc_end16-mcl_fp_sub1L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF1L
-	.align	2
-	.type	mcl_fp_subNF1L,%function
-mcl_fp_subNF1L:                         @ @mcl_fp_subNF1L
-	.fnstart
-@ BB#0:
-	ldr	r2, [r2]
-	ldr	r1, [r1]
-	sub	r1, r1, r2
-	ldr	r2, [r3]
-	cmp	r1, #0
-	addlt	r1, r1, r2
-	str	r1, [r0]
+	str	r1, [r0, #16]
+	movne	r5, r3
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	movne	r7, r12
+	str	r4, [r0, #20]
+	str	r6, [r0, #12]
+	movne	r10, r1
+	str	r5, [r0, #8]
+	str	r7, [r0, #4]
+	str	r10, [r0]
+	add	sp, sp, #112
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end17:
-	.size	mcl_fp_subNF1L, .Lfunc_end17-mcl_fp_subNF1L
+.Lfunc_end9:
+	.size	mcl_fp_mont6L, .Lfunc_end9-mcl_fp_mont6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_add1L
-	.align	2
-	.type	mcl_fpDbl_add1L,%function
-mcl_fpDbl_add1L:                        @ @mcl_fpDbl_add1L
+                                        @ -- End function
+	.globl	mcl_fp_montNF6L                 @ -- Begin function mcl_fp_montNF6L
+	.p2align	2
+	.type	mcl_fp_montNF6L,%function
+	.code	32                              @ @mcl_fp_montNF6L
+mcl_fp_montNF6L:
 	.fnstart
-@ BB#0:
-	.save	{r11, lr}
-	push	{r11, lr}
-	ldm	r1, {r12, lr}
-	ldm	r2, {r1, r2}
-	ldr	r3, [r3]
-	adds	r1, r1, r12
-	str	r1, [r0]
-	mov	r1, #0
-	adcs	r2, r2, lr
-	adc	r1, r1, #0
-	subs	r3, r2, r3
-	sbc	r1, r1, #0
-	tst	r1, #1
-	movne	r3, r2
-	str	r3, [r0, #4]
-	pop	{r11, lr}
-	mov	pc, lr
-.Lfunc_end18:
-	.size	mcl_fpDbl_add1L, .Lfunc_end18-mcl_fpDbl_add1L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sub1L
-	.align	2
-	.type	mcl_fpDbl_sub1L,%function
-mcl_fpDbl_sub1L:                        @ @mcl_fpDbl_sub1L
-	.fnstart
-@ BB#0:
-	.save	{r11, lr}
-	push	{r11, lr}
-	ldm	r2, {r12, lr}
-	ldr	r2, [r1]
-	ldr	r1, [r1, #4]
-	ldr	r3, [r3]
-	subs	r2, r2, r12
-	str	r2, [r0]
-	mov	r2, #0
-	sbcs	r1, r1, lr
-	sbc	r2, r2, #0
-	tst	r2, #1
-	addne	r1, r1, r3
-	str	r1, [r0, #4]
-	pop	{r11, lr}
-	mov	pc, lr
-.Lfunc_end19:
-	.size	mcl_fpDbl_sub1L, .Lfunc_end19-mcl_fpDbl_sub1L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mulUnitPre2L
-	.align	2
-	.type	mcl_fp_mulUnitPre2L,%function
-mcl_fp_mulUnitPre2L:                    @ @mcl_fp_mulUnitPre2L
-	.fnstart
-@ BB#0:
-	.save	{r11, lr}
-	push	{r11, lr}
-	ldm	r1, {r3, lr}
-	umull	r12, r1, r3, r2
-	mov	r3, #0
-	umlal	r1, r3, lr, r2
-	str	r12, [r0]
-	stmib	r0, {r1, r3}
-	pop	{r11, lr}
-	mov	pc, lr
-.Lfunc_end20:
-	.size	mcl_fp_mulUnitPre2L, .Lfunc_end20-mcl_fp_mulUnitPre2L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_mulPre2L
-	.align	2
-	.type	mcl_fpDbl_mulPre2L,%function
-mcl_fpDbl_mulPre2L:                     @ @mcl_fpDbl_mulPre2L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, lr}
-	push	{r4, r5, r6, lr}
-	ldr	r3, [r2]
-	ldm	r1, {r12, lr}
-	ldr	r2, [r2, #4]
-	mov	r5, #0
-	umull	r1, r4, r12, r3
-	umlal	r4, r5, lr, r3
-	umull	r3, r6, r12, r2
-	str	r1, [r0]
-	mov	r1, #0
-	adds	r3, r3, r4
-	str	r3, [r0, #4]
-	umull	r3, r4, lr, r2
-	adcs	r2, r3, r5
-	adc	r1, r1, #0
-	adds	r2, r2, r6
-	adc	r1, r1, r4
-	str	r2, [r0, #8]
-	str	r1, [r0, #12]
-	pop	{r4, r5, r6, lr}
-	mov	pc, lr
-.Lfunc_end21:
-	.size	mcl_fpDbl_mulPre2L, .Lfunc_end21-mcl_fpDbl_mulPre2L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sqrPre2L
-	.align	2
-	.type	mcl_fpDbl_sqrPre2L,%function
-mcl_fpDbl_sqrPre2L:                     @ @mcl_fpDbl_sqrPre2L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, lr}
-	push	{r4, r5, r6, lr}
-	ldr	r2, [r1]
-	ldr	r1, [r1, #4]
-	mov	r4, #0
-	mov	lr, #0
-	umull	r12, r3, r2, r2
-	umull	r5, r6, r1, r2
-	umlal	r3, r4, r1, r2
-	str	r12, [r0]
-	adds	r2, r3, r5
-	umull	r3, r5, r1, r1
-	adcs	r1, r4, r3
-	str	r2, [r0, #4]
-	adc	r3, lr, #0
-	adds	r1, r1, r6
-	adc	r3, r3, r5
-	str	r1, [r0, #8]
-	str	r3, [r0, #12]
-	pop	{r4, r5, r6, lr}
-	mov	pc, lr
-.Lfunc_end22:
-	.size	mcl_fpDbl_sqrPre2L, .Lfunc_end22-mcl_fpDbl_sqrPre2L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mont2L
-	.align	2
-	.type	mcl_fp_mont2L,%function
-mcl_fp_mont2L:                          @ @mcl_fp_mont2L
-	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	ldm	r1, {r12, lr}
-	ldm	r2, {r1, r2}
-	mov	r7, #0
-	mov	r5, #0
-	mov	r6, #0
-	umull	r8, r9, r2, r12
-	umull	r11, r4, r12, r1
-	umlal	r9, r7, r2, lr
-	umlal	r4, r5, lr, r1
-	ldmda	r3, {r12, lr}
-	ldr	r10, [r3, #4]
-	mul	r1, r11, r12
-	umull	r3, r2, r1, lr
-	adds	r3, r3, r11
-	mov	r3, #0
-	umlal	r2, r3, r1, r10
-	adcs	r1, r2, r4
-	adcs	r2, r3, r5
+	.pad	#92
+	sub	sp, sp, #92
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	ldm	r2, {r4, r12}
+	ldr	r0, [r2, #12]
+	ldr	r9, [r2, #8]
+	ldr	r2, [r1]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	str	r2, [sp, #88]                   @ 4-byte Spill
+	ldmib	r1, {r5, r7}
+	ldr	r0, [r1, #12]
+	mov	r10, r5
+	umull	r6, r8, r5, r4
+	str	r7, [sp, #56]                   @ 4-byte Spill
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	umull	r11, r5, r2, r4
+	ldr	lr, [r3, #8]
+	str	lr, [sp, #40]                   @ 4-byte Spill
+	str	r10, [sp, #44]                  @ 4-byte Spill
+	adds	r6, r5, r6
+	umull	r2, r6, r7, r4
+	adcs	r7, r8, r2
+	umlal	r5, r2, r10, r4
+	umull	r7, r8, r0, r4
+	adcs	r0, r6, r7
+	ldr	r6, [r1, #16]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	str	r6, [sp, #72]                   @ 4-byte Spill
+	umull	r7, r0, r6, r4
+	ldr	r6, [r3]
+	str	r6, [sp, #84]                   @ 4-byte Spill
+	adcs	r7, r8, r7
+	str	r7, [sp, #60]                   @ 4-byte Spill
+	ldr	r7, [r1, #20]
+	str	r7, [sp, #80]                   @ 4-byte Spill
+	umull	r1, r8, r7, r4
+	adcs	r0, r0, r1
+	ldr	r1, [r3, #-4]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adc	r0, r8, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mul	r0, r1, r11
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r8, [r3, #4]
+	str	r8, [sp, #68]                   @ 4-byte Spill
+	umull	r1, r7, r0, r6
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	adds	r1, r11, r1
+	umull	r1, r4, r0, r8
+	str	r4, [sp, #12]                   @ 4-byte Spill
+	adcs	r8, r5, r1
+	umull	r5, r11, r0, lr
+	ldr	r1, [r3, #12]
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	adcs	r6, r2, r5
+	umull	r5, r7, r0, r1
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	adcs	lr, r1, r5
+	ldr	r1, [r3, #16]
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	umull	r5, r4, r0, r1
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	adcs	r5, r1, r5
+	ldr	r1, [r3, #20]
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	umull	r3, r2, r0, r1
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	adc	r3, r1, #0
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adds	r1, r8, r1
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	ldr	r8, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r6, r1
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	adcs	r11, lr, r11
+	ldr	lr, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r5, r7
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	adcs	r0, r0, r4
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adc	r0, r3, r2
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	umull	r3, r6, r12, r10
+	ldr	r5, [sp, #76]                   @ 4-byte Reload
+	umull	r7, r1, r12, r0
+	adds	r3, r1, r3
+	umull	r2, r3, r12, r8
+	adcs	r6, r6, r2
+	umlal	r1, r2, r12, r10
+	ldr	r10, [sp, #68]                  @ 4-byte Reload
+	umull	r6, r0, r12, r5
+	ldr	r5, [sp, #72]                   @ 4-byte Reload
+	adcs	r4, r3, r6
+	umull	r6, r3, r12, r5
+	adcs	r5, r0, r6
+	umull	r6, r0, r12, lr
+	ldr	r12, [sp, #60]                  @ 4-byte Reload
+	adcs	r3, r3, r6
+	ldr	r6, [sp, #24]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	adds	r7, r7, r6
+	ldr	r6, [sp, #20]                   @ 4-byte Reload
+	adcs	r1, r1, r6
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	adcs	r2, r2, r11
+	adcs	r6, r4, r6
+	ldr	r4, [sp, #12]                   @ 4-byte Reload
+	adcs	r11, r5, r4
+	ldr	r5, [sp, #8]                    @ 4-byte Reload
+	adcs	r3, r3, r5
+	str	r3, [sp, #24]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	mul	r4, r0, r7
+	umull	r0, r5, r4, r3
+	str	r5, [sp, #16]                   @ 4-byte Spill
+	adds	r0, r7, r0
+	ldr	r5, [sp, #16]                   @ 4-byte Reload
+	umull	r0, r3, r4, r10
+	str	r3, [sp, #12]                   @ 4-byte Spill
+	adcs	r3, r1, r0
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	umull	r1, r7, r4, r0
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	str	r7, [sp, #8]                    @ 4-byte Spill
+	adcs	r1, r2, r1
+	umull	r2, r7, r4, r0
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	adcs	r2, r6, r2
+	umull	r6, r0, r4, r7
+	adcs	r6, r11, r6
+	umull	r7, r11, r4, r12
+	ldr	r4, [sp, #24]                   @ 4-byte Reload
+	adcs	r4, r4, r7
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
+	adc	r7, r7, #0
+	adds	r3, r3, r5
+	str	r3, [sp, #24]                   @ 4-byte Spill
+	ldr	r3, [sp, #12]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	adcs	r1, r6, r1
+	ldr	r6, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r4, r0
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adc	r11, r7, r11
+	str	r1, [sp, #12]                   @ 4-byte Spill
+	umull	r5, r4, r9, r6
+	umull	r12, r1, r9, r0
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adds	r5, r1, r5
+	umull	r2, r5, r9, r8
+	ldr	r8, [sp, #52]                   @ 4-byte Reload
+	adcs	r4, r4, r2
+	umlal	r1, r2, r9, r6
+	ldr	r6, [sp, #20]                   @ 4-byte Reload
+	umull	r4, r7, r9, r0
+	adcs	r4, r5, r4
+	umull	r5, r0, r9, r3
+	adcs	r5, r7, r5
+	umull	r7, r3, r9, lr
+	ldr	lr, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	ldr	r7, [sp, #24]                   @ 4-byte Reload
+	adc	r3, r3, #0
+	adds	r7, r12, r7
+	adcs	r1, r1, r6
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	adcs	r2, r2, r6
+	ldr	r6, [sp, #12]                   @ 4-byte Reload
+	adcs	r6, r4, r6
+	ldr	r4, [sp, #8]                    @ 4-byte Reload
+	adcs	r9, r5, r4
+	mul	r4, r8, r7
+	adcs	r0, r0, r11
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adc	r0, r3, #0
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r11, [sp, #40]                  @ 4-byte Reload
+	umull	r0, r5, r4, r3
+	str	r5, [sp, #16]                   @ 4-byte Spill
+	adds	r0, r7, r0
+	ldr	r5, [sp, #16]                   @ 4-byte Reload
+	umull	r0, r3, r4, r10
+	ldr	r10, [sp, #48]                  @ 4-byte Reload
+	str	r3, [sp, #12]                   @ 4-byte Spill
+	adcs	r0, r1, r0
+	umull	r1, r3, r4, r11
+	str	r3, [sp, #8]                    @ 4-byte Spill
+	adcs	r1, r2, r1
+	umull	r2, r12, r4, r10
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	adcs	r2, r6, r2
+	umull	r6, r7, r4, r3
+	adcs	r6, r9, r6
+	umull	r3, r9, r4, lr
+	ldr	r4, [sp, #24]                   @ 4-byte Reload
+	adcs	r3, r4, r3
+	ldr	r4, [sp, #20]                   @ 4-byte Reload
+	adc	r4, r4, #0
+	adds	r0, r0, r5
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r2, r0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	adcs	r0, r6, r12
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adcs	r0, r3, r7
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	adc	r0, r4, r9
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r12, [sp, #76]                  @ 4-byte Reload
+	ldr	r9, [sp, #80]                   @ 4-byte Reload
+	umull	r3, lr, r0, r1
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	umull	r4, r2, r0, r1
+	mov	r5, r3
+	str	r4, [sp]                        @ 4-byte Spill
+	ldr	r4, [sp, #44]                   @ 4-byte Reload
+	mov	r1, r2
+	umull	r6, r7, r0, r4
+	umlal	r1, r5, r0, r4
+	adds	r2, r2, r6
+	adcs	r2, r7, r3
+	umull	r2, r3, r0, r12
+	adcs	r2, lr, r2
+	ldr	lr, [sp, #72]                   @ 4-byte Reload
+	umull	r4, r6, r0, lr
+	adcs	r3, r3, r4
+	umull	r4, r7, r0, r9
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r9, [sp, #64]                   @ 4-byte Reload
+	adcs	r4, r6, r4
+	adc	r6, r7, #0
+	ldr	r7, [sp]                        @ 4-byte Reload
+	adds	r0, r7, r0
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
+	adcs	r1, r1, r7
+	ldr	r7, [sp, #16]                   @ 4-byte Reload
+	adcs	r7, r5, r7
+	ldr	r5, [sp, #12]                   @ 4-byte Reload
+	adcs	r2, r2, r5
+	ldr	r5, [sp, #8]                    @ 4-byte Reload
+	adcs	r3, r3, r5
+	str	r3, [sp, #16]                   @ 4-byte Spill
+	ldr	r3, [sp, #4]                    @ 4-byte Reload
+	adcs	r3, r4, r3
+	mul	r4, r8, r0
+	ldr	r8, [sp, #84]                   @ 4-byte Reload
+	str	r3, [sp, #28]                   @ 4-byte Spill
 	adc	r3, r6, #0
-	adds	r1, r1, r8
-	adcs	r8, r2, r9
-	mul	r5, r1, r12
+	str	r3, [sp, #24]                   @ 4-byte Spill
+	umull	r5, r3, r4, r8
+	str	r3, [sp, #20]                   @ 4-byte Spill
+	ldr	r3, [sp, #68]                   @ 4-byte Reload
+	adds	r0, r0, r5
+	umull	r0, r5, r4, r3
+	str	r5, [sp, #12]                   @ 4-byte Spill
+	adcs	r0, r1, r0
+	umull	r1, r3, r4, r11
+	str	r3, [sp, #8]                    @ 4-byte Spill
+	adcs	r1, r7, r1
+	umull	r7, r3, r4, r10
+	ldr	r10, [sp, #60]                  @ 4-byte Reload
+	umull	r6, r11, r4, r10
+	str	r3, [sp, #4]                    @ 4-byte Spill
+	adcs	r2, r2, r7
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	umull	r7, r5, r4, r9
+	ldr	r4, [sp, #28]                   @ 4-byte Reload
+	adcs	r7, r3, r7
+	ldr	r3, [sp, #20]                   @ 4-byte Reload
+	adcs	r4, r4, r6
+	ldr	r6, [sp, #24]                   @ 4-byte Reload
+	adc	r6, r6, #0
+	adds	r0, r0, r3
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r2, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #4]                    @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	adcs	r0, r4, r5
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adc	r0, r6, r11
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r7, [sp, #44]                   @ 4-byte Reload
+	ldr	r5, [r0, #16]
+	umull	r11, r2, r5, r1
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	umull	r4, r0, r5, r7
+	adds	r4, r2, r4
+	umull	r3, r4, r5, r1
+	adcs	r0, r0, r3
+	umlal	r2, r3, r5, r7
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	umull	r0, r6, r5, r12
+	adcs	r12, r4, r0
+	umull	r4, r1, r5, lr
+	adcs	r4, r6, r4
+	umull	r6, r0, r5, r7
+	ldr	r7, [sp, #28]                   @ 4-byte Reload
+	adcs	r1, r1, r6
+	adc	r0, r0, #0
+	adds	r6, r11, r7
+	ldr	r7, [sp, #24]                   @ 4-byte Reload
+	adcs	r2, r2, r7
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
 	adcs	r3, r3, r7
-	umull	r7, r2, r5, lr
-	adc	r4, r6, #0
-	umlal	r2, r6, r5, r10
-	adds	r1, r7, r1
-	adcs	r1, r2, r8
-	adcs	r2, r6, r3
-	adc	r3, r4, #0
-	subs	r7, r1, lr
-	sbcs	r6, r2, r10
-	sbc	r3, r3, #0
-	ands	r3, r3, #1
-	movne	r7, r1
-	movne	r6, r2
-	str	r7, [r0]
-	str	r6, [r0, #4]
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end23:
-	.size	mcl_fp_mont2L, .Lfunc_end23-mcl_fp_mont2L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montNF2L
-	.align	2
-	.type	mcl_fp_montNF2L,%function
-mcl_fp_montNF2L:                        @ @mcl_fp_montNF2L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	ldm	r2, {r12, lr}
-	ldr	r11, [r1]
-	ldr	r8, [r3, #-4]
-	ldr	r7, [r3]
-	ldr	r9, [r1, #4]
-	ldr	r3, [r3, #4]
-	umull	r4, r5, r11, r12
-	mul	r6, r4, r8
-	umull	r1, r10, r6, r7
-	adds	r1, r1, r4
-	mov	r4, #0
-	umlal	r5, r4, r9, r12
-	umull	r2, r12, r6, r3
-	mov	r1, #0
-	adcs	r2, r2, r5
+	ldr	r7, [sp, #16]                   @ 4-byte Reload
+	adcs	r5, r12, r7
+	ldr	r7, [sp, #12]                   @ 4-byte Reload
+	adcs	r7, r4, r7
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	ldr	r7, [sp, #8]                    @ 4-byte Reload
+	adcs	r1, r1, r7
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	mul	r4, r0, r6
+	umull	r0, r1, r4, r8
+	ldr	r8, [sp, #40]                   @ 4-byte Reload
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	adds	r0, r6, r0
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
+	umull	r0, r11, r4, r1
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r2, r0
+	umull	r2, lr, r4, r8
+	adcs	r2, r3, r2
+	umull	r3, r12, r4, r1
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r3, r5, r3
+	umull	r5, r6, r4, r9
+	adcs	r5, r1, r5
+	umull	r1, r9, r4, r10
+	ldr	r4, [sp, #28]                   @ 4-byte Reload
+	adcs	r1, r4, r1
+	ldr	r4, [sp, #24]                   @ 4-byte Reload
 	adc	r4, r4, #0
-	adds	r2, r2, r10
-	adc	r6, r4, r12
-	umull	r5, r4, lr, r11
-	adds	r2, r5, r2
-	umlal	r4, r1, lr, r9
-	adcs	r9, r4, r6
-	mul	r5, r2, r8
-	adc	lr, r1, #0
-	umull	r1, r6, r5, r7
-	umull	r4, r12, r5, r3
-	adds	r1, r1, r2
-	adcs	r1, r4, r9
-	adc	r2, lr, #0
-	adds	r1, r1, r6
-	adc	r2, r2, r12
-	subs	r7, r1, r7
-	sbc	r3, r2, r3
-	cmp	r3, #0
-	movlt	r7, r1
-	movlt	r3, r2
-	str	r7, [r0]
-	str	r3, [r0, #4]
+	adds	r0, r0, r7
+	adcs	r10, r2, r11
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	adcs	r11, r3, lr
+	ldr	r7, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r5, r12
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r1, r6
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	adc	r0, r4, r9
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r5, [r0, #20]
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	umull	r6, r1, r5, r0
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	umull	lr, r3, r5, r0
+	umull	r12, r0, r5, r7
+	mov	r4, r6
+	mov	r2, r3
+	umlal	r2, r4, r5, r7
+	ldr	r7, [sp, #76]                   @ 4-byte Reload
+	adds	r3, r3, r12
+	adcs	r0, r0, r6
+	umull	r0, r3, r5, r7
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	adcs	r12, r1, r0
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	umull	r1, r6, r5, r0
+	adcs	r1, r3, r1
+	umull	r3, r0, r5, r7
+	ldr	r5, [sp, #28]                   @ 4-byte Reload
+	adcs	r3, r6, r3
+	adc	r0, r0, #0
+	adds	r6, lr, r5
+	adcs	r7, r2, r10
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r9, r4, r11
+	ldr	r5, [sp, #84]                   @ 4-byte Reload
+	adcs	r10, r12, r2
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	ldr	lr, [sp, #48]                   @ 4-byte Reload
+	adcs	r12, r1, r2
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	ldr	r2, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	mul	r4, r0, r6
+	umull	r0, r1, r4, r5
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	adds	r0, r6, r0
+	umull	r6, r0, r4, r8
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	umull	r3, r0, r4, r2
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	adcs	r11, r7, r3
+	umull	r3, r0, r4, lr
+	adcs	r1, r9, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	adcs	r10, r10, r3
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	umull	r7, r0, r4, r3
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	adcs	r9, r12, r7
+	ldr	r12, [sp, #60]                  @ 4-byte Reload
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	umull	r6, r0, r4, r12
+	ldr	r4, [sp, #88]                   @ 4-byte Reload
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	adcs	r6, r4, r6
+	ldr	r4, [sp, #80]                   @ 4-byte Reload
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adc	r4, r4, #0
+	adds	r0, r11, r0
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	adcs	r1, r1, r7
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	adcs	r10, r10, r7
+	ldr	r7, [sp, #52]                   @ 4-byte Reload
+	adcs	r9, r9, r7
+	ldr	r7, [sp, #44]                   @ 4-byte Reload
+	adcs	r11, r6, r7
+	ldr	r6, [sp, #36]                   @ 4-byte Reload
+	adc	r6, r4, r6
+	subs	r5, r0, r5
+	sbcs	r4, r1, r2
+	sbcs	r2, r10, r8
+	sbcs	r0, r9, lr
+	sbcs	r3, r11, r3
+	sbc	r7, r6, r12
+	asr	r1, r7, #31
+	cmn	r1, #1
+	movgt	r6, r7
+	ldr	r7, [sp, #32]                   @ 4-byte Reload
+	movle	r0, r9
+	movle	r3, r11
+	cmn	r1, #1
+	str	r0, [r7, #12]
+	movle	r2, r10
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	str	r6, [r7, #20]
+	str	r3, [r7, #16]
+	movle	r4, r0
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	str	r2, [r7, #8]
+	str	r4, [r7, #4]
+	movle	r5, r0
+	str	r5, [r7]
+	add	sp, sp, #92
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end24:
-	.size	mcl_fp_montNF2L, .Lfunc_end24-mcl_fp_montNF2L
+.Lfunc_end10:
+	.size	mcl_fp_montNF6L, .Lfunc_end10-mcl_fp_montNF6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_montRed2L
-	.align	2
-	.type	mcl_fp_montRed2L,%function
-mcl_fp_montRed2L:                       @ @mcl_fp_montRed2L
+                                        @ -- End function
+	.globl	mcl_fp_montRed6L                @ -- Begin function mcl_fp_montRed6L
+	.p2align	2
+	.type	mcl_fp_montRed6L,%function
+	.code	32                              @ @mcl_fp_montRed6L
+mcl_fp_montRed6L:
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, lr}
-	ldr	r12, [r2, #-4]
-	ldm	r2, {r3, lr}
-	ldm	r1, {r2, r9, r10}
-	ldr	r8, [r1, #12]
-	mov	r5, #0
-	mov	r7, #0
-	mul	r6, r2, r12
-	umull	r1, r4, r6, r3
-	umlal	r4, r5, r6, lr
-	adds	r1, r2, r1
-	adcs	r1, r9, r4
-	adcs	r9, r10, r5
-	mul	r6, r1, r12
-	adcs	r8, r8, #0
-	umull	r2, r4, r6, r3
-	adc	r5, r7, #0
-	umlal	r4, r7, r6, lr
-	adds	r1, r2, r1
-	adcs	r1, r4, r9
-	adcs	r2, r7, r8
-	adc	r7, r5, #0
-	subs	r3, r1, r3
-	sbcs	r6, r2, lr
-	sbc	r7, r7, #0
-	ands	r7, r7, #1
-	movne	r3, r1
-	movne	r6, r2
-	stm	r0, {r3, r6}
-	pop	{r4, r5, r6, r7, r8, r9, r10, lr}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#72
+	sub	sp, sp, #72
+	mov	r8, r1
+	ldr	r3, [r2, #-4]
+	ldr	r6, [r8, #4]
+	ldr	r9, [r8]
+	str	r6, [sp, #36]                   @ 4-byte Spill
+	ldr	r6, [r8, #8]
+	str	r6, [sp, #32]                   @ 4-byte Spill
+	ldr	r6, [r8, #12]
+	str	r6, [sp, #28]                   @ 4-byte Spill
+	mul	r6, r9, r3
+	ldr	r1, [r2, #4]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [r2]
+	ldr	r7, [r2, #8]
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	umull	r4, r5, r6, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	str	r3, [sp, #60]                   @ 4-byte Spill
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	umull	lr, r1, r6, r0
+	umull	r0, r12, r6, r7
+	adds	r4, r1, r4
+	adcs	r4, r5, r0
+	ldr	r4, [r2, #12]
+	str	r4, [sp, #48]                   @ 4-byte Spill
+	umull	r5, r3, r6, r4
+	ldr	r4, [r2, #16]
+	str	r4, [sp, #52]                   @ 4-byte Spill
+	adcs	r11, r12, r5
+	umull	r10, r12, r6, r4
+	adcs	r5, r3, r10
+	ldr	r3, [r2, #20]
+	str	r3, [sp, #44]                   @ 4-byte Spill
+	umull	r4, r2, r6, r3
+	adcs	r12, r12, r4
+	ldr	r4, [sp, #68]                   @ 4-byte Reload
+	adc	r2, r2, #0
+	adds	r7, lr, r9
+	ldr	r7, [sp, #36]                   @ 4-byte Reload
+	umlal	r1, r0, r6, r4
+	ldr	lr, [sp, #48]                   @ 4-byte Reload
+	adcs	r9, r1, r7
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	ldr	r7, [r8, #16]
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r11, r0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	adcs	r0, r5, r7
+	ldr	r7, [r8, #20]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	adcs	r0, r12, r7
+	ldr	r7, [r8, #24]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r2, r7
+	ldr	r12, [sp, #60]                  @ 4-byte Reload
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	mul	r0, r12, r9
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	ldr	r5, [sp, #64]                   @ 4-byte Reload
+	umull	r2, r1, r0, r7
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	umull	r6, r2, r0, r4
+	adds	r1, r6, r1
+	str	r1, [sp, #8]                    @ 4-byte Spill
+	umull	r6, r1, r0, r5
+	adcs	r2, r6, r2
+	umull	r6, r4, r0, lr
+	str	r2, [sp, #4]                    @ 4-byte Spill
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r6, r1
+	umull	r6, r10, r0, r2
+	str	r1, [sp]                        @ 4-byte Spill
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	adcs	r11, r6, r4
+	umull	r6, r4, r0, r3
+	adcs	r0, r6, r10
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	adc	r10, r6, r4
+	ldr	r4, [sp, #12]                   @ 4-byte Reload
+	adds	r6, r4, r9
+	ldr	r4, [sp, #36]                   @ 4-byte Reload
+	ldr	r6, [sp, #8]                    @ 4-byte Reload
+	adcs	r6, r6, r4
+	ldr	r4, [sp, #32]                   @ 4-byte Reload
+	adcs	r4, r1, r4
+	str	r4, [sp, #36]                   @ 4-byte Spill
+	ldr	r4, [sp, #28]                   @ 4-byte Reload
+	ldr	r1, [sp]                        @ 4-byte Reload
+	adcs	r4, r1, r4
+	str	r4, [sp, #32]                   @ 4-byte Spill
+	ldr	r4, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r11, r4
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	ldr	r4, [r8, #28]
+	adcs	r0, r0, r1
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r10, r4
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	mul	r0, r12, r6
+	umull	r1, r4, r0, r7
+	str	r1, [sp, #12]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	umull	r7, r12, r0, r1
+	adds	r9, r7, r4
+	umull	r7, r11, r0, r5
+	adcs	r7, r7, r12
+	str	r7, [sp, #8]                    @ 4-byte Spill
+	umull	r7, r10, r0, lr
+	adcs	r11, r7, r11
+	umull	r7, r12, r0, r2
+	adcs	r5, r7, r10
+	umull	r7, r2, r0, r3
+	mov	r10, r3
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r7, r12
+	ldr	r7, [sp, #8]                    @ 4-byte Reload
+	adc	r3, r3, r2
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	ldr	r12, [sp, #52]                  @ 4-byte Reload
+	adds	r2, r2, r6
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	adcs	r9, r9, r2
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	adcs	r2, r7, r2
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r2, r11, r2
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r2, r5, r2
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	ldr	r5, [r8, #32]
+	adcs	r0, r0, r2
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r3, r5
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	mul	r0, r3, r9
+	ldr	r5, [sp, #56]                   @ 4-byte Reload
+	umull	r2, r6, r0, r5
+	umull	r7, r4, r0, r1
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	adds	r1, r7, r6
+	str	r1, [sp, #8]                    @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	umull	r7, r2, r0, r1
+	adcs	r7, r7, r4
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	umull	r7, r4, r0, lr
+	adcs	r11, r7, r2
+	umull	r7, r6, r0, r12
+	adcs	r4, r7, r4
+	umull	r7, r2, r0, r10
+	mov	r10, r5
+	adcs	r0, r7, r6
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	adc	r7, r6, r2
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	ldr	r6, [sp, #8]                    @ 4-byte Reload
+	adds	r2, r2, r9
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	adcs	r9, r6, r2
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	ldr	r6, [sp, #4]                    @ 4-byte Reload
+	adcs	r2, r6, r2
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r2, r11, r2
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r2, r4, r2
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	ldr	r4, [r8, #36]
+	adcs	r0, r0, r2
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r7, r4
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	mul	r0, r3, r9
+	umull	r2, r6, r0, r5
+	ldr	r5, [sp, #68]                   @ 4-byte Reload
+	umull	r7, r3, r0, r5
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	adds	r11, r7, r6
+	umull	r7, r2, r0, r1
+	adcs	r6, r7, r3
+	umull	r7, r1, r0, lr
+	ldr	lr, [sp, #44]                   @ 4-byte Reload
+	adcs	r4, r7, r2
+	umull	r7, r2, r0, r12
+	adcs	r12, r7, r1
+	umull	r7, r1, r0, lr
+	adcs	r0, r7, r2
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adc	r1, r2, r1
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	adds	r2, r2, r9
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	adcs	r3, r11, r2
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	adcs	r2, r6, r2
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r2, r4, r2
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	ldr	r4, [r8, #40]
+	adcs	r2, r12, r2
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	ldr	r12, [sp, #52]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r1, r4
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	mul	r0, r1, r3
+	umull	r1, r6, r0, r10
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	umull	r7, r1, r0, r5
+	ldr	r5, [sp, #48]                   @ 4-byte Reload
+	adds	r11, r7, r6
+	ldr	r6, [sp, #64]                   @ 4-byte Reload
+	umull	r7, r2, r0, r6
+	adcs	r10, r7, r1
+	umull	r7, r1, r0, r5
+	adcs	r9, r7, r2
+	umull	r7, r2, r0, r12
+	adcs	r4, r7, r1
+	umull	r7, r1, r0, lr
+	adcs	r0, r7, r2
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adc	r1, r2, r1
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
+	adds	r2, r2, r3
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	adcs	r2, r11, r2
+	adcs	lr, r10, r3
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	adcs	r9, r9, r3
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	adcs	r10, r4, r3
+	ldr	r3, [sp, #20]                   @ 4-byte Reload
+	adcs	r11, r0, r3
+	ldr	r3, [r8, #44]
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	mov	r8, r2
+	adc	r1, r1, r3
+	subs	r3, r2, r0
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	sbcs	r4, lr, r0
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	sbcs	r2, r9, r6
+	sbcs	r7, r10, r5
+	sbcs	r6, r11, r12
+	sbcs	r5, r1, r0
+	mov	r0, #0
+	sbc	r0, r0, #0
+	ands	r0, r0, #1
+	movne	r5, r1
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	movne	r6, r11
+	movne	r7, r10
+	cmp	r0, #0
+	movne	r2, r9
+	movne	r4, lr
+	movne	r3, r8
+	str	r5, [r1, #20]
+	str	r6, [r1, #16]
+	str	r7, [r1, #12]
+	str	r2, [r1, #8]
+	str	r4, [r1, #4]
+	str	r3, [r1]
+	add	sp, sp, #72
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end25:
-	.size	mcl_fp_montRed2L, .Lfunc_end25-mcl_fp_montRed2L
+.Lfunc_end11:
+	.size	mcl_fp_montRed6L, .Lfunc_end11-mcl_fp_montRed6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_addPre2L
-	.align	2
-	.type	mcl_fp_addPre2L,%function
-mcl_fp_addPre2L:                        @ @mcl_fp_addPre2L
+                                        @ -- End function
+	.globl	mcl_fp_montRedNF6L              @ -- Begin function mcl_fp_montRedNF6L
+	.p2align	2
+	.type	mcl_fp_montRedNF6L,%function
+	.code	32                              @ @mcl_fp_montRedNF6L
+mcl_fp_montRedNF6L:
 	.fnstart
-@ BB#0:
-	ldm	r1, {r3, r12}
-	ldm	r2, {r1, r2}
-	adds	r1, r1, r3
-	adcs	r2, r2, r12
-	stm	r0, {r1, r2}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#72
+	sub	sp, sp, #72
+	mov	r8, r1
+	ldr	r3, [r2, #-4]
+	ldr	r6, [r8, #4]
+	ldr	r9, [r8]
+	str	r6, [sp, #36]                   @ 4-byte Spill
+	ldr	r6, [r8, #8]
+	str	r6, [sp, #32]                   @ 4-byte Spill
+	ldr	r6, [r8, #12]
+	str	r6, [sp, #28]                   @ 4-byte Spill
+	mul	r6, r9, r3
+	ldr	r1, [r2, #4]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [r2]
+	ldr	r7, [r2, #8]
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	umull	r4, r5, r6, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	str	r3, [sp, #60]                   @ 4-byte Spill
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	umull	lr, r1, r6, r0
+	umull	r0, r12, r6, r7
+	adds	r4, r1, r4
+	adcs	r4, r5, r0
+	ldr	r4, [r2, #12]
+	str	r4, [sp, #48]                   @ 4-byte Spill
+	umull	r5, r3, r6, r4
+	ldr	r4, [r2, #16]
+	str	r4, [sp, #52]                   @ 4-byte Spill
+	adcs	r11, r12, r5
+	umull	r10, r12, r6, r4
+	adcs	r5, r3, r10
+	ldr	r3, [r2, #20]
+	str	r3, [sp, #44]                   @ 4-byte Spill
+	umull	r4, r2, r6, r3
+	adcs	r12, r12, r4
+	ldr	r4, [sp, #68]                   @ 4-byte Reload
+	adc	r2, r2, #0
+	adds	r7, lr, r9
+	ldr	r7, [sp, #36]                   @ 4-byte Reload
+	umlal	r1, r0, r6, r4
+	ldr	lr, [sp, #48]                   @ 4-byte Reload
+	adcs	r9, r1, r7
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	ldr	r7, [r8, #16]
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r11, r0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	adcs	r0, r5, r7
+	ldr	r7, [r8, #20]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	adcs	r0, r12, r7
+	ldr	r7, [r8, #24]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r2, r7
+	ldr	r12, [sp, #60]                  @ 4-byte Reload
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	mul	r0, r12, r9
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	ldr	r5, [sp, #64]                   @ 4-byte Reload
+	umull	r2, r1, r0, r7
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	umull	r6, r2, r0, r4
+	adds	r1, r6, r1
+	str	r1, [sp, #8]                    @ 4-byte Spill
+	umull	r6, r1, r0, r5
+	adcs	r2, r6, r2
+	umull	r6, r4, r0, lr
+	str	r2, [sp, #4]                    @ 4-byte Spill
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r6, r1
+	umull	r6, r10, r0, r2
+	str	r1, [sp]                        @ 4-byte Spill
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	adcs	r11, r6, r4
+	umull	r6, r4, r0, r3
+	adcs	r0, r6, r10
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	adc	r10, r6, r4
+	ldr	r4, [sp, #12]                   @ 4-byte Reload
+	adds	r6, r4, r9
+	ldr	r4, [sp, #36]                   @ 4-byte Reload
+	ldr	r6, [sp, #8]                    @ 4-byte Reload
+	adcs	r6, r6, r4
+	ldr	r4, [sp, #32]                   @ 4-byte Reload
+	adcs	r4, r1, r4
+	str	r4, [sp, #36]                   @ 4-byte Spill
+	ldr	r4, [sp, #28]                   @ 4-byte Reload
+	ldr	r1, [sp]                        @ 4-byte Reload
+	adcs	r4, r1, r4
+	str	r4, [sp, #32]                   @ 4-byte Spill
+	ldr	r4, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r11, r4
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	ldr	r4, [r8, #28]
+	adcs	r0, r0, r1
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r10, r4
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	mul	r0, r12, r6
+	umull	r1, r4, r0, r7
+	str	r1, [sp, #12]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	umull	r7, r12, r0, r1
+	adds	r9, r7, r4
+	umull	r7, r11, r0, r5
+	adcs	r7, r7, r12
+	str	r7, [sp, #8]                    @ 4-byte Spill
+	umull	r7, r10, r0, lr
+	adcs	r11, r7, r11
+	umull	r7, r12, r0, r2
+	adcs	r5, r7, r10
+	umull	r7, r2, r0, r3
+	mov	r10, r3
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r7, r12
+	ldr	r7, [sp, #8]                    @ 4-byte Reload
+	adc	r3, r3, r2
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	ldr	r12, [sp, #52]                  @ 4-byte Reload
+	adds	r2, r2, r6
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	adcs	r9, r9, r2
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	adcs	r2, r7, r2
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r2, r11, r2
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r2, r5, r2
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	ldr	r5, [r8, #32]
+	adcs	r0, r0, r2
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r3, r5
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	mul	r0, r3, r9
+	ldr	r5, [sp, #56]                   @ 4-byte Reload
+	umull	r2, r6, r0, r5
+	umull	r7, r4, r0, r1
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	adds	r1, r7, r6
+	str	r1, [sp, #8]                    @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	umull	r7, r2, r0, r1
+	adcs	r7, r7, r4
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	umull	r7, r4, r0, lr
+	adcs	r11, r7, r2
+	umull	r7, r6, r0, r12
+	adcs	r4, r7, r4
+	umull	r7, r2, r0, r10
+	mov	r10, r5
+	adcs	r0, r7, r6
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	adc	r7, r6, r2
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	ldr	r6, [sp, #8]                    @ 4-byte Reload
+	adds	r2, r2, r9
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	adcs	r9, r6, r2
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	ldr	r6, [sp, #4]                    @ 4-byte Reload
+	adcs	r2, r6, r2
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r2, r11, r2
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r2, r4, r2
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	ldr	r4, [r8, #36]
+	adcs	r0, r0, r2
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r7, r4
+	str	r0, [sp, #20]                   @ 4-byte Spill
 	mov	r0, #0
 	adc	r0, r0, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	mul	r0, r3, r9
+	umull	r2, r6, r0, r5
+	ldr	r5, [sp, #68]                   @ 4-byte Reload
+	umull	r7, r3, r0, r5
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	adds	r11, r7, r6
+	umull	r7, r2, r0, r1
+	adcs	r6, r7, r3
+	umull	r7, r1, r0, lr
+	ldr	lr, [sp, #44]                   @ 4-byte Reload
+	adcs	r4, r7, r2
+	umull	r7, r2, r0, r12
+	adcs	r12, r7, r1
+	umull	r7, r1, r0, lr
+	adcs	r0, r7, r2
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adc	r1, r2, r1
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	adds	r2, r2, r9
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	adcs	r3, r11, r2
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	adcs	r2, r6, r2
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r2, r4, r2
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	ldr	r4, [r8, #40]
+	adcs	r2, r12, r2
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	ldr	r12, [sp, #52]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r1, r4
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	mul	r0, r1, r3
+	umull	r1, r6, r0, r10
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	umull	r7, r1, r0, r5
+	ldr	r5, [sp, #48]                   @ 4-byte Reload
+	adds	r11, r7, r6
+	ldr	r6, [sp, #64]                   @ 4-byte Reload
+	umull	r7, r2, r0, r6
+	adcs	r10, r7, r1
+	umull	r7, r1, r0, r5
+	adcs	r9, r7, r2
+	umull	r7, r2, r0, r12
+	adcs	r4, r7, r1
+	umull	r7, r1, r0, lr
+	adcs	r0, r7, r2
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adc	r1, r2, r1
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
+	adds	r2, r2, r3
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	adcs	r2, r11, r2
+	adcs	lr, r10, r3
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	adcs	r9, r9, r3
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	adcs	r10, r4, r3
+	ldr	r3, [sp, #20]                   @ 4-byte Reload
+	adcs	r11, r0, r3
+	ldr	r3, [r8, #44]
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	mov	r8, r2
+	adc	r1, r1, r3
+	subs	r3, r2, r0
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	sbcs	r4, lr, r0
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	sbcs	r2, r9, r6
+	sbcs	r7, r10, r5
+	sbcs	r6, r11, r12
+	sbc	r5, r1, r0
+	asr	r0, r5, #31
+	cmn	r0, #1
+	movgt	r1, r5
+	ldr	r5, [sp, #40]                   @ 4-byte Reload
+	movle	r6, r11
+	movle	r7, r10
+	cmn	r0, #1
+	movle	r2, r9
+	movle	r4, lr
+	movle	r3, r8
+	str	r1, [r5, #20]
+	str	r6, [r5, #16]
+	str	r7, [r5, #12]
+	str	r2, [r5, #8]
+	str	r4, [r5, #4]
+	str	r3, [r5]
+	add	sp, sp, #72
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end26:
-	.size	mcl_fp_addPre2L, .Lfunc_end26-mcl_fp_addPre2L
+.Lfunc_end12:
+	.size	mcl_fp_montRedNF6L, .Lfunc_end12-mcl_fp_montRedNF6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_subPre2L
-	.align	2
-	.type	mcl_fp_subPre2L,%function
-mcl_fp_subPre2L:                        @ @mcl_fp_subPre2L
+                                        @ -- End function
+	.globl	mcl_fp_addPre6L                 @ -- Begin function mcl_fp_addPre6L
+	.p2align	2
+	.type	mcl_fp_addPre6L,%function
+	.code	32                              @ @mcl_fp_addPre6L
+mcl_fp_addPre6L:                        @ z = x + y over six 32-bit words (z=[r0], x=[r1], y=[r2], word 0 first); returns the carry-out in r0
 	.fnstart
-@ BB#0:
-	ldm	r2, {r3, r12}
-	ldr	r2, [r1]
-	ldr	r1, [r1, #4]
-	subs	r2, r2, r3
-	sbcs	r1, r1, r12
-	str	r2, [r0]
-	str	r1, [r0, #4]
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, lr}
+	push	{r4, r5, r6, r7, r8, lr}
+	ldm	r2, {r3, r12, lr}               @ y[0..2]
+	ldm	r1, {r5, r6, r7}                @ x[0..2]
+	adds	r3, r5, r3                      @ word 0: starts the carry chain
+	str	r3, [r0]
+	adcs	r3, r6, r12                     @ word 1 (the ldr/str in between leave the flags untouched)
+	ldr	r8, [r2, #12]
+	ldr	r4, [r1, #12]
+	str	r3, [r0, #4]
+	adcs	r3, r7, lr                      @ word 2
+	str	r3, [r0, #8]
+	adcs	r3, r4, r8                      @ word 3
+	str	r3, [r0, #12]
+	ldr	r3, [r2, #16]
+	ldr	r7, [r1, #16]
+	ldr	r2, [r2, #20]
+	ldr	r1, [r1, #20]
+	adcs	r3, r7, r3                      @ word 4
+	str	r3, [r0, #16]
+	adcs	r1, r1, r2                      @ word 5: final carry stays in C
+	str	r1, [r0, #20]
+	mov	r0, #0
+	adc	r0, r0, #0                      @ r0 = carry-out (0 or 1)
+	pop	{r4, r5, r6, r7, r8, lr}
+	mov	pc, lr
+.Lfunc_end13:
+	.size	mcl_fp_addPre6L, .Lfunc_end13-mcl_fp_addPre6L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_subPre6L                 @ -- Begin function mcl_fp_subPre6L
+	.p2align	2
+	.type	mcl_fp_subPre6L,%function
+	.code	32                              @ @mcl_fp_subPre6L
+mcl_fp_subPre6L:                        @ z = x - y over six 32-bit words (z=[r0], x=[r1], y=[r2], word 0 first); returns the borrow-out (0 or 1) in r0
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, lr}
+	push	{r4, r5, r6, r7, r8, lr}
+	ldm	r2, {r3, r12, lr}               @ y[0..2]
+	ldm	r1, {r5, r6, r7}                @ x[0..2]
+	subs	r3, r5, r3                      @ word 0: starts the borrow chain
+	str	r3, [r0]
+	sbcs	r3, r6, r12                     @ word 1 (the ldr/str in between leave the flags untouched)
+	ldr	r8, [r2, #12]
+	ldr	r4, [r1, #12]
+	str	r3, [r0, #4]
+	sbcs	r3, r7, lr                      @ word 2
+	str	r3, [r0, #8]
+	sbcs	r3, r4, r8                      @ word 3
+	str	r3, [r0, #12]
+	ldr	r3, [r2, #16]
+	ldr	r7, [r1, #16]
+	ldr	r2, [r2, #20]
+	ldr	r1, [r1, #20]
+	sbcs	r3, r7, r3                      @ word 4
+	str	r3, [r0, #16]
+	sbcs	r1, r1, r2                      @ word 5: C is clear afterwards iff the subtraction borrowed
+	str	r1, [r0, #20]
 	mov	r0, #0
 	sbc	r0, r0, #0
 	and	r0, r0, #1
+	pop	{r4, r5, r6, r7, r8, lr}         @ r0 already holds the return value: 1 if x < y (borrow), else 0
 	mov	pc, lr
-.Lfunc_end27:
-	.size	mcl_fp_subPre2L, .Lfunc_end27-mcl_fp_subPre2L
+.Lfunc_end14:
+	.size	mcl_fp_subPre6L, .Lfunc_end14-mcl_fp_subPre6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_shr1_2L
-	.align	2
-	.type	mcl_fp_shr1_2L,%function
-mcl_fp_shr1_2L:                         @ @mcl_fp_shr1_2L
+                                        @ -- End function
+	.globl	mcl_fp_shr1_6L                  @ -- Begin function mcl_fp_shr1_6L
+	.p2align	2
+	.type	mcl_fp_shr1_6L,%function
+	.code	32                              @ @mcl_fp_shr1_6L
+mcl_fp_shr1_6L:                         @ z = x >> 1 for the six-word (192-bit) value x=[r1], written to z=[r0]; word 0 is least significant
 	.fnstart
-@ BB#0:
-	ldr	r2, [r1]
-	ldr	r1, [r1, #4]
-	lsrs	r3, r1, #1
-	lsr	r1, r1, #1
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r11, lr}
+	push	{r4, r5, r6, r7, r11, lr}
+	add	r4, r1, #8                      @ r4 -> x[2]
+	ldm	r1, {r12, lr}                   @ r12 = x[0], lr = x[1]
+	lsr	r6, lr, #1
+	ldr	r1, [r1, #20]                   @ r1 = x[5]
+	ldm	r4, {r2, r3, r4}                @ r2 = x[2], r3 = x[3], r4 = x[4]
+	lsr	r5, r3, #1
+	orr	r5, r5, r4, lsl #31             @ z[3] = (x[3] >> 1) | (x[4] << 31)
+	lsr	r7, r1, #1                      @ z[5] = x[5] >> 1 (top bit becomes 0)
+	lsrs	r1, r1, #1                      @ C = bit 0 of x[5]
+	rrx	r1, r4                          @ z[4] = (x[4] >> 1) | (C << 31)
+	lsrs	r3, r3, #1                      @ C = bit 0 of x[3], consumed by the rrx on r2 below
+	orr	r6, r6, r2, lsl #31             @ z[1] = (x[1] >> 1) | (x[2] << 31)
 	rrx	r2, r2
-	str	r2, [r0]
-	str	r1, [r0, #4]
+	lsrs	r3, lr, #1                      @ C = bit 0 of x[1]
+	rrx	r3, r12                         @ z[0] = (x[0] >> 1) | (C << 31)
+	stm	r0, {r3, r6}                    @ store z[0], z[1]
+	str	r2, [r0, #8]
+	str	r5, [r0, #12]
+	str	r1, [r0, #16]
+	str	r7, [r0, #20]
+	pop	{r4, r5, r6, r7, r11, lr}
 	mov	pc, lr
-.Lfunc_end28:
-	.size	mcl_fp_shr1_2L, .Lfunc_end28-mcl_fp_shr1_2L
+.Lfunc_end15:
+	.size	mcl_fp_shr1_6L, .Lfunc_end15-mcl_fp_shr1_6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_add2L
-	.align	2
-	.type	mcl_fp_add2L,%function
-mcl_fp_add2L:                           @ @mcl_fp_add2L
+                                        @ -- End function
+	.globl	mcl_fp_add6L                    @ -- Begin function mcl_fp_add6L
+	.p2align	2
+	.type	mcl_fp_add6L,%function
+	.code	32                              @ @mcl_fp_add6L
+mcl_fp_add6L:                           @ z = x + y, then z -= p unless that borrows (z=[r0], x=[r1], y=[r2], p=[r3], six words each; p is presumably the field modulus)
 	.fnstart
-@ BB#0:
-	.save	{r4, lr}
-	push	{r4, lr}
-	ldm	r1, {r12, lr}
-	ldm	r2, {r1, r2}
-	adds	r12, r1, r12
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	ldm	r2, {r8, r12, lr}               @ y[0..2]
+	ldm	r1, {r6, r7}                    @ x[0..1]
+	adds	r10, r6, r8                     @ sum word 0: starts the carry chain
+	ldr	r4, [r1, #8]
+	adcs	r11, r7, r12                    @ sum word 1
+	ldr	r9, [r2, #12]
+	ldr	r5, [r1, #12]
+	adcs	r12, r4, lr                     @ sum word 2
+	ldr	r4, [r2, #16]
+	adcs	lr, r5, r9                      @ sum word 3
+	ldr	r5, [r1, #16]
+	ldr	r2, [r2, #20]
+	ldr	r1, [r1, #20]
+	adcs	r4, r5, r4                      @ sum word 4
+	stm	r0, {r10, r11, r12, lr}         @ store the unreduced sum words 0..3 first
+	adcs	r5, r1, r2                      @ sum word 5
 	mov	r1, #0
-	adcs	r2, r2, lr
-	str	r12, [r0]
+	ldr	r2, [r3]                        @ p[0]
+	adc	r8, r1, #0                      @ r8 = carry-out of the addition
+	ldmib	r3, {r1, r6, r7, r9}            @ p[1..4]
+	subs	r10, r10, r2                    @ trial subtraction sum - p, word 0
+	sbcs	r2, r11, r1
+	ldr	r3, [r3, #20]                   @ p[5]
+	sbcs	r1, r12, r6
+	str	r4, [r0, #16]                   @ unreduced sum word 4
+	sbcs	r12, lr, r7
+	str	r5, [r0, #20]                   @ unreduced sum word 5
+	sbcs	lr, r4, r9
+	sbcs	r4, r5, r3
+	sbc	r3, r8, #0                      @ addition carry minus the subtraction borrow
+	tst	r3, #1
+	bne	.LBB16_2                        @ bit 0 set: keep the unreduced sum already stored at [r0]
+@ %bb.1:                                @ %nocarry
 	str	r2, [r0, #4]
-	adc	lr, r1, #0
-	ldm	r3, {r1, r4}
-	subs	r3, r12, r1
-	sbcs	r2, r2, r4
-	sbc	r1, lr, #0
-	tst	r1, #1
-	streq	r3, [r0]
-	streq	r2, [r0, #4]
-	pop	{r4, lr}
+	add	r2, r0, #8
+	str	r10, [r0]                       @ overwrite [r0] with sum - p
+	stm	r2, {r1, r12, lr}               @ words 2..4 of sum - p
+	str	r4, [r0, #20]
+.LBB16_2:                               @ %carry
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end29:
-	.size	mcl_fp_add2L, .Lfunc_end29-mcl_fp_add2L
+.Lfunc_end16:
+	.size	mcl_fp_add6L, .Lfunc_end16-mcl_fp_add6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_addNF2L
-	.align	2
-	.type	mcl_fp_addNF2L,%function
-mcl_fp_addNF2L:                         @ @mcl_fp_addNF2L
+                                        @ -- End function
+	.globl	mcl_fp_addNF6L                  @ -- Begin function mcl_fp_addNF6L
+	.p2align	2
+	.type	mcl_fp_addNF6L,%function
+	.code	32                              @ @mcl_fp_addNF6L
+mcl_fp_addNF6L:                         @ s = x + y; z = s - p if the top word of s - p is non-negative as a signed value, else z = s (z=[r0], x=[r1], y=[r2], p=[r3]; p presumably the modulus)
 	.fnstart
-@ BB#0:
-	.save	{r4, lr}
-	push	{r4, lr}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	add	r11, r1, #8                     @ r11 -> x[2]; taken before r1 is overwritten below
 	ldm	r1, {r12, lr}
-	ldm	r2, {r1, r2}
-	adds	r1, r1, r12
-	adc	r4, r2, lr
-	ldm	r3, {r12, lr}
-	subs	r3, r1, r12
-	sbc	r2, r4, lr
-	cmp	r2, #0
-	movlt	r3, r1
-	movlt	r2, r4
-	str	r3, [r0]
-	str	r2, [r0, #4]
-	pop	{r4, lr}
+	ldm	r2, {r1, r4, r5, r6, r7}        @ y[0..4]
+	adds	r12, r1, r12                    @ s[0]
+	ldm	r11, {r8, r9, r10, r11}         @ x[2..5]
+	adcs	lr, r4, lr                      @ s[1]
+	adcs	r8, r5, r8                      @ s[2]
+	ldr	r2, [r2, #20]                   @ y[5]
+	adcs	r9, r6, r9                      @ s[3]
+	adcs	r7, r7, r10                     @ s[4]
+	adc	r2, r2, r11                     @ s[5]; the carry-out is not kept
+	ldm	r3, {r1, r4, r5, r6, r10, r11}  @ p[0..5]
+	subs	r1, r12, r1                     @ d = s - p, word 0
+	sbcs	r4, lr, r4
+	sbcs	r5, r8, r5
+	sbcs	r6, r9, r6
+	sbcs	r3, r7, r10
+	sbc	r10, r2, r11                    @ d[5]
+	asr	r11, r10, #31                   @ r11 = -1 if d[5] is negative, else 0
+	cmn	r11, #1                         @ flags of r11 + 1: GT when d >= 0, LE (zero) when d < 0
+	movgt	r2, r10                         @ d >= 0: take d[5]
+	movle	r3, r7                          @ d < 0: fall back to the s words
+	movle	r6, r9
+	cmn	r11, #1                         @ same comparison repeated for the remaining selects
+	movle	r5, r8
+	movle	r4, lr
+	movle	r1, r12
+	str	r2, [r0, #20]
+	str	r3, [r0, #16]
+	str	r6, [r0, #12]
+	str	r5, [r0, #8]
+	str	r4, [r0, #4]
+	str	r1, [r0]
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end30:
-	.size	mcl_fp_addNF2L, .Lfunc_end30-mcl_fp_addNF2L
+.Lfunc_end17:
+	.size	mcl_fp_addNF6L, .Lfunc_end17-mcl_fp_addNF6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_sub2L
-	.align	2
-	.type	mcl_fp_sub2L,%function
-mcl_fp_sub2L:                           @ @mcl_fp_sub2L
+                                        @ -- End function
+	.globl	mcl_fp_sub6L                    @ -- Begin function mcl_fp_sub6L
+	.p2align	2
+	.type	mcl_fp_sub6L,%function
+	.code	32                              @ @mcl_fp_sub6L
+mcl_fp_sub6L:                           @ z = x - y; if that borrows, z += p (z=[r0], x=[r1], y=[r2], p=[r3], six words each; p is presumably the field modulus)
 	.fnstart
-@ BB#0:
-	.save	{r4, lr}
-	push	{r4, lr}
-	ldm	r2, {r12, lr}
-	ldm	r1, {r2, r4}
-	subs	r1, r2, r12
-	sbcs	r2, r4, lr
-	mov	r4, #0
-	sbc	r4, r4, #0
-	stm	r0, {r1, r2}
-	tst	r4, #1
-	popeq	{r4, lr}
-	moveq	pc, lr
-	ldr	r4, [r3]
-	ldr	r3, [r3, #4]
-	adds	r1, r4, r1
-	adc	r2, r3, r2
-	stm	r0, {r1, r2}
-	pop	{r4, lr}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r11, lr}
+	ldr	r9, [r2]                        @ y[0]
+	ldm	r1, {r4, r7}                    @ x[0..1]
+	ldmib	r2, {r8, lr}                    @ y[1..2]
+	subs	r4, r4, r9                      @ difference word 0: starts the borrow chain
+	ldr	r5, [r1, #8]
+	sbcs	r7, r7, r8                      @ word 1
+	ldr	r12, [r2, #12]
+	ldr	r6, [r1, #12]
+	sbcs	r8, r5, lr                      @ word 2
+	ldr	r5, [r1, #16]
+	sbcs	r12, r6, r12                    @ word 3
+	ldr	r6, [r2, #16]
+	ldr	r2, [r2, #20]
+	ldr	r1, [r1, #20]
+	sbcs	lr, r5, r6                      @ word 4
+	stm	r0, {r4, r7, r8, r12, lr}       @ store difference words 0..4
+	sbcs	r1, r1, r2                      @ word 5
+	mov	r2, #0
+	str	r1, [r0, #20]
+	sbc	r2, r2, #0                      @ r2 = 0 if no borrow, all ones if the subtraction borrowed
+	tst	r2, #1
+	bne	.LBB18_2                        @ borrowed: add p back
+@ %bb.1:                                @ %nocarry
+	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
 	mov	pc, lr
-.Lfunc_end31:
-	.size	mcl_fp_sub2L, .Lfunc_end31-mcl_fp_sub2L
+.LBB18_2:                               @ %carry
+	ldm	r3, {r2, r5, r6, r9}            @ p[0..3]
+	adds	r2, r2, r4                      @ z = difference + p, word by word
+	str	r2, [r0]
+	adcs	r2, r5, r7
+	str	r2, [r0, #4]
+	adcs	r2, r6, r8
+	str	r2, [r0, #8]
+	adcs	r2, r9, r12
+	str	r2, [r0, #12]
+	ldr	r2, [r3, #16]
+	adcs	r2, r2, lr
+	str	r2, [r0, #16]
+	ldr	r2, [r3, #20]
+	adc	r1, r2, r1                      @ top word; the final carry is dropped
+	str	r1, [r0, #20]
+	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
+	mov	pc, lr
+.Lfunc_end18:
+	.size	mcl_fp_sub6L, .Lfunc_end18-mcl_fp_sub6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_subNF2L
-	.align	2
-	.type	mcl_fp_subNF2L,%function
-mcl_fp_subNF2L:                         @ @mcl_fp_subNF2L
+                                        @ -- End function
+	.globl	mcl_fp_subNF6L                  @ -- Begin function mcl_fp_subNF6L
+	.p2align	2
+	.type	mcl_fp_subNF6L,%function
+	.code	32                              @ @mcl_fp_subNF6L
+mcl_fp_subNF6L:                         @ d = x - y; z = d if the top word of d is non-negative as a signed value, else z = d + p (z=[r0], x=[r1], y=[r2], p=[r3]; p presumably the modulus)
 	.fnstart
-@ BB#0:
-	.save	{r4, lr}
-	push	{r4, lr}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	add	r11, r2, #8                     @ r11 -> y[2]; taken before r2 is overwritten below
 	ldm	r2, {r12, lr}
-	ldr	r2, [r1]
-	ldr	r1, [r1, #4]
-	subs	r4, r2, r12
-	sbc	r1, r1, lr
-	ldm	r3, {r12, lr}
-	adds	r3, r4, r12
-	adc	r2, r1, lr
-	cmp	r1, #0
-	movge	r3, r4
-	movge	r2, r1
-	str	r3, [r0]
+	ldm	r1, {r2, r4, r5, r6, r7}        @ x[0..4]
+	subs	r12, r2, r12                    @ d[0]
+	ldm	r11, {r8, r9, r10, r11}         @ y[2..5]
+	sbcs	lr, r4, lr                      @ d[1]
+	sbcs	r8, r5, r8                      @ d[2]
+	ldr	r1, [r1, #20]                   @ x[5]
+	sbcs	r9, r6, r9                      @ d[3]
+	ldr	r4, [r3]                        @ p[0]
+	sbcs	r7, r7, r10                     @ d[4]
+	sbc	r1, r1, r11                     @ d[5]; the borrow-out is not kept
+	ldmib	r3, {r2, r5, r6, r10, r11}      @ p[1..5]
+	adds	r4, r12, r4                     @ t = d + p, word 0
+	adcs	r2, lr, r2
+	adcs	r5, r8, r5
+	adcs	r6, r9, r6
+	adcs	r10, r7, r10
+	adc	r3, r1, r11                     @ t[5]
+	asr	r11, r1, #31                    @ r11 = -1 if d[5] is negative, else 0
+	cmp	r11, #0                         @ PL when d >= 0
+	movpl	r3, r1                          @ d >= 0: take the d words instead of t
+	movpl	r10, r7
+	movpl	r6, r9
+	cmp	r11, #0                         @ same comparison repeated for the remaining selects
+	movpl	r5, r8
+	movpl	r2, lr
+	movpl	r4, r12
+	str	r3, [r0, #20]
+	str	r10, [r0, #16]
+	str	r6, [r0, #12]
+	str	r5, [r0, #8]
 	str	r2, [r0, #4]
-	pop	{r4, lr}
+	str	r4, [r0]
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end32:
-	.size	mcl_fp_subNF2L, .Lfunc_end32-mcl_fp_subNF2L
+.Lfunc_end19:
+	.size	mcl_fp_subNF6L, .Lfunc_end19-mcl_fp_subNF6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_add2L
-	.align	2
-	.type	mcl_fpDbl_add2L,%function
-mcl_fpDbl_add2L:                        @ @mcl_fpDbl_add2L
+                                        @ -- End function
+	.globl	mcl_fpDbl_add6L                 @ -- Begin function mcl_fpDbl_add6L
+	.p2align	2
+	.type	mcl_fpDbl_add6L,%function
+	.code	32                              @ @mcl_fpDbl_add6L
+mcl_fpDbl_add6L:                        @ 12-word add z = x + y (z=[r0], x=[r1], y=[r2]); the high six words are then reduced by p=[r3] (six words, presumably the modulus) when that does not borrow
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r11, lr}
-	push	{r4, r5, r6, r7, r11, lr}
-	ldm	r1, {r12, lr}
-	ldr	r4, [r1, #8]
-	ldr	r1, [r1, #12]
-	ldm	r2, {r5, r6, r7}
-	ldr	r2, [r2, #12]
-	adds	r5, r5, r12
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#28
+	sub	sp, sp, #28
+	ldm	r2, {r8, r9, lr}                @ y[0..2]
+	ldm	r1, {r4, r5, r6, r7}            @ x[0..3]
+	adds	r4, r4, r8                      @ sum word 0: starts the carry chain
+	str	r4, [sp, #24]                   @ 4-byte Spill
+	adcs	r4, r5, r9                      @ sum word 1
+	ldr	r12, [r2, #12]
 	adcs	r6, r6, lr
-	str	r5, [r0]
-	adcs	r7, r7, r4
-	str	r6, [r0, #4]
-	mov	r6, #0
-	adcs	r1, r2, r1
-	adc	r2, r6, #0
-	ldr	r6, [r3]
-	ldr	r3, [r3, #4]
-	subs	r6, r7, r6
-	sbcs	r3, r1, r3
-	sbc	r2, r2, #0
-	ands	r2, r2, #1
-	movne	r6, r7
-	movne	r3, r1
-	str	r6, [r0, #8]
-	str	r3, [r0, #12]
-	pop	{r4, r5, r6, r7, r11, lr}
-	mov	pc, lr
-.Lfunc_end33:
-	.size	mcl_fpDbl_add2L, .Lfunc_end33-mcl_fpDbl_add2L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sub2L
-	.align	2
-	.type	mcl_fpDbl_sub2L,%function
-mcl_fpDbl_sub2L:                        @ @mcl_fpDbl_sub2L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r11, lr}
-	push	{r4, r5, r6, r7, r11, lr}
-	ldm	r2, {r12, lr}
-	ldr	r4, [r2, #8]
-	ldr	r2, [r2, #12]
-	ldm	r1, {r5, r6, r7}
-	ldr	r1, [r1, #12]
-	subs	r5, r5, r12
-	sbcs	r6, r6, lr
-	str	r5, [r0]
-	sbcs	r7, r7, r4
+	str	r4, [sp, #20]                   @ 4-byte Spill
+	str	r6, [sp, #16]                   @ 4-byte Spill
+	adcs	r7, r7, r12                     @ sum word 3
+	ldr	r4, [r2, #16]
+	add	lr, r1, #32                     @ lr -> x[8]
+	ldr	r6, [r1, #16]
+	str	r7, [sp, #12]                   @ 4-byte Spill
+	adcs	r7, r6, r4                      @ sum word 4
+	ldr	r5, [r2, #20]
+	ldr	r6, [r1, #20]
+	str	r7, [sp, #8]                    @ 4-byte Spill
+	adcs	r7, r6, r5                      @ sum word 5
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	ldr	r6, [r2, #24]
+	ldr	r7, [r1, #24]
+	ldr	r4, [r2, #40]
+	adcs	r11, r7, r6                     @ sum word 6 (first word of the high half)
+	ldr	r6, [r1, #28]
+	ldr	r7, [r2, #28]
+	ldr	r5, [r2, #44]
+	adcs	r10, r6, r7                     @ sum word 7
+	ldr	r6, [r2, #32]
+	ldr	r7, [r2, #36]
+	ldm	lr, {r2, r12, lr}               @ x[8..10]
+	adcs	r8, r2, r6                      @ sum word 8
+	ldr	r1, [r1, #44]
+	adcs	r7, r12, r7                     @ sum word 9
+	mov	r2, #0
+	adcs	lr, lr, r4                      @ sum word 10
+	ldr	r4, [r3, #8]
+	adcs	r12, r1, r5                     @ sum word 11
+	ldr	r6, [sp, #20]                   @ 4-byte Reload
+	adc	r9, r2, #0                      @ r9 = carry-out of the 12-word addition
+	ldm	r3, {r2, r5}                    @ p[0..1]
+	subs	r2, r11, r2                     @ trial subtraction: high half - p, word 0
+	str	r11, [sp]                       @ 4-byte Spill
+	sbcs	r5, r10, r5
+	ldr	r11, [sp, #24]                  @ 4-byte Reload
+	ldr	r1, [r3, #12]
+	sbcs	r4, r8, r4
+	str	r11, [r0]
 	str	r6, [r0, #4]
-	mov	r6, #0
-	sbcs	r1, r1, r2
-	sbc	r2, r6, #0
-	ldr	r6, [r3]
-	ldr	r3, [r3, #4]
-	adds	r6, r7, r6
-	adc	r3, r1, r3
-	ands	r2, r2, #1
-	moveq	r6, r7
-	moveq	r3, r1
+	sbcs	r1, r7, r1
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	ldr	r11, [r3, #20]
+	ldr	r3, [r3, #16]
 	str	r6, [r0, #8]
-	str	r3, [r0, #12]
-	pop	{r4, r5, r6, r7, r11, lr}
+	ldr	r6, [sp, #12]                   @ 4-byte Reload
+	sbcs	r3, lr, r3
+	str	r6, [r0, #12]
+	sbcs	r11, r12, r11
+	ldr	r6, [sp, #8]                    @ 4-byte Reload
+	str	r6, [r0, #16]
+	ldr	r6, [sp, #4]                    @ 4-byte Reload
+	str	r6, [r0, #20]
+	sbc	r6, r9, #0                      @ addition carry minus the subtraction borrow
+	ands	r6, r6, #1
+	movne	r11, r12                        @ bit 0 set: keep the unreduced high-half words
+	movne	r3, lr
+	movne	r1, r7
+	cmp	r6, #0                          @ re-test the same bit for the remaining selects
+	movne	r4, r8
+	add	r12, r0, #36                    @ r12 -> z[9]
+	str	r4, [r0, #32]
+	movne	r5, r10
+	stm	r12, {r1, r3, r11}              @ z[9..11]
+	ldr	r1, [sp]                        @ 4-byte Reload
+	str	r5, [r0, #28]
+	movne	r2, r1
+	str	r2, [r0, #24]
+	add	sp, sp, #28
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end34:
-	.size	mcl_fpDbl_sub2L, .Lfunc_end34-mcl_fpDbl_sub2L
+.Lfunc_end20:
+	.size	mcl_fpDbl_add6L, .Lfunc_end20-mcl_fpDbl_add6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_mulUnitPre3L
-	.align	2
-	.type	mcl_fp_mulUnitPre3L,%function
-mcl_fp_mulUnitPre3L:                    @ @mcl_fp_mulUnitPre3L
+                                        @ -- End function
+	.globl	mcl_fpDbl_sub6L                 @ -- Begin function mcl_fpDbl_sub6L
+	.p2align	2
+	.type	mcl_fpDbl_sub6L,%function
+	.code	32                              @ @mcl_fpDbl_sub6L
+mcl_fpDbl_sub6L:                        @ 12-word subtract z = x - y (z=[r0], x=[r1], y=[r2]); if it borrows, p=[r3] (six words, presumably the modulus) is added to the high six words
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, lr}
-	push	{r4, r5, r6, r7, r8, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, r5}
-	umull	lr, r4, r12, r2
-	umull	r1, r12, r5, r2
-	umull	r7, r8, r3, r2
-	mov	r5, r1
-	mov	r6, r4
-	str	lr, [r0]
-	umlal	r6, r5, r3, r2
-	adds	r2, r4, r7
-	adcs	r1, r8, r1
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#56
+	sub	sp, sp, #56
+	ldr	r7, [r2, #32]
+	add	r8, r1, #12                     @ r8 -> x[3]
+	str	r7, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [r2, #36]
+	str	r7, [sp, #44]                   @ 4-byte Spill
+	ldr	r7, [r2, #40]
+	str	r7, [sp, #48]                   @ 4-byte Spill
+	ldr	r7, [r2, #44]
+	str	r7, [sp, #52]                   @ 4-byte Spill
+	ldr	r7, [r1, #44]
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	ldr	r7, [r1, #40]
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	ldr	r7, [r2, #8]
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	ldr	r7, [r2, #16]
+	str	r7, [sp, #8]                    @ 4-byte Spill
+	ldr	r7, [r2, #20]
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	ldr	r7, [r2, #24]
+	ldm	r2, {r9, r10}                   @ y[0..1]
+	ldr	r11, [r2, #12]
+	ldr	r2, [r2, #28]
+	str	r2, [sp]                        @ 4-byte Spill
+	ldm	r1, {r2, r12, lr}               @ x[0..2]
+	subs	r2, r2, r9                      @ difference word 0: starts the borrow chain
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	sbcs	r2, r12, r10                    @ word 1
+	str	r2, [sp, #24]                   @ 4-byte Spill
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	str	r7, [sp, #20]                   @ 4-byte Spill
+	ldm	r8, {r4, r5, r6, r7, r8}        @ x[3..7]
+	sbcs	r2, lr, r2                      @ word 2
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	sbcs	r2, r4, r11                     @ word 3
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	ldr	r2, [sp, #8]                    @ 4-byte Reload
+	ldr	r9, [r1, #36]
+	sbcs	r2, r5, r2                      @ word 4
+	str	r2, [sp, #8]                    @ 4-byte Spill
+	ldr	r2, [sp, #4]                    @ 4-byte Reload
+	mov	r5, #0
+	ldr	r1, [r1, #32]
+	sbcs	r2, r6, r2                      @ word 5
+	str	r2, [sp, #4]                    @ 4-byte Spill
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	ldr	r11, [sp, #28]                  @ 4-byte Reload
+	sbcs	r6, r7, r2                      @ word 6 (first word of the high half)
+	ldr	r2, [sp]                        @ 4-byte Reload
+	ldr	r7, [sp, #40]                   @ 4-byte Reload
+	sbcs	r10, r8, r2                     @ word 7
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	sbcs	r8, r1, r7                      @ word 8
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	str	r11, [r0]
+	sbcs	r4, r9, r1                      @ word 9
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	ldr	r11, [r3, #20]
+	sbcs	r7, r2, r1                      @ word 10
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	str	r6, [sp, #20]                   @ 4-byte Spill
+	sbcs	lr, r2, r1                      @ word 11
+	ldmib	r3, {r1, r2, r12}               @ p[1..3]
+	sbc	r9, r5, #0                      @ r9 = 0 if no borrow, all ones if the 12-word subtraction borrowed
+	ldr	r5, [r3]
+	ldr	r3, [r3, #16]
+	adds	r5, r6, r5                      @ high half + p, word 0
+	ldr	r6, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r10, r1
 	str	r6, [r0, #4]
-	str	r5, [r0, #8]
-	adc	r1, r12, #0
-	str	r1, [r0, #12]
-	pop	{r4, r5, r6, r7, r8, lr}
+	adcs	r2, r8, r2
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	adcs	r12, r4, r12
+	str	r6, [r0, #8]
+	adcs	r3, r7, r3
+	ldr	r6, [sp, #12]                   @ 4-byte Reload
+	adc	r11, lr, r11
+	ands	r9, r9, #1
+	moveq	r11, lr                         @ no borrow: keep the plain difference words
+	moveq	r3, r7
+	moveq	r12, r4
+	cmp	r9, #0                          @ re-test the same bit for the remaining selects
+	moveq	r1, r10
+	str	r6, [r0, #12]
+	str	r1, [r0, #28]
+	moveq	r2, r8
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	ldr	r6, [sp, #8]                    @ 4-byte Reload
+	str	r6, [r0, #16]
+	ldr	r6, [sp, #4]                    @ 4-byte Reload
+	moveq	r5, r1
+	str	r6, [r0, #20]
+	str	r2, [r0, #32]
+	str	r12, [r0, #36]
+	str	r3, [r0, #40]
+	str	r11, [r0, #44]
+	str	r5, [r0, #24]
+	add	sp, sp, #56
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end35:
-	.size	mcl_fp_mulUnitPre3L, .Lfunc_end35-mcl_fp_mulUnitPre3L
+.Lfunc_end21:
+	.size	mcl_fpDbl_sub6L, .Lfunc_end21-mcl_fpDbl_sub6L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_mulPre3L
-	.align	2
-	.type	mcl_fpDbl_mulPre3L,%function
-mcl_fpDbl_mulPre3L:                     @ @mcl_fpDbl_mulPre3L
+                                        @ -- End function
+	.globl	mulPv224x32                     @ -- Begin function mulPv224x32
+	.p2align	2
+	.type	mulPv224x32,%function
+	.code	32                              @ @mulPv224x32
+mulPv224x32:                            @ z = x * r2: seven-word x=[r1] times the 32-bit value in r2, giving the eight-word product at z=[r0]
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	ldr	r3, [r2]
-	ldm	r1, {r12, lr}
-	ldr	r1, [r1, #8]
-	umull	r4, r5, r12, r3
-	str	r4, [r0]
-	umull	r4, r6, lr, r3
-	adds	r4, r5, r4
-	umull	r7, r4, r1, r3
-	adcs	r6, r6, r7
-	umlal	r5, r7, lr, r3
-	ldr	r3, [r2, #4]
-	ldr	r2, [r2, #8]
-	adc	r8, r4, #0
-	umull	r6, r10, r12, r3
-	adds	r9, r6, r5
-	umull	r6, r5, lr, r3
-	adcs	r6, r6, r7
-	umull	r7, r4, r1, r3
-	str	r9, [r0, #4]
-	adcs	r3, r7, r8
-	mov	r8, #0
-	adc	r7, r8, #0
-	adds	r6, r6, r10
-	adcs	r11, r3, r5
-	umull	r5, r9, r1, r2
-	umull	r1, r10, lr, r2
-	adc	r4, r7, r4
-	umull	r7, r3, r12, r2
-	adds	r2, r6, r7
-	adcs	r1, r11, r1
-	str	r2, [r0, #8]
-	adcs	r2, r4, r5
-	adc	r7, r8, #0
-	adds	r1, r1, r3
-	str	r1, [r0, #12]
-	adcs	r1, r2, r10
-	str	r1, [r0, #16]
-	adc	r1, r7, r9
-	str	r1, [r0, #20]
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r11, lr}
+	ldr	r12, [r1]                       @ x[0]
+	ldmib	r1, {r3, lr}                    @ x[1], x[2]
+	umull	r7, r6, r12, r2                 @ r6:r7 = x[0] * r2
+	ldr	r4, [r1, #12]                   @ x[3]
+	umull	r5, r8, r3, r2                  @ r8:r5 = x[1] * r2
+	str	r7, [r0]                        @ z[0] = low word of x[0] * r2
+	umull	r9, r12, r4, r2                 @ r12:r9 = x[3] * r2
+	adds	r7, r6, r5                      @ only the carry is used; the sum itself is recomputed by umlal
+	umull	r5, r4, lr, r2                  @ r4:r5 = x[2] * r2
+	adcs	r7, r8, r5                      @ again only the carry is used
+	umlal	r6, r5, r3, r2                  @ r5:r6 += x[1] * r2, yielding z[1] in r6 and z[2] in r5; flags unchanged
+	adcs	r7, r4, r9                      @ z[3]
+	str	r7, [r0, #12]
+	str	r5, [r0, #8]
+	str	r6, [r0, #4]
+	ldr	r3, [r1, #16]                   @ x[4]
+	umull	r7, r6, r3, r2
+	adcs	r3, r12, r7                     @ z[4]
+	str	r3, [r0, #16]
+	ldr	r3, [r1, #20]                   @ x[5]
+	umull	r7, r5, r3, r2
+	adcs	r3, r6, r7                      @ z[5]
+	str	r3, [r0, #20]
+	ldr	r1, [r1, #24]                   @ x[6]
+	umull	r3, r7, r1, r2
+	adcs	r1, r5, r3                      @ z[6]
+	str	r1, [r0, #24]
+	adc	r1, r7, #0                      @ z[7] = last high word plus the final carry
+	str	r1, [r0, #28]
+	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
 	mov	pc, lr
-.Lfunc_end36:
-	.size	mcl_fpDbl_mulPre3L, .Lfunc_end36-mcl_fpDbl_mulPre3L
+.Lfunc_end22:
+	.size	mulPv224x32, .Lfunc_end22-mulPv224x32
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_sqrPre3L
-	.align	2
-	.type	mcl_fpDbl_sqrPre3L,%function
-mcl_fpDbl_sqrPre3L:                     @ @mcl_fpDbl_sqrPre3L
+                                        @ -- End function
+	.globl	mcl_fp_mulUnitPre7L             @ -- Begin function mcl_fp_mulUnitPre7L
+	.p2align	2
+	.type	mcl_fp_mulUnitPre7L,%function
+	.code	32                              @ @mcl_fp_mulUnitPre7L
+mcl_fp_mulUnitPre7L:                    @ z = x * r2: seven-word x=[r1] times the 32-bit value in r2, giving the eight-word product at z=[r0]
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, lr}
-	ldm	r1, {r2, r3, r12}
-	mov	r10, #0
-	umull	r1, lr, r2, r2
-	umull	r7, r4, r3, r2
-	str	r1, [r0]
-	umull	r1, r8, r12, r2
-	mov	r5, lr
-	mov	r6, r1
-	umlal	r5, r6, r3, r2
-	adds	r2, lr, r7
-	adcs	r2, r4, r1
-	adc	r2, r8, #0
-	adds	lr, r5, r7
-	umull	r5, r9, r3, r3
-	adcs	r5, r6, r5
-	umull	r6, r7, r12, r3
-	str	lr, [r0, #4]
-	adcs	r2, r2, r6
-	adc	r3, r10, #0
-	adds	r4, r5, r4
-	adcs	r2, r2, r9
-	adc	r3, r3, r7
-	adds	r1, r4, r1
-	umull	r5, r4, r12, r12
-	str	r1, [r0, #8]
-	adcs	r1, r2, r6
-	adcs	r2, r3, r5
-	adc	r3, r10, #0
-	adds	r1, r1, r8
-	str	r1, [r0, #12]
-	adcs	r1, r2, r7
-	str	r1, [r0, #16]
-	adc	r1, r3, r4
-	str	r1, [r0, #20]
-	pop	{r4, r5, r6, r7, r8, r9, r10, lr}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r11, lr}
+	ldr	r12, [r1]                       @ x[0]
+	ldmib	r1, {r3, lr}                    @ x[1], x[2]
+	umull	r7, r6, r12, r2                 @ r6:r7 = x[0] * r2
+	ldr	r4, [r1, #12]                   @ x[3]
+	umull	r5, r8, r3, r2                  @ r8:r5 = x[1] * r2
+	str	r7, [r0]                        @ z[0] = low word of x[0] * r2
+	umull	r9, r12, r4, r2                 @ r12:r9 = x[3] * r2
+	adds	r7, r6, r5                      @ only the carry is used; the sum itself is recomputed by umlal
+	umull	r5, r4, lr, r2                  @ r4:r5 = x[2] * r2
+	adcs	r7, r8, r5                      @ again only the carry is used
+	umlal	r6, r5, r3, r2                  @ r5:r6 += x[1] * r2, yielding z[1] in r6 and z[2] in r5; flags unchanged
+	ldr	r3, [r1, #16]                   @ x[4]
+	adcs	r7, r4, r9                      @ z[3]
+	str	r7, [r0, #12]
+	str	r6, [r0, #4]
+	umull	r7, r6, r3, r2
+	str	r5, [r0, #8]
+	adcs	r3, r12, r7                     @ z[4]
+	str	r3, [r0, #16]
+	ldr	r3, [r1, #20]                   @ x[5]
+	ldr	r1, [r1, #24]                   @ x[6]
+	umull	r7, r5, r3, r2
+	adcs	r3, r6, r7                      @ z[5]
+	str	r3, [r0, #20]
+	umull	r3, r7, r1, r2
+	adcs	r1, r5, r3                      @ z[6]
+	str	r1, [r0, #24]
+	adc	r1, r7, #0                      @ z[7] = last high word plus the final carry
+	str	r1, [r0, #28]
+	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
 	mov	pc, lr
-.Lfunc_end37:
-	.size	mcl_fpDbl_sqrPre3L, .Lfunc_end37-mcl_fpDbl_sqrPre3L
+.Lfunc_end23:
+	.size	mcl_fp_mulUnitPre7L, .Lfunc_end23-mcl_fp_mulUnitPre7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_mont3L
-	.align	2
-	.type	mcl_fp_mont3L,%function
-mcl_fp_mont3L:                          @ @mcl_fp_mont3L
+                                        @ -- End function
+	.globl	mcl_fpDbl_mulPre7L              @ -- Begin function mcl_fpDbl_mulPre7L
+	.p2align	2
+	.type	mcl_fpDbl_mulPre7L,%function
+	.code	32                              @ @mcl_fpDbl_mulPre7L
+mcl_fpDbl_mulPre7L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#44
-	sub	sp, sp, #44
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r2, {r8, lr}
-	ldr	r0, [r2, #8]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r1]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldmib	r1, {r4, r9}
-	ldr	r2, [r3, #-4]
-	umull	r7, r6, r0, r8
-	ldr	r0, [r3]
-	ldr	r1, [r3, #8]
-	ldr	r10, [r3, #4]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	mul	r5, r7, r2
-	str	r2, [sp, #16]           @ 4-byte Spill
-	str	r9, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #40]           @ 4-byte Spill
-	str	r1, [sp, #28]           @ 4-byte Spill
-	umull	r12, r2, r5, r1
-	umull	r1, r3, r5, r0
-	umull	r0, r7, r9, r8
-	umull	r11, r9, r4, r8
-	str	r7, [sp]                @ 4-byte Spill
-	adds	r7, r6, r11
-	str	r1, [sp, #8]            @ 4-byte Spill
-	mov	r1, r3
-	str	r2, [sp, #4]            @ 4-byte Spill
-	mov	r2, r12
-	adcs	r7, r9, r0
-	umlal	r1, r2, r5, r10
-	umlal	r6, r0, r4, r8
-	mov	r8, #0
-	ldr	r7, [sp]                @ 4-byte Reload
-	adc	r9, r7, #0
-	umull	r7, r11, r5, r10
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	adds	r3, r3, r7
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	adcs	r3, r11, r12
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	adc	r3, r3, #0
-	adds	r7, r5, r7
-	adcs	r11, r1, r6
-	adcs	r12, r2, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r9, r3, r9
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	adc	r8, r8, #0
-	umull	r6, r7, lr, r0
-	umull	r5, r0, lr, r4
-	umull	r1, r2, lr, r3
-	adds	r5, r2, r5
-	adcs	r0, r0, r6
-	umlal	r2, r6, lr, r4
-	adc	r0, r7, #0
-	adds	r1, r11, r1
-	ldr	r11, [sp, #16]          @ 4-byte Reload
-	adcs	r2, r12, r2
-	ldr	r12, [sp, #28]          @ 4-byte Reload
-	str	r2, [sp, #12]           @ 4-byte Spill
-	adcs	r2, r9, r6
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #4]            @ 4-byte Spill
-	mov	r0, #0
-	mul	r6, r1, r11
-	adc	r0, r0, #0
-	umull	r7, r9, r6, r12
-	str	r0, [sp]                @ 4-byte Spill
-	mov	r5, r7
-	umull	r8, r0, r6, r2
-	umull	lr, r2, r6, r10
-	mov	r3, r0
-	adds	r0, r0, lr
-	ldr	lr, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r2, r7
-	umlal	r3, r5, r6, r10
-	adc	r0, r9, #0
-	adds	r1, r8, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r1, r3, r1
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r8, r5, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r9, r0, r1
-	ldr	r0, [sp]                @ 4-byte Reload
-	umull	r1, r2, r3, lr
-	adc	r0, r0, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	umull	r6, r7, r3, r0
-	umull	r5, r0, r3, r4
-	adds	r5, r2, r5
-	adcs	r0, r0, r6
-	umlal	r2, r6, r3, r4
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	adc	r0, r7, #0
-	adds	r1, r3, r1
-	adcs	r2, r8, r2
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adcs	r9, r9, r6
-	mul	r6, r1, r11
-	umull	r7, r4, r6, r12
-	ldr	r12, [sp, #40]          @ 4-byte Reload
-	mov	r5, r7
-	adcs	r0, r2, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	mov	r0, #0
-	umull	r11, r3, r6, r12
-	adc	r8, r0, #0
-	umull	r0, lr, r6, r10
-	mov	r2, r3
-	adds	r0, r3, r0
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	umlal	r2, r5, r6, r10
-	adcs	r0, lr, r7
-	adc	r0, r4, #0
-	adds	r1, r11, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
+	.pad	#116
+	sub	sp, sp, #116
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	str	r2, [sp, #88]                   @ 4-byte Spill
+	ldm	r2, {r0, r10}
+	ldr	r4, [r2, #8]
+	ldr	r9, [r2, #12]
+	ldr	r2, [r1, #8]
+	ldr	lr, [r1]
+	str	r2, [sp, #108]                  @ 4-byte Spill
+	ldr	r5, [r1, #12]
+	ldr	r6, [r1, #16]
+	ldr	r2, [r1, #20]
+	ldr	r12, [r1, #4]
+	ldr	r1, [r1, #24]
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	str	r5, [sp, #96]                   @ 4-byte Spill
+	umull	r3, r7, r1, r0
+	str	r6, [sp, #112]                  @ 4-byte Spill
+	str	r2, [sp, #92]                   @ 4-byte Spill
+	str	lr, [sp, #84]                   @ 4-byte Spill
+	str	r3, [sp, #72]                   @ 4-byte Spill
+	umull	r3, r1, r2, r0
+	str	r7, [sp, #76]                   @ 4-byte Spill
+	umull	r8, r7, r5, r0
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	str	r3, [sp, #64]                   @ 4-byte Spill
+	umull	r11, r1, r6, r0
+	ldr	r3, [sp, #108]                  @ 4-byte Reload
+	umull	r2, r5, r12, r0
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	umull	r6, r1, lr, r0
+	str	r6, [sp, #80]                   @ 4-byte Spill
+	mov	r6, lr
+	adds	r2, r1, r2
+	umull	r2, lr, r3, r0
+	adcs	r5, r5, r2
+	umlal	r1, r2, r12, r0
+	adcs	r8, lr, r8
+	adcs	r7, r7, r11
+	str	r7, [sp, #48]                   @ 4-byte Spill
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	ldr	r5, [sp, #60]                   @ 4-byte Reload
+	adcs	r5, r5, r7
+	str	r5, [sp, #56]                   @ 4-byte Spill
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	ldr	r5, [sp, #68]                   @ 4-byte Reload
+	adcs	r5, r5, r7
+	ldr	r7, [sp, #76]                   @ 4-byte Reload
+	str	r5, [sp, #64]                   @ 4-byte Spill
+	adc	r5, r7, #0
+	str	r5, [sp, #68]                   @ 4-byte Spill
+	umull	r0, r5, r10, r3
+	umull	r11, r3, r10, r6
+	str	r5, [sp, #72]                   @ 4-byte Spill
+	str	r3, [sp, #60]                   @ 4-byte Spill
+	adds	r1, r1, r11
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	umull	r1, r3, r10, r12
+	str	r3, [sp, #52]                   @ 4-byte Spill
+	ldr	r3, [sp, #96]                   @ 4-byte Reload
+	adcs	r7, r2, r1
+	adcs	r0, r8, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	umull	r2, r0, r10, r3
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r8, r0, r2
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
+	umull	r5, r1, r10, r0
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	adcs	r5, r0, r5
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	umull	r6, lr, r10, r0
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r6, r0, r6
+	umull	r11, r0, r10, r1
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r10, [sp, #92]                  @ 4-byte Reload
+	adcs	r11, r1, r11
+	mov	r1, #0
+	adc	r1, r1, #0
+	adds	r2, r7, r2
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	ldr	r7, [sp, #40]                   @ 4-byte Reload
+	adcs	r2, r7, r2
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	ldr	r2, [sp, #72]                   @ 4-byte Reload
+	adcs	r2, r8, r2
+	str	r2, [sp, #24]                   @ 4-byte Spill
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	adcs	r2, r5, r2
+	str	r2, [sp, #60]                   @ 4-byte Spill
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	adcs	r2, r6, r2
+	str	r2, [sp, #56]                   @ 4-byte Spill
+	adcs	r2, r11, lr
+	str	r2, [sp, #52]                   @ 4-byte Spill
+	adc	r0, r1, r0
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	umull	r0, r2, r9, r12
+	ldr	r11, [sp, #112]                 @ 4-byte Reload
+	mov	lr, r3
+	umull	r6, r7, r9, r1
+	str	r6, [sp, #64]                   @ 4-byte Spill
+	adds	r0, r7, r0
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	umull	r8, r6, r9, r0
+	adcs	r2, r2, r8
+	umlal	r7, r8, r9, r12
+	umull	r2, r5, r9, r3
+	ldr	r3, [sp, #104]                  @ 4-byte Reload
+	adcs	r2, r6, r2
+	str	r2, [sp, #72]                   @ 4-byte Spill
+	umull	r2, r6, r9, r11
+	adcs	r2, r5, r2
+	str	r2, [sp, #68]                   @ 4-byte Spill
+	umull	r2, r5, r9, r10
+	adcs	r2, r6, r2
+	str	r2, [sp, #44]                   @ 4-byte Spill
+	umull	r2, r6, r9, r3
+	adcs	r2, r5, r2
+	str	r2, [sp, #40]                   @ 4-byte Spill
+	adc	r2, r6, #0
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	umull	r2, r5, r4, r12
+	str	r5, [sp, #16]                   @ 4-byte Spill
+	umull	r6, r5, r4, r1
+	str	r6, [sp, #20]                   @ 4-byte Spill
+	adds	r2, r5, r2
+	umull	r6, r2, r4, r0
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	r1, r0, r6
+	umlal	r5, r6, r4, r12
+	umull	r1, r0, r4, lr
 	adcs	r1, r2, r1
-	adcs	r2, r5, r9
-	ldr	r5, [sp, #28]           @ 4-byte Reload
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	umull	r2, r1, r4, r11
+	adcs	r0, r0, r2
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	umull	r2, r0, r4, r10
+	adcs	r11, r1, r2
+	umull	r2, r1, r4, r3
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	lr, r0, r2
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	adc	r1, r1, #0
+	str	r3, [r0]
+	ldr	r3, [sp, #20]                   @ 4-byte Reload
+	adds	r4, r3, r2
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	adcs	r5, r5, r2
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	stmib	r0, {r3, r4}
+	adcs	r2, r6, r2
+	str	r2, [sp, #80]                   @ 4-byte Spill
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adcs	r4, r2, r3
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	adcs	r6, r2, r3
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	adcs	r3, r11, r3
+	ldr	r11, [sp, #104]                 @ 4-byte Reload
+	adcs	r2, lr, r2
+	adc	lr, r1, #0
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	adds	r5, r1, r5
+	str	r5, [r0, #12]
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	adcs	r0, r8, r4
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r8, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	adc	r3, r8, #0
-	subs	r7, r1, r12
-	sbcs	r6, r2, r10
-	sbcs	r5, r0, r5
-	sbc	r3, r3, #0
-	ands	r3, r3, #1
-	movne	r5, r0
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	movne	r7, r1
-	movne	r6, r2
-	str	r7, [r0]
-	str	r6, [r0, #4]
-	str	r5, [r0, #8]
-	add	sp, sp, #44
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r0, [r0, #16]
+	umull	r2, r1, r0, r11
+	umull	r3, lr, r0, r12
+	str	r2, [sp, #48]                   @ 4-byte Spill
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	umull	r2, r1, r0, r10
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	str	r2, [sp, #40]                   @ 4-byte Spill
+	umull	r10, r2, r0, r1
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	umull	r5, r6, r0, r1
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	umull	r2, r4, r0, r1
+	umull	r9, r1, r0, r8
+	adds	r7, r1, r3
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	adcs	r7, lr, r2
+	umlal	r1, r2, r0, r12
+	adcs	r7, r4, r5
+	ldr	r5, [sp, #36]                   @ 4-byte Reload
+	adcs	r6, r6, r10
+	ldr	r4, [sp, #44]                   @ 4-byte Reload
+	adcs	r5, r5, r3
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	lr, r4, r3
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adc	r10, r3, #0
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adds	r4, r9, r3
+	ldr	r3, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r2, r0
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	str	r4, [r3, #16]
+	adcs	r0, r7, r0
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r7, [sp, #108]                  @ 4-byte Reload
+	adcs	r0, r6, r0
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r6, [r1, #24]
+	adcs	r0, r5, r0
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	umull	r3, r2, r6, r8
+	adcs	r0, lr, r0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	adc	r0, r10, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [r1, #20]
+	umull	r9, r1, r6, r7
+	str	r3, [sp, #88]                   @ 4-byte Spill
+	mov	r3, r2
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	mov	r5, r9
+	umlal	r3, r5, r6, r12
+	str	r3, [sp, #48]                   @ 4-byte Spill
+	umull	r4, r3, r0, r7
+	str	r5, [sp, #52]                   @ 4-byte Spill
+	str	r3, [sp, #40]                   @ 4-byte Spill
+	umull	r3, r10, r0, r8
+	str	r4, [sp]                        @ 4-byte Spill
+	mov	r7, r4
+	ldr	r8, [sp, #92]                   @ 4-byte Reload
+	str	r3, [sp, #44]                   @ 4-byte Spill
+	umull	r4, r3, r0, r1
+	mov	lr, r10
+	umlal	lr, r7, r0, r12
+	str	r4, [sp, #28]                   @ 4-byte Spill
+	str	r3, [sp, #32]                   @ 4-byte Spill
+	umull	r4, r3, r0, r12
+	str	r4, [sp, #20]                   @ 4-byte Spill
+	str	r3, [sp, #24]                   @ 4-byte Spill
+	umull	r4, r3, r6, r11
+	str	r3, [sp, #16]                   @ 4-byte Spill
+	umull	r5, r3, r6, r8
+	str	r4, [sp, #84]                   @ 4-byte Spill
+	str	r3, [sp, #12]                   @ 4-byte Spill
+	ldr	r3, [sp, #112]                  @ 4-byte Reload
+	str	r5, [sp, #8]                    @ 4-byte Spill
+	umull	r4, r5, r6, r3
+	str	r4, [sp, #108]                  @ 4-byte Spill
+	str	r5, [sp, #4]                    @ 4-byte Spill
+	umull	r5, r4, r6, r1
+	umull	r11, r1, r6, r12
+	adds	r2, r2, r11
+	adcs	r1, r1, r9
+	ldr	r2, [sp, #4]                    @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r11, r1, r5
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	adcs	r1, r4, r1
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	ldr	r4, [sp, #104]                  @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	ldr	r2, [sp]                        @ 4-byte Reload
+	adc	r5, r1, #0
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adds	r6, r10, r1
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r6, r1, r2
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r12, r1, r2
+	umull	r1, r2, r0, r3
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	adcs	r9, r3, r1
+	umull	r3, r6, r0, r8
+	adcs	r2, r2, r3
+	umull	r3, r1, r0, r4
+	ldr	r4, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r6, r3
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adc	r1, r1, #0
+	adds	r3, r4, r3
+	ldr	r4, [sp, #76]                   @ 4-byte Reload
+	adcs	r6, lr, r4
+	ldr	r4, [sp, #72]                   @ 4-byte Reload
+	adcs	r7, r7, r4
+	ldr	r4, [sp, #68]                   @ 4-byte Reload
+	adcs	r12, r12, r4
+	ldr	r4, [sp, #64]                   @ 4-byte Reload
+	adcs	lr, r9, r4
+	ldr	r4, [sp, #60]                   @ 4-byte Reload
+	adcs	r2, r2, r4
+	ldr	r4, [sp, #56]                   @ 4-byte Reload
+	adcs	r9, r0, r4
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adc	r1, r1, #0
+	ldr	r4, [sp, #100]                  @ 4-byte Reload
+	adds	r6, r0, r6
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r7, r0, r7
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	add	r12, r4, #20
+	stm	r12, {r3, r6, r7}
+	adcs	r3, r11, lr
+	ldr	r7, [sp, #108]                  @ 4-byte Reload
+	ldr	r6, [sp, #84]                   @ 4-byte Reload
+	adcs	r2, r7, r2
+	ldr	r7, [sp, #96]                   @ 4-byte Reload
+	str	r0, [r4, #32]
+	adcs	r7, r7, r9
+	str	r3, [r4, #36]
+	adcs	r1, r6, r1
+	str	r2, [r4, #40]
+	adc	r6, r5, #0
+	str	r7, [r4, #44]
+	str	r1, [r4, #48]
+	str	r6, [r4, #52]
+	add	sp, sp, #116
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end38:
-	.size	mcl_fp_mont3L, .Lfunc_end38-mcl_fp_mont3L
+.Lfunc_end24:
+	.size	mcl_fpDbl_mulPre7L, .Lfunc_end24-mcl_fpDbl_mulPre7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_montNF3L
-	.align	2
-	.type	mcl_fp_montNF3L,%function
-mcl_fp_montNF3L:                        @ @mcl_fp_montNF3L
+                                        @ -- End function
+	.globl	mcl_fpDbl_sqrPre7L              @ -- Begin function mcl_fpDbl_sqrPre7L
+	.p2align	2
+	.type	mcl_fpDbl_sqrPre7L,%function
+	.code	32                              @ @mcl_fpDbl_sqrPre7L
+mcl_fpDbl_sqrPre7L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#68
-	sub	sp, sp, #68
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r8, [r1]
-	ldmib	r1, {r6, r9}
-	ldm	r2, {r4, r7}
-	ldr	r0, [r2, #8]
-	mov	r10, r3
-	umull	r3, r1, r0, r9
-	str	r1, [sp, #52]           @ 4-byte Spill
-	umull	r1, r2, r0, r8
-	str	r3, [sp, #44]           @ 4-byte Spill
-	str	r1, [sp, #48]           @ 4-byte Spill
-	str	r2, [sp, #40]           @ 4-byte Spill
-	mov	r1, r2
-	mov	r2, r3
-	umull	r3, r5, r0, r6
-	umlal	r1, r2, r0, r6
-	str	r3, [sp, #32]           @ 4-byte Spill
-	umull	r3, r0, r7, r6
-	str	r5, [sp, #36]           @ 4-byte Spill
-	str	r1, [sp, #56]           @ 4-byte Spill
-	str	r2, [sp, #60]           @ 4-byte Spill
-	umull	r2, r1, r7, r9
-	str	r0, [sp, #8]            @ 4-byte Spill
-	str	r3, [sp, #4]            @ 4-byte Spill
-	str	r1, [sp, #28]           @ 4-byte Spill
-	umull	r1, r11, r7, r8
-	str	r2, [sp, #16]           @ 4-byte Spill
-	str	r1, [sp, #24]           @ 4-byte Spill
-	mov	r1, r2
-	str	r11, [sp, #12]          @ 4-byte Spill
-	umlal	r11, r1, r7, r6
-	umull	r0, r7, r6, r4
-	str	r1, [sp, #20]           @ 4-byte Spill
-	umull	lr, r1, r9, r4
-	umull	r9, r2, r8, r4
-	ldr	r8, [r10, #-4]
-	adds	r0, r2, r0
-	str	r1, [sp]                @ 4-byte Spill
-	mov	r1, r2
-	mov	r12, lr
-	adcs	r0, r7, lr
-	umlal	r1, r12, r6, r4
-	ldr	r0, [sp]                @ 4-byte Reload
-	ldm	r10, {r6, r7}
-	mul	r2, r9, r8
-	adc	r3, r0, #0
-	ldr	r0, [r10, #8]
-	umull	r4, lr, r2, r6
-	adds	r4, r4, r9
-	umull	r4, r9, r2, r7
-	adcs	r1, r4, r1
-	umull	r4, r5, r2, r0
-	adcs	r2, r4, r12
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	adc	r3, r3, #0
-	adds	r1, r1, lr
-	adcs	r2, r2, r9
-	adc	r3, r3, r5
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	adds	r5, r5, r4
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	adcs	r5, r4, r5
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	ldr	r5, [sp, #28]           @ 4-byte Reload
+	.pad	#212
+	sub	sp, sp, #212
+	ldr	r11, [r1]
+	ldmib	r1, {r2, r3}
+	ldr	r12, [r1, #24]
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r10, [r1, #12]
+	umull	r5, r0, r12, r11
+	ldr	r4, [r1, #16]
+	ldr	lr, [r1, #20]
+	str	r11, [sp, #164]                 @ 4-byte Spill
+	umull	r6, r1, r12, r2
+	str	r3, [sp, #8]                    @ 4-byte Spill
+	str	r5, [sp, #188]                  @ 4-byte Spill
+	str	r0, [sp, #176]                  @ 4-byte Spill
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adds	r5, r0, r6
+	str	r6, [sp, #72]                   @ 4-byte Spill
+	umull	r5, r0, r12, r3
+	str	r5, [sp, #208]                  @ 4-byte Spill
+	adcs	r5, r1, r5
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	umull	r5, r1, r12, r10
+	str	r5, [sp, #192]                  @ 4-byte Spill
+	adcs	r5, r0, r5
+	str	r5, [sp, #160]                  @ 4-byte Spill
+	umull	r5, r0, r12, r4
+	str	r1, [sp, #168]                  @ 4-byte Spill
+	str	r5, [sp, #204]                  @ 4-byte Spill
+	adcs	r5, r1, r5
+	umull	r6, r1, r12, lr
+	str	r5, [sp, #156]                  @ 4-byte Spill
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	str	r1, [sp, #196]                  @ 4-byte Spill
+	adcs	r5, r0, r6
+	str	r5, [sp, #152]                  @ 4-byte Spill
+	umull	r8, r5, r12, r12
+	adcs	r7, r1, r8
+	umull	r0, r1, lr, r11
 	adc	r5, r5, #0
-	adds	r1, r4, r1
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	adcs	r2, r11, r2
-	adcs	r12, r4, r3
-	mul	r4, r1, r8
-	umull	r3, r9, r4, r6
-	adc	lr, r5, #0
-	adds	r1, r3, r1
-	umull	r1, r3, r4, r7
-	adcs	r1, r1, r2
-	umull	r2, r5, r4, r0
-	adcs	r2, r2, r12
-	adc	r4, lr, #0
-	adds	r1, r1, r9
-	adcs	r12, r2, r3
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	adc	r9, r4, r5
-	adds	r5, r2, r3
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	adcs	r5, r3, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	adc	lr, r2, #0
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	adds	r1, r2, r1
-	mul	r4, r1, r8
-	umull	r10, r2, r4, r0
-	umull	r3, r8, r4, r7
-	str	r2, [sp, #52]           @ 4-byte Spill
-	umull	r2, r11, r4, r6
-	ldr	r4, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r4, r12
-	adcs	r12, r5, r9
-	adc	r5, lr, #0
-	adds	r1, r2, r1
-	adcs	r1, r3, r4
-	adcs	r2, r10, r12
-	adc	r3, r5, #0
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	adds	r1, r1, r11
-	adcs	r2, r2, r8
-	adc	r3, r3, r5
-	subs	r6, r1, r6
-	sbcs	r7, r2, r7
-	sbc	r0, r3, r0
-	asr	r5, r0, #31
-	cmp	r5, #0
-	movlt	r6, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	movlt	r7, r2
-	movlt	r0, r3
-	stm	r1, {r6, r7}
-	str	r0, [r1, #8]
-	add	sp, sp, #68
+	str	r7, [sp, #148]                  @ 4-byte Spill
+	str	r5, [sp, #144]                  @ 4-byte Spill
+	umull	r5, r7, lr, r2
+	str	r0, [sp, #184]                  @ 4-byte Spill
+	str	r1, [sp, #172]                  @ 4-byte Spill
+	umull	r0, r8, lr, r3
+	str	r5, [sp, #40]                   @ 4-byte Spill
+	adds	r5, r1, r5
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	str	r0, [sp, #200]                  @ 4-byte Spill
+	adcs	r5, r7, r0
+	umull	r1, r0, lr, r10
+	str	r8, [sp, #52]                   @ 4-byte Spill
+	umull	r9, r7, lr, lr
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	adcs	r5, r8, r1
+	umull	r8, r1, lr, r4
+	str	r5, [sp, #140]                  @ 4-byte Spill
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	adcs	r5, r0, r8
+	str	r5, [sp, #136]                  @ 4-byte Spill
+	adcs	r5, r1, r9
+	ldr	r0, [sp, #196]                  @ 4-byte Reload
+	str	r5, [sp, #132]                  @ 4-byte Spill
+	adcs	r5, r7, r6
+	str	r5, [sp, #128]                  @ 4-byte Spill
+	adc	r5, r0, #0
+	umull	r6, r9, r4, r2
+	str	r5, [sp, #124]                  @ 4-byte Spill
+	umull	r5, r1, r4, r11
+	str	r6, [sp, #24]                   @ 4-byte Spill
+	str	r9, [sp, #20]                   @ 4-byte Spill
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	adds	r7, r1, r6
+	umull	r1, r0, r4, r3
+	str	r5, [sp, #180]                  @ 4-byte Spill
+	str	r1, [sp, #196]                  @ 4-byte Spill
+	adcs	r7, r9, r1
+	umull	r5, r9, r4, r4
+	umull	r7, r6, r4, r10
+	adcs	r1, r0, r7
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	adcs	r1, r6, r5
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	adcs	r1, r9, r8
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #204]                  @ 4-byte Reload
+	umull	r8, r9, r10, r2
+	ldr	r5, [sp, #104]                  @ 4-byte Reload
+	adcs	r1, r5, r1
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	str	r8, [sp, #28]                   @ 4-byte Spill
+	adc	r1, r1, #0
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	umull	r5, r1, r10, r11
+	str	r9, [sp]                        @ 4-byte Spill
+	str	r5, [sp, #44]                   @ 4-byte Spill
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	adds	r5, r1, r8
+	umull	r1, r8, r10, r3
+	str	r1, [sp, #204]                  @ 4-byte Spill
+	adcs	r5, r9, r1
+	umull	r5, r9, r10, r10
+	adcs	r5, r8, r5
+	str	r5, [sp, #92]                   @ 4-byte Spill
+	adcs	r5, r9, r7
+	str	r5, [sp, #88]                   @ 4-byte Spill
+	ldr	r5, [sp, #84]                   @ 4-byte Reload
+	umull	r7, r9, r3, r11
+	adcs	r5, r6, r5
+	str	r5, [sp, #84]                   @ 4-byte Spill
+	ldr	r5, [sp, #192]                  @ 4-byte Reload
+	ldr	r6, [sp, #80]                   @ 4-byte Reload
+	str	r9, [sp, #12]                   @ 4-byte Spill
+	adcs	r5, r6, r5
+	str	r5, [sp, #80]                   @ 4-byte Spill
+	ldr	r5, [sp, #168]                  @ 4-byte Reload
+	str	r7, [sp, #168]                  @ 4-byte Spill
+	adc	r5, r5, #0
+	str	r5, [sp, #76]                   @ 4-byte Spill
+	umull	r5, r6, r3, r2
+	adds	r7, r9, r5
+	umull	r7, r9, r3, r3
+	str	r7, [sp, #192]                  @ 4-byte Spill
+	adcs	r11, r6, r7
+	ldr	r7, [sp, #200]                  @ 4-byte Reload
+	adcs	r1, r9, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #196]                  @ 4-byte Reload
+	adcs	r3, r8, r1
+	str	r3, [sp, #60]                   @ 4-byte Spill
+	adcs	r3, r0, r7
+	str	r3, [sp, #56]                   @ 4-byte Spill
+	mov	r3, r7
+	ldr	r0, [sp, #208]                  @ 4-byte Reload
+	ldr	r7, [sp, #52]                   @ 4-byte Reload
+	adcs	r7, r7, r0
+	str	r7, [sp, #52]                   @ 4-byte Spill
+	ldr	r7, [sp, #68]                   @ 4-byte Reload
+	adc	r7, r7, #0
+	str	r7, [sp, #48]                   @ 4-byte Spill
+	umull	r9, r7, r2, r2
+	str	r7, [sp, #68]                   @ 4-byte Spill
+	ldr	r7, [sp, #164]                  @ 4-byte Reload
+	umull	r8, r11, r2, r7
+	str	r8, [sp, #4]                    @ 4-byte Spill
+	adds	r7, r11, r9
+	ldr	r8, [sp, #32]                   @ 4-byte Reload
+	ldr	r7, [sp, #68]                   @ 4-byte Reload
+	adcs	r7, r7, r5
+	ldr	r7, [sp, #28]                   @ 4-byte Reload
+	adcs	r6, r6, r7
+	str	r6, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #24]                   @ 4-byte Reload
+	ldr	r7, [sp]                        @ 4-byte Reload
+	adcs	r6, r7, r6
+	str	r6, [sp, #24]                   @ 4-byte Spill
+	ldr	r6, [sp, #40]                   @ 4-byte Reload
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
+	adcs	r6, r7, r6
+	str	r6, [sp, #20]                   @ 4-byte Spill
+	ldr	r6, [sp, #72]                   @ 4-byte Reload
+	ldr	r7, [sp, #36]                   @ 4-byte Reload
+	adcs	r6, r7, r6
+	str	r6, [sp, #36]                   @ 4-byte Spill
+	ldr	r6, [sp, #176]                  @ 4-byte Reload
+	ldr	r7, [sp, #168]                  @ 4-byte Reload
+	umlal	r6, r0, r12, r2
+	str	r0, [sp, #208]                  @ 4-byte Spill
+	ldr	r0, [sp, #172]                  @ 4-byte Reload
+	str	r6, [sp, #72]                   @ 4-byte Spill
+	umlal	r0, r3, lr, r2
+	mov	lr, r11
+	umlal	lr, r5, r2, r2
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	mov	r0, r8
+	umlal	r0, r1, r4, r2
+	str	r3, [sp, #200]                  @ 4-byte Spill
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	mov	r4, r7
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	mov	r9, r3
+	ldr	r0, [sp, #204]                  @ 4-byte Reload
+	str	r1, [sp, #196]                  @ 4-byte Spill
+	umlal	r9, r0, r10, r2
+	ldr	r10, [sp, #12]                  @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	mov	r12, r10
+	str	r0, [sp, #204]                  @ 4-byte Spill
+	ldr	r0, [sp, #192]                  @ 4-byte Reload
+	umlal	r12, r0, r1, r2
+	str	r0, [sp, #192]                  @ 4-byte Spill
+	ldr	r0, [sp, #164]                  @ 4-byte Reload
+	umull	r1, r6, r0, r0
+	str	r1, [sp, #8]                    @ 4-byte Spill
+	mov	r1, r6
+	umlal	r1, r4, r2, r0
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [sp, #4]                    @ 4-byte Reload
+	adc	r0, r0, #0
+	adds	r6, r6, r2
+	str	r0, [sp, #164]                  @ 4-byte Spill
+	adcs	r6, r11, r7
+	ldr	r6, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r10, r6
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #180]                  @ 4-byte Reload
+	adcs	r10, r3, r0
+	ldr	r0, [sp, #184]                  @ 4-byte Reload
+	ldr	r3, [sp, #172]                  @ 4-byte Reload
+	adcs	r11, r8, r0
+	ldr	r0, [sp, #188]                  @ 4-byte Reload
+	adcs	r7, r3, r0
+	ldr	r0, [sp, #176]                  @ 4-byte Reload
+	adc	r3, r0, #0
+	adds	r0, r2, r1
+	adcs	lr, lr, r4
+	ldr	r4, [sp, #120]                  @ 4-byte Reload
+	str	r0, [sp, #176]                  @ 4-byte Spill
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	str	r0, [r4]
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r5, r5, r0
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	adcs	r2, r1, r11
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r10, r1, r7
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	ldr	r7, [sp, #168]                  @ 4-byte Reload
+	adcs	r3, r1, r3
+	ldr	r1, [sp, #164]                  @ 4-byte Reload
+	adc	r1, r1, #0
+	adds	r7, r7, lr
+	str	r7, [sp, #172]                  @ 4-byte Spill
+	adcs	r7, r12, r5
+	ldr	r5, [sp, #192]                  @ 4-byte Reload
+	adcs	r11, r5, r0
+	ldr	r5, [sp, #64]                   @ 4-byte Reload
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r2, r5, r2
+	ldr	r5, [sp, #60]                   @ 4-byte Reload
+	adcs	r10, r5, r10
+	ldr	r5, [sp, #56]                   @ 4-byte Reload
+	adcs	r5, r5, r3
+	adcs	lr, r0, r1
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #204]                  @ 4-byte Reload
+	adc	r12, r0, #0
+	adds	r0, r6, r7
+	str	r0, [sp, #192]                  @ 4-byte Spill
+	adcs	r0, r9, r11
+	adcs	r3, r1, r2
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	adcs	r2, r1, r10
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	adcs	r5, r1, r5
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r6, r1, lr
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	adcs	r7, r1, r12
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	add	r12, r4, #12
+	adc	lr, r1, #0
+	ldr	r1, [sp, #180]                  @ 4-byte Reload
+	adds	r8, r1, r0
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r3, r0, r3
+	ldr	r0, [sp, #196]                  @ 4-byte Reload
+	adcs	r2, r0, r2
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r5, r0, r5
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r6, r0, r6
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	adcs	r7, r0, r7
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, lr
+	adc	lr, r1, #0
+	ldr	r1, [sp, #184]                  @ 4-byte Reload
+	adds	r10, r1, r3
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	adcs	r2, r1, r2
+	ldr	r1, [sp, #200]                  @ 4-byte Reload
+	adcs	r5, r1, r5
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	adcs	r6, r1, r6
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	adcs	r7, r1, r7
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	adcs	r3, r1, lr
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	adc	lr, r1, #0
+	ldr	r1, [sp, #188]                  @ 4-byte Reload
+	adds	r2, r1, r2
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	adcs	r5, r1, r5
+	ldr	r1, [sp, #208]                  @ 4-byte Reload
+	adcs	r6, r1, r6
+	ldr	r1, [sp, #176]                  @ 4-byte Reload
+	str	r1, [r4, #4]
+	ldr	r1, [sp, #172]                  @ 4-byte Reload
+	str	r1, [r4, #8]
+	ldr	r1, [sp, #192]                  @ 4-byte Reload
+	stm	r12, {r1, r8, r10}
+	ldr	r1, [sp, #160]                  @ 4-byte Reload
+	str	r2, [r4, #24]
+	ldr	r2, [sp, #156]                  @ 4-byte Reload
+	adcs	r1, r1, r7
+	ldr	r7, [sp, #144]                  @ 4-byte Reload
+	adcs	r0, r2, r0
+	ldr	r2, [sp, #152]                  @ 4-byte Reload
+	str	r1, [r4, #36]
+	add	r1, r4, #40
+	adcs	r2, r2, r3
+	ldr	r3, [sp, #148]                  @ 4-byte Reload
+	str	r5, [r4, #28]
+	adcs	r3, r3, lr
+	str	r6, [r4, #32]
+	adc	r7, r7, #0
+	stm	r1, {r0, r2, r3, r7}
+	add	sp, sp, #212
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end39:
-	.size	mcl_fp_montNF3L, .Lfunc_end39-mcl_fp_montNF3L
+.Lfunc_end25:
+	.size	mcl_fpDbl_sqrPre7L, .Lfunc_end25-mcl_fpDbl_sqrPre7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_montRed3L
-	.align	2
-	.type	mcl_fp_montRed3L,%function
-mcl_fp_montRed3L:                       @ @mcl_fp_montRed3L
+                                        @ -- End function
+	.globl	mcl_fp_mont7L                   @ -- Begin function mcl_fp_mont7L
+	.p2align	2
+	.type	mcl_fp_mont7L,%function
+	.code	32                              @ @mcl_fp_mont7L
+mcl_fp_mont7L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#28
-	sub	sp, sp, #28
-	ldr	r5, [r2]
-	ldr	lr, [r2, #-4]
-	ldr	r3, [r2, #4]
-	ldr	r2, [r2, #8]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	str	r5, [sp, #20]           @ 4-byte Spill
-	str	r2, [sp]                @ 4-byte Spill
-	ldm	r1, {r4, r7}
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldr	r7, [r1, #8]
-	mul	r6, r4, lr
-	umull	r10, r8, r6, r3
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r1, #12]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	umull	r7, r9, r6, r2
-	umull	r11, r2, r6, r5
+	.pad	#132
+	sub	sp, sp, #132
+	str	r0, [sp, #64]                   @ 4-byte Spill
 	mov	r0, r2
-	adds	r2, r2, r10
-	mov	r12, r7
-	adcs	r2, r8, r7
-	umlal	r0, r12, r6, r3
-	ldr	r8, [r1, #20]
+	ldr	r7, [r0, #8]
+	ldr	r12, [r0, #4]
+	ldr	r0, [r0, #12]
+	str	r2, [sp, #68]                   @ 4-byte Spill
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r1]
+	ldr	r2, [r2]
+	ldr	r4, [r3, #-4]
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	umull	r6, r8, r0, r2
+	ldr	r5, [r3, #8]
+	str	r4, [sp, #116]                  @ 4-byte Spill
+	str	r7, [sp, #56]                   @ 4-byte Spill
+	ldr	r7, [r3]
+	str	r6, [sp, #52]                   @ 4-byte Spill
+	str	r5, [sp, #128]                  @ 4-byte Spill
+	mul	r0, r4, r6
+	ldr	r9, [r3, #4]
+	ldr	lr, [r1, #4]
+	ldr	r10, [r1, #8]
+	ldr	r11, [r1, #12]
+	str	r11, [sp, #88]                  @ 4-byte Spill
+	str	r7, [sp, #112]                  @ 4-byte Spill
+	umull	r6, r4, r0, r5
+	str	lr, [sp, #72]                   @ 4-byte Spill
+	str	r10, [sp, #80]                  @ 4-byte Spill
+	str	r9, [sp, #76]                   @ 4-byte Spill
+	str	r4, [sp, #36]                   @ 4-byte Spill
+	umull	r4, r5, r0, r7
+	str	r6, [sp, #12]                   @ 4-byte Spill
+	str	r4, [sp, #48]                   @ 4-byte Spill
+	str	r5, [sp, #8]                    @ 4-byte Spill
+	mov	r4, r5
+	mov	r5, r6
+	umlal	r4, r5, r0, r9
+	str	r4, [sp, #40]                   @ 4-byte Spill
+	ldr	r4, [r1, #24]
+	str	r5, [sp, #44]                   @ 4-byte Spill
+	str	r4, [sp, #96]                   @ 4-byte Spill
+	umull	r6, r5, r4, r2
+	ldr	r4, [r1, #20]
 	ldr	r1, [r1, #16]
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adc	r10, r9, #0
-	adds	r7, r4, r11
-	mov	r11, lr
-	adcs	r9, r2, r0
-	ldr	r2, [sp]                @ 4-byte Reload
-	mul	r7, r9, lr
-	umull	lr, r0, r7, r2
-	str	r0, [sp, #8]            @ 4-byte Spill
-	umull	r4, r0, r7, r5
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	mov	r6, lr
-	str	r4, [sp, #4]            @ 4-byte Spill
-	mov	r4, r0
-	umlal	r4, r6, r7, r3
-	adcs	r12, r5, r12
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	adcs	r10, r5, r10
-	adcs	r1, r1, #0
-	str	r1, [sp, #16]           @ 4-byte Spill
-	adcs	r1, r8, #0
-	str	r1, [sp, #12]           @ 4-byte Spill
-	mov	r1, #0
-	adc	r8, r1, #0
-	umull	r1, r5, r7, r3
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	adds	r1, r0, r1
-	adcs	r0, r5, lr
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	ldr	r0, [sp, #8]            @ 4-byte Reload
+	str	r4, [sp, #92]                   @ 4-byte Spill
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	str	r6, [sp, #120]                  @ 4-byte Spill
+	str	r5, [sp, #124]                  @ 4-byte Spill
+	umull	r6, r5, r4, r2
+	str	r5, [sp, #104]                  @ 4-byte Spill
+	umull	r4, r5, r1, r2
+	str	r6, [sp, #100]                  @ 4-byte Spill
+	str	r5, [sp, #24]                   @ 4-byte Spill
+	umull	r6, r5, r11, r2
+	umull	r11, r1, lr, r2
+	ldr	lr, [r3, #24]
+	adds	r7, r8, r11
+	umull	r7, r11, r10, r2
+	adcs	r1, r1, r7
+	adcs	r1, r11, r6
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	adcs	r1, r5, r4
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r6, [sp, #24]                   @ 4-byte Reload
+	ldr	r5, [r3, #12]
+	adcs	r1, r6, r1
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	ldr	r6, [sp, #104]                  @ 4-byte Reload
+	ldr	r4, [sp, #8]                    @ 4-byte Reload
+	adcs	r1, r6, r1
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	ldr	r6, [r3, #20]
+	adc	r1, r1, #0
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldr	r1, [r3, #16]
+	umull	r3, r10, r0, r9
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	str	r5, [sp, #124]                  @ 4-byte Spill
+	umull	r11, r9, r0, r1
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	str	r6, [sp, #104]                  @ 4-byte Spill
+	adds	r3, r4, r3
+	adcs	r1, r10, r1
+	ldr	r4, [sp, #36]                   @ 4-byte Reload
+	umull	r1, r3, r0, r5
+	str	lr, [sp, #100]                  @ 4-byte Spill
+	adcs	r1, r4, r1
+	ldr	r4, [sp, #48]                   @ 4-byte Reload
+	adcs	r11, r3, r11
+	umull	r5, r3, r0, r6
+	adcs	r10, r9, r5
+	umull	r6, r5, r0, lr
+	adcs	r0, r3, r6
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adc	r5, r5, #0
+	mov	r6, #0
+	umlal	r8, r7, r3, r2
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	adds	r2, r4, r2
+	ldr	r2, [sp, #40]                   @ 4-byte Reload
+	adcs	r2, r2, r8
+	str	r2, [sp, #52]                   @ 4-byte Spill
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	adcs	r2, r2, r7
+	str	r2, [sp, #48]                   @ 4-byte Spill
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r1, r11, r1
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	ldr	r11, [sp, #108]                 @ 4-byte Reload
+	adcs	r1, r10, r1
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r5, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	adc	r0, r6, #0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	umull	r2, r1, r12, r0
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	umull	r9, r1, r12, r0
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	umull	r2, lr, r12, r3
+	str	r1, [sp, #8]                    @ 4-byte Spill
+	umull	r7, r8, r12, r0
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	umull	r5, r6, r12, r0
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	umull	r1, r4, r12, r0
+	umull	r10, r0, r12, r11
+	str	r10, [sp, #12]                  @ 4-byte Spill
+	adds	r2, r0, r2
+	adcs	r2, lr, r1
+	umlal	r0, r1, r12, r3
+	adcs	lr, r4, r5
+	adcs	r7, r6, r7
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adcs	r6, r8, r9
+	ldr	r4, [sp, #8]                    @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r4, r4, r2
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	adc	r5, r2, #0
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	adds	r3, r3, r2
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	str	r3, [sp, #20]                   @ 4-byte Spill
+	adcs	r0, r2, r0
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r7, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	mov	r0, #0
 	adc	r0, r0, #0
-	adds	r1, r1, r9
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	mul	r0, r1, r3
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	umull	r12, r2, r0, r1
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	umull	r4, r6, r0, r7
+	ldr	r7, [sp, #104]                  @ 4-byte Reload
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	umull	r2, r3, r0, r1
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	stmib	sp, {r4, r6}                    @ 8-byte Folded Spill
+	umull	r10, r6, r0, r7
+	ldr	r7, [sp, #120]                  @ 4-byte Reload
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	mov	r5, r3
+	umull	r11, lr, r0, r1
+	mov	r2, r12
+	str	r6, [sp]                        @ 4-byte Spill
+	umull	r8, r9, r0, r7
+	ldr	r7, [sp, #124]                  @ 4-byte Reload
+	umull	r4, r6, r0, r7
+	ldr	r7, [sp]                        @ 4-byte Reload
+	umlal	r5, r2, r0, r1
+	adds	r0, r3, r11
+	adcs	r0, lr, r12
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	ldr	r11, [sp, #108]                 @ 4-byte Reload
+	adcs	r12, r0, r4
+	ldr	r0, [sp, #4]                    @ 4-byte Reload
+	adcs	r1, r6, r8
+	ldr	r4, [sp, #20]                   @ 4-byte Reload
+	adcs	r3, r9, r10
+	ldr	r10, [sp, #56]                  @ 4-byte Reload
+	adcs	r7, r7, r0
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	adc	r6, r0, #0
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adds	r4, r4, r0
+	ldr	r4, [sp, #52]                   @ 4-byte Reload
+	adcs	r5, r4, r5
+	str	r5, [sp, #52]                   @ 4-byte Spill
+	ldr	r5, [sp, #48]                   @ 4-byte Reload
+	adcs	r2, r5, r2
+	str	r2, [sp, #48]                   @ 4-byte Spill
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r2, r12
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	umull	r2, r1, r10, r0
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	umull	r8, r1, r10, r0
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	umull	r2, r12, r10, r3
+	str	r1, [sp, #8]                    @ 4-byte Spill
+	umull	r6, r7, r10, r0
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	umull	r4, r5, r10, r0
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	umull	r1, lr, r10, r0
+	umull	r9, r0, r10, r11
+	str	r9, [sp, #12]                   @ 4-byte Spill
+	adds	r2, r0, r2
+	adcs	r2, r12, r1
+	umlal	r0, r1, r10, r3
+	adcs	r2, lr, r4
+	adcs	r12, r5, r6
+	adcs	r6, r7, r8
+	ldr	r7, [sp, #16]                   @ 4-byte Reload
+	ldr	r5, [sp, #8]                    @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r4, r5, r7
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
+	adc	r5, r7, #0
+	ldr	r7, [sp, #12]                   @ 4-byte Reload
+	adds	r3, r3, r7
+	ldr	r7, [sp, #48]                   @ 4-byte Reload
+	str	r3, [sp, #20]                   @ 4-byte Spill
+	adcs	r0, r7, r0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r6, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	mul	r0, r1, r3
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	umull	r9, r2, r0, r1
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	ldr	r2, [sp, #112]                  @ 4-byte Reload
+	umull	r7, r1, r0, r2
+	mov	r2, r9
+	str	r7, [sp, #24]                   @ 4-byte Spill
+	ldr	r7, [sp, #100]                  @ 4-byte Reload
+	mov	r5, r1
+	umlal	r5, r2, r0, r6
+	umull	r3, r4, r0, r7
+	ldr	r7, [sp, #104]                  @ 4-byte Reload
+	str	r3, [sp, #8]                    @ 4-byte Spill
+	umull	r3, r10, r0, r7
+	ldr	r7, [sp, #120]                  @ 4-byte Reload
+	str	r4, [sp, #12]                   @ 4-byte Spill
+	umull	r12, r8, r0, r7
+	ldr	r7, [sp, #124]                  @ 4-byte Reload
+	umull	lr, r4, r0, r7
+	umull	r11, r7, r0, r6
+	adds	r0, r1, r11
+	ldr	r11, [sp, #108]                 @ 4-byte Reload
+	adcs	r0, r7, r9
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	lr, r0, lr
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
 	adcs	r1, r4, r12
-	adcs	lr, r6, r10
-	ldr	r6, [sp, #20]           @ 4-byte Reload
-	mul	r5, r1, r11
-	mov	r11, r2
-	adcs	r0, r0, r7
-	umull	r4, r12, r5, r2
-	umull	r2, r7, r5, r3
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r10, r0, #0
-	umull	r9, r0, r5, r6
-	adc	r8, r8, #0
+	ldr	r4, [sp, #20]                   @ 4-byte Reload
+	adcs	r3, r8, r3
+	adcs	r7, r10, r0
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	ldr	r10, [sp, #60]                  @ 4-byte Reload
+	adc	r6, r0, #0
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adds	r4, r4, r0
+	ldr	r4, [sp, #56]                   @ 4-byte Reload
+	adcs	r5, r4, r5
+	str	r5, [sp, #56]                   @ 4-byte Spill
+	ldr	r5, [sp, #52]                   @ 4-byte Reload
+	adcs	r2, r5, r2
+	str	r2, [sp, #52]                   @ 4-byte Spill
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r2, lr
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	umull	r2, r1, r10, r0
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	umull	r8, r1, r10, r0
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	str	r2, [sp, #20]                   @ 4-byte Spill
+	umull	r2, r12, r10, r3
+	str	r1, [sp, #12]                   @ 4-byte Spill
+	umull	r6, r7, r10, r0
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	umull	r4, r5, r10, r0
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	umull	r1, lr, r10, r0
+	umull	r9, r0, r10, r11
+	str	r9, [sp, #16]                   @ 4-byte Spill
 	adds	r2, r0, r2
-	mov	r2, r4
-	adcs	r4, r7, r4
-	adc	r7, r12, #0
-	adds	r1, r9, r1
-	umlal	r0, r2, r5, r3
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	adcs	r1, r2, r1
-	adcs	r2, r7, r10
-	adc	r7, r8, #0
-	subs	r6, r0, r6
-	sbcs	r3, r1, r3
-	sbcs	r5, r2, r11
-	sbc	r7, r7, #0
-	ands	r7, r7, #1
-	movne	r6, r0
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	movne	r3, r1
-	movne	r5, r2
-	str	r6, [r0]
-	stmib	r0, {r3, r5}
-	add	sp, sp, #28
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end40:
-	.size	mcl_fp_montRed3L, .Lfunc_end40-mcl_fp_montRed3L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addPre3L
-	.align	2
-	.type	mcl_fp_addPre3L,%function
-mcl_fp_addPre3L:                        @ @mcl_fp_addPre3L
-	.fnstart
-@ BB#0:
-	.save	{r4, lr}
-	push	{r4, lr}
-	ldm	r1, {r3, r12, lr}
-	ldm	r2, {r1, r4}
-	ldr	r2, [r2, #8]
-	adds	r1, r1, r3
-	adcs	r3, r4, r12
-	adcs	r2, r2, lr
-	stm	r0, {r1, r3}
-	str	r2, [r0, #8]
+	adcs	r2, r12, r1
+	umlal	r0, r1, r10, r3
+	adcs	r2, lr, r4
+	adcs	r12, r5, r6
+	adcs	r6, r7, r8
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
+	ldr	r5, [sp, #12]                   @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r4, r5, r7
+	ldr	r7, [sp, #24]                   @ 4-byte Reload
+	adc	r5, r7, #0
+	ldr	r7, [sp, #16]                   @ 4-byte Reload
+	adds	r3, r3, r7
+	ldr	r7, [sp, #52]                   @ 4-byte Reload
+	str	r3, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r7, r0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r6, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #36]                   @ 4-byte Spill
 	mov	r0, #0
 	adc	r0, r0, #0
-	pop	{r4, lr}
-	mov	pc, lr
-.Lfunc_end41:
-	.size	mcl_fp_addPre3L, .Lfunc_end41-mcl_fp_addPre3L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subPre3L
-	.align	2
-	.type	mcl_fp_subPre3L,%function
-mcl_fp_subPre3L:                        @ @mcl_fp_subPre3L
-	.fnstart
-@ BB#0:
-	.save	{r4, lr}
-	push	{r4, lr}
-	ldm	r2, {r3, r12, lr}
-	ldm	r1, {r2, r4}
-	ldr	r1, [r1, #8]
-	subs	r2, r2, r3
-	sbcs	r3, r4, r12
-	sbcs	r1, r1, lr
-	stm	r0, {r2, r3}
-	str	r1, [r0, #8]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	mul	r0, r1, r3
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	umull	r9, r2, r0, r1
+	str	r2, [sp, #20]                   @ 4-byte Spill
+	ldr	r2, [sp, #112]                  @ 4-byte Reload
+	umull	r7, r1, r0, r2
+	mov	r2, r9
+	str	r7, [sp, #28]                   @ 4-byte Spill
+	ldr	r7, [sp, #100]                  @ 4-byte Reload
+	mov	r5, r1
+	umlal	r5, r2, r0, r6
+	umull	r3, r4, r0, r7
+	ldr	r7, [sp, #104]                  @ 4-byte Reload
+	str	r3, [sp, #12]                   @ 4-byte Spill
+	umull	r3, r10, r0, r7
+	ldr	r7, [sp, #120]                  @ 4-byte Reload
+	str	r4, [sp, #16]                   @ 4-byte Spill
+	umull	r12, r8, r0, r7
+	ldr	r7, [sp, #124]                  @ 4-byte Reload
+	umull	lr, r4, r0, r7
+	umull	r11, r7, r0, r6
+	adds	r0, r1, r11
+	ldr	r11, [sp, #108]                 @ 4-byte Reload
+	adcs	r0, r7, r9
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	lr, r0, lr
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r1, r4, r12
+	ldr	r4, [sp, #24]                   @ 4-byte Reload
+	adcs	r3, r8, r3
+	ldr	r12, [sp, #72]                  @ 4-byte Reload
+	adcs	r7, r10, r0
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adc	r6, r0, #0
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adds	r4, r4, r0
+	ldr	r4, [sp, #60]                   @ 4-byte Reload
+	adcs	r5, r4, r5
+	str	r5, [sp, #60]                   @ 4-byte Spill
+	ldr	r5, [sp, #56]                   @ 4-byte Reload
+	adcs	r2, r5, r2
+	str	r2, [sp, #56]                   @ 4-byte Spill
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r2, lr
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r0, [r0, #16]
+	umull	r3, r2, r0, r1
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	umull	r9, r2, r0, r1
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	str	r3, [sp, #24]                   @ 4-byte Spill
+	umull	r3, lr, r0, r12
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	umull	r7, r8, r0, r1
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	umull	r5, r6, r0, r1
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	umull	r2, r4, r0, r1
+	umull	r10, r1, r0, r11
+	str	r10, [sp, #20]                  @ 4-byte Spill
+	adds	r3, r1, r3
+	adcs	r3, lr, r2
+	umlal	r1, r2, r0, r12
+	adcs	lr, r4, r5
+	adcs	r4, r6, r7
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	adcs	r6, r8, r9
+	ldr	r5, [sp, #16]                   @ 4-byte Reload
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r5, r5, r3
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	adc	r7, r3, #0
+	ldr	r3, [sp, #20]                   @ 4-byte Reload
+	adds	r3, r0, r3
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	str	r3, [sp, #28]                   @ 4-byte Spill
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r6, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #36]                   @ 4-byte Spill
 	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	pop	{r4, lr}
-	mov	pc, lr
-.Lfunc_end42:
-	.size	mcl_fp_subPre3L, .Lfunc_end42-mcl_fp_subPre3L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_3L
-	.align	2
-	.type	mcl_fp_shr1_3L,%function
-mcl_fp_shr1_3L:                         @ @mcl_fp_shr1_3L
-	.fnstart
-@ BB#0:
-	ldr	r3, [r1, #4]
-	ldr	r12, [r1]
-	ldr	r1, [r1, #8]
-	lsrs	r2, r3, #1
-	lsr	r3, r3, #1
-	orr	r3, r3, r1, lsl #31
-	rrx	r2, r12
-	lsr	r1, r1, #1
-	stm	r0, {r2, r3}
-	str	r1, [r0, #8]
-	mov	pc, lr
-.Lfunc_end43:
-	.size	mcl_fp_shr1_3L, .Lfunc_end43-mcl_fp_shr1_3L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_add3L
-	.align	2
-	.type	mcl_fp_add3L,%function
-mcl_fp_add3L:                           @ @mcl_fp_add3L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r11, lr}
-	push	{r4, r5, r11, lr}
-	ldm	r1, {r12, lr}
-	ldr	r1, [r1, #8]
-	ldm	r2, {r4, r5}
-	ldr	r2, [r2, #8]
-	adds	r4, r4, r12
-	adcs	r5, r5, lr
-	adcs	r1, r2, r1
-	stm	r0, {r4, r5}
-	mov	r2, #0
-	str	r1, [r0, #8]
-	adc	r12, r2, #0
-	ldm	r3, {r2, lr}
-	ldr	r3, [r3, #8]
-	subs	r4, r4, r2
-	sbcs	r5, r5, lr
-	sbcs	r3, r1, r3
-	sbc	r1, r12, #0
-	tst	r1, #1
-	stmeq	r0, {r4, r5}
-	streq	r3, [r0, #8]
-	pop	{r4, r5, r11, lr}
-	mov	pc, lr
-.Lfunc_end44:
-	.size	mcl_fp_add3L, .Lfunc_end44-mcl_fp_add3L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF3L
-	.align	2
-	.type	mcl_fp_addNF3L,%function
-mcl_fp_addNF3L:                         @ @mcl_fp_addNF3L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r11, lr}
-	push	{r4, r5, r6, r7, r11, lr}
-	ldm	r1, {r12, lr}
-	ldr	r1, [r1, #8]
-	ldm	r2, {r4, r5}
-	ldr	r2, [r2, #8]
-	adds	r4, r4, r12
-	adcs	r5, r5, lr
-	adc	r7, r2, r1
-	ldm	r3, {r2, r12, lr}
-	subs	r2, r4, r2
-	sbcs	r3, r5, r12
-	sbc	r1, r7, lr
-	asr	r6, r1, #31
-	cmp	r6, #0
-	movlt	r2, r4
-	movlt	r3, r5
-	movlt	r1, r7
-	stm	r0, {r2, r3}
-	str	r1, [r0, #8]
-	pop	{r4, r5, r6, r7, r11, lr}
-	mov	pc, lr
-.Lfunc_end45:
-	.size	mcl_fp_addNF3L, .Lfunc_end45-mcl_fp_addNF3L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub3L
-	.align	2
-	.type	mcl_fp_sub3L,%function
-mcl_fp_sub3L:                           @ @mcl_fp_sub3L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, lr}
-	push	{r4, r5, r6, lr}
-	ldm	r2, {r12, lr}
-	ldr	r4, [r2, #8]
-	ldm	r1, {r2, r5, r6}
-	subs	r1, r2, r12
-	sbcs	r2, r5, lr
-	sbcs	r12, r6, r4
-	mov	r6, #0
-	sbc	r6, r6, #0
-	stm	r0, {r1, r2, r12}
-	tst	r6, #1
-	popeq	{r4, r5, r6, lr}
-	moveq	pc, lr
-	ldr	r6, [r3]
-	ldr	r5, [r3, #4]
-	ldr	r3, [r3, #8]
-	adds	r1, r6, r1
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	mul	r1, r0, r3
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	umull	r9, r2, r1, r0
+	str	r2, [sp, #20]                   @ 4-byte Spill
+	ldr	r2, [sp, #112]                  @ 4-byte Reload
+	umull	r7, r0, r1, r2
+	mov	r2, r9
+	str	r7, [sp, #24]                   @ 4-byte Spill
+	ldr	r7, [sp, #100]                  @ 4-byte Reload
+	mov	r5, r0
+	umlal	r5, r2, r1, r6
+	umull	r3, r4, r1, r7
+	ldr	r7, [sp, #104]                  @ 4-byte Reload
+	str	r3, [sp, #12]                   @ 4-byte Spill
+	umull	r3, r10, r1, r7
+	ldr	r7, [sp, #120]                  @ 4-byte Reload
+	str	r4, [sp, #16]                   @ 4-byte Spill
+	umull	r12, r8, r1, r7
+	ldr	r7, [sp, #124]                  @ 4-byte Reload
+	umull	lr, r4, r1, r7
+	umull	r11, r7, r1, r6
+	adds	r0, r0, r11
+	ldr	r11, [sp, #108]                 @ 4-byte Reload
+	adcs	r0, r7, r9
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	lr, r0, lr
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r1, r4, r12
+	ldr	r4, [sp, #28]                   @ 4-byte Reload
+	adcs	r3, r8, r3
+	ldr	r12, [sp, #72]                  @ 4-byte Reload
+	adcs	r7, r10, r0
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adc	r6, r0, #0
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adds	r4, r4, r0
+	ldr	r4, [sp, #60]                   @ 4-byte Reload
+	adcs	r5, r4, r5
+	str	r5, [sp, #60]                   @ 4-byte Spill
+	ldr	r5, [sp, #56]                   @ 4-byte Reload
 	adcs	r2, r5, r2
-	adc	r3, r3, r12
-	stm	r0, {r1, r2, r3}
-	pop	{r4, r5, r6, lr}
-	mov	pc, lr
-.Lfunc_end46:
-	.size	mcl_fp_sub3L, .Lfunc_end46-mcl_fp_sub3L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF3L
-	.align	2
-	.type	mcl_fp_subNF3L,%function
-mcl_fp_subNF3L:                         @ @mcl_fp_subNF3L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r11, lr}
-	push	{r4, r5, r6, r7, r11, lr}
-	ldm	r2, {r12, lr}
-	ldr	r2, [r2, #8]
-	ldm	r1, {r4, r5}
-	ldr	r1, [r1, #8]
-	subs	r4, r4, r12
-	sbcs	r7, r5, lr
-	sbc	r1, r1, r2
-	ldm	r3, {r2, r12, lr}
-	asr	r6, r1, #31
-	adds	r2, r4, r2
-	adcs	r3, r7, r12
-	adc	r5, r1, lr
+	str	r2, [sp, #56]                   @ 4-byte Spill
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r2, lr
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r0, [r0, #20]
+	umull	r3, r2, r0, r1
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	umull	r9, r2, r0, r1
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	str	r3, [sp, #24]                   @ 4-byte Spill
+	umull	r3, lr, r0, r12
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	umull	r7, r8, r0, r1
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	umull	r5, r6, r0, r1
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	umull	r2, r4, r0, r1
+	umull	r10, r1, r0, r11
+	str	r10, [sp, #20]                  @ 4-byte Spill
+	adds	r3, r1, r3
+	adcs	r3, lr, r2
+	umlal	r1, r2, r0, r12
+	adcs	lr, r4, r5
+	adcs	r4, r6, r7
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	adcs	r6, r8, r9
+	ldr	r5, [sp, #16]                   @ 4-byte Reload
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r5, r5, r3
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	adc	r7, r3, #0
+	ldr	r3, [sp, #20]                   @ 4-byte Reload
+	adds	r3, r0, r3
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	str	r3, [sp, #28]                   @ 4-byte Spill
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r6, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	mul	r1, r0, r3
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	umull	r9, r2, r1, r0
+	str	r2, [sp, #20]                   @ 4-byte Spill
+	ldr	r2, [sp, #112]                  @ 4-byte Reload
+	umull	r7, r0, r1, r2
+	mov	r2, r9
+	str	r7, [sp, #24]                   @ 4-byte Spill
+	ldr	r7, [sp, #100]                  @ 4-byte Reload
+	mov	r5, r0
+	umlal	r5, r2, r1, r6
+	umull	r3, r4, r1, r7
+	ldr	r7, [sp, #104]                  @ 4-byte Reload
+	str	r3, [sp, #12]                   @ 4-byte Spill
+	umull	r3, r10, r1, r7
+	ldr	r7, [sp, #120]                  @ 4-byte Reload
+	str	r4, [sp, #16]                   @ 4-byte Spill
+	umull	r12, r8, r1, r7
+	ldr	r7, [sp, #124]                  @ 4-byte Reload
+	umull	lr, r4, r1, r7
+	umull	r11, r7, r1, r6
+	adds	r0, r0, r11
+	ldr	r11, [sp, #108]                 @ 4-byte Reload
+	adcs	r0, r7, r9
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	lr, r0, lr
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r1, r4, r12
+	ldr	r4, [sp, #28]                   @ 4-byte Reload
+	adcs	r3, r8, r3
+	ldr	r12, [sp, #72]                  @ 4-byte Reload
+	adcs	r7, r10, r0
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adc	r6, r0, #0
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adds	r4, r4, r0
+	ldr	r4, [sp, #60]                   @ 4-byte Reload
+	adcs	r5, r4, r5
+	str	r5, [sp, #60]                   @ 4-byte Spill
+	ldr	r5, [sp, #56]                   @ 4-byte Reload
+	adcs	r2, r5, r2
+	str	r2, [sp, #56]                   @ 4-byte Spill
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r2, lr
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r0, [r0, #24]
+	umull	r3, r2, r0, r1
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	str	r2, [sp, #68]                   @ 4-byte Spill
+	umull	r9, r2, r0, r1
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	str	r3, [sp, #28]                   @ 4-byte Spill
+	umull	r3, lr, r0, r12
+	str	r2, [sp, #24]                   @ 4-byte Spill
+	umull	r7, r8, r0, r1
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	umull	r5, r6, r0, r1
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	umull	r2, r4, r0, r1
+	umull	r10, r1, r0, r11
+	str	r10, [sp, #96]                  @ 4-byte Spill
+	adds	r3, r1, r3
+	adcs	r3, lr, r2
+	umlal	r1, r2, r0, r12
+	adcs	lr, r4, r5
+	adcs	r6, r6, r7
+	ldr	r5, [sp, #28]                   @ 4-byte Reload
+	adcs	r7, r8, r9
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	ldr	r4, [sp, #68]                   @ 4-byte Reload
+	adcs	r5, r3, r5
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #96]                   @ 4-byte Reload
+	adc	r4, r4, #0
+	ldr	r9, [sp, #76]                   @ 4-byte Reload
+	adds	r10, r0, r3
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r8, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r6, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	mov	r0, #0
+	ldr	r4, [sp, #120]                  @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	mul	r1, r0, r10
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	mov	lr, r0
+	umull	r7, r5, r1, r0
+	umull	r2, r3, r1, r9
+	str	r7, [sp, #116]                  @ 4-byte Spill
+	ldr	r7, [sp, #128]                  @ 4-byte Reload
+	adds	r2, r5, r2
+	umull	r0, r2, r1, r7
+	ldr	r7, [sp, #124]                  @ 4-byte Reload
+	adcs	r3, r3, r0
+	umlal	r5, r0, r1, r9
+	umull	r3, r12, r1, r7
+	adcs	r2, r2, r3
+	str	r2, [sp, #60]                   @ 4-byte Spill
+	umull	r2, r3, r1, r4
+	adcs	r4, r12, r2
+	umull	r7, r2, r1, r8
+	adcs	r11, r3, r7
+	umull	r7, r3, r1, r6
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	adcs	r12, r2, r7
+	mov	r7, r9
+	adc	r9, r3, #0
+	adds	r1, r10, r1
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r2, [sp, #96]                   @ 4-byte Reload
+	adcs	r1, r1, r5
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	adcs	r2, r2, r0
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r5, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	adcs	r3, r5, r4
+	ldr	r5, [sp, #84]                   @ 4-byte Reload
+	ldr	r4, [sp, #80]                   @ 4-byte Reload
+	adcs	r5, r5, r11
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	adcs	r11, r4, r12
+	ldr	r4, [sp, #72]                   @ 4-byte Reload
+	str	r2, [sp, #108]                  @ 4-byte Spill
+	adcs	r12, r4, r9
+	ldr	r4, [sp, #68]                   @ 4-byte Reload
+	str	r3, [sp, #92]                   @ 4-byte Spill
+	adc	r9, r4, #0
+	subs	r10, r1, lr
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	sbcs	lr, r2, r7
+	sbcs	r7, r0, r1
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	sbcs	r4, r3, r0
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	sbcs	r1, r5, r0
+	sbcs	r2, r11, r8
+	sbcs	r0, r12, r6
+	sbc	r6, r9, #0
+	ands	r6, r6, #1
+	movne	r0, r12
+	movne	r2, r11
+	str	r0, [r3, #24]
+	movne	r1, r5
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	cmp	r6, #0
-	movge	r2, r4
-	movge	r3, r7
-	movge	r5, r1
-	stm	r0, {r2, r3, r5}
-	pop	{r4, r5, r6, r7, r11, lr}
-	mov	pc, lr
-.Lfunc_end47:
-	.size	mcl_fp_subNF3L, .Lfunc_end47-mcl_fp_subNF3L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add3L
-	.align	2
-	.type	mcl_fpDbl_add3L,%function
-mcl_fpDbl_add3L:                        @ @mcl_fpDbl_add3L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	ldm	r1, {r12, lr}
-	ldr	r7, [r2]
-	ldr	r11, [r1, #8]
-	ldr	r9, [r1, #12]
-	ldr	r10, [r1, #16]
-	ldr	r8, [r1, #20]
-	ldmib	r2, {r1, r5, r6}
-	ldr	r4, [r2, #16]
-	ldr	r2, [r2, #20]
-	adds	r7, r7, r12
-	adcs	r1, r1, lr
-	str	r7, [r0]
-	str	r1, [r0, #4]
-	adcs	r1, r5, r11
-	ldr	r5, [r3]
-	adcs	r7, r6, r9
-	str	r1, [r0, #8]
-	mov	r1, #0
-	adcs	r6, r4, r10
-	ldr	r4, [r3, #4]
-	ldr	r3, [r3, #8]
-	adcs	r2, r2, r8
-	adc	r1, r1, #0
-	subs	r5, r7, r5
-	sbcs	r4, r6, r4
-	sbcs	r3, r2, r3
-	sbc	r1, r1, #0
-	ands	r1, r1, #1
-	movne	r5, r7
-	movne	r4, r6
-	movne	r3, r2
-	str	r5, [r0, #12]
-	str	r4, [r0, #16]
-	str	r3, [r0, #20]
+	str	r2, [r3, #20]
+	str	r1, [r3, #16]
+	movne	r4, r0
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	str	r4, [r3, #12]
+	movne	r7, r0
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	str	r7, [r3, #8]
+	movne	lr, r0
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	cmp	r6, #0
+	str	lr, [r3, #4]
+	movne	r10, r0
+	str	r10, [r3]
+	add	sp, sp, #132
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end48:
-	.size	mcl_fpDbl_add3L, .Lfunc_end48-mcl_fpDbl_add3L
+.Lfunc_end26:
+	.size	mcl_fp_mont7L, .Lfunc_end26-mcl_fp_mont7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_sub3L
-	.align	2
-	.type	mcl_fpDbl_sub3L,%function
-mcl_fpDbl_sub3L:                        @ @mcl_fpDbl_sub3L
+                                        @ -- End function
+	.globl	mcl_fp_montNF7L                 @ -- Begin function mcl_fp_montNF7L
+	.p2align	2
+	.type	mcl_fp_montNF7L,%function
+	.code	32                              @ @mcl_fp_montNF7L
+mcl_fp_montNF7L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	ldm	r2, {r12, lr}
+	.pad	#104
+	sub	sp, sp, #104
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mov	r0, r2
+	str	r2, [sp, #40]                   @ 4-byte Spill
+	ldm	r2, {r4, r12}
+	ldr	r6, [r1, #4]
 	ldr	r7, [r1]
-	ldr	r11, [r2, #8]
-	ldr	r9, [r2, #12]
-	ldr	r10, [r2, #16]
-	ldr	r8, [r2, #20]
-	ldmib	r1, {r2, r5, r6}
-	ldr	r4, [r1, #16]
-	ldr	r1, [r1, #20]
-	subs	r7, r7, r12
-	sbcs	r2, r2, lr
-	str	r7, [r0]
-	str	r2, [r0, #4]
-	sbcs	r2, r5, r11
-	ldr	r5, [r3]
-	sbcs	r7, r6, r9
-	str	r2, [r0, #8]
-	mov	r2, #0
-	sbcs	r6, r4, r10
-	ldr	r4, [r3, #4]
-	ldr	r3, [r3, #8]
-	sbcs	r1, r1, r8
-	sbc	r2, r2, #0
-	adds	r5, r7, r5
-	adcs	r4, r6, r4
-	adc	r3, r1, r3
-	ands	r2, r2, #1
-	moveq	r5, r7
-	moveq	r4, r6
-	moveq	r3, r1
-	str	r5, [r0, #12]
-	str	r4, [r0, #16]
-	str	r3, [r0, #20]
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end49:
-	.size	mcl_fpDbl_sub3L, .Lfunc_end49-mcl_fpDbl_sub3L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mulUnitPre4L
-	.align	2
-	.type	mcl_fp_mulUnitPre4L,%function
-mcl_fp_mulUnitPre4L:                    @ @mcl_fp_mulUnitPre4L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r11, lr}
-	push	{r4, r5, r6, r7, r11, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r1, [r1, #12]
-	umull	r4, r6, r12, r2
-	umull	r7, r12, lr, r2
-	str	r4, [r0]
-	mov	r5, r6
-	mov	r4, r7
-	umlal	r5, r4, r3, r2
-	str	r5, [r0, #4]
-	str	r4, [r0, #8]
-	umull	r5, lr, r1, r2
-	umull	r1, r4, r3, r2
-	adds	r1, r6, r1
-	adcs	r1, r4, r7
-	adcs	r1, r12, r5
-	str	r1, [r0, #12]
-	adc	r1, lr, #0
-	str	r1, [r0, #16]
-	pop	{r4, r5, r6, r7, r11, lr}
-	mov	pc, lr
-.Lfunc_end50:
-	.size	mcl_fp_mulUnitPre4L, .Lfunc_end50-mcl_fp_mulUnitPre4L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_mulPre4L
-	.align	2
-	.type	mcl_fpDbl_mulPre4L,%function
-mcl_fpDbl_mulPre4L:                     @ @mcl_fpDbl_mulPre4L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#40
-	sub	sp, sp, #40
-	mov	lr, r2
-	ldr	r11, [r1]
-	ldr	r4, [lr]
-	ldmib	r1, {r8, r12}
-	ldr	r3, [r1, #12]
-	umull	r2, r7, r11, r4
-	umull	r6, r9, r8, r4
-	str	r12, [sp]               @ 4-byte Spill
-	adds	r6, r7, r6
-	str	r2, [sp, #36]           @ 4-byte Spill
-	mov	r2, r3
-	umull	r6, r10, r12, r4
-	adcs	r5, r9, r6
-	umlal	r7, r6, r8, r4
-	umull	r5, r9, r3, r4
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	ldr	r4, [lr, #4]
-	adcs	r10, r10, r5
-	str	r3, [r0]
-	adc	r3, r9, #0
-	str	r3, [sp, #24]           @ 4-byte Spill
-	umull	r5, r3, r11, r4
-	adds	r7, r5, r7
-	str	r3, [sp, #32]           @ 4-byte Spill
-	str	r7, [sp, #36]           @ 4-byte Spill
-	umull	r7, r3, r8, r4
-	str	r3, [sp, #28]           @ 4-byte Spill
-	adcs	r3, r7, r6
-	umull	r7, r9, r12, r4
-	mov	r12, r2
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	adcs	r7, r7, r10
-	umull	r5, r10, r2, r4
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	mov	r4, #0
-	adcs	r5, r5, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adc	r4, r4, #0
-	adds	r6, r3, r6
-	adcs	r7, r7, r2
-	ldr	r2, [lr, #12]
-	str	r7, [sp, #24]           @ 4-byte Spill
-	adcs	r7, r5, r9
-	str	r7, [sp, #20]           @ 4-byte Spill
-	adc	r7, r4, r10
-	ldr	r4, [lr, #8]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	str	r7, [r0, #4]
-	umull	r5, r7, r11, r4
-	adds	r5, r5, r6
-	str	r7, [sp, #12]           @ 4-byte Spill
-	str	r5, [r0, #8]
-	ldm	r1, {r11, lr}
 	ldr	r5, [r1, #8]
-	ldr	r1, [r1, #12]
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	umull	r6, r7, r1, r2
-	umull	r10, r1, r5, r2
-	str	r1, [sp, #32]           @ 4-byte Spill
-	umull	r5, r1, lr, r2
-	str	r6, [sp, #8]            @ 4-byte Spill
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	umull	r6, r1, r11, r2
-	umull	r2, r11, r12, r4
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldr	r1, [sp]                @ 4-byte Reload
-	umull	lr, r12, r1, r4
-	umull	r9, r1, r8, r4
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	mov	r8, #0
-	adcs	r3, r9, r3
-	adcs	r4, lr, r4
-	adcs	r2, r2, r7
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	adc	lr, r8, #0
-	adds	r3, r3, r7
-	adcs	r1, r4, r1
-	adcs	r2, r2, r12
-	adc	r4, lr, r11
-	adds	r3, r6, r3
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	str	r3, [r0, #12]
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	adcs	r1, r5, r1
-	adcs	r2, r10, r2
-	adcs	r3, r3, r4
-	adc	r7, r8, #0
-	adds	r1, r1, r6
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	str	r1, [r0, #20]
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r3, r1
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adc	r1, r7, r1
-	str	r1, [r0, #28]
-	add	sp, sp, #40
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end51:
-	.size	mcl_fpDbl_mulPre4L, .Lfunc_end51-mcl_fpDbl_mulPre4L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sqrPre4L
-	.align	2
-	.type	mcl_fpDbl_sqrPre4L,%function
-mcl_fpDbl_sqrPre4L:                     @ @mcl_fpDbl_sqrPre4L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#16
-	sub	sp, sp, #16
-	ldm	r1, {r2, r3, r12}
-	ldr	r8, [r1, #12]
-	umull	r4, r6, r2, r2
-	umull	r11, lr, r12, r2
-	str	r4, [r0]
-	umull	r10, r4, r8, r2
-	mov	r7, r11
-	mov	r5, r6
-	str	lr, [sp, #12]           @ 4-byte Spill
-	str	r4, [sp, #8]            @ 4-byte Spill
-	umull	r4, r9, r3, r2
-	umlal	r5, r7, r3, r2
-	adds	r2, r6, r4
-	adcs	r2, r9, r11
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adcs	r10, lr, r10
-	adc	r2, r2, #0
-	adds	r4, r4, r5
-	str	r2, [sp]                @ 4-byte Spill
-	umull	r6, r2, r3, r3
-	str	r4, [sp, #8]            @ 4-byte Spill
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [sp]                @ 4-byte Reload
-	adcs	r5, r6, r7
-	umull	r6, r7, r12, r3
-	adcs	lr, r6, r10
-	umull	r4, r10, r8, r3
-	adcs	r3, r4, r2
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	mov	r4, #0
-	adc	r4, r4, #0
-	adds	r5, r5, r9
-	adcs	r9, lr, r2
-	adcs	r2, r3, r7
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	adc	r4, r4, r10
-	adds	r5, r11, r5
-	str	r2, [sp, #4]            @ 4-byte Spill
-	umull	r2, r10, r8, r12
-	umull	lr, r8, r12, r12
-	adcs	r6, r6, r9
-	stmib	r0, {r3, r5}
-	mov	r5, #0
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	adcs	r3, lr, r3
-	adcs	r2, r2, r4
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	adc	r5, r5, #0
-	adds	r6, r6, r4
-	adcs	r11, r3, r7
-	adcs	lr, r2, r8
-	adc	r8, r5, r10
-	ldr	r5, [r1]
-	ldmib	r1, {r4, r7}
-	ldr	r1, [r1, #12]
-	umull	r12, r2, r1, r1
-	umull	r3, r9, r7, r1
-	umull	r7, r10, r4, r1
-	str	r2, [sp, #12]           @ 4-byte Spill
-	umull	r4, r2, r5, r1
-	adds	r1, r4, r6
-	adcs	r4, r7, r11
-	str	r1, [r0, #12]
-	mov	r7, #0
-	adcs	r3, r3, lr
-	adcs	r1, r12, r8
-	adc	r7, r7, #0
-	adds	r2, r4, r2
-	str	r2, [r0, #16]
-	adcs	r2, r3, r10
-	adcs	r1, r1, r9
-	str	r2, [r0, #20]
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adc	r1, r7, r1
-	str	r1, [r0, #28]
-	add	sp, sp, #16
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end52:
-	.size	mcl_fpDbl_sqrPre4L, .Lfunc_end52-mcl_fpDbl_sqrPre4L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mont4L
-	.align	2
-	.type	mcl_fp_mont4L,%function
-mcl_fp_mont4L:                          @ @mcl_fp_mont4L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#76
-	sub	sp, sp, #76
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r2, #8]
-	ldr	r9, [r2]
-	ldr	r8, [r2, #4]
-	ldr	r6, [r3, #-4]
-	ldr	r11, [r1, #8]
-	ldr	r10, [r1, #12]
-	ldr	r7, [r3, #8]
-	ldr	r5, [r3, #4]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r2, #12]
-	ldr	r2, [r1, #4]
-	str	r6, [sp, #44]           @ 4-byte Spill
-	str	r7, [sp, #40]           @ 4-byte Spill
-	str	r5, [sp, #52]           @ 4-byte Spill
-	str	r11, [sp, #60]          @ 4-byte Spill
-	str	r10, [sp, #56]          @ 4-byte Spill
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r1]
-	ldr	r1, [r3]
-	str	r2, [sp, #72]           @ 4-byte Spill
-	ldr	r3, [r3, #12]
-	umull	r4, r2, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	str	r1, [sp, #48]           @ 4-byte Spill
-	mul	r0, r4, r6
-	str	r4, [sp, #24]           @ 4-byte Spill
-	mov	r4, r5
-	umull	lr, r6, r0, r7
-	umull	r7, r12, r0, r1
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	str	r6, [sp, #16]           @ 4-byte Spill
-	mov	r6, r12
-	str	lr, [sp, #8]            @ 4-byte Spill
-	umlal	r6, lr, r0, r5
-	umull	r5, r1, r10, r9
-	str	r1, [sp, #68]           @ 4-byte Spill
-	str	r5, [sp, #12]           @ 4-byte Spill
-	umull	r1, r10, r11, r9
-	umull	r11, r5, r7, r9
-	adds	r7, r2, r11
-	adcs	r5, r5, r1
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	adcs	r11, r10, r5
-	ldr	r5, [sp, #68]           @ 4-byte Reload
-	str	r3, [sp, #68]           @ 4-byte Spill
-	adc	r5, r5, #0
-	str	r5, [sp, #12]           @ 4-byte Spill
-	umull	r5, r7, r0, r3
-	umull	r10, r3, r0, r4
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	adds	r0, r12, r10
-	mov	r12, #0
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r3, r0
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	adc	r3, r7, #0
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	adds	r4, r5, r4
-	umlal	r2, r1, r7, r9
-	adcs	r2, r6, r2
-	adcs	r1, lr, r1
-	str	r2, [sp, #24]           @ 4-byte Spill
-	adcs	r9, r0, r11
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #20]           @ 4-byte Spill
-	adcs	r6, r3, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	mov	r3, r7
-	adc	r10, r12, #0
-	umull	r2, r12, r8, r7
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	umull	r5, r4, r8, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	umull	r1, lr, r8, r0
-	umull	r11, r0, r8, r7
-	adds	r2, r0, r2
-	adcs	r2, r12, r1
-	umlal	r0, r1, r8, r3
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	ldr	r8, [sp, #48]           @ 4-byte Reload
-	adcs	r2, lr, r5
-	adc	r5, r4, #0
-	adds	r7, r3, r11
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	ldr	r11, [sp, #40]          @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adcs	r0, r9, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	adcs	r0, r6, r2
-	str	r0, [sp, #16]           @ 4-byte Spill
-	adcs	r0, r10, r5
-	ldr	r10, [sp, #44]          @ 4-byte Reload
-	str	r0, [sp, #12]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	mul	r5, r7, r10
-	umull	r6, r0, r5, r11
-	str	r0, [sp]                @ 4-byte Spill
-	umull	r0, r3, r5, r8
-	mov	r4, r6
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	mov	r2, r3
-	umlal	r2, r4, r5, r1
-	umull	r9, r12, r5, r0
-	umull	lr, r0, r5, r1
-	adds	r3, r3, lr
-	adcs	r0, r0, r6
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	ldr	r0, [sp]                @ 4-byte Reload
-	adcs	r0, r0, r9
-	adc	r1, r12, #0
-	adds	r3, r3, r7
-	ldr	r12, [sp, #64]          @ 4-byte Reload
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	adcs	r2, r2, r3
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	umull	r9, r7, r3, r12
-	adcs	r2, r4, r2
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	umull	r6, r5, r3, r0
-	umull	r0, r4, r3, r1
-	umull	r1, lr, r3, r2
-	adds	r1, r7, r1
-	adcs	r1, lr, r0
-	umlal	r7, r0, r3, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r4, r6
-	adc	r6, r5, #0
-	adds	r3, r2, r9
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r2, r2, r7
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
+	umull	r9, r8, r6, r4
+	mov	r11, r6
+	str	r6, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r0, #12]
+	umull	lr, r10, r7, r4
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [r1, #12]
+	str	r7, [sp, #96]                   @ 4-byte Spill
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	str	r5, [sp, #80]                   @ 4-byte Spill
+	adds	r6, r10, r9
+	ldr	r2, [r2, #8]
+	umull	r6, r9, r5, r4
+	ldr	r5, [r1, #20]
+	str	r5, [sp, #48]                   @ 4-byte Spill
+	str	r2, [sp]                        @ 4-byte Spill
+	adcs	r7, r8, r6
+	umlal	r10, r6, r11, r4
+	umull	r7, r8, r0, r4
+	adcs	r0, r9, r7
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [r1, #16]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	umull	r7, r9, r0, r4
+	adcs	r0, r8, r7
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	umull	r7, r0, r5, r4
+	adcs	r5, r9, r7
+	str	r5, [sp, #76]                   @ 4-byte Spill
+	ldr	r5, [r1, #24]
+	str	r5, [sp, #72]                   @ 4-byte Spill
+	ldr	r7, [r3, #4]
+	umull	r1, r9, r5, r4
+	ldr	r5, [r3]
+	str	r5, [sp, #44]                   @ 4-byte Spill
+	str	r7, [sp, #56]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
+	ldr	r1, [r3, #-4]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	adc	r0, r9, #0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	mul	r0, r1, lr
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r9, [r3, #8]
+	str	r9, [sp, #100]                  @ 4-byte Spill
+	umull	r1, r2, r0, r5
+	str	r2, [sp, #20]                   @ 4-byte Spill
+	adds	r1, lr, r1
+	umull	r1, lr, r0, r7
+	adcs	r11, r10, r1
+	umull	r5, r1, r0, r9
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldr	r1, [r3, #12]
+	adcs	r9, r6, r5
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	umull	r5, r10, r0, r1
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	adcs	r7, r1, r5
+	ldr	r1, [r3, #16]
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	umull	r5, r8, r0, r1
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r4, r1, r5
+	ldr	r1, [r3, #20]
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	umull	r5, r6, r0, r1
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	adcs	r5, r1, r5
+	ldr	r1, [r3, #24]
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	umull	r3, r2, r0, r1
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	adc	r3, r1, #0
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adds	r11, r11, r1
+	adcs	r1, r9, lr
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r1, r7, r1
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	adcs	r1, r4, r10
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	adcs	r1, r5, r8
+	str	r1, [sp, #16]                   @ 4-byte Spill
 	adcs	r0, r0, r6
-	mul	r6, r3, r10
-	str	r0, [sp, #16]           @ 4-byte Spill
-	mov	r0, #0
-	umull	r7, r9, r6, r11
-	umull	r10, r4, r6, r8
-	adc	r0, r0, #0
-	mov	r2, r4
-	mov	r5, r7
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	umlal	r2, r5, r6, r1
-	umull	r8, r12, r6, r0
-	umull	lr, r0, r6, r1
-	adds	r6, r4, lr
-	adcs	r0, r0, r7
-	adcs	r0, r9, r8
-	adc	r1, r12, #0
-	adds	r3, r10, r3
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	adcs	r2, r2, r3
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adcs	r8, r5, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	adcs	r9, r0, r2
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	umull	lr, r7, r3, r5
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	umull	r6, r10, r3, r0
-	umull	r0, r4, r3, r1
-	umull	r1, r12, r3, r2
-	adds	r1, r7, r1
-	adcs	r1, r12, r0
-	umlal	r7, r0, r3, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	ldr	r12, [sp, #68]          @ 4-byte Reload
-	adcs	r1, r4, r6
-	ldr	r4, [sp, #40]           @ 4-byte Reload
-	adc	r6, r10, #0
-	adds	lr, r2, lr
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	adcs	r10, r8, r7
-	adcs	r0, r9, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	adcs	r0, r11, r1
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r8, r0, r6
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	mul	r6, lr, r0
-	umull	r1, r3, r6, r5
-	umull	r11, r7, r6, r2
-	umull	r0, r9, r6, r4
-	adds	r1, r7, r1
-	adcs	r1, r3, r0
-	umlal	r7, r0, r6, r5
-	umull	r1, r3, r6, r12
-	adcs	r1, r9, r1
-	mov	r9, r5
-	adc	r5, r3, #0
-	adds	r3, r11, lr
-	adcs	r3, r7, r10
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	adcs	lr, r5, r8
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	adc	r8, r5, #0
-	subs	r6, r3, r2
-	sbcs	r5, r0, r9
-	sbcs	r4, r1, r4
-	sbcs	r7, lr, r12
-	sbc	r2, r8, #0
-	ands	r2, r2, #1
-	movne	r5, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	movne	r6, r3
-	movne	r4, r1
-	cmp	r2, #0
-	movne	r7, lr
-	str	r6, [r0]
-	str	r5, [r0, #4]
-	str	r4, [r0, #8]
-	str	r7, [r0, #12]
-	add	sp, sp, #76
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end53:
-	.size	mcl_fp_mont4L, .Lfunc_end53-mcl_fp_mont4L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montNF4L
-	.align	2
-	.type	mcl_fp_montNF4L,%function
-mcl_fp_montNF4L:                        @ @mcl_fp_montNF4L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#140
-	sub	sp, sp, #140
-	mov	r10, r3
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	lr, [r1]
-	ldmib	r1, {r4, r8, r12}
-	ldr	r3, [r2]
-	ldr	r1, [r2, #4]
-	ldr	r0, [r2, #8]
-	ldr	r2, [r2, #12]
-	umull	r6, r5, r2, r8
-	str	r5, [sp, #124]          @ 4-byte Spill
-	umull	r5, r7, r2, lr
-	str	r6, [sp, #112]          @ 4-byte Spill
-	str	r5, [sp, #128]          @ 4-byte Spill
-	mov	r5, r6
-	mov	r6, r7
-	str	r7, [sp, #108]          @ 4-byte Spill
-	umlal	r6, r5, r2, r4
-	str	r5, [sp, #120]          @ 4-byte Spill
-	umull	r7, r5, r0, r8
-	str	r6, [sp, #116]          @ 4-byte Spill
-	str	r5, [sp, #84]           @ 4-byte Spill
-	umull	r5, r6, r0, lr
-	str	r7, [sp, #72]           @ 4-byte Spill
-	str	r5, [sp, #88]           @ 4-byte Spill
-	str	r6, [sp, #68]           @ 4-byte Spill
-	mov	r5, r6
-	mov	r6, r7
-	umlal	r5, r6, r0, r4
-	str	r5, [sp, #76]           @ 4-byte Spill
-	str	r6, [sp, #80]           @ 4-byte Spill
-	umull	r6, r5, r1, r8
-	str	r5, [sp, #44]           @ 4-byte Spill
-	umull	r5, r7, r1, lr
-	str	r6, [sp, #32]           @ 4-byte Spill
-	str	r5, [sp, #48]           @ 4-byte Spill
-	mov	r5, r6
-	mov	r6, r7
-	str	r7, [sp, #28]           @ 4-byte Spill
-	umlal	r6, r5, r1, r4
-	str	r5, [sp, #40]           @ 4-byte Spill
-	umull	r9, r5, r8, r3
-	str	r6, [sp, #36]           @ 4-byte Spill
-	str	r5, [sp, #136]          @ 4-byte Spill
-	umull	r6, r5, lr, r3
-	mov	r8, r9
-	str	r6, [sp, #4]            @ 4-byte Spill
-	umull	r11, r6, r2, r12
-	mov	lr, r5
-	str	r6, [sp, #104]          @ 4-byte Spill
-	umull	r7, r6, r2, r4
-	umlal	lr, r8, r4, r3
-	str	r11, [sp, #100]         @ 4-byte Spill
-	str	r6, [sp, #96]           @ 4-byte Spill
-	umull	r6, r2, r0, r12
-	str	r7, [sp, #92]           @ 4-byte Spill
-	str	r6, [sp, #60]           @ 4-byte Spill
-	str	r2, [sp, #64]           @ 4-byte Spill
-	umull	r6, r2, r0, r4
-	str	r2, [sp, #56]           @ 4-byte Spill
-	umull	r2, r0, r1, r12
-	str	r6, [sp, #52]           @ 4-byte Spill
-	str	r2, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #24]           @ 4-byte Spill
-	umull	r2, r0, r1, r4
-	str	r2, [sp, #12]           @ 4-byte Spill
-	umull	r2, r6, r4, r3
-	str	r0, [sp, #16]           @ 4-byte Spill
-	umull	r0, r1, r12, r3
-	ldr	r4, [r10, #4]
-	adds	r2, r5, r2
-	ldr	r5, [sp, #4]            @ 4-byte Reload
-	adcs	r2, r6, r9
-	ldr	r9, [r10, #8]
-	ldr	r2, [sp, #136]          @ 4-byte Reload
-	str	r4, [sp, #136]          @ 4-byte Spill
-	adcs	r12, r2, r0
-	ldr	r2, [r10, #-4]
-	adc	r0, r1, #0
-	str	r0, [sp]                @ 4-byte Spill
-	ldr	r0, [r10]
-	mul	r1, r5, r2
-	mov	r7, r2
-	umull	r3, r11, r1, r0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	mov	r6, r0
-	umull	r2, r0, r1, r9
-	adds	r3, r3, r5
-	umull	r3, r5, r1, r4
-	adcs	r3, r3, lr
-	ldr	lr, [r10, #12]
-	adcs	r2, r2, r8
-	umull	r4, r8, r1, lr
-	adcs	r1, r4, r12
-	ldr	r4, [sp]                @ 4-byte Reload
-	adc	r4, r4, #0
-	adds	r3, r3, r11
-	adcs	r2, r2, r5
-	adcs	r12, r1, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r1, r4, r8
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	adds	r4, r0, r4
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r4, [sp, #16]           @ 4-byte Reload
-	adcs	r4, r4, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	adcs	r4, r0, r4
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r5, r0, #0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adc	r0, r3, r2
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	umull	r3, r4, r12, r2
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	ldr	r5, [sp, #52]                   @ 4-byte Reload
+	umull	r9, r0, r12, r1
 	adds	r3, r0, r3
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r2, r0, r2
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	mov	r12, r7
-	adcs	r8, r4, r1
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	adc	r10, r5, #0
-	mul	r5, r3, r7
-	umull	r7, r11, r5, r6
-	adds	r3, r7, r3
-	umull	r3, r7, r5, r1
-	adcs	r2, r3, r2
-	umull	r3, r4, r5, r9
-	adcs	r0, r3, r0
-	umull	r3, r6, r5, lr
-	adcs	r3, r3, r8
-	ldr	r8, [sp, #8]            @ 4-byte Reload
-	adc	r5, r10, #0
-	adds	r2, r2, r11
+	umull	r1, r3, r12, r7
+	ldr	r7, [sp, #48]                   @ 4-byte Reload
+	adcs	r4, r4, r1
+	umlal	r0, r1, r12, r2
+	umull	r4, r6, r12, r5
+	ldr	r5, [sp, #68]                   @ 4-byte Reload
+	adcs	r10, r3, r4
+	umull	r4, r3, r12, r5
+	adcs	r8, r6, r4
+	umull	r6, r4, r12, r7
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	adcs	r5, r3, r6
+	umull	r6, r3, r12, r7
+	ldr	r7, [sp, #28]                   @ 4-byte Reload
+	adcs	r4, r4, r6
+	adc	r2, r3, #0
+	adds	r3, r9, r11
 	adcs	r0, r0, r7
-	adcs	r3, r3, r4
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	adc	r7, r5, r6
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	ldr	r6, [sp, #88]           @ 4-byte Reload
-	adds	r4, r4, r5
-	ldr	r5, [sp, #56]           @ 4-byte Reload
-	ldr	r4, [sp, #72]           @ 4-byte Reload
-	adcs	r4, r5, r4
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	ldr	r4, [sp, #84]           @ 4-byte Reload
-	adcs	r4, r4, r5
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	adc	r5, r5, #0
-	adds	r2, r6, r2
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #80]           @ 4-byte Reload
+	ldr	r7, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r1, r7
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
+	adcs	r6, r10, r7
+	ldr	r7, [sp, #16]                   @ 4-byte Reload
+	adcs	r11, r8, r7
+	ldr	r7, [sp, #12]                   @ 4-byte Reload
+	ldr	r8, [sp, #56]                   @ 4-byte Reload
+	adcs	r7, r5, r7
+	ldr	r5, [sp, #8]                    @ 4-byte Reload
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	adcs	r7, r4, r5
+	ldr	r5, [sp, #64]                   @ 4-byte Reload
+	adc	r2, r2, #0
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	str	r7, [sp, #20]                   @ 4-byte Spill
+	mul	r2, r5, r3
+	ldr	r5, [sp, #44]                   @ 4-byte Reload
+	umull	r4, r7, r2, r5
+	str	r7, [sp, #24]                   @ 4-byte Spill
+	adds	r3, r3, r4
+	ldr	r4, [sp, #24]                   @ 4-byte Reload
+	umull	r3, r7, r2, r8
+	str	r7, [sp, #12]                   @ 4-byte Spill
+	adcs	lr, r0, r3
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	umull	r3, r7, r2, r0
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	str	r7, [sp, #8]                    @ 4-byte Spill
+	adcs	r12, r1, r3
+	umull	r3, r10, r2, r0
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r3, r6, r3
-	adcs	r6, r4, r7
-	adc	r10, r5, #0
-	mul	r5, r2, r12
-	umull	r7, r11, r5, r8
-	adds	r2, r7, r2
-	umull	r2, r7, r5, r1
-	adcs	r0, r2, r0
-	umull	r2, r4, r5, r9
-	adcs	r2, r2, r3
-	umull	r3, r1, r5, lr
-	adcs	r3, r3, r6
-	ldr	r6, [sp, #128]          @ 4-byte Reload
-	adc	r5, r10, #0
-	adds	r0, r0, r11
+	umull	r6, r9, r2, r0
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r5, r11, r6
+	umull	r6, r1, r2, r0
+	ldr	r11, [sp, #76]                  @ 4-byte Reload
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	r6, r0, r6
+	umull	r7, r0, r2, r11
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
 	adcs	r2, r2, r7
+	ldr	r7, [sp, #28]                   @ 4-byte Reload
+	adc	r7, r7, #0
+	adds	r4, lr, r4
+	str	r4, [sp, #28]                   @ 4-byte Spill
+	ldr	r4, [sp, #12]                   @ 4-byte Reload
+	adcs	r4, r12, r4
+	str	r4, [sp, #24]                   @ 4-byte Spill
+	ldr	r4, [sp, #8]                    @ 4-byte Reload
+	ldr	r12, [sp, #60]                  @ 4-byte Reload
 	adcs	r3, r3, r4
-	ldr	r4, [sp, #108]          @ 4-byte Reload
-	adc	r1, r5, r1
-	ldr	r5, [sp, #92]           @ 4-byte Reload
-	adds	r4, r4, r5
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	ldr	r4, [sp, #112]          @ 4-byte Reload
-	adcs	r4, r5, r4
-	ldr	r5, [sp, #100]          @ 4-byte Reload
-	ldr	r4, [sp, #124]          @ 4-byte Reload
-	adcs	r4, r4, r5
-	ldr	r5, [sp, #104]          @ 4-byte Reload
-	adc	r5, r5, #0
-	adds	r0, r6, r0
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	adcs	r2, r6, r2
-	ldr	r6, [sp, #120]          @ 4-byte Reload
+	str	r3, [sp, #20]                   @ 4-byte Spill
+	adcs	r3, r5, r10
+	str	r3, [sp, #16]                   @ 4-byte Spill
+	adcs	r3, r6, r9
+	str	r3, [sp, #12]                   @ 4-byte Spill
+	adcs	r1, r2, r1
+	str	r1, [sp, #8]                    @ 4-byte Spill
+	adc	r0, r7, r0
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	ldr	r0, [sp]                        @ 4-byte Reload
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	umull	r2, r6, r0, r12
+	ldr	r5, [sp, #52]                   @ 4-byte Reload
+	ldr	r4, [sp, #68]                   @ 4-byte Reload
+	ldr	r7, [sp, #48]                   @ 4-byte Reload
+	umull	r11, lr, r0, r1
+	ldr	r9, [sp, #72]                   @ 4-byte Reload
+	adds	r2, lr, r2
+	umull	r1, r2, r0, r3
+	adcs	r6, r6, r1
+	umlal	lr, r1, r0, r12
+	umull	r6, r3, r0, r5
+	adcs	r5, r2, r6
+	umull	r6, r2, r0, r4
+	adcs	r10, r3, r6
+	umull	r6, r3, r0, r7
+	ldr	r7, [sp, #28]                   @ 4-byte Reload
+	adcs	r4, r2, r6
+	umull	r6, r2, r0, r9
+	ldr	r9, [sp, #44]                   @ 4-byte Reload
+	adcs	r3, r3, r6
+	ldr	r6, [sp, #24]                   @ 4-byte Reload
+	adc	r2, r2, #0
+	adds	r7, r11, r7
+	adcs	r0, lr, r6
+	ldr	r6, [sp, #20]                   @ 4-byte Reload
+	adcs	r1, r1, r6
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	adcs	r6, r5, r6
+	ldr	r5, [sp, #12]                   @ 4-byte Reload
+	adcs	r11, r10, r5
+	ldr	r5, [sp, #8]                    @ 4-byte Reload
+	adcs	r10, r4, r5
+	ldr	r5, [sp, #4]                    @ 4-byte Reload
+	ldr	r4, [sp, #92]                   @ 4-byte Reload
+	adcs	r3, r3, r5
+	str	r3, [sp, #28]                   @ 4-byte Spill
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	adc	r2, r2, #0
+	str	r2, [sp, #24]                   @ 4-byte Spill
+	mul	r2, r3, r7
+	umull	r3, r5, r2, r9
+	str	r5, [sp, #20]                   @ 4-byte Spill
+	adds	r3, r7, r3
+	umull	r3, r7, r2, r8
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	ldr	r7, [sp, #100]                  @ 4-byte Reload
+	adcs	r8, r0, r3
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	umull	r3, lr, r2, r7
+	ldr	r7, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	umull	r3, r12, r2, r4
+	ldr	r4, [sp, #88]                   @ 4-byte Reload
 	adcs	r3, r6, r3
-	adcs	r11, r4, r1
-	adc	r10, r5, #0
-	mul	r5, r0, r12
-	umull	r7, r1, r5, r8
-	adds	r0, r7, r0
-	ldr	r7, [sp, #136]          @ 4-byte Reload
-	umull	r0, r12, r5, r9
-	umull	r6, r4, r5, r7
-	adcs	r2, r6, r2
-	adcs	r0, r0, r3
-	umull	r3, r6, r5, lr
-	adcs	r3, r3, r11
-	adc	r5, r10, #0
-	adds	r1, r2, r1
-	adcs	r0, r0, r4
-	adcs	r2, r3, r12
-	adc	r3, r5, r6
-	subs	r4, r1, r8
-	sbcs	r7, r0, r7
-	sbcs	r6, r2, r9
-	sbc	r5, r3, lr
-	cmp	r5, #0
-	movlt	r7, r0
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	movlt	r4, r1
-	movlt	r6, r2
-	cmp	r5, #0
-	movlt	r5, r3
-	stm	r0, {r4, r7}
-	str	r6, [r0, #8]
-	str	r5, [r0, #12]
-	add	sp, sp, #140
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end54:
-	.size	mcl_fp_montNF4L, .Lfunc_end54-mcl_fp_montNF4L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montRed4L
-	.align	2
-	.type	mcl_fp_montRed4L,%function
-mcl_fp_montRed4L:                       @ @mcl_fp_montRed4L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#60
-	sub	sp, sp, #60
-	ldr	r7, [r1, #4]
-	ldr	r6, [r2, #-4]
-	ldr	r10, [r1]
-	ldr	r3, [r2, #8]
-	ldr	r8, [r2]
-	ldr	r12, [r2, #4]
-	ldr	r2, [r2, #12]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r1, #8]
-	str	r6, [sp, #56]           @ 4-byte Spill
-	str	r3, [sp, #40]           @ 4-byte Spill
-	str	r2, [sp, #36]           @ 4-byte Spill
-	str	r8, [sp, #32]           @ 4-byte Spill
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r1, #12]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	mul	r7, r10, r6
-	umull	r6, r5, r7, r3
-	str	r5, [sp, #20]           @ 4-byte Spill
-	mov	r5, r3
-	umull	r4, r3, r7, r8
-	mov	lr, r6
-	str	r4, [sp, #24]           @ 4-byte Spill
-	umull	r9, r4, r7, r2
-	umull	r11, r2, r7, r12
-	mov	r0, r3
-	adds	r3, r3, r11
-	umlal	r0, lr, r7, r12
-	adcs	r2, r2, r6
-	ldr	r6, [sp, #56]           @ 4-byte Reload
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r2, r2, r9
-	str	r2, [sp, #20]           @ 4-byte Spill
-	adc	r2, r4, #0
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adds	r4, r10, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	add	r10, r1, #16
-	adcs	r11, r2, r0
-	mul	r4, r11, r6
-	umull	r9, r0, r4, r5
-	str	r0, [sp, #24]           @ 4-byte Spill
-	umull	r0, r2, r4, r8
-	mov	r5, r9
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r1, #28]
-	mov	r7, r2
-	umlal	r7, r5, r4, r12
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r1, r8, r10}
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r3, lr
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r3, r3, r0
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	str	r3, [sp, #48]           @ 4-byte Spill
-	adcs	r1, r1, r0
-	adcs	r0, r8, #0
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r8, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	adcs	r0, r10, #0
-	ldr	r10, [sp, #36]          @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	umull	r1, lr, r4, r10
-	adcs	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	umull	r3, r0, r4, r12
-	adds	r3, r2, r3
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #40]           @ 4-byte Reload
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp]                @ 4-byte Spill
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adc	r1, lr, #0
-	adds	r2, r2, r11
-	adcs	r11, r7, r0
-	mul	r3, r11, r6
-	umull	r2, r0, r3, r9
-	str	r0, [sp, #24]           @ 4-byte Spill
-	umull	r0, r6, r3, r8
-	mov	r7, r2
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	mov	r4, r6
-	umlal	r4, r7, r3, r12
-	adcs	r0, r5, r0
-	ldr	r5, [sp]                @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
+	umull	r6, r5, r2, r4
+	adcs	r6, r11, r6
+	umull	r4, r11, r2, r7
+	adcs	r4, r10, r4
+	umull	r7, r10, r2, r0
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r2, r0, r7
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adc	r7, r0, #0
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adds	r0, r8, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	ldr	r8, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r1, r0
-	umull	r1, r5, r3, r10
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	umull	lr, r0, r3, r12
-	adds	r3, r6, lr
-	mov	lr, r8
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	adc	r1, r5, #0
-	adds	r2, r2, r11
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	adcs	r2, r4, r2
-	adcs	r3, r7, r3
-	str	r3, [sp, #48]           @ 4-byte Spill
-	ldr	r3, [sp, #20]           @ 4-byte Reload
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r3, lr
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	adcs	r0, r6, r12
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	adcs	r0, r4, r5
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r6, [sp, #32]                   @ 4-byte Reload
+	ldr	r3, [sp, #96]                   @ 4-byte Reload
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adcs	r0, r2, r11
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	adc	r0, r7, r10
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	umull	r4, r0, r6, r1
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	ldr	r10, [sp, #48]                  @ 4-byte Reload
+	umull	r11, r2, r6, r3
+	adds	r4, r2, r4
+	umull	r3, r4, r6, r7
 	adcs	r0, r0, r3
-	mov	r3, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
+	umlal	r2, r3, r6, r1
+	umull	r0, r7, r6, r8
+	adcs	r5, r4, r0
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	umull	r4, r1, r6, r0
+	mov	r0, r6
+	adcs	r4, r7, r4
+	umull	r7, r12, r6, r10
+	ldr	r6, [sp, #72]                   @ 4-byte Reload
+	adcs	lr, r1, r7
+	umull	r7, r1, r0, r6
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r7, r12, r7
+	adc	r12, r1, #0
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adds	r0, r11, r0
+	adcs	r2, r2, r1
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r3, r3, r1
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r6, r5, r1
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r11, r4, r1
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adcs	r1, lr, r1
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	adcs	r1, r7, r1
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	adc	r1, r12, #0
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r12, [sp, #76]                  @ 4-byte Reload
+	mul	r4, r1, r0
+	umull	r7, r1, r4, r9
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	adds	r0, r0, r7
+	umull	r0, r7, r4, r1
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	adcs	lr, r2, r0
+	umull	r2, r0, r4, r1
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r7, [sp, #84]                   @ 4-byte Reload
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adcs	r2, r3, r2
+	umull	r3, r0, r4, r1
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	adcs	r3, r6, r3
+	umull	r6, r5, r4, r1
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r6, r11, r6
+	umull	r1, r11, r4, r7
+	umull	r7, r9, r4, r12
+	ldr	r12, [sp, #60]                  @ 4-byte Reload
+	adcs	r1, r0, r1
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r4, r0, r7
+	ldr	r7, [sp, #32]                   @ 4-byte Reload
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adc	r7, r7, #0
+	adds	r0, lr, r0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r2, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	ldr	r2, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r3, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	adcs	r0, r6, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	adcs	r0, r1, r5
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	adcs	r0, r4, r11
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adc	r0, r7, r9
+	ldr	r9, [sp, #40]                   @ 4-byte Reload
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r6, [sp, #72]                   @ 4-byte Reload
+	ldr	r4, [r9, #16]
+	umull	r11, r3, r4, r2
+	ldr	r2, [sp, #80]                   @ 4-byte Reload
+	umull	r0, r1, r4, r12
+	adds	r0, r3, r0
+	umull	r5, r0, r4, r2
+	ldr	r2, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r1, r5
+	umlal	r3, r5, r4, r12
+	umull	r1, r7, r4, r8
+	adcs	r8, r0, r1
+	umull	r1, r0, r4, r2
+	adcs	lr, r7, r1
+	umull	r7, r1, r4, r10
+	adcs	r2, r0, r7
+	umull	r7, r0, r4, r6
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	adcs	r1, r1, r7
+	ldr	r7, [sp, #32]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	mul	r5, r2, r0
-	umull	r4, r0, r5, r12
-	umull	r8, r6, r5, lr
-	adds	r4, r6, r4
-	umull	r1, r4, r5, r3
+	adds	r4, r11, r7
+	ldr	r7, [sp, #28]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	ldr	r7, [sp, #24]                   @ 4-byte Reload
+	adcs	r5, r5, r7
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
+	adcs	r7, r8, r7
+	adcs	r11, lr, r6
+	ldr	r6, [sp, #12]                   @ 4-byte Reload
+	adcs	r10, r2, r6
+	ldr	r2, [sp, #8]                    @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	mul	r0, r1, r4
+	umull	r1, r6, r0, r2
+	ldr	r2, [sp, #56]                   @ 4-byte Reload
+	str	r6, [sp, #24]                   @ 4-byte Spill
+	adds	r1, r4, r1
+	ldr	r4, [sp, #84]                   @ 4-byte Reload
+	umull	r1, r6, r0, r2
+	str	r6, [sp, #20]                   @ 4-byte Spill
+	adcs	lr, r3, r1
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	umull	r3, r2, r0, r1
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	adcs	r3, r5, r3
+	umull	r5, r8, r0, r1
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r5, r7, r5
+	umull	r7, r12, r0, r1
+	adcs	r6, r11, r7
+	umull	r7, r1, r0, r4
+	ldr	r11, [sp, #76]                  @ 4-byte Reload
+	adcs	r7, r10, r7
+	umull	r4, r10, r0, r11
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	ldr	r4, [sp, #28]                   @ 4-byte Reload
+	adc	r4, r4, #0
+	adds	r2, lr, r2
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	adcs	r2, r3, r2
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	adcs	r11, r5, r2
+	adcs	r2, r6, r8
+	str	r2, [sp, #24]                   @ 4-byte Spill
+	adcs	r2, r7, r12
+	ldr	r7, [r9, #20]
 	adcs	r0, r0, r1
-	umlal	r6, r1, r5, r12
-	umull	r0, r7, r5, r10
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #44]           @ 4-byte Reload
-	adc	r5, r7, #0
-	adds	r2, r8, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	adcs	r2, r6, r2
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	adc	r0, r4, r10
+	str	r2, [sp, #20]                   @ 4-byte Spill
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	umull	r10, r2, r7, r1
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r6, [sp, #52]                   @ 4-byte Reload
+	umull	r4, r0, r7, r3
+	ldr	r8, [sp, #76]                   @ 4-byte Reload
+	adds	r4, r2, r4
+	umull	r5, r4, r7, r1
+	adcs	r0, r0, r5
+	umlal	r2, r5, r7, r3
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	umull	r0, r1, r7, r6
+	ldr	r6, [sp, #68]                   @ 4-byte Reload
+	adcs	lr, r4, r0
+	umull	r4, r0, r7, r6
+	ldr	r6, [sp, #48]                   @ 4-byte Reload
+	adcs	r12, r1, r4
+	umull	r4, r1, r7, r6
+	adcs	r9, r0, r4
+	umull	r4, r0, r7, r3
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
 	adcs	r1, r1, r4
-	ldr	r4, [sp, #28]           @ 4-byte Reload
+	adc	r0, r0, #0
+	adds	r4, r10, r3
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	adcs	r2, r2, r3
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	adcs	r5, r5, r11
+	adcs	r7, lr, r3
+	ldr	r3, [sp, #20]                   @ 4-byte Reload
+	adcs	r11, r12, r3
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	adcs	r9, r9, r3
+	ldr	r3, [sp, #12]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	mul	r0, r1, r4
+	umull	r1, r6, r0, r3
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	str	r6, [sp, #24]                   @ 4-byte Spill
+	adds	r1, r4, r1
+	ldr	r4, [sp, #84]                   @ 4-byte Reload
+	umull	r1, r6, r0, r3
+	ldr	r3, [sp, #100]                  @ 4-byte Reload
+	str	r6, [sp, #20]                   @ 4-byte Spill
+	adcs	r12, r2, r1
+	umull	r2, r10, r0, r3
+	ldr	r3, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r2, r5, r2
+	umull	r5, lr, r0, r3
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r5, r7, r5
+	umull	r7, r6, r0, r3
+	adcs	r7, r11, r7
+	umull	r3, r11, r0, r4
+	adcs	r3, r9, r3
+	umull	r4, r9, r0, r8
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	ldr	r4, [sp, #28]                   @ 4-byte Reload
+	adc	r4, r4, #0
+	adds	r8, r12, r1
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	adcs	r1, r5, r10
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	adcs	r1, r7, lr
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	adcs	r1, r3, r6
+	ldr	r5, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, r11
-	adcs	r9, r5, r4
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	adc	r8, r4, #0
-	subs	r6, r2, lr
-	sbcs	r5, r1, r12
-	sbcs	r4, r0, r3
-	sbcs	r7, r9, r10
-	sbc	r3, r8, #0
-	ands	r3, r3, #1
-	movne	r4, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	movne	r6, r2
-	movne	r5, r1
-	cmp	r3, #0
-	movne	r7, r9
-	str	r6, [r0]
-	str	r5, [r0, #4]
-	str	r4, [r0, #8]
-	str	r7, [r0, #12]
-	add	sp, sp, #60
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adc	r9, r4, r9
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r7, [sp, #68]                   @ 4-byte Reload
+	ldr	r4, [r0, #24]
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r10, [sp, #44]                  @ 4-byte Reload
+	umull	r12, r1, r4, r5
+	umull	r6, lr, r4, r0
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	umull	r11, r2, r4, r0
+	mov	r0, r6
+	mov	r3, r2
+	adds	r2, r2, r12
+	adcs	r1, r1, r6
+	ldr	r6, [sp, #52]                   @ 4-byte Reload
+	umlal	r3, r0, r4, r5
+	umull	r1, r2, r4, r6
+	adcs	r5, lr, r1
+	umull	r6, r1, r4, r7
+	ldr	r7, [sp, #48]                   @ 4-byte Reload
+	adcs	lr, r2, r6
+	umull	r6, r2, r4, r7
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	adcs	r12, r1, r6
+	umull	r6, r1, r4, r7
+	adcs	r2, r2, r6
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	adc	r1, r1, #0
+	adds	r4, r11, r8
+	adcs	r6, r3, r6
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	ldr	r8, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	adcs	r7, r5, r3
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	adcs	lr, lr, r3
+	ldr	r3, [sp, #20]                   @ 4-byte Reload
+	adcs	r11, r12, r3
+	ldr	r12, [sp, #56]                  @ 4-byte Reload
+	adcs	r2, r2, r9
+	str	r2, [sp, #96]                   @ 4-byte Spill
+	ldr	r2, [sp, #64]                   @ 4-byte Reload
+	adc	r1, r1, #0
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r9, [sp, #88]                   @ 4-byte Reload
+	mul	r1, r2, r4
+	umull	r2, r3, r1, r10
+	str	r3, [sp, #72]                   @ 4-byte Spill
+	ldr	r3, [sp, #100]                  @ 4-byte Reload
+	adds	r2, r4, r2
+	umull	r2, r5, r1, r3
+	umull	r4, r3, r1, r12
+	str	r5, [sp, #68]                   @ 4-byte Spill
+	ldr	r5, [sp, #92]                   @ 4-byte Reload
+	str	r3, [sp, #64]                   @ 4-byte Spill
+	adcs	r3, r6, r4
+	str	r3, [sp, #48]                   @ 4-byte Spill
+	adcs	r0, r0, r2
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	umull	r0, r2, r1, r5
+	str	r2, [sp, #60]                   @ 4-byte Spill
+	adcs	r0, r7, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	umull	r0, r2, r1, r9
+	ldr	r7, [sp, #76]                   @ 4-byte Reload
+	str	r2, [sp, #52]                   @ 4-byte Spill
+	adcs	r3, lr, r0
+	umull	r6, r0, r1, r8
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	umull	r4, r0, r1, r7
+	adcs	r11, r11, r6
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	lr, r0, r4
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r4, [sp, #40]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adds	r0, r2, r0
+	ldr	r2, [sp, #64]                   @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	adcs	r2, r4, r2
+	ldr	r4, [sp, #68]                   @ 4-byte Reload
+	str	r2, [sp, #80]                   @ 4-byte Spill
+	adcs	r6, r1, r4
+	ldr	r4, [sp, #60]                   @ 4-byte Reload
+	str	r6, [sp, #72]                   @ 4-byte Spill
+	adcs	r3, r3, r4
+	ldr	r4, [sp, #52]                   @ 4-byte Reload
+	str	r3, [sp, #68]                   @ 4-byte Spill
+	adcs	r1, r11, r4
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	ldr	r4, [sp, #20]                   @ 4-byte Reload
+	adcs	r11, lr, r1
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adc	r1, r4, r1
+	subs	r10, r0, r10
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	sbcs	lr, r2, r12
+	sbcs	r12, r6, r0
+	sbcs	r5, r3, r5
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	sbcs	r0, r3, r9
+	sbcs	r4, r11, r8
+	sbc	r6, r1, r7
+	asr	r2, r6, #31
+	cmn	r2, #1
+	movgt	r1, r6
+	ldr	r6, [sp, #36]                   @ 4-byte Reload
+	movle	r0, r3
+	movle	r4, r11
+	cmn	r2, #1
+	str	r0, [r6, #16]
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	str	r1, [r6, #24]
+	str	r4, [r6, #20]
+	movle	r5, r0
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	str	r5, [r6, #12]
+	movle	r12, r0
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	str	r12, [r6, #8]
+	movle	lr, r0
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	cmn	r2, #1
+	str	lr, [r6, #4]
+	movle	r10, r0
+	str	r10, [r6]
+	add	sp, sp, #104
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end55:
-	.size	mcl_fp_montRed4L, .Lfunc_end55-mcl_fp_montRed4L
+.Lfunc_end27:
+	.size	mcl_fp_montNF7L, .Lfunc_end27-mcl_fp_montNF7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_addPre4L
-	.align	2
-	.type	mcl_fp_addPre4L,%function
-mcl_fp_addPre4L:                        @ @mcl_fp_addPre4L
+                                        @ -- End function
+	.globl	mcl_fp_montRed7L                @ -- Begin function mcl_fp_montRed7L
+	.p2align	2
+	.type	mcl_fp_montRed7L,%function
+	.code	32                              @ @mcl_fp_montRed7L
+mcl_fp_montRed7L:
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, lr}
-	push	{r4, r5, r6, lr}
-	ldm	r1, {r3, r12, lr}
-	ldr	r1, [r1, #12]
-	ldm	r2, {r4, r5, r6}
-	ldr	r2, [r2, #12]
-	adds	r3, r4, r3
-	adcs	r5, r5, r12
-	adcs	r6, r6, lr
-	adcs	r1, r2, r1
-	stm	r0, {r3, r5, r6}
-	str	r1, [r0, #12]
-	mov	r0, #0
-	adc	r0, r0, #0
-	pop	{r4, r5, r6, lr}
-	mov	pc, lr
-.Lfunc_end56:
-	.size	mcl_fp_addPre4L, .Lfunc_end56-mcl_fp_addPre4L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subPre4L
-	.align	2
-	.type	mcl_fp_subPre4L,%function
-mcl_fp_subPre4L:                        @ @mcl_fp_subPre4L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, lr}
-	push	{r4, r5, r6, lr}
-	ldm	r2, {r3, r12, lr}
-	ldr	r2, [r2, #12]
-	ldm	r1, {r4, r5, r6}
-	ldr	r1, [r1, #12]
-	subs	r3, r4, r3
-	sbcs	r5, r5, r12
-	sbcs	r6, r6, lr
-	sbcs	r1, r1, r2
-	stm	r0, {r3, r5, r6}
-	str	r1, [r0, #12]
-	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	pop	{r4, r5, r6, lr}
-	mov	pc, lr
-.Lfunc_end57:
-	.size	mcl_fp_subPre4L, .Lfunc_end57-mcl_fp_subPre4L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_4L
-	.align	2
-	.type	mcl_fp_shr1_4L,%function
-mcl_fp_shr1_4L:                         @ @mcl_fp_shr1_4L
-	.fnstart
-@ BB#0:
-	.save	{r11, lr}
-	push	{r11, lr}
-	ldr	r3, [r1, #4]
-	ldr	r12, [r1]
-	ldr	lr, [r1, #12]
-	ldr	r2, [r1, #8]
-	lsrs	r1, r3, #1
-	lsr	r3, r3, #1
-	rrx	r12, r12
-	lsrs	r1, lr, #1
-	orr	r3, r3, r2, lsl #31
-	rrx	r1, r2
-	lsr	r2, lr, #1
-	str	r12, [r0]
-	str	r3, [r0, #4]
-	str	r1, [r0, #8]
-	str	r2, [r0, #12]
-	pop	{r11, lr}
-	mov	pc, lr
-.Lfunc_end58:
-	.size	mcl_fp_shr1_4L, .Lfunc_end58-mcl_fp_shr1_4L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_add4L
-	.align	2
-	.type	mcl_fp_add4L,%function
-mcl_fp_add4L:                           @ @mcl_fp_add4L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r11, lr}
-	push	{r4, r5, r6, r7, r11, lr}
-	ldm	r1, {r12, lr}
-	ldr	r4, [r1, #8]
-	ldr	r1, [r1, #12]
-	ldm	r2, {r5, r6, r7}
-	ldr	r2, [r2, #12]
-	adds	r5, r5, r12
-	adcs	r6, r6, lr
-	adcs	r7, r7, r4
-	stm	r0, {r5, r6, r7}
-	adcs	r4, r2, r1
-	mov	r1, #0
-	ldr	r2, [r3]
-	adc	lr, r1, #0
-	str	r4, [r0, #12]
-	ldmib	r3, {r1, r12}
-	ldr	r3, [r3, #12]
-	subs	r5, r5, r2
-	sbcs	r2, r6, r1
-	sbcs	r1, r7, r12
-	sbcs	r12, r4, r3
-	sbc	r3, lr, #0
-	tst	r3, #1
-	streq	r5, [r0]
-	streq	r2, [r0, #4]
-	streq	r1, [r0, #8]
-	streq	r12, [r0, #12]
-	pop	{r4, r5, r6, r7, r11, lr}
-	mov	pc, lr
-.Lfunc_end59:
-	.size	mcl_fp_add4L, .Lfunc_end59-mcl_fp_add4L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF4L
-	.align	2
-	.type	mcl_fp_addNF4L,%function
-mcl_fp_addNF4L:                         @ @mcl_fp_addNF4L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, lr}
-	push	{r4, r5, r6, r7, r8, lr}
-	ldm	r1, {r12, lr}
-	ldr	r4, [r1, #8]
-	ldr	r1, [r1, #12]
-	ldm	r2, {r5, r6, r7}
-	ldr	r2, [r2, #12]
-	adds	r5, r5, r12
-	adcs	r6, r6, lr
-	adcs	r7, r7, r4
-	adc	r8, r2, r1
-	ldm	r3, {r2, r4, r12, lr}
-	subs	r2, r5, r2
-	sbcs	r4, r6, r4
-	sbcs	r3, r7, r12
-	sbc	r1, r8, lr
-	cmp	r1, #0
-	movlt	r2, r5
-	movlt	r4, r6
-	movlt	r3, r7
-	cmp	r1, #0
-	movlt	r1, r8
-	stm	r0, {r2, r4}
-	str	r3, [r0, #8]
-	str	r1, [r0, #12]
-	pop	{r4, r5, r6, r7, r8, lr}
-	mov	pc, lr
-.Lfunc_end60:
-	.size	mcl_fp_addNF4L, .Lfunc_end60-mcl_fp_addNF4L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub4L
-	.align	2
-	.type	mcl_fp_sub4L,%function
-mcl_fp_sub4L:                           @ @mcl_fp_sub4L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, lr}
-	push	{r4, r5, r6, r7, r8, lr}
-	ldm	r2, {r12, lr}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#84
+	sub	sp, sp, #84
+	mov	r9, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r3, [r2, #-4]
+	ldr	r0, [r9, #4]
+	ldr	lr, [r9]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [r9, #8]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [r9, #12]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mul	r0, lr, r3
+	ldr	r8, [r2, #4]
+	ldr	r1, [r2]
+	str	r3, [sp, #60]                   @ 4-byte Spill
 	ldr	r4, [r2, #8]
-	ldr	r5, [r2, #12]
-	ldm	r1, {r2, r6, r7}
-	ldr	r1, [r1, #12]
-	subs	r8, r2, r12
-	sbcs	r2, r6, lr
-	str	r8, [r0]
-	sbcs	r12, r7, r4
-	sbcs	lr, r1, r5
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	umull	r7, r3, r0, r1
+	str	r4, [sp, #64]                   @ 4-byte Spill
+	mov	r10, r4
+	str	r8, [sp, #52]                   @ 4-byte Spill
+	umull	r5, r6, r0, r8
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	adds	r5, r3, r5
+	umull	r1, r5, r0, r4
+	ldr	r4, [r2, #12]
+	str	r4, [sp, #56]                   @ 4-byte Spill
+	adcs	r6, r6, r1
+	umlal	r3, r1, r0, r8
+	umull	r6, r12, r0, r4
+	adcs	r4, r5, r6
+	ldr	r5, [r2, #16]
+	str	r4, [sp, #28]                   @ 4-byte Spill
+	str	r5, [sp, #76]                   @ 4-byte Spill
+	umull	r6, r4, r0, r5
+	adcs	r7, r12, r6
+	ldr	r6, [r2, #20]
+	str	r6, [sp, #72]                   @ 4-byte Spill
+	str	r7, [sp, #24]                   @ 4-byte Spill
+	umull	r11, r5, r0, r6
+	adcs	r6, r4, r11
+	ldr	r4, [r2, #24]
+	str	r4, [sp, #68]                   @ 4-byte Spill
+	umull	r2, r12, r0, r4
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r2, r5, r2
+	ldr	r5, [sp, #32]                   @ 4-byte Reload
+	adc	r4, r12, #0
+	ldr	r12, [sp, #76]                  @ 4-byte Reload
+	adds	r7, r5, lr
+	ldr	lr, [sp, #68]                   @ 4-byte Reload
+	adcs	r3, r3, r0
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	str	r3, [sp, #16]                   @ 4-byte Spill
+	mov	r7, r8
+	adcs	r0, r1, r0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [r9, #16]
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [r9, #20]
+	adcs	r0, r6, r0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [r9, #24]
+	adcs	r0, r2, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [r9, #28]
+	adcs	r0, r4, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	mov	r0, #0
+	ldr	r4, [sp, #80]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	mul	r1, r0, r3
+	umull	r2, r3, r1, r4
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	umull	r8, r2, r1, r7
+	umull	r6, r7, r1, r10
+	adds	r3, r8, r3
+	str	r3, [sp, #8]                    @ 4-byte Spill
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r2, r6, r2
+	str	r2, [sp, #4]                    @ 4-byte Spill
+	umull	r6, r2, r1, r3
+	adcs	r7, r6, r7
+	umull	r6, r8, r1, r12
+	str	r7, [sp]                        @ 4-byte Spill
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	adcs	r11, r6, r2
+	umull	r6, r5, r1, r7
+	adcs	r10, r6, r8
+	umull	r6, r2, r1, lr
+	adcs	r1, r6, r5
+	ldr	r5, [sp, #20]                   @ 4-byte Reload
+	ldr	r6, [sp, #12]                   @ 4-byte Reload
+	adc	r5, r5, r2
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adds	r2, r6, r2
+	ldr	r6, [sp, #8]                    @ 4-byte Reload
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	adcs	r8, r6, r2
+	ldr	r2, [sp, #40]                   @ 4-byte Reload
+	ldr	r6, [sp, #4]                    @ 4-byte Reload
+	adcs	r2, r6, r2
+	str	r2, [sp, #44]                   @ 4-byte Spill
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	ldr	r6, [sp]                        @ 4-byte Reload
+	adcs	r2, r6, r2
+	str	r2, [sp, #40]                   @ 4-byte Spill
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	ldr	r6, [r9, #32]
+	adcs	r2, r11, r2
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	mov	r11, r3
+	adcs	r2, r10, r2
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	ldr	r10, [sp, #64]                  @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	adcs	r1, r5, r6
+	str	r1, [sp, #24]                   @ 4-byte Spill
 	mov	r1, #0
-	sbc	r1, r1, #0
-	stmib	r0, {r2, r12, lr}
-	tst	r1, #1
-	popeq	{r4, r5, r6, r7, r8, lr}
-	moveq	pc, lr
-	ldm	r3, {r1, r4, r5}
-	ldr	r3, [r3, #12]
-	adds	r1, r1, r8
-	adcs	r2, r4, r2
-	adcs	r7, r5, r12
-	adc	r3, r3, lr
-	stm	r0, {r1, r2, r7}
-	str	r3, [r0, #12]
-	pop	{r4, r5, r6, r7, r8, lr}
-	mov	pc, lr
-.Lfunc_end61:
-	.size	mcl_fp_sub4L, .Lfunc_end61-mcl_fp_sub4L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF4L
-	.align	2
-	.type	mcl_fp_subNF4L,%function
-mcl_fp_subNF4L:                         @ @mcl_fp_subNF4L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, lr}
-	push	{r4, r5, r6, r7, r8, lr}
-	ldm	r2, {r12, lr}
-	ldr	r4, [r2, #8]
-	ldr	r2, [r2, #12]
-	ldm	r1, {r5, r6, r7}
-	ldr	r1, [r1, #12]
-	subs	r5, r5, r12
-	sbcs	r6, r6, lr
-	sbcs	r8, r7, r4
-	sbc	r1, r1, r2
-	ldm	r3, {r2, r4, r12, lr}
+	adc	r1, r1, #0
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	mul	r1, r0, r8
+	umull	r0, r2, r1, r4
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	umull	r5, r6, r1, r0
 	adds	r2, r5, r2
-	adcs	r4, r6, r4
-	adcs	r3, r8, r12
-	adc	r7, r1, lr
-	cmp	r1, #0
-	movge	r2, r5
-	movge	r4, r6
-	movge	r3, r8
-	cmp	r1, #0
-	movge	r7, r1
-	stm	r0, {r2, r4}
-	str	r3, [r0, #8]
-	str	r7, [r0, #12]
-	pop	{r4, r5, r6, r7, r8, lr}
-	mov	pc, lr
-.Lfunc_end62:
-	.size	mcl_fp_subNF4L, .Lfunc_end62-mcl_fp_subNF4L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add4L
-	.align	2
-	.type	mcl_fpDbl_add4L,%function
-mcl_fpDbl_add4L:                        @ @mcl_fpDbl_add4L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#16
-	sub	sp, sp, #16
-	ldm	r1, {r8, r9, r10, r11}
-	ldr	r7, [r1, #16]
-	str	r7, [sp]                @ 4-byte Spill
-	ldr	r7, [r1, #20]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	ldr	r7, [r1, #24]
-	ldr	r1, [r1, #28]
-	str	r7, [sp, #8]            @ 4-byte Spill
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r2, {r1, r6, r7, r12, lr}
-	ldr	r4, [r2, #20]
-	ldr	r5, [r2, #24]
-	ldr	r2, [r2, #28]
-	adds	r1, r1, r8
-	adcs	r6, r6, r9
-	adcs	r7, r7, r10
-	adcs	r12, r12, r11
-	stm	r0, {r1, r6, r7, r12}
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	umull	r5, r2, r1, r10
+	adcs	r4, r5, r6
+	str	r4, [sp, #8]                    @ 4-byte Spill
+	umull	r5, r4, r1, r3
+	ldr	r6, [sp, #8]                    @ 4-byte Reload
+	adcs	r2, r5, r2
+	str	r2, [sp, #4]                    @ 4-byte Spill
+	umull	r5, r2, r1, r12
+	adcs	r4, r5, r4
+	umull	r5, r12, r1, r7
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
+	adcs	r2, r5, r2
+	umull	r5, r3, r1, lr
+	adcs	r1, r5, r12
+	ldr	r12, [sp, #76]                  @ 4-byte Reload
+	adc	r5, r7, r3
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	ldr	r7, [sp, #12]                   @ 4-byte Reload
+	adds	r3, r3, r8
+	ldr	r8, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r7, r7, r3
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	adcs	r3, r6, r3
+	str	r3, [sp, #44]                   @ 4-byte Spill
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
+	ldr	r6, [sp, #4]                    @ 4-byte Reload
+	adcs	r3, r6, r3
+	str	r3, [sp, #40]                   @ 4-byte Spill
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	adcs	r3, r4, r3
+	str	r3, [sp, #36]                   @ 4-byte Spill
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	ldr	r4, [r9, #36]
+	adcs	r2, r2, r3
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	adcs	r1, r5, r4
+	str	r1, [sp, #24]                   @ 4-byte Spill
 	mov	r1, #0
-	ldr	r7, [sp]                @ 4-byte Reload
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	adcs	r7, lr, r7
-	adcs	r6, r4, r6
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	adcs	r8, r5, r4
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	ldr	r4, [r3]
-	adcs	lr, r2, r5
-	adc	r12, r1, #0
-	ldmib	r3, {r1, r2, r3}
-	subs	r4, r7, r4
-	sbcs	r1, r6, r1
-	sbcs	r2, r8, r2
-	sbcs	r3, lr, r3
-	sbc	r5, r12, #0
-	ands	r5, r5, #1
-	movne	r4, r7
-	movne	r1, r6
-	movne	r2, r8
-	cmp	r5, #0
-	movne	r3, lr
-	str	r4, [r0, #16]
-	str	r1, [r0, #20]
-	str	r2, [r0, #24]
-	str	r3, [r0, #28]
-	add	sp, sp, #16
+	ldr	r2, [sp, #80]                   @ 4-byte Reload
+	adc	r1, r1, #0
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	mul	r1, r8, r7
+	umull	r3, r5, r1, r2
+	str	r3, [sp, #16]                   @ 4-byte Spill
+	umull	r6, r3, r1, r0
+	adds	r0, r6, r5
+	umull	r6, r5, r1, r10
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adcs	r10, r6, r3
+	umull	r6, r3, r1, r11
+	adcs	r0, r6, r5
+	umull	r6, lr, r1, r12
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r5, [sp, #68]                   @ 4-byte Reload
+	adcs	r11, r6, r3
+	umull	r6, r3, r1, r0
+	adcs	r4, r6, lr
+	umull	r6, r0, r1, r5
+	adcs	r1, r6, r3
+	ldr	r3, [sp, #20]                   @ 4-byte Reload
+	ldr	r6, [sp, #8]                    @ 4-byte Reload
+	adc	r0, r3, r0
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	adds	r3, r3, r7
+	ldr	r7, [sp, #12]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r7, r7, r3
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	adcs	r3, r10, r3
+	str	r3, [sp, #44]                   @ 4-byte Spill
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
+	mov	r10, r8
+	adcs	r3, r6, r3
+	str	r3, [sp, #40]                   @ 4-byte Spill
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	adcs	r3, r11, r3
+	str	r3, [sp, #36]                   @ 4-byte Spill
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	ldr	r11, [sp, #52]                  @ 4-byte Reload
+	adcs	r3, r4, r3
+	str	r3, [sp, #32]                   @ 4-byte Spill
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	ldr	r4, [r9, #40]
+	adcs	r1, r1, r3
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	mul	r1, r8, r7
+	adcs	r0, r0, r4
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	umull	r0, r3, r1, r2
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	umull	r6, r0, r1, r11
+	adds	r8, r6, r3
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	umull	r6, r2, r1, r3
+	adcs	r0, r6, r0
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	umull	r6, r3, r1, r0
+	adcs	r2, r6, r2
+	str	r2, [sp, #8]                    @ 4-byte Spill
+	umull	r6, r2, r1, r12
+	ldr	r12, [sp, #72]                  @ 4-byte Reload
+	adcs	lr, r6, r3
+	umull	r6, r3, r1, r12
+	adcs	r4, r6, r2
+	umull	r6, r2, r1, r5
+	ldr	r5, [sp, #12]                   @ 4-byte Reload
+	adcs	r1, r6, r3
+	ldr	r3, [sp, #20]                   @ 4-byte Reload
+	adc	r2, r3, r2
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	adds	r3, r3, r7
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r8, r8, r3
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	adcs	r3, r5, r3
+	str	r3, [sp, #44]                   @ 4-byte Spill
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
+	ldr	r5, [sp, #8]                    @ 4-byte Reload
+	adcs	r3, r5, r3
+	str	r3, [sp, #40]                   @ 4-byte Spill
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	adcs	r3, lr, r3
+	str	r3, [sp, #36]                   @ 4-byte Spill
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	ldr	lr, [sp, #80]                   @ 4-byte Reload
+	adcs	r3, r4, r3
+	str	r3, [sp, #32]                   @ 4-byte Spill
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	ldr	r4, [r9, #44]
+	adcs	r1, r1, r3
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	adcs	r1, r2, r4
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	mov	r1, #0
+	adc	r1, r1, #0
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	mul	r1, r10, r8
+	umull	r2, r5, r1, lr
+	umull	r6, r3, r1, r11
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	mov	r2, r11
+	adds	r4, r6, r5
+	ldr	r5, [sp, #64]                   @ 4-byte Reload
+	str	r4, [sp, #12]                   @ 4-byte Spill
+	umull	r6, r4, r1, r5
+	adcs	r3, r6, r3
+	str	r3, [sp, #8]                    @ 4-byte Spill
+	umull	r6, r3, r1, r0
+	adcs	r0, r6, r4
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	umull	r6, r4, r1, r0
+	adcs	r11, r6, r3
+	umull	r6, r7, r1, r12
+	ldr	r12, [sp, #68]                  @ 4-byte Reload
+	adcs	r10, r6, r4
+	umull	r6, r3, r1, r12
+	ldr	r4, [sp, #8]                    @ 4-byte Reload
+	adcs	r1, r6, r7
+	ldr	r6, [sp, #20]                   @ 4-byte Reload
+	ldr	r7, [sp, #12]                   @ 4-byte Reload
+	adc	r6, r6, r3
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	adds	r3, r3, r8
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r7, r7, r3
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	adcs	r3, r4, r3
+	str	r3, [sp, #44]                   @ 4-byte Spill
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
+	ldr	r4, [sp, #4]                    @ 4-byte Reload
+	adcs	r3, r4, r3
+	str	r3, [sp, #40]                   @ 4-byte Spill
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	ldr	r4, [r9, #48]
+	adcs	r3, r11, r3
+	str	r3, [sp, #36]                   @ 4-byte Spill
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	adcs	r3, r10, r3
+	str	r3, [sp, #32]                   @ 4-byte Spill
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	ldr	r10, [sp, #72]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	adcs	r1, r6, r4
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	mov	r1, #0
+	adc	r1, r1, #0
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	mul	r1, r3, r7
+	umull	r4, r3, r1, lr
+	str	r4, [sp, #60]                   @ 4-byte Spill
+	umull	r6, r4, r1, r2
+	adds	r11, r6, r3
+	umull	r6, r2, r1, r5
+	ldr	r5, [sp, #56]                   @ 4-byte Reload
+	adcs	r8, r6, r4
+	umull	r6, r3, r1, r5
+	adcs	lr, r6, r2
+	umull	r6, r2, r1, r0
+	adcs	r4, r6, r3
+	umull	r6, r3, r1, r10
+	adcs	r2, r6, r2
+	umull	r6, r0, r1, r12
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r12, r6, r3
+	adc	r6, r1, r0
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adds	r0, r0, r7
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r11, r0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	adcs	r7, r8, r1
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	str	r7, [sp, #44]                   @ 4-byte Spill
+	adcs	r11, lr, r1
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	str	r11, [sp, #40]                  @ 4-byte Spill
+	adcs	r1, r4, r1
+	ldr	r4, [sp, #28]                   @ 4-byte Reload
+	adcs	r3, r2, r4
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	ldr	r4, [r9, #52]
+	adcs	r8, r12, r2
+	ldr	r2, [sp, #80]                   @ 4-byte Reload
+	adc	r6, r6, r4
+	subs	r9, r0, r2
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r2, [sp, #76]                   @ 4-byte Reload
+	sbcs	lr, r7, r0
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	sbcs	r0, r11, r0
+	sbcs	r7, r1, r5
+	sbcs	r5, r3, r2
+	ldr	r2, [sp, #68]                   @ 4-byte Reload
+	sbcs	r4, r8, r10
+	sbcs	r11, r6, r2
+	mov	r2, #0
+	sbc	r12, r2, #0
+	ands	r12, r12, #1
+	movne	r11, r6
+	movne	r4, r8
+	movne	r5, r3
+	cmp	r12, #0
+	movne	r7, r1
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	ldr	r6, [sp, #48]                   @ 4-byte Reload
+	movne	r0, r1
+	str	r0, [r6, #8]
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	str	r11, [r6, #24]
+	str	r4, [r6, #20]
+	movne	lr, r0
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	cmp	r12, #0
+	str	r5, [r6, #16]
+	str	r7, [r6, #12]
+	movne	r9, r0
+	str	lr, [r6, #4]
+	str	r9, [r6]
+	add	sp, sp, #84
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end63:
-	.size	mcl_fpDbl_add4L, .Lfunc_end63-mcl_fpDbl_add4L
+.Lfunc_end28:
+	.size	mcl_fp_montRed7L, .Lfunc_end28-mcl_fp_montRed7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_sub4L
-	.align	2
-	.type	mcl_fpDbl_sub4L,%function
-mcl_fpDbl_sub4L:                        @ @mcl_fpDbl_sub4L
+                                        @ -- End function
+	.globl	mcl_fp_montRedNF7L              @ -- Begin function mcl_fp_montRedNF7L
+	.p2align	2
+	.type	mcl_fp_montRedNF7L,%function
+	.code	32                              @ @mcl_fp_montRedNF7L
+mcl_fp_montRedNF7L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#16
-	sub	sp, sp, #16
-	ldm	r2, {r8, r9, r10, r11}
-	ldr	r7, [r2, #16]
-	str	r7, [sp]                @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	ldr	r2, [r2, #28]
-	str	r7, [sp, #8]            @ 4-byte Spill
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldm	r1, {r2, r6, r7, r12, lr}
-	ldr	r4, [r1, #20]
-	ldr	r5, [r1, #24]
-	ldr	r1, [r1, #28]
-	subs	r2, r2, r8
-	str	r2, [r0]
-	sbcs	r2, r6, r9
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	str	r2, [r0, #4]
-	sbcs	r2, r7, r10
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r2, [r0, #8]
-	sbcs	r2, r12, r11
-	str	r2, [r0, #12]
-	mov	r2, #0
-	sbcs	r7, lr, r7
-	sbcs	r6, r4, r6
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	sbcs	r5, r5, r4
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	sbcs	lr, r1, r4
-	ldr	r4, [r3]
-	ldr	r1, [r3, #8]
-	sbc	r12, r2, #0
-	ldr	r2, [r3, #4]
-	ldr	r3, [r3, #12]
-	adds	r4, r7, r4
+	.pad	#84
+	sub	sp, sp, #84
+	mov	r9, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r3, [r2, #-4]
+	ldr	r0, [r9, #4]
+	ldr	lr, [r9]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [r9, #8]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [r9, #12]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mul	r0, lr, r3
+	ldr	r8, [r2, #4]
+	ldr	r1, [r2]
+	str	r3, [sp, #60]                   @ 4-byte Spill
+	ldr	r4, [r2, #8]
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	umull	r7, r3, r0, r1
+	str	r4, [sp, #64]                   @ 4-byte Spill
+	mov	r10, r4
+	str	r8, [sp, #52]                   @ 4-byte Spill
+	umull	r5, r6, r0, r8
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	adds	r5, r3, r5
+	umull	r1, r5, r0, r4
+	ldr	r4, [r2, #12]
+	str	r4, [sp, #56]                   @ 4-byte Spill
+	adcs	r6, r6, r1
+	umlal	r3, r1, r0, r8
+	umull	r6, r12, r0, r4
+	adcs	r4, r5, r6
+	ldr	r5, [r2, #16]
+	str	r4, [sp, #28]                   @ 4-byte Spill
+	str	r5, [sp, #76]                   @ 4-byte Spill
+	umull	r6, r4, r0, r5
+	adcs	r7, r12, r6
+	ldr	r6, [r2, #20]
+	str	r6, [sp, #72]                   @ 4-byte Spill
+	str	r7, [sp, #24]                   @ 4-byte Spill
+	umull	r11, r5, r0, r6
+	adcs	r6, r4, r11
+	ldr	r4, [r2, #24]
+	str	r4, [sp, #68]                   @ 4-byte Spill
+	umull	r2, r12, r0, r4
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r2, r5, r2
+	ldr	r5, [sp, #32]                   @ 4-byte Reload
+	adc	r4, r12, #0
+	ldr	r12, [sp, #76]                  @ 4-byte Reload
+	adds	r7, r5, lr
+	ldr	lr, [sp, #68]                   @ 4-byte Reload
+	adcs	r3, r3, r0
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	str	r3, [sp, #16]                   @ 4-byte Spill
+	mov	r7, r8
+	adcs	r0, r1, r0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [r9, #16]
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [r9, #20]
+	adcs	r0, r6, r0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [r9, #24]
+	adcs	r0, r2, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [r9, #28]
+	adcs	r0, r4, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	mov	r0, #0
+	ldr	r4, [sp, #80]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	mul	r1, r0, r3
+	umull	r2, r3, r1, r4
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	umull	r8, r2, r1, r7
+	umull	r6, r7, r1, r10
+	adds	r3, r8, r3
+	str	r3, [sp, #8]                    @ 4-byte Spill
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
 	adcs	r2, r6, r2
-	adcs	r1, r5, r1
-	adc	r3, lr, r3
-	ands	r12, r12, #1
-	moveq	r4, r7
-	moveq	r2, r6
-	moveq	r1, r5
-	cmp	r12, #0
-	moveq	r3, lr
-	str	r4, [r0, #16]
-	str	r2, [r0, #20]
-	str	r1, [r0, #24]
-	str	r3, [r0, #28]
-	add	sp, sp, #16
+	str	r2, [sp, #4]                    @ 4-byte Spill
+	umull	r6, r2, r1, r3
+	adcs	r7, r6, r7
+	umull	r6, r8, r1, r12
+	str	r7, [sp]                        @ 4-byte Spill
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	adcs	r11, r6, r2
+	umull	r6, r5, r1, r7
+	adcs	r10, r6, r8
+	umull	r6, r2, r1, lr
+	adcs	r1, r6, r5
+	ldr	r5, [sp, #20]                   @ 4-byte Reload
+	ldr	r6, [sp, #12]                   @ 4-byte Reload
+	adc	r5, r5, r2
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adds	r2, r6, r2
+	ldr	r6, [sp, #8]                    @ 4-byte Reload
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	adcs	r8, r6, r2
+	ldr	r2, [sp, #40]                   @ 4-byte Reload
+	ldr	r6, [sp, #4]                    @ 4-byte Reload
+	adcs	r2, r6, r2
+	str	r2, [sp, #44]                   @ 4-byte Spill
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	ldr	r6, [sp]                        @ 4-byte Reload
+	adcs	r2, r6, r2
+	str	r2, [sp, #40]                   @ 4-byte Spill
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	ldr	r6, [r9, #32]
+	adcs	r2, r11, r2
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	mov	r11, r3
+	adcs	r2, r10, r2
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	ldr	r10, [sp, #64]                  @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	adcs	r1, r5, r6
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	mov	r1, #0
+	adc	r1, r1, #0
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	mul	r1, r0, r8
+	umull	r0, r2, r1, r4
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	umull	r5, r6, r1, r0
+	adds	r2, r5, r2
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	umull	r5, r2, r1, r10
+	adcs	r4, r5, r6
+	str	r4, [sp, #8]                    @ 4-byte Spill
+	umull	r5, r4, r1, r3
+	ldr	r6, [sp, #8]                    @ 4-byte Reload
+	adcs	r2, r5, r2
+	str	r2, [sp, #4]                    @ 4-byte Spill
+	umull	r5, r2, r1, r12
+	adcs	r4, r5, r4
+	umull	r5, r12, r1, r7
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
+	adcs	r2, r5, r2
+	umull	r5, r3, r1, lr
+	adcs	r1, r5, r12
+	ldr	r12, [sp, #76]                  @ 4-byte Reload
+	adc	r5, r7, r3
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	ldr	r7, [sp, #12]                   @ 4-byte Reload
+	adds	r3, r3, r8
+	ldr	r8, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r7, r7, r3
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	adcs	r3, r6, r3
+	str	r3, [sp, #44]                   @ 4-byte Spill
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
+	ldr	r6, [sp, #4]                    @ 4-byte Reload
+	adcs	r3, r6, r3
+	str	r3, [sp, #40]                   @ 4-byte Spill
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	adcs	r3, r4, r3
+	str	r3, [sp, #36]                   @ 4-byte Spill
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	ldr	r4, [r9, #36]
+	adcs	r2, r2, r3
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	adcs	r1, r5, r4
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	mov	r1, #0
+	ldr	r2, [sp, #80]                   @ 4-byte Reload
+	adc	r1, r1, #0
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	mul	r1, r8, r7
+	umull	r3, r5, r1, r2
+	str	r3, [sp, #16]                   @ 4-byte Spill
+	umull	r6, r3, r1, r0
+	adds	r0, r6, r5
+	umull	r6, r5, r1, r10
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adcs	r10, r6, r3
+	umull	r6, r3, r1, r11
+	ldr	r11, [sp, #72]                  @ 4-byte Reload
+	adcs	r0, r6, r5
+	umull	r6, lr, r1, r12
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	adcs	r5, r6, r3
+	umull	r6, r3, r1, r11
+	adcs	r4, r6, lr
+	ldr	lr, [sp, #68]                   @ 4-byte Reload
+	umull	r6, r0, r1, lr
+	adcs	r1, r6, r3
+	ldr	r3, [sp, #20]                   @ 4-byte Reload
+	ldr	r6, [sp, #12]                   @ 4-byte Reload
+	adc	r0, r3, r0
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	adds	r3, r3, r7
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r7, r6, r3
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	ldr	r6, [sp, #8]                    @ 4-byte Reload
+	adcs	r3, r10, r3
+	str	r3, [sp, #44]                   @ 4-byte Spill
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
+	ldr	r10, [sp, #52]                  @ 4-byte Reload
+	adcs	r3, r6, r3
+	str	r3, [sp, #40]                   @ 4-byte Spill
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	adcs	r3, r5, r3
+	str	r3, [sp, #36]                   @ 4-byte Spill
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	adcs	r3, r4, r3
+	str	r3, [sp, #32]                   @ 4-byte Spill
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	ldr	r4, [r9, #40]
+	adcs	r1, r1, r3
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	mul	r1, r8, r7
+	adcs	r0, r0, r4
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	mov	r0, #0
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	umull	r0, r5, r1, r2
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	umull	r6, r0, r1, r10
+	adds	r2, r6, r5
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	umull	r6, r2, r1, r3
+	adcs	r0, r6, r0
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	umull	r6, r3, r1, r0
+	adcs	r2, r6, r2
+	str	r2, [sp, #4]                    @ 4-byte Spill
+	umull	r6, r2, r1, r12
+	adcs	r5, r6, r3
+	umull	r6, r3, r1, r11
+	adcs	r4, r6, r2
+	umull	r6, r2, r1, lr
+	ldr	lr, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r6, r3
+	ldr	r3, [sp, #20]                   @ 4-byte Reload
+	adc	r2, r3, r2
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	adds	r3, r3, r7
+	ldr	r7, [sp, #12]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r12, r7, r3
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	ldr	r7, [sp, #8]                    @ 4-byte Reload
+	adcs	r3, r7, r3
+	str	r3, [sp, #44]                   @ 4-byte Spill
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
+	ldr	r7, [sp, #4]                    @ 4-byte Reload
+	adcs	r3, r7, r3
+	str	r3, [sp, #40]                   @ 4-byte Spill
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	adcs	r3, r5, r3
+	str	r3, [sp, #36]                   @ 4-byte Spill
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	adcs	r3, r4, r3
+	str	r3, [sp, #32]                   @ 4-byte Spill
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	ldr	r4, [r9, #44]
+	adcs	r1, r1, r3
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	adcs	r1, r2, r4
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	mov	r1, #0
+	ldr	r2, [sp, #80]                   @ 4-byte Reload
+	adc	r1, r1, #0
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	mul	r1, r8, r12
+	umull	r3, r5, r1, r2
+	str	r3, [sp, #16]                   @ 4-byte Spill
+	umull	r6, r3, r1, r10
+	adds	r10, r6, r5
+	umull	r6, r5, r1, r7
+	adcs	r3, r6, r3
+	str	r3, [sp, #12]                   @ 4-byte Spill
+	umull	r6, r3, r1, r0
+	adcs	r0, r6, r5
+	umull	r6, r11, r1, lr
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r5, [sp, #68]                   @ 4-byte Reload
+	adcs	r8, r6, r3
+	umull	r6, r4, r1, r0
+	adcs	r11, r6, r11
+	umull	r6, r3, r1, r5
+	adcs	r1, r6, r4
+	ldr	r4, [sp, #20]                   @ 4-byte Reload
+	adc	r6, r4, r3
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	ldr	r4, [sp, #12]                   @ 4-byte Reload
+	adds	r3, r3, r12
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r10, r10, r3
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	adcs	r3, r4, r3
+	str	r3, [sp, #44]                   @ 4-byte Spill
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
+	ldr	r4, [sp, #8]                    @ 4-byte Reload
+	adcs	r3, r4, r3
+	str	r3, [sp, #40]                   @ 4-byte Spill
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	ldr	r4, [r9, #48]
+	adcs	r3, r8, r3
+	str	r3, [sp, #36]                   @ 4-byte Spill
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	ldr	r8, [sp, #52]                   @ 4-byte Reload
+	adcs	r3, r11, r3
+	str	r3, [sp, #32]                   @ 4-byte Spill
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	adcs	r1, r6, r4
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	mov	r1, #0
+	adc	r1, r1, #0
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	mul	r1, r3, r10
+	umull	r3, r4, r1, r2
+	str	r3, [sp, #60]                   @ 4-byte Spill
+	umull	r6, r3, r1, r8
+	adds	r4, r6, r4
+	umull	r6, r2, r1, r7
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	adcs	r11, r6, r3
+	umull	r6, r3, r1, r7
+	adcs	r12, r6, r2
+	umull	r6, r2, r1, lr
+	adcs	lr, r6, r3
+	umull	r6, r3, r1, r0
+	adcs	r2, r6, r2
+	umull	r6, r0, r1, r5
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	ldr	r5, [sp, #36]                   @ 4-byte Reload
+	adcs	r3, r6, r3
+	adc	r6, r1, r0
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adds	r0, r0, r10
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r4, r0
+	ldr	r4, [sp, #32]                   @ 4-byte Reload
+	adcs	r1, r11, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	adcs	r5, r12, r5
+	adcs	r10, lr, r4
+	ldr	r4, [sp, #28]                   @ 4-byte Reload
+	adcs	r12, r2, r4
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	ldr	r4, [r9, #52]
+	mov	r9, r1
+	adcs	r3, r3, r2
+	ldr	r2, [sp, #80]                   @ 4-byte Reload
+	adc	r6, r6, r4
+	subs	lr, r0, r2
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	sbcs	r2, r1, r8
+	mov	r8, r5
+	sbcs	r1, r5, r0
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	sbcs	r7, r10, r7
+	sbcs	r5, r12, r0
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	sbcs	r4, r3, r0
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	sbc	r11, r6, r0
+	asr	r0, r11, #31
+	cmn	r0, #1
+	movgt	r6, r11
+	movle	r4, r3
+	movle	r5, r12
+	cmn	r0, #1
+	movle	r7, r10
+	movle	r1, r8
+	movle	r2, r9
+	cmn	r0, #1
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r11, [sp, #48]                  @ 4-byte Reload
+	movle	lr, r0
+	str	r6, [r11, #24]
+	str	r4, [r11, #20]
+	str	r5, [r11, #16]
+	str	r7, [r11, #12]
+	str	r1, [r11, #8]
+	str	r2, [r11, #4]
+	str	lr, [r11]
+	add	sp, sp, #84
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end64:
-	.size	mcl_fpDbl_sub4L, .Lfunc_end64-mcl_fpDbl_sub4L
+.Lfunc_end29:
+	.size	mcl_fp_montRedNF7L, .Lfunc_end29-mcl_fp_montRedNF7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_mulUnitPre5L
-	.align	2
-	.type	mcl_fp_mulUnitPre5L,%function
-mcl_fp_mulUnitPre5L:                    @ @mcl_fp_mulUnitPre5L
+                                        @ -- End function
+	.globl	mcl_fp_addPre7L                 @ -- Begin function mcl_fp_addPre7L
+	.p2align	2
+	.type	mcl_fp_addPre7L,%function
+	.code	32                              @ @mcl_fp_addPre7L
+mcl_fp_addPre7L:
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r10, [r1, #12]
-	ldr	r8, [r1, #16]
-	umull	r4, r9, lr, r2
-	umull	r1, r6, r12, r2
-	mov	r7, r6
-	mov	r5, r4
-	umlal	r7, r5, r3, r2
-	stm	r0, {r1, r7}
-	str	r5, [r0, #8]
-	umull	r5, r7, r3, r2
-	umull	r1, r12, r10, r2
-	adds	r3, r6, r5
-	adcs	r3, r7, r4
-	adcs	r1, r9, r1
-	str	r1, [r0, #12]
-	umull	r1, r3, r8, r2
-	adcs	r1, r12, r1
-	str	r1, [r0, #16]
-	adc	r1, r3, #0
-	str	r1, [r0, #20]
-	pop	{r4, r5, r6, r7, r8, r9, r10, lr}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, lr}
+	push	{r4, r5, r6, r7, r8, lr}
+	ldm	r2, {r3, r12, lr}
+	ldm	r1, {r5, r6, r7}
+	adds	r3, r5, r3
+	str	r3, [r0]
+	adcs	r3, r6, r12
+	ldr	r8, [r2, #12]
+	ldr	r4, [r1, #12]
+	str	r3, [r0, #4]
+	adcs	r3, r7, lr
+	str	r3, [r0, #8]
+	adcs	r3, r4, r8
+	str	r3, [r0, #12]
+	ldr	r3, [r2, #16]
+	ldr	r7, [r1, #16]
+	adcs	r3, r7, r3
+	str	r3, [r0, #16]
+	ldr	r3, [r2, #20]
+	ldr	r7, [r1, #20]
+	ldr	r2, [r2, #24]
+	ldr	r1, [r1, #24]
+	adcs	r3, r7, r3
+	str	r3, [r0, #20]
+	adcs	r1, r1, r2
+	str	r1, [r0, #24]
+	mov	r0, #0
+	adc	r0, r0, #0
+	pop	{r4, r5, r6, r7, r8, lr}
 	mov	pc, lr
-.Lfunc_end65:
-	.size	mcl_fp_mulUnitPre5L, .Lfunc_end65-mcl_fp_mulUnitPre5L
+.Lfunc_end30:
+	.size	mcl_fp_addPre7L, .Lfunc_end30-mcl_fp_addPre7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_mulPre5L
-	.align	2
-	.type	mcl_fpDbl_mulPre5L,%function
-mcl_fpDbl_mulPre5L:                     @ @mcl_fpDbl_mulPre5L
+                                        @ -- End function
+	.globl	mcl_fp_subPre7L                 @ -- Begin function mcl_fp_subPre7L
+	.p2align	2
+	.type	mcl_fp_subPre7L,%function
+	.code	32                              @ @mcl_fp_subPre7L
+mcl_fp_subPre7L:
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#36
-	sub	sp, sp, #36
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r3, [r2]
-	ldm	r1, {r12, lr}
-	ldr	r9, [r1, #8]
-	ldr	r10, [r1, #12]
-	umull	r5, r4, r12, r3
-	umull	r6, r7, lr, r3
-	adds	r6, r4, r6
-	str	r5, [sp, #24]           @ 4-byte Spill
-	umull	r5, r6, r9, r3
-	adcs	r7, r7, r5
-	umlal	r4, r5, lr, r3
-	umull	r7, r11, r10, r3
-	adcs	r6, r6, r7
-	ldr	r7, [r1, #16]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	umull	r6, r8, r7, r3
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	adcs	r11, r11, r6
-	ldr	r6, [r2, #4]
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, lr}
+	push	{r4, r5, r6, r7, r8, lr}
+	ldm	r2, {r3, r12, lr}
+	ldm	r1, {r5, r6, r7}
+	subs	r3, r5, r3
 	str	r3, [r0]
-	umull	r3, r2, r12, r6
-	adc	r12, r8, #0
-	adds	r8, r3, r4
-	str	r2, [sp, #24]           @ 4-byte Spill
-	umull	r3, r2, lr, r6
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adcs	r5, r3, r5
-	umull	r3, lr, r10, r6
-	umull	r4, r10, r9, r6
-	str	r8, [r0, #4]
-	adcs	r4, r4, r2
-	umull	r2, r9, r7, r6
-	adcs	r3, r3, r11
-	adcs	r7, r2, r12
-	mov	r2, #0
-	adc	r6, r2, #0
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adds	r5, r5, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r11, r4, r2
-	adcs	r2, r3, r10
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	str	r2, [sp, #16]           @ 4-byte Spill
-	adcs	r2, r7, lr
-	ldr	r7, [r1]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	adc	r2, r6, r9
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldr	r2, [r3, #8]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldmib	r1, {r8, lr}
-	ldr	r6, [r1, #12]
-	umull	r12, r4, r7, r2
-	adds	r7, r12, r5
-	str	r4, [sp, #12]           @ 4-byte Spill
-	ldr	r12, [r1, #16]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	umull	r5, r7, r8, r2
-	str	r7, [sp, #4]            @ 4-byte Spill
-	adcs	r10, r5, r11
-	umull	r5, r7, lr, r2
-	str	r7, [sp]                @ 4-byte Spill
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	adcs	r9, r5, r7
-	umull	r4, r7, r6, r2
-	mov	r5, #0
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	adcs	r4, r4, r7
-	umull	r11, r7, r12, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adcs	r2, r11, r2
-	adc	r11, r5, #0
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	adds	r5, r10, r5
-	str	r5, [sp, #12]           @ 4-byte Spill
-	ldr	r5, [sp, #4]            @ 4-byte Reload
-	adcs	r5, r9, r5
-	str	r5, [sp, #8]            @ 4-byte Spill
-	ldr	r5, [sp]                @ 4-byte Reload
-	adcs	r4, r4, r5
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	adcs	r10, r2, r5
-	adc	r2, r11, r7
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r2, [r0, #8]
-	ldr	r2, [r3, #12]
-	umull	r11, r3, r6, r2
-	str	r3, [sp, #20]           @ 4-byte Spill
-	umull	r6, r3, lr, r2
-	umull	lr, r9, r8, r2
-	str	r3, [sp, #24]           @ 4-byte Spill
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	umull	r7, r8, r3, r2
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	adds	r3, r7, r3
-	str	r3, [sp, #28]           @ 4-byte Spill
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	adcs	r5, lr, r3
-	mov	r3, #0
-	adcs	r6, r6, r4
-	umull	r4, lr, r12, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	adcs	r7, r11, r10
-	adcs	r2, r4, r2
-	adc	r3, r3, #0
-	adds	r10, r5, r8
-	adcs	r11, r6, r9
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	adcs	r7, r7, r6
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	adcs	r2, r2, r7
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r2, [r0, #12]
-	adc	r2, r3, lr
-	ldr	r3, [r1]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	ldr	r4, [r2, #16]
-	ldmib	r1, {r2, r5, r6}
-	ldr	r1, [r1, #16]
-	umull	lr, r9, r6, r4
-	umull	r6, r8, r5, r4
-	umull	r5, r7, r2, r4
-	umull	r2, r12, r3, r4
-	adds	r10, r2, r10
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adcs	r3, r5, r11
-	str	r10, [r0, #16]
-	adcs	r5, r6, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r6, lr, r2
-	umull	r2, lr, r1, r4
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	mov	r2, #0
-	adc	r2, r2, #0
-	adds	r3, r3, r12
-	adcs	r7, r5, r7
+	sbcs	r3, r6, r12
+	ldr	r8, [r2, #12]
+	ldr	r4, [r1, #12]
+	str	r3, [r0, #4]
+	sbcs	r3, r7, lr
+	str	r3, [r0, #8]
+	sbcs	r3, r4, r8
+	str	r3, [r0, #12]
+	ldr	r3, [r2, #16]
+	ldr	r7, [r1, #16]
+	sbcs	r3, r7, r3
+	str	r3, [r0, #16]
+	ldr	r3, [r2, #20]
+	ldr	r7, [r1, #20]
+	ldr	r2, [r2, #24]
+	ldr	r1, [r1, #24]
+	sbcs	r3, r7, r3
 	str	r3, [r0, #20]
-	adcs	r6, r6, r8
-	str	r7, [r0, #24]
-	adcs	r1, r1, r9
-	str	r6, [r0, #28]
-	adc	r2, r2, lr
-	str	r1, [r0, #32]
-	str	r2, [r0, #36]
-	add	sp, sp, #36
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	sbcs	r1, r1, r2
+	str	r1, [r0, #24]
+	mov	r0, #0
+	sbc	r0, r0, #0
+	and	r0, r0, #1
+	pop	{r4, r5, r6, r7, r8, lr}
 	mov	pc, lr
-.Lfunc_end66:
-	.size	mcl_fpDbl_mulPre5L, .Lfunc_end66-mcl_fpDbl_mulPre5L
+.Lfunc_end31:
+	.size	mcl_fp_subPre7L, .Lfunc_end31-mcl_fp_subPre7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_sqrPre5L
-	.align	2
-	.type	mcl_fpDbl_sqrPre5L,%function
-mcl_fpDbl_sqrPre5L:                     @ @mcl_fpDbl_sqrPre5L
+                                        @ -- End function
+	.globl	mcl_fp_shr1_7L                  @ -- Begin function mcl_fp_shr1_7L
+	.p2align	2
+	.type	mcl_fp_shr1_7L,%function
+	.code	32                              @ @mcl_fp_shr1_7L
+mcl_fp_shr1_7L:
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#32
-	sub	sp, sp, #32
-	ldm	r1, {r2, r3, r12}
-	ldr	lr, [r1, #16]
-	ldr	r9, [r1, #12]
-	umull	r5, r6, r2, r2
-	umull	r7, r11, r3, r2
-	str	r5, [r0]
-	umull	r5, r4, lr, r2
-	adds	r8, r6, r7
-	str	r5, [sp, #24]           @ 4-byte Spill
-	umull	r5, r10, r12, r2
-	str	r4, [sp, #28]           @ 4-byte Spill
-	adcs	r4, r11, r5
-	umlal	r6, r5, r3, r2
-	umull	r4, r8, r9, r2
-	adcs	r10, r10, r4
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	adcs	r8, r8, r4
-	ldr	r4, [sp, #28]           @ 4-byte Reload
-	adc	r4, r4, #0
-	str	r4, [sp, #24]           @ 4-byte Spill
-	umull	r2, r4, r3, r3
-	str	r4, [sp, #28]           @ 4-byte Spill
-	adds	r4, r7, r6
-	str	r4, [sp, #16]           @ 4-byte Spill
-	adcs	r5, r2, r5
-	umull	r2, r4, r12, r3
-	str	r4, [sp, #12]           @ 4-byte Spill
-	adcs	r4, r2, r10
-	umull	r2, r6, r9, r3
-	adcs	r2, r2, r8
-	umull	r7, r8, lr, r3
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	adcs	r7, r7, r3
-	mov	r3, #0
-	adc	r3, r3, #0
-	adds	r5, r5, r11
-	str	r5, [sp, #24]           @ 4-byte Spill
-	ldr	r5, [sp, #28]           @ 4-byte Reload
-	adcs	r4, r4, r5
-	str	r4, [sp, #20]           @ 4-byte Spill
-	ldr	r4, [sp, #16]           @ 4-byte Reload
-	str	r4, [r0, #4]
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	adcs	r2, r2, r4
-	str	r2, [sp, #12]           @ 4-byte Spill
-	adcs	r2, r7, r6
-	str	r2, [sp, #8]            @ 4-byte Spill
-	adc	r2, r3, r8
-	str	r2, [sp, #4]            @ 4-byte Spill
-	umull	r11, r2, lr, r12
-	umull	lr, r10, r12, r12
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldm	r1, {r4, r6}
-	ldr	r2, [r1, #12]
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	umull	r8, r3, r2, r12
-	str	r3, [sp, #16]           @ 4-byte Spill
-	umull	r5, r3, r6, r12
-	str	r3, [sp]                @ 4-byte Spill
-	umull	r3, r9, r4, r12
-	adds	r3, r3, r7
-	str	r3, [sp, #24]           @ 4-byte Spill
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	adcs	r5, r5, r3
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	adcs	r12, lr, r3
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	adcs	r7, r8, r3
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	adcs	lr, r11, r3
-	mov	r3, #0
-	adc	r11, r3, #0
-	ldr	r3, [sp]                @ 4-byte Reload
-	adds	r5, r5, r9
-	adcs	r12, r12, r3
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	adcs	r9, r7, r10
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adcs	r8, lr, r3
-	adc	r11, r11, r7
-	umull	r7, r3, r4, r2
-	adds	r7, r7, r5
-	str	r3, [sp, #20]           @ 4-byte Spill
-	umull	r5, r3, r6, r2
-	ldr	r6, [r1, #8]
-	str	r3, [sp, #16]           @ 4-byte Spill
-	adcs	r10, r5, r12
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	ldr	r5, [r1, #16]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	umull	r4, lr, r6, r2
-	adcs	r12, r4, r9
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	umull	r7, r9, r2, r2
-	str	r3, [r0, #8]
-	adcs	r7, r7, r8
-	umull	r3, r8, r5, r2
-	adcs	r2, r3, r11
-	mov	r3, #0
-	adc	r3, r3, #0
-	adds	r11, r10, r4
-	ldr	r4, [sp, #16]           @ 4-byte Reload
-	adcs	r4, r12, r4
-	adcs	r10, r7, lr
-	adcs	r12, r2, r9
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adc	r8, r3, r8
-	ldr	r3, [r1]
-	str	r2, [r0, #12]
-	ldr	r2, [r1, #4]
-	ldr	r1, [r1, #12]
-	umull	r7, r9, r3, r5
-	adds	lr, r7, r11
-	str	lr, [r0, #16]
-	umull	r7, r11, r2, r5
-	adcs	r2, r7, r4
-	umull	r4, r7, r6, r5
-	adcs	r4, r4, r10
-	umull	r6, r10, r1, r5
-	adcs	r1, r6, r12
-	umull	r6, r3, r5, r5
-	mov	r5, #0
-	adcs	r6, r6, r8
-	adc	r5, r5, #0
-	adds	r2, r2, r9
-	adcs	r4, r4, r11
-	str	r2, [r0, #20]
-	adcs	r1, r1, r7
-	str	r4, [r0, #24]
-	adcs	r7, r6, r10
-	str	r1, [r0, #28]
-	adc	r3, r5, r3
-	str	r7, [r0, #32]
-	str	r3, [r0, #36]
-	add	sp, sp, #32
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, lr}
+	push	{r4, r5, r6, r7, r8, lr}
+	add	r5, r1, #8
+	ldm	r1, {r12, lr}
+	ldr	r1, [r1, #24]
+	ldm	r5, {r2, r3, r4, r5}
+	lsr	r6, r5, #1
+	lsr	r7, r3, #1
+	lsrs	r5, r5, #1
+	orr	r8, r6, r1, lsl #31
+	lsr	r6, lr, #1
+	orr	r7, r7, r4, lsl #31
+	rrx	r4, r4
+	lsrs	r3, r3, #1
+	orr	r6, r6, r2, lsl #31
+	rrx	r2, r2
+	lsrs	r3, lr, #1
+	lsr	r1, r1, #1
+	rrx	r3, r12
+	stm	r0, {r3, r6}
+	str	r2, [r0, #8]
+	str	r7, [r0, #12]
+	str	r4, [r0, #16]
+	str	r8, [r0, #20]
+	str	r1, [r0, #24]
+	pop	{r4, r5, r6, r7, r8, lr}
 	mov	pc, lr
-.Lfunc_end67:
-	.size	mcl_fpDbl_sqrPre5L, .Lfunc_end67-mcl_fpDbl_sqrPre5L
+.Lfunc_end32:
+	.size	mcl_fp_shr1_7L, .Lfunc_end32-mcl_fp_shr1_7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_mont5L
-	.align	2
-	.type	mcl_fp_mont5L,%function
-mcl_fp_mont5L:                          @ @mcl_fp_mont5L
+                                        @ -- End function
+	.globl	mcl_fp_add7L                    @ -- Begin function mcl_fp_add7L
+	.p2align	2
+	.type	mcl_fp_add7L,%function
+	.code	32                              @ @mcl_fp_add7L
+mcl_fp_add7L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#100
-	sub	sp, sp, #100
-	str	r0, [sp, #52]           @ 4-byte Spill
-	mov	r0, r2
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldm	r0, {r2, r8}
-	ldr	r7, [r0, #8]
-	ldr	r0, [r0, #12]
-	ldr	r6, [r3, #-4]
-	ldr	r5, [r3, #8]
-	ldr	r9, [r3]
-	ldr	r11, [r1, #8]
-	ldr	r12, [r1, #12]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r1]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [r1, #4]
-	ldr	r1, [r1, #16]
-	str	r6, [sp, #84]           @ 4-byte Spill
-	str	r5, [sp, #88]           @ 4-byte Spill
-	str	r9, [sp, #80]           @ 4-byte Spill
-	str	r11, [sp, #60]          @ 4-byte Spill
-	str	r12, [sp, #56]          @ 4-byte Spill
-	umull	r4, lr, r0, r2
-	str	r0, [sp, #72]           @ 4-byte Spill
-	str	r7, [sp, #96]           @ 4-byte Spill
-	ldr	r7, [r3, #4]
-	str	r1, [sp, #64]           @ 4-byte Spill
-	mul	r0, r4, r6
-	str	r4, [sp, #36]           @ 4-byte Spill
-	umull	r6, r4, r0, r5
-	str	r4, [sp, #28]           @ 4-byte Spill
-	umull	r4, r5, r0, r9
-	mov	r10, r6
-	mov	r9, r5
-	str	r4, [sp, #32]           @ 4-byte Spill
-	str	r7, [sp, #76]           @ 4-byte Spill
-	str	r5, [sp, #12]           @ 4-byte Spill
-	mov	r4, r7
-	umlal	r9, r10, r0, r7
-	umull	r7, r5, r1, r2
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	str	r5, [sp, #92]           @ 4-byte Spill
-	umull	r5, r1, r12, r2
-	str	r1, [sp, #20]           @ 4-byte Spill
-	str	r5, [sp, #24]           @ 4-byte Spill
-	umull	r12, r1, r11, r2
-	umull	r11, r5, r7, r2
-	adds	r7, lr, r11
-	adcs	r5, r5, r12
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r5, r1
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	str	r1, [sp, #68]           @ 4-byte Spill
-	umull	r7, r11, r0, r1
-	ldr	r1, [r3, #12]
-	umull	r3, r5, r0, r4
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	adds	r3, r4, r3
-	str	r1, [sp, #92]           @ 4-byte Spill
-	umull	r3, r4, r0, r1
-	adcs	r0, r5, r6
+	.pad	#12
+	sub	sp, sp, #12
+	ldr	r9, [r2]
+	ldm	r1, {r6, r7}
+	ldmib	r2, {r8, r12}
+	adds	r5, r6, r9
+	ldr	r4, [r1, #8]
+	adcs	r7, r7, r8
+	ldr	r11, [r2, #12]
+	ldr	lr, [r1, #12]
+	adcs	r9, r4, r12
+	ldr	r6, [r2, #16]
+	ldr	r4, [r1, #16]
+	adcs	r8, lr, r11
+	ldr	r10, [r2, #20]
+	adcs	r11, r4, r6
+	ldr	r4, [r1, #20]
+	ldr	r2, [r2, #24]
+	ldr	r1, [r1, #24]
+	adcs	r4, r4, r10
+	str	r5, [sp, #8]                    @ 4-byte Spill
+	adcs	r6, r1, r2
 	mov	r1, #0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	adcs	r3, r4, r7
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	adc	r5, r11, #0
-	umlal	lr, r12, r7, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	adds	r2, r4, r2
-	adcs	r2, r9, lr
-	ldr	r9, [sp, #64]           @ 4-byte Reload
-	str	r2, [sp, #36]           @ 4-byte Spill
-	adcs	r2, r10, r12
-	ldr	r10, [sp, #72]          @ 4-byte Reload
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	umull	r5, lr, r8, r9
-	str	r0, [sp, #20]           @ 4-byte Spill
-	adc	r0, r1, #0
-	umull	r6, r1, r8, r7
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	umull	r12, r4, r8, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	umull	r3, r2, r8, r0
-	umull	r11, r0, r8, r10
-	ldr	r10, [sp, #68]          @ 4-byte Reload
-	adds	r6, r0, r6
-	adcs	r1, r1, r3
-	umlal	r0, r3, r8, r7
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r2, r12
-	adcs	r2, r4, r5
-	adc	r6, lr, #0
-	adds	r8, r7, r11
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	adcs	r11, r7, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #24]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	mul	r4, r8, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	umull	r6, r1, r4, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r1, [sp, #12]           @ 4-byte Spill
-	umull	r1, r5, r4, r0
-	mov	r0, r6
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	mov	r3, r5
-	umull	r12, lr, r4, r1
-	umlal	r3, r0, r4, r1
-	umull	r1, r2, r4, r7
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	adds	r5, r5, r12
-	adcs	r6, lr, r6
-	umull	r5, r12, r4, r10
-	adcs	r1, r7, r1
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	adcs	r2, r2, r5
-	adc	r6, r12, #0
-	adds	r7, r7, r8
-	ldr	r8, [sp, #60]           @ 4-byte Reload
-	adcs	r3, r3, r11
-	ldr	r11, [sp, #72]          @ 4-byte Reload
-	str	r3, [sp, #16]           @ 4-byte Spill
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	umull	r2, r1, r0, r9
-	ldr	r9, [sp, #56]           @ 4-byte Reload
-	umull	r3, r12, r0, r8
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r2, [sp, #4]            @ 4-byte Spill
-	mov	r2, r0
-	umull	r4, r5, r0, r9
-	umull	r6, r7, r0, r1
-	umull	lr, r0, r2, r11
-	adds	r6, r0, r6
-	str	lr, [sp, #8]            @ 4-byte Spill
-	adcs	r6, r7, r3
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	umlal	r0, r3, r2, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adcs	r12, r12, r4
-	adcs	r4, r5, r7
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	adc	r7, r7, #0
-	adds	r2, r1, r2
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r2, [sp]                @ 4-byte Spill
-	adcs	r0, r1, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #24]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	mul	r4, r2, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	umull	r5, r1, r4, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r1, [sp, #12]           @ 4-byte Spill
-	mov	r2, r5
-	umull	r1, r7, r4, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r1, [sp, #16]           @ 4-byte Spill
-	umull	r6, r1, r4, r10
-	mov	r3, r7
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r6, [sp, #4]            @ 4-byte Spill
-	umlal	r3, r2, r4, r0
-	umull	r12, lr, r4, r1
-	umull	r10, r1, r4, r0
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adds	r4, r7, r10
-	adcs	r1, r1, r5
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	ldr	r1, [sp]                @ 4-byte Reload
-	adcs	r10, r0, r12
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adcs	r12, lr, r0
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adc	lr, r0, #0
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adds	r6, r0, r1
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	umull	r5, r1, r0, r4
-	mov	r6, r0
-	str	r1, [sp, #16]           @ 4-byte Spill
-	umull	r4, r1, r0, r9
-	str	r5, [sp, #8]            @ 4-byte Spill
-	umull	r5, r9, r0, r8
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r4, [sp]                @ 4-byte Spill
-	umull	r4, r8, r0, r1
-	umull	r7, r0, r6, r11
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	adcs	r11, r3, r7
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	adcs	r2, r2, r3
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	str	r2, [sp, #40]           @ 4-byte Spill
-	adcs	r10, r10, r3
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	adcs	r12, r12, r3
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	adcs	r7, lr, r3
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	adc	r2, r3, #0
-	adds	r4, r0, r4
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [sp]                @ 4-byte Reload
-	adcs	r4, r8, r5
-	umlal	r0, r5, r6, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r4, r9, r2
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adcs	r3, r3, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	adc	r8, r2, #0
-	adds	lr, r11, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r9, r10, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r10, [sp, #92]          @ 4-byte Reload
-	adcs	r0, r12, r4
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #32]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	mul	r4, lr, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	umull	r12, r3, r4, r1
-	umull	r7, r11, r4, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	umull	r8, r6, r4, r0
-	mov	r0, r7
-	mov	r5, r6
-	adds	r6, r6, r12
-	umlal	r5, r0, r4, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r3, r3, r7
-	umull	r6, r12, r4, r1
-	umull	r1, r2, r4, r10
-	adcs	r1, r11, r1
-	adcs	r2, r2, r6
-	adc	r3, r12, #0
-	adds	r7, r8, lr
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	adcs	r7, r5, r7
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #72]           @ 4-byte Reload
-	str	r7, [sp, #44]           @ 4-byte Spill
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r5, [r0, #16]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	umull	r4, r8, r5, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	umull	r7, r1, r5, r2
-	umull	r12, lr, r5, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	umull	r6, r3, r5, r0
-	umull	r11, r0, r5, r9
-	ldr	r9, [sp, #76]           @ 4-byte Reload
-	adds	r7, r0, r7
-	adcs	r1, r1, r6
-	umlal	r0, r6, r5, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r3, r12
-	ldr	r12, [sp, #80]          @ 4-byte Reload
-	adcs	r4, lr, r4
-	ldr	lr, [sp, #88]           @ 4-byte Reload
-	adc	r3, r8, #0
-	adds	r7, r2, r11
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r11, r0, r6
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	mul	r4, r7, r0
-	umull	r0, r1, r4, r9
-	umull	r8, r3, r4, r12
-	adds	r0, r3, r0
-	umull	r5, r0, r4, lr
-	adcs	r1, r1, r5
-	umlal	r3, r5, r4, r9
-	umull	r1, r6, r4, r10
-	adcs	r10, r0, r1
-	umull	r1, r0, r4, r2
-	mov	r4, r9
-	adcs	r1, r6, r1
-	ldr	r6, [sp, #96]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r7, r8, r7
-	adcs	r3, r3, r6
-	adcs	r7, r5, r11
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r10, r5
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	adcs	r8, r1, r5
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r10, r0, r1
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adc	r9, r0, #0
-	subs	r5, r3, r12
-	sbcs	r4, r7, r4
-	sbcs	r0, r11, lr
-	sbcs	r6, r8, r1
-	sbcs	r1, r10, r2
-	sbc	r2, r9, #0
-	ands	r2, r2, #1
-	movne	r5, r3
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	movne	r4, r7
-	movne	r0, r11
-	cmp	r2, #0
-	movne	r6, r8
-	movne	r1, r10
-	str	r5, [r3]
-	str	r4, [r3, #4]
-	str	r0, [r3, #8]
-	str	r6, [r3, #12]
-	str	r1, [r3, #16]
-	add	sp, sp, #100
+	ldr	r2, [r3]
+	adc	r1, r1, #0
+	ldr	r10, [sp, #8]                   @ 4-byte Reload
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	stm	r0, {r5, r7, r9}
+	subs	r10, r10, r2
+	str	r1, [sp]                        @ 4-byte Spill
+	ldmib	r3, {r1, r5, r7, r12, lr}
+	ldr	r2, [sp, #4]                    @ 4-byte Reload
+	ldr	r3, [r3, #24]
+	sbcs	r2, r2, r1
+	str	r4, [r0, #20]
+	sbcs	r9, r9, r5
+	str	r8, [r0, #12]
+	sbcs	r1, r8, r7
+	str	r11, [r0, #16]
+	sbcs	r7, r11, r12
+	str	r6, [r0, #24]
+	sbcs	r4, r4, lr
+	sbcs	r5, r6, r3
+	ldr	r3, [sp]                        @ 4-byte Reload
+	sbc	r3, r3, #0
+	tst	r3, #1
+	bne	.LBB33_2
+@ %bb.1:                                @ %nocarry
+	str	r10, [r0]
+	stmib	r0, {r2, r9}
+	str	r1, [r0, #12]
+	str	r7, [r0, #16]
+	str	r4, [r0, #20]
+	str	r5, [r0, #24]
+.LBB33_2:                               @ %carry
+	add	sp, sp, #12
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end68:
-	.size	mcl_fp_mont5L, .Lfunc_end68-mcl_fp_mont5L
+.Lfunc_end33:
+	.size	mcl_fp_add7L, .Lfunc_end33-mcl_fp_add7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_montNF5L
-	.align	2
-	.type	mcl_fp_montNF5L,%function
-mcl_fp_montNF5L:                        @ @mcl_fp_montNF5L
+                                        @ -- End function
+	.globl	mcl_fp_addNF7L                  @ -- Begin function mcl_fp_addNF7L
+	.p2align	2
+	.type	mcl_fp_addNF7L,%function
+	.code	32                              @ @mcl_fp_addNF7L
+mcl_fp_addNF7L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#76
-	sub	sp, sp, #76
-	str	r2, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r2, {r4, r9, r10}
-	ldr	r6, [r1, #4]
-	ldr	r0, [r2, #12]
-	ldr	r7, [r1]
-	ldr	r5, [r1, #8]
-	ldr	lr, [r3, #8]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r1, #12]
-	str	r6, [sp, #32]           @ 4-byte Spill
-	umull	r2, r8, r6, r4
-	mov	r11, r6
-	umull	r6, r12, r7, r4
-	str	r7, [sp, #56]           @ 4-byte Spill
-	str	r5, [sp, #48]           @ 4-byte Spill
-	str	lr, [sp, #36]           @ 4-byte Spill
-	adds	r7, r12, r2
-	umull	r2, r7, r5, r4
-	adcs	r5, r8, r2
-	umlal	r12, r2, r11, r4
-	umull	r5, r8, r0, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	adcs	r0, r7, r5
-	ldr	r5, [r3, #4]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r1, #16]
-	str	r5, [sp, #60]           @ 4-byte Spill
-	umull	r1, r7, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	adcs	r0, r8, r1
-	ldr	r1, [r3]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	adc	r0, r7, #0
-	ldr	r7, [r3, #-4]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	str	r1, [sp, #40]           @ 4-byte Spill
-	mul	r0, r6, r7
-	str	r7, [sp, #72]           @ 4-byte Spill
-	umull	r8, r7, r0, r1
-	ldr	r1, [r3, #12]
-	ldr	r3, [r3, #16]
-	adds	r6, r8, r6
-	umull	r4, r8, r0, r5
-	str	r7, [sp, #8]            @ 4-byte Spill
-	umull	r5, r7, r0, lr
-	ldr	lr, [sp, #64]           @ 4-byte Reload
-	adcs	r6, r4, r12
-	adcs	r5, r5, r2
-	str	r1, [sp, #52]           @ 4-byte Spill
-	umull	r2, r4, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r3, [sp, #44]           @ 4-byte Spill
-	adcs	r2, r2, r1
-	umull	r12, r1, r0, r3
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	adc	r12, r3, #0
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	adds	r6, r6, r3
-	adcs	r3, r5, r8
-	ldr	r8, [sp, #56]           @ 4-byte Reload
-	adcs	r2, r2, r7
-	str	r3, [sp, #16]           @ 4-byte Spill
-	adcs	r0, r0, r4
-	umull	r7, r4, r9, r11
-	str	r2, [sp, #12]           @ 4-byte Spill
-	str	r0, [sp, #8]            @ 4-byte Spill
-	adc	r0, r12, r1
-	ldr	r12, [sp, #68]          @ 4-byte Reload
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	umull	r5, r1, r9, r8
-	adds	r7, r1, r7
-	umull	r2, r7, r9, r0
-	adcs	r4, r4, r2
-	umlal	r1, r2, r9, r11
-	ldr	r11, [sp, #44]          @ 4-byte Reload
-	umull	r4, r0, r9, r12
-	adcs	r4, r7, r4
-	umull	r7, r3, r9, lr
-	ldr	r9, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	adc	r3, r3, #0
-	adds	r7, r5, r6
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	adcs	r2, r2, r5
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	adcs	r6, r4, r5
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #16]           @ 4-byte Spill
-	adc	r0, r3, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	mul	r5, r7, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	umull	r4, r3, r5, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adds	r7, r4, r7
-	ldr	r4, [sp, #52]           @ 4-byte Reload
-	str	r3, [sp, #8]            @ 4-byte Spill
-	umull	r7, r3, r5, r0
-	adcs	r1, r7, r1
-	umull	r7, r0, r5, r9
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp]                @ 4-byte Spill
-	adcs	r2, r7, r2
-	umull	r7, r0, r5, r4
-	adcs	r6, r7, r6
-	umull	r7, r4, r5, r11
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	adcs	r7, r7, r5
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	adc	r5, r5, #0
-	adds	r1, r1, r3
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r1, r2, r1
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp]                @ 4-byte Reload
-	adcs	r1, r6, r1
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #8]            @ 4-byte Spill
-	adc	r11, r5, r4
-	str	r0, [sp, #4]            @ 4-byte Spill
-	umull	r4, r0, r10, r8
-	ldr	r8, [sp, #60]           @ 4-byte Reload
-	umull	r6, r5, r10, r7
-	adds	r6, r0, r6
-	umull	r1, r6, r10, r3
-	adcs	r5, r5, r1
-	umlal	r0, r1, r10, r7
-	umull	r5, r2, r10, r12
-	adcs	r12, r6, r5
-	umull	r6, r5, r10, lr
-	mov	lr, r7
-	adcs	r2, r2, r6
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	adc	r5, r5, #0
-	adds	r6, r4, r6
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	adcs	r1, r1, r4
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	adcs	r10, r12, r4
-	adcs	r2, r2, r11
-	ldr	r11, [sp, #40]          @ 4-byte Reload
-	str	r2, [sp, #8]            @ 4-byte Spill
-	adc	r2, r5, #0
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	mul	r7, r6, r2
-	umull	r4, r2, r7, r11
-	adds	r6, r4, r6
-	str	r2, [sp, #12]           @ 4-byte Spill
-	umull	r6, r2, r7, r8
-	str	r2, [sp, #4]            @ 4-byte Spill
-	adcs	r0, r6, r0
-	umull	r6, r2, r7, r9
-	ldr	r9, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r6, r1
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	umull	r6, r12, r7, r9
-	adcs	r5, r6, r10
-	ldr	r10, [sp, #44]          @ 4-byte Reload
-	umull	r6, r4, r7, r10
-	adcs	r7, r6, r2
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	adc	r6, r6, #0
-	adds	r0, r0, r2
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adcs	r0, r1, r0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp]                @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	adcs	r0, r7, r12
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #4]            @ 4-byte Spill
-	adc	r0, r6, r4
-	str	r0, [sp]                @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	umull	r1, r5, r7, r3
-	mov	r6, r1
-	umull	r4, r2, r7, r0
-	mov	r0, lr
-	mov	r12, r2
-	umull	r3, lr, r7, r0
-	umlal	r12, r6, r7, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adds	r2, r2, r3
-	adcs	r1, lr, r1
-	umull	r1, r2, r7, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r5, r1
-	umull	r3, r5, r7, r0
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r2, r2, r3
-	adc	r3, r5, #0
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	adds	r7, r4, r0
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	adcs	r6, r6, r5
-	ldr	r5, [sp, #4]            @ 4-byte Reload
-	adcs	r1, r1, r5
-	ldr	r5, [sp]                @ 4-byte Reload
-	adcs	r2, r2, r5
-	str	r2, [sp, #20]           @ 4-byte Spill
-	adc	r2, r3, #0
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	mul	r5, r7, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	umull	r4, lr, r5, r11
-	adds	r7, r4, r7
-	umull	r7, r12, r5, r8
-	adcs	r0, r7, r0
-	umull	r7, r3, r5, r2
-	adcs	r6, r7, r6
-	umull	r7, r2, r5, r9
-	adcs	r1, r7, r1
-	umull	r7, r4, r5, r10
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	adcs	r7, r7, r5
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	adc	r5, r5, #0
-	adds	r0, r0, lr
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r10, r6, r12
-	adcs	lr, r1, r3
-	adcs	r8, r7, r2
-	adc	r9, r5, r4
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	ldr	r7, [r0, #16]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	umull	r3, r11, r7, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	mov	r5, r3
-	umull	r12, r2, r7, r0
-	umull	r6, r0, r7, r4
-	mov	r1, r2
-	adds	r2, r2, r6
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	ldr	r3, [sp, #68]           @ 4-byte Reload
-	umlal	r1, r5, r7, r4
-	umull	r0, r2, r7, r3
-	umull	r3, r4, r7, r6
-	adcs	r0, r11, r0
-	adcs	r2, r2, r3
-	adc	r3, r4, #0
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	adds	r7, r12, r4
-	ldr	r12, [sp, #60]          @ 4-byte Reload
-	adcs	r1, r1, r10
-	adcs	r6, r5, lr
-	adcs	r11, r0, r8
-	ldr	r8, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r2, r9
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	adc	r0, r3, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	mul	r5, r7, r0
-	umull	r4, r0, r5, r8
-	umull	r3, lr, r5, r12
-	adds	r7, r4, r7
-	ldr	r4, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	adcs	r1, r3, r1
-	ldr	r9, [sp, #72]           @ 4-byte Reload
-	umull	r7, r0, r5, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r3, r7, r6
-	umull	r6, r10, r5, r2
-	adcs	r7, r6, r11
-	umull	r6, r11, r5, r0
-	ldr	r5, [sp, #68]           @ 4-byte Reload
-	adcs	r6, r6, r5
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	adc	r5, r5, #0
-	adds	r1, r1, r9
-	adcs	lr, r3, lr
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	adcs	r9, r7, r3
+	.pad	#16
+	sub	sp, sp, #16
+	ldm	r1, {r5, r7}
+	add	r11, r1, #8
+	add	lr, r2, #12
+	str	r7, [sp, #8]                    @ 4-byte Spill
+	ldr	r7, [r1, #20]
+	ldr	r8, [r1, #24]
+	ldm	r2, {r1, r4, r12}
+	adds	r1, r1, r5
+	ldr	r5, [sp, #8]                    @ 4-byte Reload
+	ldm	r11, {r9, r10, r11}
+	adcs	r5, r4, r5
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	adcs	r4, r12, r9
+	ldm	lr, {r6, r7, lr}
 	adcs	r10, r6, r10
-	adc	r11, r5, r11
-	subs	r6, r1, r8
-	sbcs	r5, lr, r12
-	sbcs	r4, r9, r4
-	sbcs	r7, r10, r2
-	sbc	r3, r11, r0
-	asr	r0, r3, #31
-	cmp	r0, #0
-	movlt	r6, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	movlt	r5, lr
-	movlt	r4, r9
-	cmp	r0, #0
-	movlt	r7, r10
-	movlt	r3, r11
-	str	r6, [r1]
-	str	r5, [r1, #4]
-	str	r4, [r1, #8]
-	str	r7, [r1, #12]
-	str	r3, [r1, #16]
-	add	sp, sp, #76
+	str	r4, [sp]                        @ 4-byte Spill
+	adcs	r9, r7, r11
+	ldr	r7, [sp, #4]                    @ 4-byte Reload
+	ldr	r2, [r2, #24]
+	ldm	r3, {r4, r12}
+	adcs	r7, lr, r7
+	adc	r2, r2, r8
+	subs	r4, r1, r4
+	str	r5, [sp, #8]                    @ 4-byte Spill
+	sbcs	r5, r5, r12
+	ldr	r6, [r3, #8]
+	ldr	r12, [sp]                       @ 4-byte Reload
+	ldr	lr, [r3, #12]
+	sbcs	r6, r12, r6
+	ldr	r8, [r3, #16]
+	sbcs	lr, r10, lr
+	ldr	r11, [r3, #20]
+	sbcs	r8, r9, r8
+	ldr	r3, [r3, #24]
+	sbcs	r11, r7, r11
+	str	r1, [sp, #12]                   @ 4-byte Spill
+	sbc	r3, r2, r3
+	asr	r1, r3, #31
+	cmn	r1, #1
+	movgt	r2, r3
+	movle	r11, r7
+	str	r2, [r0, #24]
+	movle	r8, r9
+	ldr	r2, [sp, #8]                    @ 4-byte Reload
+	cmn	r1, #1
+	movle	lr, r10
+	movle	r6, r12
+	str	r11, [r0, #20]
+	movle	r5, r2
+	cmn	r1, #1
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	str	r8, [r0, #16]
+	str	lr, [r0, #12]
+	movle	r4, r1
+	str	r6, [r0, #8]
+	str	r5, [r0, #4]
+	str	r4, [r0]
+	add	sp, sp, #16
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end69:
-	.size	mcl_fp_montNF5L, .Lfunc_end69-mcl_fp_montNF5L
+.Lfunc_end34:
+	.size	mcl_fp_addNF7L, .Lfunc_end34-mcl_fp_addNF7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_montRed5L
-	.align	2
-	.type	mcl_fp_montRed5L,%function
-mcl_fp_montRed5L:                       @ @mcl_fp_montRed5L
+                                        @ -- End function
+	.globl	mcl_fp_sub7L                    @ -- Begin function mcl_fp_sub7L
+	.p2align	2
+	.type	mcl_fp_sub7L,%function
+	.code	32                              @ @mcl_fp_sub7L
+mcl_fp_sub7L:
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#84
-	sub	sp, sp, #84
-	ldr	r6, [r1, #4]
-	ldr	r9, [r2, #-4]
-	ldr	r4, [r1]
-	ldr	r8, [r2, #8]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r2]
-	ldr	r10, [r2, #4]
-	str	r6, [sp, #48]           @ 4-byte Spill
-	ldr	r6, [r1, #8]
-	mul	r5, r4, r9
-	str	r4, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #72]           @ 4-byte Spill
-	str	r9, [sp, #64]           @ 4-byte Spill
-	str	r8, [sp, #68]           @ 4-byte Spill
-	umull	lr, r4, r5, r8
-	str	r4, [sp, #40]           @ 4-byte Spill
-	umull	r4, r3, r5, r0
-	mov	r12, lr
-	str	r4, [sp, #44]           @ 4-byte Spill
-	ldr	r4, [r2, #16]
-	ldr	r2, [r2, #12]
-	mov	r0, r3
-	str	r6, [sp, #56]           @ 4-byte Spill
-	ldr	r6, [r1, #12]
-	umlal	r0, r12, r5, r10
-	str	r4, [sp, #76]           @ 4-byte Spill
-	str	r2, [sp, #80]           @ 4-byte Spill
-	str	r6, [sp, #52]           @ 4-byte Spill
-	umull	r7, r6, r5, r4
-	str	r6, [sp, #28]           @ 4-byte Spill
-	umull	r4, r6, r5, r2
-	umull	r11, r2, r5, r10
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adds	r3, r3, r11
-	ldr	r11, [r1, #36]
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, lr}
+	ldr	r9, [r2]
+	ldm	r1, {r4, r5, r6, r7}
+	ldmib	r2, {r8, lr}
+	subs	r4, r4, r9
+	sbcs	r8, r5, r8
+	ldr	r12, [r2, #12]
+	sbcs	r9, r6, lr
+	ldr	r6, [r2, #16]
+	sbcs	r10, r7, r12
+	ldr	r7, [r1, #16]
+	ldr	r5, [r2, #20]
+	sbcs	lr, r7, r6
+	ldr	r7, [r1, #20]
+	ldr	r2, [r2, #24]
+	ldr	r1, [r1, #24]
+	sbcs	r12, r7, r5
+	stm	r0, {r4, r8, r9, r10, lr}
+	sbcs	r1, r1, r2
+	mov	r2, #0
+	str	r12, [r0, #20]
+	sbc	r2, r2, #0
+	str	r1, [r0, #24]
+	tst	r2, #1
+	bne	.LBB35_2
+@ %bb.1:                                @ %nocarry
+	pop	{r4, r5, r6, r7, r8, r9, r10, lr}
+	mov	pc, lr
+.LBB35_2:                               @ %carry
+	ldm	r3, {r2, r5, r6, r7}
+	adds	r2, r2, r4
+	str	r2, [r0]
+	adcs	r2, r5, r8
+	str	r2, [r0, #4]
+	adcs	r2, r6, r9
+	str	r2, [r0, #8]
+	adcs	r2, r7, r10
+	str	r2, [r0, #12]
+	ldr	r2, [r3, #16]
 	adcs	r2, r2, lr
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	add	lr, r1, #16
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	adcs	r2, r2, r4
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	adcs	r2, r6, r2
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adc	r2, r2, #0
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	adds	r5, r3, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	ldr	r3, [sp, #72]           @ 4-byte Reload
-	adcs	r2, r2, r0
-	mul	r0, r2, r9
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r9, [r1, #28]
-	umull	r6, r2, r0, r8
-	str	r2, [sp, #40]           @ 4-byte Spill
-	umull	r2, r4, r0, r3
-	mov	r5, r6
-	mov	r8, r6
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #32]
-	mov	r7, r4
-	umlal	r7, r5, r0, r10
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldm	lr, {r1, r2, lr}
-	ldr	r6, [sp, #56]           @ 4-byte Reload
-	adcs	r3, r6, r12
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	str	r3, [sp, #12]           @ 4-byte Spill
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	adcs	r6, r6, r3
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	str	r6, [sp, #56]           @ 4-byte Spill
-	adcs	r1, r1, r3
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	str	r1, [sp, #36]           @ 4-byte Spill
-	adcs	r1, lr, #0
-	ldr	lr, [sp, #76]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	adcs	r1, r9, #0
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r1, #0
-	str	r1, [sp, #24]           @ 4-byte Spill
-	adcs	r1, r11, #0
-	umull	r6, r11, r0, lr
-	str	r1, [sp, #20]           @ 4-byte Spill
-	mov	r1, #0
-	adc	r1, r1, #0
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	umull	r2, r3, r0, r1
-	umull	r9, r1, r0, r10
-	adds	r0, r4, r9
-	adcs	r0, r1, r8
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r9, r0, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r3, r6
-	ldr	r6, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r11, r11, #0
-	adds	r3, r1, r0
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	mul	r7, r0, r2
-	str	r0, [sp, #12]           @ 4-byte Spill
-	umull	r8, r0, r7, r1
-	str	r0, [sp, #4]            @ 4-byte Spill
-	umull	r3, r0, r7, r6
-	mov	r12, r8
-	str	r3, [sp, #8]            @ 4-byte Spill
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	mov	r4, r0
-	umlal	r4, r12, r7, r10
-	adcs	r3, r5, r3
-	ldr	r5, [sp, #40]           @ 4-byte Reload
-	str	r3, [sp]                @ 4-byte Spill
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	adcs	r3, r9, r3
-	str	r3, [sp, #56]           @ 4-byte Spill
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	adcs	r3, r5, r3
-	str	r3, [sp, #52]           @ 4-byte Spill
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	adcs	r3, r11, r3
-	str	r3, [sp, #48]           @ 4-byte Spill
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	adcs	r3, r3, #0
-	str	r3, [sp, #44]           @ 4-byte Spill
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	adcs	r3, r3, #0
-	str	r3, [sp, #40]           @ 4-byte Spill
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	adcs	r3, r3, #0
-	str	r3, [sp, #36]           @ 4-byte Spill
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	adc	r3, r3, #0
-	str	r3, [sp, #32]           @ 4-byte Spill
-	umull	r5, r3, r7, lr
-	ldr	lr, [sp, #80]           @ 4-byte Reload
-	str	r3, [sp, #28]           @ 4-byte Spill
-	umull	r9, r3, r7, r10
-	str	r5, [sp, #24]           @ 4-byte Spill
-	adds	r0, r0, r9
-	adcs	r0, r3, r8
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	umull	r5, r11, r7, lr
-	adcs	r9, r0, r5
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r8, r0, #0
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adds	r3, r3, r0
-	ldr	r0, [sp]                @ 4-byte Reload
-	adcs	r11, r4, r0
-	mul	r7, r11, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	umull	r3, r0, r7, r1
-	str	r0, [sp, #24]           @ 4-byte Spill
-	umull	r1, r0, r7, r6
-	mov	r5, r3
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	mov	r4, r0
-	umlal	r4, r5, r7, r10
-	adcs	r1, r12, r1
-	umull	r12, r6, r7, lr
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r9, r1
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #76]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r8, r1
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r1, #0
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r1, #0
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #32]           @ 4-byte Spill
-	umull	r9, r1, r7, r2
-	str	r1, [sp, #20]           @ 4-byte Spill
-	umull	r8, r1, r7, r10
-	adds	r0, r0, r8
-	ldr	r8, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r3
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	adcs	r1, r6, r9
-	adc	r7, r3, #0
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	adds	r3, r3, r11
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	adcs	r12, r4, r3
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	adcs	r3, r5, r3
-	str	r3, [sp, #56]           @ 4-byte Spill
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	mul	r4, r12, r0
-	umull	r0, r1, r4, r10
-	umull	r11, r5, r4, r8
-	adds	r0, r5, r0
-	umull	r6, r0, r4, r7
-	adcs	r1, r1, r6
-	umlal	r5, r6, r4, r10
-	umull	r1, r3, r4, lr
-	adcs	r9, r0, r1
-	umull	r1, r0, r4, r2
-	adcs	r1, r3, r1
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r2, r11, r12
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	adcs	r2, r5, r2
-	adcs	r3, r6, r3
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	adcs	lr, r9, r6
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	adcs	r9, r1, r6
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adc	r12, r0, #0
-	subs	r5, r2, r8
-	sbcs	r4, r3, r10
-	sbcs	r0, lr, r7
-	sbcs	r6, r9, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	sbcs	r1, r11, r1
-	sbc	r7, r12, #0
-	ands	r7, r7, #1
-	movne	r5, r2
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	movne	r4, r3
-	movne	r0, lr
-	cmp	r7, #0
-	movne	r6, r9
-	movne	r1, r11
-	str	r5, [r2]
-	str	r4, [r2, #4]
-	str	r0, [r2, #8]
-	str	r6, [r2, #12]
-	str	r1, [r2, #16]
-	add	sp, sp, #84
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	str	r2, [r0, #16]
+	ldr	r2, [r3, #20]
+	adcs	r2, r2, r12
+	str	r2, [r0, #20]
+	ldr	r2, [r3, #24]
+	adc	r1, r2, r1
+	str	r1, [r0, #24]
+	pop	{r4, r5, r6, r7, r8, r9, r10, lr}
 	mov	pc, lr
-.Lfunc_end70:
-	.size	mcl_fp_montRed5L, .Lfunc_end70-mcl_fp_montRed5L
+.Lfunc_end35:
+	.size	mcl_fp_sub7L, .Lfunc_end35-mcl_fp_sub7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_addPre5L
-	.align	2
-	.type	mcl_fp_addPre5L,%function
-mcl_fp_addPre5L:                        @ @mcl_fp_addPre5L
+                                        @ -- End function
+	.globl	mcl_fp_subNF7L                  @ -- Begin function mcl_fp_subNF7L
+	.p2align	2
+	.type	mcl_fp_subNF7L,%function
+	.code	32                              @ @mcl_fp_subNF7L
+mcl_fp_subNF7L:
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, lr}
-	push	{r4, r5, r6, r7, r8, lr}
-	ldm	r2, {r3, r12, lr}
-	ldr	r4, [r2, #12]
-	ldr	r8, [r2, #16]
-	ldm	r1, {r5, r6, r7}
-	ldr	r2, [r1, #12]
-	ldr	r1, [r1, #16]
-	adds	r3, r3, r5
-	adcs	r6, r12, r6
-	adcs	r7, lr, r7
-	adcs	r2, r4, r2
-	stm	r0, {r3, r6, r7}
-	adcs	r1, r8, r1
-	str	r2, [r0, #12]
-	str	r1, [r0, #16]
-	mov	r0, #0
-	adc	r0, r0, #0
-	pop	{r4, r5, r6, r7, r8, lr}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#20
+	sub	sp, sp, #20
+	ldm	r2, {r5, r7}
+	add	r11, r2, #8
+	add	lr, r1, #12
+	str	r7, [sp, #12]                   @ 4-byte Spill
+	ldr	r7, [r2, #20]
+	ldr	r8, [r2, #24]
+	ldm	r1, {r2, r4, r12}
+	subs	r2, r2, r5
+	ldr	r5, [sp, #12]                   @ 4-byte Reload
+	ldm	r11, {r9, r10, r11}
+	sbcs	r5, r4, r5
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	sbcs	r4, r12, r9
+	ldm	lr, {r6, r7, lr}
+	sbcs	r6, r6, r10
+	ldr	r1, [r1, #24]
+	sbcs	r10, r7, r11
+	ldr	r7, [sp, #4]                    @ 4-byte Reload
+	str	r6, [sp]                        @ 4-byte Spill
+	sbcs	r7, lr, r7
+	ldm	r3, {r6, r12, lr}
+	sbc	r1, r1, r8
+	adds	r6, r2, r6
+	str	r5, [sp, #12]                   @ 4-byte Spill
+	adcs	r5, r5, r12
+	str	r4, [sp, #8]                    @ 4-byte Spill
+	adcs	lr, r4, lr
+	ldr	r8, [r3, #12]
+	ldr	r4, [sp]                        @ 4-byte Reload
+	asr	r12, r1, #31
+	ldr	r11, [r3, #16]
+	adcs	r8, r4, r8
+	ldr	r9, [r3, #20]
+	ldr	r3, [r3, #24]
+	adcs	r11, r10, r11
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	adcs	r2, r7, r9
+	adc	r3, r1, r3
+	cmp	r12, #0
+	movpl	r3, r1
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	movpl	r2, r7
+	movpl	r11, r10
+	cmp	r12, #0
+	str	r3, [r0, #24]
+	movpl	lr, r1
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	movpl	r8, r4
+	str	r2, [r0, #20]
+	str	r11, [r0, #16]
+	movpl	r5, r1
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	cmp	r12, #0
+	str	r8, [r0, #12]
+	str	lr, [r0, #8]
+	movpl	r6, r1
+	str	r5, [r0, #4]
+	str	r6, [r0]
+	add	sp, sp, #20
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end71:
-	.size	mcl_fp_addPre5L, .Lfunc_end71-mcl_fp_addPre5L
+.Lfunc_end36:
+	.size	mcl_fp_subNF7L, .Lfunc_end36-mcl_fp_subNF7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_subPre5L
-	.align	2
-	.type	mcl_fp_subPre5L,%function
-mcl_fp_subPre5L:                        @ @mcl_fp_subPre5L
+                                        @ -- End function
+	.globl	mcl_fpDbl_add7L                 @ -- Begin function mcl_fpDbl_add7L
+	.p2align	2
+	.type	mcl_fpDbl_add7L,%function
+	.code	32                              @ @mcl_fpDbl_add7L
+mcl_fpDbl_add7L:
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, lr}
-	push	{r4, r5, r6, r7, r8, lr}
-	ldm	r2, {r3, r12, lr}
-	ldr	r4, [r2, #12]
-	ldr	r8, [r2, #16]
-	ldm	r1, {r5, r6, r7}
-	ldr	r2, [r1, #12]
-	ldr	r1, [r1, #16]
-	subs	r3, r5, r3
-	sbcs	r6, r6, r12
-	sbcs	r7, r7, lr
-	sbcs	r2, r2, r4
-	stm	r0, {r3, r6, r7}
-	sbcs	r1, r1, r8
-	str	r2, [r0, #12]
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#40
+	sub	sp, sp, #40
+	ldr	r9, [r2]
+	ldm	r1, {r4, r5, r6, r7}
+	ldmib	r2, {r8, lr}
+	adds	r4, r4, r9
+	str	r4, [sp, #32]                   @ 4-byte Spill
+	adcs	r4, r5, r8
+	ldr	r12, [r2, #12]
+	adcs	r6, r6, lr
+	str	r6, [sp, #24]                   @ 4-byte Spill
+	adcs	r7, r7, r12
+	str	r7, [sp, #20]                   @ 4-byte Spill
+	ldr	r6, [r2, #16]
+	ldr	r7, [r1, #16]
+	str	r4, [sp, #28]                   @ 4-byte Spill
+	adcs	r7, r7, r6
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	ldr	r4, [r2, #20]
+	ldr	r7, [r1, #20]
+	ldr	r6, [r1, #24]
+	adcs	r7, r7, r4
+	str	r7, [sp, #12]                   @ 4-byte Spill
+	ldr	r7, [r2, #24]
+	ldr	r5, [r1, #32]
+	adcs	r7, r6, r7
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	ldr	r7, [r2, #28]
+	ldr	r6, [r1, #28]
+	ldr	r4, [r1, #36]
+	adcs	r9, r6, r7
+	ldr	r6, [r2, #32]
+	str	r9, [sp, #8]                    @ 4-byte Spill
+	adcs	r7, r5, r6
+	ldr	r5, [r2, #36]
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	adcs	r8, r4, r5
+	ldr	r5, [r2, #40]
+	ldr	r7, [r1, #40]
+	ldr	r6, [r1, #44]
+	adcs	r5, r7, r5
+	ldr	r7, [r2, #44]
+	adcs	r7, r6, r7
+	str	r7, [sp]                        @ 4-byte Spill
+	ldr	r7, [r2, #48]
+	ldr	r6, [r1, #48]
+	ldr	r2, [r2, #52]
+	ldr	r1, [r1, #52]
+	adcs	r11, r6, r7
+	adcs	r10, r1, r2
+	mov	r1, #0
+	adc	r7, r1, #0
+	ldm	r3, {r1, r2, r12, lr}
+	subs	r6, r9, r1
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	sbcs	r9, r1, r2
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	str	r1, [r0]
+	sbcs	r4, r8, r12
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	sbcs	lr, r5, lr
+	str	r1, [r0, #4]
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	str	r1, [r0, #8]
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	str	r1, [r0, #12]
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	str	r1, [r0, #16]
-	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	pop	{r4, r5, r6, r7, r8, lr}
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	str	r1, [r0, #20]
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	str	r1, [r0, #24]
+	ldr	r2, [r3, #24]
+	ldr	r1, [r3, #20]
+	ldr	r3, [r3, #16]
+	ldr	r12, [sp]                       @ 4-byte Reload
+	sbcs	r3, r12, r3
+	sbcs	r1, r11, r1
+	sbcs	r2, r10, r2
+	sbc	r7, r7, #0
+	ands	r7, r7, #1
+	movne	r1, r11
+	movne	r2, r10
+	str	r1, [r0, #48]
+	movne	r3, r12
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	cmp	r7, #0
+	movne	lr, r5
+	movne	r4, r8
+	str	r2, [r0, #52]
+	movne	r9, r1
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	cmp	r7, #0
+	str	r3, [r0, #44]
+	str	lr, [r0, #40]
+	movne	r6, r1
+	str	r4, [r0, #36]
+	str	r9, [r0, #32]
+	str	r6, [r0, #28]
+	add	sp, sp, #40
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end72:
-	.size	mcl_fp_subPre5L, .Lfunc_end72-mcl_fp_subPre5L
+.Lfunc_end37:
+	.size	mcl_fpDbl_add7L, .Lfunc_end37-mcl_fpDbl_add7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_shr1_5L
-	.align	2
-	.type	mcl_fp_shr1_5L,%function
-mcl_fp_shr1_5L:                         @ @mcl_fp_shr1_5L
+                                        @ -- End function
+	.globl	mcl_fpDbl_sub7L                 @ -- Begin function mcl_fpDbl_sub7L
+	.p2align	2
+	.type	mcl_fpDbl_sub7L,%function
+	.code	32                              @ @mcl_fpDbl_sub7L
+mcl_fpDbl_sub7L:
 	.fnstart
-@ BB#0:
-	.save	{r4, lr}
-	push	{r4, lr}
-	ldr	r3, [r1, #4]
-	ldr	r12, [r1]
-	ldr	lr, [r1, #12]
-	ldr	r2, [r1, #8]
-	ldr	r1, [r1, #16]
-	lsrs	r4, r3, #1
-	lsr	r3, r3, #1
-	rrx	r12, r12
-	lsrs	r4, lr, #1
-	orr	r3, r3, r2, lsl #31
-	lsr	r4, lr, #1
-	rrx	r2, r2
-	str	r12, [r0]
-	str	r3, [r0, #4]
-	orr	r4, r4, r1, lsl #31
-	lsr	r1, r1, #1
-	str	r2, [r0, #8]
-	str	r4, [r0, #12]
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#60
+	sub	sp, sp, #60
+	ldr	r7, [r2, #32]
+	str	r7, [sp, #28]                   @ 4-byte Spill
+	ldr	r7, [r2, #36]
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	ldr	r7, [r2, #40]
+	str	r7, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [r2, #44]
+	str	r7, [sp, #44]                   @ 4-byte Spill
+	ldr	r7, [r2, #48]
+	str	r7, [sp, #48]                   @ 4-byte Spill
+	ldr	r7, [r2, #52]
+	str	r7, [sp, #52]                   @ 4-byte Spill
+	ldr	r7, [r1, #52]
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	ldr	r6, [r2]
+	ldmib	r2, {r5, r7, r8, r9, r10, r11}
+	ldr	r2, [r2, #28]
+	str	r2, [sp, #56]                   @ 4-byte Spill
+	ldm	r1, {r2, r12, lr}
+	subs	r2, r2, r6
+	str	r2, [sp, #24]                   @ 4-byte Spill
+	ldr	r2, [r1, #48]
+	str	r2, [sp]                        @ 4-byte Spill
+	sbcs	r2, r12, r5
+	ldr	r4, [r1, #12]
+	str	r2, [sp, #20]                   @ 4-byte Spill
+	sbcs	r2, lr, r7
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	sbcs	r2, r4, r8
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	mov	lr, #0
+	ldr	r2, [r1, #16]
+	ldr	r5, [sp, #56]                   @ 4-byte Reload
+	sbcs	r2, r2, r9
+	str	r2, [sp, #8]                    @ 4-byte Spill
+	ldr	r2, [r1, #20]
+	ldr	r6, [r1, #44]
+	sbcs	r2, r2, r10
+	str	r2, [sp, #4]                    @ 4-byte Spill
+	ldr	r2, [r1, #24]
+	ldr	r7, [r1, #40]
+	sbcs	r11, r2, r11
+	ldr	r2, [r1, #28]
+	str	r11, [r0, #24]
+	sbcs	r4, r2, r5
+	ldr	r5, [r1, #36]
+	ldr	r1, [r1, #32]
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	ldr	r11, [r3, #24]
+	sbcs	r12, r1, r2
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	ldr	r2, [sp, #40]                   @ 4-byte Reload
+	sbcs	r1, r5, r1
+	ldr	r5, [sp, #44]                   @ 4-byte Reload
+	sbcs	r2, r7, r2
+	str	r12, [sp, #28]                  @ 4-byte Spill
+	sbcs	r10, r6, r5
+	ldr	r5, [sp, #48]                   @ 4-byte Reload
+	ldr	r6, [sp]                        @ 4-byte Reload
+	str	r4, [sp, #56]                   @ 4-byte Spill
+	sbcs	r5, r6, r5
+	str	r5, [sp, #48]                   @ 4-byte Spill
+	ldr	r5, [sp, #52]                   @ 4-byte Reload
+	ldr	r6, [sp, #32]                   @ 4-byte Reload
+	sbcs	r9, r6, r5
+	ldr	r6, [r3, #12]
+	sbc	r7, lr, #0
+	str	r7, [sp, #52]                   @ 4-byte Spill
+	ldr	r7, [r3]
+	ldmib	r3, {r5, lr}
+	adds	r8, r4, r7
+	adcs	r5, r12, r5
+	ldr	r4, [sp, #52]                   @ 4-byte Reload
+	adcs	r7, r1, lr
+	mov	lr, r1
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r12, r2, r6
+	str	r1, [r0]
+	mov	r6, r10
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	str	r1, [r0, #4]
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	str	r1, [r0, #8]
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	str	r1, [r0, #12]
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
 	str	r1, [r0, #16]
-	pop	{r4, lr}
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	str	r1, [r0, #20]
+	ldr	r1, [r3, #20]
+	ldr	r3, [r3, #16]
+	adcs	r3, r10, r3
+	ldr	r10, [sp, #48]                  @ 4-byte Reload
+	adcs	r1, r10, r1
+	adc	r11, r9, r11
+	ands	r4, r4, #1
+	moveq	r1, r10
+	moveq	r11, r9
+	str	r1, [r0, #48]
+	moveq	r3, r6
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	cmp	r4, #0
+	moveq	r12, r2
+	moveq	r7, lr
+	str	r11, [r0, #52]
+	moveq	r5, r1
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	cmp	r4, #0
+	str	r3, [r0, #44]
+	str	r12, [r0, #40]
+	moveq	r8, r1
+	str	r7, [r0, #36]
+	str	r5, [r0, #32]
+	str	r8, [r0, #28]
+	add	sp, sp, #60
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end73:
-	.size	mcl_fp_shr1_5L, .Lfunc_end73-mcl_fp_shr1_5L
+.Lfunc_end38:
+	.size	mcl_fpDbl_sub7L, .Lfunc_end38-mcl_fpDbl_sub7L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_add5L
-	.align	2
-	.type	mcl_fp_add5L,%function
-mcl_fp_add5L:                           @ @mcl_fp_add5L
+                                        @ -- End function
+	.globl	mulPv256x32                     @ -- Begin function mulPv256x32
+	.p2align	2
+	.type	mulPv256x32,%function
+	.code	32                              @ @mulPv256x32
+mulPv256x32:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	ldm	r2, {r12, lr}
-	ldr	r9, [r2, #8]
-	ldr	r5, [r2, #12]
-	ldr	r8, [r2, #16]
-	ldm	r1, {r6, r7}
-	ldr	r2, [r1, #8]
+	ldr	r12, [r1]
+	ldmib	r1, {r3, lr}
+	umull	r7, r6, r12, r2
 	ldr	r4, [r1, #12]
-	ldr	r1, [r1, #16]
-	adds	r6, r12, r6
-	adcs	r7, lr, r7
-	adcs	r2, r9, r2
-	stm	r0, {r6, r7}
-	adcs	r5, r5, r4
-	mov	r4, #0
-	str	r2, [r0, #8]
-	adcs	r1, r8, r1
-	str	r5, [r0, #12]
-	str	r1, [r0, #16]
-	adc	r8, r4, #0
-	ldm	r3, {r4, r12, lr}
-	ldr	r9, [r3, #12]
-	ldr	r3, [r3, #16]
-	subs	r6, r6, r4
-	sbcs	r7, r7, r12
-	sbcs	r2, r2, lr
-	sbcs	r12, r5, r9
-	sbcs	lr, r1, r3
-	sbc	r1, r8, #0
-	tst	r1, #1
-	stmeq	r0!, {r6, r7}
-	stmeq	r0, {r2, r12, lr}
+	umull	r5, r8, r3, r2
+	str	r7, [r0]
+	umull	r9, r12, r4, r2
+	adds	r7, r6, r5
+	umull	r5, r4, lr, r2
+	adcs	r7, r8, r5
+	umlal	r6, r5, r3, r2
+	adcs	r7, r4, r9
+	str	r7, [r0, #12]
+	str	r5, [r0, #8]
+	str	r6, [r0, #4]
+	ldr	r3, [r1, #16]
+	umull	r7, r6, r3, r2
+	adcs	r3, r12, r7
+	str	r3, [r0, #16]
+	ldr	r3, [r1, #20]
+	umull	r7, r5, r3, r2
+	adcs	r3, r6, r7
+	str	r3, [r0, #20]
+	ldr	r3, [r1, #24]
+	umull	r7, r6, r3, r2
+	adcs	r3, r5, r7
+	str	r3, [r0, #24]
+	ldr	r1, [r1, #28]
+	umull	r3, r7, r1, r2
+	adcs	r1, r6, r3
+	str	r1, [r0, #28]
+	adc	r1, r7, #0
+	str	r1, [r0, #32]
 	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
 	mov	pc, lr
-.Lfunc_end74:
-	.size	mcl_fp_add5L, .Lfunc_end74-mcl_fp_add5L
+.Lfunc_end39:
+	.size	mulPv256x32, .Lfunc_end39-mulPv256x32
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_addNF5L
-	.align	2
-	.type	mcl_fp_addNF5L,%function
-mcl_fp_addNF5L:                         @ @mcl_fp_addNF5L
+                                        @ -- End function
+	.globl	mcl_fp_mulUnitPre8L             @ -- Begin function mcl_fp_mulUnitPre8L
+	.p2align	2
+	.type	mcl_fp_mulUnitPre8L,%function
+	.code	32                              @ @mcl_fp_mulUnitPre8L
+mcl_fp_mulUnitPre8L:
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, lr}
-	ldm	r1, {r12, lr}
-	ldr	r9, [r1, #8]
-	ldr	r5, [r1, #12]
-	ldr	r8, [r1, #16]
-	ldm	r2, {r6, r7}
-	ldr	r1, [r2, #8]
-	ldr	r4, [r2, #12]
-	ldr	r2, [r2, #16]
-	adds	r6, r6, r12
-	adcs	r10, r7, lr
-	adcs	r9, r1, r9
-	adcs	lr, r4, r5
-	ldr	r4, [r3]
-	adc	r12, r2, r8
-	ldmib	r3, {r2, r5}
-	ldr	r1, [r3, #12]
-	ldr	r3, [r3, #16]
-	subs	r4, r6, r4
-	sbcs	r2, r10, r2
-	sbcs	r5, r9, r5
-	sbcs	r1, lr, r1
-	sbc	r3, r12, r3
-	asr	r7, r3, #31
-	cmp	r7, #0
-	movlt	r4, r6
-	movlt	r2, r10
-	movlt	r5, r9
-	cmp	r7, #0
-	movlt	r1, lr
-	movlt	r3, r12
-	str	r4, [r0]
-	str	r2, [r0, #4]
-	str	r5, [r0, #8]
-	str	r1, [r0, #12]
-	str	r3, [r0, #16]
-	pop	{r4, r5, r6, r7, r8, r9, r10, lr}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r11, lr}
+	push	{r4, r5, r6, r7, r11, lr}
+	.pad	#40
+	sub	sp, sp, #40
+	mov	r4, r0
+	mov	r0, sp
+	bl	mulPv256x32
+	add	r7, sp, #24
+	ldm	sp, {r0, r1, r2, r3, r12, lr}
+	ldm	r7, {r5, r6, r7}
+	stm	r4, {r0, r1, r2, r3, r12, lr}
+	add	r0, r4, #24
+	stm	r0, {r5, r6, r7}
+	add	sp, sp, #40
+	pop	{r4, r5, r6, r7, r11, lr}
 	mov	pc, lr
-.Lfunc_end75:
-	.size	mcl_fp_addNF5L, .Lfunc_end75-mcl_fp_addNF5L
+.Lfunc_end40:
+	.size	mcl_fp_mulUnitPre8L, .Lfunc_end40-mcl_fp_mulUnitPre8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_sub5L
-	.align	2
-	.type	mcl_fp_sub5L,%function
-mcl_fp_sub5L:                           @ @mcl_fp_sub5L
+                                        @ -- End function
+	.globl	mcl_fpDbl_mulPre8L              @ -- Begin function mcl_fpDbl_mulPre8L
+	.p2align	2
+	.type	mcl_fpDbl_mulPre8L,%function
+	.code	32                              @ @mcl_fpDbl_mulPre8L
+mcl_fpDbl_mulPre8L:
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	ldm	r2, {r8, r12, lr}
-	ldr	r9, [r2, #12]
-	ldr	r6, [r2, #16]
-	ldm	r1, {r2, r7}
-	ldr	r4, [r1, #8]
-	ldr	r5, [r1, #12]
-	ldr	r1, [r1, #16]
-	subs	r8, r2, r8
-	sbcs	r2, r7, r12
-	str	r8, [r0]
-	sbcs	r12, r4, lr
-	sbcs	lr, r5, r9
-	sbcs	r4, r1, r6
-	mov	r1, #0
-	stmib	r0, {r2, r12, lr}
-	sbc	r1, r1, #0
-	str	r4, [r0, #16]
-	tst	r1, #1
-	popeq	{r4, r5, r6, r7, r8, r9, r11, lr}
-	moveq	pc, lr
-	ldm	r3, {r1, r5, r6, r7}
-	ldr	r3, [r3, #16]
-	adds	r1, r1, r8
-	adcs	r2, r5, r2
-	adcs	r6, r6, r12
-	adcs	r7, r7, lr
-	adc	r3, r3, r4
-	stm	r0, {r1, r2, r6, r7}
-	str	r3, [r0, #16]
-	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
-	mov	pc, lr
-.Lfunc_end76:
-	.size	mcl_fp_sub5L, .Lfunc_end76-mcl_fp_sub5L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF5L
-	.align	2
-	.type	mcl_fp_subNF5L,%function
-mcl_fp_subNF5L:                         @ @mcl_fp_subNF5L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	ldm	r2, {r12, lr}
-	ldr	r9, [r2, #8]
-	ldr	r5, [r2, #12]
-	ldr	r8, [r2, #16]
-	ldm	r1, {r6, r7}
-	ldr	r2, [r1, #8]
-	ldr	r4, [r1, #12]
-	ldr	r1, [r1, #16]
-	subs	r11, r6, r12
-	sbcs	r10, r7, lr
-	sbcs	lr, r2, r9
-	add	r9, r3, #8
-	sbcs	r12, r4, r5
-	ldm	r3, {r4, r5}
-	sbc	r1, r1, r8
-	ldm	r9, {r2, r8, r9}
-	asr	r6, r1, #31
-	adds	r4, r11, r4
-	adcs	r5, r10, r5
-	adcs	r2, lr, r2
-	adcs	r3, r12, r8
-	adc	r7, r1, r9
-	cmp	r6, #0
-	movge	r4, r11
-	movge	r5, r10
-	movge	r2, lr
-	cmp	r6, #0
-	movge	r3, r12
-	movge	r7, r1
-	str	r4, [r0]
-	str	r5, [r0, #4]
-	str	r2, [r0, #8]
-	str	r3, [r0, #12]
-	str	r7, [r0, #16]
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end77:
-	.size	mcl_fp_subNF5L, .Lfunc_end77-mcl_fp_subNF5L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add5L
-	.align	2
-	.type	mcl_fpDbl_add5L,%function
-mcl_fpDbl_add5L:                        @ @mcl_fpDbl_add5L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#20
-	sub	sp, sp, #20
-	ldr	r12, [r1]
-	ldr	r9, [r1, #4]
-	ldr	r8, [r1, #8]
-	ldr	r10, [r1, #12]
-	ldmib	r2, {r6, r7}
-	ldr	r5, [r2, #16]
-	ldr	r11, [r2]
-	ldr	r4, [r2, #12]
-	str	r5, [sp]                @ 4-byte Spill
-	ldr	r5, [r2, #20]
-	adds	lr, r11, r12
-	ldr	r11, [r2, #32]
-	add	r12, r1, #16
-	adcs	r6, r6, r9
-	add	r9, r1, #28
-	adcs	r7, r7, r8
-	str	r5, [sp, #4]            @ 4-byte Spill
-	ldr	r5, [r2, #24]
-	str	r5, [sp, #12]           @ 4-byte Spill
-	ldr	r5, [r2, #28]
-	ldr	r2, [r2, #36]
-	str	r5, [sp, #16]           @ 4-byte Spill
-	str	r2, [sp, #8]            @ 4-byte Spill
-	adcs	r5, r4, r10
-	ldm	r9, {r4, r8, r9}
-	ldm	r12, {r1, r2, r12}
-	str	lr, [r0]
-	stmib	r0, {r6, r7}
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r5, [r0, #12]
-	adcs	r1, r7, r1
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r2, r7, r2
-	mov	r7, #0
-	adcs	r12, r1, r12
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r10, r1, r4
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r8, r11, r8
-	adcs	lr, r1, r9
-	adc	r1, r7, #0
-	ldr	r7, [r3]
-	ldmib	r3, {r4, r5, r6}
-	ldr	r3, [r3, #16]
-	subs	r7, r2, r7
-	sbcs	r4, r12, r4
-	sbcs	r5, r10, r5
-	sbcs	r6, r8, r6
-	sbcs	r3, lr, r3
-	sbc	r1, r1, #0
-	ands	r1, r1, #1
-	movne	r7, r2
-	movne	r4, r12
-	movne	r5, r10
-	cmp	r1, #0
-	movne	r6, r8
-	movne	r3, lr
-	str	r7, [r0, #20]
-	str	r4, [r0, #24]
-	str	r5, [r0, #28]
-	str	r6, [r0, #32]
-	str	r3, [r0, #36]
-	add	sp, sp, #20
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end78:
-	.size	mcl_fpDbl_add5L, .Lfunc_end78-mcl_fpDbl_add5L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sub5L
-	.align	2
-	.type	mcl_fpDbl_sub5L,%function
-mcl_fpDbl_sub5L:                        @ @mcl_fpDbl_sub5L
-	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#32
-	sub	sp, sp, #32
-	ldr	r7, [r2, #32]
-	add	r8, r1, #12
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r1, #32]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r1, #36]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldmib	r2, {r9, r10, r11}
-	ldr	r7, [r2, #16]
-	str	r7, [sp]                @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldr	r7, [r2, #28]
+	.pad	#356
+	sub	sp, sp, #356
+	mov	r9, r2
 	ldr	r2, [r2]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldm	r8, {r4, r5, r6, r7, r8}
-	ldm	r1, {r1, r12, lr}
-	subs	r1, r1, r2
-	sbcs	r2, r12, r9
-	stm	r0, {r1, r2}
-	sbcs	r1, lr, r10
-	str	r1, [r0, #8]
-	sbcs	r1, r4, r11
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	str	r1, [r0, #12]
-	ldr	r1, [sp]                @ 4-byte Reload
-	sbcs	r1, r5, r1
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	sbcs	r2, r6, r2
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	str	r1, [r0, #16]
-	mov	r1, #0
-	sbcs	r7, r7, r6
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	sbcs	r9, r8, r6
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	sbcs	r8, r5, r6
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	sbcs	lr, r5, r6
-	sbc	r12, r1, #0
-	ldm	r3, {r1, r4, r5, r6}
-	ldr	r3, [r3, #16]
-	adds	r1, r2, r1
-	adcs	r4, r7, r4
-	adcs	r5, r9, r5
-	adcs	r6, r8, r6
-	adc	r3, lr, r3
-	ands	r12, r12, #1
-	moveq	r1, r2
-	moveq	r4, r7
-	moveq	r5, r9
-	cmp	r12, #0
-	moveq	r6, r8
-	moveq	r3, lr
-	str	r1, [r0, #20]
-	str	r4, [r0, #24]
-	str	r5, [r0, #28]
-	str	r6, [r0, #32]
-	str	r3, [r0, #36]
-	add	sp, sp, #32
+	mov	r4, r0
+	add	r0, sp, #312
+	mov	r7, r1
+	bl	mulPv256x32
+	ldr	r0, [sp, #344]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #340]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #336]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #332]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #328]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r1, [sp, #320]
+	ldr	r0, [sp, #312]
+	ldr	r8, [sp, #316]
+	str	r1, [sp, #4]                    @ 4-byte Spill
+	mov	r1, r7
+	ldr	r2, [r9, #4]
+	ldr	r10, [sp, #324]
+	str	r0, [r4]
+	add	r0, sp, #272
+	bl	mulPv256x32
+	ldr	r5, [sp, #272]
+	add	lr, sp, #280
+	ldr	r12, [sp, #304]
+	adds	r5, r5, r8
+	ldr	r6, [sp, #276]
+	ldm	lr, {r0, r1, r2, r3, r11, lr}
+	str	r5, [r4, #4]
+	ldr	r5, [sp, #4]                    @ 4-byte Reload
+	adcs	r8, r6, r5
+	adcs	r0, r0, r10
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	mov	r1, r7
+	adcs	r0, r2, r0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r2, [r9, #8]
+	adcs	r10, r3, r0
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r11, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, lr, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	adc	r0, r12, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	add	r0, sp, #232
+	bl	mulPv256x32
+	ldr	r1, [sp, #232]
+	add	r11, sp, #256
+	add	lr, sp, #236
+	adds	r1, r1, r8
+	ldm	r11, {r5, r6, r11}
+	ldm	lr, {r0, r2, r3, r12, lr}
+	str	r1, [r4, #8]
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	adcs	r8, r0, r1
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	mov	r1, r7
+	adcs	r0, r2, r0
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	ldr	r2, [r9, #12]
+	adcs	r0, r3, r0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r10, r12, r10
+	adcs	r0, lr, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r5, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	mov	r5, r7
+	adcs	r0, r6, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	adc	r0, r11, #0
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #192
+	bl	mulPv256x32
+	ldr	r6, [sp, #192]
+	add	lr, sp, #200
+	ldr	r12, [sp, #224]
+	adds	r6, r6, r8
+	ldr	r7, [sp, #196]
+	ldm	lr, {r0, r1, r2, r3, r11, lr}
+	str	r6, [r4, #12]
+	ldr	r6, [sp, #12]                   @ 4-byte Reload
+	adcs	r8, r7, r6
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	adcs	r0, r1, r10
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	mov	r1, r5
+	adcs	r0, r2, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r2, [r9, #16]
+	adcs	r10, r3, r0
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r11, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #4]                    @ 4-byte Reload
+	adcs	r0, lr, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	adc	r0, r12, #0
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #152
+	bl	mulPv256x32
+	ldr	r1, [sp, #152]
+	add	lr, sp, #156
+	ldr	r11, [sp, #184]
+	adds	r1, r1, r8
+	ldr	r6, [sp, #180]
+	ldr	r7, [sp, #176]
+	ldm	lr, {r0, r2, r3, r12, lr}
+	str	r1, [r4, #16]
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	str	r5, [sp, #8]                    @ 4-byte Spill
+	adcs	r8, r0, r1
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	mov	r1, r5
+	adcs	r0, r2, r0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r2, [r9, #20]
+	adcs	r0, r3, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r10, r12, r10
+	adcs	r0, lr, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #4]                    @ 4-byte Reload
+	adcs	r0, r6, r0
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adc	r0, r11, #0
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #112
+	bl	mulPv256x32
+	ldr	r6, [sp, #112]
+	add	lr, sp, #120
+	ldr	r12, [sp, #144]
+	adds	r6, r6, r8
+	ldr	r7, [sp, #116]
+	ldm	lr, {r0, r1, r2, r3, r11, lr}
+	str	r6, [r4, #20]
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
+	adcs	r8, r7, r6
+	ldr	r6, [sp, #24]                   @ 4-byte Reload
+	adcs	r7, r0, r6
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r10, r1, r10
+	mov	r1, r5
+	adcs	r0, r2, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r2, [r9, #24]
+	adcs	r0, r3, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r11, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #4]                    @ 4-byte Reload
+	adcs	r0, lr, r0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	adc	r0, r12, #0
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #72
+	bl	mulPv256x32
+	add	lr, sp, #76
+	ldr	r2, [sp, #72]
+	add	r11, sp, #96
+	ldm	lr, {r0, r1, r3, r12, lr}
+	adds	r2, r2, r8
+	adcs	r8, r0, r7
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldm	r11, {r5, r6, r11}
+	str	r2, [r4, #24]
+	ldr	r2, [r9, #28]
+	adcs	r9, r1, r10
+	adcs	r7, r3, r0
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adcs	r10, r12, r0
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, lr, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r5, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r6, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	add	r0, sp, #32
+	adc	r11, r11, #0
+	bl	mulPv256x32
+	add	r3, sp, #32
+	ldr	r6, [sp, #56]
+	ldr	r5, [sp, #60]
+	ldm	r3, {r0, r1, r2, r3}
+	adds	lr, r0, r8
+	ldr	r0, [sp, #48]
+	adcs	r8, r1, r9
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r2, r2, r7
+	ldr	r7, [sp, #52]
+	adcs	r3, r3, r10
+	ldr	r12, [sp, #64]
+	adcs	r0, r0, r1
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	str	lr, [r4, #28]
+	adcs	r7, r7, r1
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	str	r8, [r4, #32]
+	adcs	r6, r6, r1
+	str	r2, [r4, #36]
+	adcs	r5, r5, r11
+	str	r0, [r4, #44]
+	adc	r0, r12, #0
+	str	r3, [r4, #40]
+	str	r7, [r4, #48]
+	str	r6, [r4, #52]
+	str	r5, [r4, #56]
+	str	r0, [r4, #60]
+	add	sp, sp, #356
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end79:
-	.size	mcl_fpDbl_sub5L, .Lfunc_end79-mcl_fpDbl_sub5L
+.Lfunc_end41:
+	.size	mcl_fpDbl_mulPre8L, .Lfunc_end41-mcl_fpDbl_mulPre8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_mulUnitPre6L
-	.align	2
-	.type	mcl_fp_mulUnitPre6L,%function
-mcl_fp_mulUnitPre6L:                    @ @mcl_fp_mulUnitPre6L
+                                        @ -- End function
+	.globl	mcl_fpDbl_sqrPre8L              @ -- Begin function mcl_fpDbl_sqrPre8L
+	.p2align	2
+	.type	mcl_fpDbl_sqrPre8L,%function
+	.code	32                              @ @mcl_fpDbl_sqrPre8L
+mcl_fpDbl_sqrPre8L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r11, [r1, #12]
-	ldr	r9, [r1, #16]
-	ldr	r8, [r1, #20]
-	umull	r4, r10, lr, r2
-	umull	r1, r7, r12, r2
-	mov	r5, r7
-	mov	r6, r4
-	umlal	r5, r6, r3, r2
-	stm	r0, {r1, r5, r6}
-	umull	r5, r6, r3, r2
-	umull	r1, r12, r11, r2
-	adds	r3, r7, r5
-	adcs	r3, r6, r4
-	adcs	r1, r10, r1
-	str	r1, [r0, #12]
-	umull	r1, r3, r9, r2
-	adcs	r1, r12, r1
-	str	r1, [r0, #16]
-	umull	r1, r7, r8, r2
-	adcs	r1, r3, r1
-	str	r1, [r0, #20]
-	adc	r1, r7, #0
-	str	r1, [r0, #24]
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end80:
-	.size	mcl_fp_mulUnitPre6L, .Lfunc_end80-mcl_fp_mulUnitPre6L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_mulPre6L
-	.align	2
-	.type	mcl_fpDbl_mulPre6L,%function
-mcl_fpDbl_mulPre6L:                     @ @mcl_fpDbl_mulPre6L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#48
-	sub	sp, sp, #48
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r3, [r2]
-	ldm	r1, {r12, lr}
-	ldr	r2, [r1, #8]
-	mov	r8, r0
-	ldr	r10, [r1, #12]
-	umull	r0, r4, r12, r3
-	umull	r6, r7, lr, r3
-	str	r2, [sp, #24]           @ 4-byte Spill
-	adds	r6, r4, r6
-	str	r0, [sp, #32]           @ 4-byte Spill
-	umull	r5, r6, r2, r3
-	adcs	r7, r7, r5
-	umlal	r4, r5, lr, r3
-	umull	r7, r11, r10, r3
-	adcs	r0, r6, r7
-	ldr	r7, [r1, #16]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	umull	r6, r0, r7, r3
-	adcs	r2, r11, r6
-	ldr	r6, [r1, #20]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	umull	r11, r2, r6, r3
-	adcs	r0, r0, r11
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r2, r2, #0
-	str	r2, [sp, #12]           @ 4-byte Spill
-	str	r0, [r8]
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	ldr	r3, [r0, #4]
-	umull	r11, r9, r12, r3
-	adds	r2, r11, r4
-	umull	r4, r11, lr, r3
-	str	r9, [sp, #28]           @ 4-byte Spill
-	adcs	lr, r4, r5
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	str	r2, [sp, #32]           @ 4-byte Spill
-	umull	r4, r2, r10, r3
-	str	r2, [sp, #20]           @ 4-byte Spill
-	umull	r2, r10, r5, r3
-	ldr	r5, [sp, #40]           @ 4-byte Reload
-	adcs	r2, r2, r5
-	ldr	r5, [sp, #36]           @ 4-byte Reload
-	adcs	r4, r4, r5
-	umull	r5, r9, r7, r3
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	adcs	r5, r5, r7
-	umull	r7, r12, r6, r3
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	adcs	r7, r7, r3
-	mov	r3, #0
-	adc	r6, r3, #0
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	adds	r3, lr, r3
-	adcs	r2, r2, r11
-	adcs	lr, r4, r10
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	adcs	r10, r5, r4
-	ldr	r4, [r1, #8]
-	adcs	r11, r7, r9
-	ldr	r9, [r1, #4]
-	adc	r7, r6, r12
-	ldr	r6, [r0, #8]
-	ldr	r0, [r1]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	str	r9, [sp, #8]            @ 4-byte Spill
-	umull	r12, r5, r0, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adds	r0, r12, r3
-	str	r7, [r8, #4]
-	ldr	r7, [r1, #12]
-	ldr	r12, [r1, #20]
-	str	r5, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #32]           @ 4-byte Spill
-	umull	r3, r0, r9, r6
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adcs	r0, r3, r2
-	str	r0, [sp, #12]           @ 4-byte Spill
-	umull	r3, r0, r4, r6
-	str	r0, [sp, #20]           @ 4-byte Spill
-	adcs	r0, r3, lr
-	ldr	lr, [r1, #16]
-	ldr	r9, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #4]            @ 4-byte Spill
-	umull	r2, r0, r7, r6
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r2, r2, r10
-	umull	r10, r5, lr, r6
-	adcs	r10, r10, r11
-	umull	r11, r3, r12, r6
-	adcs	r6, r11, r0
-	mov	r0, #0
-	adc	r11, r0, #0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adds	r0, r9, r0
-	ldr	r9, [sp, #4]            @ 4-byte Reload
-	str	r0, [sp]                @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r9, r2, r0
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r10, r10, r0
-	adcs	r0, r6, r5
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adc	r0, r11, r3
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	str	r0, [r8, #8]
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	ldr	r6, [r0, #12]
-	umull	r11, r3, r7, r6
-	str	r3, [sp, #36]           @ 4-byte Spill
-	umull	r7, r3, r4, r6
-	str	r3, [sp, #32]           @ 4-byte Spill
-	umull	r4, r3, r5, r6
-	str	r3, [sp, #20]           @ 4-byte Spill
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	umull	r5, r2, r3, r6
-	ldr	r3, [sp]                @ 4-byte Reload
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adds	r3, r5, r3
-	str	r3, [sp, #40]           @ 4-byte Spill
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	adcs	r4, r4, r3
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	adcs	r7, r7, r9
-	adcs	r9, r11, r10
-	umull	r5, r11, lr, r6
-	adcs	r3, r5, r3
-	umull	r5, r10, r12, r6
-	mov	r6, #0
-	adcs	r2, r5, r2
-	adc	r5, r6, #0
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	adds	r12, r4, r6
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	adcs	lr, r7, r4
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adcs	r9, r9, r4
-	adcs	r3, r3, r7
-	adcs	r2, r2, r11
-	str	r3, [sp, #20]           @ 4-byte Spill
-	str	r2, [sp, #28]           @ 4-byte Spill
-	adc	r2, r5, r10
-	ldr	r5, [r0, #16]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r2, [r8, #12]
+	.pad	#356
+	sub	sp, sp, #356
 	ldr	r2, [r1]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldmib	r1, {r0, r6}
-	umull	r7, r4, r2, r5
-	ldr	r3, [r1, #12]
-	adds	r2, r7, r12
-	str	r4, [sp, #24]           @ 4-byte Spill
-	str	r2, [sp, #32]           @ 4-byte Spill
-	umull	r7, r2, r0, r5
-	str	r2, [sp, #16]           @ 4-byte Spill
-	adcs	r2, r7, lr
-	str	r2, [sp, #4]            @ 4-byte Spill
-	umull	r4, r2, r6, r5
-	str	r2, [sp, #12]           @ 4-byte Spill
-	adcs	r2, r4, r9
-	ldr	r4, [sp, #28]           @ 4-byte Reload
-	ldr	r9, [sp, #4]            @ 4-byte Reload
-	str	r2, [sp]                @ 4-byte Spill
-	umull	r7, r2, r3, r5
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r7, r7, r2
-	ldr	r2, [r1, #16]
-	ldr	r1, [r1, #20]
-	umull	r10, lr, r2, r5
-	umull	r11, r12, r1, r5
-	adcs	r10, r10, r4
-	ldr	r4, [sp, #36]           @ 4-byte Reload
-	adcs	r5, r11, r4
-	mov	r4, #0
-	adc	r11, r4, #0
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	adds	r4, r9, r4
-	ldr	r9, [sp]                @ 4-byte Reload
-	str	r4, [sp, #4]            @ 4-byte Spill
-	ldr	r4, [sp, #16]           @ 4-byte Reload
-	adcs	r4, r9, r4
-	str	r4, [sp, #24]           @ 4-byte Spill
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	adcs	r4, r7, r4
-	str	r4, [sp, #20]           @ 4-byte Spill
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	adcs	r10, r10, r4
-	adcs	lr, r5, lr
-	ldr	r5, [sp, #44]           @ 4-byte Reload
-	adc	r7, r11, r12
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	ldr	r5, [r5, #20]
-	str	r7, [r8, #16]
-	umull	r11, r7, r3, r5
-	str	r7, [sp, #44]           @ 4-byte Spill
-	umull	r3, r7, r6, r5
-	umull	r6, r12, r0, r5
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	umull	r4, r0, r7, r5
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	adds	r9, r4, r7
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	str	r9, [r8, #20]
-	adcs	r6, r6, r4
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	adcs	r3, r3, r4
-	adcs	r7, r11, r10
-	umull	r4, r10, r2, r5
-	adcs	r2, r4, lr
-	umull	r4, lr, r1, r5
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r4, r1
-	mov	r4, #0
-	adc	r4, r4, #0
-	adds	r5, r6, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r3, r3, r12
-	str	r5, [r8, #24]
-	str	r3, [r8, #28]
-	adcs	r3, r7, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	str	r3, [r8, #32]
-	adcs	r2, r2, r0
-	adcs	r1, r1, r10
-	str	r2, [r8, #36]
-	str	r1, [r8, #40]
-	adc	r1, r4, lr
-	str	r1, [r8, #44]
-	add	sp, sp, #48
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end81:
-	.size	mcl_fpDbl_mulPre6L, .Lfunc_end81-mcl_fpDbl_mulPre6L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sqrPre6L
-	.align	2
-	.type	mcl_fpDbl_sqrPre6L,%function
-mcl_fpDbl_sqrPre6L:                     @ @mcl_fpDbl_sqrPre6L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#56
-	sub	sp, sp, #56
-	ldm	r1, {r2, r3}
-	ldr	r7, [r1, #12]
-	mov	lr, r0
-	ldr	r0, [r1, #8]
-	ldr	r9, [r1, #16]
-	ldr	r12, [r1, #20]
-	umull	r10, r6, r7, r2
-	str	r0, [sp, #48]           @ 4-byte Spill
-	umull	r4, r8, r0, r2
-	umull	r5, r0, r2, r2
-	str	r7, [sp, #44]           @ 4-byte Spill
-	str	r6, [sp, #36]           @ 4-byte Spill
-	umull	r6, r7, r3, r2
-	str	r5, [sp, #24]           @ 4-byte Spill
-	adds	r11, r0, r6
-	ldr	r5, [sp, #36]           @ 4-byte Reload
-	str	r7, [sp, #52]           @ 4-byte Spill
-	adcs	r7, r7, r4
-	umlal	r0, r4, r3, r2
-	adcs	r7, r8, r10
-	str	r7, [sp, #40]           @ 4-byte Spill
-	umull	r7, r10, r9, r2
-	adcs	r7, r5, r7
-	str	r7, [sp, #32]           @ 4-byte Spill
-	umull	r7, r8, r12, r2
-	adcs	r11, r10, r7
-	adc	r2, r8, #0
-	adds	r0, r6, r0
-	umull	r6, r10, r3, r3
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r4, r6, r4
-	str	r0, [lr]
-	umull	r6, r0, r12, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	umull	r5, r0, r9, r3
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	umull	r9, r12, r0, r3
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	umull	r7, r8, r0, r3
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r3, r7, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r7, r9, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r5, r5, r11
-	adcs	r6, r6, r2
-	mov	r2, #0
-	adc	r2, r2, #0
-	adds	r4, r4, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r11, r3, r10
-	adcs	r8, r7, r8
-	ldr	r7, [r1, #4]
-	adcs	r10, r5, r12
-	ldr	r5, [r1, #12]
-	str	r0, [lr, #4]
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	str	r7, [sp, #16]           @ 4-byte Spill
-	adcs	r0, r6, r0
-	ldr	r6, [r1, #8]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r2, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r1]
-	umull	r3, r2, r0, r6
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adds	r0, r3, r4
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [r1, #16]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	umull	r3, r0, r7, r6
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r3, r11
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	umull	r4, r0, r6, r6
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adcs	r0, r4, r8
-	umull	r12, r4, r5, r6
-	str	r0, [sp, #20]           @ 4-byte Spill
-	adcs	r0, r12, r10
-	ldr	r10, [sp, #24]          @ 4-byte Reload
-	str	r4, [sp, #40]           @ 4-byte Spill
-	str	r0, [sp, #8]            @ 4-byte Spill
-	umull	r9, r0, r2, r6
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp]                @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r9, r9, r0
-	ldr	r0, [r1, #20]
-	umull	r11, r8, r0, r6
-	adcs	r6, r11, r3
-	mov	r3, #0
-	adc	r11, r3, #0
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	adds	r3, r10, r3
-	str	r3, [sp, #24]           @ 4-byte Spill
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	adcs	r3, r7, r3
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	adcs	r3, r7, r3
-	str	r3, [sp, #28]           @ 4-byte Spill
-	adcs	r3, r9, r4
-	ldr	r4, [sp, #16]           @ 4-byte Reload
-	ldr	r9, [sp, #48]           @ 4-byte Reload
-	str	r3, [sp, #20]           @ 4-byte Spill
-	ldr	r3, [sp]                @ 4-byte Reload
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	adcs	r3, r6, r3
-	str	r3, [sp, #12]           @ 4-byte Spill
-	umull	r6, r3, r0, r5
-	adc	r11, r11, r8
-	str	r3, [sp, #44]           @ 4-byte Spill
-	umull	r3, r0, r2, r5
-	str	r0, [sp, #36]           @ 4-byte Spill
-	umull	r2, r0, r5, r5
-	str	r0, [sp, #32]           @ 4-byte Spill
-	umull	r0, r10, r4, r5
-	umull	r4, r8, r9, r5
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	adds	r4, r4, r5
-	ldr	r5, [sp, #4]            @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #28]           @ 4-byte Reload
-	adcs	r5, r12, r5
-	adcs	r2, r2, r7
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	adcs	r3, r3, r7
-	mov	r7, #0
-	adcs	r6, r6, r11
-	adc	r7, r7, #0
-	adds	r9, r0, r8
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r11, r5, r10
+	mov	r4, r0
+	add	r0, sp, #312
+	mov	r5, r1
+	bl	mulPv256x32
+	ldr	r0, [sp, #344]
+	mov	r1, r5
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #340]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #336]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #332]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #328]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #312]
+	ldr	r2, [r5, #4]
+	ldr	r11, [sp, #316]
+	ldr	r10, [sp, #320]
+	ldr	r9, [sp, #324]
+	str	r0, [r4]
+	add	r0, sp, #272
+	bl	mulPv256x32
+	add	r6, sp, #276
+	ldr	r7, [sp, #272]
+	add	lr, sp, #292
+	ldr	r8, [sp, #304]
+	ldm	r6, {r0, r1, r2, r6}
+	adds	r7, r7, r11
+	adcs	r10, r0, r10
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r9, r1, r9
+	ldm	lr, {r3, r12, lr}
 	adcs	r0, r2, r0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	mov	r1, r5
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	ldr	r2, [r5, #8]
+	adcs	r0, r6, r0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	str	r7, [r4, #4]
+	adcs	r11, r3, r0
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r12, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, lr, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	adc	r0, r8, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	add	r0, sp, #232
+	bl	mulPv256x32
+	add	lr, sp, #236
+	ldr	r1, [sp, #232]
+	add	r8, sp, #256
+	ldm	lr, {r0, r2, r3, r12, lr}
+	adds	r1, r1, r10
+	adcs	r9, r0, r9
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	ldm	r8, {r6, r7, r8}
+	adcs	r10, r2, r0
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	ldr	r2, [r5, #12]
 	adcs	r0, r3, r0
-	add	r3, r1, #8
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r12, r6, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	str	r0, [lr, #8]
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	str	r4, [lr, #12]
-	adc	r0, r7, r0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r1, {r4, r6}
-	ldm	r3, {r0, r2, r3}
-	ldr	r1, [r1, #20]
-	umull	r5, r7, r2, r1
-	str	r5, [sp, #32]           @ 4-byte Spill
-	str	r7, [sp, #52]           @ 4-byte Spill
-	umull	r5, r7, r0, r1
-	str	r5, [sp, #28]           @ 4-byte Spill
-	str	r7, [sp, #48]           @ 4-byte Spill
-	umull	r5, r7, r6, r1
-	str	r5, [sp, #24]           @ 4-byte Spill
-	str	r7, [sp, #44]           @ 4-byte Spill
-	umull	r5, r7, r4, r1
-	str	r5, [sp, #8]            @ 4-byte Spill
-	str	r7, [sp, #36]           @ 4-byte Spill
-	umull	r7, r5, r2, r3
-	str	r5, [sp, #4]            @ 4-byte Spill
-	umull	r2, r5, r0, r3
-	umull	r0, r10, r6, r3
-	umull	r6, r8, r4, r3
-	adds	r4, r6, r9
-	str	r5, [sp]                @ 4-byte Spill
-	adcs	r11, r0, r11
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	str	r4, [sp, #40]           @ 4-byte Spill
-	umull	r4, r9, r3, r3
-	adcs	r5, r2, r0
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r6, r7, r0
-	umull	r0, r2, r1, r3
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	mov	r7, #0
-	adcs	r12, r4, r12
-	ldr	r4, [sp]                @ 4-byte Reload
-	adcs	r3, r0, r3
-	adc	r7, r7, #0
-	adds	r8, r11, r8
-	adcs	r5, r5, r10
-	adcs	r6, r6, r4
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	adcs	r4, r12, r4
-	adcs	r3, r3, r9
-	adc	r10, r7, r2
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	adds	r12, r7, r8
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	adcs	r9, r7, r5
-	ldr	r5, [sp, #28]           @ 4-byte Reload
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adcs	r6, r5, r6
-	ldr	r5, [sp, #32]           @ 4-byte Reload
-	adcs	r4, r5, r4
-	adcs	r0, r0, r3
-	umull	r3, r8, r1, r1
-	adcs	r1, r3, r10
-	mov	r3, #0
-	adc	r3, r3, #0
-	adds	r5, r9, r7
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	adcs	r6, r6, r7
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	adcs	r4, r4, r7
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	adcs	r1, r1, r2
-	adc	r2, r3, r8
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	str	r3, [lr, #16]
-	add	r3, lr, #36
-	str	r12, [lr, #20]
-	str	r5, [lr, #24]
-	str	r6, [lr, #28]
-	str	r4, [lr, #32]
-	stm	r3, {r0, r1, r2}
-	add	sp, sp, #56
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r11, r12, r11
+	str	r1, [r4, #8]
+	mov	r1, r5
+	adcs	r0, lr, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r6, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	adc	r0, r8, #0
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #192
+	bl	mulPv256x32
+	add	r6, sp, #196
+	ldr	r7, [sp, #192]
+	add	lr, sp, #212
+	ldr	r8, [sp, #224]
+	ldm	r6, {r0, r1, r2, r6}
+	adds	r7, r7, r9
+	adcs	r9, r0, r10
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	ldm	lr, {r3, r12, lr}
+	adcs	r0, r1, r0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	adcs	r10, r2, r11
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	mov	r1, r5
+	ldr	r2, [r5, #16]
+	adcs	r0, r6, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	str	r7, [r4, #12]
+	adcs	r11, r3, r0
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r12, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, lr, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	adc	r0, r8, #0
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #152
+	bl	mulPv256x32
+	ldr	r1, [sp, #152]
+	add	r8, sp, #176
+	add	lr, sp, #156
+	adds	r1, r1, r9
+	ldm	r8, {r6, r7, r8}
+	ldm	lr, {r0, r2, r3, r12, lr}
+	str	r1, [r4, #16]
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r9, r0, r1
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r10, r2, r10
+	ldr	r2, [r5, #20]
+	adcs	r0, r3, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r11, r12, r11
+	mov	r1, r5
+	adcs	r0, lr, r0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r6, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adc	r0, r8, #0
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	add	r0, sp, #112
+	bl	mulPv256x32
+	add	r6, sp, #116
+	ldr	r7, [sp, #112]
+	add	lr, sp, #132
+	ldr	r8, [sp, #144]
+	ldm	r6, {r0, r1, r2, r6}
+	adds	r7, r7, r9
+	adcs	r9, r0, r10
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldm	lr, {r3, r12, lr}
+	adcs	r0, r1, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	adcs	r10, r2, r11
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	mov	r1, r5
+	ldr	r2, [r5, #24]
+	adcs	r11, r6, r0
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	str	r7, [r4, #20]
+	adcs	r0, r3, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r12, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	adcs	r0, lr, r0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	adc	r0, r8, #0
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #72
+	bl	mulPv256x32
+	ldr	r0, [sp, #104]
+	add	lr, sp, #76
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #100]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	ldr	r2, [sp, #72]
+	ldm	lr, {r0, r1, r3, r12, lr}
+	adds	r2, r2, r9
+	ldr	r7, [sp, #28]                   @ 4-byte Reload
+	ldr	r6, [sp, #96]
+	adcs	r9, r0, r7
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r10, r1, r10
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	adcs	r8, r3, r11
+	str	r2, [r4, #24]
+	adcs	r11, r12, r0
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r2, [r5, #28]
+	adcs	r0, lr, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r6, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	mov	r1, r5
+	adc	r0, r0, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	add	r0, sp, #32
+	bl	mulPv256x32
+	add	r3, sp, #32
+	add	r7, sp, #48
+	ldr	r6, [sp, #60]
+	ldm	r3, {r0, r1, r2, r3}
+	adds	lr, r0, r9
+	ldm	r7, {r0, r5, r7}
+	adcs	r9, r1, r10
+	adcs	r2, r2, r8
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r3, r3, r11
+	ldr	r12, [sp, #64]
+	adcs	r0, r0, r1
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	str	lr, [r4, #28]
+	adcs	r5, r5, r1
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	str	r9, [r4, #32]
+	adcs	r7, r7, r1
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	str	r2, [r4, #36]
+	adcs	r6, r6, r1
+	add	r1, r4, #44
+	str	r3, [r4, #40]
+	stm	r1, {r0, r5, r7}
+	adc	r0, r12, #0
+	str	r6, [r4, #56]
+	str	r0, [r4, #60]
+	add	sp, sp, #356
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end82:
-	.size	mcl_fpDbl_sqrPre6L, .Lfunc_end82-mcl_fpDbl_sqrPre6L
+.Lfunc_end42:
+	.size	mcl_fpDbl_sqrPre8L, .Lfunc_end42-mcl_fpDbl_sqrPre8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_mont6L
-	.align	2
-	.type	mcl_fp_mont6L,%function
-mcl_fp_mont6L:                          @ @mcl_fp_mont6L
+                                        @ -- End function
+	.globl	mcl_fp_mont8L                   @ -- Begin function mcl_fp_mont8L
+	.p2align	2
+	.type	mcl_fp_mont8L,%function
+	.code	32                              @ @mcl_fp_mont8L
+mcl_fp_mont8L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#116
-	sub	sp, sp, #116
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, r2
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldm	r0, {r2, r6, r7}
-	ldr	r0, [r0, #12]
-	ldr	r5, [r3, #8]
-	ldr	r9, [r3]
-	ldr	r11, [r1, #8]
-	ldr	lr, [r1, #12]
-	ldr	r12, [r3, #4]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r1, #4]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r1]
-	str	r5, [sp, #92]           @ 4-byte Spill
-	str	r9, [sp, #84]           @ 4-byte Spill
-	str	r11, [sp, #100]         @ 4-byte Spill
-	str	lr, [sp, #64]           @ 4-byte Spill
-	str	r12, [sp, #112]         @ 4-byte Spill
-	str	r7, [sp, #108]          @ 4-byte Spill
-	ldr	r7, [r3, #-4]
-	umull	r4, r8, r0, r2
-	str	r0, [sp, #88]           @ 4-byte Spill
-	str	r4, [sp, #44]           @ 4-byte Spill
-	mul	r0, r4, r7
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r1, #20]
-	ldr	r1, [r1, #16]
-	umull	r10, r4, r0, r5
-	str	r4, [sp, #36]           @ 4-byte Spill
-	umull	r4, r5, r0, r9
-	str	r10, [sp, #16]          @ 4-byte Spill
-	mov	r9, r5
-	str	r5, [sp, #12]           @ 4-byte Spill
-	str	r4, [sp, #40]           @ 4-byte Spill
-	umull	r5, r4, r7, r2
-	str	r7, [sp, #104]          @ 4-byte Spill
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	str	r1, [sp, #96]           @ 4-byte Spill
-	umlal	r9, r10, r0, r12
-	str	r5, [sp, #72]           @ 4-byte Spill
-	str	r4, [sp, #76]           @ 4-byte Spill
-	umull	r5, r4, r1, r2
-	str	r4, [sp, #68]           @ 4-byte Spill
-	umull	r1, r4, lr, r2
-	str	r5, [sp, #28]           @ 4-byte Spill
-	umull	lr, r5, r11, r2
-	str	r4, [sp, #24]           @ 4-byte Spill
-	umull	r11, r4, r7, r2
-	adds	r7, r8, r11
-	adcs	r4, r4, lr
-	ldr	r7, [r3, #12]
-	adcs	r1, r5, r1
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r4, r1
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r7, [sp, #72]           @ 4-byte Spill
-	adcs	r1, r4, r1
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [r3, #20]
-	umull	r11, r4, r0, r1
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	str	r4, [sp, #8]            @ 4-byte Spill
-	umull	r3, r4, r0, r12
-	adds	r3, r5, r3
-	str	r1, [sp, #68]           @ 4-byte Spill
-	umull	r5, r12, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r4, r4, r1
-	umull	r4, r3, r0, r7
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r0, r4
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r3, r3, r5
-	adcs	r4, r12, r11
-	mov	r12, #0
-	adc	r5, r0, #0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	umlal	r8, lr, r0, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	adds	r2, r7, r2
-	adcs	r2, r9, r8
-	str	r2, [sp, #44]           @ 4-byte Spill
-	adcs	r2, r10, lr
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r3, r1
-	mov	r3, r0
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r4, r1
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adcs	r1, r5, r1
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adc	r11, r12, #0
-	umull	lr, r10, r6, r1
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	umull	r7, r4, r6, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	umull	r5, r12, r6, r1
-	umull	r1, r8, r6, r0
-	umull	r9, r0, r6, r2
-	adds	r1, r0, r1
-	adcs	r1, r8, r5
-	ldr	r8, [sp, #64]           @ 4-byte Reload
-	umlal	r0, r5, r6, r3
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	umull	r1, r2, r6, r8
-	adcs	r1, r12, r1
-	adcs	r2, r2, r7
-	adcs	r12, r4, lr
-	adc	r4, r10, #0
-	adds	r7, r3, r9
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	ldr	r10, [sp, #68]          @ 4-byte Reload
-	adcs	r9, r3, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
+	.pad	#716
+	sub	sp, sp, #716
+	mov	r7, r2
+	ldr	r2, [r2]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	add	r0, sp, #672
+	ldr	r5, [r3, #-4]
+	mov	r4, r3
+	str	r5, [sp, #64]                   @ 4-byte Spill
+	mov	r11, r1
+	str	r3, [sp, #68]                   @ 4-byte Spill
+	str	r7, [sp, #60]                   @ 4-byte Spill
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	bl	mulPv256x32
+	ldr	r0, [sp, #676]
+	mov	r1, r4
+	ldr	r9, [sp, #672]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #680]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mul	r2, r5, r9
+	ldr	r0, [sp, #684]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #704]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #700]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #696]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #692]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #688]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	add	r0, sp, #632
+	bl	mulPv256x32
+	ldr	r0, [sp, #664]
+	add	r10, sp, #636
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	mov	r1, r11
+	ldr	r0, [sp, #660]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #656]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	add	r0, sp, #592
+	ldr	r2, [r7, #4]
+	ldm	r10, {r5, r8, r10}
+	ldr	r7, [sp, #652]
+	ldr	r4, [sp, #648]
+	ldr	r6, [sp, #632]
+	bl	mulPv256x32
+	adds	r0, r6, r9
+	ldr	r2, [sp, #8]                    @ 4-byte Reload
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	mov	r1, #0
+	add	r12, sp, #596
+	adcs	r0, r5, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	lr, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r8, r0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r5, [sp, #36]                   @ 4-byte Reload
+	adcs	r10, r10, r0
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r4, r0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r11, r2, r0
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	adcs	r7, r2, r0
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r2, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	adc	r0, r1, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r4, [sp, #592]
+	ldm	r12, {r0, r1, r2, r3, r6, r12}
+	adds	r4, lr, r4
+	adcs	r0, r5, r0
+	ldr	r8, [sp, #624]
+	ldr	r9, [sp, #620]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	adcs	r0, r10, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r11, r4
-	str	r0, [sp, #28]           @ 4-byte Spill
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	adcs	r0, r11, r6
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	adcs	r0, r7, r12
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #20]                   @ 4-byte Spill
 	mov	r0, #0
 	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	mul	r0, r7, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	umull	lr, r3, r0, r5
-	umull	r6, r12, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	umull	r11, r2, r0, r1
-	mov	r1, r6
-	mov	r4, r2
-	adds	r2, r2, lr
-	umlal	r4, r1, r0, r5
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	adcs	r3, r3, r6
-	umull	r2, lr, r0, r5
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	umull	r3, r6, r0, r5
-	adcs	r12, r12, r3
-	umull	r5, r3, r0, r10
-	adcs	r0, r6, r5
-	adcs	r2, r3, r2
-	adc	r3, lr, #0
-	adds	r7, r11, r7
-	adcs	r7, r4, r9
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r12, r1
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	mul	r2, r0, r4
+	add	r0, sp, #552
+	bl	mulPv256x32
+	ldr	r0, [sp, #584]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r5, [sp, #60]                   @ 4-byte Reload
+	ldr	r0, [sp, #580]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #576]
+	ldr	r2, [r5, #8]
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #512
+	ldr	r6, [sp, #572]
+	ldr	r7, [sp, #568]
+	ldr	r10, [sp, #552]
+	ldr	r11, [sp, #556]
+	ldr	r8, [sp, #560]
+	ldr	r9, [sp, #564]
+	bl	mulPv256x32
+	adds	r0, r4, r10
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	add	lr, sp, #512
+	adcs	r0, r0, r11
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r10, r0, r8
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r11, r0, r9
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r6, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	umull	r4, r5, r2, r8
-	ldr	r8, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	umull	r3, r1, r2, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	str	r3, [sp, #8]            @ 4-byte Spill
-	mov	r3, r2
-	str	r1, [sp, #16]           @ 4-byte Spill
-	umull	r6, r9, r2, r0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	umull	r1, lr, r2, r0
-	umull	r11, r0, r3, r8
-	umull	r2, r12, r3, r7
-	adds	r2, r0, r2
-	str	r11, [sp, #12]          @ 4-byte Spill
-	adcs	r2, r12, r1
-	umlal	r0, r1, r3, r7
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	adcs	r2, lr, r4
-	adcs	r4, r5, r6
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	adcs	r6, r9, r6
-	adc	r5, r5, #0
-	adds	r8, r3, r7
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adcs	r7, r0, r1
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	mul	r0, r8, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	umull	r2, r3, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r3, [sp, #16]           @ 4-byte Spill
-	umull	r3, r5, r0, r1
-	mov	r1, r2
-	str	r3, [sp, #20]           @ 4-byte Spill
-	ldr	r3, [sp, #76]           @ 4-byte Reload
-	mov	r4, r5
-	umlal	r4, r1, r0, r7
-	umull	r9, r6, r0, r3
-	ldr	r3, [sp, #72]           @ 4-byte Reload
-	str	r6, [sp, #12]           @ 4-byte Spill
-	umull	r6, lr, r0, r10
-	umull	r12, r10, r0, r3
-	umull	r11, r3, r0, r7
-	adds	r0, r5, r11
-	adcs	r0, r3, r2
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r11, r0, r12
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r10, r10, r6
-	adcs	lr, lr, r9
-	adc	r9, r0, #0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adds	r6, r0, r8
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r8, [sp, #88]           @ 4-byte Reload
-	umull	r7, r2, r3, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	str	r7, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp, #20]           @ 4-byte Spill
-	umull	r7, r2, r3, r0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	str	r2, [sp, #8]            @ 4-byte Spill
-	str	r7, [sp, #4]            @ 4-byte Spill
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	umull	r5, r2, r3, r0
-	str	r2, [sp]                @ 4-byte Spill
-	umull	r2, r0, r3, r8
-	umull	r6, r12, r3, r7
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	adcs	r4, r4, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r11, r11, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r10, r10, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r1, lr, r1
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r9, r1
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adc	lr, r1, #0
-	adds	r6, r0, r6
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r2, r12, r5
-	umlal	r0, r5, r3, r7
-	ldr	r2, [sp]                @ 4-byte Reload
-	adcs	r9, r2, r1
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	umull	r6, r2, r3, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r6, r1, r6
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r2, r2, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adc	r8, r1, #0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adds	r4, r4, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	adcs	r0, r11, r5
-	ldr	r5, [sp, #112]          @ 4-byte Reload
-	ldr	r11, [sp, #76]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r10, r9
-	ldr	r10, [sp, #80]          @ 4-byte Reload
-	ldr	r9, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, lr, r8
-	ldr	r8, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r6, r6, r0
+	ldr	r8, [sp, #544]
+	adcs	r0, r10, r1
+	ldr	r9, [sp, #540]
+	ldr	r4, [sp, #536]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	adcs	r0, r11, r2
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	adcs	r0, r7, r4
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #20]                   @ 4-byte Spill
 	mov	r0, #0
 	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mul	r0, r4, r10
-	umull	r2, r12, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	umull	r3, r7, r0, r1
-	mov	r1, r2
-	str	r3, [sp, #24]           @ 4-byte Spill
-	umull	lr, r3, r0, r5
-	mov	r6, r7
-	adds	r7, r7, lr
-	umlal	r6, r1, r0, r5
-	adcs	r2, r3, r2
-	umull	r7, lr, r0, r11
-	umull	r2, r3, r0, r9
-	adcs	r12, r12, r2
-	umull	r5, r2, r0, r8
-	adcs	r0, r3, r5
-	adcs	r2, r2, r7
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	adc	r3, lr, #0
-	adds	r7, r7, r4
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	adcs	r7, r6, r7
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r12, r1
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, sp, #472
+	bl	mulPv256x32
+	ldr	r0, [sp, #504]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #500]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r2, [r5, #12]
+	ldr	r5, [sp, #56]                   @ 4-byte Reload
+	ldr	r0, [sp, #496]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #432
+	mov	r1, r5
+	ldr	r4, [sp, #492]
+	ldr	r7, [sp, #488]
+	ldr	r10, [sp, #472]
+	ldr	r11, [sp, #476]
+	ldr	r8, [sp, #480]
+	ldr	r9, [sp, #484]
+	bl	mulPv256x32
+	adds	r0, r6, r10
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	add	lr, sp, #436
+	adcs	r10, r0, r11
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r11, r0, r8
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r6, r0, r1
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adcs	r7, r0, r1
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	ldr	r3, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r4, [r0, #16]
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	umull	r12, lr, r4, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	umull	r5, r6, r4, r3
-	umull	r2, r8, r4, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	umull	r7, r1, r4, r0
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	adds	r5, r1, r5
-	umull	r0, r5, r4, r7
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adcs	r6, r6, r0
-	umlal	r1, r0, r4, r3
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	adcs	r2, r5, r2
-	umull	r5, r6, r4, r7
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	adcs	r7, r8, r5
-	adcs	r6, r6, r12
-	adc	r5, lr, #0
-	adds	r8, r3, r4
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r3, r1
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r4, [sp, #432]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r4, r10, r4
+	adcs	r0, r11, r0
+	ldr	r8, [sp, #464]
+	ldr	r9, [sp, #460]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #32]           @ 4-byte Spill
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	adcs	r0, r6, r12
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	adcs	r0, r7, lr
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #20]                   @ 4-byte Spill
 	mov	r0, #0
 	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mul	r0, r8, r10
-	umull	r5, r12, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	umull	lr, r3, r0, r6
-	umull	r10, r2, r0, r1
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	mul	r2, r0, r4
+	add	r0, sp, #392
+	bl	mulPv256x32
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
 	mov	r1, r5
-	mov	r4, r2
-	adds	r2, r2, lr
-	adcs	r3, r3, r5
-	umlal	r4, r1, r0, r6
-	umull	r2, lr, r0, r11
-	ldr	r11, [sp, #88]          @ 4-byte Reload
-	umull	r3, r5, r0, r9
-	adcs	r12, r12, r3
-	umull	r6, r3, r0, r7
-	adcs	r0, r5, r6
-	adcs	r2, r3, r2
-	adc	r3, lr, #0
-	adds	r7, r10, r8
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	adcs	r7, r4, r7
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r12, r1
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
+	ldr	r2, [r0, #16]
+	ldr	r0, [sp, #424]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #420]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #416]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #352
+	ldr	r6, [sp, #412]
+	ldr	r7, [sp, #408]
+	ldr	r10, [sp, #392]
+	ldr	r11, [sp, #396]
+	ldr	r8, [sp, #400]
+	ldr	r9, [sp, #404]
+	bl	mulPv256x32
+	adds	r0, r4, r10
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	add	lr, sp, #352
+	adcs	r5, r0, r11
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r10, r0, r8
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r11, r0, r9
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	ldr	r3, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adcs	r7, r0, r1
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r4, [r0, #20]
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	umull	r9, r1, r4, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	umull	r2, r12, r4, r3
-	str	r1, [sp, #60]           @ 4-byte Spill
-	umull	r7, r8, r4, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	umull	r5, r6, r4, r0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	umull	r1, lr, r4, r0
-	umull	r10, r0, r4, r11
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	adds	r2, r0, r2
-	adcs	r2, r12, r1
-	umlal	r0, r1, r4, r3
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	ldr	r12, [sp, #112]         @ 4-byte Reload
-	adcs	r2, lr, r5
-	adcs	r5, r6, r7
-	ldr	r6, [sp, #60]           @ 4-byte Reload
-	adcs	r7, r8, r9
-	ldr	r9, [sp, #68]           @ 4-byte Reload
-	adc	r6, r6, #0
-	adds	r8, r3, r10
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	ldr	r10, [sp, #84]          @ 4-byte Reload
-	adcs	lr, r3, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r6, r5, r0
+	ldr	r8, [sp, #384]
+	adcs	r0, r10, r1
+	ldr	r9, [sp, #380]
+	ldr	r4, [sp, #376]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	adcs	r0, r11, r2
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r5, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	mul	r2, r5, r6
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	adcs	r0, r7, r4
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	add	r0, sp, #312
+	bl	mulPv256x32
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	ldr	r2, [r0, #20]
+	ldr	r0, [sp, #344]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #340]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #336]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #272
+	ldr	r4, [sp, #332]
+	ldr	r7, [sp, #328]
+	ldr	r10, [sp, #312]
+	ldr	r11, [sp, #316]
+	ldr	r8, [sp, #320]
+	ldr	r9, [sp, #324]
+	bl	mulPv256x32
+	adds	r0, r6, r10
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	add	lr, sp, #276
+	adcs	r10, r0, r11
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r11, r0, r8
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r6, r0, r1
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adcs	r7, r0, r1
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r4, [sp, #272]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r4, r10, r4
+	adcs	r0, r11, r0
+	ldr	r8, [sp, #304]
+	ldr	r9, [sp, #300]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	mul	r2, r5, r4
+	ldr	r5, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	adcs	r0, r6, r12
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	adcs	r0, r7, lr
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	mov	r1, r5
+	adcs	r0, r0, r9
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	add	r0, sp, #232
+	bl	mulPv256x32
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	ldr	r2, [r0, #24]
+	ldr	r0, [sp, #264]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #260]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #256]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #192
+	ldr	r6, [sp, #252]
+	ldr	r7, [sp, #248]
+	ldr	r10, [sp, #232]
+	ldr	r11, [sp, #236]
+	ldr	r8, [sp, #240]
+	ldr	r9, [sp, #244]
+	bl	mulPv256x32
+	adds	r0, r4, r10
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	add	lr, sp, #192
+	adcs	r0, r0, r11
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r10, r0, r8
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r11, r0, r9
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r7
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r6
-	str	r0, [sp, #88]           @ 4-byte Spill
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r6, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adcs	r7, r0, r1
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r6, r6, r0
+	ldr	r8, [sp, #224]
+	adcs	r0, r10, r1
+	ldr	r9, [sp, #220]
+	ldr	r4, [sp, #216]
+	mov	r1, r5
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	adcs	r0, r11, r2
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	adcs	r0, r7, r4
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #20]                   @ 4-byte Spill
 	mov	r0, #0
 	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	mul	r0, r8, r1
-	umull	r3, r4, r0, r10
-	umull	r1, r2, r0, r12
-	adds	r1, r4, r1
-	str	r3, [sp, #80]           @ 4-byte Spill
-	umull	r6, r1, r0, r11
-	adcs	r2, r2, r6
-	umlal	r4, r6, r0, r12
-	umull	r2, r3, r0, r5
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, sp, #152
+	bl	mulPv256x32
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	ldr	r2, [r0, #28]
+	ldr	r0, [sp, #184]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #180]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #112
+	ldr	r5, [sp, #176]
+	ldr	r4, [sp, #172]
+	ldr	r7, [sp, #168]
+	ldr	r10, [sp, #152]
+	ldr	r11, [sp, #156]
+	ldr	r8, [sp, #160]
+	ldr	r9, [sp, #164]
+	bl	mulPv256x32
+	adds	r0, r6, r10
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	add	lr, sp, #128
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	adcs	r8, r1, r8
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r9, r1, r9
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r1, r1, r7
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r11, r1, r4
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r1, r1, r5
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r10, r1, r2
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
 	adcs	r1, r1, r2
-	str	r1, [sp, #60]           @ 4-byte Spill
-	umull	r2, r1, r0, r9
-	adcs	r2, r3, r2
-	str	r2, [sp, #52]           @ 4-byte Spill
-	umull	r3, r2, r0, r7
-	adcs	r1, r1, r3
-	ldr	r3, [sp, #60]           @ 4-byte Reload
-	adc	r0, r2, #0
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	adds	r2, r2, r8
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	adcs	r12, r4, lr
-	adcs	lr, r6, r2
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	adcs	r8, r3, r2
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	adcs	r6, r3, r2
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	adcs	r3, r1, r2
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r2, r0, r1
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adc	r1, r1, #0
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r2, [sp, #112]
+	ldr	r12, [sp, #116]
+	adds	r5, r0, r2
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #120]
+	adcs	r8, r8, r12
+	ldr	r4, [sp, #124]
+	adcs	r9, r9, r3
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	mul	r7, r0, r5
+	ldr	r6, [sp, #144]
+	adcs	r4, r3, r4
+	ldm	lr, {r0, r1, r2, lr}
+	adcs	r11, r11, r0
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r10, r10, r2
+	mov	r2, r7
+	adcs	r0, r0, lr
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	mov	r0, #0
+	ldr	r6, [sp, #68]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	subs	r4, r12, r10
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	sbcs	r0, lr, r0
-	sbcs	r1, r8, r11
-	mov	r11, r6
-	sbcs	r5, r6, r5
-	sbcs	r6, r3, r9
-	mov	r9, r2
-	sbcs	r10, r2, r7
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	sbc	r7, r2, #0
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	ands	r7, r7, #1
-	movne	r4, r12
-	movne	r0, lr
-	movne	r1, r8
-	cmp	r7, #0
-	movne	r5, r11
-	movne	r6, r3
-	movne	r10, r9
-	str	r4, [r2]
-	str	r0, [r2, #4]
-	str	r1, [r2, #8]
-	str	r5, [r2, #12]
-	str	r6, [r2, #16]
-	str	r10, [r2, #20]
-	add	sp, sp, #116
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	add	r0, sp, #72
+	mov	r1, r6
+	bl	mulPv256x32
+	add	r3, sp, #72
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r5, r0
+	adcs	r1, r8, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	adcs	r9, r9, r2
+	str	r9, [sp, #60]                   @ 4-byte Spill
+	adcs	lr, r4, r3
+	str	lr, [sp, #44]                   @ 4-byte Spill
+	ldr	r3, [sp, #88]
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r2, r11, r3
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r7, [sp, #92]
+	ldr	r5, [sp, #96]
+	adcs	r12, r0, r7
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r10, r5
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r4, [sp, #100]
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #104]
+	mov	r4, r6
+	ldr	r6, [r6]
+	add	r11, r4, #8
+	adcs	r0, r3, r0
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	adc	r3, r3, #0
+	str	r3, [sp, #56]                   @ 4-byte Spill
+	ldr	r3, [r4, #4]
+	subs	r8, r1, r6
+	str	r3, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	ldm	r11, {r3, r5, r7, r10, r11}
+	sbcs	r6, r9, r1
+	sbcs	r3, lr, r3
+	mov	r9, r12
+	sbcs	r1, r2, r5
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	sbcs	r7, r12, r7
+	ldr	r4, [r4, #28]
+	sbcs	r12, r2, r10
+	ldr	r10, [sp, #40]                  @ 4-byte Reload
+	ldr	r5, [sp, #56]                   @ 4-byte Reload
+	sbcs	lr, r10, r11
+	sbcs	r4, r0, r4
+	sbc	r5, r5, #0
+	ands	r5, r5, #1
+	movne	r12, r2
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	movne	r4, r0
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	movne	lr, r10
+	cmp	r5, #0
+	movne	r1, r2
+	movne	r7, r9
+	str	r1, [r0, #12]
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	str	r4, [r0, #28]
+	str	lr, [r0, #24]
+	movne	r3, r1
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	cmp	r5, #0
+	str	r12, [r0, #20]
+	str	r7, [r0, #16]
+	movne	r6, r1
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	str	r3, [r0, #8]
+	str	r6, [r0, #4]
+	movne	r8, r1
+	str	r8, [r0]
+	add	sp, sp, #716
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end83:
-	.size	mcl_fp_mont6L, .Lfunc_end83-mcl_fp_mont6L
+.Lfunc_end43:
+	.size	mcl_fp_mont8L, .Lfunc_end43-mcl_fp_mont8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_montNF6L
-	.align	2
-	.type	mcl_fp_montNF6L,%function
-mcl_fp_montNF6L:                        @ @mcl_fp_montNF6L
+                                        @ -- End function
+	.globl	mcl_fp_montNF8L                 @ -- Begin function mcl_fp_montNF8L
+	.p2align	2
+	.type	mcl_fp_montNF8L,%function
+	.code	32                              @ @mcl_fp_montNF8L
+mcl_fp_montNF8L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#88
-	sub	sp, sp, #88
-	str	r2, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r2, {r4, r12}
-	ldr	r5, [r1, #4]
-	ldr	r0, [r2, #12]
-	ldr	r9, [r2, #8]
-	ldr	r2, [r1]
-	ldr	r7, [r1, #8]
-	ldr	lr, [r3, #8]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r1, #12]
-	str	r5, [sp, #44]           @ 4-byte Spill
-	umull	r6, r8, r5, r4
-	mov	r10, r5
-	umull	r11, r5, r2, r4
-	str	r2, [sp, #52]           @ 4-byte Spill
-	str	r7, [sp, #48]           @ 4-byte Spill
-	str	lr, [sp, #40]           @ 4-byte Spill
-	adds	r6, r5, r6
-	umull	r2, r6, r7, r4
-	adcs	r7, r8, r2
-	umlal	r5, r2, r10, r4
-	umull	r7, r8, r0, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r6, r7
-	ldr	r6, [r1, #16]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	umull	r7, r0, r6, r4
-	str	r6, [sp, #72]           @ 4-byte Spill
-	ldr	r6, [r3]
-	adcs	r7, r8, r7
-	str	r7, [sp, #60]           @ 4-byte Spill
-	ldr	r7, [r1, #20]
-	str	r6, [sp, #80]           @ 4-byte Spill
-	umull	r1, r8, r7, r4
-	str	r7, [sp, #76]           @ 4-byte Spill
-	adcs	r0, r0, r1
-	ldr	r1, [r3, #-4]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	adc	r0, r8, #0
-	ldr	r8, [r3, #4]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	mul	r0, r11, r1
-	str	r1, [sp, #56]           @ 4-byte Spill
-	umull	r1, r7, r0, r6
-	str	r8, [sp, #68]           @ 4-byte Spill
-	adds	r1, r1, r11
-	str	r7, [sp, #12]           @ 4-byte Spill
-	umull	r1, r4, r0, r8
-	adcs	r8, r1, r5
-	ldr	r1, [r3, #12]
-	umull	r5, r11, r0, lr
-	str	r4, [sp, #8]            @ 4-byte Spill
-	adcs	r6, r5, r2
-	str	r1, [sp, #84]           @ 4-byte Spill
-	umull	r5, r7, r0, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	lr, r5, r1
-	ldr	r1, [r3, #16]
-	str	r1, [sp, #64]           @ 4-byte Spill
-	umull	r5, r4, r0, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r5, r5, r1
-	ldr	r1, [r3, #20]
-	umull	r3, r2, r0, r1
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	adc	r3, r1, #0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adds	r1, r8, r1
-	ldr	r8, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r1, r6, r1
-	adcs	r11, lr, r11
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	lr, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r5, r7
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r1, [sp, #12]           @ 4-byte Spill
-	str	r0, [sp, #8]            @ 4-byte Spill
-	adc	r0, r3, r2
-	umull	r3, r6, r12, r10
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	umull	r7, r1, r12, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adds	r3, r1, r3
-	umull	r2, r3, r12, r0
-	adcs	r6, r6, r2
-	umlal	r1, r2, r12, r10
-	ldr	r10, [sp, #68]          @ 4-byte Reload
-	umull	r6, r0, r12, r8
-	adcs	r4, r3, r6
-	umull	r6, r3, r12, r5
-	adcs	r5, r0, r6
-	umull	r6, r0, r12, lr
-	ldr	r12, [sp, #60]          @ 4-byte Reload
-	adcs	r3, r3, r6
-	ldr	r6, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r7, r7, r6
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	adcs	r2, r2, r11
-	adcs	r6, r4, r6
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	adcs	r11, r5, r4
-	ldr	r5, [sp, #4]            @ 4-byte Reload
-	adcs	r3, r3, r5
-	adc	r0, r0, #0
-	str	r3, [sp, #20]           @ 4-byte Spill
-	ldr	r3, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	mul	r4, r7, r0
-	umull	r0, r5, r4, r3
-	adds	r0, r0, r7
-	str	r5, [sp, #12]           @ 4-byte Spill
-	umull	r0, r3, r4, r10
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	str	r3, [sp, #8]            @ 4-byte Spill
-	adcs	r3, r0, r1
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	umull	r1, r7, r4, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	str	r7, [sp, #4]            @ 4-byte Spill
-	adcs	r1, r1, r2
-	umull	r2, r7, r4, r0
-	str	r7, [sp]                @ 4-byte Spill
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adcs	r2, r2, r6
-	umull	r6, r0, r4, r7
-	adcs	r6, r6, r11
-	umull	r7, r11, r4, r12
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	ldr	r12, [sp, #48]          @ 4-byte Reload
-	adcs	r4, r7, r4
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	adc	r7, r7, #0
-	adds	r3, r3, r5
-	str	r3, [sp, #20]           @ 4-byte Spill
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	adcs	r1, r1, r3
-	ldr	r3, [sp, #72]           @ 4-byte Reload
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r1, r2, r1
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp]                @ 4-byte Reload
-	adcs	r1, r6, r1
-	adcs	r0, r4, r0
-	str	r1, [sp, #8]            @ 4-byte Spill
-	str	r0, [sp, #4]            @ 4-byte Spill
-	adc	r0, r7, r11
-	ldr	r11, [sp, #52]          @ 4-byte Reload
-	str	r0, [sp]                @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	umull	r6, r1, r9, r11
-	umull	r5, r4, r9, r0
-	adds	r5, r1, r5
-	umull	r2, r5, r9, r12
-	adcs	r4, r4, r2
-	umlal	r1, r2, r9, r0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	umull	r4, r7, r9, r8
-	adcs	r8, r5, r4
-	umull	r5, r4, r9, r3
-	adcs	r5, r7, r5
-	umull	r7, r3, r9, lr
-	ldr	lr, [sp, #60]           @ 4-byte Reload
-	adcs	r4, r4, r7
-	adc	r3, r3, #0
-	adds	r7, r6, r0
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r1, r1, r0
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r2, r2, r0
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r6, r8, r0
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	ldr	r8, [sp, #56]           @ 4-byte Reload
-	adcs	r9, r5, r0
-	ldr	r0, [sp]                @ 4-byte Reload
+	.pad	#716
+	sub	sp, sp, #716
+	mov	r7, r2
+	ldr	r2, [r2]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	add	r0, sp, #672
+	ldr	r5, [r3, #-4]
+	mov	r4, r3
+	str	r5, [sp, #60]                   @ 4-byte Spill
+	mov	r6, r7
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	mov	r7, r1
+	str	r3, [sp, #56]                   @ 4-byte Spill
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	bl	mulPv256x32
+	ldr	r0, [sp, #676]
+	mov	r1, r4
+	ldr	r10, [sp, #672]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #680]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mul	r2, r5, r10
+	ldr	r0, [sp, #684]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #704]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #700]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #696]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #692]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #688]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	add	r0, sp, #632
+	bl	mulPv256x32
+	ldr	r0, [sp, #664]
+	add	r11, sp, #640
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	mov	r1, r7
+	ldr	r0, [sp, #660]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #656]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	add	r0, sp, #592
+	ldr	r2, [r6, #4]
+	ldm	r11, {r5, r9, r11}
+	ldr	r4, [sp, #652]
+	ldr	r6, [sp, #632]
+	ldr	r8, [sp, #636]
+	bl	mulPv256x32
+	adds	r0, r6, r10
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	add	r6, sp, #596
+	adcs	r0, r8, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	lr, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r5, r0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r5, [sp, #36]                   @ 4-byte Reload
+	adcs	r9, r9, r0
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r11, r11, r0
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r4, r0
-	mul	r4, r7, r8
-	str	r0, [sp, #20]           @ 4-byte Spill
-	adc	r0, r3, #0
-	ldr	r3, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	umull	r0, r5, r4, r3
-	adds	r0, r0, r7
-	str	r5, [sp, #12]           @ 4-byte Spill
-	umull	r0, r3, r4, r10
-	ldr	r10, [sp, #40]          @ 4-byte Reload
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	str	r3, [sp, #8]            @ 4-byte Spill
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r10, r1, r0
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r7, r1, r0
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adc	r0, r1, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #624]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r4, [sp, #592]
+	ldm	r6, {r0, r1, r2, r6}
+	adds	r4, lr, r4
+	adcs	r0, r5, r0
+	ldr	r8, [sp, #620]
+	ldr	r12, [sp, #616]
+	ldr	r3, [sp, #612]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	adcs	r0, r9, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	adcs	r0, r11, r2
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	adcs	r0, r10, r3
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	adcs	r0, r7, r12
+	ldr	r7, [sp, #60]                   @ 4-byte Reload
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	mul	r2, r7, r4
+	adcs	r0, r0, r8
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	add	r0, sp, #552
+	bl	mulPv256x32
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r5, [sp, #68]                   @ 4-byte Reload
+	ldr	r2, [r0, #8]
+	ldr	r0, [sp, #584]
+	mov	r1, r5
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #580]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #576]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #572]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #512
+	ldr	r9, [sp, #568]
+	ldr	r10, [sp, #552]
+	ldr	r11, [sp, #556]
+	ldr	r8, [sp, #560]
+	ldr	r6, [sp, #564]
+	bl	mulPv256x32
+	adds	r0, r4, r10
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	add	lr, sp, #516
+	adcs	r0, r0, r11
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r10, r0, r8
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r11, r0, r6
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r6, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	umull	r1, r3, r4, r10
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldr	r3, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	umull	r2, r7, r4, r3
-	ldr	r3, [sp, #64]           @ 4-byte Reload
-	str	r7, [sp]                @ 4-byte Spill
-	adcs	r2, r2, r6
-	umull	r6, r7, r4, r3
-	adcs	r6, r6, r9
-	umull	r3, r9, r4, lr
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	adcs	r3, r3, r4
-	ldr	r4, [sp, #16]           @ 4-byte Reload
-	adc	r4, r4, #0
-	adds	r0, r0, r5
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r1, r0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp]                @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	adcs	r0, r3, r7
-	str	r0, [sp, #4]            @ 4-byte Spill
-	adc	r0, r4, r9
-	ldr	r4, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp]                @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	umull	r3, lr, r0, r12
-	ldr	r12, [sp, #36]          @ 4-byte Reload
-	umull	r9, r2, r0, r11
-	umull	r6, r7, r0, r4
-	mov	r1, r2
-	adds	r2, r2, r6
-	mov	r5, r3
-	adcs	r2, r7, r3
-	umlal	r1, r5, r0, r4
-	umull	r2, r3, r0, r12
-	adcs	r11, lr, r2
-	ldr	lr, [sp, #72]           @ 4-byte Reload
-	ldr	r2, [sp, #76]           @ 4-byte Reload
-	umull	r4, r6, r0, lr
-	adcs	r3, r3, r4
-	umull	r4, r7, r0, r2
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	adcs	r4, r6, r4
-	adc	r6, r7, #0
-	adds	r0, r9, r0
-	ldr	r9, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	adcs	r7, r5, r2
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	ldr	r5, [sp, #4]            @ 4-byte Reload
-	adcs	r2, r11, r2
-	adcs	r11, r3, r5
-	ldr	r3, [sp]                @ 4-byte Reload
-	adcs	r3, r4, r3
-	mul	r4, r0, r8
-	ldr	r8, [sp, #80]           @ 4-byte Reload
-	str	r3, [sp, #24]           @ 4-byte Spill
-	adc	r3, r6, #0
-	str	r3, [sp, #20]           @ 4-byte Spill
-	umull	r5, r3, r4, r8
-	str	r3, [sp, #16]           @ 4-byte Spill
-	ldr	r3, [sp, #68]           @ 4-byte Reload
-	adds	r0, r5, r0
-	umull	r0, r5, r4, r3
-	str	r5, [sp, #12]           @ 4-byte Spill
-	ldr	r5, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
 	adcs	r0, r0, r1
-	umull	r1, r3, r4, r10
-	ldr	r10, [sp, #60]          @ 4-byte Reload
-	str	r3, [sp, #8]            @ 4-byte Spill
-	adcs	r1, r1, r7
-	umull	r7, r3, r4, r5
-	adcs	r2, r7, r2
-	umull	r7, r5, r4, r9
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	adcs	r7, r7, r11
-	umull	r6, r11, r4, r10
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	adcs	r4, r6, r4
-	ldr	r6, [sp, #20]           @ 4-byte Reload
-	adc	r6, r6, #0
-	adds	r0, r0, r3
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #12]           @ 4-byte Spill
-	adcs	r0, r4, r5
-	str	r0, [sp, #8]            @ 4-byte Spill
-	adc	r0, r6, r11
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r5, [r0, #16]
-	umull	r11, r2, r5, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	umull	r4, r0, r5, r7
-	adds	r4, r2, r4
-	umull	r3, r4, r5, r1
-	adcs	r0, r0, r3
-	umlal	r2, r3, r5, r7
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	umull	r0, r6, r5, r12
-	adcs	r12, r4, r0
-	umull	r4, r1, r5, lr
-	adcs	r4, r6, r4
-	umull	r6, r0, r5, r7
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	adc	r0, r0, #0
-	adds	r6, r11, r7
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	adcs	r2, r2, r7
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	adcs	r3, r3, r7
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	adcs	r5, r12, r7
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	adcs	r7, r4, r7
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	adcs	r1, r1, r7
-	adc	r0, r0, #0
-	str	r1, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	mul	r4, r6, r0
-	umull	r0, r1, r4, r8
-	ldr	r8, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adds	r0, r0, r6
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	umull	r0, r11, r4, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #544]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r3, [sp, #512]
+	ldm	lr, {r0, r1, r2, r12, lr}
+	adds	r9, r6, r3
+	adcs	r0, r10, r0
+	ldr	r8, [sp, #540]
+	ldr	r4, [sp, #536]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	adcs	r0, r11, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	umull	r2, lr, r4, r8
-	adcs	r2, r2, r3
-	umull	r3, r12, r4, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r3, r3, r5
-	umull	r5, r6, r4, r9
-	adcs	r5, r5, r1
-	umull	r1, r9, r4, r10
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	adc	r4, r4, #0
-	adds	r0, r0, r7
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adcs	r0, r2, r11
-	adcs	r11, r3, lr
-	str	r0, [sp, #20]           @ 4-byte Spill
-	adcs	r10, r5, r12
-	adcs	r0, r1, r6
-	str	r0, [sp, #16]           @ 4-byte Spill
-	adc	r0, r4, r9
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r5, [r0, #20]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	umull	r6, r1, r5, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mov	r4, r6
-	umull	lr, r3, r5, r0
-	umull	r12, r0, r5, r7
-	mov	r2, r3
-	adds	r3, r3, r12
-	umlal	r2, r4, r5, r7
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	umull	r0, r3, r5, r7
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	adcs	r12, r1, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	umull	r1, r6, r5, r0
-	adcs	r1, r3, r1
-	umull	r3, r0, r5, r7
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	adcs	r3, r6, r3
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	mul	r2, r7, r9
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	mov	r1, r7
+	adcs	r0, r0, lr
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	adds	r6, lr, r5
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	ldr	lr, [sp, #68]           @ 4-byte Reload
-	adcs	r2, r2, r7
-	adcs	r7, r4, r11
-	adcs	r9, r12, r10
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #80]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r1, r3, r1
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	add	r0, sp, #472
+	bl	mulPv256x32
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	mov	r1, r5
+	ldr	r2, [r0, #12]
+	ldr	r0, [sp, #504]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #500]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #496]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #492]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #432
+	ldr	r4, [sp, #488]
+	ldr	r10, [sp, #472]
+	ldr	r11, [sp, #476]
+	ldr	r8, [sp, #480]
+	ldr	r6, [sp, #484]
+	bl	mulPv256x32
+	adds	r0, r9, r10
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	add	lr, sp, #436
+	adcs	r5, r0, r11
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r10, r0, r8
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r11, r0, r6
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adcs	r6, r0, r1
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #464]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r3, [sp, #432]
+	ldm	lr, {r0, r1, r2, r12, lr}
+	adds	r9, r5, r3
+	adcs	r0, r10, r0
+	ldr	r8, [sp, #460]
+	ldr	r4, [sp, #456]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	adcs	r0, r11, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	mov	r1, r7
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r5, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	mul	r2, r5, r9
+	adcs	r0, r0, r12
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	adcs	r0, r6, lr
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r1, [sp, #76]           @ 4-byte Spill
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	mul	r4, r6, r0
-	umull	r0, r1, r4, r5
-	umull	r3, r11, r4, lr
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adds	r0, r0, r6
-	umull	r6, r0, r4, r8
-	adcs	r12, r3, r2
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	adcs	r10, r6, r7
-	umull	r3, r0, r4, r1
-	adcs	r9, r3, r9
-	ldr	r3, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	umull	r7, r0, r4, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r7, r7, r0
-	umull	r6, r0, r4, r2
-	ldr	r4, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r6, r6, r4
-	ldr	r4, [sp, #72]           @ 4-byte Reload
-	adc	r4, r4, #0
-	adds	r12, r12, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r11, r10, r11
-	adcs	r9, r9, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r10, r7, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r7, r6, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r4, r0
-	subs	r5, r12, r5
-	sbcs	r4, r11, lr
-	mov	lr, r0
-	sbcs	r6, r9, r8
-	sbcs	r1, r10, r1
-	sbcs	r8, r7, r3
-	sbc	r3, r0, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	asr	r0, r3, #31
-	cmp	r0, #0
-	movlt	r5, r12
-	movlt	r4, r11
-	movlt	r6, r9
-	cmp	r0, #0
-	movlt	r1, r10
-	movlt	r8, r7
-	movlt	r3, lr
-	str	r5, [r2]
-	str	r4, [r2, #4]
-	str	r6, [r2, #8]
-	str	r1, [r2, #12]
-	str	r8, [r2, #16]
-	str	r3, [r2, #20]
-	add	sp, sp, #88
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	add	r0, sp, #392
+	bl	mulPv256x32
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r2, [r0, #16]
+	ldr	r0, [sp, #424]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #420]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #416]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #412]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #352
+	ldr	r4, [sp, #408]
+	ldr	r10, [sp, #392]
+	ldr	r11, [sp, #396]
+	ldr	r8, [sp, #400]
+	ldr	r6, [sp, #404]
+	bl	mulPv256x32
+	adds	r0, r9, r10
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	add	lr, sp, #356
+	adcs	r7, r0, r11
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r10, r0, r8
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r11, r0, r6
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adcs	r6, r0, r1
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #384]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r3, [sp, #352]
+	ldm	lr, {r0, r1, r2, r12, lr}
+	adds	r9, r7, r3
+	adcs	r0, r10, r0
+	ldr	r8, [sp, #380]
+	ldr	r4, [sp, #376]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	adcs	r0, r11, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	mul	r2, r5, r9
+	adcs	r0, r0, r12
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	adcs	r0, r6, lr
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	add	r0, sp, #312
+	bl	mulPv256x32
+	ldr	r0, [sp, #344]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #340]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	ldr	r0, [sp, #336]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #332]
+	ldr	r2, [r7, #20]
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #272
+	ldr	r4, [sp, #328]
+	ldr	r10, [sp, #312]
+	ldr	r11, [sp, #316]
+	ldr	r8, [sp, #320]
+	ldr	r6, [sp, #324]
+	bl	mulPv256x32
+	adds	r0, r9, r10
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	add	lr, sp, #276
+	adcs	r5, r0, r11
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r10, r0, r8
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r11, r0, r6
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adcs	r6, r0, r1
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #304]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r3, [sp, #272]
+	ldm	lr, {r0, r1, r2, r12, lr}
+	adds	r9, r5, r3
+	adcs	r0, r10, r0
+	ldr	r8, [sp, #300]
+	ldr	r4, [sp, #296]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	adcs	r0, r11, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r5, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	mov	r1, r5
+	adcs	r0, r0, r12
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	adcs	r0, r6, lr
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	mul	r2, r0, r9
+	add	r0, sp, #232
+	bl	mulPv256x32
+	ldr	r0, [sp, #264]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #260]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #256]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #252]
+	ldr	r2, [r7, #24]
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #192
+	ldr	r4, [sp, #248]
+	ldr	r10, [sp, #232]
+	ldr	r11, [sp, #236]
+	ldr	r8, [sp, #240]
+	ldr	r6, [sp, #244]
+	bl	mulPv256x32
+	adds	r0, r9, r10
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	add	lr, sp, #196
+	adcs	r7, r0, r11
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r10, r0, r8
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r11, r0, r6
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r9, r0, r4
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adcs	r6, r0, r1
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #224]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r3, [sp, #192]
+	ldm	lr, {r0, r1, r2, r12, lr}
+	adds	r7, r7, r3
+	adcs	r0, r10, r0
+	ldr	r8, [sp, #220]
+	ldr	r4, [sp, #216]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	adcs	r0, r11, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	adcs	r0, r9, r2
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	mov	r1, r5
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	adcs	r0, r6, lr
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #152
+	bl	mulPv256x32
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r2, [r0, #28]
+	ldr	r0, [sp, #184]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #180]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	add	r0, sp, #112
+	ldr	r5, [sp, #176]
+	ldr	r4, [sp, #172]
+	ldr	r9, [sp, #168]
+	ldr	r10, [sp, #152]
+	ldr	r11, [sp, #156]
+	ldr	r8, [sp, #160]
+	ldr	r6, [sp, #164]
+	bl	mulPv256x32
+	adds	r0, r7, r10
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	add	lr, sp, #128
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	adcs	r1, r1, r8
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	ldr	r8, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r1, r6
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r11, r1, r9
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r10, r1, r4
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r1, r1, r5
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	ldr	r2, [sp, #64]                   @ 4-byte Reload
+	adc	r1, r1, r2
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r2, [sp, #112]
+	ldr	r12, [sp, #116]
+	adds	r5, r0, r2
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r7, r8, r12
+	ldr	r3, [sp, #120]
+	ldr	r4, [sp, #124]
+	mul	r9, r0, r5
+	ldr	r8, [sp, #56]                   @ 4-byte Reload
+	ldm	lr, {r0, r1, r2, r6, lr}
+	str	r7, [sp, #68]                   @ 4-byte Spill
+	ldr	r7, [sp, #48]                   @ 4-byte Reload
+	adcs	r7, r7, r3
+	adcs	r4, r11, r4
+	adcs	r10, r10, r0
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r11, r0, r1
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	mov	r1, r8
+	adcs	r0, r0, r2
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	mov	r2, r9
+	adcs	r6, r0, r6
+	adc	r0, lr, #0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	add	r0, sp, #72
+	bl	mulPv256x32
+	add	r3, sp, #72
+	add	lr, r8, #20
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r5, r0
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	adcs	r2, r7, r2
+	str	r2, [sp, #64]                   @ 4-byte Spill
+	adcs	r0, r4, r3
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r3, [sp, #88]
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r3, r10, r3
+	str	r3, [sp, #44]                   @ 4-byte Spill
+	ldr	r7, [sp, #92]
+	ldr	r5, [sp, #96]
+	adcs	r0, r11, r7
+	ldr	r7, [r8, #8]
+	adcs	r1, r1, r5
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r4, [sp, #100]
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	adcs	r11, r6, r4
+	ldr	r6, [sp, #104]
+	ldr	r4, [sp, #68]                   @ 4-byte Reload
+	adc	r1, r1, r6
+	ldm	r8, {r6, r9}
+	str	r7, [sp, #60]                   @ 4-byte Spill
+	ldr	r10, [r8, #12]
+	ldr	r7, [r8, #16]
+	subs	r8, r4, r6
+	sbcs	r6, r2, r9
+	ldr	r9, [sp, #48]                   @ 4-byte Reload
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
+	ldm	lr, {r5, r12, lr}
+	sbcs	r4, r9, r2
+	sbcs	r3, r3, r10
+	ldr	r2, [sp, #40]                   @ 4-byte Reload
+	sbcs	r7, r0, r7
+	mov	r10, r0
+	sbcs	r5, r2, r5
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	sbcs	r12, r11, r12
+	sbc	lr, r1, lr
+	cmn	lr, #1
+	movgt	r1, lr
+	movle	r12, r11
+	str	r1, [r0, #28]
+	movle	r5, r2
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	cmn	lr, #1
+	movle	r7, r10
+	movle	r4, r9
+	str	r12, [r0, #24]
+	movle	r3, r1
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	cmn	lr, #1
+	str	r5, [r0, #20]
+	str	r7, [r0, #16]
+	movle	r6, r1
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	str	r3, [r0, #12]
+	str	r4, [r0, #8]
+	movle	r8, r1
+	str	r6, [r0, #4]
+	str	r8, [r0]
+	add	sp, sp, #716
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end84:
-	.size	mcl_fp_montNF6L, .Lfunc_end84-mcl_fp_montNF6L
+.Lfunc_end44:
+	.size	mcl_fp_montNF8L, .Lfunc_end44-mcl_fp_montNF8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_montRed6L
-	.align	2
-	.type	mcl_fp_montRed6L,%function
-mcl_fp_montRed6L:                       @ @mcl_fp_montRed6L
+                                        @ -- End function
+	.globl	mcl_fp_montRed8L                @ -- Begin function mcl_fp_montRed8L
+	.p2align	2
+	.type	mcl_fp_montRed8L,%function
+	.code	32                              @ @mcl_fp_montRed8L
+mcl_fp_montRed8L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#100
-	sub	sp, sp, #100
-	ldr	r6, [r1, #4]
-	ldr	r10, [r2, #-4]
-	ldr	r9, [r1]
-	ldr	r3, [r2, #8]
-	str	r0, [sp, #72]           @ 4-byte Spill
+	.pad	#436
+	sub	sp, sp, #436
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	mov	r6, r2
 	ldr	r0, [r2]
-	ldr	r8, [r2, #4]
-	str	r6, [sp, #68]           @ 4-byte Spill
-	ldr	r6, [r1, #8]
-	mul	r4, r9, r10
-	str	r3, [sp, #80]           @ 4-byte Spill
-	str	r0, [sp, #76]           @ 4-byte Spill
-	str	r10, [sp, #92]          @ 4-byte Spill
-	umull	r12, r7, r4, r3
-	str	r7, [sp, #52]           @ 4-byte Spill
-	umull	r7, r3, r4, r0
-	mov	lr, r12
-	str	r7, [sp, #56]           @ 4-byte Spill
-	mov	r0, r3
-	str	r6, [sp, #64]           @ 4-byte Spill
-	ldr	r6, [r1, #12]
-	umlal	r0, lr, r4, r8
-	str	r6, [sp, #60]           @ 4-byte Spill
-	ldr	r6, [r2, #20]
-	umull	r5, r7, r4, r6
-	str	r6, [sp, #84]           @ 4-byte Spill
-	ldr	r6, [r2, #16]
-	ldr	r2, [r2, #12]
-	str	r5, [sp, #44]           @ 4-byte Spill
-	str	r7, [sp, #48]           @ 4-byte Spill
-	umull	r5, r7, r4, r6
-	str	r6, [sp, #96]           @ 4-byte Spill
-	str	r2, [sp, #88]           @ 4-byte Spill
-	str	r7, [sp, #40]           @ 4-byte Spill
-	umull	r6, r7, r4, r2
-	umull	r11, r2, r4, r8
-	adds	r3, r3, r11
-	adcs	r2, r2, r12
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	adcs	r12, r2, r6
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	adcs	r11, r7, r5
-	adcs	r2, r3, r2
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	adc	r2, r2, #0
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	adds	r6, r9, r2
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	add	r9, r1, #16
-	adcs	r0, r2, r0
-	mul	r6, r0, r10
-	ldr	r10, [sp, #80]          @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	umull	r3, r0, r6, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r9, {r2, r4, r7, r9}
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	umull	r0, r1, r6, r5
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	mov	r5, r1
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [r2, #4]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [r2, #8]
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [r2, #12]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [r2, #16]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [r2, #20]
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [r2, #24]
+	ldr	r9, [r2, #-4]
+	ldr	r4, [r1]
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r1, #4]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	mul	r2, r4, r9
+	ldr	r0, [r1, #8]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [r1, #20]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [r1, #24]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r1, #28]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [r6, #28]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	add	r0, sp, #392
+	ldr	r10, [r1, #12]
+	ldr	r11, [r1, #16]
+	mov	r1, r6
+	bl	mulPv256x32
+	ldr	r7, [sp, #392]
+	add	lr, sp, #396
+	ldr	r8, [sp, #420]
+	adds	r4, r4, r7
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	ldr	r4, [sp, #72]                   @ 4-byte Reload
+	adcs	r4, r4, r0
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	adcs	r0, r10, r2
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r10, r11, r3
+	mul	r2, r9, r4
+	mov	r1, r6
+	adcs	r11, r0, r12
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	ldr	lr, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	mov	r12, r3
-	adcs	r2, r2, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	mov	r0, r1
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	umlal	r0, r12, r6, r8
-	adcs	r2, r4, r2
-	ldr	r4, [sp, #96]           @ 4-byte Reload
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	adcs	r2, r7, r2
-	str	r2, [sp, #48]           @ 4-byte Spill
-	adcs	r2, r9, #0
-	umull	r9, r11, r6, lr
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	adcs	r2, r2, #0
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	adcs	r2, r2, #0
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	adcs	r2, r2, #0
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adcs	r2, r2, #0
-	str	r2, [sp, #28]           @ 4-byte Spill
-	mov	r2, #0
-	adc	r2, r2, #0
-	str	r2, [sp, #24]           @ 4-byte Spill
-	umull	r7, r2, r6, r8
-	adds	r1, r1, r7
-	adcs	r2, r2, r3
-	ldr	r3, [sp, #88]           @ 4-byte Reload
-	umull	r1, r7, r6, r4
-	umull	r2, r4, r6, r3
-	ldr	r6, [sp, #56]           @ 4-byte Reload
-	adcs	r2, r6, r2
-	adcs	r1, r4, r1
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	str	r2, [sp, #56]           @ 4-byte Spill
-	str	r1, [sp, #4]            @ 4-byte Spill
-	adcs	r1, r7, r9
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r1, [sp]                @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adc	r7, r11, #0
-	adds	r6, r4, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	r1, [sp, #20]           @ 4-byte Spill
-	mul	r6, r1, r0
-	umull	r9, r0, r6, r10
-	str	r0, [sp, #8]            @ 4-byte Spill
-	umull	r0, r1, r6, r5
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	mov	r4, r9
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	adcs	r5, r2, r5
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	str	r0, [sp, #12]           @ 4-byte Spill
-	mov	r0, r1
-	str	r5, [sp, #68]           @ 4-byte Spill
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	umlal	r0, r4, r6, r8
-	adcs	r2, r2, r5
-	ldr	r5, [sp]                @ 4-byte Reload
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	adcs	r2, r5, r2
-	umull	r5, r10, r6, lr
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	adcs	r2, r7, r2
-	umull	r7, r12, r6, r8
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	adcs	r2, r2, #0
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	adcs	r2, r2, #0
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	adcs	r2, r2, #0
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adcs	r2, r2, #0
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adc	r2, r2, #0
-	adds	r1, r1, r7
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r2, [sp, #36]           @ 4-byte Spill
-	umull	r7, r2, r6, r3
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	umull	r11, lr, r6, r1
-	adcs	r6, r12, r9
-	adcs	r3, r3, r7
-	adcs	r12, r2, r11
-	str	r3, [sp, #8]            @ 4-byte Spill
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	adcs	r2, lr, r5
-	ldr	r5, [sp, #80]           @ 4-byte Reload
-	ldr	lr, [sp, #76]           @ 4-byte Reload
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adc	r9, r10, #0
-	adds	r6, r3, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	ldr	r3, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r6, r0, r3
-	str	r0, [sp, #32]           @ 4-byte Spill
-	umull	r11, r0, r6, r5
-	str	r0, [sp, #24]           @ 4-byte Spill
-	umull	r0, r7, r6, lr
-	mov	r10, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	mov	r2, r7
-	umlal	r2, r10, r6, r8
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	ldr	r12, [sp, #84]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	umull	r4, r0, r6, r12
-	str	r4, [sp, #12]           @ 4-byte Spill
-	str	r0, [sp, #36]           @ 4-byte Spill
-	umull	r4, r0, r6, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #8]            @ 4-byte Spill
-	umull	r9, r0, r6, r8
-	adds	r7, r7, r9
-	adcs	r0, r0, r11
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	umull	r7, r9, r6, r1
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adcs	r0, r9, r4
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r7, r4, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	adc	r11, r0, #0
-	adds	r4, r6, r4
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	adcs	r2, r2, r4
-	mul	r4, r2, r3
-	str	r2, [sp, #36]           @ 4-byte Spill
-	umull	r9, r2, r4, r5
-	ldr	r5, [sp, #68]           @ 4-byte Reload
-	str	r2, [sp, #28]           @ 4-byte Spill
-	umull	r3, r2, r4, lr
-	mov	r6, r2
-	str	r3, [sp, #32]           @ 4-byte Spill
-	mov	r3, r9
-	umlal	r6, r3, r4, r8
-	adcs	r5, r10, r5
-	str	r5, [sp, #68]           @ 4-byte Spill
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	adcs	r5, r0, r5
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	str	r5, [sp, #64]           @ 4-byte Spill
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	umull	r7, r0, r4, r12
-	mov	r12, r1
-	str	r0, [sp, #24]           @ 4-byte Spill
-	umull	r11, r0, r4, r8
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	umull	r1, r5, r4, r12
-	adds	r2, r2, r11
-	adcs	r0, r0, r9
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	umull	lr, r10, r4, r7
-	ldr	r4, [sp, #36]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r8, r0, r8
+	mrs	r0, apsr
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [r5, #32]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #424]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	add	r0, sp, #352
+	bl	mulPv256x32
+	ldr	r3, [sp, #352]
+	add	lr, sp, #356
+	ldr	r7, [sp, #376]
+	adds	r3, r4, r3
+	ldm	lr, {r0, r1, r2, r12, lr}
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r4, r3, r0
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	adcs	r1, r5, lr
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	adcs	r2, r10, r2
-	adc	lr, r5, #0
-	ldr	r5, [sp, #32]           @ 4-byte Reload
-	adds	r4, r5, r4
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	adcs	r9, r6, r4
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	adcs	r3, r3, r4
-	str	r3, [sp, #68]           @ 4-byte Spill
-	ldr	r3, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	adcs	r10, r10, r2
+	mul	r2, r9, r4
+	adcs	r0, r11, r12
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	mov	r11, r9
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	adcs	r0, r8, r7
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	ldr	r0, [sp, #384]
+	adcs	r1, r1, r3
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	add	r0, sp, #312
+	mov	r1, r6
+	ldr	r9, [r5, #36]
+	ldr	r8, [sp, #380]
+	bl	mulPv256x32
+	add	r7, sp, #312
+	add	r12, sp, #324
+	ldm	r7, {r2, r3, r7}
+	adds	r2, r4, r2
+	ldm	r12, {r0, r1, r12}
+	ldr	r2, [sp, #64]                   @ 4-byte Reload
+	adcs	r4, r2, r3
+	adcs	r2, r10, r7
+	str	r2, [sp, #64]                   @ 4-byte Spill
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r2, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	mul	r2, r11, r4
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	ldr	r0, [sp, #344]
+	adcs	r1, r1, r8
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	ldr	r10, [r5, #40]
+	adcs	r1, r9, r1
+	str	r1, [sp, #44]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	mul	r0, r9, r1
-	umull	r2, r4, r0, r5
-	umull	r1, r3, r0, r8
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	add	r0, sp, #272
+	mov	r1, r6
+	ldr	r8, [sp, #340]
+	ldr	r9, [sp, #336]
+	bl	mulPv256x32
+	add	r7, sp, #272
+	ldr	r0, [sp, #288]
+	ldm	r7, {r1, r2, r3, r7}
 	adds	r1, r4, r1
-	str	r2, [sp, #92]           @ 4-byte Spill
-	umull	r1, r2, r0, r6
-	adcs	r3, r3, r1
-	umlal	r4, r1, r0, r8
-	umull	r3, lr, r0, r12
-	adcs	r10, r2, r3
-	umull	r3, r2, r0, r7
-	adcs	r11, lr, r3
-	ldr	lr, [sp, #84]           @ 4-byte Reload
-	umull	r7, r3, r0, lr
-	adcs	r2, r2, r7
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adc	r0, r3, #0
-	ldr	r3, [sp, #92]           @ 4-byte Reload
-	adds	r3, r3, r9
-	ldr	r3, [sp, #68]           @ 4-byte Reload
-	adcs	r3, r4, r3
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	adcs	r12, r1, r7
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r10, r10, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r9, r11, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r7, r2, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r11, r0, #0
-	subs	r0, r3, r5
-	sbcs	r5, r12, r8
-	mov	r8, r7
-	sbcs	r2, r10, r6
-	ldr	r6, [sp, #96]           @ 4-byte Reload
-	sbcs	r4, r9, r4
-	sbcs	r6, r7, r6
-	sbcs	r7, r1, lr
-	mov	lr, r1
-	sbc	r1, r11, #0
-	ands	r1, r1, #1
-	movne	r0, r3
-	ldr	r3, [sp, #72]           @ 4-byte Reload
-	movne	r5, r12
-	movne	r2, r10
-	cmp	r1, #0
-	movne	r4, r9
-	movne	r6, r8
-	movne	r7, lr
-	str	r0, [r3]
-	str	r5, [r3, #4]
-	str	r2, [r3, #8]
-	str	r4, [r3, #12]
-	str	r6, [r3, #16]
-	str	r7, [r3, #20]
-	add	sp, sp, #100
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end85:
-	.size	mcl_fp_montRed6L, .Lfunc_end85-mcl_fp_montRed6L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addPre6L
-	.align	2
-	.type	mcl_fp_addPre6L,%function
-mcl_fp_addPre6L:                        @ @mcl_fp_addPre6L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, lr}
-	ldm	r1, {r9, r12, lr}
-	ldr	r10, [r1, #12]
-	ldr	r5, [r1, #16]
-	ldr	r8, [r1, #20]
-	ldm	r2, {r6, r7}
-	add	r4, r2, #8
-	ldm	r4, {r1, r3, r4}
-	ldr	r2, [r2, #20]
-	adds	r6, r6, r9
-	adcs	r7, r7, r12
-	add	r12, r0, #8
-	adcs	r1, r1, lr
-	stm	r0, {r6, r7}
-	adcs	r3, r3, r10
-	adcs	r5, r4, r5
-	adcs	r2, r2, r8
-	stm	r12, {r1, r3, r5}
-	str	r2, [r0, #20]
-	mov	r0, #0
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	adcs	r4, r1, r2
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	mul	r2, r11, r4
+	adcs	r1, r1, r7
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	ldr	r0, [sp, #304]
+	adcs	r1, r1, r9
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	ldr	r7, [r5, #44]
+	adcs	r1, r1, r8
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r10, r1
+	str	r1, [sp, #44]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	pop	{r4, r5, r6, r7, r8, r9, r10, lr}
-	mov	pc, lr
-.Lfunc_end86:
-	.size	mcl_fp_addPre6L, .Lfunc_end86-mcl_fp_addPre6L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subPre6L
-	.align	2
-	.type	mcl_fp_subPre6L,%function
-mcl_fp_subPre6L:                        @ @mcl_fp_subPre6L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, lr}
-	ldm	r2, {r9, r12, lr}
-	ldr	r10, [r2, #12]
-	ldr	r5, [r2, #16]
-	ldr	r8, [r2, #20]
-	ldm	r1, {r6, r7}
-	add	r4, r1, #8
-	ldm	r4, {r2, r3, r4}
-	ldr	r1, [r1, #20]
-	subs	r6, r6, r9
-	sbcs	r7, r7, r12
-	add	r12, r0, #8
-	sbcs	r2, r2, lr
-	stm	r0, {r6, r7}
-	sbcs	r3, r3, r10
-	sbcs	r5, r4, r5
-	sbcs	r1, r1, r8
-	stm	r12, {r2, r3, r5}
-	str	r1, [r0, #20]
-	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	pop	{r4, r5, r6, r7, r8, r9, r10, lr}
-	mov	pc, lr
-.Lfunc_end87:
-	.size	mcl_fp_subPre6L, .Lfunc_end87-mcl_fp_subPre6L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_6L
-	.align	2
-	.type	mcl_fp_shr1_6L,%function
-mcl_fp_shr1_6L:                         @ @mcl_fp_shr1_6L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, lr}
-	push	{r4, r5, r6, lr}
-	ldr	r3, [r1, #4]
-	ldr	r12, [r1]
-	ldr	lr, [r1, #12]
-	ldr	r2, [r1, #8]
-	ldr	r4, [r1, #16]
-	ldr	r1, [r1, #20]
-	lsrs	r5, r3, #1
-	lsr	r3, r3, #1
-	rrx	r12, r12
-	lsrs	r5, lr, #1
-	orr	r6, r3, r2, lsl #31
-	lsr	r5, lr, #1
-	rrx	r2, r2
-	lsrs	r3, r1, #1
-	lsr	r1, r1, #1
-	str	r12, [r0]
-	str	r6, [r0, #4]
-	orr	r5, r5, r4, lsl #31
-	rrx	r3, r4
-	str	r2, [r0, #8]
-	str	r5, [r0, #12]
-	str	r3, [r0, #16]
-	str	r1, [r0, #20]
-	pop	{r4, r5, r6, lr}
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	add	r0, sp, #232
+	mov	r1, r6
+	ldr	r8, [sp, #300]
+	ldr	r9, [sp, #296]
+	ldr	r10, [sp, #292]
+	bl	mulPv256x32
+	add	r3, sp, #232
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r4, r0
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r4, r0, r1
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	mul	r2, r11, r4
+	adcs	r0, r0, r3
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	ldr	r0, [sp, #264]
+	adcs	r1, r1, r10
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r9
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r7, r1
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [r5, #48]
+	mov	r1, r6
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	add	r0, sp, #192
+	ldr	r8, [sp, #260]
+	ldr	r9, [sp, #256]
+	ldr	r10, [sp, #252]
+	ldr	r7, [sp, #248]
+	bl	mulPv256x32
+	add	r3, sp, #192
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r4, r0
+	str	r3, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r4, r0, r1
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	mrs	r0, apsr
+	mul	r2, r11, r4
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	adcs	r7, r1, r7
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	ldr	r0, [sp, #224]
+	adcs	r10, r1, r10
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r9, r1, r9
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r8, r1, r8
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r5, #52]
+	mov	r1, r6
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #220]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #216]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #212]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #208]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	add	r0, sp, #152
+	bl	mulPv256x32
+	add	r2, sp, #152
+	ldm	r2, {r0, r1, r2}
+	adds	r0, r4, r0
+	str	r2, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r2, [sp, #164]
+	adcs	r0, r0, r1
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	mrs	r1, apsr
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	mul	r2, r11, r0
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r1
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [r5, #56]
+	adcs	r4, r10, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #180]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	ldr	r0, [sp, #176]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	adcs	r11, r9, r1
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	ldr	r0, [sp, #172]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adcs	r7, r8, r1
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	ldr	r0, [sp, #168]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	adcs	r10, r1, r3
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	ldr	r0, [sp, #184]
+	adcs	r8, r3, r1
+	mov	r1, r6
+	adc	r9, r0, #0
+	add	r0, sp, #112
+	bl	mulPv256x32
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	ldr	r6, [sp, #120]
+	adcs	r12, r1, r0
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r3, [sp, #124]
+	adcs	r1, r4, r0
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	ldr	r4, [sp, #116]
+	adcs	r2, r11, r0
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r11, r7, r0
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r7, [sp, #144]
+	adcs	r10, r10, r0
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r8, r8, r0
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r9, r0, r9
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adc	lr, r7, #0
+	ldr	r7, [sp, #112]
+	adds	r7, r0, r7
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r4, r12, r4
+	str	r4, [sp, #72]                   @ 4-byte Spill
+	adcs	r1, r1, r6
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	adcs	r2, r2, r3
+	str	r2, [sp, #64]                   @ 4-byte Spill
+	ldr	r3, [sp, #128]
+	adcs	r6, r11, r3
+	str	r6, [sp, #60]                   @ 4-byte Spill
+	ldr	r3, [sp, #132]
+	adcs	r11, r10, r3
+	ldr	r3, [sp, #136]
+	adcs	r10, r8, r3
+	ldr	r3, [sp, #140]
+	adcs	r12, r9, r3
+	ldr	r3, [r5, #60]
+	mov	r9, #0
+	adc	r5, r3, lr
+	subs	r8, r4, r0
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	sbcs	lr, r1, r0
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	sbcs	r4, r2, r0
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	sbcs	r6, r6, r0
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	sbcs	r3, r11, r0
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	sbcs	r7, r10, r0
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	sbcs	r1, r12, r0
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	sbcs	r0, r5, r0
+	sbc	r2, r9, #0
+	ands	r2, r2, #1
+	movne	r0, r5
+	ldr	r5, [sp, #108]                  @ 4-byte Reload
+	movne	r1, r12
+	movne	r7, r10
+	cmp	r2, #0
+	str	r0, [r5, #28]
+	movne	r3, r11
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	str	r1, [r5, #24]
+	str	r7, [r5, #20]
+	movne	r6, r0
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	str	r3, [r5, #16]
+	str	r6, [r5, #12]
+	movne	r4, r0
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	cmp	r2, #0
+	str	r4, [r5, #8]
+	movne	lr, r0
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	str	lr, [r5, #4]
+	movne	r8, r0
+	str	r8, [r5]
+	add	sp, sp, #436
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end88:
-	.size	mcl_fp_shr1_6L, .Lfunc_end88-mcl_fp_shr1_6L
+.Lfunc_end45:
+	.size	mcl_fp_montRed8L, .Lfunc_end45-mcl_fp_montRed8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_add6L
-	.align	2
-	.type	mcl_fp_add6L,%function
-mcl_fp_add6L:                           @ @mcl_fp_add6L
+                                        @ -- End function
+	.globl	mcl_fp_montRedNF8L              @ -- Begin function mcl_fp_montRedNF8L
+	.p2align	2
+	.type	mcl_fp_montRedNF8L,%function
+	.code	32                              @ @mcl_fp_montRedNF8L
+mcl_fp_montRedNF8L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	ldm	r1, {r9, r12, lr}
-	ldr	r7, [r2]
-	ldr	r10, [r1, #12]
-	ldr	r11, [r1, #16]
-	ldr	r8, [r1, #20]
-	ldmib	r2, {r1, r4, r5, r6}
-	ldr	r2, [r2, #20]
-	adds	r7, r7, r9
-	adcs	r12, r1, r12
-	add	r1, r0, #8
-	adcs	r4, r4, lr
-	stm	r0, {r7, r12}
-	adcs	r5, r5, r10
-	adcs	r6, r6, r11
-	stm	r1, {r4, r5, r6}
-	adcs	r2, r2, r8
-	mov	r1, #0
-	str	r2, [r0, #20]
-	adc	r9, r1, #0
-	ldm	r3, {r1, lr}
-	ldr	r10, [r3, #8]
-	ldr	r11, [r3, #12]
-	ldr	r8, [r3, #16]
-	ldr	r3, [r3, #20]
-	subs	r7, r7, r1
-	sbcs	r1, r12, lr
-	sbcs	r10, r4, r10
-	sbcs	r12, r5, r11
-	sbcs	lr, r6, r8
-	sbcs	r4, r2, r3
-	sbc	r2, r9, #0
-	tst	r2, #1
-	streq	r7, [r0]
-	stmibeq	r0, {r1, r10, r12, lr}
-	streq	r4, [r0, #20]
+	.pad	#436
+	sub	sp, sp, #436
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	mov	r6, r2
+	ldr	r0, [r2]
+	mov	r5, r1
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [r2, #4]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [r2, #8]
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [r2, #12]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [r2, #16]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [r2, #20]
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [r2, #24]
+	ldr	r9, [r2, #-4]
+	ldr	r4, [r1]
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r1, #4]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	mul	r2, r4, r9
+	ldr	r0, [r1, #8]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [r1, #20]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [r1, #24]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r1, #28]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [r6, #28]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	add	r0, sp, #392
+	ldr	r10, [r1, #12]
+	ldr	r11, [r1, #16]
+	mov	r1, r6
+	bl	mulPv256x32
+	ldr	r7, [sp, #392]
+	add	lr, sp, #396
+	ldr	r8, [sp, #420]
+	adds	r4, r4, r7
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	ldr	r4, [sp, #72]                   @ 4-byte Reload
+	adcs	r4, r4, r0
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	adcs	r0, r10, r2
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r10, r11, r3
+	mul	r2, r9, r4
+	mov	r1, r6
+	adcs	r11, r0, r12
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r8, r0, r8
+	mrs	r0, apsr
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [r5, #32]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #424]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	add	r0, sp, #352
+	bl	mulPv256x32
+	ldr	r3, [sp, #352]
+	add	lr, sp, #356
+	ldr	r7, [sp, #376]
+	adds	r3, r4, r3
+	ldm	lr, {r0, r1, r2, r12, lr}
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r4, r3, r0
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	adcs	r10, r10, r2
+	mul	r2, r9, r4
+	adcs	r0, r11, r12
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	mov	r11, r9
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	adcs	r0, r8, r7
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	ldr	r0, [sp, #384]
+	adcs	r1, r1, r3
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	add	r0, sp, #312
+	mov	r1, r6
+	ldr	r9, [r5, #36]
+	ldr	r8, [sp, #380]
+	bl	mulPv256x32
+	add	r7, sp, #312
+	add	r12, sp, #324
+	ldm	r7, {r2, r3, r7}
+	adds	r2, r4, r2
+	ldm	r12, {r0, r1, r12}
+	ldr	r2, [sp, #64]                   @ 4-byte Reload
+	adcs	r4, r2, r3
+	adcs	r2, r10, r7
+	str	r2, [sp, #64]                   @ 4-byte Spill
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r2, r0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	mul	r2, r11, r4
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	ldr	r0, [sp, #344]
+	adcs	r1, r1, r8
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	ldr	r10, [r5, #40]
+	adcs	r1, r9, r1
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	add	r0, sp, #272
+	mov	r1, r6
+	ldr	r8, [sp, #340]
+	ldr	r9, [sp, #336]
+	bl	mulPv256x32
+	add	r7, sp, #272
+	ldr	r0, [sp, #288]
+	ldm	r7, {r1, r2, r3, r7}
+	adds	r1, r4, r1
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	adcs	r4, r1, r2
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	mul	r2, r11, r4
+	adcs	r1, r1, r7
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	ldr	r0, [sp, #304]
+	adcs	r1, r1, r9
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	ldr	r7, [r5, #44]
+	adcs	r1, r1, r8
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r10, r1
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	add	r0, sp, #232
+	mov	r1, r6
+	ldr	r8, [sp, #300]
+	ldr	r9, [sp, #296]
+	ldr	r10, [sp, #292]
+	bl	mulPv256x32
+	add	r3, sp, #232
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r4, r0
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r4, r0, r1
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	mul	r2, r11, r4
+	adcs	r0, r0, r3
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	ldr	r0, [sp, #264]
+	adcs	r1, r1, r10
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r9
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r7, r1
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [r5, #48]
+	mov	r1, r6
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	add	r0, sp, #192
+	ldr	r8, [sp, #260]
+	ldr	r9, [sp, #256]
+	ldr	r10, [sp, #252]
+	ldr	r7, [sp, #248]
+	bl	mulPv256x32
+	add	r3, sp, #192
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r4, r0
+	str	r3, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r4, r0, r1
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	mrs	r0, apsr
+	mul	r2, r11, r4
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	ldr	r3, [sp, #28]                   @ 4-byte Reload
+	adcs	r7, r1, r7
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	ldr	r0, [sp, #224]
+	adcs	r10, r1, r10
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r9, r1, r9
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r8, r1, r8
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r5, #52]
+	mov	r1, r6
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #220]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #216]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #212]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #208]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	add	r0, sp, #152
+	bl	mulPv256x32
+	add	r2, sp, #152
+	ldm	r2, {r0, r1, r2}
+	adds	r0, r4, r0
+	str	r2, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r2, [sp, #164]
+	adcs	r0, r0, r1
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	mrs	r1, apsr
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	mul	r2, r11, r0
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r1
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [r5, #56]
+	adcs	r4, r10, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #180]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	ldr	r0, [sp, #176]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	adcs	r11, r9, r1
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	ldr	r0, [sp, #172]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adcs	r7, r8, r1
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	ldr	r0, [sp, #168]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	adcs	r10, r1, r3
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	ldr	r0, [sp, #184]
+	adcs	r8, r3, r1
+	mov	r1, r6
+	adc	r9, r0, #0
+	add	r0, sp, #112
+	bl	mulPv256x32
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r12, r1, r0
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r1, r4, r0
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	adcs	r2, r11, r0
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r11, r7, r0
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	add	r7, sp, #112
+	adcs	r10, r10, r0
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r8, r8, r0
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r4, [sp, #144]
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adc	lr, r4, #0
+	ldm	r7, {r4, r6, r7}
+	adds	r4, r0, r4
+	ldr	r3, [sp, #124]
+	adcs	r4, r12, r6
+	str	r4, [sp, #72]                   @ 4-byte Spill
+	adcs	r1, r1, r7
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	adcs	r2, r2, r3
+	str	r2, [sp, #64]                   @ 4-byte Spill
+	ldr	r3, [sp, #128]
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r11, r11, r3
+	ldr	r3, [sp, #132]
+	adcs	r10, r10, r3
+	ldr	r3, [sp, #136]
+	adcs	r9, r8, r3
+	ldr	r3, [sp, #140]
+	adcs	r12, r0, r3
+	ldr	r3, [r5, #60]
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adc	r5, r3, lr
+	subs	r8, r4, r0
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	sbcs	lr, r1, r0
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	sbcs	r6, r2, r0
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r2, [sp, #108]                  @ 4-byte Reload
+	sbcs	r7, r11, r0
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	sbcs	r3, r10, r0
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	sbcs	r4, r9, r0
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	sbcs	r0, r12, r0
+	sbc	r1, r5, r1
+	cmn	r1, #1
+	movle	r0, r12
+	movgt	r5, r1
+	str	r0, [r2, #24]
+	movle	r4, r9
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	cmn	r1, #1
+	movle	r3, r10
+	movle	r7, r11
+	str	r5, [r2, #28]
+	movle	r6, r0
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	cmn	r1, #1
+	str	r4, [r2, #20]
+	str	r3, [r2, #16]
+	movle	lr, r0
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	str	r7, [r2, #12]
+	str	r6, [r2, #8]
+	movle	r8, r0
+	str	lr, [r2, #4]
+	str	r8, [r2]
+	add	sp, sp, #436
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end89:
-	.size	mcl_fp_add6L, .Lfunc_end89-mcl_fp_add6L
+.Lfunc_end46:
+	.size	mcl_fp_montRedNF8L, .Lfunc_end46-mcl_fp_montRedNF8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_addNF6L
-	.align	2
-	.type	mcl_fp_addNF6L,%function
-mcl_fp_addNF6L:                         @ @mcl_fp_addNF6L
+                                        @ -- End function
+	.globl	mcl_fp_addPre8L                 @ -- Begin function mcl_fp_addPre8L
+	.p2align	2
+	.type	mcl_fp_addPre8L,%function
+	.code	32                              @ @mcl_fp_addPre8L
+mcl_fp_addPre8L:
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	add	r11, r1, #8
-	ldm	r1, {r12, lr}
-	ldm	r11, {r9, r10, r11}
-	ldr	r7, [r2]
-	ldr	r8, [r1, #20]
-	ldmib	r2, {r1, r4, r5, r6}
-	ldr	r2, [r2, #20]
-	adds	r7, r7, r12
-	adcs	r1, r1, lr
-	adcs	r4, r4, r9
-	adcs	r9, r5, r10
-	adcs	lr, r6, r11
-	add	r11, r3, #8
-	adc	r12, r2, r8
-	ldm	r3, {r2, r6}
-	ldm	r11, {r5, r8, r10, r11}
-	subs	r2, r7, r2
-	sbcs	r6, r1, r6
-	sbcs	r5, r4, r5
-	sbcs	r3, r9, r8
-	sbcs	r8, lr, r10
-	sbc	r10, r12, r11
-	asr	r11, r10, #31
-	cmp	r11, #0
-	movlt	r2, r7
-	movlt	r6, r1
-	movlt	r5, r4
-	cmp	r11, #0
-	movlt	r3, r9
-	movlt	r8, lr
-	movlt	r10, r12
-	str	r2, [r0]
-	str	r6, [r0, #4]
-	str	r5, [r0, #8]
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, lr}
+	push	{r4, r5, r6, r7, r8, lr}
+	ldm	r2, {r3, r12, lr}
+	ldm	r1, {r5, r6, r7}
+	adds	r3, r5, r3
+	str	r3, [r0]
+	adcs	r3, r6, r12
+	ldr	r8, [r2, #12]
+	ldr	r4, [r1, #12]
+	str	r3, [r0, #4]
+	adcs	r3, r7, lr
+	str	r3, [r0, #8]
+	adcs	r3, r4, r8
 	str	r3, [r0, #12]
-	str	r8, [r0, #16]
-	str	r10, [r0, #20]
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end90:
-	.size	mcl_fp_addNF6L, .Lfunc_end90-mcl_fp_addNF6L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub6L
-	.align	2
-	.type	mcl_fp_sub6L,%function
-mcl_fp_sub6L:                           @ @mcl_fp_sub6L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	ldr	r9, [r2]
-	ldmib	r2, {r8, r12, lr}
-	ldr	r10, [r2, #16]
-	ldr	r11, [r2, #20]
-	ldm	r1, {r2, r4, r5, r6, r7}
-	ldr	r1, [r1, #20]
-	subs	r9, r2, r9
-	sbcs	r2, r4, r8
-	str	r9, [r0]
-	sbcs	r12, r5, r12
-	sbcs	lr, r6, lr
-	sbcs	r4, r7, r10
-	stmib	r0, {r2, r12, lr}
-	sbcs	r5, r1, r11
-	mov	r1, #0
-	str	r4, [r0, #16]
-	sbc	r1, r1, #0
-	str	r5, [r0, #20]
-	tst	r1, #1
-	popeq	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	moveq	pc, lr
-	ldm	r3, {r1, r6, r7, r8, r10}
-	ldr	r3, [r3, #20]
-	adds	r1, r1, r9
-	adcs	r2, r6, r2
-	adcs	r7, r7, r12
-	adcs	r6, r8, lr
-	stm	r0, {r1, r2, r7}
-	adcs	r4, r10, r4
-	str	r6, [r0, #12]
-	adc	r3, r3, r5
-	str	r4, [r0, #16]
+	ldr	r3, [r2, #16]
+	ldr	r7, [r1, #16]
+	adcs	r3, r7, r3
+	str	r3, [r0, #16]
+	ldr	r3, [r2, #20]
+	ldr	r7, [r1, #20]
+	adcs	r3, r7, r3
 	str	r3, [r0, #20]
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	ldr	r3, [r2, #24]
+	ldr	r7, [r1, #24]
+	ldr	r2, [r2, #28]
+	ldr	r1, [r1, #28]
+	adcs	r3, r7, r3
+	str	r3, [r0, #24]
+	adcs	r1, r1, r2
+	str	r1, [r0, #28]
+	mov	r0, #0
+	adc	r0, r0, #0
+	pop	{r4, r5, r6, r7, r8, lr}
 	mov	pc, lr
-.Lfunc_end91:
-	.size	mcl_fp_sub6L, .Lfunc_end91-mcl_fp_sub6L
+.Lfunc_end47:
+	.size	mcl_fp_addPre8L, .Lfunc_end47-mcl_fp_addPre8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_subNF6L
-	.align	2
-	.type	mcl_fp_subNF6L,%function
-mcl_fp_subNF6L:                         @ @mcl_fp_subNF6L
+                                        @ -- End function
+	.globl	mcl_fp_subPre8L                 @ -- Begin function mcl_fp_subPre8L
+	.p2align	2
+	.type	mcl_fp_subPre8L,%function
+	.code	32                              @ @mcl_fp_subPre8L
+mcl_fp_subPre8L:
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, lr}
+	push	{r4, r5, r6, r7, r8, lr}
+	ldm	r2, {r3, r12, lr}
+	ldm	r1, {r5, r6, r7}
+	subs	r3, r5, r3
+	str	r3, [r0]
+	sbcs	r3, r6, r12
+	ldr	r8, [r2, #12]
+	ldr	r4, [r1, #12]
+	str	r3, [r0, #4]
+	sbcs	r3, r7, lr
+	str	r3, [r0, #8]
+	sbcs	r3, r4, r8
+	str	r3, [r0, #12]
+	ldr	r3, [r2, #16]
+	ldr	r7, [r1, #16]
+	sbcs	r3, r7, r3
+	str	r3, [r0, #16]
+	ldr	r3, [r2, #20]
+	ldr	r7, [r1, #20]
+	sbcs	r3, r7, r3
+	str	r3, [r0, #20]
+	ldr	r3, [r2, #24]
+	ldr	r7, [r1, #24]
+	ldr	r2, [r2, #28]
+	ldr	r1, [r1, #28]
+	sbcs	r3, r7, r3
+	str	r3, [r0, #24]
+	sbcs	r1, r1, r2
+	str	r1, [r0, #28]
+	mov	r0, #0
+	sbc	r0, r0, #0
+	and	r0, r0, #1
+	pop	{r4, r5, r6, r7, r8, lr}
+	mov	pc, lr
+.Lfunc_end48:
+	.size	mcl_fp_subPre8L, .Lfunc_end48-mcl_fp_subPre8L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_shr1_8L                  @ -- Begin function mcl_fp_shr1_8L
+	.p2align	2
+	.type	mcl_fp_shr1_8L,%function
+	.code	32                              @ @mcl_fp_shr1_8L
+mcl_fp_shr1_8L:                         @ z[0..7] (r0) = x[0..7] (r1) shifted right by one bit across all 8 words; bit 0 of x[0] is dropped, top bit of z[7] is 0
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, lr}
+	add	r6, r1, #8
+	ldm	r1, {r12, lr}                   @ r12 = x[0], lr = x[1]
+	ldr	r1, [r1, #28]                   @ r1 = x[7]
+	ldm	r6, {r2, r3, r4, r5, r6}        @ r2..r6 = x[2..6]
+	lsr	r7, r5, #1
+	orr	r8, r7, r6, lsl #31             @ z[5] = (x[5] >> 1) | (x[6] << 31)
+	lsr	r10, r1, #1                     @ z[7] = x[7] >> 1
+	lsrs	r1, r1, #1                      @ C = bit 0 of x[7]
+	lsr	r7, r3, #1
+	rrx	r1, r6                          @ z[6] = (C << 31) | (x[6] >> 1)
+	lsrs	r5, r5, #1                      @ C = bit 0 of x[5]
+	orr	r9, r7, r4, lsl #31             @ z[3] = (x[3] >> 1) | (x[4] << 31)
+	lsr	r7, lr, #1
+	rrx	r4, r4                          @ z[4] = (C << 31) | (x[4] >> 1)
+	lsrs	r3, r3, #1                      @ C = bit 0 of x[3]; the r3 result itself is overwritten below
+	orr	r7, r7, r2, lsl #31             @ z[1] = (x[1] >> 1) | (x[2] << 31)
+	rrx	r2, r2                          @ z[2] = (C << 31) | (x[2] >> 1)
+	lsrs	r3, lr, #1                      @ C = bit 0 of x[1]
+	rrx	r3, r12                         @ z[0] = (C << 31) | (x[0] >> 1)
+	stm	r0, {r3, r7}                    @ store z[0], z[1]
+	str	r2, [r0, #8]
+	str	r9, [r0, #12]
+	str	r4, [r0, #16]
+	str	r8, [r0, #20]
+	str	r1, [r0, #24]
+	str	r10, [r0, #28]
+	pop	{r4, r5, r6, r7, r8, r9, r10, lr}
+	mov	pc, lr
+.Lfunc_end49:
+	.size	mcl_fp_shr1_8L, .Lfunc_end49-mcl_fp_shr1_8L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_add8L                    @ -- Begin function mcl_fp_add8L
+	.p2align	2
+	.type	mcl_fp_add8L,%function
+	.code	32                              @ @mcl_fp_add8L
+mcl_fp_add8L:                           @ stores s = x (r1) + y (r2), 8 words, to z (r0); then overwrites z with s - p unless bit 0 of (carry - borrow) is set; p is read via r3 (presumably the field modulus - confirm against caller)
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	add	r11, r2, #8
+	.pad	#20
+	sub	sp, sp, #20
 	ldm	r2, {r12, lr}
-	ldm	r11, {r9, r10, r11}
 	ldr	r7, [r1]
-	ldr	r8, [r2, #20]
-	ldmib	r1, {r2, r4, r5, r6}
-	ldr	r1, [r1, #20]
-	subs	r7, r7, r12
-	sbcs	r2, r2, lr
-	sbcs	r9, r4, r9
-	sbcs	lr, r5, r10
-	ldr	r5, [r3, #4]
-	sbcs	r12, r6, r11
-	ldr	r6, [r3]
-	add	r11, r3, #8
-	sbc	r1, r1, r8
-	ldm	r11, {r4, r8, r10, r11}
-	adds	r6, r7, r6
-	adcs	r5, r2, r5
-	adcs	r4, r9, r4
-	adcs	r3, lr, r8
-	adcs	r8, r12, r10
-	adc	r10, r1, r11
-	asr	r11, r1, #31
-	cmp	r11, #0
-	movge	r6, r7
-	movge	r5, r2
-	movge	r4, r9
-	cmp	r11, #0
-	movge	r3, lr
-	movge	r8, r12
-	movge	r10, r1
-	str	r6, [r0]
-	str	r5, [r0, #4]
-	str	r4, [r0, #8]
-	str	r3, [r0, #12]
+	ldmib	r1, {r4, r5, r6}                @ x[1..3]
+	adds	r12, r7, r12                    @ s[0] = x[0] + y[0]: starts the carry chain
+	ldr	r9, [r2, #8]
+	adcs	lr, r4, lr                      @ s[1]
+	ldr	r8, [r2, #12]
+	adcs	r10, r5, r9                     @ s[2]
+	ldr	r5, [r2, #16]
+	ldr	r4, [r1, #16]
+	adcs	r9, r6, r8                      @ s[3]
+	ldr	r7, [r2, #20]
+	adcs	r8, r4, r5                      @ s[4]
+	ldr	r4, [r1, #20]
+	ldr	r6, [r1, #24]
+	adcs	r7, r4, r7                      @ s[5]
+	ldr	r4, [r2, #24]
+	str	r12, [sp, #16]                  @ 4-byte Spill
+	ldr	r2, [r2, #28]
+	adcs	r6, r6, r4                      @ s[6]
+	ldr	r1, [r1, #28]
+	stm	r0, {r12, lr}                   @ z[0], z[1] = s[0], s[1] (unreduced sum is stored first)
+	adcs	r5, r1, r2                      @ s[7]
+	str	r9, [sp, #4]                    @ 4-byte Spill
+	mov	r2, #0
+	str	r9, [r0, #12]
+	adc	r1, r2, #0                      @ r1 = carry out of the 8-word addition (0 or 1)
+	ldm	r3, {r4, r11}                   @ p[0], p[1]
+	ldr	r9, [sp, #16]                   @ 4-byte Reload
+	str	lr, [sp, #12]                   @ 4-byte Spill
+	add	lr, r3, #12
+	subs	r9, r9, r4                      @ t[0] = s[0] - p[0]: starts the borrow chain
+	ldr	r4, [sp, #12]                   @ 4-byte Reload
+	str	r10, [sp, #8]                   @ 4-byte Spill
+	str	r10, [r0, #8]
+	sbcs	r11, r4, r11                    @ t[1]
+	ldr	r10, [r3, #8]
+	ldr	r4, [sp, #8]                    @ 4-byte Reload
+	str	r1, [sp]                        @ 4-byte Spill
+	sbcs	r10, r4, r10                    @ t[2]
+	ldm	lr, {r1, r2, r12, lr}           @ p[3..6]
+	ldr	r4, [sp, #4]                    @ 4-byte Reload
+	ldr	r3, [r3, #28]                   @ p[7]
+	sbcs	r4, r4, r1                      @ t[3]
+	str	r7, [r0, #20]
+	sbcs	r1, r8, r2                      @ t[4]
+	str	r6, [r0, #24]
+	sbcs	r2, r7, r12                     @ t[5]
 	str	r8, [r0, #16]
-	str	r10, [r0, #20]
+	sbcs	r7, r6, lr                      @ t[6]
+	str	r5, [r0, #28]
+	sbcs	r6, r5, r3                      @ t[7]
+	ldr	r3, [sp]                        @ 4-byte Reload
+	sbc	r3, r3, #0                      @ r3 = addition carry - subtraction borrow
+	tst	r3, #1
+	bne	.LBB50_2                        @ bit 0 set: keep the unreduced sum already in z
+@ %bb.1:                                @ %nocarry
+	add	r3, r0, #16
+	stm	r0, {r9, r11}                   @ overwrite z with t = s - p
+	str	r10, [r0, #8]
+	str	r4, [r0, #12]
+	stm	r3, {r1, r2, r7}
+	str	r6, [r0, #28]
+.LBB50_2:                               @ %carry
+	add	sp, sp, #20
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end92:
-	.size	mcl_fp_subNF6L, .Lfunc_end92-mcl_fp_subNF6L
+.Lfunc_end50:
+	.size	mcl_fp_add8L, .Lfunc_end50-mcl_fp_add8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_add6L
-	.align	2
-	.type	mcl_fpDbl_add6L,%function
-mcl_fpDbl_add6L:                        @ @mcl_fpDbl_add6L
+                                        @ -- End function
+	.globl	mcl_fp_addNF8L                  @ -- Begin function mcl_fp_addNF8L
+	.p2align	2
+	.type	mcl_fp_addNF8L,%function
+	.code	32                              @ @mcl_fp_addNF8L
+mcl_fp_addNF8L:                         @ s = x (r1) + y (r2), t = s - p, 8 words each; z (r0) = t when the top word of t is non-negative (signed), else s; p is read via r3 (presumably the field modulus - confirm against caller)
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#32
-	sub	sp, sp, #32
+	.pad	#28
+	sub	sp, sp, #28
 	ldm	r1, {r12, lr}
+	ldr	r7, [r2]
+	ldmib	r2, {r4, r5, r6}                @ y[1..3]
+	adds	r9, r7, r12                     @ s[0]: starts the carry chain
 	ldr	r8, [r1, #8]
-	ldr	r10, [r1, #12]
-	ldmib	r2, {r6, r7}
-	ldr	r5, [r2, #16]
-	ldr	r11, [r2]
-	ldr	r4, [r2, #12]
-	str	r5, [sp]                @ 4-byte Spill
-	ldr	r5, [r2, #20]
-	adds	r9, r11, r12
-	add	r11, r1, #32
-	adcs	r6, r6, lr
-	add	lr, r1, #16
-	adcs	r7, r7, r8
-	str	r5, [sp, #4]            @ 4-byte Spill
+	adcs	r10, r4, lr                     @ s[1]
+	ldr	r11, [r1, #12]
+	adcs	lr, r5, r8                      @ s[2]
+	ldr	r4, [r1, #16]
+	adcs	r12, r6, r11                    @ s[3]
+	ldr	r6, [r2, #16]
+	ldr	r7, [r1, #20]
+	adcs	r4, r6, r4                      @ s[4]
+	str	r4, [sp, #12]                   @ 4-byte Spill
+	ldr	r4, [r2, #20]
 	ldr	r5, [r2, #24]
-	str	r5, [sp, #16]           @ 4-byte Spill
-	ldr	r5, [r2, #28]
-	str	r5, [sp, #28]           @ 4-byte Spill
-	ldr	r5, [r2, #32]
-	str	r5, [sp, #8]            @ 4-byte Spill
-	ldr	r5, [r2, #36]
-	str	r5, [sp, #12]           @ 4-byte Spill
-	ldr	r5, [r2, #40]
-	ldr	r2, [r2, #44]
-	str	r5, [sp, #20]           @ 4-byte Spill
-	str	r2, [sp, #24]           @ 4-byte Spill
-	adcs	r5, r4, r10
-	ldm	r11, {r4, r8, r11}
-	ldr	r10, [r1, #44]
-	ldm	lr, {r1, r2, r12, lr}
-	str	r9, [r0]
-	stmib	r0, {r6, r7}
-	ldr	r6, [sp]                @ 4-byte Reload
-	str	r5, [r0, #12]
-	ldr	r5, [sp, #4]            @ 4-byte Reload
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	adcs	r1, r6, r1
-	adcs	r2, r5, r2
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r5, [r3]
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r1, r12
-	adcs	r2, r2, lr
-	adcs	r12, r7, r4
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	mov	r4, #0
-	adcs	r9, r7, r8
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	adcs	r8, r7, r11
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	adcs	lr, r7, r10
-	adc	r7, r4, #0
-	ldmib	r3, {r4, r6, r10, r11}
-	subs	r5, r1, r5
-	ldr	r3, [r3, #20]
-	sbcs	r4, r2, r4
-	sbcs	r6, r12, r6
-	sbcs	r10, r9, r10
-	sbcs	r11, r8, r11
-	sbcs	r3, lr, r3
-	sbc	r7, r7, #0
-	ands	r7, r7, #1
-	movne	r5, r1
-	movne	r4, r2
-	movne	r6, r12
-	cmp	r7, #0
-	add	r1, r0, #32
-	movne	r10, r9
-	movne	r11, r8
-	movne	r3, lr
+	adcs	r4, r4, r7                      @ s[5]
+	ldr	r7, [r1, #24]
+	ldr	r1, [r1, #28]
+	ldr	r2, [r2, #28]
+	str	r4, [sp, #8]                    @ 4-byte Spill
+	adcs	r4, r5, r7                      @ s[6]
+	adc	r2, r2, r1                      @ s[7]; the carry out of this word is not captured
+	ldm	r3, {r1, r7}                    @ p[0], p[1]
+	str	r9, [sp, #24]                   @ 4-byte Spill
+	subs	r9, r9, r1                      @ t[0] = s[0] - p[0]: starts the borrow chain
+	ldr	r5, [r3, #16]
+	sbcs	r8, r10, r7                     @ t[1]
+	ldr	r6, [r3, #8]
+	str	r5, [sp]                        @ 4-byte Spill
+	str	r4, [sp, #4]                    @ 4-byte Spill
+	sbcs	r6, lr, r6                      @ t[2]
+	ldr	r4, [r3, #12]
+	ldr	r7, [sp, #12]                   @ 4-byte Reload
+	ldr	r1, [sp]                        @ 4-byte Reload
+	sbcs	r4, r12, r4                     @ t[3]
+	str	r10, [sp, #20]                  @ 4-byte Spill
+	mov	r10, r12                        @ keep s[3] for the select below
+	sbcs	r12, r7, r1                     @ t[4]
+	ldr	r11, [r3, #20]
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	str	lr, [sp, #16]                   @ 4-byte Spill
+	ldr	r5, [r3, #24]
+	sbcs	lr, r1, r11                     @ t[5]
+	ldr	r11, [sp, #4]                   @ 4-byte Reload
+	ldr	r3, [r3, #28]
+	sbcs	r5, r11, r5                     @ t[6]
+	sbc	r3, r2, r3                      @ t[7]; its sign drives the select
+	cmn	r3, #1                          @ compare t[7] with -1: le = t[7] negative, gt = t[7] non-negative
+	movle	lr, r1                          @ t[7] negative: take s[5]
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	movgt	r2, r3                          @ t[7] non-negative: take t[7], otherwise r2 keeps s[7]
+	movle	r5, r11                         @ s[6]
+	cmn	r3, #1
+	str	r2, [r0, #28]
+	movle	r6, r1                          @ s[2]
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	movle	r12, r7                         @ s[4]
+	movle	r4, r10                         @ s[3]
+	cmn	r3, #1
 	str	r5, [r0, #24]
-	str	r4, [r0, #28]
-	stm	r1, {r6, r10, r11}
-	str	r3, [r0, #44]
-	add	sp, sp, #32
+	movle	r8, r1                          @ s[1]
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	str	lr, [r0, #20]
+	str	r12, [r0, #16]
+	movle	r9, r1                          @ s[0]
+	str	r4, [r0, #12]
+	str	r6, [r0, #8]
+	str	r8, [r0, #4]
+	str	r9, [r0]
+	add	sp, sp, #28
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end93:
-	.size	mcl_fpDbl_add6L, .Lfunc_end93-mcl_fpDbl_add6L
+.Lfunc_end51:
+	.size	mcl_fp_addNF8L, .Lfunc_end51-mcl_fp_addNF8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_sub6L
-	.align	2
-	.type	mcl_fpDbl_sub6L,%function
-mcl_fpDbl_sub6L:                        @ @mcl_fpDbl_sub6L
+                                        @ -- End function
+	.globl	mcl_fp_sub8L                    @ -- Begin function mcl_fp_sub8L
+	.p2align	2
+	.type	mcl_fp_sub8L,%function
+	.code	32                              @ @mcl_fp_sub8L
+mcl_fp_sub8L:                           @ stores d = x (r1) - y (r2), 8 words, to z (r0); if the subtraction borrowed, rewrites z with d + p; p is read via r3 (presumably the field modulus - confirm against caller)
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#44
-	sub	sp, sp, #44
-	ldr	r6, [r2, #8]
-	ldr	r7, [r2, #32]
-	add	r10, r1, #12
-	str	r6, [sp]                @ 4-byte Spill
-	ldr	r6, [r2, #12]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r6, [sp, #4]            @ 4-byte Spill
+	ldr	r9, [r2]                        @ y[0]
+	ldm	r1, {r4, r5, r6, r7}            @ x[0..3]
+	ldmib	r2, {r8, lr}                    @ y[1], y[2]
+	subs	r4, r4, r9                      @ d[0]: starts the borrow chain
+	sbcs	r9, r5, r8                      @ d[1]
+	ldr	r12, [r2, #12]
+	sbcs	r10, r6, lr                     @ d[2]
 	ldr	r6, [r2, #16]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r6, [sp, #8]            @ 4-byte Spill
-	ldr	r6, [r2, #20]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r6, [sp, #12]           @ 4-byte Spill
-	ldr	r6, [r2, #24]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [r1, #44]
-	str	r6, [sp, #16]           @ 4-byte Spill
-	ldr	r6, [r2, #28]
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [r2, #4]
-	ldr	r2, [r2]
-	str	r6, [sp, #20]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldm	r1, {r11, r12, lr}
-	ldr	r6, [sp]                @ 4-byte Reload
-	subs	r2, r11, r2
-	ldr	r11, [r1, #40]
-	sbcs	r7, r12, r7
-	ldr	r12, [r1, #36]
-	ldr	r1, [r1, #32]
-	sbcs	lr, lr, r6
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	stm	r0, {r2, r7, lr}
-	mov	lr, #0
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	sbcs	r4, r4, r6
-	str	r4, [r0, #12]
-	sbcs	r2, r5, r2
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	str	r2, [r0, #16]
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	sbcs	r2, r8, r2
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	sbcs	r7, r9, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	sbcs	r6, r10, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
+	sbcs	r11, r7, r12                    @ d[3]
+	ldr	r7, [r1, #16]
+	ldr	r5, [r2, #20]
+	sbcs	r8, r7, r6                      @ d[4]
+	ldr	r7, [r1, #20]
+	ldr	r6, [r1, #24]
+	sbcs	r12, r7, r5                     @ d[5]
+	ldr	r7, [r2, #24]
+	ldr	r2, [r2, #28]
+	ldr	r1, [r1, #28]
+	sbcs	lr, r6, r7                      @ d[6]
+	stm	r0, {r4, r9, r10, r11}          @ z[0..3] = d[0..3]
 	sbcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	sbcs	r10, r12, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	sbcs	r9, r11, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	sbcs	r8, r5, r2
-	sbc	r12, lr, #0
-	ldm	r3, {r2, r4, r5, lr}
-	ldr	r11, [r3, #16]
-	ldr	r3, [r3, #20]
-	adds	r2, r7, r2
-	adcs	r4, r6, r4
-	adcs	r5, r1, r5
-	adcs	lr, r10, lr
-	adcs	r11, r9, r11
-	adc	r3, r8, r3
-	ands	r12, r12, #1
-	moveq	r2, r7
-	moveq	r4, r6
-	moveq	r5, r1
-	cmp	r12, #0
-	moveq	lr, r10
-	moveq	r11, r9
-	moveq	r3, r8
-	str	r2, [r0, #24]
-	str	r4, [r0, #28]
-	str	r5, [r0, #32]
-	str	lr, [r0, #36]
-	str	r11, [r0, #40]
-	str	r3, [r0, #44]
-	add	sp, sp, #44
+	add	r2, r0, #16
+	stm	r2, {r8, r12, lr}               @ z[4..6] = d[4..6]
+	mov	r2, #0
+	sbc	r2, r2, #0                      @ r2 = 0 - 0 - borrow: 0 or 0xFFFFFFFF
+	tst	r2, #1
+	str	r1, [r0, #28]
+	bne	.LBB52_2                        @ borrow occurred: go add p back
+@ %bb.1:                                @ %nocarry
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end94:
-	.size	mcl_fpDbl_sub6L, .Lfunc_end94-mcl_fpDbl_sub6L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mulUnitPre7L
-	.align	2
-	.type	mcl_fp_mulUnitPre7L,%function
-mcl_fp_mulUnitPre7L:                    @ @mcl_fp_mulUnitPre7L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r11, [r1, #12]
-	ldr	r10, [r1, #16]
-	ldr	r9, [r1, #20]
-	ldr	r8, [r1, #24]
-	umull	r7, r1, lr, r2
-	umull	lr, r4, r12, r2
-	mov	r5, r4
-	mov	r6, r7
-	str	lr, [r0]
-	umlal	r5, r6, r3, r2
-	stmib	r0, {r5, r6}
-	umull	r6, r5, r3, r2
-	adds	r3, r4, r6
-	umull	r3, r6, r11, r2
-	adcs	r7, r5, r7
-	adcs	r1, r1, r3
-	str	r1, [r0, #12]
-	umull	r1, r3, r10, r2
-	adcs	r1, r6, r1
-	str	r1, [r0, #16]
-	umull	r1, r7, r9, r2
-	adcs	r1, r3, r1
-	str	r1, [r0, #20]
-	umull	r1, r3, r8, r2
-	adcs	r1, r7, r1
-	str	r1, [r0, #24]
-	adc	r1, r3, #0
+.LBB52_2:                               @ %carry
+	ldm	r3, {r2, r5, r6, r7}            @ p[0..3]
+	adds	r2, r2, r4                      @ z[0] = p[0] + d[0]: starts the carry chain
+	str	r2, [r0]
+	adcs	r2, r5, r9                      @ z[1]
+	str	r2, [r0, #4]
+	adcs	r2, r6, r10                     @ z[2]
+	str	r2, [r0, #8]
+	adcs	r2, r7, r11                     @ z[3]
+	str	r2, [r0, #12]
+	ldr	r2, [r3, #16]
+	adcs	r2, r2, r8                      @ z[4]
+	str	r2, [r0, #16]
+	ldr	r2, [r3, #20]
+	adcs	r2, r2, r12                     @ z[5]
+	str	r2, [r0, #20]
+	ldr	r2, [r3, #24]
+	adcs	r2, r2, lr                      @ z[6]
+	str	r2, [r0, #24]
+	ldr	r2, [r3, #28]
+	adc	r1, r2, r1                      @ z[7]; the final carry out is discarded
 	str	r1, [r0, #28]
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end95:
-	.size	mcl_fp_mulUnitPre7L, .Lfunc_end95-mcl_fp_mulUnitPre7L
+.Lfunc_end52:
+	.size	mcl_fp_sub8L, .Lfunc_end52-mcl_fp_sub8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_mulPre7L
-	.align	2
-	.type	mcl_fpDbl_mulPre7L,%function
-mcl_fpDbl_mulPre7L:                     @ @mcl_fpDbl_mulPre7L
+                                        @ -- End function
+	.globl	mcl_fp_subNF8L                  @ -- Begin function mcl_fp_subNF8L
+	.p2align	2
+	.type	mcl_fp_subNF8L,%function
+	.code	32                              @ @mcl_fp_subNF8L
+mcl_fp_subNF8L:                         @ d = x (r1) - y (r2), e = d + p, 8 words each; z (r0) = d when the top word of d is non-negative (signed), else e; p is read via r3 (presumably the field modulus - confirm against caller)
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#68
-	sub	sp, sp, #68
-	mov	r3, r2
-	ldr	r7, [r1]
-	ldr	lr, [r1, #4]
-	mov	r9, r0
-	ldr	r0, [r1, #8]
-	ldr	r2, [r1, #12]
-	ldr	r10, [r1, #16]
-	ldr	r8, [r1, #20]
-	str	r3, [sp, #64]           @ 4-byte Spill
-	ldr	r3, [r3]
-	str	r9, [sp, #60]           @ 4-byte Spill
-	str	r7, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #56]           @ 4-byte Spill
-	str	r2, [sp, #44]           @ 4-byte Spill
-	umull	r5, r4, r7, r3
-	umull	r6, r12, lr, r3
-	adds	r6, r4, r6
-	str	r5, [sp, #48]           @ 4-byte Spill
-	umull	r5, r6, r0, r3
-	adcs	r7, r12, r5
-	umlal	r4, r5, lr, r3
-	umull	r7, r11, r2, r3
-	adcs	r0, r6, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	umull	r6, r0, r10, r3
-	adcs	r2, r11, r6
-	umull	r11, r7, r8, r3
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r2, [sp, #40]           @ 4-byte Spill
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r1, #24]
-	umull	r11, r12, r0, r3
-	adcs	r2, r7, r11
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r2, [r9]
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	ldr	r3, [r2, #4]
-	umull	r11, r7, r6, r3
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adc	r7, r12, #0
-	str	r7, [sp, #16]           @ 4-byte Spill
-	adds	r7, r11, r4
-	str	r7, [sp, #48]           @ 4-byte Spill
-	umull	r4, r7, lr, r3
-	str	r7, [sp, #28]           @ 4-byte Spill
-	adcs	r7, r4, r5
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	umull	r4, r5, r7, r3
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	str	r5, [sp, #24]           @ 4-byte Spill
-	umull	r5, r6, r7, r3
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	str	r6, [sp, #44]           @ 4-byte Spill
-	ldr	r6, [sp, #20]           @ 4-byte Reload
-	adcs	r11, r5, r7
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	adcs	lr, r4, r7
-	umull	r9, r7, r10, r3
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adcs	r7, r9, r7
-	umull	r4, r9, r8, r3
-	adcs	r4, r4, r6
-	umull	r6, r12, r0, r3
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r3, r6, r0
-	mov	r0, #0
-	adc	r6, r0, #0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adds	r8, r5, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r5, r11, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	ldr	lr, [r1, #12]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r7, r7, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	str	r7, [sp, #24]           @ 4-byte Spill
-	adcs	r7, r4, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r4, [r1, #4]
-	adcs	r3, r3, r9
-	ldr	r9, [r1, #8]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	str	r3, [sp, #40]           @ 4-byte Spill
-	adc	r3, r6, r12
-	ldr	r6, [r2, #8]
-	str	r3, [sp, #44]           @ 4-byte Spill
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	str	r4, [sp, #52]           @ 4-byte Spill
-	str	r3, [r0, #4]
-	ldr	r3, [r1]
-	umull	r12, r7, r3, r6
-	str	r3, [sp, #56]           @ 4-byte Spill
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adds	r3, r12, r8
-	umull	r7, r0, r4, r6
-	ldr	r12, [r1, #24]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adcs	r2, r7, r5
-	umull	r7, r0, r9, r6
-	str	r3, [sp, #48]           @ 4-byte Spill
-	ldr	r10, [sp, #32]          @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	umull	r5, r0, lr, r6
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [r1, #16]
-	umull	r11, r3, r0, r6
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	str	r3, [sp, #8]            @ 4-byte Spill
-	ldr	r3, [r1, #20]
-	adcs	r11, r11, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	umull	r8, r4, r3, r6
-	adcs	r8, r8, r0
-	umull	r7, r0, r12, r6
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	adcs	r6, r7, r6
-	mov	r7, #0
-	adc	r7, r7, #0
-	adds	r2, r2, r10
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adcs	r2, r5, r2
-	ldr	r5, [sp, #4]            @ 4-byte Reload
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r10, r5, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	adcs	r11, r11, r2
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adcs	r2, r8, r2
-	ldr	r8, [sp, #56]           @ 4-byte Reload
-	str	r2, [sp, #28]           @ 4-byte Spill
-	adcs	r2, r6, r4
-	adc	r0, r7, r0
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	str	r2, [sp, #36]           @ 4-byte Spill
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	str	r0, [r7, #8]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	ldr	r6, [r0, #12]
-	umull	r2, r4, lr, r6
-	str	r4, [sp, #48]           @ 4-byte Spill
-	umull	lr, r4, r9, r6
-	str	r4, [sp, #44]           @ 4-byte Spill
-	ldr	r4, [sp, #52]           @ 4-byte Reload
-	umull	r9, r5, r4, r6
-	str	r5, [sp, #32]           @ 4-byte Spill
-	umull	r4, r5, r8, r6
-	str	r5, [sp, #52]           @ 4-byte Spill
-	ldr	r5, [sp]                @ 4-byte Reload
-	adds	r4, r4, r5
-	umull	r5, r8, r3, r6
-	str	r4, [sp, #56]           @ 4-byte Spill
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	adcs	r9, r9, r4
-	adcs	lr, lr, r10
-	adcs	r11, r2, r11
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	umull	r4, r10, r2, r6
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adcs	r4, r4, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	adcs	r3, r5, r2
-	umull	r5, r2, r12, r6
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	adcs	r12, r5, r6
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	mov	r5, #0
-	adc	r5, r5, #0
-	adds	r9, r9, r6
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	adcs	lr, lr, r6
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	adcs	r6, r11, r6
-	ldr	r11, [r1, #8]
-	str	r6, [sp, #20]           @ 4-byte Spill
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	adcs	r4, r4, r6
-	adcs	r3, r3, r10
-	str	r4, [sp, #28]           @ 4-byte Spill
-	ldr	r4, [r1, #12]
-	adcs	r12, r12, r8
-	str	r3, [sp, #40]           @ 4-byte Spill
-	adc	r2, r5, r2
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r2, [r7, #12]
-	ldr	r7, [r0, #16]
-	ldr	r0, [r1]
-	ldr	r2, [r1, #4]
-	umull	r8, r3, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	str	r2, [sp, #52]           @ 4-byte Spill
-	adds	r0, r8, r9
-	str	r3, [sp, #36]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	umull	r6, r0, r2, r7
-	ldr	r2, [r1, #24]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r6, lr
-	ldr	lr, [r1, #16]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	umull	r6, r0, r11, r7
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	mov	r6, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	umull	r3, r0, r4, r7
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	ldr	r3, [r1, #20]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	umull	r10, r0, lr, r7
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	umull	r9, r5, r3, r7
-	adcs	r10, r10, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r9, r9, r12
-	umull	r8, r12, r2, r7
-	adcs	r7, r8, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r8, r6, #0
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	adds	r0, r6, r0
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r10, r10, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adcs	r0, r7, r5
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adc	r0, r8, r12
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	str	r7, [r0, #16]
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	ldr	r7, [r7, #20]
-	umull	r8, r6, r4, r7
-	str	r6, [sp, #48]           @ 4-byte Spill
-	umull	r4, r6, r11, r7
-	str	r6, [sp, #40]           @ 4-byte Spill
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	umull	r11, r5, r6, r7
-	ldr	r6, [sp, #56]           @ 4-byte Reload
-	str	r5, [sp, #28]           @ 4-byte Spill
-	umull	r5, r9, r6, r7
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	adds	r6, r5, r6
-	str	r6, [sp, #44]           @ 4-byte Spill
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	adcs	r11, r11, r6
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	adcs	r12, r4, r6
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	adcs	r10, r8, r10
-	umull	r5, r8, lr, r7
-	umull	r4, lr, r3, r7
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	adcs	r5, r5, r6
-	adcs	r3, r4, r3
-	umull	r4, r6, r2, r7
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	adcs	r2, r4, r2
-	mov	r4, #0
-	adc	r4, r4, #0
-	adds	r7, r11, r9
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adcs	r7, r12, r7
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	adcs	r9, r10, r7
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	adcs	r11, r5, r7
-	adcs	r3, r3, r8
-	adcs	r2, r2, lr
-	str	r3, [sp, #40]           @ 4-byte Spill
-	str	r2, [sp, #52]           @ 4-byte Spill
-	adc	r2, r4, r6
-	ldr	r6, [r1]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	ldr	r4, [r0, #24]
-	ldmib	r1, {r0, r3, r5}
-	umull	r12, r2, r5, r4
-	str	r2, [sp, #64]           @ 4-byte Spill
-	umull	r5, r2, r3, r4
-	umull	r3, r10, r0, r4
-	umull	r0, r8, r6, r4
-	ldr	r6, [r1, #16]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	adds	r0, r0, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	lr, r3, r0
+	.pad	#36
+	sub	sp, sp, #36
+	ldr	r7, [r2, #8]
+	str	r7, [sp, #20]                   @ 4-byte Spill
+	ldr	r7, [r2, #16]
+	str	r7, [sp, #24]                   @ 4-byte Spill
+	ldr	r7, [r2, #20]
+	str	r7, [sp, #28]                   @ 4-byte Spill
+	ldr	r7, [r2, #24]
+	ldm	r2, {r6, r8}                    @ y[0], y[1]
+	ldr	r11, [r2, #12]
+	ldr	r9, [r2, #28]
+	ldm	r1, {r2, r4, r5, r12, lr}       @ x[0..4]
+	subs	r6, r2, r6                      @ d[0]: starts the borrow chain
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	sbcs	r8, r4, r8                      @ d[1]
+	ldr	r4, [sp, #24]                   @ 4-byte Reload
+	sbcs	r5, r5, r2                      @ d[2]
+	ldr	r10, [r1, #20]
+	sbcs	r2, r12, r11                    @ d[3]
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	sbcs	r12, lr, r4                     @ d[4]
+	ldr	r4, [sp, #28]                   @ 4-byte Reload
+	ldr	r7, [r1, #24]
+	sbcs	lr, r10, r4                     @ d[5]
+	ldr	r4, [sp, #32]                   @ 4-byte Reload
+	ldr	r1, [r1, #28]
+	sbcs	r4, r7, r4                      @ d[6]
+	ldr	r7, [r3]
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	sbc	r1, r1, r9                      @ d[7]; its sign drives the select below
+	ldr	r7, [r3, #4]
+	str	r7, [sp]                        @ 4-byte Spill
+	ldr	r7, [r3, #20]
+	str	r7, [sp, #8]                    @ 4-byte Spill
+	ldr	r7, [r3, #24]
+	ldr	r9, [r3, #8]
+	ldr	r11, [r3, #12]
+	ldr	r10, [r3, #16]
+	ldr	r3, [r3, #28]
+	str	r3, [sp, #12]                   @ 4-byte Spill
+	ldr	r3, [sp, #4]                    @ 4-byte Reload
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	adds	r7, r6, r3                      @ e[0] = d[0] + p[0]: starts the carry chain
+	ldr	r3, [sp]                        @ 4-byte Reload
+	str	r6, [sp, #16]                   @ 4-byte Spill
+	adcs	r6, r8, r3                      @ e[1]
+	ldr	r3, [sp, #8]                    @ 4-byte Reload
 	adcs	r9, r5, r9
-	adcs	r11, r12, r11
-	umull	r0, r12, r6, r4
-	ldr	r6, [r1, #20]
-	ldr	r1, [r1, #24]
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	umull	r3, r5, r6, r4
-	umull	r6, r7, r1, r4
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	mov	r4, #0
-	adcs	r3, r3, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r6, r1
-	adc	r4, r4, #0
-	adds	r6, lr, r8
-	adcs	lr, r9, r10
-	adcs	r8, r11, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	adcs	r3, r3, r12
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	adc	r7, r4, r7
-	add	r12, r2, #24
-	stm	r12, {r5, r6, lr}
-	str	r8, [r2, #36]
-	str	r0, [r2, #40]
-	str	r3, [r2, #44]
-	str	r1, [r2, #48]
-	str	r7, [r2, #52]
-	add	sp, sp, #68
+	str	r2, [sp, #20]                   @ 4-byte Spill
+	adcs	r11, r2, r11                    @ e[3]
+	str	r12, [sp, #24]                  @ 4-byte Spill
+	adcs	r2, r12, r10                    @ e[4]
+	str	lr, [sp, #28]                   @ 4-byte Spill
+	adcs	r12, lr, r3                     @ e[5]
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	ldr	r10, [sp, #16]                  @ 4-byte Reload
+	adcs	lr, r4, r3                      @ e[6]
+	ldr	r3, [sp, #12]                   @ 4-byte Reload
+	adc	r3, r1, r3                      @ e[7]; the final carry out is discarded
+	cmp	r1, #0                          @ pl below = d[7] non-negative (signed)
+	movpl	r9, r5                          @ d[7] non-negative: take d[2] instead of e[2]
+	ldr	r5, [sp, #20]                   @ 4-byte Reload
+	movpl	r7, r10                         @ d[0]
+	movpl	r6, r8                          @ d[1]
+	cmp	r1, #0
+	str	r7, [r0]
+	movpl	r11, r5                         @ d[3]
+	ldr	r5, [sp, #24]                   @ 4-byte Reload
+	stmib	r0, {r6, r9, r11}               @ z[1..3]
+	movpl	r2, r5                          @ d[4]
+	ldr	r5, [sp, #28]                   @ 4-byte Reload
+	movpl	r12, r5                         @ d[5]
+	cmp	r1, #0
+	movpl	r3, r1                          @ d[7]
+	add	r1, r0, #16
+	movpl	lr, r4                          @ d[6]
+	stm	r1, {r2, r12, lr}               @ z[4..6]
+	str	r3, [r0, #28]
+	add	sp, sp, #36
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end96:
-	.size	mcl_fpDbl_mulPre7L, .Lfunc_end96-mcl_fpDbl_mulPre7L
+.Lfunc_end53:
+	.size	mcl_fp_subNF8L, .Lfunc_end53-mcl_fp_subNF8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_sqrPre7L
-	.align	2
-	.type	mcl_fpDbl_sqrPre7L,%function
-mcl_fpDbl_sqrPre7L:                     @ @mcl_fpDbl_sqrPre7L
+                                        @ -- End function
+	.globl	mcl_fpDbl_add8L                 @ -- Begin function mcl_fpDbl_add8L
+	.p2align	2
+	.type	mcl_fpDbl_add8L,%function
+	.code	32                              @ @mcl_fpDbl_add8L
+mcl_fpDbl_add8L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	.pad	#60
 	sub	sp, sp, #60
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r1, #16]
-	ldr	r9, [r1, #20]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r1, {r2, r3}
-	ldr	r0, [r1, #8]
-	ldr	r11, [r1, #12]
-	umull	r6, r7, r2, r2
-	str	r0, [sp, #48]           @ 4-byte Spill
-	umull	r5, r4, r0, r2
-	umull	r12, r0, r3, r2
-	umull	r8, r10, r11, r2
-	adds	lr, r7, r12
-	str	r6, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	adcs	r6, r0, r5
-	umlal	r7, r5, r3, r2
-	adcs	r0, r4, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	umull	r4, r6, r0, r2
-	adcs	r4, r10, r4
-	mov	r10, r9
-	str	r4, [sp, #40]           @ 4-byte Spill
-	umull	r4, r8, r10, r2
-	adcs	r6, r6, r4
-	str	r6, [sp, #28]           @ 4-byte Spill
+	ldr	r9, [r2]
+	ldm	r1, {r4, r5, r6, r7}
+	ldmib	r2, {r8, lr}
+	adds	r4, r4, r9
+	str	r4, [sp, #52]                   @ 4-byte Spill
+	adcs	r4, r5, r8
+	ldr	r12, [r2, #12]
+	adcs	r6, r6, lr
+	str	r6, [sp, #44]                   @ 4-byte Spill
+	adcs	r7, r7, r12
+	str	r7, [sp, #40]                   @ 4-byte Spill
+	ldr	r6, [r2, #16]
+	ldr	r7, [r1, #16]
+	str	r4, [sp, #48]                   @ 4-byte Spill
+	adcs	r7, r7, r6
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	ldr	r4, [r2, #20]
+	ldr	r7, [r1, #20]
 	ldr	r6, [r1, #24]
-	umull	lr, r9, r6, r2
-	adcs	r4, r8, lr
-	str	r4, [sp, #20]           @ 4-byte Spill
-	adc	r4, r9, #0
-	adds	r2, r12, r7
-	ldr	r12, [sp, #56]          @ 4-byte Reload
-	str	r2, [sp, #36]           @ 4-byte Spill
-	umull	r2, r7, r3, r3
-	adcs	r2, r2, r5
-	str	r7, [sp, #16]           @ 4-byte Spill
-	umull	r5, r8, r11, r3
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r2, [r12]
-	umull	lr, r2, r6, r3
-	str	r2, [sp, #32]           @ 4-byte Spill
-	umull	r6, r2, r10, r3
-	str	r2, [sp, #24]           @ 4-byte Spill
-	umull	r2, r10, r0, r3
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	umull	r7, r9, r0, r3
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r3, r7, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r7, r5, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	mov	r5, #0
-	adcs	r2, r2, r0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r6, r6, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	lr, lr, r4
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	adc	r5, r5, #0
-	adds	r11, r4, r0
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	ldr	r4, [r1, #4]
-	adcs	r3, r3, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	str	r4, [sp, #44]           @ 4-byte Spill
-	adcs	r7, r7, r9
-	adcs	r9, r2, r8
-	ldr	r2, [r1, #12]
-	str	r0, [r12, #4]
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r12, r6, r10
-	adcs	r10, lr, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	lr, [r1, #8]
-	adc	r0, r5, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1]
-	umull	r8, r5, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adds	r0, r8, r11
-	str	r5, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	umull	r5, r0, r4, lr
-	ldr	r4, [r1, #16]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adcs	r0, r5, r3
-	str	r0, [sp, #20]           @ 4-byte Spill
-	umull	r3, r0, lr, lr
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adcs	r0, r3, r7
-	ldr	r3, [r1, #20]
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	umull	r0, r5, r2, lr
-	str	r0, [sp, #12]           @ 4-byte Spill
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #20]           @ 4-byte Reload
-	str	r5, [sp, #36]           @ 4-byte Spill
-	str	r0, [sp, #4]            @ 4-byte Spill
-	umull	r11, r0, r4, lr
-	str	r0, [sp, #8]            @ 4-byte Spill
-	umull	r8, r0, r3, lr
-	adcs	r11, r11, r12
-	str	r0, [sp]                @ 4-byte Spill
-	ldr	r0, [r1, #24]
-	adcs	r8, r8, r10
-	umull	r10, r12, r0, lr
-	adcs	lr, r10, r7
-	mov	r7, #0
-	adc	r10, r7, #0
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	adds	r6, r9, r7
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	str	r6, [sp, #20]           @ 4-byte Spill
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	adcs	r6, r6, r7
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	str	r6, [sp, #16]           @ 4-byte Spill
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	adcs	r6, r6, r7
-	adcs	r11, r11, r5
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	adcs	r5, r8, r5
-	str	r5, [sp, #8]            @ 4-byte Spill
-	ldr	r5, [sp]                @ 4-byte Reload
-	adcs	r7, lr, r5
-	str	r7, [sp, #4]            @ 4-byte Spill
-	adc	r7, r10, r12
-	ldr	r10, [sp, #48]          @ 4-byte Reload
-	str	r7, [sp]                @ 4-byte Spill
-	umull	r9, r7, r0, r2
-	umull	r5, r0, r3, r2
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	umull	r3, r0, r4, r2
-	str	r0, [sp, #28]           @ 4-byte Spill
-	umull	r4, r0, r2, r2
-	str	r0, [sp, #24]           @ 4-byte Spill
-	umull	r8, lr, r10, r2
-	umull	r0, r12, r7, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	mov	r7, #0
-	adds	r8, r8, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	adcs	r6, r2, r6
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adcs	r4, r4, r11
-	adcs	r3, r3, r2
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	adcs	r5, r5, r2
-	ldr	r2, [sp]                @ 4-byte Reload
-	adcs	r2, r9, r2
-	adc	r9, r7, #0
-	adds	r0, r0, lr
-	adcs	r7, r6, r12
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	adcs	r4, r4, r6
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	adcs	r11, r3, r6
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	adcs	r12, r5, r3
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	ldr	r5, [r1, #12]
-	adcs	r10, r2, r3
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	adc	r2, r9, r2
-	ldr	r9, [r1, #4]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r9, [sp, #16]           @ 4-byte Spill
-	str	r2, [r3, #8]
-	str	r8, [r3, #12]
-	ldr	r2, [r1]
-	ldr	r3, [r1, #16]
-	ldr	r8, [r1, #8]
-	umull	lr, r6, r2, r3
-	str	r2, [sp, #48]           @ 4-byte Spill
-	str	r8, [sp, #4]            @ 4-byte Spill
-	adds	r0, lr, r0
-	ldr	lr, [r1, #24]
-	str	r6, [sp, #36]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	umull	r0, r2, r9, r3
-	adcs	r0, r0, r7
-	str	r2, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #20]           @ 4-byte Spill
-	umull	r7, r0, r8, r3
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adcs	r0, r7, r4
-	ldr	r9, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #8]            @ 4-byte Spill
-	umull	r7, r0, r5, r3
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adcs	r0, r7, r11
-	mov	r7, #0
-	str	r0, [sp]                @ 4-byte Spill
-	umull	r11, r0, r3, r3
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [r1, #20]
-	adcs	r11, r11, r12
-	umull	r12, r2, r0, r3
-	adcs	r4, r12, r10
-	umull	r10, r8, lr, r3
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	str	r2, [sp, #40]           @ 4-byte Spill
-	adcs	r3, r10, r3
-	adc	r10, r7, #0
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adds	r6, r9, r7
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	str	r6, [sp, #36]           @ 4-byte Spill
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	adcs	r6, r6, r7
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	str	r6, [sp, #20]           @ 4-byte Spill
-	ldr	r6, [sp]                @ 4-byte Reload
-	adcs	r6, r6, r7
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	str	r6, [sp, #8]            @ 4-byte Spill
-	adcs	r11, r11, r7
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	adcs	r4, r4, r7
-	adcs	r2, r3, r2
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	str	r2, [sp, #24]           @ 4-byte Spill
-	umull	r6, r2, r5, r0
-	adc	r10, r10, r8
-	str	r2, [sp, #44]           @ 4-byte Spill
-	umull	r5, r2, r3, r0
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	umull	r8, r3, r2, r0
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r3, [sp, #28]           @ 4-byte Spill
-	umull	r3, r9, r2, r0
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	adds	r2, r3, r2
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r7, r8, r2
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adcs	r5, r5, r2
-	adcs	r6, r6, r11
-	adcs	r2, r12, r4
-	umull	r4, r8, r0, r0
-	adcs	r4, r4, r3
-	umull	r3, r11, lr, r0
-	adcs	r0, r3, r10
-	mov	r3, #0
-	adc	r3, r3, #0
-	adds	r7, r7, r9
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adcs	r9, r5, r7
-	ldr	r5, [sp, #32]           @ 4-byte Reload
-	adcs	r6, r6, r5
-	str	r6, [sp, #32]           @ 4-byte Spill
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	adcs	r10, r2, r6
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	adcs	r12, r4, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	adc	r0, r3, r11
-	ldr	r3, [r1, #24]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	str	r0, [r2, #16]
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	str	r0, [r2, #20]
-	ldm	r1, {r0, r4}
-	ldr	r5, [r1, #12]
-	ldr	r2, [r1, #8]
-	umull	lr, r6, r5, r3
-	umull	r5, r11, r2, r3
-	umull	r2, r8, r4, r3
-	str	r6, [sp, #52]           @ 4-byte Spill
-	umull	r4, r6, r0, r3
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adds	r0, r4, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r9, r2, r9
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	adcs	r4, r5, r0
-	ldr	r0, [r1, #16]
-	ldr	r1, [r1, #20]
-	adcs	r10, lr, r10
-	umull	r7, lr, r0, r3
-	adcs	r0, r7, r12
-	umull	r7, r12, r1, r3
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r7, r1
-	umull	r7, r5, r3, r3
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	adcs	r3, r7, r3
-	mov	r7, #0
-	adc	r7, r7, #0
-	adds	r6, r9, r6
-	adcs	r4, r4, r8
-	adcs	r8, r10, r11
-	adcs	r0, r0, r2
-	adcs	r1, r1, lr
-	adcs	r2, r3, r12
-	adc	r3, r7, r5
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	ldr	r5, [sp, #40]           @ 4-byte Reload
-	add	r12, r7, #40
-	str	r5, [r7, #24]
-	str	r6, [r7, #28]
-	str	r4, [r7, #32]
-	str	r8, [r7, #36]
-	stm	r12, {r0, r1, r2, r3}
+	adcs	r7, r7, r4
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	ldr	r7, [r2, #24]
+	ldr	r5, [r1, #48]
+	adcs	r7, r6, r7
+	str	r7, [sp, #24]                   @ 4-byte Spill
+	ldr	r7, [r2, #28]
+	ldr	r6, [r1, #28]
+	ldr	r4, [r1, #52]
+	adcs	r7, r6, r7
+	str	r7, [sp, #20]                   @ 4-byte Spill
+	ldr	r7, [r2, #32]
+	ldr	r6, [r1, #32]
+	adcs	r10, r6, r7
+	ldr	r7, [r2, #36]
+	ldr	r6, [r1, #36]
+	str	r10, [sp, #28]                  @ 4-byte Spill
+	adcs	r7, r6, r7
+	str	r7, [sp, #56]                   @ 4-byte Spill
+	ldr	r7, [r2, #40]
+	ldr	r6, [r1, #40]
+	adcs	r9, r6, r7
+	ldr	r7, [r2, #44]
+	ldr	r6, [r1, #44]
+	str	r9, [sp, #16]                   @ 4-byte Spill
+	adcs	r8, r6, r7
+	ldr	r7, [r2, #48]
+	ldr	r6, [r1, #56]
+	adcs	r7, r5, r7
+	ldr	r5, [r2, #52]
+	ldr	r1, [r1, #60]
+	adcs	r5, r4, r5
+	ldr	r4, [r2, #56]
+	ldr	r2, [r2, #60]
+	str	r7, [sp, #12]                   @ 4-byte Spill
+	adcs	r7, r6, r4
+	adcs	r1, r1, r2
+	str	r1, [sp]                        @ 4-byte Spill
+	mov	r1, #0
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	adc	r7, r1, #0
+	ldm	r3, {r1, r2, r12, lr}
+	subs	r1, r10, r1
+	str	r1, [sp, #8]                    @ 4-byte Spill
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	ldr	r4, [r3, #28]
+	sbcs	r11, r1, r2
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	str	r1, [r0]
+	sbcs	r10, r9, r12
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	sbcs	r9, r8, lr
+	str	r1, [r0, #4]
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	str	r1, [r0, #8]
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	str	r1, [r0, #12]
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	str	r1, [r0, #16]
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	str	r1, [r0, #20]
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	str	r1, [r0, #24]
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	str	r1, [r0, #28]
+	ldr	lr, [r3, #24]
+	ldr	r1, [r3, #20]
+	ldr	r3, [r3, #16]
+	ldr	r6, [sp, #12]                   @ 4-byte Reload
+	ldr	r12, [sp, #4]                   @ 4-byte Reload
+	sbcs	r3, r6, r3
+	ldr	r2, [sp]                        @ 4-byte Reload
+	sbcs	r1, r5, r1
+	sbcs	lr, r12, lr
+	sbcs	r4, r2, r4
+	sbc	r7, r7, #0
+	ands	r7, r7, #1
+	movne	r1, r5
+	movne	r4, r2
+	str	r1, [r0, #52]
+	movne	lr, r12
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	cmp	r7, #0
+	movne	r3, r6
+	movne	r9, r8
+	ldr	r2, [sp, #8]                    @ 4-byte Reload
+	movne	r10, r1
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	cmp	r7, #0
+	str	r4, [r0, #60]
+	str	lr, [r0, #56]
+	movne	r11, r1
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	str	r3, [r0, #48]
+	str	r9, [r0, #44]
+	movne	r2, r1
+	str	r10, [r0, #40]
+	str	r11, [r0, #36]
+	str	r2, [r0, #32]
 	add	sp, sp, #60
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end97:
-	.size	mcl_fpDbl_sqrPre7L, .Lfunc_end97-mcl_fpDbl_sqrPre7L
+.Lfunc_end54:
+	.size	mcl_fpDbl_add8L, .Lfunc_end54-mcl_fpDbl_add8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_mont7L
-	.align	2
-	.type	mcl_fp_mont7L,%function
-mcl_fp_mont7L:                          @ @mcl_fp_mont7L
+                                        @ -- End function
+	.globl	mcl_fpDbl_sub8L                 @ -- Begin function mcl_fpDbl_sub8L
+	.p2align	2
+	.type	mcl_fpDbl_sub8L,%function
+	.code	32                              @ @mcl_fpDbl_sub8L
+mcl_fpDbl_sub8L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#124
-	sub	sp, sp, #124
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, r2
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldm	r0, {r2, lr}
-	ldr	r7, [r0, #8]
-	ldr	r0, [r0, #12]
-	ldr	r5, [r3, #-4]
-	ldr	r6, [r3, #8]
-	ldr	r9, [r3, #4]
-	ldr	r11, [r1, #8]
-	ldr	r12, [r1, #12]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r1, #4]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r1]
-	str	r5, [sp, #80]           @ 4-byte Spill
-	str	r6, [sp, #116]          @ 4-byte Spill
-	str	r9, [sp, #108]          @ 4-byte Spill
-	str	r11, [sp, #104]         @ 4-byte Spill
-	str	r12, [sp, #72]          @ 4-byte Spill
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldr	r7, [r3]
-	umull	r4, r8, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	mul	r0, r4, r5
-	str	r4, [sp, #44]           @ 4-byte Spill
-	umull	r10, r4, r0, r6
-	str	r4, [sp, #32]           @ 4-byte Spill
-	str	r10, [sp, #8]           @ 4-byte Spill
-	umull	r4, r5, r0, r7
-	str	r7, [sp, #112]          @ 4-byte Spill
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	str	r4, [sp, #40]           @ 4-byte Spill
-	mov	r4, r5
-	str	r5, [sp, #4]            @ 4-byte Spill
-	umlal	r4, r10, r0, r9
-	str	r4, [sp, #36]           @ 4-byte Spill
-	ldr	r4, [r1, #24]
-	umull	r6, r5, r4, r2
-	str	r4, [sp, #88]           @ 4-byte Spill
-	ldr	r4, [r1, #20]
-	ldr	r1, [r1, #16]
-	str	r6, [sp, #96]           @ 4-byte Spill
-	str	r5, [sp, #120]          @ 4-byte Spill
-	umull	r6, r5, r4, r2
-	str	r4, [sp, #64]           @ 4-byte Spill
-	umull	r9, r4, r1, r2
-	str	r1, [sp, #100]          @ 4-byte Spill
-	str	r6, [sp, #76]           @ 4-byte Spill
-	str	r5, [sp, #92]           @ 4-byte Spill
-	str	r4, [sp, #20]           @ 4-byte Spill
-	umull	r6, r5, r12, r2
-	umull	r12, r4, r11, r2
-	umull	r11, r1, r7, r2
-	adds	r7, r8, r11
-	adcs	r7, r1, r12
-	adcs	r1, r4, r6
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	ldr	r6, [sp, #108]          @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	adcs	r1, r5, r9
-	ldr	r5, [r3, #12]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r5, [sp, #76]           @ 4-byte Spill
-	adcs	r1, r4, r1
-	ldr	r4, [sp, #92]           @ 4-byte Reload
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r4, r1
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [r3, #24]
-	umull	r9, r4, r0, r1
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	str	r4, [sp]                @ 4-byte Spill
-	ldr	r4, [r3, #20]
-	umull	r3, r7, r0, r6
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	str	r1, [sp, #120]          @ 4-byte Spill
-	adds	r3, r6, r3
-	str	r4, [sp, #92]           @ 4-byte Spill
-	umull	r3, r6, r0, r5
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	adcs	r7, r7, r5
-	ldr	r5, [sp, #32]           @ 4-byte Reload
-	adcs	r11, r5, r3
-	umull	r7, r5, r0, r1
-	adcs	r1, r6, r7
-	umull	r7, r3, r0, r4
-	ldr	r4, [sp]                @ 4-byte Reload
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r5, r7
-	ldr	r5, [sp, #68]           @ 4-byte Reload
-	adcs	r3, r3, r9
-	adc	r7, r4, #0
-	mov	r4, #0
-	umlal	r8, r12, r5, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	adds	r2, r6, r2
-	mov	r6, r5
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	adcs	r2, r2, r8
-	str	r2, [sp, #44]           @ 4-byte Spill
-	adcs	r2, r10, r12
-	ldr	r10, [sp, #84]          @ 4-byte Reload
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adcs	r2, r11, r2
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	umull	r2, r3, lr, r5
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	adc	r0, r4, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	umull	r12, r9, lr, r0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	umull	r8, r4, lr, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	umull	r1, r7, lr, r0
-	umull	r11, r0, lr, r10
-	adds	r2, r0, r2
-	adcs	r2, r3, r1
-	umlal	r0, r1, lr, r6
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	umull	r2, r3, lr, r5
-	adcs	r2, r7, r2
-	adcs	r10, r3, r8
-	ldr	r8, [sp, #64]           @ 4-byte Reload
-	umull	r7, r3, lr, r8
-	adcs	r4, r4, r7
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	adcs	r3, r3, r12
-	adc	r5, r9, #0
-	adds	r7, r7, r11
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #96]          @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #20]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	mul	r0, r7, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	umull	lr, r12, r0, r6
-	umull	r3, r4, r0, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	mov	r2, r3
-	umull	r9, r5, r0, r1
-	mov	r1, r5
-	adds	r5, r5, lr
-	umlal	r1, r2, r0, r6
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	adcs	r3, r12, r3
-	umull	r5, lr, r0, r6
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	umull	r3, r12, r0, r6
-	ldr	r6, [sp, #92]           @ 4-byte Reload
-	adcs	r3, r4, r3
-	adcs	r12, r12, r5
-	umull	r4, r5, r0, r6
-	adcs	lr, lr, r4
-	umull	r6, r4, r0, r10
-	adcs	r0, r5, r6
-	adc	r4, r4, #0
-	adds	r5, r9, r7
-	ldr	r9, [sp, #84]           @ 4-byte Reload
-	ldr	r5, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r3, r1
-	ldr	r3, [sp, #68]           @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r12, r1
-	ldr	r12, [sp, #48]          @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, lr, r1
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	umull	r2, r1, r12, r0
-	umull	r10, r0, r12, r8
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	str	r2, [sp, #8]            @ 4-byte Spill
-	str	r1, [sp, #12]           @ 4-byte Spill
-	umull	r2, lr, r12, r3
-	umull	r7, r8, r12, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	umull	r5, r6, r12, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	umull	r1, r4, r12, r0
-	umull	r11, r0, r12, r9
-	adds	r2, r0, r2
-	str	r11, [sp]               @ 4-byte Spill
-	adcs	r2, lr, r1
-	umlal	r0, r1, r12, r3
-	adcs	lr, r4, r5
-	ldmib	sp, {r4, r5}
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	ldr	r2, [sp]                @ 4-byte Reload
-	adcs	r7, r6, r7
-	adcs	r6, r8, r10
-	adcs	r4, r4, r5
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	adc	r5, r5, #0
-	adds	r9, r3, r2
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #24]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	mul	r0, r9, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	umull	r3, r2, r0, r1
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	umull	r7, r1, r0, r2
-	mov	r2, r3
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	mov	r5, r1
-	umlal	r5, r2, r0, r6
-	umull	r10, r4, r0, r7
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	str	r4, [sp, #8]            @ 4-byte Spill
-	umull	r12, r8, r0, r7
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	umull	lr, r4, r0, r7
-	umull	r11, r7, r0, r6
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	adds	r1, r1, r11
-	ldr	r11, [sp, #76]          @ 4-byte Reload
-	adcs	r1, r7, r3
-	umull	r1, r3, r0, r11
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	adcs	r1, r3, lr
-	adcs	r3, r4, r12
-	ldr	r4, [sp, #16]           @ 4-byte Reload
-	adcs	r7, r8, r10
-	ldr	r10, [sp, #52]          @ 4-byte Reload
-	ldr	r8, [sp, #64]           @ 4-byte Reload
-	adc	r6, r6, #0
-	adds	r4, r4, r9
-	ldr	r9, [sp, #72]           @ 4-byte Reload
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	adcs	r5, r5, r4
-	str	r5, [sp, #48]           @ 4-byte Spill
-	ldr	r5, [sp, #44]           @ 4-byte Reload
-	adcs	r2, r2, r5
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	umull	r4, r5, r10, r7
-	adcs	r0, r6, r0
-	str	r4, [sp, #16]           @ 4-byte Spill
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	umull	r1, r6, r10, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	umull	r2, r3, r10, r0
-	adds	r2, r5, r2
-	adcs	r2, r3, r1
-	umull	r2, r3, r10, r9
-	adcs	r7, r6, r2
-	ldr	r6, [sp, #100]          @ 4-byte Reload
-	umull	r2, r12, r10, r6
-	adcs	r6, r3, r2
-	umull	r3, lr, r10, r8
-	mov	r2, r10
-	ldr	r10, [sp, #88]          @ 4-byte Reload
-	adcs	r4, r12, r3
-	umlal	r5, r1, r2, r0
-	umull	r3, r12, r2, r10
-	mov	r10, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	adcs	r3, lr, r3
-	adc	r12, r12, #0
-	adds	lr, r0, r2
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	mul	r0, lr, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	umull	r5, r12, r0, r7
-	umull	r3, r6, r0, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	umull	r2, r4, r0, r1
-	str	r2, [sp, #20]           @ 4-byte Spill
-	mov	r1, r4
-	mov	r2, r3
-	adds	r4, r4, r5
-	umlal	r1, r2, r0, r7
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	adcs	r3, r12, r3
-	umull	r3, r12, r0, r11
-	adcs	r11, r6, r3
-	ldr	r3, [sp, #92]           @ 4-byte Reload
-	umull	r4, r5, r0, r7
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adcs	r12, r12, r4
-	umull	r4, r6, r0, r3
-	adcs	r4, r5, r4
-	umull	r5, r3, r0, r7
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r6, r5
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	adc	r3, r3, #0
-	adds	r6, r5, lr
-	adcs	r1, r1, r7
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r11, r1
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r12, r1
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r11, r4, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r0, [r0, #16]
-	umull	lr, r6, r0, r8
-	umull	r5, r3, r0, r10
-	umull	r8, r2, r0, r1
-	umull	r12, r4, r0, r9
-	adds	r5, r2, r5
-	umull	r1, r5, r0, r7
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	adcs	r3, r3, r1
-	umlal	r2, r1, r0, r10
-	adcs	r9, r5, r12
-	umull	r5, r3, r0, r7
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	adcs	r12, r4, r5
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	adcs	lr, r3, lr
-	umull	r5, r3, r0, r4
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r5, r6, r5
-	adc	r3, r3, #0
-	adds	r4, r0, r8
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	adcs	r0, r11, r12
-	ldr	r11, [sp, #80]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	mul	r1, r4, r11
-	adcs	r0, r0, lr
-	umull	lr, r12, r1, r7
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	umull	r2, r6, r1, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	mov	r3, r2
-	umull	r8, r5, r1, r0
-	mov	r0, r5
-	adds	r5, r5, lr
-	umlal	r0, r3, r1, r7
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	adcs	r2, r12, r2
-	umull	r5, lr, r1, r7
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	umull	r2, r12, r1, r7
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	adcs	r9, r6, r2
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	adcs	r12, r12, r5
-	umull	r5, r6, r1, r7
-	adcs	lr, lr, r5
-	umull	r7, r5, r1, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r6, r7
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	adc	r5, r5, #0
-	adds	r4, r8, r4
-	adcs	r0, r0, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	mov	r12, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r0, [r0, #20]
-	umull	lr, r8, r0, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	umull	r6, r3, r0, r12
-	umull	r4, r5, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	umull	r10, r2, r0, r1
-	adds	r6, r2, r6
-	umull	r1, r6, r0, r7
-	ldr	r7, [sp, #88]           @ 4-byte Reload
-	adcs	r3, r3, r1
-	umlal	r2, r1, r0, r12
-	ldr	r3, [sp, #100]          @ 4-byte Reload
-	adcs	r9, r6, r4
-	umull	r4, r6, r0, r3
-	adcs	r4, r5, r4
-	adcs	r3, r6, lr
-	umull	r5, r6, r0, r7
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	adcs	r5, r8, r5
-	adc	r6, r6, #0
-	adds	lr, r0, r10
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r10, r0, r2
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mul	r1, lr, r11
-	ldr	r11, [sp, #84]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	umull	r6, r12, r1, r7
-	str	r0, [sp, #32]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	umull	r3, r4, r1, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	mov	r2, r3
-	umull	r8, r5, r1, r0
-	mov	r0, r5
-	adds	r5, r5, r6
-	umlal	r0, r2, r1, r7
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	adcs	r3, r12, r3
-	umull	r5, r6, r1, r7
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	umull	r3, r12, r1, r7
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adcs	r9, r4, r3
-	ldr	r3, [sp, #92]           @ 4-byte Reload
-	adcs	r12, r12, r5
-	umull	r4, r5, r1, r3
-	adcs	r4, r6, r4
-	umull	r6, r3, r1, r7
-	adcs	r1, r5, r6
-	adc	r3, r3, #0
-	adds	r6, r8, lr
-	adcs	r0, r0, r10
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	ldr	r12, [sp, #68]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r0, [r0, #24]
-	umull	r3, r2, r0, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r2, [sp, #60]           @ 4-byte Spill
-	str	r3, [sp, #20]           @ 4-byte Spill
-	umull	r3, lr, r0, r12
-	umull	r9, r2, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r2, [sp, #88]           @ 4-byte Spill
-	umull	r7, r8, r0, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	umull	r5, r6, r0, r1
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	umull	r2, r4, r0, r1
-	umull	r10, r1, r0, r11
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	adds	r3, r1, r3
-	str	r10, [sp, #104]         @ 4-byte Spill
-	ldr	r10, [sp, #96]          @ 4-byte Reload
-	adcs	r3, lr, r2
-	umlal	r1, r2, r0, r12
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	lr, r4, r5
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	ldr	r3, [sp, #88]           @ 4-byte Reload
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	adcs	r6, r6, r7
-	adcs	r7, r8, r9
-	ldr	r8, [sp, #108]          @ 4-byte Reload
-	adcs	r5, r3, r5
-	ldr	r3, [sp, #104]          @ 4-byte Reload
-	adc	r4, r4, #0
-	adds	r9, r0, r3
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	ldr	lr, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r1, r9, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	umull	r2, r3, r1, r8
-	umull	r4, r5, r1, r0
-	adds	r2, r5, r2
-	umull	r0, r2, r1, r7
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	adcs	r3, r3, r0
-	umull	r3, r12, r1, lr
-	adcs	r6, r2, r3
-	umull	r3, r2, r1, r7
-	adcs	r12, r12, r3
-	umull	r7, r3, r1, r11
-	adcs	r2, r2, r7
-	str	r2, [sp, #80]           @ 4-byte Spill
-	umull	r7, r2, r1, r10
-	adcs	r3, r3, r7
-	mov	r7, r8
-	umlal	r5, r0, r1, r7
-	adc	r1, r2, #0
-	adds	r2, r4, r9
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	adcs	r8, r5, r2
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	ldr	r5, [sp, #116]          @ 4-byte Reload
-	adcs	r9, r0, r2
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	adcs	r4, r6, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	str	r4, [sp, #88]           @ 4-byte Spill
-	adcs	r6, r12, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r6, [sp, #100]          @ 4-byte Spill
-	adcs	r12, r2, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r2, r3, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	str	r2, [sp, #104]          @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	subs	r1, r8, r1
-	sbcs	r3, r9, r7
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	sbcs	r5, r4, r5
-	sbcs	r6, r6, lr
-	sbcs	r4, r12, r7
-	sbcs	r11, r2, r11
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	sbcs	lr, r0, r10
-	sbc	r7, r2, #0
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	ands	r7, r7, #1
-	movne	r1, r8
-	movne	r3, r9
-	str	r1, [r2]
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r3, [r2, #4]
-	movne	r5, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	cmp	r7, #0
-	movne	r4, r12
-	str	r5, [r2, #8]
-	movne	r6, r1
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	str	r6, [r2, #12]
-	str	r4, [r2, #16]
-	movne	r11, r1
-	cmp	r7, #0
-	movne	lr, r0
-	str	r11, [r2, #20]
-	str	lr, [r2, #24]
-	add	sp, sp, #124
+	.pad	#68
+	sub	sp, sp, #68
+	ldr	r7, [r2, #32]
+	add	r11, r2, #16
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	ldr	r7, [r2, #36]
+	str	r7, [sp, #44]                   @ 4-byte Spill
+	ldr	r7, [r2, #40]
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	ldr	r7, [r2, #44]
+	str	r7, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [r2, #48]
+	str	r7, [sp, #48]                   @ 4-byte Spill
+	ldr	r7, [r2, #52]
+	str	r7, [sp, #52]                   @ 4-byte Spill
+	ldr	r7, [r2, #56]
+	str	r7, [sp, #56]                   @ 4-byte Spill
+	ldr	r7, [r2, #60]
+	str	r7, [sp, #60]                   @ 4-byte Spill
+	ldr	r6, [r2]
+	ldmib	r2, {r5, r7, r9}
+	ldr	r2, [r2, #28]
+	str	r2, [sp, #4]                    @ 4-byte Spill
+	ldm	r1, {r2, r12, lr}
+	subs	r2, r2, r6
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	ldr	r2, [r1, #60]
+	str	r2, [sp]                        @ 4-byte Spill
+	sbcs	r2, r12, r5
+	ldr	r4, [r1, #12]
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	sbcs	r2, lr, r7
+	str	r2, [sp, #24]                   @ 4-byte Spill
+	sbcs	r2, r4, r9
+	str	r2, [sp, #20]                   @ 4-byte Spill
+	ldm	r11, {r8, r10, r11}
+	ldr	r2, [r1, #16]
+	ldr	r4, [sp, #4]                    @ 4-byte Reload
+	sbcs	r2, r2, r8
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	ldr	r2, [r1, #20]
+	mov	r8, #0
+	ldr	r5, [sp, #44]                   @ 4-byte Reload
+	sbcs	r2, r2, r10
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	ldr	r2, [r1, #24]
+	ldr	r12, [r1, #56]
+	sbcs	r2, r2, r11
+	str	r2, [sp, #8]                    @ 4-byte Spill
+	ldr	r2, [r1, #28]
+	ldr	lr, [r1, #52]
+	sbcs	r2, r2, r4
+	str	r2, [sp, #4]                    @ 4-byte Spill
+	ldr	r2, [r1, #32]
+	ldr	r4, [sp, #64]                   @ 4-byte Reload
+	ldr	r6, [sp, #40]                   @ 4-byte Reload
+	sbcs	r4, r2, r4
+	ldr	r2, [r1, #36]
+	str	r4, [sp, #64]                   @ 4-byte Spill
+	sbcs	r7, r2, r5
+	ldr	r2, [r1, #40]
+	ldr	r5, [sp, #36]                   @ 4-byte Reload
+	str	r7, [sp, #44]                   @ 4-byte Spill
+	sbcs	r2, r2, r5
+	ldr	r5, [r1, #48]
+	ldr	r1, [r1, #44]
+	str	r2, [sp, #36]                   @ 4-byte Spill
+	sbcs	r1, r1, r6
+	ldr	r6, [sp, #48]                   @ 4-byte Reload
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	sbcs	r11, r5, r6
+	ldr	r5, [sp, #52]                   @ 4-byte Reload
+	ldr	r6, [sp]                        @ 4-byte Reload
+	sbcs	r10, lr, r5
+	ldr	r5, [sp, #56]                   @ 4-byte Reload
+	sbcs	r9, r12, r5
+	ldr	r5, [sp, #60]                   @ 4-byte Reload
+	sbcs	r6, r6, r5
+	sbc	r5, r8, #0
+	ldr	r8, [r3]
+	str	r5, [sp, #60]                   @ 4-byte Spill
+	ldmib	r3, {r5, r12, lr}
+	adds	r8, r4, r8
+	adcs	r5, r7, r5
+	ldr	r4, [sp, #60]                   @ 4-byte Reload
+	adcs	r7, r2, r12
+	adcs	r2, r1, lr
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	str	r1, [r0]
+	add	lr, r3, #20
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	str	r1, [r0, #4]
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	str	r1, [r0, #8]
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	str	r1, [r0, #12]
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	str	r1, [r0, #16]
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	str	r1, [r0, #20]
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	ldr	r3, [r3, #16]
+	str	r1, [r0, #24]
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	adcs	r3, r11, r3
+	str	r1, [r0, #28]
+	ldm	lr, {r1, r12, lr}
+	adcs	r1, r10, r1
+	adcs	r12, r9, r12
+	adc	lr, r6, lr
+	ands	r4, r4, #1
+	moveq	r1, r10
+	moveq	lr, r6
+	str	r1, [r0, #52]
+	moveq	r12, r9
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	cmp	r4, #0
+	moveq	r3, r11
+	str	lr, [r0, #60]
+	str	r12, [r0, #56]
+	moveq	r2, r1
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	str	r3, [r0, #48]
+	str	r2, [r0, #44]
+	moveq	r7, r1
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	cmp	r4, #0
+	str	r7, [r0, #40]
+	moveq	r5, r1
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	str	r5, [r0, #36]
+	moveq	r8, r1
+	str	r8, [r0, #32]
+	add	sp, sp, #68
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end98:
-	.size	mcl_fp_mont7L, .Lfunc_end98-mcl_fp_mont7L
+.Lfunc_end55:
+	.size	mcl_fpDbl_sub8L, .Lfunc_end55-mcl_fpDbl_sub8L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_montNF7L
-	.align	2
-	.type	mcl_fp_montNF7L,%function
-mcl_fp_montNF7L:                        @ @mcl_fp_montNF7L
+                                        @ -- End function
+	.globl	mulPv384x32                     @ -- Begin function mulPv384x32
+	.p2align	2
+	.type	mulPv384x32,%function
+	.code	32                              @ @mulPv384x32
+mulPv384x32:                            @ z[0..12] at r0 = x[0..11] at r1 times the 32-bit word in r2; least-significant word first
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r11, lr}
+	ldr	r12, [r1]                       @ r12 = x[0]
+	ldmib	r1, {r3, lr}                    @ r3 = x[1], lr = x[2] (ldmib starts at r1+4)
+	umull	r7, r6, r12, r2                 @ r6:r7 = x[0]*y
+	ldr	r4, [r1, #12]                   @ r4 = x[3]
+	umull	r5, r8, r3, r2                  @ r8:r5 = x[1]*y
+	str	r7, [r0]                        @ z[0] = lo(x[0]*y)
+	umull	r9, r12, r4, r2                 @ r12:r9 = x[3]*y
+	adds	r7, r6, r5                      @ only the carry out of word 1 is kept; r7 is overwritten below
+	umull	r5, r4, lr, r2                  @ r4:r5 = x[2]*y (no S suffix, so the carry flag survives)
+	adcs	r7, r8, r5                      @ only the carry out of word 2 is kept; r7 is overwritten below
+	umlal	r6, r5, r3, r2                  @ r5:r6 += x[1]*y, giving r6 = z[1] and r5 = z[2]
+	adcs	r7, r4, r9                      @ z[3] = hi(x[2]*y) + lo(x[3]*y) + carry
+	str	r7, [r0, #12]                   @ store z[3]
+	str	r5, [r0, #8]                    @ store z[2]
+	str	r6, [r0, #4]                    @ store z[1]
+	ldr	r3, [r1, #16]                   @ r3 = x[4]; from here each limb repeats load / umull / adcs / store
+	umull	r7, r6, r3, r2                  @ r6:r7 = x[4]*y
+	adcs	r3, r12, r7                     @ z[4] = hi(x[3]*y) + lo(x[4]*y) + carry
+	str	r3, [r0, #16]
+	ldr	r3, [r1, #20]                   @ r3 = x[5]
+	umull	r7, r5, r3, r2                  @ r5:r7 = x[5]*y
+	adcs	r3, r6, r7                      @ z[5] = hi(x[4]*y) + lo(x[5]*y) + carry
+	str	r3, [r0, #20]
+	ldr	r3, [r1, #24]                   @ r3 = x[6]
+	umull	r7, r6, r3, r2                  @ r6:r7 = x[6]*y
+	adcs	r3, r5, r7                      @ z[6] = hi(x[5]*y) + lo(x[6]*y) + carry
+	str	r3, [r0, #24]
+	ldr	r3, [r1, #28]                   @ r3 = x[7]
+	umull	r7, r5, r3, r2                  @ r5:r7 = x[7]*y
+	adcs	r3, r6, r7                      @ z[7] = hi(x[6]*y) + lo(x[7]*y) + carry
+	str	r3, [r0, #28]
+	ldr	r3, [r1, #32]                   @ r3 = x[8]
+	umull	r7, r6, r3, r2                  @ r6:r7 = x[8]*y
+	adcs	r3, r5, r7                      @ z[8] = hi(x[7]*y) + lo(x[8]*y) + carry
+	str	r3, [r0, #32]
+	ldr	r3, [r1, #36]                   @ r3 = x[9]
+	umull	r7, r5, r3, r2                  @ r5:r7 = x[9]*y
+	adcs	r3, r6, r7                      @ z[9] = hi(x[8]*y) + lo(x[9]*y) + carry
+	str	r3, [r0, #36]
+	ldr	r3, [r1, #40]                   @ r3 = x[10]
+	umull	r7, r6, r3, r2                  @ r6:r7 = x[10]*y
+	adcs	r3, r5, r7                      @ z[10] = hi(x[9]*y) + lo(x[10]*y) + carry
+	str	r3, [r0, #40]
+	ldr	r1, [r1, #44]                   @ r1 = x[11]; the source pointer is no longer needed
+	umull	r3, r7, r1, r2                  @ r7:r3 = x[11]*y
+	adcs	r1, r6, r3                      @ z[11] = hi(x[10]*y) + lo(x[11]*y) + carry
+	str	r1, [r0, #44]
+	adc	r1, r7, #0                      @ z[12] = hi(x[11]*y) + final carry
+	str	r1, [r0, #48]                   @ thirteenth and last output word
+	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
+	mov	pc, lr                          @ return
+.Lfunc_end56:
+	.size	mulPv384x32, .Lfunc_end56-mulPv384x32
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_mulUnitPre12L            @ -- Begin function mcl_fp_mulUnitPre12L
+	.p2align	2
+	.type	mcl_fp_mulUnitPre12L,%function
+	.code	32                              @ @mcl_fp_mulUnitPre12L
+mcl_fp_mulUnitPre12L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#104
-	sub	sp, sp, #104
-	str	r0, [sp, #36]           @ 4-byte Spill
-	mov	r0, r2
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldm	r0, {r4, r12}
-	ldr	r6, [r1, #4]
-	ldr	r2, [r0, #8]
-	ldr	r7, [r1]
-	ldr	r0, [r0, #12]
-	ldr	r5, [r1, #8]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r1, #12]
-	umull	r9, r8, r6, r4
-	umull	lr, r10, r7, r4
-	str	r6, [sp, #52]           @ 4-byte Spill
-	mov	r11, r6
-	str	r7, [sp, #96]           @ 4-byte Spill
-	str	r5, [sp, #80]           @ 4-byte Spill
-	str	r2, [sp]                @ 4-byte Spill
-	adds	r6, r10, r9
-	umull	r6, r9, r5, r4
-	ldr	r5, [r1, #20]
-	adcs	r7, r8, r6
-	umlal	r10, r6, r11, r4
-	umull	r7, r8, r0, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r9, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r1, #16]
-	str	r5, [sp, #44]           @ 4-byte Spill
-	umull	r7, r9, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	adcs	r0, r8, r7
-	str	r0, [sp, #84]           @ 4-byte Spill
-	umull	r7, r0, r5, r4
-	adcs	r5, r9, r7
-	ldr	r7, [r3, #4]
-	str	r5, [sp, #76]           @ 4-byte Spill
-	ldr	r5, [r1, #24]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	umull	r1, r9, r5, r4
-	str	r5, [sp, #68]           @ 4-byte Spill
-	ldr	r5, [r3]
-	adcs	r0, r0, r1
-	ldr	r1, [r3, #-4]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r9, #0
-	ldr	r9, [r3, #8]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	str	r5, [sp, #56]           @ 4-byte Spill
-	mul	r0, lr, r1
-	str	r1, [sp, #60]           @ 4-byte Spill
-	umull	r1, r2, r0, r5
-	str	r9, [sp, #100]          @ 4-byte Spill
-	adds	r1, r1, lr
-	str	r2, [sp, #20]           @ 4-byte Spill
-	umull	r1, lr, r0, r7
-	adcs	r11, r1, r10
-	umull	r5, r1, r0, r9
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [r3, #12]
-	adcs	r9, r5, r6
-	str	r1, [sp, #92]           @ 4-byte Spill
-	umull	r5, r10, r0, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r7, r5, r1
-	ldr	r1, [r3, #16]
-	str	r1, [sp, #88]           @ 4-byte Spill
-	umull	r5, r8, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r4, r5, r1
-	ldr	r1, [r3, #20]
-	str	r1, [sp, #84]           @ 4-byte Spill
-	umull	r5, r6, r0, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r5, r5, r1
-	ldr	r1, [r3, #24]
-	umull	r3, r2, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	adc	r3, r1, #0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adds	r11, r11, r1
-	adcs	r1, r9, lr
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r1, r7, r1
-	ldr	r7, [sp, #80]           @ 4-byte Reload
-	str	r1, [sp, #24]           @ 4-byte Spill
-	adcs	r1, r4, r10
-	str	r1, [sp, #20]           @ 4-byte Spill
-	adcs	r1, r5, r8
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #12]           @ 4-byte Spill
-	adc	r0, r3, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #8]            @ 4-byte Spill
-	umull	r9, r0, r12, r1
-	umull	r3, r4, r12, r2
-	adds	r3, r0, r3
-	umull	r1, r3, r12, r7
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	adcs	r4, r4, r1
-	umlal	r0, r1, r12, r2
-	umull	r4, r6, r12, r5
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	adcs	r10, r3, r4
-	umull	r4, r3, r12, r5
-	adcs	r8, r6, r4
-	umull	r6, r4, r12, r7
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	adcs	r5, r3, r6
-	umull	r6, r3, r12, r7
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adcs	r4, r4, r6
-	adc	r2, r3, #0
-	adds	r3, r9, r11
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	adcs	r6, r10, r7
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	adcs	r11, r8, r7
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	ldr	r8, [sp, #72]           @ 4-byte Reload
-	adcs	r7, r5, r7
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	str	r7, [sp, #16]           @ 4-byte Spill
-	adcs	r7, r4, r5
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	adc	r2, r2, #0
-	str	r7, [sp, #20]           @ 4-byte Spill
-	str	r2, [sp, #28]           @ 4-byte Spill
-	mul	r2, r3, r5
-	ldr	r5, [sp, #56]           @ 4-byte Reload
-	umull	r4, r7, r2, r5
-	adds	r3, r4, r3
-	str	r7, [sp, #24]           @ 4-byte Spill
-	umull	r3, r7, r2, r8
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	adcs	lr, r3, r0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	str	r7, [sp, #12]           @ 4-byte Spill
-	umull	r3, r7, r2, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r12, r3, r1
-	str	r7, [sp, #8]            @ 4-byte Spill
-	umull	r3, r10, r2, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r3, r3, r6
-	umull	r6, r9, r2, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r5, r6, r11
-	ldr	r11, [sp, #76]          @ 4-byte Reload
-	umull	r6, r1, r2, r0
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r6, r6, r0
-	umull	r7, r0, r2, r11
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r2, r7, r2
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adc	r7, r7, #0
-	adds	r4, lr, r4
-	str	r4, [sp, #28]           @ 4-byte Spill
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	adcs	r4, r12, r4
-	ldr	r12, [sp, #52]          @ 4-byte Reload
-	str	r4, [sp, #24]           @ 4-byte Spill
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	adcs	r3, r3, r4
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	str	r3, [sp, #20]           @ 4-byte Spill
-	adcs	r3, r5, r10
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	str	r3, [sp, #16]           @ 4-byte Spill
-	adcs	r3, r6, r9
-	ldr	r9, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	str	r3, [sp, #12]           @ 4-byte Spill
-	ldr	r3, [sp, #80]           @ 4-byte Reload
-	adc	r0, r7, r0
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp]                @ 4-byte Reload
-	umull	r2, r6, r0, r12
-	umull	r11, lr, r0, r1
-	adds	r2, lr, r2
-	umull	r1, r2, r0, r3
-	adcs	r6, r6, r1
-	umlal	lr, r1, r0, r12
-	umull	r6, r3, r0, r5
-	adcs	r5, r2, r6
-	umull	r6, r2, r0, r4
-	adcs	r10, r3, r6
-	umull	r6, r3, r0, r7
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adcs	r4, r2, r6
-	umull	r6, r2, r0, r9
-	ldr	r9, [sp, #56]           @ 4-byte Reload
-	adcs	r3, r3, r6
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	adc	r2, r2, #0
-	adds	r7, r11, r7
-	adcs	r0, lr, r6
-	ldr	r6, [sp, #20]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	adcs	r6, r5, r6
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	adcs	r11, r10, r5
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	adcs	r10, r4, r5
-	ldr	r5, [sp, #4]            @ 4-byte Reload
-	ldr	r4, [sp, #92]           @ 4-byte Reload
-	adcs	r3, r3, r5
-	str	r3, [sp, #28]           @ 4-byte Spill
-	ldr	r3, [sp, #60]           @ 4-byte Reload
-	adc	r2, r2, #0
-	str	r2, [sp, #24]           @ 4-byte Spill
-	mul	r2, r7, r3
-	umull	r3, r5, r2, r9
-	adds	r3, r3, r7
-	str	r5, [sp, #20]           @ 4-byte Spill
-	umull	r3, r7, r2, r8
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	adcs	r8, r3, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	umull	r3, lr, r2, r7
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r3, r1
-	umull	r3, r12, r2, r4
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	adcs	r3, r3, r6
-	umull	r6, r5, r2, r4
-	adcs	r6, r6, r11
-	umull	r4, r11, r2, r7
-	adcs	r4, r4, r10
-	umull	r7, r10, r2, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r2, r7, r0
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r7, r0, #0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adds	r0, r8, r0
-	ldr	r8, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adcs	r0, r3, lr
-	ldr	r3, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
+	.pad	#60                             @ body of mcl_fp_mulUnitPre12L: call mulPv384x32 into a stack buffer, then copy 13 words to the caller's r0
+	sub	sp, sp, #60                     @ reserve 60 bytes of scratch; the words copied out below occupy sp+0 .. sp+48
+	mov	r4, r0                          @ keep the caller's destination pointer in r4 across the call
+	mov	r0, sp                          @ the helper writes its result to the stack buffer
+	bl	mulPv384x32                     @ r1 and r2 are passed through unchanged from this function's caller
+	add	r7, sp, #24                     @ r7 -> buffer words 6..8
+	add	r3, sp, #36                     @ r3 -> buffer words 9..12
+	ldm	sp, {r8, r9, r10, r11, r12, lr} @ load buffer words 0..5
+	ldm	r7, {r5, r6, r7}                @ load buffer words 6..8 (r7 is reloaded last, so the base is consumed safely)
+	ldm	r3, {r0, r1, r2, r3}            @ load buffer words 9..12
+	str	r0, [r4, #36]                   @ dst[9]
+	add	r0, r4, #24                     @ r0 -> dst[6], base for the final stm
+	str	r1, [r4, #40]                   @ dst[10]
+	str	r2, [r4, #44]                   @ dst[11]
+	str	r3, [r4, #48]                   @ dst[12]
+	stm	r4, {r8, r9, r10, r11, r12, lr} @ dst[0..5]
+	stm	r0, {r5, r6, r7}                @ dst[6..8]
+	add	sp, sp, #60                     @ release the scratch buffer
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr                          @ return
+.Lfunc_end57:
+	.size	mcl_fp_mulUnitPre12L, .Lfunc_end57-mcl_fp_mulUnitPre12L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fpDbl_mulPre12L             @ -- Begin function mcl_fpDbl_mulPre12L
+	.p2align	2
+	.type	mcl_fpDbl_mulPre12L,%function
+	.code	32                              @ @mcl_fpDbl_mulPre12L
+mcl_fpDbl_mulPre12L:
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#196
+	sub	sp, sp, #196
+	mov	r6, r2
+	mov	r5, r1
+	mov	r4, r0
+	bl	mcl_fpDbl_mulPre6L
+	add	r0, r4, #48
+	add	r1, r5, #24
+	add	r2, r6, #24
+	bl	mcl_fpDbl_mulPre6L
+	add	r7, r6, #24
+	ldr	lr, [r6]
+	ldmib	r6, {r8, r9, r10, r11, r12}
+	ldm	r7, {r0, r1, r2, r3, r7}
+	adds	lr, r0, lr
+	ldr	r6, [r6, #44]
+	adcs	r0, r1, r8
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	adcs	r0, r2, r9
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	adcs	r0, r3, r10
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	adcs	r0, r7, r11
+	str	r0, [sp, #68]                   @ 4-byte Spill
 	adcs	r0, r6, r12
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	adcs	r0, r4, r5
-	str	r0, [sp, #12]           @ 4-byte Spill
-	adcs	r0, r2, r11
-	str	r0, [sp, #8]            @ 4-byte Spill
-	adc	r0, r7, r10
-	ldr	r7, [sp, #80]           @ 4-byte Reload
-	ldr	r10, [sp, #44]          @ 4-byte Reload
-	str	r0, [sp, #4]            @ 4-byte Spill
-	umull	r4, r0, r6, r1
-	umull	r11, r2, r6, r3
-	adds	r4, r2, r4
-	umull	r3, r4, r6, r7
-	adcs	r0, r0, r3
-	umlal	r2, r3, r6, r1
-	umull	r0, r7, r6, r8
-	adcs	r5, r4, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	umull	r4, r1, r6, r0
-	mov	r0, r6
-	adcs	r4, r7, r4
-	umull	r7, r12, r6, r10
-	ldr	r6, [sp, #68]           @ 4-byte Reload
-	adcs	lr, r1, r7
-	umull	r7, r1, r0, r6
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r7, r12, r7
-	adc	r12, r1, #0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adds	r0, r11, r0
-	adcs	r2, r2, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adcs	r3, r3, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r6, r5, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r11, r4, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r1, lr, r1
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r1, r7, r1
-	str	r1, [sp, #24]           @ 4-byte Spill
-	adc	r1, r12, #0
-	ldr	r12, [sp, #76]          @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	mul	r4, r0, r1
-	umull	r7, r1, r4, r9
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	mov	r0, #0
+	ldr	r1, [r5, #16]
+	adc	r0, r0, #0
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	add	r9, r5, #36
+	ldr	r1, [r5, #20]
+	ldm	r5, {r0, r8, r12}
+	ldr	r7, [r5, #24]
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [r5, #28]
 	adds	r0, r7, r0
-	umull	r0, r7, r4, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	lr, r0, r2
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	umull	r2, r0, r4, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #12]           @ 4-byte Spill
-	adcs	r2, r2, r3
-	umull	r3, r0, r4, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r3, r3, r6
-	umull	r6, r5, r4, r1
-	adcs	r6, r6, r11
-	umull	r1, r11, r4, r7
-	umull	r7, r9, r4, r12
-	ldr	r12, [sp, #52]          @ 4-byte Reload
-	adcs	r1, r1, r0
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r4, r7, r0
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r7, r7, #0
-	adds	r0, lr, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
+	ldr	r10, [r5, #32]
+	ldr	r6, [r5, #12]
+	ldm	r9, {r2, r3, r9}
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	str	r0, [sp, #124]
+	adcs	r0, r1, r8
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	add	r1, sp, #124
+	str	r0, [sp, #128]
+	adcs	r0, r10, r12
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	str	r0, [sp, #132]
+	adcs	r0, r2, r6
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	add	r2, sp, #100
+	str	r0, [sp, #136]
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r11, [sp, #48]                  @ 4-byte Reload
 	adcs	r0, r3, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	adcs	r0, r1, r5
-	str	r0, [sp, #16]           @ 4-byte Spill
-	adcs	r0, r4, r11
-	str	r0, [sp, #12]           @ 4-byte Spill
-	adc	r0, r7, r9
-	ldr	r9, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r4, [r9, #16]
-	umull	r11, r3, r4, r2
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	umull	r0, r1, r4, r12
-	adds	r0, r3, r0
-	umull	r5, r0, r4, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	umlal	r3, r5, r4, r12
-	umull	r1, r7, r4, r8
-	adcs	r8, r0, r1
-	umull	r1, r0, r4, r2
-	adcs	lr, r7, r1
-	umull	r7, r1, r4, r10
-	adcs	r2, r0, r7
-	umull	r7, r0, r4, r6
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r4, r11, r7
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adcs	r3, r3, r7
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	adcs	r5, r5, r7
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	adcs	r7, r8, r7
-	adcs	r11, lr, r6
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	adcs	r10, r2, r6
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mul	r0, r4, r1
-	umull	r1, r6, r0, r2
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	adds	r1, r1, r4
-	str	r6, [sp, #24]           @ 4-byte Spill
-	ldr	r4, [sp, #84]           @ 4-byte Reload
-	umull	r1, r6, r0, r2
-	adcs	lr, r1, r3
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r6, [sp, #20]           @ 4-byte Spill
-	umull	r3, r2, r0, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r3, r3, r5
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	umull	r5, r8, r0, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r5, r5, r7
-	umull	r7, r12, r0, r1
-	adcs	r6, r7, r11
-	ldr	r11, [sp, #76]          @ 4-byte Reload
-	umull	r7, r1, r0, r4
-	adcs	r7, r7, r10
-	umull	r4, r10, r0, r11
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #28]           @ 4-byte Reload
-	adc	r4, r4, #0
-	adds	r2, lr, r2
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r2, r3, r2
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	adcs	r11, r5, r2
-	adcs	r2, r6, r8
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	ldr	r8, [sp, #76]           @ 4-byte Reload
-	str	r2, [sp, #24]           @ 4-byte Spill
-	adcs	r2, r7, r12
-	ldr	r7, [r9, #20]
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r2, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #16]           @ 4-byte Spill
-	adc	r0, r4, r10
-	str	r0, [sp, #12]           @ 4-byte Spill
-	umull	r4, r0, r7, r3
-	umull	r10, r2, r7, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adds	r4, r2, r4
-	umull	r5, r4, r7, r1
-	adcs	r0, r0, r5
-	umlal	r2, r5, r7, r3
-	ldr	r3, [sp, #68]           @ 4-byte Reload
-	umull	r0, r1, r7, r6
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	adcs	lr, r4, r0
-	umull	r4, r0, r7, r6
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	adcs	r12, r1, r4
-	umull	r4, r1, r7, r6
-	adcs	r9, r0, r4
-	umull	r4, r0, r7, r3
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	adc	r0, r0, #0
-	adds	r4, r10, r3
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	adcs	r2, r2, r3
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	adcs	r5, r5, r11
-	adcs	r7, lr, r3
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	adcs	r11, r12, r3
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	adcs	r9, r9, r3
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	adcs	r1, r1, r3
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	str	r0, [sp, #140]
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r8, [sp, #56]                   @ 4-byte Reload
+	adcs	r10, r9, r0
+	mov	r0, #0
+	adc	r9, r0, #0
+	add	r0, sp, #148
+	ldr	r5, [sp, #60]                   @ 4-byte Reload
+	ldr	r7, [sp, #68]                   @ 4-byte Reload
+	ldr	r6, [sp, #76]                   @ 4-byte Reload
+	str	lr, [sp, #84]                   @ 4-byte Spill
+	str	lr, [sp, #100]
+	str	r11, [sp, #104]
+	str	r8, [sp, #108]
+	str	r5, [sp, #112]
+	str	r7, [sp, #116]
+	str	r10, [sp, #144]
+	str	r6, [sp, #120]
+	bl	mcl_fpDbl_mulPre6L
+	rsb	r0, r9, #0
+	mov	r1, r9
+	str	r9, [sp, #52]                   @ 4-byte Spill
+	and	r2, r6, r0
+	and	r9, r7, r0
+	and	r12, r5, r0
+	and	r7, r8, r0
+	and	lr, r11, r0
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	sub	r6, r1, r1, lsl #1
+	and	r6, r0, r6
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adds	r1, r6, r0
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r5, lr, r0
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r3, r7, r0
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r11, r12, r0
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r8, r9, r0
+	mov	r0, #0
+	adcs	r10, r2, r10
 	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mul	r0, r4, r1
-	umull	r1, r6, r0, r3
-	ldr	r3, [sp, #72]           @ 4-byte Reload
-	adds	r1, r1, r4
-	str	r6, [sp, #24]           @ 4-byte Spill
-	ldr	r4, [sp, #84]           @ 4-byte Reload
-	umull	r1, r6, r0, r3
-	ldr	r3, [sp, #100]          @ 4-byte Reload
-	adcs	r12, r1, r2
-	str	r6, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	umull	r2, r10, r0, r3
-	ldr	r3, [sp, #92]           @ 4-byte Reload
-	adcs	r2, r2, r5
-	umull	r5, lr, r0, r3
-	ldr	r3, [sp, #88]           @ 4-byte Reload
-	adcs	r5, r5, r7
-	umull	r7, r6, r0, r3
-	adcs	r7, r7, r11
-	umull	r3, r11, r0, r4
-	adcs	r3, r3, r9
-	umull	r4, r9, r0, r8
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #28]           @ 4-byte Reload
-	adc	r4, r4, #0
-	adds	r8, r12, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	str	r1, [sp, #16]           @ 4-byte Spill
-	adcs	r1, r5, r10
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	adcs	r1, r7, lr
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	adcs	r1, r3, r6
-	adcs	r0, r0, r11
-	str	r1, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r9, r4, r9
-	ldr	r4, [r0, #24]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	umull	r6, lr, r4, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	umull	r12, r1, r4, r5
-	umull	r11, r2, r4, r0
-	mov	r0, r6
-	mov	r3, r2
-	adds	r2, r2, r12
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	umlal	r3, r0, r4, r5
-	umull	r1, r2, r4, r6
-	adcs	r5, lr, r1
-	umull	r6, r1, r4, r7
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	adcs	lr, r2, r6
-	umull	r6, r2, r4, r7
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	adcs	r12, r1, r6
-	umull	r6, r1, r4, r7
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	adcs	r2, r2, r6
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	adc	r1, r1, #0
-	adds	r4, r11, r8
-	ldr	r11, [sp, #88]          @ 4-byte Reload
-	adcs	r3, r3, r6
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	adcs	r6, r0, r6
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r5, r5, r0
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r10, lr, r0
-	adcs	r7, r12, r7
-	adcs	r12, r2, r9
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r7, [sp, #96]           @ 4-byte Spill
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	adc	lr, r1, #0
-	mul	r1, r4, r2
-	umull	r2, r8, r1, r7
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	adds	r2, r2, r4
-	umull	r2, r9, r1, r7
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	umull	r4, r0, r1, r7
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	adcs	r3, r4, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	adcs	r0, r2, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	umull	r2, r0, r1, r7
-	str	r0, [sp, #68]           @ 4-byte Spill
-	adcs	r0, r2, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	umull	r5, r0, r1, r11
-	adcs	r2, r5, r10
-	ldr	r10, [sp, #84]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	umull	r6, r0, r1, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r6, r6, r0
-	umull	r4, r0, r1, r5
-	adcs	r1, r4, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adc	r4, lr, #0
-	adds	r8, r3, r8
-	ldr	r3, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	adcs	lr, r3, r9
-	ldr	r3, [sp, #68]           @ 4-byte Reload
-	adcs	r12, r2, r3
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	adcs	r3, r6, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r3, [sp, #96]           @ 4-byte Spill
-	adcs	r2, r1, r2
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adc	r9, r4, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	subs	r4, r8, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	sbcs	r6, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	sbcs	r1, lr, r1
-	sbcs	r7, r12, r7
-	sbcs	r11, r3, r11
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	sbcs	r10, r2, r10
-	sbc	r5, r9, r5
-	asr	r0, r5, #31
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	cmp	r0, #0
-	movlt	r4, r8
-	movlt	r1, lr
-	str	r4, [r3]
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	movlt	r6, r4
-	cmp	r0, #0
-	str	r6, [r3, #4]
-	str	r1, [r3, #8]
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	movlt	r7, r12
-	movlt	r10, r2
-	str	r7, [r3, #12]
-	movlt	r11, r1
+	moveq	r3, r7
+	moveq	r5, lr
+	moveq	r1, r6
+	moveq	r11, r12
+	moveq	r8, r9
 	cmp	r0, #0
-	movlt	r5, r9
-	str	r11, [r3, #16]
-	str	r10, [r3, #20]
-	str	r5, [r3, #24]
-	add	sp, sp, #104
+	moveq	r10, r2
+	ldr	r2, [sp, #92]                   @ 4-byte Reload
+	ldr	r7, [sp, #172]
+	and	r12, r0, r2
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	ldr	r6, [sp, #180]
+	and	r2, r0, r2
+	adds	r0, r7, r1
+	ldr	r7, [sp, #176]
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	adcs	r0, r7, r5
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	adcs	r0, r6, r3
+	ldr	r6, [sp, #184]
+	ldr	r5, [sp, #188]
+	adcs	r11, r6, r11
+	ldr	r1, [sp, #192]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	adcs	r0, r5, r8
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	adcs	r0, r1, r10
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	adc	r0, r2, r12
+	ldm	r4, {r5, r8}
+	add	r10, r4, #12
+	ldr	r12, [sp, #148]
+	ldr	lr, [sp, #152]
+	subs	r5, r12, r5
+	ldr	r7, [r4, #8]
+	ldr	r2, [sp, #156]
+	sbcs	r12, lr, r8
+	ldm	r10, {r6, r9, r10}
+	sbcs	r2, r2, r7
+	ldr	r3, [sp, #160]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	sbcs	r3, r3, r6
+	ldr	r6, [sp, #164]
+	ldr	r0, [r4, #24]
+	sbcs	r7, r6, r9
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	ldr	r7, [sp, #168]
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	sbcs	r7, r7, r10
+	str	r7, [sp, #60]                   @ 4-byte Spill
+	ldr	r7, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [r4, #28]
+	sbcs	r0, r7, r0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	sbcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [r4, #32]
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	sbcs	r0, r1, r0
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [r4, #36]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	sbcs	r0, r11, r0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [r4, #40]
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	sbcs	r0, r1, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [r4, #44]
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	sbcs	r0, r1, r0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r9, [r4, #48]
+	sbc	r0, r0, #0
+	ldr	r10, [r4, #52]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	subs	r0, r5, r9
+	ldr	r11, [r4, #56]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	sbcs	r0, r12, r10
+	ldr	r8, [r4, #60]
+	add	r12, r4, #76
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	sbcs	r0, r2, r11
+	ldr	r5, [r4, #64]
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	sbcs	r0, r3, r8
+	sbcs	r7, r7, r5
+	str	r7, [sp]                        @ 4-byte Spill
+	ldr	lr, [r4, #68]
+	ldr	r7, [sp, #60]                   @ 4-byte Reload
+	ldr	r6, [r4, #72]
+	sbcs	r7, r7, lr
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	ldm	r12, {r2, r3, r12}
+	sbcs	r7, r7, r6
+	str	r7, [sp, #8]                    @ 4-byte Spill
+	ldr	r7, [sp, #52]                   @ 4-byte Reload
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	sbcs	r7, r7, r2
+	str	r7, [sp, #12]                   @ 4-byte Spill
+	ldr	r7, [sp, #48]                   @ 4-byte Reload
+	ldr	r0, [r4, #88]
+	sbcs	r7, r7, r3
+	str	r7, [sp, #48]                   @ 4-byte Spill
+	ldr	r7, [sp, #44]                   @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	sbcs	r7, r7, r12
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	ldr	r7, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [r4, #92]
+	sbcs	r0, r7, r0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	sbcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	sbc	r0, r0, #0
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r7, [sp, #24]                   @ 4-byte Reload
+	adds	r0, r0, r1
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	str	r0, [r4, #24]
+	adcs	r1, r1, r7
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
+	str	r1, [r4, #28]
+	adcs	r0, r0, r7
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r7, [sp, #16]                   @ 4-byte Reload
+	str	r0, [r4, #32]
+	adcs	r1, r1, r7
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r7, [sp]                        @ 4-byte Reload
+	str	r1, [r4, #36]
+	adcs	r0, r0, r7
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	ldr	r7, [sp, #4]                    @ 4-byte Reload
+	str	r0, [r4, #40]
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	adcs	r1, r1, r7
+	str	r1, [r4, #44]
+	adcs	r0, r9, r0
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	str	r0, [r4, #48]
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r10, r1
+	str	r1, [r4, #52]
+	adcs	r9, r11, r0
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	str	r9, [r4, #56]
+	adcs	r1, r8, r0
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r5, r5, r0
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r7, lr, r0
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r6, r6, r0
+	adcs	r2, r2, #0
+	adcs	r3, r3, #0
+	adcs	r0, r12, #0
+	add	r12, r4, #60
+	stm	r12, {r1, r5, r7}
+	str	r0, [r4, #84]
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, #0
+	str	r6, [r4, #72]
+	adc	r1, r1, #0
+	str	r2, [r4, #76]
+	str	r3, [r4, #80]
+	str	r0, [r4, #88]
+	str	r1, [r4, #92]
+	add	sp, sp, #196
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end99:
-	.size	mcl_fp_montNF7L, .Lfunc_end99-mcl_fp_montNF7L
+.Lfunc_end58:
+	.size	mcl_fpDbl_mulPre12L, .Lfunc_end58-mcl_fpDbl_mulPre12L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_montRed7L
-	.align	2
-	.type	mcl_fp_montRed7L,%function
-mcl_fp_montRed7L:                       @ @mcl_fp_montRed7L
+                                        @ -- End function
+	.globl	mcl_fpDbl_sqrPre12L             @ -- Begin function mcl_fpDbl_sqrPre12L
+	.p2align	2
+	.type	mcl_fpDbl_sqrPre12L,%function
+	.code	32                              @ @mcl_fpDbl_sqrPre12L
+mcl_fpDbl_sqrPre12L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#120
-	sub	sp, sp, #120
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r1, #4]
-	ldr	r10, [r2, #-4]
-	ldr	r4, [r1]
-	ldr	r3, [r2]
-	ldr	r7, [r2, #8]
-	ldr	r5, [r2, #4]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r1, #8]
-	str	r4, [sp, #60]           @ 4-byte Spill
-	str	r7, [sp, #108]          @ 4-byte Spill
-	str	r3, [sp, #116]          @ 4-byte Spill
-	str	r5, [sp, #24]           @ 4-byte Spill
-	str	r10, [sp, #92]          @ 4-byte Spill
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r1, #12]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	mul	r0, r4, r10
-	umull	r4, r12, r0, r3
-	umull	lr, r6, r0, r7
-	str	r4, [sp, #52]           @ 4-byte Spill
-	ldr	r4, [r2, #24]
-	str	r6, [sp, #72]           @ 4-byte Spill
-	mov	r9, lr
-	mov	r3, r12
-	umlal	r3, r9, r0, r5
-	umull	r7, r6, r0, r4
-	str	r4, [sp, #104]          @ 4-byte Spill
-	ldr	r4, [r2, #20]
-	str	r7, [sp, #68]           @ 4-byte Spill
-	str	r6, [sp, #64]           @ 4-byte Spill
-	umull	r7, r6, r0, r4
-	str	r4, [sp, #112]          @ 4-byte Spill
-	ldr	r4, [r2, #16]
-	ldr	r2, [r2, #12]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	str	r6, [sp, #48]           @ 4-byte Spill
-	str	r4, [sp, #96]           @ 4-byte Spill
-	umull	r8, r7, r0, r4
-	str	r2, [sp, #100]          @ 4-byte Spill
-	umull	r4, r6, r0, r2
-	umull	r11, r2, r0, r5
-	adds	r0, r12, r11
-	ldr	r11, [r1, #36]
-	adcs	r0, r2, lr
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	ldr	lr, [r1, #28]
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r5, r6, r8
-	ldr	r8, [sp, #108]          @ 4-byte Reload
-	ldr	r6, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adds	r0, r0, r2
-	ldr	r2, [r1, #24]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	ldr	r3, [r1, #20]
-	mul	r4, r0, r10
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	ldr	r10, [r1, #40]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1, #16]
-	umull	r12, r1, r4, r8
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	ldr	r9, [sp, #96]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #80]           @ 4-byte Spill
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	umull	r7, r1, r4, r6
-	str	r7, [sp, #28]           @ 4-byte Spill
-	mov	r7, r12
-	adcs	r0, r3, r0
-	ldr	r3, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	mov	r0, r1
-	umlal	r0, r7, r4, r5
-	adcs	r2, r2, r3
-	str	r2, [sp, #68]           @ 4-byte Spill
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	adcs	r2, lr, r2
-	ldr	lr, [sp, #100]          @ 4-byte Reload
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	adcs	r2, r2, #0
-	str	r2, [sp, #60]           @ 4-byte Spill
-	adcs	r2, r11, #0
-	mov	r11, r5
-	str	r2, [sp, #56]           @ 4-byte Spill
-	adcs	r2, r10, #0
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	adcs	r2, r2, #0
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	adcs	r2, r2, #0
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	adcs	r2, r2, #0
-	str	r2, [sp, #40]           @ 4-byte Spill
-	mov	r2, #0
-	adc	r2, r2, #0
-	str	r2, [sp, #36]           @ 4-byte Spill
-	umull	r3, r2, r4, r5
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	adds	r1, r1, r3
-	adcs	r2, r2, r12
-	umull	r1, r3, r4, r9
-	umull	r2, r12, r4, lr
-	adcs	r2, r5, r2
-	adcs	r10, r12, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r12, [sp, #92]          @ 4-byte Reload
-	umull	r5, r2, r4, r1
-	adcs	r1, r3, r5
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	umull	r5, r3, r4, r1
-	adcs	r2, r2, r5
-	ldr	r5, [sp]                @ 4-byte Reload
-	str	r2, [sp, #8]            @ 4-byte Spill
-	adc	r2, r3, #0
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	adds	r4, r3, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r4, r0, r12
-	str	r0, [sp, #32]           @ 4-byte Spill
-	umull	r3, r0, r4, r8
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	umull	r0, r2, r4, r6
-	ldr	r6, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	mov	r5, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	adcs	r6, r7, r6
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	mov	r0, r2
-	str	r6, [sp, #76]           @ 4-byte Spill
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	umlal	r0, r5, r4, r11
-	adcs	r6, r7, r6
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	str	r6, [sp, #72]           @ 4-byte Spill
-	ldr	r6, [sp, #60]           @ 4-byte Reload
-	adcs	r6, r7, r6
-	umull	r7, r8, r4, r1
-	str	r6, [sp, #68]           @ 4-byte Spill
-	ldr	r6, [sp, #56]           @ 4-byte Reload
-	adcs	r6, r6, #0
-	str	r6, [sp, #64]           @ 4-byte Spill
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	adcs	r6, r6, #0
-	str	r6, [sp, #60]           @ 4-byte Spill
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	adcs	r6, r6, #0
-	str	r6, [sp, #56]           @ 4-byte Spill
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	adcs	r6, r6, #0
-	str	r6, [sp, #52]           @ 4-byte Spill
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	adcs	r6, r6, #0
-	str	r6, [sp, #48]           @ 4-byte Spill
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	adc	r6, r6, #0
-	str	r6, [sp, #44]           @ 4-byte Spill
-	umull	r6, r10, r4, r11
-	adds	r1, r2, r6
-	adcs	r2, r10, r3
-	umull	r1, r6, r4, lr
-	ldr	lr, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	adcs	r10, r2, r1
-	umull	r2, r3, r4, r9
-	adcs	r9, r6, r2
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	umull	r6, r1, r4, r2
-	adcs	r3, r3, r6
-	adcs	r1, r1, r7
-	str	r3, [sp, #16]           @ 4-byte Spill
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adc	r8, r8, #0
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	adds	r7, r3, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mul	r7, r0, r12
-	str	r0, [sp, #40]           @ 4-byte Spill
-	umull	r3, r0, r7, lr
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	umull	r4, r1, r7, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	str	r4, [sp, #36]           @ 4-byte Spill
-	mov	r4, r3
+	.pad	#196
+	sub	sp, sp, #196
+	mov	r2, r1
+	mov	r5, r1
+	mov	r4, r0
+	bl	mcl_fpDbl_mulPre6L
+	add	r1, r5, #24
+	add	r0, r4, #48
+	mov	r2, r1
+	bl	mcl_fpDbl_mulPre6L
+	ldm	r5, {r0, r1, r10}
+	add	r3, r5, #32
+	add	lr, r5, #12
+	ldr	r6, [r5, #24]
+	ldr	r7, [r5, #28]
+	adds	r9, r6, r0
+	ldm	lr, {r8, r12, lr}
+	adcs	r11, r7, r1
+	ldm	r3, {r0, r1, r3}
+	adcs	r10, r0, r10
+	ldr	r2, [r5, #44]
+	adcs	r5, r1, r8
+	mov	r0, #0
+	adcs	r7, r3, r12
+	add	r1, sp, #124
+	adcs	r6, r2, lr
+	add	r2, sp, #100
+	adc	r8, r0, #0
+	add	r0, sp, #148
+	str	r9, [sp, #124]
+	str	r9, [sp, #100]
+	str	r11, [sp, #128]
+	str	r11, [sp, #104]
+	str	r10, [sp, #132]
+	str	r10, [sp, #108]
+	str	r5, [sp, #136]
+	str	r5, [sp, #112]
+	str	r7, [sp, #140]
+	str	r7, [sp, #116]
+	str	r6, [sp, #144]
+	str	r6, [sp, #120]
+	bl	mcl_fpDbl_mulPre6L
+	rsb	r0, r8, #0
+	ldr	lr, [sp, #152]
+	and	r1, r7, r0
+	and	r7, r5, r0
+	and	r2, r6, r0
+	lsr	r6, r7, #31
+	lsl	r7, r7, #1
+	lsl	r3, r2, #1
+	orr	r12, r3, r1, lsr #31
+	orr	r1, r6, r1, lsl #1
+	and	r6, r10, r0
+	and	r0, r11, r0
+	ldr	r3, [sp, #172]
+	orr	r7, r7, r6, lsr #31
+	add	r10, r4, #12
+	lsr	r5, r0, #31
+	orr	r6, r5, r6, lsl #1
+	sub	r5, r8, r8, lsl #1
+	lsl	r0, r0, #1
+	and	r5, r9, r5
+	adds	r3, r3, r5, lsl #1
+	orr	r0, r0, r5, lsr #31
+	ldr	r5, [sp, #176]
+	str	r3, [sp, #88]                   @ 4-byte Spill
 	adcs	r0, r5, r0
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	ldr	r10, [sp, #104]         @ 4-byte Reload
-	adcs	r5, r9, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	mov	r0, r1
-	str	r5, [sp, #80]           @ 4-byte Spill
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	umlal	r0, r4, r7, r11
-	adcs	r5, r6, r5
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	str	r5, [sp, #76]           @ 4-byte Spill
-	ldr	r5, [sp, #68]           @ 4-byte Reload
-	adcs	r5, r6, r5
-	str	r5, [sp, #72]           @ 4-byte Spill
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	adcs	r6, r8, r5
-	ldr	r8, [sp, #100]          @ 4-byte Reload
-	str	r6, [sp, #68]           @ 4-byte Spill
-	ldr	r6, [sp, #60]           @ 4-byte Reload
-	adcs	r6, r6, #0
-	str	r6, [sp, #64]           @ 4-byte Spill
-	ldr	r6, [sp, #56]           @ 4-byte Reload
-	adcs	r6, r6, #0
-	str	r6, [sp, #60]           @ 4-byte Spill
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	adcs	r6, r6, #0
-	str	r6, [sp, #56]           @ 4-byte Spill
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	adcs	r6, r6, #0
-	str	r6, [sp, #52]           @ 4-byte Spill
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	adc	r6, r6, #0
-	str	r6, [sp, #48]           @ 4-byte Spill
-	umull	r9, r6, r7, r10
-	str	r6, [sp, #44]           @ 4-byte Spill
-	umull	r6, r5, r7, r11
-	adds	r1, r1, r6
-	umull	r6, r12, r7, r2
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r2, r5, r3
-	umull	r2, r3, r7, r8
-	adcs	r1, r1, r2
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	umull	r5, r2, r7, r1
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adcs	r3, r3, r5
-	ldr	r5, [sp, #116]          @ 4-byte Reload
-	adcs	r2, r2, r6
-	str	r3, [sp, #20]           @ 4-byte Spill
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	str	r2, [sp, #16]           @ 4-byte Spill
-	adcs	r2, r12, r9
-	ldr	r9, [sp, #92]           @ 4-byte Reload
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	adc	r2, r2, #0
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	adds	r6, r7, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r6, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	umull	r7, r0, r6, lr
-	str	r0, [sp, #32]           @ 4-byte Spill
-	umull	r0, r2, r6, r5
-	mov	r12, r7
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r2
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	umlal	r4, r12, r6, r11
-	adcs	r0, r3, r0
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	umull	r3, r0, r6, r10
-	str	r3, [sp, #28]           @ 4-byte Spill
-	ldr	r3, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	umull	lr, r0, r6, r3
-	str	r0, [sp, #20]           @ 4-byte Spill
-	umull	r10, r0, r6, r11
-	adds	r2, r2, r10
-	adcs	r0, r0, r7
-	umull	r2, r10, r6, r1
-	umull	r0, r1, r6, r8
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	adcs	r8, r6, r0
-	adcs	r0, r1, r2
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r10, r10, lr
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	lr, r0, #0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adds	r7, r2, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	mul	r4, r0, r9
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	umull	r7, r2, r4, r0
-	str	r2, [sp, #40]           @ 4-byte Spill
-	umull	r2, r0, r4, r5
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	str	r2, [sp, #44]           @ 4-byte Spill
-	mov	r6, r0
-	mov	r2, r7
-	umlal	r6, r2, r4, r11
-	adcs	r5, r12, r5
-	ldr	r12, [sp, #100]         @ 4-byte Reload
-	str	r5, [sp, #84]           @ 4-byte Spill
-	ldr	r5, [sp, #80]           @ 4-byte Reload
-	adcs	r5, r8, r5
-	ldr	r8, [sp, #104]          @ 4-byte Reload
-	str	r5, [sp, #80]           @ 4-byte Spill
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	adcs	r5, r1, r5
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r5, [sp, #76]           @ 4-byte Spill
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	adcs	r5, r10, r5
-	str	r5, [sp, #72]           @ 4-byte Spill
-	ldr	r5, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, lr, r1
-	ldr	lr, [sp, #96]           @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, #0
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, #0
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #52]           @ 4-byte Spill
-	umull	r5, r1, r4, r8
-	str	r5, [sp, #32]           @ 4-byte Spill
-	str	r1, [sp, #36]           @ 4-byte Spill
-	umull	r5, r1, r4, r3
-	str	r5, [sp, #20]           @ 4-byte Spill
-	umull	r9, r5, r4, r11
-	str	r1, [sp, #28]           @ 4-byte Spill
-	adds	r0, r0, r9
-	umull	r3, r9, r4, lr
-	umull	r0, r1, r4, r12
-	adcs	r4, r5, r7
-	ldr	r4, [sp, #40]           @ 4-byte Reload
-	adcs	r10, r4, r0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	ldr	r4, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r1, r3
-	adcs	r3, r9, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r9, [sp, #112]          @ 4-byte Reload
-	adcs	r7, r4, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	adc	r5, r0, #0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adds	r4, r0, r4
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r4, r6, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r6, [sp, #108]          @ 4-byte Reload
-	adcs	r2, r2, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r2, [sp, #84]           @ 4-byte Spill
-	adcs	r0, r10, r0
-	mov	r10, r4
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	ldr	r5, [sp, #180]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	adcs	r11, r5, r6
+	ldr	r5, [sp, #184]
+	ldr	r3, [sp, #160]
+	adcs	r0, r5, r7
+	ldr	r5, [sp, #188]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	adcs	r0, r5, r1
+	ldr	r5, [sp, #192]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	adcs	r0, r5, r12
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	adc	r0, r8, r2, lsr #31
+	ldm	r4, {r6, r7, r8}
+	ldr	r12, [sp, #148]
+	ldr	r2, [sp, #156]
+	subs	r12, r12, r6
+	ldm	r10, {r5, r9, r10}
+	sbcs	lr, lr, r7
+	sbcs	r2, r2, r8
+	ldr	r6, [sp, #164]
+	sbcs	r3, r3, r5
+	ldr	r7, [sp, #168]
+	sbcs	r5, r6, r9
+	str	r5, [sp, #64]                   @ 4-byte Spill
+	sbcs	r5, r7, r10
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [r4, #24]
+	str	r5, [sp, #60]                   @ 4-byte Spill
+	ldr	r5, [sp, #88]                   @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	sbcs	r0, r5, r0
+	ldr	r1, [r4, #28]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	sbcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [r4, #32]
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	sbcs	r0, r11, r0
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [r4, #36]
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	sbcs	r0, r1, r0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [r4, #40]
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	sbcs	r0, r1, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [r4, #44]
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	sbcs	r0, r1, r0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r9, [r4, #48]
+	sbc	r0, r0, #0
+	ldr	r10, [r4, #52]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	subs	r0, r12, r9
+	ldr	r11, [r4, #56]
+	add	r12, r4, #76
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	sbcs	r0, lr, r10
+	ldr	r8, [r4, #60]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	sbcs	r0, r2, r11
+	ldr	r5, [r4, #64]
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	sbcs	r0, r3, r8
+	sbcs	r7, r7, r5
+	str	r7, [sp]                        @ 4-byte Spill
+	ldr	lr, [r4, #68]
+	ldr	r7, [sp, #60]                   @ 4-byte Reload
+	ldr	r6, [r4, #72]
+	sbcs	r7, r7, lr
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	ldm	r12, {r2, r3, r12}
+	sbcs	r7, r7, r6
+	str	r7, [sp, #8]                    @ 4-byte Spill
+	ldr	r7, [sp, #52]                   @ 4-byte Reload
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	sbcs	r7, r7, r2
+	str	r7, [sp, #12]                   @ 4-byte Spill
+	ldr	r7, [sp, #48]                   @ 4-byte Reload
+	ldr	r0, [r4, #88]
+	sbcs	r7, r7, r3
+	str	r7, [sp, #48]                   @ 4-byte Spill
+	ldr	r7, [sp, #44]                   @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	sbcs	r7, r7, r12
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	ldr	r7, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [r4, #92]
+	sbcs	r0, r7, r0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	sbcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	sbc	r0, r0, #0
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r7, [sp, #24]                   @ 4-byte Reload
+	adds	r0, r0, r1
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	str	r0, [r4, #24]
+	adcs	r1, r1, r7
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
+	str	r1, [r4, #28]
+	adcs	r0, r0, r7
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r7, [sp, #16]                   @ 4-byte Reload
+	str	r0, [r4, #32]
+	adcs	r1, r1, r7
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r7, [sp]                        @ 4-byte Reload
+	str	r1, [r4, #36]
+	adcs	r0, r0, r7
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	ldr	r7, [sp, #4]                    @ 4-byte Reload
+	str	r0, [r4, #40]
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	adcs	r1, r1, r7
+	str	r1, [r4, #44]
+	adcs	r0, r9, r0
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	str	r0, [r4, #48]
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r10, r1
+	str	r1, [r4, #52]
+	adcs	r9, r11, r0
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	str	r9, [r4, #56]
+	adcs	r1, r8, r0
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r5, r5, r0
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r7, lr, r0
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r6, r6, r0
+	adcs	r2, r2, #0
+	adcs	r3, r3, #0
+	adcs	r0, r12, #0
+	add	r12, r4, #60
+	stm	r12, {r1, r5, r7}
+	str	r0, [r4, #84]
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mul	r0, r4, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	umull	r2, r7, r0, r11
-	umull	r4, r3, r0, r1
-	adds	r2, r3, r2
-	str	r4, [sp, #92]           @ 4-byte Spill
-	umull	r1, r2, r0, r6
-	adcs	r4, r7, r1
-	umlal	r3, r1, r0, r11
-	umull	r4, r5, r0, r12
-	adcs	r2, r2, r4
-	str	r2, [sp, #52]           @ 4-byte Spill
-	umull	r4, r2, r0, lr
-	adcs	r7, r5, r4
-	str	r7, [sp, #48]           @ 4-byte Spill
-	umull	r7, r4, r0, r9
-	adcs	r5, r2, r7
-	umull	r7, r2, r0, r8
-	adcs	r7, r4, r7
-	adc	r0, r2, #0
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	adds	r2, r2, r10
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	adcs	r12, r3, r2
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	adcs	lr, r1, r2
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	adcs	r10, r2, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	adcs	r4, r2, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r8, r5, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r8, [sp, #84]           @ 4-byte Spill
-	adcs	r2, r7, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	str	r2, [sp, #92]           @ 4-byte Spill
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r3, r0, #0
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	subs	r0, r12, r0
-	sbcs	r5, lr, r11
-	mov	r11, r4
-	sbcs	r6, r10, r6
-	sbcs	r7, r4, r7
-	ldr	r4, [sp, #96]           @ 4-byte Reload
-	sbcs	r4, r8, r4
-	sbcs	r8, r2, r9
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	sbcs	r9, r1, r2
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	sbc	r3, r3, #0
-	ands	r3, r3, #1
-	movne	r0, r12
-	movne	r5, lr
-	movne	r6, r10
-	cmp	r3, #0
-	str	r0, [r2]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	movne	r7, r11
-	str	r5, [r2, #4]
-	str	r6, [r2, #8]
-	str	r7, [r2, #12]
-	movne	r4, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	r4, [r2, #16]
-	movne	r8, r0
-	cmp	r3, #0
-	movne	r9, r1
-	str	r8, [r2, #20]
-	str	r9, [r2, #24]
-	add	sp, sp, #120
+	str	r6, [r4, #72]
+	adc	r1, r1, #0
+	str	r2, [r4, #76]
+	str	r3, [r4, #80]
+	str	r0, [r4, #88]
+	str	r1, [r4, #92]
+	add	sp, sp, #196
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end100:
-	.size	mcl_fp_montRed7L, .Lfunc_end100-mcl_fp_montRed7L
+.Lfunc_end59:
+	.size	mcl_fpDbl_sqrPre12L, .Lfunc_end59-mcl_fpDbl_sqrPre12L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_addPre7L
-	.align	2
-	.type	mcl_fp_addPre7L,%function
-mcl_fp_addPre7L:                        @ @mcl_fp_addPre7L
+                                        @ -- End function
+	.globl	mcl_fp_mont12L                  @ -- Begin function mcl_fp_mont12L
+	.p2align	2
+	.type	mcl_fp_mont12L,%function
+	.code	32                              @ @mcl_fp_mont12L
+mcl_fp_mont12L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#8
-	sub	sp, sp, #8
-	ldr	r3, [r1, #4]
-	ldr	r9, [r1]
-	ldr	r7, [r2]
-	ldr	lr, [r1, #8]
-	ldr	r10, [r1, #12]
-	ldr	r11, [r1, #16]
-	ldr	r8, [r1, #24]
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldr	r3, [r1, #20]
-	adds	r7, r7, r9
-	str	r3, [sp]                @ 4-byte Spill
-	ldmib	r2, {r1, r3, r4, r5, r12}
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	ldr	r2, [r2, #24]
-	str	r7, [r0]
-	adcs	r1, r1, r6
-	ldr	r6, [sp]                @ 4-byte Reload
-	adcs	r3, r3, lr
-	adcs	r4, r4, r10
-	adcs	r5, r5, r11
-	adcs	r6, r12, r6
-	adcs	r2, r2, r8
-	stmib	r0, {r1, r3, r4, r5, r6}
-	str	r2, [r0, #24]
+	.pad	#428
+	sub	sp, sp, #428
+	.pad	#1024
+	sub	sp, sp, #1024
+	mov	r7, r2
+	ldr	r2, [r2]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	add	r0, sp, #1392
+	ldr	r5, [r3, #-4]
+	mov	r4, r3
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	mov	r6, r7
+	str	r5, [sp, #88]                   @ 4-byte Spill
+	str	r3, [sp, #100]                  @ 4-byte Spill
+	str	r7, [sp, #92]                   @ 4-byte Spill
+	bl	mulPv384x32
+	ldr	r0, [sp, #1396]
+	add	lr, sp, #1024
+	ldr	r7, [sp, #1392]
+	mov	r1, r4
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #1400]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	mul	r2, r5, r7
+	ldr	r0, [sp, #1404]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #1440]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #1436]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #1432]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #1428]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #1424]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1420]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1416]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1412]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1408]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	add	r0, lr, #312
+	bl	mulPv384x32
+	ldr	r0, [sp, #1384]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #1380]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1376]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1372]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1368]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1364]
+	ldr	r2, [r6, #4]
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #1280
+	ldr	r4, [sp, #1360]
+	ldr	r6, [sp, #1356]
+	ldr	r8, [sp, #1352]
+	ldr	r11, [sp, #1336]
+	ldr	r5, [sp, #1340]
+	ldr	r9, [sp, #1344]
+	ldr	r10, [sp, #1348]
+	bl	mulPv384x32
+	adds	r0, r11, r7
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r5, r0
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r11, [sp, #64]                  @ 4-byte Reload
+	adcs	r0, r9, r0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r10, r0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	adcs	r1, r8, r1
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	mov	r0, #0
+	adcs	r1, r6, r1
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r4, r1
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #32]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	add	sp, sp, #8
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end101:
-	.size	mcl_fp_addPre7L, .Lfunc_end101-mcl_fp_addPre7L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subPre7L
-	.align	2
-	.type	mcl_fp_subPre7L,%function
-mcl_fp_subPre7L:                        @ @mcl_fp_subPre7L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#8
-	sub	sp, sp, #8
-	ldr	r3, [r2, #4]
-	ldr	r9, [r2]
-	ldr	r7, [r1]
-	ldr	lr, [r2, #8]
-	ldr	r10, [r2, #12]
-	ldr	r11, [r2, #16]
-	ldr	r8, [r2, #24]
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldr	r3, [r2, #20]
-	subs	r7, r7, r9
-	str	r3, [sp]                @ 4-byte Spill
-	ldmib	r1, {r2, r3, r4, r5, r12}
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	ldr	r1, [r1, #24]
-	str	r7, [r0]
-	sbcs	r2, r2, r6
-	ldr	r6, [sp]                @ 4-byte Reload
-	sbcs	r3, r3, lr
-	sbcs	r4, r4, r10
-	sbcs	r5, r5, r11
-	sbcs	r6, r12, r6
-	sbcs	r1, r1, r8
-	stmib	r0, {r2, r3, r4, r5, r6}
-	str	r1, [r0, #24]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r4, [sp, #1280]
+	ldr	r0, [sp, #1284]
+	adds	r11, r11, r4
+	ldr	r4, [sp, #60]                   @ 4-byte Reload
+	ldr	r10, [sp, #1328]
+	adcs	r0, r4, r0
+	ldr	r9, [sp, #1324]
+	ldr	r8, [sp, #1320]
+	mov	r4, r11
+	ldr	r7, [sp, #1316]
+	ldr	r6, [sp, #1312]
+	ldr	r5, [sp, #1308]
+	ldr	lr, [sp, #1304]
+	ldr	r12, [sp, #1300]
+	ldr	r3, [sp, #1296]
+	ldr	r1, [sp, #1288]
+	ldr	r2, [sp, #1292]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	add	lr, sp, #1024
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #36]                   @ 4-byte Spill
 	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	add	sp, sp, #8
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end102:
-	.size	mcl_fp_subPre7L, .Lfunc_end102-mcl_fp_subPre7L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_7L
-	.align	2
-	.type	mcl_fp_shr1_7L,%function
-mcl_fp_shr1_7L:                         @ @mcl_fp_shr1_7L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r11, lr}
-	push	{r4, r5, r6, r7, r11, lr}
-	ldr	r3, [r1, #4]
-	ldr	r12, [r1]
-	ldr	lr, [r1, #12]
-	ldr	r2, [r1, #8]
-	ldr	r5, [r1, #20]
-	ldr	r4, [r1, #16]
-	ldr	r1, [r1, #24]
-	lsrs	r6, r3, #1
-	lsr	r3, r3, #1
-	rrx	r12, r12
-	lsrs	r6, lr, #1
-	orr	r7, r3, r2, lsl #31
-	lsr	r6, lr, #1
-	rrx	r2, r2
-	lsrs	r3, r5, #1
-	lsr	r5, r5, #1
-	str	r12, [r0]
-	str	r7, [r0, #4]
-	orr	r5, r5, r1, lsl #31
-	orr	r6, r6, r4, lsl #31
-	rrx	r3, r4
-	lsr	r1, r1, #1
-	str	r2, [r0, #8]
-	str	r6, [r0, #12]
-	str	r3, [r0, #16]
-	str	r5, [r0, #20]
-	str	r1, [r0, #24]
-	pop	{r4, r5, r6, r7, r11, lr}
-	mov	pc, lr
-.Lfunc_end103:
-	.size	mcl_fp_shr1_7L, .Lfunc_end103-mcl_fp_shr1_7L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_add7L
-	.align	2
-	.type	mcl_fp_add7L,%function
-mcl_fp_add7L:                           @ @mcl_fp_add7L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#12
-	sub	sp, sp, #12
-	ldr	r7, [r1, #8]
-	ldr	r10, [r1]
-	ldr	r9, [r1, #4]
-	ldr	r11, [r1, #16]
-	ldr	r8, [r1, #24]
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldr	r7, [r1, #12]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	ldr	r7, [r1, #20]
-	ldm	r2, {r1, r4, r5, r6, r12, lr}
-	ldr	r2, [r2, #24]
-	adds	r10, r1, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r4, r4, r9
-	str	r10, [r0]
-	adcs	r5, r5, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r6, r6, r1
-	mov	r1, #0
-	adcs	r9, r12, r11
-	adcs	r7, lr, r7
-	stmib	r0, {r4, r5, r6, r9}
-	adcs	r2, r2, r8
-	str	r7, [r0, #20]
-	adc	r1, r1, #0
-	str	r2, [r0, #24]
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [r3]
-	str	r1, [sp]                @ 4-byte Spill
-	ldmib	r3, {r12, lr}
-	ldr	r1, [r3, #20]
-	ldr	r8, [r3, #12]
-	ldr	r11, [r3, #16]
-	ldr	r3, [r3, #24]
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldr	r1, [sp]                @ 4-byte Reload
-	subs	r10, r10, r1
-	sbcs	r1, r4, r12
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	sbcs	r5, r5, lr
-	sbcs	r12, r6, r8
-	str	r5, [sp]                @ 4-byte Spill
-	sbcs	lr, r9, r11
-	sbcs	r4, r7, r4
-	sbcs	r5, r2, r3
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	sbc	r2, r2, #0
-	tst	r2, #1
-	bne	.LBB104_2
-@ BB#1:                                 @ %nocarry
-	str	r10, [r0]
-	str	r1, [r0, #4]
-	ldr	r1, [sp]                @ 4-byte Reload
-	add	r2, r0, #8
-	stm	r2, {r1, r12, lr}
-	str	r4, [r0, #20]
-	str	r5, [r0, #24]
-.LBB104_2:                              @ %carry
-	add	sp, sp, #12
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end104:
-	.size	mcl_fp_add7L, .Lfunc_end104-mcl_fp_add7L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF7L
-	.align	2
-	.type	mcl_fp_addNF7L,%function
-mcl_fp_addNF7L:                         @ @mcl_fp_addNF7L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#20
-	sub	sp, sp, #20
-	ldm	r1, {r6, r7}
-	ldr	r11, [r1, #16]
-	ldr	r9, [r1, #20]
-	ldr	r8, [r1, #24]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r1, #8]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldr	r7, [r1, #12]
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldm	r2, {r1, r4, r5, r10, r12, lr}
-	ldr	r2, [r2, #24]
-	adds	r7, r1, r6
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r7, [sp, #4]            @ 4-byte Spill
-	adcs	r6, r4, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r6, [sp, #16]           @ 4-byte Spill
-	adcs	r5, r5, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r4, r10, r1
-	ldr	r10, [r3, #8]
-	adcs	r12, r12, r11
-	ldr	r11, [r3, #16]
-	adcs	lr, lr, r9
-	ldr	r9, [r3, #20]
-	adc	r1, r2, r8
-	ldr	r2, [r3]
-	ldr	r8, [r3, #12]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r3, #4]
-	ldr	r3, [r3, #24]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [sp]                @ 4-byte Reload
-	subs	r2, r7, r2
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	sbcs	r7, r6, r7
-	sbcs	r6, r5, r10
-	mov	r10, r12
-	sbcs	r8, r4, r8
-	sbcs	r11, r12, r11
-	sbcs	r12, lr, r9
-	ldr	r9, [sp, #4]            @ 4-byte Reload
-	sbc	r3, r1, r3
-	asr	r1, r3, #31
-	cmp	r1, #0
-	movlt	r2, r9
-	movlt	r6, r5
-	str	r2, [r0]
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	movlt	r7, r2
-	cmp	r1, #0
-	movlt	r8, r4
-	movlt	r11, r10
-	movlt	r12, lr
-	cmp	r1, #0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r7, [r0, #4]
-	str	r6, [r0, #8]
-	str	r8, [r0, #12]
-	str	r11, [r0, #16]
-	str	r12, [r0, #20]
-	movlt	r3, r1
-	str	r3, [r0, #24]
-	add	sp, sp, #20
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end105:
-	.size	mcl_fp_addNF7L, .Lfunc_end105-mcl_fp_addNF7L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub7L
-	.align	2
-	.type	mcl_fp_sub7L,%function
-mcl_fp_sub7L:                           @ @mcl_fp_sub7L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#12
-	sub	sp, sp, #12
-	ldr	r7, [r2, #8]
-	ldr	r11, [r2]
-	ldr	r9, [r2, #4]
-	ldr	r8, [r2, #20]
-	ldr	r10, [r2, #24]
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldr	r7, [r2, #12]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	ldr	r7, [r2, #16]
-	str	r7, [sp]                @ 4-byte Spill
-	ldm	r1, {r2, r4, r5, r6, r7, lr}
-	ldr	r1, [r1, #24]
-	subs	r12, r2, r11
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	sbcs	r9, r4, r9
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	str	r12, [r0]
-	str	r9, [r0, #4]
-	sbcs	r2, r5, r2
-	sbcs	r11, r6, r4
-	ldr	r4, [sp]                @ 4-byte Reload
-	str	r2, [r0, #8]
-	str	r11, [r0, #12]
-	sbcs	r4, r7, r4
-	sbcs	r5, lr, r8
-	sbcs	r6, r1, r10
-	add	r1, r0, #16
-	stm	r1, {r4, r5, r6}
-	mov	r1, #0
-	sbc	r1, r1, #0
-	tst	r1, #1
-	beq	.LBB106_2
-@ BB#1:                                 @ %carry
-	ldr	r1, [r3]
-	ldr	r7, [r3, #4]
-	ldr	lr, [r3, #12]
-	ldr	r8, [r3, #16]
-	ldr	r10, [r3, #20]
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldr	r1, [r3, #8]
-	ldr	r3, [r3, #24]
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adds	r1, r1, r12
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r7, r7, r9
-	adcs	r2, r1, r2
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r12, lr, r11
-	adcs	r4, r8, r4
-	adcs	r5, r10, r5
-	adc	r3, r3, r6
-	stm	r0, {r1, r7}
-	str	r2, [r0, #8]
-	str	r12, [r0, #12]
-	str	r4, [r0, #16]
-	str	r5, [r0, #20]
-	str	r3, [r0, #24]
-.LBB106_2:                              @ %nocarry
-	add	sp, sp, #12
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end106:
-	.size	mcl_fp_sub7L, .Lfunc_end106-mcl_fp_sub7L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF7L
-	.align	2
-	.type	mcl_fp_subNF7L,%function
-mcl_fp_subNF7L:                         @ @mcl_fp_subNF7L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#16
-	sub	sp, sp, #16
-	ldm	r2, {r5, lr}
-	ldr	r7, [r2, #8]
-	ldr	r11, [r2, #16]
-	ldr	r10, [r2, #24]
-	add	r9, r1, #12
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldr	r7, [r2, #12]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldm	r1, {r2, r4, r12}
-	ldm	r9, {r6, r8, r9}
-	ldr	r7, [r1, #24]
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	subs	r5, r2, r5
-	sbcs	lr, r4, lr
-	sbcs	r4, r12, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	str	lr, [sp]                @ 4-byte Spill
-	sbcs	r12, r6, r1
-	ldr	r6, [r3, #4]
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	sbcs	r2, r8, r11
-	ldr	r8, [r3, #12]
-	ldr	r11, [r3, #16]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	str	r6, [sp, #4]            @ 4-byte Spill
-	ldr	r6, [r3, #20]
-	sbcs	r1, r9, r1
-	sbc	r9, r7, r10
-	ldr	r7, [r3]
-	ldr	r10, [r3, #8]
-	ldr	r3, [r3, #24]
-	str	r6, [sp, #8]            @ 4-byte Spill
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	adds	r7, r5, r7
-	adcs	r6, lr, r6
-	adcs	lr, r4, r10
-	mov	r10, r1
-	adcs	r8, r12, r8
-	adcs	r11, r2, r11
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adcs	r2, r1, r2
-	asr	r1, r9, #31
-	adc	r3, r9, r3
-	cmp	r1, #0
-	movge	r7, r5
-	ldr	r5, [sp]                @ 4-byte Reload
-	movge	lr, r4
-	str	r7, [r0]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	movge	r6, r5
-	cmp	r1, #0
-	movge	r8, r12
-	movge	r11, r7
-	movge	r2, r10
-	cmp	r1, #0
-	str	r6, [r0, #4]
-	str	lr, [r0, #8]
-	movge	r3, r9
-	str	r8, [r0, #12]
-	str	r11, [r0, #16]
-	str	r2, [r0, #20]
-	str	r3, [r0, #24]
-	add	sp, sp, #16
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end107:
-	.size	mcl_fp_subNF7L, .Lfunc_end107-mcl_fp_subNF7L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add7L
-	.align	2
-	.type	mcl_fpDbl_add7L,%function
-mcl_fpDbl_add7L:                        @ @mcl_fpDbl_add7L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#48
-	sub	sp, sp, #48
-	ldm	r1, {r12, lr}
-	ldr	r8, [r1, #8]
-	ldr	r10, [r1, #12]
-	ldmib	r2, {r6, r7}
-	ldr	r4, [r2, #16]
-	ldr	r11, [r2]
-	ldr	r5, [r2, #12]
-	str	r4, [sp, #8]            @ 4-byte Spill
-	ldr	r4, [r2, #20]
-	adds	r9, r11, r12
-	ldr	r11, [r1, #44]
-	str	r4, [sp, #12]           @ 4-byte Spill
-	ldr	r4, [r2, #24]
-	str	r4, [sp, #20]           @ 4-byte Spill
-	ldr	r4, [r2, #28]
-	str	r4, [sp, #40]           @ 4-byte Spill
-	ldr	r4, [r2, #32]
-	str	r4, [sp, #16]           @ 4-byte Spill
-	ldr	r4, [r2, #36]
-	str	r4, [sp, #24]           @ 4-byte Spill
-	ldr	r4, [r2, #40]
-	str	r4, [sp, #28]           @ 4-byte Spill
-	ldr	r4, [r2, #44]
-	str	r4, [sp, #32]           @ 4-byte Spill
-	ldr	r4, [r2, #48]
-	ldr	r2, [r2, #52]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #36]
-	str	r4, [sp, #36]           @ 4-byte Spill
-	adcs	r4, r6, lr
-	add	lr, r1, #16
-	adcs	r7, r7, r8
-	ldr	r8, [r1, #52]
-	adcs	r6, r5, r10
-	ldr	r5, [r1, #32]
-	ldr	r10, [r1, #48]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #40]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	str	r9, [r0]
-	stmib	r0, {r4, r7}
-	str	r6, [r0, #12]
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	ldr	r7, [sp]                @ 4-byte Reload
-	adcs	r1, r4, r1
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adcs	r2, r4, r2
-	str	r2, [r0, #20]
-	adcs	r1, r1, r12
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r2, r2, lr
-	str	r2, [sp, #20]           @ 4-byte Spill
-	adcs	r2, r1, r5
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r2, [sp, #16]           @ 4-byte Spill
-	adcs	r5, r1, r7
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	adcs	r12, r1, r7
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	mov	r7, #0
-	str	r12, [sp, #40]          @ 4-byte Spill
-	adcs	lr, r1, r11
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r4, r1, r10
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r9, r1, r8
-	adc	r1, r7, #0
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldm	r3, {r1, r7, r11}
-	ldr	r10, [r3, #12]
-	ldr	r8, [r3, #16]
-	ldr	r6, [r3, #20]
-	ldr	r3, [r3, #24]
-	str	r3, [sp, #36]           @ 4-byte Spill
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	subs	r1, r3, r1
-	sbcs	r7, r2, r7
-	sbcs	r2, r5, r11
-	mov	r11, lr
-	sbcs	r10, r12, r10
-	sbcs	r12, lr, r8
-	sbcs	lr, r4, r6
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	sbcs	r8, r9, r6
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	sbc	r6, r6, #0
-	ands	r6, r6, #1
-	movne	r1, r3
-	movne	r2, r5
-	str	r1, [r0, #28]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	movne	r7, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	cmp	r6, #0
-	movne	r12, r11
-	movne	lr, r4
-	str	r7, [r0, #32]
-	str	r2, [r0, #36]
-	movne	r10, r1
-	cmp	r6, #0
-	movne	r8, r9
-	str	r10, [r0, #40]
-	str	r12, [r0, #44]
-	str	lr, [r0, #48]
-	str	r8, [r0, #52]
-	add	sp, sp, #48
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end108:
-	.size	mcl_fpDbl_add7L, .Lfunc_end108-mcl_fpDbl_add7L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sub7L
-	.align	2
-	.type	mcl_fpDbl_sub7L,%function
-mcl_fpDbl_sub7L:                        @ @mcl_fpDbl_sub7L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#68
-	sub	sp, sp, #68
-	ldr	r7, [r2, #32]
-	add	r8, r1, #16
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #60]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #64]           @ 4-byte Spill
-	ldm	r2, {r4, r7}
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #8]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r2, #12]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldr	r7, [r2, #16]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	ldr	r2, [r2, #20]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldmib	r1, {r2, r12, lr}
-	ldm	r8, {r5, r6, r8}
-	ldr	r7, [r1, #28]
-	ldr	r11, [r1]
-	ldr	r9, [r1, #32]
-	ldr	r10, [r1, #44]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r1, #36]
-	subs	r4, r11, r4
-	str	r4, [r0]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	ldr	r7, [r1, #40]
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldr	r7, [r1, #48]
-	ldr	r1, [r1, #52]
-	str	r7, [sp]                @ 4-byte Spill
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	sbcs	r2, r2, r7
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	sbcs	r12, r12, r7
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	stmib	r0, {r2, r12}
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	sbcs	lr, lr, r7
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	str	lr, [r0, #12]
-	sbcs	r2, r5, r2
-	str	r2, [r0, #16]
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	sbcs	r2, r6, r2
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	sbcs	r2, r8, r2
-	mov	r8, #0
-	str	r2, [r0, #24]
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	sbcs	lr, r7, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	sbcs	r4, r9, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	ldr	r9, [r3, #20]
-	str	r4, [sp, #44]           @ 4-byte Spill
-	sbcs	r7, r7, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	sbcs	r12, r6, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	ldr	r6, [sp]                @ 4-byte Reload
-	str	r12, [sp, #52]          @ 4-byte Spill
-	sbcs	r11, r10, r2
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	ldr	r10, [r3, #12]
-	sbcs	r6, r6, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	sbcs	r5, r1, r2
-	ldr	r2, [r3, #8]
-	sbc	r1, r8, #0
-	ldr	r8, [r3, #4]
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [r3]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [r3, #24]
-	ldr	r3, [sp, #60]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adds	r1, lr, r1
-	adcs	r4, r4, r8
-	adcs	r2, r7, r2
-	adcs	r10, r12, r10
-	adcs	r12, r11, r3
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	adcs	r8, r6, r9
-	adc	r9, r5, r3
-	ldr	r3, [sp, #64]           @ 4-byte Reload
-	ands	r3, r3, #1
-	moveq	r1, lr
-	moveq	r2, r7
-	str	r1, [r0, #28]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	moveq	r4, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	cmp	r3, #0
-	moveq	r12, r11
-	moveq	r8, r6
-	str	r4, [r0, #32]
-	str	r2, [r0, #36]
-	moveq	r10, r1
-	cmp	r3, #0
-	moveq	r9, r5
-	str	r10, [r0, #40]
-	str	r12, [r0, #44]
-	str	r8, [r0, #48]
-	str	r9, [r0, #52]
-	add	sp, sp, #68
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end109:
-	.size	mcl_fpDbl_sub7L, .Lfunc_end109-mcl_fpDbl_sub7L
-	.cantunwind
-	.fnend
-
-	.align	2
-	.type	.LmulPv256x32,%function
-.LmulPv256x32:                          @ @mulPv256x32
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r9, [r1, #12]
-	umull	r4, r8, lr, r2
-	umull	lr, r6, r12, r2
-	mov	r5, r4
-	mov	r7, r6
-	str	lr, [r0]
-	umull	lr, r12, r9, r2
-	umlal	r7, r5, r3, r2
-	str	r5, [r0, #8]
-	str	r7, [r0, #4]
-	umull	r5, r7, r3, r2
-	adds	r3, r6, r5
-	adcs	r3, r7, r4
-	adcs	r3, r8, lr
-	str	r3, [r0, #12]
-	ldr	r3, [r1, #16]
-	umull	r7, r6, r3, r2
-	adcs	r3, r12, r7
-	str	r3, [r0, #16]
-	ldr	r3, [r1, #20]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #20]
-	ldr	r3, [r1, #24]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #24]
-	ldr	r1, [r1, #28]
-	umull	r3, r7, r1, r2
-	adcs	r1, r6, r3
-	str	r1, [r0, #28]
-	adc	r1, r7, #0
-	str	r1, [r0, #32]
-	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
-	mov	pc, lr
-.Lfunc_end110:
-	.size	.LmulPv256x32, .Lfunc_end110-.LmulPv256x32
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mulUnitPre8L
-	.align	2
-	.type	mcl_fp_mulUnitPre8L,%function
-mcl_fp_mulUnitPre8L:                    @ @mcl_fp_mulUnitPre8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r11, lr}
-	push	{r4, r5, r6, r7, r11, lr}
-	.pad	#40
-	sub	sp, sp, #40
-	mov	r4, r0
-	mov	r0, sp
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #32]
-	add	lr, sp, #16
-	ldr	r12, [sp, #28]
-	ldm	lr, {r1, r3, lr}
-	ldm	sp, {r2, r5, r6, r7}
-	str	r0, [r4, #32]
-	add	r0, r4, #16
-	stm	r4, {r2, r5, r6, r7}
-	stm	r0, {r1, r3, lr}
-	str	r12, [r4, #28]
-	add	sp, sp, #40
-	pop	{r4, r5, r6, r7, r11, lr}
-	mov	pc, lr
-.Lfunc_end111:
-	.size	mcl_fp_mulUnitPre8L, .Lfunc_end111-mcl_fp_mulUnitPre8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_mulPre8L
-	.align	2
-	.type	mcl_fpDbl_mulPre8L,%function
-mcl_fpDbl_mulPre8L:                     @ @mcl_fpDbl_mulPre8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#124
-	sub	sp, sp, #124
-	mov	r6, r2
-	mov	r5, r1
-	mov	r4, r0
-	bl	mcl_fpDbl_mulPre4L(PLT)
-	add	r0, r4, #32
-	add	r1, r5, #16
-	add	r2, r6, #16
-	bl	mcl_fpDbl_mulPre4L(PLT)
-	ldm	r6, {r12, lr}
-	ldr	r7, [r6, #16]
-	ldr	r9, [r6, #8]
-	ldr	r3, [r6, #12]
-	add	r6, r6, #20
-	mov	r8, #0
-	ldm	r6, {r0, r1, r6}
-	adds	r2, r12, r7
-	adcs	r0, lr, r0
-	str	r2, [sp, #56]           @ 4-byte Spill
-	adcs	r1, r9, r1
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r9, [r5]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	adcs	r1, r3, r6
-	str	r1, [sp, #48]           @ 4-byte Spill
-	adc	r6, r8, #0
-	ldmib	r5, {r8, r10, r12}
-	ldr	r7, [r5, #16]
-	ldr	r3, [r5, #20]
-	ldr	lr, [r5, #24]
-	ldr	r11, [r5, #28]
-	str	r2, [sp, #60]
-	str	r0, [sp, #64]
-	mov	r0, #0
-	add	r2, sp, #60
-	adds	r5, r9, r7
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	adcs	r8, r8, r3
-	str	r5, [sp, #76]
-	adcs	r10, r10, lr
-	str	r8, [sp, #80]
-	adcs	r9, r12, r11
-	str	r10, [sp, #84]
-	str	r7, [sp, #68]
-	str	r1, [sp, #72]
-	adc	r11, r0, #0
-	add	r0, sp, #92
-	add	r1, sp, #76
-	str	r9, [sp, #88]
-	bl	mcl_fpDbl_mulPre4L(PLT)
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	cmp	r6, #0
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	and	r12, r6, r11
-	ldr	lr, [sp, #120]
-	moveq	r5, r6
-	moveq	r9, r6
-	moveq	r10, r6
-	moveq	r8, r6
-	ldr	r6, [sp, #116]
-	adds	r0, r5, r0
-	adcs	r1, r8, r1
-	adcs	r2, r10, r7
-	mov	r7, #0
-	adcs	r3, r9, r3
-	adc	r7, r7, #0
-	cmp	r11, #0
-	moveq	r0, r5
-	ldr	r5, [sp, #108]
-	moveq	r2, r10
-	moveq	r3, r9
-	moveq	r7, r11
-	moveq	r1, r8
-	adds	r8, r0, r5
-	ldr	r5, [sp, #112]
-	adcs	r10, r1, r5
-	adcs	r9, r2, r6
-	ldr	r6, [r4]
-	ldmib	r4, {r5, r11}
-	ldr	r2, [sp, #92]
-	adcs	lr, r3, lr
-	add	r3, sp, #96
-	adc	r12, r7, r12
-	ldr	r7, [r4, #12]
-	ldm	r3, {r0, r1, r3}
-	subs	r2, r2, r6
-	str	r2, [sp, #52]           @ 4-byte Spill
-	sbcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	sbcs	r0, r1, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	sbcs	r0, r3, r7
-	ldr	r7, [r4, #20]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r4, #16]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	sbcs	r0, r8, r0
-	ldr	r8, [r4, #28]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	sbcs	r0, r10, r7
-	ldr	r10, [r4, #24]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	sbcs	r0, r9, r10
-	str	r0, [sp, #20]           @ 4-byte Spill
-	sbcs	r0, lr, r8
-	add	lr, r4, #32
-	str	r0, [sp, #16]           @ 4-byte Spill
-	sbc	r0, r12, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	lr, {r5, r9, lr}
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	ldr	r12, [r4, #44]
-	ldr	r2, [r4, #48]
-	ldr	r0, [r4, #52]
-	ldr	r1, [r4, #56]
-	ldr	r3, [r4, #60]
-	subs	r6, r6, r5
-	str	r1, [sp, #36]           @ 4-byte Spill
-	str	r3, [sp, #32]           @ 4-byte Spill
-	str	r6, [sp]                @ 4-byte Spill
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	sbcs	r11, r6, r9
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	sbcs	r6, r6, lr
-	str	r6, [sp, #4]            @ 4-byte Spill
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	sbcs	r6, r6, r12
-	str	r6, [sp, #8]            @ 4-byte Spill
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	sbcs	r6, r6, r2
-	str	r6, [sp, #28]           @ 4-byte Spill
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	sbcs	r6, r6, r0
-	str	r6, [sp, #40]           @ 4-byte Spill
-	mov	r6, r0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp]                @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	sbcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	sbc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adds	r3, r0, r1
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adcs	r7, r7, r11
-	str	r3, [r4, #16]
-	str	r7, [r4, #20]
-	adcs	r3, r10, r0
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	str	r3, [r4, #24]
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r8, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	str	r1, [r4, #28]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [r4, #32]
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r9, r1
-	str	r1, [r4, #36]
-	adcs	r0, lr, r0
-	str	r0, [r4, #40]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	add	r12, r4, #48
-	str	r0, [r4, #44]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r6, #0
-	adcs	r2, r2, #0
-	adc	r3, r3, #0
-	stm	r12, {r0, r1, r2, r3}
-	add	sp, sp, #124
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end112:
-	.size	mcl_fpDbl_mulPre8L, .Lfunc_end112-mcl_fpDbl_mulPre8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sqrPre8L
-	.align	2
-	.type	mcl_fpDbl_sqrPre8L,%function
-mcl_fpDbl_sqrPre8L:                     @ @mcl_fpDbl_sqrPre8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#124
-	sub	sp, sp, #124
-	mov	r5, r1
-	mov	r4, r0
-	mov	r2, r5
-	bl	mcl_fpDbl_mulPre4L(PLT)
-	add	r1, r5, #16
-	add	r0, r4, #32
-	mov	r2, r1
-	bl	mcl_fpDbl_mulPre4L(PLT)
-	ldm	r5, {r0, r8, lr}
-	ldr	r3, [r5, #16]
-	ldr	r2, [r5, #20]
-	ldr	r6, [r5, #24]
-	ldr	r12, [r5, #12]
-	ldr	r1, [r5, #28]
-	adds	r9, r0, r3
-	add	r0, sp, #64
-	adcs	r5, r8, r2
-	str	r9, [sp, #76]
-	str	r9, [sp, #60]
-	add	r2, sp, #60
-	adcs	r6, lr, r6
-	str	r5, [sp, #80]
-	adcs	r7, r12, r1
-	str	r6, [sp, #84]
-	add	r1, sp, #76
-	str	r7, [sp, #88]
-	stm	r0, {r5, r6, r7}
-	mov	r0, #0
-	adc	r8, r0, #0
-	add	r0, sp, #92
-	bl	mcl_fpDbl_mulPre4L(PLT)
-	adds	r12, r9, r9
-	adcs	lr, r5, r5
-	adcs	r9, r6, r6
-	add	r6, sp, #112
-	ldm	r6, {r0, r5, r6}
-	ldr	r1, [sp, #108]
-	adc	r10, r7, r7
-	adds	r2, r1, r12
-	adcs	r3, r0, lr
-	adcs	r12, r5, r9
-	adcs	lr, r6, r10
-	adc	r7, r8, r7, lsr #31
-	cmp	r8, #0
-	moveq	lr, r6
-	add	r6, sp, #92
-	moveq	r7, r8
-	moveq	r12, r5
-	moveq	r3, r0
-	moveq	r2, r1
-	ldm	r4, {r8, r9, r10, r11}
-	ldm	r6, {r0, r1, r5, r6}
-	subs	r0, r0, r8
-	ldr	r8, [r4, #20]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	sbcs	r0, r1, r9
-	ldr	r9, [r4, #24]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	sbcs	r0, r5, r10
-	ldr	r10, [r4, #28]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	sbcs	r0, r6, r11
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r4, #16]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	sbcs	r0, r2, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	sbcs	r0, r3, r8
-	str	r0, [sp, #24]           @ 4-byte Spill
-	sbcs	r0, r12, r9
-	str	r0, [sp, #20]           @ 4-byte Spill
-	sbcs	r0, lr, r10
-	add	lr, r4, #32
-	str	r0, [sp, #16]           @ 4-byte Spill
-	sbc	r0, r7, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	lr, {r5, r7, lr}
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	ldr	r12, [r4, #44]
-	ldr	r2, [r4, #48]
-	ldr	r0, [r4, #52]
-	ldr	r1, [r4, #56]
-	ldr	r3, [r4, #60]
-	subs	r6, r6, r5
-	str	r1, [sp, #36]           @ 4-byte Spill
-	str	r3, [sp, #32]           @ 4-byte Spill
-	str	r6, [sp]                @ 4-byte Spill
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	sbcs	r11, r6, r7
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	sbcs	r6, r6, lr
-	str	r6, [sp, #4]            @ 4-byte Spill
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	sbcs	r6, r6, r12
-	str	r6, [sp, #8]            @ 4-byte Spill
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	sbcs	r6, r6, r2
-	str	r6, [sp, #28]           @ 4-byte Spill
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	sbcs	r6, r6, r0
-	str	r6, [sp, #40]           @ 4-byte Spill
-	mov	r6, r0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp]                @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	sbcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	sbc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adds	r3, r1, r0
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adcs	r1, r11, r8
-	str	r3, [r4, #16]
-	str	r1, [r4, #20]
-	adcs	r3, r0, r9
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	str	r3, [r4, #24]
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r0, r10
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	str	r1, [r4, #28]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [r4, #32]
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	str	r1, [r4, #36]
-	adcs	r0, r0, lr
-	str	r0, [r4, #40]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	add	r12, r4, #48
-	str	r0, [r4, #44]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r6, #0
-	adcs	r2, r2, #0
-	adc	r3, r3, #0
-	stm	r12, {r0, r1, r2, r3}
-	add	sp, sp, #124
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end113:
-	.size	mcl_fpDbl_sqrPre8L, .Lfunc_end113-mcl_fpDbl_sqrPre8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mont8L
-	.align	2
-	.type	mcl_fp_mont8L,%function
-mcl_fp_mont8L:                          @ @mcl_fp_mont8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#724
-	sub	sp, sp, #724
-	mov	r7, r2
-	ldr	r5, [r3, #-4]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #680
-	str	r3, [sp, #64]           @ 4-byte Spill
-	str	r1, [sp, #68]           @ 4-byte Spill
-	mov	r4, r3
-	mov	r11, r1
-	ldr	r2, [r7]
-	str	r7, [sp, #76]           @ 4-byte Spill
-	str	r5, [sp, #72]           @ 4-byte Spill
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #684]
-	ldr	r9, [sp, #680]
-	mov	r1, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #688]
-	mul	r2, r9, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #712]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #708]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #640
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #672]
-	add	r10, sp, #644
-	ldr	r4, [sp, #656]
-	ldr	r6, [sp, #640]
-	mov	r1, r11
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #668]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #664]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #660]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r5, r8, r10}
-	ldr	r2, [r7, #4]
-	add	r0, sp, #600
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r6, r9
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	mov	r1, #0
-	add	r12, sp, #604
-	ldr	r9, [sp, #628]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	ldr	r8, [sp, #632]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r10, r10, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #600]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r11, r2, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r7, r2, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r1, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r12, {r0, r1, r2, r3, r6, r12}
-	ldr	lr, [sp, #48]           @ 4-byte Reload
-	ldr	r5, [sp, #44]           @ 4-byte Reload
-	adds	r4, lr, r4
-	adcs	r0, r5, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	adcs	r0, r10, r1
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #44]           @ 4-byte Spill
-	adcs	r0, r11, r6
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r12
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	mov	r1, r6
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #560
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #592]
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r7, [sp, #576]
-	ldr	r10, [sp, #560]
-	ldr	r11, [sp, #564]
-	ldr	r8, [sp, #568]
-	ldr	r9, [sp, #572]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #588]
-	ldr	r2, [r5, #8]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #584]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #580]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	add	r0, sp, #520
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r4, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #520
-	ldr	r4, [sp, #544]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r10, r0, r8
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r8, [sp, #552]
-	adcs	r11, r0, r9
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	ldr	r9, [sp, #548]
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	adds	r7, r7, r0
-	adcs	r0, r10, r1
-	mov	r1, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	adcs	r0, r11, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #480
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #512]
-	ldr	r2, [r5, #12]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r4, [sp, #500]
-	ldr	r6, [sp, #496]
-	ldr	r10, [sp, #480]
-	ldr	r11, [sp, #484]
-	ldr	r8, [sp, #488]
-	ldr	r9, [sp, #492]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #508]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #504]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	add	r0, sp, #440
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r7, r10
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #440
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r5, r0, r11
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r10, r0, r8
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r8, [sp, #472]
-	adcs	r11, r0, r9
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	ldr	r9, [sp, #468]
-	adcs	r0, r0, r6
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #464]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r7, r5, r0
-	adcs	r0, r10, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	adcs	r0, r11, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r6, r4
-	ldr	r6, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	mul	r2, r7, r6
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #400
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #432]
-	ldr	r5, [sp, #68]           @ 4-byte Reload
-	ldr	r4, [sp, #416]
-	ldr	r10, [sp, #400]
-	ldr	r11, [sp, #404]
-	ldr	r8, [sp, #408]
-	ldr	r9, [sp, #412]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #428]
-	mov	r1, r5
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #424]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #420]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, sp, #360
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r7, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #360
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r7, r0, r11
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r10, r0, r8
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r8, [sp, #392]
-	adcs	r11, r0, r9
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	ldr	r9, [sp, #388]
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #384]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r7, r7, r0
-	adcs	r0, r10, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	adcs	r0, r11, r2
-	mul	r2, r7, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #320
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #352]
-	ldr	r6, [sp, #340]
-	ldr	r4, [sp, #336]
-	ldr	r10, [sp, #320]
-	ldr	r11, [sp, #324]
-	ldr	r8, [sp, #328]
-	ldr	r9, [sp, #332]
-	mov	r1, r5
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #348]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #344]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, sp, #280
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r7, r10
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #280
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r5, r0, r11
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r10, r0, r8
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r8, [sp, #312]
-	adcs	r11, r0, r9
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	ldr	r9, [sp, #308]
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #304]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r7, r5, r0
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r10, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	adcs	r0, r11, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r6, r4
-	ldr	r6, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	mul	r2, r7, r6
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #240
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #272]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r4, [sp, #256]
-	ldr	r10, [sp, #240]
-	ldr	r11, [sp, #244]
-	ldr	r8, [sp, #248]
-	ldr	r9, [sp, #252]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #268]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #264]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #260]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #200
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r7, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #200
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r7, r0, r11
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r10, r0, r8
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r8, [sp, #232]
-	adcs	r11, r0, r9
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	ldr	r9, [sp, #228]
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #224]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r7, r7, r0
-	adcs	r0, r10, r1
-	mov	r1, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	adcs	r0, r11, r2
-	mul	r2, r7, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #160
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #192]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r6, [sp, #184]
-	ldr	r4, [sp, #180]
-	ldr	r5, [sp, #176]
-	ldr	r10, [sp, #160]
-	ldr	r11, [sp, #164]
-	ldr	r8, [sp, #168]
-	ldr	r9, [sp, #172]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #188]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #120
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r7, r10
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	ldr	r12, [sp, #124]
-	ldr	r3, [sp, #128]
-	add	lr, sp, #136
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	adcs	r8, r1, r8
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r9, r1, r9
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r11, r1, r4
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	ldr	r4, [sp, #132]
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #152]
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r10, r1, r2
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #120]
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adc	r1, r1, #0
-	adds	r5, r0, r2
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r8, r8, r12
-	str	r1, [sp, #52]           @ 4-byte Spill
-	adcs	r3, r9, r3
-	mul	r7, r5, r0
-	ldm	lr, {r0, r1, r2, lr}
-	str	r3, [sp, #48]           @ 4-byte Spill
-	ldr	r3, [sp, #76]           @ 4-byte Reload
-	adcs	r3, r3, r4
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	adcs	r9, r11, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	str	r3, [sp, #44]           @ 4-byte Spill
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	adcs	r0, r10, r2
-	mov	r2, r7
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r10, r0, r6
-	mov	r0, #0
-	adc	r11, r0, #0
-	add	r0, sp, #80
-	bl	.LmulPv256x32(PLT)
-	add	r3, sp, #80
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r5, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	lr, r8, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	lr, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r7, r0, r3
-	ldr	r0, [sp, #96]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	adcs	r9, r9, r0
-	ldr	r0, [sp, #100]
-	adcs	r12, r1, r0
-	ldr	r0, [sp, #104]
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r12, [sp, #68]          @ 4-byte Spill
-	adcs	r8, r1, r0
-	ldr	r0, [sp, #108]
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r8, [sp, #72]           @ 4-byte Spill
-	adcs	r6, r1, r0
-	ldr	r0, [sp, #112]
-	adcs	r5, r10, r0
-	adc	r0, r11, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldm	r4, {r1, r2, r3, r11}
-	ldr	r0, [r4, #16]
-	ldr	r10, [r4, #24]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r4, #20]
-	subs	r1, lr, r1
-	ldr	lr, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r4, #28]
-	sbcs	r2, lr, r2
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	sbcs	r3, r7, r3
-	sbcs	r7, r9, r11
-	mov	r11, r6
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	sbcs	r0, r12, r0
-	sbcs	r12, r8, r4
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	sbcs	r8, r6, r10
-	mov	r10, r5
-	sbcs	r4, r5, r4
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	sbc	r6, r5, #0
-	ldr	r5, [sp, #40]           @ 4-byte Reload
-	ands	r6, r6, #1
-	movne	r2, lr
-	movne	r1, r5
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	str	r1, [r5]
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r2, [r5, #4]
-	movne	r3, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	cmp	r6, #0
-	movne	r7, r9
-	str	r3, [r5, #8]
-	str	r7, [r5, #12]
-	movne	r0, r1
-	str	r0, [r5, #16]
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	movne	r12, r0
-	cmp	r6, #0
-	movne	r8, r11
-	movne	r4, r10
-	str	r12, [r5, #20]
-	str	r8, [r5, #24]
-	str	r4, [r5, #28]
-	add	sp, sp, #724
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end114:
-	.size	mcl_fp_mont8L, .Lfunc_end114-mcl_fp_mont8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montNF8L
-	.align	2
-	.type	mcl_fp_montNF8L,%function
-mcl_fp_montNF8L:                        @ @mcl_fp_montNF8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#716
-	sub	sp, sp, #716
-	mov	r7, r2
-	ldr	r5, [r3, #-4]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #672
-	str	r3, [sp, #60]           @ 4-byte Spill
-	str	r1, [sp, #68]           @ 4-byte Spill
-	mov	r4, r3
-	mov	r10, r1
-	ldr	r2, [r7]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	str	r5, [sp, #64]           @ 4-byte Spill
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #676]
-	ldr	r11, [sp, #672]
-	mov	r1, r4
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #680]
-	mul	r2, r11, r5
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #684]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #688]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #632
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #664]
-	ldr	r2, [r7, #4]
-	ldr	r4, [sp, #648]
-	ldr	r6, [sp, #632]
-	ldr	r8, [sp, #636]
-	ldr	r5, [sp, #640]
-	ldr	r9, [sp, #644]
-	mov	r1, r10
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #660]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #656]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #652]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	add	r0, sp, #592
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r6, r11
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	r6, sp, #596
-	ldr	r12, [sp, #616]
-	ldr	r3, [sp, #612]
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	ldr	r8, [sp, #620]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r9, r9, r0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r11, r4, r0
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r4, [sp, #592]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r10, r1, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r7, r1, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adc	r0, r1, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #624]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r6, {r0, r1, r2, r6}
-	ldr	lr, [sp, #40]           @ 4-byte Reload
-	ldr	r5, [sp, #36]           @ 4-byte Reload
-	adds	r4, lr, r4
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r9, r1
-	str	r0, [sp, #44]           @ 4-byte Spill
-	adcs	r0, r11, r2
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	mul	r2, r4, r5
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r10, r3
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r7, r12
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	mov	r1, r7
-	adcs	r0, r0, r8
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #552
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #584]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r9, [sp, #568]
-	ldr	r10, [sp, #552]
-	ldr	r11, [sp, #556]
-	ldr	r8, [sp, #560]
-	ldr	r6, [sp, #564]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #580]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #576]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #572]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, sp, #512
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r4, r10
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #516
-	ldr	r4, [sp, #536]
-	ldr	r3, [sp, #512]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r10, r0, r8
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	ldr	r8, [sp, #540]
-	adcs	r11, r0, r6
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #544]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r12, lr}
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	adds	r9, r6, r3
-	adcs	r0, r10, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r11, r1
-	mov	r1, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r9, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #472
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #504]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r4, [sp, #492]
-	ldr	r7, [sp, #488]
-	ldr	r10, [sp, #472]
-	ldr	r11, [sp, #476]
-	ldr	r8, [sp, #480]
-	ldr	r6, [sp, #484]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #500]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #496]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, sp, #432
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r9, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	ldr	r3, [sp, #432]
-	add	lr, sp, #436
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r5, r0, r11
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r10, r0, r8
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	ldr	r8, [sp, #460]
-	adcs	r11, r0, r6
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r7, r0, r7
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #456]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r9, r5, r3
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #464]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r12, lr}
-	adcs	r0, r10, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r11, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	adcs	r0, r7, r2
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r6, lr
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	mul	r2, r9, r0
-	add	r0, sp, #392
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #424]
-	ldr	r5, [sp, #56]           @ 4-byte Reload
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	ldr	r4, [sp, #408]
-	ldr	r10, [sp, #392]
-	ldr	r11, [sp, #396]
-	ldr	r8, [sp, #400]
-	ldr	r6, [sp, #404]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #420]
-	ldr	r2, [r5, #16]
-	mov	r1, r7
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #416]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #412]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	add	r0, sp, #352
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r9, r10
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	ldr	r3, [sp, #352]
-	add	lr, sp, #356
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r9, r0, r11
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r10, r0, r8
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	ldr	r8, [sp, #380]
-	adcs	r11, r0, r6
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #376]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r9, r9, r3
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #384]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r12, lr}
-	adcs	r0, r10, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r11, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r6, lr
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	mul	r2, r9, r0
-	add	r0, sp, #312
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #344]
-	ldr	r2, [r5, #20]
-	ldr	r4, [sp, #328]
-	ldr	r10, [sp, #312]
-	ldr	r11, [sp, #316]
-	ldr	r8, [sp, #320]
-	ldr	r6, [sp, #324]
-	mov	r1, r7
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #340]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #336]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #332]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	add	r0, sp, #272
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r9, r10
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	ldr	r3, [sp, #272]
-	add	lr, sp, #276
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r5, r0, r11
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r10, r0, r8
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	ldr	r8, [sp, #300]
-	adcs	r11, r0, r6
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r7, r0, r4
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r4, [sp, #296]
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r9, r5, r3
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #304]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r12, lr}
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r11, r1
-	mov	r1, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	adcs	r0, r7, r2
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	mul	r2, r9, r7
-	adcs	r0, r0, r12
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r6, lr
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #232
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #264]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r4, [sp, #248]
-	ldr	r10, [sp, #232]
-	ldr	r11, [sp, #236]
-	ldr	r8, [sp, #240]
-	ldr	r6, [sp, #244]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #260]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #256]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #252]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #192
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r9, r10
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	ldr	r3, [sp, #192]
-	add	lr, sp, #196
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r9, r0, r11
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r10, r0, r8
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	ldr	r8, [sp, #220]
-	adcs	r11, r0, r6
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #216]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r9, r9, r3
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #224]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r12, lr}
-	adcs	r0, r10, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r11, r1
-	mov	r1, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r9, r7
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r6, lr
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #152
-	bl	.LmulPv256x32(PLT)
-	ldr	r0, [sp, #184]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r5, [sp, #176]
-	ldr	r4, [sp, #172]
-	ldr	r7, [sp, #168]
-	ldr	r10, [sp, #152]
-	ldr	r11, [sp, #156]
-	ldr	r8, [sp, #160]
-	ldr	r6, [sp, #164]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #180]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #112
-	bl	.LmulPv256x32(PLT)
-	adds	r0, r9, r10
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #128
-	ldr	r12, [sp, #116]
-	ldr	r3, [sp, #120]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	adcs	r1, r1, r8
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r11, r1, r7
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r10, r1, r4
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	ldr	r4, [sp, #124]
-	adcs	r1, r1, r5
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adc	r1, r1, r2
-	ldr	r2, [sp, #112]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	adds	r5, r0, r2
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	mul	r9, r5, r0
-	ldm	lr, {r0, r1, r2, r6, lr}
-	ldr	r8, [sp, #68]           @ 4-byte Reload
-	adcs	r7, r8, r12
-	ldr	r8, [sp, #60]           @ 4-byte Reload
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	adcs	r3, r7, r3
-	adcs	r11, r11, r4
-	str	r3, [sp, #56]           @ 4-byte Spill
-	adcs	r4, r10, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mov	r2, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r10, r0, r6
-	add	r0, sp, #72
-	adc	r7, lr, #0
-	bl	.LmulPv256x32(PLT)
-	add	r3, sp, #72
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r5, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r5, r0, r1
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r11, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #88]
-	adcs	r3, r4, r0
-	ldr	r0, [sp, #92]
-	str	r3, [sp, #40]           @ 4-byte Spill
-	adcs	r6, r1, r0
-	ldr	r0, [sp, #96]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r6, [sp, #64]           @ 4-byte Spill
-	adcs	r12, r1, r0
-	ldr	r0, [sp, #100]
-	ldr	r1, [sp, #104]
-	str	r12, [sp, #68]          @ 4-byte Spill
-	adcs	r11, r10, r0
-	adc	r4, r7, r1
-	ldm	r8, {r1, r2, r9, r10}
-	ldr	r0, [r8, #20]
-	ldr	r7, [r8, #16]
-	ldr	lr, [r8, #28]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r8, #24]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	mov	r0, r5
-	subs	r5, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	sbcs	r8, r1, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	sbcs	r9, r2, r9
-	sbcs	r10, r3, r10
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	sbcs	r7, r6, r7
-	sbcs	r6, r12, r3
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	sbcs	r12, r11, r3
-	sbc	lr, r4, lr
-	cmp	lr, #0
-	movlt	r5, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	movlt	r8, r1
-	movlt	r9, r2
-	cmp	lr, #0
-	movlt	r10, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	movlt	r7, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	movlt	r6, r0
-	cmp	lr, #0
-	movlt	lr, r4
-	ldr	r4, [sp, #52]           @ 4-byte Reload
-	movlt	r12, r11
-	add	r0, r4, #20
-	stm	r4, {r5, r8, r9, r10}
-	str	r7, [r4, #16]
-	stm	r0, {r6, r12, lr}
-	add	sp, sp, #716
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end115:
-	.size	mcl_fp_montNF8L, .Lfunc_end115-mcl_fp_montNF8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montRed8L
-	.align	2
-	.type	mcl_fp_montRed8L,%function
-mcl_fp_montRed8L:                       @ @mcl_fp_montRed8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#420
-	sub	sp, sp, #420
-	mov	r5, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r2, [r1, #4]
-	ldr	r4, [r1]
-	ldr	r9, [r1, #40]
-	ldr	r10, [r1, #44]
-	ldr	r0, [r5]
-	ldr	r11, [r5, #-4]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [r1, #8]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r5, #4]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #12]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r5, #8]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #16]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r5, #12]
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [r1, #20]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r5, #16]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [r1, #24]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r5, #20]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [r1, #28]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r5, #24]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	mul	r2, r4, r11
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r5, #28]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r1, #56]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r1, #60]
-	mov	r1, r5
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #376
-	bl	.LmulPv256x32(PLT)
-	add	lr, sp, #396
-	ldr	r8, [sp, #408]
-	add	r6, sp, #384
-	ldm	lr, {r3, r12, lr}
-	ldr	r7, [sp, #376]
-	ldr	r1, [sp, #380]
-	ldm	r6, {r0, r2, r6}
-	adds	r4, r4, r7
-	ldr	r4, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r4, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r4, r11
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	adcs	r9, r9, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	adcs	r0, r10, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #4]            @ 4-byte Spill
-	add	r0, sp, #336
-	bl	.LmulPv256x32(PLT)
-	add	lr, sp, #356
-	ldr	r8, [sp, #368]
-	add	r6, sp, #340
-	ldm	lr, {r3, r12, lr}
-	ldr	r7, [sp, #336]
-	ldm	r6, {r0, r1, r2, r6}
-	adds	r4, r4, r7
-	ldr	r4, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r4, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r10, r0, r2
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	mul	r2, r4, r11
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r9, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	add	r0, sp, #296
-	bl	.LmulPv256x32(PLT)
-	add	r8, sp, #320
-	add	lr, sp, #300
-	ldm	r8, {r6, r7, r8}
-	ldr	r1, [sp, #296]
-	ldm	lr, {r0, r2, r3, r12, lr}
-	adds	r1, r4, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r1, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r10, r10, r2
-	mul	r2, r4, r11
-	adcs	r9, r0, r3
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	add	r0, sp, #256
-	bl	.LmulPv256x32(PLT)
-	add	lr, sp, #276
-	ldr	r8, [sp, #288]
-	add	r6, sp, #260
-	ldm	lr, {r3, r12, lr}
-	ldr	r7, [sp, #256]
-	ldm	r6, {r0, r1, r2, r6}
-	adds	r4, r4, r7
-	adcs	r4, r10, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r9, r9, r1
-	mov	r1, r5
-	adcs	r10, r0, r2
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mul	r2, r4, r11
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #216
-	bl	.LmulPv256x32(PLT)
-	add	r8, sp, #240
-	add	lr, sp, #220
-	ldm	r8, {r6, r7, r8}
-	ldr	r1, [sp, #216]
-	ldm	lr, {r0, r2, r3, r12, lr}
-	adds	r1, r4, r1
-	adcs	r4, r9, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r10, r10, r2
-	mul	r2, r4, r11
-	adcs	r9, r0, r3
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #176
-	bl	.LmulPv256x32(PLT)
-	add	lr, sp, #196
-	ldr	r8, [sp, #208]
-	add	r6, sp, #180
-	ldm	lr, {r3, r12, lr}
-	ldr	r7, [sp, #176]
-	ldm	r6, {r0, r1, r2, r6}
-	adds	r4, r4, r7
-	adcs	r4, r10, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r7, r9, r1
-	mov	r1, r5
-	adcs	r9, r0, r2
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mul	r2, r4, r11
-	adcs	r6, r0, r6
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r10, r0, r3
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #136
-	bl	.LmulPv256x32(PLT)
-	add	r12, sp, #136
-	ldm	r12, {r0, r1, r3, r12}
-	adds	r0, r4, r0
-	adcs	r4, r7, r1
-	ldr	r7, [sp, #152]
-	ldr	r0, [sp, #168]
-	adcs	r1, r9, r3
-	ldr	r3, [sp, #160]
-	mul	r2, r4, r11
-	adcs	r9, r6, r12
-	ldr	r6, [sp, #156]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #164]
-	adcs	r10, r10, r7
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	adcs	r6, r7, r6
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	adcs	r8, r7, r3
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	adcs	r11, r3, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #96
-	bl	.LmulPv256x32(PLT)
-	add	r3, sp, #96
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r4, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r5, r0, r1
-	ldr	r0, [sp, #112]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r9, r9, r2
-	adcs	r10, r10, r3
-	adcs	r3, r6, r0
-	ldr	r0, [sp, #116]
-	str	r3, [sp, #36]           @ 4-byte Spill
-	adcs	lr, r8, r0
-	ldr	r0, [sp, #120]
-	str	lr, [sp, #40]           @ 4-byte Spill
-	adcs	r7, r11, r0
-	ldr	r0, [sp, #124]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	adcs	r4, r1, r0
-	ldr	r0, [sp, #128]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r4, [sp, #48]           @ 4-byte Spill
-	adcs	r12, r1, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r8, r0, #0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	subs	r1, r5, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	sbcs	r2, r9, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	sbcs	r6, r10, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	sbcs	r0, r3, r0
-	ldr	r3, [sp, #68]           @ 4-byte Reload
-	sbcs	r11, lr, r3
-	ldr	r3, [sp, #72]           @ 4-byte Reload
-	sbcs	r3, r7, r3
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	sbcs	lr, r4, r7
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	sbcs	r4, r12, r7
-	sbc	r7, r8, #0
-	ands	r7, r7, #1
-	movne	r1, r5
-	ldr	r5, [sp, #92]           @ 4-byte Reload
-	movne	r2, r9
-	movne	r6, r10
-	cmp	r7, #0
-	str	r1, [r5]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r2, [r5, #4]
-	str	r6, [r5, #8]
-	movne	r0, r1
-	str	r0, [r5, #12]
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	movne	r11, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	str	r11, [r5, #16]
-	movne	r3, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	cmp	r7, #0
-	movne	r4, r12
-	str	r3, [r5, #20]
-	movne	lr, r0
-	str	lr, [r5, #24]
-	str	r4, [r5, #28]
-	add	sp, sp, #420
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end116:
-	.size	mcl_fp_montRed8L, .Lfunc_end116-mcl_fp_montRed8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addPre8L
-	.align	2
-	.type	mcl_fp_addPre8L,%function
-mcl_fp_addPre8L:                        @ @mcl_fp_addPre8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#16
-	sub	sp, sp, #16
-	ldr	r3, [r1, #4]
-	ldr	r9, [r1]
-	ldr	r10, [r1, #12]
-	ldr	r11, [r1, #16]
-	ldr	r8, [r1, #28]
-	str	r3, [sp, #12]           @ 4-byte Spill
-	ldr	r3, [r1, #8]
-	str	r3, [sp, #8]            @ 4-byte Spill
-	ldr	r3, [r1, #20]
-	str	r3, [sp]                @ 4-byte Spill
-	ldr	r3, [r1, #24]
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldm	r2, {r1, r3, r4, r5, r12, lr}
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	ldr	r6, [r2, #24]
-	ldr	r2, [r2, #28]
-	adds	r1, r1, r9
-	adcs	r3, r3, r7
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	adcs	r4, r4, r7
-	ldr	r7, [sp]                @ 4-byte Reload
-	adcs	r5, r5, r10
-	adcs	r12, r12, r11
-	adcs	lr, lr, r7
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	stm	r0, {r1, r3, r4, r5, r12, lr}
-	adcs	r6, r6, r7
-	adcs	r2, r2, r8
-	str	r6, [r0, #24]
-	str	r2, [r0, #28]
-	mov	r0, #0
-	adc	r0, r0, #0
-	add	sp, sp, #16
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end117:
-	.size	mcl_fp_addPre8L, .Lfunc_end117-mcl_fp_addPre8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subPre8L
-	.align	2
-	.type	mcl_fp_subPre8L,%function
-mcl_fp_subPre8L:                        @ @mcl_fp_subPre8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#16
-	sub	sp, sp, #16
-	ldr	r3, [r2, #4]
-	ldr	r9, [r2]
-	ldr	r10, [r2, #12]
-	ldr	r11, [r2, #16]
-	ldr	r8, [r2, #28]
-	str	r3, [sp, #12]           @ 4-byte Spill
-	ldr	r3, [r2, #8]
-	str	r3, [sp, #8]            @ 4-byte Spill
-	ldr	r3, [r2, #20]
-	str	r3, [sp]                @ 4-byte Spill
-	ldr	r3, [r2, #24]
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldm	r1, {r2, r3, r4, r5, r12, lr}
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	ldr	r6, [r1, #24]
-	ldr	r1, [r1, #28]
-	subs	r2, r2, r9
-	sbcs	r3, r3, r7
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	sbcs	r4, r4, r7
-	ldr	r7, [sp]                @ 4-byte Reload
-	sbcs	r5, r5, r10
-	sbcs	r12, r12, r11
-	sbcs	lr, lr, r7
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	stm	r0, {r2, r3, r4, r5, r12, lr}
-	sbcs	r6, r6, r7
-	sbcs	r1, r1, r8
-	str	r6, [r0, #24]
-	str	r1, [r0, #28]
-	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	add	sp, sp, #16
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end118:
-	.size	mcl_fp_subPre8L, .Lfunc_end118-mcl_fp_subPre8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_8L
-	.align	2
-	.type	mcl_fp_shr1_8L,%function
-mcl_fp_shr1_8L:                         @ @mcl_fp_shr1_8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, lr}
-	push	{r4, r5, r6, r7, r8, lr}
-	ldr	r3, [r1, #4]
-	ldr	r12, [r1]
-	ldr	lr, [r1, #12]
-	add	r6, r1, #16
-	ldr	r2, [r1, #8]
-	ldm	r6, {r4, r5, r6}
-	ldr	r1, [r1, #28]
-	lsrs	r7, r3, #1
-	lsr	r3, r3, #1
-	rrx	r12, r12
-	lsrs	r7, lr, #1
-	orr	r8, r3, r2, lsl #31
-	lsr	r7, lr, #1
-	rrx	r2, r2
-	lsrs	r3, r5, #1
-	lsr	r5, r5, #1
-	str	r12, [r0]
-	str	r8, [r0, #4]
-	orr	r7, r7, r4, lsl #31
-	rrx	r3, r4
-	lsrs	r4, r1, #1
-	str	r2, [r0, #8]
-	orr	r5, r5, r6, lsl #31
-	lsr	r1, r1, #1
-	add	r2, r0, #16
-	rrx	r6, r6
-	str	r7, [r0, #12]
-	stm	r2, {r3, r5, r6}
-	str	r1, [r0, #28]
-	pop	{r4, r5, r6, r7, r8, lr}
-	mov	pc, lr
-.Lfunc_end119:
-	.size	mcl_fp_shr1_8L, .Lfunc_end119-mcl_fp_shr1_8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_add8L
-	.align	2
-	.type	mcl_fp_add8L,%function
-mcl_fp_add8L:                           @ @mcl_fp_add8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#28
-	sub	sp, sp, #28
-	ldr	r7, [r1, #12]
-	ldr	lr, [r1]
-	ldr	r11, [r1, #4]
-	ldr	r10, [r1, #8]
-	add	r8, r2, #20
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r1, #16]
-	str	r7, [sp]                @ 4-byte Spill
-	ldr	r7, [r1, #20]
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldr	r7, [r1, #24]
-	ldr	r1, [r1, #28]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldm	r2, {r1, r4, r5, r12}
-	ldr	r9, [r2, #16]
-	ldm	r8, {r6, r7, r8}
-	ldr	r2, [sp]                @ 4-byte Reload
-	adds	lr, r1, lr
-	adcs	r1, r4, r11
-	str	lr, [r0]
-	adcs	r4, r5, r10
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #24]           @ 4-byte Spill
-	str	r4, [sp, #20]           @ 4-byte Spill
-	adcs	r10, r12, r5
-	adcs	r5, r9, r2
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r5, [sp, #16]           @ 4-byte Spill
-	adcs	r12, r6, r2
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	stmib	r0, {r1, r4, r10}
-	mov	r1, #0
-	str	r5, [r0, #16]
-	str	r12, [r0, #20]
-	adcs	r7, r7, r6
-	mov	r6, r12
-	adcs	r11, r8, r2
-	str	r7, [r0, #24]
-	mov	r8, lr
-	adc	r1, r1, #0
-	str	r11, [r0, #28]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r3, {r1, r2, r9, r12, lr}
-	ldr	r4, [r3, #20]
-	ldr	r5, [r3, #24]
-	ldr	r3, [r3, #28]
-	subs	r1, r8, r1
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	sbcs	r8, r1, r2
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	sbcs	r2, r1, r9
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	sbcs	r12, r10, r12
-	sbcs	lr, r1, lr
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	sbcs	r4, r6, r4
-	sbcs	r5, r7, r5
-	sbcs	r6, r11, r3
-	sbc	r3, r1, #0
-	tst	r3, #1
-	bne	.LBB120_2
-@ BB#1:                                 @ %nocarry
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	stm	r0, {r1, r8}
-	add	r1, r0, #8
-	add	r0, r0, #20
-	stm	r1, {r2, r12, lr}
-	stm	r0, {r4, r5, r6}
-.LBB120_2:                              @ %carry
-	add	sp, sp, #28
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end120:
-	.size	mcl_fp_add8L, .Lfunc_end120-mcl_fp_add8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF8L
-	.align	2
-	.type	mcl_fp_addNF8L,%function
-mcl_fp_addNF8L:                         @ @mcl_fp_addNF8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#32
-	sub	sp, sp, #32
-	ldm	r1, {r6, r8}
-	ldr	r7, [r1, #8]
-	ldr	r9, [r1, #28]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r1, #12]
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [r1, #16]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldr	r7, [r1, #20]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r1, #24]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldm	r2, {r1, r4, r5, r12, lr}
-	ldr	r10, [r2, #20]
-	ldr	r11, [r2, #24]
-	ldr	r2, [r2, #28]
-	adds	r7, r1, r6
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r6, r4, r8
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	str	r7, [sp, #4]            @ 4-byte Spill
-	str	r6, [sp, #8]            @ 4-byte Spill
-	adcs	r8, r5, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	adcs	r1, r12, r1
-	adcs	r12, lr, r5
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	adcs	lr, r10, r5
-	adcs	r5, r11, r4
-	ldr	r4, [r3, #4]
-	ldr	r11, [r3, #16]
-	str	lr, [sp, #24]           @ 4-byte Spill
-	adc	r10, r2, r9
-	ldr	r2, [r3]
-	ldr	r9, [r3, #12]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [r3, #8]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [r3, #20]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r3, #24]
-	ldr	r3, [r3, #28]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	subs	r2, r7, r2
-	sbcs	r7, r6, r4
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	sbcs	r6, r8, r4
-	sbcs	r9, r1, r9
-	ldr	r1, [sp]                @ 4-byte Reload
-	sbcs	r4, r12, r11
-	mov	r11, r12
-	sbcs	r12, lr, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	sbcs	lr, r5, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	sbc	r3, r10, r3
-	cmp	r3, #0
-	movlt	r6, r8
-	movlt	r2, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	movlt	r7, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	cmp	r3, #0
-	movlt	r4, r11
-	movlt	r9, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	stm	r0, {r2, r7}
-	str	r6, [r0, #8]
-	str	r9, [r0, #12]
-	movlt	r12, r1
-	cmp	r3, #0
-	add	r1, r0, #16
-	movlt	lr, r5
-	movlt	r3, r10
-	stm	r1, {r4, r12, lr}
-	str	r3, [r0, #28]
-	add	sp, sp, #32
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end121:
-	.size	mcl_fp_addNF8L, .Lfunc_end121-mcl_fp_addNF8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub8L
-	.align	2
-	.type	mcl_fp_sub8L,%function
-mcl_fp_sub8L:                           @ @mcl_fp_sub8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#16
-	sub	sp, sp, #16
-	ldm	r2, {r12, lr}
-	ldr	r4, [r2, #8]
-	ldr	r9, [r2, #20]
-	ldr	r10, [r2, #24]
-	add	r8, r1, #12
-	str	r4, [sp, #12]           @ 4-byte Spill
-	ldr	r4, [r2, #12]
-	str	r4, [sp, #8]            @ 4-byte Spill
-	ldr	r4, [r2, #16]
-	ldr	r2, [r2, #28]
-	str	r4, [sp]                @ 4-byte Spill
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldm	r1, {r4, r5, r11}
-	ldm	r8, {r2, r7, r8}
-	ldr	r6, [r1, #24]
-	ldr	r1, [r1, #28]
-	subs	r12, r4, r12
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	sbcs	lr, r5, lr
-	sbcs	r11, r11, r4
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	sbcs	r2, r2, r4
-	ldr	r4, [sp]                @ 4-byte Reload
-	sbcs	r4, r7, r4
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	stm	r0, {r12, lr}
-	str	r11, [r0, #8]
-	sbcs	r5, r8, r9
-	sbcs	r6, r6, r10
-	sbcs	r7, r1, r7
-	add	r1, r0, #12
-	stm	r1, {r2, r4, r5, r6, r7}
-	mov	r1, #0
-	sbc	r1, r1, #0
-	tst	r1, #1
-	beq	.LBB122_2
-@ BB#1:                                 @ %carry
-	ldr	r1, [r3]
-	add	r10, r3, #12
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [r3, #4]
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldr	r1, [r3, #8]
-	str	r1, [sp]                @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r1, [r3, #24]
-	ldr	r3, [r3, #28]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adds	r1, r1, r12
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r12, r1, lr
-	ldr	r1, [sp]                @ 4-byte Reload
-	adcs	lr, r1, r11
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r2, r8, r2
-	adcs	r4, r9, r4
-	adcs	r5, r10, r5
-	adcs	r6, r1, r6
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adc	r3, r3, r7
-	stm	r0, {r1, r12, lr}
-	add	r1, r0, #12
-	stm	r1, {r2, r4, r5, r6}
-	str	r3, [r0, #28]
-.LBB122_2:                              @ %nocarry
-	add	sp, sp, #16
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end122:
-	.size	mcl_fp_sub8L, .Lfunc_end122-mcl_fp_sub8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF8L
-	.align	2
-	.type	mcl_fp_subNF8L,%function
-mcl_fp_subNF8L:                         @ @mcl_fp_subNF8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#36
-	sub	sp, sp, #36
-	ldm	r2, {r6, r8}
-	ldr	r7, [r2, #8]
-	ldr	r11, [r2, #12]
-	ldr	r9, [r2, #28]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #16]
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldm	r1, {r2, r4, r5, r12, lr}
-	ldr	r10, [r1, #20]
-	ldr	r7, [r1, #24]
-	ldr	r1, [r1, #28]
-	subs	r6, r2, r6
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	sbcs	r8, r4, r8
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	str	r6, [sp, #16]           @ 4-byte Spill
-	sbcs	r5, r5, r2
-	sbcs	r2, r12, r11
-	ldr	r11, [r3, #12]
-	sbcs	r12, lr, r4
-	ldr	r4, [sp, #28]           @ 4-byte Reload
-	str	r2, [sp, #20]           @ 4-byte Spill
-	str	r12, [sp, #24]          @ 4-byte Spill
-	sbcs	lr, r10, r4
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	ldr	r10, [r3, #16]
-	str	lr, [sp, #28]           @ 4-byte Spill
-	sbcs	r4, r7, r4
-	ldr	r7, [r3]
-	sbc	r1, r1, r9
-	ldr	r9, [r3, #8]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	ldr	r7, [r3, #4]
-	str	r7, [sp]                @ 4-byte Spill
-	ldr	r7, [r3, #20]
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldr	r7, [r3, #24]
-	ldr	r3, [r3, #28]
-	str	r3, [sp, #12]           @ 4-byte Spill
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adds	r7, r6, r3
-	ldr	r3, [sp]                @ 4-byte Reload
-	adcs	r6, r8, r3
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	adcs	r9, r5, r9
-	adcs	r11, r2, r11
-	adcs	r2, r12, r10
-	ldr	r10, [sp, #16]          @ 4-byte Reload
-	adcs	r12, lr, r3
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	adcs	lr, r4, r3
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	adc	r3, r1, r3
-	cmp	r1, #0
-	movge	r9, r5
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	movge	r7, r10
-	movge	r6, r8
-	cmp	r1, #0
-	str	r7, [r0]
-	movge	r11, r5
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	movge	r2, r5
-	ldr	r5, [sp, #28]           @ 4-byte Reload
-	stmib	r0, {r6, r9, r11}
-	movge	r12, r5
-	cmp	r1, #0
-	movge	r3, r1
-	movge	lr, r4
-	add	r1, r0, #16
-	stm	r1, {r2, r12, lr}
-	str	r3, [r0, #28]
-	add	sp, sp, #36
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end123:
-	.size	mcl_fp_subNF8L, .Lfunc_end123-mcl_fp_subNF8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add8L
-	.align	2
-	.type	mcl_fpDbl_add8L,%function
-mcl_fpDbl_add8L:                        @ @mcl_fpDbl_add8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#68
-	sub	sp, sp, #68
-	ldm	r1, {r7, r9}
-	ldr	r6, [r1, #8]
-	ldr	r8, [r1, #12]
-	ldm	r2, {r4, r12, lr}
-	ldr	r5, [r2, #12]
-	adds	r4, r4, r7
-	str	r4, [sp, #32]           @ 4-byte Spill
-	ldr	r4, [r2, #32]
-	adcs	r7, r12, r9
-	adcs	r6, lr, r6
-	add	lr, r1, #16
-	adcs	r9, r5, r8
-	ldr	r5, [r2, #28]
-	add	r8, r2, #16
-	str	r4, [sp, #36]           @ 4-byte Spill
-	ldr	r4, [r2, #36]
-	str	r5, [sp, #28]           @ 4-byte Spill
-	str	r4, [sp, #40]           @ 4-byte Spill
-	ldr	r4, [r2, #40]
-	str	r4, [sp, #44]           @ 4-byte Spill
-	ldr	r4, [r2, #44]
-	str	r4, [sp, #48]           @ 4-byte Spill
-	ldr	r4, [r2, #48]
-	str	r4, [sp, #52]           @ 4-byte Spill
-	ldr	r4, [r2, #52]
-	str	r4, [sp, #56]           @ 4-byte Spill
-	ldr	r4, [r2, #56]
-	str	r4, [sp, #60]           @ 4-byte Spill
-	ldr	r4, [r2, #60]
-	str	r4, [sp, #64]           @ 4-byte Spill
-	ldm	r8, {r4, r5, r8}
-	ldr	r2, [r1, #36]
-	ldr	r10, [r1, #32]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #40]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #44]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r1, #52]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #32]          @ 4-byte Reload
-	adcs	r1, r4, r1
-	str	r11, [r0]
-	str	r7, [r0, #4]
-	str	r6, [r0, #8]
-	str	r9, [r0, #12]
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	ldr	r4, [sp, #16]           @ 4-byte Reload
-	adcs	r2, r5, r2
-	str	r1, [r0, #16]
-	str	r2, [r0, #20]
-	adcs	r1, r8, r12
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r2, r2, lr
-	adcs	r1, r1, r10
-	str	r2, [r0, #28]
-	ldr	r2, [sp]                @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r7, r1, r2
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	adcs	r2, r1, r2
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r2, [sp, #44]           @ 4-byte Spill
-	adcs	r12, r1, r6
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	str	r12, [sp, #48]          @ 4-byte Spill
-	adcs	lr, r1, r6
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	lr, [sp, #52]           @ 4-byte Spill
-	adcs	r5, r1, r4
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	str	r5, [sp, #56]           @ 4-byte Spill
-	adcs	r8, r1, r4
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	adcs	r10, r1, r4
-	mov	r1, #0
-	adc	r1, r1, #0
-	str	r10, [sp, #60]          @ 4-byte Spill
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [r3]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldmib	r3, {r4, r11}
-	ldr	r6, [r3, #12]
-	ldr	r1, [r3, #24]
-	ldr	r9, [r3, #16]
-	str	r6, [sp, #40]           @ 4-byte Spill
-	ldr	r6, [r3, #20]
-	ldr	r3, [r3, #28]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r3, [sp, #32]           @ 4-byte Spill
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	subs	r1, r3, r1
-	sbcs	r4, r7, r4
-	sbcs	r11, r2, r11
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	sbcs	r2, r12, r2
-	sbcs	r12, lr, r9
-	mov	r9, r8
-	sbcs	lr, r5, r6
-	ldr	r5, [sp, #28]           @ 4-byte Reload
-	sbcs	r6, r8, r5
-	ldr	r5, [sp, #32]           @ 4-byte Reload
-	sbcs	r8, r10, r5
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	sbc	r10, r5, #0
-	ands	r10, r10, #1
-	movne	r1, r3
-	movne	r4, r7
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r4, [r0, #36]
-	movne	r11, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	cmp	r10, #0
-	str	r11, [r0, #40]
-	movne	r2, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r2, [r0, #44]
-	movne	r12, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r12, [r0, #48]
-	movne	lr, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	cmp	r10, #0
-	movne	r6, r9
-	str	lr, [r0, #52]
-	str	r6, [r0, #56]
-	movne	r8, r1
-	str	r8, [r0, #60]
-	add	sp, sp, #68
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end124:
-	.size	mcl_fpDbl_add8L, .Lfunc_end124-mcl_fpDbl_add8L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sub8L
-	.align	2
-	.type	mcl_fpDbl_sub8L,%function
-mcl_fpDbl_sub8L:                        @ @mcl_fpDbl_sub8L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#64
-	sub	sp, sp, #64
-	ldr	r7, [r2, #32]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #60]           @ 4-byte Spill
-	ldm	r2, {r4, r5, r8}
-	ldr	r6, [r2, #20]
-	ldr	r7, [r2, #12]
-	ldr	r9, [r2, #16]
-	ldr	r11, [r2, #24]
-	ldr	r10, [r2, #28]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	ldm	r1, {r2, r12, lr}
-	ldr	r6, [r1, #12]
-	subs	r4, r2, r4
-	ldr	r2, [r1, #32]
-	sbcs	r5, r12, r5
-	ldr	r12, [r1, #36]
-	sbcs	lr, lr, r8
-	add	r8, r1, #16
-	sbcs	r6, r6, r7
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #40]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #44]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r1, #52]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldm	r8, {r1, r2, r7, r8}
-	stm	r0, {r4, r5, lr}
-	str	r6, [r0, #12]
-	mov	r4, #0
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	sbcs	r1, r1, r9
-	sbcs	r2, r2, r6
-	str	r1, [r0, #16]
-	sbcs	r1, r7, r11
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	str	r1, [r0, #24]
-	sbcs	r1, r8, r10
-	str	r1, [r0, #28]
-	ldr	r1, [sp]                @ 4-byte Reload
-	sbcs	r1, r1, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	sbcs	r6, r12, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r6, [sp, #36]           @ 4-byte Spill
-	sbcs	r1, r1, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	sbcs	r9, r7, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	sbcs	r12, r7, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	str	r12, [sp, #48]          @ 4-byte Spill
-	sbcs	lr, r7, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	lr, [sp, #52]           @ 4-byte Spill
-	sbcs	r8, r5, r2
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	sbcs	r11, r5, r2
-	sbc	r2, r4, #0
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldm	r3, {r4, r5}
-	ldr	r2, [r3, #8]
-	ldr	r10, [r3, #20]
-	ldr	r7, [r3, #24]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r3, #12]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [r3, #16]
-	ldr	r3, [r3, #28]
-	str	r3, [sp, #56]           @ 4-byte Spill
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	adds	r4, r3, r4
-	adcs	r5, r6, r5
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	adcs	r6, r1, r6
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r9, r1
-	adcs	r2, r12, r2
-	adcs	r12, lr, r10
-	adcs	lr, r8, r7
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	adc	r10, r11, r7
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	ands	r7, r7, #1
-	moveq	r4, r3
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	str	r4, [r0, #32]
-	moveq	r5, r3
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	str	r5, [r0, #36]
-	moveq	r6, r3
-	cmp	r7, #0
-	moveq	r1, r9
-	str	r6, [r0, #40]
-	str	r1, [r0, #44]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r2, [r0, #48]
-	moveq	r12, r1
-	cmp	r7, #0
-	moveq	lr, r8
-	moveq	r10, r11
-	str	r12, [r0, #52]
-	str	lr, [r0, #56]
-	str	r10, [r0, #60]
-	add	sp, sp, #64
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end125:
-	.size	mcl_fpDbl_sub8L, .Lfunc_end125-mcl_fpDbl_sub8L
-	.cantunwind
-	.fnend
-
-	.align	2
-	.type	.LmulPv288x32,%function
-.LmulPv288x32:                          @ @mulPv288x32
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r9, [r1, #12]
-	umull	r4, r8, lr, r2
-	umull	lr, r6, r12, r2
-	mov	r5, r4
-	mov	r7, r6
-	str	lr, [r0]
-	umull	lr, r12, r9, r2
-	umlal	r7, r5, r3, r2
-	str	r5, [r0, #8]
-	str	r7, [r0, #4]
-	umull	r5, r7, r3, r2
-	adds	r3, r6, r5
-	adcs	r3, r7, r4
-	adcs	r3, r8, lr
-	str	r3, [r0, #12]
-	ldr	r3, [r1, #16]
-	umull	r7, r6, r3, r2
-	adcs	r3, r12, r7
-	str	r3, [r0, #16]
-	ldr	r3, [r1, #20]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #20]
-	ldr	r3, [r1, #24]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #24]
-	ldr	r3, [r1, #28]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #28]
-	ldr	r1, [r1, #32]
-	umull	r3, r7, r1, r2
-	adcs	r1, r5, r3
-	adc	r2, r7, #0
-	str	r1, [r0, #32]
-	str	r2, [r0, #36]
-	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
-	mov	pc, lr
-.Lfunc_end126:
-	.size	.LmulPv288x32, .Lfunc_end126-.LmulPv288x32
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mulUnitPre9L
-	.align	2
-	.type	mcl_fp_mulUnitPre9L,%function
-mcl_fp_mulUnitPre9L:                    @ @mcl_fp_mulUnitPre9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, lr}
-	push	{r4, r5, r6, r7, r8, lr}
-	.pad	#40
-	sub	sp, sp, #40
-	mov	r4, r0
-	mov	r0, sp
-	bl	.LmulPv288x32(PLT)
-	add	lr, sp, #20
-	ldr	r12, [sp, #36]
-	ldm	lr, {r0, r3, r8, lr}
-	ldr	r1, [sp, #16]
-	ldm	sp, {r5, r6, r7}
-	ldr	r2, [sp, #12]
-	stm	r4, {r5, r6, r7}
-	str	r2, [r4, #12]
-	str	r1, [r4, #16]
-	add	r1, r4, #20
-	stm	r1, {r0, r3, r8, lr}
-	str	r12, [r4, #36]
-	add	sp, sp, #40
-	pop	{r4, r5, r6, r7, r8, lr}
-	mov	pc, lr
-.Lfunc_end127:
-	.size	mcl_fp_mulUnitPre9L, .Lfunc_end127-mcl_fp_mulUnitPre9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_mulPre9L
-	.align	2
-	.type	mcl_fpDbl_mulPre9L,%function
-mcl_fpDbl_mulPre9L:                     @ @mcl_fpDbl_mulPre9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#412
-	sub	sp, sp, #412
-	mov	r10, r2
-	mov	r8, r0
-	add	r0, sp, #368
-	str	r1, [sp, #44]           @ 4-byte Spill
-	mov	r4, r1
-	ldr	r2, [r10]
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #404]
-	ldr	r1, [sp, #376]
-	ldr	r2, [r10, #4]
-	ldr	r9, [sp, #372]
-	ldr	r11, [sp, #380]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #400]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	mov	r1, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #396]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #392]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #388]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #384]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #368]
-	str	r0, [r8]
-	add	r0, sp, #328
-	bl	.LmulPv288x32(PLT)
-	add	lr, sp, #352
-	ldr	r4, [sp, #364]
-	add	r7, sp, #332
-	ldm	lr, {r3, r12, lr}
-	ldr	r6, [sp, #328]
-	ldm	r7, {r0, r1, r2, r5, r7}
-	adds	r6, r6, r9
-	str	r6, [r8, #4]
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #12]           @ 4-byte Spill
-	adcs	r0, r1, r11
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r10, #8]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adc	r0, r4, #0
-	ldr	r4, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	add	r0, sp, #288
-	mov	r1, r4
-	bl	.LmulPv288x32(PLT)
-	add	r9, sp, #312
-	add	lr, sp, #288
-	ldm	r9, {r5, r6, r7, r9}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #12]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r8, #8]
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	mov	r1, r4
-	adcs	r0, r2, r0
-	ldr	r2, [r10, #12]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	adc	r0, r9, #0
-	mov	r9, r4
-	str	r0, [sp, #12]           @ 4-byte Spill
-	add	r0, sp, #248
-	bl	.LmulPv288x32(PLT)
-	add	lr, sp, #272
-	ldr	r4, [sp, #284]
-	add	r6, sp, #252
-	ldm	lr, {r3, r12, lr}
-	ldr	r7, [sp, #248]
-	ldr	r5, [sp, #268]
-	ldm	r6, {r0, r1, r2, r6}
-	adds	r7, r7, r11
-	str	r7, [r8, #12]
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	adcs	r11, r0, r7
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r10, #16]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	adc	r0, r4, #0
-	mov	r4, r9
-	str	r0, [sp, #12]           @ 4-byte Spill
-	add	r0, sp, #208
-	mov	r1, r4
-	bl	.LmulPv288x32(PLT)
-	add	r9, sp, #232
-	add	lr, sp, #208
-	ldm	r9, {r5, r6, r7, r9}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r0, r11
-	str	r0, [r8, #16]
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	mov	r1, r4
-	adcs	r0, r2, r0
-	ldr	r2, [r10, #20]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	adc	r0, r9, #0
-	mov	r9, r4
-	str	r0, [sp, #12]           @ 4-byte Spill
-	add	r0, sp, #168
-	bl	.LmulPv288x32(PLT)
-	add	lr, sp, #192
-	ldr	r4, [sp, #204]
-	add	r6, sp, #172
-	ldm	lr, {r3, r12, lr}
-	ldr	r7, [sp, #168]
-	ldr	r5, [sp, #188]
-	ldm	r6, {r0, r1, r2, r6}
-	adds	r7, r7, r11
-	str	r7, [r8, #20]
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adcs	r11, r0, r7
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r10, #24]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	adc	r0, r4, #0
-	mov	r4, r9
-	str	r0, [sp, #8]            @ 4-byte Spill
-	add	r0, sp, #128
-	mov	r1, r4
-	bl	.LmulPv288x32(PLT)
-	add	r9, sp, #152
-	add	lr, sp, #128
-	ldm	r9, {r5, r6, r7, r9}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r0, r11
-	str	r0, [r8, #24]
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	mov	r1, r4
-	adcs	r0, r2, r0
-	ldr	r2, [r10, #28]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	add	r0, sp, #88
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #124]
-	add	lr, sp, #112
-	add	r7, sp, #92
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r5, r12, lr}
-	ldr	r2, [sp, #88]
-	ldr	r6, [sp, #108]
-	ldm	r7, {r0, r1, r3, r7}
-	ldr	r4, [sp, #40]           @ 4-byte Reload
-	adds	r2, r2, r11
-	adcs	r9, r0, r4
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	str	r2, [r8, #28]
-	ldr	r2, [r10, #32]
-	adcs	r10, r1, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r11, r3, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r7, r7, r0
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r6, r6, r0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r5, r5, r0
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adc	r4, r0, #0
-	add	r0, sp, #48
-	bl	.LmulPv288x32(PLT)
-	add	r3, sp, #48
-	ldm	r3, {r0, r1, r2, r3}
-	ldr	r12, [sp, #84]
-	ldr	lr, [sp, #80]
-	adds	r0, r0, r9
-	ldr	r9, [sp, #76]
-	adcs	r1, r1, r10
-	adcs	r2, r2, r11
-	ldr	r11, [sp, #72]
-	adcs	r10, r3, r7
-	ldr	r7, [sp, #64]
-	ldr	r3, [sp, #68]
-	str	r0, [r8, #32]
-	str	r1, [r8, #36]
-	str	r2, [r8, #40]
-	str	r10, [r8, #44]
-	adcs	r0, r7, r6
-	str	r0, [r8, #48]
-	adcs	r0, r3, r5
-	str	r0, [r8, #52]
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [r8, #56]
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [r8, #60]
-	adcs	r0, lr, r4
-	adc	r1, r12, #0
-	str	r0, [r8, #64]
-	str	r1, [r8, #68]
-	add	sp, sp, #412
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end128:
-	.size	mcl_fpDbl_mulPre9L, .Lfunc_end128-mcl_fpDbl_mulPre9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sqrPre9L
-	.align	2
-	.type	mcl_fpDbl_sqrPre9L,%function
-mcl_fpDbl_sqrPre9L:                     @ @mcl_fpDbl_sqrPre9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#412
-	sub	sp, sp, #412
-	mov	r5, r1
-	mov	r4, r0
-	add	r0, sp, #368
-	ldr	r2, [r5]
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #404]
-	add	r11, sp, #368
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #400]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #396]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #392]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #388]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #384]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r11, {r0, r10, r11}
-	ldr	r1, [sp, #380]
-	ldr	r2, [r5, #4]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	str	r0, [r4]
-	add	r0, sp, #328
-	mov	r1, r5
-	bl	.LmulPv288x32(PLT)
-	add	lr, sp, #348
-	add	r7, sp, #328
-	ldr	r9, [sp, #364]
-	ldr	r8, [sp, #360]
-	ldm	lr, {r6, r12, lr}
-	ldm	r7, {r0, r1, r2, r3, r7}
-	adds	r0, r0, r10
-	str	r0, [r4, #4]
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r10, r1, r11
-	mov	r1, r5
-	adcs	r11, r2, r0
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r2, [r5, #8]
-	adcs	r0, r3, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #288
-	bl	.LmulPv288x32(PLT)
-	add	r9, sp, #312
-	add	lr, sp, #288
-	ldm	r9, {r6, r7, r8, r9}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r0, r10
-	str	r0, [r4, #8]
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r10, r1, r11
-	mov	r1, r5
-	adcs	r11, r2, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r2, [r5, #12]
-	adcs	r0, r3, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #248
-	bl	.LmulPv288x32(PLT)
-	add	lr, sp, #268
-	add	r7, sp, #248
-	ldr	r9, [sp, #284]
-	ldr	r8, [sp, #280]
-	ldm	lr, {r6, r12, lr}
-	ldm	r7, {r0, r1, r2, r3, r7}
-	adds	r0, r0, r10
-	str	r0, [r4, #12]
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r10, r1, r11
-	mov	r1, r5
-	adcs	r11, r2, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r2, [r5, #16]
-	adcs	r0, r3, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #208
-	bl	.LmulPv288x32(PLT)
-	add	r9, sp, #232
-	add	lr, sp, #208
-	ldm	r9, {r6, r7, r8, r9}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r0, r10
-	str	r0, [r4, #16]
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r10, r1, r11
-	mov	r1, r5
-	adcs	r11, r2, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r2, [r5, #20]
-	adcs	r0, r3, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #168
-	bl	.LmulPv288x32(PLT)
-	add	lr, sp, #188
-	add	r7, sp, #168
-	ldr	r9, [sp, #204]
-	ldr	r8, [sp, #200]
-	ldm	lr, {r6, r12, lr}
-	ldm	r7, {r0, r1, r2, r3, r7}
-	adds	r0, r0, r10
-	str	r0, [r4, #20]
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r10, r1, r11
-	mov	r1, r5
-	adcs	r11, r2, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	ldr	r2, [r5, #24]
-	adcs	r0, r3, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #128
-	bl	.LmulPv288x32(PLT)
-	add	r9, sp, #152
-	add	lr, sp, #128
-	ldm	r9, {r6, r7, r8, r9}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r0, r10
-	str	r0, [r4, #24]
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r10, r1, r11
-	mov	r1, r5
-	adcs	r11, r2, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	ldr	r2, [r5, #28]
-	adcs	r0, r3, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #88
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #124]
-	ldr	r2, [sp, #88]
-	ldr	r1, [sp, #92]
-	add	r12, sp, #96
-	ldr	lr, [sp, #116]
-	ldr	r6, [sp, #112]
-	ldr	r7, [sp, #108]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #120]
-	adds	r2, r2, r10
-	adcs	r10, r1, r11
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r12, {r0, r3, r12}
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r2, [r4, #28]
-	ldr	r2, [r5, #32]
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r8, r3, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r9, r12, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #48
-	bl	.LmulPv288x32(PLT)
-	add	r3, sp, #48
-	add	lr, sp, #72
-	ldm	r3, {r0, r1, r2, r3}
-	ldr	r12, [sp, #84]
-	adds	r0, r0, r10
-	adcs	r1, r1, r11
-	adcs	r2, r2, r8
-	ldm	lr, {r5, r8, lr}
-	ldr	r6, [sp, #68]
-	ldr	r7, [sp, #64]
-	adcs	r3, r3, r9
-	add	r9, r4, #32
-	stm	r9, {r0, r1, r2}
-	str	r3, [r4, #44]
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [r4, #48]
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [r4, #52]
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [r4, #56]
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [r4, #60]
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	adc	r1, r12, #0
-	str	r0, [r4, #64]
-	str	r1, [r4, #68]
-	add	sp, sp, #412
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end129:
-	.size	mcl_fpDbl_sqrPre9L, .Lfunc_end129-mcl_fpDbl_sqrPre9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mont9L
-	.align	2
-	.type	mcl_fp_mont9L,%function
-mcl_fp_mont9L:                          @ @mcl_fp_mont9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#804
-	sub	sp, sp, #804
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldr	r6, [r3, #-4]
-	ldr	r2, [r2]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #760
-	str	r3, [sp, #76]           @ 4-byte Spill
-	str	r1, [sp, #68]           @ 4-byte Spill
-	mov	r4, r3
-	mov	r7, r1
-	str	r6, [sp, #72]           @ 4-byte Spill
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #764]
-	ldr	r5, [sp, #760]
-	mov	r1, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #768]
-	mul	r2, r5, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #772]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #796]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #792]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #788]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #784]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #780]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #776]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #720
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #756]
-	add	r11, sp, #724
-	ldr	r4, [sp, #736]
-	ldr	r9, [sp, #720]
-	mov	r1, r7
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #752]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #748]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #744]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #740]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r8, r10, r11}
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	add	r0, sp, #680
-	ldr	r2, [r6, #4]
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r9, r5
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	mov	r1, #0
-	add	lr, sp, #680
-	ldr	r9, [sp, #716]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r5, r8, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	ldr	r10, [sp, #712]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	ldr	r11, [sp, #708]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #704]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r7, r2, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	adc	r8, r1, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r5, r5, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adcs	r0, r8, r9
-	str	r0, [sp, #24]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	mul	r2, r5, r0
-	add	r0, sp, #640
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #676]
-	add	r10, sp, #640
-	ldr	r11, [sp, #660]
-	ldr	r7, [sp, #656]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #672]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #668]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #664]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldr	r2, [r6, #8]
-	ldr	r6, [sp, #68]           @ 4-byte Reload
-	add	r0, sp, #600
-	mov	r1, r6
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r5, r4
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #600
-	ldr	r4, [sp, #624]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r5, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #636]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #632]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #628]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r8, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r5, r5, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adcs	r0, r8, r9
-	str	r0, [sp, #24]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	mul	r2, r5, r0
-	add	r0, sp, #560
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #596]
-	add	r10, sp, #560
-	ldr	r11, [sp, #580]
-	ldr	r7, [sp, #576]
-	mov	r1, r6
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #592]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #588]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #584]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, sp, #520
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r5, r4
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #520
-	ldr	r4, [sp, #544]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r5, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #556]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #552]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #548]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r8, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r5, r5, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	adcs	r0, r6, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adcs	r0, r8, r9
-	str	r0, [sp, #24]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	mul	r2, r5, r0
-	add	r0, sp, #480
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #516]
-	add	r10, sp, #480
-	ldr	r11, [sp, #500]
-	ldr	r7, [sp, #496]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #512]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #508]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #504]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	add	r0, sp, #440
-	ldr	r2, [r6, #16]
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r5, r4
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #440
-	ldr	r4, [sp, #464]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r5, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #476]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #472]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #468]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r8, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r5, r5, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adcs	r0, r8, r9
-	str	r0, [sp, #24]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	mul	r2, r5, r0
-	add	r0, sp, #400
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #436]
-	add	r10, sp, #400
-	ldr	r11, [sp, #420]
-	ldr	r7, [sp, #416]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #432]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #428]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #424]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldr	r2, [r6, #20]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	add	r0, sp, #360
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r5, r4
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #360
-	ldr	r4, [sp, #384]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r5, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #396]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #392]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #388]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r8, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r5, r5, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	adcs	r0, r6, lr
-	ldr	r6, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	mul	r2, r5, r6
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adcs	r0, r8, r9
-	str	r0, [sp, #24]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #320
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #356]
-	add	r10, sp, #320
-	ldr	r11, [sp, #340]
-	ldr	r7, [sp, #336]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #352]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #348]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #344]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #280
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r5, r4
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #280
-	ldr	r4, [sp, #304]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r5, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #316]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #312]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #308]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r8, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r5, r5, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r5, r6
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	mov	r1, r6
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adcs	r0, r8, r9
-	str	r0, [sp, #24]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #240
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #276]
-	add	r10, sp, #240
-	ldr	r11, [sp, #260]
-	ldr	r7, [sp, #256]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #272]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #268]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #264]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #200
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r5, r4
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #200
-	ldr	r4, [sp, #224]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r5, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #236]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #232]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #228]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r8, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r5, r5, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adcs	r0, r8, r9
-	str	r0, [sp, #24]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	mul	r2, r5, r0
-	add	r0, sp, #160
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #196]
-	add	r10, sp, #160
-	ldr	r11, [sp, #184]
-	ldr	r6, [sp, #180]
-	ldr	r7, [sp, #176]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #192]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #188]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #120
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r5, r4
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	ldr	r2, [sp, #120]
-	ldr	lr, [sp, #124]
-	ldr	r5, [sp, #128]
-	ldr	r12, [sp, #132]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r9, r0, r9
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r10, r0, r10
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	add	r7, sp, #136
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r11, r0, r11
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r4, r4, r2
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r9, r9, lr
-	adcs	r10, r10, r5
-	mul	r8, r4, r0
-	ldm	r7, {r0, r1, r2, r3, r6, r7}
-	ldr	r5, [sp, #68]           @ 4-byte Reload
-	adcs	r5, r5, r12
-	str	r5, [sp, #36]           @ 4-byte Spill
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	adcs	r5, r5, r0
-	adcs	r0, r11, r1
-	ldr	r11, [sp, #76]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	mov	r1, r11
-	adcs	r0, r0, r2
-	mov	r2, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #72]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	add	r0, sp, #80
-	bl	.LmulPv288x32(PLT)
-	add	r3, sp, #80
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r4, r0
-	adcs	r0, r9, r1
-	ldr	r1, [sp, #96]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r7, r10, r2
-	str	r7, [sp, #40]           @ 4-byte Spill
-	adcs	r8, r0, r3
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r10, r5, r1
-	ldr	r1, [sp, #100]
-	adcs	r4, r0, r1
-	ldr	r1, [sp, #104]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	str	r4, [sp, #44]           @ 4-byte Spill
-	adcs	r6, r0, r1
-	ldr	r1, [sp, #108]
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	str	r6, [sp, #48]           @ 4-byte Spill
-	adcs	r12, r0, r1
-	ldr	r1, [sp, #112]
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	str	r12, [sp, #56]          @ 4-byte Spill
-	adcs	lr, r0, r1
-	ldr	r1, [sp, #116]
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	lr, [sp, #68]           @ 4-byte Spill
-	adcs	r5, r0, r1
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	str	r5, [sp, #72]           @ 4-byte Spill
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	mov	r0, r11
-	ldmib	r0, {r2, r3, r11}
-	ldr	r1, [r0, #16]
-	ldr	r9, [r0]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [r0, #20]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [r0, #24]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [r0, #28]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	mov	r1, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	subs	r9, r0, r9
-	sbcs	r2, r7, r2
-	sbcs	r3, r8, r3
-	sbcs	r7, r10, r11
-	ldr	r11, [r1, #32]
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	sbcs	r1, r4, r1
-	ldr	r4, [sp, #28]           @ 4-byte Reload
-	sbcs	r4, r6, r4
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	sbcs	r12, r12, r6
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	sbcs	lr, lr, r6
-	sbcs	r11, r5, r11
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	sbc	r6, r5, #0
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	ands	r6, r6, #1
-	movne	r9, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	movne	r3, r8
-	str	r9, [r5]
-	movne	r2, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	cmp	r6, #0
-	movne	r7, r10
-	str	r2, [r5, #4]
-	str	r3, [r5, #8]
-	str	r7, [r5, #12]
-	movne	r1, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	str	r1, [r5, #16]
-	movne	r4, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	cmp	r6, #0
-	str	r4, [r5, #20]
-	movne	r12, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	str	r12, [r5, #24]
-	movne	lr, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	lr, [r5, #28]
-	movne	r11, r0
-	str	r11, [r5, #32]
-	add	sp, sp, #804
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end130:
-	.size	mcl_fp_mont9L, .Lfunc_end130-mcl_fp_mont9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montNF9L
-	.align	2
-	.type	mcl_fp_montNF9L,%function
-mcl_fp_montNF9L:                        @ @mcl_fp_montNF9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#804
-	sub	sp, sp, #804
-	add	r12, sp, #60
-	str	r2, [sp, #72]           @ 4-byte Spill
-	mov	r4, r3
-	mov	r7, r1
-	stm	r12, {r0, r1, r3}
-	add	r0, sp, #760
-	ldr	r6, [r3, #-4]
-	ldr	r2, [r2]
-	str	r6, [sp, #76]           @ 4-byte Spill
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #764]
-	ldr	r5, [sp, #760]
-	mov	r1, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #768]
-	mul	r2, r5, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #772]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #796]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #792]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #788]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #784]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #780]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #776]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #720
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #756]
-	add	r10, sp, #724
-	ldr	r6, [sp, #736]
-	ldr	r11, [sp, #720]
-	mov	r1, r7
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #752]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #748]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #744]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #740]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r4, [sp, #72]           @ 4-byte Reload
-	add	r0, sp, #680
-	ldr	r2, [r4, #4]
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r11, r5
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #680
-	ldr	r11, [sp, #704]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	ldr	r8, [sp, #716]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	ldr	r9, [sp, #712]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	ldr	r10, [sp, #708]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r5, r1, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r7, r1, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r1, r0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	adds	r6, r6, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r5, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r8, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #640
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #676]
-	add	r10, sp, #644
-	ldr	r7, [sp, #656]
-	ldr	r11, [sp, #640]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #672]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #668]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #664]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #660]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	ldr	r2, [r4, #8]
-	add	r0, sp, #600
-	mov	r1, r5
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r6, r11
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #600
-	ldr	r11, [sp, #624]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r8, [sp, #636]
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #632]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #628]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r6, r4, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r8, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #560
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #596]
-	add	r10, sp, #564
-	ldr	r7, [sp, #576]
-	ldr	r11, [sp, #560]
-	mov	r1, r5
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #592]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #588]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #584]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #580]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, sp, #520
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r6, r11
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #520
-	ldr	r11, [sp, #544]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r6, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r8, [sp, #556]
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #552]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #548]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r5, r0, r1
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r6, r6, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	adcs	r0, r5, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r8, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #480
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #516]
-	add	r10, sp, #484
-	ldr	r7, [sp, #496]
-	ldr	r11, [sp, #480]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #512]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #508]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #504]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #500]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	add	r0, sp, #440
-	ldr	r2, [r5, #16]
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r6, r11
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #440
-	ldr	r11, [sp, #464]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r8, [sp, #476]
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #472]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #468]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r6, r4, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r4, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r6, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r8, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #400
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #436]
-	add	r10, sp, #404
-	ldr	r7, [sp, #416]
-	ldr	r11, [sp, #400]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #432]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #428]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #424]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #420]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r2, [r5, #20]
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	add	r0, sp, #360
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r6, r11
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #360
-	ldr	r11, [sp, #384]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r5, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r8, [sp, #396]
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #392]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #388]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r6, r5, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r6, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r8, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #320
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #356]
-	add	r10, sp, #324
-	ldr	r7, [sp, #336]
-	ldr	r11, [sp, #320]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #352]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #348]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #344]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #340]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #280
-	mov	r1, r5
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r6, r11
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #280
-	ldr	r11, [sp, #304]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r8, [sp, #316]
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #312]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #308]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r6, r4, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r8, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #240
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #276]
-	add	r10, sp, #244
-	ldr	r7, [sp, #256]
-	ldr	r11, [sp, #240]
-	mov	r1, r5
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #272]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #268]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #264]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #260]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #200
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r6, r11
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #200
-	ldr	r11, [sp, #224]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r5, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r8, [sp, #236]
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #232]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #228]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r5, r5, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r6, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adcs	r0, r7, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r8, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	mul	r2, r5, r0
-	add	r0, sp, #160
-	bl	.LmulPv288x32(PLT)
-	ldr	r0, [sp, #196]
-	add	r10, sp, #164
-	ldr	r4, [sp, #184]
-	ldr	r6, [sp, #180]
-	ldr	r7, [sp, #176]
-	ldr	r11, [sp, #160]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #192]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #188]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #120
-	bl	.LmulPv288x32(PLT)
-	adds	r0, r5, r11
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #120
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	add	r8, sp, #136
-	adcs	r1, r1, r9
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r10, r1, r10
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r11, r1, r7
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adc	r1, r1, r2
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldm	lr, {r2, r12, lr}
-	ldr	r4, [sp, #132]
-	adds	r5, r0, r2
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	mul	r9, r5, r0
-	ldm	r8, {r0, r1, r2, r3, r6, r8}
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	adcs	r7, r7, r12
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adcs	r7, r10, lr
-	ldr	r10, [sp, #68]          @ 4-byte Reload
-	adcs	r11, r11, r4
-	ldr	r4, [sp, #72]           @ 4-byte Reload
-	str	r7, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r4, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mov	r2, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r4, r0, r3
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #76]           @ 4-byte Spill
-	adc	r0, r8, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	add	r0, sp, #80
-	bl	.LmulPv288x32(PLT)
-	add	r3, sp, #80
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r5, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r9, r0, r1
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #96]
-	str	r9, [sp, #32]           @ 4-byte Spill
-	adcs	r2, r0, r2
-	adcs	r0, r11, r3
-	str	r2, [sp, #44]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r1, [sp, #100]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	str	r7, [sp, #48]           @ 4-byte Spill
-	adcs	r6, r0, r1
-	ldr	r1, [sp, #104]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	lr, r0, r1
-	ldr	r1, [sp, #108]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	lr, [sp, #56]           @ 4-byte Spill
-	adcs	r4, r4, r1
-	ldr	r1, [sp, #112]
-	str	r4, [sp, #64]           @ 4-byte Spill
-	adcs	r5, r0, r1
-	ldr	r1, [sp, #116]
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r5, [sp, #76]           @ 4-byte Spill
-	adc	r12, r0, r1
-	mov	r0, r10
-	ldr	r1, [r0, #16]
-	ldr	r8, [r0]
-	ldr	r11, [r0, #4]
-	ldr	r10, [r0, #8]
-	ldr	r3, [r0, #12]
-	str	r12, [sp, #72]          @ 4-byte Spill
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [r0, #20]
-	subs	r8, r9, r8
-	ldr	r9, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [r0, #24]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [r0, #28]
-	ldr	r0, [r0, #32]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	sbcs	r1, r2, r11
-	sbcs	r2, r9, r10
-	mov	r10, r6
-	sbcs	r3, r7, r3
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	sbcs	r7, r6, r7
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	sbcs	r11, lr, r6
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	sbcs	lr, r4, r6
-	ldr	r4, [sp, #40]           @ 4-byte Reload
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	sbcs	r4, r5, r4
-	ldr	r5, [sp, #32]           @ 4-byte Reload
-	sbc	r0, r12, r0
-	asr	r12, r0, #31
-	cmp	r12, #0
-	movlt	r8, r5
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	movlt	r1, r6
-	movlt	r2, r9
-	cmp	r12, #0
-	movlt	r7, r10
-	str	r8, [r5]
-	str	r1, [r5, #4]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r2, [r5, #8]
-	movlt	r3, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r3, [r5, #12]
-	str	r7, [r5, #16]
-	movlt	r11, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	cmp	r12, #0
-	str	r11, [r5, #20]
-	movlt	lr, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	lr, [r5, #24]
-	movlt	r4, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r4, [r5, #28]
-	movlt	r0, r1
-	str	r0, [r5, #32]
-	add	sp, sp, #804
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end131:
-	.size	mcl_fp_montNF9L, .Lfunc_end131-mcl_fp_montNF9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montRed9L
-	.align	2
-	.type	mcl_fp_montRed9L,%function
-mcl_fp_montRed9L:                       @ @mcl_fp_montRed9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#476
-	sub	sp, sp, #476
-	mov	r5, r2
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r2, [r1, #4]
-	ldr	r4, [r1]
-	ldr	r11, [r1, #32]
-	ldr	r10, [r1, #36]
-	ldr	r0, [r5]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [r1, #8]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [r5, #4]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #12]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [r5, #8]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #16]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [r5, #12]
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [r1, #20]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r5, #16]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [r1, #24]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r5, #20]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [r1, #28]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r5, #24]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r5, #-4]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	mul	r2, r4, r0
-	ldr	r0, [r5, #28]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r5, #32]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r1, #64]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r1, #68]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r1, #56]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [r1, #60]
-	mov	r1, r5
-	str	r0, [sp, #8]            @ 4-byte Spill
-	add	r0, sp, #432
-	bl	.LmulPv288x32(PLT)
-	ldr	r1, [sp, #432]
-	add	lr, sp, #436
-	ldr	r9, [sp, #468]
-	ldr	r8, [sp, #464]
-	ldm	lr, {r0, r2, r3, r6, r7, r12, lr}
-	adds	r1, r4, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r1, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	mul	r2, r4, r7
-	adcs	r0, r0, r12
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	adcs	r0, r11, r8
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r9, r10, r9
-	adcs	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #4]            @ 4-byte Spill
-	add	r0, sp, #392
-	bl	.LmulPv288x32(PLT)
-	add	r11, sp, #408
-	add	r6, sp, #392
-	ldr	r12, [sp, #428]
-	ldr	lr, [sp, #424]
-	ldr	r8, [sp, #420]
-	ldm	r11, {r2, r10, r11}
-	ldm	r6, {r0, r1, r3, r6}
-	adds	r0, r4, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r0, r1
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r4, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r11, r0, r11
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	adcs	r0, r9, lr
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	add	r0, sp, #352
-	bl	.LmulPv288x32(PLT)
-	add	lr, sp, #372
-	add	r7, sp, #352
-	ldr	r10, [sp, #388]
-	ldr	r9, [sp, #384]
-	ldm	lr, {r6, r12, lr}
-	ldm	r7, {r0, r1, r2, r3, r7}
-	adds	r0, r4, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r0, r1
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r0, r0, r2
-	mul	r2, r4, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r11, r6
-	mov	r11, r8
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	add	r0, sp, #312
-	bl	.LmulPv288x32(PLT)
-	add	lr, sp, #332
-	ldr	r7, [sp, #348]
-	add	r9, sp, #320
-	ldm	lr, {r6, r8, r12, lr}
-	ldr	r1, [sp, #312]
-	ldr	r3, [sp, #316]
-	ldm	r9, {r0, r2, r9}
-	adds	r1, r4, r1
-	mov	r4, r11
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r10, r1, r3
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r10, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	add	r0, sp, #272
-	bl	.LmulPv288x32(PLT)
-	add	lr, sp, #272
-	ldr	r11, [sp, #308]
-	ldr	r9, [sp, #304]
-	ldm	lr, {r0, r1, r2, r3, r6, r7, r12, lr}
-	adds	r0, r10, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r8, r0, r1
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r0, r0, r2
-	mul	r2, r8, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	mov	r6, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	add	r0, sp, #232
-	bl	.LmulPv288x32(PLT)
-	add	r11, sp, #256
-	add	lr, sp, #232
-	ldm	r11, {r7, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r8, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r0, r1
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r0, r0, r2
-	mul	r2, r4, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	add	r0, sp, #192
-	bl	.LmulPv288x32(PLT)
-	add	lr, sp, #212
-	add	r7, sp, #192
-	ldr	r9, [sp, #228]
-	ldr	r8, [sp, #224]
-	ldm	lr, {r6, r12, lr}
-	ldm	r7, {r0, r1, r2, r3, r7}
-	adds	r0, r4, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r0, r1
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r10, r0, r2
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r11, r0, r3
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r7, r0, r7
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r6, r0, r6
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	mul	r2, r4, r8
-	adcs	r9, r0, r9
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #152
-	bl	.LmulPv288x32(PLT)
-	add	r12, sp, #152
-	ldm	r12, {r0, r1, r3, r12}
-	ldr	lr, [sp, #188]
-	adds	r0, r4, r0
-	adcs	r4, r10, r1
-	ldr	r1, [sp, #168]
-	adcs	r11, r11, r3
-	mul	r2, r4, r8
-	ldr	r3, [sp, #180]
-	adcs	r0, r7, r12
-	ldr	r7, [sp, #176]
-	ldr	r12, [sp, #184]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #172]
-	adcs	r10, r6, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r8, r1, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r7, r0, r7
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r9, r12
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #112
-	bl	.LmulPv288x32(PLT)
-	add	r3, sp, #112
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r4, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r6, r11, r1
-	ldr	r1, [sp, #128]
-	adcs	r9, r0, r2
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r11, r10, r3
-	adcs	lr, r8, r1
-	ldr	r1, [sp, #132]
-	str	r11, [sp, #28]          @ 4-byte Spill
-	str	lr, [sp, #32]           @ 4-byte Spill
-	adcs	r7, r7, r1
-	ldr	r1, [sp, #136]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	adcs	r8, r0, r1
-	ldr	r1, [sp, #140]
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	str	r8, [sp, #48]           @ 4-byte Spill
-	adcs	r4, r0, r1
-	ldr	r1, [sp, #144]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	str	r4, [sp, #52]           @ 4-byte Spill
-	adcs	r5, r0, r1
-	ldr	r1, [sp, #148]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	str	r5, [sp, #108]          @ 4-byte Spill
-	adcs	r12, r0, r1
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adc	r10, r0, #0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	subs	r2, r6, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	sbcs	r3, r9, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	sbcs	r1, r11, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	sbcs	r11, lr, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	sbcs	r0, r7, r0
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	sbcs	lr, r8, r7
-	ldr	r7, [sp, #88]           @ 4-byte Reload
-	sbcs	r8, r4, r7
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	sbcs	r4, r5, r4
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	sbcs	r5, r12, r5
-	sbc	r7, r10, #0
-	ands	r7, r7, #1
-	movne	r2, r6
-	ldr	r6, [sp, #104]          @ 4-byte Reload
-	movne	r3, r9
-	str	r2, [r6]
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r3, [r6, #4]
-	movne	r1, r2
-	cmp	r7, #0
-	str	r1, [r6, #8]
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	movne	r11, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r11, [r6, #12]
-	movne	r0, r1
-	str	r0, [r6, #16]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	movne	lr, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	cmp	r7, #0
-	movne	r5, r12
-	str	lr, [r6, #20]
-	movne	r8, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	str	r8, [r6, #24]
-	movne	r4, r0
-	str	r4, [r6, #28]
-	str	r5, [r6, #32]
-	add	sp, sp, #476
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end132:
-	.size	mcl_fp_montRed9L, .Lfunc_end132-mcl_fp_montRed9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addPre9L
-	.align	2
-	.type	mcl_fp_addPre9L,%function
-mcl_fp_addPre9L:                        @ @mcl_fp_addPre9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#16
-	sub	sp, sp, #16
-	ldm	r1, {r3, r12, lr}
-	ldr	r9, [r1, #12]
-	ldmib	r2, {r5, r6, r7}
-	ldr	r4, [r2, #16]
-	ldr	r8, [r2]
-	ldr	r11, [r2, #28]
-	str	r4, [sp, #12]           @ 4-byte Spill
-	ldr	r4, [r2, #20]
-	adds	r10, r8, r3
-	adcs	r5, r5, r12
-	ldr	r12, [r1, #32]
-	ldr	r8, [sp, #12]           @ 4-byte Reload
-	str	r10, [r0]
-	adcs	lr, r6, lr
-	ldr	r6, [r1, #20]
-	adcs	r7, r7, r9
-	str	r4, [sp, #4]            @ 4-byte Spill
-	ldr	r4, [r2, #24]
-	ldr	r2, [r2, #32]
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	str	r4, [sp]                @ 4-byte Spill
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #28]
-	ldr	r4, [r1, #24]
-	ldr	r1, [r1, #16]
-	adcs	r1, r8, r1
-	adcs	r6, r3, r6
-	ldr	r3, [sp]                @ 4-byte Reload
-	stmib	r0, {r5, lr}
-	str	r7, [r0, #12]
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r6, [r0, #20]
-	adcs	r4, r3, r4
-	adcs	r2, r11, r2
-	str	r4, [r0, #24]
-	adcs	r1, r1, r12
-	str	r2, [r0, #28]
-	str	r1, [r0, #32]
-	mov	r0, #0
-	adc	r0, r0, #0
-	add	sp, sp, #16
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end133:
-	.size	mcl_fp_addPre9L, .Lfunc_end133-mcl_fp_addPre9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subPre9L
-	.align	2
-	.type	mcl_fp_subPre9L,%function
-mcl_fp_subPre9L:                        @ @mcl_fp_subPre9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#20
-	sub	sp, sp, #20
-	ldr	r3, [r2, #8]
-	add	lr, r1, #16
-	ldr	r11, [r2, #4]
-	ldr	r10, [r2, #12]
-	ldr	r4, [r2]
-	str	r3, [sp]                @ 4-byte Spill
-	ldr	r3, [r2, #16]
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldr	r3, [r2, #20]
-	str	r3, [sp, #8]            @ 4-byte Spill
-	ldr	r3, [r2, #24]
-	str	r3, [sp, #12]           @ 4-byte Spill
-	ldr	r3, [r2, #28]
-	str	r3, [sp, #16]           @ 4-byte Spill
-	ldmib	r1, {r5, r6, r7}
-	ldm	lr, {r3, r12, lr}
-	ldr	r9, [r1]
-	ldr	r8, [r1, #28]
-	subs	r4, r9, r4
-	ldr	r9, [r2, #32]
-	ldr	r2, [sp]                @ 4-byte Reload
-	sbcs	r11, r5, r11
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	sbcs	r6, r6, r2
-	sbcs	r7, r7, r10
-	ldr	r10, [r1, #32]
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	sbcs	r3, r3, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	sbcs	r2, r12, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	stm	r0, {r4, r11}
-	str	r6, [r0, #8]
-	str	r7, [r0, #12]
-	str	r3, [r0, #16]
-	str	r2, [r0, #20]
-	sbcs	r1, lr, r1
-	sbcs	r5, r8, r5
-	str	r1, [r0, #24]
-	sbcs	r1, r10, r9
-	str	r5, [r0, #28]
-	str	r1, [r0, #32]
-	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	add	sp, sp, #20
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end134:
-	.size	mcl_fp_subPre9L, .Lfunc_end134-mcl_fp_subPre9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_9L
-	.align	2
-	.type	mcl_fp_shr1_9L,%function
-mcl_fp_shr1_9L:                         @ @mcl_fp_shr1_9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, lr}
-	push	{r4, r5, r6, r7, r8, lr}
-	add	r12, r1, #16
-	ldr	r2, [r1, #8]
-	ldr	lr, [r1, #12]
-	ldm	r12, {r4, r5, r6, r8, r12}
-	ldm	r1, {r1, r3}
-	lsrs	r7, r3, #1
-	rrx	r1, r1
-	str	r1, [r0]
-	lsr	r1, r3, #1
-	orr	r1, r1, r2, lsl #31
-	str	r1, [r0, #4]
-	lsrs	r1, lr, #1
-	rrx	r1, r2
-	str	r1, [r0, #8]
-	lsr	r1, lr, #1
-	orr	r1, r1, r4, lsl #31
-	str	r1, [r0, #12]
-	lsrs	r1, r5, #1
-	rrx	r1, r4
-	str	r1, [r0, #16]
-	lsr	r1, r5, #1
-	orr	r1, r1, r6, lsl #31
-	str	r1, [r0, #20]
-	lsrs	r1, r8, #1
-	rrx	r1, r6
-	str	r1, [r0, #24]
-	lsr	r1, r8, #1
-	orr	r1, r1, r12, lsl #31
-	str	r1, [r0, #28]
-	lsr	r1, r12, #1
-	str	r1, [r0, #32]
-	pop	{r4, r5, r6, r7, r8, lr}
-	mov	pc, lr
-.Lfunc_end135:
-	.size	mcl_fp_shr1_9L, .Lfunc_end135-mcl_fp_shr1_9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_add9L
-	.align	2
-	.type	mcl_fp_add9L,%function
-mcl_fp_add9L:                           @ @mcl_fp_add9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#16
-	sub	sp, sp, #16
-	ldm	r1, {r12, lr}
-	ldr	r5, [r2]
-	ldr	r9, [r1, #8]
-	ldr	r8, [r1, #12]
-	ldmib	r2, {r4, r6, r7}
-	adds	r12, r5, r12
-	ldr	r5, [r1, #24]
-	adcs	lr, r4, lr
-	ldr	r4, [r1, #20]
-	str	r12, [sp, #8]           @ 4-byte Spill
-	adcs	r10, r6, r9
-	ldr	r6, [r1, #16]
-	adcs	r9, r7, r8
-	ldr	r7, [r2, #16]
-	str	r10, [sp, #4]           @ 4-byte Spill
-	adcs	r6, r7, r6
-	ldr	r7, [r2, #20]
-	adcs	r7, r7, r4
-	ldr	r4, [r2, #24]
-	adcs	r11, r4, r5
-	ldr	r5, [r1, #28]
-	ldr	r4, [r2, #28]
-	ldr	r1, [r1, #32]
-	ldr	r2, [r2, #32]
-	adcs	r8, r4, r5
-	adcs	r4, r2, r1
-	mov	r2, lr
-	add	r1, r0, #16
-	str	r4, [r0, #32]
-	str	r12, [r0]
-	stmib	r0, {r2, r10}
-	str	r9, [r0, #12]
-	stm	r1, {r6, r7, r11}
-	mov	r1, #0
-	str	r8, [r0, #28]
-	adc	r1, r1, #0
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r3, {r1, r5, lr}
-	ldr	r10, [sp, #8]           @ 4-byte Reload
-	ldr	r12, [r3, #12]
-	subs	r1, r10, r1
-	str	r1, [sp, #8]            @ 4-byte Spill
-	sbcs	r1, r2, r5
-	ldr	r5, [r3, #20]
-	str	r1, [sp]                @ 4-byte Spill
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	sbcs	r2, r1, lr
-	ldr	r1, [r3, #16]
-	sbcs	r12, r9, r12
-	sbcs	r1, r6, r1
-	ldr	r6, [r3, #24]
-	sbcs	r5, r7, r5
-	ldr	r7, [r3, #28]
-	ldr	r3, [r3, #32]
-	sbcs	r6, r11, r6
-	sbcs	r7, r8, r7
-	sbcs	r3, r4, r3
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	sbc	r4, r4, #0
-	tst	r4, #1
-	bne	.LBB136_2
-@ BB#1:                                 @ %nocarry
-	str	r3, [r0, #32]
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	str	r3, [r0]
-	ldr	r3, [sp]                @ 4-byte Reload
-	str	r3, [r0, #4]
-	str	r2, [r0, #8]
-	str	r12, [r0, #12]
-	add	r0, r0, #16
-	stm	r0, {r1, r5, r6, r7}
-.LBB136_2:                              @ %carry
-	add	sp, sp, #16
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end136:
-	.size	mcl_fp_add9L, .Lfunc_end136-mcl_fp_add9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF9L
-	.align	2
-	.type	mcl_fp_addNF9L,%function
-mcl_fp_addNF9L:                         @ @mcl_fp_addNF9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#52
-	sub	sp, sp, #52
-	ldr	r9, [r1]
-	ldmib	r1, {r8, lr}
-	ldr	r5, [r2]
-	ldr	r12, [r1, #12]
-	ldmib	r2, {r4, r6, r7}
-	ldr	r10, [r3, #4]
-	adds	r5, r5, r9
-	adcs	r9, r4, r8
-	ldr	r4, [r1, #16]
-	ldr	r8, [r1, #20]
-	str	r5, [sp, #16]           @ 4-byte Spill
-	ldr	r5, [r1, #24]
-	adcs	r11, r6, lr
-	ldr	lr, [sp, #16]           @ 4-byte Reload
-	str	r9, [sp, #28]           @ 4-byte Spill
-	adcs	r12, r7, r12
-	ldr	r7, [r2, #16]
-	str	r12, [sp, #32]          @ 4-byte Spill
-	adcs	r6, r7, r4
-	ldr	r7, [r2, #20]
-	str	r6, [sp, #36]           @ 4-byte Spill
-	adcs	r4, r7, r8
-	ldr	r7, [r2, #24]
-	ldr	r8, [r3]
-	str	r4, [sp, #40]           @ 4-byte Spill
-	adcs	r7, r7, r5
-	ldr	r5, [r2, #28]
-	ldr	r2, [r2, #32]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r1, #28]
-	ldr	r1, [r1, #32]
-	adcs	r7, r5, r7
-	ldr	r5, [r3, #8]
-	adc	r1, r2, r1
-	ldr	r2, [r3, #16]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldr	r7, [r3, #12]
-	subs	r8, lr, r8
-	str	r1, [sp, #24]           @ 4-byte Spill
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r3, #20]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r3, #24]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r3, #28]
-	ldr	r3, [r3, #32]
-	str	r3, [sp]                @ 4-byte Spill
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	str	r2, [sp, #20]           @ 4-byte Spill
-	sbcs	r2, r9, r10
-	sbcs	r5, r11, r5
-	sbcs	r7, r12, r7
-	sbcs	r12, r6, r3
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	sbcs	r6, r4, r3
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	sbcs	r9, r4, r3
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	sbcs	r10, r3, r4
-	ldr	r3, [sp]                @ 4-byte Reload
-	ldr	r4, [sp, #28]           @ 4-byte Reload
-	sbc	r3, r1, r3
-	asr	r1, r3, #31
-	cmp	r1, #0
-	movlt	r8, lr
-	movlt	r2, r4
-	movlt	r5, r11
-	cmp	r1, #0
-	str	r8, [r0]
-	str	r2, [r0, #4]
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r5, [r0, #8]
-	movlt	r7, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r7, [r0, #12]
-	movlt	r12, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r12, [r0, #16]
-	movlt	r6, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r6, [r0, #20]
-	movlt	r9, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r9, [r0, #24]
-	movlt	r10, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r10, [r0, #28]
-	movlt	r3, r1
-	str	r3, [r0, #32]
-	add	sp, sp, #52
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end137:
-	.size	mcl_fp_addNF9L, .Lfunc_end137-mcl_fp_addNF9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub9L
-	.align	2
-	.type	mcl_fp_sub9L,%function
-mcl_fp_sub9L:                           @ @mcl_fp_sub9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#24
-	sub	sp, sp, #24
-	ldm	r2, {r12, lr}
-	ldr	r5, [r1]
-	ldr	r8, [r2, #8]
-	ldr	r9, [r2, #12]
-	ldmib	r1, {r4, r6, r7}
-	subs	r12, r5, r12
-	ldr	r5, [r2, #24]
-	sbcs	lr, r4, lr
-	ldr	r4, [r2, #20]
-	sbcs	r8, r6, r8
-	ldr	r6, [r2, #16]
-	sbcs	r9, r7, r9
-	ldr	r7, [r1, #16]
-	sbcs	r10, r7, r6
-	ldr	r7, [r1, #20]
-	ldr	r6, [r1, #28]
-	sbcs	r7, r7, r4
-	ldr	r4, [r1, #24]
-	ldr	r1, [r1, #32]
-	sbcs	r4, r4, r5
-	ldr	r5, [r2, #28]
-	ldr	r2, [r2, #32]
-	sbcs	r5, r6, r5
-	sbcs	r1, r1, r2
-	add	r2, r0, #8
-	str	r1, [r0, #32]
-	stm	r0, {r12, lr}
-	stm	r2, {r8, r9, r10}
-	mov	r2, #0
-	str	r7, [r0, #20]
-	str	r4, [r0, #24]
-	str	r5, [r0, #28]
-	sbc	r2, r2, #0
-	tst	r2, #1
-	beq	.LBB138_2
-@ BB#1:                                 @ %carry
-	ldr	r2, [r3, #32]
-	ldr	r6, [r3, #4]
-	ldr	r11, [r3, #12]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [r3, #8]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r3, #16]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r3, #20]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r3, #24]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r3, #28]
-	ldr	r3, [r3]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [sp]                @ 4-byte Reload
-	adds	r3, r3, r12
-	adcs	r6, r6, lr
-	adcs	r8, r2, r8
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	adcs	r12, r11, r9
-	adcs	lr, r2, r10
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adcs	r7, r2, r7
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	adcs	r4, r2, r4
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	stm	r0, {r3, r6, r8, r12, lr}
-	str	r7, [r0, #20]
-	str	r4, [r0, #24]
-	adcs	r5, r2, r5
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r5, [r0, #28]
-	adc	r1, r2, r1
-	str	r1, [r0, #32]
-.LBB138_2:                              @ %nocarry
-	add	sp, sp, #24
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end138:
-	.size	mcl_fp_sub9L, .Lfunc_end138-mcl_fp_sub9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF9L
-	.align	2
-	.type	mcl_fp_subNF9L,%function
-mcl_fp_subNF9L:                         @ @mcl_fp_subNF9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#52
-	sub	sp, sp, #52
-	ldr	r7, [r2, #32]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r1, #32]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldm	r2, {r6, r8}
-	ldr	r7, [r2, #8]
-	ldr	r5, [r2, #16]
-	ldr	r4, [r1, #16]
-	ldr	r11, [r1, #20]
-	ldr	r10, [r1, #24]
-	ldr	r9, [r1, #28]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [r2, #12]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	ldr	r2, [r2, #28]
-	str	r7, [sp, #24]           @ 4-byte Spill
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldm	r1, {r1, r2, r12, lr}
-	subs	r6, r1, r6
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	sbcs	r7, r2, r8
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r6, [sp, #12]           @ 4-byte Spill
-	str	r7, [sp, #16]           @ 4-byte Spill
-	sbcs	r8, r12, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r8, [sp, #20]           @ 4-byte Spill
-	sbcs	r12, lr, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	sbcs	r5, r4, r5
-	str	r12, [sp, #32]          @ 4-byte Spill
-	str	r5, [sp, #36]           @ 4-byte Spill
-	sbcs	lr, r11, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	ldr	r11, [r3, #16]
-	str	lr, [sp, #40]           @ 4-byte Spill
-	sbcs	r4, r10, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	ldr	r10, [r3, #20]
-	str	r4, [sp, #24]           @ 4-byte Spill
-	sbcs	r9, r9, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	sbc	r1, r2, r1
-	ldr	r2, [r3, #24]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [r3, #32]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [r3, #4]
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [r3, #8]
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldr	r1, [r3, #12]
-	str	r1, [sp]                @ 4-byte Spill
-	ldr	r1, [r3, #28]
-	ldr	r3, [r3]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adds	r3, r6, r3
-	adcs	r6, r7, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r7, r8, r1
-	ldr	r1, [sp]                @ 4-byte Reload
-	adcs	r1, r12, r1
-	adcs	r12, r5, r11
-	adcs	r5, lr, r10
-	ldr	r10, [sp, #12]          @ 4-byte Reload
-	adcs	lr, r4, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	adcs	r8, r9, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	adc	r11, r4, r2
-	asr	r2, r4, #31
-	cmp	r2, #0
-	movge	r3, r10
-	str	r3, [r0]
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	movge	r6, r3
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	str	r6, [r0, #4]
-	movge	r7, r3
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	cmp	r2, #0
-	str	r7, [r0, #8]
-	movge	r1, r3
-	str	r1, [r0, #12]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	movge	r12, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r12, [r0, #16]
-	movge	r5, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	cmp	r2, #0
-	movge	r8, r9
-	movge	r11, r4
-	str	r5, [r0, #20]
-	movge	lr, r1
-	str	lr, [r0, #24]
-	str	r8, [r0, #28]
-	str	r11, [r0, #32]
-	add	sp, sp, #52
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end139:
-	.size	mcl_fp_subNF9L, .Lfunc_end139-mcl_fp_subNF9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add9L
-	.align	2
-	.type	mcl_fpDbl_add9L,%function
-mcl_fpDbl_add9L:                        @ @mcl_fpDbl_add9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#88
-	sub	sp, sp, #88
-	ldm	r1, {r7, r9}
-	ldr	r8, [r1, #8]
-	ldr	lr, [r1, #12]
-	ldm	r2, {r4, r5, r6, r12}
-	add	r11, r2, #16
-	adds	r4, r4, r7
-	ldr	r7, [r2, #28]
-	str	r4, [sp, #36]           @ 4-byte Spill
-	ldr	r4, [r2, #64]
-	str	r7, [sp, #24]           @ 4-byte Spill
-	str	r4, [sp, #76]           @ 4-byte Spill
-	ldr	r4, [r2, #68]
-	str	r4, [sp, #80]           @ 4-byte Spill
-	adcs	r4, r5, r9
-	str	r4, [sp, #32]           @ 4-byte Spill
-	adcs	r4, r6, r8
-	str	r4, [sp, #28]           @ 4-byte Spill
-	ldr	r4, [r2, #32]
-	adcs	r9, r12, lr
-	add	lr, r1, #16
-	str	r4, [sp, #48]           @ 4-byte Spill
-	ldr	r4, [r2, #36]
-	str	r4, [sp, #52]           @ 4-byte Spill
-	ldr	r4, [r2, #40]
-	str	r4, [sp, #56]           @ 4-byte Spill
-	ldr	r4, [r2, #44]
-	str	r4, [sp, #60]           @ 4-byte Spill
-	ldr	r4, [r2, #48]
-	str	r4, [sp, #64]           @ 4-byte Spill
-	ldr	r4, [r2, #52]
-	str	r4, [sp, #68]           @ 4-byte Spill
-	ldr	r4, [r2, #56]
-	str	r4, [sp, #72]           @ 4-byte Spill
-	ldr	r4, [r2, #60]
-	str	r4, [sp, #84]           @ 4-byte Spill
-	ldm	r11, {r5, r6, r11}
-	ldr	r2, [r1, #64]
-	ldr	r8, [r1, #32]
-	ldr	r4, [r1, #36]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #40]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #44]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #52]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r10, [sp, #36]          @ 4-byte Reload
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	str	r10, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r5, r1
-	adcs	r2, r6, r2
-	str	r7, [r0, #8]
-	str	r9, [r0, #12]
-	str	r1, [r0, #16]
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r11, r12
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r2, r2, lr
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r2, r4
-	ldr	r2, [sp]                @ 4-byte Reload
-	adcs	r5, r1, r2
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	str	r5, [sp, #56]           @ 4-byte Spill
-	adcs	lr, r1, r2
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	lr, [sp, #60]           @ 4-byte Spill
-	adcs	r12, r1, r2
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r12, [sp, #64]          @ 4-byte Spill
-	adcs	r7, r1, r2
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r7, [sp, #68]           @ 4-byte Spill
-	adcs	r8, r1, r2
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r8, [sp, #72]           @ 4-byte Spill
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r6, r1, r2
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	adcs	r9, r1, r2
-	mov	r2, #0
-	adc	r1, r2, #0
-	str	r9, [sp, #76]           @ 4-byte Spill
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldmib	r3, {r2, r11}
-	ldr	r1, [r3, #12]
-	ldr	r10, [r3]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	subs	r10, r4, r10
-	sbcs	r2, r5, r2
-	sbcs	r11, lr, r11
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [r3, #20]
-	ldr	r5, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [r3, #24]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [r3, #28]
-	ldr	r3, [r3, #32]
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	sbcs	r1, r12, r1
-	sbcs	r12, r7, r5
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	sbcs	lr, r8, r7
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	mov	r8, r6
-	sbcs	r7, r5, r7
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	sbcs	r5, r6, r5
-	sbcs	r6, r9, r3
-	ldr	r3, [sp, #80]           @ 4-byte Reload
-	sbc	r9, r3, #0
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	ands	r9, r9, #1
-	movne	r10, r4
-	str	r10, [r0, #36]
-	movne	r2, r3
-	str	r2, [r0, #40]
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	movne	r11, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	cmp	r9, #0
-	str	r11, [r0, #44]
-	movne	r1, r2
-	str	r1, [r0, #48]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	movne	r12, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r12, [r0, #52]
-	movne	lr, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	cmp	r9, #0
-	movne	r5, r8
-	str	lr, [r0, #56]
-	movne	r7, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r7, [r0, #60]
-	str	r5, [r0, #64]
-	movne	r6, r1
-	str	r6, [r0, #68]
-	add	sp, sp, #88
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end140:
-	.size	mcl_fpDbl_add9L, .Lfunc_end140-mcl_fpDbl_add9L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sub9L
-	.align	2
-	.type	mcl_fpDbl_sub9L,%function
-mcl_fpDbl_sub9L:                        @ @mcl_fpDbl_sub9L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#80
-	sub	sp, sp, #80
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r2, #32]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #60]           @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #64]           @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldm	r2, {r5, r6, r7, r8}
-	ldr	r4, [r2, #16]
-	ldr	r10, [r2, #24]
-	str	r4, [sp, #20]           @ 4-byte Spill
-	ldr	r4, [r2, #20]
-	ldr	r2, [r2, #28]
-	str	r4, [sp, #24]           @ 4-byte Spill
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldm	r1, {r2, r12, lr}
-	ldr	r4, [r1, #12]
-	ldr	r11, [r1, #60]
-	subs	r9, r2, r5
-	ldr	r2, [r1, #64]
-	sbcs	r5, r12, r6
-	sbcs	r6, lr, r7
-	add	lr, r1, #16
-	ldr	r7, [r1, #36]
-	sbcs	r4, r4, r8
-	ldr	r8, [r1, #32]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [r1, #40]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #44]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #52]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	str	r9, [r0]
-	stmib	r0, {r5, r6}
-	str	r4, [r0, #12]
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	sbcs	r1, r1, r5
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	sbcs	r2, r2, r4
-	str	r1, [r0, #16]
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	sbcs	r1, r12, r10
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	sbcs	r2, lr, r2
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	sbcs	r1, r8, r1
-	str	r1, [r0, #32]
-	sbcs	r1, r7, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	sbcs	r4, r7, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	sbcs	r9, r7, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	sbcs	r12, r7, r2
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r12, [sp, #56]          @ 4-byte Spill
-	sbcs	lr, r7, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	str	lr, [sp, #60]           @ 4-byte Spill
-	sbcs	r10, r5, r2
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	ldr	r5, [sp, #28]           @ 4-byte Reload
-	str	r10, [sp, #64]          @ 4-byte Spill
-	sbcs	r6, r11, r2
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	str	r6, [sp, #68]           @ 4-byte Spill
-	sbcs	r8, r7, r2
-	ldr	r2, [sp, #76]           @ 4-byte Reload
-	str	r8, [sp, #44]           @ 4-byte Spill
-	sbcs	r11, r5, r2
-	mov	r2, #0
-	sbc	r2, r2, #0
-	str	r11, [sp, #76]          @ 4-byte Spill
-	str	r2, [sp, #72]           @ 4-byte Spill
-	ldr	r2, [r3, #32]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldmib	r3, {r5, r7}
-	ldr	r2, [r3, #12]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldr	r2, [r3, #16]
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [r3, #20]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [r3, #24]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [r3, #28]
-	ldr	r3, [r3]
-	adds	r3, r1, r3
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adcs	r5, r4, r5
-	adcs	r1, r9, r7
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	adcs	r2, r12, r2
-	adcs	r12, lr, r7
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adcs	lr, r10, r7
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adcs	r10, r6, r7
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	adcs	r6, r8, r6
-	adc	r11, r11, r7
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	ands	r8, r7, #1
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	moveq	r5, r4
-	moveq	r1, r9
-	moveq	r3, r7
-	cmp	r8, #0
-	str	r3, [r0, #36]
-	str	r5, [r0, #40]
-	str	r1, [r0, #44]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r2, [r0, #48]
-	moveq	r12, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r12, [r0, #52]
-	moveq	lr, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	cmp	r8, #0
-	str	lr, [r0, #56]
-	moveq	r10, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r10, [r0, #60]
-	moveq	r6, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r6, [r0, #64]
-	moveq	r11, r1
-	str	r11, [r0, #68]
-	add	sp, sp, #80
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end141:
-	.size	mcl_fpDbl_sub9L, .Lfunc_end141-mcl_fpDbl_sub9L
-	.cantunwind
-	.fnend
-
-	.align	2
-	.type	.LmulPv320x32,%function
-.LmulPv320x32:                          @ @mulPv320x32
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r9, [r1, #12]
-	umull	r4, r8, lr, r2
-	umull	lr, r6, r12, r2
-	mov	r5, r4
-	mov	r7, r6
-	str	lr, [r0]
-	umull	lr, r12, r9, r2
-	umlal	r7, r5, r3, r2
-	str	r5, [r0, #8]
-	str	r7, [r0, #4]
-	umull	r5, r7, r3, r2
-	adds	r3, r6, r5
-	adcs	r3, r7, r4
-	adcs	r3, r8, lr
-	str	r3, [r0, #12]
-	ldr	r3, [r1, #16]
-	umull	r7, r6, r3, r2
-	adcs	r3, r12, r7
-	str	r3, [r0, #16]
-	ldr	r3, [r1, #20]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #20]
-	ldr	r3, [r1, #24]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #24]
-	ldr	r3, [r1, #28]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #28]
-	ldr	r3, [r1, #32]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #32]
-	ldr	r1, [r1, #36]
-	umull	r3, r7, r1, r2
-	adcs	r1, r6, r3
-	str	r1, [r0, #36]
-	adc	r1, r7, #0
-	str	r1, [r0, #40]
-	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
-	mov	pc, lr
-.Lfunc_end142:
-	.size	.LmulPv320x32, .Lfunc_end142-.LmulPv320x32
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mulUnitPre10L
-	.align	2
-	.type	mcl_fp_mulUnitPre10L,%function
-mcl_fp_mulUnitPre10L:                   @ @mcl_fp_mulUnitPre10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	.pad	#48
-	sub	sp, sp, #48
-	mov	r4, r0
-	mov	r0, sp
-	bl	.LmulPv320x32(PLT)
-	ldr	r12, [sp, #40]
-	ldr	lr, [sp, #36]
-	ldr	r8, [sp, #32]
-	ldr	r9, [sp, #28]
-	ldr	r0, [sp, #24]
-	ldr	r1, [sp, #20]
-	ldm	sp, {r6, r7}
-	add	r5, sp, #8
-	ldm	r5, {r2, r3, r5}
-	stm	r4, {r6, r7}
-	add	r6, r4, #8
-	stm	r6, {r2, r3, r5}
-	str	r1, [r4, #20]
-	str	r0, [r4, #24]
-	str	r9, [r4, #28]
-	str	r8, [r4, #32]
-	str	lr, [r4, #36]
-	str	r12, [r4, #40]
-	add	sp, sp, #48
-	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
-	mov	pc, lr
-.Lfunc_end143:
-	.size	mcl_fp_mulUnitPre10L, .Lfunc_end143-mcl_fp_mulUnitPre10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_mulPre10L
-	.align	2
-	.type	mcl_fpDbl_mulPre10L,%function
-mcl_fpDbl_mulPre10L:                    @ @mcl_fpDbl_mulPre10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#156
-	sub	sp, sp, #156
-	mov	r6, r2
-	mov	r5, r1
-	mov	r4, r0
-	bl	mcl_fpDbl_mulPre5L(PLT)
-	add	r0, r4, #40
-	add	r1, r5, #20
-	add	r2, r6, #20
-	bl	mcl_fpDbl_mulPre5L(PLT)
-	add	r11, r6, #24
-	ldr	r7, [r6, #12]
-	ldr	r8, [r6, #16]
-	ldr	r1, [r6, #20]
-	ldm	r11, {r0, r2, r10, r11}
-	ldm	r6, {r6, r9, r12}
-	adds	lr, r6, r1
-	adcs	r3, r9, r0
-	mov	r0, #0
-	str	lr, [sp, #72]           @ 4-byte Spill
-	adcs	r2, r12, r2
-	str	r3, [sp, #68]           @ 4-byte Spill
-	adcs	r12, r7, r10
-	str	r2, [sp, #64]           @ 4-byte Spill
-	adcs	r10, r8, r11
-	str	r12, [sp, #60]          @ 4-byte Spill
-	adc	r6, r0, #0
-	ldr	r0, [r5, #32]
-	str	r10, [sp, #56]          @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r5, #36]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldmib	r5, {r8, r9, r11}
-	ldr	r0, [r5, #16]
-	ldr	r7, [r5, #20]
-	ldr	r1, [r5, #28]
-	str	lr, [sp, #76]
-	str	r3, [sp, #80]
-	str	r2, [sp, #84]
-	str	r12, [sp, #88]
-	str	r10, [sp, #92]
-	add	r2, sp, #76
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r5, #24]
-	ldr	r5, [r5]
-	adds	r5, r5, r7
-	adcs	r7, r8, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	str	r5, [sp, #96]
-	adcs	r9, r9, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r7, [sp, #100]
-	str	r9, [sp, #104]
-	adcs	r11, r11, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	str	r11, [sp, #108]
-	adcs	r8, r1, r0
-	mov	r0, #0
-	add	r1, sp, #96
-	adc	r10, r0, #0
-	add	r0, sp, #116
-	str	r8, [sp, #112]
-	bl	mcl_fpDbl_mulPre5L(PLT)
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	cmp	r6, #0
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	ldr	r3, [sp, #60]           @ 4-byte Reload
-	moveq	r5, r6
-	moveq	r8, r6
-	moveq	r11, r6
-	moveq	r9, r6
-	moveq	r7, r6
-	str	r5, [sp, #52]           @ 4-byte Spill
-	adds	r0, r5, r0
-	ldr	r5, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r7, r1
-	adcs	r2, r9, r2
-	adcs	r3, r11, r3
-	adcs	r12, r8, r5
-	mov	r5, #0
-	adc	lr, r5, #0
-	cmp	r10, #0
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	moveq	r1, r7
-	ldr	r7, [sp, #136]
-	moveq	r3, r11
-	moveq	r2, r9
-	moveq	r12, r8
-	moveq	lr, r10
-	cmp	r10, #0
-	moveq	r0, r5
-	and	r5, r6, r10
-	ldr	r6, [sp, #152]
-	adds	r8, r0, r7
-	ldr	r7, [sp, #140]
-	adcs	r10, r1, r7
-	ldr	r7, [sp, #144]
-	adcs	r11, r2, r7
-	ldr	r7, [sp, #148]
-	adcs	r0, r3, r7
-	adcs	r12, r12, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	adc	r9, lr, r5
-	ldm	r4, {r5, r6, r7, lr}
-	ldr	r1, [sp, #116]
-	ldr	r2, [sp, #120]
-	ldr	r0, [sp, #124]
-	ldr	r3, [sp, #128]
-	subs	r1, r1, r5
-	sbcs	r2, r2, r6
-	ldr	r6, [sp, #132]
-	sbcs	r0, r0, r7
-	ldr	r7, [r4, #16]
-	sbcs	lr, r3, lr
-	ldr	r3, [r4, #20]
-	sbcs	r5, r6, r7
-	ldr	r6, [r4, #32]
-	ldr	r7, [r4, #52]
-	str	r3, [sp, #72]           @ 4-byte Spill
-	sbcs	r3, r8, r3
-	ldr	r8, [r4, #56]
-	str	r3, [sp, #44]           @ 4-byte Spill
-	ldr	r3, [r4, #24]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r3, [sp, #68]           @ 4-byte Spill
-	sbcs	r3, r10, r3
-	ldr	r10, [r4, #44]
-	str	r3, [sp, #56]           @ 4-byte Spill
-	ldr	r3, [r4, #28]
-	str	r3, [sp, #64]           @ 4-byte Spill
-	sbcs	r3, r11, r3
-	str	r3, [sp, #52]           @ 4-byte Spill
-	ldr	r3, [sp, #60]           @ 4-byte Reload
-	sbcs	r3, r3, r6
-	str	r3, [sp, #48]           @ 4-byte Spill
-	ldr	r3, [r4, #36]
-	str	r3, [sp, #60]           @ 4-byte Spill
-	sbcs	r3, r12, r3
-	ldr	r12, [r4, #64]
-	str	r3, [sp, #40]           @ 4-byte Spill
-	sbc	r3, r9, #0
-	ldr	r9, [r4, #40]
-	str	r3, [sp, #36]           @ 4-byte Spill
-	ldr	r3, [r4, #76]
-	subs	r1, r1, r9
-	sbcs	r2, r2, r10
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [r4, #48]
-	ldr	r11, [sp, #32]          @ 4-byte Reload
-	sbcs	r0, r0, r2
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [r4, #72]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	sbcs	r0, lr, r7
-	ldr	lr, [r4, #68]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	sbcs	r0, r5, r8
-	ldr	r5, [r4, #60]
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	sbcs	r0, r0, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	sbcs	r0, r0, r12
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	sbcs	r0, r0, lr
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	sbcs	r0, r0, r2
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	sbcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	sbc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adds	r0, r0, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r0, [r4, #20]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r11
-	adcs	r0, r0, r6
-	str	r1, [r4, #24]
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	str	r0, [r4, #28]
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	str	r1, [r4, #32]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	str	r0, [r4, #36]
-	adcs	r1, r9, r1
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	str	r1, [r4, #40]
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	adcs	r1, r1, r6
-	str	r0, [r4, #44]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	str	r1, [r4, #48]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	adcs	r1, r8, r1
-	adcs	r5, r5, r6
-	adcs	r7, r12, #0
-	add	r12, r4, #52
-	adcs	r6, lr, #0
-	stm	r12, {r0, r1, r5, r7}
-	adcs	r2, r2, #0
-	str	r6, [r4, #68]
-	adc	r3, r3, #0
-	str	r2, [r4, #72]
-	str	r3, [r4, #76]
-	add	sp, sp, #156
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end144:
-	.size	mcl_fpDbl_mulPre10L, .Lfunc_end144-mcl_fpDbl_mulPre10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sqrPre10L
-	.align	2
-	.type	mcl_fpDbl_sqrPre10L,%function
-mcl_fpDbl_sqrPre10L:                    @ @mcl_fpDbl_sqrPre10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#156
-	sub	sp, sp, #156
-	mov	r5, r1
-	mov	r4, r0
-	mov	r2, r5
-	bl	mcl_fpDbl_mulPre5L(PLT)
-	add	r1, r5, #20
-	add	r0, r4, #40
-	mov	r2, r1
-	bl	mcl_fpDbl_mulPre5L(PLT)
-	ldr	lr, [r5, #32]
-	ldr	r12, [r5, #36]
-	ldmib	r5, {r2, r3, r6, r8}
-	ldr	r0, [r5, #20]
-	ldr	r7, [r5, #24]
-	ldr	r1, [r5, #28]
-	ldr	r5, [r5]
-	adds	r5, r5, r0
-	adcs	r0, r2, r7
-	str	r5, [sp, #96]
-	str	r5, [sp, #76]
-	adcs	r1, r3, r1
-	add	r3, sp, #80
-	str	r0, [sp, #100]
-	adcs	r2, r6, lr
-	str	r1, [sp, #104]
-	adcs	r6, r8, r12
-	str	r2, [sp, #108]
-	str	r6, [sp, #112]
-	stm	r3, {r0, r1, r2, r6}
-	lsr	r3, r2, #31
-	orr	r3, r3, r6, lsl #1
-	str	r3, [sp, #72]           @ 4-byte Spill
-	lsr	r3, r1, #31
-	lsl	r1, r1, #1
-	orr	r1, r1, r0, lsr #31
-	orr	r2, r3, r2, lsl #1
-	str	r1, [sp, #64]           @ 4-byte Spill
-	lsr	r1, r5, #31
-	str	r2, [sp, #68]           @ 4-byte Spill
-	add	r2, sp, #76
-	orr	r11, r1, r0, lsl #1
-	mov	r0, #0
-	add	r1, sp, #96
-	adc	r7, r0, #0
-	add	r0, sp, #116
-	bl	mcl_fpDbl_mulPre5L(PLT)
-	ldr	r10, [sp, #136]
-	ldr	r9, [sp, #140]
-	ldr	r8, [sp, #144]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	ldr	r2, [sp, #148]
-	ldr	r1, [sp, #152]
-	adds	r3, r10, r5, lsl #1
-	adcs	r5, r9, r11
-	adcs	r12, r8, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	lr, r2, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r1, r0
-	adc	r6, r7, r6, lsr #31
-	cmp	r7, #0
-	moveq	lr, r2
-	moveq	r12, r8
-	moveq	r11, r1
-	moveq	r6, r7
-	moveq	r5, r9
-	cmp	r7, #0
-	add	r7, sp, #116
-	moveq	r3, r10
-	ldm	r4, {r9, r10}
-	ldr	r0, [r4, #8]
-	ldr	r8, [r4, #12]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldm	r7, {r1, r2, r7}
-	ldr	r0, [sp, #128]
-	subs	r1, r1, r9
-	ldr	r9, [r4, #40]
-	sbcs	r2, r2, r10
-	ldr	r10, [r4, #44]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	sbcs	r7, r7, r2
-	ldr	r2, [r4, #48]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	sbcs	r8, r0, r8
-	ldr	r0, [r4, #16]
-	ldr	r7, [sp, #132]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	sbcs	r0, r7, r0
-	ldr	r7, [r4, #52]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r4, #20]
-	sbcs	r3, r3, r0
-	str	r3, [sp, #36]           @ 4-byte Spill
-	ldr	r3, [r4, #24]
-	str	r3, [sp, #72]           @ 4-byte Spill
-	sbcs	r3, r5, r3
-	ldr	r5, [r4, #60]
-	str	r3, [sp, #56]           @ 4-byte Spill
-	ldr	r3, [r4, #28]
-	str	r3, [sp, #68]           @ 4-byte Spill
-	sbcs	r3, r12, r3
-	ldr	r12, [r4, #64]
-	str	r3, [sp, #52]           @ 4-byte Spill
-	ldr	r3, [r4, #32]
-	str	r3, [sp, #64]           @ 4-byte Spill
-	sbcs	r3, lr, r3
-	ldr	lr, [r4, #68]
-	str	r3, [sp, #48]           @ 4-byte Spill
-	ldr	r3, [r4, #36]
-	str	r3, [sp, #60]           @ 4-byte Spill
-	sbcs	r3, r11, r3
-	str	r3, [sp, #32]           @ 4-byte Spill
-	sbc	r3, r6, #0
-	subs	r1, r1, r9
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r3, [sp, #28]           @ 4-byte Spill
-	ldr	r3, [r4, #76]
-	sbcs	r1, r1, r10
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	ldr	r11, [sp, #20]          @ 4-byte Reload
-	sbcs	r1, r1, r2
-	ldr	r2, [r4, #72]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	sbcs	r1, r8, r7
-	ldr	r8, [r4, #56]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	sbcs	r1, r1, r8
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	sbcs	r1, r1, r5
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	sbcs	r1, r1, r12
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	sbcs	r1, r1, lr
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	sbcs	r1, r1, r2
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	sbcs	r1, r1, r3
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	sbc	r1, r1, #0
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adds	r0, r0, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r0, [r4, #20]
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r11
-	adcs	r0, r0, r6
-	str	r1, [r4, #24]
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	str	r0, [r4, #28]
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	str	r1, [r4, #32]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	str	r0, [r4, #36]
-	adcs	r1, r9, r1
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	str	r1, [r4, #40]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	adcs	r1, r1, r6
-	str	r0, [r4, #44]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	str	r1, [r4, #48]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	adcs	r1, r8, r1
-	adcs	r5, r5, r6
-	adcs	r7, r12, #0
-	add	r12, r4, #52
-	adcs	r6, lr, #0
-	stm	r12, {r0, r1, r5, r7}
-	adcs	r2, r2, #0
-	str	r6, [r4, #68]
-	adc	r3, r3, #0
-	str	r2, [r4, #72]
-	str	r3, [r4, #76]
-	add	sp, sp, #156
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end145:
-	.size	mcl_fpDbl_sqrPre10L, .Lfunc_end145-mcl_fpDbl_sqrPre10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mont10L
-	.align	2
-	.type	mcl_fp_mont10L,%function
-mcl_fp_mont10L:                         @ @mcl_fp_mont10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#28
-	sub	sp, sp, #28
-	.pad	#1024
-	sub	sp, sp, #1024
-	mov	r7, r2
-	ldr	r5, [r3, #-4]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	add	r0, sp, #1000
-	str	r3, [sp, #84]           @ 4-byte Spill
-	str	r1, [sp, #76]           @ 4-byte Spill
-	mov	r4, r3
-	mov	r6, r1
-	ldr	r2, [r7]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	str	r5, [sp, #80]           @ 4-byte Spill
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #1004]
-	ldr	r10, [sp, #1000]
-	mov	r1, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1008]
-	mul	r2, r10, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1012]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1040]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1036]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1032]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1028]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1024]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1020]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1016]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #952
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #992]
-	ldr	r2, [r7, #4]
-	ldr	r9, [sp, #968]
-	ldr	r8, [sp, #952]
-	ldr	r11, [sp, #956]
-	ldr	r5, [sp, #960]
-	ldr	r4, [sp, #964]
-	mov	r1, r6
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #988]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #984]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #980]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #976]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #972]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	add	r0, sp, #904
-	bl	.LmulPv320x32(PLT)
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adds	r0, r8, r10
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #908
-	ldr	r10, [sp, #944]
-	mov	r0, #0
-	adcs	r1, r11, r1
-	add	r11, sp, #932
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r5, r1
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r4, r1
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r9, r1
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	ldm	r11, {r5, r6, r11}
-	ldr	r4, [sp, #904]
-	adcs	r8, r2, r1
-	adc	r9, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	adds	r4, r7, r4
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r9, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #856
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #896]
-	add	r11, sp, #856
-	ldr	r6, [sp, #880]
-	ldr	r7, [sp, #876]
-	ldr	r5, [sp, #872]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #892]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #888]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #884]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, sp, #808
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r4, r8
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #808
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #848]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #832
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r8, r0, r1
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldm	r11, {r4, r5, r6, r11}
-	adc	r9, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r9, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #760
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #800]
-	add	r11, sp, #760
-	ldr	r6, [sp, #784]
-	ldr	r4, [sp, #780]
-	ldr	r5, [sp, #776]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #796]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #792]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #788]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, sp, #712
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r7, r8
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #716
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #752]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #740
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r8, r0, r1
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldm	r11, {r5, r6, r11}
-	ldr	r4, [sp, #712]
-	adc	r9, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adds	r4, r7, r4
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r9, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #664
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #704]
-	add	r11, sp, #664
-	ldr	r6, [sp, #688]
-	ldr	r7, [sp, #684]
-	ldr	r5, [sp, #680]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, sp, #616
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r4, r8
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #616
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #656]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #640
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r8, r0, r1
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldm	r11, {r4, r5, r6, r11}
-	adc	r9, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r9, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #568
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #608]
-	add	r11, sp, #568
-	ldr	r6, [sp, #592]
-	ldr	r4, [sp, #588]
-	ldr	r5, [sp, #584]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #604]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #600]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #596]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, sp, #520
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r7, r8
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #524
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #560]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #548
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r8, r0, r1
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldm	r11, {r5, r6, r11}
-	ldr	r4, [sp, #520]
-	adc	r9, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adds	r4, r7, r4
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r9, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #472
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #512]
-	add	r11, sp, #472
-	ldr	r6, [sp, #496]
-	ldr	r7, [sp, #492]
-	ldr	r5, [sp, #488]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #508]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #504]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #500]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #424
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r4, r8
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #424
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #464]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #448
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r8, r0, r1
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldm	r11, {r4, r5, r6, r11}
-	adc	r9, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r9, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #376
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #416]
-	add	r11, sp, #376
-	ldr	r6, [sp, #400]
-	ldr	r4, [sp, #396]
-	ldr	r5, [sp, #392]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #412]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #408]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #404]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #328
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r7, r8
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #332
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #368]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #356
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r8, r0, r1
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldm	r11, {r5, r6, r11}
-	ldr	r4, [sp, #328]
-	adc	r9, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adds	r4, r7, r4
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r9, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #280
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #320]
-	add	r11, sp, #280
-	ldr	r6, [sp, #304]
-	ldr	r7, [sp, #300]
-	ldr	r5, [sp, #296]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #316]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #312]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #308]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #232
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r4, r8
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #232
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #272]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #256
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r8, r0, r1
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldm	r11, {r4, r5, r6, r11}
-	adc	r9, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r9, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #184
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #224]
-	add	r11, sp, #184
-	ldr	r6, [sp, #208]
-	ldr	r4, [sp, #204]
-	ldr	r5, [sp, #200]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #220]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #216]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #212]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, sp, #136
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r7, r8
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #136
-	add	r7, sp, #152
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	add	r9, sp, #164
-	adcs	r10, r1, r10
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r11, r1, r11
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldm	lr, {r2, r6, r12, lr}
-	ldr	r8, [sp, #176]
-	adds	r4, r0, r2
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldm	r9, {r3, r5, r9}
-	adcs	r6, r10, r6
-	mul	r2, r4, r0
-	ldm	r7, {r0, r1, r7}
-	str	r6, [sp, #40]           @ 4-byte Spill
-	adcs	r6, r11, r12
-	ldr	r11, [sp, #84]          @ 4-byte Reload
-	str	r6, [sp, #36]           @ 4-byte Spill
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	adcs	r10, r6, lr
-	ldr	r6, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	mov	r1, r11
-	adcs	r0, r0, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r7, r0, r8
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #88
-	bl	.LmulPv320x32(PLT)
-	add	r3, sp, #88
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r4, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r8, r0, r1
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r4, r0, r2
-	ldr	r2, [sp, #104]
-	adcs	r0, r10, r3
-	str	r4, [sp, #40]           @ 4-byte Spill
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #108]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r5, r6, r2
-	ldr	r2, [sp, #112]
-	str	r5, [sp, #48]           @ 4-byte Spill
-	adcs	r12, r0, r2
-	ldr	r2, [sp, #116]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	str	r12, [sp, #52]          @ 4-byte Spill
-	adcs	lr, r0, r2
-	ldr	r2, [sp, #120]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	lr, [sp, #60]           @ 4-byte Spill
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #124]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #128]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r9, r7, r2
-	adc	r0, r0, #0
-	str	r9, [sp, #64]           @ 4-byte Spill
-	str	r0, [sp, #44]           @ 4-byte Spill
-	mov	r0, r11
-	ldr	r2, [r0, #16]
-	ldr	r10, [r0]
-	ldr	r3, [r0, #4]
-	ldr	r1, [r0, #8]
-	ldr	r6, [r0, #12]
-	ldr	r7, [r0, #24]
-	ldr	r11, [r0, #32]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [r0, #20]
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [r0, #28]
-	ldr	r0, [r0, #36]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	mov	r2, r8
-	ldr	r8, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	subs	r10, r2, r10
-	sbcs	r3, r4, r3
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	sbcs	r1, r8, r1
-	sbcs	r6, r4, r6
-	sbcs	r4, r5, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	sbcs	r5, r12, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	sbcs	r12, lr, r7
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	sbcs	lr, r0, r7
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	sbcs	r11, r0, r11
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	sbcs	r0, r9, r0
-	ldr	r9, [sp, #68]           @ 4-byte Reload
-	sbc	r7, r7, #0
-	ands	r7, r7, #1
-	movne	r10, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	movne	r1, r8
-	str	r10, [r9]
-	movne	r3, r2
-	cmp	r7, #0
-	str	r3, [r9, #4]
-	str	r1, [r9, #8]
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	movne	r6, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r6, [r9, #12]
-	movne	r4, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r4, [r9, #16]
-	movne	r5, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	cmp	r7, #0
-	str	r5, [r9, #20]
-	movne	r12, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r12, [r9, #24]
-	movne	lr, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	lr, [r9, #28]
-	movne	r11, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	cmp	r7, #0
-	str	r11, [r9, #32]
-	movne	r0, r1
-	str	r0, [r9, #36]
-	add	sp, sp, #28
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end146:
-	.size	mcl_fp_mont10L, .Lfunc_end146-mcl_fp_mont10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montNF10L
-	.align	2
-	.type	mcl_fp_montNF10L,%function
-mcl_fp_montNF10L:                       @ @mcl_fp_montNF10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#28
-	sub	sp, sp, #28
-	.pad	#1024
-	sub	sp, sp, #1024
-	mov	r7, r2
-	ldr	r5, [r3, #-4]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	add	r0, sp, #1000
-	str	r3, [sp, #84]           @ 4-byte Spill
-	str	r1, [sp, #76]           @ 4-byte Spill
-	mov	r4, r3
-	mov	r6, r1
-	ldr	r2, [r7]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	str	r5, [sp, #80]           @ 4-byte Spill
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #1004]
-	ldr	r10, [sp, #1000]
-	mov	r1, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1008]
-	mul	r2, r10, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1012]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1040]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1036]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1032]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1028]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1024]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1020]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1016]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #952
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #992]
-	ldr	r2, [r7, #4]
-	ldr	r9, [sp, #968]
-	ldr	r8, [sp, #952]
-	ldr	r11, [sp, #956]
-	ldr	r5, [sp, #960]
-	ldr	r4, [sp, #964]
-	mov	r1, r6
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #988]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #984]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #980]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #976]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #972]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	add	r0, sp, #904
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r8, r10
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #908
-	ldr	r10, [sp, #940]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	ldr	r11, [sp, #936]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #932]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #904]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	ldr	r9, [sp, #944]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r7, r1, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adc	r8, r1, r0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	adds	r4, r6, r4
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r7, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #856
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #896]
-	add	r11, sp, #856
-	ldr	r6, [sp, #880]
-	ldr	r7, [sp, #876]
-	ldr	r5, [sp, #872]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #892]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #888]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #884]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, sp, #808
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r4, r8
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #808
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #848]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #844]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #832
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldm	r11, {r4, r5, r11}
-	adc	r8, r0, r1
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	adds	r6, r6, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r7, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #760
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #800]
-	add	r11, sp, #760
-	ldr	r5, [sp, #784]
-	ldr	r7, [sp, #780]
-	ldr	r4, [sp, #776]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #796]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #792]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #788]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, sp, #712
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r6, r8
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #716
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #752]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #748]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #744]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #712]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #740]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r8, r0, r1
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	adds	r4, r6, r4
-	ldr	r6, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r7, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #664
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #704]
-	add	r11, sp, #664
-	ldr	r6, [sp, #688]
-	ldr	r7, [sp, #684]
-	ldr	r5, [sp, #680]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, sp, #616
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r4, r8
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #616
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #656]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #652]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #640
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldm	r11, {r4, r5, r11}
-	adc	r8, r0, r1
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	adds	r6, r6, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r7, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #568
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #608]
-	add	r11, sp, #568
-	ldr	r5, [sp, #592]
-	ldr	r7, [sp, #588]
-	ldr	r4, [sp, #584]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #604]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #600]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #596]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, sp, #520
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r6, r8
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #524
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #560]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #556]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #552]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #520]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #548]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r8, r0, r1
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	adds	r4, r6, r4
-	ldr	r6, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r7, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #472
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #512]
-	add	r11, sp, #472
-	ldr	r6, [sp, #496]
-	ldr	r7, [sp, #492]
-	ldr	r5, [sp, #488]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #508]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #504]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #500]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #424
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r4, r8
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #424
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #464]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #460]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #448
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldm	r11, {r4, r5, r11}
-	adc	r8, r0, r1
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	adds	r6, r6, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r7, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #376
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #416]
-	add	r11, sp, #376
-	ldr	r5, [sp, #400]
-	ldr	r7, [sp, #396]
-	ldr	r4, [sp, #392]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #412]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #408]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #404]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #328
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r6, r8
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #332
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #368]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #364]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #360]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #328]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #356]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r8, r0, r1
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	adds	r4, r6, r4
-	ldr	r6, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r7, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #280
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #320]
-	add	r11, sp, #280
-	ldr	r6, [sp, #304]
-	ldr	r7, [sp, #300]
-	ldr	r5, [sp, #296]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #316]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #312]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #308]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #232
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r4, r8
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #232
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #272]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #268]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #256
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r7, r0, r1
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldm	r11, {r4, r5, r11}
-	adc	r8, r0, r1
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	adds	r6, r6, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r7, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r8, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #184
-	bl	.LmulPv320x32(PLT)
-	ldr	r0, [sp, #224]
-	add	r11, sp, #184
-	ldr	r5, [sp, #208]
-	ldr	r7, [sp, #204]
-	ldr	r4, [sp, #200]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #220]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #216]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #212]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, sp, #136
-	bl	.LmulPv320x32(PLT)
-	adds	r0, r6, r8
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	ldr	lr, [sp, #140]
-	ldr	r6, [sp, #144]
-	add	r8, sp, #152
-	ldr	r12, [sp, #148]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	adcs	r9, r1, r10
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r10, r1, r11
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adc	r1, r1, r2
-	ldr	r2, [sp, #136]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	adds	r4, r0, r2
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r9, r9, lr
-	adcs	r11, r10, r6
-	mul	r1, r4, r0
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldm	r8, {r0, r1, r2, r3, r5, r7, r8}
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	adcs	r10, r6, r12
-	ldr	r6, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r0, r0, r7
-	str	r0, [sp, #72]           @ 4-byte Spill
-	add	r0, sp, #88
-	adc	r8, r8, #0
-	bl	.LmulPv320x32(PLT)
-	add	r3, sp, #88
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r4, r0
-	adcs	r7, r9, r1
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r9, r11, r2
-	ldr	r2, [sp, #104]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	adcs	lr, r10, r3
-	str	lr, [sp, #52]           @ 4-byte Spill
-	adcs	r6, r0, r2
-	ldr	r2, [sp, #108]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r6, [sp, #56]           @ 4-byte Spill
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #112]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r4, r0, r2
-	ldr	r2, [sp, #116]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	str	r4, [sp, #60]           @ 4-byte Spill
-	adcs	r12, r0, r2
-	ldr	r2, [sp, #120]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r12, [sp, #64]          @ 4-byte Spill
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #124]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r0, r2
-	ldr	r2, [sp, #128]
-	mov	r0, r5
-	str	r11, [sp, #72]          @ 4-byte Spill
-	adc	r1, r8, r2
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldmib	r0, {r2, r8}
-	ldr	r5, [r0, #16]
-	ldr	r10, [r0]
-	ldr	r3, [r0, #12]
-	str	r5, [sp, #28]           @ 4-byte Spill
-	ldr	r5, [r0, #20]
-	subs	r10, r7, r10
-	str	r5, [sp, #32]           @ 4-byte Spill
-	ldr	r5, [r0, #24]
-	str	r5, [sp, #36]           @ 4-byte Spill
-	ldr	r5, [r0, #28]
-	str	r5, [sp, #40]           @ 4-byte Spill
-	mov	r5, r0
-	sbcs	r0, r9, r2
-	sbcs	r2, lr, r8
-	ldr	r8, [r5, #32]
-	sbcs	r7, r6, r3
-	ldr	r3, [r5, #36]
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	str	r3, [sp, #84]           @ 4-byte Spill
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	sbcs	r6, r6, r3
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	sbcs	lr, r4, r3
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	sbcs	r4, r12, r3
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	sbcs	r12, r5, r3
-	ldr	r3, [sp, #84]           @ 4-byte Reload
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	sbcs	r11, r11, r8
-	ldr	r8, [sp, #68]           @ 4-byte Reload
-	sbc	r3, r1, r3
-	asr	r1, r3, #31
-	cmp	r1, #0
-	movlt	r10, r5
-	movlt	r0, r9
-	str	r10, [r8]
-	str	r0, [r8, #4]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	movlt	r2, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r2, [r8, #8]
-	movlt	r7, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r7, [r8, #12]
-	movlt	r6, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	str	r6, [r8, #16]
-	movlt	lr, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	cmp	r1, #0
-	str	lr, [r8, #20]
-	movlt	r4, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r4, [r8, #24]
-	movlt	r12, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r12, [r8, #28]
-	movlt	r11, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r11, [r8, #32]
-	movlt	r3, r0
-	str	r3, [r8, #36]
-	add	sp, sp, #28
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end147:
-	.size	mcl_fp_montNF10L, .Lfunc_end147-mcl_fp_montNF10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montRed10L
-	.align	2
-	.type	mcl_fp_montRed10L,%function
-mcl_fp_montRed10L:                      @ @mcl_fp_montRed10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#612
-	sub	sp, sp, #612
-	mov	r5, r2
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r2, [r1, #4]
-	ldr	r9, [r1]
-	ldr	r11, [r1, #16]
-	ldr	r0, [r5]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [r1, #8]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [r5, #4]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [r1, #12]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [r5, #8]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [r5, #12]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [r5, #16]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [r5, #20]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [r5, #24]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [r5, #-4]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	mul	r2, r9, r0
-	ldr	r0, [r5, #28]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r5, #32]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r5, #36]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r1, #64]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r1, #68]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r1, #72]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r1, #76]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1, #56]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r1, #60]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [r1, #28]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r1, #24]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r1, #20]
-	mov	r1, r5
-	str	r0, [sp, #8]            @ 4-byte Spill
-	add	r0, sp, #560
-	bl	.LmulPv320x32(PLT)
-	add	lr, sp, #584
-	ldr	r10, [sp, #600]
-	ldr	r8, [sp, #596]
-	add	r7, sp, #564
-	ldm	lr, {r6, r12, lr}
-	ldr	r4, [sp, #560]
-	ldm	r7, {r0, r1, r2, r3, r7}
-	adds	r4, r9, r4
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	adcs	r4, r4, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r9, r0, r1
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	adcs	r0, r11, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #512
-	bl	.LmulPv320x32(PLT)
-	add	r6, sp, #512
-	ldr	r12, [sp, #552]
-	ldr	lr, [sp, #548]
-	ldr	r2, [sp, #544]
-	ldr	r10, [sp, #540]
-	ldr	r11, [sp, #536]
-	ldr	r7, [sp, #532]
-	ldr	r8, [sp, #528]
-	ldm	r6, {r1, r3, r6}
-	ldr	r0, [sp, #524]
-	adds	r1, r4, r1
-	ldr	r4, [sp, #124]          @ 4-byte Reload
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r9, r9, r3
-	adcs	r1, r1, r6
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r9, r4
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	add	r0, sp, #464
-	bl	.LmulPv320x32(PLT)
-	ldr	r1, [sp, #464]
-	ldr	r0, [sp, #504]
-	add	r12, sp, #468
-	ldr	r10, [sp, #500]
-	ldr	r8, [sp, #496]
-	ldr	lr, [sp, #492]
-	ldr	r6, [sp, #488]
-	ldr	r7, [sp, #484]
-	adds	r1, r9, r1
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r12, {r0, r2, r3, r12}
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	add	r0, sp, #416
-	bl	.LmulPv320x32(PLT)
-	add	r7, sp, #416
-	ldr	r12, [sp, #456]
-	ldr	lr, [sp, #452]
-	ldr	r2, [sp, #448]
-	ldr	r3, [sp, #444]
-	add	r10, sp, #428
-	ldm	r7, {r1, r6, r7}
-	ldm	r10, {r0, r8, r9, r10}
-	adds	r1, r11, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r11, r1, r6
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	add	r0, sp, #368
-	bl	.LmulPv320x32(PLT)
-	add	r10, sp, #400
-	add	r12, sp, #372
-	ldm	r10, {r8, r9, r10}
-	ldr	r1, [sp, #368]
-	ldr	lr, [sp, #396]
-	ldr	r6, [sp, #392]
-	ldr	r7, [sp, #388]
-	ldm	r12, {r0, r2, r3, r12}
-	adds	r1, r11, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #320
-	bl	.LmulPv320x32(PLT)
-	add	r7, sp, #320
-	ldr	r12, [sp, #360]
-	ldr	lr, [sp, #356]
-	ldr	r2, [sp, #352]
-	ldr	r3, [sp, #348]
-	add	r10, sp, #332
-	ldm	r7, {r1, r6, r7}
-	ldm	r10, {r0, r8, r9, r10}
-	adds	r1, r11, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r11, r1, r6
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #272
-	bl	.LmulPv320x32(PLT)
-	add	r10, sp, #304
-	add	r12, sp, #276
-	ldm	r10, {r8, r9, r10}
-	ldr	r1, [sp, #272]
-	ldr	lr, [sp, #300]
-	ldr	r6, [sp, #296]
-	ldr	r7, [sp, #292]
-	ldm	r12, {r0, r2, r3, r12}
-	adds	r1, r11, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #224
-	bl	.LmulPv320x32(PLT)
-	add	r10, sp, #240
-	add	r6, sp, #224
-	ldr	r12, [sp, #264]
-	ldr	lr, [sp, #260]
-	ldr	r8, [sp, #256]
-	ldr	r9, [sp, #252]
-	ldm	r10, {r0, r7, r10}
-	ldm	r6, {r1, r2, r3, r6}
-	adds	r1, r11, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r4, r1, r2
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r11, r1, r3
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r6, r1, r6
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	mul	r2, r4, r7
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r9, r0, r9
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	add	r0, sp, #176
-	bl	.LmulPv320x32(PLT)
-	add	r12, sp, #176
-	ldm	r12, {r0, r1, r3, r12}
-	ldr	lr, [sp, #216]
-	adds	r0, r4, r0
-	ldr	r4, [sp, #76]           @ 4-byte Reload
-	adcs	r10, r11, r1
-	ldr	r1, [sp, #192]
-	adcs	r0, r6, r3
-	mul	r2, r10, r7
-	ldr	r7, [sp, #200]
-	ldr	r6, [sp, #204]
-	ldr	r3, [sp, #208]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	ldr	r12, [sp, #212]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	adcs	r8, r4, r1
-	ldr	r0, [sp, #196]
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r9, r9, r7
-	adcs	r6, r0, r6
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	add	r0, sp, #128
-	bl	.LmulPv320x32(PLT)
-	add	r3, sp, #128
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r10, r0
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r10, r0, r1
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r0, r2
-	ldr	r0, [sp, #144]
-	adcs	r2, r8, r3
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	str	r2, [sp, #44]           @ 4-byte Spill
-	adcs	r7, r11, r0
-	ldr	r0, [sp, #148]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	adcs	r12, r9, r0
-	ldr	r0, [sp, #152]
-	str	r12, [sp, #52]          @ 4-byte Spill
-	adcs	r4, r6, r0
-	ldr	r0, [sp, #156]
-	str	r4, [sp, #56]           @ 4-byte Spill
-	adcs	r5, r3, r0
-	ldr	r0, [sp, #160]
-	ldr	r3, [sp, #68]           @ 4-byte Reload
-	str	r5, [sp, #60]           @ 4-byte Spill
-	adcs	r6, r3, r0
-	ldr	r0, [sp, #164]
-	ldr	r3, [sp, #64]           @ 4-byte Reload
-	str	r6, [sp, #68]           @ 4-byte Spill
-	adcs	r8, r3, r0
-	ldr	r0, [sp, #168]
-	ldr	r3, [sp, #76]           @ 4-byte Reload
-	str	r8, [sp, #124]          @ 4-byte Spill
-	adcs	lr, r3, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adc	r11, r0, #0
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	subs	r3, r10, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	sbcs	r12, r12, r7
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	sbcs	r7, r4, r7
-	ldr	r4, [sp, #104]          @ 4-byte Reload
-	sbcs	r4, r5, r4
-	ldr	r5, [sp, #80]           @ 4-byte Reload
-	sbcs	r5, r6, r5
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	sbcs	r9, r8, r6
-	ldr	r6, [sp, #88]           @ 4-byte Reload
-	sbcs	r8, lr, r6
-	sbc	r6, r11, #0
-	ands	r11, r6, #1
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	movne	r3, r10
-	str	r3, [r6]
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	movne	r0, r3
-	str	r0, [r6, #4]
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	movne	r1, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	cmp	r11, #0
-	str	r1, [r6, #8]
-	movne	r2, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	str	r2, [r6, #12]
-	movne	r12, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	str	r12, [r6, #16]
-	movne	r7, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	cmp	r11, #0
-	str	r7, [r6, #20]
-	movne	r4, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	str	r4, [r6, #24]
-	movne	r5, r0
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	str	r5, [r6, #28]
-	movne	r9, r0
-	cmp	r11, #0
-	movne	r8, lr
-	str	r9, [r6, #32]
-	str	r8, [r6, #36]
-	add	sp, sp, #612
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end148:
-	.size	mcl_fp_montRed10L, .Lfunc_end148-mcl_fp_montRed10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addPre10L
-	.align	2
-	.type	mcl_fp_addPre10L,%function
-mcl_fp_addPre10L:                       @ @mcl_fp_addPre10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#16
-	sub	sp, sp, #16
-	ldm	r1, {r3, r8, lr}
-	ldr	r9, [r1, #12]
-	ldmib	r2, {r5, r6, r7, r10}
-	ldr	r4, [r2, #20]
-	ldr	r11, [r2]
-	str	r4, [sp]                @ 4-byte Spill
-	ldr	r4, [r2, #24]
-	adds	r12, r11, r3
-	ldr	r11, [r2, #32]
-	adcs	r5, r5, r8
-	ldr	r8, [r1, #36]
-	adcs	r6, r6, lr
-	add	lr, r1, #16
-	adcs	r7, r7, r9
-	str	r4, [sp, #4]            @ 4-byte Spill
-	ldr	r4, [r2, #28]
-	ldr	r2, [r2, #36]
-	str	r4, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r1, r2, r3, r4, lr}
-	str	r12, [r0]
-	stmib	r0, {r5, r6}
-	str	r7, [r0, #12]
-	ldr	r7, [sp]                @ 4-byte Reload
-	adcs	r1, r10, r1
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r2, r7, r2
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adcs	r1, r1, r3
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	str	r1, [r0, #28]
-	adcs	r1, r11, lr
-	adcs	r2, r2, r8
-	str	r1, [r0, #32]
-	str	r2, [r0, #36]
-	mov	r0, #0
-	adc	r0, r0, #0
-	add	sp, sp, #16
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end149:
-	.size	mcl_fp_addPre10L, .Lfunc_end149-mcl_fp_addPre10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subPre10L
-	.align	2
-	.type	mcl_fp_subPre10L,%function
-mcl_fp_subPre10L:                       @ @mcl_fp_subPre10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#24
-	sub	sp, sp, #24
-	ldr	r3, [r2, #4]
-	ldr	r7, [r2]
-	ldr	r11, [r1]
-	ldr	r6, [r1, #4]
-	ldr	r9, [r2, #8]
-	ldr	r5, [r1, #8]
-	ldr	lr, [r2, #12]
-	ldr	r4, [r1, #12]
-	ldr	r12, [r1, #16]
-	ldr	r8, [r1, #20]
-	ldr	r10, [r1, #24]
-	str	r3, [sp]                @ 4-byte Spill
-	ldr	r3, [r2, #16]
-	subs	r7, r11, r7
-	ldr	r11, [r2, #32]
-	str	r7, [r0]
-	str	r3, [sp, #8]            @ 4-byte Spill
-	ldr	r3, [r2, #20]
-	str	r3, [sp, #12]           @ 4-byte Spill
-	ldr	r3, [r2, #24]
-	str	r3, [sp, #16]           @ 4-byte Spill
-	ldr	r3, [r2, #28]
-	ldr	r2, [r2, #36]
-	str	r3, [sp, #20]           @ 4-byte Spill
-	ldr	r3, [r1, #28]
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldr	r3, [sp]                @ 4-byte Reload
-	sbcs	r6, r6, r3
-	sbcs	r5, r5, r9
-	str	r6, [r0, #4]
-	str	r5, [r0, #8]
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	sbcs	r4, r4, lr
-	ldr	lr, [r1, #32]
-	ldr	r1, [r1, #36]
-	str	r4, [r0, #12]
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	sbcs	r3, r12, r5
-	str	r3, [r0, #16]
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	sbcs	r7, r8, r4
-	str	r7, [r0, #20]
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	sbcs	r3, r10, r3
-	str	r3, [r0, #24]
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	sbcs	r3, r7, r3
-	str	r3, [r0, #28]
-	sbcs	r3, lr, r11
-	sbcs	r1, r1, r2
-	str	r3, [r0, #32]
-	str	r1, [r0, #36]
-	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	add	sp, sp, #24
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end150:
-	.size	mcl_fp_subPre10L, .Lfunc_end150-mcl_fp_subPre10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_10L
-	.align	2
-	.type	mcl_fp_shr1_10L,%function
-mcl_fp_shr1_10L:                        @ @mcl_fp_shr1_10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	ldr	lr, [r1, #32]
-	ldr	r12, [r1, #36]
-	ldr	r8, [r1, #28]
-	ldm	r1, {r1, r2, r3, r4, r5, r6, r9}
-	lsrs	r7, r2, #1
-	rrx	r1, r1
-	str	r1, [r0]
-	lsr	r1, r2, #1
-	lsr	r2, r12, #1
-	orr	r1, r1, r3, lsl #31
-	str	r1, [r0, #4]
-	lsrs	r1, r4, #1
-	rrx	r1, r3
-	str	r1, [r0, #8]
-	lsr	r1, r4, #1
-	orr	r1, r1, r5, lsl #31
-	str	r1, [r0, #12]
-	lsrs	r1, r6, #1
-	rrx	r1, r5
-	str	r1, [r0, #16]
-	lsr	r1, r6, #1
-	orr	r1, r1, r9, lsl #31
-	str	r1, [r0, #20]
-	lsrs	r1, r8, #1
-	rrx	r1, r9
-	str	r1, [r0, #24]
-	lsr	r1, r8, #1
-	orr	r1, r1, lr, lsl #31
-	str	r1, [r0, #28]
-	lsrs	r1, r12, #1
-	rrx	r1, lr
-	str	r1, [r0, #32]
-	str	r2, [r0, #36]
-	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
-	mov	pc, lr
-.Lfunc_end151:
-	.size	mcl_fp_shr1_10L, .Lfunc_end151-mcl_fp_shr1_10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_add10L
-	.align	2
-	.type	mcl_fp_add10L,%function
-mcl_fp_add10L:                          @ @mcl_fp_add10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#20
-	sub	sp, sp, #20
-	ldm	r1, {r12, lr}
-	ldr	r5, [r2]
-	ldr	r10, [r1, #8]
-	ldr	r8, [r1, #12]
-	ldmib	r2, {r4, r6, r7}
-	adds	r9, r5, r12
-	ldr	r5, [r1, #24]
-	adcs	lr, r4, lr
-	ldr	r4, [r1, #20]
-	adcs	r6, r6, r10
-	ldr	r10, [r1, #36]
-	str	lr, [sp]                @ 4-byte Spill
-	str	r6, [sp, #12]           @ 4-byte Spill
-	adcs	r12, r7, r8
-	ldr	r6, [r1, #16]
-	ldr	r7, [r2, #16]
-	adcs	r6, r7, r6
-	ldr	r7, [r2, #20]
-	str	r6, [sp, #4]            @ 4-byte Spill
-	adcs	r8, r7, r4
-	ldr	r4, [r2, #24]
-	adcs	r6, r4, r5
-	ldr	r4, [r1, #28]
-	ldr	r5, [r2, #28]
-	str	r6, [sp, #8]            @ 4-byte Spill
-	adcs	r7, r5, r4
-	ldr	r5, [r1, #32]
-	ldr	r1, [r2, #32]
-	ldr	r2, [r2, #36]
-	stm	r0, {r9, lr}
-	mov	lr, r12
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	adcs	r11, r1, r5
-	add	r1, r0, #24
-	adcs	r10, r2, r10
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r2, [r0, #8]
-	str	lr, [r0, #12]
-	str	r4, [r0, #16]
-	str	r8, [r0, #20]
-	stm	r1, {r6, r7, r11}
-	mov	r1, #0
-	str	r10, [r0, #36]
-	adc	r1, r1, #0
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r3, {r1, r6, r12}
-	ldr	r5, [r3, #12]
-	subs	r9, r9, r1
-	ldr	r1, [sp]                @ 4-byte Reload
-	sbcs	r6, r1, r6
-	sbcs	r1, r2, r12
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	sbcs	r12, lr, r5
-	sbcs	lr, r4, r1
-	ldr	r1, [r3, #20]
-	ldr	r4, [sp, #16]           @ 4-byte Reload
-	sbcs	r8, r8, r1
-	ldr	r1, [r3, #24]
-	sbcs	r5, r2, r1
-	ldr	r2, [r3, #28]
-	sbcs	r1, r7, r2
-	ldr	r2, [r3, #32]
-	ldr	r7, [r3, #36]
-	sbcs	r3, r11, r2
-	sbcs	r2, r10, r7
-	sbc	r4, r4, #0
-	tst	r4, #1
-	bne	.LBB152_2
-@ BB#1:                                 @ %nocarry
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	str	r9, [r0]
-	str	r6, [r0, #4]
-	str	r4, [r0, #8]
-	str	r12, [r0, #12]
-	str	lr, [r0, #16]
-	str	r8, [r0, #20]
-	str	r5, [r0, #24]
-	str	r1, [r0, #28]
-	str	r3, [r0, #32]
-	str	r2, [r0, #36]
-.LBB152_2:                              @ %carry
-	add	sp, sp, #20
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end152:
-	.size	mcl_fp_add10L, .Lfunc_end152-mcl_fp_add10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF10L
-	.align	2
-	.type	mcl_fp_addNF10L,%function
-mcl_fp_addNF10L:                        @ @mcl_fp_addNF10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#56
-	sub	sp, sp, #56
-	ldr	r9, [r1]
-	ldmib	r1, {r8, lr}
-	ldr	r5, [r2]
-	ldr	r12, [r1, #12]
-	ldmib	r2, {r4, r6, r7}
-	ldr	r10, [r1, #24]
-	adds	r9, r5, r9
-	ldr	r5, [r1, #16]
-	adcs	r11, r4, r8
-	ldr	r8, [r1, #20]
-	str	r9, [sp, #16]           @ 4-byte Spill
-	adcs	r6, r6, lr
-	str	r11, [sp, #20]          @ 4-byte Spill
-	str	r6, [sp, #32]           @ 4-byte Spill
-	adcs	r6, r7, r12
-	ldr	r7, [r2, #16]
-	str	r6, [sp, #24]           @ 4-byte Spill
-	adcs	r4, r7, r5
-	ldr	r7, [r2, #20]
-	ldr	r5, [r2, #28]
-	str	r4, [sp, #28]           @ 4-byte Spill
-	adcs	r7, r7, r8
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	adcs	r7, r7, r10
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r1, #28]
-	adcs	r7, r5, r7
-	ldr	r5, [r1, #32]
-	ldr	r1, [r1, #36]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldr	r7, [r2, #32]
-	ldr	r2, [r2, #36]
-	adcs	lr, r7, r5
-	adc	r1, r2, r1
-	str	lr, [sp, #36]           @ 4-byte Spill
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldmib	r3, {r1, r2, r12}
-	ldr	r7, [r3, #20]
-	ldr	r8, [r3]
-	ldr	r10, [sp, #32]          @ 4-byte Reload
-	ldr	r5, [r3, #16]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	ldr	r7, [r3, #24]
-	subs	r8, r9, r8
-	sbcs	r1, r11, r1
-	ldr	r11, [r3, #32]
-	sbcs	r2, r10, r2
-	sbcs	r12, r6, r12
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldr	r7, [r3, #28]
-	ldr	r3, [r3, #36]
-	sbcs	r6, r4, r5
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	str	r3, [sp]                @ 4-byte Spill
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	sbcs	r3, r3, r4
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	sbcs	r4, r4, r5
-	ldr	r5, [sp, #44]           @ 4-byte Reload
-	sbcs	r9, r5, r7
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	ldr	r5, [sp]                @ 4-byte Reload
-	sbcs	r11, lr, r11
-	sbc	lr, r7, r5
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	asr	r7, lr, #31
-	cmp	r7, #0
-	movlt	r2, r10
-	movlt	r8, r5
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	str	r8, [r0]
-	movlt	r1, r5
-	cmp	r7, #0
-	str	r1, [r0, #4]
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r2, [r0, #8]
-	movlt	r12, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r12, [r0, #12]
-	movlt	r6, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r6, [r0, #16]
-	movlt	r3, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	cmp	r7, #0
-	str	r3, [r0, #20]
-	movlt	r4, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r4, [r0, #24]
-	movlt	r9, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r9, [r0, #28]
-	movlt	r11, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	cmp	r7, #0
-	str	r11, [r0, #32]
-	movlt	lr, r1
-	str	lr, [r0, #36]
-	add	sp, sp, #56
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end153:
-	.size	mcl_fp_addNF10L, .Lfunc_end153-mcl_fp_addNF10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub10L
-	.align	2
-	.type	mcl_fp_sub10L,%function
-mcl_fp_sub10L:                          @ @mcl_fp_sub10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#36
-	sub	sp, sp, #36
-	ldm	r2, {r12, lr}
-	ldr	r8, [r2, #8]
-	ldr	r10, [r2, #12]
-	ldm	r1, {r4, r5, r6, r7}
-	subs	r4, r4, r12
-	ldr	r12, [r1, #36]
-	sbcs	r9, r5, lr
-	ldr	r5, [r2, #20]
-	str	r4, [sp, #32]           @ 4-byte Spill
-	ldr	r4, [r2, #24]
-	sbcs	lr, r6, r8
-	ldr	r6, [r2, #16]
-	sbcs	r8, r7, r10
-	ldr	r7, [r1, #16]
-	sbcs	r10, r7, r6
-	ldr	r6, [r1, #20]
-	sbcs	r7, r6, r5
-	ldr	r5, [r1, #24]
-	ldr	r6, [r1, #32]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	sbcs	r11, r5, r4
-	ldr	r4, [r2, #28]
-	ldr	r5, [r1, #28]
-	sbcs	r5, r5, r4
-	ldr	r4, [r2, #32]
-	ldr	r2, [r2, #36]
-	sbcs	r1, r6, r4
-	mov	r6, #0
-	sbcs	r2, r12, r2
-	ldr	r12, [sp, #32]          @ 4-byte Reload
-	sbc	r6, r6, #0
-	tst	r6, #1
-	str	r12, [r0]
-	stmib	r0, {r9, lr}
-	str	r8, [r0, #12]
-	str	r10, [r0, #16]
-	str	r7, [r0, #20]
-	mov	r7, r11
-	str	r7, [r0, #24]
-	str	r5, [r0, #28]
-	str	r1, [r0, #32]
-	str	r2, [r0, #36]
-	beq	.LBB154_2
-@ BB#1:                                 @ %carry
-	ldr	r4, [r3, #32]
-	str	r4, [sp, #20]           @ 4-byte Spill
-	ldr	r4, [r3, #36]
-	str	r4, [sp, #24]           @ 4-byte Spill
-	ldmib	r3, {r4, r11}
-	ldr	r6, [r3, #12]
-	str	r6, [sp]                @ 4-byte Spill
-	ldr	r6, [r3, #16]
-	str	r6, [sp, #4]            @ 4-byte Spill
-	ldr	r6, [r3, #20]
-	str	r6, [sp, #8]            @ 4-byte Spill
-	ldr	r6, [r3, #24]
-	str	r6, [sp, #12]           @ 4-byte Spill
-	ldr	r6, [r3, #28]
-	ldr	r3, [r3]
-	adds	r3, r3, r12
-	str	r6, [sp, #16]           @ 4-byte Spill
-	adcs	r4, r4, r9
-	stm	r0, {r3, r4}
-	adcs	r3, r11, lr
-	str	r3, [r0, #8]
-	ldr	r3, [sp]                @ 4-byte Reload
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	adcs	r3, r3, r8
-	str	r3, [r0, #12]
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	adcs	r3, r3, r10
-	str	r3, [r0, #16]
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	adcs	r3, r6, r3
-	str	r3, [r0, #20]
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	adcs	r3, r3, r7
-	str	r3, [r0, #24]
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	adcs	r3, r3, r5
-	str	r3, [r0, #28]
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	adcs	r1, r3, r1
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	str	r1, [r0, #32]
-	adc	r2, r3, r2
-	str	r2, [r0, #36]
-.LBB154_2:                              @ %nocarry
-	add	sp, sp, #36
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end154:
-	.size	mcl_fp_sub10L, .Lfunc_end154-mcl_fp_sub10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF10L
-	.align	2
-	.type	mcl_fp_subNF10L,%function
-mcl_fp_subNF10L:                        @ @mcl_fp_subNF10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#64
-	sub	sp, sp, #64
-	mov	r12, r0
-	ldr	r0, [r2, #32]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r2, #36]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldm	r2, {r4, r5}
-	ldr	r0, [r2, #8]
-	ldr	r7, [r2, #16]
-	ldr	r8, [r2, #20]
-	ldr	lr, [r1, #12]
-	ldr	r6, [r1, #16]
-	ldr	r11, [r1, #20]
-	ldr	r9, [r1, #24]
-	ldr	r10, [r1, #28]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r2, #12]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r2, #24]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r2, #28]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r1, #8]
-	ldm	r1, {r1, r2}
-	subs	r1, r1, r4
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	sbcs	r2, r2, r5
-	str	r2, [sp, #16]           @ 4-byte Spill
-	sbcs	r4, r0, r1
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r4, [sp, #20]           @ 4-byte Spill
-	sbcs	r5, lr, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	sbcs	r7, r6, r7
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	str	r5, [sp, #28]           @ 4-byte Spill
-	sbcs	lr, r11, r8
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	lr, [sp, #36]           @ 4-byte Spill
-	sbcs	r8, r9, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	str	r8, [sp, #48]           @ 4-byte Spill
-	sbcs	r9, r10, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	str	r9, [sp, #56]           @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	sbc	r1, r6, r1
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [r3, #32]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [r3, #36]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldmib	r3, {r1, r6}
-	ldr	r11, [r3, #24]
-	ldr	r10, [sp, #24]          @ 4-byte Reload
-	str	r6, [sp, #4]            @ 4-byte Spill
-	ldr	r6, [r3, #12]
-	str	r6, [sp]                @ 4-byte Spill
-	ldr	r6, [r3, #16]
-	str	r6, [sp, #8]            @ 4-byte Spill
-	ldr	r6, [r3, #20]
-	str	r6, [sp, #12]           @ 4-byte Spill
-	ldr	r6, [r3, #28]
-	ldr	r3, [r3]
-	adds	r3, r10, r3
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	adcs	r2, r4, r2
-	ldr	r4, [sp]                @ 4-byte Reload
-	adcs	r4, r5, r4
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	adcs	r5, r7, r5
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	adcs	r7, lr, r7
-	adcs	r11, r8, r11
-	adcs	r8, r9, r6
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	adcs	r9, r0, r6
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	asr	lr, r0, #31
-	adc	r6, r0, r6
-	cmp	lr, #0
-	movge	r3, r10
-	str	r3, [r12]
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	movge	r1, r3
-	str	r1, [r12, #4]
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	movge	r2, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	cmp	lr, #0
-	str	r2, [r12, #8]
-	movge	r4, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r4, [r12, #12]
-	movge	r5, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r5, [r12, #16]
-	movge	r7, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	cmp	lr, #0
-	str	r7, [r12, #20]
-	movge	r11, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r11, [r12, #24]
-	movge	r8, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r8, [r12, #28]
-	movge	r9, r1
-	cmp	lr, #0
-	movge	r6, r0
-	str	r9, [r12, #32]
-	str	r6, [r12, #36]
-	add	sp, sp, #64
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end155:
-	.size	mcl_fp_subNF10L, .Lfunc_end155-mcl_fp_subNF10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add10L
-	.align	2
-	.type	mcl_fpDbl_add10L,%function
-mcl_fpDbl_add10L:                       @ @mcl_fpDbl_add10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#104
-	sub	sp, sp, #104
-	ldm	r1, {r7, r9}
-	ldr	r8, [r1, #8]
-	ldr	r12, [r1, #12]
-	ldm	r2, {r4, r5, r6, r10}
-	add	lr, r1, #16
-	adds	r7, r4, r7
-	ldr	r4, [r2, #16]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #88]           @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #100]          @ 4-byte Spill
-	ldr	r7, [r2, #72]
-	str	r7, [sp, #96]           @ 4-byte Spill
-	ldr	r7, [r2, #76]
-	str	r7, [sp, #92]           @ 4-byte Spill
-	adcs	r7, r5, r9
-	str	r7, [sp, #28]           @ 4-byte Spill
-	adcs	r7, r6, r8
-	ldr	r8, [r2, #20]
-	str	r7, [sp, #24]           @ 4-byte Spill
-	adcs	r7, r10, r12
-	add	r10, r1, #32
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldr	r7, [r2, #32]
-	str	r7, [sp, #60]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #64]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #84]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r7, [sp]                @ 4-byte Spill
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldm	r10, {r7, r9, r10}
-	ldr	r2, [r1, #48]
-	ldr	r5, [r1, #44]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #52]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #36]          @ 4-byte Reload
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r4, r1
-	str	r11, [r0]
-	str	r6, [r0, #4]
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	adcs	r2, r8, r2
-	str	r6, [r0, #8]
-	str	r4, [r0, #12]
-	str	r1, [r0, #16]
-	ldr	r1, [sp]                @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r1, r1, r12
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r2, r2, lr
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r2, r2, r9
-	str	r2, [r0, #36]
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	adcs	lr, r1, r10
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r1, r5
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r4, [sp, #68]           @ 4-byte Spill
-	adcs	r12, r1, r2
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r12, [sp, #72]          @ 4-byte Spill
-	adcs	r5, r1, r2
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r5, [sp, #76]           @ 4-byte Spill
-	adcs	r7, r1, r2
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r7, [sp, #80]           @ 4-byte Spill
-	adcs	r9, r1, r2
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r9, [sp, #84]           @ 4-byte Spill
-	adcs	r10, r1, r2
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r10, [sp, #64]          @ 4-byte Spill
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #92]           @ 4-byte Spill
-	mov	r1, #0
-	adc	r1, r1, #0
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldmib	r3, {r1, r2, r8}
-	ldr	r6, [r3, #16]
-	ldr	r11, [r3]
-	str	r6, [sp, #48]           @ 4-byte Spill
-	ldr	r6, [r3, #20]
-	subs	r11, lr, r11
-	sbcs	r1, r4, r1
-	sbcs	r2, r12, r2
-	sbcs	r12, r5, r8
-	ldr	r8, [r3, #32]
-	ldr	r5, [r3, #36]
-	str	r6, [sp, #52]           @ 4-byte Spill
-	ldr	r6, [r3, #24]
-	str	r6, [sp, #56]           @ 4-byte Spill
-	ldr	r6, [r3, #28]
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	str	r6, [sp, #60]           @ 4-byte Spill
-	sbcs	r6, r7, r3
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	sbcs	r7, r9, r3
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	sbcs	r9, r10, r3
-	ldr	r3, [sp, #100]          @ 4-byte Reload
-	sbcs	r10, r3, r4
-	ldr	r3, [sp, #96]           @ 4-byte Reload
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	sbcs	r8, r3, r8
-	ldr	r3, [sp, #92]           @ 4-byte Reload
-	sbcs	r5, r3, r5
-	ldr	r3, [sp, #88]           @ 4-byte Reload
-	sbc	r3, r3, #0
-	ands	r3, r3, #1
-	movne	r11, lr
-	movne	r1, r4
-	str	r11, [r0, #40]
-	str	r1, [r0, #44]
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	movne	r2, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	cmp	r3, #0
-	str	r2, [r0, #48]
-	movne	r12, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r12, [r0, #52]
-	movne	r6, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r6, [r0, #56]
-	movne	r7, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	cmp	r3, #0
-	str	r7, [r0, #60]
-	movne	r9, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r9, [r0, #64]
-	movne	r10, r1
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r10, [r0, #68]
-	movne	r8, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	cmp	r3, #0
-	str	r8, [r0, #72]
-	movne	r5, r1
-	str	r5, [r0, #76]
-	add	sp, sp, #104
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end156:
-	.size	mcl_fpDbl_add10L, .Lfunc_end156-mcl_fpDbl_add10L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sub10L
-	.align	2
-	.type	mcl_fpDbl_sub10L,%function
-mcl_fpDbl_sub10L:                       @ @mcl_fpDbl_sub10L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#96
-	sub	sp, sp, #96
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #92]           @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #88]           @ 4-byte Spill
-	ldr	r7, [r2, #72]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r2, #76]
-	str	r7, [sp, #84]           @ 4-byte Spill
-	ldr	r7, [r2, #32]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #60]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #64]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldm	r2, {r6, r7, r8, r9}
-	ldm	r1, {r12, lr}
-	ldr	r4, [r1, #8]
-	ldr	r10, [r2, #20]
-	ldr	r5, [r1, #12]
-	subs	r11, r12, r6
-	ldr	r6, [r2, #28]
-	sbcs	r7, lr, r7
-	add	lr, r1, #16
-	sbcs	r8, r4, r8
-	ldr	r4, [r2, #16]
-	sbcs	r5, r5, r9
-	ldr	r9, [r1, #32]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	ldr	r6, [r2, #24]
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r6, [sp, #24]           @ 4-byte Spill
-	ldr	r6, [r1, #44]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #36]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #40]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #52]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	str	r11, [r0]
-	stmib	r0, {r7, r8}
-	str	r5, [r0, #12]
-	ldr	r7, [sp]                @ 4-byte Reload
-	ldr	r8, [r3, #20]
-	sbcs	r1, r1, r4
-	str	r1, [r0, #16]
-	sbcs	r2, r2, r10
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	sbcs	r1, r12, r1
-	str	r1, [r0, #24]
-	sbcs	r2, lr, r2
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	sbcs	r1, r9, r1
-	sbcs	r2, r7, r2
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r2, [r0, #36]
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	sbcs	r12, r2, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r12, [sp, #48]          @ 4-byte Spill
-	sbcs	r4, r6, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	sbcs	r11, r2, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r11, [sp, #52]          @ 4-byte Spill
-	sbcs	r6, r2, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r6, [sp, #64]           @ 4-byte Spill
-	sbcs	r7, r2, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r7, [sp, #68]           @ 4-byte Spill
-	sbcs	r9, r2, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r9, [sp, #76]           @ 4-byte Spill
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	sbcs	r10, r2, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r10, [sp, #80]          @ 4-byte Spill
-	sbcs	lr, r2, r1
-	mov	r1, #0
-	ldr	r2, [r3, #4]
-	sbc	r1, r1, #0
-	str	lr, [sp, #84]           @ 4-byte Spill
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [r3, #32]
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [r3, #36]
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [r3, #8]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [r3, #12]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	ldr	r5, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [r3, #24]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [r3, #28]
-	ldr	r3, [r3]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	adds	r1, r12, r3
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	adcs	r2, r4, r2
-	adcs	r3, r11, r3
-	adcs	r12, r6, r5
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	ldr	r5, [sp, #92]           @ 4-byte Reload
-	adcs	r6, r7, r6
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	adcs	r8, r9, r8
-	adcs	r9, r5, r7
-	ldr	r5, [sp, #44]           @ 4-byte Reload
-	ldr	r7, [sp, #88]           @ 4-byte Reload
-	adcs	r7, r7, r5
-	ldr	r5, [sp, #56]           @ 4-byte Reload
-	adcs	r11, r10, r5
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	adc	r10, lr, r5
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	ands	lr, r5, #1
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	moveq	r2, r4
-	moveq	r1, r5
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r2, [r0, #44]
-	moveq	r3, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	cmp	lr, #0
-	str	r3, [r0, #48]
-	moveq	r12, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r12, [r0, #52]
-	moveq	r6, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r6, [r0, #56]
-	moveq	r8, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	cmp	lr, #0
-	str	r8, [r0, #60]
-	moveq	r9, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r9, [r0, #64]
-	moveq	r7, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r7, [r0, #68]
-	moveq	r11, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	cmp	lr, #0
-	str	r11, [r0, #72]
-	moveq	r10, r1
-	str	r10, [r0, #76]
-	add	sp, sp, #96
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end157:
-	.size	mcl_fpDbl_sub10L, .Lfunc_end157-mcl_fpDbl_sub10L
-	.cantunwind
-	.fnend
-
-	.align	2
-	.type	.LmulPv352x32,%function
-.LmulPv352x32:                          @ @mulPv352x32
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r9, [r1, #12]
-	umull	r4, r8, lr, r2
-	umull	lr, r6, r12, r2
-	mov	r5, r4
-	mov	r7, r6
-	str	lr, [r0]
-	umull	lr, r12, r9, r2
-	umlal	r7, r5, r3, r2
-	str	r5, [r0, #8]
-	str	r7, [r0, #4]
-	umull	r5, r7, r3, r2
-	adds	r3, r6, r5
-	adcs	r3, r7, r4
-	adcs	r3, r8, lr
-	str	r3, [r0, #12]
-	ldr	r3, [r1, #16]
-	umull	r7, r6, r3, r2
-	adcs	r3, r12, r7
-	str	r3, [r0, #16]
-	ldr	r3, [r1, #20]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #20]
-	ldr	r3, [r1, #24]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #24]
-	ldr	r3, [r1, #28]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #28]
-	ldr	r3, [r1, #32]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #32]
-	ldr	r3, [r1, #36]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #36]
-	ldr	r1, [r1, #40]
-	umull	r3, r7, r1, r2
-	adcs	r1, r5, r3
-	str	r1, [r0, #40]
-	adc	r1, r7, #0
-	str	r1, [r0, #44]
-	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
-	mov	pc, lr
-.Lfunc_end158:
-	.size	.LmulPv352x32, .Lfunc_end158-.LmulPv352x32
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mulUnitPre11L
-	.align	2
-	.type	mcl_fp_mulUnitPre11L,%function
-mcl_fp_mulUnitPre11L:                   @ @mcl_fp_mulUnitPre11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, lr}
-	.pad	#48
-	sub	sp, sp, #48
-	mov	r4, r0
-	mov	r0, sp
-	bl	.LmulPv352x32(PLT)
-	ldr	r12, [sp, #44]
-	ldr	lr, [sp, #40]
-	ldr	r8, [sp, #36]
-	ldr	r9, [sp, #32]
-	ldr	r10, [sp, #28]
-	ldr	r1, [sp, #24]
-	ldr	r5, [sp, #20]
-	ldr	r6, [sp, #16]
-	ldr	r7, [sp]
-	ldmib	sp, {r2, r3}
-	ldr	r0, [sp, #12]
-	str	r7, [r4]
-	stmib	r4, {r2, r3}
-	str	r0, [r4, #12]
-	str	r6, [r4, #16]
-	str	r5, [r4, #20]
-	str	r1, [r4, #24]
-	str	r10, [r4, #28]
-	str	r9, [r4, #32]
-	str	r8, [r4, #36]
-	str	lr, [r4, #40]
-	str	r12, [r4, #44]
-	add	sp, sp, #48
-	pop	{r4, r5, r6, r7, r8, r9, r10, lr}
-	mov	pc, lr
-.Lfunc_end159:
-	.size	mcl_fp_mulUnitPre11L, .Lfunc_end159-mcl_fp_mulUnitPre11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_mulPre11L
-	.align	2
-	.type	mcl_fpDbl_mulPre11L,%function
-mcl_fpDbl_mulPre11L:                    @ @mcl_fpDbl_mulPre11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#604
-	sub	sp, sp, #604
-	mov	r3, r2
-	mov	r4, r0
-	add	r0, sp, #552
-	str	r1, [sp, #68]           @ 4-byte Spill
-	mov	r5, r1
-	ldr	r2, [r3]
-	str	r3, [sp, #64]           @ 4-byte Spill
-	str	r4, [sp, #60]           @ 4-byte Spill
-	mov	r6, r3
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #596]
-	ldr	r1, [sp, #560]
-	ldr	r2, [r6, #4]
-	ldr	r11, [sp, #556]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #592]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #564]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #588]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	mov	r1, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #584]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #580]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #576]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #572]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #568]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #552]
-	str	r0, [r4]
-	add	r0, sp, #504
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #548]
-	add	r10, sp, #532
-	add	r12, sp, #508
-	mov	r6, r4
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r5, r8, r9, r10}
-	ldr	r1, [sp, #504]
-	ldr	lr, [sp, #528]
-	ldr	r7, [sp, #524]
-	ldm	r12, {r0, r2, r3, r12}
-	adds	r1, r1, r11
-	str	r1, [r4, #4]
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r2, [r5, #8]
-	adcs	r0, r8, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #456
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #500]
-	add	r10, sp, #484
-	add	r12, sp, #460
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #496]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	lr, [sp, #480]
-	ldr	r7, [sp, #476]
-	ldr	r1, [sp, #456]
-	ldm	r12, {r0, r2, r3, r12}
-	ldr	r11, [sp, #16]          @ 4-byte Reload
-	adds	r1, r1, r11
-	str	r1, [r6, #8]
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #12]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #408
-	bl	.LmulPv352x32(PLT)
-	add	r10, sp, #444
-	add	lr, sp, #432
-	add	r12, sp, #412
-	ldm	r10, {r8, r9, r10}
-	ldm	lr, {r6, r11, lr}
-	ldr	r7, [sp, #428]
-	ldr	r1, [sp, #408]
-	ldm	r12, {r0, r2, r3, r12}
-	ldr	r4, [sp, #16]           @ 4-byte Reload
-	adds	r1, r1, r4
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	str	r1, [r4, #12]
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #16]
-	ldr	r5, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r0, r3, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adc	r0, r10, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #360
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #404]
-	add	r10, sp, #392
-	add	r12, sp, #364
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	lr, [sp, #388]
-	ldr	r6, [sp, #384]
-	ldr	r7, [sp, #380]
-	ldr	r1, [sp, #360]
-	ldm	r12, {r0, r2, r3, r12}
-	ldr	r11, [sp, #16]          @ 4-byte Reload
-	adds	r1, r1, r11
-	str	r1, [r4, #16]
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r4, #20]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #312
-	bl	.LmulPv352x32(PLT)
-	add	r11, sp, #344
-	add	r12, sp, #316
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	lr, [sp, #340]
-	ldr	r6, [sp, #336]
-	ldr	r7, [sp, #332]
-	ldr	r1, [sp, #312]
-	ldm	r12, {r0, r2, r3, r12}
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	adds	r1, r1, r5
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	str	r1, [r5, #20]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r4, #24]
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	mov	r1, r4
-	adcs	r0, r3, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	adc	r0, r11, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #264
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #308]
-	add	r10, sp, #296
-	add	r12, sp, #268
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	lr, [sp, #292]
-	ldr	r6, [sp, #288]
-	ldr	r7, [sp, #284]
-	ldr	r1, [sp, #264]
-	ldm	r12, {r0, r2, r3, r12}
-	ldr	r11, [sp, #16]          @ 4-byte Reload
-	adds	r1, r1, r11
-	str	r1, [r5, #24]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #28]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #216
-	bl	.LmulPv352x32(PLT)
-	add	r10, sp, #252
-	add	lr, sp, #240
-	add	r12, sp, #220
-	ldm	r10, {r8, r9, r10}
-	ldm	lr, {r6, r11, lr}
-	ldr	r7, [sp, #236]
-	ldr	r1, [sp, #216]
-	ldm	r12, {r0, r2, r3, r12}
-	ldr	r4, [sp, #16]           @ 4-byte Reload
-	adds	r1, r1, r4
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	str	r1, [r4, #28]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #32]
-	ldr	r5, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mov	r1, r5
-	adcs	r0, r3, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	adc	r0, r10, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	add	r0, sp, #168
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #212]
-	add	r10, sp, #200
-	add	r12, sp, #172
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	lr, [sp, #196]
-	ldr	r6, [sp, #192]
-	ldr	r7, [sp, #188]
-	ldr	r1, [sp, #168]
-	ldm	r12, {r0, r2, r3, r12}
-	ldr	r11, [sp, #12]          @ 4-byte Reload
-	adds	r1, r1, r11
-	ldr	r11, [sp, #64]          @ 4-byte Reload
-	str	r1, [r4, #32]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r11, #36]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #120
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #164]
-	add	lr, sp, #152
-	add	r10, sp, #140
-	add	r8, sp, #128
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	lr, {r9, r12, lr}
-	ldm	r10, {r0, r6, r10}
-	ldr	r2, [sp, #120]
-	ldr	r3, [sp, #124]
-	ldm	r8, {r1, r7, r8}
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	adds	r2, r2, r5
-	ldr	r5, [sp, #56]           @ 4-byte Reload
-	str	r2, [r4, #36]
-	ldr	r2, [r11, #40]
-	adcs	r11, r3, r5
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	adcs	r5, r1, r3
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r7, r7, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r8, r8, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r10, r10, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #72
-	bl	.LmulPv352x32(PLT)
-	add	r3, sp, #72
-	ldm	r3, {r0, r1, r2, r3}
-	ldr	r9, [sp, #116]
-	ldr	r6, [sp, #112]
-	adds	r12, r0, r11
-	add	r11, sp, #88
-	adcs	lr, r1, r5
-	adcs	r2, r2, r7
-	adcs	r3, r3, r8
-	ldr	r8, [sp, #108]
-	ldm	r11, {r0, r1, r5, r7, r11}
-	str	r12, [r4, #40]
-	str	lr, [r4, #44]
-	str	r2, [r4, #48]
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	add	r12, r4, #72
-	str	r3, [r4, #52]
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r0, [r4, #56]
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r5, r10
-	str	r1, [r4, #60]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [r4, #64]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [r4, #68]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	adcs	r1, r8, r1
-	adcs	r2, r6, r2
-	adc	r3, r9, #0
-	stm	r12, {r0, r1, r2, r3}
-	add	sp, sp, #604
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end160:
-	.size	mcl_fpDbl_mulPre11L, .Lfunc_end160-mcl_fpDbl_mulPre11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sqrPre11L
-	.align	2
-	.type	mcl_fpDbl_sqrPre11L,%function
-mcl_fpDbl_sqrPre11L:                    @ @mcl_fpDbl_sqrPre11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#596
-	sub	sp, sp, #596
-	mov	r5, r1
-	mov	r4, r0
-	add	r0, sp, #544
-	ldr	r2, [r5]
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #588]
-	ldr	r1, [sp, #548]
-	ldr	r2, [r5, #4]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #584]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #552]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #580]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #556]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #576]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	mov	r1, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #572]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #568]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #564]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #560]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #544]
-	str	r0, [r4]
-	add	r0, sp, #496
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #540]
-	add	r10, sp, #520
-	add	lr, sp, #496
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #20]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #4]
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #8]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #448
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #492]
-	add	r10, sp, #476
-	add	lr, sp, #448
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r6, r8, r9, r10}
-	ldr	r7, [sp, #472]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #20]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #8]
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #12]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #400
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #444]
-	add	r10, sp, #428
-	add	lr, sp, #400
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r6, r8, r9, r10}
-	ldr	r7, [sp, #424]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #20]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #12]
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #16]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #352
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #396]
-	add	r10, sp, #380
-	add	lr, sp, #352
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r6, r8, r9, r10}
-	ldr	r7, [sp, #376]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #20]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #16]
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #20]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #304
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #348]
-	add	r10, sp, #332
-	add	lr, sp, #304
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r6, r8, r9, r10}
-	ldr	r7, [sp, #328]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #20]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #20]
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #24]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #256
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #300]
-	add	r10, sp, #284
-	add	lr, sp, #256
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r6, r8, r9, r10}
-	ldr	r7, [sp, #280]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #20]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #24]
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #28]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #208
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #252]
-	add	r10, sp, #236
-	add	lr, sp, #208
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r6, r8, r9, r10}
-	ldr	r7, [sp, #232]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #20]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #28]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #32]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #160
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #204]
-	add	r10, sp, #188
-	add	lr, sp, #160
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r6, r8, r9, r10}
-	ldr	r7, [sp, #184]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #16]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #32]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #36]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #112
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #156]
-	add	lr, sp, #140
-	add	r12, sp, #124
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #152]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r8, r11, lr}
-	ldr	r9, [sp, #136]
-	ldr	r2, [sp, #112]
-	ldr	r7, [sp, #116]
-	ldr	r6, [sp, #120]
-	ldm	r12, {r0, r3, r12}
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adds	r2, r2, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r2, [r4, #36]
-	ldr	r2, [r5, #40]
-	adcs	r7, r7, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r6, r6, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r10, r0, r1
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r11, r11, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #64
-	bl	.LmulPv352x32(PLT)
-	add	r3, sp, #64
-	ldm	r3, {r0, r1, r2, r3}
-	ldr	r9, [sp, #108]
-	ldr	r8, [sp, #104]
-	adds	r12, r0, r7
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	lr, r1, r6
-	adcs	r2, r2, r10
-	add	r10, sp, #80
-	adcs	r3, r3, r0
-	ldm	r10, {r0, r1, r5, r6, r7, r10}
-	str	r12, [r4, #40]
-	str	lr, [r4, #44]
-	str	r2, [r4, #48]
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	add	r12, r4, #72
-	str	r3, [r4, #52]
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r0, [r4, #56]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r1, [r4, #60]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [r4, #64]
-	adcs	r0, r6, r11
-	str	r0, [r4, #68]
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	adcs	r1, r10, r1
-	adcs	r2, r8, r2
-	adc	r3, r9, #0
-	stm	r12, {r0, r1, r2, r3}
-	add	sp, sp, #596
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end161:
-	.size	mcl_fpDbl_sqrPre11L, .Lfunc_end161-mcl_fpDbl_sqrPre11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mont11L
-	.align	2
-	.type	mcl_fp_mont11L,%function
-mcl_fp_mont11L:                         @ @mcl_fp_mont11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#132
-	sub	sp, sp, #132
-	.pad	#1024
-	sub	sp, sp, #1024
-	mov	r7, r2
-	ldr	r5, [r3, #-4]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	add	r0, sp, #1104
-	str	r3, [sp, #92]           @ 4-byte Spill
-	str	r1, [sp, #84]           @ 4-byte Spill
-	mov	r4, r3
-	mov	r6, r1
-	ldr	r2, [r7]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	str	r5, [sp, #88]           @ 4-byte Spill
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #1108]
-	ldr	r8, [sp, #1104]
-	mov	r1, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1112]
-	mul	r2, r8, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1116]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1148]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #1144]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #1140]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1136]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1132]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1128]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1124]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1120]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	add	r0, sp, #1056
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #1100]
-	ldr	r2, [r7, #4]
-	ldr	r11, [sp, #1072]
-	ldr	r5, [sp, #1056]
-	ldr	r4, [sp, #1060]
-	ldr	r10, [sp, #1064]
-	ldr	r9, [sp, #1068]
-	mov	r1, r6
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1096]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1092]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1088]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1084]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1080]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #1076]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	add	r0, sp, #1008
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r5, r8
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #1008
-	ldr	r7, [sp, #1044]
-	ldr	r6, [sp, #1040]
-	ldr	r5, [sp, #1036]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r8, r4, r0
-	mov	r0, #0
-	ldr	r4, [sp, #1032]
-	adcs	r1, r10, r1
-	ldr	r10, [sp, #1052]
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r9, r1
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r11, r1
-	ldr	r11, [sp, #1048]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	str	r1, [sp, #28]           @ 4-byte Spill
-	adc	r9, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r8, r8, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r9, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, sp, #960
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #1004]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r5, [sp, #984]
-	ldr	r6, [sp, #980]
-	ldr	r9, [sp, #976]
-	ldr	r10, [sp, #960]
-	ldr	r11, [sp, #964]
-	ldr	r7, [sp, #968]
-	ldr	r4, [sp, #972]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1000]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #996]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #992]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #988]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, sp, #912
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r8, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #916
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #940
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldm	r11, {r5, r6, r7, r8, r11}
-	ldr	r4, [sp, #912]
-	adc	r10, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r9, [sp, #76]           @ 4-byte Reload
-	adds	r9, r9, r4
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r10, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r9, r0
-	add	r0, sp, #864
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #908]
-	add	r11, sp, #864
-	ldr	r7, [sp, #888]
-	ldr	r5, [sp, #884]
-	ldr	r8, [sp, #880]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #904]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #900]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #896]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #892]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r6, [sp, #876]
-	ldr	r2, [r0, #12]
-	add	r0, sp, #816
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r4, r9
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #816
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #840
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	adds	r8, r7, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, sp, #768
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #812]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r5, [sp, #792]
-	ldr	r6, [sp, #788]
-	ldr	r9, [sp, #784]
-	ldr	r10, [sp, #768]
-	ldr	r11, [sp, #772]
-	ldr	r7, [sp, #776]
-	ldr	r4, [sp, #780]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #808]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #804]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #800]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #796]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, sp, #720
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r8, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #724
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #748
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldm	r11, {r5, r6, r7, r8, r11}
-	ldr	r4, [sp, #720]
-	adc	r10, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r9, [sp, #76]           @ 4-byte Reload
-	adds	r9, r9, r4
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r10, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r9, r0
-	add	r0, sp, #672
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #716]
-	add	r11, sp, #672
-	ldr	r7, [sp, #696]
-	ldr	r5, [sp, #692]
-	ldr	r8, [sp, #688]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #712]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #708]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r6, [sp, #684]
-	ldr	r2, [r0, #20]
-	add	r0, sp, #624
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r4, r9
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #624
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #648
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	adds	r8, r7, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, sp, #576
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #620]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r5, [sp, #600]
-	ldr	r6, [sp, #596]
-	ldr	r9, [sp, #592]
-	ldr	r10, [sp, #576]
-	ldr	r11, [sp, #580]
-	ldr	r7, [sp, #584]
-	ldr	r4, [sp, #588]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #616]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #612]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #608]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #604]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #528
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r8, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #532
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #556
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldm	r11, {r5, r6, r7, r8, r11}
-	ldr	r4, [sp, #528]
-	adc	r10, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r9, [sp, #76]           @ 4-byte Reload
-	adds	r9, r9, r4
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r10, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r9, r0
-	add	r0, sp, #480
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #524]
-	add	r11, sp, #480
-	ldr	r7, [sp, #504]
-	ldr	r5, [sp, #500]
-	ldr	r8, [sp, #496]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #520]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #516]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #512]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #508]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r6, [sp, #492]
-	ldr	r2, [r0, #28]
-	add	r0, sp, #432
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r4, r9
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #432
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #456
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	adds	r8, r7, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, sp, #384
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #428]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r5, [sp, #408]
-	ldr	r6, [sp, #404]
-	ldr	r9, [sp, #400]
-	ldr	r10, [sp, #384]
-	ldr	r11, [sp, #388]
-	ldr	r7, [sp, #392]
-	ldr	r4, [sp, #396]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #424]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #420]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #416]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #412]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #336
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r8, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #340
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #364
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldm	r11, {r5, r6, r7, r8, r11}
-	ldr	r4, [sp, #336]
-	adc	r10, r0, #0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r9, [sp, #76]           @ 4-byte Reload
-	adds	r9, r9, r4
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adcs	r0, r10, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r9, r0
-	add	r0, sp, #288
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #332]
-	add	r11, sp, #288
-	ldr	r7, [sp, #312]
-	ldr	r5, [sp, #308]
-	ldr	r8, [sp, #304]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #328]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #324]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #320]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #316]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r6, [sp, #300]
-	ldr	r2, [r0, #36]
-	add	r0, sp, #240
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r4, r9
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #240
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #264
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	adds	r8, r7, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, sp, #192
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #236]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r5, [sp, #216]
-	ldr	r6, [sp, #212]
-	ldr	r9, [sp, #208]
-	ldr	r10, [sp, #192]
-	ldr	r11, [sp, #196]
-	ldr	r7, [sp, #200]
-	ldr	r4, [sp, #204]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #232]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #228]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #224]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #220]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r2, [r0, #40]
-	add	r0, sp, #144
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r8, r10
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #144
-	add	r12, sp, #160
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	adcs	r10, r1, r7
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r11, r1, r4
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	add	r9, sp, #180
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldm	lr, {r2, r6, lr}
-	ldr	r5, [sp, #156]
-	adds	r4, r0, r2
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r6, r10, r6
-	mul	r1, r4, r0
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldm	r9, {r7, r8, r9}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	str	r6, [sp, #40]           @ 4-byte Spill
-	adcs	r6, r11, lr
-	ldr	r10, [sp, #92]          @ 4-byte Reload
-	str	r6, [sp, #36]           @ 4-byte Spill
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	adcs	r11, r6, r5
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	adcs	r6, r6, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r10
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r5, r0, r3
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r8, r0, r9
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	add	r0, sp, #96
-	bl	.LmulPv352x32(PLT)
-	add	r7, sp, #96
-	ldm	r7, {r0, r1, r3, r7}
-	adds	r0, r4, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	lr, r0, r1
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	str	lr, [sp, #44]           @ 4-byte Spill
-	adcs	r1, r0, r3
-	ldr	r3, [sp, #112]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r9, r11, r7
-	str	r1, [sp, #48]           @ 4-byte Spill
-	adcs	r6, r6, r3
-	ldr	r3, [sp, #116]
-	str	r6, [sp, #52]           @ 4-byte Spill
-	adcs	r0, r0, r3
-	ldr	r3, [sp, #120]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r12, r0, r3
-	ldr	r3, [sp, #124]
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	str	r12, [sp, #56]          @ 4-byte Spill
-	adcs	r5, r5, r3
-	ldr	r3, [sp, #128]
-	str	r5, [sp, #60]           @ 4-byte Spill
-	adcs	r0, r0, r3
-	ldr	r3, [sp, #132]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	ldr	r3, [sp, #136]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	ldr	r3, [sp, #140]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r8, r8, r3
-	adc	r0, r0, #0
-	str	r8, [sp, #68]           @ 4-byte Spill
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldmib	r10, {r3, r7}
-	ldr	r4, [r10, #16]
-	ldr	r11, [r10]
-	ldr	r2, [r10, #12]
-	mov	r0, r10
-	str	r4, [sp, #28]           @ 4-byte Spill
-	ldr	r4, [r10, #20]
-	subs	r11, lr, r11
-	ldr	lr, [sp, #84]           @ 4-byte Reload
-	str	r4, [sp, #32]           @ 4-byte Spill
-	ldr	r4, [r10, #24]
-	str	r4, [sp, #36]           @ 4-byte Spill
-	ldr	r4, [r10, #28]
-	sbcs	r10, r1, r3
-	mov	r3, r9
-	ldr	r9, [r0, #32]
-	sbcs	r1, r3, r7
-	ldr	r7, [r0, #36]
-	ldr	r0, [r0, #40]
-	sbcs	r2, r6, r2
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	str	r4, [sp, #40]           @ 4-byte Spill
-	ldr	r4, [sp, #28]           @ 4-byte Reload
-	sbcs	lr, lr, r4
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	sbcs	r4, r12, r4
-	ldr	r12, [sp, #88]          @ 4-byte Reload
-	sbcs	r5, r5, r6
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	sbcs	r12, r12, r6
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	sbcs	r9, r6, r9
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	sbcs	r7, r6, r7
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	sbcs	r0, r8, r0
-	ldr	r8, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	sbc	r6, r6, #0
-	ands	r6, r6, #1
-	movne	r11, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	movne	r1, r3
-	str	r11, [r8]
-	movne	r10, r0
-	cmp	r6, #0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	r10, [r8, #4]
-	str	r1, [r8, #8]
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	movne	r2, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r2, [r8, #12]
-	movne	lr, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	lr, [r8, #16]
-	movne	r4, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	cmp	r6, #0
-	str	r4, [r8, #20]
-	movne	r5, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r5, [r8, #24]
-	movne	r12, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r12, [r8, #28]
-	movne	r9, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	cmp	r6, #0
-	str	r9, [r8, #32]
-	movne	r7, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r7, [r8, #36]
-	movne	r0, r1
-	str	r0, [r8, #40]
-	add	sp, sp, #132
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end162:
-	.size	mcl_fp_mont11L, .Lfunc_end162-mcl_fp_mont11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montNF11L
-	.align	2
-	.type	mcl_fp_montNF11L,%function
-mcl_fp_montNF11L:                       @ @mcl_fp_montNF11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#124
-	sub	sp, sp, #124
-	.pad	#1024
-	sub	sp, sp, #1024
-	str	r2, [sp, #72]           @ 4-byte Spill
-	ldr	r5, [r3, #-4]
-	ldr	r2, [r2]
-	add	r6, sp, #1024
-	str	r0, [sp, #68]           @ 4-byte Spill
-	str	r3, [sp, #84]           @ 4-byte Spill
-	str	r1, [sp, #76]           @ 4-byte Spill
-	mov	r4, r3
-	add	r0, r6, #72
-	str	r5, [sp, #80]           @ 4-byte Spill
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #1100]
-	ldr	r10, [sp, #1096]
-	add	r9, sp, #1024
-	mov	r1, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1104]
-	mul	r2, r10, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1108]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1140]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1136]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1132]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1128]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1124]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1120]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1116]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1112]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, r9, #24
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #1092]
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r5, [sp, #1072]
-	ldr	r7, [sp, #1068]
-	ldr	r8, [sp, #1064]
-	ldr	r11, [sp, #1048]
-	ldr	r4, [sp, #1052]
-	ldr	r6, [sp, #1056]
-	ldr	r9, [sp, #1060]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1088]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1084]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1080]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #1076]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r2, [r0, #4]
-	add	r0, sp, #1000
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r11, r10
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	r11, sp, #1024
-	add	lr, sp, #1000
-	ldr	r10, [sp, #1044]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	ldm	r11, {r4, r5, r6, r8, r11}
-	adc	r9, r1, r0
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r9, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r10, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #952
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #996]
-	add	r11, sp, #952
-	ldr	r6, [sp, #976]
-	ldr	r4, [sp, #972]
-	ldr	r8, [sp, #968]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #992]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #988]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #984]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #980]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r5, [sp, #964]
-	ldr	r2, [r0, #8]
-	add	r0, sp, #904
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r7, r9
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #908
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #948]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #932
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r11, {r5, r6, r9, r11}
-	ldr	r4, [sp, #904]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r8, [sp, #64]           @ 4-byte Reload
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	adds	r4, r8, r4
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r10, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #856
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #900]
-	add	r11, sp, #856
-	ldr	r7, [sp, #880]
-	ldr	r5, [sp, #876]
-	ldr	r8, [sp, #872]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #896]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #892]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #888]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #884]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r6, [sp, #868]
-	ldr	r2, [r0, #12]
-	add	r0, sp, #808
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r4, r9
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #808
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #852]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #832
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldm	r11, {r4, r5, r6, r8, r11}
-	adc	r9, r0, r1
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r9, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r10, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #760
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #804]
-	add	r11, sp, #760
-	ldr	r6, [sp, #784]
-	ldr	r4, [sp, #780]
-	ldr	r8, [sp, #776]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #800]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #796]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #792]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #788]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r5, [sp, #772]
-	ldr	r2, [r0, #16]
-	add	r0, sp, #712
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r7, r9
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #716
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #756]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #740
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r11, {r5, r6, r9, r11}
-	ldr	r4, [sp, #712]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r8, [sp, #64]           @ 4-byte Reload
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	adds	r4, r8, r4
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r10, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #664
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #708]
-	add	r11, sp, #664
-	ldr	r7, [sp, #688]
-	ldr	r5, [sp, #684]
-	ldr	r8, [sp, #680]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r6, [sp, #676]
-	ldr	r2, [r0, #20]
-	add	r0, sp, #616
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r4, r9
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #616
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #660]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #640
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldm	r11, {r4, r5, r6, r8, r11}
-	adc	r9, r0, r1
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r9, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r10, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #568
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #612]
-	add	r11, sp, #568
-	ldr	r6, [sp, #592]
-	ldr	r4, [sp, #588]
-	ldr	r8, [sp, #584]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #608]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #604]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #600]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #596]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r5, [sp, #580]
-	ldr	r2, [r0, #24]
-	add	r0, sp, #520
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r7, r9
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #524
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #564]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #548
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r11, {r5, r6, r9, r11}
-	ldr	r4, [sp, #520]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r8, [sp, #64]           @ 4-byte Reload
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	adds	r4, r8, r4
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r10, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #472
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #516]
-	add	r11, sp, #472
-	ldr	r7, [sp, #496]
-	ldr	r5, [sp, #492]
-	ldr	r8, [sp, #488]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #512]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #508]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #504]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #500]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r6, [sp, #484]
-	ldr	r2, [r0, #28]
-	add	r0, sp, #424
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r4, r9
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #424
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #468]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #448
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldm	r11, {r4, r5, r6, r8, r11}
-	adc	r9, r0, r1
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r9, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r10, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #376
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #420]
-	add	r11, sp, #376
-	ldr	r6, [sp, #400]
-	ldr	r4, [sp, #396]
-	ldr	r8, [sp, #392]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #416]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #412]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #408]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #404]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r5, [sp, #388]
-	ldr	r2, [r0, #32]
-	add	r0, sp, #328
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r7, r9
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #332
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #372]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #356
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r11, {r5, r6, r9, r11}
-	ldr	r4, [sp, #328]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r8, [sp, #64]           @ 4-byte Reload
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	adds	r4, r8, r4
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r10, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r4, r0
-	add	r0, sp, #280
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #324]
-	add	r11, sp, #280
-	ldr	r7, [sp, #304]
-	ldr	r5, [sp, #300]
-	ldr	r8, [sp, #296]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #320]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #316]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #312]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #308]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r6, [sp, #292]
-	ldr	r2, [r0, #36]
-	add	r0, sp, #232
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r4, r9
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #232
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #276]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #256
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldm	r11, {r4, r5, r6, r8, r11}
-	adc	r9, r0, r1
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r9, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	adc	r0, r10, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #184
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #228]
-	add	r11, sp, #184
-	ldr	r6, [sp, #208]
-	ldr	r4, [sp, #204]
-	ldr	r8, [sp, #200]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #224]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #220]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #216]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #212]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r5, [sp, #196]
-	ldr	r2, [r0, #40]
-	add	r0, sp, #136
-	bl	.LmulPv352x32(PLT)
-	adds	r0, r7, r9
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	ldr	lr, [sp, #140]
-	add	r9, sp, #172
-	add	r12, sp, #152
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	adcs	r11, r1, r11
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r10, r1, r5
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	ldr	r4, [sp, #148]
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #144]
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adc	r1, r1, r2
-	ldr	r2, [sp, #136]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	adds	r5, r0, r2
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r11, r11, lr
-	adcs	r6, r10, r6
-	mul	r1, r5, r0
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldm	r9, {r7, r8, r9}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	str	r6, [sp, #32]           @ 4-byte Spill
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	adcs	r10, r6, r4
-	ldr	r4, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #88
-	adc	r9, r9, #0
-	bl	.LmulPv352x32(PLT)
-	add	r7, sp, #88
-	ldm	r7, {r0, r1, r3, r7}
-	adds	r0, r5, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r8, r11, r1
-	str	r8, [sp, #28]           @ 4-byte Spill
-	adcs	r6, r0, r3
-	ldr	r3, [sp, #104]
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r2, r10, r7
-	str	r6, [sp, #44]           @ 4-byte Spill
-	str	r2, [sp, #48]           @ 4-byte Spill
-	adcs	r7, r0, r3
-	ldr	r3, [sp, #108]
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r7, [sp, #52]           @ 4-byte Spill
-	adcs	r0, r0, r3
-	ldr	r3, [sp, #112]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r5, r0, r3
-	ldr	r3, [sp, #116]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	str	r5, [sp, #56]           @ 4-byte Spill
-	adcs	lr, r0, r3
-	ldr	r3, [sp, #120]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	lr, [sp, #60]           @ 4-byte Spill
-	adcs	r0, r0, r3
-	ldr	r3, [sp, #124]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	ldr	r3, [sp, #128]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r10, r0, r3
-	ldr	r3, [sp, #132]
-	str	r10, [sp, #64]          @ 4-byte Spill
-	adc	r12, r9, r3
-	mov	r3, r4
-	str	r12, [sp, #40]          @ 4-byte Spill
-	ldmib	r3, {r0, r1, r9}
-	ldr	r4, [r3, #16]
-	ldr	r11, [r3]
-	str	r4, [sp, #20]           @ 4-byte Spill
-	ldr	r4, [r3, #20]
-	subs	r11, r8, r11
-	ldr	r8, [r3, #36]
-	sbcs	r0, r6, r0
-	sbcs	r1, r2, r1
-	sbcs	r2, r7, r9
-	ldr	r9, [r3, #32]
-	ldr	r7, [sp, #80]           @ 4-byte Reload
-	str	r4, [sp, #24]           @ 4-byte Spill
-	ldr	r4, [r3, #24]
-	str	r4, [sp, #32]           @ 4-byte Spill
-	ldr	r4, [r3, #28]
-	ldr	r3, [r3, #40]
-	str	r4, [sp, #36]           @ 4-byte Spill
-	str	r3, [sp, #84]           @ 4-byte Spill
-	ldr	r3, [sp, #72]           @ 4-byte Reload
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	sbcs	r3, r3, r4
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	sbcs	r4, r5, r4
-	ldr	r5, [sp, #32]           @ 4-byte Reload
-	sbcs	r5, lr, r5
-	sbcs	lr, r7, r6
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	sbcs	r9, r7, r9
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	sbcs	r10, r10, r8
-	ldr	r8, [sp, #68]           @ 4-byte Reload
-	sbc	r12, r12, r6
-	asr	r6, r12, #31
-	cmp	r6, #0
-	movlt	r11, r7
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	r11, [r8]
-	movlt	r0, r7
-	str	r0, [r8, #4]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	movlt	r1, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	cmp	r6, #0
-	str	r1, [r8, #8]
-	movlt	r2, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r2, [r8, #12]
-	movlt	r3, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	str	r3, [r8, #16]
-	movlt	r4, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	cmp	r6, #0
-	str	r4, [r8, #20]
-	movlt	r5, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r5, [r8, #24]
-	movlt	lr, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	lr, [r8, #28]
-	movlt	r9, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	cmp	r6, #0
-	movlt	r10, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	movlt	r12, r0
-	add	r0, r8, #32
-	stm	r0, {r9, r10, r12}
-	add	sp, sp, #124
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end163:
-	.size	mcl_fp_montNF11L, .Lfunc_end163-mcl_fp_montNF11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montRed11L
-	.align	2
-	.type	mcl_fp_montRed11L,%function
-mcl_fp_montRed11L:                      @ @mcl_fp_montRed11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#676
-	sub	sp, sp, #676
-	mov	r10, r2
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r2, [r1, #4]
-	ldr	r5, [r1]
-	ldr	r0, [r10]
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldr	r2, [r1, #8]
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [r10, #4]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [r1, #12]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [r10, #8]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [r10, #12]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [r10, #16]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [r10, #20]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [r10, #24]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [r10, #-4]
-	str	r0, [sp, #140]          @ 4-byte Spill
-	mul	r2, r5, r0
-	ldr	r0, [r10, #28]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [r10, #32]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [r10, #36]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [r10, #40]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [r1, #64]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r1, #68]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r1, #72]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r1, #76]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r1, #80]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r1, #84]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r1, #56]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r1, #60]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r1, #28]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r1, #24]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r1, #20]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r1, #16]
-	mov	r1, r10
-	str	r0, [sp, #8]            @ 4-byte Spill
-	add	r0, sp, #624
-	bl	.LmulPv352x32(PLT)
-	add	r11, sp, #656
-	add	lr, sp, #624
-	ldm	r11, {r4, r8, r9, r11}
-	ldr	r7, [sp, #652]
-	ldr	r6, [sp, #648]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r5, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r5, r0, r1
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	mov	r1, r10
-	adcs	r0, r0, r2
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	mul	r2, r5, r0
-	add	r0, sp, #576
-	bl	.LmulPv352x32(PLT)
-	ldr	r4, [sp, #576]
-	add	r9, sp, #584
-	ldr	r12, [sp, #620]
-	ldr	lr, [sp, #616]
-	ldr	r2, [sp, #612]
-	ldr	r3, [sp, #608]
-	ldr	r11, [sp, #604]
-	ldr	r7, [sp, #600]
-	ldr	r6, [sp, #580]
-	ldm	r9, {r0, r1, r8, r9}
-	adds	r4, r5, r4
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	adcs	r5, r4, r6
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	mov	r9, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r5, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	add	r0, sp, #528
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #572]
-	add	r11, sp, #560
-	add	lr, sp, #528
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r5, r8, r11}
-	ldr	r6, [sp, #556]
-	ldr	r7, [sp, #552]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r9, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	mov	r5, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r1, r4
-	mov	r1, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	add	r0, sp, #480
-	bl	.LmulPv352x32(PLT)
-	ldr	r4, [sp, #480]
-	add	r9, sp, #488
-	ldr	r12, [sp, #524]
-	ldr	lr, [sp, #520]
-	ldr	r2, [sp, #516]
-	ldr	r3, [sp, #512]
-	ldr	r11, [sp, #508]
-	ldr	r7, [sp, #504]
-	ldr	r6, [sp, #484]
-	ldm	r9, {r0, r1, r8, r9}
-	adds	r4, r5, r4
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	adcs	r5, r4, r6
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r5, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #432
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #476]
-	add	r11, sp, #460
-	add	lr, sp, #432
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r11, {r6, r8, r9, r11}
-	ldr	r7, [sp, #456]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r5, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r1, r4
-	mov	r4, r1
-	mov	r1, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #384
-	bl	.LmulPv352x32(PLT)
-	ldr	r6, [sp, #384]
-	add	r9, sp, #392
-	ldr	r12, [sp, #428]
-	ldr	lr, [sp, #424]
-	ldr	r2, [sp, #420]
-	ldr	r3, [sp, #416]
-	ldr	r11, [sp, #412]
-	ldr	r5, [sp, #408]
-	ldr	r7, [sp, #388]
-	ldm	r9, {r0, r1, r8, r9}
-	adds	r4, r4, r6
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	adcs	r6, r4, r7
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	mov	r5, r6
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r6, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #336
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #380]
-	add	r11, sp, #364
-	add	lr, sp, #336
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r11, {r6, r8, r9, r11}
-	ldr	r7, [sp, #360]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r5, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r1, r4
-	mov	r4, r1
-	mov	r1, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	add	r0, sp, #288
-	bl	.LmulPv352x32(PLT)
-	ldr	r6, [sp, #288]
-	add	r9, sp, #296
-	ldr	r12, [sp, #332]
-	ldr	lr, [sp, #328]
-	ldr	r2, [sp, #324]
-	ldr	r3, [sp, #320]
-	ldr	r11, [sp, #316]
-	ldr	r5, [sp, #312]
-	ldr	r7, [sp, #292]
-	ldm	r9, {r0, r1, r8, r9}
-	adds	r4, r4, r6
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	adcs	r6, r4, r7
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	mov	r5, r6
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r6, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	add	r0, sp, #240
-	bl	.LmulPv352x32(PLT)
-	ldr	r0, [sp, #284]
-	add	r11, sp, #264
-	add	lr, sp, #240
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r6, r7, r8, r9, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r5, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r5, r0, r1
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r5, r4
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r11, r0, r11
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r10
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #192
-	bl	.LmulPv352x32(PLT)
-	add	r6, sp, #192
-	add	r7, sp, #208
-	ldm	r6, {r0, r1, r3, r6}
-	ldr	r12, [sp, #236]
-	ldr	lr, [sp, #232]
-	adds	r0, r5, r0
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r8, r0, r1
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	mul	r2, r8, r4
-	adcs	r0, r0, r3
-	ldr	r3, [sp, #228]
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #224]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldm	r7, {r0, r1, r4, r7}
-	ldr	r5, [sp, #88]           @ 4-byte Reload
-	adcs	r9, r5, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r10
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r4, r0, r4
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r5, r0, r6
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r11, r11, r3
-	adcs	r0, r0, lr
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r6, r0, #0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	add	r0, sp, #144
-	bl	.LmulPv352x32(PLT)
-	add	r3, sp, #144
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r8, r0
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r12, r0, r1
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r1, [sp, #160]
-	str	r12, [sp, #44]          @ 4-byte Spill
-	adcs	r2, r0, r2
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r3, r9, r3
-	str	r2, [sp, #52]           @ 4-byte Spill
-	str	r3, [sp, #56]           @ 4-byte Spill
-	adcs	r7, r0, r1
-	ldr	r1, [sp, #164]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r7, [sp, #60]           @ 4-byte Spill
-	adcs	r8, r4, r1
-	ldr	r1, [sp, #168]
-	str	r8, [sp, #64]           @ 4-byte Spill
-	adcs	r4, r0, r1
-	ldr	r1, [sp, #172]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	str	r4, [sp, #68]           @ 4-byte Spill
-	adcs	r5, r5, r1
-	ldr	r1, [sp, #176]
-	str	r5, [sp, #72]           @ 4-byte Spill
-	adcs	r11, r11, r1
-	ldr	r1, [sp, #180]
-	str	r11, [sp, #76]          @ 4-byte Spill
-	adcs	r9, r0, r1
-	ldr	r1, [sp, #184]
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	str	r9, [sp, #84]           @ 4-byte Spill
-	adcs	lr, r0, r1
-	ldr	r1, [sp, #188]
-	str	lr, [sp, #88]           @ 4-byte Spill
-	adcs	r0, r6, r1
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r6, [sp, #140]          @ 4-byte Reload
-	adc	r10, r0, #0
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	subs	r0, r12, r0
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	sbcs	r2, r3, r2
-	ldr	r3, [sp, #108]          @ 4-byte Reload
-	sbcs	r3, r7, r3
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	sbcs	r12, r8, r7
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	sbcs	r8, r4, r7
-	ldr	r4, [sp, #120]          @ 4-byte Reload
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	sbcs	r4, r5, r4
-	ldr	r5, [sp, #92]           @ 4-byte Reload
-	sbcs	r5, r11, r5
-	sbcs	r11, r9, r7
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	sbcs	r9, lr, r7
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	sbcs	lr, r6, r7
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	sbc	r6, r10, #0
-	ldr	r10, [sp, #136]         @ 4-byte Reload
-	ands	r6, r6, #1
-	movne	r0, r7
-	str	r0, [r10]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	movne	r1, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	str	r1, [r10, #4]
-	movne	r2, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	cmp	r6, #0
-	str	r2, [r10, #8]
-	movne	r3, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	str	r3, [r10, #12]
-	movne	r12, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	str	r12, [r10, #16]
-	movne	r8, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	cmp	r6, #0
-	str	r8, [r10, #20]
-	movne	r4, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r4, [r10, #24]
-	movne	r5, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	str	r5, [r10, #28]
-	movne	r11, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	cmp	r6, #0
-	str	r11, [r10, #32]
-	movne	r9, r0
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	str	r9, [r10, #36]
-	movne	lr, r0
-	str	lr, [r10, #40]
-	add	sp, sp, #676
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end164:
-	.size	mcl_fp_montRed11L, .Lfunc_end164-mcl_fp_montRed11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addPre11L
-	.align	2
-	.type	mcl_fp_addPre11L,%function
-mcl_fp_addPre11L:                       @ @mcl_fp_addPre11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#20
-	sub	sp, sp, #20
-	ldm	r1, {r3, r12}
-	ldr	r8, [r1, #8]
-	ldr	r9, [r1, #12]
-	ldmib	r2, {r5, r6, r7, r10}
-	ldr	r4, [r2, #20]
-	ldr	r11, [r2]
-	str	r4, [sp]                @ 4-byte Spill
-	ldr	r4, [r2, #24]
-	adds	lr, r11, r3
-	ldr	r3, [r2, #36]
-	ldr	r11, [r2, #32]
-	adcs	r5, r5, r12
-	add	r12, r1, #16
-	adcs	r6, r6, r8
-	adcs	r7, r7, r9
-	add	r9, r1, #32
-	str	r4, [sp, #4]            @ 4-byte Spill
-	ldr	r4, [r2, #28]
-	ldr	r2, [r2, #40]
-	str	r3, [sp, #8]            @ 4-byte Spill
-	str	r4, [sp, #16]           @ 4-byte Spill
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldm	r9, {r4, r8, r9}
-	ldm	r12, {r1, r2, r3, r12}
-	str	lr, [r0]
-	stmib	r0, {r5, r6}
-	str	r7, [r0, #12]
-	ldr	r7, [sp]                @ 4-byte Reload
-	adcs	r1, r10, r1
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r2, r7, r2
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adcs	r1, r1, r3
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r1, r1, r12
-	str	r1, [r0, #28]
-	adcs	r1, r11, r4
-	add	r0, r0, #32
-	adcs	r2, r2, r8
-	adcs	r3, r3, r9
-	stm	r0, {r1, r2, r3}
-	mov	r0, #0
-	adc	r0, r0, #0
-	add	sp, sp, #20
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end165:
-	.size	mcl_fp_addPre11L, .Lfunc_end165-mcl_fp_addPre11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subPre11L
-	.align	2
-	.type	mcl_fp_subPre11L,%function
-mcl_fp_subPre11L:                       @ @mcl_fp_subPre11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#20
-	sub	sp, sp, #20
-	ldmib	r2, {r8, r12, lr}
-	ldr	r3, [r2, #16]
-	ldr	r7, [r2]
-	ldr	r6, [r1]
-	ldr	r5, [r1, #4]
-	ldr	r4, [r1, #8]
-	ldr	r11, [r2, #32]
-	ldr	r10, [r2, #40]
-	ldr	r9, [r1, #36]
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldr	r3, [r2, #20]
-	subs	r6, r6, r7
-	ldr	r7, [r2, #36]
-	sbcs	r5, r5, r8
-	ldr	r8, [r1, #40]
-	sbcs	r4, r4, r12
-	str	r3, [sp, #8]            @ 4-byte Spill
-	ldr	r3, [r2, #24]
-	str	r7, [sp]                @ 4-byte Spill
-	ldr	r7, [r1, #32]
-	str	r3, [sp, #12]           @ 4-byte Spill
-	ldr	r3, [r2, #28]
-	str	r3, [sp, #16]           @ 4-byte Spill
-	ldr	r3, [r1, #12]
-	sbcs	r12, r3, lr
-	add	lr, r1, #16
-	ldm	lr, {r1, r2, r3, lr}
-	str	r6, [r0]
-	str	r5, [r0, #4]
-	str	r4, [r0, #8]
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	str	r12, [r0, #12]
-	sbcs	r1, r1, r4
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	sbcs	r2, r2, r6
-	str	r2, [r0, #20]
-	ldr	r2, [sp]                @ 4-byte Reload
-	sbcs	r1, r3, r1
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	sbcs	r1, lr, r1
-	str	r1, [r0, #28]
-	sbcs	r1, r7, r11
-	add	r0, r0, #32
-	sbcs	r2, r9, r2
-	sbcs	r3, r8, r10
-	stm	r0, {r1, r2, r3}
-	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	add	sp, sp, #20
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end166:
-	.size	mcl_fp_subPre11L, .Lfunc_end166-mcl_fp_subPre11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_11L
-	.align	2
-	.type	mcl_fp_shr1_11L,%function
-mcl_fp_shr1_11L:                        @ @mcl_fp_shr1_11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	ldmib	r1, {r2, r3, r12, lr}
-	add	r8, r1, #20
-	add	r11, r1, #32
-	ldm	r8, {r4, r5, r8}
-	ldr	r7, [r1]
-	ldm	r11, {r9, r10, r11}
-	lsrs	r1, r12, #1
-	lsr	r6, r2, #1
-	rrx	r1, r3
-	lsrs	r2, r2, #1
-	orr	r6, r6, r3, lsl #31
-	lsr	r3, r11, #1
-	rrx	r2, r7
-	stm	r0, {r2, r6}
-	str	r1, [r0, #8]
-	lsr	r1, r12, #1
-	lsr	r2, r10, #1
-	orr	r1, r1, lr, lsl #31
-	orr	r2, r2, r11, lsl #31
-	str	r1, [r0, #12]
-	lsrs	r1, r4, #1
-	rrx	r1, lr
-	str	r1, [r0, #16]
-	lsr	r1, r4, #1
-	orr	r1, r1, r5, lsl #31
-	str	r1, [r0, #20]
-	lsrs	r1, r8, #1
-	rrx	r1, r5
-	str	r1, [r0, #24]
-	lsr	r1, r8, #1
-	orr	r1, r1, r9, lsl #31
-	str	r1, [r0, #28]
-	lsrs	r1, r10, #1
-	add	r0, r0, #32
-	rrx	r1, r9
-	stm	r0, {r1, r2, r3}
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end167:
-	.size	mcl_fp_shr1_11L, .Lfunc_end167-mcl_fp_shr1_11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_add11L
-	.align	2
-	.type	mcl_fp_add11L,%function
-mcl_fp_add11L:                          @ @mcl_fp_add11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#32
-	sub	sp, sp, #32
-	ldm	r1, {r12, lr}
-	ldr	r5, [r2]
-	ldr	r8, [r1, #8]
-	ldr	r9, [r1, #12]
-	ldmib	r2, {r4, r6, r7}
-	adds	r5, r5, r12
-	ldr	r12, [r1, #32]
-	adcs	r4, r4, lr
-	str	r5, [sp, #28]           @ 4-byte Spill
-	ldr	r5, [r1, #24]
-	ldr	lr, [r1, #40]
-	adcs	r6, r6, r8
-	str	r4, [sp, #24]           @ 4-byte Spill
-	ldr	r4, [r1, #20]
-	adcs	r7, r7, r9
-	str	r6, [sp, #12]           @ 4-byte Spill
-	ldr	r6, [r1, #16]
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldr	r7, [r2, #16]
-	adcs	r9, r7, r6
-	ldr	r7, [r2, #20]
-	str	r9, [sp]                @ 4-byte Spill
-	adcs	r7, r7, r4
-	ldr	r4, [r2, #24]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	adcs	r8, r4, r5
-	ldr	r4, [r1, #28]
-	ldr	r5, [r2, #28]
-	adcs	r6, r5, r4
-	ldr	r5, [r2, #32]
-	ldr	r4, [r1, #36]
-	ldr	r1, [r2, #36]
-	ldr	r2, [r2, #40]
-	adcs	r10, r5, r12
-	ldr	r12, [sp, #24]          @ 4-byte Reload
-	adcs	r1, r1, r4
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	adcs	r11, r2, lr
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	ldr	lr, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #20]           @ 4-byte Spill
-	str	r2, [r0]
-	str	r12, [r0, #4]
-	str	lr, [r0, #8]
-	str	r4, [r0, #12]
-	str	r9, [r0, #16]
-	str	r7, [r0, #20]
-	str	r8, [r0, #24]
-	str	r6, [r0, #28]
-	str	r10, [r0, #32]
-	str	r1, [r0, #36]
-	mov	r1, #0
-	str	r11, [r0, #40]
-	mov	r9, r6
-	adc	r1, r1, #0
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r3, {r1, r7}
-	ldr	r5, [r3, #8]
-	ldr	r6, [r3, #12]
-	subs	r1, r2, r1
-	ldr	r2, [sp]                @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	sbcs	r1, r12, r7
-	str	r1, [sp, #24]           @ 4-byte Spill
-	sbcs	r1, lr, r5
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	sbcs	r5, r4, r6
-	sbcs	r7, r2, r1
-	ldr	r1, [r3, #20]
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	sbcs	r4, r2, r1
-	ldr	r1, [r3, #24]
-	sbcs	r12, r8, r1
-	ldr	r1, [r3, #28]
-	add	r3, r3, #32
-	sbcs	lr, r9, r1
-	ldm	r3, {r1, r2, r3}
-	ldr	r6, [sp, #20]           @ 4-byte Reload
-	sbcs	r1, r10, r1
-	sbcs	r2, r6, r2
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	sbcs	r3, r11, r3
-	sbc	r6, r6, #0
-	tst	r6, #1
-	bne	.LBB168_2
-@ BB#1:                                 @ %nocarry
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	str	r6, [r0]
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	str	r6, [r0, #4]
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	str	r6, [r0, #8]
-	str	r5, [r0, #12]
-	str	r7, [r0, #16]
-	str	r4, [r0, #20]
-	str	r12, [r0, #24]
-	str	lr, [r0, #28]
-	add	r0, r0, #32
-	stm	r0, {r1, r2, r3}
-.LBB168_2:                              @ %carry
-	add	sp, sp, #32
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end168:
-	.size	mcl_fp_add11L, .Lfunc_end168-mcl_fp_add11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF11L
-	.align	2
-	.type	mcl_fp_addNF11L,%function
-mcl_fp_addNF11L:                        @ @mcl_fp_addNF11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#56
-	sub	sp, sp, #56
-	ldm	r1, {r5, r8, lr}
-	ldr	r6, [r2]
-	ldr	r12, [r1, #12]
-	ldmib	r2, {r4, r7, r9}
-	ldr	r11, [r1, #24]
-	adds	r10, r6, r5
-	adcs	r4, r4, r8
-	ldr	r8, [r1, #20]
-	adcs	r7, r7, lr
-	str	r4, [sp, #32]           @ 4-byte Spill
-	ldr	r4, [r2, #16]
-	ldr	lr, [r1, #36]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [r1, #16]
-	adcs	r6, r9, r12
-	ldr	r12, [r2, #36]
-	str	r6, [sp, #16]           @ 4-byte Spill
-	adcs	r7, r4, r7
-	ldr	r4, [r2, #28]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	adcs	r7, r7, r8
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	adcs	r8, r7, r11
-	ldr	r7, [r1, #28]
-	ldr	r11, [r1, #40]
-	str	r8, [sp, #20]           @ 4-byte Spill
-	adcs	r7, r4, r7
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [r1, #32]
-	ldr	r1, [r2, #32]
-	ldr	r2, [r2, #40]
-	adcs	r4, r1, r7
-	adcs	r1, r12, lr
-	str	r4, [sp, #24]           @ 4-byte Spill
-	str	r1, [sp, #48]           @ 4-byte Spill
-	adc	r9, r2, r11
-	ldmib	r3, {r1, r2, lr}
-	ldr	r5, [r3, #20]
-	ldr	r11, [r3]
-	ldr	r7, [r3, #16]
-	ldr	r12, [r3, #24]
-	str	r5, [sp, #12]           @ 4-byte Spill
-	ldr	r5, [r3, #28]
-	subs	r11, r10, r11
-	str	r5, [sp, #28]           @ 4-byte Spill
-	ldr	r5, [sp, #32]           @ 4-byte Reload
-	sbcs	r1, r5, r1
-	ldr	r5, [sp, #40]           @ 4-byte Reload
-	sbcs	r2, r5, r2
-	ldr	r5, [r3, #32]
-	sbcs	lr, r6, lr
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	str	r5, [sp, #8]            @ 4-byte Spill
-	ldr	r5, [r3, #36]
-	ldr	r3, [r3, #40]
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	str	r5, [sp]                @ 4-byte Spill
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	sbcs	r7, r3, r7
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	sbcs	r3, r3, r5
-	ldr	r5, [sp, #28]           @ 4-byte Reload
-	sbcs	r12, r8, r12
-	sbcs	r8, r6, r5
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	sbcs	r4, r4, r5
-	ldr	r5, [sp]                @ 4-byte Reload
-	str	r4, [sp, #12]           @ 4-byte Spill
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	sbcs	r4, r4, r5
-	ldr	r5, [sp, #32]           @ 4-byte Reload
-	str	r4, [sp, #28]           @ 4-byte Spill
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	sbc	r6, r9, r4
-	asr	r4, r6, #31
-	cmp	r4, #0
-	movlt	r11, r10
-	movlt	r1, r5
-	str	r11, [r0]
-	str	r1, [r0, #4]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	movlt	r2, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	cmp	r4, #0
-	str	r2, [r0, #8]
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	movlt	lr, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	lr, [r0, #12]
-	movlt	r7, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r7, [r0, #16]
-	movlt	r3, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	cmp	r4, #0
-	str	r3, [r0, #20]
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	movlt	r12, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r12, [r0, #24]
-	movlt	r8, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r8, [r0, #28]
-	movlt	r3, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	cmp	r4, #0
-	movlt	r6, r9
-	str	r3, [r0, #32]
-	movlt	r2, r1
-	str	r2, [r0, #36]
-	str	r6, [r0, #40]
-	add	sp, sp, #56
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end169:
-	.size	mcl_fp_addNF11L, .Lfunc_end169-mcl_fp_addNF11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub11L
-	.align	2
-	.type	mcl_fp_sub11L,%function
-mcl_fp_sub11L:                          @ @mcl_fp_sub11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#48
-	sub	sp, sp, #48
-	mov	r10, r3
-	ldr	r12, [r2]
-	ldr	r9, [r2, #4]
-	ldr	r8, [r2, #8]
-	ldr	r3, [r2, #12]
-	ldm	r1, {r4, r5, r6, r7}
-	subs	r4, r4, r12
-	sbcs	r5, r5, r9
-	str	r4, [sp, #36]           @ 4-byte Spill
-	ldr	r4, [r2, #24]
-	sbcs	r6, r6, r8
-	str	r5, [sp, #44]           @ 4-byte Spill
-	ldr	r5, [r2, #20]
-	add	r8, r1, #32
-	sbcs	r12, r7, r3
-	str	r6, [sp, #40]           @ 4-byte Spill
-	ldr	r6, [r2, #16]
-	ldr	r7, [r1, #16]
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	str	r12, [sp, #24]          @ 4-byte Spill
-	sbcs	r11, r7, r6
-	ldr	r6, [r1, #20]
-	ldr	r7, [r2, #40]
-	sbcs	r9, r6, r5
-	ldr	r5, [r1, #24]
-	sbcs	r6, r5, r4
-	ldr	r4, [r2, #28]
-	ldr	r5, [r1, #28]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	sbcs	lr, r5, r4
-	ldr	r4, [r2, #36]
-	ldr	r5, [r2, #32]
-	str	lr, [sp, #20]           @ 4-byte Spill
-	str	r4, [sp, #32]           @ 4-byte Spill
-	ldm	r8, {r2, r4, r8}
-	str	r3, [r0]
-	sbcs	r1, r2, r5
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	sbcs	r2, r4, r2
-	mov	r4, r3
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	sbcs	r8, r8, r7
-	mov	r7, #0
-	sbc	r7, r7, #0
-	tst	r7, #1
-	str	r3, [r0, #4]
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	str	r3, [r0, #8]
-	add	r3, r0, #32
-	str	r12, [r0, #12]
-	str	r11, [r0, #16]
-	str	r9, [r0, #20]
-	str	r6, [r0, #24]
-	str	lr, [r0, #28]
-	stm	r3, {r1, r2, r8}
-	beq	.LBB170_2
-@ BB#1:                                 @ %carry
-	ldr	r3, [r10, #32]
-	str	r3, [sp, #12]           @ 4-byte Spill
-	ldr	r3, [r10, #36]
-	str	r3, [sp, #16]           @ 4-byte Spill
-	ldr	r3, [r10, #40]
-	str	r3, [sp, #32]           @ 4-byte Spill
-	ldmib	r10, {r5, lr}
-	ldr	r3, [r10, #20]
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	ldr	r7, [r10, #12]
-	ldr	r12, [r10, #16]
-	str	r3, [sp]                @ 4-byte Spill
-	ldr	r3, [r10, #24]
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldr	r3, [r10, #28]
-	str	r3, [sp, #8]            @ 4-byte Spill
-	ldr	r3, [r10]
-	adds	r3, r3, r4
-	ldr	r4, [sp, #40]           @ 4-byte Reload
-	adcs	r5, r5, r6
-	stm	r0, {r3, r5}
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	adcs	r4, lr, r4
-	str	r4, [r0, #8]
-	adcs	r3, r7, r3
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	str	r3, [r0, #12]
-	adcs	r3, r12, r11
-	str	r3, [r0, #16]
-	ldr	r3, [sp]                @ 4-byte Reload
-	adcs	r3, r3, r9
-	str	r3, [r0, #20]
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	adcs	r3, r7, r3
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	str	r3, [r0, #24]
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	adcs	r3, r7, r3
-	str	r3, [r0, #28]
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	add	r0, r0, #32
-	adcs	r1, r3, r1
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	adcs	r2, r3, r2
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	adc	r3, r3, r8
-	stm	r0, {r1, r2, r3}
-.LBB170_2:                              @ %nocarry
-	add	sp, sp, #48
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end170:
-	.size	mcl_fp_sub11L, .Lfunc_end170-mcl_fp_sub11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF11L
-	.align	2
-	.type	mcl_fp_subNF11L,%function
-mcl_fp_subNF11L:                        @ @mcl_fp_subNF11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#68
-	sub	sp, sp, #68
-	mov	r12, r0
-	ldr	r0, [r2, #32]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r2, #36]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r2, #40]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldm	r2, {r8, r10}
-	ldr	r0, [r2, #8]
-	ldr	r5, [r2, #16]
-	ldr	r11, [r2, #20]
-	ldr	lr, [r1, #16]
-	ldr	r6, [r1, #20]
-	ldr	r9, [r1, #24]
-	ldr	r7, [r1, #28]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r2, #12]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r2, #24]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r2, #28]
-	ldr	r2, [r1, #8]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r1, #12]
-	ldm	r1, {r1, r4}
-	subs	r1, r1, r8
-	sbcs	r8, r4, r10
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	str	r8, [sp, #16]           @ 4-byte Spill
-	sbcs	r2, r2, r4
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	sbcs	r4, r0, r2
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	sbcs	r5, lr, r5
-	ldr	lr, [r3, #12]
-	str	r4, [sp, #20]           @ 4-byte Spill
-	sbcs	r11, r6, r11
-	mov	r6, r1
-	str	r5, [sp, #28]           @ 4-byte Spill
-	str	r11, [sp, #32]          @ 4-byte Spill
-	sbcs	r0, r9, r0
-	ldr	r9, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	sbcs	r0, r7, r0
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	sbcs	r0, r2, r0
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	sbcs	r10, r2, r0
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r10, [sp, #48]          @ 4-byte Spill
-	sbc	r0, r7, r2
-	ldr	r2, [r3, #36]
-	ldr	r7, [r3, #4]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r3, #32]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r3, #40]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r3, #16]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [r3, #8]
-	str	r0, [sp]                @ 4-byte Spill
-	ldr	r0, [r3, #20]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [r3, #24]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [r3, #28]
-	ldr	r3, [r3]
-	adds	r1, r6, r3
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp]                @ 4-byte Reload
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	adcs	r7, r8, r7
-	adcs	r2, r9, r2
-	adcs	lr, r4, lr
-	adcs	r4, r5, r0
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adcs	r5, r11, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r8, r0, r3
-	ldr	r3, [sp, #64]           @ 4-byte Reload
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r11, r3, r0
-	ldr	r3, [sp, #60]           @ 4-byte Reload
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r3, r3, r0
-	str	r3, [sp, #40]           @ 4-byte Spill
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r10, r3
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r10, r0, r3
-	asr	r3, r0, #31
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	cmp	r3, #0
-	movge	r1, r6
-	movge	r2, r9
-	str	r1, [r12]
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	movge	r7, r0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	cmp	r3, #0
-	str	r7, [r12, #4]
-	str	r2, [r12, #8]
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	movge	lr, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	str	lr, [r12, #12]
-	movge	r4, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	str	r4, [r12, #16]
-	movge	r5, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	cmp	r3, #0
-	str	r5, [r12, #20]
-	movge	r8, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	str	r8, [r12, #24]
-	movge	r11, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	movge	r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	cmp	r3, #0
-	str	r11, [r12, #28]
-	movge	r1, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	movge	r10, r2
-	add	r2, r12, #32
-	stm	r2, {r0, r1, r10}
-	add	sp, sp, #68
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end171:
-	.size	mcl_fp_subNF11L, .Lfunc_end171-mcl_fp_subNF11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add11L
-	.align	2
-	.type	mcl_fpDbl_add11L,%function
-mcl_fpDbl_add11L:                       @ @mcl_fpDbl_add11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#120
-	sub	sp, sp, #120
-	ldm	r1, {r7, r12, lr}
-	ldr	r8, [r1, #12]
-	ldm	r2, {r4, r5, r6, r9}
-	ldr	r10, [r2, #20]
-	adds	r4, r4, r7
-	adcs	r7, r5, r12
-	str	r4, [sp, #40]           @ 4-byte Spill
-	ldr	r4, [r2, #64]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	adcs	r7, r6, lr
-	add	lr, r1, #16
-	str	r7, [sp, #24]           @ 4-byte Spill
-	adcs	r7, r9, r8
-	add	r8, r1, #32
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldr	r7, [r2, #32]
-	str	r4, [sp, #108]          @ 4-byte Spill
-	ldr	r4, [r2, #68]
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r4, [sp, #104]          @ 4-byte Spill
-	ldr	r4, [r2, #72]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r4, [sp, #96]           @ 4-byte Spill
-	ldr	r4, [r2, #76]
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r4, [sp, #116]          @ 4-byte Spill
-	ldr	r4, [r2, #80]
-	str	r7, [sp, #84]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r4, [sp, #100]          @ 4-byte Spill
-	ldr	r4, [r2, #84]
-	str	r7, [sp, #88]           @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r4, [sp, #112]          @ 4-byte Spill
-	ldr	r4, [r2, #16]
-	str	r7, [sp, #92]           @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldm	r8, {r5, r6, r8}
-	ldr	r2, [r1, #44]
-	ldr	r11, [r1, #52]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r9, [sp, #40]           @ 4-byte Reload
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r4, r1
-	str	r9, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	adcs	r2, r10, r2
-	add	r10, r3, #32
-	str	r7, [r0, #8]
-	str	r4, [r0, #12]
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	ldr	r7, [sp]                @ 4-byte Reload
-	adcs	r1, r1, r12
-	str	r1, [r0, #24]
-	adcs	r2, r2, lr
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	str	r1, [r0, #32]
-	adcs	r2, r2, r6
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r2, [r0, #36]
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	adcs	r6, r2, r7
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	str	r6, [sp, #72]           @ 4-byte Spill
-	adcs	r4, r1, r2
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r4, [sp, #76]           @ 4-byte Spill
-	adcs	r2, r1, r11
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r2, [sp, #80]           @ 4-byte Spill
-	adcs	r5, r1, r7
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r5, [sp, #92]           @ 4-byte Spill
-	adcs	r8, r1, r7
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	r8, [sp, #84]           @ 4-byte Spill
-	adcs	r1, r1, r7
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r1, r1, r7
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r12, r1, r7
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	str	r12, [sp, #96]          @ 4-byte Spill
-	adcs	r1, r1, r7
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r1, r7
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	adcs	r1, r1, r7
-	str	r1, [sp, #112]          @ 4-byte Spill
-	mov	r1, #0
-	adc	r1, r1, #0
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldmib	r3, {r1, r9, lr}
-	ldr	r7, [r3, #16]
-	ldr	r11, [r3]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r3, #20]
-	subs	r11, r6, r11
-	sbcs	r1, r4, r1
-	sbcs	r4, r2, r9
-	sbcs	r2, r5, lr
-	str	r7, [sp, #60]           @ 4-byte Spill
-	ldr	r7, [r3, #24]
-	str	r7, [sp, #64]           @ 4-byte Spill
-	ldr	r7, [r3, #28]
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldm	r10, {r5, r9, r10}
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	ldr	r6, [sp, #60]           @ 4-byte Reload
-	sbcs	r7, r8, r3
-	ldr	r3, [sp, #108]          @ 4-byte Reload
-	sbcs	r8, r3, r6
-	ldr	r3, [sp, #104]          @ 4-byte Reload
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	sbcs	r3, r3, r6
-	ldr	r6, [sp, #68]           @ 4-byte Reload
-	sbcs	r12, r12, r6
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	sbcs	lr, r6, r5
-	ldr	r5, [sp, #100]          @ 4-byte Reload
-	ldr	r6, [sp, #112]          @ 4-byte Reload
-	sbcs	r9, r5, r9
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	sbcs	r10, r6, r10
-	ldr	r6, [sp, #88]           @ 4-byte Reload
-	sbc	r6, r6, #0
-	ands	r6, r6, #1
-	movne	r11, r5
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	str	r11, [r0, #44]
-	movne	r1, r5
-	str	r1, [r0, #48]
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	movne	r4, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	cmp	r6, #0
-	str	r4, [r0, #52]
-	movne	r2, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r2, [r0, #56]
-	movne	r7, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r7, [r0, #60]
-	movne	r8, r1
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	cmp	r6, #0
-	str	r8, [r0, #64]
-	movne	r3, r1
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r3, [r0, #68]
-	movne	r12, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r12, [r0, #72]
-	movne	lr, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	cmp	r6, #0
-	str	lr, [r0, #76]
-	movne	r9, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r9, [r0, #80]
-	movne	r10, r1
-	str	r10, [r0, #84]
-	add	sp, sp, #120
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end172:
-	.size	mcl_fpDbl_add11L, .Lfunc_end172-mcl_fpDbl_add11L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sub11L
-	.align	2
-	.type	mcl_fpDbl_sub11L,%function
-mcl_fpDbl_sub11L:                       @ @mcl_fpDbl_sub11L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#120
-	sub	sp, sp, #120
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #96]           @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #100]          @ 4-byte Spill
-	ldr	r7, [r2, #72]
-	str	r7, [sp, #104]          @ 4-byte Spill
-	ldr	r7, [r2, #76]
-	str	r7, [sp, #112]          @ 4-byte Spill
-	ldr	r7, [r2, #80]
-	str	r7, [sp, #108]          @ 4-byte Spill
-	ldr	r7, [r2, #84]
-	str	r7, [sp, #116]          @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #92]           @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #88]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #84]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldr	r7, [r2]
-	ldmib	r2, {r4, r8, r10}
-	ldm	r1, {r5, r6, r12, lr}
-	ldr	r9, [r2, #20]
-	subs	r5, r5, r7
-	ldr	r7, [r2, #24]
-	sbcs	r4, r6, r4
-	str	r5, [sp, #16]           @ 4-byte Spill
-	ldr	r5, [r2, #32]
-	str	r4, [sp, #8]            @ 4-byte Spill
-	ldr	r4, [r2, #28]
-	sbcs	r8, r12, r8
-	str	r7, [sp, #32]           @ 4-byte Spill
-	sbcs	r7, lr, r10
-	add	r10, r1, #32
-	add	lr, r1, #16
-	str	r5, [sp, #40]           @ 4-byte Spill
-	str	r7, [sp]                @ 4-byte Spill
-	str	r4, [sp, #36]           @ 4-byte Spill
-	ldr	r4, [r2, #16]
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r10}
-	ldr	r2, [r1, #44]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [r1, #52]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #16]          @ 4-byte Reload
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	str	r11, [r0]
-	stmib	r0, {r7, r8}
-	sbcs	r1, r1, r4
-	mov	r8, #0
-	ldr	r4, [sp]                @ 4-byte Reload
-	sbcs	r2, r2, r9
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	str	r4, [r0, #12]
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	sbcs	r1, r12, r1
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	sbcs	r2, lr, r2
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	sbcs	r1, r5, r1
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	sbcs	r2, r6, r2
-	str	r2, [r0, #36]
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	sbcs	r1, r10, r1
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	sbcs	r4, r2, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r4, [sp, #40]           @ 4-byte Spill
-	sbcs	r2, r2, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r2, [sp, #68]           @ 4-byte Spill
-	sbcs	r9, r7, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	sbcs	r12, r7, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	str	r12, [sp, #80]          @ 4-byte Spill
-	sbcs	lr, r7, r1
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	lr, [sp, #84]           @ 4-byte Spill
-	sbcs	r5, r7, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	str	r5, [sp, #96]           @ 4-byte Spill
-	sbcs	r6, r7, r1
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	str	r6, [sp, #100]          @ 4-byte Spill
-	sbcs	r11, r7, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	str	r11, [sp, #104]         @ 4-byte Spill
-	sbcs	r1, r7, r1
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	sbcs	r10, r7, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	str	r10, [sp, #108]         @ 4-byte Spill
-	sbcs	r1, r7, r1
-	ldr	r7, [r3, #4]
-	str	r1, [sp, #116]          @ 4-byte Spill
-	sbc	r1, r8, #0
-	ldr	r8, [r3, #28]
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [r3, #32]
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [r3, #36]
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [r3, #40]
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [r3, #8]
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [r3, #12]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [r3, #20]
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [r3, #24]
-	ldr	r3, [r3]
-	str	r1, [sp, #64]           @ 4-byte Spill
-	adds	r1, r4, r3
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	ldr	r4, [sp, #56]           @ 4-byte Reload
-	adcs	r7, r2, r7
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	adcs	r2, r9, r2
-	adcs	r3, r12, r3
-	adcs	r12, lr, r4
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	adcs	r4, r5, r4
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	adcs	lr, r6, r5
-	ldr	r6, [sp, #112]          @ 4-byte Reload
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	adcs	r8, r11, r8
-	adcs	r11, r6, r5
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	ldr	r5, [sp, #116]          @ 4-byte Reload
-	adcs	r10, r10, r6
-	ldr	r6, [sp, #88]           @ 4-byte Reload
-	adc	r6, r5, r6
-	str	r6, [sp, #88]           @ 4-byte Spill
-	ldr	r6, [sp, #92]           @ 4-byte Reload
-	ands	r5, r6, #1
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	moveq	r2, r9
-	moveq	r1, r6
-	str	r1, [r0, #44]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	moveq	r7, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	cmp	r5, #0
-	str	r7, [r0, #48]
-	str	r2, [r0, #52]
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	moveq	r3, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r3, [r0, #56]
-	moveq	r12, r1
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r12, [r0, #60]
-	moveq	r4, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	cmp	r5, #0
-	str	r4, [r0, #64]
-	moveq	lr, r1
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	str	lr, [r0, #68]
-	moveq	r8, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r8, [r0, #72]
-	moveq	r11, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	cmp	r5, #0
-	str	r11, [r0, #76]
-	moveq	r10, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r10, [r0, #80]
-	moveq	r2, r1
-	str	r2, [r0, #84]
-	add	sp, sp, #120
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end173:
-	.size	mcl_fpDbl_sub11L, .Lfunc_end173-mcl_fpDbl_sub11L
-	.cantunwind
-	.fnend
-
-	.align	2
-	.type	.LmulPv384x32,%function
-.LmulPv384x32:                          @ @mulPv384x32
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r9, [r1, #12]
-	umull	r4, r8, lr, r2
-	umull	lr, r6, r12, r2
-	mov	r5, r4
-	mov	r7, r6
-	str	lr, [r0]
-	umull	lr, r12, r9, r2
-	umlal	r7, r5, r3, r2
-	str	r5, [r0, #8]
-	str	r7, [r0, #4]
-	umull	r5, r7, r3, r2
-	adds	r3, r6, r5
-	adcs	r3, r7, r4
-	adcs	r3, r8, lr
-	str	r3, [r0, #12]
-	ldr	r3, [r1, #16]
-	umull	r7, r6, r3, r2
-	adcs	r3, r12, r7
-	str	r3, [r0, #16]
-	ldr	r3, [r1, #20]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #20]
-	ldr	r3, [r1, #24]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #24]
-	ldr	r3, [r1, #28]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #28]
-	ldr	r3, [r1, #32]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #32]
-	ldr	r3, [r1, #36]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #36]
-	ldr	r3, [r1, #40]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #40]
-	ldr	r1, [r1, #44]
-	umull	r3, r7, r1, r2
-	adcs	r1, r6, r3
-	str	r1, [r0, #44]
-	adc	r1, r7, #0
-	str	r1, [r0, #48]
-	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
-	mov	pc, lr
-.Lfunc_end174:
-	.size	.LmulPv384x32, .Lfunc_end174-.LmulPv384x32
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mulUnitPre12L
-	.align	2
-	.type	mcl_fp_mulUnitPre12L,%function
-mcl_fp_mulUnitPre12L:                   @ @mcl_fp_mulUnitPre12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#60
-	sub	sp, sp, #60
-	mov	r4, r0
-	mov	r0, sp
-	bl	.LmulPv384x32(PLT)
-	ldr	r12, [sp, #48]
-	ldr	lr, [sp, #44]
-	ldr	r8, [sp, #40]
-	ldr	r9, [sp, #36]
-	ldr	r10, [sp, #32]
-	ldr	r11, [sp, #28]
-	ldr	r5, [sp, #24]
-	ldr	r6, [sp, #20]
-	ldm	sp, {r2, r3}
-	add	r7, sp, #8
-	ldm	r7, {r0, r1, r7}
-	stm	r4, {r2, r3}
-	add	r2, r4, #8
-	stm	r2, {r0, r1, r7}
-	str	r6, [r4, #20]
-	str	r5, [r4, #24]
-	str	r11, [r4, #28]
-	str	r10, [r4, #32]
-	str	r9, [r4, #36]
-	str	r8, [r4, #40]
-	str	lr, [r4, #44]
-	str	r12, [r4, #48]
-	add	sp, sp, #60
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end175:
-	.size	mcl_fp_mulUnitPre12L, .Lfunc_end175-mcl_fp_mulUnitPre12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_mulPre12L
-	.align	2
-	.type	mcl_fpDbl_mulPre12L,%function
-mcl_fpDbl_mulPre12L:                    @ @mcl_fpDbl_mulPre12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#196
-	sub	sp, sp, #196
-	mov	r6, r2
-	mov	r5, r1
-	mov	r4, r0
-	bl	mcl_fpDbl_mulPre6L(PLT)
-	add	r0, r4, #48
-	add	r1, r5, #24
-	add	r2, r6, #24
-	bl	mcl_fpDbl_mulPre6L(PLT)
-	add	lr, r6, #24
-	ldr	r8, [r6, #40]
-	ldr	r9, [r6, #44]
-	ldr	r2, [r6, #16]
-	ldr	r3, [r6, #20]
-	ldm	lr, {r0, r1, r12, lr}
-	ldm	r6, {r6, r7, r10, r11}
-	adds	r0, r6, r0
-	adcs	r1, r7, r1
-	str	r0, [sp, #80]           @ 4-byte Spill
-	adcs	r12, r10, r12
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r10, [r5, #36]
-	adcs	r0, r11, lr
-	add	lr, r5, #8
-	str	r12, [sp, #68]          @ 4-byte Spill
-	str	r0, [sp, #92]           @ 4-byte Spill
-	adcs	r0, r2, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	adcs	r0, r3, r9
-	ldr	r9, [r5, #32]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r6, r0, #0
-	ldr	r0, [r5, #40]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r5, #44]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldm	lr, {r3, r11, lr}
-	ldr	r8, [r5, #20]
-	ldr	r0, [r5, #24]
-	ldr	r2, [r5, #28]
-	ldm	r5, {r5, r7}
-	adds	r0, r5, r0
-	ldr	r5, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	str	r0, [sp, #124]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r7, r7, r2
-	add	r2, sp, #100
-	adcs	r9, r3, r9
-	str	r7, [sp, #128]
-	adcs	r11, r11, r10
-	str	r9, [sp, #132]
-	str	r5, [sp, #100]
-	str	r1, [sp, #104]
-	str	r12, [sp, #108]
-	add	r1, sp, #124
-	str	r11, [sp, #136]
-	adcs	r10, lr, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r10, [sp, #140]
-	adcs	r8, r8, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	r8, [sp, #144]
-	str	r0, [sp, #112]
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #116]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #120]
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	add	r0, sp, #148
-	bl	mcl_fpDbl_mulPre6L(PLT)
-	cmp	r6, #0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r3, [sp, #92]           @ 4-byte Reload
-	moveq	r8, r6
-	moveq	r10, r6
-	moveq	r11, r6
-	moveq	r9, r6
-	moveq	r7, r6
-	cmp	r6, #0
-	moveq	r0, r6
-	adds	r2, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r5, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r7, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r12, r9, r0
-	adcs	r3, r11, r3
-	adcs	lr, r10, r5
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r8, r5
-	str	r0, [sp, #92]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r5, r0, #0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	cmp	r0, #0
-	and	r6, r6, r0
-	moveq	r1, r7
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	moveq	r12, r9
-	ldr	r9, [sp, #92]           @ 4-byte Reload
-	moveq	lr, r10
-	moveq	r3, r11
-	moveq	r2, r7
-	ldr	r7, [sp, #172]
-	cmp	r0, #0
-	moveq	r9, r8
-	moveq	r5, r0
-	adds	r8, r2, r7
-	ldr	r7, [sp, #176]
-	adcs	r10, r1, r7
-	ldr	r7, [sp, #180]
-	adcs	r0, r12, r7
-	ldr	r7, [sp, #184]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	adcs	r0, r3, r7
-	ldr	r7, [sp, #188]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	adcs	r0, lr, r7
-	ldr	r7, [sp, #192]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	adcs	r0, r9, r7
-	ldr	r7, [r4]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	adc	r0, r5, r6
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldmib	r4, {r6, r9, lr}
-	ldr	r0, [sp, #148]
-	ldr	r5, [sp, #152]
-	ldr	r1, [sp, #156]
-	ldr	r2, [sp, #160]
-	ldr	r11, [r4, #24]
-	subs	r3, r0, r7
-	ldr	r0, [r4, #16]
-	sbcs	r12, r5, r6
-	ldr	r5, [r4, #68]
-	sbcs	r6, r1, r9
-	ldr	r1, [sp, #164]
-	ldr	r9, [r4, #32]
-	sbcs	r2, r2, lr
-	ldr	lr, [r4, #72]
-	str	r5, [sp, #56]           @ 4-byte Spill
-	sbcs	r7, r1, r0
-	ldr	r0, [r4, #20]
-	ldr	r1, [sp, #168]
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	sbcs	r0, r8, r11
-	ldr	r8, [r4, #28]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	sbcs	r0, r10, r8
-	ldr	r10, [r4, #52]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	sbcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r4, #36]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r4, #40]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r4, #44]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [r4, #92]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r1, [sp, #84]           @ 4-byte Spill
-	sbc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r4, #48]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	subs	r0, r3, r0
-	ldr	r3, [r4, #80]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	sbcs	r0, r12, r10
-	ldr	r12, [r4, #76]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r4, #56]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	sbcs	r0, r6, r0
-	ldr	r6, [r4, #64]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r4, #60]
-	str	r6, [sp, #44]           @ 4-byte Spill
-	str	r0, [sp, #72]           @ 4-byte Spill
-	sbcs	r0, r2, r0
-	ldr	r2, [r4, #84]
-	sbcs	r7, r7, r6
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [r4, #88]
-	str	r2, [sp, #68]           @ 4-byte Spill
-	sbcs	r6, r6, r5
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	sbcs	r5, r5, lr
-	str	r5, [sp]                @ 4-byte Spill
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	sbcs	r5, r5, r12
-	str	r5, [sp, #4]            @ 4-byte Spill
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	sbcs	r5, r5, r3
-	str	r5, [sp, #8]            @ 4-byte Spill
-	ldr	r5, [sp, #40]           @ 4-byte Reload
-	sbcs	r2, r5, r2
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	sbcs	r2, r2, r0
-	str	r2, [sp, #52]           @ 4-byte Spill
-	mov	r2, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	sbc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adds	r11, r11, r0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	str	r11, [r4, #24]
-	adcs	r8, r8, r0
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	str	r8, [r4, #28]
-	adcs	r9, r9, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	str	r9, [r4, #32]
-	adcs	r5, r0, r1
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	ldr	r1, [sp]                @ 4-byte Reload
-	str	r5, [r4, #36]
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	adcs	r7, r0, r7
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	r7, [r4, #40]
-	adcs	r6, r0, r6
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r6, [r4, #44]
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	str	r0, [r4, #48]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r10, r1
-	adcs	r0, r0, r5
-	str	r1, [r4, #52]
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	str	r0, [r4, #56]
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	str	r1, [r4, #60]
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [r4, #64]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [r4, #68]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [r4, #72]
-	adcs	r0, r12, #0
-	str	r0, [r4, #76]
-	adcs	r0, r3, #0
-	str	r0, [r4, #80]
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [r4, #84]
-	adcs	r0, r2, #0
-	adc	r1, r1, #0
-	str	r0, [r4, #88]
-	str	r1, [r4, #92]
-	add	sp, sp, #196
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end176:
-	.size	mcl_fpDbl_mulPre12L, .Lfunc_end176-mcl_fpDbl_mulPre12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sqrPre12L
-	.align	2
-	.type	mcl_fpDbl_sqrPre12L,%function
-mcl_fpDbl_sqrPre12L:                    @ @mcl_fpDbl_sqrPre12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#204
-	sub	sp, sp, #204
-	mov	r5, r1
-	mov	r4, r0
-	mov	r2, r5
-	bl	mcl_fpDbl_mulPre6L(PLT)
-	add	r1, r5, #24
-	add	r0, r4, #48
-	mov	r2, r1
-	bl	mcl_fpDbl_mulPre6L(PLT)
-	ldr	r10, [r5, #32]
-	ldr	r9, [r5, #36]
-	ldr	lr, [r5, #40]
-	ldr	r12, [r5, #44]
-	ldr	r3, [r5, #8]
-	ldr	r2, [r5, #12]
-	ldr	r1, [r5, #16]
-	ldr	r11, [r5, #20]
-	ldr	r6, [r5, #24]
-	ldr	r0, [r5, #28]
-	ldm	r5, {r5, r7}
-	adds	r8, r5, r6
-	adcs	r6, r7, r0
-	mov	r0, #0
-	str	r8, [sp, #132]
-	str	r8, [sp, #108]
-	adcs	r10, r3, r10
-	str	r6, [sp, #136]
-	str	r6, [sp, #112]
-	adcs	r5, r2, r9
-	add	r2, sp, #108
-	str	r10, [sp, #140]
-	str	r10, [sp, #116]
-	adcs	r9, r1, lr
-	add	r1, sp, #132
-	str	r5, [sp, #144]
-	str	r5, [sp, #120]
-	adcs	r7, r11, r12
-	str	r9, [sp, #148]
-	str	r9, [sp, #124]
-	adc	r11, r0, #0
-	add	r0, sp, #156
-	str	r7, [sp, #152]
-	str	r7, [sp, #128]
-	bl	mcl_fpDbl_mulPre6L(PLT)
-	adds	r0, r9, r9
-	ldr	lr, [sp, #192]
-	ldr	r12, [sp, #196]
-	ldr	r9, [sp, #200]
-	orr	r0, r0, r5, lsr #31
-	str	r0, [sp, #104]          @ 4-byte Spill
-	adc	r0, r7, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	adds	r0, r10, r10
-	ldr	r10, [sp, #180]
-	adc	r1, r5, r5
-	orr	r0, r0, r6, lsr #31
-	str	r1, [sp, #92]           @ 4-byte Spill
-	adds	r1, r8, r8
-	ldr	r8, [sp, #184]
-	adc	r5, r6, r6
-	ldr	r6, [sp, #188]
-	adds	r1, r10, r1
-	str	r1, [sp, #96]           @ 4-byte Spill
-	adcs	r3, r8, r5
-	ldr	r5, [sp, #100]          @ 4-byte Reload
-	adcs	r2, r6, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r1, lr, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	adcs	r5, r9, r5
-	adc	r7, r11, r7, lsr #31
-	cmp	r11, #0
-	moveq	r3, r8
-	moveq	r2, r6
-	moveq	r5, r9
-	moveq	r0, r12
-	moveq	r1, lr
-	cmp	r11, #0
-	ldr	r6, [sp, #96]           @ 4-byte Reload
-	mov	r8, r3
-	add	r3, sp, #156
-	str	r0, [sp, #104]          @ 4-byte Spill
-	str	r1, [sp, #100]          @ 4-byte Spill
-	str	r2, [sp, #88]           @ 4-byte Spill
-	mov	r9, r5
-	ldm	r4, {r12, lr}
-	moveq	r7, r11
-	ldr	r11, [r4, #8]
-	ldr	r5, [r4, #12]
-	moveq	r6, r10
-	ldm	r3, {r0, r1, r2, r3}
-	ldr	r10, [r4, #64]
-	subs	r12, r0, r12
-	ldr	r0, [r4, #16]
-	sbcs	lr, r1, lr
-	ldr	r1, [sp, #172]
-	sbcs	r2, r2, r11
-	ldr	r11, [r4, #48]
-	sbcs	r3, r3, r5
-	ldr	r5, [r4, #68]
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #176]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r4, #20]
-	str	r5, [sp, #60]           @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r4, #24]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	sbcs	r0, r6, r0
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r4, #28]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	sbcs	r0, r8, r0
-	ldr	r8, [r4, #56]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r4, #32]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r4, #36]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [r4, #40]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	str	r1, [sp, #100]          @ 4-byte Spill
-	sbcs	r0, r0, r1
-	ldr	r1, [r4, #92]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r4, #44]
-	str	r1, [sp, #84]           @ 4-byte Spill
-	str	r0, [sp, #104]          @ 4-byte Spill
-	sbcs	r0, r9, r0
-	ldr	r9, [r4, #60]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	sbc	r0, r7, #0
-	ldr	r7, [r4, #52]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	subs	r0, r12, r11
-	ldr	r12, [r4, #76]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	sbcs	r0, lr, r7
-	ldr	lr, [r4, #72]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	sbcs	r0, r2, r8
-	ldr	r2, [r4, #84]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	sbcs	r0, r3, r9
-	ldr	r3, [r4, #80]
-	sbcs	r6, r6, r10
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r4, #88]
-	str	r6, [sp, #4]            @ 4-byte Spill
-	ldr	r6, [sp, #68]           @ 4-byte Reload
-	str	r2, [sp, #80]           @ 4-byte Spill
-	sbcs	r5, r6, r5
-	str	r5, [sp, #8]            @ 4-byte Spill
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	sbcs	r5, r5, lr
-	str	r5, [sp, #12]           @ 4-byte Spill
-	ldr	r5, [sp, #56]           @ 4-byte Reload
-	sbcs	r5, r5, r12
-	str	r5, [sp, #16]           @ 4-byte Spill
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	sbcs	r5, r5, r3
-	str	r5, [sp, #52]           @ 4-byte Spill
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	sbcs	r2, r5, r2
-	ldr	r5, [sp, #28]           @ 4-byte Reload
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	sbcs	r2, r2, r0
-	str	r2, [sp, #64]           @ 4-byte Spill
-	mov	r2, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	sbc	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adds	r0, r0, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r0, [r4, #24]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r6, r1, r5
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	str	r6, [r4, #28]
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r0, [r4, #32]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r6, r1, r5
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	str	r6, [r4, #36]
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	str	r0, [r4, #40]
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r5, r1, r5
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r5, [r4, #44]
-	str	r0, [r4, #48]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r7, r1
-	str	r1, [r4, #52]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [r4, #56]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r9, r1
-	str	r1, [r4, #60]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [r4, #64]
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [r4, #68]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [r4, #72]
-	adcs	r0, r12, #0
-	str	r0, [r4, #76]
-	adcs	r0, r3, #0
-	str	r0, [r4, #80]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [r4, #84]
-	adcs	r0, r2, #0
-	adc	r1, r1, #0
-	str	r0, [r4, #88]
-	str	r1, [r4, #92]
-	add	sp, sp, #204
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end177:
-	.size	mcl_fpDbl_sqrPre12L, .Lfunc_end177-mcl_fpDbl_sqrPre12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mont12L
-	.align	2
-	.type	mcl_fp_mont12L,%function
-mcl_fp_mont12L:                         @ @mcl_fp_mont12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#428
-	sub	sp, sp, #428
-	.pad	#1024
-	sub	sp, sp, #1024
-	str	r2, [sp, #92]           @ 4-byte Spill
-	ldr	r5, [r3, #-4]
-	ldr	r2, [r2]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	add	r0, sp, #1392
-	str	r3, [sp, #100]          @ 4-byte Spill
-	str	r1, [sp, #96]           @ 4-byte Spill
-	mov	r4, r3
-	str	r5, [sp, #88]           @ 4-byte Spill
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #1396]
-	ldr	r6, [sp, #1392]
-	add	r11, sp, #1024
-	mov	r1, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1400]
-	mul	r2, r6, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1404]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1440]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #1436]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #1432]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #1428]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #1424]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1420]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1416]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1412]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1408]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, r11, #312
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #1384]
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r5, [sp, #1360]
-	ldr	r8, [sp, #1356]
-	ldr	r7, [sp, #1352]
-	ldr	r10, [sp, #1336]
-	ldr	r9, [sp, #1340]
-	ldr	r4, [sp, #1344]
-	ldr	r11, [sp, #1348]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1380]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1376]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1372]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1368]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1364]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [r0, #4]
-	add	r0, sp, #1280
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r10, r6
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	ldr	r3, [sp, #1296]
-	ldr	r12, [sp, #1300]
-	ldr	lr, [sp, #1304]
-	ldr	r6, [sp, #1312]
-	ldr	r10, [sp, #1328]
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	ldr	r9, [sp, #1324]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #1280]
-	adcs	r1, r11, r1
-	ldr	r11, [sp, #60]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, #0
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r7, r1
-	ldr	r7, [sp, #1316]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r8, r1
-	ldr	r8, [sp, #1320]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r5, r1
-	ldr	r5, [sp, #1308]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #1292]
-	adc	r0, r0, #0
-	adds	r11, r11, r4
-	ldr	r4, [sp, #56]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #1288]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1284]
-	adcs	r0, r4, r0
-	mov	r4, r11
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	add	r7, sp, #1024
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #36]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, r7, #200
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #1272]
-	add	r9, sp, #1232
-	ldr	r5, [sp, #1248]
-	ldr	r8, [sp, #1244]
-	ldr	r10, [sp, #1224]
-	ldr	r11, [sp, #1228]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1268]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1264]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1260]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1256]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1252]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r9, {r6, r7, r9}
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, sp, #1168
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r4, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #1168
-	ldr	r10, [sp, #1212]
-	ldr	r4, [sp, #1192]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #1216]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1200]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1208]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1204]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1196]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	add	r5, sp, #1024
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, r5, #88
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #1160]
-	add	r10, sp, #1120
-	ldr	r6, [sp, #1136]
-	ldr	r9, [sp, #1132]
-	ldr	r11, [sp, #1112]
-	ldr	r7, [sp, #1116]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1156]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1152]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1148]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1144]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1140]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r4, r5, r10}
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, sp, #1056
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r8, r11
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	ldr	r2, [sp, #1068]
-	ldr	r3, [sp, #1072]
-	ldr	r12, [sp, #1076]
-	ldr	lr, [sp, #1080]
-	ldr	r8, [sp, #1096]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1092]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r11, [sp, #84]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1056]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1084]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1104]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1100]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1088]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1064]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r11, r11, r4
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1060]
-	adcs	r0, r4, r0
-	mov	r4, r11
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #36]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #1000
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #1048]
-	add	r9, sp, #1008
-	ldr	r5, [sp, #1024]
-	ldr	r8, [sp, #1020]
-	ldr	r10, [sp, #1000]
-	ldr	r11, [sp, #1004]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1044]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1040]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1036]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1032]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1028]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r9, {r6, r7, r9}
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, sp, #944
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r4, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #944
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #968
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #888
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #936]
-	add	r10, sp, #896
-	ldr	r6, [sp, #912]
-	ldr	r9, [sp, #908]
-	ldr	r11, [sp, #888]
-	ldr	r7, [sp, #892]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #932]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #928]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #924]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #920]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #916]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r4, r5, r10}
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, sp, #832
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r8, r11
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #836
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #860
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r7, r8, r9, r10}
-	ldr	r4, [sp, #832]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #84]          @ 4-byte Reload
-	adds	r11, r11, r4
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r11
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #36]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #776
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #824]
-	add	r9, sp, #784
-	ldr	r5, [sp, #800]
-	ldr	r8, [sp, #796]
-	ldr	r10, [sp, #776]
-	ldr	r11, [sp, #780]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #820]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #816]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #812]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #808]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #804]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r9, {r6, r7, r9}
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #720
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r4, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #720
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #744
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #664
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #712]
-	add	r10, sp, #672
-	ldr	r6, [sp, #688]
-	ldr	r9, [sp, #684]
-	ldr	r11, [sp, #664]
-	ldr	r7, [sp, #668]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #708]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r4, r5, r10}
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #608
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r8, r11
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #612
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #636
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r7, r8, r9, r10}
-	ldr	r4, [sp, #608]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #84]          @ 4-byte Reload
-	adds	r11, r11, r4
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r11
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #36]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #552
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #600]
-	add	r9, sp, #560
-	ldr	r5, [sp, #576]
-	ldr	r8, [sp, #572]
-	ldr	r10, [sp, #552]
-	ldr	r11, [sp, #556]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #596]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #592]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #588]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #584]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #580]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r9, {r6, r7, r9}
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #496
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r4, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #496
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #520
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #440
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #488]
-	add	r10, sp, #448
-	ldr	r6, [sp, #464]
-	ldr	r9, [sp, #460]
-	ldr	r11, [sp, #440]
-	ldr	r7, [sp, #444]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #484]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #480]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #476]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #472]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #468]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r4, r5, r10}
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, sp, #384
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r8, r11
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #388
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #412
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r7, r8, r9, r10}
-	ldr	r4, [sp, #384]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #84]          @ 4-byte Reload
-	adds	r11, r11, r4
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r11
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	mul	r2, r11, r6
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #36]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	add	r0, sp, #328
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #376]
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r5, [sp, #348]
-	ldr	r9, [sp, #344]
-	ldr	r10, [sp, #328]
-	ldr	r11, [sp, #332]
-	ldr	r8, [sp, #336]
-	ldr	r7, [sp, #340]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #372]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #368]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #364]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #360]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #356]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #352]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [r0, #40]
-	add	r0, sp, #272
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r4, r10
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	add	r12, sp, #288
-	ldr	lr, [sp, #276]
-	ldr	r4, [sp, #284]
-	ldr	r10, [sp, #312]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r0, r11
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #316]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #320]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #280]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #272]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	adds	r0, r1, r2
-	mul	r11, r0, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r6, [sp, #308]
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r7, [sp, #80]           @ 4-byte Reload
-	adcs	r7, r7, lr
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	adcs	r7, r7, r5
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	adcs	r7, r7, r4
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mov	r2, r11
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #216
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #264]
-	add	r10, sp, #220
-	ldr	r6, [sp, #244]
-	ldr	r7, [sp, #240]
-	ldr	r8, [sp, #236]
-	ldr	r9, [sp, #232]
-	ldr	r11, [sp, #216]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #260]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #256]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #252]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #248]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r4, r5, r10}
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [r0, #44]
-	add	r0, sp, #160
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #160
-	add	r12, sp, #176
-	adds	r0, r0, r11
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r4, r0, r4
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r11, r0, r5
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #196
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldm	lr, {r2, r7, lr}
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	ldr	r6, [sp, #172]
-	adds	r4, r4, r2
-	mul	r1, r4, r0
-	adcs	r7, r11, r7
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldm	r10, {r5, r8, r9, r10}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adcs	r11, r7, lr
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #100]          @ 4-byte Reload
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r7, r0, r5
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r8, r0, r8
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r10, r0, r10
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	add	r0, sp, #104
-	bl	.LmulPv384x32(PLT)
-	add	r5, sp, #104
-	mov	r3, r6
-	ldm	r5, {r0, r1, r2, r5}
-	adds	r0, r4, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	lr, r0, r1
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r4, r11, r2
-	str	lr, [sp, #44]           @ 4-byte Spill
-	str	r4, [sp, #48]           @ 4-byte Spill
-	adcs	r2, r0, r5
-	ldr	r0, [sp, #120]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	adcs	r5, r1, r0
-	ldr	r0, [sp, #124]
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r5, [sp, #56]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #128]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #132]
-	adcs	r12, r1, r0
-	ldr	r0, [sp, #136]
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r12, [sp, #60]          @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #140]
-	adcs	r0, r7, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #144]
-	adcs	r0, r8, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #148]
-	adcs	r0, r1, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #152]
-	adcs	r0, r10, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldmib	r3, {r0, r1, r7, r10}
-	ldr	r11, [r3]
-	ldr	r6, [r3, #24]
-	ldr	r9, [r3, #20]
-	ldr	r8, [r3, #36]
-	subs	r11, lr, r11
-	str	r6, [sp, #36]           @ 4-byte Spill
-	ldr	r6, [r3, #28]
-	ldr	lr, [r3, #44]
-	sbcs	r0, r4, r0
-	ldr	r4, [sp, #72]           @ 4-byte Reload
-	sbcs	r1, r2, r1
-	sbcs	r2, r5, r7
-	ldr	r7, [r3, #32]
-	ldr	r5, [r3, #40]
-	ldr	r3, [sp, #80]           @ 4-byte Reload
-	str	r6, [sp, #40]           @ 4-byte Spill
-	sbcs	r10, r3, r10
-	ldr	r3, [sp, #84]           @ 4-byte Reload
-	sbcs	r6, r3, r9
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	ldr	r9, [sp, #40]           @ 4-byte Reload
-	sbcs	r3, r12, r3
-	ldr	r12, [sp, #88]          @ 4-byte Reload
-	sbcs	r12, r12, r9
-	sbcs	r7, r4, r7
-	ldr	r4, [sp, #76]           @ 4-byte Reload
-	str	r7, [sp, #100]          @ 4-byte Spill
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	sbcs	r9, r4, r8
-	ldr	r4, [sp, #96]           @ 4-byte Reload
-	sbcs	r8, r4, r5
-	ldr	r4, [sp, #92]           @ 4-byte Reload
-	ldr	r5, [sp, #44]           @ 4-byte Reload
-	sbcs	lr, r4, lr
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	sbc	r4, r4, #0
-	ands	r4, r4, #1
-	movne	r11, r5
-	ldr	r5, [sp, #68]           @ 4-byte Reload
-	movne	r0, r7
-	str	r11, [r5]
-	str	r0, [r5, #4]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	movne	r1, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	cmp	r4, #0
-	str	r1, [r5, #8]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	movne	r2, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r2, [r5, #12]
-	movne	r10, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	str	r10, [r5, #16]
-	movne	r6, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	cmp	r4, #0
-	str	r6, [r5, #20]
-	movne	r3, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	str	r3, [r5, #24]
-	movne	r12, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r12, [r5, #28]
-	movne	r1, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	cmp	r4, #0
-	str	r1, [r5, #32]
-	movne	r9, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	str	r9, [r5, #36]
-	movne	r8, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	r8, [r5, #40]
-	movne	lr, r0
-	str	lr, [r5, #44]
-	add	sp, sp, #428
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end178:
-	.size	mcl_fp_mont12L, .Lfunc_end178-mcl_fp_mont12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montNF12L
-	.align	2
-	.type	mcl_fp_montNF12L,%function
-mcl_fp_montNF12L:                       @ @mcl_fp_montNF12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#428
-	sub	sp, sp, #428
-	.pad	#1024
-	sub	sp, sp, #1024
-	add	r12, sp, #92
-	mov	r4, r3
-	mov	r7, r1
-	stm	r12, {r1, r2, r3}
-	str	r0, [sp, #68]           @ 4-byte Spill
-	add	r0, sp, #1392
-	ldr	r5, [r3, #-4]
-	ldr	r2, [r2]
-	str	r5, [sp, #88]           @ 4-byte Spill
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #1396]
-	ldr	r8, [sp, #1392]
-	add	r10, sp, #1024
-	mov	r1, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1400]
-	mul	r2, r8, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1404]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1440]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #1436]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #1432]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #1428]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #1424]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1420]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1416]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1412]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1408]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, r10, #312
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #1384]
-	add	r11, sp, #1344
-	ldr	r9, [sp, #1356]
-	ldr	r4, [sp, #1336]
-	ldr	r6, [sp, #1340]
-	mov	r1, r7
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1380]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1376]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1372]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1368]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1364]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1360]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r5, r10, r11}
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [r0, #4]
-	add	r0, sp, #1280
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r4, r8
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #1280
-	ldr	r7, [sp, #1316]
-	ldr	r4, [sp, #1304]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r8, r6, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r6, [sp, #1312]
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #1308]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	ldr	r10, [sp, #1324]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	ldr	r11, [sp, #1328]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	ldr	r9, [sp, #1320]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adc	r0, r1, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r8, r8, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	add	r5, sp, #1024
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adc	r0, r11, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, r5, #200
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #1272]
-	add	r10, sp, #1232
-	ldr	r6, [sp, #1248]
-	ldr	r9, [sp, #1244]
-	ldr	r11, [sp, #1224]
-	ldr	r7, [sp, #1228]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1268]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1264]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1260]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1256]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1252]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r10}
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, sp, #1168
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r8, r11
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1180]
-	ldr	r3, [sp, #1184]
-	ldr	r12, [sp, #1188]
-	ldr	lr, [sp, #1192]
-	ldr	r8, [sp, #1208]
-	ldr	r11, [sp, #1216]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1204]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1168]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1196]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #84]          @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1212]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1200]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r10, r10, r4
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	ldr	r1, [sp, #1176]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1172]
-	adcs	r0, r4, r0
-	mov	r4, r10
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	add	r7, sp, #1024
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adc	r0, r11, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r10, r0
-	add	r0, r7, #88
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #1160]
-	add	r9, sp, #1120
-	ldr	r5, [sp, #1136]
-	ldr	r8, [sp, #1132]
-	ldr	r10, [sp, #1112]
-	ldr	r11, [sp, #1116]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1156]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1152]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1148]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1144]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1140]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r9, {r6, r7, r9}
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, sp, #1056
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r4, r10
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #1056
-	ldr	r10, [sp, #1100]
-	ldr	r4, [sp, #1080]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #1104]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1088]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1096]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1092]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1084]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adc	r0, r11, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1000
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #1048]
-	add	r10, sp, #1008
-	ldr	r6, [sp, #1024]
-	ldr	r9, [sp, #1020]
-	ldr	r11, [sp, #1000]
-	ldr	r7, [sp, #1004]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1044]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1040]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1036]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1032]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1028]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r10}
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, sp, #944
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r8, r11
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	r11, sp, #972
-	add	lr, sp, #948
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r11, {r5, r6, r7, r8, r9, r11}
-	ldr	r4, [sp, #944]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r10, [sp, #84]          @ 4-byte Reload
-	adds	r10, r10, r4
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r10
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adc	r0, r11, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r10, r0
-	add	r0, sp, #888
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #936]
-	add	r9, sp, #896
-	ldr	r5, [sp, #912]
-	ldr	r8, [sp, #908]
-	ldr	r10, [sp, #888]
-	ldr	r11, [sp, #892]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #932]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #928]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #924]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #920]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #916]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r9, {r6, r7, r9}
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, sp, #832
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r4, r10
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #832
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #856
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adc	r0, r11, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #776
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #824]
-	add	r10, sp, #784
-	ldr	r6, [sp, #800]
-	ldr	r9, [sp, #796]
-	ldr	r11, [sp, #776]
-	ldr	r7, [sp, #780]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #820]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #816]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #812]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #808]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #804]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r10}
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #720
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r8, r11
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	r11, sp, #748
-	add	lr, sp, #724
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r11, {r5, r6, r7, r8, r9, r11}
-	ldr	r4, [sp, #720]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r10, [sp, #84]          @ 4-byte Reload
-	adds	r10, r10, r4
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r10
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adc	r0, r11, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r10, r0
-	add	r0, sp, #664
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #712]
-	add	r9, sp, #672
-	ldr	r5, [sp, #688]
-	ldr	r8, [sp, #684]
-	ldr	r10, [sp, #664]
-	ldr	r11, [sp, #668]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #708]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r9, {r6, r7, r9}
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #608
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r4, r10
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #608
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #632
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adc	r0, r11, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #552
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #600]
-	add	r10, sp, #560
-	ldr	r6, [sp, #576]
-	ldr	r9, [sp, #572]
-	ldr	r11, [sp, #552]
-	ldr	r7, [sp, #556]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #596]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #592]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #588]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #584]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #580]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r10}
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #496
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r8, r11
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	r11, sp, #524
-	add	lr, sp, #500
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r11, {r5, r6, r7, r8, r9, r11}
-	ldr	r4, [sp, #496]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r10, [sp, #84]          @ 4-byte Reload
-	adds	r10, r10, r4
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r10
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adc	r0, r11, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r2, r10, r0
-	add	r0, sp, #440
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #488]
-	add	r9, sp, #448
-	ldr	r5, [sp, #464]
-	ldr	r8, [sp, #460]
-	ldr	r10, [sp, #440]
-	ldr	r11, [sp, #444]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #484]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #480]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #476]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #472]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #468]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r9, {r6, r7, r9}
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, sp, #384
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r4, r10
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #384
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #408
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mul	r2, r7, r4
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adc	r0, r11, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #328
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #376]
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r6, [sp, #348]
-	ldr	r10, [sp, #344]
-	ldr	r11, [sp, #328]
-	ldr	r7, [sp, #332]
-	ldr	r9, [sp, #336]
-	ldr	r5, [sp, #340]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #372]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #368]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #364]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #360]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #356]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #352]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [r0, #40]
-	add	r0, sp, #272
-	bl	.LmulPv384x32(PLT)
-	adds	r0, r8, r11
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	ldr	lr, [sp, #276]
-	add	r12, sp, #288
-	ldr	r8, [sp, #316]
-	ldr	r11, [sp, #312]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	adcs	r7, r1, r9
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r9, [sp, #320]
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #280]
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #284]
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adc	r1, r1, r2
-	ldr	r2, [sp, #272]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	adds	r0, r0, r2
-	adcs	r7, r7, lr
-	mul	r10, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r4, [sp, #308]
-	ldm	r12, {r0, r1, r2, r3, r12}
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	adcs	r7, r7, r5
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	adcs	r7, r7, r6
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mov	r2, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	adc	r0, r9, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	add	r0, sp, #216
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #264]
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r5, [sp, #244]
-	ldr	r6, [sp, #240]
-	ldr	r8, [sp, #236]
-	ldr	r9, [sp, #232]
-	ldr	r10, [sp, #216]
-	ldr	r7, [sp, #220]
-	ldr	r4, [sp, #224]
-	ldr	r11, [sp, #228]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #260]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #256]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #252]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #248]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [r0, #44]
-	add	r0, sp, #160
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	add	r12, sp, #176
-	ldr	lr, [sp, #164]
-	adds	r0, r0, r10
-	add	r10, sp, #200
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #172]
-	adcs	r1, r1, r4
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r11
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #168]
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adc	r1, r1, r2
-	ldr	r2, [sp, #160]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	adds	r4, r0, r2
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	mul	r1, r4, r0
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r11, [sp, #196]
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	adcs	r5, r5, lr
-	str	r5, [sp, #36]           @ 4-byte Spill
-	ldr	r5, [sp, #92]           @ 4-byte Reload
-	adcs	r6, r5, r6
-	ldr	r5, [sp, #100]          @ 4-byte Reload
-	str	r6, [sp, #32]           @ 4-byte Spill
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	adcs	r7, r6, r7
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r11, r0, r11
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r8, r0, r8
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r9, r0, r9
-	adc	r0, r10, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	add	r0, sp, #104
-	bl	.LmulPv384x32(PLT)
-	add	r6, sp, #104
-	ldm	r6, {r0, r1, r2, r6}
-	adds	r0, r4, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	lr, r0, r1
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r10, r0, r2
-	ldr	r0, [sp, #120]
-	mov	r2, r5
-	adcs	r3, r7, r6
-	str	r10, [sp, #52]          @ 4-byte Spill
-	str	r3, [sp, #56]           @ 4-byte Spill
-	adcs	r6, r1, r0
-	ldr	r0, [sp, #124]
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r6, [sp, #60]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #128]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #132]
-	adcs	r12, r1, r0
-	ldr	r0, [sp, #136]
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r12, [sp, #64]          @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #140]
-	adcs	r0, r11, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #144]
-	adcs	r0, r8, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #148]
-	adcs	r0, r9, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #152]
-	adc	r0, r1, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldmib	r2, {r0, r1, r7, r9}
-	ldr	r4, [r2, #24]
-	ldr	r8, [r2]
-	ldr	r5, [r2, #20]
-	str	r4, [sp, #44]           @ 4-byte Spill
-	ldr	r4, [r2, #28]
-	subs	r8, lr, r8
-	sbcs	r0, r10, r0
-	sbcs	r1, r3, r1
-	sbcs	r7, r6, r7
-	str	r4, [sp, #48]           @ 4-byte Spill
-	mov	r4, r2
-	ldr	r2, [r4, #44]
-	ldr	r10, [r4, #32]
-	ldr	r6, [r4, #36]
-	ldr	r11, [r4, #40]
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [sp, #76]           @ 4-byte Reload
-	sbcs	r9, r2, r9
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	sbcs	r5, r2, r5
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	sbcs	r3, r12, r2
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	sbcs	r12, r2, r4
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	ldr	r4, [sp, #40]           @ 4-byte Reload
-	sbcs	r10, r2, r10
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	sbcs	r2, r2, r6
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	sbcs	r2, r2, r11
-	ldr	r11, [sp, #68]          @ 4-byte Reload
-	str	r2, [sp, #100]          @ 4-byte Spill
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	sbc	r2, r2, r4
-	asr	r4, r2, #31
-	cmp	r4, #0
-	movlt	r8, lr
-	movlt	r0, r6
-	str	r8, [r11]
-	str	r0, [r11, #4]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	movlt	r1, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	cmp	r4, #0
-	str	r1, [r11, #8]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	movlt	r7, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r7, [r11, #12]
-	movlt	r9, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r9, [r11, #16]
-	movlt	r5, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	cmp	r4, #0
-	str	r5, [r11, #20]
-	movlt	r3, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	str	r3, [r11, #24]
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	movlt	r12, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	str	r12, [r11, #28]
-	movlt	r10, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	cmp	r4, #0
-	str	r10, [r11, #32]
-	movlt	r3, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	str	r3, [r11, #36]
-	movlt	r1, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	r1, [r11, #40]
-	movlt	r2, r0
-	str	r2, [r11, #44]
-	add	sp, sp, #428
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end179:
-	.size	mcl_fp_montNF12L, .Lfunc_end179-mcl_fp_montNF12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montRed12L
-	.align	2
-	.type	mcl_fp_montRed12L,%function
-mcl_fp_montRed12L:                      @ @mcl_fp_montRed12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#836
-	sub	sp, sp, #836
-	mov	r3, r2
-	str	r0, [sp, #148]          @ 4-byte Spill
-	ldr	r2, [r1, #4]
-	ldr	r10, [r1]
-	ldr	r0, [r3]
-	str	r3, [sp, #152]          @ 4-byte Spill
-	mov	r5, r3
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldr	r2, [r1, #8]
-	str	r0, [sp, #144]          @ 4-byte Spill
-	ldr	r0, [r3, #4]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [r1, #12]
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [r3, #8]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [r3, #12]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [r3, #16]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [r3, #20]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [r3, #24]
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [r3, #-4]
-	str	r0, [sp, #156]          @ 4-byte Spill
-	mul	r2, r10, r0
-	ldr	r0, [r3, #28]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [r3, #32]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [r3, #36]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [r3, #40]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [r3, #44]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [r1, #64]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r1, #68]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r1, #72]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r1, #76]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r1, #80]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r1, #84]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r1, #88]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [r1, #92]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r1, #56]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r1, #60]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [r1, #28]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r1, #24]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r1, #20]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r1, #16]
-	mov	r1, r3
-	str	r0, [sp, #8]            @ 4-byte Spill
-	add	r0, sp, #776
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #824]
-	add	r11, sp, #808
-	add	lr, sp, #776
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r6, r8, r9, r11}
-	ldr	r7, [sp, #804]
-	ldr	r4, [sp, #800]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r10, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r10, r0, r1
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #156]          @ 4-byte Reload
-	mul	r2, r10, r0
-	add	r0, sp, #720
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #768]
-	add	lr, sp, #756
-	add	r9, sp, #732
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	lr, {r3, r12, lr}
-	ldr	r4, [sp, #720]
-	ldr	r6, [sp, #752]
-	ldr	r11, [sp, #748]
-	ldr	r2, [sp, #744]
-	ldr	r1, [sp, #724]
-	ldr	r7, [sp, #728]
-	ldm	r9, {r0, r8, r9}
-	adds	r4, r10, r4
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	adcs	r10, r4, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	mov	r4, r5
-	adcs	r1, r1, r7
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #156]          @ 4-byte Reload
-	mul	r2, r10, r0
-	add	r0, sp, #664
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #712]
-	add	r11, sp, #696
-	add	lr, sp, #664
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r6, r8, r9, r11}
-	ldr	r7, [sp, #692]
-	ldr	r5, [sp, #688]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r10, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r10, r0, r1
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #156]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	mul	r2, r10, r5
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	add	r0, sp, #608
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #656]
-	add	lr, sp, #644
-	add	r9, sp, #620
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	lr, {r3, r12, lr}
-	ldr	r4, [sp, #608]
-	ldr	r6, [sp, #640]
-	ldr	r11, [sp, #636]
-	ldr	r2, [sp, #632]
-	ldr	r1, [sp, #612]
-	ldr	r7, [sp, #616]
-	ldm	r9, {r0, r8, r9}
-	adds	r4, r10, r4
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	adcs	r10, r4, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r4, [sp, #152]          @ 4-byte Reload
-	adcs	r1, r1, r7
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	mov	r0, r5
-	mul	r2, r10, r0
-	add	r0, sp, #552
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #600]
-	add	r11, sp, #584
-	add	lr, sp, #552
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r11, {r6, r8, r9, r11}
-	ldr	r7, [sp, #580]
-	ldr	r5, [sp, #576]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r10, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r10, r0, r1
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #156]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	mul	r2, r10, r5
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #496
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #544]
-	add	lr, sp, #532
-	add	r9, sp, #508
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	lr, {r3, r12, lr}
-	ldr	r4, [sp, #496]
-	ldr	r6, [sp, #528]
-	ldr	r11, [sp, #524]
-	ldr	r2, [sp, #520]
-	ldr	r1, [sp, #500]
-	ldr	r7, [sp, #504]
-	ldm	r9, {r0, r8, r9}
-	adds	r4, r10, r4
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	adcs	r10, r4, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r4, [sp, #152]          @ 4-byte Reload
-	adcs	r1, r1, r7
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r10, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #440
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #488]
-	add	r11, sp, #472
-	add	lr, sp, #440
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r11, {r6, r8, r9, r11}
-	ldr	r7, [sp, #468]
-	ldr	r5, [sp, #464]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r10, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r10, r0, r1
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #156]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	mul	r2, r10, r5
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	add	r0, sp, #384
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #432]
-	add	lr, sp, #420
-	add	r9, sp, #396
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	lr, {r3, r12, lr}
-	ldr	r4, [sp, #384]
-	ldr	r6, [sp, #416]
-	ldr	r11, [sp, #412]
-	ldr	r2, [sp, #408]
-	ldr	r1, [sp, #388]
-	ldr	r7, [sp, #392]
-	ldm	r9, {r0, r8, r9}
-	adds	r4, r10, r4
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	adcs	r10, r4, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	mov	r4, r5
-	adcs	r1, r1, r7
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r10, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #152]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #328
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #376]
-	add	r11, sp, #352
-	add	lr, sp, #328
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #372]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r11, {r5, r7, r8, r9, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r10, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r10, r0, r1
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r10, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	mov	r5, r6
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #272
-	bl	.LmulPv384x32(PLT)
-	ldr	r0, [sp, #320]
-	add	lr, sp, #300
-	add	r6, sp, #272
-	add	r12, sp, #284
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	lr, {r4, r8, r9, r11, lr}
-	ldr	r7, [sp, #296]
-	ldm	r6, {r2, r3, r6}
-	ldm	r12, {r0, r1, r12}
-	adds	r2, r10, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	adcs	r10, r2, r3
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	adcs	r6, r2, r6
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #156]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	mul	r2, r10, r4
-	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	mov	r11, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #216
-	bl	.LmulPv384x32(PLT)
-	add	r7, sp, #216
-	add	lr, sp, #252
-	ldm	r7, {r0, r1, r3, r7}
-	ldr	r8, [sp, #264]
-	adds	r0, r10, r0
-	adcs	r10, r6, r1
-	mul	r0, r10, r4
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #156]          @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	add	r7, sp, #232
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldm	lr, {r6, r12, lr}
-	ldm	r7, {r0, r1, r2, r3, r7}
-	ldr	r4, [sp, #96]           @ 4-byte Reload
-	adcs	r9, r4, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r11
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r4, r0, r3
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r5, r0, r7
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r6, r0, r6
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	add	r0, sp, #160
-	bl	.LmulPv384x32(PLT)
-	add	r3, sp, #160
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r10, r0
-	ldr	r0, [sp, #156]          @ 4-byte Reload
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	adcs	r12, r0, r2
-	ldr	r2, [sp, #176]
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r3, r9, r3
-	str	r12, [sp, #52]          @ 4-byte Spill
-	str	r3, [sp, #56]           @ 4-byte Spill
-	adcs	r7, r0, r2
-	ldr	r2, [sp, #180]
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	str	r7, [sp, #60]           @ 4-byte Spill
-	adcs	r8, r0, r2
-	ldr	r2, [sp, #184]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	str	r8, [sp, #64]           @ 4-byte Spill
-	adcs	r4, r4, r2
-	ldr	r2, [sp, #188]
-	str	r4, [sp, #68]           @ 4-byte Spill
-	adcs	r5, r5, r2
-	ldr	r2, [sp, #192]
-	str	r5, [sp, #72]           @ 4-byte Spill
-	adcs	r6, r6, r2
-	ldr	r2, [sp, #196]
-	str	r6, [sp, #76]           @ 4-byte Spill
-	adcs	r9, r0, r2
-	ldr	r2, [sp, #200]
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	str	r9, [sp, #84]           @ 4-byte Spill
-	adcs	r10, r0, r2
-	ldr	r2, [sp, #204]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r10, [sp, #96]          @ 4-byte Spill
-	adcs	lr, r0, r2
-	ldr	r2, [sp, #208]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	lr, [sp, #156]          @ 4-byte Spill
-	adcs	r11, r0, r2
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	ldr	r2, [sp, #136]          @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	subs	r0, r1, r0
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	sbcs	r1, r12, r1
-	sbcs	r2, r3, r2
-	ldr	r3, [sp, #120]          @ 4-byte Reload
-	sbcs	r3, r7, r3
-	ldr	r7, [sp, #124]          @ 4-byte Reload
-	sbcs	r12, r8, r7
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	sbcs	r7, r4, r7
-	ldr	r4, [sp, #132]          @ 4-byte Reload
-	sbcs	r4, r5, r4
-	ldr	r5, [sp, #100]          @ 4-byte Reload
-	sbcs	r8, r6, r5
-	ldr	r6, [sp, #104]          @ 4-byte Reload
-	sbcs	r5, r9, r6
-	ldr	r6, [sp, #108]          @ 4-byte Reload
-	str	r5, [sp, #144]          @ 4-byte Spill
-	ldr	r5, [sp, #92]           @ 4-byte Reload
-	sbcs	r9, r10, r6
-	ldr	r6, [sp, #112]          @ 4-byte Reload
-	sbcs	r6, lr, r6
-	mov	lr, r11
-	ldr	r11, [sp, #148]         @ 4-byte Reload
-	str	r6, [sp, #152]          @ 4-byte Spill
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	sbcs	r10, lr, r6
-	sbc	r6, r5, #0
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	ands	r6, r6, #1
-	movne	r0, r5
-	str	r0, [r11]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	movne	r1, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	str	r1, [r11, #4]
-	ldr	r1, [sp, #156]          @ 4-byte Reload
-	movne	r2, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	cmp	r6, #0
-	str	r2, [r11, #8]
-	ldr	r2, [sp, #144]          @ 4-byte Reload
-	movne	r3, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	str	r3, [r11, #12]
-	movne	r12, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	str	r12, [r11, #16]
-	movne	r7, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	cmp	r6, #0
-	str	r7, [r11, #20]
-	movne	r4, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r4, [r11, #24]
-	movne	r8, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	str	r8, [r11, #28]
-	movne	r2, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	cmp	r6, #0
-	movne	r10, lr
-	str	r2, [r11, #32]
-	movne	r9, r0
-	ldr	r0, [sp, #152]          @ 4-byte Reload
-	movne	r0, r1
-	str	r9, [r11, #36]
-	str	r0, [r11, #40]
-	str	r10, [r11, #44]
-	add	sp, sp, #836
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end180:
-	.size	mcl_fp_montRed12L, .Lfunc_end180-mcl_fp_montRed12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addPre12L
-	.align	2
-	.type	mcl_fp_addPre12L,%function
-mcl_fp_addPre12L:                       @ @mcl_fp_addPre12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#28
-	sub	sp, sp, #28
-	ldm	r1, {r3, r12, lr}
-	ldr	r9, [r1, #12]
-	ldmib	r2, {r5, r6, r7}
-	ldr	r4, [r2, #16]
-	ldr	r11, [r2]
-	str	r4, [sp]                @ 4-byte Spill
-	ldr	r4, [r2, #20]
-	adds	r8, r11, r3
-	ldr	r3, [r2, #36]
-	ldr	r11, [r2, #32]
-	adcs	r5, r5, r12
-	add	r12, r1, #16
-	adcs	r6, r6, lr
-	add	lr, r1, #32
-	adcs	r7, r7, r9
-	str	r4, [sp, #4]            @ 4-byte Spill
-	ldr	r4, [r2, #24]
-	str	r3, [sp, #16]           @ 4-byte Spill
-	ldr	r3, [r2, #40]
-	str	r4, [sp, #8]            @ 4-byte Spill
-	ldr	r4, [r2, #28]
-	ldr	r2, [r2, #44]
-	str	r3, [sp, #20]           @ 4-byte Spill
-	str	r4, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldm	lr, {r4, r10, lr}
-	ldr	r9, [r1, #44]
-	ldm	r12, {r1, r2, r3, r12}
-	str	r8, [r0]
-	stmib	r0, {r5, r6}
-	str	r7, [r0, #12]
-	ldr	r5, [sp]                @ 4-byte Reload
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r5, r1
-	ldr	r5, [sp, #4]            @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r2, r5, r2
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	adcs	r1, r1, r3
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	adcs	r2, r2, r12
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	adcs	r1, r11, r4
-	add	r0, r0, #32
-	adcs	r2, r2, r10
-	adcs	r3, r3, lr
-	adcs	r7, r7, r9
-	stm	r0, {r1, r2, r3, r7}
-	mov	r0, #0
-	adc	r0, r0, #0
-	add	sp, sp, #28
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end181:
-	.size	mcl_fp_addPre12L, .Lfunc_end181-mcl_fp_addPre12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subPre12L
-	.align	2
-	.type	mcl_fp_subPre12L,%function
-mcl_fp_subPre12L:                       @ @mcl_fp_subPre12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#28
-	sub	sp, sp, #28
-	ldmib	r2, {r8, r12, lr}
-	ldr	r3, [r2, #16]
-	ldr	r7, [r2]
-	ldr	r6, [r1]
-	ldr	r5, [r1, #4]
-	ldr	r4, [r1, #8]
-	ldr	r11, [r2, #44]
-	ldr	r9, [r1, #32]
-	ldr	r10, [r1, #36]
-	str	r3, [sp, #12]           @ 4-byte Spill
-	ldr	r3, [r2, #20]
-	subs	r6, r6, r7
-	ldr	r7, [r2, #32]
-	sbcs	r5, r5, r8
-	ldr	r8, [r1, #40]
-	sbcs	r4, r4, r12
-	add	r12, r1, #16
-	str	r3, [sp, #16]           @ 4-byte Spill
-	ldr	r3, [r2, #24]
-	str	r7, [sp]                @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r3, [sp, #20]           @ 4-byte Spill
-	ldr	r3, [r2, #28]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r3, [sp, #24]           @ 4-byte Spill
-	ldr	r3, [r1, #12]
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldr	r7, [r1, #44]
-	sbcs	lr, r3, lr
-	ldm	r12, {r1, r2, r3, r12}
-	str	r6, [r0]
-	str	r5, [r0, #4]
-	str	r4, [r0, #8]
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	str	lr, [r0, #12]
-	sbcs	r1, r1, r4
-	str	r1, [r0, #16]
-	sbcs	r2, r2, r6
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	sbcs	r1, r3, r1
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	str	r1, [r0, #24]
-	sbcs	r2, r12, r2
-	ldr	r1, [sp]                @ 4-byte Reload
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	add	r0, r0, #32
-	sbcs	r1, r9, r1
-	sbcs	r2, r10, r2
-	sbcs	r3, r8, r3
-	sbcs	r7, r7, r11
-	stm	r0, {r1, r2, r3, r7}
-	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	add	sp, sp, #28
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end182:
-	.size	mcl_fp_subPre12L, .Lfunc_end182-mcl_fp_subPre12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_12L
-	.align	2
-	.type	mcl_fp_shr1_12L,%function
-mcl_fp_shr1_12L:                        @ @mcl_fp_shr1_12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#4
-	sub	sp, sp, #4
-	add	r6, r1, #20
-	ldr	r3, [r1, #8]
-	ldr	r2, [r1, #12]
-	ldr	lr, [r1, #16]
-	add	r11, r1, #32
-	ldm	r6, {r4, r5, r6}
-	ldm	r1, {r8, r12}
-	lsr	r7, r12, #1
-	orr	r9, r7, r3, lsl #31
-	ldm	r11, {r7, r10, r11}
-	ldr	r1, [r1, #44]
-	str	r1, [sp]                @ 4-byte Spill
-	lsr	r1, r2, #1
-	lsrs	r2, r2, #1
-	rrx	r2, r3
-	lsrs	r3, r12, #1
-	orr	r1, r1, lr, lsl #31
-	rrx	r3, r8
-	stm	r0, {r3, r9}
-	str	r2, [r0, #8]
-	str	r1, [r0, #12]
-	lsrs	r1, r4, #1
-	lsr	r2, r10, #1
-	rrx	r1, lr
-	orr	r2, r2, r11, lsl #31
-	str	r1, [r0, #16]
-	lsr	r1, r4, #1
-	orr	r1, r1, r5, lsl #31
-	str	r1, [r0, #20]
-	lsrs	r1, r6, #1
-	rrx	r1, r5
-	str	r1, [r0, #24]
-	lsr	r1, r6, #1
-	orr	r1, r1, r7, lsl #31
-	str	r1, [r0, #28]
-	lsrs	r1, r10, #1
-	add	r0, r0, #32
-	rrx	r1, r7
-	ldr	r7, [sp]                @ 4-byte Reload
-	lsrs	r3, r7, #1
-	lsr	r7, r7, #1
-	rrx	r3, r11
-	stm	r0, {r1, r2, r3, r7}
-	add	sp, sp, #4
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end183:
-	.size	mcl_fp_shr1_12L, .Lfunc_end183-mcl_fp_shr1_12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_add12L
-	.align	2
-	.type	mcl_fp_add12L,%function
-mcl_fp_add12L:                          @ @mcl_fp_add12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#44
-	sub	sp, sp, #44
-	ldm	r1, {r12, lr}
-	ldr	r5, [r2]
-	ldr	r8, [r1, #8]
-	ldr	r9, [r1, #12]
-	ldmib	r2, {r4, r6, r7}
-	ldr	r11, [r1, #40]
-	adds	r5, r5, r12
-	ldr	r12, [r2, #40]
-	adcs	r4, r4, lr
-	str	r5, [sp, #40]           @ 4-byte Spill
-	ldr	r5, [r1, #24]
-	ldr	lr, [r1, #32]
-	adcs	r6, r6, r8
-	str	r4, [sp, #36]           @ 4-byte Spill
-	ldr	r4, [r1, #20]
-	ldr	r8, [r1, #36]
-	adcs	r7, r7, r9
-	str	r6, [sp, #32]           @ 4-byte Spill
-	ldr	r6, [r1, #16]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r2, #16]
-	adcs	r10, r7, r6
-	ldr	r6, [r2, #20]
-	adcs	r7, r6, r4
-	ldr	r4, [r2, #24]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	adcs	r7, r4, r5
-	ldr	r4, [r1, #28]
-	ldr	r5, [r2, #28]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	adcs	r6, r5, r4
-	ldr	r5, [r2, #32]
-	ldr	r4, [r1, #44]
-	ldr	r1, [r2, #36]
-	ldr	r2, [r2, #44]
-	str	r6, [sp, #8]            @ 4-byte Spill
-	adcs	r9, r5, lr
-	ldr	lr, [sp, #32]           @ 4-byte Reload
-	adcs	r5, r1, r8
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	ldr	r8, [sp, #12]           @ 4-byte Reload
-	adcs	r11, r12, r11
-	ldr	r12, [sp, #36]          @ 4-byte Reload
-	str	r5, [sp, #28]           @ 4-byte Spill
-	adcs	r2, r2, r4
-	ldr	r4, [sp, #16]           @ 4-byte Reload
-	str	r2, [sp, #24]           @ 4-byte Spill
-	str	r1, [r0]
-	str	r12, [r0, #4]
-	str	lr, [r0, #8]
-	str	r4, [r0, #12]
-	str	r10, [r0, #16]
-	str	r8, [r0, #20]
-	str	r7, [r0, #24]
-	str	r6, [r0, #28]
-	str	r9, [r0, #32]
-	str	r5, [r0, #36]
-	str	r11, [r0, #40]
-	str	r2, [r0, #44]
-	mov	r2, #0
-	adc	r2, r2, #0
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldm	r3, {r2, r6, r7}
-	ldr	r5, [r3, #12]
-	subs	r1, r1, r2
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	sbcs	r1, r12, r6
-	str	r1, [sp]                @ 4-byte Spill
-	sbcs	r1, lr, r7
-	str	r1, [sp, #36]           @ 4-byte Spill
-	sbcs	r1, r4, r5
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	sbcs	r1, r10, r1
-	add	r10, r3, #36
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [r3, #20]
-	sbcs	r6, r8, r1
-	ldr	r1, [r3, #24]
-	sbcs	lr, r2, r1
-	ldr	r2, [r3, #28]
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	sbcs	r12, r1, r2
-	ldr	r2, [r3, #32]
-	ldm	r10, {r1, r4, r10}
-	sbcs	r7, r9, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	sbcs	r2, r2, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	sbcs	r3, r11, r4
-	sbcs	r5, r1, r10
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	sbc	r1, r1, #0
-	tst	r1, #1
-	bne	.LBB184_2
-@ BB#1:                                 @ %nocarry
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r1, [r0]
-	ldr	r1, [sp]                @ 4-byte Reload
-	str	r1, [r0, #4]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r1, [r0, #8]
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r1, [r0, #12]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	str	r6, [r0, #20]
-	str	lr, [r0, #24]
-	str	r12, [r0, #28]
-	str	r7, [r0, #32]
-	add	r0, r0, #36
-	stm	r0, {r2, r3, r5}
-.LBB184_2:                              @ %carry
-	add	sp, sp, #44
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end184:
-	.size	mcl_fp_add12L, .Lfunc_end184-mcl_fp_add12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF12L
-	.align	2
-	.type	mcl_fp_addNF12L,%function
-mcl_fp_addNF12L:                        @ @mcl_fp_addNF12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#60
-	sub	sp, sp, #60
-	ldm	r1, {r5, r8, lr}
-	ldr	r6, [r2]
-	ldr	r10, [r1, #12]
-	ldmib	r2, {r4, r7, r9}
-	ldr	r12, [r1, #20]
-	adds	r6, r6, r5
-	ldr	r5, [r1, #24]
-	adcs	r8, r4, r8
-	ldr	r4, [r2, #16]
-	str	r6, [sp, #16]           @ 4-byte Spill
-	adcs	r7, r7, lr
-	add	lr, r2, #32
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [r1, #16]
-	adcs	r6, r9, r10
-	str	r6, [sp, #32]           @ 4-byte Spill
-	ldr	r6, [r1, #44]
-	adcs	r7, r4, r7
-	ldr	r4, [r1, #40]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	adcs	r7, r7, r12
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	adcs	r7, r7, r5
-	ldr	r5, [r2, #28]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r1, #28]
-	adcs	r7, r5, r7
-	ldr	r5, [r1, #36]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [r1, #32]
-	ldm	lr, {r1, r12, lr}
-	ldr	r2, [r2, #44]
-	adcs	r1, r1, r7
-	str	r1, [sp, #20]           @ 4-byte Spill
-	adcs	r1, r12, r5
-	str	r1, [sp, #28]           @ 4-byte Spill
-	adcs	r1, lr, r4
-	str	r1, [sp, #36]           @ 4-byte Spill
-	adc	r1, r2, r6
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldmib	r3, {r1, r2, r6, r11}
-	ldr	r7, [r3, #20]
-	ldr	r4, [r3, #32]
-	ldr	r9, [r3]
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	ldr	lr, [r3, #24]
-	ldr	r10, [r3, #28]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	str	r4, [sp, #8]            @ 4-byte Spill
-	ldr	r4, [r3, #36]
-	subs	r9, r5, r9
-	sbcs	r1, r8, r1
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	str	r4, [sp]                @ 4-byte Spill
-	ldr	r4, [r3, #40]
-	sbcs	r12, r7, r6
-	ldr	r7, [r3, #44]
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	str	r4, [sp, #4]            @ 4-byte Spill
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	sbcs	r3, r3, r11
-	sbcs	r11, r4, r6
-	ldr	r4, [sp, #56]           @ 4-byte Reload
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	sbcs	lr, r4, lr
-	ldr	r4, [sp, #52]           @ 4-byte Reload
-	sbcs	r10, r4, r10
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	sbcs	r4, r4, r6
-	ldr	r6, [sp]                @ 4-byte Reload
-	str	r4, [sp, #8]            @ 4-byte Spill
-	ldr	r4, [sp, #28]           @ 4-byte Reload
-	sbcs	r4, r4, r6
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	str	r4, [sp]                @ 4-byte Spill
-	ldr	r4, [sp, #4]            @ 4-byte Reload
-	sbcs	r6, r6, r4
-	str	r6, [sp, #12]           @ 4-byte Spill
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	sbc	r6, r6, r7
-	asr	r7, r6, #31
-	cmp	r7, #0
-	movlt	r9, r5
-	movlt	r1, r8
-	str	r9, [r0]
-	str	r1, [r0, #4]
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	movlt	r2, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	cmp	r7, #0
-	str	r2, [r0, #8]
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	movlt	r12, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r12, [r0, #12]
-	movlt	r3, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r3, [r0, #16]
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	movlt	r11, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	cmp	r7, #0
-	str	r11, [r0, #20]
-	movlt	lr, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	lr, [r0, #24]
-	movlt	r10, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r10, [r0, #28]
-	movlt	r2, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	cmp	r7, #0
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r2, [r0, #32]
-	movlt	r7, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r7, [r0, #36]
-	movlt	r3, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r3, [r0, #40]
-	movlt	r6, r1
-	str	r6, [r0, #44]
-	add	sp, sp, #60
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end185:
-	.size	mcl_fp_addNF12L, .Lfunc_end185-mcl_fp_addNF12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub12L
-	.align	2
-	.type	mcl_fp_sub12L,%function
-mcl_fp_sub12L:                          @ @mcl_fp_sub12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#64
-	sub	sp, sp, #64
-	ldr	r9, [r2]
-	ldmib	r2, {r8, r12, lr}
-	ldm	r1, {r4, r5, r6, r7}
-	add	r10, r1, #32
-	subs	r4, r4, r9
-	sbcs	r5, r5, r8
-	str	r4, [sp, #48]           @ 4-byte Spill
-	ldr	r4, [r2, #24]
-	sbcs	r6, r6, r12
-	str	r5, [sp, #52]           @ 4-byte Spill
-	ldr	r5, [r2, #20]
-	sbcs	r7, r7, lr
-	str	r6, [sp, #56]           @ 4-byte Spill
-	ldr	r6, [r2, #16]
-	str	r7, [sp, #60]           @ 4-byte Spill
-	ldr	r7, [r1, #16]
-	sbcs	r11, r7, r6
-	ldr	r6, [r1, #20]
-	str	r11, [sp, #28]          @ 4-byte Spill
-	sbcs	lr, r6, r5
-	ldr	r5, [r1, #24]
-	str	lr, [sp, #40]           @ 4-byte Spill
-	sbcs	r7, r5, r4
-	ldr	r4, [r2, #28]
-	ldr	r5, [r1, #28]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	add	r7, r2, #32
-	sbcs	r12, r5, r4
-	str	r12, [sp, #36]          @ 4-byte Spill
-	ldm	r7, {r4, r5, r6, r7}
-	ldm	r10, {r2, r8, r9, r10}
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	sbcs	r4, r2, r4
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r1, [r0]
-	sbcs	r8, r8, r5
-	str	r4, [sp, #32]           @ 4-byte Spill
-	sbcs	r6, r9, r6
-	sbcs	r7, r10, r7
-	ldr	r10, [sp, #52]          @ 4-byte Reload
-	str	r10, [r0, #4]
-	str	r2, [r0, #8]
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r2, [r0, #12]
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r11, [r0, #16]
-	str	lr, [r0, #20]
-	str	r2, [r0, #24]
-	str	r12, [r0, #28]
-	str	r4, [r0, #32]
-	mov	r4, #0
-	str	r8, [r0, #36]
-	str	r6, [r0, #40]
-	str	r7, [r0, #44]
-	sbc	r4, r4, #0
-	tst	r4, #1
-	beq	.LBB186_2
-@ BB#1:                                 @ %carry
-	ldr	r5, [r3, #32]
-	ldr	r4, [r3, #20]
-	ldr	r12, [r3, #28]
-	ldr	r9, [r3, #4]
-	ldr	lr, [r3, #12]
-	ldr	r11, [r3, #16]
-	str	r5, [sp, #12]           @ 4-byte Spill
-	ldr	r5, [r3, #36]
-	str	r4, [sp]                @ 4-byte Spill
-	ldr	r4, [r3, #24]
-	str	r12, [sp, #8]           @ 4-byte Spill
-	str	r5, [sp, #16]           @ 4-byte Spill
-	ldr	r5, [r3, #40]
-	str	r4, [sp, #4]            @ 4-byte Spill
-	str	r5, [sp, #20]           @ 4-byte Spill
-	ldr	r5, [r3, #44]
-	str	r5, [sp, #24]           @ 4-byte Spill
-	ldr	r5, [r3, #8]
-	ldr	r3, [r3]
-	adds	r3, r3, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r4, r9, r10
-	adcs	r5, r5, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	stm	r0, {r3, r4, r5}
-	ldr	r3, [sp]                @ 4-byte Reload
-	adcs	r1, lr, r1
-	str	r1, [r0, #12]
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r11, r1
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r3, r1
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	str	r1, [r0, #20]
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [r0, #28]
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	add	r0, r0, #32
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	adcs	r2, r2, r8
-	adcs	r3, r3, r6
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	adc	r7, r6, r7
-	stm	r0, {r1, r2, r3, r7}
-.LBB186_2:                              @ %nocarry
-	add	sp, sp, #64
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end186:
-	.size	mcl_fp_sub12L, .Lfunc_end186-mcl_fp_sub12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF12L
-	.align	2
-	.type	mcl_fp_subNF12L,%function
-mcl_fp_subNF12L:                        @ @mcl_fp_subNF12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#56
-	sub	sp, sp, #56
-	mov	r12, r0
-	ldr	r0, [r2, #32]
-	add	r11, r2, #8
-	ldr	r6, [r2]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r2, #36]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r2, #40]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r2, #44]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r2, #4]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r11, {r8, r10, r11}
-	ldr	r0, [r2, #20]
-	ldr	lr, [r1, #16]
-	ldr	r7, [r1, #20]
-	ldr	r5, [r1, #24]
-	ldr	r4, [r1, #28]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r2, #24]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r2, #28]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r1, #12]
-	ldm	r1, {r1, r2, r9}
-	subs	r1, r1, r6
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	sbcs	r2, r2, r6
-	sbcs	r6, r9, r8
-	mov	r9, r2
-	sbcs	r10, r0, r10
-	str	r6, [sp, #4]            @ 4-byte Spill
-	sbcs	r0, lr, r11
-	add	r11, r3, #8
-	ldr	lr, [r3, #4]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	sbcs	r0, r7, r0
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	sbcs	r0, r5, r0
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	sbcs	r0, r4, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	sbcs	r0, r7, r0
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	sbcs	r0, r7, r0
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	sbcs	r0, r7, r0
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	sbc	r0, r5, r7
-	ldr	r7, [r3, #36]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r3, #32]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldr	r7, [r3, #40]
-	str	r0, [sp]                @ 4-byte Spill
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r3, #44]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldm	r11, {r7, r8, r11}
-	ldr	r4, [r3, #28]
-	ldr	r5, [r3, #20]
-	ldr	r0, [r3, #24]
-	ldr	r3, [r3]
-	str	r4, [sp, #8]            @ 4-byte Spill
-	mov	r4, r1
-	adds	r1, r4, r3
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	adcs	r2, r9, lr
-	adcs	lr, r6, r7
-	adcs	r6, r10, r8
-	adcs	r7, r3, r11
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	adcs	r8, r3, r5
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	adcs	r5, r3, r0
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r11, r3, r0
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	ldr	r0, [sp]                @ 4-byte Reload
-	adcs	r3, r3, r0
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	str	r3, [sp, #8]            @ 4-byte Spill
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	adcs	r3, r3, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	str	r3, [sp, #12]           @ 4-byte Spill
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r3, r0, r3
-	str	r3, [sp, #20]           @ 4-byte Spill
-	asr	r3, r0, #31
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	cmp	r3, #0
-	movge	r1, r4
-	movge	r2, r9
-	str	r1, [r12]
-	str	r2, [r12, #4]
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	movge	lr, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	cmp	r3, #0
-	movge	r6, r10
-	str	lr, [r12, #8]
-	str	r6, [r12, #12]
-	movge	r7, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	str	r7, [r12, #16]
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	movge	r8, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	cmp	r3, #0
-	str	r8, [r12, #20]
-	movge	r5, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	str	r5, [r12, #24]
-	movge	r11, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	str	r11, [r12, #28]
-	movge	r1, r0
-	cmp	r3, #0
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	movge	r0, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [r12, #32]
-	add	r1, r12, #36
-	movge	r2, r3
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	movge	r3, r7
-	stm	r1, {r0, r2, r3}
-	add	sp, sp, #56
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end187:
-	.size	mcl_fp_subNF12L, .Lfunc_end187-mcl_fp_subNF12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add12L
-	.align	2
-	.type	mcl_fpDbl_add12L,%function
-mcl_fpDbl_add12L:                       @ @mcl_fpDbl_add12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#136
-	sub	sp, sp, #136
-	ldm	r1, {r7, r8, r12, lr}
-	ldm	r2, {r4, r5, r6, r9}
-	ldr	r10, [r2, #20]
-	adds	r4, r4, r7
-	str	r4, [sp, #80]           @ 4-byte Spill
-	ldr	r4, [r2, #64]
-	str	r4, [sp, #108]          @ 4-byte Spill
-	ldr	r4, [r2, #68]
-	str	r4, [sp, #112]          @ 4-byte Spill
-	ldr	r4, [r2, #72]
-	str	r4, [sp, #116]          @ 4-byte Spill
-	ldr	r4, [r2, #76]
-	str	r4, [sp, #120]          @ 4-byte Spill
-	ldr	r4, [r2, #80]
-	str	r4, [sp, #124]          @ 4-byte Spill
-	ldr	r4, [r2, #84]
-	str	r4, [sp, #128]          @ 4-byte Spill
-	ldr	r4, [r2, #88]
-	str	r4, [sp, #132]          @ 4-byte Spill
-	ldr	r4, [r2, #92]
-	str	r4, [sp, #76]           @ 4-byte Spill
-	adcs	r4, r5, r8
-	adcs	r7, r6, r12
-	ldr	r6, [r2, #16]
-	str	r4, [sp, #28]           @ 4-byte Spill
-	str	r7, [sp, #24]           @ 4-byte Spill
-	adcs	r7, r9, lr
-	add	r9, r1, #32
-	add	lr, r1, #16
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldr	r7, [r2, #32]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #84]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #88]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #92]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #96]           @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #100]          @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #104]          @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [r1, #88]
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldr	r2, [r1, #92]
-	str	r2, [sp, #68]           @ 4-byte Spill
-	ldm	r9, {r4, r5, r8, r9}
-	ldr	r2, [r1, #48]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #52]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #80]          @ 4-byte Reload
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r6, r1
-	str	r11, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	adcs	r2, r10, r2
-	ldr	r10, [r3]
-	str	r7, [r0, #8]
-	str	r6, [r0, #12]
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	ldr	r7, [sp]                @ 4-byte Reload
-	adcs	r1, r1, r12
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r2, r2, lr
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r2, r2, r5
-	ldr	r5, [r3, #12]
-	str	r2, [r0, #36]
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r2, r2, r9
-	str	r2, [r0, #44]
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	adcs	r12, r1, r7
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r12, [sp, #80]          @ 4-byte Spill
-	adcs	r8, r1, r2
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r8, [sp, #88]           @ 4-byte Spill
-	adcs	lr, r1, r2
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	lr, [sp, #92]           @ 4-byte Spill
-	adcs	r4, r1, r2
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r4, [sp, #104]          @ 4-byte Spill
-	adcs	r9, r1, r2
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r9, [sp, #96]           @ 4-byte Spill
-	adcs	r11, r1, r2
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r11, [sp, #108]         @ 4-byte Spill
-	adcs	r6, r1, r2
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r6, [sp, #112]          @ 4-byte Spill
-	adcs	r7, r1, r2
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r7, [sp, #116]          @ 4-byte Spill
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	str	r1, [sp, #120]          @ 4-byte Spill
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r1, [sp, #128]          @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [r3, #8]
-	str	r1, [sp, #132]          @ 4-byte Spill
-	mov	r1, #0
-	adc	r1, r1, #0
-	subs	r10, r12, r10
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [r3, #4]
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [r3, #20]
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [r3, #24]
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [r3, #28]
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	sbcs	r1, r8, r1
-	ldr	r8, [r3, #40]
-	sbcs	r2, lr, r2
-	ldr	lr, [r3, #32]
-	sbcs	r12, r4, r5
-	ldr	r4, [r3, #36]
-	ldr	r3, [r3, #44]
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	str	r3, [sp, #64]           @ 4-byte Spill
-	ldr	r3, [sp, #68]           @ 4-byte Reload
-	sbcs	r3, r9, r3
-	sbcs	r9, r11, r5
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	sbcs	r5, r6, r5
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	sbcs	r6, r7, r6
-	ldr	r7, [sp, #124]          @ 4-byte Reload
-	sbcs	r11, r7, lr
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	sbcs	lr, r7, r4
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	sbcs	r8, r7, r8
-	ldr	r7, [sp, #132]          @ 4-byte Reload
-	sbcs	r4, r7, r4
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	str	r4, [sp, #84]           @ 4-byte Spill
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	sbc	r7, r7, #0
-	ands	r7, r7, #1
-	movne	r10, r4
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	str	r10, [r0, #48]
-	movne	r1, r4
-	str	r1, [r0, #52]
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	movne	r2, r1
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r2, [r0, #56]
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	movne	r12, r1
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r12, [r0, #60]
-	movne	r3, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r3, [r0, #64]
-	movne	r9, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r9, [r0, #68]
-	movne	r5, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r5, [r0, #72]
-	movne	r6, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r6, [r0, #76]
-	movne	r11, r1
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r11, [r0, #80]
-	movne	lr, r1
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	lr, [r0, #84]
-	movne	r8, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r8, [r0, #88]
-	movne	r2, r1
-	str	r2, [r0, #92]
-	add	sp, sp, #136
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end188:
-	.size	mcl_fpDbl_add12L, .Lfunc_end188-mcl_fpDbl_add12L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sub12L
-	.align	2
-	.type	mcl_fpDbl_sub12L,%function
-mcl_fpDbl_sub12L:                       @ @mcl_fpDbl_sub12L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#136
-	sub	sp, sp, #136
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #128]          @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #104]          @ 4-byte Spill
-	ldr	r7, [r2, #72]
-	str	r7, [sp, #132]          @ 4-byte Spill
-	ldr	r7, [r2, #76]
-	str	r7, [sp, #108]          @ 4-byte Spill
-	ldr	r7, [r2, #80]
-	str	r7, [sp, #112]          @ 4-byte Spill
-	ldr	r7, [r2, #84]
-	str	r7, [sp, #116]          @ 4-byte Spill
-	ldr	r7, [r2, #88]
-	str	r7, [sp, #124]          @ 4-byte Spill
-	ldr	r7, [r2, #92]
-	str	r7, [sp, #120]          @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #100]          @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #96]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #92]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #88]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #84]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r2]
-	ldmib	r2, {r6, r9}
-	ldr	r5, [r1]
-	ldr	r8, [r2, #12]
-	ldmib	r1, {r4, lr}
-	ldr	r12, [r1, #12]
-	ldr	r10, [r2, #20]
-	subs	r5, r5, r7
-	sbcs	r4, r4, r6
-	str	r5, [sp, #32]           @ 4-byte Spill
-	ldr	r5, [r2, #36]
-	ldr	r6, [r2, #16]
-	sbcs	r7, lr, r9
-	str	r4, [sp, #24]           @ 4-byte Spill
-	ldr	r4, [r2, #32]
-	add	r9, r1, #32
-	add	lr, r1, #16
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r5, [sp, #44]           @ 4-byte Spill
-	str	r4, [sp, #40]           @ 4-byte Spill
-	str	r7, [sp, #36]           @ 4-byte Spill
-	sbcs	r7, r12, r8
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #68]           @ 4-byte Spill
-	ldr	r2, [r1, #88]
-	str	r2, [sp, #72]           @ 4-byte Spill
-	ldr	r2, [r1, #92]
-	str	r2, [sp, #76]           @ 4-byte Spill
-	ldm	r9, {r4, r5, r8, r9}
-	ldr	r2, [r1, #48]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #52]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #32]          @ 4-byte Reload
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	sbcs	r1, r1, r6
-	str	r11, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	sbcs	r2, r2, r10
-	str	r7, [r0, #8]
-	str	r6, [r0, #12]
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	ldr	r7, [sp]                @ 4-byte Reload
-	sbcs	r1, r12, r1
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	sbcs	r2, lr, r2
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	sbcs	r1, r4, r1
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	sbcs	r2, r5, r2
-	str	r2, [r0, #36]
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	sbcs	r1, r8, r1
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	sbcs	r2, r9, r2
-	str	r2, [r0, #44]
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	sbcs	r9, r7, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	str	r9, [sp, #40]           @ 4-byte Spill
-	sbcs	lr, r2, r1
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	mov	r1, #0
-	str	lr, [sp, #44]           @ 4-byte Spill
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	str	r2, [sp, #92]           @ 4-byte Spill
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	sbcs	r4, r7, r2
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	str	r4, [sp, #88]           @ 4-byte Spill
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	str	r2, [sp, #128]          @ 4-byte Spill
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	sbcs	r5, r7, r2
-	ldr	r2, [sp, #132]          @ 4-byte Reload
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	str	r5, [sp, #96]           @ 4-byte Spill
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	str	r2, [sp, #132]          @ 4-byte Spill
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	sbcs	r8, r7, r2
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	str	r8, [sp, #104]          @ 4-byte Spill
-	sbcs	r10, r7, r2
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	str	r10, [sp, #108]         @ 4-byte Spill
-	sbcs	r6, r7, r2
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	str	r6, [sp, #112]          @ 4-byte Spill
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	str	r2, [sp, #124]          @ 4-byte Spill
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	sbc	r1, r1, #0
-	str	r2, [sp, #120]          @ 4-byte Spill
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [r3, #32]
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [r3, #36]
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [r3, #40]
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [r3, #44]
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldmib	r3, {r1, r2, r12}
-	ldr	r7, [r3, #16]
-	ldr	r11, [r3, #20]
-	str	r7, [sp, #64]           @ 4-byte Spill
-	ldr	r7, [r3, #24]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r3, #28]
-	ldr	r3, [r3]
-	adds	r3, r9, r3
-	ldr	r9, [sp, #92]           @ 4-byte Reload
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	adcs	r1, lr, r1
-	ldr	lr, [sp, #128]          @ 4-byte Reload
-	adcs	r2, r9, r2
-	adcs	r12, r4, r12
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	adcs	lr, lr, r4
-	adcs	r4, r5, r11
-	ldr	r5, [sp, #132]          @ 4-byte Reload
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	adcs	r5, r5, r7
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	adcs	r8, r8, r7
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	adcs	r10, r10, r7
-	ldr	r7, [sp, #80]           @ 4-byte Reload
-	adcs	r6, r6, r7
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	str	r6, [sp, #80]           @ 4-byte Spill
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	adcs	r6, r6, r7
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	str	r6, [sp, #84]           @ 4-byte Spill
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	adc	r6, r6, r11
-	str	r6, [sp, #116]          @ 4-byte Spill
-	ldr	r6, [sp, #100]          @ 4-byte Reload
-	ands	r6, r6, #1
-	moveq	r3, r7
-	moveq	r2, r9
-	str	r3, [r0, #48]
-	ldr	r3, [sp, #44]           @ 4-byte Reload
-	moveq	r1, r3
-	cmp	r6, #0
-	str	r1, [r0, #52]
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r2, [r0, #56]
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	moveq	r12, r1
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r12, [r0, #60]
-	moveq	lr, r1
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	lr, [r0, #64]
-	moveq	r4, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	cmp	r6, #0
-	str	r4, [r0, #68]
-	moveq	r5, r1
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	str	r5, [r0, #72]
-	moveq	r8, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r8, [r0, #76]
-	moveq	r10, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	cmp	r6, #0
-	str	r10, [r0, #80]
-	moveq	r2, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r2, [r0, #84]
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r2, [r0, #88]
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	moveq	r2, r1
-	str	r2, [r0, #92]
-	add	sp, sp, #136
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end189:
-	.size	mcl_fpDbl_sub12L, .Lfunc_end189-mcl_fpDbl_sub12L
-	.cantunwind
-	.fnend
-
-	.align	2
-	.type	.LmulPv416x32,%function
-.LmulPv416x32:                          @ @mulPv416x32
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r9, [r1, #12]
-	umull	r4, r8, lr, r2
-	umull	lr, r6, r12, r2
-	mov	r5, r4
-	mov	r7, r6
-	str	lr, [r0]
-	umull	lr, r12, r9, r2
-	umlal	r7, r5, r3, r2
-	str	r5, [r0, #8]
-	str	r7, [r0, #4]
-	umull	r5, r7, r3, r2
-	adds	r3, r6, r5
-	adcs	r3, r7, r4
-	adcs	r3, r8, lr
-	str	r3, [r0, #12]
-	ldr	r3, [r1, #16]
-	umull	r7, r6, r3, r2
-	adcs	r3, r12, r7
-	str	r3, [r0, #16]
-	ldr	r3, [r1, #20]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #20]
-	ldr	r3, [r1, #24]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #24]
-	ldr	r3, [r1, #28]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #28]
-	ldr	r3, [r1, #32]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #32]
-	ldr	r3, [r1, #36]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #36]
-	ldr	r3, [r1, #40]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #40]
-	ldr	r3, [r1, #44]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #44]
-	ldr	r1, [r1, #48]
-	umull	r3, r7, r1, r2
-	adcs	r1, r5, r3
-	str	r1, [r0, #48]
-	adc	r1, r7, #0
-	str	r1, [r0, #52]
-	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
-	mov	pc, lr
-.Lfunc_end190:
-	.size	.LmulPv416x32, .Lfunc_end190-.LmulPv416x32
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mulUnitPre13L
-	.align	2
-	.type	mcl_fp_mulUnitPre13L,%function
-mcl_fp_mulUnitPre13L:                   @ @mcl_fp_mulUnitPre13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#68
-	sub	sp, sp, #68
-	mov	r4, r0
-	add	r0, sp, #8
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #60]
-	add	r12, sp, #12
-	ldr	lr, [sp, #56]
-	ldr	r8, [sp, #52]
-	ldr	r9, [sp, #48]
-	ldr	r10, [sp, #44]
-	ldr	r11, [sp, #40]
-	ldr	r5, [sp, #36]
-	ldr	r6, [sp, #32]
-	ldr	r7, [sp, #28]
-	ldr	r3, [sp, #8]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r12, {r0, r1, r2, r12}
-	str	r3, [r4]
-	stmib	r4, {r0, r1, r2, r12}
-	str	r7, [r4, #20]
-	str	r6, [r4, #24]
-	str	r5, [r4, #28]
-	str	r11, [r4, #32]
-	str	r10, [r4, #36]
-	str	r9, [r4, #40]
-	str	r8, [r4, #44]
-	str	lr, [r4, #48]
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	str	r0, [r4, #52]
-	add	sp, sp, #68
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end191:
-	.size	mcl_fp_mulUnitPre13L, .Lfunc_end191-mcl_fp_mulUnitPre13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_mulPre13L
-	.align	2
-	.type	mcl_fpDbl_mulPre13L,%function
-mcl_fpDbl_mulPre13L:                    @ @mcl_fpDbl_mulPre13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#820
-	sub	sp, sp, #820
-	mov	r7, r2
-	mov	r4, r0
-	add	r0, sp, #760
-	str	r1, [sp, #84]           @ 4-byte Spill
-	mov	r5, r1
-	ldr	r2, [r7]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	str	r4, [sp, #76]           @ 4-byte Spill
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #812]
-	ldr	r1, [sp, #764]
-	ldr	r2, [r7, #4]
-	mov	r6, r5
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #808]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #768]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #804]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #772]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #800]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	mov	r1, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #796]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #792]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #788]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #784]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #780]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #776]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #760]
-	str	r0, [r4]
-	add	r0, sp, #704
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #756]
-	add	r10, sp, #728
-	add	lr, sp, #704
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #752]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #748]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #744]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r5, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #24]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #4]
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r7, #8]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r6
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #648
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #700]
-	add	lr, sp, #676
-	add	r9, sp, #656
-	ldr	r11, [sp, #692]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	lr, {r5, r7, r12, lr}
-	ldr	r8, [sp, #648]
-	ldr	r10, [sp, #652]
-	ldm	r9, {r0, r1, r2, r3, r9}
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	adds	r6, r8, r6
-	str	r6, [r4, #8]
-	mov	r6, r4
-	ldr	r4, [sp, #40]           @ 4-byte Reload
-	adcs	r4, r10, r4
-	str	r4, [sp, #24]           @ 4-byte Spill
-	ldr	r4, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r2, [r5, #12]
-	adcs	r0, r7, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #592
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #644]
-	add	lr, sp, #612
-	add	r7, sp, #600
-	ldr	r8, [sp, #628]
-	ldr	r11, [sp, #624]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #640]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #636]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #632]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r3, r12, lr}
-	ldr	r0, [sp, #592]
-	ldr	r9, [sp, #596]
-	ldm	r7, {r1, r2, r7}
-	ldr	r10, [sp, #24]          @ 4-byte Reload
-	adds	r0, r0, r10
-	str	r0, [r6, #12]
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r6, r9, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #16]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #536
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #588]
-	ldr	r8, [sp, #536]
-	add	r4, sp, #540
-	ldr	r11, [sp, #580]
-	ldr	r9, [sp, #576]
-	ldr	lr, [sp, #572]
-	ldr	r5, [sp, #568]
-	ldr	r10, [sp, #564]
-	ldr	r12, [sp, #560]
-	ldr	r3, [sp, #556]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #584]
-	adds	r6, r8, r6
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r4, {r0, r1, r2, r4}
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	str	r6, [r7, #16]
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r2, [r4, #20]
-	adcs	r0, r3, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #480
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #532]
-	add	r10, sp, #480
-	add	r12, sp, #492
-	ldr	r6, [sp, #516]
-	ldr	r11, [sp, #512]
-	ldr	lr, [sp, #508]
-	ldr	r9, [sp, #504]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #528]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #524]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #520]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r0, r1, r10}
-	ldm	r12, {r2, r3, r12}
-	ldr	r8, [sp, #24]           @ 4-byte Reload
-	adds	r0, r0, r8
-	str	r0, [r7, #20]
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	mov	r7, r5
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r4, #24]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #424
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #476]
-	add	r5, sp, #428
-	ldr	r11, [sp, #464]
-	ldr	r9, [sp, #460]
-	ldr	lr, [sp, #456]
-	ldr	r10, [sp, #452]
-	ldr	r12, [sp, #448]
-	ldr	r3, [sp, #444]
-	ldr	r8, [sp, #424]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #472]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #468]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r5, {r0, r1, r2, r5}
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	adds	r6, r8, r4
-	ldr	r4, [sp, #76]           @ 4-byte Reload
-	str	r6, [r4, #24]
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	ldr	r2, [r5, #28]
-	adcs	r0, r3, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r7
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #368
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #420]
-	add	r12, sp, #388
-	add	r10, sp, #368
-	ldr	lr, [sp, #408]
-	ldr	r6, [sp, #404]
-	ldr	r11, [sp, #400]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #416]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #412]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r12, {r3, r9, r12}
-	ldr	r7, [sp, #384]
-	ldm	r10, {r0, r1, r10}
-	ldr	r8, [sp, #24]           @ 4-byte Reload
-	ldr	r2, [sp, #380]
-	adds	r0, r0, r8
-	str	r0, [r4, #28]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r4, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #32]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #312
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #364]
-	add	r11, sp, #344
-	add	lr, sp, #316
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #360]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #356]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r7, r9, r11}
-	ldr	r10, [sp, #340]
-	ldr	r8, [sp, #312]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	adds	r6, r8, r5
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	str	r6, [r5, #32]
-	ldr	r6, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r6, #36]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #256
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #308]
-	add	lr, sp, #288
-	add	r12, sp, #268
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #304]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #300]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	lr, {r7, r8, lr}
-	ldr	r11, [sp, #284]
-	ldr	r1, [sp, #256]
-	ldr	r0, [sp, #260]
-	ldr	r10, [sp, #264]
-	ldm	r12, {r2, r3, r9, r12}
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	adds	r1, r1, r4
-	str	r1, [r5, #36]
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	adcs	r4, r0, r1
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r6, #40]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #200
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #252]
-	add	r11, sp, #228
-	add	lr, sp, #204
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #248]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #244]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r7, r8, r10, r11}
-	ldr	r9, [sp, #200]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r6, r9, r4
-	ldr	r4, [sp, #76]           @ 4-byte Reload
-	str	r6, [r4, #40]
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r6, #44]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	add	r0, sp, #144
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #196]
-	add	r11, sp, #164
-	add	r12, sp, #152
-	ldr	lr, [sp, #184]
-	ldr	r7, [sp, #180]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #192]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #188]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r5, r8, r10, r11}
-	ldr	r2, [sp, #144]
-	ldr	r1, [sp, #148]
-	ldm	r12, {r0, r3, r12}
-	ldr	r9, [sp, #24]           @ 4-byte Reload
-	adds	r2, r2, r9
-	str	r2, [r4, #44]
-	ldr	r2, [r6, #48]
-	ldr	r6, [sp, #20]           @ 4-byte Reload
-	adcs	r6, r1, r6
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r9, r0, r1
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	add	r0, sp, #88
-	bl	.LmulPv416x32(PLT)
-	add	r3, sp, #88
-	add	r11, sp, #104
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r12, r0, r6
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	lr, r1, r9
-	adcs	r5, r2, r0
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adcs	r6, r3, r0
-	ldr	r0, [sp, #140]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldm	r11, {r0, r1, r2, r3, r7, r8, r9, r10, r11}
-	str	r12, [r4, #48]
-	str	lr, [r4, #52]
-	str	r5, [r4, #56]
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	str	r6, [r4, #60]
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	add	r12, r4, #80
-	adcs	r0, r0, r5
-	adcs	r1, r1, r6
-	str	r0, [r4, #64]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	str	r1, [r4, #68]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r3, r1
-	str	r0, [r4, #72]
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r3, [sp, #68]           @ 4-byte Reload
-	str	r1, [r4, #76]
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r8, r1
-	adcs	r2, r9, r2
-	adcs	r3, r10, r3
-	adcs	r7, r11, r7
-	adc	r6, r6, #0
-	stm	r12, {r0, r1, r2, r3, r7}
-	str	r6, [r4, #100]
-	add	sp, sp, #820
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end192:
-	.size	mcl_fpDbl_mulPre13L, .Lfunc_end192-mcl_fpDbl_mulPre13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sqrPre13L
-	.align	2
-	.type	mcl_fpDbl_sqrPre13L,%function
-mcl_fpDbl_sqrPre13L:                    @ @mcl_fpDbl_sqrPre13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#820
-	sub	sp, sp, #820
-	mov	r5, r1
-	mov	r4, r0
-	add	r0, sp, #760
-	ldr	r2, [r5]
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #812]
-	ldr	r1, [sp, #764]
-	ldr	r2, [r5, #4]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #808]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #768]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #804]
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #772]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #800]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	mov	r1, r5
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #796]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #792]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #788]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #784]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #780]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #776]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #760]
-	str	r0, [r4]
-	add	r0, sp, #704
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #756]
-	add	r10, sp, #728
-	add	lr, sp, #704
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #752]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #748]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #36]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #4]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #8]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #648
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #700]
-	add	lr, sp, #680
-	add	r11, sp, #656
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	lr, {r6, r12, lr}
-	ldr	r8, [sp, #648]
-	ldr	r10, [sp, #652]
-	ldm	r11, {r0, r1, r2, r3, r9, r11}
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adds	r7, r8, r7
-	str	r7, [r4, #8]
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	adcs	r7, r10, r7
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #12]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #592
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #644]
-	add	r9, sp, #620
-	add	lr, sp, #600
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #640]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #636]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r9, {r6, r7, r8, r9}
-	ldr	r0, [sp, #592]
-	ldr	r11, [sp, #596]
-	ldm	lr, {r1, r2, r3, r12, lr}
-	ldr	r10, [sp, #36]          @ 4-byte Reload
-	adds	r0, r0, r10
-	str	r0, [r4, #12]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #16]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #536
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #588]
-	add	r12, sp, #540
-	ldr	r11, [sp, #576]
-	ldr	lr, [sp, #572]
-	ldr	r6, [sp, #568]
-	ldr	r8, [sp, #536]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #584]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #580]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r12, {r0, r1, r2, r3, r9, r10, r12}
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adds	r7, r8, r7
-	str	r7, [r4, #16]
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	adcs	r7, r0, r7
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #20]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #480
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #532]
-	add	r10, sp, #512
-	add	lr, sp, #484
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #528]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #524]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r6, r8, r10}
-	ldr	r9, [sp, #480]
-	ldr	r11, [sp, #508]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r7, r9, r7
-	str	r7, [r4, #20]
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #24]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #424
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #476]
-	add	r8, sp, #456
-	add	r12, sp, #432
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #472]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #468]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r8, {r6, r7, r8}
-	ldr	lr, [sp, #452]
-	ldr	r10, [sp, #448]
-	ldr	r0, [sp, #424]
-	ldr	r11, [sp, #428]
-	ldm	r12, {r1, r2, r3, r12}
-	ldr	r9, [sp, #36]           @ 4-byte Reload
-	adds	r0, r0, r9
-	str	r0, [r4, #24]
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #28]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #368
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #420]
-	add	r11, sp, #400
-	add	lr, sp, #372
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #416]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #412]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r11, {r6, r8, r11}
-	ldr	r10, [sp, #368]
-	ldm	lr, {r0, r1, r2, r3, r9, r12, lr}
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adds	r7, r10, r7
-	str	r7, [r4, #28]
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adcs	r7, r0, r7
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #32]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #312
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #364]
-	add	r10, sp, #344
-	add	lr, sp, #316
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #360]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #356]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r6, r8, r10}
-	ldr	r9, [sp, #312]
-	ldr	r11, [sp, #340]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r7, r9, r7
-	str	r7, [r4, #32]
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #36]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #256
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #308]
-	add	r8, sp, #288
-	add	r12, sp, #264
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #304]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #300]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r8, {r6, r7, r8}
-	ldr	lr, [sp, #284]
-	ldr	r10, [sp, #280]
-	ldr	r0, [sp, #256]
-	ldr	r11, [sp, #260]
-	ldm	r12, {r1, r2, r3, r12}
-	ldr	r9, [sp, #36]           @ 4-byte Reload
-	adds	r0, r0, r9
-	str	r0, [r4, #36]
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #40]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #200
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #252]
-	add	r10, sp, #228
-	add	r12, sp, #200
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #248]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #244]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r10}
-	ldr	lr, [sp, #224]
-	ldr	r9, [sp, #220]
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r11, [sp, #32]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #40]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #44]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #144
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #196]
-	add	r12, sp, #148
-	ldr	r7, [sp, #180]
-	ldr	r11, [sp, #176]
-	ldr	r8, [sp, #172]
-	ldr	lr, [sp, #168]
-	ldr	r10, [sp, #164]
-	ldr	r2, [sp, #144]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #192]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #188]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #184]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r12, {r0, r1, r3, r12}
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	adds	r2, r2, r6
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	str	r2, [r4, #44]
-	ldr	r2, [r5, #48]
-	adcs	r6, r0, r6
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r9, r1, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	add	r0, sp, #88
-	bl	.LmulPv416x32(PLT)
-	add	r3, sp, #88
-	add	r11, sp, #104
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r12, r0, r6
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	lr, r1, r9
-	adcs	r5, r2, r0
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r6, r3, r0
-	ldr	r0, [sp, #140]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldm	r11, {r0, r1, r2, r3, r7, r8, r9, r10, r11}
-	str	r12, [r4, #48]
-	str	lr, [r4, #52]
-	str	r5, [r4, #56]
-	ldr	r5, [sp, #32]           @ 4-byte Reload
-	str	r6, [r4, #60]
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	add	r12, r4, #80
-	adcs	r0, r0, r5
-	adcs	r1, r1, r6
-	str	r0, [r4, #64]
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r6, [sp, #56]           @ 4-byte Reload
-	str	r1, [r4, #68]
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r3, r1
-	str	r0, [r4, #72]
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	ldr	r3, [sp, #76]           @ 4-byte Reload
-	str	r1, [r4, #76]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r8, r1
-	adcs	r2, r9, r2
-	adcs	r3, r10, r3
-	adcs	r7, r11, r7
-	adc	r6, r6, #0
-	stm	r12, {r0, r1, r2, r3, r7}
-	str	r6, [r4, #100]
-	add	sp, sp, #820
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end193:
-	.size	mcl_fpDbl_sqrPre13L, .Lfunc_end193-mcl_fpDbl_sqrPre13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mont13L
-	.align	2
-	.type	mcl_fp_mont13L,%function
-mcl_fp_mont13L:                         @ @mcl_fp_mont13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#548
-	sub	sp, sp, #548
-	.pad	#1024
-	sub	sp, sp, #1024
-	add	r12, sp, #100
-	add	r6, sp, #1024
-	mov	r4, r3
-	stm	r12, {r1, r2, r3}
-	str	r0, [sp, #68]           @ 4-byte Spill
-	add	r0, r6, #488
-	ldr	r5, [r3, #-4]
-	ldr	r2, [r2]
-	str	r5, [sp, #96]           @ 4-byte Spill
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #1516]
-	ldr	r7, [sp, #1512]
-	mov	r1, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #1520]
-	mul	r2, r7, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1524]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1564]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #1560]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #1556]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #1552]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #1548]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #1544]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1540]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1536]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1532]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1528]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #1456
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #1508]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r5, [sp, #1480]
-	ldr	r10, [sp, #1476]
-	ldr	r11, [sp, #1472]
-	ldr	r6, [sp, #1456]
-	ldr	r9, [sp, #1460]
-	ldr	r8, [sp, #1464]
-	ldr	r4, [sp, #1468]
-	add	lr, sp, #1024
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1504]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1500]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1496]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1492]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1488]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1484]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #4]
-	add	r0, lr, #376
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r6, r7
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	ldr	r3, [sp, #1416]
-	ldr	r12, [sp, #1420]
-	ldr	lr, [sp, #1424]
-	ldr	r6, [sp, #1432]
-	ldr	r7, [sp, #1436]
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	ldr	r9, [sp, #1444]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	ldr	r8, [sp, #1440]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #1428]
-	adcs	r1, r11, r1
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	ldr	r11, [sp, #72]          @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r10, r1
-	ldr	r10, [sp, #1448]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r5, r1
-	ldr	r5, [sp, #1400]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #1412]
-	adc	r0, r0, #0
-	adds	r11, r11, r5
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #1408]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1452]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1404]
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #1344
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #1396]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r8, [sp, #1368]
-	ldr	r9, [sp, #1364]
-	ldr	r10, [sp, #1360]
-	ldr	r11, [sp, #1344]
-	ldr	r6, [sp, #1348]
-	ldr	r7, [sp, #1352]
-	ldr	r4, [sp, #1356]
-	add	lr, sp, #1024
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1392]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1388]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1384]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1380]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1376]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1372]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, lr, #264
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r5, r11
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	ldr	r5, [sp, #1288]
-	ldr	r2, [sp, #1300]
-	ldr	r3, [sp, #1304]
-	ldr	r12, [sp, #1308]
-	ldr	lr, [sp, #1312]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1320]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1324]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1316]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1336]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1332]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1328]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1296]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r11, r11, r5
-	ldr	r5, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1340]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1292]
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #1232
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #1284]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r8, [sp, #1256]
-	ldr	r9, [sp, #1252]
-	ldr	r10, [sp, #1248]
-	ldr	r11, [sp, #1232]
-	ldr	r6, [sp, #1236]
-	ldr	r7, [sp, #1240]
-	ldr	r4, [sp, #1244]
-	add	lr, sp, #1024
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1280]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1276]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1272]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1268]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1264]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1260]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, lr, #152
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r5, r11
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	ldr	r5, [sp, #1176]
-	ldr	r2, [sp, #1188]
-	ldr	r3, [sp, #1192]
-	ldr	r12, [sp, #1196]
-	ldr	lr, [sp, #1200]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1208]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1212]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1204]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1224]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1220]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1216]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1184]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r11, r11, r5
-	ldr	r5, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1228]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1180]
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #1120
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #1172]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r8, [sp, #1144]
-	ldr	r9, [sp, #1140]
-	ldr	r10, [sp, #1136]
-	ldr	r11, [sp, #1120]
-	ldr	r6, [sp, #1124]
-	ldr	r7, [sp, #1128]
-	ldr	r4, [sp, #1132]
-	add	lr, sp, #1024
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1168]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1164]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1160]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1156]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1152]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1148]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, lr, #40
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r5, r11
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	ldr	r5, [sp, #1064]
-	ldr	r2, [sp, #1076]
-	ldr	r3, [sp, #1080]
-	ldr	r12, [sp, #1084]
-	ldr	lr, [sp, #1088]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1096]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1100]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1092]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1112]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1108]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1104]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1072]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r11, r11, r5
-	ldr	r5, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1116]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1068]
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #1008
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #1060]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r8, [sp, #1032]
-	ldr	r9, [sp, #1028]
-	ldr	r10, [sp, #1024]
-	ldr	r11, [sp, #1008]
-	ldr	r6, [sp, #1012]
-	ldr	r7, [sp, #1016]
-	ldr	r4, [sp, #1020]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1056]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1052]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1048]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1044]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1040]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1036]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, sp, #952
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r5, r11
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #956
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #980
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1004]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r6, r7, r8, r9, r10}
-	ldr	r5, [sp, #952]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	adds	r11, r11, r5
-	ldr	r5, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #896
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #948]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r8, [sp, #920]
-	ldr	r9, [sp, #916]
-	ldr	r10, [sp, #912]
-	ldr	r11, [sp, #896]
-	ldr	r6, [sp, #900]
-	ldr	r7, [sp, #904]
-	ldr	r4, [sp, #908]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #944]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #940]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #936]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #932]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #928]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #924]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #840
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r5, r11
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #844
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #868
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #892]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r6, r7, r8, r9, r10}
-	ldr	r5, [sp, #840]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	adds	r11, r11, r5
-	ldr	r5, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #784
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #836]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r8, [sp, #808]
-	ldr	r9, [sp, #804]
-	ldr	r10, [sp, #800]
-	ldr	r11, [sp, #784]
-	ldr	r6, [sp, #788]
-	ldr	r7, [sp, #792]
-	ldr	r4, [sp, #796]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #832]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #828]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #824]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #820]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #816]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #812]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #728
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r5, r11
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #732
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #756
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #780]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r6, r7, r8, r9, r10}
-	ldr	r5, [sp, #728]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	adds	r11, r11, r5
-	ldr	r5, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #672
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #724]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r8, [sp, #696]
-	ldr	r9, [sp, #692]
-	ldr	r10, [sp, #688]
-	ldr	r11, [sp, #672]
-	ldr	r6, [sp, #676]
-	ldr	r7, [sp, #680]
-	ldr	r4, [sp, #684]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #720]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #716]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #712]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #708]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #616
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r5, r11
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #620
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #644
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #668]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r6, r7, r8, r9, r10}
-	ldr	r5, [sp, #616]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	adds	r11, r11, r5
-	ldr	r5, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #560
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #612]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r8, [sp, #584]
-	ldr	r9, [sp, #580]
-	ldr	r10, [sp, #576]
-	ldr	r11, [sp, #560]
-	ldr	r6, [sp, #564]
-	ldr	r7, [sp, #568]
-	ldr	r4, [sp, #572]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #608]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #604]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #600]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #596]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #592]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #588]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, sp, #504
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r5, r11
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #508
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #532
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #556]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #552]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r10, {r4, r6, r8, r9, r10}
-	ldr	r5, [sp, #504]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	ldr	r7, [sp, #88]           @ 4-byte Reload
-	adds	r5, r11, r5
-	adcs	r0, r7, r0
-	str	r5, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	mul	r2, r5, r8
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #448
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #500]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r5, [sp, #472]
-	ldr	r9, [sp, #468]
-	ldr	r10, [sp, #464]
-	ldr	r11, [sp, #448]
-	ldr	r6, [sp, #452]
-	ldr	r7, [sp, #456]
-	ldr	r4, [sp, #460]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #496]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #492]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #488]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #484]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #480]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #476]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #40]
-	add	r0, sp, #392
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #408
-	adds	r0, r0, r11
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	add	r6, sp, #392
-	adcs	r11, r1, r7
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	add	r10, sp, #432
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldm	r6, {r2, r5, r6}
-	ldr	r4, [sp, #404]
-	adds	r0, r0, r2
-	mul	r1, r0, r8
-	adcs	r5, r11, r5
-	str	r0, [sp, #92]           @ 4-byte Spill
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldm	r10, {r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	str	r5, [sp, #88]           @ 4-byte Spill
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	adcs	r5, r5, r6
-	str	r5, [sp, #84]           @ 4-byte Spill
-	ldr	r5, [sp, #80]           @ 4-byte Reload
-	adcs	r4, r5, r4
-	str	r4, [sp, #80]           @ 4-byte Spill
-	ldr	r4, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #36]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	add	r0, sp, #336
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #388]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r6, [sp, #364]
-	ldr	r8, [sp, #360]
-	ldr	r9, [sp, #356]
-	ldr	r10, [sp, #352]
-	ldr	r7, [sp, #336]
-	ldr	r4, [sp, #340]
-	ldr	r11, [sp, #344]
-	ldr	r5, [sp, #348]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #384]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #380]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #376]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #372]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #368]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #44]
-	add	r0, sp, #280
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #296
-	adds	r0, r0, r7
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #292]
-	adcs	r11, r1, r11
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #288]
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	add	r10, sp, #320
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #284]
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #280]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #28]           @ 4-byte Spill
-	adds	r1, r0, r2
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r6, r11, r6
-	str	r1, [sp, #92]           @ 4-byte Spill
-	mul	r2, r1, r0
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	str	r6, [sp, #40]           @ 4-byte Spill
-	ldr	r6, [sp, #88]           @ 4-byte Reload
-	adcs	r5, r6, r5
-	str	r5, [sp, #36]           @ 4-byte Spill
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	adcs	r4, r5, r4
-	str	r4, [sp, #32]           @ 4-byte Spill
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #48]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #224
-	bl	.LmulPv416x32(PLT)
-	ldr	r1, [sp, #276]
-	add	r11, sp, #224
-	ldr	r4, [sp, #252]
-	ldr	r8, [sp, #248]
-	ldr	r9, [sp, #244]
-	ldr	r10, [sp, #240]
-	add	r0, sp, #168
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #272]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #268]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #264]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #260]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #256]
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r6, r7, r11}
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	ldr	r5, [sp, #236]
-	ldr	r2, [r1, #48]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #184
-	adds	r0, r0, r6
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	add	r7, sp, #168
-	adcs	r1, r1, r11
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	add	r10, sp, #208
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldm	r7, {r2, r6, r7}
-	ldr	r5, [sp, #180]
-	adds	r4, r0, r2
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r1, r4, r0
-	ldr	r0, [sp, #220]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #104]         @ 4-byte Reload
-	adcs	r11, r11, r6
-	ldr	r6, [sp, #100]          @ 4-byte Reload
-	adcs	r6, r6, r7
-	str	r6, [sp, #36]           @ 4-byte Spill
-	ldr	r6, [sp, #92]           @ 4-byte Reload
-	adcs	r5, r6, r5
-	ldr	r6, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r8, r0, r8
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r9, r0, r9
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #108]         @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	mov	r0, #0
-	mov	r1, r10
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	add	r0, sp, #112
-	bl	.LmulPv416x32(PLT)
-	add	r3, sp, #112
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r4, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r7, r11, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r7, [sp, #48]           @ 4-byte Spill
-	adcs	lr, r0, r2
-	ldr	r0, [sp, #128]
-	adcs	r12, r5, r3
-	str	lr, [sp, #52]           @ 4-byte Spill
-	str	r12, [sp, #56]          @ 4-byte Spill
-	adcs	r4, r1, r0
-	ldr	r0, [sp, #132]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r4, [sp, #60]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #136]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #140]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #144]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #148]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #152]
-	adcs	r0, r8, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #156]
-	adcs	r0, r9, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #160]
-	adcs	r0, r1, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #164]
-	adcs	r0, r6, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	mov	r0, r10
-	ldmib	r0, {r1, r2, r3, r5}
-	ldr	r6, [r0]
-	ldr	r10, [r0, #20]
-	ldr	r11, [r0, #28]
-	str	r5, [sp, #40]           @ 4-byte Spill
-	ldr	r5, [r0, #24]
-	subs	r6, r7, r6
-	sbcs	r9, lr, r1
-	str	r5, [sp, #44]           @ 4-byte Spill
-	mov	r5, r0
-	sbcs	r0, r12, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	ldr	r1, [r5, #48]
-	sbcs	r3, r4, r3
-	ldr	lr, [r5, #32]
-	ldr	r12, [r5, #36]
-	ldr	r8, [r5, #40]
-	ldr	r4, [r5, #44]
-	ldr	r5, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	sbcs	r1, r1, r2
-	ldr	r2, [sp, #76]           @ 4-byte Reload
-	sbcs	r7, r2, r10
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	sbcs	r2, r2, r5
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	sbcs	r10, r5, r11
-	ldr	r5, [sp, #88]           @ 4-byte Reload
-	sbcs	r11, r5, lr
-	ldr	r5, [sp, #92]           @ 4-byte Reload
-	sbcs	r12, r5, r12
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	sbcs	lr, r5, r8
-	ldr	r5, [sp, #100]          @ 4-byte Reload
-	sbcs	r4, r5, r4
-	ldr	r5, [sp, #104]          @ 4-byte Reload
-	str	r4, [sp, #44]           @ 4-byte Spill
-	ldr	r4, [sp, #108]          @ 4-byte Reload
-	sbcs	r5, r5, r4
-	str	r5, [sp, #108]          @ 4-byte Spill
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	sbc	r5, r5, #0
-	ands	r8, r5, #1
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	movne	r6, r5
-	ldr	r5, [sp, #68]           @ 4-byte Reload
-	str	r6, [r5]
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	movne	r9, r6
-	ldr	r6, [sp, #56]           @ 4-byte Reload
-	str	r9, [r5, #4]
-	movne	r0, r6
-	cmp	r8, #0
-	str	r0, [r5, #8]
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	movne	r3, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r3, [r5, #12]
-	movne	r1, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r1, [r5, #16]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	movne	r7, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	cmp	r8, #0
-	str	r7, [r5, #20]
-	movne	r2, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	str	r2, [r5, #24]
-	movne	r10, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	str	r10, [r5, #28]
-	movne	r11, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	cmp	r8, #0
-	str	r11, [r5, #32]
-	movne	r12, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	str	r12, [r5, #36]
-	movne	lr, r0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	str	lr, [r5, #40]
-	movne	r1, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	cmp	r8, #0
-	str	r1, [r5, #44]
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	movne	r1, r0
-	str	r1, [r5, #48]
-	add	sp, sp, #548
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end194:
-	.size	mcl_fp_mont13L, .Lfunc_end194-mcl_fp_mont13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montNF13L
-	.align	2
-	.type	mcl_fp_montNF13L,%function
-mcl_fp_montNF13L:                       @ @mcl_fp_montNF13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#548
-	sub	sp, sp, #548
-	.pad	#1024
-	sub	sp, sp, #1024
-	add	r12, sp, #100
-	add	r6, sp, #1024
-	mov	r4, r3
-	stm	r12, {r1, r2, r3}
-	str	r0, [sp, #72]           @ 4-byte Spill
-	add	r0, r6, #488
-	ldr	r5, [r3, #-4]
-	ldr	r2, [r2]
-	str	r5, [sp, #96]           @ 4-byte Spill
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #1516]
-	ldr	r8, [sp, #1512]
-	mov	r1, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #1520]
-	mul	r2, r8, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #1524]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1564]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #1560]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #1556]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #1552]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #1548]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1544]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1540]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1536]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1532]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1528]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #1456
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #1508]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r10, [sp, #1480]
-	ldr	r11, [sp, #1476]
-	ldr	r6, [sp, #1472]
-	ldr	r7, [sp, #1456]
-	ldr	r9, [sp, #1460]
-	ldr	r4, [sp, #1464]
-	ldr	r5, [sp, #1468]
-	add	lr, sp, #1024
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1504]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1500]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1496]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1492]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1488]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1484]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #4]
-	add	r0, lr, #376
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r7, r8
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1412]
-	ldr	r3, [sp, #1416]
-	ldr	r12, [sp, #1420]
-	ldr	lr, [sp, #1424]
-	ldr	r7, [sp, #1436]
-	ldr	r8, [sp, #1440]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	ldr	r9, [sp, #1444]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #1400]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #1428]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #1432]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	ldr	r11, [sp, #76]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	ldr	r10, [sp, #1448]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adc	r0, r1, r0
-	adds	r11, r11, r4
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	ldr	r1, [sp, #1408]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1452]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1404]
-	adcs	r0, r4, r0
-	mov	r4, r11
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #1344
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #1396]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r8, [sp, #1368]
-	ldr	r9, [sp, #1364]
-	ldr	r10, [sp, #1360]
-	ldr	r11, [sp, #1344]
-	ldr	r6, [sp, #1348]
-	ldr	r7, [sp, #1352]
-	ldr	r5, [sp, #1356]
-	add	lr, sp, #1024
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1392]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1388]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1384]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1380]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1376]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1372]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, lr, #264
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r4, r11
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	r11, sp, #1312
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1340]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
-	ldr	r0, [sp, #1288]
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #1292]
-	ldr	r2, [sp, #1296]
-	ldr	r3, [sp, #1300]
-	ldr	r12, [sp, #1304]
-	ldr	lr, [sp, #1308]
-	adds	r7, r7, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1232
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #1284]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r9, [sp, #1256]
-	ldr	r10, [sp, #1252]
-	ldr	r11, [sp, #1248]
-	ldr	r7, [sp, #1232]
-	ldr	r5, [sp, #1236]
-	ldr	r4, [sp, #1240]
-	ldr	r6, [sp, #1244]
-	add	lr, sp, #1024
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1280]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1276]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1272]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1268]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1264]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1260]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, lr, #152
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r8, r7
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1188]
-	ldr	r3, [sp, #1192]
-	ldr	r12, [sp, #1196]
-	ldr	lr, [sp, #1200]
-	ldr	r7, [sp, #1212]
-	ldr	r8, [sp, #1216]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1204]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1176]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1208]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1224]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1220]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r11, r11, r4
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	ldr	r1, [sp, #1184]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1228]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1180]
-	adcs	r0, r4, r0
-	mov	r4, r11
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #1120
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #1172]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r8, [sp, #1144]
-	ldr	r9, [sp, #1140]
-	ldr	r10, [sp, #1136]
-	ldr	r11, [sp, #1120]
-	ldr	r6, [sp, #1124]
-	ldr	r7, [sp, #1128]
-	ldr	r5, [sp, #1132]
-	add	lr, sp, #1024
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1168]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1164]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1160]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1156]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1152]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1148]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, lr, #40
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r4, r11
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	r11, sp, #1088
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1116]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
-	ldr	r0, [sp, #1064]
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #1068]
-	ldr	r2, [sp, #1072]
-	ldr	r3, [sp, #1076]
-	ldr	r12, [sp, #1080]
-	ldr	lr, [sp, #1084]
-	adds	r7, r7, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1008
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #1060]
-	add	r11, sp, #1016
-	ldr	r9, [sp, #1032]
-	ldr	r10, [sp, #1028]
-	ldr	r7, [sp, #1008]
-	ldr	r5, [sp, #1012]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1056]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1052]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1048]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1044]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1040]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1036]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r6, r11}
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, sp, #952
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r8, r7
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #956
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #980
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1004]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r7, r8, r9, r10}
-	ldr	r4, [sp, #952]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	adds	r11, r11, r4
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r11
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #896
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #948]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r8, [sp, #920]
-	ldr	r9, [sp, #916]
-	ldr	r10, [sp, #912]
-	ldr	r11, [sp, #896]
-	ldr	r6, [sp, #900]
-	ldr	r7, [sp, #904]
-	ldr	r5, [sp, #908]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #944]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #940]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #936]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #932]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #928]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #924]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #840
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r4, r11
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	r11, sp, #864
-	add	lr, sp, #840
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #892]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #784
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #836]
-	add	r11, sp, #792
-	ldr	r9, [sp, #808]
-	ldr	r10, [sp, #804]
-	ldr	r7, [sp, #784]
-	ldr	r5, [sp, #788]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #832]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #828]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #824]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #820]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #816]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #812]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r6, r11}
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #728
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r8, r7
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #732
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #756
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #780]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r7, r8, r9, r10}
-	ldr	r4, [sp, #728]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	adds	r11, r11, r4
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r11
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #672
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #724]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r8, [sp, #696]
-	ldr	r9, [sp, #692]
-	ldr	r10, [sp, #688]
-	ldr	r11, [sp, #672]
-	ldr	r6, [sp, #676]
-	ldr	r7, [sp, #680]
-	ldr	r5, [sp, #684]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #720]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #716]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #712]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #708]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #616
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r4, r11
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	r11, sp, #640
-	add	lr, sp, #616
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #668]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	adds	r7, r7, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #560
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #612]
-	add	r11, sp, #568
-	ldr	r9, [sp, #584]
-	ldr	r10, [sp, #580]
-	ldr	r7, [sp, #560]
-	ldr	r5, [sp, #564]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #608]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #604]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #600]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #596]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #592]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #588]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r6, r11}
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, sp, #504
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r8, r7
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #508
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #532
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #556]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r7, r8, r9, r10}
-	ldr	r4, [sp, #504]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	adds	r11, r11, r4
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	mov	r4, r11
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	mul	r2, r11, r8
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #448
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #500]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r9, [sp, #468]
-	ldr	r10, [sp, #464]
-	ldr	r11, [sp, #448]
-	ldr	r6, [sp, #452]
-	ldr	r7, [sp, #456]
-	ldr	r5, [sp, #460]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #496]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #492]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #488]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #484]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #480]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #476]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #472]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r0, #40]
-	add	r0, sp, #392
-	bl	.LmulPv416x32(PLT)
-	adds	r0, r4, r11
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #408
-	ldr	r4, [sp, #400]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #396]
-	adcs	r1, r1, r7
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #404]
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	add	r10, sp, #432
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adc	r1, r1, r2
-	ldr	r2, [sp, #392]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	adds	r0, r0, r2
-	mul	r1, r0, r8
-	str	r0, [sp, #92]           @ 4-byte Spill
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #88]          @ 4-byte Reload
-	adcs	r6, r11, r6
-	str	r6, [sp, #88]           @ 4-byte Spill
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	adcs	r4, r6, r4
-	str	r4, [sp, #84]           @ 4-byte Spill
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	adcs	r4, r4, r5
-	str	r4, [sp, #80]           @ 4-byte Spill
-	ldr	r4, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	adc	r0, r10, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #336
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #388]
-	add	r9, sp, #344
-	ldr	r6, [sp, #364]
-	ldr	r7, [sp, #360]
-	ldr	r8, [sp, #356]
-	ldr	r10, [sp, #336]
-	ldr	r11, [sp, #340]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #384]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #380]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #376]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #372]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #368]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r9, {r4, r5, r9}
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r2, [r0, #44]
-	add	r0, sp, #280
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #296
-	adds	r0, r0, r10
-	add	r10, sp, #320
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	adcs	r1, r1, r4
-	ldr	r4, [sp, #288]
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #292]
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #284]
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adc	r1, r1, r2
-	ldr	r2, [sp, #280]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	adds	r1, r0, r2
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	str	r1, [sp, #92]           @ 4-byte Spill
-	mul	r2, r1, r0
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #88]          @ 4-byte Reload
-	adcs	r6, r11, r6
-	str	r6, [sp, #44]           @ 4-byte Spill
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	adcs	r4, r6, r4
-	str	r4, [sp, #40]           @ 4-byte Spill
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	adcs	r4, r4, r5
-	str	r4, [sp, #36]           @ 4-byte Spill
-	ldr	r4, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	adc	r0, r10, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	add	r0, sp, #224
-	bl	.LmulPv416x32(PLT)
-	ldr	r1, [sp, #276]
-	add	r9, sp, #232
-	ldr	r6, [sp, #252]
-	ldr	r7, [sp, #248]
-	ldr	r8, [sp, #244]
-	ldr	r10, [sp, #224]
-	ldr	r11, [sp, #228]
-	add	r0, sp, #168
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #272]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #268]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #264]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #260]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #256]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r9, {r4, r5, r9}
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [r1, #48]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #184
-	adds	r0, r0, r10
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	adcs	r1, r1, r4
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	add	r7, sp, #168
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adc	r1, r1, r2
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldm	r7, {r2, r6, r7}
-	ldr	r5, [sp, #180]
-	ldr	r4, [sp, #216]
-	ldr	r9, [sp, #212]
-	ldr	r8, [sp, #208]
-	adds	r10, r0, r2
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	mul	r1, r10, r0
-	ldr	r0, [sp, #220]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #104]         @ 4-byte Reload
-	adcs	r11, r11, r6
-	ldr	r6, [sp, #100]          @ 4-byte Reload
-	adcs	r7, r6, r7
-	ldr	r6, [sp, #92]           @ 4-byte Reload
-	adcs	r5, r6, r5
-	ldr	r6, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r8, r0, r8
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	mov	r1, r4
-	adc	r6, r0, #0
-	add	r0, sp, #112
-	bl	.LmulPv416x32(PLT)
-	add	r3, sp, #112
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r10, r0
-	adcs	r12, r11, r1
-	ldr	r0, [sp, #128]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r2, r7, r2
-	str	r12, [sp, #52]          @ 4-byte Spill
-	adcs	lr, r5, r3
-	str	r2, [sp, #56]           @ 4-byte Spill
-	str	lr, [sp, #60]           @ 4-byte Spill
-	adcs	r9, r1, r0
-	ldr	r0, [sp, #132]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r9, [sp, #64]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #136]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #140]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #144]
-	adcs	r10, r1, r0
-	ldr	r0, [sp, #148]
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r10, [sp, #68]          @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #152]
-	adcs	r0, r8, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #156]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #160]
-	adcs	r0, r1, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #164]
-	adc	r0, r6, r0
-	mov	r6, r4
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldmib	r6, {r0, r1, r7}
-	ldr	r5, [r6, #24]
-	ldr	r4, [r6, #28]
-	ldr	r3, [r6, #16]
-	ldr	r11, [r6, #20]
-	str	r5, [sp, #48]           @ 4-byte Spill
-	ldr	r5, [r6]
-	str	r4, [sp, #44]           @ 4-byte Spill
-	subs	r5, r12, r5
-	sbcs	r8, r2, r0
-	sbcs	r2, lr, r1
-	sbcs	lr, r9, r7
-	add	r7, r6, #32
-	ldm	r7, {r0, r1, r7}
-	ldr	r4, [r6, #44]
-	ldr	r9, [r6, #48]
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	sbcs	r3, r6, r3
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	str	r4, [sp, #40]           @ 4-byte Spill
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	sbcs	r12, r6, r11
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	sbcs	r11, r6, r4
-	ldr	r4, [sp, #44]           @ 4-byte Reload
-	sbcs	r10, r10, r4
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	sbcs	r4, r4, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	sbcs	r6, r0, r1
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	sbcs	r7, r0, r7
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	sbc	r9, r0, r9
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	asr	r1, r9, #31
-	cmp	r1, #0
-	movlt	r5, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r5, [r0]
-	ldr	r5, [sp, #56]           @ 4-byte Reload
-	movlt	r8, r5
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	str	r8, [r0, #4]
-	movlt	r2, r5
-	cmp	r1, #0
-	str	r2, [r0, #8]
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	movlt	lr, r2
-	ldr	r2, [sp, #76]           @ 4-byte Reload
-	str	lr, [r0, #12]
-	movlt	r3, r2
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	str	r3, [r0, #16]
-	ldr	r3, [sp, #108]          @ 4-byte Reload
-	movlt	r12, r2
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r12, [r0, #20]
-	movlt	r11, r2
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r11, [r0, #24]
-	movlt	r10, r2
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	str	r10, [r0, #28]
-	movlt	r4, r2
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r4, [r0, #32]
-	movlt	r6, r2
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	str	r6, [r0, #36]
-	movlt	r7, r2
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	str	r7, [r0, #40]
-	movlt	r3, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	str	r3, [r0, #44]
-	movlt	r9, r1
-	str	r9, [r0, #48]
-	add	sp, sp, #548
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end195:
-	.size	mcl_fp_montNF13L, .Lfunc_end195-mcl_fp_montNF13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montRed13L
-	.align	2
-	.type	mcl_fp_montRed13L,%function
-mcl_fp_montRed13L:                      @ @mcl_fp_montRed13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#908
-	sub	sp, sp, #908
-	mov	r3, r2
-	str	r0, [sp, #164]          @ 4-byte Spill
-	ldr	r2, [r1, #4]
-	ldr	r11, [r1]
-	ldr	r0, [r3]
-	str	r3, [sp, #168]          @ 4-byte Spill
-	str	r2, [sp, #72]           @ 4-byte Spill
-	ldr	r2, [r1, #8]
-	str	r0, [sp, #160]          @ 4-byte Spill
-	ldr	r0, [r3, #4]
-	str	r2, [sp, #68]           @ 4-byte Spill
-	ldr	r2, [r1, #12]
-	str	r0, [sp, #156]          @ 4-byte Spill
-	ldr	r0, [r3, #8]
-	str	r2, [sp, #64]           @ 4-byte Spill
-	str	r0, [sp, #152]          @ 4-byte Spill
-	ldr	r0, [r3, #12]
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [r3, #16]
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [r3, #20]
-	str	r0, [sp, #144]          @ 4-byte Spill
-	ldr	r0, [r3, #24]
-	str	r0, [sp, #148]          @ 4-byte Spill
-	ldr	r0, [r3, #-4]
-	str	r0, [sp, #172]          @ 4-byte Spill
-	mul	r2, r11, r0
-	ldr	r0, [r3, #28]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [r3, #32]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [r3, #36]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [r3, #40]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [r3, #44]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [r3, #48]
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [r1, #96]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [r1, #100]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [r1, #64]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r1, #72]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r1, #76]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r1, #80]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r1, #84]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [r1, #88]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [r1, #92]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [r1, #68]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r1, #56]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r1, #60]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r1, #28]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r1, #24]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r1, #20]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r1, #16]
-	mov	r1, r3
-	str	r0, [sp, #12]           @ 4-byte Spill
-	add	r0, sp, #848
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #900]
-	add	r10, sp, #872
-	add	lr, sp, #848
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #168]          @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #172]          @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #792
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #844]
-	add	lr, sp, #832
-	add	r9, sp, #800
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r3, r12, lr}
-	ldr	r4, [sp, #792]
-	ldr	r5, [sp, #828]
-	ldr	r6, [sp, #824]
-	ldr	r7, [sp, #820]
-	ldr	r10, [sp, #816]
-	ldr	r8, [sp, #812]
-	ldr	r1, [sp, #796]
-	ldm	r9, {r0, r2, r9}
-	adds	r4, r11, r4
-	ldr	r4, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r4, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r4, [sp, #172]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #168]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	add	r0, sp, #736
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #788]
-	add	r10, sp, #760
-	add	lr, sp, #736
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #784]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #780]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #680
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #732]
-	add	lr, sp, #720
-	add	r10, sp, #688
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	lr, {r3, r12, lr}
-	ldr	r4, [sp, #680]
-	ldr	r5, [sp, #716]
-	ldr	r6, [sp, #712]
-	ldr	r7, [sp, #708]
-	ldr	r1, [sp, #684]
-	ldm	r10, {r0, r2, r8, r9, r10}
-	adds	r4, r11, r4
-	ldr	r4, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r4, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r4, [sp, #172]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #168]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #624
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #676]
-	add	r10, sp, #648
-	add	lr, sp, #624
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #672]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #668]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #568
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #620]
-	add	lr, sp, #608
-	add	r10, sp, #576
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	lr, {r3, r12, lr}
-	ldr	r4, [sp, #568]
-	ldr	r5, [sp, #604]
-	ldr	r6, [sp, #600]
-	ldr	r7, [sp, #596]
-	ldr	r1, [sp, #572]
-	ldm	r10, {r0, r2, r8, r9, r10}
-	adds	r4, r11, r4
-	ldr	r4, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r4, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r4, [sp, #172]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #168]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	add	r0, sp, #512
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #564]
-	add	r10, sp, #536
-	add	lr, sp, #512
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #560]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #556]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #456
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #508]
-	add	lr, sp, #496
-	add	r10, sp, #464
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	lr, {r3, r12, lr}
-	ldr	r4, [sp, #456]
-	ldr	r5, [sp, #492]
-	ldr	r6, [sp, #488]
-	ldr	r7, [sp, #484]
-	ldr	r1, [sp, #460]
-	ldm	r10, {r0, r2, r8, r9, r10}
-	adds	r4, r11, r4
-	ldr	r4, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r4, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r4, [sp, #172]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #168]          @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #400
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #452]
-	add	r10, sp, #424
-	add	lr, sp, #400
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #448]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #444]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #344
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #396]
-	add	lr, sp, #384
-	add	r10, sp, #352
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	lr, {r3, r12, lr}
-	ldr	r4, [sp, #344]
-	ldr	r5, [sp, #380]
-	ldr	r6, [sp, #376]
-	ldr	r7, [sp, #372]
-	ldr	r1, [sp, #348]
-	ldm	r10, {r0, r2, r8, r9, r10}
-	adds	r4, r11, r4
-	ldr	r4, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r4, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #168]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #172]          @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	mul	r2, r11, r7
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r8
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #288
-	bl	.LmulPv416x32(PLT)
-	ldr	r0, [sp, #340]
-	add	r10, sp, #312
-	add	lr, sp, #288
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #336]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #332]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	mov	r4, r7
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	mul	r2, r11, r4
-	adcs	r0, r0, r5
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	mov	r9, r8
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	add	r0, sp, #232
-	bl	.LmulPv416x32(PLT)
-	add	r7, sp, #232
-	add	lr, sp, #272
-	ldm	r7, {r0, r1, r3, r7}
-	ldr	r8, [sp, #284]
-	adds	r0, r11, r0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r11, r0, r1
-	mul	r0, r11, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #172]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	add	r7, sp, #256
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldm	lr, {r5, r12, lr}
-	ldr	r6, [sp, #268]
-	ldm	r7, {r1, r2, r7}
-	ldr	r0, [sp, #248]
-	ldr	r3, [sp, #108]          @ 4-byte Reload
-	ldr	r4, [sp, #252]
-	adcs	r10, r3, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r4, r0, r4
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r8, r0, r8
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	add	r0, sp, #176
-	bl	.LmulPv416x32(PLT)
-	add	r3, sp, #176
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #172]          @ 4-byte Reload
-	adcs	r12, r0, r1
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r12, [sp, #52]          @ 4-byte Spill
-	adcs	r2, r0, r2
-	ldr	r0, [sp, #192]
-	adcs	r3, r10, r3
-	str	r2, [sp, #64]           @ 4-byte Spill
-	str	r3, [sp, #68]           @ 4-byte Spill
-	adcs	r7, r4, r0
-	ldr	r0, [sp, #196]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	adcs	r4, r1, r0
-	ldr	r0, [sp, #200]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r4, [sp, #76]           @ 4-byte Spill
-	adcs	r5, r1, r0
-	ldr	r0, [sp, #204]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r5, [sp, #80]           @ 4-byte Spill
-	adcs	r6, r1, r0
-	ldr	r0, [sp, #208]
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r6, [sp, #84]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #172]          @ 4-byte Spill
-	ldr	r0, [sp, #212]
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #216]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r11, [sp, #92]          @ 4-byte Spill
-	adcs	r10, r1, r0
-	ldr	r0, [sp, #220]
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r10, [sp, #100]         @ 4-byte Spill
-	adcs	r9, r1, r0
-	ldr	r0, [sp, #224]
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	str	r9, [sp, #108]          @ 4-byte Spill
-	adcs	r8, r8, r0
-	ldr	r0, [sp, #228]
-	str	r8, [sp, #168]          @ 4-byte Spill
-	adcs	lr, r1, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #156]          @ 4-byte Reload
-	str	lr, [sp, #104]          @ 4-byte Spill
-	adc	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #160]          @ 4-byte Reload
-	subs	r0, r12, r0
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #152]          @ 4-byte Reload
-	sbcs	r2, r3, r2
-	ldr	r3, [sp, #136]          @ 4-byte Reload
-	sbcs	r3, r7, r3
-	ldr	r7, [sp, #140]          @ 4-byte Reload
-	sbcs	r12, r4, r7
-	ldr	r4, [sp, #144]          @ 4-byte Reload
-	ldr	r7, [sp, #172]          @ 4-byte Reload
-	sbcs	r4, r5, r4
-	ldr	r5, [sp, #148]          @ 4-byte Reload
-	sbcs	r5, r6, r5
-	ldr	r6, [sp, #112]          @ 4-byte Reload
-	sbcs	r6, r7, r6
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	sbcs	r7, r11, r7
-	str	r7, [sp, #160]          @ 4-byte Spill
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	sbcs	r11, r10, r7
-	ldr	r7, [sp, #124]          @ 4-byte Reload
-	sbcs	r9, r9, r7
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	sbcs	r10, r8, r7
-	ldr	r7, [sp, #132]          @ 4-byte Reload
-	sbcs	r8, lr, r7
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	sbc	r7, r7, #0
-	ands	lr, r7, #1
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	movne	r0, r7
-	ldr	r7, [sp, #164]          @ 4-byte Reload
-	str	r0, [r7]
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	movne	r1, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	str	r1, [r7, #4]
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	movne	r2, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	cmp	lr, #0
-	str	r2, [r7, #8]
-	movne	r3, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r3, [r7, #12]
-	movne	r12, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r12, [r7, #16]
-	movne	r4, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	cmp	lr, #0
-	str	r4, [r7, #20]
-	movne	r5, r0
-	ldr	r0, [sp, #172]          @ 4-byte Reload
-	str	r5, [r7, #24]
-	movne	r6, r0
-	ldr	r0, [sp, #160]          @ 4-byte Reload
-	movne	r0, r1
-	str	r6, [r7, #28]
-	cmp	lr, #0
-	str	r0, [r7, #32]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	movne	r11, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	str	r11, [r7, #36]
-	movne	r9, r0
-	ldr	r0, [sp, #168]          @ 4-byte Reload
-	str	r9, [r7, #40]
-	movne	r10, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	cmp	lr, #0
-	str	r10, [r7, #44]
-	movne	r8, r0
-	str	r8, [r7, #48]
-	add	sp, sp, #908
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end196:
-	.size	mcl_fp_montRed13L, .Lfunc_end196-mcl_fp_montRed13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addPre13L
-	.align	2
-	.type	mcl_fp_addPre13L,%function
-mcl_fp_addPre13L:                       @ @mcl_fp_addPre13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#36
-	sub	sp, sp, #36
-	ldm	r1, {r3, r12, lr}
-	ldr	r9, [r1, #12]
-	ldmib	r2, {r5, r6, r7}
-	ldr	r11, [r2]
-	ldr	r4, [r2, #16]
-	ldr	r10, [r2, #32]
-	adds	r8, r11, r3
-	ldr	r3, [r2, #48]
-	str	r4, [sp, #8]            @ 4-byte Spill
-	ldr	r4, [r2, #20]
-	ldr	r11, [r1, #44]
-	adcs	r5, r5, r12
-	add	r12, r1, #16
-	adcs	r6, r6, lr
-	ldr	lr, [r1, #32]
-	str	r3, [sp, #32]           @ 4-byte Spill
-	ldr	r3, [r2, #44]
-	str	r4, [sp, #12]           @ 4-byte Spill
-	ldr	r4, [r2, #24]
-	str	r3, [sp, #28]           @ 4-byte Spill
-	ldr	r3, [r2, #40]
-	str	r4, [sp, #20]           @ 4-byte Spill
-	ldr	r4, [r2, #28]
-	str	r3, [sp, #16]           @ 4-byte Spill
-	ldr	r3, [r2, #36]
-	ldr	r2, [r1, #36]
-	str	r4, [sp, #24]           @ 4-byte Spill
-	adcs	r4, r7, r9
-	ldr	r7, [r1, #40]
-	ldr	r9, [r1, #48]
-	str	r3, [sp, #4]            @ 4-byte Spill
-	str	r2, [sp]                @ 4-byte Spill
-	ldm	r12, {r1, r2, r3, r12}
-	str	r8, [r0]
-	stmib	r0, {r5, r6}
-	str	r4, [r0, #12]
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r5, r1
-	str	r1, [r0, #16]
-	adcs	r2, r4, r2
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r1, r3
-	ldr	r3, [sp]                @ 4-byte Reload
-	adcs	r2, r2, r12
-	str	r1, [r0, #24]
-	add	r12, r0, #32
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	adcs	r1, r10, lr
-	adcs	r2, r2, r3
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	adcs	r3, r3, r7
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adcs	r7, r7, r11
-	adcs	r6, r6, r9
-	stm	r12, {r1, r2, r3, r7}
-	str	r6, [r0, #48]
-	mov	r0, #0
-	adc	r0, r0, #0
-	add	sp, sp, #36
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end197:
-	.size	mcl_fp_addPre13L, .Lfunc_end197-mcl_fp_addPre13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subPre13L
-	.align	2
-	.type	mcl_fp_subPre13L,%function
-mcl_fp_subPre13L:                       @ @mcl_fp_subPre13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#36
-	sub	sp, sp, #36
-	ldr	r3, [r2, #16]
-	ldr	r7, [r2]
-	ldr	r6, [r1]
-	ldr	r12, [r2, #4]
-	ldr	r4, [r2, #8]
-	ldr	r11, [r2, #12]
-	str	r3, [sp, #12]           @ 4-byte Spill
-	ldr	r3, [r2, #20]
-	subs	r7, r6, r7
-	str	r3, [sp, #20]           @ 4-byte Spill
-	ldr	r3, [r2, #24]
-	str	r3, [sp, #24]           @ 4-byte Spill
-	ldr	r3, [r2, #28]
-	str	r3, [sp, #28]           @ 4-byte Spill
-	ldmib	r1, {r5, lr}
-	ldr	r6, [r2, #48]
-	ldr	r3, [r1, #12]
-	ldr	r10, [r2, #32]
-	ldr	r8, [r1, #44]
-	ldr	r9, [r1, #48]
-	str	r6, [sp, #32]           @ 4-byte Spill
-	ldr	r6, [r2, #44]
-	sbcs	r5, r5, r12
-	add	r12, r1, #16
-	sbcs	r4, lr, r4
-	sbcs	lr, r3, r11
-	ldr	r3, [r2, #36]
-	ldr	r11, [r1, #36]
-	str	r6, [sp, #16]           @ 4-byte Spill
-	ldr	r6, [r2, #40]
-	ldr	r2, [r1, #40]
-	str	r3, [sp, #4]            @ 4-byte Spill
-	str	r6, [sp, #8]            @ 4-byte Spill
-	ldr	r6, [r1, #32]
-	str	r2, [sp]                @ 4-byte Spill
-	ldm	r12, {r1, r2, r3, r12}
-	str	r7, [r0]
-	str	r5, [r0, #4]
-	str	r4, [r0, #8]
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	str	lr, [r0, #12]
-	sbcs	r1, r1, r4
-	sbcs	r2, r2, r7
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	sbcs	r1, r3, r1
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	sbcs	r2, r12, r2
-	str	r1, [r0, #24]
-	add	r12, r0, #32
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	sbcs	r1, r6, r10
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	sbcs	r2, r11, r2
-	sbcs	r3, r7, r3
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	sbcs	r7, r8, r7
-	sbcs	r6, r9, r6
-	stm	r12, {r1, r2, r3, r7}
-	str	r6, [r0, #48]
-	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	add	sp, sp, #36
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end198:
-	.size	mcl_fp_subPre13L, .Lfunc_end198-mcl_fp_subPre13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_13L
-	.align	2
-	.type	mcl_fp_shr1_13L,%function
-mcl_fp_shr1_13L:                        @ @mcl_fp_shr1_13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#8
-	sub	sp, sp, #8
-	add	r9, r1, #8
-	ldm	r9, {r2, r3, r4, r5, r8, r9}
-	ldm	r1, {r10, lr}
-	ldr	r12, [r1, #36]
-	lsr	r7, lr, #1
-	lsr	r6, r3, #1
-	lsrs	r3, r3, #1
-	orr	r11, r7, r2, lsl #31
-	ldr	r7, [r1, #48]
-	rrx	r2, r2
-	lsrs	r3, lr, #1
-	rrx	r3, r10
-	str	r7, [sp, #4]            @ 4-byte Spill
-	ldr	r7, [r1, #44]
-	str	r7, [sp]                @ 4-byte Spill
-	ldr	r7, [r1, #40]
-	ldr	r1, [r1, #32]
-	stm	r0, {r3, r11}
-	str	r2, [r0, #8]
-	orr	r2, r6, r4, lsl #31
-	str	r2, [r0, #12]
-	lsrs	r2, r5, #1
-	ldr	r6, [sp]                @ 4-byte Reload
-	rrx	r2, r4
-	str	r2, [r0, #16]
-	lsr	r2, r5, #1
-	orr	r2, r2, r8, lsl #31
-	str	r2, [r0, #20]
-	lsrs	r2, r9, #1
-	rrx	r2, r8
-	str	r2, [r0, #24]
-	lsr	r2, r9, #1
-	orr	r2, r2, r1, lsl #31
-	str	r2, [r0, #28]
-	lsrs	r2, r12, #1
-	lsr	r2, r12, #1
-	rrx	r1, r1
-	lsrs	r3, r6, #1
-	add	r12, r0, #32
-	orr	r2, r2, r7, lsl #31
-	rrx	r3, r7
-	lsr	r7, r6, #1
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	orr	r7, r7, r6, lsl #31
-	lsr	r6, r6, #1
-	stm	r12, {r1, r2, r3, r7}
-	str	r6, [r0, #48]
-	add	sp, sp, #8
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end199:
-	.size	mcl_fp_shr1_13L, .Lfunc_end199-mcl_fp_shr1_13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_add13L
-	.align	2
-	.type	mcl_fp_add13L,%function
-mcl_fp_add13L:                          @ @mcl_fp_add13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#44
-	sub	sp, sp, #44
-	ldr	r9, [r1]
-	ldmib	r1, {r8, lr}
-	ldr	r12, [r1, #12]
-	ldm	r2, {r4, r5, r6, r7}
-	adds	r11, r4, r9
-	ldr	r9, [r1, #24]
-	adcs	r4, r5, r8
-	ldr	r5, [r1, #20]
-	adcs	r6, r6, lr
-	str	r4, [sp, #32]           @ 4-byte Spill
-	ldr	r4, [r1, #16]
-	mov	lr, r11
-	adcs	r7, r7, r12
-	str	r6, [sp, #28]           @ 4-byte Spill
-	ldr	r6, [r2, #32]
-	str	lr, [r0]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #16]
-	adcs	r8, r7, r4
-	ldr	r4, [r2, #20]
-	adcs	r7, r4, r5
-	ldr	r5, [r2, #24]
-	ldr	r4, [r1, #28]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	adcs	r7, r5, r9
-	ldr	r5, [r2, #28]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	ldr	r11, [sp, #4]           @ 4-byte Reload
-	adcs	r7, r5, r4
-	ldr	r5, [r1, #32]
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	adcs	r10, r6, r5
-	ldr	r6, [r1, #36]
-	ldr	r5, [r2, #36]
-	str	r4, [r0, #4]
-	str	r10, [sp, #24]          @ 4-byte Spill
-	adcs	r9, r5, r6
-	ldr	r6, [r1, #40]
-	ldr	r5, [r2, #40]
-	adcs	r12, r5, r6
-	ldr	r6, [r1, #44]
-	ldr	r5, [r2, #44]
-	ldr	r1, [r1, #48]
-	ldr	r2, [r2, #48]
-	adcs	r6, r5, r6
-	ldr	r5, [sp, #28]           @ 4-byte Reload
-	adcs	r2, r2, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r6, [sp, #16]           @ 4-byte Spill
-	str	r2, [sp, #12]           @ 4-byte Spill
-	str	r5, [r0, #8]
-	str	r7, [r0, #12]
-	str	r8, [r0, #16]
-	str	r1, [r0, #20]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r11, [r0, #24]
-	str	r1, [r0, #28]
-	str	r10, [r0, #32]
-	str	r9, [r0, #36]
-	str	r12, [r0, #40]
-	str	r6, [r0, #44]
-	str	r2, [r0, #48]
-	mov	r2, #0
-	mov	r10, r12
-	adc	r1, r2, #0
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldm	r3, {r2, r6}
-	ldr	r1, [r3, #8]
-	ldr	r12, [r3, #12]
-	subs	r2, lr, r2
-	str	r2, [sp]                @ 4-byte Spill
-	sbcs	r2, r4, r6
-	sbcs	r1, r5, r1
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	sbcs	r7, r7, r12
-	add	r12, r3, #32
-	sbcs	r8, r8, r1
-	ldr	r1, [r3, #20]
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [r3, #24]
-	sbcs	r1, r11, r1
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [r3, #28]
-	sbcs	r5, r2, r1
-	ldm	r12, {r1, r2, r6, r11, r12}
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	sbcs	r3, r3, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	sbcs	r4, r9, r2
-	sbcs	lr, r10, r6
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	sbcs	r2, r1, r11
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	sbcs	r1, r1, r12
-	sbc	r6, r6, #0
-	tst	r6, #1
-	bne	.LBB200_2
-@ BB#1:                                 @ %nocarry
-	mov	r6, r7
-	ldr	r7, [sp]                @ 4-byte Reload
-	add	r12, r0, #32
-	str	r7, [r0]
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	str	r7, [r0, #8]
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	str	r6, [r0, #12]
-	str	r8, [r0, #16]
-	str	r7, [r0, #20]
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	str	r7, [r0, #24]
-	str	r5, [r0, #28]
-	stm	r12, {r3, r4, lr}
-	str	r2, [r0, #44]
-	str	r1, [r0, #48]
-.LBB200_2:                              @ %carry
-	add	sp, sp, #44
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end200:
-	.size	mcl_fp_add13L, .Lfunc_end200-mcl_fp_add13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF13L
-	.align	2
-	.type	mcl_fp_addNF13L,%function
-mcl_fp_addNF13L:                        @ @mcl_fp_addNF13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#64
-	sub	sp, sp, #64
-	ldm	r1, {r7, r8, lr}
-	ldr	r6, [r2]
-	ldr	r12, [r1, #12]
-	ldmib	r2, {r4, r5, r9}
-	adds	r10, r6, r7
-	ldr	r7, [r2, #16]
-	ldr	r6, [r1, #24]
-	adcs	r4, r4, r8
-	adcs	lr, r5, lr
-	ldr	r5, [r1, #16]
-	str	r4, [sp, #28]           @ 4-byte Spill
-	ldr	r4, [r1, #20]
-	adcs	r9, r9, r12
-	str	lr, [sp, #8]            @ 4-byte Spill
-	str	r9, [sp, #12]           @ 4-byte Spill
-	adcs	r7, r7, r5
-	ldr	r5, [r2, #20]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adcs	r7, r5, r4
-	ldr	r5, [r2, #24]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	adcs	r8, r5, r6
-	ldr	r6, [r1, #28]
-	ldr	r5, [r2, #28]
-	str	r8, [sp, #16]           @ 4-byte Spill
-	adcs	r7, r5, r6
-	ldr	r6, [r1, #32]
-	ldr	r5, [r2, #32]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	adcs	r7, r5, r6
-	ldr	r6, [r1, #36]
-	ldr	r5, [r2, #36]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	adcs	r7, r5, r6
-	ldr	r6, [r1, #40]
-	ldr	r5, [r2, #40]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	adcs	r7, r5, r6
-	ldr	r6, [r1, #44]
-	ldr	r5, [r2, #44]
-	ldr	r1, [r1, #48]
-	ldr	r2, [r2, #48]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	adcs	r7, r5, r6
-	adc	r1, r2, r1
-	str	r7, [sp, #48]           @ 4-byte Spill
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldmib	r3, {r1, r12}
-	ldr	r2, [r3, #24]
-	ldr	r7, [r3]
-	ldr	r6, [r3, #12]
-	ldr	r5, [r3, #16]
-	ldr	r4, [r3, #20]
-	ldr	r11, [r3, #28]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	subs	r7, r10, r7
-	sbcs	r2, r2, r1
-	ldr	r1, [r3, #40]
-	sbcs	r12, lr, r12
-	sbcs	lr, r9, r6
-	ldr	r9, [r3, #32]
-	ldr	r6, [r3, #36]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [r3, #44]
-	str	r1, [sp]                @ 4-byte Spill
-	ldr	r1, [r3, #48]
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	sbcs	r5, r1, r5
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	sbcs	r3, r1, r4
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	sbcs	r4, r8, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	sbcs	r8, r1, r11
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	sbcs	r9, r1, r9
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	sbcs	r11, r1, r6
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	ldr	r6, [sp, #20]           @ 4-byte Reload
-	sbcs	r1, r1, r6
-	ldr	r6, [sp]                @ 4-byte Reload
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	sbcs	r1, r1, r6
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	sbc	r6, r1, r6
-	asr	r1, r6, #31
-	cmp	r1, #0
-	movlt	r7, r10
-	str	r7, [r0]
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	movlt	r2, r7
-	str	r2, [r0, #4]
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	movlt	r12, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r12, [r0, #8]
-	movlt	lr, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	lr, [r0, #12]
-	movlt	r5, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r5, [r0, #16]
-	movlt	r3, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #20]
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	movlt	r4, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r4, [r0, #24]
-	movlt	r8, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r8, [r0, #28]
-	movlt	r9, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r9, [r0, #32]
-	movlt	r11, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r11, [r0, #36]
-	movlt	r3, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r3, [r0, #40]
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	movlt	r3, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r3, [r0, #44]
-	movlt	r6, r1
-	str	r6, [r0, #48]
-	add	sp, sp, #64
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end201:
-	.size	mcl_fp_addNF13L, .Lfunc_end201-mcl_fp_addNF13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub13L
-	.align	2
-	.type	mcl_fp_sub13L,%function
-mcl_fp_sub13L:                          @ @mcl_fp_sub13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#56
-	sub	sp, sp, #56
-	ldr	r9, [r2]
-	ldmib	r2, {r8, lr}
-	ldr	r12, [r2, #12]
-	ldm	r1, {r4, r5, r6, r7}
-	subs	r11, r4, r9
-	ldr	r4, [r2, #24]
-	sbcs	r5, r5, r8
-	str	r11, [sp, #28]          @ 4-byte Spill
-	str	r11, [r0]
-	sbcs	r6, r6, lr
-	str	r5, [sp, #52]           @ 4-byte Spill
-	ldr	r5, [r2, #20]
-	sbcs	r7, r7, r12
-	str	r6, [sp, #48]           @ 4-byte Spill
-	ldr	r6, [r2, #16]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldr	r7, [r1, #16]
-	ldr	r11, [sp, #44]          @ 4-byte Reload
-	sbcs	r10, r7, r6
-	ldr	r7, [r1, #20]
-	str	r10, [sp, #36]          @ 4-byte Spill
-	sbcs	r12, r7, r5
-	ldr	r7, [r1, #24]
-	ldr	r5, [r1, #28]
-	sbcs	r8, r7, r4
-	ldr	r7, [r2, #28]
-	ldr	r4, [r1, #36]
-	str	r8, [sp, #40]           @ 4-byte Spill
-	sbcs	r9, r5, r7
-	ldr	r7, [r2, #32]
-	ldr	r5, [r1, #32]
-	sbcs	r5, r5, r7
-	ldr	r7, [r2, #36]
-	sbcs	r6, r4, r7
-	ldr	r7, [r2, #40]
-	ldr	r4, [r1, #40]
-	sbcs	lr, r4, r7
-	ldr	r7, [r2, #44]
-	ldr	r4, [r1, #44]
-	ldr	r2, [r2, #48]
-	ldr	r1, [r1, #48]
-	sbcs	r7, r4, r7
-	ldr	r4, [sp, #52]           @ 4-byte Reload
-	sbcs	r2, r1, r2
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r2, [sp, #24]           @ 4-byte Spill
-	str	r4, [r0, #4]
-	str	r1, [r0, #8]
-	str	r11, [r0, #12]
-	str	r10, [r0, #16]
-	str	r12, [r0, #20]
-	str	r8, [r0, #24]
-	str	r9, [r0, #28]
-	str	r5, [r0, #32]
-	str	r6, [r0, #36]
-	str	lr, [r0, #40]
-	str	r7, [r0, #44]
-	str	r2, [r0, #48]
-	mov	r2, #0
-	sbc	r2, r2, #0
-	tst	r2, #1
-	beq	.LBB202_2
-@ BB#1:                                 @ %carry
-	ldr	r2, [r3, #48]
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	ldr	r10, [r3, #4]
-	ldr	r8, [r3, #8]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [r3, #12]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r3, #16]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r3, #20]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r3, #24]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r3, #28]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [r3]
-	adds	r2, r2, r7
-	ldr	r7, [r3, #44]
-	adcs	r4, r10, r4
-	ldr	r10, [r3, #36]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r3, #40]
-	ldr	r3, [r3, #32]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	adcs	r7, r8, r1
-	ldr	r1, [sp]                @ 4-byte Reload
-	stm	r0, {r2, r4, r7}
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	adcs	r1, r1, r11
-	str	r1, [r0, #12]
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r2, r7, r2
-	str	r2, [r0, #16]
-	adcs	r2, r1, r12
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	r12, r0, #32
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	adcs	r2, r1, r2
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r2, [r0, #24]
-	adcs	r2, r1, r9
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r2, [r0, #28]
-	adcs	r2, r3, r5
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	adcs	r3, r10, r6
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	adcs	r7, r1, lr
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r6, r6, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	stm	r12, {r2, r3, r7}
-	str	r6, [r0, #44]
-	adc	r1, r5, r1
-	str	r1, [r0, #48]
-.LBB202_2:                              @ %nocarry
-	add	sp, sp, #56
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end202:
-	.size	mcl_fp_sub13L, .Lfunc_end202-mcl_fp_sub13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF13L
-	.align	2
-	.type	mcl_fp_subNF13L,%function
-mcl_fp_subNF13L:                        @ @mcl_fp_subNF13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#72
-	sub	sp, sp, #72
-	mov	r12, r0
-	ldr	r0, [r2, #32]
-	add	r9, r1, #20
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r2, #36]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r2, #40]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r2, #44]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r2, #48]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r2, {r7, r11}
-	ldr	r0, [r2, #8]
-	ldr	r10, [r2, #12]
-	ldr	r8, [r2, #16]
-	ldr	lr, [r1, #16]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r2, #20]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r2, #24]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r2, #28]
-	ldr	r2, [r1, #8]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r1, #12]
-	ldm	r9, {r4, r5, r9}
-	ldm	r1, {r1, r6}
-	subs	r7, r1, r7
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	sbcs	r6, r6, r11
-	str	r7, [sp]                @ 4-byte Spill
-	str	r6, [sp, #4]            @ 4-byte Spill
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	sbcs	r0, r0, r10
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	sbcs	r0, lr, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	sbcs	r0, r4, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	sbcs	r0, r5, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	sbcs	r0, r9, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	sbcs	r11, r1, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r11, [sp, #20]          @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	sbc	r0, r2, r1
-	ldr	r1, [r3, #40]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r3, #32]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [r3, #44]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r3, #36]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [r3, #48]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldm	r3, {r2, lr}
-	ldr	r1, [r3, #20]
-	ldr	r5, [r3, #8]
-	ldr	r10, [sp, #8]           @ 4-byte Reload
-	ldr	r4, [r3, #12]
-	ldr	r8, [r3, #24]
-	ldr	r9, [r3, #28]
-	adds	r2, r7, r2
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	adcs	r3, r6, lr
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	adcs	lr, r10, r5
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	adcs	r4, r5, r4
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	adcs	r5, r5, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r6, r1, r6
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r7, r1, r8
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r8, r1, r9
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r9, r11, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r11, r1, r0
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r1, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r1, r0, r1
-	str	r1, [sp, #32]           @ 4-byte Spill
-	asr	r1, r0, #31
-	ldr	r0, [sp]                @ 4-byte Reload
-	cmp	r1, #0
-	movge	lr, r10
-	movge	r2, r0
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	str	r2, [r12]
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	movge	r3, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r12, #4]
-	str	lr, [r12, #8]
-	movge	r4, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	str	r4, [r12, #12]
-	movge	r5, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	str	r5, [r12, #16]
-	movge	r6, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r6, [r12, #20]
-	movge	r7, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	str	r7, [r12, #24]
-	movge	r8, r0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	str	r8, [r12, #28]
-	movge	r9, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r9, [r12, #32]
-	movge	r11, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	str	r11, [r12, #36]
-	movge	r2, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	str	r2, [r12, #40]
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	movge	r0, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [r12, #44]
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	movge	r0, r1
-	str	r0, [r12, #48]
-	add	sp, sp, #72
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end203:
-	.size	mcl_fp_subNF13L, .Lfunc_end203-mcl_fp_subNF13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add13L
-	.align	2
-	.type	mcl_fpDbl_add13L,%function
-mcl_fpDbl_add13L:                       @ @mcl_fpDbl_add13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#152
-	sub	sp, sp, #152
-	ldm	r1, {r7, r8, lr}
-	ldr	r12, [r1, #12]
-	ldm	r2, {r4, r5, r6, r9}
-	add	r10, r1, #32
-	adds	r4, r4, r7
-	str	r4, [sp, #84]           @ 4-byte Spill
-	ldr	r4, [r2, #96]
-	str	r4, [sp, #144]          @ 4-byte Spill
-	ldr	r4, [r2, #100]
-	str	r4, [sp, #148]          @ 4-byte Spill
-	adcs	r4, r5, r8
-	ldr	r8, [r2, #16]
-	adcs	r7, r6, lr
-	str	r4, [sp, #72]           @ 4-byte Spill
-	add	lr, r1, #16
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #112]          @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #116]          @ 4-byte Spill
-	ldr	r7, [r2, #72]
-	str	r7, [sp, #124]          @ 4-byte Spill
-	ldr	r7, [r2, #76]
-	str	r7, [sp, #120]          @ 4-byte Spill
-	ldr	r7, [r2, #80]
-	str	r7, [sp, #128]          @ 4-byte Spill
-	ldr	r7, [r2, #84]
-	str	r7, [sp, #132]          @ 4-byte Spill
-	ldr	r7, [r2, #88]
-	str	r7, [sp, #136]          @ 4-byte Spill
-	ldr	r7, [r2, #92]
-	str	r7, [sp, #140]          @ 4-byte Spill
-	adcs	r7, r9, r12
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r2, #32]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #64]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #88]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #92]           @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #96]           @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #100]          @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	ldr	r2, [r1, #96]
-	str	r2, [sp, #104]          @ 4-byte Spill
-	ldr	r2, [r1, #100]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp, #108]          @ 4-byte Spill
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #88]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [r1, #92]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r9, r10}
-	ldr	r2, [r1, #52]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #84]          @ 4-byte Reload
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	str	r11, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r8, r1
-	str	r7, [r0, #8]
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	str	r7, [r0, #12]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r2, r7, r2
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r1, r1, r12
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r2, r2, lr
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r2, r2, r5
-	str	r2, [r0, #36]
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r2, r2, r9
-	str	r2, [r0, #44]
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	str	r1, [r0, #48]
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r6, r2, r7
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	str	r6, [sp, #88]           @ 4-byte Spill
-	adcs	r5, r1, r2
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r5, [sp, #92]           @ 4-byte Spill
-	adcs	r4, r1, r2
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r4, [sp, #96]           @ 4-byte Spill
-	adcs	r7, r1, r2
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r7, [sp, #112]          @ 4-byte Spill
-	adcs	lr, r1, r2
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	lr, [sp, #100]          @ 4-byte Spill
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	adcs	r8, r1, r2
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r8, [sp, #116]          @ 4-byte Spill
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #128]          @ 4-byte Spill
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r1, [sp, #132]          @ 4-byte Spill
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r1, [sp, #136]          @ 4-byte Spill
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	str	r1, [sp, #140]          @ 4-byte Spill
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	str	r1, [sp, #144]          @ 4-byte Spill
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #148]          @ 4-byte Spill
-	mov	r1, #0
-	adc	r1, r1, #0
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldmib	r3, {r2, r9, r12}
-	ldr	r1, [r3, #20]
-	ldr	r11, [r3]
-	ldr	r10, [r3, #16]
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [r3, #24]
-	subs	r11, r6, r11
-	sbcs	r2, r5, r2
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [r3, #28]
-	str	r1, [sp, #120]          @ 4-byte Spill
-	sbcs	r1, r4, r9
-	add	r9, r3, #32
-	sbcs	r12, r7, r12
-	ldm	r9, {r5, r7, r9}
-	ldr	r4, [r3, #44]
-	ldr	r3, [r3, #48]
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	sbcs	r10, lr, r10
-	str	r3, [sp, #80]           @ 4-byte Spill
-	ldr	r3, [sp, #124]          @ 4-byte Reload
-	str	r4, [sp, #76]           @ 4-byte Spill
-	sbcs	lr, r3, r6
-	ldr	r3, [sp, #104]          @ 4-byte Reload
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	sbcs	r4, r8, r3
-	ldr	r3, [sp, #128]          @ 4-byte Reload
-	sbcs	r6, r3, r6
-	ldr	r3, [sp, #132]          @ 4-byte Reload
-	sbcs	r5, r3, r5
-	ldr	r3, [sp, #136]          @ 4-byte Reload
-	sbcs	r8, r3, r7
-	ldr	r3, [sp, #140]          @ 4-byte Reload
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	sbcs	r9, r3, r9
-	ldr	r3, [sp, #144]          @ 4-byte Reload
-	sbcs	r3, r3, r7
-	ldr	r7, [sp, #80]           @ 4-byte Reload
-	str	r3, [sp, #120]          @ 4-byte Spill
-	ldr	r3, [sp, #148]          @ 4-byte Reload
-	sbcs	r3, r3, r7
-	ldr	r7, [sp, #88]           @ 4-byte Reload
-	str	r3, [sp, #104]          @ 4-byte Spill
-	ldr	r3, [sp, #108]          @ 4-byte Reload
-	sbc	r3, r3, #0
-	ands	r3, r3, #1
-	movne	r11, r7
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	str	r11, [r0, #52]
-	movne	r2, r7
-	str	r2, [r0, #56]
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	movne	r1, r2
-	cmp	r3, #0
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	str	r1, [r0, #60]
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	movne	r12, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r12, [r0, #64]
-	movne	r10, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r10, [r0, #68]
-	movne	lr, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	cmp	r3, #0
-	str	lr, [r0, #72]
-	movne	r4, r1
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r4, [r0, #76]
-	movne	r6, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r6, [r0, #80]
-	movne	r5, r1
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	cmp	r3, #0
-	str	r5, [r0, #84]
-	movne	r8, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r8, [r0, #88]
-	movne	r9, r1
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	str	r9, [r0, #92]
-	movne	r2, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	cmp	r3, #0
-	ldr	r3, [sp, #104]          @ 4-byte Reload
-	str	r2, [r0, #96]
-	movne	r3, r1
-	str	r3, [r0, #100]
-	add	sp, sp, #152
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end204:
-	.size	mcl_fpDbl_add13L, .Lfunc_end204-mcl_fpDbl_add13L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sub13L
-	.align	2
-	.type	mcl_fpDbl_sub13L,%function
-mcl_fpDbl_sub13L:                       @ @mcl_fpDbl_sub13L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#152
-	sub	sp, sp, #152
-	ldr	r7, [r2, #96]
-	add	r10, r1, #32
-	str	r7, [sp, #144]          @ 4-byte Spill
-	ldr	r7, [r2, #100]
-	str	r7, [sp, #148]          @ 4-byte Spill
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #124]          @ 4-byte Spill
-	ldr	r7, [r2, #72]
-	str	r7, [sp, #112]          @ 4-byte Spill
-	ldr	r7, [r2, #76]
-	str	r7, [sp, #140]          @ 4-byte Spill
-	ldr	r7, [r2, #80]
-	str	r7, [sp, #132]          @ 4-byte Spill
-	ldr	r7, [r2, #84]
-	str	r7, [sp, #128]          @ 4-byte Spill
-	ldr	r7, [r2, #88]
-	str	r7, [sp, #116]          @ 4-byte Spill
-	ldr	r7, [r2, #92]
-	str	r7, [sp, #136]          @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #108]          @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #120]          @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #104]          @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #100]          @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #96]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #92]           @ 4-byte Spill
-	ldm	r2, {r9, lr}
-	ldr	r6, [r1]
-	ldr	r5, [r1, #4]
-	ldr	r12, [r2, #8]
-	ldr	r4, [r1, #8]
-	ldr	r8, [r2, #12]
-	ldr	r7, [r1, #12]
-	subs	r6, r6, r9
-	str	r6, [sp, #32]           @ 4-byte Spill
-	ldr	r6, [r2, #40]
-	str	r6, [sp, #80]           @ 4-byte Spill
-	sbcs	r6, r5, lr
-	add	lr, r1, #16
-	str	r6, [sp, #28]           @ 4-byte Spill
-	ldr	r6, [r2, #36]
-	str	r6, [sp, #48]           @ 4-byte Spill
-	sbcs	r6, r4, r12
-	sbcs	r7, r7, r8
-	str	r6, [sp, #20]           @ 4-byte Spill
-	ldr	r6, [r2, #32]
-	ldr	r8, [r2, #16]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r6, [sp, #40]           @ 4-byte Spill
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	ldr	r2, [r1, #96]
-	str	r2, [sp, #84]           @ 4-byte Spill
-	ldr	r2, [r1, #100]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp, #88]           @ 4-byte Spill
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #68]           @ 4-byte Spill
-	ldr	r2, [r1, #88]
-	str	r2, [sp, #72]           @ 4-byte Spill
-	ldr	r2, [r1, #92]
-	str	r2, [sp, #76]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r9, r10}
-	ldr	r2, [r1, #52]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #32]          @ 4-byte Reload
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	str	r11, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	sbcs	r1, r1, r8
-	str	r7, [r0, #8]
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	str	r7, [r0, #12]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	sbcs	r2, r2, r7
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	sbcs	r1, r12, r1
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	sbcs	r2, lr, r2
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	sbcs	r1, r4, r1
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	sbcs	r2, r5, r2
-	str	r2, [r0, #36]
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	sbcs	r1, r6, r1
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	sbcs	r2, r9, r2
-	str	r2, [r0, #44]
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	sbcs	r1, r10, r1
-	add	r10, r3, #16
-	str	r1, [r0, #48]
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	sbcs	r9, r7, r2
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	sbcs	r11, r2, r1
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	str	r1, [sp, #120]          @ 4-byte Spill
-	mov	r1, #0
-	sbcs	r6, r7, r2
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	r6, [sp, #92]           @ 4-byte Spill
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	str	r2, [sp, #124]          @ 4-byte Spill
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	sbcs	r8, r7, r2
-	ldr	r2, [sp, #140]          @ 4-byte Reload
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	str	r8, [sp, #96]           @ 4-byte Spill
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	str	r2, [sp, #140]          @ 4-byte Spill
-	ldr	r2, [sp, #132]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	str	r2, [sp, #132]          @ 4-byte Spill
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	str	r2, [sp, #128]          @ 4-byte Spill
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	str	r2, [sp, #116]          @ 4-byte Spill
-	ldr	r2, [sp, #136]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	str	r2, [sp, #136]          @ 4-byte Spill
-	ldr	r2, [sp, #144]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #88]           @ 4-byte Reload
-	str	r2, [sp, #144]          @ 4-byte Spill
-	ldr	r2, [sp, #148]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	mov	r7, r9
-	mov	r9, r11
-	sbc	r1, r1, #0
-	str	r2, [sp, #148]          @ 4-byte Spill
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [r3, #32]
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [r3, #36]
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [r3, #40]
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [r3, #44]
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [r3, #48]
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldm	r3, {r1, r2, r12, lr}
-	ldm	r10, {r3, r4, r5, r10}
-	ldr	r11, [sp, #120]         @ 4-byte Reload
-	adds	r1, r7, r1
-	adcs	r2, r9, r2
-	adcs	r12, r11, r12
-	ldr	r11, [sp, #112]         @ 4-byte Reload
-	adcs	lr, r6, lr
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	adcs	r3, r6, r3
-	ldr	r6, [sp, #140]          @ 4-byte Reload
-	adcs	r4, r8, r4
-	adcs	r8, r6, r5
-	ldr	r5, [sp, #132]          @ 4-byte Reload
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	adcs	r10, r5, r10
-	ldr	r5, [sp, #128]          @ 4-byte Reload
-	adcs	r5, r5, r6
-	ldr	r6, [sp, #88]           @ 4-byte Reload
-	str	r5, [sp, #84]           @ 4-byte Spill
-	ldr	r5, [sp, #116]          @ 4-byte Reload
-	adcs	r5, r5, r6
-	ldr	r6, [sp, #104]          @ 4-byte Reload
-	str	r5, [sp, #88]           @ 4-byte Spill
-	ldr	r5, [sp, #136]          @ 4-byte Reload
-	adcs	r5, r5, r6
-	ldr	r6, [sp, #108]          @ 4-byte Reload
-	str	r5, [sp, #104]          @ 4-byte Spill
-	ldr	r5, [sp, #144]          @ 4-byte Reload
-	adcs	r5, r5, r6
-	str	r5, [sp, #108]          @ 4-byte Spill
-	ldr	r5, [sp, #148]          @ 4-byte Reload
-	adc	r5, r5, r11
-	str	r5, [sp, #112]          @ 4-byte Spill
-	ldr	r5, [sp, #100]          @ 4-byte Reload
-	ands	r5, r5, #1
-	moveq	r1, r7
-	moveq	r2, r9
-	str	r1, [r0, #52]
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r2, [r0, #56]
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	moveq	r12, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	cmp	r5, #0
-	str	r12, [r0, #60]
-	moveq	lr, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	lr, [r0, #64]
-	moveq	r3, r1
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r3, [r0, #68]
-	ldr	r3, [sp, #112]          @ 4-byte Reload
-	moveq	r4, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	cmp	r5, #0
-	str	r4, [r0, #72]
-	moveq	r8, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r8, [r0, #76]
-	moveq	r10, r1
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r10, [r0, #80]
-	moveq	r2, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	cmp	r5, #0
-	str	r2, [r0, #84]
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	str	r2, [r0, #88]
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	str	r2, [r0, #92]
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	cmp	r5, #0
-	str	r2, [r0, #96]
-	moveq	r3, r1
-	str	r3, [r0, #100]
-	add	sp, sp, #152
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end205:
-	.size	mcl_fpDbl_sub13L, .Lfunc_end205-mcl_fpDbl_sub13L
-	.cantunwind
-	.fnend
-
-	.align	2
-	.type	.LmulPv448x32,%function
-.LmulPv448x32:                          @ @mulPv448x32
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r9, [r1, #12]
-	umull	r4, r8, lr, r2
-	umull	lr, r6, r12, r2
-	mov	r5, r4
-	mov	r7, r6
-	str	lr, [r0]
-	umull	lr, r12, r9, r2
-	umlal	r7, r5, r3, r2
-	str	r5, [r0, #8]
-	str	r7, [r0, #4]
-	umull	r5, r7, r3, r2
-	adds	r3, r6, r5
-	adcs	r3, r7, r4
-	adcs	r3, r8, lr
-	str	r3, [r0, #12]
-	ldr	r3, [r1, #16]
-	umull	r7, r6, r3, r2
-	adcs	r3, r12, r7
-	str	r3, [r0, #16]
-	ldr	r3, [r1, #20]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #20]
-	ldr	r3, [r1, #24]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #24]
-	ldr	r3, [r1, #28]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #28]
-	ldr	r3, [r1, #32]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #32]
-	ldr	r3, [r1, #36]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #36]
-	ldr	r3, [r1, #40]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #40]
-	ldr	r3, [r1, #44]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #44]
-	ldr	r3, [r1, #48]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #48]
-	ldr	r1, [r1, #52]
-	umull	r3, r7, r1, r2
-	adcs	r1, r6, r3
-	str	r1, [r0, #52]
-	adc	r1, r7, #0
-	str	r1, [r0, #56]
-	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
-	mov	pc, lr
-.Lfunc_end206:
-	.size	.LmulPv448x32, .Lfunc_end206-.LmulPv448x32
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mulUnitPre14L
-	.align	2
-	.type	mcl_fp_mulUnitPre14L,%function
-mcl_fp_mulUnitPre14L:                   @ @mcl_fp_mulUnitPre14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#76
-	sub	sp, sp, #76
-	mov	r4, r0
-	add	r0, sp, #8
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #64]
-	add	lr, sp, #8
-	ldr	r8, [sp, #56]
-	ldr	r9, [sp, #52]
-	ldr	r10, [sp, #48]
-	ldr	r11, [sp, #44]
-	ldr	r5, [sp, #40]
-	ldr	r6, [sp, #36]
-	ldr	r7, [sp, #32]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #60]
-	str	r0, [sp]                @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	stm	r4, {r0, r1, r2, r3, r12, lr}
-	str	r7, [r4, #24]
-	str	r6, [r4, #28]
-	str	r5, [r4, #32]
-	str	r11, [r4, #36]
-	str	r10, [r4, #40]
-	str	r9, [r4, #44]
-	str	r8, [r4, #48]
-	ldr	r0, [sp]                @ 4-byte Reload
-	str	r0, [r4, #52]
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	str	r0, [r4, #56]
-	add	sp, sp, #76
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end207:
-	.size	mcl_fp_mulUnitPre14L, .Lfunc_end207-mcl_fp_mulUnitPre14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_mulPre14L
-	.align	2
-	.type	mcl_fpDbl_mulPre14L,%function
-mcl_fpDbl_mulPre14L:                    @ @mcl_fpDbl_mulPre14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#228
-	sub	sp, sp, #228
-	mov	r6, r2
-	mov	r5, r1
-	mov	r4, r0
-	bl	mcl_fpDbl_mulPre7L(PLT)
-	add	r0, r4, #56
-	add	r1, r5, #28
-	add	r2, r6, #28
-	bl	mcl_fpDbl_mulPre7L(PLT)
-	ldr	r0, [r6, #32]
-	add	r11, r6, #36
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [r6, #52]
-	ldr	r12, [r6]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldmib	r6, {r1, r2, r3, r7}
-	ldr	r0, [r6, #28]
-	ldr	lr, [r6, #24]
-	ldr	r6, [r6, #20]
-	adds	r0, r12, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	adcs	r0, r2, r8
-	str	r0, [sp, #100]          @ 4-byte Spill
-	adcs	r0, r3, r9
-	str	r0, [sp, #96]           @ 4-byte Spill
-	adcs	r0, r7, r10
-	str	r0, [sp, #92]           @ 4-byte Spill
-	adcs	r0, r6, r11
-	add	r11, r5, #32
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, lr, r0
-	add	lr, r5, #12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	mov	r0, #0
-	ldm	r11, {r8, r10, r11}
-	ldr	r7, [r5]
-	ldr	r3, [r5, #4]
-	ldr	r2, [r5, #8]
-	adc	r6, r0, #0
-	ldr	r0, [r5, #44]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r5, #48]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r5, #52]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r5, #28]
-	ldm	lr, {r1, r9, r12, lr}
-	adds	r0, r7, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	str	r0, [sp, #144]
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r7, r3, r8
-	adcs	r10, r2, r10
-	add	r2, sp, #116
-	str	r7, [sp, #148]
-	adcs	r11, r1, r11
-	add	r1, sp, #144
-	str	r10, [sp, #152]
-	str	r11, [sp, #156]
-	adcs	r5, r9, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r5, [sp, #160]
-	adcs	r9, r12, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r9, [sp, #164]
-	adcs	r8, lr, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	str	r8, [sp, #168]
-	str	r0, [sp, #116]
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	str	r0, [sp, #120]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #124]
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #128]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #132]
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #136]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #140]
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	add	r0, sp, #172
-	bl	mcl_fpDbl_mulPre7L(PLT)
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	cmp	r6, #0
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	moveq	r8, r6
-	moveq	r9, r6
-	moveq	r5, r6
-	moveq	r11, r6
-	moveq	r10, r6
-	cmp	r6, #0
-	moveq	r2, r6
-	moveq	r7, r6
-	str	r2, [sp, #112]          @ 4-byte Spill
-	str	r7, [sp, #76]           @ 4-byte Spill
-	adds	r3, r2, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #88]           @ 4-byte Reload
-	adcs	lr, r10, r1
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r11, r1
-	adcs	r2, r5, r2
-	adcs	r12, r9, r7
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	adcs	r7, r8, r7
-	str	r7, [sp, #104]          @ 4-byte Spill
-	mov	r7, #0
-	adc	r7, r7, #0
-	str	r7, [sp, #108]          @ 4-byte Spill
-	ldr	r7, [sp, #80]           @ 4-byte Reload
-	cmp	r7, #0
-	moveq	r2, r5
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	moveq	r1, r11
-	moveq	lr, r10
-	ldr	r11, [sp, #104]         @ 4-byte Reload
-	moveq	r0, r5
-	ldr	r5, [sp, #112]          @ 4-byte Reload
-	moveq	r3, r5
-	cmp	r7, #0
-	ldr	r5, [sp, #108]          @ 4-byte Reload
-	moveq	r5, r7
-	and	r7, r6, r7
-	ldr	r6, [sp, #200]
-	moveq	r12, r9
-	moveq	r11, r8
-	adds	r10, r3, r6
-	ldr	r3, [sp, #204]
-	adcs	r8, r0, r3
-	ldr	r0, [sp, #208]
-	add	r3, sp, #172
-	adcs	r9, lr, r0
-	ldr	r0, [sp, #212]
-	ldr	lr, [r4]
-	adcs	r0, r1, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #216]
-	adcs	r0, r2, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #220]
-	adcs	r0, r12, r0
-	ldr	r12, [r4, #4]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #224]
-	adcs	r0, r11, r0
-	ldr	r11, [r4, #12]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	adc	r0, r5, r7
-	ldr	r5, [r4, #8]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldm	r3, {r0, r1, r2, r3}
-	subs	lr, r0, lr
-	sbcs	r12, r1, r12
-	ldr	r1, [sp, #188]
-	sbcs	r5, r2, r5
-	ldr	r2, [r4, #36]
-	sbcs	r0, r3, r11
-	ldr	r3, [sp, #104]          @ 4-byte Reload
-	ldr	r11, [r4, #60]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r4, #16]
-	str	r2, [sp, #112]          @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #192]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r4, #20]
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #196]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r4, #24]
-	sbcs	r6, r1, r0
-	ldr	r0, [r4, #28]
-	sbcs	r7, r10, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r4, #32]
-	ldr	r10, [r4, #56]
-	sbcs	r8, r8, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	sbcs	r9, r9, r2
-	ldr	r2, [r4, #40]
-	sbcs	r0, r3, r2
-	str	r2, [sp, #108]          @ 4-byte Spill
-	ldr	r2, [r4, #44]
-	ldr	r3, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	sbcs	r0, r3, r2
-	str	r2, [sp, #104]          @ 4-byte Spill
-	ldr	r2, [r4, #48]
-	ldr	r3, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	sbcs	r0, r3, r2
-	str	r2, [sp, #100]          @ 4-byte Spill
-	ldr	r2, [r4, #52]
-	ldr	r3, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	sbcs	r0, r3, r2
-	str	r2, [sp, #96]           @ 4-byte Spill
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	ldr	r3, [r4, #68]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	sbc	r0, r2, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	subs	r0, lr, r10
-	ldr	lr, [r4, #76]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	sbcs	r0, r12, r11
-	ldr	r12, [r4, #72]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r4, #64]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	sbcs	r0, r5, r0
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	sbcs	r0, r5, r3
-	ldr	r5, [r4, #80]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	sbcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	sbcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	sbcs	r0, r6, r5
-	ldr	r6, [r4, #84]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	sbcs	r0, r7, r6
-	str	r6, [sp, #92]           @ 4-byte Spill
-	ldr	r6, [r4, #88]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	sbcs	r0, r8, r6
-	str	r6, [sp, #88]           @ 4-byte Spill
-	ldr	r6, [r4, #92]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	sbcs	r0, r9, r6
-	add	r9, r4, #96
-	str	r6, [sp, #84]           @ 4-byte Spill
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r9, {r6, r7, r8, r9}
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	sbcs	r0, r0, r6
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	sbcs	r0, r0, r7
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	sbcs	r0, r0, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	sbcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	sbc	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adds	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [r4, #28]
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [r4, #32]
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r0, [r4, #36]
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	str	r1, [r4, #40]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	str	r0, [r4, #44]
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [r4, #48]
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r0, [r4, #52]
-	adcs	r1, r10, r1
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	str	r1, [r4, #56]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [r4, #60]
-	adcs	r1, r1, r2
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	str	r1, [r4, #64]
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r3, r0
-	adcs	r1, r12, r1
-	str	r0, [r4, #68]
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	add	r12, r4, #92
-	str	r1, [r4, #72]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	adcs	r1, r5, r1
-	str	r0, [r4, #76]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	r1, [r4, #80]
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [r4, #84]
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [r4, #88]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	adcs	r1, r6, #0
-	adcs	r2, r7, #0
-	adcs	r3, r8, #0
-	adc	r7, r9, #0
-	stm	r12, {r0, r1, r2, r3, r7}
-	add	sp, sp, #228
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end208:
-	.size	mcl_fpDbl_mulPre14L, .Lfunc_end208-mcl_fpDbl_mulPre14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sqrPre14L
-	.align	2
-	.type	mcl_fpDbl_sqrPre14L,%function
-mcl_fpDbl_sqrPre14L:                    @ @mcl_fpDbl_sqrPre14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#220
-	sub	sp, sp, #220
-	mov	r5, r1
-	mov	r4, r0
-	mov	r2, r5
-	bl	mcl_fpDbl_mulPre7L(PLT)
-	add	r1, r5, #28
-	add	r0, r4, #56
-	mov	r2, r1
-	bl	mcl_fpDbl_mulPre7L(PLT)
-	ldr	r0, [r5, #44]
-	ldr	r11, [r5, #32]
-	ldr	r10, [r5, #36]
-	ldr	r8, [r5, #40]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [r5, #48]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [r5, #52]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldm	r5, {r6, r7}
-	ldr	r0, [r5, #28]
-	ldr	r3, [r5, #8]
-	ldr	r2, [r5, #12]
-	ldr	r12, [r5, #16]
-	ldr	lr, [r5, #24]
-	ldr	r1, [r5, #20]
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	adds	r9, r6, r0
-	adcs	r0, r7, r11
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	str	r9, [sp, #136]
-	str	r9, [sp, #108]
-	adcs	r3, r3, r10
-	str	r0, [sp, #140]
-	str	r0, [sp, #112]
-	adcs	r2, r2, r8
-	str	r3, [sp, #144]
-	str	r3, [sp, #116]
-	adcs	r6, r12, r5
-	str	r2, [sp, #148]
-	str	r2, [sp, #120]
-	adcs	r1, r1, r7
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	str	r6, [sp, #152]
-	str	r6, [sp, #124]
-	lsr	r5, r1, #31
-	str	r1, [sp, #156]
-	str	r1, [sp, #128]
-	adcs	r8, lr, r7
-	orr	r5, r5, r8, lsl #1
-	str	r8, [sp, #160]
-	str	r8, [sp, #132]
-	str	r5, [sp, #104]          @ 4-byte Spill
-	lsr	r5, r6, #31
-	orr	r1, r5, r1, lsl #1
-	str	r1, [sp, #100]          @ 4-byte Spill
-	lsr	r1, r2, #31
-	orr	r1, r1, r6, lsl #1
-	str	r1, [sp, #96]           @ 4-byte Spill
-	lsr	r1, r3, #31
-	orr	r1, r1, r2, lsl #1
-	add	r2, sp, #108
-	str	r1, [sp, #92]           @ 4-byte Spill
-	lsr	r1, r0, #31
-	orr	r1, r1, r3, lsl #1
-	str	r1, [sp, #84]           @ 4-byte Spill
-	lsr	r1, r9, #31
-	orr	r0, r1, r0, lsl #1
-	add	r1, sp, #136
-	str	r0, [sp, #76]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r6, r0, #0
-	add	r0, sp, #164
-	bl	mcl_fpDbl_mulPre7L(PLT)
-	add	lr, sp, #204
-	add	r7, sp, #192
-	ldm	lr, {r5, r10, r11, lr}
-	ldm	r7, {r0, r1, r7}
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	ldr	r3, [sp, #104]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	adds	r0, r0, r9, lsl #1
-	mov	r9, r1
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r12, r7, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r5, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	adcs	r2, r11, r2
-	adcs	r3, lr, r3
-	adc	r8, r6, r8, lsr #31
-	cmp	r6, #0
-	moveq	r0, r10
-	moveq	r1, r5
-	moveq	r3, lr
-	moveq	r2, r11
-	moveq	r12, r7
-	cmp	r6, #0
-	ldr	lr, [r4]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	moveq	r8, r6
-	str	r2, [sp, #100]          @ 4-byte Spill
-	mov	r5, r3
-	ldr	r3, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	str	r1, [sp, #96]           @ 4-byte Spill
-	mov	r7, r8
-	add	r8, sp, #164
-	moveq	r3, r9
-	ldmib	r4, {r9, r10, r11}
-	moveq	r2, r0
-	ldm	r8, {r0, r1, r8}
-	ldr	r6, [sp, #176]
-	subs	lr, r0, lr
-	sbcs	r0, r1, r9
-	ldr	r1, [sp, #180]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	sbcs	r0, r8, r10
-	ldr	r10, [r4, #56]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	sbcs	r0, r6, r11
-	ldr	r11, [r4, #60]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r4, #16]
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #184]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r4, #20]
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #188]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r4, #24]
-	sbcs	r6, r1, r0
-	ldr	r1, [r4, #28]
-	ldr	r0, [r4, #32]
-	sbcs	r9, r2, r1
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	sbcs	r8, r3, r0
-	ldr	r0, [r4, #36]
-	ldr	r3, [r4, #68]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	sbcs	r0, r12, r0
-	ldr	r12, [r4, #72]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r4, #40]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	sbcs	r0, r2, r0
-	ldr	r2, [r4, #44]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	str	r2, [sp, #96]           @ 4-byte Spill
-	sbcs	r0, r0, r2
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r4, #48]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	sbcs	r0, r2, r0
-	ldr	r2, [r4, #64]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r4, #52]
-	str	r2, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #100]          @ 4-byte Spill
-	sbcs	r0, r5, r0
-	ldr	r5, [r4, #80]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	sbc	r0, r7, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	subs	r0, lr, r10
-	ldr	lr, [r4, #76]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	sbcs	r0, r0, r11
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	sbcs	r0, r0, r2
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	sbcs	r0, r0, r3
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	sbcs	r0, r0, r12
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	sbcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	sbcs	r0, r6, r5
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r4, #84]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	sbcs	r0, r9, r0
-	add	r9, r4, #96
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [r4, #88]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	sbcs	r0, r8, r0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [r4, #92]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	sbcs	r0, r6, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldm	r9, {r6, r7, r8, r9}
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	sbcs	r0, r0, r6
-	str	r0, [sp]                @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	sbcs	r0, r0, r7
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	sbcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	sbcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	sbc	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adds	r0, r1, r0
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [r4, #28]
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [r4, #32]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [r4, #36]
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [r4, #40]
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r0, [r4, #44]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [r4, #48]
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r0, [r4, #52]
-	adcs	r1, r10, r1
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	str	r1, [r4, #56]
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [r4, #60]
-	adcs	r1, r1, r2
-	ldr	r0, [sp]                @ 4-byte Reload
-	str	r1, [r4, #64]
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r0, r3, r0
-	adcs	r1, r12, r1
-	str	r0, [r4, #68]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	add	r12, r4, #92
-	str	r1, [r4, #72]
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	adcs	r1, r5, r1
-	str	r0, [r4, #76]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r1, [r4, #80]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [r4, #84]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [r4, #88]
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	adcs	r1, r6, #0
-	adcs	r2, r7, #0
-	adcs	r3, r8, #0
-	adc	r7, r9, #0
-	stm	r12, {r0, r1, r2, r3, r7}
-	add	sp, sp, #220
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end209:
-	.size	mcl_fpDbl_sqrPre14L, .Lfunc_end209-mcl_fpDbl_sqrPre14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mont14L
-	.align	2
-	.type	mcl_fp_mont14L,%function
-mcl_fp_mont14L:                         @ @mcl_fp_mont14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#892
-	sub	sp, sp, #892
-	.pad	#1024
-	sub	sp, sp, #1024
-	add	r12, sp, #108
-	add	r7, sp, #1024
-	mov	r4, r3
-	stm	r12, {r1, r2, r3}
-	str	r0, [sp, #72]           @ 4-byte Spill
-	add	r0, r7, #824
-	ldr	r6, [r3, #-4]
-	ldr	r2, [r2]
-	str	r6, [sp, #104]          @ 4-byte Spill
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1852]
-	ldr	r5, [sp, #1848]
-	add	r8, sp, #1024
-	mov	r1, r4
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #1856]
-	mul	r2, r5, r6
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #1860]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #1904]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #1900]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #1896]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #1892]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #1888]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #1884]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1880]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1876]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1872]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1868]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1864]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, r8, #760
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1840]
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r10, [sp, #1808]
-	ldr	r11, [sp, #1804]
-	ldr	r7, [sp, #1800]
-	ldr	r9, [sp, #1784]
-	ldr	r4, [sp, #1788]
-	ldr	r6, [sp, #1792]
-	ldr	r8, [sp, #1796]
-	add	lr, sp, #1024
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1836]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1832]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1828]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1824]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1820]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1816]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1812]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r2, [r0, #4]
-	add	r0, lr, #696
-	bl	.LmulPv448x32(PLT)
-	adds	r0, r9, r5
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	ldr	r3, [sp, #1736]
-	ldr	r12, [sp, #1740]
-	ldr	lr, [sp, #1744]
-	ldr	r5, [sp, #1752]
-	ldr	r9, [sp, #1760]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #1748]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #1720]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	ldr	r8, [sp, #1756]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r11, r1
-	str	r0, [sp, #36]           @ 4-byte Spill
-	mov	r0, #0
-	ldr	r11, [sp, #80]          @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r10, r1
-	ldr	r10, [sp, #1764]
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #1732]
-	adc	r0, r0, #0
-	adds	r6, r11, r6
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #1728]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1776]
-	str	r6, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1772]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #1768]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #1724]
-	adcs	r0, r7, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r6, sp, #1024
-	add	r0, r6, #632
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1712]
-	add	r11, sp, #1664
-	ldr	r8, [sp, #1684]
-	ldr	r9, [sp, #1680]
-	ldr	r10, [sp, #1676]
-	ldr	r4, [sp, #1656]
-	ldr	r7, [sp, #1660]
-	add	lr, sp, #1024
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1708]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1704]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1700]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1696]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1692]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1688]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r5, r6, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, lr, #568
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	ldr	r2, [sp, #1604]
-	ldr	r3, [sp, #1608]
-	ldr	r12, [sp, #1612]
-	ldr	lr, [sp, #1616]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #1620]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1624]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1592]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1636]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1632]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1628]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1600]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r6, r11, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1648]
-	str	r6, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1644]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1640]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1596]
-	adcs	r0, r7, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r6, sp, #1024
-	add	r0, r6, #504
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1584]
-	add	r11, sp, #1536
-	ldr	r8, [sp, #1556]
-	ldr	r9, [sp, #1552]
-	ldr	r10, [sp, #1548]
-	ldr	r4, [sp, #1528]
-	ldr	r7, [sp, #1532]
-	add	lr, sp, #1024
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1580]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1576]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1572]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1568]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1564]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1560]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r5, r6, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, lr, #440
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	ldr	r2, [sp, #1476]
-	ldr	r3, [sp, #1480]
-	ldr	r12, [sp, #1484]
-	ldr	lr, [sp, #1488]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #1492]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1496]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1464]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1508]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1504]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1500]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1472]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r6, r11, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1520]
-	str	r6, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1516]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1512]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1468]
-	adcs	r0, r7, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r6, sp, #1024
-	add	r0, r6, #376
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1456]
-	add	r11, sp, #1408
-	ldr	r8, [sp, #1428]
-	ldr	r9, [sp, #1424]
-	ldr	r10, [sp, #1420]
-	ldr	r4, [sp, #1400]
-	ldr	r7, [sp, #1404]
-	add	lr, sp, #1024
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1452]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1448]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1444]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1440]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1436]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1432]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r5, r6, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, lr, #312
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	ldr	r2, [sp, #1348]
-	ldr	r3, [sp, #1352]
-	ldr	r12, [sp, #1356]
-	ldr	lr, [sp, #1360]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #1364]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1368]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1336]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1380]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1376]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1372]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1344]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r6, r11, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1392]
-	str	r6, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1388]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1384]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1340]
-	adcs	r0, r7, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r6, sp, #1024
-	add	r0, r6, #248
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1328]
-	add	r11, sp, #1280
-	ldr	r8, [sp, #1300]
-	ldr	r9, [sp, #1296]
-	ldr	r10, [sp, #1292]
-	ldr	r4, [sp, #1272]
-	ldr	r7, [sp, #1276]
-	add	lr, sp, #1024
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1324]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1320]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1316]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1312]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1308]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1304]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r5, r6, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, lr, #184
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	ldr	r2, [sp, #1220]
-	ldr	r3, [sp, #1224]
-	ldr	r12, [sp, #1228]
-	ldr	lr, [sp, #1232]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #1236]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1240]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1208]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1252]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1248]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1244]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1216]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r6, r11, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1264]
-	str	r6, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1260]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1256]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1212]
-	adcs	r0, r7, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r6, sp, #1024
-	add	r0, r6, #120
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1200]
-	add	r11, sp, #1152
-	ldr	r8, [sp, #1172]
-	ldr	r9, [sp, #1168]
-	ldr	r10, [sp, #1164]
-	ldr	r4, [sp, #1144]
-	ldr	r7, [sp, #1148]
-	add	lr, sp, #1024
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1196]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1192]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1188]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1184]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1180]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1176]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r5, r6, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, lr, #56
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	ldr	r2, [sp, #1092]
-	ldr	r3, [sp, #1096]
-	ldr	r12, [sp, #1100]
-	ldr	lr, [sp, #1104]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #1108]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1112]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1080]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1124]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1120]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1116]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1088]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r6, r11, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1136]
-	str	r6, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1132]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1128]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1084]
-	adcs	r0, r7, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #1016
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1072]
-	add	r11, sp, #1024
-	ldr	r8, [sp, #1044]
-	ldr	r9, [sp, #1040]
-	ldr	r10, [sp, #1036]
-	ldr	r4, [sp, #1016]
-	ldr	r7, [sp, #1020]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1068]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1064]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1060]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1056]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1052]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1048]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r5, r6, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #952
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #956
-	adds	r0, r0, r4
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #980
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1008]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1004]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1000]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #952]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #888
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #944]
-	add	r11, sp, #896
-	ldr	r8, [sp, #916]
-	ldr	r9, [sp, #912]
-	ldr	r10, [sp, #908]
-	ldr	r4, [sp, #888]
-	ldr	r7, [sp, #892]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #940]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #936]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #932]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #928]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #924]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #920]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r5, r6, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #824
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #828
-	adds	r0, r0, r4
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #852
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #880]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #876]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #872]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #824]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #760
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #816]
-	add	r11, sp, #768
-	ldr	r8, [sp, #788]
-	ldr	r9, [sp, #784]
-	ldr	r10, [sp, #780]
-	ldr	r4, [sp, #760]
-	ldr	r7, [sp, #764]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #812]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #808]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #804]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #800]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #796]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #792]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r5, r6, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, sp, #696
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #700
-	adds	r0, r0, r4
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #724
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #752]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #748]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #744]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #696]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #104]          @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	mul	r2, r6, r5
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #632
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #688]
-	add	r11, sp, #632
-	ldr	r6, [sp, #656]
-	ldr	r4, [sp, #652]
-	ldr	r7, [sp, #648]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #684]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #680]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #676]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #672]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #668]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #664]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #660]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #40]
-	add	r0, sp, #568
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	add	lr, sp, #584
-	adds	r0, r0, r8
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r2, r0, r9
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #608
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #568
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	r11, {r4, r6, r7, r11}
-	adds	r0, r2, r4
-	mul	r1, r0, r5
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #624]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	adcs	r6, r5, r6
-	ldr	r5, [sp, #92]           @ 4-byte Reload
-	str	r6, [sp, #96]           @ 4-byte Spill
-	adcs	r6, r5, r7
-	ldr	r5, [sp, #88]           @ 4-byte Reload
-	str	r6, [sp, #92]           @ 4-byte Spill
-	adcs	r6, r5, r11
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	str	r6, [sp, #88]           @ 4-byte Spill
-	adcs	r0, r5, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #504
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #560]
-	add	r10, sp, #504
-	ldr	r11, [sp, #532]
-	ldr	r4, [sp, #528]
-	ldr	r6, [sp, #524]
-	ldr	r7, [sp, #520]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #556]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #552]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #548]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #544]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #540]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #536]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r5, r8, r9, r10}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #44]
-	add	r0, sp, #440
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #456
-	adds	r0, r0, r5
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	adcs	r1, r1, r9
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	add	r10, sp, #480
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	add	r7, sp, #440
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r11
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldm	r7, {r4, r6, r7}
-	ldr	r5, [sp, #452]
-	adds	r1, r0, r4
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	str	r1, [sp, #100]          @ 4-byte Spill
-	mul	r2, r1, r0
-	ldr	r0, [sp, #496]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #96]          @ 4-byte Reload
-	adcs	r6, r11, r6
-	str	r6, [sp, #48]           @ 4-byte Spill
-	ldr	r6, [sp, #92]           @ 4-byte Reload
-	adcs	r6, r6, r7
-	str	r6, [sp, #44]           @ 4-byte Spill
-	ldr	r6, [sp, #88]           @ 4-byte Reload
-	adcs	r5, r6, r5
-	str	r5, [sp, #40]           @ 4-byte Spill
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #376
-	bl	.LmulPv448x32(PLT)
-	ldr	r1, [sp, #432]
-	ldr	r8, [sp, #404]
-	ldr	r9, [sp, #400]
-	ldr	r10, [sp, #396]
-	ldr	r11, [sp, #392]
-	ldr	r6, [sp, #376]
-	ldr	r5, [sp, #380]
-	ldr	r7, [sp, #384]
-	ldr	r4, [sp, #388]
-	add	r0, sp, #312
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #428]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #424]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #420]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #416]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #412]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #408]
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	ldr	r2, [r1, #48]
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #316
-	adds	r0, r0, r6
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #340
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #368]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #364]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r10, {r4, r6, r7, r8, r9, r10}
-	ldr	r5, [sp, #312]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	adds	r11, r11, r5
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #248
-	bl	.LmulPv448x32(PLT)
-	ldr	r1, [sp, #304]
-	ldr	r10, [sp, #272]
-	ldr	r11, [sp, #268]
-	ldr	r8, [sp, #264]
-	ldr	r6, [sp, #248]
-	ldr	r7, [sp, #252]
-	ldr	r4, [sp, #256]
-	ldr	r9, [sp, #260]
-	add	r0, sp, #184
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #300]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #296]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #292]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #288]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #284]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #280]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #276]
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	ldr	r2, [r1, #52]
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	bl	.LmulPv448x32(PLT)
-	adds	r0, r5, r6
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #200
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r3, r0, r7
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	add	r8, sp, #184
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #224
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldm	r8, {r4, r7, r8}
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r5, [sp, #196]
-	adds	r4, r3, r4
-	mul	r1, r4, r0
-	ldr	r0, [sp, #240]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #236]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r6, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #112]         @ 4-byte Reload
-	adcs	r11, r11, r7
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	adcs	r8, r7, r8
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	adcs	r5, r7, r5
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r9, r0, r9
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r10
-	str	r0, [sp, #68]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r7, r0, #0
-	add	r0, sp, #120
-	bl	.LmulPv448x32(PLT)
-	add	r3, sp, #120
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r4, r0
-	adcs	r4, r11, r1
-	ldr	r0, [sp, #136]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r6, r8, r2
-	str	r4, [sp, #36]           @ 4-byte Spill
-	adcs	r12, r5, r3
-	str	r6, [sp, #48]           @ 4-byte Spill
-	str	r12, [sp, #56]          @ 4-byte Spill
-	adcs	r8, r1, r0
-	ldr	r0, [sp, #140]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r8, [sp, #64]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #144]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #148]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #152]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #156]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #160]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #164]
-	adcs	r0, r9, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #168]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #172]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #176]
-	adcs	r0, r1, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	adc	r0, r7, #0
-	mov	r7, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldmib	r7, {r1, r2, r3, r10, r11, lr}
-	ldr	r5, [r7]
-	ldr	r0, [r7, #28]
-	ldr	r9, [r7, #44]
-	subs	r5, r4, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r7, #40]
-	sbcs	r6, r6, r1
-	ldr	r1, [r7, #32]
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	sbcs	r2, r12, r2
-	sbcs	r12, r8, r3
-	ldr	r3, [r7, #48]
-	ldr	r8, [r7, #36]
-	str	r3, [sp, #52]           @ 4-byte Spill
-	ldr	r3, [r7, #52]
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	str	r3, [sp, #116]          @ 4-byte Spill
-	ldr	r3, [sp, #80]           @ 4-byte Reload
-	sbcs	r10, r3, r10
-	ldr	r3, [sp, #76]           @ 4-byte Reload
-	sbcs	r3, r3, r11
-	sbcs	lr, r7, lr
-	ldr	r7, [sp, #88]           @ 4-byte Reload
-	sbcs	r4, r7, r4
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	sbcs	r7, r7, r1
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	sbcs	r8, r1, r8
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	sbcs	r11, r1, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	sbcs	r9, r0, r9
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	sbc	r0, r0, #0
-	ands	r1, r0, #1
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	movne	r5, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r5, [r0]
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	movne	r6, r5
-	ldr	r5, [sp, #56]           @ 4-byte Reload
-	str	r6, [r0, #4]
-	movne	r2, r5
-	cmp	r1, #0
-	str	r2, [r0, #8]
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	movne	r12, r2
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	str	r12, [r0, #12]
-	movne	r10, r2
-	ldr	r2, [sp, #76]           @ 4-byte Reload
-	str	r10, [r0, #16]
-	movne	r3, r2
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #20]
-	movne	lr, r2
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	str	lr, [r0, #24]
-	movne	r4, r2
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	str	r4, [r0, #28]
-	movne	r7, r2
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r7, [r0, #32]
-	movne	r8, r2
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	str	r8, [r0, #36]
-	movne	r11, r2
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	str	r11, [r0, #40]
-	movne	r9, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r9, [r0, #44]
-	movne	r2, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r2, [r0, #48]
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	movne	r2, r1
-	str	r2, [r0, #52]
-	add	sp, sp, #892
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end210:
-	.size	mcl_fp_mont14L, .Lfunc_end210-mcl_fp_mont14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montNF14L
-	.align	2
-	.type	mcl_fp_montNF14L,%function
-mcl_fp_montNF14L:                       @ @mcl_fp_montNF14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#892
-	sub	sp, sp, #892
-	.pad	#1024
-	sub	sp, sp, #1024
-	add	r12, sp, #108
-	add	r6, sp, #1024
-	mov	r4, r3
-	stm	r12, {r1, r2, r3}
-	str	r0, [sp, #76]           @ 4-byte Spill
-	add	r0, r6, #824
-	ldr	r5, [r3, #-4]
-	ldr	r2, [r2]
-	str	r5, [sp, #104]          @ 4-byte Spill
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1852]
-	ldr	r8, [sp, #1848]
-	add	r10, sp, #1024
-	mov	r1, r4
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #1856]
-	mul	r2, r8, r5
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #1860]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #1904]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #1900]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #1896]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #1892]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #1888]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #1884]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1880]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1876]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1872]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1868]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1864]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, r10, #760
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1840]
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r11, [sp, #1808]
-	ldr	r6, [sp, #1804]
-	ldr	r7, [sp, #1800]
-	ldr	r5, [sp, #1784]
-	ldr	r9, [sp, #1788]
-	ldr	r10, [sp, #1792]
-	ldr	r4, [sp, #1796]
-	add	lr, sp, #1024
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1836]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1832]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1828]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1824]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1820]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1816]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1812]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r2, [r0, #4]
-	add	r0, lr, #696
-	bl	.LmulPv448x32(PLT)
-	adds	r0, r5, r8
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r5, [sp, #1720]
-	ldr	r2, [sp, #1732]
-	ldr	r3, [sp, #1736]
-	ldr	r12, [sp, #1740]
-	ldr	lr, [sp, #1744]
-	ldr	r8, [sp, #1760]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	ldr	r9, [sp, #1764]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	ldr	r10, [sp, #1768]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #1748]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #1756]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #1752]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	ldr	r11, [sp, #80]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adc	r0, r1, r0
-	adds	r11, r11, r5
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #1728]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1776]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1772]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1724]
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	add	r8, sp, #1024
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, r8, #632
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1712]
-	add	r11, sp, #1664
-	ldr	r9, [sp, #1680]
-	ldr	r10, [sp, #1676]
-	ldr	r6, [sp, #1656]
-	ldr	r7, [sp, #1660]
-	add	lr, sp, #1024
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1708]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1704]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1700]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1696]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1692]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1688]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1684]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, lr, #568
-	bl	.LmulPv448x32(PLT)
-	adds	r0, r5, r6
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r5, [sp, #1592]
-	ldr	r2, [sp, #1604]
-	ldr	r3, [sp, #1608]
-	ldr	r12, [sp, #1612]
-	ldr	lr, [sp, #1616]
-	ldr	r6, [sp, #1624]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1628]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1620]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1632]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1640]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1636]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r11, r11, r5
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #1600]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1648]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1644]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1596]
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	add	r8, sp, #1024
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, r8, #504
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1584]
-	add	r11, sp, #1536
-	ldr	r9, [sp, #1552]
-	ldr	r10, [sp, #1548]
-	ldr	r6, [sp, #1528]
-	ldr	r7, [sp, #1532]
-	add	lr, sp, #1024
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1580]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1576]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1572]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1568]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1564]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1560]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1556]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, lr, #440
-	bl	.LmulPv448x32(PLT)
-	adds	r0, r5, r6
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r5, [sp, #1464]
-	ldr	r2, [sp, #1476]
-	ldr	r3, [sp, #1480]
-	ldr	r12, [sp, #1484]
-	ldr	lr, [sp, #1488]
-	ldr	r6, [sp, #1496]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1500]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1492]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1504]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1512]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1508]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r11, r11, r5
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #1472]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1520]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1516]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1468]
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	add	r8, sp, #1024
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, r8, #376
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1456]
-	add	r11, sp, #1408
-	ldr	r9, [sp, #1424]
-	ldr	r10, [sp, #1420]
-	ldr	r6, [sp, #1400]
-	ldr	r7, [sp, #1404]
-	add	lr, sp, #1024
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1452]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1448]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1444]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1440]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1436]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1432]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1428]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, lr, #312
-	bl	.LmulPv448x32(PLT)
-	adds	r0, r5, r6
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r5, [sp, #1336]
-	ldr	r2, [sp, #1348]
-	ldr	r3, [sp, #1352]
-	ldr	r12, [sp, #1356]
-	ldr	lr, [sp, #1360]
-	ldr	r6, [sp, #1368]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1372]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1364]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1376]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1384]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1380]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r11, r11, r5
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #1344]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1392]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1388]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1340]
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	add	r8, sp, #1024
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, r8, #248
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1328]
-	add	r11, sp, #1280
-	ldr	r9, [sp, #1296]
-	ldr	r10, [sp, #1292]
-	ldr	r6, [sp, #1272]
-	ldr	r7, [sp, #1276]
-	add	lr, sp, #1024
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1324]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1320]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1316]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1312]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1308]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1304]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1300]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, lr, #184
-	bl	.LmulPv448x32(PLT)
-	adds	r0, r5, r6
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r5, [sp, #1208]
-	ldr	r2, [sp, #1220]
-	ldr	r3, [sp, #1224]
-	ldr	r12, [sp, #1228]
-	ldr	lr, [sp, #1232]
-	ldr	r6, [sp, #1240]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1244]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1236]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1248]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1256]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1252]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r11, r11, r5
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #1216]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1264]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1260]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1212]
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	add	r8, sp, #1024
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, r8, #120
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1200]
-	add	r11, sp, #1152
-	ldr	r9, [sp, #1168]
-	ldr	r10, [sp, #1164]
-	ldr	r6, [sp, #1144]
-	ldr	r7, [sp, #1148]
-	add	lr, sp, #1024
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1196]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1192]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1188]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1184]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1180]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1176]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1172]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, lr, #56
-	bl	.LmulPv448x32(PLT)
-	adds	r0, r5, r6
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r5, [sp, #1080]
-	ldr	r2, [sp, #1092]
-	ldr	r3, [sp, #1096]
-	ldr	r12, [sp, #1100]
-	ldr	lr, [sp, #1104]
-	ldr	r6, [sp, #1112]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1116]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1108]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1120]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1128]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1124]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r11, r11, r5
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	ldr	r1, [sp, #1088]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1136]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1132]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1084]
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #1016
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1072]
-	add	r11, sp, #1024
-	ldr	r9, [sp, #1040]
-	ldr	r10, [sp, #1036]
-	ldr	r6, [sp, #1016]
-	ldr	r7, [sp, #1020]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1068]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1064]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1060]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1056]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1052]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1048]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1044]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #952
-	bl	.LmulPv448x32(PLT)
-	adds	r0, r5, r6
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #956
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #980
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1008]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1004]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r6, r7, r8, r9, r10}
-	ldr	r5, [sp, #952]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	adds	r11, r11, r5
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #888
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #944]
-	add	r11, sp, #896
-	ldr	r9, [sp, #912]
-	ldr	r10, [sp, #908]
-	ldr	r6, [sp, #888]
-	ldr	r7, [sp, #892]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #940]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #936]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #932]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #928]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #924]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #920]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #916]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #824
-	bl	.LmulPv448x32(PLT)
-	adds	r0, r5, r6
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #828
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #852
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #880]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #876]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r6, r7, r8, r9, r10}
-	ldr	r5, [sp, #824]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	adds	r11, r11, r5
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #760
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #816]
-	add	r11, sp, #768
-	ldr	r9, [sp, #784]
-	ldr	r10, [sp, #780]
-	ldr	r6, [sp, #760]
-	ldr	r7, [sp, #764]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #812]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #808]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #804]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #800]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #796]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #792]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #788]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, sp, #696
-	bl	.LmulPv448x32(PLT)
-	adds	r0, r5, r6
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #700
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #724
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #752]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #748]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #744]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r6, r8, r9, r10}
-	ldr	r5, [sp, #696]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adds	r5, r11, r5
-	adcs	r0, r7, r0
-	str	r5, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #104]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mul	r2, r5, r9
-	adcs	r0, r0, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #632
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #688]
-	add	r11, sp, #640
-	ldr	r5, [sp, #656]
-	ldr	r10, [sp, #652]
-	ldr	r6, [sp, #632]
-	ldr	r7, [sp, #636]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #684]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #680]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #676]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #672]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #668]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #664]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #660]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #40]
-	add	r0, sp, #568
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #584
-	adds	r0, r0, r6
-	ldr	r6, [sp, #580]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #572]
-	adcs	r1, r1, r4
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, r11
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	add	r10, sp, #608
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #576]
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adc	r1, r1, r2
-	ldr	r2, [sp, #568]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	adds	r0, r0, r2
-	mul	r1, r0, r9
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #624]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #96]          @ 4-byte Reload
-	adcs	r7, r11, r7
-	str	r7, [sp, #96]           @ 4-byte Spill
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	adcs	r5, r7, r5
-	str	r5, [sp, #92]           @ 4-byte Spill
-	ldr	r5, [sp, #88]           @ 4-byte Reload
-	adcs	r5, r5, r6
-	str	r5, [sp, #88]           @ 4-byte Spill
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #504
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #560]
-	add	r10, sp, #508
-	ldr	r7, [sp, #532]
-	ldr	r8, [sp, #528]
-	ldr	r9, [sp, #524]
-	ldr	r11, [sp, #504]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #556]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #552]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #548]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #544]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #540]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #536]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r10}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r0, #44]
-	add	r0, sp, #440
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #456
-	adds	r0, r0, r11
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #448]
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #452]
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	add	r10, sp, #480
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	ldr	r7, [sp, #444]
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adc	r1, r1, r2
-	ldr	r2, [sp, #440]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	adds	r1, r0, r2
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	str	r1, [sp, #100]          @ 4-byte Spill
-	mul	r2, r1, r0
-	ldr	r0, [sp, #496]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #96]          @ 4-byte Reload
-	adcs	r7, r11, r7
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	adcs	r5, r7, r5
-	str	r5, [sp, #48]           @ 4-byte Spill
-	ldr	r5, [sp, #88]           @ 4-byte Reload
-	adcs	r5, r5, r6
-	str	r5, [sp, #44]           @ 4-byte Spill
-	ldr	r5, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #376
-	bl	.LmulPv448x32(PLT)
-	ldr	r1, [sp, #432]
-	add	r10, sp, #380
-	ldr	r7, [sp, #404]
-	ldr	r8, [sp, #400]
-	ldr	r9, [sp, #396]
-	ldr	r11, [sp, #376]
-	add	r0, sp, #312
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #428]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #424]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #420]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #416]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #412]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #408]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r10}
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	ldr	r2, [r1, #48]
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #316
-	adds	r0, r0, r11
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #340
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #368]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #364]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r6, r7, r8, r9, r10}
-	ldr	r5, [sp, #312]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	adds	r11, r11, r5
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	mov	r5, r11
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #248
-	bl	.LmulPv448x32(PLT)
-	ldr	r1, [sp, #304]
-	ldr	r10, [sp, #272]
-	ldr	r11, [sp, #268]
-	ldr	r8, [sp, #264]
-	ldr	r6, [sp, #248]
-	ldr	r7, [sp, #252]
-	ldr	r4, [sp, #256]
-	ldr	r9, [sp, #260]
-	add	r0, sp, #184
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #300]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #296]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #292]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #288]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #284]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #280]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #276]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	ldr	r2, [r1, #52]
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	bl	.LmulPv448x32(PLT)
-	adds	r0, r5, r6
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #200
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	adcs	r1, r1, r4
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	add	r8, sp, #184
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r1, r11
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	add	r10, sp, #224
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adc	r1, r1, r2
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldm	r8, {r2, r7, r8}
-	ldr	r6, [sp, #196]
-	adds	r4, r0, r2
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mul	r1, r4, r0
-	ldr	r0, [sp, #240]
-	str	r1, [sp, #52]           @ 4-byte Spill
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #236]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r5, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #112]         @ 4-byte Reload
-	adcs	r11, r11, r7
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	adcs	r8, r7, r8
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	adcs	r6, r7, r6
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r7, r0, r5
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	ldr	r5, [sp, #116]          @ 4-byte Reload
-	adcs	r9, r0, r9
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	add	r0, sp, #120
-	bl	.LmulPv448x32(PLT)
-	add	r3, sp, #120
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r4, r0
-	mov	r4, r5
-	adcs	r11, r11, r1
-	ldr	r0, [sp, #136]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r2, r8, r2
-	str	r11, [sp, #44]          @ 4-byte Spill
-	adcs	lr, r6, r3
-	str	r2, [sp, #52]           @ 4-byte Spill
-	str	lr, [sp, #60]           @ 4-byte Spill
-	adcs	r8, r1, r0
-	ldr	r0, [sp, #140]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r8, [sp, #64]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #144]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #148]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #152]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #156]
-	adcs	r10, r1, r0
-	ldr	r0, [sp, #160]
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	str	r10, [sp, #68]          @ 4-byte Spill
-	adcs	r0, r7, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #164]
-	adcs	r0, r9, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #168]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #172]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #176]
-	adc	r0, r1, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldmib	r4, {r0, r1, r7, r9, r12}
-	ldr	r6, [r4]
-	ldr	r3, [r4, #24]
-	ldr	r5, [r4, #28]
-	subs	r6, r11, r6
-	str	r3, [sp, #72]           @ 4-byte Spill
-	add	r11, r4, #32
-	sbcs	r3, r2, r0
-	sbcs	r2, lr, r1
-	ldm	r11, {r0, r1, r11}
-	sbcs	lr, r8, r7
-	ldr	r7, [r4, #44]
-	ldr	r8, [r4, #52]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r4, #48]
-	ldr	r4, [sp, #80]           @ 4-byte Reload
-	sbcs	r9, r4, r9
-	ldr	r4, [sp, #84]           @ 4-byte Reload
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	sbcs	r12, r4, r12
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	sbcs	r4, r4, r7
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	sbcs	r5, r7, r5
-	sbcs	r7, r10, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	sbcs	r10, r0, r1
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	sbcs	r11, r0, r11
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	sbc	r8, r0, r8
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	asr	r1, r8, #31
-	cmp	r1, #0
-	movlt	r6, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r6, [r0]
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	movlt	r3, r6
-	str	r3, [r0, #4]
-	ldr	r3, [sp, #60]           @ 4-byte Reload
-	movlt	r2, r3
-	cmp	r1, #0
-	ldr	r3, [sp, #72]           @ 4-byte Reload
-	str	r2, [r0, #8]
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	movlt	lr, r2
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	str	lr, [r0, #12]
-	movlt	r9, r2
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	str	r9, [r0, #16]
-	movlt	r12, r2
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r12, [r0, #20]
-	movlt	r4, r2
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	str	r4, [r0, #24]
-	movlt	r5, r2
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r5, [r0, #28]
-	movlt	r7, r2
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r7, [r0, #32]
-	movlt	r10, r2
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	str	r10, [r0, #36]
-	movlt	r11, r2
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	str	r11, [r0, #40]
-	movlt	r3, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	str	r3, [r0, #44]
-	movlt	r2, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r2, [r0, #48]
-	movlt	r8, r1
-	str	r8, [r0, #52]
-	add	sp, sp, #892
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end211:
-	.size	mcl_fp_montNF14L, .Lfunc_end211-mcl_fp_montNF14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montRed14L
-	.align	2
-	.type	mcl_fp_montRed14L,%function
-mcl_fp_montRed14L:                      @ @mcl_fp_montRed14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#68
-	sub	sp, sp, #68
-	.pad	#1024
-	sub	sp, sp, #1024
-	mov	r3, r2
-	str	r0, [sp, #180]          @ 4-byte Spill
-	ldr	r2, [r1, #4]
-	ldr	r6, [r1]
-	ldr	r0, [r3]
-	str	r3, [sp, #184]          @ 4-byte Spill
-	str	r2, [sp, #88]           @ 4-byte Spill
-	ldr	r2, [r1, #8]
-	str	r0, [sp, #176]          @ 4-byte Spill
-	ldr	r0, [r3, #4]
-	str	r2, [sp, #84]           @ 4-byte Spill
-	ldr	r2, [r1, #12]
-	str	r0, [sp, #172]          @ 4-byte Spill
-	ldr	r0, [r3, #8]
-	str	r2, [sp, #80]           @ 4-byte Spill
-	str	r0, [sp, #168]          @ 4-byte Spill
-	ldr	r0, [r3, #12]
-	str	r0, [sp, #152]          @ 4-byte Spill
-	ldr	r0, [r3, #16]
-	str	r0, [sp, #156]          @ 4-byte Spill
-	ldr	r0, [r3, #20]
-	str	r0, [sp, #160]          @ 4-byte Spill
-	ldr	r0, [r3, #24]
-	str	r0, [sp, #164]          @ 4-byte Spill
-	ldr	r0, [r3, #-4]
-	str	r0, [sp, #188]          @ 4-byte Spill
-	mul	r2, r6, r0
-	ldr	r0, [r3, #28]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [r3, #32]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [r3, #36]
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [r3, #40]
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [r3, #44]
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [r3, #48]
-	str	r0, [sp, #144]          @ 4-byte Spill
-	ldr	r0, [r3, #52]
-	str	r0, [sp, #148]          @ 4-byte Spill
-	ldr	r0, [r1, #96]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [r1, #100]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [r1, #104]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [r1, #108]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [r1, #64]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r1, #68]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r1, #72]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r1, #80]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [r1, #84]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [r1, #88]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [r1, #92]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [r1, #76]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r1, #56]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r1, #60]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r1, #28]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r1, #24]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r1, #20]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r1, #16]
-	mov	r1, r3
-	str	r0, [sp, #12]           @ 4-byte Spill
-	add	r0, sp, #1024
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1080]
-	ldr	r8, [sp, #1024]
-	ldr	r1, [sp, #1032]
-	ldr	r2, [sp, #1036]
-	ldr	r3, [sp, #1040]
-	ldr	r12, [sp, #1044]
-	ldr	lr, [sp, #1048]
-	ldr	r4, [sp, #1052]
-	ldr	r5, [sp, #1056]
-	ldr	r7, [sp, #1060]
-	ldr	r9, [sp, #1064]
-	ldr	r10, [sp, #1068]
-	ldr	r11, [sp, #1072]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1076]
-	adds	r6, r6, r8
-	ldr	r6, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #1028]
-	adcs	r8, r6, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #184]          @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #188]          @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, sp, #960
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #1016]
-	add	lr, sp, #996
-	add	r10, sp, #964
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1012]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r4, r5, r12, lr}
-	ldr	r6, [sp, #960]
-	ldr	r7, [sp, #992]
-	ldr	r11, [sp, #988]
-	ldr	r3, [sp, #984]
-	ldm	r10, {r0, r1, r2, r9, r10}
-	adds	r6, r8, r6
-	ldr	r6, [sp, #88]           @ 4-byte Reload
-	adcs	r8, r6, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	ldr	r6, [sp, #188]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r8, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	mov	r11, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #184]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #896
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #952]
-	add	r10, sp, #924
-	add	lr, sp, #900
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #948]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #944]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #940]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r10, {r7, r8, r9, r10}
-	ldr	r4, [sp, #896]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r4, r11, r4
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	adcs	r4, r4, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r4, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #832
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #888]
-	add	lr, sp, #872
-	add	r11, sp, #832
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #884]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	lr, {r5, r12, lr}
-	ldr	r6, [sp, #868]
-	ldr	r7, [sp, #864]
-	ldm	r11, {r0, r1, r2, r3, r8, r9, r10, r11}
-	adds	r0, r4, r0
-	ldr	r4, [sp, #188]          @ 4-byte Reload
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	mov	r11, r1
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #184]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r1, r4
-	mov	r1, r5
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #768
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #824]
-	add	r10, sp, #796
-	add	lr, sp, #784
-	add	r9, sp, #768
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #820]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #816]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #812]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r10}
-	ldm	lr, {r3, r12, lr}
-	ldm	r9, {r0, r1, r2, r9}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	mov	r10, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r1, r4
-	mov	r1, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	add	r0, sp, #704
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #760]
-	add	lr, sp, #744
-	add	r9, sp, #708
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #756]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	lr, {r5, r12, lr}
-	ldr	r4, [sp, #704]
-	ldr	r6, [sp, #740]
-	ldr	r7, [sp, #736]
-	ldr	r11, [sp, #732]
-	ldr	r3, [sp, #728]
-	ldm	r9, {r0, r1, r2, r8, r9}
-	adds	r4, r10, r4
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	adcs	r4, r4, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	mov	r11, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #188]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	mul	r2, r4, r5
-	ldr	r4, [sp, #184]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #640
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #696]
-	add	r10, sp, #664
-	add	lr, sp, #640
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #688]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #684]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	mov	r10, r1
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r1, r5
-	mov	r1, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #576
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #632]
-	add	lr, sp, #616
-	add	r9, sp, #580
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #628]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	lr, {r5, r12, lr}
-	ldr	r4, [sp, #576]
-	ldr	r6, [sp, #612]
-	ldr	r7, [sp, #608]
-	ldr	r11, [sp, #604]
-	ldr	r3, [sp, #600]
-	ldm	r9, {r0, r1, r2, r8, r9}
-	adds	r4, r10, r4
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	adcs	r10, r4, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	ldr	r4, [sp, #188]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r10, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #184]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r9
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #512
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #568]
-	add	r11, sp, #536
-	add	lr, sp, #512
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #564]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #560]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #556]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r11, {r5, r6, r7, r8, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r10, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r10, r0, r1
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r10, r4
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	mov	r5, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	add	r0, sp, #448
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #504]
-	add	lr, sp, #484
-	add	r9, sp, #452
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #500]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #496]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	lr, {r6, r12, lr}
-	ldr	r4, [sp, #448]
-	ldr	r7, [sp, #480]
-	ldr	r11, [sp, #476]
-	ldr	r3, [sp, #472]
-	ldm	r9, {r0, r1, r2, r8, r9}
-	adds	r4, r10, r4
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	adcs	r10, r4, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	ldr	r4, [sp, #188]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r10, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #384
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #440]
-	add	r11, sp, #408
-	add	lr, sp, #384
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #436]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #432]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	r11, {r5, r6, r7, r8, r9, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r10, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r10, r0, r1
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r10, r4
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #184]          @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r7
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #320
-	bl	.LmulPv448x32(PLT)
-	ldr	r0, [sp, #376]
-	add	r9, sp, #348
-	ldr	r11, [sp, #364]
-	ldr	r8, [sp, #360]
-	add	lr, sp, #328
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #372]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #368]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r9, {r4, r6, r9}
-	ldr	r3, [sp, #320]
-	ldr	r5, [sp, #324]
-	ldm	lr, {r0, r1, r2, r12, lr}
-	adds	r3, r10, r3
-	ldr	r3, [sp, #88]           @ 4-byte Reload
-	adcs	r5, r3, r5
-	ldr	r3, [sp, #84]           @ 4-byte Reload
-	adcs	r10, r3, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #188]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	mul	r2, r5, r6
-	adcs	r0, r0, r9
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	mov	r11, r7
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r11
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	add	r0, sp, #256
-	bl	.LmulPv448x32(PLT)
-	add	r7, sp, #256
-	add	r12, sp, #272
-	ldm	r7, {r0, r1, r3, r7}
-	ldr	r9, [sp, #312]
-	ldr	r8, [sp, #308]
-	ldr	lr, [sp, #304]
-	adds	r0, r5, r0
-	ldr	r5, [sp, #300]
-	adcs	r10, r10, r1
-	mul	r0, r10, r6
-	ldr	r6, [sp, #296]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #188]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #292]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r4, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r11
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r4, r0, r2
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r8, r0, r9
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	add	r0, sp, #192
-	bl	.LmulPv448x32(PLT)
-	add	r3, sp, #192
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r10, r0
-	ldr	r0, [sp, #188]          @ 4-byte Reload
-	adcs	lr, r0, r1
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	lr, [sp, #72]           @ 4-byte Spill
-	adcs	r2, r0, r2
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	str	r2, [sp, #76]           @ 4-byte Spill
-	adcs	r3, r0, r3
-	ldr	r0, [sp, #208]
-	str	r3, [sp, #80]           @ 4-byte Spill
-	adcs	r7, r1, r0
-	ldr	r0, [sp, #212]
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r7, [sp, #84]           @ 4-byte Spill
-	adcs	r4, r4, r0
-	ldr	r0, [sp, #216]
-	str	r4, [sp, #88]           @ 4-byte Spill
-	adcs	r5, r1, r0
-	ldr	r0, [sp, #220]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r5, [sp, #92]           @ 4-byte Spill
-	adcs	r6, r1, r0
-	ldr	r0, [sp, #224]
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r6, [sp, #96]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #184]          @ 4-byte Spill
-	ldr	r0, [sp, #228]
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #232]
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r11, [sp, #100]         @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #188]          @ 4-byte Spill
-	ldr	r0, [sp, #236]
-	adcs	r10, r1, r0
-	ldr	r0, [sp, #240]
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r10, [sp, #108]         @ 4-byte Spill
-	adcs	r9, r1, r0
-	ldr	r0, [sp, #244]
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r9, [sp, #116]          @ 4-byte Spill
-	adcs	r8, r8, r0
-	ldr	r0, [sp, #248]
-	str	r8, [sp, #120]          @ 4-byte Spill
-	adcs	r12, r1, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r1, [sp, #172]          @ 4-byte Reload
-	str	r12, [sp, #112]         @ 4-byte Spill
-	adc	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #176]          @ 4-byte Reload
-	subs	r0, lr, r0
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #168]          @ 4-byte Reload
-	sbcs	r2, r3, r2
-	ldr	r3, [sp, #152]          @ 4-byte Reload
-	sbcs	r3, r7, r3
-	ldr	r7, [sp, #156]          @ 4-byte Reload
-	sbcs	lr, r4, r7
-	ldr	r4, [sp, #160]          @ 4-byte Reload
-	ldr	r7, [sp, #184]          @ 4-byte Reload
-	sbcs	r4, r5, r4
-	ldr	r5, [sp, #164]          @ 4-byte Reload
-	sbcs	r5, r6, r5
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	sbcs	r6, r7, r6
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	sbcs	r7, r11, r7
-	ldr	r11, [sp, #188]         @ 4-byte Reload
-	str	r7, [sp, #172]          @ 4-byte Spill
-	ldr	r7, [sp, #132]          @ 4-byte Reload
-	sbcs	r11, r11, r7
-	ldr	r7, [sp, #136]          @ 4-byte Reload
-	sbcs	r7, r10, r7
-	str	r7, [sp, #176]          @ 4-byte Spill
-	ldr	r7, [sp, #140]          @ 4-byte Reload
-	sbcs	r9, r9, r7
-	ldr	r7, [sp, #144]          @ 4-byte Reload
-	sbcs	r10, r8, r7
-	ldr	r7, [sp, #148]          @ 4-byte Reload
-	sbcs	r8, r12, r7
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	sbc	r7, r7, #0
-	ands	r12, r7, #1
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	movne	r0, r7
-	ldr	r7, [sp, #180]          @ 4-byte Reload
-	str	r0, [r7]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	movne	r1, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r1, [r7, #4]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	movne	r2, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	cmp	r12, #0
-	str	r2, [r7, #8]
-	movne	r3, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	str	r3, [r7, #12]
-	movne	lr, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	lr, [r7, #16]
-	movne	r4, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	cmp	r12, #0
-	str	r4, [r7, #20]
-	movne	r5, r0
-	ldr	r0, [sp, #184]          @ 4-byte Reload
-	str	r5, [r7, #24]
-	movne	r6, r0
-	ldr	r0, [sp, #172]          @ 4-byte Reload
-	movne	r0, r1
-	str	r6, [r7, #28]
-	cmp	r12, #0
-	str	r0, [r7, #32]
-	ldr	r0, [sp, #188]          @ 4-byte Reload
-	movne	r11, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	str	r11, [r7, #36]
-	ldr	r11, [sp, #176]         @ 4-byte Reload
-	movne	r11, r0
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	str	r11, [r7, #40]
-	movne	r9, r0
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	cmp	r12, #0
-	str	r9, [r7, #44]
-	movne	r10, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	str	r10, [r7, #48]
-	movne	r8, r0
-	str	r8, [r7, #52]
-	add	sp, sp, #68
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end212:
-	.size	mcl_fp_montRed14L, .Lfunc_end212-mcl_fp_montRed14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addPre14L
-	.align	2
-	.type	mcl_fp_addPre14L,%function
-mcl_fp_addPre14L:                       @ @mcl_fp_addPre14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#44
-	sub	sp, sp, #44
-	ldm	r1, {r3, r12, lr}
-	ldr	r9, [r1, #12]
-	ldmib	r2, {r5, r6, r7}
-	ldr	r11, [r2]
-	ldr	r4, [r2, #16]
-	ldr	r10, [r1, #44]
-	adds	r8, r11, r3
-	ldr	r3, [r2, #32]
-	str	r4, [sp, #4]            @ 4-byte Spill
-	ldr	r4, [r2, #20]
-	ldr	r11, [r1, #48]
-	adcs	r5, r5, r12
-	add	r12, r1, #16
-	adcs	r6, r6, lr
-	ldr	lr, [r1, #40]
-	adcs	r7, r7, r9
-	ldr	r9, [r1, #52]
-	str	r3, [sp, #16]           @ 4-byte Spill
-	ldr	r3, [r2, #36]
-	str	r4, [sp, #8]            @ 4-byte Spill
-	ldr	r4, [r2, #24]
-	str	r3, [sp, #24]           @ 4-byte Spill
-	ldr	r3, [r2, #40]
-	str	r4, [sp, #12]           @ 4-byte Spill
-	ldr	r4, [r2, #28]
-	str	r3, [sp, #28]           @ 4-byte Spill
-	ldr	r3, [r2, #44]
-	str	r4, [sp, #20]           @ 4-byte Spill
-	ldr	r4, [r1, #32]
-	str	r3, [sp, #32]           @ 4-byte Spill
-	ldr	r3, [r2, #48]
-	ldr	r2, [r2, #52]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [r1, #36]
-	str	r3, [sp, #36]           @ 4-byte Spill
-	str	r2, [sp]                @ 4-byte Spill
-	ldm	r12, {r1, r2, r3, r12}
-	str	r8, [r0]
-	stmib	r0, {r5, r6}
-	str	r7, [r0, #12]
-	ldr	r5, [sp, #4]            @ 4-byte Reload
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	adcs	r1, r5, r1
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r2, r5, r2
-	ldr	r5, [sp, #40]           @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	adcs	r1, r1, r3
-	ldr	r3, [sp]                @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r2, r2, r12
-	add	r12, r0, #32
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	adcs	r2, r2, r3
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	adcs	r3, r3, lr
-	adcs	r7, r7, r10
-	adcs	r6, r6, r11
-	stm	r12, {r1, r2, r3, r7}
-	adcs	r5, r5, r9
-	str	r6, [r0, #48]
-	str	r5, [r0, #52]
-	mov	r0, #0
-	adc	r0, r0, #0
-	add	sp, sp, #44
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end213:
-	.size	mcl_fp_addPre14L, .Lfunc_end213-mcl_fp_addPre14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subPre14L
-	.align	2
-	.type	mcl_fp_subPre14L,%function
-mcl_fp_subPre14L:                       @ @mcl_fp_subPre14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#44
-	sub	sp, sp, #44
-	ldmib	r2, {r10, r11}
-	ldr	r3, [r2, #16]
-	ldr	r7, [r1]
-	ldr	r6, [r2, #12]
-	str	r3, [sp, #28]           @ 4-byte Spill
-	ldr	r3, [r2, #20]
-	str	r3, [sp, #32]           @ 4-byte Spill
-	ldr	r3, [r2, #24]
-	str	r3, [sp, #36]           @ 4-byte Spill
-	ldr	r3, [r2, #28]
-	str	r3, [sp, #40]           @ 4-byte Spill
-	ldr	r3, [r2]
-	ldmib	r1, {r4, r5, r12}
-	subs	lr, r7, r3
-	ldr	r3, [r2, #32]
-	sbcs	r4, r4, r10
-	sbcs	r5, r5, r11
-	add	r11, r1, #32
-	sbcs	r6, r12, r6
-	add	r12, r1, #16
-	str	r3, [sp, #4]            @ 4-byte Spill
-	ldr	r3, [r2, #36]
-	str	r3, [sp, #8]            @ 4-byte Spill
-	ldr	r3, [r2, #40]
-	str	r3, [sp, #12]           @ 4-byte Spill
-	ldr	r3, [r2, #44]
-	str	r3, [sp, #16]           @ 4-byte Spill
-	ldr	r3, [r2, #48]
-	ldr	r2, [r2, #52]
-	str	r3, [sp, #20]           @ 4-byte Spill
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldm	r11, {r7, r10, r11}
-	ldr	r2, [r1, #52]
-	ldr	r8, [r1, #44]
-	ldr	r9, [r1, #48]
-	str	r2, [sp]                @ 4-byte Spill
-	ldm	r12, {r1, r2, r3, r12}
-	str	lr, [r0]
-	stmib	r0, {r4, r5}
-	str	r6, [r0, #12]
-	ldr	r5, [sp, #28]           @ 4-byte Reload
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	ldr	r4, [sp]                @ 4-byte Reload
-	sbcs	r1, r1, r5
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	sbcs	r2, r2, r6
-	ldr	r6, [sp, #20]           @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	sbcs	r1, r3, r1
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	sbcs	r2, r12, r2
-	add	r12, r0, #32
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	sbcs	r1, r7, r1
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	sbcs	r2, r10, r2
-	sbcs	r3, r11, r3
-	sbcs	r7, r8, r7
-	sbcs	r6, r9, r6
-	stm	r12, {r1, r2, r3, r7}
-	sbcs	r5, r4, r5
-	str	r6, [r0, #48]
-	str	r5, [r0, #52]
-	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	add	sp, sp, #44
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end214:
-	.size	mcl_fp_subPre14L, .Lfunc_end214-mcl_fp_subPre14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_14L
-	.align	2
-	.type	mcl_fp_shr1_14L,%function
-mcl_fp_shr1_14L:                        @ @mcl_fp_shr1_14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#16
-	sub	sp, sp, #16
-	add	r9, r1, #8
-	add	r12, r1, #32
-	ldm	r9, {r2, r3, r4, r5, r6, r9}
-	ldm	r1, {r7, lr}
-	str	r7, [sp, #4]            @ 4-byte Spill
-	lsr	r7, lr, #1
-	orr	r7, r7, r2, lsl #31
-	str	r7, [sp]                @ 4-byte Spill
-	ldm	r12, {r7, r11, r12}
-	ldr	r10, [r1, #48]
-	ldr	r8, [r1, #44]
-	ldr	r1, [r1, #52]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	lsr	r1, r3, #1
-	lsrs	r3, r3, #1
-	str	r10, [sp, #8]           @ 4-byte Spill
-	rrx	r2, r2
-	lsrs	r3, lr, #1
-	orr	r1, r1, r4, lsl #31
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	rrx	r3, r3
-	str	r3, [r0]
-	ldr	r3, [sp]                @ 4-byte Reload
-	str	r3, [r0, #4]
-	str	r2, [r0, #8]
-	str	r1, [r0, #12]
-	lsrs	r1, r5, #1
-	lsr	r2, r11, #1
-	rrx	r1, r4
-	ldr	r4, [sp, #8]            @ 4-byte Reload
-	orr	r2, r2, r12, lsl #31
-	str	r1, [r0, #16]
-	lsr	r1, r5, #1
-	ldr	r5, [sp, #12]           @ 4-byte Reload
-	orr	r1, r1, r6, lsl #31
-	str	r1, [r0, #20]
-	lsrs	r1, r9, #1
-	rrx	r1, r6
-	str	r1, [r0, #24]
-	lsr	r1, r9, #1
-	orr	r1, r1, r7, lsl #31
-	str	r1, [r0, #28]
-	lsrs	r1, r11, #1
-	rrx	r1, r7
-	lsrs	r3, r8, #1
-	lsr	r7, r8, #1
-	rrx	r3, r12
-	lsrs	r6, r5, #1
-	orr	r7, r7, r4, lsl #31
-	add	r12, r0, #32
-	lsr	r5, r5, #1
-	rrx	r6, r4
-	stm	r12, {r1, r2, r3, r7}
-	str	r6, [r0, #48]
-	str	r5, [r0, #52]
-	add	sp, sp, #16
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end215:
-	.size	mcl_fp_shr1_14L, .Lfunc_end215-mcl_fp_shr1_14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_add14L
-	.align	2
-	.type	mcl_fp_add14L,%function
-mcl_fp_add14L:                          @ @mcl_fp_add14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#52
-	sub	sp, sp, #52
-	ldr	r9, [r1]
-	ldmib	r1, {r8, lr}
-	ldr	r12, [r1, #12]
-	ldm	r2, {r4, r5, r6, r7}
-	adds	r9, r4, r9
-	ldr	r4, [r1, #24]
-	adcs	r10, r5, r8
-	ldr	r5, [r1, #20]
-	str	r9, [r0]
-	adcs	r6, r6, lr
-	mov	lr, r10
-	adcs	r7, r7, r12
-	str	r6, [sp, #32]           @ 4-byte Spill
-	ldr	r6, [r1, #16]
-	str	lr, [r0, #4]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r2, #16]
-	adcs	r7, r7, r6
-	ldr	r6, [r2, #44]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	adcs	r7, r7, r5
-	ldr	r5, [r2, #28]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	ldr	r10, [sp, #16]          @ 4-byte Reload
-	adcs	r7, r7, r4
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldr	r7, [r1, #28]
-	str	r4, [r0, #8]
-	adcs	r7, r5, r7
-	ldr	r5, [r2, #32]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [r1, #32]
-	adcs	r7, r5, r7
-	ldr	r5, [r2, #36]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [r1, #36]
-	adcs	r11, r5, r7
-	ldr	r7, [r1, #40]
-	ldr	r5, [r2, #40]
-	str	r11, [sp, #24]          @ 4-byte Spill
-	adcs	r8, r5, r7
-	ldr	r7, [r1, #44]
-	ldr	r5, [sp, #28]           @ 4-byte Reload
-	str	r8, [sp, #12]           @ 4-byte Spill
-	adcs	r12, r6, r7
-	ldr	r7, [r1, #48]
-	ldr	r6, [r2, #48]
-	ldr	r1, [r1, #52]
-	ldr	r2, [r2, #52]
-	str	r5, [r0, #12]
-	str	r12, [sp, #8]           @ 4-byte Spill
-	adcs	r6, r6, r7
-	adcs	r2, r2, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r10, [r0, #20]
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r1, [r0, #28]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r1, [r0, #32]
-	str	r11, [r0, #36]
-	str	r8, [r0, #40]
-	str	r12, [r0, #44]
-	str	r6, [r0, #48]
-	str	r2, [r0, #52]
-	mov	r8, r2
-	mov	r2, #0
-	mov	r12, r6
-	add	r11, r3, #32
-	adc	r1, r2, #0
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldm	r3, {r6, r7}
-	ldr	r1, [r3, #8]
-	ldr	r2, [r3, #12]
-	subs	r6, r9, r6
-	sbcs	r7, lr, r7
-	str	r6, [sp, #4]            @ 4-byte Spill
-	sbcs	r1, r4, r1
-	str	r7, [sp]                @ 4-byte Spill
-	str	r1, [sp, #32]           @ 4-byte Spill
-	sbcs	r1, r5, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [r3, #20]
-	sbcs	r10, r10, r1
-	ldr	r1, [r3, #24]
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [r3, #28]
-	sbcs	r5, r2, r1
-	ldm	r11, {r1, r2, r6, r7, r11}
-	ldr	r9, [r3, #52]
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	sbcs	r3, r3, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	sbcs	lr, r1, r2
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	sbcs	r4, r1, r6
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	sbcs	r7, r1, r7
-	sbcs	r6, r12, r11
-	sbcs	r1, r8, r9
-	sbc	r2, r2, #0
-	tst	r2, #1
-	bne	.LBB216_2
-@ BB#1:                                 @ %nocarry
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	str	r2, [r0]
-	ldr	r2, [sp]                @ 4-byte Reload
-	str	r2, [r0, #4]
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r2, [r0, #8]
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r2, [r0, #12]
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r2, [r0, #16]
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r10, [r0, #20]
-	str	r2, [r0, #24]
-	str	r5, [r0, #28]
-	str	r3, [r0, #32]
-	str	lr, [r0, #36]
-	str	r4, [r0, #40]
-	str	r7, [r0, #44]
-	str	r6, [r0, #48]
-	str	r1, [r0, #52]
-.LBB216_2:                              @ %carry
-	add	sp, sp, #52
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end216:
-	.size	mcl_fp_add14L, .Lfunc_end216-mcl_fp_add14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF14L
-	.align	2
-	.type	mcl_fp_addNF14L,%function
-mcl_fp_addNF14L:                        @ @mcl_fp_addNF14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#80
-	sub	sp, sp, #80
-	ldm	r1, {r7, r8, lr}
-	ldr	r12, [r1, #12]
-	ldm	r2, {r4, r5, r6, r10}
-	adds	r4, r4, r7
-	ldr	r7, [r2, #16]
-	adcs	r5, r5, r8
-	str	r4, [sp, #36]           @ 4-byte Spill
-	ldr	r4, [r1, #24]
-	adcs	lr, r6, lr
-	ldr	r6, [r1, #16]
-	str	r5, [sp, #40]           @ 4-byte Spill
-	ldr	r5, [r1, #20]
-	adcs	r9, r10, r12
-	str	lr, [sp, #12]           @ 4-byte Spill
-	str	r9, [sp, #16]           @ 4-byte Spill
-	adcs	r7, r7, r6
-	ldr	r6, [r2, #20]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	adcs	r7, r6, r5
-	ldr	r6, [r2, #24]
-	ldr	r5, [r2, #28]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	adcs	r8, r6, r4
-	ldr	r6, [r1, #28]
-	str	r8, [sp, #20]           @ 4-byte Spill
-	adcs	r7, r5, r6
-	ldr	r6, [r1, #32]
-	ldr	r5, [r2, #32]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	adcs	r7, r5, r6
-	ldr	r6, [r1, #36]
-	ldr	r5, [r2, #36]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	adcs	r7, r5, r6
-	ldr	r6, [r1, #40]
-	ldr	r5, [r2, #40]
-	str	r7, [sp, #68]           @ 4-byte Spill
-	adcs	r7, r5, r6
-	ldr	r6, [r1, #44]
-	ldr	r5, [r2, #44]
-	str	r7, [sp, #64]           @ 4-byte Spill
-	adcs	r7, r5, r6
-	ldr	r6, [r1, #48]
-	ldr	r5, [r2, #48]
-	ldr	r1, [r1, #52]
-	ldr	r2, [r2, #52]
-	str	r7, [sp, #60]           @ 4-byte Spill
-	adcs	r7, r5, r6
-	adc	r1, r2, r1
-	str	r7, [sp, #76]           @ 4-byte Spill
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldmib	r3, {r1, r4, r6}
-	ldr	r2, [r3, #24]
-	ldr	r7, [r3]
-	ldr	r5, [r3, #16]
-	ldr	r11, [r3, #20]
-	ldr	r10, [r3, #40]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [r3, #28]
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	subs	r7, r2, r7
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	sbcs	r2, r2, r1
-	ldr	r1, [r3, #36]
-	sbcs	r12, lr, r4
-	sbcs	lr, r9, r6
-	ldr	r9, [r3, #32]
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [r3, #44]
-	str	r1, [sp]                @ 4-byte Spill
-	ldr	r1, [r3, #48]
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldr	r1, [r3, #52]
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	sbcs	r5, r1, r5
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	sbcs	r3, r1, r11
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	sbcs	r4, r8, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	sbcs	r8, r1, r6
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	sbcs	r11, r1, r9
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	sbcs	r9, r1, r6
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	ldr	r6, [sp]                @ 4-byte Reload
-	sbcs	r1, r1, r10
-	ldr	r10, [sp, #36]          @ 4-byte Reload
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	sbcs	r1, r1, r6
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	sbcs	r1, r1, r6
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	sbc	r6, r1, r6
-	asr	r1, r6, #31
-	cmp	r1, #0
-	movlt	r7, r10
-	str	r7, [r0]
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	movlt	r2, r7
-	str	r2, [r0, #4]
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	movlt	r12, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r12, [r0, #8]
-	movlt	lr, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	lr, [r0, #12]
-	movlt	r5, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r5, [r0, #16]
-	movlt	r3, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #20]
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	movlt	r4, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r4, [r0, #24]
-	movlt	r8, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r8, [r0, #28]
-	movlt	r11, r2
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r11, [r0, #32]
-	movlt	r9, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	str	r9, [r0, #36]
-	movlt	r3, r2
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r3, [r0, #40]
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	movlt	r3, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r3, [r0, #44]
-	movlt	r2, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r2, [r0, #48]
-	movlt	r6, r1
-	str	r6, [r0, #52]
-	add	sp, sp, #80
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end217:
-	.size	mcl_fp_addNF14L, .Lfunc_end217-mcl_fp_addNF14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub14L
-	.align	2
-	.type	mcl_fp_sub14L,%function
-mcl_fp_sub14L:                          @ @mcl_fp_sub14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#60
-	sub	sp, sp, #60
-	ldr	r9, [r2]
-	ldmib	r2, {r8, lr}
-	ldr	r5, [r1]
-	ldr	r12, [r2, #12]
-	ldmib	r1, {r4, r6, r7}
-	subs	r5, r5, r9
-	sbcs	r4, r4, r8
-	str	r5, [sp, #52]           @ 4-byte Spill
-	ldr	r5, [r2, #24]
-	sbcs	r6, r6, lr
-	str	r4, [sp, #48]           @ 4-byte Spill
-	ldr	r4, [r2, #20]
-	sbcs	r7, r7, r12
-	str	r6, [sp, #56]           @ 4-byte Spill
-	ldr	r6, [r2, #16]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldr	r7, [r1, #16]
-	sbcs	r8, r7, r6
-	ldr	r7, [r1, #20]
-	ldr	r6, [r1, #28]
-	str	r8, [sp, #40]           @ 4-byte Spill
-	sbcs	r10, r7, r4
-	ldr	r7, [r1, #24]
-	ldr	r4, [r1, #40]
-	str	r10, [sp, #36]          @ 4-byte Spill
-	sbcs	r9, r7, r5
-	ldr	r7, [r2, #28]
-	sbcs	r11, r6, r7
-	ldr	r7, [r2, #32]
-	ldr	r6, [r1, #32]
-	str	r11, [sp, #32]          @ 4-byte Spill
-	sbcs	r12, r6, r7
-	ldr	r7, [r2, #36]
-	ldr	r6, [r1, #36]
-	str	r12, [sp, #28]          @ 4-byte Spill
-	sbcs	r6, r6, r7
-	ldr	r7, [r2, #40]
-	sbcs	r5, r4, r7
-	ldr	r7, [r2, #44]
-	ldr	r4, [r1, #44]
-	str	r5, [sp, #24]           @ 4-byte Spill
-	sbcs	lr, r4, r7
-	ldr	r4, [r2, #48]
-	ldr	r7, [r1, #48]
-	ldr	r2, [r2, #52]
-	ldr	r1, [r1, #52]
-	sbcs	r7, r7, r4
-	ldr	r4, [sp, #44]           @ 4-byte Reload
-	sbcs	r2, r1, r2
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r1, [r0]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r1, [r0, #4]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r1, [r0, #8]
-	str	r4, [r0, #12]
-	str	r8, [r0, #16]
-	mov	r1, lr
-	add	r8, r0, #24
-	str	r10, [r0, #20]
-	stm	r8, {r9, r11, r12}
-	str	r6, [r0, #36]
-	str	r5, [r0, #40]
-	str	r1, [r0, #44]
-	str	r7, [r0, #48]
-	mov	r8, r2
-	str	r2, [r0, #52]
-	mov	r2, #0
-	sbc	r2, r2, #0
-	tst	r2, #1
-	beq	.LBB218_2
-@ BB#1:                                 @ %carry
-	ldr	r2, [r3, #52]
-	ldr	r5, [r3, #48]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	ldr	lr, [r3, #4]
-	ldr	r12, [r3, #8]
-	ldr	r10, [r3, #12]
-	ldr	r11, [r3, #40]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [r3, #16]
-	str	r5, [sp, #52]           @ 4-byte Spill
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r3, #20]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r3, #24]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r3, #28]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r3]
-	adds	r2, r2, r7
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	adcs	lr, lr, r5
-	ldr	r5, [r3, #44]
-	adcs	r7, r12, r7
-	add	r12, r0, #32
-	str	r5, [sp, #48]           @ 4-byte Spill
-	adcs	r5, r10, r4
-	ldr	r10, [r3, #36]
-	ldr	r3, [r3, #32]
-	stm	r0, {r2, lr}
-	str	r7, [r0, #8]
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	ldr	r7, [sp]                @ 4-byte Reload
-	ldr	r4, [sp, #36]           @ 4-byte Reload
-	str	r5, [r0, #12]
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	adcs	r2, r7, r2
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	str	r2, [r0, #16]
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	adcs	r4, r7, r4
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	adcs	r2, r2, r9
-	str	r4, [r0, #20]
-	str	r2, [r0, #24]
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	adcs	r2, r7, r2
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adcs	r2, r3, r2
-	adcs	r3, r10, r6
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	adcs	r7, r11, r7
-	adcs	r6, r6, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r5, r5, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	stm	r12, {r2, r3, r7}
-	str	r6, [r0, #44]
-	str	r5, [r0, #48]
-	adc	r1, r1, r8
-	str	r1, [r0, #52]
-.LBB218_2:                              @ %nocarry
-	add	sp, sp, #60
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end218:
-	.size	mcl_fp_sub14L, .Lfunc_end218-mcl_fp_sub14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF14L
-	.align	2
-	.type	mcl_fp_subNF14L,%function
-mcl_fp_subNF14L:                        @ @mcl_fp_subNF14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#80
-	sub	sp, sp, #80
-	mov	r12, r0
-	ldr	r0, [r2, #32]
-	add	r7, r1, #16
-	ldr	r9, [r2]
-	ldr	r11, [r2, #20]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r2, #36]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r2, #40]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r2, #44]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r2, #48]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r2, #52]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [r2, #4]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r2, #8]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r2, #12]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r2, #16]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r2, #24]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r2, #28]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldmib	r1, {r2, r8, lr}
-	ldm	r7, {r4, r5, r6, r7}
-	ldr	r10, [r1]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r1, [r1, #32]
-	subs	r10, r10, r9
-	sbcs	r9, r2, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	str	r10, [sp]               @ 4-byte Spill
-	str	r9, [sp, #4]            @ 4-byte Spill
-	sbcs	r0, r8, r0
-	add	r8, r3, #20
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	sbcs	r0, lr, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	sbcs	r0, r4, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	sbcs	r0, r5, r11
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	sbcs	r0, r6, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	sbcs	r0, r7, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	sbcs	r11, r1, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r11, [sp, #20]          @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	sbc	r0, r1, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r3, #32]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [r3, #36]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r3, #40]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r3, #44]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r3, #48]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r3, #52]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r3, {r2, r4, r6}
-	ldr	r5, [r3, #12]
-	ldr	lr, [r3, #16]
-	ldm	r8, {r0, r7, r8}
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	adds	r1, r10, r2
-	ldr	r10, [sp, #12]          @ 4-byte Reload
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	adcs	r4, r9, r4
-	adcs	r6, r10, r6
-	adcs	r2, r2, r5
-	ldr	r5, [sp, #60]           @ 4-byte Reload
-	adcs	r3, r3, lr
-	adcs	lr, r5, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r5, r0, r7
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	adcs	r8, r0, r8
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r9, r11, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r11, r0, r7
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r7, r0, r7
-	str	r7, [sp, #36]           @ 4-byte Spill
-	asr	r7, r0, #31
-	ldr	r0, [sp]                @ 4-byte Reload
-	cmp	r7, #0
-	movge	r6, r10
-	movge	r1, r0
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	str	r1, [r12]
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	movge	r4, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	cmp	r7, #0
-	str	r4, [r12, #4]
-	str	r6, [r12, #8]
-	movge	r2, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	str	r2, [r12, #12]
-	movge	r3, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	str	r3, [r12, #16]
-	movge	lr, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	cmp	r7, #0
-	str	lr, [r12, #20]
-	movge	r5, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	str	r5, [r12, #24]
-	movge	r8, r0
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	str	r8, [r12, #28]
-	movge	r9, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	cmp	r7, #0
-	str	r9, [r12, #32]
-	movge	r11, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	str	r11, [r12, #36]
-	movge	r1, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r1, [r12, #40]
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	movge	r1, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	cmp	r7, #0
-	str	r1, [r12, #44]
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	movge	r1, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	str	r1, [r12, #48]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	movge	r0, r1
-	str	r0, [r12, #52]
-	add	sp, sp, #80
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end219:
-	.size	mcl_fp_subNF14L, .Lfunc_end219-mcl_fp_subNF14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add14L
-	.align	2
-	.type	mcl_fpDbl_add14L,%function
-mcl_fpDbl_add14L:                       @ @mcl_fpDbl_add14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#168
-	sub	sp, sp, #168
-	ldr	r7, [r1]
-	ldmib	r1, {r6, lr}
-	ldr	r12, [r1, #12]
-	ldm	r2, {r4, r5, r8, r9}
-	add	r10, r1, #32
-	adds	r4, r4, r7
-	str	r4, [sp, #92]           @ 4-byte Spill
-	ldr	r4, [r2, #96]
-	str	r4, [sp, #152]          @ 4-byte Spill
-	ldr	r4, [r2, #100]
-	str	r4, [sp, #156]          @ 4-byte Spill
-	ldr	r4, [r2, #104]
-	str	r4, [sp, #160]          @ 4-byte Spill
-	ldr	r4, [r2, #108]
-	str	r4, [sp, #164]          @ 4-byte Spill
-	adcs	r4, r5, r6
-	adcs	r7, r8, lr
-	str	r4, [sp, #68]           @ 4-byte Spill
-	add	lr, r1, #16
-	str	r7, [sp, #64]           @ 4-byte Spill
-	adcs	r7, r9, r12
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #124]          @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #128]          @ 4-byte Spill
-	ldr	r7, [r2, #72]
-	str	r7, [sp, #136]          @ 4-byte Spill
-	ldr	r7, [r2, #80]
-	str	r7, [sp, #140]          @ 4-byte Spill
-	ldr	r7, [r2, #84]
-	str	r7, [sp, #144]          @ 4-byte Spill
-	ldr	r7, [r2, #88]
-	str	r7, [sp, #132]          @ 4-byte Spill
-	ldr	r7, [r2, #92]
-	str	r7, [sp, #148]          @ 4-byte Spill
-	ldr	r7, [r2, #76]
-	str	r7, [sp, #120]          @ 4-byte Spill
-	ldr	r7, [r2, #32]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #60]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #84]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #88]           @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #96]           @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #100]          @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	ldr	r2, [r2, #16]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #96]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp, #104]          @ 4-byte Spill
-	ldr	r2, [r1, #100]
-	str	r2, [sp, #108]          @ 4-byte Spill
-	ldr	r2, [r1, #104]
-	str	r2, [sp, #112]          @ 4-byte Spill
-	ldr	r2, [r1, #108]
-	str	r2, [sp, #116]          @ 4-byte Spill
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #88]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #92]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r10}
-	ldr	r2, [r1, #56]
-	ldr	r8, [r1, #48]
-	ldr	r9, [r1, #52]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	str	r11, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	str	r7, [r0, #8]
-	ldr	r7, [sp]                @ 4-byte Reload
-	adcs	r1, r7, r1
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	str	r7, [r0, #12]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r2, r7, r2
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	str	r2, [r0, #20]
-	adcs	r1, r1, r12
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r2, r2, lr
-	str	r2, [r0, #28]
-	adcs	r1, r1, r4
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r2, r2, r5
-	str	r2, [r0, #36]
-	adcs	r1, r1, r6
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r2, r2, r10
-	str	r2, [r0, #44]
-	adcs	r1, r1, r8
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	str	r1, [r0, #48]
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r2, r2, r9
-	adcs	r6, r1, r7
-	str	r2, [r0, #52]
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r6, [sp, #84]           @ 4-byte Spill
-	adcs	r5, r1, r2
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r5, [sp, #88]           @ 4-byte Spill
-	adcs	r4, r1, r2
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r4, [sp, #96]           @ 4-byte Spill
-	adcs	r7, r1, r2
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r7, [sp, #100]          @ 4-byte Spill
-	adcs	lr, r1, r2
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	lr, [sp, #92]           @ 4-byte Spill
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #136]          @ 4-byte Spill
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #140]          @ 4-byte Spill
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #144]          @ 4-byte Spill
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	adcs	r8, r1, r2
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r8, [sp, #124]          @ 4-byte Spill
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	str	r1, [sp, #148]          @ 4-byte Spill
-	ldr	r1, [sp, #152]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	str	r1, [sp, #152]          @ 4-byte Spill
-	ldr	r1, [sp, #156]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	str	r1, [sp, #156]          @ 4-byte Spill
-	ldr	r1, [sp, #160]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	str	r1, [sp, #160]          @ 4-byte Spill
-	ldr	r1, [sp, #164]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #164]          @ 4-byte Spill
-	mov	r1, #0
-	adc	r1, r1, #0
-	str	r1, [sp, #120]          @ 4-byte Spill
-	ldmib	r3, {r2, r12}
-	ldr	r1, [r3, #16]
-	ldr	r11, [r3]
-	ldr	r9, [r3, #12]
-	ldr	r10, [r3, #36]
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldr	r1, [r3, #20]
-	subs	r11, r6, r11
-	sbcs	r2, r5, r2
-	sbcs	r12, r4, r12
-	sbcs	r4, r7, r9
-	ldr	r7, [r3, #32]
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldr	r1, [r3, #24]
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	str	r1, [sp, #128]          @ 4-byte Spill
-	ldr	r1, [r3, #28]
-	ldr	r5, [sp, #128]          @ 4-byte Reload
-	str	r1, [sp, #132]          @ 4-byte Spill
-	ldr	r1, [r3, #40]
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [r3, #44]
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [r3, #48]
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [r3, #52]
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	sbcs	r3, lr, r1
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	sbcs	lr, r1, r6
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r6, [sp, #132]          @ 4-byte Reload
-	sbcs	r5, r1, r5
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	sbcs	r6, r1, r6
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	sbcs	r8, r8, r7
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	sbcs	r9, r1, r10
-	ldr	r1, [sp, #152]          @ 4-byte Reload
-	sbcs	r10, r1, r7
-	ldr	r1, [sp, #156]          @ 4-byte Reload
-	ldr	r7, [sp, #80]           @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	str	r1, [sp, #128]          @ 4-byte Spill
-	ldr	r1, [sp, #160]          @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	str	r1, [sp, #132]          @ 4-byte Spill
-	ldr	r1, [sp, #164]          @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	sbc	r1, r1, #0
-	ands	r1, r1, #1
-	movne	r11, r7
-	ldr	r7, [sp, #88]           @ 4-byte Reload
-	str	r11, [r0, #56]
-	movne	r2, r7
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	str	r2, [r0, #60]
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	movne	r12, r2
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r12, [r0, #64]
-	movne	r4, r2
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	str	r4, [r0, #68]
-	movne	r3, r2
-	ldr	r2, [sp, #136]          @ 4-byte Reload
-	str	r3, [r0, #72]
-	ldr	r3, [sp, #128]          @ 4-byte Reload
-	movne	lr, r2
-	ldr	r2, [sp, #140]          @ 4-byte Reload
-	cmp	r1, #0
-	str	lr, [r0, #76]
-	movne	r5, r2
-	ldr	r2, [sp, #144]          @ 4-byte Reload
-	str	r5, [r0, #80]
-	movne	r6, r2
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	str	r6, [r0, #84]
-	movne	r8, r2
-	ldr	r2, [sp, #148]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r8, [r0, #88]
-	movne	r9, r2
-	ldr	r2, [sp, #152]          @ 4-byte Reload
-	str	r9, [r0, #92]
-	movne	r10, r2
-	ldr	r2, [sp, #156]          @ 4-byte Reload
-	str	r10, [r0, #96]
-	movne	r3, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #160]          @ 4-byte Reload
-	ldr	r2, [sp, #132]          @ 4-byte Reload
-	str	r3, [r0, #100]
-	movne	r2, r1
-	ldr	r1, [sp, #164]          @ 4-byte Reload
-	str	r2, [r0, #104]
-	movne	r7, r1
-	str	r7, [r0, #108]
-	add	sp, sp, #168
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end220:
-	.size	mcl_fpDbl_add14L, .Lfunc_end220-mcl_fpDbl_add14L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sub14L
-	.align	2
-	.type	mcl_fpDbl_sub14L,%function
-mcl_fpDbl_sub14L:                       @ @mcl_fpDbl_sub14L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#168
-	sub	sp, sp, #168
-	ldr	r7, [r2, #96]
-	add	r9, r1, #32
-	str	r7, [sp, #160]          @ 4-byte Spill
-	ldr	r7, [r2, #100]
-	str	r7, [sp, #156]          @ 4-byte Spill
-	ldr	r7, [r2, #104]
-	str	r7, [sp, #140]          @ 4-byte Spill
-	ldr	r7, [r2, #108]
-	str	r7, [sp, #164]          @ 4-byte Spill
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #128]          @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #136]          @ 4-byte Spill
-	ldr	r7, [r2, #72]
-	str	r7, [sp, #144]          @ 4-byte Spill
-	ldr	r7, [r2, #80]
-	str	r7, [sp, #148]          @ 4-byte Spill
-	ldr	r7, [r2, #84]
-	str	r7, [sp, #152]          @ 4-byte Spill
-	ldr	r7, [r2, #88]
-	str	r7, [sp, #124]          @ 4-byte Spill
-	ldr	r7, [r2, #92]
-	str	r7, [sp, #132]          @ 4-byte Spill
-	ldr	r7, [r2, #76]
-	str	r7, [sp, #120]          @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #116]          @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #112]          @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #108]          @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #104]          @ 4-byte Spill
-	ldm	r2, {r5, r8, r12, lr}
-	ldr	r6, [r1]
-	ldmib	r1, {r4, r7, r10}
-	subs	r5, r6, r5
-	sbcs	r4, r4, r8
-	str	r5, [sp, #32]           @ 4-byte Spill
-	ldr	r5, [r2, #44]
-	sbcs	r7, r7, r12
-	str	r4, [sp, #28]           @ 4-byte Spill
-	ldr	r4, [r2, #40]
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r5, [sp, #84]           @ 4-byte Spill
-	str	r4, [sp, #80]           @ 4-byte Spill
-	str	r7, [sp, #48]           @ 4-byte Spill
-	sbcs	r7, r10, lr
-	ldr	r10, [r2, #16]
-	add	lr, r1, #16
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #32]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	ldr	r2, [r1, #96]
-	str	r2, [sp, #88]           @ 4-byte Spill
-	ldr	r2, [r1, #100]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp, #92]           @ 4-byte Spill
-	ldr	r2, [r1, #104]
-	str	r2, [sp, #96]           @ 4-byte Spill
-	ldr	r2, [r1, #108]
-	str	r2, [sp, #100]          @ 4-byte Spill
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #68]           @ 4-byte Spill
-	ldr	r2, [r1, #88]
-	str	r2, [sp, #72]           @ 4-byte Spill
-	ldr	r2, [r1, #92]
-	str	r2, [sp, #76]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldm	r9, {r4, r5, r6, r8, r9}
-	ldr	r2, [r1, #52]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #32]          @ 4-byte Reload
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	str	r11, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	sbcs	r1, r1, r10
-	str	r7, [r0, #8]
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	str	r7, [r0, #12]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	sbcs	r2, r2, r7
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	sbcs	r1, r12, r1
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	sbcs	r2, lr, r2
-	add	lr, r3, #8
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	sbcs	r1, r4, r1
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	sbcs	r2, r5, r2
-	str	r2, [r0, #36]
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	sbcs	r1, r6, r1
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	sbcs	r2, r8, r2
-	str	r2, [r0, #44]
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	sbcs	r1, r9, r1
-	str	r1, [r0, #48]
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	str	r2, [r0, #52]
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	sbcs	r10, r7, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	str	r10, [sp, #80]          @ 4-byte Spill
-	sbcs	r11, r2, r1
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r11, [sp, #84]          @ 4-byte Spill
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #144]          @ 4-byte Reload
-	str	r1, [sp, #136]          @ 4-byte Spill
-	mov	r1, #0
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	r2, [sp, #128]          @ 4-byte Spill
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	str	r2, [sp, #144]          @ 4-byte Spill
-	ldr	r2, [sp, #148]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	str	r2, [sp, #148]          @ 4-byte Spill
-	ldr	r2, [sp, #152]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	str	r2, [sp, #152]          @ 4-byte Spill
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	sbcs	r9, r7, r2
-	ldr	r2, [sp, #132]          @ 4-byte Reload
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	str	r9, [sp, #108]          @ 4-byte Spill
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #88]           @ 4-byte Reload
-	str	r2, [sp, #132]          @ 4-byte Spill
-	ldr	r2, [sp, #160]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	str	r2, [sp, #160]          @ 4-byte Spill
-	ldr	r2, [sp, #156]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	str	r2, [sp, #156]          @ 4-byte Spill
-	ldr	r2, [sp, #140]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	str	r2, [sp, #140]          @ 4-byte Spill
-	ldr	r2, [sp, #164]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	sbc	r1, r1, #0
-	str	r2, [sp, #164]          @ 4-byte Spill
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [r3, #32]
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [r3, #36]
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [r3, #40]
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldr	r1, [r3, #44]
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldr	r1, [r3, #48]
-	str	r1, [sp, #120]          @ 4-byte Spill
-	ldr	r1, [r3, #52]
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldm	r3, {r2, r5}
-	ldm	lr, {r4, r6, lr}
-	ldr	r7, [r3, #24]
-	ldr	r8, [r3, #28]
-	ldr	r12, [r3, #20]
-	ldr	r3, [sp, #128]          @ 4-byte Reload
-	adds	r1, r10, r2
-	ldr	r10, [sp, #104]         @ 4-byte Reload
-	ldr	r2, [sp, #136]          @ 4-byte Reload
-	adcs	r5, r11, r5
-	ldr	r11, [sp, #124]         @ 4-byte Reload
-	adcs	r4, r10, r4
-	adcs	r2, r2, r6
-	ldr	r6, [sp, #144]          @ 4-byte Reload
-	adcs	r3, r3, lr
-	adcs	r12, r6, r12
-	ldr	r6, [sp, #148]          @ 4-byte Reload
-	adcs	lr, r6, r7
-	ldr	r6, [sp, #152]          @ 4-byte Reload
-	ldr	r7, [sp, #132]          @ 4-byte Reload
-	adcs	r8, r6, r8
-	ldr	r6, [sp, #92]           @ 4-byte Reload
-	adcs	r9, r9, r6
-	ldr	r6, [sp, #96]           @ 4-byte Reload
-	adcs	r6, r7, r6
-	ldr	r7, [sp, #160]          @ 4-byte Reload
-	str	r6, [sp, #96]           @ 4-byte Spill
-	ldr	r6, [sp, #112]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	str	r7, [sp, #112]          @ 4-byte Spill
-	ldr	r7, [sp, #156]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r7, [sp, #116]          @ 4-byte Spill
-	ldr	r7, [sp, #140]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	str	r7, [sp, #120]          @ 4-byte Spill
-	ldr	r7, [sp, #164]          @ 4-byte Reload
-	adc	r7, r7, r11
-	str	r7, [sp, #124]          @ 4-byte Spill
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	ands	r7, r7, #1
-	moveq	r1, r6
-	moveq	r4, r10
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	str	r1, [r0, #56]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	moveq	r5, r1
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r5, [r0, #60]
-	str	r4, [r0, #64]
-	moveq	r2, r1
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r2, [r0, #68]
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	moveq	r3, r1
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	str	r3, [r0, #72]
-	ldr	r3, [sp, #116]          @ 4-byte Reload
-	moveq	r12, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r12, [r0, #76]
-	moveq	lr, r1
-	ldr	r1, [sp, #152]          @ 4-byte Reload
-	str	lr, [r0, #80]
-	moveq	r8, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r8, [r0, #84]
-	moveq	r9, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r9, [r0, #88]
-	moveq	r2, r1
-	ldr	r1, [sp, #160]          @ 4-byte Reload
-	str	r2, [r0, #92]
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #156]          @ 4-byte Reload
-	moveq	r3, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	cmp	r7, #0
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	moveq	r7, r1
-	ldr	r1, [sp, #164]          @ 4-byte Reload
-	moveq	r6, r1
-	add	r1, r0, #96
-	stm	r1, {r2, r3, r7}
-	str	r6, [r0, #108]
-	add	sp, sp, #168
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end221:
-	.size	mcl_fpDbl_sub14L, .Lfunc_end221-mcl_fpDbl_sub14L
-	.cantunwind
-	.fnend
-
-	.align	2
-	.type	.LmulPv480x32,%function
-.LmulPv480x32:                          @ @mulPv480x32
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r9, [r1, #12]
-	umull	r4, r8, lr, r2
-	umull	lr, r6, r12, r2
-	mov	r5, r4
-	mov	r7, r6
-	str	lr, [r0]
-	umull	lr, r12, r9, r2
-	umlal	r7, r5, r3, r2
-	str	r5, [r0, #8]
-	str	r7, [r0, #4]
-	umull	r5, r7, r3, r2
-	adds	r3, r6, r5
-	adcs	r3, r7, r4
-	adcs	r3, r8, lr
-	str	r3, [r0, #12]
-	ldr	r3, [r1, #16]
-	umull	r7, r6, r3, r2
-	adcs	r3, r12, r7
-	str	r3, [r0, #16]
-	ldr	r3, [r1, #20]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #20]
-	ldr	r3, [r1, #24]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #24]
-	ldr	r3, [r1, #28]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #28]
-	ldr	r3, [r1, #32]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #32]
-	ldr	r3, [r1, #36]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #36]
-	ldr	r3, [r1, #40]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #40]
-	ldr	r3, [r1, #44]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #44]
-	ldr	r3, [r1, #48]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #48]
-	ldr	r3, [r1, #52]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #52]
-	ldr	r1, [r1, #56]
-	umull	r3, r7, r1, r2
-	adcs	r1, r5, r3
-	str	r1, [r0, #56]
-	adc	r1, r7, #0
-	str	r1, [r0, #60]
-	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
-	mov	pc, lr
-.Lfunc_end222:
-	.size	.LmulPv480x32, .Lfunc_end222-.LmulPv480x32
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mulUnitPre15L
-	.align	2
-	.type	mcl_fp_mulUnitPre15L,%function
-mcl_fp_mulUnitPre15L:                   @ @mcl_fp_mulUnitPre15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#84
-	sub	sp, sp, #84
-	mov	r4, r0
-	add	r0, sp, #16
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #76]
-	add	r11, sp, #48
-	add	lr, sp, #20
-	ldr	r9, [sp, #64]
-	ldr	r10, [sp, #60]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #72]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #68]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r6, r8, r11}
-	ldr	r7, [sp, #44]
-	ldr	r5, [sp, #40]
-	ldr	r1, [sp, #16]
-	ldm	lr, {r0, r2, r3, r12, lr}
-	str	r1, [r4]
-	stmib	r4, {r0, r2, r3, r12, lr}
-	add	r0, r4, #32
-	str	r5, [r4, #24]
-	str	r7, [r4, #28]
-	stm	r0, {r6, r8, r11}
-	str	r10, [r4, #44]
-	str	r9, [r4, #48]
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	str	r0, [r4, #52]
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	str	r0, [r4, #56]
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	str	r0, [r4, #60]
-	add	sp, sp, #84
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end223:
-	.size	mcl_fp_mulUnitPre15L, .Lfunc_end223-mcl_fp_mulUnitPre15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_mulPre15L
-	.align	2
-	.type	mcl_fpDbl_mulPre15L,%function
-mcl_fpDbl_mulPre15L:                    @ @mcl_fpDbl_mulPre15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#44
-	sub	sp, sp, #44
-	.pad	#1024
-	sub	sp, sp, #1024
-	mov	r3, r2
-	mov	r4, r0
-	add	r0, sp, #1000
-	str	r1, [sp, #96]           @ 4-byte Spill
-	mov	r8, r1
-	ldr	r2, [r3]
-	str	r3, [sp, #92]           @ 4-byte Spill
-	str	r4, [sp, #100]          @ 4-byte Spill
-	mov	r6, r3
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1060]
-	ldr	r1, [sp, #1004]
-	ldr	r2, [r6, #4]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #1056]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #1008]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #1052]
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #1012]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #1048]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	mov	r1, r8
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #1044]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #1040]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #1036]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1032]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1028]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1024]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1020]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1016]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1000]
-	str	r0, [r4]
-	add	r0, sp, #936
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #996]
-	add	r10, sp, #960
-	add	lr, sp, #936
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #992]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #988]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #984]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #980]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r7, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #24]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #4]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r4, r1, r0
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	ldr	r2, [r6, #8]
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #872
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #932]
-	ldr	r8, [sp, #872]
-	add	r12, sp, #880
-	ldr	lr, [sp, #912]
-	ldr	r7, [sp, #908]
-	ldr	r11, [sp, #904]
-	ldr	r9, [sp, #900]
-	ldr	r10, [sp, #876]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #928]
-	adds	r4, r8, r4
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #924]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #920]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #916]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r5, [sp, #100]          @ 4-byte Reload
-	str	r4, [r5, #8]
-	ldr	r4, [sp, #52]           @ 4-byte Reload
-	adcs	r4, r10, r4
-	str	r4, [sp, #32]           @ 4-byte Spill
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r6, #12]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #808
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #868]
-	add	r9, sp, #836
-	add	lr, sp, #816
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #864]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #860]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #856]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #852]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r9, {r6, r7, r8, r9}
-	ldr	r0, [sp, #808]
-	ldr	r11, [sp, #812]
-	ldm	lr, {r1, r2, r3, r12, lr}
-	ldr	r10, [sp, #32]          @ 4-byte Reload
-	adds	r0, r0, r10
-	str	r0, [r5, #12]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	ldr	r5, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #16]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	mov	r6, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #744
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #804]
-	add	lr, sp, #768
-	add	r12, sp, #748
-	ldr	r11, [sp, #780]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #800]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #796]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #792]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #788]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #784]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r9, r10, lr}
-	ldr	r8, [sp, #744]
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	adds	r4, r8, r4
-	str	r4, [r7, #16]
-	ldr	r4, [sp, #52]           @ 4-byte Reload
-	mov	r7, r5
-	adcs	r4, r0, r4
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #20]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #680
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #740]
-	ldr	r9, [sp, #680]
-	add	lr, sp, #684
-	ldr	r10, [sp, #720]
-	ldr	r8, [sp, #716]
-	ldr	r11, [sp, #712]
-	ldr	r6, [sp, #708]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #736]
-	adds	r4, r9, r4
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #732]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #728]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #724]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r5, [sp, #100]          @ 4-byte Reload
-	str	r4, [r5, #20]
-	ldr	r4, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r7, #24]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #616
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #676]
-	add	r8, sp, #648
-	add	lr, sp, #624
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #672]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #668]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #664]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #660]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r8, {r6, r7, r8}
-	ldr	r10, [sp, #644]
-	ldr	r0, [sp, #616]
-	ldr	r11, [sp, #620]
-	ldm	lr, {r1, r2, r3, r12, lr}
-	ldr	r9, [sp, #32]           @ 4-byte Reload
-	adds	r0, r0, r9
-	str	r0, [r5, #24]
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	ldr	r5, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #28]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #552
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #612]
-	add	r11, sp, #584
-	add	r12, sp, #556
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #608]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #604]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #600]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #596]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r6, r7, r11}
-	ldr	lr, [sp, #580]
-	ldr	r9, [sp, #576]
-	ldr	r10, [sp, #552]
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	ldr	r8, [sp, #100]          @ 4-byte Reload
-	adds	r4, r10, r4
-	str	r4, [r8, #28]
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	adcs	r4, r0, r4
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #32]
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #488
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #548]
-	ldr	r9, [sp, #488]
-	add	lr, sp, #492
-	mov	r6, r8
-	ldr	r10, [sp, #524]
-	ldr	r11, [sp, #520]
-	ldr	r7, [sp, #516]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #544]
-	adds	r4, r9, r4
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #540]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #536]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #532]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #528]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	str	r4, [r6, #32]
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r4, #36]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #424
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #484]
-	add	r8, sp, #456
-	add	lr, sp, #432
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #480]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #476]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #472]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #468]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r8, {r5, r7, r8}
-	ldr	r10, [sp, #452]
-	ldr	r0, [sp, #424]
-	ldr	r11, [sp, #428]
-	ldm	lr, {r1, r2, r3, r12, lr}
-	ldr	r9, [sp, #32]           @ 4-byte Reload
-	adds	r0, r0, r9
-	str	r0, [r6, #36]
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r4, #40]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	mov	r7, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #360
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #420]
-	add	r12, sp, #364
-	ldr	r11, [sp, #396]
-	ldr	r6, [sp, #392]
-	ldr	lr, [sp, #388]
-	ldr	r9, [sp, #384]
-	ldr	r10, [sp, #360]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #416]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #412]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #408]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #404]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #400]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	ldr	r8, [sp, #100]          @ 4-byte Reload
-	adds	r4, r10, r4
-	str	r4, [r8, #40]
-	ldr	r4, [sp, #72]           @ 4-byte Reload
-	adcs	r4, r0, r4
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r7, #44]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	add	r0, sp, #296
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #356]
-	ldr	r9, [sp, #296]
-	add	lr, sp, #300
-	mov	r5, r8
-	ldr	r10, [sp, #336]
-	ldr	r7, [sp, #332]
-	ldr	r11, [sp, #328]
-	ldr	r6, [sp, #324]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #352]
-	adds	r4, r9, r4
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #348]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #344]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #340]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	str	r4, [r5, #44]
-	ldr	r4, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r4, #48]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #232
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #292]
-	add	lr, sp, #240
-	ldr	r8, [sp, #268]
-	ldr	r7, [sp, #264]
-	ldr	r10, [sp, #260]
-	ldr	r3, [sp, #232]
-	ldr	r11, [sp, #236]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #288]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #284]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #280]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #276]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #272]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r12, lr}
-	ldr	r9, [sp, #28]           @ 4-byte Reload
-	adds	r3, r3, r9
-	add	r9, sp, #168
-	str	r3, [r5, #48]
-	ldr	r3, [r4, #52]
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	adcs	r4, r11, r4
-	str	r4, [sp, #32]           @ 4-byte Spill
-	ldr	r4, [sp, #84]           @ 4-byte Reload
-	adcs	r11, r0, r4
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	mov	r2, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r6
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	mov	r0, r9
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #228]
-	add	r12, sp, #172
-	ldr	r6, [sp, #204]
-	ldr	r4, [sp, #200]
-	ldr	lr, [sp, #196]
-	ldr	r8, [sp, #192]
-	ldr	r9, [sp, #188]
-	ldr	r2, [sp, #168]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #224]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #220]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #216]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #212]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #208]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r12, {r0, r1, r3, r12}
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	adds	r2, r2, r7
-	str	r2, [r5, #52]
-	adcs	r5, r0, r11
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	adcs	r7, r1, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	ldr	r2, [r2, #56]
-	adcs	r10, r3, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r11, r12, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	add	r0, sp, #104
-	bl	.LmulPv480x32(PLT)
-	add	r3, sp, #104
-	add	r12, sp, #120
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r6, r0, r5
-	ldr	r0, [sp, #164]
-	adcs	lr, r1, r7
-	adcs	r4, r2, r10
-	adcs	r7, r3, r11
-	add	r11, sp, #136
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #160]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #156]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldm	r11, {r5, r8, r9, r10, r11}
-	ldm	r12, {r1, r2, r3, r12}
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	str	r6, [r0, #56]
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	str	lr, [r0, #60]
-	str	r4, [r0, #64]
-	str	r7, [r0, #68]
-	ldr	r7, [sp, #80]           @ 4-byte Reload
-	ldr	r4, [sp, #56]           @ 4-byte Reload
-	adcs	r6, r1, r6
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r6, [r0, #72]
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r1, [r0, #76]
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r2, r3, r2
-	ldr	r3, [sp, #84]           @ 4-byte Reload
-	str	r2, [r0, #80]
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r12, r1
-	str	r1, [r0, #84]
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r12, r5, r2
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r8, r1
-	str	r12, [r0, #88]
-	add	r12, r0, #92
-	adcs	r2, r9, r2
-	adcs	r3, r10, r3
-	adcs	r7, r11, r7
-	adcs	r6, r5, r6
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	adcs	r5, r4, r5
-	ldr	r4, [sp, #96]           @ 4-byte Reload
-	stm	r12, {r1, r2, r3, r7}
-	str	r6, [r0, #108]
-	str	r5, [r0, #112]
-	adc	r4, r4, #0
-	str	r4, [r0, #116]
-	add	sp, sp, #44
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end224:
-	.size	mcl_fpDbl_mulPre15L, .Lfunc_end224-mcl_fpDbl_mulPre15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sqrPre15L
-	.align	2
-	.type	mcl_fpDbl_sqrPre15L,%function
-mcl_fpDbl_sqrPre15L:                    @ @mcl_fpDbl_sqrPre15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#44
-	sub	sp, sp, #44
-	.pad	#1024
-	sub	sp, sp, #1024
-	mov	r5, r1
-	mov	r4, r0
-	add	r0, sp, #1000
-	ldr	r2, [r5]
-	str	r4, [sp, #100]          @ 4-byte Spill
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1060]
-	ldr	r1, [sp, #1004]
-	ldr	r2, [r5, #4]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #1056]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #1008]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #1052]
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #1012]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #1048]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	mov	r1, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #1044]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #1040]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #1036]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #1032]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #1028]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1024]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1020]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1016]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1000]
-	str	r0, [r4]
-	add	r0, sp, #936
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #996]
-	add	r10, sp, #960
-	add	lr, sp, #936
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #992]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #988]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #984]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #980]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #32]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #4]
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r4, r1, r0
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #8]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #872
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #932]
-	add	r12, sp, #896
-	ldr	lr, [sp, #912]
-	ldr	r6, [sp, #908]
-	add	r10, sp, #876
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #928]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #924]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #920]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #916]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r12, {r9, r11, r12}
-	ldr	r8, [sp, #872]
-	ldm	r10, {r0, r1, r2, r3, r10}
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	adds	r4, r8, r4
-	str	r4, [r7, #8]
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	adcs	r4, r0, r4
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #12]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #808
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #868]
-	add	r10, sp, #836
-	add	lr, sp, #812
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #864]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #860]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #856]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #852]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r10, {r6, r8, r9, r10}
-	ldr	r11, [sp, #808]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r7, r11, r4
-	ldr	r4, [sp, #100]          @ 4-byte Reload
-	str	r7, [r4, #12]
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #16]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #744
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #804]
-	add	r8, sp, #776
-	add	lr, sp, #764
-	add	r12, sp, #744
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #800]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #796]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #792]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #788]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r8, {r6, r7, r8}
-	ldm	lr, {r9, r10, lr}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r11, [sp, #40]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #16]
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #20]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #680
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #740]
-	add	r8, sp, #712
-	add	lr, sp, #684
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #736]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #732]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #728]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #724]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r8, {r4, r6, r8}
-	ldr	r11, [sp, #708]
-	ldr	r10, [sp, #680]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	ldr	r9, [sp, #100]          @ 4-byte Reload
-	adds	r7, r10, r7
-	str	r7, [r9, #20]
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	adcs	r7, r0, r7
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #24]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #616
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #676]
-	add	r10, sp, #644
-	add	lr, sp, #620
-	mov	r4, r9
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #672]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #668]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #664]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #660]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #656]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r6, r8, r10}
-	ldr	r11, [sp, #616]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r7, r11, r7
-	str	r7, [r4, #24]
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #28]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #552
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #612]
-	add	r8, sp, #584
-	add	lr, sp, #572
-	add	r12, sp, #552
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #608]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #604]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #600]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #596]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r8, {r6, r7, r8}
-	ldm	lr, {r9, r10, lr}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r11, [sp, #40]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #28]
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #32]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #488
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #548]
-	add	r8, sp, #520
-	add	lr, sp, #492
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #544]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #540]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #536]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #532]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r8, {r4, r6, r8}
-	ldr	r11, [sp, #516]
-	ldr	r10, [sp, #488]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	ldr	r9, [sp, #100]          @ 4-byte Reload
-	adds	r7, r10, r7
-	str	r7, [r9, #32]
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	adcs	r7, r0, r7
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #36]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #424
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #484]
-	add	r10, sp, #452
-	add	lr, sp, #428
-	mov	r4, r9
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #480]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #476]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #472]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #468]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #464]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r6, r8, r10}
-	ldr	r11, [sp, #424]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r7, r11, r7
-	str	r7, [r4, #36]
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #40]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #360
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #420]
-	add	r8, sp, #392
-	add	lr, sp, #380
-	add	r12, sp, #360
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #416]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #412]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #408]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #404]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r8, {r6, r7, r8}
-	ldm	lr, {r9, r10, lr}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r11, [sp, #40]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #40]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #44]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #296
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #356]
-	add	r9, sp, #328
-	add	lr, sp, #300
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #352]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #348]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #344]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #340]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r9, {r6, r8, r9}
-	ldr	r11, [sp, #324]
-	ldr	r10, [sp, #296]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adds	r7, r10, r7
-	str	r7, [r4, #44]
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	adcs	r7, r0, r7
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #48]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #232
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #292]
-	add	r11, sp, #256
-	add	lr, sp, #236
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #288]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #284]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #280]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #276]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r11, {r6, r8, r9, r10, r11}
-	ldr	r12, [sp, #232]
-	ldm	lr, {r0, r1, r2, r3, lr}
-	adds	r7, r12, r7
-	ldr	r12, [r5, #52]
-	str	r7, [r4, #48]
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r7, r1, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	mov	r2, r12
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #168
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #228]
-	add	lr, sp, #196
-	add	r12, sp, #172
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #224]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #220]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #216]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #212]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #208]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	lr, {r8, r11, lr}
-	ldr	r9, [sp, #192]
-	ldr	r10, [sp, #188]
-	ldr	r2, [sp, #168]
-	ldm	r12, {r0, r1, r3, r12}
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	adds	r2, r2, r6
-	add	r6, sp, #104
-	str	r2, [r4, #52]
-	adcs	r4, r0, r7
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r2, [r5, #56]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r7, r3, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	mov	r0, r6
-	bl	.LmulPv480x32(PLT)
-	add	r3, sp, #104
-	add	r11, sp, #136
-	add	r12, sp, #120
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r6, r0, r4
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	lr, r1, r0
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adcs	r4, r2, r7
-	adcs	r7, r3, r0
-	ldr	r0, [sp, #164]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #160]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #156]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldm	r11, {r5, r8, r9, r10, r11}
-	ldm	r12, {r1, r2, r3, r12}
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	str	r6, [r0, #56]
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	str	lr, [r0, #60]
-	str	r4, [r0, #64]
-	str	r7, [r0, #68]
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	ldr	r4, [sp, #56]           @ 4-byte Reload
-	adcs	r6, r1, r6
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r6, [r0, #72]
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	str	r1, [r0, #76]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r2, r3, r2
-	ldr	r3, [sp, #88]           @ 4-byte Reload
-	str	r2, [r0, #80]
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r12, r1
-	str	r1, [r0, #84]
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r12, r5, r2
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r8, r1
-	str	r12, [r0, #88]
-	add	r12, r0, #92
-	adcs	r2, r9, r2
-	adcs	r3, r10, r3
-	adcs	r7, r11, r7
-	adcs	r6, r5, r6
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	adcs	r5, r4, r5
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	stm	r12, {r1, r2, r3, r7}
-	str	r6, [r0, #108]
-	str	r5, [r0, #112]
-	adc	r4, r4, #0
-	str	r4, [r0, #116]
-	add	sp, sp, #44
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end225:
-	.size	mcl_fpDbl_sqrPre15L, .Lfunc_end225-mcl_fpDbl_sqrPre15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mont15L
-	.align	2
-	.type	mcl_fp_mont15L,%function
-mcl_fp_mont15L:                         @ @mcl_fp_mont15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#12
-	sub	sp, sp, #12
-	.pad	#2048
-	sub	sp, sp, #2048
-	add	r12, sp, #124
-	add	r7, sp, #1024
-	mov	r4, r3
-	stm	r12, {r1, r2, r3}
-	str	r0, [sp, #88]           @ 4-byte Spill
-	add	r0, r7, #968
-	ldr	r6, [r3, #-4]
-	ldr	r2, [r2]
-	str	r6, [sp, #120]          @ 4-byte Spill
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1996]
-	ldr	r5, [sp, #1992]
-	add	r7, sp, #1024
-	mov	r1, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #2000]
-	mul	r2, r5, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #2004]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #2052]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #2048]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #2044]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #2040]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #2036]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #2032]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #2028]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #2024]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #2020]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #2016]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #2012]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2008]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	add	r0, r7, #904
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1988]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r9, [sp, #1952]
-	ldr	r6, [sp, #1948]
-	ldr	r8, [sp, #1944]
-	ldr	r4, [sp, #1928]
-	ldr	r10, [sp, #1932]
-	ldr	r11, [sp, #1936]
-	ldr	r7, [sp, #1940]
-	add	lr, sp, #1024
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1984]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1980]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1976]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1972]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1968]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1964]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1960]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1956]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #4]
-	add	r0, lr, #840
-	bl	.LmulPv480x32(PLT)
-	adds	r0, r4, r5
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	ldr	r3, [sp, #1880]
-	ldr	r12, [sp, #1884]
-	ldr	lr, [sp, #1888]
-	ldr	r4, [sp, #1892]
-	ldr	r5, [sp, #1896]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	ldr	r10, [sp, #1908]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	ldr	r11, [sp, #92]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	ldr	r8, [sp, #1900]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #1864]
-	adcs	r1, r9, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, #0
-	ldr	r9, [sp, #1904]
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #1876]
-	adc	r0, r0, #0
-	adds	r6, r11, r6
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #1872]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1924]
-	str	r6, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1920]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1916]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1912]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #1868]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #1024
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r10, #776
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1860]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #1828]
-	ldr	r11, [sp, #1824]
-	ldr	r8, [sp, #1820]
-	ldr	r4, [sp, #1816]
-	ldr	r5, [sp, #1800]
-	ldr	r7, [sp, #1804]
-	ldr	r9, [sp, #1808]
-	ldr	r10, [sp, #1812]
-	add	lr, sp, #1024
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1856]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1852]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1848]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1844]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1840]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1836]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1832]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, lr, #712
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1748]
-	ldr	r3, [sp, #1752]
-	ldr	r12, [sp, #1756]
-	ldr	lr, [sp, #1760]
-	adds	r0, r0, r5
-	ldr	r5, [sp, #1768]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1776]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1780]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1764]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1772]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1736]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1744]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r6, r11, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1796]
-	str	r6, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1792]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1788]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1784]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1740]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #1024
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r10, #648
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1732]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #1700]
-	ldr	r11, [sp, #1696]
-	ldr	r8, [sp, #1692]
-	ldr	r4, [sp, #1688]
-	ldr	r5, [sp, #1672]
-	ldr	r7, [sp, #1676]
-	ldr	r9, [sp, #1680]
-	ldr	r10, [sp, #1684]
-	add	lr, sp, #1024
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1728]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1724]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1720]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1716]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1712]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1708]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1704]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, lr, #584
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1620]
-	ldr	r3, [sp, #1624]
-	ldr	r12, [sp, #1628]
-	ldr	lr, [sp, #1632]
-	adds	r0, r0, r5
-	ldr	r5, [sp, #1640]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1648]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1652]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1636]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1644]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1608]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1616]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r6, r11, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1668]
-	str	r6, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1664]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1660]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1656]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1612]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #1024
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r10, #520
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1604]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #1572]
-	ldr	r11, [sp, #1568]
-	ldr	r8, [sp, #1564]
-	ldr	r4, [sp, #1560]
-	ldr	r5, [sp, #1544]
-	ldr	r7, [sp, #1548]
-	ldr	r9, [sp, #1552]
-	ldr	r10, [sp, #1556]
-	add	lr, sp, #1024
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1600]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1596]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1592]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1588]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1584]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1580]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1576]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, lr, #456
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1492]
-	ldr	r3, [sp, #1496]
-	ldr	r12, [sp, #1500]
-	ldr	lr, [sp, #1504]
-	adds	r0, r0, r5
-	ldr	r5, [sp, #1512]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1520]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1524]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1508]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1516]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1480]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1488]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r6, r11, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1540]
-	str	r6, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1536]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1532]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1528]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1484]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #1024
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r10, #392
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1476]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #1444]
-	ldr	r11, [sp, #1440]
-	ldr	r8, [sp, #1436]
-	ldr	r4, [sp, #1432]
-	ldr	r5, [sp, #1416]
-	ldr	r7, [sp, #1420]
-	ldr	r9, [sp, #1424]
-	ldr	r10, [sp, #1428]
-	add	lr, sp, #1024
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1472]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1468]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1464]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1460]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1456]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1452]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1448]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, lr, #328
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1364]
-	ldr	r3, [sp, #1368]
-	ldr	r12, [sp, #1372]
-	ldr	lr, [sp, #1376]
-	adds	r0, r0, r5
-	ldr	r5, [sp, #1384]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1392]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1396]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1380]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1388]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1352]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1360]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r6, r11, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1412]
-	str	r6, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1408]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1404]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1400]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1356]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #1024
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r10, #264
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1348]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #1316]
-	ldr	r11, [sp, #1312]
-	ldr	r8, [sp, #1308]
-	ldr	r4, [sp, #1304]
-	ldr	r5, [sp, #1288]
-	ldr	r7, [sp, #1292]
-	ldr	r9, [sp, #1296]
-	ldr	r10, [sp, #1300]
-	add	lr, sp, #1024
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1344]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1340]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1336]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1332]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1328]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1324]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1320]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, lr, #200
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1236]
-	ldr	r3, [sp, #1240]
-	ldr	r12, [sp, #1244]
-	ldr	lr, [sp, #1248]
-	adds	r0, r0, r5
-	ldr	r5, [sp, #1256]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1264]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1268]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1252]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1260]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1224]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1232]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r6, r11, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1284]
-	str	r6, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1280]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1276]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1272]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1228]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #1024
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r10, #136
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1220]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #1188]
-	ldr	r11, [sp, #1184]
-	ldr	r8, [sp, #1180]
-	ldr	r4, [sp, #1176]
-	ldr	r5, [sp, #1160]
-	ldr	r7, [sp, #1164]
-	ldr	r9, [sp, #1168]
-	ldr	r10, [sp, #1172]
-	add	lr, sp, #1024
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1216]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1212]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1208]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1204]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1200]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1196]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1192]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, lr, #72
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1108]
-	ldr	r3, [sp, #1112]
-	ldr	r12, [sp, #1116]
-	ldr	lr, [sp, #1120]
-	adds	r0, r0, r5
-	ldr	r5, [sp, #1128]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1136]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1140]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1124]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1132]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1096]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1104]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r6, r11, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1156]
-	str	r6, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1152]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1148]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1144]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1100]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #1024
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r10, #8
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1092]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #1060]
-	ldr	r11, [sp, #1056]
-	ldr	r8, [sp, #1052]
-	ldr	r4, [sp, #1048]
-	ldr	r5, [sp, #1032]
-	ldr	r7, [sp, #1036]
-	ldr	r9, [sp, #1040]
-	ldr	r10, [sp, #1044]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1088]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1084]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1080]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1076]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1072]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1068]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1064]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #968
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #972
-	adds	r0, r0, r5
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #996
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1028]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1024]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1020]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1016]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #968]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #904
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #964]
-	add	r11, sp, #920
-	add	r10, sp, #904
-	ldr	r6, [sp, #932]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #960]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #956]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #952]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #948]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #944]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #940]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #936]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldm	r10, {r5, r7, r9, r10}
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, sp, #840
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #844
-	adds	r0, r0, r5
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #880
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	add	r11, sp, #868
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #900]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #896]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #892]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldm	r11, {r4, r5, r11}
-	ldr	r6, [sp, #840]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	adds	r6, r7, r6
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r6, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #120]         @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	mul	r2, r6, r11
-	adcs	r0, r0, r8
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #776
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #836]
-	add	r10, sp, #776
-	ldr	r4, [sp, #800]
-	ldr	r5, [sp, #796]
-	ldr	r6, [sp, #792]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #832]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #828]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #824]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #820]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #816]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #812]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #808]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #804]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r7, r8, r9, r10}
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r2, [r0, #40]
-	add	r0, sp, #712
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #728
-	adds	r0, r0, r7
-	ldr	r7, [sp, #724]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	adcs	r1, r1, r9
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r1, r10
-	add	r10, sp, #752
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #716]
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #720]
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	ldr	r4, [sp, #712]
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adc	r1, r1, #0
-	adds	r0, r0, r4
-	str	r1, [sp, #52]           @ 4-byte Spill
-	mul	r1, r0, r11
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #772]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #768]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #112]         @ 4-byte Reload
-	adcs	r6, r11, r6
-	str	r6, [sp, #112]          @ 4-byte Spill
-	ldr	r6, [sp, #108]          @ 4-byte Reload
-	adcs	r5, r6, r5
-	str	r5, [sp, #108]          @ 4-byte Spill
-	ldr	r5, [sp, #104]          @ 4-byte Reload
-	adcs	r5, r5, r7
-	str	r5, [sp, #104]          @ 4-byte Spill
-	ldr	r5, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	add	r0, sp, #648
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #708]
-	add	r10, sp, #648
-	ldr	r11, [sp, #676]
-	ldr	r4, [sp, #672]
-	ldr	r6, [sp, #668]
-	ldr	r5, [sp, #664]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #688]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #684]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #680]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r7, r8, r9, r10}
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r2, [r0, #44]
-	add	r0, sp, #584
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #600
-	adds	r0, r0, r7
-	add	r7, sp, #584
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	adcs	r1, r1, r9
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r1, r1, r10
-	add	r10, sp, #624
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r1, r5
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r11
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldm	r7, {r4, r5, r6, r7}
-	adds	r1, r0, r4
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	str	r1, [sp, #116]          @ 4-byte Spill
-	mul	r2, r1, r0
-	ldr	r0, [sp, #644]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #640]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #112]         @ 4-byte Reload
-	adcs	r5, r11, r5
-	str	r5, [sp, #64]           @ 4-byte Spill
-	ldr	r5, [sp, #108]          @ 4-byte Reload
-	adcs	r5, r5, r6
-	str	r5, [sp, #60]           @ 4-byte Spill
-	ldr	r5, [sp, #104]          @ 4-byte Reload
-	adcs	r5, r5, r7
-	str	r5, [sp, #56]           @ 4-byte Spill
-	ldr	r5, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	add	r0, sp, #520
-	bl	.LmulPv480x32(PLT)
-	ldr	r1, [sp, #580]
-	add	r11, sp, #524
-	ldr	r10, [sp, #548]
-	ldr	r5, [sp, #544]
-	ldr	r6, [sp, #540]
-	ldr	r7, [sp, #520]
-	add	r0, sp, #456
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #576]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #572]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #568]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #564]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #560]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #556]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #552]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r9, r11}
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r1, #48]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #460
-	adds	r0, r0, r7
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #484
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #516]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #512]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #508]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #504]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #456]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #392
-	bl	.LmulPv480x32(PLT)
-	ldr	r1, [sp, #452]
-	ldr	r6, [sp, #420]
-	ldr	r7, [sp, #416]
-	ldr	r9, [sp, #412]
-	ldr	r4, [sp, #408]
-	ldr	r10, [sp, #392]
-	ldr	r11, [sp, #396]
-	ldr	r8, [sp, #400]
-	ldr	r5, [sp, #404]
-	add	r0, sp, #328
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #448]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #444]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #440]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #436]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #432]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #428]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #424]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r1, #52]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #332
-	adds	r0, r0, r10
-	add	r10, sp, #356
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #388]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #384]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #380]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #376]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #328]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #68]          @ 4-byte Reload
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #264
-	bl	.LmulPv480x32(PLT)
-	ldr	r1, [sp, #324]
-	add	r9, sp, #276
-	ldr	r6, [sp, #292]
-	ldr	r7, [sp, #288]
-	ldr	r10, [sp, #264]
-	ldr	r11, [sp, #268]
-	ldr	r5, [sp, #272]
-	add	r0, sp, #200
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #320]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #316]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #312]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #308]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #304]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #300]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #296]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r9, {r4, r8, r9}
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r1, #56]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #216
-	adds	r0, r0, r10
-	ldr	r10, [sp, #212]
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #208]
-	str	r1, [sp, #128]          @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	ldr	r4, [sp, #200]
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	add	r9, sp, #240
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	adcs	r1, r1, r7
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #204]
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adc	r1, r1, #0
-	adds	r7, r0, r4
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	str	r1, [sp, #72]           @ 4-byte Spill
-	mul	r1, r7, r0
-	ldr	r0, [sp, #260]
-	str	r1, [sp, #60]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #256]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #252]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldm	r9, {r4, r8, r9}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #128]         @ 4-byte Reload
-	adcs	r11, r11, r6
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	adcs	r5, r6, r5
-	ldr	r6, [sp, #68]           @ 4-byte Reload
-	adcs	r10, r6, r10
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r8, r0, r8
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r6, r0, r9
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r9, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r9
-	str	r0, [sp, #128]          @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	add	r0, sp, #136
-	bl	.LmulPv480x32(PLT)
-	add	r3, sp, #136
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r7, r0
-	adcs	r11, r11, r1
-	ldr	r0, [sp, #152]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	lr, r5, r2
-	mov	r5, r9
-	str	r11, [sp, #44]          @ 4-byte Spill
-	adcs	r10, r10, r3
-	str	lr, [sp, #52]           @ 4-byte Spill
-	str	r10, [sp, #60]          @ 4-byte Spill
-	adcs	r4, r1, r0
-	ldr	r0, [sp, #156]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r4, [sp, #76]           @ 4-byte Spill
-	adcs	r12, r1, r0
-	ldr	r0, [sp, #160]
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r12, [sp, #56]          @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #164]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #168]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #172]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #176]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #180]
-	adcs	r0, r8, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #184]
-	adcs	r0, r6, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #188]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #192]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #196]
-	adcs	r0, r1, r0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldmib	r5, {r1, r2}
-	ldr	r3, [r5, #16]
-	ldr	r7, [r5]
-	ldr	r0, [r5, #12]
-	ldr	r6, [r5, #20]
-	ldr	r9, [r5, #24]
-	ldr	r8, [r5, #32]
-	str	r3, [sp, #80]           @ 4-byte Spill
-	ldr	r3, [r5, #28]
-	subs	r7, r11, r7
-	add	r11, r5, #36
-	str	r3, [sp, #84]           @ 4-byte Spill
-	sbcs	r3, lr, r1
-	sbcs	lr, r10, r2
-	ldm	r11, {r1, r10, r11}
-	sbcs	r4, r4, r0
-	ldr	r0, [r5, #48]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r5, #52]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r5, #56]
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	sbcs	r2, r12, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	sbcs	r12, r0, r6
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	sbcs	r5, r0, r9
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	sbcs	r6, r0, r6
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	sbcs	r8, r0, r8
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	sbcs	r9, r0, r1
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	sbcs	r10, r0, r10
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	sbcs	r11, r0, r11
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	sbc	r0, r0, #0
-	ands	r1, r0, #1
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	movne	r7, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	str	r7, [r0]
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	movne	r3, r7
-	str	r3, [r0, #4]
-	ldr	r3, [sp, #60]           @ 4-byte Reload
-	movne	lr, r3
-	ldr	r3, [sp, #76]           @ 4-byte Reload
-	cmp	r1, #0
-	str	lr, [r0, #8]
-	movne	r4, r3
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	str	r4, [r0, #12]
-	movne	r2, r3
-	str	r2, [r0, #16]
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	movne	r12, r2
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r12, [r0, #20]
-	movne	r5, r2
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	str	r5, [r0, #24]
-	movne	r6, r2
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	str	r6, [r0, #28]
-	movne	r8, r2
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r8, [r0, #32]
-	movne	r9, r2
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	str	r9, [r0, #36]
-	movne	r10, r2
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	str	r10, [r0, #40]
-	movne	r11, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	str	r11, [r0, #44]
-	movne	r2, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r2, [r0, #48]
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	movne	r2, r1
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r2, [r0, #52]
-	ldr	r2, [sp, #132]          @ 4-byte Reload
-	movne	r2, r1
-	str	r2, [r0, #56]
-	add	sp, sp, #12
-	add	sp, sp, #2048
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end226:
-	.size	mcl_fp_mont15L, .Lfunc_end226-mcl_fp_mont15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montNF15L
-	.align	2
-	.type	mcl_fp_montNF15L,%function
-mcl_fp_montNF15L:                       @ @mcl_fp_montNF15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#4
-	sub	sp, sp, #4
-	.pad	#2048
-	sub	sp, sp, #2048
-	add	r12, sp, #116
-	mov	r4, r3
-	stm	r12, {r1, r2, r3}
-	str	r0, [sp, #76]           @ 4-byte Spill
-	add	r0, sp, #1984
-	ldr	r5, [r3, #-4]
-	ldr	r2, [r2]
-	str	r5, [sp, #112]          @ 4-byte Spill
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1984]
-	ldr	r1, [sp, #1988]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mul	r2, r0, r5
-	ldr	r0, [sp, #2044]
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #1992]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #2040]
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #1996]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #2036]
-	str	r1, [sp, #80]           @ 4-byte Spill
-	mov	r1, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #2032]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #2028]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #2024]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #2020]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #2016]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #2012]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2008]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2004]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2000]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #1920
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1980]
-	add	r7, sp, #1936
-	add	r11, sp, #1920
-	ldr	r6, [sp, #1948]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1976]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1972]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1968]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1964]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1960]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1956]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1952]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r7, {r4, r5, r7}
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r8, [sp, #1932]
-	ldr	r2, [r0, #4]
-	add	r0, sp, #1856
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1868]
-	ldr	r3, [sp, #1872]
-	ldr	r12, [sp, #1876]
-	ldr	lr, [sp, #1880]
-	adds	r0, r9, r0
-	ldr	r9, [sp, #1896]
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	ldr	r10, [sp, #1900]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	ldr	r11, [sp, #88]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	ldr	r8, [sp, #1892]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #1884]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #1888]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #1856]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adc	r0, r1, r0
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1864]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1916]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1912]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1908]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1904]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #1860]
-	adcs	r0, r7, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #1792
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1852]
-	add	r11, sp, #1808
-	add	r10, sp, #1792
-	ldr	r6, [sp, #1820]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1848]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1844]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1840]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1836]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1832]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1828]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1824]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldm	r10, {r5, r7, r9, r10}
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, sp, #1728
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1740]
-	ldr	r3, [sp, #1744]
-	ldr	r12, [sp, #1748]
-	ldr	lr, [sp, #1752]
-	adds	r0, r0, r5
-	ldr	r5, [sp, #1760]
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1768]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1772]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1756]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1764]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #108]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1728]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1736]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1788]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1784]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1780]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1776]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1732]
-	adcs	r0, r7, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #1664
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1724]
-	add	r11, sp, #1680
-	add	r10, sp, #1664
-	ldr	r6, [sp, #1692]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1720]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1716]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1712]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1708]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1704]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1700]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1696]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldm	r10, {r5, r7, r9, r10}
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, sp, #1600
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1612]
-	ldr	r3, [sp, #1616]
-	ldr	r12, [sp, #1620]
-	ldr	lr, [sp, #1624]
-	adds	r0, r0, r5
-	ldr	r5, [sp, #1632]
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1640]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1644]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1628]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1636]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #108]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1600]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1608]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1660]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1656]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1652]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1648]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1604]
-	adcs	r0, r7, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #1536
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1596]
-	add	r11, sp, #1552
-	add	r10, sp, #1536
-	ldr	r6, [sp, #1564]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1592]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1588]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1584]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1580]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1576]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1572]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1568]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldm	r10, {r5, r7, r9, r10}
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, sp, #1472
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1484]
-	ldr	r3, [sp, #1488]
-	ldr	r12, [sp, #1492]
-	ldr	lr, [sp, #1496]
-	adds	r0, r0, r5
-	ldr	r5, [sp, #1504]
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1512]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1516]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1500]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1508]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #108]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1472]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1480]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1532]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1528]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1524]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1520]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1476]
-	adcs	r0, r7, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #1408
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1468]
-	add	r11, sp, #1424
-	add	r10, sp, #1408
-	ldr	r6, [sp, #1436]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1464]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1460]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1456]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1452]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1448]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1444]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1440]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldm	r10, {r5, r7, r9, r10}
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, sp, #1344
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1356]
-	ldr	r3, [sp, #1360]
-	ldr	r12, [sp, #1364]
-	ldr	lr, [sp, #1368]
-	adds	r0, r0, r5
-	ldr	r5, [sp, #1376]
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1384]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1388]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1372]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1380]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #108]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1344]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1352]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1404]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1400]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1396]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1392]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1348]
-	adcs	r0, r7, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #1280
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1340]
-	add	r11, sp, #1296
-	add	r10, sp, #1280
-	ldr	r6, [sp, #1308]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1336]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1332]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1328]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1324]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1320]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1316]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1312]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldm	r10, {r5, r7, r9, r10}
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #1216
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1228]
-	ldr	r3, [sp, #1232]
-	ldr	r12, [sp, #1236]
-	ldr	lr, [sp, #1240]
-	adds	r0, r0, r5
-	ldr	r5, [sp, #1248]
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1256]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1260]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1244]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1252]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #108]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1216]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1224]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1276]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1272]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1268]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1264]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1220]
-	adcs	r0, r7, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #1152
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1212]
-	add	r11, sp, #1168
-	add	r10, sp, #1152
-	ldr	r6, [sp, #1180]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1208]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1204]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1200]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1196]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1192]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1188]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1184]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldm	r10, {r5, r7, r9, r10}
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #1088
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1100]
-	ldr	r3, [sp, #1104]
-	ldr	r12, [sp, #1108]
-	ldr	lr, [sp, #1112]
-	adds	r0, r0, r5
-	ldr	r5, [sp, #1120]
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1128]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1132]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1116]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1124]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #108]         @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1088]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1096]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1148]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1144]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1140]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1136]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1092]
-	adcs	r0, r7, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #1024
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1084]
-	add	r11, sp, #1040
-	add	r10, sp, #1024
-	ldr	r6, [sp, #1052]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1080]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1076]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1072]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1068]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1064]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1060]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1056]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldm	r10, {r5, r7, r9, r10}
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #960
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #964
-	adds	r0, r0, r5
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #988
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1020]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1016]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1012]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1008]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #960]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #108]         @ 4-byte Reload
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #896
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #956]
-	add	r11, sp, #912
-	add	r10, sp, #896
-	ldr	r6, [sp, #924]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #952]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #948]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #944]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #940]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #936]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #932]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #928]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r8, r11}
-	ldm	r10, {r5, r7, r9, r10}
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, sp, #832
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #836
-	adds	r0, r0, r5
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #860
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #892]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #888]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #884]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #880]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #832]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #108]         @ 4-byte Reload
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r6, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	add	r0, sp, #768
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #828]
-	add	r11, sp, #768
-	ldr	r6, [sp, #792]
-	ldr	r5, [sp, #788]
-	ldr	r8, [sp, #784]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #824]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #820]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #816]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #812]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #808]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #804]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #800]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #796]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r4, [sp, #780]
-	ldr	r2, [r0, #40]
-	add	r0, sp, #704
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #720
-	adds	r0, r0, r9
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r2, r0, r10
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	add	r10, sp, #744
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #708]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #716]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #704]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #712]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #48]           @ 4-byte Spill
-	adds	r0, r2, r5
-	mul	r1, r0, r7
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #764]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #760]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	adcs	r7, r7, r11
-	str	r7, [sp, #104]          @ 4-byte Spill
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	adcs	r6, r7, r6
-	str	r6, [sp, #100]          @ 4-byte Spill
-	ldr	r6, [sp, #96]           @ 4-byte Reload
-	adcs	r6, r6, r8
-	str	r6, [sp, #96]           @ 4-byte Spill
-	ldr	r6, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #640
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #700]
-	add	r7, sp, #656
-	add	r11, sp, #640
-	ldr	r4, [sp, #668]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #688]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #684]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #680]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #676]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #672]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r7, {r5, r6, r7}
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	ldr	r2, [r0, #44]
-	add	r0, sp, #576
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #592
-	adds	r0, r0, r8
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r2, r0, r9
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #616
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	add	r7, sp, #576
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	r7, {r4, r6, r7}
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r5, [sp, #588]
-	adds	r1, r2, r4
-	mul	r2, r1, r0
-	ldr	r0, [sp, #636]
-	str	r1, [sp, #108]          @ 4-byte Spill
-	str	r2, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #632]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #104]         @ 4-byte Reload
-	adcs	r6, r11, r6
-	str	r6, [sp, #60]           @ 4-byte Spill
-	ldr	r6, [sp, #100]          @ 4-byte Reload
-	adcs	r6, r6, r7
-	str	r6, [sp, #56]           @ 4-byte Spill
-	ldr	r6, [sp, #96]           @ 4-byte Reload
-	adcs	r5, r6, r5
-	str	r5, [sp, #52]           @ 4-byte Spill
-	ldr	r5, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	add	r0, sp, #512
-	bl	.LmulPv480x32(PLT)
-	ldr	r1, [sp, #572]
-	add	r11, sp, #520
-	ldr	r8, [sp, #540]
-	ldr	r9, [sp, #536]
-	ldr	r10, [sp, #532]
-	ldr	r6, [sp, #512]
-	ldr	r7, [sp, #516]
-	add	r0, sp, #448
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #568]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #564]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #560]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #556]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #552]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #548]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #544]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r11}
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	ldr	r2, [r1, #48]
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #452
-	adds	r0, r0, r6
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #476
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #508]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #504]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #500]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #496]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #448]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #108]         @ 4-byte Reload
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #384
-	bl	.LmulPv480x32(PLT)
-	ldr	r1, [sp, #444]
-	add	r9, sp, #396
-	ldr	r11, [sp, #412]
-	ldr	r8, [sp, #408]
-	ldr	r5, [sp, #384]
-	ldr	r4, [sp, #388]
-	ldr	r10, [sp, #392]
-	add	r0, sp, #320
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #440]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #436]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #432]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #428]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #424]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #420]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #416]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r9, {r6, r7, r9}
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	ldr	r2, [r1, #52]
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #324
-	adds	r0, r0, r5
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #348
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #380]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #376]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #372]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #368]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #320]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #64]          @ 4-byte Reload
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #256
-	bl	.LmulPv480x32(PLT)
-	ldr	r1, [sp, #316]
-	add	r11, sp, #260
-	ldr	r8, [sp, #284]
-	ldr	r9, [sp, #280]
-	ldr	r10, [sp, #276]
-	ldr	r7, [sp, #256]
-	add	r0, sp, #192
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #312]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #308]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #304]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #300]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #296]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #292]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #288]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r11}
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	ldr	r2, [r1, #56]
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #208
-	adds	r0, r0, r7
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	adcs	r1, r1, r5
-	str	r1, [sp, #120]          @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r11
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r1, r9
-	add	r9, sp, #192
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r1, r1, r8
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adc	r1, r1, r2
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldm	r9, {r4, r8, r9}
-	ldr	r7, [sp, #204]
-	ldr	r10, [sp, #236]
-	adds	r5, r0, r4
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r4, [sp, #232]
-	mul	r1, r5, r0
-	ldr	r0, [sp, #252]
-	str	r1, [sp, #56]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #248]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #244]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #240]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #120]         @ 4-byte Reload
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	adcs	r8, r11, r8
-	adcs	r9, r6, r9
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	adcs	r7, r6, r7
-	ldr	r6, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r6, r0, r10
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r10, [sp, #124]         @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r10
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	add	r0, sp, #128
-	bl	.LmulPv480x32(PLT)
-	add	r3, sp, #128
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r5, r0
-	adcs	r11, r8, r1
-	ldr	r0, [sp, #144]
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	lr, r9, r2
-	str	r11, [sp, #40]          @ 4-byte Spill
-	adcs	r8, r7, r3
-	str	lr, [sp, #48]           @ 4-byte Spill
-	str	r8, [sp, #56]           @ 4-byte Spill
-	adcs	r4, r1, r0
-	ldr	r0, [sp, #148]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r4, [sp, #64]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #152]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #156]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #160]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #164]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #168]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #172]
-	adcs	r0, r6, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #176]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #180]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #184]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #188]
-	adc	r0, r1, r0
-	mov	r1, r10
-	add	r10, r1, #20
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldmib	r1, {r0, r6}
-	ldr	r2, [r1, #12]
-	ldr	r12, [r1, #16]
-	ldm	r10, {r5, r9, r10}
-	ldr	r7, [r1]
-	subs	r7, r11, r7
-	ldr	r11, [r1, #36]
-	sbcs	r3, lr, r0
-	ldr	r0, [r1, #32]
-	sbcs	lr, r8, r6
-	ldr	r8, [r1, #40]
-	sbcs	r4, r4, r2
-	ldr	r2, [r1, #44]
-	str	r2, [sp, #68]           @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #72]           @ 4-byte Spill
-	ldr	r2, [r1, #52]
-	ldr	r1, [r1, #56]
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r2, [sp, #52]           @ 4-byte Spill
-	sbcs	r2, r1, r12
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	sbcs	r12, r1, r5
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	sbcs	r5, r1, r9
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	sbcs	r6, r1, r10
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	sbcs	r9, r1, r0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	sbcs	r10, r0, r11
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	sbcs	r11, r0, r8
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	sbc	r8, r0, r1
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	asr	r1, r8, #31
-	cmp	r1, #0
-	movlt	r7, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r7, [r0]
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	movlt	r3, r7
-	str	r3, [r0, #4]
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	movlt	lr, r3
-	ldr	r3, [sp, #64]           @ 4-byte Reload
-	cmp	r1, #0
-	str	lr, [r0, #8]
-	movlt	r4, r3
-	ldr	r3, [sp, #80]           @ 4-byte Reload
-	str	r4, [r0, #12]
-	movlt	r2, r3
-	ldr	r3, [sp, #68]           @ 4-byte Reload
-	str	r2, [r0, #16]
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	movlt	r12, r2
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r12, [r0, #20]
-	movlt	r5, r2
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	str	r5, [r0, #24]
-	movlt	r6, r2
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	str	r6, [r0, #28]
-	movlt	r9, r2
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r9, [r0, #32]
-	movlt	r10, r2
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	str	r10, [r0, #36]
-	movlt	r11, r2
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	str	r11, [r0, #40]
-	movlt	r3, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	str	r3, [r0, #44]
-	movlt	r2, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r2, [r0, #48]
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	movlt	r2, r1
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r2, [r0, #52]
-	movlt	r8, r1
-	str	r8, [r0, #56]
-	add	sp, sp, #4
-	add	sp, sp, #2048
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end227:
-	.size	mcl_fp_montNF15L, .Lfunc_end227-mcl_fp_montNF15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montRed15L
-	.align	2
-	.type	mcl_fp_montRed15L,%function
-mcl_fp_montRed15L:                      @ @mcl_fp_montRed15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#148
-	sub	sp, sp, #148
-	.pad	#1024
-	sub	sp, sp, #1024
-	mov	r3, r2
-	str	r0, [sp, #192]          @ 4-byte Spill
-	ldr	r2, [r1, #4]
-	ldr	r7, [r1]
-	ldr	r0, [r3]
-	str	r3, [sp, #200]          @ 4-byte Spill
-	str	r2, [sp, #108]          @ 4-byte Spill
-	ldr	r2, [r1, #8]
-	str	r0, [sp, #188]          @ 4-byte Spill
-	ldr	r0, [r3, #4]
-	str	r2, [sp, #104]          @ 4-byte Spill
-	ldr	r2, [r1, #12]
-	str	r0, [sp, #184]          @ 4-byte Spill
-	ldr	r0, [r3, #8]
-	str	r2, [sp, #100]          @ 4-byte Spill
-	str	r0, [sp, #180]          @ 4-byte Spill
-	ldr	r0, [r3, #12]
-	str	r0, [sp, #164]          @ 4-byte Spill
-	ldr	r0, [r3, #16]
-	str	r0, [sp, #168]          @ 4-byte Spill
-	ldr	r0, [r3, #20]
-	str	r0, [sp, #172]          @ 4-byte Spill
-	ldr	r0, [r3, #24]
-	str	r0, [sp, #176]          @ 4-byte Spill
-	ldr	r0, [r3, #-4]
-	str	r0, [sp, #204]          @ 4-byte Spill
-	mul	r2, r7, r0
-	ldr	r0, [r3, #28]
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [r3, #32]
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [r3, #36]
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [r3, #40]
-	str	r0, [sp, #144]          @ 4-byte Spill
-	ldr	r0, [r3, #44]
-	str	r0, [sp, #148]          @ 4-byte Spill
-	ldr	r0, [r3, #48]
-	str	r0, [sp, #152]          @ 4-byte Spill
-	ldr	r0, [r3, #52]
-	str	r0, [sp, #156]          @ 4-byte Spill
-	ldr	r0, [r3, #56]
-	str	r0, [sp, #160]          @ 4-byte Spill
-	ldr	r0, [r1, #96]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [r1, #100]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [r1, #104]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [r1, #108]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [r1, #112]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [r1, #116]
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [r1, #64]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r1, #68]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r1, #72]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r1, #76]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r1, #80]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r1, #88]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [r1, #92]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [r1, #84]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r1, #56]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r1, #60]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r1, #28]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r1, #24]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r1, #20]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r1, #16]
-	mov	r1, r3
-	str	r0, [sp, #16]           @ 4-byte Spill
-	add	r0, sp, #1104
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1164]
-	ldr	r9, [sp, #1104]
-	ldr	r1, [sp, #1112]
-	ldr	r2, [sp, #1116]
-	ldr	r3, [sp, #1120]
-	ldr	r12, [sp, #1124]
-	ldr	lr, [sp, #1128]
-	ldr	r4, [sp, #1132]
-	ldr	r5, [sp, #1136]
-	ldr	r6, [sp, #1140]
-	ldr	r8, [sp, #1144]
-	ldr	r10, [sp, #1148]
-	ldr	r11, [sp, #1152]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1160]
-	adds	r7, r7, r9
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1156]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #1108]
-	adcs	r9, r7, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #200]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #196]          @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	mul	r2, r9, r0
-	add	r0, sp, #1040
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1100]
-	ldr	r4, [sp, #1040]
-	ldr	r1, [sp, #1048]
-	ldr	r2, [sp, #1052]
-	ldr	r8, [sp, #1056]
-	ldr	r3, [sp, #1060]
-	ldr	r10, [sp, #1064]
-	ldr	r11, [sp, #1068]
-	ldr	r12, [sp, #1072]
-	ldr	r7, [sp, #1076]
-	ldr	r6, [sp, #1080]
-	ldr	lr, [sp, #1084]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1096]
-	adds	r4, r9, r4
-	ldr	r4, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1092]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #1088]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #1044]
-	adcs	r9, r4, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r4, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r9, r4
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	mov	r11, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	add	r0, sp, #976
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #1036]
-	add	lr, sp, #1000
-	add	r10, sp, #976
-	ldr	r5, [sp, #1020]
-	ldr	r6, [sp, #1016]
-	ldr	r7, [sp, #1012]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1032]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1028]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1024]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	lr, {r3, r12, lr}
-	ldr	r9, [sp, #996]
-	ldr	r2, [sp, #992]
-	ldm	r10, {r0, r1, r8, r10}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r1
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r1, r4
-	ldr	r1, [sp, #200]          @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, sp, #912
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #972]
-	ldr	r4, [sp, #912]
-	add	lr, sp, #916
-	ldr	r11, [sp, #960]
-	ldr	r5, [sp, #956]
-	ldr	r6, [sp, #952]
-	ldr	r7, [sp, #948]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #968]
-	adds	r4, r8, r4
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #964]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r9, r10, r12, lr}
-	ldr	r4, [sp, #108]          @ 4-byte Reload
-	adcs	r4, r4, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	str	r4, [sp, #12]           @ 4-byte Spill
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #200]          @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #204]          @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	mul	r2, r4, r5
-	adcs	r0, r0, r11
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	add	r0, sp, #848
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #908]
-	add	r10, sp, #872
-	add	lr, sp, #848
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #904]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #900]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #896]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #892]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r4, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #12]          @ 4-byte Reload
-	adds	r0, r11, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mov	r11, r1
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r1, r5
-	mov	r1, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #784
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #844]
-	ldr	r4, [sp, #784]
-	add	r10, sp, #788
-	ldr	lr, [sp, #832]
-	ldr	r5, [sp, #828]
-	ldr	r6, [sp, #824]
-	ldr	r7, [sp, #820]
-	ldr	r12, [sp, #816]
-	ldr	r3, [sp, #812]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #840]
-	adds	r4, r11, r4
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #836]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r0, r1, r2, r8, r9, r10}
-	ldr	r4, [sp, #108]          @ 4-byte Reload
-	adcs	r11, r4, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r4, [sp, #204]          @ 4-byte Reload
-	str	r11, [sp, #20]          @ 4-byte Spill
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #200]          @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #720
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #780]
-	add	r10, sp, #744
-	add	lr, sp, #720
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #776]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #772]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #768]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #764]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #20]          @ 4-byte Reload
-	adds	r0, r11, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	mov	r11, r1
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r1, r4
-	mov	r1, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #656
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #716]
-	ldr	r4, [sp, #656]
-	add	r10, sp, #660
-	ldr	lr, [sp, #704]
-	ldr	r5, [sp, #700]
-	ldr	r6, [sp, #696]
-	ldr	r7, [sp, #692]
-	ldr	r12, [sp, #688]
-	ldr	r3, [sp, #684]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #712]
-	adds	r4, r11, r4
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #708]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r0, r1, r2, r8, r9, r10}
-	ldr	r4, [sp, #108]          @ 4-byte Reload
-	adcs	r11, r4, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r4, [sp, #200]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	mul	r2, r11, r0
-	add	r0, sp, #592
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #652]
-	add	r10, sp, #616
-	add	lr, sp, #592
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #648]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #644]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #640]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #204]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mul	r2, r11, r5
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #528
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #588]
-	ldr	r4, [sp, #528]
-	add	r10, sp, #532
-	ldr	lr, [sp, #572]
-	ldr	r6, [sp, #568]
-	ldr	r7, [sp, #564]
-	ldr	r12, [sp, #560]
-	ldr	r3, [sp, #556]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #584]
-	adds	r4, r11, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #580]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #576]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r0, r1, r2, r8, r9, r10}
-	ldr	r4, [sp, #108]          @ 4-byte Reload
-	adcs	r11, r4, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #200]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r9
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #464
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #524]
-	add	r10, sp, #488
-	add	lr, sp, #464
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #520]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #516]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #512]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r7, r8, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #204]          @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r11, r5
-	adcs	r0, r0, r6
-	mov	r6, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r6
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #400
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #460]
-	ldr	r4, [sp, #400]
-	add	r10, sp, #404
-	ldr	lr, [sp, #440]
-	ldr	r7, [sp, #436]
-	ldr	r12, [sp, #432]
-	ldr	r3, [sp, #428]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #456]
-	adds	r4, r11, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #452]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #448]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #444]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	r10, {r0, r1, r2, r8, r9, r10}
-	ldr	r4, [sp, #108]          @ 4-byte Reload
-	adcs	r11, r4, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r6
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #336
-	bl	.LmulPv480x32(PLT)
-	ldr	r0, [sp, #396]
-	add	r10, sp, #360
-	add	lr, sp, #336
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #392]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #388]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #384]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r7, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #204]          @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	mul	r2, r11, r6
-	adcs	r0, r0, r7
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r8
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	add	r0, sp, #272
-	bl	.LmulPv480x32(PLT)
-	add	r5, sp, #272
-	add	lr, sp, #288
-	ldm	r5, {r0, r1, r3, r5}
-	ldr	r9, [sp, #332]
-	ldr	r8, [sp, #328]
-	ldr	r7, [sp, #312]
-	adds	r0, r11, r0
-	ldr	r11, [sp, #324]
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r10, r0, r1
-	mul	r0, r10, r6
-	ldr	r6, [sp, #316]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #320]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r4, [sp, #196]          @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #200]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r4, r0, r2
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r8, r0, r9
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	add	r0, sp, #208
-	bl	.LmulPv480x32(PLT)
-	add	r3, sp, #208
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r10, r0
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	lr, r0, r1
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	lr, [sp, #80]           @ 4-byte Spill
-	adcs	r2, r0, r2
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	str	r2, [sp, #84]           @ 4-byte Spill
-	adcs	r3, r0, r3
-	ldr	r0, [sp, #224]
-	str	r3, [sp, #88]           @ 4-byte Spill
-	adcs	r7, r1, r0
-	ldr	r0, [sp, #228]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r7, [sp, #92]           @ 4-byte Spill
-	adcs	r4, r4, r0
-	ldr	r0, [sp, #232]
-	str	r4, [sp, #96]           @ 4-byte Spill
-	adcs	r5, r1, r0
-	ldr	r0, [sp, #236]
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r5, [sp, #100]          @ 4-byte Spill
-	adcs	r6, r1, r0
-	ldr	r0, [sp, #240]
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r6, [sp, #104]          @ 4-byte Spill
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #244]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r11, [sp, #108]         @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #196]          @ 4-byte Reload
-	str	r0, [sp, #200]          @ 4-byte Spill
-	ldr	r0, [sp, #248]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #252]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #256]
-	adcs	r10, r1, r0
-	ldr	r0, [sp, #260]
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r10, [sp, #124]         @ 4-byte Spill
-	adcs	r9, r1, r0
-	ldr	r0, [sp, #264]
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r9, [sp, #128]          @ 4-byte Spill
-	adcs	r8, r8, r0
-	ldr	r0, [sp, #268]
-	adcs	r12, r1, r0
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	ldr	r1, [sp, #184]          @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #188]          @ 4-byte Reload
-	subs	r0, lr, r0
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #180]          @ 4-byte Reload
-	sbcs	r2, r3, r2
-	ldr	r3, [sp, #164]          @ 4-byte Reload
-	sbcs	r3, r7, r3
-	ldr	r7, [sp, #168]          @ 4-byte Reload
-	sbcs	lr, r4, r7
-	ldr	r4, [sp, #172]          @ 4-byte Reload
-	ldr	r7, [sp, #136]          @ 4-byte Reload
-	sbcs	r4, r5, r4
-	ldr	r5, [sp, #176]          @ 4-byte Reload
-	sbcs	r5, r6, r5
-	ldr	r6, [sp, #132]          @ 4-byte Reload
-	sbcs	r6, r11, r6
-	ldr	r11, [sp, #200]         @ 4-byte Reload
-	str	r6, [sp, #172]          @ 4-byte Spill
-	sbcs	r6, r11, r7
-	ldr	r7, [sp, #140]          @ 4-byte Reload
-	ldr	r11, [sp, #204]         @ 4-byte Reload
-	str	r6, [sp, #176]          @ 4-byte Spill
-	ldr	r6, [sp, #196]          @ 4-byte Reload
-	sbcs	r6, r6, r7
-	ldr	r7, [sp, #144]          @ 4-byte Reload
-	str	r6, [sp, #180]          @ 4-byte Spill
-	sbcs	r6, r11, r7
-	ldr	r7, [sp, #148]          @ 4-byte Reload
-	str	r6, [sp, #184]          @ 4-byte Spill
-	sbcs	r6, r10, r7
-	ldr	r7, [sp, #152]          @ 4-byte Reload
-	mov	r10, r8
-	str	r6, [sp, #188]          @ 4-byte Spill
-	sbcs	r6, r9, r7
-	ldr	r7, [sp, #156]          @ 4-byte Reload
-	sbcs	r11, r8, r7
-	ldr	r7, [sp, #160]          @ 4-byte Reload
-	mov	r8, r12
-	sbcs	r9, r12, r7
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	sbc	r7, r7, #0
-	ands	r12, r7, #1
-	ldr	r7, [sp, #80]           @ 4-byte Reload
-	movne	r0, r7
-	ldr	r7, [sp, #192]          @ 4-byte Reload
-	str	r0, [r7]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	movne	r1, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	str	r1, [r7, #4]
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	movne	r2, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	cmp	r12, #0
-	str	r2, [r7, #8]
-	movne	r3, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	str	r3, [r7, #12]
-	movne	lr, r0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	str	lr, [r7, #16]
-	movne	r4, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	cmp	r12, #0
-	str	r4, [r7, #20]
-	movne	r5, r0
-	ldr	r0, [sp, #172]          @ 4-byte Reload
-	movne	r0, r1
-	str	r5, [r7, #24]
-	ldr	r1, [sp, #176]          @ 4-byte Reload
-	str	r0, [r7, #28]
-	ldr	r0, [sp, #200]          @ 4-byte Reload
-	movne	r1, r0
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	cmp	r12, #0
-	str	r1, [r7, #32]
-	ldr	r1, [sp, #180]          @ 4-byte Reload
-	movne	r1, r0
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	str	r1, [r7, #36]
-	ldr	r1, [sp, #184]          @ 4-byte Reload
-	movne	r1, r0
-	ldr	r0, [sp, #188]          @ 4-byte Reload
-	str	r1, [r7, #40]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	movne	r0, r1
-	cmp	r12, #0
-	str	r0, [r7, #44]
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	movne	r11, r10
-	movne	r9, r8
-	movne	r6, r0
-	str	r6, [r7, #48]
-	str	r11, [r7, #52]
-	str	r9, [r7, #56]
-	add	sp, sp, #148
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end228:
-	.size	mcl_fp_montRed15L, .Lfunc_end228-mcl_fp_montRed15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addPre15L
-	.align	2
-	.type	mcl_fp_addPre15L,%function
-mcl_fp_addPre15L:                       @ @mcl_fp_addPre15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#52
-	sub	sp, sp, #52
-	ldm	r1, {r3, r7, r11}
-	ldr	r10, [r2]
-	ldr	r5, [r2, #16]
-	ldr	r6, [r2, #4]
-	ldr	r4, [r2, #8]
-	ldr	r12, [r2, #12]
-	ldr	r8, [r1, #12]
-	ldr	r9, [r1, #56]
-	adds	lr, r10, r3
-	ldr	r3, [r2, #32]
-	str	r5, [sp, #8]            @ 4-byte Spill
-	ldr	r5, [r2, #20]
-	ldr	r10, [r1, #44]
-	adcs	r6, r6, r7
-	adcs	r4, r4, r11
-	ldr	r11, [r1, #40]
-	adcs	r7, r12, r8
-	add	r12, r1, #16
-	ldr	r8, [r1, #52]
-	str	r3, [sp, #20]           @ 4-byte Spill
-	ldr	r3, [r2, #36]
-	str	r5, [sp, #12]           @ 4-byte Spill
-	ldr	r5, [r2, #24]
-	str	r3, [sp, #28]           @ 4-byte Spill
-	ldr	r3, [r2, #40]
-	str	r5, [sp, #16]           @ 4-byte Spill
-	ldr	r5, [r2, #28]
-	str	r3, [sp, #32]           @ 4-byte Spill
-	ldr	r3, [r2, #44]
-	str	r5, [sp, #24]           @ 4-byte Spill
-	ldr	r5, [r1, #32]
-	str	r3, [sp, #36]           @ 4-byte Spill
-	ldr	r3, [r2, #48]
-	str	r3, [sp, #40]           @ 4-byte Spill
-	ldr	r3, [r2, #52]
-	ldr	r2, [r2, #56]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #36]
-	str	r3, [sp, #44]           @ 4-byte Spill
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldm	r12, {r1, r2, r3, r12}
-	str	lr, [r0]
-	str	r6, [r0, #4]
-	ldr	r6, [sp, #8]            @ 4-byte Reload
-	str	r4, [r0, #8]
-	str	r7, [r0, #12]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	adcs	r1, r6, r1
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	adcs	r2, r7, r2
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	adcs	r1, r1, r3
-	ldr	r3, [sp]                @ 4-byte Reload
-	adcs	r2, r2, r12
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	add	r12, r0, #32
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r1, r5
-	ldr	r5, [sp, #4]            @ 4-byte Reload
-	adcs	r2, r2, r3
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	adcs	r3, r3, r11
-	adcs	r7, r7, r10
-	adcs	r6, r6, r5
-	ldr	r5, [sp, #44]           @ 4-byte Reload
-	stm	r12, {r1, r2, r3, r7}
-	str	r6, [r0, #48]
-	adcs	r5, r5, r8
-	adcs	r4, r4, r9
-	str	r5, [r0, #52]
-	str	r4, [r0, #56]
-	mov	r0, #0
-	adc	r0, r0, #0
-	add	sp, sp, #52
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end229:
-	.size	mcl_fp_addPre15L, .Lfunc_end229-mcl_fp_addPre15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subPre15L
-	.align	2
-	.type	mcl_fp_subPre15L,%function
-mcl_fp_subPre15L:                       @ @mcl_fp_subPre15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#56
-	sub	sp, sp, #56
-	ldm	r2, {r3, r11}
-	ldr	r7, [r1]
-	ldr	r5, [r2, #8]
-	ldr	r6, [r2, #12]
-	ldmib	r1, {r4, r12, lr}
-	ldr	r8, [r1, #32]
-	ldr	r10, [r1, #52]
-	subs	r3, r7, r3
-	ldr	r7, [r2, #24]
-	str	r3, [sp, #24]           @ 4-byte Spill
-	ldr	r3, [r2, #32]
-	sbcs	r4, r4, r11
-	sbcs	r5, r12, r5
-	add	r12, r1, #16
-	sbcs	r11, lr, r6
-	ldr	r6, [r2, #20]
-	ldr	lr, [r2, #16]
-	str	r3, [sp, #28]           @ 4-byte Spill
-	ldr	r3, [r2, #36]
-	str	r3, [sp, #32]           @ 4-byte Spill
-	ldr	r3, [r2, #40]
-	str	r3, [sp, #36]           @ 4-byte Spill
-	ldr	r3, [r2, #44]
-	str	r3, [sp, #40]           @ 4-byte Spill
-	ldr	r3, [r2, #48]
-	str	r3, [sp, #44]           @ 4-byte Spill
-	ldr	r3, [r2, #52]
-	str	r3, [sp, #48]           @ 4-byte Spill
-	ldr	r3, [r2, #56]
-	str	r3, [sp, #52]           @ 4-byte Spill
-	ldr	r3, [r2, #28]
-	ldr	r2, [r1, #36]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #40]
-	str	r3, [sp, #20]           @ 4-byte Spill
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #44]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldm	r12, {r1, r2, r3, r12}
-	ldr	r9, [sp, #24]           @ 4-byte Reload
-	sbcs	r1, r1, lr
-	str	r9, [r0]
-	stmib	r0, {r4, r5}
-	str	r11, [r0, #12]
-	sbcs	r2, r2, r6
-	str	r1, [r0, #16]
-	ldr	r6, [sp, #44]           @ 4-byte Reload
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	ldr	r4, [sp, #52]           @ 4-byte Reload
-	sbcs	r1, r3, r7
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	sbcs	r2, r12, r2
-	sbcs	r12, r8, r1
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp]                @ 4-byte Reload
-	str	r12, [r0, #32]
-	sbcs	r2, r1, r2
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	sbcs	r3, r1, r3
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	sbcs	r7, r1, r7
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	sbcs	r6, r1, r6
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	sbcs	r5, r10, r5
-	sbcs	r4, r1, r4
-	add	r1, r0, #36
-	stm	r1, {r2, r3, r7}
-	str	r6, [r0, #48]
-	str	r5, [r0, #52]
-	str	r4, [r0, #56]
-	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	add	sp, sp, #56
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end230:
-	.size	mcl_fp_subPre15L, .Lfunc_end230-mcl_fp_subPre15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_15L
-	.align	2
-	.type	mcl_fp_shr1_15L,%function
-mcl_fp_shr1_15L:                        @ @mcl_fp_shr1_15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#24
-	sub	sp, sp, #24
-	ldmib	r1, {r2, r3, r4, r5, r6, r10}
-	ldr	r7, [r1]
-	ldr	r11, [r1, #52]
-	ldr	r8, [r1, #28]
-	ldr	lr, [r1, #32]
-	ldr	r12, [r1, #36]
-	ldr	r9, [r1, #44]
-	str	r7, [sp, #4]            @ 4-byte Spill
-	lsr	r7, r2, #1
-	str	r11, [sp, #16]          @ 4-byte Spill
-	orr	r7, r7, r3, lsl #31
-	str	r7, [sp]                @ 4-byte Spill
-	ldr	r7, [r1, #40]
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldr	r7, [r1, #48]
-	ldr	r1, [r1, #56]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	lsr	r1, r4, #1
-	lsrs	r4, r4, #1
-	str	r7, [sp, #12]           @ 4-byte Spill
-	rrx	r3, r3
-	lsrs	r2, r2, #1
-	orr	r1, r1, r5, lsl #31
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	rrx	r2, r2
-	str	r2, [r0]
-	ldr	r2, [sp]                @ 4-byte Reload
-	stmib	r0, {r2, r3}
-	str	r1, [r0, #12]
-	lsrs	r1, r6, #1
-	lsr	r2, r12, #1
-	rrx	r1, r5
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	ldr	r4, [sp, #12]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	lsr	r1, r6, #1
-	orr	r1, r1, r10, lsl #31
-	str	r1, [r0, #20]
-	lsrs	r1, r8, #1
-	rrx	r1, r10
-	orr	r2, r2, r7, lsl #31
-	str	r1, [r0, #24]
-	lsr	r1, r8, #1
-	orr	r1, r1, lr, lsl #31
-	str	r1, [r0, #28]
-	lsrs	r1, r12, #1
-	add	r12, r0, #32
-	rrx	r1, lr
-	lsrs	r3, r9, #1
-	rrx	r3, r7
-	lsrs	r6, r5, #1
-	lsr	r7, r9, #1
-	lsr	r5, r5, #1
-	orr	r7, r7, r4, lsl #31
-	rrx	r6, r4
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	stm	r12, {r1, r2, r3, r7}
-	str	r6, [r0, #48]
-	orr	r5, r5, r4, lsl #31
-	lsr	r4, r4, #1
-	str	r5, [r0, #52]
-	str	r4, [r0, #56]
-	add	sp, sp, #24
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end231:
-	.size	mcl_fp_shr1_15L, .Lfunc_end231-mcl_fp_shr1_15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_add15L
-	.align	2
-	.type	mcl_fp_add15L,%function
-mcl_fp_add15L:                          @ @mcl_fp_add15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#60
-	sub	sp, sp, #60
-	ldr	r9, [r1]
-	ldmib	r1, {r8, lr}
-	ldr	r12, [r1, #12]
-	ldm	r2, {r4, r5, r6, r7}
-	adds	r10, r4, r9
-	ldr	r4, [r1, #24]
-	adcs	r11, r5, r8
-	ldr	r5, [r1, #20]
-	mov	r8, r10
-	adcs	r6, r6, lr
-	mov	lr, r11
-	str	r8, [r0]
-	adcs	r9, r7, r12
-	str	r6, [sp, #40]           @ 4-byte Spill
-	ldr	r6, [r1, #16]
-	ldr	r7, [r2, #16]
-	str	lr, [r0, #4]
-	str	r9, [sp, #8]            @ 4-byte Spill
-	adcs	r7, r7, r6
-	ldr	r6, [r2, #48]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	adcs	r7, r7, r5
-	ldr	r5, [r2, #28]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	ldr	r10, [sp, #32]          @ 4-byte Reload
-	adcs	r7, r7, r4
-	ldr	r4, [r2, #32]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r1, #28]
-	adcs	r7, r5, r7
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldr	r7, [r1, #32]
-	ldr	r11, [sp, #12]          @ 4-byte Reload
-	adcs	r7, r4, r7
-	ldr	r4, [r2, #36]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [r1, #36]
-	adcs	r7, r4, r7
-	ldr	r4, [r2, #40]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldr	r7, [r1, #40]
-	adcs	r7, r4, r7
-	ldr	r4, [r2, #44]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r1, #44]
-	adcs	r5, r4, r7
-	ldr	r7, [r1, #48]
-	ldr	r4, [sp, #40]           @ 4-byte Reload
-	str	r5, [sp, #28]           @ 4-byte Spill
-	adcs	r12, r6, r7
-	ldr	r7, [r1, #52]
-	ldr	r6, [r2, #52]
-	ldr	r1, [r1, #56]
-	ldr	r2, [r2, #56]
-	str	r4, [r0, #8]
-	str	r9, [r0, #12]
-	ldr	r9, [sp, #36]           @ 4-byte Reload
-	adcs	r6, r6, r7
-	str	r9, [r0, #16]
-	str	r10, [r0, #20]
-	add	r7, r0, #40
-	adcs	r2, r2, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r6, [sp, #24]           @ 4-byte Spill
-	str	r2, [sp, #20]           @ 4-byte Spill
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r11, [r0, #28]
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r1, [r0, #36]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	stm	r7, {r1, r5, r12}
-	str	r6, [r0, #52]
-	str	r2, [r0, #56]
-	mov	r2, #0
-	adc	r1, r2, #0
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r3, {r6, r7}
-	ldr	r1, [r3, #8]
-	ldr	r2, [r3, #12]
-	subs	r5, r8, r6
-	sbcs	r7, lr, r7
-	str	r5, [sp, #4]            @ 4-byte Spill
-	sbcs	r1, r4, r1
-	str	r7, [sp]                @ 4-byte Spill
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	sbcs	r1, r1, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	sbcs	r9, r9, r1
-	ldr	r1, [r3, #20]
-	sbcs	r1, r10, r1
-	add	r10, r3, #32
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [r3, #24]
-	sbcs	r1, r2, r1
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [r3, #28]
-	sbcs	r11, r11, r1
-	ldm	r10, {r1, r2, r6, r10}
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	ldr	r8, [r3, #48]
-	ldr	r7, [r3, #52]
-	ldr	r3, [r3, #56]
-	sbcs	r1, r5, r1
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	sbcs	r4, r1, r2
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	sbcs	r2, r1, r6
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	sbcs	lr, r1, r10
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	sbcs	r6, r12, r8
-	sbcs	r5, r1, r7
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	sbcs	r1, r1, r3
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	sbc	r3, r3, #0
-	tst	r3, #1
-	bne	.LBB232_2
-@ BB#1:                                 @ %nocarry
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	str	r3, [r0]
-	ldr	r3, [sp]                @ 4-byte Reload
-	str	r3, [r0, #4]
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	str	r3, [r0, #8]
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	str	r3, [r0, #12]
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	str	r9, [r0, #16]
-	str	r3, [r0, #20]
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	str	r3, [r0, #24]
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	str	r11, [r0, #28]
-	str	r3, [r0, #32]
-	str	r4, [r0, #36]
-	str	r2, [r0, #40]
-	str	lr, [r0, #44]
-	str	r6, [r0, #48]
-	str	r5, [r0, #52]
-	str	r1, [r0, #56]
-.LBB232_2:                              @ %carry
-	add	sp, sp, #60
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end232:
-	.size	mcl_fp_add15L, .Lfunc_end232-mcl_fp_add15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF15L
-	.align	2
-	.type	mcl_fp_addNF15L,%function
-mcl_fp_addNF15L:                        @ @mcl_fp_addNF15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#92
-	sub	sp, sp, #92
-	ldr	r9, [r1]
-	ldmib	r1, {r8, lr}
-	ldr	r12, [r1, #12]
-	ldm	r2, {r4, r5, r6, r7}
-	add	r11, r3, #32
-	adds	r10, r4, r9
-	ldr	r4, [r1, #24]
-	adcs	r9, r5, r8
-	ldr	r5, [r1, #20]
-	str	r10, [sp, #20]          @ 4-byte Spill
-	adcs	lr, r6, lr
-	ldr	r6, [r1, #16]
-	str	r9, [sp, #24]           @ 4-byte Spill
-	adcs	r8, r7, r12
-	ldr	r7, [r2, #16]
-	str	lr, [sp, #28]           @ 4-byte Spill
-	str	r8, [sp, #32]           @ 4-byte Spill
-	adcs	r7, r7, r6
-	ldr	r6, [r2, #28]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	adcs	r7, r7, r5
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	adcs	r7, r7, r4
-	str	r7, [sp, #64]           @ 4-byte Spill
-	ldr	r7, [r1, #28]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #32]
-	str	r7, [sp, #60]           @ 4-byte Spill
-	ldr	r7, [r1, #32]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #36]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r1, #36]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #40]
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r1, #40]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #44]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r1, #44]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #48]
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldr	r7, [r1, #48]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #52]
-	ldr	r2, [r2, #56]
-	str	r7, [sp, #88]           @ 4-byte Spill
-	ldr	r7, [r1, #52]
-	ldr	r1, [r1, #56]
-	adcs	r7, r6, r7
-	adc	r1, r2, r1
-	str	r7, [sp, #84]           @ 4-byte Spill
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldmib	r3, {r1, r5, r7}
-	ldr	r2, [r3, #16]
-	ldr	r4, [r3]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [r3, #20]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [r3, #24]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [r3, #28]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	subs	r2, r10, r4
-	sbcs	r12, r9, r1
-	ldm	r11, {r9, r10, r11}
-	ldr	r1, [r3, #44]
-	ldr	r4, [sp, #36]           @ 4-byte Reload
-	sbcs	lr, lr, r5
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	sbcs	r6, r8, r7
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	str	r1, [sp]                @ 4-byte Spill
-	ldr	r1, [r3, #48]
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldr	r1, [r3, #52]
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [r3, #56]
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	sbcs	r3, r1, r3
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	sbcs	r4, r1, r4
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	sbcs	r5, r5, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	sbcs	r8, r7, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	ldr	r7, [sp]                @ 4-byte Reload
-	sbcs	r9, r1, r9
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	sbcs	r10, r1, r10
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	sbcs	r1, r1, r11
-	ldr	r11, [sp, #20]          @ 4-byte Reload
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	sbc	r7, r1, r7
-	asr	r1, r7, #31
-	cmp	r1, #0
-	movlt	r2, r11
-	str	r2, [r0]
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	movlt	r12, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r12, [r0, #4]
-	movlt	lr, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	cmp	r1, #0
-	str	lr, [r0, #8]
-	movlt	r6, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r6, [r0, #12]
-	movlt	r3, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r3, [r0, #16]
-	ldr	r3, [sp, #16]           @ 4-byte Reload
-	movlt	r4, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r4, [r0, #20]
-	movlt	r5, r2
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r5, [r0, #24]
-	movlt	r8, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r8, [r0, #28]
-	movlt	r9, r2
-	ldr	r2, [sp, #76]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r9, [r0, #32]
-	movlt	r10, r2
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	str	r10, [r0, #36]
-	movlt	r3, r2
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r3, [r0, #40]
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	movlt	r3, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r3, [r0, #44]
-	movlt	r2, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r2, [r0, #48]
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	movlt	r2, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r2, [r0, #52]
-	movlt	r7, r1
-	str	r7, [r0, #56]
-	add	sp, sp, #92
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end233:
-	.size	mcl_fp_addNF15L, .Lfunc_end233-mcl_fp_addNF15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub15L
-	.align	2
-	.type	mcl_fp_sub15L,%function
-mcl_fp_sub15L:                          @ @mcl_fp_sub15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#64
-	sub	sp, sp, #64
-	ldr	r9, [r2]
-	ldmib	r2, {r8, lr}
-	ldr	r5, [r1]
-	ldr	r12, [r2, #12]
-	ldmib	r1, {r4, r6, r7}
-	subs	r5, r5, r9
-	sbcs	r4, r4, r8
-	str	r5, [sp, #48]           @ 4-byte Spill
-	ldr	r5, [r2, #24]
-	sbcs	r6, r6, lr
-	str	r4, [sp, #60]           @ 4-byte Spill
-	ldr	r4, [r2, #20]
-	sbcs	r7, r7, r12
-	str	r6, [sp, #56]           @ 4-byte Spill
-	ldr	r6, [r2, #16]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [r1, #16]
-	sbcs	r9, r7, r6
-	ldr	r7, [r1, #20]
-	ldr	r6, [r1, #28]
-	str	r9, [sp, #40]           @ 4-byte Spill
-	sbcs	r7, r7, r4
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldr	r7, [r1, #24]
-	sbcs	r5, r7, r5
-	ldr	r7, [r2, #28]
-	sbcs	r10, r6, r7
-	ldr	r7, [r2, #32]
-	ldr	r6, [r1, #32]
-	str	r10, [sp, #36]          @ 4-byte Spill
-	sbcs	r11, r6, r7
-	ldr	r7, [r2, #36]
-	ldr	r6, [r1, #36]
-	str	r11, [sp, #32]          @ 4-byte Spill
-	sbcs	lr, r6, r7
-	ldr	r7, [r2, #40]
-	ldr	r6, [r1, #40]
-	str	lr, [sp, #28]           @ 4-byte Spill
-	sbcs	r12, r6, r7
-	ldr	r7, [r2, #44]
-	ldr	r6, [r1, #44]
-	str	r12, [sp, #24]          @ 4-byte Spill
-	sbcs	r4, r6, r7
-	ldr	r6, [r2, #48]
-	ldr	r7, [r1, #48]
-	sbcs	r8, r7, r6
-	ldr	r6, [r2, #52]
-	ldr	r7, [r1, #52]
-	ldr	r2, [r2, #56]
-	ldr	r1, [r1, #56]
-	sbcs	r6, r7, r6
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	sbcs	r2, r1, r2
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r2, [sp, #20]           @ 4-byte Spill
-	str	r7, [r0]
-	str	r1, [r0, #4]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r1, [r0, #8]
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r1, [r0, #12]
-	str	r9, [r0, #16]
-	mov	r9, r6
-	mov	r6, r5
-	ldr	r5, [sp, #44]           @ 4-byte Reload
-	mov	r1, r4
-	str	r5, [r0, #20]
-	str	r6, [r0, #24]
-	str	r10, [r0, #28]
-	str	r11, [r0, #32]
-	str	lr, [r0, #36]
-	str	r12, [r0, #40]
-	add	r12, r0, #44
-	stm	r12, {r1, r8, r9}
-	str	r2, [r0, #56]
-	mov	r2, #0
-	sbc	r2, r2, #0
-	tst	r2, #1
-	beq	.LBB234_2
-@ BB#1:                                 @ %carry
-	ldr	r2, [r3, #56]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldmib	r3, {r2, lr}
-	ldr	r4, [r3, #16]
-	ldr	r12, [r3, #12]
-	str	r4, [sp]                @ 4-byte Spill
-	ldr	r4, [r3, #20]
-	str	r4, [sp, #4]            @ 4-byte Spill
-	ldr	r4, [r3, #24]
-	str	r4, [sp, #8]            @ 4-byte Spill
-	ldr	r4, [r3, #28]
-	str	r4, [sp, #12]           @ 4-byte Spill
-	ldr	r4, [r3]
-	adds	r4, r4, r7
-	ldr	r7, [r3, #52]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	adcs	r11, r2, r7
-	ldr	r2, [r3, #48]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	adcs	r7, lr, r2
-	ldr	r2, [r3, #44]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	adcs	r2, r12, r2
-	add	r12, r3, #32
-	ldm	r12, {r3, r10, r12}
-	stm	r0, {r4, r11}
-	str	r7, [r0, #8]
-	str	r2, [r0, #12]
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	ldr	r4, [sp]                @ 4-byte Reload
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	adcs	r4, r4, r7
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	adcs	r2, r2, r5
-	str	r4, [r0, #16]
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	adcs	r4, r7, r6
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r4, [r0, #24]
-	adcs	r2, r7, r2
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	adcs	lr, r3, r2
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	lr, [r0, #32]
-	adcs	r3, r10, r3
-	adcs	r7, r12, r7
-	str	r3, [r0, #36]
-	adcs	r6, r2, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r7, [r0, #40]
-	str	r6, [r0, #44]
-	adcs	r5, r1, r8
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r5, [r0, #48]
-	adcs	r4, r1, r9
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r4, [r0, #52]
-	adc	r1, r2, r1
-	str	r1, [r0, #56]
-.LBB234_2:                              @ %nocarry
-	add	sp, sp, #64
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end234:
-	.size	mcl_fp_sub15L, .Lfunc_end234-mcl_fp_sub15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF15L
-	.align	2
-	.type	mcl_fp_subNF15L,%function
-mcl_fp_subNF15L:                        @ @mcl_fp_subNF15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#84
-	sub	sp, sp, #84
-	mov	r12, r0
-	ldr	r0, [r2, #32]
-	add	r9, r2, #8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r2, #36]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r2, #40]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r2, #44]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r2, #48]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r2, #52]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r2, #56]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r1, #56]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r2, {r10, r11}
-	ldm	r9, {r5, r6, r7, r9}
-	ldr	r0, [r2, #28]
-	ldr	r8, [r2, #24]
-	ldr	r2, [r1]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldmib	r1, {r0, lr}
-	ldr	r4, [r1, #12]
-	subs	r2, r2, r10
-	add	r10, r3, #12
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #36]
-	sbcs	r11, r0, r11
-	ldr	r0, [r1, #32]
-	sbcs	lr, lr, r5
-	ldr	r5, [r1, #28]
-	str	r11, [sp]               @ 4-byte Spill
-	sbcs	r6, r4, r6
-	str	r6, [sp, #48]           @ 4-byte Spill
-	ldr	r6, [r1, #16]
-	sbcs	r7, r6, r7
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r1, #24]
-	ldr	r1, [r1, #20]
-	sbcs	r1, r1, r9
-	str	r1, [sp, #52]           @ 4-byte Spill
-	sbcs	r1, r7, r8
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	sbcs	r1, r5, r1
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	sbcs	r0, r2, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	sbc	r0, r1, r0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r3, #32]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [r3, #36]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [r3, #40]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r3, #44]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r3, #48]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r3, #52]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r3, #56]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r3, {r2, r5, r7}
-	ldm	r10, {r6, r9, r10}
-	ldr	r8, [sp, #8]            @ 4-byte Reload
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	ldr	r0, [r3, #28]
-	ldr	r1, [r3, #24]
-	adds	r2, r8, r2
-	adcs	r3, r11, r5
-	mov	r11, lr
-	ldr	r5, [sp, #56]           @ 4-byte Reload
-	adcs	lr, r11, r7
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	adcs	r4, r4, r6
-	ldr	r6, [sp, #52]           @ 4-byte Reload
-	adcs	r5, r5, r9
-	adcs	r6, r6, r10
-	adcs	r7, r7, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r9, r1, r0
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adcs	r10, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r1, r0, r1
-	str	r1, [sp, #32]           @ 4-byte Spill
-	asr	r1, r0, #31
-	ldr	r0, [sp]                @ 4-byte Reload
-	cmp	r1, #0
-	movge	r2, r8
-	movge	lr, r11
-	str	r2, [r12]
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	movge	r3, r0
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r12, #4]
-	str	lr, [r12, #8]
-	movge	r4, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	str	r4, [r12, #12]
-	movge	r5, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	str	r5, [r12, #16]
-	movge	r6, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r6, [r12, #20]
-	movge	r7, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	str	r7, [r12, #24]
-	movge	r9, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	str	r9, [r12, #28]
-	movge	r10, r0
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r10, [r12, #32]
-	movge	r2, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r2, [r12, #36]
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	movge	r2, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	str	r2, [r12, #40]
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	movge	r2, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	cmp	r1, #0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r2, [r12, #44]
-	movge	r1, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	str	r1, [r12, #48]
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	movge	r1, r0
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	str	r1, [r12, #52]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	movge	r0, r1
-	str	r0, [r12, #56]
-	add	sp, sp, #84
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end235:
-	.size	mcl_fp_subNF15L, .Lfunc_end235-mcl_fp_subNF15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add15L
-	.align	2
-	.type	mcl_fpDbl_add15L,%function
-mcl_fpDbl_add15L:                       @ @mcl_fpDbl_add15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#184
-	sub	sp, sp, #184
-	ldm	r1, {r7, r8, lr}
-	ldr	r12, [r1, #12]
-	ldm	r2, {r4, r5, r6, r10}
-	adds	r4, r4, r7
-	str	r4, [sp, #100]          @ 4-byte Spill
-	ldr	r4, [r2, #96]
-	str	r4, [sp, #148]          @ 4-byte Spill
-	ldr	r4, [r2, #100]
-	str	r4, [sp, #164]          @ 4-byte Spill
-	ldr	r4, [r2, #104]
-	str	r4, [sp, #168]          @ 4-byte Spill
-	ldr	r4, [r2, #108]
-	str	r4, [sp, #172]          @ 4-byte Spill
-	ldr	r4, [r2, #112]
-	str	r4, [sp, #176]          @ 4-byte Spill
-	ldr	r4, [r2, #116]
-	str	r4, [sp, #180]          @ 4-byte Spill
-	adcs	r4, r5, r8
-	adcs	r7, r6, lr
-	str	r4, [sp, #68]           @ 4-byte Spill
-	add	lr, r1, #16
-	str	r7, [sp, #64]           @ 4-byte Spill
-	adcs	r7, r10, r12
-	add	r10, r1, #32
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #132]          @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #136]          @ 4-byte Spill
-	ldr	r7, [r2, #72]
-	str	r7, [sp, #140]          @ 4-byte Spill
-	ldr	r7, [r2, #76]
-	str	r7, [sp, #144]          @ 4-byte Spill
-	ldr	r7, [r2, #80]
-	str	r7, [sp, #152]          @ 4-byte Spill
-	ldr	r7, [r2, #88]
-	str	r7, [sp, #156]          @ 4-byte Spill
-	ldr	r7, [r2, #92]
-	str	r7, [sp, #160]          @ 4-byte Spill
-	ldr	r7, [r2, #84]
-	str	r7, [sp, #128]          @ 4-byte Spill
-	ldr	r7, [r2, #32]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #60]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #84]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #88]           @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #92]           @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #96]           @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	ldr	r2, [r2, #16]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #96]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp, #104]          @ 4-byte Spill
-	ldr	r2, [r1, #100]
-	str	r2, [sp, #108]          @ 4-byte Spill
-	ldr	r2, [r1, #104]
-	str	r2, [sp, #112]          @ 4-byte Spill
-	ldr	r2, [r1, #108]
-	str	r2, [sp, #116]          @ 4-byte Spill
-	ldr	r2, [r1, #112]
-	str	r2, [sp, #120]          @ 4-byte Spill
-	ldr	r2, [r1, #116]
-	str	r2, [sp, #124]          @ 4-byte Spill
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #88]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #92]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r8, r9, r10}
-	ldr	r2, [r1, #56]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	str	r11, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	add	r11, r3, #32
-	str	r7, [r0, #8]
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	adcs	r1, r7, r1
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	str	r7, [r0, #12]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r2, r7, r2
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r2, [r0, #20]
-	adcs	r1, r1, r12
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r2, r2, lr
-	str	r2, [r0, #28]
-	adcs	r1, r1, r4
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r2, r2, r5
-	str	r2, [r0, #36]
-	adcs	r1, r1, r6
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r2, r2, r8
-	str	r2, [r0, #44]
-	adcs	r1, r1, r9
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	str	r1, [r0, #48]
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r2, r2, r10
-	adcs	r1, r1, r7
-	str	r2, [r0, #52]
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	str	r1, [r0, #56]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	adcs	r12, r2, r7
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r12, [sp, #84]          @ 4-byte Spill
-	adcs	r9, r1, r2
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r9, [sp, #88]           @ 4-byte Spill
-	adcs	r6, r1, r2
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r6, [sp, #96]           @ 4-byte Spill
-	adcs	r7, r1, r2
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r7, [sp, #132]          @ 4-byte Spill
-	adcs	r4, r1, r2
-	ldr	r1, [sp, #152]          @ 4-byte Reload
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r4, [sp, #92]           @ 4-byte Spill
-	adcs	r5, r1, r2
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r5, [sp, #100]          @ 4-byte Spill
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #152]          @ 4-byte Spill
-	ldr	r1, [sp, #156]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #156]          @ 4-byte Spill
-	ldr	r1, [sp, #160]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	str	r1, [sp, #160]          @ 4-byte Spill
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	str	r1, [sp, #148]          @ 4-byte Spill
-	ldr	r1, [sp, #164]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	str	r1, [sp, #164]          @ 4-byte Spill
-	ldr	r1, [sp, #168]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	str	r1, [sp, #168]          @ 4-byte Spill
-	ldr	r1, [sp, #172]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	str	r1, [sp, #172]          @ 4-byte Spill
-	ldr	r1, [sp, #176]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	str	r1, [sp, #176]          @ 4-byte Spill
-	ldr	r1, [sp, #180]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #180]          @ 4-byte Spill
-	mov	r1, #0
-	adc	r1, r1, #0
-	str	r1, [sp, #128]          @ 4-byte Spill
-	ldmib	r3, {r2, lr}
-	ldr	r1, [r3, #16]
-	ldr	r8, [r3, #12]
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldr	r1, [r3, #20]
-	str	r1, [sp, #136]          @ 4-byte Spill
-	ldr	r1, [r3, #24]
-	str	r1, [sp, #140]          @ 4-byte Spill
-	ldr	r1, [r3, #28]
-	str	r1, [sp, #144]          @ 4-byte Spill
-	ldr	r1, [r3]
-	subs	r1, r12, r1
-	sbcs	r12, r9, r2
-	ldm	r11, {r9, r10, r11}
-	ldr	r2, [r3, #44]
-	sbcs	lr, r6, lr
-	sbcs	r6, r7, r8
-	ldr	r7, [sp, #144]          @ 4-byte Reload
-	str	r2, [sp, #104]          @ 4-byte Spill
-	ldr	r2, [r3, #48]
-	str	r2, [sp, #108]          @ 4-byte Spill
-	ldr	r2, [r3, #52]
-	str	r2, [sp, #112]          @ 4-byte Spill
-	ldr	r2, [r3, #56]
-	str	r2, [sp, #120]          @ 4-byte Spill
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	sbcs	r3, r4, r2
-	ldr	r2, [sp, #136]          @ 4-byte Reload
-	sbcs	r4, r5, r2
-	ldr	r2, [sp, #152]          @ 4-byte Reload
-	ldr	r5, [sp, #140]          @ 4-byte Reload
-	sbcs	r5, r2, r5
-	ldr	r2, [sp, #156]          @ 4-byte Reload
-	sbcs	r8, r2, r7
-	ldr	r2, [sp, #160]          @ 4-byte Reload
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	sbcs	r9, r2, r9
-	ldr	r2, [sp, #148]          @ 4-byte Reload
-	sbcs	r10, r2, r10
-	ldr	r2, [sp, #164]          @ 4-byte Reload
-	sbcs	r2, r2, r11
-	ldr	r11, [sp, #84]          @ 4-byte Reload
-	str	r2, [sp, #116]          @ 4-byte Spill
-	ldr	r2, [sp, #168]          @ 4-byte Reload
-	sbcs	r2, r2, r7
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	str	r2, [sp, #124]          @ 4-byte Spill
-	ldr	r2, [sp, #172]          @ 4-byte Reload
-	sbcs	r2, r2, r7
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r2, [sp, #136]          @ 4-byte Spill
-	ldr	r2, [sp, #176]          @ 4-byte Reload
-	sbcs	r2, r2, r7
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	str	r2, [sp, #140]          @ 4-byte Spill
-	ldr	r2, [sp, #180]          @ 4-byte Reload
-	sbcs	r2, r2, r7
-	str	r2, [sp, #144]          @ 4-byte Spill
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	sbc	r2, r2, #0
-	ands	r2, r2, #1
-	movne	r1, r11
-	str	r1, [r0, #60]
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	movne	r12, r1
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	str	r12, [r0, #64]
-	movne	lr, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	cmp	r2, #0
-	str	lr, [r0, #68]
-	movne	r6, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r6, [r0, #72]
-	movne	r3, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r3, [r0, #76]
-	ldr	r3, [sp, #116]          @ 4-byte Reload
-	movne	r4, r1
-	ldr	r1, [sp, #152]          @ 4-byte Reload
-	cmp	r2, #0
-	str	r4, [r0, #80]
-	movne	r5, r1
-	ldr	r1, [sp, #156]          @ 4-byte Reload
-	str	r5, [r0, #84]
-	movne	r8, r1
-	ldr	r1, [sp, #160]          @ 4-byte Reload
-	str	r8, [r0, #88]
-	movne	r9, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	cmp	r2, #0
-	str	r9, [r0, #92]
-	movne	r10, r1
-	ldr	r1, [sp, #164]          @ 4-byte Reload
-	str	r10, [r0, #96]
-	movne	r3, r1
-	ldr	r1, [sp, #168]          @ 4-byte Reload
-	str	r3, [r0, #100]
-	ldr	r3, [sp, #124]          @ 4-byte Reload
-	movne	r3, r1
-	ldr	r1, [sp, #172]          @ 4-byte Reload
-	cmp	r2, #0
-	ldr	r2, [sp, #136]          @ 4-byte Reload
-	str	r3, [r0, #104]
-	movne	r2, r1
-	ldr	r1, [sp, #176]          @ 4-byte Reload
-	str	r2, [r0, #108]
-	ldr	r2, [sp, #140]          @ 4-byte Reload
-	movne	r2, r1
-	ldr	r1, [sp, #180]          @ 4-byte Reload
-	str	r2, [r0, #112]
-	ldr	r2, [sp, #144]          @ 4-byte Reload
-	movne	r2, r1
-	str	r2, [r0, #116]
-	add	sp, sp, #184
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end236:
-	.size	mcl_fpDbl_add15L, .Lfunc_end236-mcl_fpDbl_add15L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sub15L
-	.align	2
-	.type	mcl_fpDbl_sub15L,%function
-mcl_fpDbl_sub15L:                       @ @mcl_fpDbl_sub15L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#184
-	sub	sp, sp, #184
-	ldr	r7, [r2, #96]
-	ldr	r9, [r2]
-	add	r10, r1, #32
-	str	r7, [sp, #136]          @ 4-byte Spill
-	ldr	r7, [r2, #100]
-	str	r7, [sp, #144]          @ 4-byte Spill
-	ldr	r7, [r2, #104]
-	str	r7, [sp, #168]          @ 4-byte Spill
-	ldr	r7, [r2, #108]
-	str	r7, [sp, #172]          @ 4-byte Spill
-	ldr	r7, [r2, #112]
-	str	r7, [sp, #176]          @ 4-byte Spill
-	ldr	r7, [r2, #116]
-	str	r7, [sp, #180]          @ 4-byte Spill
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #132]          @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #148]          @ 4-byte Spill
-	ldr	r7, [r2, #72]
-	str	r7, [sp, #152]          @ 4-byte Spill
-	ldr	r7, [r2, #76]
-	str	r7, [sp, #156]          @ 4-byte Spill
-	ldr	r7, [r2, #80]
-	str	r7, [sp, #160]          @ 4-byte Spill
-	ldr	r7, [r2, #88]
-	str	r7, [sp, #164]          @ 4-byte Spill
-	ldr	r7, [r2, #92]
-	str	r7, [sp, #140]          @ 4-byte Spill
-	ldr	r7, [r2, #84]
-	str	r7, [sp, #128]          @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #124]          @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #120]          @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #116]          @ 4-byte Spill
-	ldmib	r2, {r8, lr}
-	ldr	r5, [r1]
-	ldr	r12, [r2, #12]
-	ldmib	r1, {r4, r6, r7}
-	subs	r5, r5, r9
-	sbcs	r4, r4, r8
-	str	r5, [sp, #36]           @ 4-byte Spill
-	ldr	r5, [r2, #48]
-	sbcs	r6, r6, lr
-	str	r4, [sp, #28]           @ 4-byte Spill
-	ldr	r4, [r2, #44]
-	add	lr, r1, #16
-	sbcs	r7, r7, r12
-	str	r6, [sp, #24]           @ 4-byte Spill
-	ldr	r6, [r2, #40]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r5, [sp, #88]           @ 4-byte Spill
-	str	r4, [sp, #84]           @ 4-byte Spill
-	str	r6, [sp, #80]           @ 4-byte Spill
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldr	r7, [r2, #32]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	ldr	r2, [r2, #16]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #96]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp, #92]           @ 4-byte Spill
-	ldr	r2, [r1, #100]
-	str	r2, [sp, #96]           @ 4-byte Spill
-	ldr	r2, [r1, #104]
-	str	r2, [sp, #100]          @ 4-byte Spill
-	ldr	r2, [r1, #108]
-	str	r2, [sp, #104]          @ 4-byte Spill
-	ldr	r2, [r1, #112]
-	str	r2, [sp, #108]          @ 4-byte Spill
-	ldr	r2, [r1, #116]
-	str	r2, [sp, #112]          @ 4-byte Spill
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #68]           @ 4-byte Spill
-	ldr	r2, [r1, #88]
-	str	r2, [sp, #72]           @ 4-byte Spill
-	ldr	r2, [r1, #92]
-	str	r2, [sp, #76]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r8, r9, r10}
-	ldr	r2, [r1, #56]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #36]          @ 4-byte Reload
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	str	r11, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	ldr	r11, [r3, #32]
-	str	r7, [r0, #8]
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	str	r7, [r0, #12]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	sbcs	r2, r2, r7
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r2, [r0, #20]
-	sbcs	r1, r12, r1
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	sbcs	r2, lr, r2
-	str	r2, [r0, #28]
-	sbcs	r1, r4, r1
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	sbcs	r2, r5, r2
-	str	r2, [r0, #36]
-	sbcs	r1, r6, r1
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	sbcs	r2, r8, r2
-	str	r2, [r0, #44]
-	sbcs	r1, r9, r1
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	str	r1, [r0, #48]
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	sbcs	r2, r10, r2
-	sbcs	r1, r7, r1
-	str	r2, [r0, #52]
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	str	r1, [r0, #56]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	sbcs	lr, r7, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	sbcs	r9, r2, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r9, [sp, #88]           @ 4-byte Spill
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r1, [sp, #132]          @ 4-byte Spill
-	ldr	r1, [sp, #152]          @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	str	r1, [sp, #148]          @ 4-byte Spill
-	ldr	r1, [sp, #156]          @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #160]          @ 4-byte Reload
-	str	r1, [sp, #152]          @ 4-byte Spill
-	mov	r1, #0
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	str	r2, [sp, #156]          @ 4-byte Spill
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	str	r2, [sp, #160]          @ 4-byte Spill
-	ldr	r2, [sp, #164]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	str	r2, [sp, #164]          @ 4-byte Spill
-	ldr	r2, [sp, #140]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	str	r2, [sp, #140]          @ 4-byte Spill
-	ldr	r2, [sp, #136]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	str	r2, [sp, #136]          @ 4-byte Spill
-	ldr	r2, [sp, #144]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	str	r2, [sp, #144]          @ 4-byte Spill
-	ldr	r2, [sp, #168]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	str	r2, [sp, #168]          @ 4-byte Spill
-	ldr	r2, [sp, #172]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	str	r2, [sp, #172]          @ 4-byte Spill
-	ldr	r2, [sp, #176]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r2, [sp, #176]          @ 4-byte Spill
-	ldr	r2, [sp, #180]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	sbc	r1, r1, #0
-	str	r2, [sp, #180]          @ 4-byte Spill
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [r3, #36]
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [r3, #40]
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldr	r1, [r3, #44]
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldr	r1, [r3, #48]
-	str	r1, [sp, #120]          @ 4-byte Spill
-	ldr	r1, [r3, #52]
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldr	r1, [r3, #56]
-	str	r1, [sp, #128]          @ 4-byte Spill
-	ldm	r3, {r2, r5, r6}
-	ldr	r4, [r3, #12]
-	ldr	r12, [r3, #16]
-	ldr	r8, [r3, #20]
-	ldr	r10, [r3, #28]
-	ldr	r7, [r3, #24]
-	ldr	r3, [sp, #152]          @ 4-byte Reload
-	adds	r1, lr, r2
-	ldr	r2, [sp, #132]          @ 4-byte Reload
-	adcs	r5, r9, r5
-	adcs	r6, r2, r6
-	ldr	r2, [sp, #148]          @ 4-byte Reload
-	adcs	r2, r2, r4
-	ldr	r4, [sp, #156]          @ 4-byte Reload
-	adcs	r3, r3, r12
-	adcs	r12, r4, r8
-	ldr	r4, [sp, #160]          @ 4-byte Reload
-	adcs	r8, r4, r7
-	ldr	r4, [sp, #164]          @ 4-byte Reload
-	ldr	r7, [sp, #140]          @ 4-byte Reload
-	adcs	r9, r4, r10
-	ldr	r4, [sp, #104]          @ 4-byte Reload
-	ldr	r10, [sp, #128]         @ 4-byte Reload
-	adcs	r11, r7, r11
-	ldr	r7, [sp, #136]          @ 4-byte Reload
-	adcs	r7, r7, r4
-	ldr	r4, [sp, #112]          @ 4-byte Reload
-	str	r7, [sp, #104]          @ 4-byte Spill
-	ldr	r7, [sp, #144]          @ 4-byte Reload
-	adcs	r7, r7, r4
-	ldr	r4, [sp, #116]          @ 4-byte Reload
-	str	r7, [sp, #112]          @ 4-byte Spill
-	ldr	r7, [sp, #168]          @ 4-byte Reload
-	adcs	r7, r7, r4
-	ldr	r4, [sp, #120]          @ 4-byte Reload
-	str	r7, [sp, #116]          @ 4-byte Spill
-	ldr	r7, [sp, #172]          @ 4-byte Reload
-	adcs	r7, r7, r4
-	ldr	r4, [sp, #124]          @ 4-byte Reload
-	str	r7, [sp, #120]          @ 4-byte Spill
-	ldr	r7, [sp, #176]          @ 4-byte Reload
-	adcs	r7, r7, r4
-	str	r7, [sp, #124]          @ 4-byte Spill
-	ldr	r7, [sp, #180]          @ 4-byte Reload
-	adc	r7, r7, r10
-	str	r7, [sp, #128]          @ 4-byte Spill
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	ands	r7, r7, #1
-	moveq	r1, lr
-	str	r1, [r0, #60]
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	moveq	r5, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r5, [r0, #64]
-	moveq	r6, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r6, [r0, #68]
-	moveq	r2, r1
-	ldr	r1, [sp, #152]          @ 4-byte Reload
-	str	r2, [r0, #72]
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	moveq	r3, r1
-	ldr	r1, [sp, #156]          @ 4-byte Reload
-	str	r3, [r0, #76]
-	moveq	r12, r1
-	ldr	r1, [sp, #160]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r12, [r0, #80]
-	moveq	r8, r1
-	ldr	r1, [sp, #164]          @ 4-byte Reload
-	str	r8, [r0, #84]
-	moveq	r9, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r9, [r0, #88]
-	moveq	r11, r1
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r11, [r0, #92]
-	moveq	r2, r1
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	str	r2, [r0, #96]
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #168]          @ 4-byte Reload
-	str	r2, [r0, #100]
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #172]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r2, [r0, #104]
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #176]          @ 4-byte Reload
-	str	r2, [r0, #108]
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #180]          @ 4-byte Reload
-	str	r2, [r0, #112]
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	moveq	r2, r1
-	str	r2, [r0, #116]
-	add	sp, sp, #184
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end237:
-	.size	mcl_fpDbl_sub15L, .Lfunc_end237-mcl_fpDbl_sub15L
-	.cantunwind
-	.fnend
-
-	.align	2
-	.type	.LmulPv512x32,%function
-.LmulPv512x32:                          @ @mulPv512x32
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r9, [r1, #12]
-	umull	r4, r8, lr, r2
-	umull	lr, r6, r12, r2
-	mov	r5, r4
-	mov	r7, r6
-	str	lr, [r0]
-	umull	lr, r12, r9, r2
-	umlal	r7, r5, r3, r2
-	str	r5, [r0, #8]
-	str	r7, [r0, #4]
-	umull	r5, r7, r3, r2
-	adds	r3, r6, r5
-	adcs	r3, r7, r4
-	adcs	r3, r8, lr
-	str	r3, [r0, #12]
-	ldr	r3, [r1, #16]
-	umull	r7, r6, r3, r2
-	adcs	r3, r12, r7
-	str	r3, [r0, #16]
-	ldr	r3, [r1, #20]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #20]
-	ldr	r3, [r1, #24]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #24]
-	ldr	r3, [r1, #28]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #28]
-	ldr	r3, [r1, #32]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #32]
-	ldr	r3, [r1, #36]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #36]
-	ldr	r3, [r1, #40]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #40]
-	ldr	r3, [r1, #44]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #44]
-	ldr	r3, [r1, #48]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #48]
-	ldr	r3, [r1, #52]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #52]
-	ldr	r3, [r1, #56]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #56]
-	ldr	r1, [r1, #60]
-	umull	r3, r7, r1, r2
-	adcs	r1, r6, r3
-	str	r1, [r0, #60]
-	adc	r1, r7, #0
-	str	r1, [r0, #64]
-	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
-	mov	pc, lr
-.Lfunc_end238:
-	.size	.LmulPv512x32, .Lfunc_end238-.LmulPv512x32
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mulUnitPre16L
-	.align	2
-	.type	mcl_fp_mulUnitPre16L,%function
-mcl_fp_mulUnitPre16L:                   @ @mcl_fp_mulUnitPre16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#92
-	sub	sp, sp, #92
-	mov	r4, r0
-	add	r0, sp, #16
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #76]
-	add	r11, sp, #40
-	add	lr, sp, #16
-	ldr	r10, [sp, #80]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #72]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #68]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #64]
-	str	r0, [sp]                @ 4-byte Spill
-	ldm	r11, {r5, r6, r7, r8, r9, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	stm	r4, {r0, r1, r2, r3, r12, lr}
-	add	r0, r4, #24
-	str	r10, [r4, #64]
-	stm	r0, {r5, r6, r7, r8, r9, r11}
-	ldr	r0, [sp]                @ 4-byte Reload
-	str	r0, [r4, #48]
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	str	r0, [r4, #52]
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	str	r0, [r4, #56]
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	str	r0, [r4, #60]
-	add	sp, sp, #92
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end239:
-	.size	mcl_fp_mulUnitPre16L, .Lfunc_end239-mcl_fp_mulUnitPre16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_mulPre16L
-	.align	2
-	.type	mcl_fpDbl_mulPre16L,%function
-mcl_fpDbl_mulPre16L:                    @ @mcl_fpDbl_mulPre16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#276
-	sub	sp, sp, #276
-	mov	r6, r2
-	mov	r5, r1
-	mov	r4, r0
-	bl	mcl_fpDbl_mulPre8L(PLT)
-	add	r0, r4, #64
-	add	r1, r5, #32
-	add	r2, r6, #32
-	bl	mcl_fpDbl_mulPre8L(PLT)
-	add	r11, r6, #32
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [r6, #44]
-	ldr	r8, [r6, #60]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [r6, #48]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [r6, #52]
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [r6, #56]
-	str	r0, [sp, #144]          @ 4-byte Spill
-	ldm	r6, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [r6, #24]
-	ldr	r6, [r6, #28]
-	adds	r0, r0, r9
-	str	r0, [sp, #136]          @ 4-byte Spill
-	adcs	r0, r1, r10
-	str	r0, [sp, #132]          @ 4-byte Spill
-	adcs	r0, r2, r11
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, lr, r0
-	add	lr, r5, #44
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	adcs	r0, r6, r8
-	str	r0, [sp, #108]          @ 4-byte Spill
-	mov	r0, #0
-	ldm	r5, {r8, r10, r11}
-	ldr	r7, [r5, #32]
-	ldr	r3, [r5, #36]
-	ldr	r2, [r5, #40]
-	adc	r6, r0, #0
-	ldr	r0, [r5, #12]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r5, #16]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [r5, #20]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [r5, #24]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [r5, #28]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldm	lr, {r0, r1, r12, lr}
-	ldr	r9, [r5, #60]
-	adds	r5, r8, r7
-	adcs	r3, r10, r3
-	str	r5, [sp, #180]
-	str	r5, [sp, #144]          @ 4-byte Spill
-	adcs	r8, r11, r2
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	str	r3, [sp, #184]
-	str	r3, [sp, #140]          @ 4-byte Spill
-	str	r8, [sp, #188]
-	adcs	r11, r2, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	add	r2, sp, #148
-	str	r11, [sp, #192]
-	adcs	r5, r0, r1
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	add	r1, sp, #180
-	str	r5, [sp, #196]
-	adcs	r7, r0, r12
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	str	r7, [sp, #200]
-	adcs	r10, r0, lr
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	str	r10, [sp, #204]
-	adcs	r0, r0, r9
-	str	r0, [sp, #208]
-	mov	r9, r0
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	str	r0, [sp, #148]
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #152]
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #156]
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #160]
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #164]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #168]
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #172]
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #176]
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	add	r0, sp, #212
-	bl	mcl_fpDbl_mulPre8L(PLT)
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	cmp	r6, #0
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	ldr	r2, [sp, #140]          @ 4-byte Reload
-	ldr	r3, [sp, #124]          @ 4-byte Reload
-	moveq	r9, r6
-	moveq	r10, r6
-	moveq	r7, r6
-	moveq	r5, r6
-	moveq	r11, r6
-	cmp	r6, #0
-	moveq	r1, r6
-	moveq	r8, r6
-	moveq	r2, r6
-	str	r9, [sp, #104]          @ 4-byte Spill
-	str	r1, [sp, #144]          @ 4-byte Spill
-	str	r2, [sp, #140]          @ 4-byte Spill
-	str	r8, [sp, #96]           @ 4-byte Spill
-	adds	r12, r1, r0
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	adcs	lr, r2, r1
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	adcs	r2, r8, r2
-	ldr	r8, [sp, #104]          @ 4-byte Reload
-	adcs	r9, r11, r3
-	ldr	r3, [sp, #120]          @ 4-byte Reload
-	adcs	r1, r5, r3
-	ldr	r3, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r7, r3
-	ldr	r3, [sp, #112]          @ 4-byte Reload
-	adcs	r3, r10, r3
-	str	r3, [sp, #124]          @ 4-byte Spill
-	ldr	r3, [sp, #108]          @ 4-byte Reload
-	adcs	r3, r8, r3
-	ldr	r8, [sp, #124]          @ 4-byte Reload
-	str	r3, [sp, #128]          @ 4-byte Spill
-	mov	r3, #0
-	adc	r3, r3, #0
-	str	r3, [sp, #136]          @ 4-byte Spill
-	ldr	r3, [sp, #100]          @ 4-byte Reload
-	cmp	r3, #0
-	moveq	r0, r7
-	moveq	r1, r5
-	moveq	r9, r11
-	ldr	r5, [sp, #136]          @ 4-byte Reload
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	moveq	r2, r0
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	moveq	lr, r0
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	cmp	r3, #0
-	moveq	r5, r3
-	and	r3, r6, r3
-	ldr	r6, [sp, #244]
-	moveq	r8, r10
-	moveq	r12, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	moveq	r7, r0
-	adds	r0, r12, r6
-	add	r6, sp, #216
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #248]
-	adcs	r0, lr, r0
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #252]
-	adcs	r10, r2, r0
-	ldr	r0, [sp, #256]
-	adcs	r0, r9, r0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #260]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #264]
-	adcs	r0, r1, r0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #268]
-	adcs	r0, r8, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #272]
-	adcs	r0, r7, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	adc	r0, r5, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldm	r4, {r1, r12, lr}
-	ldr	r5, [sp, #212]
-	ldr	r8, [r4, #12]
-	ldm	r6, {r2, r3, r6}
-	ldr	r0, [sp, #236]
-	ldr	r7, [sp, #240]
-	ldr	r9, [r4, #72]
-	subs	r1, r5, r1
-	ldr	r5, [sp, #228]
-	sbcs	r2, r2, r12
-	sbcs	r12, r3, lr
-	ldr	r3, [sp, #140]          @ 4-byte Reload
-	sbcs	r11, r6, r8
-	ldr	r6, [r4, #16]
-	ldr	r8, [r4, #68]
-	sbcs	lr, r5, r6
-	ldr	r5, [r4, #20]
-	ldr	r6, [sp, #232]
-	sbcs	r5, r6, r5
-	ldr	r6, [r4, #24]
-	sbcs	r6, r0, r6
-	ldr	r0, [r4, #28]
-	sbcs	r0, r7, r0
-	ldr	r7, [r4, #32]
-	sbcs	r3, r3, r7
-	str	r7, [sp, #144]          @ 4-byte Spill
-	ldr	r7, [r4, #36]
-	str	r3, [sp, #84]           @ 4-byte Spill
-	ldr	r3, [sp, #136]          @ 4-byte Reload
-	str	r7, [sp, #140]          @ 4-byte Spill
-	sbcs	r3, r3, r7
-	ldr	r7, [r4, #40]
-	str	r3, [sp, #76]           @ 4-byte Spill
-	sbcs	r3, r10, r7
-	str	r7, [sp, #136]          @ 4-byte Spill
-	ldr	r7, [r4, #44]
-	ldr	r10, [r4, #76]
-	str	r3, [sp, #72]           @ 4-byte Spill
-	ldr	r3, [sp, #128]          @ 4-byte Reload
-	str	r7, [sp, #132]          @ 4-byte Spill
-	sbcs	r3, r3, r7
-	ldr	r7, [r4, #48]
-	str	r3, [sp, #68]           @ 4-byte Spill
-	ldr	r3, [sp, #124]          @ 4-byte Reload
-	str	r7, [sp, #128]          @ 4-byte Spill
-	sbcs	r3, r3, r7
-	ldr	r7, [r4, #52]
-	str	r3, [sp, #64]           @ 4-byte Spill
-	ldr	r3, [sp, #120]          @ 4-byte Reload
-	str	r7, [sp, #124]          @ 4-byte Spill
-	sbcs	r3, r3, r7
-	ldr	r7, [r4, #56]
-	str	r3, [sp, #60]           @ 4-byte Spill
-	ldr	r3, [sp, #116]          @ 4-byte Reload
-	str	r7, [sp, #120]          @ 4-byte Spill
-	sbcs	r3, r3, r7
-	ldr	r7, [r4, #60]
-	str	r3, [sp, #56]           @ 4-byte Spill
-	ldr	r3, [sp, #112]          @ 4-byte Reload
-	str	r7, [sp, #116]          @ 4-byte Spill
-	sbcs	r3, r3, r7
-	str	r3, [sp, #52]           @ 4-byte Spill
-	ldr	r3, [sp, #108]          @ 4-byte Reload
-	sbc	r3, r3, #0
-	str	r3, [sp, #48]           @ 4-byte Spill
-	ldr	r3, [r4, #64]
-	subs	r1, r1, r3
-	str	r3, [sp, #80]           @ 4-byte Spill
-	str	r1, [sp, #44]           @ 4-byte Spill
-	sbcs	r1, r2, r8
-	str	r1, [sp, #40]           @ 4-byte Spill
-	sbcs	r1, r12, r9
-	add	r12, r4, #104
-	str	r1, [sp, #36]           @ 4-byte Spill
-	sbcs	r1, r11, r10
-	ldr	r11, [r4, #80]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	sbcs	r1, lr, r11
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [r4, #84]
-	str	r1, [sp, #112]          @ 4-byte Spill
-	sbcs	r1, r5, r1
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [r4, #88]
-	str	r1, [sp, #108]          @ 4-byte Spill
-	sbcs	r1, r6, r1
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [r4, #92]
-	sbcs	r0, r0, r1
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [r4, #100]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r4, #96]
-	str	r1, [sp, #96]           @ 4-byte Spill
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldm	r12, {r2, r3, r12}
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	ldr	lr, [r4, #116]
-	ldr	r5, [r4, #120]
-	ldr	r6, [r4, #124]
-	sbcs	r0, r7, r0
-	str	r12, [sp, #92]          @ 4-byte Spill
-	str	r6, [sp, #88]           @ 4-byte Spill
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	sbcs	r0, r0, r2
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	sbcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	sbcs	r0, r0, r12
-	mov	r12, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	sbcs	r0, r0, lr
-	mov	lr, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	sbcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	sbcs	r7, r0, r6
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	sbc	r5, r0, #0
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	adds	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [r4, #32]
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	str	r1, [r4, #36]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	str	r0, [r4, #40]
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	str	r1, [r4, #44]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	str	r0, [r4, #48]
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #20]           @ 4-byte Reload
-	str	r1, [r4, #52]
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	str	r0, [r4, #56]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	str	r1, [r4, #60]
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	str	r0, [r4, #64]
-	adcs	r1, r8, r1
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	str	r1, [r4, #68]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [r4, #72]
-	adcs	r1, r10, r1
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r1, [r4, #76]
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r11, r0
-	adcs	r1, r1, r6
-	str	r0, [r4, #80]
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	str	r1, [r4, #84]
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [r4, #88]
-	adcs	r1, r1, r7
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	str	r1, [r4, #92]
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r8, r0, r5
-	ldr	r5, [sp, #92]           @ 4-byte Reload
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, #0
-	str	r8, [r4, #96]
-	adcs	r2, r2, #0
-	adcs	r3, r3, #0
-	adcs	r7, r5, #0
-	adcs	r6, r12, #0
-	adcs	r5, lr, #0
-	adc	r12, r0, #0
-	add	r0, r4, #100
-	stm	r0, {r1, r2, r3, r7}
-	str	r6, [r4, #116]
-	str	r5, [r4, #120]
-	str	r12, [r4, #124]
-	add	sp, sp, #276
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end240:
-	.size	mcl_fpDbl_mulPre16L, .Lfunc_end240-mcl_fpDbl_mulPre16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sqrPre16L
-	.align	2
-	.type	mcl_fpDbl_sqrPre16L,%function
-mcl_fpDbl_sqrPre16L:                    @ @mcl_fpDbl_sqrPre16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#276
-	sub	sp, sp, #276
-	mov	r5, r1
-	mov	r4, r0
-	mov	r2, r5
-	bl	mcl_fpDbl_mulPre8L(PLT)
-	add	r1, r5, #32
-	add	r0, r4, #64
-	mov	r2, r1
-	bl	mcl_fpDbl_mulPre8L(PLT)
-	ldm	r5, {r8, r9, r10}
-	ldr	r0, [r5, #12]
-	ldr	r6, [r5, #32]
-	ldr	r7, [r5, #36]
-	ldr	r3, [r5, #40]
-	add	lr, r5, #44
-	ldr	r11, [r5, #16]
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [r5, #20]
-	adds	r6, r8, r6
-	adcs	r7, r9, r7
-	adcs	r3, r10, r3
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [r5, #24]
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [r5, #28]
-	str	r0, [sp, #144]          @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r12, lr}
-	ldr	r5, [sp, #136]          @ 4-byte Reload
-	str	r6, [sp, #180]
-	str	r7, [sp, #184]
-	str	r6, [sp, #148]
-	str	r3, [sp, #128]          @ 4-byte Spill
-	str	r3, [sp, #188]
-	str	r7, [sp, #152]
-	adcs	r10, r5, r0
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r11, r11, r1
-	str	r10, [sp, #192]
-	add	r1, sp, #180
-	str	r11, [sp, #196]
-	adcs	r8, r0, r2
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	add	r2, sp, #148
-	str	r8, [sp, #200]
-	adcs	r9, r0, r12
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	str	r9, [sp, #204]
-	adcs	r5, r0, lr
-	add	r0, sp, #156
-	str	r5, [sp, #208]
-	stm	r0, {r3, r10, r11}
-	mov	r0, #0
-	str	r8, [sp, #168]
-	str	r9, [sp, #172]
-	str	r5, [sp, #176]
-	adc	r0, r0, #0
-	str	r0, [sp, #136]          @ 4-byte Spill
-	add	r0, sp, #212
-	bl	mcl_fpDbl_mulPre8L(PLT)
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adds	r2, r6, r6
-	ldr	r1, [sp, #244]
-	ldr	r6, [sp, #248]
-	ldr	lr, [sp, #264]
-	ldr	r12, [sp, #268]
-	adcs	r3, r7, r7
-	adcs	r7, r0, r0
-	str	r1, [sp, #128]          @ 4-byte Spill
-	str	r6, [sp, #116]          @ 4-byte Spill
-	str	r12, [sp, #108]         @ 4-byte Spill
-	adcs	r10, r10, r10
-	adcs	r0, r11, r11
-	ldr	r11, [sp, #252]
-	str	r0, [sp, #144]          @ 4-byte Spill
-	adcs	r0, r8, r8
-	ldr	r8, [sp, #260]
-	str	r0, [sp, #140]          @ 4-byte Spill
-	adcs	r0, r9, r9
-	ldr	r9, [sp, #256]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	adc	r0, r5, r5
-	adds	r2, r1, r2
-	adcs	r1, r6, r3
-	str	r2, [sp, #132]          @ 4-byte Spill
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #272]
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	adcs	r7, r11, r7
-	adcs	r3, r9, r10
-	adcs	r2, r8, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	adcs	r1, lr, r1
-	adcs	r10, r12, r6
-	ldr	r6, [sp, #112]          @ 4-byte Reload
-	adcs	r12, r0, r6
-	mov	r6, r0
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adc	r5, r0, r5, lsr #31
-	cmp	r0, #0
-	moveq	r1, lr
-	moveq	r2, r8
-	moveq	r3, r9
-	moveq	r7, r11
-	str	r1, [sp, #144]          @ 4-byte Spill
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r2, [sp, #140]          @ 4-byte Spill
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	str	r3, [sp, #120]          @ 4-byte Spill
-	add	r3, sp, #216
-	moveq	r10, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	cmp	r0, #0
-	moveq	r12, r6
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	moveq	r5, r0
-	str	r12, [sp, #112]         @ 4-byte Spill
-	moveq	r6, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldm	r4, {r12, lr}
-	ldr	r9, [sp, #212]
-	ldr	r11, [r4, #8]
-	ldr	r8, [r4, #12]
-	moveq	r1, r2
-	ldm	r3, {r0, r2, r3}
-	subs	r12, r9, r12
-	sbcs	r9, r0, lr
-	ldr	r0, [r4, #16]
-	sbcs	r11, r2, r11
-	ldr	r2, [sp, #228]
-	sbcs	lr, r3, r8
-	ldr	r8, [r4, #68]
-	sbcs	r0, r2, r0
-	ldr	r2, [sp, #232]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [r4, #20]
-	sbcs	r0, r2, r0
-	ldr	r2, [sp, #236]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [r4, #24]
-	sbcs	r0, r2, r0
-	ldr	r2, [sp, #240]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [r4, #28]
-	sbcs	r3, r2, r0
-	ldr	r0, [r4, #32]
-	str	r0, [sp, #136]          @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r4, #36]
-	str	r0, [sp, #132]          @ 4-byte Spill
-	sbcs	r0, r6, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r4, #40]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	sbcs	r0, r7, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r4, #44]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r4, #48]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [r4, #52]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	str	r1, [sp, #140]          @ 4-byte Spill
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r4, #56]
-	str	r0, [sp, #144]          @ 4-byte Spill
-	sbcs	r0, r10, r0
-	ldr	r10, [r4, #76]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r4, #60]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	sbc	r0, r5, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r4, #64]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	subs	r0, r12, r0
-	add	r12, r4, #104
-	str	r0, [sp, #44]           @ 4-byte Spill
-	sbcs	r0, r9, r8
-	ldr	r9, [r4, #72]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	sbcs	r0, r11, r9
-	ldr	r11, [r4, #80]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	sbcs	r0, lr, r10
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	sbcs	r0, r0, r11
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r4, #84]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r4, #88]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	sbcs	r0, r1, r0
-	ldr	r1, [r4, #100]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r4, #92]
-	str	r1, [sp, #96]           @ 4-byte Spill
-	str	r0, [sp, #104]          @ 4-byte Spill
-	sbcs	r0, r3, r0
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r4, #96]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldm	r12, {r2, r3, r12}
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	ldr	lr, [r4, #116]
-	ldr	r5, [r4, #120]
-	ldr	r6, [r4, #124]
-	sbcs	r0, r7, r0
-	str	r12, [sp, #92]          @ 4-byte Spill
-	str	r6, [sp, #88]           @ 4-byte Spill
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	sbcs	r0, r0, r2
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	sbcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	sbcs	r0, r0, r12
-	mov	r12, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	sbcs	r0, r0, lr
-	mov	lr, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	sbcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	sbcs	r7, r0, r6
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	sbc	r5, r0, #0
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adds	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [r4, #32]
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	str	r1, [r4, #36]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	str	r0, [r4, #40]
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	str	r1, [r4, #44]
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	str	r0, [r4, #48]
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #20]           @ 4-byte Reload
-	str	r1, [r4, #52]
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	str	r0, [r4, #56]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	str	r1, [r4, #60]
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	str	r0, [r4, #64]
-	adcs	r1, r8, r1
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	str	r1, [r4, #68]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [r4, #72]
-	adcs	r1, r10, r1
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r1, [r4, #76]
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r11, r0
-	adcs	r1, r1, r6
-	str	r0, [r4, #80]
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	str	r1, [r4, #84]
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [r4, #88]
-	adcs	r1, r1, r7
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	str	r1, [r4, #92]
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r8, r0, r5
-	ldr	r5, [sp, #92]           @ 4-byte Reload
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, #0
-	str	r8, [r4, #96]
-	adcs	r2, r2, #0
-	adcs	r3, r3, #0
-	adcs	r7, r5, #0
-	adcs	r6, r12, #0
-	adcs	r5, lr, #0
-	adc	r12, r0, #0
-	add	r0, r4, #100
-	stm	r0, {r1, r2, r3, r7}
-	str	r6, [r4, #116]
-	str	r5, [r4, #120]
-	str	r12, [r4, #124]
-	add	sp, sp, #276
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end241:
-	.size	mcl_fpDbl_sqrPre16L, .Lfunc_end241-mcl_fpDbl_sqrPre16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mont16L
-	.align	2
-	.type	mcl_fp_mont16L,%function
-mcl_fp_mont16L:                         @ @mcl_fp_mont16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#404
-	sub	sp, sp, #404
-	.pad	#2048
-	sub	sp, sp, #2048
-	add	r12, sp, #132
-	add	r6, sp, #2048
-	mov	r4, r3
-	stm	r12, {r1, r2, r3}
-	str	r0, [sp, #92]           @ 4-byte Spill
-	add	r0, r6, #328
-	ldr	r5, [r3, #-4]
-	ldr	r2, [r2]
-	str	r5, [sp, #128]          @ 4-byte Spill
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #2376]
-	ldr	r1, [sp, #2380]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	mul	r2, r0, r5
-	ldr	r0, [sp, #2440]
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #2384]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #2436]
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #2388]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #2432]
-	str	r1, [sp, #88]           @ 4-byte Spill
-	mov	r1, r4
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #2428]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #2424]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #2420]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #2416]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #2412]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #2408]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #2404]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #2400]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #2396]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2392]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #2304
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #2368]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r7, [sp, #2332]
-	ldr	r4, [sp, #2328]
-	ldr	r8, [sp, #2324]
-	ldr	r11, [sp, #2320]
-	ldr	r9, [sp, #2304]
-	ldr	r10, [sp, #2308]
-	ldr	r6, [sp, #2312]
-	ldr	r5, [sp, #2316]
-	add	lr, sp, #2048
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #2364]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2360]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2356]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2352]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2348]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2344]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2340]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #2336]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r0, #4]
-	add	r0, lr, #184
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	ldr	r3, [sp, #2248]
-	ldr	r12, [sp, #2252]
-	ldr	lr, [sp, #2256]
-	adds	r0, r9, r0
-	ldr	r9, [sp, #2272]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	ldr	r10, [sp, #2276]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	ldr	r6, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #2264]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	ldr	r8, [sp, #2268]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #2260]
-	adcs	r1, r7, r1
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	ldr	r7, [sp, #2232]
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #2244]
-	adc	r0, r0, #0
-	adds	r7, r11, r7
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #2240]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2296]
-	str	r7, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2292]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2288]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #2284]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #2280]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #2236]
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #2160
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #2224]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r5, [sp, #2188]
-	ldr	r6, [sp, #2184]
-	ldr	r8, [sp, #2180]
-	ldr	r9, [sp, #2176]
-	ldr	r10, [sp, #2160]
-	ldr	r11, [sp, #2164]
-	ldr	r4, [sp, #2168]
-	ldr	r7, [sp, #2172]
-	add	lr, sp, #2048
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2220]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2216]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2212]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2208]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2204]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2200]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2196]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2192]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, lr, #40
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #2100]
-	ldr	r3, [sp, #2104]
-	ldr	r12, [sp, #2108]
-	ldr	lr, [sp, #2112]
-	adds	r0, r0, r10
-	ldr	r10, [sp, #2132]
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r11, [sp, #124]         @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #2116]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #2088]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #2128]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #2124]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #2120]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #2096]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r7, r11, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2152]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2148]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2144]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2140]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2136]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2092]
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #2016
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #2080]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r5, [sp, #2044]
-	ldr	r6, [sp, #2040]
-	ldr	r8, [sp, #2036]
-	ldr	r9, [sp, #2032]
-	ldr	r10, [sp, #2016]
-	ldr	r11, [sp, #2020]
-	ldr	r4, [sp, #2024]
-	ldr	r7, [sp, #2028]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2076]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2072]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2068]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2064]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2060]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2056]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #2052]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2048]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, lr, #920
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1956]
-	ldr	r3, [sp, #1960]
-	ldr	r12, [sp, #1964]
-	ldr	lr, [sp, #1968]
-	adds	r0, r0, r10
-	ldr	r10, [sp, #1988]
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r11, [sp, #124]         @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1972]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1944]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1984]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1980]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1976]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1952]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r7, r11, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2008]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2004]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2000]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1996]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1992]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1948]
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1872
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1936]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r5, [sp, #1900]
-	ldr	r6, [sp, #1896]
-	ldr	r8, [sp, #1892]
-	ldr	r9, [sp, #1888]
-	ldr	r10, [sp, #1872]
-	ldr	r11, [sp, #1876]
-	ldr	r4, [sp, #1880]
-	ldr	r7, [sp, #1884]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1932]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1928]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1924]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1920]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1916]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1912]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1908]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1904]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, lr, #776
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1812]
-	ldr	r3, [sp, #1816]
-	ldr	r12, [sp, #1820]
-	ldr	lr, [sp, #1824]
-	adds	r0, r0, r10
-	ldr	r10, [sp, #1844]
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r11, [sp, #124]         @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1828]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1800]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1840]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1836]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1832]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1808]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r7, r11, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1864]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1860]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1856]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1852]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1848]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1804]
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1728
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1792]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r5, [sp, #1756]
-	ldr	r6, [sp, #1752]
-	ldr	r8, [sp, #1748]
-	ldr	r9, [sp, #1744]
-	ldr	r10, [sp, #1728]
-	ldr	r11, [sp, #1732]
-	ldr	r4, [sp, #1736]
-	ldr	r7, [sp, #1740]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1788]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1784]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1780]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1776]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1772]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1768]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1764]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1760]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, lr, #632
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1668]
-	ldr	r3, [sp, #1672]
-	ldr	r12, [sp, #1676]
-	ldr	lr, [sp, #1680]
-	adds	r0, r0, r10
-	ldr	r10, [sp, #1700]
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r11, [sp, #124]         @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1684]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1656]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1696]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1692]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1688]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1664]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r7, r11, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1720]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1716]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1712]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1708]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1704]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1660]
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1584
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1648]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r5, [sp, #1612]
-	ldr	r6, [sp, #1608]
-	ldr	r8, [sp, #1604]
-	ldr	r9, [sp, #1600]
-	ldr	r10, [sp, #1584]
-	ldr	r11, [sp, #1588]
-	ldr	r4, [sp, #1592]
-	ldr	r7, [sp, #1596]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1644]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1640]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1636]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1632]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1628]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1624]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1620]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1616]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, lr, #488
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1524]
-	ldr	r3, [sp, #1528]
-	ldr	r12, [sp, #1532]
-	ldr	lr, [sp, #1536]
-	adds	r0, r0, r10
-	ldr	r10, [sp, #1556]
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r11, [sp, #124]         @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1540]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1512]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1552]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1548]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1544]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1520]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r7, r11, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1576]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1572]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1568]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1564]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1560]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1516]
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1440
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1504]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r5, [sp, #1468]
-	ldr	r6, [sp, #1464]
-	ldr	r8, [sp, #1460]
-	ldr	r9, [sp, #1456]
-	ldr	r10, [sp, #1440]
-	ldr	r11, [sp, #1444]
-	ldr	r4, [sp, #1448]
-	ldr	r7, [sp, #1452]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1500]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1496]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1492]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1488]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1484]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1480]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1476]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1472]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, lr, #344
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1380]
-	ldr	r3, [sp, #1384]
-	ldr	r12, [sp, #1388]
-	ldr	lr, [sp, #1392]
-	adds	r0, r0, r10
-	ldr	r10, [sp, #1412]
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r11, [sp, #124]         @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1396]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1368]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1408]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1404]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1400]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1376]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r7, r11, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1432]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1428]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1424]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1420]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1416]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1372]
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1296
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1360]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r5, [sp, #1324]
-	ldr	r6, [sp, #1320]
-	ldr	r8, [sp, #1316]
-	ldr	r9, [sp, #1312]
-	ldr	r10, [sp, #1296]
-	ldr	r11, [sp, #1300]
-	ldr	r4, [sp, #1304]
-	ldr	r7, [sp, #1308]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1356]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1352]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1348]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1344]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1340]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1336]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1332]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1328]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, lr, #200
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1236]
-	ldr	r3, [sp, #1240]
-	ldr	r12, [sp, #1244]
-	ldr	lr, [sp, #1248]
-	adds	r0, r0, r10
-	ldr	r10, [sp, #1268]
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r11, [sp, #124]         @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1252]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1224]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1264]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1260]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1256]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1232]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r7, r11, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1288]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1284]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1280]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1276]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1272]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1228]
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1152
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1216]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r5, [sp, #1180]
-	ldr	r6, [sp, #1176]
-	ldr	r8, [sp, #1172]
-	ldr	r9, [sp, #1168]
-	ldr	r10, [sp, #1152]
-	ldr	r11, [sp, #1156]
-	ldr	r4, [sp, #1160]
-	ldr	r7, [sp, #1164]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1212]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1208]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1204]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1200]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1196]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1192]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1188]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1184]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, lr, #56
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1092]
-	ldr	r3, [sp, #1096]
-	ldr	r12, [sp, #1100]
-	ldr	lr, [sp, #1104]
-	adds	r0, r0, r10
-	ldr	r10, [sp, #1124]
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	ldr	r11, [sp, #124]         @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1108]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #1080]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1120]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1116]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1112]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1088]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r7, r11, r7
-	ldr	r11, [sp, #128]         @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1144]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1140]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1136]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1132]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1128]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1084]
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r7, r11
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #1008
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1072]
-	add	r10, sp, #1008
-	ldr	r4, [sp, #1032]
-	ldr	r5, [sp, #1028]
-	ldr	r6, [sp, #1024]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1068]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1064]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1060]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1056]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1052]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1048]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1044]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1040]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1036]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r7, r8, r9, r10}
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r2, [r0, #40]
-	add	r0, sp, #936
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #952
-	adds	r0, r0, r7
-	ldr	r7, [sp, #948]
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r2, r0, r8
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #976
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #940]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #944]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #936]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	adds	r0, r2, r4
-	mul	r1, r0, r11
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #1000]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #996]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #992]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #120]         @ 4-byte Reload
-	adcs	r6, r11, r6
-	str	r6, [sp, #120]          @ 4-byte Spill
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	adcs	r5, r6, r5
-	str	r5, [sp, #116]          @ 4-byte Spill
-	ldr	r5, [sp, #112]          @ 4-byte Reload
-	adcs	r5, r5, r7
-	str	r5, [sp, #112]          @ 4-byte Spill
-	ldr	r5, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #864
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #928]
-	add	r10, sp, #864
-	ldr	r11, [sp, #892]
-	ldr	r4, [sp, #888]
-	ldr	r5, [sp, #884]
-	ldr	r6, [sp, #880]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #924]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #920]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #916]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #912]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #908]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #904]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #900]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #896]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r7, r8, r9, r10}
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r2, [r0, #44]
-	add	r0, sp, #792
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #808
-	adds	r0, r0, r7
-	ldr	r7, [sp, #804]
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r2, r0, r8
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #832
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #796]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #800]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #792]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r1, r2, r4
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	str	r1, [sp, #124]          @ 4-byte Spill
-	mul	r2, r1, r0
-	ldr	r0, [sp, #856]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #852]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #848]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #120]         @ 4-byte Reload
-	adcs	r6, r11, r6
-	str	r6, [sp, #72]           @ 4-byte Spill
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	adcs	r5, r6, r5
-	str	r5, [sp, #68]           @ 4-byte Spill
-	ldr	r5, [sp, #112]          @ 4-byte Reload
-	adcs	r5, r5, r7
-	str	r5, [sp, #64]           @ 4-byte Spill
-	ldr	r5, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	add	r0, sp, #720
-	bl	.LmulPv512x32(PLT)
-	ldr	r1, [sp, #784]
-	add	r10, sp, #720
-	ldr	r5, [sp, #748]
-	ldr	r6, [sp, #744]
-	ldr	r7, [sp, #740]
-	ldr	r11, [sp, #736]
-	add	r0, sp, #648
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #780]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #776]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #772]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #768]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #764]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #760]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #756]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #752]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	ldr	r4, [sp, #732]
-	ldr	r2, [r1, #48]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #652
-	adds	r0, r0, r8
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #676
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #712]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #708]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #648]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #124]         @ 4-byte Reload
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #576
-	bl	.LmulPv512x32(PLT)
-	ldr	r1, [sp, #640]
-	add	r11, sp, #584
-	ldr	r6, [sp, #604]
-	ldr	r5, [sp, #600]
-	ldr	r8, [sp, #596]
-	ldr	r9, [sp, #576]
-	ldr	r10, [sp, #580]
-	add	r0, sp, #504
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #636]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #632]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #628]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #624]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #620]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #616]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #612]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #608]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r11, {r4, r7, r11}
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r1, #52]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #508
-	adds	r0, r0, r9
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #532
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #568]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #564]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #560]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #556]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #552]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #504]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #76]          @ 4-byte Reload
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #432
-	bl	.LmulPv512x32(PLT)
-	ldr	r1, [sp, #496]
-	add	r11, sp, #440
-	ldr	r6, [sp, #460]
-	ldr	r5, [sp, #456]
-	ldr	r8, [sp, #452]
-	ldr	r9, [sp, #432]
-	ldr	r10, [sp, #436]
-	add	r0, sp, #360
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #492]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #488]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #484]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #480]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #476]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #472]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #468]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #464]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r11, {r4, r7, r11}
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r1, #56]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #364
-	adds	r0, r0, r9
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #388
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #424]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #420]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #416]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #412]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #408]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #360]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #76]          @ 4-byte Reload
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #288
-	bl	.LmulPv512x32(PLT)
-	ldr	r1, [sp, #352]
-	add	r11, sp, #296
-	ldr	r7, [sp, #316]
-	ldr	r9, [sp, #288]
-	ldr	r5, [sp, #292]
-	add	r0, sp, #216
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #348]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #344]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #340]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #336]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #332]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #328]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #324]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #320]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r11, {r4, r6, r8, r10, r11}
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r1, #60]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #232
-	adds	r0, r0, r9
-	add	r9, sp, #216
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	adcs	r1, r1, r4
-	str	r1, [sp, #136]          @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	str	r1, [sp, #132]          @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r8
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	add	r10, sp, #256
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r11
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	adcs	r1, r1, r7
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #120]          @ 4-byte Spill
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldm	r9, {r4, r7, r9}
-	ldr	r5, [sp, #228]
-	adds	r8, r0, r4
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r1, r8, r0
-	ldr	r0, [sp, #280]
-	str	r1, [sp, #64]           @ 4-byte Spill
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #276]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #272]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #268]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r6, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #136]         @ 4-byte Reload
-	adcs	r11, r11, r7
-	ldr	r7, [sp, #132]          @ 4-byte Reload
-	adcs	r9, r7, r9
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	adcs	r5, r7, r5
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r10, r0, r1
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	add	r0, sp, #144
-	bl	.LmulPv512x32(PLT)
-	add	r3, sp, #144
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r8, r0
-	adcs	r7, r11, r1
-	ldr	r0, [sp, #160]
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r8, r9, r2
-	str	r7, [sp, #56]           @ 4-byte Spill
-	adcs	r5, r5, r3
-	mov	r3, r6
-	str	r8, [sp, #64]           @ 4-byte Spill
-	str	r5, [sp, #72]           @ 4-byte Spill
-	adcs	r4, r1, r0
-	ldr	r0, [sp, #164]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r4, [sp, #76]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #168]
-	adcs	lr, r1, r0
-	ldr	r0, [sp, #172]
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	lr, [sp, #52]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #176]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #180]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #184]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #188]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #192]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #196]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #200]
-	adcs	r0, r10, r0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #204]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #208]
-	adcs	r0, r1, r0
-	ldr	r1, [r3]
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adc	r0, r0, #0
-	subs	r12, r7, r1
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldmib	r3, {r0, r2, r6}
-	ldr	r1, [r3, #32]
-	ldr	r11, [r3, #40]
-	ldr	r9, [r3, #28]
-	sbcs	r7, r8, r0
-	ldr	r0, [r3, #36]
-	sbcs	r5, r5, r2
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	sbcs	r10, r4, r6
-	ldr	r6, [r3, #20]
-	ldr	r4, [r3, #24]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r3, #44]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r3, #48]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r3, #52]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r3, #56]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r3, #60]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r3, #16]
-	sbcs	r2, r2, r0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	sbcs	r3, lr, r6
-	ldr	r6, [sp, #64]           @ 4-byte Reload
-	sbcs	lr, r0, r4
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	sbcs	r4, r0, r9
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	sbcs	r8, r0, r1
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	sbcs	r9, r0, r1
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	sbcs	r11, r0, r11
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	sbc	r0, r0, #0
-	ands	r1, r0, #1
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	movne	r7, r6
-	movne	r12, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	r12, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	movne	r5, r7
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r5, [r0, #8]
-	movne	r10, r7
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	str	r10, [r0, #12]
-	movne	r2, r7
-	str	r2, [r0, #16]
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	movne	r3, r2
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #20]
-	ldr	r3, [sp, #60]           @ 4-byte Reload
-	movne	lr, r2
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	str	lr, [r0, #24]
-	movne	r4, r2
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	str	r4, [r0, #28]
-	movne	r8, r2
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r8, [r0, #32]
-	movne	r9, r2
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	str	r9, [r0, #36]
-	movne	r11, r2
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	str	r11, [r0, #40]
-	movne	r3, r2
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #44]
-	ldr	r3, [sp, #80]           @ 4-byte Reload
-	movne	r3, r2
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	str	r3, [r0, #48]
-	ldr	r3, [sp, #84]           @ 4-byte Reload
-	movne	r3, r2
-	ldr	r2, [sp, #132]          @ 4-byte Reload
-	str	r3, [r0, #52]
-	ldr	r3, [sp, #88]           @ 4-byte Reload
-	movne	r3, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [sp, #140]          @ 4-byte Reload
-	str	r3, [r0, #56]
-	movne	r2, r1
-	str	r2, [r0, #60]
-	add	sp, sp, #404
-	add	sp, sp, #2048
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end242:
-	.size	mcl_fp_mont16L, .Lfunc_end242-mcl_fp_mont16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montNF16L
-	.align	2
-	.type	mcl_fp_montNF16L,%function
-mcl_fp_montNF16L:                       @ @mcl_fp_montNF16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#396
-	sub	sp, sp, #396
-	.pad	#2048
-	sub	sp, sp, #2048
-	add	r12, sp, #124
-	mov	r4, r3
-	stm	r12, {r1, r2, r3}
-	str	r0, [sp, #92]           @ 4-byte Spill
-	add	r0, sp, #2368
-	ldr	r5, [r3, #-4]
-	ldr	r2, [r2]
-	str	r5, [sp, #120]          @ 4-byte Spill
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #2368]
-	ldr	r1, [sp, #2372]
-	add	r9, sp, #2048
-	str	r0, [sp, #68]           @ 4-byte Spill
-	mul	r2, r0, r5
-	ldr	r0, [sp, #2432]
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #2376]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #2428]
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #2380]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #2424]
-	str	r1, [sp, #80]           @ 4-byte Spill
-	mov	r1, r4
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #2420]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #2416]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #2412]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #2408]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #2404]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #2400]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #2396]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #2392]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2388]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2384]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, r9, #248
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #2360]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r5, [sp, #2324]
-	ldr	r6, [sp, #2320]
-	ldr	r7, [sp, #2316]
-	ldr	r8, [sp, #2312]
-	ldr	r10, [sp, #2296]
-	ldr	r11, [sp, #2300]
-	ldr	r4, [sp, #2304]
-	ldr	r9, [sp, #2308]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2356]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2352]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2348]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2344]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2340]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #2336]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2332]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #2328]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #4]
-	add	r0, sp, #2224
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #2236]
-	ldr	r3, [sp, #2240]
-	ldr	r12, [sp, #2244]
-	ldr	lr, [sp, #2248]
-	adds	r0, r10, r0
-	ldr	r10, [sp, #2268]
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	ldr	r11, [sp, #88]          @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #2252]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	ldr	r9, [sp, #2264]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	ldr	r8, [sp, #2260]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #2224]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #2256]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adc	r0, r1, r0
-	adds	r6, r11, r6
-	ldr	r1, [sp, #2232]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2288]
-	str	r6, [sp, #20]           @ 4-byte Spill
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #2284]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #2280]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #2276]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #2272]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #2228]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	add	r4, sp, #2048
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r4, #104
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #2216]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #2180]
-	ldr	r7, [sp, #2176]
-	ldr	r5, [sp, #2172]
-	ldr	r8, [sp, #2168]
-	ldr	r9, [sp, #2152]
-	ldr	r10, [sp, #2156]
-	ldr	r11, [sp, #2160]
-	ldr	r4, [sp, #2164]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2212]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2208]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2204]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2200]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2196]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2192]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #2188]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #2184]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, sp, #2080
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #2092]
-	ldr	r3, [sp, #2096]
-	ldr	r12, [sp, #2100]
-	ldr	lr, [sp, #2104]
-	adds	r0, r0, r9
-	ldr	r9, [sp, #2120]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #2124]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #2108]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #2116]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #2112]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #2080]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #2088]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2144]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2140]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2136]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2132]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2128]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2084]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r4, #984
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #2072]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #2036]
-	ldr	r7, [sp, #2032]
-	ldr	r5, [sp, #2028]
-	ldr	r8, [sp, #2024]
-	ldr	r9, [sp, #2008]
-	ldr	r10, [sp, #2012]
-	ldr	r11, [sp, #2016]
-	ldr	r4, [sp, #2020]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2068]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2064]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2060]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2056]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2052]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #2048]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2044]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #2040]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, sp, #1936
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1948]
-	ldr	r3, [sp, #1952]
-	ldr	r12, [sp, #1956]
-	ldr	lr, [sp, #1960]
-	adds	r0, r0, r9
-	ldr	r9, [sp, #1976]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1980]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1964]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1972]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1968]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1936]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1944]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2000]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1996]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1992]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1988]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1984]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1940]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r4, #840
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1928]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #1892]
-	ldr	r7, [sp, #1888]
-	ldr	r5, [sp, #1884]
-	ldr	r8, [sp, #1880]
-	ldr	r9, [sp, #1864]
-	ldr	r10, [sp, #1868]
-	ldr	r11, [sp, #1872]
-	ldr	r4, [sp, #1876]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1924]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1920]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1916]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1912]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1908]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1904]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1900]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1896]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, sp, #1792
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1804]
-	ldr	r3, [sp, #1808]
-	ldr	r12, [sp, #1812]
-	ldr	lr, [sp, #1816]
-	adds	r0, r0, r9
-	ldr	r9, [sp, #1832]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1836]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1820]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1828]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1824]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1792]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1800]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1856]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1852]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1848]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1844]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1840]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1796]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r4, #696
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1784]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #1748]
-	ldr	r7, [sp, #1744]
-	ldr	r5, [sp, #1740]
-	ldr	r8, [sp, #1736]
-	ldr	r9, [sp, #1720]
-	ldr	r10, [sp, #1724]
-	ldr	r11, [sp, #1728]
-	ldr	r4, [sp, #1732]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1780]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1776]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1772]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1768]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1764]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1760]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1756]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1752]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, sp, #1648
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1660]
-	ldr	r3, [sp, #1664]
-	ldr	r12, [sp, #1668]
-	ldr	lr, [sp, #1672]
-	adds	r0, r0, r9
-	ldr	r9, [sp, #1688]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1692]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1676]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1684]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1680]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1648]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1656]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1712]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1708]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1704]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1700]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1696]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1652]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r4, #552
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1640]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #1604]
-	ldr	r7, [sp, #1600]
-	ldr	r5, [sp, #1596]
-	ldr	r8, [sp, #1592]
-	ldr	r9, [sp, #1576]
-	ldr	r10, [sp, #1580]
-	ldr	r11, [sp, #1584]
-	ldr	r4, [sp, #1588]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1636]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1632]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1628]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1624]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1620]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1616]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1612]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1608]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #1504
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1516]
-	ldr	r3, [sp, #1520]
-	ldr	r12, [sp, #1524]
-	ldr	lr, [sp, #1528]
-	adds	r0, r0, r9
-	ldr	r9, [sp, #1544]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1548]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1532]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1540]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1536]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1504]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1512]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1568]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1564]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1560]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1556]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1552]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1508]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r4, #408
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1496]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #1460]
-	ldr	r7, [sp, #1456]
-	ldr	r5, [sp, #1452]
-	ldr	r8, [sp, #1448]
-	ldr	r9, [sp, #1432]
-	ldr	r10, [sp, #1436]
-	ldr	r11, [sp, #1440]
-	ldr	r4, [sp, #1444]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1492]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1488]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1484]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1480]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1476]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1472]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1468]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1464]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #1360
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1372]
-	ldr	r3, [sp, #1376]
-	ldr	r12, [sp, #1380]
-	ldr	lr, [sp, #1384]
-	adds	r0, r0, r9
-	ldr	r9, [sp, #1400]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1404]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1388]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1396]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1392]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1360]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1368]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1424]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1420]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1416]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1412]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1408]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1364]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r4, #264
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1352]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #1316]
-	ldr	r7, [sp, #1312]
-	ldr	r5, [sp, #1308]
-	ldr	r8, [sp, #1304]
-	ldr	r9, [sp, #1288]
-	ldr	r10, [sp, #1292]
-	ldr	r11, [sp, #1296]
-	ldr	r4, [sp, #1300]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1348]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1344]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1340]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1336]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1332]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1328]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1324]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1320]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #1216
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1228]
-	ldr	r3, [sp, #1232]
-	ldr	r12, [sp, #1236]
-	ldr	lr, [sp, #1240]
-	adds	r0, r0, r9
-	ldr	r9, [sp, #1256]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1260]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1244]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1252]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1248]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1216]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1224]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1280]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1276]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1272]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1268]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1264]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1220]
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, r4, #120
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1208]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #1172]
-	ldr	r7, [sp, #1168]
-	ldr	r5, [sp, #1164]
-	ldr	r8, [sp, #1160]
-	ldr	r9, [sp, #1144]
-	ldr	r10, [sp, #1148]
-	ldr	r11, [sp, #1152]
-	ldr	r4, [sp, #1156]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1204]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1200]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1196]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1192]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1188]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1184]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1180]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1176]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, sp, #1072
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	ldr	r2, [sp, #1084]
-	ldr	r3, [sp, #1088]
-	ldr	r12, [sp, #1092]
-	ldr	lr, [sp, #1096]
-	adds	r0, r0, r9
-	ldr	r9, [sp, #1112]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1116]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1100]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1108]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1104]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1072]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r6, r11, r6
-	ldr	r1, [sp, #1080]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1136]
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1132]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1128]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1124]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1120]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1076]
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r6, r7
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #1000
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1064]
-	add	r11, sp, #1000
-	ldr	r6, [sp, #1024]
-	ldr	r5, [sp, #1020]
-	ldr	r8, [sp, #1016]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1060]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1056]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1052]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1048]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1044]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1040]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1036]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1032]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1028]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r4, [sp, #1012]
-	ldr	r2, [r0, #40]
-	add	r0, sp, #928
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	add	lr, sp, #944
-	adds	r0, r0, r9
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r2, r0, r10
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	add	r10, sp, #968
-	adcs	r0, r0, r11
-	ldr	r11, [sp, #932]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #940]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #928]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #936]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #52]           @ 4-byte Spill
-	adds	r0, r2, r5
-	mul	r1, r0, r7
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #992]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #988]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #984]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	adcs	r7, r7, r11
-	str	r7, [sp, #112]          @ 4-byte Spill
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	adcs	r6, r7, r6
-	str	r6, [sp, #108]          @ 4-byte Spill
-	ldr	r6, [sp, #104]          @ 4-byte Reload
-	adcs	r6, r6, r8
-	str	r6, [sp, #104]          @ 4-byte Spill
-	ldr	r6, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	add	r0, sp, #856
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #920]
-	add	r11, sp, #856
-	ldr	r4, [sp, #884]
-	ldr	r7, [sp, #880]
-	ldr	r5, [sp, #876]
-	ldr	r6, [sp, #872]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #916]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #912]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #908]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #904]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #900]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #896]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #892]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #888]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r2, [r0, #44]
-	add	r0, sp, #784
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #800
-	adds	r0, r0, r8
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r2, r0, r9
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #824
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #788]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #792]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #796]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #784]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, r1
-	adds	r1, r2, r4
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	str	r1, [sp, #116]          @ 4-byte Spill
-	mul	r2, r1, r0
-	ldr	r0, [sp, #848]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #844]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #840]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r4, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #112]         @ 4-byte Reload
-	adcs	r6, r11, r6
-	str	r6, [sp, #68]           @ 4-byte Spill
-	ldr	r6, [sp, #108]          @ 4-byte Reload
-	adcs	r5, r6, r5
-	str	r5, [sp, #64]           @ 4-byte Spill
-	ldr	r5, [sp, #104]          @ 4-byte Reload
-	adcs	r5, r5, r7
-	str	r5, [sp, #60]           @ 4-byte Spill
-	ldr	r5, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	add	r0, sp, #712
-	bl	.LmulPv512x32(PLT)
-	ldr	r1, [sp, #776]
-	ldr	r11, [sp, #740]
-	ldr	r8, [sp, #736]
-	ldr	r9, [sp, #732]
-	ldr	r10, [sp, #728]
-	ldr	r6, [sp, #712]
-	ldr	r7, [sp, #716]
-	ldr	r5, [sp, #720]
-	ldr	r4, [sp, #724]
-	add	r0, sp, #640
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #772]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #768]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #764]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #760]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #756]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #752]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #748]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #744]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r1, #48]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #644
-	adds	r0, r0, r6
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #668
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #688]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #640]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #116]         @ 4-byte Reload
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #568
-	bl	.LmulPv512x32(PLT)
-	ldr	r1, [sp, #632]
-	ldr	r6, [sp, #596]
-	ldr	r7, [sp, #592]
-	ldr	r8, [sp, #588]
-	ldr	r5, [sp, #584]
-	ldr	r9, [sp, #568]
-	ldr	r10, [sp, #572]
-	ldr	r4, [sp, #576]
-	ldr	r11, [sp, #580]
-	add	r0, sp, #496
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #628]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #624]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #620]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #616]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #612]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #608]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #604]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #600]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r1, #52]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #500
-	adds	r0, r0, r9
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #524
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #560]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #556]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #552]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #548]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #544]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #496]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #72]          @ 4-byte Reload
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #424
-	bl	.LmulPv512x32(PLT)
-	ldr	r1, [sp, #488]
-	ldr	r6, [sp, #452]
-	ldr	r7, [sp, #448]
-	ldr	r8, [sp, #444]
-	ldr	r5, [sp, #440]
-	ldr	r9, [sp, #424]
-	ldr	r10, [sp, #428]
-	ldr	r4, [sp, #432]
-	ldr	r11, [sp, #436]
-	add	r0, sp, #352
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #484]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #480]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #476]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #472]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #468]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #464]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #460]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #456]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r1, #56]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #356
-	adds	r0, r0, r9
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #380
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #416]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #412]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #408]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #404]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #400]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r6, [sp, #352]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #72]          @ 4-byte Reload
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	adds	r6, r11, r6
-	adcs	r0, r7, r0
-	str	r6, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r2, r6, r0
-	add	r0, sp, #280
-	bl	.LmulPv512x32(PLT)
-	ldr	r1, [sp, #344]
-	add	r11, sp, #284
-	ldr	r8, [sp, #308]
-	ldr	r9, [sp, #304]
-	ldr	r10, [sp, #300]
-	ldr	r7, [sp, #280]
-	add	r0, sp, #208
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #340]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #336]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #332]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #328]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #324]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #320]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #316]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #312]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r11}
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [r1, #60]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #224
-	adds	r0, r0, r7
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	adcs	r1, r1, r5
-	str	r1, [sp, #128]          @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r6
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r11
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	add	r10, sp, #248
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	adcs	r1, r1, r8
-	add	r8, sp, #208
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adc	r1, r1, r2
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldm	r8, {r4, r5, r6, r8}
-	adds	r9, r0, r4
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	mul	r1, r9, r0
-	ldr	r0, [sp, #272]
-	str	r1, [sp, #60]           @ 4-byte Spill
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #268]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #264]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #260]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	r10, {r4, r7, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #128]         @ 4-byte Reload
-	adcs	r11, r11, r5
-	ldr	r5, [sp, #124]          @ 4-byte Reload
-	adcs	r6, r5, r6
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	adcs	r8, r5, r8
-	ldr	r5, [sp, #68]           @ 4-byte Reload
-	adcs	r5, r5, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r7
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r10, r0, #0
-	add	r0, sp, #136
-	bl	.LmulPv512x32(PLT)
-	add	r3, sp, #136
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r9, r0
-	ldr	r0, [sp, #152]
-	adcs	r4, r11, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r6, r6, r2
-	str	r4, [sp, #48]           @ 4-byte Spill
-	adcs	lr, r8, r3
-	mov	r3, r7
-	str	r6, [sp, #56]           @ 4-byte Spill
-	str	lr, [sp, #60]           @ 4-byte Spill
-	adcs	r5, r5, r0
-	ldr	r0, [sp, #156]
-	str	r5, [sp, #68]           @ 4-byte Spill
-	adcs	r9, r1, r0
-	ldr	r0, [sp, #160]
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #164]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #168]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #172]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #176]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #180]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #184]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #188]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #192]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #196]
-	adcs	r0, r1, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #200]
-	adc	r10, r10, r0
-	ldm	r3, {r0, r7}
-	ldr	r1, [r3, #8]
-	ldr	r2, [r3, #12]
-	subs	r12, r4, r0
-	ldr	r0, [r3, #32]
-	sbcs	r4, r6, r7
-	ldr	r7, [r3, #60]
-	sbcs	r6, lr, r1
-	add	lr, r3, #16
-	ldr	r1, [r3, #28]
-	sbcs	r8, r5, r2
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r3, #36]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r3, #40]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r3, #44]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r3, #48]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r3, #52]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r3, #56]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldm	lr, {r0, r5, lr}
-	ldr	r11, [sp, #84]          @ 4-byte Reload
-	sbcs	r2, r9, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	sbcs	r3, r0, r5
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	sbcs	lr, r11, lr
-	sbcs	r5, r0, r1
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	sbc	r1, r10, r7
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	cmp	r1, #0
-	movlt	r12, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	movlt	r4, r7
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	str	r12, [r0]
-	str	r4, [r0, #4]
-	ldr	r4, [sp, #60]           @ 4-byte Reload
-	ldr	r12, [sp, #64]          @ 4-byte Reload
-	movlt	r6, r4
-	cmp	r1, #0
-	ldr	r4, [sp, #88]           @ 4-byte Reload
-	str	r6, [r0, #8]
-	ldr	r6, [sp, #68]           @ 4-byte Reload
-	movlt	r2, r9
-	movlt	r8, r6
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	str	r8, [r0, #12]
-	str	r2, [r0, #16]
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	movlt	r3, r2
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	cmp	r1, #0
-	movlt	lr, r11
-	str	r3, [r0, #20]
-	ldr	r3, [sp, #132]          @ 4-byte Reload
-	str	lr, [r0, #24]
-	ldr	lr, [sp, #72]           @ 4-byte Reload
-	movlt	r5, r2
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	str	r5, [r0, #28]
-	ldr	r5, [sp, #80]           @ 4-byte Reload
-	movlt	r12, r2
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r12, [r0, #32]
-	movlt	lr, r2
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	str	lr, [r0, #36]
-	movlt	r6, r2
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	str	r6, [r0, #40]
-	movlt	r5, r2
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r5, [r0, #44]
-	movlt	r4, r2
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	str	r4, [r0, #48]
-	movlt	r3, r2
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	str	r3, [r0, #52]
-	movlt	r7, r2
-	cmp	r1, #0
-	movlt	r1, r10
-	str	r7, [r0, #56]
-	str	r1, [r0, #60]
-	add	sp, sp, #396
-	add	sp, sp, #2048
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end243:
-	.size	mcl_fp_montNF16L, .Lfunc_end243-mcl_fp_montNF16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montRed16L
-	.align	2
-	.type	mcl_fp_montRed16L,%function
-mcl_fp_montRed16L:                      @ @mcl_fp_montRed16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#356
-	sub	sp, sp, #356
-	.pad	#1024
-	sub	sp, sp, #1024
-	mov	r3, r2
-	str	r0, [sp, #200]          @ 4-byte Spill
-	ldr	r2, [r1, #4]
-	ldr	r7, [r1]
-	add	r10, sp, #1024
-	ldr	r0, [r3]
-	str	r3, [sp, #216]          @ 4-byte Spill
-	str	r2, [sp, #112]          @ 4-byte Spill
-	ldr	r2, [r1, #8]
-	str	r0, [sp, #192]          @ 4-byte Spill
-	ldr	r0, [r3, #4]
-	str	r2, [sp, #108]          @ 4-byte Spill
-	ldr	r2, [r1, #12]
-	str	r0, [sp, #188]          @ 4-byte Spill
-	ldr	r0, [r3, #8]
-	str	r2, [sp, #104]          @ 4-byte Spill
-	str	r0, [sp, #184]          @ 4-byte Spill
-	ldr	r0, [r3, #12]
-	str	r0, [sp, #168]          @ 4-byte Spill
-	ldr	r0, [r3, #16]
-	str	r0, [sp, #172]          @ 4-byte Spill
-	ldr	r0, [r3, #20]
-	str	r0, [sp, #176]          @ 4-byte Spill
-	ldr	r0, [r3, #24]
-	str	r0, [sp, #180]          @ 4-byte Spill
-	ldr	r0, [r3, #-4]
-	str	r0, [sp, #220]          @ 4-byte Spill
-	mul	r2, r7, r0
-	ldr	r0, [r3, #60]
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [r3, #32]
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [r3, #36]
-	str	r0, [sp, #144]          @ 4-byte Spill
-	ldr	r0, [r3, #40]
-	str	r0, [sp, #148]          @ 4-byte Spill
-	ldr	r0, [r3, #44]
-	str	r0, [sp, #152]          @ 4-byte Spill
-	ldr	r0, [r3, #48]
-	str	r0, [sp, #156]          @ 4-byte Spill
-	ldr	r0, [r3, #52]
-	str	r0, [sp, #160]          @ 4-byte Spill
-	ldr	r0, [r3, #56]
-	str	r0, [sp, #164]          @ 4-byte Spill
-	ldr	r0, [r3, #28]
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [r1, #96]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [r1, #100]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [r1, #104]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [r1, #108]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [r1, #112]
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [r1, #116]
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [r1, #120]
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [r1, #124]
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [r1, #64]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r1, #68]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r1, #72]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r1, #76]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r1, #80]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r1, #84]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [r1, #88]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [r1, #92]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r1, #56]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r1, #60]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r1, #28]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r1, #24]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r1, #20]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r1, #16]
-	mov	r1, r3
-	str	r0, [sp, #16]           @ 4-byte Spill
-	add	r0, r10, #280
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1368]
-	ldr	r10, [sp, #1304]
-	ldr	r1, [sp, #1312]
-	ldr	r2, [sp, #1316]
-	ldr	r3, [sp, #1320]
-	ldr	r12, [sp, #1324]
-	ldr	lr, [sp, #1328]
-	ldr	r4, [sp, #1332]
-	ldr	r5, [sp, #1336]
-	ldr	r6, [sp, #1340]
-	ldr	r8, [sp, #1344]
-	ldr	r9, [sp, #1348]
-	ldr	r11, [sp, #1352]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1364]
-	adds	r7, r7, r10
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1360]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1356]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #1308]
-	adcs	r10, r7, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #216]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #212]          @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	mul	r2, r10, r0
-	add	r0, sp, #1232
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1296]
-	ldr	r4, [sp, #1232]
-	ldr	r1, [sp, #1240]
-	ldr	r2, [sp, #1244]
-	ldr	r3, [sp, #1248]
-	ldr	r9, [sp, #1252]
-	ldr	r12, [sp, #1256]
-	ldr	r11, [sp, #1260]
-	ldr	lr, [sp, #1264]
-	ldr	r6, [sp, #1268]
-	ldr	r7, [sp, #1272]
-	ldr	r8, [sp, #1276]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1292]
-	adds	r4, r10, r4
-	ldr	r4, [sp, #112]          @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1288]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1284]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #1280]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #1236]
-	adcs	r10, r4, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r4, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r10, r4
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	mov	r11, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	add	r8, sp, #1024
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	add	r0, r8, #136
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1224]
-	add	r12, sp, #1168
-	ldr	r9, [sp, #1204]
-	ldr	r7, [sp, #1200]
-	ldr	r6, [sp, #1196]
-	ldr	r5, [sp, #1192]
-	ldr	lr, [sp, #1188]
-	ldr	r10, [sp, #1184]
-	ldr	r8, [sp, #1164]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1220]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1216]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1212]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1208]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #1160]
-	ldm	r12, {r1, r2, r3, r12}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r8, r0, r8
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r8, r4
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	mov	r10, r8
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #216]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
-	add	r0, sp, #1088
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1152]
-	add	r9, sp, #1120
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1148]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1144]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1140]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1136]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r9, {r6, r7, r8, r9}
-	ldr	r4, [sp, #1088]
-	ldr	r0, [sp, #1092]
-	ldr	r1, [sp, #1096]
-	ldr	r2, [sp, #1100]
-	ldr	r3, [sp, #1104]
-	ldr	r12, [sp, #1108]
-	ldr	lr, [sp, #1112]
-	ldr	r11, [sp, #1116]
-	adds	r4, r10, r4
-	ldr	r4, [sp, #112]          @ 4-byte Reload
-	adcs	r10, r4, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r4, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r10, r4
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	mov	r8, r10
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, sp, #1016
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1080]
-	add	r11, sp, #1016
-	ldr	r6, [sp, #1060]
-	ldr	r7, [sp, #1056]
-	ldr	r5, [sp, #1052]
-	ldr	lr, [sp, #1048]
-	ldr	r12, [sp, #1044]
-	ldr	r10, [sp, #1040]
-	ldr	r9, [sp, #1036]
-	ldr	r3, [sp, #1032]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1076]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1072]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1068]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1064]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r11, {r0, r1, r2, r11}
-	adds	r0, r8, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r1, r0, r1
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	mov	r10, r1
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #216]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r1, r4
-	mov	r1, r5
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #944
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #1008]
-	add	r9, sp, #976
-	add	lr, sp, #948
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1004]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1000]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #996]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #992]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r9, {r6, r7, r8, r9}
-	ldr	r4, [sp, #944]
-	ldr	r11, [sp, #972]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r4, r10, r4
-	ldr	r4, [sp, #112]          @ 4-byte Reload
-	adcs	r10, r4, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r4, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r10, r4
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	mov	r11, r10
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, sp, #872
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #936]
-	add	lr, sp, #888
-	add	r8, sp, #872
-	ldr	r6, [sp, #916]
-	ldr	r7, [sp, #912]
-	ldr	r5, [sp, #908]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #932]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #928]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #924]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #920]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	lr, {r3, r9, r10, r12, lr}
-	ldm	r8, {r0, r1, r2, r8}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #216]          @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	add	r0, sp, #800
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #864]
-	add	r10, sp, #828
-	add	lr, sp, #804
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #860]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #856]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #852]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #848]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldr	r4, [sp, #800]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r4, r11, r4
-	ldr	r4, [sp, #112]          @ 4-byte Reload
-	adcs	r11, r4, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r4, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #728
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #792]
-	add	r8, sp, #760
-	add	lr, sp, #748
-	add	r12, sp, #728
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #788]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #784]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #780]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #776]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r8, {r5, r6, r7, r8}
-	ldm	lr, {r9, r10, lr}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #216]          @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #656
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #720]
-	add	r10, sp, #684
-	add	lr, sp, #660
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #716]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #712]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #708]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldr	r4, [sp, #656]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r4, r11, r4
-	ldr	r4, [sp, #112]          @ 4-byte Reload
-	adcs	r11, r4, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r4, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #584
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #648]
-	add	r8, sp, #616
-	add	lr, sp, #604
-	add	r12, sp, #584
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #644]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #640]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #636]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #632]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	r8, {r5, r6, r7, r8}
-	ldm	lr, {r9, r10, lr}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #216]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	add	r0, sp, #512
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #576]
-	add	r10, sp, #540
-	add	lr, sp, #516
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #572]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #568]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #564]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #560]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldr	r4, [sp, #512]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r4, r11, r4
-	ldr	r4, [sp, #112]          @ 4-byte Reload
-	adcs	r11, r4, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r4, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	add	r0, sp, #440
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #504]
-	add	r8, sp, #472
-	add	lr, sp, #460
-	add	r12, sp, #440
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #500]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #496]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #492]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #488]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldm	r8, {r5, r6, r7, r8}
-	ldm	lr, {r9, r10, lr}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #216]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	add	r0, sp, #368
-	bl	.LmulPv512x32(PLT)
-	ldr	r0, [sp, #432]
-	add	r10, sp, #396
-	add	lr, sp, #372
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #428]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #424]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #420]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #416]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldr	r4, [sp, #368]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r4, r11, r4
-	ldr	r4, [sp, #112]          @ 4-byte Reload
-	adcs	r4, r4, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #220]          @ 4-byte Reload
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	mul	r2, r4, r6
-	adcs	r0, r0, r7
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	add	r0, sp, #296
-	bl	.LmulPv512x32(PLT)
-	add	r5, sp, #296
-	add	r7, sp, #336
-	add	lr, sp, #312
-	ldm	r5, {r0, r1, r3, r5}
-	ldr	r9, [sp, #356]
-	adds	r0, r4, r0
-	adcs	r8, r11, r1
-	ldr	r11, [sp, #352]
-	mul	r0, r8, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #360]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #348]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldm	r7, {r4, r6, r7}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r10, [sp, #212]         @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r10, r0, r11
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	add	r0, sp, #224
-	bl	.LmulPv512x32(PLT)
-	add	r3, sp, #224
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r8, r0
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r12, r0, r1
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r12, [sp, #92]          @ 4-byte Spill
-	adcs	r2, r0, r2
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	str	r2, [sp, #96]           @ 4-byte Spill
-	adcs	lr, r0, r3
-	ldr	r0, [sp, #240]
-	str	lr, [sp, #100]          @ 4-byte Spill
-	adcs	r4, r1, r0
-	ldr	r0, [sp, #244]
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r4, [sp, #104]          @ 4-byte Spill
-	adcs	r5, r1, r0
-	ldr	r0, [sp, #248]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r5, [sp, #108]          @ 4-byte Spill
-	adcs	r7, r1, r0
-	ldr	r0, [sp, #252]
-	ldr	r1, [sp, #208]          @ 4-byte Reload
-	str	r7, [sp, #112]          @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #204]          @ 4-byte Reload
-	str	r0, [sp, #208]          @ 4-byte Spill
-	ldr	r0, [sp, #256]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [sp, #260]
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #264]
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r11, [sp, #116]         @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #212]          @ 4-byte Reload
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #268]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r0, [sp, #272]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #276]
-	adcs	r10, r10, r0
-	ldr	r0, [sp, #280]
-	str	r10, [sp, #128]         @ 4-byte Spill
-	adcs	r8, r1, r0
-	ldr	r0, [sp, #284]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r8, [sp, #132]          @ 4-byte Spill
-	adcs	r6, r6, r0
-	ldr	r0, [sp, #288]
-	adcs	r3, r1, r0
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #192]          @ 4-byte Reload
-	subs	r1, r12, r0
-	ldr	r0, [sp, #188]          @ 4-byte Reload
-	sbcs	r2, r2, r0
-	ldr	r0, [sp, #184]          @ 4-byte Reload
-	sbcs	r12, lr, r0
-	ldr	r0, [sp, #168]          @ 4-byte Reload
-	sbcs	lr, r4, r0
-	ldr	r0, [sp, #172]          @ 4-byte Reload
-	sbcs	r4, r5, r0
-	ldr	r0, [sp, #176]          @ 4-byte Reload
-	sbcs	r5, r7, r0
-	ldr	r0, [sp, #180]          @ 4-byte Reload
-	ldr	r7, [sp, #208]          @ 4-byte Reload
-	sbcs	r9, r7, r0
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r7, [sp, #204]          @ 4-byte Reload
-	sbcs	r0, r7, r0
-	ldr	r7, [sp, #212]          @ 4-byte Reload
-	str	r0, [sp, #172]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	sbcs	r0, r11, r0
-	ldr	r11, [sp, #216]         @ 4-byte Reload
-	str	r0, [sp, #176]          @ 4-byte Spill
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	sbcs	r0, r11, r0
-	ldr	r11, [sp, #220]         @ 4-byte Reload
-	str	r0, [sp, #180]          @ 4-byte Spill
-	ldr	r0, [sp, #148]          @ 4-byte Reload
-	sbcs	r0, r7, r0
-	str	r0, [sp, #184]          @ 4-byte Spill
-	ldr	r0, [sp, #152]          @ 4-byte Reload
-	sbcs	r0, r11, r0
-	str	r0, [sp, #188]          @ 4-byte Spill
-	ldr	r0, [sp, #156]          @ 4-byte Reload
-	sbcs	r0, r10, r0
-	mov	r10, r6
-	str	r0, [sp, #192]          @ 4-byte Spill
-	ldr	r0, [sp, #160]          @ 4-byte Reload
-	sbcs	r7, r8, r0
-	ldr	r0, [sp, #164]          @ 4-byte Reload
-	mov	r8, r3
-	sbcs	r11, r6, r0
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	sbcs	r6, r3, r0
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	sbc	r3, r0, #0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ands	r3, r3, #1
-	movne	r1, r0
-	ldr	r0, [sp, #200]          @ 4-byte Reload
-	str	r1, [r0]
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	movne	r2, r1
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	str	r2, [r0, #4]
-	ldr	r2, [sp, #172]          @ 4-byte Reload
-	movne	r12, r1
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	cmp	r3, #0
-	str	r12, [r0, #8]
-	movne	lr, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	lr, [r0, #12]
-	movne	r4, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	str	r4, [r0, #16]
-	movne	r5, r1
-	ldr	r1, [sp, #208]          @ 4-byte Reload
-	cmp	r3, #0
-	str	r5, [r0, #20]
-	movne	r9, r1
-	ldr	r1, [sp, #204]          @ 4-byte Reload
-	str	r9, [r0, #24]
-	movne	r2, r1
-	ldr	r1, [sp, #176]          @ 4-byte Reload
-	str	r2, [r0, #28]
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	movne	r1, r2
-	cmp	r3, #0
-	ldr	r2, [sp, #180]          @ 4-byte Reload
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #216]          @ 4-byte Reload
-	movne	r2, r1
-	ldr	r1, [sp, #212]          @ 4-byte Reload
-	str	r2, [r0, #36]
-	ldr	r2, [sp, #184]          @ 4-byte Reload
-	movne	r2, r1
-	ldr	r1, [sp, #220]          @ 4-byte Reload
-	str	r2, [r0, #40]
-	ldr	r2, [sp, #188]          @ 4-byte Reload
-	movne	r2, r1
-	cmp	r3, #0
-	ldr	r1, [sp, #192]          @ 4-byte Reload
-	str	r2, [r0, #44]
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	movne	r11, r10
-	movne	r1, r2
-	str	r1, [r0, #48]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	movne	r7, r1
-	cmp	r3, #0
-	movne	r6, r8
-	str	r7, [r0, #52]
-	str	r11, [r0, #56]
-	str	r6, [r0, #60]
-	add	sp, sp, #356
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end244:
-	.size	mcl_fp_montRed16L, .Lfunc_end244-mcl_fp_montRed16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addPre16L
-	.align	2
-	.type	mcl_fp_addPre16L,%function
-mcl_fp_addPre16L:                       @ @mcl_fp_addPre16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#64
-	sub	sp, sp, #64
-	ldm	r1, {r3, r8}
-	ldr	r5, [r1, #8]
-	ldr	r6, [r1, #12]
-	ldm	r2, {r7, r12, lr}
-	ldr	r4, [r2, #12]
-	ldr	r9, [r1, #32]
-	ldr	r11, [r1, #52]
-	adds	r3, r7, r3
-	str	r3, [sp, #28]           @ 4-byte Spill
-	ldr	r3, [r2, #32]
-	adcs	r7, r12, r8
-	ldr	r8, [r2, #24]
-	add	r12, r1, #16
-	adcs	r5, lr, r5
-	ldr	lr, [r2, #16]
-	adcs	r6, r4, r6
-	ldr	r4, [r2, #20]
-	str	r3, [sp, #32]           @ 4-byte Spill
-	ldr	r3, [r2, #36]
-	str	r3, [sp, #36]           @ 4-byte Spill
-	ldr	r3, [r2, #40]
-	str	r3, [sp, #40]           @ 4-byte Spill
-	ldr	r3, [r2, #44]
-	str	r3, [sp, #44]           @ 4-byte Spill
-	ldr	r3, [r2, #48]
-	str	r3, [sp, #48]           @ 4-byte Spill
-	ldr	r3, [r2, #52]
-	str	r3, [sp, #52]           @ 4-byte Spill
-	ldr	r3, [r2, #56]
-	str	r3, [sp, #56]           @ 4-byte Spill
-	ldr	r3, [r2, #60]
-	str	r3, [sp, #60]           @ 4-byte Spill
-	ldr	r3, [r2, #28]
-	ldr	r2, [r1, #36]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #40]
-	str	r3, [sp, #24]           @ 4-byte Spill
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #44]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldm	r12, {r1, r2, r3, r12}
-	ldr	r10, [sp, #28]          @ 4-byte Reload
-	adcs	r1, lr, r1
-	str	r10, [r0]
-	str	r7, [r0, #4]
-	str	r5, [r0, #8]
-	str	r6, [r0, #12]
-	adcs	r2, r4, r2
-	str	r1, [r0, #16]
-	str	r2, [r0, #20]
-	adcs	r1, r8, r3
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adcs	r2, r2, r12
-	adcs	r12, r1, r9
-	str	r2, [r0, #28]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	ldr	r2, [sp]                @ 4-byte Reload
-	str	r12, [r0, #32]
-	adcs	lr, r1, r2
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	str	lr, [r0, #36]
-	adcs	r3, r1, r2
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r3, [r0, #40]
-	adcs	r7, r1, r2
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r7, [r0, #44]
-	adcs	r6, r1, r2
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r6, [r0, #48]
-	adcs	r5, r1, r11
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r5, [r0, #52]
-	adcs	r4, r1, r2
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r4, [r0, #56]
-	adcs	r1, r1, r2
-	str	r1, [r0, #60]
-	mov	r0, #0
-	adc	r0, r0, #0
-	add	sp, sp, #64
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end245:
-	.size	mcl_fp_addPre16L, .Lfunc_end245-mcl_fp_addPre16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subPre16L
-	.align	2
-	.type	mcl_fp_subPre16L,%function
-mcl_fp_subPre16L:                       @ @mcl_fp_subPre16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#64
-	sub	sp, sp, #64
-	ldm	r2, {r3, r8}
-	ldr	r5, [r2, #8]
-	ldr	r6, [r2, #12]
-	ldm	r1, {r7, r12, lr}
-	ldr	r4, [r1, #12]
-	ldr	r9, [r1, #32]
-	ldr	r11, [r1, #52]
-	subs	r3, r7, r3
-	str	r3, [sp, #28]           @ 4-byte Spill
-	ldr	r3, [r2, #32]
-	sbcs	r7, r12, r8
-	ldr	r8, [r2, #24]
-	add	r12, r1, #16
-	sbcs	r5, lr, r5
-	ldr	lr, [r2, #16]
-	sbcs	r6, r4, r6
-	ldr	r4, [r2, #20]
-	str	r3, [sp, #32]           @ 4-byte Spill
-	ldr	r3, [r2, #36]
-	str	r3, [sp, #36]           @ 4-byte Spill
-	ldr	r3, [r2, #40]
-	str	r3, [sp, #40]           @ 4-byte Spill
-	ldr	r3, [r2, #44]
-	str	r3, [sp, #44]           @ 4-byte Spill
-	ldr	r3, [r2, #48]
-	str	r3, [sp, #48]           @ 4-byte Spill
-	ldr	r3, [r2, #52]
-	str	r3, [sp, #52]           @ 4-byte Spill
-	ldr	r3, [r2, #56]
-	str	r3, [sp, #56]           @ 4-byte Spill
-	ldr	r3, [r2, #60]
-	str	r3, [sp, #60]           @ 4-byte Spill
-	ldr	r3, [r2, #28]
-	ldr	r2, [r1, #36]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #40]
-	str	r3, [sp, #24]           @ 4-byte Spill
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r1, #44]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldm	r12, {r1, r2, r3, r12}
-	ldr	r10, [sp, #28]          @ 4-byte Reload
-	sbcs	r1, r1, lr
-	str	r10, [r0]
-	str	r7, [r0, #4]
-	str	r5, [r0, #8]
-	str	r6, [r0, #12]
-	sbcs	r2, r2, r4
-	str	r1, [r0, #16]
-	str	r2, [r0, #20]
-	sbcs	r1, r3, r8
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	sbcs	r2, r12, r2
-	sbcs	r12, r9, r1
-	str	r2, [r0, #28]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	ldr	r2, [sp]                @ 4-byte Reload
-	str	r12, [r0, #32]
-	sbcs	lr, r2, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	str	lr, [r0, #36]
-	sbcs	r3, r2, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	str	r3, [r0, #40]
-	sbcs	r7, r2, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r7, [r0, #44]
-	sbcs	r6, r2, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r6, [r0, #48]
-	sbcs	r5, r11, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r5, [r0, #52]
-	sbcs	r4, r2, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r4, [r0, #56]
-	sbcs	r1, r2, r1
-	str	r1, [r0, #60]
-	mov	r0, #0
-	sbc	r0, r0, #0
-	and	r0, r0, #1
-	add	sp, sp, #64
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end246:
-	.size	mcl_fp_subPre16L, .Lfunc_end246-mcl_fp_subPre16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_shr1_16L
-	.align	2
-	.type	mcl_fp_shr1_16L,%function
-mcl_fp_shr1_16L:                        @ @mcl_fp_shr1_16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#24
-	sub	sp, sp, #24
-	ldr	r3, [r1, #16]
-	ldr	r2, [r1, #20]
-	ldr	r12, [r1, #24]
-	ldr	r11, [r1, #28]
-	ldm	r1, {r4, r5, r6, r7}
-	ldr	r8, [r1, #56]
-	ldr	lr, [r1, #32]
-	ldr	r9, [r1, #36]
-	ldr	r10, [r1, #40]
-	str	r4, [sp, #4]            @ 4-byte Spill
-	lsr	r4, r5, #1
-	str	r8, [sp, #16]           @ 4-byte Spill
-	orr	r4, r4, r6, lsl #31
-	str	r4, [sp]                @ 4-byte Spill
-	ldr	r4, [r1, #44]
-	str	r4, [sp, #8]            @ 4-byte Spill
-	ldr	r4, [r1, #48]
-	str	r4, [sp, #12]           @ 4-byte Spill
-	ldr	r4, [r1, #52]
-	ldr	r1, [r1, #60]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	lsr	r1, r7, #1
-	lsrs	r7, r7, #1
-	rrx	r6, r6
-	lsrs	r5, r5, #1
-	orr	r1, r1, r3, lsl #31
-	ldr	r5, [sp, #4]            @ 4-byte Reload
-	rrx	r5, r5
-	str	r5, [r0]
-	ldr	r5, [sp]                @ 4-byte Reload
-	stmib	r0, {r5, r6}
-	str	r1, [r0, #12]
-	lsrs	r1, r2, #1
-	rrx	r1, r3
-	str	r1, [r0, #16]
-	lsr	r1, r2, #1
-	lsr	r2, r9, #1
-	orr	r1, r1, r12, lsl #31
-	str	r1, [r0, #20]
-	lsrs	r1, r11, #1
-	rrx	r1, r12
-	str	r1, [r0, #24]
-	lsr	r1, r11, #1
-	orr	r1, r1, lr, lsl #31
-	str	r1, [r0, #28]
-	lsrs	r1, r9, #1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	rrx	r12, lr
-	orr	lr, r2, r10, lsl #31
-	mov	r2, r4
-	lsr	r5, r2, #1
-	str	r12, [r0, #32]
-	str	lr, [r0, #36]
-	lsrs	r3, r1, #1
-	lsr	r7, r1, #1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	rrx	r3, r10
-	lsrs	r6, r2, #1
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r3, [r0, #40]
-	orr	r7, r7, r1, lsl #31
-	rrx	r6, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	orr	r5, r5, r2, lsl #31
-	str	r7, [r0, #44]
-	str	r6, [r0, #48]
-	str	r5, [r0, #52]
-	lsrs	r4, r1, #1
-	lsr	r1, r1, #1
-	rrx	r4, r2
-	str	r4, [r0, #56]
-	str	r1, [r0, #60]
-	add	sp, sp, #24
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end247:
-	.size	mcl_fp_shr1_16L, .Lfunc_end247-mcl_fp_shr1_16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_add16L
-	.align	2
-	.type	mcl_fp_add16L,%function
-mcl_fp_add16L:                          @ @mcl_fp_add16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#60
-	sub	sp, sp, #60
-	ldr	r9, [r1]
-	ldmib	r1, {r8, lr}
-	ldr	r12, [r1, #12]
-	ldm	r2, {r4, r5, r6, r7}
-	adds	r9, r4, r9
-	ldr	r4, [r1, #24]
-	adcs	r5, r5, r8
-	mov	r8, r9
-	adcs	r6, r6, lr
-	str	r5, [sp, #36]           @ 4-byte Spill
-	ldr	r5, [r1, #20]
-	str	r8, [r0]
-	adcs	r10, r7, r12
-	str	r6, [sp, #32]           @ 4-byte Spill
-	ldr	r6, [r1, #16]
-	ldr	r7, [r2, #16]
-	ldr	lr, [sp, #36]           @ 4-byte Reload
-	str	r10, [sp]               @ 4-byte Spill
-	adcs	r7, r7, r6
-	ldr	r6, [r1, #28]
-	str	lr, [r0, #4]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	ldr	r9, [sp, #28]           @ 4-byte Reload
-	adcs	r7, r7, r5
-	ldr	r5, [r2, #28]
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	adcs	r7, r7, r4
-	ldr	r4, [r2, #32]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	adcs	r7, r5, r6
-	ldr	r5, [r1, #32]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	adcs	r7, r4, r5
-	ldr	r5, [r1, #36]
-	ldr	r4, [r2, #36]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	adcs	r7, r4, r5
-	ldr	r5, [r1, #40]
-	ldr	r4, [r2, #40]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	adcs	r7, r4, r5
-	ldr	r5, [r1, #44]
-	ldr	r4, [r2, #44]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	adcs	r11, r4, r5
-	ldr	r4, [r1, #48]
-	ldr	r5, [r2, #52]
-	str	r11, [sp, #20]          @ 4-byte Spill
-	adcs	r12, r7, r4
-	ldr	r7, [r1, #52]
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	str	r12, [sp, #16]          @ 4-byte Spill
-	adcs	r6, r5, r7
-	ldr	r7, [r1, #56]
-	ldr	r5, [r2, #56]
-	ldr	r1, [r1, #60]
-	ldr	r2, [r2, #60]
-	str	r4, [r0, #8]
-	str	r10, [r0, #12]
-	ldr	r10, [sp, #24]          @ 4-byte Reload
-	str	r9, [r0, #16]
-	str	r6, [sp, #4]            @ 4-byte Spill
-	adcs	r5, r5, r7
-	str	r10, [r0, #20]
-	add	r7, r0, #40
-	adcs	r2, r2, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r2, [sp, #8]            @ 4-byte Spill
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r1, [r0, #28]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r1, [r0, #36]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	stm	r7, {r1, r11, r12}
-	str	r6, [r0, #52]
-	str	r5, [r0, #56]
-	str	r2, [r0, #60]
-	mov	r2, #0
-	mov	r12, r5
-	add	r11, r3, #32
-	adc	r1, r2, #0
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r3, {r5, r7}
-	ldr	r1, [r3, #8]
-	ldr	r2, [r3, #12]
-	subs	r8, r8, r5
-	sbcs	lr, lr, r7
-	sbcs	r1, r4, r1
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp]                @ 4-byte Reload
-	sbcs	r1, r1, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	sbcs	r1, r9, r1
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [r3, #20]
-	sbcs	r1, r10, r1
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [r3, #24]
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [r3, #28]
-	sbcs	r1, r2, r1
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldm	r11, {r1, r2, r5, r7, r9, r10, r11}
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	ldr	r3, [r3, #60]
-	sbcs	r1, r6, r1
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	sbcs	r1, r1, r2
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	sbcs	r2, r1, r5
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	sbcs	r5, r1, r7
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	sbcs	r7, r1, r9
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	sbcs	r4, r1, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	sbcs	r6, r12, r11
-	sbcs	r1, r1, r3
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	sbc	r3, r3, #0
-	tst	r3, #1
-	bne	.LBB248_2
-@ BB#1:                                 @ %nocarry
-	stm	r0, {r8, lr}
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	str	r3, [r0, #8]
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	str	r3, [r0, #12]
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	str	r3, [r0, #16]
-	ldr	r3, [sp, #24]           @ 4-byte Reload
-	str	r3, [r0, #20]
-	ldr	r3, [sp, #52]           @ 4-byte Reload
-	str	r3, [r0, #24]
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	str	r3, [r0, #28]
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	str	r3, [r0, #32]
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	str	r3, [r0, #36]
-	add	r3, r0, #40
-	stm	r3, {r2, r5, r7}
-	str	r4, [r0, #52]
-	str	r6, [r0, #56]
-	str	r1, [r0, #60]
-.LBB248_2:                              @ %carry
-	add	sp, sp, #60
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end248:
-	.size	mcl_fp_add16L, .Lfunc_end248-mcl_fp_add16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF16L
-	.align	2
-	.type	mcl_fp_addNF16L,%function
-mcl_fp_addNF16L:                        @ @mcl_fp_addNF16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#88
-	sub	sp, sp, #88
-	mov	r12, r0
-	ldm	r1, {r0, r9}
-	ldr	r8, [r1, #8]
-	ldr	lr, [r1, #12]
-	ldm	r2, {r4, r5, r6, r7}
-	adds	r10, r4, r0
-	ldr	r4, [r1, #20]
-	ldr	r0, [r1, #24]
-	adcs	r9, r5, r9
-	ldr	r5, [r1, #16]
-	adcs	r8, r6, r8
-	str	r9, [sp, #4]            @ 4-byte Spill
-	adcs	r6, r7, lr
-	ldr	r7, [r2, #16]
-	str	r8, [sp, #8]            @ 4-byte Spill
-	str	r6, [sp, #16]           @ 4-byte Spill
-	adcs	r7, r7, r5
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	adcs	r7, r7, r4
-	str	r7, [sp, #44]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	adcs	r0, r7, r0
-	ldr	r7, [r2, #28]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r1, #28]
-	adcs	r0, r7, r0
-	ldr	r7, [r2, #32]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r1, #32]
-	adcs	r0, r7, r0
-	ldr	r7, [r2, #36]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r1, #36]
-	adcs	r0, r7, r0
-	ldr	r7, [r2, #40]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r1, #40]
-	adcs	r0, r7, r0
-	ldr	r7, [r2, #44]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r1, #44]
-	adcs	r0, r7, r0
-	ldr	r7, [r2, #48]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	adcs	r0, r7, r0
-	ldr	r7, [r2, #52]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	adcs	r0, r7, r0
-	ldr	r7, [r2, #56]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [r1, #56]
-	adcs	r0, r7, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r1, #60]
-	ldr	r1, [r2, #60]
-	adc	r11, r1, r0
-	ldm	r3, {r0, r7}
-	ldr	r1, [r3, #8]
-	ldr	r4, [r3, #12]
-	subs	lr, r10, r0
-	ldr	r0, [r3, #32]
-	sbcs	r5, r9, r7
-	ldr	r9, [sp, #44]           @ 4-byte Reload
-	sbcs	r7, r8, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	sbcs	r8, r6, r4
-	ldr	r4, [r3, #24]
-	ldr	r6, [r3, #20]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [r3, #36]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r3, #40]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r3, #44]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r3, #48]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r3, #52]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r3, #56]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r3, #60]
-	str	r0, [sp]                @ 4-byte Spill
-	ldr	r0, [r3, #28]
-	ldr	r3, [r3, #16]
-	sbcs	r1, r1, r3
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	sbcs	r2, r9, r6
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	sbcs	r3, r3, r4
-	ldr	r4, [sp, #84]           @ 4-byte Reload
-	sbcs	r4, r4, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	sbcs	r0, r0, r6
-	ldr	r6, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	sbcs	r0, r0, r6
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	sbcs	r0, r0, r6
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	sbcs	r0, r0, r6
-	ldr	r6, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	sbcs	r0, r0, r6
-	ldr	r6, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	sbcs	r0, r0, r6
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	sbcs	r0, r0, r6
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp]                @ 4-byte Reload
-	sbc	r0, r11, r0
-	cmp	r0, #0
-	movlt	lr, r10
-	movlt	r5, r6
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	str	lr, [r12]
-	str	r5, [r12, #4]
-	ldr	r5, [sp, #8]            @ 4-byte Reload
-	ldr	lr, [sp, #12]           @ 4-byte Reload
-	movlt	r7, r5
-	cmp	r0, #0
-	ldr	r5, [sp, #32]           @ 4-byte Reload
-	str	r7, [r12, #8]
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	movlt	r2, r9
-	movlt	r8, r7
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	str	r8, [r12, #12]
-	movlt	r1, r7
-	cmp	r0, #0
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	str	r1, [r12, #16]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r2, [r12, #20]
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	movlt	r3, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r3, [r12, #24]
-	ldr	r3, [sp, #20]           @ 4-byte Reload
-	movlt	r4, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r4, [r12, #28]
-	ldr	r4, [sp, #36]           @ 4-byte Reload
-	movlt	lr, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	cmp	r0, #0
-	str	lr, [r12, #32]
-	movlt	r3, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r3, [r12, #36]
-	movlt	r7, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r7, [r12, #40]
-	movlt	r6, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	cmp	r0, #0
-	str	r6, [r12, #44]
-	movlt	r5, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r5, [r12, #48]
-	movlt	r4, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r4, [r12, #52]
-	movlt	r2, r1
-	cmp	r0, #0
-	movlt	r0, r11
-	str	r2, [r12, #56]
-	str	r0, [r12, #60]
-	add	sp, sp, #88
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end249:
-	.size	mcl_fp_addNF16L, .Lfunc_end249-mcl_fp_addNF16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_sub16L
-	.align	2
-	.type	mcl_fp_sub16L,%function
-mcl_fp_sub16L:                          @ @mcl_fp_sub16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#84
-	sub	sp, sp, #84
-	ldr	r9, [r2]
-	ldmib	r2, {r8, lr}
-	ldr	r5, [r1]
-	ldr	r12, [r2, #12]
-	ldmib	r1, {r4, r6, r7}
-	subs	r5, r5, r9
-	sbcs	r4, r4, r8
-	str	r5, [sp, #60]           @ 4-byte Spill
-	ldr	r5, [r2, #24]
-	sbcs	r6, r6, lr
-	str	r4, [sp, #64]           @ 4-byte Spill
-	ldr	r4, [r2, #20]
-	sbcs	r7, r7, r12
-	str	r6, [sp, #68]           @ 4-byte Spill
-	ldr	r6, [r2, #16]
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r1, #16]
-	sbcs	r7, r7, r6
-	ldr	r6, [r1, #28]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r1, #20]
-	sbcs	r7, r7, r4
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r1, #24]
-	sbcs	r7, r7, r5
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	sbcs	r11, r6, r7
-	ldr	r7, [r2, #32]
-	ldr	r6, [r1, #32]
-	sbcs	r10, r6, r7
-	ldr	r7, [r2, #36]
-	ldr	r6, [r1, #36]
-	sbcs	r8, r6, r7
-	ldr	r7, [r2, #40]
-	ldr	r6, [r1, #40]
-	str	r8, [sp, #52]           @ 4-byte Spill
-	sbcs	r5, r6, r7
-	ldr	r7, [r2, #44]
-	ldr	r6, [r1, #44]
-	str	r5, [sp, #48]           @ 4-byte Spill
-	sbcs	r4, r6, r7
-	ldr	r6, [r2, #48]
-	ldr	r7, [r1, #48]
-	str	r4, [sp, #44]           @ 4-byte Spill
-	sbcs	lr, r7, r6
-	ldr	r6, [r2, #52]
-	ldr	r7, [r1, #52]
-	str	lr, [sp, #40]           @ 4-byte Spill
-	sbcs	r9, r7, r6
-	ldr	r6, [r2, #56]
-	ldr	r7, [r1, #56]
-	ldr	r2, [r2, #60]
-	ldr	r1, [r1, #60]
-	sbcs	r6, r7, r6
-	sbcs	r12, r1, r2
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	mov	r2, #0
-	str	r6, [sp, #36]           @ 4-byte Spill
-	sbc	r2, r2, #0
-	str	r12, [sp, #32]          @ 4-byte Spill
-	tst	r2, #1
-	str	r1, [r0]
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r1, [r0, #4]
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r1, [r0, #8]
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r1, [r0, #12]
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r1, [r0, #20]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	str	r11, [r0, #28]
-	str	r10, [r0, #32]
-	str	r8, [r0, #36]
-	str	r5, [r0, #40]
-	str	r4, [r0, #44]
-	str	lr, [r0, #48]
-	str	r9, [r0, #52]
-	str	r6, [r0, #56]
-	str	r12, [r0, #60]
-	beq	.LBB250_2
-@ BB#1:                                 @ %carry
-	ldr	r2, [r3, #32]
-	ldr	r8, [r3, #60]
-	str	r11, [sp]               @ 4-byte Spill
-	ldr	r5, [r3]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldr	r2, [r3, #36]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r3, #40]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r3, #44]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [r3, #48]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r2, [r3, #52]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldr	r2, [r3, #56]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldmib	r3, {r4, r11, r12}
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	ldr	lr, [r3, #20]
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	adds	r5, r5, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r5, [r0]
-	adcs	r4, r4, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r4, [r0, #4]
-	adcs	r1, r11, r1
-	ldr	r11, [r3, #24]
-	adcs	r6, r12, r7
-	str	r1, [r0, #8]
-	ldr	r12, [r3, #28]
-	ldr	r3, [r3, #16]
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	r6, [r0, #12]
-	adcs	r1, r3, r1
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r3, lr, r2
-	ldr	r2, [sp]                @ 4-byte Reload
-	str	r3, [r0, #20]
-	adcs	r1, r11, r1
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r3, r12, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r3, [r0, #28]
-	ldr	r3, [sp, #48]           @ 4-byte Reload
-	adcs	lr, r1, r10
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	lr, [r0, #32]
-	adcs	r2, r1, r2
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r3, r1, r3
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r6, r1, r7
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	adcs	r5, r1, r7
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adcs	r4, r1, r9
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r7, r1, r7
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	adc	r12, r8, r1
-	add	r1, r0, #36
-	stm	r1, {r2, r3, r6}
-	str	r5, [r0, #48]
-	add	r0, r0, #52
-	stm	r0, {r4, r7, r12}
-.LBB250_2:                              @ %nocarry
-	add	sp, sp, #84
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end250:
-	.size	mcl_fp_sub16L, .Lfunc_end250-mcl_fp_sub16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_subNF16L
-	.align	2
-	.type	mcl_fp_subNF16L,%function
-mcl_fp_subNF16L:                        @ @mcl_fp_subNF16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#92
-	sub	sp, sp, #92
-	ldr	r7, [r2, #32]
-	add	r9, r2, #8
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #84]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #88]           @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [r1, #60]
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldr	r7, [r1, #56]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r1, #52]
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [r1, #48]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r1, #44]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldm	r2, {r10, r11}
-	ldm	r9, {r5, r6, r7, r9}
-	ldr	r4, [r2, #24]
-	ldr	r2, [r2, #28]
-	str	r4, [sp, #60]           @ 4-byte Spill
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldm	r1, {r2, r12, lr}
-	ldr	r4, [r1, #12]
-	ldr	r8, [r1, #40]
-	subs	r2, r2, r10
-	str	r2, [sp, #40]           @ 4-byte Spill
-	sbcs	r2, r12, r11
-	ldr	r12, [r1, #36]
-	sbcs	lr, lr, r5
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldr	r5, [r1, #32]
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	sbcs	r4, r4, r6
-	ldr	r6, [r1, #16]
-	str	lr, [sp]                @ 4-byte Spill
-	str	r4, [sp, #44]           @ 4-byte Spill
-	sbcs	r4, r6, r7
-	ldr	r7, [r1, #20]
-	str	r4, [sp, #52]           @ 4-byte Spill
-	sbcs	r4, r7, r9
-	ldr	r7, [r1, #28]
-	ldr	r1, [r1, #24]
-	str	r4, [sp, #48]           @ 4-byte Spill
-	sbcs	r1, r1, r2
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	sbcs	r1, r7, r1
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	sbcs	r1, r5, r1
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	sbcs	r1, r12, r1
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	sbcs	r1, r8, r1
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	sbc	r2, r2, r1
-	ldr	r1, [r3, #32]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [r3, #36]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [r3, #40]
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldr	r1, [r3, #44]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [r3, #48]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [r3, #52]
-	str	r1, [sp, #8]            @ 4-byte Spill
-	ldr	r1, [r3, #56]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [r3, #60]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldm	r3, {r1, r4, r5, r6, r7, r8, r9, r10}
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	ldr	r11, [sp, #20]          @ 4-byte Reload
-	adds	r1, r3, r1
-	adcs	r3, r11, r4
-	ldr	r4, [sp, #52]           @ 4-byte Reload
-	adcs	r12, lr, r5
-	ldr	r5, [sp, #44]           @ 4-byte Reload
-	adcs	lr, r5, r6
-	ldr	r5, [sp, #48]           @ 4-byte Reload
-	ldr	r6, [sp, #60]           @ 4-byte Reload
-	adcs	r4, r4, r7
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	adcs	r5, r5, r8
-	ldr	r8, [sp, #88]           @ 4-byte Reload
-	adcs	r9, r6, r9
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	adcs	r10, r7, r10
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	str	r7, [sp, #12]           @ 4-byte Spill
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #4]            @ 4-byte Reload
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	adcs	r6, r7, r6
-	ldr	r7, [sp, #80]           @ 4-byte Reload
-	str	r6, [sp, #4]            @ 4-byte Spill
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #28]           @ 4-byte Reload
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #40]           @ 4-byte Reload
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	adcs	r7, r8, r7
-	ldr	r8, [sp, #32]           @ 4-byte Reload
-	str	r7, [sp, #8]            @ 4-byte Spill
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	adcs	r7, r7, r8
-	str	r7, [sp, #32]           @ 4-byte Spill
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	adc	r7, r2, r7
-	cmp	r2, #0
-	movge	r1, r6
-	movge	r3, r11
-	str	r7, [sp, #36]           @ 4-byte Spill
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	str	r1, [r0]
-	ldr	r1, [sp]                @ 4-byte Reload
-	str	r3, [r0, #4]
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	movge	r12, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	cmp	r2, #0
-	str	r12, [r0, #8]
-	ldr	r12, [sp, #12]          @ 4-byte Reload
-	movge	lr, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	lr, [r0, #12]
-	ldr	lr, [sp, #16]           @ 4-byte Reload
-	movge	r4, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r4, [r0, #16]
-	ldr	r4, [sp, #32]           @ 4-byte Reload
-	movge	r5, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	cmp	r2, #0
-	str	r5, [r0, #20]
-	ldr	r5, [sp, #28]           @ 4-byte Reload
-	movge	r9, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r9, [r0, #24]
-	movge	r10, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r10, [r0, #28]
-	movge	r12, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	cmp	r2, #0
-	str	r12, [r0, #32]
-	movge	lr, r1
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	lr, [r0, #36]
-	movge	r7, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r7, [r0, #40]
-	movge	r6, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	cmp	r2, #0
-	str	r6, [r0, #44]
-	movge	r5, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r5, [r0, #48]
-	movge	r3, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r3, [r0, #52]
-	movge	r4, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	cmp	r2, #0
-	movge	r1, r2
-	str	r4, [r0, #56]
-	str	r1, [r0, #60]
-	add	sp, sp, #92
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end251:
-	.size	mcl_fp_subNF16L, .Lfunc_end251-mcl_fp_subNF16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add16L
-	.align	2
-	.type	mcl_fpDbl_add16L,%function
-mcl_fpDbl_add16L:                       @ @mcl_fpDbl_add16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#200
-	sub	sp, sp, #200
-	ldm	r1, {r7, r8, lr}
-	ldr	r12, [r1, #12]
-	ldm	r2, {r4, r5, r6, r9}
-	add	r10, r1, #32
-	adds	r4, r4, r7
-	str	r4, [sp, #100]          @ 4-byte Spill
-	ldr	r4, [r2, #96]
-	str	r4, [sp, #164]          @ 4-byte Spill
-	ldr	r4, [r2, #100]
-	str	r4, [sp, #160]          @ 4-byte Spill
-	ldr	r4, [r2, #104]
-	str	r4, [sp, #156]          @ 4-byte Spill
-	ldr	r4, [r2, #108]
-	str	r4, [sp, #180]          @ 4-byte Spill
-	ldr	r4, [r2, #112]
-	str	r4, [sp, #184]          @ 4-byte Spill
-	ldr	r4, [r2, #116]
-	str	r4, [sp, #188]          @ 4-byte Spill
-	ldr	r4, [r2, #120]
-	str	r4, [sp, #192]          @ 4-byte Spill
-	ldr	r4, [r2, #124]
-	str	r4, [sp, #196]          @ 4-byte Spill
-	adcs	r4, r5, r8
-	adcs	r7, r6, lr
-	str	r4, [sp, #68]           @ 4-byte Spill
-	add	lr, r1, #16
-	str	r7, [sp, #64]           @ 4-byte Spill
-	adcs	r7, r9, r12
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #136]          @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #140]          @ 4-byte Spill
-	ldr	r7, [r2, #72]
-	str	r7, [sp, #144]          @ 4-byte Spill
-	ldr	r7, [r2, #76]
-	str	r7, [sp, #148]          @ 4-byte Spill
-	ldr	r7, [r2, #80]
-	str	r7, [sp, #168]          @ 4-byte Spill
-	ldr	r7, [r2, #84]
-	str	r7, [sp, #152]          @ 4-byte Spill
-	ldr	r7, [r2, #88]
-	str	r7, [sp, #172]          @ 4-byte Spill
-	ldr	r7, [r2, #92]
-	str	r7, [sp, #176]          @ 4-byte Spill
-	ldr	r7, [r2, #32]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #60]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r7, [sp, #84]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	str	r7, [sp, #88]           @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #92]           @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #96]           @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	ldr	r2, [r2, #16]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #96]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp, #104]          @ 4-byte Spill
-	ldr	r2, [r1, #100]
-	str	r2, [sp, #108]          @ 4-byte Spill
-	ldr	r2, [r1, #104]
-	str	r2, [sp, #112]          @ 4-byte Spill
-	ldr	r2, [r1, #108]
-	str	r2, [sp, #116]          @ 4-byte Spill
-	ldr	r2, [r1, #112]
-	str	r2, [sp, #120]          @ 4-byte Spill
-	ldr	r2, [r1, #116]
-	str	r2, [sp, #124]          @ 4-byte Spill
-	ldr	r2, [r1, #120]
-	str	r2, [sp, #128]          @ 4-byte Spill
-	ldr	r2, [r1, #124]
-	str	r2, [sp, #132]          @ 4-byte Spill
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #88]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #92]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r8, r9, r10}
-	ldr	r2, [r1, #56]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #100]         @ 4-byte Reload
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	str	r11, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #64]           @ 4-byte Reload
-	str	r7, [r0, #8]
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	adcs	r1, r7, r1
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	str	r7, [r0, #12]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r2, r7, r2
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r2, [r0, #20]
-	adcs	r1, r1, r12
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r2, r2, lr
-	str	r2, [r0, #28]
-	adcs	r1, r1, r4
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r2, r2, r5
-	str	r2, [r0, #36]
-	adcs	r1, r1, r6
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r2, r2, r8
-	str	r2, [r0, #44]
-	adcs	r1, r1, r9
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	str	r1, [r0, #48]
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r2, r2, r10
-	adcs	r1, r1, r7
-	str	r2, [r0, #52]
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	str	r1, [r0, #56]
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	adcs	r2, r2, r7
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	str	r2, [r0, #60]
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	adcs	r12, r1, r7
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r12, [sp, #92]          @ 4-byte Spill
-	adcs	r9, r1, r2
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r9, [sp, #96]           @ 4-byte Spill
-	adcs	r8, r1, r2
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r8, [sp, #100]          @ 4-byte Spill
-	adcs	r4, r1, r2
-	ldr	r1, [sp, #168]          @ 4-byte Reload
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r4, [sp, #136]          @ 4-byte Spill
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #168]          @ 4-byte Spill
-	ldr	r1, [sp, #152]          @ 4-byte Reload
-	adcs	r10, r1, r2
-	ldr	r1, [sp, #172]          @ 4-byte Reload
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r10, [sp, #88]          @ 4-byte Spill
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #172]          @ 4-byte Spill
-	ldr	r1, [sp, #176]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	str	r1, [sp, #176]          @ 4-byte Spill
-	ldr	r1, [sp, #164]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	str	r1, [sp, #164]          @ 4-byte Spill
-	ldr	r1, [sp, #160]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	str	r1, [sp, #160]          @ 4-byte Spill
-	ldr	r1, [sp, #156]          @ 4-byte Reload
-	adcs	r11, r1, r2
-	ldr	r1, [sp, #180]          @ 4-byte Reload
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	str	r11, [sp, #140]         @ 4-byte Spill
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	str	r1, [sp, #180]          @ 4-byte Spill
-	ldr	r1, [sp, #184]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	str	r1, [sp, #184]          @ 4-byte Spill
-	ldr	r1, [sp, #188]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	str	r1, [sp, #188]          @ 4-byte Spill
-	ldr	r1, [sp, #192]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #132]          @ 4-byte Reload
-	str	r1, [sp, #192]          @ 4-byte Spill
-	ldr	r1, [sp, #196]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #196]          @ 4-byte Spill
-	mov	r1, #0
-	adc	r1, r1, #0
-	str	r1, [sp, #128]          @ 4-byte Spill
-	ldm	r3, {r2, r7}
-	ldr	r1, [r3, #36]
-	ldr	r6, [r3, #8]
-	ldr	r5, [r3, #12]
-	str	r1, [sp, #120]          @ 4-byte Spill
-	ldr	r1, [r3, #40]
-	subs	r12, r12, r2
-	ldr	r2, [sp, #168]          @ 4-byte Reload
-	sbcs	lr, r9, r7
-	sbcs	r7, r8, r6
-	ldr	r8, [r3, #32]
-	ldr	r6, [r3, #24]
-	sbcs	r9, r4, r5
-	ldr	r5, [r3, #28]
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldr	r1, [r3, #44]
-	str	r1, [sp, #132]          @ 4-byte Spill
-	ldr	r1, [r3, #48]
-	str	r1, [sp, #144]          @ 4-byte Spill
-	ldr	r1, [r3, #52]
-	str	r1, [sp, #148]          @ 4-byte Spill
-	ldr	r1, [r3, #56]
-	str	r1, [sp, #152]          @ 4-byte Spill
-	ldr	r1, [r3, #60]
-	str	r1, [sp, #156]          @ 4-byte Spill
-	ldr	r1, [r3, #20]
-	ldr	r3, [r3, #16]
-	sbcs	r2, r2, r3
-	sbcs	r3, r10, r1
-	ldr	r1, [sp, #172]          @ 4-byte Reload
-	sbcs	r4, r1, r6
-	ldr	r1, [sp, #176]          @ 4-byte Reload
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	sbcs	r5, r1, r5
-	ldr	r1, [sp, #164]          @ 4-byte Reload
-	sbcs	r8, r1, r8
-	ldr	r1, [sp, #160]          @ 4-byte Reload
-	sbcs	r10, r1, r6
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r6, [sp, #132]          @ 4-byte Reload
-	sbcs	r11, r11, r1
-	ldr	r1, [sp, #180]          @ 4-byte Reload
-	sbcs	r1, r1, r6
-	ldr	r6, [sp, #144]          @ 4-byte Reload
-	str	r1, [sp, #132]          @ 4-byte Spill
-	ldr	r1, [sp, #184]          @ 4-byte Reload
-	sbcs	r1, r1, r6
-	ldr	r6, [sp, #148]          @ 4-byte Reload
-	str	r1, [sp, #144]          @ 4-byte Spill
-	ldr	r1, [sp, #188]          @ 4-byte Reload
-	sbcs	r1, r1, r6
-	ldr	r6, [sp, #152]          @ 4-byte Reload
-	str	r1, [sp, #148]          @ 4-byte Spill
-	ldr	r1, [sp, #192]          @ 4-byte Reload
-	sbcs	r1, r1, r6
-	ldr	r6, [sp, #156]          @ 4-byte Reload
-	str	r1, [sp, #152]          @ 4-byte Spill
-	ldr	r1, [sp, #196]          @ 4-byte Reload
-	sbcs	r1, r1, r6
-	ldr	r6, [sp, #92]           @ 4-byte Reload
-	str	r1, [sp, #156]          @ 4-byte Spill
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	sbc	r1, r1, #0
-	ands	r1, r1, #1
-	movne	r12, r6
-	ldr	r6, [sp, #96]           @ 4-byte Reload
-	str	r12, [r0, #64]
-	movne	lr, r6
-	ldr	r6, [sp, #100]          @ 4-byte Reload
-	str	lr, [r0, #68]
-	movne	r7, r6
-	cmp	r1, #0
-	str	r7, [r0, #72]
-	ldr	r7, [sp, #136]          @ 4-byte Reload
-	movne	r9, r7
-	ldr	r7, [sp, #168]          @ 4-byte Reload
-	str	r9, [r0, #76]
-	movne	r2, r7
-	str	r2, [r0, #80]
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	movne	r3, r2
-	ldr	r2, [sp, #172]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #84]
-	ldr	r3, [sp, #132]          @ 4-byte Reload
-	movne	r4, r2
-	ldr	r2, [sp, #176]          @ 4-byte Reload
-	str	r4, [r0, #88]
-	movne	r5, r2
-	ldr	r2, [sp, #164]          @ 4-byte Reload
-	str	r5, [r0, #92]
-	movne	r8, r2
-	ldr	r2, [sp, #160]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r8, [r0, #96]
-	movne	r10, r2
-	ldr	r2, [sp, #140]          @ 4-byte Reload
-	str	r10, [r0, #100]
-	movne	r11, r2
-	ldr	r2, [sp, #180]          @ 4-byte Reload
-	str	r11, [r0, #104]
-	movne	r3, r2
-	ldr	r2, [sp, #184]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #108]
-	ldr	r3, [sp, #144]          @ 4-byte Reload
-	movne	r3, r2
-	ldr	r2, [sp, #188]          @ 4-byte Reload
-	str	r3, [r0, #112]
-	ldr	r3, [sp, #148]          @ 4-byte Reload
-	movne	r3, r2
-	ldr	r2, [sp, #192]          @ 4-byte Reload
-	str	r3, [r0, #116]
-	ldr	r3, [sp, #152]          @ 4-byte Reload
-	movne	r3, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #196]          @ 4-byte Reload
-	ldr	r2, [sp, #156]          @ 4-byte Reload
-	str	r3, [r0, #120]
-	movne	r2, r1
-	str	r2, [r0, #124]
-	add	sp, sp, #200
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end252:
-	.size	mcl_fpDbl_add16L, .Lfunc_end252-mcl_fpDbl_add16L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_sub16L
-	.align	2
-	.type	mcl_fpDbl_sub16L,%function
-mcl_fpDbl_sub16L:                       @ @mcl_fpDbl_sub16L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#200
-	sub	sp, sp, #200
-	ldr	r7, [r2, #96]
-	ldr	r9, [r2]
-	add	r10, r1, #32
-	str	r7, [sp, #168]          @ 4-byte Spill
-	ldr	r7, [r2, #100]
-	str	r7, [sp, #172]          @ 4-byte Spill
-	ldr	r7, [r2, #104]
-	str	r7, [sp, #176]          @ 4-byte Spill
-	ldr	r7, [r2, #108]
-	str	r7, [sp, #180]          @ 4-byte Spill
-	ldr	r7, [r2, #112]
-	str	r7, [sp, #184]          @ 4-byte Spill
-	ldr	r7, [r2, #116]
-	str	r7, [sp, #188]          @ 4-byte Spill
-	ldr	r7, [r2, #120]
-	str	r7, [sp, #192]          @ 4-byte Spill
-	ldr	r7, [r2, #124]
-	str	r7, [sp, #196]          @ 4-byte Spill
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #136]          @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #140]          @ 4-byte Spill
-	ldr	r7, [r2, #72]
-	str	r7, [sp, #144]          @ 4-byte Spill
-	ldr	r7, [r2, #76]
-	str	r7, [sp, #148]          @ 4-byte Spill
-	ldr	r7, [r2, #80]
-	str	r7, [sp, #152]          @ 4-byte Spill
-	ldr	r7, [r2, #84]
-	str	r7, [sp, #156]          @ 4-byte Spill
-	ldr	r7, [r2, #88]
-	str	r7, [sp, #160]          @ 4-byte Spill
-	ldr	r7, [r2, #92]
-	str	r7, [sp, #164]          @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #132]          @ 4-byte Spill
-	ldr	r7, [r2, #56]
-	str	r7, [sp, #128]          @ 4-byte Spill
-	ldmib	r2, {r8, r12, lr}
-	ldm	r1, {r4, r5, r6, r7}
-	subs	r4, r4, r9
-	str	r4, [sp, #36]           @ 4-byte Spill
-	ldr	r4, [r2, #52]
-	str	r4, [sp, #92]           @ 4-byte Spill
-	sbcs	r4, r5, r8
-	sbcs	r6, r6, r12
-	str	r4, [sp, #32]           @ 4-byte Spill
-	ldr	r4, [r2, #48]
-	sbcs	r7, r7, lr
-	str	r6, [sp, #24]           @ 4-byte Spill
-	ldr	r6, [r2, #44]
-	add	lr, r1, #16
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r4, [sp, #88]           @ 4-byte Spill
-	str	r6, [sp, #84]           @ 4-byte Spill
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r2, #32]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	ldr	r2, [r2, #16]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #96]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp, #96]           @ 4-byte Spill
-	ldr	r2, [r1, #100]
-	str	r2, [sp, #100]          @ 4-byte Spill
-	ldr	r2, [r1, #104]
-	str	r2, [sp, #104]          @ 4-byte Spill
-	ldr	r2, [r1, #108]
-	str	r2, [sp, #108]          @ 4-byte Spill
-	ldr	r2, [r1, #112]
-	str	r2, [sp, #112]          @ 4-byte Spill
-	ldr	r2, [r1, #116]
-	str	r2, [sp, #116]          @ 4-byte Spill
-	ldr	r2, [r1, #120]
-	str	r2, [sp, #120]          @ 4-byte Spill
-	ldr	r2, [r1, #124]
-	str	r2, [sp, #124]          @ 4-byte Spill
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #68]           @ 4-byte Spill
-	ldr	r2, [r1, #88]
-	str	r2, [sp, #72]           @ 4-byte Spill
-	ldr	r2, [r1, #92]
-	str	r2, [sp, #76]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r8, r9, r10}
-	ldr	r2, [r1, #56]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #36]          @ 4-byte Reload
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	str	r11, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	add	r11, r3, #12
-	str	r7, [r0, #8]
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #20]           @ 4-byte Reload
-	str	r7, [r0, #12]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	str	r1, [r0, #16]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	sbcs	r2, r2, r7
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r2, [r0, #20]
-	sbcs	r1, r12, r1
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	sbcs	r2, lr, r2
-	str	r2, [r0, #28]
-	sbcs	r1, r4, r1
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	sbcs	r2, r5, r2
-	ldr	r5, [sp, #72]           @ 4-byte Reload
-	str	r2, [r0, #36]
-	sbcs	r1, r6, r1
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	sbcs	r2, r8, r2
-	str	r2, [r0, #44]
-	sbcs	r1, r9, r1
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	str	r1, [r0, #48]
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	sbcs	r2, r10, r2
-	sbcs	r1, r7, r1
-	str	r2, [r0, #52]
-	ldr	r2, [sp, #132]          @ 4-byte Reload
-	ldr	r7, [sp, #4]            @ 4-byte Reload
-	str	r1, [r0, #56]
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	r2, [r0, #60]
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	sbcs	r9, r7, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	str	r9, [sp, #80]           @ 4-byte Spill
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r1, [sp, #132]          @ 4-byte Spill
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	sbcs	r12, r2, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r12, [sp, #84]          @ 4-byte Spill
-	sbcs	lr, r2, r1
-	ldr	r1, [sp, #152]          @ 4-byte Reload
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	str	lr, [sp, #88]           @ 4-byte Spill
-	sbcs	r4, r2, r1
-	ldr	r1, [sp, #156]          @ 4-byte Reload
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r4, [sp, #92]           @ 4-byte Spill
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #160]          @ 4-byte Reload
-	str	r1, [sp, #156]          @ 4-byte Spill
-	mov	r1, #0
-	sbcs	r2, r5, r2
-	ldr	r5, [sp, #76]           @ 4-byte Reload
-	str	r2, [sp, #160]          @ 4-byte Spill
-	ldr	r2, [sp, #164]          @ 4-byte Reload
-	sbcs	r2, r5, r2
-	str	r2, [sp, #164]          @ 4-byte Spill
-	ldr	r2, [sp, #168]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	str	r2, [sp, #168]          @ 4-byte Spill
-	ldr	r2, [sp, #172]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	str	r2, [sp, #172]          @ 4-byte Spill
-	ldr	r2, [sp, #176]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	str	r2, [sp, #176]          @ 4-byte Spill
-	ldr	r2, [sp, #180]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r2, [sp, #180]          @ 4-byte Spill
-	ldr	r2, [sp, #184]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	str	r2, [sp, #184]          @ 4-byte Spill
-	ldr	r2, [sp, #188]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	str	r2, [sp, #188]          @ 4-byte Spill
-	ldr	r2, [sp, #192]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #124]          @ 4-byte Reload
-	str	r2, [sp, #192]          @ 4-byte Spill
-	ldr	r2, [sp, #196]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	sbc	r1, r1, #0
-	str	r2, [sp, #196]          @ 4-byte Spill
-	str	r1, [sp, #128]          @ 4-byte Spill
-	ldr	r1, [r3, #32]
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldr	r1, [r3, #36]
-	str	r1, [sp, #120]          @ 4-byte Spill
-	ldr	r1, [r3, #40]
-	str	r1, [sp, #136]          @ 4-byte Spill
-	ldr	r1, [r3, #44]
-	str	r1, [sp, #140]          @ 4-byte Spill
-	ldr	r1, [r3, #48]
-	str	r1, [sp, #144]          @ 4-byte Spill
-	ldr	r1, [r3, #52]
-	str	r1, [sp, #148]          @ 4-byte Spill
-	ldr	r1, [r3, #56]
-	str	r1, [sp, #152]          @ 4-byte Spill
-	ldr	r1, [r3, #60]
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldm	r3, {r2, r6, r7}
-	ldm	r11, {r5, r8, r11}
-	ldr	r1, [r3, #28]
-	ldr	r10, [r3, #24]
-	str	r1, [sp, #112]          @ 4-byte Spill
-	adds	r1, r9, r2
-	ldr	r9, [sp, #132]          @ 4-byte Reload
-	adcs	r2, r9, r6
-	ldr	r6, [sp, #164]          @ 4-byte Reload
-	adcs	r3, r12, r7
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	adcs	r12, lr, r5
-	ldr	r5, [sp, #160]          @ 4-byte Reload
-	adcs	lr, r4, r8
-	ldr	r4, [sp, #156]          @ 4-byte Reload
-	adcs	r4, r4, r11
-	adcs	r5, r5, r10
-	adcs	r8, r6, r7
-	ldr	r7, [sp, #168]          @ 4-byte Reload
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	adcs	r11, r7, r6
-	ldr	r7, [sp, #172]          @ 4-byte Reload
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	adcs	r6, r7, r6
-	ldr	r7, [sp, #176]          @ 4-byte Reload
-	str	r6, [sp, #120]          @ 4-byte Spill
-	ldr	r6, [sp, #136]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #140]          @ 4-byte Reload
-	str	r7, [sp, #136]          @ 4-byte Spill
-	ldr	r7, [sp, #180]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #144]          @ 4-byte Reload
-	str	r7, [sp, #140]          @ 4-byte Spill
-	ldr	r7, [sp, #184]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #148]          @ 4-byte Reload
-	str	r7, [sp, #144]          @ 4-byte Spill
-	ldr	r7, [sp, #188]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #152]          @ 4-byte Reload
-	str	r7, [sp, #148]          @ 4-byte Spill
-	ldr	r7, [sp, #192]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	str	r7, [sp, #152]          @ 4-byte Spill
-	ldr	r7, [sp, #196]          @ 4-byte Reload
-	adc	r7, r7, r6
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	str	r7, [sp, #124]          @ 4-byte Spill
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	ands	r10, r7, #1
-	moveq	r1, r6
-	moveq	r2, r9
-	str	r1, [r0, #64]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r2, [r0, #68]
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	moveq	r3, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	cmp	r10, #0
-	str	r3, [r0, #72]
-	moveq	r12, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r12, [r0, #76]
-	moveq	lr, r1
-	ldr	r1, [sp, #156]          @ 4-byte Reload
-	str	lr, [r0, #80]
-	moveq	r4, r1
-	ldr	r1, [sp, #160]          @ 4-byte Reload
-	cmp	r10, #0
-	str	r4, [r0, #84]
-	moveq	r5, r1
-	ldr	r1, [sp, #164]          @ 4-byte Reload
-	str	r5, [r0, #88]
-	moveq	r8, r1
-	ldr	r1, [sp, #168]          @ 4-byte Reload
-	str	r8, [r0, #92]
-	moveq	r11, r1
-	ldr	r1, [sp, #172]          @ 4-byte Reload
-	cmp	r10, #0
-	str	r11, [r0, #96]
-	moveq	r2, r1
-	ldr	r1, [sp, #176]          @ 4-byte Reload
-	str	r2, [r0, #100]
-	ldr	r2, [sp, #136]          @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #180]          @ 4-byte Reload
-	str	r2, [r0, #104]
-	ldr	r2, [sp, #140]          @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #184]          @ 4-byte Reload
-	cmp	r10, #0
-	str	r2, [r0, #108]
-	ldr	r2, [sp, #144]          @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #188]          @ 4-byte Reload
-	str	r2, [r0, #112]
-	ldr	r2, [sp, #148]          @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #192]          @ 4-byte Reload
-	str	r2, [r0, #116]
-	ldr	r2, [sp, #152]          @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #196]          @ 4-byte Reload
-	cmp	r10, #0
-	str	r2, [r0, #120]
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	moveq	r2, r1
-	str	r2, [r0, #124]
-	add	sp, sp, #200
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r11
+	add	r0, lr, #200
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r9, sp, #1232
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #8]
+	ldr	r0, [sp, #1272]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1268]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1264]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1260]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1256]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #1252]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	add	r0, sp, #1168
+	ldm	r9, {r6, r7, r9}
+	ldr	r5, [sp, #1248]
+	ldr	r8, [sp, #1244]
+	ldr	r10, [sp, #1224]
+	ldr	r11, [sp, #1228]
+	bl	mulPv384x32
+	adds	r0, r4, r10
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #1168
+	adcs	r0, r0, r11
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r7, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r7, r7, r0
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r11, [sp, #1216]
+	adcs	r0, r0, r1
+	ldr	r10, [sp, #1212]
+	ldr	r9, [sp, #1208]
+	ldr	r8, [sp, #1204]
+	ldr	r6, [sp, #1200]
+	ldr	r5, [sp, #1196]
+	ldr	r4, [sp, #1192]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	add	lr, sp, #1024
+	adcs	r0, r0, r4
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	mov	r8, r7
+	adcs	r0, r0, r9
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, lr, #88
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r10, sp, #1120
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #12]
+	ldr	r0, [sp, #1160]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1156]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1152]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1148]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1144]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #1140]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	add	r0, sp, #1056
+	ldm	r10, {r4, r5, r10}
+	ldr	r6, [sp, #1136]
+	ldr	r9, [sp, #1132]
+	ldr	r11, [sp, #1112]
+	ldr	r7, [sp, #1116]
+	bl	mulPv384x32
+	adds	r0, r8, r11
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r11, [sp, #84]                  @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r4, [sp, #1056]
+	ldr	r0, [sp, #1060]
+	adds	r11, r11, r4
+	ldr	r4, [sp, #80]                   @ 4-byte Reload
+	ldr	r10, [sp, #1104]
+	adcs	r0, r4, r0
+	ldr	r9, [sp, #1100]
+	ldr	r8, [sp, #1096]
+	mov	r4, r11
+	ldr	r7, [sp, #1092]
+	ldr	r6, [sp, #1088]
+	ldr	r5, [sp, #1084]
+	ldr	lr, [sp, #1080]
+	ldr	r12, [sp, #1076]
+	ldr	r3, [sp, #1072]
+	ldr	r1, [sp, #1064]
+	ldr	r2, [sp, #1068]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r11
+	add	r0, sp, #1000
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r9, sp, #1008
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #16]
+	ldr	r0, [sp, #1048]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1044]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1040]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1036]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1032]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #1028]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	add	r0, sp, #944
+	ldm	r9, {r6, r7, r9}
+	ldr	r5, [sp, #1024]
+	ldr	r8, [sp, #1020]
+	ldr	r10, [sp, #1000]
+	ldr	r11, [sp, #1004]
+	bl	mulPv384x32
+	adds	r0, r4, r10
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #944
+	adcs	r0, r0, r11
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	add	r11, sp, #968
+	adcs	r0, r0, r6
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r7, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r7, r7, r0
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	mov	r8, r7
+	adcs	r0, r0, r9
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #888
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r10, sp, #896
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #20]
+	ldr	r0, [sp, #936]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #932]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #928]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #924]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #920]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #916]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	add	r0, sp, #832
+	ldm	r10, {r4, r5, r10}
+	ldr	r6, [sp, #912]
+	ldr	r9, [sp, #908]
+	ldr	r11, [sp, #888]
+	ldr	r7, [sp, #892]
+	bl	mulPv384x32
+	adds	r0, r8, r11
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #836
+	adcs	r0, r0, r7
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r11, [sp, #84]                  @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	add	r10, sp, #860
+	adcs	r0, r0, r9
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r4, [sp, #832]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r11, r11, r4
+	ldr	r4, [sp, #80]                   @ 4-byte Reload
+	ldm	r10, {r5, r6, r7, r8, r9, r10}
+	adcs	r0, r4, r0
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	mov	r4, r11
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r11
+	add	r0, sp, #776
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r9, sp, #784
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #24]
+	ldr	r0, [sp, #824]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #820]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #816]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #812]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #808]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #804]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	add	r0, sp, #720
+	ldm	r9, {r6, r7, r9}
+	ldr	r5, [sp, #800]
+	ldr	r8, [sp, #796]
+	ldr	r10, [sp, #776]
+	ldr	r11, [sp, #780]
+	bl	mulPv384x32
+	adds	r0, r4, r10
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #720
+	adcs	r0, r0, r11
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	add	r11, sp, #744
+	adcs	r0, r0, r6
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r7, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r7, r7, r0
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	mov	r8, r7
+	adcs	r0, r0, r9
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #664
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r10, sp, #672
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #28]
+	ldr	r0, [sp, #712]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #708]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #704]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #700]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #696]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #692]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	add	r0, sp, #608
+	ldm	r10, {r4, r5, r10}
+	ldr	r6, [sp, #688]
+	ldr	r9, [sp, #684]
+	ldr	r11, [sp, #664]
+	ldr	r7, [sp, #668]
+	bl	mulPv384x32
+	adds	r0, r8, r11
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #612
+	adcs	r0, r0, r7
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r11, [sp, #84]                  @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	add	r10, sp, #636
+	adcs	r0, r0, r9
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r4, [sp, #608]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r11, r11, r4
+	ldr	r4, [sp, #80]                   @ 4-byte Reload
+	ldm	r10, {r5, r6, r7, r8, r9, r10}
+	adcs	r0, r4, r0
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	mov	r4, r11
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r11
+	add	r0, sp, #552
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r9, sp, #560
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #32]
+	ldr	r0, [sp, #600]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #596]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #592]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #588]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #584]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #580]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	add	r0, sp, #496
+	ldm	r9, {r6, r7, r9}
+	ldr	r5, [sp, #576]
+	ldr	r8, [sp, #572]
+	ldr	r10, [sp, #552]
+	ldr	r11, [sp, #556]
+	bl	mulPv384x32
+	adds	r0, r4, r10
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #496
+	adcs	r0, r0, r11
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	add	r11, sp, #520
+	adcs	r0, r0, r6
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r7, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r7, r7, r0
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	mov	r8, r7
+	adcs	r0, r0, r9
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #440
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r10, sp, #448
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #36]
+	ldr	r0, [sp, #488]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #484]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #480]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #476]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #472]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #468]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	add	r0, sp, #384
+	ldm	r10, {r4, r5, r10}
+	ldr	r6, [sp, #464]
+	ldr	r9, [sp, #460]
+	ldr	r11, [sp, #440]
+	ldr	r7, [sp, #444]
+	bl	mulPv384x32
+	adds	r0, r8, r11
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #388
+	adcs	r0, r0, r7
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r11, [sp, #84]                  @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	add	r10, sp, #412
+	adcs	r0, r0, r9
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r4, [sp, #384]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r11, r11, r4
+	ldr	r4, [sp, #80]                   @ 4-byte Reload
+	ldm	r10, {r5, r6, r7, r8, r9, r10}
+	adcs	r0, r4, r0
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	mov	r4, r11
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r8, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	mul	r2, r8, r11
+	adcs	r0, r0, r10
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	add	r0, sp, #328
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r9, sp, #336
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #40]
+	ldr	r0, [sp, #376]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #372]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #368]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #364]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #360]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #356]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #352]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #272
+	ldm	r9, {r6, r7, r9}
+	ldr	r5, [sp, #348]
+	ldr	r10, [sp, #328]
+	ldr	r11, [sp, #332]
+	bl	mulPv384x32
+	adds	r0, r4, r10
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #272
+	add	r12, sp, #288
+	adcs	r4, r0, r11
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r10, r0, r6
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	add	r9, sp, #308
+	adcs	r0, r0, r5
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldm	lr, {r2, r6, lr}
+	adds	r0, r4, r2
+	ldr	r5, [sp, #284]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	adcs	r6, r10, r6
+	mul	r11, r8, r0
+	ldm	r9, {r4, r7, r8, r9}
+	ldm	r12, {r0, r1, r2, r3, r12}
+	str	r6, [sp, #36]                   @ 4-byte Spill
+	ldr	r6, [sp, #80]                   @ 4-byte Reload
+	adcs	r6, r6, lr
+	str	r6, [sp, #32]                   @ 4-byte Spill
+	ldr	r6, [sp, #72]                   @ 4-byte Reload
+	adcs	r6, r6, r5
+	str	r6, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r6, r0
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	mov	r2, r11
+	adcs	r0, r0, r3
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	mov	r0, #0
+	adc	r0, r0, #0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	add	r0, sp, #216
+	bl	mulPv384x32
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	add	r0, sp, #160
+	ldr	r2, [r1, #44]
+	ldr	r1, [sp, #264]
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #260]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #256]
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #252]
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldr	r1, [sp, #248]
+	str	r1, [sp, #12]                   @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r7, [sp, #244]
+	ldr	r4, [sp, #240]
+	ldr	r8, [sp, #236]
+	ldr	r9, [sp, #232]
+	ldr	r11, [sp, #216]
+	ldr	r5, [sp, #220]
+	ldr	r10, [sp, #224]
+	ldr	r6, [sp, #228]
+	bl	mulPv384x32
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #160
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	add	r12, sp, #176
+	adds	r0, r0, r11
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	adcs	r11, r1, r10
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	add	r10, sp, #196
+	adcs	r1, r1, r6
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r9
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r1, r4
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	adcs	r1, r1, r7
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	ldr	r2, [sp, #92]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adc	r1, r1, #0
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldm	lr, {r2, r7, lr}
+	adds	r4, r0, r2
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r6, [sp, #172]
+	adcs	r7, r11, r7
+	mul	r1, r0, r4
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldm	r10, {r5, r8, r9, r10}
+	ldm	r12, {r0, r1, r2, r3, r12}
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	ldr	r7, [sp, #96]                   @ 4-byte Reload
+	adcs	r11, r7, lr
+	ldr	r7, [sp, #84]                   @ 4-byte Reload
+	adcs	r6, r7, r6
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r7, r0, r5
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r5, [sp, #100]                  @ 4-byte Reload
+	adcs	r8, r0, r8
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	mov	r1, r5
+	adcs	r9, r0, r10
+	mov	r0, #0
+	adc	r10, r0, #0
+	add	r0, sp, #104
+	bl	mulPv384x32
+	add	r3, sp, #104
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r4, r0
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r12, r0, r1
+	str	r12, [sp, #72]                  @ 4-byte Spill
+	adcs	r2, r11, r2
+	str	r2, [sp, #64]                   @ 4-byte Spill
+	adcs	r4, r6, r3
+	str	r4, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #124]
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #132]
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #136]
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #140]
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #144]
+	adcs	r0, r8, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #148]
+	adcs	r0, r1, r0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #152]
+	adcs	r0, r9, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	adc	r0, r10, #0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldm	r5, {r0, r1, r3, r7, lr}
+	add	r9, r5, #20
+	add	r5, r5, #32
+	subs	r0, r12, r0
+	ldm	r9, {r6, r8, r9}
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	sbcs	r0, r2, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	sbcs	r0, r4, r3
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r4, [sp, #88]                   @ 4-byte Reload
+	sbcs	r0, r0, r7
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldm	r5, {r1, r2, r3, r5}
+	sbcs	r11, r0, lr
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	sbcs	r10, r4, r6
+	ldr	r4, [sp, #44]                   @ 4-byte Reload
+	sbcs	r8, r0, r8
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	lr, [sp, #40]                   @ 4-byte Reload
+	sbcs	r9, r0, r9
+	ldr	r12, [sp, #36]                  @ 4-byte Reload
+	sbcs	r1, r4, r1
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	sbcs	r2, lr, r2
+	sbcs	r3, r12, r3
+	sbcs	r6, r0, r5
+	ldr	r5, [sp, #32]                   @ 4-byte Reload
+	sbc	r5, r5, #0
+	ands	r7, r5, #1
+	ldr	r5, [sp, #76]                   @ 4-byte Reload
+	movne	r6, r0
+	movne	r3, r12
+	movne	r2, lr
+	cmp	r7, #0
+	movne	r1, r4
+	add	r0, r5, #32
+	stm	r0, {r1, r2, r3, r6}
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	movne	r9, r0
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	str	r9, [r5, #28]
+	movne	r8, r0
+	cmp	r7, #0
+	movne	r10, r1
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	str	r8, [r5, #24]
+	movne	r11, r1
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	movne	r1, r0
+	cmp	r7, #0
+	str	r1, [r5, #12]
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	str	r10, [r5, #20]
+	movne	r0, r1
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	str	r0, [r5, #8]
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	movne	r0, r1
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	str	r0, [r5, #4]
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	movne	r0, r1
+	str	r11, [r5, #16]
+	str	r0, [r5]
+	add	sp, sp, #428
+	add	sp, sp, #1024
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end253:
-	.size	mcl_fpDbl_sub16L, .Lfunc_end253-mcl_fpDbl_sub16L
-	.cantunwind
-	.fnend
-
-	.align	2
-	.type	.LmulPv544x32,%function
-.LmulPv544x32:                          @ @mulPv544x32
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r11, lr}
-	ldr	r12, [r1]
-	ldmib	r1, {r3, lr}
-	ldr	r9, [r1, #12]
-	umull	r4, r8, lr, r2
-	umull	lr, r6, r12, r2
-	mov	r5, r4
-	mov	r7, r6
-	str	lr, [r0]
-	umull	lr, r12, r9, r2
-	umlal	r7, r5, r3, r2
-	str	r5, [r0, #8]
-	str	r7, [r0, #4]
-	umull	r5, r7, r3, r2
-	adds	r3, r6, r5
-	adcs	r3, r7, r4
-	adcs	r3, r8, lr
-	str	r3, [r0, #12]
-	ldr	r3, [r1, #16]
-	umull	r7, r6, r3, r2
-	adcs	r3, r12, r7
-	str	r3, [r0, #16]
-	ldr	r3, [r1, #20]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #20]
-	ldr	r3, [r1, #24]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #24]
-	ldr	r3, [r1, #28]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #28]
-	ldr	r3, [r1, #32]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #32]
-	ldr	r3, [r1, #36]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #36]
-	ldr	r3, [r1, #40]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #40]
-	ldr	r3, [r1, #44]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #44]
-	ldr	r3, [r1, #48]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #48]
-	ldr	r3, [r1, #52]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #52]
-	ldr	r3, [r1, #56]
-	umull	r7, r6, r3, r2
-	adcs	r3, r5, r7
-	str	r3, [r0, #56]
-	ldr	r3, [r1, #60]
-	umull	r7, r5, r3, r2
-	adcs	r3, r6, r7
-	str	r3, [r0, #60]
-	ldr	r1, [r1, #64]
-	umull	r3, r7, r1, r2
-	adcs	r1, r5, r3
-	adc	r2, r7, #0
-	str	r1, [r0, #64]
-	str	r2, [r0, #68]
-	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
-	mov	pc, lr
-.Lfunc_end254:
-	.size	.LmulPv544x32, .Lfunc_end254-.LmulPv544x32
+.Lfunc_end60:
+	.size	mcl_fp_mont12L, .Lfunc_end60-mcl_fp_mont12L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_mulUnitPre17L
-	.align	2
-	.type	mcl_fp_mulUnitPre17L,%function
-mcl_fp_mulUnitPre17L:                   @ @mcl_fp_mulUnitPre17L
+                                        @ -- End function
+	.globl	mcl_fp_montNF12L                @ -- Begin function mcl_fp_montNF12L
+	.p2align	2
+	.type	mcl_fp_montNF12L,%function
+	.code	32                              @ @mcl_fp_montNF12L
+mcl_fp_montNF12L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#100
-	sub	sp, sp, #100
-	mov	r4, r0
-	add	r0, sp, #24
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #92]
-	add	r11, sp, #48
-	add	lr, sp, #24
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #88]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #84]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #80]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #76]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldm	r11, {r5, r6, r7, r8, r9, r10, r11}
+	.pad	#428
+	sub	sp, sp, #428
+	.pad	#1024
+	sub	sp, sp, #1024
+	mov	r7, r2
+	ldr	r2, [r2]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	add	r0, sp, #1392
+	ldr	r5, [r3, #-4]
+	mov	r4, r3
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	str	r5, [sp, #88]                   @ 4-byte Spill
+	str	r3, [sp, #100]                  @ 4-byte Spill
+	str	r7, [sp, #92]                   @ 4-byte Spill
+	bl	mulPv384x32
+	ldr	r0, [sp, #1396]
+	add	lr, sp, #1024
+	ldr	r8, [sp, #1392]
+	mov	r1, r4
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #1400]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	mul	r2, r5, r8
+	ldr	r0, [sp, #1404]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #1440]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #1436]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #1432]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #1428]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #1424]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1420]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1416]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1412]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1408]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	add	r0, lr, #312
+	bl	mulPv384x32
+	ldr	r0, [sp, #1384]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #1380]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1376]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1372]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1368]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1364]
+	ldr	r2, [r7, #4]
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #1280
+	ldr	r7, [sp, #1360]
+	ldr	r11, [sp, #1356]
+	ldr	r5, [sp, #1352]
+	ldr	r6, [sp, #1336]
+	ldr	r10, [sp, #1340]
+	ldr	r9, [sp, #1344]
+	ldr	r4, [sp, #1348]
+	bl	mulPv384x32
+	adds	r0, r6, r8
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r10, r0
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r10, [sp, #64]                  @ 4-byte Reload
+	adcs	r0, r9, r0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r4, r0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r5, r0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r11, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r1, r0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r4, [sp, #1280]
+	ldr	r0, [sp, #1284]
+	adds	r10, r10, r4
+	ldr	r4, [sp, #60]                   @ 4-byte Reload
+	ldr	r11, [sp, #1328]
+	adcs	r0, r4, r0
+	ldr	r9, [sp, #1324]
+	ldr	r8, [sp, #1320]
+	mov	r4, r10
+	ldr	r7, [sp, #1316]
+	ldr	r6, [sp, #1312]
+	ldr	r5, [sp, #1308]
+	ldr	lr, [sp, #1304]
+	ldr	r12, [sp, #1300]
+	ldr	r3, [sp, #1296]
+	ldr	r1, [sp, #1288]
+	ldr	r2, [sp, #1292]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	add	lr, sp, #1024
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	adc	r0, r11, #0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r10
+	add	r0, lr, #200
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r9, sp, #1232
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #8]
+	ldr	r0, [sp, #1272]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #1268]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1264]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1260]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1256]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1252]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #1168
+	ldm	r9, {r6, r7, r9}
+	ldr	r5, [sp, #1248]
+	ldr	r8, [sp, #1244]
+	ldr	r10, [sp, #1224]
+	ldr	r11, [sp, #1228]
+	bl	mulPv384x32
+	adds	r0, r4, r10
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	add	lr, sp, #1168
+	adcs	r0, r0, r11
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r7, r7, r0
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r11, [sp, #1216]
+	adcs	r0, r0, r1
+	ldr	r10, [sp, #1212]
+	ldr	r9, [sp, #1208]
+	ldr	r8, [sp, #1204]
+	ldr	r6, [sp, #1200]
+	ldr	r5, [sp, #1196]
+	ldr	r4, [sp, #1192]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	add	lr, sp, #1024
+	adcs	r0, r0, r4
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	mov	r8, r7
+	adcs	r0, r0, r9
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	adc	r0, r11, #0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, lr, #88
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r10, sp, #1120
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #12]
+	ldr	r0, [sp, #1160]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #1156]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1152]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1148]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1144]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1140]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #1056
+	ldm	r10, {r4, r5, r10}
+	ldr	r6, [sp, #1136]
+	ldr	r9, [sp, #1132]
+	ldr	r11, [sp, #1112]
+	ldr	r7, [sp, #1116]
+	bl	mulPv384x32
+	adds	r0, r8, r11
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r10, [sp, #80]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r4, [sp, #1056]
+	ldr	r0, [sp, #1060]
+	adds	r10, r10, r4
+	ldr	r4, [sp, #76]                   @ 4-byte Reload
+	ldr	r11, [sp, #1104]
+	adcs	r0, r4, r0
+	ldr	r9, [sp, #1100]
+	ldr	r8, [sp, #1096]
+	mov	r4, r10
+	ldr	r7, [sp, #1092]
+	ldr	r6, [sp, #1088]
+	ldr	r5, [sp, #1084]
+	ldr	lr, [sp, #1080]
+	ldr	r12, [sp, #1076]
+	ldr	r3, [sp, #1072]
+	ldr	r1, [sp, #1064]
+	ldr	r2, [sp, #1068]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	adc	r0, r11, #0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r10
+	add	r0, sp, #1000
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r9, sp, #1008
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #16]
+	ldr	r0, [sp, #1048]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #1044]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1040]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1036]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1032]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1028]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #944
+	ldm	r9, {r6, r7, r9}
+	ldr	r5, [sp, #1024]
+	ldr	r8, [sp, #1020]
+	ldr	r10, [sp, #1000]
+	ldr	r11, [sp, #1004]
+	bl	mulPv384x32
+	adds	r0, r4, r10
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	add	lr, sp, #944
+	adcs	r0, r0, r11
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	add	r11, sp, #968
+	adcs	r0, r0, r6
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r7, r7, r0
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	mov	r8, r7
+	adcs	r0, r0, r9
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	adc	r0, r11, #0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #888
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r10, sp, #896
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #20]
+	ldr	r0, [sp, #936]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #932]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #928]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #924]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #920]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #916]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #832
+	ldm	r10, {r4, r5, r10}
+	ldr	r6, [sp, #912]
+	ldr	r9, [sp, #908]
+	ldr	r11, [sp, #888]
+	ldr	r7, [sp, #892]
+	bl	mulPv384x32
+	adds	r0, r8, r11
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	add	lr, sp, #836
+	add	r11, sp, #860
+	adcs	r0, r0, r7
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r10, [sp, #80]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r4, [sp, #832]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r10, r10, r4
+	ldr	r4, [sp, #76]                   @ 4-byte Reload
+	ldm	r11, {r5, r6, r7, r8, r9, r11}
+	adcs	r0, r4, r0
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	mov	r4, r10
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	adc	r0, r11, #0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r10
+	add	r0, sp, #776
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r9, sp, #784
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #24]
+	ldr	r0, [sp, #824]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #820]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #816]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #812]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #808]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #804]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #720
+	ldm	r9, {r6, r7, r9}
+	ldr	r5, [sp, #800]
+	ldr	r8, [sp, #796]
+	ldr	r10, [sp, #776]
+	ldr	r11, [sp, #780]
+	bl	mulPv384x32
+	adds	r0, r4, r10
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	add	lr, sp, #720
+	adcs	r0, r0, r11
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	add	r11, sp, #744
+	adcs	r0, r0, r6
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r7, r7, r0
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	mov	r8, r7
+	adcs	r0, r0, r9
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	adc	r0, r11, #0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #664
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r10, sp, #672
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #28]
+	ldr	r0, [sp, #712]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #708]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #704]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #700]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #696]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #692]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #608
+	ldm	r10, {r4, r5, r10}
+	ldr	r6, [sp, #688]
+	ldr	r9, [sp, #684]
+	ldr	r11, [sp, #664]
+	ldr	r7, [sp, #668]
+	bl	mulPv384x32
+	adds	r0, r8, r11
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	add	lr, sp, #612
+	add	r11, sp, #636
+	adcs	r0, r0, r7
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r10, [sp, #80]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r4, [sp, #608]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r10, r10, r4
+	ldr	r4, [sp, #76]                   @ 4-byte Reload
+	ldm	r11, {r5, r6, r7, r8, r9, r11}
+	adcs	r0, r4, r0
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	mov	r4, r10
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	adc	r0, r11, #0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r10
+	add	r0, sp, #552
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r9, sp, #560
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #32]
+	ldr	r0, [sp, #600]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #596]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #592]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #588]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #584]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #580]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #496
+	ldm	r9, {r6, r7, r9}
+	ldr	r5, [sp, #576]
+	ldr	r8, [sp, #572]
+	ldr	r10, [sp, #552]
+	ldr	r11, [sp, #556]
+	bl	mulPv384x32
+	adds	r0, r4, r10
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	add	lr, sp, #496
+	adcs	r0, r0, r11
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	add	r11, sp, #520
+	adcs	r0, r0, r6
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
 	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	stm	r4, {r0, r1, r2, r3, r12, lr}
-	add	r0, r4, #24
-	stm	r0, {r5, r6, r7, r8, r9, r10, r11}
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	str	r0, [r4, #52]
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	str	r0, [r4, #56]
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	str	r0, [r4, #60]
-	ldr	r0, [sp, #16]           @ 4-byte Reload
-	str	r0, [r4, #64]
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	str	r0, [r4, #68]
-	add	sp, sp, #100
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end255:
-	.size	mcl_fp_mulUnitPre17L, .Lfunc_end255-mcl_fp_mulUnitPre17L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_mulPre17L
-	.align	2
-	.type	mcl_fpDbl_mulPre17L,%function
-mcl_fpDbl_mulPre17L:                    @ @mcl_fpDbl_mulPre17L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#340
-	sub	sp, sp, #340
-	.pad	#1024
-	sub	sp, sp, #1024
-	mov	r9, r2
-	add	r6, sp, #1024
-	mov	r4, r0
-	str	r1, [sp, #128]          @ 4-byte Spill
-	mov	r5, r1
-	ldr	r2, [r9]
-	add	r0, r6, #264
-	str	r9, [sp, #124]          @ 4-byte Spill
-	str	r4, [sp, #132]          @ 4-byte Spill
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1356]
-	ldr	r1, [sp, #1292]
-	ldr	r2, [r9, #4]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #1352]
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #1296]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #1348]
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #1300]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #1344]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	mov	r1, r5
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #1340]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #1336]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #1332]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #1328]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #1324]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #1320]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #1316]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1312]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #1308]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #1304]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #1288]
-	str	r0, [r4]
-	add	r0, sp, #1216
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1284]
-	add	lr, sp, #1216
-	ldr	r10, [sp, #1256]
-	ldr	r8, [sp, #1252]
-	ldr	r7, [sp, #1248]
-	ldr	r6, [sp, #1244]
-	ldr	r5, [sp, #1240]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1280]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1276]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1272]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1268]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1264]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1260]
-	str	r0, [sp, #28]           @ 4-byte Spill
+	adds	r7, r7, r0
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldm	r11, {r4, r5, r6, r8, r9, r10, r11}
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	mov	r8, r7
+	adcs	r0, r0, r9
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	adc	r0, r11, #0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #440
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r10, sp, #448
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #36]
+	ldr	r0, [sp, #488]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #484]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #480]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #476]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #472]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #468]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #384
+	ldm	r10, {r4, r5, r10}
+	ldr	r6, [sp, #464]
+	ldr	r9, [sp, #460]
+	ldr	r11, [sp, #440]
+	ldr	r7, [sp, #444]
+	bl	mulPv384x32
+	adds	r0, r8, r11
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	add	lr, sp, #388
+	add	r11, sp, #412
+	adcs	r0, r0, r7
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r7, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r10, [sp, #80]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #432]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r4, [sp, #384]
 	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #56]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #4]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r9, #8]
-	add	r9, sp, #1024
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	adds	r4, r10, r4
 	adcs	r0, r7, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	ldm	r11, {r5, r6, r8, r9, r11}
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	str	r4, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r8, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	mul	r2, r8, r4
+	adcs	r0, r0, r11
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	add	r0, sp, #328
+	bl	mulPv384x32
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	add	r9, sp, #336
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [r0, #40]
+	ldr	r0, [sp, #376]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #372]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #368]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #364]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #360]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #356]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	add	r0, sp, #272
+	ldm	r9, {r6, r7, r9}
+	ldr	r4, [sp, #352]
+	ldr	r5, [sp, #348]
+	ldr	r10, [sp, #328]
+	ldr	r11, [sp, #332]
+	bl	mulPv384x32
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	add	r12, sp, #288
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	adds	r0, r0, r10
+	ldr	r2, [sp, #8]                    @ 4-byte Reload
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	adcs	r1, r1, r6
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r7
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r7, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r9
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	adcs	r1, r1, r5
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	adcs	r1, r1, r4
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	adc	r1, r1, r2
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r2, [sp, #272]
+	ldr	lr, [sp, #276]
+	adds	r0, r0, r2
+	ldr	r5, [sp, #280]
+	ldr	r6, [sp, #284]
+	adcs	r7, r7, lr
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	mul	r10, r8, r0
+	ldr	r9, [sp, #320]
+	ldr	r8, [sp, #316]
+	ldr	r11, [sp, #312]
+	ldr	r4, [sp, #308]
+	ldm	r12, {r0, r1, r2, r3, r12}
+	str	r7, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	adcs	r7, r7, r5
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	ldr	r7, [sp, #68]                   @ 4-byte Reload
+	adcs	r7, r7, r6
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	mov	r2, r10
+	adcs	r0, r0, r3
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	adc	r0, r9, #0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	add	r0, sp, #216
+	bl	mulPv384x32
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	add	r11, sp, #220
+	add	r0, sp, #160
+	ldr	r2, [r1, #44]
+	ldr	r1, [sp, #264]
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #260]
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #256]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #252]
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #248]
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldm	r11, {r4, r5, r11}
+	ldr	r6, [sp, #244]
+	ldr	r7, [sp, #240]
+	ldr	r8, [sp, #236]
+	ldr	r9, [sp, #232]
+	ldr	r10, [sp, #216]
+	bl	mulPv384x32
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	add	lr, sp, #160
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	add	r12, sp, #176
+	adds	r0, r0, r10
+	add	r10, sp, #200
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r4, r0, r4
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r5, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldm	lr, {r2, r7, lr}
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adds	r4, r4, r2
+	adcs	r7, r5, r7
+	ldr	r5, [sp, #80]                   @ 4-byte Reload
+	ldr	r6, [sp, #172]
+	mul	r1, r0, r4
+	adcs	r5, r5, lr
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldm	r10, {r8, r9, r10}
+	ldr	r11, [sp, #196]
+	ldm	r12, {r0, r1, r2, r3, r12}
+	str	r5, [sp, #32]                   @ 4-byte Spill
+	ldr	r5, [sp, #76]                   @ 4-byte Reload
+	adcs	r6, r5, r6
+	str	r6, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r6, r0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r6, r0, r11
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r9, r0, r9
+	adc	r0, r10, #0
+	ldr	r10, [sp, #100]                 @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	add	r0, sp, #104
+	mov	r1, r10
+	bl	mulPv384x32
+	add	r3, sp, #104
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r4, r0
+	ldr	r4, [r10, #12]
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	adcs	r8, r7, r1
+	str	r8, [sp, #80]                   @ 4-byte Spill
+	adcs	r5, r0, r2
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	str	r5, [sp, #72]                   @ 4-byte Spill
+	adcs	r7, r0, r3
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r2, r1, r0
+	str	r2, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #124]
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #132]
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r1, r0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #136]
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r1, r0
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #140]
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r6, r0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #144]
 	adcs	r0, r1, r0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #148]
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r9, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #152]
+	adc	r6, r1, r0
+	ldm	r10, {r1, r3, r9}
+	mov	r0, r10
+	subs	r1, r8, r1
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	sbcs	r1, r5, r3
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	sbcs	r1, r7, r9
+	ldr	r12, [r0, #28]
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	sbcs	r1, r2, r4
+	ldr	r10, [r10, #16]
+	add	r3, r0, #36
+	ldr	r11, [r0, #20]
+	ldr	lr, [r0, #24]
+	ldr	r4, [r0, #32]
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	str	r12, [sp, #36]                  @ 4-byte Spill
+	sbcs	r10, r0, r10
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	sbcs	r9, r0, r11
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r11, [sp, #48]                  @ 4-byte Reload
+	sbcs	r7, r0, lr
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r8, [sp, #44]                   @ 4-byte Reload
+	sbcs	r5, r11, r0
+	ldm	r3, {r1, r2, r3}
+	sbcs	r4, r8, r4
+	ldr	lr, [sp, #32]                   @ 4-byte Reload
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	sbcs	r1, lr, r1
+	sbcs	r2, r0, r2
+	sbc	r3, r6, r3
+	asr	r12, r3, #31
+	cmn	r12, #1
+	movgt	r6, r3
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	movle	r2, r0
+	movle	r1, lr
+	cmn	r12, #1
+	add	r0, r3, #36
+	movle	r4, r8
+	movle	r5, r11
+	str	r4, [r3, #32]
+	stm	r0, {r1, r2, r6}
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	movle	r7, r0
+	cmn	r12, #1
+	movle	r9, r1
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	str	r5, [r3, #28]
+	movle	r10, r1
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	str	r7, [r3, #24]
+	str	r9, [r3, #20]
+	movle	r2, r1
+	cmn	r12, #1
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	movle	r1, r0
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	str	r1, [r3, #8]
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	str	r10, [r3, #16]
+	str	r2, [r3, #12]
+	movle	r0, r1
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	str	r0, [r3, #4]
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	movle	r0, r1
+	str	r0, [r3]
+	add	sp, sp, #428
+	add	sp, sp, #1024
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end61:
+	.size	mcl_fp_montNF12L, .Lfunc_end61-mcl_fp_montNF12L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_montRed12L               @ -- Begin function mcl_fp_montRed12L
+	.p2align	2
+	.type	mcl_fp_montRed12L,%function
+	.code	32                              @ @mcl_fp_montRed12L
+mcl_fp_montRed12L:
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#852
+	sub	sp, sp, #852
+	str	r0, [sp, #160]                  @ 4-byte Spill
+	mov	r6, r2
+	ldr	r0, [r2]
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [r2, #4]
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [r2, #8]
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [r2, #12]
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [r2, #16]
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [r2, #20]
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [r2, #24]
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	ldr	r0, [r1, #4]
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [r1, #8]
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [r1, #12]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [r1, #16]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [r1, #20]
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [r1, #24]
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r1, #28]
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [r6, #28]
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [r6, #32]
+	ldr	r3, [r2, #-4]
+	ldr	r4, [r1]
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [r6, #36]
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	mul	r2, r4, r3
+	ldr	r0, [r6, #40]
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [r6, #44]
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [r1, #32]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [r1, #36]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [r1, #40]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r1, #44]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	add	r0, sp, #792
+	str	r1, [sp, #172]                  @ 4-byte Spill
 	mov	r1, r6
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, r9, #120
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1212]
-	ldr	r9, [sp, #56]           @ 4-byte Reload
-	ldr	r8, [sp, #1184]
-	ldr	r7, [sp, #1180]
-	ldr	r11, [sp, #1176]
-	ldr	r5, [sp, #1172]
-	ldr	lr, [sp, #1168]
-	ldr	r10, [sp, #1164]
-	ldr	r12, [sp, #1160]
-	ldr	r1, [sp, #1148]
-	ldr	r2, [sp, #1152]
-	ldr	r3, [sp, #1156]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1208]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1204]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1200]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1196]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1192]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1188]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1144]
-	adds	r0, r0, r9
-	str	r0, [r4, #8]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r3, [sp, #164]                  @ 4-byte Spill
+	str	r6, [sp, #168]                  @ 4-byte Spill
+	bl	mulPv384x32
+	add	lr, sp, #792
+	add	r11, sp, #816
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r0, r4, r0
+	ldm	r11, {r5, r7, r8, r9, r10, r11}
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	adcs	r4, r0, r1
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	mov	r1, r6
+	adcs	r0, r0, r2
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	mrs	r0, apsr
+	ldr	r5, [sp, #164]                  @ 4-byte Reload
+	ldr	r7, [sp, #172]                  @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	mul	r2, r5, r4
+	ldr	r0, [r7, #48]
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #840]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	add	r0, sp, #736
+	bl	mulPv384x32
+	add	r3, sp, #736
+	ldr	r12, [sp, #776]
+	ldr	lr, [sp, #772]
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r4, r0
+	ldr	r4, [sp, #108]                  @ 4-byte Reload
+	ldr	r6, [sp, #768]
+	adcs	r8, r4, r1
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r9, [sp, #764]
+	adcs	r1, r1, r2
+	ldr	r10, [sp, #760]
+	ldr	r11, [sp, #756]
+	mul	r2, r5, r8
+	ldr	r0, [sp, #752]
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	mov	r9, r7
+	adcs	r0, r0, r6
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	mov	r6, r5
+	adcs	r0, r0, lr
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	ldr	r0, [sp, #784]
+	adcs	r1, r1, r3
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r7, #52]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r4, [sp, #168]                  @ 4-byte Reload
+	ldr	r0, [sp, #780]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	add	r0, sp, #680
+	mov	r1, r4
+	bl	mulPv384x32
+	add	lr, sp, #680
+	ldr	r11, [sp, #716]
+	ldr	r10, [sp, #712]
+	ldm	lr, {r0, r1, r2, r3, r5, r7, r12, lr}
+	adds	r0, r8, r0
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r8, r0, r1
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	mul	r2, r6, r8
+	adcs	r0, r0, r3
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	ldr	r0, [sp, #728]
+	adcs	r1, r1, r3
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r9, #56]
+	mov	r1, r4
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	add	r0, sp, #624
+	ldr	r10, [sp, #724]
+	ldr	r9, [sp, #720]
+	bl	mulPv384x32
+	ldr	r5, [sp, #624]
+	add	r11, sp, #628
+	ldr	r12, [sp, #656]
+	adds	r4, r8, r5
+	ldm	r11, {r0, r1, r7, r11}
+	mov	r8, r6
+	ldr	r4, [sp, #100]                  @ 4-byte Reload
+	ldr	lr, [sp, #652]
+	adcs	r4, r4, r0
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [sp, #648]
+	adcs	r0, r0, r1
+	ldr	r3, [sp, #644]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	mul	r2, r6, r4
+	adcs	r0, r0, lr
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r0, [sp, #672]
+	adcs	r1, r1, r9
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r1, r10
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r6, [sp, #172]                  @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r6, #60]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r9, [sp, #168]                  @ 4-byte Reload
+	ldr	r0, [sp, #668]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	add	r0, sp, #568
+	mov	r1, r9
+	ldr	r11, [sp, #664]
+	ldr	r10, [sp, #660]
+	bl	mulPv384x32
+	ldr	r7, [sp, #568]
+	add	r3, sp, #576
+	ldr	r5, [sp, #572]
+	adds	r4, r4, r7
+	ldm	r3, {r0, r1, r3}
+	ldr	r4, [sp, #100]                  @ 4-byte Reload
+	ldr	r12, [sp, #596]
+	adcs	r4, r4, r5
+	ldr	r5, [sp, #96]                   @ 4-byte Reload
+	ldr	lr, [sp, #592]
 	adcs	r0, r5, r0
-	ldr	r5, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [r5, #12]
-	adcs	r0, r11, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r6
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	ldr	r2, [sp, #588]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	mul	r2, r8, r4
+	adcs	r0, r0, lr
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	ldr	r0, [sp, #616]
+	adcs	r1, r1, r10
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r1, r11
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #68]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #1072
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1140]
-	add	lr, sp, #1072
-	ldr	r10, [sp, #1112]
-	ldr	r9, [sp, #1108]
-	ldr	r8, [sp, #1104]
-	ldr	r7, [sp, #1100]
-	ldr	r6, [sp, #1096]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1136]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1132]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1128]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1124]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1120]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1116]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #56]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #12]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r4, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #16]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [r6, #64]
+	mov	r1, r9
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #612]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #608]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #604]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	add	r0, sp, #512
+	ldr	r10, [sp, #600]
+	bl	mulPv384x32
+	ldr	r3, [sp, #512]
+	ldr	r7, [sp, #516]
+	adds	r3, r4, r3
+	ldr	r5, [sp, #520]
+	ldr	r3, [sp, #100]                  @ 4-byte Reload
+	ldr	r12, [sp, #536]
+	adcs	r4, r3, r7
+	ldr	r3, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #532]
+	adcs	r3, r3, r5
+	ldr	r2, [sp, #528]
+	ldr	r0, [sp, #524]
+	str	r3, [sp, #100]                  @ 4-byte Spill
+	ldr	r3, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r3, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	mul	r2, r8, r4
+	adcs	r0, r0, r1
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r0, [sp, #560]
+	adcs	r1, r1, r10
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [r6, #68]
+	mov	r1, r9
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #556]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #552]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	add	r0, sp, #456
+	ldr	r8, [sp, #548]
+	ldr	r10, [sp, #544]
+	ldr	r11, [sp, #540]
+	bl	mulPv384x32
+	add	r7, sp, #456
+	ldr	r0, [sp, #476]
+	ldr	r1, [sp, #472]
+	ldm	r7, {r2, r3, r7}
+	adds	r2, r4, r2
+	ldr	r5, [sp, #468]
+	ldr	r2, [sp, #100]                  @ 4-byte Reload
+	adcs	r9, r2, r3
+	ldr	r2, [sp, #96]                   @ 4-byte Reload
+	adcs	r2, r2, r7
+	str	r2, [sp, #100]                  @ 4-byte Spill
+	ldr	r2, [sp, #92]                   @ 4-byte Reload
+	adcs	r2, r2, r5
+	str	r2, [sp, #96]                   @ 4-byte Spill
+	ldr	r2, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r0, [sp, #504]
+	adcs	r1, r1, r11
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r10
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	ldr	r6, [sp, #172]                  @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	ldr	r4, [sp, #164]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	mul	r2, r4, r9
+	ldr	r7, [sp, #168]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r6, #72]
+	mov	r1, r7
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #500]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #496]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #492]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #488]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	add	r0, sp, #400
+	ldr	r10, [sp, #484]
+	ldr	r8, [sp, #480]
+	bl	mulPv384x32
+	add	r5, sp, #400
+	ldr	r0, [sp, #416]
+	ldm	r5, {r1, r2, r3, r5}
+	adds	r1, r9, r1
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r11, r1, r2
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	mul	r2, r4, r11
+	adcs	r1, r1, r5
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	mov	r5, r6
 	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r0, [sp, #448]
+	adcs	r1, r1, r8
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r1, r10
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #1000
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1068]
-	add	r11, sp, #1024
-	add	lr, sp, #1000
-	ldr	r6, [sp, #1040]
-	ldr	r5, [sp, #1036]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1064]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1060]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1056]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1052]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1048]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1044]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	ldr	r8, [sp, #132]          @ 4-byte Reload
-	adds	r0, r0, r7
-	str	r0, [r8, #16]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r2, [r5, #20]
-	adcs	r0, r6, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r6, #76]
+	mov	r1, r7
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #444]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #440]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #436]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	add	r0, sp, #344
+	ldr	r4, [sp, #432]
+	ldr	r8, [sp, #428]
+	ldr	r10, [sp, #424]
+	ldr	r9, [sp, #420]
+	bl	mulPv384x32
+	add	r3, sp, #344
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r11, r0
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r7, r0, r1
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	ldr	r0, [sp, #392]
+	adcs	r1, r1, r9
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r1, r10
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r6, [sp, #164]                  @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	mul	r2, r6, r7
+	adcs	r1, r1, r4
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #928
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #996]
-	add	r11, sp, #952
-	add	lr, sp, #928
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #992]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #988]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #984]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #980]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #976]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r11, {r6, r7, r8, r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r4, [sp, #56]           @ 4-byte Reload
-	adds	r0, r0, r4
-	ldr	r4, [sp, #132]          @ 4-byte Reload
-	str	r0, [r4, #20]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r5, #24]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r6
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r5, #80]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #388]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #384]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r5, [sp, #168]                  @ 4-byte Reload
+	ldr	r0, [sp, #380]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	add	r0, sp, #288
+	mov	r1, r5
+	ldr	r4, [sp, #376]
+	ldr	r8, [sp, #372]
+	ldr	r9, [sp, #368]
+	ldr	r10, [sp, #364]
+	ldr	r11, [sp, #360]
+	bl	mulPv384x32
+	add	r3, sp, #288
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r7, r0
+	str	r3, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r7, r0, r1
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	mrs	r0, apsr
+	mul	r2, r6, r7
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r0, [sp, #336]
+	adcs	r11, r1, r11
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
+	adcs	r10, r1, r10
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	adcs	r9, r1, r9
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r4
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	ldr	r4, [sp, #172]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #84]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #856
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #924]
-	add	r11, sp, #880
-	add	lr, sp, #856
-	ldr	r7, [sp, #896]
-	ldr	r5, [sp, #892]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #920]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #916]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #912]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #908]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #904]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #900]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r8, [sp, #56]           @ 4-byte Reload
-	adds	r0, r0, r8
-	str	r0, [r4, #24]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r4, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r4, #28]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [r4, #84]
+	mov	r1, r5
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #332]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #328]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #324]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #320]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #316]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #312]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #308]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	add	r0, sp, #232
+	ldr	r8, [sp, #304]
+	bl	mulPv384x32
+	add	r2, sp, #232
+	ldm	r2, {r0, r1, r2}
+	adds	r0, r7, r0
+	str	r2, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r2, [sp, #244]
+	adcs	r0, r0, r1
+	str	r2, [sp, #40]                   @ 4-byte Spill
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	mrs	r1, apsr
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	mul	r2, r6, r0
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r1
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r11, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r5, r0
-	mov	r5, r6
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #276]
+	adcs	r5, r10, r8
+	str	r0, [sp, #164]                  @ 4-byte Spill
+	adcs	r11, r9, r1
+	ldr	r0, [sp, #272]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
+	ldr	r0, [sp, #268]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	adcs	r9, r1, r3
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
+	ldr	r0, [sp, #264]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r10, r1, r3
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	ldr	r0, [sp, #260]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	adcs	r8, r1, r3
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	ldr	r0, [sp, #256]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adcs	r7, r1, r3
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	ldr	r0, [sp, #252]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r6, [r4, #88]
+	adcs	r4, r1, r3
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r3, [sp, #68]                   @ 4-byte Reload
+	ldr	r0, [sp, #248]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	adcs	r1, r1, r3
+	ldr	r0, [sp, #280]
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	adc	r0, r0, #0
+	ldr	r1, [sp, #168]                  @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	add	r0, sp, #176
+	bl	mulPv384x32
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r3, r1, r0
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	adcs	r12, r5, r0
+	ldr	r0, [sp, #4]                    @ 4-byte Reload
+	adcs	r2, r11, r0
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	adcs	r9, r9, r0
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r10, r10, r0
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	r11, r8, r0
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r7, r7, r0
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r8, r4, r0
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r1, r0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #164]                  @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
 	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #784
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #852]
-	add	r10, sp, #808
-	add	lr, sp, #784
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #848]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #844]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #840]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #836]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #832]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #828]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #824]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r6, r8, r9, r10}
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r6, r0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r4, [sp, #224]
+	adc	r0, r4, #0
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r6, [sp, #176]
+	ldr	r4, [sp, #108]                  @ 4-byte Reload
+	ldr	r5, [sp, #180]
+	adds	r6, r4, r6
+	ldr	r0, [sp, #184]
+	adcs	r3, r3, r5
+	ldr	r1, [sp, #188]
+	adcs	r6, r12, r0
+	str	r3, [sp, #168]                  @ 4-byte Spill
+	adcs	lr, r2, r1
+	str	r6, [sp, #164]                  @ 4-byte Spill
+	str	lr, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #192]
+	ldr	r4, [sp, #80]                   @ 4-byte Reload
+	adcs	r2, r9, r0
+	str	r2, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #196]
+	adcs	r5, r10, r0
+	str	r5, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #200]
+	adcs	r12, r11, r0
+	str	r12, [sp, #96]                  @ 4-byte Spill
+	ldr	r0, [sp, #204]
+	adcs	r11, r7, r0
+	str	r11, [sp, #76]                  @ 4-byte Spill
+	ldr	r0, [sp, #208]
+	adcs	r1, r8, r0
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #212]
+	adcs	r0, r4, r0
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #216]
+	ldr	r4, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r4, r0
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #220]
+	ldr	r4, [sp, #92]                   @ 4-byte Reload
+	adcs	r10, r4, r0
+	ldr	r0, [sp, #172]                  @ 4-byte Reload
+	ldr	r4, [sp, #88]                   @ 4-byte Reload
+	ldr	r0, [r0, #92]
+	adc	r7, r0, r4
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	ldr	r4, [sp, #80]                   @ 4-byte Reload
+	subs	r0, r3, r0
+	str	r0, [sp, #172]                  @ 4-byte Spill
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	ldr	r3, [sp, #128]                  @ 4-byte Reload
+	sbcs	r0, r6, r0
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #148]                  @ 4-byte Reload
+	sbcs	r0, lr, r0
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	sbcs	r0, r2, r0
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	ldr	r2, [sp, #124]                  @ 4-byte Reload
+	sbcs	r9, r5, r0
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
+	sbcs	r8, r12, r0
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
+	ldr	r12, [sp, #84]                  @ 4-byte Reload
+	sbcs	r5, r11, r0
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	mov	r11, #0
+	sbcs	lr, r1, r0
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	sbcs	r0, r4, r0
+	sbcs	r1, r12, r1
+	sbcs	r2, r10, r2
+	sbcs	r3, r7, r3
+	sbc	r6, r11, #0
+	ands	r6, r6, #1
+	movne	r3, r7
+	movne	r2, r10
+	movne	r1, r12
+	cmp	r6, #0
+	movne	r0, r4
+	ldr	r4, [sp, #160]                  @ 4-byte Reload
+	add	r12, r4, #32
+	stm	r12, {r0, r1, r2, r3}
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #148]                  @ 4-byte Reload
+	movne	lr, r0
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	str	lr, [r4, #28]
+	movne	r5, r0
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	cmp	r6, #0
+	str	r5, [r4, #24]
+	movne	r8, r0
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	str	r8, [r4, #20]
+	movne	r9, r0
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	str	r9, [r4, #16]
+	movne	r1, r0
+	cmp	r6, #0
+	str	r1, [r4, #12]
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	movne	r0, r1
+	ldr	r1, [sp, #164]                  @ 4-byte Reload
+	str	r0, [r4, #8]
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	movne	r0, r1
+	ldr	r1, [sp, #168]                  @ 4-byte Reload
+	str	r0, [r4, #4]
+	ldr	r0, [sp, #172]                  @ 4-byte Reload
+	movne	r0, r1
+	str	r0, [r4]
+	add	sp, sp, #852
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end62:
+	.size	mcl_fp_montRed12L, .Lfunc_end62-mcl_fp_montRed12L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_montRedNF12L             @ -- Begin function mcl_fp_montRedNF12L
+	.p2align	2
+	.type	mcl_fp_montRedNF12L,%function
+	.code	32                              @ @mcl_fp_montRedNF12L
+mcl_fp_montRedNF12L:
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#852
+	sub	sp, sp, #852
+	str	r0, [sp, #160]                  @ 4-byte Spill
+	mov	r6, r2
+	ldr	r0, [r2]
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [r2, #4]
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [r2, #8]
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [r2, #12]
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [r2, #16]
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [r2, #20]
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [r2, #24]
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	ldr	r0, [r1, #4]
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [r1, #8]
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [r1, #12]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [r1, #16]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [r1, #20]
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [r1, #24]
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r1, #28]
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [r6, #28]
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [r6, #32]
+	ldr	r3, [r2, #-4]
+	ldr	r4, [r1]
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [r6, #36]
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	mul	r2, r4, r3
+	ldr	r0, [r6, #40]
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [r6, #44]
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [r1, #32]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [r1, #36]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [r1, #40]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r1, #44]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	add	r0, sp, #792
+	str	r1, [sp, #172]                  @ 4-byte Spill
+	mov	r1, r6
+	str	r3, [sp, #164]                  @ 4-byte Spill
+	str	r6, [sp, #168]                  @ 4-byte Spill
+	bl	mulPv384x32
+	add	lr, sp, #792
+	add	r11, sp, #816
 	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	ldr	r11, [sp, #132]         @ 4-byte Reload
-	adds	r0, r0, r7
-	str	r0, [r11, #28]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r4, #32]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	adds	r0, r4, r0
+	ldm	r11, {r5, r7, r8, r9, r10, r11}
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	adcs	r4, r0, r1
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	mov	r1, r6
+	adcs	r0, r0, r2
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	mrs	r0, apsr
+	ldr	r5, [sp, #164]                  @ 4-byte Reload
+	ldr	r7, [sp, #172]                  @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	mul	r2, r5, r4
+	ldr	r0, [r7, #48]
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #840]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	add	r0, sp, #736
+	bl	mulPv384x32
+	add	r3, sp, #736
+	ldr	r12, [sp, #776]
+	ldr	lr, [sp, #772]
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r4, r0
+	ldr	r4, [sp, #108]                  @ 4-byte Reload
+	ldr	r6, [sp, #768]
+	adcs	r8, r4, r1
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r9, [sp, #764]
+	adcs	r1, r1, r2
+	ldr	r10, [sp, #760]
+	ldr	r11, [sp, #756]
+	mul	r2, r5, r8
+	ldr	r0, [sp, #752]
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	mov	r9, r7
+	adcs	r0, r0, r6
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	mov	r6, r5
+	adcs	r0, r0, lr
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	ldr	r0, [sp, #784]
+	adcs	r1, r1, r3
+	str	r1, [sp, #64]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #712
-	bl	.LmulPv544x32(PLT)
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r7, #52]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r4, [sp, #168]                  @ 4-byte Reload
 	ldr	r0, [sp, #780]
-	add	r8, sp, #748
-	add	r11, sp, #736
-	add	lr, sp, #712
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #776]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #772]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #768]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #764]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r8, {r4, r6, r7, r8}
-	ldm	r11, {r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r5, [sp, #56]           @ 4-byte Reload
-	adds	r0, r0, r5
-	ldr	r5, [sp, #132]          @ 4-byte Reload
-	str	r0, [r5, #32]
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r2, [r6, #36]
-	adcs	r0, r7, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	add	r0, sp, #680
 	mov	r1, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	bl	mulPv384x32
+	add	lr, sp, #680
+	ldr	r11, [sp, #716]
+	ldr	r10, [sp, #712]
+	ldm	lr, {r0, r1, r2, r3, r5, r7, r12, lr}
+	adds	r0, r8, r0
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r8, r0, r1
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	mul	r2, r6, r8
+	adcs	r0, r0, r3
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	ldr	r0, [sp, #728]
+	adcs	r1, r1, r3
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #640
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #708]
-	add	r10, sp, #664
-	add	lr, sp, #640
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #688]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #684]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #680]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #56]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r5, #36]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r6, #40]
-	mov	r6, r4
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r9, #56]
 	mov	r1, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	add	r0, sp, #624
+	ldr	r10, [sp, #724]
+	ldr	r9, [sp, #720]
+	bl	mulPv384x32
+	ldr	r5, [sp, #624]
+	add	r11, sp, #628
+	ldr	r12, [sp, #656]
+	adds	r4, r8, r5
+	ldm	r11, {r0, r1, r7, r11}
+	mov	r8, r6
+	ldr	r4, [sp, #100]                  @ 4-byte Reload
+	ldr	lr, [sp, #652]
+	adcs	r4, r4, r0
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [sp, #648]
+	adcs	r0, r0, r1
+	ldr	r3, [sp, #644]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	mul	r2, r6, r4
+	adcs	r0, r0, lr
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r0, [sp, #672]
+	adcs	r1, r1, r9
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r1, r10
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r6, [sp, #172]                  @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r6, #60]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r9, [sp, #168]                  @ 4-byte Reload
+	ldr	r0, [sp, #668]
+	str	r0, [sp, #52]                   @ 4-byte Spill
 	add	r0, sp, #568
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #636]
-	add	r11, sp, #592
-	add	lr, sp, #568
-	ldr	r7, [sp, #608]
-	ldr	r4, [sp, #604]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #632]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #628]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #624]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #620]
-	str	r0, [sp, #32]           @ 4-byte Spill
+	mov	r1, r9
+	ldr	r11, [sp, #664]
+	ldr	r10, [sp, #660]
+	bl	mulPv384x32
+	ldr	r7, [sp, #568]
+	add	r3, sp, #576
+	ldr	r5, [sp, #572]
+	adds	r4, r4, r7
+	ldm	r3, {r0, r1, r3}
+	ldr	r4, [sp, #100]                  @ 4-byte Reload
+	ldr	r12, [sp, #596]
+	adcs	r4, r4, r5
+	ldr	r5, [sp, #96]                   @ 4-byte Reload
+	ldr	lr, [sp, #592]
+	adcs	r0, r5, r0
+	ldr	r2, [sp, #588]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	mul	r2, r8, r4
+	adcs	r0, r0, lr
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
 	ldr	r0, [sp, #616]
-	str	r0, [sp, #28]           @ 4-byte Spill
+	adcs	r1, r1, r10
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r1, r11
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [r6, #64]
+	mov	r1, r9
+	str	r0, [sp, #60]                   @ 4-byte Spill
 	ldr	r0, [sp, #612]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r11, {r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r8, [sp, #56]           @ 4-byte Reload
-	adds	r0, r0, r8
-	str	r0, [r5, #40]
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #608]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #604]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	add	r0, sp, #512
+	ldr	r10, [sp, #600]
+	bl	mulPv384x32
+	ldr	r3, [sp, #512]
+	ldr	r7, [sp, #516]
+	adds	r3, r4, r3
+	ldr	r5, [sp, #520]
+	ldr	r3, [sp, #100]                  @ 4-byte Reload
+	ldr	r12, [sp, #536]
+	adcs	r4, r3, r7
+	ldr	r3, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #532]
+	adcs	r3, r3, r5
+	ldr	r2, [sp, #528]
+	ldr	r0, [sp, #524]
+	str	r3, [sp, #100]                  @ 4-byte Spill
+	ldr	r3, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r3, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	ldr	r2, [r4, #44]
-	adcs	r0, r7, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r6
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #496
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #564]
-	add	r10, sp, #520
-	add	lr, sp, #496
-	str	r0, [sp, #44]           @ 4-byte Spill
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	mul	r2, r8, r4
+	adcs	r0, r0, r1
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
 	ldr	r0, [sp, #560]
-	str	r0, [sp, #40]           @ 4-byte Spill
+	adcs	r1, r1, r10
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [r6, #68]
+	mov	r1, r9
+	str	r0, [sp, #60]                   @ 4-byte Spill
 	ldr	r0, [sp, #556]
-	str	r0, [sp, #36]           @ 4-byte Spill
+	str	r0, [sp, #52]                   @ 4-byte Spill
 	ldr	r0, [sp, #552]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #548]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #544]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #540]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r10, {r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #48]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r5, #44]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r5, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r4, #48]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	mov	r8, r4
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #424
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #492]
-	add	lr, sp, #428
-	ldr	r9, [sp, #460]
-	ldr	r7, [sp, #456]
-	ldr	r11, [sp, #452]
-	ldr	r10, [sp, #448]
-	ldr	r3, [sp, #424]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #488]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #484]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #480]
-	str	r0, [sp, #36]           @ 4-byte Spill
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	add	r0, sp, #456
+	ldr	r8, [sp, #548]
+	ldr	r10, [sp, #544]
+	ldr	r11, [sp, #540]
+	bl	mulPv384x32
+	add	r7, sp, #456
 	ldr	r0, [sp, #476]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #472]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #468]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #464]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r12, lr}
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	ldr	r4, [sp, #120]          @ 4-byte Reload
-	adds	r3, r3, r6
-	ldr	r6, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	str	r3, [r6, #48]
-	ldr	r3, [r8, #52]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	mov	r2, r3
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r5
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	ldr	r1, [sp, #472]
+	ldm	r7, {r2, r3, r7}
+	adds	r2, r4, r2
+	ldr	r5, [sp, #468]
+	ldr	r2, [sp, #100]                  @ 4-byte Reload
+	adcs	r9, r2, r3
+	ldr	r2, [sp, #96]                   @ 4-byte Reload
+	adcs	r2, r2, r7
+	str	r2, [sp, #100]                  @ 4-byte Spill
+	ldr	r2, [sp, #92]                   @ 4-byte Reload
+	adcs	r2, r2, r5
+	str	r2, [sp, #96]                   @ 4-byte Spill
+	ldr	r2, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r0, [sp, #504]
+	adcs	r1, r1, r11
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r10
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	ldr	r6, [sp, #172]                  @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	ldr	r4, [sp, #164]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	mul	r2, r4, r9
+	ldr	r7, [sp, #168]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #352
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #420]
-	add	r11, sp, #380
-	add	r12, sp, #356
-	str	r0, [sp, #56]           @ 4-byte Spill
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r6, #72]
+	mov	r1, r7
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #500]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #496]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #492]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #488]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	add	r0, sp, #400
+	ldr	r10, [sp, #484]
+	ldr	r8, [sp, #480]
+	bl	mulPv384x32
+	add	r5, sp, #400
 	ldr	r0, [sp, #416]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #412]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #408]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #404]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #400]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #396]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r11, {r4, r9, r10, r11}
-	ldr	r5, [sp, #376]
-	ldr	lr, [sp, #352]
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r7, [sp, #52]           @ 4-byte Reload
-	adds	r7, lr, r7
-	ldr	lr, [r8, #56]
-	str	r7, [r6, #52]
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	add	r7, sp, #280
-	adcs	r0, r0, r6
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	mov	r2, lr
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	ldm	r5, {r1, r2, r3, r5}
+	adds	r1, r9, r1
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r11, r1, r2
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	mul	r2, r4, r11
+	adcs	r1, r1, r5
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	mov	r5, r6
 	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r0, [sp, #448]
+	adcs	r1, r1, r8
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r1, r10
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, r7
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #348]
-	add	r8, sp, #316
-	add	r11, sp, #304
-	add	lr, sp, #280
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #344]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #340]
-	str	r0, [sp, #40]           @ 4-byte Spill
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r6, #76]
+	mov	r1, r7
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #444]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #440]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #436]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	add	r0, sp, #344
+	ldr	r4, [sp, #432]
+	ldr	r8, [sp, #428]
+	ldr	r10, [sp, #424]
+	ldr	r9, [sp, #420]
+	bl	mulPv384x32
+	add	r3, sp, #344
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r11, r0
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r7, r0, r1
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	ldr	r0, [sp, #392]
+	adcs	r1, r1, r9
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r1, r10
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r6, [sp, #164]                  @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	mul	r2, r6, r7
+	adcs	r1, r1, r4
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [r5, #80]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #388]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #384]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r5, [sp, #168]                  @ 4-byte Reload
+	ldr	r0, [sp, #380]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	add	r0, sp, #288
+	mov	r1, r5
+	ldr	r4, [sp, #376]
+	ldr	r8, [sp, #372]
+	ldr	r9, [sp, #368]
+	ldr	r10, [sp, #364]
+	ldr	r11, [sp, #360]
+	bl	mulPv384x32
+	add	r3, sp, #288
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r7, r0
+	str	r3, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r7, r0, r1
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	mrs	r0, apsr
+	mul	r2, r6, r7
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
 	ldr	r0, [sp, #336]
-	str	r0, [sp, #36]           @ 4-byte Spill
+	adcs	r11, r1, r11
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
+	adcs	r10, r1, r10
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	adcs	r9, r1, r9
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r4
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	ldr	r4, [sp, #172]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [r4, #84]
+	mov	r1, r5
+	str	r0, [sp, #76]                   @ 4-byte Spill
 	ldr	r0, [sp, #332]
-	str	r0, [sp, #32]           @ 4-byte Spill
+	str	r0, [sp, #68]                   @ 4-byte Spill
 	ldr	r0, [sp, #328]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r8, {r6, r7, r8}
-	ldm	r11, {r9, r10, r11}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	adds	r0, r0, r5
-	ldr	r5, [sp, #132]          @ 4-byte Reload
-	str	r0, [r5, #56]
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #324]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #320]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #316]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #312]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #308]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	add	r0, sp, #232
+	ldr	r8, [sp, #304]
+	bl	mulPv384x32
+	add	r2, sp, #232
+	ldm	r2, {r0, r1, r2}
+	adds	r0, r7, r0
+	str	r2, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r2, [sp, #244]
+	adcs	r0, r0, r1
+	str	r2, [sp, #40]                   @ 4-byte Spill
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	mrs	r1, apsr
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	mul	r2, r6, r0
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r1
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r11, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	ldr	r8, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r2, [r8, #60]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #208
-	bl	.LmulPv544x32(PLT)
+	str	r0, [sp, #20]                   @ 4-byte Spill
 	ldr	r0, [sp, #276]
-	add	lr, sp, #228
-	add	r12, sp, #212
-	ldr	r6, [sp, #248]
-	ldr	r9, [sp, #244]
-	ldr	r4, [sp, #240]
-	str	r0, [sp, #40]           @ 4-byte Spill
+	adcs	r5, r10, r8
+	str	r0, [sp, #164]                  @ 4-byte Spill
+	adcs	r11, r9, r1
 	ldr	r0, [sp, #272]
-	str	r0, [sp, #36]           @ 4-byte Spill
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #32]                   @ 4-byte Reload
 	ldr	r0, [sp, #268]
-	str	r0, [sp, #32]           @ 4-byte Spill
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	adcs	r9, r1, r3
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
 	ldr	r0, [sp, #264]
-	str	r0, [sp, #28]           @ 4-byte Spill
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	adcs	r10, r1, r3
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
 	ldr	r0, [sp, #260]
-	str	r0, [sp, #24]           @ 4-byte Spill
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	adcs	r8, r1, r3
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
 	ldr	r0, [sp, #256]
-	str	r0, [sp, #20]           @ 4-byte Spill
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	adcs	r7, r1, r3
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
 	ldr	r0, [sp, #252]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	lr, {r10, r11, lr}
-	ldr	r3, [sp, #208]
-	ldm	r12, {r0, r1, r2, r12}
-	ldr	r7, [sp, #88]           @ 4-byte Reload
-	adds	r3, r3, r7
-	str	r3, [r5, #60]
-	ldr	r5, [sp, #120]          @ 4-byte Reload
-	ldr	r3, [r8, #64]
-	adcs	r8, r0, r5
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r5, r1, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	mov	r2, r3
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r6, [r4, #88]
+	adcs	r4, r1, r3
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r3, [sp, #68]                   @ 4-byte Reload
+	ldr	r0, [sp, #248]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	adcs	r1, r1, r3
+	ldr	r0, [sp, #280]
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	adc	r0, r0, #0
+	ldr	r1, [sp, #168]                  @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	add	r0, sp, #176
+	bl	mulPv384x32
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r3, r1, r0
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	adcs	r12, r5, r0
+	ldr	r0, [sp, #4]                    @ 4-byte Reload
+	adcs	r2, r11, r0
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	adcs	lr, r9, r0
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	r10, r10, r0
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	r11, r8, r0
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	adcs	r8, r7, r0
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r4, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #164]                  @ 4-byte Reload
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
 	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	add	r0, sp, #136
-	bl	.LmulPv544x32(PLT)
-	add	r3, sp, #136
-	add	r11, sp, #172
-	add	lr, sp, #152
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r7, r0, r8
-	ldr	r0, [sp, #12]           @ 4-byte Reload
-	adcs	r6, r1, r5
-	adcs	r5, r2, r0
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r4, r3, r0
-	ldr	r0, [sp, #204]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #200]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #196]
-	str	r0, [sp, #80]           @ 4-byte Spill
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r6, r0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r4, [sp, #224]
+	adc	r0, r4, #0
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r6, [sp, #176]
+	ldr	r4, [sp, #108]                  @ 4-byte Reload
+	ldr	r5, [sp, #180]
+	adds	r6, r4, r6
+	ldr	r0, [sp, #184]
+	adcs	r9, r3, r5
+	ldr	r1, [sp, #188]
+	adcs	r6, r12, r0
+	str	r9, [sp, #168]                  @ 4-byte Spill
+	adcs	r7, r2, r1
+	str	r6, [sp, #164]                  @ 4-byte Spill
+	str	r7, [sp, #108]                  @ 4-byte Spill
 	ldr	r0, [sp, #192]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #188]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldm	lr, {r0, r2, r3, r12, lr}
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r7, [r1, #64]
-	str	r6, [r1, #68]
-	str	r5, [r1, #72]
-	ldr	r5, [sp, #44]           @ 4-byte Reload
-	str	r4, [r1, #76]
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [r1, #80]
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	adcs	r3, lr, r0
+	str	r3, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #196]
+	ldr	r4, [sp, #84]                   @ 4-byte Reload
+	adcs	r5, r10, r0
+	str	r5, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #200]
+	adcs	r12, r11, r0
+	str	r12, [sp, #96]                  @ 4-byte Spill
+	ldr	r0, [sp, #204]
+	adcs	lr, r8, r0
+	str	lr, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #208]
+	adcs	r2, r1, r0
+	str	r2, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #212]
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r0
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #216]
+	adcs	r10, r4, r0
+	ldr	r0, [sp, #220]
+	ldr	r4, [sp, #92]                   @ 4-byte Reload
+	adcs	r8, r4, r0
+	ldr	r0, [sp, #172]                  @ 4-byte Reload
+	ldr	r4, [sp, #88]                   @ 4-byte Reload
+	ldr	r0, [r0, #92]
+	adc	r4, r0, r4
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	subs	r0, r9, r0
+	str	r0, [sp, #172]                  @ 4-byte Spill
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	sbcs	r0, r6, r0
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #148]                  @ 4-byte Reload
+	sbcs	r11, r7, r0
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	sbcs	r9, r3, r0
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	sbcs	r7, r5, r0
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
+	sbcs	r6, r12, r0
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
+	sbcs	r5, lr, r0
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	sbcs	lr, r2, r0
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	sbcs	r3, r1, r0
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	sbcs	r1, r10, r0
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	sbcs	r2, r8, r0
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	sbc	r0, r4, r0
+	asr	r12, r0, #31
+	cmn	r12, #1
+	movgt	r4, r0
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	movle	r2, r8
+	movle	r1, r10
+	cmn	r12, #1
+	movle	r3, r0
+	ldr	r0, [sp, #160]                  @ 4-byte Reload
+	str	r3, [r0, #32]
+	add	r3, r0, #36
+	stm	r3, {r1, r2, r4}
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	ldr	r2, [sp, #164]                  @ 4-byte Reload
+	movle	lr, r1
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	str	lr, [r0, #28]
+	movle	r5, r1
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	cmn	r12, #1
+	str	r5, [r0, #24]
+	movle	r6, r1
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	str	r6, [r0, #20]
+	movle	r7, r1
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	str	r7, [r0, #16]
+	movle	r9, r1
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	cmn	r12, #1
+	str	r9, [r0, #12]
+	movle	r11, r1
+	ldr	r1, [sp, #156]                  @ 4-byte Reload
+	movle	r1, r2
+	ldr	r2, [sp, #168]                  @ 4-byte Reload
+	str	r1, [r0, #4]
+	ldr	r1, [sp, #172]                  @ 4-byte Reload
+	movle	r1, r2
+	str	r11, [r0, #8]
+	str	r1, [r0]
+	add	sp, sp, #852
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end63:
+	.size	mcl_fp_montRedNF12L, .Lfunc_end63-mcl_fp_montRedNF12L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_addPre12L                @ -- Begin function mcl_fp_addPre12L
+	.p2align	2
+	.type	mcl_fp_addPre12L,%function
+	.code	32                              @ @mcl_fp_addPre12L
+mcl_fp_addPre12L:	@ out = a + b over 12 x 32-bit words (word 0 least significant), no reduction; out = [r0], a = [r1], b = [r2]; returns the final carry (0/1) in r0
+	.fnstart
+@ %bb.0: single basic block; the carry flag is live from the adds below to the final adc (only flag-preserving ldr/str/ldm/stm/add/mov are interleaved)
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#32
+	sub	sp, sp, #32	@ 32 bytes of spill space: holds b[4..11]
+	ldr	r3, [r2, #16]	@ b[4]
+	add	r10, r1, #32	@ r10 = &a[8]
+	str	r3, [sp]                        @ 4-byte Spill (b[4])
+	ldr	r3, [r2, #20]	@ b[5]
+	ldr	lr, [r2]	@ b[0]
+	ldm	r1, {r5, r6, r7}	@ a[0], a[1], a[2]
+	str	r3, [sp, #4]                    @ 4-byte Spill (b[5])
+	adds	r11, r5, lr	@ word 0 = a[0] + b[0]; starts the carry chain
+	ldr	r12, [r2, #4]	@ b[1]
+	ldr	r3, [r2, #24]	@ b[6]
+	str	r3, [sp, #8]                    @ 4-byte Spill (b[6])
+	adcs	r6, r6, r12	@ word 1 = a[1] + b[1] + carry
+	ldr	r8, [r2, #8]	@ b[2]
+	add	r12, r1, #16	@ r12 = &a[4]
+	ldr	r3, [r2, #28]	@ b[7]
+	ldr	r5, [r2, #32]	@ b[8]
+	adcs	r7, r7, r8	@ word 2
+	str	r3, [sp, #12]                   @ 4-byte Spill (b[7])
+	str	r5, [sp, #16]                   @ 4-byte Spill (b[8])
+	ldr	r5, [r2, #36]	@ b[9]
+	ldr	r4, [r2, #12]	@ b[3]
+	ldr	r3, [r1, #12]	@ a[3]
+	str	r5, [sp, #20]                   @ 4-byte Spill (b[9])
+	ldr	r5, [r2, #40]	@ b[10]
+	adcs	lr, r3, r4	@ word 3
+	ldr	r2, [r2, #44]	@ b[11]; the b pointer in r2 is no longer needed
+	str	r5, [sp, #24]                   @ 4-byte Spill (b[10])
+	str	r2, [sp, #28]                   @ 4-byte Spill (b[11])
+	ldr	r9, [r1, #44]	@ a[11]
+	ldm	r12, {r1, r2, r3, r12}	@ a[4..7]; overwrites the a pointer in r1 (a[8..10] are read later through r10)
+	ldr	r5, [sp]                        @ 4-byte Reload (b[4])
+	str	r11, [r0]	@ out[0]
+	adcs	r1, r1, r5	@ word 4
+	ldr	r5, [sp, #4]                    @ 4-byte Reload (b[5])
+	stmib	r0, {r6, r7, lr}	@ out[1..3]
+	add	lr, r0, #16	@ lr = &out[4]
+	adcs	r2, r2, r5	@ word 5
+	ldr	r5, [sp, #8]                    @ 4-byte Reload (b[6])
+	add	r0, r0, #32	@ r0 = &out[8]
+	ldm	r10, {r4, r8, r10}	@ a[8], a[9], a[10]
+	adcs	r3, r3, r5	@ word 6
+	ldr	r5, [sp, #12]                   @ 4-byte Reload (b[7])
+	ldr	r7, [sp, #28]                   @ 4-byte Reload (b[11])
+	adcs	r12, r12, r5	@ word 7
+	stm	lr, {r1, r2, r3, r12}	@ out[4..7]
+	ldr	r1, [sp, #16]                   @ 4-byte Reload (b[8])
+	ldr	r2, [sp, #20]                   @ 4-byte Reload (b[9])
+	adcs	r1, r4, r1	@ word 8
+	ldr	r3, [sp, #24]                   @ 4-byte Reload (b[10])
+	adcs	r2, r8, r2	@ word 9
+	adcs	r3, r10, r3	@ word 10
+	adcs	r7, r9, r7	@ word 11; leaves the carry out of the whole addition in C
+	stm	r0, {r1, r2, r3, r7}	@ out[8..11]
+	mov	r0, #0
+	adc	r0, r0, #0	@ return value: r0 = 0 + 0 + C, i.e. the final carry
+	add	sp, sp, #32
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr	@ return through the lr restored by the pop above
+.Lfunc_end64:
+	.size	mcl_fp_addPre12L, .Lfunc_end64-mcl_fp_addPre12L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_subPre12L                @ -- Begin function mcl_fp_subPre12L
+	.p2align	2
+	.type	mcl_fp_subPre12L,%function
+	.code	32                              @ @mcl_fp_subPre12L
+mcl_fp_subPre12L:	@ out = a - b over 12 x 32-bit words (word 0 least significant), no reduction; out = [r0], a = [r1], b = [r2]; returns the final borrow (0/1) in r0
+	.fnstart
+@ %bb.0: single basic block; the carry flag (C = 1 means "no borrow" after ARM subtraction) is live from the subs below to the final sbc
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#32
+	sub	sp, sp, #32	@ 32 bytes of spill space: holds b[4..11]
+	ldr	r3, [r2, #16]	@ b[4]
+	add	r10, r1, #32	@ r10 = &a[8]
+	str	r3, [sp]                        @ 4-byte Spill (b[4])
+	ldr	r3, [r2, #20]	@ b[5]
+	ldr	lr, [r2]	@ b[0]
+	ldm	r1, {r5, r6, r7}	@ a[0], a[1], a[2]
+	str	r3, [sp, #4]                    @ 4-byte Spill (b[5])
+	subs	r11, r5, lr	@ word 0 = a[0] - b[0]; starts the borrow chain
+	ldr	r12, [r2, #4]	@ b[1]
+	ldr	r3, [r2, #24]	@ b[6]
+	str	r3, [sp, #8]                    @ 4-byte Spill (b[6])
+	sbcs	r6, r6, r12	@ word 1 = a[1] - b[1] - borrow
+	ldr	r8, [r2, #8]	@ b[2]
+	add	r12, r1, #16	@ r12 = &a[4]
+	ldr	r3, [r2, #28]	@ b[7]
+	ldr	r5, [r2, #32]	@ b[8]
+	sbcs	r7, r7, r8	@ word 2
+	str	r3, [sp, #12]                   @ 4-byte Spill (b[7])
+	str	r5, [sp, #16]                   @ 4-byte Spill (b[8])
+	ldr	r5, [r2, #36]	@ b[9]
+	ldr	r4, [r2, #12]	@ b[3]
+	ldr	r3, [r1, #12]	@ a[3]
+	str	r5, [sp, #20]                   @ 4-byte Spill (b[9])
+	ldr	r5, [r2, #40]	@ b[10]
+	sbcs	lr, r3, r4	@ word 3
+	ldr	r2, [r2, #44]	@ b[11]; the b pointer in r2 is no longer needed
+	str	r5, [sp, #24]                   @ 4-byte Spill (b[10])
+	str	r2, [sp, #28]                   @ 4-byte Spill (b[11])
+	ldr	r9, [r1, #44]	@ a[11]
+	ldm	r12, {r1, r2, r3, r12}	@ a[4..7]; overwrites the a pointer in r1 (a[8..10] are read later through r10)
+	ldr	r5, [sp]                        @ 4-byte Reload (b[4])
+	str	r11, [r0]	@ out[0]
+	sbcs	r1, r1, r5	@ word 4
+	ldr	r5, [sp, #4]                    @ 4-byte Reload (b[5])
+	stmib	r0, {r6, r7, lr}	@ out[1..3]
+	add	lr, r0, #16	@ lr = &out[4]
+	sbcs	r2, r2, r5	@ word 5
+	ldr	r5, [sp, #8]                    @ 4-byte Reload (b[6])
+	add	r0, r0, #32	@ r0 = &out[8]
+	ldm	r10, {r4, r8, r10}	@ a[8], a[9], a[10]
+	sbcs	r3, r3, r5	@ word 6
+	ldr	r5, [sp, #12]                   @ 4-byte Reload (b[7])
+	ldr	r7, [sp, #28]                   @ 4-byte Reload (b[11])
+	sbcs	r12, r12, r5	@ word 7
+	stm	lr, {r1, r2, r3, r12}	@ out[4..7]
+	ldr	r1, [sp, #16]                   @ 4-byte Reload (b[8])
+	ldr	r2, [sp, #20]                   @ 4-byte Reload (b[9])
+	sbcs	r1, r4, r1	@ word 8
+	ldr	r3, [sp, #24]                   @ 4-byte Reload (b[10])
+	sbcs	r2, r8, r2	@ word 9
+	sbcs	r3, r10, r3	@ word 10
+	sbcs	r7, r9, r7	@ word 11; C is now clear iff the whole subtraction borrowed
+	stm	r0, {r1, r2, r3, r7}	@ out[8..11]
+	mov	r0, #0
+	sbc	r0, r0, #0	@ r0 = 0 - 0 - (1 - C): 0 when no borrow, 0xFFFFFFFF when borrow
+	and	r0, r0, #1	@ return value: normalise to 0/1 borrow indicator
+	add	sp, sp, #32
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr	@ return through the lr restored by the pop above
+.Lfunc_end65:
+	.size	mcl_fp_subPre12L, .Lfunc_end65-mcl_fp_subPre12L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_shr1_12L                 @ -- Begin function mcl_fp_shr1_12L
+	.p2align	2
+	.type	mcl_fp_shr1_12L,%function
+	.code	32                              @ @mcl_fp_shr1_12L
+mcl_fp_shr1_12L:	@ out = x >> 1 over 12 x 32-bit words (word 0 least significant); out = [r0], x = [r1]; bit 0 of x[0] is discarded
+	.fnstart
+@ %bb.0: odd output words are built with lsr + orr(lsl #31); even output words with lsrs (bit 0 -> C) + rrx (C -> bit 31)
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	ldr	r9, [r1, #36]	@ x[9]
+	add	r6, r1, #20	@ r6 = &x[5]
+	ldr	r11, [r1, #40]	@ x[10]
+	ldr	r5, [r1, #32]	@ x[8]
+	lsr	r2, r9, #1	@ x[9] >> 1
+	ldr	r10, [r1, #12]	@ x[3]
+	orr	r2, r2, r11, lsl #31	@ bit 0 of x[10] becomes bit 31
+	str	r2, [r0, #36]	@ out[9]
+	ldm	r6, {r2, r3, r6}	@ x[5], x[6], x[7]
+	lsr	r4, r6, #1	@ x[7] >> 1
+	orr	r4, r4, r5, lsl #31	@ bit 0 of x[8] becomes bit 31
+	str	r4, [r0, #28]	@ out[7]
+	lsr	r4, r2, #1	@ x[5] >> 1
+	ldr	r7, [r1, #16]	@ x[4]
+	orr	r4, r4, r3, lsl #31	@ bit 0 of x[6] becomes bit 31
+	str	r4, [r0, #20]	@ out[5]
+	lsr	r4, r10, #1	@ x[3] >> 1
+	ldr	r8, [r1, #8]	@ x[2]
+	ldm	r1, {r12, lr}	@ x[0], x[1]
+	orr	r4, r4, r7, lsl #31	@ bit 0 of x[4] becomes bit 31
+	ldr	r1, [r1, #44]	@ x[11]; the x pointer in r1 is no longer needed
+	str	r4, [r0, #12]	@ out[3]
+	lsr	r4, lr, #1	@ x[1] >> 1
+	orr	r4, r4, r8, lsl #31	@ bit 0 of x[2] becomes bit 31
+	str	r4, [r0, #4]	@ out[1]
+	lsr	r4, r1, #1	@ out[11] value = x[11] >> 1 (top bit becomes 0)
+	lsrs	r1, r1, #1	@ C = bit 0 of x[11]
+	rrx	r1, r11	@ (C << 31) | (x[10] >> 1)
+	str	r4, [r0, #44]	@ out[11]
+	str	r1, [r0, #40]	@ out[10]
+	lsrs	r1, r9, #1	@ C = bit 0 of x[9]
+	rrx	r1, r5	@ (C << 31) | (x[8] >> 1)
+	str	r1, [r0, #32]	@ out[8]
+	lsrs	r1, r6, #1	@ C = bit 0 of x[7]
+	rrx	r1, r3	@ (C << 31) | (x[6] >> 1)
+	str	r1, [r0, #24]	@ out[6]
+	lsrs	r1, r2, #1	@ C = bit 0 of x[5]
+	rrx	r1, r7	@ (C << 31) | (x[4] >> 1)
+	str	r1, [r0, #16]	@ out[4]
+	lsrs	r1, r10, #1	@ C = bit 0 of x[3]
+	rrx	r1, r8	@ (C << 31) | (x[2] >> 1)
+	str	r1, [r0, #8]	@ out[2]
+	lsrs	r1, lr, #1	@ C = bit 0 of x[1]
+	rrx	r1, r12	@ (C << 31) | (x[0] >> 1)
+	str	r1, [r0]	@ out[0]
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr	@ return through the lr restored by the pop above; r0 is not given a return value here
+.Lfunc_end66:
+	.size	mcl_fp_shr1_12L, .Lfunc_end66-mcl_fp_shr1_12L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_add12L                   @ -- Begin function mcl_fp_add12L
+	.p2align	2
+	.type	mcl_fp_add12L,%function
+	.code	32                              @ @mcl_fp_add12L
+mcl_fp_add12L:	@ s = a + b, t = s - m over 12 x 32-bit words (word 0 least significant); out = [r0], a = [r1], b = [r2], m = [r3] (presumably the field modulus - confirm against caller); out gets s, then is overwritten with t unless bit 0 of (carry - borrow) is set
+	.fnstart
+@ %bb.0: phase 1 computes s with an adds/adcs chain, phase 2 computes t with a subs/sbcs chain while s is being stored to out
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#40
+	sub	sp, sp, #40	@ spill space: t[3..0] at sp+0..12, s[7] sp+16, s[6] sp+20, s[1] sp+24, s[5] sp+28, s[4] sp+32, carry sp+36
+	ldm	r2, {r7, r8, lr}	@ b[0], b[1], b[2]
+	ldr	r6, [r1]	@ a[0]
+	ldmib	r1, {r4, r5, r9}	@ a[1], a[2], a[3]
+	adds	r11, r6, r7	@ s[0]; starts the carry chain
+	adcs	r4, r4, r8	@ s[1]
+	ldr	r12, [r2, #12]	@ b[3]
+	adcs	r10, r5, lr	@ s[2]
+	ldr	r5, [r2, #16]	@ b[4]
+	ldr	r6, [r1, #16]	@ a[4]
+	adcs	r8, r9, r12	@ s[3]
+	str	r4, [sp, #24]                   @ 4-byte Spill (s[1])
+	add	lr, r2, #32	@ lr = &b[8]
+	adcs	r7, r6, r5	@ s[4]
+	ldr	r4, [r2, #20]	@ b[5]
+	ldr	r5, [r1, #20]	@ a[5]
+	str	r7, [sp, #32]                   @ 4-byte Spill (s[4])
+	adcs	r7, r5, r4	@ s[5]
+	ldr	r4, [r2, #24]	@ b[6]
+	ldr	r5, [r1, #24]	@ a[6]
+	str	r7, [sp, #28]                   @ 4-byte Spill (s[5])
+	adcs	r7, r5, r4	@ s[6]
+	ldr	r4, [r2, #28]	@ b[7]
+	ldr	r5, [r1, #28]	@ a[7]
+	str	r7, [sp, #20]                   @ 4-byte Spill (s[6])
+	adcs	r7, r5, r4	@ s[7]
+	str	r7, [sp, #16]                   @ 4-byte Spill (s[7])
+	ldm	lr, {r4, r5, r7, lr}	@ b[8..11]
+	ldr	r6, [r1, #32]	@ a[8]
+	ldr	r2, [r1, #36]	@ a[9]; the b pointer in r2 is no longer needed
+	adcs	r9, r6, r4	@ s[8]
+	ldr	r12, [r1, #40]	@ a[10]
+	adcs	r6, r2, r5	@ s[9]
+	ldr	r1, [r1, #44]	@ a[11]; the a pointer in r1 is no longer needed
+	adcs	r4, r12, r7	@ s[10]
+	str	r11, [r0]	@ out[0] = s[0]
+	adcs	r5, r1, lr	@ s[11]; leaves the carry out of a + b in C
+	mov	r1, #0
+	adc	r1, r1, #0	@ r1 = carry out of a + b (0/1)
+	str	r1, [sp, #36]                   @ 4-byte Spill (carry)
+	ldm	r3, {r1, r2, r7, r12}	@ m[0..3]
+	subs	r1, r11, r1	@ t[0] = s[0] - m[0]; starts the borrow chain
+	str	r1, [sp, #12]                   @ 4-byte Spill (t[0])
+	ldr	r1, [sp, #24]                   @ 4-byte Reload (s[1])
+	sbcs	r2, r1, r2	@ t[1]
+	str	r2, [sp, #8]                    @ 4-byte Spill (t[1])
+	sbcs	r2, r10, r7	@ t[2]
+	str	r2, [sp, #4]                    @ 4-byte Spill (t[2])
+	sbcs	r2, r8, r12	@ t[3]
+	stmib	r0, {r1, r10}	@ out[1] = s[1], out[2] = s[2]
+	str	r2, [sp]                        @ 4-byte Spill (t[3])
+	ldr	r2, [sp, #32]                   @ 4-byte Reload (s[4])
+	ldr	r1, [r3, #16]	@ m[4]
+	ldr	r7, [sp, #28]                   @ 4-byte Reload (s[5])
+	sbcs	r12, r2, r1	@ t[4]
+	ldr	r1, [r3, #20]	@ m[5]
+	str	r8, [r0, #12]	@ out[3] = s[3]
+	ldr	r8, [sp, #20]                   @ 4-byte Reload (s[6])
+	sbcs	lr, r7, r1	@ t[5]
+	ldr	r1, [r3, #24]	@ m[6]
+	str	r7, [r0, #20]	@ out[5] = s[5]
+	add	r7, r3, #32	@ r7 = &m[8]
+	str	r8, [r0, #24]	@ out[6] = s[6]
+	sbcs	r8, r8, r1	@ t[6]
+	ldr	r10, [sp, #16]                  @ 4-byte Reload (s[7])
+	ldr	r1, [r3, #28]	@ m[7]
+	str	r10, [r0, #28]	@ out[7] = s[7]
+	str	r2, [r0, #16]	@ out[4] = s[4]
+	sbcs	r10, r10, r1	@ t[7]
+	ldm	r7, {r1, r2, r7}	@ m[8], m[9], m[10]
+	sbcs	r1, r9, r1	@ t[8]
+	ldr	r3, [r3, #44]	@ m[11]; the m pointer in r3 is no longer needed
+	sbcs	r2, r6, r2	@ t[9]
+	str	r6, [r0, #36]	@ out[9] = s[9]
+	sbcs	r6, r4, r7	@ t[10]
+	str	r4, [r0, #40]	@ out[10] = s[10]
+	sbcs	r4, r5, r3	@ t[11]; C is now clear iff s - m borrowed
+	ldr	r3, [sp, #36]                   @ 4-byte Reload (carry)
+	str	r9, [r0, #32]	@ out[8] = s[8]
+	sbc	r3, r3, #0	@ r3 = carry - borrow
+	str	r5, [r0, #44]	@ out[11] = s[11]
+	tst	r3, #1	@ bit 0 set when exactly one of carry/borrow occurred
+	bne	.LBB67_2	@ then keep s, which is already stored in out
+@ %bb.1:                                @ %nocarry: overwrite out with t = s - m
+	ldr	r3, [sp, #12]                   @ 4-byte Reload (t[0])
+	add	r5, r0, #12	@ r5 = &out[3]
+	str	r3, [r0]	@ out[0] = t[0]
+	ldr	r3, [sp, #8]                    @ 4-byte Reload (t[1])
+	str	r3, [r0, #4]	@ out[1] = t[1]
+	ldr	r3, [sp, #4]                    @ 4-byte Reload (t[2])
+	str	r3, [r0, #8]	@ out[2] = t[2]
+	ldr	r3, [sp]                        @ 4-byte Reload (t[3])
+	stm	r5, {r3, r12, lr}	@ out[3..5] = t[3..5]
+	add	r3, r0, #32	@ r3 = &out[8]
+	str	r8, [r0, #24]	@ out[6] = t[6]
+	str	r10, [r0, #28]	@ out[7] = t[7]
+	stm	r3, {r1, r2, r6}	@ out[8..10] = t[8..10]
+	str	r4, [r0, #44]	@ out[11] = t[11]
+.LBB67_2:                               @ %carry: common exit for both paths
+	add	sp, sp, #40
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr	@ return through the lr restored by the pop above
+.Lfunc_end67:
+	.size	mcl_fp_add12L, .Lfunc_end67-mcl_fp_add12L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_addNF12L                 @ -- Begin function mcl_fp_addNF12L
+	.p2align	2
+	.type	mcl_fp_addNF12L,%function
+	.code	32                              @ @mcl_fp_addNF12L
+mcl_fp_addNF12L:                        @ [r0] = 12-word sum [r1]+[r2], or (sum - [r3]) when that difference's top word is non-negative
+	.fnstart
+@ %bb.0:                                @ r0 = destination, r1/r2 = 12-word operands, r3 = 12 words subtracted from the sum
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#60
+	sub	sp, sp, #60
+	ldr	r9, [r1]
+	ldm	r2, {r4, r5, r6, r7}
+	ldmib	r1, {r8, lr}
+	adds	r4, r4, r9                      @ word 0: start of the 12-word carry chain [r2] + [r1]
+	str	r4, [sp, #56]                   @ 4-byte Spill
+	adcs	r4, r5, r8
+	ldr	r12, [r1, #12]
+	adcs	r6, r6, lr
+	str	r6, [sp, #48]                   @ 4-byte Spill
+	add	lr, r1, #32
+	adcs	r7, r7, r12
+	str	r7, [sp, #44]                   @ 4-byte Spill
+	ldr	r6, [r1, #16]
+	ldr	r7, [r2, #16]
+	str	r4, [sp, #52]                   @ 4-byte Spill
+	adcs	r7, r7, r6
+	str	r7, [sp, #40]                   @ 4-byte Spill
+	ldr	r4, [r1, #20]
+	ldr	r7, [r2, #20]
+	ldr	r6, [r2, #24]
+	adcs	r7, r7, r4
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	ldr	r7, [r1, #24]
+	ldr	r4, [r2, #32]
+	adcs	r7, r6, r7
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	ldr	r7, [r1, #28]
+	ldr	r6, [r2, #28]
+	ldr	r5, [r1, #44]
+	adcs	r7, r6, r7
+	str	r7, [sp, #28]                   @ 4-byte Spill
+	ldm	lr, {r7, r12, lr}
+	ldr	r6, [r2, #36]
+	adcs	r4, r4, r7
+	ldr	r1, [r2, #40]
+	adcs	r7, r6, r12
+	ldr	r2, [r2, #44]
+	adcs	r1, r1, lr
+	str	r4, [sp, #8]                    @ 4-byte Spill
+	adc	r6, r2, r5                      @ word 11 of the sum; the carry out is not kept
+	ldr	r2, [r3]
+	ldr	r4, [sp, #56]                   @ 4-byte Reload
+	add	lr, r3, #36
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	subs	r2, r4, r2                      @ word 0: start of the 12-word borrow chain sum - [r3]
+	str	r1, [sp]                        @ 4-byte Spill
+	str	r2, [sp, #24]                   @ 4-byte Spill
+	ldmib	r3, {r1, r7, r12}
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	ldr	r9, [r3, #16]
+	sbcs	r2, r2, r1
+	str	r2, [sp, #20]                   @ 4-byte Spill
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	ldr	r10, [r3, #20]
+	sbcs	r2, r2, r7
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	ldr	r5, [r3, #24]
+	ldr	r8, [r3, #28]
+	sbcs	r2, r2, r12
+	ldr	r7, [r3, #32]
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	sbcs	r11, r3, r9
+	ldr	r3, [sp, #36]                   @ 4-byte Reload
+	ldr	r9, [sp, #8]                    @ 4-byte Reload
+	sbcs	r10, r3, r10
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	sbcs	r5, r1, r5
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	ldm	lr, {r2, r12, lr}
+	sbcs	r8, r1, r8
+	ldr	r4, [sp, #4]                    @ 4-byte Reload
+	sbcs	r7, r9, r7
+	ldr	r1, [sp]                        @ 4-byte Reload
+	sbcs	r2, r4, r2
+	sbcs	r12, r1, r12
+	sbc	r3, r6, lr                      @ word 11 (top word) of sum - [r3]
+	asr	lr, r3, #31                     @ lr = 0 or -1 according to the sign bit of that top word
+	cmn	lr, #1                          @ lr == -1 gives "le" (keep the sum); lr == 0 gives "gt" (keep the difference)
+	movle	r12, r1
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	movgt	r6, r3
+	movle	r2, r4
+	cmn	lr, #1                          @ same comparison repeated for the next group of conditional moves
+	str	r2, [r0, #36]
+	movle	r8, r1
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	movle	r7, r9
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	str	r7, [r0, #32]
+	movle	r5, r1
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	cmn	lr, #1
+	str	r12, [r0, #40]
+	str	r6, [r0, #44]
+	movle	r10, r1
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	str	r8, [r0, #28]
+	str	r5, [r0, #24]
+	movle	r11, r1
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	str	r10, [r0, #20]
+	str	r11, [r0, #16]
+	movle	r2, r1
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	str	r2, [r0, #12]
+	cmn	lr, #1
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	movle	r2, r1
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	str	r2, [r0, #8]
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	movle	r2, r1
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	str	r2, [r0, #4]
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	movle	r2, r1
+	str	r2, [r0]
+	add	sp, sp, #60
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end68:
+	.size	mcl_fp_addNF12L, .Lfunc_end68-mcl_fp_addNF12L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_sub12L                   @ -- Begin function mcl_fp_sub12L
+	.p2align	2
+	.type	mcl_fp_sub12L,%function
+	.code	32                              @ @mcl_fp_sub12L
+mcl_fp_sub12L:                          @ [r0] = 12-word difference [r1]-[r2]; if the subtraction borrows, the 12 words at [r3] are added back
+	.fnstart
+@ %bb.0:                                @ r0 = destination, r1 = minuend, r2 = subtrahend, r3 = 12 words added on borrow
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#40
+	sub	sp, sp, #40
+	ldr	r12, [r2]
+	mov	r11, r3                         @ keep the [r3] pointer in r11; r3 is reused as scratch below
+	ldr	r5, [r1]
+	ldr	r9, [r2, #4]
+	ldmib	r1, {r4, r6, r7}
+	subs	r3, r5, r12                     @ word 0: start of the 12-word borrow chain [r1] - [r2]
+	ldr	r8, [r2, #8]
+	sbcs	r10, r4, r9
+	ldr	lr, [r2, #12]
+	add	r9, r1, #32
+	str	r3, [sp, #36]                   @ 4-byte Spill
+	sbcs	r3, r6, r8
+	sbcs	r12, r7, lr
+	ldr	r6, [r2, #16]
+	ldr	r7, [r1, #16]
+	add	lr, r2, #32
+	str	r3, [sp, #16]                   @ 4-byte Spill
+	sbcs	r3, r7, r6
+	ldr	r4, [r2, #20]
+	ldr	r7, [r1, #20]
+	str	r3, [sp, #20]                   @ 4-byte Spill
+	sbcs	r3, r7, r4
+	ldr	r6, [r2, #24]
+	ldr	r4, [r1, #24]
+	str	r3, [sp, #32]                   @ 4-byte Spill
+	sbcs	r3, r4, r6
+	ldr	r4, [r2, #28]
+	ldr	r6, [r1, #28]
+	str	r3, [sp, #24]                   @ 4-byte Spill
+	sbcs	r3, r6, r4
+	str	r3, [sp, #28]                   @ 4-byte Spill
+	ldm	lr, {r4, r6, lr}
+	ldm	r9, {r3, r8, r9}
+	sbcs	r3, r3, r4
+	ldr	r5, [r1, #44]
+	sbcs	r8, r8, r6
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	ldr	r7, [r2, #44]
+	sbcs	lr, r9, lr
+	str	r1, [r0, #8]
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	sbcs	r4, r5, r7                      @ word 11; C is clear afterwards if the whole subtraction borrowed
+	mov	r7, #0
+	str	r1, [r0, #16]
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	sbc	r7, r7, #0                      @ r7 = 0 if no borrow, -1 (all ones) if borrow
+	str	r1, [r0, #20]
+	tst	r7, #1                          @ Z set when there was no borrow; nothing before the beq below changes the flags
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	ldr	r5, [sp, #36]                   @ 4-byte Reload
+	str	r1, [r0, #24]
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	str	r5, [r0]
+	str	r10, [sp, #12]                  @ 4-byte Spill
+	str	r10, [r0, #4]
+	str	r12, [sp, #8]                   @ 4-byte Spill
+	str	r12, [r0, #12]
+	str	r1, [r0, #28]
+	str	r3, [sp, #4]                    @ 4-byte Spill
+	str	r3, [r0, #32]
+	str	r8, [sp]                        @ 4-byte Spill
+	str	r8, [r0, #36]
+	str	lr, [r0, #40]
+	str	r4, [r0, #44]
+	beq	.LBB69_2                        @ no borrow: the difference already stored at [r0] is the result
+@ %bb.1:                                @ %carry
+	ldr	r7, [r11]
+	ldr	r12, [sp, #36]                  @ 4-byte Reload
+	ldmib	r11, {r1, r10}
+	adds	r7, r7, r12                     @ word 0: start of the 12-word carry chain difference + [r11]
+	str	r7, [r0]
+	ldr	r7, [sp, #12]                   @ 4-byte Reload
+	ldr	r2, [r11, #12]
+	adcs	r7, r1, r7
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	str	r7, [r0, #4]
+	adcs	r7, r10, r1
+	str	r7, [r0, #8]
+	ldr	r7, [sp, #8]                    @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r7, r2, r7
+	str	r7, [r0, #12]
+	ldr	r7, [r11, #16]
+	ldr	r2, [sp, #4]                    @ 4-byte Reload
+	adcs	r7, r7, r1
+	str	r7, [r0, #16]
+	ldr	r7, [r11, #20]
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	ldr	r6, [r11, #36]
+	adcs	r7, r7, r1
+	str	r7, [r0, #20]
+	ldr	r7, [r11, #24]
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	ldr	r3, [r11, #44]
+	adcs	r7, r7, r1
+	str	r7, [r0, #24]
+	ldr	r7, [r11, #28]
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r7, r7, r1
+	str	r7, [r0, #28]
+	ldr	r7, [r11, #32]
+	ldr	r1, [r11, #40]
+	adcs	r7, r7, r2
+	ldr	r2, [sp]                        @ 4-byte Reload
+	str	r7, [r0, #32]
+	adcs	r2, r6, r2
+	str	r2, [r0, #36]
+	adcs	r1, r1, lr
+	str	r1, [r0, #40]
+	adc	r3, r3, r4                      @ word 11; the final carry out is discarded
+	str	r3, [r0, #44]
+.LBB69_2:                               @ %nocarry
+	add	sp, sp, #40
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end69:
+	.size	mcl_fp_sub12L, .Lfunc_end69-mcl_fp_sub12L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_subNF12L                 @ -- Begin function mcl_fp_subNF12L
+	.p2align	2
+	.type	mcl_fp_subNF12L,%function
+	.code	32                              @ @mcl_fp_subNF12L
+mcl_fp_subNF12L:                        @ [r0] = 12-word difference [r1]-[r2], with the 12 words at [r3] added when the difference's top word is negative
+	.fnstart
+@ %bb.0:                                @ r0 = destination, r1 = minuend, r2 = subtrahend, r3 = 12 words conditionally added
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#64
+	sub	sp, sp, #64
+	ldr	r9, [r2]
+	add	r10, r3, #8
+	ldm	r1, {r4, r5, r6, r7}
+	ldmib	r2, {r8, lr}
+	subs	r4, r4, r9                      @ word 0: start of the 12-word borrow chain [r1] - [r2]
+	str	r4, [sp, #60]                   @ 4-byte Spill
+	sbcs	r4, r5, r8
+	ldr	r12, [r2, #12]
+	sbcs	r6, r6, lr
+	str	r6, [sp, #52]                   @ 4-byte Spill
+	sbcs	r7, r7, r12
+	str	r7, [sp, #48]                   @ 4-byte Spill
+	ldr	r6, [r2, #16]
+	ldr	r7, [r1, #16]
+	str	r4, [sp, #56]                   @ 4-byte Spill
+	sbcs	r7, r7, r6
+	str	r7, [sp, #44]                   @ 4-byte Spill
+	ldr	r4, [r2, #20]
+	ldr	r7, [r1, #20]
+	ldr	r6, [r1, #24]
+	sbcs	r7, r7, r4
+	str	r7, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [r2, #24]
+	ldr	lr, [r2, #32]
+	sbcs	r7, r6, r7
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	ldr	r7, [r2, #28]
+	ldr	r6, [r1, #28]
+	ldr	r5, [r2, #40]
+	sbcs	r7, r6, r7
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	ldr	r7, [r1, #32]
+	ldr	r6, [r2, #36]
+	ldr	r4, [r2, #44]
+	sbcs	r7, r7, lr
+	ldr	r2, [r1, #36]
+	add	lr, r3, #36
+	ldr	r12, [r1, #40]
+	sbcs	r2, r2, r6
+	ldr	r1, [r1, #44]
+	str	r2, [sp, #8]                    @ 4-byte Spill
+	sbcs	r2, r12, r5
+	ldr	r5, [r3, #24]
+	sbc	r9, r1, r4                      @ r9 = word 11 (top word) of the difference
+	str	r2, [sp, #4]                    @ 4-byte Spill
+	str	r5, [sp]                        @ 4-byte Spill
+	ldm	r3, {r2, r4}
+	ldr	r5, [sp, #60]                   @ 4-byte Reload
+	str	r7, [sp, #12]                   @ 4-byte Spill
+	adds	r2, r5, r2                      @ word 0: start of the 12-word carry chain difference + [r3]
+	str	r2, [sp, #28]                   @ 4-byte Spill
+	ldr	r2, [sp, #56]                   @ 4-byte Reload
+	ldm	r10, {r1, r6, r7, r10}
 	adcs	r2, r2, r4
-	str	r2, [r1, #84]
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [r1, #88]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r2, r12, r2
-	str	r2, [r1, #92]
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [r1, #96]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r2, r8, r2
-	str	r2, [r1, #100]
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [r1, #104]
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r2, r10, r2
-	str	r2, [r1, #108]
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [r1, #112]
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #76]           @ 4-byte Reload
-	str	r0, [r1, #116]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	str	r0, [r1, #120]
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	str	r0, [r1, #124]
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	str	r0, [r1, #128]
-	adc	r2, r2, #0
-	str	r2, [r1, #132]
-	add	sp, sp, #340
-	add	sp, sp, #1024
+	str	r2, [sp, #24]                   @ 4-byte Spill
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	ldr	r8, [r3, #28]
+	adcs	r2, r2, r1
+	str	r2, [sp, #20]                   @ 4-byte Spill
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	ldr	r4, [r3, #32]
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r2, r6
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	adcs	r11, r3, r7
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r10, r3, r10
+	ldr	r3, [sp]                        @ 4-byte Reload
+	ldr	r7, [sp, #12]                   @ 4-byte Reload
+	adcs	r6, r1, r3
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	ldm	lr, {r2, r12, lr}
+	adcs	r8, r1, r8
+	ldr	r5, [sp, #8]                    @ 4-byte Reload
+	adcs	r4, r7, r4
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	adcs	r2, r5, r2
+	adcs	r3, r1, r12
+	adc	r12, r9, lr                     @ word 11 of difference + [r3]; the carry out is not kept
+	asr	lr, r9, #31                     @ lr = 0 or -1 according to the sign bit of the plain difference's top word
+	cmp	lr, #0                          @ "pl" when lr == 0: keep the plain difference; otherwise keep difference + [r3]
+	movpl	r3, r1
+	movpl	r12, r9
+	movpl	r2, r5
+	cmp	lr, #0                          @ same comparison repeated for the next group of conditional moves
+	movpl	r4, r7
+	add	r1, r0, #36
+	str	r4, [r0, #32]
+	stm	r1, {r2, r3, r12}
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	movpl	r8, r1
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	str	r8, [r0, #28]
+	movpl	r6, r1
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	cmp	lr, #0
+	str	r6, [r0, #24]
+	movpl	r10, r1
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	str	r10, [r0, #20]
+	movpl	r11, r1
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	str	r11, [r0, #16]
+	movpl	r2, r1
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	str	r2, [r0, #12]
+	cmp	lr, #0
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	movpl	r2, r1
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	str	r2, [r0, #8]
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	movpl	r2, r1
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	str	r2, [r0, #4]
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	movpl	r2, r1
+	str	r2, [r0]
+	add	sp, sp, #64
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end70:
+	.size	mcl_fp_subNF12L, .Lfunc_end70-mcl_fp_subNF12L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fpDbl_add12L                @ -- Begin function mcl_fpDbl_add12L
+	.p2align	2
+	.type	mcl_fpDbl_add12L,%function
+	.code	32                              @ @mcl_fpDbl_add12L
+mcl_fpDbl_add12L:                       @ [r0] = 24-word sum [r1]+[r2]; words 12..23 are stored either as-is or with the 12 words at [r3] subtracted
+	.fnstart
+@ %bb.0:                                @ r0 = destination, r1/r2 = 24-word operands, r3 = 12 words subtracted from the upper half
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#116
+	sub	sp, sp, #116
+	ldr	r9, [r2]
+	ldm	r1, {r4, r5, r6, r7}
+	ldmib	r2, {r8, lr}
+	adds	r4, r4, r9                      @ word 0: start of the 24-word carry chain [r1] + [r2]
+	str	r4, [sp, #92]                   @ 4-byte Spill
+	adcs	r4, r5, r8
+	ldr	r12, [r2, #12]
+	adcs	r6, r6, lr
+	str	r6, [sp, #84]                   @ 4-byte Spill
+	adcs	r7, r7, r12
+	str	r7, [sp, #80]                   @ 4-byte Spill
+	ldr	r6, [r2, #16]
+	ldr	r7, [r1, #16]
+	str	r4, [sp, #88]                   @ 4-byte Spill
+	adcs	r7, r7, r6
+	str	r7, [sp, #76]                   @ 4-byte Spill
+	ldr	r4, [r2, #20]
+	ldr	r7, [r1, #20]
+	ldr	r6, [r1, #24]
+	adcs	r7, r7, r4
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	ldr	r7, [r2, #24]
+	ldr	r5, [r1, #84]
+	adcs	r7, r6, r7
+	str	r7, [sp, #60]                   @ 4-byte Spill
+	ldr	r7, [r2, #28]
+	ldr	r6, [r1, #28]
+	ldr	r4, [r1, #88]
+	adcs	r7, r6, r7
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	ldr	r7, [r2, #32]
+	ldr	r6, [r1, #32]
+	adcs	r7, r6, r7
+	str	r7, [sp, #48]                   @ 4-byte Spill
+	ldr	r7, [r2, #36]
+	ldr	r6, [r1, #36]
+	adcs	r7, r6, r7
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	ldr	r7, [r2, #40]
+	ldr	r6, [r1, #40]
+	adcs	r7, r6, r7
+	str	r7, [sp, #28]                   @ 4-byte Spill
+	ldr	r7, [r2, #44]
+	ldr	r6, [r1, #44]
+	adcs	r7, r6, r7
+	str	r7, [sp, #24]                   @ 4-byte Spill
+	ldr	r7, [r2, #48]
+	ldr	r6, [r1, #48]
+	adcs	r9, r6, r7                      @ word 12: first word of the upper half of the sum
+	ldr	r7, [r2, #52]
+	ldr	r6, [r1, #52]
+	str	r9, [sp, #72]                   @ 4-byte Spill
+	adcs	r10, r6, r7
+	ldr	r7, [r2, #56]
+	ldr	r6, [r1, #56]
+	str	r10, [sp, #56]                  @ 4-byte Spill
+	adcs	r8, r6, r7
+	ldr	r7, [r2, #60]
+	ldr	r6, [r1, #60]
+	str	r8, [sp, #44]                   @ 4-byte Spill
+	adcs	r7, r6, r7
+	str	r7, [sp, #112]                  @ 4-byte Spill
+	ldr	r7, [r2, #64]
+	ldr	r6, [r1, #64]
+	adcs	r7, r6, r7
+	str	r7, [sp, #108]                  @ 4-byte Spill
+	ldr	r7, [r2, #68]
+	ldr	r6, [r1, #68]
+	adcs	r7, r6, r7
+	str	r7, [sp, #104]                  @ 4-byte Spill
+	ldr	r7, [r2, #72]
+	ldr	r6, [r1, #72]
+	adcs	r7, r6, r7
+	str	r7, [sp, #100]                  @ 4-byte Spill
+	ldr	r7, [r2, #76]
+	ldr	r6, [r1, #76]
+	adcs	r7, r6, r7
+	str	r7, [sp, #96]                   @ 4-byte Spill
+	ldr	r6, [r1, #80]
+	ldr	r7, [r2, #80]
+	ldr	r1, [r1, #92]
+	adcs	r11, r6, r7
+	ldr	r6, [r2, #84]
+	str	r11, [sp]                       @ 4-byte Spill
+	adcs	r7, r5, r6
+	ldr	r5, [r2, #88]
+	ldr	r2, [r2, #92]
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	adcs	r7, r4, r5
+	adcs	r1, r1, r2                      @ word 23 of the sum
+	str	r1, [sp, #8]                    @ 4-byte Spill
+	mov	r1, #0
+	str	r7, [sp, #12]                   @ 4-byte Spill
+	adc	r1, r1, #0                      @ r1 = carry out of the 24-word addition (r1 was zeroed above)
+	str	r1, [sp, #4]                    @ 4-byte Spill
+	ldm	r3, {r1, r2, r12, lr}
+	subs	r1, r9, r1                      @ start of the 12-word borrow chain: sum words 12..23 minus [r3]
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	sbcs	r1, r10, r2
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	sbcs	r1, r8, r12
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	add	r12, r3, #20
+	ldr	r5, [r3, #32]
+	sbcs	r1, r1, lr
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	add	lr, r3, #36
+	str	r1, [r0]                        @ words 0..11 of the sum are stored unmodified (this and the stores to [r0, #4..#44] below)
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	str	r1, [r0, #4]
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	str	r1, [r0, #8]
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	str	r1, [r0, #12]
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	str	r1, [r0, #16]
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	str	r1, [r0, #20]
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	str	r1, [r0, #24]
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	str	r1, [r0, #28]
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	str	r1, [r0, #32]
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	str	r1, [r0, #36]
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	ldr	r3, [r3, #16]
+	ldr	r7, [sp, #108]                  @ 4-byte Reload
+	str	r1, [r0, #40]
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	sbcs	r10, r7, r3                     @ borrow chain continues; the loads and stores in between leave the flags untouched
+	str	r1, [r0, #44]
+	ldm	r12, {r1, r2, r12}
+	ldr	r3, [sp, #104]                  @ 4-byte Reload
+	ldm	lr, {r4, r6, lr}
+	sbcs	r9, r3, r1
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	sbcs	r8, r1, r2
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	sbcs	r12, r1, r12
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	sbcs	r5, r11, r5
+	ldr	r7, [sp, #4]                    @ 4-byte Reload
+	sbcs	r4, r3, r4
+	sbcs	r6, r2, r6
+	sbcs	lr, r1, lr
+	sbc	r11, r7, #0                     @ r11 = saved carry-out word minus the final borrow
+	ands	r11, r11, #1                    @ bit 0 set ("ne"): keep the unsubtracted upper words; clear: keep the subtracted ones
+	movne	lr, r1
+	ldr	r1, [sp]                        @ 4-byte Reload
+	movne	r6, r2
+	movne	r4, r3
+	cmp	r11, #0                         @ same condition re-tested for the next group of conditional moves
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	movne	r5, r1
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	str	lr, [r0, #92]
+	str	r6, [r0, #88]
+	movne	r12, r1
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	str	r4, [r0, #84]
+	str	r5, [r0, #80]
+	movne	r8, r1
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	cmp	r11, #0
+	str	r12, [r0, #76]
+	str	r8, [r0, #72]
+	movne	r9, r1
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	str	r9, [r0, #68]
+	movne	r10, r1
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	str	r10, [r0, #64]
+	movne	r2, r1
+	cmp	r11, #0
+	str	r2, [r0, #60]
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	movne	r1, r2
+	ldr	r2, [sp, #56]                   @ 4-byte Reload
+	str	r1, [r0, #56]
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	movne	r1, r2
+	ldr	r2, [sp, #72]                   @ 4-byte Reload
+	str	r1, [r0, #52]
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	movne	r1, r2
+	str	r1, [r0, #48]
+	add	sp, sp, #116
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end71:
+	.size	mcl_fpDbl_add12L, .Lfunc_end71-mcl_fpDbl_add12L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fpDbl_sub12L                @ -- Begin function mcl_fpDbl_sub12L
+	.p2align	2
+	.type	mcl_fpDbl_sub12L,%function
+	.code	32                              @ @mcl_fpDbl_sub12L
+mcl_fpDbl_sub12L:                       @ [r0] = 24-word difference [r1]-[r2]; when the subtraction borrows, the 12 words at [r3] are added to words 12..23
+	.fnstart
+@ %bb.0:                                @ r0 = destination, r1 = minuend, r2 = subtrahend, r3 = 12 words added to the upper half on borrow
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#108
+	sub	sp, sp, #108
+	ldr	r7, [r2, #84]                   @ words of [r2] are first copied into stack slots, then consumed by the borrow chain below
+	add	r9, r1, #12
+	str	r7, [sp, #84]                   @ 4-byte Spill
+	ldr	r7, [r2, #80]
+	str	r7, [sp, #76]                   @ 4-byte Spill
+	ldr	r7, [r2, #76]
+	str	r7, [sp, #92]                   @ 4-byte Spill
+	ldr	r7, [r2, #72]
+	str	r7, [sp, #96]                   @ 4-byte Spill
+	ldr	r7, [r2, #68]
+	str	r7, [sp, #100]                  @ 4-byte Spill
+	ldr	r7, [r2, #64]
+	str	r7, [sp, #104]                  @ 4-byte Spill
+	ldr	r7, [r2, #32]
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	ldr	r7, [r2, #36]
+	str	r7, [sp, #28]                   @ 4-byte Spill
+	ldr	r7, [r2, #40]
+	str	r7, [sp, #60]                   @ 4-byte Spill
+	ldr	r7, [r2, #44]
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	ldr	r7, [r2, #48]
+	str	r7, [sp, #88]                   @ 4-byte Spill
+	ldr	r7, [r2, #52]
+	str	r7, [sp, #80]                   @ 4-byte Spill
+	ldr	r7, [r2, #56]
+	str	r7, [sp, #72]                   @ 4-byte Spill
+	ldr	r7, [r2, #60]
+	str	r7, [sp, #68]                   @ 4-byte Spill
+	ldr	r7, [r2, #8]
+	ldm	r2, {r4, r11}
+	ldm	r1, {r12, lr}
+	str	r7, [sp, #56]                   @ 4-byte Spill
+	subs	r4, r12, r4                     @ word 0: start of the 24-word borrow chain [r1] - [r2]
+	ldr	r7, [r2, #12]
+	sbcs	r11, lr, r11
+	str	r7, [sp, #52]                   @ 4-byte Spill
+	add	lr, r1, #44
+	ldr	r7, [r2, #16]
+	str	r7, [sp, #48]                   @ 4-byte Spill
+	ldr	r7, [r2, #20]
+	str	r4, [sp, #8]                    @ 4-byte Spill
+	ldr	r10, [r1, #8]
+	ldr	r4, [sp, #56]                   @ 4-byte Reload
+	str	r7, [sp, #44]                   @ 4-byte Spill
+	ldr	r7, [r2, #24]
+	sbcs	r4, r10, r4
+	str	r7, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [r2, #28]
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	str	r4, [sp, #56]                   @ 4-byte Spill
+	ldm	r9, {r5, r6, r7, r8, r9}
+	ldr	r4, [sp, #52]                   @ 4-byte Reload
+	ldr	r10, [sp, #32]                  @ 4-byte Reload
+	sbcs	r5, r5, r4
+	str	r5, [sp, #52]                   @ 4-byte Spill
+	ldr	r5, [sp, #48]                   @ 4-byte Reload
+	ldm	lr, {r4, r12, lr}
+	sbcs	r6, r6, r5
+	str	r6, [sp, #48]                   @ 4-byte Spill
+	ldr	r6, [sp, #44]                   @ 4-byte Reload
+	ldr	r5, [r1, #40]
+	sbcs	r7, r7, r6
+	str	r7, [sp, #44]                   @ 4-byte Spill
+	ldr	r7, [sp, #40]                   @ 4-byte Reload
+	ldr	r6, [r1, #36]
+	sbcs	r7, r8, r7
+	str	r7, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [sp, #36]                   @ 4-byte Reload
+	ldr	r8, [r1, #56]
+	sbcs	r7, r9, r7
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	ldr	r7, [r1, #32]
+	ldr	r9, [r1, #60]
+	sbcs	r7, r7, r10
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	ldr	r7, [sp, #28]                   @ 4-byte Reload
+	sbcs	r6, r6, r7
+	str	r6, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #60]                   @ 4-byte Reload
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	sbcs	r5, r5, r6
+	str	r5, [sp, #24]                   @ 4-byte Spill
+	ldr	r5, [sp, #64]                   @ 4-byte Reload
+	ldr	r6, [sp, #104]                  @ 4-byte Reload
+	sbcs	r4, r4, r5
+	str	r4, [sp, #20]                   @ 4-byte Spill
+	ldr	r4, [sp, #88]                   @ 4-byte Reload
+	sbcs	r5, r12, r4                     @ word 12: first word of the upper half of the difference
+	ldr	r4, [r1, #88]
+	sbcs	lr, lr, r7
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	ldr	r12, [r3, #12]
+	sbcs	r8, r8, r7
+	ldr	r7, [sp, #68]                   @ 4-byte Reload
+	str	r5, [sp, #88]                   @ 4-byte Spill
+	sbcs	r9, r9, r7
+	ldr	r7, [r1, #64]
+	str	lr, [sp, #80]                   @ 4-byte Spill
+	sbcs	r7, r7, r6
+	str	r7, [sp, #104]                  @ 4-byte Spill
+	ldr	r7, [r1, #68]
+	ldr	r6, [sp, #100]                  @ 4-byte Reload
+	str	r8, [sp, #72]                   @ 4-byte Spill
+	sbcs	r7, r7, r6
+	str	r7, [sp, #100]                  @ 4-byte Spill
+	ldr	r7, [r1, #72]
+	ldr	r6, [sp, #96]                   @ 4-byte Reload
+	str	r9, [sp, #64]                   @ 4-byte Spill
+	sbcs	r7, r7, r6
+	str	r7, [sp, #96]                   @ 4-byte Spill
+	ldr	r7, [r1, #76]
+	ldr	r6, [sp, #92]                   @ 4-byte Reload
+	sbcs	r7, r7, r6
+	str	r7, [sp, #92]                   @ 4-byte Spill
+	ldr	r7, [r1, #80]
+	ldr	r6, [sp, #76]                   @ 4-byte Reload
+	sbcs	r7, r7, r6
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	ldr	r6, [r1, #84]
+	ldr	r7, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [r1, #92]
+	sbcs	r7, r6, r7
+	ldr	r6, [r2, #88]
+	ldr	r2, [r2, #92]
+	sbcs	r10, r4, r6
+	str	r7, [sp, #12]                   @ 4-byte Spill
+	sbcs	r1, r1, r2                      @ word 23 of the difference
+	str	r1, [sp, #4]                    @ 4-byte Spill
+	mov	r1, #0
+	ldr	r2, [r3, #8]
+	sbc	r1, r1, #0                      @ r1 = 0 if no borrow, -1 if the 24-word subtraction borrowed
+	str	r1, [sp]                        @ 4-byte Spill
+	ldm	r3, {r1, r6}
+	adds	r1, r5, r1                      @ start of the 12-word carry chain: difference words 12..23 plus [r3]
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	adcs	r1, lr, r6
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	adcs	r1, r8, r2
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	adcs	r1, r9, r12
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	add	lr, r3, #36
+	ldr	r5, [r3, #32]
+	add	r12, r3, #20
+	stm	r0, {r1, r11}                   @ words 0..11 of the difference are stored unmodified (this and the stores to [r0, #8..#44] below)
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	str	r1, [r0, #8]
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	str	r1, [r0, #12]
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	str	r1, [r0, #16]
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	str	r1, [r0, #20]
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	str	r1, [r0, #24]
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	str	r1, [r0, #28]
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	str	r1, [r0, #32]
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	ldr	r3, [r3, #16]
+	ldr	r7, [sp, #104]                  @ 4-byte Reload
+	str	r1, [r0, #36]
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r3, r7, r3                      @ carry chain continues; the loads and stores in between leave the flags untouched
+	str	r1, [r0, #40]
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	str	r1, [r0, #44]
+	str	r3, [sp, #56]                   @ 4-byte Spill
+	ldm	r12, {r1, r2, r12}
+	ldr	r3, [sp, #100]                  @ 4-byte Reload
+	ldr	r9, [sp, #16]                   @ 4-byte Reload
+	adcs	r11, r3, r1
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldm	lr, {r4, r6, lr}
+	adcs	r8, r1, r2
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	mov	r2, r10
+	ldr	r3, [sp, #12]                   @ 4-byte Reload
+	adcs	r12, r1, r12
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	adcs	r5, r9, r5
+	ldr	r7, [sp]                        @ 4-byte Reload
+	adcs	r4, r3, r4
+	adcs	r6, r10, r6
+	adc	lr, r1, lr                      @ word 23 plus the last word of [r3]; the carry out is not kept
+	ands	r10, r7, #1                     @ r7 = saved borrow word; bit 0 clear ("eq"): keep the plain upper words, set: keep the adjusted ones
+	moveq	lr, r1
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	moveq	r6, r2
+	moveq	r4, r3
+	cmp	r10, #0                         @ same condition re-tested for the next group of conditional moves
+	ldr	r2, [sp, #56]                   @ 4-byte Reload
+	moveq	r12, r1
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	moveq	r5, r9
+	str	lr, [r0, #92]
+	str	r6, [r0, #88]
+	moveq	r8, r1
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	cmp	r10, #0
+	str	r4, [r0, #84]
+	str	r5, [r0, #80]
+	moveq	r11, r1
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	str	r12, [r0, #76]
+	str	r8, [r0, #72]
+	moveq	r2, r1
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	str	r2, [r0, #64]
+	ldr	r2, [sp, #64]                   @ 4-byte Reload
+	str	r11, [r0, #68]
+	moveq	r1, r2
+	ldr	r2, [sp, #72]                   @ 4-byte Reload
+	str	r1, [r0, #60]
+	cmp	r10, #0
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	moveq	r1, r2
+	ldr	r2, [sp, #80]                   @ 4-byte Reload
+	str	r1, [r0, #56]
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	moveq	r1, r2
+	ldr	r2, [sp, #88]                   @ 4-byte Reload
+	str	r1, [r0, #52]
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	moveq	r1, r2
+	str	r1, [r0, #48]
+	add	sp, sp, #108
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end256:
-	.size	mcl_fpDbl_mulPre17L, .Lfunc_end256-mcl_fpDbl_mulPre17L
+.Lfunc_end72:
+	.size	mcl_fpDbl_sub12L, .Lfunc_end72-mcl_fpDbl_sub12L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_sqrPre17L
-	.align	2
-	.type	mcl_fpDbl_sqrPre17L,%function
-mcl_fpDbl_sqrPre17L:                    @ @mcl_fpDbl_sqrPre17L
+                                        @ -- End function
+	.globl	mulPv512x32                     @ -- Begin function mulPv512x32
+	.p2align	2
+	.type	mulPv512x32,%function
+	.code	32                              @ @mulPv512x32
+mulPv512x32:
 	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#332
-	sub	sp, sp, #332
-	.pad	#1024
-	sub	sp, sp, #1024
-	mov	r7, r1
-	mov	r4, r0
-	add	r0, sp, #1280
-	ldr	r2, [r7]
-	str	r7, [sp, #120]          @ 4-byte Spill
-	str	r4, [sp, #124]          @ 4-byte Spill
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1348]
-	ldr	r1, [sp, #1284]
-	ldr	r2, [r7, #4]
-	add	r11, sp, #1024
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #1344]
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #1288]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #1340]
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #1292]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #1336]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	mov	r1, r7
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #1332]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #1328]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #1324]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #1320]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #1316]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #1312]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #1308]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1304]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #1300]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #1296]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #1280]
-	str	r0, [r4]
-	add	r0, r11, #184
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1276]
-	add	r10, sp, #1232
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1272]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1268]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1264]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1260]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1256]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1252]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r8, r9, r10}
-	ldr	r0, [sp, #1208]
-	ldr	r11, [sp, #52]          @ 4-byte Reload
-	ldr	lr, [sp, #1228]
-	ldr	r12, [sp, #1224]
-	ldr	r1, [sp, #1212]
-	ldr	r2, [sp, #1216]
-	ldr	r3, [sp, #1220]
-	adds	r0, r0, r11
-	str	r0, [r4, #4]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r7, #8]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r7
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #1136
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1204]
-	add	r12, sp, #1136
-	ldr	r6, [sp, #1176]
-	ldr	r4, [sp, #1172]
-	ldr	lr, [sp, #1168]
-	ldr	r11, [sp, #1164]
-	ldr	r10, [sp, #1160]
-	ldr	r9, [sp, #1156]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1200]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1196]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1192]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1188]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1184]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1180]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r5, [sp, #52]           @ 4-byte Reload
-	ldr	r8, [sp, #124]          @ 4-byte Reload
-	adds	r0, r0, r5
-	str	r0, [r8, #8]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r7, #12]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	add	r4, sp, #1024
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, r4, #40
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1132]
-	add	r11, sp, #1088
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1128]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1124]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1120]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1116]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1112]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r11, {r5, r6, r8, r9, r10, r11}
-	ldr	r0, [sp, #1064]
-	ldr	r4, [sp, #52]           @ 4-byte Reload
-	ldr	lr, [sp, #1084]
-	ldr	r12, [sp, #1080]
-	ldr	r1, [sp, #1068]
-	ldr	r2, [sp, #1072]
-	ldr	r3, [sp, #1076]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #124]          @ 4-byte Reload
-	str	r0, [r4, #12]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r7, #16]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #992
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1060]
-	add	lr, sp, #1012
-	add	r12, sp, #992
-	ldr	r6, [sp, #1032]
-	ldr	r5, [sp, #1028]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1056]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1052]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1048]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1044]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1040]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1036]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	lr, {r9, r10, r11, lr}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r8, [sp, #52]           @ 4-byte Reload
-	adds	r0, r0, r8
-	str	r0, [r4, #16]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r7, #20]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #920
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #988]
-	add	r10, sp, #944
-	add	lr, sp, #920
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #984]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #980]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #976]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #972]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #968]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #964]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r8, r9, r10}
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r11, lr}
+	ldr	r12, [r1]
+	ldmib	r1, {r3, lr}
+	umull	r7, r6, r12, r2
+	ldr	r4, [r1, #12]
+	umull	r5, r8, r3, r2
+	str	r7, [r0]
+	umull	r9, r12, r4, r2
+	adds	r7, r6, r5
+	umull	r5, r4, lr, r2
+	adcs	r7, r8, r5
+	umlal	r6, r5, r3, r2
+	adcs	r7, r4, r9
+	str	r7, [r0, #12]
+	str	r5, [r0, #8]
+	str	r6, [r0, #4]
+	ldr	r3, [r1, #16]
+	umull	r7, r6, r3, r2
+	adcs	r3, r12, r7
+	str	r3, [r0, #16]
+	ldr	r3, [r1, #20]
+	umull	r7, r5, r3, r2
+	adcs	r3, r6, r7
+	str	r3, [r0, #20]
+	ldr	r3, [r1, #24]
+	umull	r7, r6, r3, r2
+	adcs	r3, r5, r7
+	str	r3, [r0, #24]
+	ldr	r3, [r1, #28]
+	umull	r7, r5, r3, r2
+	adcs	r3, r6, r7
+	str	r3, [r0, #28]
+	ldr	r3, [r1, #32]
+	umull	r7, r6, r3, r2
+	adcs	r3, r5, r7
+	str	r3, [r0, #32]
+	ldr	r3, [r1, #36]
+	umull	r7, r5, r3, r2
+	adcs	r3, r6, r7
+	str	r3, [r0, #36]
+	ldr	r3, [r1, #40]
+	umull	r7, r6, r3, r2
+	adcs	r3, r5, r7
+	str	r3, [r0, #40]
+	ldr	r3, [r1, #44]
+	umull	r7, r5, r3, r2
+	adcs	r3, r6, r7
+	str	r3, [r0, #44]
+	ldr	r3, [r1, #48]
+	umull	r7, r6, r3, r2
+	adcs	r3, r5, r7
+	str	r3, [r0, #48]
+	ldr	r3, [r1, #52]
+	umull	r7, r5, r3, r2
+	adcs	r3, r6, r7
+	str	r3, [r0, #52]
+	ldr	r3, [r1, #56]
+	umull	r7, r6, r3, r2
+	adcs	r3, r5, r7
+	str	r3, [r0, #56]
+	ldr	r1, [r1, #60]
+	umull	r3, r7, r1, r2
+	adcs	r1, r6, r3
+	str	r1, [r0, #60]
+	adc	r1, r7, #0
+	str	r1, [r0, #64]
+	pop	{r4, r5, r6, r7, r8, r9, r11, lr}
+	mov	pc, lr
+.Lfunc_end73:
+	.size	mulPv512x32, .Lfunc_end73-mulPv512x32
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_mulUnitPre16L            @ -- Begin function mcl_fp_mulUnitPre16L
+	.p2align	2
+	.type	mcl_fp_mulUnitPre16L,%function
+	.code	32                              @ @mcl_fp_mulUnitPre16L
+mcl_fp_mulUnitPre16L:
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#92
+	sub	sp, sp, #92
+	mov	r4, r0
+	add	r0, sp, #16
+	bl	mulPv512x32
+	add	r11, sp, #16
+	ldr	r0, [sp, #32]
+	add	lr, sp, #52
+	ldm	r11, {r8, r9, r10, r11}
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #40]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	ldr	r0, [sp, #44]
+	str	r0, [sp]                        @ 4-byte Spill
 	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #52]          @ 4-byte Reload
-	adds	r0, r0, r11
+	ldr	r6, [sp, #80]
+	ldr	r7, [sp, #48]
+	ldr	r5, [sp, #76]
+	str	r6, [r4, #64]
+	add	r6, r4, #36
+	str	r7, [r4, #32]
+	stm	r6, {r0, r1, r2, r3, r12, lr}
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	str	r5, [r4, #60]
+	stm	r4, {r8, r9, r10, r11}
+	str	r0, [r4, #16]
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
 	str	r0, [r4, #20]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r7, #24]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #848
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #916]
-	add	lr, sp, #868
-	add	r12, sp, #848
-	ldr	r6, [sp, #888]
-	ldr	r5, [sp, #884]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #912]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #908]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #904]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #900]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #896]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #892]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	lr, {r9, r10, r11, lr}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r8, [sp, #52]           @ 4-byte Reload
-	adds	r0, r0, r8
+	ldr	r0, [sp, #4]                    @ 4-byte Reload
 	str	r0, [r4, #24]
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r7, #28]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	ldr	r0, [sp]                        @ 4-byte Reload
+	str	r0, [r4, #28]
+	add	sp, sp, #92
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end74:
+	.size	mcl_fp_mulUnitPre16L, .Lfunc_end74-mcl_fp_mulUnitPre16L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fpDbl_mulPre16L             @ -- Begin function mcl_fpDbl_mulPre16L
+	.p2align	2
+	.type	mcl_fpDbl_mulPre16L,%function
+	.code	32                              @ @mcl_fpDbl_mulPre16L
+mcl_fpDbl_mulPre16L:
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#276
+	sub	sp, sp, #276
+	mov	r6, r2
+	mov	r5, r1
+	mov	r4, r0
+	bl	mcl_fpDbl_mulPre8L
+	add	r0, r4, #64
+	add	r1, r5, #32
+	add	r2, r6, #32
+	bl	mcl_fpDbl_mulPre8L
+	ldr	r0, [r6, #12]
+	add	lr, r6, #32
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [r6, #16]
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [r6, #20]
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [r6, #24]
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [r6, #28]
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldm	r6, {r9, r10, r11}
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r8, r0, r9
+	ldr	r7, [r6, #56]
+	adcs	r0, r1, r10
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r1, r2, r11
+	ldr	r6, [r6, #60]
+	add	r10, r5, #44
 	adcs	r0, r3, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	ldr	r3, [r5, #36]
 	adcs	r0, r12, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	ldr	r11, [r5, #8]
 	adcs	r0, lr, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	ldm	r5, {r12, lr}
+	adcs	r0, r7, r0
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
+	ldr	r7, [r5, #32]
 	adcs	r0, r6, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	mov	r0, #0
+	ldr	r2, [r5, #40]
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #776
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #844]
-	add	r10, sp, #800
-	add	lr, sp, #776
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #840]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #836]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #832]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #828]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #824]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #820]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #52]          @ 4-byte Reload
-	adds	r0, r0, r11
-	str	r0, [r4, #28]
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [r5, #12]
+	adds	r7, r7, r12
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	adcs	r3, r3, lr
+	ldr	r0, [r5, #16]
+	adcs	r2, r2, r11
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [r5, #20]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [r5, #24]
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [r5, #28]
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	str	r1, [sp, #156]
+	ldm	r10, {r0, r6, r10}
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r9, [r5, #56]
+	adcs	r0, r0, r1
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	str	r0, [sp, #192]
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	str	r9, [sp, #80]                   @ 4-byte Spill
+	adcs	r9, r6, r0
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r10, r0
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	str	r0, [sp, #200]
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r5, [r5, #60]
 	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r7, #32]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	str	r0, [sp, #204]
+	add	r1, sp, #180
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	str	r2, [sp, #116]                  @ 4-byte Spill
 	adcs	r0, r5, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	str	r0, [sp, #208]
+	mov	r0, #0
+	str	r2, [sp, #188]
+	adc	r10, r0, #0
+	add	r0, sp, #212
+	add	r2, sp, #148
+	ldr	r11, [sp, #104]                 @ 4-byte Reload
+	ldr	r6, [sp, #120]                  @ 4-byte Reload
+	ldr	r5, [sp, #124]                  @ 4-byte Reload
+	str	r8, [sp, #128]                  @ 4-byte Spill
+	str	r8, [sp, #148]
+	ldr	r8, [sp, #76]                   @ 4-byte Reload
+	str	r7, [sp, #136]                  @ 4-byte Spill
+	str	r7, [sp, #180]
+	ldr	r7, [sp, #108]                  @ 4-byte Reload
+	str	r3, [sp, #132]                  @ 4-byte Spill
+	str	r3, [sp, #184]
+	ldr	r3, [sp, #144]                  @ 4-byte Reload
+	str	r3, [sp, #152]
+	str	r8, [sp, #160]
+	str	r9, [sp, #196]
+	str	r11, [sp, #164]
+	str	r7, [sp, #168]
+	str	r6, [sp, #172]
+	str	r5, [sp, #176]
+	bl	mcl_fpDbl_mulPre8L
+	ldr	r2, [sp, #96]                   @ 4-byte Reload
+	rsb	r0, r10, #0
+	mov	r1, r10
+	str	r10, [sp, #92]                  @ 4-byte Spill
+	and	r10, r7, r0
+	and	r12, r5, r0
+	and	r3, r6, r0
+	and	r11, r11, r0
+	and	r7, r2, r0
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
+	sub	r1, r1, r1, lsl #1
+	and	r6, r0, r1
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	and	r5, r8, r1
+	str	r6, [sp, #124]                  @ 4-byte Spill
+	and	r1, r0, r1
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	str	r1, [sp, #144]                  @ 4-byte Spill
+	adds	r2, r1, r0
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	adcs	lr, r6, r0
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r6, r7, r0
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r8, r5, r0
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r9, r11, r9
 	adcs	r0, r10, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r12, r1
+	str	r1, [sp, #128]                  @ 4-byte Spill
+	mov	r1, #0
+	adc	r1, r1, #0
+	str	r1, [sp, #136]                  @ 4-byte Spill
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	cmp	r1, #0
+	moveq	r0, r10
+	moveq	r9, r11
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	moveq	r8, r5
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	moveq	r6, r7
+	ldr	r7, [sp, #244]
+	ldr	r11, [sp, #120]                 @ 4-byte Reload
+	moveq	lr, r0
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
+	cmp	r1, #0
+	ldr	r5, [sp, #128]                  @ 4-byte Reload
+	moveq	r11, r3
+	moveq	r5, r12
+	moveq	r2, r0
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	ldr	r3, [sp, #92]                   @ 4-byte Reload
+	and	r10, r1, r0
+	adds	r0, r7, r2
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	and	r1, r1, r3
+	ldr	r0, [sp, #248]
+	ldr	r2, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #252]
+	add	lr, r4, #8
+	ldr	r7, [r4, #28]
+	adcs	r0, r0, r6
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #256]
+	ldm	lr, {r3, r6, lr}
+	adcs	r0, r0, r8
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #260]
+	ldm	r4, {r8, r12}
+	adcs	r0, r0, r9
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #264]
+	adcs	r0, r0, r2
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #268]
+	add	r2, sp, #216
+	adcs	r0, r0, r11
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #272]
+	ldr	r11, [r4, #24]
+	adcs	r9, r0, r5
+	ldr	r5, [sp, #212]
+	adc	r0, r1, r10
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldm	r2, {r0, r1, r2}
+	subs	r5, r5, r8
+	sbcs	r0, r0, r12
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	sbcs	r0, r1, r3
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	sbcs	r0, r2, r6
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #228]
+	ldr	r10, [r4, #20]
+	sbcs	r0, r0, lr
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #232]
+	str	r5, [sp, #88]                   @ 4-byte Spill
+	sbcs	r0, r0, r10
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #236]
+	ldr	r10, [r4, #32]
+	sbcs	r8, r0, r11
+	ldr	r0, [sp, #240]
+	ldr	r5, [sp, #144]                  @ 4-byte Reload
+	sbcs	lr, r0, r7
+	ldr	r11, [r4, #36]
+	sbcs	r5, r5, r10
+	str	r5, [sp, #64]                   @ 4-byte Spill
+	ldr	r5, [sp, #136]                  @ 4-byte Reload
+	ldr	r3, [r4, #40]
+	sbcs	r5, r5, r11
+	str	r5, [sp, #60]                   @ 4-byte Spill
+	ldr	r5, [sp, #128]                  @ 4-byte Reload
+	str	r3, [sp, #100]                  @ 4-byte Spill
+	sbcs	r3, r5, r3
+	ldr	r2, [r4, #44]
+	str	r3, [sp, #56]                   @ 4-byte Spill
+	ldr	r3, [sp, #120]                  @ 4-byte Reload
+	str	r2, [sp, #108]                  @ 4-byte Spill
+	sbcs	r2, r3, r2
+	ldr	r1, [r4, #48]
+	str	r2, [sp, #52]                   @ 4-byte Spill
+	ldr	r2, [sp, #112]                  @ 4-byte Reload
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	sbcs	r1, r2, r1
+	ldr	r6, [r4, #52]
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r7, [r4, #56]
+	sbcs	r1, r1, r6
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r0, [r4, #60]
+	sbcs	r1, r1, r7
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	sbcs	r0, r9, r0
+	str	r7, [sp, #132]                  @ 4-byte Spill
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r12, [r4, #64]
+	ldr	r7, [sp, #88]                   @ 4-byte Reload
+	sbc	r0, r0, #0
+	ldr	r9, [r4, #68]
+	subs	r7, r7, r12
+	str	r7, [sp, #12]                   @ 4-byte Spill
+	ldr	r7, [sp, #84]                   @ 4-byte Reload
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	sbcs	r7, r7, r9
+	ldr	r0, [r4, #72]
+	str	r7, [sp, #8]                    @ 4-byte Spill
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	sbcs	r0, r7, r0
+	ldr	r3, [r4, #76]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r2, [r4, #80]
+	sbcs	r0, r0, r3
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	sbcs	r0, r0, r2
+	ldr	r1, [r4, #84]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	str	r6, [sp, #124]                  @ 4-byte Spill
+	str	r9, [sp, #92]                   @ 4-byte Spill
+	add	r9, r4, #96
+	ldr	r6, [r4, #88]
+	sbcs	r0, r0, r1
+	ldr	r5, [r4, #92]
+	str	r2, [sp, #120]                  @ 4-byte Spill
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	sbcs	r0, r8, r6
+	str	r1, [sp, #128]                  @ 4-byte Spill
+	ldm	r9, {r1, r8, r9}
+	ldr	r2, [sp, #64]                   @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	sbcs	r0, lr, r5
+	ldr	r7, [sp, #60]                   @ 4-byte Reload
+	sbcs	r2, r2, r1
+	str	r3, [sp, #112]                  @ 4-byte Spill
+	sbcs	r7, r7, r8
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	ldr	r3, [r4, #108]
+	sbcs	r7, r7, r9
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	ldr	r7, [sp, #52]                   @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	sbcs	r7, r7, r3
+	ldr	r0, [r4, #112]
+	str	r7, [sp, #56]                   @ 4-byte Spill
+	ldr	r7, [sp, #48]                   @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	sbcs	r0, r7, r0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	lr, [r4, #116]
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	str	r5, [sp, #144]                  @ 4-byte Spill
+	sbcs	r0, r0, lr
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r5, [r4, #120]
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	str	r6, [sp, #136]                  @ 4-byte Spill
+	sbcs	r0, r0, r5
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r6, [r4, #124]
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r7, [sp, #20]                   @ 4-byte Reload
+	sbcs	r0, r0, r6
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	sbc	r0, r0, #0
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adds	r10, r10, r0
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	str	r10, [r4, #32]
+	adcs	r11, r11, r0
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	str	r11, [r4, #36]
+	adcs	r10, r0, r7
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	ldr	r7, [sp, #24]                   @ 4-byte Reload
+	str	r10, [r4, #40]
+	adcs	r11, r0, r7
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r7, [sp, #28]                   @ 4-byte Reload
+	str	r11, [r4, #44]
+	adcs	r10, r0, r7
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	ldr	r7, [sp, #68]                   @ 4-byte Reload
+	str	r10, [r4, #48]
+	adcs	r11, r0, r7
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	ldr	r7, [sp, #76]                   @ 4-byte Reload
+	str	r11, [r4, #52]
+	adcs	r10, r0, r7
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
+	ldr	r7, [sp, #88]                   @ 4-byte Reload
+	str	r10, [r4, #56]
+	adcs	r11, r0, r7
+	ldr	r7, [sp, #4]                    @ 4-byte Reload
+	adcs	r0, r12, r2
+	ldr	r2, [sp, #92]                   @ 4-byte Reload
+	str	r0, [r4, #64]
+	add	r12, r4, #100
+	adcs	r2, r2, r7
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r7, [sp, #16]                   @ 4-byte Reload
+	str	r2, [r4, #68]
+	adcs	r0, r0, r7
+	ldr	r2, [sp, #112]                  @ 4-byte Reload
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	str	r0, [r4, #72]
+	adcs	r2, r2, r7
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	ldr	r7, [sp, #60]                   @ 4-byte Reload
+	str	r2, [r4, #76]
+	adcs	r0, r0, r7
+	ldr	r2, [sp, #128]                  @ 4-byte Reload
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	str	r0, [r4, #80]
+	adcs	r2, r2, r7
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	str	r2, [r4, #84]
+	adcs	r0, r0, r7
+	ldr	r2, [sp, #144]                  @ 4-byte Reload
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	str	r0, [r4, #88]
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r2, r2, r7
+	str	r2, [r4, #92]
+	adcs	r10, r1, r0
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r1, r8, #0
+	str	r11, [r4, #60]
+	adcs	r2, r9, #0
+	str	r10, [r4, #96]
+	adcs	r3, r3, #0
+	adcs	r7, r0, #0
+	stm	r12, {r1, r2, r3, r7}
+	adcs	r0, lr, #0
+	adcs	r5, r5, #0
+	add	r1, r4, #116
+	adc	r6, r6, #0
+	stm	r1, {r0, r5, r6}
+	add	sp, sp, #276
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end75:
+	.size	mcl_fpDbl_mulPre16L, .Lfunc_end75-mcl_fpDbl_mulPre16L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fpDbl_sqrPre16L             @ -- Begin function mcl_fpDbl_sqrPre16L
+	.p2align	2
+	.type	mcl_fpDbl_sqrPre16L,%function
+	.code	32                              @ @mcl_fpDbl_sqrPre16L
+mcl_fpDbl_sqrPre16L:
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#276
+	sub	sp, sp, #276
+	mov	r2, r1
+	mov	r5, r1
+	mov	r4, r0
+	bl	mcl_fpDbl_mulPre8L
+	add	r1, r5, #32
+	add	r0, r4, #64
+	mov	r2, r1
+	bl	mcl_fpDbl_mulPre8L
+	ldr	r0, [r5, #16]
+	add	lr, r5, #44
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [r5, #20]
+	ldm	r5, {r8, r9, r10, r11}
+	ldr	r7, [r5, #32]
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [r5, #24]
+	adds	r8, r7, r8
+	ldr	r6, [r5, #36]
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [r5, #28]
+	ldr	r3, [r5, #40]
+	adcs	r5, r6, r9
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	ldm	lr, {r0, r1, r2, r12, lr}
+	adcs	r3, r3, r10
+	adcs	r7, r0, r11
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	str	r8, [sp, #180]
+	adcs	r11, r1, r0
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	add	r1, sp, #180
+	str	r5, [sp, #184]
+	adcs	r6, r2, r0
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
+	add	r2, sp, #148
+	str	r3, [sp, #188]
+	adcs	r9, r12, r0
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
+	str	r3, [sp, #128]                  @ 4-byte Spill
+	adcs	r10, lr, r0
+	add	r0, sp, #168
+	stm	r0, {r6, r9, r10}
+	add	r0, sp, #156
+	stm	r0, {r3, r7, r11}
+	mov	r0, #0
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #704
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #772]
-	add	lr, sp, #724
-	add	r12, sp, #704
-	ldr	r6, [sp, #744]
-	ldr	r5, [sp, #740]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #768]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #764]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #760]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #756]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #752]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #748]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	lr, {r9, r10, r11, lr}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r8, [sp, #52]           @ 4-byte Reload
-	adds	r0, r0, r8
-	str	r0, [r4, #32]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	add	r0, sp, #212
+	str	r7, [sp, #192]
+	str	r11, [sp, #196]
+	str	r6, [sp, #200]
+	str	r9, [sp, #204]
+	str	r10, [sp, #208]
+	str	r5, [sp, #152]
+	str	r8, [sp, #148]
+	bl	mcl_fpDbl_mulPre8L
+	ldr	r1, [sp, #144]                  @ 4-byte Reload
+	rsb	r3, r1, #0
+	sub	r0, r1, r1, lsl #1
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	and	r2, r5, r0
+	and	r12, r8, r0
+	and	r0, r7, r0
+	and	r7, r1, r3
+	and	r1, r9, r3
+	and	lr, r10, r3
+	and	r6, r6, r3
+	and	r5, r11, r3
+	lsl	r3, lr, #1
+	orr	r8, r3, r1, lsr #31
+	lsl	r1, r1, #1
+	ldr	r3, [sp, #244]
+	orr	r1, r1, r6, lsr #31
+	lsl	r6, r6, #1
+	orr	r6, r6, r5, lsr #31
+	lsl	r5, r5, #1
+	orr	r5, r5, r0, lsr #31
+	lsl	r0, r0, #1
+	orr	r0, r0, r7, lsr #31
+	lsl	r7, r7, #1
+	adds	r3, r3, r12, lsl #1
+	str	r3, [sp, #140]                  @ 4-byte Spill
+	orr	r7, r7, r2, lsr #31
+	lsl	r2, r2, #1
+	ldr	r3, [sp, #248]
+	orr	r2, r2, r12, lsr #31
+	add	r12, r4, #8
+	ldr	r9, [r4, #20]
+	adcs	r2, r3, r2
+	str	r2, [sp, #136]                  @ 4-byte Spill
+	ldr	r2, [sp, #252]
+	ldr	r11, [r4, #24]
+	adcs	r2, r2, r7
+	str	r2, [sp, #128]                  @ 4-byte Spill
+	ldr	r2, [sp, #256]
+	ldr	r7, [r4, #28]
 	adcs	r0, r2, r0
-	ldr	r2, [r7, #36]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #632
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #700]
-	add	r10, sp, #656
-	add	lr, sp, #632
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #692]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #688]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #684]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #680]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #676]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #52]          @ 4-byte Reload
-	adds	r0, r0, r11
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #260]
+	ldr	r2, [sp, #224]
+	adcs	r0, r0, r5
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #264]
+	ldr	r5, [sp, #212]
+	adcs	r0, r0, r6
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #268]
+	ldm	r12, {r3, r6, r12}
+	adcs	r0, r0, r1
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #272]
+	ldr	r1, [sp, #216]
+	adcs	r0, r0, r8
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
+	ldr	r8, [r4, #4]
+	adc	r0, r0, lr, lsr #31
+	ldr	lr, [r4]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	subs	r5, r5, lr
+	ldr	r0, [sp, #220]
+	sbcs	r1, r1, r8
+	ldr	r10, [sp, #140]                 @ 4-byte Reload
+	sbcs	r0, r0, r3
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	sbcs	r0, r2, r6
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #228]
+	ldr	lr, [r4, #36]
+	sbcs	r0, r0, r12
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #232]
+	ldr	r6, [sp, #136]                  @ 4-byte Reload
+	sbcs	r0, r0, r9
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #236]
+	str	r5, [sp, #80]                   @ 4-byte Spill
+	sbcs	r9, r0, r11
+	ldr	r0, [sp, #240]
+	ldr	r11, [r4, #32]
+	sbcs	r8, r0, r7
+	ldr	r12, [r4, #40]
+	sbcs	r5, r10, r11
+	ldr	r3, [r4, #44]
+	sbcs	r6, r6, lr
+	str	r6, [sp, #52]                   @ 4-byte Spill
+	ldr	r6, [sp, #128]                  @ 4-byte Reload
+	str	r3, [sp, #108]                  @ 4-byte Spill
+	sbcs	r6, r6, r12
+	str	r6, [sp, #48]                   @ 4-byte Spill
+	ldr	r6, [sp, #120]                  @ 4-byte Reload
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	sbcs	r3, r6, r3
+	ldr	r1, [r4, #48]
+	str	r3, [sp, #44]                   @ 4-byte Spill
+	ldr	r3, [sp, #112]                  @ 4-byte Reload
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	sbcs	r1, r3, r1
+	ldr	r7, [r4, #52]
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r2, [r4, #56]
+	sbcs	r1, r1, r7
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r0, [r4, #60]
+	sbcs	r1, r1, r2
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	str	lr, [sp, #92]                   @ 4-byte Spill
+	add	lr, r4, #64
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	sbcs	r0, r1, r0
+	str	r5, [sp, #56]                   @ 4-byte Spill
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	str	r7, [sp, #124]                  @ 4-byte Spill
+	str	r2, [sp, #132]                  @ 4-byte Spill
+	sbc	r0, r0, #0
+	ldm	lr, {r2, r7, lr}
+	ldr	r5, [sp, #80]                   @ 4-byte Reload
+	str	r12, [sp, #100]                 @ 4-byte Spill
+	subs	r5, r5, r2
+	str	r5, [sp, #12]                   @ 4-byte Spill
+	ldr	r5, [sp, #76]                   @ 4-byte Reload
+	ldr	r12, [r4, #76]
+	sbcs	r5, r5, r7
+	str	r5, [sp, #8]                    @ 4-byte Spill
+	ldr	r5, [sp, #72]                   @ 4-byte Reload
+	ldr	r3, [r4, #80]
+	sbcs	r5, r5, lr
+	str	r5, [sp, #20]                   @ 4-byte Spill
+	ldr	r5, [sp, #68]                   @ 4-byte Reload
+	str	r3, [sp, #120]                  @ 4-byte Spill
+	sbcs	r5, r5, r12
+	str	r5, [sp, #24]                   @ 4-byte Spill
+	ldr	r5, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [r4, #84]
+	sbcs	r3, r5, r3
+	str	r3, [sp, #28]                   @ 4-byte Spill
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	ldr	r6, [r4, #88]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	str	lr, [sp, #104]                  @ 4-byte Spill
+	add	lr, r4, #96
+	str	r1, [sp, #128]                  @ 4-byte Spill
+	sbcs	r1, r3, r1
+	ldr	r0, [r4, #92]
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	sbcs	r1, r9, r6
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	sbcs	r0, r8, r0
+	str	r12, [sp, #112]                 @ 4-byte Spill
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldm	lr, {r1, r8, r9, r10, r12, lr}
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	ldr	r5, [sp, #52]                   @ 4-byte Reload
+	sbcs	r3, r3, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	sbcs	r5, r5, r8
+	str	r5, [sp, #4]                    @ 4-byte Spill
+	ldr	r5, [sp, #48]                   @ 4-byte Reload
+	ldr	r0, [r4, #120]
+	sbcs	r5, r5, r9
+	str	r5, [sp, #16]                   @ 4-byte Spill
+	ldr	r5, [sp, #44]                   @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	sbcs	r5, r5, r10
+	str	r5, [sp, #48]                   @ 4-byte Spill
+	ldr	r5, [sp, #40]                   @ 4-byte Reload
+	str	r6, [sp, #136]                  @ 4-byte Spill
+	sbcs	r5, r5, r12
+	str	r5, [sp, #52]                   @ 4-byte Spill
+	ldr	r5, [sp, #36]                   @ 4-byte Reload
+	ldr	r6, [r4, #124]
+	sbcs	r5, r5, lr
+	str	r5, [sp, #56]                   @ 4-byte Spill
+	ldr	r5, [sp, #32]                   @ 4-byte Reload
+	str	r6, [sp, #68]                   @ 4-byte Spill
+	sbcs	r0, r5, r0
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r5, [sp, #8]                    @ 4-byte Reload
+	sbcs	r0, r0, r6
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r6, [sp, #20]                   @ 4-byte Reload
+	sbc	r0, r0, #0
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adds	r11, r11, r0
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	str	r11, [r4, #32]
+	adcs	r0, r0, r5
+	ldr	r5, [sp, #100]                  @ 4-byte Reload
 	str	r0, [r4, #36]
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [r7, #40]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #560
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #628]
-	add	r7, sp, #596
-	add	lr, sp, #580
-	add	r12, sp, #560
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #624]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #620]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #616]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #612]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #608]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r7, {r5, r6, r7}
-	ldm	lr, {r9, r10, r11, lr}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r8, [sp, #52]           @ 4-byte Reload
-	adds	r0, r0, r8
-	str	r0, [r4, #40]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	ldr	r2, [r1, #44]
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	add	r0, sp, #488
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #556]
-	add	r10, sp, #512
-	add	lr, sp, #488
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #552]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #548]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #544]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #540]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #536]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #44]          @ 4-byte Reload
-	adds	r0, r0, r11
+	adcs	r11, r5, r6
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	ldr	r5, [sp, #24]                   @ 4-byte Reload
+	ldr	r6, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	ldr	r5, [sp, #116]                  @ 4-byte Reload
+	str	r11, [r4, #40]
+	adcs	r11, r5, r6
 	str	r0, [r4, #44]
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	ldr	r5, [sp, #60]                   @ 4-byte Reload
+	ldr	r6, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	ldr	r5, [sp, #132]                  @ 4-byte Reload
+	str	r11, [r4, #48]
+	adcs	r11, r5, r6
+	str	r0, [r4, #52]
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
+	ldr	r5, [sp, #80]                   @ 4-byte Reload
+	str	r11, [r4, #56]
+	adcs	r0, r0, r5
+	str	r0, [r4, #60]
+	adcs	r2, r2, r3
+	ldr	r0, [sp, #4]                    @ 4-byte Reload
+	str	r2, [r4, #64]
+	ldr	r2, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r7, r0
+	ldr	r3, [sp, #16]                   @ 4-byte Reload
+	str	r0, [r4, #68]
+	adcs	r2, r2, r3
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	str	r2, [r4, #72]
+	adcs	r0, r0, r3
+	ldr	r2, [sp, #120]                  @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	str	r0, [r4, #76]
+	adcs	r2, r2, r3
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	str	r2, [r4, #80]
+	adcs	r0, r0, r3
+	ldr	r2, [sp, #136]                  @ 4-byte Reload
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	str	r0, [r4, #84]
+	adcs	r2, r2, r3
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	str	r2, [r4, #88]
+	ldr	r2, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [r4, #92]
+	adcs	r11, r1, r2
+	ldr	r5, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r8, #0
+	ldr	r6, [sp, #68]                   @ 4-byte Reload
+	adcs	r2, r9, #0
+	str	r11, [r4, #96]
+	adcs	r3, r10, #0
+	adcs	r7, r12, #0
+	add	r12, r4, #100
+	adcs	r1, lr, #0
+	stm	r12, {r0, r2, r3, r7}
+	adcs	r5, r5, #0
+	add	r0, r4, #116
+	adc	r6, r6, #0
+	stm	r0, {r1, r5, r6}
+	add	sp, sp, #276
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end76:
+	.size	mcl_fpDbl_sqrPre16L, .Lfunc_end76-mcl_fpDbl_sqrPre16L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_mont16L                  @ -- Begin function mcl_fp_mont16L
+	.p2align	2
+	.type	mcl_fp_mont16L,%function
+	.code	32                              @ @mcl_fp_mont16L
+mcl_fp_mont16L:
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#404
+	sub	sp, sp, #404
+	.pad	#2048
+	sub	sp, sp, #2048
+	mov	r7, r2
+	ldr	r2, [r2]
+	add	lr, sp, #2048
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	add	r0, lr, #328
+	ldr	r5, [r3, #-4]
+	mov	r4, r3
+	str	r1, [sp, #136]                  @ 4-byte Spill
+	str	r5, [sp, #128]                  @ 4-byte Spill
+	str	r3, [sp, #140]                  @ 4-byte Spill
+	str	r7, [sp, #132]                  @ 4-byte Spill
+	bl	mulPv512x32
+	ldr	r0, [sp, #2376]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #2380]
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	mul	r2, r5, r0
+	ldr	r1, [sp, #2384]
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #2388]
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	mov	r1, r4
+	ldr	r0, [sp, #2440]
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #2436]
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #2432]
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #2428]
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #2424]
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #2420]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #2416]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #2412]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #2408]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #2404]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #2400]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #2396]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #2392]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	add	r0, sp, #2304
+	bl	mulPv512x32
+	ldr	r0, [sp, #2368]
+	add	lr, sp, #2048
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #2364]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #2360]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #2356]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #2352]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #2348]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #2344]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #2340]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #2336]
+	ldr	r2, [r7, #4]
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	add	r0, lr, #184
+	ldr	r10, [sp, #2332]
+	ldr	r5, [sp, #2328]
+	ldr	r8, [sp, #2324]
+	ldr	r9, [sp, #2320]
+	ldr	r11, [sp, #2304]
+	ldr	r4, [sp, #2308]
+	ldr	r6, [sp, #2312]
+	ldr	r7, [sp, #2316]
+	bl	mulPv512x32
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	adds	r0, r11, r0
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r4, r0
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r11, [sp, #96]                  @ 4-byte Reload
 	adcs	r0, r6, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r6, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r7, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r9, r0
-	ldr	r9, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r2, [r9, #48]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r8, r0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r5, r0
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r10, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	adcs	r1, r2, r1
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	mov	r0, #0
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r2, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #60]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #416
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #484]
-	add	r10, sp, #444
-	add	lr, sp, #420
-	mov	r8, r4
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #480]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #476]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #472]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #468]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #464]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #460]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #456]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r10}
-	ldr	r11, [sp, #440]
-	ldr	r12, [sp, #416]
-	ldm	lr, {r0, r1, r2, r3, lr}
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	adds	r7, r12, r7
-	str	r7, [r4, #48]
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	mov	r4, r9
-	add	r9, sp, #344
-	ldr	r12, [r4, #52]
-	adcs	r7, r0, r7
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	mov	r2, r12
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #2296]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #2292]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #2288]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #2284]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #2280]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r7, [sp, #2232]
+	ldr	r0, [sp, #2236]
+	adds	r7, r11, r7
+	ldr	r10, [sp, #2276]
 	adcs	r0, r6, r0
-	mov	r6, r4
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	ldr	r9, [sp, #2272]
+	ldr	r8, [sp, #2268]
+	ldr	r5, [sp, #2264]
+	ldr	r4, [sp, #2260]
+	ldr	lr, [sp, #2256]
+	ldr	r12, [sp, #2252]
+	ldr	r3, [sp, #2248]
+	ldr	r1, [sp, #2240]
+	ldr	r2, [sp, #2244]
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	str	r7, [sp, #24]                   @ 4-byte Spill
+	adcs	r0, r0, r1
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	mov	r0, #0
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	mov	r0, r9
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #412]
-	add	r11, sp, #368
-	add	r12, sp, #348
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #408]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #404]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #400]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #396]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #392]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #388]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r9, r10, r11}
-	ldr	lr, [sp, #344]
-	ldm	r12, {r0, r1, r2, r3, r12}
-	adds	r7, lr, r7
-	str	r7, [r8, #52]
-	mov	r7, r6
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	add	r8, sp, #272
-	ldr	lr, [r7, #56]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #2160
+	bl	mulPv512x32
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	add	r11, sp, #2160
+	add	lr, sp, #2048
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldr	r2, [r0, #8]
+	ldr	r0, [sp, #2224]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #2220]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #2216]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #2212]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #2208]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #2204]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #2200]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #2196]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #2192]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	add	r0, lr, #40
+	ldm	r11, {r8, r9, r10, r11}
+	ldr	r4, [sp, #2188]
+	ldr	r5, [sp, #2184]
+	ldr	r6, [sp, #2180]
+	ldr	r7, [sp, #2176]
+	bl	mulPv512x32
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adds	r0, r0, r8
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	add	r10, sp, #2128
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r6, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #2152]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #2148]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #2144]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #2140]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [sp, #2088]
+	ldr	r0, [sp, #2092]
+	adds	r7, r6, r7
+	ldr	r6, [sp, #120]                  @ 4-byte Reload
+	ldm	r10, {r8, r9, r10}
+	adcs	r0, r6, r0
+	ldr	r11, [sp, #2124]
+	ldr	r5, [sp, #2120]
+	ldr	r4, [sp, #2116]
+	ldr	lr, [sp, #2112]
+	ldr	r12, [sp, #2108]
+	ldr	r3, [sp, #2104]
+	ldr	r1, [sp, #2096]
+	ldr	r2, [sp, #2100]
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	adcs	r0, r0, r1
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	mov	r0, #0
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #2016
+	bl	mulPv512x32
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	add	r11, sp, #2016
+	add	lr, sp, #1024
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldr	r2, [r0, #12]
+	ldr	r0, [sp, #2080]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #2076]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #2072]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #2068]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #2064]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #2060]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #2056]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #2052]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #2048]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	add	r0, lr, #920
+	ldm	r11, {r8, r9, r10, r11}
+	ldr	r4, [sp, #2044]
+	ldr	r5, [sp, #2040]
+	ldr	r6, [sp, #2036]
+	ldr	r7, [sp, #2032]
+	bl	mulPv512x32
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adds	r0, r0, r8
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	add	r10, sp, #1984
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r6, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #2008]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #2004]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #2000]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1996]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [sp, #1944]
+	ldr	r0, [sp, #1948]
+	adds	r7, r6, r7
+	ldr	r6, [sp, #120]                  @ 4-byte Reload
+	ldm	r10, {r8, r9, r10}
+	adcs	r0, r6, r0
+	ldr	r11, [sp, #1980]
+	ldr	r5, [sp, #1976]
+	ldr	r4, [sp, #1972]
+	ldr	lr, [sp, #1968]
+	ldr	r12, [sp, #1964]
+	ldr	r3, [sp, #1960]
+	ldr	r1, [sp, #1952]
+	ldr	r2, [sp, #1956]
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	adcs	r0, r0, r1
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	mov	r0, #0
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #1872
+	bl	mulPv512x32
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	add	r11, sp, #1872
+	add	lr, sp, #1024
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldr	r2, [r0, #16]
+	ldr	r0, [sp, #1936]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1932]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1928]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1924]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1920]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #1916]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1912]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1908]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1904]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	add	r0, lr, #776
+	ldm	r11, {r8, r9, r10, r11}
+	ldr	r4, [sp, #1900]
+	ldr	r5, [sp, #1896]
+	ldr	r6, [sp, #1892]
+	ldr	r7, [sp, #1888]
+	bl	mulPv512x32
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adds	r0, r0, r8
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	add	r10, sp, #1840
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r6
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	mov	r2, lr
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r7
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r6, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	mov	r0, r8
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #340]
-	add	r8, sp, #308
-	add	lr, sp, #292
-	add	r12, sp, #272
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #336]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #332]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #328]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #324]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r8, {r5, r6, r7, r8}
-	ldm	lr, {r9, r10, r11, lr}
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r4, [sp, #48]           @ 4-byte Reload
-	adds	r0, r0, r4
-	ldr	r4, [sp, #124]          @ 4-byte Reload
-	str	r0, [r4, #56]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r12, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #1864]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1860]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1856]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1852]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [sp, #1800]
+	ldr	r0, [sp, #1804]
+	adds	r7, r6, r7
+	ldr	r6, [sp, #120]                  @ 4-byte Reload
+	ldm	r10, {r8, r9, r10}
 	adcs	r0, r6, r0
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r2, [r6, #60]
-	adcs	r0, r7, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #200
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #268]
-	add	r9, sp, #232
-	add	lr, sp, #204
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #264]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #260]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #256]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #252]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #248]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #244]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r9, {r5, r8, r9}
-	ldr	r10, [sp, #228]
-	ldr	r12, [sp, #200]
-	ldm	lr, {r0, r1, r2, r3, r11, lr}
-	ldr	r7, [sp, #80]           @ 4-byte Reload
-	adds	r7, r12, r7
-	ldr	r12, [r6, #64]
-	str	r7, [r4, #60]
-	ldr	r4, [sp, #116]          @ 4-byte Reload
-	adcs	r7, r0, r4
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r4, r1, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adcs	r0, r2, r0
-	mov	r2, r12
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	mov	r1, r6
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
+	ldr	r11, [sp, #1836]
+	ldr	r5, [sp, #1832]
+	ldr	r4, [sp, #1828]
+	ldr	lr, [sp, #1824]
+	ldr	r12, [sp, #1820]
+	ldr	r3, [sp, #1816]
+	ldr	r1, [sp, #1808]
+	ldr	r2, [sp, #1812]
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	adcs	r0, r0, r1
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	mov	r0, #0
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	add	r0, sp, #128
-	bl	.LmulPv544x32(PLT)
-	add	r3, sp, #128
-	add	r11, sp, #164
-	add	lr, sp, #144
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r7, r0, r7
-	ldr	r0, [sp, #8]            @ 4-byte Reload
-	adcs	r6, r1, r4
-	adcs	r5, r2, r0
-	ldr	r0, [sp, #4]            @ 4-byte Reload
-	adcs	r4, r3, r0
-	ldr	r0, [sp, #196]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #192]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #188]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #184]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #180]
-	str	r0, [sp, #64]           @ 4-byte Spill
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #1728
+	bl	mulPv512x32
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	add	r11, sp, #1728
+	add	lr, sp, #1024
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldr	r2, [r0, #20]
+	ldr	r0, [sp, #1792]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1788]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1784]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1780]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1776]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #1772]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1768]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1764]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1760]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	add	r0, lr, #632
 	ldm	r11, {r8, r9, r10, r11}
-	ldm	lr, {r0, r2, r3, r12, lr}
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r7, [r1, #64]
-	str	r6, [r1, #68]
-	str	r5, [r1, #72]
-	ldr	r5, [sp, #40]           @ 4-byte Reload
-	str	r4, [r1, #76]
-	ldr	r4, [sp, #44]           @ 4-byte Reload
+	ldr	r4, [sp, #1756]
+	ldr	r5, [sp, #1752]
+	ldr	r6, [sp, #1748]
+	ldr	r7, [sp, #1744]
+	bl	mulPv512x32
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adds	r0, r0, r8
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	add	r10, sp, #1696
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r6, [sp, #124]                  @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [r1, #80]
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r2, r2, r4
-	str	r2, [r1, #84]
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r3, r0
-	str	r0, [r1, #88]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r2, r12, r2
-	str	r2, [r1, #92]
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	adcs	r0, lr, r0
-	str	r0, [r1, #96]
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r2, r8, r2
-	str	r2, [r1, #100]
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [r1, #104]
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r2, r10, r2
-	str	r2, [r1, #108]
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [r1, #112]
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r0, [r1, #116]
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	str	r0, [r1, #120]
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #76]           @ 4-byte Reload
-	str	r0, [r1, #124]
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r2, r0
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	str	r0, [r1, #128]
-	adc	r2, r2, #0
-	str	r2, [r1, #132]
-	add	sp, sp, #332
-	add	sp, sp, #1024
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end257:
-	.size	mcl_fpDbl_sqrPre17L, .Lfunc_end257-mcl_fpDbl_sqrPre17L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_mont17L
-	.align	2
-	.type	mcl_fp_mont17L,%function
-mcl_fp_mont17L:                         @ @mcl_fp_mont17L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#556
-	sub	sp, sp, #556
-	.pad	#2048
-	sub	sp, sp, #2048
-	add	r12, sp, #140
-	mov	r4, r3
-	stm	r12, {r1, r2, r3}
-	str	r0, [sp, #96]           @ 4-byte Spill
-	add	r0, sp, #2528
-	ldr	r5, [r3, #-4]
-	ldr	r2, [r2]
-	str	r5, [sp, #136]          @ 4-byte Spill
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #2528]
-	ldr	r1, [sp, #2532]
-	mul	r2, r0, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #2596]
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #2536]
-	add	r5, sp, #2048
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #2592]
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #2540]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #2588]
-	str	r1, [sp, #92]           @ 4-byte Spill
-	mov	r1, r4
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #2584]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #2580]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #2576]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #2572]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #2568]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #2564]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #2560]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #2556]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #2552]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #2548]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2544]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	add	r0, r5, #408
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #2524]
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r4, [sp, #2484]
-	ldr	r10, [sp, #2480]
-	ldr	r6, [sp, #2476]
-	ldr	r7, [sp, #2472]
-	ldr	r11, [sp, #2456]
-	ldr	r9, [sp, #2460]
-	ldr	r5, [sp, #2464]
-	ldr	r8, [sp, #2468]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #2520]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #2516]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2512]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2508]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2504]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2500]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2496]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2492]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #2488]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	ldr	r2, [r0, #4]
-	add	r0, sp, #2384
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	ldr	r3, [sp, #2400]
-	ldr	r12, [sp, #2404]
-	ldr	lr, [sp, #2408]
-	adds	r0, r11, r0
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r9, r0
-	ldr	r9, [sp, #2424]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r11, [sp, #104]         @ 4-byte Reload
-	adcs	r0, r5, r0
-	ldr	r5, [sp, #2416]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r8, r0
-	ldr	r8, [sp, #2384]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r7, r0
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #1720]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1716]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1712]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1708]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [sp, #1656]
+	ldr	r0, [sp, #1660]
+	adds	r7, r6, r7
+	ldr	r6, [sp, #120]                  @ 4-byte Reload
+	ldm	r10, {r8, r9, r10}
 	adcs	r0, r6, r0
-	ldr	r6, [sp, #2420]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r10, r0
-	ldr	r10, [sp, #2428]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	ldr	r4, [sp, #2412]
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
+	ldr	r11, [sp, #1692]
+	ldr	r5, [sp, #1688]
+	ldr	r4, [sp, #1684]
+	ldr	lr, [sp, #1680]
+	ldr	r12, [sp, #1676]
+	ldr	r3, [sp, #1672]
+	ldr	r1, [sp, #1664]
+	ldr	r2, [sp, #1668]
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	adcs	r0, r0, r1
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
 	mov	r0, #0
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	adcs	r1, r2, r1
-	ldr	r2, [sp, #2396]
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #1584
+	bl	mulPv512x32
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	add	r11, sp, #1584
+	add	lr, sp, #1024
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldr	r2, [r0, #24]
+	ldr	r0, [sp, #1648]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1644]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1640]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1636]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1632]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #1628]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1624]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1620]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1616]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	add	r0, lr, #488
+	ldm	r11, {r8, r9, r10, r11}
+	ldr	r4, [sp, #1612]
+	ldr	r5, [sp, #1608]
+	ldr	r6, [sp, #1604]
+	ldr	r7, [sp, #1600]
+	bl	mulPv512x32
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adds	r0, r0, r8
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	add	r10, sp, #1552
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r6, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	adds	r8, r11, r8
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #2392]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2452]
-	str	r8, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2448]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2444]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #2440]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #2436]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #2432]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #2388]
-	adcs	r0, r7, r0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #1576]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1572]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1568]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1564]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [sp, #1512]
+	ldr	r0, [sp, #1516]
+	adds	r7, r6, r7
+	ldr	r6, [sp, #120]                  @ 4-byte Reload
+	ldm	r10, {r8, r9, r10}
+	adcs	r0, r6, r0
+	ldr	r11, [sp, #1548]
+	ldr	r5, [sp, #1544]
+	ldr	r4, [sp, #1540]
+	ldr	lr, [sp, #1536]
+	ldr	r12, [sp, #1532]
+	ldr	r3, [sp, #1528]
+	ldr	r1, [sp, #1520]
+	ldr	r2, [sp, #1524]
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	str	r7, [sp, #36]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r4
-	add	r4, sp, #2048
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
+	str	r0, [sp, #60]                   @ 4-byte Spill
 	mov	r0, #0
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, r4, #264
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #2380]
-	add	r10, sp, #2320
-	ldr	r7, [sp, #2340]
-	ldr	r6, [sp, #2336]
-	ldr	r4, [sp, #2312]
-	ldr	r11, [sp, #2316]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2376]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2372]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2368]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2364]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2360]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2356]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2352]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2348]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2344]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r5, r8, r9, r10}
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, sp, #2240
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #2252]
-	ldr	r3, [sp, #2256]
-	ldr	r12, [sp, #2260]
-	ldr	lr, [sp, #2264]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #2268]
-	ldr	r0, [sp, #132]          @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #1440
+	bl	mulPv512x32
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	add	r11, sp, #1440
+	add	lr, sp, #1024
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldr	r2, [r0, #28]
+	ldr	r0, [sp, #1504]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1500]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1496]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1492]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1488]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #1484]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1480]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1476]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1472]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	add	r0, lr, #344
+	ldm	r11, {r8, r9, r10, r11}
+	ldr	r4, [sp, #1468]
+	ldr	r5, [sp, #1464]
+	ldr	r6, [sp, #1460]
+	ldr	r7, [sp, #1456]
+	bl	mulPv512x32
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adds	r0, r0, r8
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	add	r10, sp, #1408
 	adcs	r0, r0, r11
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r11, [sp, #132]         @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r6, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #1432]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1428]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1424]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1420]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [sp, #1368]
+	ldr	r0, [sp, #1372]
+	adds	r7, r6, r7
+	ldr	r6, [sp, #120]                  @ 4-byte Reload
+	ldm	r10, {r8, r9, r10}
+	adcs	r0, r6, r0
+	ldr	r11, [sp, #1404]
+	ldr	r5, [sp, #1400]
+	ldr	r4, [sp, #1396]
+	ldr	lr, [sp, #1392]
+	ldr	r12, [sp, #1388]
+	ldr	r3, [sp, #1384]
+	ldr	r1, [sp, #1376]
+	ldr	r2, [sp, #1380]
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	adcs	r0, r0, r1
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	ldr	r5, [sp, #2272]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	ldr	r8, [sp, #2240]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	ldr	r9, [sp, #2280]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	ldr	r10, [sp, #2284]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #2276]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	mov	r0, #0
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	mul	r2, r0, r7
+	add	r0, sp, #1296
+	bl	mulPv512x32
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	add	r11, sp, #1296
+	add	lr, sp, #1024
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldr	r2, [r0, #32]
+	ldr	r0, [sp, #1360]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1356]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1352]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1348]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1344]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #1340]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1336]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1332]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1328]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	add	r0, lr, #200
+	ldm	r11, {r8, r9, r10, r11}
+	ldr	r4, [sp, #1324]
+	ldr	r5, [sp, #1320]
+	ldr	r6, [sp, #1316]
+	ldr	r7, [sp, #1312]
+	bl	mulPv512x32
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adds	r0, r0, r8
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	add	r10, sp, #1264
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r7
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r6, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #1288]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1284]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1280]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1276]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r7, [sp, #1224]
+	ldr	r0, [sp, #1228]
+	adds	r7, r6, r7
+	ldr	r6, [sp, #120]                  @ 4-byte Reload
+	ldm	r10, {r8, r9, r10}
+	adcs	r0, r6, r0
+	ldr	r11, [sp, #1260]
+	ldr	r5, [sp, #1256]
+	ldr	r4, [sp, #1252]
+	ldr	lr, [sp, #1248]
+	ldr	r12, [sp, #1244]
+	ldr	r3, [sp, #1240]
+	ldr	r1, [sp, #1232]
+	ldr	r2, [sp, #1236]
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	str	r7, [sp, #36]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r5, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	mul	r2, r5, r7
+	adcs	r0, r0, r8
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	mov	r0, #0
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	add	r0, sp, #1152
+	bl	mulPv512x32
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	add	r11, sp, #1152
+	add	lr, sp, #1024
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldr	r2, [r0, #36]
+	ldr	r0, [sp, #1216]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1212]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1208]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1204]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1200]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #1196]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1192]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1188]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1184]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1180]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, lr, #56
+	ldm	r11, {r8, r9, r10, r11}
+	ldr	r4, [sp, #1176]
+	ldr	r6, [sp, #1172]
+	ldr	r7, [sp, #1168]
+	bl	mulPv512x32
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	adds	r0, r0, r8
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	adcs	r1, r1, r10
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	add	r10, sp, #1120
+	adcs	r1, r1, r11
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	adcs	r1, r1, r7
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	adcs	r1, r1, r6
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r1, r1, r4
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	ldr	r2, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	adc	r1, r1, #0
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	ldr	r4, [sp, #1080]
+	ldr	r6, [sp, #1084]
+	adds	r0, r0, r4
+	ldr	r7, [sp, #1088]
+	ldr	r11, [sp, #1092]
+	mul	r1, r5, r0
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r5, [sp, #120]                  @ 4-byte Reload
+	adcs	r6, r5, r6
+	ldr	r5, [sp, #116]                  @ 4-byte Reload
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1144]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1140]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1136]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldm	r10, {r4, r8, r9, r10}
+	ldr	lr, [sp, #1116]
+	ldr	r12, [sp, #1112]
+	ldr	r3, [sp, #1108]
+	ldr	r2, [sp, #1104]
+	ldr	r1, [sp, #1100]
+	ldr	r0, [sp, #1096]
+	str	r6, [sp, #120]                  @ 4-byte Spill
+	adcs	r6, r5, r7
+	ldr	r5, [sp, #112]                  @ 4-byte Reload
+	str	r6, [sp, #116]                  @ 4-byte Spill
+	adcs	r6, r5, r11
+	ldr	r5, [sp, #104]                  @ 4-byte Reload
+	str	r6, [sp, #112]                  @ 4-byte Spill
+	adcs	r0, r5, r0
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #2248]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	mov	r0, #0
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	adds	r8, r11, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #2308]
-	str	r8, [sp, #36]           @ 4-byte Spill
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2304]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2300]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2296]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2292]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2288]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2244]
-	adcs	r0, r7, r0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	add	r0, sp, #1008
+	bl	mulPv512x32
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	add	r11, sp, #1008
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldr	r2, [r0, #40]
+	ldr	r0, [sp, #1072]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1068]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1064]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #1060]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1056]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1052]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1048]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1044]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1040]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	add	r0, sp, #936
+	ldm	r11, {r8, r9, r10, r11}
+	ldr	r4, [sp, #1036]
+	ldr	r5, [sp, #1032]
+	ldr	r6, [sp, #1028]
+	ldr	r7, [sp, #1024]
+	bl	mulPv512x32
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	add	lr, sp, #952
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	adds	r0, r0, r8
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	adcs	r1, r1, r10
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	add	r10, sp, #976
+	adcs	r1, r1, r11
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r11, [sp, #120]                 @ 4-byte Reload
+	adcs	r1, r1, r7
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	add	r7, sp, #936
+	adcs	r1, r1, r6
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	adcs	r1, r1, r5
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	adcs	r1, r1, r4
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	ldr	r2, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adc	r1, r1, #0
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldm	r7, {r4, r6, r7}
+	adds	r1, r0, r4
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	ldr	r5, [sp, #948]
+	adcs	r6, r11, r6
+	str	r1, [sp, #124]                  @ 4-byte Spill
+	mul	r2, r0, r1
+	str	r2, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1000]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #996]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #992]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldm	r10, {r4, r8, r9, r10}
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	str	r6, [sp, #76]                   @ 4-byte Spill
+	ldr	r6, [sp, #116]                  @ 4-byte Reload
+	adcs	r6, r6, r7
+	str	r6, [sp, #72]                   @ 4-byte Spill
+	ldr	r6, [sp, #112]                  @ 4-byte Reload
+	adcs	r5, r6, r5
+	str	r5, [sp, #68]                   @ 4-byte Spill
+	ldr	r5, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r5, r0
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r4
-	add	r4, sp, #2048
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
+	str	r0, [sp, #84]                   @ 4-byte Spill
 	mov	r0, #0
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, r4, #120
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #2236]
-	add	r10, sp, #2176
-	ldr	r7, [sp, #2196]
-	ldr	r6, [sp, #2192]
-	ldr	r4, [sp, #2168]
-	ldr	r11, [sp, #2172]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2232]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2228]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2224]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2220]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2216]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2212]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2208]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #2204]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2200]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r5, r8, r9, r10}
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, sp, #2096
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #2108]
-	ldr	r3, [sp, #2112]
-	ldr	r12, [sp, #2116]
-	ldr	lr, [sp, #2120]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #2124]
-	ldr	r0, [sp, #132]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	add	r0, sp, #864
+	bl	mulPv512x32
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	add	r11, sp, #864
+	add	r0, sp, #792
+	ldr	r2, [r1, #44]
+	ldr	r1, [sp, #928]
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #924]
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #920]
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #916]
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #912]
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #908]
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #904]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #900]
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #896]
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldm	r11, {r9, r10, r11}
+	ldr	r6, [sp, #892]
+	ldr	r7, [sp, #888]
+	ldr	r4, [sp, #884]
+	ldr	r8, [sp, #880]
+	ldr	r5, [sp, #876]
+	bl	mulPv512x32
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	add	lr, sp, #796
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adds	r0, r0, r9
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	add	r10, sp, #820
 	adcs	r0, r0, r11
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r11, [sp, #132]         @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r11, [sp, #124]                 @ 4-byte Reload
 	adcs	r0, r0, r5
-	ldr	r5, [sp, #2128]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	ldr	r8, [sp, #2096]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #2136]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #2140]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #2132]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r7
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r7, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #2104]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	adds	r8, r11, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #2164]
-	str	r8, [sp, #36]           @ 4-byte Spill
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2160]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2156]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2152]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2148]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2144]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2100]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #856]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #852]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #848]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #844]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #840]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #792]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r6, r11, r6
 	adcs	r0, r7, r0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	ldm	r10, {r4, r5, r8, r9, r10}
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	str	r6, [sp, #32]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
 	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
+	str	r0, [sp, #88]                   @ 4-byte Spill
 	mov	r0, #0
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, r4, #1000
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #2092]
-	add	r10, sp, #2032
-	ldr	r7, [sp, #2052]
-	ldr	r6, [sp, #2048]
-	ldr	r4, [sp, #2024]
-	ldr	r11, [sp, #2028]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2088]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2084]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2080]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2076]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2072]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2068]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2064]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #2060]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2056]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r5, r8, r9, r10}
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, sp, #1952
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1964]
-	ldr	r3, [sp, #1968]
-	ldr	r12, [sp, #1972]
-	ldr	lr, [sp, #1976]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #1980]
-	ldr	r0, [sp, #132]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, sp, #720
+	bl	mulPv512x32
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	add	r10, sp, #728
+	add	r0, sp, #648
+	ldr	r2, [r1, #48]
+	ldr	r1, [sp, #784]
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #780]
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #776]
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #772]
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #768]
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #764]
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #760]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #756]
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #752]
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldm	r10, {r4, r6, r8, r10}
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldr	r7, [sp, #748]
+	ldr	r5, [sp, #744]
+	ldr	r9, [sp, #720]
+	ldr	r11, [sp, #724]
+	bl	mulPv512x32
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	add	lr, sp, #652
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adds	r0, r0, r9
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r11
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r11, [sp, #132]         @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1984]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r11, [sp, #80]                  @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	ldr	r8, [sp, #1952]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1992]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	ldr	r10, [sp, #1996]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1988]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	add	r10, sp, #676
+	adcs	r0, r0, r5
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r7
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	ldr	r7, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1960]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	adds	r8, r11, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #2020]
-	str	r8, [sp, #36]           @ 4-byte Spill
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2016]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2012]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2008]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2004]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2000]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1956]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #712]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #708]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #704]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #700]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #696]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r6, [sp, #648]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r6, r11, r6
 	adcs	r0, r7, r0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	ldm	r10, {r4, r5, r8, r9, r10}
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	str	r6, [sp, #32]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
 	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
+	str	r0, [sp, #88]                   @ 4-byte Spill
 	mov	r0, #0
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, r4, #856
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1948]
-	add	r10, sp, #1888
-	ldr	r7, [sp, #1908]
-	ldr	r6, [sp, #1904]
-	ldr	r4, [sp, #1880]
-	ldr	r11, [sp, #1884]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1944]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1940]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1936]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1932]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1928]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1924]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1920]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1916]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1912]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r5, r8, r9, r10}
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, sp, #1808
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1820]
-	ldr	r3, [sp, #1824]
-	ldr	r12, [sp, #1828]
-	ldr	lr, [sp, #1832]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #1836]
-	ldr	r0, [sp, #132]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, sp, #576
+	bl	mulPv512x32
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	add	r10, sp, #584
+	add	r0, sp, #504
+	ldr	r2, [r1, #52]
+	ldr	r1, [sp, #640]
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #636]
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #632]
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #628]
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #624]
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #620]
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #616]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #612]
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #608]
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldm	r10, {r4, r6, r8, r10}
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldr	r7, [sp, #604]
+	ldr	r5, [sp, #600]
+	ldr	r9, [sp, #576]
+	ldr	r11, [sp, #580]
+	bl	mulPv512x32
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	add	lr, sp, #508
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adds	r0, r0, r9
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r11
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r11, [sp, #132]         @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1840]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r11, [sp, #80]                  @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	ldr	r8, [sp, #1808]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1848]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	ldr	r10, [sp, #1852]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1844]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	add	r10, sp, #532
+	adcs	r0, r0, r5
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r7
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	ldr	r7, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1816]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	adds	r8, r11, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1876]
-	str	r8, [sp, #36]           @ 4-byte Spill
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1872]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1868]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1864]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1860]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1856]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1812]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #568]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #564]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #560]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #556]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #552]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r6, [sp, #504]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r6, r11, r6
 	adcs	r0, r7, r0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	ldm	r10, {r4, r5, r8, r9, r10}
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	str	r6, [sp, #32]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
 	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
+	str	r0, [sp, #88]                   @ 4-byte Spill
 	mov	r0, #0
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, r4, #712
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1804]
-	add	r10, sp, #1744
-	ldr	r7, [sp, #1764]
-	ldr	r6, [sp, #1760]
-	ldr	r4, [sp, #1736]
-	ldr	r11, [sp, #1740]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1800]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1796]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1792]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1788]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1784]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1780]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1776]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1772]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1768]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r5, r8, r9, r10}
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, sp, #1664
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1676]
-	ldr	r3, [sp, #1680]
-	ldr	r12, [sp, #1684]
-	ldr	lr, [sp, #1688]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #1692]
-	ldr	r0, [sp, #132]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, sp, #432
+	bl	mulPv512x32
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	add	r10, sp, #440
+	add	r0, sp, #360
+	ldr	r2, [r1, #56]
+	ldr	r1, [sp, #496]
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #492]
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #488]
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #484]
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #480]
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #476]
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #472]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #468]
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #464]
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldm	r10, {r4, r6, r8, r10}
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldr	r7, [sp, #460]
+	ldr	r5, [sp, #456]
+	ldr	r9, [sp, #432]
+	ldr	r11, [sp, #436]
+	bl	mulPv512x32
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	add	lr, sp, #364
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adds	r0, r0, r9
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r11
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r11, [sp, #132]         @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1696]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r11, [sp, #80]                  @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	ldr	r8, [sp, #1664]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1704]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	ldr	r10, [sp, #1708]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1700]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	add	r10, sp, #388
+	adcs	r0, r0, r5
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r7
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	ldr	r7, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1672]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	adds	r8, r11, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1732]
-	str	r8, [sp, #36]           @ 4-byte Spill
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1728]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1724]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1720]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1716]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1712]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1668]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #424]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #420]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #416]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #412]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #408]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r6, [sp, #360]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r6, r11, r6
 	adcs	r0, r7, r0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	ldm	r10, {r4, r5, r8, r9, r10}
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	str	r6, [sp, #36]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
 	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
+	str	r0, [sp, #88]                   @ 4-byte Spill
 	mov	r0, #0
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, r4, #568
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1660]
-	add	r10, sp, #1600
-	ldr	r7, [sp, #1620]
-	ldr	r6, [sp, #1616]
-	ldr	r4, [sp, #1592]
-	ldr	r11, [sp, #1596]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1656]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1652]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1648]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1644]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1640]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1636]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1632]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1628]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1624]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r5, r8, r9, r10}
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, sp, #1520
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1532]
-	ldr	r3, [sp, #1536]
-	ldr	r12, [sp, #1540]
-	ldr	lr, [sp, #1544]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #1548]
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r11, [sp, #132]         @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1552]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1520]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, sp, #288
+	bl	mulPv512x32
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	add	r0, sp, #216
+	ldr	r2, [r1, #60]
+	ldr	r1, [sp, #352]
+	str	r1, [sp, #132]                  @ 4-byte Spill
+	ldr	r1, [sp, #348]
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #344]
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #340]
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #336]
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #332]
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #328]
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #324]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #320]
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	ldr	r5, [sp, #316]
+	ldr	r10, [sp, #312]
+	ldr	r8, [sp, #308]
+	ldr	r9, [sp, #304]
+	ldr	r11, [sp, #288]
+	ldr	r4, [sp, #292]
+	ldr	r7, [sp, #296]
+	ldr	r6, [sp, #300]
+	bl	mulPv512x32
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	add	lr, sp, #232
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adds	r0, r0, r11
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r2, r0, r4
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r11, [sp, #136]                 @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	ldr	r9, [sp, #1560]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	ldr	r10, [sp, #1564]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1556]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	ldr	r5, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #1528]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
 	adc	r0, r0, #0
-	adds	r8, r11, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1588]
-	str	r8, [sp, #36]           @ 4-byte Spill
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1584]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1580]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1576]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1572]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1568]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1524]
-	adcs	r0, r7, r0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r4, [sp, #216]
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	adds	r9, r2, r4
+	ldr	r10, [sp, #220]
+	ldr	r6, [sp, #224]
+	mul	r1, r0, r9
+	ldr	r8, [sp, #228]
+	adcs	r10, r11, r10
+	adcs	r6, r5, r6
+	ldr	r5, [sp, #76]                   @ 4-byte Reload
+	ldr	r11, [sp, #140]                 @ 4-byte Reload
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	adcs	r8, r5, r8
+	ldr	r0, [sp, #280]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #276]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #272]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #268]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #264]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	ldr	r5, [sp, #72]                   @ 4-byte Reload
+	ldr	r7, [sp, #260]
+	adcs	r0, r5, r0
+	ldr	r4, [sp, #256]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, r4, #424
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1516]
-	add	r10, sp, #1456
-	ldr	r7, [sp, #1476]
-	ldr	r6, [sp, #1472]
-	ldr	r4, [sp, #1448]
-	ldr	r11, [sp, #1452]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1512]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1508]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1504]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1500]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1496]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1492]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1488]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1484]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1480]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r5, r8, r9, r10}
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, sp, #1376
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1388]
-	ldr	r3, [sp, #1392]
-	ldr	r12, [sp, #1396]
-	ldr	lr, [sp, #1400]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #1404]
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r11, [sp, #132]         @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #1408]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	ldr	r8, [sp, #1376]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #1416]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	ldr	r10, [sp, #1420]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1412]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r7
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1384]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adc	r0, r0, #0
-	adds	r8, r11, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1444]
-	str	r8, [sp, #36]           @ 4-byte Spill
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1440]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1436]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1432]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1428]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1424]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1380]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	mov	r0, #0
+	adc	r5, r0, #0
+	add	r0, sp, #144
+	mov	r1, r11
+	bl	mulPv512x32
+	add	r3, sp, #144
+	add	lr, r11, #32
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r9, r0
+	adcs	r12, r10, r1
+	str	r12, [sp, #104]                 @ 4-byte Spill
+	adcs	r6, r6, r2
+	str	r6, [sp, #96]                   @ 4-byte Spill
+	adcs	r7, r8, r3
+	str	r7, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #160]
+	add	r8, r11, #44
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	adcs	r4, r1, r0
+	str	r4, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #164]
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	adcs	r10, r1, r0
+	str	r10, [sp, #72]                  @ 4-byte Spill
+	ldr	r0, [sp, #168]
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #172]
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #176]
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #180]
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #184]
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #188]
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #192]
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #196]
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #200]
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #204]
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #208]
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	adc	r0, r5, #0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldm	r11, {r1, r2, r3, r5}
+	subs	r1, r12, r1
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	sbcs	r1, r6, r2
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	sbcs	r1, r7, r3
+	ldr	r0, [r11, #16]
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	sbcs	r1, r4, r5
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	sbcs	r0, r10, r0
+	ldm	lr, {r1, r9, lr}
+	ldm	r8, {r4, r5, r6, r7, r8}
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r12, [r11, #20]
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	ldr	r3, [r11, #24]
+	sbcs	r0, r0, r12
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	ldr	r2, [r11, #28]
+	sbcs	r0, r0, r3
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	ldr	r10, [sp, #56]                  @ 4-byte Reload
+	sbcs	r0, r0, r2
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	ldr	r12, [sp, #52]                  @ 4-byte Reload
+	sbcs	r11, r0, r1
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	sbcs	r9, r0, r9
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	sbcs	lr, r0, lr
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	sbcs	r4, r0, r4
+	sbcs	r5, r10, r5
+	sbcs	r6, r12, r6
+	sbcs	r7, r2, r7
+	sbcs	r0, r1, r8
+	sbc	r3, r3, #0
+	ands	r8, r3, #1
+	ldr	r3, [sp, #108]                  @ 4-byte Reload
+	movne	r0, r1
+	movne	r7, r2
+	movne	r6, r12
+	cmp	r8, #0
+	str	r0, [r3, #60]
+	movne	r5, r10
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	str	r7, [r3, #56]
+	movne	r4, r0
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	str	r6, [r3, #52]
+	str	r5, [r3, #48]
+	movne	lr, r0
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	cmp	r8, #0
+	str	r4, [r3, #44]
+	str	lr, [r3, #40]
+	movne	r9, r0
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	str	r9, [r3, #36]
+	movne	r11, r0
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	str	r11, [r3, #32]
+	movne	r1, r0
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	str	r1, [r3, #28]
+	cmp	r8, #0
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	movne	r1, r0
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	str	r1, [r3, #24]
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	movne	r1, r0
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
+	str	r1, [r3, #20]
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	movne	r0, r1
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	str	r0, [r3, #16]
+	cmp	r8, #0
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	movne	r0, r1
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	str	r0, [r3, #12]
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	movne	r0, r1
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	str	r0, [r3, #8]
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	movne	r0, r1
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	str	r0, [r3, #4]
+	cmp	r8, #0
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	movne	r0, r1
+	str	r0, [r3]
+	add	sp, sp, #404
+	add	sp, sp, #2048
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end77:
+	.size	mcl_fp_mont16L, .Lfunc_end77-mcl_fp_mont16L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_montNF16L                @ -- Begin function mcl_fp_montNF16L
+	.p2align	2
+	.type	mcl_fp_montNF16L,%function
+	.code	32                              @ @mcl_fp_montNF16L
+mcl_fp_montNF16L:
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#396
+	sub	sp, sp, #396
+	.pad	#2048
+	sub	sp, sp, #2048
+	mov	r7, r2
+	ldr	r2, [r2]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	add	r0, sp, #2368
+	ldr	r5, [r3, #-4]
+	mov	r4, r3
+	str	r1, [sp, #128]                  @ 4-byte Spill
+	str	r5, [sp, #120]                  @ 4-byte Spill
+	str	r3, [sp, #132]                  @ 4-byte Spill
+	str	r7, [sp, #124]                  @ 4-byte Spill
+	bl	mulPv512x32
+	ldr	r0, [sp, #2368]
+	add	lr, sp, #2048
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #2372]
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	mul	r2, r5, r0
+	ldr	r1, [sp, #2376]
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #2380]
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	mov	r1, r4
+	ldr	r0, [sp, #2432]
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #2428]
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #2424]
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #2420]
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #2416]
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #2412]
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #2408]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #2404]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #2400]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #2396]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #2392]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #2388]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #2384]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	add	r0, lr, #248
+	bl	mulPv512x32
+	ldr	r0, [sp, #2360]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #2356]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #2352]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #2348]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #2344]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #2340]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #2336]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #2332]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #2328]
+	ldr	r2, [r7, #4]
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #2224
+	ldr	r7, [sp, #2324]
+	ldr	r5, [sp, #2320]
+	ldr	r6, [sp, #2316]
+	ldr	r8, [sp, #2312]
+	ldr	r9, [sp, #2296]
+	ldr	r11, [sp, #2300]
+	ldr	r10, [sp, #2304]
+	ldr	r4, [sp, #2308]
+	bl	mulPv512x32
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adds	r0, r9, r0
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r11, r0
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r11, [sp, #88]                  @ 4-byte Reload
+	adcs	r0, r10, r0
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r4, r0
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r8, r0
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r6, r0
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	adcs	r0, r5, r0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r7, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	adc	r0, r1, r0
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #2288]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #2284]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #2280]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #2276]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #2272]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	ldr	r6, [sp, #2224]
+	ldr	r0, [sp, #2228]
+	adds	r6, r11, r6
+	ldr	r10, [sp, #2268]
 	adcs	r0, r7, r0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	ldr	r9, [sp, #2264]
+	ldr	r8, [sp, #2260]
+	ldr	r5, [sp, #2256]
+	ldr	r4, [sp, #2252]
+	ldr	lr, [sp, #2248]
+	ldr	r12, [sp, #2244]
+	ldr	r3, [sp, #2240]
+	ldr	r1, [sp, #2232]
+	ldr	r2, [sp, #2236]
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	str	r6, [sp, #20]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	add	lr, sp, #2048
 	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	mul	r2, r8, r0
-	add	r0, r4, #280
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1372]
-	add	r10, sp, #1312
-	ldr	r7, [sp, #1332]
-	ldr	r6, [sp, #1328]
-	ldr	r4, [sp, #1304]
-	ldr	r11, [sp, #1308]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1368]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1364]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1360]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1356]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1352]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1348]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1344]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1340]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1336]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r5, r8, r9, r10}
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, sp, #1232
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	ldr	r2, [sp, #1244]
-	ldr	r3, [sp, #1248]
-	ldr	r12, [sp, #1252]
-	ldr	lr, [sp, #1256]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #1260]
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r11, [sp, #132]         @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	ldr	r5, [sp, #1264]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	ldr	r8, [sp, #1232]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	ldr	r9, [sp, #1272]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	ldr	r10, [sp, #1276]
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #1268]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	ldr	r7, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #1240]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	adds	r8, r11, r8
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #1300]
-	str	r8, [sp, #36]           @ 4-byte Spill
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1296]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1292]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1288]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1284]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1280]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1236]
-	adcs	r0, r7, r0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, lr, #104
+	bl	mulPv512x32
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	add	r9, sp, #2160
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldr	r2, [r0, #8]
+	ldr	r0, [sp, #2216]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #2212]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #2208]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #2204]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #2200]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #2196]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #2192]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #2188]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #2184]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #2080
+	ldm	r9, {r4, r6, r9}
+	ldr	r7, [sp, #2180]
+	ldr	r5, [sp, #2176]
+	ldr	r8, [sp, #2172]
+	ldr	r10, [sp, #2152]
+	ldr	r11, [sp, #2156]
+	bl	mulPv512x32
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adds	r0, r0, r10
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r11, [sp, #116]                 @ 4-byte Reload
 	adcs	r0, r0, r4
-	add	r4, sp, #1024
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #136]          @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	mul	r2, r8, r5
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r6
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, r4, #136
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1228]
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r11, [sp, #1184]
-	ldr	r4, [sp, #1180]
-	ldr	r6, [sp, #1176]
-	ldr	r7, [sp, #1160]
-	ldr	r8, [sp, #1164]
-	ldr	r9, [sp, #1168]
-	ldr	r10, [sp, #1172]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1224]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1220]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1216]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1212]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1208]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1204]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1200]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1196]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1192]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1188]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	ldr	r2, [r0, #40]
-	add	r0, sp, #1088
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #1104
-	adds	r0, r0, r7
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	add	r8, sp, #1088
-	adcs	r1, r1, r9
-	str	r1, [sp, #128]          @ 4-byte Spill
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	adcs	r1, r1, r10
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	adcs	r1, r1, r6
-	str	r1, [sp, #120]          @ 4-byte Spill
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	adcs	r1, r1, r4
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	adcs	r1, r1, r11
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r1, [sp, #68]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #64]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldm	r8, {r4, r6, r8}
-	ldr	r7, [sp, #1100]
-	ldr	r10, [sp, #1140]
-	ldr	r9, [sp, #1136]
-	adds	r0, r0, r4
-	ldr	r4, [sp, #1128]
-	mul	r1, r0, r5
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #1156]
-	ldr	r5, [sp, #1132]
-	str	r1, [sp, #52]           @ 4-byte Spill
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1152]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1148]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1144]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #128]         @ 4-byte Reload
-	adcs	r6, r11, r6
-	str	r6, [sp, #128]          @ 4-byte Spill
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	adcs	r6, r6, r8
-	str	r6, [sp, #124]          @ 4-byte Spill
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	adcs	r6, r6, r7
-	str	r6, [sp, #120]          @ 4-byte Spill
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #1016
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1084]
-	add	r10, sp, #1016
-	ldr	r11, [sp, #1044]
-	ldr	r4, [sp, #1040]
-	ldr	r5, [sp, #1036]
-	ldr	r6, [sp, #1032]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1080]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1076]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1072]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1068]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1064]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1060]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1056]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1052]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1048]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	r10, {r7, r8, r9, r10}
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r2, [r0, #44]
-	add	r0, sp, #944
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #960
-	adds	r0, r0, r7
-	ldr	r0, [sp, #128]          @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	add	r8, sp, #944
-	adcs	r1, r1, r9
-	str	r1, [sp, #128]          @ 4-byte Spill
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	adcs	r1, r1, r10
-	add	r10, sp, #984
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	adcs	r1, r1, r6
-	str	r1, [sp, #120]          @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	adcs	r1, r1, r5
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r1, r4
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r1, r1, r11
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adc	r1, r1, #0
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldm	r8, {r4, r6, r8}
-	ldr	r7, [sp, #956]
-	adds	r1, r0, r4
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	str	r1, [sp, #132]          @ 4-byte Spill
-	mul	r2, r1, r0
-	ldr	r0, [sp, #1012]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1008]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1004]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1000]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #128]         @ 4-byte Reload
-	adcs	r6, r11, r6
-	str	r6, [sp, #80]           @ 4-byte Spill
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	adcs	r6, r6, r8
-	str	r6, [sp, #76]           @ 4-byte Spill
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	adcs	r6, r6, r7
-	str	r6, [sp, #72]           @ 4-byte Spill
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	add	r0, sp, #872
-	bl	.LmulPv544x32(PLT)
-	ldr	r1, [sp, #940]
-	add	r11, sp, #880
-	ldr	r5, [sp, #900]
-	ldr	r4, [sp, #896]
-	ldr	r9, [sp, #872]
-	ldr	r10, [sp, #876]
-	add	r0, sp, #800
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #936]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #932]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #928]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #924]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #920]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #916]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #912]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #908]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #904]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r11, {r6, r7, r8, r11}
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	ldr	r2, [r1, #48]
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #804
-	adds	r0, r0, r9
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #828
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r7, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #868]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #864]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #860]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #856]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #852]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #848]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r7, [sp, #800]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #132]         @ 4-byte Reload
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	adds	r7, r11, r7
-	adcs	r0, r6, r0
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #2144]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #2140]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #2136]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #2132]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #2128]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #2080]
+	ldr	r0, [sp, #2084]
+	adds	r6, r11, r6
+	ldr	r10, [sp, #2124]
+	adcs	r0, r7, r0
+	ldr	r9, [sp, #2120]
+	ldr	r8, [sp, #2116]
+	ldr	r5, [sp, #2112]
+	ldr	r4, [sp, #2108]
+	ldr	lr, [sp, #2104]
+	ldr	r12, [sp, #2100]
+	ldr	r3, [sp, #2096]
+	ldr	r1, [sp, #2088]
+	ldr	r2, [sp, #2092]
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	str	r6, [sp, #32]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #1024
 	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	mov	r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #728
-	bl	.LmulPv544x32(PLT)
-	ldr	r1, [sp, #796]
-	add	r9, sp, #732
-	ldr	r5, [sp, #756]
-	ldr	r11, [sp, #752]
-	ldr	r8, [sp, #748]
-	ldr	r10, [sp, #728]
-	add	r0, sp, #656
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #792]
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #788]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #784]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #780]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #776]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #772]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #768]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #764]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #760]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r9, {r4, r6, r7, r9}
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	ldr	r2, [r1, #52]
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #660
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, lr, #984
+	bl	mulPv512x32
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	add	r9, sp, #2016
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldr	r2, [r0, #12]
+	ldr	r0, [sp, #2072]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #2068]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #2064]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #2060]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #2056]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #2052]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #2048]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #2044]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #2040]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #1936
+	ldm	r9, {r4, r6, r9}
+	ldr	r7, [sp, #2036]
+	ldr	r5, [sp, #2032]
+	ldr	r8, [sp, #2028]
+	ldr	r10, [sp, #2008]
+	ldr	r11, [sp, #2012]
+	bl	mulPv512x32
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
 	adds	r0, r0, r10
-	add	r10, sp, #684
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r11, [sp, #116]                 @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r7, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #724]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #720]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #716]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #712]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #708]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #704]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r7, [sp, #656]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #84]          @ 4-byte Reload
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	adds	r7, r11, r7
-	adcs	r0, r6, r0
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #2000]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1996]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1992]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1988]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1984]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #1936]
+	ldr	r0, [sp, #1940]
+	adds	r6, r11, r6
+	ldr	r10, [sp, #1980]
+	adcs	r0, r7, r0
+	ldr	r9, [sp, #1976]
+	ldr	r8, [sp, #1972]
+	ldr	r5, [sp, #1968]
+	ldr	r4, [sp, #1964]
+	ldr	lr, [sp, #1960]
+	ldr	r12, [sp, #1956]
+	ldr	r3, [sp, #1952]
+	ldr	r1, [sp, #1944]
+	ldr	r2, [sp, #1948]
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	str	r6, [sp, #32]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #1024
 	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	mov	r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #584
-	bl	.LmulPv544x32(PLT)
-	ldr	r1, [sp, #652]
-	add	r9, sp, #588
-	ldr	r5, [sp, #612]
-	ldr	r11, [sp, #608]
-	ldr	r8, [sp, #604]
-	ldr	r10, [sp, #584]
-	add	r0, sp, #512
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #648]
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #644]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #640]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #636]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #632]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #628]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #624]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #620]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #616]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r9, {r4, r6, r7, r9}
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	ldr	r2, [r1, #56]
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #516
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, lr, #840
+	bl	mulPv512x32
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	add	r9, sp, #1872
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldr	r2, [r0, #16]
+	ldr	r0, [sp, #1928]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1924]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1920]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1916]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1912]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1908]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1904]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1900]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1896]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #1792
+	ldm	r9, {r4, r6, r9}
+	ldr	r7, [sp, #1892]
+	ldr	r5, [sp, #1888]
+	ldr	r8, [sp, #1884]
+	ldr	r10, [sp, #1864]
+	ldr	r11, [sp, #1868]
+	bl	mulPv512x32
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
 	adds	r0, r0, r10
-	add	r10, sp, #540
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r11, [sp, #116]                 @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r7, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #580]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #576]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #572]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #568]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #564]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #560]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r7, [sp, #512]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #84]          @ 4-byte Reload
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	adds	r7, r11, r7
-	adcs	r0, r6, r0
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1856]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1852]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1848]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1844]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1840]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #1792]
+	ldr	r0, [sp, #1796]
+	adds	r6, r11, r6
+	ldr	r10, [sp, #1836]
+	adcs	r0, r7, r0
+	ldr	r9, [sp, #1832]
+	ldr	r8, [sp, #1828]
+	ldr	r5, [sp, #1824]
+	ldr	r4, [sp, #1820]
+	ldr	lr, [sp, #1816]
+	ldr	r12, [sp, #1812]
+	ldr	r3, [sp, #1808]
+	ldr	r1, [sp, #1800]
+	ldr	r2, [sp, #1804]
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	str	r6, [sp, #32]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #1024
 	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	mov	r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #440
-	bl	.LmulPv544x32(PLT)
-	ldr	r1, [sp, #508]
-	add	r9, sp, #444
-	ldr	r5, [sp, #468]
-	ldr	r11, [sp, #464]
-	ldr	r8, [sp, #460]
-	ldr	r10, [sp, #440]
-	add	r0, sp, #368
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #504]
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #500]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #496]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #492]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #488]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #484]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #480]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #476]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #472]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r9, {r4, r6, r7, r9}
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	ldr	r2, [r1, #60]
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #372
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, lr, #696
+	bl	mulPv512x32
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	add	r9, sp, #1728
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldr	r2, [r0, #20]
+	ldr	r0, [sp, #1784]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1780]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1776]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1772]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1768]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1764]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1760]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1756]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1752]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #1648
+	ldm	r9, {r4, r6, r9}
+	ldr	r7, [sp, #1748]
+	ldr	r5, [sp, #1744]
+	ldr	r8, [sp, #1740]
+	ldr	r10, [sp, #1720]
+	ldr	r11, [sp, #1724]
+	bl	mulPv512x32
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
 	adds	r0, r0, r10
-	add	r10, sp, #396
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r11, [sp, #116]                 @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r7, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #436]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #432]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #428]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #424]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #420]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #416]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r7, [sp, #368]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #84]          @ 4-byte Reload
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	adds	r7, r11, r7
-	adcs	r0, r6, r0
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1712]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1708]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1704]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1700]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1696]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #1648]
+	ldr	r0, [sp, #1652]
+	adds	r6, r11, r6
+	ldr	r10, [sp, #1692]
+	adcs	r0, r7, r0
+	ldr	r9, [sp, #1688]
+	ldr	r8, [sp, #1684]
+	ldr	r5, [sp, #1680]
+	ldr	r4, [sp, #1676]
+	ldr	lr, [sp, #1672]
+	ldr	r12, [sp, #1668]
+	ldr	r3, [sp, #1664]
+	ldr	r1, [sp, #1656]
+	ldr	r2, [sp, #1660]
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	str	r6, [sp, #32]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #1024
 	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	mov	r0, #0
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #296
-	bl	.LmulPv544x32(PLT)
-	ldr	r1, [sp, #364]
-	add	r11, sp, #312
-	add	r7, sp, #300
-	ldr	r9, [sp, #324]
-	add	r0, sp, #224
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [sp, #360]
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #356]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #352]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #348]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #344]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #340]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #336]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #332]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #328]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r11, {r4, r10, r11}
-	ldr	r8, [sp, #296]
-	ldm	r7, {r5, r6, r7}
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	ldr	r2, [r1, #64]
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	add	lr, sp, #240
-	adds	r0, r0, r8
-	ldr	r8, [sp, #232]
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #268]
-	adcs	r1, r1, r6
-	str	r1, [sp, #144]          @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r1, r1, r7
-	ldr	r7, [sp, #236]
-	str	r1, [sp, #140]          @ 4-byte Spill
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r4
-	ldr	r4, [sp, #224]
-	str	r1, [sp, #84]           @ 4-byte Spill
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r10
-	str	r1, [sp, #80]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r1, r1, r11
-	ldr	r11, [sp, #228]
-	str	r1, [sp, #76]           @ 4-byte Spill
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	str	r1, [sp, #72]           @ 4-byte Spill
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	str	r1, [sp, #132]          @ 4-byte Spill
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r1, [sp, #128]          @ 4-byte Spill
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #120]          @ 4-byte Spill
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #112]          @ 4-byte Spill
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #108]          @ 4-byte Spill
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #104]          @ 4-byte Spill
-	ldr	r1, [sp, #100]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #92]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adc	r1, r1, #0
-	adds	r9, r0, r4
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r4, [sp, #264]
-	str	r1, [sp, #88]           @ 4-byte Spill
-	mul	r1, r9, r0
-	ldr	r0, [sp, #292]
-	str	r1, [sp, #68]           @ 4-byte Spill
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #288]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #284]
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #280]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #276]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #272]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r10, [sp, #144]         @ 4-byte Reload
-	ldr	r6, [sp, #140]          @ 4-byte Reload
-	adcs	r11, r10, r11
-	adcs	r10, r6, r8
-	ldr	r6, [sp, #84]           @ 4-byte Reload
-	adcs	r7, r6, r7
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, lr, #552
+	bl	mulPv512x32
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	add	r9, sp, #1584
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldr	r2, [r0, #24]
+	ldr	r0, [sp, #1640]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1636]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1632]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1628]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1624]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1620]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1616]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1612]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1608]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #1504
+	ldm	r9, {r4, r6, r9}
+	ldr	r7, [sp, #1604]
+	ldr	r5, [sp, #1600]
+	ldr	r8, [sp, #1596]
+	ldr	r10, [sp, #1576]
+	ldr	r11, [sp, #1580]
+	bl	mulPv512x32
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adds	r0, r0, r10
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r11, [sp, #116]                 @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	ldr	r5, [sp, #148]          @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r8, r0, r1
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r1, [sp, #40]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r7, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #144]          @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	add	r0, sp, #152
-	bl	.LmulPv544x32(PLT)
-	add	r3, sp, #152
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r9, r0
-	adcs	r4, r11, r1
-	ldr	r0, [sp, #168]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r6, r10, r2
-	str	r4, [sp, #52]           @ 4-byte Spill
-	adcs	r9, r7, r3
-	mov	r3, r5
-	str	r6, [sp, #60]           @ 4-byte Spill
-	str	r9, [sp, #68]           @ 4-byte Spill
-	adcs	lr, r1, r0
-	ldr	r0, [sp, #172]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	lr, [sp, #72]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #176]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #180]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #184]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #188]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #192]
-	adcs	r11, r1, r0
-	ldr	r0, [sp, #196]
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r11, [sp, #76]          @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #200]
-	adcs	r0, r8, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #204]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #208]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #212]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #216]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #220]
-	adcs	r0, r1, r0
-	str	r0, [sp, #144]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldm	r3, {r1, r2, r7}
-	ldr	r0, [r3, #64]
-	ldr	r5, [r3, #12]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [r3, #36]
-	subs	r12, r4, r1
-	ldr	r1, [r3, #40]
-	sbcs	r4, r6, r2
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	sbcs	r6, r9, r7
-	ldr	r7, [r3, #32]
-	ldr	r9, [r3, #28]
-	sbcs	r10, lr, r5
-	ldr	r5, [r3, #16]
-	ldr	lr, [r3, #24]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r3, #44]
-	sbcs	r2, r2, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r3, #48]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r3, #52]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r3, #56]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r3, #60]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [r3, #20]
-	ldr	r3, [sp, #104]          @ 4-byte Reload
-	sbcs	r3, r3, r0
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	sbcs	lr, r0, lr
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	sbcs	r5, r0, r9
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	sbcs	r8, r0, r7
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	sbcs	r9, r11, r0
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	sbcs	r11, r0, r1
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	sbcs	r0, r0, r1
-	str	r0, [sp, #148]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	sbc	r0, r0, #0
-	ands	r1, r0, #1
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	movne	r4, r7
-	movne	r12, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	str	r12, [r0]
-	str	r4, [r0, #4]
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	movne	r6, r4
-	cmp	r1, #0
-	str	r6, [r0, #8]
-	ldr	r6, [sp, #72]           @ 4-byte Reload
-	movne	r10, r6
-	ldr	r6, [sp, #100]          @ 4-byte Reload
-	str	r10, [r0, #12]
-	movne	r2, r6
-	str	r2, [r0, #16]
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	movne	r3, r2
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #20]
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	movne	lr, r2
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	str	lr, [r0, #24]
-	movne	r5, r2
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	str	r5, [r0, #28]
-	movne	r8, r2
-	ldr	r2, [sp, #76]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r8, [r0, #32]
-	movne	r9, r2
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	str	r9, [r0, #36]
-	movne	r11, r2
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	str	r11, [r0, #40]
-	movne	r3, r2
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #44]
-	ldr	r3, [sp, #80]           @ 4-byte Reload
-	movne	r3, r2
-	ldr	r2, [sp, #132]          @ 4-byte Reload
-	str	r3, [r0, #48]
-	ldr	r3, [sp, #84]           @ 4-byte Reload
-	movne	r3, r2
-	ldr	r2, [sp, #136]          @ 4-byte Reload
-	str	r3, [r0, #52]
-	ldr	r3, [sp, #88]           @ 4-byte Reload
-	movne	r3, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	str	r3, [r0, #56]
-	movne	r2, r1
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	str	r2, [r0, #60]
-	ldr	r2, [sp, #148]          @ 4-byte Reload
-	movne	r2, r1
-	str	r2, [r0, #64]
-	add	sp, sp, #556
-	add	sp, sp, #2048
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end258:
-	.size	mcl_fp_mont17L, .Lfunc_end258-mcl_fp_mont17L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_montNF17L
-	.align	2
-	.type	mcl_fp_montNF17L,%function
-mcl_fp_montNF17L:                       @ @mcl_fp_montNF17L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#548
-	sub	sp, sp, #548
-	.pad	#2048
-	sub	sp, sp, #2048
-	add	r12, sp, #132
-	add	r6, sp, #2048
-	mov	r4, r3
-	stm	r12, {r1, r2, r3}
-	str	r0, [sp, #92]           @ 4-byte Spill
-	add	r0, r6, #472
-	ldr	r5, [r3, #-4]
-	ldr	r2, [r2]
-	str	r5, [sp, #128]          @ 4-byte Spill
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #2520]
-	ldr	r1, [sp, #2524]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	mul	r2, r0, r5
-	ldr	r0, [sp, #2588]
-	str	r1, [sp, #100]          @ 4-byte Spill
-	ldr	r1, [sp, #2528]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #2584]
-	str	r1, [sp, #96]           @ 4-byte Spill
-	ldr	r1, [sp, #2532]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #2580]
-	str	r1, [sp, #88]           @ 4-byte Spill
-	mov	r1, r4
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #2576]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #2572]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #2568]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #2564]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #2560]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #2556]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #2552]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #2548]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #2544]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2540]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2536]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	add	r0, sp, #2448
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #2516]
-	add	r11, sp, #2448
-	ldr	r9, [sp, #2476]
-	ldr	r4, [sp, #2472]
-	ldr	r7, [sp, #2468]
-	ldr	r6, [sp, #2464]
-	add	lr, sp, #2048
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #2512]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2508]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2504]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2500]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2496]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2492]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #2488]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2484]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #2480]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r10, r11}
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r5, [sp, #2460]
-	ldr	r2, [r0, #4]
-	add	r0, lr, #328
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adds	r0, r8, r0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r10, r0
-	add	r10, sp, #2416
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r5, r0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1568]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1564]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1560]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1556]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1552]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #1504]
+	ldr	r0, [sp, #1508]
+	adds	r6, r11, r6
+	ldr	r10, [sp, #1548]
 	adcs	r0, r7, r0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r4, r0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r9, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adc	r0, r1, r0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #2444]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2440]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2436]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #2432]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #2428]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r7, [sp, #2376]
-	ldr	r6, [sp, #100]          @ 4-byte Reload
-	ldr	r0, [sp, #2380]
-	ldr	r1, [sp, #2384]
-	ldr	r2, [sp, #2388]
-	ldr	r3, [sp, #2392]
-	ldr	r12, [sp, #2396]
-	ldr	lr, [sp, #2400]
-	ldr	r4, [sp, #2404]
-	ldr	r5, [sp, #2408]
-	ldr	r11, [sp, #2412]
-	adds	r7, r6, r7
-	ldr	r6, [sp, #96]           @ 4-byte Reload
-	str	r7, [sp, #24]           @ 4-byte Spill
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	ldr	r9, [sp, #1544]
+	ldr	r8, [sp, #1540]
+	ldr	r5, [sp, #1536]
+	ldr	r4, [sp, #1532]
+	ldr	lr, [sp, #1528]
+	ldr	r12, [sp, #1524]
+	ldr	r3, [sp, #1520]
+	ldr	r1, [sp, #1512]
+	ldr	r2, [sp, #1516]
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	str	r6, [sp, #32]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #1024
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #2304
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #2372]
-	add	r11, sp, #2304
-	ldr	r4, [sp, #2332]
-	ldr	r5, [sp, #2328]
-	ldr	r6, [sp, #2324]
-	ldr	r7, [sp, #2320]
-	add	lr, sp, #2048
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2368]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2364]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2360]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2356]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2352]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #2348]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2344]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2340]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #2336]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r2, [r0, #8]
-	add	r0, lr, #184
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #24]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adds	r0, r0, r8
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #2272
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, lr, #408
+	bl	mulPv512x32
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	add	r9, sp, #1440
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldr	r2, [r0, #28]
+	ldr	r0, [sp, #1496]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1492]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1488]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1484]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1480]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1476]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1472]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1468]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1464]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #1360
+	ldm	r9, {r4, r6, r9}
+	ldr	r7, [sp, #1460]
+	ldr	r5, [sp, #1456]
+	ldr	r8, [sp, #1452]
+	ldr	r10, [sp, #1432]
+	ldr	r11, [sp, #1436]
+	bl	mulPv512x32
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adds	r0, r0, r10
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
 	adcs	r0, r0, r11
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r11, [sp, #116]                 @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r6
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r7, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adc	r0, r0, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2300]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2296]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2292]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2288]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2284]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r7, [sp, #2232]
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	ldr	r0, [sp, #2236]
-	ldr	r1, [sp, #2240]
-	ldr	r2, [sp, #2244]
-	ldr	r3, [sp, #2248]
-	ldr	r12, [sp, #2252]
-	ldr	lr, [sp, #2256]
-	ldr	r4, [sp, #2260]
-	ldr	r5, [sp, #2264]
-	ldr	r11, [sp, #2268]
-	adds	r7, r6, r7
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1424]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1420]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1416]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1412]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1408]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #1360]
+	ldr	r0, [sp, #1364]
+	adds	r6, r11, r6
+	ldr	r10, [sp, #1404]
+	adcs	r0, r7, r0
+	ldr	r9, [sp, #1400]
+	ldr	r8, [sp, #1396]
+	ldr	r5, [sp, #1392]
+	ldr	r4, [sp, #1388]
+	ldr	lr, [sp, #1384]
+	ldr	r12, [sp, #1380]
+	ldr	r3, [sp, #1376]
+	ldr	r1, [sp, #1368]
+	ldr	r2, [sp, #1372]
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	str	r6, [sp, #32]                   @ 4-byte Spill
+	adcs	r0, r0, r1
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #1024
+	adcs	r0, r0, r4
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	adc	r0, r0, #0
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, lr, #264
+	bl	mulPv512x32
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	add	r9, sp, #1296
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldr	r2, [r0, #32]
+	ldr	r0, [sp, #1352]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1348]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1344]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1340]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1336]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1332]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1328]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1324]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1320]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #1216
+	ldm	r9, {r4, r6, r9}
+	ldr	r7, [sp, #1316]
+	ldr	r5, [sp, #1312]
+	ldr	r8, [sp, #1308]
+	ldr	r10, [sp, #1288]
+	ldr	r11, [sp, #1292]
+	bl	mulPv512x32
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adds	r0, r0, r10
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r11, [sp, #116]                 @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r7, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #1280]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1276]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1272]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1268]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1264]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #1216]
+	ldr	r0, [sp, #1220]
+	adds	r6, r11, r6
+	ldr	r10, [sp, #1260]
+	adcs	r0, r7, r0
+	ldr	r9, [sp, #1256]
+	ldr	r8, [sp, #1252]
+	ldr	r5, [sp, #1248]
+	ldr	r4, [sp, #1244]
+	ldr	lr, [sp, #1240]
+	ldr	r12, [sp, #1236]
+	ldr	r3, [sp, #1232]
+	ldr	r1, [sp, #1224]
+	ldr	r2, [sp, #1228]
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	ldr	r11, [sp, #120]                 @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	mul	r2, r11, r6
+	str	r6, [sp, #32]                   @ 4-byte Spill
 	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #1024
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #2160
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #2228]
-	add	r11, sp, #2160
-	ldr	r4, [sp, #2188]
-	ldr	r5, [sp, #2184]
-	ldr	r6, [sp, #2180]
-	ldr	r7, [sp, #2176]
-	add	lr, sp, #2048
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2224]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2220]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2216]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2212]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2208]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2204]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #2200]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2196]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #2192]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r2, [r0, #12]
-	add	r0, lr, #40
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adds	r0, r0, r8
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	add	r0, lr, #120
+	bl	mulPv512x32
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldr	r2, [r0, #36]
+	ldr	r0, [sp, #1208]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1204]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1200]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1196]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1192]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1188]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1184]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1180]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1176]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #1172]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	add	r0, sp, #1072
+	ldr	r4, [sp, #1168]
+	ldr	r5, [sp, #1164]
+	ldr	r6, [sp, #1160]
+	ldr	r7, [sp, #1144]
+	ldr	r8, [sp, #1148]
+	ldr	r9, [sp, #1152]
+	ldr	r10, [sp, #1156]
+	bl	mulPv512x32
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	add	lr, sp, #1088
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	adds	r0, r0, r7
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r2, r0, r8
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r10
-	add	r10, sp, #2128
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r6
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adc	r0, r0, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2156]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2152]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2148]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2144]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2140]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r7, [sp, #2088]
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	ldr	r0, [sp, #2092]
-	ldr	r1, [sp, #2096]
-	ldr	r2, [sp, #2100]
-	ldr	r3, [sp, #2104]
-	ldr	r12, [sp, #2108]
-	ldr	lr, [sp, #2112]
-	ldr	r4, [sp, #2116]
-	ldr	r5, [sp, #2120]
-	ldr	r11, [sp, #2124]
-	adds	r7, r6, r7
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r4, [sp, #1072]
+	ldr	r6, [sp, #1076]
+	adds	r0, r2, r4
+	ldr	r5, [sp, #1080]
+	ldr	r7, [sp, #1084]
+	mul	r1, r11, r0
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r11, [sp, #112]                 @ 4-byte Reload
+	adcs	r6, r11, r6
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1136]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #1132]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1128]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r10, [sp, #1124]
+	ldr	r9, [sp, #1120]
+	ldr	r8, [sp, #1116]
+	ldr	r4, [sp, #1112]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	str	r6, [sp, #112]                  @ 4-byte Spill
+	ldr	r6, [sp, #108]                  @ 4-byte Reload
+	adcs	r5, r6, r5
+	str	r5, [sp, #108]                  @ 4-byte Spill
+	ldr	r5, [sp, #104]                  @ 4-byte Reload
+	adcs	r5, r5, r7
+	str	r5, [sp, #104]                  @ 4-byte Spill
+	ldr	r5, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r5, r0
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #2016
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #2084]
-	add	r11, sp, #2016
-	ldr	r4, [sp, #2044]
-	ldr	r5, [sp, #2040]
-	ldr	r6, [sp, #2036]
-	ldr	r7, [sp, #2032]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2080]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2076]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2072]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #2068]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #2064]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #2060]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #2056]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #2052]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #2048]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r2, [r0, #16]
-	add	r0, lr, #920
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adds	r0, r0, r8
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	add	r0, sp, #1000
+	bl	mulPv512x32
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	add	r10, sp, #1000
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldr	r2, [r0, #40]
+	ldr	r0, [sp, #1064]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #1060]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #1056]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #1052]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #1048]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #1044]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #1040]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #1036]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #1032]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	add	r0, sp, #928
+	ldm	r10, {r7, r8, r9, r10}
+	ldr	r11, [sp, #1028]
+	ldr	r4, [sp, #1024]
+	ldr	r5, [sp, #1020]
+	ldr	r6, [sp, #1016]
+	bl	mulPv512x32
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	add	lr, sp, #944
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adds	r0, r0, r7
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r2, r0, r8
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r10
-	add	r10, sp, #1984
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	add	r10, sp, #968
 	adcs	r0, r0, r6
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r11, [sp, #112]                 @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adc	r0, r0, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #2012]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #2008]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #2004]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #2000]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1996]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r7, [sp, #1944]
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	ldr	r0, [sp, #1948]
-	ldr	r1, [sp, #1952]
-	ldr	r2, [sp, #1956]
-	ldr	r3, [sp, #1960]
-	ldr	r12, [sp, #1964]
-	ldr	lr, [sp, #1968]
-	ldr	r4, [sp, #1972]
-	ldr	r5, [sp, #1976]
-	ldr	r11, [sp, #1980]
-	adds	r7, r6, r7
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r4, [sp, #928]
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adds	r1, r2, r4
+	ldr	r6, [sp, #932]
+	ldr	r5, [sp, #936]
+	mul	r2, r0, r1
+	ldr	r7, [sp, #940]
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	adcs	r6, r11, r6
+	str	r2, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #992]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #988]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #984]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldm	r10, {r4, r8, r9, r10}
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	str	r6, [sp, #72]                   @ 4-byte Spill
+	ldr	r6, [sp, #108]                  @ 4-byte Reload
+	adcs	r5, r6, r5
+	str	r5, [sp, #68]                   @ 4-byte Spill
+	ldr	r5, [sp, #104]                  @ 4-byte Reload
+	adcs	r5, r5, r7
+	str	r5, [sp, #64]                   @ 4-byte Spill
+	ldr	r5, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r5, r0
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1872
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1940]
-	add	r11, sp, #1872
-	ldr	r4, [sp, #1900]
-	ldr	r5, [sp, #1896]
-	ldr	r6, [sp, #1892]
-	ldr	r7, [sp, #1888]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1936]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1932]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1928]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1924]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1920]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1916]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1912]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1908]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1904]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r2, [r0, #20]
-	add	r0, lr, #776
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	add	r0, sp, #856
+	bl	mulPv512x32
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	add	r7, sp, #876
+	add	r11, sp, #856
+	add	r0, sp, #784
+	ldr	r2, [r1, #44]
+	ldr	r1, [sp, #920]
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #916]
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #912]
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #908]
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #904]
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #900]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #896]
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #892]
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldr	r1, [sp, #888]
+	str	r1, [sp, #12]                   @ 4-byte Spill
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldm	r7, {r4, r6, r7}
+	ldm	r11, {r8, r9, r11}
+	ldr	r10, [sp, #872]
+	ldr	r5, [sp, #868]
+	bl	mulPv512x32
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	add	lr, sp, #788
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
 	adds	r0, r0, r8
-	ldr	r0, [sp, #124]          @ 4-byte Reload
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #1840
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
 	adcs	r0, r0, r11
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r11, [sp, #116]                 @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1868]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1864]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1860]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1856]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1852]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r7, [sp, #1800]
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	ldr	r0, [sp, #1804]
-	ldr	r1, [sp, #1808]
-	ldr	r2, [sp, #1812]
-	ldr	r3, [sp, #1816]
-	ldr	r12, [sp, #1820]
-	ldr	lr, [sp, #1824]
-	ldr	r4, [sp, #1828]
-	ldr	r5, [sp, #1832]
-	ldr	r11, [sp, #1836]
-	adds	r7, r6, r7
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	add	r10, sp, #812
+	adcs	r0, r0, r4
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #848]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #844]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #840]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #836]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [sp, #832]
+	str	r0, [sp, #24]                   @ 4-byte Spill
+	ldr	r6, [sp, #784]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r6, r11, r6
+	adcs	r0, r7, r0
+	ldm	r10, {r4, r5, r8, r9, r10}
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	str	r6, [sp, #28]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1728
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1796]
-	add	r11, sp, #1728
-	ldr	r4, [sp, #1756]
-	ldr	r5, [sp, #1752]
-	ldr	r6, [sp, #1748]
-	ldr	r7, [sp, #1744]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1792]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1788]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1784]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1780]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1776]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1772]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1768]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1764]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1760]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r2, [r0, #24]
-	add	r0, lr, #632
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adds	r0, r0, r8
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #1696
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, sp, #712
+	bl	mulPv512x32
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	add	r11, sp, #720
+	add	r0, sp, #640
+	ldr	r2, [r1, #48]
+	ldr	r1, [sp, #776]
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #772]
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #768]
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #764]
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #760]
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #756]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #752]
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #748]
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldr	r1, [sp, #744]
+	str	r1, [sp, #12]                   @ 4-byte Spill
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldm	r11, {r5, r6, r11}
+	ldr	r7, [sp, #740]
+	ldr	r4, [sp, #736]
+	ldr	r9, [sp, #732]
+	ldr	r10, [sp, #712]
+	ldr	r8, [sp, #716]
+	bl	mulPv512x32
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	add	lr, sp, #644
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adds	r0, r0, r10
+	add	r10, sp, #668
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r11, [sp, #76]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adc	r0, r0, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1724]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1720]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1716]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1712]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1708]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r7, [sp, #1656]
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	ldr	r0, [sp, #1660]
-	ldr	r1, [sp, #1664]
-	ldr	r2, [sp, #1668]
-	ldr	r3, [sp, #1672]
-	ldr	r12, [sp, #1676]
-	ldr	lr, [sp, #1680]
-	ldr	r4, [sp, #1684]
-	ldr	r5, [sp, #1688]
-	ldr	r11, [sp, #1692]
-	adds	r7, r6, r7
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #704]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #700]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #696]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #692]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #688]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r6, [sp, #640]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r6, r11, r6
+	adcs	r0, r7, r0
+	ldm	r10, {r4, r5, r8, r9, r10}
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	str	r6, [sp, #28]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1584
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1652]
-	add	r11, sp, #1584
-	ldr	r4, [sp, #1612]
-	ldr	r5, [sp, #1608]
-	ldr	r6, [sp, #1604]
-	ldr	r7, [sp, #1600]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1648]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1644]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1640]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1636]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1632]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1628]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1624]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1620]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1616]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r2, [r0, #28]
-	add	r0, lr, #488
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adds	r0, r0, r8
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #1552
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, sp, #568
+	bl	mulPv512x32
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	add	r11, sp, #576
+	add	r0, sp, #496
+	ldr	r2, [r1, #52]
+	ldr	r1, [sp, #632]
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #628]
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #624]
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #620]
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #616]
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #612]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #608]
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #604]
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldr	r1, [sp, #600]
+	str	r1, [sp, #12]                   @ 4-byte Spill
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldm	r11, {r5, r6, r11}
+	ldr	r7, [sp, #596]
+	ldr	r4, [sp, #592]
+	ldr	r9, [sp, #588]
+	ldr	r10, [sp, #568]
+	ldr	r8, [sp, #572]
+	bl	mulPv512x32
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	add	lr, sp, #500
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adds	r0, r0, r10
+	add	r10, sp, #524
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r11, [sp, #76]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adc	r0, r0, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1580]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1576]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1572]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1568]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1564]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r7, [sp, #1512]
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	ldr	r0, [sp, #1516]
-	ldr	r1, [sp, #1520]
-	ldr	r2, [sp, #1524]
-	ldr	r3, [sp, #1528]
-	ldr	r12, [sp, #1532]
-	ldr	lr, [sp, #1536]
-	ldr	r4, [sp, #1540]
-	ldr	r5, [sp, #1544]
-	ldr	r11, [sp, #1548]
-	adds	r7, r6, r7
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #560]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #556]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #552]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #548]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #544]
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r6, [sp, #496]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r6, r11, r6
+	adcs	r0, r7, r0
+	ldm	r10, {r4, r5, r8, r9, r10}
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	str	r6, [sp, #28]                   @ 4-byte Spill
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1440
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1508]
-	add	r11, sp, #1440
-	ldr	r4, [sp, #1468]
-	ldr	r5, [sp, #1464]
-	ldr	r6, [sp, #1460]
-	ldr	r7, [sp, #1456]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1504]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1500]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1496]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1492]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1488]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1484]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1480]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1476]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1472]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r2, [r0, #32]
-	add	r0, lr, #344
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adds	r0, r0, r8
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #1408
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, sp, #424
+	bl	mulPv512x32
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	add	r11, sp, #432
+	add	r0, sp, #352
+	ldr	r2, [r1, #56]
+	ldr	r1, [sp, #488]
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #484]
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #480]
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #476]
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #472]
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	ldr	r1, [sp, #468]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #464]
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #460]
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldr	r1, [sp, #456]
+	str	r1, [sp, #12]                   @ 4-byte Spill
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldm	r11, {r5, r6, r11}
+	ldr	r7, [sp, #452]
+	ldr	r4, [sp, #448]
+	ldr	r9, [sp, #444]
+	ldr	r10, [sp, #424]
+	ldr	r8, [sp, #428]
+	bl	mulPv512x32
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	add	lr, sp, #356
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	adds	r0, r0, r10
+	add	r10, sp, #380
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r11, [sp, #76]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1436]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1432]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1428]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1424]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1420]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r7, [sp, #1368]
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	ldr	r0, [sp, #1372]
-	ldr	r1, [sp, #1376]
-	ldr	r2, [sp, #1380]
-	ldr	r3, [sp, #1384]
-	ldr	r12, [sp, #1388]
-	ldr	lr, [sp, #1392]
-	ldr	r4, [sp, #1396]
-	ldr	r5, [sp, #1400]
-	ldr	r11, [sp, #1404]
-	adds	r7, r6, r7
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adc	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #416]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #412]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #408]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #404]
+	str	r0, [sp, #36]                   @ 4-byte Spill
+	ldr	r0, [sp, #400]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r6, [sp, #352]
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adds	r6, r11, r6
+	adcs	r0, r7, r0
+	ldm	r10, {r4, r5, r8, r9, r10}
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	str	r6, [sp, #32]                   @ 4-byte Spill
+	adcs	r0, r0, r1
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
 	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #48]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #1296
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1364]
-	add	r11, sp, #1296
-	ldr	r4, [sp, #1324]
-	ldr	r5, [sp, #1320]
-	ldr	r6, [sp, #1316]
-	ldr	r7, [sp, #1312]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1360]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1356]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1352]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1348]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1344]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1340]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1336]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1332]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1328]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r2, [r0, #36]
-	add	r0, lr, #200
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	adds	r0, r0, r8
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #1264
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r0, r6
+	add	r0, sp, #280
+	bl	mulPv512x32
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	add	r0, sp, #208
+	ldr	r2, [r1, #60]
+	ldr	r1, [sp, #344]
+	str	r1, [sp, #124]                  @ 4-byte Spill
+	ldr	r1, [sp, #340]
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #336]
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	ldr	r1, [sp, #332]
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #328]
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #324]
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	ldr	r1, [sp, #320]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	ldr	r1, [sp, #316]
+	str	r1, [sp, #20]                   @ 4-byte Spill
+	ldr	r1, [sp, #312]
+	str	r1, [sp, #16]                   @ 4-byte Spill
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldr	r4, [sp, #308]
+	ldr	r11, [sp, #304]
+	ldr	r8, [sp, #300]
+	ldr	r9, [sp, #296]
+	ldr	r10, [sp, #280]
+	ldr	r5, [sp, #284]
+	ldr	r7, [sp, #288]
+	ldr	r6, [sp, #292]
+	bl	mulPv512x32
+	ldr	r0, [sp, #32]                   @ 4-byte Reload
+	add	lr, sp, #224
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	adds	r0, r0, r10
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	adcs	r1, r1, r7
+	str	r1, [sp, #128]                  @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r1, r6
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r5, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r9
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r1, r11
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	ldr	r11, [sp, #128]                 @ 4-byte Reload
+	adcs	r1, r1, r4
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r2, [sp, #40]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r2, [sp, #124]                  @ 4-byte Reload
+	adc	r1, r1, r2
+	str	r1, [sp, #124]                  @ 4-byte Spill
+	ldr	r4, [sp, #208]
+	ldr	r10, [sp, #212]
+	adds	r7, r0, r4
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	ldr	r9, [sp, #216]
+	adcs	r10, r11, r10
+	ldr	r6, [sp, #220]
+	mul	r1, r0, r7
+	adcs	r11, r5, r9
+	ldr	r5, [sp, #72]                   @ 4-byte Reload
+	ldr	r9, [sp, #132]                  @ 4-byte Reload
+	adcs	r6, r5, r6
+	ldr	r5, [sp, #68]                   @ 4-byte Reload
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #272]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #268]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #264]
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #260]
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #256]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldm	lr, {r0, r1, r2, r3, r12, lr}
+	adcs	r0, r5, r0
+	ldr	r8, [sp, #252]
+	ldr	r4, [sp, #248]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #64]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r2, [sp, #80]                   @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r4
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r8, r0, r8
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #1292]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1288]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1284]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1280]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1276]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r8, r9, r10}
-	ldr	r7, [sp, #1224]
-	ldr	r6, [sp, #124]          @ 4-byte Reload
-	ldr	r0, [sp, #1228]
-	ldr	r1, [sp, #1232]
-	ldr	r2, [sp, #1236]
-	ldr	r3, [sp, #1240]
-	ldr	r12, [sp, #1244]
-	ldr	lr, [sp, #1248]
-	ldr	r4, [sp, #1252]
-	ldr	r5, [sp, #1256]
-	ldr	r11, [sp, #1260]
-	adds	r7, r6, r7
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	str	r7, [sp, #32]           @ 4-byte Spill
-	adcs	r0, r6, r0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #56]                   @ 4-byte Reload
+	mov	r1, r9
+	adc	r5, r0, #0
+	add	r0, sp, #136
+	bl	mulPv512x32
+	add	r3, sp, #136
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r7, r0
+	ldr	r7, [r9]
+	adcs	lr, r10, r1
+	str	lr, [sp, #96]                   @ 4-byte Spill
+	adcs	r12, r11, r2
+	str	r12, [sp, #88]                  @ 4-byte Spill
+	adcs	r6, r6, r3
+	str	r6, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #152]
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	ldr	r2, [r9, #4]
+	adcs	r4, r1, r0
+	str	r4, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #156]
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #160]
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #164]
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #168]
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #172]
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #176]
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #180]
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	adcs	r0, r8, r0
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #184]
+	adcs	r0, r1, r0
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #188]
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #48]                   @ 4-byte Spill
+	ldr	r0, [sp, #192]
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #44]                   @ 4-byte Spill
+	ldr	r0, [sp, #196]
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #200]
+	mov	r1, r9
+	adc	r3, r5, r0
+	subs	r7, lr, r7
+	ldr	r0, [r9, #8]
+	sbcs	r2, r12, r2
+	ldr	r5, [r9, #12]
+	add	lr, r9, #32
+	str	r2, [sp, #84]                   @ 4-byte Spill
+	sbcs	r2, r6, r0
+	str	r2, [sp, #76]                   @ 4-byte Spill
+	sbcs	r2, r4, r5
+	str	r2, [sp, #68]                   @ 4-byte Spill
+	add	r9, r9, #44
+	ldr	r2, [r1, #28]
+	ldr	r0, [r1, #24]
+	ldr	r12, [r1, #20]
+	ldr	r1, [r1, #16]
+	ldr	r4, [sp, #128]                  @ 4-byte Reload
+	str	r7, [sp, #92]                   @ 4-byte Spill
+	sbcs	r1, r4, r1
+	ldm	lr, {r10, r11, lr}
+	ldm	r9, {r5, r6, r7, r8, r9}
+	str	r1, [sp, #132]                  @ 4-byte Spill
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	sbcs	r1, r1, r12
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	sbcs	r0, r1, r0
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	sbcs	r0, r1, r2
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	sbcs	r10, r1, r10
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	sbcs	r12, r1, r11
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	sbcs	r4, r0, lr
+	ldr	lr, [sp, #52]                   @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	sbcs	r5, lr, r5
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	sbcs	r6, r2, r6
+	sbcs	r7, r1, r7
+	sbcs	r8, r0, r8
+	sbc	r11, r3, r9
+	ldr	r9, [sp, #100]                  @ 4-byte Reload
+	cmn	r11, #1
+	movle	r8, r0
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	movle	r7, r1
+	movgt	r3, r11
+	cmn	r11, #1
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	movle	r4, r0
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	movle	r6, r2
+	movle	r5, lr
+	cmn	r11, #1
+	str	r3, [r9, #60]
+	movle	r12, r0
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	str	r8, [r9, #56]
+	str	r7, [r9, #52]
+	movle	r10, r0
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	str	r6, [r9, #48]
+	str	r5, [r9, #44]
+	movle	r1, r0
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	str	r1, [r9, #28]
+	cmn	r11, #1
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	movle	r1, r0
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	str	r1, [r9, #24]
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	movle	r1, r0
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	str	r1, [r9, #20]
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	movle	r1, r0
+	cmn	r11, #1
+	str	r1, [r9, #16]
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	str	r4, [r9, #40]
+	movle	r0, r1
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	str	r0, [r9, #12]
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	movle	r0, r1
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	str	r0, [r9, #8]
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	movle	r0, r1
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	str	r0, [r9, #4]
+	cmn	r11, #1
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	movle	r0, r1
+	str	r12, [r9, #36]
+	str	r10, [r9, #32]
+	str	r0, [r9]
+	add	sp, sp, #396
+	add	sp, sp, #2048
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end78:
+	.size	mcl_fp_montNF16L, .Lfunc_end78-mcl_fp_montNF16L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_montRed16L               @ -- Begin function mcl_fp_montRed16L
+	.p2align	2
+	.type	mcl_fp_montRed16L,%function
+	.code	32                              @ @mcl_fp_montRed16L
+mcl_fp_montRed16L:
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#372
+	sub	sp, sp, #372
+	.pad	#1024
+	sub	sp, sp, #1024
+	str	r0, [sp, #224]                  @ 4-byte Spill
+	mov	r3, r2
+	ldr	r0, [r2]
+	add	lr, sp, #1024
+	str	r0, [sp, #216]                  @ 4-byte Spill
+	ldr	r0, [r2, #4]
+	str	r0, [sp, #212]                  @ 4-byte Spill
+	ldr	r0, [r2, #8]
+	str	r0, [sp, #208]                  @ 4-byte Spill
+	ldr	r0, [r2, #12]
+	str	r0, [sp, #192]                  @ 4-byte Spill
+	ldr	r0, [r2, #16]
+	str	r0, [sp, #196]                  @ 4-byte Spill
+	ldr	r0, [r2, #20]
+	str	r0, [sp, #200]                  @ 4-byte Spill
+	ldr	r0, [r2, #24]
+	str	r0, [sp, #204]                  @ 4-byte Spill
+	ldr	r0, [r1, #4]
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [r1, #8]
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [r1, #12]
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [r3, #60]
+	str	r0, [sp, #220]                  @ 4-byte Spill
+	ldr	r0, [r3, #28]
+	str	r0, [sp, #160]                  @ 4-byte Spill
+	ldr	r0, [r3, #32]
+	str	r0, [sp, #164]                  @ 4-byte Spill
+	ldr	r0, [r3, #36]
+	str	r0, [sp, #168]                  @ 4-byte Spill
+	ldr	r0, [r3, #40]
+	str	r0, [sp, #172]                  @ 4-byte Spill
+	ldr	r0, [r3, #44]
+	str	r0, [sp, #176]                  @ 4-byte Spill
+	ldr	r0, [r3, #48]
+	str	r0, [sp, #180]                  @ 4-byte Spill
+	ldr	r0, [r3, #52]
+	str	r0, [sp, #184]                  @ 4-byte Spill
+	ldr	r0, [r3, #56]
+	str	r0, [sp, #188]                  @ 4-byte Spill
+	ldr	r0, [r1, #32]
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [r1, #36]
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [r1, #40]
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [r1, #44]
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [r1, #48]
+	ldr	r7, [r2, #-4]
+	ldr	r10, [r1]
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [r1, #52]
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	mul	r2, r10, r7
+	ldr	r0, [r1, #56]
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [r1, #60]
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	ldr	r0, [r1, #28]
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [r1, #24]
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [r1, #20]
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [r1, #16]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	add	r0, lr, #296
+	str	r1, [sp, #236]                  @ 4-byte Spill
+	mov	r1, r3
+	str	r7, [sp, #228]                  @ 4-byte Spill
+	str	r3, [sp, #232]                  @ 4-byte Spill
+	bl	mulPv512x32
+	ldr	r0, [sp, #1380]
+	add	r11, sp, #1344
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #1376]
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [sp, #1372]
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #1320]
+	ldr	r1, [sp, #1324]
+	adds	r0, r10, r0
+	ldr	r2, [sp, #1328]
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	ldm	r11, {r4, r5, r6, r7, r8, r9, r11}
+	adcs	r10, r0, r1
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	ldr	lr, [sp, #1340]
 	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	ldr	r12, [sp, #1336]
+	ldr	r3, [sp, #1332]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #148]                  @ 4-byte Reload
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r5
-	ldr	r5, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	mul	r2, r7, r5
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	mrs	r0, apsr
+	ldr	r6, [sp, #228]                  @ 4-byte Reload
+	ldr	r7, [sp, #236]                  @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	mul	r2, r6, r10
+	ldr	r8, [sp, #232]                  @ 4-byte Reload
+	ldr	r0, [r7, #64]
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #1384]
+	mov	r1, r8
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	add	r0, sp, #1248
+	bl	mulPv512x32
+	ldr	r0, [sp, #1304]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #1300]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r5, [sp, #1248]
+	ldr	r4, [sp, #1252]
+	ldr	r2, [sp, #68]                   @ 4-byte Reload
+	adds	r5, r10, r5
+	ldr	r0, [sp, #1256]
+	adcs	r9, r2, r4
+	ldr	r4, [sp, #148]                  @ 4-byte Reload
+	ldr	r1, [sp, #1260]
+	adcs	r0, r4, r0
+	ldr	r4, [sp, #72]                   @ 4-byte Reload
+	ldr	lr, [sp, #1296]
+	ldr	r12, [sp, #1292]
+	adcs	r1, r4, r1
+	ldr	r11, [sp, #1288]
+	ldr	r3, [sp, #1284]
+	ldr	r5, [sp, #1280]
+	ldr	r2, [sp, #1276]
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #1272]
+	str	r1, [sp, #144]                  @ 4-byte Spill
+	ldr	r1, [sp, #1264]
+	ldr	r4, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r4, r1
+	str	r1, [sp, #140]                  @ 4-byte Spill
+	ldr	r1, [sp, #1268]
+	ldr	r4, [sp, #156]                  @ 4-byte Reload
+	adcs	r1, r4, r1
+	str	r1, [sp, #136]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r6, r9
+	adcs	r0, r0, r5
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	mov	r11, r7
+	adcs	r0, r0, r12
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #1024
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #152]                  @ 4-byte Reload
+	ldr	r3, [sp, #92]                   @ 4-byte Reload
+	ldr	r0, [sp, #1312]
+	adcs	r1, r1, r3
+	str	r1, [sp, #96]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	add	r0, sp, #1152
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1220]
-	add	r11, sp, #1152
-	ldr	r4, [sp, #1176]
-	ldr	r6, [sp, #1172]
-	ldr	r7, [sp, #1168]
-	add	lr, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1216]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1212]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1208]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1204]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1200]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1196]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1192]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1188]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1184]
-	str	r0, [sp, #12]           @ 4-byte Spill
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r7, #68]
+	mov	r1, r8
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #1308]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	add	r0, lr, #152
+	bl	mulPv512x32
+	ldr	r0, [sp, #1228]
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #1224]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r7, [sp, #1176]
 	ldr	r0, [sp, #1180]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r2, [r0, #40]
-	add	r0, lr, #56
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	adds	r0, r0, r8
-	ldr	r8, [sp, #1092]
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r2, r0, r9
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #1120
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	ldr	r2, [sp, #148]                  @ 4-byte Reload
+	adds	r4, r9, r7
+	ldr	r1, [sp, #1184]
+	adcs	r4, r2, r0
+	ldr	r2, [sp, #144]                  @ 4-byte Reload
+	ldr	r10, [sp, #1188]
+	adcs	r1, r2, r1
+	ldr	r2, [sp, #140]                  @ 4-byte Reload
+	ldr	lr, [sp, #1220]
+	ldr	r12, [sp, #1216]
+	adcs	r2, r2, r10
+	ldr	r6, [sp, #1212]
+	ldr	r5, [sp, #1208]
+	ldr	r7, [sp, #1204]
+	ldr	r0, [sp, #1200]
+	str	r1, [sp, #148]                  @ 4-byte Spill
+	ldr	r1, [sp, #1196]
+	str	r2, [sp, #144]                  @ 4-byte Spill
+	ldr	r2, [sp, #1192]
+	ldr	r3, [sp, #136]                  @ 4-byte Reload
+	adcs	r2, r3, r2
+	str	r2, [sp, #140]                  @ 4-byte Spill
+	ldr	r2, [sp, #132]                  @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #136]                  @ 4-byte Spill
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
 	adcs	r0, r0, r7
-	ldr	r7, [sp, #1084]
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
 	adcs	r0, r0, r6
-	ldr	r6, [sp, #1088]
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	ldr	r4, [sp, #1080]
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #56]           @ 4-byte Spill
-	adds	r0, r2, r4
-	mul	r1, r0, r5
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #1148]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #1144]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1140]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1136]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r9, r10}
-	ldr	r11, [sp, #120]         @ 4-byte Reload
-	ldr	r0, [sp, #1096]
-	ldr	r1, [sp, #1100]
-	ldr	r2, [sp, #1104]
-	ldr	r3, [sp, #1108]
-	ldr	r12, [sp, #1112]
-	ldr	lr, [sp, #1116]
-	adcs	r7, r11, r7
-	str	r7, [sp, #120]          @ 4-byte Spill
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	adcs	r6, r7, r6
-	str	r6, [sp, #116]          @ 4-byte Spill
-	ldr	r6, [sp, #112]          @ 4-byte Reload
-	adcs	r6, r6, r8
-	str	r6, [sp, #112]          @ 4-byte Spill
-	ldr	r6, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r1, [sp, #152]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	ldr	r0, [sp, #1240]
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	ldr	r5, [sp, #228]                  @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r11, #72]
+	mul	r2, r5, r4
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #1236]
+	mov	r1, r8
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #1232]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	add	r0, sp, #1104
+	bl	mulPv512x32
+	ldr	r7, [sp, #1104]
+	ldr	r0, [sp, #1108]
+	adds	r4, r4, r7
+	ldr	r1, [sp, #1112]
+	ldr	r4, [sp, #148]                  @ 4-byte Reload
+	ldr	r10, [sp, #144]                 @ 4-byte Reload
+	adcs	r9, r4, r0
+	ldr	r11, [sp, #1116]
+	ldr	r4, [sp, #140]                  @ 4-byte Reload
+	adcs	r1, r10, r1
+	ldr	r8, [sp, #1152]
+	ldr	lr, [sp, #1148]
+	adcs	r4, r4, r11
+	ldr	r12, [sp, #1144]
+	ldr	r3, [sp, #1140]
+	ldr	r6, [sp, #1136]
+	ldr	r2, [sp, #1132]
+	ldr	r7, [sp, #1128]
+	ldr	r0, [sp, #1124]
+	str	r1, [sp, #148]                  @ 4-byte Spill
+	ldr	r1, [sp, #1120]
+	str	r4, [sp, #144]                  @ 4-byte Spill
+	ldr	r4, [sp, #136]                  @ 4-byte Reload
+	adcs	r1, r4, r1
+	str	r1, [sp, #140]                  @ 4-byte Spill
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
 	adcs	r0, r0, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r5, r9
+	adcs	r0, r0, r6
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	add	lr, sp, #1024
+	adcs	r0, r0, r8
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	ldr	r0, [sp, #1168]
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	ldr	r10, [sp, #236]                 @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	ldr	r7, [sp, #232]                  @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #1008
-	bl	.LmulPv544x32(PLT)
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r10, #76]
+	mov	r1, r7
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #1164]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #1160]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #1156]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	add	r0, lr, #8
+	bl	mulPv512x32
 	ldr	r0, [sp, #1076]
-	add	r11, sp, #1008
-	ldr	r4, [sp, #1036]
+	add	lr, sp, #1056
+	str	r0, [sp, #152]                  @ 4-byte Spill
 	ldr	r5, [sp, #1032]
-	ldr	r6, [sp, #1028]
-	ldr	r7, [sp, #1024]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #1072]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #1068]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #1064]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1060]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1056]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1052]
-	str	r0, [sp, #24]           @ 4-byte Spill
+	ldr	r0, [sp, #1036]
+	adds	r4, r9, r5
+	ldr	r1, [sp, #1040]
+	ldr	r4, [sp, #148]                  @ 4-byte Reload
+	ldr	r6, [sp, #144]                  @ 4-byte Reload
+	adcs	r4, r4, r0
+	ldr	r11, [sp, #1072]
+	adcs	r1, r6, r1
+	ldr	r8, [sp, #1068]
+	ldm	lr, {r3, r12, lr}
+	ldr	r2, [sp, #1044]
+	ldr	r5, [sp, #1052]
 	ldr	r0, [sp, #1048]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1044]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1040]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r8, r9, r10, r11}
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	ldr	r2, [r0, #44]
-	add	r0, sp, #936
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #952
-	adds	r0, r0, r8
-	add	r8, sp, #936
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r2, r0, r9
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #976
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	str	r1, [sp, #148]                  @ 4-byte Spill
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #144]                  @ 4-byte Spill
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	ldr	r1, [sp, #152]                  @ 4-byte Reload
 	adcs	r0, r0, r5
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	mov	r11, r10
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldm	r8, {r4, r6, r7, r8}
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adds	r1, r2, r4
-	mul	r2, r1, r0
-	ldr	r0, [sp, #1004]
-	str	r1, [sp, #124]          @ 4-byte Spill
-	str	r2, [sp, #24]           @ 4-byte Spill
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1000]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #996]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #992]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r4, r5, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #120]         @ 4-byte Reload
-	adcs	r6, r11, r6
-	str	r6, [sp, #76]           @ 4-byte Spill
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	adcs	r6, r6, r7
-	str	r6, [sp, #72]           @ 4-byte Spill
-	ldr	r6, [sp, #112]          @ 4-byte Reload
-	adcs	r6, r6, r8
-	str	r6, [sp, #68]           @ 4-byte Spill
-	ldr	r6, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	ldr	r0, [sp, #1096]
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	ldr	r6, [sp, #228]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	mul	r2, r6, r4
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r10, #80]
+	mov	r1, r7
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #1092]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #1088]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #1084]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #1080]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	add	r0, sp, #960
+	bl	mulPv512x32
+	ldr	r5, [sp, #960]
+	add	r2, sp, #964
+	add	lr, sp, #984
+	ldr	r9, [sp, #1000]
+	adds	r4, r4, r5
+	ldm	r2, {r0, r1, r2}
+	ldr	r4, [sp, #148]                  @ 4-byte Reload
+	ldr	r8, [sp, #996]
+	adcs	r10, r4, r0
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
+	ldm	lr, {r3, r12, lr}
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
+	ldr	r7, [sp, #980]
+	ldr	r5, [sp, #976]
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
 	adcs	r0, r0, r2
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	mul	r2, r6, r10
+	adcs	r0, r0, r5
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	mrs	r0, apsr
+	mov	r9, r6
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	ldr	r0, [sp, #1024]
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	ldr	r4, [sp, #232]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	add	r0, sp, #864
-	bl	.LmulPv544x32(PLT)
-	ldr	r1, [sp, #932]
-	ldr	r5, [sp, #892]
-	ldr	r7, [sp, #888]
-	ldr	r4, [sp, #884]
-	ldr	r9, [sp, #880]
-	ldr	r8, [sp, #864]
-	ldr	r11, [sp, #868]
-	ldr	r10, [sp, #872]
-	ldr	r6, [sp, #876]
-	add	r0, sp, #792
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #928]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #924]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #920]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #916]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #912]
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #908]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #904]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #900]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #896]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r1, #48]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #796
-	adds	r0, r0, r8
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	add	r10, sp, #820
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #860]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #856]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #852]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #848]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #844]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #840]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r7, [sp, #792]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #124]         @ 4-byte Reload
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	adds	r7, r11, r7
-	adcs	r0, r6, r0
-	str	r7, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r11, #84]
+	mov	r1, r4
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #1020]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #1016]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #1012]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #1008]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	add	r0, sp, #888
+	ldr	r11, [sp, #1004]
+	bl	mulPv512x32
+	ldr	r6, [sp, #888]
+	add	r7, sp, #892
+	add	lr, sp, #912
+	ldr	r8, [sp, #924]
+	adds	r6, r10, r6
+	ldm	r7, {r0, r1, r2, r5, r7}
+	ldr	r6, [sp, #148]                  @ 4-byte Reload
+	ldm	lr, {r3, r12, lr}
+	adcs	r10, r6, r0
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	mul	r2, r9, r10
+	adcs	r0, r0, r5
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r0, [sp, #952]
+	adcs	r1, r1, r11
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	ldr	r6, [sp, #236]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r6, #88]
+	mov	r1, r4
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #948]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #944]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #940]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #936]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	add	r0, sp, #816
+	ldr	r11, [sp, #932]
+	ldr	r8, [sp, #928]
+	bl	mulPv512x32
+	ldr	r4, [sp, #816]
+	add	r5, sp, #824
+	ldr	r7, [sp, #820]
+	adds	r4, r10, r4
+	ldm	r5, {r0, r1, r5}
+	ldr	r4, [sp, #148]                  @ 4-byte Reload
+	ldr	r12, [sp, #848]
+	adcs	r4, r4, r7
+	ldr	r7, [sp, #144]                  @ 4-byte Reload
+	ldr	lr, [sp, #844]
+	adcs	r0, r7, r0
+	ldr	r2, [sp, #840]
+	ldr	r3, [sp, #836]
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	mul	r2, r9, r4
+	adcs	r0, r0, lr
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	ldr	r0, [sp, #880]
+	adcs	r1, r1, r8
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r11
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r11, [sp, #232]                 @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #720
-	bl	.LmulPv544x32(PLT)
-	ldr	r1, [sp, #788]
-	add	r11, sp, #728
-	ldr	r5, [sp, #748]
-	ldr	r9, [sp, #744]
-	ldr	r10, [sp, #720]
-	ldr	r6, [sp, #724]
-	add	r0, sp, #648
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #784]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #780]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #776]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #772]
-	str	r1, [sp, #36]           @ 4-byte Spill
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r6, #92]
+	mov	r1, r11
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #876]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #872]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #868]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #864]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #860]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #852]
+	ldr	r10, [sp, #856]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	add	r0, sp, #744
+	bl	mulPv512x32
+	add	r7, sp, #744
+	ldr	r12, [sp, #772]
 	ldr	r1, [sp, #768]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #764]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #760]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #756]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #752]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r7, r8, r11}
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r1, #52]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #652
-	adds	r0, r0, r10
-	add	r10, sp, #676
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
+	ldm	r7, {r5, r6, r7}
+	adds	r5, r4, r5
+	ldr	r2, [sp, #764]
+	ldr	r5, [sp, #148]                  @ 4-byte Reload
+	ldr	r3, [sp, #760]
+	adcs	r8, r5, r6
+	ldr	r6, [sp, #144]                  @ 4-byte Reload
+	ldr	r0, [sp, #756]
+	adcs	r7, r6, r7
+	str	r7, [sp, #148]                  @ 4-byte Spill
+	ldr	r7, [sp, #140]                  @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	mul	r2, r9, r8
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #84]           @ 4-byte Spill
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	ldr	r0, [sp, #808]
+	adcs	r1, r1, r3
+	str	r1, [sp, #124]                  @ 4-byte Spill
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	ldr	r3, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r1, r10
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r10, [sp, #236]                 @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r10, #96]
+	mov	r1, r11
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #804]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #800]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #796]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #792]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #788]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #784]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #780]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	add	r0, sp, #672
+	ldr	r9, [sp, #776]
+	bl	mulPv512x32
+	add	r7, sp, #672
+	ldr	r0, [sp, #696]
+	ldr	r1, [sp, #692]
+	ldm	r7, {r3, r5, r6, r7}
+	adds	r3, r8, r3
+	ldr	r2, [sp, #688]
+	ldr	r3, [sp, #148]                  @ 4-byte Reload
+	adcs	r8, r3, r5
+	ldr	r3, [sp, #144]                  @ 4-byte Reload
+	adcs	r3, r3, r6
+	str	r3, [sp, #148]                  @ 4-byte Spill
+	ldr	r3, [sp, #140]                  @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #144]                  @ 4-byte Spill
+	ldr	r3, [sp, #136]                  @ 4-byte Reload
+	adcs	r2, r3, r2
+	str	r2, [sp, #140]                  @ 4-byte Spill
+	ldr	r2, [sp, #132]                  @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #136]                  @ 4-byte Spill
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	ldr	r0, [sp, #736]
+	adcs	r1, r1, r9
+	str	r1, [sp, #128]                  @ 4-byte Spill
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	ldr	r4, [sp, #228]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #124]                  @ 4-byte Spill
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	mul	r2, r4, r8
+	ldr	r7, [sp, #232]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r3, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r10, #100]
+	mov	r1, r7
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #732]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #728]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #724]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #720]
+	str	r0, [sp, #72]                   @ 4-byte Spill
 	ldr	r0, [sp, #716]
-	str	r0, [sp, #52]           @ 4-byte Spill
+	str	r0, [sp, #68]                   @ 4-byte Spill
 	ldr	r0, [sp, #712]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #708]
-	str	r0, [sp, #44]           @ 4-byte Spill
+	str	r0, [sp, #64]                   @ 4-byte Spill
 	ldr	r0, [sp, #704]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #700]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #696]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r7, [sp, #648]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #80]          @ 4-byte Reload
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	adds	r7, r11, r7
-	adcs	r0, r6, r0
-	str	r7, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #576
-	bl	.LmulPv544x32(PLT)
-	ldr	r1, [sp, #644]
-	add	r11, sp, #584
-	ldr	r5, [sp, #604]
-	ldr	r9, [sp, #600]
-	ldr	r10, [sp, #576]
-	ldr	r6, [sp, #580]
-	add	r0, sp, #504
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #640]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #636]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #632]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #628]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #624]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #620]
-	str	r1, [sp, #24]           @ 4-byte Spill
+	ldr	r10, [sp, #708]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	add	r0, sp, #600
+	ldr	r9, [sp, #700]
+	bl	mulPv512x32
+	add	r6, sp, #600
+	ldr	r0, [sp, #620]
 	ldr	r1, [sp, #616]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #612]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #608]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r7, r8, r11}
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r1, #56]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #508
-	adds	r0, r0, r10
-	add	r10, sp, #532
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #572]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #568]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #564]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #560]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #556]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #552]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r7, [sp, #504]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #80]          @ 4-byte Reload
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	adds	r7, r11, r7
-	adcs	r0, r6, r0
-	str	r7, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	ldm	r6, {r2, r3, r5, r6}
+	adds	r2, r8, r2
+	ldr	r2, [sp, #148]                  @ 4-byte Reload
+	adcs	r11, r2, r3
+	ldr	r2, [sp, #144]                  @ 4-byte Reload
+	adcs	r2, r2, r5
+	str	r2, [sp, #148]                  @ 4-byte Spill
+	ldr	r2, [sp, #140]                  @ 4-byte Reload
+	adcs	r2, r2, r6
+	str	r2, [sp, #144]                  @ 4-byte Spill
+	ldr	r2, [sp, #136]                  @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #140]                  @ 4-byte Spill
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	mul	r2, r4, r11
+	adcs	r0, r1, r0
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldr	r0, [sp, #664]
+	adcs	r1, r1, r9
+	str	r1, [sp, #156]                  @ 4-byte Spill
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #132]                  @ 4-byte Spill
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	adcs	r1, r1, r10
+	str	r1, [sp, #128]                  @ 4-byte Spill
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	ldr	r10, [sp, #236]                 @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #124]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r3, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [r10, #104]
+	mov	r1, r7
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #660]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #656]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #652]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #648]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #644]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #640]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #636]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	add	r0, sp, #528
+	ldr	r8, [sp, #632]
+	ldr	r9, [sp, #628]
+	ldr	r5, [sp, #624]
+	bl	mulPv512x32
+	add	r6, sp, #528
+	ldr	r0, [sp, #544]
+	ldm	r6, {r1, r2, r3, r6}
+	adds	r1, r11, r1
+	ldr	r1, [sp, #148]                  @ 4-byte Reload
+	adcs	r4, r1, r2
+	ldr	r1, [sp, #144]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	adcs	r1, r1, r6
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #156]                  @ 4-byte Reload
+	ldr	r0, [sp, #592]
+	adcs	r1, r1, r5
+	str	r1, [sp, #152]                  @ 4-byte Spill
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r1, r9
+	str	r1, [sp, #144]                  @ 4-byte Spill
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	ldr	r7, [sp, #228]                  @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #140]                  @ 4-byte Spill
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	mul	r2, r7, r4
+	ldr	r6, [sp, #232]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #136]                  @ 4-byte Spill
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #132]                  @ 4-byte Spill
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #128]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #124]                  @ 4-byte Spill
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r3, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [r10, #108]
+	mov	r1, r6
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #588]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #584]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #580]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #576]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #572]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #568]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #564]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	add	r0, sp, #456
+	ldr	r11, [sp, #560]
+	ldr	r8, [sp, #556]
+	ldr	r9, [sp, #552]
+	ldr	r5, [sp, #548]
+	bl	mulPv512x32
+	add	r3, sp, #456
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r4, r0
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r4, r0, r1
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	mul	r2, r7, r4
 	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #432
-	bl	.LmulPv544x32(PLT)
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #148]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r3, [sp, #152]                  @ 4-byte Reload
+	ldr	r1, [sp, #520]
+	add	r0, sp, #384
+	adcs	r3, r3, r5
+	str	r3, [sp, #152]                  @ 4-byte Spill
+	ldr	r3, [sp, #144]                  @ 4-byte Reload
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	adcs	r3, r3, r9
+	str	r3, [sp, #148]                  @ 4-byte Spill
+	ldr	r3, [sp, #140]                  @ 4-byte Reload
+	adcs	r3, r3, r8
+	str	r3, [sp, #144]                  @ 4-byte Spill
+	ldr	r3, [sp, #136]                  @ 4-byte Reload
+	adcs	r3, r3, r11
+	str	r3, [sp, #140]                  @ 4-byte Spill
+	ldr	r3, [sp, #132]                  @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #136]                  @ 4-byte Spill
+	ldr	r3, [sp, #128]                  @ 4-byte Reload
+	ldr	r7, [sp, #60]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #132]                  @ 4-byte Spill
+	ldr	r3, [sp, #124]                  @ 4-byte Reload
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #128]                  @ 4-byte Spill
+	ldr	r3, [sp, #120]                  @ 4-byte Reload
+	ldr	r7, [sp, #68]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #124]                  @ 4-byte Spill
+	ldr	r3, [sp, #116]                  @ 4-byte Reload
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #120]                  @ 4-byte Spill
+	ldr	r3, [sp, #112]                  @ 4-byte Reload
+	ldr	r7, [sp, #76]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #116]                  @ 4-byte Spill
+	ldr	r3, [sp, #108]                  @ 4-byte Reload
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #112]                  @ 4-byte Spill
+	ldr	r3, [sp, #104]                  @ 4-byte Reload
+	ldr	r7, [sp, #100]                  @ 4-byte Reload
+	adcs	r3, r7, r3
+	str	r3, [sp, #108]                  @ 4-byte Spill
+	adc	r1, r1, #0
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [r10, #112]
+	mov	r7, r6
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #516]
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #512]
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #508]
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #504]
+	str	r1, [sp, #64]                   @ 4-byte Spill
 	ldr	r1, [sp, #500]
-	add	r11, sp, #440
-	ldr	r5, [sp, #460]
-	ldr	r9, [sp, #456]
-	ldr	r10, [sp, #432]
-	ldr	r6, [sp, #436]
-	add	r0, sp, #360
-	str	r1, [sp, #52]           @ 4-byte Spill
+	str	r1, [sp, #56]                   @ 4-byte Spill
 	ldr	r1, [sp, #496]
-	str	r1, [sp, #48]           @ 4-byte Spill
+	str	r1, [sp, #52]                   @ 4-byte Spill
 	ldr	r1, [sp, #492]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #488]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #484]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #480]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #476]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #472]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #468]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #464]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r11, {r4, r7, r8, r11}
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [r1, #60]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #364
-	adds	r0, r0, r10
-	add	r10, sp, #388
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #428]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #424]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #420]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #416]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #412]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #408]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r8, r9, r10}
-	ldr	r7, [sp, #360]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #80]          @ 4-byte Reload
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	adds	r7, r11, r7
-	adcs	r0, r6, r0
-	str	r7, [sp, #28]           @ 4-byte Spill
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, sp, #288
-	bl	.LmulPv544x32(PLT)
-	ldr	r1, [sp, #356]
-	add	r8, sp, #288
-	ldr	r9, [sp, #316]
-	ldr	r10, [sp, #312]
-	ldr	r11, [sp, #308]
-	ldr	r6, [sp, #304]
-	add	r0, sp, #216
-	str	r1, [sp, #52]           @ 4-byte Spill
-	ldr	r1, [sp, #352]
-	str	r1, [sp, #48]           @ 4-byte Spill
-	ldr	r1, [sp, #348]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #344]
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #340]
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #336]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #332]
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [sp, #328]
-	str	r1, [sp, #20]           @ 4-byte Spill
-	ldr	r1, [sp, #324]
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldr	r1, [sp, #320]
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldm	r8, {r4, r5, r8}
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	ldr	r7, [sp, #300]
-	ldr	r2, [r1, #64]
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	add	lr, sp, #232
-	adds	r0, r0, r4
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r2, r0, r5
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	add	r9, sp, #216
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adc	r0, r0, r1
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldm	r9, {r4, r8, r9}
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	ldr	r7, [sp, #228]
-	ldr	r5, [sp, #260]
-	adds	r11, r2, r4
-	ldr	r4, [sp, #256]
-	mul	r1, r11, r0
-	ldr	r0, [sp, #284]
-	str	r1, [sp, #64]           @ 4-byte Spill
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #280]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #276]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #272]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #268]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #264]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r10, [sp, #136]         @ 4-byte Reload
-	ldr	r6, [sp, #132]          @ 4-byte Reload
-	adcs	r8, r10, r8
-	ldr	r10, [sp, #140]         @ 4-byte Reload
-	adcs	r9, r6, r9
-	ldr	r6, [sp, #80]           @ 4-byte Reload
-	adcs	r7, r6, r7
-	ldr	r6, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r6, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r10
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	add	r0, sp, #144
-	bl	.LmulPv544x32(PLT)
-	add	r3, sp, #144
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	mov	r1, r6
+	ldr	r8, [sp, #488]
+	ldr	r9, [sp, #484]
+	ldr	r10, [sp, #480]
+	ldr	r5, [sp, #476]
+	ldr	r11, [sp, #472]
+	bl	mulPv512x32
+	add	r3, sp, #384
 	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r11, r0
-	adcs	r4, r8, r1
-	ldr	r0, [sp, #160]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r8, r9, r2
-	str	r4, [sp, #52]           @ 4-byte Spill
-	adcs	r9, r7, r3
-	mov	r3, r10
-	str	r8, [sp, #60]           @ 4-byte Spill
-	str	r9, [sp, #64]           @ 4-byte Spill
-	adcs	r5, r1, r0
-	ldr	r0, [sp, #164]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r5, [sp, #68]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #168]
-	adcs	lr, r1, r0
-	ldr	r0, [sp, #172]
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	lr, [sp, #48]           @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #176]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #180]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #184]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #188]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #192]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #196]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #200]
-	adcs	r0, r6, r0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #204]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #208]
+	adds	r0, r4, r0
+	str	r3, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r4, r0, r1
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r3, [sp, #152]                  @ 4-byte Reload
+	ldr	r1, [sp, #448]
+	add	r0, sp, #312
+	adcs	r3, r3, r11
+	str	r3, [sp, #60]                   @ 4-byte Spill
+	ldr	r3, [sp, #148]                  @ 4-byte Reload
+	ldr	r6, [sp, #228]                  @ 4-byte Reload
+	adcs	r11, r3, r5
+	ldr	r3, [sp, #144]                  @ 4-byte Reload
+	ldr	r5, [sp, #48]                   @ 4-byte Reload
+	adcs	r10, r3, r10
+	ldr	r3, [sp, #140]                  @ 4-byte Reload
+	mul	r2, r6, r4
+	adcs	r3, r3, r9
+	str	r3, [sp, #152]                  @ 4-byte Spill
+	ldr	r3, [sp, #136]                  @ 4-byte Reload
+	adcs	r8, r3, r8
+	ldr	r3, [sp, #132]                  @ 4-byte Reload
+	adcs	r3, r3, r5
+	str	r3, [sp, #148]                  @ 4-byte Spill
+	ldr	r3, [sp, #128]                  @ 4-byte Reload
+	ldr	r5, [sp, #52]                   @ 4-byte Reload
+	adcs	r3, r3, r5
+	str	r3, [sp, #144]                  @ 4-byte Spill
+	ldr	r3, [sp, #124]                  @ 4-byte Reload
+	ldr	r5, [sp, #56]                   @ 4-byte Reload
+	adcs	r3, r3, r5
+	str	r3, [sp, #140]                  @ 4-byte Spill
+	ldr	r3, [sp, #120]                  @ 4-byte Reload
+	ldr	r5, [sp, #64]                   @ 4-byte Reload
+	adcs	r3, r3, r5
+	str	r3, [sp, #136]                  @ 4-byte Spill
+	ldr	r3, [sp, #116]                  @ 4-byte Reload
+	ldr	r5, [sp, #68]                   @ 4-byte Reload
+	adcs	r3, r3, r5
+	str	r3, [sp, #132]                  @ 4-byte Spill
+	ldr	r3, [sp, #112]                  @ 4-byte Reload
+	ldr	r5, [sp, #72]                   @ 4-byte Reload
+	adcs	r3, r3, r5
+	str	r3, [sp, #128]                  @ 4-byte Spill
+	ldr	r3, [sp, #108]                  @ 4-byte Reload
+	ldr	r5, [sp, #80]                   @ 4-byte Reload
+	adcs	r3, r3, r5
+	str	r3, [sp, #124]                  @ 4-byte Spill
+	ldr	r3, [sp, #104]                  @ 4-byte Reload
+	ldr	r5, [sp, #88]                   @ 4-byte Reload
+	adcs	r3, r5, r3
+	ldr	r5, [sp, #236]                  @ 4-byte Reload
+	adc	r1, r1, #0
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	str	r3, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [r5, #116]
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #444]
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #440]
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #436]
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	ldr	r1, [sp, #432]
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #428]
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #424]
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #420]
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #416]
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	ldr	r1, [sp, #412]
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #408]
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #404]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	mov	r1, r7
+	ldr	r9, [sp, #400]
+	bl	mulPv512x32
+	add	r2, sp, #312
+	ldm	r2, {r0, r1, r2}
+	adds	r0, r4, r0
+	str	r2, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r2, [sp, #324]
+	adcs	r0, r0, r1
+	str	r2, [sp, #68]                   @ 4-byte Spill
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	mrs	r1, apsr
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	mul	r2, r6, r0
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r1
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [r5, #120]
+	adcs	r7, r11, r9
+	str	r0, [sp, #228]                  @ 4-byte Spill
+	adcs	r3, r10, r3
+	ldr	r0, [sp, #372]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #368]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #364]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #360]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #356]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #352]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #348]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #344]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #340]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #336]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #332]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #328]
+	str	r0, [sp, #4]                    @ 4-byte Spill
+	add	r0, sp, #240
+	ldr	r1, [sp, #376]
+	str	r3, [sp, #24]                   @ 4-byte Spill
+	ldr	r3, [sp, #152]                  @ 4-byte Reload
+	ldr	r6, [sp, #36]                   @ 4-byte Reload
+	ldr	r5, [sp, #64]                   @ 4-byte Reload
+	adcs	r11, r3, r6
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	ldr	r6, [sp, #56]                   @ 4-byte Reload
+	adcs	r8, r8, r3
+	ldr	r3, [sp, #148]                  @ 4-byte Reload
+	ldr	r4, [sp, #92]                   @ 4-byte Reload
+	adcs	r6, r3, r6
+	ldr	r3, [sp, #144]                  @ 4-byte Reload
+	adcs	r9, r3, r5
+	ldr	r3, [sp, #140]                  @ 4-byte Reload
+	ldr	r5, [sp, #80]                   @ 4-byte Reload
+	adcs	r10, r3, r5
+	ldr	r3, [sp, #136]                  @ 4-byte Reload
+	ldr	r5, [sp, #88]                   @ 4-byte Reload
+	adcs	r5, r3, r5
+	ldr	r3, [sp, #132]                  @ 4-byte Reload
+	adcs	r3, r3, r4
+	str	r3, [sp, #152]                  @ 4-byte Spill
+	ldr	r3, [sp, #128]                  @ 4-byte Reload
+	ldr	r4, [sp, #96]                   @ 4-byte Reload
+	adcs	r3, r3, r4
+	str	r3, [sp, #148]                  @ 4-byte Spill
+	ldr	r3, [sp, #124]                  @ 4-byte Reload
+	ldr	r4, [sp, #104]                  @ 4-byte Reload
+	adcs	r3, r3, r4
+	str	r3, [sp, #144]                  @ 4-byte Spill
+	ldr	r3, [sp, #120]                  @ 4-byte Reload
+	ldr	r4, [sp, #108]                  @ 4-byte Reload
+	adcs	r3, r3, r4
+	str	r3, [sp, #132]                  @ 4-byte Spill
+	ldr	r3, [sp, #116]                  @ 4-byte Reload
+	ldr	r4, [sp, #112]                  @ 4-byte Reload
+	adcs	r3, r4, r3
+	str	r3, [sp, #128]                  @ 4-byte Spill
+	adc	r1, r1, #0
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #232]                  @ 4-byte Reload
+	bl	mulPv512x32
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	add	r2, sp, #244
+	msr	APSR_nzcvq, r0
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r3, r1, r0
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r7, r7, r0
+	ldr	r0, [sp, #4]                    @ 4-byte Reload
+	adcs	r4, r1, r0
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	ldr	r1, [sp, #152]                  @ 4-byte Reload
+	adcs	r12, r11, r0
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	lr, r8, r0
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	r6, r6, r0
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r0, r9, r0
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r0, r10, r0
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r10, [sp, #156]                 @ 4-byte Reload
+	adcs	r0, r5, r0
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
 	adcs	r0, r1, r0
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #212]
-	adc	r1, r1, r0
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldm	r3, {r0, r2, r7}
-	ldr	r6, [r3, #12]
-	ldr	r11, [r3, #36]
-	ldr	r10, [r3, #32]
-	subs	r12, r4, r0
-	ldr	r0, [r3, #64]
-	sbcs	r4, r8, r2
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	sbcs	r8, r9, r7
-	ldr	r7, [r3, #20]
-	sbcs	r9, r5, r6
-	ldr	r6, [r3, #24]
-	ldr	r5, [r3, #28]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r3, #40]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [r3, #44]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r3, #48]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r3, #52]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r3, #56]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r3, #60]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r3, #16]
-	sbcs	r2, r2, r0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	sbcs	r3, lr, r7
-	ldr	r7, [sp, #56]           @ 4-byte Reload
-	sbcs	lr, r0, r6
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	sbcs	r5, r0, r5
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	sbcs	r6, r0, r10
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	sbcs	r11, r0, r11
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	sbcs	r0, r0, r7
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	sbcs	r0, r0, r7
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	sbcs	r0, r0, r7
-	ldr	r7, [sp, #80]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	sbcs	r0, r0, r7
-	ldr	r7, [sp, #84]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	sbcs	r0, r0, r7
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	sbcs	r0, r0, r7
-	ldr	r7, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	sbc	r10, r1, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	asr	r1, r10, #31
-	cmp	r1, #0
-	movlt	r4, r7
-	movlt	r12, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	r12, [r0]
-	str	r4, [r0, #4]
-	ldr	r4, [sp, #64]           @ 4-byte Reload
-	movlt	r8, r4
-	ldr	r4, [sp, #68]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r8, [r0, #8]
-	movlt	r9, r4
-	ldr	r4, [sp, #96]           @ 4-byte Reload
-	str	r9, [r0, #12]
-	movlt	r2, r4
-	str	r2, [r0, #16]
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	movlt	r3, r2
-	ldr	r2, [sp, #100]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #20]
-	ldr	r3, [sp, #56]           @ 4-byte Reload
-	movlt	lr, r2
-	ldr	r2, [sp, #104]          @ 4-byte Reload
-	str	lr, [r0, #24]
-	movlt	r5, r2
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	str	r5, [r0, #28]
-	movlt	r6, r2
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r6, [r0, #32]
-	movlt	r11, r2
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	str	r11, [r0, #36]
-	movlt	r3, r2
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	str	r3, [r0, #40]
-	ldr	r3, [sp, #72]           @ 4-byte Reload
-	movlt	r3, r2
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #44]
-	ldr	r3, [sp, #76]           @ 4-byte Reload
-	movlt	r3, r2
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	str	r3, [r0, #48]
-	ldr	r3, [sp, #80]           @ 4-byte Reload
-	movlt	r3, r2
-	ldr	r2, [sp, #132]          @ 4-byte Reload
-	str	r3, [r0, #52]
-	ldr	r3, [sp, #84]           @ 4-byte Reload
-	movlt	r3, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	ldr	r2, [sp, #140]          @ 4-byte Reload
-	str	r3, [r0, #56]
-	movlt	r2, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r2, [r0, #60]
-	movlt	r10, r1
-	str	r10, [r0, #64]
-	add	sp, sp, #548
-	add	sp, sp, #2048
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #148]                  @ 4-byte Reload
+	adcs	r11, r1, r0
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #144]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #228]                  @ 4-byte Reload
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r5, [sp, #304]
+	adc	r0, r5, #0
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r5, [sp, #240]
+	ldm	r2, {r0, r1, r2}
+	adds	r5, r10, r5
+	adcs	r9, r3, r0
+	str	r9, [sp, #232]                  @ 4-byte Spill
+	adcs	r10, r7, r1
+	str	r10, [sp, #228]                 @ 4-byte Spill
+	adcs	r8, r4, r2
+	str	r8, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #256]
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	adcs	r7, r12, r0
+	str	r7, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #260]
+	ldr	r3, [sp, #112]                  @ 4-byte Reload
+	adcs	r5, lr, r0
+	str	r5, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #264]
+	adcs	r6, r6, r0
+	str	r6, [sp, #144]                  @ 4-byte Spill
+	ldr	r0, [sp, #268]
+	adcs	r12, r1, r0
+	str	r12, [sp, #140]                 @ 4-byte Spill
+	ldr	r0, [sp, #272]
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	adcs	lr, r1, r0
+	str	lr, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #276]
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	adcs	r4, r1, r0
+	str	r4, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #280]
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	adcs	r2, r1, r0
+	str	r2, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #284]
+	adcs	r1, r11, r0
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #288]
+	adcs	r11, r3, r0
+	ldr	r0, [sp, #292]
+	ldr	r3, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r3, r0
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #296]
+	ldr	r3, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r3, r0
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #300]
+	ldr	r3, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r3, r0
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #236]                  @ 4-byte Reload
+	ldr	r3, [sp, #128]                  @ 4-byte Reload
+	ldr	r0, [r0, #124]
+	adc	r0, r0, r3
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #216]                  @ 4-byte Reload
+	subs	r0, r9, r0
+	str	r0, [sp, #236]                  @ 4-byte Spill
+	ldr	r0, [sp, #212]                  @ 4-byte Reload
+	sbcs	r0, r10, r0
+	str	r0, [sp, #216]                  @ 4-byte Spill
+	ldr	r0, [sp, #208]                  @ 4-byte Reload
+	sbcs	r0, r8, r0
+	str	r0, [sp, #212]                  @ 4-byte Spill
+	ldr	r0, [sp, #192]                  @ 4-byte Reload
+	sbcs	r0, r7, r0
+	str	r0, [sp, #208]                  @ 4-byte Spill
+	ldr	r0, [sp, #196]                  @ 4-byte Reload
+	mov	r7, #0
+	sbcs	r0, r5, r0
+	str	r0, [sp, #196]                  @ 4-byte Spill
+	ldr	r0, [sp, #200]                  @ 4-byte Reload
+	ldr	r5, [sp, #120]                  @ 4-byte Reload
+	sbcs	r0, r6, r0
+	str	r0, [sp, #200]                  @ 4-byte Spill
+	ldr	r0, [sp, #204]                  @ 4-byte Reload
+	sbcs	r0, r12, r0
+	str	r0, [sp, #204]                  @ 4-byte Spill
+	ldr	r0, [sp, #160]                  @ 4-byte Reload
+	ldr	r12, [sp, #112]                 @ 4-byte Reload
+	sbcs	r0, lr, r0
+	str	r0, [sp, #192]                  @ 4-byte Spill
+	ldr	r0, [sp, #164]                  @ 4-byte Reload
+	ldr	lr, [sp, #124]                  @ 4-byte Reload
+	sbcs	r0, r4, r0
+	str	r0, [sp, #164]                  @ 4-byte Spill
+	ldr	r0, [sp, #168]                  @ 4-byte Reload
+	sbcs	r10, r2, r0
+	ldr	r0, [sp, #172]                  @ 4-byte Reload
+	ldr	r2, [sp, #132]                  @ 4-byte Reload
+	sbcs	r9, r1, r0
+	ldr	r0, [sp, #176]                  @ 4-byte Reload
+	sbcs	r6, r11, r0
+	ldr	r0, [sp, #180]                  @ 4-byte Reload
+	sbcs	r4, r5, r0
+	ldr	r0, [sp, #184]                  @ 4-byte Reload
+	sbcs	r3, lr, r0
+	ldr	r0, [sp, #188]                  @ 4-byte Reload
+	sbcs	r1, r12, r0
+	ldr	r0, [sp, #220]                  @ 4-byte Reload
+	sbcs	r0, r2, r0
+	sbc	r7, r7, #0
+	ands	r8, r7, #1
+	ldr	r7, [sp, #224]                  @ 4-byte Reload
+	movne	r0, r2
+	movne	r1, r12
+	movne	r3, lr
+	cmp	r8, #0
+	str	r0, [r7, #60]
+	movne	r4, r5
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	movne	r6, r11
+	str	r1, [r7, #56]
+	ldr	r1, [sp, #164]                  @ 4-byte Reload
+	movne	r9, r0
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	cmp	r8, #0
+	str	r3, [r7, #52]
+	str	r4, [r7, #48]
+	movne	r10, r0
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	str	r6, [r7, #44]
+	str	r9, [r7, #40]
+	movne	r1, r0
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	str	r1, [r7, #32]
+	ldr	r1, [sp, #192]                  @ 4-byte Reload
+	movne	r1, r0
+	cmp	r8, #0
+	str	r1, [r7, #28]
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	ldr	r0, [sp, #204]                  @ 4-byte Reload
+	str	r10, [r7, #36]
+	movne	r0, r1
+	ldr	r1, [sp, #144]                  @ 4-byte Reload
+	str	r0, [r7, #24]
+	ldr	r0, [sp, #200]                  @ 4-byte Reload
+	movne	r0, r1
+	ldr	r1, [sp, #148]                  @ 4-byte Reload
+	str	r0, [r7, #20]
+	ldr	r0, [sp, #196]                  @ 4-byte Reload
+	movne	r0, r1
+	ldr	r1, [sp, #152]                  @ 4-byte Reload
+	str	r0, [r7, #16]
+	cmp	r8, #0
+	ldr	r0, [sp, #208]                  @ 4-byte Reload
+	movne	r0, r1
+	ldr	r1, [sp, #156]                  @ 4-byte Reload
+	str	r0, [r7, #12]
+	ldr	r0, [sp, #212]                  @ 4-byte Reload
+	movne	r0, r1
+	ldr	r1, [sp, #228]                  @ 4-byte Reload
+	str	r0, [r7, #8]
+	ldr	r0, [sp, #216]                  @ 4-byte Reload
+	movne	r0, r1
+	ldr	r1, [sp, #232]                  @ 4-byte Reload
+	str	r0, [r7, #4]
+	cmp	r8, #0
+	ldr	r0, [sp, #236]                  @ 4-byte Reload
+	movne	r0, r1
+	str	r0, [r7]
+	add	sp, sp, #372
+	add	sp, sp, #1024
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end259:
-	.size	mcl_fp_montNF17L, .Lfunc_end259-mcl_fp_montNF17L
+.Lfunc_end79:
+	.size	mcl_fp_montRed16L, .Lfunc_end79-mcl_fp_montRed16L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_montRed17L
-	.align	2
-	.type	mcl_fp_montRed17L,%function
-mcl_fp_montRed17L:                      @ @mcl_fp_montRed17L
+                                        @ -- End function
+	.globl	mcl_fp_montRedNF16L             @ -- Begin function mcl_fp_montRedNF16L
+	.p2align	2
+	.type	mcl_fp_montRedNF16L,%function
+	.code	32                              @ @mcl_fp_montRedNF16L
+mcl_fp_montRedNF16L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#444
-	sub	sp, sp, #444
+	.pad	#372
+	sub	sp, sp, #372
 	.pad	#1024
 	sub	sp, sp, #1024
+	str	r0, [sp, #224]                  @ 4-byte Spill
 	mov	r3, r2
-	str	r0, [sp, #212]          @ 4-byte Spill
-	ldr	r2, [r1, #4]
-	ldr	r7, [r1]
-	ldr	r0, [r3]
-	str	r3, [sp, #236]          @ 4-byte Spill
-	str	r2, [sp, #116]          @ 4-byte Spill
-	ldr	r2, [r1, #8]
-	str	r0, [sp, #200]          @ 4-byte Spill
-	ldr	r0, [r3, #4]
-	str	r2, [sp, #112]          @ 4-byte Spill
-	ldr	r2, [r1, #12]
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [r3, #8]
-	str	r2, [sp, #108]          @ 4-byte Spill
-	str	r0, [sp, #192]          @ 4-byte Spill
-	ldr	r0, [r3, #12]
-	str	r0, [sp, #176]          @ 4-byte Spill
-	ldr	r0, [r3, #16]
-	str	r0, [sp, #180]          @ 4-byte Spill
-	ldr	r0, [r3, #20]
-	str	r0, [sp, #184]          @ 4-byte Spill
-	ldr	r0, [r3, #24]
-	str	r0, [sp, #188]          @ 4-byte Spill
-	ldr	r0, [r3, #-4]
-	str	r0, [sp, #232]          @ 4-byte Spill
-	mul	r2, r7, r0
+	ldr	r0, [r2]
+	add	lr, sp, #1024
+	str	r0, [sp, #216]                  @ 4-byte Spill
+	ldr	r0, [r2, #4]
+	str	r0, [sp, #212]                  @ 4-byte Spill
+	ldr	r0, [r2, #8]
+	str	r0, [sp, #208]                  @ 4-byte Spill
+	ldr	r0, [r2, #12]
+	str	r0, [sp, #192]                  @ 4-byte Spill
+	ldr	r0, [r2, #16]
+	str	r0, [sp, #196]                  @ 4-byte Spill
+	ldr	r0, [r2, #20]
+	str	r0, [sp, #200]                  @ 4-byte Spill
+	ldr	r0, [r2, #24]
+	str	r0, [sp, #204]                  @ 4-byte Spill
+	ldr	r0, [r1, #4]
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [r1, #8]
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [r1, #12]
+	str	r0, [sp, #148]                  @ 4-byte Spill
 	ldr	r0, [r3, #60]
-	str	r0, [sp, #204]          @ 4-byte Spill
-	ldr	r0, [r3, #64]
-	str	r0, [sp, #208]          @ 4-byte Spill
+	str	r0, [sp, #220]                  @ 4-byte Spill
 	ldr	r0, [r3, #28]
-	str	r0, [sp, #148]          @ 4-byte Spill
+	str	r0, [sp, #160]                  @ 4-byte Spill
+	ldr	r0, [r3, #32]
+	str	r0, [sp, #164]                  @ 4-byte Spill
 	ldr	r0, [r3, #36]
-	str	r0, [sp, #152]          @ 4-byte Spill
+	str	r0, [sp, #168]                  @ 4-byte Spill
 	ldr	r0, [r3, #40]
-	str	r0, [sp, #156]          @ 4-byte Spill
+	str	r0, [sp, #172]                  @ 4-byte Spill
 	ldr	r0, [r3, #44]
-	str	r0, [sp, #160]          @ 4-byte Spill
+	str	r0, [sp, #176]                  @ 4-byte Spill
 	ldr	r0, [r3, #48]
-	str	r0, [sp, #164]          @ 4-byte Spill
+	str	r0, [sp, #180]                  @ 4-byte Spill
 	ldr	r0, [r3, #52]
-	str	r0, [sp, #168]          @ 4-byte Spill
+	str	r0, [sp, #184]                  @ 4-byte Spill
 	ldr	r0, [r3, #56]
-	str	r0, [sp, #172]          @ 4-byte Spill
-	ldr	r0, [r3, #32]
-	str	r0, [sp, #144]          @ 4-byte Spill
-	ldr	r0, [r1, #128]
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [r1, #132]
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [r1, #96]
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [r1, #104]
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [r1, #108]
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [r1, #112]
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [r1, #116]
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [r1, #120]
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [r1, #124]
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [r1, #100]
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [r1, #64]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r1, #68]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r1, #72]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r1, #76]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r1, #80]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [r1, #84]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [r1, #88]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [r1, #92]
-	str	r0, [sp, #104]          @ 4-byte Spill
+	str	r0, [sp, #188]                  @ 4-byte Spill
 	ldr	r0, [r1, #32]
-	str	r0, [sp, #60]           @ 4-byte Spill
+	str	r0, [sp, #116]                  @ 4-byte Spill
 	ldr	r0, [r1, #36]
-	str	r0, [sp, #56]           @ 4-byte Spill
+	str	r0, [sp, #120]                  @ 4-byte Spill
 	ldr	r0, [r1, #40]
-	str	r0, [sp, #52]           @ 4-byte Spill
+	str	r0, [sp, #124]                  @ 4-byte Spill
 	ldr	r0, [r1, #44]
-	str	r0, [sp, #48]           @ 4-byte Spill
+	str	r0, [sp, #128]                  @ 4-byte Spill
 	ldr	r0, [r1, #48]
-	str	r0, [sp, #44]           @ 4-byte Spill
+	ldr	r7, [r2, #-4]
+	ldr	r9, [r1]
+	str	r0, [sp, #132]                  @ 4-byte Spill
 	ldr	r0, [r1, #52]
-	str	r0, [sp, #40]           @ 4-byte Spill
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	mul	r2, r9, r7
 	ldr	r0, [r1, #56]
-	str	r0, [sp, #36]           @ 4-byte Spill
+	str	r0, [sp, #140]                  @ 4-byte Spill
 	ldr	r0, [r1, #60]
-	str	r0, [sp, #32]           @ 4-byte Spill
+	str	r0, [sp, #144]                  @ 4-byte Spill
 	ldr	r0, [r1, #28]
-	str	r0, [sp, #64]           @ 4-byte Spill
+	str	r0, [sp, #112]                  @ 4-byte Spill
 	ldr	r0, [r1, #24]
-	str	r0, [sp, #68]           @ 4-byte Spill
+	str	r0, [sp, #108]                  @ 4-byte Spill
 	ldr	r0, [r1, #20]
-	str	r0, [sp, #72]           @ 4-byte Spill
+	str	r0, [sp, #104]                  @ 4-byte Spill
 	ldr	r0, [r1, #16]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	add	r0, lr, #296
+	str	r1, [sp, #236]                  @ 4-byte Spill
 	mov	r1, r3
-	str	r0, [sp, #20]           @ 4-byte Spill
-	add	r0, sp, #1392
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1460]
-	ldr	r11, [sp, #1392]
-	ldr	r1, [sp, #1400]
-	ldr	r2, [sp, #1404]
-	ldr	r3, [sp, #1408]
-	ldr	r12, [sp, #1412]
-	ldr	lr, [sp, #1416]
-	ldr	r4, [sp, #1420]
-	ldr	r5, [sp, #1424]
-	ldr	r6, [sp, #1428]
-	ldr	r8, [sp, #1432]
-	ldr	r9, [sp, #1436]
-	ldr	r10, [sp, #1440]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1456]
-	adds	r7, r7, r11
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1452]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1448]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1444]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #1396]
-	adcs	r7, r7, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #20]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #236]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	add	r9, sp, #1024
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #228]          @ 4-byte Spill
-	mov	r0, #0
-	adc	r0, r0, #0
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #232]          @ 4-byte Reload
-	mul	r2, r7, r0
-	add	r0, r9, #296
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1388]
-	ldr	r9, [sp, #1320]
-	ldr	r1, [sp, #1328]
-	ldr	r2, [sp, #1332]
-	ldr	r3, [sp, #1336]
-	ldr	r12, [sp, #1340]
-	ldr	r10, [sp, #1344]
-	ldr	lr, [sp, #1348]
-	ldr	r4, [sp, #1352]
-	ldr	r5, [sp, #1356]
-	ldr	r8, [sp, #1360]
-	ldr	r11, [sp, #1364]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1384]
-	adds	r7, r7, r9
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
+	str	r7, [sp, #228]                  @ 4-byte Spill
+	str	r3, [sp, #232]                  @ 4-byte Spill
+	bl	mulPv512x32
 	ldr	r0, [sp, #1380]
-	str	r0, [sp, #16]           @ 4-byte Spill
+	add	r11, sp, #1344
+	str	r0, [sp, #96]                   @ 4-byte Spill
 	ldr	r0, [sp, #1376]
-	str	r0, [sp, #12]           @ 4-byte Spill
+	str	r0, [sp, #92]                   @ 4-byte Spill
 	ldr	r0, [sp, #1372]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldr	r0, [sp, #1368]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #1324]
-	adcs	r7, r7, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #1320]
+	ldr	r1, [sp, #1324]
+	adds	r0, r9, r0
+	ldr	r2, [sp, #1328]
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	ldm	r11, {r4, r5, r6, r7, r8, r10, r11}
+	adcs	r9, r0, r1
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	ldr	lr, [sp, #1340]
 	adcs	r0, r0, r2
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	ldr	r12, [sp, #1336]
+	ldr	r3, [sp, #1332]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #148]                  @ 4-byte Reload
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r5
-	ldr	r5, [sp, #232]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	mul	r2, r7, r5
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
 	adcs	r0, r0, r11
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	mrs	r0, apsr
+	ldr	r7, [sp, #228]                  @ 4-byte Reload
+	ldr	r11, [sp, #236]                 @ 4-byte Reload
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	mul	r2, r7, r9
+	ldr	r6, [sp, #232]                  @ 4-byte Reload
+	ldr	r0, [r11, #64]
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #1384]
 	mov	r1, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #28]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #32]           @ 4-byte Spill
+	str	r0, [sp, #92]                   @ 4-byte Spill
 	add	r0, sp, #1248
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1316]
-	add	r10, sp, #1280
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1312]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1308]
-	str	r0, [sp, #20]           @ 4-byte Spill
+	bl	mulPv512x32
 	ldr	r0, [sp, #1304]
-	str	r0, [sp, #16]           @ 4-byte Spill
+	str	r0, [sp, #64]                   @ 4-byte Spill
 	ldr	r0, [sp, #1300]
-	str	r0, [sp, #12]           @ 4-byte Spill
-	ldr	r0, [sp, #1296]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	ldm	r10, {r4, r6, r9, r10}
-	ldr	r8, [sp, #1248]
-	ldr	r0, [sp, #1252]
-	ldr	r1, [sp, #1256]
-	ldr	r2, [sp, #1260]
-	ldr	r3, [sp, #1264]
-	ldr	r12, [sp, #1268]
-	ldr	lr, [sp, #1272]
-	ldr	r11, [sp, #1276]
-	adds	r7, r7, r8
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	adcs	r7, r7, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r7, r5
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	add	r9, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #12]           @ 4-byte Reload
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #236]          @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #32]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #36]           @ 4-byte Spill
-	add	r0, r9, #152
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1244]
-	ldr	r9, [sp, #1176]
-	ldr	r1, [sp, #1184]
-	ldr	r2, [sp, #1188]
-	ldr	r3, [sp, #1192]
-	ldr	r12, [sp, #1196]
-	ldr	lr, [sp, #1200]
-	ldr	r4, [sp, #1204]
-	ldr	r5, [sp, #1208]
-	ldr	r6, [sp, #1212]
-	ldr	r8, [sp, #1216]
-	ldr	r10, [sp, #1220]
-	ldr	r11, [sp, #1224]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1240]
-	adds	r7, r7, r9
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1236]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1232]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #1228]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #1180]
-	adcs	r7, r7, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	str	r7, [sp, #12]           @ 4-byte Spill
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r5, [sp, #1248]
+	ldr	r4, [sp, #1252]
+	ldr	r2, [sp, #68]                   @ 4-byte Reload
+	adds	r5, r9, r5
+	ldr	r0, [sp, #1256]
+	adcs	r8, r2, r4
+	ldr	r4, [sp, #148]                  @ 4-byte Reload
+	ldr	r1, [sp, #1260]
+	adcs	r0, r4, r0
+	ldr	r4, [sp, #72]                   @ 4-byte Reload
+	ldr	lr, [sp, #1296]
+	ldr	r10, [sp, #1292]
+	adcs	r1, r4, r1
+	ldr	r12, [sp, #1288]
+	ldr	r3, [sp, #1284]
+	ldr	r5, [sp, #1280]
+	ldr	r2, [sp, #1276]
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #1272]
+	str	r1, [sp, #144]                  @ 4-byte Spill
+	ldr	r1, [sp, #1264]
+	ldr	r4, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r4, r1
+	str	r1, [sp, #140]                  @ 4-byte Spill
+	ldr	r1, [sp, #1268]
+	ldr	r4, [sp, #156]                  @ 4-byte Reload
+	adcs	r1, r4, r1
+	str	r1, [sp, #136]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	mul	r2, r7, r8
+	adcs	r0, r0, r5
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	adcs	r0, r0, r10
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	add	lr, sp, #1024
+	adcs	r0, r0, r1
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #80]                   @ 4-byte Reload
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #152]                  @ 4-byte Reload
+	ldr	r3, [sp, #92]                   @ 4-byte Reload
+	ldr	r0, [sp, #1312]
+	adcs	r1, r1, r3
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r11, #68]
+	mov	r1, r6
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #1308]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	add	r0, lr, #152
+	bl	mulPv512x32
+	ldr	r7, [sp, #1176]
+	ldr	r0, [sp, #1180]
+	ldr	r2, [sp, #148]                  @ 4-byte Reload
+	adds	r4, r8, r7
+	ldr	r1, [sp, #1184]
+	adcs	r4, r2, r0
+	ldr	r2, [sp, #144]                  @ 4-byte Reload
+	ldr	r10, [sp, #1188]
+	adcs	r1, r2, r1
+	ldr	r2, [sp, #140]                  @ 4-byte Reload
+	ldr	r11, [sp, #1228]
+	ldr	r9, [sp, #1224]
+	adcs	r2, r2, r10
+	ldr	lr, [sp, #1220]
+	ldr	r12, [sp, #1216]
+	ldr	r6, [sp, #1212]
+	ldr	r5, [sp, #1208]
+	ldr	r7, [sp, #1204]
+	ldr	r0, [sp, #1200]
+	str	r1, [sp, #148]                  @ 4-byte Spill
+	ldr	r1, [sp, #1196]
+	str	r2, [sp, #144]                  @ 4-byte Spill
+	ldr	r2, [sp, #1192]
+	ldr	r3, [sp, #136]                  @ 4-byte Reload
+	adcs	r2, r3, r2
+	str	r2, [sp, #140]                  @ 4-byte Spill
+	ldr	r2, [sp, #132]                  @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #136]                  @ 4-byte Spill
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
 	adcs	r0, r0, r5
-	ldr	r5, [sp, #236]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
 	adcs	r0, r0, r6
-	ldr	r6, [sp, #232]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	mul	r2, r7, r6
-	adcs	r0, r0, r8
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	adcs	r0, r0, r9
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
 	adcs	r0, r0, r11
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #36]           @ 4-byte Reload
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	ldr	r0, [sp, #1240]
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	ldr	r7, [sp, #236]                  @ 4-byte Reload
+	adcs	r1, r3, r1
+	ldr	r6, [sp, #228]                  @ 4-byte Reload
 	adc	r0, r0, #0
-	str	r0, [sp, #40]           @ 4-byte Spill
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r7, #72]
+	mul	r2, r6, r4
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #1236]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r8, [sp, #232]                  @ 4-byte Reload
+	ldr	r0, [sp, #1232]
+	str	r0, [sp, #80]                   @ 4-byte Spill
 	add	r0, sp, #1104
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1172]
-	ldr	r4, [sp, #1104]
-	ldr	r9, [sp, #12]           @ 4-byte Reload
-	ldr	r1, [sp, #1112]
-	ldr	r2, [sp, #1116]
-	ldr	r3, [sp, #1120]
-	ldr	r12, [sp, #1124]
-	ldr	r10, [sp, #1128]
-	ldr	r11, [sp, #1132]
-	ldr	lr, [sp, #1136]
-	ldr	r7, [sp, #1140]
-	ldr	r8, [sp, #1144]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #1168]
-	adds	r4, r9, r4
-	ldr	r4, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #1164]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #1160]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1156]
-	str	r0, [sp, #20]           @ 4-byte Spill
+	mov	r1, r8
+	bl	mulPv512x32
 	ldr	r0, [sp, #1152]
-	str	r0, [sp, #16]           @ 4-byte Spill
+	str	r0, [sp, #156]                  @ 4-byte Spill
 	ldr	r0, [sp, #1148]
-	str	r0, [sp, #8]            @ 4-byte Spill
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r5, [sp, #1104]
 	ldr	r0, [sp, #1108]
-	adcs	r4, r4, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #8]            @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r4, r6
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	adds	r4, r4, r5
+	ldr	r1, [sp, #1112]
+	ldr	r4, [sp, #148]                  @ 4-byte Reload
+	ldr	r11, [sp, #144]                 @ 4-byte Reload
+	adcs	r9, r4, r0
+	ldr	r2, [sp, #1116]
+	ldr	r4, [sp, #140]                  @ 4-byte Reload
+	adcs	r1, r11, r1
+	ldr	r10, [sp, #1144]
+	ldr	lr, [sp, #1140]
+	adcs	r2, r4, r2
+	ldr	r12, [sp, #1136]
+	ldr	r3, [sp, #1132]
+	ldr	r5, [sp, #1128]
+	ldr	r0, [sp, #1124]
+	str	r1, [sp, #148]                  @ 4-byte Spill
+	ldr	r1, [sp, #1120]
+	str	r2, [sp, #144]                  @ 4-byte Spill
+	ldr	r2, [sp, #136]                  @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #140]                  @ 4-byte Spill
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	mul	r2, r6, r9
+	adcs	r0, r1, r0
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	mov	r7, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	add	r8, sp, #1024
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	add	lr, sp, #1024
+	adcs	r0, r0, r10
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #104]                  @ 4-byte Reload
+	ldr	r1, [sp, #156]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	mov	r1, r5
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #40]           @ 4-byte Reload
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	ldr	r0, [sp, #1168]
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
 	adc	r0, r0, #0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	add	r0, r8, #8
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1100]
-	ldr	r8, [sp, #1032]
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r7, #76]
+	mov	r1, r8
+	str	r0, [sp, #88]                   @ 4-byte Spill
+	ldr	r0, [sp, #1164]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #1160]
+	str	r0, [sp, #80]                   @ 4-byte Spill
+	ldr	r0, [sp, #1156]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	add	r0, lr, #8
+	bl	mulPv512x32
+	ldr	r5, [sp, #1032]
+	ldr	r0, [sp, #1036]
+	adds	r4, r9, r5
 	ldr	r1, [sp, #1040]
+	ldr	r4, [sp, #148]                  @ 4-byte Reload
+	ldr	r11, [sp, #1076]
+	adcs	r10, r4, r0
+	ldr	r4, [sp, #144]                  @ 4-byte Reload
+	ldr	lr, [sp, #1072]
+	adcs	r1, r4, r1
+	ldr	r12, [sp, #1068]
+	ldr	r3, [sp, #1064]
+	ldr	r6, [sp, #1060]
+	ldr	r7, [sp, #1056]
 	ldr	r2, [sp, #1044]
-	ldr	r3, [sp, #1048]
-	ldr	r12, [sp, #1052]
-	ldr	lr, [sp, #1056]
-	ldr	r4, [sp, #1060]
-	ldr	r5, [sp, #1064]
-	ldr	r6, [sp, #1068]
-	ldr	r9, [sp, #1072]
-	ldr	r10, [sp, #1076]
-	ldr	r11, [sp, #1080]
-	str	r0, [sp, #40]           @ 4-byte Spill
+	ldr	r5, [sp, #1052]
+	ldr	r0, [sp, #1048]
+	str	r1, [sp, #148]                  @ 4-byte Spill
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #144]                  @ 4-byte Spill
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r6
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r0, lr
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #108]                  @ 4-byte Reload
+	adcs	r0, r0, r11
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
 	ldr	r0, [sp, #1096]
-	adds	r7, r7, r8
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	ldr	r7, [sp, #236]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	ldr	r6, [sp, #228]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	mul	r2, r6, r10
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r7, #80]
+	mov	r1, r8
+	str	r0, [sp, #88]                   @ 4-byte Spill
 	ldr	r0, [sp, #1092]
-	str	r0, [sp, #32]           @ 4-byte Spill
+	str	r0, [sp, #84]                   @ 4-byte Spill
 	ldr	r0, [sp, #1088]
-	str	r0, [sp, #28]           @ 4-byte Spill
+	str	r0, [sp, #80]                   @ 4-byte Spill
 	ldr	r0, [sp, #1084]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1036]
-	adcs	r7, r7, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	str	r7, [sp, #20]           @ 4-byte Spill
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #1080]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	add	r0, sp, #960
+	bl	mulPv512x32
+	ldr	r5, [sp, #960]
+	add	r2, sp, #964
+	add	r11, sp, #992
+	add	lr, sp, #980
+	adds	r4, r10, r5
+	ldm	r2, {r0, r1, r2}
+	ldr	r4, [sp, #148]                  @ 4-byte Reload
+	ldm	r11, {r8, r9, r11}
+	adcs	r10, r4, r0
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
+	ldm	lr, {r3, r12, lr}
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	ldr	r5, [sp, #976]
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
 	adcs	r0, r0, r2
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	mul	r2, r6, r10
+	adcs	r0, r0, r5
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #232]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	mul	r2, r7, r5
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #236]          @ 4-byte Reload
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r8
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
 	adcs	r0, r0, r9
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #112]                  @ 4-byte Reload
 	adcs	r0, r0, r11
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r6
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #48]           @ 4-byte Spill
-	add	r0, sp, #960
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #1028]
-	add	lr, sp, #984
-	add	r12, sp, #964
-	ldr	r8, [sp, #1000]
-	ldr	r7, [sp, #996]
-	str	r0, [sp, #44]           @ 4-byte Spill
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
 	ldr	r0, [sp, #1024]
-	str	r0, [sp, #40]           @ 4-byte Spill
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	ldr	r4, [sp, #232]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r7, #84]
+	mov	r1, r4
+	str	r0, [sp, #88]                   @ 4-byte Spill
 	ldr	r0, [sp, #1020]
-	str	r0, [sp, #36]           @ 4-byte Spill
+	str	r0, [sp, #84]                   @ 4-byte Spill
 	ldr	r0, [sp, #1016]
-	str	r0, [sp, #32]           @ 4-byte Spill
+	str	r0, [sp, #80]                   @ 4-byte Spill
 	ldr	r0, [sp, #1012]
-	str	r0, [sp, #28]           @ 4-byte Spill
+	str	r0, [sp, #76]                   @ 4-byte Spill
 	ldr	r0, [sp, #1008]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #1004]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldm	lr, {r10, r11, lr}
-	ldr	r4, [sp, #960]
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r9, [sp, #20]           @ 4-byte Reload
-	adds	r4, r9, r4
-	ldr	r4, [sp, #116]          @ 4-byte Reload
-	adcs	r4, r4, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	add	r0, sp, #888
+	ldr	r11, [sp, #1004]
+	bl	mulPv512x32
+	ldr	r6, [sp, #888]
+	add	r7, sp, #892
+	add	lr, sp, #912
+	ldr	r8, [sp, #924]
+	adds	r6, r10, r6
+	ldm	r7, {r0, r1, r2, r5, r7}
+	ldr	r6, [sp, #148]                  @ 4-byte Reload
+	ldm	lr, {r3, r12, lr}
+	adcs	r9, r6, r0
+	ldr	r0, [sp, #144]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
 	adcs	r0, r0, r2
-	mul	r2, r4, r5
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r0, r7
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
 	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
 	adcs	r0, r0, lr
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	mov	r7, r4
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	ldr	r0, [sp, #116]                  @ 4-byte Reload
 	adcs	r0, r0, r8
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r6
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	add	r0, sp, #888
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #956]
-	add	r11, sp, #916
-	add	lr, sp, #892
-	str	r0, [sp, #48]           @ 4-byte Spill
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
 	ldr	r0, [sp, #952]
-	str	r0, [sp, #44]           @ 4-byte Spill
+	adcs	r1, r1, r11
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	ldr	r8, [sp, #236]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	ldr	r6, [sp, #228]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	mul	r2, r6, r9
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r8, #88]
+	mov	r1, r4
+	str	r0, [sp, #88]                   @ 4-byte Spill
 	ldr	r0, [sp, #948]
-	str	r0, [sp, #40]           @ 4-byte Spill
+	str	r0, [sp, #84]                   @ 4-byte Spill
 	ldr	r0, [sp, #944]
-	str	r0, [sp, #36]           @ 4-byte Spill
+	str	r0, [sp, #80]                   @ 4-byte Spill
 	ldr	r0, [sp, #940]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldm	r11, {r4, r5, r6, r9, r10, r11}
-	ldr	r8, [sp, #888]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r7, r7, r8
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	adcs	r7, r7, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	str	r7, [sp, #28]           @ 4-byte Spill
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #232]          @ 4-byte Reload
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	mul	r2, r7, r5
-	ldr	r7, [sp, #236]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r7
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #936]
+	str	r0, [sp, #72]                   @ 4-byte Spill
 	add	r0, sp, #816
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #884]
-	add	lr, sp, #840
-	add	r12, sp, #820
-	ldr	r8, [sp, #856]
-	ldr	r6, [sp, #852]
-	str	r0, [sp, #52]           @ 4-byte Spill
+	ldr	r11, [sp, #932]
+	ldr	r10, [sp, #928]
+	bl	mulPv512x32
+	ldr	r4, [sp, #816]
+	add	r5, sp, #824
+	ldr	r7, [sp, #820]
+	adds	r4, r9, r4
+	ldm	r5, {r0, r1, r5}
+	ldr	r4, [sp, #148]                  @ 4-byte Reload
+	ldr	r12, [sp, #848]
+	adcs	r4, r4, r7
+	ldr	r7, [sp, #144]                  @ 4-byte Reload
+	ldr	lr, [sp, #844]
+	adcs	r0, r7, r0
+	ldr	r2, [sp, #840]
+	ldr	r3, [sp, #836]
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #140]                  @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
+	adcs	r0, r0, r5
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r0, r3
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	mul	r2, r6, r4
+	adcs	r0, r0, lr
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r0, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #124]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
 	ldr	r0, [sp, #880]
-	str	r0, [sp, #48]           @ 4-byte Spill
+	adcs	r1, r1, r10
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r11
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r10, [sp, #232]                 @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r8, #92]
+	mov	r1, r10
+	str	r0, [sp, #88]                   @ 4-byte Spill
 	ldr	r0, [sp, #876]
-	str	r0, [sp, #44]           @ 4-byte Spill
+	str	r0, [sp, #84]                   @ 4-byte Spill
 	ldr	r0, [sp, #872]
-	str	r0, [sp, #40]           @ 4-byte Spill
+	str	r0, [sp, #80]                   @ 4-byte Spill
 	ldr	r0, [sp, #868]
-	str	r0, [sp, #36]           @ 4-byte Spill
+	str	r0, [sp, #76]                   @ 4-byte Spill
 	ldr	r0, [sp, #864]
-	str	r0, [sp, #32]           @ 4-byte Spill
+	str	r0, [sp, #72]                   @ 4-byte Spill
 	ldr	r0, [sp, #860]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	lr, {r10, r11, lr}
-	ldr	r4, [sp, #816]
-	ldm	r12, {r0, r1, r2, r3, r12}
-	ldr	r9, [sp, #28]           @ 4-byte Reload
-	adds	r4, r9, r4
-	ldr	r4, [sp, #116]          @ 4-byte Reload
-	adcs	r4, r4, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r4, r5
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #852]
+	ldr	r8, [sp, #856]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	add	r0, sp, #744
+	bl	mulPv512x32
+	add	r7, sp, #744
+	ldr	r12, [sp, #772]
+	ldr	r1, [sp, #768]
+	ldm	r7, {r5, r6, r7}
+	adds	r5, r4, r5
+	ldr	r2, [sp, #764]
+	ldr	r5, [sp, #148]                  @ 4-byte Reload
+	ldr	r3, [sp, #760]
+	adcs	r9, r5, r6
+	ldr	r6, [sp, #144]                  @ 4-byte Reload
+	ldr	r0, [sp, #756]
+	adcs	r7, r6, r7
+	str	r7, [sp, #148]                  @ 4-byte Spill
+	ldr	r7, [sp, #140]                  @ 4-byte Reload
+	adcs	r0, r7, r0
+	str	r0, [sp, #144]                  @ 4-byte Spill
+	ldr	r0, [sp, #136]                  @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r11
-	mov	r11, r4
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
+	str	r0, [sp, #140]                  @ 4-byte Spill
+	ldr	r0, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	ldr	r0, [sp, #128]                  @ 4-byte Reload
 	adcs	r0, r0, r1
-	mov	r1, r7
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	add	r0, sp, #744
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #812]
-	add	r10, sp, #768
-	add	lr, sp, #744
-	str	r0, [sp, #56]           @ 4-byte Spill
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #124]                  @ 4-byte Reload
+	adcs	r0, r0, r12
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
 	ldr	r0, [sp, #808]
-	str	r0, [sp, #52]           @ 4-byte Spill
+	adcs	r1, r1, r3
+	str	r1, [sp, #124]                  @ 4-byte Spill
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	ldr	r3, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r4, [sp, #236]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	ldr	r11, [sp, #228]                 @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	mul	r2, r11, r9
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r4, #96]
+	mov	r1, r10
+	str	r0, [sp, #88]                   @ 4-byte Spill
 	ldr	r0, [sp, #804]
-	str	r0, [sp, #48]           @ 4-byte Spill
+	str	r0, [sp, #84]                   @ 4-byte Spill
 	ldr	r0, [sp, #800]
-	str	r0, [sp, #44]           @ 4-byte Spill
+	str	r0, [sp, #80]                   @ 4-byte Spill
 	ldr	r0, [sp, #796]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #232]          @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	mul	r2, r11, r5
-	adcs	r0, r0, r6
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #236]          @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #64]           @ 4-byte Spill
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #792]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #788]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #784]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #780]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #776]
+	str	r0, [sp, #56]                   @ 4-byte Spill
 	add	r0, sp, #672
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #740]
-	add	r9, sp, #704
-	add	r12, sp, #676
-	str	r0, [sp, #60]           @ 4-byte Spill
+	bl	mulPv512x32
+	add	r7, sp, #672
+	ldr	r0, [sp, #696]
+	ldr	r1, [sp, #692]
+	ldm	r7, {r3, r5, r6, r7}
+	adds	r3, r9, r3
+	ldr	r2, [sp, #688]
+	ldr	r3, [sp, #148]                  @ 4-byte Reload
+	adcs	r8, r3, r5
+	ldr	r3, [sp, #144]                  @ 4-byte Reload
+	adcs	r3, r3, r6
+	str	r3, [sp, #148]                  @ 4-byte Spill
+	ldr	r3, [sp, #140]                  @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #144]                  @ 4-byte Spill
+	ldr	r3, [sp, #136]                  @ 4-byte Reload
+	adcs	r2, r3, r2
+	str	r2, [sp, #140]                  @ 4-byte Spill
+	ldr	r2, [sp, #132]                  @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #136]                  @ 4-byte Spill
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	mul	r2, r11, r8
+	adcs	r0, r1, r0
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
 	ldr	r0, [sp, #736]
-	str	r0, [sp, #56]           @ 4-byte Spill
+	adcs	r1, r1, r3
+	str	r1, [sp, #128]                  @ 4-byte Spill
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	ldr	r5, [sp, #232]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #124]                  @ 4-byte Spill
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r3, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	ldr	r0, [r4, #100]
+	mov	r1, r5
+	str	r0, [sp, #88]                   @ 4-byte Spill
 	ldr	r0, [sp, #732]
-	str	r0, [sp, #52]           @ 4-byte Spill
+	str	r0, [sp, #84]                   @ 4-byte Spill
 	ldr	r0, [sp, #728]
-	str	r0, [sp, #48]           @ 4-byte Spill
+	str	r0, [sp, #80]                   @ 4-byte Spill
 	ldr	r0, [sp, #724]
-	str	r0, [sp, #44]           @ 4-byte Spill
+	str	r0, [sp, #76]                   @ 4-byte Spill
 	ldr	r0, [sp, #720]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldm	r9, {r6, r7, r8, r9}
-	ldr	r4, [sp, #672]
-	ldr	lr, [sp, #700]
-	ldr	r10, [sp, #696]
-	ldm	r12, {r0, r1, r2, r3, r12}
-	adds	r4, r11, r4
-	ldr	r4, [sp, #116]          @ 4-byte Reload
-	adcs	r11, r4, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r4, [sp, #236]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r5
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #68]           @ 4-byte Spill
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #716]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #712]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #704]
+	ldr	r9, [sp, #708]
+	str	r0, [sp, #60]                   @ 4-byte Spill
 	add	r0, sp, #600
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #668]
-	add	r10, sp, #624
-	add	lr, sp, #600
-	str	r0, [sp, #64]           @ 4-byte Spill
+	ldr	r10, [sp, #700]
+	bl	mulPv512x32
+	add	r7, sp, #600
+	ldr	r0, [sp, #620]
+	ldr	r1, [sp, #616]
+	ldm	r7, {r2, r3, r6, r7}
+	adds	r2, r8, r2
+	ldr	r2, [sp, #148]                  @ 4-byte Reload
+	adcs	r4, r2, r3
+	ldr	r2, [sp, #144]                  @ 4-byte Reload
+	adcs	r2, r2, r6
+	str	r2, [sp, #148]                  @ 4-byte Spill
+	ldr	r2, [sp, #140]                  @ 4-byte Reload
+	adcs	r2, r2, r7
+	str	r2, [sp, #144]                  @ 4-byte Spill
+	ldr	r2, [sp, #136]                  @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [sp, #140]                  @ 4-byte Spill
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	mul	r2, r11, r4
+	adcs	r0, r1, r0
+	str	r0, [sp, #136]                  @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #152]                  @ 4-byte Spill
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
 	ldr	r0, [sp, #664]
-	str	r0, [sp, #60]           @ 4-byte Spill
+	adcs	r1, r1, r10
+	str	r1, [sp, #156]                  @ 4-byte Spill
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	ldr	r7, [sp, #236]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #132]                  @ 4-byte Spill
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	adcs	r1, r1, r9
+	str	r1, [sp, #128]                  @ 4-byte Spill
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #124]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r3, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #100]                  @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [r7, #104]
+	mov	r1, r5
+	str	r0, [sp, #80]                   @ 4-byte Spill
 	ldr	r0, [sp, #660]
-	str	r0, [sp, #56]           @ 4-byte Spill
+	str	r0, [sp, #76]                   @ 4-byte Spill
 	ldr	r0, [sp, #656]
-	str	r0, [sp, #52]           @ 4-byte Spill
+	str	r0, [sp, #72]                   @ 4-byte Spill
 	ldr	r0, [sp, #652]
-	str	r0, [sp, #48]           @ 4-byte Spill
+	str	r0, [sp, #68]                   @ 4-byte Spill
 	ldr	r0, [sp, #648]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r9
-	ldr	r9, [sp, #232]          @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	mul	r2, r11, r9
-	adcs	r0, r0, r10
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r4
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #72]           @ 4-byte Spill
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #644]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #640]
+	str	r0, [sp, #56]                   @ 4-byte Spill
+	ldr	r0, [sp, #636]
+	str	r0, [sp, #52]                   @ 4-byte Spill
 	add	r0, sp, #528
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #596]
-	add	r8, sp, #560
-	add	r12, sp, #532
-	str	r0, [sp, #68]           @ 4-byte Spill
+	ldr	r8, [sp, #632]
+	ldr	r10, [sp, #628]
+	ldr	r9, [sp, #624]
+	bl	mulPv512x32
+	add	r6, sp, #528
+	ldr	r0, [sp, #544]
+	mov	r5, r7
+	ldm	r6, {r1, r2, r3, r6}
+	adds	r1, r4, r1
+	ldr	r1, [sp, #148]                  @ 4-byte Reload
+	adcs	r4, r1, r2
+	ldr	r1, [sp, #144]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	mul	r2, r11, r4
+	adcs	r1, r1, r6
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #148]                  @ 4-byte Spill
+	ldr	r0, [sp, #152]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r1, [sp, #156]                  @ 4-byte Reload
 	ldr	r0, [sp, #592]
-	str	r0, [sp, #64]           @ 4-byte Spill
+	adcs	r1, r1, r9
+	str	r1, [sp, #152]                  @ 4-byte Spill
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	adcs	r1, r1, r10
+	str	r1, [sp, #144]                  @ 4-byte Spill
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	add	r10, sp, #548
+	ldr	r6, [sp, #232]                  @ 4-byte Reload
+	adcs	r1, r1, r8
+	str	r1, [sp, #140]                  @ 4-byte Spill
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #136]                  @ 4-byte Spill
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #132]                  @ 4-byte Spill
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #128]                  @ 4-byte Spill
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r3, [sp, #64]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #124]                  @ 4-byte Spill
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r3, [sp, #68]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	ldr	r1, [sp, #100]                  @ 4-byte Reload
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	adcs	r1, r1, r3
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	adcs	r1, r3, r1
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	adc	r0, r0, #0
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [r7, #108]
+	mov	r1, r6
+	str	r0, [sp, #100]                  @ 4-byte Spill
 	ldr	r0, [sp, #588]
-	str	r0, [sp, #60]           @ 4-byte Spill
+	str	r0, [sp, #80]                   @ 4-byte Spill
 	ldr	r0, [sp, #584]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #580]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #576]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldm	r8, {r5, r6, r7, r8}
-	ldr	r4, [sp, #528]
-	ldr	lr, [sp, #556]
-	ldr	r10, [sp, #552]
-	ldm	r12, {r0, r1, r2, r3, r12}
-	adds	r4, r11, r4
-	ldr	r4, [sp, #116]          @ 4-byte Reload
-	adcs	r11, r4, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	mov	r4, r9
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	mul	r2, r11, r4
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #236]          @ 4-byte Reload
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #580]
+	str	r0, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #576]
+	str	r0, [sp, #68]                   @ 4-byte Spill
+	ldr	r0, [sp, #572]
+	str	r0, [sp, #64]                   @ 4-byte Spill
+	ldr	r0, [sp, #568]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #564]
+	str	r0, [sp, #56]                   @ 4-byte Spill
 	add	r0, sp, #456
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #524]
-	add	r10, sp, #480
-	add	lr, sp, #456
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #520]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #516]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #512]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #508]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #504]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldm	r10, {r5, r6, r7, r8, r9, r10}
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r0, r11, r0
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	ldr	r1, [sp, #52]           @ 4-byte Reload
+	ldm	r10, {r7, r8, r10}
+	ldr	r9, [sp, #560]
+	bl	mulPv512x32
+	add	r3, sp, #456
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r4, r0
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r4, r0, r1
+	ldr	r0, [sp, #88]                   @ 4-byte Reload
 	adcs	r0, r0, r2
+	str	r0, [sp, #96]                   @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
 	mul	r2, r11, r4
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #108]          @ 4-byte Reload
 	adcs	r0, r0, r3
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	ldr	r6, [sp, #236]          @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r7
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	mov	r1, r6
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #60]           @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #56]           @ 4-byte Spill
-	ldr	r0, [sp, #228]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #52]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #76]           @ 4-byte Spill
+	str	r0, [sp, #92]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	ldr	r0, [sp, #148]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r3, [sp, #152]                  @ 4-byte Reload
+	ldr	r1, [sp, #520]
 	add	r0, sp, #384
-	bl	.LmulPv544x32(PLT)
-	ldr	r0, [sp, #452]
-	add	r10, sp, #412
-	add	lr, sp, #388
-	str	r0, [sp, #48]           @ 4-byte Spill
-	ldr	r0, [sp, #448]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [sp, #444]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #440]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #436]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #432]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r5, r7, r8, r9, r10}
-	ldr	r4, [sp, #384]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	adds	r4, r11, r4
-	ldr	r4, [sp, #116]          @ 4-byte Reload
-	adcs	r4, r4, r0
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	adcs	r0, r0, r2
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r5
-	ldr	r5, [sp, #232]          @ 4-byte Reload
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	mul	r2, r4, r5
-	adcs	r0, r0, r7
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r10
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #128]          @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #124]          @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #120]          @ 4-byte Spill
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #116]          @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r0, [sp, #112]          @ 4-byte Spill
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	adcs	r0, r0, r1
+	adcs	r3, r3, r7
+	str	r3, [sp, #152]                  @ 4-byte Spill
+	ldr	r3, [sp, #144]                  @ 4-byte Reload
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	adcs	r3, r3, r8
+	str	r3, [sp, #148]                  @ 4-byte Spill
+	ldr	r3, [sp, #140]                  @ 4-byte Reload
+	adcs	r3, r3, r10
+	str	r3, [sp, #144]                  @ 4-byte Spill
+	ldr	r3, [sp, #136]                  @ 4-byte Reload
+	adcs	r3, r3, r9
+	str	r3, [sp, #140]                  @ 4-byte Spill
+	ldr	r3, [sp, #132]                  @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #136]                  @ 4-byte Spill
+	ldr	r3, [sp, #128]                  @ 4-byte Reload
+	ldr	r7, [sp, #60]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #132]                  @ 4-byte Spill
+	ldr	r3, [sp, #124]                  @ 4-byte Reload
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #128]                  @ 4-byte Spill
+	ldr	r3, [sp, #120]                  @ 4-byte Reload
+	ldr	r7, [sp, #68]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #124]                  @ 4-byte Spill
+	ldr	r3, [sp, #116]                  @ 4-byte Reload
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #120]                  @ 4-byte Spill
+	ldr	r3, [sp, #112]                  @ 4-byte Reload
+	ldr	r7, [sp, #76]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #116]                  @ 4-byte Spill
+	ldr	r3, [sp, #108]                  @ 4-byte Reload
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #112]                  @ 4-byte Spill
+	ldr	r3, [sp, #104]                  @ 4-byte Reload
+	ldr	r7, [sp, #100]                  @ 4-byte Reload
+	adcs	r3, r7, r3
+	str	r3, [sp, #108]                  @ 4-byte Spill
+	adc	r1, r1, #0
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [r5, #112]
+	mov	r5, r6
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #516]
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #512]
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #508]
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #504]
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #500]
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	ldr	r1, [sp, #496]
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [sp, #492]
+	str	r1, [sp, #48]                   @ 4-byte Spill
 	mov	r1, r6
-	str	r0, [sp, #108]          @ 4-byte Spill
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #104]          @ 4-byte Spill
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #96]           @ 4-byte Spill
+	ldr	r8, [sp, #488]
+	ldr	r9, [sp, #484]
+	ldr	r7, [sp, #480]
+	ldr	r10, [sp, #476]
+	ldr	r11, [sp, #472]
+	bl	mulPv512x32
+	add	r3, sp, #384
+	ldm	r3, {r0, r1, r2, r3}
+	adds	r0, r4, r0
+	str	r3, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #96]                   @ 4-byte Reload
+	adcs	r4, r0, r1
+	ldr	r0, [sp, #92]                   @ 4-byte Reload
+	adcs	r0, r0, r2
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	mrs	r0, apsr
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #156]                  @ 4-byte Reload
+	msr	APSR_nzcvq, r0
+	ldr	r3, [sp, #152]                  @ 4-byte Reload
+	ldr	r1, [sp, #448]
 	add	r0, sp, #312
-	bl	.LmulPv544x32(PLT)
-	add	r6, sp, #312
-	add	r10, sp, #356
-	add	lr, sp, #328
-	ldm	r6, {r0, r1, r3, r6}
+	adcs	r3, r3, r11
+	str	r3, [sp, #60]                   @ 4-byte Spill
+	ldr	r3, [sp, #148]                  @ 4-byte Reload
+	ldr	r6, [sp, #228]                  @ 4-byte Reload
+	adcs	r10, r3, r10
+	ldr	r3, [sp, #144]                  @ 4-byte Reload
+	adcs	r11, r3, r7
+	ldr	r3, [sp, #140]                  @ 4-byte Reload
+	ldr	r7, [sp, #48]                   @ 4-byte Reload
+	mul	r2, r6, r4
+	adcs	r3, r3, r9
+	str	r3, [sp, #152]                  @ 4-byte Spill
+	ldr	r3, [sp, #136]                  @ 4-byte Reload
+	adcs	r8, r3, r8
+	ldr	r3, [sp, #132]                  @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #148]                  @ 4-byte Spill
+	ldr	r3, [sp, #128]                  @ 4-byte Reload
+	ldr	r7, [sp, #52]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #144]                  @ 4-byte Spill
+	ldr	r3, [sp, #124]                  @ 4-byte Reload
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #140]                  @ 4-byte Spill
+	ldr	r3, [sp, #120]                  @ 4-byte Reload
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #136]                  @ 4-byte Spill
+	ldr	r3, [sp, #116]                  @ 4-byte Reload
+	ldr	r7, [sp, #68]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #132]                  @ 4-byte Spill
+	ldr	r3, [sp, #112]                  @ 4-byte Reload
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #128]                  @ 4-byte Spill
+	ldr	r3, [sp, #108]                  @ 4-byte Reload
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	adcs	r3, r3, r7
+	str	r3, [sp, #124]                  @ 4-byte Spill
+	ldr	r3, [sp, #104]                  @ 4-byte Reload
+	ldr	r7, [sp, #88]                   @ 4-byte Reload
+	adcs	r3, r7, r3
+	ldr	r7, [sp, #236]                  @ 4-byte Reload
+	adc	r1, r1, #0
+	str	r1, [sp, #116]                  @ 4-byte Spill
+	str	r3, [sp, #120]                  @ 4-byte Spill
+	ldr	r1, [r7, #116]
+	str	r1, [sp, #112]                  @ 4-byte Spill
+	ldr	r1, [sp, #444]
+	str	r1, [sp, #108]                  @ 4-byte Spill
+	ldr	r1, [sp, #440]
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	ldr	r1, [sp, #436]
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	ldr	r1, [sp, #432]
+	str	r1, [sp, #92]                   @ 4-byte Spill
+	ldr	r1, [sp, #428]
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #424]
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #420]
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #416]
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	ldr	r1, [sp, #412]
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #408]
+	str	r1, [sp, #36]                   @ 4-byte Spill
+	ldr	r1, [sp, #404]
+	str	r1, [sp, #24]                   @ 4-byte Spill
+	mov	r1, r5
+	ldr	r9, [sp, #400]
+	bl	mulPv512x32
+	add	r2, sp, #312
+	ldm	r2, {r0, r1, r2}
 	adds	r0, r4, r0
-	adcs	r7, r11, r1
-	mul	r0, r7, r5
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #380]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adcs	r0, r0, r3
-	str	r0, [sp, #232]          @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #376]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r8, r10}
-	ldr	r9, [sp, #352]
-	ldm	lr, {r0, r1, r2, r3, r12, lr}
-	ldr	r11, [sp, #228]         @ 4-byte Reload
-	adcs	r0, r11, r0
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #224]          @ 4-byte Reload
+	str	r2, [sp, #72]                   @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r2, [sp, #324]
 	adcs	r0, r0, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #220]          @ 4-byte Reload
-	adcs	r0, r0, r2
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #216]          @ 4-byte Reload
-	adcs	r11, r0, r3
-	ldr	r0, [sp, #140]          @ 4-byte Reload
-	adcs	r0, r0, r12
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #136]          @ 4-byte Reload
-	adcs	r0, r0, lr
-	str	r0, [sp, #224]          @ 4-byte Spill
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	adcs	r0, r0, r9
-	str	r0, [sp, #220]          @ 4-byte Spill
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adcs	r0, r0, r4
-	str	r0, [sp, #216]          @ 4-byte Spill
-	ldr	r0, [sp, #124]          @ 4-byte Reload
-	adcs	r0, r0, r5
-	str	r0, [sp, #136]          @ 4-byte Spill
-	ldr	r0, [sp, #120]          @ 4-byte Reload
-	adcs	r0, r0, r6
-	str	r0, [sp, #228]          @ 4-byte Spill
-	ldr	r0, [sp, #116]          @ 4-byte Reload
-	adcs	r0, r0, r8
-	str	r0, [sp, #140]          @ 4-byte Spill
-	ldr	r0, [sp, #112]          @ 4-byte Reload
-	adcs	r10, r0, r10
-	ldr	r0, [sp, #108]          @ 4-byte Reload
-	adcs	r8, r0, r1
-	ldr	r0, [sp, #104]          @ 4-byte Reload
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	adcs	r6, r0, r1
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ldr	r1, [sp, #236]          @ 4-byte Reload
-	adcs	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #128]          @ 4-byte Spill
+	str	r2, [sp, #68]                   @ 4-byte Spill
+	str	r0, [sp, #156]                  @ 4-byte Spill
+	mrs	r1, apsr
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	mul	r2, r6, r0
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	msr	APSR_nzcvq, r1
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r3, [sp, #24]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #32]                   @ 4-byte Spill
+	ldr	r0, [r7, #120]
+	adcs	r5, r10, r9
+	str	r0, [sp, #228]                  @ 4-byte Spill
+	adcs	r3, r11, r3
+	ldr	r0, [sp, #372]
+	str	r0, [sp, #100]                  @ 4-byte Spill
+	ldr	r0, [sp, #368]
+	str	r0, [sp, #84]                   @ 4-byte Spill
+	ldr	r0, [sp, #364]
+	str	r0, [sp, #76]                   @ 4-byte Spill
+	ldr	r0, [sp, #360]
+	str	r0, [sp, #60]                   @ 4-byte Spill
+	ldr	r0, [sp, #356]
+	str	r0, [sp, #52]                   @ 4-byte Spill
+	ldr	r0, [sp, #352]
+	str	r0, [sp, #40]                   @ 4-byte Spill
+	ldr	r0, [sp, #348]
+	str	r0, [sp, #28]                   @ 4-byte Spill
+	ldr	r0, [sp, #344]
+	str	r0, [sp, #20]                   @ 4-byte Spill
+	ldr	r0, [sp, #340]
+	str	r0, [sp, #16]                   @ 4-byte Spill
+	ldr	r0, [sp, #336]
+	str	r0, [sp, #12]                   @ 4-byte Spill
+	ldr	r0, [sp, #332]
+	str	r0, [sp, #8]                    @ 4-byte Spill
+	ldr	r0, [sp, #328]
+	str	r0, [sp, #4]                    @ 4-byte Spill
 	add	r0, sp, #240
-	bl	.LmulPv544x32(PLT)
-	add	r3, sp, #240
-	ldm	r3, {r0, r1, r2, r3}
-	adds	r0, r7, r0
-	ldr	r0, [sp, #232]          @ 4-byte Reload
-	adcs	r9, r0, r1
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	str	r9, [sp, #100]          @ 4-byte Spill
-	adcs	r12, r0, r2
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	str	r12, [sp, #104]         @ 4-byte Spill
-	adcs	lr, r0, r3
-	ldr	r0, [sp, #256]
-	str	lr, [sp, #108]          @ 4-byte Spill
+	ldr	r1, [sp, #376]
+	str	r3, [sp, #24]                   @ 4-byte Spill
+	ldr	r3, [sp, #152]                  @ 4-byte Reload
+	ldr	r7, [sp, #36]                   @ 4-byte Reload
+	ldr	r4, [sp, #80]                   @ 4-byte Reload
+	adcs	r6, r3, r7
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	adcs	r11, r8, r3
+	ldr	r3, [sp, #148]                  @ 4-byte Reload
+	adcs	r8, r3, r7
+	ldr	r3, [sp, #144]                  @ 4-byte Reload
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	adcs	r7, r3, r7
+	ldr	r3, [sp, #140]                  @ 4-byte Reload
+	adcs	r10, r3, r4
+	ldr	r3, [sp, #136]                  @ 4-byte Reload
+	ldr	r4, [sp, #88]                   @ 4-byte Reload
+	adcs	r9, r3, r4
+	ldr	r3, [sp, #132]                  @ 4-byte Reload
+	ldr	r4, [sp, #92]                   @ 4-byte Reload
+	adcs	r3, r3, r4
+	str	r3, [sp, #152]                  @ 4-byte Spill
+	ldr	r3, [sp, #128]                  @ 4-byte Reload
+	ldr	r4, [sp, #96]                   @ 4-byte Reload
+	adcs	r3, r3, r4
+	str	r3, [sp, #148]                  @ 4-byte Spill
+	ldr	r3, [sp, #124]                  @ 4-byte Reload
+	ldr	r4, [sp, #104]                  @ 4-byte Reload
+	adcs	r3, r3, r4
+	str	r3, [sp, #144]                  @ 4-byte Spill
+	ldr	r3, [sp, #120]                  @ 4-byte Reload
+	ldr	r4, [sp, #108]                  @ 4-byte Reload
+	adcs	r3, r3, r4
+	str	r3, [sp, #140]                  @ 4-byte Spill
+	ldr	r3, [sp, #116]                  @ 4-byte Reload
+	ldr	r4, [sp, #112]                  @ 4-byte Reload
+	adcs	r3, r4, r3
+	str	r3, [sp, #136]                  @ 4-byte Spill
+	adc	r1, r1, #0
+	str	r1, [sp, #132]                  @ 4-byte Spill
+	ldr	r1, [sp, #232]                  @ 4-byte Reload
+	bl	mulPv512x32
+	ldr	r0, [sp, #44]                   @ 4-byte Reload
+	add	r3, sp, #244
+	msr	APSR_nzcvq, r0
+	ldr	r0, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	adcs	r12, r1, r0
+	ldr	r0, [sp, #68]                   @ 4-byte Reload
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r5, r5, r0
+	ldr	r0, [sp, #4]                    @ 4-byte Reload
 	adcs	r4, r1, r0
+	ldr	r0, [sp, #8]                    @ 4-byte Reload
+	ldr	r1, [sp, #152]                  @ 4-byte Reload
+	adcs	r6, r6, r0
+	ldr	r0, [sp, #12]                   @ 4-byte Reload
+	adcs	lr, r11, r0
+	ldr	r0, [sp, #16]                   @ 4-byte Reload
+	adcs	r8, r8, r0
+	ldr	r0, [sp, #20]                   @ 4-byte Reload
+	adcs	r7, r7, r0
+	ldr	r0, [sp, #28]                   @ 4-byte Reload
+	adcs	r11, r10, r0
+	ldr	r0, [sp, #40]                   @ 4-byte Reload
+	ldr	r10, [sp, #156]                 @ 4-byte Reload
+	adcs	r0, r9, r0
+	str	r0, [sp, #108]                  @ 4-byte Spill
+	ldr	r0, [sp, #52]                   @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #104]                  @ 4-byte Spill
+	ldr	r0, [sp, #60]                   @ 4-byte Reload
+	ldr	r1, [sp, #148]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #232]                  @ 4-byte Spill
+	ldr	r0, [sp, #76]                   @ 4-byte Reload
+	ldr	r1, [sp, #144]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #112]                  @ 4-byte Spill
+	ldr	r0, [sp, #84]                   @ 4-byte Reload
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #116]                  @ 4-byte Spill
+	ldr	r0, [sp, #100]                  @ 4-byte Reload
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	adcs	r0, r1, r0
+	str	r0, [sp, #120]                  @ 4-byte Spill
+	ldr	r0, [sp, #228]                  @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r0, r1
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r2, [sp, #304]
+	adc	r0, r2, #0
+	str	r0, [sp, #128]                  @ 4-byte Spill
+	ldr	r2, [sp, #240]
+	ldm	r3, {r0, r1, r3}
+	adds	r2, r10, r2
+	adcs	r12, r12, r0
+	str	r12, [sp, #228]                 @ 4-byte Spill
+	adcs	r9, r5, r1
+	str	r9, [sp, #156]                  @ 4-byte Spill
+	adcs	r10, r4, r3
+	str	r10, [sp, #152]                 @ 4-byte Spill
+	ldr	r0, [sp, #256]
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	adcs	r4, r6, r0
+	str	r4, [sp, #148]                  @ 4-byte Spill
 	ldr	r0, [sp, #260]
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	str	r4, [sp, #112]          @ 4-byte Spill
-	adcs	r5, r1, r0
+	ldr	r2, [sp, #104]                  @ 4-byte Reload
+	adcs	r6, lr, r0
+	str	r6, [sp, #144]                  @ 4-byte Spill
 	ldr	r0, [sp, #264]
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	str	r5, [sp, #116]          @ 4-byte Spill
-	adcs	r11, r11, r0
+	adcs	r5, r8, r0
+	str	r5, [sp, #140]                  @ 4-byte Spill
 	ldr	r0, [sp, #268]
-	str	r11, [sp, #120]         @ 4-byte Spill
-	adcs	r7, r1, r0
+	adcs	r3, r7, r0
+	str	r3, [sp, #136]                  @ 4-byte Spill
 	ldr	r0, [sp, #272]
-	ldr	r1, [sp, #224]          @ 4-byte Reload
-	str	r7, [sp, #124]          @ 4-byte Spill
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #220]          @ 4-byte Reload
-	str	r0, [sp, #224]          @ 4-byte Spill
+	adcs	lr, r11, r0
+	str	lr, [sp, #124]                  @ 4-byte Spill
 	ldr	r0, [sp, #276]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #216]          @ 4-byte Reload
-	str	r0, [sp, #220]          @ 4-byte Spill
+	adcs	r1, r1, r0
+	str	r1, [sp, #108]                  @ 4-byte Spill
 	ldr	r0, [sp, #280]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #136]          @ 4-byte Reload
-	str	r0, [sp, #216]          @ 4-byte Spill
+	adcs	r7, r2, r0
+	str	r7, [sp, #104]                  @ 4-byte Spill
 	ldr	r0, [sp, #284]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #228]          @ 4-byte Reload
-	str	r0, [sp, #232]          @ 4-byte Spill
+	ldr	r2, [sp, #232]                  @ 4-byte Reload
+	adcs	r0, r2, r0
+	str	r0, [sp, #232]                  @ 4-byte Spill
 	ldr	r0, [sp, #288]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	str	r0, [sp, #228]          @ 4-byte Spill
+	ldr	r2, [sp, #112]                  @ 4-byte Reload
+	adcs	r0, r2, r0
+	str	r0, [sp, #112]                  @ 4-byte Spill
 	ldr	r0, [sp, #292]
-	adcs	r0, r1, r0
-	ldr	r1, [sp, #132]          @ 4-byte Reload
-	str	r0, [sp, #236]          @ 4-byte Spill
+	ldr	r2, [sp, #116]                  @ 4-byte Reload
+	adcs	r0, r2, r0
+	str	r0, [sp, #116]                  @ 4-byte Spill
 	ldr	r0, [sp, #296]
-	adcs	r10, r10, r0
+	ldr	r2, [sp, #120]                  @ 4-byte Reload
+	adcs	r0, r2, r0
+	str	r0, [sp, #120]                  @ 4-byte Spill
 	ldr	r0, [sp, #300]
-	str	r10, [sp, #136]         @ 4-byte Spill
-	adcs	r8, r8, r0
-	ldr	r0, [sp, #304]
-	str	r8, [sp, #140]          @ 4-byte Spill
-	adcs	r6, r6, r0
-	ldr	r0, [sp, #308]
-	adcs	r2, r1, r0
-	ldr	r0, [sp, #128]          @ 4-byte Reload
-	adc	r0, r0, #0
-	str	r0, [sp, #132]          @ 4-byte Spill
-	ldr	r0, [sp, #200]          @ 4-byte Reload
-	subs	r1, r9, r0
-	ldr	r0, [sp, #196]          @ 4-byte Reload
-	sbcs	r3, r12, r0
-	ldr	r0, [sp, #192]          @ 4-byte Reload
-	sbcs	r12, lr, r0
-	ldr	r0, [sp, #176]          @ 4-byte Reload
-	sbcs	lr, r4, r0
-	ldr	r0, [sp, #180]          @ 4-byte Reload
-	sbcs	r4, r5, r0
-	ldr	r0, [sp, #184]          @ 4-byte Reload
-	sbcs	r5, r11, r0
-	ldr	r0, [sp, #188]          @ 4-byte Reload
-	ldr	r11, [sp, #224]         @ 4-byte Reload
-	sbcs	r9, r7, r0
-	ldr	r0, [sp, #148]          @ 4-byte Reload
-	ldr	r7, [sp, #220]          @ 4-byte Reload
-	sbcs	r0, r11, r0
-	ldr	r11, [sp, #232]         @ 4-byte Reload
-	str	r0, [sp, #176]          @ 4-byte Spill
-	ldr	r0, [sp, #144]          @ 4-byte Reload
-	sbcs	r0, r7, r0
-	ldr	r7, [sp, #216]          @ 4-byte Reload
-	str	r0, [sp, #180]          @ 4-byte Spill
-	ldr	r0, [sp, #152]          @ 4-byte Reload
-	sbcs	r0, r7, r0
-	ldr	r7, [sp, #228]          @ 4-byte Reload
-	str	r0, [sp, #184]          @ 4-byte Spill
-	ldr	r0, [sp, #156]          @ 4-byte Reload
-	sbcs	r0, r11, r0
-	ldr	r11, [sp, #236]         @ 4-byte Reload
-	str	r0, [sp, #188]          @ 4-byte Spill
-	ldr	r0, [sp, #160]          @ 4-byte Reload
-	sbcs	r0, r7, r0
-	str	r0, [sp, #192]          @ 4-byte Spill
-	ldr	r0, [sp, #164]          @ 4-byte Reload
-	sbcs	r0, r11, r0
-	str	r0, [sp, #196]          @ 4-byte Spill
-	ldr	r0, [sp, #168]          @ 4-byte Reload
+	ldr	r2, [sp, #132]                  @ 4-byte Reload
+	adcs	r0, r2, r0
+	str	r0, [sp, #132]                  @ 4-byte Spill
+	ldr	r0, [sp, #236]                  @ 4-byte Reload
+	ldr	r2, [sp, #128]                  @ 4-byte Reload
+	ldr	r0, [r0, #124]
+	adc	r2, r0, r2
+	ldr	r0, [sp, #216]                  @ 4-byte Reload
+	subs	r0, r12, r0
+	str	r0, [sp, #236]                  @ 4-byte Spill
+	ldr	r0, [sp, #212]                  @ 4-byte Reload
+	sbcs	r0, r9, r0
+	str	r0, [sp, #216]                  @ 4-byte Spill
+	ldr	r0, [sp, #208]                  @ 4-byte Reload
 	sbcs	r0, r10, r0
-	mov	r10, r6
-	str	r0, [sp, #200]          @ 4-byte Spill
-	ldr	r0, [sp, #172]          @ 4-byte Reload
-	sbcs	r7, r8, r0
-	ldr	r0, [sp, #204]          @ 4-byte Reload
-	mov	r8, r2
-	sbcs	r11, r6, r0
-	ldr	r0, [sp, #208]          @ 4-byte Reload
-	sbcs	r6, r2, r0
-	ldr	r0, [sp, #132]          @ 4-byte Reload
-	sbc	r2, r0, #0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	ands	r2, r2, #1
-	movne	r1, r0
-	ldr	r0, [sp, #212]          @ 4-byte Reload
-	str	r1, [r0]
-	ldr	r1, [sp, #104]          @ 4-byte Reload
-	movne	r3, r1
-	ldr	r1, [sp, #108]          @ 4-byte Reload
-	str	r3, [r0, #4]
-	ldr	r3, [sp, #176]          @ 4-byte Reload
-	movne	r12, r1
-	ldr	r1, [sp, #112]          @ 4-byte Reload
-	cmp	r2, #0
-	str	r12, [r0, #8]
-	movne	lr, r1
-	ldr	r1, [sp, #116]          @ 4-byte Reload
-	str	lr, [r0, #12]
-	movne	r4, r1
-	ldr	r1, [sp, #120]          @ 4-byte Reload
-	str	r4, [r0, #16]
-	movne	r5, r1
-	ldr	r1, [sp, #124]          @ 4-byte Reload
-	cmp	r2, #0
-	str	r5, [r0, #20]
-	movne	r9, r1
-	ldr	r1, [sp, #224]          @ 4-byte Reload
-	str	r9, [r0, #24]
-	movne	r3, r1
-	ldr	r1, [sp, #220]          @ 4-byte Reload
-	str	r3, [r0, #28]
-	ldr	r3, [sp, #180]          @ 4-byte Reload
-	movne	r3, r1
-	ldr	r1, [sp, #216]          @ 4-byte Reload
-	cmp	r2, #0
-	str	r3, [r0, #32]
-	ldr	r3, [sp, #184]          @ 4-byte Reload
-	movne	r3, r1
-	ldr	r1, [sp, #232]          @ 4-byte Reload
-	str	r3, [r0, #36]
-	ldr	r3, [sp, #188]          @ 4-byte Reload
-	movne	r3, r1
-	ldr	r1, [sp, #228]          @ 4-byte Reload
-	str	r3, [r0, #40]
-	ldr	r3, [sp, #192]          @ 4-byte Reload
-	movne	r3, r1
-	ldr	r1, [sp, #236]          @ 4-byte Reload
-	cmp	r2, #0
-	str	r3, [r0, #44]
-	ldr	r3, [sp, #196]          @ 4-byte Reload
-	movne	r3, r1
-	ldr	r1, [sp, #200]          @ 4-byte Reload
-	str	r3, [r0, #48]
-	ldr	r3, [sp, #136]          @ 4-byte Reload
-	movne	r1, r3
-	str	r1, [r0, #52]
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	movne	r7, r1
-	cmp	r2, #0
-	movne	r11, r10
-	movne	r6, r8
-	str	r7, [r0, #56]
-	str	r11, [r0, #60]
-	str	r6, [r0, #64]
-	add	sp, sp, #444
+	str	r0, [sp, #212]                  @ 4-byte Spill
+	ldr	r0, [sp, #192]                  @ 4-byte Reload
+	ldr	r10, [sp, #112]                 @ 4-byte Reload
+	sbcs	r0, r4, r0
+	str	r0, [sp, #208]                  @ 4-byte Spill
+	ldr	r0, [sp, #196]                  @ 4-byte Reload
+	sbcs	r0, r6, r0
+	str	r0, [sp, #196]                  @ 4-byte Spill
+	ldr	r0, [sp, #200]                  @ 4-byte Reload
+	sbcs	r0, r5, r0
+	str	r0, [sp, #200]                  @ 4-byte Spill
+	ldr	r0, [sp, #204]                  @ 4-byte Reload
+	ldr	r5, [sp, #120]                  @ 4-byte Reload
+	sbcs	r0, r3, r0
+	str	r0, [sp, #204]                  @ 4-byte Spill
+	ldr	r0, [sp, #160]                  @ 4-byte Reload
+	sbcs	r0, lr, r0
+	str	r0, [sp, #192]                  @ 4-byte Spill
+	ldr	r0, [sp, #164]                  @ 4-byte Reload
+	ldr	lr, [sp, #132]                  @ 4-byte Reload
+	sbcs	r11, r1, r0
+	ldr	r0, [sp, #168]                  @ 4-byte Reload
+	ldr	r1, [sp, #232]                  @ 4-byte Reload
+	sbcs	r9, r7, r0
+	ldr	r0, [sp, #172]                  @ 4-byte Reload
+	ldr	r7, [sp, #116]                  @ 4-byte Reload
+	sbcs	r8, r1, r0
+	ldr	r0, [sp, #176]                  @ 4-byte Reload
+	sbcs	r6, r10, r0
+	ldr	r0, [sp, #180]                  @ 4-byte Reload
+	sbcs	r4, r7, r0
+	ldr	r0, [sp, #184]                  @ 4-byte Reload
+	sbcs	r3, r5, r0
+	ldr	r0, [sp, #188]                  @ 4-byte Reload
+	sbcs	r1, lr, r0
+	ldr	r0, [sp, #220]                  @ 4-byte Reload
+	sbc	r12, r2, r0
+	ldr	r0, [sp, #224]                  @ 4-byte Reload
+	cmn	r12, #1
+	movle	r1, lr
+	movgt	r2, r12
+	str	r1, [r0, #56]
+	movle	r3, r5
+	ldr	r1, [sp, #232]                  @ 4-byte Reload
+	cmn	r12, #1
+	movle	r4, r7
+	movle	r6, r10
+	str	r2, [r0, #60]
+	movle	r8, r1
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	cmn	r12, #1
+	ldr	r2, [sp, #192]                  @ 4-byte Reload
+	str	r3, [r0, #52]
+	movle	r9, r1
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	str	r4, [r0, #48]
+	str	r6, [r0, #44]
+	movle	r11, r1
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	str	r8, [r0, #40]
+	str	r9, [r0, #36]
+	movle	r2, r1
+	cmn	r12, #1
+	str	r2, [r0, #28]
+	ldr	r2, [sp, #136]                  @ 4-byte Reload
+	ldr	r1, [sp, #204]                  @ 4-byte Reload
+	str	r11, [r0, #32]
+	movle	r1, r2
+	ldr	r2, [sp, #140]                  @ 4-byte Reload
+	str	r1, [r0, #24]
+	ldr	r1, [sp, #200]                  @ 4-byte Reload
+	movle	r1, r2
+	ldr	r2, [sp, #144]                  @ 4-byte Reload
+	str	r1, [r0, #20]
+	ldr	r1, [sp, #196]                  @ 4-byte Reload
+	movle	r1, r2
+	ldr	r2, [sp, #148]                  @ 4-byte Reload
+	str	r1, [r0, #16]
+	cmn	r12, #1
+	ldr	r1, [sp, #208]                  @ 4-byte Reload
+	movle	r1, r2
+	ldr	r2, [sp, #152]                  @ 4-byte Reload
+	str	r1, [r0, #12]
+	ldr	r1, [sp, #212]                  @ 4-byte Reload
+	movle	r1, r2
+	ldr	r2, [sp, #156]                  @ 4-byte Reload
+	str	r1, [r0, #8]
+	ldr	r1, [sp, #216]                  @ 4-byte Reload
+	movle	r1, r2
+	ldr	r2, [sp, #228]                  @ 4-byte Reload
+	str	r1, [r0, #4]
+	cmn	r12, #1
+	ldr	r1, [sp, #236]                  @ 4-byte Reload
+	movle	r1, r2
+	str	r1, [r0]
+	add	sp, sp, #372
 	add	sp, sp, #1024
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end260:
-	.size	mcl_fp_montRed17L, .Lfunc_end260-mcl_fp_montRed17L
+.Lfunc_end80:
+	.size	mcl_fp_montRedNF16L, .Lfunc_end80-mcl_fp_montRedNF16L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_addPre17L
-	.align	2
-	.type	mcl_fp_addPre17L,%function
-mcl_fp_addPre17L:                       @ @mcl_fp_addPre17L
+                                        @ -- End function
+	.globl	mcl_fp_addPre16L                @ -- Begin function mcl_fp_addPre16L
+	.p2align	2
+	.type	mcl_fp_addPre16L,%function
+	.code	32                              @ @mcl_fp_addPre16L
+mcl_fp_addPre16L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#76
-	sub	sp, sp, #76
-	ldm	r1, {r3, lr}
-	ldr	r8, [r1, #8]
-	ldr	r5, [r1, #12]
-	ldm	r2, {r6, r7, r12}
-	ldr	r4, [r2, #12]
-	add	r10, r2, #16
-	adds	r3, r6, r3
-	str	r3, [sp, #32]           @ 4-byte Spill
-	ldr	r3, [r2, #64]
-	str	r3, [sp, #72]           @ 4-byte Spill
-	adcs	r3, r7, lr
-	add	lr, r1, #16
-	str	r3, [sp, #28]           @ 4-byte Spill
+	.pad	#60
+	sub	sp, sp, #60
+	ldm	r2, {r3, r12}
+	add	r11, r1, #48
+	ldm	r1, {r4, r7, lr}
+	adds	r3, r4, r3
+	str	r3, [sp, #28]                   @ 4-byte Spill
+	adcs	r3, r7, r12
+	str	r3, [sp, #24]                   @ 4-byte Spill
 	ldr	r3, [r2, #32]
-	adcs	r6, r12, r8
-	adcs	r8, r4, r5
-	str	r3, [sp, #36]           @ 4-byte Spill
+	add	r7, r2, #20
+	str	r3, [sp, #20]                   @ 4-byte Spill
+	add	r12, r1, #16
 	ldr	r3, [r2, #36]
-	str	r3, [sp, #40]           @ 4-byte Spill
+	str	r3, [sp, #32]                   @ 4-byte Spill
 	ldr	r3, [r2, #40]
-	str	r3, [sp, #44]           @ 4-byte Spill
+	str	r3, [sp, #36]                   @ 4-byte Spill
 	ldr	r3, [r2, #44]
-	str	r3, [sp, #48]           @ 4-byte Spill
+	str	r3, [sp, #40]                   @ 4-byte Spill
 	ldr	r3, [r2, #48]
-	str	r3, [sp, #52]           @ 4-byte Spill
+	str	r3, [sp, #44]                   @ 4-byte Spill
 	ldr	r3, [r2, #52]
-	str	r3, [sp, #56]           @ 4-byte Spill
+	str	r3, [sp, #48]                   @ 4-byte Spill
 	ldr	r3, [r2, #56]
-	str	r3, [sp, #60]           @ 4-byte Spill
+	ldr	r8, [r2, #8]
+	str	r3, [sp, #52]                   @ 4-byte Spill
 	ldr	r3, [r2, #60]
-	str	r3, [sp, #64]           @ 4-byte Spill
-	ldr	r3, [r2, #28]
-	str	r3, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r10}
-	ldr	r2, [r1, #64]
-	ldr	r11, [r1, #60]
-	str	r2, [sp, #68]           @ 4-byte Spill
+	str	r3, [sp, #56]                   @ 4-byte Spill
+	adcs	r3, lr, r8
+	ldr	r6, [r2, #12]
+	ldr	lr, [r2, #16]
 	ldr	r2, [r1, #36]
-	str	r2, [sp]                @ 4-byte Spill
+	ldr	r5, [r1, #12]
+	str	r2, [sp]                        @ 4-byte Spill
 	ldr	r2, [r1, #40]
-	str	r2, [sp, #4]            @ 4-byte Spill
+	str	r3, [sp, #16]                   @ 4-byte Spill
+	adcs	r3, r5, r6
+	str	r2, [sp, #4]                    @ 4-byte Spill
 	ldr	r2, [r1, #44]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r1, #52]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldm	lr, {r1, r2, r3, r12, lr}
-	ldr	r9, [sp, #32]           @ 4-byte Reload
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	adcs	r1, r4, r1
-	str	r9, [r0]
-	str	r7, [r0, #4]
+	str	r3, [sp, #12]                   @ 4-byte Spill
+	ldr	r5, [r1, #32]
+	str	r2, [sp, #8]                    @ 4-byte Spill
+	ldm	r12, {r1, r2, r3, r12}
+	ldm	r7, {r4, r6, r7}
+	adcs	r1, r1, lr
+	add	lr, r0, #16
+	adcs	r2, r2, r4
+	ldm	r11, {r8, r9, r10, r11}
+	adcs	r3, r3, r6
+	ldr	r6, [sp, #20]                   @ 4-byte Reload
+	adcs	r7, r12, r7
+	adcs	r12, r5, r6
+	ldr	r6, [sp, #28]                   @ 4-byte Reload
+	str	r6, [r0]
+	ldr	r6, [sp, #24]                   @ 4-byte Reload
+	str	r6, [r0, #4]
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
 	str	r6, [r0, #8]
-	str	r8, [r0, #12]
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	adcs	r2, r5, r2
-	str	r1, [r0, #16]
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	adcs	r1, r10, r3
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adcs	r2, r2, r12
-	adcs	r12, r1, lr
-	str	r2, [r0, #28]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	ldr	r2, [sp]                @ 4-byte Reload
+	ldr	r6, [sp, #12]                   @ 4-byte Reload
+	str	r6, [r0, #12]
+	stm	lr, {r1, r2, r3, r7}
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	ldr	r2, [sp]                        @ 4-byte Reload
+	ldr	r3, [sp, #4]                    @ 4-byte Reload
+	adcs	r1, r2, r1
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	ldr	r7, [sp, #8]                    @ 4-byte Reload
+	adcs	r2, r3, r2
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	ldr	r6, [sp, #48]                   @ 4-byte Reload
+	adcs	r3, r7, r3
+	ldr	r7, [sp, #44]                   @ 4-byte Reload
 	str	r12, [r0, #32]
 	add	r12, r0, #36
-	adcs	r2, r1, r2
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r3, r1, r3
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r7, r1, r7
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r6, r1, r6
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r5, r1, r5
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r4, r1, r4
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	stm	r12, {r2, r3, r7}
-	str	r6, [r0, #48]
+	adcs	r7, r8, r7
+	stm	r12, {r1, r2, r3, r7}
+	adcs	r5, r9, r6
+	ldr	r6, [sp, #52]                   @ 4-byte Reload
 	str	r5, [r0, #52]
+	adcs	r4, r10, r6
+	ldr	r6, [sp, #56]                   @ 4-byte Reload
 	str	r4, [r0, #56]
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	adcs	r1, r1, r11
-	str	r1, [r0, #60]
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [r0, #64]
+	adcs	r6, r11, r6
+	str	r6, [r0, #60]
 	mov	r0, #0
 	adc	r0, r0, #0
-	add	sp, sp, #76
+	add	sp, sp, #60
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end261:
-	.size	mcl_fp_addPre17L, .Lfunc_end261-mcl_fp_addPre17L
+.Lfunc_end81:
+	.size	mcl_fp_addPre16L, .Lfunc_end81-mcl_fp_addPre16L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_subPre17L
-	.align	2
-	.type	mcl_fp_subPre17L,%function
-mcl_fp_subPre17L:                       @ @mcl_fp_subPre17L
+                                        @ -- End function
+	.globl	mcl_fp_subPre16L                @ -- Begin function mcl_fp_subPre16L
+	.p2align	2
+	.type	mcl_fp_subPre16L,%function
+	.code	32                              @ @mcl_fp_subPre16L
+mcl_fp_subPre16L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#76
-	sub	sp, sp, #76
-	ldm	r2, {r3, lr}
-	ldr	r8, [r2, #8]
-	ldr	r5, [r2, #12]
-	ldm	r1, {r6, r7, r12}
-	ldr	r4, [r1, #12]
-	add	r10, r2, #16
-	subs	r3, r6, r3
-	str	r3, [sp, #32]           @ 4-byte Spill
-	ldr	r3, [r2, #64]
-	str	r3, [sp, #72]           @ 4-byte Spill
-	sbcs	r3, r7, lr
-	add	lr, r1, #16
-	str	r3, [sp, #28]           @ 4-byte Spill
+	.pad	#60
+	sub	sp, sp, #60
+	ldm	r2, {r3, r12}
+	add	r11, r1, #48
+	ldm	r1, {r4, r7, lr}
+	subs	r3, r4, r3
+	str	r3, [sp, #28]                   @ 4-byte Spill
+	sbcs	r3, r7, r12
+	str	r3, [sp, #24]                   @ 4-byte Spill
 	ldr	r3, [r2, #32]
-	sbcs	r6, r12, r8
-	sbcs	r8, r4, r5
-	str	r3, [sp, #36]           @ 4-byte Spill
+	add	r7, r2, #20
+	str	r3, [sp, #20]                   @ 4-byte Spill
+	add	r12, r1, #16
 	ldr	r3, [r2, #36]
-	str	r3, [sp, #40]           @ 4-byte Spill
+	str	r3, [sp, #32]                   @ 4-byte Spill
 	ldr	r3, [r2, #40]
-	str	r3, [sp, #44]           @ 4-byte Spill
+	str	r3, [sp, #36]                   @ 4-byte Spill
 	ldr	r3, [r2, #44]
-	str	r3, [sp, #48]           @ 4-byte Spill
+	str	r3, [sp, #40]                   @ 4-byte Spill
 	ldr	r3, [r2, #48]
-	str	r3, [sp, #52]           @ 4-byte Spill
+	str	r3, [sp, #44]                   @ 4-byte Spill
 	ldr	r3, [r2, #52]
-	str	r3, [sp, #56]           @ 4-byte Spill
+	str	r3, [sp, #48]                   @ 4-byte Spill
 	ldr	r3, [r2, #56]
-	str	r3, [sp, #60]           @ 4-byte Spill
+	ldr	r8, [r2, #8]
+	str	r3, [sp, #52]                   @ 4-byte Spill
 	ldr	r3, [r2, #60]
-	str	r3, [sp, #64]           @ 4-byte Spill
-	ldr	r3, [r2, #28]
-	str	r3, [sp, #24]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r10}
-	ldr	r2, [r1, #64]
-	ldr	r11, [r1, #60]
-	str	r2, [sp, #68]           @ 4-byte Spill
+	str	r3, [sp, #56]                   @ 4-byte Spill
+	sbcs	r3, lr, r8
+	ldr	r6, [r2, #12]
+	ldr	lr, [r2, #16]
 	ldr	r2, [r1, #36]
-	str	r2, [sp]                @ 4-byte Spill
+	ldr	r5, [r1, #12]
+	str	r2, [sp]                        @ 4-byte Spill
 	ldr	r2, [r1, #40]
-	str	r2, [sp, #4]            @ 4-byte Spill
+	str	r3, [sp, #16]                   @ 4-byte Spill
+	sbcs	r3, r5, r6
+	str	r2, [sp, #4]                    @ 4-byte Spill
 	ldr	r2, [r1, #44]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #48]
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r1, #52]
-	str	r2, [sp, #16]           @ 4-byte Spill
-	ldr	r2, [r1, #56]
-	str	r2, [sp, #20]           @ 4-byte Spill
-	ldm	lr, {r1, r2, r3, r12, lr}
-	ldr	r9, [sp, #32]           @ 4-byte Reload
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	sbcs	r1, r1, r4
-	str	r9, [r0]
-	str	r7, [r0, #4]
+	str	r3, [sp, #12]                   @ 4-byte Spill
+	ldr	r5, [r1, #32]
+	str	r2, [sp, #8]                    @ 4-byte Spill
+	ldm	r12, {r1, r2, r3, r12}
+	ldm	r7, {r4, r6, r7}
+	sbcs	r1, r1, lr
+	add	lr, r0, #16
+	sbcs	r2, r2, r4
+	ldm	r11, {r8, r9, r10, r11}
+	sbcs	r3, r3, r6
+	ldr	r6, [sp, #20]                   @ 4-byte Reload
+	sbcs	r7, r12, r7
+	sbcs	r12, r5, r6
+	ldr	r6, [sp, #28]                   @ 4-byte Reload
+	str	r6, [r0]
+	ldr	r6, [sp, #24]                   @ 4-byte Reload
+	str	r6, [r0, #4]
+	ldr	r6, [sp, #16]                   @ 4-byte Reload
 	str	r6, [r0, #8]
-	str	r8, [r0, #12]
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	ldr	r6, [sp, #12]           @ 4-byte Reload
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	sbcs	r2, r2, r5
-	str	r1, [r0, #16]
-	ldr	r5, [sp, #16]           @ 4-byte Reload
-	sbcs	r1, r3, r10
-	str	r2, [r0, #20]
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	ldr	r3, [sp, #4]            @ 4-byte Reload
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	sbcs	r2, r12, r2
-	sbcs	r12, lr, r1
-	str	r2, [r0, #28]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	ldr	r2, [sp]                @ 4-byte Reload
+	ldr	r6, [sp, #12]                   @ 4-byte Reload
+	str	r6, [r0, #12]
+	stm	lr, {r1, r2, r3, r7}
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	ldr	r2, [sp]                        @ 4-byte Reload
+	ldr	r3, [sp, #4]                    @ 4-byte Reload
+	sbcs	r1, r2, r1
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	ldr	r7, [sp, #8]                    @ 4-byte Reload
+	sbcs	r2, r3, r2
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	ldr	r6, [sp, #48]                   @ 4-byte Reload
+	sbcs	r3, r7, r3
+	ldr	r7, [sp, #44]                   @ 4-byte Reload
 	str	r12, [r0, #32]
 	add	r12, r0, #36
-	sbcs	r2, r2, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	sbcs	r3, r3, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	sbcs	r7, r7, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	sbcs	r6, r6, r1
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	sbcs	r5, r5, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	sbcs	r4, r4, r1
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	stm	r12, {r2, r3, r7}
-	str	r6, [r0, #48]
+	sbcs	r7, r8, r7
+	stm	r12, {r1, r2, r3, r7}
+	sbcs	r5, r9, r6
+	ldr	r6, [sp, #52]                   @ 4-byte Reload
 	str	r5, [r0, #52]
+	sbcs	r4, r10, r6
+	ldr	r6, [sp, #56]                   @ 4-byte Reload
 	str	r4, [r0, #56]
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	sbcs	r1, r11, r1
-	str	r1, [r0, #60]
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	sbcs	r1, r2, r1
-	str	r1, [r0, #64]
+	sbcs	r6, r11, r6
+	str	r6, [r0, #60]
 	mov	r0, #0
 	sbc	r0, r0, #0
 	and	r0, r0, #1
-	add	sp, sp, #76
+	add	sp, sp, #60
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end262:
-	.size	mcl_fp_subPre17L, .Lfunc_end262-mcl_fp_subPre17L
+.Lfunc_end82:
+	.size	mcl_fp_subPre16L, .Lfunc_end82-mcl_fp_subPre16L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_shr1_17L
-	.align	2
-	.type	mcl_fp_shr1_17L,%function
-mcl_fp_shr1_17L:                        @ @mcl_fp_shr1_17L
+                                        @ -- End function
+	.globl	mcl_fp_shr1_16L                 @ -- Begin function mcl_fp_shr1_16L
+	.p2align	2
+	.type	mcl_fp_shr1_16L,%function
+	.code	32                              @ @mcl_fp_shr1_16L
+mcl_fp_shr1_16L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#20
-	sub	sp, sp, #20
-	ldr	r4, [r1, #4]
-	ldr	r3, [r1, #8]
-	add	r9, r1, #32
+	.pad	#24
+	sub	sp, sp, #24
+	ldr	r2, [r1, #52]
+	ldr	r3, [r1, #56]
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	lsr	r2, r2, #1
+	str	r3, [sp, #8]                    @ 4-byte Spill
+	orr	r2, r2, r3, lsl #31
+	str	r2, [r0, #52]
+	ldr	r2, [r1, #44]
+	ldr	r3, [r1, #48]
+	ldr	r11, [r1, #36]
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	lsr	r2, r2, #1
+	ldr	r10, [r1, #40]
+	orr	r2, r2, r3, lsl #31
+	str	r2, [r0, #44]
+	lsr	r2, r11, #1
+	ldr	r12, [r1, #28]
+	ldr	r5, [r1, #32]
+	orr	r2, r2, r10, lsl #31
+	ldr	r4, [r1, #20]
+	str	r2, [r0, #36]
+	lsr	r9, r12, #1
+	ldr	r2, [r1]
+	orr	r7, r9, r5, lsl #31
+	ldr	lr, [r1, #24]
+	str	r2, [sp, #20]                   @ 4-byte Spill
 	ldr	r2, [r1, #12]
-	ldr	r11, [r1]
+	str	r7, [r0, #28]
 	lsr	r7, r4, #1
-	lsr	lr, r2, #1
-	lsrs	r2, r2, #1
-	orr	r10, r7, r3, lsl #31
-	ldr	r7, [r1, #64]
-	rrx	r12, r3
-	lsrs	r3, r4, #1
-	add	r4, r1, #16
-	rrx	r11, r11
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldm	r9, {r5, r7, r9}
-	ldr	r6, [r1, #48]
-	ldr	r8, [r1, #44]
-	str	r6, [sp]                @ 4-byte Spill
-	ldr	r6, [r1, #52]
-	str	r6, [sp, #4]            @ 4-byte Spill
-	ldr	r6, [r1, #56]
-	str	r6, [sp, #8]            @ 4-byte Spill
-	ldr	r6, [r1, #60]
-	str	r6, [sp, #12]           @ 4-byte Spill
-	ldm	r4, {r1, r2, r3, r4}
-	str	r11, [r0]
-	stmib	r0, {r10, r12}
-	orr	r6, lr, r1, lsl #31
-	str	r6, [r0, #12]
-	lsrs	r6, r2, #1
+	ldr	r6, [r1, #16]
+	orr	r7, r7, lr, lsl #31
+	ldr	r8, [r1, #4]
+	str	r3, [sp, #4]                    @ 4-byte Spill
+	ldr	r3, [r1, #8]
+	str	r7, [r0, #20]
+	lsr	r7, r2, #1
+	orr	r7, r7, r6, lsl #31
+	ldr	r1, [r1, #60]
+	str	r7, [r0, #12]
+	lsr	r7, r8, #1
+	orr	r7, r7, r3, lsl #31
+	str	r7, [r0, #4]
+	lsr	r7, r1, #1
+	lsrs	r1, r1, #1
+	ldr	r1, [sp, #8]                    @ 4-byte Reload
+	str	r2, [sp]                        @ 4-byte Spill
+	str	r7, [r0, #60]
 	rrx	r1, r1
-	str	r1, [r0, #16]
-	lsr	r1, r2, #1
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	orr	r1, r1, r3, lsl #31
-	str	r1, [r0, #20]
-	lsrs	r1, r4, #1
-	rrx	r1, r3
-	ldr	r3, [sp]                @ 4-byte Reload
-	str	r1, [r0, #24]
-	lsr	r1, r4, #1
-	orr	r1, r1, r5, lsl #31
-	str	r1, [r0, #28]
-	lsrs	r1, r7, #1
+	str	r1, [r0, #56]
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	lsrs	r1, r1, #1
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	rrx	r1, r1
+	str	r1, [r0, #48]
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	lsrs	r1, r1, #1
+	rrx	r1, r10
+	str	r1, [r0, #40]
+	lsrs	r1, r11, #1
 	rrx	r1, r5
 	str	r1, [r0, #32]
-	lsr	r1, r7, #1
-	orr	r1, r1, r9, lsl #31
-	str	r1, [r0, #36]
+	lsrs	r1, r12, #1
+	rrx	r1, lr
+	str	r1, [r0, #24]
+	lsrs	r1, r4, #1
+	rrx	r1, r6
+	str	r1, [r0, #16]
+	ldr	r1, [sp]                        @ 4-byte Reload
+	lsrs	r1, r1, #1
+	rrx	r1, r3
+	str	r1, [r0, #8]
 	lsrs	r1, r8, #1
-	rrx	r1, r9
-	str	r1, [r0, #40]
-	lsr	r1, r8, #1
-	orr	r1, r1, r3, lsl #31
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	rrx	r1, r1
+	str	r1, [r0]
+	add	sp, sp, #24
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end83:
+	.size	mcl_fp_shr1_16L, .Lfunc_end83-mcl_fp_shr1_16L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_add16L                   @ -- Begin function mcl_fp_add16L
+	.p2align	2
+	.type	mcl_fp_add16L,%function
+	.code	32                              @ @mcl_fp_add16L
+mcl_fp_add16L:
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#68
+	sub	sp, sp, #68
+	ldm	r2, {r4, lr}
+	ldm	r1, {r5, r6, r7, r9}
+	adds	r4, r5, r4
+	ldr	r12, [r2, #8]
+	adcs	r6, r6, lr
+	ldr	r8, [r2, #12]
+	adcs	r5, r7, r12
+	str	r4, [sp, #56]                   @ 4-byte Spill
+	ldr	r7, [r2, #16]
+	adcs	lr, r9, r8
+	ldr	r4, [r1, #16]
+	str	r6, [sp, #52]                   @ 4-byte Spill
+	adcs	r8, r4, r7
+	ldr	r6, [r2, #20]
+	ldr	r4, [r1, #20]
+	ldr	r12, [r3, #12]
+	adcs	r9, r4, r6
+	ldr	r4, [r2, #24]
+	ldr	r6, [r1, #24]
+	adcs	r7, r6, r4
+	str	r7, [sp, #48]                   @ 4-byte Spill
+	ldr	r6, [r2, #28]
+	ldr	r7, [r1, #28]
+	ldr	r4, [r1, #56]
+	adcs	r7, r7, r6
+	str	r7, [sp, #44]                   @ 4-byte Spill
+	ldr	r6, [r2, #32]
+	ldr	r7, [r1, #32]
+	adcs	r7, r7, r6
+	str	r7, [sp, #40]                   @ 4-byte Spill
+	ldr	r6, [r2, #36]
+	ldr	r7, [r1, #36]
+	adcs	r7, r7, r6
+	str	r7, [sp, #36]                   @ 4-byte Spill
+	ldr	r6, [r2, #40]
+	ldr	r7, [r1, #40]
+	adcs	r7, r7, r6
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	ldr	r6, [r2, #44]
+	ldr	r7, [r1, #44]
+	adcs	r7, r7, r6
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	ldr	r6, [r2, #48]
+	ldr	r7, [r1, #48]
+	adcs	r7, r7, r6
+	str	r7, [sp, #28]                   @ 4-byte Spill
+	ldr	r7, [r1, #52]
+	ldr	r6, [r2, #52]
+	ldr	r1, [r1, #60]
+	adcs	r11, r7, r6
+	ldr	r6, [r2, #56]
+	ldr	r2, [r2, #60]
+	adcs	r7, r4, r6
+	ldr	r6, [sp, #56]                   @ 4-byte Reload
+	adcs	r10, r1, r2
+	mov	r2, #0
+	adc	r1, r2, #0
+	ldm	r3, {r2, r4}
+	subs	r2, r6, r2
+	str	r2, [sp, #12]                   @ 4-byte Spill
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	ldr	r1, [r3, #8]
+	sbcs	r4, r2, r4
+	str	r6, [r0]
+	mov	r6, r8
+	sbcs	r1, r5, r1
+	str	r1, [sp, #4]                    @ 4-byte Spill
+	sbcs	r1, lr, r12
+	str	r1, [sp]                        @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	stmib	r0, {r2, r5, lr}
 	str	r1, [r0, #44]
-	lsrs	r1, r2, #1
-	rrx	r1, r3
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	str	r1, [r0, #48]
-	lsr	r1, r2, #1
-	ldr	r2, [sp, #12]           @ 4-byte Reload
-	orr	r1, r1, r3, lsl #31
-	str	r1, [r0, #52]
-	lsrs	r1, r2, #1
-	rrx	r1, r3
-	str	r1, [r0, #56]
-	lsr	r1, r2, #1
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	orr	r1, r1, r2, lsl #31
+	ldr	r1, [r3, #16]
+	str	r4, [sp, #8]                    @ 4-byte Spill
+	mov	r4, r9
+	sbcs	r1, r6, r1
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	ldr	r1, [r3, #20]
+	str	r7, [sp, #24]                   @ 4-byte Spill
+	sbcs	r1, r4, r1
+	ldr	r7, [sp, #48]                   @ 4-byte Reload
+	str	r1, [sp, #52]                   @ 4-byte Spill
+	ldr	r1, [r3, #24]
+	ldr	lr, [sp, #44]                   @ 4-byte Reload
+	sbcs	r1, r7, r1
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [r3, #28]
+	ldr	r5, [sp, #40]                   @ 4-byte Reload
+	sbcs	r1, lr, r1
+	str	lr, [r0, #28]
+	str	r1, [sp, #44]                   @ 4-byte Spill
+	add	lr, r3, #40
+	ldr	r1, [r3, #32]
+	str	r8, [r0, #16]
+	ldr	r8, [sp, #36]                   @ 4-byte Reload
+	sbcs	r1, r5, r1
+	ldr	r6, [r3, #36]
+	str	r9, [r0, #20]
+	str	r7, [r0, #24]
+	ldr	r9, [sp, #32]                   @ 4-byte Reload
+	ldm	lr, {r4, r7, lr}
+	str	r8, [r0, #36]
+	sbcs	r8, r8, r6
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	str	r5, [r0, #32]
+	sbcs	r5, r9, r4
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	sbcs	r4, r1, r7
+	str	r11, [sp, #20]                  @ 4-byte Spill
+	str	r11, [r0, #52]
+	sbcs	r6, r2, lr
+	ldr	r11, [r3, #52]
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	ldr	r12, [sp, #24]                  @ 4-byte Reload
+	str	r10, [sp, #16]                  @ 4-byte Spill
+	str	r10, [r0, #60]
+	ldr	r10, [r3, #56]
+	str	r2, [r0, #48]
+	sbcs	r2, r1, r11
+	ldr	r3, [r3, #60]
+	sbcs	lr, r12, r10
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	str	r9, [r0, #40]
+	sbcs	r1, r1, r3
+	ldr	r3, [sp, #60]                   @ 4-byte Reload
+	str	r12, [r0, #56]
+	sbc	r3, r3, #0
+	tst	r3, #1
+	bne	.LBB84_2
+@ %bb.1:                                @ %nocarry
+	ldr	r3, [sp, #12]                   @ 4-byte Reload
+	str	r3, [r0]
+	ldr	r3, [sp, #8]                    @ 4-byte Reload
+	str	r3, [r0, #4]
+	ldr	r3, [sp, #4]                    @ 4-byte Reload
+	str	r3, [r0, #8]
+	ldr	r3, [sp]                        @ 4-byte Reload
+	str	r3, [r0, #12]
+	ldr	r3, [sp, #56]                   @ 4-byte Reload
+	str	r3, [r0, #16]
+	ldr	r3, [sp, #52]                   @ 4-byte Reload
+	str	r3, [r0, #20]
+	ldr	r3, [sp, #48]                   @ 4-byte Reload
+	str	r3, [r0, #24]
+	ldr	r3, [sp, #44]                   @ 4-byte Reload
+	str	r3, [r0, #28]
+	ldr	r3, [sp, #40]                   @ 4-byte Reload
+	str	r3, [r0, #32]
+	str	r8, [r0, #36]
+	str	r5, [r0, #40]
+	str	r4, [r0, #44]
+	str	r6, [r0, #48]
+	str	r2, [r0, #52]
+	str	lr, [r0, #56]
 	str	r1, [r0, #60]
-	lsr	r1, r2, #1
-	str	r1, [r0, #64]
-	add	sp, sp, #20
+.LBB84_2:                               @ %carry
+	add	sp, sp, #68
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end263:
-	.size	mcl_fp_shr1_17L, .Lfunc_end263-mcl_fp_shr1_17L
+.Lfunc_end84:
+	.size	mcl_fp_add16L, .Lfunc_end84-mcl_fp_add16L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_add17L
-	.align	2
-	.type	mcl_fp_add17L,%function
-mcl_fp_add17L:                          @ @mcl_fp_add17L
+                                        @ -- End function
+	.globl	mcl_fp_addNF16L                 @ -- Begin function mcl_fp_addNF16L
+	.p2align	2
+	.type	mcl_fp_addNF16L,%function
+	.code	32                              @ @mcl_fp_addNF16L
+mcl_fp_addNF16L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#64
-	sub	sp, sp, #64
+	.pad	#96
+	sub	sp, sp, #96
 	ldr	r9, [r1]
-	ldmib	r1, {r8, lr}
-	ldr	r12, [r1, #12]
+	add	r11, r3, #32
 	ldm	r2, {r4, r5, r6, r7}
+	ldmib	r1, {r8, lr}
 	adds	r9, r4, r9
-	ldr	r4, [r1, #24]
-	adcs	r5, r5, r8
-	mov	r8, r9
-	adcs	r6, r6, lr
-	str	r5, [sp, #32]           @ 4-byte Spill
-	ldr	r5, [r1, #20]
-	str	r8, [r0]
-	adcs	r7, r7, r12
-	str	r6, [sp, #28]           @ 4-byte Spill
+	adcs	r8, r5, r8
+	ldr	r12, [r1, #12]
+	adcs	lr, r6, lr
 	ldr	r6, [r1, #16]
-	ldr	lr, [sp, #32]           @ 4-byte Reload
-	str	r7, [sp, #24]           @ 4-byte Spill
+	adcs	r10, r7, r12
 	ldr	r7, [r2, #16]
-	str	lr, [r0, #4]
-	adcs	r10, r7, r6
+	ldr	r4, [r1, #20]
+	adcs	r5, r7, r6
 	ldr	r7, [r2, #20]
-	ldr	r6, [r2, #28]
-	str	r10, [sp, #4]           @ 4-byte Spill
-	adcs	r7, r7, r5
-	ldr	r5, [r2, #44]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	ldr	r9, [sp, #20]           @ 4-byte Reload
-	adcs	r7, r7, r4
-	ldr	r4, [r2, #48]
-	str	r7, [sp, #60]           @ 4-byte Spill
+	ldr	r6, [r2, #24]
+	adcs	r4, r7, r4
+	ldr	r7, [r1, #24]
+	str	r4, [sp, #88]                   @ 4-byte Spill
+	adcs	r4, r6, r7
 	ldr	r7, [r1, #28]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #32]
-	str	r7, [sp, #12]           @ 4-byte Spill
+	ldr	r6, [r2, #28]
+	str	r4, [sp, #84]                   @ 4-byte Spill
+	adcs	r4, r6, r7
 	ldr	r7, [r1, #32]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #36]
-	str	r7, [sp, #56]           @ 4-byte Spill
+	ldr	r6, [r2, #32]
+	str	r4, [sp, #80]                   @ 4-byte Spill
+	adcs	r4, r6, r7
 	ldr	r7, [r1, #36]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #40]
-	str	r7, [sp, #40]           @ 4-byte Spill
+	ldr	r6, [r2, #36]
+	str	r4, [sp, #76]                   @ 4-byte Spill
+	adcs	r4, r6, r7
 	ldr	r7, [r1, #40]
+	ldr	r6, [r2, #40]
+	str	r5, [sp, #92]                   @ 4-byte Spill
 	adcs	r7, r6, r7
-	ldr	r6, [r1, #44]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	adcs	r7, r5, r6
-	ldr	r5, [r1, #48]
-	ldr	r6, [r2, #56]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r2, #52]
-	adcs	r11, r4, r5
-	ldr	r4, [r1, #52]
-	ldr	r5, [sp, #24]           @ 4-byte Reload
-	str	r11, [sp, #8]           @ 4-byte Spill
-	adcs	r7, r7, r4
-	ldr	r4, [sp, #28]           @ 4-byte Reload
-	str	r7, [sp, #44]           @ 4-byte Spill
+	str	r7, [sp, #68]                   @ 4-byte Spill
+	ldr	r7, [r1, #44]
+	ldr	r6, [r2, #44]
+	ldr	r5, [r3, #8]
+	adcs	r7, r6, r7
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	ldr	r7, [r1, #48]
+	ldr	r6, [r2, #48]
+	str	r4, [sp, #72]                   @ 4-byte Spill
+	adcs	r7, r6, r7
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	ldr	r7, [r1, #52]
+	ldr	r6, [r2, #52]
+	str	lr, [sp, #44]                   @ 4-byte Spill
+	adcs	r7, r6, r7
+	str	r7, [sp, #8]                    @ 4-byte Spill
 	ldr	r7, [r1, #56]
-	str	r4, [r0, #8]
-	str	r5, [r0, #12]
-	str	r10, [r0, #16]
-	str	r9, [r0, #20]
-	ldr	r10, [sp, #12]          @ 4-byte Reload
-	adcs	r12, r6, r7
-	ldr	r7, [r1, #60]
-	ldr	r6, [r2, #60]
-	ldr	r1, [r1, #64]
-	ldr	r2, [r2, #64]
-	adcs	r6, r6, r7
-	adcs	r2, r2, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	str	r2, [sp, #36]           @ 4-byte Spill
-	str	r1, [r0, #24]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	str	r10, [r0, #28]
-	str	r2, [r0, #64]
-	mov	r2, #0
-	str	r1, [r0, #32]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r1, [r0, #36]
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	str	r1, [r0, #40]
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	str	r1, [r0, #44]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	str	r11, [r0, #48]
-	mov	r11, r12
-	str	r1, [r0, #52]
-	adc	r1, r2, #0
-	str	r12, [r0, #56]
-	str	r6, [r0, #60]
-	mov	r12, r6
-	str	r1, [sp, #16]           @ 4-byte Spill
-	ldm	r3, {r6, r7}
-	ldr	r1, [r3, #8]
-	ldr	r2, [r3, #12]
-	subs	r6, r8, r6
-	sbcs	r7, lr, r7
-	str	r6, [sp]                @ 4-byte Spill
-	sbcs	r1, r4, r1
-	str	r7, [sp, #32]           @ 4-byte Spill
-	str	r1, [sp, #28]           @ 4-byte Spill
-	sbcs	r1, r5, r2
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	str	r1, [sp, #24]           @ 4-byte Spill
-	ldr	r1, [r3, #16]
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r1, [sp, #4]            @ 4-byte Spill
-	ldr	r1, [r3, #20]
-	sbcs	r9, r9, r1
-	ldr	r1, [r3, #24]
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r1, [sp, #60]           @ 4-byte Spill
-	ldr	r1, [r3, #28]
-	sbcs	r10, r10, r1
-	ldr	r1, [r3, #32]
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #56]           @ 4-byte Spill
-	ldr	r1, [r3, #36]
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [r3, #40]
-	sbcs	lr, r2, r1
-	ldr	r1, [r3, #44]
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	sbcs	r8, r2, r1
-	ldr	r1, [r3, #48]
-	ldr	r2, [sp, #8]            @ 4-byte Reload
-	sbcs	r4, r2, r1
-	ldr	r1, [r3, #52]
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	sbcs	r5, r2, r1
-	ldr	r1, [r3, #56]
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	sbcs	r7, r11, r1
+	ldr	r6, [r2, #56]
+	ldr	r1, [r1, #60]
+	ldr	r2, [r2, #60]
+	adcs	r7, r6, r7
+	str	r7, [sp, #4]                    @ 4-byte Spill
+	adc	r12, r2, r1
+	ldm	r3, {r2, r7}
+	subs	r2, r9, r2
+	str	r2, [sp, #56]                   @ 4-byte Spill
+	sbcs	r2, r8, r7
+	str	r2, [sp, #48]                   @ 4-byte Spill
+	sbcs	r2, lr, r5
+	ldr	r4, [r3, #12]
+	add	lr, r3, #16
 	ldr	r1, [r3, #60]
-	sbcs	r6, r12, r1
-	ldr	r1, [r3, #64]
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #16]           @ 4-byte Reload
+	str	r2, [sp, #40]                   @ 4-byte Spill
+	sbcs	r2, r10, r4
+	str	r1, [sp]                        @ 4-byte Spill
+	ldr	r1, [r3, #28]
+	ldm	lr, {r3, r4, lr}
+	ldr	r5, [sp, #92]                   @ 4-byte Reload
+	str	r9, [sp, #60]                   @ 4-byte Spill
+	sbcs	r3, r5, r3
+	str	r3, [sp, #28]                   @ 4-byte Spill
+	ldr	r3, [sp, #88]                   @ 4-byte Reload
+	str	r8, [sp, #52]                   @ 4-byte Spill
+	sbcs	r3, r3, r4
+	str	r3, [sp, #24]                   @ 4-byte Spill
+	ldr	r3, [sp, #84]                   @ 4-byte Reload
+	str	r10, [sp, #36]                  @ 4-byte Spill
+	sbcs	r3, r3, lr
+	str	r3, [sp, #20]                   @ 4-byte Spill
+	ldr	r3, [sp, #80]                   @ 4-byte Reload
+	str	r2, [sp, #32]                   @ 4-byte Spill
+	sbcs	r1, r3, r1
+	ldm	r11, {r2, r6, r7, r8, r9, r10, r11}
+	ldr	r3, [sp, #76]                   @ 4-byte Reload
+	str	r1, [sp, #12]                   @ 4-byte Spill
+	sbcs	r4, r3, r2
+	ldr	r3, [sp, #72]                   @ 4-byte Reload
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	sbcs	r6, r3, r6
+	ldr	lr, [sp, #16]                   @ 4-byte Reload
+	sbcs	r7, r1, r7
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r2, [sp, #8]                    @ 4-byte Reload
+	sbcs	r8, r1, r8
+	ldr	r1, [sp, #4]                    @ 4-byte Reload
+	sbcs	r9, lr, r9
+	ldr	r5, [sp]                        @ 4-byte Reload
+	sbcs	r3, r2, r10
+	sbcs	r11, r1, r11
+	sbc	r10, r12, r5
+	cmn	r10, #1
+	movle	r11, r1
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	movle	r3, r2
+	movgt	r12, r10
+	cmn	r10, #1
+	ldr	r2, [sp, #12]                   @ 4-byte Reload
+	movle	r8, r1
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	movle	r9, lr
+	str	r12, [r0, #60]
+	str	r11, [r0, #56]
+	movle	r7, r1
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	cmn	r10, #1
+	str	r3, [r0, #52]
+	str	r9, [r0, #48]
+	movle	r6, r1
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	str	r8, [r0, #44]
+	str	r7, [r0, #40]
+	movle	r4, r1
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	str	r6, [r0, #36]
+	str	r4, [r0, #32]
+	movle	r2, r1
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	str	r2, [r0, #28]
+	cmn	r10, #1
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	movle	r2, r1
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	str	r2, [r0, #24]
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	movle	r2, r1
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	str	r2, [r0, #20]
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	movle	r2, r1
+	cmn	r10, #1
+	str	r2, [r0, #16]
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	movle	r1, r2
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	str	r1, [r0, #12]
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	movle	r1, r2
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	str	r1, [r0, #8]
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	movle	r1, r2
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
+	str	r1, [r0, #4]
+	cmn	r10, #1
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	movle	r1, r2
+	str	r1, [r0]
+	add	sp, sp, #96
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	mov	pc, lr
+.Lfunc_end85:
+	.size	mcl_fp_addNF16L, .Lfunc_end85-mcl_fp_addNF16L
+	.cantunwind
+	.fnend
+                                        @ -- End function
+	.globl	mcl_fp_sub16L                   @ -- Begin function mcl_fp_sub16L
+	.p2align	2
+	.type	mcl_fp_sub16L,%function
+	.code	32                              @ @mcl_fp_sub16L
+mcl_fp_sub16L:
+	.fnstart
+@ %bb.0:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
+	.pad	#60
+	sub	sp, sp, #60
+	ldm	r2, {r9, lr}
+	ldr	r6, [r1]
+	ldmib	r1, {r4, r5, r7}
+	subs	r6, r6, r9
+	ldr	r8, [r2, #8]
+	sbcs	r9, r4, lr
+	ldr	r12, [r2, #12]
+	str	r6, [sp, #56]                   @ 4-byte Spill
+	sbcs	r6, r5, r8
+	sbcs	lr, r7, r12
+	ldr	r5, [r2, #16]
+	ldr	r7, [r1, #16]
+	ldr	r4, [r2, #20]
+	sbcs	r12, r7, r5
+	ldr	r5, [r1, #20]
+	ldr	r7, [r1, #44]
+	sbcs	r10, r5, r4
+	ldr	r4, [r2, #24]
+	ldr	r5, [r1, #24]
+	str	r9, [sp, #20]                   @ 4-byte Spill
+	sbcs	r4, r5, r4
+	str	r4, [sp, #52]                   @ 4-byte Spill
+	ldr	r4, [r2, #28]
+	ldr	r5, [r1, #28]
+	str	r9, [r0, #4]
+	sbcs	r4, r5, r4
+	str	r4, [sp, #48]                   @ 4-byte Spill
+	ldr	r4, [r2, #32]
+	ldr	r5, [r1, #32]
+	str	r6, [sp, #16]                   @ 4-byte Spill
+	sbcs	r4, r5, r4
+	str	r4, [sp, #44]                   @ 4-byte Spill
+	ldr	r4, [r2, #36]
+	ldr	r5, [r1, #36]
+	str	r6, [r0, #8]
+	sbcs	r4, r5, r4
+	str	r4, [sp, #40]                   @ 4-byte Spill
+	ldr	r4, [r2, #40]
+	ldr	r5, [r1, #40]
+	str	lr, [sp, #12]                   @ 4-byte Spill
+	sbcs	r4, r5, r4
+	str	r4, [sp, #36]                   @ 4-byte Spill
+	ldr	r4, [r2, #44]
+	ldr	r5, [r1, #52]
+	sbcs	r8, r7, r4
+	ldr	r7, [r2, #48]
+	ldr	r4, [r1, #48]
+	str	lr, [r0, #12]
+	sbcs	r7, r4, r7
+	ldr	r4, [r2, #52]
+	ldr	lr, [sp, #40]                   @ 4-byte Reload
+	sbcs	r11, r5, r4
+	ldr	r4, [r2, #56]
+	ldr	r5, [r1, #56]
+	mov	r9, r7
+	ldr	r2, [r2, #60]
+	add	r7, sp, #44
+	ldr	r1, [r1, #60]
+	sbcs	r4, r5, r4
+	ldm	r7, {r5, r6, r7}                @ 12-byte Folded Reload
+	sbcs	r1, r1, r2
+	ldr	r2, [sp, #56]                   @ 4-byte Reload
+	str	r2, [r0]
+	add	r2, r0, #44
+	str	r10, [sp, #32]                  @ 4-byte Spill
+	str	r10, [r0, #20]
+	ldr	r10, [sp, #36]                  @ 4-byte Reload
+	str	r12, [r0, #16]
+	str	r7, [r0, #24]
+	str	r6, [r0, #28]
+	str	r5, [r0, #32]
+	str	lr, [r0, #36]
+	str	r10, [r0, #40]
+	stm	r2, {r8, r9, r11}
+	mov	r2, #0
 	sbc	r2, r2, #0
 	tst	r2, #1
-	bne	.LBB264_2
-@ BB#1:                                 @ %nocarry
-	ldr	r2, [sp]                @ 4-byte Reload
+	str	r12, [sp, #24]                  @ 4-byte Spill
+	str	r4, [sp, #28]                   @ 4-byte Spill
+	str	r4, [r0, #56]
+	str	r1, [r0, #60]
+	beq	.LBB86_2
+@ %bb.1:                                @ %carry
+	ldr	r2, [r3]
+	ldr	r12, [sp, #56]                  @ 4-byte Reload
+	str	r1, [sp, #8]                    @ 4-byte Spill
+	adds	r2, r2, r12
+	ldmib	r3, {r1, r4}
 	str	r2, [r0]
-	ldr	r2, [sp, #32]           @ 4-byte Reload
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	str	r4, [sp]                        @ 4-byte Spill
+	adcs	r2, r1, r2
 	str	r2, [r0, #4]
-	ldr	r2, [sp, #28]           @ 4-byte Reload
+	ldr	r1, [sp, #16]                   @ 4-byte Reload
+	ldr	r2, [sp]                        @ 4-byte Reload
+	ldr	r4, [r3, #12]
+	adcs	r2, r2, r1
+	str	r4, [sp, #4]                    @ 4-byte Spill
 	str	r2, [r0, #8]
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	str	r2, [r0, #12]
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	str	r2, [r0, #16]
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r9, [r0, #20]
-	str	r2, [r0, #24]
-	str	r10, [r0, #28]
-	str	r1, [r0, #64]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	ldr	r2, [sp, #4]                    @ 4-byte Reload
+	adcs	r1, r2, r1
+	str	r1, [r0, #12]
+	ldr	r1, [r3, #16]
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [r0, #16]
+	ldr	r1, [r3, #20]
+	ldr	r2, [sp, #32]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [r0, #20]
+	ldr	r1, [r3, #24]
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	adcs	r1, r1, r7
+	str	r1, [r0, #24]
+	ldr	r1, [r3, #28]
+	adcs	r1, r1, r6
+	str	r1, [r0, #28]
+	ldr	r1, [r3, #32]
+	adcs	r1, r1, r5
 	str	r1, [r0, #32]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
+	ldr	r1, [r3, #36]
+	adcs	r1, r1, lr
 	str	r1, [r0, #36]
-	add	r1, r0, #48
-	str	lr, [r0, #40]
-	str	r8, [r0, #44]
-	stm	r1, {r4, r5, r7}
-	str	r6, [r0, #60]
-.LBB264_2:                              @ %carry
-	add	sp, sp, #64
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end264:
-	.size	mcl_fp_add17L, .Lfunc_end264-mcl_fp_add17L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fp_addNF17L
-	.align	2
-	.type	mcl_fp_addNF17L,%function
-mcl_fp_addNF17L:                        @ @mcl_fp_addNF17L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#96
-	sub	sp, sp, #96
-	ldr	r9, [r1]
-	ldmib	r1, {r8, lr}
-	ldr	r12, [r1, #12]
-	ldm	r2, {r4, r5, r6, r7}
-	adds	r11, r4, r9
-	ldr	r4, [r1, #24]
-	adcs	r10, r5, r8
-	ldr	r5, [r1, #20]
-	str	r11, [sp, #8]           @ 4-byte Spill
-	adcs	r8, r6, lr
-	ldr	r6, [r1, #16]
-	str	r10, [sp, #16]          @ 4-byte Spill
-	adcs	r9, r7, r12
-	ldr	r7, [r2, #16]
-	str	r8, [sp, #20]           @ 4-byte Spill
-	str	r9, [sp, #24]           @ 4-byte Spill
-	adcs	r7, r7, r6
-	ldr	r6, [r2, #28]
-	str	r7, [sp, #48]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	adcs	lr, r7, r5
-	ldr	r7, [r2, #24]
-	str	lr, [sp, #4]            @ 4-byte Spill
-	adcs	r7, r7, r4
-	str	r7, [sp, #60]           @ 4-byte Spill
-	ldr	r7, [r1, #28]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #32]
-	str	r7, [sp, #56]           @ 4-byte Spill
-	ldr	r7, [r1, #32]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #36]
-	str	r7, [sp, #52]           @ 4-byte Spill
-	ldr	r7, [r1, #36]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #40]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r1, #40]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #44]
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldr	r7, [r1, #44]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #48]
-	str	r7, [sp, #64]           @ 4-byte Spill
-	ldr	r7, [r1, #48]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #52]
-	str	r7, [sp, #84]           @ 4-byte Spill
-	ldr	r7, [r1, #52]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #56]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r1, #56]
-	adcs	r7, r6, r7
-	ldr	r6, [r2, #60]
-	ldr	r2, [r2, #64]
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r1, #60]
-	ldr	r1, [r1, #64]
-	adcs	r7, r6, r7
-	adc	r1, r2, r1
-	str	r7, [sp, #92]           @ 4-byte Spill
-	str	r1, [sp, #88]           @ 4-byte Spill
-	ldm	r3, {r1, r7}
-	ldr	r6, [r3, #8]
-	ldr	r5, [r3, #12]
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	subs	r12, r11, r1
-	ldr	r1, [r3, #64]
-	ldr	r11, [r3, #36]
-	sbcs	r4, r10, r7
-	ldr	r10, [r3, #32]
-	ldr	r7, [r3, #24]
-	sbcs	r6, r8, r6
-	sbcs	r9, r9, r5
-	ldr	r5, [r3, #28]
-	str	r1, [sp]                @ 4-byte Spill
 	ldr	r1, [r3, #40]
-	str	r1, [sp, #12]           @ 4-byte Spill
+	adcs	r1, r1, r10
+	str	r1, [r0, #40]
 	ldr	r1, [r3, #44]
-	str	r1, [sp, #28]           @ 4-byte Spill
+	adcs	r1, r1, r8
+	str	r1, [r0, #44]
 	ldr	r1, [r3, #48]
-	str	r1, [sp, #32]           @ 4-byte Spill
+	adcs	r1, r1, r9
+	str	r1, [r0, #48]
 	ldr	r1, [r3, #52]
-	str	r1, [sp, #36]           @ 4-byte Spill
+	adcs	r1, r1, r11
+	str	r1, [r0, #52]
 	ldr	r1, [r3, #56]
-	str	r1, [sp, #40]           @ 4-byte Spill
+	adcs	r1, r1, r2
+	str	r1, [r0, #56]
 	ldr	r1, [r3, #60]
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [r3, #20]
-	ldr	r3, [r3, #16]
-	sbcs	r2, r2, r3
-	sbcs	r3, lr, r1
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	sbcs	lr, r1, r7
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	sbcs	r5, r1, r5
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	sbcs	r8, r1, r10
-	ldr	r1, [sp, #72]           @ 4-byte Reload
-	sbcs	r11, r1, r11
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	str	r1, [sp, #12]           @ 4-byte Spill
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	str	r1, [sp, #28]           @ 4-byte Spill
-	ldr	r1, [sp, #84]           @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #36]           @ 4-byte Reload
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #40]           @ 4-byte Reload
-	str	r1, [sp, #36]           @ 4-byte Spill
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	r1, [sp, #40]           @ 4-byte Spill
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r1, [sp, #44]           @ 4-byte Spill
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	sbc	r10, r1, r7
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	asr	r1, r10, #31
-	cmp	r1, #0
-	movlt	r12, r7
-	ldr	r7, [sp, #16]           @ 4-byte Reload
-	str	r12, [r0]
-	movlt	r4, r7
-	str	r4, [r0, #4]
-	ldr	r4, [sp, #20]           @ 4-byte Reload
-	movlt	r6, r4
-	cmp	r1, #0
-	str	r6, [r0, #8]
-	ldr	r6, [sp, #24]           @ 4-byte Reload
-	movlt	r9, r6
-	ldr	r6, [sp, #48]           @ 4-byte Reload
-	str	r9, [r0, #12]
-	movlt	r2, r6
-	str	r2, [r0, #16]
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	movlt	r3, r2
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #20]
-	ldr	r3, [sp, #12]           @ 4-byte Reload
-	movlt	lr, r2
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	lr, [r0, #24]
-	movlt	r5, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	r5, [r0, #28]
-	movlt	r8, r2
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r8, [r0, #32]
-	movlt	r11, r2
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r11, [r0, #36]
-	movlt	r3, r2
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	str	r3, [r0, #40]
-	ldr	r3, [sp, #28]           @ 4-byte Reload
-	movlt	r3, r2
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #44]
-	ldr	r3, [sp, #32]           @ 4-byte Reload
-	movlt	r3, r2
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	str	r3, [r0, #48]
-	ldr	r3, [sp, #36]           @ 4-byte Reload
-	movlt	r3, r2
-	ldr	r2, [sp, #76]           @ 4-byte Reload
-	str	r3, [r0, #52]
-	ldr	r3, [sp, #40]           @ 4-byte Reload
-	movlt	r3, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #92]           @ 4-byte Reload
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r3, [r0, #56]
-	movlt	r2, r1
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	str	r2, [r0, #60]
-	movlt	r10, r1
-	str	r10, [r0, #64]
-	add	sp, sp, #96
+	ldr	r2, [sp, #8]                    @ 4-byte Reload
+	adc	r1, r1, r2
+	str	r1, [r0, #60]
+.LBB86_2:                               @ %nocarry
+	add	sp, sp, #60
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end265:
-	.size	mcl_fp_addNF17L, .Lfunc_end265-mcl_fp_addNF17L
+.Lfunc_end86:
+	.size	mcl_fp_sub16L, .Lfunc_end86-mcl_fp_sub16L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_sub17L
-	.align	2
-	.type	mcl_fp_sub17L,%function
-mcl_fp_sub17L:                          @ @mcl_fp_sub17L
+                                        @ -- End function
+	.globl	mcl_fp_subNF16L                 @ -- Begin function mcl_fp_subNF16L
+	.p2align	2
+	.type	mcl_fp_subNF16L,%function
+	.code	32                              @ @mcl_fp_subNF16L
+mcl_fp_subNF16L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#92
-	sub	sp, sp, #92
-	ldm	r2, {r8, r9, lr}
-	ldr	r12, [r2, #12]
-	ldm	r1, {r4, r5, r6, r7}
-	subs	r4, r4, r8
-	sbcs	r5, r5, r9
-	str	r4, [sp, #68]           @ 4-byte Spill
-	ldr	r4, [r2, #24]
-	sbcs	r6, r6, lr
-	str	r5, [sp, #88]           @ 4-byte Spill
-	ldr	r5, [r2, #20]
-	sbcs	r7, r7, r12
-	str	r6, [sp, #84]           @ 4-byte Spill
-	ldr	r6, [r2, #16]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r1, #16]
-	sbcs	r7, r7, r6
-	ldr	r6, [r1, #28]
-	str	r7, [sp, #76]           @ 4-byte Spill
-	ldr	r7, [r1, #20]
-	sbcs	r7, r7, r5
-	ldr	r5, [r1, #44]
-	str	r7, [sp, #72]           @ 4-byte Spill
-	ldr	r7, [r1, #24]
-	sbcs	r11, r7, r4
-	ldr	r7, [r2, #28]
-	ldr	r4, [r2, #52]
-	sbcs	r10, r6, r7
+	.pad	#84
+	sub	sp, sp, #84
 	ldr	r7, [r2, #32]
-	ldr	r6, [r1, #32]
-	str	r10, [sp, #60]          @ 4-byte Spill
-	sbcs	r9, r6, r7
+	str	r7, [sp, #56]                   @ 4-byte Spill
 	ldr	r7, [r2, #36]
-	ldr	r6, [r1, #36]
-	str	r9, [sp, #56]           @ 4-byte Spill
-	sbcs	r7, r6, r7
-	ldr	r6, [r1, #40]
-	str	r7, [sp, #64]           @ 4-byte Spill
+	str	r7, [sp, #60]                   @ 4-byte Spill
 	ldr	r7, [r2, #40]
-	sbcs	r8, r6, r7
+	str	r7, [sp, #64]                   @ 4-byte Spill
 	ldr	r7, [r2, #44]
-	str	r8, [sp, #52]           @ 4-byte Spill
-	sbcs	lr, r5, r7
+	str	r7, [sp, #68]                   @ 4-byte Spill
 	ldr	r7, [r2, #48]
-	ldr	r5, [r1, #48]
-	str	lr, [sp, #48]           @ 4-byte Spill
-	sbcs	r6, r5, r7
-	ldr	r5, [r1, #52]
-	sbcs	r7, r5, r4
-	ldr	r4, [r2, #56]
-	ldr	r5, [r1, #56]
-	str	r7, [sp, #44]           @ 4-byte Spill
-	sbcs	r12, r5, r4
-	ldr	r4, [r2, #60]
-	ldr	r5, [r1, #60]
-	ldr	r2, [r2, #64]
-	ldr	r1, [r1, #64]
-	str	r12, [sp, #40]          @ 4-byte Spill
-	sbcs	r4, r5, r4
-	ldr	r5, [sp, #64]           @ 4-byte Reload
-	sbcs	r1, r1, r2
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r2, [r0]
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	str	r2, [r0, #4]
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	str	r2, [r0, #8]
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	str	r2, [r0, #12]
-	ldr	r2, [sp, #76]           @ 4-byte Reload
-	str	r2, [r0, #16]
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	str	r2, [r0, #20]
-	add	r2, r0, #36
-	str	r11, [r0, #24]
-	str	r10, [r0, #28]
-	str	r1, [r0, #64]
-	str	r9, [r0, #32]
-	stm	r2, {r5, r8, lr}
-	add	r2, r0, #48
-	stm	r2, {r6, r7, r12}
-	mov	r2, #0
-	str	r4, [r0, #60]
-	sbc	r2, r2, #0
-	tst	r2, #1
-	beq	.LBB266_2
-@ BB#1:                                 @ %carry
-	ldr	r2, [r3, #64]
-	mov	r9, r4
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldm	r3, {r4, r12}
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r6, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r3, #8]
-	str	r1, [sp, #32]           @ 4-byte Spill
-	ldr	r1, [r3, #12]
-	ldr	lr, [r3, #20]
-	adds	r8, r4, r2
-	ldr	r2, [r3, #32]
-	str	r8, [r0]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r3, #36]
-	str	r2, [sp, #4]            @ 4-byte Spill
+	str	r7, [sp, #72]                   @ 4-byte Spill
+	ldr	r7, [r2, #52]
+	str	r7, [sp, #80]                   @ 4-byte Spill
+	ldr	r7, [r2, #56]
+	str	r7, [sp, #76]                   @ 4-byte Spill
+	ldr	r7, [r2, #60]
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	ldr	r7, [r1, #60]
+	str	r7, [sp, #28]                   @ 4-byte Spill
+	ldr	r7, [r1, #56]
+	str	r7, [sp, #24]                   @ 4-byte Spill
+	ldr	r7, [r1, #52]
+	str	r7, [sp, #20]                   @ 4-byte Spill
+	ldr	r7, [r1, #48]
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	ldr	r4, [r2, #24]
+	ldm	r2, {r5, r6, r7, r8, r9, r11}
+	ldr	r2, [r2, #28]
+	str	r2, [sp, #48]                   @ 4-byte Spill
+	ldm	r1, {r2, r12, lr}
+	subs	r2, r2, r5
+	str	r4, [sp, #12]                   @ 4-byte Spill
+	sbcs	r10, r12, r6
+	ldr	r4, [r1, #12]
+	sbcs	lr, lr, r7
+	str	r2, [sp, #44]                   @ 4-byte Spill
+	sbcs	r5, r4, r8
+	str	r5, [sp, #52]                   @ 4-byte Spill
+	ldr	r5, [r1, #16]
+	ldr	r4, [sp, #12]                   @ 4-byte Reload
+	sbcs	r5, r5, r9
+	str	r5, [sp, #40]                   @ 4-byte Spill
+	ldr	r5, [r1, #20]
+	ldr	r2, [r1, #44]
+	sbcs	r5, r5, r11
+	str	r5, [sp, #36]                   @ 4-byte Spill
+	ldr	r5, [r1, #24]
+	ldr	r6, [r1, #40]
+	sbcs	r5, r5, r4
+	str	r5, [sp]                        @ 4-byte Spill
+	ldr	r7, [r1, #36]
+	ldr	r5, [r1, #32]
+	ldr	r1, [r1, #28]
+	ldr	r4, [sp, #48]                   @ 4-byte Reload
+	ldr	r9, [r3]
+	sbcs	r1, r1, r4
+	str	r1, [sp, #48]                   @ 4-byte Spill
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	ldr	r4, [r3, #16]
+	sbcs	r1, r5, r1
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	ldr	r5, [r3, #12]
+	sbcs	r1, r7, r1
+	str	r1, [sp, #60]                   @ 4-byte Spill
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	ldr	r7, [r3, #4]
+	sbcs	r1, r6, r1
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r6, [r3, #8]
+	sbcs	r1, r2, r1
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	ldr	r2, [sp, #16]                   @ 4-byte Reload
+	str	r10, [sp, #8]                   @ 4-byte Spill
+	sbcs	r1, r2, r1
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	ldr	r2, [sp, #20]                   @ 4-byte Reload
+	ldr	r12, [r3, #20]
+	sbcs	r1, r2, r1
+	str	r1, [sp, #80]                   @ 4-byte Spill
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	ldr	r2, [sp, #24]                   @ 4-byte Reload
+	str	lr, [sp, #4]                    @ 4-byte Spill
+	sbcs	r1, r2, r1
+	str	r1, [sp, #76]                   @ 4-byte Spill
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
+	ldr	r2, [sp, #28]                   @ 4-byte Reload
+	ldr	r11, [r3, #28]
+	sbc	r8, r2, r1
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [r3, #36]
+	adds	r9, r2, r9
+	ldr	r2, [sp, #52]                   @ 4-byte Reload
+	adcs	r7, r10, r7
+	str	r1, [sp, #32]                   @ 4-byte Spill
+	adcs	r6, lr, r6
+	ldr	r1, [r3, #32]
+	adcs	r5, r2, r5
+	ldr	r2, [sp, #40]                   @ 4-byte Reload
+	str	r1, [sp, #28]                   @ 4-byte Spill
+	adcs	r10, r2, r4
+	ldr	r2, [sp, #36]                   @ 4-byte Reload
+	ldr	r1, [r3, #24]
+	adcs	lr, r2, r12
+	ldr	r12, [sp]                       @ 4-byte Reload
+	ldr	r2, [sp, #48]                   @ 4-byte Reload
+	adcs	r1, r12, r1
+	ldr	r4, [sp, #28]                   @ 4-byte Reload
+	adcs	r11, r2, r11
+	ldr	r2, [sp, #56]                   @ 4-byte Reload
+	adcs	r2, r2, r4
+	str	r2, [sp, #16]                   @ 4-byte Spill
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
+	ldr	r4, [sp, #32]                   @ 4-byte Reload
+	adcs	r2, r2, r4
+	str	r2, [sp, #20]                   @ 4-byte Spill
 	ldr	r2, [r3, #40]
-	str	r2, [sp, #8]            @ 4-byte Spill
+	ldr	r4, [sp, #64]                   @ 4-byte Reload
+	adcs	r2, r4, r2
+	str	r2, [sp, #32]                   @ 4-byte Spill
 	ldr	r2, [r3, #44]
-	str	r2, [sp, #12]           @ 4-byte Spill
+	ldr	r4, [sp, #68]                   @ 4-byte Reload
+	adcs	r2, r4, r2
+	str	r2, [sp, #28]                   @ 4-byte Spill
 	ldr	r2, [r3, #48]
-	str	r2, [sp, #16]           @ 4-byte Spill
+	ldr	r4, [sp, #72]                   @ 4-byte Reload
+	adcs	r2, r4, r2
+	str	r2, [sp, #24]                   @ 4-byte Spill
 	ldr	r2, [r3, #52]
-	str	r2, [sp, #20]           @ 4-byte Spill
+	ldr	r4, [sp, #80]                   @ 4-byte Reload
+	adcs	r2, r4, r2
+	str	r2, [sp, #12]                   @ 4-byte Spill
 	ldr	r2, [r3, #56]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldr	r2, [r3, #60]
-	str	r2, [sp, #68]           @ 4-byte Spill
-	ldr	r2, [sp, #88]           @ 4-byte Reload
-	adcs	r6, r12, r2
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	adcs	r7, r7, r2
-	ldr	r2, [sp, #80]           @ 4-byte Reload
-	adcs	r4, r1, r2
-	ldr	r2, [r3, #28]
-	ldr	r1, [r3, #24]
-	ldr	r3, [r3, #16]
-	stmib	r0, {r6, r7}
-	ldr	r7, [sp, #76]           @ 4-byte Reload
-	str	r4, [r0, #12]
-	ldr	r6, [sp, #16]           @ 4-byte Reload
-	ldr	r4, [sp, #24]           @ 4-byte Reload
-	adcs	r3, r3, r7
-	ldr	r7, [sp, #72]           @ 4-byte Reload
-	str	r3, [r0, #16]
-	ldr	r3, [sp, #60]           @ 4-byte Reload
-	adcs	r7, lr, r7
-	adcs	r1, r1, r11
-	str	r7, [r0, #20]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
-	adcs	r3, r2, r3
+	ldr	r4, [sp, #76]                   @ 4-byte Reload
+	ldr	r3, [r3, #60]
+	adcs	r2, r4, r2
+	ldr	r4, [sp, #44]                   @ 4-byte Reload
+	adc	r3, r8, r3
+	cmp	r8, #0
+	movpl	r1, r12
+	add	r12, r0, #12
+	movpl	r9, r4
+	ldr	r4, [sp, #36]                   @ 4-byte Reload
+	str	r9, [r0]
+	movpl	lr, r4
+	ldr	r4, [sp, #40]                   @ 4-byte Reload
+	movpl	r10, r4
+	ldr	r4, [sp, #52]                   @ 4-byte Reload
+	movpl	r5, r4
+	ldr	r4, [sp, #4]                    @ 4-byte Reload
+	cmp	r8, #0
+	movpl	r6, r4
+	ldr	r4, [sp, #8]                    @ 4-byte Reload
+	str	r6, [r0, #8]
+	ldr	r6, [sp, #28]                   @ 4-byte Reload
+	movpl	r7, r4
+	cmp	r8, #0
+	str	r7, [r0, #4]
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	stm	r12, {r5, r10, lr}
+	ldr	r12, [sp, #16]                  @ 4-byte Reload
+	movpl	r12, r7
+	ldr	r7, [sp, #60]                   @ 4-byte Reload
+	ldr	lr, [sp, #20]                   @ 4-byte Reload
+	ldr	r4, [sp, #48]                   @ 4-byte Reload
+	movpl	lr, r7
+	ldr	r7, [sp, #64]                   @ 4-byte Reload
+	ldr	r5, [sp, #24]                   @ 4-byte Reload
+	movpl	r11, r4
+	cmp	r8, #0
+	ldr	r4, [sp, #32]                   @ 4-byte Reload
+	movpl	r4, r7
+	ldr	r7, [sp, #68]                   @ 4-byte Reload
 	str	r1, [r0, #24]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	ldr	r2, [sp]                @ 4-byte Reload
-	str	r3, [r0, #28]
-	ldr	r3, [sp, #8]            @ 4-byte Reload
-	adcs	r12, r2, r1
-	ldr	r1, [sp, #4]            @ 4-byte Reload
+	ldr	r1, [sp, #12]                   @ 4-byte Reload
+	movpl	r6, r7
+	ldr	r7, [sp, #72]                   @ 4-byte Reload
+	str	r11, [r0, #28]
 	str	r12, [r0, #32]
-	add	r12, r0, #36
-	adcs	r2, r1, r5
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	ldr	r5, [sp, #20]           @ 4-byte Reload
-	adcs	r3, r3, r1
-	ldr	r1, [sp, #48]           @ 4-byte Reload
-	adcs	r7, r7, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	adcs	r6, r6, r1
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	adcs	r5, r5, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	adcs	r4, r4, r1
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	stm	r12, {r2, r3, r7}
-	str	r6, [r0, #48]
-	str	r5, [r0, #52]
-	str	r4, [r0, #56]
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	adcs	r1, r1, r9
-	str	r1, [r0, #60]
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	adc	r1, r1, r2
-	str	r1, [r0, #64]
-.LBB266_2:                              @ %nocarry
-	add	sp, sp, #92
+	movpl	r5, r7
+	ldr	r7, [sp, #80]                   @ 4-byte Reload
+	cmp	r8, #0
+	str	lr, [r0, #36]
+	str	r4, [r0, #40]
+	movpl	r3, r8
+	movpl	r1, r7
+	ldr	r7, [sp, #76]                   @ 4-byte Reload
+	str	r6, [r0, #44]
+	str	r5, [r0, #48]
+	add	r0, r0, #52
+	movpl	r2, r7
+	stm	r0, {r1, r2, r3}
+	add	sp, sp, #84
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end266:
-	.size	mcl_fp_sub17L, .Lfunc_end266-mcl_fp_sub17L
+.Lfunc_end87:
+	.size	mcl_fp_subNF16L, .Lfunc_end87-mcl_fp_subNF16L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fp_subNF17L
-	.align	2
-	.type	mcl_fp_subNF17L,%function
-mcl_fp_subNF17L:                        @ @mcl_fp_subNF17L
+                                        @ -- End function
+	.globl	mcl_fpDbl_add16L                @ -- Begin function mcl_fpDbl_add16L
+	.p2align	2
+	.type	mcl_fpDbl_add16L,%function
+	.code	32                              @ @mcl_fpDbl_add16L
+mcl_fpDbl_add16L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#104
-	sub	sp, sp, #104
-	mov	r12, r0
-	ldr	r0, [r2, #64]
-	ldr	r11, [r2]
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r1, #64]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r2, #32]
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [r2, #36]
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [r2, #40]
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [r2, #44]
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [r2, #48]
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [r2, #52]
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [r2, #56]
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [r2, #60]
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [r1, #60]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r1, #56]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r1, #52]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r1, #48]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldmib	r2, {r5, r6, r7, r8, r9, r10}
-	ldr	r0, [r2, #28]
-	ldr	r2, [r1]
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldmib	r1, {r0, lr}
-	ldr	r4, [r1, #12]
-	subs	r2, r2, r11
-	add	r11, r3, #8
-	str	r2, [sp, #12]           @ 4-byte Spill
-	ldr	r2, [r1, #44]
-	sbcs	r0, r0, r5
-	ldr	r5, [r1, #40]
-	str	r0, [sp, #8]            @ 4-byte Spill
-	sbcs	r0, lr, r6
-	ldr	r6, [r1, #36]
-	str	r0, [sp, #48]           @ 4-byte Spill
-	sbcs	r0, r4, r7
+	.pad	#140
+	sub	sp, sp, #140
+	ldr	r9, [r2]
+	ldm	r1, {r4, r5, r6, r7}
+	ldmib	r2, {r8, lr}
+	adds	r10, r4, r9
+	adcs	r11, r5, r8
+	ldr	r12, [r2, #12]
+	adcs	r6, r6, lr
+	str	r6, [sp, #104]                  @ 4-byte Spill
+	adcs	r7, r7, r12
+	str	r7, [sp, #92]                   @ 4-byte Spill
+	ldr	r6, [r2, #16]
 	ldr	r7, [r1, #16]
-	str	r0, [sp, #52]           @ 4-byte Spill
-	sbcs	r0, r7, r8
+	ldr	r4, [r2, #20]
+	adcs	r7, r7, r6
+	str	r7, [sp, #88]                   @ 4-byte Spill
 	ldr	r7, [r1, #20]
-	str	r0, [sp, #56]           @ 4-byte Spill
-	sbcs	r0, r7, r9
-	ldr	r7, [r1, #24]
-	str	r0, [sp, #60]           @ 4-byte Spill
-	sbcs	r0, r7, r10
-	ldr	r7, [r1, #32]
-	ldr	r1, [r1, #28]
-	str	r0, [sp, #68]           @ 4-byte Spill
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #64]           @ 4-byte Spill
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	sbcs	r0, r7, r0
-	str	r0, [sp, #72]           @ 4-byte Spill
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	sbcs	r0, r6, r0
-	str	r0, [sp, #76]           @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	sbcs	r0, r5, r0
-	str	r0, [sp, #80]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	sbcs	r0, r2, r0
-	str	r0, [sp, #84]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #88]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #92]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #96]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	sbcs	r0, r1, r0
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #100]          @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	sbc	r0, r1, r0
-	str	r0, [sp, #44]           @ 4-byte Spill
-	ldr	r0, [r3, #64]
-	str	r0, [sp, #40]           @ 4-byte Spill
-	ldr	r0, [r3, #36]
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [r3, #40]
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [r3, #44]
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [r3, #48]
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [r3, #52]
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [r3, #56]
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [r3, #60]
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [r3, #32]
-	str	r0, [sp]                @ 4-byte Spill
-	ldm	r3, {r2, r7}
-	ldm	r11, {r1, r4, r5, r6, r11}
-	ldr	r8, [sp, #12]           @ 4-byte Reload
-	ldr	r10, [sp, #8]           @ 4-byte Reload
-	ldr	r0, [r3, #28]
-	adds	r2, r8, r2
-	adcs	r3, r10, r7
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	adcs	lr, r7, r1
-	ldr	r1, [sp, #52]           @ 4-byte Reload
-	adcs	r4, r1, r4
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r5, r1, r5
-	ldr	r1, [sp, #60]           @ 4-byte Reload
-	adcs	r6, r1, r6
-	ldr	r1, [sp, #68]           @ 4-byte Reload
-	adcs	r7, r1, r11
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r9, r1, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	ldr	r1, [sp]                @ 4-byte Reload
-	adcs	r11, r0, r1
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	ldr	r1, [sp, #4]            @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	str	r0, [sp, #4]            @ 4-byte Spill
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #20]           @ 4-byte Reload
-	str	r0, [sp, #16]           @ 4-byte Spill
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #24]           @ 4-byte Reload
-	str	r0, [sp, #20]           @ 4-byte Spill
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #28]           @ 4-byte Reload
-	str	r0, [sp, #24]           @ 4-byte Spill
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #32]           @ 4-byte Reload
-	str	r0, [sp, #28]           @ 4-byte Spill
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r0, [sp, #32]           @ 4-byte Spill
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	adcs	r0, r0, r1
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	str	r0, [sp, #36]           @ 4-byte Spill
-	ldr	r0, [sp, #44]           @ 4-byte Reload
-	adc	r1, r0, r1
-	str	r1, [sp, #40]           @ 4-byte Spill
-	asr	r1, r0, #31
-	ldr	r0, [sp, #48]           @ 4-byte Reload
-	cmp	r1, #0
-	movge	r2, r8
-	movge	r3, r10
-	str	r2, [r12]
-	ldr	r2, [sp, #4]            @ 4-byte Reload
-	str	r3, [r12, #4]
-	movge	lr, r0
-	ldr	r0, [sp, #52]           @ 4-byte Reload
-	cmp	r1, #0
-	str	lr, [r12, #8]
-	movge	r4, r0
-	ldr	r0, [sp, #56]           @ 4-byte Reload
-	str	r4, [r12, #12]
-	movge	r5, r0
-	ldr	r0, [sp, #60]           @ 4-byte Reload
-	str	r5, [r12, #16]
-	movge	r6, r0
-	ldr	r0, [sp, #68]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r6, [r12, #20]
-	movge	r7, r0
-	ldr	r0, [sp, #64]           @ 4-byte Reload
-	str	r7, [r12, #24]
-	movge	r9, r0
-	ldr	r0, [sp, #72]           @ 4-byte Reload
-	str	r9, [r12, #28]
-	movge	r11, r0
-	ldr	r0, [sp, #76]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r11, [r12, #32]
-	movge	r2, r0
-	ldr	r0, [sp, #80]           @ 4-byte Reload
-	str	r2, [r12, #36]
-	ldr	r2, [sp, #16]           @ 4-byte Reload
-	movge	r2, r0
-	ldr	r0, [sp, #84]           @ 4-byte Reload
-	str	r2, [r12, #40]
-	ldr	r2, [sp, #20]           @ 4-byte Reload
-	movge	r2, r0
-	ldr	r0, [sp, #88]           @ 4-byte Reload
-	cmp	r1, #0
-	str	r2, [r12, #44]
-	ldr	r2, [sp, #24]           @ 4-byte Reload
-	movge	r2, r0
-	ldr	r0, [sp, #92]           @ 4-byte Reload
-	str	r2, [r12, #48]
-	ldr	r2, [sp, #28]           @ 4-byte Reload
-	movge	r2, r0
-	ldr	r0, [sp, #96]           @ 4-byte Reload
-	str	r2, [r12, #52]
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	movge	r2, r0
-	ldr	r0, [sp, #100]          @ 4-byte Reload
-	cmp	r1, #0
-	ldr	r1, [sp, #36]           @ 4-byte Reload
-	str	r2, [r12, #56]
-	movge	r1, r0
-	ldr	r0, [sp, #40]           @ 4-byte Reload
-	str	r1, [r12, #60]
-	ldr	r1, [sp, #44]           @ 4-byte Reload
-	movge	r0, r1
-	str	r0, [r12, #64]
-	add	sp, sp, #104
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	mov	pc, lr
-.Lfunc_end267:
-	.size	mcl_fp_subNF17L, .Lfunc_end267-mcl_fp_subNF17L
-	.cantunwind
-	.fnend
-
-	.globl	mcl_fpDbl_add17L
-	.align	2
-	.type	mcl_fpDbl_add17L,%function
-mcl_fpDbl_add17L:                       @ @mcl_fpDbl_add17L
-	.fnstart
-@ BB#0:
-	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#216
-	sub	sp, sp, #216
-	ldm	r1, {r7, r8, lr}
-	ldr	r12, [r1, #12]
-	ldm	r2, {r4, r5, r6, r9}
-	add	r10, r1, #32
-	adds	r4, r4, r7
-	str	r4, [sp, #104]          @ 4-byte Spill
-	ldr	r4, [r2, #128]
-	str	r4, [sp, #208]          @ 4-byte Spill
-	ldr	r4, [r2, #132]
-	str	r4, [sp, #212]          @ 4-byte Spill
-	adcs	r4, r5, r8
-	adcs	r7, r6, lr
-	str	r4, [sp, #100]          @ 4-byte Spill
-	add	lr, r1, #16
-	str	r7, [sp, #96]           @ 4-byte Spill
-	ldr	r7, [r2, #96]
-	str	r7, [sp, #176]          @ 4-byte Spill
-	ldr	r7, [r2, #100]
-	str	r7, [sp, #180]          @ 4-byte Spill
-	ldr	r7, [r2, #104]
-	str	r7, [sp, #184]          @ 4-byte Spill
-	ldr	r7, [r2, #108]
-	str	r7, [sp, #188]          @ 4-byte Spill
-	ldr	r7, [r2, #112]
-	str	r7, [sp, #192]          @ 4-byte Spill
-	ldr	r7, [r2, #116]
-	str	r7, [sp, #196]          @ 4-byte Spill
-	ldr	r7, [r2, #120]
-	str	r7, [sp, #200]          @ 4-byte Spill
-	ldr	r7, [r2, #124]
-	str	r7, [sp, #204]          @ 4-byte Spill
-	adcs	r7, r9, r12
-	str	r7, [sp, #68]           @ 4-byte Spill
-	ldr	r7, [r2, #64]
-	str	r7, [sp, #144]          @ 4-byte Spill
-	ldr	r7, [r2, #68]
-	str	r7, [sp, #148]          @ 4-byte Spill
-	ldr	r7, [r2, #72]
-	str	r7, [sp, #152]          @ 4-byte Spill
-	ldr	r7, [r2, #76]
-	str	r7, [sp, #156]          @ 4-byte Spill
-	ldr	r7, [r2, #80]
-	str	r7, [sp, #160]          @ 4-byte Spill
-	ldr	r7, [r2, #84]
-	str	r7, [sp, #168]          @ 4-byte Spill
-	ldr	r7, [r2, #88]
-	str	r7, [sp, #164]          @ 4-byte Spill
-	ldr	r7, [r2, #92]
-	str	r7, [sp, #172]          @ 4-byte Spill
+	ldr	r6, [r1, #24]
+	adcs	r7, r7, r4
+	str	r7, [sp, #84]                   @ 4-byte Spill
+	ldr	r7, [r2, #24]
+	ldr	r4, [r3, #8]
+	adcs	r7, r6, r7
+	str	r7, [sp, #80]                   @ 4-byte Spill
+	ldr	r7, [r2, #28]
+	ldr	r6, [r1, #28]
+	ldr	r12, [r3, #12]
+	adcs	r7, r6, r7
+	str	r7, [sp, #68]                   @ 4-byte Spill
 	ldr	r7, [r2, #32]
-	str	r7, [sp, #56]           @ 4-byte Spill
+	ldr	r6, [r1, #32]
+	stm	r0, {r10, r11}
+	add	r10, r3, #40
+	adcs	r7, r6, r7
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	add	r11, r3, #28
 	ldr	r7, [r2, #36]
-	str	r7, [sp, #60]           @ 4-byte Spill
+	ldr	r6, [r1, #36]
+	adcs	r7, r6, r7
+	str	r7, [sp, #52]                   @ 4-byte Spill
 	ldr	r7, [r2, #40]
-	str	r7, [sp, #64]           @ 4-byte Spill
+	ldr	r6, [r1, #40]
+	adcs	r7, r6, r7
+	str	r7, [sp, #48]                   @ 4-byte Spill
 	ldr	r7, [r2, #44]
-	str	r7, [sp, #72]           @ 4-byte Spill
+	ldr	r6, [r1, #44]
+	adcs	r7, r6, r7
+	str	r7, [sp, #36]                   @ 4-byte Spill
 	ldr	r7, [r2, #48]
-	str	r7, [sp, #76]           @ 4-byte Spill
+	ldr	r6, [r1, #48]
+	adcs	r7, r6, r7
+	str	r7, [sp, #32]                   @ 4-byte Spill
 	ldr	r7, [r2, #52]
-	str	r7, [sp, #80]           @ 4-byte Spill
+	ldr	r6, [r1, #52]
+	adcs	r7, r6, r7
+	str	r7, [sp, #28]                   @ 4-byte Spill
 	ldr	r7, [r2, #56]
-	str	r7, [sp, #88]           @ 4-byte Spill
+	ldr	r6, [r1, #56]
+	adcs	r7, r6, r7
+	str	r7, [sp, #24]                   @ 4-byte Spill
 	ldr	r7, [r2, #60]
-	str	r7, [sp, #92]           @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	str	r7, [sp, #16]           @ 4-byte Spill
-	ldr	r7, [r2, #20]
-	ldr	r2, [r2, #16]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #128]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp, #136]          @ 4-byte Spill
-	ldr	r2, [r1, #132]
-	str	r2, [sp, #140]          @ 4-byte Spill
-	ldr	r2, [r1, #96]
-	str	r2, [sp, #108]          @ 4-byte Spill
-	ldr	r2, [r1, #104]
-	str	r2, [sp, #112]          @ 4-byte Spill
-	ldr	r2, [r1, #108]
-	str	r2, [sp, #116]          @ 4-byte Spill
-	ldr	r2, [r1, #112]
-	str	r2, [sp, #120]          @ 4-byte Spill
-	ldr	r2, [r1, #116]
-	str	r2, [sp, #124]          @ 4-byte Spill
-	ldr	r2, [r1, #120]
-	str	r2, [sp, #128]          @ 4-byte Spill
-	ldr	r2, [r1, #124]
-	str	r2, [sp, #132]          @ 4-byte Spill
-	ldr	r2, [r1, #100]
-	str	r2, [sp, #84]           @ 4-byte Spill
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #24]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r2, [sp, #28]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #32]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #36]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #40]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #88]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #92]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r8, r9, r10}
-	ldr	r2, [r1, #56]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #104]         @ 4-byte Reload
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	str	r11, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #96]           @ 4-byte Reload
-	str	r7, [r0, #8]
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	adcs	r1, r7, r1
-	ldr	r7, [sp, #68]           @ 4-byte Reload
-	str	r7, [r0, #12]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
+	ldr	r6, [r1, #60]
+	adcs	r7, r6, r7
+	str	r7, [sp, #20]                   @ 4-byte Spill
+	ldr	r7, [r2, #64]
+	ldr	r6, [r1, #64]
+	adcs	lr, r6, r7
+	ldr	r7, [r2, #68]
+	ldr	r6, [r1, #68]
+	str	lr, [sp, #100]                  @ 4-byte Spill
+	adcs	r8, r6, r7
+	ldr	r7, [r2, #72]
+	ldr	r6, [r1, #72]
+	str	r8, [sp, #76]                   @ 4-byte Spill
+	adcs	r9, r6, r7
+	ldr	r7, [r2, #76]
+	ldr	r6, [r1, #76]
+	str	r9, [sp, #60]                   @ 4-byte Spill
+	adcs	r5, r6, r7
+	ldr	r7, [r2, #80]
+	ldr	r6, [r1, #80]
+	str	r5, [sp, #44]                   @ 4-byte Spill
+	adcs	r7, r6, r7
+	str	r7, [sp, #136]                  @ 4-byte Spill
+	ldr	r7, [r2, #84]
+	ldr	r6, [r1, #84]
+	adcs	r7, r6, r7
+	str	r7, [sp, #132]                  @ 4-byte Spill
+	ldr	r7, [r2, #88]
+	ldr	r6, [r1, #88]
+	adcs	r7, r6, r7
+	str	r7, [sp, #128]                  @ 4-byte Spill
+	ldr	r7, [r2, #92]
+	ldr	r6, [r1, #92]
+	adcs	r7, r6, r7
+	str	r7, [sp, #124]                  @ 4-byte Spill
+	ldr	r7, [r2, #96]
+	ldr	r6, [r1, #96]
+	adcs	r7, r6, r7
+	str	r7, [sp, #120]                  @ 4-byte Spill
+	ldr	r7, [r2, #100]
+	ldr	r6, [r1, #100]
+	adcs	r7, r6, r7
+	str	r7, [sp, #116]                  @ 4-byte Spill
+	ldr	r7, [r2, #104]
+	ldr	r6, [r1, #104]
+	adcs	r7, r6, r7
+	str	r7, [sp, #112]                  @ 4-byte Spill
+	ldr	r7, [r2, #108]
+	ldr	r6, [r1, #108]
+	adcs	r7, r6, r7
+	str	r7, [sp, #108]                  @ 4-byte Spill
+	ldr	r7, [r2, #112]
+	ldr	r6, [r1, #112]
+	adcs	r7, r6, r7
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	ldr	r7, [r2, #116]
+	ldr	r6, [r1, #116]
+	adcs	r7, r6, r7
+	str	r7, [sp, #12]                   @ 4-byte Spill
+	ldr	r7, [r2, #120]
+	ldr	r6, [r1, #120]
+	ldr	r2, [r2, #124]
+	ldr	r1, [r1, #124]
+	adcs	r7, r6, r7
+	str	r7, [sp, #8]                    @ 4-byte Spill
+	adcs	r1, r1, r2
+	str	r1, [sp, #4]                    @ 4-byte Spill
+	mov	r1, #0
+	ldm	r3, {r2, r6}
+	adc	r1, r1, #0
+	str	r1, [sp]                        @ 4-byte Spill
+	subs	r1, lr, r2
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	sbcs	r1, r8, r6
+	str	r1, [sp, #72]                   @ 4-byte Spill
+	sbcs	r1, r9, r4
+	str	r1, [sp, #56]                   @ 4-byte Spill
+	sbcs	r1, r5, r12
+	str	r1, [sp, #40]                   @ 4-byte Spill
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	str	r1, [r0, #8]
+	ldr	r1, [sp, #92]                   @ 4-byte Reload
+	str	r1, [r0, #12]
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
 	str	r1, [r0, #16]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	adcs	r2, r7, r2
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r2, [r0, #20]
-	adcs	r1, r1, r12
-	ldr	r2, [sp, #20]           @ 4-byte Reload
+	ldr	r1, [sp, #84]                   @ 4-byte Reload
+	str	r1, [r0, #20]
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
 	str	r1, [r0, #24]
-	ldr	r1, [sp, #56]           @ 4-byte Reload
-	adcs	r2, r2, lr
-	str	r2, [r0, #28]
-	adcs	r1, r1, r4
-	ldr	r2, [sp, #60]           @ 4-byte Reload
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	ldr	r12, [r3, #24]
+	ldr	lr, [r3, #20]
+	ldr	r3, [r3, #16]
+	ldr	r4, [sp, #136]                  @ 4-byte Reload
+	str	r1, [r0, #28]
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
+	sbcs	r3, r4, r3
 	str	r1, [r0, #32]
-	ldr	r1, [sp, #64]           @ 4-byte Reload
-	adcs	r2, r2, r5
-	str	r2, [r0, #36]
-	adcs	r1, r1, r6
-	ldr	r2, [sp, #72]           @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	str	r1, [r0, #36]
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	str	r3, [sp, #104]                  @ 4-byte Spill
+	ldr	r3, [sp, #132]                  @ 4-byte Reload
 	str	r1, [r0, #40]
-	ldr	r1, [sp, #76]           @ 4-byte Reload
-	adcs	r2, r2, r8
-	str	r2, [r0, #44]
-	adcs	r1, r1, r9
-	ldr	r2, [sp, #80]           @ 4-byte Reload
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	sbcs	r3, r3, lr
+	str	r1, [r0, #44]
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
 	str	r1, [r0, #48]
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	adcs	r2, r2, r10
-	adcs	r1, r1, r7
-	str	r2, [r0, #52]
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	ldr	r7, [sp, #4]            @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	str	r3, [sp, #92]                   @ 4-byte Spill
+	ldr	r3, [sp, #128]                  @ 4-byte Reload
+	str	r1, [r0, #52]
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	sbcs	r3, r3, r12
 	str	r1, [r0, #56]
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	adcs	r2, r2, r7
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	str	r2, [r0, #60]
-	ldr	r2, [sp, #148]          @ 4-byte Reload
-	adcs	r1, r1, r7
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	str	r1, [r0, #64]
-	ldr	r1, [sp, #152]          @ 4-byte Reload
-	adcs	r12, r2, r7
-	ldr	r2, [sp, #32]           @ 4-byte Reload
-	str	r12, [sp, #96]          @ 4-byte Spill
-	adcs	r9, r1, r2
-	ldr	r1, [sp, #156]          @ 4-byte Reload
-	ldr	r2, [sp, #36]           @ 4-byte Reload
-	str	r9, [sp, #100]          @ 4-byte Spill
-	adcs	r8, r1, r2
-	ldr	r1, [sp, #160]          @ 4-byte Reload
-	ldr	r2, [sp, #40]           @ 4-byte Reload
-	str	r8, [sp, #104]          @ 4-byte Spill
-	adcs	r4, r1, r2
-	ldr	r1, [sp, #168]          @ 4-byte Reload
-	ldr	r2, [sp, #44]           @ 4-byte Reload
-	str	r4, [sp, #144]          @ 4-byte Spill
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #48]           @ 4-byte Reload
-	str	r1, [sp, #168]          @ 4-byte Spill
-	ldr	r1, [sp, #164]          @ 4-byte Reload
-	adcs	lr, r1, r2
-	ldr	r1, [sp, #172]          @ 4-byte Reload
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	str	lr, [sp, #92]           @ 4-byte Spill
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #108]          @ 4-byte Reload
-	str	r1, [sp, #172]          @ 4-byte Spill
-	ldr	r1, [sp, #176]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #84]           @ 4-byte Reload
-	str	r1, [sp, #176]          @ 4-byte Spill
-	ldr	r1, [sp, #180]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #112]          @ 4-byte Reload
-	str	r1, [sp, #180]          @ 4-byte Spill
-	ldr	r1, [sp, #184]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #116]          @ 4-byte Reload
-	str	r1, [sp, #184]          @ 4-byte Spill
-	ldr	r1, [sp, #188]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	str	r1, [sp, #188]          @ 4-byte Spill
-	ldr	r1, [sp, #192]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #124]          @ 4-byte Reload
-	str	r1, [sp, #192]          @ 4-byte Spill
-	ldr	r1, [sp, #196]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #128]          @ 4-byte Reload
-	str	r1, [sp, #196]          @ 4-byte Spill
-	ldr	r1, [sp, #200]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #132]          @ 4-byte Reload
-	str	r1, [sp, #200]          @ 4-byte Spill
-	ldr	r1, [sp, #204]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #136]          @ 4-byte Reload
-	str	r1, [sp, #204]          @ 4-byte Spill
-	ldr	r1, [sp, #208]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	ldr	r2, [sp, #140]          @ 4-byte Reload
-	str	r1, [sp, #208]          @ 4-byte Spill
-	ldr	r1, [sp, #212]          @ 4-byte Reload
-	adcs	r1, r1, r2
-	str	r1, [sp, #212]          @ 4-byte Spill
-	mov	r1, #0
-	adc	r1, r1, #0
-	str	r1, [sp, #140]          @ 4-byte Spill
-	ldm	r3, {r2, r7}
-	ldr	r1, [r3, #64]
-	ldr	r6, [r3, #8]
-	ldr	r5, [r3, #12]
-	ldr	r10, [r3, #36]
-	ldr	r11, [r3, #40]
-	str	r1, [sp, #164]          @ 4-byte Spill
-	ldr	r1, [r3, #44]
-	subs	r12, r12, r2
-	sbcs	r7, r9, r7
-	sbcs	r6, r8, r6
-	add	r8, r3, #20
-	sbcs	r9, r4, r5
-	str	r1, [sp, #136]          @ 4-byte Spill
-	ldr	r1, [r3, #48]
-	str	r1, [sp, #148]          @ 4-byte Spill
-	ldr	r1, [r3, #52]
-	str	r1, [sp, #152]          @ 4-byte Spill
-	ldr	r1, [r3, #56]
-	str	r1, [sp, #156]          @ 4-byte Spill
-	ldr	r1, [r3, #60]
-	str	r1, [sp, #160]          @ 4-byte Spill
-	ldm	r8, {r1, r4, r5, r8}
-	ldr	r3, [r3, #16]
-	ldr	r2, [sp, #168]          @ 4-byte Reload
-	sbcs	r2, r2, r3
-	sbcs	r3, lr, r1
-	ldr	r1, [sp, #172]          @ 4-byte Reload
-	sbcs	lr, r1, r4
-	ldr	r1, [sp, #176]          @ 4-byte Reload
-	sbcs	r4, r1, r5
-	ldr	r1, [sp, #180]          @ 4-byte Reload
-	ldr	r5, [sp, #136]          @ 4-byte Reload
-	sbcs	r8, r1, r8
-	ldr	r1, [sp, #184]          @ 4-byte Reload
-	sbcs	r10, r1, r10
-	ldr	r1, [sp, #188]          @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	str	r1, [r0, #60]
+	str	r3, [sp, #88]                   @ 4-byte Spill
+	ldm	r11, {r1, r2, r11}
+	ldr	r3, [sp, #124]                  @ 4-byte Reload
+	ldm	r10, {r5, r6, r7, r8, r9, r10}
+	sbcs	r1, r3, r1
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	ldr	r12, [sp, #16]                  @ 4-byte Reload
+	sbcs	lr, r1, r2
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	ldr	r3, [sp, #12]                   @ 4-byte Reload
 	sbcs	r11, r1, r11
-	ldr	r1, [sp, #192]          @ 4-byte Reload
-	sbcs	r1, r1, r5
-	ldr	r5, [sp, #148]          @ 4-byte Reload
-	str	r1, [sp, #136]          @ 4-byte Spill
-	ldr	r1, [sp, #196]          @ 4-byte Reload
-	sbcs	r1, r1, r5
-	ldr	r5, [sp, #152]          @ 4-byte Reload
-	str	r1, [sp, #148]          @ 4-byte Spill
-	ldr	r1, [sp, #200]          @ 4-byte Reload
-	sbcs	r1, r1, r5
-	ldr	r5, [sp, #156]          @ 4-byte Reload
-	str	r1, [sp, #152]          @ 4-byte Spill
-	ldr	r1, [sp, #204]          @ 4-byte Reload
-	sbcs	r1, r1, r5
-	ldr	r5, [sp, #160]          @ 4-byte Reload
-	str	r1, [sp, #156]          @ 4-byte Spill
-	ldr	r1, [sp, #208]          @ 4-byte Reload
-	sbcs	r1, r1, r5
-	ldr	r5, [sp, #164]          @ 4-byte Reload
-	str	r1, [sp, #160]          @ 4-byte Spill
-	ldr	r1, [sp, #212]          @ 4-byte Reload
-	sbcs	r1, r1, r5
-	ldr	r5, [sp, #96]           @ 4-byte Reload
-	str	r1, [sp, #164]          @ 4-byte Spill
-	ldr	r1, [sp, #140]          @ 4-byte Reload
-	sbc	r1, r1, #0
-	ands	r1, r1, #1
-	movne	r12, r5
-	ldr	r5, [sp, #100]          @ 4-byte Reload
-	str	r12, [r0, #68]
-	movne	r7, r5
-	str	r7, [r0, #72]
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	movne	r6, r7
-	ldr	r7, [sp, #144]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r6, [r0, #76]
-	movne	r9, r7
-	ldr	r7, [sp, #168]          @ 4-byte Reload
-	str	r9, [r0, #80]
-	movne	r2, r7
-	str	r2, [r0, #84]
-	ldr	r2, [sp, #92]           @ 4-byte Reload
-	movne	r3, r2
-	ldr	r2, [sp, #172]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #88]
-	ldr	r3, [sp, #136]          @ 4-byte Reload
-	movne	lr, r2
-	ldr	r2, [sp, #176]          @ 4-byte Reload
-	str	lr, [r0, #92]
-	movne	r4, r2
-	ldr	r2, [sp, #180]          @ 4-byte Reload
-	str	r4, [r0, #96]
-	movne	r8, r2
-	ldr	r2, [sp, #184]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r8, [r0, #100]
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r2, [sp, #4]                    @ 4-byte Reload
+	sbcs	r5, r1, r5
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r4, [sp]                        @ 4-byte Reload
+	sbcs	r6, r1, r6
+	sbcs	r7, r12, r7
+	sbcs	r1, r3, r8
+	ldr	r8, [sp, #8]                    @ 4-byte Reload
+	sbcs	r9, r8, r9
+	sbcs	r10, r2, r10
+	sbc	r4, r4, #0
+	ands	r4, r4, #1
+	movne	r1, r3
 	movne	r10, r2
-	ldr	r2, [sp, #188]          @ 4-byte Reload
-	str	r10, [r0, #104]
-	movne	r11, r2
-	ldr	r2, [sp, #192]          @ 4-byte Reload
-	str	r11, [r0, #108]
-	movne	r3, r2
-	ldr	r2, [sp, #196]          @ 4-byte Reload
-	cmp	r1, #0
-	str	r3, [r0, #112]
-	ldr	r3, [sp, #148]          @ 4-byte Reload
-	movne	r3, r2
-	ldr	r2, [sp, #200]          @ 4-byte Reload
-	str	r3, [r0, #116]
-	ldr	r3, [sp, #152]          @ 4-byte Reload
-	movne	r3, r2
-	ldr	r2, [sp, #204]          @ 4-byte Reload
-	str	r3, [r0, #120]
-	ldr	r3, [sp, #156]          @ 4-byte Reload
-	movne	r3, r2
-	cmp	r1, #0
-	ldr	r1, [sp, #208]          @ 4-byte Reload
-	ldr	r2, [sp, #160]          @ 4-byte Reload
-	str	r3, [r0, #124]
-	ldr	r3, [sp, #164]          @ 4-byte Reload
+	str	r1, [r0, #116]
+	movne	r9, r8
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	cmp	r4, #0
+	movne	r7, r12
+	ldr	r2, [sp, #84]                   @ 4-byte Reload
+	str	r10, [r0, #124]
+	movne	r6, r1
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	str	r9, [r0, #120]
+	str	r7, [r0, #112]
+	movne	r5, r1
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	cmp	r4, #0
+	str	r6, [r0, #108]
+	str	r5, [r0, #104]
+	movne	r11, r1
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	str	r11, [r0, #100]
+	movne	lr, r1
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	str	lr, [r0, #96]
 	movne	r2, r1
-	ldr	r1, [sp, #212]          @ 4-byte Reload
-	str	r2, [r0, #128]
-	movne	r3, r1
-	str	r3, [r0, #132]
-	add	sp, sp, #216
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	str	r2, [r0, #92]
+	cmp	r4, #0
+	ldr	r2, [sp, #88]                   @ 4-byte Reload
+	movne	r2, r1
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	str	r2, [r0, #88]
+	ldr	r2, [sp, #92]                   @ 4-byte Reload
+	movne	r2, r1
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	str	r2, [r0, #84]
+	ldr	r2, [sp, #104]                  @ 4-byte Reload
+	movne	r2, r1
+	cmp	r4, #0
+	str	r2, [r0, #80]
+	ldr	r2, [sp, #44]                   @ 4-byte Reload
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	movne	r1, r2
+	ldr	r2, [sp, #60]                   @ 4-byte Reload
+	str	r1, [r0, #76]
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
+	movne	r1, r2
+	ldr	r2, [sp, #76]                   @ 4-byte Reload
+	str	r1, [r0, #72]
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	movne	r1, r2
+	ldr	r2, [sp, #100]                  @ 4-byte Reload
+	str	r1, [r0, #68]
+	cmp	r4, #0
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	movne	r1, r2
+	str	r1, [r0, #64]
+	add	sp, sp, #140
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end268:
-	.size	mcl_fpDbl_add17L, .Lfunc_end268-mcl_fpDbl_add17L
+.Lfunc_end88:
+	.size	mcl_fpDbl_add16L, .Lfunc_end88-mcl_fpDbl_add16L
 	.cantunwind
 	.fnend
-
-	.globl	mcl_fpDbl_sub17L
-	.align	2
-	.type	mcl_fpDbl_sub17L,%function
-mcl_fpDbl_sub17L:                       @ @mcl_fpDbl_sub17L
+                                        @ -- End function
+	.globl	mcl_fpDbl_sub16L                @ -- Begin function mcl_fpDbl_sub16L
+	.p2align	2
+	.type	mcl_fpDbl_sub16L,%function
+	.code	32                              @ @mcl_fpDbl_sub16L
+mcl_fpDbl_sub16L:
 	.fnstart
-@ BB#0:
+@ %bb.0:
 	.save	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	push	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	.pad	#216
-	sub	sp, sp, #216
-	ldr	r7, [r2, #128]
-	add	r10, r1, #32
-	str	r7, [sp, #208]          @ 4-byte Spill
-	ldr	r7, [r2, #132]
-	str	r7, [sp, #212]          @ 4-byte Spill
-	ldr	r7, [r2, #96]
-	str	r7, [sp, #188]          @ 4-byte Spill
-	ldr	r7, [r2, #104]
-	str	r7, [sp, #164]          @ 4-byte Spill
-	ldr	r7, [r2, #108]
-	str	r7, [sp, #168]          @ 4-byte Spill
-	ldr	r7, [r2, #112]
-	str	r7, [sp, #192]          @ 4-byte Spill
-	ldr	r7, [r2, #116]
-	str	r7, [sp, #196]          @ 4-byte Spill
-	ldr	r7, [r2, #120]
-	str	r7, [sp, #200]          @ 4-byte Spill
-	ldr	r7, [r2, #124]
-	str	r7, [sp, #204]          @ 4-byte Spill
+	.pad	#148
+	sub	sp, sp, #148
 	ldr	r7, [r2, #100]
-	str	r7, [sp, #156]          @ 4-byte Spill
+	add	r9, r1, #12
+	str	r7, [sp, #116]                  @ 4-byte Spill
+	ldr	r7, [r2, #96]
+	str	r7, [sp, #124]                  @ 4-byte Spill
 	ldr	r7, [r2, #64]
-	str	r7, [sp, #144]          @ 4-byte Spill
+	str	r7, [sp, #120]                  @ 4-byte Spill
 	ldr	r7, [r2, #68]
-	str	r7, [sp, #148]          @ 4-byte Spill
+	str	r7, [sp, #108]                  @ 4-byte Spill
 	ldr	r7, [r2, #72]
-	str	r7, [sp, #152]          @ 4-byte Spill
+	str	r7, [sp, #112]                  @ 4-byte Spill
 	ldr	r7, [r2, #76]
-	str	r7, [sp, #160]          @ 4-byte Spill
+	str	r7, [sp, #144]                  @ 4-byte Spill
 	ldr	r7, [r2, #80]
-	str	r7, [sp, #172]          @ 4-byte Spill
+	str	r7, [sp, #140]                  @ 4-byte Spill
 	ldr	r7, [r2, #84]
-	str	r7, [sp, #176]          @ 4-byte Spill
+	str	r7, [sp, #136]                  @ 4-byte Spill
 	ldr	r7, [r2, #88]
-	str	r7, [sp, #180]          @ 4-byte Spill
+	str	r7, [sp, #132]                  @ 4-byte Spill
 	ldr	r7, [r2, #92]
-	str	r7, [sp, #184]          @ 4-byte Spill
-	ldr	r7, [r2, #60]
-	str	r7, [sp, #140]          @ 4-byte Spill
-	ldm	r2, {r6, r8, r12, lr}
-	ldm	r1, {r4, r5, r7, r9}
-	subs	r4, r4, r6
-	str	r4, [sp, #36]           @ 4-byte Spill
-	ldr	r4, [r2, #56]
-	str	r4, [sp, #128]          @ 4-byte Spill
-	sbcs	r4, r5, r8
-	sbcs	r7, r7, r12
-	str	r4, [sp, #32]           @ 4-byte Spill
-	ldr	r4, [r2, #52]
-	str	r7, [sp, #28]           @ 4-byte Spill
-	ldr	r7, [r2, #48]
-	str	r4, [sp, #96]           @ 4-byte Spill
-	str	r7, [sp, #88]           @ 4-byte Spill
-	sbcs	r7, r9, lr
-	add	lr, r1, #16
-	str	r7, [sp, #24]           @ 4-byte Spill
-	ldr	r7, [r2, #44]
-	str	r7, [sp, #84]           @ 4-byte Spill
-	ldr	r7, [r2, #40]
-	str	r7, [sp, #80]           @ 4-byte Spill
-	ldr	r7, [r2, #36]
-	str	r7, [sp, #76]           @ 4-byte Spill
+	str	r7, [sp, #128]                  @ 4-byte Spill
 	ldr	r7, [r2, #32]
-	str	r7, [sp, #40]           @ 4-byte Spill
-	ldr	r7, [r2, #28]
-	str	r7, [sp, #20]           @ 4-byte Spill
-	ldr	r7, [r2, #24]
-	str	r7, [sp, #16]           @ 4-byte Spill
+	str	r7, [sp, #48]                   @ 4-byte Spill
+	ldr	r7, [r2, #36]
+	str	r7, [sp, #44]                   @ 4-byte Spill
+	ldr	r7, [r2, #40]
+	str	r7, [sp, #84]                   @ 4-byte Spill
+	ldr	r7, [r2, #44]
+	str	r7, [sp, #88]                   @ 4-byte Spill
+	ldr	r7, [r2, #48]
+	str	r7, [sp, #92]                   @ 4-byte Spill
+	ldr	r7, [r2, #52]
+	str	r7, [sp, #96]                   @ 4-byte Spill
+	ldr	r7, [r2, #56]
+	str	r7, [sp, #100]                  @ 4-byte Spill
+	ldr	r7, [r2, #60]
+	ldm	r2, {r4, r11}
+	ldm	r1, {r12, lr}
+	str	r7, [sp, #104]                  @ 4-byte Spill
+	subs	r4, r12, r4
+	ldr	r7, [r2, #8]
+	str	r7, [sp, #72]                   @ 4-byte Spill
+	ldr	r7, [r2, #12]
+	str	r7, [sp, #68]                   @ 4-byte Spill
+	ldr	r7, [r2, #16]
+	str	r4, [sp, #80]                   @ 4-byte Spill
+	sbcs	r4, lr, r11
+	str	r7, [sp, #64]                   @ 4-byte Spill
+	add	lr, r1, #40
 	ldr	r7, [r2, #20]
-	ldr	r2, [r2, #16]
-	str	r2, [sp, #8]            @ 4-byte Spill
-	ldr	r2, [r1, #128]
-	str	r7, [sp, #12]           @ 4-byte Spill
-	str	r2, [sp, #132]          @ 4-byte Spill
-	ldr	r2, [r1, #132]
-	str	r2, [sp, #136]          @ 4-byte Spill
-	ldr	r2, [r1, #96]
-	str	r2, [sp, #100]          @ 4-byte Spill
-	ldr	r2, [r1, #104]
-	str	r2, [sp, #104]          @ 4-byte Spill
-	ldr	r2, [r1, #108]
-	str	r2, [sp, #108]          @ 4-byte Spill
-	ldr	r2, [r1, #112]
-	str	r2, [sp, #112]          @ 4-byte Spill
-	ldr	r2, [r1, #116]
-	str	r2, [sp, #116]          @ 4-byte Spill
-	ldr	r2, [r1, #120]
-	str	r2, [sp, #120]          @ 4-byte Spill
-	ldr	r2, [r1, #124]
-	str	r2, [sp, #124]          @ 4-byte Spill
-	ldr	r2, [r1, #100]
-	str	r2, [sp, #92]           @ 4-byte Spill
-	ldr	r2, [r1, #64]
-	str	r2, [sp, #44]           @ 4-byte Spill
-	ldr	r2, [r1, #68]
-	str	r2, [sp, #48]           @ 4-byte Spill
-	ldr	r2, [r1, #72]
-	str	r2, [sp, #52]           @ 4-byte Spill
-	ldr	r2, [r1, #76]
-	str	r2, [sp, #56]           @ 4-byte Spill
-	ldr	r2, [r1, #80]
-	str	r2, [sp, #60]           @ 4-byte Spill
-	ldr	r2, [r1, #84]
-	str	r2, [sp, #64]           @ 4-byte Spill
-	ldr	r2, [r1, #88]
-	str	r2, [sp, #68]           @ 4-byte Spill
-	ldr	r2, [r1, #92]
-	str	r2, [sp, #72]           @ 4-byte Spill
-	ldm	r10, {r4, r5, r6, r8, r9, r10}
-	ldr	r2, [r1, #56]
-	str	r2, [sp]                @ 4-byte Spill
-	ldr	r2, [r1, #60]
-	str	r2, [sp, #4]            @ 4-byte Spill
-	ldm	lr, {r1, r2, r12, lr}
-	ldr	r11, [sp, #36]          @ 4-byte Reload
-	ldr	r7, [sp, #32]           @ 4-byte Reload
-	str	r11, [r0]
-	str	r7, [r0, #4]
-	ldr	r7, [sp, #28]           @ 4-byte Reload
-	str	r7, [r0, #8]
-	ldr	r7, [sp, #8]            @ 4-byte Reload
-	sbcs	r1, r1, r7
-	ldr	r7, [sp, #24]           @ 4-byte Reload
-	str	r7, [r0, #12]
-	ldr	r7, [sp, #12]           @ 4-byte Reload
+	str	r4, [sp, #76]                   @ 4-byte Spill
+	ldr	r10, [r1, #8]
+	ldr	r4, [sp, #72]                   @ 4-byte Reload
+	str	r7, [sp, #60]                   @ 4-byte Spill
+	ldr	r7, [r2, #24]
+	sbcs	r4, r10, r4
+	str	r7, [sp, #56]                   @ 4-byte Spill
+	ldr	r7, [r2, #28]
+	str	r7, [sp, #52]                   @ 4-byte Spill
+	str	r4, [sp, #72]                   @ 4-byte Spill
+	ldm	r9, {r5, r6, r7, r8, r9}
+	ldr	r4, [sp, #68]                   @ 4-byte Reload
+	sbcs	r5, r5, r4
+	str	r5, [sp, #68]                   @ 4-byte Spill
+	ldr	r5, [sp, #64]                   @ 4-byte Reload
+	ldr	r4, [sp, #48]                   @ 4-byte Reload
+	sbcs	r6, r6, r5
+	str	r6, [sp, #64]                   @ 4-byte Spill
+	ldr	r6, [sp, #60]                   @ 4-byte Reload
+	ldm	lr, {r5, r10, r12, lr}
+	sbcs	r7, r7, r6
+	str	r7, [sp, #60]                   @ 4-byte Spill
+	ldr	r7, [sp, #56]                   @ 4-byte Reload
+	ldr	r6, [r1, #36]
+	sbcs	r7, r8, r7
+	str	r7, [sp, #56]                   @ 4-byte Spill
+	ldr	r7, [sp, #52]                   @ 4-byte Reload
+	ldr	r8, [r1, #56]
+	sbcs	r7, r9, r7
+	str	r7, [sp, #52]                   @ 4-byte Spill
+	ldr	r7, [r1, #32]
+	ldr	r9, [r1, #60]
+	sbcs	r7, r7, r4
+	str	r7, [sp, #48]                   @ 4-byte Spill
+	ldr	r7, [sp, #44]                   @ 4-byte Reload
+	sbcs	r6, r6, r7
+	str	r6, [sp, #44]                   @ 4-byte Spill
+	ldr	r6, [sp, #84]                   @ 4-byte Reload
+	sbcs	r5, r5, r6
+	str	r5, [sp, #40]                   @ 4-byte Spill
+	ldr	r5, [sp, #88]                   @ 4-byte Reload
+	ldr	r6, [r1, #68]
+	sbcs	r4, r10, r5
+	str	r4, [sp, #36]                   @ 4-byte Spill
+	ldr	r4, [sp, #92]                   @ 4-byte Reload
+	sbcs	r7, r12, r4
+	str	r7, [sp, #32]                   @ 4-byte Spill
+	ldr	r7, [sp, #96]                   @ 4-byte Reload
+	ldr	r4, [sp, #120]                  @ 4-byte Reload
+	sbcs	r7, lr, r7
+	str	r7, [sp, #28]                   @ 4-byte Spill
+	ldr	r7, [sp, #100]                  @ 4-byte Reload
+	add	lr, r1, #72
+	sbcs	r7, r8, r7
+	str	r7, [sp, #24]                   @ 4-byte Spill
+	ldr	r7, [sp, #104]                  @ 4-byte Reload
+	ldm	lr, {r5, r10, r12, lr}
+	sbcs	r7, r9, r7
+	str	r7, [sp, #20]                   @ 4-byte Spill
+	ldr	r7, [r1, #64]
+	ldr	r8, [r1, #88]
+	sbcs	r4, r7, r4
+	ldr	r7, [sp, #108]                  @ 4-byte Reload
+	str	r4, [sp, #120]                  @ 4-byte Spill
+	sbcs	r11, r6, r7
+	ldr	r6, [sp, #112]                  @ 4-byte Reload
+	ldr	r9, [r1, #92]
+	sbcs	r5, r5, r6
+	ldr	r6, [sp, #144]                  @ 4-byte Reload
+	str	r11, [sp, #100]                 @ 4-byte Spill
+	sbcs	r4, r10, r6
+	str	r4, [sp, #144]                  @ 4-byte Spill
+	ldr	r4, [sp, #140]                  @ 4-byte Reload
+	add	r10, r3, #40
+	ldr	r6, [sp, #124]                  @ 4-byte Reload
+	sbcs	r7, r12, r4
+	str	r7, [sp, #140]                  @ 4-byte Spill
+	ldr	r7, [sp, #136]                  @ 4-byte Reload
+	ldr	r4, [r3, #8]
+	sbcs	r7, lr, r7
+	str	r7, [sp, #136]                  @ 4-byte Spill
+	ldr	r7, [sp, #132]                  @ 4-byte Reload
+	ldr	r12, [r3, #12]
+	sbcs	r7, r8, r7
+	str	r7, [sp, #132]                  @ 4-byte Spill
+	ldr	r7, [sp, #128]                  @ 4-byte Reload
+	ldr	lr, [r3, #20]
+	sbcs	r7, r9, r7
+	str	r7, [sp, #128]                  @ 4-byte Spill
+	ldr	r7, [r1, #96]
+	str	r5, [sp, #92]                   @ 4-byte Spill
+	sbcs	r7, r7, r6
+	str	r7, [sp, #124]                  @ 4-byte Spill
+	ldr	r7, [r1, #100]
+	ldr	r6, [sp, #116]                  @ 4-byte Reload
+	sbcs	r7, r7, r6
+	str	r7, [sp, #116]                  @ 4-byte Spill
+	ldr	r7, [r2, #104]
+	ldr	r6, [r1, #104]
+	sbcs	r7, r6, r7
+	str	r7, [sp, #112]                  @ 4-byte Spill
+	ldr	r7, [r2, #108]
+	ldr	r6, [r1, #108]
+	sbcs	r7, r6, r7
+	str	r7, [sp, #108]                  @ 4-byte Spill
+	ldr	r7, [r2, #112]
+	ldr	r6, [r1, #112]
+	sbcs	r7, r6, r7
+	str	r7, [sp, #16]                   @ 4-byte Spill
+	ldr	r7, [r2, #116]
+	ldr	r6, [r1, #116]
+	sbcs	r7, r6, r7
+	str	r7, [sp, #12]                   @ 4-byte Spill
+	ldr	r7, [r2, #120]
+	ldr	r6, [r1, #120]
+	ldr	r2, [r2, #124]
+	ldr	r1, [r1, #124]
+	sbcs	r7, r6, r7
+	str	r7, [sp, #8]                    @ 4-byte Spill
+	sbcs	r1, r1, r2
+	str	r1, [sp, #4]                    @ 4-byte Spill
+	mov	r1, #0
+	ldm	r3, {r2, r6}
+	sbc	r1, r1, #0
+	str	r1, [sp]                        @ 4-byte Spill
+	ldr	r1, [sp, #120]                  @ 4-byte Reload
+	adds	r1, r1, r2
+	str	r1, [sp, #104]                  @ 4-byte Spill
+	adcs	r1, r11, r6
+	str	r1, [sp, #96]                   @ 4-byte Spill
+	adcs	r1, r5, r4
+	str	r1, [sp, #88]                   @ 4-byte Spill
+	ldr	r1, [sp, #144]                  @ 4-byte Reload
+	add	r11, r3, #28
+	ldr	r4, [sp, #140]                  @ 4-byte Reload
+	adcs	r1, r1, r12
+	str	r1, [sp, #84]                   @ 4-byte Spill
+	ldr	r1, [sp, #80]                   @ 4-byte Reload
+	str	r1, [r0]
+	ldr	r1, [sp, #76]                   @ 4-byte Reload
+	str	r1, [r0, #4]
+	ldr	r1, [sp, #72]                   @ 4-byte Reload
+	str	r1, [r0, #8]
+	ldr	r1, [sp, #68]                   @ 4-byte Reload
+	str	r1, [r0, #12]
+	ldr	r1, [sp, #64]                   @ 4-byte Reload
 	str	r1, [r0, #16]
-	ldr	r1, [sp, #16]           @ 4-byte Reload
-	sbcs	r2, r2, r7
-	ldr	r7, [sp]                @ 4-byte Reload
-	str	r2, [r0, #20]
-	sbcs	r1, r12, r1
-	ldr	r2, [sp, #20]           @ 4-byte Reload
+	ldr	r1, [sp, #60]                   @ 4-byte Reload
+	str	r1, [r0, #20]
+	ldr	r1, [sp, #56]                   @ 4-byte Reload
 	str	r1, [r0, #24]
-	ldr	r1, [sp, #40]           @ 4-byte Reload
-	sbcs	r2, lr, r2
-	add	lr, r3, #8
-	str	r2, [r0, #28]
-	sbcs	r1, r4, r1
-	ldr	r2, [sp, #76]           @ 4-byte Reload
+	ldr	r1, [sp, #52]                   @ 4-byte Reload
+	ldr	r12, [r3, #24]
+	ldr	r3, [r3, #16]
+	str	r1, [r0, #28]
+	ldr	r1, [sp, #48]                   @ 4-byte Reload
+	adcs	r3, r4, r3
 	str	r1, [r0, #32]
-	ldr	r1, [sp, #80]           @ 4-byte Reload
-	sbcs	r2, r5, r2
-	str	r2, [r0, #36]
-	sbcs	r1, r6, r1
-	ldr	r2, [sp, #84]           @ 4-byte Reload
+	ldr	r1, [sp, #44]                   @ 4-byte Reload
+	str	r1, [r0, #36]
+	ldr	r1, [sp, #40]                   @ 4-byte Reload
+	str	r3, [sp, #80]                   @ 4-byte Spill
+	ldr	r3, [sp, #136]                  @ 4-byte Reload
 	str	r1, [r0, #40]
-	ldr	r1, [sp, #88]           @ 4-byte Reload
-	sbcs	r2, r8, r2
-	sbcs	r1, r9, r1
-	str	r2, [r0, #44]
-	ldr	r2, [sp, #96]           @ 4-byte Reload
-	add	r9, r3, #20
+	ldr	r1, [sp, #36]                   @ 4-byte Reload
+	adcs	r3, r3, lr
+	str	r1, [r0, #44]
+	ldr	r1, [sp, #32]                   @ 4-byte Reload
 	str	r1, [r0, #48]
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	sbcs	r2, r10, r2
-	sbcs	r1, r7, r1
-	str	r2, [r0, #52]
-	ldr	r2, [sp, #140]          @ 4-byte Reload
-	ldr	r7, [sp, #4]            @ 4-byte Reload
+	ldr	r1, [sp, #28]                   @ 4-byte Reload
+	str	r3, [sp, #76]                   @ 4-byte Spill
+	ldr	r3, [sp, #132]                  @ 4-byte Reload
+	str	r1, [r0, #52]
+	ldr	r1, [sp, #24]                   @ 4-byte Reload
+	adcs	r3, r3, r12
 	str	r1, [r0, #56]
-	ldr	r1, [sp, #144]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #44]           @ 4-byte Reload
-	str	r2, [r0, #60]
-	ldr	r2, [sp, #148]          @ 4-byte Reload
-	sbcs	r1, r7, r1
-	ldr	r7, [sp, #48]           @ 4-byte Reload
-	str	r1, [r0, #64]
-	ldr	r1, [sp, #152]          @ 4-byte Reload
-	sbcs	r5, r7, r2
-	ldr	r2, [sp, #52]           @ 4-byte Reload
-	ldr	r7, [sp, #100]          @ 4-byte Reload
-	sbcs	r10, r2, r1
-	ldr	r1, [sp, #160]          @ 4-byte Reload
-	ldr	r2, [sp, #56]           @ 4-byte Reload
-	str	r10, [sp, #96]          @ 4-byte Spill
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #60]           @ 4-byte Reload
-	str	r1, [sp, #160]          @ 4-byte Spill
-	ldr	r1, [sp, #172]          @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #64]           @ 4-byte Reload
-	str	r1, [sp, #172]          @ 4-byte Spill
-	ldr	r1, [sp, #176]          @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #68]           @ 4-byte Reload
-	str	r1, [sp, #176]          @ 4-byte Spill
-	ldr	r1, [sp, #180]          @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #72]           @ 4-byte Reload
-	str	r1, [sp, #180]          @ 4-byte Spill
-	ldr	r1, [sp, #184]          @ 4-byte Reload
-	sbcs	r1, r2, r1
-	ldr	r2, [sp, #188]          @ 4-byte Reload
-	str	r1, [sp, #184]          @ 4-byte Spill
-	mov	r1, #0
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #92]           @ 4-byte Reload
-	str	r2, [sp, #188]          @ 4-byte Spill
-	ldr	r2, [sp, #156]          @ 4-byte Reload
-	sbcs	r11, r7, r2
-	ldr	r2, [sp, #164]          @ 4-byte Reload
-	ldr	r7, [sp, #104]          @ 4-byte Reload
-	str	r11, [sp, #128]         @ 4-byte Spill
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #108]          @ 4-byte Reload
-	str	r2, [sp, #164]          @ 4-byte Spill
-	ldr	r2, [sp, #168]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #112]          @ 4-byte Reload
-	str	r2, [sp, #168]          @ 4-byte Spill
-	ldr	r2, [sp, #192]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #116]          @ 4-byte Reload
-	str	r2, [sp, #192]          @ 4-byte Spill
-	ldr	r2, [sp, #196]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #120]          @ 4-byte Reload
-	str	r2, [sp, #196]          @ 4-byte Spill
-	ldr	r2, [sp, #200]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #124]          @ 4-byte Reload
-	str	r2, [sp, #200]          @ 4-byte Spill
-	ldr	r2, [sp, #204]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #132]          @ 4-byte Reload
-	str	r2, [sp, #204]          @ 4-byte Spill
-	ldr	r2, [sp, #208]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	ldr	r7, [sp, #136]          @ 4-byte Reload
-	str	r2, [sp, #208]          @ 4-byte Spill
-	ldr	r2, [sp, #212]          @ 4-byte Reload
-	sbcs	r2, r7, r2
-	sbc	r1, r1, #0
-	str	r2, [sp, #212]          @ 4-byte Spill
-	str	r1, [sp, #124]          @ 4-byte Spill
-	ldr	r1, [r3, #64]
-	str	r1, [sp, #156]          @ 4-byte Spill
-	ldr	r1, [r3, #36]
-	str	r1, [sp, #120]          @ 4-byte Spill
-	ldr	r1, [r3, #40]
-	str	r1, [sp, #132]          @ 4-byte Spill
-	ldr	r1, [r3, #44]
-	str	r1, [sp, #136]          @ 4-byte Spill
-	ldr	r1, [r3, #48]
-	str	r1, [sp, #140]          @ 4-byte Spill
-	ldr	r1, [r3, #52]
-	str	r1, [sp, #144]          @ 4-byte Spill
-	ldr	r1, [r3, #56]
-	str	r1, [sp, #148]          @ 4-byte Spill
-	ldr	r1, [r3, #60]
-	str	r1, [sp, #152]          @ 4-byte Spill
-	ldr	r1, [r3, #32]
-	str	r1, [sp, #116]          @ 4-byte Spill
-	ldm	r3, {r2, r7}
-	ldm	lr, {r6, r12, lr}
-	ldm	r9, {r4, r8, r9}
-	ldr	r3, [sp, #160]          @ 4-byte Reload
-	adds	r1, r5, r2
-	adcs	r2, r10, r7
-	ldr	r7, [sp, #164]          @ 4-byte Reload
-	adcs	r3, r3, r6
-	ldr	r6, [sp, #172]          @ 4-byte Reload
-	adcs	r12, r6, r12
-	ldr	r6, [sp, #176]          @ 4-byte Reload
-	adcs	lr, r6, lr
-	ldr	r6, [sp, #180]          @ 4-byte Reload
-	adcs	r4, r6, r4
-	ldr	r6, [sp, #184]          @ 4-byte Reload
-	adcs	r8, r6, r8
-	ldr	r6, [sp, #188]          @ 4-byte Reload
-	adcs	r9, r6, r9
-	ldr	r6, [sp, #116]          @ 4-byte Reload
-	adcs	r10, r11, r6
-	ldr	r6, [sp, #120]          @ 4-byte Reload
-	ldr	r11, [sp, #156]         @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #132]          @ 4-byte Reload
-	str	r7, [sp, #120]          @ 4-byte Spill
-	ldr	r7, [sp, #168]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #136]          @ 4-byte Reload
-	str	r7, [sp, #132]          @ 4-byte Spill
-	ldr	r7, [sp, #192]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #140]          @ 4-byte Reload
-	str	r7, [sp, #136]          @ 4-byte Spill
-	ldr	r7, [sp, #196]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #144]          @ 4-byte Reload
-	str	r7, [sp, #140]          @ 4-byte Spill
-	ldr	r7, [sp, #200]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #148]          @ 4-byte Reload
-	str	r7, [sp, #144]          @ 4-byte Spill
-	ldr	r7, [sp, #204]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	ldr	r6, [sp, #152]          @ 4-byte Reload
-	str	r7, [sp, #148]          @ 4-byte Spill
-	ldr	r7, [sp, #208]          @ 4-byte Reload
-	adcs	r7, r7, r6
-	str	r7, [sp, #152]          @ 4-byte Spill
-	ldr	r7, [sp, #212]          @ 4-byte Reload
-	adc	r7, r7, r11
-	str	r7, [sp, #156]          @ 4-byte Spill
-	ldr	r7, [sp, #124]          @ 4-byte Reload
-	ands	r7, r7, #1
-	moveq	r1, r5
-	str	r1, [r0, #68]
-	ldr	r1, [sp, #96]           @ 4-byte Reload
-	moveq	r2, r1
-	ldr	r1, [sp, #160]          @ 4-byte Reload
-	str	r2, [r0, #72]
-	ldr	r2, [sp, #120]          @ 4-byte Reload
-	moveq	r3, r1
-	ldr	r1, [sp, #172]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r3, [r0, #76]
-	ldr	r3, [sp, #156]          @ 4-byte Reload
-	moveq	r12, r1
-	ldr	r1, [sp, #176]          @ 4-byte Reload
-	str	r12, [r0, #80]
-	moveq	lr, r1
-	ldr	r1, [sp, #180]          @ 4-byte Reload
-	str	lr, [r0, #84]
-	moveq	r4, r1
-	ldr	r1, [sp, #184]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r4, [r0, #88]
-	moveq	r8, r1
-	ldr	r1, [sp, #188]          @ 4-byte Reload
-	str	r8, [r0, #92]
-	moveq	r9, r1
-	ldr	r1, [sp, #128]          @ 4-byte Reload
-	str	r9, [r0, #96]
-	moveq	r10, r1
-	ldr	r1, [sp, #164]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r10, [r0, #100]
-	moveq	r2, r1
-	ldr	r1, [sp, #168]          @ 4-byte Reload
-	str	r2, [r0, #104]
-	ldr	r2, [sp, #132]          @ 4-byte Reload
+	ldr	r1, [sp, #20]                   @ 4-byte Reload
+	str	r1, [r0, #60]
+	str	r3, [sp, #72]                   @ 4-byte Spill
+	ldm	r11, {r1, r2, r11}
+	ldr	r3, [sp, #128]                  @ 4-byte Reload
+	ldm	r10, {r5, r6, r7, r8, r9, r10}
+	adcs	r1, r3, r1
+	str	r1, [sp, #68]                   @ 4-byte Spill
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	ldr	lr, [sp, #16]                   @ 4-byte Reload
+	adcs	r1, r1, r2
+	str	r1, [sp, #64]                   @ 4-byte Spill
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	ldr	r12, [sp, #12]                  @ 4-byte Reload
+	adcs	r11, r1, r11
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	ldr	r3, [sp, #8]                    @ 4-byte Reload
+	adcs	r5, r1, r5
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	ldr	r2, [sp, #4]                    @ 4-byte Reload
+	adcs	r6, r1, r6
+	ldr	r4, [sp]                        @ 4-byte Reload
+	adcs	r7, lr, r7
+	adcs	r1, r12, r8
+	adcs	r9, r3, r9
+	adc	r10, r2, r10
+	ands	r8, r4, #1
+	moveq	r1, r12
+	moveq	r10, r2
+	str	r1, [r0, #116]
+	moveq	r9, r3
+	ldr	r1, [sp, #108]                  @ 4-byte Reload
+	cmp	r8, #0
+	moveq	r7, lr
+	ldr	r2, [sp, #64]                   @ 4-byte Reload
+	str	r10, [r0, #124]
+	moveq	r6, r1
+	ldr	r1, [sp, #112]                  @ 4-byte Reload
+	str	r9, [r0, #120]
+	str	r7, [r0, #112]
+	moveq	r5, r1
+	ldr	r1, [sp, #116]                  @ 4-byte Reload
+	cmp	r8, #0
+	str	r6, [r0, #108]
+	str	r5, [r0, #104]
+	moveq	r11, r1
+	ldr	r1, [sp, #124]                  @ 4-byte Reload
+	str	r11, [r0, #100]
 	moveq	r2, r1
-	ldr	r1, [sp, #192]          @ 4-byte Reload
-	str	r2, [r0, #108]
-	ldr	r2, [sp, #136]          @ 4-byte Reload
+	ldr	r1, [sp, #128]                  @ 4-byte Reload
+	str	r2, [r0, #96]
+	ldr	r2, [sp, #68]                   @ 4-byte Reload
 	moveq	r2, r1
-	ldr	r1, [sp, #196]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r2, [r0, #112]
-	ldr	r2, [sp, #140]          @ 4-byte Reload
+	ldr	r1, [sp, #132]                  @ 4-byte Reload
+	str	r2, [r0, #92]
+	cmp	r8, #0
+	ldr	r2, [sp, #72]                   @ 4-byte Reload
 	moveq	r2, r1
-	ldr	r1, [sp, #200]          @ 4-byte Reload
-	str	r2, [r0, #116]
-	ldr	r2, [sp, #144]          @ 4-byte Reload
+	ldr	r1, [sp, #136]                  @ 4-byte Reload
+	str	r2, [r0, #88]
+	ldr	r2, [sp, #76]                   @ 4-byte Reload
 	moveq	r2, r1
-	ldr	r1, [sp, #204]          @ 4-byte Reload
-	str	r2, [r0, #120]
-	ldr	r2, [sp, #148]          @ 4-byte Reload
+	ldr	r1, [sp, #140]                  @ 4-byte Reload
+	str	r2, [r0, #84]
+	ldr	r2, [sp, #80]                   @ 4-byte Reload
 	moveq	r2, r1
-	ldr	r1, [sp, #208]          @ 4-byte Reload
-	cmp	r7, #0
-	str	r2, [r0, #124]
-	ldr	r2, [sp, #152]          @ 4-byte Reload
+	ldr	r1, [sp, #144]                  @ 4-byte Reload
+	str	r2, [r0, #80]
+	cmp	r8, #0
+	ldr	r2, [sp, #84]                   @ 4-byte Reload
 	moveq	r2, r1
-	ldr	r1, [sp, #212]          @ 4-byte Reload
-	str	r2, [r0, #128]
-	moveq	r3, r1
-	str	r3, [r0, #132]
-	add	sp, sp, #216
+	ldr	r1, [sp, #88]                   @ 4-byte Reload
+	str	r2, [r0, #76]
+	ldr	r2, [sp, #92]                   @ 4-byte Reload
+	moveq	r1, r2
+	ldr	r2, [sp, #100]                  @ 4-byte Reload
+	str	r1, [r0, #72]
+	ldr	r1, [sp, #96]                   @ 4-byte Reload
+	moveq	r1, r2
+	ldr	r2, [sp, #120]                  @ 4-byte Reload
+	str	r1, [r0, #68]
+	cmp	r8, #0
+	ldr	r1, [sp, #104]                  @ 4-byte Reload
+	moveq	r1, r2
+	str	r1, [r0, #64]
+	add	sp, sp, #148
 	pop	{r4, r5, r6, r7, r8, r9, r10, r11, lr}
 	mov	pc, lr
-.Lfunc_end269:
-	.size	mcl_fpDbl_sub17L, .Lfunc_end269-mcl_fpDbl_sub17L
+.Lfunc_end89:
+	.size	mcl_fpDbl_sub16L, .Lfunc_end89-mcl_fpDbl_sub16L
 	.cantunwind
 	.fnend
-
-
+                                        @ -- End function
 	.section	".note.GNU-stack","",%progbits
 	.eabi_attribute	30, 2	@ Tag_ABI_optimization_goals

From f01790ba5d8c2c36ed690f99a9ff54bc013e0d9c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 16:14:13 +0900
Subject: [PATCH 524/553] x86 asm generated by llvm-11

---
 src/asm/x86.bmi2.s | 94788 +++++++++++------------------------------
 src/asm/x86.s      | 98329 ++++++++++++-------------------------------
 2 files changed, 50510 insertions(+), 142607 deletions(-)

diff --git a/src/asm/x86.bmi2.s b/src/asm/x86.bmi2.s
index 77729c53..260f2e66 100644
--- a/src/asm/x86.bmi2.s
+++ b/src/asm/x86.bmi2.s
@@ -1,10 +1,10 @@
 	.text
-	.file	"<stdin>"
-	.globl	makeNIST_P192Lbmi2
-	.align	16, 0x90
+	.file	"base32.bmi2.ll"
+	.globl	makeNIST_P192Lbmi2              # -- Begin function makeNIST_P192Lbmi2
+	.p2align	4, 0x90
 	.type	makeNIST_P192Lbmi2,@function
 makeNIST_P192Lbmi2:                     # @makeNIST_P192Lbmi2
-# BB#0:
+# %bb.0:
 	movl	4(%esp), %eax
 	movl	$-1, 20(%eax)
 	movl	$-1, 16(%eax)
@@ -15,137 +15,150 @@ makeNIST_P192Lbmi2:                     # @makeNIST_P192Lbmi2
 	retl	$4
 .Lfunc_end0:
 	.size	makeNIST_P192Lbmi2, .Lfunc_end0-makeNIST_P192Lbmi2
-
-	.globl	mcl_fpDbl_mod_NIST_P192Lbmi2
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fpDbl_mod_NIST_P192Lbmi2    # -- Begin function mcl_fpDbl_mod_NIST_P192Lbmi2
+	.p2align	4, 0x90
 	.type	mcl_fpDbl_mod_NIST_P192Lbmi2,@function
 mcl_fpDbl_mod_NIST_P192Lbmi2:           # @mcl_fpDbl_mod_NIST_P192Lbmi2
-# BB#0:
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$32, %esp
-	movl	56(%esp), %eax
-	movl	32(%eax), %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	28(%eax), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	xorl	%edx, %edx
-	movl	(%eax), %ebx
-	addl	%ecx, %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	4(%eax), %ecx
-	adcl	%edi, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	8(%eax), %ebp
-	adcl	%esi, %ebp
-	movl	36(%eax), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	12(%eax), %esi
-	adcl	%ecx, %esi
-	movl	40(%eax), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	16(%eax), %ecx
-	adcl	%ebx, %ecx
-	movl	44(%eax), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	movl	20(%eax), %eax
+	subl	$36, %esp
+	movl	60(%esp), %esi
+	movl	32(%esi), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	24(%esi), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	28(%esi), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	(%esi), %eax
+	addl	%edi, %eax
+	movl	%eax, %ebx
+	movl	4(%esi), %eax
+	adcl	%edx, %eax
+	movl	%eax, %edx
+	movl	8(%esi), %ebp
+	adcl	%ecx, %ebp
+	movl	36(%esi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esi), %ecx
+	adcl	%eax, %ecx
+	movl	40(%esi), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	16(%esi), %eax
 	adcl	%edi, %eax
-	adcl	$0, %edx
-	sbbl	%edi, %edi
-	andl	$1, %edi
-	addl	%ebx, 24(%esp)          # 4-byte Folded Spill
-	movl	(%esp), %ebx            # 4-byte Reload
-	adcl	%ebx, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	adcl	$0, %edx
-	adcl	$0, %edi
-	addl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	%ebx, %esi
-	adcl	$0, %ecx
+	movl	44(%esi), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	20(%esi), %edi
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	%esi, %edi
+	setb	3(%esp)                         # 1-byte Folded Spill
+	addl	4(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	adcl	%esi, %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movzbl	3(%esp), %ebx                   # 1-byte Folded Reload
+	adcl	$0, %ebx
+	setb	%dl
+	addl	4(%esp), %ebp                   # 4-byte Folded Reload
+	adcl	%esi, %ecx
 	adcl	$0, %eax
+	adcl	$0, %edi
+	setb	4(%esp)                         # 1-byte Folded Spill
+	movl	%ebx, %esi
+	adcl	$0, %esi
+	movzbl	%dl, %edx
 	adcl	$0, %edx
+	addb	$255, 4(%esp)                   # 1-byte Folded Spill
+	adcl	24(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	%edx, 28(%esp)                  # 4-byte Folded Spill
+	adcl	%ebp, %esi
+	adcl	%ecx, %edx
+	adcl	$0, %eax
 	adcl	$0, %edi
-	addl	%edx, 24(%esp)          # 4-byte Folded Spill
-	adcl	%edi, 28(%esp)          # 4-byte Folded Spill
-	adcl	%ebp, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	%esi, %edi
+	setb	24(%esp)                        # 1-byte Folded Spill
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	addl	$1, %ebx
+	movl	28(%esp), %ecx                  # 4-byte Reload
 	adcl	$0, %ecx
-	adcl	$0, %eax
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	24(%esp), %esi          # 4-byte Reload
-	addl	$1, %esi
-	movl	28(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$1, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%edi, %edx
-	adcl	$0, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	adcl	$0, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, %edx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	%esi, %ebp
+	adcl	$1, %ebp
+	movl	%edx, 12(%esp)                  # 4-byte Spill
 	adcl	$0, %edx
-	adcl	$-1, %ebx
-	andl	$1, %ebx
-	jne	.LBB1_2
-# BB#1:
-	movl	%edx, %eax
-.LBB1_2:
-	testb	%bl, %bl
-	movl	24(%esp), %edx          # 4-byte Reload
-	jne	.LBB1_4
-# BB#3:
-	movl	%esi, %edx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	$0, %eax
+	movl	%edi, %esi
+	adcl	$0, %esi
+	movzbl	24(%esp), %ecx                  # 1-byte Folded Reload
+	adcl	$-1, %ecx
+	testb	$1, %cl
+	jne	.LBB1_1
+# %bb.2:
+	movl	56(%esp), %edi
+	movl	%esi, 20(%edi)
+	jne	.LBB1_3
 .LBB1_4:
-	movl	52(%esp), %esi
-	movl	%edx, (%esi)
-	movl	20(%esp), %edx          # 4-byte Reload
-	movl	28(%esp), %ebx          # 4-byte Reload
-	jne	.LBB1_6
-# BB#5:
-	movl	%ebp, %ebx
+	movl	%eax, 16(%edi)
+	jne	.LBB1_5
 .LBB1_6:
-	movl	%ebx, 4(%esi)
-	jne	.LBB1_8
-# BB#7:
-	movl	8(%esp), %edx           # 4-byte Reload
+	movl	%edx, 12(%edi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	jne	.LBB1_7
 .LBB1_8:
-	movl	%edx, 8(%esi)
-	jne	.LBB1_10
-# BB#9:
-	movl	12(%esp), %edi          # 4-byte Reload
+	movl	%ebp, 8(%edi)
+	jne	.LBB1_9
 .LBB1_10:
-	movl	%edi, 12(%esi)
-	jne	.LBB1_12
-# BB#11:
-	movl	16(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 4(%edi)
+	je	.LBB1_12
+.LBB1_11:
+	movl	20(%esp), %ebx                  # 4-byte Reload
 .LBB1_12:
-	movl	%ecx, 16(%esi)
-	movl	%eax, 20(%esi)
-	addl	$32, %esp
+	movl	%ebx, (%edi)
+	addl	$36, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
+.LBB1_1:
+	movl	%edi, %esi
+	movl	56(%esp), %edi
+	movl	%esi, 20(%edi)
+	je	.LBB1_4
+.LBB1_3:
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 16(%edi)
+	je	.LBB1_6
+.LBB1_5:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 12(%edi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	je	.LBB1_8
+.LBB1_7:
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 8(%edi)
+	je	.LBB1_10
+.LBB1_9:
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%edi)
+	jne	.LBB1_11
+	jmp	.LBB1_12
 .Lfunc_end1:
 	.size	mcl_fpDbl_mod_NIST_P192Lbmi2, .Lfunc_end1-mcl_fpDbl_mod_NIST_P192Lbmi2
-
-	.globl	mcl_fp_sqr_NIST_P192Lbmi2
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fp_sqr_NIST_P192Lbmi2       # -- Begin function mcl_fp_sqr_NIST_P192Lbmi2
+	.p2align	4, 0x90
 	.type	mcl_fp_sqr_NIST_P192Lbmi2,@function
 mcl_fp_sqr_NIST_P192Lbmi2:              # @mcl_fp_sqr_NIST_P192Lbmi2
-# BB#0:
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
@@ -156,130 +169,146 @@ mcl_fp_sqr_NIST_P192Lbmi2:              # @mcl_fp_sqr_NIST_P192Lbmi2
 	popl	%ebx
 .Ltmp0:
 	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L2$pb), %ebx
-	movl	116(%esp), %eax
-	movl	%eax, 4(%esp)
-	leal	44(%esp), %eax
-	movl	%eax, (%esp)
+	subl	$8, %esp
+	movl	124(%esp), %eax
+	leal	52(%esp), %ecx
+	pushl	%eax
+	pushl	%ecx
 	calll	mcl_fpDbl_sqrPre6Lbmi2@PLT
-	xorl	%edi, %edi
-	movl	76(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
+	addl	$16, %esp
+	movl	80(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	76(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
 	movl	72(%esp), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi
-	addl	%eax, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax
+	addl	%edi, %eax
+	movl	%eax, %ebx
 	movl	48(%esp), %eax
 	adcl	%edx, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp
+	movl	%eax, %edx
+	movl	52(%esp), %eax
+	adcl	%esi, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ebp
 	adcl	%ecx, %ebp
-	movl	80(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi
-	adcl	%eax, %esi
-	movl	84(%esp), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
+	movl	84(%esp), %esi
 	movl	60(%esp), %ecx
-	adcl	%ebx, %ecx
+	adcl	%esi, %ecx
 	movl	88(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx
+	movl	64(%esp), %edi
+	adcl	%eax, %edi
+	setb	15(%esp)                        # 1-byte Folded Spill
+	addl	%esi, %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
 	adcl	%eax, %edx
-	adcl	$0, %edi
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	%ebx, 36(%esp)          # 4-byte Folded Spill
-	movl	12(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 40(%esp)          # 4-byte Folded Spill
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %edi
-	adcl	$0, %eax
-	addl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%ebx, %esi
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movzbl	15(%esp), %ebx                  # 1-byte Folded Reload
+	adcl	$0, %ebx
+	setb	16(%esp)                        # 1-byte Folded Spill
+	addl	%esi, %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	%eax, %ebp
 	adcl	$0, %ecx
-	adcl	$0, %edx
 	adcl	$0, %edi
-	adcl	$0, %eax
-	addl	%edi, 36(%esp)          # 4-byte Folded Spill
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	%ebp, %edi
-	adcl	%esi, %eax
-	adcl	$0, %ecx
+	setb	%al
+	movl	%ebx, %esi
+	adcl	$0, %esi
+	movzbl	16(%esp), %edx                  # 1-byte Folded Reload
 	adcl	$0, %edx
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	36(%esp), %esi          # 4-byte Reload
-	addl	$1, %esi
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	%edi, %ebp
-	adcl	$1, %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	adcl	$0, %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %ebp
-	adcl	$0, %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
+	addb	$255, %al
+	adcl	36(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	%edx, 40(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	adcl	%ebp, %edx
+	adcl	$0, %ecx
+	adcl	$0, %edi
+	setb	36(%esp)                        # 1-byte Folded Spill
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	addl	$1, %ebx
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
 	adcl	$0, %ebp
-	adcl	$-1, %ebx
-	andl	$1, %ebx
-	jne	.LBB2_2
-# BB#1:
-	movl	%ebp, %edx
-.LBB2_2:
-	testb	%bl, %bl
-	movl	36(%esp), %ebx          # 4-byte Reload
-	jne	.LBB2_4
-# BB#3:
+	movl	%esi, 24(%esp)                  # 4-byte Spill
 	movl	%esi, %ebx
+	adcl	$1, %ebx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	adcl	$0, %edx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	adcl	$0, %eax
+	movl	%edi, %esi
+	adcl	$0, %esi
+	movzbl	36(%esp), %ecx                  # 1-byte Folded Reload
+	adcl	$-1, %ecx
+	testb	$1, %cl
+	jne	.LBB2_1
+# %bb.2:
+	movl	112(%esp), %edi
+	movl	%esi, 20(%edi)
+	jne	.LBB2_3
 .LBB2_4:
-	movl	112(%esp), %esi
-	movl	%ebx, (%esi)
-	movl	40(%esp), %ebx          # 4-byte Reload
-	jne	.LBB2_6
-# BB#5:
-	movl	20(%esp), %ebx          # 4-byte Reload
+	movl	%eax, 16(%edi)
+	jne	.LBB2_5
 .LBB2_6:
-	movl	%ebx, 4(%esi)
-	jne	.LBB2_8
-# BB#7:
-	movl	24(%esp), %edi          # 4-byte Reload
+	movl	%edx, 12(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	jne	.LBB2_7
 .LBB2_8:
-	movl	%edi, 8(%esi)
-	jne	.LBB2_10
-# BB#9:
-	movl	28(%esp), %eax          # 4-byte Reload
+	movl	%ebx, 8(%edi)
+	jne	.LBB2_9
 .LBB2_10:
-	movl	%eax, 12(%esi)
-	jne	.LBB2_12
-# BB#11:
-	movl	32(%esp), %ecx          # 4-byte Reload
+	movl	%ebp, 4(%edi)
+	je	.LBB2_12
+.LBB2_11:
+	movl	16(%esp), %eax                  # 4-byte Reload
 .LBB2_12:
-	movl	%ecx, 16(%esi)
-	movl	%edx, 20(%esi)
+	movl	%eax, (%edi)
 	addl	$92, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
+.LBB2_1:
+	movl	%edi, %esi
+	movl	112(%esp), %edi
+	movl	%esi, 20(%edi)
+	je	.LBB2_4
+.LBB2_3:
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%edi)
+	je	.LBB2_6
+.LBB2_5:
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 12(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	je	.LBB2_8
+.LBB2_7:
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 8(%edi)
+	je	.LBB2_10
+.LBB2_9:
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 4(%edi)
+	jne	.LBB2_11
+	jmp	.LBB2_12
 .Lfunc_end2:
 	.size	mcl_fp_sqr_NIST_P192Lbmi2, .Lfunc_end2-mcl_fp_sqr_NIST_P192Lbmi2
-
-	.globl	mcl_fp_mulNIST_P192Lbmi2
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fp_mulNIST_P192Lbmi2        # -- Begin function mcl_fp_mulNIST_P192Lbmi2
+	.p2align	4, 0x90
 	.type	mcl_fp_mulNIST_P192Lbmi2,@function
 mcl_fp_mulNIST_P192Lbmi2:               # @mcl_fp_mulNIST_P192Lbmi2
-# BB#0:
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
@@ -290,314 +319,331 @@ mcl_fp_mulNIST_P192Lbmi2:               # @mcl_fp_mulNIST_P192Lbmi2
 	popl	%ebx
 .Ltmp1:
 	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L3$pb), %ebx
-	movl	120(%esp), %eax
-	movl	%eax, 8(%esp)
-	movl	116(%esp), %eax
-	movl	%eax, 4(%esp)
-	leal	44(%esp), %eax
-	movl	%eax, (%esp)
+	subl	$4, %esp
+	movl	124(%esp), %eax
+	movl	120(%esp), %ecx
+	leal	48(%esp), %edx
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
 	calll	mcl_fpDbl_mulPre6Lbmi2@PLT
-	xorl	%edi, %edi
-	movl	76(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
+	addl	$16, %esp
+	movl	80(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	76(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
 	movl	72(%esp), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi
-	addl	%eax, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax
+	addl	%edi, %eax
+	movl	%eax, %ebx
 	movl	48(%esp), %eax
 	adcl	%edx, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp
+	movl	%eax, %edx
+	movl	52(%esp), %eax
+	adcl	%esi, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ebp
 	adcl	%ecx, %ebp
-	movl	80(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi
-	adcl	%eax, %esi
-	movl	84(%esp), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
+	movl	84(%esp), %esi
 	movl	60(%esp), %ecx
-	adcl	%ebx, %ecx
+	adcl	%esi, %ecx
 	movl	88(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx
+	movl	64(%esp), %edi
+	adcl	%eax, %edi
+	setb	15(%esp)                        # 1-byte Folded Spill
+	addl	%esi, %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
 	adcl	%eax, %edx
-	adcl	$0, %edi
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	%ebx, 36(%esp)          # 4-byte Folded Spill
-	movl	12(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 40(%esp)          # 4-byte Folded Spill
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %edi
-	adcl	$0, %eax
-	addl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%ebx, %esi
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movzbl	15(%esp), %ebx                  # 1-byte Folded Reload
+	adcl	$0, %ebx
+	setb	16(%esp)                        # 1-byte Folded Spill
+	addl	%esi, %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	%eax, %ebp
 	adcl	$0, %ecx
-	adcl	$0, %edx
 	adcl	$0, %edi
-	adcl	$0, %eax
-	addl	%edi, 36(%esp)          # 4-byte Folded Spill
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	%ebp, %edi
-	adcl	%esi, %eax
-	adcl	$0, %ecx
+	setb	%al
+	movl	%ebx, %esi
+	adcl	$0, %esi
+	movzbl	16(%esp), %edx                  # 1-byte Folded Reload
 	adcl	$0, %edx
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	36(%esp), %esi          # 4-byte Reload
-	addl	$1, %esi
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	%edi, %ebp
-	adcl	$1, %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	adcl	$0, %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %ebp
-	adcl	$0, %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
+	addb	$255, %al
+	adcl	36(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	%edx, 40(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	adcl	%ebp, %edx
+	adcl	$0, %ecx
+	adcl	$0, %edi
+	setb	36(%esp)                        # 1-byte Folded Spill
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	addl	$1, %ebx
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
 	adcl	$0, %ebp
-	adcl	$-1, %ebx
-	andl	$1, %ebx
-	jne	.LBB3_2
-# BB#1:
-	movl	%ebp, %edx
-.LBB3_2:
-	testb	%bl, %bl
-	movl	36(%esp), %ebx          # 4-byte Reload
-	jne	.LBB3_4
-# BB#3:
+	movl	%esi, 24(%esp)                  # 4-byte Spill
 	movl	%esi, %ebx
+	adcl	$1, %ebx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	adcl	$0, %edx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	adcl	$0, %eax
+	movl	%edi, %esi
+	adcl	$0, %esi
+	movzbl	36(%esp), %ecx                  # 1-byte Folded Reload
+	adcl	$-1, %ecx
+	testb	$1, %cl
+	jne	.LBB3_1
+# %bb.2:
+	movl	112(%esp), %edi
+	movl	%esi, 20(%edi)
+	jne	.LBB3_3
 .LBB3_4:
-	movl	112(%esp), %esi
-	movl	%ebx, (%esi)
-	movl	40(%esp), %ebx          # 4-byte Reload
-	jne	.LBB3_6
-# BB#5:
-	movl	20(%esp), %ebx          # 4-byte Reload
+	movl	%eax, 16(%edi)
+	jne	.LBB3_5
 .LBB3_6:
-	movl	%ebx, 4(%esi)
-	jne	.LBB3_8
-# BB#7:
-	movl	24(%esp), %edi          # 4-byte Reload
+	movl	%edx, 12(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	jne	.LBB3_7
 .LBB3_8:
-	movl	%edi, 8(%esi)
-	jne	.LBB3_10
-# BB#9:
-	movl	28(%esp), %eax          # 4-byte Reload
+	movl	%ebx, 8(%edi)
+	jne	.LBB3_9
 .LBB3_10:
-	movl	%eax, 12(%esi)
-	jne	.LBB3_12
-# BB#11:
-	movl	32(%esp), %ecx          # 4-byte Reload
+	movl	%ebp, 4(%edi)
+	je	.LBB3_12
+.LBB3_11:
+	movl	16(%esp), %eax                  # 4-byte Reload
 .LBB3_12:
-	movl	%ecx, 16(%esi)
-	movl	%edx, 20(%esi)
+	movl	%eax, (%edi)
 	addl	$92, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end3:
-	.size	mcl_fp_mulNIST_P192Lbmi2, .Lfunc_end3-mcl_fp_mulNIST_P192Lbmi2
-
-	.globl	mcl_fpDbl_mod_NIST_P521Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mod_NIST_P521Lbmi2,@function
-mcl_fpDbl_mod_NIST_P521Lbmi2:           # @mcl_fpDbl_mod_NIST_P521Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
+.LBB3_1:
+	movl	%edi, %esi
+	movl	112(%esp), %edi
+	movl	%esi, 20(%edi)
+	je	.LBB3_4
+.LBB3_3:
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%edi)
+	je	.LBB3_6
+.LBB3_5:
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 12(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	je	.LBB3_8
+.LBB3_7:
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 8(%edi)
+	je	.LBB3_10
+.LBB3_9:
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 4(%edi)
+	jne	.LBB3_11
+	jmp	.LBB3_12
+.Lfunc_end3:
+	.size	mcl_fp_mulNIST_P192Lbmi2, .Lfunc_end3-mcl_fp_mulNIST_P192Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_mod_NIST_P521Lbmi2    # -- Begin function mcl_fpDbl_mod_NIST_P521Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mod_NIST_P521Lbmi2,@function
+mcl_fpDbl_mod_NIST_P521Lbmi2:           # @mcl_fpDbl_mod_NIST_P521Lbmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
 	subl	$60, %esp
-	movl	84(%esp), %ecx
-	movl	124(%ecx), %edx
-	movl	128(%ecx), %esi
-	movl	%esi, %eax
-	shldl	$23, %edx, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	120(%ecx), %eax
-	shldl	$23, %eax, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	116(%ecx), %edx
-	shldl	$23, %edx, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	112(%ecx), %eax
-	shldl	$23, %eax, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	108(%ecx), %edx
-	shldl	$23, %edx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	104(%ecx), %eax
-	shldl	$23, %eax, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	100(%ecx), %edx
-	shldl	$23, %edx, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	96(%ecx), %eax
-	shldl	$23, %eax, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	92(%ecx), %edx
+	movl	84(%esp), %edi
+	movl	124(%edi), %eax
+	movl	128(%edi), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	shldl	$23, %eax, %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	120(%edi), %ecx
+	shldl	$23, %ecx, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	116(%edi), %eax
+	shldl	$23, %eax, %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	112(%edi), %ecx
+	shldl	$23, %ecx, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	108(%edi), %eax
+	shldl	$23, %eax, %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	104(%edi), %ecx
+	shldl	$23, %ecx, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	100(%edi), %eax
+	shldl	$23, %eax, %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	96(%edi), %ecx
+	shldl	$23, %ecx, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	92(%edi), %eax
+	shldl	$23, %eax, %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	88(%edi), %ecx
+	shldl	$23, %ecx, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	84(%edi), %esi
+	shldl	$23, %esi, %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	80(%edi), %ecx
+	shldl	$23, %ecx, %esi
+	movl	76(%edi), %eax
+	shldl	$23, %eax, %ecx
+	movl	72(%edi), %edx
 	shldl	$23, %edx, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	88(%ecx), %eax
-	shldl	$23, %eax, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	84(%ecx), %edi
-	shldl	$23, %edi, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	80(%ecx), %edx
-	shldl	$23, %edx, %edi
-	movl	76(%ecx), %eax
-	shldl	$23, %eax, %edx
-	movl	72(%ecx), %ebx
-	shldl	$23, %ebx, %eax
-	movl	68(%ecx), %ebp
-	shldl	$23, %ebp, %ebx
-	shrl	$9, %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	64(%ecx), %esi
-	shldl	$23, %esi, %ebp
-	andl	$511, %esi              # imm = 0x1FF
-	addl	(%ecx), %ebp
-	adcl	4(%ecx), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	adcl	8(%ecx), %eax
-	adcl	12(%ecx), %edx
-	adcl	16(%ecx), %edi
-	movl	28(%esp), %ebx          # 4-byte Reload
-	adcl	20(%ecx), %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebx          # 4-byte Reload
-	adcl	24(%ecx), %ebx
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebx          # 4-byte Reload
-	adcl	28(%ecx), %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebx          # 4-byte Reload
-	adcl	32(%ecx), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebx          # 4-byte Reload
-	adcl	36(%ecx), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebx          # 4-byte Reload
-	adcl	40(%ecx), %ebx
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	24(%esp), %ebx          # 4-byte Reload
-	adcl	44(%ecx), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebx          # 4-byte Reload
-	adcl	48(%ecx), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	20(%esp), %ebx          # 4-byte Reload
-	adcl	52(%ecx), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebx          # 4-byte Reload
-	adcl	56(%ecx), %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebx          # 4-byte Reload
-	adcl	60(%ecx), %ebx
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	%esi, %ecx
-	shrl	$9, %ecx
-	andl	$1, %ecx
-	addl	%ebp, %ecx
-	adcl	$0, 16(%esp)            # 4-byte Folded Spill
+	movl	68(%edi), %ebp
+	shldl	$23, %ebp, %edx
+	movl	%edx, %ebx
+	shrl	$9, 44(%esp)                    # 4-byte Folded Spill
+	movl	64(%edi), %edx
+	shldl	$23, %edx, %ebp
+	andl	$511, %edx                      # imm = 0x1FF
+	addl	(%edi), %ebp
+	adcl	4(%edi), %ebx
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	adcl	8(%edi), %eax
+	adcl	12(%edi), %ecx
+	adcl	16(%edi), %esi
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	20(%edi), %ebx
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	24(%edi), %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	28(%edi), %ebx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	32(%edi), %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	adcl	36(%edi), %ebx
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	adcl	40(%edi), %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	adcl	44(%edi), %ebx
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	adcl	48(%edi), %ebx
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	adcl	52(%edi), %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebx                  # 4-byte Reload
+	adcl	56(%edi), %ebx
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	movl	(%esp), %ebx                    # 4-byte Reload
+	adcl	60(%edi), %ebx
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, %edi
+	shrl	$9, %edi
+	andl	$1, %edi
+	addl	%ebp, %edi
+	adcl	$0, 48(%esp)                    # 4-byte Folded Spill
 	adcl	$0, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	%edi, %esi
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	adcl	$0, 48(%esp)            # 4-byte Folded Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	adcl	$0, 52(%esp)            # 4-byte Folded Spill
-	adcl	$0, 20(%esp)            # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	%ebx, %ebp
-	adcl	$0, %ebp
-	movl	12(%esp), %ebx          # 4-byte Reload
+	adcl	$0, %ecx
+	adcl	$0, %esi
+	adcl	$0, 4(%esp)                     # 4-byte Folded Spill
+	adcl	$0, 8(%esp)                     # 4-byte Folded Spill
+	adcl	$0, 12(%esp)                    # 4-byte Folded Spill
+	adcl	$0, 16(%esp)                    # 4-byte Folded Spill
+	adcl	$0, 20(%esp)                    # 4-byte Folded Spill
+	adcl	$0, 24(%esp)                    # 4-byte Folded Spill
+	adcl	$0, 28(%esp)                    # 4-byte Folded Spill
+	adcl	$0, 32(%esp)                    # 4-byte Folded Spill
+	movl	36(%esp), %ebx                  # 4-byte Reload
 	adcl	$0, %ebx
-	movl	%ecx, %edi
+	adcl	$0, 40(%esp)                    # 4-byte Folded Spill
+	movl	(%esp), %ebp                    # 4-byte Reload
+	adcl	$0, %ebp
+	adcl	$0, %edx
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
 	andl	%eax, %edi
-	andl	%edx, %edi
-	andl	%esi, %edi
-	andl	28(%esp), %edi          # 4-byte Folded Reload
-	andl	32(%esp), %edi          # 4-byte Folded Reload
-	andl	36(%esp), %edi          # 4-byte Folded Reload
-	andl	40(%esp), %edi          # 4-byte Folded Reload
-	andl	44(%esp), %edi          # 4-byte Folded Reload
-	andl	48(%esp), %edi          # 4-byte Folded Reload
-	andl	24(%esp), %edi          # 4-byte Folded Reload
-	andl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	20(%esp), %esi          # 4-byte Reload
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	andl	%ecx, %edi
+	movl	%esi, (%esp)                    # 4-byte Spill
 	andl	%esi, %edi
-	andl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, %edx
-	movl	16(%esp), %ebx          # 4-byte Reload
+	movl	48(%esp), %esi                  # 4-byte Reload
+	andl	4(%esp), %edi                   # 4-byte Folded Reload
+	andl	8(%esp), %edi                   # 4-byte Folded Reload
+	andl	12(%esp), %edi                  # 4-byte Folded Reload
+	andl	16(%esp), %edi                  # 4-byte Folded Reload
+	andl	20(%esp), %edi                  # 4-byte Folded Reload
+	andl	24(%esp), %edi                  # 4-byte Folded Reload
+	andl	28(%esp), %edi                  # 4-byte Folded Reload
+	andl	32(%esp), %edi                  # 4-byte Folded Reload
+	movl	%ebx, %eax
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	andl	%ebx, %edi
+	andl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	%ebp, %ecx
 	andl	%ebp, %edi
-	movl	%ebp, %eax
 	movl	%edx, %ebp
-	orl	$-512, %ebp             # imm = 0xFFFFFFFFFFFFFE00
+	orl	$-512, %ebp                     # imm = 0xFE00
 	andl	%edi, %ebp
-	andl	%ebx, %ebp
+	andl	%esi, %ebp
 	cmpl	$-1, %ebp
 	movl	80(%esp), %edi
 	je	.LBB4_1
-# BB#3:                                 # %nonzero
-	movl	%ecx, (%edi)
-	movl	%ebx, 4(%edi)
-	movl	(%esp), %ecx            # 4-byte Reload
-	movl	%ecx, 8(%edi)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 12(%edi)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 16(%edi)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 20(%edi)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%edi)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%edi)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%edi)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%edi)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%edi)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%edi)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%edi)
-	movl	%esi, 52(%edi)
-	movl	56(%esp), %ecx          # 4-byte Reload
+# %bb.3:                                # %nonzero
+	movl	%ecx, 60(%edi)
+	movl	40(%esp), %ecx                  # 4-byte Reload
 	movl	%ecx, 56(%edi)
-	movl	%eax, 60(%edi)
-	andl	$511, %edx              # imm = 0x1FF
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 52(%edi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%edi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 44(%edi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 40(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 36(%edi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 32(%edi)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 28(%edi)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 24(%edi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 20(%edi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 16(%edi)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edi)
+	movl	56(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%edi)
+	movl	%esi, 4(%edi)
+	movl	52(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%edi)
+	andl	$511, %edx                      # imm = 0x1FF
 	movl	%edx, 64(%edi)
 	jmp	.LBB4_2
 .LBB4_1:                                # %zero
 	xorl	%eax, %eax
 	movl	$17, %ecx
-	rep;stosl
+	rep;stosl %eax, %es:(%edi)
 .LBB4_2:                                # %zero
 	addl	$60, %esp
 	popl	%esi
@@ -607,70179 +653,24306 @@ mcl_fpDbl_mod_NIST_P521Lbmi2:           # @mcl_fpDbl_mod_NIST_P521Lbmi2
 	retl
 .Lfunc_end4:
 	.size	mcl_fpDbl_mod_NIST_P521Lbmi2, .Lfunc_end4-mcl_fpDbl_mod_NIST_P521Lbmi2
-
-	.globl	mcl_fp_mulUnitPre1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre1Lbmi2,@function
-mcl_fp_mulUnitPre1Lbmi2:                # @mcl_fp_mulUnitPre1Lbmi2
-# BB#0:
-	movl	8(%esp), %eax
-	movl	(%eax), %edx
-	mulxl	12(%esp), %ecx, %eax
-	movl	4(%esp), %edx
-	movl	%ecx, (%edx)
-	movl	%eax, 4(%edx)
-	retl
-.Lfunc_end5:
-	.size	mcl_fp_mulUnitPre1Lbmi2, .Lfunc_end5-mcl_fp_mulUnitPre1Lbmi2
-
-	.globl	mcl_fpDbl_mulPre1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre1Lbmi2,@function
-mcl_fpDbl_mulPre1Lbmi2:                 # @mcl_fpDbl_mulPre1Lbmi2
-# BB#0:
-	movl	12(%esp), %eax
-	movl	(%eax), %edx
-	movl	8(%esp), %eax
-	mulxl	(%eax), %ecx, %eax
-	movl	4(%esp), %edx
-	movl	%ecx, (%edx)
-	movl	%eax, 4(%edx)
-	retl
-.Lfunc_end6:
-	.size	mcl_fpDbl_mulPre1Lbmi2, .Lfunc_end6-mcl_fpDbl_mulPre1Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre1Lbmi2,@function
-mcl_fpDbl_sqrPre1Lbmi2:                 # @mcl_fpDbl_sqrPre1Lbmi2
-# BB#0:
-	movl	8(%esp), %eax
-	movl	(%eax), %edx
-	mulxl	%edx, %ecx, %eax
-	movl	4(%esp), %edx
-	movl	%ecx, (%edx)
-	movl	%eax, 4(%edx)
-	retl
-.Lfunc_end7:
-	.size	mcl_fpDbl_sqrPre1Lbmi2, .Lfunc_end7-mcl_fpDbl_sqrPre1Lbmi2
-
-	.globl	mcl_fp_mont1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont1Lbmi2,@function
-mcl_fp_mont1Lbmi2:                      # @mcl_fp_mont1Lbmi2
-# BB#0:
+                                        # -- End function
+	.globl	mulPv192x32bmi2                 # -- Begin function mulPv192x32bmi2
+	.p2align	4, 0x90
+	.type	mulPv192x32bmi2,@function
+mulPv192x32bmi2:                        # @mulPv192x32bmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %edx
-	movl	20(%esp), %eax
-	mulxl	(%eax), %esi, %ecx
-	movl	24(%esp), %eax
-	movl	-4(%eax), %edx
-	imull	%esi, %edx
-	movl	(%eax), %edi
-	mulxl	%edi, %edx, %eax
-	addl	%esi, %edx
-	adcl	%ecx, %eax
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	%eax, %ecx
-	subl	%edi, %ecx
-	sbbl	$0, %edx
-	testb	$1, %dl
-	jne	.LBB8_2
-# BB#1:
+	subl	$12, %esp
+	movl	40(%esp), %edx
+	movl	36(%esp), %edi
+	mulxl	4(%edi), %ecx, %esi
+	mulxl	(%edi), %eax, %ebx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	addl	%ecx, %ebx
+	mulxl	8(%edi), %ebp, %eax
+	adcl	%esi, %ebp
+	mulxl	12(%edi), %esi, %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	%eax, %esi
+	mulxl	16(%edi), %eax, %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	32(%esp), %ecx
+	movl	8(%esp), %edi                   # 4-byte Reload
+	movl	%edi, (%ecx)
+	movl	%ebx, 4(%ecx)
+	movl	%ebp, 8(%ecx)
+	movl	%esi, 12(%ecx)
+	movl	%eax, 16(%ecx)
+	movl	36(%esp), %eax
+	mulxl	20(%eax), %eax, %edx
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 20(%ecx)
+	adcl	$0, %edx
+	movl	%edx, 24(%ecx)
 	movl	%ecx, %eax
-.LBB8_2:
-	movl	12(%esp), %ecx
-	movl	%eax, (%ecx)
+	addl	$12, %esp
 	popl	%esi
 	popl	%edi
-	retl
-.Lfunc_end8:
-	.size	mcl_fp_mont1Lbmi2, .Lfunc_end8-mcl_fp_mont1Lbmi2
-
-	.globl	mcl_fp_montNF1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF1Lbmi2,@function
-mcl_fp_montNF1Lbmi2:                    # @mcl_fp_montNF1Lbmi2
-# BB#0:
+	popl	%ebx
+	popl	%ebp
+	retl	$4
+.Lfunc_end5:
+	.size	mulPv192x32bmi2, .Lfunc_end5-mulPv192x32bmi2
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre6Lbmi2         # -- Begin function mcl_fp_mulUnitPre6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre6Lbmi2,@function
+mcl_fp_mulUnitPre6Lbmi2:                # @mcl_fp_mulUnitPre6Lbmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %edx
-	movl	20(%esp), %eax
-	mulxl	(%eax), %esi, %ecx
-	movl	24(%esp), %eax
-	movl	-4(%eax), %edx
-	imull	%esi, %edx
-	movl	(%eax), %edi
-	mulxl	%edi, %edx, %eax
-	addl	%esi, %edx
-	adcl	%ecx, %eax
-	movl	%eax, %ecx
-	subl	%edi, %ecx
-	js	.LBB9_2
-# BB#1:
-	movl	%ecx, %eax
-.LBB9_2:
-	movl	12(%esp), %ecx
-	movl	%eax, (%ecx)
+	subl	$12, %esp
+	movl	40(%esp), %edx
+	movl	36(%esp), %edi
+	mulxl	4(%edi), %esi, %eax
+	mulxl	(%edi), %ecx, %ebx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	addl	%esi, %ebx
+	mulxl	8(%edi), %ebp, %ecx
+	adcl	%eax, %ebp
+	mulxl	12(%edi), %esi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	%ecx, %esi
+	mulxl	16(%edi), %eax, %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	32(%esp), %ecx
+	movl	8(%esp), %edi                   # 4-byte Reload
+	movl	%edi, (%ecx)
+	movl	%ebx, 4(%ecx)
+	movl	%ebp, 8(%ecx)
+	movl	%esi, 12(%ecx)
+	movl	%eax, 16(%ecx)
+	movl	36(%esp), %eax
+	mulxl	20(%eax), %eax, %edx
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 20(%ecx)
+	adcl	$0, %edx
+	movl	%edx, 24(%ecx)
+	addl	$12, %esp
 	popl	%esi
 	popl	%edi
+	popl	%ebx
+	popl	%ebp
 	retl
-.Lfunc_end9:
-	.size	mcl_fp_montNF1Lbmi2, .Lfunc_end9-mcl_fp_montNF1Lbmi2
-
-	.globl	mcl_fp_montRed1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed1Lbmi2,@function
-mcl_fp_montRed1Lbmi2:                   # @mcl_fp_montRed1Lbmi2
-# BB#0:
+.Lfunc_end6:
+	.size	mcl_fp_mulUnitPre6Lbmi2, .Lfunc_end6-mcl_fp_mulUnitPre6Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre6Lbmi2          # -- Begin function mcl_fpDbl_mulPre6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre6Lbmi2,@function
+mcl_fpDbl_mulPre6Lbmi2:                 # @mcl_fpDbl_mulPre6Lbmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	16(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	20(%esp), %eax
-	movl	-4(%eax), %edx
-	imull	%esi, %edx
-	movl	(%eax), %edi
-	mulxl	%edi, %edx, %eax
-	addl	%esi, %edx
-	adcl	4(%ecx), %eax
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	%eax, %ecx
-	subl	%edi, %ecx
-	sbbl	$0, %edx
-	testb	$1, %dl
-	jne	.LBB10_2
-# BB#1:
-	movl	%ecx, %eax
-.LBB10_2:
-	movl	12(%esp), %ecx
-	movl	%eax, (%ecx)
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end10:
-	.size	mcl_fp_montRed1Lbmi2, .Lfunc_end10-mcl_fp_montRed1Lbmi2
-
-	.globl	mcl_fp_addPre1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre1Lbmi2,@function
-mcl_fp_addPre1Lbmi2:                    # @mcl_fp_addPre1Lbmi2
-# BB#0:
-	movl	12(%esp), %eax
-	movl	(%eax), %eax
-	movl	4(%esp), %ecx
-	movl	8(%esp), %edx
-	addl	(%edx), %eax
-	movl	%eax, (%ecx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	retl
-.Lfunc_end11:
-	.size	mcl_fp_addPre1Lbmi2, .Lfunc_end11-mcl_fp_addPre1Lbmi2
-
-	.globl	mcl_fp_subPre1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre1Lbmi2,@function
-mcl_fp_subPre1Lbmi2:                    # @mcl_fp_subPre1Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
-	xorl	%eax, %eax
-	movl	8(%esp), %edx
-	movl	16(%esp), %esi
-	subl	(%esi), %ecx
-	movl	%ecx, (%edx)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	retl
-.Lfunc_end12:
-	.size	mcl_fp_subPre1Lbmi2, .Lfunc_end12-mcl_fp_subPre1Lbmi2
-
-	.globl	mcl_fp_shr1_1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_1Lbmi2,@function
-mcl_fp_shr1_1Lbmi2:                     # @mcl_fp_shr1_1Lbmi2
-# BB#0:
-	movl	8(%esp), %eax
-	movl	(%eax), %eax
-	shrl	%eax
-	movl	4(%esp), %ecx
-	movl	%eax, (%ecx)
-	retl
-.Lfunc_end13:
-	.size	mcl_fp_shr1_1Lbmi2, .Lfunc_end13-mcl_fp_shr1_1Lbmi2
-
-	.globl	mcl_fp_add1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add1Lbmi2,@function
-mcl_fp_add1Lbmi2:                       # @mcl_fp_add1Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %eax
-	movl	8(%esp), %ecx
-	movl	12(%esp), %edx
-	addl	(%edx), %eax
-	movl	%eax, (%ecx)
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	20(%esp), %esi
-	subl	(%esi), %eax
-	sbbl	$0, %edx
-	testb	$1, %dl
-	jne	.LBB14_2
-# BB#1:                                 # %nocarry
+	subl	$64, %esp
+	movl	88(%esp), %edi
+	movl	(%edi), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	92(%esp), %eax
+	movl	(%eax), %ebx
+	mulxl	%ebx, %eax, %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	84(%esp), %ecx
 	movl	%eax, (%ecx)
-.LBB14_2:                               # %carry
-	popl	%esi
-	retl
-.Lfunc_end14:
-	.size	mcl_fp_add1Lbmi2, .Lfunc_end14-mcl_fp_add1Lbmi2
-
-	.globl	mcl_fp_addNF1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF1Lbmi2,@function
-mcl_fp_addNF1Lbmi2:                     # @mcl_fp_addNF1Lbmi2
-# BB#0:
-	movl	12(%esp), %eax
-	movl	(%eax), %eax
-	movl	8(%esp), %ecx
-	addl	(%ecx), %eax
-	movl	16(%esp), %edx
+	movl	20(%edi), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	mulxl	%ebx, %eax, %ebp
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	16(%edi), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	mulxl	%ebx, %eax, %ecx
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%edi), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	mulxl	%ebx, %edx, %eax
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	4(%edi), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	mulxl	%ebx, %esi, %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	addl	4(%esp), %esi                   # 4-byte Folded Reload
+	movl	8(%edi), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	mulxl	%ebx, %edi, %ebx
+	adcl	12(%esp), %edi                  # 4-byte Folded Reload
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	$0, %ebp
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	movl	92(%esp), %edx
+	movl	4(%edx), %edx
+	mulxl	40(%esp), %ebp, %eax            # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	addl	%esi, %ebp
+	mulxl	44(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	%edi, %eax
 	movl	%eax, %ecx
-	subl	(%edx), %ecx
-	js	.LBB15_2
-# BB#1:
-	movl	%ecx, %eax
-.LBB15_2:
-	movl	4(%esp), %ecx
-	movl	%eax, (%ecx)
-	retl
-.Lfunc_end15:
-	.size	mcl_fp_addNF1Lbmi2, .Lfunc_end15-mcl_fp_addNF1Lbmi2
-
-	.globl	mcl_fp_sub1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub1Lbmi2,@function
-mcl_fp_sub1Lbmi2:                       # @mcl_fp_sub1Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %eax
-	xorl	%edx, %edx
-	movl	8(%esp), %ecx
-	movl	16(%esp), %esi
-	subl	(%esi), %eax
-	movl	%eax, (%ecx)
-	sbbl	$0, %edx
-	testb	$1, %dl
-	jne	.LBB16_2
-# BB#1:                                 # %nocarry
-	popl	%esi
-	retl
-.LBB16_2:                               # %carry
-	movl	20(%esp), %edx
-	addl	(%edx), %eax
-	movl	%eax, (%ecx)
-	popl	%esi
-	retl
-.Lfunc_end16:
-	.size	mcl_fp_sub1Lbmi2, .Lfunc_end16-mcl_fp_sub1Lbmi2
-
-	.globl	mcl_fp_subNF1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF1Lbmi2,@function
-mcl_fp_subNF1Lbmi2:                     # @mcl_fp_subNF1Lbmi2
-# BB#0:
-	movl	8(%esp), %eax
-	movl	(%eax), %eax
-	movl	12(%esp), %ecx
-	subl	(%ecx), %eax
-	movl	%eax, %ecx
-	sarl	$31, %ecx
-	movl	16(%esp), %edx
-	andl	(%edx), %ecx
-	addl	%eax, %ecx
-	movl	4(%esp), %eax
-	movl	%ecx, (%eax)
-	retl
-.Lfunc_end17:
-	.size	mcl_fp_subNF1Lbmi2, .Lfunc_end17-mcl_fp_subNF1Lbmi2
-
-	.globl	mcl_fpDbl_add1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add1Lbmi2,@function
-mcl_fpDbl_add1Lbmi2:                    # @mcl_fpDbl_add1Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%esi
-	movl	20(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %eax
-	movl	16(%esp), %esi
-	addl	(%esi), %edx
-	movl	12(%esp), %ecx
-	adcl	4(%esi), %eax
-	movl	%edx, (%ecx)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	24(%esp), %esi
-	movl	%eax, %edx
-	subl	(%esi), %edx
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB18_2
-# BB#1:
-	movl	%edx, %eax
-.LBB18_2:
-	movl	%eax, 4(%ecx)
-	popl	%esi
-	popl	%ebx
-	retl
-.Lfunc_end18:
-	.size	mcl_fpDbl_add1Lbmi2, .Lfunc_end18-mcl_fpDbl_add1Lbmi2
-
-	.globl	mcl_fpDbl_sub1Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub1Lbmi2,@function
-mcl_fpDbl_sub1Lbmi2:                    # @mcl_fpDbl_sub1Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %esi
-	movl	4(%eax), %eax
-	xorl	%ecx, %ecx
-	movl	16(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %eax
-	movl	8(%esp), %edx
-	movl	%esi, (%edx)
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	je	.LBB19_2
-# BB#1:
-	movl	20(%esp), %ecx
-	movl	(%ecx), %ecx
-.LBB19_2:
-	addl	%eax, %ecx
-	movl	%ecx, 4(%edx)
-	popl	%esi
-	retl
-.Lfunc_end19:
-	.size	mcl_fpDbl_sub1Lbmi2, .Lfunc_end19-mcl_fpDbl_sub1Lbmi2
-
-	.globl	mcl_fp_mulUnitPre2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre2Lbmi2,@function
-mcl_fp_mulUnitPre2Lbmi2:                # @mcl_fp_mulUnitPre2Lbmi2
-# BB#0:
-	pushl	%edi
-	pushl	%esi
-	movl	20(%esp), %edx
-	movl	16(%esp), %eax
-	mulxl	4(%eax), %ecx, %esi
-	mulxl	(%eax), %eax, %edx
-	movl	12(%esp), %edi
-	movl	%eax, (%edi)
-	addl	%ecx, %edx
-	movl	%edx, 4(%edi)
-	adcl	$0, %esi
-	movl	%esi, 8(%edi)
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end20:
-	.size	mcl_fp_mulUnitPre2Lbmi2, .Lfunc_end20-mcl_fp_mulUnitPre2Lbmi2
-
-	.globl	mcl_fpDbl_mulPre2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre2Lbmi2,@function
-mcl_fpDbl_mulPre2Lbmi2:                 # @mcl_fpDbl_mulPre2Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	4(%ecx), %ecx
-	movl	28(%esp), %esi
-	movl	(%esi), %edi
-	movl	%ecx, %edx
-	mulxl	%edi, %ebx, %ebp
-	movl	%eax, %edx
-	mulxl	%edi, %edx, %edi
-	addl	%ebx, %edi
-	movl	20(%esp), %ebx
-	movl	%edx, (%ebx)
-	adcl	$0, %ebp
-	movl	4(%esi), %esi
-	movl	%eax, %edx
-	mulxl	%esi, %eax, %ebx
-	addl	%edi, %eax
+	mulxl	4(%esp), %eax, %esi             # 4-byte Folded Reload
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	%ebx, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %edi, %eax            # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movl	36(%esp), %eax                  # 4-byte Reload
+	mulxl	%eax, %ebx, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	mulxl	52(%esp), %esi, %eax            # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	(%esp), %esi                    # 4-byte Folded Reload
+	setb	%dl
+	addl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 16(%esp)                  # 4-byte Folded Spill
+	movl	84(%esp), %eax
+	movl	%ebp, 4(%eax)
+	adcl	32(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movzbl	%dl, %eax
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	92(%esp), %eax
+	movl	8(%eax), %edx
+	mulxl	44(%esp), %edi, %ecx            # 4-byte Folded Reload
+	mulxl	40(%esp), %eax, %ebp            # 4-byte Folded Reload
+	addl	%edi, %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	mulxl	4(%esp), %ebp, %edi             # 4-byte Folded Reload
+	adcl	%ecx, %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	%edi, %ebp
+	movl	%ebp, %edi
+	mulxl	36(%esp), %ecx, %ebp            # 4-byte Folded Reload
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	mulxl	%ecx, %ebp, %edx
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	adcl	$0, %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	addl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 12(%esp)                  # 4-byte Folded Spill
+	movl	84(%esp), %edx
+	movl	%eax, 8(%edx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 20(%esp)                  # 4-byte Folded Spill
+	adcl	%ebx, %edi
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	adcl	%esi, 28(%esp)                  # 4-byte Folded Spill
+	adcl	(%esp), %ebp                    # 4-byte Folded Reload
+	adcl	$0, 8(%esp)                     # 4-byte Folded Spill
+	movl	92(%esp), %eax
+	movl	12(%eax), %edx
+	mulxl	%ecx, %ecx, %eax
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	36(%esp), %esi, %eax            # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	44(%esp), %ecx, %edi            # 4-byte Folded Reload
+	mulxl	40(%esp), %ebx, %eax            # 4-byte Folded Reload
+	addl	%ecx, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	mulxl	4(%esp), %eax, %ecx             # 4-byte Folded Reload
+	adcl	%edi, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %edi, %eax            # 4-byte Folded Reload
+	adcl	%ecx, %edi
+	adcl	%esi, %eax
+	movl	%eax, %esi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	60(%esp), %eax                  # 4-byte Folded Reload
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	$0, %edx
+	addl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	84(%esp), %ecx
+	movl	%ebx, 12(%ecx)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 32(%esp)                  # 4-byte Folded Spill
+	adcl	28(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	adcl	%ebp, %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	$0, %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	92(%esp), %eax
+	movl	16(%eax), %edx
+	mulxl	44(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	40(%esp), %edi, %esi            # 4-byte Folded Reload
+	addl	%ecx, %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	mulxl	4(%esp), %ebp, %ecx             # 4-byte Folded Reload
+	adcl	%eax, %ebp
+	movl	%ebp, %ebx
+	mulxl	48(%esp), %eax, %ebp            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	mulxl	36(%esp), %esi, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	%ebp, %esi
+	mulxl	52(%esp), %ecx, %eax            # 4-byte Folded Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
 	movl	%ecx, %edx
-	mulxl	%esi, %edx, %ecx
-	adcl	%ebp, %edx
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	addl	%ebx, %edx
-	movl	20(%esp), %edi
-	movl	%eax, 4(%edi)
-	movl	%edx, 8(%edi)
+	adcl	$0, %eax
+	movl	%eax, %ecx
+	addl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	84(%esp), %eax
+	movl	%edi, 16(%eax)
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	56(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 20(%esp)                  # 4-byte Folded Spill
+	adcl	(%esp), %esi                    # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	92(%esp), %eax
+	movl	20(%eax), %edx
+	mulxl	44(%esp), %esi, %eax            # 4-byte Folded Reload
+	mulxl	40(%esp), %edi, %ebx            # 4-byte Folded Reload
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	adcl	$0, %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	addl	%esi, %ebx
+	mulxl	4(%esp), %edi, %ecx             # 4-byte Folded Reload
+	adcl	%eax, %edi
+	mulxl	48(%esp), %esi, %eax            # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
 	adcl	%ecx, %esi
-	movl	%esi, 12(%edi)
+	mulxl	36(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	mulxl	52(%esp), %edx, %ecx            # 4-byte Folded Reload
+	adcl	36(%esp), %edx                  # 4-byte Folded Reload
+	adcl	$0, %ecx
+	addl	40(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	movl	84(%esp), %ebx
+	movl	%ebp, 20(%ebx)
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	movl	84(%esp), %ebp
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	movl	%ebx, 24(%ebp)
+	adcl	(%esp), %esi                    # 4-byte Folded Reload
+	movl	%edi, 28(%ebp)
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%esi, 32(%ebp)
+	movl	%eax, 36(%ebp)
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 40(%ebp)
+	adcl	$0, %ecx
+	movl	%ecx, 44(%ebp)
+	addl	$64, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end21:
-	.size	mcl_fpDbl_mulPre2Lbmi2, .Lfunc_end21-mcl_fpDbl_mulPre2Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre2Lbmi2,@function
-mcl_fpDbl_sqrPre2Lbmi2:                 # @mcl_fpDbl_sqrPre2Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	20(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	4(%ecx), %ecx
-	movl	16(%esp), %esi
-	movl	%eax, %edx
-	mulxl	%eax, %edx, %edi
-	movl	%edx, (%esi)
-	movl	%ecx, %edx
-	mulxl	%eax, %edx, %eax
-	addl	%edx, %edi
-	movl	%eax, %ebx
-	adcl	$0, %ebx
-	addl	%edx, %edi
-	movl	%ecx, %edx
-	mulxl	%ecx, %edx, %ecx
-	adcl	%ebx, %edx
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	addl	%eax, %edx
-	movl	%edi, 4(%esi)
-	movl	%edx, 8(%esi)
-	adcl	%ecx, %ebx
-	movl	%ebx, 12(%esi)
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end22:
-	.size	mcl_fpDbl_sqrPre2Lbmi2, .Lfunc_end22-mcl_fpDbl_sqrPre2Lbmi2
-
-	.globl	mcl_fp_mont2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont2Lbmi2,@function
-mcl_fp_mont2Lbmi2:                      # @mcl_fp_mont2Lbmi2
-# BB#0:
+.Lfunc_end7:
+	.size	mcl_fpDbl_mulPre6Lbmi2, .Lfunc_end7-mcl_fpDbl_mulPre6Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre6Lbmi2          # -- Begin function mcl_fpDbl_sqrPre6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre6Lbmi2,@function
+mcl_fpDbl_sqrPre6Lbmi2:                 # @mcl_fpDbl_sqrPre6Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$24, %esp
-	movl	48(%esp), %eax
-	movl	(%eax), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	4(%eax), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	52(%esp), %eax
-	movl	(%eax), %eax
-	mulxl	%eax, %ecx, %esi
+	subl	$176, %esp
+	movl	200(%esp), %eax
+	movl	(%eax), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	4(%eax), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	8(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	12(%eax), %edi
+	movl	20(%eax), %edx
+	movl	16(%eax), %ebx
+	mulxl	%ebx, %eax, %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	mulxl	%edi, %eax, %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	mulxl	%ecx, %ecx, %eax
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	mulxl	%esi, %eax, %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	mulxl	(%esp), %eax, %esi              # 4-byte Folded Reload
+	movl	%esi, 128(%esp)                 # 4-byte Spill
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	mulxl	%edx, %ecx, %eax
+	addl	76(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 152(%esp)                 # 4-byte Spill
+	adcl	36(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 156(%esp)                 # 4-byte Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	8(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, 160(%esp)                 # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 164(%esp)                 # 4-byte Spill
+	adcl	52(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 168(%esp)                 # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 172(%esp)                 # 4-byte Spill
+	movl	%ebx, %edx
+	mulxl	%edi, %ecx, %eax
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	mulxl	16(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	mulxl	20(%esp), %esi, %ebp            # 4-byte Folded Reload
+	movl	%esi, 104(%esp)                 # 4-byte Spill
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	mulxl	(%esp), %ecx, %eax              # 4-byte Folded Reload
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	%ecx, 80(%esp)                  # 4-byte Spill
+	mulxl	%ebx, %edx, %ecx
+	movl	%eax, %ebx
+	addl	%esi, %ebx
+	movl	%ebx, 140(%esp)                 # 4-byte Spill
+	adcl	60(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 136(%esp)                 # 4-byte Spill
+	movl	48(%esp), %ebx                  # 4-byte Reload
+	adcl	4(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 132(%esp)                 # 4-byte Spill
+	adcl	56(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 144(%esp)                 # 4-byte Spill
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 148(%esp)                 # 4-byte Spill
+	adcl	$0, 52(%esp)                    # 4-byte Folded Spill
 	movl	%edi, %edx
-	mulxl	%eax, %edx, %edi
-	movl	%edx, (%esp)            # 4-byte Spill
-	addl	%ecx, %edi
+	mulxl	16(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	mulxl	20(%esp), %esi, %ebp            # 4-byte Folded Reload
+	movl	%esi, 88(%esp)                  # 4-byte Spill
+	movl	%ebp, 72(%esp)                  # 4-byte Spill
+	mulxl	(%esp), %ecx, %ebx              # 4-byte Folded Reload
+	movl	%ebx, 92(%esp)                  # 4-byte Spill
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	mulxl	%edi, %edx, %ecx
+	movl	%ebx, %edi
+	addl	%esi, %edi
+	movl	%edi, 116(%esp)                 # 4-byte Spill
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 112(%esp)                 # 4-byte Spill
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, %edx
+	movl	%edx, 120(%esp)                 # 4-byte Spill
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 124(%esp)                 # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	%eax, 56(%esp)                  # 4-byte Folded Spill
+	adcl	$0, 28(%esp)                    # 4-byte Folded Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	mulxl	%ebp, %esi, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	mulxl	(%esp), %edi, %ecx              # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	mulxl	%edx, %edi, %edx
+	addl	%esi, %ecx
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	%ecx, %edi
+	movl	%edi, 100(%esp)                 # 4-byte Spill
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	adcl	60(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 48(%esp)                  # 4-byte Folded Spill
+	adcl	$0, 40(%esp)                    # 4-byte Folded Spill
+	movl	%ebp, %edx
+	mulxl	%ebp, %edi, %ebx
+	movl	(%esp), %ebp                    # 4-byte Reload
+	mulxl	%ebp, %edx, %eax
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	addl	%eax, %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	%eax, %edi
+	adcl	%esi, %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	adcl	88(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	104(%esp), %eax                 # 4-byte Reload
+	adcl	%eax, 72(%esp)                  # 4-byte Folded Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	76(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%ebp, %edx
+	mulxl	%edx, %edx, %eax
+	movl	196(%esp), %ecx
+	movl	%edx, (%ecx)
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	$0, %ebp
+	addl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %edx
+	movl	%edi, %eax
+	adcl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	68(%esp), %edi                  # 4-byte Folded Reload
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	adcl	80(%esp), %ebx                  # 4-byte Folded Reload
+	movl	108(%esp), %ecx                 # 4-byte Reload
+	adcl	84(%esp), %ecx                  # 4-byte Folded Reload
+	movl	128(%esp), %esi                 # 4-byte Reload
 	adcl	$0, %esi
-	movl	56(%esp), %eax
-	movl	-4(%eax), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	imull	%ecx, %edx
-	movl	(%eax), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	4(%eax), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebp, %ecx
-	mulxl	%ebx, %edx, %eax
-	addl	%ebp, %eax
-	adcl	$0, %ecx
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	adcl	%edi, %eax
-	adcl	%esi, %ecx
-	movl	52(%esp), %edx
-	movl	4(%edx), %edx
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	mulxl	4(%esp), %esi, %ebp     # 4-byte Folded Reload
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	mulxl	8(%esp), %edi, %esi     # 4-byte Folded Reload
-	addl	4(%esp), %esi           # 4-byte Folded Reload
+	addl	16(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %edx
+	adcl	36(%esp), %edi                  # 4-byte Folded Reload
+	adcl	4(%esp), %ebx                   # 4-byte Folded Reload
+	adcl	72(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	24(%esp), %esi                  # 4-byte Folded Reload
 	adcl	$0, %ebp
-	addl	%eax, %edi
-	adcl	%ecx, %esi
-	adcl	%ebx, %ebp
-	sbbl	%ecx, %ecx
-	movl	12(%esp), %edx          # 4-byte Reload
-	imull	%edi, %edx
+	addl	64(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	96(%esp), %edi                  # 4-byte Folded Reload
+	adcl	100(%esp), %ebx                 # 4-byte Folded Reload
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, %eax
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	adcl	48(%esp), %ebp                  # 4-byte Folded Reload
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	addl	68(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	adcl	116(%esp), %ebx                 # 4-byte Folded Reload
+	adcl	112(%esp), %eax                 # 4-byte Folded Reload
+	adcl	120(%esp), %esi                 # 4-byte Folded Reload
+	adcl	124(%esp), %ebp                 # 4-byte Folded Reload
+	adcl	56(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, %edi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	addl	80(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	140(%esp), %eax                 # 4-byte Folded Reload
+	adcl	136(%esp), %esi                 # 4-byte Folded Reload
+	adcl	132(%esp), %ebp                 # 4-byte Folded Reload
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	adcl	144(%esp), %edi                 # 4-byte Folded Reload
+	adcl	148(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	$0, %edx
+	addl	84(%esp), %eax                  # 4-byte Folded Reload
+	movl	196(%esp), %ecx
+	movl	(%esp), %ebp                    # 4-byte Reload
+	movl	%ebp, 4(%ecx)
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 8(%ecx)
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	movl	%ebp, 12(%ecx)
+	movl	%ebx, 16(%ecx)
+	adcl	152(%esp), %esi                 # 4-byte Folded Reload
+	movl	%eax, 20(%ecx)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	156(%esp), %eax                 # 4-byte Folded Reload
+	movl	%esi, 24(%ecx)
+	adcl	160(%esp), %edi                 # 4-byte Folded Reload
+	movl	%eax, 28(%ecx)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	164(%esp), %eax                 # 4-byte Folded Reload
+	movl	%edi, 32(%ecx)
+	movl	%eax, 36(%ecx)
 	movl	%edx, %eax
-	mulxl	16(%esp), %ebx, %edx    # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	mulxl	20(%esp), %edx, %eax    # 4-byte Folded Reload
-	addl	12(%esp), %edx          # 4-byte Folded Reload
+	adcl	168(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 40(%ecx)
+	movl	172(%esp), %eax                 # 4-byte Reload
 	adcl	$0, %eax
-	andl	$1, %ecx
-	addl	%edi, %ebx
-	adcl	%esi, %edx
-	adcl	%ebp, %eax
-	adcl	$0, %ecx
-	movl	%edx, %ebp
-	subl	16(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, %esi
-	sbbl	20(%esp), %esi          # 4-byte Folded Reload
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB23_2
-# BB#1:
-	movl	%ebp, %edx
-.LBB23_2:
-	movl	44(%esp), %edi
-	movl	%edx, (%edi)
-	testb	%cl, %cl
-	jne	.LBB23_4
-# BB#3:
-	movl	%esi, %eax
-.LBB23_4:
-	movl	%eax, 4(%edi)
-	addl	$24, %esp
+	movl	%eax, 44(%ecx)
+	addl	$176, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end23:
-	.size	mcl_fp_mont2Lbmi2, .Lfunc_end23-mcl_fp_mont2Lbmi2
-
-	.globl	mcl_fp_montNF2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF2Lbmi2,@function
-mcl_fp_montNF2Lbmi2:                    # @mcl_fp_montNF2Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
+.Lfunc_end8:
+	.size	mcl_fpDbl_sqrPre6Lbmi2, .Lfunc_end8-mcl_fpDbl_sqrPre6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_mont6Lbmi2               # -- Begin function mcl_fp_mont6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mont6Lbmi2,@function
+mcl_fp_mont6Lbmi2:                      # @mcl_fp_mont6Lbmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$20, %esp
-	movl	44(%esp), %eax
-	movl	(%eax), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	4(%eax), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	48(%esp), %eax
-	movl	(%eax), %eax
-	mulxl	%eax, %edi, %ebp
-	movl	%ecx, %edx
-	mulxl	%eax, %ecx, %esi
-	addl	%edi, %esi
-	adcl	$0, %ebp
-	movl	52(%esp), %eax
-	movl	-4(%eax), %ebx
-	movl	%ecx, %edx
-	imull	%ebx, %edx
-	movl	(%eax), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	mulxl	%eax, %edi, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	addl	%ecx, %edi
-	movl	52(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	mulxl	%eax, %edi, %edx
-	adcl	%esi, %edi
-	adcl	$0, %ebp
-	addl	(%esp), %edi            # 4-byte Folded Reload
-	adcl	%edx, %ebp
-	movl	48(%esp), %eax
-	movl	4(%eax), %edx
-	mulxl	4(%esp), %eax, %esi     # 4-byte Folded Reload
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	mulxl	8(%esp), %eax, %ecx     # 4-byte Folded Reload
-	addl	4(%esp), %ecx           # 4-byte Folded Reload
-	adcl	$0, %esi
-	addl	%edi, %eax
-	adcl	%ebp, %ecx
-	adcl	$0, %esi
-	imull	%eax, %ebx
-	movl	%ebx, %edx
-	movl	16(%esp), %ebp          # 4-byte Reload
-	mulxl	%ebp, %edx, %edi
-	addl	%eax, %edx
-	movl	%ebx, %edx
-	movl	12(%esp), %ebx          # 4-byte Reload
-	mulxl	%ebx, %eax, %edx
-	adcl	%ecx, %eax
+	subl	$100, %esp
+	movl	124(%esp), %ebx
+	movl	20(%ebx), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	128(%esp), %ecx
+	movl	(%ecx), %edi
+	mulxl	%edi, %eax, %esi
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	16(%ebx), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	mulxl	%edi, %eax, %ecx
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	12(%ebx), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	mulxl	%edi, %edx, %eax
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	4(%ebx), %edx
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	mulxl	%edi, %ebp, %edx
+	movl	%ebp, 84(%esp)                  # 4-byte Spill
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	(%ebx), %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	mulxl	%edi, %ebp, %edx
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	addl	84(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	8(%ebx), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	mulxl	%edi, %edi, %edx
+	adcl	44(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	adcl	48(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	adcl	52(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	88(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
 	adcl	$0, %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	movl	132(%esp), %esi
+	movl	-4(%esi), %edx
+	movl	%edx, 92(%esp)                  # 4-byte Spill
+	imull	(%esp), %edx                    # 4-byte Folded Reload
+	movl	20(%esi), %ecx
+	movl	%ecx, 88(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %ecx, %eax
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	8(%esi), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %ebx, %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	4(%esi), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %edi, %ebp
+	movl	(%esi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ecx, %eax
 	addl	%edi, %eax
-	adcl	%edx, %esi
-	movl	%eax, %edx
-	subl	%ebp, %edx
-	movl	%esi, %ecx
-	sbbl	%ebx, %ecx
-	testl	%ecx, %ecx
-	js	.LBB24_2
-# BB#1:
-	movl	%edx, %eax
-.LBB24_2:
-	movl	40(%esp), %edx
-	movl	%eax, (%edx)
-	js	.LBB24_4
-# BB#3:
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	%ebx, %ebp
+	movl	%ebp, %edi
+	movl	12(%esi), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	mulxl	%eax, %eax, %ebx
+	adcl	80(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	16(%esi), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ebp, %eax
+	adcl	%ebx, %ebp
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %esi
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	$0, %edx
+	addl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 24(%esp)                  # 4-byte Folded Spill
+	adcl	36(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	%eax, 16(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	128(%esp), %ecx
+	movl	4(%ecx), %edx
+	mulxl	40(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	68(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	mulxl	64(%esp), %esi, %eax            # 4-byte Folded Reload
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	mulxl	72(%esp), %esi, %edi            # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	mulxl	60(%esp), %edi, %ebx            # 4-byte Folded Reload
+	setb	32(%esp)                        # 1-byte Folded Spill
+	addl	%esi, %ebx
+	mulxl	76(%esp), %esi, %edx            # 4-byte Folded Reload
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	adcl	56(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, %esi
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	$0, %edx
+	addl	24(%esp), %edi                  # 4-byte Folded Reload
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	16(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	%ebp, %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movzbl	32(%esp), %eax                  # 1-byte Folded Reload
+	adcl	%eax, %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	setb	24(%esp)                        # 1-byte Folded Spill
+	movl	92(%esp), %edx                  # 4-byte Reload
+	imull	%edi, %edx
+	mulxl	48(%esp), %esi, %eax            # 4-byte Folded Reload
+	mulxl	44(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	addl	%esi, %ecx
+	movl	%ecx, %ebp
+	mulxl	52(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%eax, %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	mulxl	84(%esp), %ecx, %eax            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	mulxl	80(%esp), %ecx, %esi            # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	adcl	%eax, %ecx
 	movl	%ecx, %esi
-.LBB24_4:
-	movl	%esi, 4(%edx)
-	addl	$20, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end24:
-	.size	mcl_fp_montNF2Lbmi2, .Lfunc_end24-mcl_fp_montNF2Lbmi2
-
-	.globl	mcl_fp_montRed2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed2Lbmi2,@function
-mcl_fp_montRed2Lbmi2:                   # @mcl_fp_montRed2Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$16, %esp
-	movl	44(%esp), %esi
-	movl	-4(%esi), %ecx
-	movl	(%esi), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	40(%esp), %eax
-	movl	(%eax), %ebx
+	mulxl	88(%esp), %ecx, %eax            # 4-byte Folded Reload
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	$0, %eax
+	addl	%edi, 8(%esp)                   # 4-byte Folded Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	adcl	%ebx, 16(%esp)                  # 4-byte Folded Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 32(%esp)                  # 4-byte Folded Spill
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movzbl	24(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	128(%esp), %ecx
+	movl	8(%ecx), %edx
+	mulxl	40(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	68(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	mulxl	72(%esp), %ecx, %ebp            # 4-byte Folded Reload
+	mulxl	60(%esp), %eax, %edi            # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	addl	%ecx, %edi
+	mulxl	76(%esp), %esi, %ecx            # 4-byte Folded Reload
+	adcl	%ebp, %esi
+	mulxl	64(%esp), %ebp, %eax            # 4-byte Folded Reload
+	adcl	%ecx, %ebp
+	adcl	96(%esp), %eax                  # 4-byte Folded Reload
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	28(%esp), %edx                  # 4-byte Folded Reload
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	$0, %ecx
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	addl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	setb	32(%esp)                        # 1-byte Folded Spill
+	movl	92(%esp), %edx                  # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	48(%esp), %esi, %eax            # 4-byte Folded Reload
+	mulxl	44(%esp), %edi, %ecx            # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	addl	%esi, %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%eax, %ecx
+	movl	%ecx, %eax
+	mulxl	84(%esp), %ecx, %edi            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	mulxl	80(%esp), %esi, %ebx            # 4-byte Folded Reload
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	adcl	%edi, %esi
+	movl	%esi, %ebx
+	mulxl	88(%esp), %edi, %edx            # 4-byte Folded Reload
+	adcl	36(%esp), %edi                  # 4-byte Folded Reload
+	adcl	$0, %edx
+	movl	8(%esp), %esi                   # 4-byte Reload
+	addl	24(%esp), %esi                  # 4-byte Folded Reload
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	%esi, 16(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	%ebp, %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	adcl	(%esp), %edx                    # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movzbl	32(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	128(%esp), %ecx
+	movl	12(%ecx), %edx
+	mulxl	40(%esp), %eax, %ebx            # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	mulxl	68(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	72(%esp), %ebp, %eax            # 4-byte Folded Reload
+	mulxl	60(%esp), %esi, %ecx            # 4-byte Folded Reload
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	addl	%ebp, %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	mulxl	76(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%eax, %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	mulxl	64(%esp), %ebp, %eax            # 4-byte Folded Reload
+	adcl	%esi, %ebp
+	adcl	96(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %esi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
 	movl	%ebx, %edx
+	adcl	$0, %edx
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	addl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, 4(%esp)                   # 4-byte Folded Spill
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, 24(%esp)                  # 4-byte Folded Spill
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	adcl	%edi, %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	movl	92(%esp), %edx                  # 4-byte Reload
 	imull	%ecx, %edx
-	movl	4(%esi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebp, %esi
-	mulxl	%edi, %edx, %eax
-	addl	%ebp, %eax
-	adcl	$0, %esi
-	addl	%ebx, %edx
-	movl	40(%esp), %edi
-	movl	12(%edi), %edx
-	adcl	4(%edi), %eax
-	adcl	8(%edi), %esi
+	mulxl	48(%esp), %esi, %eax            # 4-byte Folded Reload
+	mulxl	44(%esp), %ebx, %edi            # 4-byte Folded Reload
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	addl	%esi, %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %edi, %esi            # 4-byte Folded Reload
+	adcl	%eax, %edi
+	movl	%edi, %eax
+	mulxl	84(%esp), %edi, %ebx            # 4-byte Folded Reload
+	adcl	%esi, %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	mulxl	80(%esp), %esi, %edi            # 4-byte Folded Reload
+	adcl	%ebx, %esi
+	mulxl	88(%esp), %ebx, %edx            # 4-byte Folded Reload
+	adcl	%edi, %ebx
 	adcl	$0, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	sbbl	%ebx, %ebx
-	imull	%eax, %ecx
-	movl	%ecx, %edx
-	mulxl	8(%esp), %edi, %edx     # 4-byte Folded Reload
-	movl	%edx, (%esp)            # 4-byte Spill
+	addl	%ecx, 8(%esp)                   # 4-byte Folded Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	%ecx, 32(%esp)                  # 4-byte Folded Spill
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %edi
+	adcl	%ebp, 28(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movzbl	16(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	128(%esp), %ecx
+	movl	16(%ecx), %edx
+	mulxl	40(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	mulxl	68(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	72(%esp), %ebp, %eax            # 4-byte Folded Reload
+	mulxl	60(%esp), %esi, %ecx            # 4-byte Folded Reload
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	addl	%ebp, %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	mulxl	76(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%eax, %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	mulxl	64(%esp), %ebp, %eax            # 4-byte Folded Reload
+	adcl	%esi, %ebp
+	adcl	96(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	$0, %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	addl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	%edi, 12(%esp)                  # 4-byte Folded Spill
+	movl	(%esp), %edi                    # 4-byte Reload
+	adcl	28(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, (%esp)                    # 4-byte Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	%ebx, %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	setb	28(%esp)                        # 1-byte Folded Spill
+	movl	92(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	mulxl	48(%esp), %esi, %eax            # 4-byte Folded Reload
+	mulxl	44(%esp), %ebx, %edi            # 4-byte Folded Reload
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	addl	%esi, %edi
+	movl	%edi, %ebx
+	mulxl	52(%esp), %edi, %esi            # 4-byte Folded Reload
+	adcl	%eax, %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	mulxl	84(%esp), %edi, %ecx            # 4-byte Folded Reload
+	adcl	%esi, %edi
+	mulxl	80(%esp), %esi, %eax            # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	%ecx, %esi
+	mulxl	88(%esp), %eax, %edx            # 4-byte Folded Reload
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	adcl	$0, %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	addl	%ecx, 8(%esp)                   # 4-byte Folded Spill
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	%ecx, 32(%esp)                  # 4-byte Folded Spill
+	adcl	%ebp, %edi
+	movl	%edi, (%esp)                    # 4-byte Spill
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, %edi
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movzbl	28(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	128(%esp), %ecx
+	movl	20(%ecx), %edx
+	mulxl	72(%esp), %ebx, %ebp            # 4-byte Folded Reload
+	mulxl	60(%esp), %esi, %ecx            # 4-byte Folded Reload
+	addl	%ebx, %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	mulxl	40(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	mulxl	76(%esp), %ecx, %ebx            # 4-byte Folded Reload
+	adcl	%ebp, %ecx
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
+	mulxl	68(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	mulxl	64(%esp), %eax, %edx            # 4-byte Folded Reload
+	adcl	%ebx, %eax
+	movl	%eax, %ebx
+	adcl	%ebp, %edx
+	movl	%edx, %eax
 	movl	%ecx, %edx
-	mulxl	12(%esp), %edx, %ebp    # 4-byte Folded Reload
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	adcl	$0, %ebp
-	andl	$1, %ebx
+	adcl	60(%esp), %edx                  # 4-byte Folded Reload
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	movl	%esi, %ebp
+	addl	16(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	%esi, 72(%esp)                  # 4-byte Folded Spill
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	%esi, 76(%esp)                  # 4-byte Folded Spill
+	adcl	%edi, %ebx
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	setb	68(%esp)                        # 1-byte Folded Spill
+	movl	92(%esp), %edx                  # 4-byte Reload
+	imull	%ebp, %edx
+	mulxl	44(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %edi, %ecx            # 4-byte Folded Reload
 	addl	%eax, %edi
-	adcl	%esi, %edx
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
+	mulxl	52(%esp), %esi, %ebp            # 4-byte Folded Reload
+	adcl	%ecx, %esi
+	mulxl	84(%esp), %eax, %ebx            # 4-byte Folded Reload
+	adcl	%ebp, %eax
+	movl	%edx, %ecx
+	mulxl	80(%esp), %ebp, %edx            # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	%ebx, %ebp
+	movl	%ecx, %edx
+	mulxl	88(%esp), %edx, %ecx            # 4-byte Folded Reload
+	adcl	4(%esp), %edx                   # 4-byte Folded Reload
+	adcl	$0, %ecx
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	addl	16(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	72(%esp), %edi                  # 4-byte Folded Reload
+	adcl	76(%esp), %esi                  # 4-byte Folded Reload
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	adcl	60(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	64(%esp), %edx                  # 4-byte Folded Reload
+	adcl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movzbl	68(%esp), %ebx                  # 1-byte Folded Reload
 	adcl	$0, %ebx
-	movl	%edx, %edi
-	subl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%ebp, %ecx
-	sbbl	12(%esp), %ecx          # 4-byte Folded Reload
+	movl	%edi, 68(%esp)                  # 4-byte Spill
+	subl	44(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	%esi, 64(%esp)                  # 4-byte Spill
+	sbbl	48(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%eax, %esi
+	movl	%ebp, %eax
+	movl	%edx, %ebp
+	sbbl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	%eax, %edi
+	sbbl	84(%esp), %edi                  # 4-byte Folded Reload
+	sbbl	80(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, %esi
+	sbbl	88(%esp), %esi                  # 4-byte Folded Reload
 	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB25_2
-# BB#1:
-	movl	%edi, %edx
-.LBB25_2:
-	movl	36(%esp), %esi
-	movl	%edx, (%esi)
-	testb	%bl, %bl
-	jne	.LBB25_4
-# BB#3:
-	movl	%ecx, %ebp
-.LBB25_4:
-	movl	%ebp, 4(%esi)
-	addl	$16, %esp
+	testb	$1, %bl
+	jne	.LBB9_1
+# %bb.2:
+	movl	120(%esp), %ecx
+	movl	%esi, 20(%ecx)
+	jne	.LBB9_3
+.LBB9_4:
+	movl	%edx, 16(%ecx)
+	jne	.LBB9_5
+.LBB9_6:
+	movl	%edi, 12(%ecx)
+	movl	48(%esp), %edx                  # 4-byte Reload
+	movl	52(%esp), %eax                  # 4-byte Reload
+	jne	.LBB9_7
+.LBB9_8:
+	movl	%eax, 8(%ecx)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	jne	.LBB9_9
+.LBB9_10:
+	movl	%edx, 4(%ecx)
+	je	.LBB9_12
+.LBB9_11:
+	movl	68(%esp), %eax                  # 4-byte Reload
+.LBB9_12:
+	movl	%eax, (%ecx)
+	addl	$100, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end25:
-	.size	mcl_fp_montRed2Lbmi2, .Lfunc_end25-mcl_fp_montRed2Lbmi2
-
-	.globl	mcl_fp_addPre2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre2Lbmi2,@function
-mcl_fp_addPre2Lbmi2:                    # @mcl_fp_addPre2Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %eax
-	movl	12(%esp), %edx
-	addl	(%edx), %ecx
-	movl	8(%esp), %esi
-	adcl	4(%edx), %eax
-	movl	%ecx, (%esi)
-	movl	%eax, 4(%esi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	retl
-.Lfunc_end26:
-	.size	mcl_fp_addPre2Lbmi2, .Lfunc_end26-mcl_fp_addPre2Lbmi2
-
-	.globl	mcl_fp_subPre2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre2Lbmi2,@function
-mcl_fp_subPre2Lbmi2:                    # @mcl_fp_subPre2Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	xorl	%eax, %eax
-	movl	16(%esp), %esi
-	subl	(%esi), %ecx
-	sbbl	4(%esi), %edx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	%edx, 4(%esi)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	retl
-.Lfunc_end27:
-	.size	mcl_fp_subPre2Lbmi2, .Lfunc_end27-mcl_fp_subPre2Lbmi2
-
-	.globl	mcl_fp_shr1_2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_2Lbmi2,@function
-mcl_fp_shr1_2Lbmi2:                     # @mcl_fp_shr1_2Lbmi2
-# BB#0:
-	movl	8(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %eax
-	shrdl	$1, %eax, %ecx
-	movl	4(%esp), %edx
-	movl	%ecx, (%edx)
-	shrl	%eax
-	movl	%eax, 4(%edx)
-	retl
-.Lfunc_end28:
-	.size	mcl_fp_shr1_2Lbmi2, .Lfunc_end28-mcl_fp_shr1_2Lbmi2
-
-	.globl	mcl_fp_add2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add2Lbmi2,@function
-mcl_fp_add2Lbmi2:                       # @mcl_fp_add2Lbmi2
-# BB#0:
+.LBB9_1:
+	movl	%ecx, %esi
+	movl	120(%esp), %ecx
+	movl	%esi, 20(%ecx)
+	je	.LBB9_4
+.LBB9_3:
+	movl	%ebp, %edx
+	movl	%edx, 16(%ecx)
+	je	.LBB9_6
+.LBB9_5:
+	movl	%eax, %edi
+	movl	%edi, 12(%ecx)
+	movl	48(%esp), %edx                  # 4-byte Reload
+	movl	52(%esp), %eax                  # 4-byte Reload
+	je	.LBB9_8
+.LBB9_7:
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ecx)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	je	.LBB9_10
+.LBB9_9:
+	movl	64(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%ecx)
+	jne	.LBB9_11
+	jmp	.LBB9_12
+.Lfunc_end9:
+	.size	mcl_fp_mont6Lbmi2, .Lfunc_end9-mcl_fp_mont6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montNF6Lbmi2             # -- Begin function mcl_fp_montNF6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF6Lbmi2,@function
+mcl_fp_montNF6Lbmi2:                    # @mcl_fp_montNF6Lbmi2
+# %bb.0:
+	pushl	%ebp
 	pushl	%ebx
+	pushl	%edi
 	pushl	%esi
-	movl	20(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	4(%ecx), %ecx
-	movl	16(%esp), %esi
-	addl	(%esi), %eax
-	movl	12(%esp), %edx
-	adcl	4(%esi), %ecx
-	movl	%eax, (%edx)
-	movl	%ecx, 4(%edx)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	24(%esp), %esi
-	subl	(%esi), %eax
-	sbbl	4(%esi), %ecx
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB29_2
-# BB#1:                                 # %nocarry
-	movl	%eax, (%edx)
-	movl	%ecx, 4(%edx)
-.LBB29_2:                               # %carry
-	popl	%esi
-	popl	%ebx
-	retl
-.Lfunc_end29:
-	.size	mcl_fp_add2Lbmi2, .Lfunc_end29-mcl_fp_add2Lbmi2
-
-	.globl	mcl_fp_addNF2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF2Lbmi2,@function
-mcl_fp_addNF2Lbmi2:                     # @mcl_fp_addNF2Lbmi2
-# BB#0:
-	pushl	%edi
-	pushl	%esi
-	movl	20(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %eax
-	movl	16(%esp), %edx
-	addl	(%edx), %ecx
-	adcl	4(%edx), %eax
-	movl	24(%esp), %edi
+	subl	$84, %esp
+	movl	108(%esp), %ebx
+	movl	(%ebx), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	4(%ebx), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	112(%esp), %eax
+	movl	(%eax), %eax
+	mulxl	%eax, %ecx, %esi
+	movl	%edi, %edx
+	mulxl	%eax, %edx, %ebp
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	addl	%ecx, %ebp
+	movl	8(%ebx), %edx
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ecx, %edi
+	adcl	%esi, %ecx
 	movl	%ecx, %esi
-	subl	(%edi), %esi
-	movl	%eax, %edx
-	sbbl	4(%edi), %edx
-	testl	%edx, %edx
-	js	.LBB30_2
-# BB#1:
-	movl	%esi, %ecx
-.LBB30_2:
-	movl	12(%esp), %esi
-	movl	%ecx, (%esi)
-	js	.LBB30_4
-# BB#3:
-	movl	%edx, %eax
-.LBB30_4:
-	movl	%eax, 4(%esi)
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end30:
-	.size	mcl_fp_addNF2Lbmi2, .Lfunc_end30-mcl_fp_addNF2Lbmi2
-
-	.globl	mcl_fp_sub2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub2Lbmi2,@function
-mcl_fp_sub2Lbmi2:                       # @mcl_fp_sub2Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	20(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %eax
-	xorl	%ebx, %ebx
-	movl	24(%esp), %edx
-	subl	(%edx), %ecx
-	sbbl	4(%edx), %eax
-	movl	16(%esp), %edx
-	movl	%ecx, (%edx)
-	movl	%eax, 4(%edx)
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	je	.LBB31_2
-# BB#1:                                 # %carry
-	movl	28(%esp), %esi
-	movl	4(%esi), %edi
-	addl	(%esi), %ecx
-	movl	%ecx, (%edx)
-	adcl	%eax, %edi
-	movl	%edi, 4(%edx)
-.LBB31_2:                               # %nocarry
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end31:
-	.size	mcl_fp_sub2Lbmi2, .Lfunc_end31-mcl_fp_sub2Lbmi2
-
-	.globl	mcl_fp_subNF2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF2Lbmi2,@function
-mcl_fp_subNF2Lbmi2:                     # @mcl_fp_subNF2Lbmi2
-# BB#0:
-	pushl	%edi
-	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %eax
-	movl	20(%esp), %edx
-	subl	(%edx), %ecx
-	sbbl	4(%edx), %eax
-	movl	%eax, %edx
-	sarl	$31, %edx
-	movl	24(%esp), %esi
-	movl	4(%esi), %edi
-	andl	%edx, %edi
-	andl	(%esi), %edx
-	addl	%ecx, %edx
-	movl	12(%esp), %ecx
-	movl	%edx, (%ecx)
-	adcl	%eax, %edi
-	movl	%edi, 4(%ecx)
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end32:
-	.size	mcl_fp_subNF2Lbmi2, .Lfunc_end32-mcl_fp_subNF2Lbmi2
-
-	.globl	mcl_fpDbl_add2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add2Lbmi2,@function
-mcl_fpDbl_add2Lbmi2:                    # @mcl_fpDbl_add2Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	28(%esp), %edx
-	movl	12(%edx), %esi
-	movl	24(%esp), %edi
-	movl	12(%edi), %eax
-	movl	8(%edx), %ecx
-	movl	(%edx), %ebx
-	movl	4(%edx), %ebp
-	addl	(%edi), %ebx
-	adcl	4(%edi), %ebp
-	movl	20(%esp), %edx
-	adcl	8(%edi), %ecx
-	movl	%ebx, (%edx)
-	movl	%ebp, 4(%edx)
+	movl	12(%ebx), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ecx, %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	adcl	%edi, %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	16(%ebx), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ecx, %edi
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	20(%ebx), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ecx, %eax
+	adcl	%edi, %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	116(%esp), %ebx
+	movl	-4(%ebx), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	imull	%edi, %edx
+	movl	(%ebx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ecx, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	addl	%edi, %ecx
+	movl	4(%ebx), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	mulxl	%eax, %edi, %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	%ebp, %edi
+	movl	8(%ebx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	mulxl	%eax, %eax, %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
 	adcl	%esi, %eax
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	32(%esp), %ebp
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%ebx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	mulxl	%eax, %esi, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	60(%esp), %esi                  # 4-byte Folded Reload
+	movl	16(%ebx), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ecx, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %ecx                  # 4-byte Folded Reload
+	movl	20(%ebx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ebp, %eax
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	$0, %edx
+	addl	20(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	%edi, 8(%esp)                   # 4-byte Folded Spill
+	adcl	(%esp), %esi                    # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	adcl	%eax, %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	112(%esp), %eax
+	movl	4(%eax), %edx
+	mulxl	36(%esp), %ecx, %esi            # 4-byte Folded Reload
+	mulxl	40(%esp), %ebp, %eax            # 4-byte Folded Reload
+	addl	%ecx, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	mulxl	80(%esp), %ecx, %edi            # 4-byte Folded Reload
+	adcl	%esi, %ecx
 	movl	%ecx, %esi
-	subl	(%ebp), %esi
-	movl	%eax, %edi
-	sbbl	4(%ebp), %edi
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB33_2
-# BB#1:
+	mulxl	52(%esp), %eax, %ebx            # 4-byte Folded Reload
+	adcl	%edi, %eax
+	mulxl	76(%esp), %ecx, %edi            # 4-byte Folded Reload
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	adcl	%ebx, %ecx
+	movl	%ecx, %edi
+	mulxl	48(%esp), %ebx, %ecx            # 4-byte Folded Reload
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	$0, %ecx
+	movl	%ecx, %edx
+	addl	4(%esp), %ebp                   # 4-byte Folded Reload
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	adcl	24(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	$0, %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%ebp, %esi
+	imull	%ebp, %edx
+	mulxl	72(%esp), %ebp, %edi            # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	addl	%esi, %ebp
+	mulxl	44(%esp), %ebp, %esi            # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	%ecx, %ebp
+	movl	%ebp, %esi
+	mulxl	64(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, %ecx
+	mulxl	68(%esp), %ebp, %edi            # 4-byte Folded Reload
+	movl	%edi, (%esp)                    # 4-byte Spill
+	adcl	%eax, %ebp
+	movl	%ebp, %eax
+	mulxl	60(%esp), %ebp, %edi            # 4-byte Folded Reload
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	mulxl	56(%esp), %edi, %edx            # 4-byte Folded Reload
+	adcl	%ebx, %edi
+	movl	%edi, %ebx
+	movl	24(%esp), %edi                  # 4-byte Reload
+	adcl	$0, %edi
+	addl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ebp                    # 4-byte Folded Reload
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	adcl	%edx, %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	movl	112(%esp), %eax
+	movl	8(%eax), %edx
+	mulxl	36(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	40(%esp), %esi, %edi            # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	addl	%ecx, %edi
+	mulxl	80(%esp), %ebx, %ecx            # 4-byte Folded Reload
+	adcl	%eax, %ebx
+	mulxl	52(%esp), %eax, %esi            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	mulxl	76(%esp), %ecx, %ebp            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	mulxl	48(%esp), %edx, %eax            # 4-byte Folded Reload
+	adcl	%ebp, %edx
+	adcl	$0, %eax
+	movl	%eax, %ebp
+	movl	12(%esp), %esi                  # 4-byte Reload
+	addl	8(%esp), %esi                   # 4-byte Folded Reload
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	adcl	$0, %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	imull	%esi, %edx
+	mulxl	72(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	addl	%esi, %ebp
+	mulxl	44(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	%edi, %ebp
+	movl	%ebp, %edi
+	mulxl	64(%esp), %esi, %ecx            # 4-byte Folded Reload
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	%ebx, %esi
+	mulxl	68(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	%eax, %ebp
+	movl	%ebp, %ecx
+	mulxl	60(%esp), %ebp, %eax            # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, %eax
+	mulxl	56(%esp), %ebp, %ebx            # 4-byte Folded Reload
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	$0, %edx
+	addl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	adcl	%ebx, %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	112(%esp), %eax
+	movl	12(%eax), %edx
+	mulxl	36(%esp), %eax, %ecx            # 4-byte Folded Reload
+	mulxl	40(%esp), %ebp, %esi            # 4-byte Folded Reload
+	addl	%eax, %esi
+	mulxl	80(%esp), %eax, %edi            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	mulxl	52(%esp), %ecx, %ebx            # 4-byte Folded Reload
+	adcl	%edi, %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	mulxl	76(%esp), %ecx, %edi            # 4-byte Folded Reload
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	adcl	%ebx, %ecx
+	movl	%ecx, %edi
+	mulxl	48(%esp), %ebx, %ecx            # 4-byte Folded Reload
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	$0, %ecx
+	addl	4(%esp), %ebp                   # 4-byte Folded Reload
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	12(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	$0, %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%ebp, %ecx
+	imull	%ebp, %edx
+	mulxl	72(%esp), %ebp, %eax            # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	addl	%ecx, %ebp
+	mulxl	44(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	%esi, %ecx
+	mulxl	64(%esp), %ebp, %eax            # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	mulxl	68(%esp), %esi, %eax            # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, %eax
+	mulxl	60(%esp), %esi, %edi            # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	adcl	28(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	mulxl	56(%esp), %esi, %edx            # 4-byte Folded Reload
+	adcl	%ebx, %esi
+	movl	24(%esp), %edi                  # 4-byte Reload
+	adcl	$0, %edi
+	addl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 4(%esp)                   # 4-byte Folded Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	%edx, %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	movl	112(%esp), %eax
+	movl	16(%eax), %edx
+	mulxl	36(%esp), %eax, %ecx            # 4-byte Folded Reload
+	mulxl	40(%esp), %ebp, %edi            # 4-byte Folded Reload
+	addl	%eax, %edi
+	mulxl	80(%esp), %eax, %esi            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	mulxl	52(%esp), %ecx, %ebx            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	mulxl	76(%esp), %ecx, %esi            # 4-byte Folded Reload
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	%ebx, %ecx
+	movl	%ecx, %esi
+	mulxl	48(%esp), %ebx, %ecx            # 4-byte Folded Reload
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	$0, %ecx
+	addl	20(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	$0, %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%ebp, %ecx
+	imull	%ebp, %edx
+	mulxl	72(%esp), %ebp, %eax            # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	addl	%ecx, %ebp
+	mulxl	44(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	%edi, %ecx
+	mulxl	64(%esp), %ebp, %eax            # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ebp                    # 4-byte Folded Reload
+	mulxl	68(%esp), %edi, %eax            # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
 	movl	%edi, %eax
-.LBB33_2:
-	testb	%bl, %bl
-	jne	.LBB33_4
-# BB#3:
-	movl	%esi, %ecx
-.LBB33_4:
-	movl	%ecx, 8(%edx)
-	movl	%eax, 12(%edx)
+	mulxl	60(%esp), %edi, %esi            # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %edi                  # 4-byte Folded Reload
+	mulxl	56(%esp), %esi, %edx            # 4-byte Folded Reload
+	adcl	%ebx, %esi
+	movl	%esi, %ebx
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	$0, %esi
+	addl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	adcl	%edx, %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	112(%esp), %eax
+	movl	20(%eax), %edx
+	mulxl	36(%esp), %esi, %ecx            # 4-byte Folded Reload
+	mulxl	40(%esp), %edi, %eax            # 4-byte Folded Reload
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	addl	%esi, %eax
+	movl	%eax, %esi
+	mulxl	80(%esp), %eax, %edi            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, %ecx
+	mulxl	52(%esp), %eax, %ebx            # 4-byte Folded Reload
+	adcl	%edi, %eax
+	movl	%eax, %edi
+	mulxl	76(%esp), %eax, %ebp            # 4-byte Folded Reload
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	adcl	%ebx, %eax
+	movl	%eax, %ebx
+	mulxl	48(%esp), %eax, %ebp            # 4-byte Folded Reload
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	adcl	$0, %ebp
+	movl	40(%esp), %edx                  # 4-byte Reload
+	addl	8(%esp), %edx                   # 4-byte Folded Reload
+	adcl	4(%esp), %esi                   # 4-byte Folded Reload
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	$0, %ebp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %ecx
+	imull	%edx, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%eax, %edx
+	mulxl	72(%esp), %edx, %edi            # 4-byte Folded Reload
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	addl	%ecx, %edx
+	movl	%eax, %edx
+	mulxl	44(%esp), %ecx, %edx            # 4-byte Folded Reload
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	adcl	%esi, %ecx
+	movl	%eax, %edx
+	mulxl	64(%esp), %edi, %edx            # 4-byte Folded Reload
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	%eax, %edx
+	mulxl	68(%esp), %esi, %edx            # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	adcl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	%eax, %edx
+	mulxl	60(%esp), %eax, %edx            # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	adcl	%ebx, %eax
+	movl	32(%esp), %edx                  # 4-byte Reload
+	mulxl	56(%esp), %edx, %ebx            # 4-byte Folded Reload
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	adcl	40(%esp), %edx                  # 4-byte Folded Reload
+	adcl	$0, %ebp
+	addl	36(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	80(%esp), %edi                  # 4-byte Folded Reload
+	adcl	76(%esp), %esi                  # 4-byte Folded Reload
+	adcl	48(%esp), %eax                  # 4-byte Folded Reload
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	subl	72(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	sbbl	44(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 72(%esp)                  # 4-byte Spill
+	movl	%ebp, %edi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	sbbl	64(%esp), %esi                  # 4-byte Folded Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%eax, %ecx
+	sbbl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	sbbl	60(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ebp, %eax
+	sbbl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %ebp
+	sarl	$31, %ebp
+	testl	%ebp, %ebp
+	js	.LBB10_1
+# %bb.2:
+	movl	104(%esp), %edi
+	movl	%eax, 20(%edi)
+	js	.LBB10_3
+.LBB10_4:
+	movl	%edx, 16(%edi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	js	.LBB10_5
+.LBB10_6:
+	movl	%ecx, 12(%edi)
+	js	.LBB10_7
+.LBB10_8:
+	movl	%esi, 8(%edi)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	js	.LBB10_9
+.LBB10_10:
+	movl	%ecx, 4(%edi)
+	jns	.LBB10_12
+.LBB10_11:
+	movl	40(%esp), %eax                  # 4-byte Reload
+.LBB10_12:
+	movl	%eax, (%edi)
+	addl	$84, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end33:
-	.size	mcl_fpDbl_add2Lbmi2, .Lfunc_end33-mcl_fpDbl_add2Lbmi2
-
-	.globl	mcl_fpDbl_sub2Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub2Lbmi2,@function
-mcl_fpDbl_sub2Lbmi2:                    # @mcl_fpDbl_sub2Lbmi2
-# BB#0:
+.LBB10_1:
+	movl	%edi, %eax
+	movl	104(%esp), %edi
+	movl	%eax, 20(%edi)
+	jns	.LBB10_4
+.LBB10_3:
+	movl	68(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 16(%edi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	jns	.LBB10_6
+.LBB10_5:
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%edi)
+	jns	.LBB10_8
+.LBB10_7:
+	movl	44(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%edi)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB10_10
+.LBB10_9:
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	js	.LBB10_11
+	jmp	.LBB10_12
+.Lfunc_end10:
+	.size	mcl_fp_montNF6Lbmi2, .Lfunc_end10-mcl_fp_montNF6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRed6Lbmi2            # -- Begin function mcl_fp_montRed6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed6Lbmi2,@function
+mcl_fp_montRed6Lbmi2:                   # @mcl_fp_montRed6Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%ebx, %ebx
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %eax
-	sbbl	8(%edx), %eax
-	movl	12(%edx), %ebp
-	movl	12(%ecx), %edx
-	movl	20(%esp), %ecx
-	movl	%esi, (%ecx)
-	movl	%edi, 4(%ecx)
-	sbbl	%ebp, %edx
-	movl	32(%esp), %edi
-	movl	(%edi), %esi
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB34_1
-# BB#2:
-	xorl	%edi, %edi
-	jmp	.LBB34_3
-.LBB34_1:
-	movl	4(%edi), %edi
-.LBB34_3:
-	testb	%bl, %bl
-	jne	.LBB34_5
-# BB#4:
-	xorl	%esi, %esi
-.LBB34_5:
-	addl	%eax, %esi
-	movl	%esi, 8(%ecx)
-	adcl	%edx, %edi
-	movl	%edi, 12(%ecx)
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end34:
-	.size	mcl_fpDbl_sub2Lbmi2, .Lfunc_end34-mcl_fpDbl_sub2Lbmi2
-
-	.globl	mcl_fp_mulUnitPre3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre3Lbmi2,@function
-mcl_fp_mulUnitPre3Lbmi2:                # @mcl_fp_mulUnitPre3Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %edx
-	movl	20(%esp), %eax
-	mulxl	4(%eax), %esi, %ecx
-	mulxl	(%eax), %edi, %ebx
+	subl	$64, %esp
+	movl	92(%esp), %esi
+	movl	-4(%esi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	88(%esp), %ecx
+	movl	(%ecx), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	imull	%eax, %edx
+	movl	8(%esi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	mulxl	%eax, %edi, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esi), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ebp, %eax
+	movl	(%esi), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %ecx, %ebx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	addl	%ebp, %ebx
+	adcl	%edi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	12(%esi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ecx, %eax
+	adcl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	16(%esi), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	mulxl	%edi, %edi, %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	%eax, %edi
+	movl	20(%esi), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	mulxl	%eax, %eax, %ebp
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	adcl	$0, %ebp
+	movl	4(%esp), %edx                   # 4-byte Reload
+	addl	8(%esp), %edx                   # 4-byte Folded Reload
+	movl	88(%esp), %edx
+	adcl	4(%edx), %ebx
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	8(%edx), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	12(%edx), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	16(%edx), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	adcl	20(%edx), %eax
+	adcl	24(%edx), %ebp
+	setb	24(%esp)                        # 1-byte Folded Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	48(%esp), %ecx, %esi            # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %ecx, %edi            # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	addl	%esi, %ecx
+	mulxl	36(%esp), %esi, %edi            # 4-byte Folded Reload
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	mulxl	44(%esp), %esi, %edi            # 4-byte Folded Reload
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	mulxl	40(%esp), %esi, %edi            # 4-byte Folded Reload
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, %edi
+	mulxl	60(%esp), %esi, %edx            # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	movzbl	24(%esp), %edx                  # 1-byte Folded Reload
+	adcl	56(%esp), %edx                  # 4-byte Folded Reload
+	addl	%ebx, 28(%esp)                  # 4-byte Folded Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, 8(%esp)                   # 4-byte Folded Spill
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, 4(%esp)                   # 4-byte Folded Spill
+	adcl	%eax, %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	adcl	%ebp, %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	88(%esp), %eax
+	adcl	28(%eax), %edx
+	movl	%edx, %esi
+	setb	20(%esp)                        # 1-byte Folded Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	mulxl	48(%esp), %edi, %eax            # 4-byte Folded Reload
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %edi, %ebp            # 4-byte Folded Reload
+	addl	%eax, %edi
+	mulxl	36(%esp), %eax, %ebx            # 4-byte Folded Reload
+	adcl	%ebp, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	44(%esp), %eax, %ebp            # 4-byte Folded Reload
+	movl	%ebp, 56(%esp)                  # 4-byte Spill
+	adcl	%ebx, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	mulxl	40(%esp), %eax, %ebp            # 4-byte Folded Reload
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	mulxl	60(%esp), %ebx, %edx            # 4-byte Folded Reload
+	adcl	%ebp, %ebx
+	movzbl	20(%esp), %ebp                  # 1-byte Folded Reload
+	adcl	%edx, %ebp
+	addl	%ecx, 24(%esp)                  # 4-byte Folded Spill
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	%ecx, (%esp)                    # 4-byte Folded Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 16(%esp)                  # 4-byte Folded Spill
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	%esi, %ebx
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	movl	88(%esp), %ecx
+	adcl	32(%ecx), %ebp
+	setb	20(%esp)                        # 1-byte Folded Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	imull	%edi, %edx
+	mulxl	48(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %ebx, %eax            # 4-byte Folded Reload
 	addl	%esi, %ebx
-	mulxl	8(%eax), %eax, %edx
-	movl	16(%esp), %esi
-	movl	%edi, (%esi)
-	movl	%ebx, 4(%esi)
+	mulxl	36(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%eax, %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	mulxl	44(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	%esi, %ecx
+	movl	%ecx, %esi
+	mulxl	40(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, %eax
+	mulxl	60(%esp), %ecx, %edx            # 4-byte Folded Reload
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movzbl	20(%esp), %ecx                  # 1-byte Folded Reload
+	adcl	%edx, %ecx
+	addl	%edi, 24(%esp)                  # 4-byte Folded Spill
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 8(%esp)                   # 4-byte Folded Spill
+	adcl	28(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	%ebp, 12(%esp)                  # 4-byte Folded Spill
+	movl	88(%esp), %eax
+	adcl	36(%eax), %ecx
+	setb	20(%esp)                        # 1-byte Folded Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	48(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %eax, %edi            # 4-byte Folded Reload
+	addl	%esi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	36(%esp), %esi, %eax            # 4-byte Folded Reload
+	adcl	%edi, %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	mulxl	44(%esp), %esi, %edi            # 4-byte Folded Reload
+	adcl	%eax, %esi
+	movl	%esi, %ebp
+	mulxl	40(%esp), %esi, %eax            # 4-byte Folded Reload
+	adcl	%edi, %esi
+	movl	%esi, %edi
+	mulxl	60(%esp), %esi, %edx            # 4-byte Folded Reload
+	adcl	%eax, %esi
+	movl	%esi, %eax
+	movzbl	20(%esp), %esi                  # 1-byte Folded Reload
+	adcl	%edx, %esi
+	movl	%esi, %edx
+	addl	%ebx, 24(%esp)                  # 4-byte Folded Spill
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, 4(%esp)                   # 4-byte Folded Spill
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 12(%esp)                  # 4-byte Spill
 	adcl	%ecx, %eax
-	movl	%eax, 8(%esi)
-	adcl	$0, %edx
-	movl	%edx, 12(%esi)
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	88(%esp), %ecx
+	adcl	40(%ecx), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	setb	24(%esp)                        # 1-byte Folded Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	imull	%esi, %edx
+	mulxl	48(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %ecx, %edi            # 4-byte Folded Reload
+	addl	%esi, %ecx
+	mulxl	36(%esp), %ebp, %esi            # 4-byte Folded Reload
+	adcl	%edi, %ebp
+	mulxl	44(%esp), %ebx, %eax            # 4-byte Folded Reload
+	adcl	%esi, %ebx
+	movl	%edx, %edi
+	mulxl	40(%esp), %esi, %edx            # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	%eax, %esi
+	movl	%edi, %edx
+	mulxl	60(%esp), %edi, %eax            # 4-byte Folded Reload
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	movzbl	24(%esp), %edx                  # 1-byte Folded Reload
+	adcl	%eax, %edx
+	movl	32(%esp), %eax                  # 4-byte Reload
+	addl	(%esp), %eax                    # 4-byte Folded Reload
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	adcl	16(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	adcl	28(%esp), %edi                  # 4-byte Folded Reload
+	movl	88(%esp), %eax
+	adcl	44(%eax), %edx
+	xorl	%eax, %eax
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	subl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	movl	%ebp, %ecx
+	movl	%ebx, %ebp
+	sbbl	52(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	%ebx, %ecx
+	sbbl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	%esi, %ecx
+	sbbl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	sbbl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edx, %ebx
+	sbbl	60(%esp), %ebx                  # 4-byte Folded Reload
+	sbbl	%eax, %eax
+	testb	$1, %al
+	jne	.LBB11_1
+# %bb.2:
+	movl	84(%esp), %eax
+	movl	%ebx, 20(%eax)
+	jne	.LBB11_3
+.LBB11_4:
+	movl	%edi, 16(%eax)
+	movl	52(%esp), %edx                  # 4-byte Reload
+	jne	.LBB11_5
+.LBB11_6:
+	movl	%ecx, 12(%eax)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB11_7
+.LBB11_8:
+	movl	%ecx, 8(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB11_9
+.LBB11_10:
+	movl	%edx, 4(%eax)
+	je	.LBB11_12
+.LBB11_11:
+	movl	32(%esp), %ecx                  # 4-byte Reload
+.LBB11_12:
+	movl	%ecx, (%eax)
+	addl	$64, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
+	popl	%ebp
 	retl
-.Lfunc_end35:
-	.size	mcl_fp_mulUnitPre3Lbmi2, .Lfunc_end35-mcl_fp_mulUnitPre3Lbmi2
-
-	.globl	mcl_fpDbl_mulPre3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre3Lbmi2,@function
-mcl_fpDbl_mulPre3Lbmi2:                 # @mcl_fpDbl_mulPre3Lbmi2
-# BB#0:
+.LBB11_1:
+	movl	%edx, %ebx
+	movl	84(%esp), %eax
+	movl	%ebx, 20(%eax)
+	je	.LBB11_4
+.LBB11_3:
+	movl	44(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 16(%eax)
+	movl	52(%esp), %edx                  # 4-byte Reload
+	je	.LBB11_6
+.LBB11_5:
+	movl	%esi, %ecx
+	movl	%ecx, 12(%eax)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	je	.LBB11_8
+.LBB11_7:
+	movl	%ebp, %ecx
+	movl	%ecx, 8(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	je	.LBB11_10
+.LBB11_9:
+	movl	(%esp), %edx                    # 4-byte Reload
+	movl	%edx, 4(%eax)
+	jne	.LBB11_11
+	jmp	.LBB11_12
+.Lfunc_end11:
+	.size	mcl_fp_montRed6Lbmi2, .Lfunc_end11-mcl_fp_montRed6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRedNF6Lbmi2          # -- Begin function mcl_fp_montRedNF6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF6Lbmi2,@function
+mcl_fp_montRedNF6Lbmi2:                 # @mcl_fp_montRedNF6Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$16, %esp
-	movl	40(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	4(%ecx), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi
-	movl	(%esi), %edi
-	mulxl	%edi, %ebx, %ebp
-	movl	%eax, %edx
-	movl	%eax, %esi
-	mulxl	%edi, %edx, %eax
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	addl	%ebx, %eax
-	movl	8(%ecx), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	mulxl	%edi, %ebx, %edi
+	subl	$64, %esp
+	movl	92(%esp), %esi
+	movl	-4(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	88(%esp), %ecx
+	movl	(%ecx), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	imull	%eax, %edx
+	movl	8(%esi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	mulxl	%eax, %edi, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	4(%esi), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ebp, %eax
+	movl	(%esi), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %ecx, %ebx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	addl	%ebp, %ebx
+	adcl	%edi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	12(%esi), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ecx, %eax
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	16(%esi), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	mulxl	%edi, %edi, %ebp
+	adcl	%eax, %edi
+	movl	20(%esi), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	mulxl	%eax, %eax, %edx
+	adcl	%ebp, %eax
+	adcl	$0, %edx
+	movl	%edx, %ebp
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	4(%esp), %edx                   # 4-byte Folded Reload
+	movl	88(%esp), %edx
+	adcl	4(%edx), %ebx
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	8(%edx), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	12(%edx), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	16(%edx), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	20(%edx), %eax
+	adcl	24(%edx), %ebp
+	movl	%ebp, 56(%esp)                  # 4-byte Spill
+	setb	8(%esp)                         # 1-byte Folded Spill
+	movl	24(%esp), %edx                  # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	44(%esp), %ecx, %esi            # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %ecx, %ebp            # 4-byte Folded Reload
+	addl	%esi, %ecx
+	mulxl	32(%esp), %esi, %edi            # 4-byte Folded Reload
+	adcl	%ebp, %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	mulxl	40(%esp), %esi, %ebp            # 4-byte Folded Reload
+	adcl	%edi, %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	mulxl	36(%esp), %esi, %edi            # 4-byte Folded Reload
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	adcl	%ebp, %esi
+	mulxl	60(%esp), %ebp, %edi            # 4-byte Folded Reload
+	adcl	52(%esp), %ebp                  # 4-byte Folded Reload
+	movzbl	8(%esp), %edx                   # 1-byte Folded Reload
+	adcl	%edi, %edx
+	addl	%ebx, 12(%esp)                  # 4-byte Folded Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	%edi, 4(%esp)                   # 4-byte Folded Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	%edi, 20(%esp)                  # 4-byte Folded Spill
+	adcl	%eax, %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	88(%esp), %eax
+	adcl	28(%eax), %edx
+	movl	%edx, %ebp
+	setb	8(%esp)                         # 1-byte Folded Spill
+	movl	24(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	mulxl	44(%esp), %esi, %eax            # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %edi, %esi            # 4-byte Folded Reload
+	addl	%eax, %edi
+	mulxl	32(%esp), %eax, %ebx            # 4-byte Folded Reload
+	adcl	%esi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	40(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	adcl	%ebx, %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	mulxl	36(%esp), %eax, %esi            # 4-byte Folded Reload
+	adcl	52(%esp), %eax                  # 4-byte Folded Reload
+	mulxl	60(%esp), %ebx, %edx            # 4-byte Folded Reload
+	adcl	%esi, %ebx
+	movzbl	8(%esp), %esi                   # 1-byte Folded Reload
+	adcl	%edx, %esi
+	addl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, (%esp)                    # 4-byte Folded Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 56(%esp)                  # 4-byte Folded Spill
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
 	adcl	%ebp, %ebx
-	movl	36(%esp), %ecx
-	movl	4(%esp), %edx           # 4-byte Reload
-	movl	%edx, (%ecx)
-	adcl	$0, %edi
-	movl	44(%esp), %ecx
-	movl	4(%ecx), %ebp
-	movl	%esi, %edx
-	mulxl	%ebp, %ecx, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	movl	88(%esp), %ecx
+	adcl	32(%ecx), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	setb	52(%esp)                        # 1-byte Folded Spill
+	movl	24(%esp), %edx                  # 4-byte Reload
+	imull	%edi, %edx
+	mulxl	44(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %ebx, %ebp            # 4-byte Folded Reload
+	addl	%esi, %ebx
+	mulxl	32(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%ebp, %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	mulxl	40(%esp), %ecx, %ebp            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	mulxl	36(%esp), %eax, %esi            # 4-byte Folded Reload
+	adcl	%ebp, %eax
+	mulxl	60(%esp), %ecx, %edx            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	movl	%ecx, %esi
+	movzbl	52(%esp), %ebp                  # 1-byte Folded Reload
+	adcl	%edx, %ebp
+	addl	%edi, 12(%esp)                  # 4-byte Folded Spill
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 4(%esp)                   # 4-byte Folded Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 28(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	88(%esp), %eax
+	adcl	36(%eax), %ebp
+	setb	52(%esp)                        # 1-byte Folded Spill
+	movl	24(%esp), %edx                  # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	44(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	mulxl	48(%esp), %ecx, %edi            # 4-byte Folded Reload
 	addl	%eax, %ecx
-	movl	12(%esp), %edx          # 4-byte Reload
-	mulxl	%ebp, %eax, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	%ebx, %eax
-	movl	8(%esp), %edx           # 4-byte Reload
-	mulxl	%ebp, %ebx, %edx
-	adcl	%edi, %ebx
-	sbbl	%edi, %edi
-	andl	$1, %edi
-	addl	4(%esp), %eax           # 4-byte Folded Reload
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	adcl	%edx, %edi
-	movl	36(%esp), %edx
-	movl	%ecx, 4(%edx)
-	movl	44(%esp), %ecx
-	movl	8(%ecx), %ecx
-	movl	%esi, %edx
-	mulxl	%ecx, %ebp, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	addl	%eax, %ebp
-	movl	12(%esp), %edx          # 4-byte Reload
-	mulxl	%ecx, %eax, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	%ebx, %eax
-	movl	8(%esp), %edx           # 4-byte Reload
-	mulxl	%ecx, %edx, %ecx
-	adcl	%edi, %edx
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	addl	4(%esp), %eax           # 4-byte Folded Reload
-	adcl	12(%esp), %edx          # 4-byte Folded Reload
-	movl	36(%esp), %edi
-	movl	%ebp, 8(%edi)
-	movl	%eax, 12(%edi)
-	movl	%edx, 16(%edi)
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	mulxl	32(%esp), %eax, %ecx            # 4-byte Folded Reload
+	adcl	%edi, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	mulxl	40(%esp), %esi, %edi            # 4-byte Folded Reload
 	adcl	%ecx, %esi
-	movl	%esi, 20(%edi)
-	addl	$16, %esp
+	mulxl	36(%esp), %eax, %ecx            # 4-byte Folded Reload
+	adcl	%edi, %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	mulxl	60(%esp), %edx, %edi            # 4-byte Folded Reload
+	adcl	%ecx, %edx
+	movzbl	52(%esp), %ecx                  # 1-byte Folded Reload
+	adcl	%edi, %ecx
+	addl	%ebx, 8(%esp)                   # 4-byte Folded Spill
+	movl	(%esp), %edi                    # 4-byte Reload
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, (%esp)                    # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 20(%esp)                  # 4-byte Folded Spill
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 56(%esp)                  # 4-byte Folded Spill
+	adcl	%ebp, %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	88(%esp), %eax
+	adcl	40(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	setb	8(%esp)                         # 1-byte Folded Spill
+	movl	24(%esp), %edx                  # 4-byte Reload
+	imull	%edi, %edx
+	mulxl	44(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %ebp, %edi            # 4-byte Folded Reload
+	addl	%eax, %ebp
+	mulxl	32(%esp), %eax, %ebx            # 4-byte Folded Reload
+	adcl	%edi, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	40(%esp), %esi, %edi            # 4-byte Folded Reload
+	adcl	%ebx, %esi
+	movl	%edx, %ecx
+	mulxl	36(%esp), %eax, %ebx            # 4-byte Folded Reload
+	adcl	%edi, %eax
+	mulxl	60(%esp), %edi, %edx            # 4-byte Folded Reload
+	adcl	%ebx, %edi
+	movzbl	8(%esp), %ebx                   # 1-byte Folded Reload
+	adcl	%edx, %ebx
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	addl	(%esp), %ecx                    # 4-byte Folded Reload
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	28(%esp), %edx                  # 4-byte Folded Reload
+	adcl	56(%esp), %esi                  # 4-byte Folded Reload
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	88(%esp), %ecx
+	adcl	44(%ecx), %ebx
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	subl	44(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%edx, %ebp
+	sbbl	48(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	sbbl	32(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%eax, %ecx
+	sbbl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	sbbl	36(%esp), %edi                  # 4-byte Folded Reload
+	movl	%ebx, %esi
+	sbbl	60(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, %edx
+	sarl	$31, %edx
+	testl	%edx, %edx
+	js	.LBB12_1
+# %bb.2:
+	movl	84(%esp), %eax
+	movl	%esi, 20(%eax)
+	js	.LBB12_3
+.LBB12_4:
+	movl	%edi, 16(%eax)
+	js	.LBB12_5
+.LBB12_6:
+	movl	%ecx, 12(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	js	.LBB12_7
+.LBB12_8:
+	movl	%ecx, 8(%eax)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	js	.LBB12_9
+.LBB12_10:
+	movl	%ebp, 4(%eax)
+	jns	.LBB12_12
+.LBB12_11:
+	movl	(%esp), %ecx                    # 4-byte Reload
+.LBB12_12:
+	movl	%ecx, (%eax)
+	addl	$64, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end36:
-	.size	mcl_fpDbl_mulPre3Lbmi2, .Lfunc_end36-mcl_fpDbl_mulPre3Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre3Lbmi2,@function
-mcl_fpDbl_sqrPre3Lbmi2:                 # @mcl_fpDbl_sqrPre3Lbmi2
-# BB#0:
+.LBB12_1:
+	movl	%ebx, %esi
+	movl	84(%esp), %eax
+	movl	%esi, 20(%eax)
+	jns	.LBB12_4
+.LBB12_3:
+	movl	40(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 16(%eax)
+	jns	.LBB12_6
+.LBB12_5:
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB12_8
+.LBB12_7:
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 8(%eax)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB12_10
+.LBB12_9:
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 4(%eax)
+	js	.LBB12_11
+	jmp	.LBB12_12
+.Lfunc_end12:
+	.size	mcl_fp_montRedNF6Lbmi2, .Lfunc_end12-mcl_fp_montRedNF6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addPre6Lbmi2             # -- Begin function mcl_fp_addPre6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre6Lbmi2,@function
+mcl_fp_addPre6Lbmi2:                    # @mcl_fp_addPre6Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$20, %esp
-	movl	44(%esp), %edx
-	movl	8(%edx), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	movl	(%edx), %ecx
-	movl	4(%edx), %esi
-	movl	40(%esp), %eax
-	movl	%ecx, %edx
-	mulxl	%ecx, %edx, %ebx
-	movl	%edx, (%eax)
-	movl	%esi, %edx
-	mulxl	%ecx, %ebp, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	addl	%ebp, %ebx
-	movl	%edi, %edx
-	mulxl	%ecx, %edx, %ecx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	adcl	%edx, %edi
-	adcl	$0, %ecx
-	addl	%ebp, %ebx
-	movl	%esi, %edx
-	mulxl	%esi, %ebp, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	adcl	%edi, %ebp
-	movl	(%esp), %eax            # 4-byte Reload
-	movl	%eax, %edx
-	mulxl	%esi, %edx, %esi
-	adcl	%edx, %ecx
-	sbbl	%edi, %edi
-	andl	$1, %edi
-	addl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	adcl	%esi, %edi
-	addl	12(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%edx, %ecx
-	movl	%eax, %edx
-	mulxl	%eax, %edx, %eax
-	adcl	%edi, %edx
-	sbbl	%edi, %edi
-	andl	$1, %edi
-	addl	16(%esp), %ecx          # 4-byte Folded Reload
-	adcl	%esi, %edx
-	movl	40(%esp), %esi
-	movl	%ebx, 4(%esi)
-	movl	%ebp, 8(%esi)
-	movl	%ecx, 12(%esi)
-	movl	%edx, 16(%esi)
-	adcl	%eax, %edi
+	movl	24(%esp), %eax
+	movl	(%eax), %ecx
+	movl	4(%eax), %edx
+	movl	28(%esp), %esi
+	addl	(%esi), %ecx
+	adcl	4(%esi), %edx
+	movl	20(%eax), %edi
+	movl	16(%eax), %ebx
+	movl	12(%eax), %ebp
+	movl	8(%eax), %eax
+	adcl	8(%esi), %eax
+	adcl	12(%esi), %ebp
+	adcl	16(%esi), %ebx
+	adcl	20(%esi), %edi
+	movl	20(%esp), %esi
+	movl	%ebx, 16(%esi)
+	movl	%ebp, 12(%esi)
+	movl	%eax, 8(%esi)
 	movl	%edi, 20(%esi)
-	addl	$20, %esp
+	movl	%edx, 4(%esi)
+	movl	%ecx, (%esi)
+	setb	%al
+	movzbl	%al, %eax
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end37:
-	.size	mcl_fpDbl_sqrPre3Lbmi2, .Lfunc_end37-mcl_fpDbl_sqrPre3Lbmi2
-
-	.globl	mcl_fp_mont3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont3Lbmi2,@function
-mcl_fp_mont3Lbmi2:                      # @mcl_fp_mont3Lbmi2
-# BB#0:
+.Lfunc_end13:
+	.size	mcl_fp_addPre6Lbmi2, .Lfunc_end13-mcl_fp_addPre6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subPre6Lbmi2             # -- Begin function mcl_fp_subPre6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre6Lbmi2,@function
+mcl_fp_subPre6Lbmi2:                    # @mcl_fp_subPre6Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$44, %esp
-	movl	68(%esp), %eax
-	movl	8(%eax), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx
-	movl	(%ecx), %ecx
-	mulxl	%ecx, %edx, %edi
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	(%eax), %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	4(%eax), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	mulxl	%ecx, %eax, %ebp
-	movl	%esi, %edx
-	mulxl	%ecx, %edx, %ebx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	addl	%eax, %ebx
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi
-	movl	-4(%esi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	imull	%eax, %edx
-	movl	(%esi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	4(%esi), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ecx, %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	mulxl	%eax, %ebp, %edi
-	addl	%ecx, %edi
-	movl	8(%esi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %esi
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	adcl	$0, %esi
-	addl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	%ebx, %edi
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
+	pushl	%eax
+	movl	28(%esp), %edx
+	movl	(%edx), %ecx
+	movl	4(%edx), %esi
+	xorl	%eax, %eax
+	movl	32(%esp), %edi
+	subl	(%edi), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	sbbl	4(%edi), %esi
+	movl	20(%edx), %ebx
+	movl	16(%edx), %ebp
+	movl	12(%edx), %ecx
+	movl	8(%edx), %edx
+	sbbl	8(%edi), %edx
+	sbbl	12(%edi), %ecx
+	sbbl	16(%edi), %ebp
+	sbbl	20(%edi), %ebx
+	movl	24(%esp), %edi
+	movl	%ebp, 16(%edi)
+	movl	%ecx, 12(%edi)
+	movl	%edx, 8(%edi)
+	movl	%esi, 4(%edi)
+	movl	%ebx, 20(%edi)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, (%edi)
 	sbbl	%eax, %eax
 	andl	$1, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax
-	movl	4(%eax), %edx
-	mulxl	16(%esp), %ebx, %eax    # 4-byte Folded Reload
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	mulxl	8(%esp), %ebx, %eax     # 4-byte Folded Reload
-	movl	%ebx, (%esp)            # 4-byte Spill
-	mulxl	12(%esp), %ebx, %ebp    # 4-byte Folded Reload
-	addl	(%esp), %ebp            # 4-byte Folded Reload
-	movl	%eax, %edx
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	%edi, %ebx
-	adcl	%ecx, %ebp
-	adcl	%esi, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	movl	%eax, %edx
-	imull	20(%esp), %edx          # 4-byte Folded Reload
-	mulxl	40(%esp), %ecx, %esi    # 4-byte Folded Reload
-	movl	%esi, (%esp)            # 4-byte Spill
-	mulxl	36(%esp), %esi, %ebx    # 4-byte Folded Reload
-	addl	%ecx, %ebx
-	mulxl	32(%esp), %ecx, %edi    # 4-byte Folded Reload
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	adcl	$0, %edi
-	movl	24(%esp), %edx          # 4-byte Reload
-	andl	$1, %edx
-	addl	%eax, %esi
-	adcl	%ebp, %ebx
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	adcl	$0, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx
-	movl	8(%edx), %edx
-	mulxl	16(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	mulxl	8(%esp), %eax, %ebp     # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	mulxl	12(%esp), %eax, %esi    # 4-byte Folded Reload
-	addl	8(%esp), %esi           # 4-byte Folded Reload
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	%ebx, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	adcl	%ecx, %esi
-	adcl	%edi, %ebp
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	sbbl	%ebx, %ebx
-	movl	20(%esp), %edx          # 4-byte Reload
-	imull	%eax, %edx
-	mulxl	36(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	mulxl	40(%esp), %edi, %edx    # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	addl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%eax, %edx
-	mulxl	32(%esp), %edx, %eax    # 4-byte Folded Reload
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %eax
-	andl	$1, %ebx
-	addl	16(%esp), %ecx          # 4-byte Folded Reload
-	adcl	%esi, %edi
-	adcl	%ebp, %edx
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	adcl	$0, %ebx
-	movl	%edi, %ebp
-	subl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edx, %esi
-	sbbl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%eax, %ecx
-	sbbl	32(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB38_2
-# BB#1:
-	movl	%ebp, %edi
-.LBB38_2:
-	movl	64(%esp), %ebp
-	movl	%edi, (%ebp)
-	testb	%bl, %bl
-	jne	.LBB38_4
-# BB#3:
-	movl	%esi, %edx
-.LBB38_4:
-	movl	%edx, 4(%ebp)
-	jne	.LBB38_6
-# BB#5:
-	movl	%ecx, %eax
-.LBB38_6:
-	movl	%eax, 8(%ebp)
-	addl	$44, %esp
+	addl	$4, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end38:
-	.size	mcl_fp_mont3Lbmi2, .Lfunc_end38-mcl_fp_mont3Lbmi2
-
-	.globl	mcl_fp_montNF3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF3Lbmi2,@function
-mcl_fp_montNF3Lbmi2:                    # @mcl_fp_montNF3Lbmi2
-# BB#0:
+.Lfunc_end14:
+	.size	mcl_fp_subPre6Lbmi2, .Lfunc_end14-mcl_fp_subPre6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_shr1_6Lbmi2              # -- Begin function mcl_fp_shr1_6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_6Lbmi2,@function
+mcl_fp_shr1_6Lbmi2:                     # @mcl_fp_shr1_6Lbmi2
+# %bb.0:
+	pushl	%esi
+	movl	12(%esp), %eax
+	movl	20(%eax), %ecx
+	movl	%ecx, %edx
+	shrl	%edx
+	movl	8(%esp), %esi
+	movl	%edx, 20(%esi)
+	movl	16(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 16(%esi)
+	movl	12(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 12(%esi)
+	movl	8(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 8(%esi)
+	movl	4(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 4(%esi)
+	movl	(%eax), %eax
+	shrdl	$1, %ecx, %eax
+	movl	%eax, (%esi)
+	popl	%esi
+	retl
+.Lfunc_end15:
+	.size	mcl_fp_shr1_6Lbmi2, .Lfunc_end15-mcl_fp_shr1_6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_add6Lbmi2                # -- Begin function mcl_fp_add6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_add6Lbmi2,@function
+mcl_fp_add6Lbmi2:                       # @mcl_fp_add6Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$36, %esp
-	movl	60(%esp), %eax
-	movl	(%eax), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	4(%eax), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx
-	movl	(%ecx), %ecx
-	mulxl	%ecx, %esi, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	%ecx, %edi, %ebp
-	addl	%esi, %ebp
-	movl	8(%eax), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	mulxl	%ecx, %eax, %ebx
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	adcl	$0, %ebx
-	movl	68(%esp), %esi
-	movl	-4(%esi), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	imull	%ecx, %edx
-	movl	(%esi), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	mulxl	%ecx, %esi, %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	addl	%edi, %esi
-	movl	68(%esp), %esi
-	movl	4(%esi), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	mulxl	%ecx, %edi, %ecx
-	adcl	%ebp, %edi
-	movl	8(%esi), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	mulxl	%esi, %ebp, %edx
-	adcl	%eax, %ebp
-	adcl	$0, %ebx
-	addl	4(%esp), %edi           # 4-byte Folded Reload
-	adcl	%ecx, %ebp
-	adcl	%edx, %ebx
-	movl	64(%esp), %eax
-	movl	4(%eax), %edx
-	mulxl	12(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
-	mulxl	16(%esp), %esi, %ecx    # 4-byte Folded Reload
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	addl	%eax, %ecx
-	mulxl	8(%esp), %esi, %eax     # 4-byte Folded Reload
-	adcl	(%esp), %esi            # 4-byte Folded Reload
-	adcl	$0, %eax
-	movl	4(%esp), %edx           # 4-byte Reload
-	addl	%edi, %edx
-	adcl	%ebp, %ecx
-	adcl	%ebx, %esi
-	adcl	$0, %eax
-	movl	%edx, %ebp
-	imull	20(%esp), %edx          # 4-byte Folded Reload
-	mulxl	32(%esp), %ebx, %edi    # 4-byte Folded Reload
-	addl	%ebp, %ebx
-	mulxl	28(%esp), %ebp, %ebx    # 4-byte Folded Reload
-	adcl	%ecx, %ebp
-	mulxl	24(%esp), %ecx, %edx    # 4-byte Folded Reload
-	adcl	%esi, %ecx
-	adcl	$0, %eax
-	addl	%edi, %ebp
-	adcl	%ebx, %ecx
-	adcl	%edx, %eax
-	movl	64(%esp), %edx
-	movl	8(%edx), %edx
-	mulxl	12(%esp), %esi, %edi    # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	mulxl	16(%esp), %ebx, %edi    # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	addl	%esi, %edi
-	mulxl	8(%esp), %ebx, %esi     # 4-byte Folded Reload
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	adcl	$0, %esi
-	addl	%ebp, 16(%esp)          # 4-byte Folded Spill
-	adcl	%ecx, %edi
-	adcl	%eax, %ebx
-	adcl	$0, %esi
-	movl	20(%esp), %edx          # 4-byte Reload
-	movl	16(%esp), %ecx          # 4-byte Reload
-	imull	%ecx, %edx
-	mulxl	32(%esp), %eax, %ebp    # 4-byte Folded Reload
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	addl	%ecx, %eax
-	movl	%edx, %eax
-	mulxl	28(%esp), %ecx, %ebp    # 4-byte Folded Reload
-	adcl	%edi, %ecx
-	mulxl	24(%esp), %eax, %edx    # 4-byte Folded Reload
-	adcl	%ebx, %eax
-	adcl	$0, %esi
-	addl	20(%esp), %ecx          # 4-byte Folded Reload
-	adcl	%ebp, %eax
-	adcl	%edx, %esi
-	movl	%ecx, %ebp
-	subl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, %edi
-	sbbl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, %edx
-	sbbl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %ebx
-	sarl	$31, %ebx
-	testl	%ebx, %ebx
-	js	.LBB39_2
-# BB#1:
-	movl	%ebp, %ecx
-.LBB39_2:
-	movl	56(%esp), %ebx
-	movl	%ecx, (%ebx)
-	js	.LBB39_4
-# BB#3:
-	movl	%edi, %eax
-.LBB39_4:
-	movl	%eax, 4(%ebx)
-	js	.LBB39_6
-# BB#5:
-	movl	%edx, %esi
-.LBB39_6:
-	movl	%esi, 8(%ebx)
-	addl	$36, %esp
+	subl	$8, %esp
+	movl	32(%esp), %ebx
+	movl	(%ebx), %eax
+	movl	4(%ebx), %ecx
+	movl	36(%esp), %ebp
+	addl	(%ebp), %eax
+	adcl	4(%ebp), %ecx
+	movl	20(%ebx), %edx
+	movl	16(%ebx), %esi
+	movl	12(%ebx), %edi
+	movl	8(%ebx), %ebx
+	adcl	8(%ebp), %ebx
+	adcl	12(%ebp), %edi
+	adcl	16(%ebp), %esi
+	adcl	20(%ebp), %edx
+	movl	28(%esp), %ebp
+	movl	%edx, 20(%ebp)
+	movl	%esi, 16(%ebp)
+	movl	%edi, 12(%ebp)
+	movl	%ebx, 8(%ebp)
+	movl	%ecx, 4(%ebp)
+	movl	%eax, (%ebp)
+	setb	3(%esp)                         # 1-byte Folded Spill
+	movl	40(%esp), %ebp
+	subl	(%ebp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	sbbl	4(%ebp), %ecx
+	sbbl	8(%ebp), %ebx
+	sbbl	12(%ebp), %edi
+	sbbl	16(%ebp), %esi
+	sbbl	20(%ebp), %edx
+	movzbl	3(%esp), %eax                   # 1-byte Folded Reload
+	sbbl	$0, %eax
+	testb	$1, %al
+	jne	.LBB16_2
+# %bb.1:                                # %nocarry
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	28(%esp), %ebp
+	movl	%eax, (%ebp)
+	movl	%ecx, 4(%ebp)
+	movl	%ebx, 8(%ebp)
+	movl	%edi, 12(%ebp)
+	movl	%esi, 16(%ebp)
+	movl	%edx, 20(%ebp)
+.LBB16_2:                               # %carry
+	addl	$8, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end39:
-	.size	mcl_fp_montNF3Lbmi2, .Lfunc_end39-mcl_fp_montNF3Lbmi2
-
-	.globl	mcl_fp_montRed3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed3Lbmi2,@function
-mcl_fp_montRed3Lbmi2:                   # @mcl_fp_montRed3Lbmi2
-# BB#0:
+.Lfunc_end16:
+	.size	mcl_fp_add6Lbmi2, .Lfunc_end16-mcl_fp_add6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addNF6Lbmi2              # -- Begin function mcl_fp_addNF6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF6Lbmi2,@function
+mcl_fp_addNF6Lbmi2:                     # @mcl_fp_addNF6Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$36, %esp
-	movl	64(%esp), %ecx
-	movl	-4(%ecx), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	(%ecx), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax
-	movl	(%eax), %ebx
-	movl	%ebx, %edx
-	imull	%edi, %edx
-	movl	8(%ecx), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	4(%ecx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	mulxl	%edi, %edi, %eax
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ebp, %edi
-	mulxl	%esi, %edx, %ecx
-	addl	%ebp, %ecx
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	adcl	$0, %eax
-	addl	%ebx, %edx
-	movl	60(%esp), %edx
-	adcl	4(%edx), %ecx
-	adcl	8(%edx), %edi
+	subl	$28, %esp
+	movl	56(%esp), %ecx
+	movl	(%ecx), %ebx
+	movl	4(%ecx), %ebp
+	movl	52(%esp), %edx
+	addl	(%edx), %ebx
+	adcl	4(%edx), %ebp
+	movl	20(%ecx), %esi
+	movl	16(%ecx), %edi
+	movl	12(%ecx), %eax
+	movl	8(%ecx), %ecx
+	adcl	8(%edx), %ecx
 	adcl	12(%edx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	20(%edx), %eax
-	movl	16(%edx), %edx
-	adcl	$0, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	%ecx, %edx
-	imull	20(%esp), %edx          # 4-byte Folded Reload
-	mulxl	28(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%eax, (%esp)            # 4-byte Spill
-	mulxl	24(%esp), %ebp, %eax    # 4-byte Folded Reload
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	addl	%esi, %eax
-	mulxl	32(%esp), %esi, %ebp    # 4-byte Folded Reload
-	adcl	(%esp), %esi            # 4-byte Folded Reload
-	adcl	$0, %ebp
-	addl	%ecx, 4(%esp)           # 4-byte Folded Spill
-	adcl	%edi, %eax
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	$0, 16(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ebx
-	movl	20(%esp), %edx          # 4-byte Reload
-	imull	%eax, %edx
-	mulxl	24(%esp), %ecx, %edi    # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	%edx, %ecx
-	mulxl	28(%esp), %edi, %edx    # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	addl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%ecx, %edx
-	mulxl	32(%esp), %ecx, %edx    # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	adcl	$0, %edx
-	addl	%eax, 20(%esp)          # 4-byte Folded Spill
-	adcl	%esi, %edi
-	adcl	%ebp, %ecx
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %ebx
+	adcl	16(%edx), %edi
+	adcl	20(%edx), %esi
+	movl	60(%esp), %edx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	subl	(%edx), %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	sbbl	4(%edx), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	sbbl	8(%edx), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%edi, %eax
+	sbbl	12(%edx), %ebx
 	movl	%edi, %ebp
-	subl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ecx, %esi
-	sbbl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, %eax
-	sbbl	32(%esp), %eax          # 4-byte Folded Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB40_2
-# BB#1:
-	movl	%ebp, %edi
-.LBB40_2:
-	movl	56(%esp), %ebp
-	movl	%edi, (%ebp)
-	testb	%bl, %bl
-	jne	.LBB40_4
-# BB#3:
+	sbbl	16(%edx), %ebp
 	movl	%esi, %ecx
-.LBB40_4:
-	movl	%ecx, 4(%ebp)
-	jne	.LBB40_6
-# BB#5:
-	movl	%eax, %edx
-.LBB40_6:
-	movl	%edx, 8(%ebp)
-	addl	$36, %esp
+	sbbl	20(%edx), %ecx
+	movl	%ecx, %edx
+	sarl	$31, %edx
+	testl	%edx, %edx
+	js	.LBB17_1
+# %bb.2:
+	movl	48(%esp), %edi
+	movl	%ecx, 20(%edi)
+	js	.LBB17_3
+.LBB17_4:
+	movl	%ebp, 16(%edi)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	js	.LBB17_5
+.LBB17_6:
+	movl	%ebx, 12(%edi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	js	.LBB17_7
+.LBB17_8:
+	movl	%edx, 8(%edi)
+	js	.LBB17_9
+.LBB17_10:
+	movl	%ecx, 4(%edi)
+	jns	.LBB17_12
+.LBB17_11:
+	movl	12(%esp), %eax                  # 4-byte Reload
+.LBB17_12:
+	movl	%eax, (%edi)
+	addl	$28, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end40:
-	.size	mcl_fp_montRed3Lbmi2, .Lfunc_end40-mcl_fp_montRed3Lbmi2
-
-	.globl	mcl_fp_addPre3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre3Lbmi2,@function
-mcl_fp_addPre3Lbmi2:                    # @mcl_fp_addPre3Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	12(%esp), %esi
-	addl	(%esi), %ecx
-	adcl	4(%esi), %edx
-	movl	8(%eax), %eax
-	adcl	8(%esi), %eax
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	%edx, 4(%esi)
-	movl	%eax, 8(%esi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	retl
-.Lfunc_end41:
-	.size	mcl_fp_addPre3Lbmi2, .Lfunc_end41-mcl_fp_addPre3Lbmi2
-
-	.globl	mcl_fp_subPre3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre3Lbmi2,@function
-mcl_fp_subPre3Lbmi2:                    # @mcl_fp_subPre3Lbmi2
-# BB#0:
-	pushl	%edi
-	pushl	%esi
-	movl	16(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %esi
-	xorl	%eax, %eax
-	movl	20(%esp), %edi
-	subl	(%edi), %edx
-	sbbl	4(%edi), %esi
-	movl	8(%ecx), %ecx
-	sbbl	8(%edi), %ecx
-	movl	12(%esp), %edi
-	movl	%edx, (%edi)
-	movl	%esi, 4(%edi)
-	movl	%ecx, 8(%edi)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end42:
-	.size	mcl_fp_subPre3Lbmi2, .Lfunc_end42-mcl_fp_subPre3Lbmi2
-
-	.globl	mcl_fp_shr1_3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_3Lbmi2,@function
-mcl_fp_shr1_3Lbmi2:                     # @mcl_fp_shr1_3Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	8(%eax), %ecx
-	movl	(%eax), %edx
-	movl	4(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	8(%esp), %esi
-	movl	%edx, (%esi)
-	shrdl	$1, %ecx, %eax
-	movl	%eax, 4(%esi)
-	shrl	%ecx
-	movl	%ecx, 8(%esi)
-	popl	%esi
-	retl
-.Lfunc_end43:
-	.size	mcl_fp_shr1_3Lbmi2, .Lfunc_end43-mcl_fp_shr1_3Lbmi2
-
-	.globl	mcl_fp_add3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add3Lbmi2,@function
-mcl_fp_add3Lbmi2:                       # @mcl_fp_add3Lbmi2
-# BB#0:
+.LBB17_1:
+	movl	%esi, %ecx
+	movl	48(%esp), %edi
+	movl	%ecx, 20(%edi)
+	jns	.LBB17_4
+.LBB17_3:
+	movl	%eax, %ebp
+	movl	%ebp, 16(%edi)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	jns	.LBB17_6
+.LBB17_5:
+	movl	(%esp), %ebx                    # 4-byte Reload
+	movl	%ebx, 12(%edi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	jns	.LBB17_8
+.LBB17_7:
+	movl	4(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 8(%edi)
+	jns	.LBB17_10
+.LBB17_9:
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	js	.LBB17_11
+	jmp	.LBB17_12
+.Lfunc_end17:
+	.size	mcl_fp_addNF6Lbmi2, .Lfunc_end17-mcl_fp_addNF6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_sub6Lbmi2                # -- Begin function mcl_fp_sub6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_sub6Lbmi2,@function
+mcl_fp_sub6Lbmi2:                       # @mcl_fp_sub6Lbmi2
+# %bb.0:
+	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %edx
-	movl	(%edx), %eax
-	movl	4(%edx), %ecx
-	movl	20(%esp), %esi
-	addl	(%esi), %eax
-	adcl	4(%esi), %ecx
+	subl	$20, %esp
+	movl	44(%esp), %edx
+	movl	(%edx), %ecx
+	movl	4(%edx), %esi
+	xorl	%eax, %eax
+	movl	48(%esp), %ebp
+	subl	(%ebp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	sbbl	4(%ebp), %esi
+	movl	20(%edx), %ecx
+	movl	16(%edx), %ebx
+	movl	12(%edx), %edi
 	movl	8(%edx), %edx
-	adcl	8(%esi), %edx
-	movl	16(%esp), %esi
-	movl	%eax, (%esi)
-	movl	%ecx, 4(%esi)
-	movl	%edx, 8(%esi)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	28(%esp), %edi
-	subl	(%edi), %eax
-	sbbl	4(%edi), %ecx
-	sbbl	8(%edi), %edx
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB44_2
-# BB#1:                                 # %nocarry
-	movl	%eax, (%esi)
-	movl	%ecx, 4(%esi)
-	movl	%edx, 8(%esi)
-.LBB44_2:                               # %carry
+	sbbl	8(%ebp), %edx
+	sbbl	12(%ebp), %edi
+	sbbl	16(%ebp), %ebx
+	sbbl	20(%ebp), %ecx
+	sbbl	%eax, %eax
+	testb	$1, %al
+	movl	40(%esp), %eax
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, 20(%eax)
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	%ebx, 16(%eax)
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	%edi, 12(%eax)
+	movl	%edx, 8(%eax)
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	%esi, 4(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, (%eax)
+	je	.LBB18_2
+# %bb.1:                                # %carry
+	movl	%ecx, %ebp
+	movl	52(%esp), %ecx
+	addl	(%ecx), %ebp
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	4(%ecx), %ebp
+	adcl	8(%ecx), %edx
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	12(%ecx), %edi
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	16(%ecx), %ebx
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	52(%esp), %esi
+	adcl	20(%esi), %ecx
+	movl	%ecx, 20(%eax)
+	movl	%ebx, 16(%eax)
+	movl	%edi, 12(%eax)
+	movl	%edx, 8(%eax)
+	movl	%ebp, 4(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, (%eax)
+.LBB18_2:                               # %nocarry
+	addl	$20, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
+	popl	%ebp
 	retl
-.Lfunc_end44:
-	.size	mcl_fp_add3Lbmi2, .Lfunc_end44-mcl_fp_add3Lbmi2
-
-	.globl	mcl_fp_addNF3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF3Lbmi2,@function
-mcl_fp_addNF3Lbmi2:                     # @mcl_fp_addNF3Lbmi2
-# BB#0:
+.Lfunc_end18:
+	.size	mcl_fp_sub6Lbmi2, .Lfunc_end18-mcl_fp_sub6Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subNF6Lbmi2              # -- Begin function mcl_fp_subNF6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF6Lbmi2,@function
+mcl_fp_subNF6Lbmi2:                     # @mcl_fp_subNF6Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	28(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %ecx
-	movl	24(%esp), %esi
-	addl	(%esi), %edx
-	adcl	4(%esi), %ecx
-	movl	8(%eax), %eax
-	adcl	8(%esi), %eax
-	movl	32(%esp), %ebp
-	movl	%edx, %ebx
-	subl	(%ebp), %ebx
-	movl	%ecx, %edi
-	sbbl	4(%ebp), %edi
-	movl	%eax, %esi
+	subl	$24, %esp
+	movl	48(%esp), %ebx
+	movl	(%ebx), %ecx
+	movl	4(%ebx), %eax
+	movl	52(%esp), %ebp
+	subl	(%ebp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	sbbl	4(%ebp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%ebx), %ecx
+	movl	16(%ebx), %eax
+	movl	12(%ebx), %edx
+	movl	8(%ebx), %esi
 	sbbl	8(%ebp), %esi
-	movl	%esi, %ebp
+	movl	%esi, (%esp)                    # 4-byte Spill
+	sbbl	12(%ebp), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	sbbl	16(%ebp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	sbbl	20(%ebp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, %ebp
 	sarl	$31, %ebp
-	testl	%ebp, %ebp
-	js	.LBB45_2
-# BB#1:
-	movl	%ebx, %edx
-.LBB45_2:
-	movl	20(%esp), %ebx
-	movl	%edx, (%ebx)
-	js	.LBB45_4
-# BB#3:
-	movl	%edi, %ecx
-.LBB45_4:
-	movl	%ecx, 4(%ebx)
-	js	.LBB45_6
-# BB#5:
-	movl	%esi, %eax
-.LBB45_6:
-	movl	%eax, 8(%ebx)
+	movl	%ebp, %eax
+	shldl	$1, %ecx, %eax
+	movl	56(%esp), %ebx
+	andl	(%ebx), %eax
+	movl	20(%ebx), %edi
+	andl	%ebp, %edi
+	movl	16(%ebx), %esi
+	andl	%ebp, %esi
+	movl	12(%ebx), %edx
+	andl	%ebp, %edx
+	movl	8(%ebx), %ecx
+	andl	%ebp, %ecx
+	andl	4(%ebx), %ebp
+	addl	12(%esp), %eax                  # 4-byte Folded Reload
+	adcl	16(%esp), %ebp                  # 4-byte Folded Reload
+	movl	44(%esp), %ebx
+	movl	%eax, (%ebx)
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ebp, 4(%ebx)
+	adcl	8(%esp), %edx                   # 4-byte Folded Reload
+	movl	%ecx, 8(%ebx)
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edx, 12(%ebx)
+	movl	%esi, 16(%ebx)
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 20(%ebx)
+	addl	$24, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end45:
-	.size	mcl_fp_addNF3Lbmi2, .Lfunc_end45-mcl_fp_addNF3Lbmi2
-
-	.globl	mcl_fp_sub3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub3Lbmi2,@function
-mcl_fp_sub3Lbmi2:                       # @mcl_fp_sub3Lbmi2
-# BB#0:
+.Lfunc_end19:
+	.size	mcl_fp_subNF6Lbmi2, .Lfunc_end19-mcl_fp_subNF6Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_add6Lbmi2             # -- Begin function mcl_fpDbl_add6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add6Lbmi2,@function
+mcl_fpDbl_add6Lbmi2:                    # @mcl_fpDbl_add6Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %edx
-	movl	(%edx), %ecx
-	movl	4(%edx), %eax
-	xorl	%ebx, %ebx
-	movl	28(%esp), %esi
-	subl	(%esi), %ecx
-	sbbl	4(%esi), %eax
-	movl	8(%edx), %edx
-	sbbl	8(%esi), %edx
-	movl	20(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	%eax, 4(%esi)
-	movl	%edx, 8(%esi)
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	je	.LBB46_2
-# BB#1:                                 # %carry
-	movl	32(%esp), %edi
-	movl	4(%edi), %ebx
-	movl	8(%edi), %ebp
-	addl	(%edi), %ecx
-	movl	%ecx, (%esi)
-	adcl	%eax, %ebx
-	movl	%ebx, 4(%esi)
-	adcl	%edx, %ebp
-	movl	%ebp, 8(%esi)
-.LBB46_2:                               # %nocarry
+	subl	$32, %esp
+	movl	56(%esp), %esi
+	movl	(%esi), %eax
+	movl	4(%esi), %ecx
+	movl	60(%esp), %edx
+	addl	(%edx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	4(%edx), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	44(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	40(%esi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	36(%esi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	32(%esi), %ecx
+	movl	28(%esi), %ebx
+	movl	24(%esi), %edi
+	movl	20(%esi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	16(%esi), %ebp
+	movl	12(%esi), %eax
+	movl	8(%esi), %esi
+	adcl	8(%edx), %esi
+	adcl	12(%edx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	16(%edx), %ebp
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	20(%edx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	24(%edx), %edi
+	adcl	28(%edx), %ebx
+	adcl	32(%edx), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	36(%edx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	40(%edx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	44(%edx), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edx
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 20(%edx)
+	movl	%ebp, 16(%edx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edx)
+	movl	%esi, 8(%edx)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%edx)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%edx)
+	setb	20(%esp)                        # 1-byte Folded Spill
+	movl	64(%esp), %eax
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	subl	(%eax), %edi
+	movl	%edi, (%esp)                    # 4-byte Spill
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	%ebx, %ecx
+	sbbl	4(%eax), %ecx
+	movl	28(%esp), %edx                  # 4-byte Reload
+	sbbl	8(%eax), %edx
+	movl	8(%esp), %esi                   # 4-byte Reload
+	sbbl	12(%eax), %esi
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	sbbl	16(%eax), %ebx
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, %edi
+	sbbl	20(%eax), %ebp
+	movzbl	20(%esp), %eax                  # 1-byte Folded Reload
+	sbbl	$0, %eax
+	testb	$1, %al
+	jne	.LBB20_1
+# %bb.2:
+	movl	52(%esp), %eax
+	movl	%ebp, 44(%eax)
+	jne	.LBB20_3
+.LBB20_4:
+	movl	%ebx, 40(%eax)
+	jne	.LBB20_5
+.LBB20_6:
+	movl	%esi, 36(%eax)
+	jne	.LBB20_7
+.LBB20_8:
+	movl	%edx, 32(%eax)
+	jne	.LBB20_9
+.LBB20_10:
+	movl	%ecx, 28(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	je	.LBB20_12
+.LBB20_11:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+.LBB20_12:
+	movl	%ecx, 24(%eax)
+	addl	$32, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end46:
-	.size	mcl_fp_sub3Lbmi2, .Lfunc_end46-mcl_fp_sub3Lbmi2
-
-	.globl	mcl_fp_subNF3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF3Lbmi2,@function
-mcl_fp_subNF3Lbmi2:                     # @mcl_fp_subNF3Lbmi2
-# BB#0:
+.LBB20_1:
+	movl	%edi, %ebp
+	movl	52(%esp), %eax
+	movl	%ebp, 44(%eax)
+	je	.LBB20_4
+.LBB20_3:
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	movl	%ebx, 40(%eax)
+	je	.LBB20_6
+.LBB20_5:
+	movl	8(%esp), %esi                   # 4-byte Reload
+	movl	%esi, 36(%eax)
+	je	.LBB20_8
+.LBB20_7:
+	movl	28(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 32(%eax)
+	je	.LBB20_10
+.LBB20_9:
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	jne	.LBB20_11
+	jmp	.LBB20_12
+.Lfunc_end20:
+	.size	mcl_fpDbl_add6Lbmi2, .Lfunc_end20-mcl_fpDbl_add6Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sub6Lbmi2             # -- Begin function mcl_fpDbl_sub6Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub6Lbmi2,@function
+mcl_fpDbl_sub6Lbmi2:                    # @mcl_fpDbl_sub6Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	28(%esp), %esi
-	subl	(%esi), %ecx
-	sbbl	4(%esi), %edx
-	movl	8(%eax), %eax
-	sbbl	8(%esi), %eax
-	movl	%eax, %esi
-	sarl	$31, %esi
-	movl	%esi, %edi
-	shldl	$1, %eax, %edi
-	movl	32(%esp), %ebx
-	andl	(%ebx), %edi
-	movl	8(%ebx), %ebp
-	andl	%esi, %ebp
-	andl	4(%ebx), %esi
-	addl	%ecx, %edi
-	adcl	%edx, %esi
-	movl	20(%esp), %ecx
-	movl	%edi, (%ecx)
-	movl	%esi, 4(%ecx)
-	adcl	%eax, %ebp
-	movl	%ebp, 8(%ecx)
+	subl	$36, %esp
+	movl	60(%esp), %eax
+	movl	(%eax), %esi
+	movl	4(%eax), %edi
+	xorl	%edx, %edx
+	movl	64(%esp), %ebx
+	subl	(%ebx), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	sbbl	4(%ebx), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	44(%eax), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	40(%eax), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	36(%eax), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	32(%eax), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	28(%eax), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	24(%eax), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	20(%eax), %esi
+	movl	16(%eax), %edi
+	movl	12(%eax), %ecx
+	movl	8(%eax), %ebp
+	sbbl	8(%ebx), %ebp
+	sbbl	12(%ebx), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	sbbl	16(%ebx), %edi
+	sbbl	20(%ebx), %esi
+	movl	%esi, %ecx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	24(%ebx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	28(%ebx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	32(%ebx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%ebx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%ebx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	sbbl	44(%ebx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	56(%esp), %esi
+	movl	%ecx, 20(%esi)
+	movl	%edi, 16(%esi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%esi)
+	movl	%ebp, 8(%esi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%esi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%esi)
+	sbbl	%edx, %edx
+	andl	$1, %edx
+	negl	%edx
+	movl	68(%esp), %eax
+	movl	20(%eax), %ecx
+	andl	%edx, %ecx
+	movl	16(%eax), %edi
+	andl	%edx, %edi
+	movl	12(%eax), %ebx
+	andl	%edx, %ebx
+	movl	8(%eax), %ebp
+	andl	%edx, %ebp
+	movl	68(%esp), %eax
+	movl	4(%eax), %eax
+	andl	%edx, %eax
+	movl	68(%esp), %esi
+	andl	(%esi), %edx
+	addl	4(%esp), %edx                   # 4-byte Folded Reload
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	56(%esp), %esi
+	movl	%edx, 24(%esi)
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%eax, 28(%esi)
+	adcl	16(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebp, 32(%esi)
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	movl	%ebx, 36(%esi)
+	movl	%edi, 40(%esi)
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 44(%esi)
+	addl	$36, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end47:
-	.size	mcl_fp_subNF3Lbmi2, .Lfunc_end47-mcl_fp_subNF3Lbmi2
-
-	.globl	mcl_fpDbl_add3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add3Lbmi2,@function
-mcl_fpDbl_add3Lbmi2:                    # @mcl_fpDbl_add3Lbmi2
-# BB#0:
+.Lfunc_end21:
+	.size	mcl_fpDbl_sub6Lbmi2, .Lfunc_end21-mcl_fpDbl_sub6Lbmi2
+                                        # -- End function
+	.globl	mulPv224x32bmi2                 # -- Begin function mulPv224x32bmi2
+	.p2align	4, 0x90
+	.type	mulPv224x32bmi2,@function
+mulPv224x32bmi2:                        # @mulPv224x32bmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	pushl	%eax
-	movl	32(%esp), %esi
-	movl	20(%esi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	16(%esi), %edi
-	movl	12(%esi), %ebx
-	movl	(%esi), %edx
-	movl	28(%esp), %eax
-	addl	(%eax), %edx
-	movl	24(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%esi), %edx
-	movl	4(%esi), %esi
-	adcl	4(%eax), %esi
-	adcl	8(%eax), %edx
-	movl	%esi, 4(%ecx)
-	movl	20(%eax), %ebp
-	movl	%edx, 8(%ecx)
-	movl	12(%eax), %esi
-	movl	16(%eax), %edx
-	adcl	%ebx, %esi
-	adcl	%edi, %edx
-	adcl	(%esp), %ebp            # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	36(%esp), %ecx
+	subl	$16, %esp
+	movl	44(%esp), %edx
+	movl	40(%esp), %esi
+	mulxl	4(%esi), %ecx, %eax
+	mulxl	(%esi), %edi, %ebx
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	addl	%ecx, %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	mulxl	8(%esi), %ebp, %ecx
+	adcl	%eax, %ebp
+	mulxl	12(%esi), %edi, %eax
 	movl	%esi, %ebx
-	subl	(%ecx), %ebx
-	movl	%edx, %edi
-	sbbl	4(%ecx), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	movl	%ebp, %ecx
-	movl	36(%esp), %edi
-	sbbl	8(%edi), %ecx
-	sbbl	$0, %eax
-	andl	$1, %eax
-	jne	.LBB48_2
-# BB#1:
-	movl	%ecx, %ebp
-.LBB48_2:
-	testb	%al, %al
-	jne	.LBB48_4
-# BB#3:
-	movl	%ebx, %esi
-.LBB48_4:
-	movl	24(%esp), %eax
-	movl	%esi, 12(%eax)
-	jne	.LBB48_6
-# BB#5:
-	movl	(%esp), %edx            # 4-byte Reload
-.LBB48_6:
-	movl	%edx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	addl	$4, %esp
+	adcl	%ecx, %edi
+	mulxl	16(%esi), %esi, %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	%eax, %esi
+	mulxl	20(%ebx), %ecx, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	36(%esp), %eax
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, (%eax)
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	movl	%ebx, 4(%eax)
+	movl	%ebp, 8(%eax)
+	movl	%edi, 12(%eax)
+	movl	%esi, 16(%eax)
+	movl	%ecx, 20(%eax)
+	movl	40(%esp), %ecx
+	mulxl	24(%ecx), %ecx, %edx
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 24(%eax)
+	adcl	$0, %edx
+	movl	%edx, 28(%eax)
+	addl	$16, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
-	retl
-.Lfunc_end48:
-	.size	mcl_fpDbl_add3Lbmi2, .Lfunc_end48-mcl_fpDbl_add3Lbmi2
-
-	.globl	mcl_fpDbl_sub3Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub3Lbmi2,@function
-mcl_fpDbl_sub3Lbmi2:                    # @mcl_fpDbl_sub3Lbmi2
-# BB#0:
+	retl	$4
+.Lfunc_end22:
+	.size	mulPv224x32bmi2, .Lfunc_end22-mulPv224x32bmi2
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre7Lbmi2         # -- Begin function mcl_fp_mulUnitPre7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre7Lbmi2,@function
+mcl_fp_mulUnitPre7Lbmi2:                # @mcl_fp_mulUnitPre7Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %esi
-	movl	28(%esp), %ebx
-	subl	(%ebx), %edx
-	sbbl	4(%ebx), %esi
-	movl	8(%ecx), %ebp
-	sbbl	8(%ebx), %ebp
-	movl	20(%esp), %eax
-	movl	%edx, (%eax)
-	movl	12(%ecx), %edi
-	sbbl	12(%ebx), %edi
-	movl	%esi, 4(%eax)
-	movl	16(%ecx), %esi
-	sbbl	16(%ebx), %esi
-	movl	20(%ebx), %ebx
-	movl	20(%ecx), %edx
+	subl	$12, %esp
+	movl	40(%esp), %edx
+	movl	36(%esp), %edi
+	mulxl	4(%edi), %ecx, %esi
+	mulxl	(%edi), %eax, %ebx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	addl	%ecx, %ebx
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	mulxl	8(%edi), %ebp, %eax
+	movl	%edi, %ecx
+	adcl	%esi, %ebp
+	mulxl	12(%edi), %edi, %ebx
+	adcl	%eax, %edi
+	mulxl	16(%ecx), %esi, %eax
+	adcl	%ebx, %esi
+	mulxl	20(%ecx), %ecx, %ebx
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	adcl	%eax, %ecx
+	movl	32(%esp), %eax
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	movl	%ebx, (%eax)
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	movl	%ebx, 4(%eax)
 	movl	%ebp, 8(%eax)
-	sbbl	%ebx, %edx
-	movl	$0, %ecx
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	movl	32(%esp), %ebp
-	jne	.LBB49_1
-# BB#2:
-	xorl	%ebx, %ebx
-	jmp	.LBB49_3
-.LBB49_1:
-	movl	8(%ebp), %ebx
-.LBB49_3:
-	testb	%cl, %cl
-	movl	$0, %eax
-	jne	.LBB49_4
-# BB#5:
-	xorl	%ecx, %ecx
-	jmp	.LBB49_6
-.LBB49_4:
-	movl	(%ebp), %ecx
-	movl	4(%ebp), %eax
-.LBB49_6:
-	addl	%edi, %ecx
-	adcl	%esi, %eax
-	movl	20(%esp), %esi
-	movl	%ecx, 12(%esi)
-	movl	%eax, 16(%esi)
-	adcl	%edx, %ebx
-	movl	%ebx, 20(%esi)
+	movl	%edi, 12(%eax)
+	movl	%esi, 16(%eax)
+	movl	%ecx, 20(%eax)
+	movl	36(%esp), %ecx
+	mulxl	24(%ecx), %ecx, %edx
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 24(%eax)
+	adcl	$0, %edx
+	movl	%edx, 28(%eax)
+	addl	$12, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end49:
-	.size	mcl_fpDbl_sub3Lbmi2, .Lfunc_end49-mcl_fpDbl_sub3Lbmi2
-
-	.globl	mcl_fp_mulUnitPre4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre4Lbmi2,@function
-mcl_fp_mulUnitPre4Lbmi2:                # @mcl_fp_mulUnitPre4Lbmi2
-# BB#0:
+.Lfunc_end23:
+	.size	mcl_fp_mulUnitPre7Lbmi2, .Lfunc_end23-mcl_fp_mulUnitPre7Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre7Lbmi2          # -- Begin function mcl_fpDbl_mulPre7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre7Lbmi2,@function
+mcl_fpDbl_mulPre7Lbmi2:                 # @mcl_fpDbl_mulPre7Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	28(%esp), %edx
-	movl	24(%esp), %eax
-	mulxl	4(%eax), %esi, %ecx
-	mulxl	(%eax), %edi, %ebx
-	addl	%esi, %ebx
-	mulxl	8(%eax), %ebp, %esi
-	adcl	%ecx, %ebp
-	mulxl	12(%eax), %eax, %ecx
-	movl	20(%esp), %edx
-	movl	%edi, (%edx)
-	movl	%ebx, 4(%edx)
-	movl	%ebp, 8(%edx)
-	adcl	%esi, %eax
-	movl	%eax, 12(%edx)
-	adcl	$0, %ecx
-	movl	%ecx, 16(%edx)
+	subl	$80, %esp
+	movl	104(%esp), %edi
+	movl	(%edi), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	108(%esp), %eax
+	movl	(%eax), %ebx
+	mulxl	%ebx, %eax, %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	100(%esp), %ecx
+	movl	%eax, (%ecx)
+	movl	24(%edi), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	mulxl	%ebx, %ecx, %eax
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%edi), %edx
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	mulxl	%ebx, %ecx, %eax
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	16(%edi), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	mulxl	%ebx, %ecx, %eax
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	4(%edi), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	mulxl	%ebx, %esi, %ebp
+	addl	20(%esp), %esi                  # 4-byte Folded Reload
+	movl	8(%edi), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	mulxl	%ebx, %ecx, %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	adcl	%ebp, %ecx
+	movl	12(%edi), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	mulxl	%ebx, %edi, %edx
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	adcl	4(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	$0, 16(%esp)                    # 4-byte Folded Spill
+	movl	108(%esp), %edx
+	movl	4(%edx), %edx
+	mulxl	48(%esp), %eax, %ebp            # 4-byte Folded Reload
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	addl	%esi, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	32(%esp), %ebx, %esi            # 4-byte Folded Reload
+	movl	%esi, 60(%esp)                  # 4-byte Spill
+	adcl	%ecx, %ebx
+	mulxl	20(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	adcl	%edi, %eax
+	movl	%eax, %edi
+	mulxl	28(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, %ebp
+	mulxl	44(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	mulxl	72(%esp), %esi, %eax            # 4-byte Folded Reload
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	adcl	(%esp), %esi                    # 4-byte Folded Reload
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	76(%esp), %esi                  # 4-byte Reload
+	mulxl	%esi, %eax, %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	setb	%dl
+	addl	8(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 64(%esp)                  # 4-byte Spill
+	adcl	60(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	100(%esp), %ebp
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%ebp)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 4(%esp)                   # 4-byte Folded Spill
+	adcl	68(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %ebx
+	movzbl	%dl, %ecx
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	108(%esp), %edx
+	movl	8(%edx), %edx
+	mulxl	%esi, %esi, %ecx
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	mulxl	72(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	20(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	32(%esp), %ebp, %eax            # 4-byte Folded Reload
+	mulxl	48(%esp), %esi, %edi            # 4-byte Folded Reload
+	addl	%ebp, %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	adcl	%ecx, %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	mulxl	28(%esp), %eax, %ecx            # 4-byte Folded Reload
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	44(%esp), %ebp, %eax            # 4-byte Folded Reload
+	adcl	%ecx, %ebp
+	adcl	68(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %edi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	52(%esp), %ecx                  # 4-byte Folded Reload
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	addl	64(%esp), %esi                  # 4-byte Folded Reload
+	movl	100(%esp), %edx
+	movl	%esi, 8(%edx)
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 36(%esp)                  # 4-byte Folded Spill
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	%edx, 60(%esp)                  # 4-byte Folded Spill
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	adcl	%ebx, %edi
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	108(%esp), %eax
+	movl	12(%eax), %edx
+	mulxl	32(%esp), %ecx, %esi            # 4-byte Folded Reload
+	mulxl	48(%esp), %eax, %ebx            # 4-byte Folded Reload
+	addl	%ecx, %ebx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	mulxl	20(%esp), %ebx, %ecx            # 4-byte Folded Reload
+	adcl	%esi, %ebx
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	mulxl	28(%esp), %ebx, %esi            # 4-byte Folded Reload
+	adcl	%ecx, %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	mulxl	44(%esp), %ebx, %ecx            # 4-byte Folded Reload
+	adcl	%esi, %ebx
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	movl	72(%esp), %edi                  # 4-byte Reload
+	mulxl	%edi, %ebx, %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	adcl	%ecx, %ebx
+	movl	%ebx, 64(%esp)                  # 4-byte Spill
+	movl	76(%esp), %esi                  # 4-byte Reload
+	mulxl	%esi, %ecx, %edx
+	adcl	56(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	$0, %edx
+	movl	%edx, %ebx
+	addl	36(%esp), %eax                  # 4-byte Folded Reload
+	movl	60(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 12(%esp)                  # 4-byte Folded Spill
+	movl	100(%esp), %edx
+	movl	%eax, 12(%edx)
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	%eax, 4(%esp)                   # 4-byte Folded Spill
+	adcl	%ebp, 8(%esp)                   # 4-byte Folded Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 40(%esp)                  # 4-byte Folded Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 64(%esp)                  # 4-byte Folded Spill
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	$0, %ebx
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	movl	108(%esp), %eax
+	movl	16(%eax), %edx
+	mulxl	%esi, %ecx, %eax
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	mulxl	%edi, %ecx, %eax
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	20(%esp), %ebp, %edi            # 4-byte Folded Reload
+	mulxl	32(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	48(%esp), %ebx, %esi            # 4-byte Folded Reload
+	addl	%ecx, %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	%ebp, %eax
+	movl	%eax, %ebp
+	mulxl	28(%esp), %eax, %ecx            # 4-byte Folded Reload
+	adcl	%edi, %eax
+	movl	%eax, %edi
+	mulxl	44(%esp), %edx, %esi            # 4-byte Folded Reload
+	adcl	%ecx, %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	adcl	68(%esp), %esi                  # 4-byte Folded Reload
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	52(%esp), %eax                  # 4-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	$0, %edx
+	addl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	100(%esp), %ecx
+	movl	%ebx, 16(%ecx)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	adcl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 60(%esp)                  # 4-byte Folded Spill
+	adcl	36(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	$0, %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	108(%esp), %eax
+	movl	20(%eax), %edx
+	mulxl	32(%esp), %ecx, %edi            # 4-byte Folded Reload
+	mulxl	48(%esp), %eax, %ebx            # 4-byte Folded Reload
+	addl	%ecx, %ebx
+	mulxl	20(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	adcl	%edi, %ebp
+	movl	%ebp, %edi
+	mulxl	28(%esp), %ebp, %esi            # 4-byte Folded Reload
+	adcl	%ecx, %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	mulxl	44(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	adcl	%esi, %ebp
+	movl	%ebp, %esi
+	mulxl	72(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	64(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, %ecx
+	mulxl	76(%esp), %ebp, %edx            # 4-byte Folded Reload
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	adcl	$0, %edx
+	movl	%edx, %ebp
+	addl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	100(%esp), %edx
+	movl	%eax, 20(%edx)
+	adcl	8(%esp), %ebx                   # 4-byte Folded Reload
+	adcl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 12(%esp)                  # 4-byte Folded Spill
+	adcl	36(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 4(%esp)                   # 4-byte Folded Spill
+	movl	108(%esp), %eax
+	movl	24(%eax), %edx
+	mulxl	32(%esp), %eax, %edi            # 4-byte Folded Reload
+	mulxl	48(%esp), %esi, %ecx            # 4-byte Folded Reload
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	$0, %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	addl	%eax, %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	mulxl	20(%esp), %ecx, %eax            # 4-byte Folded Reload
+	adcl	%edi, %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	mulxl	28(%esp), %edi, %ecx            # 4-byte Folded Reload
+	adcl	%eax, %edi
+	mulxl	44(%esp), %esi, %eax            # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	%ecx, %esi
+	mulxl	72(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	mulxl	76(%esp), %edx, %ecx            # 4-byte Folded Reload
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	adcl	$0, %ecx
+	addl	16(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	adcl	40(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	movl	100(%esp), %ebx
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 24(%ebx)
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 28(%ebx)
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 32(%ebx)
+	adcl	(%esp), %esi                    # 4-byte Folded Reload
+	movl	%edi, 36(%ebx)
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%esi, 40(%ebx)
+	movl	%eax, 44(%ebx)
+	adcl	48(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 48(%ebx)
+	adcl	$0, %ecx
+	movl	%ecx, 52(%ebx)
+	addl	$80, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end50:
-	.size	mcl_fp_mulUnitPre4Lbmi2, .Lfunc_end50-mcl_fp_mulUnitPre4Lbmi2
-
-	.globl	mcl_fpDbl_mulPre4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre4Lbmi2,@function
-mcl_fpDbl_mulPre4Lbmi2:                 # @mcl_fpDbl_mulPre4Lbmi2
-# BB#0:
+.Lfunc_end24:
+	.size	mcl_fpDbl_mulPre7Lbmi2, .Lfunc_end24-mcl_fpDbl_mulPre7Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre7Lbmi2          # -- Begin function mcl_fpDbl_sqrPre7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre7Lbmi2,@function
+mcl_fpDbl_sqrPre7Lbmi2:                 # @mcl_fpDbl_sqrPre7Lbmi2
+# %bb.0: entry. Loads seven 32-bit words x[0..6] via the pointer at 268(%esp), forms the products x[i]*x[j] with mulx, sums them row by row with carry, and stores fourteen words via the pointer at 264(%esp).
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$28, %esp
-	movl	52(%esp), %eax
-	movl	(%eax), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	4(%eax), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx
-	movl	(%ecx), %ebp
-	mulxl	%ebp, %esi, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	movl	%ebx, %ecx
-	mulxl	%ebp, %edx, %ebx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	addl	%esi, %ebx
-	movl	8(%eax), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, %esi
+	subl	$244, %esp                      # frame for spilled inputs and partial products
+	movl	268(%esp), %eax                 # input pointer: 2nd stack argument (16 bytes of pushes + 244-byte frame + return address precede the arguments)
+	movl	(%eax), %ecx                    # x[0]
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	4(%eax), %ecx                   # x[1]
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	8(%eax), %esi                   # x[2]
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	12(%eax), %ecx                  # x[3]
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	16(%eax), %ebx                  # x[4]
+	movl	24(%eax), %edx                  # x[6]; edx is mulx's implicit multiplicand, so this starts the x[6] row
+	movl	20(%eax), %ebp                  # x[5]
 	mulxl	%ebp, %eax, %edi
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	12(%esi), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	mulxl	%ebp, %ebp, %esi
-	adcl	%edi, %ebp
-	movl	48(%esp), %edx
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	%edi, (%edx)
+	movl	%edi, 84(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	mulxl	%ebx, %eax, %edi                # x[6]*x[4]: low word -> eax, high word -> edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	mulxl	%ecx, %ecx, %eax
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	mulxl	%esi, %ecx, %eax
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	mulxl	(%esp), %esi, %edi              # 4-byte Folded Reload
+	movl	%esi, 160(%esp)                 # 4-byte Spill
+	movl	%edi, 164(%esp)                 # 4-byte Spill
+	mulxl	4(%esp), %eax, %ecx             # 4-byte Folded Reload
+	movl	%ecx, 168(%esp)                 # 4-byte Spill
+	movl	%eax, 120(%esp)                 # 4-byte Spill
+	mulxl	%edx, %edx, %eax                # x[6]*x[6]
+	addl	%esi, %ecx                      # start of the carry chain that joins adjacent high/low halves of the x[6] row
+	movl	%ecx, 216(%esp)                 # 4-byte Spill
+	adcl	52(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 220(%esp)                 # 4-byte Spill
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 224(%esp)                 # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 228(%esp)                 # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 232(%esp)                 # 4-byte Spill
+	movl	84(%esp), %esi                  # 4-byte Reload
+	adcl	%esi, %edx
+	movl	%edx, 236(%esp)                 # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 240(%esp)                 # 4-byte Spill
+	movl	%ebp, %edx                      # edx = x[5]: next row
+	mulxl	%ebx, %eax, %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	16(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	mulxl	28(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	mulxl	(%esp), %eax, %edi              # 4-byte Folded Reload
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	mulxl	4(%esp), %eax, %ecx             # 4-byte Folded Reload
+	movl	%ecx, 156(%esp)                 # 4-byte Spill
+	movl	%eax, 116(%esp)                 # 4-byte Spill
+	mulxl	%ebp, %edx, %ebp                # x[5]*x[5]
+	addl	100(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, 204(%esp)                 # 4-byte Spill
+	movl	%edi, %eax
+	adcl	48(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 200(%esp)                 # 4-byte Spill
+	movl	76(%esp), %eax                  # 4-byte Reload
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 196(%esp)                 # 4-byte Spill
+	movl	80(%esp), %eax                  # 4-byte Reload
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 192(%esp)                 # 4-byte Spill
+	movl	72(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, %edx
+	movl	%edx, 212(%esp)                 # 4-byte Spill
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 208(%esp)                 # 4-byte Spill
 	adcl	$0, %esi
-	movl	56(%esp), %edx
-	movl	4(%edx), %edi
-	movl	%ecx, %edx
-	mulxl	%edi, %ecx, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	addl	%ebx, %ecx
-	movl	24(%esp), %edx          # 4-byte Reload
-	mulxl	%edi, %ebx, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	%eax, %ebx
-	movl	20(%esp), %edx          # 4-byte Reload
-	mulxl	%edi, %eax, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	%ebp, %eax
-	movl	16(%esp), %edx          # 4-byte Reload
-	mulxl	%edi, %edi, %edx
-	adcl	%esi, %edi
-	sbbl	%ebp, %ebp
-	andl	$1, %ebp
-	addl	8(%esp), %ebx           # 4-byte Folded Reload
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	adcl	(%esp), %edi            # 4-byte Folded Reload
-	adcl	%edx, %ebp
-	movl	48(%esp), %edx
-	movl	%ecx, 4(%edx)
-	movl	56(%esp), %ecx
-	movl	8(%ecx), %ecx
-	movl	12(%esp), %edx          # 4-byte Reload
-	mulxl	%ecx, %edx, %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	addl	%ebx, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	24(%esp), %edx          # 4-byte Reload
-	mulxl	%ecx, %ebx, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	%eax, %ebx
-	movl	20(%esp), %edx          # 4-byte Reload
-	mulxl	%ecx, %esi, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	adcl	%edi, %esi
-	movl	16(%esp), %edx          # 4-byte Reload
-	mulxl	%ecx, %edi, %eax
-	adcl	%ebp, %edi
-	sbbl	%ebp, %ebp
-	andl	$1, %ebp
-	addl	8(%esp), %ebx           # 4-byte Folded Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	adcl	%eax, %ebp
-	movl	48(%esp), %eax
-	movl	12(%esp), %ecx          # 4-byte Reload
+	movl	%esi, 84(%esp)                  # 4-byte Spill
+	movl	%ebx, %edx                      # edx = x[4]: next row
+	mulxl	16(%esp), %ecx, %edx            # 4-byte Folded Reload
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	%ebx, %edx
+	mulxl	28(%esp), %ecx, %edx            # 4-byte Folded Reload
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	movl	%ebx, %edx
+	mulxl	(%esp), %esi, %ebp              # 4-byte Folded Reload
+	movl	%esi, 132(%esp)                 # 4-byte Spill
+	movl	%ebp, 104(%esp)                 # 4-byte Spill
+	mulxl	4(%esp), %edx, %ecx             # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	%edx, 112(%esp)                 # 4-byte Spill
+	movl	%ebx, %edx
+	mulxl	%ebx, %ebx, %edx                # x[4]*x[4]
+	movl	%ecx, %edi
+	addl	%esi, %edi
+	movl	%edi, 180(%esp)                 # 4-byte Spill
+	adcl	92(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 176(%esp)                 # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 172(%esp)                 # 4-byte Spill
+	movl	60(%esp), %edi                  # 4-byte Reload
+	adcl	%edi, %ebx
+	movl	%ebx, 184(%esp)                 # 4-byte Spill
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 188(%esp)                 # 4-byte Spill
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	adcl	$0, 40(%esp)                    # 4-byte Folded Spill
+	movl	16(%esp), %edx                  # 4-byte Reload; edx = x[3]: next row
+	mulxl	28(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	mulxl	(%esp), %esi, %ebx              # 4-byte Folded Reload
+	movl	%esi, 128(%esp)                 # 4-byte Spill
+	movl	%ebx, 96(%esp)                  # 4-byte Spill
+	mulxl	4(%esp), %eax, %ecx             # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	mulxl	%edx, %ebp, %edx                # x[3]*x[3]
+	addl	%esi, %ecx
+	movl	%ecx, 148(%esp)                 # 4-byte Spill
+	adcl	88(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 144(%esp)                 # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	%esi, %ebp
+	movl	%ebp, 152(%esp)                 # 4-byte Spill
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 80(%esp)                  # 4-byte Folded Spill
+	adcl	$0, 36(%esp)                    # 4-byte Folded Spill
+	movl	28(%esp), %edx                  # 4-byte Reload; edx = x[2]: next row
+	mulxl	(%esp), %edi, %eax              # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%edi, 124(%esp)                 # 4-byte Spill
+	movl	%edx, %ebp
+	mulxl	4(%esp), %edx, %ecx             # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%ebp, %edx
+	mulxl	%ebp, %ebx, %edx                # x[2]*x[2]
+	addl	%edi, %ecx
+	movl	%ecx, 136(%esp)                 # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	%edi, %ebx
+	movl	%ebx, 140(%esp)                 # 4-byte Spill
+	adcl	88(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	adcl	92(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 64(%esp)                  # 4-byte Folded Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 76(%esp)                  # 4-byte Folded Spill
+	adcl	$0, 68(%esp)                    # 4-byte Folded Spill
+	movl	(%esp), %edx                    # 4-byte Reload; edx = x[1]
+	mulxl	%edx, %esi, %ebx                # x[1]*x[1]
+	movl	4(%esp), %eax                   # 4-byte Reload; eax = x[0]
+	mulxl	%eax, %ebp, %ecx                # x[1]*x[0]
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	movl	%eax, %edx                      # edx = x[0]
+	addl	%ecx, %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %esi
+	adcl	124(%esp), %ebx                 # 4-byte Folded Reload
+	movl	%ebx, 52(%esp)                  # 4-byte Spill
+	adcl	128(%esp), %edi                 # 4-byte Folded Reload
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	132(%esp), %eax                 # 4-byte Reload
+	adcl	%eax, 96(%esp)                  # 4-byte Folded Spill
+	movl	100(%esp), %eax                 # 4-byte Reload
+	adcl	%eax, 104(%esp)                 # 4-byte Folded Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	160(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	mulxl	%edx, %eax, %ecx                # x[0]*x[0]; mulx leaves flags alone, so the carry from the adcl above reaches the adcl below
+	movl	264(%esp), %edi                 # output pointer: 1st stack argument
+	movl	%eax, (%edi)                    # out[0] = low word of x[0]*x[0]
+	movl	164(%esp), %edi                 # 4-byte Reload
+	adcl	$0, %edi
+	addl	%ebp, %ecx                      # x[0] row: high(x[0]*x[0]) + low(x[1]*x[0])
+	adcl	32(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	108(%esp), %edx                 # 4-byte Folded Reload
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	112(%esp), %eax                 # 4-byte Folded Reload
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	116(%esp), %ebp                 # 4-byte Folded Reload
+	movl	156(%esp), %ebx                 # 4-byte Reload
+	adcl	120(%esp), %ebx                 # 4-byte Folded Reload
+	movl	168(%esp), %esi                 # 4-byte Reload
+	adcl	$0, %esi
+	addl	(%esp), %ecx                    # 4-byte Folded Reload; add the x[1] row
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	96(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	104(%esp), %ebx                 # 4-byte Folded Reload
+	adcl	56(%esp), %esi                  # 4-byte Folded Reload
+	adcl	$0, %edi
+	addl	32(%esp), %ecx                  # 4-byte Folded Reload; add the x[2] row
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	%edx, %eax
+	adcl	136(%esp), %eax                 # 4-byte Folded Reload
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	140(%esp), %ecx                 # 4-byte Folded Reload
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	24(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	64(%esp), %esi                  # 4-byte Folded Reload
+	adcl	76(%esp), %edi                  # 4-byte Folded Reload
+	movl	68(%esp), %edx                  # 4-byte Reload
+	adcl	$0, %edx
+	addl	108(%esp), %eax                 # 4-byte Folded Reload; add the x[3] row
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	148(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, %eax
+	adcl	144(%esp), %ebp                 # 4-byte Folded Reload
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	adcl	152(%esp), %ebx                 # 4-byte Folded Reload
+	adcl	44(%esp), %esi                  # 4-byte Folded Reload
+	adcl	60(%esp), %edi                  # 4-byte Folded Reload
+	adcl	80(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	addl	112(%esp), %eax                 # 4-byte Folded Reload; add the x[4] row
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	180(%esp), %edx                 # 4-byte Folded Reload
+	adcl	176(%esp), %ebx                 # 4-byte Folded Reload
+	adcl	172(%esp), %esi                 # 4-byte Folded Reload
+	adcl	184(%esp), %edi                 # 4-byte Folded Reload
+	adcl	188(%esp), %ebp                 # 4-byte Folded Reload
+	adcl	72(%esp), %ecx                  # 4-byte Folded Reload
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	addl	116(%esp), %edx                 # 4-byte Folded Reload; add the x[5] row
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	adcl	204(%esp), %ebx                 # 4-byte Folded Reload
+	adcl	200(%esp), %esi                 # 4-byte Folded Reload
+	adcl	196(%esp), %edi                 # 4-byte Folded Reload
+	adcl	192(%esp), %ebp                 # 4-byte Folded Reload
+	adcl	212(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	208(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	84(%esp), %edx                  # 4-byte Reload
+	adcl	$0, %edx
+	addl	120(%esp), %ebx                 # 4-byte Folded Reload; add the x[6] row
+	movl	264(%esp), %eax                 # output pointer; the stores below are interleaved with the x[6] row's adcl chain (movl does not touch flags)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	4(%esp), %ecx                   # 4-byte Reload
 	movl	%ecx, 8(%eax)
-	movl	56(%esp), %eax
-	movl	12(%eax), %edx
-	movl	52(%esp), %eax
-	mulxl	(%eax), %ecx, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	addl	%ebx, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebx
-	mulxl	4(%ebx), %ecx, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	adcl	%esi, %ecx
-	mulxl	8(%ebx), %eax, %esi
-	adcl	%edi, %eax
-	mulxl	12(%ebx), %edi, %edx
-	adcl	%ebp, %edi
-	sbbl	%ebp, %ebp
-	andl	$1, %ebp
-	addl	20(%esp), %ecx          # 4-byte Folded Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	adcl	%esi, %edi
-	movl	48(%esp), %esi
-	movl	24(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 12(%esi)
-	movl	%ecx, 16(%esi)
-	movl	%eax, 20(%esi)
-	movl	%edi, 24(%esi)
-	adcl	%edx, %ebp
-	movl	%ebp, 28(%esi)
-	addl	$28, %esp
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%eax)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%eax)
+	adcl	216(%esp), %esi                 # 4-byte Folded Reload
+	movl	%ebx, 24(%eax)
+	adcl	220(%esp), %edi                 # 4-byte Folded Reload
+	movl	%esi, 28(%eax)
+	adcl	224(%esp), %ebp                 # 4-byte Folded Reload
+	movl	%edi, 32(%eax)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	228(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ebp, 36(%eax)
+	movl	40(%esp), %esi                  # 4-byte Reload
+	adcl	232(%esp), %esi                 # 4-byte Folded Reload
+	movl	%ecx, 40(%eax)
+	movl	%esi, 44(%eax)
+	adcl	236(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, 48(%eax)
+	movl	%eax, %ecx                      # move the output pointer so eax can hold the top word
+	movl	240(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 52(%ecx)                  # out[13], the last result word
+	addl	$244, %esp                      # release the frame
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end51:
-	.size	mcl_fpDbl_mulPre4Lbmi2, .Lfunc_end51-mcl_fpDbl_mulPre4Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre4Lbmi2,@function
-mcl_fpDbl_sqrPre4Lbmi2:                 # @mcl_fpDbl_sqrPre4Lbmi2
-# BB#0:
+.Lfunc_end25:
+	.size	mcl_fpDbl_sqrPre7Lbmi2, .Lfunc_end25-mcl_fpDbl_sqrPre7Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_mont7Lbmi2               # -- Begin function mcl_fp_mont7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mont7Lbmi2,@function
+mcl_fp_mont7Lbmi2:                      # @mcl_fp_mont7Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$36, %esp
-	movl	60(%esp), %eax
-	movl	(%eax), %esi
-	movl	4(%eax), %edi
-	movl	56(%esp), %ebx
-	movl	%esi, %edx
-	mulxl	%esi, %eax, %ebp
-	movl	%eax, (%ebx)
-	movl	%edi, %edx
-	mulxl	%esi, %edx, %ecx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	addl	%edx, %eax
-	movl	60(%esp), %edx
-	movl	8(%edx), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	mulxl	%esi, %edx, %ebx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	%ecx, %ebp
-	adcl	%edx, %ebp
-	movl	60(%esp), %ecx
-	movl	12(%ecx), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	mulxl	%esi, %esi, %ecx
-	adcl	%ebx, %esi
-	adcl	$0, %ecx
-	addl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	%edi, %ebx, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	%ebp, %ebx
-	movl	32(%esp), %edx          # 4-byte Reload
-	mulxl	%edi, %ebp, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	(%esp), %edx            # 4-byte Reload
-	mulxl	%edi, %edi, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	%edi, %esi
-	adcl	%ecx, %ebp
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	addl	16(%esp), %ebx          # 4-byte Folded Reload
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	adcl	%eax, %ebp
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	addl	20(%esp), %ebx          # 4-byte Folded Reload
-	adcl	%edi, %esi
-	mulxl	%edx, %edi, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	adcl	%ebp, %edi
-	movl	32(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ebp, %edx
-	adcl	%ecx, %ebp
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%edx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 4(%eax)
-	movl	%ebx, 8(%eax)
-	movl	60(%esp), %eax
-	movl	12(%eax), %edx
-	mulxl	(%eax), %ebx, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	addl	%esi, %ebx
-	mulxl	4(%eax), %esi, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	%edi, %esi
-	mulxl	8(%eax), %ecx, %edi
-	adcl	%ebp, %ecx
-	mulxl	%edx, %ebp, %edx
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	32(%esp), %esi          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	adcl	%edi, %ebp
-	movl	56(%esp), %edi
-	movl	%ebx, 12(%edi)
-	movl	%esi, 16(%edi)
-	movl	%ecx, 20(%edi)
-	movl	%ebp, 24(%edi)
-	adcl	%edx, %eax
-	movl	%eax, 28(%edi)
-	addl	$36, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end52:
-	.size	mcl_fpDbl_sqrPre4Lbmi2, .Lfunc_end52-mcl_fpDbl_sqrPre4Lbmi2
-
-	.globl	mcl_fp_mont4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont4Lbmi2,@function
-mcl_fp_mont4Lbmi2:                      # @mcl_fp_mont4Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$64, %esp
-	movl	88(%esp), %eax
-	movl	12(%eax), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx
+	subl	$124, %esp
+	movl	148(%esp), %edi
+	movl	24(%edi), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	152(%esp), %ecx
 	movl	(%ecx), %ecx
-	movl	8(%eax), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	(%eax), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	4(%eax), %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
+	mulxl	%ecx, %edx, %eax
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%edi), %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
 	mulxl	%ecx, %eax, %ebp
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%esi, %edx
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	16(%edi), %edx
+	movl	%edx, 88(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %eax, %esi
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	8(%edi), %edx
+	movl	%edx, 96(%esp)                  # 4-byte Spill
 	mulxl	%ecx, %edx, %eax
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	%ecx, %ebx, %esi
-	movl	%edi, %edx
-	mulxl	%ecx, %edx, %ecx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	addl	%ebx, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebx
-	movl	-4(%ebx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	imull	%eax, %edx
-	movl	(%ebx), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	4(%ebx), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
+	movl	%edx, 104(%esp)                 # 4-byte Spill
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	4(%edi), %edx
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %edx, %eax
+	movl	%edx, 100(%esp)                 # 4-byte Spill
+	movl	(%edi), %edx
+	movl	%edx, 92(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %ebx, %edx
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	addl	100(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	adcl	104(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%edi), %edx
+	movl	%edx, 108(%esp)                 # 4-byte Spill
+	mulxl	%ecx, %ecx, %eax
+	adcl	64(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	68(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	72(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	76(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	adcl	$0, 4(%esp)                     # 4-byte Folded Spill
+	movl	156(%esp), %eax
+	movl	-4(%eax), %edx
+	movl	%edx, 84(%esp)                  # 4-byte Spill
+	imull	(%esp), %edx                    # 4-byte Folded Reload
+	movl	4(%eax), %ecx
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
 	mulxl	%ecx, %esi, %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %eax
-	addl	%esi, %eax
-	movl	%eax, %ebp
-	movl	8(%ebx), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	mulxl	%eax, %edi, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	12(%ebx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mulxl	%eax, %esi, %ebx
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	adcl	$0, %ebx
-	addl	8(%esp), %ecx           # 4-byte Folded Reload
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	4(%eax), %edx
-	mulxl	40(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mulxl	36(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	mulxl	28(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
-	mulxl	32(%esp), %ecx, %ebp    # 4-byte Folded Reload
-	addl	(%esp), %ebp            # 4-byte Folded Reload
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	movl	24(%esp), %eax          # 4-byte Reload
+	movl	(%eax), %edi
+	movl	%edi, 72(%esp)                  # 4-byte Spill
+	mulxl	%edi, %ebx, %edi
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	addl	%esi, %edi
+	movl	%edi, %ebx
+	movl	8(%eax), %esi
+	movl	%esi, 68(%esp)                  # 4-byte Spill
+	mulxl	%esi, %edi, %esi
+	adcl	%ecx, %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	12(%eax), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %edi, %ecx
+	adcl	%esi, %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	16(%eax), %esi
+	movl	%esi, 104(%esp)                 # 4-byte Spill
+	mulxl	%esi, %edi, %esi
+	adcl	%ecx, %edi
+	movl	%edi, %ebp
+	movl	20(%eax), %ecx
+	movl	%ecx, 100(%esp)                 # 4-byte Spill
+	mulxl	%ecx, %edi, %ecx
+	adcl	%esi, %edi
+	movl	24(%eax), %eax
+	movl	%eax, 116(%esp)                 # 4-byte Spill
+	mulxl	%eax, %edx, %eax
+	adcl	%ecx, %edx
 	adcl	$0, %eax
-	addl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	%edi, %ebp
-	adcl	%esi, 12(%esp)          # 4-byte Folded Spill
-	adcl	%ebx, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	imull	44(%esp), %edx          # 4-byte Folded Reload
-	mulxl	60(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	56(%esp), %edi, %esi    # 4-byte Folded Reload
-	movl	%edi, 4(%esp)           # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	addl	(%esp), %ecx                    # 4-byte Folded Reload
+	adcl	8(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 40(%esp)                  # 4-byte Folded Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 32(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	152(%esp), %edx
+	movl	4(%edx), %edx
+	mulxl	48(%esp), %eax, %ebp            # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	mulxl	60(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 112(%esp)                 # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	88(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 120(%esp)                 # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	mulxl	80(%esp), %ebx, %ecx            # 4-byte Folded Reload
+	mulxl	92(%esp), %esi, %eax            # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	setb	52(%esp)                        # 1-byte Folded Spill
+	addl	%ebx, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	96(%esp), %eax, %esi            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	mulxl	108(%esp), %edi, %eax           # 4-byte Folded Reload
+	adcl	%esi, %edi
+	adcl	120(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	112(%esp), %eax                 # 4-byte Folded Reload
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebp, %edx
+	adcl	$0, %edx
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	addl	28(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %ebp                    # 4-byte Reload
+	adcl	40(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	movzbl	52(%esp), %eax                  # 1-byte Folded Reload
+	adcl	%eax, %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	setb	32(%esp)                        # 1-byte Folded Spill
+	movl	84(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	mulxl	76(%esp), %ecx, %ebx            # 4-byte Folded Reload
+	mulxl	72(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
 	addl	%ecx, %esi
-	mulxl	52(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %ebx
-	mulxl	48(%esp), %edi, %eax    # 4-byte Folded Reload
-	adcl	%ecx, %edi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	mulxl	68(%esp), %eax, %edi            # 4-byte Folded Reload
+	adcl	%ebx, %eax
+	mulxl	64(%esp), %esi, %ebx            # 4-byte Folded Reload
+	adcl	%edi, %esi
+	mulxl	104(%esp), %edi, %ecx           # 4-byte Folded Reload
+	adcl	%ebx, %edi
+	mulxl	100(%esp), %ebx, %ebp           # 4-byte Folded Reload
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	adcl	%ecx, %ebx
+	mulxl	116(%esp), %ebp, %edx           # 4-byte Folded Reload
+	adcl	44(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	$0, %edx
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	addl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 44(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 56(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movzbl	32(%esp), %eax                  # 1-byte Folded Reload
 	adcl	$0, %eax
-	movl	16(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	movl	4(%esp), %edx           # 4-byte Reload
-	addl	8(%esp), %edx           # 4-byte Folded Reload
-	adcl	%ebp, %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx
-	movl	8(%edx), %edx
-	mulxl	40(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mulxl	36(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	mulxl	28(%esp), %esi, %eax    # 4-byte Folded Reload
-	mulxl	32(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	addl	%esi, %ecx
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	152(%esp), %eax
+	movl	8(%eax), %edx
+	mulxl	48(%esp), %eax, %ebp            # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	mulxl	60(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	96(%esp), %esi, %eax            # 4-byte Folded Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	mulxl	80(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	92(%esp), %ebx, %edi            # 4-byte Folded Reload
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	addl	%ecx, %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	adcl	%esi, %eax
+	movl	%eax, %ebx
+	mulxl	108(%esp), %edi, %eax           # 4-byte Folded Reload
+	adcl	52(%esp), %edi                  # 4-byte Folded Reload
+	mulxl	88(%esp), %esi, %ecx            # 4-byte Folded Reload
+	adcl	%eax, %esi
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ebp, %eax
+	adcl	$0, %eax
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	addl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	adcl	%ebp, 24(%esp)                  # 4-byte Folded Spill
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	adcl	44(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	56(%esp), %edi                  # 4-byte Folded Reload
+	adcl	8(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	setb	32(%esp)                        # 1-byte Folded Spill
+	movl	84(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	mulxl	76(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	72(%esp), %ebx, %esi            # 4-byte Folded Reload
+	movl	%ebx, 44(%esp)                  # 4-byte Spill
+	addl	%ecx, %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	mulxl	68(%esp), %esi, %ecx            # 4-byte Folded Reload
+	adcl	%eax, %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	mulxl	64(%esp), %esi, %eax            # 4-byte Folded Reload
+	adcl	%ecx, %esi
+	movl	%esi, %ebp
+	mulxl	104(%esp), %ecx, %esi           # 4-byte Folded Reload
+	adcl	%eax, %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	mulxl	100(%esp), %ecx, %eax           # 4-byte Folded Reload
+	adcl	%esi, %ecx
 	movl	%ecx, %esi
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, %ecx
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	24(%esp), %edx          # 4-byte Reload
+	mulxl	116(%esp), %ebx, %ecx           # 4-byte Folded Reload
+	adcl	%eax, %ebx
+	adcl	$0, %ecx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	addl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 8(%esp)                   # 4-byte Folded Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 40(%esp)                  # 4-byte Folded Spill
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 36(%esp)                  # 4-byte Folded Spill
+	adcl	%edi, %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movzbl	32(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	152(%esp), %eax
+	movl	12(%eax), %edx
+	mulxl	48(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	mulxl	60(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	mulxl	96(%esp), %ebp, %eax            # 4-byte Folded Reload
+	movl	%eax, 112(%esp)                 # 4-byte Spill
+	mulxl	80(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	92(%esp), %esi, %edi            # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	addl	%ecx, %edi
+	movl	%edi, (%esp)                    # 4-byte Spill
+	adcl	%ebp, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	mulxl	108(%esp), %edi, %ebp           # 4-byte Folded Reload
+	adcl	112(%esp), %edi                 # 4-byte Folded Reload
+	mulxl	88(%esp), %eax, %esi            # 4-byte Folded Reload
+	adcl	%ebp, %eax
+	movl	%eax, 112(%esp)                 # 4-byte Spill
+	adcl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	56(%esp), %ebp                  # 4-byte Folded Reload
+	movl	28(%esp), %edx                  # 4-byte Reload
 	adcl	$0, %edx
-	addl	8(%esp), %ebp           # 4-byte Folded Reload
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	addl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	40(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 16(%esp)                  # 4-byte Folded Spill
+	adcl	36(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	112(%esp), %edi                 # 4-byte Reload
+	adcl	44(%esp), %edi                  # 4-byte Folded Reload
 	adcl	%ebx, %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	%edi, %ecx
-	movl	%ecx, %edi
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebp, %edx
-	imull	44(%esp), %edx          # 4-byte Folded Reload
-	mulxl	60(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	56(%esp), %esi, %ebx    # 4-byte Folded Reload
-	movl	%esi, 8(%esp)           # 4-byte Spill
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, %ebp
+	setb	8(%esp)                         # 1-byte Folded Spill
+	movl	84(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	mulxl	76(%esp), %ecx, %esi            # 4-byte Folded Reload
+	mulxl	72(%esp), %eax, %ebx            # 4-byte Folded Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	addl	%ecx, %ebx
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	mulxl	68(%esp), %ebx, %ecx            # 4-byte Folded Reload
+	adcl	%esi, %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	mulxl	64(%esp), %esi, %ebx            # 4-byte Folded Reload
+	adcl	%ecx, %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	mulxl	104(%esp), %eax, %esi           # 4-byte Folded Reload
+	adcl	%ebx, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	mulxl	100(%esp), %eax, %ebx           # 4-byte Folded Reload
+	adcl	%esi, %eax
+	movl	%eax, %esi
+	mulxl	116(%esp), %ecx, %eax           # 4-byte Folded Reload
+	adcl	%ebx, %ecx
+	movl	%ecx, %ebx
+	adcl	$0, %eax
+	movl	%eax, %edx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	addl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	%eax, 40(%esp)                  # 4-byte Folded Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 36(%esp)                  # 4-byte Folded Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 32(%esp)                  # 4-byte Folded Spill
+	adcl	%edi, 20(%esp)                  # 4-byte Folded Spill
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 112(%esp)                 # 4-byte Spill
+	adcl	%ebp, %edx
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movzbl	8(%esp), %eax                   # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	152(%esp), %eax
+	movl	16(%eax), %edx
+	mulxl	48(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	mulxl	60(%esp), %esi, %eax            # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	96(%esp), %edi, %eax            # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	mulxl	80(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	92(%esp), %ebp, %ebx            # 4-byte Folded Reload
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
 	addl	%ecx, %ebx
-	mulxl	52(%esp), %ecx, %esi    # 4-byte Folded Reload
-	movl	%esi, 4(%esp)           # 4-byte Spill
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	adcl	%edi, %eax
+	movl	%eax, %ebp
+	mulxl	108(%esp), %edi, %eax           # 4-byte Folded Reload
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	mulxl	88(%esp), %edx, %ecx            # 4-byte Folded Reload
+	adcl	%eax, %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	adcl	%esi, %ecx
+	movl	%ecx, %esi
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	28(%esp), %edx                  # 4-byte Folded Reload
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	$0, %eax
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	addl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %ebx                    # 4-byte Reload
+	adcl	36(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	%edi, 16(%esp)                  # 4-byte Folded Spill
+	adcl	112(%esp), %esi                 # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	52(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	setb	20(%esp)                        # 1-byte Folded Spill
+	movl	84(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	mulxl	76(%esp), %esi, %eax            # 4-byte Folded Reload
+	mulxl	72(%esp), %edi, %ecx            # 4-byte Folded Reload
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	addl	%esi, %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	mulxl	68(%esp), %ecx, %esi            # 4-byte Folded Reload
 	adcl	%eax, %ecx
-	mulxl	48(%esp), %eax, %esi    # 4-byte Folded Reload
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	adcl	$0, %esi
-	movl	16(%esp), %edx          # 4-byte Reload
-	andl	$1, %edx
-	addl	%ebp, 8(%esp)           # 4-byte Folded Spill
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	mulxl	64(%esp), %ecx, %eax            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	mulxl	104(%esp), %ecx, %edi           # 4-byte Folded Reload
+	adcl	%eax, %ecx
+	movl	%ecx, %ebx
+	mulxl	100(%esp), %ecx, %eax           # 4-byte Folded Reload
 	adcl	%edi, %ecx
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	$0, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx
-	movl	12(%edx), %edx
-	mulxl	28(%esp), %ebp, %edi    # 4-byte Folded Reload
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	mulxl	32(%esp), %edi, %ebp    # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	addl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	mulxl	40(%esp), %ebp, %edi    # 4-byte Folded Reload
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	mulxl	36(%esp), %edi, %edx    # 4-byte Folded Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	adcl	%ebp, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	movl	32(%esp), %ebp          # 4-byte Reload
-	addl	%ebx, %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	adcl	%ecx, 28(%esp)          # 4-byte Folded Spill
-	adcl	%eax, %edi
-	adcl	%esi, 36(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	sbbl	%ecx, %ecx
-	movl	44(%esp), %edx          # 4-byte Reload
-	imull	%ebp, %edx
-	mulxl	56(%esp), %eax, %esi    # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%edx, %ebx
-	mulxl	60(%esp), %ebp, %eax    # 4-byte Folded Reload
-	addl	%esi, %ebp
-	mulxl	52(%esp), %esi, %edx    # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
+	movl	%ecx, %esi
+	mulxl	116(%esp), %ebp, %ecx           # 4-byte Folded Reload
+	adcl	%eax, %ebp
+	adcl	$0, %ecx
+	movl	56(%esp), %eax                  # 4-byte Reload
+	addl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	%eax, 40(%esp)                  # 4-byte Folded Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 36(%esp)                  # 4-byte Folded Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 32(%esp)                  # 4-byte Folded Spill
+	adcl	16(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 44(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movzbl	20(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	152(%esp), %eax
+	movl	20(%eax), %edx
+	mulxl	48(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	mulxl	60(%esp), %esi, %eax            # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	96(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	mulxl	80(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	92(%esp), %edi, %ebx            # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	addl	%ecx, %ebx
+	movl	%ebx, %edi
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	108(%esp), %ebx, %eax           # 4-byte Folded Reload
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	mulxl	88(%esp), %edx, %ecx            # 4-byte Folded Reload
+	adcl	%eax, %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	adcl	%esi, %ecx
+	movl	%ecx, %esi
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	20(%esp), %edx                  # 4-byte Folded Reload
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	$0, %eax
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	addl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	36(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	(%esp), %edi                    # 4-byte Reload
+	adcl	32(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, (%esp)                    # 4-byte Spill
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	adcl	56(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	%ebp, %esi
+	movl	%esi, %ebp
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	movl	84(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	mulxl	76(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	72(%esp), %edi, %esi            # 4-byte Folded Reload
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	addl	%ecx, %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	mulxl	68(%esp), %esi, %ecx            # 4-byte Folded Reload
 	adcl	%eax, %esi
-	movl	%ebx, %edx
-	mulxl	48(%esp), %edx, %eax    # 4-byte Folded Reload
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	mulxl	64(%esp), %esi, %eax            # 4-byte Folded Reload
+	adcl	%ecx, %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	mulxl	104(%esp), %esi, %ecx           # 4-byte Folded Reload
+	adcl	%eax, %esi
+	mulxl	100(%esp), %edi, %eax           # 4-byte Folded Reload
+	adcl	%ecx, %edi
+	mulxl	116(%esp), %edx, %ecx           # 4-byte Folded Reload
+	adcl	%eax, %edx
+	adcl	$0, %ecx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	addl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 40(%esp)                  # 4-byte Folded Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	%eax, 36(%esp)                  # 4-byte Folded Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 32(%esp)                  # 4-byte Folded Spill
+	adcl	%ebx, %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	%ebp, %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movzbl	16(%esp), %eax                  # 1-byte Folded Reload
 	adcl	$0, %eax
-	andl	$1, %ecx
-	movl	44(%esp), %ebx          # 4-byte Reload
-	addl	32(%esp), %ebx          # 4-byte Folded Reload
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	152(%esp), %ecx
+	movl	24(%ecx), %edx
+	mulxl	80(%esp), %ebx, %eax            # 4-byte Folded Reload
+	mulxl	92(%esp), %esi, %ecx            # 4-byte Folded Reload
+	movl	%esi, 80(%esp)                  # 4-byte Spill
+	addl	%ebx, %ecx
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %esi, %ecx            # 4-byte Folded Reload
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	mulxl	96(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	adcl	%eax, %ebp
+	mulxl	108(%esp), %ebx, %eax           # 4-byte Folded Reload
+	adcl	%ecx, %ebx
+	movl	%ebx, 96(%esp)                  # 4-byte Spill
+	mulxl	60(%esp), %esi, %ebx            # 4-byte Folded Reload
+	mulxl	88(%esp), %edi, %edx            # 4-byte Folded Reload
+	adcl	%eax, %edi
+	adcl	%esi, %edx
+	movl	%edx, %eax
+	adcl	4(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, %esi
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	$0, %edx
+	movl	80(%esp), %ecx                  # 4-byte Reload
+	addl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 80(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, 92(%esp)                  # 4-byte Folded Spill
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 108(%esp)                 # 4-byte Spill
+	movl	(%esp), %ebx                    # 4-byte Reload
+	adcl	%ebx, 96(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	setb	88(%esp)                        # 1-byte Folded Spill
+	movl	84(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	mulxl	72(%esp), %ecx, %eax            # 4-byte Folded Reload
+	movl	%ecx, 84(%esp)                  # 4-byte Spill
+	mulxl	76(%esp), %ebx, %esi            # 4-byte Folded Reload
+	addl	%eax, %ebx
+	mulxl	68(%esp), %eax, %edi            # 4-byte Folded Reload
+	adcl	%esi, %eax
+	mulxl	64(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	adcl	%edi, %ebp
+	mulxl	104(%esp), %esi, %edi           # 4-byte Folded Reload
+	adcl	%ecx, %esi
+	movl	%esi, 60(%esp)                  # 4-byte Spill
+	movl	%edx, %ecx
+	mulxl	100(%esp), %esi, %edx           # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
 	adcl	%edi, %esi
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
+	movl	%ecx, %edx
+	mulxl	116(%esp), %edi, %edx           # 4-byte Folded Reload
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	adcl	$0, %edx
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	addl	80(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	92(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	108(%esp), %eax                 # 4-byte Folded Reload
+	adcl	96(%esp), %ebp                  # 4-byte Folded Reload
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %esi                   # 4-byte Folded Reload
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	adcl	48(%esp), %edx                  # 4-byte Folded Reload
+	movzbl	88(%esp), %ecx                  # 1-byte Folded Reload
 	adcl	$0, %ecx
-	movl	%ebp, %edi
-	subl	56(%esp), %edi          # 4-byte Folded Reload
+	movl	%ebx, 88(%esp)                  # 4-byte Spill
+	subl	72(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 72(%esp)                  # 4-byte Spill
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	sbbl	76(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	sbbl	68(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 68(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebx                  # 4-byte Reload
+	sbbl	64(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 64(%esp)                  # 4-byte Spill
 	movl	%esi, %ebx
-	sbbl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	%edx, %ebx
-	sbbl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	sbbl	48(%esp), %ebx          # 4-byte Folded Reload
+	sbbl	104(%esp), %ebx                 # 4-byte Folded Reload
+	movl	%edi, %eax
+	sbbl	100(%esp), %eax                 # 4-byte Folded Reload
+	movl	%edx, %ebp
+	sbbl	116(%esp), %edx                 # 4-byte Folded Reload
 	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB53_2
-# BB#1:
-	movl	%edi, %ebp
-.LBB53_2:
-	movl	84(%esp), %edi
-	movl	%ebp, (%edi)
-	testb	%cl, %cl
-	jne	.LBB53_4
-# BB#3:
-	movl	56(%esp), %esi          # 4-byte Reload
-.LBB53_4:
-	movl	%esi, 4(%edi)
-	jne	.LBB53_6
-# BB#5:
-	movl	60(%esp), %edx          # 4-byte Reload
-.LBB53_6:
-	movl	%edx, 8(%edi)
-	jne	.LBB53_8
-# BB#7:
-	movl	%ebx, %eax
-.LBB53_8:
-	movl	%eax, 12(%edi)
-	addl	$64, %esp
+	testb	$1, %cl
+	jne	.LBB26_1
+# %bb.2:
+	movl	144(%esp), %ecx
+	movl	%edx, 24(%ecx)
+	jne	.LBB26_3
+.LBB26_4:
+	movl	%eax, 20(%ecx)
+	movl	76(%esp), %edx                  # 4-byte Reload
+	jne	.LBB26_5
+.LBB26_6:
+	movl	%ebx, 16(%ecx)
+	movl	72(%esp), %eax                  # 4-byte Reload
+	movl	64(%esp), %esi                  # 4-byte Reload
+	jne	.LBB26_7
+.LBB26_8:
+	movl	%esi, 12(%ecx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	jne	.LBB26_9
+.LBB26_10:
+	movl	%esi, 8(%ecx)
+	jne	.LBB26_11
+.LBB26_12:
+	movl	%edx, 4(%ecx)
+	je	.LBB26_14
+.LBB26_13:
+	movl	88(%esp), %eax                  # 4-byte Reload
+.LBB26_14:
+	movl	%eax, (%ecx)
+	addl	$124, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end53:
-	.size	mcl_fp_mont4Lbmi2, .Lfunc_end53-mcl_fp_mont4Lbmi2
-
-	.globl	mcl_fp_montNF4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF4Lbmi2,@function
-mcl_fp_montNF4Lbmi2:                    # @mcl_fp_montNF4Lbmi2
-# BB#0:
+.LBB26_1:
+	movl	%ebp, %edx
+	movl	144(%esp), %ecx
+	movl	%edx, 24(%ecx)
+	je	.LBB26_4
+.LBB26_3:
+	movl	%edi, %eax
+	movl	%eax, 20(%ecx)
+	movl	76(%esp), %edx                  # 4-byte Reload
+	je	.LBB26_6
+.LBB26_5:
+	movl	%esi, %ebx
+	movl	%ebx, 16(%ecx)
+	movl	72(%esp), %eax                  # 4-byte Reload
+	movl	64(%esp), %esi                  # 4-byte Reload
+	je	.LBB26_8
+.LBB26_7:
+	movl	60(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 12(%ecx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	je	.LBB26_10
+.LBB26_9:
+	movl	48(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%ecx)
+	je	.LBB26_12
+.LBB26_11:
+	movl	84(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%ecx)
+	jne	.LBB26_13
+	jmp	.LBB26_14
+.Lfunc_end26:
+	.size	mcl_fp_mont7Lbmi2, .Lfunc_end26-mcl_fp_mont7Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montNF7Lbmi2             # -- Begin function mcl_fp_montNF7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF7Lbmi2,@function
+mcl_fp_montNF7Lbmi2:                    # @mcl_fp_montNF7Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$52, %esp
-	movl	76(%esp), %esi
-	movl	(%esi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	4(%esi), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx
-	movl	(%ecx), %ecx
-	mulxl	%ecx, %edi, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	mulxl	%ecx, %ebp, %eax
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	addl	%edi, %eax
-	movl	8(%esi), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ebx, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	12(%esi), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	mulxl	%ecx, %esi, %edi
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	adcl	$0, %edi
-	movl	84(%esp), %ecx
-	movl	-4(%ecx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	%ebp, %edx
-	imull	%ecx, %edx
-	movl	84(%esp), %ecx
-	movl	(%ecx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ecx, %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	addl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	84(%esp), %ecx
-	movl	4(%ecx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ecx, %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	%eax, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %eax
-	adcl	%ebx, %ecx
-	movl	%ecx, %ebp
-	movl	84(%esp), %ecx
-	movl	12(%ecx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ebx, %edx
-	adcl	%esi, %ebx
-	adcl	$0, %edi
-	movl	8(%esp), %ecx           # 4-byte Reload
-	addl	%ecx, 12(%esp)          # 4-byte Folded Spill
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	%eax, %ebx
-	adcl	%edx, %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	80(%esp), %eax
+	subl	$104, %esp
+	movl	128(%esp), %eax
+	movl	(%eax), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
 	movl	4(%eax), %edx
-	mulxl	24(%esp), %esi, %edi    # 4-byte Folded Reload
-	mulxl	28(%esp), %ecx, %eax    # 4-byte Folded Reload
-	addl	%esi, %eax
-	mulxl	16(%esp), %ebp, %esi    # 4-byte Folded Reload
-	movl	%esi, (%esp)            # 4-byte Spill
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	132(%esp), %ecx
+	movl	(%ecx), %ebp
+	mulxl	%ebp, %ecx, %esi
+	movl	%edi, %edx
+	mulxl	%ebp, %edi, %edx
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	addl	%ecx, %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	8(%eax), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	mulxl	%ebp, %ecx, %edi
+	adcl	%esi, %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	12(%eax), %edx
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	mulxl	%ebp, %ecx, %ebx
+	adcl	%edi, %ecx
+	movl	%ecx, %edi
+	movl	16(%eax), %edx
+	movl	%edx, 92(%esp)                  # 4-byte Spill
+	mulxl	%ebp, %edx, %ecx
+	adcl	%ebx, %edx
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	movl	20(%eax), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	mulxl	%ebp, %edx, %esi
+	adcl	%ecx, %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	movl	24(%eax), %edx
+	movl	%edx, 88(%esp)                  # 4-byte Spill
+	mulxl	%ebp, %ecx, %eax
+	adcl	%esi, %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	136(%esp), %ebx
+	movl	-4(%ebx), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%eax, %edx
+	movl	(%ebx), %ecx
+	movl	%ecx, 84(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %ecx, %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	addl	%eax, %ecx
+	movl	4(%ebx), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %ebp, %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	60(%esp), %ebp                  # 4-byte Folded Reload
+	movl	8(%ebx), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	mulxl	%eax, %eax, %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%ebx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	mulxl	%eax, %eax, %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	%edi, %eax
+	movl	%eax, %edi
+	movl	16(%ebx), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	mulxl	%eax, %esi, %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	80(%esp), %esi                  # 4-byte Folded Reload
+	movl	20(%ebx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	mulxl	%eax, %eax, %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	76(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %ecx
+	movl	24(%ebx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	mulxl	%eax, %edx, %eax
+	adcl	4(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	$0, %edx
+	addl	24(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	%ebp, 16(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	adcl	%eax, %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	132(%esp), %eax
+	movl	4(%eax), %edx
+	mulxl	52(%esp), %ecx, %esi            # 4-byte Folded Reload
+	mulxl	44(%esp), %edi, %eax            # 4-byte Folded Reload
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	addl	%ecx, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	mulxl	40(%esp), %eax, %edi            # 4-byte Folded Reload
+	adcl	%esi, %eax
+	movl	%eax, %ecx
+	mulxl	72(%esp), %esi, %ebx            # 4-byte Folded Reload
+	adcl	%edi, %esi
+	mulxl	92(%esp), %eax, %ebp            # 4-byte Folded Reload
+	adcl	%ebx, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	mulxl	68(%esp), %eax, %edi            # 4-byte Folded Reload
+	adcl	%ebp, %eax
+	movl	%eax, %ebx
+	mulxl	88(%esp), %ebp, %eax            # 4-byte Folded Reload
 	adcl	%edi, %ebp
-	mulxl	20(%esp), %edi, %esi    # 4-byte Folded Reload
-	adcl	(%esp), %edi            # 4-byte Folded Reload
-	adcl	$0, %esi
-	addl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	adcl	%ebx, %ebp
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	adcl	$0, %esi
-	movl	%ecx, %edx
-	imull	48(%esp), %edx          # 4-byte Folded Reload
-	mulxl	44(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	addl	(%esp), %ebx            # 4-byte Folded Reload
-	mulxl	40(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	%eax, %ebx
-	movl	%ebx, %eax
-	mulxl	36(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	adcl	%ebp, %ebx
-	movl	%ebx, %ebp
-	mulxl	32(%esp), %ebx, %edx    # 4-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, %edx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	addl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	%ecx, 4(%esp)                   # 4-byte Folded Spill
+	adcl	32(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ebp                    # 4-byte Folded Reload
+	adcl	$0, %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	imull	%eax, %edx
+	movl	%eax, %ebx
+	mulxl	84(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	addl	%ebx, %eax
+	mulxl	64(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	%edi, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	60(%esp), %edi, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	12(%esp), %edi                  # 4-byte Folded Reload
+	mulxl	56(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	%esi, %eax
+	movl	%eax, %esi
+	mulxl	96(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, %ecx
+	mulxl	80(%esp), %eax, %ebx            # 4-byte Folded Reload
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	mulxl	76(%esp), %eax, %edx            # 4-byte Folded Reload
+	adcl	%ebp, %eax
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	$0, %ebx
+	movl	(%esp), %ebp                    # 4-byte Reload
+	addl	%ebp, 24(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	%edx, %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	movl	132(%esp), %eax
+	movl	8(%eax), %edx
+	mulxl	52(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	44(%esp), %esi, %edi            # 4-byte Folded Reload
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	addl	%ecx, %edi
+	mulxl	40(%esp), %ecx, %ebx            # 4-byte Folded Reload
+	adcl	%eax, %ecx
+	mulxl	72(%esp), %eax, %esi            # 4-byte Folded Reload
+	adcl	%ebx, %eax
+	movl	%eax, %ebx
+	mulxl	92(%esp), %eax, %ebp            # 4-byte Folded Reload
+	adcl	%esi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	68(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	adcl	%ebp, %eax
+	movl	%eax, %esi
+	mulxl	88(%esp), %ebp, %eax            # 4-byte Folded Reload
+	adcl	36(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, %edx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	addl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	32(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	28(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	$0, %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	84(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	addl	%ebx, %eax
+	mulxl	64(%esp), %ebx, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
 	adcl	%edi, %ebx
+	mulxl	60(%esp), %edi, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	mulxl	56(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %ecx
+	mulxl	96(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	80(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	mulxl	76(%esp), %eax, %edx            # 4-byte Folded Reload
+	adcl	%ebp, %eax
+	movl	16(%esp), %esi                  # 4-byte Reload
 	adcl	$0, %esi
-	addl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	%ecx, %ebx
-	movl	%ebx, 8(%esp)           # 4-byte Spill
+	addl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 24(%esp)                  # 4-byte Folded Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
 	adcl	%edx, %esi
-	movl	80(%esp), %ecx
-	movl	8(%ecx), %edx
-	mulxl	24(%esp), %ecx, %ebx    # 4-byte Folded Reload
-	mulxl	28(%esp), %eax, %ebp    # 4-byte Folded Reload
-	addl	%ecx, %ebp
-	mulxl	16(%esp), %edi, %ecx    # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	132(%esp), %eax
+	movl	12(%eax), %edx
+	mulxl	52(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	44(%esp), %edi, %esi            # 4-byte Folded Reload
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	addl	%ecx, %esi
+	mulxl	40(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	adcl	%eax, %ebp
+	mulxl	72(%esp), %eax, %edi            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	92(%esp), %ecx, %ebx            # 4-byte Folded Reload
+	adcl	%edi, %ecx
+	mulxl	68(%esp), %edi, %eax            # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
 	adcl	%ebx, %edi
-	mulxl	20(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	adcl	$0, %ecx
-	addl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)            # 4-byte Spill
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	adcl	%esi, %ebx
-	adcl	$0, %ecx
-	movl	%eax, %edx
-	imull	48(%esp), %edx          # 4-byte Folded Reload
-	mulxl	44(%esp), %eax, %esi    # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	addl	(%esp), %eax            # 4-byte Folded Reload
-	mulxl	40(%esp), %eax, %esi    # 4-byte Folded Reload
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	adcl	%ebp, %eax
-	mulxl	36(%esp), %ebp, %esi    # 4-byte Folded Reload
-	movl	%esi, (%esp)            # 4-byte Spill
+	mulxl	88(%esp), %edx, %eax            # 4-byte Folded Reload
+	adcl	36(%esp), %edx                  # 4-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, %ebx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	addl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	28(%esp), %edi                  # 4-byte Folded Reload
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	$0, %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	84(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	addl	%ebx, %eax
+	mulxl	64(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	mulxl	60(%esp), %ebx, %esi            # 4-byte Folded Reload
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	adcl	%ebp, %ebx
+	mulxl	56(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, %esi
+	mulxl	96(%esp), %eax, %ebp            # 4-byte Folded Reload
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	adcl	%ecx, %eax
+	movl	%eax, %ecx
+	mulxl	80(%esp), %ebp, %eax            # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
 	adcl	%edi, %ebp
-	mulxl	32(%esp), %esi, %edx    # 4-byte Folded Reload
-	adcl	%ebx, %esi
-	adcl	$0, %ecx
-	addl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	adcl	(%esp), %esi            # 4-byte Folded Reload
-	adcl	%edx, %ecx
-	movl	80(%esp), %eax
-	movl	12(%eax), %edx
-	mulxl	24(%esp), %ebx, %ebp    # 4-byte Folded Reload
-	mulxl	28(%esp), %edi, %eax    # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
+	mulxl	76(%esp), %edi, %eax            # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, %eax
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	$0, %edi
+	movl	12(%esp), %edx                  # 4-byte Reload
+	addl	%edx, 4(%esp)                   # 4-byte Folded Spill
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	(%esp), %ebp                    # 4-byte Folded Reload
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	132(%esp), %eax
+	movl	16(%eax), %edx
+	mulxl	52(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	44(%esp), %esi, %edi            # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	addl	%ecx, %edi
+	mulxl	40(%esp), %ecx, %ebx            # 4-byte Folded Reload
+	adcl	%eax, %ecx
+	mulxl	72(%esp), %eax, %esi            # 4-byte Folded Reload
+	adcl	%ebx, %eax
+	movl	%eax, %ebx
+	mulxl	92(%esp), %eax, %ebp            # 4-byte Folded Reload
+	adcl	%esi, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	68(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	adcl	%ebp, %eax
+	movl	%eax, %esi
+	mulxl	88(%esp), %ebp, %eax            # 4-byte Folded Reload
+	adcl	36(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	$0, %eax
+	movl	(%esp), %edx                    # 4-byte Reload
+	addl	4(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	adcl	12(%esp), %edi                  # 4-byte Folded Reload
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 24(%esp)                  # 4-byte Folded Spill
+	adcl	28(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	adcl	16(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	movl	(%esp), %ebx                    # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	84(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
 	addl	%ebx, %eax
-	mulxl	16(%esp), %edi, %ebx    # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	adcl	%ebp, %edi
-	mulxl	20(%esp), %ebp, %ebx    # 4-byte Folded Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	$0, %ebx
-	movl	28(%esp), %edx          # 4-byte Reload
-	addl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	adcl	%esi, %edi
-	adcl	%ecx, %ebp
-	adcl	$0, %ebx
-	movl	48(%esp), %edx          # 4-byte Reload
-	imull	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	mulxl	44(%esp), %ecx, %esi    # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	addl	28(%esp), %ecx          # 4-byte Folded Reload
-	mulxl	40(%esp), %esi, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	%eax, %esi
-	movl	48(%esp), %edx          # 4-byte Reload
-	mulxl	36(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	adcl	%edi, %ecx
-	movl	48(%esp), %edx          # 4-byte Reload
-	mulxl	32(%esp), %eax, %edx    # 4-byte Folded Reload
+	mulxl	64(%esp), %ebx, %ecx            # 4-byte Folded Reload
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	%edi, %ebx
+	mulxl	60(%esp), %edi, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	mulxl	56(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %ecx
+	mulxl	96(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	80(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 100(%esp)                 # 4-byte Spill
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	mulxl	76(%esp), %eax, %edx            # 4-byte Folded Reload
 	adcl	%ebp, %eax
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	$0, %esi
+	addl	4(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 24(%esp)                  # 4-byte Folded Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	adcl	100(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	%edx, %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	132(%esp), %eax
+	movl	20(%eax), %edx
+	mulxl	52(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	44(%esp), %edi, %esi            # 4-byte Folded Reload
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	addl	%ecx, %esi
+	mulxl	40(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	adcl	%eax, %ebp
+	mulxl	72(%esp), %eax, %edi            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	mulxl	92(%esp), %ecx, %ebx            # 4-byte Folded Reload
+	adcl	%edi, %ecx
+	mulxl	68(%esp), %edi, %eax            # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	%ebx, %edi
+	mulxl	88(%esp), %edx, %eax            # 4-byte Folded Reload
+	adcl	36(%esp), %edx                  # 4-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, %ebx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	addl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	28(%esp), %edi                  # 4-byte Folded Reload
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
 	adcl	$0, %ebx
-	addl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	adcl	%edx, %ebx
-	movl	%esi, %edi
-	subl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	sbbl	40(%esp), %ebp          # 4-byte Folded Reload
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	84(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	addl	%ebx, %eax
+	mulxl	64(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	mulxl	60(%esp), %ebx, %esi            # 4-byte Folded Reload
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	adcl	%ebp, %ebx
+	mulxl	56(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, %esi
+	mulxl	96(%esp), %eax, %ebp            # 4-byte Folded Reload
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	adcl	%ecx, %eax
+	movl	%eax, %ecx
+	mulxl	80(%esp), %ebp, %eax            # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	%edi, %ebp
+	mulxl	76(%esp), %edi, %eax            # 4-byte Folded Reload
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, %eax
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	$0, %edi
+	movl	12(%esp), %edx                  # 4-byte Reload
+	addl	%edx, 4(%esp)                   # 4-byte Folded Spill
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	100(%esp), %edi                 # 4-byte Folded Reload
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	132(%esp), %eax
+	movl	24(%eax), %edx
+	mulxl	52(%esp), %ecx, %eax            # 4-byte Folded Reload
+	mulxl	44(%esp), %esi, %ebx            # 4-byte Folded Reload
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	addl	%ecx, %ebx
+	mulxl	40(%esp), %ecx, %edi            # 4-byte Folded Reload
+	adcl	%eax, %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	mulxl	72(%esp), %eax, %esi            # 4-byte Folded Reload
+	adcl	%edi, %eax
+	movl	%eax, %edi
+	mulxl	92(%esp), %ecx, %ebp            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	mulxl	68(%esp), %eax, %esi            # 4-byte Folded Reload
+	adcl	%ebp, %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	mulxl	88(%esp), %eax, %edx            # 4-byte Folded Reload
+	adcl	%esi, %eax
+	adcl	$0, %edx
+	movl	%edx, %esi
+	movl	44(%esp), %edx                  # 4-byte Reload
+	addl	4(%esp), %edx                   # 4-byte Folded Reload
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	24(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	12(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, (%esp)                    # 4-byte Spill
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	%ecx, 52(%esp)                  # 4-byte Folded Spill
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	$0, %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %ecx
+	imull	%edx, %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
 	movl	%eax, %edx
-	sbbl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	sbbl	32(%esp), %edx          # 4-byte Folded Reload
-	testl	%edx, %edx
-	js	.LBB54_2
-# BB#1:
-	movl	%edi, %esi
-.LBB54_2:
-	movl	72(%esp), %edi
-	movl	%esi, (%edi)
-	js	.LBB54_4
-# BB#3:
+	mulxl	84(%esp), %edx, %esi            # 4-byte Folded Reload
+	movl	%esi, 72(%esp)                  # 4-byte Spill
+	addl	%ecx, %edx
+	movl	%eax, %edx
+	mulxl	64(%esp), %esi, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	adcl	%ebx, %esi
+	mulxl	60(%esp), %edi, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 88(%esp)                  # 4-byte Spill
+	adcl	%ebp, %edi
+	mulxl	56(%esp), %ebx, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	mulxl	96(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	68(%esp), %ebp                  # 4-byte Folded Reload
+	mulxl	80(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	adcl	52(%esp), %eax                  # 4-byte Folded Reload
+	movl	48(%esp), %edx                  # 4-byte Reload
+	mulxl	76(%esp), %edx, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	adcl	40(%esp), %edx                  # 4-byte Folded Reload
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	addl	72(%esp), %esi                  # 4-byte Folded Reload
+	adcl	92(%esp), %edi                  # 4-byte Folded Reload
+	adcl	88(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	16(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	adcl	68(%esp), %edx                  # 4-byte Folded Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	subl	84(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	%edi, %ecx
+	movl	44(%esp), %edi                  # 4-byte Reload
+	sbbl	64(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 84(%esp)                  # 4-byte Spill
+	movl	%ebx, 72(%esp)                  # 4-byte Spill
+	movl	%ebx, %ecx
+	movl	%ebp, %ebx
+	sbbl	60(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
 	movl	%ebp, %ecx
-.LBB54_4:
+	sbbl	56(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	sbbl	96(%esp), %eax                  # 4-byte Folded Reload
+	movl	%edx, %esi
+	sbbl	80(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edi, %ecx
+	sbbl	76(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, %ebp
+	sarl	$31, %ebp
+	testl	%ebp, %ebp
+	js	.LBB27_1
+# %bb.2:
+	movl	124(%esp), %edi
+	movl	%ecx, 24(%edi)
+	js	.LBB27_3
+.LBB27_4:
+	movl	%esi, 20(%edi)
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	js	.LBB27_5
+.LBB27_6:
+	movl	%eax, 16(%edi)
+	movl	64(%esp), %edx                  # 4-byte Reload
+	movl	60(%esp), %eax                  # 4-byte Reload
+	js	.LBB27_7
+.LBB27_8:
+	movl	%eax, 12(%edi)
+	movl	48(%esp), %eax                  # 4-byte Reload
+	js	.LBB27_9
+.LBB27_10:
+	movl	%edx, 8(%edi)
+	js	.LBB27_11
+.LBB27_12:
 	movl	%ecx, 4(%edi)
-	js	.LBB54_6
-# BB#5:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB54_6:
-	movl	%eax, 8(%edi)
-	js	.LBB54_8
-# BB#7:
-	movl	%edx, %ebx
-.LBB54_8:
-	movl	%ebx, 12(%edi)
-	addl	$52, %esp
+	jns	.LBB27_14
+.LBB27_13:
+	movl	52(%esp), %eax                  # 4-byte Reload
+.LBB27_14:
+	movl	%eax, (%edi)
+	addl	$104, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end54:
-	.size	mcl_fp_montNF4Lbmi2, .Lfunc_end54-mcl_fp_montNF4Lbmi2
-
-	.globl	mcl_fp_montRed4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed4Lbmi2,@function
-mcl_fp_montRed4Lbmi2:                   # @mcl_fp_montRed4Lbmi2
-# BB#0:
+.LBB27_1:
+	movl	%edi, %ecx
+	movl	124(%esp), %edi
+	movl	%ecx, 24(%edi)
+	jns	.LBB27_4
+.LBB27_3:
+	movl	%edx, %esi
+	movl	%esi, 20(%edi)
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB27_6
+.LBB27_5:
+	movl	56(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%edi)
+	movl	64(%esp), %edx                  # 4-byte Reload
+	movl	60(%esp), %eax                  # 4-byte Reload
+	jns	.LBB27_8
+.LBB27_7:
+	movl	%ebx, %eax
+	movl	%eax, 12(%edi)
+	movl	48(%esp), %eax                  # 4-byte Reload
+	jns	.LBB27_10
+.LBB27_9:
+	movl	72(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%edi)
+	jns	.LBB27_12
+.LBB27_11:
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	js	.LBB27_13
+	jmp	.LBB27_14
+.Lfunc_end27:
+	.size	mcl_fp_montNF7Lbmi2, .Lfunc_end27-mcl_fp_montNF7Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRed7Lbmi2            # -- Begin function mcl_fp_montRed7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed7Lbmi2,@function
+mcl_fp_montRed7Lbmi2:                   # @mcl_fp_montRed7Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$52, %esp
-	movl	80(%esp), %ecx
-	movl	-4(%ecx), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	(%ecx), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp
-	movl	(%ebp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
+	subl	$76, %esp
+	movl	104(%esp), %ebp
+	movl	-4(%ebp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	100(%esp), %ecx
+	movl	(%ecx), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
 	imull	%eax, %edx
-	movl	12(%ecx), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	8(%ecx), %ebx
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	4(%ecx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	mulxl	%esi, %esi, %ecx
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	mulxl	%ebx, %esi, %ecx
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebx, %ecx
-	mulxl	%edi, %edx, %esi
-	addl	%ebx, %esi
-	movl	%ecx, %edi
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	20(%esp), %ebx          # 4-byte Reload
-	adcl	16(%esp), %ebx          # 4-byte Folded Reload
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	36(%esp), %edx          # 4-byte Folded Reload
-	adcl	4(%ebp), %esi
-	adcl	8(%ebp), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	adcl	12(%ebp), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	adcl	16(%ebp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	28(%ebp), %ecx
-	movl	24(%ebp), %edx
-	movl	20(%ebp), %edi
-	adcl	$0, %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	%esi, %edx
-	imull	40(%esp), %edx          # 4-byte Folded Reload
-	mulxl	%eax, %ebp, %edi
-	mulxl	44(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	addl	%ebp, %eax
-	mulxl	48(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
-	adcl	%edi, %ebp
-	movl	28(%esp), %ecx          # 4-byte Reload
-	mulxl	%ecx, %edi, %edx
-	adcl	(%esp), %edi            # 4-byte Folded Reload
-	adcl	$0, %edx
-	addl	%esi, 4(%esp)           # 4-byte Folded Spill
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	adcl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	$0, 16(%esp)            # 4-byte Folded Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ebx
-	movl	%eax, %edx
-	imull	40(%esp), %edx          # 4-byte Folded Reload
-	mulxl	%ecx, %esi, %ecx
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	mulxl	32(%esp), %esi, %ecx    # 4-byte Folded Reload
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	mulxl	44(%esp), %esi, %ecx    # 4-byte Folded Reload
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	addl	(%esp), %ecx            # 4-byte Folded Reload
-	mulxl	48(%esp), %esi, %edx    # 4-byte Folded Reload
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	24(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	%eax, 8(%esp)           # 4-byte Folded Spill
-	adcl	%ebp, %ecx
+	movl	12(%ebp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %ebx, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%ebp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %ecx, %esi
+	movl	(%ebp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	mulxl	%eax, %edi, %eax
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	addl	%ecx, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	8(%ebp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ecx, %eax
+	adcl	%esi, %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	%ebx, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%ebp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	mulxl	%eax, %edi, %ecx
+	adcl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	20(%ebp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	mulxl	%eax, %esi, %ebx
+	adcl	%ecx, %esi
+	movl	24(%ebp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	mulxl	%eax, %eax, %ebp
+	adcl	%ebx, %eax
+	adcl	$0, %ebp
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	addl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	100(%esp), %ecx
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	4(%ecx), %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	8(%ecx), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	12(%ecx), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	16(%ecx), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	adcl	20(%ecx), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	24(%ecx), %eax
+	adcl	28(%ecx), %ebp
+	setb	16(%esp)                        # 1-byte Folded Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	48(%esp), %ecx, %esi            # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %ebx, %edi            # 4-byte Folded Reload
+	addl	%esi, %ebx
+	mulxl	44(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%edi, %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	mulxl	56(%esp), %ecx, %edi            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	mulxl	68(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%edi, %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	mulxl	40(%esp), %ecx, %edi            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	mulxl	64(%esp), %esi, %edx            # 4-byte Folded Reload
 	adcl	%edi, %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 20(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ebx
-	movl	40(%esp), %edx          # 4-byte Reload
-	imull	%ecx, %edx
-	mulxl	44(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	mulxl	32(%esp), %ebp, %esi    # 4-byte Folded Reload
-	addl	%eax, %ebp
+	movzbl	16(%esp), %edi                  # 1-byte Folded Reload
+	adcl	%edx, %edi
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	8(%esp), %edx                   # 4-byte Folded Reload
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	%edx, 12(%esp)                  # 4-byte Folded Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 24(%esp)                  # 4-byte Folded Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 60(%esp)                  # 4-byte Folded Spill
+	adcl	%eax, %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	%ebp, %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	100(%esp), %eax
+	adcl	32(%eax), %edi
+	movl	%edi, %esi
+	setb	16(%esp)                        # 1-byte Folded Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	48(%esp), %eax, %ebp            # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %edi, %ecx            # 4-byte Folded Reload
+	addl	%ebp, %edi
+	mulxl	44(%esp), %eax, %ebp            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	mulxl	56(%esp), %eax, %ecx            # 4-byte Folded Reload
+	adcl	%ebp, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	mulxl	68(%esp), %eax, %ebp            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	mulxl	40(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	adcl	%ebp, %eax
+	movl	%eax, %ecx
+	mulxl	64(%esp), %ebp, %eax            # 4-byte Folded Reload
+	adcl	72(%esp), %ebp                  # 4-byte Folded Reload
+	movzbl	16(%esp), %edx                  # 1-byte Folded Reload
+	adcl	%eax, %edx
 	movl	%edx, %eax
-	mulxl	48(%esp), %edi, %edx    # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	%esi, %edi
+	addl	%ebx, 20(%esp)                  # 4-byte Folded Spill
+	adcl	12(%esp), %edi                  # 4-byte Folded Reload
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 4(%esp)                   # 4-byte Folded Spill
+	movl	60(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 8(%esp)                   # 4-byte Folded Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 36(%esp)                  # 4-byte Folded Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	%esi, %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	100(%esp), %ecx
+	adcl	36(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	72(%esp)                        # 1-byte Folded Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	imull	%edi, %edx
+	mulxl	48(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %ebx, %eax            # 4-byte Folded Reload
+	addl	%esi, %ebx
+	mulxl	44(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%eax, %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	mulxl	56(%esp), %ecx, %eax            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	mulxl	68(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%eax, %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	mulxl	40(%esp), %ecx, %eax            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	movl	%ecx, %esi
+	mulxl	64(%esp), %ecx, %ebp            # 4-byte Folded Reload
+	adcl	%eax, %ecx
+	movl	%ecx, %eax
+	movzbl	72(%esp), %ecx                  # 1-byte Folded Reload
+	adcl	%ebp, %ecx
+	addl	%edi, 16(%esp)                  # 4-byte Folded Spill
+	adcl	4(%esp), %ebx                   # 4-byte Folded Reload
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	%edx, (%esp)                    # 4-byte Folded Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 12(%esp)                  # 4-byte Folded Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 60(%esp)                  # 4-byte Folded Spill
+	adcl	24(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	100(%esp), %eax
+	adcl	40(%eax), %ecx
+	setb	16(%esp)                        # 1-byte Folded Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	48(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %edi, %eax            # 4-byte Folded Reload
+	addl	%esi, %edi
+	mulxl	44(%esp), %ebp, %esi            # 4-byte Folded Reload
+	adcl	%eax, %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	mulxl	56(%esp), %ebp, %eax            # 4-byte Folded Reload
+	adcl	%esi, %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	mulxl	68(%esp), %ebp, %esi            # 4-byte Folded Reload
+	adcl	%eax, %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	mulxl	40(%esp), %ebp, %eax            # 4-byte Folded Reload
+	adcl	%esi, %ebp
+	mulxl	64(%esp), %esi, %edx            # 4-byte Folded Reload
+	adcl	%eax, %esi
+	movzbl	16(%esp), %eax                  # 1-byte Folded Reload
+	adcl	%edx, %eax
 	movl	%eax, %edx
-	mulxl	28(%esp), %edx, %esi    # 4-byte Folded Reload
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %esi
-	addl	%ecx, 40(%esp)          # 4-byte Folded Spill
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	adcl	$0, %ebx
-	movl	%ebp, %ecx
-	subl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, %eax
-	sbbl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
+	addl	%ebx, 20(%esp)                  # 4-byte Folded Spill
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 8(%esp)                   # 4-byte Folded Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 24(%esp)                  # 4-byte Folded Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 36(%esp)                  # 4-byte Folded Spill
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	adcl	%ecx, %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	100(%esp), %eax
+	adcl	44(%eax), %edx
+	movl	%edx, %esi
+	setb	16(%esp)                        # 1-byte Folded Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	imull	%edi, %edx
+	mulxl	48(%esp), %ecx, %ebx            # 4-byte Folded Reload
+	mulxl	52(%esp), %ebp, %eax            # 4-byte Folded Reload
+	addl	%ebx, %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	mulxl	44(%esp), %ebp, %ebx            # 4-byte Folded Reload
+	adcl	%eax, %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	mulxl	56(%esp), %ebp, %eax            # 4-byte Folded Reload
+	adcl	%ebx, %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	mulxl	68(%esp), %ebp, %ebx            # 4-byte Folded Reload
+	adcl	%eax, %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	mulxl	40(%esp), %ebp, %eax            # 4-byte Folded Reload
+	adcl	%ebx, %ebp
+	mulxl	64(%esp), %ebx, %edx            # 4-byte Folded Reload
+	adcl	%eax, %ebx
+	movzbl	16(%esp), %eax                  # 1-byte Folded Reload
+	adcl	%edx, %eax
+	movl	%eax, %edx
+	addl	%edi, %ecx
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 12(%esp)                  # 4-byte Folded Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 60(%esp)                  # 4-byte Folded Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 32(%esp)                  # 4-byte Folded Spill
+	adcl	(%esp), %ebp                    # 4-byte Folded Reload
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	adcl	%esi, %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	100(%esp), %eax
+	adcl	48(%eax), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	mulxl	48(%esp), %eax, %ecx            # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %ebx, %esi            # 4-byte Folded Reload
+	addl	%ecx, %ebx
+	mulxl	44(%esp), %ecx, %eax            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	mulxl	56(%esp), %esi, %edi            # 4-byte Folded Reload
+	adcl	%eax, %esi
+	mulxl	68(%esp), %ecx, %ebp            # 4-byte Folded Reload
+	adcl	%edi, %ecx
 	movl	%edx, %eax
-	sbbl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	sbbl	28(%esp), %eax          # 4-byte Folded Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB55_2
-# BB#1:
-	movl	%ecx, %ebp
-.LBB55_2:
-	movl	72(%esp), %ecx
-	movl	%ebp, (%ecx)
-	testb	%bl, %bl
-	jne	.LBB55_4
-# BB#3:
-	movl	44(%esp), %edi          # 4-byte Reload
-.LBB55_4:
-	movl	%edi, 4(%ecx)
-	jne	.LBB55_6
-# BB#5:
-	movl	48(%esp), %edx          # 4-byte Reload
-.LBB55_6:
-	movl	%edx, 8(%ecx)
-	jne	.LBB55_8
-# BB#7:
-	movl	%eax, %esi
-.LBB55_8:
-	movl	%esi, 12(%ecx)
-	addl	$52, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end55:
-	.size	mcl_fp_montRed4Lbmi2, .Lfunc_end55-mcl_fp_montRed4Lbmi2
-
-	.globl	mcl_fp_addPre4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre4Lbmi2,@function
-mcl_fp_addPre4Lbmi2:                    # @mcl_fp_addPre4Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	20(%esp), %esi
-	addl	(%esi), %ecx
-	adcl	4(%esi), %edx
-	movl	12(%eax), %edi
-	movl	8(%eax), %eax
-	adcl	8(%esi), %eax
-	movl	12(%esi), %esi
-	movl	16(%esp), %ebx
-	movl	%ecx, (%ebx)
-	movl	%edx, 4(%ebx)
-	movl	%eax, 8(%ebx)
-	adcl	%edi, %esi
-	movl	%esi, 12(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end56:
-	.size	mcl_fp_addPre4Lbmi2, .Lfunc_end56-mcl_fp_addPre4Lbmi2
-
-	.globl	mcl_fp_subPre4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre4Lbmi2,@function
-mcl_fp_subPre4Lbmi2:                    # @mcl_fp_subPre4Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %esi
+	mulxl	40(%esp), %edx, %edi            # 4-byte Folded Reload
+	adcl	%ebp, %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, %edx
+	mulxl	64(%esp), %ebp, %edx            # 4-byte Folded Reload
+	adcl	%edi, %ebp
+	movzbl	16(%esp), %edi                  # 1-byte Folded Reload
+	adcl	%edx, %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	addl	4(%esp), %eax                   # 4-byte Folded Reload
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	60(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %esi                  # 4-byte Folded Reload
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	36(%esp), %ebp                  # 4-byte Folded Reload
+	movl	100(%esp), %eax
+	adcl	52(%eax), %edi
 	xorl	%eax, %eax
-	movl	28(%esp), %edi
-	subl	(%edi), %edx
-	sbbl	4(%edi), %esi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edi), %ebx
-	movl	12(%edi), %edi
-	movl	12(%ecx), %ecx
-	movl	20(%esp), %ebp
-	movl	%edx, (%ebp)
-	movl	%esi, 4(%ebp)
-	movl	%ebx, 8(%ebp)
-	sbbl	%edi, %ecx
-	movl	%ecx, 12(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end57:
-	.size	mcl_fp_subPre4Lbmi2, .Lfunc_end57-mcl_fp_subPre4Lbmi2
-
-	.globl	mcl_fp_shr1_4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_4Lbmi2,@function
-mcl_fp_shr1_4Lbmi2:                     # @mcl_fp_shr1_4Lbmi2
-# BB#0:
-	pushl	%edi
-	pushl	%esi
-	movl	16(%esp), %eax
-	movl	12(%eax), %ecx
-	movl	8(%eax), %edx
-	movl	(%eax), %esi
-	movl	4(%eax), %eax
-	shrdl	$1, %eax, %esi
-	movl	12(%esp), %edi
-	movl	%esi, (%edi)
-	shrdl	$1, %edx, %eax
-	movl	%eax, 4(%edi)
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 8(%edi)
-	shrl	%ecx
-	movl	%ecx, 12(%edi)
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end58:
-	.size	mcl_fp_shr1_4Lbmi2, .Lfunc_end58-mcl_fp_shr1_4Lbmi2
-
-	.globl	mcl_fp_add4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add4Lbmi2,@function
-mcl_fp_add4Lbmi2:                       # @mcl_fp_add4Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	28(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %ecx
-	movl	24(%esp), %esi
-	addl	(%esi), %eax
-	adcl	4(%esi), %ecx
-	movl	8(%edi), %edx
-	adcl	8(%esi), %edx
-	movl	12(%esi), %esi
-	adcl	12(%edi), %esi
-	movl	20(%esp), %edi
-	movl	%eax, (%edi)
-	movl	%ecx, 4(%edi)
-	movl	%edx, 8(%edi)
-	movl	%esi, 12(%edi)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	32(%esp), %ebp
-	subl	(%ebp), %eax
-	sbbl	4(%ebp), %ecx
-	sbbl	8(%ebp), %edx
-	sbbl	12(%ebp), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB59_2
-# BB#1:                                 # %nocarry
-	movl	%eax, (%edi)
-	movl	%ecx, 4(%edi)
-	movl	%edx, 8(%edi)
-	movl	%esi, 12(%edi)
-.LBB59_2:                               # %carry
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	subl	48(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	sbbl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	%esi, %edx
+	movl	(%esp), %esi                    # 4-byte Reload
+	sbbl	44(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	sbbl	56(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	%esi, %ecx
+	sbbl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ebp, %ebx
+	movl	%ebp, %edx
+	movl	%edi, %ebp
+	sbbl	40(%esp), %edx                  # 4-byte Folded Reload
+	sbbl	64(%esp), %edi                  # 4-byte Folded Reload
+	sbbl	%eax, %eax
+	testb	$1, %al
+	jne	.LBB28_1
+# %bb.2:
+	movl	96(%esp), %eax
+	movl	%edi, 24(%eax)
+	jne	.LBB28_3
+.LBB28_4:
+	movl	%edx, 20(%eax)
+	je	.LBB28_6
+.LBB28_5:
+	movl	%esi, %ecx
+.LBB28_6:
+	movl	%ecx, 16(%eax)
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	movl	44(%esp), %edx                  # 4-byte Reload
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB28_7
+# %bb.8:
+	movl	%ecx, 12(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB28_9
+.LBB28_10:
+	movl	%edx, 8(%eax)
+	jne	.LBB28_11
+.LBB28_12:
+	movl	%ebx, 4(%eax)
+	je	.LBB28_14
+.LBB28_13:
+	movl	4(%esp), %ecx                   # 4-byte Reload
+.LBB28_14:
+	movl	%ecx, (%eax)
+	addl	$76, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end59:
-	.size	mcl_fp_add4Lbmi2, .Lfunc_end59-mcl_fp_add4Lbmi2
-
-	.globl	mcl_fp_addNF4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF4Lbmi2,@function
-mcl_fp_addNF4Lbmi2:                     # @mcl_fp_addNF4Lbmi2
-# BB#0:
+.LBB28_1:
+	movl	%ebp, %edi
+	movl	96(%esp), %eax
+	movl	%edi, 24(%eax)
+	je	.LBB28_4
+.LBB28_3:
+	movl	%ebx, %edx
+	movl	%edx, 20(%eax)
+	jne	.LBB28_5
+	jmp	.LBB28_6
+.LBB28_7:
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	je	.LBB28_10
+.LBB28_9:
+	movl	8(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 8(%eax)
+	je	.LBB28_12
+.LBB28_11:
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 4(%eax)
+	jne	.LBB28_13
+	jmp	.LBB28_14
+.Lfunc_end28:
+	.size	mcl_fp_montRed7Lbmi2, .Lfunc_end28-mcl_fp_montRed7Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRedNF7Lbmi2          # -- Begin function mcl_fp_montRedNF7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF7Lbmi2,@function
+mcl_fp_montRedNF7Lbmi2:                 # @mcl_fp_montRedNF7Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$8, %esp
-	movl	36(%esp), %edx
-	movl	(%edx), %esi
-	movl	4(%edx), %ecx
-	movl	32(%esp), %edi
-	addl	(%edi), %esi
-	adcl	4(%edi), %ecx
-	movl	12(%edx), %ebp
-	movl	8(%edx), %edx
-	adcl	8(%edi), %edx
-	adcl	12(%edi), %ebp
-	movl	40(%esp), %eax
-	movl	%esi, %ebx
-	subl	(%eax), %ebx
-	movl	%ecx, %edi
-	sbbl	4(%eax), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	movl	%edx, %edi
-	movl	40(%esp), %eax
-	sbbl	8(%eax), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%ebp, %edi
-	movl	40(%esp), %eax
-	sbbl	12(%eax), %edi
+	subl	$76, %esp
+	movl	104(%esp), %ebp
+	movl	-4(%ebp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	100(%esp), %ecx
+	movl	(%ecx), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	imull	%eax, %edx
+	movl	12(%ebp), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %ebx, %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	4(%ebp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	mulxl	%ecx, %ecx, %esi
+	movl	(%ebp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	mulxl	%eax, %edi, %eax
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	addl	%ecx, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	8(%ebp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	mulxl	%eax, %ecx, %eax
+	adcl	%esi, %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	%ebx, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%ebp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	mulxl	%eax, %edi, %ecx
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	20(%ebp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	mulxl	%eax, %esi, %ebx
+	adcl	%ecx, %esi
+	movl	24(%ebp), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	mulxl	%eax, %eax, %ebp
+	adcl	%ebx, %eax
+	adcl	$0, %ebp
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	addl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	100(%esp), %ecx
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	4(%ecx), %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	8(%ecx), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	12(%ecx), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	16(%ecx), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	adcl	20(%ecx), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	24(%ecx), %eax
+	adcl	28(%ecx), %ebp
+	setb	16(%esp)                        # 1-byte Folded Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	60(%esp), %ecx, %esi            # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	mulxl	64(%esp), %ebx, %edi            # 4-byte Folded Reload
+	addl	%esi, %ebx
+	mulxl	56(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%edi, %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	mulxl	68(%esp), %ecx, %edi            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%edi, %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %ecx, %edi            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	mulxl	72(%esp), %esi, %edx            # 4-byte Folded Reload
+	adcl	%edi, %esi
+	movzbl	16(%esp), %edi                  # 1-byte Folded Reload
+	adcl	%edx, %edi
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	8(%esp), %edx                   # 4-byte Folded Reload
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	%edx, 32(%esp)                  # 4-byte Folded Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 24(%esp)                  # 4-byte Folded Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 44(%esp)                  # 4-byte Folded Spill
+	adcl	%eax, %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	%ebp, %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	100(%esp), %eax
+	adcl	32(%eax), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	setb	3(%esp)                         # 1-byte Folded Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	60(%esp), %eax, %ebp            # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	mulxl	64(%esp), %edi, %ecx            # 4-byte Folded Reload
+	addl	%ebp, %edi
+	mulxl	56(%esp), %eax, %ebp            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	mulxl	68(%esp), %eax, %ecx            # 4-byte Folded Reload
+	adcl	%ebp, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	mulxl	52(%esp), %eax, %ebp            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %eax, %ecx            # 4-byte Folded Reload
+	adcl	%ebp, %eax
+	movl	%eax, %ebp
+	mulxl	72(%esp), %eax, %esi            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, %ecx
+	movzbl	3(%esp), %eax                   # 1-byte Folded Reload
+	adcl	%esi, %eax
+	addl	%ebx, 16(%esp)                  # 4-byte Folded Spill
+	adcl	32(%esp), %edi                  # 4-byte Folded Reload
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 4(%esp)                   # 4-byte Folded Spill
+	movl	44(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 8(%esp)                   # 4-byte Folded Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 40(%esp)                  # 4-byte Folded Spill
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	100(%esp), %ecx
+	adcl	36(%ecx), %eax
+	setb	16(%esp)                        # 1-byte Folded Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	imull	%edi, %edx
+	mulxl	60(%esp), %ecx, %esi            # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	mulxl	64(%esp), %ebx, %ebp            # 4-byte Folded Reload
+	addl	%esi, %ebx
+	mulxl	56(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%ebp, %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	mulxl	68(%esp), %ecx, %ebp            # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %ecx, %esi            # 4-byte Folded Reload
+	adcl	%ebp, %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	adcl	%esi, %ebp
+	movl	%ebp, %esi
+	mulxl	72(%esp), %ebp, %edx            # 4-byte Folded Reload
+	adcl	%ecx, %ebp
+	movl	%ebp, %ecx
+	movzbl	16(%esp), %ebp                  # 1-byte Folded Reload
+	adcl	%edx, %ebp
+	addl	%edi, 20(%esp)                  # 4-byte Folded Spill
+	adcl	4(%esp), %ebx                   # 4-byte Folded Reload
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	%edx, 12(%esp)                  # 4-byte Folded Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 32(%esp)                  # 4-byte Folded Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	%edx, 44(%esp)                  # 4-byte Folded Spill
+	adcl	24(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	%eax, %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	100(%esp), %eax
+	adcl	40(%eax), %ebp
+	setb	3(%esp)                         # 1-byte Folded Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	imull	%ebx, %edx
+	mulxl	60(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	mulxl	64(%esp), %edi, %ecx            # 4-byte Folded Reload
+	addl	%esi, %edi
+	mulxl	56(%esp), %eax, %esi            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	mulxl	68(%esp), %eax, %ecx            # 4-byte Folded Reload
+	adcl	%esi, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %eax, %esi            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %eax, %ecx            # 4-byte Folded Reload
+	adcl	%esi, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	mulxl	72(%esp), %eax, %edx            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, %ecx
+	movzbl	3(%esp), %esi                   # 1-byte Folded Reload
+	adcl	%edx, %esi
+	addl	%ebx, 16(%esp)                  # 4-byte Folded Spill
+	adcl	12(%esp), %edi                  # 4-byte Folded Reload
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 8(%esp)                   # 4-byte Folded Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 24(%esp)                  # 4-byte Folded Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 40(%esp)                  # 4-byte Folded Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	%eax, 28(%esp)                  # 4-byte Folded Spill
+	adcl	%ebp, %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	100(%esp), %eax
+	adcl	44(%eax), %esi
+	setb	16(%esp)                        # 1-byte Folded Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	imull	%edi, %edx
+	mulxl	60(%esp), %eax, %ebx            # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	mulxl	64(%esp), %eax, %ecx            # 4-byte Folded Reload
+	addl	%ebx, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	mulxl	56(%esp), %eax, %ebx            # 4-byte Folded Reload
+	adcl	%ecx, %eax
+	movl	%eax, %ecx
+	mulxl	68(%esp), %eax, %ebp            # 4-byte Folded Reload
+	adcl	%ebx, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	mulxl	52(%esp), %eax, %ebx            # 4-byte Folded Reload
+	adcl	%ebp, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	mulxl	48(%esp), %ebp, %eax            # 4-byte Folded Reload
+	adcl	%ebx, %ebp
+	mulxl	72(%esp), %ebx, %edx            # 4-byte Folded Reload
+	adcl	%eax, %ebx
+	movzbl	16(%esp), %eax                  # 1-byte Folded Reload
+	adcl	%edx, %eax
+	movl	%eax, %edx
+	addl	%edi, 20(%esp)                  # 4-byte Folded Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 32(%esp)                  # 4-byte Folded Spill
+	adcl	44(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	adcl	%esi, %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	100(%esp), %ecx
+	adcl	48(%ecx), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	setb	20(%esp)                        # 1-byte Folded Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	imull	%eax, %edx
+	mulxl	60(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	mulxl	64(%esp), %ebx, %ecx            # 4-byte Folded Reload
+	addl	%esi, %ebx
+	mulxl	56(%esp), %edi, %eax            # 4-byte Folded Reload
+	adcl	%ecx, %edi
+	mulxl	68(%esp), %ebp, %ecx            # 4-byte Folded Reload
+	adcl	%eax, %ebp
+	mulxl	52(%esp), %eax, %esi            # 4-byte Folded Reload
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	%ecx, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%edx, %eax
+	mulxl	48(%esp), %esi, %ecx            # 4-byte Folded Reload
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	mulxl	72(%esp), %edx, %eax            # 4-byte Folded Reload
+	adcl	%ecx, %edx
+	movzbl	20(%esp), %ecx                  # 1-byte Folded Reload
+	adcl	%eax, %ecx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	addl	4(%esp), %eax                   # 4-byte Folded Reload
+	adcl	24(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	12(%esp), %edi                  # 4-byte Folded Reload
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	44(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	adcl	40(%esp), %edx                  # 4-byte Folded Reload
+	movl	100(%esp), %eax
+	adcl	52(%eax), %ecx
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	subl	60(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	sbbl	64(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	sbbl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	sbbl	68(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	sbbl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	sbbl	48(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, %eax
+	sbbl	72(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %edi
+	sarl	$31, %edi
 	testl	%edi, %edi
-	js	.LBB60_2
-# BB#1:
-	movl	%ebx, %esi
-.LBB60_2:
-	movl	28(%esp), %ebx
-	movl	%esi, (%ebx)
-	js	.LBB60_4
-# BB#3:
-	movl	(%esp), %ecx            # 4-byte Reload
-.LBB60_4:
-	movl	%ecx, 4(%ebx)
-	js	.LBB60_6
-# BB#5:
-	movl	4(%esp), %edx           # 4-byte Reload
-.LBB60_6:
-	movl	%edx, 8(%ebx)
-	js	.LBB60_8
-# BB#7:
-	movl	%edi, %ebp
-.LBB60_8:
-	movl	%ebp, 12(%ebx)
-	addl	$8, %esp
+	js	.LBB29_1
+# %bb.2:
+	movl	96(%esp), %ecx
+	movl	%eax, 24(%ecx)
+	js	.LBB29_3
+.LBB29_4:
+	movl	%edx, 20(%ecx)
+	js	.LBB29_5
+.LBB29_6:
+	movl	%esi, 16(%ecx)
+	movl	64(%esp), %edx                  # 4-byte Reload
+	movl	68(%esp), %esi                  # 4-byte Reload
+	js	.LBB29_7
+.LBB29_8:
+	movl	%esi, 12(%ecx)
+	movl	60(%esp), %esi                  # 4-byte Reload
+	js	.LBB29_9
+.LBB29_10:
+	movl	%esi, 8(%ecx)
+	js	.LBB29_11
+.LBB29_12:
+	movl	%edx, 4(%ecx)
+	jns	.LBB29_14
+.LBB29_13:
+	movl	4(%esp), %ebx                   # 4-byte Reload
+.LBB29_14:
+	movl	%ebx, (%ecx)
+	addl	$76, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end60:
-	.size	mcl_fp_addNF4Lbmi2, .Lfunc_end60-mcl_fp_addNF4Lbmi2
-
-	.globl	mcl_fp_sub4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub4Lbmi2,@function
-mcl_fp_sub4Lbmi2:                       # @mcl_fp_sub4Lbmi2
-# BB#0:
+.LBB29_1:
+	movl	%ecx, %eax
+	movl	96(%esp), %ecx
+	movl	%eax, 24(%ecx)
+	jns	.LBB29_4
+.LBB29_3:
+	movl	52(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 20(%ecx)
+	jns	.LBB29_6
+.LBB29_5:
+	movl	56(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 16(%ecx)
+	movl	64(%esp), %edx                  # 4-byte Reload
+	movl	68(%esp), %esi                  # 4-byte Reload
+	jns	.LBB29_8
+.LBB29_7:
+	movl	%ebp, %esi
+	movl	%esi, 12(%ecx)
+	movl	60(%esp), %esi                  # 4-byte Reload
+	jns	.LBB29_10
+.LBB29_9:
+	movl	12(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%ecx)
+	jns	.LBB29_12
+.LBB29_11:
+	movl	8(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 4(%ecx)
+	js	.LBB29_13
+	jmp	.LBB29_14
+.Lfunc_end29:
+	.size	mcl_fp_montRedNF7Lbmi2, .Lfunc_end29-mcl_fp_montRedNF7Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addPre7Lbmi2             # -- Begin function mcl_fp_addPre7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre7Lbmi2,@function
+mcl_fp_addPre7Lbmi2:                    # @mcl_fp_addPre7Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
+	pushl	%eax
+	movl	28(%esp), %eax
+	movl	(%eax), %ecx
+	movl	4(%eax), %edx
+	movl	32(%esp), %esi
+	addl	(%esi), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	4(%esi), %edx
+	movl	24(%eax), %edi
+	movl	20(%eax), %ebx
+	movl	16(%eax), %ebp
+	movl	12(%eax), %ecx
+	movl	8(%eax), %eax
+	adcl	8(%esi), %eax
+	adcl	12(%esi), %ecx
+	adcl	16(%esi), %ebp
+	adcl	20(%esi), %ebx
+	adcl	24(%esi), %edi
 	movl	24(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	28(%esp), %edi
-	subl	(%edi), %eax
-	sbbl	4(%edi), %ecx
-	movl	8(%esi), %edx
-	sbbl	8(%edi), %edx
-	movl	12(%esi), %esi
-	sbbl	12(%edi), %esi
-	movl	20(%esp), %edi
-	movl	%eax, (%edi)
-	movl	%ecx, 4(%edi)
-	movl	%edx, 8(%edi)
-	movl	%esi, 12(%edi)
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	je	.LBB61_2
-# BB#1:                                 # %carry
-	movl	32(%esp), %ebx
-	addl	(%ebx), %eax
-	movl	8(%ebx), %ebp
-	adcl	4(%ebx), %ecx
-	movl	12(%ebx), %ebx
-	movl	%eax, (%edi)
-	movl	%ecx, 4(%edi)
-	adcl	%edx, %ebp
-	movl	%ebp, 8(%edi)
-	adcl	%esi, %ebx
-	movl	%ebx, 12(%edi)
-.LBB61_2:                               # %nocarry
+	movl	%ebx, 20(%esi)
+	movl	%ebp, 16(%esi)
+	movl	%ecx, 12(%esi)
+	movl	%eax, 8(%esi)
+	movl	%edi, 24(%esi)
+	movl	%edx, 4(%esi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, (%esi)
+	setb	%al
+	movzbl	%al, %eax
+	addl	$4, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end61:
-	.size	mcl_fp_sub4Lbmi2, .Lfunc_end61-mcl_fp_sub4Lbmi2
-
-	.globl	mcl_fp_subNF4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF4Lbmi2,@function
-mcl_fp_subNF4Lbmi2:                     # @mcl_fp_subNF4Lbmi2
-# BB#0:
+.Lfunc_end30:
+	.size	mcl_fp_addPre7Lbmi2, .Lfunc_end30-mcl_fp_addPre7Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subPre7Lbmi2             # -- Begin function mcl_fp_subPre7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre7Lbmi2,@function
+mcl_fp_subPre7Lbmi2:                    # @mcl_fp_subPre7Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
 	subl	$8, %esp
-	movl	32(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %ecx
-	movl	36(%esp), %esi
-	subl	(%esi), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	sbbl	4(%esi), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	12(%eax), %edi
-	movl	8(%eax), %edx
-	sbbl	8(%esi), %edx
-	sbbl	12(%esi), %edi
-	movl	%edi, %esi
-	sarl	$31, %esi
-	movl	40(%esp), %eax
-	movl	12(%eax), %ebp
-	andl	%esi, %ebp
-	movl	8(%eax), %ecx
-	andl	%esi, %ecx
-	movl	40(%esp), %eax
-	movl	4(%eax), %eax
-	andl	%esi, %eax
-	movl	40(%esp), %ebx
-	andl	(%ebx), %esi
-	addl	(%esp), %esi            # 4-byte Folded Reload
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	28(%esp), %ebx
-	movl	%esi, (%ebx)
-	adcl	%edx, %ecx
-	movl	%eax, 4(%ebx)
-	movl	%ecx, 8(%ebx)
-	adcl	%edi, %ebp
-	movl	%ebp, 12(%ebx)
+	movl	32(%esp), %ecx
+	movl	(%ecx), %edx
+	movl	4(%ecx), %esi
+	xorl	%eax, %eax
+	movl	36(%esp), %edi
+	subl	(%edi), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	sbbl	4(%edi), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	24(%ecx), %ebx
+	movl	20(%ecx), %ebp
+	movl	16(%ecx), %edx
+	movl	12(%ecx), %esi
+	movl	8(%ecx), %ecx
+	sbbl	8(%edi), %ecx
+	sbbl	12(%edi), %esi
+	sbbl	16(%edi), %edx
+	sbbl	20(%edi), %ebp
+	sbbl	24(%edi), %ebx
+	movl	28(%esp), %edi
+	movl	%ebp, 20(%edi)
+	movl	%edx, 16(%edi)
+	movl	%esi, 12(%edi)
+	movl	%ecx, 8(%edi)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	movl	%ebx, 24(%edi)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, (%edi)
+	sbbl	%eax, %eax
+	andl	$1, %eax
 	addl	$8, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end62:
-	.size	mcl_fp_subNF4Lbmi2, .Lfunc_end62-mcl_fp_subNF4Lbmi2
-
-	.globl	mcl_fpDbl_add4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add4Lbmi2,@function
-mcl_fpDbl_add4Lbmi2:                    # @mcl_fpDbl_add4Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
+.Lfunc_end31:
+	.size	mcl_fp_subPre7Lbmi2, .Lfunc_end31-mcl_fp_subPre7Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_shr1_7Lbmi2              # -- Begin function mcl_fp_shr1_7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_7Lbmi2,@function
+mcl_fp_shr1_7Lbmi2:                     # @mcl_fp_shr1_7Lbmi2
+# %bb.0:
 	pushl	%esi
-	subl	$12, %esp
-	movl	40(%esp), %eax
-	movl	(%eax), %edi
-	movl	4(%eax), %edx
-	movl	36(%esp), %esi
-	addl	(%esi), %edi
-	adcl	4(%esi), %edx
-	movl	8(%eax), %ebx
-	adcl	8(%esi), %ebx
-	movl	12(%esi), %ebp
-	movl	32(%esp), %ecx
-	movl	%edi, (%ecx)
-	movl	16(%esi), %edi
-	adcl	12(%eax), %ebp
-	adcl	16(%eax), %edi
-	movl	%edx, 4(%ecx)
-	movl	28(%eax), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%ebx, 8(%ecx)
-	movl	24(%eax), %ebx
-	movl	20(%eax), %eax
-	movl	%ebp, 12(%ecx)
-	movl	20(%esi), %edx
-	adcl	%eax, %edx
-	movl	28(%esi), %ecx
-	movl	24(%esi), %ebp
-	adcl	%ebx, %ebp
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
+	movl	12(%esp), %eax
+	movl	24(%eax), %ecx
+	movl	%ecx, %edx
+	shrl	%edx
+	movl	8(%esp), %esi
+	movl	%edx, 24(%esi)
+	movl	20(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 20(%esi)
+	movl	16(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 16(%esi)
+	movl	12(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 12(%esi)
+	movl	8(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 8(%esi)
+	movl	4(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 4(%esi)
+	movl	(%eax), %eax
+	shrdl	$1, %edx, %eax
+	movl	%eax, (%esi)
+	popl	%esi
+	retl
+.Lfunc_end32:
+	.size	mcl_fp_shr1_7Lbmi2, .Lfunc_end32-mcl_fp_shr1_7Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_add7Lbmi2                # -- Begin function mcl_fp_add7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_add7Lbmi2,@function
+mcl_fp_add7Lbmi2:                       # @mcl_fp_add7Lbmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$20, %esp
 	movl	44(%esp), %eax
-	movl	%edi, %esi
-	subl	(%eax), %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	%edx, %esi
-	sbbl	4(%eax), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
+	movl	(%eax), %ebp
+	movl	4(%eax), %esi
+	movl	48(%esp), %ecx
+	addl	(%ecx), %ebp
+	adcl	4(%ecx), %esi
+	movl	24(%eax), %edi
+	movl	20(%eax), %ebx
+	movl	16(%eax), %edx
+	movl	12(%eax), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	8(%eax), %eax
+	movl	48(%esp), %ecx
+	adcl	8(%ecx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	12(%ecx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	16(%ecx), %edx
+	adcl	20(%ecx), %ebx
+	adcl	24(%ecx), %edi
+	movl	40(%esp), %ecx
+	movl	%edi, 24(%ecx)
+	movl	%ebx, 20(%ecx)
+	movl	%edx, 16(%ecx)
+	movl	%eax, 12(%ecx)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ecx)
+	movl	%esi, 4(%ecx)
+	movl	%ebp, (%ecx)
+	setb	3(%esp)                         # 1-byte Folded Spill
+	movl	52(%esp), %ecx
+	subl	(%ecx), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	sbbl	4(%ecx), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	sbbl	8(%ecx), %eax
+	sbbl	12(%ecx), %ebp
+	sbbl	16(%ecx), %edx
+	sbbl	20(%ecx), %ebx
+	sbbl	24(%ecx), %edi
+	movzbl	3(%esp), %ecx                   # 1-byte Folded Reload
+	sbbl	$0, %ecx
+	testb	$1, %cl
+	jne	.LBB33_2
+# %bb.1:                                # %nocarry
 	movl	%ebp, %esi
-	sbbl	8(%eax), %esi
-	sbbl	12(%eax), %ecx
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB63_2
-# BB#1:
-	movl	%esi, %ebp
-.LBB63_2:
-	testb	%bl, %bl
-	jne	.LBB63_4
-# BB#3:
-	movl	(%esp), %edi            # 4-byte Reload
-.LBB63_4:
-	movl	32(%esp), %eax
-	movl	%edi, 16(%eax)
-	jne	.LBB63_6
-# BB#5:
-	movl	4(%esp), %edx           # 4-byte Reload
-.LBB63_6:
-	movl	%edx, 20(%eax)
-	movl	%ebp, 24(%eax)
-	movl	8(%esp), %edx           # 4-byte Reload
-	jne	.LBB63_8
-# BB#7:
-	movl	%ecx, %edx
-.LBB63_8:
-	movl	%edx, 28(%eax)
-	addl	$12, %esp
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	40(%esp), %ebp
+	movl	%ecx, (%ebp)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 4(%ebp)
+	movl	%eax, 8(%ebp)
+	movl	%esi, 12(%ebp)
+	movl	%edx, 16(%ebp)
+	movl	%ebx, 20(%ebp)
+	movl	%edi, 24(%ebp)
+.LBB33_2:                               # %carry
+	addl	$20, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end63:
-	.size	mcl_fpDbl_add4Lbmi2, .Lfunc_end63-mcl_fpDbl_add4Lbmi2
-
-	.globl	mcl_fpDbl_sub4Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub4Lbmi2,@function
-mcl_fpDbl_sub4Lbmi2:                    # @mcl_fpDbl_sub4Lbmi2
-# BB#0:
+.Lfunc_end33:
+	.size	mcl_fp_add7Lbmi2, .Lfunc_end33-mcl_fp_add7Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addNF7Lbmi2              # -- Begin function mcl_fp_addNF7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF7Lbmi2,@function
+mcl_fp_addNF7Lbmi2:                     # @mcl_fp_addNF7Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	pushl	%eax
-	movl	28(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	32(%esp), %ebp
-	subl	(%ebp), %edx
-	sbbl	4(%ebp), %esi
-	movl	8(%eax), %ebx
-	sbbl	8(%ebp), %ebx
-	movl	24(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	12(%eax), %edx
-	sbbl	12(%ebp), %edx
-	movl	%esi, 4(%ecx)
-	movl	16(%eax), %edi
-	sbbl	16(%ebp), %edi
-	movl	%ebx, 8(%ecx)
-	movl	20(%ebp), %esi
-	movl	%edx, 12(%ecx)
-	movl	20(%eax), %ebx
-	sbbl	%esi, %ebx
-	movl	24(%ebp), %edx
-	movl	24(%eax), %esi
-	sbbl	%edx, %esi
-	movl	28(%ebp), %edx
-	movl	28(%eax), %eax
-	sbbl	%edx, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	$0, %edx
-	sbbl	$0, %edx
-	andl	$1, %edx
-	movl	36(%esp), %ecx
-	movl	(%ecx), %eax
-	jne	.LBB64_1
-# BB#2:
-	xorl	%ebp, %ebp
-	jmp	.LBB64_3
-.LBB64_1:
+	subl	$40, %esp
+	movl	68(%esp), %ecx
+	movl	(%ecx), %ebx
 	movl	4(%ecx), %ebp
-.LBB64_3:
-	testb	%dl, %dl
-	jne	.LBB64_5
-# BB#4:
-	movl	$0, %eax
-.LBB64_5:
-	jne	.LBB64_6
-# BB#7:
-	movl	$0, %edx
-	jmp	.LBB64_8
-.LBB64_6:
-	movl	12(%ecx), %edx
-.LBB64_8:
-	jne	.LBB64_9
-# BB#10:
-	xorl	%ecx, %ecx
-	jmp	.LBB64_11
-.LBB64_9:
+	movl	64(%esp), %esi
+	addl	(%esi), %ebx
+	adcl	4(%esi), %ebp
+	movl	24(%ecx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%ecx), %edx
+	movl	16(%ecx), %edi
+	movl	12(%ecx), %eax
 	movl	8(%ecx), %ecx
-.LBB64_11:
-	addl	%edi, %eax
-	adcl	%ebx, %ebp
-	movl	24(%esp), %edi
-	movl	%eax, 16(%edi)
-	adcl	%esi, %ecx
-	movl	%ebp, 20(%edi)
-	movl	%ecx, 24(%edi)
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 28(%edi)
-	addl	$4, %esp
+	adcl	8(%esi), %ecx
+	adcl	12(%esi), %eax
+	adcl	16(%esi), %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	adcl	20(%esi), %edx
+	movl	(%esp), %edi                    # 4-byte Reload
+	adcl	24(%esi), %edi
+	movl	%edi, (%esp)                    # 4-byte Spill
+	movl	72(%esp), %esi
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	subl	(%esi), %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	sbbl	4(%esi), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	sbbl	8(%esi), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%eax, %ebx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	(%esp), %edi                    # 4-byte Reload
+	sbbl	12(%esi), %ebx
+	movl	%eax, %ebp
+	sbbl	16(%esi), %ebp
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%edx, %ecx
+	sbbl	20(%esi), %ecx
+	movl	%edi, %edx
+	sbbl	24(%esi), %edx
+	movl	%edx, %esi
+	sarl	$31, %esi
+	testl	%esi, %esi
+	js	.LBB34_1
+# %bb.2:
+	movl	60(%esp), %edi
+	movl	%edx, 24(%edi)
+	js	.LBB34_3
+.LBB34_4:
+	movl	%ecx, 20(%edi)
+	movl	28(%esp), %edx                  # 4-byte Reload
+	js	.LBB34_5
+.LBB34_6:
+	movl	%ebp, 16(%edi)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	js	.LBB34_7
+.LBB34_8:
+	movl	%ebx, 12(%edi)
+	js	.LBB34_9
+.LBB34_10:
+	movl	%edx, 8(%edi)
+	js	.LBB34_11
+.LBB34_12:
+	movl	%ecx, 4(%edi)
+	jns	.LBB34_14
+.LBB34_13:
+	movl	20(%esp), %eax                  # 4-byte Reload
+.LBB34_14:
+	movl	%eax, (%edi)
+	addl	$40, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end64:
-	.size	mcl_fpDbl_sub4Lbmi2, .Lfunc_end64-mcl_fpDbl_sub4Lbmi2
-
-	.globl	mcl_fp_mulUnitPre5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre5Lbmi2,@function
-mcl_fp_mulUnitPre5Lbmi2:                # @mcl_fp_mulUnitPre5Lbmi2
-# BB#0:
+.LBB34_1:
+	movl	%edi, %edx
+	movl	60(%esp), %edi
+	movl	%edx, 24(%edi)
+	jns	.LBB34_4
+.LBB34_3:
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 20(%edi)
+	movl	28(%esp), %edx                  # 4-byte Reload
+	jns	.LBB34_6
+.LBB34_5:
+	movl	%eax, %ebp
+	movl	%ebp, 16(%edi)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB34_8
+.LBB34_7:
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	movl	%ebx, 12(%edi)
+	jns	.LBB34_10
+.LBB34_9:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%edi)
+	jns	.LBB34_12
+.LBB34_11:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	js	.LBB34_13
+	jmp	.LBB34_14
+.Lfunc_end34:
+	.size	mcl_fp_addNF7Lbmi2, .Lfunc_end34-mcl_fp_addNF7Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_sub7Lbmi2                # -- Begin function mcl_fp_sub7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_sub7Lbmi2,@function
+mcl_fp_sub7Lbmi2:                       # @mcl_fp_sub7Lbmi2
+# %bb.0:                                # 7-limb (32-bit limbs) subtract arg2 - arg3 into arg1; the 7 limbs at arg4 are added back when the subtraction borrows
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$8, %esp
-	movl	36(%esp), %edx
-	movl	32(%esp), %ecx
-	mulxl	4(%ecx), %esi, %eax
-	mulxl	(%ecx), %edi, %ebx
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	addl	%esi, %ebx
-	mulxl	8(%ecx), %ebp, %esi
-	adcl	%eax, %ebp
-	mulxl	12(%ecx), %eax, %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	adcl	%esi, %eax
-	mulxl	16(%ecx), %ecx, %edx
-	movl	28(%esp), %esi
-	movl	4(%esp), %edi           # 4-byte Reload
-	movl	%edi, (%esi)
-	movl	%ebx, 4(%esi)
-	movl	%ebp, 8(%esi)
-	movl	%eax, 12(%esi)
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	movl	%ecx, 16(%esi)
-	adcl	$0, %edx
-	movl	%edx, 20(%esi)
-	addl	$8, %esp
+	subl	$24, %esp                       # reserve 24 bytes of spill slots
+	movl	48(%esp), %edx                  # edx = 2nd argument (minuend pointer)
+	movl	(%edx), %ecx
+	movl	4(%edx), %edi
+	movl	52(%esp), %eax                  # eax = 3rd argument (subtrahend pointer)
+	subl	(%eax), %ecx                    # limb 0; starts the borrow chain
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	sbbl	4(%eax), %edi                   # limb 1
+	movl	24(%edx), %esi                  # loads below use mov, which leaves CF intact
+	movl	20(%edx), %ecx
+	movl	16(%edx), %ebp
+	movl	12(%edx), %ebx
+	movl	8(%edx), %edx
+	sbbl	8(%eax), %edx                   # limb 2
+	sbbl	12(%eax), %ebx                  # limb 3
+	sbbl	16(%eax), %ebp                  # limb 4
+	sbbl	20(%eax), %ecx                  # limb 5
+	sbbl	24(%eax), %esi                  # limb 6 (top limb); CF = final borrow
+	movl	$0, %eax                        # mov (not xor) so CF survives
+	sbbl	%eax, %eax                      # eax = 0 - 0 - CF: -1 on borrow, 0 otherwise
+	testb	$1, %al                         # ZF=1 iff no borrow; the mov stores below keep the flags
+	movl	44(%esp), %eax                  # eax = 1st argument (destination pointer)
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	%esi, 24(%eax)                  # store the raw difference, limb 6 down to limb 0
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	%ecx, 20(%eax)
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	%ebp, 16(%eax)
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	movl	%ebx, 12(%eax)
+	movl	%edx, 8(%eax)
+	movl	%edi, (%esp)                    # 4-byte Spill
+	movl	%edi, 4(%eax)
+	movl	4(%esp), %esi                   # 4-byte Reload
+	movl	%esi, (%eax)
+	je	.LBB35_2                        # no borrow: the raw difference already stored is the result
+# %bb.1:                                # %carry
+	movl	%esi, %ecx
+	movl	56(%esp), %esi                  # esi = 4th argument (7 limbs added back after a borrow)
+	addl	(%esi), %ecx                    # limb 0 of the add-back; carry chain follows
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	4(%esi), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	8(%esi), %edx
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	adcl	12(%esi), %ebx
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	16(%esi), %ebp
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	20(%esi), %ecx
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	24(%esi), %edi                  # carry out of the top limb is discarded
+	movl	%edi, 24(%eax)                  # overwrite the destination with the corrected limbs
+	movl	%ecx, 20(%eax)
+	movl	%ebp, 16(%eax)
+	movl	%ebx, 12(%eax)
+	movl	%edx, 8(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, (%eax)
+.LBB35_2:                               # %nocarry
+	addl	$24, %esp                       # release the spill slots
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end65:
-	.size	mcl_fp_mulUnitPre5Lbmi2, .Lfunc_end65-mcl_fp_mulUnitPre5Lbmi2
-
-	.globl	mcl_fpDbl_mulPre5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre5Lbmi2,@function
-mcl_fpDbl_mulPre5Lbmi2:                 # @mcl_fpDbl_mulPre5Lbmi2
-# BB#0:
+.Lfunc_end35:
+	.size	mcl_fp_sub7Lbmi2, .Lfunc_end35-mcl_fp_sub7Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subNF7Lbmi2              # -- Begin function mcl_fp_subNF7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF7Lbmi2,@function
+mcl_fp_subNF7Lbmi2:                     # @mcl_fp_subNF7Lbmi2
+# %bb.0:                                # branch-free 7-limb subtract arg2 - arg3 into arg1; the 7 limbs at arg4 are added back, masked by the sign bit of the top difference limb
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$44, %esp
-	movl	68(%esp), %eax
-	movl	(%eax), %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
+	subl	$32, %esp                       # reserve 32 bytes of spill slots
+	movl	56(%esp), %eax                  # eax = 2nd argument (minuend pointer)
+	movl	(%eax), %esi
 	movl	4(%eax), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
+	movl	60(%esp), %ecx                  # ecx = 3rd argument (subtrahend pointer)
+	subl	(%ecx), %esi                    # limb 0; starts the borrow chain
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	sbbl	4(%ecx), %edx                   # limb 1
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	24(%eax), %edx                  # loads below use mov, which leaves CF intact
+	movl	20(%eax), %esi
+	movl	16(%eax), %edi
+	movl	12(%eax), %ebx
+	movl	8(%eax), %eax
+	sbbl	8(%ecx), %eax                   # limb 2
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	sbbl	12(%ecx), %ebx                  # limb 3
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	sbbl	16(%ecx), %edi                  # limb 4
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	sbbl	20(%ecx), %esi                  # limb 5
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	%edx, %eax                      # eax = top limb (offset 24) of the minuend
+	sbbl	24(%ecx), %eax                  # limb 6: top limb of the difference
 	movl	%eax, %ecx
-	movl	72(%esp), %eax
-	movl	(%eax), %ebp
-	mulxl	%ebp, %esi, %edi
-	movl	%ebx, %edx
-	mulxl	%ebp, %edx, %eax
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	addl	%esi, %eax
-	movl	8(%ecx), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	mulxl	%ebp, %esi, %ebx
-	adcl	%edi, %esi
-	movl	12(%ecx), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	mulxl	%ebp, %edi, %ecx
-	adcl	%ebx, %edi
-	movl	68(%esp), %edx
-	movl	16(%edx), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	mulxl	%ebp, %ebp, %edx
-	adcl	%ecx, %ebp
-	movl	64(%esp), %ecx
-	movl	20(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, (%ecx)
-	adcl	$0, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx
-	movl	4(%ecx), %ebx
-	movl	36(%esp), %edx          # 4-byte Reload
-	mulxl	%ebx, %ecx, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	addl	%eax, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	mulxl	%ebx, %ecx, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	%esi, %ecx
-	movl	32(%esp), %edx          # 4-byte Reload
-	mulxl	%ebx, %esi, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	%edi, %esi
-	movl	28(%esp), %edx          # 4-byte Reload
-	mulxl	%ebx, %edi, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	%ebp, %edi
-	movl	24(%esp), %edx          # 4-byte Reload
-	mulxl	%ebx, %eax, %edx
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %ebx
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	20(%esp), %ecx          # 4-byte Folded Reload
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	adcl	%edx, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax
-	movl	36(%esp), %edx          # 4-byte Reload
-	movl	%edx, 4(%eax)
-	movl	68(%esp), %ebx
-	movl	(%ebx), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax
-	movl	8(%eax), %eax
-	mulxl	%eax, %edx, %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	addl	%ecx, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	4(%ebx), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	mulxl	%eax, %edx, %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	%esi, %edx
-	movl	%edx, %ebp
-	movl	8(%ebx), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	%edi, %ecx
-	movl	12(%ebx), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	mulxl	%eax, %esi, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	16(%ebx), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	mulxl	%eax, %edi, %edx
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	addl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	adcl	(%esp), %edi            # 4-byte Folded Reload
-	adcl	%edx, %ebx
-	movl	64(%esp), %eax
-	movl	20(%esp), %edx          # 4-byte Reload
-	movl	%edx, 8(%eax)
-	movl	72(%esp), %eax
-	movl	12(%eax), %eax
-	movl	40(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	addl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ebp, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	%ecx, %ebp
-	movl	32(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	%esi, %ecx
-	movl	28(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	%edi, %edx
-	movl	%edx, %esi
-	movl	24(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edi, %edx
-	adcl	%ebx, %edi
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	20(%esp), %ebp          # 4-byte Folded Reload
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	adcl	%edx, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax
-	movl	40(%esp), %edx          # 4-byte Reload
-	movl	%edx, 12(%eax)
-	movl	72(%esp), %eax
-	movl	16(%eax), %edx
-	movl	68(%esp), %eax
-	mulxl	(%eax), %esi, %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	addl	%ebp, %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	mulxl	4(%eax), %ebx, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	%ecx, %ebx
-	mulxl	8(%eax), %esi, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	mulxl	12(%eax), %ecx, %ebp
-	adcl	%edi, %ecx
-	mulxl	16(%eax), %edi, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	36(%esp), %ebx          # 4-byte Folded Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	adcl	%ebp, %edi
+	movl	%eax, %edx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	sarl	$31, %ecx                       # ecx = sign bit of the top difference limb replicated: 0 or -1
+	movl	%ecx, %eax
+	shldl	$1, %edx, %eax                  # shifts that same sign bit into bit 0, so eax equals the mask in ecx
+	movl	64(%esp), %edx                  # edx = 4th argument (7 limbs conditionally added back)
+	andl	(%edx), %eax                    # masked limb 0 of arg4
+	movl	24(%edx), %esi
+	andl	%ecx, %esi                      # masked limb 6
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	20(%edx), %ebx
+	andl	%ecx, %ebx                      # masked limb 5
+	movl	16(%edx), %edi
+	andl	%ecx, %edi                      # masked limb 4
+	movl	12(%edx), %esi
+	andl	%ecx, %esi                      # masked limb 3
+	movl	64(%esp), %edx
+	movl	8(%edx), %edx
+	andl	%ecx, %edx                      # masked limb 2
 	movl	64(%esp), %ebp
-	movl	40(%esp), %edx          # 4-byte Reload
-	movl	%edx, 16(%ebp)
+	andl	4(%ebp), %ecx                   # masked limb 1 (consumes the mask register)
+	addl	20(%esp), %eax                  # 4-byte Folded Reload
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	52(%esp), %ebp                  # ebp = 1st argument (destination pointer)
+	movl	%eax, (%ebp)                    # mov stores are interleaved with the adc chain; they do not touch CF
+	adcl	4(%esp), %edx                   # 4-byte Folded Reload
+	movl	%ecx, 4(%ebp)
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edx, 8(%ebp)
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	%esi, 12(%ebp)
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%edi, 16(%ebp)
 	movl	%ebx, 20(%ebp)
-	movl	%esi, 24(%ebp)
-	movl	%ecx, 28(%ebp)
-	movl	%edi, 32(%ebp)
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%ebp)
-	addl	$44, %esp
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 24(%ebp)                  # top limb; carry out is discarded
+	addl	$32, %esp                       # release the spill slots
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end66:
-	.size	mcl_fpDbl_mulPre5Lbmi2, .Lfunc_end66-mcl_fpDbl_mulPre5Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre5Lbmi2,@function
-mcl_fpDbl_sqrPre5Lbmi2:                 # @mcl_fpDbl_sqrPre5Lbmi2
-# BB#0:
+.Lfunc_end36:
+	.size	mcl_fp_subNF7Lbmi2, .Lfunc_end36-mcl_fp_subNF7Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_add7Lbmi2             # -- Begin function mcl_fpDbl_add7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add7Lbmi2,@function
+mcl_fpDbl_add7Lbmi2:                    # @mcl_fpDbl_add7Lbmi2
+# %bb.0:                                # 14-limb add arg2 + arg3 into arg1: low 7 limbs are stored as-is; high 7 limbs are stored either unchanged or minus the 7 limbs at arg4, selected by the test below
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$40, %esp
-	movl	64(%esp), %ecx
-	movl	(%ecx), %edi
-	movl	4(%ecx), %esi
-	movl	%esi, %edx
-	mulxl	%edi, %ebp, %ebx
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	%edi, %edx, %eax
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	addl	%ebp, %eax
-	movl	8(%ecx), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	mulxl	%edi, %ebp, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	%ebx, %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	12(%ecx), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	mulxl	%edi, %ecx, %ebx
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	64(%esp), %edx
-	movl	16(%edx), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	mulxl	%edi, %edi, %edx
-	adcl	%ebx, %edi
-	movl	16(%esp), %ebx          # 4-byte Reload
-	movl	60(%esp), %ebp
-	movl	%ebx, (%ebp)
-	adcl	$0, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	addl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	%esi, %ebx, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	mulxl	%esi, %ebp, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	%ecx, %ebp
-	movl	20(%esp), %edx          # 4-byte Reload
-	mulxl	%esi, %ecx, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	adcl	%edi, %ecx
-	movl	32(%esp), %edx          # 4-byte Reload
-	mulxl	%esi, %edi, %edx
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	28(%esp), %ebx          # 4-byte Folded Reload
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	adcl	%edx, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax
-	movl	24(%esp), %edx          # 4-byte Reload
-	movl	%edx, 4(%eax)
-	movl	64(%esp), %eax
-	movl	(%eax), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	mulxl	%esi, %edx, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	addl	%ebx, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax
+	subl	$44, %esp                       # reserve 44 bytes of spill slots
+	movl	68(%esp), %eax                  # eax = 2nd argument (first 14-limb addend)
+	movl	(%eax), %ecx
 	movl	4(%eax), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	mulxl	%esi, %ebx, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	%ebp, %ebx
-	movl	%esi, %edx
-	mulxl	%esi, %ebp, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	adcl	%ecx, %ebp
-	movl	64(%esp), %ecx
-	movl	12(%ecx), %esi
-	movl	%esi, %edx
-	mulxl	%eax, %eax, %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	adcl	%edi, %eax
-	movl	32(%esp), %edx          # 4-byte Reload
-	mulxl	36(%esp), %ecx, %edx    # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	%edi, %edi
-	andl	$1, %edi
-	addl	16(%esp), %ebx          # 4-byte Folded Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	adcl	%edx, %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	24(%esp), %edx          # 4-byte Reload
-	mulxl	%esi, %edx, %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	addl	%ebx, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	20(%esp), %edx          # 4-byte Reload
-	mulxl	%esi, %edx, %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	%ebp, %edx
-	movl	%edx, %edi
-	movl	60(%esp), %eax
-	movl	28(%esp), %edx          # 4-byte Reload
-	movl	%edx, 8(%eax)
-	movl	64(%esp), %eax
-	movl	8(%eax), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	mulxl	%esi, %ebx, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	%esi, %edx
-	mulxl	%esi, %ebp, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	%ecx, %ebp
-	movl	16(%eax), %ecx
-	movl	%ecx, %edx
-	mulxl	%esi, %esi, %edx
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	24(%esp), %edi          # 4-byte Folded Reload
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	adcl	%edx, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 12(%edx)
-	movl	%ecx, %edx
-	movl	64(%esp), %eax
-	mulxl	(%eax), %edx, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	addl	%edi, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	movl	64(%esp), %eax
-	mulxl	4(%eax), %edi, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	%ebx, %edi
-	movl	36(%esp), %edx          # 4-byte Reload
-	mulxl	%ecx, %ebx, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	%ebp, %ebx
-	movl	%ecx, %edx
-	mulxl	12(%eax), %ebp, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	%esi, %ebp
-	movl	%ecx, %edx
-	mulxl	%ecx, %edx, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	28(%esp), %edi          # 4-byte Folded Reload
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	12(%esp), %edx          # 4-byte Folded Reload
-	movl	60(%esp), %esi
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 16(%esi)
-	movl	%edi, 20(%esi)
-	movl	%ebx, 24(%esi)
-	movl	%ebp, 28(%esi)
-	movl	%edx, 32(%esi)
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esi)
-	addl	$40, %esp
+	movl	72(%esp), %esi                  # esi = 3rd argument (second 14-limb addend)
+	addl	(%esi), %ecx                    # limb 0; starts the carry chain
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	4(%esi), %edx                   # limb 1
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	52(%eax), %ecx                  # preload limbs 13..2 of arg2; mov leaves CF intact
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%eax), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	44(%eax), %ebx
+	movl	40(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	32(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	28(%eax), %ebp
+	movl	24(%eax), %edi
+	movl	20(%eax), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	16(%eax), %edx
+	movl	12(%eax), %ecx
+	movl	8(%eax), %eax
+	adcl	8(%esi), %eax                   # limb 2
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	12(%esi), %ecx                  # limb 3
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	16(%esi), %edx                  # limb 4
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	20(%esi), %ecx                  # limb 5
+	adcl	24(%esi), %edi                  # limb 6 (last limb of the low half)
+	movl	%edi, %edx
+	adcl	28(%esi), %ebp                  # limb 7 (first limb of the high half)
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	32(%esi), %edi                  # limb 8
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	36(%esi), %eax                  # limb 9
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	40(%esi), %eax                  # limb 10
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	44(%esi), %ebx                  # limb 11
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	48(%esi), %ebx                  # limb 12
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	52(%esi), %ebx                  # limb 13; CF = carry out of the 14-limb sum
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	movl	64(%esp), %ebx                  # ebx = 1st argument (destination pointer)
+	movl	%edx, 24(%ebx)                  # store low half (limbs 6..0) unmodified
+	movl	%ecx, 20(%ebx)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%ebx)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ebx)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%ebx)
+	setb	24(%esp)                        # 1-byte Folded Spill
+	movl	76(%esp), %edx                  # edx = 4th argument (7 limbs subtracted from the high half)
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	subl	(%edx), %ebp                    # high limb 7 minus arg4 limb 0; starts the borrow chain
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	sbbl	4(%edx), %edi                   # limb 8
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	sbbl	8(%edx), %esi                   # limb 9
+	movl	12(%esp), %edi                  # 4-byte Reload
+	sbbl	12(%edx), %edi                  # limb 10
+	movl	(%esp), %ebp                    # 4-byte Reload
+	sbbl	16(%edx), %ebp                  # limb 11
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	20(%edx), %eax                  # limb 12
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	sbbl	24(%edx), %ecx                  # limb 13; CF = final borrow
+	movzbl	24(%esp), %edx                  # 1-byte Folded Reload
+	sbbl	$0, %edx                        # edx = saved carry - borrow
+	testb	$1, %dl                         # ZF=0 iff bit 0 is set; every jump below reuses these flags (mov keeps them)
+	jne	.LBB37_1                        # bit set: take the out-of-line path that keeps the un-subtracted limbs
+# %bb.2:
+	movl	%ecx, 52(%ebx)                  # subtracted limb 13
+	jne	.LBB37_3
+.LBB37_4:
+	movl	%eax, 48(%ebx)                  # subtracted limb 12
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB37_5
+.LBB37_6:
+	movl	%ebp, 44(%ebx)                  # subtracted limb 11
+	movl	32(%esp), %eax                  # 4-byte Reload
+	jne	.LBB37_7
+.LBB37_8:
+	movl	%edi, 40(%ebx)                  # subtracted limb 10
+	jne	.LBB37_9
+.LBB37_10:
+	movl	%esi, 36(%ebx)                  # subtracted limb 9
+	jne	.LBB37_11
+.LBB37_12:
+	movl	%ecx, 32(%ebx)                  # subtracted limb 8
+	je	.LBB37_14
+.LBB37_13:
+	movl	20(%esp), %eax                  # 4-byte Reload
+.LBB37_14:
+	movl	%eax, 28(%ebx)                  # limb 7: subtracted or un-subtracted, whichever is in eax
+	addl	$44, %esp                       # release the spill slots
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end67:
-	.size	mcl_fpDbl_sqrPre5Lbmi2, .Lfunc_end67-mcl_fpDbl_sqrPre5Lbmi2
-
-	.globl	mcl_fp_mont5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont5Lbmi2,@function
-mcl_fp_mont5Lbmi2:                      # @mcl_fp_mont5Lbmi2
-# BB#0:
+.LBB37_1:
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 52(%ebx)                  # un-subtracted limb 13
+	je	.LBB37_4
+.LBB37_3:
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 48(%ebx)                  # un-subtracted limb 12
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	je	.LBB37_6
+.LBB37_5:
+	movl	(%esp), %ebp                    # 4-byte Reload
+	movl	%ebp, 44(%ebx)                  # un-subtracted limb 11
+	movl	32(%esp), %eax                  # 4-byte Reload
+	je	.LBB37_8
+.LBB37_7:
+	movl	12(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 40(%ebx)                  # un-subtracted limb 10
+	je	.LBB37_10
+.LBB37_9:
+	movl	16(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 36(%ebx)                  # un-subtracted limb 9
+	je	.LBB37_12
+.LBB37_11:
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 32(%ebx)                  # un-subtracted limb 8
+	jne	.LBB37_13
+	jmp	.LBB37_14
+.Lfunc_end37:
+	.size	mcl_fpDbl_add7Lbmi2, .Lfunc_end37-mcl_fpDbl_add7Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sub7Lbmi2             # -- Begin function mcl_fpDbl_sub7Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub7Lbmi2,@function
+mcl_fpDbl_sub7Lbmi2:                    # @mcl_fpDbl_sub7Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$84, %esp
-	movl	108(%esp), %eax
-	movl	16(%eax), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	112(%esp), %ecx
-	movl	(%ecx), %ecx
-	movl	12(%eax), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	8(%eax), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	(%eax), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	4(%eax), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	mulxl	%ecx, %edx, %eax
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	%ecx, %edx, %esi
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	%ecx, %edx, %edi
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%ebp, %edx
-	mulxl	%ecx, %edx, %ebp
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	%ecx, %edx, %ecx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	addl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	adcl	76(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	80(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	116(%esp), %ebp
-	movl	-4(%ebp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	imull	%eax, %edx
-	movl	(%ebp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	4(%ebp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ecx, %ebx
-	mulxl	%eax, %esi, %edi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	addl	%ecx, %edi
-	movl	8(%ebp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	mulxl	%eax, %esi, %ecx
-	adcl	%ebx, %esi
-	movl	12(%ebp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	mulxl	%eax, %eax, %ebx
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	adcl	%ecx, %eax
-	movl	%eax, %ecx
-	movl	16(%ebp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebx, %eax
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	adcl	$0, %eax
-	movl	12(%esp), %edx          # 4-byte Reload
-	addl	16(%esp), %edx          # 4-byte Folded Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	andl	$1, %edi
-	movl	112(%esp), %edx
-	movl	4(%edx), %edx
-	mulxl	48(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	mulxl	52(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	mulxl	40(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	mulxl	44(%esp), %esi, %ebp    # 4-byte Folded Reload
-	addl	%eax, %ebp
-	mulxl	56(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, %ecx
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	24(%esp), %ebp          # 4-byte Reload
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	adcl	%ebx, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	%edi, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	imull	60(%esp), %edx          # 4-byte Folded Reload
-	mulxl	80(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	76(%esp), %edi, %esi    # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	addl	%ecx, %esi
-	movl	%esi, %edi
-	mulxl	72(%esp), %esi, %ebx    # 4-byte Folded Reload
-	adcl	%eax, %esi
-	mulxl	68(%esp), %ecx, %ebp    # 4-byte Folded Reload
-	adcl	%ebx, %ecx
-	mulxl	64(%esp), %edx, %eax    # 4-byte Folded Reload
-	adcl	%ebp, %edx
-	movl	%edx, %ebx
-	adcl	$0, %eax
-	movl	28(%esp), %edx          # 4-byte Reload
-	andl	$1, %edx
-	movl	12(%esp), %ebp          # 4-byte Reload
-	addl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %esi
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	112(%esp), %edx
-	movl	8(%edx), %edx
-	mulxl	48(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	mulxl	52(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	mulxl	40(%esp), %edi, %ebx    # 4-byte Folded Reload
-	mulxl	44(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	addl	%edi, %ecx
-	mulxl	56(%esp), %eax, %edx    # 4-byte Folded Reload
-	adcl	%ebx, %eax
-	movl	%eax, %edi
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, %eax
-	movl	32(%esp), %ebx          # 4-byte Reload
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	movl	24(%esp), %ebp          # 4-byte Reload
-	addl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	%esi, %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %edx
-	imull	60(%esp), %edx          # 4-byte Folded Reload
-	mulxl	80(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	76(%esp), %edi, %esi    # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	addl	%ecx, %esi
-	mulxl	72(%esp), %ecx, %ebp    # 4-byte Folded Reload
-	adcl	%eax, %ecx
-	mulxl	68(%esp), %eax, %edi    # 4-byte Folded Reload
-	adcl	%ebp, %eax
-	mulxl	64(%esp), %ebx, %ebp    # 4-byte Folded Reload
-	adcl	%edi, %ebx
-	adcl	$0, %ebp
-	movl	28(%esp), %edx          # 4-byte Reload
-	andl	$1, %edx
-	movl	12(%esp), %edi          # 4-byte Reload
-	addl	24(%esp), %edi          # 4-byte Folded Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	adcl	$0, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	112(%esp), %edx
-	movl	12(%edx), %edx
-	mulxl	48(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	mulxl	52(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	mulxl	40(%esp), %eax, %esi    # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	mulxl	44(%esp), %esi, %edi    # 4-byte Folded Reload
-	addl	%eax, %edi
-	mulxl	56(%esp), %eax, %edx    # 4-byte Folded Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	%ecx, %edx
-	movl	%edx, %ecx
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	%ebx, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	%ebp, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	imull	60(%esp), %edx          # 4-byte Folded Reload
-	mulxl	80(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	76(%esp), %edi, %esi    # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	addl	%ecx, %esi
-	movl	%esi, %ebp
-	mulxl	72(%esp), %esi, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %esi
-	movl	%esi, %eax
-	mulxl	68(%esp), %ebx, %edi    # 4-byte Folded Reload
+	subl	$44, %esp
+	movl	68(%esp), %eax
+	movl	(%eax), %edx
+	movl	4(%eax), %edi
+	xorl	%esi, %esi
+	movl	72(%esp), %ecx
+	subl	(%ecx), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	sbbl	4(%ecx), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	52(%eax), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	48(%eax), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	44(%eax), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	40(%eax), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	36(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	32(%eax), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	28(%eax), %edi
+	movl	24(%eax), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	20(%eax), %edx
+	movl	16(%eax), %ebx
+	movl	12(%eax), %ebp
+	movl	8(%eax), %eax
+	sbbl	8(%ecx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	sbbl	12(%ecx), %ebp
+	sbbl	16(%ecx), %ebx
+	sbbl	20(%ecx), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	sbbl	24(%ecx), %edx
+	sbbl	28(%ecx), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	32(%ecx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%ecx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	44(%ecx), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	48(%ecx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	52(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	64(%esp), %ecx
+	movl	%edx, 24(%ecx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 20(%ecx)
+	movl	%ebx, 16(%ecx)
+	movl	%ebp, 12(%ecx)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ecx)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%ecx)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, (%ecx)
+	sbbl	%esi, %esi
+	andl	$1, %esi
+	negl	%esi
+	movl	76(%esp), %ecx
+	movl	24(%ecx), %eax
+	andl	%esi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%ecx), %edx
+	andl	%esi, %edx
+	movl	16(%ecx), %ebx
+	andl	%esi, %ebx
+	movl	12(%ecx), %ebp
+	andl	%esi, %ebp
+	movl	8(%ecx), %ecx
+	andl	%esi, %ecx
+	movl	76(%esp), %eax
+	movl	4(%eax), %eax
+	andl	%esi, %eax
+	movl	76(%esp), %edi
+	andl	(%edi), %esi
+	addl	4(%esp), %esi                   # 4-byte Folded Reload
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	64(%esp), %edi
+	movl	%esi, 28(%edi)
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 32(%edi)
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ecx, 36(%edi)
+	adcl	24(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebp, 40(%edi)
+	adcl	28(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ebx, 44(%edi)
+	movl	%edx, 48(%edi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 52(%edi)
+	addl	$44, %esp
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+	retl
+.Lfunc_end38:
+	.size	mcl_fpDbl_sub7Lbmi2, .Lfunc_end38-mcl_fpDbl_sub7Lbmi2
+                                        # -- End function
+	.globl	mulPv256x32bmi2                 # -- Begin function mulPv256x32bmi2
+	.p2align	4, 0x90
+	.type	mulPv256x32bmi2,@function
+mulPv256x32bmi2:                        # @mulPv256x32bmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$16, %esp
+	movl	44(%esp), %edx
+	movl	40(%esp), %esi
+	mulxl	4(%esi), %ecx, %eax
+	mulxl	(%esi), %ebx, %edi
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	addl	%ecx, %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	mulxl	8(%esi), %edi, %ecx
+	adcl	%eax, %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	mulxl	12(%esi), %ebx, %eax
 	adcl	%ecx, %ebx
-	mulxl	64(%esp), %ecx, %esi    # 4-byte Folded Reload
-	adcl	%edi, %ecx
-	adcl	$0, %esi
-	movl	28(%esp), %edx          # 4-byte Reload
-	andl	$1, %edx
-	movl	12(%esp), %edi          # 4-byte Reload
-	addl	8(%esp), %edi           # 4-byte Folded Reload
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	16(%esp), %ebx          # 4-byte Folded Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	adcl	$0, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	112(%esp), %edx
-	movl	16(%edx), %edx
-	mulxl	40(%esp), %ebp, %eax    # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	mulxl	44(%esp), %eax, %edi    # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	addl	%ebp, %edi
-	mulxl	48(%esp), %ebp, %eax    # 4-byte Folded Reload
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mulxl	52(%esp), %ebp, %eax    # 4-byte Folded Reload
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %ebp, %edx    # 4-byte Folded Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %ebp
-	movl	48(%esp), %edx          # 4-byte Reload
+	mulxl	16(%esi), %edi, %ebp
+	movl	%esi, %ecx
+	adcl	%eax, %edi
+	mulxl	20(%esi), %esi, %eax
+	adcl	%ebp, %esi
+	mulxl	24(%ecx), %ecx, %ebp
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	adcl	%eax, %ecx
+	movl	36(%esp), %eax
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, (%eax)
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	movl	%ebp, 4(%eax)
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	movl	%ebp, 8(%eax)
+	movl	%ebx, 12(%eax)
+	movl	%edi, 16(%eax)
+	movl	%esi, 20(%eax)
+	movl	%ecx, 24(%eax)
+	movl	40(%esp), %ecx
+	mulxl	28(%ecx), %ecx, %edx
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 28(%eax)
 	adcl	$0, %edx
-	movl	44(%esp), %eax          # 4-byte Reload
-	addl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	adcl	%ebx, 52(%esp)          # 4-byte Folded Spill
-	adcl	%ecx, 56(%esp)          # 4-byte Folded Spill
-	adcl	%esi, %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	sbbl	%ebx, %ebx
-	movl	60(%esp), %edx          # 4-byte Reload
-	imull	%eax, %edx
-	mulxl	76(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	mulxl	80(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	addl	%eax, %ebp
-	mulxl	72(%esp), %edi, %eax    # 4-byte Folded Reload
-	adcl	%ecx, %edi
-	movl	%edx, %ecx
-	mulxl	68(%esp), %esi, %edx    # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	%eax, %esi
-	movl	%ecx, %edx
-	mulxl	64(%esp), %edx, %ecx    # 4-byte Folded Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %ecx
-	andl	$1, %ebx
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	44(%esp), %eax          # 4-byte Folded Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	adcl	$0, %ebx
-	subl	76(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	sbbl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, %ebp
-	sbbl	72(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	sbbl	64(%esp), %edx          # 4-byte Folded Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB68_2
-# BB#1:
-	movl	%eax, %edi
-.LBB68_2:
-	testb	%bl, %bl
-	movl	44(%esp), %ebx          # 4-byte Reload
-	jne	.LBB68_4
-# BB#3:
-	movl	76(%esp), %ebx          # 4-byte Reload
-.LBB68_4:
-	movl	104(%esp), %eax
-	movl	%ebx, (%eax)
-	movl	%edi, 4(%eax)
-	jne	.LBB68_6
-# BB#5:
-	movl	%ebp, %esi
-.LBB68_6:
-	movl	%esi, 8(%eax)
-	movl	60(%esp), %esi          # 4-byte Reload
-	jne	.LBB68_8
-# BB#7:
-	movl	80(%esp), %esi          # 4-byte Reload
-.LBB68_8:
+	movl	%edx, 32(%eax)
+	addl	$16, %esp
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+	retl	$4
+.Lfunc_end39:
+	.size	mulPv256x32bmi2, .Lfunc_end39-mulPv256x32bmi2
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre8Lbmi2         # -- Begin function mcl_fp_mulUnitPre8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre8Lbmi2,@function
+mcl_fp_mulUnitPre8Lbmi2:                # @mcl_fp_mulUnitPre8Lbmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$60, %esp
+	calll	.L40$pb
+.L40$pb:
+	popl	%ebx
+.Ltmp2:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.L40$pb), %ebx
+	subl	$4, %esp
+	movl	92(%esp), %eax
+	movl	88(%esp), %ecx
+	leal	28(%esp), %edx
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	24(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi
+	movl	40(%esp), %edi
+	movl	44(%esp), %ebx
+	movl	48(%esp), %ebp
+	movl	52(%esp), %edx
+	movl	56(%esp), %ecx
+	movl	80(%esp), %eax
+	movl	%ecx, 32(%eax)
+	movl	%edx, 28(%eax)
+	movl	%ebp, 24(%eax)
+	movl	%ebx, 20(%eax)
+	movl	%edi, 16(%eax)
 	movl	%esi, 12(%eax)
-	jne	.LBB68_10
-# BB#9:
-	movl	%edx, %ecx
-.LBB68_10:
-	movl	%ecx, 16(%eax)
-	addl	$84, %esp
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 8(%eax)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%eax)
+	addl	$60, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end68:
-	.size	mcl_fp_mont5Lbmi2, .Lfunc_end68-mcl_fp_mont5Lbmi2
-
-	.globl	mcl_fp_montNF5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF5Lbmi2,@function
-mcl_fp_montNF5Lbmi2:                    # @mcl_fp_montNF5Lbmi2
-# BB#0:
+.Lfunc_end40:
+	.size	mcl_fp_mulUnitPre8Lbmi2, .Lfunc_end40-mcl_fp_mulUnitPre8Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre8Lbmi2          # -- Begin function mcl_fpDbl_mulPre8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre8Lbmi2,@function
+mcl_fpDbl_mulPre8Lbmi2:                 # @mcl_fpDbl_mulPre8Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$68, %esp
-	movl	92(%esp), %edi
-	movl	(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	4(%edi), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx
-	movl	(%ecx), %ebx
-	mulxl	%ebx, %ecx, %esi
-	movl	%eax, %edx
-	mulxl	%ebx, %edx, %eax
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	addl	%ecx, %eax
-	movl	%eax, %ecx
-	movl	8(%edi), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	mulxl	%ebx, %eax, %ebp
-	adcl	%esi, %eax
-	movl	%eax, %esi
-	movl	12(%edi), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	mulxl	%ebx, %eax, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	%ebp, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	16(%edi), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	mulxl	%ebx, %edx, %eax
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
+	subl	$348, %esp                      # imm = 0x15C
+	calll	.L41$pb
+.L41$pb:
+	popl	%ebx
+.Ltmp3:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L41$pb), %ebx
+	movl	376(%esp), %ecx
+	subl	$4, %esp
+	leal	316(%esp), %eax
+	pushl	(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	344(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	340(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	336(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	332(%esp), %esi
+	movl	328(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	324(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	320(%esp), %edi
+	movl	312(%esp), %eax
+	movl	316(%esp), %ebp
+	movl	368(%esp), %ecx
+	movl	%eax, (%ecx)
+	subl	$4, %esp
+	leal	276(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	4(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	304(%esp), %eax
+	movl	%ebp, %edx
+	addl	272(%esp), %edx
+	adcl	276(%esp), %edi
+	movl	%edi, %ebp
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	280(%esp), %edi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	284(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	288(%esp), %esi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	292(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	296(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	300(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	368(%esp), %ecx
+	movl	%edx, 4(%ecx)
 	adcl	$0, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebx
-	movl	-4(%ebx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	movl	%edi, %edx
-	imull	%eax, %edx
-	movl	(%ebx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebp, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	addl	%edi, %ebp
-	movl	4(%ebx), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	mulxl	%eax, %eax, %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	adcl	%ecx, %eax
-	movl	%eax, %edi
-	movl	8(%ebx), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	mulxl	%eax, %eax, %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	%esi, %eax
-	movl	%eax, %esi
-	movl	12(%ebx), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %ebp
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	16(%ebx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebx, %edx
-	adcl	16(%esp), %ebx          # 4-byte Folded Reload
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	%ebp, %ebx
-	adcl	%edx, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	4(%eax), %edx
-	mulxl	36(%esp), %ecx, %esi    # 4-byte Folded Reload
-	mulxl	40(%esp), %edi, %eax    # 4-byte Folded Reload
-	addl	%ecx, %eax
-	mulxl	32(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
-	adcl	%esi, %ebp
-	mulxl	28(%esp), %esi, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	(%esp), %esi            # 4-byte Folded Reload
-	mulxl	24(%esp), %edx, %ecx    # 4-byte Folded Reload
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	adcl	$0, %ecx
-	addl	16(%esp), %edi          # 4-byte Folded Reload
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	%ebx, %esi
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	imull	44(%esp), %edx          # 4-byte Folded Reload
-	mulxl	64(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	addl	%edi, %ebx
-	mulxl	60(%esp), %edi, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	%eax, %edi
-	mulxl	56(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	adcl	%ebp, %ecx
-	mulxl	52(%esp), %eax, %ebp    # 4-byte Folded Reload
-	adcl	%esi, %eax
-	mulxl	48(%esp), %ebx, %edx    # 4-byte Folded Reload
-	adcl	16(%esp), %ebx          # 4-byte Folded Reload
-	movl	20(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	addl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	adcl	%ebp, %ebx
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	adcl	%edx, %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	8(%eax), %edx
-	mulxl	36(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	40(%esp), %ebp, %esi    # 4-byte Folded Reload
-	addl	%ecx, %esi
-	mulxl	32(%esp), %edi, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %edi
-	mulxl	28(%esp), %ebx, %eax    # 4-byte Folded Reload
-	movl	%eax, (%esp)            # 4-byte Spill
-	adcl	%ecx, %ebx
-	mulxl	24(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	adcl	$0, %eax
-	addl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	movl	%eax, %edx
-	imull	44(%esp), %edx          # 4-byte Folded Reload
-	mulxl	64(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	addl	%eax, %ebp
-	mulxl	60(%esp), %ebp, %eax    # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	%esi, %ebp
-	movl	%ebp, %esi
-	mulxl	56(%esp), %ebp, %eax    # 4-byte Folded Reload
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	adcl	%edi, %ebp
-	movl	%ebp, %eax
-	mulxl	52(%esp), %ebp, %edi    # 4-byte Folded Reload
-	adcl	%ebx, %ebp
-	movl	%ebp, %ebx
-	mulxl	48(%esp), %ebp, %edx    # 4-byte Folded Reload
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	20(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	adcl	%edi, %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	adcl	%edx, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	12(%eax), %edx
-	mulxl	36(%esp), %ecx, %esi    # 4-byte Folded Reload
-	mulxl	40(%esp), %ebx, %ebp    # 4-byte Folded Reload
-	addl	%ecx, %ebp
-	mulxl	32(%esp), %ecx, %edi    # 4-byte Folded Reload
-	adcl	%esi, %ecx
-	mulxl	28(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%eax, (%esp)            # 4-byte Spill
-	adcl	%edi, %esi
-	mulxl	24(%esp), %edi, %eax    # 4-byte Folded Reload
-	adcl	(%esp), %edi            # 4-byte Folded Reload
-	adcl	$0, %eax
-	addl	16(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	236(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	8(%ecx)
+	movl	380(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	264(%esp), %eax
+	movl	%ebp, %edx
+	addl	232(%esp), %edx
+	adcl	236(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	240(%esp), %edi
+	adcl	244(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	248(%esp), %ebp
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	252(%esp), %esi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	256(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	260(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	368(%esp), %ecx
+	movl	%edx, 8(%ecx)
 	adcl	$0, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	imull	44(%esp), %edx          # 4-byte Folded Reload
-	mulxl	64(%esp), %ebx, %eax    # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	addl	16(%esp), %ebx          # 4-byte Folded Reload
-	mulxl	60(%esp), %ebx, %eax    # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	%ebp, %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %eax, %ebx    # 4-byte Folded Reload
-	adcl	%ecx, %eax
-	mulxl	52(%esp), %ecx, %ebp    # 4-byte Folded Reload
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	%esi, %ecx
-	movl	%ecx, %esi
-	mulxl	48(%esp), %ecx, %edx    # 4-byte Folded Reload
-	adcl	%edi, %ecx
-	movl	20(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	movl	12(%esp), %edi          # 4-byte Reload
-	addl	%edi, 16(%esp)          # 4-byte Folded Spill
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	%ebx, %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	adcl	%edx, %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	16(%eax), %edx
-	mulxl	36(%esp), %eax, %ebp    # 4-byte Folded Reload
-	mulxl	40(%esp), %edi, %ebx    # 4-byte Folded Reload
-	addl	%eax, %ebx
-	mulxl	32(%esp), %eax, %esi    # 4-byte Folded Reload
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	%ebp, %eax
-	mulxl	28(%esp), %ebp, %esi    # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	mulxl	24(%esp), %edx, %esi    # 4-byte Folded Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %esi
-	addl	16(%esp), %edi          # 4-byte Folded Reload
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	%ecx, %ebp
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	44(%esp), %edx          # 4-byte Reload
-	imull	%edi, %edx
-	mulxl	64(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	addl	%edi, %ecx
-	mulxl	60(%esp), %edi, %eax    # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	%ebx, %edi
-	movl	%edx, %eax
-	mulxl	56(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	mulxl	52(%esp), %ecx, %edx    # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	%ebp, %ecx
-	movl	%eax, %edx
-	mulxl	48(%esp), %ebp, %edx    # 4-byte Folded Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	adcl	$0, %esi
-	addl	44(%esp), %edi          # 4-byte Folded Reload
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%edx, %esi
-	movl	%edi, %eax
-	subl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	sbbl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	sbbl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	sbbl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	sbbl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	sarl	$31, %edx
-	testl	%edx, %edx
-	js	.LBB69_2
-# BB#1:
-	movl	40(%esp), %edi          # 4-byte Reload
-.LBB69_2:
-	movl	88(%esp), %edx
-	movl	%edi, (%edx)
-	js	.LBB69_4
-# BB#3:
-	movl	44(%esp), %ebx          # 4-byte Reload
-.LBB69_4:
-	movl	%ebx, 4(%edx)
-	js	.LBB69_6
-# BB#5:
-	movl	56(%esp), %ecx          # 4-byte Reload
-.LBB69_6:
-	movl	%ecx, 8(%edx)
-	js	.LBB69_8
-# BB#7:
-	movl	60(%esp), %ebp          # 4-byte Reload
-.LBB69_8:
-	movl	%ebp, 12(%edx)
-	js	.LBB69_10
-# BB#9:
-	movl	64(%esp), %esi          # 4-byte Reload
-.LBB69_10:
-	movl	%esi, 16(%edx)
-	addl	$68, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end69:
-	.size	mcl_fp_montNF5Lbmi2, .Lfunc_end69-mcl_fp_montNF5Lbmi2
-
-	.globl	mcl_fp_montRed5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed5Lbmi2,@function
-mcl_fp_montRed5Lbmi2:                   # @mcl_fp_montRed5Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$68, %esp
-	movl	96(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	(%eax), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	imull	%esi, %edx
-	movl	16(%eax), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	12(%eax), %ebx
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	movl	8(%eax), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	4(%eax), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	mulxl	%esi, %esi, %eax
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	mulxl	%ebx, %esi, %eax
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	mulxl	%ebp, %ebp, %eax
-	mulxl	%ecx, %esi, %ecx
-	mulxl	%edi, %edx, %ebx
-	addl	%esi, %ebx
-	adcl	%ebp, %ecx
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	28(%esp), %edi          # 4-byte Reload
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	addl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	92(%esp), %ebp
-	adcl	4(%ebp), %ebx
-	adcl	8(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	12(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	adcl	16(%ebp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	20(%ebp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	36(%ebp), %edx
-	movl	32(%ebp), %esi
-	movl	28(%ebp), %edi
-	movl	24(%ebp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	leal	196(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	12(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	224(%esp), %eax
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	192(%esp), %edx
+	adcl	196(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	200(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	204(%esp), %ebp
+	adcl	208(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	212(%esp), %esi
+	movl	24(%esp), %edi                  # 4-byte Reload
+	adcl	216(%esp), %edi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	220(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	368(%esp), %ecx
+	movl	%edx, 12(%ecx)
 	adcl	$0, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, %esi
-	movl	%ebx, %edx
-	imull	48(%esp), %edx          # 4-byte Folded Reload
-	mulxl	60(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %ebp, %eax    # 4-byte Folded Reload
-	mulxl	52(%esp), %edi, %ecx    # 4-byte Folded Reload
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	addl	%ebp, %ecx
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, %ebp
-	mulxl	64(%esp), %eax, %edi    # 4-byte Folded Reload
-	movl	%edi, (%esp)            # 4-byte Spill
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	mulxl	%eax, %edi, %edx
-	adcl	(%esp), %edi            # 4-byte Folded Reload
-	adcl	$0, %edx
-	addl	%ebx, 4(%esp)           # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	28(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 20(%esp)          # 4-byte Folded Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%ecx, %edx
-	imull	48(%esp), %edx          # 4-byte Folded Reload
-	mulxl	%eax, %edi, %eax
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %edi, %eax    # 4-byte Folded Reload
-	movl	%eax, (%esp)            # 4-byte Spill
-	mulxl	52(%esp), %eax, %ebp    # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	addl	%edi, %ebp
-	mulxl	60(%esp), %ebx, %eax    # 4-byte Folded Reload
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	mulxl	64(%esp), %edi, %edx    # 4-byte Folded Reload
-	adcl	%eax, %edi
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, %eax
-	movl	32(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	%ecx, 8(%esp)           # 4-byte Folded Spill
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	%ebp, %edx
-	imull	48(%esp), %edx          # 4-byte Folded Reload
-	mulxl	40(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mulxl	52(%esp), %eax, %esi    # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	addl	%ecx, %esi
-	mulxl	60(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	mulxl	64(%esp), %edx, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %edx
-	movl	%edx, %eax
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	28(%esp), %edx          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	156(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	16(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	184(%esp), %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	addl	152(%esp), %ecx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	156(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	160(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	164(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	168(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	172(%esp), %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	176(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	180(%esp), %edi
+	movl	368(%esp), %eax
+	movl	%ecx, 16(%eax)
 	adcl	$0, %edx
-	addl	%ebp, 12(%esp)          # 4-byte Folded Spill
-	adcl	%ebx, %esi
-	adcl	%edi, 24(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	16(%esp), %ebx          # 4-byte Reload
-	adcl	$0, %ebx
-	movl	48(%esp), %edx          # 4-byte Reload
-	imull	%esi, %edx
-	mulxl	52(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %edi, %eax    # 4-byte Folded Reload
-	addl	%ecx, %edi
-	movl	%edx, %ebp
-	mulxl	60(%esp), %ecx, %edx    # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	%eax, %ecx
-	movl	%ebp, %edx
-	mulxl	64(%esp), %eax, %edx    # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, %edx
-	mulxl	40(%esp), %ebp, %edx    # 4-byte Folded Reload
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	116(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	20(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	144(%esp), %edx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	addl	112(%esp), %eax
+	movl	%ebp, %esi
+	adcl	116(%esp), %esi
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	120(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	124(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	128(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	132(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	136(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	140(%esp), %edi
+	movl	368(%esp), %ecx
+	movl	%eax, 20(%ecx)
 	adcl	$0, %edx
-	addl	%esi, 48(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebx, %esi
-	adcl	$0, %esi
-	movl	%edi, %ebx
-	subl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	%ecx, %ebx
-	sbbl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	sbbl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %ebx
-	sbbl	64(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 64(%esp)          # 4-byte Spill
 	movl	%edx, %ebp
-	sbbl	40(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	$0, %esi
-	andl	$1, %esi
-	jne	.LBB70_2
-# BB#1:
-	movl	56(%esp), %ecx          # 4-byte Reload
-.LBB70_2:
+	subl	$4, %esp
+	leal	76(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	24(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
 	movl	%esi, %eax
-	testb	%al, %al
-	jne	.LBB70_4
-# BB#3:
-	movl	52(%esp), %edi          # 4-byte Reload
-.LBB70_4:
-	movl	88(%esp), %esi
-	movl	%edi, (%esi)
-	movl	%ecx, 4(%esi)
-	movl	48(%esp), %eax          # 4-byte Reload
-	movl	36(%esp), %ecx          # 4-byte Reload
-	jne	.LBB70_6
-# BB#5:
-	movl	%ebx, %ecx
-.LBB70_6:
-	movl	%ecx, 8(%esi)
-	jne	.LBB70_8
-# BB#7:
-	movl	64(%esp), %eax          # 4-byte Reload
-.LBB70_8:
-	movl	%eax, 12(%esi)
-	jne	.LBB70_10
-# BB#9:
-	movl	%ebp, %edx
-.LBB70_10:
-	movl	%edx, 16(%esi)
-	addl	$68, %esp
+	addl	72(%esp), %eax
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	76(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	80(%esp), %esi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	84(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	88(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	92(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	96(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	100(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	104(%esp), %edx
+	movl	368(%esp), %ecx
+	movl	%eax, 24(%ecx)
+	adcl	$0, %edx
+	movl	%edx, %edi
+	subl	$4, %esp
+	leal	36(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	28(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	addl	32(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	36(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	40(%esp), %ebp
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	44(%esp), %ebx
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	48(%esp), %esi
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	52(%esp), %edx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	56(%esp), %eax
+	adcl	60(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	64(%esp), %edi
+	movl	368(%esp), %ecx
+	movl	%eax, 52(%ecx)
+	movl	%edx, 48(%ecx)
+	movl	%esi, 44(%ecx)
+	movl	%ebx, 40(%ecx)
+	movl	%ebp, 36(%ecx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 32(%ecx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 56(%ecx)
+	movl	8(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 28(%ecx)
+	adcl	$0, %edi
+	movl	%edi, 60(%ecx)
+	addl	$348, %esp                      # imm = 0x15C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end70:
-	.size	mcl_fp_montRed5Lbmi2, .Lfunc_end70-mcl_fp_montRed5Lbmi2
-
-	.globl	mcl_fp_addPre5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre5Lbmi2,@function
-mcl_fp_addPre5Lbmi2:                    # @mcl_fp_addPre5Lbmi2
-# BB#0:
+.Lfunc_end41:
+	.size	mcl_fpDbl_mulPre8Lbmi2, .Lfunc_end41-mcl_fpDbl_mulPre8Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre8Lbmi2          # -- Begin function mcl_fpDbl_sqrPre8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre8Lbmi2,@function
+mcl_fpDbl_sqrPre8Lbmi2:                 # @mcl_fpDbl_sqrPre8Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	28(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	24(%esp), %esi
-	addl	(%esi), %ecx
-	adcl	4(%esi), %edx
-	movl	8(%eax), %edi
-	adcl	8(%esi), %edi
-	movl	12(%esi), %ebx
-	movl	16(%esi), %esi
-	adcl	12(%eax), %ebx
-	movl	16(%eax), %eax
-	movl	20(%esp), %ebp
-	movl	%ecx, (%ebp)
-	movl	%edx, 4(%ebp)
-	movl	%edi, 8(%ebp)
-	movl	%ebx, 12(%ebp)
-	adcl	%esi, %eax
-	movl	%eax, 16(%ebp)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
+	subl	$348, %esp                      # imm = 0x15C
+	calll	.L42$pb
+.L42$pb:
 	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end71:
-	.size	mcl_fp_addPre5Lbmi2, .Lfunc_end71-mcl_fp_addPre5Lbmi2
-
-	.globl	mcl_fp_subPre5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre5Lbmi2,@function
-mcl_fp_subPre5Lbmi2:                    # @mcl_fp_subPre5Lbmi2
-# BB#0:
-	pushl	%edi
-	pushl	%esi
-	movl	16(%esp), %ecx
-	movl	(%ecx), %edx
-	xorl	%eax, %eax
-	movl	20(%esp), %esi
-	subl	(%esi), %edx
-	movl	12(%esp), %edi
-	movl	%edx, (%edi)
-	movl	4(%ecx), %edx
-	sbbl	4(%esi), %edx
-	movl	%edx, 4(%edi)
-	movl	8(%ecx), %edx
-	sbbl	8(%esi), %edx
-	movl	%edx, 8(%edi)
-	movl	12(%ecx), %edx
-	sbbl	12(%esi), %edx
-	movl	%edx, 12(%edi)
-	movl	16(%esi), %edx
-	movl	16(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 16(%edi)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end72:
-	.size	mcl_fp_subPre5Lbmi2, .Lfunc_end72-mcl_fp_subPre5Lbmi2
-
-	.globl	mcl_fp_shr1_5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_5Lbmi2,@function
-mcl_fp_shr1_5Lbmi2:                     # @mcl_fp_shr1_5Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	20(%esp), %eax
-	movl	16(%eax), %ecx
-	movl	12(%eax), %edx
-	movl	8(%eax), %esi
-	movl	(%eax), %edi
-	movl	4(%eax), %eax
-	shrdl	$1, %eax, %edi
-	movl	16(%esp), %ebx
-	movl	%edi, (%ebx)
-	shrdl	$1, %esi, %eax
-	movl	%eax, 4(%ebx)
-	shrdl	$1, %edx, %esi
-	movl	%esi, 8(%ebx)
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 12(%ebx)
-	shrl	%ecx
-	movl	%ecx, 16(%ebx)
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end73:
-	.size	mcl_fp_shr1_5Lbmi2, .Lfunc_end73-mcl_fp_shr1_5Lbmi2
-
-	.globl	mcl_fp_add5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add5Lbmi2,@function
-mcl_fp_add5Lbmi2:                       # @mcl_fp_add5Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	28(%esp), %ebx
-	movl	(%ebx), %eax
-	movl	4(%ebx), %ecx
-	movl	24(%esp), %edi
-	addl	(%edi), %eax
-	adcl	4(%edi), %ecx
-	movl	8(%ebx), %edx
-	adcl	8(%edi), %edx
-	movl	12(%edi), %esi
-	movl	16(%edi), %edi
-	adcl	12(%ebx), %esi
-	adcl	16(%ebx), %edi
-	movl	20(%esp), %ebx
-	movl	%eax, (%ebx)
-	movl	%ecx, 4(%ebx)
-	movl	%edx, 8(%ebx)
-	movl	%esi, 12(%ebx)
-	movl	%edi, 16(%ebx)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	32(%esp), %ebp
-	subl	(%ebp), %eax
-	sbbl	4(%ebp), %ecx
-	sbbl	8(%ebp), %edx
-	sbbl	12(%ebp), %esi
-	sbbl	16(%ebp), %edi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB74_2
-# BB#1:                                 # %nocarry
-	movl	20(%esp), %ebx
-	movl	%eax, (%ebx)
-	movl	%ecx, 4(%ebx)
-	movl	%edx, 8(%ebx)
-	movl	%esi, 12(%ebx)
-	movl	%edi, 16(%ebx)
-.LBB74_2:                               # %carry
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end74:
-	.size	mcl_fp_add5Lbmi2, .Lfunc_end74-mcl_fp_add5Lbmi2
-
-	.globl	mcl_fp_addNF5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF5Lbmi2,@function
-mcl_fp_addNF5Lbmi2:                     # @mcl_fp_addNF5Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$20, %esp
-	movl	48(%esp), %esi
-	movl	(%esi), %ebx
-	movl	4(%esi), %eax
-	movl	44(%esp), %edi
-	addl	(%edi), %ebx
-	adcl	4(%edi), %eax
-	movl	16(%esi), %ecx
-	movl	12(%esi), %edx
-	movl	8(%esi), %ebp
-	adcl	8(%edi), %ebp
-	adcl	12(%edi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	16(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi
-	movl	%ebx, %esi
-	subl	(%edi), %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	%eax, %esi
-	sbbl	4(%edi), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
+.Ltmp4:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.L42$pb), %ebx
+	movl	372(%esp), %ecx
+	subl	$4, %esp
+	leal	316(%esp), %eax
+	pushl	(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	344(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	340(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	336(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	332(%esp), %ebp
+	movl	328(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	324(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	320(%esp), %edi
+	movl	312(%esp), %eax
+	movl	316(%esp), %esi
+	movl	368(%esp), %ecx
+	movl	%eax, (%ecx)
+	subl	$4, %esp
+	leal	276(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	4(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	304(%esp), %eax
+	movl	%esi, %edx
+	addl	272(%esp), %edx
+	adcl	276(%esp), %edi
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	280(%esp), %esi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	284(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	288(%esp), %ebp
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	292(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	296(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	300(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	368(%esp), %ecx
+	movl	%edx, 4(%ecx)
+	adcl	$0, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	236(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	264(%esp), %eax
+	movl	%edi, %edx
+	addl	232(%esp), %edx
+	adcl	236(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	240(%esp), %esi
+	adcl	244(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	248(%esp), %ebp
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	252(%esp), %edi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	256(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	260(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	368(%esp), %ecx
+	movl	%edx, 8(%ecx)
+	adcl	$0, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	leal	196(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	12(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	224(%esp), %eax
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	192(%esp), %edx
+	adcl	196(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	200(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	204(%esp), %ebp
+	adcl	208(%esp), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	212(%esp), %esi
+	movl	24(%esp), %edi                  # 4-byte Reload
+	adcl	216(%esp), %edi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	220(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	368(%esp), %ecx
+	movl	%edx, 12(%ecx)
+	adcl	$0, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	156(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	16(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	184(%esp), %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	addl	152(%esp), %ecx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	156(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	160(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	164(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	168(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	172(%esp), %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	176(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	180(%esp), %edi
+	movl	368(%esp), %eax
+	movl	%ecx, 16(%eax)
+	adcl	$0, %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	116(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	20(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	144(%esp), %edx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	addl	112(%esp), %eax
 	movl	%ebp, %esi
-	sbbl	8(%edi), %esi
-	sbbl	12(%edi), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%ecx, %edx
-	sbbl	16(%edi), %edx
+	adcl	116(%esp), %esi
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	120(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	124(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	128(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	132(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	136(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	140(%esp), %edi
+	movl	368(%esp), %ecx
+	movl	%eax, 20(%ecx)
+	adcl	$0, %edx
+	movl	%edx, %ebp
+	subl	$4, %esp
+	leal	76(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	24(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	72(%esp), %esi
+	movl	%esi, %eax
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	76(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	80(%esp), %esi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	84(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	88(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	92(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	96(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	100(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	104(%esp), %edx
+	movl	368(%esp), %ecx
+	movl	%eax, 24(%ecx)
+	adcl	$0, %edx
 	movl	%edx, %edi
-	sarl	$31, %edi
-	testl	%edi, %edi
-	js	.LBB75_2
-# BB#1:
-	movl	(%esp), %ebx            # 4-byte Reload
-.LBB75_2:
-	movl	40(%esp), %edi
-	movl	%ebx, (%edi)
-	js	.LBB75_4
-# BB#3:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB75_4:
-	movl	%eax, 4(%edi)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	js	.LBB75_6
-# BB#5:
-	movl	%esi, %ebp
-.LBB75_6:
-	movl	%ebp, 8(%edi)
-	movl	16(%esp), %eax          # 4-byte Reload
-	js	.LBB75_8
-# BB#7:
-	movl	8(%esp), %ecx           # 4-byte Reload
-.LBB75_8:
-	movl	%ecx, 12(%edi)
-	js	.LBB75_10
-# BB#9:
-	movl	%edx, %eax
-.LBB75_10:
-	movl	%eax, 16(%edi)
-	addl	$20, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end75:
-	.size	mcl_fp_addNF5Lbmi2, .Lfunc_end75-mcl_fp_addNF5Lbmi2
-
-	.globl	mcl_fp_sub5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub5Lbmi2,@function
-mcl_fp_sub5Lbmi2:                       # @mcl_fp_sub5Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %ecx
-	xorl	%ebx, %ebx
-	movl	28(%esp), %ebp
-	subl	(%ebp), %eax
-	sbbl	4(%ebp), %ecx
-	movl	8(%edi), %edx
-	sbbl	8(%ebp), %edx
-	movl	12(%edi), %esi
-	sbbl	12(%ebp), %esi
-	movl	16(%edi), %edi
-	sbbl	16(%ebp), %edi
-	movl	20(%esp), %ebp
-	movl	%eax, (%ebp)
-	movl	%ecx, 4(%ebp)
-	movl	%edx, 8(%ebp)
-	movl	%esi, 12(%ebp)
-	movl	%edi, 16(%ebp)
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	je	.LBB76_2
-# BB#1:                                 # %carry
-	movl	32(%esp), %ebx
-	addl	(%ebx), %eax
-	movl	%eax, (%ebp)
-	adcl	4(%ebx), %ecx
-	movl	%ecx, 4(%ebp)
-	adcl	8(%ebx), %edx
-	movl	%edx, 8(%ebp)
-	movl	12(%ebx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 12(%ebp)
-	movl	16(%ebx), %eax
-	adcl	%edi, %eax
-	movl	%eax, 16(%ebp)
-.LBB76_2:                               # %nocarry
+	subl	$4, %esp
+	leal	36(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	28(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	addl	32(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	36(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	40(%esp), %ebp
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	44(%esp), %ebx
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	48(%esp), %esi
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	52(%esp), %edx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	56(%esp), %eax
+	adcl	60(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	64(%esp), %edi
+	movl	368(%esp), %ecx
+	movl	%eax, 52(%ecx)
+	movl	%edx, 48(%ecx)
+	movl	%esi, 44(%ecx)
+	movl	%ebx, 40(%ecx)
+	movl	%ebp, 36(%ecx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 32(%ecx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 56(%ecx)
+	movl	8(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 28(%ecx)
+	adcl	$0, %edi
+	movl	%edi, 60(%ecx)
+	addl	$348, %esp                      # imm = 0x15C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end76:
-	.size	mcl_fp_sub5Lbmi2, .Lfunc_end76-mcl_fp_sub5Lbmi2
-
-	.globl	mcl_fp_subNF5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF5Lbmi2,@function
-mcl_fp_subNF5Lbmi2:                     # @mcl_fp_subNF5Lbmi2
-# BB#0:
+.Lfunc_end42:
+	.size	mcl_fpDbl_sqrPre8Lbmi2, .Lfunc_end42-mcl_fpDbl_sqrPre8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_mont8Lbmi2               # -- Begin function mcl_fp_mont8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mont8Lbmi2,@function
+mcl_fp_mont8Lbmi2:                      # @mcl_fp_mont8Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$16, %esp
-	movl	40(%esp), %edi
-	movl	(%edi), %ecx
-	movl	4(%edi), %eax
-	movl	44(%esp), %ebx
-	subl	(%ebx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	sbbl	4(%ebx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	16(%edi), %esi
-	movl	12(%edi), %eax
-	movl	8(%edi), %ecx
-	sbbl	8(%ebx), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	sbbl	12(%ebx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	sbbl	16(%ebx), %esi
-	movl	%esi, %ebx
-	sarl	$31, %ebx
-	movl	%ebx, %ebp
-	shldl	$1, %esi, %ebp
-	movl	48(%esp), %edi
-	movl	4(%edi), %ecx
-	andl	%ebp, %ecx
-	andl	(%edi), %ebp
-	movl	16(%edi), %edx
-	andl	%ebx, %edx
-	rorxl	$31, %ebx, %eax
-	andl	12(%edi), %ebx
-	andl	8(%edi), %eax
-	addl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	36(%esp), %edi
-	movl	%ebp, (%edi)
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%ecx, 4(%edi)
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	movl	%eax, 8(%edi)
-	movl	%ebx, 12(%edi)
-	adcl	%esi, %edx
-	movl	%edx, 16(%edi)
-	addl	$16, %esp
-	popl	%esi
-	popl	%edi
+	subl	$684, %esp                      # imm = 0x2AC
+	calll	.L43$pb
+.L43$pb:
 	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end77:
-	.size	mcl_fp_subNF5Lbmi2, .Lfunc_end77-mcl_fp_subNF5Lbmi2
-
-	.globl	mcl_fpDbl_add5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add5Lbmi2,@function
-mcl_fpDbl_add5Lbmi2:                    # @mcl_fpDbl_add5Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$28, %esp
-	movl	56(%esp), %edx
-	movl	52(%esp), %ecx
-	movl	12(%ecx), %ebx
-	movl	16(%ecx), %ebp
-	movl	8(%edx), %esi
-	movl	(%edx), %edi
-	addl	(%ecx), %edi
-	movl	48(%esp), %eax
-	movl	%edi, (%eax)
-	movl	4(%edx), %edi
-	adcl	4(%ecx), %edi
-	adcl	8(%ecx), %esi
-	adcl	12(%edx), %ebx
-	adcl	16(%edx), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	%edi, 4(%eax)
-	movl	28(%edx), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	%esi, 8(%eax)
-	movl	20(%edx), %esi
-	movl	%ebx, 12(%eax)
-	movl	20(%ecx), %ebp
-	adcl	%esi, %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	24(%edx), %esi
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%eax)
-	movl	24(%ecx), %ebx
-	adcl	%esi, %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	28(%ecx), %edi
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	32(%edx), %eax
-	movl	32(%ecx), %esi
-	adcl	%eax, %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	36(%edx), %eax
-	movl	36(%ecx), %edx
-	adcl	%eax, %edx
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%ebp, %ecx
-	movl	60(%esp), %ebp
-	subl	(%ebp), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	sbbl	4(%ebp), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	sbbl	8(%ebp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	%esi, %ebx
+.Ltmp5:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp5-.L43$pb), %ebx
+	movl	716(%esp), %eax
+	movl	-4(%eax), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	712(%esp), %ecx
+	subl	$4, %esp
+	leal	652(%esp), %eax
+	pushl	(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	648(%esp), %edi
+	movl	652(%esp), %esi
+	movl	%ebp, %eax
+	imull	%edi, %eax
+	movl	680(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	676(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	672(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	668(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	664(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	660(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	656(%esp), %ebp
+	subl	$4, %esp
+	leal	612(%esp), %ecx
+	pushl	%eax
+	pushl	724(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	608(%esp), %edi
+	adcl	612(%esp), %esi
+	adcl	616(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	620(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	624(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	628(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	632(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	636(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	640(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	572(%esp), %ecx
+	movzbl	%al, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	716(%esp), %eax
+	pushl	4(%eax)
+	pushl	716(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	568(%esp), %esi
+	adcl	572(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	576(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	580(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	584(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	588(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	592(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	596(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	600(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	532(%esp), %ecx
+	movl	48(%esp), %edx                  # 4-byte Reload
+	imull	%esi, %edx
+	movzbl	%al, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	724(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	528(%esp), %esi
+	adcl	532(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	536(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	540(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	544(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	552(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	556(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	560(%esp), %esi
+	adcl	$0, 24(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	492(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	488(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	492(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	496(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	500(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	504(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	508(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	512(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	516(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	520(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	452(%esp), %ecx
+	movl	48(%esp), %edx                  # 4-byte Reload
+	imull	%ebp, %edx
+	movzbl	%al, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	724(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	448(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	452(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	456(%esp), %edi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	464(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	468(%esp), %ebp
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	472(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	476(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	480(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	$0, 32(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	412(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	12(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	addl	408(%esp), %eax
+	adcl	412(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	416(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	420(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	424(%esp), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	adcl	428(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	432(%esp), %esi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	436(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	440(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	setb	%cl
+	subl	$4, %esp
+	leal	372(%esp), %ebp
+	movl	48(%esp), %edx                  # 4-byte Reload
+	movl	%eax, %edi
+	imull	%eax, %edx
+	movzbl	%cl, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	pushl	%edx
+	movl	724(%esp), %eax
+	pushl	%eax
+	pushl	%ebp
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	368(%esp), %edi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	372(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	adcl	376(%esp), %ebp
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	380(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	384(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	388(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	392(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	396(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	400(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	$0, 8(%esp)                     # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	332(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	16(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	addl	328(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	332(%esp), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	adcl	336(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	340(%esp), %ebp
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	344(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	348(%esp), %edi
+	adcl	352(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	356(%esp), %esi
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	360(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	setb	%cl
+	subl	$4, %esp
+	movl	48(%esp), %edx                  # 4-byte Reload
+	imull	%eax, %edx
+	movzbl	%cl, %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	724(%esp)
+	leal	300(%esp), %eax
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	addl	288(%esp), %eax
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	292(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	296(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	300(%esp), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	304(%esp), %ebp
+	adcl	308(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	316(%esp), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	320(%esp), %edi
+	adcl	$0, 40(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	252(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	20(%ecx)
+	movl	716(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	addl	248(%esp), %eax
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	252(%esp), %esi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	256(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	260(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	264(%esp), %ebp
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	268(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	272(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	276(%esp), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	280(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	setb	%cl
+	subl	$4, %esp
+	movl	48(%esp), %edx                  # 4-byte Reload
+	imull	%eax, %edx
+	movl	%eax, %edi
+	movzbl	%cl, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	724(%esp)
+	leal	220(%esp), %eax
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	208(%esp), %edi
+	adcl	212(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	216(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	220(%esp), %esi
+	adcl	224(%esp), %ebp
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	228(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	240(%esp), %edi
+	adcl	$0, 12(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	716(%esp), %eax
+	pushl	24(%eax)
+	movl	716(%esp), %eax
+	pushl	%eax
+	leal	180(%esp), %eax
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	168(%esp), %edx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	176(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	180(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	184(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	188(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	192(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	196(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	200(%esp), %ebp
+	setb	%al
+	subl	$4, %esp
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
 	movl	%edx, %esi
-	sbbl	12(%ebp), %ebx
-	sbbl	16(%ebp), %edx
+	movzbl	%al, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	movl	724(%esp), %eax
+	pushl	%eax
+	leal	140(%esp), %eax
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	128(%esp), %esi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	132(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	136(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	140(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	144(%esp), %esi
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	148(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	152(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	156(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	160(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	$0, %ebp
+	subl	$4, %esp
+	leal	92(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	28(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	4(%esp), %edx                   # 4-byte Reload
+	addl	88(%esp), %edx
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	92(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	96(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	100(%esp), %esi
+	adcl	104(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	108(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	112(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	116(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	120(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	setb	4(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	leal	52(%esp), %eax
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %ebp
+	pushl	%ecx
+	movl	724(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	48(%esp), %ebp
+	movzbl	4(%esp), %eax                   # 1-byte Folded Reload
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	52(%esp), %ecx
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	56(%esp), %edx
+	adcl	60(%esp), %esi
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	adcl	64(%esp), %ebx
+	movl	%edi, %ebp
+	adcl	68(%esp), %ebp
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	72(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	76(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	80(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	%ecx, %edi
+	movl	716(%esp), %ecx
+	subl	(%ecx), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	sbbl	4(%ecx), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%esi, %edx
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	sbbl	8(%ecx), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	sbbl	12(%ecx), %ebx
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	sbbl	16(%ecx), %ebp
+	movl	%ecx, %edx
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	sbbl	20(%edx), %ecx
+	movl	12(%esp), %esi                  # 4-byte Reload
+	sbbl	24(%edx), %esi
+	movl	20(%esp), %edi                  # 4-byte Reload
+	sbbl	28(%edx), %edi
 	sbbl	$0, %eax
-	andl	$1, %eax
-	jne	.LBB78_2
-# BB#1:
-	movl	%edx, %esi
-.LBB78_2:
-	testb	%al, %al
-	movl	12(%esp), %ebp          # 4-byte Reload
-	jne	.LBB78_4
-# BB#3:
-	movl	(%esp), %ebp            # 4-byte Reload
-.LBB78_4:
-	movl	48(%esp), %eax
-	movl	%ebp, 20(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	20(%esp), %edx          # 4-byte Reload
-	movl	16(%esp), %edi          # 4-byte Reload
-	jne	.LBB78_6
-# BB#5:
-	movl	4(%esp), %edi           # 4-byte Reload
-.LBB78_6:
-	movl	%edi, 24(%eax)
-	jne	.LBB78_8
-# BB#7:
-	movl	8(%esp), %edx           # 4-byte Reload
-.LBB78_8:
-	movl	%edx, 28(%eax)
-	jne	.LBB78_10
-# BB#9:
-	movl	%ebx, %ecx
-.LBB78_10:
-	movl	%ecx, 32(%eax)
-	movl	%esi, 36(%eax)
-	addl	$28, %esp
+	testb	$1, %al
+	jne	.LBB43_1
+# %bb.2:
+	movl	704(%esp), %eax
+	movl	%edi, 28(%eax)
+	jne	.LBB43_3
+.LBB43_4:
+	movl	%esi, 24(%eax)
+	movl	36(%esp), %edx                  # 4-byte Reload
+	jne	.LBB43_5
+.LBB43_6:
+	movl	%ecx, 20(%eax)
+	movl	44(%esp), %esi                  # 4-byte Reload
+	jne	.LBB43_7
+.LBB43_8:
+	movl	%ebp, 16(%eax)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	jne	.LBB43_9
+.LBB43_10:
+	movl	%ebx, 12(%eax)
+	jne	.LBB43_11
+.LBB43_12:
+	movl	%esi, 8(%eax)
+	jne	.LBB43_13
+.LBB43_14:
+	movl	%edx, 4(%eax)
+	je	.LBB43_16
+.LBB43_15:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+.LBB43_16:
+	movl	%ecx, (%eax)
+	addl	$684, %esp                      # imm = 0x2AC
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end78:
-	.size	mcl_fpDbl_add5Lbmi2, .Lfunc_end78-mcl_fpDbl_add5Lbmi2
-
-	.globl	mcl_fpDbl_sub5Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub5Lbmi2,@function
-mcl_fpDbl_sub5Lbmi2:                    # @mcl_fpDbl_sub5Lbmi2
-# BB#0:
+.LBB43_1:
+	movl	20(%esp), %edi                  # 4-byte Reload
+	movl	704(%esp), %eax
+	movl	%edi, 28(%eax)
+	je	.LBB43_4
+.LBB43_3:
+	movl	12(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 24(%eax)
+	movl	36(%esp), %edx                  # 4-byte Reload
+	je	.LBB43_6
+.LBB43_5:
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%eax)
+	movl	44(%esp), %esi                  # 4-byte Reload
+	je	.LBB43_8
+.LBB43_7:
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	movl	%ebp, 16(%eax)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	je	.LBB43_10
+.LBB43_9:
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 12(%eax)
+	je	.LBB43_12
+.LBB43_11:
+	movl	24(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%eax)
+	je	.LBB43_14
+.LBB43_13:
+	movl	28(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%eax)
+	jne	.LBB43_15
+	jmp	.LBB43_16
+.Lfunc_end43:
+	.size	mcl_fp_mont8Lbmi2, .Lfunc_end43-mcl_fp_mont8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montNF8Lbmi2             # -- Begin function mcl_fp_montNF8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF8Lbmi2,@function
+mcl_fp_montNF8Lbmi2:                    # @mcl_fp_montNF8Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$16, %esp
-	movl	40(%esp), %eax
-	movl	(%eax), %esi
-	movl	4(%eax), %edi
-	movl	44(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%eax), %ebx
-	sbbl	8(%edx), %ebx
-	movl	36(%esp), %ecx
-	movl	%esi, (%ecx)
-	movl	12(%eax), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ecx)
-	movl	16(%eax), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ecx)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	24(%edx), %esi
-	movl	%edi, 16(%ecx)
-	movl	24(%eax), %ebp
-	sbbl	%esi, %ebp
-	movl	28(%edx), %esi
-	movl	28(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	movl	32(%edx), %esi
-	movl	32(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	36(%edx), %edx
-	movl	36(%eax), %eax
-	sbbl	%edx, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	$0, %edx
-	sbbl	$0, %edx
-	andl	$1, %edx
-	movl	48(%esp), %ebx
-	jne	.LBB79_1
-# BB#2:
-	xorl	%eax, %eax
-	jmp	.LBB79_3
-.LBB79_1:
-	movl	16(%ebx), %eax
-.LBB79_3:
-	testb	%dl, %dl
-	jne	.LBB79_4
-# BB#5:
-	movl	$0, %edx
-	movl	$0, %esi
-	jmp	.LBB79_6
-.LBB79_4:
-	movl	(%ebx), %esi
-	movl	4(%ebx), %edx
-.LBB79_6:
-	jne	.LBB79_7
-# BB#8:
-	movl	$0, %edi
-	jmp	.LBB79_9
-.LBB79_7:
-	movl	12(%ebx), %edi
-.LBB79_9:
-	jne	.LBB79_10
-# BB#11:
-	xorl	%ebx, %ebx
-	jmp	.LBB79_12
-.LBB79_10:
-	movl	8(%ebx), %ebx
-.LBB79_12:
-	addl	4(%esp), %esi           # 4-byte Folded Reload
-	adcl	%ebp, %edx
-	movl	%esi, 20(%ecx)
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	movl	%edx, 24(%ecx)
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%ebx, 28(%ecx)
-	movl	%edi, 32(%ecx)
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%ecx)
-	addl	$16, %esp
-	popl	%esi
-	popl	%edi
+	subl	$684, %esp                      # imm = 0x2AC
+	calll	.L44$pb
+.L44$pb:
 	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end79:
-	.size	mcl_fpDbl_sub5Lbmi2, .Lfunc_end79-mcl_fpDbl_sub5Lbmi2
-
-	.globl	mcl_fp_mulUnitPre6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre6Lbmi2,@function
-mcl_fp_mulUnitPre6Lbmi2:                # @mcl_fp_mulUnitPre6Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$12, %esp
-	movl	40(%esp), %edx
-	movl	36(%esp), %esi
-	mulxl	4(%esi), %ecx, %edi
-	mulxl	(%esi), %eax, %ebx
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	addl	%ecx, %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	mulxl	8(%esi), %ebp, %eax
-	adcl	%edi, %ebp
-	mulxl	12(%esi), %ecx, %edi
-	adcl	%eax, %ecx
-	mulxl	16(%esi), %eax, %ebx
-	movl	%ebx, (%esp)            # 4-byte Spill
-	adcl	%edi, %eax
-	mulxl	20(%esi), %edx, %esi
-	movl	32(%esp), %edi
-	movl	8(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, (%edi)
-	movl	4(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, 4(%edi)
-	movl	%ebp, 8(%edi)
-	movl	%ecx, 12(%edi)
-	movl	%eax, 16(%edi)
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 20(%edi)
-	adcl	$0, %esi
-	movl	%esi, 24(%edi)
+.Ltmp6:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp6-.L44$pb), %ebx
+	movl	716(%esp), %eax
+	movl	-4(%eax), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	movl	712(%esp), %ecx
+	subl	$4, %esp
+	leal	652(%esp), %eax
+	pushl	(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	648(%esp), %ebp
+	movl	652(%esp), %edi
+	movl	%esi, %eax
+	imull	%ebp, %eax
+	movl	680(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	676(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	672(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	668(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	664(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	660(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	656(%esp), %esi
+	subl	$4, %esp
+	leal	612(%esp), %ecx
+	pushl	%eax
+	pushl	724(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	608(%esp), %ebp
+	adcl	612(%esp), %edi
+	adcl	616(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	620(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	624(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	628(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	632(%esp), %ebp
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	636(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	640(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	572(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	4(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	600(%esp), %eax
+	addl	568(%esp), %edi
+	adcl	572(%esp), %esi
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	576(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	580(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	584(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	588(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	592(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	596(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, %ebp
+	subl	$4, %esp
+	leal	532(%esp), %eax
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%edi, %ecx
+	pushl	%ecx
+	movl	724(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	528(%esp), %edi
+	adcl	532(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	536(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	540(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	544(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	552(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %edi                  # 4-byte Reload
+	adcl	556(%esp), %edi
+	adcl	560(%esp), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	492(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	520(%esp), %ecx
+	addl	488(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	492(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	496(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	500(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	504(%esp), %ebp
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	508(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	512(%esp), %edi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	516(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	$0, %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	452(%esp), %eax
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%esi, %ecx
+	pushl	%ecx
+	pushl	724(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	448(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	452(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	456(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	464(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	468(%esp), %esi
+	adcl	472(%esp), %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	476(%esp), %edi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	480(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	412(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	12(%ecx)
+	movl	716(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	440(%esp), %ecx
+	movl	8(%esp), %edx                   # 4-byte Reload
+	addl	408(%esp), %edx
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	412(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	416(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	420(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	424(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	428(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	432(%esp), %edi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	436(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	$0, %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	leal	372(%esp), %eax
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	724(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	368(%esp), %esi
+	adcl	372(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	376(%esp), %esi
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	380(%esp), %ebp
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	384(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	388(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	392(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	396(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	400(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	leal	332(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	16(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
 	addl	$12, %esp
+	movl	360(%esp), %eax
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	328(%esp), %edx
+	adcl	332(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	336(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	340(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	344(%esp), %esi
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	348(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	352(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	356(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, %ebp
+	subl	$4, %esp
+	leal	292(%esp), %eax
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %edi
+	pushl	%ecx
+	pushl	724(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	288(%esp), %edi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	292(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	296(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	300(%esp), %edi
+	adcl	304(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	308(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	316(%esp), %esi
+	adcl	320(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	252(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	20(%ecx)
+	movl	716(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	leal	208(%esp), %eax
+	movl	280(%esp), %ebp
+	movl	12(%esp), %edx                  # 4-byte Reload
+	addl	248(%esp), %edx
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	252(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	256(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	260(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	264(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	268(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	272(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	276(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	$0, %ebp
+	subl	$4, %esp
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	724(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	208(%esp), %esi
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	212(%esp), %edi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	216(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	220(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	224(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	228(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	240(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	716(%esp), %eax
+	pushl	24(%eax)
+	pushl	716(%esp)
+	leal	180(%esp), %eax
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	leal	128(%esp), %eax
+	movl	200(%esp), %ebp
+	movl	%edi, %edx
+	addl	168(%esp), %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	172(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %edi                  # 4-byte Reload
+	adcl	176(%esp), %edi
+	adcl	180(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	184(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	188(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	192(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	196(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	$0, %ebp
+	subl	$4, %esp
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	724(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	128(%esp), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	132(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	136(%esp), %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	140(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	144(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	148(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	152(%esp), %edi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	156(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	160(%esp), %ebp
+	subl	$4, %esp
+	leal	92(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	28(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	movl	16(%esp), %edx                  # 4-byte Reload
+	addl	88(%esp), %edx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	92(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	96(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	100(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	104(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	108(%esp), %edi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	112(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	116(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	120(%esp), %ebp
+	adcl	$0, %ebp
+	subl	$4, %esp
+	leal	52(%esp), %eax
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	movl	724(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	48(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	52(%esp), %eax
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	56(%esp), %ecx
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	60(%esp), %edx
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	64(%esp), %esi
+	adcl	68(%esp), %edi
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	72(%esp), %ebx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	76(%esp), %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	adcl	80(%esp), %ebp
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	716(%esp), %ebx
+	subl	(%ebx), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	sbbl	4(%ebx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	sbbl	8(%ebx), %edx
+	movl	%esi, %eax
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	sbbl	12(%ebx), %eax
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	%edi, %ecx
+	sbbl	16(%ebx), %ecx
+	movl	12(%esp), %edi                  # 4-byte Reload
+	sbbl	20(%ebx), %edi
+	movl	16(%esp), %esi                  # 4-byte Reload
+	sbbl	24(%ebx), %esi
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	716(%esp), %ebx
+	sbbl	28(%ebx), %ebp
+	testl	%ebp, %ebp
+	js	.LBB44_1
+# %bb.2:
+	movl	704(%esp), %ebx
+	movl	%ebp, 28(%ebx)
+	js	.LBB44_3
+.LBB44_4:
+	movl	%esi, 24(%ebx)
+	js	.LBB44_5
+.LBB44_6:
+	movl	%edi, 20(%ebx)
+	js	.LBB44_7
+.LBB44_8:
+	movl	%ecx, 16(%ebx)
+	js	.LBB44_9
+.LBB44_10:
+	movl	%eax, 12(%ebx)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	js	.LBB44_11
+.LBB44_12:
+	movl	%edx, 8(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	js	.LBB44_13
+.LBB44_14:
+	movl	%ecx, 4(%ebx)
+	jns	.LBB44_16
+.LBB44_15:
+	movl	24(%esp), %eax                  # 4-byte Reload
+.LBB44_16:
+	movl	%eax, (%ebx)
+	addl	$684, %esp                      # imm = 0x2AC
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end80:
-	.size	mcl_fp_mulUnitPre6Lbmi2, .Lfunc_end80-mcl_fp_mulUnitPre6Lbmi2
-
-	.globl	mcl_fpDbl_mulPre6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre6Lbmi2,@function
-mcl_fpDbl_mulPre6Lbmi2:                 # @mcl_fpDbl_mulPre6Lbmi2
-# BB#0:
+.LBB44_1:
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	movl	704(%esp), %ebx
+	movl	%ebp, 28(%ebx)
+	jns	.LBB44_4
+.LBB44_3:
+	movl	16(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 24(%ebx)
+	jns	.LBB44_6
+.LBB44_5:
+	movl	12(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 20(%ebx)
+	jns	.LBB44_8
+.LBB44_7:
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%ebx)
+	jns	.LBB44_10
+.LBB44_9:
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 12(%ebx)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB44_12
+.LBB44_11:
+	movl	28(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	jns	.LBB44_14
+.LBB44_13:
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%ebx)
+	js	.LBB44_15
+	jmp	.LBB44_16
+.Lfunc_end44:
+	.size	mcl_fp_montNF8Lbmi2, .Lfunc_end44-mcl_fp_montNF8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRed8Lbmi2            # -- Begin function mcl_fp_montRed8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed8Lbmi2,@function
+mcl_fp_montRed8Lbmi2:                   # @mcl_fp_montRed8Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$56, %esp
-	movl	80(%esp), %ebp
-	movl	(%ebp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	4(%ebp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	(%eax), %eax
-	mulxl	%eax, %esi, %edi
-	movl	%ecx, %edx
-	mulxl	%eax, %edx, %ecx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	addl	%esi, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	8(%ebp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	mulxl	%eax, %esi, %ebx
-	adcl	%edi, %esi
-	movl	12(%ebp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%ebp, %ecx
-	mulxl	%eax, %edi, %ebp
-	adcl	%ebx, %edi
-	movl	16(%ecx), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebx, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	%ebp, %ebx
+	subl	$380, %esp                      # imm = 0x17C
+	calll	.L45$pb
+.L45$pb:
+	popl	%ebx
+.Ltmp7:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp7-.L45$pb), %ebx
+	movl	408(%esp), %ecx
+	movl	28(%ecx), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	24(%ecx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	20(%ecx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	12(%ecx), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	8(%ecx), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %eax
 	movl	%ecx, %edx
-	movl	20(%edx), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	mulxl	%eax, %eax, %ecx
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	76(%esp), %edx
-	movl	28(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%edx)
-	adcl	$0, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx
-	movl	4(%edx), %ebp
-	movl	52(%esp), %edx          # 4-byte Reload
-	mulxl	%ebp, %edx, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	addl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	mulxl	%ebp, %ecx, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	%esi, %ecx
-	movl	44(%esp), %edx          # 4-byte Reload
-	mulxl	%ebp, %esi, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	%edi, %esi
-	movl	40(%esp), %edx          # 4-byte Reload
-	mulxl	%ebp, %edi, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	%ebx, %edi
-	movl	36(%esp), %edx          # 4-byte Reload
-	mulxl	%ebp, %ebx, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	%eax, %ebx
-	movl	32(%esp), %edx          # 4-byte Reload
-	mulxl	%ebp, %eax, %edx
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %ebp
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	28(%esp), %ecx          # 4-byte Folded Reload
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	adcl	%edx, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax
-	movl	52(%esp), %edx          # 4-byte Reload
-	movl	%edx, 4(%eax)
-	movl	80(%esp), %eax
-	movl	(%eax), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	8(%eax), %eax
-	mulxl	%eax, %edx, %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	addl	%ecx, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp
-	movl	4(%ebp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	%esi, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	8(%ebp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	adcl	%edi, %ecx
-	movl	%ecx, %esi
-	movl	12(%ebp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	mulxl	%eax, %edi, %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	%ebx, %edi
-	movl	16(%ebp), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebx, %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	20(%ebp), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebp, %edx
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	16(%esp), %eax          # 4-byte Reload
-	addl	%eax, 52(%esp)          # 4-byte Folded Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	adcl	(%esp), %ebp            # 4-byte Folded Reload
-	adcl	%edx, %ecx
-	movl	76(%esp), %eax
-	movl	24(%esp), %edx          # 4-byte Reload
-	movl	%edx, 8(%eax)
-	movl	84(%esp), %eax
-	movl	12(%eax), %eax
-	movl	48(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	addl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %esi, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	%edi, %esi
-	movl	36(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edi, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	%ebx, %edi
-	movl	32(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ebx, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	%ebp, %ebx
-	movl	28(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ebp, %edx
-	adcl	%ecx, %ebp
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	24(%esp), %ecx          # 4-byte Reload
-	addl	%ecx, 52(%esp)          # 4-byte Folded Spill
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%edx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax
-	movl	48(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	404(%esp), %eax
+	movl	28(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	16(%eax), %esi
+	movl	12(%eax), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	8(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	(%eax), %edi
+	movl	4(%eax), %ebp
+	movl	-4(%edx), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	(%edx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	%edi, %eax
+	imull	%ecx, %eax
+	leal	348(%esp), %ecx
+	pushl	%eax
+	pushl	%edx
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	344(%esp), %edi
+	adcl	348(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	356(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	360(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	364(%esp), %esi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	368(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	372(%esp), %edi
+	movl	404(%esp), %eax
+	movl	32(%eax), %eax
+	adcl	376(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	imull	%ebp, %eax
+	leal	308(%esp), %ecx
+	pushl	%eax
+	pushl	416(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 16(%esp)                  # 1-byte Folded Spill
+	movl	336(%esp), %eax
+	adcl	$0, %eax
+	addl	304(%esp), %ebp
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	308(%esp), %edx
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	312(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	316(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	320(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	324(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	328(%esp), %edi
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	332(%esp), %ebp
+	movl	404(%esp), %ecx
+	adcl	36(%ecx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	setb	20(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %esi
+	leal	268(%esp), %ecx
+	pushl	%eax
+	pushl	416(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 20(%esp)                  # 1-byte Folded Spill
+	movl	296(%esp), %eax
+	adcl	$0, %eax
+	addl	264(%esp), %esi
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	268(%esp), %edx
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	272(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	276(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	280(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	284(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	288(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	292(%esp), %ebp
+	movl	404(%esp), %ecx
+	adcl	40(%ecx), %eax
+	movl	%eax, %esi
+	setb	(%esp)                          # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	228(%esp), %ecx
+	pushl	%eax
+	pushl	416(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, (%esp)                    # 1-byte Folded Spill
+	movl	256(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	224(%esp), %edi
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	228(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	236(%esp), %edi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	240(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	244(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	248(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	adcl	252(%esp), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	404(%esp), %eax
+	adcl	44(%eax), %edx
+	movl	%edx, %esi
+	setb	4(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	leal	188(%esp), %ecx
+	pushl	%eax
+	pushl	416(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 4(%esp)                   # 1-byte Folded Spill
+	movl	216(%esp), %ebp
+	adcl	$0, %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	addl	184(%esp), %eax
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	188(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	192(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	196(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	200(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	204(%esp), %edi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	208(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	212(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	404(%esp), %eax
+	adcl	48(%eax), %ebp
+	setb	12(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	leal	148(%esp), %ecx
+	pushl	%eax
+	pushl	416(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 12(%esp)                  # 1-byte Folded Spill
+	movl	176(%esp), %esi
+	adcl	$0, %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	addl	144(%esp), %eax
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	148(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	152(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	156(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	160(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	164(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	168(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	172(%esp), %ebp
+	movl	404(%esp), %eax
+	adcl	52(%eax), %esi
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	leal	108(%esp), %ecx
+	pushl	%eax
+	pushl	416(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	136(%esp), %edi
+	adcl	$0, %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	addl	104(%esp), %eax
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	108(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	112(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	116(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	120(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	124(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	128(%esp), %ebp
+	adcl	132(%esp), %esi
+	movl	404(%esp), %ecx
+	adcl	56(%ecx), %edi
+	setb	4(%esp)                         # 1-byte Folded Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	imull	%eax, %ecx
+	subl	$4, %esp
+	leal	68(%esp), %eax
+	pushl	%ecx
+	pushl	416(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 4(%esp)                   # 1-byte Folded Spill
+	movl	96(%esp), %eax
+	adcl	$0, %eax
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	addl	64(%esp), %ecx
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	68(%esp), %ecx
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	72(%esp), %ebx
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	76(%esp), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	80(%esp), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	adcl	84(%esp), %ebp
+	adcl	88(%esp), %esi
+	adcl	92(%esp), %edi
+	movl	404(%esp), %edx
+	adcl	60(%edx), %eax
+	xorl	%edx, %edx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	subl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	%ebx, %ecx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	sbbl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	sbbl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	sbbl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	%ebp, %ecx
+	sbbl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	sbbl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edi, %ebp
+	sbbl	56(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%eax, %ebx
+	sbbl	60(%esp), %ebx                  # 4-byte Folded Reload
+	sbbl	%edx, %edx
+	testb	$1, %dl
+	jne	.LBB45_1
+# %bb.2:
+	movl	400(%esp), %eax
+	movl	%ebx, 28(%eax)
+	jne	.LBB45_3
+.LBB45_4:
+	movl	%ebp, 24(%eax)
+	movl	16(%esp), %edx                  # 4-byte Reload
+	jne	.LBB45_5
+.LBB45_6:
+	movl	%esi, 20(%eax)
+	jne	.LBB45_7
+.LBB45_8:
+	movl	%ecx, 16(%eax)
+	movl	24(%esp), %esi                  # 4-byte Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB45_9
+.LBB45_10:
 	movl	%ecx, 12(%eax)
-	movl	80(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	16(%eax), %eax
-	mulxl	%eax, %edx, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	addl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx
-	movl	4(%ecx), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	%esi, %ecx
-	movl	80(%esp), %edx
-	movl	8(%edx), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	mulxl	%eax, %esi, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	%edi, %esi
-	movl	%esi, %edi
-	movl	80(%esp), %esi
-	movl	%esi, %edx
-	movl	12(%edx), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	mulxl	%eax, %esi, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	adcl	%ebx, %esi
-	movl	80(%esp), %edx
-	movl	16(%edx), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebx, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	%ebp, %ebx
-	movl	80(%esp), %edx
-	movl	20(%edx), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebp, %edx
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	20(%esp), %ecx          # 4-byte Folded Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	%edx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax
-	movl	28(%esp), %edx          # 4-byte Reload
-	movl	%edx, 16(%eax)
-	movl	84(%esp), %eax
-	movl	20(%eax), %eax
-	movl	48(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	addl	%ecx, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %edi
-	movl	40(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	%esi, %ecx
-	movl	44(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %esi, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	%ebx, %esi
-	movl	36(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ebx, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	%ebp, %ebx
-	movl	32(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	sbbl	%ebp, %ebp
-	andl	$1, %ebp
-	addl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	76(%esp), %eax
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%eax)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%eax)
-	movl	%ecx, 28(%eax)
-	movl	%esi, 32(%eax)
-	movl	%ebx, 36(%eax)
-	movl	%edx, 40(%eax)
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 44(%eax)
-	addl	$56, %esp
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	jne	.LBB45_11
+.LBB45_12:
+	movl	%esi, 8(%eax)
+	jne	.LBB45_13
+.LBB45_14:
+	movl	%edx, 4(%eax)
+	je	.LBB45_16
+.LBB45_15:
+	movl	20(%esp), %ecx                  # 4-byte Reload
+.LBB45_16:
+	movl	%ecx, (%eax)
+	addl	$380, %esp                      # imm = 0x17C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end81:
-	.size	mcl_fpDbl_mulPre6Lbmi2, .Lfunc_end81-mcl_fpDbl_mulPre6Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre6Lbmi2,@function
-mcl_fpDbl_sqrPre6Lbmi2:                 # @mcl_fpDbl_sqrPre6Lbmi2
-# BB#0:
+.LBB45_1:
+	movl	%eax, %ebx
+	movl	400(%esp), %eax
+	movl	%ebx, 28(%eax)
+	je	.LBB45_4
+.LBB45_3:
+	movl	%edi, %ebp
+	movl	%ebp, 24(%eax)
+	movl	16(%esp), %edx                  # 4-byte Reload
+	je	.LBB45_6
+.LBB45_5:
+	movl	36(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 20(%eax)
+	je	.LBB45_8
+.LBB45_7:
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	24(%esp), %esi                  # 4-byte Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	je	.LBB45_10
+.LBB45_9:
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 12(%eax)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	je	.LBB45_12
+.LBB45_11:
+	movl	(%esp), %esi                    # 4-byte Reload
+	movl	%esi, 8(%eax)
+	je	.LBB45_14
+.LBB45_13:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%eax)
+	jne	.LBB45_15
+	jmp	.LBB45_16
+.Lfunc_end45:
+	.size	mcl_fp_montRed8Lbmi2, .Lfunc_end45-mcl_fp_montRed8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRedNF8Lbmi2          # -- Begin function mcl_fp_montRedNF8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF8Lbmi2,@function
+mcl_fp_montRedNF8Lbmi2:                 # @mcl_fp_montRedNF8Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$60, %esp
-	movl	84(%esp), %ebp
-	movl	(%ebp), %ecx
-	movl	4(%ebp), %eax
-	movl	%eax, %edx
-	mulxl	%ecx, %edi, %esi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	%esi, 52(%esp)          # 4-byte Spill
+	subl	$396, %esp                      # imm = 0x18C
+	calll	.L46$pb
+.L46$pb:
+	popl	%ebx
+.Ltmp8:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp8-.L46$pb), %ebx
+	movl	424(%esp), %ecx
+	movl	28(%ecx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	24(%ecx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	20(%ecx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%ecx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	8(%ecx), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %eax
 	movl	%ecx, %edx
-	mulxl	%ecx, %ebx, %edx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	addl	%edi, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	8(%ebp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	mulxl	%ecx, %edi, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	%esi, %edi
-	movl	12(%ebp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ebx, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	16(%ebp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	mulxl	%ecx, %edx, %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	20(%ebp), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ebp, %edx
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	80(%esp), %ecx
-	movl	28(%esp), %esi          # 4-byte Reload
-	movl	%esi, (%ecx)
-	adcl	$0, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	%ecx, 56(%esp)          # 4-byte Folded Spill
-	movl	%eax, %edx
-	mulxl	%eax, %esi, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	%edi, %esi
-	movl	48(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	%ebx, %ecx
-	movl	44(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ebx, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	40(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edi, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	%ebp, %edi
-	movl	32(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %eax
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %ebp
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	addl	52(%esp), %esi          # 4-byte Folded Reload
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	adcl	%eax, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax
-	movl	56(%esp), %edx          # 4-byte Reload
-	movl	%edx, 4(%eax)
-	movl	84(%esp), %eax
-	movl	(%eax), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	8(%eax), %ebp
-	mulxl	%ebp, %edx, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	addl	%esi, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	4(%eax), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	mulxl	%ebp, %edx, %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	%ecx, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%ebp, %edx
-	mulxl	%ebp, %ecx, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	%ebx, %ecx
-	movl	%eax, %esi
-	movl	12(%esi), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	420(%esp), %eax
+	movl	28(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	20(%eax), %esi
+	movl	16(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	12(%eax), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	8(%eax), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	(%eax), %ebp
+	movl	4(%eax), %edi
+	movl	-4(%edx), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	(%edx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	%ebp, %eax
+	imull	%ecx, %eax
+	leal	364(%esp), %ecx
+	pushl	%eax
+	pushl	%edx
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addl	360(%esp), %ebp
+	adcl	364(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	368(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	372(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	376(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	380(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	384(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	388(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	420(%esp), %eax
+	movl	32(%eax), %eax
+	adcl	392(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	setb	28(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, %eax
+	imull	%edi, %eax
+	leal	324(%esp), %ecx
+	pushl	%eax
+	pushl	432(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 28(%esp)                  # 1-byte Folded Spill
+	movl	352(%esp), %eax
+	adcl	$0, %eax
+	addl	320(%esp), %edi
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	324(%esp), %edx
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	328(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	332(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	336(%esp), %esi
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	340(%esp), %edi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	344(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	348(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	420(%esp), %ecx
+	adcl	36(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	12(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	%ebp, %eax
+	imull	%edx, %eax
+	movl	%edx, %ebp
+	leal	284(%esp), %ecx
+	pushl	%eax
+	pushl	432(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 12(%esp)                  # 1-byte Folded Spill
+	movl	312(%esp), %eax
+	adcl	$0, %eax
+	addl	280(%esp), %ebp
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	284(%esp), %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	288(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	292(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	296(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	300(%esp), %ebp
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	304(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	308(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	420(%esp), %ecx
+	adcl	40(%ecx), %eax
+	movl	%eax, %edi
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	44(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %esi
+	leal	244(%esp), %ecx
+	pushl	%eax
+	pushl	432(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	272(%esp), %eax
+	adcl	$0, %eax
+	addl	240(%esp), %esi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	244(%esp), %ecx
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	248(%esp), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	252(%esp), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	adcl	256(%esp), %ebp
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	260(%esp), %esi
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	264(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	268(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	420(%esp), %edx
+	adcl	44(%edx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	44(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %edi
+	leal	204(%esp), %ecx
+	pushl	%eax
+	movl	432(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	232(%esp), %eax
+	adcl	$0, %eax
 	movl	%eax, %edx
-	mulxl	%ebp, %ebx, %edx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	%ebx, %edi
-	movl	16(%esi), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	mulxl	%ebp, %ebx, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	20(%esi), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	mulxl	%ebp, %esi, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	sbbl	%ebp, %ebp
-	andl	$1, %ebp
-	movl	20(%esp), %edx          # 4-byte Reload
-	addl	%edx, 56(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	addl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
+	addl	200(%esp), %edi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	204(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	208(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	212(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	adcl	216(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	220(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	224(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	228(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	420(%esp), %eax
+	adcl	48(%eax), %edx
+	movl	%edx, %edi
+	setb	20(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, %eax
+	imull	%ecx, %eax
+	leal	164(%esp), %ecx
+	pushl	%eax
+	pushl	432(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 20(%esp)                  # 1-byte Folded Spill
+	movl	192(%esp), %eax
+	adcl	$0, %eax
 	movl	%eax, %edx
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	%ebx, %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	%esi, %ecx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	addl	160(%esp), %eax
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	164(%esp), %ecx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	168(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	176(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	180(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	184(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	188(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	420(%esp), %eax
+	adcl	52(%eax), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	setb	47(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	imull	%ecx, %ebp
 	movl	%ecx, %esi
-	movl	32(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ebx, %edx
-	adcl	%ebp, %ebx
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	20(%esp), %eax          # 4-byte Reload
-	addl	%eax, 48(%esp)          # 4-byte Folded Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	adcl	%edx, %ecx
-	movl	80(%esp), %eax
-	movl	44(%esp), %edx          # 4-byte Reload
-	movl	%edx, 8(%eax)
-	movl	24(%esp), %edx          # 4-byte Reload
-	movl	%edx, 12(%eax)
-	movl	84(%esp), %esi
-	movl	(%esi), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	16(%esi), %ebp
-	mulxl	%ebp, %edx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	addl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	4(%esi), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	mulxl	%ebp, %eax, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	%edi, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	8(%esi), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	mulxl	%ebp, %eax, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	12(%esi), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	mulxl	%ebp, %edi, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebp, %edx
-	mulxl	%ebp, %eax, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	%ebx, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	20(%esi), %ebx
-	movl	%ebx, %edx
-	mulxl	%ebp, %edx, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	%edx, %ecx
-	sbbl	%ebp, %ebp
-	andl	$1, %ebp
-	movl	8(%esp), %esi           # 4-byte Reload
-	addl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	%edx, 56(%esp)          # 4-byte Folded Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	4(%esp), %edx           # 4-byte Reload
-	adcl	%edx, 52(%esp)          # 4-byte Folded Spill
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	adcl	%eax, %ebp
-	movl	44(%esp), %edx          # 4-byte Reload
-	mulxl	%ebx, %edx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	addl	%esi, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	mulxl	%ebx, %edx, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %eax
-	movl	36(%esp), %edx          # 4-byte Reload
-	mulxl	%ebx, %esi, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	%edi, %esi
-	movl	32(%esp), %edx          # 4-byte Reload
-	mulxl	%ebx, %edi, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebx, %edx
-	mulxl	%ebx, %ebx, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	%ebp, %ebx
-	sbbl	%ebp, %ebp
-	andl	$1, %ebp
-	addl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	80(%esp), %eax
-	movl	48(%esp), %edx          # 4-byte Reload
-	movl	%edx, 16(%eax)
-	movl	44(%esp), %edx          # 4-byte Reload
-	movl	%edx, 20(%eax)
-	movl	52(%esp), %edx          # 4-byte Reload
-	movl	%edx, 24(%eax)
-	movl	%esi, 28(%eax)
-	movl	%edi, 32(%eax)
-	movl	%ecx, 36(%eax)
-	movl	%ebx, 40(%eax)
-	adcl	56(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 44(%eax)
-	addl	$60, %esp
+	leal	124(%esp), %ecx
+	pushl	%ebp
+	pushl	432(%esp)
+	pushl	%ecx
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 47(%esp)                  # 1-byte Folded Spill
+	movl	152(%esp), %edi
+	adcl	$0, %edi
+	addl	120(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	124(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	adcl	128(%esp), %ebp
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	132(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	136(%esp), %esi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	140(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	144(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	148(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	420(%esp), %ecx
+	adcl	56(%ecx), %edi
+	setb	36(%esp)                        # 1-byte Folded Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	imull	%eax, %ecx
+	subl	$4, %esp
+	leal	84(%esp), %eax
+	pushl	%ecx
+	pushl	432(%esp)
+	pushl	%eax
+	calll	mulPv256x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 36(%esp)                  # 1-byte Folded Spill
+	movl	112(%esp), %edx
+	adcl	$0, %edx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	addl	80(%esp), %eax
+	movl	%ebp, %eax
+	adcl	84(%esp), %eax
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	88(%esp), %ecx
+	adcl	92(%esp), %esi
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	96(%esp), %ebx
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	100(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	104(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	adcl	108(%esp), %edi
+	movl	420(%esp), %ebp
+	adcl	60(%ebp), %edx
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	subl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	sbbl	60(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	sbbl	48(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	sbbl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	sbbl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	sbbl	52(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	sbbl	72(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edx, %ebx
+	sbbl	76(%esp), %ebx                  # 4-byte Folded Reload
+	testl	%ebx, %ebx
+	js	.LBB46_1
+# %bb.2:
+	movl	416(%esp), %edx
+	movl	%ebx, 28(%edx)
+	js	.LBB46_3
+.LBB46_4:
+	movl	%edi, 24(%edx)
+	js	.LBB46_5
+.LBB46_6:
+	movl	%ebp, 20(%edx)
+	js	.LBB46_7
+.LBB46_8:
+	movl	%ecx, 16(%edx)
+	js	.LBB46_9
+.LBB46_10:
+	movl	%eax, 12(%edx)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	48(%esp), %eax                  # 4-byte Reload
+	js	.LBB46_11
+.LBB46_12:
+	movl	%eax, 8(%edx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	js	.LBB46_13
+.LBB46_14:
+	movl	%ecx, 4(%edx)
+	jns	.LBB46_16
+.LBB46_15:
+	movl	36(%esp), %eax                  # 4-byte Reload
+.LBB46_16:
+	movl	%eax, (%edx)
+	addl	$396, %esp                      # imm = 0x18C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end82:
-	.size	mcl_fpDbl_sqrPre6Lbmi2, .Lfunc_end82-mcl_fpDbl_sqrPre6Lbmi2
-
-	.globl	mcl_fp_mont6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont6Lbmi2,@function
-mcl_fp_mont6Lbmi2:                      # @mcl_fp_mont6Lbmi2
-# BB#0:
+.LBB46_1:
+	movl	%edx, %ebx
+	movl	416(%esp), %edx
+	movl	%ebx, 28(%edx)
+	jns	.LBB46_4
+.LBB46_3:
+	movl	52(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 24(%edx)
+	jns	.LBB46_6
+.LBB46_5:
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%edx)
+	jns	.LBB46_8
+.LBB46_7:
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%edx)
+	jns	.LBB46_10
+.LBB46_9:
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edx)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	48(%esp), %eax                  # 4-byte Reload
+	jns	.LBB46_12
+.LBB46_11:
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%edx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	jns	.LBB46_14
+.LBB46_13:
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%edx)
+	js	.LBB46_15
+	jmp	.LBB46_16
+.Lfunc_end46:
+	.size	mcl_fp_montRedNF8Lbmi2, .Lfunc_end46-mcl_fp_montRedNF8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addPre8Lbmi2             # -- Begin function mcl_fp_addPre8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre8Lbmi2,@function
+mcl_fp_addPre8Lbmi2:                    # @mcl_fp_addPre8Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$100, %esp
-	movl	124(%esp), %eax
-	movl	20(%eax), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	128(%esp), %ecx
-	movl	(%ecx), %ecx
-	mulxl	%ecx, %edx, %ebp
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	16(%eax), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	mulxl	%ecx, %edx, %edi
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	12(%eax), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	mulxl	%ecx, %edx, %esi
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	(%eax), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	4(%eax), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	mulxl	%ecx, %eax, %edx
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	%ecx, %ebx, %edx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	addl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax
-	movl	8(%eax), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ecx, %eax
-	adcl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	92(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	96(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	132(%esp), %edi
-	movl	-4(%edi), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	imull	%eax, %edx
-	movl	(%edi), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	4(%edi), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	mulxl	%esi, %esi, %ebp
-	mulxl	%eax, %ecx, %eax
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	addl	%esi, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	8(%edi), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %esi
-	adcl	%ebp, %ecx
-	movl	12(%edi), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebx, %eax
-	adcl	%esi, %ebx
-	movl	16(%edi), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	mulxl	%esi, %esi, %ebp
-	adcl	%eax, %esi
-	movl	20(%edi), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	mulxl	%eax, %edi, %eax
-	adcl	%ebp, %edi
-	adcl	$0, %eax
-	movl	12(%esp), %edx          # 4-byte Reload
-	addl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	%edx, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	128(%esp), %edx
-	movl	4(%edx), %edx
-	mulxl	64(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	mulxl	72(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	mulxl	48(%esp), %eax, %ecx    # 4-byte Folded Reload
-	mulxl	52(%esp), %ebx, %ebp    # 4-byte Folded Reload
-	addl	%eax, %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %esi, %ebp    # 4-byte Folded Reload
-	adcl	%ecx, %esi
-	mulxl	68(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	%ebp, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, %ecx
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	20(%esp), %ebp          # 4-byte Reload
-	adcl	%ebp, 28(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	%edi, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	imull	60(%esp), %edx          # 4-byte Folded Reload
-	mulxl	96(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	92(%esp), %edi, %esi    # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	addl	%ecx, %esi
-	movl	%esi, %edi
-	mulxl	88(%esp), %esi, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %esi
-	movl	%esi, %eax
-	mulxl	84(%esp), %esi, %ebx    # 4-byte Folded Reload
-	adcl	%ecx, %esi
-	mulxl	80(%esp), %ecx, %ebp    # 4-byte Folded Reload
-	adcl	%ebx, %ecx
-	movl	%ecx, %ebx
-	mulxl	76(%esp), %ecx, %edx    # 4-byte Folded Reload
-	adcl	%ebp, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, %ecx
-	movl	36(%esp), %edx          # 4-byte Reload
-	andl	$1, %edx
-	movl	12(%esp), %ebp          # 4-byte Reload
-	addl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	16(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 24(%esp)          # 4-byte Folded Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	128(%esp), %edx
+	subl	$8, %esp
+	movl	32(%esp), %ecx
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	movl	36(%esp), %esi
+	addl	(%esi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	4(%esi), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	28(%ecx), %edi
+	movl	24(%ecx), %ebx
+	movl	20(%ecx), %ebp
+	movl	16(%ecx), %eax
+	movl	12(%ecx), %edx
+	movl	8(%ecx), %ecx
+	adcl	8(%esi), %ecx
+	adcl	12(%esi), %edx
+	adcl	16(%esi), %eax
+	adcl	20(%esi), %ebp
+	adcl	24(%esi), %ebx
+	adcl	28(%esi), %edi
+	movl	28(%esp), %esi
+	movl	%ebx, 24(%esi)
+	movl	%ebp, 20(%esi)
+	movl	%eax, 16(%esi)
+	movl	%edx, 12(%esi)
+	movl	%ecx, 8(%esi)
+	movl	%edi, 28(%esi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 4(%esi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, (%esi)
+	setb	%al
+	movzbl	%al, %eax
+	addl	$8, %esp
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+	retl
+.Lfunc_end47:
+	.size	mcl_fp_addPre8Lbmi2, .Lfunc_end47-mcl_fp_addPre8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subPre8Lbmi2             # -- Begin function mcl_fp_subPre8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre8Lbmi2,@function
+mcl_fp_subPre8Lbmi2:                    # @mcl_fp_subPre8Lbmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$16, %esp
+	movl	40(%esp), %edx
+	movl	(%edx), %ecx
+	movl	4(%edx), %esi
+	xorl	%eax, %eax
+	movl	44(%esp), %edi
+	subl	(%edi), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	sbbl	4(%edi), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	28(%edx), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	24(%edx), %ebp
+	movl	20(%edx), %ecx
+	movl	16(%edx), %esi
+	movl	12(%edx), %ebx
 	movl	8(%edx), %edx
-	mulxl	64(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	mulxl	72(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	mulxl	48(%esp), %ebx, %edi    # 4-byte Folded Reload
-	mulxl	52(%esp), %eax, %esi    # 4-byte Folded Reload
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	addl	%ebx, %esi
-	mulxl	56(%esp), %ecx, %ebp    # 4-byte Folded Reload
-	adcl	%edi, %ecx
-	mulxl	68(%esp), %edi, %eax    # 4-byte Folded Reload
-	adcl	%ebp, %edi
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	movl	4(%esp), %ebx           # 4-byte Reload
-	addl	28(%esp), %ebx          # 4-byte Folded Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	imull	60(%esp), %edx          # 4-byte Folded Reload
-	mulxl	96(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	92(%esp), %ebp, %esi    # 4-byte Folded Reload
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	addl	%ecx, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	mulxl	88(%esp), %esi, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	mulxl	84(%esp), %esi, %eax    # 4-byte Folded Reload
-	adcl	%ecx, %esi
-	movl	%esi, %ecx
-	mulxl	80(%esp), %esi, %ebp    # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	adcl	%eax, %esi
-	mulxl	76(%esp), %ebp, %eax    # 4-byte Folded Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	$0, %eax
-	movl	32(%esp), %edx          # 4-byte Reload
-	andl	$1, %edx
-	addl	%ebx, 36(%esp)          # 4-byte Folded Spill
-	movl	20(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 28(%esp)          # 4-byte Folded Spill
-	movl	16(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 24(%esp)          # 4-byte Folded Spill
-	adcl	%edi, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	128(%esp), %edx
-	movl	12(%edx), %edx
-	mulxl	64(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	mulxl	72(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	mulxl	48(%esp), %eax, %ebx    # 4-byte Folded Reload
-	mulxl	52(%esp), %ecx, %edi    # 4-byte Folded Reload
-	addl	%eax, %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %eax, %edi    # 4-byte Folded Reload
-	adcl	%ebx, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	mulxl	68(%esp), %ebx, %eax    # 4-byte Folded Reload
-	adcl	%edi, %ebx
-	adcl	%esi, %eax
-	movl	%eax, %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	16(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 20(%esp)          # 4-byte Folded Spill
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	adcl	%ebp, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	imull	60(%esp), %edx          # 4-byte Folded Reload
-	mulxl	96(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	92(%esp), %edi, %esi    # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	addl	%ecx, %esi
-	movl	%esi, %ebx
-	mulxl	88(%esp), %esi, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %esi
-	movl	%esi, %edi
-	mulxl	84(%esp), %esi, %eax    # 4-byte Folded Reload
-	adcl	%ecx, %esi
-	movl	%esi, %ecx
-	mulxl	80(%esp), %esi, %ebp    # 4-byte Folded Reload
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	adcl	%eax, %esi
-	movl	%esi, %ebp
-	mulxl	76(%esp), %edx, %eax    # 4-byte Folded Reload
-	adcl	12(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	adcl	$0, %eax
-	movl	32(%esp), %edx          # 4-byte Reload
-	andl	$1, %edx
-	movl	16(%esp), %esi          # 4-byte Reload
-	addl	4(%esp), %esi           # 4-byte Folded Reload
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %esi           # 4-byte Reload
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	128(%esp), %edx
-	movl	16(%edx), %edx
-	mulxl	64(%esp), %eax, %ebp    # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	mulxl	72(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	mulxl	48(%esp), %ebx, %eax    # 4-byte Folded Reload
-	mulxl	52(%esp), %ecx, %edi    # 4-byte Folded Reload
-	addl	%ebx, %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %edi, %ebx    # 4-byte Folded Reload
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	adcl	%eax, %edi
-	movl	%edi, %ebx
-	mulxl	68(%esp), %edx, %eax    # 4-byte Folded Reload
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, %edi
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, %edx
-	adcl	$0, %edx
-	addl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	20(%esp), %ebp          # 4-byte Reload
-	adcl	%ebp, 28(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	4(%esp), %ebx           # 4-byte Reload
-	adcl	%esi, %ebx
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
+	sbbl	8(%edi), %edx
+	sbbl	12(%edi), %ebx
+	sbbl	16(%edi), %esi
+	sbbl	20(%edi), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	sbbl	24(%edi), %ebp
+	movl	(%esp), %ecx                    # 4-byte Reload
+	sbbl	28(%edi), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	36(%esp), %edi
+	movl	%ebp, 24(%edi)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 20(%edi)
+	movl	%esi, 16(%edi)
+	movl	%ebx, 12(%edi)
+	movl	%edx, 8(%edi)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 28(%edi)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%edi)
 	sbbl	%eax, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
+	andl	$1, %eax
+	addl	$16, %esp
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+	retl
+.Lfunc_end48:
+	.size	mcl_fp_subPre8Lbmi2, .Lfunc_end48-mcl_fp_subPre8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_shr1_8Lbmi2              # -- Begin function mcl_fp_shr1_8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_8Lbmi2,@function
+mcl_fp_shr1_8Lbmi2:                     # @mcl_fp_shr1_8Lbmi2
+# %bb.0:
+	pushl	%esi
+	movl	12(%esp), %eax
+	movl	28(%eax), %ecx
 	movl	%ecx, %edx
-	imull	60(%esp), %edx          # 4-byte Folded Reload
-	mulxl	96(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	92(%esp), %edi, %esi    # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	addl	%ecx, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	mulxl	88(%esp), %esi, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	mulxl	84(%esp), %edi, %eax    # 4-byte Folded Reload
-	adcl	%ecx, %edi
-	mulxl	80(%esp), %esi, %ebp    # 4-byte Folded Reload
-	adcl	%eax, %esi
-	mulxl	76(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	%ebp, %ecx
-	adcl	$0, %eax
-	movl	%eax, %ebp
-	movl	40(%esp), %edx          # 4-byte Reload
-	andl	$1, %edx
-	movl	12(%esp), %eax          # 4-byte Reload
-	addl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	%ebx, %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	128(%esp), %edx
-	movl	20(%edx), %edx
-	mulxl	48(%esp), %eax, %esi    # 4-byte Folded Reload
-	mulxl	52(%esp), %ecx, %ebp    # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	addl	%eax, %ebp
-	mulxl	64(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	adcl	%esi, %ebx
-	mulxl	72(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	mulxl	68(%esp), %esi, %edx    # 4-byte Folded Reload
-	adcl	%ecx, %esi
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %ecx
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	movl	52(%esp), %edi          # 4-byte Reload
-	addl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	imull	%edi, %edx
-	mulxl	92(%esp), %eax, %edi    # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	mulxl	96(%esp), %ecx, %esi    # 4-byte Folded Reload
-	addl	%edi, %ecx
-	mulxl	88(%esp), %edi, %ebx    # 4-byte Folded Reload
-	adcl	%esi, %edi
-	movl	%edx, %esi
-	mulxl	84(%esp), %ebp, %eax    # 4-byte Folded Reload
-	adcl	%ebx, %ebp
-	movl	%esi, %edx
-	mulxl	80(%esp), %ebx, %edx    # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	%eax, %ebx
-	movl	%esi, %edx
-	mulxl	76(%esp), %esi, %edx    # 4-byte Folded Reload
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	adcl	$0, %edx
-	andl	$1, 72(%esp)            # 4-byte Folded Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %eax
-	subl	92(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	%edi, %ecx
-	sbbl	96(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebp, %edi
-	sbbl	88(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 92(%esp)          # 4-byte Spill
+	shrl	%edx
+	movl	8(%esp), %esi
+	movl	%edx, 28(%esi)
+	movl	24(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 24(%esi)
+	movl	20(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 20(%esi)
+	movl	16(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 16(%esi)
+	movl	12(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 12(%esi)
+	movl	8(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 8(%esi)
+	movl	4(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 4(%esi)
+	movl	(%eax), %eax
+	shrdl	$1, %ecx, %eax
+	movl	%eax, (%esi)
+	popl	%esi
+	retl
+.Lfunc_end49:
+	.size	mcl_fp_shr1_8Lbmi2, .Lfunc_end49-mcl_fp_shr1_8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_add8Lbmi2                # -- Begin function mcl_fp_add8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_add8Lbmi2,@function
+mcl_fp_add8Lbmi2:                       # @mcl_fp_add8Lbmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$32, %esp
+	movl	56(%esp), %eax
+	movl	(%eax), %ebx
+	movl	4(%eax), %ecx
+	movl	60(%esp), %edx
+	addl	(%edx), %ebx
+	adcl	4(%edx), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	28(%eax), %ebp
+	movl	24(%eax), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	16(%eax), %esi
+	movl	12(%eax), %edi
+	movl	8(%eax), %edx
+	movl	60(%esp), %eax
+	adcl	8(%eax), %edx
+	adcl	12(%eax), %edi
+	adcl	16(%eax), %esi
+	adcl	20(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	60(%esp), %ecx
+	adcl	24(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	28(%ecx), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx
+	movl	%ebp, 28(%ecx)
+	movl	%eax, 24(%ecx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 20(%ecx)
+	movl	%esi, 16(%ecx)
+	movl	%edi, 12(%ecx)
+	movl	%edx, 8(%ecx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 4(%ecx)
+	movl	%ebx, (%ecx)
+	setb	3(%esp)                         # 1-byte Folded Spill
+	movl	64(%esp), %ebp
+	subl	(%ebp), %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	sbbl	4(%ebp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	sbbl	8(%ebp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	sbbl	12(%ebp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	sbbl	16(%ebp), %esi
 	movl	%esi, %edi
-	sbbl	84(%esp), %ebx          # 4-byte Folded Reload
-	sbbl	80(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	sbbl	76(%esp), %esi          # 4-byte Folded Reload
-	sbbl	$0, %eax
-	andl	$1, %eax
-	jne	.LBB83_2
-# BB#1:
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-.LBB83_2:
-	testb	%al, %al
-	movl	56(%esp), %ecx          # 4-byte Reload
-	jne	.LBB83_4
-# BB#3:
-	movl	72(%esp), %ecx          # 4-byte Reload
-.LBB83_4:
-	movl	120(%esp), %eax
-	movl	%ecx, (%eax)
-	movl	68(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 4(%eax)
-	jne	.LBB83_6
-# BB#5:
-	movl	92(%esp), %ebp          # 4-byte Reload
-.LBB83_6:
-	movl	%ebp, 8(%eax)
-	movl	60(%esp), %ecx          # 4-byte Reload
-	jne	.LBB83_8
-# BB#7:
-	movl	%ebx, %ecx
-.LBB83_8:
-	movl	%ecx, 12(%eax)
-	jne	.LBB83_10
-# BB#9:
-	movl	96(%esp), %edi          # 4-byte Reload
-.LBB83_10:
-	movl	%edi, 16(%eax)
-	jne	.LBB83_12
-# BB#11:
-	movl	%esi, %edx
-.LBB83_12:
-	movl	%edx, 20(%eax)
-	addl	$100, %esp
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	sbbl	20(%ebp), %ecx
+	movl	8(%esp), %esi                   # 4-byte Reload
+	sbbl	24(%ebp), %esi
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	sbbl	28(%ebp), %ebx
+	movzbl	3(%esp), %edx                   # 1-byte Folded Reload
+	sbbl	$0, %edx
+	testb	$1, %dl
+	jne	.LBB50_2
+# %bb.1:                                # %nocarry
+	movl	24(%esp), %edx                  # 4-byte Reload
+	movl	52(%esp), %ebp
+	movl	%edx, (%ebp)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 4(%ebp)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ebp)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebp)
+	movl	%edi, 16(%ebp)
+	movl	%ecx, 20(%ebp)
+	movl	%esi, 24(%ebp)
+	movl	%ebx, 28(%ebp)
+.LBB50_2:                               # %carry
+	addl	$32, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end83:
-	.size	mcl_fp_mont6Lbmi2, .Lfunc_end83-mcl_fp_mont6Lbmi2
-
-	.globl	mcl_fp_montNF6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF6Lbmi2,@function
-mcl_fp_montNF6Lbmi2:                    # @mcl_fp_montNF6Lbmi2
-# BB#0:
+.Lfunc_end50:
+	.size	mcl_fp_add8Lbmi2, .Lfunc_end50-mcl_fp_add8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addNF8Lbmi2              # -- Begin function mcl_fp_addNF8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF8Lbmi2,@function
+mcl_fp_addNF8Lbmi2:                     # @mcl_fp_addNF8Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$84, %esp
-	movl	108(%esp), %ebx
-	movl	(%ebx), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	4(%ebx), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	(%eax), %eax
-	mulxl	%eax, %ecx, %esi
-	movl	%edi, %edx
-	mulxl	%eax, %edx, %ebp
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	addl	%ecx, %ebp
-	movl	8(%ebx), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %edi
-	adcl	%esi, %ecx
-	movl	%ecx, %esi
-	movl	12(%ebx), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	adcl	%edi, %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	16(%ebx), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %edi
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	20(%ebx), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %eax
-	adcl	%edi, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	116(%esp), %ebx
-	movl	-4(%ebx), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	movl	%edi, %edx
-	imull	%eax, %edx
-	movl	(%ebx), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	addl	%edi, %ecx
-	movl	4(%ebx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	mulxl	%eax, %edi, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	%ebp, %edi
-	movl	8(%ebx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	mulxl	%eax, %eax, %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	%esi, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	12(%ebx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	mulxl	%eax, %esi, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	movl	16(%ebx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	20(%ebx), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebp, %eax
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	12(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 24(%esp)          # 4-byte Folded Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	(%esp), %ebp            # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	adcl	%eax, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	4(%eax), %edx
-	mulxl	48(%esp), %ecx, %esi    # 4-byte Folded Reload
-	mulxl	52(%esp), %ebp, %eax    # 4-byte Folded Reload
-	addl	%ecx, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	mulxl	44(%esp), %ecx, %edi    # 4-byte Folded Reload
-	adcl	%esi, %ecx
-	movl	%ecx, %esi
-	mulxl	40(%esp), %eax, %ebx    # 4-byte Folded Reload
-	adcl	%edi, %eax
-	mulxl	36(%esp), %ecx, %edi    # 4-byte Folded Reload
-	movl	%edi, (%esp)            # 4-byte Spill
-	adcl	%ebx, %ecx
-	movl	%ecx, %edi
-	mulxl	32(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	adcl	$0, %ecx
-	movl	%ecx, %edx
-	addl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	adcl	$0, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %esi
-	movl	%esi, %edx
-	imull	56(%esp), %edx          # 4-byte Folded Reload
-	mulxl	80(%esp), %ebp, %edi    # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	addl	%esi, %ebp
-	mulxl	76(%esp), %ebp, %esi    # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	%ecx, %ebp
-	movl	%ebp, %esi
-	mulxl	72(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, %ecx
-	mulxl	68(%esp), %ebp, %edi    # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	adcl	%eax, %ebp
-	movl	%ebp, %eax
-	mulxl	64(%esp), %ebp, %edi    # 4-byte Folded Reload
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	mulxl	60(%esp), %edi, %edx    # 4-byte Folded Reload
-	adcl	%ebx, %edi
-	movl	%edi, %ebx
-	movl	28(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	addl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	adcl	%edx, %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	8(%eax), %edx
-	mulxl	48(%esp), %eax, %ecx    # 4-byte Folded Reload
-	mulxl	52(%esp), %esi, %edi    # 4-byte Folded Reload
-	movl	%esi, (%esp)            # 4-byte Spill
-	addl	%eax, %edi
-	mulxl	44(%esp), %eax, %esi    # 4-byte Folded Reload
-	adcl	%ecx, %eax
-	movl	%eax, %ecx
-	mulxl	40(%esp), %eax, %ebx    # 4-byte Folded Reload
-	adcl	%esi, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mulxl	36(%esp), %eax, %ebp    # 4-byte Folded Reload
-	adcl	%ebx, %eax
-	movl	%eax, %esi
-	mulxl	32(%esp), %ebx, %eax    # 4-byte Folded Reload
-	adcl	%ebp, %ebx
-	adcl	$0, %eax
-	movl	%eax, %edx
-	movl	(%esp), %ebp            # 4-byte Reload
-	addl	20(%esp), %ebp          # 4-byte Folded Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, (%esp)            # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	adcl	$0, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %edx
-	movl	%ebp, %eax
-	imull	56(%esp), %edx          # 4-byte Folded Reload
-	mulxl	80(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	addl	%eax, %ebp
-	mulxl	76(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	%edi, %ebp
+	subl	$44, %esp
+	movl	72(%esp), %eax
+	movl	(%eax), %edx
+	movl	4(%eax), %ecx
+	movl	68(%esp), %esi
+	addl	(%esi), %edx
+	adcl	4(%esi), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	28(%eax), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	16(%eax), %ebx
+	movl	12(%eax), %ebp
+	movl	8(%eax), %eax
+	adcl	8(%esi), %eax
+	adcl	12(%esi), %ebp
+	adcl	16(%esi), %ebx
+	adcl	20(%esi), %ecx
+	movl	(%esp), %esi                    # 4-byte Reload
+	movl	68(%esp), %edi
+	adcl	24(%edi), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	movl	68(%esp), %edi
+	adcl	28(%edi), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	76(%esp), %esi
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	subl	(%esi), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edi                   # 4-byte Reload
+	sbbl	4(%esi), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	sbbl	8(%esi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
 	movl	%ebp, %edi
-	mulxl	72(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, %ecx
-	mulxl	68(%esp), %ebp, %eax    # 4-byte Folded Reload
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, %eax
-	mulxl	64(%esp), %ebp, %esi    # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	(%esp), %ebp            # 4-byte Folded Reload
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	mulxl	60(%esp), %ebp, %edx    # 4-byte Folded Reload
-	adcl	%ebx, %ebp
-	movl	28(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	addl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 20(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	%edx, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	12(%eax), %edx
-	mulxl	48(%esp), %eax, %ecx    # 4-byte Folded Reload
-	mulxl	52(%esp), %ebp, %esi    # 4-byte Folded Reload
-	addl	%eax, %esi
-	mulxl	44(%esp), %eax, %edi    # 4-byte Folded Reload
-	adcl	%ecx, %eax
-	mulxl	40(%esp), %ecx, %ebx    # 4-byte Folded Reload
-	adcl	%edi, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	mulxl	36(%esp), %ecx, %edi    # 4-byte Folded Reload
-	movl	%edi, (%esp)            # 4-byte Spill
-	adcl	%ebx, %ecx
-	movl	%ecx, %edi
-	mulxl	32(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	adcl	$0, %ecx
-	addl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	adcl	$0, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %ecx
-	movl	%ecx, %edx
-	imull	56(%esp), %edx          # 4-byte Folded Reload
-	mulxl	80(%esp), %ebp, %eax    # 4-byte Folded Reload
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	addl	%ecx, %ebp
-	mulxl	76(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	%esi, %ecx
-	mulxl	72(%esp), %ebp, %eax    # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	(%esp), %ebp            # 4-byte Folded Reload
-	mulxl	68(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, %eax
-	mulxl	64(%esp), %esi, %edi    # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	mulxl	60(%esp), %esi, %edx    # 4-byte Folded Reload
-	adcl	%ebx, %esi
-	movl	28(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	addl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 20(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	adcl	%edx, %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	16(%eax), %edx
-	mulxl	48(%esp), %eax, %ecx    # 4-byte Folded Reload
-	mulxl	52(%esp), %esi, %edi    # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	addl	%eax, %edi
-	mulxl	44(%esp), %eax, %esi    # 4-byte Folded Reload
-	adcl	%ecx, %eax
-	mulxl	40(%esp), %ecx, %ebx    # 4-byte Folded Reload
-	adcl	%esi, %ecx
-	mulxl	36(%esp), %esi, %ebp    # 4-byte Folded Reload
-	adcl	%ebx, %esi
-	mulxl	32(%esp), %ebx, %edx    # 4-byte Folded Reload
-	adcl	%ebp, %ebx
-	adcl	$0, %edx
-	movl	24(%esp), %ebp          # 4-byte Reload
-	addl	16(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	adcl	$0, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %edx
-	imull	56(%esp), %edx          # 4-byte Folded Reload
-	mulxl	80(%esp), %ebp, %eax    # 4-byte Folded Reload
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	addl	24(%esp), %ebp          # 4-byte Folded Reload
-	mulxl	76(%esp), %ebp, %eax    # 4-byte Folded Reload
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	adcl	%edi, %ebp
-	mulxl	72(%esp), %edi, %eax    # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	mulxl	68(%esp), %eax, %edi    # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	adcl	%ecx, %eax
-	mulxl	64(%esp), %ecx, %edi    # 4-byte Folded Reload
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	adcl	%esi, %ecx
-	movl	%ecx, %edi
-	mulxl	60(%esp), %ecx, %edx    # 4-byte Folded Reload
-	adcl	%ebx, %ecx
-	movl	28(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	addl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 24(%esp)          # 4-byte Folded Spill
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	adcl	%edx, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	20(%eax), %edx
-	mulxl	48(%esp), %ebx, %eax    # 4-byte Folded Reload
-	mulxl	52(%esp), %edi, %esi    # 4-byte Folded Reload
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	addl	%ebx, %esi
-	mulxl	44(%esp), %ebx, %ebp    # 4-byte Folded Reload
-	adcl	%eax, %ebx
-	mulxl	40(%esp), %eax, %edi    # 4-byte Folded Reload
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	%ebp, %eax
-	movl	%eax, %ebp
-	mulxl	36(%esp), %eax, %edi    # 4-byte Folded Reload
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mulxl	32(%esp), %edx, %eax    # 4-byte Folded Reload
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %eax
-	movl	52(%esp), %edi          # 4-byte Reload
-	addl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	16(%esp), %ebx          # 4-byte Folded Reload
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	adcl	%ecx, 48(%esp)          # 4-byte Folded Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	56(%esp), %edx          # 4-byte Reload
-	movl	52(%esp), %ebp          # 4-byte Reload
-	imull	%ebp, %edx
-	mulxl	80(%esp), %ecx, %edi    # 4-byte Folded Reload
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	addl	%ebp, %ecx
-	mulxl	76(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	%esi, %ebp
-	mulxl	72(%esp), %ecx, %esi    # 4-byte Folded Reload
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	%ebx, %ecx
-	movl	%edx, %ebx
-	mulxl	68(%esp), %esi, %edx    # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%ebx, %edx
-	mulxl	64(%esp), %edi, %edx    # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
+	sbbl	12(%esi), %edi
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	movl	%ebx, %ebp
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	sbbl	16(%esi), %ebp
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	sbbl	20(%esi), %ecx
+	movl	(%esp), %eax                    # 4-byte Reload
+	sbbl	24(%esi), %eax
 	movl	%ebx, %edx
-	mulxl	60(%esp), %ebx, %edx    # 4-byte Folded Reload
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	adcl	$0, %eax
-	addl	56(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	adcl	%edx, %eax
-	movl	%ebp, %edx
-	subl	80(%esp), %edx          # 4-byte Folded Reload
-	sbbl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	%esi, %ebp
-	movl	%ebx, %esi
-	sbbl	72(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	sbbl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	%esi, %ebx
-	sbbl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%eax, %edi
-	sbbl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, %ebp
-	sarl	$31, %ebp
-	testl	%ebp, %ebp
-	js	.LBB84_2
-# BB#1:
-	movl	%edx, 56(%esp)          # 4-byte Spill
-.LBB84_2:
-	movl	104(%esp), %ebp
-	movl	56(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, (%ebp)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	js	.LBB84_4
-# BB#3:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB84_4:
-	movl	%ecx, 4(%ebp)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	js	.LBB84_6
-# BB#5:
-	movl	76(%esp), %edx          # 4-byte Reload
-.LBB84_6:
-	movl	%edx, 8(%ebp)
-	js	.LBB84_8
-# BB#7:
-	movl	80(%esp), %ecx          # 4-byte Reload
-.LBB84_8:
-	movl	%ecx, 12(%ebp)
-	js	.LBB84_10
-# BB#9:
-	movl	%ebx, %esi
-.LBB84_10:
-	movl	%esi, 16(%ebp)
-	js	.LBB84_12
-# BB#11:
-	movl	%edi, %eax
-.LBB84_12:
-	movl	%eax, 20(%ebp)
-	addl	$84, %esp
+	sbbl	28(%esi), %edx
+	testl	%edx, %edx
+	js	.LBB51_1
+# %bb.2:
+	movl	64(%esp), %ebx
+	movl	%edx, 28(%ebx)
+	js	.LBB51_3
+.LBB51_4:
+	movl	%eax, 24(%ebx)
+	movl	32(%esp), %edx                  # 4-byte Reload
+	js	.LBB51_5
+.LBB51_6:
+	movl	%ecx, 20(%ebx)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	js	.LBB51_7
+.LBB51_8:
+	movl	%ebp, 16(%ebx)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	js	.LBB51_9
+.LBB51_10:
+	movl	%edi, 12(%ebx)
+	js	.LBB51_11
+.LBB51_12:
+	movl	%edx, 8(%ebx)
+	js	.LBB51_13
+.LBB51_14:
+	movl	%ecx, 4(%ebx)
+	jns	.LBB51_16
+.LBB51_15:
+	movl	28(%esp), %eax                  # 4-byte Reload
+.LBB51_16:
+	movl	%eax, (%ebx)
+	addl	$44, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end84:
-	.size	mcl_fp_montNF6Lbmi2, .Lfunc_end84-mcl_fp_montNF6Lbmi2
-
-	.globl	mcl_fp_montRed6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed6Lbmi2,@function
-mcl_fp_montRed6Lbmi2:                   # @mcl_fp_montRed6Lbmi2
-# BB#0:
+.LBB51_1:
+	movl	%ebx, %edx
+	movl	64(%esp), %ebx
+	movl	%edx, 28(%ebx)
+	jns	.LBB51_4
+.LBB51_3:
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 24(%ebx)
+	movl	32(%esp), %edx                  # 4-byte Reload
+	jns	.LBB51_6
+.LBB51_5:
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%ebx)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	jns	.LBB51_8
+.LBB51_7:
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 16(%ebx)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB51_10
+.LBB51_9:
+	movl	20(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%ebx)
+	jns	.LBB51_12
+.LBB51_11:
+	movl	24(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%ebx)
+	jns	.LBB51_14
+.LBB51_13:
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 4(%ebx)
+	js	.LBB51_15
+	jmp	.LBB51_16
+.Lfunc_end51:
+	.size	mcl_fp_addNF8Lbmi2, .Lfunc_end51-mcl_fp_addNF8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_sub8Lbmi2                # -- Begin function mcl_fp_sub8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_sub8Lbmi2,@function
+mcl_fp_sub8Lbmi2:                       # @mcl_fp_sub8Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$88, %esp
-	movl	116(%esp), %edi
-	movl	-4(%edi), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	(%edi), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	112(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	imull	%eax, %edx
-	movl	20(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebx, %eax
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	16(%edi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %eax
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	12(%edi), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %eax
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	4(%edi), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mulxl	%esi, %ecx, %eax
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	addl	%ebx, %eax
-	movl	%eax, %ebp
-	movl	8(%edi), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	mulxl	%esi, %eax, %edx
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	32(%esp), %edi          # 4-byte Reload
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	112(%esp), %ecx
-	adcl	4(%ecx), %ebp
-	adcl	8(%ecx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	adcl	12(%ecx), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	adcl	16(%ecx), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	20(%ecx), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	24(%ecx), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	44(%ecx), %edx
-	movl	40(%ecx), %esi
-	movl	36(%ecx), %edi
-	movl	32(%ecx), %ebx
-	movl	28(%ecx), %eax
-	adcl	$0, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	$0, %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
+	subl	$28, %esp
+	movl	52(%esp), %edx
+	movl	(%edx), %esi
+	movl	4(%edx), %edi
+	movl	56(%esp), %eax
+	subl	(%eax), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	sbbl	4(%eax), %edi
+	movl	%edi, (%esp)                    # 4-byte Spill
+	movl	28(%edx), %ecx
+	movl	24(%edx), %edi
+	movl	20(%edx), %esi
+	movl	16(%edx), %ebp
+	movl	12(%edx), %ebx
+	movl	8(%edx), %edx
+	sbbl	8(%eax), %edx
+	sbbl	12(%eax), %ebx
+	sbbl	16(%eax), %ebp
+	sbbl	20(%eax), %esi
+	sbbl	24(%eax), %edi
+	sbbl	28(%eax), %ecx
+	movl	%ecx, %eax
+	movl	$0, %ecx
 	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	%ebp, %ebx
-	movl	%ebx, %edx
-	imull	60(%esp), %edx          # 4-byte Folded Reload
-	mulxl	76(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	mulxl	72(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	68(%esp), %esi, %ebp    # 4-byte Folded Reload
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	addl	%ecx, %ebp
-	mulxl	64(%esp), %edi, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %edi
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	mulxl	80(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, %ecx
-	movl	52(%esp), %esi          # 4-byte Reload
-	mulxl	%esi, %edx, %eax
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	adcl	$0, %eax
-	addl	%ebx, 8(%esp)           # 4-byte Folded Spill
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	32(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 24(%esp)          # 4-byte Folded Spill
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	adcl	$0, 48(%esp)            # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ebp, %edx
-	imull	60(%esp), %edx          # 4-byte Folded Reload
-	mulxl	%esi, %ecx, %eax
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	mulxl	64(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	mulxl	72(%esp), %edi, %eax    # 4-byte Folded Reload
-	mulxl	68(%esp), %ecx, %ebx    # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	addl	%edi, %ebx
-	adcl	%esi, %eax
-	movl	%eax, %esi
-	mulxl	76(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	mulxl	80(%esp), %edi, %edx    # 4-byte Folded Reload
-	adcl	%eax, %edi
-	movl	%edi, %eax
-	adcl	12(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	%ebp, 8(%esp)           # 4-byte Folded Spill
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	adcl	$0, 48(%esp)            # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ebx, %edx
-	imull	60(%esp), %edx          # 4-byte Folded Reload
-	mulxl	52(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	mulxl	64(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	mulxl	72(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	68(%esp), %edi, %ebp    # 4-byte Folded Reload
-	addl	%ecx, %ebp
-	adcl	%esi, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	mulxl	76(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, %esi
-	mulxl	80(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	%ebx, %edi
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	20(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 32(%esp)          # 4-byte Folded Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	$0, 48(%esp)            # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ebp, %edx
-	imull	60(%esp), %edx          # 4-byte Folded Reload
-	mulxl	52(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	mulxl	64(%esp), %edi, %esi    # 4-byte Folded Reload
-	mulxl	72(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	mulxl	68(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	addl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	%edi, %eax
-	movl	%eax, %edi
-	mulxl	76(%esp), %eax, %ecx    # 4-byte Folded Reload
-	adcl	%esi, %eax
-	movl	%eax, %esi
-	mulxl	80(%esp), %edx, %eax    # 4-byte Folded Reload
-	adcl	%ecx, %edx
-	movl	%edx, %ecx
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	%ebp, %ebx
-	movl	40(%esp), %ebx          # 4-byte Reload
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	imull	%ebx, %edx
-	mulxl	68(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	mulxl	72(%esp), %eax, %edi    # 4-byte Folded Reload
-	addl	%ecx, %eax
-	mulxl	64(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	adcl	%edi, %ebp
-	movl	%edx, %edi
-	mulxl	76(%esp), %esi, %edx    # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	%ecx, %esi
-	movl	%edi, %edx
-	mulxl	80(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edi, %edx
-	mulxl	52(%esp), %ecx, %edi    # 4-byte Folded Reload
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	adcl	$0, %edi
-	movl	60(%esp), %edx          # 4-byte Reload
-	addl	40(%esp), %edx          # 4-byte Folded Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	subl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	sbbl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, %ebp
-	sbbl	64(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	76(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %ebx
-	sbbl	80(%esp), %ebx          # 4-byte Folded Reload
-	sbbl	52(%esp), %edi          # 4-byte Folded Reload
-	sbbl	$0, %edx
-	andl	$1, %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	jne	.LBB85_2
-# BB#1:
-	movl	%eax, 60(%esp)          # 4-byte Spill
-.LBB85_2:
-	movl	84(%esp), %eax          # 4-byte Reload
-	testb	%al, %al
-	movl	36(%esp), %ecx          # 4-byte Reload
-	jne	.LBB85_4
-# BB#3:
-	movl	68(%esp), %ecx          # 4-byte Reload
-.LBB85_4:
-	movl	108(%esp), %eax
-	movl	%ecx, (%eax)
-	movl	60(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 4(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	jne	.LBB85_6
-# BB#5:
-	movl	%ebp, %esi
-.LBB85_6:
-	movl	%esi, 8(%eax)
-	jne	.LBB85_8
-# BB#7:
-	movl	76(%esp), %ecx          # 4-byte Reload
-.LBB85_8:
-	movl	%ecx, 12(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	jne	.LBB85_10
-# BB#9:
-	movl	%ebx, %ecx
-.LBB85_10:
-	movl	%ecx, 16(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	jne	.LBB85_12
-# BB#11:
-	movl	%edi, %ecx
-.LBB85_12:
-	movl	%ecx, 20(%eax)
-	addl	$88, %esp
+	testb	$1, %cl
+	movl	48(%esp), %ecx
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%ecx)
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	%edi, 24(%ecx)
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	%esi, 20(%ecx)
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	%ebp, 16(%ecx)
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	%ebx, 12(%ecx)
+	movl	%edx, 8(%ecx)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 4(%ecx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, (%ecx)
+	je	.LBB52_2
+# %bb.1:                                # %carry
+	movl	60(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	addl	(%edi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	4(%edi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	8(%edi), %edx
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	12(%edi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	16(%edi), %ebp
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	20(%edi), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	24(%edi), %eax
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	28(%edi), %ebx
+	movl	%ebx, 28(%ecx)
+	movl	%eax, 24(%ecx)
+	movl	%esi, 20(%ecx)
+	movl	%ebp, 16(%ecx)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 12(%ecx)
+	movl	%edx, 8(%ecx)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 4(%ecx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, (%ecx)
+.LBB52_2:                               # %nocarry
+	addl	$28, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end85:
-	.size	mcl_fp_montRed6Lbmi2, .Lfunc_end85-mcl_fp_montRed6Lbmi2
-
-	.globl	mcl_fp_addPre6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre6Lbmi2,@function
-mcl_fp_addPre6Lbmi2:                    # @mcl_fp_addPre6Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %ecx
-	movl	12(%esp), %edx
-	addl	(%edx), %ecx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	4(%eax), %ecx
-	adcl	4(%edx), %ecx
-	movl	%ecx, 4(%esi)
-	movl	8(%eax), %ecx
-	adcl	8(%edx), %ecx
-	movl	%ecx, 8(%esi)
-	movl	12(%edx), %ecx
-	adcl	12(%eax), %ecx
-	movl	%ecx, 12(%esi)
-	movl	16(%edx), %ecx
-	adcl	16(%eax), %ecx
-	movl	%ecx, 16(%esi)
-	movl	20(%eax), %eax
-	movl	20(%edx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 20(%esi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	retl
-.Lfunc_end86:
-	.size	mcl_fp_addPre6Lbmi2, .Lfunc_end86-mcl_fp_addPre6Lbmi2
-
-	.globl	mcl_fp_subPre6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre6Lbmi2,@function
-mcl_fp_subPre6Lbmi2:                    # @mcl_fp_subPre6Lbmi2
-# BB#0:
+.Lfunc_end52:
+	.size	mcl_fp_sub8Lbmi2, .Lfunc_end52-mcl_fp_sub8Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subNF8Lbmi2              # -- Begin function mcl_fp_subNF8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF8Lbmi2,@function
+mcl_fp_subNF8Lbmi2:                     # @mcl_fp_subNF8Lbmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	16(%esp), %ecx
-	movl	(%ecx), %edx
-	xorl	%eax, %eax
-	movl	20(%esp), %esi
-	subl	(%esi), %edx
-	movl	12(%esp), %edi
-	movl	%edx, (%edi)
-	movl	4(%ecx), %edx
-	sbbl	4(%esi), %edx
-	movl	%edx, 4(%edi)
-	movl	8(%ecx), %edx
-	sbbl	8(%esi), %edx
-	movl	%edx, 8(%edi)
-	movl	12(%ecx), %edx
-	sbbl	12(%esi), %edx
-	movl	%edx, 12(%edi)
-	movl	16(%ecx), %edx
-	sbbl	16(%esi), %edx
-	movl	%edx, 16(%edi)
-	movl	20(%esi), %edx
-	movl	20(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 20(%edi)
-	sbbl	$0, %eax
-	andl	$1, %eax
+	subl	$40, %esp
+	movl	64(%esp), %eax
+	movl	(%eax), %esi
+	movl	4(%eax), %edx
+	movl	68(%esp), %ecx
+	subl	(%ecx), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	sbbl	4(%ecx), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	28(%eax), %edx
+	movl	24(%eax), %esi
+	movl	20(%eax), %edi
+	movl	16(%eax), %ebx
+	movl	12(%eax), %ebp
+	movl	8(%eax), %eax
+	sbbl	8(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	sbbl	12(%ecx), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	sbbl	16(%ecx), %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	sbbl	20(%ecx), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	sbbl	24(%ecx), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	movl	%edx, %edi
+	sbbl	28(%ecx), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	sarl	$31, %edi
+	movl	72(%esp), %ebp
+	movl	28(%ebp), %eax
+	andl	%edi, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%ebp), %eax
+	andl	%edi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%ebp), %ebx
+	andl	%edi, %ebx
+	movl	16(%ebp), %esi
+	andl	%edi, %esi
+	movl	12(%ebp), %edx
+	andl	%edi, %edx
+	movl	8(%ebp), %ecx
+	andl	%edi, %ecx
+	movl	4(%ebp), %eax
+	andl	%edi, %eax
+	andl	(%ebp), %edi
+	addl	24(%esp), %edi                  # 4-byte Folded Reload
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	movl	60(%esp), %ebp
+	movl	%edi, (%ebp)
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%eax, 4(%ebp)
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 8(%ebp)
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edx, 12(%ebp)
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%esi, 16(%ebp)
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ebx, 20(%ebp)
+	movl	%eax, 24(%ebp)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 28(%ebp)
+	addl	$40, %esp
 	popl	%esi
 	popl	%edi
+	popl	%ebx
+	popl	%ebp
 	retl
-.Lfunc_end87:
-	.size	mcl_fp_subPre6Lbmi2, .Lfunc_end87-mcl_fp_subPre6Lbmi2
-
-	.globl	mcl_fp_shr1_6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_6Lbmi2,@function
-mcl_fp_shr1_6Lbmi2:                     # @mcl_fp_shr1_6Lbmi2
-# BB#0:
+.Lfunc_end53:
+	.size	mcl_fp_subNF8Lbmi2, .Lfunc_end53-mcl_fp_subNF8Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_add8Lbmi2             # -- Begin function mcl_fpDbl_add8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add8Lbmi2,@function
+mcl_fpDbl_add8Lbmi2:                    # @mcl_fpDbl_add8Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %eax
+	subl	$52, %esp
+	movl	76(%esp), %eax
+	movl	(%eax), %ecx
+	movl	4(%eax), %edx
+	movl	80(%esp), %edi
+	addl	(%edi), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	4(%edi), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	60(%eax), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	56(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	52(%eax), %ebp
+	movl	48(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	44(%eax), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	40(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	32(%eax), %ebx
+	movl	28(%eax), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	24(%eax), %esi
 	movl	20(%eax), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
 	movl	16(%eax), %edx
-	movl	12(%eax), %esi
-	movl	8(%eax), %edi
-	movl	(%eax), %ebx
-	movl	4(%eax), %eax
-	shrdl	$1, %eax, %ebx
-	movl	20(%esp), %ebp
-	movl	%ebx, (%ebp)
-	shrdl	$1, %edi, %eax
+	movl	12(%eax), %ecx
+	movl	8(%eax), %eax
+	adcl	8(%edi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	12(%edi), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	16(%edi), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	20(%edi), %edx
+	adcl	24(%edi), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	28(%edi), %ecx
+	adcl	32(%edi), %ebx
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	36(%edi), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	40(%edi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	44(%edi), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	48(%edi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	52(%edi), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	56(%edi), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	60(%edi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	72(%esp), %ebp
+	movl	%ecx, 28(%ebp)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 24(%ebp)
+	movl	%edx, 20(%ebp)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%ebp)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebp)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ebp)
+	movl	40(%esp), %eax                  # 4-byte Reload
 	movl	%eax, 4(%ebp)
-	shrdl	$1, %esi, %edi
-	movl	%edi, 8(%ebp)
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ebp)
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 16(%ebp)
-	shrl	%ecx
-	movl	%ecx, 20(%ebp)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%ebp)
+	setb	28(%esp)                        # 1-byte Folded Spill
+	movl	84(%esp), %edi
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	subl	(%edi), %ebx
+	movl	%ebx, 44(%esp)                  # 4-byte Spill
+	movl	%esi, %edx
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	sbbl	4(%edi), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	8(%edi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	12(%edi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	16(%edi), %eax
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	sbbl	20(%edi), %ecx
+	movl	12(%esp), %edx                  # 4-byte Reload
+	sbbl	24(%edi), %edx
+	movl	8(%esp), %esi                   # 4-byte Reload
+	sbbl	28(%edi), %esi
+	movzbl	28(%esp), %ebx                  # 1-byte Folded Reload
+	sbbl	$0, %ebx
+	testb	$1, %bl
+	jne	.LBB54_1
+# %bb.2:
+	movl	%esi, 60(%ebp)
+	jne	.LBB54_3
+.LBB54_4:
+	movl	%edx, 56(%ebp)
+	jne	.LBB54_5
+.LBB54_6:
+	movl	%ecx, 52(%ebp)
+	movl	36(%esp), %edx                  # 4-byte Reload
+	jne	.LBB54_7
+.LBB54_8:
+	movl	%eax, 48(%ebp)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	32(%esp), %eax                  # 4-byte Reload
+	jne	.LBB54_9
+.LBB54_10:
+	movl	%eax, 44(%ebp)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	jne	.LBB54_11
+.LBB54_12:
+	movl	%edx, 40(%ebp)
+	jne	.LBB54_13
+.LBB54_14:
+	movl	%ecx, 36(%ebp)
+	je	.LBB54_16
+.LBB54_15:
+	movl	(%esp), %eax                    # 4-byte Reload
+.LBB54_16:
+	movl	%eax, 32(%ebp)
+	addl	$52, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end88:
-	.size	mcl_fp_shr1_6Lbmi2, .Lfunc_end88-mcl_fp_shr1_6Lbmi2
-
-	.globl	mcl_fp_add6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add6Lbmi2,@function
-mcl_fp_add6Lbmi2:                       # @mcl_fp_add6Lbmi2
-# BB#0:
+.LBB54_1:
+	movl	8(%esp), %esi                   # 4-byte Reload
+	movl	%esi, 60(%ebp)
+	je	.LBB54_4
+.LBB54_3:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 56(%ebp)
+	je	.LBB54_6
+.LBB54_5:
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 52(%ebp)
+	movl	36(%esp), %edx                  # 4-byte Reload
+	je	.LBB54_8
+.LBB54_7:
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%ebp)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	32(%esp), %eax                  # 4-byte Reload
+	je	.LBB54_10
+.LBB54_9:
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 44(%ebp)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	je	.LBB54_12
+.LBB54_11:
+	movl	24(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 40(%ebp)
+	je	.LBB54_14
+.LBB54_13:
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 36(%ebp)
+	jne	.LBB54_15
+	jmp	.LBB54_16
+.Lfunc_end54:
+	.size	mcl_fpDbl_add8Lbmi2, .Lfunc_end54-mcl_fpDbl_add8Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sub8Lbmi2             # -- Begin function mcl_fpDbl_sub8Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub8Lbmi2,@function
+mcl_fpDbl_sub8Lbmi2:                    # @mcl_fpDbl_sub8Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$12, %esp
-	movl	40(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %ebp
-	movl	36(%esp), %ebx
-	addl	(%ebx), %edx
-	adcl	4(%ebx), %ebp
-	movl	8(%eax), %ecx
-	adcl	8(%ebx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	%ecx, %esi
-	movl	12(%ebx), %ecx
-	movl	16(%ebx), %edi
-	adcl	12(%eax), %ecx
-	adcl	16(%eax), %edi
-	movl	20(%ebx), %ebx
-	adcl	20(%eax), %ebx
-	movl	32(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ebp, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%ecx, 12(%eax)
-	movl	%edi, 16(%eax)
-	movl	%ebx, 20(%eax)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	44(%esp), %esi
-	subl	(%esi), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	8(%esp), %edx           # 4-byte Reload
-	movl	44(%esp), %esi
-	sbbl	4(%esi), %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	movl	%ecx, %ebp
-	sbbl	8(%esi), %edx
-	sbbl	12(%esi), %ebp
-	sbbl	16(%esi), %edi
-	sbbl	20(%esi), %ebx
-	sbbl	$0, %eax
-	testb	$1, %al
-	jne	.LBB89_2
-# BB#1:                                 # %nocarry
-	movl	(%esp), %eax            # 4-byte Reload
-	movl	32(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, 4(%ecx)
-	movl	%edx, 8(%ecx)
-	movl	%ebp, 12(%ecx)
-	movl	%edi, 16(%ecx)
-	movl	%ebx, 20(%ecx)
-.LBB89_2:                               # %carry
-	addl	$12, %esp
+	subl	$56, %esp
+	movl	80(%esp), %ecx
+	movl	(%ecx), %edx
+	movl	4(%ecx), %edi
+	xorl	%esi, %esi
+	movl	84(%esp), %ebp
+	subl	(%ebp), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	sbbl	4(%ebp), %edi
+	movl	%edi, (%esp)                    # 4-byte Spill
+	movl	60(%ecx), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	56(%ecx), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	52(%ecx), %ebx
+	movl	48(%ecx), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	44(%ecx), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	40(%ecx), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	36(%ecx), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	32(%ecx), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	28(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	24(%ecx), %edx
+	movl	20(%ecx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %edi
+	movl	12(%ecx), %eax
+	movl	8(%ecx), %ecx
+	sbbl	8(%ebp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	sbbl	12(%ebp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	sbbl	16(%ebp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	sbbl	20(%ebp), %edi
+	sbbl	24(%ebp), %edx
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	28(%ebp), %eax
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	sbbl	32(%ebp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	sbbl	36(%ebp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	sbbl	40(%ebp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	sbbl	44(%ebp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	sbbl	48(%ebp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	sbbl	52(%ebp), %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	sbbl	56(%ebp), %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	sbbl	60(%ebp), %ebx
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	movl	76(%esp), %ecx
+	movl	%eax, 28(%ecx)
+	movl	%edx, 24(%ecx)
+	movl	%edi, 20(%ecx)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%ecx)
+	movl	48(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ecx)
+	movl	52(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ecx)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 4(%ecx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, (%ecx)
+	sbbl	%esi, %esi
+	andl	$1, %esi
+	negl	%esi
+	movl	88(%esp), %ecx
+	movl	28(%ecx), %eax
+	andl	%esi, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%ecx), %eax
+	andl	%esi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%ecx), %edi
+	andl	%esi, %edi
+	movl	16(%ecx), %ebp
+	andl	%esi, %ebp
+	movl	12(%ecx), %edx
+	andl	%esi, %edx
+	movl	8(%ecx), %ecx
+	andl	%esi, %ecx
+	movl	88(%esp), %eax
+	movl	4(%eax), %eax
+	andl	%esi, %eax
+	movl	88(%esp), %ebx
+	andl	(%ebx), %esi
+	addl	12(%esp), %esi                  # 4-byte Folded Reload
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	76(%esp), %ebx
+	movl	%esi, 32(%ebx)
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 36(%ebx)
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 40(%ebx)
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%edx, 44(%ebx)
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movl	%ebp, 48(%ebx)
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	movl	%edi, 52(%ebx)
+	movl	%eax, 56(%ebx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 60(%ebx)
+	addl	$56, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end89:
-	.size	mcl_fp_add6Lbmi2, .Lfunc_end89-mcl_fp_add6Lbmi2
-
-	.globl	mcl_fp_addNF6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF6Lbmi2,@function
-mcl_fp_addNF6Lbmi2:                     # @mcl_fp_addNF6Lbmi2
-# BB#0:
+.Lfunc_end55:
+	.size	mcl_fpDbl_sub8Lbmi2, .Lfunc_end55-mcl_fpDbl_sub8Lbmi2
+                                        # -- End function
+	.globl	mulPv384x32bmi2                 # -- Begin function mulPv384x32bmi2
+	.p2align	4, 0x90
+	.type	mulPv384x32bmi2,@function
+mulPv384x32bmi2:                        # @mulPv384x32bmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$40, %esp
-	movl	68(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %ecx
-	movl	64(%esp), %ebp
-	addl	(%ebp), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%edx, %ebx
-	adcl	4(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	20(%eax), %edx
-	movl	16(%eax), %esi
-	movl	12(%eax), %edi
-	movl	8(%eax), %eax
-	adcl	8(%ebp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	12(%ebp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	16(%ebp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	20(%ebp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%ebx, %ebp
-	movl	72(%esp), %ebx
-	subl	(%ebx), %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	movl	%ecx, %ebp
-	movl	72(%esp), %ecx
-	sbbl	4(%ecx), %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	sbbl	8(%ecx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	sbbl	12(%ecx), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	%esi, %edi
-	sbbl	16(%ecx), %edi
-	movl	%edx, %esi
-	sbbl	20(%ecx), %esi
-	movl	%esi, %ebx
-	sarl	$31, %ebx
-	testl	%ebx, %ebx
-	js	.LBB90_2
-# BB#1:
-	movl	(%esp), %eax            # 4-byte Reload
-.LBB90_2:
-	movl	60(%esp), %ebx
-	movl	%eax, (%ebx)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	js	.LBB90_4
-# BB#3:
-	movl	4(%esp), %ecx           # 4-byte Reload
-.LBB90_4:
-	movl	%ecx, 4(%ebx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	28(%esp), %edx          # 4-byte Reload
-	movl	24(%esp), %ecx          # 4-byte Reload
-	js	.LBB90_6
-# BB#5:
-	movl	8(%esp), %ecx           # 4-byte Reload
-.LBB90_6:
-	movl	%ecx, 8(%ebx)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	js	.LBB90_8
-# BB#7:
-	movl	12(%esp), %edx          # 4-byte Reload
-.LBB90_8:
-	movl	%edx, 12(%ebx)
-	js	.LBB90_10
-# BB#9:
-	movl	%edi, %ecx
-.LBB90_10:
-	movl	%ecx, 16(%ebx)
-	js	.LBB90_12
-# BB#11:
-	movl	%esi, %eax
-.LBB90_12:
-	movl	%eax, 20(%ebx)
-	addl	$40, %esp
+	subl	$36, %esp
+	movl	64(%esp), %edx
+	movl	60(%esp), %ebp
+	mulxl	4(%ebp), %eax, %ecx
+	mulxl	(%ebp), %edi, %esi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	addl	%eax, %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	mulxl	8(%ebp), %esi, %eax
+	adcl	%ecx, %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	mulxl	12(%ebp), %esi, %ecx
+	adcl	%eax, %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	mulxl	16(%ebp), %esi, %eax
+	adcl	%ecx, %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	mulxl	20(%ebp), %ecx, %esi
+	adcl	%eax, %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	mulxl	24(%ebp), %ecx, %eax
+	adcl	%esi, %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	mulxl	28(%ebp), %ebx, %ecx
+	adcl	%eax, %ebx
+	mulxl	32(%ebp), %edi, %eax
+	adcl	%ecx, %edi
+	mulxl	36(%ebp), %esi, %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	%eax, %esi
+	mulxl	40(%ebp), %ecx, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	56(%esp), %eax
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, (%eax)
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 4(%eax)
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 8(%eax)
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 12(%eax)
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 16(%eax)
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%eax)
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	movl	%ebp, 24(%eax)
+	movl	%ebx, 28(%eax)
+	movl	%edi, 32(%eax)
+	movl	%esi, 36(%eax)
+	movl	%ecx, 40(%eax)
+	movl	60(%esp), %ecx
+	mulxl	44(%ecx), %ecx, %edx
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 44(%eax)
+	adcl	$0, %edx
+	movl	%edx, 48(%eax)
+	addl	$36, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
-	retl
-.Lfunc_end90:
-	.size	mcl_fp_addNF6Lbmi2, .Lfunc_end90-mcl_fp_addNF6Lbmi2
-
-	.globl	mcl_fp_sub6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub6Lbmi2,@function
-mcl_fp_sub6Lbmi2:                       # @mcl_fp_sub6Lbmi2
-# BB#0:
+	retl	$4
+.Lfunc_end56:
+	.size	mulPv384x32bmi2, .Lfunc_end56-mulPv384x32bmi2
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre12Lbmi2        # -- Begin function mcl_fp_mulUnitPre12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre12Lbmi2,@function
+mcl_fp_mulUnitPre12Lbmi2:               # @mcl_fp_mulUnitPre12Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$16, %esp
-	movl	40(%esp), %ebx
-	movl	(%ebx), %esi
-	movl	4(%ebx), %edi
-	movl	44(%esp), %ecx
-	subl	(%ecx), %esi
-	sbbl	4(%ecx), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	movl	8(%ebx), %eax
-	sbbl	8(%ecx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	12(%ebx), %eax
-	sbbl	12(%ecx), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	16(%ebx), %ebp
-	sbbl	16(%ecx), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	20(%ebx), %edx
-	sbbl	20(%ecx), %edx
-	movl	$0, %ecx
-	sbbl	$0, %ecx
-	testb	$1, %cl
-	movl	36(%esp), %ebx
-	movl	%esi, (%ebx)
-	movl	%edi, 4(%ebx)
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	%eax, 12(%ebx)
-	movl	%ebp, 16(%ebx)
-	movl	%edx, 20(%ebx)
-	je	.LBB91_2
-# BB#1:                                 # %carry
-	movl	48(%esp), %ecx
-	addl	(%ecx), %esi
-	movl	%esi, (%ebx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	4(%ecx), %eax
-	adcl	8(%ecx), %edi
-	movl	%eax, 4(%ebx)
-	movl	12(%ecx), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%edi, 8(%ebx)
-	movl	%eax, 12(%ebx)
-	movl	16(%ecx), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 16(%ebx)
-	movl	20(%ecx), %eax
-	adcl	%edx, %eax
-	movl	%eax, 20(%ebx)
-.LBB91_2:                               # %nocarry
-	addl	$16, %esp
+	subl	$92, %esp
+	calll	.L57$pb
+.L57$pb:
+	popl	%ebx
+.Ltmp9:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp9-.L57$pb), %ebx
+	subl	$4, %esp
+	movl	124(%esp), %eax
+	movl	120(%esp), %ecx
+	leal	44(%esp), %edx
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	40(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ebp
+	movl	72(%esp), %ebx
+	movl	76(%esp), %edi
+	movl	80(%esp), %esi
+	movl	84(%esp), %edx
+	movl	88(%esp), %ecx
+	movl	112(%esp), %eax
+	movl	%ecx, 48(%eax)
+	movl	%edx, 44(%eax)
+	movl	%esi, 40(%eax)
+	movl	%edi, 36(%eax)
+	movl	%ebx, 32(%eax)
+	movl	%ebp, 28(%eax)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%eax)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%eax)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%eax)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 8(%eax)
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%eax)
+	addl	$92, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end91:
-	.size	mcl_fp_sub6Lbmi2, .Lfunc_end91-mcl_fp_sub6Lbmi2
-
-	.globl	mcl_fp_subNF6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF6Lbmi2,@function
-mcl_fp_subNF6Lbmi2:                     # @mcl_fp_subNF6Lbmi2
-# BB#0:
+.Lfunc_end57:
+	.size	mcl_fp_mulUnitPre12Lbmi2, .Lfunc_end57-mcl_fp_mulUnitPre12Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre12Lbmi2         # -- Begin function mcl_fpDbl_mulPre12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre12Lbmi2,@function
+mcl_fpDbl_mulPre12Lbmi2:                # @mcl_fpDbl_mulPre12Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$24, %esp
-	movl	48(%esp), %ebx
-	movl	20(%ebx), %esi
-	movl	(%ebx), %ecx
-	movl	4(%ebx), %eax
-	movl	52(%esp), %ebp
-	subl	(%ebp), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	4(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	16(%ebx), %eax
-	movl	12(%ebx), %ecx
-	movl	8(%ebx), %edx
-	sbbl	8(%ebp), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	sbbl	16(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	sbbl	20(%ebp), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	%edx, %ebp
-	sarl	$31, %ebp
-	movl	%ebp, %ecx
-	addl	%ecx, %ecx
-	movl	%ebp, %eax
-	adcl	%eax, %eax
-	shrl	$31, %edx
-	orl	%ecx, %edx
-	movl	56(%esp), %ebx
-	andl	4(%ebx), %eax
-	andl	(%ebx), %edx
-	movl	20(%ebx), %edi
-	andl	%ebp, %edi
-	movl	16(%ebx), %esi
-	andl	%ebp, %esi
-	movl	12(%ebx), %ecx
-	andl	%ebp, %ecx
-	andl	8(%ebx), %ebp
-	addl	8(%esp), %edx           # 4-byte Folded Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	44(%esp), %ebx
-	movl	%edx, (%ebx)
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%eax, 4(%ebx)
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebp, 8(%ebx)
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%ecx, 12(%ebx)
-	movl	%esi, 16(%ebx)
-	adcl	(%esp), %edi            # 4-byte Folded Reload
-	movl	%edi, 20(%ebx)
-	addl	$24, %esp
-	popl	%esi
+	subl	$220, %esp
+	calll	.L58$pb
+.L58$pb:
 	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end92:
-	.size	mcl_fp_subNF6Lbmi2, .Lfunc_end92-mcl_fp_subNF6Lbmi2
-
-	.globl	mcl_fpDbl_add6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add6Lbmi2,@function
-mcl_fpDbl_add6Lbmi2:                    # @mcl_fpDbl_add6Lbmi2
-# BB#0:
+.Ltmp10:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp10-.L58$pb), %edi
+	subl	$4, %esp
+	movl	252(%esp), %ebp
+	movl	248(%esp), %esi
+	movl	%edi, %ebx
+	movl	%edi, 68(%esp)                  # 4-byte Spill
 	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
 	pushl	%esi
-	subl	$36, %esp
-	movl	64(%esp), %edx
-	movl	60(%esp), %ecx
-	movl	12(%ecx), %esi
-	movl	16(%ecx), %eax
-	movl	8(%edx), %edi
-	movl	(%edx), %ebx
-	addl	(%ecx), %ebx
-	movl	56(%esp), %ebp
-	movl	%ebx, (%ebp)
-	movl	4(%edx), %ebx
-	adcl	4(%ecx), %ebx
-	adcl	8(%ecx), %edi
-	adcl	12(%edx), %esi
-	adcl	16(%edx), %eax
-	movl	%ebx, 4(%ebp)
-	movl	%edx, %ebx
-	movl	32(%ebx), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%edi, 8(%ebp)
-	movl	20(%ebx), %edi
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	adcl	%edi, %esi
-	movl	24(%ebx), %edi
-	movl	%eax, 16(%ebp)
-	movl	24(%ecx), %edx
-	adcl	%edi, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	28(%ebx), %edi
-	movl	%esi, 20(%ebp)
-	movl	28(%ecx), %eax
-	adcl	%edi, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	32(%ecx), %ebp
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	36(%ebx), %esi
+	pushl	252(%esp)
+	calll	mcl_fpDbl_mulPre6Lbmi2@PLT
+	addl	$12, %esp
+	leal	24(%ebp), %eax
+	leal	24(%esi), %ecx
+	movl	244(%esp), %edx
+	addl	$48, %edx
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+	calll	mcl_fpDbl_mulPre6Lbmi2@PLT
+	addl	$16, %esp
+	movl	40(%esi), %edx
+	movl	36(%esi), %ebx
+	movl	32(%esi), %eax
+	movl	24(%esi), %edi
+	movl	28(%esi), %ecx
+	addl	(%esi), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	adcl	4(%esi), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	8(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	12(%esi), %ebx
 	movl	%ebx, %edi
-	movl	36(%ecx), %ebx
-	adcl	%esi, %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	40(%edi), %esi
-	movl	40(%ecx), %edi
-	adcl	%esi, %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi
-	movl	44(%esi), %esi
-	movl	44(%ecx), %ecx
-	adcl	%esi, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	68(%esp), %esi
-	subl	(%esi), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	68(%esp), %edx
-	sbbl	4(%edx), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	sbbl	8(%edx), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	%ebx, %ebp
-	sbbl	12(%edx), %ebp
-	movl	%edi, %ebx
-	movl	12(%esp), %edi          # 4-byte Reload
-	sbbl	16(%edx), %ebx
-	movl	%edi, %eax
-	sbbl	20(%edx), %eax
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB93_2
-# BB#1:
-	movl	%eax, %edi
-.LBB93_2:
-	testb	%cl, %cl
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	16(%esp), %edx          # 4-byte Reload
-	jne	.LBB93_4
-# BB#3:
-	movl	(%esp), %edx            # 4-byte Reload
-	movl	4(%esp), %ecx           # 4-byte Reload
-.LBB93_4:
-	movl	56(%esp), %eax
-	movl	%ecx, 24(%eax)
-	movl	%edx, 28(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	24(%esp), %edx          # 4-byte Reload
-	jne	.LBB93_6
-# BB#5:
-	movl	8(%esp), %edx           # 4-byte Reload
-.LBB93_6:
-	movl	%edx, 32(%eax)
-	movl	28(%esp), %edx          # 4-byte Reload
-	jne	.LBB93_8
-# BB#7:
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	adcl	16(%esi), %edx
+	movl	%edx, %ecx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esi), %eax
+	adcl	20(%esi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	setb	48(%esp)                        # 1-byte Folded Spill
+	movl	24(%ebp), %ebx
+	addl	(%ebp), %ebx
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	movl	28(%ebp), %edx
+	adcl	4(%ebp), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	32(%ebp), %edx
+	adcl	8(%ebp), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	36(%ebp), %edx
+	adcl	12(%ebp), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	40(%ebp), %edx
+	adcl	16(%ebp), %edx
+	movl	%edx, %esi
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	44(%ebp), %edx
+	adcl	20(%ebp), %edx
+	movl	%eax, 168(%esp)
+	movl	%ecx, 164(%esp)
+	movl	%edi, 160(%esp)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 156(%esp)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 152(%esp)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 148(%esp)
+	movl	%edx, 144(%esp)
+	movl	%esi, 140(%esp)
+	movl	36(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 136(%esp)
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 132(%esp)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 128(%esp)
+	movl	%ebx, 124(%esp)
+	setb	60(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movzbl	52(%esp), %ecx                  # 1-byte Folded Reload
+	movl	%ecx, %esi
+	negl	%esi
+	movl	%ecx, %ebx
+	shll	$31, %ebx
+	shrdl	$31, %esi, %ebx
+	andl	60(%esp), %ebx                  # 4-byte Folded Reload
+	andl	%esi, 56(%esp)                  # 4-byte Folded Spill
+	andl	%esi, %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	andl	%esi, %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	andl	%esi, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	andl	%edx, %esi
+	movzbl	64(%esp), %ebp                  # 1-byte Folded Reload
+	andl	%ebp, %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
 	movl	%ebp, %edx
-.LBB93_8:
-	movl	%edx, 36(%eax)
-	jne	.LBB93_10
-# BB#9:
-	movl	%ebx, %ecx
-.LBB93_10:
-	movl	%ecx, 40(%eax)
-	movl	%edi, 44(%eax)
-	addl	$36, %esp
+	negl	%edx
+	andl	%edx, 48(%esp)                  # 4-byte Folded Spill
+	andl	%edx, 44(%esp)                  # 4-byte Folded Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	andl	%edx, %edi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	andl	%edx, %ecx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	andl	%edx, %eax
+	shll	$31, %ebp
+	shrdl	$31, %edx, %ebp
+	andl	16(%esp), %ebp                  # 4-byte Folded Reload
+	addl	%ebx, %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	leal	176(%esp), %ecx
+	adcl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	56(%esp), %edi                  # 4-byte Folded Reload
+	leal	128(%esp), %edx
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	%esi, %ebp
+	setb	%al
+	movzbl	%al, %esi
+	movl	68(%esp), %ebx                  # 4-byte Reload
+	pushl	%edx
+	leal	156(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mcl_fpDbl_mulPre6Lbmi2@PLT
+	addl	$16, %esp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	addl	196(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	200(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	204(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	208(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	212(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	adcl	216(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	%esi, %ebp
+	adcl	48(%esp), %ebp                  # 4-byte Folded Reload
+	movl	172(%esp), %eax
+	movl	240(%esp), %edi
+	subl	(%edi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	176(%esp), %eax
+	sbbl	4(%edi), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	180(%esp), %eax
+	sbbl	8(%edi), %eax
+	movl	%eax, %ecx
+	movl	184(%esp), %esi
+	sbbl	12(%edi), %esi
+	movl	188(%esp), %ebx
+	sbbl	16(%edi), %ebx
+	movl	192(%esp), %eax
+	sbbl	20(%edi), %eax
+	movl	%eax, %edx
+	movl	24(%edi), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	sbbl	%eax, 12(%esp)                  # 4-byte Folded Spill
+	movl	28(%edi), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	sbbl	%eax, 20(%esp)                  # 4-byte Folded Spill
+	movl	32(%edi), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	sbbl	%eax, 24(%esp)                  # 4-byte Folded Spill
+	movl	36(%edi), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	sbbl	%eax, 16(%esp)                  # 4-byte Folded Spill
+	movl	40(%edi), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	sbbl	%eax, 40(%esp)                  # 4-byte Folded Spill
+	movl	44(%edi), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	sbbl	%eax, 44(%esp)                  # 4-byte Folded Spill
+	sbbl	$0, %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	movl	48(%edi), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	subl	%eax, 32(%esp)                  # 4-byte Folded Spill
+	movl	52(%edi), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	sbbl	%eax, 28(%esp)                  # 4-byte Folded Spill
+	movl	56(%edi), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	sbbl	%eax, %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	60(%edi), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	sbbl	%eax, %esi
+	movl	64(%edi), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	sbbl	%eax, %ebx
+	movl	68(%edi), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	sbbl	%eax, %edx
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	72(%edi), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	sbbl	%eax, 12(%esp)                  # 4-byte Folded Spill
+	movl	76(%edi), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	sbbl	%eax, 20(%esp)                  # 4-byte Folded Spill
+	movl	80(%edi), %eax
+	movl	%eax, 120(%esp)                 # 4-byte Spill
+	sbbl	%eax, 24(%esp)                  # 4-byte Folded Spill
+	movl	84(%edi), %eax
+	movl	%eax, 116(%esp)                 # 4-byte Spill
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	sbbl	%eax, %ebp
+	movl	88(%edi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	sbbl	%eax, 40(%esp)                  # 4-byte Folded Spill
+	movl	92(%edi), %eax
+	movl	%eax, 112(%esp)                 # 4-byte Spill
+	sbbl	%eax, 44(%esp)                  # 4-byte Folded Spill
+	sbbl	$0, 36(%esp)                    # 4-byte Folded Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	addl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	60(%esp), %eax                  # 4-byte Folded Reload
+	adcl	104(%esp), %esi                 # 4-byte Folded Reload
+	adcl	76(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%esi, 36(%edi)
+	movl	%eax, 32(%edi)
+	movl	%ecx, 28(%edi)
+	movl	%edx, 24(%edi)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	adcl	64(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ebx, 40(%edi)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	100(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 44(%edi)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	108(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%eax, 48(%edi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	96(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 52(%edi)
+	adcl	92(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%eax, 56(%edi)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	88(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ebp, 60(%edi)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	84(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 64(%edi)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	80(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 68(%edi)
+	movl	%eax, 72(%edi)
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 76(%edi)
+	movl	120(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 80(%edi)
+	movl	116(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 84(%edi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 88(%edi)
+	movl	112(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 92(%edi)
+	addl	$220, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end93:
-	.size	mcl_fpDbl_add6Lbmi2, .Lfunc_end93-mcl_fpDbl_add6Lbmi2
-
-	.globl	mcl_fpDbl_sub6Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub6Lbmi2,@function
-mcl_fpDbl_sub6Lbmi2:                    # @mcl_fpDbl_sub6Lbmi2
-# BB#0:
+.Lfunc_end58:
+	.size	mcl_fpDbl_mulPre12Lbmi2, .Lfunc_end58-mcl_fpDbl_mulPre12Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre12Lbmi2         # -- Begin function mcl_fpDbl_sqrPre12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre12Lbmi2,@function
+mcl_fpDbl_sqrPre12Lbmi2:                # @mcl_fpDbl_sqrPre12Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$24, %esp
-	movl	48(%esp), %edx
-	movl	(%edx), %eax
-	movl	4(%edx), %edi
-	movl	52(%esp), %esi
-	subl	(%esi), %eax
-	sbbl	4(%esi), %edi
-	movl	8(%edx), %ebx
-	sbbl	8(%esi), %ebx
-	movl	44(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%edx), %eax
-	sbbl	12(%esi), %eax
-	movl	%edi, 4(%ecx)
-	movl	16(%edx), %edi
-	sbbl	16(%esi), %edi
-	movl	%ebx, 8(%ecx)
-	movl	20(%esi), %ebx
-	movl	%eax, 12(%ecx)
-	movl	20(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	24(%esi), %ebx
-	movl	%edi, 16(%ecx)
-	movl	24(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	28(%esi), %edi
-	movl	%eax, 20(%ecx)
-	movl	28(%edx), %eax
-	sbbl	%edi, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	32(%esi), %edi
-	movl	32(%edx), %eax
-	sbbl	%edi, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	36(%esi), %edi
-	movl	36(%edx), %eax
-	sbbl	%edi, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	40(%esi), %edi
-	movl	40(%edx), %eax
-	sbbl	%edi, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	44(%esi), %esi
-	movl	44(%edx), %eax
-	sbbl	%esi, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	$0, %ebx
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	movl	56(%esp), %eax
-	jne	.LBB94_1
-# BB#2:
-	xorl	%edx, %edx
-	jmp	.LBB94_3
-.LBB94_1:
-	movl	20(%eax), %edx
-.LBB94_3:
-	testb	%bl, %bl
-	jne	.LBB94_4
-# BB#5:
-	movl	$0, %esi
-	movl	$0, %edi
-	jmp	.LBB94_6
-.LBB94_4:
-	movl	(%eax), %edi
-	movl	4(%eax), %esi
-.LBB94_6:
-	jne	.LBB94_7
-# BB#8:
-	movl	$0, %ebx
-	jmp	.LBB94_9
-.LBB94_7:
-	movl	16(%eax), %ebx
-.LBB94_9:
-	jne	.LBB94_10
-# BB#11:
-	movl	$0, %ebp
-	jmp	.LBB94_12
-.LBB94_10:
-	movl	12(%eax), %ebp
-.LBB94_12:
-	jne	.LBB94_13
-# BB#14:
-	xorl	%eax, %eax
-	jmp	.LBB94_15
-.LBB94_13:
-	movl	8(%eax), %eax
-.LBB94_15:
-	addl	8(%esp), %edi           # 4-byte Folded Reload
-	adcl	(%esp), %esi            # 4-byte Folded Reload
-	movl	%edi, 24(%ecx)
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%esi, 28(%ecx)
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	adcl	16(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 36(%ecx)
-	movl	%ebx, 40(%ecx)
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%ecx)
-	addl	$24, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
+	subl	$220, %esp
+	calll	.L59$pb
+.L59$pb:
 	popl	%ebp
-	retl
-.Lfunc_end94:
-	.size	mcl_fpDbl_sub6Lbmi2, .Lfunc_end94-mcl_fpDbl_sub6Lbmi2
-
-	.globl	mcl_fp_mulUnitPre7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre7Lbmi2,@function
-mcl_fp_mulUnitPre7Lbmi2:                # @mcl_fp_mulUnitPre7Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
+.Ltmp11:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp11-.L59$pb), %ebp
+	subl	$4, %esp
+	movl	248(%esp), %esi
+	movl	244(%esp), %edi
+	movl	%ebp, %ebx
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
 	pushl	%esi
-	subl	$16, %esp
-	movl	44(%esp), %edx
-	movl	40(%esp), %edi
-	mulxl	4(%edi), %ecx, %esi
-	mulxl	(%edi), %ebx, %eax
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	addl	%ecx, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	mulxl	8(%edi), %ecx, %eax
-	adcl	%esi, %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	mulxl	12(%edi), %ebx, %ecx
-	adcl	%eax, %ebx
-	mulxl	16(%edi), %esi, %ebp
-	adcl	%ecx, %esi
-	mulxl	20(%edi), %ecx, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	adcl	%ebp, %ecx
-	mulxl	24(%edi), %edx, %edi
-	movl	36(%esp), %eax
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%eax)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 4(%eax)
-	movl	4(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 8(%eax)
-	movl	%ebx, 12(%eax)
-	movl	%esi, 16(%eax)
-	movl	%ecx, 20(%eax)
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 24(%eax)
-	adcl	$0, %edi
-	movl	%edi, 28(%eax)
-	addl	$16, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end95:
-	.size	mcl_fp_mulUnitPre7Lbmi2, .Lfunc_end95-mcl_fp_mulUnitPre7Lbmi2
-
-	.globl	mcl_fpDbl_mulPre7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre7Lbmi2,@function
-mcl_fpDbl_mulPre7Lbmi2:                 # @mcl_fpDbl_mulPre7Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
 	pushl	%esi
-	subl	$72, %esp
-	movl	96(%esp), %eax
-	movl	(%eax), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	4(%eax), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	100(%esp), %eax
-	movl	(%eax), %ebp
-	mulxl	%ebp, %ecx, %eax
-	movl	%esi, %edx
-	mulxl	%ebp, %edx, %esi
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	addl	%ecx, %esi
-	movl	8(%edi), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%edi, %ebx
-	mulxl	%ebp, %edi, %ecx
-	adcl	%eax, %edi
-	movl	12(%ebx), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mulxl	%ebp, %ebx, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	%ecx, %ebx
-	movl	16(%eax), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	mulxl	%ebp, %ecx, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	20(%eax), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	mulxl	%ebp, %edx, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	24(%eax), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	mulxl	%ebp, %eax, %edx
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%eax)
-	adcl	$0, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	4(%eax), %eax
-	movl	68(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	addl	%esi, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %esi, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	%edi, %esi
-	movl	60(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edi, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	%ebx, %edi
-	movl	56(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ebx, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	%ecx, %ebx
-	movl	52(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	movl	48(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ecx, %edx
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	40(%esp), %esi          # 4-byte Folded Reload
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	adcl	56(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	%ebp, 64(%esp)          # 4-byte Folded Spill
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	%edx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	68(%esp), %edx          # 4-byte Reload
-	movl	%edx, 4(%eax)
-	movl	96(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	8(%eax), %eax
-	mulxl	%eax, %edx, %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	addl	%esi, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	mulxl	%eax, %edx, %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	%edi, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	8(%ecx), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	mulxl	%eax, %esi, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	%ebx, %esi
-	movl	%esi, %edi
-	movl	12(%ecx), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	mulxl	%eax, %edx, %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	adcl	60(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	16(%ecx), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebx, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	20(%ecx), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	mulxl	%eax, %ebp, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	24(%ecx), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	mulxl	%eax, %ecx, %edx
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	movl	20(%esp), %eax          # 4-byte Reload
-	addl	%eax, 68(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 60(%esp)          # 4-byte Folded Spill
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	adcl	%edx, %esi
-	movl	92(%esp), %eax
-	movl	32(%esp), %edx          # 4-byte Reload
-	movl	%edx, 8(%eax)
-	movl	100(%esp), %eax
-	movl	12(%eax), %eax
-	movl	56(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	addl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edi, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	%ebx, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ebx, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	%ebp, %ebx
-	movl	36(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ebp, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	%ecx, %ebp
-	movl	28(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ecx, %eax
-	adcl	%esi, %ecx
-	movl	%ecx, %edx
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	32(%esp), %esi          # 4-byte Reload
-	addl	%esi, 64(%esp)          # 4-byte Folded Spill
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	%esi, 68(%esp)          # 4-byte Folded Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	%eax, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	56(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	movl	96(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	16(%eax), %esi
-	mulxl	%esi, %eax, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	addl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	mulxl	%esi, %eax, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	%edi, %eax
-	movl	%eax, %edi
-	movl	8(%ecx), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	mulxl	%esi, %eax, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	12(%ecx), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	mulxl	%esi, %eax, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	%ebx, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	16(%ecx), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	mulxl	%esi, %ebx, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	adcl	%ebp, %ebx
-	movl	20(%ecx), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	mulxl	%esi, %edx, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %eax
-	movl	24(%ecx), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	mulxl	%esi, %ebp, %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	%edi, %esi
-	addl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	12(%esp), %edi          # 4-byte Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	%edx, 68(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	36(%esp), %edx          # 4-byte Reload
-	movl	%edx, 16(%eax)
-	movl	100(%esp), %eax
-	movl	20(%eax), %eax
-	movl	60(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	addl	%esi, %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %esi, %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	%edi, %esi
-	movl	56(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ebx, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	44(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	%ebp, %edx
+	pushl	%edi
+	calll	mcl_fpDbl_mulPre6Lbmi2@PLT
+	addl	$12, %esp
+	leal	24(%esi), %eax
+	leal	48(%edi), %ecx
+	pushl	%eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mcl_fpDbl_mulPre6Lbmi2@PLT
+	addl	$16, %esp
+	movl	44(%esi), %edi
+	movl	40(%esi), %ecx
+	movl	36(%esi), %ebx
+	movl	32(%esi), %eax
+	movl	24(%esi), %ebp
+	movl	28(%esi), %edx
+	addl	(%esi), %ebp
+	adcl	4(%esi), %edx
+	adcl	8(%esi), %eax
+	adcl	12(%esi), %ebx
+	adcl	16(%esi), %ecx
+	adcl	20(%esi), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	%edi, 168(%esp)
+	movl	%ecx, 164(%esp)
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	%ebx, 160(%esp)
+	movl	%eax, 156(%esp)
+	movl	%edx, 152(%esp)
+	movl	%ebp, 148(%esp)
+	movl	%edi, 144(%esp)
+	movl	%ecx, 140(%esp)
+	movl	%ebx, 136(%esp)
+	movl	%eax, 132(%esp)
+	movl	%edx, 128(%esp)
+	movl	%ebp, 124(%esp)
+	setb	%bl
+	subl	$4, %esp
+	movzbl	%bl, %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	%edi, %ebx
+	shll	$31, %ebx
+	negl	%edi
+	shrdl	$31, %edi, %ebx
+	andl	%ebp, %ebx
+	movl	%ebx, %esi
+	andl	%edi, %edx
+	andl	%edi, %eax
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	andl	%edi, %ebx
+	andl	%edi, %ecx
+	andl	12(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, %ebp
+	shldl	$1, %ecx, %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	shldl	$1, %ebx, %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	shldl	$1, %eax, %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	shldl	$1, %edx, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	shldl	$1, %esi, %edx
 	movl	%edx, %ebp
-	movl	40(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %eax
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	addl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	20(%esp), %ecx          # 4-byte Reload
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	60(%esp), %edx          # 4-byte Reload
-	movl	%edx, 20(%eax)
-	movl	100(%esp), %eax
-	movl	24(%eax), %edx
-	movl	96(%esp), %eax
-	mulxl	(%eax), %ebp, %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	addl	%esi, %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	mulxl	4(%eax), %esi, %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	%ecx, %esi
-	movl	%esi, %ebp
-	mulxl	8(%eax), %ecx, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	adcl	%ebx, %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	mulxl	12(%eax), %ebx, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	mulxl	16(%eax), %edi, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	mulxl	20(%eax), %esi, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	mulxl	24(%eax), %edx, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	addl	60(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 68(%esp)          # 4-byte Folded Spill
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	92(%esp), %eax
-	movl	64(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	movl	60(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	68(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 32(%eax)
-	movl	%ebx, 36(%eax)
-	movl	%edi, 40(%eax)
-	movl	%esi, 44(%eax)
-	movl	%edx, 48(%eax)
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
+	shrl	$31, %edi
+	addl	%esi, %esi
+	leal	128(%esp), %eax
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	pushl	%eax
+	leal	156(%esp), %eax
+	pushl	%eax
+	leal	184(%esp), %eax
+	pushl	%eax
+	calll	mcl_fpDbl_mulPre6Lbmi2@PLT
+	addl	$16, %esp
+	addl	196(%esp), %esi
+	adcl	200(%esp), %ebp
+	movl	%ebp, %edx
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	204(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	208(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	212(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	216(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movl	172(%esp), %ecx
+	movl	240(%esp), %eax
+	subl	(%eax), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	176(%esp), %ebx
+	sbbl	4(%eax), %ebx
+	movl	180(%esp), %ecx
+	sbbl	8(%eax), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	184(%esp), %ebp
+	sbbl	12(%eax), %ebp
+	movl	188(%esp), %ecx
+	sbbl	16(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	192(%esp), %ecx
+	sbbl	20(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, 120(%esp)                 # 4-byte Spill
+	sbbl	%ecx, %esi
+	movl	28(%eax), %ecx
+	movl	%ecx, 112(%esp)                 # 4-byte Spill
+	sbbl	%ecx, %edx
+	movl	32(%eax), %ecx
+	movl	%ecx, 108(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 16(%esp)                  # 4-byte Folded Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	movl	40(%eax), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 20(%esp)                  # 4-byte Folded Spill
+	movl	44(%eax), %ecx
+	movl	%ecx, 116(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 8(%esp)                   # 4-byte Folded Spill
+	sbbl	$0, %edi
+	movl	48(%eax), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	subl	%ecx, 28(%esp)                  # 4-byte Folded Spill
+	movl	52(%eax), %ecx
+	movl	%ecx, 84(%esp)                  # 4-byte Spill
+	sbbl	%ecx, %ebx
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	movl	56(%eax), %ecx
+	movl	%ecx, 80(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 32(%esp)                  # 4-byte Folded Spill
+	movl	60(%eax), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	sbbl	%ecx, %ebp
+	movl	64(%eax), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 36(%esp)                  # 4-byte Folded Spill
+	movl	68(%eax), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 24(%esp)                  # 4-byte Folded Spill
+	movl	72(%eax), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	sbbl	%ecx, %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	76(%eax), %ecx
+	movl	%ecx, 104(%esp)                 # 4-byte Spill
+	sbbl	%ecx, %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	80(%eax), %ecx
+	movl	%ecx, 100(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 16(%esp)                  # 4-byte Folded Spill
+	movl	84(%eax), %ecx
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	movl	88(%eax), %ecx
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 20(%esp)                  # 4-byte Folded Spill
+	movl	92(%eax), %ecx
+	movl	%ecx, 88(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 8(%esp)                   # 4-byte Folded Spill
+	sbbl	$0, %edi
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	addl	120(%esp), %ebx                 # 4-byte Folded Reload
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	112(%esp), %edx                 # 4-byte Folded Reload
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	108(%esp), %ecx                 # 4-byte Folded Reload
+	adcl	76(%esp), %ebp                  # 4-byte Folded Reload
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	%ebp, 36(%eax)
+	movl	%ecx, 32(%eax)
+	movl	%edx, 28(%eax)
+	movl	%ebx, 24(%eax)
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	116(%esp), %edx                 # 4-byte Folded Reload
+	movl	%esi, 40(%eax)
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	72(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edx, 44(%eax)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	84(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%esi, 48(%eax)
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	80(%esp), %edx                  # 4-byte Folded Reload
 	movl	%ecx, 52(%eax)
-	addl	$72, %esp
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%edx, 56(%eax)
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	64(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 60(%eax)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	60(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%edx, 64(%eax)
+	adcl	56(%esp), %edi                  # 4-byte Folded Reload
+	movl	%ecx, 68(%eax)
+	movl	%edi, 72(%eax)
+	movl	104(%esp), %ecx                 # 4-byte Reload
+	adcl	$0, %ecx
+	movl	%ecx, 76(%eax)
+	movl	100(%esp), %ecx                 # 4-byte Reload
+	adcl	$0, %ecx
+	movl	%ecx, 80(%eax)
+	movl	96(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	movl	%ecx, 84(%eax)
+	movl	92(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	movl	%ecx, 88(%eax)
+	movl	88(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	movl	%ecx, 92(%eax)
+	addl	$220, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end96:
-	.size	mcl_fpDbl_mulPre7Lbmi2, .Lfunc_end96-mcl_fpDbl_mulPre7Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre7Lbmi2,@function
-mcl_fpDbl_sqrPre7Lbmi2:                 # @mcl_fpDbl_sqrPre7Lbmi2
-# BB#0:
+.Lfunc_end59:
+	.size	mcl_fpDbl_sqrPre12Lbmi2, .Lfunc_end59-mcl_fpDbl_sqrPre12Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_mont12Lbmi2              # -- Begin function mcl_fp_mont12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mont12Lbmi2,@function
+mcl_fp_mont12Lbmi2:                     # @mcl_fp_mont12Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$80, %esp
-	movl	104(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	4(%ecx), %eax
-	movl	%eax, %edx
-	mulxl	%ebx, %esi, %edi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	%ebx, %ebp, %edx
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	addl	%esi, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	8(%ecx), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	mulxl	%ebx, %edx, %esi
-	adcl	%edi, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	12(%ecx), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	mulxl	%ebx, %edi, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	%esi, %edi
-	movl	16(%ecx), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	mulxl	%ebx, %esi, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	20(%ecx), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	mulxl	%ebx, %edx, %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	24(%ecx), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	mulxl	%ebx, %ecx, %ebx
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx
-	movl	44(%esp), %edx          # 4-byte Reload
-	movl	%edx, (%ecx)
-	adcl	$0, %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	addl	%edx, 72(%esp)          # 4-byte Folded Spill
-	movl	%eax, %edx
-	mulxl	%eax, %ebx, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	68(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ebp, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	%edi, %ebp
-	movl	64(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edi, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	%esi, %edi
-	movl	60(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	52(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %ecx, %eax
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %edx
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	addl	76(%esp), %ebx          # 4-byte Folded Reload
-	adcl	56(%esp), %ebp          # 4-byte Folded Reload
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	%esi, 68(%esp)          # 4-byte Folded Spill
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	%eax, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	72(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 4(%eax)
-	movl	104(%esp), %esi
-	movl	(%esi), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	8(%esi), %ecx
-	mulxl	%ecx, %edx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	addl	%ebx, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	4(%esi), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%esi, %ebx
-	mulxl	%ecx, %eax, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	%ebp, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	mulxl	%ecx, %eax, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	%edi, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	12(%ebx), %eax
-	movl	%eax, %edx
-	mulxl	%ecx, %edi, %edx
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	%edi, %edx
-	movl	%edx, %esi
-	movl	16(%ebx), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	mulxl	%ecx, %edx, %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	20(%ebx), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %ebp
-	mulxl	%ecx, %ebx, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	24(%ebp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ecx, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	%ebp, %ebp
-	andl	$1, %ebp
-	movl	24(%esp), %edx          # 4-byte Reload
-	addl	%edx, 64(%esp)          # 4-byte Folded Spill
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	%edx, 60(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	72(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ebx          # 4-byte Folded Reload
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	52(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	addl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	mulxl	%eax, %edx, %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	adcl	%ebx, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edx, %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	%ecx, %edx
-	movl	%edx, %esi
-	movl	36(%esp), %edx          # 4-byte Reload
-	mulxl	%eax, %edi, %eax
-	adcl	%ebp, %edi
-	movl	%edi, %edx
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	52(%esp), %edi          # 4-byte Reload
-	addl	%edi, 68(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	28(%esp), %ebx          # 4-byte Reload
-	adcl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	24(%esp), %ebp          # 4-byte Reload
-	adcl	60(%esp), %ebp          # 4-byte Folded Reload
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	adcl	%eax, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	56(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 8(%eax)
-	movl	64(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	movl	104(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	16(%ecx), %eax
-	mulxl	%eax, %edx, %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	addl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	mulxl	%eax, %edx, %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	%edi, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	8(%ecx), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	mulxl	%eax, %edx, %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	%ebx, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	12(%ecx), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	adcl	%ebp, %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	mulxl	%eax, %ecx, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	20(%esi), %ecx
-	movl	%ecx, %edx
-	mulxl	%eax, %edx, %ebp
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	%edx, %edi
-	movl	24(%esi), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	mulxl	%eax, %esi, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	12(%esp), %eax          # 4-byte Reload
-	addl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	%edx, 72(%esp)          # 4-byte Folded Spill
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	%edx, 64(%esp)          # 4-byte Folded Spill
-	movl	8(%esp), %edx           # 4-byte Reload
-	adcl	%edx, 68(%esp)          # 4-byte Folded Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	adcl	%ebp, %esi
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	movl	56(%esp), %edx          # 4-byte Reload
-	mulxl	%ecx, %edx, %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	addl	%eax, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	mulxl	%ecx, %ebp, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ebp          # 4-byte Folded Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	mulxl	%ecx, %eax, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	mulxl	%ecx, %edi, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 76(%esp)          # 4-byte Folded Spill
-	movl	%ecx, %edx
-	mulxl	%ecx, %edx, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	%esi, %edx
-	movl	%edx, %eax
-	movl	40(%esp), %edx          # 4-byte Reload
-	mulxl	%ecx, %ecx, %edx
-	adcl	%ebx, %ecx
-	movl	%ecx, %ebx
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	addl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	%esi, 72(%esp)          # 4-byte Folded Spill
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	adcl	%edx, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	60(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 16(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 20(%eax)
-	movl	104(%esp), %eax
-	movl	24(%eax), %edx
-	mulxl	(%eax), %ecx, %ebx
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	addl	%ebp, %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	mulxl	4(%eax), %ecx, %ebx
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	mulxl	8(%eax), %ecx, %ebx
-	movl	%ebx, 72(%esp)          # 4-byte Spill
-	adcl	%edi, %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	mulxl	12(%eax), %ebx, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	%esi, %ebx
-	mulxl	16(%eax), %edi, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	mulxl	20(%eax), %esi, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	mulxl	%edx, %edx, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	addl	64(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 76(%esp)          # 4-byte Folded Spill
-	adcl	72(%esp), %ebx          # 4-byte Folded Reload
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	100(%esp), %eax
-	movl	68(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	movl	64(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	76(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 32(%eax)
-	movl	%ebx, 36(%eax)
-	movl	%edi, 40(%eax)
-	movl	%esi, 44(%eax)
-	movl	%edx, 48(%eax)
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%eax)
-	addl	$80, %esp
-	popl	%esi
-	popl	%edi
+	subl	$1420, %esp                     # imm = 0x58C
+	calll	.L60$pb
+.L60$pb:
 	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end97:
-	.size	mcl_fpDbl_sqrPre7Lbmi2, .Lfunc_end97-mcl_fpDbl_sqrPre7Lbmi2
-
-	.globl	mcl_fp_mont7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont7Lbmi2,@function
-mcl_fp_mont7Lbmi2:                      # @mcl_fp_mont7Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$116, %esp
-	movl	140(%esp), %eax
-	movl	24(%eax), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	144(%esp), %ecx
-	movl	(%ecx), %ecx
-	mulxl	%ecx, %edx, %esi
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	20(%eax), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	mulxl	%ecx, %edi, %edx
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	16(%eax), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	mulxl	%ecx, %edx, %ebx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	8(%eax), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	mulxl	%ecx, %edi, %edx
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	(%eax), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	4(%eax), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	mulxl	%ecx, %edi, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%ebp, %edx
-	mulxl	%ecx, %ebp, %edx
-	addl	%edi, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	%edx, 48(%esp)          # 4-byte Folded Spill
-	movl	12(%eax), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ecx, %eax
-	adcl	100(%esp), %ecx         # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	adcl	108(%esp), %ebx         # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	148(%esp), %ebx
-	movl	-4(%ebx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ebp, %edx
-	imull	%eax, %edx
-	movl	(%ebx), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	4(%ebx), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	mulxl	%ecx, %esi, %ecx
-	mulxl	%edi, %edi, %eax
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	addl	%esi, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	8(%ebx), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	mulxl	%esi, %eax, %esi
-	adcl	%ecx, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	12(%ebx), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	mulxl	%ecx, %eax, %ecx
-	adcl	%esi, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	16(%ebx), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	mulxl	%esi, %eax, %esi
-	adcl	%ecx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	20(%ebx), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	mulxl	%eax, %eax, %edi
-	adcl	%esi, %eax
-	movl	%eax, %ecx
-	movl	24(%ebx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	mulxl	%eax, %edx, %eax
-	adcl	%edi, %edx
-	adcl	$0, %eax
-	addl	%ebp, 8(%esp)           # 4-byte Folded Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	%esi, 40(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	%esi, 32(%esp)          # 4-byte Folded Spill
-	movl	12(%esp), %esi          # 4-byte Reload
-	adcl	%esi, 28(%esp)          # 4-byte Folded Spill
-	movl	16(%esp), %esi          # 4-byte Reload
-	adcl	%esi, 24(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	144(%esp), %edx
-	movl	4(%edx), %edx
-	mulxl	72(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	mulxl	84(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %ebx, %esi    # 4-byte Folded Reload
-	mulxl	60(%esp), %eax, %edi    # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	addl	%ebx, %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	mulxl	68(%esp), %edi, %eax    # 4-byte Folded Reload
-	adcl	%esi, %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	mulxl	64(%esp), %ebp, %ebx    # 4-byte Folded Reload
-	adcl	%eax, %ebp
-	mulxl	80(%esp), %esi, %eax    # 4-byte Folded Reload
-	adcl	%ebx, %esi
-	adcl	%ecx, %eax
-	movl	%eax, %ecx
-	movl	44(%esp), %ebx          # 4-byte Reload
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	movl	36(%esp), %eax          # 4-byte Reload
-	addl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, %ebx
-	sbbl	%ecx, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	imull	76(%esp), %edx          # 4-byte Folded Reload
-	mulxl	112(%esp), %ecx, %eax   # 4-byte Folded Reload
-	mulxl	108(%esp), %edi, %esi   # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	addl	%ecx, %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	mulxl	104(%esp), %esi, %ecx   # 4-byte Folded Reload
-	adcl	%eax, %esi
-	movl	%esi, %edi
-	mulxl	100(%esp), %esi, %eax   # 4-byte Folded Reload
-	adcl	%ecx, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	mulxl	96(%esp), %ecx, %esi    # 4-byte Folded Reload
-	adcl	%eax, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	mulxl	92(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	%esi, %ecx
-	movl	%ecx, %esi
-	mulxl	88(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %ebp
-	adcl	$0, %ecx
-	movl	%ecx, %edx
-	movl	40(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	movl	8(%esp), %ecx           # 4-byte Reload
-	addl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	%ecx, 32(%esp)          # 4-byte Folded Spill
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	20(%esp), %ecx          # 4-byte Reload
-	adcl	%ecx, 28(%esp)          # 4-byte Folded Spill
-	movl	12(%esp), %ecx          # 4-byte Reload
-	adcl	%ecx, 24(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%ebx, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	144(%esp), %eax
-	movl	8(%eax), %edx
-	mulxl	72(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	mulxl	84(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mulxl	68(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	60(%esp), %ebx, %edi    # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	addl	%ecx, %edi
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	mulxl	64(%esp), %ebx, %eax    # 4-byte Folded Reload
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	mulxl	80(%esp), %edx, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	%esi, %ecx
-	movl	%ecx, %esi
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	28(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 36(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebx          # 4-byte Reload
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	adcl	%ebp, %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	imull	76(%esp), %edx          # 4-byte Folded Reload
-	mulxl	112(%esp), %ecx, %eax   # 4-byte Folded Reload
-	mulxl	108(%esp), %edi, %esi   # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	addl	%ecx, %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	mulxl	104(%esp), %esi, %ecx   # 4-byte Folded Reload
-	adcl	%eax, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	mulxl	100(%esp), %esi, %eax   # 4-byte Folded Reload
-	adcl	%ecx, %esi
-	movl	%esi, %edi
-	mulxl	96(%esp), %ecx, %esi    # 4-byte Folded Reload
-	adcl	%eax, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	mulxl	92(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	%esi, %ecx
-	movl	%ecx, %esi
-	mulxl	88(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %ebp
-	adcl	$0, %ecx
-	movl	%ecx, %edx
-	movl	40(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	movl	8(%esp), %ecx           # 4-byte Reload
-	addl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	16(%esp), %ecx          # 4-byte Reload
-	adcl	%ecx, 32(%esp)          # 4-byte Folded Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	%ecx, 28(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	%ebx, 20(%esp)          # 4-byte Folded Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	144(%esp), %eax
-	movl	12(%eax), %edx
-	mulxl	72(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mulxl	84(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	mulxl	68(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)            # 4-byte Spill
-	mulxl	56(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	60(%esp), %ebx, %edi    # 4-byte Folded Reload
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	addl	%ecx, %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mulxl	64(%esp), %ebx, %eax    # 4-byte Folded Reload
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	mulxl	80(%esp), %edx, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	%esi, %ecx
-	movl	%ecx, %esi
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	16(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 24(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	(%esp), %ebx            # 4-byte Reload
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	adcl	%ebp, %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	imull	76(%esp), %edx          # 4-byte Folded Reload
-	mulxl	112(%esp), %ecx, %eax   # 4-byte Folded Reload
-	mulxl	108(%esp), %edi, %esi   # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	addl	%ecx, %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	mulxl	104(%esp), %esi, %ecx   # 4-byte Folded Reload
-	adcl	%eax, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	mulxl	100(%esp), %esi, %eax   # 4-byte Folded Reload
-	adcl	%ecx, %esi
-	movl	%esi, %edi
-	mulxl	96(%esp), %ecx, %esi    # 4-byte Folded Reload
-	adcl	%eax, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	mulxl	92(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	%esi, %ecx
+.Ltmp12:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp12-.L60$pb), %ebx
+	movl	1452(%esp), %eax
+	movl	-4(%eax), %edi
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	movl	1448(%esp), %ecx
+	subl	$4, %esp
+	leal	1372(%esp), %eax
+	pushl	(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	1368(%esp), %esi
+	movl	1372(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	imull	%esi, %eax
+	movl	1416(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	1412(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	1408(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	1404(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	1400(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	1396(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	1392(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	1388(%esp), %ebp
+	movl	1384(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	1380(%esp), %edi
+	movl	1376(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	1316(%esp), %ecx
+	pushl	%eax
+	pushl	1460(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	1312(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1316(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1320(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	1324(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	1328(%esp), %esi
+	adcl	1332(%esp), %ebp
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1336(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1340(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1344(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1348(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	1352(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	1356(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1360(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	1260(%esp), %ecx
+	movzbl	%al, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	1452(%esp), %eax
+	pushl	4(%eax)
+	pushl	1452(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	addl	1256(%esp), %ecx
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1260(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1264(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	1268(%esp), %esi
+	adcl	1272(%esp), %ebp
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1276(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1280(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1284(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1288(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	1292(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	1296(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1300(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1304(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %edx                  # 4-byte Reload
+	movl	%ecx, %edi
+	imull	%ecx, %edx
+	movzbl	%al, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	1460(%esp)
+	leal	1212(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	1200(%esp), %edi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1204(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1208(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	1212(%esp), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	adcl	1216(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	1220(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1224(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1228(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1232(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %edi                    # 4-byte Reload
+	adcl	1236(%esp), %edi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1240(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	1244(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1248(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	$0, 32(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	1148(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	addl	1144(%esp), %ecx
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1148(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1152(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1156(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	1160(%esp), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	1164(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1168(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1172(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	1176(%esp), %edi
+	movl	%edi, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	1180(%esp), %edi
+	adcl	1184(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1188(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1192(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	movl	%ecx, %ebp
+	movzbl	%al, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	pushl	%edx
+	movl	1460(%esp), %eax
+	pushl	%eax
+	leal	1100(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	1088(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1092(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1096(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1100(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1104(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	1108(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1112(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1116(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	1120(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	1124(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1128(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	adcl	1132(%esp), %ebp
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	1136(%esp), %esi
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	$0, %edi
+	subl	$4, %esp
+	leal	1036(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	12(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	addl	1032(%esp), %ecx
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1036(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1040(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1044(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1048(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1052(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1056(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	1060(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1064(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1068(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	1072(%esp), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	adcl	1076(%esp), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	1080(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	980(%esp), %edi
+	movl	56(%esp), %edx                  # 4-byte Reload
 	movl	%ecx, %esi
-	mulxl	88(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %ebp
-	adcl	$0, %ecx
-	movl	%ecx, %edx
-	movl	40(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	movl	8(%esp), %ecx           # 4-byte Reload
-	addl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	%ecx, 32(%esp)          # 4-byte Folded Spill
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	%ecx, 28(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	%ebx, 20(%esp)          # 4-byte Folded Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	144(%esp), %eax
-	movl	16(%eax), %edx
-	mulxl	72(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mulxl	84(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	mulxl	68(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)            # 4-byte Spill
-	mulxl	56(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	60(%esp), %ebx, %edi    # 4-byte Folded Reload
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	addl	%ecx, %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mulxl	64(%esp), %ebx, %eax    # 4-byte Folded Reload
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	mulxl	80(%esp), %edx, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	%esi, %ecx
-	movl	%ecx, %esi
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	16(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 24(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	(%esp), %ebx            # 4-byte Reload
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	adcl	%ebp, %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	imull	76(%esp), %edx          # 4-byte Folded Reload
-	mulxl	112(%esp), %ecx, %eax   # 4-byte Folded Reload
-	mulxl	108(%esp), %edi, %esi   # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	addl	%ecx, %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	mulxl	104(%esp), %esi, %ecx   # 4-byte Folded Reload
-	adcl	%eax, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	mulxl	100(%esp), %esi, %eax   # 4-byte Folded Reload
-	adcl	%ecx, %esi
-	movl	%esi, %edi
-	mulxl	96(%esp), %ecx, %esi    # 4-byte Folded Reload
-	adcl	%eax, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	mulxl	92(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	%esi, %ecx
-	movl	%ecx, %esi
-	mulxl	88(%esp), %edx, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %edx
-	movl	%edx, %ebp
-	adcl	$0, %ecx
-	movl	%ecx, %edx
-	movl	40(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	movl	8(%esp), %ecx           # 4-byte Reload
-	addl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	%ecx, 32(%esp)          # 4-byte Folded Spill
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	%ecx, 28(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	%ebx, 20(%esp)          # 4-byte Folded Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	144(%esp), %eax
-	movl	20(%eax), %edx
-	mulxl	72(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	mulxl	84(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mulxl	68(%esp), %eax, %ebp    # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	60(%esp), %ebx, %edi    # 4-byte Folded Reload
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	addl	%ecx, %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mulxl	64(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	%ebp, %ecx
+	imull	%ecx, %edx
+	movzbl	%al, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	1460(%esp)
+	pushl	%edi
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	976(%esp), %esi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	980(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	984(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	988(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	992(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	996(%esp), %ebp
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	1000(%esp), %esi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	1004(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	1008(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1012(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1016(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1020(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1024(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	$0, 12(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	924(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	16(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	addl	920(%esp), %ecx
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	924(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	928(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	932(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	936(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	adcl	940(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	944(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	948(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	952(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	956(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	960(%esp), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	964(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	968(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %edx                  # 4-byte Reload
 	movl	%ecx, %ebp
-	mulxl	80(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %ebx
-	adcl	%esi, %ecx
-	movl	%ecx, %esi
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	$0, %eax
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	16(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 24(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %ebp
-	sbbl	%eax, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	imull	76(%esp), %edx          # 4-byte Folded Reload
-	mulxl	112(%esp), %ecx, %eax   # 4-byte Folded Reload
-	mulxl	108(%esp), %edi, %esi   # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	addl	%ecx, %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	mulxl	104(%esp), %esi, %ecx   # 4-byte Folded Reload
-	adcl	%eax, %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	mulxl	100(%esp), %esi, %eax   # 4-byte Folded Reload
-	adcl	%ecx, %esi
-	movl	%esi, %edi
-	mulxl	96(%esp), %ecx, %esi    # 4-byte Folded Reload
-	adcl	%eax, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	mulxl	92(%esp), %eax, %ebx    # 4-byte Folded Reload
-	adcl	%esi, %eax
-	movl	%eax, %esi
-	mulxl	88(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	%ebx, %ecx
-	movl	%ecx, %ebx
-	adcl	$0, %eax
-	movl	%eax, %ecx
-	movl	44(%esp), %edx          # 4-byte Reload
-	andl	$1, %edx
-	movl	8(%esp), %eax           # 4-byte Reload
-	addl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	adcl	%ebp, %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	144(%esp), %edx
-	movl	24(%edx), %edx
-	mulxl	56(%esp), %ebx, %esi    # 4-byte Folded Reload
-	mulxl	60(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	addl	%ebx, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	mulxl	72(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	mulxl	68(%esp), %ebp, %edi    # 4-byte Folded Reload
-	adcl	%esi, %ebp
-	mulxl	64(%esp), %eax, %esi    # 4-byte Folded Reload
-	adcl	%edi, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	mulxl	84(%esp), %edi, %eax    # 4-byte Folded Reload
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	mulxl	80(%esp), %ebx, %edx    # 4-byte Folded Reload
-	adcl	%esi, %ebx
-	adcl	84(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %esi
-	adcl	%ecx, %eax
-	movl	%eax, %ecx
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	movl	60(%esp), %edi          # 4-byte Reload
-	addl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	adcl	52(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 68(%esp)          # 4-byte Folded Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	imull	%edi, %edx
-	mulxl	108(%esp), %ecx, %eax   # 4-byte Folded Reload
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	mulxl	112(%esp), %ecx, %esi   # 4-byte Folded Reload
-	addl	%eax, %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	mulxl	104(%esp), %eax, %edi   # 4-byte Folded Reload
-	adcl	%esi, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	mulxl	100(%esp), %ecx, %eax   # 4-byte Folded Reload
-	adcl	%edi, %ecx
+	imull	%ecx, %edx
+	movzbl	%al, %edi
+	pushl	%edx
+	pushl	1460(%esp)
+	leal	876(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	864(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	868(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	872(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	876(%esp), %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	880(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	884(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	888(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	892(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	896(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	900(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	904(%esp), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	908(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	912(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	$0, %edi
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	20(%eax)
+	pushl	1452(%esp)
+	leal	820(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	40(%esp), %edx                  # 4-byte Reload
+	addl	808(%esp), %edx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	812(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	816(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	820(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	824(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	828(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	832(%esp), %ebp
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	836(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	840(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	844(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	848(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	852(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	856(%esp), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %ecx                  # 4-byte Reload
 	movl	%edx, %edi
-	mulxl	96(%esp), %ebx, %ebp    # 4-byte Folded Reload
-	adcl	%eax, %ebx
-	mulxl	92(%esp), %esi, %eax    # 4-byte Folded Reload
-	adcl	%ebp, %esi
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	764(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	752(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	756(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	760(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	764(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	768(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	772(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	776(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	780(%esp), %edi
+	adcl	784(%esp), %esi
+	movl	%esi, %ebp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	788(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	792(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	796(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	800(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	$0, 40(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	24(%eax)
+	movl	1452(%esp), %eax
+	pushl	%eax
+	leal	708(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	4(%esp), %edx                   # 4-byte Reload
+	addl	696(%esp), %edx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	700(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	704(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	708(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	712(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	716(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	720(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	adcl	724(%esp), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	728(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	732(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	736(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edi                  # 4-byte Reload
+	adcl	740(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	744(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %ebp
+	movzbl	%al, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	652(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	640(%esp), %ebp
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	644(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	648(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	652(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	656(%esp), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	660(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	664(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	668(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	672(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	676(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	680(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	684(%esp), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	688(%esp), %ebp
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	$0, %edi
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	28(%eax)
+	pushl	1452(%esp)
+	leal	596(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	24(%esp), %edx                  # 4-byte Reload
+	addl	584(%esp), %edx
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	588(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	592(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	596(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	600(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	604(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	608(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	612(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	616(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	620(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	624(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	628(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	adcl	632(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %edi
+	movzbl	%al, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	540(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	528(%esp), %edi
+	adcl	532(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	536(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	540(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	544(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	552(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	556(%esp), %esi
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	560(%esp), %ebp
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	564(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	568(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	572(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	576(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	$0, 24(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	32(%eax)
+	pushl	1452(%esp)
+	leal	484(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	28(%esp), %edx                  # 4-byte Reload
+	addl	472(%esp), %edx
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	476(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	480(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	484(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	488(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	492(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	496(%esp), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	500(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	504(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	508(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	512(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	516(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	520(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	428(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	416(%esp), %esi
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	420(%esp), %edi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	424(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	428(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	432(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	436(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	440(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	444(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	448(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	452(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	456(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	464(%esp), %ebp
+	adcl	$0, 28(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	36(%eax)
+	pushl	1452(%esp)
+	leal	372(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
 	movl	%edi, %edx
-	mulxl	88(%esp), %edi, %ebp    # 4-byte Folded Reload
-	adcl	%eax, %edi
-	adcl	$0, %ebp
-	andl	$1, 64(%esp)            # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	addl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 68(%esp)          # 4-byte Spill
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	adcl	72(%esp), %ebp          # 4-byte Folded Reload
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	subl	108(%esp), %edx         # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	sbbl	112(%esp), %edx         # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	sbbl	104(%esp), %ecx         # 4-byte Folded Reload
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	sbbl	100(%esp), %ebx         # 4-byte Folded Reload
-	movl	%ebx, 100(%esp)         # 4-byte Spill
-	movl	%esi, %ebx
-	sbbl	96(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 104(%esp)         # 4-byte Spill
-	movl	%edi, %ebx
-	sbbl	92(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 108(%esp)         # 4-byte Spill
-	movl	%ebp, %ebx
-	sbbl	88(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 112(%esp)         # 4-byte Spill
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	%eax, %ecx
-	jne	.LBB98_2
-# BB#1:
-	movl	60(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-.LBB98_2:
-	movl	136(%esp), %ebx
-	movl	80(%esp), %edx          # 4-byte Reload
-	movl	%edx, (%ebx)
-	movl	%ebx, %edx
-	testb	%cl, %cl
-	movl	84(%esp), %ebx          # 4-byte Reload
-	jne	.LBB98_4
-# BB#3:
-	movl	64(%esp), %ebx          # 4-byte Reload
-.LBB98_4:
-	movl	%ebx, 4(%edx)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	jne	.LBB98_6
-# BB#5:
-	movl	72(%esp), %ecx          # 4-byte Reload
-.LBB98_6:
-	movl	%ecx, 8(%edx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	jne	.LBB98_8
-# BB#7:
-	movl	100(%esp), %eax         # 4-byte Reload
-.LBB98_8:
-	movl	%eax, 12(%edx)
-	jne	.LBB98_10
-# BB#9:
-	movl	104(%esp), %esi         # 4-byte Reload
-.LBB98_10:
-	movl	%esi, 16(%edx)
-	jne	.LBB98_12
-# BB#11:
-	movl	108(%esp), %edi         # 4-byte Reload
-.LBB98_12:
-	movl	%edi, 20(%edx)
-	jne	.LBB98_14
-# BB#13:
-	movl	112(%esp), %ebp         # 4-byte Reload
-.LBB98_14:
-	movl	%ebp, 24(%edx)
-	addl	$116, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end98:
-	.size	mcl_fp_mont7Lbmi2, .Lfunc_end98-mcl_fp_mont7Lbmi2
-
-	.globl	mcl_fp_montNF7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF7Lbmi2,@function
-mcl_fp_montNF7Lbmi2:                    # @mcl_fp_montNF7Lbmi2
-# BB#0:
-	pushl	%ebp
+	addl	360(%esp), %edx
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	364(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	368(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	372(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	376(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	380(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	384(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	388(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	392(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	396(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	400(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	404(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	408(%esp), %ebp
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	316(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	304(%esp), %esi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	308(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	312(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	316(%esp), %esi
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	320(%esp), %edi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	324(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	328(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	332(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	336(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	340(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	352(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	$0, %ebp
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	40(%eax)
+	movl	1452(%esp), %eax
+	pushl	%eax
+	leal	260(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	(%esp), %ecx                    # 4-byte Reload
+	addl	248(%esp), %ecx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	252(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	256(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	260(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	264(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	268(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	272(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	276(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	280(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	284(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	288(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	292(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	296(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	setb	(%esp)                          # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	56(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %esi
+	pushl	%eax
+	pushl	1460(%esp)
+	leal	204(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	192(%esp), %esi
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	196(%esp), %ebp
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	200(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	204(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	208(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	212(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	216(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	220(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	224(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	228(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	240(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movzbl	(%esp), %eax                    # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	subl	$4, %esp
+	leal	140(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	44(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	%ebp, %edx
+	addl	136(%esp), %edx
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	140(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	144(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	148(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	152(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	156(%esp), %ebp
+	movl	48(%esp), %edi                  # 4-byte Reload
+	adcl	160(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	164(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	168(%esp), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	176(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	180(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	184(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	leal	84(%esp), %eax
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	1460(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	80(%esp), %esi
+	movzbl	44(%esp), %eax                  # 1-byte Folded Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	84(%esp), %eax
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	88(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	92(%esp), %esi
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	96(%esp), %edx
+	adcl	100(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	adcl	104(%esp), %edi
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	108(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	112(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	adcl	116(%esp), %ebx
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	120(%esp), %ebp
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	124(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	128(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	$0, 44(%esp)                    # 4-byte Folded Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	1452(%esp), %ecx
+	subl	(%ecx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	sbbl	4(%ecx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	sbbl	8(%ecx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	%edx, %eax
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	sbbl	12(%ecx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	16(%ecx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	sbbl	20(%ecx), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%ecx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	28(%ecx), %eax
+	movl	%ebx, %edi
+	movl	(%esp), %ebx                    # 4-byte Reload
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	sbbl	32(%ecx), %edi
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	sbbl	36(%ecx), %ebp
+	movl	%ecx, %edx
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	sbbl	40(%edx), %ecx
+	movl	%ebx, %esi
+	sbbl	44(%edx), %esi
+	movl	44(%esp), %edx                  # 4-byte Reload
+	sbbl	$0, %edx
+	testb	$1, %dl
+	jne	.LBB60_1
+# %bb.2:
+	movl	1440(%esp), %ebx
+	movl	%esi, 44(%ebx)
+	jne	.LBB60_3
+.LBB60_4:
+	movl	%ecx, 40(%ebx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	jne	.LBB60_5
+.LBB60_6:
+	movl	%ebp, 36(%ebx)
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB60_7
+.LBB60_8:
+	movl	%edi, 32(%ebx)
+	jne	.LBB60_9
+.LBB60_10:
+	movl	%eax, 28(%ebx)
+	movl	64(%esp), %edi                  # 4-byte Reload
+	movl	56(%esp), %eax                  # 4-byte Reload
+	jne	.LBB60_11
+.LBB60_12:
+	movl	%eax, 24(%ebx)
+	movl	52(%esp), %eax                  # 4-byte Reload
+	movl	60(%esp), %edx                  # 4-byte Reload
+	jne	.LBB60_13
+.LBB60_14:
+	movl	%edx, 20(%ebx)
+	movl	72(%esp), %edx                  # 4-byte Reload
+	jne	.LBB60_15
+.LBB60_16:
+	movl	%edi, 16(%ebx)
+	jne	.LBB60_17
+.LBB60_18:
+	movl	%esi, 12(%ebx)
+	jne	.LBB60_19
+.LBB60_20:
+	movl	%edx, 8(%ebx)
+	jne	.LBB60_21
+.LBB60_22:
+	movl	%ecx, 4(%ebx)
+	je	.LBB60_24
+.LBB60_23:
+	movl	20(%esp), %eax                  # 4-byte Reload
+.LBB60_24:
+	movl	%eax, (%ebx)
+	addl	$1420, %esp                     # imm = 0x58C
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+	retl
+.LBB60_1:
+	movl	%ebx, %esi
+	movl	1440(%esp), %ebx
+	movl	%esi, 44(%ebx)
+	je	.LBB60_4
+.LBB60_3:
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 40(%ebx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	je	.LBB60_6
+.LBB60_5:
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 36(%ebx)
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	je	.LBB60_8
+.LBB60_7:
+	movl	24(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 32(%ebx)
+	je	.LBB60_10
+.LBB60_9:
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 28(%ebx)
+	movl	64(%esp), %edi                  # 4-byte Reload
+	movl	56(%esp), %eax                  # 4-byte Reload
+	je	.LBB60_12
+.LBB60_11:
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%ebx)
+	movl	52(%esp), %eax                  # 4-byte Reload
+	movl	60(%esp), %edx                  # 4-byte Reload
+	je	.LBB60_14
+.LBB60_13:
+	movl	48(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 20(%ebx)
+	movl	72(%esp), %edx                  # 4-byte Reload
+	je	.LBB60_16
+.LBB60_15:
+	movl	12(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 16(%ebx)
+	je	.LBB60_18
+.LBB60_17:
+	movl	16(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 12(%ebx)
+	je	.LBB60_20
+.LBB60_19:
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%ebx)
+	je	.LBB60_22
+.LBB60_21:
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%ebx)
+	jne	.LBB60_23
+	jmp	.LBB60_24
+.Lfunc_end60:
+	.size	mcl_fp_mont12Lbmi2, .Lfunc_end60-mcl_fp_mont12Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montNF12Lbmi2            # -- Begin function mcl_fp_montNF12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF12Lbmi2,@function
+mcl_fp_montNF12Lbmi2:                   # @mcl_fp_montNF12Lbmi2
+# %bb.0:
+	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$104, %esp
-	movl	128(%esp), %eax
-	movl	(%eax), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	4(%eax), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	132(%esp), %ecx
-	movl	(%ecx), %ebp
-	mulxl	%ebp, %ecx, %esi
-	movl	%edi, %edx
-	mulxl	%ebp, %edi, %edx
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	addl	%ecx, %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	8(%eax), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	mulxl	%ebp, %ecx, %edi
-	adcl	%esi, %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	12(%eax), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	mulxl	%ebp, %ecx, %ebx
-	adcl	%edi, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	16(%eax), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	mulxl	%ebp, %edx, %ecx
-	adcl	%ebx, %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	20(%eax), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	mulxl	%ebp, %edx, %esi
-	adcl	%ecx, %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	24(%eax), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	mulxl	%ebp, %ebp, %eax
-	adcl	%esi, %ebp
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	136(%esp), %edi
-	movl	-4(%edi), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	movl	%esi, %edx
-	imull	%eax, %edx
-	movl	(%edi), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	mulxl	%ecx, %ecx, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	addl	%esi, %ecx
-	movl	4(%edi), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	mulxl	%ecx, %eax, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	8(%edi), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	mulxl	%ecx, %eax, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	12(%edi), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	mulxl	%ecx, %esi, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	84(%esp), %esi          # 4-byte Folded Reload
-	movl	16(%edi), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	mulxl	%eax, %eax, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %ecx
-	movl	20(%edi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	mulxl	%eax, %eax, %ebx
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %ebx
-	movl	24(%edi), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	mulxl	%eax, %edx, %eax
-	adcl	%ebp, %edx
-	movl	%edx, %edi
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	movl	28(%esp), %ebp          # 4-byte Reload
-	addl	%ebp, 36(%esp)          # 4-byte Folded Spill
-	movl	24(%esp), %ebp          # 4-byte Reload
-	adcl	%ebp, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	adcl	%eax, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	4(%eax), %edx
-	mulxl	64(%esp), %ecx, %esi    # 4-byte Folded Reload
-	mulxl	68(%esp), %edi, %eax    # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	addl	%ecx, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	mulxl	60(%esp), %eax, %edi    # 4-byte Folded Reload
-	adcl	%esi, %eax
-	movl	%eax, %ecx
-	mulxl	56(%esp), %esi, %ebx    # 4-byte Folded Reload
-	adcl	%edi, %esi
-	mulxl	52(%esp), %eax, %ebp    # 4-byte Folded Reload
-	adcl	%ebx, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	mulxl	48(%esp), %eax, %edi    # 4-byte Folded Reload
-	adcl	%ebp, %eax
-	movl	%eax, %ebx
-	mulxl	44(%esp), %ebp, %eax    # 4-byte Folded Reload
-	adcl	%edi, %ebp
+	subl	$1420, %esp                     # imm = 0x58C
+	calll	.L61$pb
+.L61$pb:
+	popl	%ebx
+.Ltmp13:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp13-.L61$pb), %ebx
+	movl	1452(%esp), %eax
+	movl	-4(%eax), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	1448(%esp), %ecx
+	subl	$4, %esp
+	leal	1372(%esp), %eax
+	pushl	(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	1368(%esp), %esi
+	movl	1372(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	imull	%esi, %eax
+	movl	1416(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	1412(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	1408(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	1404(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	1400(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	1396(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	1392(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	1388(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	1384(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	1380(%esp), %ebp
+	movl	1376(%esp), %edi
+	subl	$4, %esp
+	leal	1316(%esp), %ecx
+	pushl	%eax
+	pushl	1460(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	1312(%esp), %esi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1316(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	1320(%esp), %edi
+	adcl	1324(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1328(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1332(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1336(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1340(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1344(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1348(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	1352(%esp), %esi
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	1356(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1360(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	1260(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	4(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	1304(%esp), %eax
+	movl	36(%esp), %edx                  # 4-byte Reload
+	addl	1256(%esp), %edx
+	adcl	1260(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1264(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1268(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1272(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1276(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1280(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	1284(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1288(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	1292(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	1296(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	1300(%esp), %ebp
 	adcl	$0, %eax
-	movl	%eax, %edx
-	movl	8(%esp), %eax           # 4-byte Reload
-	addl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	4(%esp), %edi           # 4-byte Reload
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	16(%esp), %ecx          # 4-byte Reload
-	adcl	%ecx, 28(%esp)          # 4-byte Folded Spill
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	adcl	$0, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	%eax, %ebx
-	imull	72(%esp), %edx          # 4-byte Folded Reload
-	mulxl	100(%esp), %eax, %ecx   # 4-byte Folded Reload
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	addl	%ebx, %eax
-	mulxl	96(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	%edi, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	mulxl	92(%esp), %edi, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	mulxl	88(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	%esi, %eax
-	movl	%eax, %esi
-	mulxl	84(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %ecx
-	mulxl	80(%esp), %eax, %ebx    # 4-byte Folded Reload
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	mulxl	76(%esp), %eax, %edx    # 4-byte Folded Reload
-	adcl	%ebp, %eax
-	movl	40(%esp), %ebx          # 4-byte Reload
-	adcl	$0, %ebx
-	movl	36(%esp), %ebp          # 4-byte Reload
-	addl	%ebp, 32(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %ecx          # 4-byte Reload
-	adcl	%ecx, 28(%esp)          # 4-byte Folded Spill
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	%edx, %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	8(%eax), %edx
-	mulxl	64(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	68(%esp), %esi, %edi    # 4-byte Folded Reload
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	addl	%ecx, %edi
-	mulxl	60(%esp), %ecx, %ebx    # 4-byte Folded Reload
-	adcl	%eax, %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	mulxl	56(%esp), %esi, %eax    # 4-byte Folded Reload
-	adcl	%ebx, %esi
-	mulxl	52(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %ebx
-	mulxl	48(%esp), %eax, %ebp    # 4-byte Folded Reload
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	%ecx, %eax
-	movl	%eax, %ebp
-	mulxl	44(%esp), %edx, %eax    # 4-byte Folded Reload
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	adcl	$0, %eax
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	8(%esp), %ecx           # 4-byte Reload
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, %edx
-	imull	72(%esp), %edx          # 4-byte Folded Reload
-	mulxl	100(%esp), %eax, %ebp   # 4-byte Folded Reload
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	addl	%ebx, %eax
-	mulxl	96(%esp), %ebp, %ebx    # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	adcl	%edi, %ebp
-	mulxl	92(%esp), %ebx, %edi    # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	adcl	%ecx, %ebx
-	mulxl	88(%esp), %edi, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	%esi, %edi
-	mulxl	84(%esp), %esi, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	mulxl	80(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, (%esp)            # 4-byte Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %ecx
-	mulxl	76(%esp), %edx, %eax    # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %eax
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	12(%eax), %edx
-	mulxl	64(%esp), %esi, %eax    # 4-byte Folded Reload
-	mulxl	68(%esp), %edi, %ecx    # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	addl	%esi, %ecx
-	mulxl	60(%esp), %esi, %edi    # 4-byte Folded Reload
-	adcl	%eax, %esi
-	mulxl	56(%esp), %eax, %ebx    # 4-byte Folded Reload
-	adcl	%edi, %eax
 	movl	%eax, %edi
-	mulxl	52(%esp), %eax, %ebp    # 4-byte Folded Reload
-	adcl	%ebx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mulxl	48(%esp), %eax, %ebx    # 4-byte Folded Reload
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	adcl	%ebp, %eax
-	movl	%eax, %ebx
-	mulxl	44(%esp), %ebp, %eax    # 4-byte Folded Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	$0, %eax
-	movl	32(%esp), %edx          # 4-byte Reload
-	addl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	12(%esp), %edx          # 4-byte Reload
-	adcl	%edx, 24(%esp)          # 4-byte Folded Spill
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
+	subl	$4, %esp
+	leal	1204(%esp), %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	1460(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	1200(%esp), %esi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1204(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1208(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1212(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1216(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1220(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1224(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1228(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1232(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1236(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1240(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	1244(%esp), %ebp
+	adcl	1248(%esp), %edi
+	subl	$4, %esp
+	leal	1148(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	1192(%esp), %eax
+	movl	36(%esp), %edx                  # 4-byte Reload
+	addl	1144(%esp), %edx
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1148(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1152(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1156(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1160(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1164(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	1168(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1172(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	1176(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1180(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	1184(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	adcl	1188(%esp), %edi
+	movl	%edi, %esi
+	movl	%eax, %ebp
+	adcl	$0, %ebp
+	subl	$4, %esp
+	leal	1092(%esp), %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %edi
+	pushl	%ecx
+	movl	1460(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	1088(%esp), %edi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1092(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1096(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1100(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1104(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1108(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1112(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1116(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1120(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	1124(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1128(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	1132(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	adcl	1136(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	1036(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	12(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	1080(%esp), %eax
+	movl	44(%esp), %edx                  # 4-byte Reload
+	addl	1032(%esp), %edx
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1036(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1040(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1044(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1048(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	1052(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	1056(%esp), %esi
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	1060(%esp), %ebp
+	adcl	1064(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1068(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1072(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	1076(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
 	adcl	$0, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, %edx
-	imull	72(%esp), %edx          # 4-byte Folded Reload
-	mulxl	100(%esp), %eax, %ecx   # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	addl	%edi, %eax
-	mulxl	96(%esp), %edi, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	mulxl	92(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	%esi, %eax
-	movl	%eax, %esi
-	mulxl	88(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	mulxl	84(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %ecx
-	mulxl	80(%esp), %eax, %ebx    # 4-byte Folded Reload
-	movl	%ebx, (%esp)            # 4-byte Spill
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mulxl	76(%esp), %eax, %edx    # 4-byte Folded Reload
-	adcl	%ebp, %eax
-	movl	36(%esp), %ebx          # 4-byte Reload
-	adcl	$0, %ebx
-	addl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %esi          # 4-byte Reload
-	adcl	%esi, 28(%esp)          # 4-byte Folded Spill
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	%ecx, 24(%esp)          # 4-byte Folded Spill
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	%edx, %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	16(%eax), %edx
-	mulxl	64(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	68(%esp), %esi, %edi    # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	addl	%ecx, %edi
-	mulxl	60(%esp), %ecx, %ebx    # 4-byte Folded Reload
-	adcl	%eax, %ecx
-	mulxl	56(%esp), %eax, %esi    # 4-byte Folded Reload
-	adcl	%ebx, %eax
-	movl	%eax, %ebx
-	mulxl	52(%esp), %eax, %ebp    # 4-byte Folded Reload
-	adcl	%esi, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	mulxl	48(%esp), %eax, %esi    # 4-byte Folded Reload
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	adcl	%ebp, %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	980(%esp), %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %edi
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	1460(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	976(%esp), %edi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	980(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	984(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	988(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	992(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	996(%esp), %edi
+	adcl	1000(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	1004(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	1008(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1012(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1016(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	1020(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1024(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	924(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	16(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	968(%esp), %ecx
+	movl	32(%esp), %eax                  # 4-byte Reload
+	addl	920(%esp), %eax
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	924(%esp), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	928(%esp), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	932(%esp), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	adcl	936(%esp), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	940(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	944(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	948(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	952(%esp), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	956(%esp), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	adcl	960(%esp), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	964(%esp), %edi
+	movl	%ecx, %ebp
+	adcl	$0, %ebp
+	subl	$4, %esp
+	movl	64(%esp), %ecx                  # 4-byte Reload
 	movl	%eax, %esi
-	mulxl	44(%esp), %ebp, %eax    # 4-byte Folded Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
+	imull	%eax, %ecx
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	876(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	864(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	868(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
+	adcl	872(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	876(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	880(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	884(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	888(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	892(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	896(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	900(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	904(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	908(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	adcl	912(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	20(%eax)
+	movl	1452(%esp), %eax
+	pushl	%eax
+	leal	820(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	856(%esp), %eax
+	movl	24(%esp), %edx                  # 4-byte Reload
+	addl	808(%esp), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	812(%esp), %esi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	816(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	820(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	824(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	828(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	832(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	836(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	840(%esp), %edi
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	844(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	848(%esp), %ebp
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	852(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
 	adcl	$0, %eax
-	movl	%eax, %edx
-	movl	32(%esp), %eax          # 4-byte Reload
-	addl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	adcl	$0, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, %edx
-	imull	72(%esp), %edx          # 4-byte Folded Reload
-	mulxl	100(%esp), %eax, %ecx   # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	addl	%ebx, %eax
-	mulxl	96(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	adcl	%edi, %ebx
-	mulxl	92(%esp), %edi, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	mulxl	88(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %ecx
-	mulxl	84(%esp), %eax, %esi    # 4-byte Folded Reload
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	mulxl	80(%esp), %eax, %esi    # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mulxl	76(%esp), %eax, %edx    # 4-byte Folded Reload
-	adcl	%ebp, %eax
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	addl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %ecx           # 4-byte Reload
-	adcl	%ecx, 28(%esp)          # 4-byte Folded Spill
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	%ecx, 24(%esp)          # 4-byte Folded Spill
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	%edx, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	20(%eax), %edx
-	mulxl	64(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	68(%esp), %edi, %esi    # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	addl	%ecx, %esi
-	mulxl	60(%esp), %ebp, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %ebp
-	mulxl	56(%esp), %eax, %edi    # 4-byte Folded Reload
-	adcl	%ecx, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	mulxl	52(%esp), %ecx, %ebx    # 4-byte Folded Reload
-	adcl	%edi, %ecx
-	mulxl	48(%esp), %eax, %edi    # 4-byte Folded Reload
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	adcl	%ebx, %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	764(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	24(%esp), %eax                  # 4-byte Reload
+	addl	752(%esp), %eax
+	adcl	756(%esp), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	760(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	764(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	768(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	772(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	776(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	780(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	784(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	788(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	792(%esp), %ebp
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	796(%esp), %edi
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	800(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	24(%eax)
+	pushl	1452(%esp)
+	leal	708(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	744(%esp), %ecx
+	movl	52(%esp), %eax                  # 4-byte Reload
+	addl	696(%esp), %eax
+	adcl	700(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	704(%esp), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	708(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	712(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	716(%esp), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	720(%esp), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	724(%esp), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	728(%esp), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	adcl	732(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	adcl	736(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	740(%esp), %esi
+	adcl	$0, %ecx
+	movl	%ecx, %ebp
+	subl	$4, %esp
+	movl	64(%esp), %ecx                  # 4-byte Reload
 	movl	%eax, %edi
-	mulxl	44(%esp), %ebx, %eax    # 4-byte Folded Reload
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
+	imull	%eax, %ecx
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	652(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	640(%esp), %edi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	644(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	648(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	652(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	656(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	660(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	664(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	668(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	672(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	676(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	680(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	684(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	adcl	688(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	28(%eax)
+	pushl	1452(%esp)
+	leal	596(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	632(%esp), %eax
+	movl	12(%esp), %edx                  # 4-byte Reload
+	addl	584(%esp), %edx
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	588(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	592(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	596(%esp), %esi
+	adcl	600(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	604(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	608(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	612(%esp), %ebp
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	616(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	620(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	624(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	628(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
 	adcl	$0, %eax
-	movl	%eax, %edx
-	movl	32(%esp), %eax          # 4-byte Reload
-	addl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, (%esp)            # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	adcl	$0, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %edx
-	imull	72(%esp), %edx          # 4-byte Folded Reload
-	mulxl	100(%esp), %eax, %esi   # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	addl	%ecx, %eax
-	mulxl	96(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	mulxl	92(%esp), %esi, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	%ebp, %esi
-	mulxl	88(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %ecx
-	mulxl	84(%esp), %eax, %ebp    # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	mulxl	80(%esp), %ebp, %edi    # 4-byte Folded Reload
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	adcl	(%esp), %ebp            # 4-byte Folded Reload
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	mulxl	76(%esp), %edi, %edx    # 4-byte Folded Reload
-	adcl	%ebx, %edi
-	movl	%edi, %ebx
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	28(%esp), %ebp          # 4-byte Reload
-	addl	%ebp, 32(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	adcl	%edx, %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	24(%eax), %edx
-	mulxl	64(%esp), %edi, %ebx    # 4-byte Folded Reload
-	mulxl	68(%esp), %eax, %ebp    # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	addl	%edi, %ebp
-	mulxl	60(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	%ebx, %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	mulxl	56(%esp), %ebx, %ecx    # 4-byte Folded Reload
-	adcl	%eax, %ebx
-	mulxl	52(%esp), %esi, %edi    # 4-byte Folded Reload
-	adcl	%ecx, %esi
-	mulxl	48(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	%edi, %eax
-	movl	%eax, %ecx
-	mulxl	44(%esp), %edx, %eax    # 4-byte Folded Reload
-	adcl	60(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %eax
-	movl	64(%esp), %edi          # 4-byte Reload
-	addl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %edi
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	540(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	528(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	532(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	536(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	540(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	544(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	548(%esp), %edi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	552(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	556(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	560(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	564(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	568(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	572(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	576(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	32(%eax)
+	pushl	1452(%esp)
+	leal	484(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	520(%esp), %ecx
+	movl	8(%esp), %edx                   # 4-byte Reload
+	addl	472(%esp), %edx
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	476(%esp), %ebp
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	480(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	484(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	488(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	492(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	496(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	500(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	504(%esp), %esi
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	508(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	512(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	516(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	$0, %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %edi
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	428(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	416(%esp), %edi
+	adcl	420(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	424(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	428(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	432(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	436(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edi                  # 4-byte Reload
+	adcl	440(%esp), %edi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	444(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	448(%esp), %esi
+	movl	56(%esp), %ebp                  # 4-byte Reload
+	adcl	452(%esp), %ebp
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	456(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	464(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	36(%eax)
+	pushl	1452(%esp)
+	leal	372(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	408(%esp), %eax
+	movl	16(%esp), %edx                  # 4-byte Reload
+	addl	360(%esp), %edx
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	364(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	368(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	372(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	376(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	380(%esp), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	384(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	388(%esp), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	392(%esp), %ebp
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	396(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	400(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	404(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
 	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	movl	64(%esp), %eax          # 4-byte Reload
-	imull	%eax, %edx
-	mulxl	100(%esp), %esi, %ecx   # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	addl	%eax, %esi
-	mulxl	96(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	%ebp, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%edx, %ecx
-	mulxl	92(%esp), %eax, %edx    # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	%edi, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	mulxl	88(%esp), %ebp, %eax    # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	%ebx, %ebp
-	movl	%ecx, %edx
-	mulxl	84(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%ecx, %edx
-	mulxl	80(%esp), %edi, %eax    # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%ecx, %edx
-	mulxl	76(%esp), %ebx, %eax    # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	68(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	316(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	304(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	308(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %esi                  # 4-byte Reload
+	adcl	316(%esp), %esi
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	320(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	324(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	328(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	332(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	336(%esp), %ebp
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	340(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	252(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	40(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	296(%esp), %ecx
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	248(%esp), %edx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	252(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	256(%esp), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	adcl	260(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	264(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	268(%esp), %edi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	272(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	276(%esp), %ebp
+	movl	%ebp, 56(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	280(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	284(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	288(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	292(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
 	adcl	$0, %ecx
-	movl	36(%esp), %edx          # 4-byte Reload
-	addl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	%edx, %ecx
-	subl	100(%esp), %ecx         # 4-byte Folded Reload
-	sbbl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	sbbl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	sbbl	88(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	sbbl	84(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	sbbl	80(%esp), %ebx          # 4-byte Folded Reload
-	movl	68(%esp), %edi          # 4-byte Reload
-	sbbl	76(%esp), %edi          # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	196(%esp), %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	1460(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	192(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	196(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	200(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	204(%esp), %esi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	208(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	212(%esp), %edi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	216(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	220(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	224(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	228(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	240(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	140(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	44(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	movl	28(%esp), %edx                  # 4-byte Reload
+	addl	136(%esp), %edx
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	140(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	144(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	148(%esp), %esi
+	adcl	152(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	156(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	160(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	164(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	168(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	176(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	180(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	184(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	84(%esp), %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %edi
+	pushl	%ecx
+	pushl	1460(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	80(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	84(%esp), %eax
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	88(%esp), %ecx
+	adcl	92(%esp), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	96(%esp), %esi
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	100(%esp), %edi
+	movl	56(%esp), %ebx                  # 4-byte Reload
+	adcl	104(%esp), %ebx
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	108(%esp), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	112(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	116(%esp), %ebp
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	120(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	124(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	128(%esp), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%eax, %edx
+	movl	1452(%esp), %eax
+	subl	(%eax), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	sbbl	4(%eax), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	sbbl	8(%eax), %ecx
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
+	movl	%esi, %ecx
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	sbbl	12(%eax), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	movl	%edi, %ecx
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	sbbl	16(%eax), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	%ebx, %ecx
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	sbbl	20(%eax), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	sbbl	24(%eax), %ebx
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	sbbl	28(%eax), %ecx
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	%ebp, %edx
+	sbbl	32(%eax), %edx
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	sbbl	36(%eax), %ebp
+	movl	20(%esp), %esi                  # 4-byte Reload
+	sbbl	40(%eax), %esi
+	movl	28(%esp), %edi                  # 4-byte Reload
+	sbbl	44(%eax), %edi
 	movl	%edi, %eax
 	sarl	$31, %eax
 	testl	%eax, %eax
-	js	.LBB99_2
-# BB#1:
-	movl	%ecx, %edx
-.LBB99_2:
-	movl	124(%esp), %esi
-	movl	%edx, (%esi)
-	movl	72(%esp), %eax          # 4-byte Reload
-	js	.LBB99_4
-# BB#3:
-	movl	52(%esp), %eax          # 4-byte Reload
-.LBB99_4:
-	movl	%eax, 4(%esi)
-	movl	68(%esp), %eax          # 4-byte Reload
-	movl	64(%esp), %ecx          # 4-byte Reload
-	movl	60(%esp), %edx          # 4-byte Reload
-	js	.LBB99_6
-# BB#5:
-	movl	92(%esp), %ebp          # 4-byte Reload
-.LBB99_6:
-	movl	%ebp, 8(%esi)
-	movl	%esi, %ebp
-	movl	56(%esp), %esi          # 4-byte Reload
-	js	.LBB99_8
-# BB#7:
-	movl	96(%esp), %esi          # 4-byte Reload
-.LBB99_8:
-	movl	%esi, 12(%ebp)
-	js	.LBB99_10
-# BB#9:
-	movl	100(%esp), %edx         # 4-byte Reload
-.LBB99_10:
-	movl	%edx, 16(%ebp)
-	js	.LBB99_12
-# BB#11:
-	movl	%ebx, %ecx
-.LBB99_12:
-	movl	%ecx, 20(%ebp)
-	js	.LBB99_14
-# BB#13:
-	movl	%edi, %eax
-.LBB99_14:
-	movl	%eax, 24(%ebp)
-	addl	$104, %esp
+	js	.LBB61_1
+# %bb.2:
+	movl	1440(%esp), %eax
+	movl	%edi, 44(%eax)
+	js	.LBB61_3
+.LBB61_4:
+	movl	%esi, 40(%eax)
+	movl	72(%esp), %edi                  # 4-byte Reload
+	js	.LBB61_5
+.LBB61_6:
+	movl	%ebp, 36(%eax)
+	movl	76(%esp), %esi                  # 4-byte Reload
+	js	.LBB61_7
+.LBB61_8:
+	movl	%edx, 32(%eax)
+	js	.LBB61_9
+.LBB61_10:
+	movl	%ecx, 28(%eax)
+	movl	60(%esp), %edx                  # 4-byte Reload
+	js	.LBB61_11
+.LBB61_12:
+	movl	%ebx, 24(%eax)
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	movl	64(%esp), %ebx                  # 4-byte Reload
+	js	.LBB61_13
+.LBB61_14:
+	movl	%ebx, 20(%eax)
+	movl	68(%esp), %ebx                  # 4-byte Reload
+	js	.LBB61_15
+.LBB61_16:
+	movl	%ebx, 16(%eax)
+	js	.LBB61_17
+.LBB61_18:
+	movl	%edi, 12(%eax)
+	js	.LBB61_19
+.LBB61_20:
+	movl	%esi, 8(%eax)
+	js	.LBB61_21
+.LBB61_22:
+	movl	%edx, 4(%eax)
+	jns	.LBB61_24
+.LBB61_23:
+	movl	40(%esp), %ecx                  # 4-byte Reload
+.LBB61_24:
+	movl	%ecx, (%eax)
+	addl	$1420, %esp                     # imm = 0x58C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end99:
-	.size	mcl_fp_montNF7Lbmi2, .Lfunc_end99-mcl_fp_montNF7Lbmi2
-
-	.globl	mcl_fp_montRed7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed7Lbmi2,@function
-mcl_fp_montRed7Lbmi2:                   # @mcl_fp_montRed7Lbmi2
-# BB#0:
+.LBB61_1:
+	movl	28(%esp), %edi                  # 4-byte Reload
+	movl	1440(%esp), %eax
+	movl	%edi, 44(%eax)
+	jns	.LBB61_4
+.LBB61_3:
+	movl	20(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 40(%eax)
+	movl	72(%esp), %edi                  # 4-byte Reload
+	jns	.LBB61_6
+.LBB61_5:
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 36(%eax)
+	movl	76(%esp), %esi                  # 4-byte Reload
+	jns	.LBB61_8
+.LBB61_7:
+	movl	8(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 32(%eax)
+	jns	.LBB61_10
+.LBB61_9:
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%eax)
+	movl	60(%esp), %edx                  # 4-byte Reload
+	jns	.LBB61_12
+.LBB61_11:
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 24(%eax)
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	movl	64(%esp), %ebx                  # 4-byte Reload
+	jns	.LBB61_14
+.LBB61_13:
+	movl	56(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 20(%eax)
+	movl	68(%esp), %ebx                  # 4-byte Reload
+	jns	.LBB61_16
+.LBB61_15:
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 16(%eax)
+	jns	.LBB61_18
+.LBB61_17:
+	movl	44(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%eax)
+	jns	.LBB61_20
+.LBB61_19:
+	movl	48(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%eax)
+	jns	.LBB61_22
+.LBB61_21:
+	movl	36(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%eax)
+	js	.LBB61_23
+	jmp	.LBB61_24
+.Lfunc_end61:
+	.size	mcl_fp_montNF12Lbmi2, .Lfunc_end61-mcl_fp_montNF12Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRed12Lbmi2           # -- Begin function mcl_fp_montRed12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed12Lbmi2,@function
+mcl_fp_montRed12Lbmi2:                  # @mcl_fp_montRed12Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$108, %esp
-	movl	136(%esp), %edi
-	movl	-4(%edi), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	(%edi), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	(%eax), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	imull	%ecx, %edx
-	movl	24(%edi), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ebx, %ecx
-	movl	%ebx, 68(%esp)          # 4-byte Spill
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	20(%edi), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	mulxl	%ecx, %ebx, %ecx
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	16(%edi), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	mulxl	%ecx, %ebx, %ecx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	4(%edi), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	mulxl	%ecx, %ecx, %ebp
-	mulxl	%esi, %ebx, %esi
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	addl	%ecx, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	8(%edi), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	mulxl	%ecx, %esi, %ecx
-	adcl	%ebp, %esi
-	movl	%esi, %ebp
-	movl	12(%edi), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	mulxl	%esi, %esi, %edx
-	adcl	%ecx, %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	addl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	28(%esp), %ebx          # 4-byte Reload
-	adcl	4(%eax), %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	adcl	8(%eax), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	12(%eax), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	16(%eax), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	20(%eax), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	24(%eax), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	28(%eax), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	52(%eax), %ecx
-	movl	48(%eax), %edx
-	movl	44(%eax), %esi
-	movl	40(%eax), %edi
-	movl	36(%eax), %ebp
-	movl	32(%eax), %eax
-	adcl	$0, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	imull	76(%esp), %edx          # 4-byte Folded Reload
-	mulxl	100(%esp), %eax, %ebx   # 4-byte Folded Reload
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	mulxl	88(%esp), %ebp, %eax    # 4-byte Folded Reload
-	movl	%eax, (%esp)            # 4-byte Spill
-	mulxl	96(%esp), %ecx, %eax    # 4-byte Folded Reload
-	mulxl	92(%esp), %edi, %esi    # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	addl	%ecx, %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	%ebp, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mulxl	84(%esp), %esi, %ebp    # 4-byte Folded Reload
-	adcl	(%esp), %esi            # 4-byte Folded Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	mulxl	104(%esp), %ecx, %edi   # 4-byte Folded Reload
-	adcl	%ebx, %ecx
-	mulxl	80(%esp), %ebx, %edx    # 4-byte Folded Reload
-	adcl	%edi, %ebx
-	adcl	$0, %edx
-	movl	8(%esp), %eax           # 4-byte Reload
-	addl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	32(%esp), %edi          # 4-byte Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	adcl	$0, 60(%esp)            # 4-byte Folded Spill
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
-	adcl	$0, 72(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%edi, %edx
-	imull	76(%esp), %edx          # 4-byte Folded Reload
-	mulxl	80(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	mulxl	84(%esp), %ebx, %ebp    # 4-byte Folded Reload
-	mulxl	96(%esp), %eax, %esi    # 4-byte Folded Reload
-	mulxl	92(%esp), %ecx, %edi    # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	addl	%eax, %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	mulxl	88(%esp), %edi, %eax    # 4-byte Folded Reload
-	adcl	%esi, %edi
-	adcl	%ebx, %eax
-	movl	%eax, %ebx
-	mulxl	100(%esp), %esi, %eax   # 4-byte Folded Reload
-	adcl	%ebp, %esi
-	mulxl	104(%esp), %edx, %ecx   # 4-byte Folded Reload
-	adcl	%eax, %edx
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	12(%esp), %ebp          # 4-byte Reload
-	addl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	8(%esp), %ebp           # 4-byte Reload
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	adcl	$0, 60(%esp)            # 4-byte Folded Spill
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
-	adcl	$0, 72(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ebp, %edx
-	imull	76(%esp), %edx          # 4-byte Folded Reload
-	mulxl	80(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mulxl	84(%esp), %edi, %eax    # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	mulxl	96(%esp), %eax, %ebx    # 4-byte Folded Reload
-	mulxl	92(%esp), %esi, %ecx    # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	addl	%eax, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	mulxl	88(%esp), %esi, %eax    # 4-byte Folded Reload
-	adcl	%ebx, %esi
-	movl	%esi, %ebx
-	adcl	%edi, %eax
-	movl	%eax, %edi
-	mulxl	100(%esp), %esi, %ecx   # 4-byte Folded Reload
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	mulxl	104(%esp), %edx, %eax   # 4-byte Folded Reload
-	adcl	%ecx, %edx
-	movl	%edx, %ecx
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %edx
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	%ebp, 20(%esp)          # 4-byte Folded Spill
-	movl	16(%esp), %ebp          # 4-byte Reload
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, 60(%esp)            # 4-byte Folded Spill
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
-	adcl	$0, 72(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ebp, %edx
-	movl	%ebp, %edi
-	imull	76(%esp), %edx          # 4-byte Folded Reload
-	mulxl	80(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	mulxl	84(%esp), %eax, %ebx    # 4-byte Folded Reload
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	mulxl	96(%esp), %eax, %ecx    # 4-byte Folded Reload
-	mulxl	92(%esp), %esi, %ebp    # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	addl	%eax, %ebp
-	mulxl	88(%esp), %esi, %eax    # 4-byte Folded Reload
-	adcl	%ecx, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %ecx
-	mulxl	100(%esp), %esi, %eax   # 4-byte Folded Reload
-	adcl	%ebx, %esi
-	mulxl	104(%esp), %ebx, %edx   # 4-byte Folded Reload
-	adcl	%eax, %ebx
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	56(%esp), %eax          # 4-byte Reload
+	subl	$780, %esp                      # imm = 0x30C
+	calll	.L62$pb
+.L62$pb:
+	popl	%ebx
+.Ltmp14:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp14-.L62$pb), %ebx
+	movl	808(%esp), %ecx
+	movl	44(%ecx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	40(%ecx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	36(%ecx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	32(%ecx), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	28(%ecx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	24(%ecx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	20(%ecx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	12(%ecx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	8(%ecx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %eax
+	movl	%ecx, %edx
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	movl	44(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	40(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	32(%eax), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	28(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	16(%eax), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	12(%eax), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	8(%eax), %esi
+	movl	(%eax), %ebp
+	movl	4(%eax), %edi
+	movl	-4(%edx), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	(%edx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	%ebp, %eax
+	imull	%ecx, %eax
+	leal	732(%esp), %ecx
+	pushl	%eax
+	pushl	%edx
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	728(%esp), %ebp
+	adcl	732(%esp), %edi
+	adcl	736(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	740(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	744(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	748(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	752(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	756(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	760(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	764(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	768(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	772(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	movl	48(%eax), %eax
+	adcl	776(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edi, %eax
+	leal	676(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 16(%esp)                  # 1-byte Folded Spill
+	movl	720(%esp), %eax
 	adcl	$0, %eax
-	addl	%edi, 20(%esp)          # 4-byte Folded Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 44(%esp)          # 4-byte Folded Spill
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
-	adcl	$0, 72(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ebp, %edx
-	imull	76(%esp), %edx          # 4-byte Folded Reload
-	mulxl	80(%esp), %ecx, %eax    # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	mulxl	84(%esp), %ebx, %eax    # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mulxl	96(%esp), %ecx, %edi    # 4-byte Folded Reload
-	mulxl	92(%esp), %esi, %eax    # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	addl	%ecx, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	mulxl	88(%esp), %ecx, %eax    # 4-byte Folded Reload
-	adcl	%edi, %ecx
+	movl	%eax, %ecx
+	addl	672(%esp), %edi
+	adcl	676(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	680(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	684(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	688(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	692(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	696(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	700(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	704(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	708(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	712(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	716(%esp), %edi
+	movl	804(%esp), %eax
+	adcl	52(%eax), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	620(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	664(%esp), %eax
+	adcl	$0, %eax
+	addl	616(%esp), %esi
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	620(%esp), %edx
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	624(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	628(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	632(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	636(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	640(%esp), %ebp
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	644(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	648(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	652(%esp), %esi
+	adcl	656(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	660(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	56(%ecx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	564(%esp), %ecx
+	pushl	%eax
+	movl	816(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	608(%esp), %eax
+	adcl	$0, %eax
+	addl	560(%esp), %edi
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	564(%esp), %edi
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	568(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	572(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	576(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	580(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	584(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	588(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	592(%esp), %esi
+	movl	%esi, %ebp
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	596(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
+	adcl	600(%esp), %esi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	604(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	60(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edi, %eax
+	leal	508(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	552(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %ecx
+	addl	504(%esp), %edi
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	508(%esp), %edx
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	512(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	516(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	520(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	524(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	528(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	532(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	536(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	540(%esp), %esi
+	movl	%esi, %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	544(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	64(%eax), %ecx
 	movl	%ecx, %edi
-	adcl	%ebx, %eax
-	movl	%eax, %ebx
-	mulxl	100(%esp), %esi, %ecx   # 4-byte Folded Reload
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	mulxl	104(%esp), %edx, %eax   # 4-byte Folded Reload
-	adcl	%ecx, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	%ebp, 20(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	%esi, 48(%esp)          # 4-byte Folded Spill
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	$0, 72(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	imull	%eax, %edx
-	mulxl	92(%esp), %eax, %ecx    # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	mulxl	96(%esp), %eax, %esi    # 4-byte Folded Reload
-	addl	%ecx, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	mulxl	88(%esp), %eax, %edi    # 4-byte Folded Reload
-	adcl	%esi, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
+	setb	52(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
 	movl	%edx, %esi
-	mulxl	84(%esp), %ebp, %eax    # 4-byte Folded Reload
-	adcl	%edi, %ebp
-	mulxl	100(%esp), %ecx, %edi   # 4-byte Folded Reload
-	adcl	%eax, %ecx
-	mulxl	104(%esp), %ebx, %eax   # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	%edi, %ebx
-	mulxl	80(%esp), %edi, %eax    # 4-byte Folded Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
+	leal	452(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 52(%esp)                  # 1-byte Folded Spill
+	movl	496(%esp), %eax
 	adcl	$0, %eax
-	movl	64(%esp), %edx          # 4-byte Reload
-	addl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	%edx, %ebp
-	subl	92(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	sbbl	96(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	sbbl	88(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	sbbl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	%ebx, %ecx
-	sbbl	100(%esp), %ecx         # 4-byte Folded Reload
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	%edi, %ecx
-	sbbl	104(%esp), %ecx         # 4-byte Folded Reload
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	%eax, %edx
 	movl	%eax, %ecx
-	sbbl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	sbbl	$0, %esi
-	andl	$1, %esi
-	jne	.LBB100_2
-# BB#1:
-	movl	68(%esp), %ebp          # 4-byte Reload
-.LBB100_2:
-	movl	128(%esp), %edx
-	movl	%ebp, (%edx)
-	movl	%esi, %eax
-	testb	%al, %al
-	movl	76(%esp), %ebp          # 4-byte Reload
-	jne	.LBB100_4
-# BB#3:
-	movl	72(%esp), %ebp          # 4-byte Reload
-.LBB100_4:
-	movl	%ebp, 4(%edx)
-	movl	%ecx, %eax
-	movl	64(%esp), %ecx          # 4-byte Reload
-	jne	.LBB100_6
-# BB#5:
-	movl	88(%esp), %ecx          # 4-byte Reload
-.LBB100_6:
-	movl	%ecx, 8(%edx)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	jne	.LBB100_8
-# BB#7:
-	movl	92(%esp), %ecx          # 4-byte Reload
-.LBB100_8:
-	movl	%ecx, 12(%edx)
-	jne	.LBB100_10
-# BB#9:
-	movl	96(%esp), %ebx          # 4-byte Reload
-.LBB100_10:
-	movl	%ebx, 16(%edx)
-	jne	.LBB100_12
-# BB#11:
-	movl	104(%esp), %edi         # 4-byte Reload
-.LBB100_12:
-	movl	%edi, 20(%edx)
-	jne	.LBB100_14
-# BB#13:
-	movl	100(%esp), %eax         # 4-byte Reload
-.LBB100_14:
-	movl	%eax, 24(%edx)
-	addl	$108, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end100:
-	.size	mcl_fp_montRed7Lbmi2, .Lfunc_end100-mcl_fp_montRed7Lbmi2
-
-	.globl	mcl_fp_addPre7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre7Lbmi2,@function
-mcl_fp_addPre7Lbmi2:                    # @mcl_fp_addPre7Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	20(%esp), %esi
-	addl	(%esi), %ecx
-	adcl	4(%esi), %edx
-	movl	8(%eax), %edi
-	adcl	8(%esi), %edi
-	movl	16(%esp), %ebx
-	movl	%ecx, (%ebx)
-	movl	12(%esi), %ecx
-	movl	%edx, 4(%ebx)
-	movl	16(%esi), %edx
-	adcl	12(%eax), %ecx
-	adcl	16(%eax), %edx
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%ecx, 12(%ebx)
-	movl	20(%esi), %ecx
-	adcl	%edi, %ecx
-	movl	%edx, 16(%ebx)
-	movl	%ecx, 20(%ebx)
-	movl	24(%eax), %eax
-	movl	24(%esi), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 24(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end101:
-	.size	mcl_fp_addPre7Lbmi2, .Lfunc_end101-mcl_fp_addPre7Lbmi2
-
-	.globl	mcl_fp_subPre7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre7Lbmi2,@function
-mcl_fp_subPre7Lbmi2:                    # @mcl_fp_subPre7Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %esi
-	xorl	%eax, %eax
-	movl	28(%esp), %edi
-	subl	(%edi), %edx
-	sbbl	4(%edi), %esi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edi), %ebx
-	movl	20(%esp), %ebp
-	movl	%edx, (%ebp)
-	movl	12(%ecx), %edx
-	sbbl	12(%edi), %edx
-	movl	%esi, 4(%ebp)
-	movl	16(%ecx), %esi
-	sbbl	16(%edi), %esi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edi), %ebx
-	movl	%edx, 12(%ebp)
-	movl	20(%ecx), %edx
-	sbbl	%ebx, %edx
-	movl	%esi, 16(%ebp)
-	movl	%edx, 20(%ebp)
-	movl	24(%edi), %edx
-	movl	24(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 24(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end102:
-	.size	mcl_fp_subPre7Lbmi2, .Lfunc_end102-mcl_fp_subPre7Lbmi2
-
-	.globl	mcl_fp_shr1_7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_7Lbmi2,@function
-mcl_fp_shr1_7Lbmi2:                     # @mcl_fp_shr1_7Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	8(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 4(%esi)
-	movl	12(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 8(%esi)
-	movl	16(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 12(%esi)
-	movl	20(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 16(%esi)
-	movl	24(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	%edx, 20(%esi)
-	shrl	%eax
-	movl	%eax, 24(%esi)
-	popl	%esi
-	retl
-.Lfunc_end103:
-	.size	mcl_fp_shr1_7Lbmi2, .Lfunc_end103-mcl_fp_shr1_7Lbmi2
-
-	.globl	mcl_fp_add7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add7Lbmi2,@function
-mcl_fp_add7Lbmi2:                       # @mcl_fp_add7Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$20, %esp
-	movl	48(%esp), %ebp
-	movl	(%ebp), %eax
-	movl	4(%ebp), %edi
-	movl	44(%esp), %ecx
-	addl	(%ecx), %eax
-	adcl	4(%ecx), %edi
-	movl	8(%ebp), %esi
-	adcl	8(%ecx), %esi
-	movl	12(%ecx), %edx
-	movl	16(%ecx), %ebx
-	adcl	12(%ebp), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	16(%ebp), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	%ebp, %ebx
-	movl	20(%ecx), %ebp
-	adcl	20(%ebx), %ebp
-	movl	24(%ecx), %edx
-	adcl	24(%ebx), %edx
-	movl	40(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%edi, 4(%ecx)
-	movl	%esi, 8(%ecx)
-	movl	16(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 12(%ecx)
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 16(%ecx)
-	movl	%ebp, 20(%ecx)
-	movl	%edx, 24(%ecx)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	52(%esp), %ecx
-	subl	(%ecx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	52(%esp), %eax
-	sbbl	4(%eax), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%eax, %edi
-	sbbl	8(%edi), %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	sbbl	12(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %ecx
+	addl	448(%esp), %esi
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	452(%esp), %edx
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	456(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	464(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	468(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	472(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	476(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	480(%esp), %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	484(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	488(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	492(%esp), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	68(%eax), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	setb	36(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	396(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 36(%esp)                  # 1-byte Folded Spill
+	movl	440(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	392(%esp), %edi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	396(%esp), %ecx
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	400(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	404(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	408(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	412(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	416(%esp), %edi
+	adcl	420(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	424(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	428(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	432(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	436(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	72(%eax), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %ebp
+	leal	340(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	384(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	336(%esp), %ebp
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	340(%esp), %ecx
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	356(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	adcl	360(%esp), %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	364(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	368(%esp), %edi
+	adcl	372(%esp), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	376(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	380(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	76(%eax), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	setb	20(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
 	movl	%ecx, %esi
-	sbbl	20(%edi), %ebp
-	sbbl	24(%edi), %edx
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB104_2
-# BB#1:                                 # %nocarry
-	movl	8(%esp), %ecx           # 4-byte Reload
-	movl	40(%esp), %eax
-	movl	%eax, %ebx
-	movl	%ecx, (%ebx)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 4(%ebx)
-	movl	(%esp), %eax            # 4-byte Reload
-	movl	%eax, 8(%ebx)
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 12(%ebx)
-	movl	%esi, 16(%ebx)
-	movl	%ebp, 20(%ebx)
-	movl	%edx, 24(%ebx)
-.LBB104_2:                              # %carry
-	addl	$20, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end104:
-	.size	mcl_fp_add7Lbmi2, .Lfunc_end104-mcl_fp_add7Lbmi2
-
-	.globl	mcl_fp_addNF7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF7Lbmi2,@function
-mcl_fp_addNF7Lbmi2:                     # @mcl_fp_addNF7Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$52, %esp
-	movl	80(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	76(%esp), %esi
-	addl	(%esi), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	4(%esi), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	20(%eax), %ebx
-	movl	16(%eax), %edi
-	movl	12(%eax), %ebp
-	movl	8(%eax), %ecx
-	adcl	8(%esi), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	12(%esi), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	adcl	16(%esi), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	20(%esi), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	24(%esi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	44(%esp), %esi          # 4-byte Reload
-	subl	(%eax), %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	sbbl	4(%eax), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	8(%eax), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	12(%eax), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	sbbl	16(%eax), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	sbbl	20(%eax), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	sbbl	24(%eax), %edi
+	leal	284(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 20(%esp)                  # 1-byte Folded Spill
+	movl	328(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	280(%esp), %esi
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	284(%esp), %esi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	288(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	292(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	296(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	300(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	304(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	308(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	316(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	320(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	324(%esp), %edi
+	movl	804(%esp), %eax
+	adcl	80(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	setb	40(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, %eax
+	imull	%esi, %eax
+	leal	228(%esp), %ecx
+	pushl	%eax
+	movl	816(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 40(%esp)                  # 1-byte Folded Spill
+	movl	272(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	224(%esp), %esi
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	228(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	240(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	244(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	248(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	252(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	256(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	260(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	264(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	268(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	84(%eax), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	setb	63(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	%ebp, %eax
+	movl	%ebp, %edi
+	imull	%esi, %eax
+	leal	172(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 63(%esp)                  # 1-byte Folded Spill
+	movl	216(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	168(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	176(%esp), %ebp
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	180(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	184(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	188(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	192(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	196(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	200(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	204(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	208(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	212(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	88(%ecx), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
 	movl	%edi, %ecx
-	sarl	$31, %ecx
-	testl	%ecx, %ecx
-	js	.LBB105_2
-# BB#1:
-	movl	(%esp), %esi            # 4-byte Reload
-.LBB105_2:
-	movl	72(%esp), %ecx
-	movl	%esi, (%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	js	.LBB105_4
-# BB#3:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB105_4:
-	movl	%eax, 4(%ecx)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	movl	32(%esp), %esi          # 4-byte Reload
-	movl	24(%esp), %ebx          # 4-byte Reload
-	js	.LBB105_6
-# BB#5:
-	movl	8(%esp), %ebx           # 4-byte Reload
-.LBB105_6:
-	movl	72(%esp), %eax
-	movl	%ebx, 8(%eax)
-	movl	%eax, %ebx
-	js	.LBB105_8
-# BB#7:
-	movl	16(%esp), %esi          # 4-byte Reload
-.LBB105_8:
-	movl	%esi, 12(%ebx)
-	js	.LBB105_10
-# BB#9:
-	movl	20(%esp), %edx          # 4-byte Reload
-.LBB105_10:
-	movl	%edx, 16(%ebx)
-	js	.LBB105_12
-# BB#11:
-	movl	12(%esp), %ecx          # 4-byte Reload
-.LBB105_12:
-	movl	%ecx, 20(%ebx)
-	js	.LBB105_14
-# BB#13:
-	movl	%edi, %ebp
-.LBB105_14:
-	movl	%ebp, 24(%ebx)
-	addl	$52, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end105:
-	.size	mcl_fp_addNF7Lbmi2, .Lfunc_end105-mcl_fp_addNF7Lbmi2
-
-	.globl	mcl_fp_sub7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub7Lbmi2,@function
-mcl_fp_sub7Lbmi2:                       # @mcl_fp_sub7Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$24, %esp
-	movl	48(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %ecx
-	xorl	%ebx, %ebx
-	movl	52(%esp), %esi
-	subl	(%esi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	sbbl	4(%esi), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	8(%edi), %edx
-	sbbl	8(%esi), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	12(%edi), %ecx
-	sbbl	12(%esi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	16(%edi), %eax
-	sbbl	16(%esi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	20(%edi), %ebp
-	sbbl	20(%esi), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	24(%edi), %edi
-	sbbl	24(%esi), %edi
-	sbbl	$0, %ebx
+	imull	%eax, %ecx
+	movl	%eax, %edi
+	subl	$4, %esp
+	leal	116(%esp), %eax
+	pushl	%ecx
+	pushl	816(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	160(%esp), %esi
+	adcl	$0, %esi
+	addl	112(%esp), %edi
+	movl	%ebp, %eax
+	adcl	116(%esp), %eax
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	120(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	124(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	128(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	132(%esp), %edx
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	136(%esp), %edi
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	140(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	144(%esp), %ebp
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	148(%esp), %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	adcl	152(%esp), %ebx
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	156(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	92(%ecx), %esi
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	subl	88(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	sbbl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	68(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	72(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	%edx, %eax
+	movl	%esi, %edx
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	sbbl	76(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	sbbl	80(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 80(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	84(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	%ebp, %ecx
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	sbbl	92(%esp), %ecx                  # 4-byte Folded Reload
+	movl	16(%esp), %esi                  # 4-byte Reload
+	sbbl	96(%esp), %esi                  # 4-byte Folded Reload
+	movl	%ebx, %edi
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	sbbl	100(%esp), %edi                 # 4-byte Folded Reload
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	sbbl	104(%esp), %ebp                 # 4-byte Folded Reload
+	movl	%edx, %eax
+	sbbl	108(%esp), %eax                 # 4-byte Folded Reload
+	movl	$0, %ebx
+	sbbl	%ebx, %ebx
 	testb	$1, %bl
-	movl	44(%esp), %ebx
-	movl	16(%esp), %esi          # 4-byte Reload
-	movl	%esi, (%ebx)
-	movl	20(%esp), %esi          # 4-byte Reload
-	movl	%esi, 4(%ebx)
-	movl	%edx, 8(%ebx)
-	movl	%ecx, 12(%ebx)
-	movl	%eax, 16(%ebx)
-	movl	%ebp, 20(%ebx)
-	movl	%edi, 24(%ebx)
-	je	.LBB106_2
-# BB#1:                                 # %carry
-	movl	56(%esp), %ebp
-	movl	16(%esp), %ecx          # 4-byte Reload
-	addl	(%ebp), %ecx
-	movl	%ecx, (%ebx)
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	4(%ebp), %edx
-	movl	%edx, 4(%ebx)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	8(%ebp), %ecx
-	movl	12(%ebp), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 8(%ebx)
-	movl	16(%ebp), %ecx
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	%ecx, 16(%ebx)
-	movl	20(%ebp), %eax
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	24(%ebp), %eax
-	adcl	%edi, %eax
-	movl	%eax, 24(%ebx)
-.LBB106_2:                              # %nocarry
-	addl	$24, %esp
+	jne	.LBB62_1
+# %bb.2:
+	movl	800(%esp), %edx
+	movl	%eax, 44(%edx)
+	jne	.LBB62_3
+.LBB62_4:
+	movl	%ebp, 40(%edx)
+	movl	56(%esp), %eax                  # 4-byte Reload
+	jne	.LBB62_5
+.LBB62_6:
+	movl	%edi, 36(%edx)
+	movl	76(%esp), %ebx                  # 4-byte Reload
+	movl	80(%esp), %ebp                  # 4-byte Reload
+	jne	.LBB62_7
+.LBB62_8:
+	movl	%esi, 32(%edx)
+	movl	72(%esp), %edi                  # 4-byte Reload
+	jne	.LBB62_9
+.LBB62_10:
+	movl	%ecx, 28(%edx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB62_11
+.LBB62_12:
+	movl	%ecx, 24(%edx)
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB62_13
+.LBB62_14:
+	movl	%ebp, 20(%edx)
+	jne	.LBB62_15
+.LBB62_16:
+	movl	%ebx, 16(%edx)
+	jne	.LBB62_17
+.LBB62_18:
+	movl	%edi, 12(%edx)
+	jne	.LBB62_19
+.LBB62_20:
+	movl	%esi, 8(%edx)
+	jne	.LBB62_21
+.LBB62_22:
+	movl	%ecx, 4(%edx)
+	je	.LBB62_24
+.LBB62_23:
+	movl	44(%esp), %eax                  # 4-byte Reload
+.LBB62_24:
+	movl	%eax, (%edx)
+	addl	$780, %esp                      # imm = 0x30C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end106:
-	.size	mcl_fp_sub7Lbmi2, .Lfunc_end106-mcl_fp_sub7Lbmi2
-
-	.globl	mcl_fp_subNF7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF7Lbmi2,@function
-mcl_fp_subNF7Lbmi2:                     # @mcl_fp_subNF7Lbmi2
-# BB#0:
+.LBB62_1:
+	movl	%edx, %eax
+	movl	800(%esp), %edx
+	movl	%eax, 44(%edx)
+	je	.LBB62_4
+.LBB62_3:
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 40(%edx)
+	movl	56(%esp), %eax                  # 4-byte Reload
+	je	.LBB62_6
+.LBB62_5:
+	movl	36(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 36(%edx)
+	movl	76(%esp), %ebx                  # 4-byte Reload
+	movl	80(%esp), %ebp                  # 4-byte Reload
+	je	.LBB62_8
+.LBB62_7:
+	movl	16(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 32(%edx)
+	movl	72(%esp), %edi                  # 4-byte Reload
+	je	.LBB62_10
+.LBB62_9:
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%edx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	je	.LBB62_12
+.LBB62_11:
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%edx)
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	je	.LBB62_14
+.LBB62_13:
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%edx)
+	je	.LBB62_16
+.LBB62_15:
+	movl	48(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 16(%edx)
+	je	.LBB62_18
+.LBB62_17:
+	movl	20(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%edx)
+	je	.LBB62_20
+.LBB62_19:
+	movl	28(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%edx)
+	je	.LBB62_22
+.LBB62_21:
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%edx)
+	jne	.LBB62_23
+	jmp	.LBB62_24
+.Lfunc_end62:
+	.size	mcl_fp_montRed12Lbmi2, .Lfunc_end62-mcl_fp_montRed12Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRedNF12Lbmi2         # -- Begin function mcl_fp_montRedNF12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF12Lbmi2,@function
+mcl_fp_montRedNF12Lbmi2:                # @mcl_fp_montRedNF12Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$32, %esp
-	movl	56(%esp), %eax
-	movl	(%eax), %esi
-	movl	4(%eax), %edx
-	movl	60(%esp), %ecx
-	subl	(%ecx), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	sbbl	4(%ecx), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	24(%eax), %edx
-	movl	20(%eax), %esi
-	movl	16(%eax), %edi
-	movl	12(%eax), %ebx
-	movl	8(%eax), %eax
-	sbbl	8(%ecx), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	sbbl	12(%ecx), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	sbbl	16(%ecx), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	sbbl	20(%ecx), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	sbbl	24(%ecx), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%edx, %ecx
-	sarl	$31, %ecx
-	movl	%ecx, %eax
-	shldl	$1, %edx, %eax
-	movl	64(%esp), %edx
-	andl	(%edx), %eax
-	movl	24(%edx), %esi
-	andl	%ecx, %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	20(%edx), %ebx
-	andl	%ecx, %ebx
-	movl	16(%edx), %edi
-	andl	%ecx, %edi
-	movl	12(%edx), %esi
-	andl	%ecx, %esi
-	movl	64(%esp), %edx
-	movl	8(%edx), %edx
-	andl	%ecx, %edx
-	movl	64(%esp), %ebp
-	andl	4(%ebp), %ecx
-	addl	20(%esp), %eax          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	52(%esp), %ebp
-	movl	%eax, (%ebp)
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	movl	%ebp, %eax
-	movl	%ecx, 4(%eax)
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 8(%eax)
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 12(%eax)
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edi, 16(%eax)
-	movl	%ebx, 20(%eax)
-	movl	(%esp), %ecx            # 4-byte Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 24(%eax)
-	addl	$32, %esp
-	popl	%esi
-	popl	%edi
+	subl	$780, %esp                      # imm = 0x30C
+	calll	.L63$pb
+.L63$pb:
 	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end107:
-	.size	mcl_fp_subNF7Lbmi2, .Lfunc_end107-mcl_fp_subNF7Lbmi2
-
-	.globl	mcl_fpDbl_add7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add7Lbmi2,@function
-mcl_fpDbl_add7Lbmi2:                    # @mcl_fpDbl_add7Lbmi2
-# BB#0:
+.Ltmp15:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp15-.L63$pb), %ebx
+	movl	808(%esp), %ecx
+	movl	44(%ecx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	40(%ecx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	36(%ecx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	32(%ecx), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	28(%ecx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	24(%ecx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	20(%ecx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	12(%ecx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	8(%ecx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %eax
+	movl	%ecx, %edx
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	movl	44(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	40(%eax), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	28(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	16(%eax), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	12(%eax), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	8(%eax), %esi
+	movl	(%eax), %ebp
+	movl	4(%eax), %edi
+	movl	-4(%edx), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	(%edx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	%ebp, %eax
+	imull	%ecx, %eax
+	leal	732(%esp), %ecx
+	pushl	%eax
+	pushl	%edx
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addl	728(%esp), %ebp
+	adcl	732(%esp), %edi
+	adcl	736(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	740(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	744(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	748(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	752(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	756(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	760(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	764(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	768(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	772(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	movl	48(%eax), %eax
+	adcl	776(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	setb	32(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edi, %eax
+	leal	676(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 32(%esp)                  # 1-byte Folded Spill
+	movl	720(%esp), %eax
+	adcl	$0, %eax
+	addl	672(%esp), %edi
+	adcl	676(%esp), %esi
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	680(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	684(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	688(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	692(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	696(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	700(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	704(%esp), %edi
+	adcl	708(%esp), %ebp
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	712(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	716(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	52(%ecx), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	setb	48(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	620(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 48(%esp)                  # 1-byte Folded Spill
+	movl	664(%esp), %eax
+	adcl	$0, %eax
+	addl	616(%esp), %esi
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	620(%esp), %edx
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	624(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	628(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	632(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	636(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	640(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	644(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	adcl	648(%esp), %ebp
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	652(%esp), %esi
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	656(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	660(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	56(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	36(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	564(%esp), %ecx
+	pushl	%eax
+	movl	816(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 36(%esp)                  # 1-byte Folded Spill
+	movl	608(%esp), %eax
+	adcl	$0, %eax
+	addl	560(%esp), %edi
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	564(%esp), %edi
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	568(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	572(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	576(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	580(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	584(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	588(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	adcl	592(%esp), %esi
+	movl	%esi, %ebp
+	movl	40(%esp), %esi                  # 4-byte Reload
+	adcl	596(%esp), %esi
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	600(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	604(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	60(%ecx), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	setb	40(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edi, %eax
+	leal	508(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 40(%esp)                  # 1-byte Folded Spill
+	movl	552(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %ecx
+	addl	504(%esp), %edi
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	508(%esp), %edx
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	512(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	516(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	520(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	524(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	528(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	532(%esp), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	adcl	536(%esp), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	540(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	544(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	64(%eax), %ecx
+	movl	%ecx, %ebp
+	setb	32(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	452(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 32(%esp)                  # 1-byte Folded Spill
+	movl	496(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	448(%esp), %edi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	452(%esp), %ecx
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	456(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	464(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	468(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	472(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	476(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	480(%esp), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	484(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	488(%esp), %esi
+	adcl	492(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	68(%eax), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	setb	28(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %ebp
+	leal	396(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 28(%esp)                  # 1-byte Folded Spill
+	movl	440(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	392(%esp), %ebp
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	396(%esp), %ecx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	400(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	404(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	408(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	412(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	416(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	420(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	424(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	428(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	432(%esp), %edi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	436(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	72(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	setb	12(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, %eax
+	imull	%ecx, %eax
+	movl	%ecx, %esi
+	leal	340(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 12(%esp)                  # 1-byte Folded Spill
+	movl	384(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	336(%esp), %esi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	340(%esp), %ecx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	356(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	360(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	364(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	368(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	372(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	376(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	380(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	76(%eax), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	setb	63(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	imull	%ecx, %ebp
+	movl	%ecx, %esi
+	leal	284(%esp), %ecx
 	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$44, %esp
-	movl	72(%esp), %esi
-	movl	68(%esp), %edx
-	movl	12(%edx), %edi
-	movl	16(%edx), %ecx
-	movl	8(%esi), %eax
-	movl	(%esi), %ebx
-	addl	(%edx), %ebx
-	movl	64(%esp), %ebp
-	movl	%ebx, (%ebp)
-	movl	4(%esi), %ebx
-	adcl	4(%edx), %ebx
-	adcl	8(%edx), %eax
-	adcl	12(%esi), %edi
-	adcl	16(%esi), %ecx
-	movl	%ebx, 4(%ebp)
-	movl	%esi, %ebx
-	movl	36(%ebx), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	%eax, 8(%ebp)
-	movl	20(%ebx), %eax
-	movl	%edi, 12(%ebp)
-	movl	20(%edx), %edi
-	adcl	%eax, %edi
-	movl	24(%ebx), %eax
-	movl	%ecx, 16(%ebp)
-	movl	24(%edx), %ecx
-	adcl	%eax, %ecx
-	movl	28(%ebx), %eax
-	movl	%edi, 20(%ebp)
-	movl	28(%edx), %edi
-	adcl	%eax, %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	32(%ebx), %eax
-	movl	%ecx, 24(%ebp)
-	movl	32(%edx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%edx), %esi
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	40(%ebx), %ecx
-	movl	40(%edx), %eax
-	adcl	%ecx, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%ebx), %ebp
-	movl	44(%edx), %ecx
-	adcl	%ebp, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	48(%ebx), %ebp
-	movl	%ebx, %eax
-	movl	48(%edx), %ebx
-	adcl	%ebp, %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	52(%eax), %eax
-	movl	52(%edx), %ebp
-	adcl	%eax, %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	76(%esp), %eax
-	subl	(%eax), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	sbbl	4(%eax), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
+	movl	816(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 63(%esp)                  # 1-byte Folded Spill
+	movl	328(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	280(%esp), %esi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	284(%esp), %ecx
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	288(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	292(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	296(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	300(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	304(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	308(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	316(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	320(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	324(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	80(%eax), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	setb	32(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %esi
+	leal	228(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 32(%esp)                  # 1-byte Folded Spill
+	movl	272(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	224(%esp), %esi
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	228(%esp), %ecx
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	240(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	244(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	248(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	252(%esp), %esi
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	256(%esp), %edi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	260(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	264(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	268(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	84(%eax), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	setb	12(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %ebp
+	leal	172(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 12(%esp)                  # 1-byte Folded Spill
+	movl	216(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	168(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	176(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	180(%esp), %ebp
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	184(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	188(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	192(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	196(%esp), %edi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	200(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	204(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	208(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	212(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	88(%ecx), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	setb	32(%esp)                        # 1-byte Folded Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	imull	%eax, %ecx
+	movl	%eax, %esi
+	subl	$4, %esp
+	leal	116(%esp), %eax
+	pushl	%ecx
+	pushl	816(%esp)
+	pushl	%eax
+	calll	mulPv384x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 32(%esp)                  # 1-byte Folded Spill
+	movl	160(%esp), %edx
+	adcl	$0, %edx
+	addl	112(%esp), %esi
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	116(%esp), %ecx
+	adcl	120(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	124(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	128(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	132(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	136(%esp), %edi
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	140(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	144(%esp), %ebp
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	adcl	148(%esp), %ebx
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	152(%esp), %eax
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	adcl	156(%esp), %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ebx
+	adcl	92(%ebx), %edx
+	movl	%eax, %ebx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	subl	88(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	sbbl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	68(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 68(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-	movl	76(%esp), %edi
-	sbbl	8(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	sbbl	12(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %ebx
-	sbbl	24(%edi), %ebp
-	sbbl	$0, %edx
-	andl	$1, %edx
-	jne	.LBB108_2
-# BB#1:
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-.LBB108_2:
-	testb	%dl, %dl
-	movl	20(%esp), %ecx          # 4-byte Reload
-	jne	.LBB108_4
-# BB#3:
-	movl	(%esp), %esi            # 4-byte Reload
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	8(%esp), %ecx           # 4-byte Reload
-.LBB108_4:
-	movl	64(%esp), %eax
-	movl	%ecx, 28(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	%esi, 36(%eax)
-	movl	24(%esp), %edx          # 4-byte Reload
-	movl	36(%esp), %ecx          # 4-byte Reload
-	jne	.LBB108_6
-# BB#5:
-	movl	12(%esp), %ecx          # 4-byte Reload
-.LBB108_6:
-	movl	%ecx, 40(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	jne	.LBB108_8
-# BB#7:
-	movl	16(%esp), %edx          # 4-byte Reload
-.LBB108_8:
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	sbbl	72(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	76(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	sbbl	80(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 80(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	84(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	%ebp, %ecx
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	sbbl	92(%esp), %ecx                  # 4-byte Folded Reload
+	movl	28(%esp), %esi                  # 4-byte Reload
+	sbbl	96(%esp), %esi                  # 4-byte Folded Reload
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	movl	%ebx, %edi
+	sbbl	100(%esp), %edi                 # 4-byte Folded Reload
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	sbbl	104(%esp), %ebp                 # 4-byte Folded Reload
+	movl	%edx, %ebx
+	sbbl	108(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, %eax
+	sarl	$31, %eax
+	testl	%eax, %eax
+	js	.LBB63_1
+# %bb.2:
+	movl	800(%esp), %eax
 	movl	%edx, 44(%eax)
-	jne	.LBB108_10
-# BB#9:
-	movl	%ebx, %ecx
-.LBB108_10:
-	movl	%ecx, 48(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	addl	$44, %esp
+	js	.LBB63_3
+.LBB63_4:
+	movl	%ebp, 40(%eax)
+	movl	64(%esp), %edx                  # 4-byte Reload
+	js	.LBB63_5
+.LBB63_6:
+	movl	%edi, 36(%eax)
+	movl	80(%esp), %ebp                  # 4-byte Reload
+	js	.LBB63_7
+.LBB63_8:
+	movl	%esi, 32(%eax)
+	movl	72(%esp), %edi                  # 4-byte Reload
+	movl	76(%esp), %ebx                  # 4-byte Reload
+	js	.LBB63_9
+.LBB63_10:
+	movl	%ecx, 28(%eax)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	js	.LBB63_11
+.LBB63_12:
+	movl	%ecx, 24(%eax)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	js	.LBB63_13
+.LBB63_14:
+	movl	%ebp, 20(%eax)
+	js	.LBB63_15
+.LBB63_16:
+	movl	%ebx, 16(%eax)
+	js	.LBB63_17
+.LBB63_18:
+	movl	%edi, 12(%eax)
+	js	.LBB63_19
+.LBB63_20:
+	movl	%esi, 8(%eax)
+	js	.LBB63_21
+.LBB63_22:
+	movl	%edx, 4(%eax)
+	jns	.LBB63_24
+.LBB63_23:
+	movl	40(%esp), %ecx                  # 4-byte Reload
+.LBB63_24:
+	movl	%ecx, (%eax)
+	addl	$780, %esp                      # imm = 0x30C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end108:
-	.size	mcl_fpDbl_add7Lbmi2, .Lfunc_end108-mcl_fpDbl_add7Lbmi2
-
-	.globl	mcl_fpDbl_sub7Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub7Lbmi2,@function
-mcl_fpDbl_sub7Lbmi2:                    # @mcl_fpDbl_sub7Lbmi2
-# BB#0:
+.LBB63_1:
+	movl	%ebx, %edx
+	movl	800(%esp), %eax
+	movl	%edx, 44(%eax)
+	jns	.LBB63_4
+.LBB63_3:
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 40(%eax)
+	movl	64(%esp), %edx                  # 4-byte Reload
+	jns	.LBB63_6
+.LBB63_5:
+	movl	48(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 36(%eax)
+	movl	80(%esp), %ebp                  # 4-byte Reload
+	jns	.LBB63_8
+.LBB63_7:
+	movl	28(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 32(%eax)
+	movl	72(%esp), %edi                  # 4-byte Reload
+	movl	76(%esp), %ebx                  # 4-byte Reload
+	jns	.LBB63_10
+.LBB63_9:
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%eax)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB63_12
+.LBB63_11:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%eax)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB63_14
+.LBB63_13:
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%eax)
+	jns	.LBB63_16
+.LBB63_15:
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 16(%eax)
+	jns	.LBB63_18
+.LBB63_17:
+	movl	44(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%eax)
+	jns	.LBB63_20
+.LBB63_19:
+	movl	20(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%eax)
+	jns	.LBB63_22
+.LBB63_21:
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%eax)
+	js	.LBB63_23
+	jmp	.LBB63_24
+.Lfunc_end63:
+	.size	mcl_fp_montRedNF12Lbmi2, .Lfunc_end63-mcl_fp_montRedNF12Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addPre12Lbmi2            # -- Begin function mcl_fp_addPre12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre12Lbmi2,@function
+mcl_fp_addPre12Lbmi2:                   # @mcl_fp_addPre12Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$32, %esp
-	movl	56(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %edx
+	subl	$36, %esp
 	movl	60(%esp), %edi
-	subl	(%edi), %eax
-	sbbl	4(%edi), %edx
-	movl	8(%esi), %ebx
-	sbbl	8(%edi), %ebx
-	movl	52(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%edx, 4(%ecx)
-	movl	16(%esi), %edx
-	sbbl	16(%edi), %edx
-	movl	%ebx, 8(%ecx)
-	movl	20(%edi), %ebx
-	movl	%eax, 12(%ecx)
-	movl	20(%esi), %eax
-	sbbl	%ebx, %eax
-	movl	24(%edi), %ebx
-	movl	%edx, 16(%ecx)
-	movl	24(%esi), %edx
-	sbbl	%ebx, %edx
-	movl	28(%edi), %ebx
-	movl	%eax, 20(%ecx)
-	movl	28(%esi), %eax
-	sbbl	%ebx, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	32(%edi), %eax
-	movl	%edx, 24(%ecx)
-	movl	32(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	36(%edi), %eax
-	movl	36(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	40(%edi), %eax
-	movl	40(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	44(%edi), %eax
-	movl	44(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	48(%edi), %eax
-	movl	48(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	52(%edi), %eax
-	movl	52(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
+	movl	(%edi), %eax
+	movl	4(%edi), %ecx
 	movl	64(%esp), %esi
-	jne	.LBB109_1
-# BB#2:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB109_3
-.LBB109_1:
-	movl	24(%esi), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-.LBB109_3:
-	testb	%al, %al
-	jne	.LBB109_4
-# BB#5:
-	movl	$0, %edi
-	movl	$0, %eax
-	jmp	.LBB109_6
-.LBB109_4:
-	movl	(%esi), %eax
-	movl	4(%esi), %edi
-.LBB109_6:
-	jne	.LBB109_7
-# BB#8:
-	movl	$0, %ebx
-	jmp	.LBB109_9
-.LBB109_7:
-	movl	20(%esi), %ebx
-.LBB109_9:
-	jne	.LBB109_10
-# BB#11:
-	movl	$0, %ebp
-	jmp	.LBB109_12
-.LBB109_10:
-	movl	16(%esi), %ebp
-.LBB109_12:
-	jne	.LBB109_13
-# BB#14:
-	movl	$0, %edx
-	jmp	.LBB109_15
-.LBB109_13:
-	movl	12(%esi), %edx
-.LBB109_15:
-	jne	.LBB109_16
-# BB#17:
-	xorl	%esi, %esi
-	jmp	.LBB109_18
-.LBB109_16:
-	movl	8(%esi), %esi
-.LBB109_18:
-	addl	12(%esp), %eax          # 4-byte Folded Reload
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%eax, 28(%ecx)
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%edi, 32(%ecx)
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, 36(%ecx)
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edx, 40(%ecx)
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 44(%ecx)
-	movl	%ebx, 48(%ecx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%ecx)
-	addl	$32, %esp
+	addl	(%esi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	4(%esi), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	44(%edi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	40(%edi), %ebx
+	movl	36(%edi), %ebp
+	movl	32(%edi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%edi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%edi), %edx
+	movl	20(%edi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	16(%edi), %ecx
+	movl	12(%edi), %eax
+	movl	8(%edi), %edi
+	adcl	8(%esi), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	adcl	12(%esi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	16(%esi), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	(%esp), %edi                    # 4-byte Reload
+	adcl	20(%esi), %edi
+	adcl	24(%esi), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	28(%esi), %edx
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	32(%esi), %eax
+	adcl	36(%esi), %ebp
+	adcl	40(%esi), %ebx
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	44(%esi), %ecx
+	movl	56(%esp), %esi
+	movl	%ebx, 40(%esi)
+	movl	%ebp, 36(%esi)
+	movl	%eax, 32(%esi)
+	movl	%edx, 28(%esi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 24(%esi)
+	movl	%edi, 20(%esi)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%esi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%esi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%esi)
+	movl	%ecx, 44(%esi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%esi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%esi)
+	setb	%al
+	movzbl	%al, %eax
+	addl	$36, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end109:
-	.size	mcl_fpDbl_sub7Lbmi2, .Lfunc_end109-mcl_fpDbl_sub7Lbmi2
-
-	.align	16, 0x90
-	.type	.LmulPv256x32,@function
-.LmulPv256x32:                          # @mulPv256x32
-# BB#0:
+.Lfunc_end64:
+	.size	mcl_fp_addPre12Lbmi2, .Lfunc_end64-mcl_fp_addPre12Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subPre12Lbmi2            # -- Begin function mcl_fp_subPre12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre12Lbmi2,@function
+mcl_fp_subPre12Lbmi2:                   # @mcl_fp_subPre12Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$20, %esp
-	movl	%edx, %eax
-	movl	40(%esp), %edx
-	mulxl	4(%eax), %edi, %esi
-	mulxl	(%eax), %ebp, %ebx
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	addl	%edi, %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	mulxl	8(%eax), %edi, %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	adcl	%esi, %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	%edx, %ebp
-	mulxl	12(%eax), %ebx, %esi
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	mulxl	16(%eax), %edi, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	%esi, %edi
-	movl	%ebp, %edx
-	mulxl	20(%eax), %esi, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	movl	%ebp, %edx
-	mulxl	24(%eax), %edx, %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%ecx)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%ecx)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 8(%ecx)
-	movl	%ebx, 12(%ecx)
-	movl	%edi, 16(%ecx)
-	movl	%esi, 20(%ecx)
-	movl	%edx, 24(%ecx)
-	movl	40(%esp), %edx
-	mulxl	28(%eax), %eax, %edx
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 28(%ecx)
-	adcl	$0, %edx
-	movl	%edx, 32(%ecx)
-	movl	%ecx, %eax
-	addl	$20, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end110:
-	.size	.LmulPv256x32, .Lfunc_end110-.LmulPv256x32
-
-	.globl	mcl_fp_mulUnitPre8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre8Lbmi2,@function
-mcl_fp_mulUnitPre8Lbmi2:                # @mcl_fp_mulUnitPre8Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$60, %esp
-	calll	.L111$pb
-.L111$pb:
-	popl	%ebx
-.Ltmp2:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.L111$pb), %ebx
-	movl	88(%esp), %eax
-	movl	%eax, (%esp)
-	leal	24(%esp), %ecx
-	movl	84(%esp), %edx
-	calll	.LmulPv256x32
-	movl	56(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi
-	movl	40(%esp), %edi
-	movl	36(%esp), %ebx
-	movl	32(%esp), %ebp
-	movl	24(%esp), %edx
-	movl	28(%esp), %ecx
-	movl	80(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%ebp, 8(%eax)
-	movl	%ebx, 12(%eax)
-	movl	%edi, 16(%eax)
-	movl	%esi, 20(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	addl	$60, %esp
+	subl	$44, %esp
+	movl	68(%esp), %ebx
+	movl	(%ebx), %ecx
+	movl	4(%ebx), %eax
+	movl	72(%esp), %edi
+	subl	(%edi), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	sbbl	4(%edi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%ebx), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	40(%ebx), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	36(%ebx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	32(%ebx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%ebx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%ebx), %edx
+	movl	20(%ebx), %ebp
+	movl	16(%ebx), %ecx
+	movl	12(%ebx), %eax
+	movl	8(%ebx), %esi
+	sbbl	8(%edi), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	sbbl	12(%edi), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	sbbl	16(%edi), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	sbbl	20(%edi), %ebp
+	sbbl	24(%edi), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	sbbl	28(%edi), %ebx
+	movl	8(%esp), %edx                   # 4-byte Reload
+	sbbl	32(%edi), %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	sbbl	36(%edi), %ecx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%edi), %eax
+	movl	32(%esp), %esi                  # 4-byte Reload
+	sbbl	44(%edi), %esi
+	movl	64(%esp), %edi
+	movl	%eax, 40(%edi)
+	movl	%ecx, 36(%edi)
+	movl	%edx, 32(%edi)
+	movl	%ebx, 28(%edi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 24(%edi)
+	movl	%ebp, 20(%edi)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%edi)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	movl	%esi, 44(%edi)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%edi)
+	movl	$0, %eax
+	sbbl	%eax, %eax
+	andl	$1, %eax
+	addl	$44, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end111:
-	.size	mcl_fp_mulUnitPre8Lbmi2, .Lfunc_end111-mcl_fp_mulUnitPre8Lbmi2
-
-	.globl	mcl_fpDbl_mulPre8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre8Lbmi2,@function
-mcl_fpDbl_mulPre8Lbmi2:                 # @mcl_fpDbl_mulPre8Lbmi2
-# BB#0:
+.Lfunc_end65:
+	.size	mcl_fp_subPre12Lbmi2, .Lfunc_end65-mcl_fp_subPre12Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_shr1_12Lbmi2             # -- Begin function mcl_fp_shr1_12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_12Lbmi2,@function
+mcl_fp_shr1_12Lbmi2:                    # @mcl_fp_shr1_12Lbmi2
+# %bb.0:
+	pushl	%esi
+	movl	12(%esp), %eax
+	movl	44(%eax), %edx
+	movl	%edx, %esi
+	shrl	%esi
+	movl	8(%esp), %ecx
+	movl	%esi, 44(%ecx)
+	movl	40(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 40(%ecx)
+	movl	36(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 36(%ecx)
+	movl	32(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 32(%ecx)
+	movl	28(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 28(%ecx)
+	movl	24(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 24(%ecx)
+	movl	20(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 20(%ecx)
+	movl	16(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 16(%ecx)
+	movl	12(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 12(%ecx)
+	movl	8(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 8(%ecx)
+	movl	4(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 4(%ecx)
+	movl	(%eax), %eax
+	shrdl	$1, %edx, %eax
+	movl	%eax, (%ecx)
+	popl	%esi
+	retl
+.Lfunc_end66:
+	.size	mcl_fp_shr1_12Lbmi2, .Lfunc_end66-mcl_fp_shr1_12Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_add12Lbmi2               # -- Begin function mcl_fp_add12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_add12Lbmi2,@function
+mcl_fp_add12Lbmi2:                      # @mcl_fp_add12Lbmi2
+# %bb.0:
 	pushl	%ebp
-	movl	%esp, %ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$156, %esp
-	calll	.L112$pb
-.L112$pb:
-	popl	%ebx
-.Ltmp3:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L112$pb), %ebx
-	movl	%ebx, -96(%ebp)         # 4-byte Spill
-	movl	16(%ebp), %esi
-	movl	%esi, 8(%esp)
-	movl	12(%ebp), %edi
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre4Lbmi2@PLT
-	leal	16(%esi), %eax
-	movl	%eax, 8(%esp)
-	leal	16(%edi), %eax
-	movl	%eax, 4(%esp)
-	movl	8(%ebp), %eax
-	leal	32(%eax), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre4Lbmi2@PLT
-	movl	24(%edi), %esi
-	movl	(%edi), %ebx
+	subl	$48, %esp
+	movl	72(%esp), %edi
+	movl	(%edi), %ecx
 	movl	4(%edi), %eax
-	addl	16(%edi), %ebx
-	movl	%ebx, -120(%ebp)        # 4-byte Spill
-	adcl	20(%edi), %eax
-	movl	%eax, -100(%ebp)        # 4-byte Spill
-	adcl	8(%edi), %esi
-	movl	%esi, -108(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -80(%ebp)         # 4-byte Spill
-	movl	16(%ebp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %ecx
-	addl	16(%edi), %eax
-	adcl	20(%edi), %ecx
-	movl	%ecx, -124(%ebp)        # 4-byte Spill
-	movl	24(%edi), %edx
-	adcl	8(%edi), %edx
-	movl	28(%edi), %ecx
-	adcl	12(%edi), %ecx
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -128(%ebp)        # 4-byte Spill
-	jb	.LBB112_2
-# BB#1:
-	xorl	%esi, %esi
-	xorl	%ebx, %ebx
-.LBB112_2:
-	movl	%ebx, -112(%ebp)        # 4-byte Spill
-	movl	%esi, -104(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %esi
-	movl	28(%esi), %edi
-	movl	-80(%ebp), %ebx         # 4-byte Reload
-	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	12(%esi), %edi
-	movl	%edi, -116(%ebp)        # 4-byte Spill
-	movl	%ecx, -84(%ebp)         # 4-byte Spill
-	movl	%edx, %edi
-	movl	-124(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -80(%ebp)         # 4-byte Spill
-	movl	%eax, -92(%ebp)         # 4-byte Spill
-	jb	.LBB112_4
-# BB#3:
-	movl	$0, -84(%ebp)           # 4-byte Folded Spill
-	movl	$0, %edi
-	movl	$0, -80(%ebp)           # 4-byte Folded Spill
-	movl	$0, -92(%ebp)           # 4-byte Folded Spill
-.LBB112_4:
-	movl	%edi, -88(%ebp)         # 4-byte Spill
-	movl	-120(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -60(%ebp)
-	movl	-100(%ebp), %edi        # 4-byte Reload
-	movl	%edi, -56(%ebp)
-	movl	-108(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -52(%ebp)
-	movl	%eax, -76(%ebp)
-	movl	%ebx, -72(%ebp)
-	movl	%edx, -68(%ebp)
-	movl	%ecx, -64(%ebp)
-	sbbl	%edx, %edx
-	movl	-116(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -48(%ebp)
-	movl	-128(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB112_6
-# BB#5:
-	movl	$0, %esi
-	movl	$0, %edi
-.LBB112_6:
-	sbbl	%eax, %eax
-	leal	-76(%ebp), %ecx
-	movl	%ecx, 8(%esp)
-	leal	-60(%ebp), %ecx
-	movl	%ecx, 4(%esp)
-	leal	-44(%ebp), %ecx
-	movl	%ecx, (%esp)
-	andl	%eax, %edx
-	movl	%edi, %eax
-	movl	-92(%ebp), %edi         # 4-byte Reload
-	addl	-112(%ebp), %edi        # 4-byte Folded Reload
-	adcl	%eax, -80(%ebp)         # 4-byte Folded Spill
-	movl	-104(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -88(%ebp)         # 4-byte Folded Spill
-	adcl	%esi, -84(%ebp)         # 4-byte Folded Spill
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	andl	$1, %edx
-	movl	%edx, -92(%ebp)         # 4-byte Spill
-	movl	-96(%ebp), %ebx         # 4-byte Reload
-	calll	mcl_fpDbl_mulPre4Lbmi2@PLT
-	addl	-28(%ebp), %edi
-	movl	-80(%ebp), %eax         # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -80(%ebp)         # 4-byte Spill
-	movl	-88(%ebp), %eax         # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -88(%ebp)         # 4-byte Spill
-	movl	-84(%ebp), %eax         # 4-byte Reload
-	adcl	-16(%ebp), %eax
-	movl	%eax, -84(%ebp)         # 4-byte Spill
-	adcl	%esi, -92(%ebp)         # 4-byte Folded Spill
-	movl	-44(%ebp), %eax
-	movl	8(%ebp), %esi
-	subl	(%esi), %eax
-	movl	%eax, -116(%ebp)        # 4-byte Spill
-	movl	-40(%ebp), %ebx
-	sbbl	4(%esi), %ebx
-	movl	-36(%ebp), %eax
-	sbbl	8(%esi), %eax
-	movl	%eax, -96(%ebp)         # 4-byte Spill
-	movl	-32(%ebp), %edx
-	sbbl	12(%esi), %edx
-	movl	16(%esi), %eax
-	movl	%eax, -100(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edi
-	movl	20(%esi), %eax
-	movl	%eax, -112(%ebp)        # 4-byte Spill
-	sbbl	%eax, -80(%ebp)         # 4-byte Folded Spill
-	movl	24(%esi), %eax
-	movl	%eax, -104(%ebp)        # 4-byte Spill
-	sbbl	%eax, -88(%ebp)         # 4-byte Folded Spill
-	movl	28(%esi), %eax
-	movl	%eax, -108(%ebp)        # 4-byte Spill
-	sbbl	%eax, -84(%ebp)         # 4-byte Folded Spill
-	sbbl	$0, -92(%ebp)           # 4-byte Folded Spill
-	movl	32(%esi), %ecx
-	movl	%ecx, -132(%ebp)        # 4-byte Spill
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	subl	%ecx, %eax
-	movl	36(%esi), %ecx
-	movl	%ecx, -136(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %ebx
-	movl	40(%esi), %ecx
-	movl	%ecx, -128(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -96(%ebp)         # 4-byte Folded Spill
-	movl	44(%esi), %ecx
-	movl	%ecx, -140(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edx
-	movl	48(%esi), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edi
-	movl	52(%esi), %ecx
-	movl	%ecx, -116(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -80(%ebp)         # 4-byte Folded Spill
-	movl	56(%esi), %ecx
-	movl	%ecx, -120(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -88(%ebp)         # 4-byte Folded Spill
-	movl	60(%esi), %ecx
-	movl	%ecx, -124(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -84(%ebp)         # 4-byte Folded Spill
-	sbbl	$0, -92(%ebp)           # 4-byte Folded Spill
-	addl	-100(%ebp), %eax        # 4-byte Folded Reload
-	adcl	-112(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%eax, 16(%esi)
-	movl	-96(%ebp), %eax         # 4-byte Reload
-	adcl	-104(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ebx, 20(%esi)
-	adcl	-108(%ebp), %edx        # 4-byte Folded Reload
-	movl	%eax, 24(%esi)
-	adcl	-132(%ebp), %edi        # 4-byte Folded Reload
-	movl	%edx, 28(%esi)
-	movl	-80(%ebp), %eax         # 4-byte Reload
-	adcl	-136(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edi, 32(%esi)
-	movl	-88(%ebp), %ecx         # 4-byte Reload
-	adcl	-128(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 36(%esi)
-	movl	-84(%ebp), %eax         # 4-byte Reload
-	adcl	-140(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 40(%esi)
-	movl	-92(%ebp), %ecx         # 4-byte Reload
-	adcl	-144(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 44(%esi)
-	movl	%ecx, 48(%esi)
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 52(%esi)
-	movl	-120(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 56(%esi)
-	movl	-124(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 60(%esi)
-	addl	$156, %esp
+	movl	76(%esp), %ebx
+	addl	(%ebx), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	4(%ebx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%edi), %ebp
+	movl	40(%edi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	36(%edi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	32(%edi), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	28(%edi), %eax
+	movl	24(%edi), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	20(%edi), %ecx
+	movl	16(%edi), %esi
+	movl	12(%edi), %edx
+	movl	8(%edi), %edi
+	adcl	8(%ebx), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	12(%ebx), %edx
+	adcl	16(%ebx), %esi
+	adcl	20(%ebx), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	24(%ebx), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	28(%ebx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	32(%ebx), %edi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	36(%ebx), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	40(%ebx), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	44(%ebx), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ebx
+	movl	%ebp, 44(%ebx)
+	movl	%ecx, 40(%ebx)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%eax, 36(%ebx)
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	%edi, 32(%ebx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 28(%ebx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 24(%ebx)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 20(%ebx)
+	movl	%esi, 16(%ebx)
+	movl	%edx, 12(%ebx)
+	movl	%ecx, 8(%ebx)
+	movl	20(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 4(%ebx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%ebx)
+	setb	3(%esp)                         # 1-byte Folded Spill
+	movl	80(%esp), %ebp
+	subl	(%ebp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	sbbl	4(%ebp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	sbbl	8(%ebp), %ecx
+	movl	%ecx, %edi
+	sbbl	12(%ebp), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	sbbl	16(%ebp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	20(%ebp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	24(%ebp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%ebp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %esi                  # 4-byte Reload
+	sbbl	32(%ebp), %esi
+	movl	32(%esp), %edx                  # 4-byte Reload
+	sbbl	36(%ebp), %edx
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	sbbl	40(%ebp), %ecx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	sbbl	44(%ebp), %eax
+	movl	%eax, %ebp
+	movzbl	3(%esp), %eax                   # 1-byte Folded Reload
+	sbbl	$0, %eax
+	testb	$1, %al
+	jne	.LBB67_2
+# %bb.1:                                # %nocarry
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%ebx)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%ebx)
+	movl	%edi, 8(%ebx)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebx)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%ebx)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 20(%ebx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 24(%ebx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 28(%ebx)
+	movl	%esi, 32(%ebx)
+	movl	%edx, 36(%ebx)
+	movl	%ecx, 40(%ebx)
+	movl	%ebp, 44(%ebx)
+.LBB67_2:                               # %carry
+	addl	$48, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end112:
-	.size	mcl_fpDbl_mulPre8Lbmi2, .Lfunc_end112-mcl_fpDbl_mulPre8Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre8Lbmi2,@function
-mcl_fpDbl_sqrPre8Lbmi2:                 # @mcl_fpDbl_sqrPre8Lbmi2
-# BB#0:
+.Lfunc_end67:
+	.size	mcl_fp_add12Lbmi2, .Lfunc_end67-mcl_fp_add12Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addNF12Lbmi2             # -- Begin function mcl_fp_addNF12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF12Lbmi2,@function
+mcl_fp_addNF12Lbmi2:                    # @mcl_fp_addNF12Lbmi2
+# %bb.0:
 	pushl	%ebp
-	movl	%esp, %ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$156, %esp
-	calll	.L113$pb
-.L113$pb:
-	popl	%ebx
-.Ltmp4:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.L113$pb), %ebx
-	movl	%ebx, -96(%ebp)         # 4-byte Spill
-	movl	12(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %esi
-	movl	%esi, (%esp)
-	calll	mcl_fpDbl_mulPre4Lbmi2@PLT
-	leal	16(%edi), %eax
-	movl	%eax, 8(%esp)
-	movl	%eax, 4(%esp)
-	leal	32(%esi), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre4Lbmi2@PLT
-	movl	(%edi), %esi
-	movl	4(%edi), %ecx
-	addl	16(%edi), %esi
-	movl	%esi, -108(%ebp)        # 4-byte Spill
-	adcl	20(%edi), %ecx
-	seto	%al
-	lahf
-	movl	%eax, %edx
-	addl	%esi, %esi
-	movl	%esi, -84(%ebp)         # 4-byte Spill
-	movl	%ecx, %esi
-	adcl	%esi, %esi
-	movl	%esi, -80(%ebp)         # 4-byte Spill
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %esi
-	popl	%eax
-	movl	%esi, -88(%ebp)         # 4-byte Spill
-	movl	24(%edi), %esi
-	pushl	%eax
-	movl	%edx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	8(%edi), %esi
-	movl	28(%edi), %edx
-	adcl	12(%edi), %edx
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -100(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -104(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %ebx
-	sbbl	%edi, %edi
-	movl	%edi, -92(%ebp)         # 4-byte Spill
-	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB113_2
-# BB#1:
-	movl	$0, -80(%ebp)           # 4-byte Folded Spill
-	movl	$0, -84(%ebp)           # 4-byte Folded Spill
-.LBB113_2:
-	movl	%esi, %ebx
-	movl	-88(%ebp), %edi         # 4-byte Reload
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	adcl	%ebx, %ebx
-	movl	%edx, %edi
-	adcl	%edi, %edi
-	movl	-104(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB113_4
-# BB#3:
-	xorl	%edi, %edi
-	xorl	%ebx, %ebx
-.LBB113_4:
-	movl	%ebx, -88(%ebp)         # 4-byte Spill
-	movl	-108(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -60(%ebp)
-	movl	%ecx, -56(%ebp)
-	movl	%esi, -52(%ebp)
-	movl	%edx, -48(%ebp)
-	movl	%eax, -76(%ebp)
-	movl	%ecx, -72(%ebp)
-	movl	%esi, -68(%ebp)
-	movl	%edx, -64(%ebp)
-	movl	-100(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB113_5
-# BB#6:
-	movl	$0, -100(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB113_7
-.LBB113_5:
-	shrl	$31, %edx
-	movl	%edx, -100(%ebp)        # 4-byte Spill
-.LBB113_7:
-	leal	-76(%ebp), %eax
-	movl	%eax, 8(%esp)
-	leal	-60(%ebp), %eax
-	movl	%eax, 4(%esp)
-	leal	-44(%ebp), %eax
-	movl	%eax, (%esp)
-	movl	-92(%ebp), %esi         # 4-byte Reload
-	andl	$1, %esi
-	movl	-96(%ebp), %ebx         # 4-byte Reload
-	calll	mcl_fpDbl_mulPre4Lbmi2@PLT
-	movl	-84(%ebp), %eax         # 4-byte Reload
-	addl	-28(%ebp), %eax
-	movl	%eax, -84(%ebp)         # 4-byte Spill
-	movl	-80(%ebp), %eax         # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -80(%ebp)         # 4-byte Spill
-	movl	-88(%ebp), %eax         # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -88(%ebp)         # 4-byte Spill
-	adcl	-16(%ebp), %edi
-	movl	%edi, -92(%ebp)         # 4-byte Spill
-	adcl	-100(%ebp), %esi        # 4-byte Folded Reload
-	movl	-44(%ebp), %eax
-	movl	8(%ebp), %edi
-	subl	(%edi), %eax
-	movl	%eax, -116(%ebp)        # 4-byte Spill
-	movl	-40(%ebp), %ebx
-	sbbl	4(%edi), %ebx
-	movl	-36(%ebp), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, -96(%ebp)         # 4-byte Spill
-	movl	-32(%ebp), %edx
-	sbbl	12(%edi), %edx
-	movl	16(%edi), %eax
-	movl	%eax, -100(%ebp)        # 4-byte Spill
-	sbbl	%eax, -84(%ebp)         # 4-byte Folded Spill
-	movl	20(%edi), %eax
-	movl	%eax, -112(%ebp)        # 4-byte Spill
-	sbbl	%eax, -80(%ebp)         # 4-byte Folded Spill
-	movl	24(%edi), %eax
-	movl	%eax, -104(%ebp)        # 4-byte Spill
-	sbbl	%eax, -88(%ebp)         # 4-byte Folded Spill
-	movl	28(%edi), %eax
-	movl	%eax, -108(%ebp)        # 4-byte Spill
-	sbbl	%eax, -92(%ebp)         # 4-byte Folded Spill
-	sbbl	$0, %esi
-	movl	32(%edi), %ecx
-	movl	%ecx, -132(%ebp)        # 4-byte Spill
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	subl	%ecx, %eax
-	movl	36(%edi), %ecx
-	movl	%ecx, -136(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %ebx
-	movl	40(%edi), %ecx
-	movl	%ecx, -128(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -96(%ebp)         # 4-byte Folded Spill
-	movl	44(%edi), %ecx
-	movl	%ecx, -140(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edx
-	movl	48(%edi), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -84(%ebp)         # 4-byte Folded Spill
-	movl	52(%edi), %ecx
-	movl	%ecx, -116(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -80(%ebp)         # 4-byte Folded Spill
-	movl	56(%edi), %ecx
-	movl	%ecx, -120(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -88(%ebp)         # 4-byte Folded Spill
-	movl	60(%edi), %ecx
-	movl	%ecx, -124(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -92(%ebp)         # 4-byte Folded Spill
-	sbbl	$0, %esi
-	addl	-100(%ebp), %eax        # 4-byte Folded Reload
-	adcl	-112(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%eax, 16(%edi)
-	movl	-96(%ebp), %eax         # 4-byte Reload
-	adcl	-104(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ebx, 20(%edi)
-	adcl	-108(%ebp), %edx        # 4-byte Folded Reload
-	movl	%eax, 24(%edi)
-	movl	-84(%ebp), %eax         # 4-byte Reload
-	adcl	-132(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edx, 28(%edi)
-	movl	-80(%ebp), %ecx         # 4-byte Reload
-	adcl	-136(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 32(%edi)
-	movl	-88(%ebp), %eax         # 4-byte Reload
-	adcl	-128(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 36(%edi)
-	movl	-92(%ebp), %ecx         # 4-byte Reload
-	adcl	-140(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 40(%edi)
-	adcl	-144(%ebp), %esi        # 4-byte Folded Reload
-	movl	%ecx, 44(%edi)
-	movl	%esi, 48(%edi)
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 52(%edi)
-	movl	-120(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 56(%edi)
-	movl	-124(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 60(%edi)
-	addl	$156, %esp
+	subl	$72, %esp
+	movl	100(%esp), %ecx
+	movl	(%ecx), %edx
+	movl	4(%ecx), %esi
+	movl	96(%esp), %eax
+	addl	(%eax), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	adcl	4(%eax), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	44(%ecx), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	40(%ecx), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	36(%ecx), %esi
+	movl	32(%ecx), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	28(%ecx), %edi
+	movl	24(%ecx), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	20(%ecx), %edx
+	movl	16(%ecx), %ebp
+	movl	12(%ecx), %ebx
+	movl	8(%ecx), %ecx
+	adcl	8(%eax), %ecx
+	adcl	12(%eax), %ebx
+	adcl	16(%eax), %ebp
+	adcl	20(%eax), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	24(%eax), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	adcl	28(%eax), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	32(%eax), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	adcl	36(%eax), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	40(%eax), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	44(%eax), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	104(%esp), %esi
+	movl	32(%esp), %edx                  # 4-byte Reload
+	subl	(%esi), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	sbbl	4(%esi), %edi
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	sbbl	8(%esi), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	sbbl	12(%esi), %ebx
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	sbbl	16(%esi), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	20(%esi), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	sbbl	24(%esi), %edi
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	sbbl	28(%esi), %ebx
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	32(%esi), %eax
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	sbbl	36(%esi), %ecx
+	movl	4(%esp), %edx                   # 4-byte Reload
+	sbbl	40(%esi), %edx
+	movl	(%esp), %ebp                    # 4-byte Reload
+	sbbl	44(%esi), %ebp
+	movl	%ebp, %esi
+	sarl	$31, %esi
+	testl	%esi, %esi
+	js	.LBB68_1
+# %bb.2:
+	movl	92(%esp), %esi
+	movl	%ebp, 44(%esi)
+	js	.LBB68_3
+.LBB68_4:
+	movl	%edx, 40(%esi)
+	js	.LBB68_5
+.LBB68_6:
+	movl	%ecx, 36(%esi)
+	movl	60(%esp), %edx                  # 4-byte Reload
+	js	.LBB68_7
+.LBB68_8:
+	movl	%eax, 32(%esi)
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	js	.LBB68_9
+.LBB68_10:
+	movl	%ebx, 28(%esi)
+	movl	68(%esp), %eax                  # 4-byte Reload
+	js	.LBB68_11
+.LBB68_12:
+	movl	%edi, 24(%esi)
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	movl	48(%esp), %edi                  # 4-byte Reload
+	js	.LBB68_13
+.LBB68_14:
+	movl	%edi, 20(%esi)
+	movl	56(%esp), %edi                  # 4-byte Reload
+	js	.LBB68_15
+.LBB68_16:
+	movl	%ebx, 16(%esi)
+	js	.LBB68_17
+.LBB68_18:
+	movl	%edi, 12(%esi)
+	js	.LBB68_19
+.LBB68_20:
+	movl	%edx, 8(%esi)
+	js	.LBB68_21
+.LBB68_22:
+	movl	%ecx, 4(%esi)
+	jns	.LBB68_24
+.LBB68_23:
+	movl	32(%esp), %eax                  # 4-byte Reload
+.LBB68_24:
+	movl	%eax, (%esi)
+	addl	$72, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end113:
-	.size	mcl_fpDbl_sqrPre8Lbmi2, .Lfunc_end113-mcl_fpDbl_sqrPre8Lbmi2
-
-	.globl	mcl_fp_mont8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont8Lbmi2,@function
-mcl_fp_mont8Lbmi2:                      # @mcl_fp_mont8Lbmi2
-# BB#0:
+.LBB68_1:
+	movl	(%esp), %ebp                    # 4-byte Reload
+	movl	92(%esp), %esi
+	movl	%ebp, 44(%esi)
+	jns	.LBB68_4
+.LBB68_3:
+	movl	4(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 40(%esi)
+	jns	.LBB68_6
+.LBB68_5:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 36(%esi)
+	movl	60(%esp), %edx                  # 4-byte Reload
+	jns	.LBB68_8
+.LBB68_7:
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 32(%esi)
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB68_10
+.LBB68_9:
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 28(%esi)
+	movl	68(%esp), %eax                  # 4-byte Reload
+	jns	.LBB68_12
+.LBB68_11:
+	movl	12(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 24(%esi)
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	movl	48(%esp), %edi                  # 4-byte Reload
+	jns	.LBB68_14
+.LBB68_13:
+	movl	24(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 20(%esi)
+	movl	56(%esp), %edi                  # 4-byte Reload
+	jns	.LBB68_16
+.LBB68_15:
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 16(%esi)
+	jns	.LBB68_18
+.LBB68_17:
+	movl	40(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%esi)
+	jns	.LBB68_20
+.LBB68_19:
+	movl	44(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%esi)
+	jns	.LBB68_22
+.LBB68_21:
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%esi)
+	js	.LBB68_23
+	jmp	.LBB68_24
+.Lfunc_end68:
+	.size	mcl_fp_addNF12Lbmi2, .Lfunc_end68-mcl_fp_addNF12Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_sub12Lbmi2               # -- Begin function mcl_fp_sub12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_sub12Lbmi2,@function
+mcl_fp_sub12Lbmi2:                      # @mcl_fp_sub12Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$700, %esp              # imm = 0x2BC
-	calll	.L114$pb
-.L114$pb:
-	popl	%ebx
-.Ltmp5:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp5-.L114$pb), %ebx
-	movl	732(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	664(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	664(%esp), %ebp
-	movl	668(%esp), %edi
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	696(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	692(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	688(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	684(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	680(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	676(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	672(%esp), %esi
-	movl	%eax, (%esp)
-	leal	624(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	624(%esp), %ebp
-	adcl	628(%esp), %edi
-	adcl	632(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	656(%esp), %ebp
-	sbbl	%eax, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	584(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	60(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	584(%esp), %edi
-	adcl	588(%esp), %esi
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	592(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	596(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	600(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	604(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	608(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	612(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	616(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%edi, %eax
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	544(%esp), %ecx
-	movl	732(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv256x32
-	andl	$1, %ebp
-	addl	544(%esp), %edi
-	adcl	548(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	568(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	728(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	addl	504(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	524(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	536(%esp), %ebp
-	sbbl	%edi, %edi
-	movl	%esi, %eax
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	464(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	movl	%edi, %eax
-	andl	$1, %eax
-	addl	464(%esp), %esi
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	468(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	472(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	480(%esp), %edi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	484(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	488(%esp), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	492(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	496(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	724(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv256x32
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	424(%esp), %ecx
-	movl	28(%esp), %ebp          # 4-byte Reload
-	adcl	428(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	436(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	444(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	448(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
+	subl	$48, %esp
+	movl	72(%esp), %edx
+	movl	(%edx), %ecx
+	movl	4(%edx), %esi
+	movl	76(%esp), %eax
+	subl	(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	sbbl	4(%eax), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	44(%edx), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	40(%edx), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	36(%edx), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	32(%edx), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%edx), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%edx), %esi
+	movl	20(%edx), %ebp
+	movl	16(%edx), %ebx
+	movl	12(%edx), %edi
+	movl	8(%edx), %ecx
+	sbbl	8(%eax), %ecx
+	sbbl	12(%eax), %edi
+	sbbl	16(%eax), %ebx
+	sbbl	20(%eax), %ebp
+	sbbl	24(%eax), %esi
+	movl	16(%esp), %edx                  # 4-byte Reload
+	sbbl	28(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	sbbl	32(%eax), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edx                   # 4-byte Reload
+	sbbl	36(%eax), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	sbbl	40(%eax), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	sbbl	44(%eax), %edx
+	movl	$0, %eax
 	sbbl	%eax, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	384(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	384(%esp), %esi
-	adcl	388(%esp), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	392(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	396(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	400(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	404(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	408(%esp), %edi
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	412(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	416(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	28(%esp), %ecx          # 4-byte Reload
-	addl	344(%esp), %ecx
-	adcl	348(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	352(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	364(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	372(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	movl	%ebp, %eax
-	andl	$1, %eax
-	addl	304(%esp), %edi
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	308(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	312(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	316(%esp), %ebp
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	320(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	324(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	328(%esp), %esi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	332(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	336(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	264(%esp), %ecx
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	272(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	276(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	284(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	292(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	movl	%edi, %eax
-	andl	$1, %eax
-	addl	224(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	228(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	232(%esp), %esi
-	adcl	236(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	240(%esp), %edi
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	244(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	248(%esp), %ebp
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	252(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	256(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	184(%esp), %ecx
-	adcl	188(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	196(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	204(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ebp          # 4-byte Reload
-	adcl	212(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	144(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	movl	%edi, %ecx
-	andl	$1, %ecx
-	addl	144(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	152(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	32(%esp), %edi          # 4-byte Reload
-	adcl	160(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	172(%esp), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	adcl	176(%esp), %ebp
-	adcl	$0, %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	104(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	104(%esp), %ecx
-	adcl	108(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	116(%esp), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	120(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %edi          # 4-byte Reload
-	adcl	128(%esp), %edi
-	adcl	132(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	136(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	24(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %ebp
-	movl	%eax, (%esp)
-	leal	64(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	andl	$1, %esi
-	addl	64(%esp), %ebp
-	movl	32(%esp), %ebx          # 4-byte Reload
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	72(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ebx
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	80(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	84(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	88(%esp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	92(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%eax, %edx
-	movl	732(%esp), %ebp
-	subl	(%ebp), %edx
-	movl	%ecx, %eax
-	sbbl	4(%ebp), %eax
-	movl	%ebx, %ecx
-	sbbl	8(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	sbbl	20(%ebp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%ebp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%ebp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	sbbl	$0, %esi
-	andl	$1, %esi
-	movl	%esi, %ecx
-	jne	.LBB114_2
-# BB#1:
-	movl	%edx, %ebp
-.LBB114_2:
-	movl	720(%esp), %edx
-	movl	%ebp, (%edx)
-	testb	%cl, %cl
-	movl	60(%esp), %ebp          # 4-byte Reload
-	jne	.LBB114_4
-# BB#3:
-	movl	%eax, %ebp
-.LBB114_4:
-	movl	%ebp, 4(%edx)
-	jne	.LBB114_6
-# BB#5:
-	movl	12(%esp), %ebx          # 4-byte Reload
-.LBB114_6:
-	movl	%ebx, 8(%edx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	jne	.LBB114_8
-# BB#7:
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-.LBB114_8:
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%edx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	jne	.LBB114_10
-# BB#9:
-	movl	20(%esp), %edi          # 4-byte Reload
-.LBB114_10:
-	movl	%edi, 16(%edx)
-	jne	.LBB114_12
-# BB#11:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB114_12:
-	movl	%eax, 20(%edx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	jne	.LBB114_14
-# BB#13:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB114_14:
-	movl	%eax, 24(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	jne	.LBB114_16
-# BB#15:
-	movl	52(%esp), %eax          # 4-byte Reload
-.LBB114_16:
-	movl	%eax, 28(%edx)
-	addl	$700, %esp              # imm = 0x2BC
-	popl	%esi
-	popl	%edi
-	popl	%ebx
+	testb	$1, %al
+	movl	68(%esp), %eax
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%edx, 44(%eax)
+	movl	4(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 40(%eax)
+	movl	8(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 36(%eax)
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 32(%eax)
+	movl	16(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 28(%eax)
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	%esi, 24(%eax)
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	%ebp, 20(%eax)
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	movl	%ebx, 16(%eax)
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	%edi, 12(%eax)
+	movl	%ecx, %edx
+	movl	%ecx, 8(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	36(%esp), %esi                  # 4-byte Reload
+	movl	%esi, (%eax)
+	je	.LBB69_2
+# %bb.1:                                # %carry
+	movl	%esi, %ecx
+	movl	80(%esp), %ecx
+	addl	(%ecx), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	4(%ecx), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	8(%ecx), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	12(%ecx), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	16(%ecx), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	20(%ecx), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	24(%ecx), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	28(%ecx), %ebx
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	32(%ecx), %edi
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	36(%ecx), %esi
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	40(%ecx), %edx
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	44(%ecx), %ebp
+	movl	%ebp, 44(%eax)
+	movl	%edx, 40(%eax)
+	movl	%esi, 36(%eax)
+	movl	%edi, 32(%eax)
+	movl	%ebx, 28(%eax)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%eax)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%eax)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%eax)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 8(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%eax)
+.LBB69_2:                               # %nocarry
+	addl	$48, %esp
+	popl	%esi
+	popl	%edi
+	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end114:
-	.size	mcl_fp_mont8Lbmi2, .Lfunc_end114-mcl_fp_mont8Lbmi2
-
-	.globl	mcl_fp_montNF8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF8Lbmi2,@function
-mcl_fp_montNF8Lbmi2:                    # @mcl_fp_montNF8Lbmi2
-# BB#0:
+.Lfunc_end69:
+	.size	mcl_fp_sub12Lbmi2, .Lfunc_end69-mcl_fp_sub12Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subNF12Lbmi2             # -- Begin function mcl_fp_subNF12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF12Lbmi2,@function
+mcl_fp_subNF12Lbmi2:                    # @mcl_fp_subNF12Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$700, %esp              # imm = 0x2BC
-	calll	.L115$pb
-.L115$pb:
-	popl	%ebx
-.Ltmp6:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp6-.L115$pb), %ebx
-	movl	732(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	664(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	664(%esp), %ebp
-	movl	668(%esp), %edi
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	696(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	692(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	688(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	684(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	680(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	676(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	672(%esp), %esi
-	movl	%eax, (%esp)
-	leal	624(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	624(%esp), %ebp
-	adcl	628(%esp), %edi
-	adcl	632(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	adcl	640(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	584(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	616(%esp), %ecx
-	addl	584(%esp), %edi
-	adcl	588(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	596(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	604(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	544(%esp), %ecx
-	movl	732(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv256x32
-	addl	544(%esp), %edi
-	adcl	548(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	564(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	572(%esp), %edi
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	576(%esp), %ebp
-	movl	728(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	536(%esp), %ecx
-	addl	504(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	528(%esp), %edi
-	adcl	532(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	464(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	464(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	adcl	472(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	484(%esp), %esi
-	adcl	488(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %edi          # 4-byte Reload
-	adcl	496(%esp), %edi
-	movl	728(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	456(%esp), %eax
-	movl	44(%esp), %edx          # 4-byte Reload
-	addl	424(%esp), %edx
-	adcl	428(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	432(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	436(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	440(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	448(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	452(%esp), %edi
-	movl	%edi, %ebp
+	subl	$72, %esp
+	movl	96(%esp), %ecx
+	movl	(%ecx), %edx
+	movl	4(%ecx), %eax
+	movl	100(%esp), %edi
+	subl	(%edi), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	sbbl	4(%edi), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	44(%ecx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	40(%ecx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	32(%ecx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	28(%ecx), %ebp
+	movl	24(%ecx), %ebx
+	movl	20(%ecx), %esi
+	movl	16(%ecx), %edx
+	movl	12(%ecx), %eax
+	movl	8(%ecx), %ecx
+	sbbl	8(%edi), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	sbbl	12(%edi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	sbbl	16(%edi), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	sbbl	20(%edi), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	sbbl	24(%edi), %ebx
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	sbbl	28(%edi), %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	32(%edi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	36(%edi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%edi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	sbbl	44(%edi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
 	movl	%eax, %edi
-	adcl	$0, %edi
-	movl	%edx, %eax
-	movl	%edx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	384(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	384(%esp), %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	396(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	412(%esp), %ebp
-	adcl	416(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	376(%esp), %edx
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	344(%esp), %ecx
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	352(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	360(%esp), %esi
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	364(%esp), %edi
-	adcl	368(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	304(%esp), %ebp
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	308(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	320(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	adcl	324(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	328(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	336(%esp), %edi
-	movl	728(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	724(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv256x32
-	movl	296(%esp), %edx
-	movl	%ebp, %ecx
-	addl	264(%esp), %ecx
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	280(%esp), %ebp
-	adcl	284(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	292(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	adcl	$0, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	224(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	236(%esp), %esi
-	adcl	240(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	256(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	216(%esp), %ebp
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	184(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	192(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	196(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	144(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	144(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	152(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	156(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	160(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	176(%esp), %ebp
-	movl	728(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	104(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	136(%esp), %edi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	104(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	116(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	120(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	128(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	132(%esp), %ebp
-	adcl	$0, %edi
-	movl	28(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	64(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	64(%esp), %esi
-	movl	32(%esp), %esi          # 4-byte Reload
-	movl	56(%esp), %eax          # 4-byte Reload
-	movl	44(%esp), %ebx          # 4-byte Reload
-	adcl	68(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	72(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	76(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	80(%esp), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	84(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	92(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	adcl	96(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	732(%esp), %eax
-	subl	(%eax), %edx
-	sbbl	4(%eax), %ecx
-	sbbl	8(%eax), %esi
-	sbbl	12(%eax), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebx          # 4-byte Reload
-	sbbl	16(%eax), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%eax), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	sbbl	24(%eax), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	sbbl	28(%eax), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	testl	%edi, %edi
-	js	.LBB115_2
-# BB#1:
-	movl	%edx, 56(%esp)          # 4-byte Spill
-.LBB115_2:
-	movl	720(%esp), %edx
-	movl	56(%esp), %eax          # 4-byte Reload
-	movl	%eax, (%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB115_4
-# BB#3:
-	movl	%ecx, %eax
-.LBB115_4:
-	movl	%eax, 4(%edx)
-	js	.LBB115_6
-# BB#5:
-	movl	%esi, 32(%esp)          # 4-byte Spill
-.LBB115_6:
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 8(%edx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	60(%esp), %eax          # 4-byte Reload
-	movl	48(%esp), %ecx          # 4-byte Reload
-	js	.LBB115_8
-# BB#7:
-	movl	12(%esp), %esi          # 4-byte Reload
-	movl	%esi, 44(%esp)          # 4-byte Spill
-.LBB115_8:
-	movl	44(%esp), %esi          # 4-byte Reload
-	movl	%esi, 12(%edx)
-	js	.LBB115_10
-# BB#9:
-	movl	16(%esp), %edi          # 4-byte Reload
-.LBB115_10:
-	movl	%edi, 16(%edx)
-	js	.LBB115_12
-# BB#11:
-	movl	20(%esp), %ebp          # 4-byte Reload
-.LBB115_12:
-	movl	%ebp, 20(%edx)
-	js	.LBB115_14
-# BB#13:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB115_14:
-	movl	%eax, 24(%edx)
-	js	.LBB115_16
-# BB#15:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB115_16:
-	movl	%ecx, 28(%edx)
-	addl	$700, %esp              # imm = 0x2BC
+	sarl	$31, %edi
+	movl	%edi, %edx
+	shldl	$1, %eax, %edx
+	movl	104(%esp), %ebx
+	andl	(%ebx), %edx
+	movl	44(%ebx), %eax
+	andl	%edi, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	40(%ebx), %eax
+	andl	%edi, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	36(%ebx), %eax
+	andl	%edi, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	32(%ebx), %eax
+	andl	%edi, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%ebx), %eax
+	andl	%edi, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	24(%ebx), %eax
+	andl	%edi, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%ebx), %ebp
+	andl	%edi, %ebp
+	movl	16(%ebx), %esi
+	andl	%edi, %esi
+	movl	12(%ebx), %ecx
+	andl	%edi, %ecx
+	movl	8(%ebx), %eax
+	andl	%edi, %eax
+	andl	4(%ebx), %edi
+	addl	64(%esp), %edx                  # 4-byte Folded Reload
+	adcl	68(%esp), %edi                  # 4-byte Folded Reload
+	movl	92(%esp), %ebx
+	movl	%edx, (%ebx)
+	adcl	40(%esp), %eax                  # 4-byte Folded Reload
+	movl	%edi, 4(%ebx)
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 8(%ebx)
+	adcl	48(%esp), %esi                  # 4-byte Folded Reload
+	movl	%ecx, 12(%ebx)
+	adcl	52(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%esi, 16(%ebx)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ebp, 20(%ebx)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	60(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 24(%ebx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%ecx, 28(%ebx)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%eax, 32(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 36(%ebx)
+	movl	%eax, 40(%ebx)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, 44(%ebx)
+	addl	$72, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end115:
-	.size	mcl_fp_montNF8Lbmi2, .Lfunc_end115-mcl_fp_montNF8Lbmi2
-
-	.globl	mcl_fp_montRed8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed8Lbmi2,@function
-mcl_fp_montRed8Lbmi2:                   # @mcl_fp_montRed8Lbmi2
-# BB#0:
+.Lfunc_end70:
+	.size	mcl_fp_subNF12Lbmi2, .Lfunc_end70-mcl_fp_subNF12Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_add12Lbmi2            # -- Begin function mcl_fpDbl_add12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add12Lbmi2,@function
+mcl_fpDbl_add12Lbmi2:                   # @mcl_fpDbl_add12Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$428, %esp              # imm = 0x1AC
-	calll	.L116$pb
-.L116$pb:
-	popl	%ebx
-.Ltmp7:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp7-.L116$pb), %ebx
-	movl	456(%esp), %edx
-	movl	-4(%edx), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	452(%esp), %eax
-	movl	(%eax), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	4(%eax), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	%esi, %ecx
-	imull	%edi, %ecx
-	movl	60(%eax), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	56(%eax), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	52(%eax), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	48(%eax), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	44(%eax), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	40(%eax), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	36(%eax), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	32(%eax), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	28(%eax), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	24(%eax), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	20(%eax), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	16(%eax), %ebp
-	movl	12(%eax), %edi
-	movl	8(%eax), %esi
-	movl	(%edx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%edx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	24(%edx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	20(%edx), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	16(%edx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	12(%edx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	8(%edx), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	4(%edx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ecx, (%esp)
-	leal	392(%esp), %ecx
-	calll	.LmulPv256x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	392(%esp), %eax
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	396(%esp), %ecx
-	adcl	400(%esp), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	404(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	408(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	sbbl	%eax, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	movl	64(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	352(%esp), %edi
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	356(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	360(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	364(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	368(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	372(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	376(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	380(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	384(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	addl	312(%esp), %edi
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	316(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	addl	272(%esp), %edi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	276(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	addl	232(%esp), %ebp
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	236(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	252(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	192(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	addl	192(%esp), %edi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	196(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	204(%esp), %edi
-	adcl	208(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	212(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	152(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	addl	152(%esp), %esi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	156(%esp), %ecx
-	adcl	160(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	168(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	172(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	180(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	addl	112(%esp), %esi
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	116(%esp), %ecx
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	120(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	128(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	%edi, %ebx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	132(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	%eax, %esi
-	adcl	136(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	144(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	%ecx, %edx
-	subl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	104(%esp), %ebp         # 4-byte Reload
-	sbbl	28(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	32(%esp), %ebx          # 4-byte Folded Reload
-	sbbl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	sbbl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	sbbl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	sbbl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	sbbl	$0, %edi
-	andl	$1, %edi
-	jne	.LBB116_2
-# BB#1:
-	movl	%edx, %ecx
-.LBB116_2:
-	movl	448(%esp), %edx
-	movl	%ecx, (%edx)
-	movl	%edi, %ecx
-	testb	%cl, %cl
-	jne	.LBB116_4
-# BB#3:
-	movl	%eax, 108(%esp)         # 4-byte Spill
-.LBB116_4:
-	movl	108(%esp), %eax         # 4-byte Reload
-	movl	%eax, 4(%edx)
-	movl	104(%esp), %eax         # 4-byte Reload
-	jne	.LBB116_6
-# BB#5:
-	movl	%ebp, %eax
-.LBB116_6:
-	movl	%eax, 8(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	movl	76(%esp), %ebp          # 4-byte Reload
-	jne	.LBB116_8
-# BB#7:
-	movl	%ebx, %ebp
-.LBB116_8:
-	movl	%ebp, 12(%edx)
-	movl	100(%esp), %ebx         # 4-byte Reload
-	jne	.LBB116_10
-# BB#9:
-	movl	68(%esp), %ebx          # 4-byte Reload
-.LBB116_10:
-	movl	%ebx, 16(%edx)
-	movl	80(%esp), %edi          # 4-byte Reload
-	jne	.LBB116_12
-# BB#11:
-	movl	72(%esp), %edi          # 4-byte Reload
-.LBB116_12:
-	movl	%edi, 20(%edx)
-	movl	88(%esp), %esi          # 4-byte Reload
-	jne	.LBB116_14
-# BB#13:
-	movl	92(%esp), %esi          # 4-byte Reload
-.LBB116_14:
-	movl	%esi, 24(%edx)
-	jne	.LBB116_16
-# BB#15:
-	movl	96(%esp), %eax          # 4-byte Reload
-.LBB116_16:
+	subl	$92, %esp
+	movl	116(%esp), %esi
+	movl	(%esi), %eax
+	movl	4(%esi), %ecx
+	movl	120(%esp), %edx
+	addl	(%edx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	adcl	4(%edx), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	movl	92(%esi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	88(%esi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	84(%esi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	80(%esi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	76(%esi), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	72(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	68(%esi), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	64(%esi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	56(%esi), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	52(%esi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	48(%esi), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	44(%esi), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	36(%esi), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	32(%esi), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	28(%esi), %edi
+	movl	24(%esi), %ecx
+	movl	20(%esi), %ebx
+	movl	16(%esi), %ebp
+	movl	12(%esi), %eax
+	movl	8(%esi), %esi
+	adcl	8(%edx), %esi
+	movl	%esi, 68(%esp)                  # 4-byte Spill
+	adcl	12(%edx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	adcl	16(%edx), %ebp
+	adcl	20(%edx), %ebx
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	adcl	24(%edx), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	adcl	28(%edx), %edi
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	movl	84(%esp), %edi                  # 4-byte Reload
+	adcl	32(%edx), %edi
+	movl	88(%esp), %ecx                  # 4-byte Reload
+	adcl	36(%edx), %ecx
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	40(%edx), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	44(%edx), %eax
+	movl	80(%esp), %ebx                  # 4-byte Reload
+	adcl	48(%edx), %ebx
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	52(%edx), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	40(%esp), %esi                  # 4-byte Reload
+	adcl	56(%edx), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	60(%edx), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	64(%edx), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	68(%edx), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	72(%edx), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	76(%edx), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	80(%edx), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	84(%edx), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	88(%edx), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	92(%edx), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	112(%esp), %edx
+	movl	%eax, 44(%edx)
+	movl	(%esp), %esi                    # 4-byte Reload
+	movl	%esi, 40(%edx)
+	movl	%ecx, 36(%edx)
+	movl	%edi, 32(%edx)
+	movl	52(%esp), %eax                  # 4-byte Reload
 	movl	%eax, 28(%edx)
-	addl	$428, %esp              # imm = 0x1AC
+	movl	56(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%edx)
+	movl	60(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 20(%edx)
+	movl	%ebp, 16(%edx)
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edx)
+	movl	68(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%edx)
+	movl	72(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 4(%edx)
+	movl	76(%esp), %esi                  # 4-byte Reload
+	movl	%esi, (%edx)
+	setb	48(%esp)                        # 1-byte Folded Spill
+	movl	124(%esp), %ebp
+	movl	%ebx, %eax
+	movl	%ebx, 80(%esp)                  # 4-byte Spill
+	subl	(%ebp), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	sbbl	4(%ebp), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	sbbl	8(%ebp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	sbbl	12(%ebp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	sbbl	16(%ebp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	20(%ebp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%ebp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%ebp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	sbbl	32(%ebp), %ecx
+	movl	12(%esp), %esi                  # 4-byte Reload
+	sbbl	36(%ebp), %esi
+	movl	8(%esp), %edi                   # 4-byte Reload
+	sbbl	40(%ebp), %edi
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	sbbl	44(%ebp), %ebx
+	movzbl	48(%esp), %eax                  # 1-byte Folded Reload
+	sbbl	$0, %eax
+	testb	$1, %al
+	jne	.LBB71_1
+# %bb.2:
+	movl	%ebx, 92(%edx)
+	jne	.LBB71_3
+.LBB71_4:
+	movl	%edi, 88(%edx)
+	movl	60(%esp), %ebx                  # 4-byte Reload
+	movl	56(%esp), %ebp                  # 4-byte Reload
+	jne	.LBB71_5
+.LBB71_6:
+	movl	%esi, 84(%edx)
+	movl	64(%esp), %edi                  # 4-byte Reload
+	jne	.LBB71_7
+.LBB71_8:
+	movl	%ecx, 80(%edx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	52(%esp), %eax                  # 4-byte Reload
+	jne	.LBB71_9
+.LBB71_10:
+	movl	%eax, 76(%edx)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB71_11
+.LBB71_12:
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 72(%edx)
+	jne	.LBB71_13
+.LBB71_14:
+	movl	%ebp, 68(%edx)
+	jne	.LBB71_15
+.LBB71_16:
+	movl	%ebx, 64(%edx)
+	movl	76(%esp), %eax                  # 4-byte Reload
+	jne	.LBB71_17
+.LBB71_18:
+	movl	%edi, 60(%edx)
+	jne	.LBB71_19
+.LBB71_20:
+	movl	%esi, 56(%edx)
+	jne	.LBB71_21
+.LBB71_22:
+	movl	%ecx, 52(%edx)
+	je	.LBB71_24
+.LBB71_23:
+	movl	80(%esp), %eax                  # 4-byte Reload
+.LBB71_24:
+	movl	%eax, 48(%edx)
+	addl	$92, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end116:
-	.size	mcl_fp_montRed8Lbmi2, .Lfunc_end116-mcl_fp_montRed8Lbmi2
-
-	.globl	mcl_fp_addPre8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre8Lbmi2,@function
-mcl_fp_addPre8Lbmi2:                    # @mcl_fp_addPre8Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	20(%esp), %esi
-	addl	(%esi), %ecx
-	adcl	4(%esi), %edx
-	movl	8(%eax), %edi
-	adcl	8(%esi), %edi
-	movl	16(%esp), %ebx
-	movl	%ecx, (%ebx)
-	movl	12(%esi), %ecx
-	movl	%edx, 4(%ebx)
-	movl	16(%esi), %edx
-	adcl	12(%eax), %ecx
-	adcl	16(%eax), %edx
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%ecx, 12(%ebx)
-	movl	20(%esi), %ecx
-	adcl	%edi, %ecx
-	movl	24(%eax), %edi
-	movl	%edx, 16(%ebx)
-	movl	24(%esi), %edx
-	adcl	%edi, %edx
-	movl	%ecx, 20(%ebx)
-	movl	%edx, 24(%ebx)
-	movl	28(%eax), %eax
-	movl	28(%esi), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 28(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end117:
-	.size	mcl_fp_addPre8Lbmi2, .Lfunc_end117-mcl_fp_addPre8Lbmi2
-
-	.globl	mcl_fp_subPre8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre8Lbmi2,@function
-mcl_fp_subPre8Lbmi2:                    # @mcl_fp_subPre8Lbmi2
-# BB#0:
+.LBB71_1:
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	movl	%ebx, 92(%edx)
+	je	.LBB71_4
+.LBB71_3:
+	movl	8(%esp), %edi                   # 4-byte Reload
+	movl	%edi, 88(%edx)
+	movl	60(%esp), %ebx                  # 4-byte Reload
+	movl	56(%esp), %ebp                  # 4-byte Reload
+	je	.LBB71_6
+.LBB71_5:
+	movl	12(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 84(%edx)
+	movl	64(%esp), %edi                  # 4-byte Reload
+	je	.LBB71_8
+.LBB71_7:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 80(%edx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	52(%esp), %eax                  # 4-byte Reload
+	je	.LBB71_10
+.LBB71_9:
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 76(%edx)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	je	.LBB71_12
+.LBB71_11:
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 72(%edx)
+	je	.LBB71_14
+.LBB71_13:
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 68(%edx)
+	je	.LBB71_16
+.LBB71_15:
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 64(%edx)
+	movl	76(%esp), %eax                  # 4-byte Reload
+	je	.LBB71_18
+.LBB71_17:
+	movl	36(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 60(%edx)
+	je	.LBB71_20
+.LBB71_19:
+	movl	40(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 56(%edx)
+	je	.LBB71_22
+.LBB71_21:
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 52(%edx)
+	jne	.LBB71_23
+	jmp	.LBB71_24
+.Lfunc_end71:
+	.size	mcl_fpDbl_add12Lbmi2, .Lfunc_end71-mcl_fpDbl_add12Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sub12Lbmi2            # -- Begin function mcl_fpDbl_sub12Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub12Lbmi2,@function
+mcl_fpDbl_sub12Lbmi2:                   # @mcl_fpDbl_sub12Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %esi
-	xorl	%eax, %eax
-	movl	28(%esp), %edi
-	subl	(%edi), %edx
-	sbbl	4(%edi), %esi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edi), %ebx
-	movl	20(%esp), %ebp
-	movl	%edx, (%ebp)
-	movl	12(%ecx), %edx
-	sbbl	12(%edi), %edx
-	movl	%esi, 4(%ebp)
-	movl	16(%ecx), %esi
-	sbbl	16(%edi), %esi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edi), %ebx
-	movl	%edx, 12(%ebp)
-	movl	20(%ecx), %edx
-	sbbl	%ebx, %edx
-	movl	24(%edi), %ebx
-	movl	%esi, 16(%ebp)
-	movl	24(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	%edx, 20(%ebp)
-	movl	%esi, 24(%ebp)
-	movl	28(%edi), %edx
-	movl	28(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 28(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end118:
-	.size	mcl_fp_subPre8Lbmi2, .Lfunc_end118-mcl_fp_subPre8Lbmi2
-
-	.globl	mcl_fp_shr1_8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_8Lbmi2,@function
-mcl_fp_shr1_8Lbmi2:                     # @mcl_fp_shr1_8Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	8(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 4(%esi)
-	movl	12(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 8(%esi)
-	movl	16(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 12(%esi)
-	movl	20(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 16(%esi)
-	movl	24(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 20(%esi)
-	movl	28(%eax), %eax
-	shrdl	$1, %eax, %ecx
-	movl	%ecx, 24(%esi)
-	shrl	%eax
-	movl	%eax, 28(%esi)
+	subl	$92, %esp
+	movl	116(%esp), %esi
+	movl	(%esi), %ecx
+	movl	4(%esi), %eax
+	xorl	%ebp, %ebp
+	movl	120(%esp), %edx
+	subl	(%edx), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	sbbl	4(%edx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	92(%esi), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	88(%esi), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	movl	84(%esi), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	80(%esi), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	76(%esi), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	72(%esi), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	68(%esi), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	64(%esi), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	60(%esi), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	56(%esi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	52(%esi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	48(%esi), %ebx
+	movl	44(%esi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	40(%esi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	36(%esi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	32(%esi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esi), %edi
+	movl	20(%esi), %ecx
+	movl	16(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%esi), %eax
+	movl	8(%esi), %esi
+	sbbl	8(%edx), %esi
+	movl	%esi, 88(%esp)                  # 4-byte Spill
+	sbbl	12(%edx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	16(%edx), %eax
+	sbbl	20(%edx), %ecx
+	movl	%ecx, 80(%esp)                  # 4-byte Spill
+	sbbl	24(%edx), %edi
+	movl	%edi, 76(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	sbbl	28(%edx), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	sbbl	32(%edx), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	sbbl	36(%edx), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	sbbl	40(%edx), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	sbbl	44(%edx), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	sbbl	48(%edx), %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	sbbl	52(%edx), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	sbbl	56(%edx), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	sbbl	60(%edx), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	sbbl	64(%edx), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
+	sbbl	68(%edx), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	sbbl	72(%edx), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	60(%esp), %esi                  # 4-byte Reload
+	sbbl	76(%edx), %esi
+	movl	%esi, 60(%esp)                  # 4-byte Spill
+	movl	64(%esp), %esi                  # 4-byte Reload
+	sbbl	80(%edx), %esi
+	movl	%esi, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %esi                  # 4-byte Reload
+	sbbl	84(%edx), %esi
+	movl	%esi, 68(%esp)                  # 4-byte Spill
+	movl	72(%esp), %esi                  # 4-byte Reload
+	sbbl	88(%edx), %esi
+	movl	%esi, 72(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	sbbl	92(%edx), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	112(%esp), %edi
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 44(%edi)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 40(%edi)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 36(%edi)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 32(%edi)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%edi)
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%edi)
+	movl	80(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%edi)
+	movl	%eax, 16(%edi)
+	movl	84(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edi)
+	movl	88(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%edi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%edi)
+	sbbl	%ebp, %ebp
+	andl	$1, %ebp
+	negl	%ebp
+	movl	124(%esp), %ebx
+	movl	44(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	32(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	28(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	24(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	16(%ebx), %esi
+	andl	%ebp, %esi
+	movl	12(%ebx), %edx
+	andl	%ebp, %edx
+	movl	8(%ebx), %ecx
+	andl	%ebp, %ecx
+	movl	4(%ebx), %eax
+	andl	%ebp, %eax
+	andl	(%ebx), %ebp
+	addl	24(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ebp, 48(%edi)
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 52(%edi)
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 56(%edi)
+	adcl	48(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edx, 60(%edi)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	52(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%esi, 64(%edi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 68(%edi)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	60(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 72(%edi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 76(%edi)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 80(%edi)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	72(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 84(%edi)
+	movl	%eax, 88(%edi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	40(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 92(%edi)
+	addl	$92, %esp
 	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
 	retl
-.Lfunc_end119:
-	.size	mcl_fp_shr1_8Lbmi2, .Lfunc_end119-mcl_fp_shr1_8Lbmi2
-
-	.globl	mcl_fp_add8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add8Lbmi2,@function
-mcl_fp_add8Lbmi2:                       # @mcl_fp_add8Lbmi2
-# BB#0:
+.Lfunc_end72:
+	.size	mcl_fpDbl_sub12Lbmi2, .Lfunc_end72-mcl_fpDbl_sub12Lbmi2
+                                        # -- End function
+	.globl	mulPv512x32bmi2                 # -- Begin function mulPv512x32bmi2
+	.p2align	4, 0x90
+	.type	mulPv512x32bmi2,@function
+mulPv512x32bmi2:                        # @mulPv512x32bmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$20, %esp
-	movl	48(%esp), %edi
-	movl	(%edi), %ecx
-	movl	4(%edi), %eax
-	movl	44(%esp), %edx
-	addl	(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	%ecx, %ebp
-	adcl	4(%edx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	8(%edi), %eax
-	adcl	8(%edx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	12(%edx), %esi
-	movl	16(%edx), %eax
-	adcl	12(%edi), %esi
-	adcl	16(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	20(%edx), %ecx
-	adcl	20(%edi), %ecx
-	movl	24(%edx), %ebx
-	adcl	24(%edi), %ebx
-	movl	28(%edx), %edi
-	movl	48(%esp), %edx
-	adcl	28(%edx), %edi
-	movl	40(%esp), %edx
-	movl	%ebp, (%edx)
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%edx)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%edx)
-	movl	%esi, 12(%edx)
-	movl	%eax, 16(%edx)
-	movl	%ecx, 20(%edx)
-	movl	%ebx, 24(%edx)
-	movl	%edi, 28(%edx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	52(%esp), %edx
-	movl	8(%esp), %ebp           # 4-byte Reload
-	subl	(%edx), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	52(%esp), %edx
-	sbbl	4(%edx), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	52(%esp), %edx
-	sbbl	8(%edx), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp
-	sbbl	12(%ebp), %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	4(%esp), %edx           # 4-byte Reload
-	sbbl	16(%ebp), %edx
-	movl	%edx, %esi
-	sbbl	20(%ebp), %ecx
-	sbbl	24(%ebp), %ebx
-	sbbl	28(%ebp), %edi
-	sbbl	$0, %eax
-	testb	$1, %al
-	jne	.LBB120_2
-# BB#1:                                 # %nocarry
-	movl	8(%esp), %edx           # 4-byte Reload
-	movl	40(%esp), %ebp
-	movl	%edx, (%ebp)
-	movl	16(%esp), %edx          # 4-byte Reload
-	movl	%edx, 4(%ebp)
-	movl	12(%esp), %edx          # 4-byte Reload
-	movl	%edx, 8(%ebp)
-	movl	(%esp), %eax            # 4-byte Reload
-	movl	%eax, 12(%ebp)
-	movl	%esi, 16(%ebp)
-	movl	%ecx, 20(%ebp)
-	movl	%ebx, 24(%ebp)
-	movl	%edi, 28(%ebp)
-.LBB120_2:                              # %carry
-	addl	$20, %esp
+	subl	$52, %esp
+	movl	80(%esp), %edx
+	movl	76(%esp), %edi
+	mulxl	4(%edi), %ecx, %eax
+	mulxl	(%edi), %ebx, %esi
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	addl	%ecx, %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	mulxl	8(%edi), %esi, %ecx
+	adcl	%eax, %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	mulxl	12(%edi), %esi, %eax
+	adcl	%ecx, %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	mulxl	16(%edi), %esi, %ecx
+	adcl	%eax, %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	mulxl	20(%edi), %esi, %eax
+	adcl	%ecx, %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	mulxl	24(%edi), %esi, %ecx
+	adcl	%eax, %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	mulxl	28(%edi), %esi, %eax
+	adcl	%ecx, %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	mulxl	32(%edi), %esi, %ecx
+	adcl	%eax, %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	mulxl	36(%edi), %esi, %eax
+	adcl	%ecx, %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	mulxl	40(%edi), %ecx, %esi
+	adcl	%eax, %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	mulxl	44(%edi), %ecx, %eax
+	adcl	%esi, %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	mulxl	48(%edi), %ebx, %ecx
+	adcl	%eax, %ebx
+	mulxl	52(%edi), %esi, %eax
+	adcl	%ecx, %esi
+	mulxl	56(%edi), %ecx, %ebp
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	adcl	%eax, %ecx
+	movl	72(%esp), %eax
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, (%eax)
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 4(%eax)
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 8(%eax)
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 12(%eax)
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 16(%eax)
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%eax)
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 24(%eax)
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 28(%eax)
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 32(%eax)
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 36(%eax)
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	movl	%ebp, 40(%eax)
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	movl	%ebp, 44(%eax)
+	movl	%ebx, 48(%eax)
+	movl	%esi, 52(%eax)
+	movl	%ecx, 56(%eax)
+	mulxl	60(%edi), %ecx, %edx
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 60(%eax)
+	adcl	$0, %edx
+	movl	%edx, 64(%eax)
+	addl	$52, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
-	retl
-.Lfunc_end120:
-	.size	mcl_fp_add8Lbmi2, .Lfunc_end120-mcl_fp_add8Lbmi2
-
-	.globl	mcl_fp_addNF8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF8Lbmi2,@function
-mcl_fp_addNF8Lbmi2:                     # @mcl_fp_addNF8Lbmi2
-# BB#0:
+	retl	$4
+.Lfunc_end73:
+	.size	mulPv512x32bmi2, .Lfunc_end73-mulPv512x32bmi2
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre16Lbmi2        # -- Begin function mcl_fp_mulUnitPre16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre16Lbmi2,@function
+mcl_fp_mulUnitPre16Lbmi2:               # @mcl_fp_mulUnitPre16Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$56, %esp
+	subl	$124, %esp
+	calll	.L74$pb
+.L74$pb:
+	popl	%ebx
+.Ltmp16:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp16-.L74$pb), %ebx
+	subl	$4, %esp
+	movl	156(%esp), %eax
+	movl	152(%esp), %ecx
+	leal	60(%esp), %edx
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	56(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	72(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	76(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	80(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
 	movl	84(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edi
-	movl	80(%esp), %ebx
-	addl	(%ebx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	4(%ebx), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	28(%eax), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	20(%eax), %ebp
-	movl	16(%eax), %esi
-	movl	12(%eax), %edx
-	movl	8(%eax), %ecx
-	adcl	8(%ebx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	12(%ebx), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	16(%ebx), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	20(%ebx), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	24(%ebx), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	28(%ebx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx
-	movl	24(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, %eax
-	subl	(%ebx), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	sbbl	4(%ebx), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%ebp, %eax
-	sbbl	8(%ebx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	12(%ebx), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	sbbl	16(%ebx), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	20(%ebx), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	sbbl	24(%ebx), %ebp
-	movl	48(%esp), %esi          # 4-byte Reload
-	sbbl	28(%ebx), %esi
-	testl	%esi, %esi
-	js	.LBB121_2
-# BB#1:
-	movl	(%esp), %eax            # 4-byte Reload
-.LBB121_2:
-	movl	76(%esp), %ebx
-	movl	%eax, (%ebx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	js	.LBB121_4
-# BB#3:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB121_4:
-	movl	%eax, 4(%ebx)
-	movl	40(%esp), %edx          # 4-byte Reload
-	movl	28(%esp), %edi          # 4-byte Reload
-	js	.LBB121_6
-# BB#5:
-	movl	8(%esp), %edi           # 4-byte Reload
-.LBB121_6:
-	movl	%edi, 8(%ebx)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	36(%esp), %eax          # 4-byte Reload
-	js	.LBB121_8
-# BB#7:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB121_8:
-	movl	%eax, 12(%ebx)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB121_10
-# BB#9:
-	movl	16(%esp), %edx          # 4-byte Reload
-.LBB121_10:
-	movl	%edx, 16(%ebx)
-	js	.LBB121_12
-# BB#11:
-	movl	20(%esp), %ecx          # 4-byte Reload
-.LBB121_12:
-	movl	%ecx, 20(%ebx)
-	js	.LBB121_14
-# BB#13:
-	movl	%ebp, %eax
-.LBB121_14:
-	movl	%eax, 24(%ebx)
-	js	.LBB121_16
-# BB#15:
-	movl	%esi, %edi
-.LBB121_16:
-	movl	%edi, 28(%ebx)
-	addl	$56, %esp
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	88(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	92(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	96(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	100(%esp), %ebp
+	movl	104(%esp), %ebx
+	movl	108(%esp), %edi
+	movl	112(%esp), %esi
+	movl	116(%esp), %edx
+	movl	120(%esp), %ecx
+	movl	144(%esp), %eax
+	movl	%ecx, 64(%eax)
+	movl	%edx, 60(%eax)
+	movl	%esi, 56(%eax)
+	movl	%edi, 52(%eax)
+	movl	%ebx, 48(%eax)
+	movl	%ebp, 44(%eax)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 40(%eax)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 36(%eax)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 32(%eax)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%eax)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%eax)
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%eax)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%eax)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 8(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%eax)
+	addl	$124, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end121:
-	.size	mcl_fp_addNF8Lbmi2, .Lfunc_end121-mcl_fp_addNF8Lbmi2
-
-	.globl	mcl_fp_sub8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub8Lbmi2,@function
-mcl_fp_sub8Lbmi2:                       # @mcl_fp_sub8Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
+.Lfunc_end74:
+	.size	mcl_fp_mulUnitPre16Lbmi2, .Lfunc_end74-mcl_fp_mulUnitPre16Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre16Lbmi2         # -- Begin function mcl_fpDbl_mulPre16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre16Lbmi2,@function
+mcl_fpDbl_mulPre16Lbmi2:                # @mcl_fpDbl_mulPre16Lbmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$28, %esp
-	movl	52(%esp), %esi
-	movl	(%esi), %ecx
-	movl	4(%esi), %eax
-	xorl	%ebx, %ebx
-	movl	56(%esp), %ebp
-	subl	(%ebp), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	sbbl	4(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	8(%esi), %edx
-	sbbl	8(%ebp), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%ebp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	16(%esi), %ecx
-	sbbl	16(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	24(%esi), %edi
-	sbbl	24(%ebp), %edi
-	movl	28(%esi), %esi
-	sbbl	28(%ebp), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	48(%esp), %ebx
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%ebx)
-	movl	20(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%ebx)
-	movl	%edx, 8(%ebx)
-	movl	24(%esp), %edx          # 4-byte Reload
-	movl	%edx, 12(%ebx)
-	movl	%ecx, 16(%ebx)
-	movl	%eax, 20(%ebx)
-	movl	%edi, 24(%ebx)
-	movl	%esi, 28(%ebx)
-	je	.LBB122_2
-# BB#1:                                 # %carry
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	60(%esp), %esi
-	movl	16(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	adcl	8(%esi), %ebp
-	movl	12(%esi), %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	%eax, 20(%ebx)
-	movl	24(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 24(%ebx)
-	movl	28(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-.LBB122_2:                              # %nocarry
-	addl	$28, %esp
+	subl	$300, %esp                      # imm = 0x12C
+	calll	.L75$pb
+.L75$pb:
 	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end122:
-	.size	mcl_fp_sub8Lbmi2, .Lfunc_end122-mcl_fp_sub8Lbmi2
-
-	.globl	mcl_fp_subNF8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF8Lbmi2,@function
-mcl_fp_subNF8Lbmi2:                     # @mcl_fp_subNF8Lbmi2
-# BB#0:
+.Ltmp17:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp17-.L75$pb), %esi
+	subl	$4, %esp
+	movl	332(%esp), %ebp
+	movl	328(%esp), %edi
+	movl	%esi, %ebx
+	movl	%esi, 88(%esp)                  # 4-byte Spill
 	pushl	%ebp
-	pushl	%ebx
 	pushl	%edi
-	pushl	%esi
-	subl	$40, %esp
-	movl	64(%esp), %eax
-	movl	(%eax), %esi
-	movl	4(%eax), %edx
-	movl	68(%esp), %ecx
-	subl	(%ecx), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	sbbl	4(%ecx), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	28(%eax), %edx
-	movl	24(%eax), %esi
-	movl	20(%eax), %edi
-	movl	16(%eax), %ebx
-	movl	12(%eax), %ebp
-	movl	8(%eax), %eax
-	sbbl	8(%ecx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	sbbl	12(%ecx), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	sbbl	16(%ecx), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	sbbl	20(%ecx), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	sbbl	24(%ecx), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	sbbl	28(%ecx), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	sarl	$31, %edi
-	movl	72(%esp), %ebp
-	movl	28(%ebp), %eax
-	andl	%edi, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	24(%ebp), %eax
-	andl	%edi, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	20(%ebp), %ebx
-	andl	%edi, %ebx
-	movl	16(%ebp), %esi
-	andl	%edi, %esi
-	movl	12(%ebp), %edx
-	andl	%edi, %edx
-	movl	8(%ebp), %ecx
-	andl	%edi, %ecx
-	movl	4(%ebp), %eax
-	andl	%edi, %eax
-	andl	(%ebp), %edi
-	addl	24(%esp), %edi          # 4-byte Folded Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	60(%esp), %ebp
-	movl	%edi, (%ebp)
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 4(%ebp)
-	adcl	12(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 8(%ebp)
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 12(%ebp)
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	%esi, 16(%ebp)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebx, 20(%ebp)
-	movl	%eax, 24(%ebp)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%ebp)
-	addl	$40, %esp
+	pushl	332(%esp)
+	calll	mcl_fpDbl_mulPre8Lbmi2@PLT
+	addl	$12, %esp
+	leal	32(%ebp), %eax
+	leal	32(%edi), %ecx
+	movl	324(%esp), %edx
+	addl	$64, %edx
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+	calll	mcl_fpDbl_mulPre8Lbmi2@PLT
+	addl	$16, %esp
+	movl	48(%edi), %eax
+	movl	44(%edi), %ecx
+	movl	40(%edi), %edx
+	movl	32(%edi), %esi
+	movl	36(%edi), %ebx
+	addl	(%edi), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	4(%edi), %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	adcl	8(%edi), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	adcl	12(%edi), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	16(%edi), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	52(%edi), %eax
+	adcl	20(%edi), %eax
+	movl	%eax, %edx
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	56(%edi), %eax
+	adcl	24(%edi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	60(%edi), %eax
+	adcl	28(%edi), %eax
+	movl	%eax, %ecx
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	setb	80(%esp)                        # 1-byte Folded Spill
+	movl	32(%ebp), %eax
+	addl	(%ebp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%ebp), %eax
+	adcl	4(%ebp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%ebp), %eax
+	adcl	8(%ebp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	44(%ebp), %eax
+	adcl	12(%ebp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	48(%ebp), %esi
+	adcl	16(%ebp), %esi
+	movl	%esi, 64(%esp)                  # 4-byte Spill
+	movl	52(%ebp), %eax
+	adcl	20(%ebp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%ebp), %eax
+	adcl	24(%ebp), %eax
+	movl	%eax, %edi
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	60(%ebp), %ebx
+	adcl	28(%ebp), %ebx
+	movl	%ebx, 76(%esp)                  # 4-byte Spill
+	movl	%ecx, 232(%esp)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 228(%esp)
+	movl	%edx, 224(%esp)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 220(%esp)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 216(%esp)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 212(%esp)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 208(%esp)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 204(%esp)
+	movl	%ebx, 200(%esp)
+	movl	%edi, 196(%esp)
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 192(%esp)
+	movl	%esi, 188(%esp)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 184(%esp)
+	movl	56(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 180(%esp)
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 176(%esp)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 172(%esp)
+	setb	72(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movzbl	84(%esp), %esi                  # 1-byte Folded Reload
+	movl	%esi, %ecx
+	negl	%ecx
+	movl	%esi, %edi
+	shll	$31, %edi
+	shrdl	$31, %ecx, %edi
+	andl	52(%esp), %edi                  # 4-byte Folded Reload
+	andl	%ecx, 72(%esp)                  # 4-byte Folded Spill
+	andl	%ecx, %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	andl	%ecx, 68(%esp)                  # 4-byte Folded Spill
+	andl	%ecx, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	andl	%ecx, %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	andl	%ecx, %ebx
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	andl	80(%esp), %ecx                  # 4-byte Folded Reload
+	movzbl	76(%esp), %ebp                  # 1-byte Folded Reload
+	andl	%ebp, %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	%ebp, %edx
+	negl	%edx
+	andl	%edx, 64(%esp)                  # 4-byte Folded Spill
+	andl	%edx, 48(%esp)                  # 4-byte Folded Spill
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	andl	%edx, %ebx
+	andl	%edx, 44(%esp)                  # 4-byte Folded Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	andl	%edx, %esi
+	andl	%edx, 40(%esp)                  # 4-byte Folded Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	andl	%edx, %eax
+	shll	$31, %ebp
+	shrdl	$31, %edx, %ebp
+	andl	20(%esp), %ebp                  # 4-byte Folded Reload
+	addl	%edi, %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	60(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	32(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	68(%esp), %esi                  # 4-byte Folded Reload
+	adcl	16(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edi                  # 4-byte Reload
+	adcl	72(%esp), %edi                  # 4-byte Folded Reload
+	leal	176(%esp), %edx
+	adcl	%ecx, 64(%esp)                  # 4-byte Folded Spill
+	setb	%al
+	movzbl	%al, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	88(%esp), %ebx                  # 4-byte Reload
+	pushl	%edx
+	leal	212(%esp), %eax
+	pushl	%eax
+	leal	248(%esp), %eax
+	pushl	%eax
+	calll	mcl_fpDbl_mulPre8Lbmi2@PLT
+	addl	$16, %esp
+	movl	16(%esp), %eax                  # 4-byte Reload
+	addl	268(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	272(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	276(%esp), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	280(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	284(%esp), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	288(%esp), %eax
+	adcl	292(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	adcl	296(%esp), %ebp
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	movl	236(%esp), %ecx
+	movl	320(%esp), %esi
+	subl	(%esi), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	240(%esp), %ebx
+	sbbl	4(%esi), %ebx
+	movl	244(%esp), %ecx
+	sbbl	8(%esi), %ecx
+	movl	248(%esp), %edx
+	sbbl	12(%esi), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	252(%esp), %edx
+	sbbl	16(%esi), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	256(%esp), %edx
+	sbbl	20(%esi), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	260(%esp), %edx
+	sbbl	24(%esi), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	264(%esp), %edx
+	sbbl	28(%esi), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	32(%esi), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	sbbl	%edx, 16(%esp)                  # 4-byte Folded Spill
+	movl	36(%esi), %edx
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	sbbl	%edx, 24(%esp)                  # 4-byte Folded Spill
+	movl	40(%esi), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	sbbl	%edx, 36(%esp)                  # 4-byte Folded Spill
+	movl	44(%esi), %edx
+	movl	%edx, 136(%esp)                 # 4-byte Spill
+	sbbl	%edx, 20(%esp)                  # 4-byte Folded Spill
+	movl	48(%esi), %edx
+	movl	%edx, 104(%esp)                 # 4-byte Spill
+	sbbl	%edx, 40(%esp)                  # 4-byte Folded Spill
+	movl	52(%esi), %edx
+	movl	%edx, 100(%esp)                 # 4-byte Spill
+	sbbl	%edx, %eax
+	movl	%eax, %edi
+	movl	56(%esi), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	sbbl	%eax, 44(%esp)                  # 4-byte Folded Spill
+	movl	60(%esi), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	sbbl	%eax, %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	$0, %eax
+	movl	64(%esi), %ebp
+	movl	%ebp, 132(%esp)                 # 4-byte Spill
+	subl	%ebp, 56(%esp)                  # 4-byte Folded Spill
+	movl	68(%esi), %ebp
+	movl	%ebp, 144(%esp)                 # 4-byte Spill
+	sbbl	%ebp, %ebx
+	movl	%ebx, 92(%esp)                  # 4-byte Spill
+	movl	72(%esi), %ebx
+	movl	%ebx, 140(%esp)                 # 4-byte Spill
+	sbbl	%ebx, %ecx
+	movl	%ecx, 88(%esp)                  # 4-byte Spill
+	movl	76(%esi), %ecx
+	movl	%ecx, 128(%esp)                 # 4-byte Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	sbbl	%ecx, %edx
+	movl	80(%esi), %ebx
+	movl	%ebx, 124(%esp)                 # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	sbbl	%ebx, %ecx
+	movl	84(%esi), %ebx
+	movl	%ebx, 120(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 68(%esp)                  # 4-byte Folded Spill
+	movl	88(%esi), %ebx
+	movl	%ebx, 116(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 64(%esp)                  # 4-byte Folded Spill
+	movl	92(%esi), %ebx
+	movl	%ebx, 112(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 52(%esp)                  # 4-byte Folded Spill
+	movl	96(%esi), %ebx
+	movl	%ebx, 108(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 16(%esp)                  # 4-byte Folded Spill
+	movl	100(%esi), %ebx
+	movl	%ebx, 164(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 24(%esp)                  # 4-byte Folded Spill
+	movl	104(%esi), %ebx
+	movl	%ebx, 160(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 36(%esp)                  # 4-byte Folded Spill
+	movl	108(%esi), %ebx
+	movl	%ebx, 156(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 20(%esp)                  # 4-byte Folded Spill
+	movl	112(%esi), %ebx
+	movl	%ebx, 152(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 40(%esp)                  # 4-byte Folded Spill
+	movl	116(%esi), %ebx
+	movl	%ebx, 72(%esp)                  # 4-byte Spill
+	sbbl	%ebx, %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	120(%esi), %edi
+	movl	%edi, 148(%esp)                 # 4-byte Spill
+	sbbl	%edi, 44(%esp)                  # 4-byte Folded Spill
+	movl	124(%esi), %edi
+	movl	%edi, 168(%esp)                 # 4-byte Spill
+	sbbl	%edi, 60(%esp)                  # 4-byte Folded Spill
+	sbbl	$0, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ebp                  # 4-byte Reload
+	addl	48(%esp), %ebp                  # 4-byte Folded Reload
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	adcl	80(%esp), %ebx                  # 4-byte Folded Reload
+	movl	88(%esp), %edi                  # 4-byte Reload
+	adcl	76(%esp), %edi                  # 4-byte Folded Reload
+	adcl	136(%esp), %edx                 # 4-byte Folded Reload
+	adcl	104(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	100(%esp), %eax                 # 4-byte Folded Reload
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	96(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 52(%esi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%esi)
+	movl	%edx, 44(%esi)
+	movl	%edi, 40(%esi)
+	movl	%ebx, 36(%esi)
+	movl	%ebp, 32(%esi)
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	84(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 56(%esi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	132(%esp), %eax                 # 4-byte Folded Reload
+	movl	%edx, 60(%esi)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	144(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%eax, 64(%esi)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	140(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 68(%esi)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	128(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%eax, 72(%esi)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	124(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 76(%esi)
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	120(%esp), %edx                 # 4-byte Folded Reload
+	movl	%eax, 80(%esi)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	116(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%edx, 84(%esi)
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	112(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 88(%esi)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	108(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%eax, 92(%esi)
+	movl	%ecx, 96(%esi)
+	movl	164(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 100(%esi)
+	movl	160(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 104(%esi)
+	movl	156(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 108(%esi)
+	movl	152(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 112(%esi)
+	movl	72(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 116(%esi)
+	movl	148(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 120(%esi)
+	movl	168(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 124(%esi)
+	addl	$300, %esp                      # imm = 0x12C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end123:
-	.size	mcl_fp_subNF8Lbmi2, .Lfunc_end123-mcl_fp_subNF8Lbmi2
-
-	.globl	mcl_fpDbl_add8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add8Lbmi2,@function
-mcl_fpDbl_add8Lbmi2:                    # @mcl_fpDbl_add8Lbmi2
-# BB#0:
+.Lfunc_end75:
+	.size	mcl_fpDbl_mulPre16Lbmi2, .Lfunc_end75-mcl_fpDbl_mulPre16Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre16Lbmi2         # -- Begin function mcl_fpDbl_sqrPre16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre16Lbmi2,@function
+mcl_fpDbl_sqrPre16Lbmi2:                # @mcl_fpDbl_sqrPre16Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$56, %esp
-	movl	84(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edx
-	movl	80(%esp), %ebp
-	addl	(%ebp), %esi
-	adcl	4(%ebp), %edx
-	movl	8(%ecx), %edi
-	adcl	8(%ebp), %edi
-	movl	12(%ebp), %ebx
-	movl	76(%esp), %eax
-	movl	%esi, (%eax)
-	movl	16(%ebp), %esi
-	adcl	12(%ecx), %ebx
-	adcl	16(%ecx), %esi
-	movl	%edx, 4(%eax)
-	movl	40(%ecx), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%edi, 8(%eax)
-	movl	20(%ecx), %edx
-	movl	%ebx, 12(%eax)
-	movl	20(%ebp), %edi
-	adcl	%edx, %edi
-	movl	24(%ecx), %edx
-	movl	%esi, 16(%eax)
-	movl	24(%ebp), %esi
-	adcl	%edx, %esi
-	movl	28(%ecx), %edx
-	movl	%edi, 20(%eax)
-	movl	28(%ebp), %ebx
-	adcl	%edx, %ebx
-	movl	32(%ecx), %edx
-	movl	%esi, 24(%eax)
-	movl	32(%ebp), %esi
-	adcl	%edx, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	36(%ecx), %edx
-	movl	%ebx, 28(%eax)
-	movl	36(%ebp), %ebx
-	adcl	%edx, %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	40(%ebp), %eax
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%ecx), %edx
-	movl	44(%ebp), %edi
-	adcl	%edx, %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	48(%ecx), %edx
-	movl	48(%ebp), %eax
-	adcl	%edx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	52(%ecx), %edx
-	movl	52(%ebp), %esi
-	adcl	%edx, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	56(%ecx), %edx
-	movl	56(%ebp), %eax
-	adcl	%edx, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%ecx), %ecx
-	movl	60(%ebp), %ebp
-	adcl	%ecx, %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	44(%esp), %eax          # 4-byte Reload
-	movl	88(%esp), %edx
-	subl	(%edx), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	88(%esp), %eax
-	sbbl	4(%eax), %ebx
-	movl	%eax, %edx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	movl	%edx, %ebx
-	sbbl	8(%ebx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	movl	24(%esp), %edi          # 4-byte Reload
-	sbbl	12(%ebx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	sbbl	16(%ebx), %eax
-	sbbl	20(%ebx), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	sbbl	24(%ebx), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	sbbl	28(%ebx), %ebp
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB124_2
-# BB#1:
-	movl	%eax, %edi
-.LBB124_2:
-	testb	%cl, %cl
-	movl	44(%esp), %ecx          # 4-byte Reload
-	jne	.LBB124_4
-# BB#3:
-	movl	(%esp), %ecx            # 4-byte Reload
-.LBB124_4:
-	movl	76(%esp), %eax
-	movl	%ecx, 32(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	32(%esp), %edx          # 4-byte Reload
-	movl	48(%esp), %esi          # 4-byte Reload
-	movl	28(%esp), %ebx          # 4-byte Reload
-	jne	.LBB124_6
-# BB#5:
-	movl	4(%esp), %ebx           # 4-byte Reload
-.LBB124_6:
-	movl	%ebx, 36(%eax)
-	jne	.LBB124_8
-# BB#7:
-	movl	8(%esp), %esi           # 4-byte Reload
-.LBB124_8:
-	movl	%esi, 40(%eax)
-	movl	36(%esp), %esi          # 4-byte Reload
-	jne	.LBB124_10
-# BB#9:
-	movl	12(%esp), %edx          # 4-byte Reload
-.LBB124_10:
-	movl	%edx, 44(%eax)
-	movl	%edi, 48(%eax)
-	movl	52(%esp), %edx          # 4-byte Reload
-	jne	.LBB124_12
-# BB#11:
-	movl	16(%esp), %esi          # 4-byte Reload
-.LBB124_12:
-	movl	%esi, 52(%eax)
-	jne	.LBB124_14
-# BB#13:
-	movl	20(%esp), %edx          # 4-byte Reload
-.LBB124_14:
-	movl	%edx, 56(%eax)
-	jne	.LBB124_16
-# BB#15:
-	movl	%ebp, %ecx
-.LBB124_16:
-	movl	%ecx, 60(%eax)
-	addl	$56, %esp
+	subl	$284, %esp                      # imm = 0x11C
+	calll	.L76$pb
+.L76$pb:
 	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end124:
-	.size	mcl_fpDbl_add8Lbmi2, .Lfunc_end124-mcl_fpDbl_add8Lbmi2
-
-	.globl	mcl_fpDbl_sub8Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub8Lbmi2,@function
-mcl_fpDbl_sub8Lbmi2:                    # @mcl_fpDbl_sub8Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
+.Ltmp18:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp18-.L76$pb), %esi
+	subl	$4, %esp
+	movl	312(%esp), %edi
+	movl	308(%esp), %ebp
+	movl	%esi, %ebx
+	movl	%esi, 20(%esp)                  # 4-byte Spill
 	pushl	%edi
-	pushl	%esi
-	subl	$40, %esp
-	movl	64(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %edx
-	movl	68(%esp), %ebx
-	subl	(%ebx), %eax
-	sbbl	4(%ebx), %edx
-	movl	8(%edi), %esi
-	sbbl	8(%ebx), %esi
-	movl	60(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%edi), %eax
-	sbbl	12(%ebx), %eax
-	movl	%edx, 4(%ecx)
-	movl	16(%edi), %edx
-	sbbl	16(%ebx), %edx
-	movl	%esi, 8(%ecx)
-	movl	20(%ebx), %esi
-	movl	%eax, 12(%ecx)
-	movl	20(%edi), %eax
-	sbbl	%esi, %eax
-	movl	24(%ebx), %esi
-	movl	%edx, 16(%ecx)
-	movl	24(%edi), %edx
-	sbbl	%esi, %edx
-	movl	28(%ebx), %esi
-	movl	%eax, 20(%ecx)
-	movl	28(%edi), %eax
-	sbbl	%esi, %eax
-	movl	32(%ebx), %esi
-	movl	%edx, 24(%ecx)
-	movl	32(%edi), %edx
-	sbbl	%esi, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	36(%ebx), %edx
-	movl	%eax, 28(%ecx)
-	movl	36(%edi), %eax
-	sbbl	%edx, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	40(%ebx), %eax
-	movl	40(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	44(%ebx), %eax
-	movl	44(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	48(%ebx), %eax
-	movl	48(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	52(%ebx), %eax
-	movl	52(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	56(%ebx), %eax
-	movl	56(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	60(%ebx), %eax
-	movl	60(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	72(%esp), %ebx
-	jne	.LBB125_1
-# BB#2:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB125_3
-.LBB125_1:
-	movl	28(%ebx), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-.LBB125_3:
-	testb	%al, %al
-	jne	.LBB125_4
-# BB#5:
-	movl	$0, %ebp
-	movl	$0, %eax
-	jmp	.LBB125_6
-.LBB125_4:
-	movl	(%ebx), %eax
-	movl	4(%ebx), %ebp
-.LBB125_6:
-	jne	.LBB125_7
-# BB#8:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB125_9
-.LBB125_7:
-	movl	24(%ebx), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-.LBB125_9:
-	jne	.LBB125_10
-# BB#11:
-	movl	$0, %edx
-	jmp	.LBB125_12
-.LBB125_10:
-	movl	20(%ebx), %edx
-.LBB125_12:
-	jne	.LBB125_13
-# BB#14:
-	movl	$0, %esi
-	jmp	.LBB125_15
-.LBB125_13:
-	movl	16(%ebx), %esi
-.LBB125_15:
-	jne	.LBB125_16
-# BB#17:
-	movl	$0, %edi
-	jmp	.LBB125_18
-.LBB125_16:
-	movl	12(%ebx), %edi
-.LBB125_18:
-	jne	.LBB125_19
-# BB#20:
-	xorl	%ebx, %ebx
-	jmp	.LBB125_21
-.LBB125_19:
-	movl	8(%ebx), %ebx
-.LBB125_21:
-	addl	16(%esp), %eax          # 4-byte Folded Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 36(%ecx)
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 40(%ecx)
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%edi, 44(%ecx)
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, 48(%ecx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 52(%ecx)
-	movl	%eax, 56(%ecx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%ecx)
-	addl	$40, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end125:
-	.size	mcl_fpDbl_sub8Lbmi2, .Lfunc_end125-mcl_fpDbl_sub8Lbmi2
-
-	.align	16, 0x90
-	.type	.LmulPv288x32,@function
-.LmulPv288x32:                          # @mulPv288x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
 	pushl	%edi
-	pushl	%esi
-	subl	$24, %esp
-	movl	%edx, %eax
-	movl	44(%esp), %edx
-	mulxl	4(%eax), %edi, %esi
-	mulxl	(%eax), %ebp, %ebx
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	addl	%edi, %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	mulxl	8(%eax), %edi, %ebx
-	adcl	%esi, %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	mulxl	12(%eax), %esi, %edi
-	adcl	%ebx, %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	%edx, %ebp
-	mulxl	16(%eax), %ebx, %esi
-	adcl	%edi, %ebx
-	mulxl	20(%eax), %edi, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	%esi, %edi
-	movl	%ebp, %edx
-	mulxl	24(%eax), %esi, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	movl	%ebp, %edx
-	mulxl	28(%eax), %edx, %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	20(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%ecx)
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%ecx)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%ecx)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 12(%ecx)
-	movl	%ebx, 16(%ecx)
-	movl	%edi, 20(%ecx)
-	movl	%esi, 24(%ecx)
-	movl	%edx, 28(%ecx)
-	movl	44(%esp), %edx
-	mulxl	32(%eax), %eax, %edx
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	adcl	$0, %edx
-	movl	%edx, 36(%ecx)
-	movl	%ecx, %eax
-	addl	$24, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end126:
-	.size	.LmulPv288x32, .Lfunc_end126-.LmulPv288x32
-
-	.globl	mcl_fp_mulUnitPre9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre9Lbmi2,@function
-mcl_fp_mulUnitPre9Lbmi2:                # @mcl_fp_mulUnitPre9Lbmi2
-# BB#0:
 	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$76, %esp
-	calll	.L127$pb
-.L127$pb:
-	popl	%ebx
-.Ltmp8:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp8-.L127$pb), %ebx
-	movl	104(%esp), %eax
-	movl	%eax, (%esp)
-	leal	32(%esp), %ecx
-	movl	100(%esp), %edx
-	calll	.LmulPv288x32
-	movl	68(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi
-	movl	48(%esp), %ebx
-	movl	44(%esp), %ebp
-	movl	40(%esp), %esi
-	movl	32(%esp), %edx
-	movl	36(%esp), %ecx
-	movl	96(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%ebp, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%edi, 20(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	addl	$76, %esp
+	calll	mcl_fpDbl_mulPre8Lbmi2@PLT
+	addl	$12, %esp
+	leal	32(%edi), %eax
+	leal	64(%ebp), %ecx
+	pushl	%eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mcl_fpDbl_mulPre8Lbmi2@PLT
+	addl	$16, %esp
+	movl	52(%edi), %ecx
+	movl	48(%edi), %ebx
+	movl	44(%edi), %ebp
+	movl	40(%edi), %esi
+	movl	32(%edi), %eax
+	movl	36(%edi), %edx
+	addl	(%edi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	4(%edi), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	adcl	8(%edi), %esi
+	adcl	12(%edi), %ebp
+	adcl	16(%edi), %ebx
+	adcl	20(%edi), %ecx
+	movl	%ecx, %edx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	56(%edi), %ecx
+	adcl	24(%edi), %ecx
+	movl	60(%edi), %eax
+	adcl	28(%edi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 216(%esp)
+	movl	%ecx, 212(%esp)
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	%edx, 208(%esp)
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	movl	%ebx, 204(%esp)
+	movl	%ebp, %edi
+	movl	%ebp, 200(%esp)
+	movl	%esi, 196(%esp)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 192(%esp)
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	movl	%ebp, 188(%esp)
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 184(%esp)
+	movl	%ecx, 180(%esp)
+	movl	%edx, 176(%esp)
+	movl	%ebx, 172(%esp)
+	movl	%edi, 168(%esp)
+	movl	%edi, %ebp
+	movl	%esi, 164(%esp)
+	movl	%esi, %ecx
+	movl	%eax, 160(%esp)
+	movl	%eax, %edx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 156(%esp)
+	setb	%bl
+	subl	$4, %esp
+	movzbl	%bl, %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	%edi, %esi
+	shll	$31, %esi
+	negl	%edi
+	shrdl	$31, %edi, %esi
+	andl	%eax, %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	andl	%edi, %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	andl	%edi, %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	andl	%edi, %ebp
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	andl	%edi, %ebx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	andl	%edi, %eax
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	andl	%edi, %ecx
+	andl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, %edx
+	shldl	$1, %ecx, %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	shldl	$1, %eax, %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	shldl	$1, %ebx, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	shldl	$1, %ebp, %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	shldl	$1, %eax, %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	shldl	$1, %ecx, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	shldl	$1, %esi, %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	shrl	$31, %edi
+	addl	%esi, %esi
+	leal	160(%esp), %eax
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	pushl	%eax
+	leal	196(%esp), %eax
+	pushl	%eax
+	leal	232(%esp), %eax
+	pushl	%eax
+	calll	mcl_fpDbl_mulPre8Lbmi2@PLT
+	addl	$16, %esp
+	addl	252(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	256(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	260(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	264(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	268(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	272(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	276(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	280(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %edi                  # 4-byte Folded Reload
+	movl	220(%esp), %ebx
+	movl	304(%esp), %ebp
+	subl	(%ebp), %ebx
+	movl	224(%esp), %eax
+	sbbl	4(%ebp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	228(%esp), %ecx
+	sbbl	8(%ebp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	232(%esp), %ecx
+	sbbl	12(%ebp), %ecx
+	movl	236(%esp), %eax
+	sbbl	16(%ebp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	240(%esp), %edx
+	sbbl	20(%ebp), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	244(%esp), %edx
+	sbbl	24(%ebp), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	248(%esp), %eax
+	sbbl	28(%ebp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	32(%ebp), %eax
+	movl	%eax, 152(%esp)                 # 4-byte Spill
+	sbbl	%eax, %esi
+	movl	36(%ebp), %eax
+	movl	%eax, 144(%esp)                 # 4-byte Spill
+	sbbl	%eax, 8(%esp)                   # 4-byte Folded Spill
+	movl	40(%ebp), %eax
+	movl	%eax, 140(%esp)                 # 4-byte Spill
+	sbbl	%eax, 4(%esp)                   # 4-byte Folded Spill
+	movl	44(%ebp), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	sbbl	%eax, 36(%esp)                  # 4-byte Folded Spill
+	movl	48(%ebp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	sbbl	%eax, 20(%esp)                  # 4-byte Folded Spill
+	movl	52(%ebp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	sbbl	%eax, 24(%esp)                  # 4-byte Folded Spill
+	movl	56(%ebp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	sbbl	%eax, 28(%esp)                  # 4-byte Folded Spill
+	movl	60(%ebp), %eax
+	movl	%eax, 148(%esp)                 # 4-byte Spill
+	sbbl	%eax, 12(%esp)                  # 4-byte Folded Spill
+	sbbl	$0, %edi
+	movl	64(%ebp), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	subl	%eax, %ebx
+	movl	68(%ebp), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	sbbl	%eax, %edx
+	movl	72(%ebp), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	sbbl	%eax, 56(%esp)                  # 4-byte Folded Spill
+	movl	76(%ebp), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	sbbl	%eax, %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	80(%ebp), %ecx
+	movl	%ecx, 80(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	sbbl	%ecx, %eax
+	movl	84(%ebp), %ecx
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 52(%esp)                  # 4-byte Folded Spill
+	movl	88(%ebp), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 48(%esp)                  # 4-byte Folded Spill
+	movl	92(%ebp), %ecx
+	movl	%ecx, 100(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 44(%esp)                  # 4-byte Folded Spill
+	movl	96(%ebp), %ecx
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	sbbl	%ecx, %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	movl	100(%ebp), %ecx
+	movl	%ecx, 136(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 8(%esp)                   # 4-byte Folded Spill
+	movl	104(%ebp), %ecx
+	movl	%ecx, 132(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 4(%esp)                   # 4-byte Folded Spill
+	movl	108(%ebp), %ecx
+	movl	%ecx, 128(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 36(%esp)                  # 4-byte Folded Spill
+	movl	112(%ebp), %ecx
+	movl	%ecx, 124(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 20(%esp)                  # 4-byte Folded Spill
+	movl	116(%ebp), %ecx
+	movl	%ecx, 120(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 24(%esp)                  # 4-byte Folded Spill
+	movl	120(%ebp), %ecx
+	movl	%ecx, 116(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 28(%esp)                  # 4-byte Folded Spill
+	movl	124(%ebp), %ecx
+	movl	%ecx, 112(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	sbbl	$0, %edi
+	addl	152(%esp), %ebx                 # 4-byte Folded Reload
+	movl	%edx, %esi
+	adcl	144(%esp), %esi                 # 4-byte Folded Reload
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	140(%esp), %ecx                 # 4-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	92(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	adcl	68(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	60(%esp), %edx                  # 4-byte Folded Reload
+	movl	%eax, 52(%ebp)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%ebp)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 44(%ebp)
+	movl	%ecx, 40(%ebp)
+	movl	%esi, 36(%ebp)
+	movl	%ebx, 32(%ebp)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	148(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%edx, 56(%ebp)
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	88(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 60(%ebp)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	108(%esp), %eax                 # 4-byte Folded Reload
+	movl	%edx, 64(%ebp)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	84(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 68(%ebp)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	104(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 72(%ebp)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	80(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 76(%ebp)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	76(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 80(%ebp)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	72(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 84(%ebp)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	100(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 88(%ebp)
+	adcl	96(%esp), %edi                  # 4-byte Folded Reload
+	movl	%eax, 92(%ebp)
+	movl	%edi, 96(%ebp)
+	movl	136(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 100(%ebp)
+	movl	132(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 104(%ebp)
+	movl	128(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 108(%ebp)
+	movl	124(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 112(%ebp)
+	movl	120(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 116(%ebp)
+	movl	116(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 120(%ebp)
+	movl	112(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 124(%ebp)
+	addl	$284, %esp                      # imm = 0x11C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end127:
-	.size	mcl_fp_mulUnitPre9Lbmi2, .Lfunc_end127-mcl_fp_mulUnitPre9Lbmi2
-
-	.globl	mcl_fpDbl_mulPre9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre9Lbmi2,@function
-mcl_fpDbl_mulPre9Lbmi2:                 # @mcl_fpDbl_mulPre9Lbmi2
-# BB#0:
+.Lfunc_end76:
+	.size	mcl_fpDbl_sqrPre16Lbmi2, .Lfunc_end76-mcl_fpDbl_sqrPre16Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_mont16Lbmi2              # -- Begin function mcl_fp_mont16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_mont16Lbmi2,@function
+mcl_fp_mont16Lbmi2:                     # @mcl_fp_mont16Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$428, %esp              # imm = 0x1AC
-	calll	.L128$pb
-.L128$pb:
-	popl	%esi
-.Ltmp9:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp9-.L128$pb), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	456(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	384(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	%edx, %ebp
-	movl	%esi, %ebx
-	calll	.LmulPv288x32
-	movl	420(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	416(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	404(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	400(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	396(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	392(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	388(%esp), %edi
-	movl	448(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	456(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	%ebp, %edx
-	movl	%esi, %ebx
-	calll	.LmulPv288x32
-	addl	344(%esp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	380(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	376(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	364(%esp), %ebx
-	movl	360(%esp), %edi
-	movl	356(%esp), %esi
-	movl	348(%esp), %ecx
-	movl	352(%esp), %edx
-	movl	448(%esp), %eax
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 4(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	304(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	340(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	336(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	332(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	328(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	324(%esp), %edi
-	movl	320(%esp), %ebp
-	movl	316(%esp), %esi
-	movl	308(%esp), %ecx
-	movl	312(%esp), %edx
-	movl	448(%esp), %eax
-	movl	40(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	264(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	300(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	296(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	292(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	288(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	284(%esp), %ebx
-	movl	280(%esp), %edi
-	movl	276(%esp), %esi
-	movl	268(%esp), %ecx
-	movl	272(%esp), %edx
-	movl	448(%esp), %eax
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	addl	224(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	252(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	248(%esp), %ebx
-	movl	244(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	240(%esp), %edi
-	movl	236(%esp), %ebp
-	movl	228(%esp), %ecx
-	movl	232(%esp), %edx
-	movl	448(%esp), %eax
-	movl	44(%esp), %esi          # 4-byte Reload
-	movl	%esi, 16(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	44(%esp), %eax          # 4-byte Reload
-	addl	184(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	220(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	208(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	204(%esp), %edi
-	movl	200(%esp), %ebx
-	movl	196(%esp), %esi
-	movl	188(%esp), %ecx
-	movl	192(%esp), %edx
-	movl	448(%esp), %eax
-	movl	44(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 20(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	144(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	addl	144(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	164(%esp), %ebx
-	movl	160(%esp), %edi
-	movl	156(%esp), %esi
-	movl	148(%esp), %ecx
-	movl	152(%esp), %edx
-	movl	448(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	104(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	12(%esp), %esi          # 4-byte Reload
-	addl	104(%esp), %esi
-	movl	140(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	136(%esp), %ebp
-	movl	132(%esp), %edi
-	movl	128(%esp), %ebx
-	movl	124(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	116(%esp), %edx
-	movl	108(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	112(%esp), %ecx
-	movl	448(%esp), %eax
-	movl	%esi, 28(%eax)
-	movl	12(%esp), %esi          # 4-byte Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	64(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	%esi, %ebp
-	addl	64(%esp), %ebp
-	movl	24(%esp), %edx          # 4-byte Reload
-	adcl	68(%esp), %edx
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi
-	movl	84(%esp), %ebx
-	movl	80(%esp), %esi
-	movl	76(%esp), %eax
-	movl	448(%esp), %ecx
-	movl	%ebp, 32(%ecx)
-	movl	%edx, 36(%ecx)
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	52(%esp), %edx          # 4-byte Reload
-	movl	%edx, 40(%ecx)
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%esi, 48(%ecx)
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 52(%ecx)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%edi, 56(%ecx)
-	movl	%eax, 60(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 68(%ecx)
-	addl	$428, %esp              # imm = 0x1AC
-	popl	%esi
-	popl	%edi
+	subl	$2412, %esp                     # imm = 0x96C
+	calll	.L77$pb
+.L77$pb:
 	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end128:
-	.size	mcl_fpDbl_mulPre9Lbmi2, .Lfunc_end128-mcl_fpDbl_mulPre9Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre9Lbmi2,@function
-mcl_fpDbl_sqrPre9Lbmi2:                 # @mcl_fpDbl_sqrPre9Lbmi2
-# BB#0:
+.Ltmp19:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp19-.L77$pb), %ebx
+	movl	2444(%esp), %eax
+	movl	-4(%eax), %esi
+	movl	%esi, 72(%esp)                  # 4-byte Spill
+	movl	2440(%esp), %ecx
+	subl	$4, %esp
+	leal	2348(%esp), %eax
+	pushl	(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	2344(%esp), %edi
+	movl	2348(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	imull	%edi, %eax
+	movl	2408(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	2404(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	2400(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	2396(%esp), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	2392(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	2388(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	2384(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	2380(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	2376(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	2372(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	2368(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	2364(%esp), %ebp
+	movl	2360(%esp), %esi
+	movl	2356(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	2352(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	2276(%esp), %ecx
+	pushl	%eax
+	pushl	2452(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	2272(%esp), %edi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	2276(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	2280(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	2284(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	2288(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	2292(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	2296(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	2300(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2304(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %edi                  # 4-byte Reload
+	adcl	2308(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2312(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	2316(%esp), %ebp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	2320(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	2324(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	2328(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	2332(%esp), %esi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	2336(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	2204(%esp), %ecx
+	movzbl	%al, %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	2444(%esp), %eax
+	pushl	4(%eax)
+	pushl	2444(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	addl	2200(%esp), %ecx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	2204(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	2208(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	2212(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	2216(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	2220(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	2224(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2228(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	2232(%esp), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2236(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	2240(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edi                  # 4-byte Reload
+	adcl	2244(%esp), %edi
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	2248(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	2252(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	2256(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	2260(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	2264(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	2132(%esp), %ebp
+	movl	76(%esp), %edx                  # 4-byte Reload
+	movl	%ecx, %esi
+	imull	%ecx, %edx
+	movzbl	%al, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	2452(%esp)
 	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$428, %esp              # imm = 0x1AC
-	calll	.L129$pb
-.L129$pb:
-	popl	%ebx
-.Ltmp10:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp10-.L129$pb), %ebx
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	452(%esp), %edx
-	movl	(%edx), %eax
-	movl	%eax, (%esp)
-	leal	384(%esp), %ecx
-	movl	%edx, %esi
-	movl	%ebx, %edi
-	calll	.LmulPv288x32
-	movl	420(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	416(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	404(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	400(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	396(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	392(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	388(%esp), %ebp
-	movl	448(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	4(%esi), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	%esi, %edx
-	movl	%edi, %ebx
-	calll	.LmulPv288x32
-	addl	344(%esp), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	380(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	376(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	364(%esp), %ebx
-	movl	360(%esp), %edi
-	movl	356(%esp), %esi
-	movl	348(%esp), %ecx
-	movl	352(%esp), %edx
-	movl	448(%esp), %eax
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 4(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	8(%edx), %eax
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	304(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	340(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	336(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	332(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	328(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	324(%esp), %edi
-	movl	320(%esp), %ebp
-	movl	316(%esp), %esi
-	movl	308(%esp), %ecx
-	movl	312(%esp), %edx
-	movl	448(%esp), %eax
-	movl	40(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	12(%edx), %eax
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	264(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	300(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	296(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	292(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	288(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	284(%esp), %ebx
-	movl	280(%esp), %edi
-	movl	276(%esp), %esi
-	movl	268(%esp), %ecx
-	movl	272(%esp), %edx
-	movl	448(%esp), %eax
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 20(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	16(%edx), %eax
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	addl	224(%esp), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	252(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	248(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	244(%esp), %edi
-	movl	240(%esp), %ebp
-	movl	236(%esp), %esi
-	movl	228(%esp), %ecx
-	movl	232(%esp), %edx
-	movl	448(%esp), %eax
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 16(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	20(%edx), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	184(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	220(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	208(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	204(%esp), %ebx
-	movl	200(%esp), %edi
-	movl	196(%esp), %esi
-	movl	188(%esp), %ecx
-	movl	192(%esp), %edx
-	movl	448(%esp), %eax
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	24(%edx), %eax
-	movl	%eax, (%esp)
-	leal	144(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	addl	144(%esp), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	164(%esp), %edi
-	movl	160(%esp), %ebp
-	movl	156(%esp), %esi
-	movl	148(%esp), %ecx
-	movl	152(%esp), %edx
-	movl	448(%esp), %eax
-	movl	24(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 24(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	28(%edx), %eax
-	movl	%eax, (%esp)
-	leal	104(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	4(%esp), %esi           # 4-byte Reload
-	addl	104(%esp), %esi
-	movl	140(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	136(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	132(%esp), %ebp
-	movl	128(%esp), %ebx
-	movl	124(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	120(%esp), %edi
-	movl	116(%esp), %edx
-	movl	108(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	112(%esp), %ecx
-	movl	448(%esp), %eax
-	movl	%esi, 28(%eax)
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	32(%edx), %eax
-	movl	%eax, (%esp)
-	leal	64(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	%esi, %ebp
-	addl	64(%esp), %ebp
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	68(%esp), %edx
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi
-	movl	84(%esp), %ebx
-	movl	80(%esp), %esi
-	movl	76(%esp), %eax
-	movl	448(%esp), %ecx
-	movl	%ebp, 32(%ecx)
-	movl	%edx, 36(%ecx)
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	movl	%edx, 40(%ecx)
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%esi, 48(%ecx)
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 52(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%edi, 56(%ecx)
-	movl	%eax, 60(%ecx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 68(%ecx)
-	addl	$428, %esp              # imm = 0x1AC
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end129:
-	.size	mcl_fpDbl_sqrPre9Lbmi2, .Lfunc_end129-mcl_fpDbl_sqrPre9Lbmi2
-
-	.globl	mcl_fp_mont9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont9Lbmi2,@function
-mcl_fp_mont9Lbmi2:                      # @mcl_fp_mont9Lbmi2
-# BB#0:
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	2128(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	2132(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	2136(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	2140(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	2144(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	2148(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	2152(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2156(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	2160(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2164(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	2168(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	2172(%esp), %edi
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	2176(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	2180(%esp), %esi
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	2184(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	2188(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	2192(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	$0, 36(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	2060(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	addl	2056(%esp), %ecx
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	2060(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	2064(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	2068(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	2072(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	2076(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2080(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	2084(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2088(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	2092(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	2096(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ebp                  # 4-byte Reload
+	adcl	2100(%esp), %ebp
+	adcl	2104(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	2108(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	2112(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	2116(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	2120(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	movl	%ecx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	2452(%esp)
+	leal	1996(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1984(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1988(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1992(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1996(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	2000(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	2004(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2008(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	2012(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2016(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	2020(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	2024(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	adcl	2028(%esp), %ebp
+	movl	%ebp, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	2032(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	2036(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	2040(%esp), %edi
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	2044(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	2048(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	$0, %esi
+	subl	$4, %esp
+	leal	1916(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	12(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	addl	1912(%esp), %ecx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1916(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1920(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1924(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1928(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	1932(%esp), %edi
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1936(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1940(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1944(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1948(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1952(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1956(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1960(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1964(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	adcl	1968(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1972(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	1976(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	setb	%al
+	leal	1840(%esp), %ebp
+	subl	$4, %esp
+	movl	76(%esp), %edx                  # 4-byte Reload
+	movl	%ecx, %esi
+	imull	%ecx, %edx
+	movzbl	%al, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	pushl	%edx
+	movl	2452(%esp), %eax
+	pushl	%eax
 	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$796, %esp              # imm = 0x31C
-	calll	.L130$pb
-.L130$pb:
-	popl	%ebx
-.Ltmp11:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp11-.L130$pb), %ebx
-	movl	828(%esp), %eax
-	movl	-4(%eax), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	752(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	752(%esp), %ebp
-	movl	756(%esp), %esi
-	movl	%ebp, %eax
-	imull	%edi, %eax
-	movl	788(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	784(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	780(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	776(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	772(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	768(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	764(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	760(%esp), %edi
-	movl	%eax, (%esp)
-	leal	712(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	712(%esp), %ebp
-	adcl	716(%esp), %esi
-	adcl	720(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	748(%esp), %ebp
-	sbbl	%eax, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	672(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	64(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	672(%esp), %esi
-	adcl	676(%esp), %edi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	680(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	684(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	688(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	692(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	696(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	704(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	adcl	708(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	828(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	andl	$1, %ebp
-	addl	632(%esp), %esi
-	adcl	636(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	660(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	824(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	592(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	addl	592(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	616(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	624(%esp), %esi
-	adcl	628(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%edi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	andl	$1, %ebp
-	addl	552(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	580(%esp), %edi
-	adcl	584(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	824(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	512(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	512(%esp), %ecx
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	524(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	536(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	544(%esp), %edi
-	adcl	548(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	472(%esp), %ebp
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	480(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	484(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	488(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	492(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	496(%esp), %ebp
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	500(%esp), %esi
-	adcl	504(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	508(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	432(%esp), %ecx
-	movl	820(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	movl	32(%esp), %ecx          # 4-byte Reload
-	addl	432(%esp), %ecx
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	444(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	452(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	adcl	456(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	392(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	movl	%esi, %eax
-	andl	$1, %eax
-	addl	392(%esp), %ebp
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	396(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	400(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	404(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	408(%esp), %ebp
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	412(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	416(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	420(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	424(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	428(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	addl	352(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	364(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	372(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	384(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	movl	%edi, %eax
-	andl	$1, %eax
-	addl	312(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	316(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	320(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	324(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	328(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	332(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	336(%esp), %esi
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	340(%esp), %edi
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	344(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	348(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %ebp
-	movl	824(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	272(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	292(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	296(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	308(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	movl	%edi, %ecx
-	andl	$1, %ecx
-	addl	232(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	240(%esp), %esi
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	244(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	260(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	192(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	192(%esp), %ecx
-	adcl	196(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	adcl	200(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	212(%esp), %esi
-	adcl	216(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	152(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	andl	$1, %ebp
-	addl	152(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	164(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	172(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	180(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	112(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	120(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	128(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	132(%esp), %ebp
-	adcl	136(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	144(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	28(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	72(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	andl	$1, %edi
-	addl	72(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	80(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	84(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1840(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1844(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1848(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1852(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	1856(%esp), %ebp
+	adcl	1860(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1864(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	1868(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1872(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1876(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1880(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1884(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1888(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1892(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1896(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	1900(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1904(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	$0, 8(%esp)                     # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	16(%eax)
+	pushl	2444(%esp)
+	leal	1780(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	28(%esp), %edx                  # 4-byte Reload
+	addl	1768(%esp), %edx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1772(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1776(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	1780(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1784(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1788(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	1792(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1796(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ebp                  # 4-byte Reload
+	adcl	1800(%esp), %ebp
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1804(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1808(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %edi                  # 4-byte Reload
+	adcl	1812(%esp), %edi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1816(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1820(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	1824(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1828(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1832(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
 	movl	%edx, %esi
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	92(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	108(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	828(%esp), %ebx
-	subl	(%ebx), %eax
-	movl	%ecx, %edx
-	sbbl	4(%ebx), %edx
-	movl	%esi, %ecx
-	sbbl	8(%ebx), %ecx
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	12(%ebx), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	sbbl	16(%ebx), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	sbbl	20(%ebx), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	sbbl	24(%ebx), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	sbbl	28(%ebx), %esi
-	movl	60(%esp), %ebp          # 4-byte Reload
-	sbbl	32(%ebx), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	sbbl	$0, %edi
-	andl	$1, %edi
-	movl	%edi, %ebx
-	jne	.LBB130_2
-# BB#1:
-	movl	%esi, 32(%esp)          # 4-byte Spill
-.LBB130_2:
-	testb	%bl, %bl
-	movl	68(%esp), %esi          # 4-byte Reload
-	jne	.LBB130_4
-# BB#3:
-	movl	%eax, %esi
-.LBB130_4:
-	movl	816(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	64(%esp), %eax          # 4-byte Reload
-	jne	.LBB130_6
-# BB#5:
-	movl	%edx, %eax
-.LBB130_6:
-	movl	%eax, 4(%ebp)
-	movl	52(%esp), %eax          # 4-byte Reload
-	jne	.LBB130_8
-# BB#7:
-	movl	%ecx, %eax
-.LBB130_8:
-	movl	%eax, 8(%ebp)
-	movl	44(%esp), %eax          # 4-byte Reload
-	jne	.LBB130_10
-# BB#9:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB130_10:
-	movl	%eax, 12(%ebp)
-	jne	.LBB130_12
-# BB#11:
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-.LBB130_12:
-	movl	40(%esp), %eax          # 4-byte Reload
-	movl	%eax, 16(%ebp)
-	movl	36(%esp), %eax          # 4-byte Reload
-	jne	.LBB130_14
-# BB#13:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB130_14:
-	movl	%eax, 20(%ebp)
-	movl	48(%esp), %eax          # 4-byte Reload
-	jne	.LBB130_16
-# BB#15:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB130_16:
-	movl	%eax, 24(%ebp)
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 28(%ebp)
-	movl	60(%esp), %eax          # 4-byte Reload
-	jne	.LBB130_18
-# BB#17:
-	movl	56(%esp), %eax          # 4-byte Reload
-.LBB130_18:
-	movl	%eax, 32(%ebp)
-	addl	$796, %esp              # imm = 0x31C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end130:
-	.size	mcl_fp_mont9Lbmi2, .Lfunc_end130-mcl_fp_mont9Lbmi2
-
-	.globl	mcl_fp_montNF9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF9Lbmi2,@function
-mcl_fp_montNF9Lbmi2:                    # @mcl_fp_montNF9Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$796, %esp              # imm = 0x31C
-	calll	.L131$pb
-.L131$pb:
-	popl	%ebx
-.Ltmp12:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp12-.L131$pb), %ebx
-	movl	828(%esp), %eax
-	movl	-4(%eax), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	752(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	752(%esp), %esi
-	movl	756(%esp), %ebp
-	movl	%esi, %eax
-	imull	%edi, %eax
-	movl	788(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	784(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	780(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	776(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	772(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	768(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	764(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	760(%esp), %edi
-	movl	%eax, (%esp)
-	leal	712(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	712(%esp), %esi
-	adcl	716(%esp), %ebp
-	adcl	720(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	740(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	672(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	708(%esp), %eax
-	addl	672(%esp), %ebp
-	adcl	676(%esp), %edi
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	680(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	684(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	688(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	692(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	696(%esp), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	704(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	632(%esp), %ebp
-	adcl	636(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	656(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	664(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	592(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	628(%esp), %eax
-	addl	592(%esp), %edi
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	596(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	600(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	604(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	608(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	612(%esp), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	616(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	620(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	624(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	adcl	$0, %ebp
-	movl	%edi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	828(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	addl	552(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	572(%esp), %esi
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	576(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	588(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	512(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	548(%esp), %eax
-	movl	32(%esp), %edx          # 4-byte Reload
-	addl	512(%esp), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	516(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	520(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	524(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	528(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	532(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	536(%esp), %ebp
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	540(%esp), %edi
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	544(%esp), %esi
-	adcl	$0, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	movl	32(%esp), %eax          # 4-byte Reload
-	addl	472(%esp), %eax
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	496(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	adcl	500(%esp), %edi
-	movl	%edi, %ebp
-	adcl	504(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	432(%esp), %ecx
-	movl	820(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	movl	468(%esp), %eax
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	432(%esp), %ecx
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	436(%esp), %esi
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	440(%esp), %edi
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	444(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	448(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	452(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	456(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	460(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	464(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	392(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	392(%esp), %ebp
-	adcl	396(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	400(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	412(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	424(%esp), %edi
-	movl	40(%esp), %esi          # 4-byte Reload
-	adcl	428(%esp), %esi
-	movl	824(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	388(%esp), %eax
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	352(%esp), %ecx
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	356(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	360(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	364(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	368(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	372(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	376(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	380(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	384(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	adcl	$0, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	312(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	324(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	340(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	348(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	308(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	272(%esp), %ecx
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	280(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	292(%esp), %ebp
-	adcl	296(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	232(%esp), %edi
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	236(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebp, %edi
-	adcl	252(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	192(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	228(%esp), %ebp
-	movl	%esi, %ecx
-	addl	192(%esp), %ecx
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	196(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	208(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	152(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	152(%esp), %edi
-	adcl	156(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	160(%esp), %edi
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	164(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	188(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	148(%esp), %ebp
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	112(%esp), %ecx
-	adcl	116(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	adcl	120(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	128(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %esi          # 4-byte Reload
-	adcl	132(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	136(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	144(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	72(%esp), %ecx
-	movl	828(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	addl	72(%esp), %edi
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebx          # 4-byte Reload
-	adcl	80(%esp), %ebx
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	84(%esp), %edi
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	92(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	108(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	828(%esp), %eax
-	subl	(%eax), %edx
-	sbbl	4(%eax), %ebx
-	movl	%edi, %ecx
-	sbbl	8(%eax), %ecx
-	movl	52(%esp), %esi          # 4-byte Reload
-	sbbl	12(%eax), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	40(%esp), %esi          # 4-byte Reload
-	sbbl	16(%eax), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	sbbl	20(%eax), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	sbbl	24(%eax), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	sbbl	28(%eax), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	sbbl	32(%eax), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	sarl	$31, %ebp
-	testl	%ebp, %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	js	.LBB131_2
-# BB#1:
-	movl	%edx, %eax
-.LBB131_2:
-	movl	816(%esp), %edx
-	movl	%eax, (%edx)
-	movl	64(%esp), %esi          # 4-byte Reload
-	js	.LBB131_4
-# BB#3:
-	movl	%ebx, %esi
-.LBB131_4:
-	movl	%esi, 4(%edx)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB131_6
-# BB#5:
-	movl	%ecx, %edi
-.LBB131_6:
-	movl	%edi, 8(%edx)
-	js	.LBB131_8
-# BB#7:
-	movl	16(%esp), %ebp          # 4-byte Reload
-.LBB131_8:
-	movl	%ebp, 12(%edx)
-	js	.LBB131_10
-# BB#9:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB131_10:
-	movl	%eax, 16(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB131_12
-# BB#11:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB131_12:
-	movl	%eax, 20(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB131_14
-# BB#13:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB131_14:
-	movl	%eax, 24(%edx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	js	.LBB131_16
-# BB#15:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB131_16:
-	movl	%eax, 28(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB131_18
-# BB#17:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB131_18:
-	movl	%eax, 32(%edx)
-	addl	$796, %esp              # imm = 0x31C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end131:
-	.size	mcl_fp_montNF9Lbmi2, .Lfunc_end131-mcl_fp_montNF9Lbmi2
-
-	.globl	mcl_fp_montRed9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed9Lbmi2,@function
-mcl_fp_montRed9Lbmi2:                   # @mcl_fp_montRed9Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$492, %esp              # imm = 0x1EC
-	calll	.L132$pb
-.L132$pb:
-	popl	%ebx
-.Ltmp13:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp13-.L132$pb), %ebx
-	movl	520(%esp), %edx
-	movl	-4(%edx), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	516(%esp), %eax
-	movl	(%eax), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	4(%eax), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	%esi, %ecx
-	imull	%edi, %ecx
-	movl	68(%eax), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	64(%eax), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	60(%eax), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	56(%eax), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	52(%eax), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	48(%eax), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	44(%eax), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	40(%eax), %edi
-	movl	%edi, 124(%esp)         # 4-byte Spill
-	movl	36(%eax), %edi
-	movl	%edi, 120(%esp)         # 4-byte Spill
-	movl	32(%eax), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	28(%eax), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	24(%eax), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	20(%eax), %ebp
-	movl	16(%eax), %edi
-	movl	12(%eax), %esi
-	movl	8(%eax), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	(%edx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	32(%edx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	28(%edx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%edx), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	20(%edx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	16(%edx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	12(%edx), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	8(%edx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	4(%edx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ecx, (%esp)
-	leal	448(%esp), %ecx
-	calll	.LmulPv288x32
-	movl	76(%esp), %eax          # 4-byte Reload
-	addl	448(%esp), %eax
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	452(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	460(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	464(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	468(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	sbbl	%eax, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	408(%esp), %ecx
-	movl	520(%esp), %edx
-	calll	.LmulPv288x32
-	movl	76(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	408(%esp), %ecx
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	412(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	416(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	420(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	424(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	428(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	432(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	436(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	440(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ebp
-	adcl	$0, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	520(%esp), %edx
-	calll	.LmulPv288x32
-	addl	368(%esp), %edi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	372(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	404(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	520(%esp), %edx
-	calll	.LmulPv288x32
-	addl	328(%esp), %ebp
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	332(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	364(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	288(%esp), %ecx
-	movl	520(%esp), %edx
-	calll	.LmulPv288x32
-	movl	64(%esp), %eax          # 4-byte Reload
-	addl	288(%esp), %eax
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	292(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	248(%esp), %ecx
-	movl	520(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	addl	248(%esp), %esi
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	252(%esp), %ecx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	264(%esp), %ebp
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%edi, %esi
-	adcl	$0, %esi
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	208(%esp), %ecx
-	movl	520(%esp), %edx
-	calll	.LmulPv288x32
-	addl	208(%esp), %edi
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	212(%esp), %ecx
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	220(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	168(%esp), %ecx
-	movl	520(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	addl	168(%esp), %ebp
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	172(%esp), %ecx
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %ebp         # 4-byte Reload
-	adcl	180(%esp), %ebp
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	184(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	128(%esp), %ecx
-	movl	520(%esp), %edx
-	calll	.LmulPv288x32
-	addl	128(%esp), %edi
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	132(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	%eax, %edi
-	adcl	136(%esp), %ebp
-	movl	%ebp, 124(%esp)         # 4-byte Spill
-	adcl	140(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	144(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	%eax, %ebx
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	152(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	164(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	subl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	124(%esp), %eax         # 4-byte Reload
-	sbbl	16(%esp), %eax          # 4-byte Folded Reload
-	sbbl	24(%esp), %esi          # 4-byte Folded Reload
-	sbbl	28(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebx         # 4-byte Reload
-	sbbl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebx         # 4-byte Reload
-	sbbl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx          # 4-byte Reload
-	sbbl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 92(%esp)          # 4-byte Spill
-	movl	%edx, %ebx
-	movl	%ebp, %edx
-	sbbl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 104(%esp)         # 4-byte Spill
-	sbbl	$0, %edx
-	andl	$1, %edx
-	jne	.LBB132_2
-# BB#1:
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-.LBB132_2:
-	testb	%dl, %dl
-	movl	120(%esp), %ecx         # 4-byte Reload
-	jne	.LBB132_4
-# BB#3:
-	movl	%edi, %ecx
-.LBB132_4:
-	movl	512(%esp), %edi
-	movl	%ecx, (%edi)
-	movl	88(%esp), %ecx          # 4-byte Reload
-	jne	.LBB132_6
-# BB#5:
-	movl	%eax, 124(%esp)         # 4-byte Spill
-.LBB132_6:
-	movl	124(%esp), %eax         # 4-byte Reload
-	movl	%eax, 4(%edi)
-	movl	96(%esp), %eax          # 4-byte Reload
-	jne	.LBB132_8
-# BB#7:
-	movl	%esi, %eax
-.LBB132_8:
-	movl	%eax, 8(%edi)
-	movl	116(%esp), %eax         # 4-byte Reload
-	movl	%eax, 12(%edi)
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	108(%esp), %ebp         # 4-byte Reload
-	jne	.LBB132_10
-# BB#9:
-	movl	72(%esp), %ebp          # 4-byte Reload
-.LBB132_10:
-	movl	%ebp, 16(%edi)
-	movl	112(%esp), %ebx         # 4-byte Reload
-	jne	.LBB132_12
-# BB#11:
-	movl	76(%esp), %ebx          # 4-byte Reload
-.LBB132_12:
-	movl	%ebx, 20(%edi)
-	movl	100(%esp), %esi         # 4-byte Reload
-	jne	.LBB132_14
-# BB#13:
-	movl	84(%esp), %esi          # 4-byte Reload
-.LBB132_14:
-	movl	%esi, 24(%edi)
-	jne	.LBB132_16
-# BB#15:
-	movl	92(%esp), %ecx          # 4-byte Reload
-.LBB132_16:
-	movl	%ecx, 28(%edi)
-	jne	.LBB132_18
-# BB#17:
-	movl	104(%esp), %eax         # 4-byte Reload
-.LBB132_18:
-	movl	%eax, 32(%edi)
-	addl	$492, %esp              # imm = 0x1EC
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end132:
-	.size	mcl_fp_montRed9Lbmi2, .Lfunc_end132-mcl_fp_montRed9Lbmi2
-
-	.globl	mcl_fp_addPre9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre9Lbmi2,@function
-mcl_fp_addPre9Lbmi2:                    # @mcl_fp_addPre9Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %edi
-	adcl	8(%ecx), %edi
-	movl	16(%esp), %ebx
-	movl	%edx, (%ebx)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%ebx)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ebx)
-	movl	20(%ecx), %edx
-	adcl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ebx)
-	movl	24(%ecx), %esi
-	adcl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ebx)
-	movl	28(%ecx), %edx
-	adcl	%edi, %edx
-	movl	%esi, 24(%ebx)
-	movl	%edx, 28(%ebx)
-	movl	32(%eax), %eax
-	movl	32(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 32(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end133:
-	.size	mcl_fp_addPre9Lbmi2, .Lfunc_end133-mcl_fp_addPre9Lbmi2
-
-	.globl	mcl_fp_subPre9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre9Lbmi2,@function
-mcl_fp_subPre9Lbmi2:                    # @mcl_fp_subPre9Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edx), %ebx
-	movl	20(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebp)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ebp)
-	movl	24(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ebp)
-	movl	28(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	%edi, 24(%ebp)
-	movl	%esi, 28(%ebp)
-	movl	32(%edx), %edx
-	movl	32(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 32(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end134:
-	.size	mcl_fp_subPre9Lbmi2, .Lfunc_end134-mcl_fp_subPre9Lbmi2
-
-	.globl	mcl_fp_shr1_9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_9Lbmi2,@function
-mcl_fp_shr1_9Lbmi2:                     # @mcl_fp_shr1_9Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	8(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 4(%esi)
-	movl	12(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 8(%esi)
-	movl	16(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 12(%esi)
-	movl	20(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 16(%esi)
-	movl	24(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 20(%esi)
-	movl	28(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 24(%esi)
-	movl	32(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	%edx, 28(%esi)
-	shrl	%eax
-	movl	%eax, 32(%esi)
-	popl	%esi
-	retl
-.Lfunc_end135:
-	.size	mcl_fp_shr1_9Lbmi2, .Lfunc_end135-mcl_fp_shr1_9Lbmi2
-
-	.globl	mcl_fp_add9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add9Lbmi2,@function
-mcl_fp_add9Lbmi2:                       # @mcl_fp_add9Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$20, %esp
-	movl	48(%esp), %edi
-	movl	(%edi), %ecx
-	movl	4(%edi), %eax
-	movl	44(%esp), %ebx
-	addl	(%ebx), %ecx
-	movl	%ecx, %ebp
-	adcl	4(%ebx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	8(%edi), %eax
-	adcl	8(%ebx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	12(%ebx), %ecx
-	movl	16(%ebx), %eax
-	adcl	12(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	16(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	20(%ebx), %esi
-	adcl	20(%edi), %esi
-	movl	24(%ebx), %edx
-	adcl	24(%edi), %edx
-	movl	28(%ebx), %ecx
-	adcl	28(%edi), %ecx
-	movl	32(%ebx), %eax
-	adcl	32(%edi), %eax
-	movl	40(%esp), %edi
-	movl	%ebp, (%edi)
-	movl	16(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 4(%edi)
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%edi)
-	movl	8(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, 12(%edi)
-	movl	4(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, 16(%edi)
-	movl	%esi, 20(%edi)
-	movl	%edx, 24(%edi)
-	movl	%ecx, 28(%edi)
-	movl	%eax, 32(%edi)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	52(%esp), %edi
-	subl	(%edi), %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	movl	16(%esp), %ebp          # 4-byte Reload
-	sbbl	4(%edi), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebp          # 4-byte Reload
-	sbbl	8(%edi), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %ebp           # 4-byte Reload
-	sbbl	12(%edi), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	4(%esp), %ebp           # 4-byte Reload
-	sbbl	16(%edi), %ebp
-	sbbl	20(%edi), %esi
-	sbbl	24(%edi), %edx
-	sbbl	28(%edi), %ecx
-	sbbl	32(%edi), %eax
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB136_2
-# BB#1:                                 # %nocarry
-	movl	(%esp), %edi            # 4-byte Reload
-	movl	40(%esp), %ebx
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	%ebp, 16(%ebx)
-	movl	%esi, 20(%ebx)
-	movl	%edx, 24(%ebx)
-	movl	%ecx, 28(%ebx)
-	movl	%eax, 32(%ebx)
-.LBB136_2:                              # %carry
-	addl	$20, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end136:
-	.size	mcl_fp_add9Lbmi2, .Lfunc_end136-mcl_fp_add9Lbmi2
-
-	.globl	mcl_fp_addNF9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF9Lbmi2,@function
-mcl_fp_addNF9Lbmi2:                     # @mcl_fp_addNF9Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$72, %esp
-	movl	100(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edi
-	movl	96(%esp), %esi
-	addl	(%esi), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	4(%esi), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	32(%eax), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	28(%eax), %ebp
-	movl	24(%eax), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	20(%eax), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	16(%eax), %ebx
-	movl	12(%eax), %edx
-	movl	8(%eax), %ecx
-	adcl	8(%esi), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	12(%esi), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	16(%esi), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	20(%esi), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	24(%esi), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	28(%esi), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	32(%esi), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	104(%esp), %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ebp
-	subl	(%esi), %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	sbbl	4(%esi), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	sbbl	8(%esi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	12(%esi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	sbbl	16(%esi), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%esi), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	sbbl	24(%esi), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%esi), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %edx
-	movl	%ecx, %ebp
-	sbbl	32(%esi), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	sarl	$31, %esi
-	testl	%esi, %esi
-	js	.LBB137_2
-# BB#1:
-	movl	(%esp), %eax            # 4-byte Reload
-.LBB137_2:
-	movl	92(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	js	.LBB137_4
-# BB#3:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB137_4:
-	movl	%eax, 4(%ecx)
-	movl	68(%esp), %esi          # 4-byte Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB137_6
-# BB#5:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB137_6:
-	movl	%eax, 8(%ecx)
-	movl	%ebp, %eax
-	js	.LBB137_8
-# BB#7:
-	movl	12(%esp), %edx          # 4-byte Reload
-.LBB137_8:
-	movl	%edx, 12(%ecx)
-	movl	56(%esp), %edx          # 4-byte Reload
-	js	.LBB137_10
-# BB#9:
-	movl	16(%esp), %ebx          # 4-byte Reload
-.LBB137_10:
-	movl	%ebx, 16(%ecx)
-	js	.LBB137_12
-# BB#11:
-	movl	20(%esp), %edi          # 4-byte Reload
-.LBB137_12:
-	movl	%edi, 20(%ecx)
-	js	.LBB137_14
-# BB#13:
-	movl	24(%esp), %esi          # 4-byte Reload
-.LBB137_14:
-	movl	%esi, 24(%ecx)
-	js	.LBB137_16
-# BB#15:
-	movl	28(%esp), %edx          # 4-byte Reload
-.LBB137_16:
-	movl	%edx, 28(%ecx)
-	js	.LBB137_18
-# BB#17:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB137_18:
-	movl	%eax, 32(%ecx)
-	addl	$72, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end137:
-	.size	mcl_fp_addNF9Lbmi2, .Lfunc_end137-mcl_fp_addNF9Lbmi2
-
-	.globl	mcl_fp_sub9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub9Lbmi2,@function
-mcl_fp_sub9Lbmi2:                       # @mcl_fp_sub9Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$28, %esp
-	movl	52(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	56(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	16(%esi), %edx
-	sbbl	16(%edi), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	20(%esi), %ecx
-	sbbl	20(%edi), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	28(%esi), %ebp
-	sbbl	28(%edi), %ebp
-	movl	32(%esi), %esi
-	sbbl	32(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	48(%esp), %ebx
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	%edx, 16(%ebx)
-	movl	%ecx, 20(%ebx)
-	movl	%eax, 24(%ebx)
-	movl	%ebp, 28(%ebx)
-	movl	%esi, 32(%ebx)
-	je	.LBB138_2
-# BB#1:                                 # %carry
-	movl	%esi, %edi
-	movl	60(%esp), %esi
-	movl	12(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	adcl	8(%esi), %ecx
-	movl	12(%esi), %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	%ecx, 24(%ebx)
-	movl	28(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 28(%ebx)
-	movl	32(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 32(%ebx)
-.LBB138_2:                              # %nocarry
-	addl	$28, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end138:
-	.size	mcl_fp_sub9Lbmi2, .Lfunc_end138-mcl_fp_sub9Lbmi2
-
-	.globl	mcl_fp_subNF9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF9Lbmi2,@function
-mcl_fp_subNF9Lbmi2:                     # @mcl_fp_subNF9Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$48, %esp
-	movl	72(%esp), %edx
-	movl	(%edx), %ecx
-	movl	4(%edx), %eax
-	movl	76(%esp), %esi
-	subl	(%esi), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	sbbl	4(%esi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	32(%edx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	28(%edx), %ebp
-	movl	24(%edx), %edi
-	movl	20(%edx), %ebx
-	movl	16(%edx), %ecx
-	movl	12(%edx), %eax
-	movl	8(%edx), %edx
-	sbbl	8(%esi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	sbbl	12(%esi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	sbbl	16(%esi), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	sbbl	20(%esi), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	sbbl	24(%esi), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	sbbl	28(%esi), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	sbbl	32(%esi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	sarl	$31, %ecx
-	movl	%ecx, %edi
-	shldl	$1, %eax, %edi
-	movl	80(%esp), %ebp
-	movl	12(%ebp), %eax
-	andl	%edi, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	4(%ebp), %ebx
-	andl	%edi, %ebx
-	andl	(%ebp), %edi
-	movl	32(%ebp), %eax
-	andl	%ecx, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	rorxl	$31, %ecx, %eax
-	andl	28(%ebp), %ecx
-	movl	24(%ebp), %edx
-	andl	%eax, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	20(%ebp), %esi
-	andl	%eax, %esi
-	movl	16(%ebp), %edx
-	andl	%eax, %edx
-	andl	8(%ebp), %eax
-	addl	36(%esp), %edi          # 4-byte Folded Reload
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	68(%esp), %ebp
-	movl	%edi, (%ebp)
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebx, 4(%ebp)
-	movl	4(%esp), %edi           # 4-byte Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%eax, 8(%ebp)
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edi, 12(%ebp)
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 16(%ebp)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, 20(%ebp)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 24(%ebp)
-	movl	%ecx, 28(%ebp)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%ebp)
-	addl	$48, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end139:
-	.size	mcl_fp_subNF9Lbmi2, .Lfunc_end139-mcl_fp_subNF9Lbmi2
-
-	.globl	mcl_fpDbl_add9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add9Lbmi2,@function
-mcl_fpDbl_add9Lbmi2:                    # @mcl_fpDbl_add9Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$68, %esp
-	movl	96(%esp), %edx
-	movl	92(%esp), %edi
-	movl	12(%edi), %esi
-	movl	16(%edi), %ecx
-	movl	8(%edx), %ebx
-	movl	(%edx), %ebp
-	addl	(%edi), %ebp
-	movl	88(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%edx), %ebp
-	adcl	4(%edi), %ebp
-	adcl	8(%edi), %ebx
-	adcl	12(%edx), %esi
-	adcl	16(%edx), %ecx
-	movl	%ebp, 4(%eax)
-	movl	44(%edx), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	%ebx, 8(%eax)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%eax)
-	movl	20(%edi), %esi
-	adcl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%ecx, 16(%eax)
-	movl	24(%edi), %ecx
-	adcl	%ebx, %ecx
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%eax)
-	movl	28(%edi), %esi
-	adcl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%ecx, 24(%eax)
-	movl	32(%edi), %ecx
-	adcl	%ebx, %ecx
-	movl	36(%edx), %ebp
-	movl	%esi, 28(%eax)
-	movl	36(%edi), %esi
-	adcl	%ebp, %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	40(%edx), %esi
-	movl	%ecx, 32(%eax)
-	movl	40(%edi), %eax
-	adcl	%esi, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%edi), %eax
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%edx), %ecx
-	movl	48(%edi), %ebx
-	adcl	%ecx, %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	52(%edx), %eax
-	movl	52(%edi), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	56(%edx), %esi
-	movl	56(%edi), %eax
-	adcl	%esi, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%edx), %ebp
-	movl	60(%edi), %esi
-	adcl	%ebp, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	64(%edx), %eax
-	movl	64(%edi), %ebp
-	adcl	%eax, %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	68(%edx), %edx
-	movl	68(%edi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	100(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	subl	(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	sbbl	4(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	sbbl	8(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	sbbl	12(%edi), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	sbbl	16(%edi), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	sbbl	20(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	movl	32(%esp), %ebp          # 4-byte Reload
-	sbbl	28(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %ebx
-	sbbl	32(%edi), %ebx
-	sbbl	$0, %edx
-	andl	$1, %edx
-	jne	.LBB140_2
-# BB#1:
-	movl	%ebx, %ebp
-.LBB140_2:
-	testb	%dl, %dl
-	movl	60(%esp), %edx          # 4-byte Reload
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	36(%esp), %esi          # 4-byte Reload
-	movl	56(%esp), %edi          # 4-byte Reload
-	movl	52(%esp), %ebx          # 4-byte Reload
-	jne	.LBB140_4
-# BB#3:
-	movl	(%esp), %ecx            # 4-byte Reload
-	movl	4(%esp), %esi           # 4-byte Reload
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	16(%esp), %edx          # 4-byte Reload
-.LBB140_4:
-	movl	88(%esp), %eax
-	movl	%edx, 36(%eax)
-	movl	%ebx, 40(%eax)
-	movl	%edi, 44(%eax)
-	movl	%esi, 48(%eax)
-	movl	%ecx, 52(%eax)
-	movl	44(%esp), %edx          # 4-byte Reload
-	movl	64(%esp), %ecx          # 4-byte Reload
-	jne	.LBB140_6
-# BB#5:
-	movl	20(%esp), %ecx          # 4-byte Reload
-.LBB140_6:
-	movl	%ecx, 56(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	jne	.LBB140_8
-# BB#7:
-	movl	24(%esp), %edx          # 4-byte Reload
-.LBB140_8:
-	movl	%edx, 60(%eax)
-	jne	.LBB140_10
-# BB#9:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB140_10:
-	movl	%ecx, 64(%eax)
-	movl	%ebp, 68(%eax)
-	addl	$68, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end140:
-	.size	mcl_fpDbl_add9Lbmi2, .Lfunc_end140-mcl_fpDbl_add9Lbmi2
-
-	.globl	mcl_fpDbl_sub9Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub9Lbmi2,@function
-mcl_fpDbl_sub9Lbmi2:                    # @mcl_fpDbl_sub9Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$52, %esp
-	movl	76(%esp), %ebx
-	movl	(%ebx), %eax
-	movl	4(%ebx), %edx
-	movl	80(%esp), %ebp
-	subl	(%ebp), %eax
-	sbbl	4(%ebp), %edx
-	movl	8(%ebx), %esi
-	sbbl	8(%ebp), %esi
-	movl	72(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%ebx), %eax
-	sbbl	12(%ebp), %eax
-	movl	%edx, 4(%ecx)
-	movl	16(%ebx), %edx
-	sbbl	16(%ebp), %edx
-	movl	%esi, 8(%ecx)
-	movl	20(%ebp), %esi
-	movl	%eax, 12(%ecx)
-	movl	20(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	24(%ebp), %esi
-	movl	%edx, 16(%ecx)
-	movl	24(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	28(%ebp), %esi
-	movl	%eax, 20(%ecx)
-	movl	28(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	32(%ebp), %esi
-	movl	%edx, 24(%ecx)
-	movl	32(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	36(%ebp), %esi
-	movl	%eax, 28(%ecx)
-	movl	36(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%ebp), %eax
-	movl	%edx, 32(%ecx)
-	movl	40(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	44(%ebp), %eax
-	movl	44(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	48(%ebp), %eax
-	movl	48(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	52(%ebp), %eax
-	movl	52(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	56(%ebp), %eax
-	movl	56(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	60(%ebp), %eax
-	movl	60(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	64(%ebp), %eax
-	movl	64(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	68(%ebp), %eax
-	movl	68(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	84(%esp), %ebp
-	jne	.LBB141_1
-# BB#2:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB141_3
-.LBB141_1:
-	movl	32(%ebp), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-.LBB141_3:
-	testb	%al, %al
-	jne	.LBB141_4
-# BB#5:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	movl	$0, %esi
-	jmp	.LBB141_6
-.LBB141_4:
-	movl	(%ebp), %esi
-	movl	4(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB141_6:
-	jne	.LBB141_7
-# BB#8:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB141_9
-.LBB141_7:
-	movl	28(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB141_9:
-	jne	.LBB141_10
-# BB#11:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB141_12
-.LBB141_10:
-	movl	24(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB141_12:
-	jne	.LBB141_13
-# BB#14:
-	movl	$0, %edi
-	jmp	.LBB141_15
-.LBB141_13:
-	movl	20(%ebp), %edi
-.LBB141_15:
-	jne	.LBB141_16
-# BB#17:
-	movl	$0, %ebx
-	jmp	.LBB141_18
-.LBB141_16:
-	movl	16(%ebp), %ebx
-.LBB141_18:
-	jne	.LBB141_19
-# BB#20:
-	movl	%ebp, %eax
-	movl	$0, %ebp
-	jmp	.LBB141_21
-.LBB141_19:
-	movl	%ebp, %eax
-	movl	12(%eax), %ebp
-.LBB141_21:
-	jne	.LBB141_22
-# BB#23:
-	xorl	%eax, %eax
-	jmp	.LBB141_24
-.LBB141_22:
-	movl	8(%eax), %eax
-.LBB141_24:
-	addl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	4(%esp), %edx           # 4-byte Reload
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, 36(%ecx)
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 40(%ecx)
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 48(%ecx)
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 52(%ecx)
-	movl	(%esp), %edx            # 4-byte Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edi, 56(%ecx)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 60(%ecx)
-	movl	%eax, 64(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%ecx)
-	addl	$52, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end141:
-	.size	mcl_fpDbl_sub9Lbmi2, .Lfunc_end141-mcl_fpDbl_sub9Lbmi2
-
-	.align	16, 0x90
-	.type	.LmulPv320x32,@function
-.LmulPv320x32:                          # @mulPv320x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$28, %esp
-	movl	%edx, %eax
-	movl	48(%esp), %edx
-	mulxl	4(%eax), %edi, %esi
-	mulxl	(%eax), %ebp, %ebx
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	addl	%edi, %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	mulxl	8(%eax), %edi, %ebx
-	adcl	%esi, %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	mulxl	12(%eax), %esi, %edi
-	adcl	%ebx, %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	mulxl	16(%eax), %esi, %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	adcl	%edi, %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	%edx, %ebp
-	mulxl	20(%eax), %ebx, %esi
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	mulxl	24(%eax), %edi, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	%esi, %edi
-	movl	%ebp, %edx
-	mulxl	28(%eax), %esi, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	movl	%ebp, %edx
-	mulxl	32(%eax), %edx, %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	24(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%ecx)
-	movl	20(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%ecx)
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%ecx)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%ecx)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 16(%ecx)
-	movl	%ebx, 20(%ecx)
-	movl	%edi, 24(%ecx)
-	movl	%esi, 28(%ecx)
-	movl	%edx, 32(%ecx)
-	movl	48(%esp), %edx
-	mulxl	36(%eax), %eax, %edx
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 36(%ecx)
-	adcl	$0, %edx
-	movl	%edx, 40(%ecx)
-	movl	%ecx, %eax
-	addl	$28, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end142:
-	.size	.LmulPv320x32, .Lfunc_end142-.LmulPv320x32
-
-	.globl	mcl_fp_mulUnitPre10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre10Lbmi2,@function
-mcl_fp_mulUnitPre10Lbmi2:               # @mcl_fp_mulUnitPre10Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$76, %esp
-	calll	.L143$pb
-.L143$pb:
-	popl	%ebx
-.Ltmp14:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp14-.L143$pb), %ebx
-	movl	104(%esp), %eax
-	movl	%eax, (%esp)
-	leal	32(%esp), %ecx
-	movl	100(%esp), %edx
-	calll	.LmulPv320x32
-	movl	72(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebx
-	movl	48(%esp), %ebp
-	movl	44(%esp), %edi
-	movl	40(%esp), %esi
-	movl	32(%esp), %edx
-	movl	36(%esp), %ecx
-	movl	96(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebp, 16(%eax)
-	movl	%ebx, 20(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	addl	$76, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end143:
-	.size	mcl_fp_mulUnitPre10Lbmi2, .Lfunc_end143-mcl_fp_mulUnitPre10Lbmi2
-
-	.globl	mcl_fpDbl_mulPre10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre10Lbmi2,@function
-mcl_fpDbl_mulPre10Lbmi2:                # @mcl_fpDbl_mulPre10Lbmi2
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$188, %esp
-	calll	.L144$pb
-.L144$pb:
-	popl	%ebx
-.Ltmp15:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp15-.L144$pb), %ebx
-	movl	%ebx, -128(%ebp)        # 4-byte Spill
-	movl	16(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	12(%ebp), %esi
-	movl	%esi, 4(%esp)
-	movl	8(%ebp), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre5Lbmi2@PLT
-	leal	20(%edi), %eax
-	movl	%eax, 8(%esp)
-	leal	20(%esi), %eax
-	movl	%eax, 4(%esp)
-	movl	8(%ebp), %eax
-	leal	40(%eax), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre5Lbmi2@PLT
-	movl	28(%esi), %edi
-	movl	(%esi), %ebx
-	movl	4(%esi), %eax
-	addl	20(%esi), %ebx
-	movl	%ebx, -148(%ebp)        # 4-byte Spill
-	adcl	24(%esi), %eax
-	movl	%eax, -132(%ebp)        # 4-byte Spill
-	adcl	8(%esi), %edi
-	movl	%edi, -140(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -96(%ebp)         # 4-byte Spill
-	movl	16(%ebp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	addl	20(%esi), %eax
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-	adcl	24(%esi), %ecx
-	movl	%ecx, -120(%ebp)        # 4-byte Spill
-	movl	28(%esi), %eax
-	adcl	8(%esi), %eax
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	movl	32(%esi), %eax
-	adcl	12(%esi), %eax
-	movl	36(%esi), %ecx
-	adcl	16(%esi), %ecx
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %esi
-	popl	%eax
-	movl	%esi, -156(%ebp)        # 4-byte Spill
-	movl	%ebx, -124(%ebp)        # 4-byte Spill
-	jb	.LBB144_2
-# BB#1:
-	xorl	%edi, %edi
-	movl	$0, -124(%ebp)          # 4-byte Folded Spill
-.LBB144_2:
-	movl	%edi, -136(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %esi
-	movl	%esi, %ebx
-	movl	36(%ebx), %esi
-	movl	32(%ebx), %edi
-	movl	-96(%ebp), %edx         # 4-byte Reload
-	pushl	%eax
-	movl	%edx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	12(%ebx), %edi
-	movl	%edi, -116(%ebp)        # 4-byte Spill
-	adcl	16(%ebx), %esi
-	movl	%esi, -144(%ebp)        # 4-byte Spill
-	movl	%ecx, -112(%ebp)        # 4-byte Spill
-	movl	%eax, -104(%ebp)        # 4-byte Spill
-	movl	-160(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -108(%ebp)        # 4-byte Spill
-	movl	-120(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -96(%ebp)         # 4-byte Spill
-	movl	-152(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -100(%ebp)        # 4-byte Spill
-	jb	.LBB144_4
-# BB#3:
-	movl	$0, -112(%ebp)          # 4-byte Folded Spill
-	movl	$0, -104(%ebp)          # 4-byte Folded Spill
-	movl	$0, -108(%ebp)          # 4-byte Folded Spill
-	movl	$0, -96(%ebp)           # 4-byte Folded Spill
-	movl	$0, -100(%ebp)          # 4-byte Folded Spill
-.LBB144_4:
-	movl	-148(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -72(%ebp)
-	movl	-132(%ebp), %edi        # 4-byte Reload
-	movl	%edi, -68(%ebp)
-	movl	-140(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -64(%ebp)
-	movl	%ebx, -92(%ebp)
-	movl	-120(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -88(%ebp)
-	movl	%edx, -84(%ebp)
-	movl	%eax, -80(%ebp)
-	movl	%ecx, -76(%ebp)
-	sbbl	%edx, %edx
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -60(%ebp)
-	movl	-144(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -56(%ebp)
-	movl	-156(%ebp), %ecx        # 4-byte Reload
-	pushl	%eax
-	movl	%ecx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB144_6
-# BB#5:
-	movl	$0, %ebx
-	movl	$0, %eax
-	movl	$0, %edi
-.LBB144_6:
-	movl	%eax, -116(%ebp)        # 4-byte Spill
-	sbbl	%eax, %eax
-	leal	-92(%ebp), %ecx
-	movl	%ecx, 8(%esp)
-	leal	-72(%ebp), %ecx
-	movl	%ecx, 4(%esp)
-	leal	-52(%ebp), %ecx
-	movl	%ecx, (%esp)
-	andl	%eax, %edx
-	movl	-124(%ebp), %eax        # 4-byte Reload
-	addl	%eax, -100(%ebp)        # 4-byte Folded Spill
-	adcl	%edi, -96(%ebp)         # 4-byte Folded Spill
-	movl	-108(%ebp), %esi        # 4-byte Reload
-	adcl	-136(%ebp), %esi        # 4-byte Folded Reload
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -104(%ebp)        # 4-byte Folded Spill
-	movl	-112(%ebp), %edi        # 4-byte Reload
-	adcl	%ebx, %edi
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, -120(%ebp)        # 4-byte Spill
-	andl	$1, %edx
-	movl	%edx, -116(%ebp)        # 4-byte Spill
-	movl	-128(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre5Lbmi2@PLT
-	movl	-100(%ebp), %eax        # 4-byte Reload
-	addl	-32(%ebp), %eax
-	movl	%eax, -100(%ebp)        # 4-byte Spill
-	movl	-96(%ebp), %eax         # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -96(%ebp)         # 4-byte Spill
-	adcl	-24(%ebp), %esi
-	movl	%esi, -108(%ebp)        # 4-byte Spill
-	movl	-104(%ebp), %eax        # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -104(%ebp)        # 4-byte Spill
-	adcl	-16(%ebp), %edi
-	movl	%edi, -112(%ebp)        # 4-byte Spill
-	movl	-120(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -116(%ebp)        # 4-byte Folded Spill
-	movl	-52(%ebp), %ecx
-	movl	8(%ebp), %esi
-	subl	(%esi), %ecx
-	movl	-48(%ebp), %ebx
-	sbbl	4(%esi), %ebx
-	movl	-44(%ebp), %eax
-	sbbl	8(%esi), %eax
-	movl	%eax, -120(%ebp)        # 4-byte Spill
-	movl	-40(%ebp), %edx
-	sbbl	12(%esi), %edx
-	movl	-36(%ebp), %edi
-	sbbl	16(%esi), %edi
-	movl	20(%esi), %eax
-	movl	%eax, -124(%ebp)        # 4-byte Spill
-	sbbl	%eax, -100(%ebp)        # 4-byte Folded Spill
-	movl	24(%esi), %eax
-	movl	%eax, -128(%ebp)        # 4-byte Spill
-	sbbl	%eax, -96(%ebp)         # 4-byte Folded Spill
-	movl	28(%esi), %eax
-	movl	%eax, -132(%ebp)        # 4-byte Spill
-	sbbl	%eax, -108(%ebp)        # 4-byte Folded Spill
-	movl	32(%esi), %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-	sbbl	%eax, -104(%ebp)        # 4-byte Folded Spill
-	movl	36(%esi), %eax
-	movl	%eax, -140(%ebp)        # 4-byte Spill
-	sbbl	%eax, -112(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, -116(%ebp)          # 4-byte Folded Spill
-	movl	40(%esi), %eax
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	subl	%eax, %ecx
-	movl	44(%esi), %eax
-	movl	%eax, -164(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ebx
-	movl	48(%esi), %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	sbbl	%eax, -120(%ebp)        # 4-byte Folded Spill
-	movl	52(%esi), %eax
-	movl	%eax, -172(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edx
-	movl	56(%esi), %eax
-	movl	%eax, -176(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edi
-	movl	60(%esi), %eax
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	sbbl	%eax, -100(%ebp)        # 4-byte Folded Spill
-	movl	64(%esi), %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	sbbl	%eax, -96(%ebp)         # 4-byte Folded Spill
-	movl	68(%esi), %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	sbbl	%eax, -108(%ebp)        # 4-byte Folded Spill
-	movl	72(%esi), %eax
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-	sbbl	%eax, -104(%ebp)        # 4-byte Folded Spill
-	movl	76(%esi), %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	sbbl	%eax, -112(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, -116(%ebp)          # 4-byte Folded Spill
-	addl	-124(%ebp), %ecx        # 4-byte Folded Reload
-	adcl	-128(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%ecx, 20(%esi)
-	movl	-120(%ebp), %eax        # 4-byte Reload
-	adcl	-132(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ebx, 24(%esi)
-	adcl	-136(%ebp), %edx        # 4-byte Folded Reload
-	movl	%eax, 28(%esi)
-	adcl	-140(%ebp), %edi        # 4-byte Folded Reload
-	movl	%edx, 32(%esi)
-	movl	-100(%ebp), %eax        # 4-byte Reload
-	adcl	-160(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edi, 36(%esi)
-	movl	-96(%ebp), %ecx         # 4-byte Reload
-	adcl	-164(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 40(%esi)
-	movl	-108(%ebp), %eax        # 4-byte Reload
-	adcl	-168(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 44(%esi)
-	movl	-104(%ebp), %ecx        # 4-byte Reload
-	adcl	-172(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 48(%esi)
-	movl	-112(%ebp), %edx        # 4-byte Reload
-	adcl	-176(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 52(%esi)
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	adcl	-180(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edx, 56(%esi)
-	movl	%eax, 60(%esi)
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 64(%esi)
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 68(%esi)
-	movl	-152(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 72(%esi)
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 76(%esi)
-	addl	$188, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end144:
-	.size	mcl_fpDbl_mulPre10Lbmi2, .Lfunc_end144-mcl_fpDbl_mulPre10Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre10Lbmi2,@function
-mcl_fpDbl_sqrPre10Lbmi2:                # @mcl_fpDbl_sqrPre10Lbmi2
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$188, %esp
-	calll	.L145$pb
-.L145$pb:
-	popl	%ebx
-.Ltmp16:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp16-.L145$pb), %ebx
-	movl	%ebx, -120(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %esi
-	movl	%esi, (%esp)
-	calll	mcl_fpDbl_mulPre5Lbmi2@PLT
-	leal	20(%edi), %eax
-	movl	%eax, 8(%esp)
-	movl	%eax, 4(%esp)
-	leal	40(%esi), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre5Lbmi2@PLT
-	movl	36(%edi), %eax
-	movl	32(%edi), %ebx
-	movl	28(%edi), %esi
-	movl	(%edi), %ecx
-	movl	4(%edi), %edx
-	addl	20(%edi), %ecx
-	adcl	24(%edi), %edx
-	adcl	8(%edi), %esi
-	adcl	12(%edi), %ebx
-	movl	%ebx, -124(%ebp)        # 4-byte Spill
-	adcl	16(%edi), %eax
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -128(%ebp)        # 4-byte Spill
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -108(%ebp)        # 4-byte Spill
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -104(%ebp)        # 4-byte Spill
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -100(%ebp)        # 4-byte Spill
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -96(%ebp)         # 4-byte Spill
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	sbbl	%ebx, %ebx
-	movl	%ebx, -116(%ebp)        # 4-byte Spill
-	pushl	%eax
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB145_1
-# BB#2:
-	movl	$0, -112(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB145_3
-.LBB145_1:
-	leal	(%ecx,%ecx), %edi
-	movl	%edi, -112(%ebp)        # 4-byte Spill
-.LBB145_3:
-	movl	-96(%ebp), %edi         # 4-byte Reload
-	pushl	%eax
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	movl	-124(%ebp), %edi        # 4-byte Reload
-	jb	.LBB145_4
-# BB#5:
-	movl	$0, -96(%ebp)           # 4-byte Folded Spill
-	jmp	.LBB145_6
-.LBB145_4:
-	movl	%edx, %ebx
-	shldl	$1, %ecx, %ebx
-	movl	%ebx, -96(%ebp)         # 4-byte Spill
-.LBB145_6:
-	movl	-100(%ebp), %ebx        # 4-byte Reload
-	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB145_7
-# BB#8:
-	movl	$0, -100(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB145_9
-.LBB145_7:
-	movl	%esi, %ebx
-	shldl	$1, %edx, %ebx
-	movl	%ebx, -100(%ebp)        # 4-byte Spill
-.LBB145_9:
-	movl	-104(%ebp), %ebx        # 4-byte Reload
-	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB145_10
-# BB#11:
-	movl	$0, -104(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB145_12
-.LBB145_10:
-	movl	%edi, %ebx
-	shldl	$1, %esi, %ebx
-	movl	%ebx, -104(%ebp)        # 4-byte Spill
-.LBB145_12:
-	movl	-108(%ebp), %ebx        # 4-byte Reload
-	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB145_13
-# BB#14:
-	movl	$0, -108(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB145_15
-.LBB145_13:
-	movl	%eax, %ebx
-	shldl	$1, %edi, %ebx
-	movl	%ebx, -108(%ebp)        # 4-byte Spill
-.LBB145_15:
-	movl	%ecx, -72(%ebp)
-	movl	%edx, -68(%ebp)
-	movl	%esi, -64(%ebp)
-	movl	%edi, -60(%ebp)
-	movl	%eax, -56(%ebp)
-	movl	%ecx, -92(%ebp)
-	movl	%edx, -88(%ebp)
-	movl	%esi, -84(%ebp)
-	movl	%edi, -80(%ebp)
-	movl	%eax, -76(%ebp)
-	movl	-128(%ebp), %ecx        # 4-byte Reload
-	pushl	%eax
-	movl	%ecx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB145_16
-# BB#17:
-	movl	$0, -124(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB145_18
-.LBB145_16:
-	shrl	$31, %eax
-	movl	%eax, -124(%ebp)        # 4-byte Spill
-.LBB145_18:
-	leal	-52(%ebp), %eax
-	movl	%eax, (%esp)
-	leal	-72(%ebp), %eax
-	movl	%eax, 4(%esp)
-	leal	-92(%ebp), %eax
-	movl	%eax, 8(%esp)
-	movl	-116(%ebp), %esi        # 4-byte Reload
-	andl	$1, %esi
-	movl	-120(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre5Lbmi2@PLT
-	movl	-112(%ebp), %edi        # 4-byte Reload
-	addl	-32(%ebp), %edi
-	movl	-96(%ebp), %eax         # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -96(%ebp)         # 4-byte Spill
-	movl	-100(%ebp), %eax        # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -100(%ebp)        # 4-byte Spill
-	movl	-104(%ebp), %eax        # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -104(%ebp)        # 4-byte Spill
-	movl	-108(%ebp), %eax        # 4-byte Reload
-	adcl	-16(%ebp), %eax
-	movl	%eax, -108(%ebp)        # 4-byte Spill
-	adcl	-124(%ebp), %esi        # 4-byte Folded Reload
-	movl	-52(%ebp), %edx
-	movl	8(%ebp), %eax
-	subl	(%eax), %edx
-	movl	-48(%ebp), %ebx
-	sbbl	4(%eax), %ebx
-	movl	-44(%ebp), %ecx
-	sbbl	8(%eax), %ecx
-	movl	%ecx, -116(%ebp)        # 4-byte Spill
-	movl	-40(%ebp), %ecx
-	sbbl	12(%eax), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	movl	-36(%ebp), %ecx
-	sbbl	16(%eax), %ecx
-	movl	%ecx, -120(%ebp)        # 4-byte Spill
-	movl	20(%eax), %ecx
-	movl	%ecx, -124(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edi
-	movl	%edi, -112(%ebp)        # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, -128(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -96(%ebp)         # 4-byte Folded Spill
-	movl	28(%eax), %ecx
-	movl	%ecx, -132(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -100(%ebp)        # 4-byte Folded Spill
-	movl	32(%eax), %ecx
-	movl	%ecx, -136(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -104(%ebp)        # 4-byte Folded Spill
-	movl	36(%eax), %ecx
-	movl	%ecx, -140(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -108(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, %esi
-	movl	40(%eax), %ecx
-	movl	%ecx, -160(%ebp)        # 4-byte Spill
-	subl	%ecx, %edx
-	movl	44(%eax), %ecx
-	movl	%ecx, -164(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %ebx
-	movl	48(%eax), %ecx
-	movl	%ecx, -168(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -116(%ebp)        # 4-byte Folded Spill
-	movl	52(%eax), %ecx
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	movl	-144(%ebp), %edi        # 4-byte Reload
-	sbbl	%ecx, %edi
-	movl	56(%eax), %ecx
-	movl	%ecx, -176(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -120(%ebp)        # 4-byte Folded Spill
-	movl	60(%eax), %ecx
-	movl	%ecx, -180(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -112(%ebp)        # 4-byte Folded Spill
-	movl	64(%eax), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -96(%ebp)         # 4-byte Folded Spill
-	movl	68(%eax), %ecx
-	movl	%ecx, -148(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -100(%ebp)        # 4-byte Folded Spill
-	movl	72(%eax), %ecx
-	movl	%ecx, -152(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -104(%ebp)        # 4-byte Folded Spill
-	movl	76(%eax), %ecx
-	movl	%ecx, -156(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -108(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, %esi
-	addl	-124(%ebp), %edx        # 4-byte Folded Reload
-	adcl	-128(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%edx, 20(%eax)
-	movl	-116(%ebp), %ecx        # 4-byte Reload
-	adcl	-132(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%ebx, 24(%eax)
-	adcl	-136(%ebp), %edi        # 4-byte Folded Reload
-	movl	%ecx, 28(%eax)
-	movl	-120(%ebp), %edx        # 4-byte Reload
-	adcl	-140(%ebp), %edx        # 4-byte Folded Reload
-	movl	%edi, 32(%eax)
-	movl	-112(%ebp), %ecx        # 4-byte Reload
-	adcl	-160(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 36(%eax)
-	movl	-96(%ebp), %edx         # 4-byte Reload
-	adcl	-164(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 40(%eax)
-	movl	-100(%ebp), %ecx        # 4-byte Reload
-	adcl	-168(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 44(%eax)
-	movl	-104(%ebp), %edx        # 4-byte Reload
-	adcl	-172(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 48(%eax)
-	movl	-108(%ebp), %ecx        # 4-byte Reload
-	adcl	-176(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 52(%eax)
-	adcl	-180(%ebp), %esi        # 4-byte Folded Reload
-	movl	%ecx, 56(%eax)
-	movl	%esi, 60(%eax)
-	movl	-144(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 64(%eax)
-	movl	-148(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 68(%eax)
-	movl	-152(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 72(%eax)
-	movl	-156(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 76(%eax)
-	addl	$188, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end145:
-	.size	mcl_fpDbl_sqrPre10Lbmi2, .Lfunc_end145-mcl_fpDbl_sqrPre10Lbmi2
-
-	.globl	mcl_fp_mont10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont10Lbmi2,@function
-mcl_fp_mont10Lbmi2:                     # @mcl_fp_mont10Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1036, %esp             # imm = 0x40C
-	calll	.L146$pb
-.L146$pb:
-	popl	%ebx
-.Ltmp17:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp17-.L146$pb), %ebx
-	movl	1068(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	992(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	992(%esp), %edi
-	movl	996(%esp), %ebp
-	movl	%edi, %eax
-	imull	%esi, %eax
-	movl	1032(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1028(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	1024(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	1020(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1016(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1012(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1008(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1004(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1000(%esp), %esi
-	movl	%eax, (%esp)
-	leal	944(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	addl	944(%esp), %edi
-	adcl	948(%esp), %ebp
-	adcl	952(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	1064(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	896(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %edi
-	addl	896(%esp), %ebp
-	adcl	900(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	928(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	932(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	936(%esp), %edi
-	sbbl	%eax, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	848(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	movl	64(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	848(%esp), %ebp
-	adcl	852(%esp), %esi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	856(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	860(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	864(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	868(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	872(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	876(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	880(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	884(%esp), %ebp
-	adcl	888(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	800(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	addl	800(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	832(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	adcl	836(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%esi, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	752(%esp), %ecx
-	movl	1068(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv320x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	752(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	768(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	780(%esp), %esi
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	784(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	704(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	704(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	716(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	728(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	732(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	736(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	744(%esp), %edi
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	656(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	movl	44(%esp), %eax          # 4-byte Reload
-	addl	656(%esp), %eax
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	676(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	688(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	696(%esp), %edi
-	adcl	$0, %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	608(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	608(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	624(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	636(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	640(%esp), %esi
-	adcl	644(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	648(%esp), %edi
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	560(%esp), %eax
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	572(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	592(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	600(%esp), %edi
-	adcl	$0, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	512(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	512(%esp), %ecx
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebp, %esi
-	adcl	520(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	548(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	464(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %ebp
-	movl	%ebp, %eax
-	addl	464(%esp), %edi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	468(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	472(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	480(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	484(%esp), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	488(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	492(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	496(%esp), %ebp
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	500(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	504(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	1060(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv320x32
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	416(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	432(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	444(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	452(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	368(%esp), %esi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	380(%esp), %esi
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	384(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	400(%esp), %edi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	320(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	320(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	328(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	332(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	348(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	360(%esp), %ebp
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	272(%esp), %esi
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	276(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	288(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	312(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %ebp
-	adcl	$0, %ebp
-	movl	1064(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	%edi, %ecx
-	addl	224(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	236(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	240(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	264(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	176(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %ebp
-	addl	176(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%edi, %esi
-	adcl	192(%esp), %esi
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	196(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	1064(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	128(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	128(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	132(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	136(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	140(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	adcl	144(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	152(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	168(%esp), %ebp
-	sbbl	%esi, %esi
-	movl	32(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	80(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %esi
-	addl	80(%esp), %edi
-	movl	76(%esp), %eax          # 4-byte Reload
-	movl	64(%esp), %ebx          # 4-byte Reload
-	adcl	84(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	88(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	92(%esp), %ebx
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	108(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	112(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	120(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	1068(%esp), %edx
-	subl	(%edx), %eax
-	sbbl	4(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	%ebx, %ecx
-	sbbl	8(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	sbbl	12(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	sbbl	20(%edx), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%edx), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%edx), %ecx
-	movl	68(%esp), %edi          # 4-byte Reload
-	sbbl	32(%edx), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	sbbl	36(%edx), %ebp
-	movl	%ebp, %edx
-	sbbl	$0, %esi
-	andl	$1, %esi
-	jne	.LBB146_2
-# BB#1:
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-.LBB146_2:
-	movl	%esi, %ecx
-	testb	%cl, %cl
-	movl	76(%esp), %esi          # 4-byte Reload
-	jne	.LBB146_4
-# BB#3:
-	movl	%eax, %esi
-.LBB146_4:
-	movl	1056(%esp), %eax
-	movl	%esi, (%eax)
-	movl	60(%esp), %edi          # 4-byte Reload
-	jne	.LBB146_6
-# BB#5:
-	movl	16(%esp), %edi          # 4-byte Reload
-.LBB146_6:
-	movl	%edi, 4(%eax)
-	jne	.LBB146_8
-# BB#7:
-	movl	20(%esp), %ebx          # 4-byte Reload
-.LBB146_8:
-	movl	%ebx, 8(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	44(%esp), %ecx          # 4-byte Reload
-	jne	.LBB146_10
-# BB#9:
-	movl	24(%esp), %ebp          # 4-byte Reload
-.LBB146_10:
-	movl	%ebp, 12(%eax)
-	jne	.LBB146_12
-# BB#11:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB146_12:
-	movl	%ecx, 16(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	jne	.LBB146_14
-# BB#13:
-	movl	32(%esp), %ecx          # 4-byte Reload
-.LBB146_14:
-	movl	%ecx, 20(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	jne	.LBB146_16
-# BB#15:
-	movl	56(%esp), %ecx          # 4-byte Reload
-.LBB146_16:
-	movl	%ecx, 24(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	68(%esp), %ecx          # 4-byte Reload
-	jne	.LBB146_18
-# BB#17:
-	movl	64(%esp), %ecx          # 4-byte Reload
-.LBB146_18:
-	movl	%ecx, 32(%eax)
-	movl	72(%esp), %ecx          # 4-byte Reload
-	jne	.LBB146_20
-# BB#19:
-	movl	%edx, %ecx
-.LBB146_20:
-	movl	%ecx, 36(%eax)
-	addl	$1036, %esp             # imm = 0x40C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end146:
-	.size	mcl_fp_mont10Lbmi2, .Lfunc_end146-mcl_fp_mont10Lbmi2
-
-	.globl	mcl_fp_montNF10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF10Lbmi2,@function
-mcl_fp_montNF10Lbmi2:                   # @mcl_fp_montNF10Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1020, %esp             # imm = 0x3FC
-	calll	.L147$pb
-.L147$pb:
-	popl	%ebx
-.Ltmp18:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp18-.L147$pb), %ebx
-	movl	1052(%esp), %eax
-	movl	-4(%eax), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	976(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	976(%esp), %edi
-	movl	980(%esp), %esi
-	movl	%edi, %eax
-	imull	%ebp, %eax
-	movl	1016(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1012(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1008(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1004(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1000(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	996(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	992(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	988(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	984(%esp), %ebp
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	928(%esp), %edi
-	adcl	932(%esp), %esi
-	adcl	936(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	952(%esp), %edi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	880(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	920(%esp), %ecx
-	addl	880(%esp), %esi
-	adcl	884(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	900(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	832(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	832(%esp), %esi
-	adcl	836(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	848(%esp), %edi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	856(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	872(%esp), %esi
-	movl	1048(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	784(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	824(%esp), %ecx
-	addl	784(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	796(%esp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	808(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	820(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	736(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	736(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	760(%esp), %edi
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	764(%esp), %ebp
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	768(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	688(%esp), %ecx
-	movl	1044(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv320x32
-	movl	728(%esp), %eax
-	movl	28(%esp), %edx          # 4-byte Reload
-	addl	688(%esp), %edx
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	692(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	696(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	704(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	708(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	712(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	716(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	720(%esp), %ebp
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	724(%esp), %esi
-	adcl	$0, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %edi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	640(%esp), %ecx
-	movl	1052(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv320x32
-	addl	640(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	656(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	672(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	676(%esp), %esi
-	movl	%esi, %ebp
-	movl	28(%esp), %esi          # 4-byte Reload
-	adcl	680(%esp), %esi
-	movl	1048(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	592(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	632(%esp), %edx
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	592(%esp), %ecx
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	604(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	624(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	adcl	628(%esp), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	544(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	544(%esp), %esi
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	548(%esp), %edi
-	movl	40(%esp), %esi          # 4-byte Reload
-	adcl	552(%esp), %esi
-	movl	36(%esp), %ebp          # 4-byte Reload
-	adcl	556(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	536(%esp), %edx
-	addl	496(%esp), %edi
-	adcl	500(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	504(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %esi          # 4-byte Reload
-	adcl	528(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	448(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	448(%esp), %edi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	456(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	464(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	480(%esp), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %esi          # 4-byte Reload
-	adcl	488(%esp), %esi
-	movl	1048(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	400(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	440(%esp), %eax
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	400(%esp), %ecx
-	adcl	404(%esp), %ebp
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	408(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	412(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	416(%esp), %edi
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	420(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %edx          # 4-byte Reload
-	adcl	424(%esp), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	428(%esp), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	432(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	436(%esp), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	352(%esp), %esi
-	adcl	356(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	360(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%edi, %esi
-	adcl	368(%esp), %esi
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	372(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	1044(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv320x32
-	movl	344(%esp), %edx
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	304(%esp), %ecx
-	adcl	308(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	316(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	320(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	adcl	324(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	332(%esp), %esi
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	256(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	256(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	272(%esp), %edi
-	adcl	276(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	284(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %esi          # 4-byte Reload
-	adcl	288(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	208(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	248(%esp), %edx
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	208(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	220(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	224(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	236(%esp), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	160(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	160(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%ebp, %edi
-	adcl	176(%esp), %edi
-	movl	28(%esp), %esi          # 4-byte Reload
-	adcl	180(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	192(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	152(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	112(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	120(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	124(%esp), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	128(%esp), %esi
-	movl	%esi, %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	132(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	136(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	140(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	144(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %ebp
-	movl	%eax, (%esp)
-	leal	64(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	64(%esp), %ebp
-	movl	%edi, %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	movl	32(%esp), %ebx          # 4-byte Reload
-	adcl	68(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	72(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ebx
-	adcl	80(%esp), %ebp
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	84(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	92(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	96(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	1052(%esp), %edi
-	subl	(%edi), %edx
-	sbbl	4(%edi), %ecx
-	movl	%ebx, %eax
-	sbbl	8(%edi), %eax
-	movl	%ebp, %esi
-	sbbl	12(%edi), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	16(%edi), %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	24(%esp), %esi          # 4-byte Reload
-	sbbl	20(%edi), %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	40(%esp), %esi          # 4-byte Reload
-	sbbl	24(%edi), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	sbbl	28(%edi), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	sbbl	32(%edi), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	sbbl	36(%edi), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	%esi, %edi
-	sarl	$31, %edi
-	testl	%edi, %edi
-	movl	60(%esp), %edi          # 4-byte Reload
-	js	.LBB147_2
-# BB#1:
-	movl	%edx, %edi
-.LBB147_2:
-	movl	1040(%esp), %edx
-	movl	%edi, (%edx)
-	movl	52(%esp), %edi          # 4-byte Reload
-	js	.LBB147_4
-# BB#3:
-	movl	%ecx, %edi
-.LBB147_4:
-	movl	%edi, 4(%edx)
-	js	.LBB147_6
-# BB#5:
-	movl	%eax, %ebx
-.LBB147_6:
-	movl	%ebx, 8(%edx)
-	js	.LBB147_8
-# BB#7:
-	movl	4(%esp), %ebp           # 4-byte Reload
-.LBB147_8:
-	movl	%ebp, 12(%edx)
-	movl	44(%esp), %esi          # 4-byte Reload
-	movl	24(%esp), %eax          # 4-byte Reload
-	js	.LBB147_10
-# BB#9:
-	movl	8(%esp), %esi           # 4-byte Reload
-.LBB147_10:
-	movl	%esi, 16(%edx)
-	js	.LBB147_12
-# BB#11:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB147_12:
-	movl	%eax, 20(%edx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB147_14
-# BB#13:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB147_14:
-	movl	%eax, 24(%edx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	js	.LBB147_16
-# BB#15:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB147_16:
-	movl	%eax, 28(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB147_18
-# BB#17:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB147_18:
-	movl	%eax, 32(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB147_20
-# BB#19:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB147_20:
-	movl	%eax, 36(%edx)
-	addl	$1020, %esp             # imm = 0x3FC
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end147:
-	.size	mcl_fp_montNF10Lbmi2, .Lfunc_end147-mcl_fp_montNF10Lbmi2
-
-	.globl	mcl_fp_montRed10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed10Lbmi2,@function
-mcl_fp_montRed10Lbmi2:                  # @mcl_fp_montRed10Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$604, %esp              # imm = 0x25C
-	calll	.L148$pb
-.L148$pb:
-	popl	%eax
-.Ltmp19:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp19-.L148$pb), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	632(%esp), %edx
-	movl	-4(%edx), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	628(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	imull	%esi, %ebx
-	movl	76(%ecx), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	72(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%ecx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	64(%ecx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	60(%ecx), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	56(%ecx), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	52(%ecx), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	48(%ecx), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	44(%ecx), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	40(%ecx), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	36(%ecx), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	32(%ecx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	28(%ecx), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	24(%ecx), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	20(%ecx), %ebp
-	movl	16(%ecx), %edi
-	movl	12(%ecx), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	8(%ecx), %esi
-	movl	(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	560(%esp), %ecx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	560(%esp), %eax
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	564(%esp), %ecx
-	adcl	568(%esp), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	576(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	580(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	sbbl	%eax, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	512(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	movl	68(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	512(%esp), %esi
-	movl	4(%esp), %edx           # 4-byte Reload
-	adcl	516(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	520(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	524(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	528(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	532(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	536(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	540(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	544(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	548(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	552(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	464(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	addl	464(%esp), %esi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	468(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	492(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	movl	52(%esp), %eax          # 4-byte Reload
-	addl	416(%esp), %eax
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	420(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	440(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	444(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	movl	60(%esp), %edi          # 4-byte Reload
-	imull	%edi, %eax
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	632(%esp), %eax
-	movl	%eax, %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	addl	368(%esp), %ebp
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	372(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	404(%esp), %ebp
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	%edi, %eax
-	movl	%eax, (%esp)
-	leal	320(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	addl	320(%esp), %esi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	324(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %edi         # 4-byte Reload
-	adcl	344(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	352(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	360(%esp), %esi
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	addl	272(%esp), %ebp
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	276(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	288(%esp), %ebp
-	adcl	292(%esp), %edi
-	movl	%edi, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	296(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	308(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	312(%esp), %esi
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	movl	96(%esp), %eax          # 4-byte Reload
-	addl	224(%esp), %eax
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	232(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	236(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	240(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	adcl	244(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	248(%esp), %ebp
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	252(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	256(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	260(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	264(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	%eax, %edi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	176(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	addl	176(%esp), %edi
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	180(%esp), %ecx
-	movl	112(%esp), %edi         # 4-byte Reload
-	adcl	184(%esp), %edi
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	196(%esp), %ebp
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	128(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	addl	128(%esp), %esi
-	movl	%edi, %eax
-	adcl	132(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%eax, %edi
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	136(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	adcl	140(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	adcl	144(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	%ebp, %edx
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	152(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	76(%esp), %ebx          # 4-byte Reload
-	adcl	164(%esp), %ebx
-	movl	%ebx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	subl	12(%esp), %edi          # 4-byte Folded Reload
-	sbbl	8(%esp), %ecx           # 4-byte Folded Reload
-	sbbl	16(%esp), %esi          # 4-byte Folded Reload
-	sbbl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	120(%esp), %eax         # 4-byte Reload
-	sbbl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	sbbl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	sbbl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	sbbl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %ebx          # 4-byte Reload
-	sbbl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 96(%esp)          # 4-byte Spill
-	sbbl	$0, %eax
-	andl	$1, %eax
-	jne	.LBB148_2
-# BB#1:
-	movl	%edx, 80(%esp)          # 4-byte Spill
-.LBB148_2:
-	testb	%al, %al
-	movl	112(%esp), %edx         # 4-byte Reload
-	jne	.LBB148_4
-# BB#3:
-	movl	%edi, %edx
-.LBB148_4:
-	movl	624(%esp), %edi
-	movl	%edx, (%edi)
-	movl	108(%esp), %edx         # 4-byte Reload
-	jne	.LBB148_6
-# BB#5:
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-.LBB148_6:
-	movl	124(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 4(%edi)
-	movl	116(%esp), %ecx         # 4-byte Reload
-	jne	.LBB148_8
-# BB#7:
-	movl	%esi, %ecx
-.LBB148_8:
-	movl	%ecx, 8(%edi)
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	%eax, 12(%edi)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	movl	120(%esp), %eax         # 4-byte Reload
-	jne	.LBB148_10
-# BB#9:
-	movl	64(%esp), %eax          # 4-byte Reload
-.LBB148_10:
-	movl	%eax, 16(%edi)
-	movl	84(%esp), %eax          # 4-byte Reload
-	movl	104(%esp), %ebp         # 4-byte Reload
-	jne	.LBB148_12
-# BB#11:
-	movl	68(%esp), %ebp          # 4-byte Reload
-.LBB148_12:
-	movl	%ebp, 20(%edi)
-	movl	88(%esp), %ebx          # 4-byte Reload
-	jne	.LBB148_14
-# BB#13:
-	movl	72(%esp), %ebx          # 4-byte Reload
-.LBB148_14:
-	movl	%ebx, 24(%edi)
-	jne	.LBB148_16
-# BB#15:
-	movl	92(%esp), %edx          # 4-byte Reload
-.LBB148_16:
-	movl	%edx, 28(%edi)
-	jne	.LBB148_18
-# BB#17:
-	movl	100(%esp), %ecx         # 4-byte Reload
-.LBB148_18:
-	movl	%ecx, 32(%edi)
-	jne	.LBB148_20
-# BB#19:
-	movl	96(%esp), %eax          # 4-byte Reload
-.LBB148_20:
-	movl	%eax, 36(%edi)
-	addl	$604, %esp              # imm = 0x25C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end148:
-	.size	mcl_fp_montRed10Lbmi2, .Lfunc_end148-mcl_fp_montRed10Lbmi2
-
-	.globl	mcl_fp_addPre10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre10Lbmi2,@function
-mcl_fp_addPre10Lbmi2:                   # @mcl_fp_addPre10Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %edi
-	adcl	8(%ecx), %edi
-	movl	16(%esp), %ebx
-	movl	%edx, (%ebx)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%ebx)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ebx)
-	movl	20(%ecx), %edx
-	adcl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ebx)
-	movl	24(%ecx), %esi
-	adcl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ebx)
-	movl	28(%ecx), %edx
-	adcl	%edi, %edx
-	movl	32(%eax), %edi
-	movl	%esi, 24(%ebx)
-	movl	32(%ecx), %esi
-	adcl	%edi, %esi
-	movl	%edx, 28(%ebx)
-	movl	%esi, 32(%ebx)
-	movl	36(%eax), %eax
-	movl	36(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 36(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end149:
-	.size	mcl_fp_addPre10Lbmi2, .Lfunc_end149-mcl_fp_addPre10Lbmi2
-
-	.globl	mcl_fp_subPre10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre10Lbmi2,@function
-mcl_fp_subPre10Lbmi2:                   # @mcl_fp_subPre10Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edx), %ebx
-	movl	20(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebp)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ebp)
-	movl	24(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ebp)
-	movl	28(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%edi, 24(%ebp)
-	movl	32(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	%esi, 28(%ebp)
-	movl	%edi, 32(%ebp)
-	movl	36(%edx), %edx
-	movl	36(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 36(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end150:
-	.size	mcl_fp_subPre10Lbmi2, .Lfunc_end150-mcl_fp_subPre10Lbmi2
-
-	.globl	mcl_fp_shr1_10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_10Lbmi2,@function
-mcl_fp_shr1_10Lbmi2:                    # @mcl_fp_shr1_10Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	8(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 4(%esi)
-	movl	12(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 8(%esi)
-	movl	16(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 12(%esi)
-	movl	20(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 16(%esi)
-	movl	24(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 20(%esi)
-	movl	28(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 24(%esi)
-	movl	32(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 28(%esi)
-	movl	36(%eax), %eax
-	shrdl	$1, %eax, %ecx
-	movl	%ecx, 32(%esi)
-	shrl	%eax
-	movl	%eax, 36(%esi)
-	popl	%esi
-	retl
-.Lfunc_end151:
-	.size	mcl_fp_shr1_10Lbmi2, .Lfunc_end151-mcl_fp_shr1_10Lbmi2
-
-	.globl	mcl_fp_add10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add10Lbmi2,@function
-mcl_fp_add10Lbmi2:                      # @mcl_fp_add10Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$24, %esp
-	movl	52(%esp), %edi
-	movl	(%edi), %ecx
-	movl	4(%edi), %eax
-	movl	48(%esp), %ebx
-	addl	(%ebx), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	adcl	4(%ebx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	8(%edi), %eax
-	adcl	8(%ebx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	12(%ebx), %ecx
-	movl	16(%ebx), %eax
-	adcl	12(%edi), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	16(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	20(%ebx), %eax
-	adcl	20(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	24(%ebx), %esi
-	adcl	24(%edi), %esi
-	movl	28(%ebx), %ebp
-	adcl	28(%edi), %ebp
-	movl	32(%ebx), %edx
-	adcl	32(%edi), %edx
-	movl	36(%ebx), %ecx
-	adcl	36(%edi), %ecx
-	movl	44(%esp), %edi
-	movl	(%esp), %ebx            # 4-byte Reload
-	movl	%ebx, (%edi)
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 4(%edi)
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 8(%edi)
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 12(%edi)
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 16(%edi)
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, 20(%edi)
-	movl	%esi, 24(%edi)
-	movl	%ebp, 28(%edi)
-	movl	%edx, 32(%edi)
-	movl	%ecx, 36(%edi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	56(%esp), %edi
-	subl	(%edi), %ebx
-	movl	%ebx, (%esp)            # 4-byte Spill
-	movl	20(%esp), %ebx          # 4-byte Reload
-	sbbl	4(%edi), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %ebx          # 4-byte Reload
-	sbbl	8(%edi), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebx          # 4-byte Reload
-	sbbl	12(%edi), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %ebx           # 4-byte Reload
-	sbbl	16(%edi), %ebx
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	4(%esp), %ebx           # 4-byte Reload
-	sbbl	20(%edi), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	sbbl	24(%edi), %esi
-	sbbl	28(%edi), %ebp
-	sbbl	32(%edi), %edx
-	sbbl	36(%edi), %ecx
-	sbbl	$0, %eax
-	testb	$1, %al
-	jne	.LBB152_2
-# BB#1:                                 # %nocarry
-	movl	(%esp), %edi            # 4-byte Reload
-	movl	44(%esp), %ebx
-	movl	%edi, (%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, 20(%ebx)
-	movl	%esi, 24(%ebx)
-	movl	%ebp, 28(%ebx)
-	movl	%edx, 32(%ebx)
-	movl	%ecx, 36(%ebx)
-.LBB152_2:                              # %carry
-	addl	$24, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end152:
-	.size	mcl_fp_add10Lbmi2, .Lfunc_end152-mcl_fp_add10Lbmi2
-
-	.globl	mcl_fp_addNF10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF10Lbmi2,@function
-mcl_fp_addNF10Lbmi2:                    # @mcl_fp_addNF10Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$72, %esp
-	movl	100(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	4(%ecx), %esi
-	movl	96(%esp), %edx
-	addl	(%edx), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	4(%edx), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	36(%ecx), %edi
-	movl	32(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	28(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	24(%ecx), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	20(%ecx), %ebp
-	movl	16(%ecx), %ebx
-	movl	12(%ecx), %eax
-	movl	8(%ecx), %esi
-	adcl	8(%edx), %esi
-	adcl	12(%edx), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	16(%edx), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	adcl	20(%edx), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	24(%edx), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	28(%edx), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	32(%edx), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	%esi, %ecx
-	adcl	36(%edx), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	104(%esp), %edi
-	movl	52(%esp), %edx          # 4-byte Reload
-	subl	(%edi), %edx
-	movl	56(%esp), %esi          # 4-byte Reload
-	sbbl	4(%edi), %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	%ecx, %esi
-	sbbl	8(%edi), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	sbbl	12(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	sbbl	16(%edi), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	movl	%esi, %eax
-	movl	%esi, %ebp
-	sbbl	24(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	sbbl	28(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	movl	%esi, %eax
-	movl	%esi, %ebx
-	sbbl	32(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	%eax, %esi
-	sbbl	36(%edi), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	%esi, %edi
-	movl	52(%esp), %esi          # 4-byte Reload
-	sarl	$31, %edi
-	testl	%edi, %edi
-	js	.LBB153_2
-# BB#1:
-	movl	%edx, %esi
-.LBB153_2:
-	movl	92(%esp), %edx
-	movl	%esi, (%edx)
-	movl	56(%esp), %esi          # 4-byte Reload
-	js	.LBB153_4
-# BB#3:
-	movl	(%esp), %esi            # 4-byte Reload
-.LBB153_4:
-	movl	%esi, 4(%edx)
-	movl	%ebp, %edi
-	movl	40(%esp), %esi          # 4-byte Reload
-	js	.LBB153_6
-# BB#5:
-	movl	4(%esp), %ecx           # 4-byte Reload
-.LBB153_6:
-	movl	%ecx, 8(%edx)
-	movl	%ebx, %ecx
-	movl	44(%esp), %ebp          # 4-byte Reload
-	js	.LBB153_8
-# BB#7:
-	movl	8(%esp), %esi           # 4-byte Reload
-.LBB153_8:
-	movl	%esi, 12(%edx)
-	movl	68(%esp), %esi          # 4-byte Reload
-	movl	48(%esp), %ebx          # 4-byte Reload
-	js	.LBB153_10
-# BB#9:
-	movl	12(%esp), %ebp          # 4-byte Reload
-.LBB153_10:
-	movl	%ebp, 16(%edx)
-	js	.LBB153_12
-# BB#11:
-	movl	16(%esp), %ebx          # 4-byte Reload
-.LBB153_12:
-	movl	%ebx, 20(%edx)
-	js	.LBB153_14
-# BB#13:
-	movl	20(%esp), %edi          # 4-byte Reload
-.LBB153_14:
-	movl	%edi, 24(%edx)
-	js	.LBB153_16
-# BB#15:
-	movl	24(%esp), %esi          # 4-byte Reload
-.LBB153_16:
-	movl	%esi, 28(%edx)
-	js	.LBB153_18
-# BB#17:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB153_18:
-	movl	%ecx, 32(%edx)
-	js	.LBB153_20
-# BB#19:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB153_20:
-	movl	%eax, 36(%edx)
-	addl	$72, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end153:
-	.size	mcl_fp_addNF10Lbmi2, .Lfunc_end153-mcl_fp_addNF10Lbmi2
-
-	.globl	mcl_fp_sub10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub10Lbmi2,@function
-mcl_fp_sub10Lbmi2:                      # @mcl_fp_sub10Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$32, %esp
-	movl	56(%esp), %esi
-	movl	(%esi), %ecx
-	movl	4(%esi), %eax
-	xorl	%ebx, %ebx
-	movl	60(%esp), %edi
-	subl	(%edi), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	20(%esi), %edx
-	sbbl	20(%edi), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	24(%esi), %ecx
-	sbbl	24(%edi), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	28(%esi), %eax
-	sbbl	28(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	32(%esi), %ebp
-	sbbl	32(%edi), %ebp
-	movl	36(%esi), %esi
-	sbbl	36(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	52(%esp), %ebx
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	%edx, 20(%ebx)
-	movl	%ecx, 24(%ebx)
-	movl	%eax, 28(%ebx)
-	movl	%ebp, 32(%ebx)
-	movl	%esi, 36(%ebx)
-	je	.LBB154_2
-# BB#1:                                 # %carry
-	movl	%esi, %edi
-	movl	64(%esp), %esi
-	movl	12(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	8(%esi), %ecx
-	movl	12(%esi), %eax
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	%eax, 28(%ebx)
-	movl	32(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 32(%ebx)
-	movl	36(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 36(%ebx)
-.LBB154_2:                              # %nocarry
-	addl	$32, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end154:
-	.size	mcl_fp_sub10Lbmi2, .Lfunc_end154-mcl_fp_sub10Lbmi2
-
-	.globl	mcl_fp_subNF10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF10Lbmi2,@function
-mcl_fp_subNF10Lbmi2:                    # @mcl_fp_subNF10Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$56, %esp
-	movl	80(%esp), %eax
-	movl	36(%eax), %esi
-	movl	(%eax), %edi
-	movl	4(%eax), %edx
-	movl	84(%esp), %ecx
-	subl	(%ecx), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	sbbl	4(%ecx), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	32(%eax), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	28(%eax), %edi
-	movl	24(%eax), %ebx
-	movl	20(%eax), %ebp
-	movl	16(%eax), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	12(%eax), %edx
-	movl	8(%eax), %eax
-	sbbl	8(%ecx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	sbbl	12(%ecx), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	sbbl	16(%ecx), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	sbbl	20(%ecx), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	sbbl	24(%ecx), %ebx
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	sbbl	28(%ecx), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	sbbl	32(%ecx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	sbbl	36(%ecx), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	sarl	$31, %eax
-	movl	%eax, %edx
-	addl	%edx, %edx
-	movl	%eax, %ecx
-	adcl	%ecx, %ecx
-	movl	%esi, %ebx
-	shrl	$31, %ebx
-	orl	%edx, %ebx
-	movl	88(%esp), %edi
-	movl	20(%edi), %edx
-	andl	%ecx, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	12(%edi), %edx
-	andl	%ecx, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	andl	4(%edi), %ecx
-	movl	16(%edi), %edx
-	andl	%ebx, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	8(%edi), %edx
-	andl	%ebx, %edx
-	andl	(%edi), %ebx
-	movl	36(%edi), %esi
-	andl	%eax, %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	32(%edi), %ebp
-	andl	%eax, %ebp
-	movl	28(%edi), %esi
-	andl	%eax, %esi
-	andl	24(%edi), %eax
-	addl	36(%esp), %ebx          # 4-byte Folded Reload
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	76(%esp), %edi
-	movl	%ebx, (%edi)
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 4(%edi)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 8(%edi)
-	movl	(%esp), %edx            # 4-byte Reload
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 12(%edi)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 16(%edi)
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 20(%edi)
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%eax, 24(%edi)
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	%esi, 28(%edi)
-	movl	%ebp, 32(%edi)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%edi)
-	addl	$56, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end155:
-	.size	mcl_fp_subNF10Lbmi2, .Lfunc_end155-mcl_fp_subNF10Lbmi2
-
-	.globl	mcl_fpDbl_add10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add10Lbmi2,@function
-mcl_fpDbl_add10Lbmi2:                   # @mcl_fpDbl_add10Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$72, %esp
-	movl	100(%esp), %edx
-	movl	96(%esp), %edi
-	movl	12(%edi), %esi
-	movl	16(%edi), %ecx
-	movl	8(%edx), %ebx
-	movl	(%edx), %ebp
-	addl	(%edi), %ebp
-	movl	92(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%edx), %ebp
-	adcl	4(%edi), %ebp
-	adcl	8(%edi), %ebx
-	adcl	12(%edx), %esi
-	adcl	16(%edx), %ecx
-	movl	%ebp, 4(%eax)
-	movl	48(%edx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%eax)
-	movl	20(%edi), %esi
-	adcl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%ecx, 16(%eax)
-	movl	24(%edi), %ecx
-	adcl	%ebx, %ecx
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%eax)
-	movl	28(%edi), %esi
-	adcl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%ecx, 24(%eax)
-	movl	32(%edi), %ecx
-	adcl	%ebx, %ecx
-	movl	36(%edx), %ebx
-	movl	%esi, 28(%eax)
-	movl	36(%edi), %esi
-	adcl	%ebx, %esi
-	movl	40(%edx), %ebx
-	movl	%ecx, 32(%eax)
-	movl	40(%edi), %ecx
-	adcl	%ebx, %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	44(%edx), %ebx
-	movl	%esi, 36(%eax)
-	movl	44(%edi), %eax
-	adcl	%ebx, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	48(%edi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	52(%edx), %eax
-	movl	52(%edi), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	56(%edx), %eax
-	movl	56(%edi), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	60(%edx), %eax
-	movl	60(%edi), %ecx
-	adcl	%eax, %ecx
-	movl	64(%edx), %esi
-	movl	64(%edi), %eax
-	adcl	%esi, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	68(%edx), %ebx
-	movl	68(%edi), %esi
-	adcl	%ebx, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	72(%edx), %ebx
-	movl	72(%edi), %ebp
-	adcl	%ebx, %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	76(%edx), %edx
-	movl	76(%edi), %edi
-	adcl	%edx, %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	104(%esp), %ebx
-	movl	64(%esp), %edi          # 4-byte Reload
-	subl	(%ebx), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	sbbl	4(%ebx), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	sbbl	8(%ebx), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	sbbl	12(%ebx), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	sbbl	16(%ebx), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%ecx, %edi
-	sbbl	20(%ebx), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	sbbl	24(%ebx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	sbbl	28(%ebx), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	movl	36(%esp), %ebp          # 4-byte Reload
-	sbbl	32(%ebx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebp, %edi
-	sbbl	36(%ebx), %edi
-	sbbl	$0, %edx
-	andl	$1, %edx
-	jne	.LBB156_2
-# BB#1:
-	movl	%edi, %ebp
-.LBB156_2:
-	testb	%dl, %dl
-	movl	64(%esp), %edx          # 4-byte Reload
-	movl	60(%esp), %esi          # 4-byte Reload
-	movl	56(%esp), %edi          # 4-byte Reload
-	movl	52(%esp), %ebx          # 4-byte Reload
-	jne	.LBB156_4
-# BB#3:
-	movl	(%esp), %ecx            # 4-byte Reload
-	movl	4(%esp), %esi           # 4-byte Reload
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	20(%esp), %edx          # 4-byte Reload
-.LBB156_4:
-	movl	92(%esp), %eax
-	movl	%edx, 40(%eax)
-	movl	68(%esp), %edx          # 4-byte Reload
-	movl	%edx, 44(%eax)
-	movl	%ebx, 48(%eax)
-	movl	%edi, 52(%eax)
-	movl	%esi, 56(%eax)
-	movl	%ecx, 60(%eax)
-	movl	44(%esp), %edx          # 4-byte Reload
-	movl	40(%esp), %ecx          # 4-byte Reload
-	jne	.LBB156_6
-# BB#5:
-	movl	24(%esp), %ecx          # 4-byte Reload
-.LBB156_6:
-	movl	%ecx, 64(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	jne	.LBB156_8
-# BB#7:
-	movl	28(%esp), %edx          # 4-byte Reload
-.LBB156_8:
-	movl	%edx, 68(%eax)
-	jne	.LBB156_10
-# BB#9:
-	movl	32(%esp), %ecx          # 4-byte Reload
-.LBB156_10:
-	movl	%ecx, 72(%eax)
-	movl	%ebp, 76(%eax)
-	addl	$72, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end156:
-	.size	mcl_fpDbl_add10Lbmi2, .Lfunc_end156-mcl_fpDbl_add10Lbmi2
-
-	.globl	mcl_fpDbl_sub10Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub10Lbmi2,@function
-mcl_fpDbl_sub10Lbmi2:                   # @mcl_fpDbl_sub10Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$60, %esp
-	movl	84(%esp), %ebp
-	movl	(%ebp), %edx
-	movl	4(%ebp), %esi
-	movl	88(%esp), %eax
-	subl	(%eax), %edx
-	sbbl	4(%eax), %esi
-	movl	8(%ebp), %edi
-	sbbl	8(%eax), %edi
-	movl	80(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	12(%ebp), %edx
-	sbbl	12(%eax), %edx
-	movl	%esi, 4(%ecx)
-	movl	16(%ebp), %esi
-	sbbl	16(%eax), %esi
-	movl	%edi, 8(%ecx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ecx)
-	movl	20(%ebp), %edx
-	sbbl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ecx)
-	movl	24(%ebp), %esi
-	sbbl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ecx)
-	movl	28(%ebp), %edx
-	sbbl	%edi, %edx
-	movl	32(%eax), %edi
-	movl	%esi, 24(%ecx)
-	movl	32(%ebp), %esi
-	sbbl	%edi, %esi
-	movl	36(%eax), %edi
-	movl	%edx, 28(%ecx)
-	movl	36(%ebp), %edx
-	sbbl	%edi, %edx
-	movl	40(%eax), %edi
-	movl	%esi, 32(%ecx)
-	movl	40(%ebp), %esi
-	sbbl	%edi, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	44(%eax), %esi
-	movl	%edx, 36(%ecx)
-	movl	44(%ebp), %edx
-	sbbl	%esi, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	48(%eax), %edx
-	movl	48(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	52(%eax), %edx
-	movl	52(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	56(%eax), %edx
-	movl	56(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	60(%eax), %edx
-	movl	60(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	64(%eax), %edx
-	movl	64(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	68(%eax), %edx
-	movl	68(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	72(%eax), %edx
-	movl	72(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	76(%eax), %eax
-	movl	76(%ebp), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	92(%esp), %esi
-	jne	.LBB157_1
-# BB#2:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	jmp	.LBB157_3
-.LBB157_1:
-	movl	36(%esi), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-.LBB157_3:
-	testb	%al, %al
-	jne	.LBB157_4
-# BB#5:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	movl	$0, %ebx
-	jmp	.LBB157_6
-.LBB157_4:
-	movl	(%esi), %ebx
-	movl	4(%esi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB157_6:
-	jne	.LBB157_7
-# BB#8:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB157_9
-.LBB157_7:
-	movl	32(%esi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB157_9:
-	jne	.LBB157_10
-# BB#11:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB157_12
-.LBB157_10:
-	movl	28(%esi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB157_12:
-	jne	.LBB157_13
-# BB#14:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB157_15
-.LBB157_13:
-	movl	24(%esi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB157_15:
-	jne	.LBB157_16
-# BB#17:
-	movl	$0, %ebp
-	jmp	.LBB157_18
-.LBB157_16:
-	movl	20(%esi), %ebp
-.LBB157_18:
-	jne	.LBB157_19
-# BB#20:
-	movl	$0, %eax
-	jmp	.LBB157_21
-.LBB157_19:
-	movl	16(%esi), %eax
-.LBB157_21:
-	jne	.LBB157_22
-# BB#23:
-	movl	$0, %edx
-	jmp	.LBB157_24
-.LBB157_22:
-	movl	12(%esi), %edx
-.LBB157_24:
-	jne	.LBB157_25
-# BB#26:
-	xorl	%esi, %esi
-	jmp	.LBB157_27
-.LBB157_25:
-	movl	8(%esi), %esi
-.LBB157_27:
-	addl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	8(%esp), %edi           # 4-byte Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 40(%ecx)
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%edi, 44(%ecx)
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, 48(%ecx)
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 52(%ecx)
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 56(%ecx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 60(%ecx)
-	movl	4(%esp), %edx           # 4-byte Reload
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 64(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 68(%ecx)
-	movl	%eax, 72(%ecx)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%ecx)
-	addl	$60, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end157:
-	.size	mcl_fpDbl_sub10Lbmi2, .Lfunc_end157-mcl_fpDbl_sub10Lbmi2
-
-	.align	16, 0x90
-	.type	.LmulPv352x32,@function
-.LmulPv352x32:                          # @mulPv352x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$32, %esp
-	movl	%edx, %eax
-	movl	52(%esp), %edx
-	mulxl	4(%eax), %ebx, %esi
-	mulxl	(%eax), %edi, %ebp
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	addl	%ebx, %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	mulxl	8(%eax), %edi, %ebx
-	adcl	%esi, %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	mulxl	12(%eax), %esi, %edi
-	adcl	%ebx, %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	mulxl	16(%eax), %esi, %ebx
-	adcl	%edi, %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	mulxl	20(%eax), %esi, %edi
-	adcl	%ebx, %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	mulxl	24(%eax), %ebx, %esi
-	adcl	%edi, %ebx
-	mulxl	28(%eax), %edi, %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	%esi, %edi
-	mulxl	32(%eax), %esi, %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	mulxl	36(%eax), %edx, %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	28(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%ecx)
-	movl	24(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%ecx)
-	movl	20(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%ecx)
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%ecx)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%ecx)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 20(%ecx)
-	movl	%ebx, 24(%ecx)
-	movl	%edi, 28(%ecx)
-	movl	%esi, 32(%ecx)
-	movl	%edx, 36(%ecx)
-	movl	52(%esp), %edx
-	mulxl	40(%eax), %eax, %edx
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 40(%ecx)
-	adcl	$0, %edx
-	movl	%edx, 44(%ecx)
-	movl	%ecx, %eax
-	addl	$32, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end158:
-	.size	.LmulPv352x32, .Lfunc_end158-.LmulPv352x32
-
-	.globl	mcl_fp_mulUnitPre11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre11Lbmi2,@function
-mcl_fp_mulUnitPre11Lbmi2:               # @mcl_fp_mulUnitPre11Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$92, %esp
-	calll	.L159$pb
-.L159$pb:
-	popl	%ebx
-.Ltmp20:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp20-.L159$pb), %ebx
-	movl	120(%esp), %eax
-	movl	%eax, (%esp)
-	leal	40(%esp), %ecx
-	movl	116(%esp), %edx
-	calll	.LmulPv352x32
-	movl	84(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp
-	movl	56(%esp), %ebx
-	movl	52(%esp), %edi
-	movl	48(%esp), %esi
-	movl	40(%esp), %edx
-	movl	44(%esp), %ecx
-	movl	112(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	addl	$92, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end159:
-	.size	mcl_fp_mulUnitPre11Lbmi2, .Lfunc_end159-mcl_fp_mulUnitPre11Lbmi2
-
-	.globl	mcl_fpDbl_mulPre11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre11Lbmi2,@function
-mcl_fpDbl_mulPre11Lbmi2:                # @mcl_fpDbl_mulPre11Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$620, %esp              # imm = 0x26C
-	calll	.L160$pb
-.L160$pb:
-	popl	%eax
-.Ltmp21:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp21-.L160$pb), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	648(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	%edx, %ebp
-	movl	%ebx, %edi
-	calll	.LmulPv352x32
-	movl	612(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	596(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	592(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	588(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	584(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	580(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	576(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	568(%esp), %eax
-	movl	572(%esp), %esi
-	movl	640(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	648(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	520(%esp), %ecx
-	movl	%ebp, %edx
-	movl	%edi, %ebx
-	calll	.LmulPv352x32
-	addl	520(%esp), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	564(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	560(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	540(%esp), %ebx
-	movl	536(%esp), %edi
-	movl	532(%esp), %esi
-	movl	524(%esp), %ecx
-	movl	528(%esp), %edx
-	movl	640(%esp), %eax
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%eax)
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	472(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	516(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	512(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	508(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	504(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	500(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	496(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	492(%esp), %ebp
-	movl	488(%esp), %edi
-	movl	484(%esp), %esi
-	movl	476(%esp), %ecx
-	movl	480(%esp), %edx
-	movl	640(%esp), %eax
-	movl	56(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%eax)
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	424(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	468(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	464(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	460(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	456(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	452(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	448(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	444(%esp), %ebx
-	movl	440(%esp), %edi
-	movl	436(%esp), %esi
-	movl	428(%esp), %ecx
-	movl	432(%esp), %edx
-	movl	640(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	376(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	420(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	416(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	404(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	400(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	396(%esp), %ebp
-	movl	392(%esp), %edi
-	movl	388(%esp), %esi
-	movl	380(%esp), %ecx
-	movl	384(%esp), %edx
-	movl	640(%esp), %eax
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 16(%eax)
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	328(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	364(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	360(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	356(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	352(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	348(%esp), %ebx
-	movl	344(%esp), %edi
-	movl	340(%esp), %esi
-	movl	332(%esp), %ecx
-	movl	336(%esp), %edx
-	movl	640(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	280(%esp), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	324(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	320(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	316(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	312(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	304(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	300(%esp), %ebp
-	movl	296(%esp), %edi
-	movl	292(%esp), %esi
-	movl	284(%esp), %ecx
-	movl	288(%esp), %edx
-	movl	640(%esp), %eax
-	movl	16(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 24(%eax)
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	232(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	276(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	272(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	268(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	264(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	252(%esp), %ebx
-	movl	248(%esp), %edi
-	movl	244(%esp), %esi
-	movl	236(%esp), %ecx
-	movl	240(%esp), %edx
-	movl	640(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	184(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	228(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	224(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	220(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	208(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	204(%esp), %ebp
-	movl	200(%esp), %edi
-	movl	196(%esp), %esi
-	movl	188(%esp), %ecx
-	movl	192(%esp), %edx
-	movl	640(%esp), %eax
-	movl	32(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 32(%eax)
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %edi
-	movl	36(%edi), %eax
-	movl	%eax, (%esp)
-	leal	136(%esp), %ecx
-	movl	644(%esp), %eax
-	movl	%eax, %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	136(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	164(%esp), %ebp
-	movl	160(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	156(%esp), %edi
-	movl	152(%esp), %esi
-	movl	148(%esp), %edx
-	movl	140(%esp), %ecx
-	movl	144(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	640(%esp), %eax
-	movl	56(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 36(%eax)
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	60(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	88(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	52(%esp), %eax          # 4-byte Reload
-	addl	88(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	92(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	128(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	112(%esp), %edi
-	movl	108(%esp), %esi
-	movl	104(%esp), %edx
-	movl	100(%esp), %ecx
-	movl	640(%esp), %eax
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 40(%eax)
-	movl	%ebp, 44(%eax)
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 48(%eax)
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 52(%eax)
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 56(%eax)
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 60(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	72(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, 64(%eax)
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 68(%eax)
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 72(%eax)
-	movl	%ecx, 76(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 80(%eax)
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 84(%eax)
-	addl	$620, %esp              # imm = 0x26C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end160:
-	.size	mcl_fpDbl_mulPre11Lbmi2, .Lfunc_end160-mcl_fpDbl_mulPre11Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre11Lbmi2,@function
-mcl_fpDbl_sqrPre11Lbmi2:                # @mcl_fpDbl_sqrPre11Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$620, %esp              # imm = 0x26C
-	calll	.L161$pb
-.L161$pb:
-	popl	%ebx
-.Ltmp22:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp22-.L161$pb), %ebx
-	movl	%ebx, 84(%esp)          # 4-byte Spill
-	movl	644(%esp), %edx
-	movl	(%edx), %eax
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	%edx, %esi
-	movl	%ebx, %edi
-	calll	.LmulPv352x32
-	movl	612(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	596(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	592(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	588(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	584(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	580(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	576(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	568(%esp), %eax
-	movl	572(%esp), %ebp
-	movl	640(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%esi, %edx
-	movl	4(%edx), %eax
-	movl	%eax, (%esp)
-	leal	520(%esp), %ecx
-	movl	%edi, %ebx
-	calll	.LmulPv352x32
-	addl	520(%esp), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	564(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	560(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	540(%esp), %ebx
-	movl	536(%esp), %edi
-	movl	532(%esp), %esi
-	movl	524(%esp), %ecx
-	movl	528(%esp), %edx
-	movl	640(%esp), %eax
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%eax)
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	8(%edx), %eax
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	472(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	516(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	512(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	508(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	504(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	500(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	496(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	492(%esp), %ebp
-	movl	488(%esp), %edi
-	movl	484(%esp), %esi
-	movl	476(%esp), %ecx
-	movl	480(%esp), %edx
-	movl	640(%esp), %eax
-	movl	60(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	12(%edx), %eax
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	48(%esp), %eax          # 4-byte Reload
-	addl	424(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	468(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	464(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	460(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	456(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	452(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	448(%esp), %ebx
-	movl	444(%esp), %edi
-	movl	440(%esp), %esi
-	movl	436(%esp), %edx
-	movl	428(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	432(%esp), %ecx
-	movl	640(%esp), %eax
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 80(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	16(%edx), %eax
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	80(%esp), %eax          # 4-byte Reload
-	addl	376(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	420(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	416(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	404(%esp), %ebx
-	movl	400(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	396(%esp), %edi
-	movl	392(%esp), %esi
-	movl	388(%esp), %edx
-	movl	380(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	384(%esp), %ecx
-	movl	640(%esp), %eax
-	movl	80(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	20(%edx), %eax
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	328(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	364(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	360(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	356(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	352(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	348(%esp), %ebp
-	movl	344(%esp), %edi
-	movl	340(%esp), %esi
-	movl	332(%esp), %ecx
-	movl	336(%esp), %edx
-	movl	640(%esp), %eax
-	movl	48(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 20(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	80(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	24(%edx), %eax
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	48(%esp), %eax          # 4-byte Reload
-	addl	280(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	324(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	320(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	316(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	312(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	304(%esp), %ebx
-	movl	300(%esp), %edi
-	movl	296(%esp), %esi
-	movl	292(%esp), %edx
-	movl	284(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	288(%esp), %ecx
-	movl	640(%esp), %eax
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 80(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	28(%edx), %eax
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	80(%esp), %eax          # 4-byte Reload
-	addl	232(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	276(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	272(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	268(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	264(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	260(%esp), %ebx
-	movl	256(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	252(%esp), %edi
-	movl	248(%esp), %esi
-	movl	244(%esp), %edx
-	movl	236(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	240(%esp), %ecx
-	movl	640(%esp), %eax
-	movl	80(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	32(%edx), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	184(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	228(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	224(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	220(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	208(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	204(%esp), %ebp
-	movl	200(%esp), %edi
-	movl	196(%esp), %esi
-	movl	188(%esp), %ecx
-	movl	192(%esp), %edx
-	movl	640(%esp), %eax
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 32(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	36(%edx), %eax
-	movl	%eax, (%esp)
-	leal	136(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	52(%esp), %eax          # 4-byte Reload
-	addl	136(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	164(%esp), %ebp
-	movl	160(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	156(%esp), %edi
-	movl	152(%esp), %esi
-	movl	148(%esp), %edx
-	movl	140(%esp), %ecx
-	movl	144(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	640(%esp), %eax
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 36(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	64(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	40(%edx), %eax
-	movl	%eax, (%esp)
-	leal	88(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	52(%esp), %eax          # 4-byte Reload
-	addl	88(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	92(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	128(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	112(%esp), %edi
-	movl	108(%esp), %esi
-	movl	104(%esp), %edx
-	movl	100(%esp), %ecx
-	movl	640(%esp), %eax
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 40(%eax)
-	movl	%ebp, 44(%eax)
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 48(%eax)
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 52(%eax)
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 56(%eax)
-	adcl	64(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 60(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, 64(%eax)
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	60(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 68(%eax)
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 72(%eax)
-	movl	%ecx, 76(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 80(%eax)
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 84(%eax)
-	addl	$620, %esp              # imm = 0x26C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end161:
-	.size	mcl_fpDbl_sqrPre11Lbmi2, .Lfunc_end161-mcl_fpDbl_sqrPre11Lbmi2
-
-	.globl	mcl_fp_mont11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont11Lbmi2,@function
-mcl_fp_mont11Lbmi2:                     # @mcl_fp_mont11Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1132, %esp             # imm = 0x46C
-	calll	.L162$pb
-.L162$pb:
-	popl	%ebx
-.Ltmp23:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp23-.L162$pb), %ebx
-	movl	1164(%esp), %eax
-	movl	-4(%eax), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1080(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	1080(%esp), %edi
-	movl	1084(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	imull	%ebp, %eax
-	movl	1124(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	1120(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	1116(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1112(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1108(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	1100(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	1096(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1092(%esp), %esi
-	movl	1088(%esp), %ebp
-	movl	%eax, (%esp)
-	leal	1032(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	1032(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1040(%esp), %ebp
-	adcl	1044(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	1076(%esp), %esi
-	sbbl	%edi, %edi
-	movl	1160(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	984(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	andl	$1, %edi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	984(%esp), %ecx
-	adcl	988(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1024(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	adcl	1028(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	936(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	936(%esp), %esi
-	adcl	940(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	964(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	980(%esp), %esi
-	adcl	$0, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	888(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	addl	888(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	912(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	928(%esp), %esi
-	movl	%esi, %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	932(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ebp, %eax
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	840(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	movl	%esi, %eax
-	andl	$1, %eax
-	addl	840(%esp), %ebp
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	844(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	848(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	852(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	856(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	860(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	864(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	868(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	872(%esp), %ebp
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	876(%esp), %esi
-	adcl	880(%esp), %edi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	884(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	792(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	792(%esp), %ecx
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	820(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	adcl	824(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	adcl	828(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	836(%esp), %esi
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	744(%esp), %ecx
-	movl	1164(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv352x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	744(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	768(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	776(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	788(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	1156(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv352x32
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	696(%esp), %ecx
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	716(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	724(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	728(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	648(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	648(%esp), %ebp
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %ebp          # 4-byte Reload
-	adcl	656(%esp), %ebp
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	676(%esp), %edi
-	adcl	680(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %esi          # 4-byte Reload
-	adcl	692(%esp), %esi
-	adcl	$0, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	600(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	24(%esp), %ecx          # 4-byte Reload
-	addl	600(%esp), %ecx
-	adcl	604(%esp), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	adcl	608(%esp), %ebp
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	624(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	640(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	andl	$1, %esi
-	movl	%esi, %eax
-	addl	552(%esp), %edi
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	556(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %edi
-	adcl	560(%esp), %edi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	564(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	568(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	572(%esp), %esi
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	576(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	580(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	584(%esp), %ebp
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	588(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	592(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	596(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	28(%esp), %ecx          # 4-byte Reload
-	addl	504(%esp), %ecx
-	adcl	508(%esp), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	520(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	532(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	536(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	456(%esp), %ecx
-	movl	1164(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv352x32
-	andl	$1, %ebp
-	movl	%ebp, %eax
-	addl	456(%esp), %edi
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	460(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	464(%esp), %ebp
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	468(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	472(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	480(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	484(%esp), %edi
-	adcl	488(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	492(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	496(%esp), %esi
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	500(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	408(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	32(%esp), %ecx          # 4-byte Reload
-	addl	408(%esp), %ecx
-	adcl	412(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	428(%esp), %ebp
-	adcl	432(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	444(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	360(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	360(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	368(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	380(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	384(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	392(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	312(%esp), %ecx
-	adcl	316(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	332(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	340(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	348(%esp), %edi
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	264(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	276(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	284(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	300(%esp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %edi          # 4-byte Reload
-	adcl	304(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	216(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	216(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	224(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	232(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	252(%esp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	168(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	movl	%esi, %ecx
-	andl	$1, %ecx
-	addl	168(%esp), %ebp
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	172(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	184(%esp), %ebp
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	188(%esp), %edi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	120(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	%esi, %ecx
-	addl	120(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	128(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	132(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	136(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	144(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	152(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	adcl	156(%esp), %ebp
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	20(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	72(%esp), %ecx
-	movl	1164(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv352x32
-	andl	$1, %esi
-	addl	72(%esp), %edi
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	80(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	84(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	88(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	92(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	108(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	112(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	1164(%esp), %ebp
-	subl	(%ebp), %eax
-	movl	%ecx, %edx
-	sbbl	4(%ebp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	sbbl	8(%ebp), %ecx
-	sbbl	12(%ebp), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	sbbl	16(%ebp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	sbbl	20(%ebp), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	24(%esp), %edi          # 4-byte Reload
-	sbbl	24(%ebp), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	28(%esp), %ebx          # 4-byte Reload
-	sbbl	28(%ebp), %ebx
-	movl	32(%esp), %edi          # 4-byte Reload
-	sbbl	32(%ebp), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	sbbl	36(%ebp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	sbbl	40(%ebp), %edi
-	movl	%edi, %ebp
-	sbbl	$0, %esi
-	andl	$1, %esi
-	jne	.LBB162_2
-# BB#1:
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-.LBB162_2:
-	movl	%esi, %ebx
-	testb	%bl, %bl
-	movl	68(%esp), %ebx          # 4-byte Reload
-	jne	.LBB162_4
-# BB#3:
-	movl	%eax, %ebx
-.LBB162_4:
-	movl	1152(%esp), %eax
-	movl	%ebx, (%eax)
-	movl	56(%esp), %edi          # 4-byte Reload
-	jne	.LBB162_6
-# BB#5:
-	movl	%edx, %edi
-.LBB162_6:
-	movl	%edi, 4(%eax)
-	movl	52(%esp), %edx          # 4-byte Reload
-	jne	.LBB162_8
-# BB#7:
-	movl	%ecx, %edx
-.LBB162_8:
-	movl	%edx, 8(%eax)
-	jne	.LBB162_10
-# BB#9:
-	movl	4(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-.LBB162_10:
-	movl	48(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	jne	.LBB162_12
-# BB#11:
-	movl	8(%esp), %ecx           # 4-byte Reload
-.LBB162_12:
-	movl	%ecx, 16(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	jne	.LBB162_14
-# BB#13:
-	movl	12(%esp), %ecx          # 4-byte Reload
-.LBB162_14:
-	movl	%ecx, 20(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	jne	.LBB162_16
-# BB#15:
-	movl	16(%esp), %ecx          # 4-byte Reload
-.LBB162_16:
-	movl	%ecx, 24(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	jne	.LBB162_18
-# BB#17:
-	movl	20(%esp), %ecx          # 4-byte Reload
-.LBB162_18:
-	movl	%ecx, 32(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	jne	.LBB162_20
-# BB#19:
-	movl	60(%esp), %ecx          # 4-byte Reload
-.LBB162_20:
-	movl	%ecx, 36(%eax)
-	movl	64(%esp), %ecx          # 4-byte Reload
-	jne	.LBB162_22
-# BB#21:
-	movl	%ebp, %ecx
-.LBB162_22:
-	movl	%ecx, 40(%eax)
-	addl	$1132, %esp             # imm = 0x46C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end162:
-	.size	mcl_fp_mont11Lbmi2, .Lfunc_end162-mcl_fp_mont11Lbmi2
-
-	.globl	mcl_fp_montNF11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF11Lbmi2,@function
-mcl_fp_montNF11Lbmi2:                   # @mcl_fp_montNF11Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1132, %esp             # imm = 0x46C
-	calll	.L163$pb
-.L163$pb:
-	popl	%ebx
-.Ltmp24:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp24-.L163$pb), %ebx
-	movl	1164(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1080(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	1080(%esp), %ebp
-	movl	1084(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	1124(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	1120(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	1116(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1112(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1108(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	1100(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	1096(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1092(%esp), %esi
-	movl	1088(%esp), %edi
-	movl	%eax, (%esp)
-	leal	1032(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	1032(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1040(%esp), %edi
-	adcl	1044(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	1048(%esp), %ebp
-	movl	28(%esp), %esi          # 4-byte Reload
-	adcl	1052(%esp), %esi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	984(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	1028(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	984(%esp), %ecx
-	adcl	988(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	996(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	adcl	1000(%esp), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	1004(%esp), %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	936(%esp), %ecx
-	movl	1164(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv352x32
-	addl	936(%esp), %ebp
-	adcl	940(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	956(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	960(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	980(%esp), %ebp
-	movl	1160(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	888(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	932(%esp), %eax
-	addl	888(%esp), %edi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	892(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	896(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	900(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	904(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	adcl	908(%esp), %esi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	912(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	916(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	920(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	924(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	928(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	adcl	$0, %ebp
-	movl	%edi, %eax
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	840(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	840(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	856(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	860(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	872(%esp), %edi
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	876(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	884(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	792(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	836(%esp), %eax
-	movl	44(%esp), %edx          # 4-byte Reload
-	addl	792(%esp), %edx
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	796(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	800(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	804(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	808(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	812(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	816(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	820(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	824(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	828(%esp), %ebp
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	832(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	744(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	744(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	764(%esp), %esi
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	768(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	780(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	784(%esp), %ebp
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	740(%esp), %edx
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	696(%esp), %eax
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	704(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	708(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	712(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	716(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	720(%esp), %edi
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	724(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	728(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	732(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	736(%esp), %esi
-	adcl	$0, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	648(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	648(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	adcl	656(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	672(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	688(%esp), %esi
-	movl	%esi, %edi
-	movl	40(%esp), %esi          # 4-byte Reload
-	adcl	692(%esp), %esi
-	movl	1160(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	600(%esp), %ecx
-	movl	1156(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv352x32
-	movl	644(%esp), %eax
-	movl	28(%esp), %ecx          # 4-byte Reload
-	addl	600(%esp), %ecx
-	adcl	604(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	608(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	612(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	616(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	620(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	624(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	628(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	632(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	636(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	adcl	640(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	adcl	$0, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	552(%esp), %esi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	560(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	576(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	596(%esp), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	1160(%esp), %ecx
-	movl	%ecx, %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	548(%esp), %edx
-	movl	32(%esp), %eax          # 4-byte Reload
-	addl	504(%esp), %eax
-	adcl	508(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	512(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	516(%esp), %ebp
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	520(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	524(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	528(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	532(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	536(%esp), %edi
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	540(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	544(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	456(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	456(%esp), %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	468(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	480(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	488(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %edi          # 4-byte Reload
-	adcl	496(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	408(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	452(%esp), %edx
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	408(%esp), %ecx
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	412(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	428(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	444(%esp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %edi          # 4-byte Reload
-	adcl	448(%esp), %edi
-	adcl	$0, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	360(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	360(%esp), %esi
-	adcl	364(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	372(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	392(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	400(%esp), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	356(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	312(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	320(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	332(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	340(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	264(%esp), %esi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	276(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%edi, %esi
-	adcl	284(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	292(%esp), %edi
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	216(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	260(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	216(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	224(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	232(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	240(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ebp          # 4-byte Reload
-	adcl	244(%esp), %ebp
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	168(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	168(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	176(%esp), %esi
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	180(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	196(%esp), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	adcl	204(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	120(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	164(%esp), %edx
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	120(%esp), %ecx
-	adcl	124(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	adcl	128(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	132(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	136(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	144(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	152(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	156(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	72(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	72(%esp), %edi
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	80(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	84(%esp), %edi
-	adcl	88(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	92(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	108(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	112(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	1164(%esp), %ebx
-	subl	(%ebx), %edx
-	movl	%ecx, %esi
-	sbbl	4(%ebx), %esi
-	movl	%edi, %ecx
-	sbbl	8(%ebx), %ecx
-	movl	44(%esp), %eax          # 4-byte Reload
-	sbbl	12(%ebx), %eax
-	movl	40(%esp), %ebp          # 4-byte Reload
-	sbbl	16(%ebx), %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	movl	28(%esp), %ebp          # 4-byte Reload
-	sbbl	20(%ebx), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	sbbl	24(%ebx), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	sbbl	28(%ebx), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	sbbl	32(%ebx), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	sbbl	36(%ebx), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	sbbl	40(%ebx), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	%ebp, %ebx
-	sarl	$31, %ebx
-	testl	%ebx, %ebx
-	movl	68(%esp), %ebx          # 4-byte Reload
-	js	.LBB163_2
-# BB#1:
-	movl	%edx, %ebx
-.LBB163_2:
-	movl	1152(%esp), %edx
-	movl	%ebx, (%edx)
-	movl	60(%esp), %ebp          # 4-byte Reload
-	js	.LBB163_4
-# BB#3:
-	movl	%esi, %ebp
-.LBB163_4:
-	movl	%ebp, 4(%edx)
-	js	.LBB163_6
-# BB#5:
-	movl	%ecx, %edi
-.LBB163_6:
-	movl	%edi, 8(%edx)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	js	.LBB163_8
-# BB#7:
-	movl	%eax, %ecx
-.LBB163_8:
-	movl	%ecx, 12(%edx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB163_10
-# BB#9:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB163_10:
-	movl	%eax, 16(%edx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	js	.LBB163_12
-# BB#11:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB163_12:
-	movl	%eax, 20(%edx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	js	.LBB163_14
-# BB#13:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB163_14:
-	movl	%eax, 24(%edx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	js	.LBB163_16
-# BB#15:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB163_16:
-	movl	%eax, 28(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB163_18
-# BB#17:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB163_18:
-	movl	%eax, 32(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB163_20
-# BB#19:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB163_20:
-	movl	%eax, 36(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB163_22
-# BB#21:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB163_22:
-	movl	%eax, 40(%edx)
-	addl	$1132, %esp             # imm = 0x46C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end163:
-	.size	mcl_fp_montNF11Lbmi2, .Lfunc_end163-mcl_fp_montNF11Lbmi2
-
-	.globl	mcl_fp_montRed11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed11Lbmi2,@function
-mcl_fp_montRed11Lbmi2:                  # @mcl_fp_montRed11Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$668, %esp              # imm = 0x29C
-	calll	.L164$pb
-.L164$pb:
-	popl	%eax
-.Ltmp25:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp25-.L164$pb), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	696(%esp), %edx
-	movl	-4(%edx), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	692(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	4(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	imull	%esi, %ebx
-	movl	84(%ecx), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	80(%ecx), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	76(%ecx), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	72(%ecx), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	68(%ecx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	64(%ecx), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	60(%ecx), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	56(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	52(%ecx), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	48(%ecx), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	44(%ecx), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	40(%ecx), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	32(%ecx), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	28(%ecx), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	24(%ecx), %ebp
-	movl	20(%ecx), %edi
-	movl	16(%ecx), %esi
-	movl	12(%ecx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	8(%ecx), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	616(%esp), %ecx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	616(%esp), %eax
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	620(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	632(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	636(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	640(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	568(%esp), %esi
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	572(%esp), %edx
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	600(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	520(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	520(%esp), %ebp
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	524(%esp), %ecx
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	548(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	124(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	movl	120(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	472(%esp), %esi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 124(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 120(%esp)         # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	424(%esp), %ebp
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	428(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	444(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	464(%esp), %ebp
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	376(%esp), %esi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	380(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %esi         # 4-byte Reload
-	adcl	404(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	412(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	328(%esp), %edi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	332(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	344(%esp), %edi
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	352(%esp), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	adcl	356(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	696(%esp), %eax
-	movl	%eax, %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	280(%esp), %ebp
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	284(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	292(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %edi         # 4-byte Reload
-	adcl	296(%esp), %edi
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	304(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	%ebp, %eax
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	232(%esp), %ebp
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	236(%esp), %ebp
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	244(%esp), %edi
-	movl	%edi, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	276(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	%ebp, %eax
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	184(%esp), %ebp
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	188(%esp), %ecx
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	136(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	136(%esp), %esi
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	%eax, %edi
-	movl	128(%esp), %edx         # 4-byte Reload
-	adcl	144(%esp), %edx
-	movl	%edx, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	148(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	152(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebx          # 4-byte Reload
-	adcl	180(%esp), %ebx
-	movl	%ebx, 80(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	subl	12(%esp), %edi          # 4-byte Folded Reload
-	sbbl	4(%esp), %edx           # 4-byte Folded Reload
-	sbbl	8(%esp), %ecx           # 4-byte Folded Reload
-	sbbl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	124(%esp), %eax         # 4-byte Reload
-	sbbl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	sbbl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	sbbl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	sbbl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	sbbl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	movl	%ebp, %ebx
-	sbbl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB164_2
-# BB#1:
-	movl	%esi, 112(%esp)         # 4-byte Spill
-.LBB164_2:
-	testb	%bl, %bl
-	movl	132(%esp), %esi         # 4-byte Reload
-	jne	.LBB164_4
-# BB#3:
-	movl	%edi, %esi
-.LBB164_4:
-	movl	688(%esp), %edi
-	movl	%esi, (%edi)
-	movl	104(%esp), %esi         # 4-byte Reload
-	jne	.LBB164_6
-# BB#5:
-	movl	%edx, 128(%esp)         # 4-byte Spill
-.LBB164_6:
-	movl	128(%esp), %edx         # 4-byte Reload
-	movl	%edx, 4(%edi)
-	movl	116(%esp), %edx         # 4-byte Reload
-	jne	.LBB164_8
-# BB#7:
-	movl	%ecx, %edx
-.LBB164_8:
-	movl	%edx, 8(%edi)
-	movl	112(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 12(%edi)
-	movl	92(%esp), %edx          # 4-byte Reload
-	movl	124(%esp), %ecx         # 4-byte Reload
-	jne	.LBB164_10
-# BB#9:
-	movl	64(%esp), %ecx          # 4-byte Reload
-.LBB164_10:
-	movl	%ecx, 16(%edi)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	movl	120(%esp), %eax         # 4-byte Reload
-	jne	.LBB164_12
-# BB#11:
-	movl	68(%esp), %eax          # 4-byte Reload
-.LBB164_12:
-	movl	%eax, 20(%edi)
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	108(%esp), %ebp         # 4-byte Reload
-	jne	.LBB164_14
-# BB#13:
-	movl	72(%esp), %ebp          # 4-byte Reload
-.LBB164_14:
-	movl	%ebp, 24(%edi)
-	jne	.LBB164_16
-# BB#15:
-	movl	76(%esp), %esi          # 4-byte Reload
-.LBB164_16:
-	movl	%esi, 28(%edi)
-	jne	.LBB164_18
-# BB#17:
-	movl	84(%esp), %edx          # 4-byte Reload
-.LBB164_18:
-	movl	%edx, 32(%edi)
-	jne	.LBB164_20
-# BB#19:
-	movl	88(%esp), %ecx          # 4-byte Reload
-.LBB164_20:
-	movl	%ecx, 36(%edi)
-	jne	.LBB164_22
-# BB#21:
-	movl	100(%esp), %eax         # 4-byte Reload
-.LBB164_22:
-	movl	%eax, 40(%edi)
-	addl	$668, %esp              # imm = 0x29C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end164:
-	.size	mcl_fp_montRed11Lbmi2, .Lfunc_end164-mcl_fp_montRed11Lbmi2
-
-	.globl	mcl_fp_addPre11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre11Lbmi2,@function
-mcl_fp_addPre11Lbmi2:                   # @mcl_fp_addPre11Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %edi
-	adcl	8(%ecx), %edi
-	movl	16(%esp), %ebx
-	movl	%edx, (%ebx)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%ebx)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ebx)
-	movl	20(%ecx), %edx
-	adcl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ebx)
-	movl	24(%ecx), %esi
-	adcl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ebx)
-	movl	28(%ecx), %edx
-	adcl	%edi, %edx
-	movl	32(%eax), %edi
-	movl	%esi, 24(%ebx)
-	movl	32(%ecx), %esi
-	adcl	%edi, %esi
-	movl	36(%eax), %edi
-	movl	%edx, 28(%ebx)
-	movl	36(%ecx), %edx
-	adcl	%edi, %edx
-	movl	%esi, 32(%ebx)
-	movl	%edx, 36(%ebx)
-	movl	40(%eax), %eax
-	movl	40(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 40(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end165:
-	.size	mcl_fp_addPre11Lbmi2, .Lfunc_end165-mcl_fp_addPre11Lbmi2
-
-	.globl	mcl_fp_subPre11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre11Lbmi2,@function
-mcl_fp_subPre11Lbmi2:                   # @mcl_fp_subPre11Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edx), %ebx
-	movl	20(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebp)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ebp)
-	movl	24(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ebp)
-	movl	28(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%edi, 24(%ebp)
-	movl	32(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	36(%edx), %ebx
-	movl	%esi, 28(%ebp)
-	movl	36(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	%edi, 32(%ebp)
-	movl	%esi, 36(%ebp)
-	movl	40(%edx), %edx
-	movl	40(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 40(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end166:
-	.size	mcl_fp_subPre11Lbmi2, .Lfunc_end166-mcl_fp_subPre11Lbmi2
-
-	.globl	mcl_fp_shr1_11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_11Lbmi2,@function
-mcl_fp_shr1_11Lbmi2:                    # @mcl_fp_shr1_11Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	8(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 4(%esi)
-	movl	12(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 8(%esi)
-	movl	16(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 12(%esi)
-	movl	20(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 16(%esi)
-	movl	24(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 20(%esi)
-	movl	28(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 24(%esi)
-	movl	32(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 28(%esi)
-	movl	36(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 32(%esi)
-	movl	40(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	%edx, 36(%esi)
-	shrl	%eax
-	movl	%eax, 40(%esi)
-	popl	%esi
-	retl
-.Lfunc_end167:
-	.size	mcl_fp_shr1_11Lbmi2, .Lfunc_end167-mcl_fp_shr1_11Lbmi2
-
-	.globl	mcl_fp_add11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add11Lbmi2,@function
-mcl_fp_add11Lbmi2:                      # @mcl_fp_add11Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$32, %esp
-	movl	60(%esp), %edi
-	movl	(%edi), %ecx
-	movl	4(%edi), %eax
-	movl	56(%esp), %esi
-	addl	(%esi), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%ecx, %ebp
-	adcl	4(%esi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	8(%edi), %eax
-	adcl	8(%esi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	movl	16(%esi), %ecx
-	adcl	12(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	adcl	16(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	adcl	20(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	adcl	24(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	28(%esi), %ebx
-	adcl	28(%edi), %ebx
-	movl	%ebx, (%esp)            # 4-byte Spill
-	movl	32(%esi), %ecx
-	adcl	32(%edi), %ecx
-	movl	36(%esi), %eax
-	adcl	36(%edi), %eax
-	movl	40(%esi), %edx
-	adcl	40(%edi), %edx
-	movl	52(%esp), %esi
-	movl	%ebp, (%esi)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%esi)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%esi)
-	movl	20(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%esi)
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%esi)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%esi)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 24(%esi)
-	movl	%ebx, 28(%esi)
-	movl	%ecx, 32(%esi)
-	movl	%eax, 36(%esi)
-	movl	%edx, 40(%esi)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	64(%esp), %ebp
-	movl	4(%esp), %edi           # 4-byte Reload
-	subl	(%ebp), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	28(%esp), %edi          # 4-byte Reload
-	sbbl	4(%ebp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %edi          # 4-byte Reload
-	sbbl	8(%ebp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %edi          # 4-byte Reload
-	sbbl	12(%ebp), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %edi          # 4-byte Reload
-	sbbl	16(%ebp), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %edi          # 4-byte Reload
-	sbbl	20(%ebp), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %edi           # 4-byte Reload
-	sbbl	24(%ebp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	(%esp), %edi            # 4-byte Reload
-	sbbl	28(%ebp), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	sbbl	32(%ebp), %ecx
-	sbbl	36(%ebp), %eax
-	sbbl	40(%ebp), %edx
-	movl	%edx, %edi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB168_2
-# BB#1:                                 # %nocarry
-	movl	4(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, (%esi)
-	movl	28(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 4(%esi)
-	movl	24(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%esi)
-	movl	20(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 12(%esi)
-	movl	16(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 16(%esi)
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 20(%esi)
-	movl	8(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, 24(%esi)
-	movl	(%esp), %edx            # 4-byte Reload
-	movl	%edx, 28(%esi)
-	movl	%ecx, 32(%esi)
-	movl	%eax, 36(%esi)
-	movl	%edi, 40(%esi)
-.LBB168_2:                              # %carry
-	addl	$32, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end168:
-	.size	mcl_fp_add11Lbmi2, .Lfunc_end168-mcl_fp_add11Lbmi2
-
-	.globl	mcl_fp_addNF11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF11Lbmi2,@function
-mcl_fp_addNF11Lbmi2:                    # @mcl_fp_addNF11Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$80, %esp
-	movl	108(%esp), %edx
-	movl	(%edx), %eax
-	movl	4(%edx), %ecx
-	movl	104(%esp), %esi
-	addl	(%esi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	4(%esi), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%edx), %ebx
-	movl	36(%edx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	32(%edx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	28(%edx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	24(%edx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	20(%edx), %ebp
-	movl	16(%edx), %edi
-	movl	12(%edx), %eax
-	movl	8(%edx), %ecx
-	adcl	8(%esi), %ecx
-	adcl	12(%esi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	16(%esi), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	20(%esi), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	24(%esi), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	28(%esi), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	32(%esi), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	36(%esi), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	adcl	40(%esi), %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebx
-	movl	52(%esp), %esi          # 4-byte Reload
-	subl	(%ebx), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	sbbl	4(%ebx), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	%edx, %ecx
-	sbbl	8(%ebx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	sbbl	12(%ebx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	sbbl	16(%ebx), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	sbbl	20(%ebx), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ecx
-	sbbl	24(%ebx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%ebx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%ebx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	movl	%edi, %ecx
-	movl	%edi, %ebp
-	sbbl	36(%ebx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %edi
-	sbbl	40(%ebx), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	%edi, %ebx
-	movl	52(%esp), %edi          # 4-byte Reload
-	sarl	$31, %ebx
-	testl	%ebx, %ebx
-	js	.LBB169_2
-# BB#1:
-	movl	%esi, %edi
-.LBB169_2:
-	movl	100(%esp), %esi
-	movl	%edi, (%esi)
-	movl	60(%esp), %edi          # 4-byte Reload
-	js	.LBB169_4
-# BB#3:
-	movl	(%esp), %edi            # 4-byte Reload
-.LBB169_4:
-	movl	%edi, 4(%esi)
-	movl	%eax, %edi
-	js	.LBB169_6
-# BB#5:
-	movl	4(%esp), %edx           # 4-byte Reload
-.LBB169_6:
-	movl	%edx, 8(%esi)
-	movl	%ebp, %ecx
-	movl	72(%esp), %edx          # 4-byte Reload
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB169_8
-# BB#7:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB169_8:
-	movl	%eax, 12(%esi)
-	movl	76(%esp), %eax          # 4-byte Reload
-	movl	44(%esp), %ebp          # 4-byte Reload
-	js	.LBB169_10
-# BB#9:
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-.LBB169_10:
-	movl	48(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 16(%esi)
-	js	.LBB169_12
-# BB#11:
-	movl	16(%esp), %ebp          # 4-byte Reload
-.LBB169_12:
-	movl	%ebp, 20(%esi)
-	js	.LBB169_14
-# BB#13:
-	movl	20(%esp), %edi          # 4-byte Reload
-.LBB169_14:
-	movl	%edi, 24(%esi)
-	js	.LBB169_16
-# BB#15:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB169_16:
-	movl	%eax, 28(%esi)
-	js	.LBB169_18
-# BB#17:
-	movl	28(%esp), %edx          # 4-byte Reload
-.LBB169_18:
-	movl	%edx, 32(%esi)
-	js	.LBB169_20
-# BB#19:
-	movl	32(%esp), %ecx          # 4-byte Reload
-.LBB169_20:
-	movl	%ecx, 36(%esi)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB169_22
-# BB#21:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB169_22:
-	movl	%eax, 40(%esi)
-	addl	$80, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end169:
-	.size	mcl_fp_addNF11Lbmi2, .Lfunc_end169-mcl_fp_addNF11Lbmi2
-
-	.globl	mcl_fp_sub11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub11Lbmi2,@function
-mcl_fp_sub11Lbmi2:                      # @mcl_fp_sub11Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$40, %esp
-	movl	64(%esp), %ebp
-	movl	(%ebp), %ecx
-	movl	4(%ebp), %eax
-	movl	68(%esp), %edi
-	subl	(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	8(%ebp), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	12(%ebp), %ebx
-	sbbl	12(%edi), %ebx
-	movl	16(%ebp), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	20(%ebp), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	24(%ebp), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	28(%ebp), %edx
-	sbbl	28(%edi), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	32(%ebp), %ecx
-	sbbl	32(%edi), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	36(%ebp), %eax
-	sbbl	36(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	40(%ebp), %eax
-	sbbl	40(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebx, %ebp
-	movl	16(%esp), %esi          # 4-byte Reload
-	movl	$0, %ebx
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	60(%esp), %ebx
-	movl	%esi, (%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	%ebp, 12(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	%edx, 28(%ebx)
-	movl	%ecx, 32(%ebx)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%ebx)
-	movl	%ecx, %edi
-	movl	%eax, 40(%ebx)
-	je	.LBB170_2
-# BB#1:                                 # %carry
-	movl	72(%esp), %eax
-	addl	(%eax), %esi
-	movl	%esi, (%ebx)
-	movl	28(%esp), %edx          # 4-byte Reload
-	movl	%eax, %esi
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	8(%esi), %ecx
-	movl	12(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%ecx, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	%ecx, 32(%ebx)
-	movl	36(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 36(%ebx)
-	movl	40(%esi), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 40(%ebx)
-.LBB170_2:                              # %nocarry
-	addl	$40, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end170:
-	.size	mcl_fp_sub11Lbmi2, .Lfunc_end170-mcl_fp_sub11Lbmi2
-
-	.globl	mcl_fp_subNF11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF11Lbmi2,@function
-mcl_fp_subNF11Lbmi2:                    # @mcl_fp_subNF11Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$64, %esp
-	movl	88(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %ecx
-	movl	92(%esp), %edi
-	subl	(%edi), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	40(%eax), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	36(%eax), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	32(%eax), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	28(%eax), %ebx
-	movl	24(%eax), %ebp
-	movl	20(%eax), %esi
-	movl	16(%eax), %edx
-	movl	12(%eax), %ecx
-	movl	8(%eax), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	sbbl	12(%edi), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	sbbl	28(%edi), %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	sarl	$31, %ecx
-	movl	%ecx, %edx
-	shldl	$1, %eax, %edx
-	movl	96(%esp), %ebx
-	movl	4(%ebx), %eax
-	andl	%edx, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	andl	(%ebx), %edx
-	movl	40(%ebx), %eax
-	andl	%ecx, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	36(%ebx), %eax
-	andl	%ecx, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	32(%ebx), %eax
-	andl	%ecx, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	28(%ebx), %eax
-	andl	%ecx, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	24(%ebx), %ebp
-	andl	%ecx, %ebp
-	rorxl	$31, %ecx, %eax
-	andl	20(%ebx), %ecx
-	movl	16(%ebx), %edi
-	andl	%eax, %edi
-	movl	12(%ebx), %esi
-	andl	%eax, %esi
-	andl	8(%ebx), %eax
-	addl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	48(%esp), %ebx          # 4-byte Reload
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebx
-	movl	%edx, (%ebx)
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	movl	%edx, 4(%ebx)
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%eax, 8(%ebx)
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 12(%ebx)
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, 16(%ebx)
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ecx, 20(%ebx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 24(%ebx)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	%eax, 36(%ebx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%ebx)
-	addl	$64, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end171:
-	.size	mcl_fp_subNF11Lbmi2, .Lfunc_end171-mcl_fp_subNF11Lbmi2
-
-	.globl	mcl_fpDbl_add11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add11Lbmi2,@function
-mcl_fpDbl_add11Lbmi2:                   # @mcl_fpDbl_add11Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$80, %esp
-	movl	108(%esp), %ecx
-	movl	104(%esp), %edi
-	movl	12(%edi), %esi
-	movl	16(%edi), %edx
-	movl	8(%ecx), %ebx
-	movl	(%ecx), %ebp
-	addl	(%edi), %ebp
-	movl	100(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%ecx), %ebp
-	adcl	4(%edi), %ebp
-	adcl	8(%edi), %ebx
-	adcl	12(%ecx), %esi
-	adcl	16(%ecx), %edx
-	movl	%ebp, 4(%eax)
-	movl	52(%ecx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%ecx), %ebx
-	movl	%esi, 12(%eax)
-	movl	20(%edi), %esi
-	adcl	%ebx, %esi
-	movl	24(%ecx), %ebx
-	movl	%edx, 16(%eax)
-	movl	24(%edi), %edx
-	adcl	%ebx, %edx
-	movl	28(%ecx), %ebx
-	movl	%esi, 20(%eax)
-	movl	28(%edi), %esi
-	adcl	%ebx, %esi
-	movl	32(%ecx), %ebx
-	movl	%edx, 24(%eax)
-	movl	32(%edi), %edx
-	adcl	%ebx, %edx
-	movl	36(%ecx), %ebx
-	movl	%esi, 28(%eax)
-	movl	36(%edi), %esi
-	adcl	%ebx, %esi
-	movl	40(%ecx), %ebx
-	movl	%edx, 32(%eax)
-	movl	40(%edi), %edx
-	adcl	%ebx, %edx
-	movl	44(%ecx), %ebx
-	movl	%esi, 36(%eax)
-	movl	44(%edi), %esi
-	adcl	%ebx, %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	48(%ecx), %esi
-	movl	%edx, 40(%eax)
-	movl	48(%edi), %eax
-	adcl	%esi, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	52(%edi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	56(%ecx), %edx
-	movl	56(%edi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%ecx), %edx
-	movl	60(%edi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%ecx), %edx
-	movl	64(%edi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%ecx), %eax
-	movl	68(%edi), %edx
-	adcl	%eax, %edx
-	movl	72(%ecx), %esi
-	movl	72(%edi), %eax
-	adcl	%esi, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	76(%ecx), %ebx
-	movl	76(%edi), %esi
-	adcl	%ebx, %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	80(%ecx), %ebp
-	movl	80(%edi), %ebx
-	adcl	%ebp, %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	84(%ecx), %ecx
-	movl	84(%edi), %edi
-	adcl	%ecx, %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	112(%esp), %ebp
-	movl	68(%esp), %edi          # 4-byte Reload
-	subl	(%ebp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	sbbl	4(%ebp), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	sbbl	8(%ebp), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	sbbl	12(%ebp), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	sbbl	16(%ebp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	sbbl	20(%ebp), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%edx, %edi
-	sbbl	24(%ebp), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	sbbl	28(%ebp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	sbbl	32(%ebp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	movl	40(%esp), %ebx          # 4-byte Reload
-	sbbl	36(%ebp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ebx, %edi
-	sbbl	40(%ebp), %edi
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB172_2
-# BB#1:
-	movl	%edi, %ebx
-.LBB172_2:
-	testb	%cl, %cl
-	movl	68(%esp), %ecx          # 4-byte Reload
-	movl	64(%esp), %esi          # 4-byte Reload
-	movl	60(%esp), %edi          # 4-byte Reload
-	movl	56(%esp), %ebp          # 4-byte Reload
-	jne	.LBB172_4
-# BB#3:
-	movl	(%esp), %edx            # 4-byte Reload
-	movl	4(%esp), %esi           # 4-byte Reload
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	24(%esp), %ecx          # 4-byte Reload
-.LBB172_4:
-	movl	100(%esp), %eax
-	movl	%ecx, 44(%eax)
-	movl	72(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	movl	%ebp, 56(%eax)
-	movl	%edi, 60(%eax)
-	movl	%esi, 64(%eax)
-	movl	%edx, 68(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	movl	44(%esp), %edx          # 4-byte Reload
-	jne	.LBB172_6
-# BB#5:
-	movl	28(%esp), %edx          # 4-byte Reload
-.LBB172_6:
-	movl	%edx, 72(%eax)
-	movl	48(%esp), %edx          # 4-byte Reload
-	jne	.LBB172_8
-# BB#7:
-	movl	32(%esp), %edx          # 4-byte Reload
-.LBB172_8:
-	movl	%edx, 76(%eax)
-	jne	.LBB172_10
-# BB#9:
-	movl	36(%esp), %ecx          # 4-byte Reload
-.LBB172_10:
-	movl	%ecx, 80(%eax)
-	movl	%ebx, 84(%eax)
-	addl	$80, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end172:
-	.size	mcl_fpDbl_add11Lbmi2, .Lfunc_end172-mcl_fpDbl_add11Lbmi2
-
-	.globl	mcl_fpDbl_sub11Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub11Lbmi2,@function
-mcl_fpDbl_sub11Lbmi2:                   # @mcl_fpDbl_sub11Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$72, %esp
-	movl	96(%esp), %edx
-	movl	(%edx), %eax
-	movl	4(%edx), %esi
-	movl	100(%esp), %ebp
-	subl	(%ebp), %eax
-	sbbl	4(%ebp), %esi
-	movl	8(%edx), %edi
-	sbbl	8(%ebp), %edi
-	movl	92(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%edx), %eax
-	sbbl	12(%ebp), %eax
-	movl	%esi, 4(%ecx)
-	movl	16(%edx), %esi
-	sbbl	16(%ebp), %esi
-	movl	%edi, 8(%ecx)
-	movl	20(%ebp), %edi
-	movl	%eax, 12(%ecx)
-	movl	20(%edx), %eax
-	sbbl	%edi, %eax
-	movl	24(%ebp), %edi
-	movl	%esi, 16(%ecx)
-	movl	24(%edx), %esi
-	sbbl	%edi, %esi
-	movl	28(%ebp), %edi
-	movl	%eax, 20(%ecx)
-	movl	28(%edx), %eax
-	sbbl	%edi, %eax
-	movl	32(%ebp), %edi
-	movl	%esi, 24(%ecx)
-	movl	32(%edx), %esi
-	sbbl	%edi, %esi
-	movl	36(%ebp), %edi
-	movl	%eax, 28(%ecx)
-	movl	36(%edx), %eax
-	sbbl	%edi, %eax
-	movl	40(%ebp), %edi
-	movl	%esi, 32(%ecx)
-	movl	40(%edx), %esi
-	sbbl	%edi, %esi
-	movl	44(%ebp), %edi
-	movl	%eax, 36(%ecx)
-	movl	44(%edx), %eax
-	sbbl	%edi, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%ebp), %eax
-	movl	%esi, 40(%ecx)
-	movl	48(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	52(%ebp), %eax
-	movl	52(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	56(%ebp), %eax
-	movl	56(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	60(%ebp), %eax
-	movl	60(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	64(%ebp), %eax
-	movl	64(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	68(%ebp), %eax
-	movl	68(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	72(%ebp), %eax
-	movl	72(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	76(%ebp), %eax
-	movl	76(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	80(%ebp), %eax
-	movl	80(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	84(%ebp), %eax
-	movl	84(%edx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	104(%esp), %ebp
-	jne	.LBB173_1
-# BB#2:
-	movl	$0, 28(%esp)            # 4-byte Folded Spill
-	jmp	.LBB173_3
-.LBB173_1:
-	movl	40(%ebp), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-.LBB173_3:
-	testb	%al, %al
-	jne	.LBB173_4
-# BB#5:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB173_6
-.LBB173_4:
-	movl	(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	4(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB173_6:
-	jne	.LBB173_7
-# BB#8:
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB173_9
-.LBB173_7:
-	movl	36(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-.LBB173_9:
-	jne	.LBB173_10
-# BB#11:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB173_12
-.LBB173_10:
-	movl	32(%ebp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB173_12:
-	jne	.LBB173_13
-# BB#14:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB173_15
-.LBB173_13:
-	movl	28(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB173_15:
-	jne	.LBB173_16
-# BB#17:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB173_18
-.LBB173_16:
-	movl	24(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB173_18:
-	jne	.LBB173_19
-# BB#20:
-	movl	$0, %edx
-	jmp	.LBB173_21
-.LBB173_19:
-	movl	20(%ebp), %edx
-.LBB173_21:
-	jne	.LBB173_22
-# BB#23:
-	movl	$0, %edi
-	jmp	.LBB173_24
-.LBB173_22:
-	movl	16(%ebp), %edi
-.LBB173_24:
-	jne	.LBB173_25
-# BB#26:
-	movl	$0, %ebx
-	jmp	.LBB173_27
-.LBB173_25:
-	movl	12(%ebp), %ebx
-.LBB173_27:
-	jne	.LBB173_28
-# BB#29:
-	xorl	%ebp, %ebp
-	jmp	.LBB173_30
-.LBB173_28:
-	movl	8(%ebp), %ebp
-.LBB173_30:
-	movl	8(%esp), %esi           # 4-byte Reload
-	addl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, 44(%ecx)
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 48(%ecx)
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 52(%ecx)
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 56(%ecx)
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edi, 60(%ecx)
-	movl	(%esp), %esi            # 4-byte Reload
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 64(%ecx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, 68(%ecx)
-	movl	12(%esp), %edx          # 4-byte Reload
-	adcl	60(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 72(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 76(%ecx)
-	movl	%eax, 80(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%ecx)
-	addl	$72, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end173:
-	.size	mcl_fpDbl_sub11Lbmi2, .Lfunc_end173-mcl_fpDbl_sub11Lbmi2
-
-	.align	16, 0x90
-	.type	.LmulPv384x32,@function
-.LmulPv384x32:                          # @mulPv384x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$36, %esp
-	movl	%edx, %eax
-	movl	56(%esp), %edx
-	mulxl	4(%eax), %ebx, %edi
-	mulxl	(%eax), %esi, %ebp
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	addl	%ebx, %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	mulxl	8(%eax), %ebx, %esi
-	adcl	%edi, %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	mulxl	12(%eax), %edi, %ebx
-	adcl	%esi, %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	mulxl	16(%eax), %esi, %edi
-	adcl	%ebx, %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	mulxl	20(%eax), %esi, %ebx
-	adcl	%edi, %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	mulxl	24(%eax), %esi, %edi
-	adcl	%ebx, %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	mulxl	28(%eax), %ebx, %esi
-	adcl	%edi, %ebx
-	mulxl	32(%eax), %edi, %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	%esi, %edi
-	mulxl	36(%eax), %esi, %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	mulxl	40(%eax), %edx, %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	32(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%ecx)
-	movl	28(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%ecx)
-	movl	24(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%ecx)
-	movl	20(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%ecx)
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%ecx)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%ecx)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 24(%ecx)
-	movl	%ebx, 28(%ecx)
-	movl	%edi, 32(%ecx)
-	movl	%esi, 36(%ecx)
-	movl	%edx, 40(%ecx)
-	movl	56(%esp), %edx
-	mulxl	44(%eax), %eax, %edx
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	adcl	$0, %edx
-	movl	%edx, 48(%ecx)
-	movl	%ecx, %eax
-	addl	$36, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end174:
-	.size	.LmulPv384x32, .Lfunc_end174-.LmulPv384x32
-
-	.globl	mcl_fp_mulUnitPre12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre12Lbmi2,@function
-mcl_fp_mulUnitPre12Lbmi2:               # @mcl_fp_mulUnitPre12Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$92, %esp
-	calll	.L175$pb
-.L175$pb:
-	popl	%ebx
-.Ltmp26:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp26-.L175$pb), %ebx
-	movl	120(%esp), %eax
-	movl	%eax, (%esp)
-	leal	40(%esp), %ecx
-	movl	116(%esp), %edx
-	calll	.LmulPv384x32
-	movl	88(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp
-	movl	56(%esp), %ebx
-	movl	52(%esp), %edi
-	movl	48(%esp), %esi
-	movl	40(%esp), %edx
-	movl	44(%esp), %ecx
-	movl	112(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	addl	$92, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end175:
-	.size	mcl_fp_mulUnitPre12Lbmi2, .Lfunc_end175-mcl_fp_mulUnitPre12Lbmi2
-
-	.globl	mcl_fpDbl_mulPre12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre12Lbmi2,@function
-mcl_fpDbl_mulPre12Lbmi2:                # @mcl_fpDbl_mulPre12Lbmi2
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$220, %esp
-	calll	.L176$pb
-.L176$pb:
-	popl	%ebx
-.Ltmp27:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp27-.L176$pb), %ebx
-	movl	%ebx, -164(%ebp)        # 4-byte Spill
-	movl	16(%ebp), %esi
-	movl	%esi, 8(%esp)
-	movl	12(%ebp), %edi
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre6Lbmi2@PLT
-	leal	24(%esi), %eax
-	movl	%eax, 8(%esp)
-	leal	24(%edi), %eax
-	movl	%eax, 4(%esp)
-	movl	8(%ebp), %eax
-	leal	48(%eax), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre6Lbmi2@PLT
-	movl	40(%edi), %ebx
-	movl	36(%edi), %eax
-	movl	32(%edi), %edx
-	movl	(%edi), %esi
-	movl	4(%edi), %ecx
-	addl	24(%edi), %esi
-	adcl	28(%edi), %ecx
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	adcl	8(%edi), %edx
-	movl	%edx, -188(%ebp)        # 4-byte Spill
-	adcl	12(%edi), %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	adcl	16(%edi), %ebx
-	movl	%ebx, -180(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -112(%ebp)        # 4-byte Spill
-	movl	16(%ebp), %edi
-	movl	(%edi), %eax
-	addl	24(%edi), %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-	movl	4(%edi), %eax
-	adcl	28(%edi), %eax
-	movl	%eax, -140(%ebp)        # 4-byte Spill
-	movl	32(%edi), %eax
-	adcl	8(%edi), %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	movl	36(%edi), %eax
-	adcl	12(%edi), %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	movl	40(%edi), %ecx
-	adcl	16(%edi), %ecx
-	movl	44(%edi), %eax
-	adcl	20(%edi), %eax
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -184(%ebp)        # 4-byte Spill
-	movl	%ebx, %edi
-	movl	%edx, -156(%ebp)        # 4-byte Spill
-	movl	%esi, -160(%ebp)        # 4-byte Spill
-	movl	%esi, %edx
-	jb	.LBB176_2
-# BB#1:
-	xorl	%edi, %edi
-	movl	$0, -156(%ebp)          # 4-byte Folded Spill
-	movl	$0, -160(%ebp)          # 4-byte Folded Spill
-.LBB176_2:
-	movl	%edi, -176(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %esi
-	movl	44(%esi), %edi
-	movl	-112(%ebp), %ebx        # 4-byte Reload
-	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	20(%esi), %edi
-	movl	%edi, -132(%ebp)        # 4-byte Spill
-	movl	%eax, -124(%ebp)        # 4-byte Spill
-	movl	%ecx, -112(%ebp)        # 4-byte Spill
-	movl	-148(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -116(%ebp)        # 4-byte Spill
-	movl	-144(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -120(%ebp)        # 4-byte Spill
-	movl	-140(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -128(%ebp)        # 4-byte Spill
-	movl	-136(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -152(%ebp)        # 4-byte Spill
-	jb	.LBB176_4
-# BB#3:
-	movl	$0, -124(%ebp)          # 4-byte Folded Spill
-	movl	$0, -112(%ebp)          # 4-byte Folded Spill
-	movl	$0, -116(%ebp)          # 4-byte Folded Spill
-	movl	$0, -120(%ebp)          # 4-byte Folded Spill
-	movl	$0, -128(%ebp)          # 4-byte Folded Spill
-	movl	$0, -152(%ebp)          # 4-byte Folded Spill
-.LBB176_4:
-	movl	%edx, -84(%ebp)
-	movl	-172(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -80(%ebp)
-	movl	-188(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -76(%ebp)
-	movl	-168(%ebp), %edi        # 4-byte Reload
-	movl	%edi, -72(%ebp)
-	movl	-180(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -68(%ebp)
-	movl	-136(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -108(%ebp)
-	movl	-140(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -104(%ebp)
-	movl	-144(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -100(%ebp)
-	movl	-148(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -96(%ebp)
-	movl	%ecx, -92(%ebp)
-	movl	%eax, -88(%ebp)
-	movl	%edi, %ebx
-	sbbl	%edx, %edx
-	movl	-132(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -64(%ebp)
-	movl	-184(%ebp), %ecx        # 4-byte Reload
-	pushl	%eax
-	movl	%ecx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB176_6
-# BB#5:
-	movl	$0, %eax
-	movl	$0, %ebx
-	movl	$0, %esi
-.LBB176_6:
-	movl	%eax, -132(%ebp)        # 4-byte Spill
-	sbbl	%eax, %eax
-	leal	-108(%ebp), %ecx
-	movl	%ecx, 8(%esp)
-	leal	-84(%ebp), %ecx
-	movl	%ecx, 4(%esp)
-	leal	-60(%ebp), %ecx
-	movl	%ecx, (%esp)
-	andl	%eax, %edx
-	movl	-152(%ebp), %edi        # 4-byte Reload
-	addl	-160(%ebp), %edi        # 4-byte Folded Reload
-	adcl	%esi, -128(%ebp)        # 4-byte Folded Spill
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -120(%ebp)        # 4-byte Folded Spill
-	adcl	%ebx, -116(%ebp)        # 4-byte Folded Spill
-	movl	-176(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -112(%ebp)        # 4-byte Folded Spill
-	movl	-132(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -124(%ebp)        # 4-byte Folded Spill
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	andl	$1, %edx
-	movl	%edx, -132(%ebp)        # 4-byte Spill
-	movl	-164(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre6Lbmi2@PLT
-	addl	-36(%ebp), %edi
-	movl	-128(%ebp), %eax        # 4-byte Reload
-	adcl	-32(%ebp), %eax
-	movl	%eax, -128(%ebp)        # 4-byte Spill
-	movl	-120(%ebp), %eax        # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -120(%ebp)        # 4-byte Spill
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -116(%ebp)        # 4-byte Spill
-	movl	-112(%ebp), %eax        # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -112(%ebp)        # 4-byte Spill
-	movl	-124(%ebp), %eax        # 4-byte Reload
-	adcl	-16(%ebp), %eax
-	movl	%eax, -124(%ebp)        # 4-byte Spill
-	adcl	%esi, -132(%ebp)        # 4-byte Folded Spill
-	movl	-60(%ebp), %ecx
-	movl	8(%ebp), %eax
-	subl	(%eax), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	movl	-56(%ebp), %esi
-	sbbl	4(%eax), %esi
-	movl	-52(%ebp), %ecx
-	sbbl	8(%eax), %ecx
-	movl	%ecx, -136(%ebp)        # 4-byte Spill
-	movl	-48(%ebp), %edx
-	sbbl	12(%eax), %edx
-	movl	-44(%ebp), %ebx
-	sbbl	16(%eax), %ebx
-	movl	-40(%ebp), %ecx
-	sbbl	20(%eax), %ecx
-	movl	%ecx, -140(%ebp)        # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, -148(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edi
-	movl	28(%eax), %ecx
-	movl	%ecx, -152(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -128(%ebp)        # 4-byte Folded Spill
-	movl	32(%eax), %ecx
-	movl	%ecx, -156(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -120(%ebp)        # 4-byte Folded Spill
-	movl	36(%eax), %ecx
-	movl	%ecx, -160(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -116(%ebp)        # 4-byte Folded Spill
-	movl	40(%eax), %ecx
-	movl	%ecx, -164(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -112(%ebp)        # 4-byte Folded Spill
-	movl	44(%eax), %ecx
-	movl	%ecx, -168(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -124(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, -132(%ebp)          # 4-byte Folded Spill
-	movl	48(%eax), %ecx
-	movl	%ecx, -192(%ebp)        # 4-byte Spill
-	subl	%ecx, -144(%ebp)        # 4-byte Folded Spill
-	movl	52(%eax), %ecx
-	movl	%ecx, -196(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %esi
-	movl	56(%eax), %ecx
-	movl	%ecx, -200(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -136(%ebp)        # 4-byte Folded Spill
-	movl	60(%eax), %ecx
-	movl	%ecx, -204(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edx
-	movl	64(%eax), %ecx
-	movl	%ecx, -208(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %ebx
-	movl	68(%eax), %ecx
-	movl	%ecx, -212(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -140(%ebp)        # 4-byte Folded Spill
-	movl	72(%eax), %ecx
-	movl	%ecx, -216(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edi
-	movl	76(%eax), %ecx
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -128(%ebp)        # 4-byte Folded Spill
-	movl	80(%eax), %ecx
-	movl	%ecx, -176(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -120(%ebp)        # 4-byte Folded Spill
-	movl	84(%eax), %ecx
-	movl	%ecx, -180(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -116(%ebp)        # 4-byte Folded Spill
-	movl	88(%eax), %ecx
-	movl	%ecx, -184(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -112(%ebp)        # 4-byte Folded Spill
-	movl	92(%eax), %ecx
-	movl	%ecx, -188(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -124(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, -132(%ebp)          # 4-byte Folded Spill
-	movl	-144(%ebp), %ecx        # 4-byte Reload
-	addl	-148(%ebp), %ecx        # 4-byte Folded Reload
-	adcl	-152(%ebp), %esi        # 4-byte Folded Reload
-	movl	%ecx, 24(%eax)
-	movl	-136(%ebp), %ecx        # 4-byte Reload
-	adcl	-156(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%esi, 28(%eax)
-	adcl	-160(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 32(%eax)
-	adcl	-164(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%edx, 36(%eax)
-	movl	-140(%ebp), %ecx        # 4-byte Reload
-	adcl	-168(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%ebx, 40(%eax)
-	adcl	-192(%ebp), %edi        # 4-byte Folded Reload
-	movl	%ecx, 44(%eax)
-	movl	-128(%ebp), %ecx        # 4-byte Reload
-	adcl	-196(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edi, 48(%eax)
-	movl	-120(%ebp), %edx        # 4-byte Reload
-	adcl	-200(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 52(%eax)
-	movl	-116(%ebp), %ecx        # 4-byte Reload
-	adcl	-204(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 56(%eax)
-	movl	-112(%ebp), %edx        # 4-byte Reload
-	adcl	-208(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 60(%eax)
-	movl	-124(%ebp), %ecx        # 4-byte Reload
-	adcl	-212(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 64(%eax)
-	movl	-132(%ebp), %edx        # 4-byte Reload
-	adcl	-216(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 68(%eax)
-	movl	%edx, 72(%eax)
-	movl	-172(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 76(%eax)
-	movl	-176(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 80(%eax)
-	movl	-180(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 84(%eax)
-	movl	-184(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 88(%eax)
-	movl	-188(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 92(%eax)
-	addl	$220, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end176:
-	.size	mcl_fpDbl_mulPre12Lbmi2, .Lfunc_end176-mcl_fpDbl_mulPre12Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre12Lbmi2,@function
-mcl_fpDbl_sqrPre12Lbmi2:                # @mcl_fpDbl_sqrPre12Lbmi2
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$220, %esp
-	calll	.L177$pb
-.L177$pb:
-	popl	%ebx
-.Ltmp28:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp28-.L177$pb), %ebx
-	movl	%ebx, -152(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %esi
-	movl	%esi, (%esp)
-	calll	mcl_fpDbl_mulPre6Lbmi2@PLT
-	leal	24(%edi), %eax
-	movl	%eax, 8(%esp)
-	movl	%eax, 4(%esp)
-	leal	48(%esi), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre6Lbmi2@PLT
-	movl	44(%edi), %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-	movl	40(%edi), %edx
-	movl	36(%edi), %eax
-	movl	(%edi), %ebx
-	movl	4(%edi), %esi
-	addl	24(%edi), %ebx
-	adcl	28(%edi), %esi
-	movl	32(%edi), %ecx
-	adcl	8(%edi), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	adcl	12(%edi), %eax
-	movl	%eax, -140(%ebp)        # 4-byte Spill
-	adcl	16(%edi), %edx
-	movl	%edx, %ecx
-	movl	-136(%ebp), %eax        # 4-byte Reload
-	adcl	20(%edi), %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %edx
-	movl	%edx, -156(%ebp)        # 4-byte Spill
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edx
-	popl	%eax
-	movl	%edx, -124(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -120(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %edx
-	sbbl	%edi, %edi
-	movl	%edi, -148(%ebp)        # 4-byte Spill
-	movl	%ebx, %edi
-	addl	%edi, %edi
-	movl	%edi, -112(%ebp)        # 4-byte Spill
-	movl	%esi, %edi
-	movl	%esi, %eax
-	adcl	%edi, %edi
-	movl	%edi, -132(%ebp)        # 4-byte Spill
-	pushl	%eax
-	movl	%edx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB177_2
-# BB#1:
-	movl	$0, -132(%ebp)          # 4-byte Folded Spill
-	movl	$0, -112(%ebp)          # 4-byte Folded Spill
-.LBB177_2:
-	movl	-144(%ebp), %esi        # 4-byte Reload
-	addl	%esi, %esi
-	movl	-140(%ebp), %edx        # 4-byte Reload
-	adcl	%edx, %edx
-	movl	%edx, -116(%ebp)        # 4-byte Spill
-	movl	-120(%ebp), %edx        # 4-byte Reload
-	pushl	%eax
-	movl	%edx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB177_3
-# BB#4:
-	movl	$0, -116(%ebp)          # 4-byte Folded Spill
-	movl	$0, -120(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB177_5
-.LBB177_3:
-	movl	%eax, %edx
-	shrl	$31, %edx
-	orl	%esi, %edx
-	movl	%edx, -120(%ebp)        # 4-byte Spill
-.LBB177_5:
-	movl	-136(%ebp), %edx        # 4-byte Reload
-	movl	%ecx, %esi
-	addl	%esi, %esi
-	adcl	%edx, %edx
-	movl	-124(%ebp), %edi        # 4-byte Reload
-	pushl	%eax
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB177_6
-# BB#7:
-	xorl	%edx, %edx
-	movl	$0, -128(%ebp)          # 4-byte Folded Spill
-	movl	-140(%ebp), %edi        # 4-byte Reload
-	jmp	.LBB177_8
-.LBB177_6:
-	movl	%ecx, -124(%ebp)        # 4-byte Spill
-	movl	-140(%ebp), %edi        # 4-byte Reload
-	movl	%edi, %ecx
-	shrl	$31, %ecx
-	orl	%esi, %ecx
-	movl	%ecx, -128(%ebp)        # 4-byte Spill
-	movl	-124(%ebp), %ecx        # 4-byte Reload
-.LBB177_8:
-	movl	%edx, -124(%ebp)        # 4-byte Spill
-	movl	%ebx, -84(%ebp)
-	movl	%eax, -80(%ebp)
-	movl	-144(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -76(%ebp)
-	movl	%edi, -72(%ebp)
-	movl	%ecx, -68(%ebp)
-	movl	-136(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -64(%ebp)
-	movl	%ebx, -108(%ebp)
-	movl	%eax, -104(%ebp)
-	movl	%esi, -100(%ebp)
-	movl	%edi, -96(%ebp)
-	movl	%ecx, -92(%ebp)
-	movl	%edx, -88(%ebp)
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB177_9
-# BB#10:
-	movl	$0, -136(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB177_11
-.LBB177_9:
-	shrl	$31, %edx
-	movl	%edx, -136(%ebp)        # 4-byte Spill
-.LBB177_11:
-	leal	-108(%ebp), %eax
-	movl	%eax, 8(%esp)
-	leal	-84(%ebp), %eax
-	movl	%eax, 4(%esp)
-	leal	-60(%ebp), %eax
-	movl	%eax, (%esp)
-	movl	-148(%ebp), %esi        # 4-byte Reload
-	andl	$1, %esi
-	movl	-152(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre6Lbmi2@PLT
-	movl	-112(%ebp), %eax        # 4-byte Reload
-	addl	-36(%ebp), %eax
-	movl	%eax, -112(%ebp)        # 4-byte Spill
-	movl	-132(%ebp), %edi        # 4-byte Reload
-	adcl	-32(%ebp), %edi
-	movl	-120(%ebp), %eax        # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -120(%ebp)        # 4-byte Spill
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -116(%ebp)        # 4-byte Spill
-	movl	-128(%ebp), %eax        # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -128(%ebp)        # 4-byte Spill
-	movl	-124(%ebp), %eax        # 4-byte Reload
-	adcl	-16(%ebp), %eax
-	movl	%eax, -124(%ebp)        # 4-byte Spill
-	adcl	-136(%ebp), %esi        # 4-byte Folded Reload
-	movl	-60(%ebp), %edx
-	movl	8(%ebp), %eax
-	subl	(%eax), %edx
-	movl	-56(%ebp), %ebx
-	sbbl	4(%eax), %ebx
-	movl	-52(%ebp), %ecx
-	sbbl	8(%eax), %ecx
-	movl	%ecx, -136(%ebp)        # 4-byte Spill
-	movl	-48(%ebp), %ecx
-	sbbl	12(%eax), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	movl	-44(%ebp), %ecx
-	sbbl	16(%eax), %ecx
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	movl	-40(%ebp), %ecx
-	sbbl	20(%eax), %ecx
-	movl	%ecx, -140(%ebp)        # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, -148(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -112(%ebp)        # 4-byte Folded Spill
-	movl	28(%eax), %ecx
-	movl	%ecx, -152(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edi
-	movl	%edi, -132(%ebp)        # 4-byte Spill
-	movl	32(%eax), %ecx
-	movl	%ecx, -156(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -120(%ebp)        # 4-byte Folded Spill
-	movl	36(%eax), %ecx
-	movl	%ecx, -160(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -116(%ebp)        # 4-byte Folded Spill
-	movl	40(%eax), %ecx
-	movl	%ecx, -164(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -128(%ebp)        # 4-byte Folded Spill
-	movl	44(%eax), %ecx
-	movl	%ecx, -168(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -124(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, %esi
-	movl	48(%eax), %ecx
-	movl	%ecx, -192(%ebp)        # 4-byte Spill
-	subl	%ecx, %edx
-	movl	52(%eax), %ecx
-	movl	%ecx, -196(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %ebx
-	movl	56(%eax), %ecx
-	movl	%ecx, -200(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -136(%ebp)        # 4-byte Folded Spill
-	movl	60(%eax), %ecx
-	movl	%ecx, -204(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -144(%ebp)        # 4-byte Folded Spill
-	movl	64(%eax), %ecx
-	movl	%ecx, -208(%ebp)        # 4-byte Spill
-	movl	-172(%ebp), %edi        # 4-byte Reload
-	sbbl	%ecx, %edi
-	movl	68(%eax), %ecx
-	movl	%ecx, -212(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -140(%ebp)        # 4-byte Folded Spill
-	movl	72(%eax), %ecx
-	movl	%ecx, -216(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -112(%ebp)        # 4-byte Folded Spill
-	movl	76(%eax), %ecx
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -132(%ebp)        # 4-byte Folded Spill
-	movl	80(%eax), %ecx
-	movl	%ecx, -176(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -120(%ebp)        # 4-byte Folded Spill
-	movl	84(%eax), %ecx
-	movl	%ecx, -180(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -116(%ebp)        # 4-byte Folded Spill
-	movl	88(%eax), %ecx
-	movl	%ecx, -184(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -128(%ebp)        # 4-byte Folded Spill
-	movl	92(%eax), %ecx
-	movl	%ecx, -188(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -124(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, %esi
-	addl	-148(%ebp), %edx        # 4-byte Folded Reload
-	adcl	-152(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%edx, 24(%eax)
-	movl	-136(%ebp), %ecx        # 4-byte Reload
-	adcl	-156(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%ebx, 28(%eax)
-	movl	-144(%ebp), %edx        # 4-byte Reload
-	adcl	-160(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 32(%eax)
-	adcl	-164(%ebp), %edi        # 4-byte Folded Reload
-	movl	%edx, 36(%eax)
-	movl	-140(%ebp), %edx        # 4-byte Reload
-	adcl	-168(%ebp), %edx        # 4-byte Folded Reload
-	movl	%edi, 40(%eax)
-	movl	-112(%ebp), %ecx        # 4-byte Reload
-	adcl	-192(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 44(%eax)
-	movl	-132(%ebp), %edi        # 4-byte Reload
-	adcl	-196(%ebp), %edi        # 4-byte Folded Reload
-	movl	%ecx, 48(%eax)
-	movl	-120(%ebp), %edx        # 4-byte Reload
-	adcl	-200(%ebp), %edx        # 4-byte Folded Reload
-	movl	%edi, 52(%eax)
-	movl	-116(%ebp), %ecx        # 4-byte Reload
-	adcl	-204(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 56(%eax)
-	movl	-128(%ebp), %edx        # 4-byte Reload
-	adcl	-208(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 60(%eax)
-	movl	-124(%ebp), %ecx        # 4-byte Reload
-	adcl	-212(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 64(%eax)
-	adcl	-216(%ebp), %esi        # 4-byte Folded Reload
-	movl	%ecx, 68(%eax)
-	movl	%esi, 72(%eax)
-	movl	-172(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 76(%eax)
-	movl	-176(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 80(%eax)
-	movl	-180(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 84(%eax)
-	movl	-184(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 88(%eax)
-	movl	-188(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 92(%eax)
-	addl	$220, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end177:
-	.size	mcl_fpDbl_sqrPre12Lbmi2, .Lfunc_end177-mcl_fpDbl_sqrPre12Lbmi2
-
-	.globl	mcl_fp_mont12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont12Lbmi2,@function
-mcl_fp_mont12Lbmi2:                     # @mcl_fp_mont12Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1436, %esp             # imm = 0x59C
-	calll	.L178$pb
-.L178$pb:
-	popl	%ebx
-.Ltmp29:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp29-.L178$pb), %ebx
-	movl	1468(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1384(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	1384(%esp), %ebp
-	movl	1388(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	1432(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	1428(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	1424(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	1420(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1416(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1412(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1408(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1404(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1400(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1396(%esp), %edi
-	movl	1392(%esp), %esi
-	movl	%eax, (%esp)
-	leal	1328(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	addl	1328(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1336(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	1340(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1352(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1356(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1360(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1364(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	1372(%esp), %esi
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1376(%esp), %ebp
-	sbbl	%edi, %edi
-	movl	1464(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1272(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %edi
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	1272(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1288(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1300(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1308(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1312(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	1316(%esp), %ebp
-	adcl	1320(%esp), %edi
-	sbbl	%eax, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1216(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	movl	84(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1216(%esp), %esi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1220(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	1224(%esp), %esi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1228(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1232(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1236(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1240(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1244(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1248(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1252(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1256(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	1260(%esp), %ebp
-	adcl	1264(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1160(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	1160(%esp), %ecx
-	adcl	1164(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1188(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	1200(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	adcl	1204(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1208(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1104(%esp), %ecx
-	movl	1468(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv384x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	1104(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1116(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	1140(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1144(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1148(%esp), %edi
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	1152(%esp), %ebp
-	adcl	$0, %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1048(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	68(%esp), %ecx          # 4-byte Reload
-	addl	1048(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1080(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1084(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1088(%esp), %edi
-	adcl	1092(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	992(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %esi
-	movl	%esi, %eax
-	addl	992(%esp), %ebp
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	996(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1000(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	1004(%esp), %ebp
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1008(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1012(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1016(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1020(%esp), %esi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1024(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1028(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	1032(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1036(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1040(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	adcl	$0, %edi
-	movl	1464(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	936(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	936(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	944(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	948(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	960(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	980(%esp), %esi
-	adcl	984(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	880(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	880(%esp), %eax
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	892(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	912(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	924(%esp), %esi
-	movl	%esi, %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	928(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	824(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	824(%esp), %ecx
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	840(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	852(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	856(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	864(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	768(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	768(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	780(%esp), %ebp
-	adcl	784(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	800(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	808(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	712(%esp), %ecx
-	movl	1460(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv384x32
-	movl	44(%esp), %eax          # 4-byte Reload
-	addl	712(%esp), %eax
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	716(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	720(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	724(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	728(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	732(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	736(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	740(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	744(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	748(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	752(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	756(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	760(%esp), %edi
-	sbbl	%ebp, %ebp
-	movl	%eax, %esi
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	656(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %ebp
-	movl	%ebp, %eax
-	addl	656(%esp), %esi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	660(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	664(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	668(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	672(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	676(%esp), %ebp
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	680(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	684(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	688(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	692(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	696(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	704(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	adcl	$0, %edi
-	movl	1464(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	600(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	600(%esp), %ecx
-	adcl	604(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	616(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	620(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	636(%esp), %esi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	648(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	544(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	movl	44(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	544(%esp), %edi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	548(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	552(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	556(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	560(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	564(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	568(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	572(%esp), %edi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	576(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	580(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	584(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	588(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	592(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	488(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	488(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	512(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	524(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	532(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	536(%esp), %ebp
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	432(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	432(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	440(%esp), %edi
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	444(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	480(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	376(%esp), %ecx
-	adcl	380(%esp), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	adcl	384(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	392(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	416(%esp), %ebp
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	320(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	movl	76(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	320(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	332(%esp), %esi
-	adcl	336(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	344(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	360(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	88(%esp), %ecx          # 4-byte Reload
-	addl	264(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	272(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	284(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	288(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	296(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	208(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	movl	88(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	208(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	224(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	232(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	236(%esp), %edi
-	adcl	240(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	248(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	152(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	152(%esp), %ecx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	164(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	176(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	188(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	196(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	96(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %esi
-	addl	96(%esp), %edi
-	movl	84(%esp), %ebx          # 4-byte Reload
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	100(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	adcl	108(%esp), %ebx
-	adcl	112(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	120(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	124(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	128(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	132(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	136(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	140(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	144(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	adcl	$0, %esi
-	movl	1468(%esp), %edx
-	subl	(%edx), %eax
-	sbbl	4(%edx), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	%ebx, %edi
-	sbbl	8(%edx), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	sbbl	12(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	sbbl	20(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%edx), %ecx
-	movl	44(%esp), %edi          # 4-byte Reload
-	sbbl	32(%edx), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	sbbl	36(%edx), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	sbbl	40(%edx), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	sbbl	44(%edx), %ebp
-	movl	%ebp, %edx
-	sbbl	$0, %esi
-	andl	$1, %esi
-	jne	.LBB178_2
-# BB#1:
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-.LBB178_2:
-	movl	%esi, %ecx
-	testb	%cl, %cl
-	movl	92(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_4
-# BB#3:
-	movl	%eax, %ecx
-.LBB178_4:
-	movl	1456(%esp), %eax
-	movl	%ecx, (%eax)
-	movl	68(%esp), %edi          # 4-byte Reload
-	jne	.LBB178_6
-# BB#5:
-	movl	16(%esp), %edi          # 4-byte Reload
-.LBB178_6:
-	movl	%edi, 4(%eax)
-	movl	64(%esp), %ebp          # 4-byte Reload
-	jne	.LBB178_8
-# BB#7:
-	movl	20(%esp), %ebx          # 4-byte Reload
-.LBB178_8:
-	movl	%ebx, 8(%eax)
-	jne	.LBB178_10
-# BB#9:
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-.LBB178_10:
-	movl	72(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	jne	.LBB178_12
-# BB#11:
-	movl	28(%esp), %ebp          # 4-byte Reload
-.LBB178_12:
-	movl	%ebp, 16(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_14
-# BB#13:
-	movl	32(%esp), %ecx          # 4-byte Reload
-.LBB178_14:
-	movl	%ecx, 20(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_16
-# BB#15:
-	movl	36(%esp), %ecx          # 4-byte Reload
-.LBB178_16:
-	movl	%ecx, 24(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_18
-# BB#17:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB178_18:
-	movl	%ecx, 32(%eax)
-	movl	60(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_20
-# BB#19:
-	movl	80(%esp), %ecx          # 4-byte Reload
-.LBB178_20:
-	movl	%ecx, 36(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_22
-# BB#21:
-	movl	84(%esp), %ecx          # 4-byte Reload
-.LBB178_22:
-	movl	%ecx, 40(%eax)
-	movl	88(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_24
-# BB#23:
-	movl	%edx, %ecx
-.LBB178_24:
-	movl	%ecx, 44(%eax)
-	addl	$1436, %esp             # imm = 0x59C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end178:
-	.size	mcl_fp_mont12Lbmi2, .Lfunc_end178-mcl_fp_mont12Lbmi2
-
-	.globl	mcl_fp_montNF12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF12Lbmi2,@function
-mcl_fp_montNF12Lbmi2:                   # @mcl_fp_montNF12Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1420, %esp             # imm = 0x58C
-	calll	.L179$pb
-.L179$pb:
-	popl	%ebx
-.Ltmp30:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp30-.L179$pb), %ebx
-	movl	1452(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	1448(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1368(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	1368(%esp), %ebp
-	movl	1372(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	1416(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1412(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1408(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	1404(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	1400(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1396(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1392(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	1388(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1384(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1380(%esp), %edi
-	movl	1376(%esp), %esi
-	movl	%eax, (%esp)
-	leal	1312(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	1312(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1320(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	adcl	1324(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1328(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1336(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	1344(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1352(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	1356(%esp), %esi
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	1360(%esp), %ebp
-	movl	1448(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1256(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	1304(%esp), %eax
-	movl	56(%esp), %edx          # 4-byte Reload
-	addl	1256(%esp), %edx
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1260(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1264(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1268(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1272(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	1276(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1280(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	1284(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1288(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1292(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	1296(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	1300(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, %edi
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1200(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	1200(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1204(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	1208(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1212(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1216(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1220(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1228(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1232(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1236(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1240(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1244(%esp), %ebp
-	adcl	1248(%esp), %edi
-	movl	1448(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1144(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	1192(%esp), %eax
-	movl	68(%esp), %edx          # 4-byte Reload
-	addl	1144(%esp), %edx
-	adcl	1148(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1152(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1156(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	1160(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1164(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1168(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1172(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1176(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1180(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	1184(%esp), %ebp
-	adcl	1188(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1088(%esp), %ecx
-	movl	1452(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv384x32
-	addl	1088(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1092(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%esi, %edi
-	adcl	1104(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1116(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	1124(%esp), %esi
-	adcl	1128(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	1136(%esp), %ebp
-	movl	1448(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1032(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	1080(%esp), %eax
-	movl	52(%esp), %edx          # 4-byte Reload
-	addl	1032(%esp), %edx
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1036(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1040(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	1044(%esp), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1048(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1052(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1056(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1060(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	1064(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1068(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1072(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	1076(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %edi
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	976(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	976(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	1004(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	1012(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1024(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	1448(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	920(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	968(%esp), %eax
-	movl	40(%esp), %edx          # 4-byte Reload
-	addl	920(%esp), %edx
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	924(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	928(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	932(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	936(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	940(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	944(%esp), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	948(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	952(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	956(%esp), %edi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	960(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	964(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	864(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	864(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	884(%esp), %ebp
-	adcl	888(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	900(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	912(%esp), %edi
-	movl	1448(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	808(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	856(%esp), %edx
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	808(%esp), %ecx
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	824(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	adcl	828(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	832(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	852(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	752(%esp), %ecx
-	movl	1452(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv384x32
-	addl	752(%esp), %esi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	760(%esp), %edi
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	764(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	776(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	792(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1448(%esp), %ecx
-	movl	%ecx, %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	1444(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv384x32
-	movl	744(%esp), %ecx
-	movl	32(%esp), %eax          # 4-byte Reload
-	addl	696(%esp), %eax
-	adcl	700(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	704(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	708(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	712(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	716(%esp), %esi
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	720(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	724(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	728(%esp), %edi
-	adcl	732(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	736(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	740(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	640(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	640(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	648(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	660(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	668(%esp), %esi
-	adcl	672(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	676(%esp), %edi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1448(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	584(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	632(%esp), %edx
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	584(%esp), %ecx
-	adcl	588(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	596(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	608(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	616(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	620(%esp), %edi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	528(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	528(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	540(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	564(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	568(%esp), %edi
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	572(%esp), %esi
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	576(%esp), %ebp
-	movl	1448(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	520(%esp), %edx
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	472(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	508(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	512(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	516(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	416(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	432(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	440(%esp), %ebp
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	444(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1448(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	360(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	408(%esp), %edx
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	360(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	372(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	380(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	384(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	304(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	312(%esp), %edi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	320(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	328(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %esi          # 4-byte Reload
-	adcl	332(%esp), %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1448(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	248(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	296(%esp), %edx
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	248(%esp), %ecx
-	adcl	252(%esp), %edi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	260(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%esi, %ebp
-	adcl	272(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	192(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	192(%esp), %esi
-	adcl	196(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	200(%esp), %edi
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	204(%esp), %esi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	216(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	adcl	224(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	1448(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	136(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	184(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	136(%esp), %ecx
-	adcl	140(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	adcl	144(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	152(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	160(%esp), %edi
-	adcl	164(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	168(%esp), %ebp
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	80(%esp), %ecx
-	movl	1452(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv384x32
-	addl	80(%esp), %esi
-	movl	56(%esp), %esi          # 4-byte Reload
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	88(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	92(%esp), %esi
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	104(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %edx          # 4-byte Reload
-	adcl	108(%esp), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	112(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	120(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	124(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	128(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	1452(%esp), %ebp
-	subl	(%ebp), %edx
-	movl	%ecx, %eax
-	sbbl	4(%ebp), %eax
-	movl	%esi, %ebx
-	sbbl	8(%ebp), %ebx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	sbbl	12(%ebp), %ecx
-	movl	40(%esp), %edi          # 4-byte Reload
-	sbbl	16(%ebp), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	sbbl	20(%ebp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	32(%esp), %edi          # 4-byte Reload
-	sbbl	24(%ebp), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	sbbl	28(%ebp), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	sbbl	32(%ebp), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	sbbl	36(%ebp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	sbbl	40(%ebp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	sbbl	44(%ebp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	%edi, %ebp
-	sarl	$31, %ebp
-	testl	%ebp, %ebp
-	movl	76(%esp), %ebp          # 4-byte Reload
-	js	.LBB179_2
-# BB#1:
-	movl	%edx, %ebp
-.LBB179_2:
-	movl	1440(%esp), %edx
-	movl	%ebp, (%edx)
-	movl	68(%esp), %edi          # 4-byte Reload
-	js	.LBB179_4
-# BB#3:
-	movl	%eax, %edi
-.LBB179_4:
-	movl	%edi, 4(%edx)
-	js	.LBB179_6
-# BB#5:
-	movl	%ebx, %esi
-.LBB179_6:
-	movl	%esi, 8(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB179_8
-# BB#7:
-	movl	%ecx, %eax
-.LBB179_8:
-	movl	%eax, 12(%edx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB179_10
-# BB#9:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB179_10:
-	movl	%eax, 16(%edx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	js	.LBB179_12
-# BB#11:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB179_12:
-	movl	%eax, 20(%edx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	js	.LBB179_14
-# BB#13:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB179_14:
-	movl	%eax, 24(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB179_16
-# BB#15:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB179_16:
-	movl	%eax, 28(%edx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	js	.LBB179_18
-# BB#17:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB179_18:
-	movl	%eax, 32(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	js	.LBB179_20
-# BB#19:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB179_20:
-	movl	%eax, 36(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB179_22
-# BB#21:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB179_22:
-	movl	%eax, 40(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB179_24
-# BB#23:
-	movl	56(%esp), %eax          # 4-byte Reload
-.LBB179_24:
-	movl	%eax, 44(%edx)
-	addl	$1420, %esp             # imm = 0x58C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end179:
-	.size	mcl_fp_montNF12Lbmi2, .Lfunc_end179-mcl_fp_montNF12Lbmi2
-
-	.globl	mcl_fp_montRed12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed12Lbmi2,@function
-mcl_fp_montRed12Lbmi2:                  # @mcl_fp_montRed12Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$828, %esp              # imm = 0x33C
-	calll	.L180$pb
-.L180$pb:
-	popl	%eax
-.Ltmp31:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp31-.L180$pb), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	856(%esp), %edx
-	movl	-4(%edx), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	852(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 88(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	imull	%esi, %ebx
-	movl	92(%ecx), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	88(%ecx), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	84(%ecx), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	80(%ecx), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	76(%ecx), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	72(%ecx), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	68(%ecx), %edi
-	movl	%edi, 140(%esp)         # 4-byte Spill
-	movl	64(%ecx), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	60(%ecx), %esi
-	movl	%esi, 148(%esp)         # 4-byte Spill
-	movl	56(%ecx), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	52(%ecx), %esi
-	movl	%esi, 156(%esp)         # 4-byte Spill
-	movl	48(%ecx), %edi
-	movl	%edi, 152(%esp)         # 4-byte Spill
-	movl	44(%ecx), %edi
-	movl	%edi, 132(%esp)         # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	32(%ecx), %edi
-	movl	28(%ecx), %esi
-	movl	24(%ecx), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	20(%ecx), %ebp
-	movl	16(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	12(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	8(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	44(%edx), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	776(%esp), %ecx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	movl	88(%esp), %eax          # 4-byte Reload
-	addl	776(%esp), %eax
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	780(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	796(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	804(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	adcl	808(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	movl	136(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	720(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	720(%esp), %esi
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	724(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	752(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	664(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	664(%esp), %ebp
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	668(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	692(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	696(%esp), %ebp
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	712(%esp), %edi
-	movl	%edi, 136(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	movl	144(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	608(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	608(%esp), %esi
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	612(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	636(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 144(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	552(%esp), %esi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	556(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, %esi
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	496(%esp), %edi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	500(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %ebp         # 4-byte Reload
-	adcl	528(%esp), %ebp
-	movl	136(%esp), %edi         # 4-byte Reload
-	adcl	532(%esp), %edi
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	440(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	440(%esp), %esi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	468(%esp), %ebp
-	movl	%ebp, 156(%esp)         # 4-byte Spill
-	adcl	472(%esp), %edi
-	movl	%edi, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %esi         # 4-byte Reload
-	adcl	476(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	384(%esp), %ecx
-	movl	856(%esp), %eax
-	movl	%eax, %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	384(%esp), %edi
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	388(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %ebp         # 4-byte Reload
-	adcl	400(%esp), %ebp
-	movl	152(%esp), %edi         # 4-byte Reload
-	adcl	404(%esp), %edi
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	adcl	416(%esp), %esi
-	movl	%esi, 148(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	140(%esp), %esi         # 4-byte Reload
-	adcl	424(%esp), %esi
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	movl	100(%esp), %eax         # 4-byte Reload
-	addl	328(%esp), %eax
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	336(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	adcl	340(%esp), %ebp
-	movl	%ebp, 132(%esp)         # 4-byte Spill
-	adcl	344(%esp), %edi
-	movl	%edi, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	348(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %ecx         # 4-byte Reload
-	adcl	352(%esp), %ecx
-	movl	%ecx, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %ecx         # 4-byte Reload
-	adcl	356(%esp), %ecx
-	movl	%ecx, 148(%esp)         # 4-byte Spill
-	movl	116(%esp), %ebp         # 4-byte Reload
-	adcl	360(%esp), %ebp
-	adcl	364(%esp), %esi
-	movl	%esi, 140(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	368(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	144(%esp), %ecx         # 4-byte Reload
-	adcl	372(%esp), %ecx
-	movl	%ecx, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	376(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	%eax, %esi
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	272(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	280(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	284(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	288(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %ecx         # 4-byte Reload
-	adcl	292(%esp), %ecx
-	movl	%ecx, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %ecx         # 4-byte Reload
-	adcl	296(%esp), %ecx
-	movl	%ecx, 148(%esp)         # 4-byte Spill
-	movl	%ebp, %esi
-	adcl	300(%esp), %esi
-	movl	140(%esp), %ecx         # 4-byte Reload
-	adcl	304(%esp), %ecx
-	movl	%ecx, 140(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	308(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	144(%esp), %ecx         # 4-byte Reload
-	adcl	312(%esp), %ecx
-	movl	%ecx, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	316(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	320(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, %ebp
-	movl	%eax, %edi
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	216(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	216(%esp), %edi
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	220(%esp), %ecx
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	240(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	124(%esp), %esi         # 4-byte Reload
-	adcl	248(%esp), %esi
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	160(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	160(%esp), %edi
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	%eax, %edi
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %edx         # 4-byte Reload
-	adcl	172(%esp), %edx
-	movl	%edx, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %ebx         # 4-byte Reload
-	adcl	176(%esp), %ebx
-	movl	%ebx, 148(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	180(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	adcl	188(%esp), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, %ebp
-	subl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	156(%esp), %esi         # 4-byte Reload
-	sbbl	16(%esp), %esi          # 4-byte Folded Reload
-	sbbl	20(%esp), %edx          # 4-byte Folded Reload
-	sbbl	28(%esp), %ebx          # 4-byte Folded Reload
-	sbbl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	140(%esp), %eax         # 4-byte Reload
-	sbbl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	sbbl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	sbbl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	sbbl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	sbbl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	sbbl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	sbbl	$0, %ebp
-	andl	$1, %ebp
-	jne	.LBB180_2
-# BB#1:
-	movl	%ebx, 148(%esp)         # 4-byte Spill
-.LBB180_2:
-	movl	%ebp, %ebx
-	testb	%bl, %bl
-	movl	152(%esp), %ebx         # 4-byte Reload
-	jne	.LBB180_4
-# BB#3:
-	movl	%edi, %ebx
-.LBB180_4:
-	movl	848(%esp), %edi
-	movl	%ebx, (%edi)
-	movl	144(%esp), %ebx         # 4-byte Reload
-	jne	.LBB180_6
-# BB#5:
-	movl	%esi, 156(%esp)         # 4-byte Spill
-.LBB180_6:
-	movl	156(%esp), %esi         # 4-byte Reload
-	movl	%esi, 4(%edi)
-	movl	136(%esp), %esi         # 4-byte Reload
-	jne	.LBB180_8
-# BB#7:
-	movl	%edx, %esi
-.LBB180_8:
-	movl	%esi, 8(%edi)
-	movl	148(%esp), %edx         # 4-byte Reload
-	movl	%edx, 12(%edi)
-	movl	128(%esp), %esi         # 4-byte Reload
-	movl	116(%esp), %edx         # 4-byte Reload
-	jne	.LBB180_10
-# BB#9:
-	movl	%ecx, %edx
-.LBB180_10:
-	movl	%edx, 16(%edi)
-	movl	120(%esp), %edx         # 4-byte Reload
-	movl	140(%esp), %ecx         # 4-byte Reload
-	jne	.LBB180_12
-# BB#11:
-	movl	84(%esp), %ecx          # 4-byte Reload
-.LBB180_12:
-	movl	%ecx, 20(%edi)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	movl	124(%esp), %eax         # 4-byte Reload
-	jne	.LBB180_14
-# BB#13:
-	movl	88(%esp), %eax          # 4-byte Reload
-.LBB180_14:
-	movl	%eax, 24(%edi)
-	movl	104(%esp), %eax         # 4-byte Reload
-	jne	.LBB180_16
-# BB#15:
-	movl	92(%esp), %ebx          # 4-byte Reload
-.LBB180_16:
-	movl	%ebx, 28(%edi)
-	jne	.LBB180_18
-# BB#17:
-	movl	96(%esp), %esi          # 4-byte Reload
-.LBB180_18:
-	movl	%esi, 32(%edi)
-	jne	.LBB180_20
-# BB#19:
-	movl	100(%esp), %edx         # 4-byte Reload
-.LBB180_20:
-	movl	%edx, 36(%edi)
-	jne	.LBB180_22
-# BB#21:
-	movl	112(%esp), %ecx         # 4-byte Reload
-.LBB180_22:
-	movl	%ecx, 40(%edi)
-	jne	.LBB180_24
-# BB#23:
-	movl	132(%esp), %eax         # 4-byte Reload
-.LBB180_24:
-	movl	%eax, 44(%edi)
-	addl	$828, %esp              # imm = 0x33C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end180:
-	.size	mcl_fp_montRed12Lbmi2, .Lfunc_end180-mcl_fp_montRed12Lbmi2
-
-	.globl	mcl_fp_addPre12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre12Lbmi2,@function
-mcl_fp_addPre12Lbmi2:                   # @mcl_fp_addPre12Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %edi
-	adcl	8(%ecx), %edi
-	movl	16(%esp), %ebx
-	movl	%edx, (%ebx)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%ebx)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ebx)
-	movl	20(%ecx), %edx
-	adcl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ebx)
-	movl	24(%ecx), %esi
-	adcl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ebx)
-	movl	28(%ecx), %edx
-	adcl	%edi, %edx
-	movl	32(%eax), %edi
-	movl	%esi, 24(%ebx)
-	movl	32(%ecx), %esi
-	adcl	%edi, %esi
-	movl	36(%eax), %edi
-	movl	%edx, 28(%ebx)
-	movl	36(%ecx), %edx
-	adcl	%edi, %edx
-	movl	40(%eax), %edi
-	movl	%esi, 32(%ebx)
-	movl	40(%ecx), %esi
-	adcl	%edi, %esi
-	movl	%edx, 36(%ebx)
-	movl	%esi, 40(%ebx)
-	movl	44(%eax), %eax
-	movl	44(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 44(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end181:
-	.size	mcl_fp_addPre12Lbmi2, .Lfunc_end181-mcl_fp_addPre12Lbmi2
-
-	.globl	mcl_fp_subPre12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre12Lbmi2,@function
-mcl_fp_subPre12Lbmi2:                   # @mcl_fp_subPre12Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edx), %ebx
-	movl	20(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebp)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ebp)
-	movl	24(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ebp)
-	movl	28(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%edi, 24(%ebp)
-	movl	32(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	36(%edx), %ebx
-	movl	%esi, 28(%ebp)
-	movl	36(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	40(%edx), %ebx
-	movl	%edi, 32(%ebp)
-	movl	40(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	%esi, 36(%ebp)
-	movl	%edi, 40(%ebp)
-	movl	44(%edx), %edx
-	movl	44(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 44(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end182:
-	.size	mcl_fp_subPre12Lbmi2, .Lfunc_end182-mcl_fp_subPre12Lbmi2
-
-	.globl	mcl_fp_shr1_12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_12Lbmi2,@function
-mcl_fp_shr1_12Lbmi2:                    # @mcl_fp_shr1_12Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	8(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 4(%ecx)
-	movl	12(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 8(%ecx)
-	movl	16(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 16(%ecx)
-	movl	24(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 24(%ecx)
-	movl	32(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 32(%ecx)
-	movl	40(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	%edx, 40(%ecx)
-	shrl	%eax
-	movl	%eax, 44(%ecx)
-	popl	%esi
-	retl
-.Lfunc_end183:
-	.size	mcl_fp_shr1_12Lbmi2, .Lfunc_end183-mcl_fp_shr1_12Lbmi2
-
-	.globl	mcl_fp_add12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add12Lbmi2,@function
-mcl_fp_add12Lbmi2:                      # @mcl_fp_add12Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$36, %esp
-	movl	64(%esp), %ebx
-	movl	(%ebx), %edx
-	movl	4(%ebx), %ecx
-	movl	60(%esp), %eax
-	addl	(%eax), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	4(%eax), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	8(%ebx), %ecx
-	adcl	8(%eax), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	12(%eax), %edx
-	movl	16(%eax), %ecx
-	adcl	12(%ebx), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	16(%ebx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	20(%eax), %ecx
-	adcl	20(%ebx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	24(%eax), %ecx
-	adcl	24(%ebx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	28(%eax), %ecx
-	adcl	28(%ebx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	32(%eax), %ebp
-	adcl	32(%ebx), %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	movl	36(%eax), %edi
-	adcl	36(%ebx), %edi
-	movl	40(%eax), %esi
-	adcl	40(%ebx), %esi
-	movl	44(%eax), %edx
-	adcl	44(%ebx), %edx
-	movl	56(%esp), %ebx
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, (%ebx)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 4(%ebx)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 8(%ebx)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%ebx)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 16(%ebx)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 20(%ebx)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%ebx)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 28(%ebx)
-	movl	%ebp, 32(%ebx)
-	movl	%edi, 36(%ebx)
-	movl	%esi, 40(%ebx)
-	movl	%edx, 44(%ebx)
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	68(%esp), %ebp
-	subl	(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	sbbl	4(%ebp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	sbbl	8(%ebp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	sbbl	12(%ebp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	sbbl	16(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	sbbl	20(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	sbbl	24(%ebp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %eax           # 4-byte Reload
-	sbbl	28(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	(%esp), %eax            # 4-byte Reload
-	sbbl	32(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	sbbl	36(%ebp), %edi
-	sbbl	40(%ebp), %esi
-	sbbl	44(%ebp), %edx
-	sbbl	$0, %ecx
-	testb	$1, %cl
-	jne	.LBB184_2
-# BB#1:                                 # %nocarry
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, (%ebx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 4(%ebx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 8(%ebx)
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 16(%ebx)
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 20(%ebx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 24(%ebx)
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 28(%ebx)
-	movl	(%esp), %eax            # 4-byte Reload
-	movl	%eax, 32(%ebx)
-	movl	%edi, 36(%ebx)
-	movl	%esi, 40(%ebx)
-	movl	%edx, 44(%ebx)
-.LBB184_2:                              # %carry
-	addl	$36, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end184:
-	.size	mcl_fp_add12Lbmi2, .Lfunc_end184-mcl_fp_add12Lbmi2
-
-	.globl	mcl_fp_addNF12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF12Lbmi2,@function
-mcl_fp_addNF12Lbmi2:                    # @mcl_fp_addNF12Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$88, %esp
-	movl	116(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	movl	112(%esp), %edx
-	addl	(%edx), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	4(%edx), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	44(%esi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	40(%esi), %ebp
-	movl	36(%esi), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	32(%esi), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	28(%esi), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	20(%esi), %ebx
-	movl	16(%esi), %edi
-	movl	12(%esi), %ecx
-	movl	8(%esi), %eax
-	adcl	8(%edx), %eax
-	adcl	12(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	16(%edx), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	20(%edx), %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	24(%edx), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	28(%edx), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	32(%edx), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	36(%edx), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	adcl	40(%edx), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	44(%edx), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	120(%esp), %ebp
-	movl	60(%esp), %edx          # 4-byte Reload
-	subl	(%ebp), %edx
-	movl	64(%esp), %eax          # 4-byte Reload
-	sbbl	4(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%esi, %eax
-	sbbl	8(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	16(%ebp), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	sbbl	20(%ebp), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ecx
-	sbbl	24(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%ebp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%ebp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	sbbl	36(%ebp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %edi
-	sbbl	40(%ebp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	sbbl	44(%ebp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	%edi, %ebp
-	movl	60(%esp), %edi          # 4-byte Reload
-	sarl	$31, %ebp
-	testl	%ebp, %ebp
-	js	.LBB185_2
-# BB#1:
-	movl	%edx, %edi
-.LBB185_2:
-	movl	108(%esp), %edx
-	movl	%edi, (%edx)
-	movl	64(%esp), %edi          # 4-byte Reload
-	js	.LBB185_4
-# BB#3:
-	movl	(%esp), %edi            # 4-byte Reload
-.LBB185_4:
-	movl	%edi, 4(%edx)
-	movl	%eax, %ebp
-	js	.LBB185_6
-# BB#5:
-	movl	4(%esp), %esi           # 4-byte Reload
-.LBB185_6:
-	movl	%esi, 8(%edx)
-	movl	%ecx, %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	movl	48(%esp), %ecx          # 4-byte Reload
-	js	.LBB185_8
-# BB#7:
-	movl	8(%esp), %ecx           # 4-byte Reload
-.LBB185_8:
-	movl	%ecx, 12(%edx)
-	movl	76(%esp), %ebx          # 4-byte Reload
-	movl	84(%esp), %edi          # 4-byte Reload
-	js	.LBB185_10
-# BB#9:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB185_10:
-	movl	%eax, 16(%edx)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	js	.LBB185_12
-# BB#11:
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-.LBB185_12:
-	movl	56(%esp), %eax          # 4-byte Reload
-	movl	%eax, 20(%edx)
-	js	.LBB185_14
-# BB#13:
-	movl	20(%esp), %ebp          # 4-byte Reload
-.LBB185_14:
-	movl	%ebp, 24(%edx)
-	js	.LBB185_16
-# BB#15:
-	movl	24(%esp), %edi          # 4-byte Reload
-.LBB185_16:
-	movl	%edi, 28(%edx)
-	js	.LBB185_18
-# BB#17:
-	movl	28(%esp), %ebx          # 4-byte Reload
-.LBB185_18:
-	movl	%ebx, 32(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	js	.LBB185_20
-# BB#19:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB185_20:
-	movl	%eax, 36(%edx)
-	js	.LBB185_22
-# BB#21:
-	movl	36(%esp), %esi          # 4-byte Reload
-.LBB185_22:
-	movl	%esi, 40(%edx)
-	js	.LBB185_24
-# BB#23:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB185_24:
-	movl	%ecx, 44(%edx)
-	addl	$88, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end185:
-	.size	mcl_fp_addNF12Lbmi2, .Lfunc_end185-mcl_fp_addNF12Lbmi2
-
-	.globl	mcl_fp_sub12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub12Lbmi2,@function
-mcl_fp_sub12Lbmi2:                      # @mcl_fp_sub12Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$40, %esp
-	movl	64(%esp), %esi
-	movl	(%esi), %ecx
-	movl	4(%esi), %eax
-	xorl	%ebx, %ebx
-	movl	68(%esp), %edi
-	subl	(%edi), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	28(%esi), %edx
-	sbbl	28(%edi), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	32(%esi), %ecx
-	sbbl	32(%edi), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	36(%esi), %eax
-	sbbl	36(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	40(%esi), %ebp
-	sbbl	40(%edi), %ebp
-	movl	44(%esi), %esi
-	sbbl	44(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	60(%esp), %ebx
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	%edx, 28(%ebx)
-	movl	%ecx, 32(%ebx)
-	movl	%eax, 36(%ebx)
-	movl	%ebp, 40(%ebx)
-	movl	%esi, 44(%ebx)
-	je	.LBB186_2
-# BB#1:                                 # %carry
-	movl	%esi, %edi
-	movl	72(%esp), %esi
-	movl	12(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	24(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	8(%esi), %ecx
-	movl	12(%esi), %eax
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	36(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	%eax, 36(%ebx)
-	movl	40(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 40(%ebx)
-	movl	44(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 44(%ebx)
-.LBB186_2:                              # %nocarry
-	addl	$40, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end186:
-	.size	mcl_fp_sub12Lbmi2, .Lfunc_end186-mcl_fp_sub12Lbmi2
-
-	.globl	mcl_fp_subNF12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF12Lbmi2,@function
-mcl_fp_subNF12Lbmi2:                    # @mcl_fp_subNF12Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$72, %esp
-	movl	96(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %eax
-	movl	100(%esp), %edi
-	subl	(%edi), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%ecx), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	32(%ecx), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	28(%ecx), %ebp
-	movl	24(%ecx), %ebx
-	movl	20(%ecx), %esi
-	movl	16(%ecx), %edx
-	movl	12(%ecx), %eax
-	movl	8(%ecx), %ecx
-	sbbl	8(%edi), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	sbbl	12(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	sbbl	28(%edi), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	sarl	$31, %eax
-	movl	%eax, %edx
-	addl	%edx, %edx
-	movl	%eax, %edi
-	adcl	%edi, %edi
-	movl	%eax, %ebp
-	adcl	%ebp, %ebp
-	movl	%eax, %esi
-	adcl	%esi, %esi
-	shrl	$31, %ecx
-	orl	%edx, %ecx
-	movl	104(%esp), %edx
-	andl	12(%edx), %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	andl	8(%edx), %ebp
-	andl	4(%edx), %edi
-	andl	(%edx), %ecx
-	movl	44(%edx), %esi
-	andl	%eax, %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	40(%edx), %esi
-	andl	%eax, %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	36(%edx), %esi
-	andl	%eax, %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	32(%edx), %esi
-	andl	%eax, %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	28(%edx), %esi
-	andl	%eax, %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	24(%edx), %ebx
-	andl	%eax, %ebx
-	movl	20(%edx), %esi
-	andl	%eax, %esi
-	andl	16(%edx), %eax
-	addl	48(%esp), %ecx          # 4-byte Folded Reload
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	92(%esp), %edx
-	movl	%ecx, (%edx)
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edi, 4(%edx)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebp, 8(%edx)
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 12(%edx)
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%eax, 16(%edx)
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%esi, 20(%edx)
-	movl	(%esp), %ecx            # 4-byte Reload
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebx, 24(%edx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 28(%edx)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 32(%edx)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 36(%edx)
-	movl	%eax, 40(%edx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%edx)
-	addl	$72, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end187:
-	.size	mcl_fp_subNF12Lbmi2, .Lfunc_end187-mcl_fp_subNF12Lbmi2
-
-	.globl	mcl_fpDbl_add12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add12Lbmi2,@function
-mcl_fpDbl_add12Lbmi2:                   # @mcl_fpDbl_add12Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$88, %esp
-	movl	116(%esp), %ecx
-	movl	112(%esp), %edi
-	movl	12(%edi), %esi
-	movl	16(%edi), %edx
-	movl	8(%ecx), %ebx
-	movl	(%ecx), %ebp
-	addl	(%edi), %ebp
-	movl	108(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%ecx), %ebp
-	adcl	4(%edi), %ebp
-	adcl	8(%edi), %ebx
-	adcl	12(%ecx), %esi
-	adcl	16(%ecx), %edx
-	movl	%ebp, 4(%eax)
-	movl	56(%ecx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%ecx), %ebx
-	movl	%esi, 12(%eax)
-	movl	20(%edi), %esi
-	adcl	%ebx, %esi
-	movl	24(%ecx), %ebx
-	movl	%edx, 16(%eax)
-	movl	24(%edi), %edx
-	adcl	%ebx, %edx
-	movl	28(%ecx), %ebx
-	movl	%esi, 20(%eax)
-	movl	28(%edi), %esi
-	adcl	%ebx, %esi
-	movl	32(%ecx), %ebx
-	movl	%edx, 24(%eax)
-	movl	32(%edi), %edx
-	adcl	%ebx, %edx
-	movl	36(%ecx), %ebx
-	movl	%esi, 28(%eax)
-	movl	36(%edi), %esi
-	adcl	%ebx, %esi
-	movl	40(%ecx), %ebx
-	movl	%edx, 32(%eax)
-	movl	40(%edi), %edx
-	adcl	%ebx, %edx
-	movl	44(%ecx), %ebx
-	movl	%esi, 36(%eax)
-	movl	44(%edi), %esi
-	adcl	%ebx, %esi
-	movl	48(%ecx), %ebx
-	movl	%edx, 40(%eax)
-	movl	48(%edi), %edx
-	adcl	%ebx, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	52(%ecx), %ebx
-	movl	%esi, 44(%eax)
-	movl	52(%edi), %eax
-	adcl	%ebx, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	56(%edi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%ecx), %eax
-	movl	60(%edi), %edx
-	adcl	%eax, %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	64(%ecx), %eax
-	movl	64(%edi), %edx
-	adcl	%eax, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	68(%ecx), %eax
-	movl	68(%edi), %edx
-	adcl	%eax, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	72(%ecx), %eax
-	movl	72(%edi), %edx
-	adcl	%eax, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	76(%ecx), %eax
-	movl	76(%edi), %edx
-	adcl	%eax, %edx
-	movl	80(%ecx), %esi
-	movl	80(%edi), %eax
-	adcl	%esi, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	84(%ecx), %ebx
-	movl	84(%edi), %esi
-	adcl	%ebx, %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	88(%ecx), %ebp
-	movl	88(%edi), %ebx
-	adcl	%ebp, %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	92(%ecx), %ecx
-	movl	92(%edi), %edi
-	adcl	%ecx, %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	120(%esp), %ebp
-	movl	72(%esp), %edi          # 4-byte Reload
-	subl	(%ebp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	sbbl	4(%ebp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	sbbl	8(%ebp), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	sbbl	12(%ebp), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	sbbl	16(%ebp), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	sbbl	20(%ebp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	sbbl	24(%ebp), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%edx, %edi
-	sbbl	28(%ebp), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	sbbl	32(%ebp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	sbbl	36(%ebp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	movl	44(%esp), %ebx          # 4-byte Reload
-	sbbl	40(%ebp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %edi
-	sbbl	44(%ebp), %edi
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB188_2
-# BB#1:
-	movl	%edi, %ebx
-.LBB188_2:
-	testb	%cl, %cl
-	movl	72(%esp), %ecx          # 4-byte Reload
-	movl	68(%esp), %esi          # 4-byte Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	movl	60(%esp), %ebp          # 4-byte Reload
-	jne	.LBB188_4
-# BB#3:
-	movl	(%esp), %edx            # 4-byte Reload
-	movl	4(%esp), %esi           # 4-byte Reload
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB188_4:
-	movl	108(%esp), %eax
-	movl	%ecx, 48(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%eax)
-	movl	84(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 60(%eax)
-	movl	%ebp, 64(%eax)
-	movl	%edi, 68(%eax)
-	movl	%esi, 72(%eax)
-	movl	%edx, 76(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	jne	.LBB188_6
-# BB#5:
-	movl	32(%esp), %edx          # 4-byte Reload
-.LBB188_6:
-	movl	%edx, 80(%eax)
-	movl	52(%esp), %edx          # 4-byte Reload
-	jne	.LBB188_8
-# BB#7:
-	movl	36(%esp), %edx          # 4-byte Reload
-.LBB188_8:
-	movl	%edx, 84(%eax)
-	jne	.LBB188_10
-# BB#9:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB188_10:
-	movl	%ecx, 88(%eax)
-	movl	%ebx, 92(%eax)
-	addl	$88, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end188:
-	.size	mcl_fpDbl_add12Lbmi2, .Lfunc_end188-mcl_fpDbl_add12Lbmi2
-
-	.globl	mcl_fpDbl_sub12Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub12Lbmi2,@function
-mcl_fpDbl_sub12Lbmi2:                   # @mcl_fpDbl_sub12Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$76, %esp
-	movl	100(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %edx
-	movl	104(%esp), %ebx
-	subl	(%ebx), %eax
-	sbbl	4(%ebx), %edx
-	movl	8(%esi), %edi
-	sbbl	8(%ebx), %edi
-	movl	96(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%esi), %eax
-	sbbl	12(%ebx), %eax
-	movl	%edx, 4(%ecx)
-	movl	16(%esi), %edx
-	sbbl	16(%ebx), %edx
-	movl	%edi, 8(%ecx)
-	movl	20(%ebx), %edi
-	movl	%eax, 12(%ecx)
-	movl	20(%esi), %eax
-	sbbl	%edi, %eax
-	movl	24(%ebx), %edi
-	movl	%edx, 16(%ecx)
-	movl	24(%esi), %edx
-	sbbl	%edi, %edx
-	movl	28(%ebx), %edi
-	movl	%eax, 20(%ecx)
-	movl	28(%esi), %eax
-	sbbl	%edi, %eax
-	movl	32(%ebx), %edi
-	movl	%edx, 24(%ecx)
-	movl	32(%esi), %edx
-	sbbl	%edi, %edx
-	movl	36(%ebx), %edi
-	movl	%eax, 28(%ecx)
-	movl	36(%esi), %eax
-	sbbl	%edi, %eax
-	movl	40(%ebx), %edi
-	movl	%edx, 32(%ecx)
-	movl	40(%esi), %edx
-	sbbl	%edi, %edx
-	movl	44(%ebx), %edi
-	movl	%eax, 36(%ecx)
-	movl	44(%esi), %eax
-	sbbl	%edi, %eax
-	movl	48(%ebx), %edi
-	movl	%edx, 40(%ecx)
-	movl	48(%esi), %edx
-	sbbl	%edi, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	52(%ebx), %edx
-	movl	%eax, 44(%ecx)
-	movl	52(%esi), %eax
-	sbbl	%edx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	56(%ebx), %eax
-	movl	56(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	60(%ebx), %eax
-	movl	60(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	64(%ebx), %eax
-	movl	64(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	68(%ebx), %eax
-	movl	68(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	72(%ebx), %eax
-	movl	72(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	76(%ebx), %eax
-	movl	76(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	80(%ebx), %eax
-	movl	80(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	84(%ebx), %eax
-	movl	84(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	88(%ebx), %eax
-	movl	88(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	92(%ebx), %eax
-	movl	92(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	108(%esp), %ebp
-	jne	.LBB189_1
-# BB#2:
-	movl	$0, 36(%esp)            # 4-byte Folded Spill
-	jmp	.LBB189_3
-.LBB189_1:
-	movl	44(%ebp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-.LBB189_3:
-	testb	%al, %al
-	jne	.LBB189_4
-# BB#5:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	movl	$0, %esi
-	jmp	.LBB189_6
-.LBB189_4:
-	movl	(%ebp), %esi
-	movl	4(%ebp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB189_6:
-	jne	.LBB189_7
-# BB#8:
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB189_9
-.LBB189_7:
-	movl	40(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-.LBB189_9:
-	jne	.LBB189_10
-# BB#11:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	jmp	.LBB189_12
-.LBB189_10:
-	movl	36(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB189_12:
-	jne	.LBB189_13
-# BB#14:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB189_15
-.LBB189_13:
-	movl	32(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB189_15:
-	jne	.LBB189_16
-# BB#17:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB189_18
-.LBB189_16:
-	movl	28(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB189_18:
-	jne	.LBB189_19
-# BB#20:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB189_21
-.LBB189_19:
-	movl	24(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB189_21:
-	jne	.LBB189_22
-# BB#23:
-	movl	$0, %ebx
-	jmp	.LBB189_24
-.LBB189_22:
-	movl	20(%ebp), %ebx
-.LBB189_24:
-	jne	.LBB189_25
-# BB#26:
-	movl	$0, %eax
-	jmp	.LBB189_27
-.LBB189_25:
-	movl	16(%ebp), %eax
-.LBB189_27:
-	jne	.LBB189_28
-# BB#29:
-	movl	%ebp, %edx
-	movl	$0, %ebp
-	jmp	.LBB189_30
-.LBB189_28:
-	movl	%ebp, %edx
-	movl	12(%edx), %ebp
-.LBB189_30:
-	jne	.LBB189_31
-# BB#32:
-	xorl	%edx, %edx
-	jmp	.LBB189_33
-.LBB189_31:
-	movl	8(%edx), %edx
-.LBB189_33:
-	addl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	12(%esp), %edi          # 4-byte Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 48(%ecx)
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edi, 52(%ecx)
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edx, 56(%ecx)
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 60(%ecx)
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%eax, 64(%ecx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebx, 68(%ecx)
-	movl	4(%esp), %edx           # 4-byte Reload
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 72(%ecx)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 76(%ecx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 80(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 84(%ecx)
-	movl	%eax, 88(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%ecx)
-	addl	$76, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end189:
-	.size	mcl_fpDbl_sub12Lbmi2, .Lfunc_end189-mcl_fpDbl_sub12Lbmi2
-
-	.align	16, 0x90
-	.type	.LmulPv416x32,@function
-.LmulPv416x32:                          # @mulPv416x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$44, %esp
-	movl	%edx, %eax
-	movl	64(%esp), %ebx
-	movl	%ebx, %edx
-	mulxl	4(%eax), %esi, %ebp
-	movl	%ebx, %edx
-	mulxl	(%eax), %edi, %edx
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	addl	%esi, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	8(%eax), %edx, %esi
-	adcl	%ebp, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	12(%eax), %edx, %edi
-	adcl	%esi, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	16(%eax), %edx, %esi
-	adcl	%edi, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	20(%eax), %edx, %edi
-	adcl	%esi, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	24(%eax), %edx, %esi
-	adcl	%edi, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	28(%eax), %edx, %edi
-	adcl	%esi, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	32(%eax), %edx, %esi
-	adcl	%edi, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	36(%eax), %edi, %ebp
-	adcl	%esi, %edi
-	movl	%ebx, %edx
-	mulxl	40(%eax), %esi, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	%ebp, %esi
-	movl	%ebx, %edx
-	mulxl	44(%eax), %edx, %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%ecx)
-	movl	36(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%ecx)
-	movl	32(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%ecx)
-	movl	28(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%ecx)
-	movl	24(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%ecx)
-	movl	20(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%ecx)
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%ecx)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%ecx)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 32(%ecx)
-	movl	%edi, 36(%ecx)
-	movl	%esi, 40(%ecx)
-	movl	%edx, 44(%ecx)
-	movl	%ebx, %edx
-	mulxl	48(%eax), %eax, %edx
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 48(%ecx)
-	adcl	$0, %edx
-	movl	%edx, 52(%ecx)
-	movl	%ecx, %eax
-	addl	$44, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end190:
-	.size	.LmulPv416x32, .Lfunc_end190-.LmulPv416x32
-
-	.globl	mcl_fp_mulUnitPre13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre13Lbmi2,@function
-mcl_fp_mulUnitPre13Lbmi2:               # @mcl_fp_mulUnitPre13Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$108, %esp
-	calll	.L191$pb
-.L191$pb:
-	popl	%ebx
-.Ltmp32:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp32-.L191$pb), %ebx
-	movl	136(%esp), %eax
-	movl	%eax, (%esp)
-	leal	48(%esp), %ecx
-	movl	132(%esp), %edx
-	calll	.LmulPv416x32
-	movl	100(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp
-	movl	64(%esp), %ebx
-	movl	60(%esp), %edi
-	movl	56(%esp), %esi
-	movl	48(%esp), %edx
-	movl	52(%esp), %ecx
-	movl	128(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	addl	$108, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end191:
-	.size	mcl_fp_mulUnitPre13Lbmi2, .Lfunc_end191-mcl_fp_mulUnitPre13Lbmi2
-
-	.globl	mcl_fpDbl_mulPre13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre13Lbmi2,@function
-mcl_fpDbl_mulPre13Lbmi2:                # @mcl_fpDbl_mulPre13Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$844, %esp              # imm = 0x34C
-	calll	.L192$pb
-.L192$pb:
-	popl	%edi
-.Ltmp33:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp33-.L192$pb), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	872(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	784(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	%edx, %esi
-	movl	%edi, %ebx
-	calll	.LmulPv416x32
-	movl	836(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	832(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	828(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	820(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	816(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	812(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	808(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	804(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	800(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	796(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	792(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	784(%esp), %eax
-	movl	788(%esp), %ebp
-	movl	864(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	872(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	728(%esp), %ecx
-	movl	%esi, %edx
-	movl	%edi, %ebx
-	calll	.LmulPv416x32
-	addl	728(%esp), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	780(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	776(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	772(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	768(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	764(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	760(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	756(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	752(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	748(%esp), %edi
-	movl	744(%esp), %esi
-	movl	740(%esp), %edx
-	movl	732(%esp), %eax
-	movl	736(%esp), %ecx
-	movl	864(%esp), %ebp
-	movl	24(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 4(%ebp)
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	672(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	672(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	724(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	720(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	716(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	712(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	708(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	704(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	700(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	696(%esp), %ebx
-	movl	692(%esp), %edi
-	movl	688(%esp), %esi
-	movl	684(%esp), %edx
-	movl	676(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	680(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	60(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%eax)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	616(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	616(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	668(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	664(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	660(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	656(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	652(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	648(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	644(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	640(%esp), %ebx
-	movl	636(%esp), %edi
-	movl	632(%esp), %esi
-	movl	628(%esp), %edx
-	movl	620(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	624(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	560(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	612(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	596(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	592(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	588(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	584(%esp), %ebx
-	movl	580(%esp), %edi
-	movl	576(%esp), %esi
-	movl	572(%esp), %edx
-	movl	564(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	568(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	504(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	540(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	536(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	532(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	528(%esp), %ebx
-	movl	524(%esp), %edi
-	movl	520(%esp), %esi
-	movl	516(%esp), %edx
-	movl	508(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	512(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	448(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	448(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	500(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	496(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	492(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	488(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	484(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	480(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	476(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	472(%esp), %ebp
-	movl	468(%esp), %edi
-	movl	464(%esp), %esi
-	movl	460(%esp), %edx
-	movl	452(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	456(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	56(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 24(%eax)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	392(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	392(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	444(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	440(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	436(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	432(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	428(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	424(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	420(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	416(%esp), %ebx
-	movl	412(%esp), %edi
-	movl	408(%esp), %esi
-	movl	404(%esp), %edx
-	movl	396(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	400(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	336(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	336(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	388(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	380(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	376(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	364(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	360(%esp), %ebp
-	movl	356(%esp), %edi
-	movl	352(%esp), %esi
-	movl	348(%esp), %edx
-	movl	340(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	344(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	60(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 32(%eax)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	280(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	332(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	328(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	324(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	320(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	316(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	312(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	304(%esp), %ebx
-	movl	300(%esp), %edi
-	movl	296(%esp), %esi
-	movl	292(%esp), %edx
-	movl	284(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	288(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 36(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	224(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	276(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	272(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	268(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	264(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	252(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	248(%esp), %ebx
-	movl	244(%esp), %edi
-	movl	240(%esp), %esi
-	movl	236(%esp), %edx
-	movl	228(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	232(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 40(%eax)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %edi
-	movl	44(%edi), %eax
-	movl	%eax, (%esp)
-	leal	168(%esp), %ecx
-	movl	868(%esp), %eax
-	movl	%eax, %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	60(%esp), %esi          # 4-byte Reload
-	addl	168(%esp), %esi
-	movl	220(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	208(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	204(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	200(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	196(%esp), %ebp
-	movl	192(%esp), %ebx
-	movl	188(%esp), %edi
-	movl	184(%esp), %edx
-	movl	180(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	176(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	%esi, 44(%eax)
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	104(%esp), %ebp         # 4-byte Folded Reload
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 52(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	112(%esp), %esi
-	movl	%esi, %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	120(%esp), %edi
-	movl	164(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	156(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	152(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	148(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	144(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	140(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	136(%esp), %ebx
-	movl	132(%esp), %esi
-	movl	128(%esp), %edx
-	movl	124(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	%ebp, 48(%eax)
-	movl	68(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 52(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, 56(%eax)
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 60(%eax)
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 64(%eax)
-	adcl	104(%esp), %ebx         # 4-byte Folded Reload
-	movl	%esi, 68(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebx, 72(%eax)
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 76(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	88(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 80(%eax)
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 84(%eax)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	100(%esp), %ecx         # 4-byte Folded Reload
-	movl	%edx, 88(%eax)
-	movl	%ecx, 92(%eax)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 96(%eax)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 100(%eax)
-	addl	$844, %esp              # imm = 0x34C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end192:
-	.size	mcl_fpDbl_mulPre13Lbmi2, .Lfunc_end192-mcl_fpDbl_mulPre13Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre13Lbmi2,@function
-mcl_fpDbl_sqrPre13Lbmi2:                # @mcl_fpDbl_sqrPre13Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$844, %esp              # imm = 0x34C
-	calll	.L193$pb
-.L193$pb:
-	popl	%ebx
-.Ltmp34:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp34-.L193$pb), %ebx
-	movl	%ebx, 108(%esp)         # 4-byte Spill
-	movl	868(%esp), %edx
-	movl	(%edx), %eax
-	movl	%eax, (%esp)
-	leal	784(%esp), %ecx
-	movl	%edx, %edi
-	movl	%ebx, %esi
-	calll	.LmulPv416x32
-	movl	836(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	832(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	828(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	820(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	816(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	812(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	808(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	804(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	800(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	796(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	792(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	784(%esp), %eax
-	movl	788(%esp), %ebp
-	movl	864(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%edi, %edx
-	movl	4(%edx), %eax
-	movl	%eax, (%esp)
-	leal	728(%esp), %ecx
-	movl	%esi, %ebx
-	calll	.LmulPv416x32
-	addl	728(%esp), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	780(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	776(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	772(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	768(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	764(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	760(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	756(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	752(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	748(%esp), %edi
-	movl	744(%esp), %esi
-	movl	740(%esp), %edx
-	movl	732(%esp), %eax
-	movl	736(%esp), %ecx
-	movl	864(%esp), %ebp
-	movl	24(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 4(%ebp)
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	8(%edx), %eax
-	movl	%eax, (%esp)
-	leal	672(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	672(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	724(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	720(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	716(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	712(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	708(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	704(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	700(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	696(%esp), %ebx
-	movl	692(%esp), %edi
-	movl	688(%esp), %esi
-	movl	684(%esp), %edx
-	movl	676(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	680(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	60(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%eax)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	12(%edx), %eax
-	movl	%eax, (%esp)
-	leal	616(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	616(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	668(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	664(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	660(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	656(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	652(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	648(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	644(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	640(%esp), %ebx
-	movl	636(%esp), %edi
-	movl	632(%esp), %esi
-	movl	628(%esp), %edx
-	movl	620(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	624(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	16(%edx), %eax
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	560(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	612(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	596(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	592(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	588(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	584(%esp), %ebx
-	movl	580(%esp), %edi
-	movl	576(%esp), %esi
-	movl	572(%esp), %edx
-	movl	564(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	568(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	20(%edx), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	504(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	540(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	536(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	532(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	528(%esp), %ebx
-	movl	524(%esp), %edi
-	movl	520(%esp), %esi
-	movl	516(%esp), %edx
-	movl	508(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	512(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	24(%edx), %eax
-	movl	%eax, (%esp)
-	leal	448(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	448(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	500(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	496(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	492(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	488(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	484(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	480(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	476(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	472(%esp), %ebp
-	movl	468(%esp), %edi
-	movl	464(%esp), %esi
-	movl	460(%esp), %edx
-	movl	452(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	456(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	56(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 24(%eax)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	28(%edx), %eax
-	movl	%eax, (%esp)
-	leal	392(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	392(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	444(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	440(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	436(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	432(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	428(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	424(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	420(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	416(%esp), %ebx
-	movl	412(%esp), %edi
-	movl	408(%esp), %esi
-	movl	404(%esp), %edx
-	movl	396(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	400(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	32(%edx), %eax
-	movl	%eax, (%esp)
-	leal	336(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	336(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	388(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	380(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	376(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	364(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	360(%esp), %ebp
-	movl	356(%esp), %edi
-	movl	352(%esp), %esi
-	movl	348(%esp), %edx
-	movl	340(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	344(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	60(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 32(%eax)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	36(%edx), %eax
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	280(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	332(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	328(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	324(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	320(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	316(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	312(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	304(%esp), %ebx
-	movl	300(%esp), %edi
-	movl	296(%esp), %esi
-	movl	292(%esp), %edx
-	movl	284(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	288(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 36(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	40(%edx), %eax
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	224(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	276(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	272(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	268(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	264(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	252(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	248(%esp), %ebx
-	movl	244(%esp), %edi
-	movl	240(%esp), %esi
-	movl	236(%esp), %edx
-	movl	228(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	232(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 40(%eax)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	44(%edx), %eax
-	movl	%eax, (%esp)
-	leal	168(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	60(%esp), %esi          # 4-byte Reload
-	addl	168(%esp), %esi
-	movl	220(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	208(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	204(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	200(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	196(%esp), %ebp
-	movl	192(%esp), %ebx
-	movl	188(%esp), %edi
-	movl	184(%esp), %edx
-	movl	180(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	176(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	%esi, 44(%eax)
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	104(%esp), %ebp         # 4-byte Folded Reload
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 52(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	48(%edx), %eax
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	112(%esp), %esi
-	movl	%esi, %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	120(%esp), %edi
-	movl	164(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	156(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	152(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	148(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	144(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	140(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	136(%esp), %ebx
-	movl	132(%esp), %esi
-	movl	128(%esp), %edx
-	movl	124(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	%ebp, 48(%eax)
-	movl	68(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 52(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, 56(%eax)
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 60(%eax)
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 64(%eax)
-	adcl	104(%esp), %ebx         # 4-byte Folded Reload
-	movl	%esi, 68(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebx, 72(%eax)
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 76(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	88(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 80(%eax)
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 84(%eax)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	100(%esp), %ecx         # 4-byte Folded Reload
-	movl	%edx, 88(%eax)
-	movl	%ecx, 92(%eax)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 96(%eax)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 100(%eax)
-	addl	$844, %esp              # imm = 0x34C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end193:
-	.size	mcl_fpDbl_sqrPre13Lbmi2, .Lfunc_end193-mcl_fpDbl_sqrPre13Lbmi2
-
-	.globl	mcl_fp_mont13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont13Lbmi2,@function
-mcl_fp_mont13Lbmi2:                     # @mcl_fp_mont13Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1548, %esp             # imm = 0x60C
-	calll	.L194$pb
-.L194$pb:
-	popl	%ebx
-.Ltmp35:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp35-.L194$pb), %ebx
-	movl	1580(%esp), %eax
-	movl	-4(%eax), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1488(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	1488(%esp), %esi
-	movl	1492(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	imull	%edi, %eax
-	movl	1540(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	1536(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	1532(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	1528(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1524(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1520(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1516(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1512(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	1508(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1504(%esp), %edi
-	movl	1500(%esp), %ebp
-	movl	1496(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	1432(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	1432(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1436(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1440(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1444(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	adcl	1448(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1452(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1456(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1460(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1464(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1468(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1472(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1476(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1480(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1484(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	1576(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1376(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	1376(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1380(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1384(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1388(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1400(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	1404(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1408(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1412(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1416(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	1420(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1424(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	1428(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1320(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %esi
-	movl	%esi, %eax
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	1320(%esp), %ecx
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1324(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1328(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1332(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1336(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	1340(%esp), %esi
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1344(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	1348(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1352(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1356(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1360(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	1364(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	1368(%esp), %ebp
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	1372(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1264(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	1264(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1272(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	1280(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1288(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	1300(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1308(%esp), %ebp
-	adcl	1312(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1208(%esp), %ecx
-	movl	1580(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv416x32
-	movl	84(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1208(%esp), %edi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1212(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1216(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1220(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	1224(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1228(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1232(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1236(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1240(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	1244(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	1248(%esp), %edi
-	adcl	1252(%esp), %ebp
-	movl	%ebp, %esi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1256(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	1260(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1152(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	1152(%esp), %ecx
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1188(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	1192(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1200(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1204(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1096(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %ebp
-	movl	%ebp, %eax
-	addl	1096(%esp), %esi
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1100(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1104(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	1108(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1112(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1116(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1120(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1124(%esp), %ebp
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	1128(%esp), %edi
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	1132(%esp), %esi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1136(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1140(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1144(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1148(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1040(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	1040(%esp), %ecx
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1044(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1064(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	1068(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	adcl	1072(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1080(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1084(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1088(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1092(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	984(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %edi
-	addl	984(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	adcl	996(%esp), %ebp
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1028(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	1576(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	928(%esp), %ecx
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	932(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	936(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	944(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	980(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	872(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %edi
-	addl	872(%esp), %ebp
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	884(%esp), %ebp
-	adcl	888(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	904(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	816(%esp), %ecx
-	movl	1572(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv416x32
-	movl	32(%esp), %ecx          # 4-byte Reload
-	addl	816(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	824(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	844(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	848(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	856(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %esi
-	movl	%esi, %eax
-	movl	32(%esp), %ecx          # 4-byte Reload
-	addl	760(%esp), %ecx
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	764(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	768(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	772(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	776(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	780(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	784(%esp), %esi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	788(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	792(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	796(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	800(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	804(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	808(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	812(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	704(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	36(%esp), %eax          # 4-byte Reload
-	addl	704(%esp), %eax
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	708(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	712(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	716(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	720(%esp), %ebp
-	adcl	724(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	728(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	732(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	736(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	740(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	744(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	748(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	752(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	756(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%eax, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	648(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	movl	%edi, %eax
-	andl	$1, %eax
-	addl	648(%esp), %esi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	652(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	656(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	660(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	664(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	668(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	672(%esp), %edi
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	676(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	680(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	684(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	688(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	692(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	696(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	592(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	592(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	600(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	612(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	adcl	616(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	620(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	536(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	movl	44(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	536(%esp), %esi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	544(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	560(%esp), %esi
-	adcl	564(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	572(%esp), %edi
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	576(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	480(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	480(%esp), %ecx
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	500(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	512(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	516(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	movl	%edi, %ecx
-	andl	$1, %ecx
-	addl	424(%esp), %esi
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	428(%esp), %ebp
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	432(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	444(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	addl	368(%esp), %ebp
-	adcl	372(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	376(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	384(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	392(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	movl	52(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	312(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	320(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	328(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	336(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	340(%esp), %edi
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	344(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	256(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	68(%esp), %ecx          # 4-byte Reload
-	addl	256(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	268(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	280(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	284(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	288(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	200(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	200(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	208(%esp), %ebp
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	212(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	232(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %edi          # 4-byte Reload
-	adcl	236(%esp), %edi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	144(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	144(%esp), %ecx
-	adcl	148(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	adcl	152(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	156(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	176(%esp), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	28(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	88(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %edi
-	addl	88(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	92(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	96(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	100(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	adcl	104(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	108(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebx          # 4-byte Reload
-	adcl	112(%esp), %ebx
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebx          # 4-byte Reload
-	adcl	116(%esp), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebx          # 4-byte Reload
-	adcl	120(%esp), %ebx
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebx          # 4-byte Reload
-	adcl	124(%esp), %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebx          # 4-byte Reload
-	adcl	128(%esp), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebx          # 4-byte Reload
-	adcl	132(%esp), %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebx          # 4-byte Reload
-	adcl	136(%esp), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebx          # 4-byte Reload
-	adcl	140(%esp), %ebx
-	movl	%ebx, 68(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	1580(%esp), %ebx
-	subl	(%ebx), %eax
-	sbbl	4(%ebx), %ecx
-	sbbl	8(%ebx), %ebp
-	sbbl	12(%ebx), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	sbbl	16(%ebx), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	sbbl	20(%ebx), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	sbbl	24(%ebx), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	32(%esp), %edx          # 4-byte Reload
-	sbbl	28(%ebx), %edx
-	movl	36(%esp), %esi          # 4-byte Reload
-	sbbl	32(%ebx), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	36(%ebx), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	sbbl	40(%ebx), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	sbbl	44(%ebx), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	sbbl	48(%ebx), %esi
-	movl	%esi, %ebx
-	sbbl	$0, %edi
-	andl	$1, %edi
-	jne	.LBB194_2
-# BB#1:
-	movl	%edx, 32(%esp)          # 4-byte Spill
-.LBB194_2:
-	movl	%edi, %edx
-	testb	%dl, %dl
-	movl	80(%esp), %edx          # 4-byte Reload
-	jne	.LBB194_4
-# BB#3:
-	movl	%eax, %edx
-.LBB194_4:
-	movl	1568(%esp), %eax
-	movl	%edx, (%eax)
-	movl	64(%esp), %esi          # 4-byte Reload
-	jne	.LBB194_6
-# BB#5:
-	movl	%ecx, %esi
-.LBB194_6:
-	movl	%esi, 4(%eax)
-	jne	.LBB194_8
-# BB#7:
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-.LBB194_8:
-	movl	76(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 8(%eax)
-	movl	60(%esp), %ebp          # 4-byte Reload
-	jne	.LBB194_10
-# BB#9:
-	movl	4(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-.LBB194_10:
-	movl	84(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	jne	.LBB194_12
-# BB#11:
-	movl	8(%esp), %ebp           # 4-byte Reload
-.LBB194_12:
-	movl	%ebp, 16(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_14
-# BB#13:
-	movl	12(%esp), %ecx          # 4-byte Reload
-.LBB194_14:
-	movl	%ecx, 20(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_16
-# BB#15:
-	movl	16(%esp), %ecx          # 4-byte Reload
-.LBB194_16:
-	movl	%ecx, 24(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_18
-# BB#17:
-	movl	20(%esp), %ecx          # 4-byte Reload
-.LBB194_18:
-	movl	%ecx, 32(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_20
-# BB#19:
-	movl	24(%esp), %ecx          # 4-byte Reload
-.LBB194_20:
-	movl	%ecx, 36(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_22
-# BB#21:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB194_22:
-	movl	%ecx, 40(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_24
-# BB#23:
-	movl	72(%esp), %ecx          # 4-byte Reload
-.LBB194_24:
-	movl	%ecx, 44(%eax)
-	movl	68(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_26
-# BB#25:
-	movl	%ebx, %ecx
-.LBB194_26:
-	movl	%ecx, 48(%eax)
-	addl	$1548, %esp             # imm = 0x60C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end194:
-	.size	mcl_fp_mont13Lbmi2, .Lfunc_end194-mcl_fp_mont13Lbmi2
-
-	.globl	mcl_fp_montNF13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF13Lbmi2,@function
-mcl_fp_montNF13Lbmi2:                   # @mcl_fp_montNF13Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1548, %esp             # imm = 0x60C
-	calll	.L195$pb
-.L195$pb:
-	popl	%ebx
-.Ltmp36:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp36-.L195$pb), %ebx
-	movl	1580(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1488(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	1488(%esp), %edi
-	movl	1492(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	imull	%esi, %eax
-	movl	1540(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	1536(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1532(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	1528(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1524(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1520(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1516(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1512(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1508(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1504(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1500(%esp), %esi
-	movl	1496(%esp), %ebp
-	movl	%eax, (%esp)
-	leal	1432(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	1432(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1436(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	1440(%esp), %ebp
-	adcl	1444(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1448(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1452(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1456(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1460(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1464(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1468(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	1472(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1476(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1480(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	1484(%esp), %edi
-	movl	1576(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1376(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	1428(%esp), %ecx
-	movl	80(%esp), %edx          # 4-byte Reload
-	addl	1376(%esp), %edx
-	adcl	1380(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1384(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1400(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1404(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1408(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1412(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1416(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1424(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1320(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	1320(%esp), %esi
-	adcl	1324(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1328(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1336(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	1340(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1352(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1356(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	1360(%esp), %edi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1364(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1372(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1264(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	1316(%esp), %eax
-	addl	1264(%esp), %ebp
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1268(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1272(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1276(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	1280(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	1284(%esp), %esi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1288(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1292(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1296(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	1300(%esp), %edi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1304(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1308(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1312(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1208(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	1208(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1212(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1216(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1220(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	1228(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1232(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1236(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1240(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1244(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1248(%esp), %esi
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	1252(%esp), %edi
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	1256(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1260(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1152(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	1204(%esp), %eax
-	movl	64(%esp), %edx          # 4-byte Reload
-	addl	1152(%esp), %edx
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1156(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1160(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1164(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1168(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1172(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1176(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1180(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1184(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	1188(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	1192(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	adcl	1196(%esp), %ebp
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1200(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1096(%esp), %ecx
-	movl	1580(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv416x32
-	addl	1096(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1104(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	1116(%esp), %esi
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	1120(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1140(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1144(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	1148(%esp), %ebp
-	movl	1576(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1040(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	1092(%esp), %eax
-	movl	40(%esp), %edx          # 4-byte Reload
-	addl	1040(%esp), %edx
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1044(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1048(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1052(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	1056(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	1060(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1064(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1068(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1072(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1076(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1080(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1084(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	1088(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	adcl	$0, %esi
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	984(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	984(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	996(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	1008(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1036(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	980(%esp), %eax
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	928(%esp), %ecx
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	932(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	936(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	940(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	944(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	948(%esp), %ebp
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	952(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	956(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	960(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	964(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	968(%esp), %esi
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	972(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	976(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	872(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	872(%esp), %edi
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	876(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	892(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	912(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	816(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	868(%esp), %edx
-	addl	816(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	832(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	836(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	856(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	860(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	760(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	780(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	784(%esp), %esi
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	788(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	804(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	704(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	756(%esp), %eax
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	704(%esp), %ecx
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	708(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	712(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	716(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	720(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	adcl	724(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	728(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	732(%esp), %esi
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	736(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	740(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	744(%esp), %ebp
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	748(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	752(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	648(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	648(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	676(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	688(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	696(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	592(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	644(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	592(%esp), %ecx
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	596(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	624(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	636(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	536(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	536(%esp), %edi
-	adcl	540(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	556(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	564(%esp), %esi
-	adcl	568(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	572(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	480(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	532(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	480(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	496(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	504(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	508(%esp), %edi
-	adcl	512(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	528(%esp), %ebp
-	adcl	$0, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	424(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	444(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edi, %esi
-	adcl	452(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	460(%esp), %edi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	472(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	420(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	368(%esp), %ecx
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	372(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	392(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	400(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	404(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	312(%esp), %esi
-	adcl	316(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	320(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	332(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	348(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	256(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	308(%esp), %edx
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	256(%esp), %ecx
-	adcl	260(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	272(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	288(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	200(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	200(%esp), %esi
-	adcl	204(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	212(%esp), %ebp
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	216(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	228(%esp), %edi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	144(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	196(%esp), %edx
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	144(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	152(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	adcl	156(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	164(%esp), %ebp
-	adcl	168(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	88(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	88(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	92(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	96(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	100(%esp), %edi
-	movl	64(%esp), %ebx          # 4-byte Reload
-	adcl	104(%esp), %ebx
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	108(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	%ebp, %esi
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	112(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	120(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	124(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	128(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	132(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	136(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	140(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	1580(%esp), %eax
-	subl	(%eax), %edx
-	movl	%ecx, %ebp
-	sbbl	4(%eax), %ebp
-	movl	%edi, %ecx
-	sbbl	8(%eax), %ecx
-	sbbl	12(%eax), %ebx
-	sbbl	16(%eax), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	sbbl	20(%eax), %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	sbbl	24(%eax), %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	28(%eax), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	sbbl	32(%eax), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	sbbl	36(%eax), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	sbbl	40(%eax), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	sbbl	44(%eax), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	sbbl	48(%eax), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	sarl	$31, %eax
-	testl	%eax, %eax
-	movl	84(%esp), %eax          # 4-byte Reload
-	js	.LBB195_2
-# BB#1:
-	movl	%edx, %eax
-.LBB195_2:
-	movl	1568(%esp), %edx
-	movl	%eax, (%edx)
-	movl	80(%esp), %esi          # 4-byte Reload
-	js	.LBB195_4
-# BB#3:
-	movl	%ebp, %esi
-.LBB195_4:
-	movl	%esi, 4(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB195_6
-# BB#5:
-	movl	%ecx, %edi
-.LBB195_6:
-	movl	%edi, 8(%edx)
-	js	.LBB195_8
-# BB#7:
-	movl	%ebx, %eax
-.LBB195_8:
-	movl	%eax, 12(%edx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB195_10
-# BB#9:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB195_10:
-	movl	%eax, 16(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB195_12
-# BB#11:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB195_12:
-	movl	%eax, 20(%edx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	js	.LBB195_14
-# BB#13:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB195_14:
-	movl	%eax, 24(%edx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	js	.LBB195_16
-# BB#15:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB195_16:
-	movl	%eax, 28(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB195_18
-# BB#17:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB195_18:
-	movl	%eax, 32(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB195_20
-# BB#19:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB195_20:
-	movl	%eax, 36(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB195_22
-# BB#21:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB195_22:
-	movl	%eax, 40(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	js	.LBB195_24
-# BB#23:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB195_24:
-	movl	%eax, 44(%edx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	js	.LBB195_26
-# BB#25:
-	movl	68(%esp), %eax          # 4-byte Reload
-.LBB195_26:
-	movl	%eax, 48(%edx)
-	addl	$1548, %esp             # imm = 0x60C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end195:
-	.size	mcl_fp_montNF13Lbmi2, .Lfunc_end195-mcl_fp_montNF13Lbmi2
-
-	.globl	mcl_fp_montRed13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed13Lbmi2,@function
-mcl_fp_montRed13Lbmi2:                  # @mcl_fp_montRed13Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$892, %esp              # imm = 0x37C
-	calll	.L196$pb
-.L196$pb:
-	popl	%eax
-.Ltmp37:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp37-.L196$pb), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	920(%esp), %edx
-	movl	-4(%edx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	916(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 76(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	imull	%eax, %ebx
-	movl	100(%ecx), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	96(%ecx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%ecx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	88(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%ecx), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	80(%ecx), %esi
-	movl	%esi, 136(%esp)         # 4-byte Spill
-	movl	76(%ecx), %esi
-	movl	%esi, 144(%esp)         # 4-byte Spill
-	movl	72(%ecx), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	68(%ecx), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	64(%ecx), %esi
-	movl	%esi, 148(%esp)         # 4-byte Spill
-	movl	60(%ecx), %esi
-	movl	%esi, 152(%esp)         # 4-byte Spill
-	movl	56(%ecx), %esi
-	movl	%esi, 140(%esp)         # 4-byte Spill
-	movl	52(%ecx), %esi
-	movl	%esi, 156(%esp)         # 4-byte Spill
-	movl	48(%ecx), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	44(%ecx), %ebp
-	movl	%ebp, 124(%esp)         # 4-byte Spill
-	movl	40(%ecx), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	36(%ecx), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	32(%ecx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	28(%ecx), %ebp
-	movl	24(%ecx), %edi
-	movl	20(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	16(%ecx), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	12(%ecx), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	8(%ecx), %esi
-	movl	(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	48(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	44(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	832(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	76(%esp), %eax          # 4-byte Reload
-	addl	832(%esp), %eax
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	836(%esp), %ecx
-	adcl	840(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	856(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	adcl	860(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	776(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	andl	$1, %esi
-	addl	776(%esp), %edi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	780(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, %edi
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	720(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	720(%esp), %esi
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	724(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 132(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	664(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	664(%esp), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	668(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	608(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	608(%esp), %edi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	612(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	movl	144(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	552(%esp), %esi
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	556(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	496(%esp), %edi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	500(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %ebp         # 4-byte Reload
-	adcl	532(%esp), %ebp
-	movl	148(%esp), %edi         # 4-byte Reload
-	adcl	536(%esp), %edi
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 136(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	440(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	440(%esp), %esi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	adcl	472(%esp), %ebp
-	movl	%ebp, 152(%esp)         # 4-byte Spill
-	adcl	476(%esp), %edi
-	movl	%edi, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	384(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	384(%esp), %esi
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	388(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %ebp         # 4-byte Reload
-	adcl	404(%esp), %ebp
-	movl	140(%esp), %edi         # 4-byte Reload
-	adcl	408(%esp), %edi
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %esi         # 4-byte Reload
-	adcl	420(%esp), %esi
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	920(%esp), %eax
-	movl	%eax, %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	328(%esp), %eax
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	332(%esp), %ecx
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	344(%esp), %ebp
-	movl	%ebp, 156(%esp)         # 4-byte Spill
-	adcl	348(%esp), %edi
-	movl	%edi, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	360(%esp), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	movl	72(%esp), %esi          # 4-byte Reload
-	imull	%esi, %eax
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	272(%esp), %edi
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	120(%esp), %edi         # 4-byte Reload
-	adcl	280(%esp), %edi
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	284(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %ecx         # 4-byte Reload
-	adcl	288(%esp), %ecx
-	movl	%ecx, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	292(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %ecx         # 4-byte Reload
-	adcl	296(%esp), %ecx
-	movl	%ecx, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	300(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	304(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %ecx         # 4-byte Reload
-	adcl	308(%esp), %ecx
-	movl	%ecx, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %ecx         # 4-byte Reload
-	adcl	312(%esp), %ecx
-	movl	%ecx, 136(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	316(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	320(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	324(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	imull	%esi, %eax
-	movl	%eax, (%esp)
-	leal	216(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	216(%esp), %ebp
-	movl	%edi, %ecx
-	adcl	220(%esp), %ecx
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %ebp         # 4-byte Reload
-	adcl	228(%esp), %ebp
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %edi         # 4-byte Reload
-	adcl	244(%esp), %edi
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	160(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	160(%esp), %esi
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	168(%esp), %ebp
-	movl	%ebp, 140(%esp)         # 4-byte Spill
-	movl	%ebp, %ebx
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	172(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %ebp         # 4-byte Reload
-	adcl	176(%esp), %ebp
-	movl	%ebp, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	%eax, %edx
-	movl	%edi, %eax
-	adcl	184(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	156(%esp), %edi         # 4-byte Reload
-	subl	12(%esp), %edi          # 4-byte Folded Reload
-	sbbl	4(%esp), %ebx           # 4-byte Folded Reload
-	sbbl	8(%esp), %ecx           # 4-byte Folded Reload
-	sbbl	16(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	132(%esp), %edx         # 4-byte Reload
-	sbbl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	144(%esp), %edx         # 4-byte Reload
-	sbbl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	136(%esp), %edx         # 4-byte Reload
-	sbbl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	116(%esp), %edx         # 4-byte Reload
-	sbbl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	112(%esp), %edx         # 4-byte Reload
-	sbbl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	sbbl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	sbbl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 120(%esp)         # 4-byte Spill
-	movl	%eax, %edx
-	movl	%esi, %eax
-	sbbl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 124(%esp)         # 4-byte Spill
-	sbbl	$0, %eax
-	andl	$1, %eax
-	jne	.LBB196_2
-# BB#1:
-	movl	%ebp, 148(%esp)         # 4-byte Spill
-.LBB196_2:
-	testb	%al, %al
-	movl	156(%esp), %ebp         # 4-byte Reload
-	jne	.LBB196_4
-# BB#3:
-	movl	%edi, %ebp
-.LBB196_4:
-	movl	912(%esp), %edi
-	movl	%ebp, (%edi)
-	movl	140(%esp), %ebp         # 4-byte Reload
-	jne	.LBB196_6
-# BB#5:
-	movl	%ebx, %ebp
-.LBB196_6:
-	movl	%ebp, 4(%edi)
-	movl	152(%esp), %ebx         # 4-byte Reload
-	jne	.LBB196_8
-# BB#7:
-	movl	%ecx, %ebx
-.LBB196_8:
-	movl	%ebx, 8(%edi)
-	movl	148(%esp), %esi         # 4-byte Reload
-	movl	%esi, 12(%edi)
-	movl	116(%esp), %ebx         # 4-byte Reload
-	movl	128(%esp), %esi         # 4-byte Reload
-	jne	.LBB196_10
-# BB#9:
-	movl	72(%esp), %esi          # 4-byte Reload
-.LBB196_10:
-	movl	%esi, 16(%edi)
-	movl	112(%esp), %esi         # 4-byte Reload
-	movl	132(%esp), %edx         # 4-byte Reload
-	jne	.LBB196_12
-# BB#11:
-	movl	76(%esp), %edx          # 4-byte Reload
-.LBB196_12:
-	movl	%edx, 20(%edi)
-	movl	96(%esp), %edx          # 4-byte Reload
-	movl	144(%esp), %ecx         # 4-byte Reload
-	jne	.LBB196_14
-# BB#13:
-	movl	80(%esp), %ecx          # 4-byte Reload
-.LBB196_14:
-	movl	%ecx, 24(%edi)
-	movl	100(%esp), %ecx         # 4-byte Reload
-	movl	136(%esp), %eax         # 4-byte Reload
-	jne	.LBB196_16
-# BB#15:
-	movl	84(%esp), %eax          # 4-byte Reload
-.LBB196_16:
-	movl	%eax, 28(%edi)
-	movl	92(%esp), %eax          # 4-byte Reload
-	jne	.LBB196_18
-# BB#17:
-	movl	88(%esp), %ebx          # 4-byte Reload
-.LBB196_18:
-	movl	%ebx, 32(%edi)
-	jne	.LBB196_20
-# BB#19:
-	movl	104(%esp), %esi         # 4-byte Reload
-.LBB196_20:
-	movl	%esi, 36(%edi)
-	jne	.LBB196_22
-# BB#21:
-	movl	108(%esp), %edx         # 4-byte Reload
-.LBB196_22:
-	movl	%edx, 40(%edi)
-	jne	.LBB196_24
-# BB#23:
-	movl	120(%esp), %ecx         # 4-byte Reload
-.LBB196_24:
-	movl	%ecx, 44(%edi)
-	jne	.LBB196_26
-# BB#25:
-	movl	124(%esp), %eax         # 4-byte Reload
-.LBB196_26:
-	movl	%eax, 48(%edi)
-	addl	$892, %esp              # imm = 0x37C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end196:
-	.size	mcl_fp_montRed13Lbmi2, .Lfunc_end196-mcl_fp_montRed13Lbmi2
-
-	.globl	mcl_fp_addPre13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre13Lbmi2,@function
-mcl_fp_addPre13Lbmi2:                   # @mcl_fp_addPre13Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %edi
-	adcl	8(%ecx), %edi
-	movl	16(%esp), %ebx
-	movl	%edx, (%ebx)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%ebx)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ebx)
-	movl	20(%ecx), %edx
-	adcl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ebx)
-	movl	24(%ecx), %esi
-	adcl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ebx)
-	movl	28(%ecx), %edx
-	adcl	%edi, %edx
-	movl	32(%eax), %edi
-	movl	%esi, 24(%ebx)
-	movl	32(%ecx), %esi
-	adcl	%edi, %esi
-	movl	36(%eax), %edi
-	movl	%edx, 28(%ebx)
-	movl	36(%ecx), %edx
-	adcl	%edi, %edx
-	movl	40(%eax), %edi
-	movl	%esi, 32(%ebx)
-	movl	40(%ecx), %esi
-	adcl	%edi, %esi
-	movl	44(%eax), %edi
-	movl	%edx, 36(%ebx)
-	movl	44(%ecx), %edx
-	adcl	%edi, %edx
-	movl	%esi, 40(%ebx)
-	movl	%edx, 44(%ebx)
-	movl	48(%eax), %eax
-	movl	48(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 48(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end197:
-	.size	mcl_fp_addPre13Lbmi2, .Lfunc_end197-mcl_fp_addPre13Lbmi2
-
-	.globl	mcl_fp_subPre13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre13Lbmi2,@function
-mcl_fp_subPre13Lbmi2:                   # @mcl_fp_subPre13Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edx), %ebx
-	movl	20(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebp)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ebp)
-	movl	24(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ebp)
-	movl	28(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%edi, 24(%ebp)
-	movl	32(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	36(%edx), %ebx
-	movl	%esi, 28(%ebp)
-	movl	36(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	40(%edx), %ebx
-	movl	%edi, 32(%ebp)
-	movl	40(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	44(%edx), %ebx
-	movl	%esi, 36(%ebp)
-	movl	44(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	%edi, 40(%ebp)
-	movl	%esi, 44(%ebp)
-	movl	48(%edx), %edx
-	movl	48(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 48(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end198:
-	.size	mcl_fp_subPre13Lbmi2, .Lfunc_end198-mcl_fp_subPre13Lbmi2
-
-	.globl	mcl_fp_shr1_13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_13Lbmi2,@function
-mcl_fp_shr1_13Lbmi2:                    # @mcl_fp_shr1_13Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	8(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 4(%ecx)
-	movl	12(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 8(%ecx)
-	movl	16(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 16(%ecx)
-	movl	24(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 24(%ecx)
-	movl	32(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 32(%ecx)
-	movl	40(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 40(%ecx)
-	movl	48(%eax), %eax
-	shrdl	$1, %eax, %esi
-	movl	%esi, 44(%ecx)
-	shrl	%eax
-	movl	%eax, 48(%ecx)
-	popl	%esi
-	retl
-.Lfunc_end199:
-	.size	mcl_fp_shr1_13Lbmi2, .Lfunc_end199-mcl_fp_shr1_13Lbmi2
-
-	.globl	mcl_fp_add13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add13Lbmi2,@function
-mcl_fp_add13Lbmi2:                      # @mcl_fp_add13Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$40, %esp
-	movl	68(%esp), %ebp
-	movl	(%ebp), %ecx
-	movl	4(%ebp), %eax
-	movl	64(%esp), %ebx
-	addl	(%ebx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	4(%ebx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	8(%ebp), %eax
-	adcl	8(%ebx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	12(%ebx), %ecx
-	movl	16(%ebx), %eax
-	adcl	12(%ebp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	16(%ebp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	20(%ebx), %eax
-	adcl	20(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	24(%ebx), %eax
-	adcl	24(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	28(%ebx), %eax
-	adcl	28(%ebp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	32(%ebx), %eax
-	adcl	32(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	36(%ebx), %ecx
-	adcl	36(%ebp), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	40(%ebx), %edi
-	adcl	40(%ebp), %edi
-	movl	44(%ebx), %edx
-	adcl	44(%ebp), %edx
-	movl	48(%ebx), %esi
-	adcl	48(%ebp), %esi
-	movl	60(%esp), %ebp
-	movl	4(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, (%ebp)
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	%eax, 4(%ebp)
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 8(%ebp)
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 12(%ebp)
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 16(%ebp)
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 20(%ebp)
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 24(%ebp)
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 28(%ebp)
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 32(%ebp)
-	movl	%ecx, 36(%ebp)
-	movl	%edi, 40(%ebp)
-	movl	%edx, 44(%ebp)
-	movl	%esi, 48(%ebp)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	72(%esp), %ecx
-	subl	(%ecx), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	36(%esp), %ebx          # 4-byte Reload
-	sbbl	4(%ecx), %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebx          # 4-byte Reload
-	sbbl	8(%ecx), %ebx
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %ebx          # 4-byte Reload
-	sbbl	12(%ecx), %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %ebx          # 4-byte Reload
-	sbbl	16(%ecx), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%ecx), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %ebx          # 4-byte Reload
-	sbbl	24(%ecx), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebx          # 4-byte Reload
-	sbbl	28(%ecx), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %ebx           # 4-byte Reload
-	sbbl	32(%ecx), %ebx
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	(%esp), %ebx            # 4-byte Reload
-	sbbl	36(%ecx), %ebx
-	sbbl	40(%ecx), %edi
-	sbbl	44(%ecx), %edx
-	sbbl	48(%ecx), %esi
-	sbbl	$0, %eax
-	testb	$1, %al
-	jne	.LBB200_2
-# BB#1:                                 # %nocarry
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, (%ebp)
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	%eax, 4(%ebp)
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 8(%ebp)
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 12(%ebp)
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 16(%ebp)
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 20(%ebp)
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 24(%ebp)
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 28(%ebp)
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 32(%ebp)
-	movl	%ebx, 36(%ebp)
-	movl	%edi, 40(%ebp)
-	movl	%edx, 44(%ebp)
-	movl	%esi, 48(%ebp)
-.LBB200_2:                              # %carry
-	addl	$40, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end200:
-	.size	mcl_fp_add13Lbmi2, .Lfunc_end200-mcl_fp_add13Lbmi2
-
-	.globl	mcl_fp_addNF13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF13Lbmi2,@function
-mcl_fp_addNF13Lbmi2:                    # @mcl_fp_addNF13Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$100, %esp
-	movl	128(%esp), %esi
-	movl	(%esi), %ecx
-	movl	4(%esi), %eax
-	movl	124(%esp), %edx
-	addl	(%edx), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	4(%edx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	48(%esi), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	44(%esi), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	40(%esi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	36(%esi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	32(%esi), %ebp
-	movl	28(%esi), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	movl	20(%esi), %ebx
-	movl	16(%esi), %edi
-	movl	12(%esi), %ecx
-	movl	8(%esi), %esi
-	adcl	8(%edx), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	12(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	16(%edx), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	adcl	20(%edx), %ebx
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	adcl	24(%edx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	28(%edx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	32(%edx), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	36(%edx), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	40(%edx), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%edx), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	48(%edx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	132(%esp), %edx
-	movl	64(%esp), %eax          # 4-byte Reload
-	subl	(%edx), %eax
-	movl	68(%esp), %ebp          # 4-byte Reload
-	sbbl	4(%edx), %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	sbbl	8(%edx), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	sbbl	12(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	16(%edx), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	sbbl	20(%edx), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	sbbl	24(%edx), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	movl	%esi, %ecx
-	movl	%esi, %ebp
-	sbbl	36(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	movl	%esi, %ecx
-	movl	%esi, %edi
-	sbbl	40(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx          # 4-byte Reload
-	sbbl	48(%edx), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	sarl	$31, %ebx
-	testl	%ebx, %ebx
-	movl	64(%esp), %edx          # 4-byte Reload
-	js	.LBB201_2
-# BB#1:
-	movl	%eax, %edx
-.LBB201_2:
-	movl	120(%esp), %esi
-	movl	%edx, (%esi)
-	movl	68(%esp), %edx          # 4-byte Reload
-	js	.LBB201_4
-# BB#3:
-	movl	(%esp), %edx            # 4-byte Reload
-.LBB201_4:
-	movl	%edx, 4(%esi)
-	movl	%edi, %edx
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB201_6
-# BB#5:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB201_6:
-	movl	%eax, 8(%esi)
-	movl	%ebp, %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB201_8
-# BB#7:
-	movl	8(%esp), %ebx           # 4-byte Reload
-.LBB201_8:
-	movl	%ebx, 12(%esi)
-	movl	96(%esp), %ebp          # 4-byte Reload
-	movl	56(%esp), %ecx          # 4-byte Reload
-	js	.LBB201_10
-# BB#9:
-	movl	12(%esp), %ecx          # 4-byte Reload
-.LBB201_10:
-	movl	%ecx, 16(%esi)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	js	.LBB201_12
-# BB#11:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB201_12:
-	movl	%eax, 20(%esi)
-	movl	72(%esp), %ebx          # 4-byte Reload
-	js	.LBB201_14
-# BB#13:
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-.LBB201_14:
-	movl	76(%esp), %eax          # 4-byte Reload
-	movl	%eax, 24(%esi)
-	js	.LBB201_16
-# BB#15:
-	movl	24(%esp), %ebp          # 4-byte Reload
-.LBB201_16:
-	movl	%ebp, 28(%esi)
-	js	.LBB201_18
-# BB#17:
-	movl	28(%esp), %ebx          # 4-byte Reload
-.LBB201_18:
-	movl	%ebx, 32(%esi)
-	js	.LBB201_20
-# BB#19:
-	movl	32(%esp), %edi          # 4-byte Reload
-.LBB201_20:
-	movl	%edi, 36(%esi)
-	js	.LBB201_22
-# BB#21:
-	movl	36(%esp), %edx          # 4-byte Reload
-.LBB201_22:
-	movl	%edx, 40(%esi)
-	js	.LBB201_24
-# BB#23:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB201_24:
-	movl	%ecx, 44(%esi)
-	movl	88(%esp), %eax          # 4-byte Reload
-	js	.LBB201_26
-# BB#25:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB201_26:
-	movl	%eax, 48(%esi)
-	addl	$100, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end201:
-	.size	mcl_fp_addNF13Lbmi2, .Lfunc_end201-mcl_fp_addNF13Lbmi2
-
-	.globl	mcl_fp_sub13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub13Lbmi2,@function
-mcl_fp_sub13Lbmi2:                      # @mcl_fp_sub13Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$44, %esp
-	movl	68(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	72(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	28(%esi), %eax
-	sbbl	28(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	32(%esi), %edx
-	sbbl	32(%edi), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	36(%esi), %ecx
-	sbbl	36(%edi), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	40(%esi), %eax
-	sbbl	40(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	44(%esi), %ebp
-	sbbl	44(%edi), %ebp
-	movl	48(%esi), %esi
-	sbbl	48(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	64(%esp), %ebx
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%ebx)
-	movl	%edx, 32(%ebx)
-	movl	%ecx, 36(%ebx)
-	movl	%eax, 40(%ebx)
-	movl	%ebp, 44(%ebx)
-	movl	%esi, 48(%ebx)
-	je	.LBB202_2
-# BB#1:                                 # %carry
-	movl	%esi, %edi
-	movl	76(%esp), %esi
-	movl	12(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	8(%esi), %ecx
-	movl	12(%esi), %eax
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	36(%esi), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	40(%esi), %ecx
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	movl	%eax, 36(%ebx)
-	movl	%ecx, 40(%ebx)
-	movl	44(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 44(%ebx)
-	movl	48(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 48(%ebx)
-.LBB202_2:                              # %nocarry
-	addl	$44, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end202:
-	.size	mcl_fp_sub13Lbmi2, .Lfunc_end202-mcl_fp_sub13Lbmi2
-
-	.globl	mcl_fp_subNF13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF13Lbmi2,@function
-mcl_fp_subNF13Lbmi2:                    # @mcl_fp_subNF13Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$80, %esp
-	movl	104(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %eax
-	movl	108(%esp), %edi
-	subl	(%edi), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%ecx), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%ecx), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	32(%ecx), %ebp
-	movl	28(%ecx), %ebx
-	movl	24(%ecx), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	20(%ecx), %esi
-	movl	16(%ecx), %edx
-	movl	12(%ecx), %eax
-	movl	8(%ecx), %ecx
-	sbbl	8(%edi), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	sbbl	12(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%edi), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	sbbl	28(%edi), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	sbbl	32(%edi), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	sbbl	48(%edi), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	sarl	$31, %esi
-	movl	%esi, %ecx
-	shldl	$1, %edx, %ecx
-	movl	112(%esp), %edi
-	movl	4(%edi), %eax
-	andl	%ecx, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	andl	(%edi), %ecx
-	movl	48(%edi), %eax
-	andl	%esi, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	44(%edi), %eax
-	andl	%esi, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	40(%edi), %eax
-	andl	%esi, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	36(%edi), %eax
-	andl	%esi, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	32(%edi), %eax
-	andl	%esi, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	28(%edi), %eax
-	andl	%esi, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	24(%edi), %ebp
-	andl	%esi, %ebp
-	movl	20(%edi), %ebx
-	andl	%esi, %ebx
-	movl	16(%edi), %edx
-	andl	%esi, %edx
-	rorxl	$31, %esi, %eax
-	andl	12(%edi), %esi
-	andl	8(%edi), %eax
-	addl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	100(%esp), %edi
-	movl	%ecx, (%edi)
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	56(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 4(%edi)
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%eax, 8(%edi)
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, 12(%edi)
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edx, 16(%edi)
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebx, 20(%edi)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 24(%edi)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%edi)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%edi)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 36(%edi)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 40(%edi)
-	movl	%eax, 44(%edi)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%edi)
-	addl	$80, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end203:
-	.size	mcl_fp_subNF13Lbmi2, .Lfunc_end203-mcl_fp_subNF13Lbmi2
-
-	.globl	mcl_fpDbl_add13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add13Lbmi2,@function
-mcl_fpDbl_add13Lbmi2:                   # @mcl_fpDbl_add13Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$96, %esp
-	movl	124(%esp), %ecx
-	movl	120(%esp), %esi
-	movl	12(%esi), %edi
-	movl	16(%esi), %edx
-	movl	8(%ecx), %ebx
-	movl	(%ecx), %ebp
-	addl	(%esi), %ebp
-	movl	116(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%ecx), %ebp
-	adcl	4(%esi), %ebp
-	adcl	8(%esi), %ebx
-	adcl	12(%ecx), %edi
-	adcl	16(%ecx), %edx
-	movl	%ebp, 4(%eax)
-	movl	60(%ecx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%ecx), %ebx
-	movl	%edi, 12(%eax)
-	movl	20(%esi), %edi
-	adcl	%ebx, %edi
-	movl	24(%ecx), %ebx
-	movl	%edx, 16(%eax)
-	movl	24(%esi), %edx
-	adcl	%ebx, %edx
-	movl	28(%ecx), %ebx
-	movl	%edi, 20(%eax)
-	movl	28(%esi), %edi
-	adcl	%ebx, %edi
-	movl	32(%ecx), %ebx
-	movl	%edx, 24(%eax)
-	movl	32(%esi), %edx
-	adcl	%ebx, %edx
-	movl	36(%ecx), %ebx
-	movl	%edi, 28(%eax)
-	movl	36(%esi), %edi
-	adcl	%ebx, %edi
-	movl	40(%ecx), %ebx
-	movl	%edx, 32(%eax)
-	movl	40(%esi), %edx
-	adcl	%ebx, %edx
-	movl	44(%ecx), %ebx
-	movl	%edi, 36(%eax)
-	movl	44(%esi), %edi
-	adcl	%ebx, %edi
-	movl	48(%ecx), %ebx
-	movl	%edx, 40(%eax)
-	movl	48(%esi), %edx
-	adcl	%ebx, %edx
-	movl	52(%ecx), %ebx
-	movl	%edi, 44(%eax)
-	movl	52(%esi), %edi
-	adcl	%ebx, %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	56(%ecx), %edi
-	movl	%edx, 48(%eax)
-	movl	56(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	64(%ecx), %edx
-	movl	64(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	68(%ecx), %edx
-	movl	68(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	72(%ecx), %edx
-	movl	72(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%ecx), %edx
-	movl	76(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%ecx), %edx
-	movl	80(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%ecx), %edx
-	movl	84(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%ecx), %edx
-	movl	88(%esi), %edi
-	adcl	%edx, %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	92(%ecx), %edx
-	movl	92(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	96(%ecx), %edx
-	movl	96(%esi), %ebx
-	adcl	%edx, %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	100(%ecx), %ecx
-	movl	100(%esi), %esi
-	adcl	%ecx, %esi
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	128(%esp), %ebp
-	movl	76(%esp), %ecx          # 4-byte Reload
-	subl	(%ebp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	sbbl	4(%ebp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	8(%ebp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%ebp), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	sbbl	20(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%ebp), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%ebp), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%ebp), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	sbbl	36(%ebp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	sbbl	40(%ebp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	movl	%esi, %ebx
-	sbbl	44(%ebp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ebx, %ecx
-	sbbl	48(%ebp), %ecx
-	sbbl	$0, %edx
-	andl	$1, %edx
-	jne	.LBB204_2
-# BB#1:
-	movl	%ecx, %ebx
-.LBB204_2:
-	testb	%dl, %dl
-	movl	76(%esp), %ecx          # 4-byte Reload
-	movl	72(%esp), %edx          # 4-byte Reload
-	movl	68(%esp), %esi          # 4-byte Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	movl	60(%esp), %ebp          # 4-byte Reload
-	jne	.LBB204_4
-# BB#3:
-	movl	(%esp), %edx            # 4-byte Reload
-	movl	4(%esp), %esi           # 4-byte Reload
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-.LBB204_4:
-	movl	116(%esp), %eax
-	movl	%ecx, 52(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%eax)
-	movl	84(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 60(%eax)
-	movl	88(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 64(%eax)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 68(%eax)
-	movl	%ebp, 72(%eax)
-	movl	%edi, 76(%eax)
-	movl	%esi, 80(%eax)
-	movl	%edx, 84(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	movl	52(%esp), %edx          # 4-byte Reload
-	movl	48(%esp), %esi          # 4-byte Reload
-	jne	.LBB204_6
-# BB#5:
-	movl	36(%esp), %esi          # 4-byte Reload
-.LBB204_6:
-	movl	%esi, 88(%eax)
-	jne	.LBB204_8
-# BB#7:
-	movl	40(%esp), %edx          # 4-byte Reload
-.LBB204_8:
-	movl	%edx, 92(%eax)
-	jne	.LBB204_10
-# BB#9:
-	movl	44(%esp), %ecx          # 4-byte Reload
-.LBB204_10:
-	movl	%ecx, 96(%eax)
-	movl	%ebx, 100(%eax)
-	addl	$96, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end204:
-	.size	mcl_fpDbl_add13Lbmi2, .Lfunc_end204-mcl_fpDbl_add13Lbmi2
-
-	.globl	mcl_fpDbl_sub13Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub13Lbmi2,@function
-mcl_fpDbl_sub13Lbmi2:                   # @mcl_fpDbl_sub13Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$84, %esp
-	movl	108(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %edx
-	movl	112(%esp), %ebx
-	subl	(%ebx), %eax
-	sbbl	4(%ebx), %edx
-	movl	8(%edi), %esi
-	sbbl	8(%ebx), %esi
-	movl	104(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%edi), %eax
-	sbbl	12(%ebx), %eax
-	movl	%edx, 4(%ecx)
-	movl	16(%edi), %edx
-	sbbl	16(%ebx), %edx
-	movl	%esi, 8(%ecx)
-	movl	20(%ebx), %esi
-	movl	%eax, 12(%ecx)
-	movl	20(%edi), %eax
-	sbbl	%esi, %eax
-	movl	24(%ebx), %esi
-	movl	%edx, 16(%ecx)
-	movl	24(%edi), %edx
-	sbbl	%esi, %edx
-	movl	28(%ebx), %esi
-	movl	%eax, 20(%ecx)
-	movl	28(%edi), %eax
-	sbbl	%esi, %eax
-	movl	32(%ebx), %esi
-	movl	%edx, 24(%ecx)
-	movl	32(%edi), %edx
-	sbbl	%esi, %edx
-	movl	36(%ebx), %esi
-	movl	%eax, 28(%ecx)
-	movl	36(%edi), %eax
-	sbbl	%esi, %eax
-	movl	40(%ebx), %esi
-	movl	%edx, 32(%ecx)
-	movl	40(%edi), %edx
-	sbbl	%esi, %edx
-	movl	44(%ebx), %esi
-	movl	%eax, 36(%ecx)
-	movl	44(%edi), %eax
-	sbbl	%esi, %eax
-	movl	48(%ebx), %esi
-	movl	%edx, 40(%ecx)
-	movl	48(%edi), %edx
-	sbbl	%esi, %edx
-	movl	52(%ebx), %esi
-	movl	%eax, 44(%ecx)
-	movl	52(%edi), %eax
-	sbbl	%esi, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	56(%ebx), %eax
-	movl	%edx, 48(%ecx)
-	movl	56(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	60(%ebx), %eax
-	movl	60(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	64(%ebx), %eax
-	movl	64(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	68(%ebx), %eax
-	movl	68(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	72(%ebx), %eax
-	movl	72(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	76(%ebx), %eax
-	movl	76(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	80(%ebx), %eax
-	movl	80(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	84(%ebx), %eax
-	movl	84(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	88(%ebx), %eax
-	movl	88(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	92(%ebx), %eax
-	movl	92(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	96(%ebx), %eax
-	movl	96(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	100(%ebx), %eax
-	movl	100(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	116(%esp), %edi
-	jne	.LBB205_1
-# BB#2:
-	movl	$0, 44(%esp)            # 4-byte Folded Spill
-	jmp	.LBB205_3
-.LBB205_1:
-	movl	48(%edi), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-.LBB205_3:
-	testb	%al, %al
-	jne	.LBB205_4
-# BB#5:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	movl	$0, %ebx
-	jmp	.LBB205_6
-.LBB205_4:
-	movl	(%edi), %ebx
-	movl	4(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB205_6:
-	jne	.LBB205_7
-# BB#8:
-	movl	$0, 24(%esp)            # 4-byte Folded Spill
-	jmp	.LBB205_9
-.LBB205_7:
-	movl	44(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-.LBB205_9:
-	jne	.LBB205_10
-# BB#11:
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB205_12
-.LBB205_10:
-	movl	40(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-.LBB205_12:
-	jne	.LBB205_13
-# BB#14:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB205_15
-.LBB205_13:
-	movl	36(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB205_15:
-	jne	.LBB205_16
-# BB#17:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB205_18
-.LBB205_16:
-	movl	32(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB205_18:
-	jne	.LBB205_19
-# BB#20:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB205_21
-.LBB205_19:
-	movl	28(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB205_21:
-	jne	.LBB205_22
-# BB#23:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB205_24
-.LBB205_22:
-	movl	24(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB205_24:
-	jne	.LBB205_25
-# BB#26:
-	movl	$0, %eax
-	jmp	.LBB205_27
-.LBB205_25:
-	movl	20(%edi), %eax
-.LBB205_27:
-	jne	.LBB205_28
-# BB#29:
-	movl	$0, %edx
-	jmp	.LBB205_30
-.LBB205_28:
-	movl	16(%edi), %edx
-.LBB205_30:
-	jne	.LBB205_31
-# BB#32:
-	movl	$0, %esi
-	jmp	.LBB205_33
-.LBB205_31:
-	movl	12(%edi), %esi
-.LBB205_33:
-	jne	.LBB205_34
-# BB#35:
-	xorl	%edi, %edi
-	jmp	.LBB205_36
-.LBB205_34:
-	movl	8(%edi), %edi
-.LBB205_36:
-	addl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	16(%esp), %ebp          # 4-byte Reload
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebx, 52(%ecx)
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebp, 56(%ecx)
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%edi, 60(%ecx)
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, 64(%ecx)
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 68(%ecx)
-	movl	(%esp), %edx            # 4-byte Reload
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 72(%ecx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 76(%ecx)
-	movl	8(%esp), %edx           # 4-byte Reload
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 80(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 84(%ecx)
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 88(%ecx)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 92(%ecx)
-	movl	%eax, 96(%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%ecx)
-	addl	$84, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end205:
-	.size	mcl_fpDbl_sub13Lbmi2, .Lfunc_end205-mcl_fpDbl_sub13Lbmi2
-
-	.align	16, 0x90
-	.type	.LmulPv448x32,@function
-.LmulPv448x32:                          # @mulPv448x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$48, %esp
-	movl	%edx, %eax
-	movl	68(%esp), %ebx
-	movl	%ebx, %edx
-	mulxl	4(%eax), %edi, %esi
-	movl	%ebx, %edx
-	mulxl	(%eax), %ebp, %edx
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	addl	%edi, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	8(%eax), %edx, %edi
-	adcl	%esi, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	12(%eax), %edx, %esi
-	adcl	%edi, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	16(%eax), %edx, %edi
-	adcl	%esi, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	20(%eax), %edx, %esi
-	adcl	%edi, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	24(%eax), %edx, %edi
-	adcl	%esi, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	28(%eax), %edx, %esi
-	adcl	%edi, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	32(%eax), %edx, %edi
-	adcl	%esi, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	36(%eax), %edx, %esi
-	adcl	%edi, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%ebx, %edx
-	mulxl	40(%eax), %edi, %ebp
-	adcl	%esi, %edi
-	movl	%ebx, %edx
-	mulxl	44(%eax), %esi, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	%ebp, %esi
-	movl	%ebx, %edx
-	mulxl	48(%eax), %edx, %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	44(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%ecx)
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%ecx)
-	movl	36(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%ecx)
-	movl	32(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%ecx)
-	movl	28(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%ecx)
-	movl	24(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%ecx)
-	movl	20(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%ecx)
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%ecx)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 32(%ecx)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 36(%ecx)
-	movl	%edi, 40(%ecx)
-	movl	%esi, 44(%ecx)
-	movl	%edx, 48(%ecx)
-	movl	%ebx, %edx
-	mulxl	52(%eax), %eax, %edx
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 52(%ecx)
-	adcl	$0, %edx
-	movl	%edx, 56(%ecx)
-	movl	%ecx, %eax
-	addl	$48, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end206:
-	.size	.LmulPv448x32, .Lfunc_end206-.LmulPv448x32
-
-	.globl	mcl_fp_mulUnitPre14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre14Lbmi2,@function
-mcl_fp_mulUnitPre14Lbmi2:               # @mcl_fp_mulUnitPre14Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$108, %esp
-	calll	.L207$pb
-.L207$pb:
-	popl	%ebx
-.Ltmp38:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp38-.L207$pb), %ebx
-	movl	136(%esp), %eax
-	movl	%eax, (%esp)
-	leal	48(%esp), %ecx
-	movl	132(%esp), %edx
-	calll	.LmulPv448x32
-	movl	104(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp
-	movl	64(%esp), %ebx
-	movl	60(%esp), %edi
-	movl	56(%esp), %esi
-	movl	48(%esp), %edx
-	movl	52(%esp), %ecx
-	movl	128(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%eax)
-	addl	$108, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end207:
-	.size	mcl_fp_mulUnitPre14Lbmi2, .Lfunc_end207-mcl_fp_mulUnitPre14Lbmi2
-
-	.globl	mcl_fpDbl_mulPre14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre14Lbmi2,@function
-mcl_fpDbl_mulPre14Lbmi2:                # @mcl_fpDbl_mulPre14Lbmi2
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$268, %esp              # imm = 0x10C
-	calll	.L208$pb
-.L208$pb:
-	popl	%ebx
-.Ltmp39:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp39-.L208$pb), %ebx
-	movl	%ebx, -192(%ebp)        # 4-byte Spill
-	movl	16(%ebp), %esi
-	movl	%esi, 8(%esp)
-	movl	12(%ebp), %edi
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre7Lbmi2@PLT
-	leal	28(%esi), %eax
-	movl	%eax, 8(%esp)
-	leal	28(%edi), %eax
-	movl	%eax, 4(%esp)
-	movl	8(%ebp), %eax
-	leal	56(%eax), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre7Lbmi2@PLT
-	movl	44(%edi), %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	movl	40(%edi), %eax
-	movl	36(%edi), %edx
-	movl	(%edi), %edi
-	movl	12(%ebp), %ecx
-	movl	4(%ecx), %ecx
-	movl	12(%ebp), %ebx
-	addl	28(%ebx), %edi
-	movl	%edi, -180(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %edi
-	adcl	32(%edi), %ecx
-	movl	%ecx, -200(%ebp)        # 4-byte Spill
-	adcl	8(%edi), %edx
-	movl	%edx, -212(%ebp)        # 4-byte Spill
-	adcl	12(%edi), %eax
-	movl	%eax, -196(%ebp)        # 4-byte Spill
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	16(%edi), %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	movl	%eax, %ebx
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -128(%ebp)        # 4-byte Spill
-	movl	(%esi), %eax
-	addl	28(%esi), %eax
-	movl	%eax, -216(%ebp)        # 4-byte Spill
-	movl	4(%esi), %eax
-	adcl	32(%esi), %eax
-	movl	%eax, -164(%ebp)        # 4-byte Spill
-	movl	36(%esi), %eax
-	adcl	8(%esi), %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	movl	40(%esi), %eax
-	adcl	12(%esi), %eax
-	movl	%eax, -172(%ebp)        # 4-byte Spill
-	movl	44(%esi), %eax
-	adcl	16(%esi), %eax
-	movl	%eax, -176(%ebp)        # 4-byte Spill
-	movl	48(%esi), %ecx
-	adcl	20(%esi), %ecx
-	movl	52(%esi), %eax
-	adcl	24(%esi), %eax
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %esi
-	popl	%eax
-	movl	%esi, -220(%ebp)        # 4-byte Spill
-	movl	%ebx, %esi
-	movl	%edx, -184(%ebp)        # 4-byte Spill
-	movl	-180(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -188(%ebp)        # 4-byte Spill
-	jb	.LBB208_2
-# BB#1:
-	xorl	%esi, %esi
-	movl	$0, -184(%ebp)          # 4-byte Folded Spill
-	movl	$0, -188(%ebp)          # 4-byte Folded Spill
-.LBB208_2:
-	movl	%esi, -204(%ebp)        # 4-byte Spill
-	movl	52(%edi), %esi
-	movl	48(%edi), %ebx
-	movl	-128(%ebp), %edx        # 4-byte Reload
-	pushl	%eax
-	movl	%edx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	20(%edi), %ebx
-	movl	%ebx, -160(%ebp)        # 4-byte Spill
-	adcl	24(%edi), %esi
-	movl	%esi, -208(%ebp)        # 4-byte Spill
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	movl	%ecx, -152(%ebp)        # 4-byte Spill
-	movl	-176(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -128(%ebp)        # 4-byte Spill
-	movl	-172(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -132(%ebp)        # 4-byte Spill
-	movl	-168(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -136(%ebp)        # 4-byte Spill
-	movl	-164(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -140(%ebp)        # 4-byte Spill
-	movl	-216(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -144(%ebp)        # 4-byte Spill
-	jb	.LBB208_4
-# BB#3:
-	movl	$0, -148(%ebp)          # 4-byte Folded Spill
-	movl	$0, -152(%ebp)          # 4-byte Folded Spill
-	movl	$0, -128(%ebp)          # 4-byte Folded Spill
-	movl	$0, -132(%ebp)          # 4-byte Folded Spill
-	movl	$0, -136(%ebp)          # 4-byte Folded Spill
-	movl	$0, -140(%ebp)          # 4-byte Folded Spill
-	movl	$0, -144(%ebp)          # 4-byte Folded Spill
-.LBB208_4:
-	movl	-180(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -96(%ebp)
-	movl	-200(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -92(%ebp)
-	movl	-212(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -88(%ebp)
-	movl	-196(%ebp), %edi        # 4-byte Reload
-	movl	%edi, -84(%ebp)
-	movl	-156(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -80(%ebp)
-	movl	%ebx, -124(%ebp)
-	movl	-164(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -120(%ebp)
-	movl	-168(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -116(%ebp)
-	movl	-172(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -112(%ebp)
-	movl	-176(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -108(%ebp)
-	movl	%ecx, -104(%ebp)
-	movl	%edi, %ebx
-	movl	%esi, %edi
-	movl	%eax, -100(%ebp)
-	sbbl	%edx, %edx
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -76(%ebp)
-	movl	-208(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -72(%ebp)
-	movl	-220(%ebp), %ecx        # 4-byte Reload
-	pushl	%eax
-	movl	%ecx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB208_6
-# BB#5:
-	movl	$0, %esi
-	movl	$0, %eax
-	movl	$0, %ebx
-	movl	$0, %edi
-.LBB208_6:
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	sbbl	%eax, %eax
-	leal	-124(%ebp), %ecx
-	movl	%ecx, 8(%esp)
-	leal	-96(%ebp), %ecx
-	movl	%ecx, 4(%esp)
-	leal	-68(%ebp), %ecx
-	movl	%ecx, (%esp)
-	andl	%eax, %edx
-	movl	-188(%ebp), %eax        # 4-byte Reload
-	addl	%eax, -144(%ebp)        # 4-byte Folded Spill
-	adcl	%edi, -140(%ebp)        # 4-byte Folded Spill
-	movl	-184(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -136(%ebp)        # 4-byte Folded Spill
-	adcl	%ebx, -132(%ebp)        # 4-byte Folded Spill
-	movl	-204(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -128(%ebp)        # 4-byte Folded Spill
-	movl	-152(%ebp), %edi        # 4-byte Reload
-	adcl	-160(%ebp), %edi        # 4-byte Folded Reload
-	adcl	%esi, -148(%ebp)        # 4-byte Folded Spill
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	andl	$1, %edx
-	movl	%edx, -156(%ebp)        # 4-byte Spill
-	movl	-192(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre7Lbmi2@PLT
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	addl	-40(%ebp), %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	movl	-140(%ebp), %eax        # 4-byte Reload
-	adcl	-36(%ebp), %eax
-	movl	%eax, -140(%ebp)        # 4-byte Spill
-	movl	-136(%ebp), %eax        # 4-byte Reload
-	adcl	-32(%ebp), %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-	movl	-132(%ebp), %eax        # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -132(%ebp)        # 4-byte Spill
-	movl	-128(%ebp), %eax        # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -128(%ebp)        # 4-byte Spill
-	adcl	-20(%ebp), %edi
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	-16(%ebp), %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	adcl	%esi, -156(%ebp)        # 4-byte Folded Spill
-	movl	-68(%ebp), %eax
-	movl	8(%ebp), %esi
-	subl	(%esi), %eax
-	movl	%eax, -172(%ebp)        # 4-byte Spill
-	movl	-64(%ebp), %ecx
-	sbbl	4(%esi), %ecx
-	movl	-60(%ebp), %eax
-	sbbl	8(%esi), %eax
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	movl	-56(%ebp), %edx
-	sbbl	12(%esi), %edx
-	movl	-52(%ebp), %ebx
-	sbbl	16(%esi), %ebx
-	movl	-48(%ebp), %eax
-	sbbl	20(%esi), %eax
-	movl	%eax, -164(%ebp)        # 4-byte Spill
-	movl	-44(%ebp), %eax
-	sbbl	24(%esi), %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	movl	28(%esi), %eax
-	movl	%eax, -176(%ebp)        # 4-byte Spill
-	sbbl	%eax, -144(%ebp)        # 4-byte Folded Spill
-	movl	32(%esi), %eax
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	sbbl	%eax, -140(%ebp)        # 4-byte Folded Spill
-	movl	36(%esi), %eax
-	movl	%eax, -184(%ebp)        # 4-byte Spill
-	sbbl	%eax, -136(%ebp)        # 4-byte Folded Spill
-	movl	40(%esi), %eax
-	movl	%eax, -188(%ebp)        # 4-byte Spill
-	sbbl	%eax, -132(%ebp)        # 4-byte Folded Spill
-	movl	44(%esi), %eax
-	movl	%eax, -192(%ebp)        # 4-byte Spill
-	sbbl	%eax, -128(%ebp)        # 4-byte Folded Spill
-	movl	48(%esi), %eax
-	movl	%eax, -196(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edi
-	movl	%edi, -152(%ebp)        # 4-byte Spill
-	movl	52(%esi), %eax
-	movl	%eax, -200(%ebp)        # 4-byte Spill
-	movl	-148(%ebp), %edi        # 4-byte Reload
-	sbbl	%eax, %edi
-	sbbl	$0, -156(%ebp)          # 4-byte Folded Spill
-	movl	56(%esi), %eax
-	movl	%eax, -228(%ebp)        # 4-byte Spill
-	subl	%eax, -172(%ebp)        # 4-byte Folded Spill
-	movl	60(%esi), %eax
-	movl	%eax, -232(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ecx
-	movl	64(%esi), %eax
-	movl	%eax, -236(%ebp)        # 4-byte Spill
-	sbbl	%eax, -160(%ebp)        # 4-byte Folded Spill
-	movl	68(%esi), %eax
-	movl	%eax, -240(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edx
-	movl	72(%esi), %eax
-	movl	%eax, -244(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ebx
-	movl	76(%esi), %eax
-	movl	%eax, -248(%ebp)        # 4-byte Spill
-	sbbl	%eax, -164(%ebp)        # 4-byte Folded Spill
-	movl	80(%esi), %eax
-	movl	%eax, -252(%ebp)        # 4-byte Spill
-	sbbl	%eax, -168(%ebp)        # 4-byte Folded Spill
-	movl	84(%esi), %eax
-	movl	%eax, -256(%ebp)        # 4-byte Spill
-	sbbl	%eax, -144(%ebp)        # 4-byte Folded Spill
-	movl	88(%esi), %eax
-	movl	%eax, -208(%ebp)        # 4-byte Spill
-	sbbl	%eax, -140(%ebp)        # 4-byte Folded Spill
-	movl	92(%esi), %eax
-	movl	%eax, -212(%ebp)        # 4-byte Spill
-	sbbl	%eax, -136(%ebp)        # 4-byte Folded Spill
-	movl	96(%esi), %eax
-	movl	%eax, -216(%ebp)        # 4-byte Spill
-	sbbl	%eax, -132(%ebp)        # 4-byte Folded Spill
-	movl	100(%esi), %eax
-	movl	%eax, -220(%ebp)        # 4-byte Spill
-	sbbl	%eax, -128(%ebp)        # 4-byte Folded Spill
-	movl	104(%esi), %eax
-	movl	%eax, -224(%ebp)        # 4-byte Spill
-	sbbl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	movl	108(%esi), %eax
-	movl	%eax, -204(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edi
-	movl	%edi, -148(%ebp)        # 4-byte Spill
-	movl	-156(%ebp), %edi        # 4-byte Reload
-	sbbl	$0, %edi
-	movl	-172(%ebp), %eax        # 4-byte Reload
-	addl	-176(%ebp), %eax        # 4-byte Folded Reload
-	adcl	-180(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 28(%esi)
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	adcl	-184(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 32(%esi)
-	adcl	-188(%ebp), %edx        # 4-byte Folded Reload
-	movl	%eax, 36(%esi)
-	adcl	-192(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%edx, 40(%esi)
-	movl	-164(%ebp), %eax        # 4-byte Reload
-	adcl	-196(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ebx, 44(%esi)
-	movl	-168(%ebp), %ecx        # 4-byte Reload
-	adcl	-200(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 48(%esi)
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	adcl	-228(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 52(%esi)
-	movl	-140(%ebp), %ecx        # 4-byte Reload
-	adcl	-232(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 56(%esi)
-	movl	-136(%ebp), %eax        # 4-byte Reload
-	adcl	-236(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 60(%esi)
-	movl	-132(%ebp), %ecx        # 4-byte Reload
-	adcl	-240(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 64(%esi)
-	movl	-128(%ebp), %eax        # 4-byte Reload
-	adcl	-244(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 68(%esi)
-	movl	-152(%ebp), %ecx        # 4-byte Reload
-	adcl	-248(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 72(%esi)
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	-252(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 76(%esi)
-	adcl	-256(%ebp), %edi        # 4-byte Folded Reload
-	movl	%eax, 80(%esi)
-	movl	%edi, 84(%esi)
-	movl	-208(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 88(%esi)
-	movl	-212(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 92(%esi)
-	movl	-216(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 96(%esi)
-	movl	-220(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 100(%esi)
-	movl	-224(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 104(%esi)
-	movl	-204(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 108(%esi)
-	addl	$268, %esp              # imm = 0x10C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end208:
-	.size	mcl_fpDbl_mulPre14Lbmi2, .Lfunc_end208-mcl_fpDbl_mulPre14Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre14Lbmi2,@function
-mcl_fpDbl_sqrPre14Lbmi2:                # @mcl_fpDbl_sqrPre14Lbmi2
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$268, %esp              # imm = 0x10C
-	calll	.L209$pb
-.L209$pb:
-	popl	%ebx
-.Ltmp40:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp40-.L209$pb), %ebx
-	movl	%ebx, -172(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %esi
-	movl	%esi, (%esp)
-	calll	mcl_fpDbl_mulPre7Lbmi2@PLT
-	leal	28(%edi), %eax
-	movl	%eax, 8(%esp)
-	movl	%eax, 4(%esp)
-	leal	56(%esi), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre7Lbmi2@PLT
-	movl	48(%edi), %eax
-	movl	44(%edi), %ecx
-	movl	36(%edi), %edx
-	movl	(%edi), %esi
-	movl	4(%edi), %ebx
-	addl	28(%edi), %esi
-	adcl	32(%edi), %ebx
-	movl	%ebx, -164(%ebp)        # 4-byte Spill
-	adcl	8(%edi), %edx
-	movl	%edx, -160(%ebp)        # 4-byte Spill
-	movl	40(%edi), %edx
-	adcl	12(%edi), %edx
-	adcl	16(%edi), %ecx
-	movl	%ecx, -180(%ebp)        # 4-byte Spill
-	adcl	20(%edi), %eax
-	movl	%eax, -176(%ebp)        # 4-byte Spill
-	movl	52(%edi), %ecx
-	adcl	24(%edi), %ecx
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -184(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -140(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	sbbl	%ebx, %ebx
-	movl	%ebx, -128(%ebp)        # 4-byte Spill
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_1
-# BB#2:
-	movl	%esi, -168(%ebp)        # 4-byte Spill
-	movl	$0, -132(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_3
-.LBB209_1:
-	leal	(%esi,%esi), %eax
-	movl	%esi, -168(%ebp)        # 4-byte Spill
-	movl	%eax, -132(%ebp)        # 4-byte Spill
-.LBB209_3:
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	movl	-180(%ebp), %ebx        # 4-byte Reload
-	jb	.LBB209_4
-# BB#5:
-	movl	$0, -156(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_6
-.LBB209_4:
-	movl	-164(%ebp), %eax        # 4-byte Reload
-	movl	-168(%ebp), %esi        # 4-byte Reload
-	shldl	$1, %esi, %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-.LBB209_6:
-	movl	-176(%ebp), %edi        # 4-byte Reload
-	movl	-136(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_7
-# BB#8:
-	movl	$0, -136(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_9
-.LBB209_7:
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	movl	-164(%ebp), %esi        # 4-byte Reload
-	shldl	$1, %esi, %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-.LBB209_9:
-	movl	%ebx, %esi
-	movl	-140(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_10
-# BB#11:
-	movl	$0, -140(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_12
-.LBB209_10:
-	movl	%edx, %eax
-	movl	-160(%ebp), %ebx        # 4-byte Reload
-	shldl	$1, %ebx, %eax
-	movl	%eax, -140(%ebp)        # 4-byte Spill
-.LBB209_12:
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_13
-# BB#14:
-	movl	$0, -144(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_15
-.LBB209_13:
-	movl	%esi, %eax
-	shldl	$1, %edx, %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-.LBB209_15:
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_16
-# BB#17:
-	movl	$0, -148(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_18
-.LBB209_16:
-	movl	%edi, %eax
-	shldl	$1, %esi, %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-.LBB209_18:
-	movl	-152(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_19
-# BB#20:
-	movl	$0, -152(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_21
-.LBB209_19:
-	movl	%ecx, %eax
-	shldl	$1, %edi, %eax
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-.LBB209_21:
-	movl	-168(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -96(%ebp)
-	movl	%eax, -124(%ebp)
-	movl	-164(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -92(%ebp)
-	movl	%eax, -120(%ebp)
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -88(%ebp)
-	movl	%eax, -116(%ebp)
-	movl	%edx, -84(%ebp)
-	movl	%edx, -112(%ebp)
-	movl	%esi, -80(%ebp)
-	movl	%esi, -108(%ebp)
-	movl	%edi, -76(%ebp)
-	movl	%edi, -104(%ebp)
-	movl	%ecx, -72(%ebp)
-	movl	%ecx, -100(%ebp)
-	movl	-184(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_22
-# BB#23:
-	xorl	%edi, %edi
-	jmp	.LBB209_24
-.LBB209_22:
-	shrl	$31, %ecx
-	movl	%ecx, %edi
-.LBB209_24:
-	leal	-68(%ebp), %eax
-	movl	%eax, (%esp)
-	leal	-96(%ebp), %eax
-	movl	%eax, 4(%esp)
-	leal	-124(%ebp), %eax
-	movl	%eax, 8(%esp)
-	movl	-128(%ebp), %esi        # 4-byte Reload
-	andl	$1, %esi
-	movl	-172(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre7Lbmi2@PLT
-	movl	-132(%ebp), %eax        # 4-byte Reload
-	addl	-40(%ebp), %eax
-	movl	%eax, -132(%ebp)        # 4-byte Spill
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	-36(%ebp), %eax
-	movl	-136(%ebp), %ecx        # 4-byte Reload
-	adcl	-32(%ebp), %ecx
-	movl	%ecx, -136(%ebp)        # 4-byte Spill
-	movl	-140(%ebp), %ecx        # 4-byte Reload
-	adcl	-28(%ebp), %ecx
-	movl	%ecx, -140(%ebp)        # 4-byte Spill
-	movl	-144(%ebp), %ecx        # 4-byte Reload
-	adcl	-24(%ebp), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	movl	-148(%ebp), %ecx        # 4-byte Reload
-	adcl	-20(%ebp), %ecx
-	movl	%ecx, -148(%ebp)        # 4-byte Spill
-	movl	-152(%ebp), %ecx        # 4-byte Reload
-	adcl	-16(%ebp), %ecx
-	movl	%ecx, -152(%ebp)        # 4-byte Spill
-	adcl	%edi, %esi
-	movl	%esi, -128(%ebp)        # 4-byte Spill
-	movl	-68(%ebp), %ecx
-	movl	8(%ebp), %esi
-	subl	(%esi), %ecx
-	movl	%ecx, -204(%ebp)        # 4-byte Spill
-	movl	-64(%ebp), %edi
-	sbbl	4(%esi), %edi
-	movl	-60(%ebp), %edx
-	sbbl	8(%esi), %edx
-	movl	%edx, -160(%ebp)        # 4-byte Spill
-	movl	-56(%ebp), %edx
-	sbbl	12(%esi), %edx
-	movl	%edx, -168(%ebp)        # 4-byte Spill
-	movl	-52(%ebp), %ebx
-	sbbl	16(%esi), %ebx
-	movl	-48(%ebp), %ecx
-	sbbl	20(%esi), %ecx
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	movl	-44(%ebp), %edx
-	sbbl	24(%esi), %edx
-	movl	%edx, -164(%ebp)        # 4-byte Spill
-	movl	28(%esi), %edx
-	movl	%edx, -176(%ebp)        # 4-byte Spill
-	sbbl	%edx, -132(%ebp)        # 4-byte Folded Spill
-	movl	32(%esi), %ecx
-	movl	%ecx, -180(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	movl	36(%esi), %eax
-	movl	%eax, -184(%ebp)        # 4-byte Spill
-	sbbl	%eax, -136(%ebp)        # 4-byte Folded Spill
-	movl	40(%esi), %eax
-	movl	%eax, -188(%ebp)        # 4-byte Spill
-	sbbl	%eax, -140(%ebp)        # 4-byte Folded Spill
-	movl	44(%esi), %eax
-	movl	%eax, -192(%ebp)        # 4-byte Spill
-	sbbl	%eax, -144(%ebp)        # 4-byte Folded Spill
-	movl	48(%esi), %eax
-	movl	%eax, -196(%ebp)        # 4-byte Spill
-	sbbl	%eax, -148(%ebp)        # 4-byte Folded Spill
-	movl	52(%esi), %eax
-	movl	%eax, -200(%ebp)        # 4-byte Spill
-	sbbl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	movl	-128(%ebp), %ecx        # 4-byte Reload
-	sbbl	$0, %ecx
-	movl	56(%esi), %eax
-	movl	%eax, -228(%ebp)        # 4-byte Spill
-	movl	-204(%ebp), %edx        # 4-byte Reload
-	subl	%eax, %edx
-	movl	60(%esi), %eax
-	movl	%eax, -232(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edi
-	movl	64(%esi), %eax
-	movl	%eax, -236(%ebp)        # 4-byte Spill
-	sbbl	%eax, -160(%ebp)        # 4-byte Folded Spill
-	movl	68(%esi), %eax
-	movl	%eax, -240(%ebp)        # 4-byte Spill
-	sbbl	%eax, -168(%ebp)        # 4-byte Folded Spill
-	movl	72(%esi), %eax
-	movl	%eax, -244(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ebx
-	movl	76(%esi), %eax
-	movl	%eax, -248(%ebp)        # 4-byte Spill
-	sbbl	%eax, -172(%ebp)        # 4-byte Folded Spill
-	movl	80(%esi), %eax
-	movl	%eax, -252(%ebp)        # 4-byte Spill
-	sbbl	%eax, -164(%ebp)        # 4-byte Folded Spill
-	movl	84(%esi), %eax
-	movl	%eax, -256(%ebp)        # 4-byte Spill
-	sbbl	%eax, -132(%ebp)        # 4-byte Folded Spill
-	movl	88(%esi), %eax
-	movl	%eax, -204(%ebp)        # 4-byte Spill
-	sbbl	%eax, -156(%ebp)        # 4-byte Folded Spill
-	movl	92(%esi), %eax
-	movl	%eax, -208(%ebp)        # 4-byte Spill
-	sbbl	%eax, -136(%ebp)        # 4-byte Folded Spill
-	movl	96(%esi), %eax
-	movl	%eax, -212(%ebp)        # 4-byte Spill
-	sbbl	%eax, -140(%ebp)        # 4-byte Folded Spill
-	movl	100(%esi), %eax
-	movl	%eax, -216(%ebp)        # 4-byte Spill
-	sbbl	%eax, -144(%ebp)        # 4-byte Folded Spill
-	movl	104(%esi), %eax
-	movl	%eax, -220(%ebp)        # 4-byte Spill
-	sbbl	%eax, -148(%ebp)        # 4-byte Folded Spill
-	movl	108(%esi), %eax
-	movl	%eax, -224(%ebp)        # 4-byte Spill
-	sbbl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, %ecx
-	movl	%ecx, -128(%ebp)        # 4-byte Spill
-	movl	%edx, %eax
-	addl	-176(%ebp), %eax        # 4-byte Folded Reload
-	adcl	-180(%ebp), %edi        # 4-byte Folded Reload
-	movl	%eax, 28(%esi)
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	adcl	-184(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edi, 32(%esi)
-	movl	-168(%ebp), %ecx        # 4-byte Reload
-	adcl	-188(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 36(%esi)
-	adcl	-192(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%ecx, 40(%esi)
-	movl	-172(%ebp), %eax        # 4-byte Reload
-	adcl	-196(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ebx, 44(%esi)
-	movl	-164(%ebp), %ecx        # 4-byte Reload
-	adcl	-200(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 48(%esi)
-	movl	-132(%ebp), %eax        # 4-byte Reload
-	adcl	-228(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 52(%esi)
-	movl	-156(%ebp), %edx        # 4-byte Reload
-	adcl	-232(%ebp), %edx        # 4-byte Folded Reload
-	movl	%eax, 56(%esi)
-	movl	-136(%ebp), %ecx        # 4-byte Reload
-	adcl	-236(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 60(%esi)
-	movl	-140(%ebp), %eax        # 4-byte Reload
-	adcl	-240(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 64(%esi)
-	movl	-144(%ebp), %ecx        # 4-byte Reload
-	adcl	-244(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 68(%esi)
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	-248(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 72(%esi)
-	movl	-152(%ebp), %ecx        # 4-byte Reload
-	adcl	-252(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 76(%esi)
-	movl	-128(%ebp), %eax        # 4-byte Reload
-	adcl	-256(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 80(%esi)
-	movl	%eax, 84(%esi)
-	movl	-204(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 88(%esi)
-	movl	-208(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 92(%esi)
-	movl	-212(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 96(%esi)
-	movl	-216(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 100(%esi)
-	movl	-220(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 104(%esi)
-	movl	-224(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 108(%esi)
-	addl	$268, %esp              # imm = 0x10C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end209:
-	.size	mcl_fpDbl_sqrPre14Lbmi2, .Lfunc_end209-mcl_fpDbl_sqrPre14Lbmi2
-
-	.globl	mcl_fp_mont14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont14Lbmi2,@function
-mcl_fp_mont14Lbmi2:                     # @mcl_fp_mont14Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1900, %esp             # imm = 0x76C
-	calll	.L210$pb
-.L210$pb:
-	popl	%ebx
-.Ltmp41:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp41-.L210$pb), %ebx
-	movl	1932(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1840(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1840(%esp), %edi
-	movl	1844(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	imull	%esi, %eax
-	movl	1896(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	1892(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	1888(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	1884(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	1880(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	1876(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	1872(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1868(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1864(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1860(%esp), %esi
-	movl	1856(%esp), %ebp
-	movl	1852(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	1848(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	1776(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	addl	1776(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1780(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1784(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1788(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1792(%esp), %ebp
-	adcl	1796(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1804(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1808(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1812(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1816(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1820(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1824(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1828(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1832(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	1928(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1712(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %edi
-	movl	%edi, %edx
-	movl	100(%esp), %ecx         # 4-byte Reload
-	addl	1712(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1716(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1720(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1724(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	adcl	1728(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	1732(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1736(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1740(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	1744(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1748(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1752(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1756(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1760(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1764(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1768(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1648(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	movl	100(%esp), %eax         # 4-byte Reload
-	andl	$1, %eax
-	addl	1648(%esp), %ebp
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1652(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1656(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1660(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1664(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	1668(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1672(%esp), %ebp
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1676(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	1680(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1684(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1688(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1692(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1696(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1700(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	1704(%esp), %esi
-	adcl	$0, %eax
-	movl	%eax, %edi
-	movl	1928(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1584(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	1584(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1588(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1592(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1596(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1600(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1604(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	1608(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1612(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1616(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1620(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1624(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1628(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1632(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1636(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	adcl	1640(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1520(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	1520(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1524(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1528(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1532(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1536(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1540(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	1544(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1548(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1552(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1556(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1560(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	1564(%esp), %ebp
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	1568(%esp), %esi
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	1572(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1576(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1456(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	1456(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1460(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1464(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1468(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1472(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1476(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1480(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1484(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1488(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1492(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	1496(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	adcl	1500(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	adcl	1504(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1508(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1512(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1392(%esp), %ecx
-	movl	1932(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv448x32
-	andl	$1, %edi
-	movl	%edi, %eax
-	addl	1392(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1396(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1400(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1404(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1408(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1412(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1416(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	1420(%esp), %esi
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	1424(%esp), %ebp
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	1428(%esp), %edi
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1432(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1436(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1440(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1444(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1448(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1328(%esp), %ecx
-	movl	1924(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv448x32
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	1328(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1336(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1352(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	1356(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	adcl	1360(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1364(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1372(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1376(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1380(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	1384(%esp), %edi
-	sbbl	%esi, %esi
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1264(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	1264(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1272(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	1284(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1288(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1300(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1308(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1312(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	1316(%esp), %esi
-	adcl	1320(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1200(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	68(%esp), %eax          # 4-byte Reload
-	addl	1200(%esp), %eax
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1204(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1208(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	1212(%esp), %edi
-	adcl	1216(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1220(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1224(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1228(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1232(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1236(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1240(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1244(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	adcl	1248(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1252(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1256(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%eax, %ebp
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1136(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	1136(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1144(%esp), %ebp
-	adcl	1148(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	1172(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	1180(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1188(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1072(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	1072(%esp), %eax
-	adcl	1076(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1080(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1084(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1088(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1092(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1096(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1100(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	1104(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1108(%esp), %ebp
-	adcl	1112(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1116(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1120(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1124(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1128(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%eax, %edi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1008(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	1008(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	1020(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	1036(%esp), %edi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1044(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	1052(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	944(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	52(%esp), %eax          # 4-byte Reload
-	addl	944(%esp), %eax
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	948(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	952(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	956(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	960(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	964(%esp), %esi
-	adcl	968(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	972(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	976(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	980(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	adcl	984(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	988(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	992(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	996(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1000(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%eax, %edi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	880(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %ebp
-	addl	880(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	896(%esp), %edi
-	adcl	900(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	924(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	928(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	932(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	936(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	816(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	816(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	824(%esp), %ebp
-	adcl	828(%esp), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	856(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	872(%esp), %esi
-	sbbl	%eax, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	752(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	752(%esp), %edi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	756(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	760(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	764(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	768(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	772(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	776(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	780(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	784(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	788(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	792(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	796(%esp), %ebp
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	800(%esp), %edi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	804(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	808(%esp), %esi
-	adcl	$0, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	688(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	688(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	728(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	adcl	732(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	740(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	624(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	movl	%edi, %ecx
-	andl	$1, %ecx
-	addl	624(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	636(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	648(%esp), %esi
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	652(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	560(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	568(%esp), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	580(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	adcl	584(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	592(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	496(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	508(%esp), %edi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	520(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	528(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	540(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	432(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	88(%esp), %ecx          # 4-byte Reload
-	addl	432(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	440(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	444(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	452(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	472(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	368(%esp), %ebp
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	376(%esp), %esi
-	adcl	380(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	392(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	304(%esp), %ecx
-	adcl	308(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	adcl	312(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	324(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	328(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	240(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	movl	96(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	240(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	248(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	252(%esp), %edi
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	256(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	264(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	268(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	176(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	104(%esp), %ecx         # 4-byte Reload
-	addl	176(%esp), %ecx
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	184(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	adcl	188(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	192(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	200(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %ebp
-	addl	112(%esp), %esi
-	movl	100(%esp), %esi         # 4-byte Reload
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	120(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	adcl	124(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	adcl	128(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	132(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %ebx
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	136(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	140(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	144(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	148(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	152(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	156(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	160(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	164(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	168(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	1932(%esp), %ecx
-	subl	(%ecx), %eax
-	sbbl	4(%ecx), %edx
-	sbbl	8(%ecx), %esi
-	sbbl	12(%ecx), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	sbbl	16(%ecx), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	sbbl	20(%ecx), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	sbbl	24(%ecx), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebx          # 4-byte Reload
-	sbbl	28(%ecx), %ebx
-	movl	52(%esp), %edi          # 4-byte Reload
-	sbbl	32(%ecx), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	sbbl	36(%ecx), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	sbbl	40(%ecx), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	sbbl	44(%ecx), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	sbbl	48(%ecx), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	sbbl	52(%ecx), %edi
-	movl	%ebp, %ecx
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB210_2
-# BB#1:
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-.LBB210_2:
-	testb	%cl, %cl
-	movl	108(%esp), %ebx         # 4-byte Reload
-	jne	.LBB210_4
-# BB#3:
-	movl	%eax, %ebx
-.LBB210_4:
-	movl	1920(%esp), %eax
-	movl	%ebx, (%eax)
-	movl	92(%esp), %edi          # 4-byte Reload
-	movl	72(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_6
-# BB#5:
-	movl	%edx, %edi
-.LBB210_6:
-	movl	%edi, 4(%eax)
-	jne	.LBB210_8
-# BB#7:
-	movl	%esi, 100(%esp)         # 4-byte Spill
-.LBB210_8:
-	movl	100(%esp), %edx         # 4-byte Reload
-	movl	%edx, 8(%eax)
-	jne	.LBB210_10
-# BB#9:
-	movl	16(%esp), %edx          # 4-byte Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-.LBB210_10:
-	movl	84(%esp), %edx          # 4-byte Reload
-	movl	%edx, 12(%eax)
-	jne	.LBB210_12
-# BB#11:
-	movl	20(%esp), %ecx          # 4-byte Reload
-.LBB210_12:
-	movl	%ecx, 16(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_14
-# BB#13:
-	movl	24(%esp), %ecx          # 4-byte Reload
-.LBB210_14:
-	movl	%ecx, 20(%eax)
-	movl	68(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_16
-# BB#15:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB210_16:
-	movl	%ecx, 24(%eax)
-	movl	60(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_18
-# BB#17:
-	movl	32(%esp), %ecx          # 4-byte Reload
-.LBB210_18:
-	movl	%ecx, 32(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_20
-# BB#19:
-	movl	36(%esp), %ecx          # 4-byte Reload
-.LBB210_20:
-	movl	%ecx, 36(%eax)
-	movl	64(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_22
-# BB#21:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB210_22:
-	movl	%ecx, 40(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_24
-# BB#23:
-	movl	44(%esp), %ecx          # 4-byte Reload
-.LBB210_24:
-	movl	%ecx, 44(%eax)
-	movl	88(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_26
-# BB#25:
-	movl	48(%esp), %ecx          # 4-byte Reload
-.LBB210_26:
-	movl	%ecx, 48(%eax)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_28
-# BB#27:
-	movl	104(%esp), %ecx         # 4-byte Reload
-.LBB210_28:
-	movl	%ecx, 52(%eax)
-	addl	$1900, %esp             # imm = 0x76C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end210:
-	.size	mcl_fp_mont14Lbmi2, .Lfunc_end210-mcl_fp_mont14Lbmi2
-
-	.globl	mcl_fp_montNF14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF14Lbmi2,@function
-mcl_fp_montNF14Lbmi2:                   # @mcl_fp_montNF14Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1884, %esp             # imm = 0x75C
-	calll	.L211$pb
-.L211$pb:
-	popl	%ebx
-.Ltmp42:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp42-.L211$pb), %ebx
-	movl	1916(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1824(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1824(%esp), %edi
-	movl	1828(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	imull	%esi, %eax
-	movl	1880(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	1876(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	1872(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1868(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	1864(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1860(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1856(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1852(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1848(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1844(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1840(%esp), %esi
-	movl	1836(%esp), %ebp
-	movl	1832(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	1760(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	1760(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1764(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1768(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1772(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	adcl	1776(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1780(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1784(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1788(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1792(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1796(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1804(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	1808(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1812(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1816(%esp), %ebp
-	movl	1912(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1696(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1752(%esp), %eax
-	movl	68(%esp), %edx          # 4-byte Reload
-	addl	1696(%esp), %edx
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1700(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1704(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	1708(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1712(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1716(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1720(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1724(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1728(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1732(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1736(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	1740(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	1744(%esp), %edi
-	adcl	1748(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %ebp
-	movl	%edx, %eax
-	movl	%edx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1632(%esp), %ecx
-	movl	1916(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv448x32
-	addl	1632(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1636(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1640(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1644(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1648(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1652(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1656(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1660(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	1664(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1668(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1672(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1676(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1680(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1684(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1688(%esp), %ebp
-	movl	1912(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1568(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1624(%esp), %eax
-	movl	88(%esp), %edx          # 4-byte Reload
-	addl	1568(%esp), %edx
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1572(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1576(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1580(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1584(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1588(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1592(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	1596(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1600(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1604(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1608(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	1612(%esp), %edi
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1616(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	1620(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %ebp
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1504(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	1504(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1508(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1512(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1516(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1520(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1524(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1528(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1532(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1536(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1540(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1544(%esp), %esi
-	adcl	1548(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1552(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1556(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1560(%esp), %ebp
-	movl	1912(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1440(%esp), %ecx
-	movl	1908(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv448x32
-	movl	1496(%esp), %eax
-	movl	72(%esp), %edx          # 4-byte Reload
-	addl	1440(%esp), %edx
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1444(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1448(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1452(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1456(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1460(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	1464(%esp), %edi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1468(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1472(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	1476(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1480(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1484(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1488(%esp), %esi
-	adcl	1492(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %ebp
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1376(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	1376(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1380(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1384(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	1400(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1404(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1408(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1412(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1416(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1424(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1428(%esp), %eax
-	movl	%eax, %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1432(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1312(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1368(%esp), %eax
-	movl	68(%esp), %edx          # 4-byte Reload
-	addl	1312(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1316(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1320(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1324(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	1328(%esp), %edi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1332(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1336(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	1340(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1344(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1348(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1352(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1356(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	1360(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1364(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1248(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	1248(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1252(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1256(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1260(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	1264(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1272(%esp), %ebp
-	adcl	1276(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	1284(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1288(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	1300(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1184(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1240(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	1184(%esp), %ecx
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1188(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1200(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1204(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1208(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1212(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1216(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1220(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1228(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1232(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1236(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1120(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	1120(%esp), %esi
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	1124(%esp), %ebp
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	1128(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1144(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1148(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	1156(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1056(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1112(%esp), %eax
-	movl	%ebp, %ecx
-	addl	1056(%esp), %ecx
-	adcl	1060(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	1064(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	1068(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	1072(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	1076(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	1080(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	1084(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	adcl	1088(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	1092(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	1096(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	1100(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	1104(%esp), %ebp
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	1108(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %esi
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	992(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	992(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	1008(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1040(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	1044(%esp), %ebp
-	adcl	1048(%esp), %esi
-	movl	1912(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	984(%esp), %eax
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	928(%esp), %ecx
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	932(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	936(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	940(%esp), %edi
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	944(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	948(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	952(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	956(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	960(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	964(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	968(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	972(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	976(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	adcl	980(%esp), %esi
-	movl	%esi, %ebp
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	864(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	864(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	876(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	884(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	916(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	920(%esp), %ebp
-	movl	1912(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	800(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	856(%esp), %edx
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	800(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	808(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	816(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	828(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	852(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	736(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	736(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	764(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	768(%esp), %ebp
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	772(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	780(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	672(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	728(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	672(%esp), %ecx
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	700(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	adcl	704(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	712(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	608(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	608(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	616(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	624(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	644(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	544(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	600(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	544(%esp), %ecx
-	adcl	548(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	556(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	568(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	576(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	480(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	480(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	488(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	496(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	504(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	532(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	472(%esp), %edx
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	416(%esp), %ecx
-	adcl	420(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	424(%esp), %edi
-	adcl	428(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	444(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	464(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	352(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	360(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	364(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	388(%esp), %edi
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	392(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	288(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	344(%esp), %edx
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	288(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	296(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	320(%esp), %edi
-	adcl	324(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	328(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	224(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	232(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	256(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	260(%esp), %edi
-	adcl	264(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	160(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	216(%esp), %edx
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	160(%esp), %ecx
-	adcl	164(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	168(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	192(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	96(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	96(%esp), %esi
-	movl	64(%esp), %esi          # 4-byte Reload
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	104(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	%ebp, %ebx
-	adcl	108(%esp), %esi
-	adcl	112(%esp), %edi
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	120(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	124(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	128(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	132(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	136(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	140(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	144(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	148(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	152(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	1916(%esp), %ebp
-	subl	(%ebp), %edx
-	sbbl	4(%ebp), %ebx
-	movl	%esi, %eax
-	sbbl	8(%ebp), %eax
-	movl	%edi, %ecx
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%ebp), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	sbbl	20(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%ebp), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%ebp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	sbbl	36(%ebp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	sbbl	40(%ebp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%ebp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	sbbl	48(%ebp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	sbbl	52(%ebp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	sarl	$31, %ecx
-	testl	%ecx, %ecx
-	movl	92(%esp), %ebp          # 4-byte Reload
-	js	.LBB211_2
-# BB#1:
-	movl	%edx, %ebp
-.LBB211_2:
-	movl	1904(%esp), %edx
-	movl	%ebp, (%edx)
-	movl	88(%esp), %ebp          # 4-byte Reload
-	js	.LBB211_4
-# BB#3:
-	movl	%ebx, %ebp
-.LBB211_4:
-	movl	%ebp, 4(%edx)
-	js	.LBB211_6
-# BB#5:
-	movl	%eax, %esi
-.LBB211_6:
-	movl	%esi, 8(%edx)
-	js	.LBB211_8
-# BB#7:
-	movl	4(%esp), %edi           # 4-byte Reload
-.LBB211_8:
-	movl	%edi, 12(%edx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	js	.LBB211_10
-# BB#9:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB211_10:
-	movl	%eax, 16(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB211_12
-# BB#11:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB211_12:
-	movl	%eax, 20(%edx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	js	.LBB211_14
-# BB#13:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB211_14:
-	movl	%eax, 24(%edx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB211_16
-# BB#15:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB211_16:
-	movl	%eax, 28(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB211_18
-# BB#17:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB211_18:
-	movl	%eax, 32(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB211_20
-# BB#19:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB211_20:
-	movl	%eax, 36(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB211_22
-# BB#21:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB211_22:
-	movl	%eax, 40(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	js	.LBB211_24
-# BB#23:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB211_24:
-	movl	%eax, 44(%edx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	js	.LBB211_26
-# BB#25:
-	movl	64(%esp), %eax          # 4-byte Reload
-.LBB211_26:
-	movl	%eax, 48(%edx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	js	.LBB211_28
-# BB#27:
-	movl	72(%esp), %eax          # 4-byte Reload
-.LBB211_28:
-	movl	%eax, 52(%edx)
-	addl	$1884, %esp             # imm = 0x75C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end211:
-	.size	mcl_fp_montNF14Lbmi2, .Lfunc_end211-mcl_fp_montNF14Lbmi2
-
-	.globl	mcl_fp_montRed14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed14Lbmi2,@function
-mcl_fp_montRed14Lbmi2:                  # @mcl_fp_montRed14Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1068, %esp             # imm = 0x42C
-	calll	.L212$pb
-.L212$pb:
-	popl	%eax
-.Ltmp43:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp43-.L212$pb), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1096(%esp), %edx
-	movl	-4(%edx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1092(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 92(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	imull	%eax, %ebx
-	movl	108(%ecx), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	104(%ecx), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	100(%ecx), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	96(%ecx), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	92(%ecx), %esi
-	movl	%esi, 140(%esp)         # 4-byte Spill
-	movl	88(%ecx), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	84(%ecx), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	80(%ecx), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	76(%ecx), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	72(%ecx), %esi
-	movl	%esi, 136(%esp)         # 4-byte Spill
-	movl	68(%ecx), %esi
-	movl	%esi, 168(%esp)         # 4-byte Spill
-	movl	64(%ecx), %esi
-	movl	%esi, 164(%esp)         # 4-byte Spill
-	movl	60(%ecx), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	56(%ecx), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	52(%ecx), %edi
-	movl	%edi, 144(%esp)         # 4-byte Spill
-	movl	48(%ecx), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	44(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	40(%ecx), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	36(%ecx), %ebp
-	movl	32(%ecx), %edi
-	movl	28(%ecx), %esi
-	movl	24(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	20(%ecx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	16(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	12(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	8(%ecx), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	52(%edx), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	48(%edx), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	44(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	1008(%esp), %ecx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	movl	92(%esp), %eax          # 4-byte Reload
-	addl	1008(%esp), %eax
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1012(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1036(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	1040(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	adcl	1044(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	1052(%esp), %ebp
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 164(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	944(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	movl	%edi, %ecx
-	andl	$1, %ecx
-	addl	944(%esp), %esi
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	948(%esp), %edx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	976(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	984(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %esi         # 4-byte Reload
-	adcl	1000(%esp), %esi
-	adcl	$0, 164(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %ebp
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	880(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	880(%esp), %ebp
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	884(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	908(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %edi         # 4-byte Reload
-	adcl	920(%esp), %edi
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	928(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	adcl	932(%esp), %esi
-	movl	%esi, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	936(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	movl	152(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	816(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	816(%esp), %esi
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	820(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	852(%esp), %edi
-	movl	%edi, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	856(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 152(%esp)         # 4-byte Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	movl	128(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	752(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	752(%esp), %ebp
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	756(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	movl	156(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 128(%esp)         # 4-byte Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	688(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	688(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	692(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 156(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	movl	140(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	624(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	624(%esp), %ebp
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	628(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %ebp         # 4-byte Reload
-	adcl	664(%esp), %ebp
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 140(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	560(%esp), %esi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	564(%esp), %ecx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	adcl	596(%esp), %ebp
-	movl	%ebp, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %edi         # 4-byte Reload
-	adcl	600(%esp), %edi
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	movl	120(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	1096(%esp), %eax
-	movl	%eax, %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	496(%esp), %ebp
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	500(%esp), %ecx
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %ebp         # 4-byte Reload
-	adcl	516(%esp), %ebp
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	adcl	532(%esp), %edi
-	movl	%edi, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %edi         # 4-byte Reload
-	adcl	536(%esp), %edi
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	432(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	432(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	440(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	adcl	448(%esp), %ebp
-	movl	%ebp, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %ecx         # 4-byte Reload
-	adcl	452(%esp), %ecx
-	movl	%ecx, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %ebp         # 4-byte Reload
-	adcl	456(%esp), %ebp
-	movl	164(%esp), %ecx         # 4-byte Reload
-	adcl	460(%esp), %ecx
-	movl	%ecx, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %ecx         # 4-byte Reload
-	adcl	464(%esp), %ecx
-	movl	%ecx, 168(%esp)         # 4-byte Spill
-	adcl	468(%esp), %edi
-	movl	%edi, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %ecx         # 4-byte Reload
-	adcl	472(%esp), %ecx
-	movl	%ecx, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	480(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	484(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	140(%esp), %ecx         # 4-byte Reload
-	adcl	488(%esp), %ecx
-	movl	%ecx, 140(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%eax, %esi
-	movl	88(%esp), %edi          # 4-byte Reload
-	imull	%edi, %eax
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	368(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	376(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %ecx         # 4-byte Reload
-	adcl	380(%esp), %ecx
-	movl	%ecx, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %esi         # 4-byte Reload
-	adcl	384(%esp), %esi
-	adcl	388(%esp), %ebp
-	movl	%ebp, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %ecx         # 4-byte Reload
-	adcl	392(%esp), %ecx
-	movl	%ecx, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %ecx         # 4-byte Reload
-	adcl	396(%esp), %ecx
-	movl	%ecx, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %ecx         # 4-byte Reload
-	adcl	400(%esp), %ecx
-	movl	%ecx, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %ecx         # 4-byte Reload
-	adcl	404(%esp), %ecx
-	movl	%ecx, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	408(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	412(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	416(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	140(%esp), %ecx         # 4-byte Reload
-	adcl	420(%esp), %ecx
-	movl	%ecx, 140(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	424(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%eax, %ebp
-	imull	%edi, %eax
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	304(%esp), %ebp
-	movl	132(%esp), %edi         # 4-byte Reload
-	adcl	308(%esp), %edi
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	%esi, %ebp
-	adcl	316(%esp), %ebp
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %esi         # 4-byte Reload
-	adcl	332(%esp), %esi
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%edi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	240(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	240(%esp), %edi
-	movl	144(%esp), %ecx         # 4-byte Reload
-	adcl	244(%esp), %ecx
-	adcl	248(%esp), %ebp
-	movl	%ebp, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	adcl	264(%esp), %esi
-	movl	%esi, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %edi         # 4-byte Reload
-	adcl	268(%esp), %edi
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	124(%esp), %ebp         # 4-byte Reload
-	adcl	280(%esp), %ebp
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	176(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	176(%esp), %esi
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %ebx         # 4-byte Reload
-	adcl	188(%esp), %ebx
-	movl	%ebx, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %edx         # 4-byte Reload
-	adcl	196(%esp), %edx
-	movl	%edx, 136(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	adcl	200(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	212(%esp), %ebp
-	movl	%ebp, 124(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	232(%esp), %ecx
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	172(%esp), %edi         # 4-byte Reload
-	subl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	160(%esp), %ebp         # 4-byte Reload
-	sbbl	8(%esp), %ebp           # 4-byte Folded Reload
-	sbbl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	168(%esp), %eax         # 4-byte Reload
-	sbbl	20(%esp), %eax          # 4-byte Folded Reload
-	sbbl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	148(%esp), %edx         # 4-byte Reload
-	sbbl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	156(%esp), %edx         # 4-byte Reload
-	sbbl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	152(%esp), %edx         # 4-byte Reload
-	sbbl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	124(%esp), %edx         # 4-byte Reload
-	sbbl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	140(%esp), %edx         # 4-byte Reload
-	sbbl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	128(%esp), %edx         # 4-byte Reload
-	sbbl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %edx         # 4-byte Reload
-	sbbl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %edx         # 4-byte Reload
-	sbbl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 132(%esp)         # 4-byte Spill
-	movl	%ecx, %edx
-	sbbl	60(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 144(%esp)         # 4-byte Spill
-	sbbl	$0, %esi
-	andl	$1, %esi
-	jne	.LBB212_2
-# BB#1:
-	movl	%eax, 168(%esp)         # 4-byte Spill
-.LBB212_2:
-	movl	%esi, %edx
-	testb	%dl, %dl
-	movl	172(%esp), %eax         # 4-byte Reload
-	jne	.LBB212_4
-# BB#3:
-	movl	%edi, %eax
-.LBB212_4:
-	movl	1088(%esp), %edi
-	movl	%eax, (%edi)
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	movl	160(%esp), %ecx         # 4-byte Reload
-	jne	.LBB212_6
-# BB#5:
-	movl	%ebp, %ecx
-.LBB212_6:
-	movl	%ecx, 4(%edi)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	movl	164(%esp), %ebp         # 4-byte Reload
-	jne	.LBB212_8
-# BB#7:
-	movl	%ebx, %ebp
-.LBB212_8:
-	movl	%ebp, 8(%edi)
-	movl	168(%esp), %ebx         # 4-byte Reload
-	movl	%ebx, 12(%edi)
-	movl	124(%esp), %ebp         # 4-byte Reload
-	movl	136(%esp), %ebx         # 4-byte Reload
-	jne	.LBB212_10
-# BB#9:
-	movl	80(%esp), %ebx          # 4-byte Reload
-.LBB212_10:
-	movl	%ebx, 16(%edi)
-	movl	140(%esp), %ebx         # 4-byte Reload
-	movl	148(%esp), %esi         # 4-byte Reload
-	jne	.LBB212_12
-# BB#11:
-	movl	84(%esp), %esi          # 4-byte Reload
-.LBB212_12:
-	movl	%esi, 20(%edi)
-	movl	128(%esp), %esi         # 4-byte Reload
-	jne	.LBB212_14
-# BB#13:
-	movl	88(%esp), %eax          # 4-byte Reload
-.LBB212_14:
-	movl	%eax, 24(%edi)
-	movl	120(%esp), %edx         # 4-byte Reload
-	jne	.LBB212_16
-# BB#15:
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	%eax, 152(%esp)         # 4-byte Spill
-.LBB212_16:
-	movl	152(%esp), %eax         # 4-byte Reload
-	movl	%eax, 28(%edi)
-	jne	.LBB212_18
-# BB#17:
-	movl	96(%esp), %ebp          # 4-byte Reload
-.LBB212_18:
-	movl	%ebp, 32(%edi)
-	jne	.LBB212_20
-# BB#19:
-	movl	100(%esp), %ebx         # 4-byte Reload
-.LBB212_20:
-	movl	%ebx, 36(%edi)
-	jne	.LBB212_22
-# BB#21:
-	movl	112(%esp), %esi         # 4-byte Reload
-.LBB212_22:
-	movl	%esi, 40(%edi)
-	jne	.LBB212_24
-# BB#23:
-	movl	116(%esp), %edx         # 4-byte Reload
-.LBB212_24:
-	movl	%edx, 44(%edi)
-	jne	.LBB212_26
-# BB#25:
-	movl	132(%esp), %ecx         # 4-byte Reload
-.LBB212_26:
-	movl	%ecx, 48(%edi)
-	movl	104(%esp), %eax         # 4-byte Reload
-	jne	.LBB212_28
-# BB#27:
-	movl	144(%esp), %eax         # 4-byte Reload
-.LBB212_28:
-	movl	%eax, 52(%edi)
-	addl	$1068, %esp             # imm = 0x42C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end212:
-	.size	mcl_fp_montRed14Lbmi2, .Lfunc_end212-mcl_fp_montRed14Lbmi2
-
-	.globl	mcl_fp_addPre14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre14Lbmi2,@function
-mcl_fp_addPre14Lbmi2:                   # @mcl_fp_addPre14Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %edi
-	adcl	8(%ecx), %edi
-	movl	16(%esp), %ebx
-	movl	%edx, (%ebx)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%ebx)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ebx)
-	movl	20(%ecx), %edx
-	adcl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ebx)
-	movl	24(%ecx), %esi
-	adcl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ebx)
-	movl	28(%ecx), %edx
-	adcl	%edi, %edx
-	movl	32(%eax), %edi
-	movl	%esi, 24(%ebx)
-	movl	32(%ecx), %esi
-	adcl	%edi, %esi
-	movl	36(%eax), %edi
-	movl	%edx, 28(%ebx)
-	movl	36(%ecx), %edx
-	adcl	%edi, %edx
-	movl	40(%eax), %edi
-	movl	%esi, 32(%ebx)
-	movl	40(%ecx), %esi
-	adcl	%edi, %esi
-	movl	44(%eax), %edi
-	movl	%edx, 36(%ebx)
-	movl	44(%ecx), %edx
-	adcl	%edi, %edx
-	movl	48(%eax), %edi
-	movl	%esi, 40(%ebx)
-	movl	48(%ecx), %esi
-	adcl	%edi, %esi
-	movl	%edx, 44(%ebx)
-	movl	%esi, 48(%ebx)
-	movl	52(%eax), %eax
-	movl	52(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 52(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end213:
-	.size	mcl_fp_addPre14Lbmi2, .Lfunc_end213-mcl_fp_addPre14Lbmi2
-
-	.globl	mcl_fp_subPre14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre14Lbmi2,@function
-mcl_fp_subPre14Lbmi2:                   # @mcl_fp_subPre14Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edx), %ebx
-	movl	20(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebp)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ebp)
-	movl	24(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ebp)
-	movl	28(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%edi, 24(%ebp)
-	movl	32(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	36(%edx), %ebx
-	movl	%esi, 28(%ebp)
-	movl	36(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	40(%edx), %ebx
-	movl	%edi, 32(%ebp)
-	movl	40(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	44(%edx), %ebx
-	movl	%esi, 36(%ebp)
-	movl	44(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	48(%edx), %ebx
-	movl	%edi, 40(%ebp)
-	movl	48(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	%esi, 44(%ebp)
-	movl	%edi, 48(%ebp)
-	movl	52(%edx), %edx
-	movl	52(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 52(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end214:
-	.size	mcl_fp_subPre14Lbmi2, .Lfunc_end214-mcl_fp_subPre14Lbmi2
-
-	.globl	mcl_fp_shr1_14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_14Lbmi2,@function
-mcl_fp_shr1_14Lbmi2:                    # @mcl_fp_shr1_14Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	8(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 4(%ecx)
-	movl	12(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 8(%ecx)
-	movl	16(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 16(%ecx)
-	movl	24(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 24(%ecx)
-	movl	32(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 32(%ecx)
-	movl	40(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 40(%ecx)
-	movl	48(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 44(%ecx)
-	movl	52(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	%edx, 48(%ecx)
-	shrl	%eax
-	movl	%eax, 52(%ecx)
-	popl	%esi
-	retl
-.Lfunc_end215:
-	.size	mcl_fp_shr1_14Lbmi2, .Lfunc_end215-mcl_fp_shr1_14Lbmi2
-
-	.globl	mcl_fp_add14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add14Lbmi2,@function
-mcl_fp_add14Lbmi2:                      # @mcl_fp_add14Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$44, %esp
-	movl	72(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %ecx
-	movl	68(%esp), %ebp
-	addl	(%ebp), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	4(%ebp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	8(%eax), %ecx
-	adcl	8(%ebp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	12(%ebp), %edx
-	movl	16(%ebp), %ecx
-	adcl	12(%eax), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	16(%eax), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	20(%ebp), %ecx
-	adcl	20(%eax), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	24(%ebp), %ecx
-	adcl	24(%eax), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	28(%ebp), %ecx
-	adcl	28(%eax), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	32(%ebp), %ecx
-	adcl	32(%eax), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	36(%ebp), %ecx
-	adcl	36(%eax), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	40(%ebp), %edx
-	adcl	40(%eax), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	44(%ebp), %ebx
-	adcl	44(%eax), %ebx
-	movl	48(%ebp), %esi
-	adcl	48(%eax), %esi
-	movl	52(%ebp), %edi
-	adcl	52(%eax), %edi
-	movl	64(%esp), %eax
-	movl	4(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, (%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 4(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 8(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 16(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 20(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	%edx, 40(%eax)
-	movl	%ebx, 44(%eax)
-	movl	%esi, 48(%eax)
-	movl	%edi, 52(%eax)
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	76(%esp), %edx
-	subl	(%edx), %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	sbbl	4(%edx), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	sbbl	8(%edx), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	sbbl	12(%edx), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %ebp          # 4-byte Reload
-	sbbl	16(%edx), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %ebp          # 4-byte Reload
-	sbbl	20(%edx), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %ebp          # 4-byte Reload
-	sbbl	24(%edx), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %ebp          # 4-byte Reload
-	sbbl	28(%edx), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebp          # 4-byte Reload
-	sbbl	32(%edx), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %ebp           # 4-byte Reload
-	sbbl	36(%edx), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	(%esp), %ebp            # 4-byte Reload
-	sbbl	40(%edx), %ebp
-	sbbl	44(%edx), %ebx
-	sbbl	48(%edx), %esi
-	sbbl	52(%edx), %edi
-	sbbl	$0, %ecx
-	testb	$1, %cl
-	jne	.LBB216_2
-# BB#1:                                 # %nocarry
-	movl	4(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, (%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 4(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 8(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 16(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 20(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	%ebp, 40(%eax)
-	movl	%ebx, 44(%eax)
-	movl	%esi, 48(%eax)
-	movl	%edi, 52(%eax)
-.LBB216_2:                              # %carry
-	addl	$44, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end216:
-	.size	mcl_fp_add14Lbmi2, .Lfunc_end216-mcl_fp_add14Lbmi2
-
-	.globl	mcl_fp_addNF14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF14Lbmi2,@function
-mcl_fp_addNF14Lbmi2:                    # @mcl_fp_addNF14Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$112, %esp
-	movl	140(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	136(%esp), %ecx
-	addl	(%ecx), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	adcl	4(%ecx), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	52(%eax), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	48(%eax), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	44(%eax), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	40(%eax), %ebp
-	movl	36(%eax), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	32(%eax), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	28(%eax), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	24(%eax), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	20(%eax), %ebx
-	movl	16(%eax), %edi
-	movl	12(%eax), %esi
-	movl	8(%eax), %edx
-	adcl	8(%ecx), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	12(%ecx), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	16(%ecx), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	20(%ecx), %ebx
-	movl	%ebx, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	24(%ecx), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	28(%ecx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	32(%ecx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	36(%ecx), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	40(%ecx), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	44(%ecx), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	48(%ecx), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	52(%ecx), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	144(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	subl	(%ecx), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	sbbl	4(%ecx), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	sbbl	8(%ecx), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	sbbl	12(%ecx), %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	sbbl	16(%ecx), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	sbbl	20(%ecx), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	movl	%edx, %eax
-	sbbl	24(%ecx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	28(%ecx), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	sbbl	32(%ecx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	sbbl	36(%ecx), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	sbbl	40(%ecx), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	movl	%eax, %esi
-	movl	%eax, %ebp
-	sbbl	44(%ecx), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	movl	%eax, %esi
-	sbbl	48(%ecx), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	%eax, %edi
-	sbbl	52(%ecx), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	%edi, %ecx
-	sarl	$31, %ecx
-	testl	%ecx, %ecx
-	movl	72(%esp), %ecx          # 4-byte Reload
-	js	.LBB217_2
-# BB#1:
-	movl	(%esp), %ecx            # 4-byte Reload
-.LBB217_2:
-	movl	132(%esp), %edi
-	movl	%ecx, (%edi)
-	movl	76(%esp), %eax          # 4-byte Reload
-	js	.LBB217_4
-# BB#3:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB217_4:
-	movl	%eax, 4(%edi)
-	movl	%edx, %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	movl	56(%esp), %edx          # 4-byte Reload
-	js	.LBB217_6
-# BB#5:
-	movl	8(%esp), %edx           # 4-byte Reload
-.LBB217_6:
-	movl	%edx, 8(%edi)
-	movl	%ebp, %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	movl	60(%esp), %ebp          # 4-byte Reload
-	js	.LBB217_8
-# BB#7:
-	movl	12(%esp), %ebp          # 4-byte Reload
-.LBB217_8:
-	movl	%ebp, 12(%edi)
-	movl	100(%esp), %ebp         # 4-byte Reload
-	js	.LBB217_10
-# BB#9:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB217_10:
-	movl	%eax, 16(%edi)
-	movl	80(%esp), %esi          # 4-byte Reload
-	js	.LBB217_12
-# BB#11:
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-.LBB217_12:
-	movl	68(%esp), %eax          # 4-byte Reload
-	movl	%eax, 20(%edi)
-	js	.LBB217_14
-# BB#13:
-	movl	24(%esp), %ecx          # 4-byte Reload
-.LBB217_14:
-	movl	%ecx, 24(%edi)
-	js	.LBB217_16
-# BB#15:
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-.LBB217_16:
-	movl	108(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 28(%edi)
-	js	.LBB217_18
-# BB#17:
-	movl	32(%esp), %ebp          # 4-byte Reload
-.LBB217_18:
-	movl	%ebp, 32(%edi)
-	js	.LBB217_20
-# BB#19:
-	movl	36(%esp), %ebx          # 4-byte Reload
-.LBB217_20:
-	movl	%ebx, 36(%edi)
-	js	.LBB217_22
-# BB#21:
-	movl	40(%esp), %esi          # 4-byte Reload
-.LBB217_22:
-	movl	%esi, 40(%edi)
-	movl	96(%esp), %eax          # 4-byte Reload
-	js	.LBB217_24
-# BB#23:
-	movl	44(%esp), %edx          # 4-byte Reload
-.LBB217_24:
-	movl	%edx, 44(%edi)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	js	.LBB217_26
-# BB#25:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB217_26:
-	movl	%eax, 48(%edi)
-	js	.LBB217_28
-# BB#27:
-	movl	52(%esp), %ecx          # 4-byte Reload
-.LBB217_28:
-	movl	%ecx, 52(%edi)
-	addl	$112, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end217:
-	.size	mcl_fp_addNF14Lbmi2, .Lfunc_end217-mcl_fp_addNF14Lbmi2
-
-	.globl	mcl_fp_sub14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub14Lbmi2,@function
-mcl_fp_sub14Lbmi2:                      # @mcl_fp_sub14Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$52, %esp
-	movl	76(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	80(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	28(%esi), %eax
-	sbbl	28(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	32(%esi), %eax
-	sbbl	32(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	36(%esi), %edx
-	sbbl	36(%edi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	40(%esi), %ecx
-	sbbl	40(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	44(%esi), %eax
-	sbbl	44(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	48(%esi), %ebp
-	sbbl	48(%edi), %ebp
-	movl	52(%esi), %esi
-	sbbl	52(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	72(%esp), %ebx
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%ebx)
-	movl	%edx, 36(%ebx)
-	movl	%ecx, 40(%ebx)
-	movl	%eax, 44(%ebx)
-	movl	%ebp, 48(%ebx)
-	movl	%esi, 52(%ebx)
-	je	.LBB218_2
-# BB#1:                                 # %carry
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	84(%esp), %esi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	8(%esi), %edi
-	movl	12(%esi), %eax
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%edi, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	36(%esi), %eax
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	40(%esi), %ecx
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 36(%ebx)
-	movl	44(%esi), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 40(%ebx)
-	movl	%eax, 44(%ebx)
-	movl	48(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 48(%ebx)
-	movl	52(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, 52(%ebx)
-.LBB218_2:                              # %nocarry
-	addl	$52, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end218:
-	.size	mcl_fp_sub14Lbmi2, .Lfunc_end218-mcl_fp_sub14Lbmi2
-
-	.globl	mcl_fp_subNF14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF14Lbmi2,@function
-mcl_fp_subNF14Lbmi2:                    # @mcl_fp_subNF14Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$88, %esp
-	movl	112(%esp), %ecx
-	movl	52(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	(%ecx), %edx
-	movl	4(%ecx), %eax
-	movl	116(%esp), %edi
-	subl	(%edi), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%ecx), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	44(%ecx), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	32(%ecx), %ebp
-	movl	28(%ecx), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	24(%ecx), %ebx
-	movl	20(%ecx), %esi
-	movl	16(%ecx), %edx
-	movl	12(%ecx), %eax
-	movl	8(%ecx), %ecx
-	sbbl	8(%edi), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	sbbl	12(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %ebx
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%edi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	sbbl	32(%edi), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	sbbl	48(%edi), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	sbbl	52(%edi), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	sarl	$31, %esi
-	movl	%esi, %ecx
-	addl	%ecx, %ecx
-	movl	%esi, %ebp
-	adcl	%ebp, %ebp
-	shrl	$31, %eax
-	orl	%ecx, %eax
-	movl	120(%esp), %edi
-	andl	4(%edi), %ebp
-	andl	(%edi), %eax
-	movl	52(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	48(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	44(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	40(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	36(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	32(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	28(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	24(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	20(%edi), %ebx
-	andl	%esi, %ebx
-	movl	16(%edi), %edx
-	andl	%esi, %edx
-	movl	12(%edi), %ecx
-	andl	%esi, %ecx
-	andl	8(%edi), %esi
-	addl	56(%esp), %eax          # 4-byte Folded Reload
-	adcl	60(%esp), %ebp          # 4-byte Folded Reload
-	movl	108(%esp), %edi
-	movl	%eax, (%edi)
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%ebp, 4(%edi)
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%esi, 8(%edi)
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 12(%edi)
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edx, 16(%edi)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebx, 20(%edi)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 24(%edi)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 28(%edi)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 32(%edi)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 36(%edi)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 40(%edi)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 44(%edi)
-	movl	%eax, 48(%edi)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%edi)
-	addl	$88, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end219:
-	.size	mcl_fp_subNF14Lbmi2, .Lfunc_end219-mcl_fp_subNF14Lbmi2
-
-	.globl	mcl_fpDbl_add14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add14Lbmi2,@function
-mcl_fpDbl_add14Lbmi2:                   # @mcl_fpDbl_add14Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$100, %esp
-	movl	128(%esp), %ecx
-	movl	124(%esp), %esi
-	movl	12(%esi), %edi
-	movl	16(%esi), %edx
-	movl	8(%ecx), %ebx
-	movl	(%ecx), %ebp
-	addl	(%esi), %ebp
-	movl	120(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%ecx), %ebp
-	adcl	4(%esi), %ebp
-	adcl	8(%esi), %ebx
-	adcl	12(%ecx), %edi
-	adcl	16(%ecx), %edx
-	movl	%ebp, 4(%eax)
-	movl	64(%ecx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%ecx), %ebx
-	movl	%edi, 12(%eax)
-	movl	20(%esi), %edi
-	adcl	%ebx, %edi
-	movl	24(%ecx), %ebx
-	movl	%edx, 16(%eax)
-	movl	24(%esi), %edx
-	adcl	%ebx, %edx
-	movl	28(%ecx), %ebx
-	movl	%edi, 20(%eax)
-	movl	28(%esi), %edi
-	adcl	%ebx, %edi
-	movl	32(%ecx), %ebx
-	movl	%edx, 24(%eax)
-	movl	32(%esi), %edx
-	adcl	%ebx, %edx
-	movl	36(%ecx), %ebx
-	movl	%edi, 28(%eax)
-	movl	36(%esi), %edi
-	adcl	%ebx, %edi
-	movl	40(%ecx), %ebx
-	movl	%edx, 32(%eax)
-	movl	40(%esi), %edx
-	adcl	%ebx, %edx
-	movl	44(%ecx), %ebx
-	movl	%edi, 36(%eax)
-	movl	44(%esi), %edi
-	adcl	%ebx, %edi
-	movl	48(%ecx), %ebx
-	movl	%edx, 40(%eax)
-	movl	48(%esi), %edx
-	adcl	%ebx, %edx
-	movl	52(%ecx), %ebx
-	movl	%edi, 44(%eax)
-	movl	52(%esi), %edi
-	adcl	%ebx, %edi
-	movl	56(%ecx), %ebx
-	movl	%edx, 48(%eax)
-	movl	56(%esi), %edx
-	adcl	%ebx, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	60(%ecx), %edx
-	movl	%edi, 52(%eax)
-	movl	60(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	64(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%ecx), %edx
-	movl	68(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%ecx), %edx
-	movl	72(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	76(%ecx), %edx
-	movl	76(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%ecx), %edx
-	movl	80(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%ecx), %edx
-	movl	84(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	88(%ecx), %edx
-	movl	88(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	92(%ecx), %edx
-	movl	92(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	96(%ecx), %edx
-	movl	96(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	100(%ecx), %edx
-	movl	100(%esi), %edi
-	adcl	%edx, %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	104(%ecx), %edx
-	movl	104(%esi), %ebx
-	adcl	%edx, %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	108(%ecx), %ecx
-	movl	108(%esi), %esi
-	adcl	%ecx, %esi
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	132(%esp), %ebp
-	movl	72(%esp), %ecx          # 4-byte Reload
-	subl	(%ebp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	sbbl	4(%ebp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	sbbl	8(%ebp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%ebp), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	20(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%ebp), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%ebp), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%ebp), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	sbbl	36(%ebp), %ecx
-	sbbl	40(%ebp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	44(%ebp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	movl	%esi, %ebx
-	sbbl	48(%ebp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	sbbl	52(%ebp), %esi
-	sbbl	$0, %edx
-	andl	$1, %edx
-	jne	.LBB220_2
-# BB#1:
-	movl	%esi, %ebx
-.LBB220_2:
-	testb	%dl, %dl
-	movl	72(%esp), %eax          # 4-byte Reload
-	movl	68(%esp), %edx          # 4-byte Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	movl	60(%esp), %ebp          # 4-byte Reload
-	jne	.LBB220_4
-# BB#3:
-	movl	%ecx, %edx
-	movl	(%esp), %edi            # 4-byte Reload
-	movl	4(%esp), %ebp           # 4-byte Reload
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB220_4:
-	movl	120(%esp), %esi
-	movl	%eax, 56(%esi)
-	movl	76(%esp), %eax          # 4-byte Reload
-	movl	%eax, 60(%esi)
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	%eax, 64(%esi)
-	movl	84(%esp), %eax          # 4-byte Reload
-	movl	%eax, 68(%esi)
-	movl	88(%esp), %eax          # 4-byte Reload
-	movl	%eax, 72(%esi)
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	%eax, 76(%esi)
-	movl	96(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%esi)
-	movl	%ebp, 84(%esi)
-	movl	%edi, 88(%esi)
-	movl	%edx, 92(%esi)
-	movl	52(%esp), %edx          # 4-byte Reload
-	movl	48(%esp), %eax          # 4-byte Reload
-	jne	.LBB220_6
-# BB#5:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB220_6:
-	movl	%eax, 96(%esi)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	jne	.LBB220_8
-# BB#7:
-	movl	40(%esp), %edx          # 4-byte Reload
-.LBB220_8:
-	movl	%edx, 100(%esi)
-	jne	.LBB220_10
-# BB#9:
-	movl	44(%esp), %ecx          # 4-byte Reload
-.LBB220_10:
-	movl	%ecx, 104(%esi)
-	movl	%ebx, 108(%esi)
-	addl	$100, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end220:
-	.size	mcl_fpDbl_add14Lbmi2, .Lfunc_end220-mcl_fpDbl_add14Lbmi2
-
-	.globl	mcl_fpDbl_sub14Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub14Lbmi2,@function
-mcl_fpDbl_sub14Lbmi2:                   # @mcl_fpDbl_sub14Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$96, %esp
-	movl	120(%esp), %ebx
-	movl	(%ebx), %eax
-	movl	4(%ebx), %edx
-	movl	124(%esp), %ebp
-	subl	(%ebp), %eax
-	sbbl	4(%ebp), %edx
-	movl	8(%ebx), %esi
-	sbbl	8(%ebp), %esi
-	movl	116(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%ebx), %eax
-	sbbl	12(%ebp), %eax
-	movl	%edx, 4(%ecx)
-	movl	16(%ebx), %edx
-	sbbl	16(%ebp), %edx
-	movl	%esi, 8(%ecx)
-	movl	20(%ebp), %esi
-	movl	%eax, 12(%ecx)
-	movl	20(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	24(%ebp), %esi
-	movl	%edx, 16(%ecx)
-	movl	24(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	28(%ebp), %esi
-	movl	%eax, 20(%ecx)
-	movl	28(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	32(%ebp), %esi
-	movl	%edx, 24(%ecx)
-	movl	32(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	36(%ebp), %esi
-	movl	%eax, 28(%ecx)
-	movl	36(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	40(%ebp), %esi
-	movl	%edx, 32(%ecx)
-	movl	40(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	44(%ebp), %esi
-	movl	%eax, 36(%ecx)
-	movl	44(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	48(%ebp), %esi
-	movl	%edx, 40(%ecx)
-	movl	48(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	52(%ebp), %esi
-	movl	%eax, 44(%ecx)
-	movl	52(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	56(%ebp), %esi
-	movl	%edx, 48(%ecx)
-	movl	56(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	60(%ebp), %edx
-	movl	%eax, 52(%ecx)
-	movl	60(%ebx), %eax
-	sbbl	%edx, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	64(%ebp), %eax
-	movl	64(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	68(%ebp), %eax
-	movl	68(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	72(%ebp), %eax
-	movl	72(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	76(%ebp), %eax
-	movl	76(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	80(%ebp), %eax
-	movl	80(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	84(%ebp), %eax
-	movl	84(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	88(%ebp), %eax
-	movl	88(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	92(%ebp), %eax
-	movl	92(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	96(%ebp), %eax
-	movl	96(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	100(%ebp), %eax
-	movl	100(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	104(%ebp), %eax
-	movl	104(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	108(%ebp), %eax
-	movl	108(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	128(%esp), %ebp
-	jne	.LBB221_1
-# BB#2:
-	movl	$0, 56(%esp)            # 4-byte Folded Spill
-	jmp	.LBB221_3
-.LBB221_1:
-	movl	52(%ebp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-.LBB221_3:
-	testb	%al, %al
-	jne	.LBB221_4
-# BB#5:
-	movl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB221_6
-.LBB221_4:
-	movl	(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	4(%ebp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-.LBB221_6:
-	jne	.LBB221_7
-# BB#8:
-	movl	$0, 32(%esp)            # 4-byte Folded Spill
-	jmp	.LBB221_9
-.LBB221_7:
-	movl	48(%ebp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-.LBB221_9:
-	jne	.LBB221_10
-# BB#11:
-	movl	$0, 28(%esp)            # 4-byte Folded Spill
-	jmp	.LBB221_12
-.LBB221_10:
-	movl	44(%ebp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-.LBB221_12:
-	jne	.LBB221_13
-# BB#14:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	jmp	.LBB221_15
-.LBB221_13:
-	movl	40(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB221_15:
-	jne	.LBB221_16
-# BB#17:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB221_18
-.LBB221_16:
-	movl	36(%ebp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB221_18:
-	jne	.LBB221_19
-# BB#20:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB221_21
-.LBB221_19:
-	movl	32(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB221_21:
-	jne	.LBB221_22
-# BB#23:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB221_24
-.LBB221_22:
-	movl	28(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB221_24:
-	jne	.LBB221_25
-# BB#26:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB221_27
-.LBB221_25:
-	movl	24(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB221_27:
-	jne	.LBB221_28
-# BB#29:
-	movl	$0, %esi
-	jmp	.LBB221_30
-.LBB221_28:
-	movl	20(%ebp), %esi
-.LBB221_30:
-	jne	.LBB221_31
-# BB#32:
-	movl	$0, %edi
-	jmp	.LBB221_33
-.LBB221_31:
-	movl	16(%ebp), %edi
-.LBB221_33:
-	jne	.LBB221_34
-# BB#35:
-	movl	$0, %ebx
-	jmp	.LBB221_36
-.LBB221_34:
-	movl	12(%ebp), %ebx
-.LBB221_36:
-	jne	.LBB221_37
-# BB#38:
-	xorl	%ebp, %ebp
-	jmp	.LBB221_39
-.LBB221_37:
-	movl	8(%ebp), %ebp
-.LBB221_39:
-	movl	20(%esp), %edx          # 4-byte Reload
-	addl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 56(%ecx)
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 60(%ecx)
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 64(%ecx)
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 68(%ecx)
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%edi, 72(%ecx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, 76(%ecx)
-	movl	4(%esp), %edx           # 4-byte Reload
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 80(%ecx)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 84(%ecx)
-	movl	12(%esp), %edx          # 4-byte Reload
-	adcl	76(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 88(%ecx)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 92(%ecx)
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	84(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 96(%ecx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 100(%ecx)
-	movl	%eax, 104(%ecx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%ecx)
-	addl	$96, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end221:
-	.size	mcl_fpDbl_sub14Lbmi2, .Lfunc_end221-mcl_fpDbl_sub14Lbmi2
-
-	.align	16, 0x90
-	.type	.LmulPv480x32,@function
-.LmulPv480x32:                          # @mulPv480x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$52, %esp
-	movl	%edx, %eax
-	movl	72(%esp), %edi
-	movl	%edi, %edx
-	mulxl	4(%eax), %ebx, %esi
-	movl	%edi, %edx
-	mulxl	(%eax), %ebp, %edx
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	addl	%ebx, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	8(%eax), %edx, %ebx
-	adcl	%esi, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	12(%eax), %edx, %esi
-	adcl	%ebx, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	16(%eax), %edx, %ebx
-	adcl	%esi, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	20(%eax), %edx, %esi
-	adcl	%ebx, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	24(%eax), %edx, %ebx
-	adcl	%esi, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	28(%eax), %edx, %esi
-	adcl	%ebx, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	32(%eax), %edx, %ebx
-	adcl	%esi, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	36(%eax), %edx, %esi
-	adcl	%ebx, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	40(%eax), %edx, %ebp
-	adcl	%esi, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	44(%eax), %ebx, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	%ebp, %ebx
-	movl	%edi, %edx
-	mulxl	48(%eax), %esi, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	movl	%edi, %edx
-	mulxl	52(%eax), %edx, %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%ecx)
-	movl	44(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%ecx)
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%ecx)
-	movl	36(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%ecx)
-	movl	32(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%ecx)
-	movl	28(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%ecx)
-	movl	24(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%ecx)
-	movl	20(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%ecx)
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 32(%ecx)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 36(%ecx)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 40(%ecx)
-	movl	%ebx, 44(%ecx)
-	movl	%esi, 48(%ecx)
-	movl	%edx, 52(%ecx)
-	movl	%edi, %edx
-	mulxl	56(%eax), %eax, %edx
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 56(%ecx)
-	adcl	$0, %edx
-	movl	%edx, 60(%ecx)
-	movl	%ecx, %eax
-	addl	$52, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end222:
-	.size	.LmulPv480x32, .Lfunc_end222-.LmulPv480x32
-
-	.globl	mcl_fp_mulUnitPre15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre15Lbmi2,@function
-mcl_fp_mulUnitPre15Lbmi2:               # @mcl_fp_mulUnitPre15Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$124, %esp
-	calll	.L223$pb
-.L223$pb:
-	popl	%ebx
-.Ltmp44:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp44-.L223$pb), %ebx
-	movl	152(%esp), %eax
-	movl	%eax, (%esp)
-	leal	56(%esp), %ecx
-	movl	148(%esp), %edx
-	calll	.LmulPv480x32
-	movl	116(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp
-	movl	72(%esp), %ebx
-	movl	68(%esp), %edi
-	movl	64(%esp), %esi
-	movl	56(%esp), %edx
-	movl	60(%esp), %ecx
-	movl	144(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 60(%eax)
-	addl	$124, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end223:
-	.size	mcl_fp_mulUnitPre15Lbmi2, .Lfunc_end223-mcl_fp_mulUnitPre15Lbmi2
-
-	.globl	mcl_fpDbl_mulPre15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre15Lbmi2,@function
-mcl_fpDbl_mulPre15Lbmi2:                # @mcl_fpDbl_mulPre15Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1084, %esp             # imm = 0x43C
-	calll	.L224$pb
-.L224$pb:
-	popl	%esi
-.Ltmp45:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp45-.L224$pb), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	1112(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1016(%esp), %ecx
-	movl	1108(%esp), %edi
-	movl	%edi, %edx
-	movl	%esi, %ebx
-	calll	.LmulPv480x32
-	movl	1076(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	1072(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1068(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1060(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1056(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1052(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1044(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1040(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1036(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1032(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	1028(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	1024(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1016(%esp), %eax
-	movl	1020(%esp), %ebp
-	movl	1104(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	1112(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	952(%esp), %ecx
-	movl	%edi, %edx
-	movl	%esi, %ebx
-	calll	.LmulPv480x32
-	addl	952(%esp), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	1012(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1008(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1004(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1000(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	996(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	992(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	988(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	984(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	980(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	976(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	972(%esp), %edi
-	movl	968(%esp), %esi
-	movl	964(%esp), %edx
-	movl	956(%esp), %eax
-	movl	960(%esp), %ecx
-	movl	1104(%esp), %ebp
-	movl	16(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 4(%ebp)
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	888(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	72(%esp), %eax          # 4-byte Reload
-	addl	888(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	948(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	944(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	940(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	936(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	932(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	928(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	924(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	920(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	916(%esp), %ebx
-	movl	912(%esp), %edi
-	movl	908(%esp), %esi
-	movl	904(%esp), %edx
-	movl	900(%esp), %ecx
-	movl	892(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	896(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	72(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%eax)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 112(%esp)         # 4-byte Folded Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	68(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	824(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	824(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	884(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	880(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	876(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	872(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	868(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	864(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	860(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	856(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	852(%esp), %ebx
-	movl	848(%esp), %edi
-	movl	844(%esp), %esi
-	movl	840(%esp), %edx
-	movl	836(%esp), %ecx
-	movl	828(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	832(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	760(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	820(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	816(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	812(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	808(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	804(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	800(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	796(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	792(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	788(%esp), %ebx
-	movl	784(%esp), %edi
-	movl	780(%esp), %esi
-	movl	776(%esp), %edx
-	movl	772(%esp), %ecx
-	movl	764(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	768(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	60(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	696(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	756(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	752(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	748(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	744(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	740(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	736(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	732(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	724(%esp), %ebx
-	movl	720(%esp), %edi
-	movl	716(%esp), %esi
-	movl	712(%esp), %edx
-	movl	708(%esp), %ecx
-	movl	700(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	704(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	632(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	692(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	688(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	684(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	680(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	676(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	672(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	668(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	664(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	660(%esp), %ebx
-	movl	656(%esp), %edi
-	movl	652(%esp), %esi
-	movl	648(%esp), %edx
-	movl	644(%esp), %ecx
-	movl	636(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	640(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	568(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	628(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	624(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	620(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	616(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	612(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	596(%esp), %ebx
-	movl	592(%esp), %edi
-	movl	588(%esp), %esi
-	movl	584(%esp), %edx
-	movl	580(%esp), %ecx
-	movl	572(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	576(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	504(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	564(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	560(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	540(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	536(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	532(%esp), %ebx
-	movl	528(%esp), %edi
-	movl	524(%esp), %esi
-	movl	520(%esp), %edx
-	movl	516(%esp), %ecx
-	movl	508(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	512(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 32(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	440(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	440(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	500(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	496(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	492(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	488(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	484(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	480(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	476(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	472(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	468(%esp), %ebx
-	movl	464(%esp), %edi
-	movl	460(%esp), %esi
-	movl	456(%esp), %edx
-	movl	452(%esp), %ecx
-	movl	444(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	448(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 36(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	376(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	436(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	432(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	428(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	424(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	420(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	416(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	404(%esp), %ebx
-	movl	400(%esp), %edi
-	movl	396(%esp), %esi
-	movl	392(%esp), %edx
-	movl	388(%esp), %ecx
-	movl	380(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 40(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	312(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	364(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	360(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	356(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	352(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	348(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	344(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	340(%esp), %ebx
-	movl	336(%esp), %edi
-	movl	332(%esp), %esi
-	movl	328(%esp), %edx
-	movl	324(%esp), %ecx
-	movl	316(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	320(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 44(%eax)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 108(%esp)         # 4-byte Folded Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 48(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	248(%esp), %ecx
-	movl	1108(%esp), %eax
-	movl	%eax, %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	108(%esp), %eax         # 4-byte Reload
-	addl	248(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	304(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	300(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	296(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	292(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	288(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	284(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	280(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	276(%esp), %ebx
-	movl	272(%esp), %edi
-	movl	268(%esp), %edx
-	movl	264(%esp), %ecx
-	movl	260(%esp), %eax
-	movl	252(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	256(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	movl	1104(%esp), %ebp
-	movl	%esi, 48(%ebp)
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	20(%esp), %esi          # 4-byte Reload
-	adcl	%esi, 36(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	184(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	244(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	240(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	236(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	232(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	228(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	224(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	220(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	212(%esp), %ebx
-	movl	208(%esp), %edx
-	movl	204(%esp), %ecx
-	movl	200(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	196(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	188(%esp), %eax
-	movl	192(%esp), %esi
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	1104(%esp), %edi
-	movl	%ebp, 52(%edi)
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%esi, %ebp
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 40(%esp)          # 4-byte Folded Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 64(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	120(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	120(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	124(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	adcl	128(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	164(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	156(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	152(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	148(%esp), %ebp
-	movl	144(%esp), %edi
-	movl	140(%esp), %esi
-	movl	136(%esp), %edx
-	movl	132(%esp), %ecx
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebx         # 4-byte Reload
-	movl	%ebx, 56(%eax)
-	movl	32(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 60(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	72(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 64(%eax)
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 68(%eax)
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 72(%eax)
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 76(%eax)
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edi, 80(%eax)
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebp, 84(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 88(%eax)
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	92(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 92(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	104(%esp), %ecx         # 4-byte Folded Reload
-	movl	%edx, 96(%eax)
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	108(%esp), %edx         # 4-byte Folded Reload
-	movl	%ecx, 100(%eax)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 104(%eax)
-	movl	%ecx, 108(%eax)
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 112(%eax)
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 116(%eax)
-	addl	$1084, %esp             # imm = 0x43C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end224:
-	.size	mcl_fpDbl_mulPre15Lbmi2, .Lfunc_end224-mcl_fpDbl_mulPre15Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre15Lbmi2,@function
-mcl_fpDbl_sqrPre15Lbmi2:                # @mcl_fpDbl_sqrPre15Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1084, %esp             # imm = 0x43C
-	calll	.L225$pb
-.L225$pb:
-	popl	%ebx
-.Ltmp46:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp46-.L225$pb), %ebx
-	movl	%ebx, 116(%esp)         # 4-byte Spill
-	movl	1108(%esp), %edx
-	movl	(%edx), %eax
-	movl	%eax, (%esp)
-	leal	1016(%esp), %ecx
-	movl	%edx, %edi
-	movl	%ebx, %esi
-	calll	.LmulPv480x32
-	movl	1076(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	1072(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1068(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1060(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1056(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1052(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1044(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1040(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1036(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1032(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	1028(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	1024(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1016(%esp), %eax
-	movl	1020(%esp), %ebp
-	movl	1104(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%edi, %edx
-	movl	4(%edx), %eax
-	movl	%eax, (%esp)
-	leal	952(%esp), %ecx
-	movl	%esi, %ebx
-	calll	.LmulPv480x32
-	addl	952(%esp), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	1012(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1008(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1004(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1000(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	996(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	992(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	988(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	984(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	980(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	976(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	972(%esp), %edi
-	movl	968(%esp), %esi
-	movl	964(%esp), %edx
-	movl	956(%esp), %eax
-	movl	960(%esp), %ecx
-	movl	1104(%esp), %ebp
-	movl	16(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 4(%ebp)
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	8(%edx), %eax
-	movl	%eax, (%esp)
-	leal	888(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	72(%esp), %eax          # 4-byte Reload
-	addl	888(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	948(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	944(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	940(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	936(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	932(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	928(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	924(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	920(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	916(%esp), %ebx
-	movl	912(%esp), %edi
-	movl	908(%esp), %esi
-	movl	904(%esp), %edx
-	movl	900(%esp), %ecx
-	movl	892(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	896(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	72(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%eax)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 112(%esp)         # 4-byte Folded Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	68(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	12(%edx), %eax
-	movl	%eax, (%esp)
-	leal	824(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	824(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	884(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	880(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	876(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	872(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	868(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	864(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	860(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	856(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	852(%esp), %ebx
-	movl	848(%esp), %edi
-	movl	844(%esp), %esi
-	movl	840(%esp), %edx
-	movl	836(%esp), %ecx
-	movl	828(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	832(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	16(%edx), %eax
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	760(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	820(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	816(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	812(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	808(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	804(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	800(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	796(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	792(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	788(%esp), %ebx
-	movl	784(%esp), %edi
-	movl	780(%esp), %esi
-	movl	776(%esp), %edx
-	movl	772(%esp), %ecx
-	movl	764(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	768(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	60(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	20(%edx), %eax
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	696(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	756(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	752(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	748(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	744(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	740(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	736(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	732(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	724(%esp), %ebx
-	movl	720(%esp), %edi
-	movl	716(%esp), %esi
-	movl	712(%esp), %edx
-	movl	708(%esp), %ecx
-	movl	700(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	704(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	24(%edx), %eax
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	632(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	692(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	688(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	684(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	680(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	676(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	672(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	668(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	664(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	660(%esp), %ebx
-	movl	656(%esp), %edi
-	movl	652(%esp), %esi
-	movl	648(%esp), %edx
-	movl	644(%esp), %ecx
-	movl	636(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	640(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	28(%edx), %eax
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	568(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	628(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	624(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	620(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	616(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	612(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	596(%esp), %ebx
-	movl	592(%esp), %edi
-	movl	588(%esp), %esi
-	movl	584(%esp), %edx
-	movl	580(%esp), %ecx
-	movl	572(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	576(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	32(%edx), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	504(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	564(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	560(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	540(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	536(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	532(%esp), %ebx
-	movl	528(%esp), %edi
-	movl	524(%esp), %esi
-	movl	520(%esp), %edx
-	movl	516(%esp), %ecx
-	movl	508(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	512(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 32(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	36(%edx), %eax
-	movl	%eax, (%esp)
-	leal	440(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	440(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	500(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	496(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	492(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	488(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	484(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	480(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	476(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	472(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	468(%esp), %ebx
-	movl	464(%esp), %edi
-	movl	460(%esp), %esi
-	movl	456(%esp), %edx
-	movl	452(%esp), %ecx
-	movl	444(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	448(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 36(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	40(%edx), %eax
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	376(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	436(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	432(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	428(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	424(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	420(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	416(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	404(%esp), %ebx
-	movl	400(%esp), %edi
-	movl	396(%esp), %esi
-	movl	392(%esp), %edx
-	movl	388(%esp), %ecx
-	movl	380(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 40(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	44(%edx), %eax
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	312(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	364(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	360(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	356(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	352(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	348(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	344(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	340(%esp), %ebx
-	movl	336(%esp), %edi
-	movl	332(%esp), %esi
-	movl	328(%esp), %edx
-	movl	324(%esp), %ecx
-	movl	316(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	320(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 44(%eax)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 108(%esp)         # 4-byte Folded Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 48(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	48(%edx), %eax
-	movl	%eax, (%esp)
-	leal	248(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	108(%esp), %eax         # 4-byte Reload
-	addl	248(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	304(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	300(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	296(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	292(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	288(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	284(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	280(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	276(%esp), %ebx
-	movl	272(%esp), %edi
-	movl	268(%esp), %edx
-	movl	264(%esp), %ecx
-	movl	260(%esp), %eax
-	movl	252(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	256(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	movl	1104(%esp), %ebp
-	movl	%esi, 48(%ebp)
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	20(%esp), %esi          # 4-byte Reload
-	adcl	%esi, 36(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	52(%edx), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	184(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	244(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	240(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	236(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	232(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	228(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	224(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	220(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	212(%esp), %ebx
-	movl	208(%esp), %edx
-	movl	204(%esp), %ecx
-	movl	200(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	196(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	188(%esp), %eax
-	movl	192(%esp), %esi
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	1104(%esp), %edi
-	movl	%ebp, 52(%edi)
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%esi, %ebp
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 40(%esp)          # 4-byte Folded Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 64(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	56(%edx), %eax
-	movl	%eax, (%esp)
-	leal	120(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	120(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	124(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	adcl	128(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	164(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	156(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	152(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	148(%esp), %ebp
-	movl	144(%esp), %edi
-	movl	140(%esp), %esi
-	movl	136(%esp), %edx
-	movl	132(%esp), %ecx
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebx         # 4-byte Reload
-	movl	%ebx, 56(%eax)
-	movl	32(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 60(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	72(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 64(%eax)
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 68(%eax)
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 72(%eax)
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 76(%eax)
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edi, 80(%eax)
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebp, 84(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 88(%eax)
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	92(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 92(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	104(%esp), %ecx         # 4-byte Folded Reload
-	movl	%edx, 96(%eax)
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	108(%esp), %edx         # 4-byte Folded Reload
-	movl	%ecx, 100(%eax)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 104(%eax)
-	movl	%ecx, 108(%eax)
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 112(%eax)
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 116(%eax)
-	addl	$1084, %esp             # imm = 0x43C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end225:
-	.size	mcl_fpDbl_sqrPre15Lbmi2, .Lfunc_end225-mcl_fpDbl_sqrPre15Lbmi2
-
-	.globl	mcl_fp_mont15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont15Lbmi2,@function
-mcl_fp_mont15Lbmi2:                     # @mcl_fp_mont15Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$2044, %esp             # imm = 0x7FC
-	calll	.L226$pb
-.L226$pb:
-	popl	%ebx
-.Ltmp47:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp47-.L226$pb), %ebx
-	movl	2076(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1976(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1976(%esp), %ebp
-	movl	1980(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	2036(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	2032(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	2028(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	2024(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2020(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2016(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	2012(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	2008(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	2004(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	2000(%esp), %edi
-	movl	1996(%esp), %esi
-	movl	1992(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	1988(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	1984(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	1912(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	addl	1912(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1916(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1920(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1924(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1928(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1932(%esp), %esi
-	adcl	1936(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1940(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1944(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1948(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1952(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1956(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1960(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1964(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1968(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	1972(%esp), %ebp
-	sbbl	%eax, %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1848(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	116(%esp), %eax         # 4-byte Reload
-	andl	$1, %eax
-	movl	88(%esp), %edx          # 4-byte Reload
-	addl	1848(%esp), %edx
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1852(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1856(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1860(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	1864(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	1868(%esp), %edi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1872(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1876(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1880(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1884(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1888(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1892(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1896(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1900(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	1904(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	adcl	1908(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%edx, %eax
-	movl	%edx, %esi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1784(%esp), %ecx
-	movl	2076(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv480x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	1784(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1788(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1792(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1796(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	1804(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1808(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	1812(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1816(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1820(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1824(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1828(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1832(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	1836(%esp), %esi
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	1840(%esp), %ebp
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1844(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1720(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	1720(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1724(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1728(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1732(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1736(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1740(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1744(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1748(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1752(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	1756(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1760(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1764(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	1768(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	adcl	1772(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1776(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1780(%esp), %esi
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1656(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	addl	1656(%esp), %eax
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1660(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1664(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1668(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1672(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1676(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1680(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1684(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	1688(%esp), %ebp
-	adcl	1692(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1696(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1700(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1704(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1708(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	1712(%esp), %edi
-	adcl	1716(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1592(%esp), %ecx
-	movl	2068(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv480x32
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	1592(%esp), %ecx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1596(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1600(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1604(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1608(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1612(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1616(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1620(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1624(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	1628(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1632(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1636(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1640(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1644(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1648(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1652(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1528(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	1528(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1532(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1536(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1540(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	1544(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1548(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1552(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1556(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1560(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1564(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %edi         # 4-byte Reload
-	adcl	1568(%esp), %edi
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	1572(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1576(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1580(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1584(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1588(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1464(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	92(%esp), %ecx          # 4-byte Reload
-	addl	1464(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1468(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1472(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1476(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1480(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	1484(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1488(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1492(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1496(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	1500(%esp), %edi
-	movl	%edi, 112(%esp)         # 4-byte Spill
-	adcl	1504(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1508(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	adcl	1512(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1516(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1520(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1524(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1400(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	movl	92(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1400(%esp), %edi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1404(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1408(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1412(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1416(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	1420(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1424(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1428(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	1432(%esp), %edi
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1436(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1440(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1444(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	1448(%esp), %esi
-	movl	%esi, %ebp
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1452(%esp), %esi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1456(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1460(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1336(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	1336(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1352(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1356(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1360(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1364(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1372(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1376(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1380(%esp), %ebp
-	movl	%ebp, 116(%esp)         # 4-byte Spill
-	adcl	1384(%esp), %esi
-	movl	%esi, %ebp
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	1392(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1272(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	movl	80(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1272(%esp), %edi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1276(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1280(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1284(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1288(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1292(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1296(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1300(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1304(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1308(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1312(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1316(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	adcl	1320(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1324(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	1328(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1332(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %edi
-	movl	2072(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1208(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	1208(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1212(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1216(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1220(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1228(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	1232(%esp), %ebp
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1236(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1240(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	1244(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1248(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1252(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1256(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1260(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1264(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1268(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1144(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	addl	1144(%esp), %eax
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1148(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	1156(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1168(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1180(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1188(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	1196(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1200(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1204(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1080(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	1080(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1084(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1088(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	1092(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1104(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1116(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	1128(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1016(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	1016(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1028(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	1032(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	1044(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	1060(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	952(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	952(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	964(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	976(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	992(%esp), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	888(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	movl	%ebp, %eax
-	andl	$1, %eax
-	addl	888(%esp), %esi
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	892(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	896(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	900(%esp), %edi
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	904(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	908(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	912(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	916(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	920(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	924(%esp), %ebp
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	928(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	932(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	936(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	940(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	944(%esp), %esi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	948(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	824(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	68(%esp), %ecx          # 4-byte Reload
-	addl	824(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	832(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	856(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	864(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	872(%esp), %edi
-	adcl	876(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	movl	68(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	760(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	776(%esp), %esi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	800(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	804(%esp), %ebp
-	adcl	808(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	816(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	696(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	708(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	736(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	748(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	752(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	632(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	656(%esp), %edi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	672(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	688(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	568(%esp), %ecx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	588(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	596(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	604(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	504(%esp), %esi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	516(%esp), %edi
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	520(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	532(%esp), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	560(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	440(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	100(%esp), %ecx         # 4-byte Reload
-	addl	440(%esp), %ecx
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	444(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	448(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	adcl	452(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	460(%esp), %edi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	492(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	376(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	388(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	396(%esp), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	404(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	416(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	112(%esp), %ecx         # 4-byte Reload
-	addl	312(%esp), %ecx
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	320(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	336(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	348(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	352(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	248(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	248(%esp), %edi
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	252(%esp), %esi
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	256(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	288(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	%esi, %ecx
-	movl	96(%esp), %esi          # 4-byte Reload
-	addl	184(%esp), %ecx
-	adcl	188(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	196(%esp), %ebp
-	adcl	200(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	120(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	movl	104(%esp), %ebx         # 4-byte Reload
-	andl	$1, %ebx
-	addl	120(%esp), %edi
-	movl	%ebp, %edi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	128(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	adcl	132(%esp), %edi
-	adcl	136(%esp), %esi
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	140(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	144(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	148(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	152(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	156(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	160(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	164(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	168(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	172(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	176(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %edx         # 4-byte Reload
-	adcl	180(%esp), %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	adcl	$0, %ebx
-	movl	%ebx, 104(%esp)         # 4-byte Spill
-	movl	%eax, %edx
-	movl	2076(%esp), %ebp
-	subl	(%ebp), %edx
-	sbbl	4(%ebp), %ecx
-	movl	%edi, %eax
-	sbbl	8(%ebp), %eax
-	movl	%esi, %ebx
-	sbbl	12(%ebp), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebx          # 4-byte Reload
-	sbbl	16(%ebp), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%ebp), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebx          # 4-byte Reload
-	sbbl	24(%ebp), %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebx          # 4-byte Reload
-	sbbl	28(%ebp), %ebx
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebx          # 4-byte Reload
-	sbbl	32(%ebp), %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebx          # 4-byte Reload
-	sbbl	36(%ebp), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebx          # 4-byte Reload
-	sbbl	40(%ebp), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebx          # 4-byte Reload
-	sbbl	44(%ebp), %ebx
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebx          # 4-byte Reload
-	sbbl	48(%ebp), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebx         # 4-byte Reload
-	sbbl	52(%ebp), %ebx
-	movl	%ebx, 88(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebx         # 4-byte Reload
-	sbbl	56(%ebp), %ebx
-	movl	%ebx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebx         # 4-byte Reload
-	movl	108(%esp), %ebp         # 4-byte Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB226_2
-# BB#1:
-	movl	%edx, %ebp
-.LBB226_2:
-	movl	2064(%esp), %edx
-	movl	%ebp, (%edx)
-	testb	%bl, %bl
-	movl	116(%esp), %ebp         # 4-byte Reload
-	jne	.LBB226_4
-# BB#3:
-	movl	%ecx, %ebp
-.LBB226_4:
-	movl	%ebp, 4(%edx)
-	jne	.LBB226_6
-# BB#5:
-	movl	%eax, %edi
-.LBB226_6:
-	movl	%edi, 8(%edx)
-	jne	.LBB226_8
-# BB#7:
-	movl	16(%esp), %esi          # 4-byte Reload
-.LBB226_8:
-	movl	%esi, 12(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_10
-# BB#9:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB226_10:
-	movl	%eax, 16(%edx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_12
-# BB#11:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB226_12:
-	movl	%eax, 20(%edx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_14
-# BB#13:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB226_14:
-	movl	%eax, 24(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_16
-# BB#15:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB226_16:
-	movl	%eax, 28(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_18
-# BB#17:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB226_18:
-	movl	%eax, 32(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_20
-# BB#19:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB226_20:
-	movl	%eax, 36(%edx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_22
-# BB#21:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB226_22:
-	movl	%eax, 40(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_24
-# BB#23:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB226_24:
-	movl	%eax, 44(%edx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_26
-# BB#25:
-	movl	52(%esp), %eax          # 4-byte Reload
-.LBB226_26:
-	movl	%eax, 48(%edx)
-	movl	100(%esp), %eax         # 4-byte Reload
-	jne	.LBB226_28
-# BB#27:
-	movl	88(%esp), %eax          # 4-byte Reload
-.LBB226_28:
-	movl	%eax, 52(%edx)
-	movl	112(%esp), %eax         # 4-byte Reload
-	jne	.LBB226_30
-# BB#29:
-	movl	96(%esp), %eax          # 4-byte Reload
-.LBB226_30:
-	movl	%eax, 56(%edx)
-	addl	$2044, %esp             # imm = 0x7FC
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end226:
-	.size	mcl_fp_mont15Lbmi2, .Lfunc_end226-mcl_fp_mont15Lbmi2
-
-	.globl	mcl_fp_montNF15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF15Lbmi2,@function
-mcl_fp_montNF15Lbmi2:                   # @mcl_fp_montNF15Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$2028, %esp             # imm = 0x7EC
-	calll	.L227$pb
-.L227$pb:
-	popl	%ebx
-.Ltmp48:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp48-.L227$pb), %ebx
-	movl	2060(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1960(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1960(%esp), %ebp
-	movl	1964(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	2020(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2016(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2012(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2008(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	2004(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2000(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1996(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1992(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1988(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1984(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1980(%esp), %esi
-	movl	1976(%esp), %edi
-	movl	1972(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	1968(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	1896(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	1896(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1900(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1904(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1908(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1912(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	1916(%esp), %esi
-	movl	%esi, %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1920(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1924(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1928(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1932(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1936(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1940(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	1944(%esp), %ebp
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1948(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1952(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1956(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1832(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1892(%esp), %eax
-	movl	92(%esp), %edx          # 4-byte Reload
-	addl	1832(%esp), %edx
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1836(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1840(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1844(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	1848(%esp), %edi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1852(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1856(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1860(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1864(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1868(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1872(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	1876(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	adcl	1880(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	1884(%esp), %ebp
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1888(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1768(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	1768(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1772(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1776(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1780(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1784(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1788(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1792(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1796(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	1804(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1808(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1812(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1816(%esp), %eax
-	movl	%eax, %esi
-	adcl	1820(%esp), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	1824(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1828(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1704(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1764(%esp), %eax
-	movl	68(%esp), %edx          # 4-byte Reload
-	addl	1704(%esp), %edx
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1708(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1712(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1716(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1720(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1724(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1728(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1732(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	1736(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1740(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	1744(%esp), %edi
-	adcl	1748(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1752(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	1756(%esp), %ebp
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1760(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1640(%esp), %ecx
-	movl	2060(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv480x32
-	addl	1640(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1644(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1648(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1652(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1656(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1660(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1664(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1668(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1672(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1676(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1680(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1684(%esp), %eax
-	movl	%eax, %esi
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	1688(%esp), %edi
-	adcl	1692(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1696(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	1700(%esp), %ebp
-	movl	2056(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1576(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1636(%esp), %eax
-	movl	88(%esp), %edx          # 4-byte Reload
-	addl	1576(%esp), %edx
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1580(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1584(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1588(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1592(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1596(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1600(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1604(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1608(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1612(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	1616(%esp), %esi
-	adcl	1620(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1624(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1628(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	1632(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1512(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	1512(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1516(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1520(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1524(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1528(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	1532(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1536(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1540(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1544(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	1548(%esp), %ebp
-	adcl	1552(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1556(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1560(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1564(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1568(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1572(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1448(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1508(%esp), %eax
-	movl	72(%esp), %edx          # 4-byte Reload
-	addl	1448(%esp), %edx
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1452(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1456(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1460(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	1464(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1468(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	1472(%esp), %edi
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1476(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	1480(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	adcl	1484(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1488(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1492(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1496(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1500(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	1504(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1384(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	1384(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1400(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1404(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1408(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1412(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1416(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1424(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1428(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1432(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1436(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1440(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1444(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1320(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1380(%esp), %edx
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	1320(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	1324(%esp), %ebp
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	1328(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1336(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	1352(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1356(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1360(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1364(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	1368(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1372(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1376(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1256(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	1256(%esp), %eax
-	adcl	1260(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	adcl	1264(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	1272(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1288(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	1296(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1300(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1304(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1308(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	1312(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1192(%esp), %ecx
-	movl	2052(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv480x32
-	movl	1252(%esp), %eax
-	movl	48(%esp), %edx          # 4-byte Reload
-	addl	1192(%esp), %edx
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1196(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1200(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	1204(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1208(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	1212(%esp), %ebp
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1216(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1220(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1224(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	1228(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1232(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1236(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1240(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	1244(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	1248(%esp), %esi
-	adcl	$0, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1128(%esp), %ecx
-	movl	2060(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv480x32
-	addl	1128(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	1140(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1144(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1148(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1168(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1184(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	1188(%esp), %esi
-	movl	2056(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1064(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1124(%esp), %eax
-	movl	44(%esp), %edx          # 4-byte Reload
-	addl	1064(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1068(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	1072(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1076(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1080(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	1084(%esp), %edi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1088(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1092(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1096(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	adcl	1100(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1104(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1108(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1112(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1116(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	1120(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %esi
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1000(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	1000(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	1012(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1020(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	1028(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1044(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	1060(%esp), %esi
-	movl	2056(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	936(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	996(%esp), %eax
-	movl	52(%esp), %edx          # 4-byte Reload
-	addl	936(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	940(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	944(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	948(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	952(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	956(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	960(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	964(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	968(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	972(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	976(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	980(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	984(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	988(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	992(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %esi
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	872(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	872(%esp), %edi
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	876(%esp), %ebp
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	880(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	928(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	932(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	808(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	868(%esp), %eax
-	movl	%ebp, %ecx
-	addl	808(%esp), %ecx
-	adcl	812(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	816(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	820(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	824(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	828(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	832(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	836(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	840(%esp), %edi
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	844(%esp), %esi
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	848(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	852(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	856(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	860(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	864(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	744(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	744(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	768(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	776(%esp), %edi
-	adcl	780(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	792(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	680(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	740(%esp), %eax
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	680(%esp), %ecx
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	684(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	688(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	692(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	696(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	adcl	700(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	704(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	adcl	708(%esp), %edi
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	712(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	716(%esp), %ebp
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	720(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	724(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	728(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	732(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	736(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	616(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	616(%esp), %esi
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	620(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	644(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	648(%esp), %edi
-	adcl	652(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	656(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	612(%esp), %edx
-	movl	%esi, %ecx
-	addl	552(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	572(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	580(%esp), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	588(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	488(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	488(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	%esi, %ebp
-	adcl	508(%esp), %ebp
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	512(%esp), %edi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	528(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	484(%esp), %edx
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	424(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	440(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	adcl	444(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%esi, %edi
-	adcl	460(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	360(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	360(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	368(%esp), %esi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	376(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	396(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	400(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	296(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	356(%esp), %edx
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	296(%esp), %ecx
-	adcl	300(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	308(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	332(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	336(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	232(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	240(%esp), %ebp
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	244(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	272(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	276(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	168(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	228(%esp), %edx
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	168(%esp), %ecx
-	adcl	172(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	adcl	176(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	188(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	208(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	212(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	104(%esp), %ecx
-	movl	2060(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv480x32
-	addl	104(%esp), %edi
-	movl	68(%esp), %edi          # 4-byte Reload
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	108(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	112(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	%ecx, %ebx
-	adcl	116(%esp), %edi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	120(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	124(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	128(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	132(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	136(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	140(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	144(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	148(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	152(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	156(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	160(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	164(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	2060(%esp), %ecx
-	subl	(%ecx), %edx
-	movl	%ebx, %ebp
-	sbbl	4(%ecx), %ebp
-	movl	%edi, %ebx
-	sbbl	8(%ecx), %ebx
-	movl	88(%esp), %eax          # 4-byte Reload
-	sbbl	12(%ecx), %eax
-	sbbl	16(%ecx), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	sbbl	20(%ecx), %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	sbbl	24(%ecx), %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	sbbl	28(%ecx), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	sbbl	32(%ecx), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	36(%ecx), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	sbbl	40(%ecx), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	sbbl	44(%ecx), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	sbbl	48(%ecx), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	sbbl	52(%ecx), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	sbbl	56(%ecx), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	%esi, %ecx
-	sarl	$31, %ecx
-	testl	%ecx, %ecx
-	movl	100(%esp), %ecx         # 4-byte Reload
-	js	.LBB227_2
-# BB#1:
-	movl	%edx, %ecx
-.LBB227_2:
-	movl	2048(%esp), %edx
-	movl	%ecx, (%edx)
-	movl	92(%esp), %esi          # 4-byte Reload
-	js	.LBB227_4
-# BB#3:
-	movl	%ebp, %esi
-.LBB227_4:
-	movl	%esi, 4(%edx)
-	movl	88(%esp), %ecx          # 4-byte Reload
-	js	.LBB227_6
-# BB#5:
-	movl	%ebx, %edi
-.LBB227_6:
-	movl	%edi, 8(%edx)
-	js	.LBB227_8
-# BB#7:
-	movl	%eax, %ecx
-.LBB227_8:
-	movl	%ecx, 12(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	js	.LBB227_10
-# BB#9:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB227_10:
-	movl	%eax, 16(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB227_12
-# BB#11:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB227_12:
-	movl	%eax, 20(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB227_14
-# BB#13:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB227_14:
-	movl	%eax, 24(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB227_16
-# BB#15:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB227_16:
-	movl	%eax, 28(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB227_18
-# BB#17:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB227_18:
-	movl	%eax, 32(%edx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	js	.LBB227_20
-# BB#19:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB227_20:
-	movl	%eax, 36(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB227_22
-# BB#21:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB227_22:
-	movl	%eax, 40(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	js	.LBB227_24
-# BB#23:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB227_24:
-	movl	%eax, 44(%edx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	js	.LBB227_26
-# BB#25:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB227_26:
-	movl	%eax, 48(%edx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	js	.LBB227_28
-# BB#27:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB227_28:
-	movl	%eax, 52(%edx)
-	movl	96(%esp), %eax          # 4-byte Reload
-	js	.LBB227_30
-# BB#29:
-	movl	68(%esp), %eax          # 4-byte Reload
-.LBB227_30:
-	movl	%eax, 56(%edx)
-	addl	$2028, %esp             # imm = 0x7EC
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end227:
-	.size	mcl_fp_montNF15Lbmi2, .Lfunc_end227-mcl_fp_montNF15Lbmi2
-
-	.globl	mcl_fp_montRed15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed15Lbmi2,@function
-mcl_fp_montRed15Lbmi2:                  # @mcl_fp_montRed15Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1148, %esp             # imm = 0x47C
-	calll	.L228$pb
-.L228$pb:
-	popl	%eax
-.Ltmp49:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp49-.L228$pb), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1176(%esp), %edx
-	movl	-4(%edx), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	1172(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 80(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	imull	%esi, %ebx
-	movl	116(%ecx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%ecx), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%ecx), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	104(%ecx), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	100(%ecx), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	96(%ecx), %esi
-	movl	%esi, 152(%esp)         # 4-byte Spill
-	movl	92(%ecx), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	88(%ecx), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	84(%ecx), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	80(%ecx), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	76(%ecx), %esi
-	movl	%esi, 168(%esp)         # 4-byte Spill
-	movl	72(%ecx), %esi
-	movl	%esi, 164(%esp)         # 4-byte Spill
-	movl	68(%ecx), %esi
-	movl	%esi, 176(%esp)         # 4-byte Spill
-	movl	64(%ecx), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	60(%ecx), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	56(%ecx), %esi
-	movl	%esi, 156(%esp)         # 4-byte Spill
-	movl	52(%ecx), %esi
-	movl	%esi, 140(%esp)         # 4-byte Spill
-	movl	48(%ecx), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	44(%ecx), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	36(%ecx), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	32(%ecx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	28(%ecx), %ebp
-	movl	24(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	20(%ecx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	16(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	12(%ecx), %edi
-	movl	8(%ecx), %esi
-	movl	(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	56(%edx), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%edx), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	44(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	1080(%esp), %ecx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	80(%esp), %eax          # 4-byte Reload
-	addl	1080(%esp), %eax
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1084(%esp), %ecx
-	adcl	1088(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	adcl	1092(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1104(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1108(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1116(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	adcl	$0, 180(%esp)           # 4-byte Folded Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, 164(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	movl	148(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1016(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	1016(%esp), %esi
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	1020(%esp), %edx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1044(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	1060(%esp), %ebp
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, 164(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, %esi
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 148(%esp)         # 4-byte Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	952(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	952(%esp), %edi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	956(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	992(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %ebp         # 4-byte Reload
-	adcl	1004(%esp), %ebp
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	adcl	$0, 164(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	888(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	888(%esp), %esi
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	892(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	928(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	932(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	936(%esp), %ebp
-	movl	%ebp, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 160(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	movl	132(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	824(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	824(%esp), %esi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	828(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	856(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 132(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	760(%esp), %esi
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	764(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	movl	152(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	movl	128(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	696(%esp), %esi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 152(%esp)         # 4-byte Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 128(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	632(%esp), %edi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	636(%esp), %ecx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %ebp         # 4-byte Reload
-	adcl	672(%esp), %ebp
-	movl	164(%esp), %edi         # 4-byte Reload
-	adcl	676(%esp), %edi
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	568(%esp), %esi
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	572(%esp), %ecx
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	adcl	604(%esp), %ebp
-	movl	%ebp, 176(%esp)         # 4-byte Spill
-	adcl	608(%esp), %edi
-	movl	%edi, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %edi         # 4-byte Reload
-	adcl	616(%esp), %edi
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	1176(%esp), %eax
-	movl	%eax, %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	504(%esp), %esi
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	508(%esp), %ecx
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %esi         # 4-byte Reload
-	adcl	524(%esp), %esi
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	adcl	548(%esp), %edi
-	movl	%edi, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	440(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	440(%esp), %edi
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %edi         # 4-byte Reload
-	adcl	452(%esp), %edi
-	adcl	456(%esp), %esi
-	movl	%esi, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %esi         # 4-byte Reload
-	adcl	464(%esp), %esi
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	376(%esp), %ebp
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	380(%esp), %ebp
-	adcl	384(%esp), %edi
-	movl	%edi, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %edi         # 4-byte Reload
-	adcl	392(%esp), %edi
-	adcl	396(%esp), %esi
-	movl	%esi, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %esi         # 4-byte Reload
-	adcl	412(%esp), %esi
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ebp, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	312(%esp), %ebp
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	320(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	adcl	324(%esp), %edi
-	movl	%edi, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %ecx         # 4-byte Reload
-	adcl	328(%esp), %ecx
-	movl	%ecx, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %ecx         # 4-byte Reload
-	adcl	332(%esp), %ecx
-	movl	%ecx, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %ecx         # 4-byte Reload
-	adcl	336(%esp), %ecx
-	movl	%ecx, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %ecx         # 4-byte Reload
-	adcl	340(%esp), %ecx
-	movl	%ecx, 168(%esp)         # 4-byte Spill
-	adcl	344(%esp), %esi
-	movl	%esi, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %ecx         # 4-byte Reload
-	adcl	348(%esp), %ecx
-	movl	%ecx, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %ebp         # 4-byte Reload
-	adcl	352(%esp), %ebp
-	movl	136(%esp), %ecx         # 4-byte Reload
-	adcl	356(%esp), %ecx
-	movl	%ecx, 136(%esp)         # 4-byte Spill
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	360(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	364(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	368(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	372(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	%eax, %edi
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	248(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	248(%esp), %edi
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	252(%esp), %ecx
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	adcl	284(%esp), %ebp
-	movl	%ebp, 148(%esp)         # 4-byte Spill
-	movl	136(%esp), %edi         # 4-byte Reload
-	adcl	288(%esp), %edi
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	308(%esp), %ebp
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	184(%esp), %esi
-	movl	172(%esp), %edx         # 4-byte Reload
-	adcl	188(%esp), %edx
-	movl	%edx, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %ecx         # 4-byte Reload
-	adcl	192(%esp), %ecx
-	movl	%ecx, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %esi         # 4-byte Reload
-	adcl	204(%esp), %esi
-	movl	%esi, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	220(%esp), %edi
-	movl	%edi, 136(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	240(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %ebx          # 4-byte Reload
-	adcl	$0, %ebx
-	movl	%edx, %eax
-	subl	16(%esp), %edx          # 4-byte Folded Reload
-	sbbl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	176(%esp), %eax         # 4-byte Reload
-	sbbl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	164(%esp), %ebp         # 4-byte Reload
-	sbbl	12(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	144(%esp), %edi         # 4-byte Reload
-	sbbl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	160(%esp), %edi         # 4-byte Reload
-	sbbl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	148(%esp), %edi         # 4-byte Reload
-	sbbl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	136(%esp), %edi         # 4-byte Reload
-	sbbl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	152(%esp), %edi         # 4-byte Reload
-	sbbl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %edi         # 4-byte Reload
-	sbbl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 112(%esp)         # 4-byte Spill
-	movl	128(%esp), %edi         # 4-byte Reload
-	sbbl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	sbbl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 124(%esp)         # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	sbbl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 140(%esp)         # 4-byte Spill
-	movl	108(%esp), %edi         # 4-byte Reload
-	sbbl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 156(%esp)         # 4-byte Spill
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	movl	%ebx, %edi
-	jne	.LBB228_2
-# BB#1:
-	movl	%edx, 172(%esp)         # 4-byte Spill
-.LBB228_2:
-	movl	1168(%esp), %edx
-	movl	172(%esp), %ebx         # 4-byte Reload
-	movl	%ebx, (%edx)
-	movl	%edi, %ebx
-	testb	%bl, %bl
-	jne	.LBB228_4
-# BB#3:
-	movl	%ecx, 180(%esp)         # 4-byte Spill
-.LBB228_4:
-	movl	180(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 4(%edx)
-	movl	176(%esp), %ecx         # 4-byte Reload
-	jne	.LBB228_6
-# BB#5:
-	movl	%eax, %ecx
-.LBB228_6:
-	movl	%ecx, 8(%edx)
-	movl	164(%esp), %eax         # 4-byte Reload
-	jne	.LBB228_8
-# BB#7:
-	movl	%ebp, %eax
-.LBB228_8:
-	movl	%eax, 12(%edx)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	movl	148(%esp), %eax         # 4-byte Reload
-	movl	168(%esp), %ebp         # 4-byte Reload
-	jne	.LBB228_10
-# BB#9:
-	movl	%esi, %ebp
-.LBB228_10:
-	movl	%ebp, 16(%edx)
-	movl	152(%esp), %ebp         # 4-byte Reload
-	movl	144(%esp), %ebx         # 4-byte Reload
-	jne	.LBB228_12
-# BB#11:
-	movl	84(%esp), %ebx          # 4-byte Reload
-.LBB228_12:
-	movl	%ebx, 20(%edx)
-	movl	132(%esp), %ebx         # 4-byte Reload
-	movl	160(%esp), %edi         # 4-byte Reload
-	jne	.LBB228_14
-# BB#13:
-	movl	88(%esp), %edi          # 4-byte Reload
-.LBB228_14:
-	movl	%edi, 24(%edx)
-	movl	128(%esp), %edi         # 4-byte Reload
-	jne	.LBB228_16
-# BB#15:
-	movl	92(%esp), %eax          # 4-byte Reload
-.LBB228_16:
-	movl	%eax, 28(%edx)
-	movl	116(%esp), %esi         # 4-byte Reload
-	jne	.LBB228_18
-# BB#17:
-	movl	96(%esp), %eax          # 4-byte Reload
-	movl	%eax, 136(%esp)         # 4-byte Spill
-.LBB228_18:
-	movl	136(%esp), %eax         # 4-byte Reload
-	movl	%eax, 32(%edx)
-	jne	.LBB228_20
-# BB#19:
-	movl	100(%esp), %ebp         # 4-byte Reload
-.LBB228_20:
-	movl	%ebp, 36(%edx)
-	movl	104(%esp), %eax         # 4-byte Reload
-	jne	.LBB228_22
-# BB#21:
-	movl	112(%esp), %ebx         # 4-byte Reload
-.LBB228_22:
-	movl	%ebx, 40(%edx)
-	jne	.LBB228_24
-# BB#23:
-	movl	120(%esp), %edi         # 4-byte Reload
-.LBB228_24:
-	movl	%edi, 44(%edx)
-	jne	.LBB228_26
-# BB#25:
-	movl	124(%esp), %esi         # 4-byte Reload
-.LBB228_26:
-	movl	%esi, 48(%edx)
-	jne	.LBB228_28
-# BB#27:
-	movl	140(%esp), %eax         # 4-byte Reload
-.LBB228_28:
-	movl	%eax, 52(%edx)
-	jne	.LBB228_30
-# BB#29:
-	movl	156(%esp), %ecx         # 4-byte Reload
-.LBB228_30:
-	movl	%ecx, 56(%edx)
-	addl	$1148, %esp             # imm = 0x47C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end228:
-	.size	mcl_fp_montRed15Lbmi2, .Lfunc_end228-mcl_fp_montRed15Lbmi2
-
-	.globl	mcl_fp_addPre15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre15Lbmi2,@function
-mcl_fp_addPre15Lbmi2:                   # @mcl_fp_addPre15Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %ebx
-	adcl	8(%ecx), %ebx
-	movl	16(%esp), %edi
-	movl	%edx, (%edi)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%edi)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%ebx, 8(%edi)
-	movl	20(%eax), %ebx
-	movl	%edx, 12(%edi)
-	movl	20(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	24(%eax), %ebx
-	movl	%esi, 16(%edi)
-	movl	24(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	28(%eax), %ebx
-	movl	%edx, 20(%edi)
-	movl	28(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	32(%eax), %ebx
-	movl	%esi, 24(%edi)
-	movl	32(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	36(%eax), %ebx
-	movl	%edx, 28(%edi)
-	movl	36(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	40(%eax), %ebx
-	movl	%esi, 32(%edi)
-	movl	40(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	44(%eax), %ebx
-	movl	%edx, 36(%edi)
-	movl	44(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	48(%eax), %ebx
-	movl	%esi, 40(%edi)
-	movl	48(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	52(%eax), %ebx
-	movl	%edx, 44(%edi)
-	movl	52(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	%esi, 48(%edi)
-	movl	%edx, 52(%edi)
-	movl	56(%eax), %eax
-	movl	56(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 56(%edi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end229:
-	.size	mcl_fp_addPre15Lbmi2, .Lfunc_end229-mcl_fp_addPre15Lbmi2
-
-	.globl	mcl_fp_subPre15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre15Lbmi2,@function
-mcl_fp_subPre15Lbmi2:                   # @mcl_fp_subPre15Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebp
-	sbbl	8(%edx), %ebp
-	movl	20(%esp), %ebx
-	movl	%esi, (%ebx)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebx)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebp, 8(%ebx)
-	movl	20(%edx), %ebp
-	movl	%esi, 12(%ebx)
-	movl	20(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	24(%edx), %ebp
-	movl	%edi, 16(%ebx)
-	movl	24(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	28(%edx), %ebp
-	movl	%esi, 20(%ebx)
-	movl	28(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	32(%edx), %ebp
-	movl	%edi, 24(%ebx)
-	movl	32(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	36(%edx), %ebp
-	movl	%esi, 28(%ebx)
-	movl	36(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	40(%edx), %ebp
-	movl	%edi, 32(%ebx)
-	movl	40(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	44(%edx), %ebp
-	movl	%esi, 36(%ebx)
-	movl	44(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	48(%edx), %ebp
-	movl	%edi, 40(%ebx)
-	movl	48(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	52(%edx), %ebp
-	movl	%esi, 44(%ebx)
-	movl	52(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	%edi, 48(%ebx)
-	movl	%esi, 52(%ebx)
-	movl	56(%edx), %edx
-	movl	56(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 56(%ebx)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end230:
-	.size	mcl_fp_subPre15Lbmi2, .Lfunc_end230-mcl_fp_subPre15Lbmi2
-
-	.globl	mcl_fp_shr1_15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_15Lbmi2,@function
-mcl_fp_shr1_15Lbmi2:                    # @mcl_fp_shr1_15Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	8(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 4(%ecx)
-	movl	12(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 8(%ecx)
-	movl	16(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 16(%ecx)
-	movl	24(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 24(%ecx)
-	movl	32(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 32(%ecx)
-	movl	40(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 40(%ecx)
-	movl	48(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 44(%ecx)
-	movl	52(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 48(%ecx)
-	movl	56(%eax), %eax
-	shrdl	$1, %eax, %esi
-	movl	%esi, 52(%ecx)
-	shrl	%eax
-	movl	%eax, 56(%ecx)
-	popl	%esi
-	retl
-.Lfunc_end231:
-	.size	mcl_fp_shr1_15Lbmi2, .Lfunc_end231-mcl_fp_shr1_15Lbmi2
-
-	.globl	mcl_fp_add15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add15Lbmi2,@function
-mcl_fp_add15Lbmi2:                      # @mcl_fp_add15Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$48, %esp
-	movl	76(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edx
-	movl	72(%esp), %eax
-	addl	(%eax), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	adcl	4(%eax), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	8(%ecx), %edx
-	adcl	8(%eax), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	12(%eax), %esi
-	movl	16(%eax), %edx
-	adcl	12(%ecx), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	16(%ecx), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	20(%eax), %edx
-	adcl	20(%ecx), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	24(%eax), %edx
-	adcl	24(%ecx), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	28(%eax), %edx
-	adcl	28(%ecx), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	32(%eax), %edx
-	adcl	32(%ecx), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	36(%eax), %edx
-	adcl	36(%ecx), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	40(%eax), %edx
-	adcl	40(%ecx), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	44(%eax), %ebx
-	adcl	44(%ecx), %ebx
-	movl	%ebx, (%esp)            # 4-byte Spill
-	movl	48(%eax), %ebp
-	adcl	48(%ecx), %ebp
-	movl	52(%eax), %edi
-	adcl	52(%ecx), %edi
-	movl	56(%eax), %edx
-	adcl	56(%ecx), %edx
-	movl	68(%esp), %ecx
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, (%ecx)
-	movl	44(%esp), %esi          # 4-byte Reload
-	movl	%esi, 4(%ecx)
-	movl	40(%esp), %esi          # 4-byte Reload
-	movl	%esi, 8(%ecx)
-	movl	36(%esp), %esi          # 4-byte Reload
-	movl	%esi, 12(%ecx)
-	movl	32(%esp), %esi          # 4-byte Reload
-	movl	%esi, 16(%ecx)
-	movl	28(%esp), %esi          # 4-byte Reload
-	movl	%esi, 20(%ecx)
-	movl	24(%esp), %esi          # 4-byte Reload
-	movl	%esi, 24(%ecx)
-	movl	20(%esp), %esi          # 4-byte Reload
-	movl	%esi, 28(%ecx)
-	movl	16(%esp), %esi          # 4-byte Reload
-	movl	%esi, 32(%ecx)
-	movl	12(%esp), %esi          # 4-byte Reload
-	movl	%esi, 36(%ecx)
-	movl	8(%esp), %esi           # 4-byte Reload
-	movl	%esi, 40(%ecx)
-	movl	%ebx, 44(%ecx)
-	movl	%ebp, 48(%ecx)
-	movl	%edi, 52(%ecx)
-	movl	%edx, 56(%ecx)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	80(%esp), %esi
-	subl	(%esi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	sbbl	4(%esi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	40(%esp), %edx          # 4-byte Reload
-	sbbl	8(%esi), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	sbbl	12(%esi), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %edx          # 4-byte Reload
-	sbbl	16(%esi), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %edx          # 4-byte Reload
-	sbbl	20(%esi), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %edx          # 4-byte Reload
-	sbbl	24(%esi), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %edx          # 4-byte Reload
-	sbbl	28(%esi), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %edx          # 4-byte Reload
-	sbbl	32(%esi), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %edx          # 4-byte Reload
-	sbbl	36(%esi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %edx           # 4-byte Reload
-	sbbl	40(%esi), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	(%esp), %edx            # 4-byte Reload
-	sbbl	44(%esi), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	sbbl	48(%esi), %ebp
-	sbbl	52(%esi), %edi
-	sbbl	56(%esi), %eax
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB232_2
-# BB#1:                                 # %nocarry
-	movl	4(%esp), %edx           # 4-byte Reload
-	movl	%edx, (%ecx)
-	movl	44(%esp), %edx          # 4-byte Reload
-	movl	%edx, 4(%ecx)
-	movl	40(%esp), %edx          # 4-byte Reload
-	movl	%edx, 8(%ecx)
-	movl	36(%esp), %edx          # 4-byte Reload
-	movl	%edx, 12(%ecx)
-	movl	32(%esp), %edx          # 4-byte Reload
-	movl	%edx, 16(%ecx)
-	movl	28(%esp), %edx          # 4-byte Reload
-	movl	%edx, 20(%ecx)
-	movl	24(%esp), %edx          # 4-byte Reload
-	movl	%edx, 24(%ecx)
-	movl	20(%esp), %edx          # 4-byte Reload
-	movl	%edx, 28(%ecx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	movl	%edx, 32(%ecx)
-	movl	12(%esp), %edx          # 4-byte Reload
-	movl	%edx, 36(%ecx)
-	movl	8(%esp), %edx           # 4-byte Reload
-	movl	%edx, 40(%ecx)
-	movl	(%esp), %edx            # 4-byte Reload
-	movl	%edx, 44(%ecx)
-	movl	%ebp, 48(%ecx)
-	movl	%edi, 52(%ecx)
-	movl	%eax, 56(%ecx)
-.LBB232_2:                              # %carry
-	addl	$48, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end232:
-	.size	mcl_fp_add15Lbmi2, .Lfunc_end232-mcl_fp_add15Lbmi2
-
-	.globl	mcl_fp_addNF15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF15Lbmi2,@function
-mcl_fp_addNF15Lbmi2:                    # @mcl_fp_addNF15Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$120, %esp
-	movl	148(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	4(%ecx), %edx
-	movl	144(%esp), %esi
-	addl	(%esi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	4(%esi), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	56(%ecx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	52(%ecx), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	48(%ecx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	44(%ecx), %ebp
-	movl	40(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	32(%ecx), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	28(%ecx), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	24(%ecx), %eax
-	movl	20(%ecx), %ebx
-	movl	16(%ecx), %edi
-	movl	12(%ecx), %edx
-	movl	8(%ecx), %ecx
-	adcl	8(%esi), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	12(%esi), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	16(%esi), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	adcl	20(%esi), %ebx
-	movl	%ebx, 72(%esp)          # 4-byte Spill
-	adcl	24(%esi), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	28(%esi), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	32(%esi), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esi), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	40(%esi), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	44(%esi), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	48(%esi), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	52(%esi), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	56(%esi), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	152(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	subl	(%esi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	sbbl	4(%esi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	sbbl	8(%esi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	12(%esi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	sbbl	16(%esi), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	sbbl	20(%esi), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx          # 4-byte Reload
-	sbbl	24(%esi), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	sbbl	28(%esi), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	sbbl	32(%esi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	36(%esi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	sbbl	40(%esi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	movl	%edx, %eax
-	sbbl	44(%esi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	movl	%eax, %edi
-	sbbl	48(%esi), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %edi
-	movl	%ecx, %ebx
-	sbbl	52(%esi), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, %edi
-	sbbl	56(%esi), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	%edi, %esi
-	sarl	$31, %esi
-	testl	%esi, %esi
-	movl	80(%esp), %esi          # 4-byte Reload
-	js	.LBB233_2
-# BB#1:
-	movl	(%esp), %esi            # 4-byte Reload
-.LBB233_2:
-	movl	140(%esp), %edi
-	movl	%esi, (%edi)
-	movl	84(%esp), %ecx          # 4-byte Reload
-	js	.LBB233_4
-# BB#3:
-	movl	4(%esp), %ecx           # 4-byte Reload
-.LBB233_4:
-	movl	%ecx, 4(%edi)
-	movl	104(%esp), %ecx         # 4-byte Reload
-	movl	72(%esp), %esi          # 4-byte Reload
-	js	.LBB233_6
-# BB#5:
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-.LBB233_6:
-	movl	76(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%edi)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB233_8
-# BB#7:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB233_8:
-	movl	%eax, 12(%edi)
-	movl	%ebx, %ebp
-	movl	%edx, %eax
-	movl	68(%esp), %edx          # 4-byte Reload
-	js	.LBB233_10
-# BB#9:
-	movl	16(%esp), %edx          # 4-byte Reload
-.LBB233_10:
-	movl	%edx, 16(%edi)
-	movl	112(%esp), %edx         # 4-byte Reload
-	movl	108(%esp), %ebx         # 4-byte Reload
-	js	.LBB233_12
-# BB#11:
-	movl	20(%esp), %esi          # 4-byte Reload
-.LBB233_12:
-	movl	%esi, 20(%edi)
-	js	.LBB233_14
-# BB#13:
-	movl	24(%esp), %esi          # 4-byte Reload
-	movl	%esi, 88(%esp)          # 4-byte Spill
-.LBB233_14:
-	movl	88(%esp), %esi          # 4-byte Reload
-	movl	%esi, 24(%edi)
-	js	.LBB233_16
-# BB#15:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB233_16:
-	movl	%ecx, 28(%edi)
-	js	.LBB233_18
-# BB#17:
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-.LBB233_18:
-	movl	116(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 32(%edi)
-	js	.LBB233_20
-# BB#19:
-	movl	36(%esp), %ebx          # 4-byte Reload
-.LBB233_20:
-	movl	%ebx, 36(%edi)
-	js	.LBB233_22
-# BB#21:
-	movl	40(%esp), %edx          # 4-byte Reload
-.LBB233_22:
-	movl	%edx, 40(%edi)
-	js	.LBB233_24
-# BB#23:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB233_24:
-	movl	%eax, 44(%edi)
-	movl	96(%esp), %eax          # 4-byte Reload
-	js	.LBB233_26
-# BB#25:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB233_26:
-	movl	%eax, 48(%edi)
-	js	.LBB233_28
-# BB#27:
-	movl	52(%esp), %ebp          # 4-byte Reload
-.LBB233_28:
-	movl	%ebp, 52(%edi)
-	movl	100(%esp), %eax         # 4-byte Reload
-	js	.LBB233_30
-# BB#29:
-	movl	56(%esp), %eax          # 4-byte Reload
-.LBB233_30:
-	movl	%eax, 56(%edi)
-	addl	$120, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end233:
-	.size	mcl_fp_addNF15Lbmi2, .Lfunc_end233-mcl_fp_addNF15Lbmi2
-
-	.globl	mcl_fp_sub15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub15Lbmi2,@function
-mcl_fp_sub15Lbmi2:                      # @mcl_fp_sub15Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$56, %esp
-	movl	80(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	84(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esi), %eax
-	sbbl	28(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esi), %eax
-	sbbl	32(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	36(%esi), %eax
-	sbbl	36(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	40(%esi), %edx
-	sbbl	40(%edi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	44(%esi), %ecx
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	48(%esi), %eax
-	sbbl	48(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	52(%esi), %ebp
-	sbbl	52(%edi), %ebp
-	movl	56(%esi), %esi
-	sbbl	56(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	76(%esp), %ebx
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	52(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 36(%ebx)
-	movl	%edx, 40(%ebx)
-	movl	%ecx, 44(%ebx)
-	movl	%eax, 48(%ebx)
-	movl	%ebp, 52(%ebx)
-	movl	%esi, 56(%ebx)
-	je	.LBB234_2
-# BB#1:                                 # %carry
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	88(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	8(%esi), %edi
-	movl	12(%esi), %eax
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%edi, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	36(%esi), %eax
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	40(%esi), %ecx
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 36(%ebx)
-	movl	44(%esi), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 40(%ebx)
-	movl	48(%esi), %ecx
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 44(%ebx)
-	movl	%ecx, 48(%ebx)
-	movl	52(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 52(%ebx)
-	movl	56(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, 56(%ebx)
-.LBB234_2:                              # %nocarry
-	addl	$56, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end234:
-	.size	mcl_fp_sub15Lbmi2, .Lfunc_end234-mcl_fp_sub15Lbmi2
-
-	.globl	mcl_fp_subNF15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF15Lbmi2,@function
-mcl_fp_subNF15Lbmi2:                    # @mcl_fp_subNF15Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$96, %esp
-	movl	120(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edx
-	movl	124(%esp), %edi
-	subl	(%edi), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	56(%ecx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	52(%ecx), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	48(%ecx), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	44(%ecx), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	32(%ecx), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	28(%ecx), %ebp
-	movl	24(%ecx), %ebx
-	movl	20(%ecx), %esi
-	movl	16(%ecx), %edx
-	movl	12(%ecx), %eax
-	movl	8(%ecx), %ecx
-	sbbl	8(%edi), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	sbbl	12(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	sbbl	28(%edi), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%edi), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	48(%edi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	sbbl	52(%edi), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	sbbl	56(%edi), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	sarl	$31, %ebp
-	movl	%ebp, %edi
-	shldl	$1, %eax, %edi
-	movl	128(%esp), %edx
-	andl	(%edx), %edi
-	movl	56(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	48(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	44(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	40(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	36(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	32(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	28(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	24(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	20(%edx), %ebx
-	andl	%ebp, %ebx
-	movl	16(%edx), %esi
-	andl	%ebp, %esi
-	movl	12(%edx), %ecx
-	andl	%ebp, %ecx
-	movl	8(%edx), %eax
-	andl	%ebp, %eax
-	andl	4(%edx), %ebp
-	addl	60(%esp), %edi          # 4-byte Folded Reload
-	adcl	64(%esp), %ebp          # 4-byte Folded Reload
-	movl	116(%esp), %edx
-	movl	%edi, (%edx)
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 4(%edx)
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 8(%edx)
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%ecx, 12(%edx)
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%esi, 16(%edx)
-	movl	(%esp), %ecx            # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebx, 20(%edx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%edx)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	adcl	92(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%edx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%edx)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 36(%edx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 40(%edx)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 44(%edx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 48(%edx)
-	movl	%eax, 52(%edx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%edx)
-	addl	$96, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end235:
-	.size	mcl_fp_subNF15Lbmi2, .Lfunc_end235-mcl_fp_subNF15Lbmi2
-
-	.globl	mcl_fpDbl_add15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add15Lbmi2,@function
-mcl_fpDbl_add15Lbmi2:                   # @mcl_fpDbl_add15Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$108, %esp
-	movl	136(%esp), %ecx
-	movl	132(%esp), %edx
-	movl	12(%edx), %edi
-	movl	16(%edx), %esi
-	movl	8(%ecx), %ebx
-	movl	(%ecx), %ebp
-	addl	(%edx), %ebp
-	movl	128(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%ecx), %ebp
-	adcl	4(%edx), %ebp
-	adcl	8(%edx), %ebx
-	adcl	12(%ecx), %edi
-	adcl	16(%ecx), %esi
-	movl	%ebp, 4(%eax)
-	movl	68(%ecx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%ecx), %ebx
-	movl	%edi, 12(%eax)
-	movl	20(%edx), %edi
-	adcl	%ebx, %edi
-	movl	24(%ecx), %ebx
-	movl	%esi, 16(%eax)
-	movl	24(%edx), %esi
-	adcl	%ebx, %esi
-	movl	28(%ecx), %ebx
-	movl	%edi, 20(%eax)
-	movl	28(%edx), %edi
-	adcl	%ebx, %edi
-	movl	32(%ecx), %ebx
-	movl	%esi, 24(%eax)
-	movl	32(%edx), %esi
-	adcl	%ebx, %esi
-	movl	36(%ecx), %ebx
-	movl	%edi, 28(%eax)
-	movl	36(%edx), %edi
-	adcl	%ebx, %edi
-	movl	40(%ecx), %ebx
-	movl	%esi, 32(%eax)
-	movl	40(%edx), %esi
-	adcl	%ebx, %esi
-	movl	44(%ecx), %ebx
-	movl	%edi, 36(%eax)
-	movl	44(%edx), %edi
-	adcl	%ebx, %edi
-	movl	48(%ecx), %ebx
-	movl	%esi, 40(%eax)
-	movl	48(%edx), %esi
-	adcl	%ebx, %esi
-	movl	52(%ecx), %ebx
-	movl	%edi, 44(%eax)
-	movl	52(%edx), %edi
-	adcl	%ebx, %edi
-	movl	56(%ecx), %ebx
-	movl	%esi, 48(%eax)
-	movl	56(%edx), %esi
-	adcl	%ebx, %esi
-	movl	60(%ecx), %ebx
-	movl	%edi, 52(%eax)
-	movl	60(%edx), %edi
-	adcl	%ebx, %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	64(%ecx), %edi
-	movl	%esi, 56(%eax)
-	movl	64(%edx), %eax
-	adcl	%edi, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%edx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%ecx), %esi
-	movl	72(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	76(%ecx), %esi
-	movl	76(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%ecx), %esi
-	movl	80(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%ecx), %esi
-	movl	84(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%ecx), %esi
-	movl	88(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	92(%ecx), %esi
-	movl	92(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	96(%ecx), %esi
-	movl	96(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%ecx), %esi
-	movl	100(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	104(%ecx), %eax
-	movl	104(%edx), %esi
-	adcl	%eax, %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	108(%ecx), %edi
-	movl	108(%edx), %eax
-	adcl	%edi, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	112(%ecx), %ebx
-	movl	112(%edx), %edi
-	adcl	%ebx, %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	116(%ecx), %ecx
-	movl	116(%edx), %edx
-	adcl	%ecx, %edx
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	140(%esp), %ebp
-	movl	76(%esp), %ecx          # 4-byte Reload
-	subl	(%ebp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	sbbl	4(%ebp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	8(%ebp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	sbbl	20(%ebp), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	sbbl	24(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	sbbl	28(%ebp), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%ebp), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	sbbl	36(%ebp), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	sbbl	40(%ebp), %ecx
-	sbbl	44(%ebp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	sbbl	48(%ebp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	movl	%edx, %edi
-	sbbl	52(%ebp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %esi
-	sbbl	56(%ebp), %esi
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB236_2
-# BB#1:
-	movl	%esi, %edi
-.LBB236_2:
-	testb	%bl, %bl
-	movl	76(%esp), %eax          # 4-byte Reload
-	movl	72(%esp), %esi          # 4-byte Reload
-	movl	68(%esp), %ebx          # 4-byte Reload
-	movl	64(%esp), %ebp          # 4-byte Reload
-	jne	.LBB236_4
-# BB#3:
-	movl	%ecx, %esi
-	movl	(%esp), %ebx            # 4-byte Reload
-	movl	4(%esp), %ebp           # 4-byte Reload
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB236_4:
-	movl	128(%esp), %edx
-	movl	%eax, 60(%edx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	%eax, 64(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	movl	%eax, 68(%edx)
-	movl	88(%esp), %eax          # 4-byte Reload
-	movl	%eax, 72(%edx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	%eax, 76(%edx)
-	movl	96(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%edx)
-	movl	100(%esp), %eax         # 4-byte Reload
-	movl	%eax, 84(%edx)
-	movl	104(%esp), %eax         # 4-byte Reload
-	movl	%eax, 88(%edx)
-	movl	%ebp, 92(%edx)
-	movl	%ebx, 96(%edx)
-	movl	%esi, 100(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	jne	.LBB236_6
-# BB#5:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB236_6:
-	movl	%eax, 104(%edx)
-	movl	60(%esp), %ecx          # 4-byte Reload
-	movl	56(%esp), %eax          # 4-byte Reload
-	jne	.LBB236_8
-# BB#7:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB236_8:
-	movl	%eax, 108(%edx)
-	jne	.LBB236_10
-# BB#9:
-	movl	48(%esp), %ecx          # 4-byte Reload
-.LBB236_10:
-	movl	%ecx, 112(%edx)
-	movl	%edi, 116(%edx)
-	addl	$108, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end236:
-	.size	mcl_fpDbl_add15Lbmi2, .Lfunc_end236-mcl_fpDbl_add15Lbmi2
-
-	.globl	mcl_fpDbl_sub15Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub15Lbmi2,@function
-mcl_fpDbl_sub15Lbmi2:                   # @mcl_fpDbl_sub15Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$100, %esp
-	movl	124(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	128(%esp), %ebp
-	subl	(%ebp), %edx
-	sbbl	4(%ebp), %esi
-	movl	8(%eax), %edi
-	sbbl	8(%ebp), %edi
-	movl	120(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	12(%eax), %edx
-	sbbl	12(%ebp), %edx
-	movl	%esi, 4(%ecx)
-	movl	16(%eax), %esi
-	sbbl	16(%ebp), %esi
-	movl	%edi, 8(%ecx)
-	movl	20(%ebp), %edi
-	movl	%edx, 12(%ecx)
-	movl	20(%eax), %edx
-	sbbl	%edi, %edx
-	movl	24(%ebp), %edi
-	movl	%esi, 16(%ecx)
-	movl	24(%eax), %esi
-	sbbl	%edi, %esi
-	movl	28(%ebp), %edi
-	movl	%edx, 20(%ecx)
-	movl	28(%eax), %edx
-	sbbl	%edi, %edx
-	movl	32(%ebp), %edi
-	movl	%esi, 24(%ecx)
-	movl	32(%eax), %esi
-	sbbl	%edi, %esi
-	movl	36(%ebp), %edi
-	movl	%edx, 28(%ecx)
-	movl	36(%eax), %edx
-	sbbl	%edi, %edx
-	movl	40(%ebp), %edi
-	movl	%esi, 32(%ecx)
-	movl	40(%eax), %esi
-	sbbl	%edi, %esi
-	movl	44(%ebp), %edi
-	movl	%edx, 36(%ecx)
-	movl	44(%eax), %edx
-	sbbl	%edi, %edx
-	movl	48(%ebp), %edi
-	movl	%esi, 40(%ecx)
-	movl	48(%eax), %esi
-	sbbl	%edi, %esi
-	movl	52(%ebp), %edi
-	movl	%edx, 44(%ecx)
-	movl	52(%eax), %edx
-	sbbl	%edi, %edx
-	movl	56(%ebp), %edi
-	movl	%esi, 48(%ecx)
-	movl	56(%eax), %esi
-	sbbl	%edi, %esi
-	movl	60(%ebp), %edi
-	movl	%edx, 52(%ecx)
-	movl	60(%eax), %edx
-	sbbl	%edi, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	64(%ebp), %edx
-	movl	%esi, 56(%ecx)
-	movl	64(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	68(%ebp), %edx
-	movl	68(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	72(%ebp), %edx
-	movl	72(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	76(%ebp), %edx
-	movl	76(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	80(%ebp), %edx
-	movl	80(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	84(%ebp), %edx
-	movl	84(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	88(%ebp), %edx
-	movl	88(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	92(%ebp), %edx
-	movl	92(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	96(%ebp), %edx
-	movl	96(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	100(%ebp), %edx
-	movl	100(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	104(%ebp), %edx
-	movl	104(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	108(%ebp), %edx
-	movl	108(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	112(%ebp), %edx
-	movl	112(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	116(%ebp), %edx
-	movl	116(%eax), %eax
-	sbbl	%edx, %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	132(%esp), %esi
-	jne	.LBB237_1
-# BB#2:
-	movl	$0, 60(%esp)            # 4-byte Folded Spill
-	jmp	.LBB237_3
-.LBB237_1:
-	movl	56(%esi), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-.LBB237_3:
-	testb	%al, %al
-	jne	.LBB237_4
-# BB#5:
-	movl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	$0, %ebx
-	jmp	.LBB237_6
-.LBB237_4:
-	movl	(%esi), %ebx
-	movl	4(%esi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-.LBB237_6:
-	jne	.LBB237_7
-# BB#8:
-	movl	$0, 32(%esp)            # 4-byte Folded Spill
-	jmp	.LBB237_9
-.LBB237_7:
-	movl	52(%esi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-.LBB237_9:
-	jne	.LBB237_10
-# BB#11:
-	movl	$0, 28(%esp)            # 4-byte Folded Spill
-	jmp	.LBB237_12
-.LBB237_10:
-	movl	48(%esi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-.LBB237_12:
-	jne	.LBB237_13
-# BB#14:
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB237_15
-.LBB237_13:
-	movl	44(%esi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-.LBB237_15:
-	jne	.LBB237_16
-# BB#17:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	jmp	.LBB237_18
-.LBB237_16:
-	movl	40(%esi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB237_18:
-	jne	.LBB237_19
-# BB#20:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB237_21
-.LBB237_19:
-	movl	36(%esi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB237_21:
-	jne	.LBB237_22
-# BB#23:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB237_24
-.LBB237_22:
-	movl	32(%esi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB237_24:
-	jne	.LBB237_25
-# BB#26:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB237_27
-.LBB237_25:
-	movl	28(%esi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB237_27:
-	jne	.LBB237_28
-# BB#29:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB237_30
-.LBB237_28:
-	movl	24(%esi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB237_30:
-	jne	.LBB237_31
-# BB#32:
-	movl	$0, %edx
-	jmp	.LBB237_33
-.LBB237_31:
-	movl	20(%esi), %edx
-.LBB237_33:
-	jne	.LBB237_34
-# BB#35:
-	movl	$0, %ebp
-	jmp	.LBB237_36
-.LBB237_34:
-	movl	16(%esi), %ebp
-.LBB237_36:
-	jne	.LBB237_37
-# BB#38:
-	movl	$0, %eax
-	jmp	.LBB237_39
-.LBB237_37:
-	movl	12(%esi), %eax
-.LBB237_39:
-	jne	.LBB237_40
-# BB#41:
-	xorl	%esi, %esi
-	jmp	.LBB237_42
-.LBB237_40:
-	movl	8(%esi), %esi
-.LBB237_42:
-	addl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 60(%ecx)
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%edi, 64(%ecx)
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, 68(%ecx)
-	adcl	52(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 72(%ecx)
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebp, 76(%ecx)
-	movl	(%esp), %esi            # 4-byte Reload
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 80(%ecx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, 84(%ecx)
-	movl	8(%esp), %edx           # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 88(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 92(%ecx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 96(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 100(%ecx)
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 104(%ecx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 108(%ecx)
-	movl	%eax, 112(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%ecx)
-	addl	$100, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end237:
-	.size	mcl_fpDbl_sub15Lbmi2, .Lfunc_end237-mcl_fpDbl_sub15Lbmi2
-
-	.align	16, 0x90
-	.type	.LmulPv512x32,@function
-.LmulPv512x32:                          # @mulPv512x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$56, %esp
-	movl	%edx, %eax
-	movl	76(%esp), %edi
-	movl	%edi, %edx
-	mulxl	4(%eax), %ebx, %esi
-	movl	%edi, %edx
-	mulxl	(%eax), %ebp, %edx
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	addl	%ebx, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	8(%eax), %edx, %ebx
-	adcl	%esi, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	12(%eax), %edx, %esi
-	adcl	%ebx, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	16(%eax), %edx, %ebx
-	adcl	%esi, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	20(%eax), %edx, %esi
-	adcl	%ebx, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	24(%eax), %edx, %ebx
-	adcl	%esi, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	28(%eax), %edx, %esi
-	adcl	%ebx, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	32(%eax), %edx, %ebx
-	adcl	%esi, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	36(%eax), %edx, %esi
-	adcl	%ebx, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	40(%eax), %edx, %ebx
-	adcl	%esi, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	44(%eax), %edx, %esi
-	adcl	%ebx, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%edi, %edx
-	mulxl	48(%eax), %ebx, %ebp
-	adcl	%esi, %ebx
-	movl	%edi, %edx
-	mulxl	52(%eax), %esi, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	%ebp, %esi
-	movl	%edi, %edx
-	mulxl	56(%eax), %edx, %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%ecx)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%ecx)
-	movl	44(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%ecx)
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%ecx)
-	movl	36(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%ecx)
-	movl	32(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%ecx)
-	movl	28(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%ecx)
-	movl	24(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%ecx)
-	movl	20(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 32(%ecx)
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 36(%ecx)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 40(%ecx)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 44(%ecx)
-	movl	%ebx, 48(%ecx)
-	movl	%esi, 52(%ecx)
-	movl	%edx, 56(%ecx)
-	movl	%edi, %edx
-	mulxl	60(%eax), %eax, %edx
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 60(%ecx)
-	adcl	$0, %edx
-	movl	%edx, 64(%ecx)
-	movl	%ecx, %eax
-	addl	$56, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end238:
-	.size	.LmulPv512x32, .Lfunc_end238-.LmulPv512x32
-
-	.globl	mcl_fp_mulUnitPre16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre16Lbmi2,@function
-mcl_fp_mulUnitPre16Lbmi2:               # @mcl_fp_mulUnitPre16Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$124, %esp
-	calll	.L239$pb
-.L239$pb:
-	popl	%ebx
-.Ltmp50:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp50-.L239$pb), %ebx
-	movl	152(%esp), %eax
-	movl	%eax, (%esp)
-	leal	56(%esp), %ecx
-	movl	148(%esp), %edx
-	calll	.LmulPv512x32
-	movl	120(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp
-	movl	72(%esp), %ebx
-	movl	68(%esp), %edi
-	movl	64(%esp), %esi
-	movl	56(%esp), %edx
-	movl	60(%esp), %ecx
-	movl	144(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 60(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 64(%eax)
-	addl	$124, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end239:
-	.size	mcl_fp_mulUnitPre16Lbmi2, .Lfunc_end239-mcl_fp_mulUnitPre16Lbmi2
-
-	.globl	mcl_fpDbl_mulPre16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre16Lbmi2,@function
-mcl_fpDbl_mulPre16Lbmi2:                # @mcl_fpDbl_mulPre16Lbmi2
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$300, %esp              # imm = 0x12C
-	calll	.L240$pb
-.L240$pb:
-	popl	%ebx
-.Ltmp51:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp51-.L240$pb), %ebx
-	movl	%ebx, -224(%ebp)        # 4-byte Spill
-	movl	16(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	12(%ebp), %esi
-	movl	%esi, 4(%esp)
-	movl	8(%ebp), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre8Lbmi2@PLT
-	leal	32(%edi), %eax
-	movl	%eax, 8(%esp)
-	leal	32(%esi), %eax
-	movl	%eax, 4(%esp)
-	movl	8(%ebp), %eax
-	leal	64(%eax), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre8Lbmi2@PLT
-	movl	52(%esi), %ebx
-	movl	48(%esi), %eax
-	movl	44(%esi), %ecx
-	movl	40(%esi), %edx
-	movl	%edx, -176(%ebp)        # 4-byte Spill
-	movl	(%esi), %edi
-	movl	4(%esi), %edx
-	addl	32(%esi), %edi
-	movl	%edi, -184(%ebp)        # 4-byte Spill
-	movl	%esi, %edi
-	adcl	36(%edi), %edx
-	movl	%edx, -236(%ebp)        # 4-byte Spill
-	movl	-176(%ebp), %edx        # 4-byte Reload
-	adcl	8(%edi), %edx
-	movl	%edx, -176(%ebp)        # 4-byte Spill
-	adcl	12(%edi), %ecx
-	movl	%ecx, -232(%ebp)        # 4-byte Spill
-	adcl	16(%edi), %eax
-	movl	%eax, -212(%ebp)        # 4-byte Spill
-	adcl	20(%edi), %ebx
-	movl	%ebx, -228(%ebp)        # 4-byte Spill
-	movl	56(%edi), %eax
-	adcl	24(%edi), %eax
-	movl	%eax, -248(%ebp)        # 4-byte Spill
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %ecx
-	popl	%eax
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	movl	16(%ebp), %esi
-	movl	(%esi), %ecx
-	addl	32(%esi), %ecx
-	movl	%ecx, -188(%ebp)        # 4-byte Spill
-	movl	4(%esi), %ecx
-	adcl	36(%esi), %ecx
-	movl	%ecx, -192(%ebp)        # 4-byte Spill
-	movl	40(%esi), %ecx
-	adcl	8(%esi), %ecx
-	movl	%ecx, -196(%ebp)        # 4-byte Spill
-	movl	44(%esi), %ecx
-	adcl	12(%esi), %ecx
-	movl	%ecx, -200(%ebp)        # 4-byte Spill
-	movl	48(%esi), %ecx
-	adcl	16(%esi), %ecx
-	movl	%ecx, -204(%ebp)        # 4-byte Spill
-	movl	52(%esi), %ecx
-	adcl	20(%esi), %ecx
-	movl	%ecx, -208(%ebp)        # 4-byte Spill
-	movl	56(%esi), %edx
-	adcl	24(%esi), %edx
-	movl	60(%esi), %ecx
-	adcl	28(%esi), %ecx
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %ebx
-	popl	%eax
-	movl	%ebx, -252(%ebp)        # 4-byte Spill
-	movl	-212(%ebp), %ebx        # 4-byte Reload
-	movl	-176(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -216(%ebp)        # 4-byte Spill
-	movl	-184(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -220(%ebp)        # 4-byte Spill
-	jb	.LBB240_2
-# BB#1:
-	xorl	%eax, %eax
-	xorl	%ebx, %ebx
-	movl	$0, -216(%ebp)          # 4-byte Folded Spill
-	movl	$0, -220(%ebp)          # 4-byte Folded Spill
-.LBB240_2:
-	movl	%ebx, -244(%ebp)        # 4-byte Spill
-	movl	%eax, -240(%ebp)        # 4-byte Spill
-	movl	60(%edi), %eax
-	movl	-144(%ebp), %ebx        # 4-byte Reload
-	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	28(%edi), %eax
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	movl	%edx, -144(%ebp)        # 4-byte Spill
-	movl	-208(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	movl	-204(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-	movl	-200(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	movl	-196(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	movl	-192(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -164(%ebp)        # 4-byte Spill
-	movl	-188(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	jb	.LBB240_4
-# BB#3:
-	movl	$0, -172(%ebp)          # 4-byte Folded Spill
-	movl	$0, -144(%ebp)          # 4-byte Folded Spill
-	movl	$0, -148(%ebp)          # 4-byte Folded Spill
-	movl	$0, -152(%ebp)          # 4-byte Folded Spill
-	movl	$0, -156(%ebp)          # 4-byte Folded Spill
-	movl	$0, -160(%ebp)          # 4-byte Folded Spill
-	movl	$0, -164(%ebp)          # 4-byte Folded Spill
-	movl	$0, -168(%ebp)          # 4-byte Folded Spill
-.LBB240_4:
-	movl	-184(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -108(%ebp)
-	movl	-236(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -104(%ebp)
-	movl	-176(%ebp), %edi        # 4-byte Reload
-	movl	%edi, -100(%ebp)
-	movl	-232(%ebp), %edi        # 4-byte Reload
-	movl	%edi, -96(%ebp)
-	movl	-212(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -92(%ebp)
-	movl	-228(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -88(%ebp)
-	movl	-248(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -84(%ebp)
-	movl	-188(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -140(%ebp)
-	movl	-192(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -136(%ebp)
-	movl	-196(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -132(%ebp)
-	movl	-200(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -128(%ebp)
-	movl	-204(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -124(%ebp)
-	movl	-208(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -120(%ebp)
-	movl	%esi, %ebx
-	movl	%edi, %esi
-	movl	%eax, %edi
-	movl	%edx, -116(%ebp)
-	movl	%ecx, -112(%ebp)
-	sbbl	%edx, %edx
-	movl	-180(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -80(%ebp)
-	movl	-252(%ebp), %ecx        # 4-byte Reload
-	pushl	%eax
-	movl	%ecx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB240_6
-# BB#5:
-	movl	$0, %eax
-	movl	$0, %ebx
-	movl	$0, %esi
-	movl	$0, %edi
-.LBB240_6:
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	sbbl	%eax, %eax
-	leal	-140(%ebp), %ecx
-	movl	%ecx, 8(%esp)
-	leal	-108(%ebp), %ecx
-	movl	%ecx, 4(%esp)
-	leal	-76(%ebp), %ecx
-	movl	%ecx, (%esp)
-	andl	%eax, %edx
-	movl	-220(%ebp), %eax        # 4-byte Reload
-	addl	%eax, -168(%ebp)        # 4-byte Folded Spill
-	adcl	%edi, -164(%ebp)        # 4-byte Folded Spill
-	movl	-216(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -160(%ebp)        # 4-byte Folded Spill
-	adcl	%esi, -156(%ebp)        # 4-byte Folded Spill
-	movl	-244(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	adcl	%ebx, -148(%ebp)        # 4-byte Folded Spill
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	adcl	-240(%ebp), %eax        # 4-byte Folded Reload
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	movl	-172(%ebp), %edi        # 4-byte Reload
-	adcl	-180(%ebp), %edi        # 4-byte Folded Reload
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	andl	$1, %edx
-	movl	%edx, -176(%ebp)        # 4-byte Spill
-	movl	-224(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre8Lbmi2@PLT
-	movl	-168(%ebp), %eax        # 4-byte Reload
-	addl	-44(%ebp), %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	movl	-164(%ebp), %eax        # 4-byte Reload
-	adcl	-40(%ebp), %eax
-	movl	%eax, -164(%ebp)        # 4-byte Spill
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	adcl	-36(%ebp), %eax
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	-32(%ebp), %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	movl	-152(%ebp), %eax        # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	adcl	-16(%ebp), %edi
-	movl	%edi, -172(%ebp)        # 4-byte Spill
-	adcl	%esi, -176(%ebp)        # 4-byte Folded Spill
-	movl	-76(%ebp), %eax
-	movl	8(%ebp), %esi
-	subl	(%esi), %eax
-	movl	%eax, -196(%ebp)        # 4-byte Spill
-	movl	-72(%ebp), %ecx
-	sbbl	4(%esi), %ecx
-	movl	-68(%ebp), %eax
-	sbbl	8(%esi), %eax
-	movl	%eax, -192(%ebp)        # 4-byte Spill
-	movl	-64(%ebp), %edx
-	sbbl	12(%esi), %edx
-	movl	-60(%ebp), %ebx
-	sbbl	16(%esi), %ebx
-	movl	-56(%ebp), %eax
-	sbbl	20(%esi), %eax
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	movl	-52(%ebp), %eax
-	sbbl	24(%esi), %eax
-	movl	%eax, -184(%ebp)        # 4-byte Spill
-	movl	-48(%ebp), %eax
-	sbbl	28(%esi), %eax
-	movl	%eax, -188(%ebp)        # 4-byte Spill
-	movl	32(%esi), %eax
-	movl	%eax, -200(%ebp)        # 4-byte Spill
-	sbbl	%eax, -168(%ebp)        # 4-byte Folded Spill
-	movl	36(%esi), %eax
-	movl	%eax, -204(%ebp)        # 4-byte Spill
-	sbbl	%eax, -164(%ebp)        # 4-byte Folded Spill
-	movl	40(%esi), %eax
-	movl	%eax, -208(%ebp)        # 4-byte Spill
-	sbbl	%eax, -160(%ebp)        # 4-byte Folded Spill
-	movl	44(%esi), %eax
-	movl	%eax, -212(%ebp)        # 4-byte Spill
-	sbbl	%eax, -156(%ebp)        # 4-byte Folded Spill
-	movl	48(%esi), %eax
-	movl	%eax, -216(%ebp)        # 4-byte Spill
-	sbbl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	movl	52(%esi), %eax
-	movl	%eax, -220(%ebp)        # 4-byte Spill
-	sbbl	%eax, -148(%ebp)        # 4-byte Folded Spill
-	movl	56(%esi), %eax
-	movl	%eax, -224(%ebp)        # 4-byte Spill
-	movl	-144(%ebp), %edi        # 4-byte Reload
-	sbbl	%eax, %edi
-	movl	60(%esi), %eax
-	movl	%eax, -228(%ebp)        # 4-byte Spill
-	sbbl	%eax, -172(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, -176(%ebp)          # 4-byte Folded Spill
-	movl	64(%esi), %eax
-	movl	%eax, -260(%ebp)        # 4-byte Spill
-	subl	%eax, -196(%ebp)        # 4-byte Folded Spill
-	movl	68(%esi), %eax
-	movl	%eax, -264(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ecx
-	movl	72(%esi), %eax
-	movl	%eax, -268(%ebp)        # 4-byte Spill
-	sbbl	%eax, -192(%ebp)        # 4-byte Folded Spill
-	movl	76(%esi), %eax
-	movl	%eax, -272(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edx
-	movl	80(%esi), %eax
-	movl	%eax, -276(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ebx
-	movl	84(%esi), %eax
-	movl	%eax, -280(%ebp)        # 4-byte Spill
-	sbbl	%eax, -180(%ebp)        # 4-byte Folded Spill
-	movl	88(%esi), %eax
-	movl	%eax, -284(%ebp)        # 4-byte Spill
-	sbbl	%eax, -184(%ebp)        # 4-byte Folded Spill
-	movl	92(%esi), %eax
-	movl	%eax, -288(%ebp)        # 4-byte Spill
-	sbbl	%eax, -188(%ebp)        # 4-byte Folded Spill
-	movl	96(%esi), %eax
-	movl	%eax, -292(%ebp)        # 4-byte Spill
-	sbbl	%eax, -168(%ebp)        # 4-byte Folded Spill
-	movl	100(%esi), %eax
-	movl	%eax, -236(%ebp)        # 4-byte Spill
-	sbbl	%eax, -164(%ebp)        # 4-byte Folded Spill
-	movl	104(%esi), %eax
-	movl	%eax, -240(%ebp)        # 4-byte Spill
-	sbbl	%eax, -160(%ebp)        # 4-byte Folded Spill
-	movl	108(%esi), %eax
-	movl	%eax, -244(%ebp)        # 4-byte Spill
-	sbbl	%eax, -156(%ebp)        # 4-byte Folded Spill
-	movl	112(%esi), %eax
-	movl	%eax, -248(%ebp)        # 4-byte Spill
-	sbbl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	movl	116(%esi), %eax
-	movl	%eax, -252(%ebp)        # 4-byte Spill
-	sbbl	%eax, -148(%ebp)        # 4-byte Folded Spill
-	movl	120(%esi), %eax
-	movl	%eax, -232(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edi
-	movl	%edi, -144(%ebp)        # 4-byte Spill
-	movl	124(%esi), %eax
-	movl	%eax, -256(%ebp)        # 4-byte Spill
-	sbbl	%eax, -172(%ebp)        # 4-byte Folded Spill
-	movl	-176(%ebp), %edi        # 4-byte Reload
-	sbbl	$0, %edi
-	movl	-196(%ebp), %eax        # 4-byte Reload
-	addl	-200(%ebp), %eax        # 4-byte Folded Reload
-	adcl	-204(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 32(%esi)
-	movl	-192(%ebp), %eax        # 4-byte Reload
-	adcl	-208(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 36(%esi)
-	adcl	-212(%ebp), %edx        # 4-byte Folded Reload
-	movl	%eax, 40(%esi)
-	adcl	-216(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%edx, 44(%esi)
-	movl	-180(%ebp), %eax        # 4-byte Reload
-	adcl	-220(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ebx, 48(%esi)
-	movl	-184(%ebp), %ecx        # 4-byte Reload
-	adcl	-224(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 52(%esi)
-	movl	-188(%ebp), %edx        # 4-byte Reload
-	adcl	-228(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 56(%esi)
-	movl	-168(%ebp), %eax        # 4-byte Reload
-	adcl	-260(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edx, 60(%esi)
-	movl	-164(%ebp), %ecx        # 4-byte Reload
-	adcl	-264(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 64(%esi)
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	adcl	-268(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 68(%esi)
-	movl	-156(%ebp), %ecx        # 4-byte Reload
-	adcl	-272(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 72(%esi)
-	movl	-152(%ebp), %eax        # 4-byte Reload
-	adcl	-276(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 76(%esi)
-	movl	-148(%ebp), %ecx        # 4-byte Reload
-	adcl	-280(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 80(%esi)
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	adcl	-284(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 84(%esi)
-	movl	-172(%ebp), %ecx        # 4-byte Reload
-	adcl	-288(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 88(%esi)
-	adcl	-292(%ebp), %edi        # 4-byte Folded Reload
-	movl	%ecx, 92(%esi)
-	movl	%edi, 96(%esi)
-	movl	-236(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 100(%esi)
-	movl	-240(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 104(%esi)
-	movl	-244(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 108(%esi)
-	movl	-248(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 112(%esi)
-	movl	-252(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 116(%esi)
-	movl	-232(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 120(%esi)
-	movl	-256(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 124(%esi)
-	addl	$300, %esp              # imm = 0x12C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end240:
-	.size	mcl_fpDbl_mulPre16Lbmi2, .Lfunc_end240-mcl_fpDbl_mulPre16Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre16Lbmi2,@function
-mcl_fpDbl_sqrPre16Lbmi2:                # @mcl_fpDbl_sqrPre16Lbmi2
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$300, %esp              # imm = 0x12C
-	calll	.L241$pb
-.L241$pb:
-	popl	%ebx
-.Ltmp52:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp52-.L241$pb), %ebx
-	movl	%ebx, -184(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %esi
-	movl	%esi, (%esp)
-	calll	mcl_fpDbl_mulPre8Lbmi2@PLT
-	leal	32(%edi), %eax
-	movl	%eax, 8(%esp)
-	movl	%eax, 4(%esp)
-	leal	64(%esi), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre8Lbmi2@PLT
-	movl	52(%edi), %eax
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	movl	48(%edi), %eax
-	movl	44(%edi), %ebx
-	movl	40(%edi), %esi
-	movl	(%edi), %ecx
-	movl	4(%edi), %edx
-	addl	32(%edi), %ecx
-	movl	%ecx, -192(%ebp)        # 4-byte Spill
-	adcl	36(%edi), %edx
-	movl	%edx, -196(%ebp)        # 4-byte Spill
-	adcl	8(%edi), %esi
-	movl	%esi, -188(%ebp)        # 4-byte Spill
-	adcl	12(%edi), %ebx
-	adcl	16(%edi), %eax
-	movl	%eax, -208(%ebp)        # 4-byte Spill
-	movl	-180(%ebp), %eax        # 4-byte Reload
-	adcl	20(%edi), %eax
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	addl	%ecx, %ecx
-	movl	%ecx, -164(%ebp)        # 4-byte Spill
-	adcl	%edx, %edx
-	movl	%edx, -160(%ebp)        # 4-byte Spill
-	adcl	%esi, %esi
-	movl	%esi, -156(%ebp)        # 4-byte Spill
-	movl	%ebx, %edx
-	movl	%ebx, %esi
-	adcl	%edx, %edx
-	movl	%edx, -152(%ebp)        # 4-byte Spill
-	movl	-208(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %edx
-	movl	%eax, %ebx
-	adcl	%edx, %edx
-	movl	%edx, -148(%ebp)        # 4-byte Spill
-	movl	-180(%ebp), %edx        # 4-byte Reload
-	adcl	%edx, %edx
-	movl	%edx, -144(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -172(%ebp)        # 4-byte Spill
-	movl	56(%edi), %edx
-	movl	-168(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	adcl	24(%edi), %edx
-	movl	60(%edi), %ecx
-	adcl	28(%edi), %ecx
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -200(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -204(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	sbbl	%eax, %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB241_2
-# BB#1:
-	movl	$0, -144(%ebp)          # 4-byte Folded Spill
-	movl	$0, -148(%ebp)          # 4-byte Folded Spill
-	movl	$0, -152(%ebp)          # 4-byte Folded Spill
-	movl	$0, -156(%ebp)          # 4-byte Folded Spill
-	movl	$0, -160(%ebp)          # 4-byte Folded Spill
-	movl	$0, -164(%ebp)          # 4-byte Folded Spill
-.LBB241_2:
-	movl	%edx, %eax
-	movl	-172(%ebp), %edi        # 4-byte Reload
-	pushl	%eax
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	%eax, %eax
-	movl	%ecx, %edi
-	adcl	%edi, %edi
-	movl	%edi, -176(%ebp)        # 4-byte Spill
-	movl	-204(%ebp), %edi        # 4-byte Reload
-	pushl	%eax
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB241_4
-# BB#3:
-	movl	$0, -176(%ebp)          # 4-byte Folded Spill
-	xorl	%eax, %eax
-.LBB241_4:
-	movl	%eax, -172(%ebp)        # 4-byte Spill
-	movl	-192(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -108(%ebp)
-	movl	%eax, -140(%ebp)
-	movl	-196(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -104(%ebp)
-	movl	%eax, -136(%ebp)
-	movl	-188(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -100(%ebp)
-	movl	%eax, -132(%ebp)
-	movl	%esi, -96(%ebp)
-	movl	%esi, -128(%ebp)
-	movl	%ebx, -92(%ebp)
-	movl	%ebx, -124(%ebp)
-	movl	-180(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -88(%ebp)
-	movl	%eax, -120(%ebp)
-	movl	%edx, -84(%ebp)
-	movl	%edx, -116(%ebp)
-	movl	%ecx, -80(%ebp)
-	movl	%ecx, -112(%ebp)
-	movl	-200(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB241_5
-# BB#6:
-	xorl	%edi, %edi
-	jmp	.LBB241_7
-.LBB241_5:
-	shrl	$31, %ecx
-	movl	%ecx, %edi
-.LBB241_7:
-	leal	-140(%ebp), %eax
-	movl	%eax, 8(%esp)
-	leal	-108(%ebp), %eax
-	movl	%eax, 4(%esp)
-	leal	-76(%ebp), %eax
-	movl	%eax, (%esp)
-	movl	-168(%ebp), %esi        # 4-byte Reload
-	andl	$1, %esi
-	movl	-184(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre8Lbmi2@PLT
-	movl	-164(%ebp), %eax        # 4-byte Reload
-	addl	-44(%ebp), %eax
-	movl	%eax, -164(%ebp)        # 4-byte Spill
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	adcl	-40(%ebp), %eax
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	-36(%ebp), %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	movl	-152(%ebp), %eax        # 4-byte Reload
-	adcl	-32(%ebp), %eax
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	movl	-172(%ebp), %eax        # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -172(%ebp)        # 4-byte Spill
-	movl	-176(%ebp), %eax        # 4-byte Reload
-	adcl	-16(%ebp), %eax
-	adcl	%edi, %esi
-	movl	%esi, -168(%ebp)        # 4-byte Spill
-	movl	-76(%ebp), %ecx
-	movl	8(%ebp), %esi
-	subl	(%esi), %ecx
-	movl	%ecx, -180(%ebp)        # 4-byte Spill
-	movl	-72(%ebp), %edi
-	sbbl	4(%esi), %edi
-	movl	-68(%ebp), %edx
-	sbbl	8(%esi), %edx
-	movl	%edx, -184(%ebp)        # 4-byte Spill
-	movl	-64(%ebp), %edx
-	sbbl	12(%esi), %edx
-	movl	%edx, -192(%ebp)        # 4-byte Spill
-	movl	-60(%ebp), %ebx
-	sbbl	16(%esi), %ebx
-	movl	%eax, %ecx
-	movl	-56(%ebp), %eax
-	sbbl	20(%esi), %eax
-	movl	%eax, -196(%ebp)        # 4-byte Spill
-	movl	-52(%ebp), %edx
-	sbbl	24(%esi), %edx
-	movl	%edx, -188(%ebp)        # 4-byte Spill
-	movl	-48(%ebp), %edx
-	sbbl	28(%esi), %edx
-	movl	32(%esi), %eax
-	movl	%eax, -200(%ebp)        # 4-byte Spill
-	sbbl	%eax, -164(%ebp)        # 4-byte Folded Spill
-	movl	36(%esi), %eax
-	movl	%eax, -204(%ebp)        # 4-byte Spill
-	sbbl	%eax, -160(%ebp)        # 4-byte Folded Spill
-	movl	40(%esi), %eax
-	movl	%eax, -208(%ebp)        # 4-byte Spill
-	sbbl	%eax, -156(%ebp)        # 4-byte Folded Spill
-	movl	44(%esi), %eax
-	movl	%eax, -212(%ebp)        # 4-byte Spill
-	sbbl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	movl	48(%esi), %eax
-	movl	%eax, -216(%ebp)        # 4-byte Spill
-	sbbl	%eax, -148(%ebp)        # 4-byte Folded Spill
-	movl	52(%esi), %eax
-	movl	%eax, -220(%ebp)        # 4-byte Spill
-	sbbl	%eax, -144(%ebp)        # 4-byte Folded Spill
-	movl	56(%esi), %eax
-	movl	%eax, -224(%ebp)        # 4-byte Spill
-	sbbl	%eax, -172(%ebp)        # 4-byte Folded Spill
-	movl	60(%esi), %eax
-	movl	%eax, -228(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ecx
-	movl	%ecx, -176(%ebp)        # 4-byte Spill
-	movl	-168(%ebp), %eax        # 4-byte Reload
-	sbbl	$0, %eax
-	movl	64(%esi), %ecx
-	movl	%ecx, -260(%ebp)        # 4-byte Spill
-	subl	%ecx, -180(%ebp)        # 4-byte Folded Spill
-	movl	68(%esi), %ecx
-	movl	%ecx, -264(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edi
-	movl	72(%esi), %ecx
-	movl	%ecx, -268(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -184(%ebp)        # 4-byte Folded Spill
-	movl	76(%esi), %ecx
-	movl	%ecx, -272(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -192(%ebp)        # 4-byte Folded Spill
-	movl	80(%esi), %ecx
-	movl	%ecx, -276(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %ebx
-	movl	84(%esi), %ecx
-	movl	%ecx, -280(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -196(%ebp)        # 4-byte Folded Spill
-	movl	88(%esi), %ecx
-	movl	%ecx, -284(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -188(%ebp)        # 4-byte Folded Spill
-	movl	92(%esi), %ecx
-	movl	%ecx, -288(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edx
-	movl	96(%esi), %ecx
-	movl	%ecx, -292(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -164(%ebp)        # 4-byte Folded Spill
-	movl	100(%esi), %ecx
-	movl	%ecx, -232(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -160(%ebp)        # 4-byte Folded Spill
-	movl	104(%esi), %ecx
-	movl	%ecx, -236(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -156(%ebp)        # 4-byte Folded Spill
-	movl	108(%esi), %ecx
-	movl	%ecx, -240(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -152(%ebp)        # 4-byte Folded Spill
-	movl	112(%esi), %ecx
-	movl	%ecx, -244(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -148(%ebp)        # 4-byte Folded Spill
-	movl	116(%esi), %ecx
-	movl	%ecx, -248(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -144(%ebp)        # 4-byte Folded Spill
-	movl	120(%esi), %ecx
-	movl	%ecx, -252(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -172(%ebp)        # 4-byte Folded Spill
-	movl	124(%esi), %ecx
-	movl	%ecx, -256(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -176(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	movl	-180(%ebp), %eax        # 4-byte Reload
-	addl	-200(%ebp), %eax        # 4-byte Folded Reload
-	adcl	-204(%ebp), %edi        # 4-byte Folded Reload
-	movl	%eax, 32(%esi)
-	movl	-184(%ebp), %eax        # 4-byte Reload
-	adcl	-208(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edi, 36(%esi)
-	movl	-192(%ebp), %ecx        # 4-byte Reload
-	adcl	-212(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 40(%esi)
-	adcl	-216(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%ecx, 44(%esi)
-	movl	-196(%ebp), %ecx        # 4-byte Reload
-	adcl	-220(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%ebx, 48(%esi)
-	movl	-188(%ebp), %eax        # 4-byte Reload
-	adcl	-224(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 52(%esi)
-	movl	%edx, %ecx
-	adcl	-228(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 56(%esi)
-	movl	-164(%ebp), %eax        # 4-byte Reload
-	adcl	-260(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 60(%esi)
-	movl	-160(%ebp), %ecx        # 4-byte Reload
-	adcl	-264(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 64(%esi)
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	-268(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 68(%esi)
-	movl	-152(%ebp), %ecx        # 4-byte Reload
-	adcl	-272(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 72(%esi)
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	-276(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 76(%esi)
-	movl	-144(%ebp), %ecx        # 4-byte Reload
-	adcl	-280(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 80(%esi)
-	movl	-172(%ebp), %eax        # 4-byte Reload
-	adcl	-284(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 84(%esi)
-	movl	-176(%ebp), %ecx        # 4-byte Reload
-	adcl	-288(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 88(%esi)
-	movl	-168(%ebp), %eax        # 4-byte Reload
-	adcl	-292(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 92(%esi)
-	movl	%eax, 96(%esi)
-	movl	-232(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 100(%esi)
-	movl	-236(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 104(%esi)
-	movl	-240(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 108(%esi)
-	movl	-244(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 112(%esi)
-	movl	-248(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 116(%esi)
-	movl	-252(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 120(%esi)
-	movl	-256(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 124(%esi)
-	addl	$300, %esp              # imm = 0x12C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end241:
-	.size	mcl_fpDbl_sqrPre16Lbmi2, .Lfunc_end241-mcl_fpDbl_sqrPre16Lbmi2
-
-	.globl	mcl_fp_mont16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont16Lbmi2,@function
-mcl_fp_mont16Lbmi2:                     # @mcl_fp_mont16Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$2428, %esp             # imm = 0x97C
-	calll	.L242$pb
-.L242$pb:
-	popl	%ebx
-.Ltmp53:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp53-.L242$pb), %ebx
-	movl	2460(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2360(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	2360(%esp), %ebp
-	movl	2364(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	2424(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	2420(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	2416(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	2412(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2408(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	2404(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2400(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2396(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	2392(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	2388(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	2384(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	2380(%esp), %edi
-	movl	2376(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2372(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2368(%esp), %esi
-	movl	%eax, (%esp)
-	leal	2288(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	addl	2288(%esp), %ebp
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2292(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	2296(%esp), %esi
-	movl	%esi, %ebp
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2300(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2304(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	2308(%esp), %edi
-	movl	%edi, %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2312(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2316(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2320(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2328(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2332(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2336(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2340(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2344(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2348(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	2352(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	2456(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2216(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %edi
-	movl	112(%esp), %ecx         # 4-byte Reload
-	addl	2216(%esp), %ecx
-	adcl	2220(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2224(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2228(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	2232(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	2236(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2240(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2244(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2248(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2252(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2256(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2260(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2264(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2268(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2272(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	2276(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	2280(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2144(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %edi
-	addl	2144(%esp), %ebp
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2148(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2152(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2156(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2160(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	2164(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	2168(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2172(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2176(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2180(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2184(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2188(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2192(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2196(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2200(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	2204(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	2208(%esp), %esi
-	adcl	$0, %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2072(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	112(%esp), %ecx         # 4-byte Reload
-	addl	2072(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2076(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2080(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2084(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2088(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	2092(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2096(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2100(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2104(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	2108(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2112(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	2116(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2120(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2124(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	2128(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	2132(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2136(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2000(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %esi
-	movl	%esi, %eax
-	movl	112(%esp), %ecx         # 4-byte Reload
-	addl	2000(%esp), %ecx
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	2004(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	2008(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	2012(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	2016(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	2020(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	2024(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	2028(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	2032(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	2036(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	2040(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	2044(%esp), %edi
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	2048(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	2052(%esp), %ebp
-	movl	124(%esp), %esi         # 4-byte Reload
-	adcl	2056(%esp), %esi
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	2060(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	2064(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1928(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	100(%esp), %ecx         # 4-byte Reload
-	addl	1928(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1932(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1936(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1940(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1944(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1948(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1952(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1956(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1960(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1964(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1968(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1972(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	1976(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	adcl	1980(%esp), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1984(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1988(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1992(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1856(%esp), %ecx
-	movl	2460(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	andl	$1, %ebp
-	movl	%ebp, %eax
-	addl	1856(%esp), %esi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1860(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1864(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1868(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1872(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1876(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1880(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1884(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1888(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	1892(%esp), %esi
-	adcl	1896(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %ebp         # 4-byte Reload
-	adcl	1900(%esp), %ebp
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1904(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1908(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1912(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1916(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1920(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1784(%esp), %ecx
-	movl	2452(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	movl	88(%esp), %ecx          # 4-byte Reload
-	addl	1784(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1788(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1792(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1796(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	1804(%esp), %edi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1808(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1812(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1816(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1820(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	1824(%esp), %ebp
-	movl	%ebp, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1828(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1832(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1836(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1840(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1844(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1848(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1712(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	1712(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1716(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1720(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1724(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1728(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1732(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1736(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1740(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	1744(%esp), %edi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1748(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1752(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1756(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1760(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	1764(%esp), %ebp
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	1768(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1772(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1776(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1640(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	1640(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1644(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1648(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1652(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1656(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1660(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1664(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1668(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1672(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1676(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1680(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1684(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	1688(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	adcl	1692(%esp), %esi
-	movl	%esi, %edi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1696(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1700(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1704(%esp), %esi
-	sbbl	%eax, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1568(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	movl	80(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1568(%esp), %ebp
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1572(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1576(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1580(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1584(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	1588(%esp), %ebp
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1592(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1596(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1600(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1604(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1608(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1612(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1616(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	1620(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1624(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1628(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	adcl	1632(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1496(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	1496(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1500(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	1504(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1508(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1512(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	1516(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1520(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1524(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1528(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1532(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1536(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1540(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1544(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1548(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1552(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1556(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1560(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1424(%esp), %ecx
-	movl	2460(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	andl	$1, %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	1424(%esp), %eax
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1428(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1432(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1436(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1440(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1444(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1448(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1452(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1456(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1460(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1464(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1468(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	1472(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1476(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	1480(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1484(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1488(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	2456(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1352(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	1352(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1356(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1360(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1364(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1372(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1376(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1380(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1384(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1396(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1400(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	1404(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1408(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	1412(%esp), %esi
-	adcl	1416(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1280(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %ebp
-	movl	%ebp, %eax
-	addl	1280(%esp), %edi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1284(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	1288(%esp), %ebp
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1292(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1296(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1300(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1304(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1308(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1312(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1316(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1320(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1324(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1328(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1332(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1336(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	1340(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1344(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %edi
-	movl	2456(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1208(%esp), %ecx
-	movl	2452(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	1208(%esp), %ecx
-	adcl	1212(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1216(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1220(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1228(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1232(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1236(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1240(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1244(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1248(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1252(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1256(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1260(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1264(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1272(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1136(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	1136(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1144(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1148(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %edi         # 4-byte Reload
-	adcl	1164(%esp), %edi
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	1188(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	1192(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1200(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1064(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	68(%esp), %ecx          # 4-byte Reload
-	addl	1064(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1080(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1084(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	1088(%esp), %edi
-	movl	%edi, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %esi         # 4-byte Reload
-	adcl	1092(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1104(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1116(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	992(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	movl	%edi, %eax
-	andl	$1, %eax
-	addl	992(%esp), %ebp
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	996(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1000(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1004(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	1008(%esp), %edi
-	movl	116(%esp), %ebp         # 4-byte Reload
-	adcl	1012(%esp), %ebp
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1016(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	adcl	1020(%esp), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1024(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1028(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	1032(%esp), %esi
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1036(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1040(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1044(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1048(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1052(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1056(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	920(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	920(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	928(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	932(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	adcl	936(%esp), %ebp
-	movl	%ebp, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	956(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	968(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	848(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	848(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	856(%esp), %edi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %esi         # 4-byte Reload
-	adcl	868(%esp), %esi
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	896(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	776(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	776(%esp), %ecx
-	adcl	780(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	784(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	792(%esp), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	800(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	704(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	movl	84(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	704(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	712(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	%ebp, %esi
-	adcl	728(%esp), %esi
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	732(%esp), %ebp
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	752(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	92(%esp), %ecx          # 4-byte Reload
-	addl	632(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	652(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	adcl	656(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	664(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	676(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	680(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	movl	92(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	560(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %esi         # 4-byte Reload
-	adcl	576(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	592(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	608(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	612(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	488(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	488(%esp), %ecx
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	500(%esp), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	508(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	516(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	520(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	536(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	movl	96(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	416(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	432(%esp), %edi
-	adcl	436(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	440(%esp), %esi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	444(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	448(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	116(%esp), %ecx         # 4-byte Reload
-	addl	344(%esp), %ecx
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	348(%esp), %ebp
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	356(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	360(%esp), %edi
-	adcl	364(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	movl	116(%esp), %ecx         # 4-byte Reload
-	andl	$1, %ecx
-	addl	272(%esp), %esi
-	adcl	276(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	288(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	296(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	308(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	60(%eax), %eax
-	movl	%eax, (%esp)
-	leal	200(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	120(%esp), %ecx         # 4-byte Reload
-	addl	200(%esp), %ecx
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	212(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	220(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	232(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	244(%esp), %edi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	128(%esp), %ecx
-	movl	2460(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	andl	$1, %ebp
-	addl	128(%esp), %esi
-	movl	104(%esp), %ebx         # 4-byte Reload
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	132(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	136(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	140(%esp), %ebx
-	movl	%ebx, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %edx         # 4-byte Reload
-	adcl	144(%esp), %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	148(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	152(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	156(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	160(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	164(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	168(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	172(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	176(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	180(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	184(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	188(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %edx         # 4-byte Reload
-	adcl	192(%esp), %edx
-	movl	%edx, 116(%esp)         # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%eax, %edx
-	movl	2460(%esp), %edi
-	subl	(%edi), %edx
-	movl	%ecx, %eax
-	sbbl	4(%edi), %eax
-	movl	%ebx, %ecx
-	sbbl	8(%edi), %ecx
-	movl	112(%esp), %ebx         # 4-byte Reload
-	sbbl	12(%edi), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebx         # 4-byte Reload
-	sbbl	16(%edi), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%edi), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	sbbl	28(%edi), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	sbbl	32(%edi), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	sbbl	36(%edi), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	sbbl	40(%edi), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	sbbl	44(%edi), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	sbbl	48(%edi), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	sbbl	52(%edi), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	sbbl	56(%edi), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	sbbl	60(%edi), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %edi         # 4-byte Reload
-	sbbl	$0, %ebp
-	andl	$1, %ebp
-	movl	%ebp, %ebx
-	jne	.LBB242_2
-# BB#1:
-	movl	%edx, %edi
-.LBB242_2:
-	movl	2448(%esp), %edx
-	movl	%edi, (%edx)
-	testb	%bl, %bl
-	movl	108(%esp), %edi         # 4-byte Reload
-	jne	.LBB242_4
-# BB#3:
-	movl	%eax, %edi
-.LBB242_4:
-	movl	%edi, 4(%edx)
-	jne	.LBB242_6
-# BB#5:
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-.LBB242_6:
-	movl	104(%esp), %eax         # 4-byte Reload
-	movl	%eax, 8(%edx)
-	jne	.LBB242_8
-# BB#7:
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-.LBB242_8:
-	movl	112(%esp), %eax         # 4-byte Reload
-	movl	%eax, 12(%edx)
-	movl	100(%esp), %eax         # 4-byte Reload
-	jne	.LBB242_10
-# BB#9:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB242_10:
-	movl	%eax, 16(%edx)
-	movl	88(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_12
-# BB#11:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB242_12:
-	movl	%eax, 20(%edx)
-	jne	.LBB242_14
-# BB#13:
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-.LBB242_14:
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	%eax, 24(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_16
-# BB#15:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB242_16:
-	movl	%eax, 28(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_18
-# BB#17:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB242_18:
-	movl	%eax, 32(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_20
-# BB#19:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB242_20:
-	movl	%eax, 36(%edx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_22
-# BB#21:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB242_22:
-	movl	%eax, 40(%edx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_24
-# BB#23:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB242_24:
-	movl	%eax, 44(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_26
-# BB#25:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB242_26:
-	movl	%eax, 48(%edx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_28
-# BB#27:
-	movl	52(%esp), %eax          # 4-byte Reload
-.LBB242_28:
-	movl	%eax, 52(%edx)
-	movl	96(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_30
-# BB#29:
-	movl	56(%esp), %eax          # 4-byte Reload
-.LBB242_30:
-	movl	%eax, 56(%edx)
-	movl	116(%esp), %eax         # 4-byte Reload
-	jne	.LBB242_32
-# BB#31:
-	movl	120(%esp), %eax         # 4-byte Reload
-.LBB242_32:
-	movl	%eax, 60(%edx)
-	addl	$2428, %esp             # imm = 0x97C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end242:
-	.size	mcl_fp_mont16Lbmi2, .Lfunc_end242-mcl_fp_mont16Lbmi2
-
-	.globl	mcl_fp_montNF16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF16Lbmi2,@function
-mcl_fp_montNF16Lbmi2:                   # @mcl_fp_montNF16Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$2412, %esp             # imm = 0x96C
-	calll	.L243$pb
-.L243$pb:
-	popl	%ebx
-.Ltmp54:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp54-.L243$pb), %ebx
-	movl	2444(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2344(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	2344(%esp), %edi
-	movl	2348(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	imull	%esi, %eax
-	movl	2408(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2404(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	2400(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	2396(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2392(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2388(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	2384(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	2380(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	2376(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	2372(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	2368(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	2364(%esp), %ebp
-	movl	2360(%esp), %esi
-	movl	2356(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2352(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	2272(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	2272(%esp), %edi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2276(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2280(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2284(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	2288(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	2292(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2296(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2300(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	2304(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	2308(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	2312(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2316(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2320(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2324(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	2328(%esp), %esi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2332(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2336(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2200(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	2264(%esp), %edx
-	movl	108(%esp), %ecx         # 4-byte Reload
-	addl	2200(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2204(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2208(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2212(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	2216(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2220(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2224(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	2228(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	2232(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	2236(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2240(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2244(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2248(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	2252(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2256(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	2260(%esp), %esi
-	adcl	$0, %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2128(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	2128(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2132(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2136(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2140(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2144(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2148(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2152(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	2156(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	2160(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	2164(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2168(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2172(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2176(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2180(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2184(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	2188(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	2192(%esp), %esi
-	movl	2440(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2056(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	2120(%esp), %eax
-	movl	84(%esp), %edx          # 4-byte Reload
-	addl	2056(%esp), %edx
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	2060(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	2064(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	2068(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	2072(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	2076(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	2080(%esp), %edi
-	movl	%edi, %ebp
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	2084(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	2088(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	2092(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	2096(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	2100(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	2104(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	2108(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	2112(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	adcl	2116(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1984(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	1984(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1988(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1992(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1996(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2000(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	2004(%esp), %edi
-	adcl	2008(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	2012(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	2016(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2020(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2024(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2028(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2032(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	2036(%esp), %ebp
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	2040(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2044(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2048(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1912(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	1976(%esp), %eax
-	movl	76(%esp), %edx          # 4-byte Reload
-	addl	1912(%esp), %edx
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1916(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1920(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1924(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	1928(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1932(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1936(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1940(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1944(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	1948(%esp), %edi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1952(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1956(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	1960(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	adcl	1964(%esp), %esi
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1968(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1972(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1840(%esp), %ecx
-	movl	2444(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	addl	1840(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1844(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1848(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1852(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1856(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1860(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1864(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1868(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1872(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1876(%esp), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	1880(%esp), %edi
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1884(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1888(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1892(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1896(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1900(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1904(%esp), %esi
-	movl	2440(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1768(%esp), %ecx
-	movl	2436(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	movl	1832(%esp), %edx
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	1768(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1772(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1776(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1780(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1784(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1788(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1792(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1796(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1804(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	adcl	1808(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1812(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1816(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1820(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1824(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1828(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1696(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	1696(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1700(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1704(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1708(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1712(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1716(%esp), %ebp
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	1720(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1724(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1728(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1732(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1736(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	1740(%esp), %esi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1744(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1748(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1752(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1756(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1760(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1624(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	1688(%esp), %edx
-	movl	68(%esp), %ecx          # 4-byte Reload
-	addl	1624(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1628(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1632(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1636(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	1640(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	1644(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	1648(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1652(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1656(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1660(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1664(%esp), %esi
-	movl	%esi, %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1668(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1672(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1676(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1680(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1684(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1552(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	1552(%esp), %esi
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	1556(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1560(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1564(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1568(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1572(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1576(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1580(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1584(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1588(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1592(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1596(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	1600(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1604(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1608(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1612(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	1616(%esp), %edi
-	movl	2440(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1480(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	1544(%esp), %eax
-	addl	1480(%esp), %esi
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	1484(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	1488(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	1492(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	1496(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	1500(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	1504(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	1508(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	1512(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	1516(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	1520(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	adcl	1524(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	1528(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	1532(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	1536(%esp), %ebp
-	adcl	1540(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	adcl	$0, %edi
-	movl	%esi, %eax
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1408(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	1408(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1412(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	1416(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1424(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1428(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1432(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1436(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1440(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1444(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1448(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1452(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1456(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1460(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1464(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	1468(%esp), %ebp
-	adcl	1472(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1336(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	1400(%esp), %eax
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	1336(%esp), %ecx
-	adcl	1340(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	1344(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	1348(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	1352(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	1356(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	1360(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	1364(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	1368(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	1372(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %edx         # 4-byte Reload
-	adcl	1376(%esp), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	1380(%esp), %edi
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1384(%esp), %esi
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	1388(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	adcl	1392(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	1396(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1264(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	1264(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1272(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1288(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1300(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1308(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	adcl	1312(%esp), %esi
-	movl	%esi, %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1320(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1324(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1328(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1192(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	1256(%esp), %eax
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	1192(%esp), %ecx
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	1196(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	1200(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	1204(%esp), %esi
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	1208(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	1212(%esp), %edi
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	1216(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	1220(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	1224(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %edx         # 4-byte Reload
-	adcl	1228(%esp), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	1232(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	adcl	1236(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	1240(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	1244(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	1248(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	1252(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1120(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	1120(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1132(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1140(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1144(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1148(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	1176(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1048(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	1112(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	1048(%esp), %ecx
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	1052(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	1068(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	1076(%esp), %ebp
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1080(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1084(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1088(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1092(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1100(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1104(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	976(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	976(%esp), %edi
-	adcl	980(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	1000(%esp), %edi
-	adcl	1004(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	1008(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	1016(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	904(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	968(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	904(%esp), %eax
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	908(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	912(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	916(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	920(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	adcl	924(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	928(%esp), %edi
-	adcl	932(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	936(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	adcl	940(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	944(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	948(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	952(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	956(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	960(%esp), %ebp
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	964(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	832(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	832(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	856(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	872(%esp), %esi
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	876(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	888(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	892(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	824(%esp), %edx
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	760(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	796(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	800(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	808(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	816(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	688(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	688(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	716(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	732(%esp), %ebp
-	adcl	736(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	616(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	680(%esp), %edx
-	movl	88(%esp), %ecx          # 4-byte Reload
-	addl	616(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	624(%esp), %edi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	640(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	656(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	672(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	544(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	544(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	552(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	560(%esp), %edi
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	564(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	600(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	536(%esp), %edx
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	472(%esp), %ecx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	484(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	adcl	488(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	496(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	400(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	400(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	412(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	420(%esp), %edi
-	adcl	424(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	444(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	392(%esp), %edx
-	movl	92(%esp), %ecx          # 4-byte Reload
-	addl	328(%esp), %ecx
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	336(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	344(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	352(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	368(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	256(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	256(%esp), %ebp
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	260(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	268(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	280(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	284(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	60(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	248(%esp), %edx
-	movl	%edi, %ecx
-	addl	184(%esp), %ecx
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	188(%esp), %edi
-	adcl	192(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	196(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	208(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	112(%esp), %esi
-	movl	%edi, %eax
-	adcl	116(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	120(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	adcl	124(%esp), %ebp
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	128(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %ebx
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	132(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	136(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	140(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	144(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	148(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	152(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	156(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	160(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	164(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	168(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	172(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	176(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	%eax, %edx
-	movl	2444(%esp), %esi
-	subl	(%esi), %edx
-	sbbl	4(%esi), %edi
-	movl	%ebp, %ecx
-	sbbl	8(%esi), %ecx
-	movl	%ebx, %eax
-	sbbl	12(%esi), %eax
-	movl	80(%esp), %ebx          # 4-byte Reload
-	sbbl	16(%esi), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	68(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%esi), %ebx
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	64(%esp), %ebx          # 4-byte Reload
-	sbbl	24(%esi), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebx          # 4-byte Reload
-	sbbl	28(%esi), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebx          # 4-byte Reload
-	sbbl	32(%esi), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebx          # 4-byte Reload
-	sbbl	36(%esi), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebx          # 4-byte Reload
-	sbbl	40(%esi), %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebx          # 4-byte Reload
-	sbbl	44(%esi), %ebx
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx          # 4-byte Reload
-	sbbl	48(%esi), %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebx          # 4-byte Reload
-	sbbl	52(%esi), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebx          # 4-byte Reload
-	sbbl	56(%esi), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebx         # 4-byte Reload
-	sbbl	60(%esi), %ebx
-	movl	%ebx, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	testl	%ebx, %ebx
-	js	.LBB243_2
-# BB#1:
-	movl	%edx, %esi
-.LBB243_2:
-	movl	2432(%esp), %edx
-	movl	%esi, (%edx)
-	movl	108(%esp), %esi         # 4-byte Reload
-	js	.LBB243_4
-# BB#3:
-	movl	%edi, %esi
-.LBB243_4:
-	movl	%esi, 4(%edx)
-	js	.LBB243_6
-# BB#5:
-	movl	%ecx, %ebp
-.LBB243_6:
-	movl	%ebp, 8(%edx)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	js	.LBB243_8
-# BB#7:
-	movl	%eax, %ecx
-.LBB243_8:
-	movl	%ecx, 12(%edx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	js	.LBB243_10
-# BB#9:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB243_10:
-	movl	%eax, 16(%edx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	js	.LBB243_12
-# BB#11:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB243_12:
-	movl	%eax, 20(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB243_14
-# BB#13:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB243_14:
-	movl	%eax, 24(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB243_16
-# BB#15:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB243_16:
-	movl	%eax, 28(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB243_18
-# BB#17:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB243_18:
-	movl	%eax, 32(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB243_20
-# BB#19:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB243_20:
-	movl	%eax, 36(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB243_22
-# BB#21:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB243_22:
-	movl	%eax, 40(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	js	.LBB243_24
-# BB#23:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB243_24:
-	movl	%eax, 44(%edx)
-	movl	88(%esp), %eax          # 4-byte Reload
-	js	.LBB243_26
-# BB#25:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB243_26:
-	movl	%eax, 48(%edx)
-	movl	96(%esp), %eax          # 4-byte Reload
-	js	.LBB243_28
-# BB#27:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB243_28:
-	movl	%eax, 52(%edx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	js	.LBB243_30
-# BB#29:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB243_30:
-	movl	%eax, 56(%edx)
-	movl	104(%esp), %eax         # 4-byte Reload
-	js	.LBB243_32
-# BB#31:
-	movl	84(%esp), %eax          # 4-byte Reload
-.LBB243_32:
-	movl	%eax, 60(%edx)
-	addl	$2412, %esp             # imm = 0x96C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end243:
-	.size	mcl_fp_montNF16Lbmi2, .Lfunc_end243-mcl_fp_montNF16Lbmi2
-
-	.globl	mcl_fp_montRed16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed16Lbmi2,@function
-mcl_fp_montRed16Lbmi2:                  # @mcl_fp_montRed16Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1356, %esp             # imm = 0x54C
-	calll	.L244$pb
-.L244$pb:
-	popl	%eax
-.Ltmp55:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp55-.L244$pb), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	1384(%esp), %edx
-	movl	-4(%edx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1380(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 112(%esp)         # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 120(%esp)         # 4-byte Spill
-	imull	%eax, %ebx
-	movl	124(%ecx), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%ecx), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%ecx), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	112(%ecx), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	108(%ecx), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	104(%ecx), %esi
-	movl	%esi, 152(%esp)         # 4-byte Spill
-	movl	100(%ecx), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	96(%ecx), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	92(%ecx), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	88(%ecx), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	84(%ecx), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	80(%ecx), %edi
-	movl	%edi, 148(%esp)         # 4-byte Spill
-	movl	76(%ecx), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	72(%ecx), %esi
-	movl	%esi, 192(%esp)         # 4-byte Spill
-	movl	68(%ecx), %edi
-	movl	%edi, 204(%esp)         # 4-byte Spill
-	movl	64(%ecx), %esi
-	movl	%esi, 200(%esp)         # 4-byte Spill
-	movl	60(%ecx), %edi
-	movl	%edi, 180(%esp)         # 4-byte Spill
-	movl	56(%ecx), %edi
-	movl	%edi, 164(%esp)         # 4-byte Spill
-	movl	52(%ecx), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	48(%ecx), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	44(%ecx), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	40(%ecx), %ebp
-	movl	36(%ecx), %edi
-	movl	32(%ecx), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	28(%ecx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	24(%ecx), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	20(%ecx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	16(%ecx), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	12(%ecx), %esi
-	movl	8(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	60(%edx), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	56(%edx), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	52(%edx), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	48(%edx), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	44(%edx), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	1288(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	1288(%esp), %eax
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1292(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1300(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1308(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1312(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1320(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1324(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	adcl	1328(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	1336(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1352(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	adcl	$0, 204(%esp)           # 4-byte Folded Spill
-	adcl	$0, 192(%esp)           # 4-byte Folded Spill
-	movl	196(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	movl	132(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	sbbl	%eax, %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1216(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	movl	112(%esp), %ecx         # 4-byte Reload
-	andl	$1, %ecx
-	addl	1216(%esp), %esi
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	1220(%esp), %edx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1228(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1232(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1236(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1240(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1244(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1248(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1252(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1256(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %esi         # 4-byte Reload
-	adcl	1260(%esp), %esi
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	1264(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1272(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	adcl	$0, 192(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 196(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	movl	156(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 132(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1144(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	1144(%esp), %ebp
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1148(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	adcl	1184(%esp), %esi
-	movl	%esi, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	1188(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1200(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1204(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1208(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	adcl	$0, 196(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	movl	168(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 156(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	movl	128(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1072(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	1072(%esp), %esi
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1076(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1080(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1084(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1088(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1092(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1104(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1116(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 168(%esp)         # 4-byte Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 128(%esp)         # 4-byte Spill
-	movl	124(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1000(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	1000(%esp), %edi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1004(%esp), %ecx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1044(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	movl	188(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	movl	172(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	928(%esp), %esi
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	932(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	936(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 188(%esp)         # 4-byte Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 172(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	movl	144(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	movl	100(%esp), %ebp         # 4-byte Reload
-	imull	%ebp, %eax
-	movl	%eax, (%esp)
-	leal	856(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	856(%esp), %edi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	860(%esp), %ecx
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	movl	176(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 144(%esp)         # 4-byte Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	%ebp, %eax
-	movl	%eax, (%esp)
-	leal	784(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	784(%esp), %esi
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	788(%esp), %ecx
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %ebp         # 4-byte Reload
-	adcl	828(%esp), %ebp
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 176(%esp)         # 4-byte Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	movl	156(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	712(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	712(%esp), %edi
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	716(%esp), %ecx
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	adcl	752(%esp), %ebp
-	movl	%ebp, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %edi         # 4-byte Reload
-	adcl	756(%esp), %edi
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 156(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	640(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	640(%esp), %esi
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	644(%esp), %ecx
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %esi         # 4-byte Reload
-	adcl	668(%esp), %esi
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	adcl	680(%esp), %edi
-	movl	%edi, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	152(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	1384(%esp), %eax
-	movl	%eax, %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	568(%esp), %ebp
-	movl	140(%esp), %ecx         # 4-byte Reload
-	adcl	572(%esp), %ecx
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %ebp         # 4-byte Reload
-	adcl	588(%esp), %ebp
-	adcl	592(%esp), %esi
-	movl	%esi, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %esi         # 4-byte Reload
-	adcl	596(%esp), %esi
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	adcl	632(%esp), %edi
-	movl	%edi, 152(%esp)         # 4-byte Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	496(%esp), %edi
-	movl	136(%esp), %ecx         # 4-byte Reload
-	adcl	500(%esp), %ecx
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %edi         # 4-byte Reload
-	adcl	508(%esp), %edi
-	adcl	512(%esp), %ebp
-	movl	%ebp, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	adcl	520(%esp), %esi
-	movl	%esi, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %ebp         # 4-byte Reload
-	adcl	532(%esp), %ebp
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	424(%esp), %esi
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	428(%esp), %eax
-	adcl	432(%esp), %edi
-	movl	%edi, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %ecx         # 4-byte Reload
-	adcl	436(%esp), %ecx
-	movl	%ecx, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %ecx         # 4-byte Reload
-	adcl	440(%esp), %ecx
-	movl	%ecx, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %ecx         # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	%ecx, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %ecx         # 4-byte Reload
-	adcl	448(%esp), %ecx
-	movl	%ecx, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %ecx         # 4-byte Reload
-	adcl	452(%esp), %ecx
-	movl	%ecx, 196(%esp)         # 4-byte Spill
-	adcl	456(%esp), %ebp
-	movl	184(%esp), %ecx         # 4-byte Reload
-	adcl	460(%esp), %ecx
-	movl	%ecx, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %ecx         # 4-byte Reload
-	adcl	464(%esp), %ecx
-	movl	%ecx, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %ecx         # 4-byte Reload
-	adcl	468(%esp), %ecx
-	movl	%ecx, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %ecx         # 4-byte Reload
-	adcl	472(%esp), %ecx
-	movl	%ecx, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %ecx         # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	%ecx, 172(%esp)         # 4-byte Spill
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	480(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	484(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	movl	144(%esp), %ecx         # 4-byte Reload
-	adcl	488(%esp), %ecx
-	movl	%ecx, 144(%esp)         # 4-byte Spill
-	movl	132(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%eax, %esi
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	352(%esp), %esi
-	movl	164(%esp), %esi         # 4-byte Reload
-	adcl	356(%esp), %esi
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	adcl	380(%esp), %ebp
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	adcl	416(%esp), %edi
-	movl	%edi, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	280(%esp), %esi
-	movl	180(%esp), %ecx         # 4-byte Reload
-	adcl	284(%esp), %ecx
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	adcl	304(%esp), %ebp
-	movl	%ebp, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %esi         # 4-byte Reload
-	adcl	316(%esp), %esi
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	344(%esp), %edi
-	movl	%edi, 128(%esp)         # 4-byte Spill
-	movl	124(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %ebp
-	movl	%eax, (%esp)
-	leal	208(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	208(%esp), %ebp
-	movl	200(%esp), %edx         # 4-byte Reload
-	adcl	212(%esp), %edx
-	movl	%edx, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %ecx         # 4-byte Reload
-	adcl	220(%esp), %ecx
-	movl	%ecx, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %ebp         # 4-byte Reload
-	adcl	228(%esp), %ebp
-	movl	%ebp, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	%eax, %ebx
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	adcl	240(%esp), %esi
-	movl	%esi, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	272(%esp), %edi
-	movl	%edi, 124(%esp)         # 4-byte Spill
-	movl	112(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	movl	%edx, %eax
-	subl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	204(%esp), %esi         # 4-byte Reload
-	sbbl	12(%esp), %esi          # 4-byte Folded Reload
-	sbbl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	196(%esp), %eax         # 4-byte Reload
-	sbbl	20(%esp), %eax          # 4-byte Folded Reload
-	sbbl	28(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 100(%esp)         # 4-byte Spill
-	movl	188(%esp), %ebx         # 4-byte Reload
-	sbbl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 104(%esp)         # 4-byte Spill
-	movl	168(%esp), %ebx         # 4-byte Reload
-	sbbl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 108(%esp)         # 4-byte Spill
-	movl	176(%esp), %ebx         # 4-byte Reload
-	sbbl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 112(%esp)         # 4-byte Spill
-	movl	172(%esp), %ebx         # 4-byte Reload
-	sbbl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 116(%esp)         # 4-byte Spill
-	movl	152(%esp), %ebx         # 4-byte Reload
-	sbbl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %ebx         # 4-byte Reload
-	sbbl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 136(%esp)         # 4-byte Spill
-	movl	144(%esp), %ebx         # 4-byte Reload
-	sbbl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 140(%esp)         # 4-byte Spill
-	movl	132(%esp), %ebx         # 4-byte Reload
-	sbbl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 160(%esp)         # 4-byte Spill
-	movl	128(%esp), %ebx         # 4-byte Reload
-	sbbl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 164(%esp)         # 4-byte Spill
-	movl	124(%esp), %ebx         # 4-byte Reload
-	sbbl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 180(%esp)         # 4-byte Spill
-	sbbl	$0, %edi
-	andl	$1, %edi
-	movl	%edi, %ebx
-	jne	.LBB244_2
-# BB#1:
-	movl	%edx, 200(%esp)         # 4-byte Spill
-.LBB244_2:
-	movl	1376(%esp), %edx
-	movl	200(%esp), %edi         # 4-byte Reload
-	movl	%edi, (%edx)
-	testb	%bl, %bl
-	jne	.LBB244_4
-# BB#3:
-	movl	%esi, 204(%esp)         # 4-byte Spill
-.LBB244_4:
-	movl	204(%esp), %esi         # 4-byte Reload
-	movl	%esi, 4(%edx)
-	movl	192(%esp), %esi         # 4-byte Reload
-	jne	.LBB244_6
-# BB#5:
-	movl	%ecx, %esi
-.LBB244_6:
-	movl	%esi, 8(%edx)
-	movl	196(%esp), %ecx         # 4-byte Reload
-	jne	.LBB244_8
-# BB#7:
-	movl	%eax, %ecx
-.LBB244_8:
-	movl	%ecx, 12(%edx)
-	movl	128(%esp), %esi         # 4-byte Reload
-	movl	148(%esp), %eax         # 4-byte Reload
-	jne	.LBB244_10
-# BB#9:
-	movl	%ebp, %eax
-.LBB244_10:
-	movl	%eax, 16(%edx)
-	movl	124(%esp), %ecx         # 4-byte Reload
-	movl	176(%esp), %eax         # 4-byte Reload
-	movl	184(%esp), %ebp         # 4-byte Reload
-	jne	.LBB244_12
-# BB#11:
-	movl	100(%esp), %ebp         # 4-byte Reload
-.LBB244_12:
-	movl	%ebp, 20(%edx)
-	movl	152(%esp), %ebp         # 4-byte Reload
-	movl	188(%esp), %ebx         # 4-byte Reload
-	jne	.LBB244_14
-# BB#13:
-	movl	104(%esp), %ebx         # 4-byte Reload
-.LBB244_14:
-	movl	%ebx, 24(%edx)
-	movl	156(%esp), %ebx         # 4-byte Reload
-	movl	168(%esp), %edi         # 4-byte Reload
-	jne	.LBB244_16
-# BB#15:
-	movl	108(%esp), %edi         # 4-byte Reload
-.LBB244_16:
-	movl	%edi, 28(%edx)
-	movl	144(%esp), %edi         # 4-byte Reload
-	jne	.LBB244_18
-# BB#17:
-	movl	112(%esp), %eax         # 4-byte Reload
-.LBB244_18:
-	movl	%eax, 32(%edx)
-	jne	.LBB244_20
-# BB#19:
-	movl	116(%esp), %eax         # 4-byte Reload
-	movl	%eax, 172(%esp)         # 4-byte Spill
-.LBB244_20:
-	movl	172(%esp), %eax         # 4-byte Reload
-	movl	%eax, 36(%edx)
-	jne	.LBB244_22
-# BB#21:
-	movl	120(%esp), %ebp         # 4-byte Reload
-.LBB244_22:
-	movl	%ebp, 40(%edx)
-	movl	132(%esp), %eax         # 4-byte Reload
-	jne	.LBB244_24
-# BB#23:
-	movl	136(%esp), %ebx         # 4-byte Reload
-.LBB244_24:
-	movl	%ebx, 44(%edx)
-	jne	.LBB244_26
-# BB#25:
-	movl	140(%esp), %edi         # 4-byte Reload
-.LBB244_26:
-	movl	%edi, 48(%edx)
-	jne	.LBB244_28
-# BB#27:
-	movl	160(%esp), %eax         # 4-byte Reload
-.LBB244_28:
-	movl	%eax, 52(%edx)
-	jne	.LBB244_30
-# BB#29:
-	movl	164(%esp), %esi         # 4-byte Reload
-.LBB244_30:
-	movl	%esi, 56(%edx)
-	jne	.LBB244_32
-# BB#31:
-	movl	180(%esp), %ecx         # 4-byte Reload
-.LBB244_32:
-	movl	%ecx, 60(%edx)
-	addl	$1356, %esp             # imm = 0x54C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end244:
-	.size	mcl_fp_montRed16Lbmi2, .Lfunc_end244-mcl_fp_montRed16Lbmi2
-
-	.globl	mcl_fp_addPre16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre16Lbmi2,@function
-mcl_fp_addPre16Lbmi2:                   # @mcl_fp_addPre16Lbmi2
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %ebx
-	adcl	8(%ecx), %ebx
-	movl	16(%esp), %edi
-	movl	%edx, (%edi)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%edi)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%ebx, 8(%edi)
-	movl	20(%eax), %ebx
-	movl	%edx, 12(%edi)
-	movl	20(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	24(%eax), %ebx
-	movl	%esi, 16(%edi)
-	movl	24(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	28(%eax), %ebx
-	movl	%edx, 20(%edi)
-	movl	28(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	32(%eax), %ebx
-	movl	%esi, 24(%edi)
-	movl	32(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	36(%eax), %ebx
-	movl	%edx, 28(%edi)
-	movl	36(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	40(%eax), %ebx
-	movl	%esi, 32(%edi)
-	movl	40(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	44(%eax), %ebx
-	movl	%edx, 36(%edi)
-	movl	44(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	48(%eax), %ebx
-	movl	%esi, 40(%edi)
-	movl	48(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	52(%eax), %ebx
-	movl	%edx, 44(%edi)
-	movl	52(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	56(%eax), %ebx
-	movl	%esi, 48(%edi)
-	movl	56(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	%edx, 52(%edi)
-	movl	%esi, 56(%edi)
-	movl	60(%eax), %eax
-	movl	60(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 60(%edi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end245:
-	.size	mcl_fp_addPre16Lbmi2, .Lfunc_end245-mcl_fp_addPre16Lbmi2
-
-	.globl	mcl_fp_subPre16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre16Lbmi2,@function
-mcl_fp_subPre16Lbmi2:                   # @mcl_fp_subPre16Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebp
-	sbbl	8(%edx), %ebp
-	movl	20(%esp), %ebx
-	movl	%esi, (%ebx)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebx)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebp, 8(%ebx)
-	movl	20(%edx), %ebp
-	movl	%esi, 12(%ebx)
-	movl	20(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	24(%edx), %ebp
-	movl	%edi, 16(%ebx)
-	movl	24(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	28(%edx), %ebp
-	movl	%esi, 20(%ebx)
-	movl	28(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	32(%edx), %ebp
-	movl	%edi, 24(%ebx)
-	movl	32(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	36(%edx), %ebp
-	movl	%esi, 28(%ebx)
-	movl	36(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	40(%edx), %ebp
-	movl	%edi, 32(%ebx)
-	movl	40(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	44(%edx), %ebp
-	movl	%esi, 36(%ebx)
-	movl	44(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	48(%edx), %ebp
-	movl	%edi, 40(%ebx)
-	movl	48(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	52(%edx), %ebp
-	movl	%esi, 44(%ebx)
-	movl	52(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	56(%edx), %ebp
-	movl	%edi, 48(%ebx)
-	movl	56(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	%esi, 52(%ebx)
-	movl	%edi, 56(%ebx)
-	movl	60(%edx), %edx
-	movl	60(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 60(%ebx)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end246:
-	.size	mcl_fp_subPre16Lbmi2, .Lfunc_end246-mcl_fp_subPre16Lbmi2
-
-	.globl	mcl_fp_shr1_16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_16Lbmi2,@function
-mcl_fp_shr1_16Lbmi2:                    # @mcl_fp_shr1_16Lbmi2
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	8(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 4(%ecx)
-	movl	12(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 8(%ecx)
-	movl	16(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 16(%ecx)
-	movl	24(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 24(%ecx)
-	movl	32(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 32(%ecx)
-	movl	40(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 40(%ecx)
-	movl	48(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 44(%ecx)
-	movl	52(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 48(%ecx)
-	movl	56(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 52(%ecx)
-	movl	60(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	%edx, 56(%ecx)
-	shrl	%eax
-	movl	%eax, 60(%ecx)
-	popl	%esi
-	retl
-.Lfunc_end247:
-	.size	mcl_fp_shr1_16Lbmi2, .Lfunc_end247-mcl_fp_shr1_16Lbmi2
-
-	.globl	mcl_fp_add16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add16Lbmi2,@function
-mcl_fp_add16Lbmi2:                      # @mcl_fp_add16Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$56, %esp
-	movl	84(%esp), %edx
-	movl	(%edx), %esi
-	movl	4(%edx), %ebp
-	movl	80(%esp), %ecx
-	addl	(%ecx), %esi
-	movl	%esi, %ebx
-	adcl	4(%ecx), %ebp
-	movl	8(%edx), %eax
-	adcl	8(%ecx), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	12(%ecx), %esi
-	movl	16(%ecx), %edi
-	adcl	12(%edx), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	16(%edx), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	20(%ecx), %eax
-	adcl	20(%edx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%ecx), %eax
-	adcl	24(%edx), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%ecx), %eax
-	adcl	28(%edx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%ecx), %eax
-	adcl	32(%edx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%ecx), %eax
-	adcl	36(%edx), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	40(%ecx), %eax
-	adcl	40(%edx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	44(%ecx), %eax
-	adcl	44(%edx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	48(%ecx), %eax
-	adcl	48(%edx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	52(%ecx), %eax
-	adcl	52(%edx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	56(%ecx), %esi
-	adcl	56(%edx), %esi
-	movl	60(%ecx), %ecx
-	adcl	60(%edx), %ecx
-	movl	76(%esp), %edx
-	movl	%ebx, (%edx)
-	movl	%ebx, %eax
-	movl	%ebp, 4(%edx)
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%edx)
-	movl	48(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 12(%edx)
-	movl	%edi, 16(%edx)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%edx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%edx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%edx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%edx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 36(%edx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 40(%edx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 44(%edx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 48(%edx)
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	%edi, 52(%edx)
-	movl	%esi, 56(%edx)
-	movl	%ecx, 60(%edx)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	88(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	sbbl	4(%edi), %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	sbbl	8(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	sbbl	12(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	sbbl	16(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	sbbl	20(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	sbbl	24(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	sbbl	28(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	sbbl	44(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	sbbl	48(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	8(%esp), %eax           # 4-byte Reload
-	sbbl	52(%edi), %eax
-	movl	%eax, %ebp
-	sbbl	56(%edi), %esi
-	sbbl	60(%edi), %ecx
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB248_2
-# BB#1:                                 # %nocarry
-	movl	4(%esp), %edi           # 4-byte Reload
-	movl	%edi, (%edx)
-	movl	(%esp), %edi            # 4-byte Reload
-	movl	%edi, 4(%edx)
-	movl	52(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%edx)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%edx)
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%edx)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%edx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%edx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%edx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%edx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 36(%edx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 40(%edx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 44(%edx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 48(%edx)
-	movl	%ebp, 52(%edx)
-	movl	%esi, 56(%edx)
-	movl	%ecx, 60(%edx)
-.LBB248_2:                              # %carry
-	addl	$56, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end248:
-	.size	mcl_fp_add16Lbmi2, .Lfunc_end248-mcl_fp_add16Lbmi2
-
-	.globl	mcl_fp_addNF16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF16Lbmi2,@function
-mcl_fp_addNF16Lbmi2:                    # @mcl_fp_addNF16Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$124, %esp
-	movl	152(%esp), %edx
-	movl	(%edx), %eax
-	movl	4(%edx), %ecx
-	movl	148(%esp), %esi
-	addl	(%esi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	4(%esi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	60(%edx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	56(%edx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	52(%edx), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	48(%edx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	44(%edx), %edi
-	movl	40(%edx), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	36(%edx), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	32(%edx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	28(%edx), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	24(%edx), %eax
-	movl	20(%edx), %ebp
-	movl	16(%edx), %ebx
-	movl	12(%edx), %ecx
-	movl	8(%edx), %edx
-	adcl	8(%esi), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	12(%esi), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	16(%esi), %ebx
-	movl	%ebx, 68(%esp)          # 4-byte Spill
-	adcl	20(%esi), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	adcl	24(%esi), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	28(%esi), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	32(%esi), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	36(%esi), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esi), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	44(%esi), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	48(%esi), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	52(%esi), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	56(%esi), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	60(%esi), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	156(%esp), %edi
-	movl	80(%esp), %esi          # 4-byte Reload
-	subl	(%edi), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	sbbl	4(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	sbbl	8(%edi), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	sbbl	12(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	16(%edi), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	sbbl	24(%edi), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	sbbl	28(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ecx
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	sbbl	48(%edi), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	52(%edi), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %ebx
-	sbbl	56(%edi), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebx         # 4-byte Reload
-	sbbl	60(%edi), %ebx
-	movl	80(%esp), %edi          # 4-byte Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	testl	%ebx, %ebx
-	js	.LBB249_2
-# BB#1:
-	movl	%esi, %edi
-.LBB249_2:
-	movl	144(%esp), %ebx
-	movl	%edi, (%ebx)
-	movl	84(%esp), %edx          # 4-byte Reload
-	js	.LBB249_4
-# BB#3:
-	movl	(%esp), %edx            # 4-byte Reload
-.LBB249_4:
-	movl	%edx, 4(%ebx)
-	movl	68(%esp), %edx          # 4-byte Reload
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB249_6
-# BB#5:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB249_6:
-	movl	%eax, 8(%ebx)
-	movl	100(%esp), %eax         # 4-byte Reload
-	movl	88(%esp), %ecx          # 4-byte Reload
-	movl	64(%esp), %esi          # 4-byte Reload
-	js	.LBB249_8
-# BB#7:
-	movl	8(%esp), %esi           # 4-byte Reload
-.LBB249_8:
-	movl	%esi, 12(%ebx)
-	movl	108(%esp), %esi         # 4-byte Reload
-	js	.LBB249_10
-# BB#9:
-	movl	12(%esp), %edx          # 4-byte Reload
-.LBB249_10:
-	movl	%edx, 16(%ebx)
-	movl	112(%esp), %edi         # 4-byte Reload
-	movl	104(%esp), %ebp         # 4-byte Reload
-	js	.LBB249_12
-# BB#11:
-	movl	16(%esp), %edx          # 4-byte Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-.LBB249_12:
-	movl	72(%esp), %edx          # 4-byte Reload
-	movl	%edx, 20(%ebx)
-	js	.LBB249_14
-# BB#13:
-	movl	20(%esp), %ecx          # 4-byte Reload
-.LBB249_14:
-	movl	%ecx, 24(%ebx)
-	js	.LBB249_16
-# BB#15:
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-.LBB249_16:
-	movl	116(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 28(%ebx)
-	js	.LBB249_18
-# BB#17:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB249_18:
-	movl	%eax, 32(%ebx)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	js	.LBB249_20
-# BB#19:
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 120(%esp)         # 4-byte Spill
-.LBB249_20:
-	movl	120(%esp), %eax         # 4-byte Reload
-	movl	%eax, 36(%ebx)
-	js	.LBB249_22
-# BB#21:
-	movl	36(%esp), %ebp          # 4-byte Reload
-.LBB249_22:
-	movl	%ebp, 40(%ebx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	js	.LBB249_24
-# BB#23:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB249_24:
-	movl	%eax, 44(%ebx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	js	.LBB249_26
-# BB#25:
-	movl	44(%esp), %esi          # 4-byte Reload
-.LBB249_26:
-	movl	%esi, 48(%ebx)
-	js	.LBB249_28
-# BB#27:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB249_28:
-	movl	%eax, 52(%ebx)
-	js	.LBB249_30
-# BB#29:
-	movl	52(%esp), %ecx          # 4-byte Reload
-.LBB249_30:
-	movl	%ecx, 56(%ebx)
-	js	.LBB249_32
-# BB#31:
-	movl	56(%esp), %edi          # 4-byte Reload
-.LBB249_32:
-	movl	%edi, 60(%ebx)
-	addl	$124, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end249:
-	.size	mcl_fp_addNF16Lbmi2, .Lfunc_end249-mcl_fp_addNF16Lbmi2
-
-	.globl	mcl_fp_sub16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub16Lbmi2,@function
-mcl_fp_sub16Lbmi2:                      # @mcl_fp_sub16Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$60, %esp
-	movl	84(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	88(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	28(%esi), %eax
-	sbbl	28(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	32(%esi), %eax
-	sbbl	32(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	36(%esi), %eax
-	sbbl	36(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esi), %eax
-	sbbl	40(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	44(%esi), %edx
-	sbbl	44(%edi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	48(%esi), %ecx
-	sbbl	48(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	52(%esi), %eax
-	sbbl	52(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	56(%esi), %ebp
-	sbbl	56(%edi), %ebp
-	movl	60(%esi), %esi
-	sbbl	60(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	80(%esp), %ebx
-	movl	52(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	56(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 36(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 40(%ebx)
-	movl	%edx, 44(%ebx)
-	movl	%ecx, 48(%ebx)
-	movl	%eax, 52(%ebx)
-	movl	%ebp, 56(%ebx)
-	movl	%esi, 60(%ebx)
-	je	.LBB250_2
-# BB#1:                                 # %carry
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	92(%esp), %esi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	8(%esi), %edi
-	movl	12(%esi), %eax
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%edi, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	36(%esi), %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	40(%esi), %ecx
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 36(%ebx)
-	movl	44(%esi), %eax
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 40(%ebx)
-	movl	48(%esi), %ecx
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 44(%ebx)
-	movl	52(%esi), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 48(%ebx)
-	movl	%eax, 52(%ebx)
-	movl	56(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 56(%ebx)
-	movl	60(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, 60(%ebx)
-.LBB250_2:                              # %nocarry
-	addl	$60, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end250:
-	.size	mcl_fp_sub16Lbmi2, .Lfunc_end250-mcl_fp_sub16Lbmi2
-
-	.globl	mcl_fp_subNF16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF16Lbmi2,@function
-mcl_fp_subNF16Lbmi2:                    # @mcl_fp_subNF16Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$104, %esp
-	movl	128(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edx
-	movl	132(%esp), %edi
-	subl	(%edi), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	60(%ecx), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	56(%ecx), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	52(%ecx), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	48(%ecx), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	44(%ecx), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	36(%ecx), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	32(%ecx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	28(%ecx), %ebp
-	movl	24(%ecx), %ebx
-	movl	20(%ecx), %esi
-	movl	16(%ecx), %edx
-	movl	12(%ecx), %eax
-	movl	8(%ecx), %ecx
-	sbbl	8(%edi), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	sbbl	12(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	sbbl	28(%edi), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	sbbl	36(%edi), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	sbbl	48(%edi), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	52(%edi), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	sbbl	56(%edi), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	sbbl	60(%edi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	sarl	$31, %eax
-	movl	136(%esp), %esi
-	movl	60(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	56(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	52(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	48(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	44(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	40(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	36(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	32(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	28(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	24(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	20(%esi), %ebp
-	andl	%eax, %ebp
-	movl	16(%esi), %ebx
-	andl	%eax, %ebx
-	movl	12(%esi), %edi
-	andl	%eax, %edi
-	movl	8(%esi), %edx
-	andl	%eax, %edx
-	movl	4(%esi), %ecx
-	andl	%eax, %ecx
-	andl	(%esi), %eax
-	addl	64(%esp), %eax          # 4-byte Folded Reload
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	124(%esp), %esi
-	movl	%eax, (%esi)
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 4(%esi)
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edx, 8(%esi)
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edi, 12(%esi)
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebx, 16(%esi)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 20(%esi)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 24(%esi)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 28(%esi)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	adcl	100(%esp), %ecx         # 4-byte Folded Reload
-	movl	%eax, 32(%esi)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 36(%esi)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	adcl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 40(%esi)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 44(%esi)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	92(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 48(%esi)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 52(%esi)
-	movl	%eax, 56(%esi)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esi)
-	addl	$104, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end251:
-	.size	mcl_fp_subNF16Lbmi2, .Lfunc_end251-mcl_fp_subNF16Lbmi2
-
-	.globl	mcl_fpDbl_add16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add16Lbmi2,@function
-mcl_fpDbl_add16Lbmi2:                   # @mcl_fpDbl_add16Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$116, %esp
-	movl	144(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edx
-	movl	140(%esp), %ebx
-	addl	(%ebx), %esi
-	adcl	4(%ebx), %edx
-	movl	8(%ecx), %edi
-	adcl	8(%ebx), %edi
-	movl	12(%ebx), %ebp
-	movl	136(%esp), %eax
-	movl	%esi, (%eax)
-	movl	16(%ebx), %esi
-	adcl	12(%ecx), %ebp
-	adcl	16(%ecx), %esi
-	movl	%edx, 4(%eax)
-	movl	72(%ecx), %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	%edi, 8(%eax)
-	movl	20(%ecx), %edx
-	movl	%ebp, 12(%eax)
-	movl	20(%ebx), %edi
-	adcl	%edx, %edi
-	movl	24(%ecx), %edx
-	movl	%esi, 16(%eax)
-	movl	24(%ebx), %esi
-	adcl	%edx, %esi
-	movl	28(%ecx), %edx
-	movl	%edi, 20(%eax)
-	movl	28(%ebx), %edi
-	adcl	%edx, %edi
-	movl	32(%ecx), %edx
-	movl	%esi, 24(%eax)
-	movl	32(%ebx), %esi
-	adcl	%edx, %esi
-	movl	36(%ecx), %edx
-	movl	%edi, 28(%eax)
-	movl	36(%ebx), %edi
-	adcl	%edx, %edi
-	movl	40(%ecx), %edx
-	movl	%esi, 32(%eax)
-	movl	40(%ebx), %esi
-	adcl	%edx, %esi
-	movl	44(%ecx), %edx
-	movl	%edi, 36(%eax)
-	movl	44(%ebx), %edi
-	adcl	%edx, %edi
-	movl	48(%ecx), %edx
-	movl	%esi, 40(%eax)
-	movl	48(%ebx), %esi
-	adcl	%edx, %esi
-	movl	52(%ecx), %edx
-	movl	%edi, 44(%eax)
-	movl	52(%ebx), %edi
-	adcl	%edx, %edi
-	movl	56(%ecx), %edx
-	movl	%esi, 48(%eax)
-	movl	56(%ebx), %esi
-	adcl	%edx, %esi
-	movl	60(%ecx), %edx
-	movl	%edi, 52(%eax)
-	movl	60(%ebx), %ebp
-	adcl	%edx, %ebp
-	movl	64(%ecx), %edx
-	movl	%esi, 56(%eax)
-	movl	64(%ebx), %esi
-	adcl	%edx, %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	68(%ecx), %edx
-	movl	%ebp, 60(%eax)
-	movl	68(%ebx), %eax
-	adcl	%edx, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	72(%ebx), %eax
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%ecx), %ebp
-	movl	76(%ebx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%ecx), %ebp
-	movl	80(%ebx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	84(%ecx), %ebp
-	movl	84(%ebx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	88(%ecx), %ebp
-	movl	88(%ebx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%ecx), %ebp
-	movl	92(%ebx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%ecx), %ebp
-	movl	96(%ebx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	100(%ecx), %ebp
-	movl	100(%ebx), %edx
-	adcl	%ebp, %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	104(%ecx), %ebp
-	movl	104(%ebx), %edx
-	adcl	%ebp, %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	108(%ecx), %ebp
-	movl	108(%ebx), %edx
-	adcl	%ebp, %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	112(%ecx), %edx
-	movl	112(%ebx), %ebp
-	adcl	%edx, %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	116(%ecx), %edx
-	movl	116(%ebx), %esi
-	adcl	%edx, %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	120(%ecx), %edx
-	movl	120(%ebx), %edi
-	adcl	%edx, %edi
-	movl	124(%ecx), %ecx
-	movl	124(%ebx), %esi
-	adcl	%ecx, %esi
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	148(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	subl	(%edx), %ebx
-	movl	%ebx, (%esp)            # 4-byte Spill
-	movl	76(%esp), %ebx          # 4-byte Reload
-	sbbl	4(%edx), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	80(%esp), %ebx          # 4-byte Reload
-	sbbl	8(%edx), %ebx
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	84(%esp), %ebx          # 4-byte Reload
-	sbbl	12(%edx), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebx         # 4-byte Reload
-	sbbl	16(%edx), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%edx), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	108(%esp), %ebx         # 4-byte Reload
-	sbbl	24(%edx), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebx          # 4-byte Reload
-	sbbl	28(%edx), %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	sbbl	32(%edx), %ebx
-	movl	112(%esp), %eax         # 4-byte Reload
-	sbbl	36(%edx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	sbbl	44(%edx), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	48(%edx), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ebp
-	sbbl	52(%edx), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	%edi, %ebp
-	sbbl	56(%edx), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	%esi, %ebp
-	sbbl	60(%edx), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB252_2
-# BB#1:
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-.LBB252_2:
-	testb	%cl, %cl
-	movl	72(%esp), %ecx          # 4-byte Reload
-	jne	.LBB252_4
-# BB#3:
-	movl	(%esp), %ecx            # 4-byte Reload
-.LBB252_4:
-	movl	136(%esp), %ebx
-	movl	%ecx, 64(%ebx)
-	movl	%esi, %ebp
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	movl	92(%esp), %ecx          # 4-byte Reload
-	movl	88(%esp), %edx          # 4-byte Reload
-	movl	76(%esp), %esi          # 4-byte Reload
-	jne	.LBB252_6
-# BB#5:
-	movl	4(%esp), %esi           # 4-byte Reload
-.LBB252_6:
-	movl	%esi, 68(%ebx)
-	movl	84(%esp), %esi          # 4-byte Reload
-	movl	80(%esp), %eax          # 4-byte Reload
-	jne	.LBB252_8
-# BB#7:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB252_8:
-	movl	%eax, 72(%ebx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	jne	.LBB252_10
-# BB#9:
-	movl	12(%esp), %esi          # 4-byte Reload
-.LBB252_10:
-	movl	%esi, 76(%ebx)
-	jne	.LBB252_12
-# BB#11:
-	movl	16(%esp), %esi          # 4-byte Reload
-	movl	%esi, 104(%esp)         # 4-byte Spill
-.LBB252_12:
-	movl	104(%esp), %esi         # 4-byte Reload
-	movl	%esi, 80(%ebx)
-	jne	.LBB252_14
-# BB#13:
-	movl	20(%esp), %edx          # 4-byte Reload
-.LBB252_14:
-	movl	%edx, 84(%ebx)
-	jne	.LBB252_16
-# BB#15:
-	movl	24(%esp), %edx          # 4-byte Reload
-	movl	%edx, 108(%esp)         # 4-byte Spill
-.LBB252_16:
-	movl	108(%esp), %edx         # 4-byte Reload
-	movl	%edx, 88(%ebx)
-	jne	.LBB252_18
-# BB#17:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB252_18:
-	movl	%ecx, 92(%ebx)
-	movl	64(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 96(%ebx)
-	jne	.LBB252_20
-# BB#19:
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-.LBB252_20:
-	movl	112(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 100(%ebx)
-	jne	.LBB252_22
-# BB#21:
-	movl	36(%esp), %edi          # 4-byte Reload
-.LBB252_22:
-	movl	%edi, 104(%ebx)
-	movl	100(%esp), %ecx         # 4-byte Reload
-	jne	.LBB252_24
-# BB#23:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB252_24:
-	movl	%ecx, 108(%ebx)
-	movl	72(%esp), %ecx          # 4-byte Reload
-	jne	.LBB252_26
-# BB#25:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB252_26:
-	movl	%eax, 112(%ebx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	jne	.LBB252_28
-# BB#27:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB252_28:
-	movl	%eax, 116(%ebx)
-	jne	.LBB252_30
-# BB#29:
-	movl	52(%esp), %ecx          # 4-byte Reload
-.LBB252_30:
-	movl	%ecx, 120(%ebx)
-	jne	.LBB252_32
-# BB#31:
-	movl	56(%esp), %ebp          # 4-byte Reload
-.LBB252_32:
-	movl	%ebp, 124(%ebx)
-	addl	$116, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end252:
-	.size	mcl_fpDbl_add16Lbmi2, .Lfunc_end252-mcl_fpDbl_add16Lbmi2
-
-	.globl	mcl_fpDbl_sub16Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub16Lbmi2,@function
-mcl_fpDbl_sub16Lbmi2:                   # @mcl_fpDbl_sub16Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$108, %esp
-	movl	132(%esp), %eax
-	movl	(%eax), %esi
-	movl	4(%eax), %edi
-	movl	136(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%eax), %ebx
-	sbbl	8(%edx), %ebx
-	movl	128(%esp), %ecx
-	movl	%esi, (%ecx)
-	movl	12(%eax), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ecx)
-	movl	16(%eax), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ecx)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ecx)
-	movl	24(%eax), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%edi, 24(%ecx)
-	movl	32(%eax), %edi
-	sbbl	%ebx, %edi
-	movl	36(%edx), %ebx
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	40(%edx), %ebx
-	movl	%edi, 32(%ecx)
-	movl	40(%eax), %edi
-	sbbl	%ebx, %edi
-	movl	44(%edx), %ebx
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	48(%edx), %ebx
-	movl	%edi, 40(%ecx)
-	movl	48(%eax), %edi
-	sbbl	%ebx, %edi
-	movl	52(%edx), %ebx
-	movl	%esi, 44(%ecx)
-	movl	52(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	56(%edx), %ebx
-	movl	%edi, 48(%ecx)
-	movl	56(%eax), %edi
-	sbbl	%ebx, %edi
-	movl	60(%edx), %ebx
-	movl	%esi, 52(%ecx)
-	movl	60(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	64(%edx), %ebx
-	movl	%edi, 56(%ecx)
-	movl	64(%eax), %edi
-	sbbl	%ebx, %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	68(%edx), %edi
-	movl	%esi, 60(%ecx)
-	movl	68(%eax), %esi
-	sbbl	%edi, %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	72(%edx), %esi
-	movl	72(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	76(%edx), %esi
-	movl	76(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	80(%edx), %esi
-	movl	80(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	84(%edx), %esi
-	movl	84(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	88(%edx), %esi
-	movl	88(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	92(%edx), %esi
-	movl	92(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	96(%edx), %esi
-	movl	96(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	100(%edx), %esi
-	movl	100(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	104(%edx), %esi
-	movl	104(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	108(%edx), %esi
-	movl	108(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	112(%edx), %esi
-	movl	112(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	116(%edx), %esi
-	movl	116(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	120(%edx), %esi
-	movl	120(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	124(%edx), %edx
-	movl	124(%eax), %eax
-	sbbl	%edx, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	140(%esp), %ebx
-	jne	.LBB253_1
-# BB#2:
-	movl	$0, 68(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_3
-.LBB253_1:
-	movl	60(%ebx), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-.LBB253_3:
-	testb	%al, %al
-	jne	.LBB253_4
-# BB#5:
-	movl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	$0, %ebp
-	jmp	.LBB253_6
-.LBB253_4:
-	movl	(%ebx), %ebp
-	movl	4(%ebx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-.LBB253_6:
-	jne	.LBB253_7
-# BB#8:
-	movl	$0, 36(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_9
-.LBB253_7:
-	movl	56(%ebx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-.LBB253_9:
-	jne	.LBB253_10
-# BB#11:
-	movl	$0, 32(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_12
-.LBB253_10:
-	movl	52(%ebx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-.LBB253_12:
-	jne	.LBB253_13
-# BB#14:
-	movl	$0, 28(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_15
-.LBB253_13:
-	movl	48(%ebx), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-.LBB253_15:
-	jne	.LBB253_16
-# BB#17:
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_18
-.LBB253_16:
-	movl	44(%ebx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-.LBB253_18:
-	jne	.LBB253_19
-# BB#20:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_21
-.LBB253_19:
-	movl	40(%ebx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB253_21:
-	jne	.LBB253_22
-# BB#23:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_24
-.LBB253_22:
-	movl	36(%ebx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB253_24:
-	jne	.LBB253_25
-# BB#26:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB253_27
-.LBB253_25:
-	movl	32(%ebx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB253_27:
-	jne	.LBB253_28
-# BB#29:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB253_30
-.LBB253_28:
-	movl	28(%ebx), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB253_30:
-	jne	.LBB253_31
-# BB#32:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB253_33
-.LBB253_31:
-	movl	24(%ebx), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB253_33:
-	jne	.LBB253_34
-# BB#35:
-	movl	$0, %esi
-	jmp	.LBB253_36
-.LBB253_34:
-	movl	20(%ebx), %esi
-.LBB253_36:
-	jne	.LBB253_37
-# BB#38:
-	movl	$0, %edx
-	jmp	.LBB253_39
-.LBB253_37:
-	movl	16(%ebx), %edx
-.LBB253_39:
-	jne	.LBB253_40
-# BB#41:
-	movl	$0, %edi
-	jmp	.LBB253_42
-.LBB253_40:
-	movl	12(%ebx), %edi
-.LBB253_42:
-	jne	.LBB253_43
-# BB#44:
-	xorl	%ebx, %ebx
-	jmp	.LBB253_45
-.LBB253_43:
-	movl	8(%ebx), %ebx
-.LBB253_45:
-	addl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, %eax
-	movl	24(%esp), %ebp          # 4-byte Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 64(%ecx)
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 68(%ecx)
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 72(%ecx)
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edi, 76(%ecx)
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 80(%ecx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, 84(%ecx)
-	movl	4(%esp), %edx           # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 88(%ecx)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 92(%ecx)
-	movl	12(%esp), %edx          # 4-byte Reload
-	adcl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 96(%ecx)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 100(%ecx)
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 104(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 108(%ecx)
-	movl	32(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 112(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%edx, 116(%ecx)
-	movl	%eax, 120(%ecx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 124(%ecx)
-	addl	$108, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end253:
-	.size	mcl_fpDbl_sub16Lbmi2, .Lfunc_end253-mcl_fpDbl_sub16Lbmi2
-
-	.align	16, 0x90
-	.type	.LmulPv544x32,@function
-.LmulPv544x32:                          # @mulPv544x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$60, %esp
-	movl	%edx, %eax
-	movl	80(%esp), %esi
-	movl	%esi, %edx
-	mulxl	4(%eax), %edi, %ebx
-	movl	%esi, %edx
-	mulxl	(%eax), %ebp, %edx
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	addl	%edi, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	8(%eax), %edx, %edi
-	adcl	%ebx, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	12(%eax), %edx, %ebx
-	adcl	%edi, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	16(%eax), %edx, %edi
-	adcl	%ebx, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	20(%eax), %edx, %ebx
-	adcl	%edi, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	24(%eax), %edx, %edi
-	adcl	%ebx, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	28(%eax), %edx, %ebx
-	adcl	%edi, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	32(%eax), %edx, %edi
-	adcl	%ebx, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	36(%eax), %edx, %ebx
-	adcl	%edi, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	40(%eax), %edx, %edi
-	adcl	%ebx, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	44(%eax), %edx, %ebx
-	adcl	%edi, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	48(%eax), %edx, %edi
-	adcl	%ebx, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%esi, %edx
-	mulxl	52(%eax), %ebx, %ebp
-	adcl	%edi, %ebx
-	movl	%esi, %edx
-	mulxl	56(%eax), %edi, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	adcl	%ebp, %edi
-	movl	%esi, %edx
-	mulxl	60(%eax), %edx, %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%ecx)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%ecx)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%ecx)
-	movl	44(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%ecx)
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%ecx)
-	movl	36(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%ecx)
-	movl	32(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%ecx)
-	movl	28(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%ecx)
-	movl	24(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 32(%ecx)
-	movl	20(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 36(%ecx)
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 40(%ecx)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 44(%ecx)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 48(%ecx)
-	movl	%ebx, 52(%ecx)
-	movl	%edi, 56(%ecx)
-	movl	%edx, 60(%ecx)
-	movl	%esi, %edx
-	mulxl	64(%eax), %eax, %edx
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 64(%ecx)
-	adcl	$0, %edx
-	movl	%edx, 68(%ecx)
-	movl	%ecx, %eax
-	addl	$60, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end254:
-	.size	.LmulPv544x32, .Lfunc_end254-.LmulPv544x32
-
-	.globl	mcl_fp_mulUnitPre17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre17Lbmi2,@function
-mcl_fp_mulUnitPre17Lbmi2:               # @mcl_fp_mulUnitPre17Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$140, %esp
-	calll	.L255$pb
-.L255$pb:
-	popl	%ebx
-.Ltmp56:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp56-.L255$pb), %ebx
-	movl	168(%esp), %eax
-	movl	%eax, (%esp)
-	leal	64(%esp), %ecx
-	movl	164(%esp), %edx
-	calll	.LmulPv544x32
-	movl	132(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	128(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp
-	movl	80(%esp), %ebx
-	movl	76(%esp), %edi
-	movl	72(%esp), %esi
-	movl	64(%esp), %edx
-	movl	68(%esp), %ecx
-	movl	160(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 60(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 64(%eax)
-	movl	60(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 68(%eax)
-	addl	$140, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end255:
-	.size	mcl_fp_mulUnitPre17Lbmi2, .Lfunc_end255-mcl_fp_mulUnitPre17Lbmi2
-
-	.globl	mcl_fpDbl_mulPre17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre17Lbmi2,@function
-mcl_fpDbl_mulPre17Lbmi2:                # @mcl_fpDbl_mulPre17Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1356, %esp             # imm = 0x54C
-	calll	.L256$pb
-.L256$pb:
-	popl	%edi
-.Ltmp57:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp57-.L256$pb), %edi
-	movl	%edi, 124(%esp)         # 4-byte Spill
-	movl	1384(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1280(%esp), %ecx
-	movl	1380(%esp), %edx
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1708(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1696(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1700(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1704(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1708(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1712(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1716(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1720(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1724(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	1728(%esp), %ebp
+	movl	%ebp, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1732(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	1736(%esp), %esi
+	adcl	1740(%esp), %edi
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1744(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1748(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1752(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1756(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1760(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	$0, 28(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	20(%eax)
+	pushl	2444(%esp)
+	leal	1636(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	24(%esp), %edx                  # 4-byte Reload
+	addl	1624(%esp), %edx
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1628(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1632(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1636(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	adcl	1640(%esp), %ebp
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1644(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	1648(%esp), %edi
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1652(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1656(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	adcl	1660(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1664(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1668(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1672(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1676(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1680(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1684(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1688(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1564(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1552(%esp), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1556(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1560(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1564(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	1568(%esp), %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1572(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	1576(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1580(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %esi                  # 4-byte Reload
+	adcl	1584(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1588(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1592(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1596(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1600(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1604(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1608(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	1612(%esp), %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1616(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	$0, 24(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	24(%eax)
+	pushl	2444(%esp)
+	leal	1492(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	16(%esp), %edx                  # 4-byte Reload
+	addl	1480(%esp), %edx
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1484(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	1488(%esp), %edi
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1492(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1496(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1500(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1504(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	adcl	1508(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1512(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1516(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1520(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1524(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1528(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1532(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	1536(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1540(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1544(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %ebp
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1420(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1408(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1412(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	1416(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	adcl	1420(%esp), %ebp
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1424(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1428(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edi                  # 4-byte Reload
+	adcl	1432(%esp), %edi
+	adcl	1436(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1440(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1444(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1448(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1452(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1456(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1460(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1464(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1468(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1472(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	$0, 16(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	28(%eax)
+	pushl	2444(%esp)
+	leal	1348(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	40(%esp), %edx                  # 4-byte Reload
+	addl	1336(%esp), %edx
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1340(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	1344(%esp), %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1348(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1352(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	1356(%esp), %edi
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	adcl	1360(%esp), %esi
+	movl	%esi, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1364(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1368(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1372(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1376(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1380(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	1384(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1388(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1392(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	1396(%esp), %ebp
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1400(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1276(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1264(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1268(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1272(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1276(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1280(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %esi                  # 4-byte Reload
+	adcl	1284(%esp), %esi
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1288(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1292(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1296(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1300(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1304(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1308(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	1312(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1316(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1320(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	1324(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1328(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	$0, 40(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	32(%eax)
+	pushl	2444(%esp)
+	leal	1204(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	32(%esp), %edx                  # 4-byte Reload
+	addl	1192(%esp), %edx
+	movl	60(%esp), %edi                  # 4-byte Reload
+	adcl	1196(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1200(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1204(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	1208(%esp), %esi
+	movl	%esi, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1212(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1216(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1220(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	adcl	1224(%esp), %ebp
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1228(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1232(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1236(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1240(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1244(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1248(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1252(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1256(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1132(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1120(%esp), %esi
+	adcl	1124(%esp), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1128(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1132(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1136(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1140(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1144(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1148(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	1152(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	1156(%esp), %esi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1160(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1164(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1168(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1172(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1176(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1180(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	1184(%esp), %edi
+	adcl	$0, 32(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	36(%eax)
+	pushl	2444(%esp)
+	leal	1060(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	60(%esp), %edx                  # 4-byte Reload
+	addl	1048(%esp), %edx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1052(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1056(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ebp                  # 4-byte Reload
+	adcl	1060(%esp), %ebp
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1064(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1068(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1072(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1076(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	adcl	1080(%esp), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1084(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1088(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1092(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1096(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1100(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1104(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	1108(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1112(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	988(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	976(%esp), %esi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	980(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	984(%esp), %edi
+	adcl	988(%esp), %ebp
+	movl	%ebp, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	992(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	996(%esp), %ebp
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1000(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1004(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1008(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1012(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1016(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1020(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	1024(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1028(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1032(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1036(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1040(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	$0, 60(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	40(%eax)
+	movl	2444(%esp), %eax
+	pushl	%eax
+	leal	916(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	4(%esp), %edx                   # 4-byte Reload
+	addl	904(%esp), %edx
+	adcl	908(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	912(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	916(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	adcl	920(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	924(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	928(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	932(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	936(%esp), %edi
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	940(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	944(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	948(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	952(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	956(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	960(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	964(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	968(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	844(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	832(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	836(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	840(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	844(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	848(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	852(%esp), %esi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	856(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	860(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	864(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	adcl	868(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	872(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	876(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	880(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	884(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	888(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	892(%esp), %ebp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	896(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	$0, 4(%esp)                     # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	44(%eax)
+	pushl	2444(%esp)
+	leal	772(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	760(%esp), %edx
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	764(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	768(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	772(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	776(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	780(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	784(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	788(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	792(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	796(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	800(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	804(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	808(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	812(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	816(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %edi                  # 4-byte Reload
+	adcl	820(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	824(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	700(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	688(%esp), %esi
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	692(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	696(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	700(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	704(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	708(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	712(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	716(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	720(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	724(%esp), %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	728(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	732(%esp), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	736(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	740(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	744(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	748(%esp), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	752(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	$0, 20(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	48(%eax)
+	pushl	2444(%esp)
+	leal	628(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	64(%esp), %edx                  # 4-byte Reload
+	addl	616(%esp), %edx
+	movl	68(%esp), %edi                  # 4-byte Reload
+	adcl	620(%esp), %edi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	624(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	628(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	632(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	636(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	640(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	644(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	648(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	652(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	656(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	660(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	664(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	668(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	672(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	676(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	680(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	556(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	544(%esp), %esi
+	adcl	548(%esp), %edi
+	movl	%edi, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	552(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	556(%esp), %esi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	560(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	564(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	568(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	572(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	576(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	580(%esp), %edi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	584(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	588(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	592(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	596(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	600(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	604(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	608(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	$0, 64(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	52(%eax)
+	pushl	2444(%esp)
+	leal	484(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	68(%esp), %edx                  # 4-byte Reload
+	addl	472(%esp), %edx
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	476(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	480(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	484(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	adcl	488(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	492(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	496(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	500(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	504(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	508(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	512(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	516(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	520(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	524(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	528(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	532(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	536(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	412(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	400(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	404(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	408(%esp), %esi
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	412(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	416(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	420(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	424(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	428(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	432(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	436(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	440(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	444(%esp), %ebp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	448(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	452(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	456(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	464(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	adcl	$0, 68(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	56(%eax)
+	pushl	2444(%esp)
+	leal	340(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	12(%esp), %edx                  # 4-byte Reload
+	addl	328(%esp), %edx
+	adcl	332(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	adcl	336(%esp), %edi
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	340(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	356(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	360(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	364(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	%ebp, %edi
+	adcl	368(%esp), %edi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	372(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	376(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	380(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	384(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	388(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	392(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	setb	12(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	leal	260(%esp), %eax
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	256(%esp), %esi
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	260(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	264(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	268(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	272(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	276(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	280(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	284(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	288(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	292(%esp), %ebp
+	adcl	296(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	300(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	304(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	308(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	316(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	320(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movzbl	12(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	188(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	60(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	56(%esp), %edx                  # 4-byte Reload
+	addl	184(%esp), %edx
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	188(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	192(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	196(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	200(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	204(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	208(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	212(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	216(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	220(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	224(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	228(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	232(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	240(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	244(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	248(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	setb	56(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	leal	116(%esp), %eax
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
 	movl	%edx, %esi
-	movl	%edi, %ebx
-	calll	.LmulPv544x32
-	movl	1348(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	1344(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1340(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1336(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1332(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1328(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1320(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1316(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1312(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1308(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1304(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1300(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	1296(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	1292(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	1288(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	1280(%esp), %eax
-	movl	1284(%esp), %ebp
-	movl	1376(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	1384(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1208(%esp), %ecx
-	movl	%esi, %edx
-	movl	%edi, %ebx
-	calll	.LmulPv544x32
-	addl	1208(%esp), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	1276(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1272(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	1268(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1264(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	1260(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1256(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	1252(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1248(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1244(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1240(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1236(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1232(%esp), %edi
-	movl	1228(%esp), %esi
-	movl	1224(%esp), %edx
-	movl	1220(%esp), %ecx
-	movl	1212(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1216(%esp), %eax
-	movl	1376(%esp), %ebp
-	movl	8(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, 4(%ebp)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	adcl	%ebp, 120(%esp)         # 4-byte Folded Spill
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 64(%esp)          # 4-byte Folded Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1136(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	1136(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1204(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1200(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1196(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1192(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1188(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1184(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1180(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	1176(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1168(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1164(%esp), %ebx
-	movl	1160(%esp), %edi
-	movl	1156(%esp), %esi
-	movl	1152(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1148(%esp), %edx
-	movl	1140(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1144(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 8(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	80(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1064(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	1064(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	1132(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1128(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1124(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	1120(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1116(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1112(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1108(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1100(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	1096(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1092(%esp), %ebx
-	movl	1088(%esp), %edi
-	movl	1084(%esp), %esi
-	movl	1080(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1076(%esp), %edx
-	movl	1068(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1072(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	992(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	992(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1060(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1056(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1052(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1044(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1040(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1036(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1032(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1028(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1024(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1020(%esp), %ebx
-	movl	1016(%esp), %edi
-	movl	1012(%esp), %esi
-	movl	1008(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	1004(%esp), %edx
-	movl	996(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1000(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 24(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	920(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	920(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	988(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	984(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	980(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	976(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	972(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	968(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	964(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	960(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	956(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	952(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	948(%esp), %ebx
-	movl	944(%esp), %edi
-	movl	940(%esp), %esi
-	movl	936(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	932(%esp), %edx
-	movl	924(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	928(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	848(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	848(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	916(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	912(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	908(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	904(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	900(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	896(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	892(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	888(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	884(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	880(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	876(%esp), %ebx
-	movl	872(%esp), %edi
-	movl	868(%esp), %esi
-	movl	864(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	860(%esp), %edx
-	movl	852(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	856(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	776(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	776(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	844(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	840(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	836(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	832(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	828(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	820(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	816(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	812(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	808(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	804(%esp), %ebx
-	movl	800(%esp), %edi
-	movl	796(%esp), %esi
-	movl	792(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	788(%esp), %edx
-	movl	780(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	784(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	704(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	704(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	772(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	768(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	764(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	760(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	756(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	752(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	748(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	744(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	740(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	736(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	732(%esp), %ebx
-	movl	728(%esp), %edi
-	movl	724(%esp), %esi
-	movl	720(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	716(%esp), %edx
-	movl	708(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	712(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 32(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	632(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	700(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	696(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	692(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	688(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	684(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	680(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	676(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	672(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	668(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	664(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	660(%esp), %ebx
-	movl	656(%esp), %edi
-	movl	652(%esp), %esi
-	movl	648(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	644(%esp), %edx
-	movl	636(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	640(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 36(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	560(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	628(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	624(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	620(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	616(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	612(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	596(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	592(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	588(%esp), %ebx
-	movl	584(%esp), %edi
-	movl	580(%esp), %esi
-	movl	576(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	572(%esp), %edx
-	movl	564(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	568(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 40(%eax)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 24(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	488(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	488(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	540(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	536(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	532(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	528(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	524(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	520(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	516(%esp), %ebx
-	movl	512(%esp), %edi
-	movl	508(%esp), %esi
-	movl	504(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	500(%esp), %edx
-	movl	492(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	496(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 44(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	416(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	484(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	480(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	476(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	472(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	468(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	464(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	460(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	456(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	452(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	448(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	444(%esp), %ebx
-	movl	440(%esp), %edi
-	movl	436(%esp), %esi
-	movl	432(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	428(%esp), %edx
-	movl	420(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	424(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 48(%eax)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	344(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	404(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	400(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	396(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	392(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	388(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	380(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	376(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	372(%esp), %ebx
-	movl	368(%esp), %edi
-	movl	364(%esp), %esi
-	movl	360(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	356(%esp), %edx
-	movl	348(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	352(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 52(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	1380(%esp), %eax
-	movl	%eax, %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	272(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	340(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	336(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	332(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	328(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	324(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	320(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	316(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	312(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	304(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	300(%esp), %ebx
-	movl	296(%esp), %edi
-	movl	292(%esp), %edx
-	movl	288(%esp), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	284(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	276(%esp), %eax
-	movl	280(%esp), %ecx
-	movl	120(%esp), %esi         # 4-byte Reload
-	movl	1376(%esp), %ebp
-	movl	%esi, 56(%ebp)
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, %ebp
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 24(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %ecx
-	movl	%ecx, %eax
-	movl	60(%eax), %eax
-	movl	%eax, (%esp)
-	leal	200(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	200(%esp), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	268(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	264(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	252(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	248(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	244(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	240(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	236(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	232(%esp), %edi
-	movl	228(%esp), %esi
-	movl	224(%esp), %edx
-	movl	220(%esp), %ecx
-	movl	216(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	204(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	208(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	1376(%esp), %ebx
-	movl	%ebp, 60(%ebx)
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	adcl	$0, 52(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	64(%eax), %eax
-	movl	%eax, (%esp)
-	leal	128(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	128(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	132(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	136(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	196(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	164(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	156(%esp), %ebx
-	movl	152(%esp), %edi
-	movl	148(%esp), %esi
-	movl	144(%esp), %edx
-	movl	140(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 64(%eax)
-	movl	64(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 68(%eax)
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	76(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 72(%eax)
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 76(%eax)
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 80(%eax)
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 84(%eax)
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edi, 88(%eax)
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebx, 92(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 96(%eax)
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 100(%eax)
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	96(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 104(%eax)
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx         # 4-byte Folded Reload
-	movl	%ecx, 108(%eax)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	116(%esp), %ecx         # 4-byte Folded Reload
-	movl	%edx, 112(%eax)
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 116(%eax)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 120(%eax)
-	movl	%ecx, 124(%eax)
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 128(%eax)
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 132(%eax)
-	addl	$1356, %esp             # imm = 0x54C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end256:
-	.size	mcl_fpDbl_mulPre17Lbmi2, .Lfunc_end256-mcl_fpDbl_mulPre17Lbmi2
-
-	.globl	mcl_fpDbl_sqrPre17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre17Lbmi2,@function
-mcl_fpDbl_sqrPre17Lbmi2:                # @mcl_fpDbl_sqrPre17Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1356, %esp             # imm = 0x54C
-	calll	.L257$pb
-.L257$pb:
-	popl	%ebx
-.Ltmp58:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp58-.L257$pb), %ebx
-	movl	%ebx, 124(%esp)         # 4-byte Spill
-	movl	1380(%esp), %edx
-	movl	(%edx), %eax
-	movl	%eax, (%esp)
-	leal	1280(%esp), %ecx
-	movl	%edx, %edi
-	movl	%ebx, %esi
-	calll	.LmulPv544x32
-	movl	1348(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	1344(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1340(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1336(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1332(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1328(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1320(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1316(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1312(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1308(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1304(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1300(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	1296(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	1292(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	1288(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	1280(%esp), %eax
-	movl	1284(%esp), %ebp
-	movl	1376(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%edi, %edx
-	movl	4(%edx), %eax
-	movl	%eax, (%esp)
-	leal	1208(%esp), %ecx
-	movl	%esi, %ebx
-	calll	.LmulPv544x32
-	addl	1208(%esp), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	1276(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1272(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	1268(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1264(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	1260(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1256(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	1252(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1248(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1244(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1240(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1236(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1232(%esp), %edi
-	movl	1228(%esp), %esi
-	movl	1224(%esp), %edx
-	movl	1220(%esp), %ecx
-	movl	1212(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1216(%esp), %eax
-	movl	1376(%esp), %ebp
-	movl	8(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, 4(%ebp)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	adcl	%ebp, 120(%esp)         # 4-byte Folded Spill
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 64(%esp)          # 4-byte Folded Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	8(%edx), %eax
-	movl	%eax, (%esp)
-	leal	1136(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	1136(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1204(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1200(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1196(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1192(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1188(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1184(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1180(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	1176(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1168(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1164(%esp), %ebx
-	movl	1160(%esp), %edi
-	movl	1156(%esp), %esi
-	movl	1152(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1148(%esp), %edx
-	movl	1140(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1144(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 8(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	80(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	12(%edx), %eax
-	movl	%eax, (%esp)
-	leal	1064(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	1064(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	1132(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1128(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1124(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	1120(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1116(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1112(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1108(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1100(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	1096(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1092(%esp), %ebx
-	movl	1088(%esp), %edi
-	movl	1084(%esp), %esi
-	movl	1080(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1076(%esp), %edx
-	movl	1068(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1072(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	16(%edx), %eax
-	movl	%eax, (%esp)
-	leal	992(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	992(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1060(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1056(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1052(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1044(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1040(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1036(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	1032(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1028(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1024(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1020(%esp), %ebx
-	movl	1016(%esp), %edi
-	movl	1012(%esp), %esi
-	movl	1008(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1004(%esp), %edx
-	movl	996(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1000(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	20(%edx), %eax
-	movl	%eax, (%esp)
-	leal	920(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	920(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	988(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	984(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	980(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	976(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	972(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	968(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	964(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	960(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	956(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	952(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	948(%esp), %ebx
-	movl	944(%esp), %edi
-	movl	940(%esp), %esi
-	movl	936(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	932(%esp), %edx
-	movl	924(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	928(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	24(%edx), %eax
-	movl	%eax, (%esp)
-	leal	848(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	848(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	916(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	912(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	908(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	904(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	900(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	896(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	892(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	888(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	884(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	880(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	876(%esp), %ebx
-	movl	872(%esp), %edi
-	movl	868(%esp), %esi
-	movl	864(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	860(%esp), %edx
-	movl	852(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	856(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	28(%edx), %eax
-	movl	%eax, (%esp)
-	leal	776(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	776(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	844(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	840(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	836(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	832(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	828(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	820(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	816(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	812(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	808(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	804(%esp), %ebx
-	movl	800(%esp), %edi
-	movl	796(%esp), %esi
-	movl	792(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	788(%esp), %edx
-	movl	780(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	784(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	32(%edx), %eax
-	movl	%eax, (%esp)
-	leal	704(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	704(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	772(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	768(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	764(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	760(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	756(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	752(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	748(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	744(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	740(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	736(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	732(%esp), %ebx
-	movl	728(%esp), %edi
-	movl	724(%esp), %esi
-	movl	720(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	716(%esp), %edx
-	movl	708(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	712(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 32(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	36(%edx), %eax
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	632(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	700(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	696(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	692(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	688(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	684(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	680(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	676(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	672(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	668(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	664(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	660(%esp), %ebx
-	movl	656(%esp), %edi
-	movl	652(%esp), %esi
-	movl	648(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	644(%esp), %edx
-	movl	636(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	640(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 36(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	40(%edx), %eax
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	560(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	628(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	624(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	620(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	616(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	612(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	596(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	592(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	588(%esp), %ebx
-	movl	584(%esp), %edi
-	movl	580(%esp), %esi
-	movl	576(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	572(%esp), %edx
-	movl	564(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	568(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 40(%eax)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 24(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	44(%edx), %eax
-	movl	%eax, (%esp)
-	leal	488(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	488(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	540(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	536(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	532(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	528(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	524(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	520(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	516(%esp), %ebx
-	movl	512(%esp), %edi
-	movl	508(%esp), %esi
-	movl	504(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	500(%esp), %edx
-	movl	492(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	496(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 44(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	48(%edx), %eax
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	416(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	484(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	480(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	476(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	472(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	468(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	464(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	460(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	456(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	452(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	448(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	444(%esp), %ebx
-	movl	440(%esp), %edi
-	movl	436(%esp), %esi
-	movl	432(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	428(%esp), %edx
-	movl	420(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	424(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 48(%eax)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 24(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	52(%edx), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	344(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	404(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	400(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	396(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	392(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	388(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	380(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	376(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	372(%esp), %ebx
-	movl	368(%esp), %edi
-	movl	364(%esp), %esi
-	movl	360(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	356(%esp), %edx
-	movl	348(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	352(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 52(%eax)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	56(%edx), %eax
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	272(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	340(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	336(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	332(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	328(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	324(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	320(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	316(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	312(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	304(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	300(%esp), %ebx
-	movl	296(%esp), %edi
-	movl	292(%esp), %edx
-	movl	288(%esp), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	284(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	276(%esp), %eax
-	movl	280(%esp), %ecx
-	movl	120(%esp), %esi         # 4-byte Reload
-	movl	1376(%esp), %ebp
-	movl	%esi, 56(%ebp)
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, %ebp
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 24(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	60(%edx), %eax
-	movl	%eax, (%esp)
-	leal	200(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	200(%esp), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	268(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	264(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	252(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	248(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	244(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	240(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	236(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	232(%esp), %edi
-	movl	228(%esp), %esi
-	movl	224(%esp), %edx
-	movl	220(%esp), %ecx
-	movl	216(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	204(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	208(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	1376(%esp), %ebx
-	movl	%ebp, 60(%ebx)
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	adcl	$0, 52(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	64(%edx), %eax
-	movl	%eax, (%esp)
-	leal	128(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	128(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	132(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	136(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	196(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	164(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	156(%esp), %ebx
-	movl	152(%esp), %edi
-	movl	148(%esp), %esi
-	movl	144(%esp), %edx
-	movl	140(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 64(%eax)
-	movl	68(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 68(%eax)
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	76(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 72(%eax)
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 76(%eax)
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 80(%eax)
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 84(%eax)
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edi, 88(%eax)
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebx, 92(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 96(%eax)
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 100(%eax)
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	96(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 104(%eax)
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx         # 4-byte Folded Reload
-	movl	%ecx, 108(%eax)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	112(%esp), %ecx         # 4-byte Folded Reload
-	movl	%edx, 112(%eax)
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 116(%eax)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 120(%eax)
-	movl	%ecx, 124(%eax)
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 128(%eax)
-	movl	124(%esp), %ecx         # 4-byte Reload
+	pushl	%ecx
+	pushl	2452(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	112(%esp), %esi
+	movzbl	56(%esp), %ecx                  # 1-byte Folded Reload
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	116(%esp), %eax
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	120(%esp), %esi
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	adcl	124(%esp), %ebx
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	128(%esp), %ebp
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	132(%esp), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	136(%esp), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	140(%esp), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	144(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	148(%esp), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	152(%esp), %edi
+	movl	60(%esp), %edx                  # 4-byte Reload
+	adcl	156(%esp), %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	160(%esp), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	164(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edx                  # 4-byte Reload
+	adcl	168(%esp), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edx                  # 4-byte Reload
+	adcl	172(%esp), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	176(%esp), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
 	adcl	$0, %ecx
-	movl	%ecx, 132(%eax)
-	addl	$1356, %esp             # imm = 0x54C
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	2444(%esp), %edx
+	subl	(%edx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	%esi, %eax
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	sbbl	4(%edx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	%ebx, %eax
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	sbbl	8(%edx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	%ebp, %eax
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	sbbl	12(%edx), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	16(%edx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	20(%edx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%edx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%edx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	sbbl	32(%edx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	sbbl	36(%edx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%edx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	sbbl	44(%edx), %esi
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	sbbl	48(%edx), %ebx
+	movl	64(%esp), %eax                  # 4-byte Reload
+	sbbl	52(%edx), %eax
+	movl	%edx, %ebp
+	movl	68(%esp), %edx                  # 4-byte Reload
+	sbbl	56(%ebp), %edx
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	movl	2444(%esp), %edi
+	sbbl	60(%edi), %ebp
+	sbbl	$0, %ecx
+	testb	$1, %cl
+	jne	.LBB77_1
+# %bb.2:
+	movl	2432(%esp), %ecx
+	movl	%ebp, 60(%ecx)
+	jne	.LBB77_3
+.LBB77_4:
+	movl	%edx, 56(%ecx)
+	movl	88(%esp), %edx                  # 4-byte Reload
+	jne	.LBB77_5
+.LBB77_6:
+	movl	%eax, 52(%ecx)
+	jne	.LBB77_7
+.LBB77_8:
+	movl	%ebx, 48(%ecx)
+	movl	84(%esp), %eax                  # 4-byte Reload
+	jne	.LBB77_9
+.LBB77_10:
+	movl	%esi, 44(%ecx)
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	movl	76(%esp), %esi                  # 4-byte Reload
+	jne	.LBB77_11
+.LBB77_12:
+	movl	%esi, 40(%ecx)
+	movl	100(%esp), %esi                 # 4-byte Reload
+	movl	80(%esp), %edi                  # 4-byte Reload
+	jne	.LBB77_13
+.LBB77_14:
+	movl	%edi, 36(%ecx)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	jne	.LBB77_15
+.LBB77_16:
+	movl	72(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 32(%ecx)
+	jne	.LBB77_17
+.LBB77_18:
+	movl	%eax, 28(%ecx)
+	jne	.LBB77_19
+.LBB77_20:
+	movl	56(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%ecx)
+	jne	.LBB77_21
+.LBB77_22:
+	movl	%edx, 20(%ecx)
+	movl	108(%esp), %edx                 # 4-byte Reload
+	movl	104(%esp), %eax                 # 4-byte Reload
+	jne	.LBB77_23
+.LBB77_24:
+	movl	%ebx, 16(%ecx)
+	jne	.LBB77_25
+.LBB77_26:
+	movl	%edi, 12(%ecx)
+	jne	.LBB77_27
+.LBB77_28:
+	movl	%esi, 8(%ecx)
+	jne	.LBB77_29
+.LBB77_30:
+	movl	%eax, 4(%ecx)
+	je	.LBB77_32
+.LBB77_31:
+	movl	52(%esp), %edx                  # 4-byte Reload
+.LBB77_32:
+	movl	%edx, (%ecx)
+	addl	$2412, %esp                     # imm = 0x96C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end257:
-	.size	mcl_fpDbl_sqrPre17Lbmi2, .Lfunc_end257-mcl_fpDbl_sqrPre17Lbmi2
-
-	.globl	mcl_fp_mont17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_mont17Lbmi2,@function
-mcl_fp_mont17Lbmi2:                     # @mcl_fp_mont17Lbmi2
-# BB#0:
+.LBB77_1:
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	movl	2432(%esp), %ecx
+	movl	%ebp, 60(%ecx)
+	je	.LBB77_4
+.LBB77_3:
+	movl	68(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 56(%ecx)
+	movl	88(%esp), %edx                  # 4-byte Reload
+	je	.LBB77_6
+.LBB77_5:
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 52(%ecx)
+	je	.LBB77_8
+.LBB77_7:
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 48(%ecx)
+	movl	84(%esp), %eax                  # 4-byte Reload
+	je	.LBB77_10
+.LBB77_9:
+	movl	4(%esp), %esi                   # 4-byte Reload
+	movl	%esi, 44(%ecx)
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	movl	76(%esp), %esi                  # 4-byte Reload
+	je	.LBB77_12
+.LBB77_11:
+	movl	60(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 40(%ecx)
+	movl	100(%esp), %esi                 # 4-byte Reload
+	movl	80(%esp), %edi                  # 4-byte Reload
+	je	.LBB77_14
+.LBB77_13:
+	movl	32(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 36(%ecx)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	je	.LBB77_16
+.LBB77_15:
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 72(%esp)                  # 4-byte Spill
+	movl	72(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 32(%ecx)
+	je	.LBB77_18
+.LBB77_17:
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 28(%ecx)
+	je	.LBB77_20
+.LBB77_19:
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%ecx)
+	je	.LBB77_22
+.LBB77_21:
+	movl	28(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 20(%ecx)
+	movl	108(%esp), %edx                 # 4-byte Reload
+	movl	104(%esp), %eax                 # 4-byte Reload
+	je	.LBB77_24
+.LBB77_23:
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	movl	%ebx, 16(%ecx)
+	je	.LBB77_26
+.LBB77_25:
+	movl	44(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%ecx)
+	je	.LBB77_28
+.LBB77_27:
+	movl	36(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%ecx)
+	je	.LBB77_30
+.LBB77_29:
+	movl	48(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%ecx)
+	jne	.LBB77_31
+	jmp	.LBB77_32
+.Lfunc_end77:
+	.size	mcl_fp_mont16Lbmi2, .Lfunc_end77-mcl_fp_mont16Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montNF16Lbmi2            # -- Begin function mcl_fp_montNF16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF16Lbmi2,@function
+mcl_fp_montNF16Lbmi2:                   # @mcl_fp_montNF16Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$2588, %esp             # imm = 0xA1C
-	calll	.L258$pb
-.L258$pb:
+	subl	$2412, %esp                     # imm = 0x96C
+	calll	.L78$pb
+.L78$pb:
 	popl	%ebx
-.Ltmp59:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp59-.L258$pb), %ebx
-	movl	2620(%esp), %eax
+.Ltmp20:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp20-.L78$pb), %ebx
+	movl	2444(%esp), %eax
 	movl	-4(%eax), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2512(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	2512(%esp), %ebp
-	movl	2516(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	2580(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	2576(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	2572(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	2568(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	2564(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	2560(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2556(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2552(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	2548(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	2544(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	2540(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2536(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	2532(%esp), %edi
-	movl	2528(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2524(%esp), %esi
-	movl	2520(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	2440(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	addl	2440(%esp), %ebp
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2444(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2448(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	2452(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2456(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	2460(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2464(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2468(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2472(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2476(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2480(%esp), %eax
-	movl	%eax, %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2484(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2488(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2492(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2496(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	2500(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	2504(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	2508(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	2616(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2368(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %ebp
-	movl	120(%esp), %ecx         # 4-byte Reload
-	addl	2368(%esp), %ecx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2372(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2376(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2380(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	2384(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2388(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	2392(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2396(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2400(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	2404(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	2408(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2412(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2416(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2420(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	2424(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	2428(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	2432(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	2436(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2296(%esp), %ecx
-	movl	2620(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	movl	116(%esp), %eax         # 4-byte Reload
-	andl	$1, %eax
-	addl	2296(%esp), %ebp
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	2300(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	2304(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	2308(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	2312(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	2316(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	2320(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	2324(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	2328(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	2332(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	2336(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	2340(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	2344(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	2348(%esp), %esi
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	2352(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	2356(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	2360(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	2364(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, %edi
-	movl	2616(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2224(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	100(%esp), %ecx         # 4-byte Reload
-	addl	2224(%esp), %ecx
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2228(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2232(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2236(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2240(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2244(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2248(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2252(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2256(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2260(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2264(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2268(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	2272(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
+	movl	%esi, 68(%esp)                  # 4-byte Spill
+	movl	2440(%esp), %ecx
+	subl	$4, %esp
+	leal	2348(%esp), %eax
+	pushl	(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	2344(%esp), %ebp
+	movl	2348(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	imull	%ebp, %eax
+	movl	2408(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	2404(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	2400(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	2396(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	2392(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	2388(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	2384(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	2380(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	2376(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	2372(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	2368(%esp), %edi
+	movl	2364(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	2360(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	2356(%esp), %esi
+	movl	2352(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	2276(%esp), %ecx
+	pushl	%eax
+	pushl	2452(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	2272(%esp), %ebp
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	2276(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	2280(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	2284(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	2288(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	adcl	2292(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2152(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	2152(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	2284(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2288(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	2292(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	2296(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	2300(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	2304(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	2308(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2312(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	2316(%esp), %esi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	2320(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	2324(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	adcl	2328(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	2332(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	2336(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	2204(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	4(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	2264(%esp), %eax
+	movl	64(%esp), %edx                  # 4-byte Reload
+	addl	2200(%esp), %edx
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	2204(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	2208(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	2212(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	2216(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	2220(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	2224(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	2228(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	2232(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	2236(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	2240(%esp), %esi
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	2244(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	2248(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	2252(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	2256(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	2260(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, %ebp
+	adcl	$0, %ebp
+	subl	$4, %esp
+	leal	2132(%esp), %eax
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %edi
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	2452(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	2128(%esp), %edi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	2132(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	2136(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2140(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	2144(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	2148(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	2152(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	2156(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	2160(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	2164(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2168(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	2168(%esp), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	2172(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	2176(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	2180(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	2184(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2188(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2192(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2196(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2200(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ebp         # 4-byte Reload
-	adcl	2204(%esp), %ebp
-	movl	128(%esp), %edi         # 4-byte Reload
-	adcl	2208(%esp), %edi
-	movl	132(%esp), %esi         # 4-byte Reload
-	adcl	2212(%esp), %esi
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2216(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2220(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2080(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	112(%esp), %ecx         # 4-byte Reload
-	addl	2080(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2084(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2088(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2092(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2096(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2100(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2104(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2108(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2112(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2116(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2120(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2124(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	2128(%esp), %ebp
-	movl	%ebp, 124(%esp)         # 4-byte Spill
-	adcl	2132(%esp), %edi
-	movl	%edi, 128(%esp)         # 4-byte Spill
-	adcl	2136(%esp), %esi
-	movl	%esi, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2140(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2144(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	2148(%esp), %esi
-	sbbl	%ebp, %ebp
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2008(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	%ebp, %eax
-	andl	$1, %eax
-	addl	2008(%esp), %edi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	2012(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	2016(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	2020(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	2024(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	2028(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	2032(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	2036(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	2040(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	2044(%esp), %edi
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	2048(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	2052(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	2056(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	2188(%esp), %esi
+	adcl	2192(%esp), %ebp
+	subl	$4, %esp
+	leal	2060(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	2120(%esp), %eax
+	movl	24(%esp), %edx                  # 4-byte Reload
+	addl	2056(%esp), %edx
+	movl	56(%esp), %ecx                  # 4-byte Reload
 	adcl	2060(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
 	adcl	2064(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
 	adcl	2068(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %ebp         # 4-byte Reload
-	adcl	2072(%esp), %ebp
-	adcl	2076(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	2072(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	2076(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	2080(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	2084(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	2088(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	2092(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	2096(%esp), %edi
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	2100(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	2104(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	2108(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	2112(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	adcl	2116(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
 	adcl	$0, %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1936(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	88(%esp), %ecx          # 4-byte Reload
-	addl	1936(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1940(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1944(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1948(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1952(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	1956(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1960(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1964(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1968(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1972(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1976(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1980(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1984(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
+	movl	%eax, %ebp
+	subl	$4, %esp
+	leal	1988(%esp), %eax
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	2452(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1984(%esp), %esi
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	1988(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	1992(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	1996(%esp), %ebp
-	movl	%ebp, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1996(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	2000(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	2004(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1864(%esp), %ecx
-	movl	2620(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	1864(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	2008(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	2012(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2016(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	2020(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	2024(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	2028(%esp), %esi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	2032(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	2036(%esp), %edi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	2040(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	2044(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	2048(%esp), %ebp
+	movl	%ebp, 64(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	1916(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	12(%ecx)
+	movl	2444(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	1976(%esp), %eax
+	movl	56(%esp), %edx                  # 4-byte Reload
+	addl	1912(%esp), %edx
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1916(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	1920(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1924(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1928(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	1932(%esp), %ebp
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1936(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	1940(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	1944(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1948(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	1952(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1956(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	adcl	1960(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1964(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1968(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edi                  # 4-byte Reload
+	adcl	1972(%esp), %edi
+	adcl	$0, %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	movl	2452(%esp), %ecx
+	pushl	%ecx
+	leal	1852(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1840(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1844(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1848(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1852(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1856(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	1860(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1864(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	1868(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	1872(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	1876(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	1880(%esp), %edi
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1880(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
 	adcl	1884(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1888(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	1888(%esp), %ebp
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	1892(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	1896(%esp), %esi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1900(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1904(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1908(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1912(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ebp         # 4-byte Reload
-	adcl	1916(%esp), %ebp
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1920(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1924(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1928(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1932(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1792(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	1792(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1796(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1804(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1808(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1812(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1816(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1820(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1824(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1828(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1832(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1836(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	1840(%esp), %ebp
-	movl	%ebp, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1844(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	1848(%esp), %edi
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	1852(%esp), %ebp
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1856(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1860(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1720(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %esi
-	movl	%esi, %eax
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	1720(%esp), %ecx
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1724(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1728(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1732(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1736(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1740(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1744(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1748(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1896(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	1900(%esp), %edi
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %edi                  # 4-byte Reload
+	adcl	1904(%esp), %edi
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	16(%eax)
+	pushl	2444(%esp)
+	leal	1780(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	1832(%esp), %ecx
+	movl	32(%esp), %eax                  # 4-byte Reload
+	addl	1768(%esp), %eax
+	movl	60(%esp), %edx                  # 4-byte Reload
+	adcl	1772(%esp), %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	1776(%esp), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	1780(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	1784(%esp), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	1788(%esp), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	1792(%esp), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	1796(%esp), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	1800(%esp), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	1804(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	1808(%esp), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	adcl	1812(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edx                  # 4-byte Reload
+	adcl	1816(%esp), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	1820(%esp), %ebp
+	movl	64(%esp), %edx                  # 4-byte Reload
+	adcl	1824(%esp), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	adcl	1828(%esp), %edi
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	movl	%ecx, %edi
+	adcl	$0, %edi
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%eax, %ecx
+	movl	%eax, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1708(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1696(%esp), %esi
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1700(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1704(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1708(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1712(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1716(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1720(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1724(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1728(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1732(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1736(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1740(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1744(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	1748(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %esi                  # 4-byte Reload
 	adcl	1752(%esp), %esi
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1756(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1760(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	1764(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	1768(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1772(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	adcl	1776(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	adcl	1780(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1784(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	1788(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1648(%esp), %ecx
-	movl	2612(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	movl	92(%esp), %eax          # 4-byte Reload
-	addl	1648(%esp), %eax
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	1652(%esp), %edi
-	movl	68(%esp), %ecx          # 4-byte Reload
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1756(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	1760(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	20(%eax)
+	pushl	2444(%esp)
+	leal	1636(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	1688(%esp), %eax
+	movl	60(%esp), %edx                  # 4-byte Reload
+	addl	1624(%esp), %edx
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1628(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1632(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	1636(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1640(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	1644(%esp), %ebp
+	movl	48(%esp), %edi                  # 4-byte Reload
+	adcl	1648(%esp), %edi
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1652(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
 	adcl	1656(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
 	adcl	1660(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
 	adcl	1664(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
 	adcl	1668(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
 	adcl	1672(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
 	adcl	1676(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
+	movl	%esi, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
 	adcl	1680(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
 	adcl	1684(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	1688(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	1692(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1696(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1700(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1704(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1708(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	adcl	1712(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1716(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%eax, %ebp
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1576(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	1576(%esp), %ebp
-	adcl	1580(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1564(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1552(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1556(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1560(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1564(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1568(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	1572(%esp), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	adcl	1576(%esp), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1580(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	1584(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	1588(%esp), %ebp
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	1592(%esp), %edi
-	movl	84(%esp), %esi          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1588(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1592(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
 	adcl	1596(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	1600(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	1604(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	1608(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	1612(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1616(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	1620(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1624(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1628(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1632(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1636(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1640(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1644(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1504(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	1504(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1508(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1512(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	adcl	1516(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	1520(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1524(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1528(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1532(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1536(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %edi         # 4-byte Reload
-	adcl	1540(%esp), %edi
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	1544(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1548(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1552(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1556(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1560(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1564(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	1568(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1572(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1432(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	76(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1432(%esp), %ebp
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1436(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1440(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1444(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1448(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1452(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1456(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	1460(%esp), %ebp
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1464(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	adcl	1468(%esp), %edi
-	movl	%edi, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	1472(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %edi         # 4-byte Reload
-	adcl	1476(%esp), %edi
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1480(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %edi                  # 4-byte Reload
+	adcl	1616(%esp), %edi
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	24(%eax)
+	pushl	2444(%esp)
+	leal	1492(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	1544(%esp), %eax
+	movl	28(%esp), %edx                  # 4-byte Reload
+	addl	1480(%esp), %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
 	adcl	1484(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
 	adcl	1488(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1492(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	1496(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	1492(%esp), %ebp
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	1496(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
 	adcl	1500(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1504(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	1508(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1512(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1516(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	1520(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1524(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	1528(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	1532(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1536(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	1540(%esp), %edi
 	adcl	$0, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1360(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	68(%esp), %ecx          # 4-byte Reload
-	addl	1360(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1364(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1372(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1376(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1380(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1384(%esp), %ebp
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	1400(%esp), %edi
-	movl	%edi, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1404(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	1408(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1420(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1408(%esp), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	1412(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	1416(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	1420(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	1424(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	1428(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1288(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	68(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1288(%esp), %edi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1292(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1296(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1300(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1304(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1308(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	1312(%esp), %ebp
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1316(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	1320(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	1324(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1328(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1332(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	adcl	1336(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %edi         # 4-byte Reload
-	adcl	1340(%esp), %edi
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1344(%esp), %esi
-	movl	80(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	1432(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1436(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	adcl	1440(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1444(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1448(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1452(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1456(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1460(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1464(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	1468(%esp), %edi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1472(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	28(%eax)
+	pushl	2444(%esp)
+	leal	1348(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	1400(%esp), %eax
+	movl	16(%esp), %edx                  # 4-byte Reload
+	addl	1336(%esp), %edx
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	1340(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1344(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
 	adcl	1348(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
 	adcl	1352(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1356(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	adcl	1356(%esp), %esi
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	1360(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	1364(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1368(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1372(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1376(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	1380(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ebp                  # 4-byte Reload
+	adcl	1384(%esp), %ebp
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1388(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	1392(%esp), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1396(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
 	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1216(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	1216(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1220(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1228(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1232(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1236(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ebp         # 4-byte Reload
-	adcl	1240(%esp), %ebp
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1244(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	1248(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1252(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1256(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1260(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	1264(%esp), %edi
-	movl	%edi, 112(%esp)         # 4-byte Spill
-	adcl	1268(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %edi
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1276(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1264(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1268(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	1272(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	1276(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	1280(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1144(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	%edi, %eax
-	andl	$1, %eax
-	addl	1144(%esp), %esi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1148(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1152(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1156(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	1160(%esp), %edi
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1164(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	1168(%esp), %ebp
-	movl	%ebp, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	1172(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	1176(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %esi         # 4-byte Reload
-	adcl	1180(%esp), %esi
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1184(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1188(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1192(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	1284(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1288(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1292(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	1296(%esp), %edi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1300(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1304(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1308(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	adcl	1312(%esp), %ebp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1316(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1320(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1324(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1328(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	32(%eax)
+	pushl	2444(%esp)
+	leal	1204(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	1256(%esp), %eax
+	movl	8(%esp), %edx                   # 4-byte Reload
+	addl	1192(%esp), %edx
+	movl	12(%esp), %ecx                  # 4-byte Reload
 	adcl	1196(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
 	adcl	1200(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
 	adcl	1204(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1208(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	adcl	1208(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
 	adcl	1212(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1216(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	adcl	1220(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1224(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1228(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	1232(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	adcl	1236(%esp), %ebp
+	movl	%ebp, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	1240(%esp), %ebp
+	movl	60(%esp), %edi                  # 4-byte Reload
+	adcl	1244(%esp), %edi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1248(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1252(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
 	adcl	$0, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1072(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	1072(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	1080(%esp), %ebp
-	adcl	1084(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1088(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1092(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	1104(%esp), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	1112(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1116(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1132(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1120(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	1124(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	1128(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	1132(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	1136(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1000(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	72(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1000(%esp), %edi
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1004(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	1008(%esp), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1012(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1016(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1020(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	1024(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ebp         # 4-byte Reload
-	adcl	1028(%esp), %ebp
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1032(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	1036(%esp), %edi
-	adcl	1040(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1044(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1048(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	1140(%esp), %esi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1144(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1148(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1152(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1156(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1160(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1164(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	1168(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	adcl	1172(%esp), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	1176(%esp), %edi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1180(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	1184(%esp), %ebp
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	36(%eax)
+	movl	2444(%esp), %eax
+	pushl	%eax
+	leal	1060(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	1112(%esp), %eax
+	movl	12(%esp), %edx                  # 4-byte Reload
+	addl	1048(%esp), %edx
+	movl	4(%esp), %ecx                   # 4-byte Reload
 	adcl	1052(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	1056(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	1056(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
 	adcl	1060(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1064(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	1064(%esp), %esi
+	movl	52(%esp), %ecx                  # 4-byte Reload
 	adcl	1068(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1072(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1076(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1080(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	1084(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	1088(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1092(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	1096(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	adcl	1100(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1104(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	1108(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
 	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	928(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	932(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	936(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	952(%esp), %ebp
-	movl	%ebp, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	960(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	980(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	984(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %edi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	988(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	976(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	980(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	984(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	988(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	992(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	996(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	856(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	856(%esp), %ebp
-	movl	96(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1000(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1004(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	1008(%esp), %esi
+	movl	64(%esp), %ebp                  # 4-byte Reload
+	adcl	1012(%esp), %ebp
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1016(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1020(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1024(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1028(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	1032(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1036(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1040(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	40(%eax)
+	pushl	2444(%esp)
+	leal	916(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	968(%esp), %eax
+	movl	4(%esp), %edx                   # 4-byte Reload
+	addl	904(%esp), %edx
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	908(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	912(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	916(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	920(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	924(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	928(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	932(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	adcl	936(%esp), %ebp
+	movl	%ebp, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	940(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	944(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	948(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	952(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	956(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	960(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	964(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	844(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	832(%esp), %esi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	836(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	840(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	844(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	adcl	848(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	852(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	856(%esp), %edi
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	860(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %esi                  # 4-byte Reload
+	adcl	864(%esp), %esi
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	868(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	872(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
 	adcl	876(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	880(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	884(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %ebp         # 4-byte Reload
-	adcl	888(%esp), %ebp
-	movl	100(%esp), %eax         # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	888(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	892(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	896(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	912(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2616(%esp), %ecx
-	movl	%ecx, %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	784(%esp), %ecx
-	movl	2612(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	784(%esp), %ecx
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	812(%esp), %ebp
-	movl	%ebp, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	820(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	828(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	712(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	712(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %esi         # 4-byte Reload
-	adcl	728(%esp), %esi
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	736(%esp), %ebp
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	756(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	640(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	104(%esp), %ecx         # 4-byte Reload
-	addl	640(%esp), %ecx
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	652(%esp), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	660(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	680(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	896(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	44(%eax)
+	pushl	2444(%esp)
+	leal	772(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	824(%esp), %eax
+	movl	48(%esp), %edx                  # 4-byte Reload
+	addl	760(%esp), %edx
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	764(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	768(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	772(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	776(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	780(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	784(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	788(%esp), %esi
+	movl	%esi, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	792(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	796(%esp), %ebp
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	800(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	804(%esp), %edi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	808(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	812(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	816(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	820(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	700(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	688(%esp), %esi
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	692(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	696(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	696(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	700(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	704(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	708(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	104(%esp), %ecx         # 4-byte Reload
-	andl	$1, %ecx
-	addl	568(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	712(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	716(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	720(%esp), %esi
+	adcl	724(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	728(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	732(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	736(%esp), %edi
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	740(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	744(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	748(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	752(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	48(%eax)
+	pushl	2444(%esp)
+	leal	628(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	680(%esp), %eax
+	movl	36(%esp), %edx                  # 4-byte Reload
+	addl	616(%esp), %edx
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	620(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	624(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	628(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	632(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	636(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	640(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	adcl	644(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	648(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	652(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	656(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	660(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	664(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	668(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	672(%esp), %ebp
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	676(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	556(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	544(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	552(%esp), %edi
+	movl	40(%esp), %esi                  # 4-byte Reload
+	adcl	556(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	560(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	564(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	568(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	572(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	576(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
 	adcl	580(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	584(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	588(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	592(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	596(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	600(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	600(%esp), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	604(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	608(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	616(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	624(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	108(%esp), %ecx         # 4-byte Reload
-	addl	496(%esp), %ecx
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	524(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	528(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	540(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	544(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	108(%esp), %ecx         # 4-byte Reload
-	andl	$1, %ecx
-	addl	424(%esp), %esi
-	movl	124(%esp), %eax         # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	608(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	52(%eax)
+	pushl	2444(%esp)
+	leal	484(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	536(%esp), %eax
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	472(%esp), %edx
+	adcl	476(%esp), %edi
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	adcl	480(%esp), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	484(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	488(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	492(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	496(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	500(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	adcl	504(%esp), %ebp
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	508(%esp), %edi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	512(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	516(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	520(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	524(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	528(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	532(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	412(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	400(%esp), %esi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	404(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	408(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	412(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	416(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	420(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	424(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	428(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %esi         # 4-byte Reload
-	adcl	440(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	432(%esp), %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	adcl	436(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	440(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	444(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	448(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	452(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
 	adcl	456(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	460(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	464(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	472(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	480(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	60(%eax), %eax
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	124(%esp), %ecx         # 4-byte Reload
-	addl	352(%esp), %ecx
-	movl	128(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	332(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	56(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	52(%esp), %edx                  # 4-byte Reload
+	addl	328(%esp), %edx
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	332(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	336(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	340(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
 	adcl	356(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	360(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	364(%esp), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	364(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	368(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
 	adcl	372(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	376(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	380(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	384(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	388(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	404(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	280(%esp), %ebp
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %esi         # 4-byte Reload
-	adcl	288(%esp), %esi
-	movl	120(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	392(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	260(%esp), %eax
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	256(%esp), %esi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	260(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	264(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	268(%esp), %esi
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	272(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	276(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	280(%esp), %ebp
+	movl	60(%esp), %edi                  # 4-byte Reload
+	adcl	284(%esp), %edi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	288(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	292(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %ebp         # 4-byte Reload
-	adcl	296(%esp), %ebp
-	adcl	300(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	296(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	300(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	304(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	308(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	308(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	312(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	316(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	64(%eax), %eax
-	movl	%eax, (%esp)
-	leal	208(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	128(%esp), %ecx         # 4-byte Reload
-	addl	208(%esp), %ecx
-	adcl	212(%esp), %esi
-	movl	%esi, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	320(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	188(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	60(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	movl	40(%esp), %edx                  # 4-byte Reload
+	addl	184(%esp), %edx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	188(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	192(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	196(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	200(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	204(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	adcl	208(%esp), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	212(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	216(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	220(%esp), %ebp
-	movl	%ebp, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	224(%esp), %ebp
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	220(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	224(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	228(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	232(%esp), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	232(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	236(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	240(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	244(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	136(%esp), %ecx
-	movl	2620(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	andl	$1, %edi
-	addl	136(%esp), %esi
-	movl	116(%esp), %edx         # 4-byte Reload
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	144(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	148(%esp), %edx
-	movl	%edx, 116(%esp)         # 4-byte Spill
-	adcl	152(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	164(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	168(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	248(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	116(%esp), %eax
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	112(%esp), %esi
+	movl	64(%esp), %edx                  # 4-byte Reload
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	116(%esp), %eax
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	120(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	124(%esp), %edx
+	movl	56(%esp), %edi                  # 4-byte Reload
+	adcl	128(%esp), %edi
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	132(%esp), %esi
+	movl	60(%esp), %ebx                  # 4-byte Reload
+	adcl	136(%esp), %ebx
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	140(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	144(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	148(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	152(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	156(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	160(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	adcl	164(%esp), %ebp
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	168(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
 	adcl	172(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
 	adcl	176(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	180(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	184(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	188(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	192(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	196(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	200(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	204(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	movl	132(%esp), %ecx         # 4-byte Reload
-	movl	2620(%esp), %ebx
-	subl	(%ebx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	sbbl	4(%ebx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	sbbl	8(%ebx), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	sbbl	12(%ebx), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%eax, %edx
-	sbbl	16(%ebx), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	sbbl	20(%ebx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	sbbl	24(%ebx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	28(%ebx), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	sbbl	32(%ebx), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	sbbl	36(%ebx), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	sbbl	40(%ebx), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	sbbl	44(%ebx), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	sbbl	48(%ebx), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	sbbl	52(%ebx), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	sbbl	56(%ebx), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	%ebx, %ebp
-	movl	108(%esp), %ebx         # 4-byte Reload
-	sbbl	60(%ebp), %ebx
-	movl	124(%esp), %esi         # 4-byte Reload
-	sbbl	64(%ebp), %esi
-	movl	%esi, %ebp
-	sbbl	$0, %edi
-	andl	$1, %edi
-	jne	.LBB258_2
-# BB#1:
-	movl	%ebx, 108(%esp)         # 4-byte Spill
-.LBB258_2:
-	movl	%edi, %ebx
-	testb	%bl, %bl
-	movl	132(%esp), %ebx         # 4-byte Reload
-	jne	.LBB258_4
-# BB#3:
-	movl	12(%esp), %ebx          # 4-byte Reload
-.LBB258_4:
-	movl	2608(%esp), %eax
-	movl	%ebx, (%eax)
-	movl	120(%esp), %ebx         # 4-byte Reload
-	jne	.LBB258_6
-# BB#5:
-	movl	16(%esp), %ebx          # 4-byte Reload
-.LBB258_6:
-	movl	%ebx, 4(%eax)
-	jne	.LBB258_8
-# BB#7:
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-.LBB258_8:
-	movl	116(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 8(%eax)
-	jne	.LBB258_10
-# BB#9:
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-.LBB258_10:
-	movl	100(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	movl	112(%esp), %esi         # 4-byte Reload
-	jne	.LBB258_12
-# BB#11:
-	movl	28(%esp), %esi          # 4-byte Reload
-.LBB258_12:
-	movl	%esi, 16(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_14
-# BB#13:
-	movl	32(%esp), %edx          # 4-byte Reload
-.LBB258_14:
-	movl	%edx, 20(%eax)
-	jne	.LBB258_16
-# BB#15:
-	movl	36(%esp), %ecx          # 4-byte Reload
-.LBB258_16:
-	movl	%ecx, 24(%eax)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_18
-# BB#17:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB258_18:
-	movl	%ecx, 28(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_20
-# BB#19:
-	movl	44(%esp), %ecx          # 4-byte Reload
-.LBB258_20:
-	movl	%ecx, 32(%eax)
-	movl	68(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_22
-# BB#21:
-	movl	48(%esp), %ecx          # 4-byte Reload
-.LBB258_22:
-	movl	%ecx, 36(%eax)
-	movl	64(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_24
-# BB#23:
-	movl	52(%esp), %ecx          # 4-byte Reload
-.LBB258_24:
-	movl	%ecx, 40(%eax)
-	movl	72(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_26
-# BB#25:
-	movl	56(%esp), %ecx          # 4-byte Reload
-.LBB258_26:
-	movl	%ecx, 44(%eax)
-	movl	84(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_28
-# BB#27:
-	movl	60(%esp), %ecx          # 4-byte Reload
-.LBB258_28:
-	movl	%ecx, 48(%eax)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_30
-# BB#29:
-	movl	88(%esp), %ecx          # 4-byte Reload
-.LBB258_30:
-	movl	%ecx, 52(%eax)
-	movl	104(%esp), %ecx         # 4-byte Reload
-	jne	.LBB258_32
-# BB#31:
-	movl	128(%esp), %ecx         # 4-byte Reload
-.LBB258_32:
-	movl	%ecx, 56(%eax)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 60(%eax)
-	movl	124(%esp), %ecx         # 4-byte Reload
-	jne	.LBB258_34
-# BB#33:
-	movl	%ebp, %ecx
-.LBB258_34:
-	movl	%ecx, 64(%eax)
-	addl	$2588, %esp             # imm = 0xA1C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end258:
-	.size	mcl_fp_mont17Lbmi2, .Lfunc_end258-mcl_fp_mont17Lbmi2
-
-	.globl	mcl_fp_montNF17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montNF17Lbmi2,@function
-mcl_fp_montNF17Lbmi2:                   # @mcl_fp_montNF17Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$2572, %esp             # imm = 0xA0C
-	calll	.L259$pb
-.L259$pb:
-	popl	%ebx
-.Ltmp60:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp60-.L259$pb), %ebx
-	movl	2604(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2496(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	2496(%esp), %edi
-	movl	2500(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	imull	%esi, %eax
-	movl	2564(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	2560(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2556(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	2552(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2548(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2544(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	2540(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	2536(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	2532(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	2528(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	2524(%esp), %ebp
-	movl	2520(%esp), %esi
-	movl	2516(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2512(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2508(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	2504(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	2424(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	2424(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2428(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2432(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2436(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2440(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2444(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	2448(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	2452(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	2456(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	2460(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2464(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2468(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	2472(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2476(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	2480(%esp), %edi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2484(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2488(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2492(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2352(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	2420(%esp), %ecx
-	movl	112(%esp), %edx         # 4-byte Reload
-	addl	2352(%esp), %edx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2356(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2360(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2364(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2368(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2372(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2376(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	2380(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	2384(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2388(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2392(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	2396(%esp), %esi
-	movl	%esi, %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2400(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	2404(%esp), %edi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2408(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2412(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2416(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2280(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	2280(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2284(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2288(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2292(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2296(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2300(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2304(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	2308(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	2312(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	2316(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2320(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	2324(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2328(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	2332(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2336(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	2340(%esp), %ebp
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	2344(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2348(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2208(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	2276(%esp), %eax
-	movl	92(%esp), %edx          # 4-byte Reload
-	addl	2208(%esp), %edx
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	2212(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	2216(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	2220(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	2224(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	2228(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	2232(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	2236(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	2240(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	2244(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	2248(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	2252(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	2256(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	2260(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	2264(%esp), %ebp
-	adcl	2268(%esp), %edi
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	2444(%esp), %ecx
+	subl	(%ecx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	4(%ecx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	sbbl	8(%ecx), %edx
+	movl	%edx, 100(%esp)                 # 4-byte Spill
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	sbbl	12(%ecx), %edi
+	movl	%edi, 96(%esp)                  # 4-byte Spill
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	sbbl	16(%ecx), %esi
+	movl	%esi, 92(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	sbbl	20(%ecx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%ecx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%ecx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	32(%ecx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%ecx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	40(%ecx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	sbbl	44(%ecx), %eax
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	movl	%ebp, %edx
+	sbbl	48(%ecx), %edx
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	sbbl	52(%ecx), %ebp
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	sbbl	56(%ecx), %ebx
+	movl	40(%esp), %edi                  # 4-byte Reload
 	movl	%edi, %esi
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	2272(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%edx, %edi
+	sbbl	60(%ecx), %edi
+	testl	%edi, %edi
+	js	.LBB78_1
+# %bb.2:
+	movl	2432(%esp), %ecx
+	movl	%edi, 60(%ecx)
+	js	.LBB78_3
+.LBB78_4:
+	movl	%ebx, 56(%ecx)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	movl	84(%esp), %esi                  # 4-byte Reload
+	js	.LBB78_5
+.LBB78_6:
+	movl	%ebp, 52(%ecx)
+	js	.LBB78_7
+.LBB78_8:
+	movl	%edx, 48(%ecx)
+	movl	88(%esp), %ebp                  # 4-byte Reload
+	js	.LBB78_9
+.LBB78_10:
+	movl	%eax, 44(%ecx)
+	movl	104(%esp), %ebx                 # 4-byte Reload
+	movl	80(%esp), %eax                  # 4-byte Reload
+	js	.LBB78_11
+.LBB78_12:
+	movl	%eax, 40(%ecx)
+	movl	108(%esp), %edx                 # 4-byte Reload
+	js	.LBB78_13
+.LBB78_14:
 	movl	%edi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2136(%esp), %ecx
-	movl	2604(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	addl	2136(%esp), %edi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2140(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2144(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2148(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2152(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2156(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	2160(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	2164(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2168(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2172(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2176(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2180(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2184(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	2188(%esp), %edi
-	adcl	2192(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	adcl	2196(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	2200(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2204(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2064(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	2132(%esp), %eax
-	movl	104(%esp), %edx         # 4-byte Reload
-	addl	2064(%esp), %edx
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	2068(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	2072(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	2076(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	2080(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	2084(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	2088(%esp), %ebp
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	2092(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	2096(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	2100(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	2104(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	2108(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	2112(%esp), %edi
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	2116(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	2120(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	adcl	2124(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	2128(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1992(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1992(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1996(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2000(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2004(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2008(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	2012(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	2016(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2020(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2024(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2028(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2032(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2036(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	2040(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	2044(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2048(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2052(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	2056(%esp), %edi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2060(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1920(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	1988(%esp), %eax
-	movl	76(%esp), %edx          # 4-byte Reload
-	addl	1920(%esp), %edx
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1924(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1928(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1932(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1936(%esp), %ebp
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1940(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1944(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1948(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1952(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1956(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1960(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1964(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	1968(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1972(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	1976(%esp), %esi
-	adcl	1980(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1984(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %edi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1848(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1848(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1852(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1856(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1860(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1864(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1868(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1872(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1876(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1880(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1884(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1888(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1892(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1896(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1900(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	1904(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1908(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1912(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1916(%esp), %eax
+	movl	72(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 36(%ecx)
+	js	.LBB78_15
+.LBB78_16:
+	movl	76(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 32(%ecx)
+	js	.LBB78_17
+.LBB78_18:
+	movl	%esi, 28(%ecx)
 	movl	%eax, %edi
-	movl	2600(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1776(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	1844(%esp), %eax
-	movl	84(%esp), %edx          # 4-byte Reload
-	addl	1776(%esp), %edx
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1780(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1784(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1788(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1792(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1796(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1800(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1804(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1808(%esp), %esi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1812(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1816(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1820(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1824(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1828(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	adcl	1832(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1836(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	1840(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1704(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1704(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1708(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1712(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1716(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1720(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1724(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1728(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1732(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1736(%esp), %esi
-	movl	%esi, %ebp
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	1740(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1744(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1748(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1752(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1756(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1760(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1764(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1768(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1772(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1632(%esp), %ecx
-	movl	2596(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	movl	1700(%esp), %eax
-	movl	80(%esp), %edx          # 4-byte Reload
-	addl	1632(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1636(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1640(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1644(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1648(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1652(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1656(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	1660(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	adcl	1664(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1668(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	1672(%esp), %esi
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1676(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1680(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1684(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1688(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	1692(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1696(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%edx, %edi
+	js	.LBB78_19
+.LBB78_20:
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 24(%ecx)
+	movl	%edx, %eax
+	js	.LBB78_21
+.LBB78_22:
+	movl	%ebp, 20(%ecx)
+	movl	100(%esp), %esi                 # 4-byte Reload
+	movl	92(%esp), %edx                  # 4-byte Reload
+	js	.LBB78_23
+.LBB78_24:
+	movl	%edx, 16(%ecx)
+	js	.LBB78_25
+.LBB78_26:
+	movl	%edi, 12(%ecx)
+	js	.LBB78_27
+.LBB78_28:
+	movl	%esi, 8(%ecx)
+	js	.LBB78_29
+.LBB78_30:
+	movl	%ebx, 4(%ecx)
+	jns	.LBB78_32
+.LBB78_31:
+	movl	44(%esp), %eax                  # 4-byte Reload
+.LBB78_32:
+	movl	%eax, (%ecx)
+	addl	$2412, %esp                     # imm = 0x96C
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+	retl
+.LBB78_1:
+	movl	%esi, %edi
+	movl	2432(%esp), %ecx
+	movl	%edi, 60(%ecx)
+	jns	.LBB78_4
+.LBB78_3:
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 56(%ecx)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	movl	84(%esp), %esi                  # 4-byte Reload
+	jns	.LBB78_6
+.LBB78_5:
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 52(%ecx)
+	jns	.LBB78_8
+.LBB78_7:
+	movl	36(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 48(%ecx)
+	movl	88(%esp), %ebp                  # 4-byte Reload
+	jns	.LBB78_10
+.LBB78_9:
+	movl	48(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 44(%ecx)
+	movl	104(%esp), %ebx                 # 4-byte Reload
+	movl	80(%esp), %eax                  # 4-byte Reload
+	jns	.LBB78_12
+.LBB78_11:
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 40(%ecx)
+	movl	108(%esp), %edx                 # 4-byte Reload
+	jns	.LBB78_14
+.LBB78_13:
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 72(%esp)                  # 4-byte Spill
 	movl	%edi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1560(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1560(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1564(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1568(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1572(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1576(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	1580(%esp), %edi
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	1584(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1588(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1592(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1596(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1600(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1604(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	1608(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1612(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1616(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1620(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1624(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1628(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1488(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	1556(%esp), %eax
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	1488(%esp), %ecx
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	1492(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	1496(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	1500(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	1504(%esp), %edi
-	adcl	1508(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	1512(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	1516(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %edx         # 4-byte Reload
-	adcl	1520(%esp), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	1524(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %edx         # 4-byte Reload
-	adcl	1528(%esp), %edx
-	movl	%edx, 116(%esp)         # 4-byte Spill
-	adcl	1532(%esp), %esi
-	movl	%esi, %ebp
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	1536(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	1540(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	1544(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	1548(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	1552(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
+	movl	72(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 36(%ecx)
+	jns	.LBB78_16
+.LBB78_15:
+	movl	8(%esp), %edi                   # 4-byte Reload
+	movl	%edi, 76(%esp)                  # 4-byte Spill
+	movl	76(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 32(%ecx)
+	jns	.LBB78_18
+.LBB78_17:
+	movl	16(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 28(%ecx)
+	movl	%eax, %edi
+	jns	.LBB78_20
+.LBB78_19:
+	movl	28(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 68(%esp)                  # 4-byte Spill
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 24(%ecx)
+	movl	%edx, %eax
+	jns	.LBB78_22
+.LBB78_21:
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%ecx)
+	movl	100(%esp), %esi                 # 4-byte Reload
+	movl	92(%esp), %edx                  # 4-byte Reload
+	jns	.LBB78_24
+.LBB78_23:
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 16(%ecx)
+	jns	.LBB78_26
+.LBB78_25:
+	movl	56(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%ecx)
+	jns	.LBB78_28
+.LBB78_27:
+	movl	64(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%ecx)
+	jns	.LBB78_30
+.LBB78_29:
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 4(%ecx)
+	js	.LBB78_31
+	jmp	.LBB78_32
+.Lfunc_end78:
+	.size	mcl_fp_montNF16Lbmi2, .Lfunc_end78-mcl_fp_montNF16Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRed16Lbmi2           # -- Begin function mcl_fp_montRed16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed16Lbmi2,@function
+mcl_fp_montRed16Lbmi2:                  # @mcl_fp_montRed16Lbmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$1292, %esp                     # imm = 0x50C
+	calll	.L79$pb
+.L79$pb:
+	popl	%ebx
+.Ltmp21:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp21-.L79$pb), %ebx
+	movl	1320(%esp), %ecx
+	movl	60(%ecx), %eax
+	movl	%eax, 140(%esp)                 # 4-byte Spill
+	movl	56(%ecx), %eax
+	movl	%eax, 136(%esp)                 # 4-byte Spill
+	movl	52(%ecx), %eax
+	movl	%eax, 132(%esp)                 # 4-byte Spill
+	movl	48(%ecx), %eax
+	movl	%eax, 128(%esp)                 # 4-byte Spill
+	movl	44(%ecx), %eax
+	movl	%eax, 116(%esp)                 # 4-byte Spill
+	movl	40(%ecx), %eax
+	movl	%eax, 112(%esp)                 # 4-byte Spill
+	movl	36(%ecx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	32(%ecx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	28(%ecx), %eax
+	movl	%eax, 124(%esp)                 # 4-byte Spill
+	movl	24(%ecx), %eax
+	movl	%eax, 120(%esp)                 # 4-byte Spill
+	movl	20(%ecx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	16(%ecx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	12(%ecx), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	8(%ecx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %eax
+	movl	%ecx, %edx
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	movl	60(%eax), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	56(%eax), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	52(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	48(%eax), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	44(%eax), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	40(%eax), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	32(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	28(%eax), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	16(%eax), %edi
+	movl	12(%eax), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	8(%eax), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	(%eax), %esi
+	movl	4(%eax), %ebp
+	movl	-4(%edx), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	movl	(%edx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	subl	$4, %esp
 	movl	%esi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1416(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1416(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1424(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	1428(%esp), %esi
-	adcl	1432(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	1436(%esp), %edi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1440(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1444(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1448(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1452(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1456(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	1460(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1464(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1468(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1472(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1476(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1480(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	1484(%esp), %ebp
-	movl	2600(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1344(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	1412(%esp), %eax
-	movl	52(%esp), %edx          # 4-byte Reload
-	addl	1344(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1348(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	1352(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1356(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	1360(%esp), %edi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1364(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1368(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1372(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1376(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1380(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1384(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1388(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1392(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1396(%esp), %esi
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1400(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1404(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	1408(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1272(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1272(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1288(%esp), %edi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1300(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	1304(%esp), %ebp
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1308(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1312(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1320(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1324(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1328(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1336(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1200(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	1268(%esp), %eax
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	1200(%esp), %ecx
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	1204(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	1208(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	adcl	1212(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	1216(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	1220(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	1224(%esp), %esi
+	imull	%ecx, %eax
+	leal	1228(%esp), %ecx
+	pushl	%eax
+	pushl	%edx
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1224(%esp), %esi
 	adcl	1228(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	1232(%esp), %edi
-	movl	112(%esp), %edx         # 4-byte Reload
-	adcl	1236(%esp), %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	1240(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	1244(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	1248(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	1252(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	1256(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	1260(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	1264(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1232(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1236(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	1240(%esp), %edi
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1244(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1248(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1252(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1256(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1260(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1264(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1268(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1272(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1276(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edi                  # 4-byte Reload
+	adcl	1280(%esp), %edi
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	1284(%esp), %esi
+	movl	1316(%esp), %eax
+	movl	64(%eax), %eax
+	adcl	1288(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%ebp, %eax
+	leal	1156(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	1216(%esp), %eax
 	adcl	$0, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1128(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1128(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1144(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1148(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	1152(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	1160(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	1172(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	1180(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	1188(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1056(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	1124(%esp), %edx
-	movl	68(%esp), %eax          # 4-byte Reload
-	addl	1056(%esp), %eax
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1060(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1064(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1068(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1072(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1076(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1080(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1084(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
+	addl	1152(%esp), %ebp
+	movl	56(%esp), %edx                  # 4-byte Reload
+	adcl	1156(%esp), %edx
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	1160(%esp), %ebp
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1164(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1168(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1172(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	1176(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1180(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1184(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1188(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1192(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	adcl	1196(%esp), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1200(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	1204(%esp), %edi
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	adcl	1208(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %esi                  # 4-byte Reload
+	adcl	1212(%esp), %esi
+	movl	1316(%esp), %ecx
+	adcl	68(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	60(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	1084(%esp), %ecx
+	pushl	%eax
+	movl	1328(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 60(%esp)                  # 1-byte Folded Spill
+	movl	1144(%esp), %eax
+	adcl	$0, %eax
+	addl	1080(%esp), %edi
+	adcl	1084(%esp), %ebp
+	movl	%ebp, %edx
+	movl	52(%esp), %ecx                  # 4-byte Reload
 	adcl	1088(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
 	adcl	1092(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	1096(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1096(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
 	adcl	1100(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	1104(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1104(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
 	adcl	1108(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	1112(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	1116(%esp), %edi
-	movl	56(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1112(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1116(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ecx                  # 4-byte Reload
 	adcl	1120(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	984(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	984(%esp), %esi
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	988(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	996(%esp), %ebp
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1044(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	912(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	980(%esp), %eax
-	addl	912(%esp), %esi
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	916(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	920(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	924(%esp), %edi
-	movl	108(%esp), %edx         # 4-byte Reload
-	adcl	928(%esp), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	932(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %edx         # 4-byte Reload
-	adcl	936(%esp), %edx
-	movl	%edx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %edx         # 4-byte Reload
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1124(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ebp                  # 4-byte Reload
+	adcl	1128(%esp), %ebp
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1132(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	1136(%esp), %esi
+	movl	%esi, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	1140(%esp), %esi
+	movl	1316(%esp), %ecx
+	adcl	72(%ecx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	setb	20(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %edi
+	imull	%edx, %eax
+	leal	1012(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 20(%esp)                  # 1-byte Folded Spill
+	movl	1072(%esp), %eax
+	adcl	$0, %eax
+	addl	1008(%esp), %edi
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	1012(%esp), %edx
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1016(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1020(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	1024(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1028(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1032(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1036(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1040(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	adcl	1044(%esp), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	1048(%esp), %edi
+	adcl	1052(%esp), %ebp
+	movl	%ebp, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1056(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	1060(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	adcl	1064(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	1068(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %ecx
+	adcl	76(%ecx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %eax
+	leal	940(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 16(%esp)                  # 1-byte Folded Spill
+	movl	1000(%esp), %eax
+	adcl	$0, %eax
+	addl	936(%esp), %esi
+	movl	40(%esp), %edx                  # 4-byte Reload
 	adcl	940(%esp), %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	944(%esp), %ebp
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	948(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	952(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	956(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	960(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	964(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	968(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	972(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	976(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	944(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	948(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	952(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	956(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	960(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	964(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ebp                  # 4-byte Reload
+	adcl	968(%esp), %ebp
+	adcl	972(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	976(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	980(%esp), %esi
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	984(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	988(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	992(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	996(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %ecx
+	adcl	80(%ecx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	868(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	928(%esp), %eax
+	adcl	$0, %eax
+	addl	864(%esp), %edi
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	868(%esp), %edx
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	872(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	876(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	880(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	884(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	888(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	892(%esp), %ebp
+	movl	%ebp, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	896(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	900(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	adcl	904(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %esi                  # 4-byte Reload
+	adcl	908(%esp), %esi
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	912(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	916(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	920(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	924(%esp), %ebp
+	movl	1316(%esp), %ecx
+	adcl	84(%ecx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	setb	60(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %edi
+	imull	%edx, %eax
+	leal	796(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 60(%esp)                  # 1-byte Folded Spill
+	movl	856(%esp), %eax
 	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	840(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	840(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	852(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	856(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	864(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%ebp, %esi
-	adcl	872(%esp), %esi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	888(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	768(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	836(%esp), %edx
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	768(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	788(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %edi         # 4-byte Reload
-	adcl	792(%esp), %edi
-	adcl	796(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	812(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	828(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	696(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	adcl	716(%esp), %esi
-	adcl	720(%esp), %edi
-	movl	%edi, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
+	addl	792(%esp), %edi
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	796(%esp), %edx
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	800(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	804(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	808(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	812(%esp), %edi
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	adcl	816(%esp), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	820(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	824(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	828(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	832(%esp), %esi
+	movl	%esi, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	836(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	840(%esp), %esi
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	844(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	adcl	848(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	852(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %ecx
+	adcl	88(%ecx), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	setb	56(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %ebp
+	imull	%edx, %eax
+	leal	724(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 56(%esp)                  # 1-byte Folded Spill
+	movl	784(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	720(%esp), %ebp
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	724(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	728(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	732(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	732(%esp), %ebp
+	adcl	736(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
 	adcl	740(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	744(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edi                  # 4-byte Reload
+	adcl	748(%esp), %edi
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	752(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	756(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	756(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	760(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	624(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	692(%esp), %edx
-	movl	88(%esp), %ecx          # 4-byte Reload
-	addl	624(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	636(%esp), %ebp
-	adcl	640(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	656(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	660(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	552(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	564(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	576(%esp), %esi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	764(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
+	adcl	768(%esp), %esi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	772(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	776(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	780(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	adcl	92(%eax), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	setb	52(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	leal	652(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 52(%esp)                  # 1-byte Folded Spill
+	movl	712(%esp), %eax
+	adcl	$0, %eax
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	addl	648(%esp), %ecx
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	652(%esp), %ecx
+	adcl	656(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	660(%esp), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edx                  # 4-byte Reload
+	adcl	664(%esp), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	668(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	adcl	672(%esp), %edi
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edx                  # 4-byte Reload
+	adcl	676(%esp), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %edx                  # 4-byte Reload
+	adcl	680(%esp), %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	684(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %edx                  # 4-byte Reload
+	adcl	688(%esp), %edx
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	adcl	692(%esp), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	696(%esp), %ebp
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	700(%esp), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	704(%esp), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	76(%esp), %edx                  # 4-byte Reload
+	adcl	708(%esp), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %edx
+	adcl	96(%edx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	setb	40(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %esi                  # 4-byte Reload
+	movl	%esi, %eax
+	movl	%ecx, %edi
+	imull	%ecx, %eax
+	leal	580(%esp), %ecx
+	pushl	%eax
+	movl	1328(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 40(%esp)                  # 1-byte Folded Spill
+	movl	640(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	576(%esp), %edi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	580(%esp), %ecx
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	584(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edi                  # 4-byte Reload
 	adcl	588(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	592(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	596(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	600(%esp), %ebp
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	604(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	600(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	604(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	608(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	612(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	616(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	480(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	548(%esp), %edx
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	480(%esp), %ecx
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	500(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	adcl	620(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	624(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	628(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	76(%esp), %ebp                  # 4-byte Reload
+	adcl	632(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	636(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	adcl	100(%eax), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	%esi, %eax
+	movl	%ecx, %esi
+	imull	%ecx, %eax
+	leal	508(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	568(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	504(%esp), %esi
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	508(%esp), %esi
+	adcl	512(%esp), %edi
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	516(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	520(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	524(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	528(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	524(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	528(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	532(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	536(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	540(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	544(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	408(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	408(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	adcl	420(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	432(%esp), %ebp
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	436(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	552(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	556(%esp), %ebp
+	movl	%ebp, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	560(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	564(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	adcl	104(%eax), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	436(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	496(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	432(%esp), %esi
+	movl	%edi, %esi
+	adcl	436(%esp), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	440(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	444(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	448(%esp), %ebp
+	movl	60(%esp), %eax                  # 4-byte Reload
 	adcl	452(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	456(%esp), %edi
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	460(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	464(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	468(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	472(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	476(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	60(%eax), %eax
-	movl	%eax, (%esp)
-	leal	336(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	404(%esp), %edx
-	movl	108(%esp), %ecx         # 4-byte Reload
-	addl	336(%esp), %ecx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	344(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	356(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	adcl	360(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	364(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	76(%esp), %eax                  # 4-byte Reload
+	adcl	480(%esp), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	484(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	488(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	492(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	adcl	108(%eax), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	setb	20(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	364(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 20(%esp)                  # 1-byte Folded Spill
+	movl	424(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	360(%esp), %esi
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	364(%esp), %esi
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	368(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
 	adcl	372(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	adcl	376(%esp), %ebp
+	adcl	380(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	384(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	388(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	392(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	396(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	400(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	264(%esp), %esi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	280(%esp), %esi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	292(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	76(%esp), %eax                  # 4-byte Reload
+	adcl	404(%esp), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	408(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	412(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	416(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	420(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	adcl	112(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	setb	60(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	292(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 60(%esp)                  # 1-byte Folded Spill
+	movl	352(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	288(%esp), %esi
+	movl	64(%esp), %esi                  # 4-byte Reload
+	adcl	292(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	296(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
 	adcl	300(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	304(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	308(%esp), %edi
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	312(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	308(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	316(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	320(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	324(%esp), %ebp
+	movl	76(%esp), %eax                  # 4-byte Reload
 	adcl	328(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	332(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	64(%eax), %eax
-	movl	%eax, (%esp)
-	leal	192(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	260(%esp), %edx
-	movl	100(%esp), %ecx         # 4-byte Reload
-	addl	192(%esp), %ecx
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	204(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	336(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	340(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	adcl	116(%eax), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	setb	68(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	220(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 68(%esp)                  # 1-byte Folded Spill
+	movl	280(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	216(%esp), %esi
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	220(%esp), %esi
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	224(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	228(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %edi                  # 4-byte Reload
 	adcl	232(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	adcl	236(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	120(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	120(%esp), %esi
-	movl	92(%esp), %esi          # 4-byte Reload
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	128(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	adcl	132(%esp), %esi
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	136(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	140(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	144(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	148(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	236(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	240(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	244(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	248(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	adcl	252(%esp), %ecx
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	256(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	260(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	264(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	268(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	272(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	276(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %ecx
+	adcl	120(%ecx), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	setb	56(%esp)                        # 1-byte Folded Spill
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%esi, %ecx
+	subl	$4, %esp
+	leal	148(%esp), %eax
+	pushl	%ecx
+	pushl	1328(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 56(%esp)                  # 1-byte Folded Spill
+	movl	208(%esp), %ebp
+	adcl	$0, %ebp
+	addl	144(%esp), %esi
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	148(%esp), %ecx
+	movl	20(%esp), %edx                  # 4-byte Reload
 	adcl	152(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	156(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	160(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	164(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	168(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	172(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	176(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	180(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %edx         # 4-byte Reload
-	adcl	184(%esp), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	188(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	%eax, %edx
-	movl	2604(%esp), %edi
-	subl	(%edi), %edx
-	sbbl	4(%edi), %ebp
-	movl	%esi, %ebx
-	sbbl	8(%edi), %ebx
-	movl	104(%esp), %ecx         # 4-byte Reload
-	sbbl	12(%edi), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	sbbl	16(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	sbbl	20(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	sbbl	24(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	sbbl	28(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	sbbl	44(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	sbbl	48(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	sbbl	52(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	sbbl	56(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	60(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	sbbl	64(%edi), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	sarl	$31, %eax
-	testl	%eax, %eax
-	movl	116(%esp), %edi         # 4-byte Reload
-	js	.LBB259_2
-# BB#1:
-	movl	%edx, %edi
-.LBB259_2:
-	movl	2592(%esp), %edx
-	movl	%edi, (%edx)
-	movl	112(%esp), %edi         # 4-byte Reload
-	js	.LBB259_4
-# BB#3:
-	movl	%ebp, %edi
-.LBB259_4:
-	movl	%edi, 4(%edx)
-	js	.LBB259_6
-# BB#5:
-	movl	%ebx, %esi
-.LBB259_6:
-	movl	%esi, 8(%edx)
-	movl	104(%esp), %esi         # 4-byte Reload
-	js	.LBB259_8
-# BB#7:
-	movl	%ecx, %esi
-.LBB259_8:
-	movl	%esi, 12(%edx)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	js	.LBB259_10
-# BB#9:
-	movl	4(%esp), %ecx           # 4-byte Reload
-.LBB259_10:
-	movl	%ecx, 16(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	js	.LBB259_12
-# BB#11:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB259_12:
-	movl	%eax, 20(%edx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	js	.LBB259_14
-# BB#13:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB259_14:
-	movl	%eax, 24(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB259_16
-# BB#15:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB259_16:
-	movl	%eax, 28(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB259_18
-# BB#17:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB259_18:
-	movl	%eax, 32(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB259_20
-# BB#19:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB259_20:
-	movl	%eax, 36(%edx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	js	.LBB259_22
-# BB#21:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB259_22:
-	movl	%eax, 40(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	js	.LBB259_24
-# BB#23:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB259_24:
-	movl	%eax, 44(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB259_26
-# BB#25:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB259_26:
-	movl	%eax, 48(%edx)
-	movl	88(%esp), %eax          # 4-byte Reload
-	js	.LBB259_28
-# BB#27:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB259_28:
-	movl	%eax, 52(%edx)
-	movl	96(%esp), %eax          # 4-byte Reload
-	js	.LBB259_30
-# BB#29:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB259_30:
-	movl	%eax, 56(%edx)
-	movl	108(%esp), %eax         # 4-byte Reload
-	js	.LBB259_32
-# BB#31:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB259_32:
-	movl	%eax, 60(%edx)
-	movl	100(%esp), %eax         # 4-byte Reload
-	js	.LBB259_34
-# BB#33:
-	movl	92(%esp), %eax          # 4-byte Reload
-.LBB259_34:
-	movl	%eax, 64(%edx)
-	addl	$2572, %esp             # imm = 0xA0C
+	movl	%edi, %esi
+	adcl	156(%esp), %esi
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	160(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	164(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	168(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	76(%esp), %ebx                  # 4-byte Reload
+	adcl	176(%esp), %ebx
+	movl	%ebx, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	180(%esp), %ebx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	adcl	184(%esp), %ebx
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	adcl	188(%esp), %ebx
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	adcl	192(%esp), %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	196(%esp), %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ebx                  # 4-byte Reload
+	adcl	200(%esp), %ebx
+	movl	%ebx, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ebx                  # 4-byte Reload
+	adcl	204(%esp), %ebx
+	movl	%ebx, 44(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %ebx
+	adcl	124(%ebx), %ebp
+	xorl	%ebx, %ebx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	subl	84(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 84(%esp)                  # 4-byte Spill
+	movl	%edx, %ecx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	sbbl	88(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 88(%esp)                  # 4-byte Spill
+	movl	%esi, %ecx
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	sbbl	92(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	movl	%edi, %ecx
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	sbbl	96(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	sbbl	100(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, 100(%esp)                 # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	sbbl	104(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, 104(%esp)                 # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	sbbl	120(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	76(%esp), %eax                  # 4-byte Reload
+	sbbl	124(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	108(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	80(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	sbbl	112(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 112(%esp)                 # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	116(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 116(%esp)                 # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	sbbl	128(%esp), %edx                 # 4-byte Folded Reload
+	movl	64(%esp), %esi                  # 4-byte Reload
+	sbbl	132(%esp), %esi                 # 4-byte Folded Reload
+	movl	44(%esp), %edi                  # 4-byte Reload
+	sbbl	136(%esp), %edi                 # 4-byte Folded Reload
+	movl	%ebp, %ecx
+	sbbl	140(%esp), %ecx                 # 4-byte Folded Reload
+	sbbl	%ebx, %ebx
+	testb	$1, %bl
+	je	.LBB79_2
+# %bb.1:
+	movl	%ebp, %ecx
+.LBB79_2:
+	movl	1312(%esp), %eax
+	movl	%ecx, 60(%eax)
+	je	.LBB79_4
+# %bb.3:
+	movl	44(%esp), %edi                  # 4-byte Reload
+.LBB79_4:
+	movl	%edi, 56(%eax)
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	movl	100(%esp), %ebx                 # 4-byte Reload
+	movl	104(%esp), %ebp                 # 4-byte Reload
+	je	.LBB79_6
+# %bb.5:
+	movl	64(%esp), %esi                  # 4-byte Reload
+.LBB79_6:
+	movl	%esi, 52(%eax)
+	je	.LBB79_8
+# %bb.7:
+	movl	16(%esp), %edx                  # 4-byte Reload
+.LBB79_8:
+	movl	%edx, 48(%eax)
+	movl	92(%esp), %edi                  # 4-byte Reload
+	movl	112(%esp), %esi                 # 4-byte Reload
+	movl	116(%esp), %edx                 # 4-byte Reload
+	jne	.LBB79_9
+# %bb.10:
+	movl	%edx, 44(%eax)
+	movl	108(%esp), %edx                 # 4-byte Reload
+	jne	.LBB79_11
+.LBB79_12:
+	movl	%esi, 40(%eax)
+	jne	.LBB79_13
+.LBB79_14:
+	movl	80(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 36(%eax)
+	jne	.LBB79_15
+.LBB79_16:
+	movl	%edx, 32(%eax)
+	je	.LBB79_18
+.LBB79_17:
+	movl	76(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+.LBB79_18:
+	movl	%ecx, %edx
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%eax)
+	movl	%edi, %esi
+	jne	.LBB79_19
+# %bb.20:
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%eax)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	jne	.LBB79_21
+.LBB79_22:
+	movl	%ebp, 20(%eax)
+	movl	%edx, %ecx
+	jne	.LBB79_23
+.LBB79_24:
+	movl	%ebx, 16(%eax)
+	movl	88(%esp), %edx                  # 4-byte Reload
+	jne	.LBB79_25
+.LBB79_26:
+	movl	%edi, 12(%eax)
+	jne	.LBB79_27
+.LBB79_28:
+	movl	%esi, 8(%eax)
+	jne	.LBB79_29
+.LBB79_30:
+	movl	%edx, 4(%eax)
+	je	.LBB79_32
+.LBB79_31:
+	movl	60(%esp), %ecx                  # 4-byte Reload
+.LBB79_32:
+	movl	%ecx, (%eax)
+	addl	$1292, %esp                     # imm = 0x50C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end259:
-	.size	mcl_fp_montNF17Lbmi2, .Lfunc_end259-mcl_fp_montNF17Lbmi2
-
-	.globl	mcl_fp_montRed17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_montRed17Lbmi2,@function
-mcl_fp_montRed17Lbmi2:                  # @mcl_fp_montRed17Lbmi2
-# BB#0:
+.LBB79_9:
+	movl	24(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 44(%eax)
+	movl	108(%esp), %edx                 # 4-byte Reload
+	je	.LBB79_12
+.LBB79_11:
+	movl	32(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 40(%eax)
+	je	.LBB79_14
+.LBB79_13:
+	movl	28(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 80(%esp)                  # 4-byte Spill
+	movl	80(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 36(%eax)
+	je	.LBB79_16
+.LBB79_15:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 32(%eax)
+	jne	.LBB79_17
+	jmp	.LBB79_18
+.LBB79_19:
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%eax)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	je	.LBB79_22
+.LBB79_21:
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%eax)
+	movl	%edx, %ecx
+	je	.LBB79_24
+.LBB79_23:
+	movl	40(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 16(%eax)
+	movl	88(%esp), %edx                  # 4-byte Reload
+	je	.LBB79_26
+.LBB79_25:
+	movl	52(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%eax)
+	je	.LBB79_28
+.LBB79_27:
+	movl	56(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%eax)
+	je	.LBB79_30
+.LBB79_29:
+	movl	20(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%eax)
+	jne	.LBB79_31
+	jmp	.LBB79_32
+.Lfunc_end79:
+	.size	mcl_fp_montRed16Lbmi2, .Lfunc_end79-mcl_fp_montRed16Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_montRedNF16Lbmi2         # -- Begin function mcl_fp_montRedNF16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF16Lbmi2,@function
+mcl_fp_montRedNF16Lbmi2:                # @mcl_fp_montRedNF16Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$1436, %esp             # imm = 0x59C
-	calll	.L260$pb
-.L260$pb:
-	popl	%eax
-.Ltmp61:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp61-.L260$pb), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1464(%esp), %edx
-	movl	-4(%edx), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	1460(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 76(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	imull	%esi, %ebx
-	movl	132(%ecx), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	128(%ecx), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	124(%ecx), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%ecx), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%ecx), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	112(%ecx), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	108(%ecx), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	104(%ecx), %esi
-	movl	%esi, 156(%esp)         # 4-byte Spill
-	movl	100(%ecx), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	96(%ecx), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	92(%ecx), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	88(%ecx), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	84(%ecx), %esi
-	movl	%esi, 180(%esp)         # 4-byte Spill
-	movl	80(%ecx), %edi
-	movl	%edi, 196(%esp)         # 4-byte Spill
-	movl	76(%ecx), %esi
-	movl	%esi, 192(%esp)         # 4-byte Spill
-	movl	72(%ecx), %esi
-	movl	%esi, 204(%esp)         # 4-byte Spill
-	movl	68(%ecx), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	64(%ecx), %ebp
-	movl	%ebp, 176(%esp)         # 4-byte Spill
-	movl	60(%ecx), %ebp
-	movl	%ebp, 164(%esp)         # 4-byte Spill
+	subl	$1276, %esp                     # imm = 0x4FC
+	calll	.L80$pb
+.L80$pb:
+	popl	%ebx
+.Ltmp22:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp22-.L80$pb), %ebx
+	movl	1304(%esp), %ecx
+	movl	60(%ecx), %eax
+	movl	%eax, 124(%esp)                 # 4-byte Spill
 	movl	56(%ecx), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
+	movl	%eax, 120(%esp)                 # 4-byte Spill
 	movl	52(%ecx), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
+	movl	%eax, 116(%esp)                 # 4-byte Spill
 	movl	48(%ecx), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
+	movl	%eax, 112(%esp)                 # 4-byte Spill
 	movl	44(%ecx), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
+	movl	%eax, 108(%esp)                 # 4-byte Spill
 	movl	40(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
+	movl	%eax, 104(%esp)                 # 4-byte Spill
 	movl	36(%ecx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
+	movl	%eax, 96(%esp)                  # 4-byte Spill
 	movl	32(%ecx), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
+	movl	%eax, 92(%esp)                  # 4-byte Spill
 	movl	28(%ecx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
+	movl	%eax, 64(%esp)                  # 4-byte Spill
 	movl	24(%ecx), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	20(%ecx), %ebp
-	movl	16(%ecx), %esi
-	movl	12(%ecx), %edi
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	20(%ecx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	12(%ecx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
 	movl	8(%ecx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	64(%edx), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%edx), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	56(%edx), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%edx), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	44(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	1360(%esp), %ecx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	76(%esp), %eax          # 4-byte Reload
-	addl	1360(%esp), %eax
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1364(%esp), %ecx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	1372(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	1376(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	1380(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1384(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1400(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1404(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1408(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	1412(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	1416(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	1424(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1428(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	adcl	$0, 204(%esp)           # 4-byte Folded Spill
-	adcl	$0, 192(%esp)           # 4-byte Folded Spill
-	adcl	$0, 196(%esp)           # 4-byte Folded Spill
-	adcl	$0, 180(%esp)           # 4-byte Folded Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	movl	128(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1288(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	1288(%esp), %esi
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	1292(%esp), %edx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1300(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1308(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1312(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1320(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1324(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1328(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %edi         # 4-byte Reload
-	adcl	1336(%esp), %edi
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1352(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1356(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	adcl	$0, 192(%esp)           # 4-byte Folded Spill
-	adcl	$0, 196(%esp)           # 4-byte Folded Spill
-	adcl	$0, 180(%esp)           # 4-byte Folded Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, %esi
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 128(%esp)         # 4-byte Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1216(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	1216(%esp), %ebp
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1220(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %eax
+	movl	%ecx, %edx
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %eax
+	movl	60(%eax), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	56(%eax), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	52(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	48(%eax), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	44(%eax), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	40(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%eax), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%eax), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	16(%eax), %edi
+	movl	12(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%eax), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	(%eax), %esi
+	movl	4(%eax), %ebp
+	movl	-4(%edx), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	(%edx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	%esi, %eax
+	imull	%ecx, %eax
+	leal	1212(%esp), %ecx
+	pushl	%eax
+	pushl	%edx
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addl	1208(%esp), %esi
+	adcl	1212(%esp), %ebp
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1216(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1220(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	1224(%esp), %edi
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	1228(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	1232(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	1236(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	1240(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	1244(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	1248(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	1252(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	1256(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	adcl	1260(%esp), %edi
-	movl	%edi, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %edi         # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1260(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edi                  # 4-byte Reload
 	adcl	1264(%esp), %edi
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	1268(%esp), %esi
+	movl	1300(%esp), %eax
+	movl	64(%eax), %eax
 	adcl	1272(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	adcl	$0, 196(%esp)           # 4-byte Folded Spill
-	adcl	$0, 180(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 184(%esp)         # 4-byte Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	movl	144(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1144(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	1144(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	setb	8(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%ebp, %eax
+	leal	1140(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 8(%esp)                   # 1-byte Folded Spill
+	movl	1200(%esp), %eax
+	adcl	$0, %eax
+	addl	1136(%esp), %ebp
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	1140(%esp), %edx
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	1144(%esp), %ebp
+	movl	56(%esp), %ecx                  # 4-byte Reload
 	adcl	1148(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1152(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1156(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	1160(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	1164(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1168(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1172(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1176(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	1180(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1184(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
 	adcl	1188(%esp), %edi
-	movl	%edi, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1200(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1204(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1208(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	1212(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	adcl	$0, 180(%esp)           # 4-byte Folded Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	movl	188(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 144(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1072(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	1072(%esp), %esi
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	1076(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	adcl	1192(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	1196(%esp), %esi
+	movl	1300(%esp), %ecx
+	adcl	68(%ecx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	1068(%esp), %ecx
+	pushl	%eax
+	movl	1312(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	1128(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	1064(%esp), %edi
+	adcl	1068(%esp), %ebp
+	movl	%ebp, %ecx
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1072(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1076(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	1080(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	1084(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	1088(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	1092(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	1096(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	1100(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	1104(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	1108(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	1112(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	1116(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 188(%esp)         # 4-byte Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	movl	172(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	movl	152(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1000(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	1000(%esp), %esi
-	movl	84(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	1120(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	1124(%esp), %esi
+	movl	1300(%esp), %eax
+	adcl	72(%eax), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%ecx, %edi
+	imull	%ecx, %eax
+	leal	996(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 16(%esp)                  # 1-byte Folded Spill
+	movl	1056(%esp), %eax
+	adcl	$0, %eax
+	addl	992(%esp), %edi
+	movl	56(%esp), %edx                  # 4-byte Reload
+	adcl	996(%esp), %edx
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1000(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
 	adcl	1004(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	1044(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 172(%esp)         # 4-byte Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 152(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	928(%esp), %esi
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	932(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	936(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	movl	160(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	856(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	856(%esp), %esi
-	movl	92(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	1008(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	1012(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1016(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1020(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1024(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	1028(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	1032(%esp), %edi
+	adcl	1036(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	1040(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1044(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	1048(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
+	adcl	1052(%esp), %esi
+	movl	1300(%esp), %ecx
+	adcl	76(%ecx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	setb	12(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %ebp
+	leal	924(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 12(%esp)                  # 1-byte Folded Spill
+	movl	984(%esp), %eax
+	adcl	$0, %eax
+	addl	920(%esp), %ebp
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	924(%esp), %edx
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	928(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	932(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	936(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	940(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	944(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	948(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	952(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	956(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	960(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	964(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	968(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	972(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	976(%esp), %esi
+	movl	%esi, %ebp
+	movl	56(%esp), %edi                  # 4-byte Reload
+	adcl	980(%esp), %edi
+	movl	1300(%esp), %ecx
+	adcl	80(%ecx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	setb	52(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %eax
+	leal	852(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 52(%esp)                  # 1-byte Folded Spill
+	movl	912(%esp), %eax
+	adcl	$0, %eax
+	addl	848(%esp), %esi
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	852(%esp), %edx
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	856(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
 	adcl	860(%esp), %ecx
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	adcl	924(%esp), %ebp
-	movl	%ebp, 168(%esp)         # 4-byte Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 160(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	movl	124(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	movl	96(%esp), %ebp          # 4-byte Reload
-	imull	%ebp, %eax
-	movl	%eax, (%esp)
-	leal	784(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	784(%esp), %edi
-	movl	108(%esp), %ecx         # 4-byte Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	864(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	868(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	872(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	876(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	880(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	884(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	888(%esp), %esi
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	892(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	896(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	900(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	adcl	904(%esp), %edi
+	movl	%edi, %ebp
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	908(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %ecx
+	adcl	84(%ecx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	setb	8(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %edi
+	imull	%edx, %eax
+	leal	780(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 8(%esp)                   # 1-byte Folded Spill
+	movl	840(%esp), %eax
+	adcl	$0, %eax
+	addl	776(%esp), %edi
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	780(%esp), %edx
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	784(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
 	adcl	788(%esp), %ecx
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	%ebp, %eax
-	movl	%eax, (%esp)
-	leal	712(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	712(%esp), %esi
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	716(%esp), %ecx
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %ebp         # 4-byte Reload
-	adcl	760(%esp), %ebp
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	adcl	780(%esp), %edi
-	movl	%edi, 156(%esp)         # 4-byte Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	640(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	640(%esp), %esi
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	644(%esp), %ecx
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	792(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	796(%esp), %edi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	800(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	804(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	808(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	adcl	812(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	816(%esp), %esi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	820(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	824(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	adcl	828(%esp), %ebp
+	movl	%ebp, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	832(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	836(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %ecx
+	adcl	88(%ecx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %ebp
+	imull	%edx, %eax
+	leal	708(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	768(%esp), %eax
+	adcl	$0, %eax
+	addl	704(%esp), %ebp
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	708(%esp), %edx
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	712(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	716(%esp), %ebp
+	adcl	720(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	724(%esp), %edi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	728(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	732(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	736(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	740(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	744(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	748(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	752(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	756(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	760(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	764(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	1300(%esp), %ecx
+	adcl	92(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	4(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %esi
+	leal	636(%esp), %ecx
+	pushl	%eax
+	movl	1312(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 4(%esp)                   # 1-byte Folded Spill
+	movl	696(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	632(%esp), %esi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	636(%esp), %ecx
+	adcl	640(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	644(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	648(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	652(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edi                  # 4-byte Reload
+	adcl	656(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	660(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	664(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	668(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %edi         # 4-byte Reload
-	adcl	672(%esp), %edi
-	movl	192(%esp), %esi         # 4-byte Reload
-	adcl	676(%esp), %esi
-	movl	196(%esp), %eax         # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	672(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ebp                  # 4-byte Reload
+	adcl	676(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	680(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	adcl	684(%esp), %ebp
-	movl	%ebp, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	684(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	688(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	692(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	1464(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %eax
+	adcl	96(%eax), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	setb	48(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%ecx, %esi
+	imull	%ecx, %eax
+	leal	564(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 48(%esp)                  # 1-byte Folded Spill
+	movl	624(%esp), %eax
+	adcl	$0, %eax
 	movl	%eax, %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	568(%esp), %ebp
-	movl	140(%esp), %ecx         # 4-byte Reload
-	adcl	572(%esp), %ecx
-	movl	136(%esp), %eax         # 4-byte Reload
+	addl	560(%esp), %esi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	564(%esp), %ecx
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	568(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	572(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	576(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	580(%esp), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	584(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %ebp         # 4-byte Reload
-	adcl	588(%esp), %ebp
-	movl	200(%esp), %eax         # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	588(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	592(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	adcl	596(%esp), %edi
-	movl	%edi, 204(%esp)         # 4-byte Spill
-	adcl	600(%esp), %esi
-	movl	%esi, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %esi         # 4-byte Reload
-	adcl	604(%esp), %esi
-	movl	180(%esp), %eax         # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	596(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	adcl	600(%esp), %ebp
+	movl	%ebp, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	604(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	608(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	612(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	616(%esp), %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	620(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %eax
+	adcl	100(%eax), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	setb	4(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
 	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	496(%esp), %edi
-	movl	136(%esp), %edi         # 4-byte Reload
-	adcl	500(%esp), %edi
-	movl	148(%esp), %eax         # 4-byte Reload
+	leal	492(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 4(%esp)                   # 1-byte Folded Spill
+	movl	552(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	488(%esp), %edi
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	492(%esp), %ecx
+	adcl	496(%esp), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	500(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	504(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	508(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	adcl	512(%esp), %ebp
-	movl	%ebp, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %ebp         # 4-byte Reload
-	adcl	516(%esp), %ebp
-	movl	204(%esp), %eax         # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	512(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	516(%esp), %edi
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	520(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	524(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	adcl	528(%esp), %esi
-	movl	%esi, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	528(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	532(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	536(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	540(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	544(%esp), %ebp
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	548(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	movl	%edi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	424(%esp), %edi
-	movl	148(%esp), %ecx         # 4-byte Reload
-	adcl	428(%esp), %ecx
-	movl	164(%esp), %edi         # 4-byte Reload
-	adcl	432(%esp), %edi
-	movl	176(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %eax
+	adcl	104(%eax), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %esi
+	leal	420(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 16(%esp)                  # 1-byte Folded Spill
+	movl	480(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	416(%esp), %esi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	420(%esp), %ecx
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	424(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	428(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	432(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	436(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	adcl	440(%esp), %ebp
-	movl	204(%esp), %eax         # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	440(%esp), %edi
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	444(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	448(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	452(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	456(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	460(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	464(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	468(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	472(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	476(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %eax
+	adcl	108(%eax), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
 	movl	%ecx, %esi
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	352(%esp), %esi
-	movl	%edi, %ecx
-	adcl	356(%esp), %ecx
-	movl	176(%esp), %eax         # 4-byte Reload
+	leal	348(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 16(%esp)                  # 1-byte Folded Spill
+	movl	408(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	344(%esp), %esi
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	348(%esp), %esi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	356(%esp), %ebp
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	360(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	adcl	364(%esp), %ebp
-	movl	%ebp, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	364(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	368(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	372(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	376(%esp), %edi
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	380(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %edi         # 4-byte Reload
-	adcl	384(%esp), %edi
-	movl	188(%esp), %eax         # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	384(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	388(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	392(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	396(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	400(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	404(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	280(%esp), %ebp
-	movl	176(%esp), %ecx         # 4-byte Reload
-	adcl	284(%esp), %ecx
-	movl	200(%esp), %eax         # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	1300(%esp), %eax
+	adcl	112(%eax), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	setb	8(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	276(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 8(%esp)                   # 1-byte Folded Spill
+	movl	336(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	272(%esp), %esi
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	276(%esp), %ecx
+	adcl	280(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	284(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	288(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
+	adcl	292(%esp), %esi
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	296(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	adcl	308(%esp), %edi
-	movl	%edi, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	300(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	304(%esp), %edi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	308(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	312(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	316(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	320(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	324(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	328(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	332(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, %esi
-	movl	96(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %eax
+	adcl	116(%eax), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	setb	52(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
 	imull	%ecx, %eax
 	movl	%ecx, %ebp
-	movl	%eax, (%esp)
-	leal	208(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	208(%esp), %ebp
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %edx         # 4-byte Reload
-	adcl	216(%esp), %edx
-	movl	%edx, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %ecx         # 4-byte Reload
+	leal	204(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, 52(%esp)                  # 1-byte Folded Spill
+	movl	264(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	200(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	204(%esp), %eax
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	208(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	212(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	216(%esp), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
 	adcl	220(%esp), %ecx
-	movl	%ecx, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	%eax, %ebp
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %ebx         # 4-byte Reload
-	adcl	264(%esp), %ebx
-	movl	%ebx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	276(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	200(%esp), %edi         # 4-byte Reload
-	subl	16(%esp), %edi          # 4-byte Folded Reload
-	sbbl	4(%esp), %edx           # 4-byte Folded Reload
-	sbbl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	196(%esp), %eax         # 4-byte Reload
-	sbbl	12(%esp), %eax          # 4-byte Folded Reload
-	sbbl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	184(%esp), %esi         # 4-byte Reload
-	sbbl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	188(%esp), %esi         # 4-byte Reload
-	sbbl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	168(%esp), %esi         # 4-byte Reload
-	sbbl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	172(%esp), %ebp         # 4-byte Reload
-	sbbl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	156(%esp), %ebp         # 4-byte Reload
-	sbbl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	160(%esp), %ebp         # 4-byte Reload
-	sbbl	44(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	152(%esp), %ebp         # 4-byte Reload
-	sbbl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	144(%esp), %ebp         # 4-byte Reload
-	sbbl	52(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 136(%esp)         # 4-byte Spill
-	sbbl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 140(%esp)         # 4-byte Spill
-	movl	132(%esp), %ebx         # 4-byte Reload
-	sbbl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 148(%esp)         # 4-byte Spill
-	movl	124(%esp), %ebx         # 4-byte Reload
-	sbbl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 164(%esp)         # 4-byte Spill
-	movl	116(%esp), %ebx         # 4-byte Reload
-	sbbl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 176(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebx         # 4-byte Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB260_2
-# BB#1:
-	movl	%esi, 168(%esp)         # 4-byte Spill
-.LBB260_2:
-	testb	%bl, %bl
-	movl	200(%esp), %esi         # 4-byte Reload
-	jne	.LBB260_4
-# BB#3:
-	movl	%edi, %esi
-.LBB260_4:
-	movl	1456(%esp), %edi
-	movl	%esi, (%edi)
-	movl	156(%esp), %esi         # 4-byte Reload
-	movl	204(%esp), %ebx         # 4-byte Reload
-	jne	.LBB260_6
-# BB#5:
-	movl	%edx, %ebx
-.LBB260_6:
-	movl	%ebx, 4(%edi)
-	movl	144(%esp), %ebx         # 4-byte Reload
-	movl	192(%esp), %edx         # 4-byte Reload
-	jne	.LBB260_8
-# BB#7:
-	movl	%ecx, %edx
-.LBB260_8:
-	movl	%edx, 8(%edi)
-	movl	132(%esp), %edx         # 4-byte Reload
-	movl	196(%esp), %ecx         # 4-byte Reload
-	jne	.LBB260_10
-# BB#9:
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	224(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	228(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %ebp                    # 4-byte Reload
+	adcl	232(%esp), %ebp
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	236(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	240(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	244(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	248(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	252(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	256(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	260(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %ecx
+	adcl	120(%ecx), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	setb	(%esp)                          # 1-byte Folded Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	imull	%eax, %ecx
+	movl	%eax, %esi
+	subl	$4, %esp
+	leal	132(%esp), %eax
+	pushl	%ecx
+	pushl	1312(%esp)
+	pushl	%eax
+	calll	mulPv512x32bmi2@PLT
+	addl	$12, %esp
+	addb	$255, (%esp)                    # 1-byte Folded Spill
+	movl	192(%esp), %eax
+	adcl	$0, %eax
+	addl	128(%esp), %esi
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	132(%esp), %ecx
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	136(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	140(%esp), %edx
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	144(%esp), %esi
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	148(%esp), %edi
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	adcl	152(%esp), %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, %ebx
+	adcl	156(%esp), %ebp
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	160(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	164(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	168(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	172(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	176(%esp), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	180(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	184(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	188(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	1300(%esp), %ebp
+	adcl	124(%ebp), %ebx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	subl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	72(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	%edx, %eax
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	sbbl	76(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	sbbl	80(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	sbbl	84(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 84(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	sbbl	88(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	sbbl	100(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	92(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	96(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	sbbl	104(%esp), %eax                 # 4-byte Folded Reload
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	sbbl	108(%esp), %ebp                 # 4-byte Folded Reload
+	movl	12(%esp), %edx                  # 4-byte Reload
+	sbbl	112(%esp), %edx                 # 4-byte Folded Reload
+	movl	48(%esp), %esi                  # 4-byte Reload
+	sbbl	116(%esp), %esi                 # 4-byte Folded Reload
+	movl	8(%esp), %edi                   # 4-byte Reload
+	sbbl	120(%esp), %edi                 # 4-byte Folded Reload
+	movl	%ebx, %ecx
+	sbbl	124(%esp), %ecx                 # 4-byte Folded Reload
+	testl	%ecx, %ecx
+	js	.LBB80_1
+# %bb.2:
+	movl	1296(%esp), %ebx
+	movl	%ecx, 60(%ebx)
+	js	.LBB80_3
+.LBB80_4:
+	movl	%edi, 56(%ebx)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	js	.LBB80_5
+.LBB80_6:
+	movl	%esi, 52(%ebx)
+	movl	84(%esp), %edi                  # 4-byte Reload
+	js	.LBB80_7
+.LBB80_8:
+	movl	%edx, 48(%ebx)
+	movl	80(%esp), %esi                  # 4-byte Reload
+	js	.LBB80_9
+.LBB80_10:
+	movl	%ebp, 44(%ebx)
+	js	.LBB80_11
+.LBB80_12:
+	movl	%eax, 40(%ebx)
+	movl	88(%esp), %ebp                  # 4-byte Reload
+	movl	96(%esp), %eax                  # 4-byte Reload
+	js	.LBB80_13
+.LBB80_14:
+	movl	%eax, 36(%ebx)
+	movl	68(%esp), %edx                  # 4-byte Reload
+	movl	92(%esp), %eax                  # 4-byte Reload
+	js	.LBB80_15
+.LBB80_16:
+	movl	%eax, 32(%ebx)
+	js	.LBB80_17
+.LBB80_18:
+	movl	%ecx, %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%ebx)
+	js	.LBB80_19
+.LBB80_20:
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%ebx)
+	js	.LBB80_21
+.LBB80_22:
+	movl	%ebp, 20(%ebx)
 	movl	%eax, %ecx
-.LBB260_10:
-	movl	%ecx, 12(%edi)
-	movl	124(%esp), %ecx         # 4-byte Reload
-	movl	180(%esp), %eax         # 4-byte Reload
-	jne	.LBB260_12
-# BB#11:
-	movl	88(%esp), %eax          # 4-byte Reload
-.LBB260_12:
-	movl	%eax, 16(%edi)
-	movl	188(%esp), %eax         # 4-byte Reload
-	movl	184(%esp), %ebp         # 4-byte Reload
-	jne	.LBB260_14
-# BB#13:
-	movl	92(%esp), %ebp          # 4-byte Reload
-.LBB260_14:
-	movl	%ebp, 20(%edi)
-	movl	152(%esp), %ebp         # 4-byte Reload
-	jne	.LBB260_16
-# BB#15:
-	movl	96(%esp), %eax          # 4-byte Reload
-.LBB260_16:
-	movl	%eax, 24(%edi)
-	movl	168(%esp), %eax         # 4-byte Reload
-	movl	%eax, 28(%edi)
-	jne	.LBB260_18
-# BB#17:
-	movl	104(%esp), %eax         # 4-byte Reload
-	movl	%eax, 172(%esp)         # 4-byte Spill
-.LBB260_18:
-	movl	172(%esp), %eax         # 4-byte Reload
-	movl	%eax, 32(%edi)
-	jne	.LBB260_20
-# BB#19:
-	movl	108(%esp), %esi         # 4-byte Reload
-.LBB260_20:
-	movl	%esi, 36(%edi)
-	jne	.LBB260_22
-# BB#21:
-	movl	112(%esp), %eax         # 4-byte Reload
-	movl	%eax, 160(%esp)         # 4-byte Spill
-.LBB260_22:
-	movl	160(%esp), %esi         # 4-byte Reload
-	movl	%esi, 40(%edi)
-	movl	128(%esp), %eax         # 4-byte Reload
-	jne	.LBB260_24
-# BB#23:
-	movl	120(%esp), %ebp         # 4-byte Reload
-.LBB260_24:
-	movl	%ebp, 44(%edi)
-	jne	.LBB260_26
-# BB#25:
-	movl	136(%esp), %ebx         # 4-byte Reload
-.LBB260_26:
-	movl	%ebx, 48(%edi)
-	jne	.LBB260_28
-# BB#27:
-	movl	140(%esp), %eax         # 4-byte Reload
-.LBB260_28:
-	movl	%eax, 52(%edi)
-	jne	.LBB260_30
-# BB#29:
-	movl	148(%esp), %edx         # 4-byte Reload
-.LBB260_30:
-	movl	%edx, 56(%edi)
-	movl	116(%esp), %eax         # 4-byte Reload
-	jne	.LBB260_32
-# BB#31:
-	movl	164(%esp), %ecx         # 4-byte Reload
-.LBB260_32:
-	movl	%ecx, 60(%edi)
-	jne	.LBB260_34
-# BB#33:
-	movl	176(%esp), %eax         # 4-byte Reload
-.LBB260_34:
-	movl	%eax, 64(%edi)
-	addl	$1436, %esp             # imm = 0x59C
+	js	.LBB80_23
+.LBB80_24:
+	movl	%edi, 16(%ebx)
+	movl	76(%esp), %eax                  # 4-byte Reload
+	js	.LBB80_25
+.LBB80_26:
+	movl	%esi, 12(%ebx)
+	js	.LBB80_27
+.LBB80_28:
+	movl	%eax, 8(%ebx)
+	js	.LBB80_29
+.LBB80_30:
+	movl	%ecx, 4(%ebx)
+	jns	.LBB80_32
+.LBB80_31:
+	movl	44(%esp), %edx                  # 4-byte Reload
+.LBB80_32:
+	movl	%edx, (%ebx)
+	addl	$1276, %esp                     # imm = 0x4FC
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end260:
-	.size	mcl_fp_montRed17Lbmi2, .Lfunc_end260-mcl_fp_montRed17Lbmi2
-
-	.globl	mcl_fp_addPre17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addPre17Lbmi2,@function
-mcl_fp_addPre17Lbmi2:                   # @mcl_fp_addPre17Lbmi2
-# BB#0:
+.LBB80_1:
+	movl	%ebx, %ecx
+	movl	1296(%esp), %ebx
+	movl	%ecx, 60(%ebx)
+	jns	.LBB80_4
+.LBB80_3:
+	movl	8(%esp), %edi                   # 4-byte Reload
+	movl	%edi, 56(%ebx)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB80_6
+.LBB80_5:
+	movl	48(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 52(%ebx)
+	movl	84(%esp), %edi                  # 4-byte Reload
+	jns	.LBB80_8
+.LBB80_7:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 48(%ebx)
+	movl	80(%esp), %esi                  # 4-byte Reload
+	jns	.LBB80_10
+.LBB80_9:
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	movl	%ebp, 44(%ebx)
+	jns	.LBB80_12
+.LBB80_11:
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 40(%ebx)
+	movl	88(%esp), %ebp                  # 4-byte Reload
+	movl	96(%esp), %eax                  # 4-byte Reload
+	jns	.LBB80_14
+.LBB80_13:
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 36(%ebx)
+	movl	68(%esp), %edx                  # 4-byte Reload
+	movl	92(%esp), %eax                  # 4-byte Reload
+	jns	.LBB80_16
+.LBB80_15:
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 32(%ebx)
+	jns	.LBB80_18
+.LBB80_17:
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%ebx)
+	jns	.LBB80_20
+.LBB80_19:
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%ebx)
+	jns	.LBB80_22
+.LBB80_21:
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%ebx)
+	movl	%eax, %ecx
+	jns	.LBB80_24
+.LBB80_23:
+	movl	40(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 16(%ebx)
+	movl	76(%esp), %eax                  # 4-byte Reload
+	jns	.LBB80_26
+.LBB80_25:
+	movl	56(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 12(%ebx)
+	jns	.LBB80_28
+.LBB80_27:
+	movl	52(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ebx)
+	jns	.LBB80_30
+.LBB80_29:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%ebx)
+	js	.LBB80_31
+	jmp	.LBB80_32
+.Lfunc_end80:
+	.size	mcl_fp_montRedNF16Lbmi2, .Lfunc_end80-mcl_fp_montRedNF16Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addPre16Lbmi2            # -- Begin function mcl_fp_addPre16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre16Lbmi2,@function
+mcl_fp_addPre16Lbmi2:                   # @mcl_fp_addPre16Lbmi2
+# %bb.0:
+	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %ebx
-	adcl	8(%ecx), %ebx
-	movl	16(%esp), %edi
-	movl	%edx, (%edi)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%edi)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%ebx, 8(%edi)
-	movl	20(%eax), %ebx
-	movl	%edx, 12(%edi)
-	movl	20(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	24(%eax), %ebx
-	movl	%esi, 16(%edi)
-	movl	24(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	28(%eax), %ebx
-	movl	%edx, 20(%edi)
-	movl	28(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	32(%eax), %ebx
-	movl	%esi, 24(%edi)
-	movl	32(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	36(%eax), %ebx
-	movl	%edx, 28(%edi)
-	movl	36(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	40(%eax), %ebx
-	movl	%esi, 32(%edi)
-	movl	40(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	44(%eax), %ebx
-	movl	%edx, 36(%edi)
-	movl	44(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	48(%eax), %ebx
-	movl	%esi, 40(%edi)
-	movl	48(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	52(%eax), %ebx
-	movl	%edx, 44(%edi)
-	movl	52(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	56(%eax), %ebx
-	movl	%esi, 48(%edi)
-	movl	56(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	60(%eax), %ebx
-	movl	%edx, 52(%edi)
-	movl	60(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	%esi, 56(%edi)
-	movl	%edx, 60(%edi)
-	movl	64(%eax), %eax
-	movl	64(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 64(%edi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
+	subl	$64, %esp
+	movl	88(%esp), %edi
+	movl	(%edi), %eax
+	movl	4(%edi), %ecx
+	movl	92(%esp), %esi
+	addl	(%esi), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	4(%esi), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	60(%edi), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%edi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	52(%edi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%edi), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	44(%edi), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	40(%edi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%edi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	32(%edi), %edx
+	movl	28(%edi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%edi), %ecx
+	movl	20(%edi), %eax
+	movl	16(%edi), %ebp
+	movl	12(%edi), %ebx
+	movl	8(%edi), %edi
+	adcl	8(%esi), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	adcl	12(%esi), %ebx
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	adcl	16(%esi), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	adcl	20(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	24(%esi), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	28(%esi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	32(%esi), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	36(%esi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	40(%esi), %ebp
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	44(%esi), %edi
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	48(%esi), %edx
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	52(%esi), %ecx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	56(%esi), %eax
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	adcl	60(%esi), %ebx
+	movl	84(%esp), %esi
+	movl	%eax, 56(%esi)
+	movl	%ecx, 52(%esi)
+	movl	%edx, 48(%esi)
+	movl	%edi, 44(%esi)
+	movl	%ebp, 40(%esi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 36(%esi)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 32(%esi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 28(%esi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%esi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 20(%esi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%esi)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%esi)
+	movl	48(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%esi)
+	movl	%ebx, 60(%esi)
+	movl	56(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%esi)
+	movl	60(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%esi)
+	setb	%al
+	movzbl	%al, %eax
+	addl	$64, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
+	popl	%ebp
 	retl
-.Lfunc_end261:
-	.size	mcl_fp_addPre17Lbmi2, .Lfunc_end261-mcl_fp_addPre17Lbmi2
-
-	.globl	mcl_fp_subPre17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subPre17Lbmi2,@function
-mcl_fp_subPre17Lbmi2:                   # @mcl_fp_subPre17Lbmi2
-# BB#0:
+.Lfunc_end81:
+	.size	mcl_fp_addPre16Lbmi2, .Lfunc_end81-mcl_fp_addPre16Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subPre16Lbmi2            # -- Begin function mcl_fp_subPre16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre16Lbmi2,@function
+mcl_fp_subPre16Lbmi2:                   # @mcl_fp_subPre16Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebp
-	sbbl	8(%edx), %ebp
-	movl	20(%esp), %ebx
-	movl	%esi, (%ebx)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebx)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebp, 8(%ebx)
-	movl	20(%edx), %ebp
-	movl	%esi, 12(%ebx)
-	movl	20(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	24(%edx), %ebp
-	movl	%edi, 16(%ebx)
-	movl	24(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	28(%edx), %ebp
-	movl	%esi, 20(%ebx)
-	movl	28(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	32(%edx), %ebp
-	movl	%edi, 24(%ebx)
-	movl	32(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	36(%edx), %ebp
-	movl	%esi, 28(%ebx)
-	movl	36(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	40(%edx), %ebp
-	movl	%edi, 32(%ebx)
-	movl	40(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	44(%edx), %ebp
-	movl	%esi, 36(%ebx)
-	movl	44(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	48(%edx), %ebp
-	movl	%edi, 40(%ebx)
-	movl	48(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	52(%edx), %ebp
-	movl	%esi, 44(%ebx)
-	movl	52(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	56(%edx), %ebp
-	movl	%edi, 48(%ebx)
-	movl	56(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	60(%edx), %ebp
-	movl	%esi, 52(%ebx)
-	movl	60(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	%edi, 56(%ebx)
-	movl	%esi, 60(%ebx)
-	movl	64(%edx), %edx
-	movl	64(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 64(%ebx)
-	sbbl	$0, %eax
+	subl	$64, %esp
+	movl	88(%esp), %ebx
+	movl	(%ebx), %ecx
+	movl	4(%ebx), %eax
+	movl	92(%esp), %edi
+	subl	(%edi), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	sbbl	4(%edi), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	60(%ebx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%ebx), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	52(%ebx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%ebx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	44(%ebx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	40(%ebx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%ebx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	32(%ebx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	28(%ebx), %ebp
+	movl	24(%ebx), %esi
+	movl	20(%ebx), %edx
+	movl	16(%ebx), %ecx
+	movl	12(%ebx), %eax
+	movl	8(%ebx), %ebx
+	sbbl	8(%edi), %ebx
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	sbbl	12(%edi), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	sbbl	16(%edi), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	sbbl	20(%edi), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	sbbl	24(%edi), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	sbbl	28(%edi), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	32(%edi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	sbbl	36(%edi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	sbbl	40(%edi), %ebp
+	movl	20(%esp), %esi                  # 4-byte Reload
+	sbbl	44(%edi), %esi
+	movl	28(%esp), %edx                  # 4-byte Reload
+	sbbl	48(%edi), %edx
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	sbbl	52(%edi), %ecx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	sbbl	56(%edi), %eax
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	sbbl	60(%edi), %ebx
+	movl	84(%esp), %edi
+	movl	%eax, 56(%edi)
+	movl	%ecx, 52(%edi)
+	movl	%edx, 48(%edi)
+	movl	%esi, 44(%edi)
+	movl	%ebp, 40(%edi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 36(%edi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 32(%edi)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 28(%edi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%edi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 20(%edi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%edi)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edi)
+	movl	48(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%edi)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	movl	%ebx, 60(%edi)
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%edi)
+	movl	$0, %eax
+	sbbl	%eax, %eax
 	andl	$1, %eax
+	addl	$64, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end262:
-	.size	mcl_fp_subPre17Lbmi2, .Lfunc_end262-mcl_fp_subPre17Lbmi2
-
-	.globl	mcl_fp_shr1_17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_shr1_17Lbmi2,@function
-mcl_fp_shr1_17Lbmi2:                    # @mcl_fp_shr1_17Lbmi2
-# BB#0:
+.Lfunc_end82:
+	.size	mcl_fp_subPre16Lbmi2, .Lfunc_end82-mcl_fp_subPre16Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_shr1_16Lbmi2             # -- Begin function mcl_fp_shr1_16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_16Lbmi2,@function
+mcl_fp_shr1_16Lbmi2:                    # @mcl_fp_shr1_16Lbmi2
+# %bb.0:
 	pushl	%esi
 	movl	12(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	shrdl	$1, %esi, %edx
+	movl	60(%eax), %edx
+	movl	%edx, %esi
+	shrl	%esi
 	movl	8(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 4(%ecx)
-	movl	12(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 8(%ecx)
-	movl	16(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 16(%ecx)
-	movl	24(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 24(%ecx)
-	movl	32(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 32(%ecx)
-	movl	40(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 40(%ecx)
-	movl	48(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 44(%ecx)
-	movl	52(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 48(%ecx)
-	movl	56(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 52(%ecx)
-	movl	60(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 56(%ecx)
-	movl	64(%eax), %eax
-	shrdl	$1, %eax, %esi
 	movl	%esi, 60(%ecx)
-	shrl	%eax
-	movl	%eax, 64(%ecx)
+	movl	56(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 56(%ecx)
+	movl	52(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 52(%ecx)
+	movl	48(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 48(%ecx)
+	movl	44(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 44(%ecx)
+	movl	40(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 40(%ecx)
+	movl	36(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 36(%ecx)
+	movl	32(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 32(%ecx)
+	movl	28(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 28(%ecx)
+	movl	24(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 24(%ecx)
+	movl	20(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 20(%ecx)
+	movl	16(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 16(%ecx)
+	movl	12(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 12(%ecx)
+	movl	8(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 8(%ecx)
+	movl	4(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 4(%ecx)
+	movl	(%eax), %eax
+	shrdl	$1, %edx, %eax
+	movl	%eax, (%ecx)
 	popl	%esi
 	retl
-.Lfunc_end263:
-	.size	mcl_fp_shr1_17Lbmi2, .Lfunc_end263-mcl_fp_shr1_17Lbmi2
-
-	.globl	mcl_fp_add17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_add17Lbmi2,@function
-mcl_fp_add17Lbmi2:                      # @mcl_fp_add17Lbmi2
-# BB#0:
+.Lfunc_end83:
+	.size	mcl_fp_shr1_16Lbmi2, .Lfunc_end83-mcl_fp_shr1_16Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_add16Lbmi2               # -- Begin function mcl_fp_add16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_add16Lbmi2,@function
+mcl_fp_add16Lbmi2:                      # @mcl_fp_add16Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$60, %esp
-	movl	88(%esp), %esi
-	movl	(%esi), %ecx
-	movl	4(%esi), %eax
-	movl	84(%esp), %edx
-	addl	(%edx), %ecx
-	movl	%ecx, %ebx
+	subl	$64, %esp
+	movl	88(%esp), %ecx
+	movl	(%ecx), %esi
+	movl	4(%ecx), %eax
+	movl	92(%esp), %edx
+	addl	(%edx), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
 	adcl	4(%edx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	8(%esi), %eax
-	adcl	8(%edx), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	16(%edx), %edi
-	adcl	12(%esi), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	16(%esi), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	20(%edx), %eax
-	adcl	20(%esi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	24(%edx), %eax
-	adcl	24(%esi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	28(%edx), %eax
-	adcl	28(%esi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%edx), %eax
-	adcl	32(%esi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	36(%edx), %eax
-	adcl	36(%esi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	40(%edx), %eax
-	adcl	40(%esi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%edx), %eax
-	adcl	44(%esi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	48(%edx), %eax
-	adcl	48(%esi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	52(%edx), %eax
-	adcl	52(%esi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	56(%edx), %eax
-	adcl	56(%esi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	60(%edx), %ebp
-	adcl	60(%esi), %ebp
-	movl	64(%edx), %edx
-	adcl	64(%esi), %edx
-	movl	80(%esp), %esi
-	movl	%ebx, (%esi)
-	movl	%ebx, %eax
-	movl	8(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 4(%esi)
-	movl	56(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%esi)
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 12(%esi)
-	movl	%edi, 16(%esi)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%esi)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%esi)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%esi)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%esi)
-	movl	32(%esp), %edi          # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	60(%ecx), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	56(%ecx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%ecx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%ecx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	44(%ecx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	40(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	36(%ecx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	32(%ecx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	28(%ecx), %edi
+	movl	24(%ecx), %ebx
+	movl	20(%ecx), %esi
+	movl	16(%ecx), %eax
+	movl	12(%ecx), %ebp
+	movl	8(%ecx), %ecx
+	adcl	8(%edx), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	12(%edx), %ebp
+	adcl	16(%edx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	20(%edx), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	adcl	24(%edx), %ebx
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	adcl	28(%edx), %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	32(%edx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	36(%edx), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	40(%edx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	44(%edx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	48(%edx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	52(%edx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	56(%edx), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebx                  # 4-byte Reload
+	adcl	60(%edx), %ebx
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	movl	84(%esp), %esi
+	movl	%ebx, 60(%esi)
+	movl	%ecx, 56(%esi)
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	movl	%eax, 52(%esi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%esi)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 44(%esi)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 40(%esi)
 	movl	%edi, 36(%esi)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 40(%esi)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 44(%esi)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 48(%esi)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 52(%esi)
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, 56(%esi)
-	movl	%ebp, 60(%esi)
-	movl	%edx, 64(%esi)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	92(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	sbbl	8(%edi), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	sbbl	12(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	4(%esp), %eax           # 4-byte Reload
-	sbbl	16(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	sbbl	20(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	sbbl	24(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	sbbl	28(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	sbbl	44(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	sbbl	48(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	sbbl	52(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	sbbl	56(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	sbbl	60(%edi), %ebp
-	sbbl	64(%edi), %edx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 32(%esi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 28(%esi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%esi)
+	movl	40(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 20(%esi)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%esi)
+	movl	%ebp, 12(%esi)
+	movl	%ebx, 8(%esi)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%esi)
+	movl	48(%esp), %edx                  # 4-byte Reload
+	movl	%edx, (%esi)
+	setb	3(%esp)                         # 1-byte Folded Spill
+	movl	96(%esp), %esi
+	subl	(%esi), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	sbbl	4(%esi), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	sbbl	8(%esi), %ebx
+	movl	%ebx, %ecx
+	sbbl	12(%esi), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	sbbl	16(%esi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	sbbl	20(%esi), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%esi), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	32(%esi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%esi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	40(%esi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	44(%esi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	48(%esi), %eax
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	sbbl	52(%esi), %ebp
+	movl	56(%esp), %edi                  # 4-byte Reload
+	sbbl	56(%esi), %edi
+	movl	60(%esp), %ebx                  # 4-byte Reload
+	sbbl	60(%esi), %ebx
+	movl	%ebx, %esi
+	movzbl	3(%esp), %ebx                   # 1-byte Folded Reload
 	sbbl	$0, %ebx
 	testb	$1, %bl
-	jne	.LBB264_2
-# BB#1:                                 # %nocarry
-	movl	(%esp), %edi            # 4-byte Reload
-	movl	%edi, (%esi)
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	%edi, 4(%esi)
-	movl	56(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%esi)
-	movl	52(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%esi)
-	movl	4(%esp), %edi           # 4-byte Reload
-	movl	%edi, 16(%esi)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%esi)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%esi)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%esi)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%esi)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 36(%esi)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 40(%esi)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 44(%esi)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 48(%esi)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%esi)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%esi)
-	movl	%ebp, 60(%esi)
-	movl	%edx, 64(%esi)
-.LBB264_2:                              # %carry
-	addl	$60, %esp
+	jne	.LBB84_2
+# %bb.1:                                # %nocarry
+	movl	48(%esp), %ebx                  # 4-byte Reload
+	movl	84(%esp), %edx
+	movl	%ebx, (%edx)
+	movl	44(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 4(%edx)
+	movl	%ecx, 8(%edx)
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%edx)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%edx)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%edx)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%edx)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%edx)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 32(%edx)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 36(%edx)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 40(%edx)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 44(%edx)
+	movl	%eax, 48(%edx)
+	movl	%ebp, 52(%edx)
+	movl	%edi, 56(%edx)
+	movl	%esi, 60(%edx)
+.LBB84_2:                               # %carry
+	addl	$64, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end264:
-	.size	mcl_fp_add17Lbmi2, .Lfunc_end264-mcl_fp_add17Lbmi2
-
-	.globl	mcl_fp_addNF17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_addNF17Lbmi2,@function
-mcl_fp_addNF17Lbmi2:                    # @mcl_fp_addNF17Lbmi2
-# BB#0:
+.Lfunc_end84:
+	.size	mcl_fp_add16Lbmi2, .Lfunc_end84-mcl_fp_add16Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_addNF16Lbmi2             # -- Begin function mcl_fp_addNF16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF16Lbmi2,@function
+mcl_fp_addNF16Lbmi2:                    # @mcl_fp_addNF16Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$132, %esp
-	movl	160(%esp), %eax
-	movl	(%eax), %ecx
+	subl	$104, %esp
+	movl	132(%esp), %eax
+	movl	(%eax), %esi
 	movl	4(%eax), %edx
-	movl	156(%esp), %esi
-	addl	(%esi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	4(%esi), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	64(%eax), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	60(%eax), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	56(%eax), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	52(%eax), %ebp
-	movl	48(%eax), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	44(%eax), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	40(%eax), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	36(%eax), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	32(%eax), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	28(%eax), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
+	movl	128(%esp), %ecx
+	addl	(%ecx), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	adcl	4(%ecx), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	60(%eax), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	56(%eax), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	52(%eax), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	48(%eax), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	44(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	40(%eax), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	36(%eax), %edx
+	movl	32(%eax), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	28(%eax), %esi
+	movl	24(%eax), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
 	movl	20(%eax), %ebx
-	movl	16(%eax), %edi
-	movl	12(%eax), %edx
-	movl	8(%eax), %ecx
-	adcl	8(%esi), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	12(%esi), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	16(%esi), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	20(%esi), %ebx
-	movl	%ebx, 76(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	24(%esi), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	28(%esi), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	32(%esi), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esi), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	40(%esi), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	44(%esi), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	48(%esi), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	52(%esi), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	56(%esi), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	60(%esi), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	64(%esi), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	164(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	subl	(%esi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	sbbl	4(%esi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	sbbl	8(%esi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	12(%esi), %edx
-	sbbl	16(%esi), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	sbbl	20(%esi), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	120(%esp), %ebx         # 4-byte Reload
-	sbbl	24(%esi), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	sbbl	28(%esi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	sbbl	32(%esi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	36(%esi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	sbbl	40(%esi), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	sbbl	44(%esi), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	sbbl	48(%esi), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ecx
-	sbbl	52(%esi), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ecx
-	movl	%eax, %ebp
-	sbbl	56(%esi), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ecx
-	sbbl	60(%esi), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	movl	%eax, %ebx
-	sbbl	64(%esi), %ebx
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	%ebx, %esi
-	sarl	$31, %esi
-	testl	%esi, %esi
-	movl	84(%esp), %esi          # 4-byte Reload
-	js	.LBB265_2
-# BB#1:
-	movl	(%esp), %esi            # 4-byte Reload
-.LBB265_2:
-	movl	152(%esp), %ebx
-	movl	%esi, (%ebx)
-	movl	88(%esp), %eax          # 4-byte Reload
-	js	.LBB265_4
-# BB#3:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB265_4:
-	movl	%eax, 4(%ebx)
-	movl	108(%esp), %eax         # 4-byte Reload
-	movl	76(%esp), %esi          # 4-byte Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	js	.LBB265_6
-# BB#5:
-	movl	8(%esp), %edi           # 4-byte Reload
-.LBB265_6:
-	movl	%edi, 8(%ebx)
-	movl	116(%esp), %edi         # 4-byte Reload
-	movl	68(%esp), %ecx          # 4-byte Reload
-	js	.LBB265_8
-# BB#7:
-	movl	%edx, %ecx
-.LBB265_8:
-	movl	%ecx, 12(%ebx)
-	movl	104(%esp), %ecx         # 4-byte Reload
-	movl	72(%esp), %edx          # 4-byte Reload
-	js	.LBB265_10
-# BB#9:
-	movl	12(%esp), %edx          # 4-byte Reload
-.LBB265_10:
-	movl	%edx, 16(%ebx)
-	movl	%ebp, %edx
-	js	.LBB265_12
-# BB#11:
-	movl	16(%esp), %esi          # 4-byte Reload
-.LBB265_12:
-	movl	%esi, 20(%ebx)
-	movl	112(%esp), %ebp         # 4-byte Reload
-	js	.LBB265_14
-# BB#13:
-	movl	20(%esp), %esi          # 4-byte Reload
-	movl	%esi, 120(%esp)         # 4-byte Spill
-.LBB265_14:
-	movl	120(%esp), %esi         # 4-byte Reload
-	movl	%esi, 24(%ebx)
-	js	.LBB265_16
-# BB#15:
-	movl	24(%esp), %ecx          # 4-byte Reload
-.LBB265_16:
-	movl	%ecx, 28(%ebx)
-	js	.LBB265_18
-# BB#17:
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-.LBB265_18:
-	movl	124(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 32(%ebx)
-	js	.LBB265_20
-# BB#19:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB265_20:
-	movl	%eax, 36(%ebx)
-	movl	100(%esp), %ecx         # 4-byte Reload
-	js	.LBB265_22
-# BB#21:
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	%eax, 128(%esp)         # 4-byte Spill
-.LBB265_22:
-	movl	128(%esp), %eax         # 4-byte Reload
-	movl	%eax, 40(%ebx)
-	js	.LBB265_24
-# BB#23:
-	movl	40(%esp), %ebp          # 4-byte Reload
-.LBB265_24:
-	movl	%ebp, 44(%ebx)
-	js	.LBB265_26
-# BB#25:
-	movl	44(%esp), %edi          # 4-byte Reload
-.LBB265_26:
-	movl	%edi, 48(%ebx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	js	.LBB265_28
-# BB#27:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB265_28:
-	movl	%eax, 52(%ebx)
-	js	.LBB265_30
-# BB#29:
-	movl	52(%esp), %edx          # 4-byte Reload
-.LBB265_30:
-	movl	%edx, 56(%ebx)
-	movl	96(%esp), %eax          # 4-byte Reload
-	js	.LBB265_32
-# BB#31:
-	movl	56(%esp), %eax          # 4-byte Reload
-.LBB265_32:
-	movl	%eax, 60(%ebx)
-	js	.LBB265_34
-# BB#33:
-	movl	60(%esp), %ecx          # 4-byte Reload
-.LBB265_34:
-	movl	%ecx, 64(%ebx)
-	addl	$132, %esp
+	movl	16(%eax), %ebp
+	movl	12(%eax), %edi
+	movl	8(%eax), %eax
+	adcl	8(%ecx), %eax
+	adcl	12(%ecx), %edi
+	adcl	16(%ecx), %ebp
+	adcl	20(%ecx), %ebx
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	adcl	24(%ecx), %ebx
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	adcl	28(%ecx), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	32(%ecx), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	adcl	36(%ecx), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	40(%ecx), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	44(%ecx), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	48(%ecx), %ebx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	52(%ecx), %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	56(%ecx), %ebx
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %ebx                    # 4-byte Reload
+	adcl	60(%ecx), %ebx
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	movl	136(%esp), %ebx
+	movl	56(%esp), %edx                  # 4-byte Reload
+	subl	(%ebx), %edx
+	movl	%edx, 100(%esp)                 # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	sbbl	4(%ebx), %ecx
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	sbbl	8(%ebx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	sbbl	12(%ebx), %edi
+	movl	%edi, 88(%esp)                  # 4-byte Spill
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	sbbl	16(%ebx), %ebp
+	movl	%ebp, 84(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	sbbl	20(%ebx), %esi
+	movl	%esi, 80(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%ebx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%ebx), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	32(%ebx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%ebx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	sbbl	40(%ebx), %ebp
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	sbbl	44(%ebx), %ecx
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	48(%ebx), %eax
+	movl	8(%esp), %esi                   # 4-byte Reload
+	sbbl	52(%ebx), %esi
+	movl	4(%esp), %edi                   # 4-byte Reload
+	sbbl	56(%ebx), %edi
+	movl	(%esp), %edx                    # 4-byte Reload
+	sbbl	60(%ebx), %edx
+	testl	%edx, %edx
+	js	.LBB85_1
+# %bb.2:
+	movl	124(%esp), %ebx
+	movl	%edx, 60(%ebx)
+	js	.LBB85_3
+.LBB85_4:
+	movl	%edi, 56(%ebx)
+	movl	92(%esp), %edx                  # 4-byte Reload
+	js	.LBB85_5
+.LBB85_6:
+	movl	%esi, 52(%ebx)
+	movl	84(%esp), %edi                  # 4-byte Reload
+	js	.LBB85_7
+.LBB85_8:
+	movl	%eax, 48(%ebx)
+	js	.LBB85_9
+.LBB85_10:
+	movl	%ecx, 44(%ebx)
+	movl	100(%esp), %ecx                 # 4-byte Reload
+	movl	76(%esp), %eax                  # 4-byte Reload
+	js	.LBB85_11
+.LBB85_12:
+	movl	%ebp, 40(%ebx)
+	movl	96(%esp), %esi                  # 4-byte Reload
+	movl	72(%esp), %ebp                  # 4-byte Reload
+	js	.LBB85_13
+.LBB85_14:
+	movl	%ebp, 36(%ebx)
+	movl	80(%esp), %ebp                  # 4-byte Reload
+	js	.LBB85_15
+.LBB85_16:
+	movl	%eax, 32(%ebx)
+	js	.LBB85_17
+.LBB85_18:
+	movl	%edx, %eax
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 28(%ebx)
+	js	.LBB85_19
+.LBB85_20:
+	movl	36(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 24(%ebx)
+	js	.LBB85_21
+.LBB85_22:
+	movl	%ebp, 20(%ebx)
+	movl	%eax, %edx
+	js	.LBB85_23
+.LBB85_24:
+	movl	%edi, 16(%ebx)
+	movl	88(%esp), %eax                  # 4-byte Reload
+	js	.LBB85_25
+.LBB85_26:
+	movl	%eax, 12(%ebx)
+	js	.LBB85_27
+.LBB85_28:
+	movl	%edx, 8(%ebx)
+	js	.LBB85_29
+.LBB85_30:
+	movl	%esi, 4(%ebx)
+	jns	.LBB85_32
+.LBB85_31:
+	movl	56(%esp), %ecx                  # 4-byte Reload
+.LBB85_32:
+	movl	%ecx, (%ebx)
+	addl	$104, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end265:
-	.size	mcl_fp_addNF17Lbmi2, .Lfunc_end265-mcl_fp_addNF17Lbmi2
-
-	.globl	mcl_fp_sub17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_sub17Lbmi2,@function
-mcl_fp_sub17Lbmi2:                      # @mcl_fp_sub17Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$64, %esp
-	movl	88(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	92(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esi), %eax
-	sbbl	28(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esi), %eax
-	sbbl	32(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esi), %eax
-	sbbl	36(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	40(%esi), %eax
-	sbbl	40(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	44(%esi), %eax
-	sbbl	44(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	48(%esi), %edx
-	sbbl	48(%edi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	52(%esi), %ecx
-	sbbl	52(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	56(%esi), %eax
-	sbbl	56(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	60(%esi), %ebp
-	sbbl	60(%edi), %ebp
-	movl	64(%esi), %esi
-	sbbl	64(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	84(%esp), %ebx
-	movl	56(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	60(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	52(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 36(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 40(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 44(%ebx)
-	movl	%edx, 48(%ebx)
-	movl	%ecx, 52(%ebx)
-	movl	%eax, 56(%ebx)
-	movl	%ebp, 60(%ebx)
-	movl	%esi, 64(%ebx)
-	je	.LBB266_2
-# BB#1:                                 # %carry
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	96(%esp), %esi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	8(%esi), %edi
-	movl	12(%esi), %eax
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%edi, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	36(%esi), %eax
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	40(%esi), %ecx
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 36(%ebx)
-	movl	44(%esi), %eax
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 40(%ebx)
-	movl	48(%esi), %ecx
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 44(%ebx)
-	movl	52(%esi), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 48(%ebx)
-	movl	56(%esi), %ecx
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 52(%ebx)
-	movl	%ecx, 56(%ebx)
-	movl	60(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 60(%ebx)
-	movl	64(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, 64(%ebx)
-.LBB266_2:                              # %nocarry
+.LBB85_1:
+	movl	(%esp), %edx                    # 4-byte Reload
+	movl	124(%esp), %ebx
+	movl	%edx, 60(%ebx)
+	jns	.LBB85_4
+.LBB85_3:
+	movl	4(%esp), %edi                   # 4-byte Reload
+	movl	%edi, 56(%ebx)
+	movl	92(%esp), %edx                  # 4-byte Reload
+	jns	.LBB85_6
+.LBB85_5:
+	movl	8(%esp), %esi                   # 4-byte Reload
+	movl	%esi, 52(%ebx)
+	movl	84(%esp), %edi                  # 4-byte Reload
+	jns	.LBB85_8
+.LBB85_7:
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%ebx)
+	jns	.LBB85_10
+.LBB85_9:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 44(%ebx)
+	movl	100(%esp), %ecx                 # 4-byte Reload
+	movl	76(%esp), %eax                  # 4-byte Reload
+	jns	.LBB85_12
+.LBB85_11:
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 40(%ebx)
+	movl	96(%esp), %esi                  # 4-byte Reload
+	movl	72(%esp), %ebp                  # 4-byte Reload
+	jns	.LBB85_14
+.LBB85_13:
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 36(%ebx)
+	movl	80(%esp), %ebp                  # 4-byte Reload
+	jns	.LBB85_16
+.LBB85_15:
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 32(%ebx)
+	jns	.LBB85_18
+.LBB85_17:
+	movl	44(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%edx, %eax
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 28(%ebx)
+	jns	.LBB85_20
+.LBB85_19:
+	movl	28(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 24(%ebx)
+	jns	.LBB85_22
+.LBB85_21:
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%ebx)
+	movl	%eax, %edx
+	jns	.LBB85_24
+.LBB85_23:
+	movl	60(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 16(%ebx)
+	movl	88(%esp), %eax                  # 4-byte Reload
+	jns	.LBB85_26
+.LBB85_25:
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebx)
+	jns	.LBB85_28
+.LBB85_27:
+	movl	68(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%ebx)
+	jns	.LBB85_30
+.LBB85_29:
+	movl	52(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 4(%ebx)
+	js	.LBB85_31
+	jmp	.LBB85_32
+.Lfunc_end85:
+	.size	mcl_fp_addNF16Lbmi2, .Lfunc_end85-mcl_fp_addNF16Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_sub16Lbmi2               # -- Begin function mcl_fp_sub16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_sub16Lbmi2,@function
+mcl_fp_sub16Lbmi2:                      # @mcl_fp_sub16Lbmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$64, %esp
+	movl	88(%esp), %edx
+	movl	(%edx), %ecx
+	movl	4(%edx), %esi
+	movl	92(%esp), %edi
+	subl	(%edi), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	sbbl	4(%edi), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	60(%edx), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	56(%edx), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%edx), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	48(%edx), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	44(%edx), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	40(%edx), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	36(%edx), %ebp
+	movl	32(%edx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	28(%edx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	24(%edx), %ebx
+	movl	20(%edx), %esi
+	movl	16(%edx), %ecx
+	movl	12(%edx), %eax
+	movl	8(%edx), %edx
+	sbbl	8(%edi), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	sbbl	12(%edi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	sbbl	16(%edi), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	sbbl	20(%edi), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	sbbl	24(%edi), %ebx
+	movl	8(%esp), %esi                   # 4-byte Reload
+	sbbl	28(%edi), %esi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	sbbl	32(%edi), %ecx
+	sbbl	36(%edi), %ebp
+	movl	(%esp), %edx                    # 4-byte Reload
+	sbbl	40(%edi), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	sbbl	44(%edi), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	sbbl	48(%edi), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %edx                  # 4-byte Reload
+	sbbl	52(%edi), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	sbbl	56(%edi), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	60(%esp), %edx                  # 4-byte Reload
+	sbbl	60(%edi), %edx
+	movl	$0, %eax
+	sbbl	%eax, %eax
+	testb	$1, %al
+	movl	84(%esp), %eax
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	%edx, 60(%eax)
+	movl	20(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 56(%eax)
+	movl	24(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 52(%eax)
+	movl	28(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 48(%eax)
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 44(%eax)
+	movl	(%esp), %edx                    # 4-byte Reload
+	movl	%edx, 40(%eax)
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	movl	%ebp, 36(%eax)
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, 32(%eax)
+	movl	%esi, %ecx
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	%esi, 28(%eax)
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	movl	%ebx, 24(%eax)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%eax)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, %edx
+	movl	%ecx, 12(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, %ebx
+	movl	%ecx, 8(%eax)
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	56(%esp), %edi                  # 4-byte Reload
+	movl	%edi, (%eax)
+	je	.LBB86_2
+# %bb.1:                                # %carry
+	movl	%ecx, %esi
+	movl	%edi, %ecx
+	movl	96(%esp), %ecx
+	addl	(%ecx), %edi
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	adcl	4(%ecx), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	adcl	8(%ecx), %ebx
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	adcl	12(%ecx), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	16(%ecx), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	20(%ecx), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	24(%ecx), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	28(%ecx), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	32(%ecx), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	36(%ecx), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	40(%ecx), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	adcl	44(%ecx), %ebx
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	48(%ecx), %edi
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	52(%ecx), %esi
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	56(%ecx), %edx
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	adcl	60(%ecx), %ebp
+	movl	%ebp, 60(%eax)
+	movl	%edx, 56(%eax)
+	movl	%esi, 52(%eax)
+	movl	%edi, 48(%eax)
+	movl	%ebx, 44(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 40(%eax)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 36(%eax)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 32(%eax)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 28(%eax)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%eax)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%eax)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 8(%eax)
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%eax)
+.LBB86_2:                               # %nocarry
 	addl	$64, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end266:
-	.size	mcl_fp_sub17Lbmi2, .Lfunc_end266-mcl_fp_sub17Lbmi2
-
-	.globl	mcl_fp_subNF17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fp_subNF17Lbmi2,@function
-mcl_fp_subNF17Lbmi2:                    # @mcl_fp_subNF17Lbmi2
-# BB#0:
+.Lfunc_end86:
+	.size	mcl_fp_sub16Lbmi2, .Lfunc_end86-mcl_fp_sub16Lbmi2
+                                        # -- End function
+	.globl	mcl_fp_subNF16Lbmi2             # -- Begin function mcl_fp_subNF16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF16Lbmi2,@function
+mcl_fp_subNF16Lbmi2:                    # @mcl_fp_subNF16Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$116, %esp
-	movl	140(%esp), %ecx
+	subl	$104, %esp
+	movl	128(%esp), %ecx
 	movl	(%ecx), %esi
 	movl	4(%ecx), %edx
-	movl	144(%esp), %edi
+	movl	132(%esp), %edi
 	subl	(%edi), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
+	movl	%esi, 96(%esp)                  # 4-byte Spill
 	sbbl	4(%edi), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	64(%ecx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	60(%ecx), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
+	movl	%edx, 100(%esp)                 # 4-byte Spill
+	movl	60(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
 	movl	56(%ecx), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
+	movl	%edx, 24(%esp)                  # 4-byte Spill
 	movl	52(%ecx), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
+	movl	%edx, 20(%esp)                  # 4-byte Spill
 	movl	48(%ecx), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
+	movl	%edx, 16(%esp)                  # 4-byte Spill
 	movl	44(%ecx), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	40(%ecx), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	36(%ecx), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	40(%ecx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	36(%ecx), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
 	movl	32(%ecx), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
 	movl	28(%ecx), %ebp
 	movl	24(%ecx), %ebx
 	movl	20(%ecx), %esi
@@ -70787,761 +24960,804 @@ mcl_fp_subNF17Lbmi2:                    # @mcl_fp_subNF17Lbmi2
 	movl	12(%ecx), %eax
 	movl	8(%ecx), %ecx
 	sbbl	8(%edi), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
 	sbbl	12(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
+	movl	%eax, 72(%esp)                  # 4-byte Spill
 	sbbl	16(%edi), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
+	movl	%edx, 76(%esp)                  # 4-byte Spill
 	sbbl	20(%edi), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
+	movl	%esi, 80(%esp)                  # 4-byte Spill
 	sbbl	24(%edi), %ebx
-	movl	%ebx, 64(%esp)          # 4-byte Spill
+	movl	%ebx, 84(%esp)                  # 4-byte Spill
 	sbbl	28(%edi), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%ebp, 88(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	sbbl	32(%edi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
 	sbbl	36(%edi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	sbbl	40(%edi), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	40(%edi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
 	sbbl	44(%edi), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
 	sbbl	48(%edi), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
 	sbbl	52(%edi), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
 	sbbl	56(%edi), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	sbbl	60(%edi), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	sbbl	64(%edi), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	60(%edi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
 	sarl	$31, %eax
-	movl	%eax, %edx
-	shldl	$1, %ecx, %edx
-	movl	148(%esp), %ebx
-	movl	28(%ebx), %ecx
-	andl	%edx, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	12(%ebx), %ecx
-	andl	%edx, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	4(%ebx), %ecx
-	andl	%edx, %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	andl	(%ebx), %edx
-	movl	64(%ebx), %edi
-	movl	%eax, %ecx
-	andl	%ecx, %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	rorxl	$31, %ecx, %eax
-	andl	60(%ebx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	56(%ebx), %ecx
+	movl	136(%esp), %esi
+	movl	60(%esi), %ecx
+	andl	%eax, %ecx
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	movl	56(%esi), %ecx
+	andl	%eax, %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	52(%esi), %ecx
+	andl	%eax, %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	48(%esi), %ecx
 	andl	%eax, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	52(%ebx), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	44(%esi), %ecx
 	andl	%eax, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	48(%ebx), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esi), %ecx
 	andl	%eax, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	44(%ebx), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esi), %ecx
 	andl	%eax, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	40(%ebx), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	32(%esi), %ecx
 	andl	%eax, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	36(%ebx), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	28(%esi), %ecx
 	andl	%eax, %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	32(%ebx), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	24(%esi), %ecx
 	andl	%eax, %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	24(%ebx), %ebp
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	20(%esi), %ebp
 	andl	%eax, %ebp
-	movl	20(%ebx), %edi
+	movl	16(%esi), %ebx
+	andl	%eax, %ebx
+	movl	12(%esi), %edi
 	andl	%eax, %edi
-	movl	16(%ebx), %esi
-	andl	%eax, %esi
-	andl	8(%ebx), %eax
-	addl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	8(%esp), %ecx           # 4-byte Reload
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	136(%esp), %ebx
-	movl	%edx, (%ebx)
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 4(%ebx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 8(%ebx)
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 12(%ebx)
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 16(%ebx)
-	adcl	64(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edi, 20(%ebx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 24(%ebx)
-	movl	(%esp), %ecx            # 4-byte Reload
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	adcl	112(%esp), %ecx         # 4-byte Folded Reload
-	movl	%eax, 36(%ebx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 40(%ebx)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	96(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 44(%ebx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%ecx, 48(%ebx)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	104(%esp), %ecx         # 4-byte Folded Reload
-	movl	%eax, 52(%ebx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%ecx, 56(%ebx)
-	movl	%eax, 60(%ebx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%ebx)
-	addl	$116, %esp
+	movl	8(%esi), %edx
+	andl	%eax, %edx
+	movl	4(%esi), %ecx
+	andl	%eax, %ecx
+	andl	(%esi), %eax
+	addl	96(%esp), %eax                  # 4-byte Folded Reload
+	adcl	100(%esp), %ecx                 # 4-byte Folded Reload
+	movl	124(%esp), %esi
+	movl	%eax, (%esi)
+	adcl	64(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 4(%esi)
+	adcl	72(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edx, 8(%esi)
+	adcl	76(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%edi, 12(%esi)
+	adcl	80(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebx, 16(%esi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	84(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ebp, 20(%esi)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	88(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 24(%esi)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%ecx, 28(%esi)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 32(%esi)
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%ecx, 36(%esi)
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 40(%esi)
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 44(%esi)
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 48(%esi)
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 52(%esi)
+	movl	%eax, 56(%esi)
+	movl	92(%esp), %eax                  # 4-byte Reload
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 60(%esi)
+	addl	$104, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end267:
-	.size	mcl_fp_subNF17Lbmi2, .Lfunc_end267-mcl_fp_subNF17Lbmi2
-
-	.globl	mcl_fpDbl_add17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_add17Lbmi2,@function
-mcl_fpDbl_add17Lbmi2:                   # @mcl_fpDbl_add17Lbmi2
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$128, %esp
-	movl	156(%esp), %ecx
-	movl	152(%esp), %edx
-	movl	12(%edx), %edi
-	movl	16(%edx), %esi
-	movl	8(%ecx), %ebx
-	movl	(%ecx), %ebp
-	addl	(%edx), %ebp
-	movl	148(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%ecx), %ebp
-	adcl	4(%edx), %ebp
-	adcl	8(%edx), %ebx
-	adcl	12(%ecx), %edi
-	adcl	16(%ecx), %esi
-	movl	%ebp, 4(%eax)
-	movl	76(%ecx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%ecx), %ebx
-	movl	%edi, 12(%eax)
-	movl	20(%edx), %edi
-	adcl	%ebx, %edi
-	movl	24(%ecx), %ebx
-	movl	%esi, 16(%eax)
-	movl	24(%edx), %esi
-	adcl	%ebx, %esi
-	movl	28(%ecx), %ebx
-	movl	%edi, 20(%eax)
-	movl	28(%edx), %edi
-	adcl	%ebx, %edi
-	movl	32(%ecx), %ebx
-	movl	%esi, 24(%eax)
-	movl	32(%edx), %esi
-	adcl	%ebx, %esi
-	movl	36(%ecx), %ebx
-	movl	%edi, 28(%eax)
-	movl	36(%edx), %edi
-	adcl	%ebx, %edi
-	movl	40(%ecx), %ebx
-	movl	%esi, 32(%eax)
-	movl	40(%edx), %esi
-	adcl	%ebx, %esi
-	movl	44(%ecx), %ebx
-	movl	%edi, 36(%eax)
-	movl	44(%edx), %edi
-	adcl	%ebx, %edi
-	movl	48(%ecx), %ebx
-	movl	%esi, 40(%eax)
-	movl	48(%edx), %esi
-	adcl	%ebx, %esi
-	movl	52(%ecx), %ebx
-	movl	%edi, 44(%eax)
-	movl	52(%edx), %edi
-	adcl	%ebx, %edi
-	movl	56(%ecx), %ebx
-	movl	%esi, 48(%eax)
-	movl	56(%edx), %esi
-	adcl	%ebx, %esi
-	movl	60(%ecx), %ebx
-	movl	%edi, 52(%eax)
-	movl	60(%edx), %edi
-	adcl	%ebx, %edi
-	movl	64(%ecx), %ebx
-	movl	%esi, 56(%eax)
-	movl	64(%edx), %esi
-	adcl	%ebx, %esi
-	movl	68(%ecx), %ebx
-	movl	%edi, 60(%eax)
-	movl	68(%edx), %edi
-	adcl	%ebx, %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	72(%ecx), %edi
-	movl	%esi, 64(%eax)
-	movl	72(%edx), %eax
-	adcl	%edi, %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	76(%edx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	80(%ecx), %esi
-	movl	80(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	84(%ecx), %esi
-	movl	84(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	88(%ecx), %esi
-	movl	88(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%ecx), %esi
-	movl	92(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	96(%ecx), %esi
-	movl	96(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	100(%ecx), %esi
-	movl	100(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	104(%ecx), %esi
-	movl	104(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	108(%ecx), %esi
-	movl	108(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	112(%ecx), %esi
-	movl	112(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	116(%ecx), %esi
+.Lfunc_end87:
+	.size	mcl_fp_subNF16Lbmi2, .Lfunc_end87-mcl_fp_subNF16Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_add16Lbmi2            # -- Begin function mcl_fpDbl_add16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add16Lbmi2,@function
+mcl_fpDbl_add16Lbmi2:                   # @mcl_fpDbl_add16Lbmi2
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$128, %esp
+	movl	152(%esp), %edx
+	movl	(%edx), %eax
+	movl	4(%edx), %esi
+	movl	156(%esp), %ecx
+	addl	(%ecx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	adcl	4(%ecx), %esi
+	movl	%esi, 96(%esp)                  # 4-byte Spill
+	movl	124(%edx), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	120(%edx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
 	movl	116(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	120(%ecx), %edi
-	movl	120(%edx), %esi
-	adcl	%edi, %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	124(%ecx), %ebx
-	movl	124(%edx), %edi
-	adcl	%ebx, %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	128(%ecx), %ebx
-	movl	128(%edx), %ebp
-	adcl	%ebx, %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	132(%ecx), %ecx
-	movl	132(%edx), %edx
-	adcl	%ecx, %edx
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	160(%esp), %ebx
-	movl	92(%esp), %eax          # 4-byte Reload
-	subl	(%ebx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	sbbl	4(%ebx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	sbbl	8(%ebx), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	sbbl	12(%ebx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	16(%ebx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	sbbl	20(%ebx), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	sbbl	24(%ebx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	sbbl	28(%ebx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	sbbl	32(%ebx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	sbbl	36(%ebx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	sbbl	40(%ebx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	sbbl	44(%ebx), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	sbbl	48(%ebx), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	sbbl	52(%ebx), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	sbbl	56(%ebx), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	sbbl	60(%ebx), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	sbbl	64(%ebx), %ebp
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	112(%edx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	108(%edx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	104(%edx), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	100(%edx), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	96(%edx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	92(%edx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	88(%edx), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	84(%edx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	80(%edx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	76(%edx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	72(%edx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	68(%edx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	64(%edx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	60(%edx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%edx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	52(%edx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	48(%edx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%edx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	40(%edx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	36(%edx), %eax
+	movl	%eax, 116(%esp)                 # 4-byte Spill
+	movl	32(%edx), %eax
+	movl	%eax, 112(%esp)                 # 4-byte Spill
+	movl	28(%edx), %ebp
+	movl	24(%edx), %ebx
+	movl	20(%edx), %edi
+	movl	16(%edx), %esi
+	movl	12(%edx), %eax
+	movl	8(%edx), %edx
+	adcl	8(%ecx), %edx
+	movl	%edx, 92(%esp)                  # 4-byte Spill
+	adcl	12(%ecx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	adcl	16(%ecx), %esi
+	movl	%esi, 84(%esp)                  # 4-byte Spill
+	adcl	20(%ecx), %edi
+	movl	%edi, 80(%esp)                  # 4-byte Spill
+	adcl	24(%ecx), %ebx
+	movl	%ebx, 124(%esp)                 # 4-byte Spill
+	adcl	28(%ecx), %ebp
+	movl	%ebp, 120(%esp)                 # 4-byte Spill
+	movl	112(%esp), %esi                 # 4-byte Reload
+	adcl	32(%ecx), %esi
+	movl	116(%esp), %eax                 # 4-byte Reload
+	adcl	36(%ecx), %eax
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	40(%ecx), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	44(%ecx), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	48(%ecx), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	52(%ecx), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	56(%ecx), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	60(%ecx), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	108(%esp), %ebx                 # 4-byte Reload
+	adcl	64(%ecx), %ebx
+	movl	104(%esp), %edi                 # 4-byte Reload
+	adcl	68(%ecx), %edi
+	movl	76(%esp), %edx                  # 4-byte Reload
+	adcl	72(%ecx), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	movl	72(%esp), %edx                  # 4-byte Reload
+	adcl	76(%ecx), %edx
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edx                  # 4-byte Reload
+	adcl	80(%ecx), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edx                  # 4-byte Reload
+	adcl	84(%ecx), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	60(%esp), %edx                  # 4-byte Reload
+	adcl	88(%ecx), %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	56(%esp), %edx                  # 4-byte Reload
+	adcl	92(%ecx), %edx
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	96(%ecx), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	100(%ecx), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	104(%ecx), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	108(%ecx), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	adcl	112(%ecx), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	116(%ecx), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	120(%ecx), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	124(%ecx), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	148(%esp), %ebp
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 60(%ebp)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 56(%ebp)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 52(%ebp)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 48(%ebp)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 44(%ebp)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 40(%ebp)
+	movl	%eax, 36(%ebp)
+	movl	%esi, 32(%ebp)
+	movl	120(%esp), %eax                 # 4-byte Reload
+	movl	%eax, 28(%ebp)
+	movl	124(%esp), %eax                 # 4-byte Reload
+	movl	%eax, 24(%ebp)
+	movl	80(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 20(%ebp)
+	movl	84(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%ebp)
+	movl	88(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebp)
+	movl	92(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ebp)
+	movl	96(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%ebp)
+	movl	100(%esp), %ecx                 # 4-byte Reload
+	movl	%ecx, (%ebp)
+	setb	80(%esp)                        # 1-byte Folded Spill
+	movl	160(%esp), %ecx
+	movl	%ebx, %eax
+	movl	%ebx, 108(%esp)                 # 4-byte Spill
+	subl	(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%edi, %esi
+	movl	%edi, 104(%esp)                 # 4-byte Spill
+	sbbl	4(%ecx), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	76(%esp), %edi                  # 4-byte Reload
+	sbbl	8(%ecx), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	72(%esp), %eax                  # 4-byte Reload
+	sbbl	12(%ecx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	sbbl	16(%ecx), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	sbbl	20(%ecx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%ecx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	sbbl	32(%ecx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%ecx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%ecx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	sbbl	44(%ecx), %edx
+	movl	36(%esp), %esi                  # 4-byte Reload
+	sbbl	48(%ecx), %esi
+	movl	32(%esp), %edi                  # 4-byte Reload
+	sbbl	52(%ecx), %edi
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	sbbl	56(%ecx), %ebx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	60(%ecx), %eax
+	movzbl	80(%esp), %ecx                  # 1-byte Folded Reload
 	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB268_2
-# BB#1:
-	movl	%ebp, %edx
-.LBB268_2:
-	testb	%cl, %cl
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	88(%esp), %esi          # 4-byte Reload
-	movl	84(%esp), %edi          # 4-byte Reload
-	movl	80(%esp), %ebx          # 4-byte Reload
-	movl	76(%esp), %ebp          # 4-byte Reload
-	jne	.LBB268_4
-# BB#3:
-	movl	(%esp), %esi            # 4-byte Reload
-	movl	4(%esp), %edi           # 4-byte Reload
-	movl	8(%esp), %ebx           # 4-byte Reload
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB268_4:
-	movl	148(%esp), %ecx
-	movl	%eax, 68(%ecx)
+	testb	$1, %cl
+	jne	.LBB88_1
+# %bb.2:
+	movl	%eax, 124(%ebp)
+	jne	.LBB88_3
+.LBB88_4:
+	movl	%ebx, 120(%ebp)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB88_5
+.LBB88_6:
+	movl	%edi, 116(%ebp)
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	jne	.LBB88_7
+.LBB88_8:
+	movl	%esi, 112(%ebp)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	jne	.LBB88_9
+.LBB88_10:
+	movl	%edx, 108(%ebp)
+	movl	100(%esp), %esi                 # 4-byte Reload
+	movl	84(%esp), %edx                  # 4-byte Reload
+	jne	.LBB88_11
+.LBB88_12:
+	movl	%edx, 104(%ebp)
+	movl	88(%esp), %edx                  # 4-byte Reload
+	jne	.LBB88_13
+.LBB88_14:
+	movl	%edx, 100(%ebp)
+	jne	.LBB88_15
+.LBB88_16:
+	movl	%ecx, %edx
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 96(%ebp)
+	jne	.LBB88_17
+.LBB88_18:
+	movl	%eax, %ecx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 92(%ebp)
+	jne	.LBB88_19
+.LBB88_20:
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 88(%ebp)
+	jne	.LBB88_21
+.LBB88_22:
+	movl	%ebx, 84(%ebp)
 	movl	%ecx, %eax
-	movl	96(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 72(%eax)
-	movl	100(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 76(%eax)
-	movl	104(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 80(%eax)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 84(%eax)
-	movl	112(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 88(%eax)
-	movl	116(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 92(%eax)
-	movl	120(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 96(%eax)
-	movl	124(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 100(%eax)
-	movl	%ebp, 104(%eax)
-	movl	%ebx, 108(%eax)
-	movl	%edi, 112(%eax)
-	movl	%esi, 116(%eax)
-	movl	72(%esp), %ecx          # 4-byte Reload
-	movl	64(%esp), %esi          # 4-byte Reload
-	jne	.LBB268_6
-# BB#5:
-	movl	52(%esp), %esi          # 4-byte Reload
-.LBB268_6:
-	movl	%esi, 120(%eax)
-	movl	68(%esp), %esi          # 4-byte Reload
-	jne	.LBB268_8
-# BB#7:
-	movl	56(%esp), %esi          # 4-byte Reload
-.LBB268_8:
-	movl	%esi, 124(%eax)
-	jne	.LBB268_10
-# BB#9:
-	movl	60(%esp), %ecx          # 4-byte Reload
-.LBB268_10:
-	movl	%ecx, 128(%eax)
-	movl	%edx, 132(%eax)
+	jne	.LBB88_23
+.LBB88_24:
+	movl	%edi, 80(%ebp)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB88_25
+.LBB88_26:
+	movl	%esi, 76(%ebp)
+	jne	.LBB88_27
+.LBB88_28:
+	movl	%ecx, 72(%ebp)
+	jne	.LBB88_29
+.LBB88_30:
+	movl	%edx, 68(%ebp)
+	je	.LBB88_32
+.LBB88_31:
+	movl	108(%esp), %eax                 # 4-byte Reload
+.LBB88_32:
+	movl	%eax, 64(%ebp)
 	addl	$128, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end268:
-	.size	mcl_fpDbl_add17Lbmi2, .Lfunc_end268-mcl_fpDbl_add17Lbmi2
-
-	.globl	mcl_fpDbl_sub17Lbmi2
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub17Lbmi2,@function
-mcl_fpDbl_sub17Lbmi2:                   # @mcl_fpDbl_sub17Lbmi2
-# BB#0:
+.LBB88_1:
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 124(%ebp)
+	je	.LBB88_4
+.LBB88_3:
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 120(%ebp)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	je	.LBB88_6
+.LBB88_5:
+	movl	32(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 116(%ebp)
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	je	.LBB88_8
+.LBB88_7:
+	movl	36(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 112(%ebp)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	je	.LBB88_10
+.LBB88_9:
+	movl	40(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 108(%ebp)
+	movl	100(%esp), %esi                 # 4-byte Reload
+	movl	84(%esp), %edx                  # 4-byte Reload
+	je	.LBB88_12
+.LBB88_11:
+	movl	44(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 104(%ebp)
+	movl	88(%esp), %edx                  # 4-byte Reload
+	je	.LBB88_14
+.LBB88_13:
+	movl	48(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 100(%ebp)
+	je	.LBB88_16
+.LBB88_15:
+	movl	52(%esp), %edx                  # 4-byte Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%ecx, %edx
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 96(%ebp)
+	je	.LBB88_18
+.LBB88_17:
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, %ecx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 92(%ebp)
+	je	.LBB88_20
+.LBB88_19:
+	movl	60(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 88(%ebp)
+	je	.LBB88_22
+.LBB88_21:
+	movl	64(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 84(%ebp)
+	movl	%ecx, %eax
+	je	.LBB88_24
+.LBB88_23:
+	movl	68(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 80(%ebp)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	je	.LBB88_26
+.LBB88_25:
+	movl	72(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 76(%ebp)
+	je	.LBB88_28
+.LBB88_27:
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 72(%ebp)
+	je	.LBB88_30
+.LBB88_29:
+	movl	104(%esp), %edx                 # 4-byte Reload
+	movl	%edx, 68(%ebp)
+	jne	.LBB88_31
+	jmp	.LBB88_32
+.Lfunc_end88:
+	.size	mcl_fpDbl_add16Lbmi2, .Lfunc_end88-mcl_fpDbl_add16Lbmi2
+                                        # -- End function
+	.globl	mcl_fpDbl_sub16Lbmi2            # -- Begin function mcl_fpDbl_sub16Lbmi2
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub16Lbmi2,@function
+mcl_fpDbl_sub16Lbmi2:                   # @mcl_fpDbl_sub16Lbmi2
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$116, %esp
-	movl	140(%esp), %edx
+	subl	$128, %esp
+	movl	152(%esp), %edx
 	movl	(%edx), %eax
 	movl	4(%edx), %edi
-	movl	144(%esp), %esi
-	subl	(%esi), %eax
-	sbbl	4(%esi), %edi
-	movl	8(%edx), %ebx
-	sbbl	8(%esi), %ebx
-	movl	136(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%edx), %eax
-	sbbl	12(%esi), %eax
-	movl	%edi, 4(%ecx)
-	movl	16(%edx), %edi
-	sbbl	16(%esi), %edi
-	movl	%ebx, 8(%ecx)
-	movl	20(%esi), %ebx
-	movl	%eax, 12(%ecx)
-	movl	20(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	24(%esi), %ebx
-	movl	%edi, 16(%ecx)
-	movl	24(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%esi), %ebx
-	movl	%eax, 20(%ecx)
-	movl	28(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	32(%esi), %ebx
-	movl	%edi, 24(%ecx)
-	movl	32(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	36(%esi), %ebx
-	movl	%eax, 28(%ecx)
-	movl	36(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	40(%esi), %ebx
-	movl	%edi, 32(%ecx)
-	movl	40(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	44(%esi), %ebx
-	movl	%eax, 36(%ecx)
-	movl	44(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	48(%esi), %ebx
-	movl	%edi, 40(%ecx)
-	movl	48(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	52(%esi), %ebx
-	movl	%eax, 44(%ecx)
-	movl	52(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	56(%esi), %ebx
-	movl	%edi, 48(%ecx)
-	movl	56(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	60(%esi), %ebx
-	movl	%eax, 52(%ecx)
-	movl	60(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	64(%esi), %ebx
-	movl	%edi, 56(%ecx)
-	movl	64(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	68(%esi), %ebx
-	movl	%eax, 60(%ecx)
+	xorl	%esi, %esi
+	movl	156(%esp), %ecx
+	subl	(%ecx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	sbbl	4(%ecx), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	124(%edx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	120(%edx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	116(%edx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	112(%edx), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	108(%edx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	104(%edx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	100(%edx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	96(%edx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	92(%edx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	88(%edx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	84(%edx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	80(%edx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	76(%edx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	72(%edx), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
 	movl	68(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	72(%esi), %eax
-	movl	%edi, 64(%ecx)
-	movl	72(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	76(%esi), %eax
-	movl	76(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	80(%esi), %eax
-	movl	80(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	84(%esi), %eax
-	movl	84(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	88(%esi), %eax
-	movl	88(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	92(%esi), %eax
-	movl	92(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	96(%esi), %eax
-	movl	96(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	100(%esi), %eax
-	movl	100(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	104(%esi), %eax
-	movl	104(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	108(%esi), %eax
-	movl	108(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	112(%esi), %eax
-	movl	112(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	116(%esi), %eax
-	movl	116(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	120(%esi), %eax
-	movl	120(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	124(%esi), %eax
-	movl	124(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	128(%esi), %eax
-	movl	128(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	132(%esi), %eax
-	movl	132(%edx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	148(%esp), %ebp
-	jne	.LBB269_1
-# BB#2:
-	movl	$0, 76(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_3
-.LBB269_1:
-	movl	64(%ebp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-.LBB269_3:
-	testb	%al, %al
-	jne	.LBB269_4
-# BB#5:
-	movl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	$0, %esi
-	jmp	.LBB269_6
-.LBB269_4:
-	movl	(%ebp), %esi
-	movl	4(%ebp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-.LBB269_6:
-	jne	.LBB269_7
-# BB#8:
-	movl	$0, 40(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_9
-.LBB269_7:
-	movl	60(%ebp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-.LBB269_9:
-	jne	.LBB269_10
-# BB#11:
-	movl	$0, 36(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_12
-.LBB269_10:
-	movl	56(%ebp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-.LBB269_12:
-	jne	.LBB269_13
-# BB#14:
-	movl	$0, 32(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_15
-.LBB269_13:
-	movl	52(%ebp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-.LBB269_15:
-	jne	.LBB269_16
-# BB#17:
-	movl	$0, 24(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_18
-.LBB269_16:
-	movl	48(%ebp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-.LBB269_18:
-	jne	.LBB269_19
-# BB#20:
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_21
-.LBB269_19:
-	movl	44(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-.LBB269_21:
-	jne	.LBB269_22
-# BB#23:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_24
-.LBB269_22:
-	movl	40(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB269_24:
-	jne	.LBB269_25
-# BB#26:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_27
-.LBB269_25:
-	movl	36(%ebp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB269_27:
-	jne	.LBB269_28
-# BB#29:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB269_30
-.LBB269_28:
-	movl	32(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB269_30:
-	jne	.LBB269_31
-# BB#32:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB269_33
-.LBB269_31:
-	movl	28(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB269_33:
-	jne	.LBB269_34
-# BB#35:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB269_36
-.LBB269_34:
-	movl	24(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB269_36:
-	jne	.LBB269_37
-# BB#38:
-	movl	$0, %ebx
-	jmp	.LBB269_39
-.LBB269_37:
-	movl	20(%ebp), %ebx
-.LBB269_39:
-	jne	.LBB269_40
-# BB#41:
-	movl	$0, %edi
-	jmp	.LBB269_42
-.LBB269_40:
-	movl	16(%ebp), %edi
-.LBB269_42:
-	jne	.LBB269_43
-# BB#44:
-	movl	%ebp, %eax
-	movl	$0, %ebp
-	jmp	.LBB269_45
-.LBB269_43:
-	movl	%ebp, %eax
-	movl	12(%eax), %ebp
-.LBB269_45:
-	jne	.LBB269_46
-# BB#47:
-	xorl	%eax, %eax
-	jmp	.LBB269_48
-.LBB269_46:
-	movl	8(%eax), %eax
-.LBB269_48:
-	addl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, 68(%ecx)
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 72(%ecx)
-	adcl	56(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 76(%ecx)
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebp, 80(%ecx)
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edi, 84(%ecx)
-	movl	(%esp), %edx            # 4-byte Reload
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebx, 88(%ecx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 92(%ecx)
-	movl	8(%esp), %edx           # 4-byte Reload
-	adcl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 96(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 100(%ecx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 104(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 108(%ecx)
-	movl	24(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 112(%ecx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%edx, 116(%ecx)
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx         # 4-byte Folded Reload
-	movl	%eax, 120(%ecx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%edx, 124(%ecx)
-	movl	%eax, 128(%ecx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 132(%ecx)
-	addl	$116, %esp
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	64(%edx), %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	60(%edx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	56(%edx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	52(%edx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	48(%edx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	44(%edx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%edx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%edx), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%edx), %eax
+	movl	%eax, 112(%esp)                 # 4-byte Spill
+	movl	28(%edx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	24(%edx), %ebp
+	movl	20(%edx), %ebx
+	movl	16(%edx), %edi
+	movl	12(%edx), %eax
+	movl	8(%edx), %edx
+	sbbl	8(%ecx), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	sbbl	12(%ecx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	sbbl	16(%ecx), %edi
+	movl	%edi, 124(%esp)                 # 4-byte Spill
+	sbbl	20(%ecx), %ebx
+	movl	%ebx, 120(%esp)                 # 4-byte Spill
+	sbbl	24(%ecx), %ebp
+	movl	%ebp, 116(%esp)                 # 4-byte Spill
+	movl	108(%esp), %ebp                 # 4-byte Reload
+	sbbl	28(%ecx), %ebp
+	movl	112(%esp), %edi                 # 4-byte Reload
+	sbbl	32(%ecx), %edi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%ecx), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	44(%ecx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	48(%ecx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	52(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	56(%ecx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	sbbl	60(%ecx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	60(%esp), %edx                  # 4-byte Reload
+	sbbl	64(%ecx), %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	sbbl	68(%ecx), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	sbbl	72(%ecx), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	sbbl	76(%ecx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	sbbl	80(%ecx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edx                  # 4-byte Reload
+	sbbl	84(%ecx), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	72(%esp), %edx                  # 4-byte Reload
+	sbbl	88(%ecx), %edx
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	76(%esp), %edx                  # 4-byte Reload
+	sbbl	92(%ecx), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	movl	80(%esp), %edx                  # 4-byte Reload
+	sbbl	96(%ecx), %edx
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	movl	84(%esp), %edx                  # 4-byte Reload
+	sbbl	100(%ecx), %edx
+	movl	%edx, 84(%esp)                  # 4-byte Spill
+	movl	88(%esp), %edx                  # 4-byte Reload
+	sbbl	104(%ecx), %edx
+	movl	%edx, 88(%esp)                  # 4-byte Spill
+	movl	92(%esp), %edx                  # 4-byte Reload
+	sbbl	108(%ecx), %edx
+	movl	%edx, 92(%esp)                  # 4-byte Spill
+	movl	96(%esp), %edx                  # 4-byte Reload
+	sbbl	112(%ecx), %edx
+	movl	%edx, 96(%esp)                  # 4-byte Spill
+	movl	100(%esp), %edx                 # 4-byte Reload
+	sbbl	116(%ecx), %edx
+	movl	%edx, 100(%esp)                 # 4-byte Spill
+	movl	104(%esp), %edx                 # 4-byte Reload
+	sbbl	120(%ecx), %edx
+	movl	%edx, 104(%esp)                 # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	sbbl	124(%ecx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	148(%esp), %ebx
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 60(%ebx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 56(%ebx)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 52(%ebx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%ebx)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 44(%ebx)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 40(%ebx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 36(%ebx)
+	movl	%edi, 32(%ebx)
+	movl	%ebp, 28(%ebx)
+	movl	116(%esp), %eax                 # 4-byte Reload
+	movl	%eax, 24(%ebx)
+	movl	120(%esp), %eax                 # 4-byte Reload
+	movl	%eax, 20(%ebx)
+	movl	124(%esp), %eax                 # 4-byte Reload
+	movl	%eax, 16(%ebx)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ebx)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%ebx)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%ebx)
+	sbbl	%esi, %esi
+	andl	$1, %esi
+	negl	%esi
+	movl	160(%esp), %edi
+	movl	60(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	56(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	48(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	44(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	40(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	36(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	32(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	28(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	24(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	20(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%edi), %ebp
+	andl	%esi, %ebp
+	movl	12(%edi), %edx
+	andl	%esi, %edx
+	movl	8(%edi), %ecx
+	andl	%esi, %ecx
+	movl	4(%edi), %eax
+	andl	%esi, %eax
+	andl	(%edi), %esi
+	addl	60(%esp), %esi                  # 4-byte Folded Reload
+	adcl	44(%esp), %eax                  # 4-byte Folded Reload
+	movl	%esi, 64(%ebx)
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 68(%ebx)
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 72(%ebx)
+	adcl	56(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%edx, 76(%ebx)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ebp, 80(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	72(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 84(%ebx)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	76(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 88(%ebx)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	80(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 92(%ebx)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	84(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 96(%ebx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	88(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 100(%ebx)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	92(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 104(%ebx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	96(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 108(%ebx)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	100(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%eax, 112(%ebx)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	104(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 116(%ebx)
+	movl	%eax, 120(%ebx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 124(%ebx)
+	addl	$128, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end269:
-	.size	mcl_fpDbl_sub17Lbmi2, .Lfunc_end269-mcl_fpDbl_sub17Lbmi2
-
-
+.Lfunc_end89:
+	.size	mcl_fpDbl_sub16Lbmi2, .Lfunc_end89-mcl_fpDbl_sub16Lbmi2
+                                        # -- End function
 	.section	".note.GNU-stack","",@progbits
diff --git a/src/asm/x86.s b/src/asm/x86.s
index cdd988ad..c9753734 100644
--- a/src/asm/x86.s
+++ b/src/asm/x86.s
@@ -1,10 +1,10 @@
 	.text
-	.file	"<stdin>"
-	.globl	makeNIST_P192L
-	.align	16, 0x90
+	.file	"base32.ll"
+	.globl	makeNIST_P192L                  # -- Begin function makeNIST_P192L
+	.p2align	4, 0x90
 	.type	makeNIST_P192L,@function
 makeNIST_P192L:                         # @makeNIST_P192L
-# BB#0:
+# %bb.0:
 	movl	4(%esp), %eax
 	movl	$-1, 20(%eax)
 	movl	$-1, 16(%eax)
@@ -15,137 +15,150 @@ makeNIST_P192L:                         # @makeNIST_P192L
 	retl	$4
 .Lfunc_end0:
 	.size	makeNIST_P192L, .Lfunc_end0-makeNIST_P192L
-
-	.globl	mcl_fpDbl_mod_NIST_P192L
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fpDbl_mod_NIST_P192L        # -- Begin function mcl_fpDbl_mod_NIST_P192L
+	.p2align	4, 0x90
 	.type	mcl_fpDbl_mod_NIST_P192L,@function
 mcl_fpDbl_mod_NIST_P192L:               # @mcl_fpDbl_mod_NIST_P192L
-# BB#0:
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$32, %esp
-	movl	56(%esp), %eax
-	movl	32(%eax), %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	28(%eax), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	xorl	%edx, %edx
-	movl	(%eax), %ebx
-	addl	%ecx, %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	4(%eax), %ecx
-	adcl	%edi, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	8(%eax), %ebp
-	adcl	%esi, %ebp
-	movl	36(%eax), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	12(%eax), %esi
-	adcl	%ecx, %esi
-	movl	40(%eax), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	16(%eax), %ecx
-	adcl	%ebx, %ecx
-	movl	44(%eax), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	movl	20(%eax), %eax
+	subl	$36, %esp
+	movl	60(%esp), %esi
+	movl	32(%esi), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	24(%esi), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	28(%esi), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	(%esi), %eax
+	addl	%edi, %eax
+	movl	%eax, %ebx
+	movl	4(%esi), %eax
+	adcl	%edx, %eax
+	movl	%eax, %edx
+	movl	8(%esi), %ebp
+	adcl	%ecx, %ebp
+	movl	36(%esi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esi), %ecx
+	adcl	%eax, %ecx
+	movl	40(%esi), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	16(%esi), %eax
 	adcl	%edi, %eax
-	adcl	$0, %edx
-	sbbl	%edi, %edi
-	andl	$1, %edi
-	addl	%ebx, 24(%esp)          # 4-byte Folded Spill
-	movl	(%esp), %ebx            # 4-byte Reload
-	adcl	%ebx, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	adcl	$0, %edx
-	adcl	$0, %edi
-	addl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	%ebx, %esi
-	adcl	$0, %ecx
+	movl	44(%esi), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	20(%esi), %edi
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	%esi, %edi
+	setb	3(%esp)                         # 1-byte Folded Spill
+	addl	4(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	adcl	%esi, %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movzbl	3(%esp), %ebx                   # 1-byte Folded Reload
+	adcl	$0, %ebx
+	setb	%dl
+	addl	4(%esp), %ebp                   # 4-byte Folded Reload
+	adcl	%esi, %ecx
 	adcl	$0, %eax
+	adcl	$0, %edi
+	setb	4(%esp)                         # 1-byte Folded Spill
+	movl	%ebx, %esi
+	adcl	$0, %esi
+	movzbl	%dl, %edx
 	adcl	$0, %edx
+	addb	$255, 4(%esp)                   # 1-byte Folded Spill
+	adcl	24(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	%edx, 28(%esp)                  # 4-byte Folded Spill
+	adcl	%ebp, %esi
+	adcl	%ecx, %edx
+	adcl	$0, %eax
 	adcl	$0, %edi
-	addl	%edx, 24(%esp)          # 4-byte Folded Spill
-	adcl	%edi, 28(%esp)          # 4-byte Folded Spill
-	adcl	%ebp, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	%esi, %edi
+	setb	24(%esp)                        # 1-byte Folded Spill
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	addl	$1, %ebx
+	movl	28(%esp), %ecx                  # 4-byte Reload
 	adcl	$0, %ecx
-	adcl	$0, %eax
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	24(%esp), %esi          # 4-byte Reload
-	addl	$1, %esi
-	movl	28(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$1, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%edi, %edx
-	adcl	$0, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	adcl	$0, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, %edx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	%esi, %ebp
+	adcl	$1, %ebp
+	movl	%edx, 12(%esp)                  # 4-byte Spill
 	adcl	$0, %edx
-	adcl	$-1, %ebx
-	andl	$1, %ebx
-	jne	.LBB1_2
-# BB#1:
-	movl	%edx, %eax
-.LBB1_2:
-	testb	%bl, %bl
-	movl	24(%esp), %edx          # 4-byte Reload
-	jne	.LBB1_4
-# BB#3:
-	movl	%esi, %edx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	$0, %eax
+	movl	%edi, %esi
+	adcl	$0, %esi
+	movzbl	24(%esp), %ecx                  # 1-byte Folded Reload
+	adcl	$-1, %ecx
+	testb	$1, %cl
+	jne	.LBB1_1
+# %bb.2:
+	movl	56(%esp), %edi
+	movl	%esi, 20(%edi)
+	jne	.LBB1_3
 .LBB1_4:
-	movl	52(%esp), %esi
-	movl	%edx, (%esi)
-	movl	20(%esp), %edx          # 4-byte Reload
-	movl	28(%esp), %ebx          # 4-byte Reload
-	jne	.LBB1_6
-# BB#5:
-	movl	%ebp, %ebx
+	movl	%eax, 16(%edi)
+	jne	.LBB1_5
 .LBB1_6:
-	movl	%ebx, 4(%esi)
-	jne	.LBB1_8
-# BB#7:
-	movl	8(%esp), %edx           # 4-byte Reload
+	movl	%edx, 12(%edi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	jne	.LBB1_7
 .LBB1_8:
-	movl	%edx, 8(%esi)
-	jne	.LBB1_10
-# BB#9:
-	movl	12(%esp), %edi          # 4-byte Reload
+	movl	%ebp, 8(%edi)
+	jne	.LBB1_9
 .LBB1_10:
-	movl	%edi, 12(%esi)
-	jne	.LBB1_12
-# BB#11:
-	movl	16(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 4(%edi)
+	je	.LBB1_12
+.LBB1_11:
+	movl	20(%esp), %ebx                  # 4-byte Reload
 .LBB1_12:
-	movl	%ecx, 16(%esi)
-	movl	%eax, 20(%esi)
-	addl	$32, %esp
+	movl	%ebx, (%edi)
+	addl	$36, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
+.LBB1_1:
+	movl	%edi, %esi
+	movl	56(%esp), %edi
+	movl	%esi, 20(%edi)
+	je	.LBB1_4
+.LBB1_3:
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 16(%edi)
+	je	.LBB1_6
+.LBB1_5:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 12(%edi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	je	.LBB1_8
+.LBB1_7:
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 8(%edi)
+	je	.LBB1_10
+.LBB1_9:
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%edi)
+	jne	.LBB1_11
+	jmp	.LBB1_12
 .Lfunc_end1:
 	.size	mcl_fpDbl_mod_NIST_P192L, .Lfunc_end1-mcl_fpDbl_mod_NIST_P192L
-
-	.globl	mcl_fp_sqr_NIST_P192L
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fp_sqr_NIST_P192L           # -- Begin function mcl_fp_sqr_NIST_P192L
+	.p2align	4, 0x90
 	.type	mcl_fp_sqr_NIST_P192L,@function
 mcl_fp_sqr_NIST_P192L:                  # @mcl_fp_sqr_NIST_P192L
-# BB#0:
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
@@ -156,130 +169,146 @@ mcl_fp_sqr_NIST_P192L:                  # @mcl_fp_sqr_NIST_P192L
 	popl	%ebx
 .Ltmp0:
 	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L2$pb), %ebx
-	movl	116(%esp), %eax
-	movl	%eax, 4(%esp)
-	leal	44(%esp), %eax
-	movl	%eax, (%esp)
+	subl	$8, %esp
+	movl	124(%esp), %eax
+	leal	52(%esp), %ecx
+	pushl	%eax
+	pushl	%ecx
 	calll	mcl_fpDbl_sqrPre6L@PLT
-	xorl	%edi, %edi
-	movl	76(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
+	addl	$16, %esp
+	movl	80(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	76(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
 	movl	72(%esp), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi
-	addl	%eax, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax
+	addl	%edi, %eax
+	movl	%eax, %ebx
 	movl	48(%esp), %eax
 	adcl	%edx, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp
+	movl	%eax, %edx
+	movl	52(%esp), %eax
+	adcl	%esi, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ebp
 	adcl	%ecx, %ebp
-	movl	80(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi
-	adcl	%eax, %esi
-	movl	84(%esp), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
+	movl	84(%esp), %esi
 	movl	60(%esp), %ecx
-	adcl	%ebx, %ecx
+	adcl	%esi, %ecx
 	movl	88(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx
+	movl	64(%esp), %edi
+	adcl	%eax, %edi
+	setb	15(%esp)                        # 1-byte Folded Spill
+	addl	%esi, %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
 	adcl	%eax, %edx
-	adcl	$0, %edi
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	%ebx, 36(%esp)          # 4-byte Folded Spill
-	movl	12(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 40(%esp)          # 4-byte Folded Spill
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %edi
-	adcl	$0, %eax
-	addl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%ebx, %esi
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movzbl	15(%esp), %ebx                  # 1-byte Folded Reload
+	adcl	$0, %ebx
+	setb	16(%esp)                        # 1-byte Folded Spill
+	addl	%esi, %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	%eax, %ebp
 	adcl	$0, %ecx
-	adcl	$0, %edx
 	adcl	$0, %edi
-	adcl	$0, %eax
-	addl	%edi, 36(%esp)          # 4-byte Folded Spill
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	%ebp, %edi
-	adcl	%esi, %eax
-	adcl	$0, %ecx
+	setb	%al
+	movl	%ebx, %esi
+	adcl	$0, %esi
+	movzbl	16(%esp), %edx                  # 1-byte Folded Reload
 	adcl	$0, %edx
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	36(%esp), %esi          # 4-byte Reload
-	addl	$1, %esi
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	%edi, %ebp
-	adcl	$1, %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	adcl	$0, %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %ebp
-	adcl	$0, %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
+	addb	$255, %al
+	adcl	36(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	%edx, 40(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	adcl	%ebp, %edx
+	adcl	$0, %ecx
+	adcl	$0, %edi
+	setb	36(%esp)                        # 1-byte Folded Spill
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	addl	$1, %ebx
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
 	adcl	$0, %ebp
-	adcl	$-1, %ebx
-	andl	$1, %ebx
-	jne	.LBB2_2
-# BB#1:
-	movl	%ebp, %edx
-.LBB2_2:
-	testb	%bl, %bl
-	movl	36(%esp), %ebx          # 4-byte Reload
-	jne	.LBB2_4
-# BB#3:
+	movl	%esi, 24(%esp)                  # 4-byte Spill
 	movl	%esi, %ebx
+	adcl	$1, %ebx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	adcl	$0, %edx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	adcl	$0, %eax
+	movl	%edi, %esi
+	adcl	$0, %esi
+	movzbl	36(%esp), %ecx                  # 1-byte Folded Reload
+	adcl	$-1, %ecx
+	testb	$1, %cl
+	jne	.LBB2_1
+# %bb.2:
+	movl	112(%esp), %edi
+	movl	%esi, 20(%edi)
+	jne	.LBB2_3
 .LBB2_4:
-	movl	112(%esp), %esi
-	movl	%ebx, (%esi)
-	movl	40(%esp), %ebx          # 4-byte Reload
-	jne	.LBB2_6
-# BB#5:
-	movl	20(%esp), %ebx          # 4-byte Reload
+	movl	%eax, 16(%edi)
+	jne	.LBB2_5
 .LBB2_6:
-	movl	%ebx, 4(%esi)
-	jne	.LBB2_8
-# BB#7:
-	movl	24(%esp), %edi          # 4-byte Reload
+	movl	%edx, 12(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	jne	.LBB2_7
 .LBB2_8:
-	movl	%edi, 8(%esi)
-	jne	.LBB2_10
-# BB#9:
-	movl	28(%esp), %eax          # 4-byte Reload
+	movl	%ebx, 8(%edi)
+	jne	.LBB2_9
 .LBB2_10:
-	movl	%eax, 12(%esi)
-	jne	.LBB2_12
-# BB#11:
-	movl	32(%esp), %ecx          # 4-byte Reload
+	movl	%ebp, 4(%edi)
+	je	.LBB2_12
+.LBB2_11:
+	movl	16(%esp), %eax                  # 4-byte Reload
 .LBB2_12:
-	movl	%ecx, 16(%esi)
-	movl	%edx, 20(%esi)
+	movl	%eax, (%edi)
 	addl	$92, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
+.LBB2_1:
+	movl	%edi, %esi
+	movl	112(%esp), %edi
+	movl	%esi, 20(%edi)
+	je	.LBB2_4
+.LBB2_3:
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%edi)
+	je	.LBB2_6
+.LBB2_5:
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 12(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	je	.LBB2_8
+.LBB2_7:
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 8(%edi)
+	je	.LBB2_10
+.LBB2_9:
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 4(%edi)
+	jne	.LBB2_11
+	jmp	.LBB2_12
 .Lfunc_end2:
 	.size	mcl_fp_sqr_NIST_P192L, .Lfunc_end2-mcl_fp_sqr_NIST_P192L
-
-	.globl	mcl_fp_mulNIST_P192L
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fp_mulNIST_P192L            # -- Begin function mcl_fp_mulNIST_P192L
+	.p2align	4, 0x90
 	.type	mcl_fp_mulNIST_P192L,@function
 mcl_fp_mulNIST_P192L:                   # @mcl_fp_mulNIST_P192L
-# BB#0:
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
@@ -290,314 +319,331 @@ mcl_fp_mulNIST_P192L:                   # @mcl_fp_mulNIST_P192L
 	popl	%ebx
 .Ltmp1:
 	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L3$pb), %ebx
-	movl	120(%esp), %eax
-	movl	%eax, 8(%esp)
-	movl	116(%esp), %eax
-	movl	%eax, 4(%esp)
-	leal	44(%esp), %eax
-	movl	%eax, (%esp)
+	subl	$4, %esp
+	movl	124(%esp), %eax
+	movl	120(%esp), %ecx
+	leal	48(%esp), %edx
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
 	calll	mcl_fpDbl_mulPre6L@PLT
-	xorl	%edi, %edi
-	movl	76(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
+	addl	$16, %esp
+	movl	80(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	76(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
 	movl	72(%esp), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi
-	addl	%eax, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax
+	addl	%edi, %eax
+	movl	%eax, %ebx
 	movl	48(%esp), %eax
 	adcl	%edx, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp
+	movl	%eax, %edx
+	movl	52(%esp), %eax
+	adcl	%esi, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ebp
 	adcl	%ecx, %ebp
-	movl	80(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi
-	adcl	%eax, %esi
-	movl	84(%esp), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
+	movl	84(%esp), %esi
 	movl	60(%esp), %ecx
-	adcl	%ebx, %ecx
+	adcl	%esi, %ecx
 	movl	88(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx
+	movl	64(%esp), %edi
+	adcl	%eax, %edi
+	setb	15(%esp)                        # 1-byte Folded Spill
+	addl	%esi, %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
 	adcl	%eax, %edx
-	adcl	$0, %edi
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	%ebx, 36(%esp)          # 4-byte Folded Spill
-	movl	12(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 40(%esp)          # 4-byte Folded Spill
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %edi
-	adcl	$0, %eax
-	addl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%ebx, %esi
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movzbl	15(%esp), %ebx                  # 1-byte Folded Reload
+	adcl	$0, %ebx
+	setb	16(%esp)                        # 1-byte Folded Spill
+	addl	%esi, %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	%eax, %ebp
 	adcl	$0, %ecx
-	adcl	$0, %edx
 	adcl	$0, %edi
-	adcl	$0, %eax
-	addl	%edi, 36(%esp)          # 4-byte Folded Spill
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	%ebp, %edi
-	adcl	%esi, %eax
-	adcl	$0, %ecx
+	setb	%al
+	movl	%ebx, %esi
+	adcl	$0, %esi
+	movzbl	16(%esp), %edx                  # 1-byte Folded Reload
 	adcl	$0, %edx
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	36(%esp), %esi          # 4-byte Reload
-	addl	$1, %esi
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	%edi, %ebp
-	adcl	$1, %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	adcl	$0, %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %ebp
-	adcl	$0, %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
+	addb	$255, %al
+	adcl	36(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	%edx, 40(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	adcl	%ebp, %edx
+	adcl	$0, %ecx
+	adcl	$0, %edi
+	setb	36(%esp)                        # 1-byte Folded Spill
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	addl	$1, %ebx
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
 	adcl	$0, %ebp
-	adcl	$-1, %ebx
-	andl	$1, %ebx
-	jne	.LBB3_2
-# BB#1:
-	movl	%ebp, %edx
-.LBB3_2:
-	testb	%bl, %bl
-	movl	36(%esp), %ebx          # 4-byte Reload
-	jne	.LBB3_4
-# BB#3:
+	movl	%esi, 24(%esp)                  # 4-byte Spill
 	movl	%esi, %ebx
+	adcl	$1, %ebx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	adcl	$0, %edx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	adcl	$0, %eax
+	movl	%edi, %esi
+	adcl	$0, %esi
+	movzbl	36(%esp), %ecx                  # 1-byte Folded Reload
+	adcl	$-1, %ecx
+	testb	$1, %cl
+	jne	.LBB3_1
+# %bb.2:
+	movl	112(%esp), %edi
+	movl	%esi, 20(%edi)
+	jne	.LBB3_3
 .LBB3_4:
-	movl	112(%esp), %esi
-	movl	%ebx, (%esi)
-	movl	40(%esp), %ebx          # 4-byte Reload
-	jne	.LBB3_6
-# BB#5:
-	movl	20(%esp), %ebx          # 4-byte Reload
+	movl	%eax, 16(%edi)
+	jne	.LBB3_5
 .LBB3_6:
-	movl	%ebx, 4(%esi)
-	jne	.LBB3_8
-# BB#7:
-	movl	24(%esp), %edi          # 4-byte Reload
+	movl	%edx, 12(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	jne	.LBB3_7
 .LBB3_8:
-	movl	%edi, 8(%esi)
-	jne	.LBB3_10
-# BB#9:
-	movl	28(%esp), %eax          # 4-byte Reload
+	movl	%ebx, 8(%edi)
+	jne	.LBB3_9
 .LBB3_10:
-	movl	%eax, 12(%esi)
-	jne	.LBB3_12
-# BB#11:
-	movl	32(%esp), %ecx          # 4-byte Reload
+	movl	%ebp, 4(%edi)
+	je	.LBB3_12
+.LBB3_11:
+	movl	16(%esp), %eax                  # 4-byte Reload
 .LBB3_12:
-	movl	%ecx, 16(%esi)
-	movl	%edx, 20(%esi)
+	movl	%eax, (%edi)
 	addl	$92, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
+.LBB3_1:
+	movl	%edi, %esi
+	movl	112(%esp), %edi
+	movl	%esi, 20(%edi)
+	je	.LBB3_4
+.LBB3_3:
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%edi)
+	je	.LBB3_6
+.LBB3_5:
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 12(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	je	.LBB3_8
+.LBB3_7:
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 8(%edi)
+	je	.LBB3_10
+.LBB3_9:
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 4(%edi)
+	jne	.LBB3_11
+	jmp	.LBB3_12
 .Lfunc_end3:
 	.size	mcl_fp_mulNIST_P192L, .Lfunc_end3-mcl_fp_mulNIST_P192L
-
-	.globl	mcl_fpDbl_mod_NIST_P521L
-	.align	16, 0x90
+                                        # -- End function
+	.globl	mcl_fpDbl_mod_NIST_P521L        # -- Begin function mcl_fpDbl_mod_NIST_P521L
+	.p2align	4, 0x90
 	.type	mcl_fpDbl_mod_NIST_P521L,@function
 mcl_fpDbl_mod_NIST_P521L:               # @mcl_fpDbl_mod_NIST_P521L
-# BB#0:
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
 	subl	$60, %esp
-	movl	84(%esp), %ecx
-	movl	124(%ecx), %edx
-	movl	128(%ecx), %esi
-	movl	%esi, %eax
-	shldl	$23, %edx, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	120(%ecx), %eax
-	shldl	$23, %eax, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	116(%ecx), %edx
-	shldl	$23, %edx, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	112(%ecx), %eax
-	shldl	$23, %eax, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	108(%ecx), %edx
-	shldl	$23, %edx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	104(%ecx), %eax
-	shldl	$23, %eax, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	100(%ecx), %edx
-	shldl	$23, %edx, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	96(%ecx), %eax
-	shldl	$23, %eax, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	92(%ecx), %edx
+	movl	84(%esp), %edi
+	movl	124(%edi), %eax
+	movl	128(%edi), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	shldl	$23, %eax, %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	120(%edi), %ecx
+	shldl	$23, %ecx, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	116(%edi), %eax
+	shldl	$23, %eax, %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	112(%edi), %ecx
+	shldl	$23, %ecx, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	108(%edi), %eax
+	shldl	$23, %eax, %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	104(%edi), %ecx
+	shldl	$23, %ecx, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	100(%edi), %eax
+	shldl	$23, %eax, %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	96(%edi), %ecx
+	shldl	$23, %ecx, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	92(%edi), %eax
+	shldl	$23, %eax, %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	88(%edi), %ecx
+	shldl	$23, %ecx, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	84(%edi), %esi
+	shldl	$23, %esi, %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	80(%edi), %ecx
+	shldl	$23, %ecx, %esi
+	movl	76(%edi), %eax
+	shldl	$23, %eax, %ecx
+	movl	72(%edi), %edx
 	shldl	$23, %edx, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	88(%ecx), %eax
-	shldl	$23, %eax, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	84(%ecx), %edi
-	shldl	$23, %edi, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	80(%ecx), %edx
-	shldl	$23, %edx, %edi
-	movl	76(%ecx), %eax
-	shldl	$23, %eax, %edx
-	movl	72(%ecx), %ebx
-	shldl	$23, %ebx, %eax
-	movl	68(%ecx), %ebp
-	shldl	$23, %ebp, %ebx
-	shrl	$9, %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	64(%ecx), %esi
-	shldl	$23, %esi, %ebp
-	andl	$511, %esi              # imm = 0x1FF
-	addl	(%ecx), %ebp
-	adcl	4(%ecx), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	adcl	8(%ecx), %eax
-	adcl	12(%ecx), %edx
-	adcl	16(%ecx), %edi
-	movl	28(%esp), %ebx          # 4-byte Reload
-	adcl	20(%ecx), %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebx          # 4-byte Reload
-	adcl	24(%ecx), %ebx
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebx          # 4-byte Reload
-	adcl	28(%ecx), %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebx          # 4-byte Reload
-	adcl	32(%ecx), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebx          # 4-byte Reload
-	adcl	36(%ecx), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebx          # 4-byte Reload
-	adcl	40(%ecx), %ebx
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	24(%esp), %ebx          # 4-byte Reload
-	adcl	44(%ecx), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebx          # 4-byte Reload
-	adcl	48(%ecx), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	20(%esp), %ebx          # 4-byte Reload
-	adcl	52(%ecx), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebx          # 4-byte Reload
-	adcl	56(%ecx), %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebx          # 4-byte Reload
-	adcl	60(%ecx), %ebx
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	%esi, %ecx
-	shrl	$9, %ecx
-	andl	$1, %ecx
-	addl	%ebp, %ecx
-	adcl	$0, 16(%esp)            # 4-byte Folded Spill
+	movl	68(%edi), %ebp
+	shldl	$23, %ebp, %edx
+	movl	%edx, %ebx
+	shrl	$9, 44(%esp)                    # 4-byte Folded Spill
+	movl	64(%edi), %edx
+	shldl	$23, %edx, %ebp
+	andl	$511, %edx                      # imm = 0x1FF
+	addl	(%edi), %ebp
+	adcl	4(%edi), %ebx
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	adcl	8(%edi), %eax
+	adcl	12(%edi), %ecx
+	adcl	16(%edi), %esi
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	20(%edi), %ebx
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	24(%edi), %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	28(%edi), %ebx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	32(%edi), %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	adcl	36(%edi), %ebx
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	adcl	40(%edi), %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	adcl	44(%edi), %ebx
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	adcl	48(%edi), %ebx
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	adcl	52(%edi), %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebx                  # 4-byte Reload
+	adcl	56(%edi), %ebx
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	movl	(%esp), %ebx                    # 4-byte Reload
+	adcl	60(%edi), %ebx
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, %edi
+	shrl	$9, %edi
+	andl	$1, %edi
+	addl	%ebp, %edi
+	adcl	$0, 48(%esp)                    # 4-byte Folded Spill
 	adcl	$0, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	%edi, %esi
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	adcl	$0, 48(%esp)            # 4-byte Folded Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	adcl	$0, 52(%esp)            # 4-byte Folded Spill
-	adcl	$0, 20(%esp)            # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	%ebx, %ebp
-	adcl	$0, %ebp
-	movl	12(%esp), %ebx          # 4-byte Reload
+	adcl	$0, %ecx
+	adcl	$0, %esi
+	adcl	$0, 4(%esp)                     # 4-byte Folded Spill
+	adcl	$0, 8(%esp)                     # 4-byte Folded Spill
+	adcl	$0, 12(%esp)                    # 4-byte Folded Spill
+	adcl	$0, 16(%esp)                    # 4-byte Folded Spill
+	adcl	$0, 20(%esp)                    # 4-byte Folded Spill
+	adcl	$0, 24(%esp)                    # 4-byte Folded Spill
+	adcl	$0, 28(%esp)                    # 4-byte Folded Spill
+	adcl	$0, 32(%esp)                    # 4-byte Folded Spill
+	movl	36(%esp), %ebx                  # 4-byte Reload
 	adcl	$0, %ebx
-	movl	%ecx, %edi
+	adcl	$0, 40(%esp)                    # 4-byte Folded Spill
+	movl	(%esp), %ebp                    # 4-byte Reload
+	adcl	$0, %ebp
+	adcl	$0, %edx
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
 	andl	%eax, %edi
-	andl	%edx, %edi
-	andl	%esi, %edi
-	andl	28(%esp), %edi          # 4-byte Folded Reload
-	andl	32(%esp), %edi          # 4-byte Folded Reload
-	andl	36(%esp), %edi          # 4-byte Folded Reload
-	andl	40(%esp), %edi          # 4-byte Folded Reload
-	andl	44(%esp), %edi          # 4-byte Folded Reload
-	andl	48(%esp), %edi          # 4-byte Folded Reload
-	andl	24(%esp), %edi          # 4-byte Folded Reload
-	andl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	20(%esp), %esi          # 4-byte Reload
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	andl	%ecx, %edi
+	movl	%esi, (%esp)                    # 4-byte Spill
 	andl	%esi, %edi
-	andl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, %edx
-	movl	16(%esp), %ebx          # 4-byte Reload
+	movl	48(%esp), %esi                  # 4-byte Reload
+	andl	4(%esp), %edi                   # 4-byte Folded Reload
+	andl	8(%esp), %edi                   # 4-byte Folded Reload
+	andl	12(%esp), %edi                  # 4-byte Folded Reload
+	andl	16(%esp), %edi                  # 4-byte Folded Reload
+	andl	20(%esp), %edi                  # 4-byte Folded Reload
+	andl	24(%esp), %edi                  # 4-byte Folded Reload
+	andl	28(%esp), %edi                  # 4-byte Folded Reload
+	andl	32(%esp), %edi                  # 4-byte Folded Reload
+	movl	%ebx, %eax
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	andl	%ebx, %edi
+	andl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	%ebp, %ecx
 	andl	%ebp, %edi
-	movl	%ebp, %eax
 	movl	%edx, %ebp
-	orl	$-512, %ebp             # imm = 0xFFFFFFFFFFFFFE00
+	orl	$-512, %ebp                     # imm = 0xFE00
 	andl	%edi, %ebp
-	andl	%ebx, %ebp
+	andl	%esi, %ebp
 	cmpl	$-1, %ebp
 	movl	80(%esp), %edi
 	je	.LBB4_1
-# BB#3:                                 # %nonzero
-	movl	%ecx, (%edi)
-	movl	%ebx, 4(%edi)
-	movl	(%esp), %ecx            # 4-byte Reload
-	movl	%ecx, 8(%edi)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 12(%edi)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 16(%edi)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 20(%edi)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%edi)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%edi)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%edi)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%edi)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%edi)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%edi)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%edi)
-	movl	%esi, 52(%edi)
-	movl	56(%esp), %ecx          # 4-byte Reload
+# %bb.3:                                # %nonzero
+	movl	%ecx, 60(%edi)
+	movl	40(%esp), %ecx                  # 4-byte Reload
 	movl	%ecx, 56(%edi)
-	movl	%eax, 60(%edi)
-	andl	$511, %edx              # imm = 0x1FF
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 52(%edi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%edi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 44(%edi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 40(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 36(%edi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 32(%edi)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 28(%edi)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 24(%edi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 20(%edi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 16(%edi)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edi)
+	movl	56(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%edi)
+	movl	%esi, 4(%edi)
+	movl	52(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%edi)
+	andl	$511, %edx                      # imm = 0x1FF
 	movl	%edx, 64(%edi)
 	jmp	.LBB4_2
 .LBB4_1:                                # %zero
 	xorl	%eax, %eax
 	movl	$17, %ecx
-	rep;stosl
+	rep;stosl %eax, %es:(%edi)
 .LBB4_2:                                # %zero
 	addl	$60, %esp
 	popl	%esi
@@ -607,72417 +653,26015 @@ mcl_fpDbl_mod_NIST_P521L:               # @mcl_fpDbl_mod_NIST_P521L
 	retl
 .Lfunc_end4:
 	.size	mcl_fpDbl_mod_NIST_P521L, .Lfunc_end4-mcl_fpDbl_mod_NIST_P521L
-
-	.globl	mcl_fp_mulUnitPre1L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre1L,@function
-mcl_fp_mulUnitPre1L:                    # @mcl_fp_mulUnitPre1L
-# BB#0:
-	movl	8(%esp), %eax
-	movl	(%eax), %eax
-	mull	12(%esp)
-	movl	4(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%edx, 4(%ecx)
-	retl
-.Lfunc_end5:
-	.size	mcl_fp_mulUnitPre1L, .Lfunc_end5-mcl_fp_mulUnitPre1L
-
-	.globl	mcl_fpDbl_mulPre1L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre1L,@function
-mcl_fpDbl_mulPre1L:                     # @mcl_fpDbl_mulPre1L
-# BB#0:
-	movl	12(%esp), %eax
-	movl	(%eax), %eax
-	movl	8(%esp), %ecx
-	mull	(%ecx)
-	movl	4(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%edx, 4(%ecx)
-	retl
-.Lfunc_end6:
-	.size	mcl_fpDbl_mulPre1L, .Lfunc_end6-mcl_fpDbl_mulPre1L
-
-	.globl	mcl_fpDbl_sqrPre1L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre1L,@function
-mcl_fpDbl_sqrPre1L:                     # @mcl_fpDbl_sqrPre1L
-# BB#0:
-	movl	8(%esp), %eax
-	movl	(%eax), %eax
-	mull	%eax
-	movl	4(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%edx, 4(%ecx)
-	retl
-.Lfunc_end7:
-	.size	mcl_fpDbl_sqrPre1L, .Lfunc_end7-mcl_fpDbl_sqrPre1L
-
-	.globl	mcl_fp_mont1L
-	.align	16, 0x90
-	.type	mcl_fp_mont1L,@function
-mcl_fp_mont1L:                          # @mcl_fp_mont1L
-# BB#0:
+                                        # -- End function
+	.globl	mulPv192x32                     # -- Begin function mulPv192x32
+	.p2align	4, 0x90
+	.type	mulPv192x32,@function
+mulPv192x32:                            # @mulPv192x32
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %eax
-	movl	20(%esp), %ecx
-	mull	(%ecx)
-	movl	%eax, %ecx
+	subl	$28, %esp
+	movl	56(%esp), %ebx
+	movl	52(%esp), %edi
+	movl	%ebx, %eax
+	mull	20(%edi)
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	16(%edi)
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	12(%edi)
 	movl	%edx, %esi
-	movl	24(%esp), %edx
-	movl	-4(%edx), %eax
-	imull	%ecx, %eax
-	movl	(%edx), %edi
-	mull	%edi
-	addl	%ecx, %eax
-	adcl	%esi, %edx
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	%edx, %eax
-	subl	%edi, %eax
-	sbbl	$0, %ecx
-	testb	$1, %cl
-	jne	.LBB8_2
-# BB#1:
-	movl	%eax, %edx
-.LBB8_2:
-	movl	12(%esp), %eax
-	movl	%edx, (%eax)
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%ebx, %eax
+	mull	8(%edi)
+	movl	%edx, %ebp
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%ebx, %eax
+	mull	4(%edi)
+	movl	%edx, %ecx
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%ebx, %eax
+	mull	(%edi)
+	movl	48(%esp), %ebx
+	movl	%eax, (%ebx)
+	addl	(%esp), %edx                    # 4-byte Folded Reload
+	movl	%edx, 4(%ebx)
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 8(%ebx)
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 12(%ebx)
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 16(%ebx)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 20(%ebx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 24(%ebx)
+	movl	%ebx, %eax
+	addl	$28, %esp
 	popl	%esi
 	popl	%edi
-	retl
-.Lfunc_end8:
-	.size	mcl_fp_mont1L, .Lfunc_end8-mcl_fp_mont1L
-
-	.globl	mcl_fp_montNF1L
-	.align	16, 0x90
-	.type	mcl_fp_montNF1L,@function
-mcl_fp_montNF1L:                        # @mcl_fp_montNF1L
-# BB#0:
+	popl	%ebx
+	popl	%ebp
+	retl	$4
+.Lfunc_end5:
+	.size	mulPv192x32, .Lfunc_end5-mulPv192x32
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre6L             # -- Begin function mcl_fp_mulUnitPre6L
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre6L,@function
+mcl_fp_mulUnitPre6L:                    # @mcl_fp_mulUnitPre6L
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %eax
-	movl	20(%esp), %ecx
-	mull	(%ecx)
-	movl	%eax, %ecx
+	subl	$28, %esp
+	movl	56(%esp), %ebx
+	movl	52(%esp), %edi
+	movl	%ebx, %eax
+	mull	20(%edi)
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	16(%edi)
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	12(%edi)
 	movl	%edx, %esi
-	movl	24(%esp), %edx
-	movl	-4(%edx), %eax
-	imull	%ecx, %eax
-	movl	(%edx), %edi
-	mull	%edi
-	addl	%ecx, %eax
-	adcl	%esi, %edx
-	movl	%edx, %eax
-	subl	%edi, %eax
-	js	.LBB9_2
-# BB#1:
-	movl	%eax, %edx
-.LBB9_2:
-	movl	12(%esp), %eax
-	movl	%edx, (%eax)
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%ebx, %eax
+	mull	8(%edi)
+	movl	%edx, %ebp
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%ebx, %eax
+	mull	4(%edi)
+	movl	%edx, %ecx
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%ebx, %eax
+	mull	(%edi)
+	movl	48(%esp), %edi
+	movl	%eax, (%edi)
+	addl	(%esp), %edx                    # 4-byte Folded Reload
+	movl	%edx, 4(%edi)
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 8(%edi)
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 12(%edi)
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 16(%edi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 20(%edi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 24(%edi)
+	addl	$28, %esp
 	popl	%esi
 	popl	%edi
+	popl	%ebx
+	popl	%ebp
 	retl
-.Lfunc_end9:
-	.size	mcl_fp_montNF1L, .Lfunc_end9-mcl_fp_montNF1L
-
-	.globl	mcl_fp_montRed1L
-	.align	16, 0x90
-	.type	mcl_fp_montRed1L,@function
-mcl_fp_montRed1L:                       # @mcl_fp_montRed1L
-# BB#0:
+.Lfunc_end6:
+	.size	mcl_fp_mulUnitPre6L, .Lfunc_end6-mcl_fp_mulUnitPre6L
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre6L              # -- Begin function mcl_fpDbl_mulPre6L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre6L,@function
+mcl_fpDbl_mulPre6L:                     # @mcl_fpDbl_mulPre6L
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	16(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	20(%esp), %edx
-	movl	-4(%edx), %eax
-	imull	%esi, %eax
-	movl	(%edx), %edi
+	subl	$96, %esp
+	movl	120(%esp), %ecx
+	movl	(%ecx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	124(%esp), %edx
+	movl	(%edx), %esi
+	mull	%esi
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	116(%esp), %edx
+	movl	%eax, (%edx)
+	movl	4(%ecx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	8(%ecx), %edi
+	movl	12(%ecx), %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %ebx
+	movl	20(%ecx), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	124(%esp), %eax
+	movl	4(%eax), %ecx
+	movl	%ecx, %eax
+	mull	%edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%ebx
+	movl	%ebx, 64(%esp)                  # 4-byte Spill
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%ebp
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
 	mull	%edi
-	addl	%esi, %eax
-	adcl	4(%ecx), %edx
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	%edx, %eax
-	subl	%edi, %eax
-	sbbl	$0, %ecx
-	testb	$1, %cl
-	jne	.LBB10_2
-# BB#1:
-	movl	%eax, %edx
-.LBB10_2:
-	movl	12(%esp), %eax
-	movl	%edx, (%eax)
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end10:
-	.size	mcl_fp_montRed1L, .Lfunc_end10-mcl_fp_montRed1L
-
-	.globl	mcl_fp_addPre1L
-	.align	16, 0x90
-	.type	mcl_fp_addPre1L,@function
-mcl_fp_addPre1L:                        # @mcl_fp_addPre1L
-# BB#0:
-	movl	12(%esp), %eax
-	movl	(%eax), %eax
-	movl	4(%esp), %ecx
-	movl	8(%esp), %edx
-	addl	(%edx), %eax
-	movl	%eax, (%ecx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	retl
-.Lfunc_end11:
-	.size	mcl_fp_addPre1L, .Lfunc_end11-mcl_fp_addPre1L
-
-	.globl	mcl_fp_subPre1L
-	.align	16, 0x90
-	.type	mcl_fp_subPre1L,@function
-mcl_fp_subPre1L:                        # @mcl_fp_subPre1L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
-	xorl	%eax, %eax
-	movl	8(%esp), %edx
-	movl	16(%esp), %esi
-	subl	(%esi), %ecx
-	movl	%ecx, (%edx)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	retl
-.Lfunc_end12:
-	.size	mcl_fp_subPre1L, .Lfunc_end12-mcl_fp_subPre1L
-
-	.globl	mcl_fp_shr1_1L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_1L,@function
-mcl_fp_shr1_1L:                         # @mcl_fp_shr1_1L
-# BB#0:
-	movl	8(%esp), %eax
-	movl	(%eax), %eax
-	shrl	%eax
-	movl	4(%esp), %ecx
-	movl	%eax, (%ecx)
-	retl
-.Lfunc_end13:
-	.size	mcl_fp_shr1_1L, .Lfunc_end13-mcl_fp_shr1_1L
-
-	.globl	mcl_fp_add1L
-	.align	16, 0x90
-	.type	mcl_fp_add1L,@function
-mcl_fp_add1L:                           # @mcl_fp_add1L
-# BB#0:
-	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %eax
-	movl	8(%esp), %ecx
-	movl	12(%esp), %edx
-	addl	(%edx), %eax
-	movl	%eax, (%ecx)
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	20(%esp), %esi
-	subl	(%esi), %eax
-	sbbl	$0, %edx
-	testb	$1, %dl
-	jne	.LBB14_2
-# BB#1:                                 # %nocarry
-	movl	%eax, (%ecx)
-.LBB14_2:                               # %carry
-	popl	%esi
-	retl
-.Lfunc_end14:
-	.size	mcl_fp_add1L, .Lfunc_end14-mcl_fp_add1L
-
-	.globl	mcl_fp_addNF1L
-	.align	16, 0x90
-	.type	mcl_fp_addNF1L,@function
-mcl_fp_addNF1L:                         # @mcl_fp_addNF1L
-# BB#0:
-	movl	12(%esp), %eax
-	movl	(%eax), %eax
-	movl	8(%esp), %ecx
-	addl	(%ecx), %eax
-	movl	16(%esp), %edx
-	movl	%eax, %ecx
-	subl	(%edx), %ecx
-	js	.LBB15_2
-# BB#1:
-	movl	%ecx, %eax
-.LBB15_2:
-	movl	4(%esp), %ecx
-	movl	%eax, (%ecx)
-	retl
-.Lfunc_end15:
-	.size	mcl_fp_addNF1L, .Lfunc_end15-mcl_fp_addNF1L
-
-	.globl	mcl_fp_sub1L
-	.align	16, 0x90
-	.type	mcl_fp_sub1L,@function
-mcl_fp_sub1L:                           # @mcl_fp_sub1L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %eax
-	xorl	%edx, %edx
-	movl	8(%esp), %ecx
-	movl	16(%esp), %esi
-	subl	(%esi), %eax
-	movl	%eax, (%ecx)
-	sbbl	$0, %edx
-	testb	$1, %dl
-	jne	.LBB16_2
-# BB#1:                                 # %nocarry
-	popl	%esi
-	retl
-.LBB16_2:                               # %carry
-	movl	20(%esp), %edx
-	addl	(%edx), %eax
-	movl	%eax, (%ecx)
-	popl	%esi
-	retl
-.Lfunc_end16:
-	.size	mcl_fp_sub1L, .Lfunc_end16-mcl_fp_sub1L
-
-	.globl	mcl_fp_subNF1L
-	.align	16, 0x90
-	.type	mcl_fp_subNF1L,@function
-mcl_fp_subNF1L:                         # @mcl_fp_subNF1L
-# BB#0:
-	movl	8(%esp), %eax
-	movl	(%eax), %eax
-	movl	12(%esp), %ecx
-	subl	(%ecx), %eax
-	movl	%eax, %ecx
-	sarl	$31, %ecx
-	movl	16(%esp), %edx
-	andl	(%edx), %ecx
-	addl	%eax, %ecx
-	movl	4(%esp), %eax
-	movl	%ecx, (%eax)
-	retl
-.Lfunc_end17:
-	.size	mcl_fp_subNF1L, .Lfunc_end17-mcl_fp_subNF1L
-
-	.globl	mcl_fpDbl_add1L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add1L,@function
-mcl_fpDbl_add1L:                        # @mcl_fpDbl_add1L
-# BB#0:
-	pushl	%ebx
-	pushl	%esi
-	movl	20(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %eax
-	movl	16(%esp), %esi
-	addl	(%esi), %edx
-	movl	12(%esp), %ecx
-	adcl	4(%esi), %eax
-	movl	%edx, (%ecx)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	24(%esp), %esi
-	movl	%eax, %edx
-	subl	(%esi), %edx
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB18_2
-# BB#1:
-	movl	%edx, %eax
-.LBB18_2:
-	movl	%eax, 4(%ecx)
-	popl	%esi
-	popl	%ebx
-	retl
-.Lfunc_end18:
-	.size	mcl_fpDbl_add1L, .Lfunc_end18-mcl_fpDbl_add1L
-
-	.globl	mcl_fpDbl_sub1L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub1L,@function
-mcl_fpDbl_sub1L:                        # @mcl_fpDbl_sub1L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %esi
-	movl	4(%eax), %eax
-	xorl	%ecx, %ecx
-	movl	16(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %eax
-	movl	8(%esp), %edx
-	movl	%esi, (%edx)
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	je	.LBB19_2
-# BB#1:
-	movl	20(%esp), %ecx
-	movl	(%ecx), %ecx
-.LBB19_2:
-	addl	%eax, %ecx
-	movl	%ecx, 4(%edx)
-	popl	%esi
-	retl
-.Lfunc_end19:
-	.size	mcl_fpDbl_sub1L, .Lfunc_end19-mcl_fpDbl_sub1L
-
-	.globl	mcl_fp_mulUnitPre2L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre2L,@function
-mcl_fp_mulUnitPre2L:                    # @mcl_fp_mulUnitPre2L
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	20(%esp), %ebx
-	movl	%ecx, %eax
-	mull	4(%ebx)
-	movl	%edx, %esi
-	movl	%eax, %edi
+	movl	%edi, 76(%esp)                  # 4-byte Spill
+	movl	%edi, %ebp
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	(%ebx)
-	movl	16(%esp), %ecx
-	movl	%eax, (%ecx)
-	addl	%edi, %edx
-	movl	%edx, 4(%ecx)
-	adcl	$0, %esi
-	movl	%esi, 8(%ecx)
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end20:
-	.size	mcl_fp_mulUnitPre2L, .Lfunc_end20-mcl_fp_mulUnitPre2L
-
-	.globl	mcl_fpDbl_mulPre2L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre2L,@function
-mcl_fpDbl_mulPre2L:                     # @mcl_fpDbl_mulPre2L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$8, %esp
-	movl	32(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edi
-	movl	36(%esp), %ebx
-	movl	(%ebx), %esi
+	movl	56(%esp), %edi                  # 4-byte Reload
+	mull	%edi
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	mull	%esi
-	movl	%edx, %ebp
-	movl	28(%esp), %edx
-	movl	%eax, (%edx)
-	movl	4(%ebx), %ebx
-	movl	%edi, %eax
-	mull	%ebx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%ebx
 	movl	%edx, %ecx
-	movl	%eax, %ebx
-	movl	%edi, %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
 	mull	%esi
-	addl	%ebp, %eax
-	adcl	$0, %edx
-	addl	%ebx, %eax
-	movl	28(%esp), %esi
-	movl	%eax, 4(%esi)
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	%ecx, %edx
-	movl	%edx, 8(%esi)
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 12(%esi)
-	addl	$8, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end21:
-	.size	mcl_fpDbl_mulPre2L, .Lfunc_end21-mcl_fpDbl_mulPre2L
-
-	.globl	mcl_fpDbl_sqrPre2L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre2L,@function
-mcl_fpDbl_sqrPre2L:                     # @mcl_fpDbl_sqrPre2L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %esi
-	movl	%esi, %eax
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	mull	%esi
+	movl	%edx, %ebx
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	%esi
+	movl	%edx, %ebp
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
 	mull	%esi
 	movl	%edx, %edi
+	addl	48(%esp), %eax                  # 4-byte Folded Reload
+	adcl	80(%esp), %edi                  # 4-byte Folded Reload
+	adcl	84(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	88(%esp), %ebx                  # 4-byte Folded Reload
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	92(%esp), %esi                  # 4-byte Folded Reload
+	adcl	$0, %ecx
+	addl	68(%esp), %eax                  # 4-byte Folded Reload
+	movl	116(%esp), %edx
+	movl	%eax, 4(%edx)
+	adcl	36(%esp), %edi                  # 4-byte Folded Reload
+	adcl	40(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	8(%esp), %ebx                   # 4-byte Folded Reload
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%ecx, %edx
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	setb	%al
+	addl	44(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, %ecx
+	adcl	(%esp), %ebp                    # 4-byte Folded Reload
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	52(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movzbl	%al, %eax
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	124(%esp), %eax
+	movl	8(%eax), %esi
+	movl	%esi, %eax
+	mull	32(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
 	movl	%eax, %ebx
 	movl	%esi, %eax
-	mull	%ecx
-	movl	%edx, %esi
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%esi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
 	movl	%eax, %ebp
+	addl	(%esp), %edx                    # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	adcl	%ebx, %edi
+	movl	%edi, %ebx
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	36(%esp), %edi                  # 4-byte Folded Reload
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	40(%esp), %esi                  # 4-byte Folded Reload
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	44(%esp), %eax                  # 4-byte Folded Reload
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	$0, %edx
+	addl	%ecx, %ebp
+	movl	116(%esp), %ecx
+	movl	%ebp, 8(%ecx)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, (%esp)                    # 4-byte Folded Spill
+	adcl	48(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	adcl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	$0, %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	124(%esp), %eax
+	movl	12(%eax), %ecx
 	movl	%ecx, %eax
+	mull	32(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	movl	72(%esp), %ecx                  # 4-byte Reload
 	mull	%ecx
-	movl	20(%esp), %ecx
-	movl	%eax, (%ecx)
-	addl	%ebp, %edx
+	movl	%ecx, %ebp
+	movl	%eax, %edi
+	addl	68(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, %ecx
+	adcl	8(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	adcl	36(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, %eax
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	adcl	40(%esp), %ebx                  # 4-byte Folded Reload
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	44(%esp), %esi                  # 4-byte Folded Reload
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	$0, %edx
+	addl	(%esp), %edi                    # 4-byte Folded Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	116(%esp), %ecx
+	movl	%edi, 12(%ecx)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 8(%esp)                   # 4-byte Folded Spill
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 52(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	$0, %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	124(%esp), %eax
+	movl	16(%eax), %esi
+	movl	%esi, %eax
+	mull	32(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%esi, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, %ebx
 	movl	%esi, %eax
+	mull	%ebp
+	movl	%eax, %ebp
+	movl	%edx, %edi
+	addl	%ebx, %edi
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	68(%esp), %eax                  # 4-byte Folded Reload
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	adcl	40(%esp), %ebx                  # 4-byte Folded Reload
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	$0, %edx
+	addl	44(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	movl	116(%esp), %ebp
+	movl	4(%esp), %esi                   # 4-byte Reload
+	movl	%esi, 16(%ebp)
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	48(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	52(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	adcl	$0, %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	124(%esp), %eax
+	movl	20(%eax), %ebx
+	movl	%ebx, %eax
+	mull	32(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, %esi
+	movl	%ebx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	addl	16(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	adcl	60(%esp), %edx                  # 4-byte Folded Reload
+	adcl	64(%esp), %edi                  # 4-byte Folded Reload
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	$0, 32(%esp)                    # 4-byte Folded Spill
+	addl	8(%esp), %esi                   # 4-byte Folded Reload
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	116(%esp), %ebx
+	movl	%esi, 20(%ebx)
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 24(%ebx)
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%eax, 28(%ebx)
+	adcl	28(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edx, 32(%ebx)
+	movl	%edi, 36(%ebx)
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 40(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	$0, %eax
-	addl	%ebp, %edx
-	movl	%edx, 4(%ecx)
-	adcl	%ebx, %eax
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	addl	%esi, %eax
-	movl	%eax, 8(%ecx)
-	adcl	%edi, %edx
-	movl	%edx, 12(%ecx)
+	movl	%eax, 44(%ebx)
+	addl	$96, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end22:
-	.size	mcl_fpDbl_sqrPre2L, .Lfunc_end22-mcl_fpDbl_sqrPre2L
-
-	.globl	mcl_fp_mont2L
-	.align	16, 0x90
-	.type	mcl_fp_mont2L,@function
-mcl_fp_mont2L:                          # @mcl_fp_mont2L
-# BB#0:
+.Lfunc_end7:
+	.size	mcl_fpDbl_mulPre6L, .Lfunc_end7-mcl_fpDbl_mulPre6L
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre6L              # -- Begin function mcl_fpDbl_sqrPre6L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre6L,@function
+mcl_fpDbl_sqrPre6L:                     # @mcl_fpDbl_sqrPre6L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$32, %esp
-	movl	56(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	4(%ecx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx
-	movl	(%ecx), %esi
+	subl	$172, %esp
+	movl	196(%esp), %edi
+	movl	20(%edi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%edi), %esi
 	mull	%esi
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	64(%esp), %edx
-	movl	-4(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	imull	%ecx, %ebp
-	movl	(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	4(%edx), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%edx
-	movl	%edx, %ebx
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%ecx
-	movl	%edx, %ebp
-	movl	%eax, %edi
-	movl	16(%esp), %eax          # 4-byte Reload
+	movl	%eax, 148(%esp)                 # 4-byte Spill
+	movl	%edx, 144(%esp)                 # 4-byte Spill
+	movl	16(%edi), %ebx
+	movl	%ebx, %eax
+	movl	%ebx, (%esp)                    # 4-byte Spill
 	mull	%esi
-	addl	4(%esp), %eax           # 4-byte Folded Reload
-	adcl	$0, %edx
-	addl	(%esp), %ebp            # 4-byte Folded Reload
-	adcl	$0, %ebx
-	addl	8(%esp), %edi           # 4-byte Folded Reload
-	adcl	%eax, %ebp
-	adcl	%edx, %ebx
-	movl	60(%esp), %eax
-	movl	4(%eax), %ecx
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
+	movl	%eax, 132(%esp)                 # 4-byte Spill
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	12(%edi), %ecx
 	movl	%ecx, %eax
-	mull	16(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 16(%esp)          # 4-byte Spill
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	mull	%esi
+	movl	%eax, 120(%esp)                 # 4-byte Spill
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	4(%edi), %ebp
+	movl	8(%edi), %edi
+	movl	%edi, %eax
+	mull	%esi
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 116(%esp)                 # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	mull	%ebp
+	movl	%edx, 140(%esp)                 # 4-byte Spill
+	movl	%eax, 136(%esp)                 # 4-byte Spill
+	movl	%ebx, %eax
+	mull	%ebp
+	movl	%edx, 128(%esp)                 # 4-byte Spill
+	movl	%eax, 124(%esp)                 # 4-byte Spill
 	movl	%ecx, %eax
-	mull	12(%esp)                # 4-byte Folded Reload
-	movl	%eax, %ecx
-	movl	%edx, %esi
-	addl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	$0, %edi
-	addl	%ebp, %ecx
-	adcl	%ebx, %esi
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	sbbl	%ebx, %ebx
-	movl	28(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	andl	$1, %ebx
-	mull	20(%esp)                # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	mull	24(%esp)                # 4-byte Folded Reload
-	addl	16(%esp), %eax          # 4-byte Folded Reload
-	adcl	$0, %edx
-	addl	%ecx, %ebp
-	adcl	%esi, %eax
-	adcl	%edi, %edx
-	adcl	$0, %ebx
-	movl	%eax, %esi
-	subl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, %ecx
-	sbbl	24(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB23_2
-# BB#1:
-	movl	%esi, %eax
-.LBB23_2:
-	movl	52(%esp), %esi
-	movl	%eax, (%esi)
-	testb	%bl, %bl
-	jne	.LBB23_4
-# BB#3:
-	movl	%ecx, %edx
-.LBB23_4:
-	movl	%edx, 4(%esi)
-	addl	$32, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end23:
-	.size	mcl_fp_mont2L, .Lfunc_end23-mcl_fp_mont2L
-
-	.globl	mcl_fp_montNF2L
-	.align	16, 0x90
-	.type	mcl_fp_montNF2L,@function
-mcl_fp_montNF2L:                        # @mcl_fp_montNF2L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$36, %esp
-	movl	60(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	4(%ecx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx
-	movl	(%ecx), %ebp
 	mull	%ebp
-	movl	%eax, %ebx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	68(%esp), %eax
-	movl	-4(%eax), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %edi
-	imull	%ecx, %edi
-	movl	(%eax), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	4(%eax), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	%edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)            # 4-byte Spill
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 112(%esp)                 # 4-byte Spill
 	movl	%edi, %eax
-	mull	%ecx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	%eax, %esi
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %eax
 	mull	%ebp
-	movl	%edx, %edi
-	movl	%eax, %ebp
-	addl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	$0, %edi
-	addl	%ebx, %esi
-	adcl	(%esp), %ebp            # 4-byte Folded Reload
-	adcl	$0, %edi
-	addl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	64(%esp), %eax
-	movl	4(%eax), %ebx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	%ebp
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	%esi
+	movl	%edx, %ebp
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	%esi
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	192(%esp), %edx
+	movl	%eax, (%edx)
+	movl	8(%esp), %ebx                   # 4-byte Reload
 	movl	%ebx, %eax
+	movl	(%esp), %ecx                    # 4-byte Reload
 	mull	%ecx
-	movl	%edx, %esi
-	movl	%eax, 16(%esp)          # 4-byte Spill
+	movl	%edx, 92(%esp)                  # 4-byte Spill
+	movl	%eax, 108(%esp)                 # 4-byte Spill
 	movl	%ebx, %eax
-	mull	20(%esp)                # 4-byte Folded Reload
-	movl	%eax, %ebx
-	movl	%edx, %ecx
-	addl	16(%esp), %ecx          # 4-byte Folded Reload
-	adcl	$0, %esi
-	addl	%ebp, %ebx
-	adcl	%edi, %ecx
-	adcl	$0, %esi
-	movl	24(%esp), %eax          # 4-byte Reload
-	imull	%ebx, %eax
-	movl	%eax, %edi
-	mull	32(%esp)                # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
+	movl	28(%esp), %esi                  # 4-byte Reload
+	mull	%esi
+	movl	%edx, 104(%esp)                 # 4-byte Spill
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	%ebx, %eax
+	mull	%edi
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	%ebx
+	movl	%edx, 168(%esp)                 # 4-byte Spill
+	movl	%eax, 164(%esp)                 # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%esi
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%edi
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%ecx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	%edi
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	%esi
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
 	movl	%edi, %eax
-	movl	28(%esp), %edi          # 4-byte Reload
 	mull	%edi
-	addl	%ebx, %ebp
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%ebp, %edi
+	addl	%ebp, 36(%esp)                  # 4-byte Folded Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	112(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 160(%esp)                 # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	124(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 156(%esp)                 # 4-byte Spill
+	movl	128(%esp), %eax                 # 4-byte Reload
+	adcl	136(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 152(%esp)                 # 4-byte Spill
+	movl	140(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, %ebx
+	movl	32(%esp), %eax                  # 4-byte Reload
+	addl	88(%esp), %eax                  # 4-byte Folded Reload
+	adcl	116(%esp), %edi                 # 4-byte Folded Reload
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	120(%esp), %ecx                 # 4-byte Folded Reload
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	132(%esp), %esi                 # 4-byte Folded Reload
+	movl	44(%esp), %edx                  # 4-byte Reload
+	adcl	148(%esp), %edx                 # 4-byte Folded Reload
+	movl	144(%esp), %ebp                 # 4-byte Reload
+	adcl	$0, %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	addl	88(%esp), %eax                  # 4-byte Folded Reload
+	adcl	36(%esp), %edi                  # 4-byte Folded Reload
+	movl	192(%esp), %ebp
+	movl	%eax, 4(%ebp)
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	adcl	160(%esp), %esi                 # 4-byte Folded Reload
+	adcl	156(%esp), %edx                 # 4-byte Folded Reload
+	movl	152(%esp), %eax                 # 4-byte Reload
+	adcl	%eax, 32(%esp)                  # 4-byte Folded Spill
+	adcl	$0, %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	addl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	(%esp), %ebp                    # 4-byte Reload
+	adcl	24(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	68(%esp), %ebp                  # 4-byte Folded Reload
+	movl	72(%esp), %ebx                  # 4-byte Reload
+	adcl	84(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	96(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	adcl	$0, 4(%esp)                     # 4-byte Folded Spill
+	addl	116(%esp), %edi                 # 4-byte Folded Reload
 	adcl	%ecx, %eax
-	adcl	$0, %esi
-	addl	24(%esp), %eax          # 4-byte Folded Reload
-	adcl	%edx, %esi
-	movl	%eax, %edx
-	subl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, %ecx
-	sbbl	%edi, %ecx
-	testl	%ecx, %ecx
-	js	.LBB24_2
-# BB#1:
-	movl	%edx, %eax
-.LBB24_2:
-	movl	56(%esp), %edx
-	movl	%eax, (%edx)
-	js	.LBB24_4
-# BB#3:
-	movl	%ecx, %esi
-.LBB24_4:
-	movl	%esi, 4(%edx)
-	addl	$36, %esp
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	192(%esp), %eax
+	movl	%edi, 8(%eax)
+	adcl	%esi, (%esp)                    # 4-byte Folded Spill
+	adcl	%edx, %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 20(%esp)                  # 4-byte Folded Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 24(%esp)                  # 4-byte Folded Spill
+	adcl	$0, 4(%esp)                     # 4-byte Folded Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	addl	112(%esp), %eax                 # 4-byte Folded Reload
+	movl	40(%esp), %esi                  # 4-byte Reload
+	adcl	68(%esp), %esi                  # 4-byte Folded Reload
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	72(%esp), %edx                  # 4-byte Folded Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	76(%esp), %ecx                  # 4-byte Folded Reload
+	movl	80(%esp), %ebx                  # 4-byte Reload
+	adcl	100(%esp), %ebx                 # 4-byte Folded Reload
+	movl	104(%esp), %ebp                 # 4-byte Reload
+	adcl	$0, %ebp
+	movl	12(%esp), %edi                  # 4-byte Reload
+	addl	120(%esp), %edi                 # 4-byte Folded Reload
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	192(%esp), %eax
+	movl	%edi, 12(%eax)
+	adcl	48(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	adcl	$0, %ebp
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	addl	124(%esp), %esi                 # 4-byte Folded Reload
+	movl	128(%esp), %edi                 # 4-byte Reload
+	adcl	84(%esp), %edi                  # 4-byte Folded Reload
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	76(%esp), %ecx                  # 4-byte Folded Reload
+	movl	56(%esp), %edx                  # 4-byte Reload
+	adcl	80(%esp), %edx                  # 4-byte Folded Reload
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	108(%esp), %eax                 # 4-byte Folded Reload
+	movl	92(%esp), %ebp                  # 4-byte Reload
+	adcl	$0, %ebp
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	addl	132(%esp), %ebx                 # 4-byte Folded Reload
+	adcl	40(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	192(%esp), %esi
+	movl	%ebx, 16(%esi)
+	adcl	52(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, %esi
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	$0, %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	144(%esp), %ecx                 # 4-byte Reload
+	addl	136(%esp), %ecx                 # 4-byte Folded Reload
+	movl	140(%esp), %edi                 # 4-byte Reload
+	adcl	96(%esp), %edi                  # 4-byte Folded Reload
+	movl	100(%esp), %eax                 # 4-byte Reload
+	adcl	%eax, 64(%esp)                  # 4-byte Folded Spill
+	movl	104(%esp), %eax                 # 4-byte Reload
+	adcl	108(%esp), %eax                 # 4-byte Folded Reload
+	movl	164(%esp), %ebx                 # 4-byte Reload
+	adcl	92(%esp), %ebx                  # 4-byte Folded Reload
+	movl	168(%esp), %ebp                 # 4-byte Reload
+	adcl	$0, %ebp
+	movl	44(%esp), %edx                  # 4-byte Reload
+	addl	148(%esp), %edx                 # 4-byte Folded Reload
+	adcl	%esi, %ecx
+	movl	192(%esp), %esi
+	movl	%edx, 20(%esi)
+	movl	%edi, %edx
+	adcl	8(%esp), %edx                   # 4-byte Folded Reload
+	movl	%ecx, 24(%esi)
+	movl	64(%esp), %edi                  # 4-byte Reload
+	adcl	56(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edx, 28(%esi)
+	adcl	60(%esp), %eax                  # 4-byte Folded Reload
+	movl	%edi, 32(%esi)
+	movl	%eax, 36(%esi)
+	adcl	16(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 40(%esi)
+	movl	%ebp, %eax
+	adcl	$0, %eax
+	movl	%eax, 44(%esi)
+	addl	$172, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end24:
-	.size	mcl_fp_montNF2L, .Lfunc_end24-mcl_fp_montNF2L
-
-	.globl	mcl_fp_montRed2L
-	.align	16, 0x90
-	.type	mcl_fp_montRed2L,@function
-mcl_fp_montRed2L:                       # @mcl_fp_montRed2L
-# BB#0:
+.Lfunc_end8:
+	.size	mcl_fpDbl_sqrPre6L, .Lfunc_end8-mcl_fpDbl_sqrPre6L
+                                        # -- End function
+	.globl	mcl_fp_mont6L                   # -- Begin function mcl_fp_mont6L
+	.p2align	4, 0x90
+	.type	mcl_fp_mont6L,@function
+mcl_fp_mont6L:                          # @mcl_fp_mont6L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$16, %esp
-	movl	44(%esp), %eax
-	movl	-4(%eax), %ecx
-	movl	(%eax), %ebx
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	40(%esp), %edx
-	movl	(%edx), %ebp
-	movl	%ebp, %edi
-	imull	%ecx, %edi
-	movl	4(%eax), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%edi, %eax
+	subl	$128, %esp
+	movl	152(%esp), %ebx
+	movl	(%ebx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	156(%esp), %ecx
+	movl	(%ecx), %esi
+	mull	%esi
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	160(%esp), %edi
+	movl	-4(%edi), %ecx
+	movl	%ecx, 80(%esp)                  # 4-byte Spill
+	imull	%eax, %ecx
+	movl	20(%edi), %edx
+	movl	%edx, 112(%esp)                 # 4-byte Spill
+	movl	%ecx, %eax
 	mull	%edx
-	movl	%edx, %esi
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	mull	%ebx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	16(%edi), %edx
+	movl	%edx, 108(%esp)                 # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%edx
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	12(%edi), %edx
+	movl	%edx, 92(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%edx
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	8(%edi), %edx
+	movl	%edx, 88(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%edx
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	(%edi), %ebp
+	movl	%ebp, 84(%esp)                  # 4-byte Spill
+	movl	4(%edi), %edx
+	movl	%edx, 96(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%ebp
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%ebx, %ecx
+	movl	20(%ebx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	mull	%esi
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	16(%ebx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	mull	%esi
+	movl	%eax, 124(%esp)                 # 4-byte Spill
+	movl	%edx, %ebp
+	movl	12(%ebx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	mull	%esi
+	movl	%eax, 120(%esp)                 # 4-byte Spill
 	movl	%edx, %edi
-	addl	4(%esp), %edi           # 4-byte Folded Reload
-	adcl	$0, %esi
-	addl	%ebp, %eax
-	movl	40(%esp), %edx
-	movl	12(%edx), %eax
-	adcl	4(%edx), %edi
-	adcl	8(%edx), %esi
-	adcl	$0, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	sbbl	%ebx, %ebx
-	imull	%edi, %ecx
-	andl	$1, %ebx
+	movl	4(%ebx), %ecx
+	movl	%ecx, 104(%esp)                 # 4-byte Spill
+	movl	8(%ebx), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	mull	%esi
+	movl	%edx, %ebx
+	movl	%eax, 116(%esp)                 # 4-byte Spill
 	movl	%ecx, %eax
-	mull	8(%esp)                 # 4-byte Folded Reload
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	%eax, %ebp
+	mull	%esi
+	addl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	116(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, 116(%esp)                 # 4-byte Spill
+	adcl	120(%esp), %ebx                 # 4-byte Folded Reload
+	movl	%ebx, 120(%esp)                 # 4-byte Spill
+	adcl	124(%esp), %edi                 # 4-byte Folded Reload
+	movl	%edi, 124(%esp)                 # 4-byte Spill
+	adcl	100(%esp), %ebp                 # 4-byte Folded Reload
+	movl	%ebp, 100(%esp)                 # 4-byte Spill
+	adcl	$0, (%esp)                      # 4-byte Folded Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	addl	56(%esp), %edi                  # 4-byte Folded Reload
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	52(%esp), %ebx                  # 4-byte Folded Reload
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	addl	12(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	32(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	adcl	116(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	adcl	120(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	124(%esp), %ebx                 # 4-byte Folded Reload
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	adcl	100(%esp), %esi                 # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	setb	52(%esp)                        # 1-byte Folded Spill
+	movl	156(%esp), %eax
+	movl	4(%eax), %ecx
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, %ebx
 	movl	%ecx, %eax
-	mull	12(%esp)                # 4-byte Folded Reload
-	addl	(%esp), %eax            # 4-byte Folded Reload
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	addl	%ebx, %edx
+	movl	%edx, %edi
+	adcl	56(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, %ebx
+	movl	%ebp, %edx
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	$0, %ebp
+	movl	32(%esp), %esi                  # 4-byte Reload
+	addl	36(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movzbl	52(%esp), %eax                  # 1-byte Folded Reload
+	adcl	%eax, %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	setb	24(%esp)                        # 1-byte Folded Spill
+	movl	80(%esp), %ebx                  # 4-byte Reload
+	imull	%esi, %ebx
+	movl	%ebx, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	%ebx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, %ebp
+	movl	%ebx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %edi
+	movl	%edx, %esi
+	addl	%ebp, %esi
+	adcl	100(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, %ebx
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	56(%esp), %ecx                  # 4-byte Folded Reload
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	44(%esp), %eax                  # 4-byte Folded Reload
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	4(%esp), %edx                   # 4-byte Reload
 	adcl	$0, %edx
-	addl	%edi, %ebp
-	adcl	%esi, %eax
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	adcl	$0, %ebx
-	movl	%eax, %esi
-	subl	8(%esp), %esi           # 4-byte Folded Reload
+	addl	32(%esp), %edi                  # 4-byte Folded Reload
+	adcl	48(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ebp                    # 4-byte Folded Reload
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movzbl	24(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	156(%esp), %eax
+	movl	8(%eax), %esi
+	movl	%esi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%esi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %ecx
-	sbbl	12(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB25_2
-# BB#1:
+	movl	%eax, 56(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-.LBB25_2:
-	movl	36(%esp), %esi
-	movl	%eax, (%esi)
-	testb	%bl, %bl
-	jne	.LBB25_4
-# BB#3:
-	movl	%ecx, %edx
-.LBB25_4:
-	movl	%edx, 4(%esi)
-	addl	$16, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end25:
-	.size	mcl_fp_montRed2L, .Lfunc_end25-mcl_fp_montRed2L
-
-	.globl	mcl_fp_addPre2L
-	.align	16, 0x90
-	.type	mcl_fp_addPre2L,@function
-mcl_fp_addPre2L:                        # @mcl_fp_addPre2L
-# BB#0:
-	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %eax
-	movl	12(%esp), %edx
-	addl	(%edx), %ecx
-	movl	8(%esp), %esi
-	adcl	4(%edx), %eax
-	movl	%ecx, (%esi)
-	movl	%eax, 4(%esi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	retl
-.Lfunc_end26:
-	.size	mcl_fp_addPre2L, .Lfunc_end26-mcl_fp_addPre2L
-
-	.globl	mcl_fp_subPre2L
-	.align	16, 0x90
-	.type	mcl_fp_subPre2L,@function
-mcl_fp_subPre2L:                        # @mcl_fp_subPre2L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	xorl	%eax, %eax
-	movl	16(%esp), %esi
-	subl	(%esi), %ecx
-	sbbl	4(%esi), %edx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	%edx, 4(%esi)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	retl
-.Lfunc_end27:
-	.size	mcl_fp_subPre2L, .Lfunc_end27-mcl_fp_subPre2L
-
-	.globl	mcl_fp_shr1_2L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_2L,@function
-mcl_fp_shr1_2L:                         # @mcl_fp_shr1_2L
-# BB#0:
-	movl	8(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %eax
-	shrdl	$1, %eax, %ecx
-	movl	4(%esp), %edx
-	movl	%ecx, (%edx)
-	shrl	%eax
-	movl	%eax, 4(%edx)
-	retl
-.Lfunc_end28:
-	.size	mcl_fp_shr1_2L, .Lfunc_end28-mcl_fp_shr1_2L
-
-	.globl	mcl_fp_add2L
-	.align	16, 0x90
-	.type	mcl_fp_add2L,@function
-mcl_fp_add2L:                           # @mcl_fp_add2L
-# BB#0:
-	pushl	%ebx
-	pushl	%esi
-	movl	20(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	4(%ecx), %ecx
-	movl	16(%esp), %esi
-	addl	(%esi), %eax
-	movl	12(%esp), %edx
-	adcl	4(%esi), %ecx
-	movl	%eax, (%edx)
-	movl	%ecx, 4(%edx)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	24(%esp), %esi
-	subl	(%esi), %eax
-	sbbl	4(%esi), %ecx
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB29_2
-# BB#1:                                 # %nocarry
-	movl	%eax, (%edx)
-	movl	%ecx, 4(%edx)
-.LBB29_2:                               # %carry
-	popl	%esi
-	popl	%ebx
-	retl
-.Lfunc_end29:
-	.size	mcl_fp_add2L, .Lfunc_end29-mcl_fp_add2L
-
-	.globl	mcl_fp_addNF2L
-	.align	16, 0x90
-	.type	mcl_fp_addNF2L,@function
-mcl_fp_addNF2L:                         # @mcl_fp_addNF2L
-# BB#0:
-	pushl	%edi
-	pushl	%esi
-	movl	20(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %eax
-	movl	16(%esp), %edx
-	addl	(%edx), %ecx
-	adcl	4(%edx), %eax
-	movl	24(%esp), %edi
-	movl	%ecx, %esi
-	subl	(%edi), %esi
-	movl	%eax, %edx
-	sbbl	4(%edi), %edx
-	testl	%edx, %edx
-	js	.LBB30_2
-# BB#1:
-	movl	%esi, %ecx
-.LBB30_2:
-	movl	12(%esp), %esi
-	movl	%ecx, (%esi)
-	js	.LBB30_4
-# BB#3:
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, %edi
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %ebx
+	addl	%edi, %edx
+	movl	%edx, %ebp
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	56(%esp), %esi                  # 4-byte Folded Reload
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	28(%esp), %edi                  # 4-byte Folded Reload
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	8(%esp), %edx                   # 4-byte Folded Reload
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	addl	48(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	adcl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	adcl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	setb	36(%esp)                        # 1-byte Folded Spill
+	movl	80(%esp), %edi                  # 4-byte Reload
+	imull	%ebx, %edi
+	movl	%edi, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	%edi, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, %ebx
+	movl	%edi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %ebp
+	movl	%edx, %esi
+	addl	%ebx, %esi
+	adcl	100(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, %edi
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	56(%esp), %ecx                  # 4-byte Folded Reload
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	44(%esp), %eax                  # 4-byte Folded Reload
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	48(%esp), %ebx                  # 4-byte Folded Reload
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	$0, %edx
+	addl	28(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	32(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	movl	%edi, (%esp)                    # 4-byte Spill
+	adcl	52(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movzbl	36(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	156(%esp), %eax
+	movl	12(%eax), %esi
+	movl	%esi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, %ebx
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	addl	%ebx, %edi
+	adcl	44(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	52(%esp), %ebx                  # 4-byte Folded Reload
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	36(%esp), %edx                  # 4-byte Folded Reload
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	$0, %esi
+	addl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	adcl	40(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	setb	4(%esp)                         # 1-byte Folded Spill
+	movl	80(%esp), %edi                  # 4-byte Reload
+	imull	%eax, %edi
+	movl	%edi, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%edi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %esi
+	addl	%ebx, %edx
+	movl	%edx, %edi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	48(%esp), %ebp                  # 4-byte Folded Reload
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	$0, %edx
+	addl	32(%esp), %esi                  # 4-byte Folded Reload
+	adcl	52(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movzbl	4(%esp), %eax                   # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	156(%esp), %eax
+	movl	16(%eax), %esi
+	movl	%esi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, %edi
+	movl	%esi, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, %ebp
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	addl	%ebp, %edx
+	movl	%edx, %ebp
+	adcl	%edi, %ebx
+	movl	%ebx, %edi
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	48(%esp), %esi                  # 4-byte Folded Reload
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	28(%esp), %edx                  # 4-byte Folded Reload
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	40(%esp), %ebx                  # 4-byte Reload
+	addl	52(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ebp                    # 4-byte Folded Reload
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	setb	36(%esp)                        # 1-byte Folded Spill
+	movl	80(%esp), %ecx                  # 4-byte Reload
+	imull	%ebx, %ecx
+	movl	%ecx, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, %edi
+	movl	%ecx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, %esi
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %ebp
+	addl	%esi, %edx
+	movl	%edx, %esi
+	adcl	%edi, %ebx
+	movl	%ebx, %edi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	48(%esp), %edx                  # 4-byte Folded Reload
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	addl	40(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	28(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	adcl	52(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	16(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	adcl	20(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movzbl	36(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	156(%esp), %eax
+	movl	20(%eax), %ecx
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	%eax, %ebp
+	movl	%ecx, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, %ebx
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %edi
 	movl	%edx, %eax
-.LBB30_4:
-	movl	%eax, 4(%esi)
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end30:
-	.size	mcl_fp_addNF2L, .Lfunc_end30-mcl_fp_addNF2L
-
-	.globl	mcl_fp_sub2L
-	.align	16, 0x90
-	.type	mcl_fp_sub2L,@function
-mcl_fp_sub2L:                           # @mcl_fp_sub2L
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	20(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %eax
-	xorl	%ebx, %ebx
-	movl	24(%esp), %edx
-	subl	(%edx), %ecx
-	sbbl	4(%edx), %eax
-	movl	16(%esp), %edx
-	movl	%ecx, (%edx)
-	movl	%eax, 4(%edx)
+	addl	%ebx, %eax
+	adcl	%ebp, %esi
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	adcl	36(%esp), %ebp                  # 4-byte Folded Reload
+	movl	64(%esp), %ebx                  # 4-byte Reload
+	adcl	16(%esp), %ebx                  # 4-byte Folded Reload
+	movl	68(%esp), %edx                  # 4-byte Reload
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	addl	28(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	40(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	(%esp), %esi                    # 4-byte Folded Reload
+	movl	%esi, 104(%esp)                 # 4-byte Spill
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 64(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	setb	76(%esp)                        # 1-byte Folded Spill
+	movl	80(%esp), %esi                  # 4-byte Reload
+	imull	%edi, %esi
+	movl	%esi, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%esi, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%esi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%esi, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	addl	4(%esp), %eax                   # 4-byte Folded Reload
+	adcl	8(%esp), %edx                   # 4-byte Folded Reload
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	80(%esp), %esi                  # 4-byte Reload
+	adcl	$0, %esi
+	addl	16(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	adcl	104(%esp), %edx                 # 4-byte Folded Reload
+	adcl	60(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	64(%esp), %edi                  # 4-byte Folded Reload
+	adcl	68(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	72(%esp), %esi                  # 4-byte Folded Reload
+	movzbl	76(%esp), %ebx                  # 1-byte Folded Reload
+	adcl	$0, %ebx
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	subl	84(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	%edx, %eax
+	sbbl	96(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	sbbl	88(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%edi, 88(%esp)                  # 4-byte Spill
+	sbbl	92(%esp), %edi                  # 4-byte Folded Reload
+	movl	%ebp, 92(%esp)                  # 4-byte Spill
+	sbbl	108(%esp), %ebp                 # 4-byte Folded Reload
+	movl	%esi, 80(%esp)                  # 4-byte Spill
+	sbbl	112(%esp), %esi                 # 4-byte Folded Reload
 	sbbl	$0, %ebx
 	testb	$1, %bl
-	je	.LBB31_2
-# BB#1:                                 # %carry
-	movl	28(%esp), %esi
-	movl	4(%esi), %edi
-	addl	(%esi), %ecx
-	movl	%ecx, (%edx)
-	adcl	%eax, %edi
-	movl	%edi, 4(%edx)
-.LBB31_2:                               # %nocarry
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end31:
-	.size	mcl_fp_sub2L, .Lfunc_end31-mcl_fp_sub2L
-
-	.globl	mcl_fp_subNF2L
-	.align	16, 0x90
-	.type	mcl_fp_subNF2L,@function
-mcl_fp_subNF2L:                         # @mcl_fp_subNF2L
-# BB#0:
-	pushl	%edi
-	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %eax
-	movl	20(%esp), %edx
-	subl	(%edx), %ecx
-	sbbl	4(%edx), %eax
-	movl	%eax, %edx
-	sarl	$31, %edx
-	movl	24(%esp), %esi
-	movl	4(%esi), %edi
-	andl	%edx, %edi
-	andl	(%esi), %edx
-	addl	%ecx, %edx
-	movl	12(%esp), %ecx
-	movl	%edx, (%ecx)
-	adcl	%eax, %edi
-	movl	%edi, 4(%ecx)
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end32:
-	.size	mcl_fp_subNF2L, .Lfunc_end32-mcl_fp_subNF2L
-
-	.globl	mcl_fpDbl_add2L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add2L,@function
-mcl_fpDbl_add2L:                        # @mcl_fpDbl_add2L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	28(%esp), %edx
-	movl	12(%edx), %esi
-	movl	24(%esp), %edi
-	movl	12(%edi), %eax
-	movl	8(%edx), %ecx
-	movl	(%edx), %ebx
-	movl	4(%edx), %ebp
-	addl	(%edi), %ebx
-	adcl	4(%edi), %ebp
-	movl	20(%esp), %edx
-	adcl	8(%edi), %ecx
-	movl	%ebx, (%edx)
-	movl	%ebp, 4(%edx)
-	adcl	%esi, %eax
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	32(%esp), %ebp
-	movl	%ecx, %esi
-	subl	(%ebp), %esi
-	movl	%eax, %edi
-	sbbl	4(%ebp), %edi
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB33_2
-# BB#1:
-	movl	%edi, %eax
-.LBB33_2:
-	testb	%bl, %bl
-	jne	.LBB33_4
-# BB#3:
-	movl	%esi, %ecx
-.LBB33_4:
-	movl	%ecx, 8(%edx)
-	movl	%eax, 12(%edx)
+	jne	.LBB9_1
+# %bb.2:
+	movl	148(%esp), %ebx
+	movl	%esi, 20(%ebx)
+	jne	.LBB9_3
+.LBB9_4:
+	movl	%ebp, 16(%ebx)
+	jne	.LBB9_5
+.LBB9_6:
+	movl	%edi, 12(%ebx)
+	jne	.LBB9_7
+.LBB9_8:
+	movl	%ecx, 8(%ebx)
+	jne	.LBB9_9
+.LBB9_10:
+	movl	%eax, 4(%ebx)
+	movl	84(%esp), %eax                  # 4-byte Reload
+	je	.LBB9_12
+.LBB9_11:
+	movl	76(%esp), %eax                  # 4-byte Reload
+.LBB9_12:
+	movl	%eax, (%ebx)
+	addl	$128, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end33:
-	.size	mcl_fpDbl_add2L, .Lfunc_end33-mcl_fpDbl_add2L
-
-	.globl	mcl_fpDbl_sub2L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub2L,@function
-mcl_fpDbl_sub2L:                        # @mcl_fpDbl_sub2L
-# BB#0:
+.LBB9_1:
+	movl	80(%esp), %esi                  # 4-byte Reload
+	movl	148(%esp), %ebx
+	movl	%esi, 20(%ebx)
+	je	.LBB9_4
+.LBB9_3:
+	movl	92(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 16(%ebx)
+	je	.LBB9_6
+.LBB9_5:
+	movl	88(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%ebx)
+	je	.LBB9_8
+.LBB9_7:
+	movl	96(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 8(%ebx)
+	je	.LBB9_10
+.LBB9_9:
+	movl	%edx, %eax
+	movl	%eax, 4(%ebx)
+	movl	84(%esp), %eax                  # 4-byte Reload
+	jne	.LBB9_11
+	jmp	.LBB9_12
+.Lfunc_end9:
+	.size	mcl_fp_mont6L, .Lfunc_end9-mcl_fp_mont6L
+                                        # -- End function
+	.globl	mcl_fp_montNF6L                 # -- Begin function mcl_fp_montNF6L
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF6L,@function
+mcl_fp_montNF6L:                        # @mcl_fp_montNF6L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%ebx, %ebx
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %eax
-	sbbl	8(%edx), %eax
-	movl	12(%edx), %ebp
-	movl	12(%ecx), %edx
-	movl	20(%esp), %ecx
-	movl	%esi, (%ecx)
-	movl	%edi, 4(%ecx)
-	sbbl	%ebp, %edx
-	movl	32(%esp), %edi
-	movl	(%edi), %esi
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB34_1
-# BB#2:
-	xorl	%edi, %edi
-	jmp	.LBB34_3
-.LBB34_1:
-	movl	4(%edi), %edi
-.LBB34_3:
-	testb	%bl, %bl
-	jne	.LBB34_5
-# BB#4:
-	xorl	%esi, %esi
-.LBB34_5:
-	addl	%eax, %esi
-	movl	%esi, 8(%ecx)
-	adcl	%edx, %edi
-	movl	%edi, 12(%ecx)
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end34:
-	.size	mcl_fpDbl_sub2L, .Lfunc_end34-mcl_fpDbl_sub2L
-
-	.globl	mcl_fp_mulUnitPre3L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre3L,@function
-mcl_fp_mulUnitPre3L:                    # @mcl_fp_mulUnitPre3L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	pushl	%eax
-	movl	32(%esp), %ecx
-	movl	28(%esp), %edi
+	subl	$128, %esp
+	movl	152(%esp), %esi
+	movl	(%esi), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	156(%esp), %ecx
+	movl	(%ecx), %ebx
+	mull	%ebx
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	160(%esp), %edi
+	movl	-4(%edi), %ecx
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
+	imull	%eax, %ecx
+	movl	20(%edi), %edx
+	movl	%edx, 104(%esp)                 # 4-byte Spill
 	movl	%ecx, %eax
-	mull	8(%edi)
-	movl	%edx, %esi
-	movl	%eax, (%esp)            # 4-byte Spill
+	mull	%edx
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	16(%edi), %edx
+	movl	%edx, 100(%esp)                 # 4-byte Spill
 	movl	%ecx, %eax
-	mull	4(%edi)
-	movl	%edx, %ebx
-	movl	%eax, %ebp
+	mull	%edx
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	12(%edi), %edx
+	movl	%edx, 96(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	(%edi)
-	movl	24(%esp), %ecx
-	movl	%eax, (%ecx)
-	addl	%ebp, %edx
-	movl	%edx, 4(%ecx)
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	movl	%ebx, 8(%ecx)
-	adcl	$0, %esi
-	movl	%esi, 12(%ecx)
-	addl	$4, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end35:
-	.size	mcl_fp_mulUnitPre3L, .Lfunc_end35-mcl_fp_mulUnitPre3L
-
-	.globl	mcl_fpDbl_mulPre3L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre3L,@function
-mcl_fpDbl_mulPre3L:                     # @mcl_fpDbl_mulPre3L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$28, %esp
-	movl	52(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx
-	movl	(%edx), %edi
-	mull	%edi
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx
-	movl	%eax, (%edx)
-	movl	4(%ecx), %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	movl	8(%ecx), %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	%edi
+	mull	%edx
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	8(%edi), %edx
+	movl	%edx, 84(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%edx
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	(%edi), %ebp
+	movl	%ebp, 88(%esp)                  # 4-byte Spill
+	movl	4(%edi), %edx
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 124(%esp)                 # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%ebp
+	movl	%eax, 120(%esp)                 # 4-byte Spill
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	20(%esi), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	mull	%ebx
+	movl	%eax, 116(%esp)                 # 4-byte Spill
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	16(%esi), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	mull	%ebx
+	movl	%eax, 112(%esp)                 # 4-byte Spill
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	12(%esi), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	mull	%ebx
+	movl	%eax, 108(%esp)                 # 4-byte Spill
 	movl	%edx, %ecx
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%edi
+	movl	4(%esi), %ebp
+	movl	%ebp, 92(%esp)                  # 4-byte Spill
+	movl	8(%esi), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	mull	%ebx
 	movl	%edx, %edi
-	movl	%eax, %ebx
-	addl	20(%esp), %ebx          # 4-byte Folded Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	adcl	$0, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax
-	movl	4(%eax), %ecx
-	movl	%esi, %eax
-	mull	%ecx
-	movl	%edx, 20(%esp)          # 4-byte Spill
 	movl	%eax, %esi
 	movl	%ebp, %eax
-	mull	%ecx
-	movl	%edx, 16(%esp)          # 4-byte Spill
+	mull	%ebx
+	movl	%edx, %ebx
 	movl	%eax, %ebp
-	movl	24(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	addl	%ebx, %eax
-	movl	48(%esp), %ecx
-	movl	%eax, 4(%ecx)
-	adcl	%edi, %ebp
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	56(%esp), %eax
-	movl	8(%eax), %edi
-	sbbl	%ecx, %ecx
-	movl	(%esp), %eax            # 4-byte Reload
-	mull	%edi
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	mull	%edi
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	8(%esp), %eax           # 4-byte Reload
-	mull	%edi
-	andl	$1, %ecx
-	addl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	addl	%ebx, %ebp
-	movl	48(%esp), %edi
-	movl	%ebp, 8(%edi)
-	adcl	(%esp), %esi            # 4-byte Folded Reload
-	adcl	%eax, %ecx
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%esi, 12(%edi)
-	movl	%ecx, 16(%edi)
-	adcl	%edx, %eax
-	movl	%eax, 20(%edi)
-	addl	$28, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end36:
-	.size	mcl_fpDbl_mulPre3L, .Lfunc_end36-mcl_fpDbl_mulPre3L
-
-	.globl	mcl_fpDbl_sqrPre3L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre3L,@function
-mcl_fpDbl_sqrPre3L:                     # @mcl_fpDbl_sqrPre3L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$32, %esp
-	movl	56(%esp), %eax
-	movl	8(%eax), %ebp
-	movl	(%eax), %ecx
+	addl	32(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	%esi, %ebx
+	adcl	108(%esp), %edi                 # 4-byte Folded Reload
+	adcl	112(%esp), %ecx                 # 4-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	116(%esp), %edx                 # 4-byte Folded Reload
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	120(%esp), %esi                 # 4-byte Reload
+	addl	24(%esp), %esi                  # 4-byte Folded Reload
+	adcl	124(%esp), %ebp                 # 4-byte Folded Reload
+	adcl	52(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	44(%esp), %edi                  # 4-byte Folded Reload
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	4(%esp), %edx                   # 4-byte Folded Reload
+	adcl	$0, %eax
+	addl	40(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	48(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	36(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	8(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	156(%esp), %eax
 	movl	4(%eax), %esi
-	movl	%ebp, %eax
-	mull	%esi
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%ecx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	%eax, %ebx
-	movl	%ebx, (%esp)            # 4-byte Spill
 	movl	%esi, %eax
-	mull	%esi
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 12(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-	mull	%ecx
-	movl	%edx, %esi
-	movl	%eax, %edi
-	movl	%ecx, %eax
-	mull	%ecx
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%esi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %ecx
-	movl	52(%esp), %edx
-	movl	%eax, (%edx)
-	movl	%ebp, %eax
-	mull	%ebp
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	addl	%edi, %ecx
-	movl	%esi, %ebp
-	adcl	%ebx, %ebp
-	movl	4(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	addl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	40(%esp), %esi                  # 4-byte Folded Reload
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edi, %eax
 	adcl	$0, %eax
-	addl	%edi, %ecx
-	movl	52(%esp), %edx
-	movl	%ecx, 4(%edx)
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	%edx, %eax
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	addl	%esi, %ebp
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	28(%esp), %edi          # 4-byte Reload
-	adcl	%edi, %ecx
-	addl	(%esp), %ebp            # 4-byte Folded Reload
-	movl	52(%esp), %esi
-	movl	%ebp, 8(%esi)
-	adcl	%edx, %eax
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	addl	%ebx, %eax
-	adcl	%edi, %ecx
-	movl	52(%esp), %edx
-	movl	%eax, 12(%edx)
-	movl	%ecx, 16(%edx)
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%edx)
-	addl	$32, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end37:
-	.size	mcl_fpDbl_sqrPre3L, .Lfunc_end37-mcl_fpDbl_sqrPre3L
-
-	.globl	mcl_fp_mont3L
-	.align	16, 0x90
-	.type	mcl_fp_mont3L,@function
-mcl_fp_mont3L:                          # @mcl_fp_mont3L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$56, %esp
-	movl	80(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx
-	movl	(%edx), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	mull	%edx
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	88(%esp), %esi
-	movl	-4(%esi), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	imull	%edx, %ebp
-	movl	(%esi), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	8(%esi), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	4(%esi), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	4(%ecx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	8(%ecx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%ebx
-	movl	%edx, %ebx
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%edi
-	movl	%edx, %ebp
-	movl	%eax, 4(%esp)           # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	addl	%ebp, %edi
+	adcl	%ebx, 32(%esp)                  # 4-byte Folded Spill
+	adcl	48(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	%esi, 36(%esp)                  # 4-byte Folded Spill
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edi, %ecx
 	movl	%ecx, %eax
-	movl	12(%esp), %ecx          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, %esi
-	movl	%eax, %edi
-	movl	36(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%eax, %ecx
-	addl	16(%esp), %ecx          # 4-byte Folded Reload
-	adcl	%edi, %edx
-	adcl	$0, %esi
-	addl	(%esp), %ebp            # 4-byte Folded Reload
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	movl	28(%esp), %eax          # 4-byte Reload
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%ecx, %eax
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, %esi
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	addl	%edi, %eax
+	adcl	32(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, %edi
+	movl	%ebx, %edx
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	36(%esp), %esi                  # 4-byte Folded Reload
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	$0, %eax
-	movl	4(%esp), %edi           # 4-byte Reload
-	addl	32(%esp), %edi          # 4-byte Folded Reload
-	adcl	%ecx, %ebp
-	adcl	%edx, %ebx
-	adcl	%esi, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	4(%eax), %ecx
+	addl	%ebp, %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	40(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	156(%esp), %eax
+	movl	8(%eax), %ecx
 	movl	%ecx, %eax
-	mull	20(%esp)                # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	36(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 12(%esp)          # 4-byte Spill
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	24(%esp)                # 4-byte Folded Reload
+	mull	64(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %edi
-	addl	12(%esp), %edi          # 4-byte Folded Reload
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%eax, %edx
-	addl	%ebp, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	%ebx, %edi
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%edx, %esi
-	imull	52(%esp), %esi          # 4-byte Folded Reload
-	andl	$1, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	48(%esp)                # 4-byte Folded Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %ebp
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	44(%esp)                # 4-byte Folded Reload
+	movl	%eax, %esi
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %ecx
 	movl	%edx, %ebx
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	40(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	addl	4(%esp), %ecx           # 4-byte Folded Reload
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	adcl	$0, %ebp
-	addl	12(%esp), %eax          # 4-byte Folded Reload
-	adcl	%edi, %ecx
-	adcl	16(%esp), %ebx          # 4-byte Folded Reload
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	84(%esp), %eax
-	movl	8(%eax), %esi
-	movl	%esi, %eax
-	mull	20(%esp)                # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	36(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	24(%esp)                # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	addl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	32(%esp), %eax          # 4-byte Reload
+	addl	%esi, %ebx
+	adcl	40(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	44(%esp), %edi                  # 4-byte Folded Reload
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	48(%esp), %esi                  # 4-byte Folded Reload
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	$0, %eax
-	movl	36(%esp), %edx          # 4-byte Reload
-	addl	%ecx, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
+	addl	36(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	24(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	76(%esp), %edi                  # 4-byte Reload
+	imull	%ecx, %edi
+	movl	%edi, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%edi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%edi, %eax
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, %esi
+	movl	%edi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	addl	%ecx, %eax
 	adcl	%ebx, %esi
-	adcl	%ebp, %edi
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	sbbl	%ecx, %ecx
-	movl	52(%esp), %ebp          # 4-byte Reload
-	imull	%edx, %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	andl	$1, %ecx
-	movl	%ebp, %eax
-	mull	40(%esp)                # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	%ebp, %eax
-	mull	48(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	mull	44(%esp)                # 4-byte Folded Reload
-	addl	28(%esp), %eax          # 4-byte Folded Reload
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %ebp
-	addl	36(%esp), %ebx          # 4-byte Folded Reload
-	adcl	%esi, %eax
-	adcl	%edi, %edx
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	$0, %ecx
-	movl	%eax, %ebx
-	subl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edx, %edi
-	sbbl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebp, %esi
-	sbbl	48(%esp), %esi          # 4-byte Folded Reload
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB38_2
-# BB#1:
-	movl	%ebx, %eax
-.LBB38_2:
-	movl	76(%esp), %ebx
-	movl	%eax, (%ebx)
-	testb	%cl, %cl
-	jne	.LBB38_4
-# BB#3:
-	movl	%edi, %edx
-.LBB38_4:
-	movl	%edx, 4(%ebx)
-	jne	.LBB38_6
-# BB#5:
-	movl	%esi, %ebp
-.LBB38_6:
-	movl	%ebp, 8(%ebx)
-	addl	$56, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end38:
-	.size	mcl_fp_mont3L, .Lfunc_end38-mcl_fp_mont3L
-
-	.globl	mcl_fp_montNF3L
-	.align	16, 0x90
-	.type	mcl_fp_montNF3L,@function
-mcl_fp_montNF3L:                        # @mcl_fp_montNF3L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$60, %esp
-	movl	84(%esp), %ebp
-	movl	(%ebp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx
-	movl	(%ecx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi
-	movl	-4(%esi), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	imull	%edx, %ecx
-	movl	(%esi), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	8(%esi), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	4(%esi), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	4(%ebp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	8(%ebp), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	%ebp, %edx
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	32(%esp), %ebx                  # 4-byte Folded Reload
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	28(%esp), %edi                  # 4-byte Folded Reload
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	addl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	40(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	156(%esp), %eax
+	movl	12(%eax), %ecx
 	movl	%ecx, %eax
-	mull	%edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 32(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)            # 4-byte Spill
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	%ebx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	%eax, %ebp
-	movl	%esi, %eax
-	movl	20(%esp), %ecx          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, %edi
-	movl	%eax, %ebx
-	movl	40(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, %ecx
-	movl	%eax, %esi
-	addl	36(%esp), %esi          # 4-byte Folded Reload
-	adcl	%ebx, %ecx
-	adcl	$0, %edi
-	addl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	(%esp), %esi            # 4-byte Folded Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	adcl	$0, %edi
-	addl	4(%esp), %esi           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	88(%esp), %eax
-	movl	4(%eax), %ebx
-	movl	%ebx, %eax
-	mull	24(%esp)                # 4-byte Folded Reload
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %ebp
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	40(%esp)                # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	28(%esp)                # 4-byte Folded Reload
-	movl	%eax, 20(%esp)          # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %ebx
-	addl	16(%esp), %ebx          # 4-byte Folded Reload
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	adcl	$0, %ebp
-	movl	20(%esp), %edx          # 4-byte Reload
-	addl	%esi, %edx
-	adcl	%ecx, %ebx
-	adcl	%edi, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%edx, %ecx
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
 	movl	%edx, %edi
-	imull	56(%esp), %ecx          # 4-byte Folded Reload
+	addl	52(%esp), %edi                  # 4-byte Folded Reload
+	adcl	40(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	44(%esp), %ebp                  # 4-byte Folded Reload
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	48(%esp), %edx                  # 4-byte Folded Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%esi, %eax
+	adcl	$0, %eax
+	movl	36(%esp), %esi                  # 4-byte Reload
+	addl	(%esp), %esi                    # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	adcl	8(%esp), %ebx                   # 4-byte Folded Reload
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%esi, %ecx
 	movl	%ecx, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	48(%esp)                # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%ecx, %eax
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
 	movl	%eax, %esi
 	movl	%ecx, %eax
-	mull	44(%esp)                # 4-byte Folded Reload
-	addl	%edi, %eax
-	adcl	%ebx, %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	adcl	$0, %ebp
-	addl	%edx, %esi
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	88(%esp), %eax
-	movl	8(%eax), %edi
-	movl	%edi, %eax
-	mull	24(%esp)                # 4-byte Folded Reload
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	addl	36(%esp), %eax                  # 4-byte Folded Reload
+	adcl	%edi, %esi
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	%ebx, %edx
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	%ebp, %ebx
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	addl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	adcl	40(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	156(%esp), %eax
+	movl	16(%eax), %ecx
+	movl	%ecx, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %ebx
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	40(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	28(%esp)                # 4-byte Folded Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
 	movl	%edx, %edi
-	addl	40(%esp), %edi          # 4-byte Folded Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	adcl	$0, %ebx
-	addl	%esi, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	adcl	%ebp, %ecx
-	adcl	$0, %ebx
-	movl	56(%esp), %esi          # 4-byte Reload
-	imull	%eax, %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
+	addl	52(%esp), %edi                  # 4-byte Folded Reload
+	adcl	40(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	44(%esp), %esi                  # 4-byte Folded Reload
+	adcl	48(%esp), %ebx                  # 4-byte Folded Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	24(%esp), %edx                  # 4-byte Reload
+	addl	36(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%ecx, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	addl	24(%esp), %eax                  # 4-byte Folded Reload
+	adcl	%edi, %ebx
+	movl	%ebx, %edx
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	%ebp, %ebx
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	%esi, %edi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	$0, %esi
+	addl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	adcl	40(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	adcl	44(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	156(%esp), %eax
+	movl	20(%eax), %ecx
+	movl	%ecx, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, %ebp
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	addl	%ebp, %edx
+	movl	%edx, %ebp
+	adcl	64(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%edi, %ecx
+	adcl	68(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	24(%esp), %esi                  # 4-byte Folded Reload
+	movl	72(%esp), %eax                  # 4-byte Reload
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	movl	56(%esp), %edi                  # 4-byte Reload
+	adcl	$0, %edi
+	movl	60(%esp), %edx                  # 4-byte Reload
+	addl	(%esp), %edx                    # 4-byte Folded Reload
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	adcl	8(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 64(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	adcl	$0, %edi
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	movl	76(%esp), %esi                  # 4-byte Reload
+	imull	%edx, %esi
 	movl	%esi, %eax
-	mull	44(%esp)                # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 92(%esp)                  # 4-byte Spill
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 68(%esp)                  # 4-byte Spill
 	movl	%eax, %ebp
 	movl	%esi, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	56(%esp), %eax          # 4-byte Reload
-	mull	48(%esp)                # 4-byte Folded Reload
-	addl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%edi, %eax
-	adcl	%ecx, %esi
-	adcl	$0, %ebx
-	addl	40(%esp), %eax          # 4-byte Folded Reload
-	adcl	%edx, %esi
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
 	movl	%eax, %edi
-	subl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, %edx
-	sbbl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebx, %ecx
-	sbbl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	sarl	$31, %ebp
-	testl	%ebp, %ebp
-	js	.LBB39_2
-# BB#1:
-	movl	%edi, %eax
-.LBB39_2:
-	movl	80(%esp), %edi
-	movl	%eax, (%edi)
-	js	.LBB39_4
-# BB#3:
-	movl	%edx, %esi
-.LBB39_4:
-	movl	%esi, 4(%edi)
-	js	.LBB39_6
-# BB#5:
-	movl	%ecx, %ebx
-.LBB39_6:
-	movl	%ebx, 8(%edi)
-	addl	$60, %esp
+	movl	%esi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%esi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, %ecx
+	movl	%esi, %eax
+	mull	80(%esp)                        # 4-byte Folded Reload
+	addl	60(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	adcl	12(%esp), %edi                  # 4-byte Folded Reload
+	adcl	64(%esp), %ebp                  # 4-byte Folded Reload
+	movl	76(%esp), %esi                  # 4-byte Reload
+	adcl	72(%esp), %esi                  # 4-byte Folded Reload
+	movl	56(%esp), %ebx                  # 4-byte Reload
+	adcl	$0, %ebx
+	addl	16(%esp), %eax                  # 4-byte Folded Reload
+	adcl	%edx, %ecx
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	68(%esp), %esi                  # 4-byte Folded Reload
+	adcl	92(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%eax, %edx
+	subl	88(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 88(%esp)                  # 4-byte Spill
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	%ecx, %edx
+	sbbl	80(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edi, 80(%esp)                  # 4-byte Spill
+	movl	%edi, %ecx
+	sbbl	84(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ebp, 84(%esp)                  # 4-byte Spill
+	sbbl	96(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%esi, 76(%esp)                  # 4-byte Spill
+	sbbl	100(%esp), %esi                 # 4-byte Folded Reload
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	movl	%ebx, %edi
+	sbbl	104(%esp), %edi                 # 4-byte Folded Reload
+	movl	%edi, %ebx
+	sarl	$31, %ebx
+	testl	%ebx, %ebx
+	js	.LBB10_1
+# %bb.2:
+	movl	148(%esp), %ebx
+	movl	%edi, 20(%ebx)
+	js	.LBB10_3
+.LBB10_4:
+	movl	%esi, 16(%ebx)
+	js	.LBB10_5
+.LBB10_6:
+	movl	%ebp, 12(%ebx)
+	js	.LBB10_7
+.LBB10_8:
+	movl	%ecx, 8(%ebx)
+	js	.LBB10_9
+.LBB10_10:
+	movl	%edx, 4(%ebx)
+	movl	88(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB10_12
+.LBB10_11:
+	movl	%eax, %ecx
+.LBB10_12:
+	movl	%ecx, (%ebx)
+	addl	$128, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end39:
-	.size	mcl_fp_montNF3L, .Lfunc_end39-mcl_fp_montNF3L
-
-	.globl	mcl_fp_montRed3L
-	.align	16, 0x90
-	.type	mcl_fp_montRed3L,@function
-mcl_fp_montRed3L:                       # @mcl_fp_montRed3L
-# BB#0:
+.LBB10_1:
+	movl	56(%esp), %edi                  # 4-byte Reload
+	movl	148(%esp), %ebx
+	movl	%edi, 20(%ebx)
+	jns	.LBB10_4
+.LBB10_3:
+	movl	76(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 16(%ebx)
+	jns	.LBB10_6
+.LBB10_5:
+	movl	84(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 12(%ebx)
+	jns	.LBB10_8
+.LBB10_7:
+	movl	80(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 8(%ebx)
+	jns	.LBB10_10
+.LBB10_9:
+	movl	60(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%ebx)
+	movl	88(%esp), %ecx                  # 4-byte Reload
+	js	.LBB10_11
+	jmp	.LBB10_12
+.Lfunc_end10:
+	.size	mcl_fp_montNF6L, .Lfunc_end10-mcl_fp_montNF6L
+                                        # -- End function
+	.globl	mcl_fp_montRed6L                # -- Begin function mcl_fp_montRed6L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed6L,@function
+mcl_fp_montRed6L:                       # @mcl_fp_montRed6L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$40, %esp
-	movl	68(%esp), %eax
-	movl	-4(%eax), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	(%eax), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebx
-	movl	(%ebx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	imull	%edx, %ecx
-	movl	8(%eax), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	4(%eax), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
+	subl	$84, %esp
+	movl	112(%esp), %ecx
+	movl	-4(%ecx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	108(%esp), %edx
+	movl	(%edx), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	imull	%eax, %edi
+	movl	20(%ecx), %edx
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
 	mull	%edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%esi
-	movl	%edx, %esi
-	movl	%eax, %ebp
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, %ecx
-	addl	%ebp, %ecx
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	16(%esp), %eax          # 4-byte Folded Reload
-	adcl	4(%ebx), %ecx
-	adcl	8(%ebx), %esi
-	adcl	12(%ebx), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	20(%ebx), %eax
-	movl	16(%ebx), %edx
-	adcl	$0, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	%ecx, %edi
-	imull	36(%esp), %edi          # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	16(%ecx), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%edx
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	12(%ecx), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
 	movl	%edi, %eax
-	mull	32(%esp)                # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
+	mull	%edx
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	8(%ecx), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
 	movl	%edi, %eax
-	mull	28(%esp)                # 4-byte Folded Reload
+	mull	%edx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
 	movl	%edx, %ebp
-	movl	%eax, (%esp)            # 4-byte Spill
+	movl	(%ecx), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%ecx
+	movl	%edx, %ebx
+	movl	%eax, %ecx
 	movl	%edi, %eax
-	mull	24(%esp)                # 4-byte Folded Reload
+	mull	%esi
+	movl	%eax, %esi
 	movl	%edx, %edi
-	addl	(%esp), %edi            # 4-byte Folded Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	%ecx, %eax
-	adcl	%esi, %edi
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	adcl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	$0, 12(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ebx
-	movl	36(%esp), %ecx          # 4-byte Reload
+	addl	%ecx, %edi
+	adcl	8(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebp, %eax
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	16(%esp), %ebp                  # 4-byte Folded Reload
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	20(%esp), %edx                  # 4-byte Folded Reload
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	$0, %ecx
+	addl	32(%esp), %esi                  # 4-byte Folded Reload
+	movl	108(%esp), %esi
+	adcl	4(%esi), %edi
+	adcl	8(%esi), %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	adcl	12(%esi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	16(%esi), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	adcl	20(%esi), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	adcl	24(%esi), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	setb	%bl
+	movl	52(%esp), %ecx                  # 4-byte Reload
 	imull	%edi, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
 	movl	%ecx, %eax
-	mull	32(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 20(%esp)          # 4-byte Spill
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	24(%esp)                # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%eax, %ecx
-	movl	36(%esp), %eax          # 4-byte Reload
-	mull	28(%esp)                # 4-byte Folded Reload
-	addl	8(%esp), %eax           # 4-byte Folded Reload
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %esi
-	addl	%edi, %ecx
-	adcl	%ebp, %eax
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	adcl	$0, %ebx
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, %esi
+	movl	%ecx, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
 	movl	%eax, %ebp
-	subl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edx, %edi
-	sbbl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, %ecx
-	sbbl	32(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB40_2
-# BB#1:
-	movl	%ebp, %eax
-.LBB40_2:
-	movl	60(%esp), %ebp
-	movl	%eax, (%ebp)
-	testb	%bl, %bl
-	jne	.LBB40_4
-# BB#3:
-	movl	%edi, %edx
-.LBB40_4:
-	movl	%edx, 4(%ebp)
-	jne	.LBB40_6
-# BB#5:
-	movl	%ecx, %esi
-.LBB40_6:
-	movl	%esi, 8(%ebp)
-	addl	$40, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end40:
-	.size	mcl_fp_montRed3L, .Lfunc_end40-mcl_fp_montRed3L
-
-	.globl	mcl_fp_addPre3L
-	.align	16, 0x90
-	.type	mcl_fp_addPre3L,@function
-mcl_fp_addPre3L:                        # @mcl_fp_addPre3L
-# BB#0:
-	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	12(%esp), %esi
-	addl	(%esi), %ecx
-	adcl	4(%esi), %edx
-	movl	8(%eax), %eax
-	adcl	8(%esi), %eax
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	%edx, 4(%esi)
-	movl	%eax, 8(%esi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	retl
-.Lfunc_end41:
-	.size	mcl_fp_addPre3L, .Lfunc_end41-mcl_fp_addPre3L
-
-	.globl	mcl_fp_subPre3L
-	.align	16, 0x90
-	.type	mcl_fp_subPre3L,@function
-mcl_fp_subPre3L:                        # @mcl_fp_subPre3L
-# BB#0:
-	pushl	%edi
-	pushl	%esi
-	movl	16(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %esi
-	xorl	%eax, %eax
-	movl	20(%esp), %edi
-	subl	(%edi), %edx
-	sbbl	4(%edi), %esi
-	movl	8(%ecx), %ecx
-	sbbl	8(%edi), %ecx
-	movl	12(%esp), %edi
-	movl	%edx, (%edi)
-	movl	%esi, 4(%edi)
-	movl	%ecx, 8(%edi)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end42:
-	.size	mcl_fp_subPre3L, .Lfunc_end42-mcl_fp_subPre3L
-
-	.globl	mcl_fp_shr1_3L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_3L,@function
-mcl_fp_shr1_3L:                         # @mcl_fp_shr1_3L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	8(%eax), %ecx
-	movl	(%eax), %edx
-	movl	4(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	8(%esp), %esi
-	movl	%edx, (%esi)
-	shrdl	$1, %ecx, %eax
-	movl	%eax, 4(%esi)
-	shrl	%ecx
-	movl	%ecx, 8(%esi)
-	popl	%esi
-	retl
-.Lfunc_end43:
-	.size	mcl_fp_shr1_3L, .Lfunc_end43-mcl_fp_shr1_3L
-
-	.globl	mcl_fp_add3L
-	.align	16, 0x90
-	.type	mcl_fp_add3L,@function
-mcl_fp_add3L:                           # @mcl_fp_add3L
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %edx
-	movl	(%edx), %eax
-	movl	4(%edx), %ecx
-	movl	20(%esp), %esi
-	addl	(%esi), %eax
-	adcl	4(%esi), %ecx
-	movl	8(%edx), %edx
-	adcl	8(%esi), %edx
-	movl	16(%esp), %esi
-	movl	%eax, (%esi)
-	movl	%ecx, 4(%esi)
-	movl	%edx, 8(%esi)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	28(%esp), %edi
-	subl	(%edi), %eax
-	sbbl	4(%edi), %ecx
-	sbbl	8(%edi), %edx
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB44_2
-# BB#1:                                 # %nocarry
-	movl	%eax, (%esi)
-	movl	%ecx, 4(%esi)
-	movl	%edx, 8(%esi)
-.LBB44_2:                               # %carry
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end44:
-	.size	mcl_fp_add3L, .Lfunc_end44-mcl_fp_add3L
-
-	.globl	mcl_fp_addNF3L
-	.align	16, 0x90
-	.type	mcl_fp_addNF3L,@function
-mcl_fp_addNF3L:                         # @mcl_fp_addNF3L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	28(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %ecx
-	movl	24(%esp), %esi
-	addl	(%esi), %edx
-	adcl	4(%esi), %ecx
-	movl	8(%eax), %eax
-	adcl	8(%esi), %eax
-	movl	32(%esp), %ebp
+	addl	80(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	%esi, %edx
+	movl	%edx, %esi
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	36(%esp), %edx                  # 4-byte Folded Reload
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	44(%esp), %eax                  # 4-byte Folded Reload
+	movzbl	%bl, %ebx
+	adcl	48(%esp), %ebx                  # 4-byte Folded Reload
+	addl	%edi, 4(%esp)                   # 4-byte Folded Spill
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	108(%esp), %eax
+	adcl	28(%eax), %ebx
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	setb	48(%esp)                        # 1-byte Folded Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
+	imull	%ebp, %esi
+	movl	%esi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, %edi
+	movl	%esi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%esi, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	addl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	%ebx, %edx
 	movl	%edx, %ebx
-	subl	(%ebp), %ebx
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	36(%esp), %esi                  # 4-byte Folded Reload
+	adcl	%edi, %ecx
 	movl	%ecx, %edi
-	sbbl	4(%ebp), %edi
-	movl	%eax, %esi
-	sbbl	8(%ebp), %esi
-	movl	%esi, %ebp
-	sarl	$31, %ebp
-	testl	%ebp, %ebp
-	js	.LBB45_2
-# BB#1:
-	movl	%ebx, %edx
-.LBB45_2:
-	movl	20(%esp), %ebx
-	movl	%edx, (%ebx)
-	js	.LBB45_4
-# BB#3:
-	movl	%edi, %ecx
-.LBB45_4:
-	movl	%ecx, 4(%ebx)
-	js	.LBB45_6
-# BB#5:
-	movl	%esi, %eax
-.LBB45_6:
-	movl	%eax, 8(%ebx)
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end45:
-	.size	mcl_fp_addNF3L, .Lfunc_end45-mcl_fp_addNF3L
-
-	.globl	mcl_fp_sub3L
-	.align	16, 0x90
-	.type	mcl_fp_sub3L,@function
-mcl_fp_sub3L:                           # @mcl_fp_sub3L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %edx
-	movl	(%edx), %ecx
-	movl	4(%edx), %eax
-	xorl	%ebx, %ebx
-	movl	28(%esp), %esi
-	subl	(%esi), %ecx
-	sbbl	4(%esi), %eax
-	movl	8(%edx), %edx
-	sbbl	8(%esi), %edx
-	movl	20(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	%eax, 4(%esi)
-	movl	%edx, 8(%esi)
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	je	.LBB46_2
-# BB#1:                                 # %carry
-	movl	32(%esp), %edi
-	movl	4(%edi), %ebx
-	movl	8(%edi), %ebp
-	addl	(%edi), %ecx
-	movl	%ecx, (%esi)
-	adcl	%eax, %ebx
-	movl	%ebx, 4(%esi)
-	adcl	%edx, %ebp
-	movl	%ebp, 8(%esi)
-.LBB46_2:                               # %nocarry
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end46:
-	.size	mcl_fp_sub3L, .Lfunc_end46-mcl_fp_sub3L
-
-	.globl	mcl_fp_subNF3L
-	.align	16, 0x90
-	.type	mcl_fp_subNF3L,@function
-mcl_fp_subNF3L:                         # @mcl_fp_subNF3L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	28(%esp), %esi
-	subl	(%esi), %ecx
-	sbbl	4(%esi), %edx
-	movl	8(%eax), %eax
-	sbbl	8(%esi), %eax
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movzbl	48(%esp), %eax                  # 1-byte Folded Reload
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	%eax, %edx
+	addl	%ebp, 40(%esp)                  # 4-byte Folded Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	adcl	16(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	108(%esp), %eax
+	adcl	32(%eax), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	setb	8(%esp)                         # 1-byte Folded Spill
+	movl	52(%esp), %edi                  # 4-byte Reload
+	imull	%ebp, %edi
+	movl	%ebp, %ebx
+	movl	%edi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
 	movl	%eax, %esi
-	sarl	$31, %esi
+	movl	%edi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, %ecx
+	movl	%edi, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	addl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, %edi
+	adcl	%ecx, %edx
+	movl	%edx, %ecx
+	adcl	36(%esp), %ebp                  # 4-byte Folded Reload
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	%esi, %eax
+	movzbl	8(%esp), %esi                   # 1-byte Folded Reload
+	adcl	48(%esp), %esi                  # 4-byte Folded Reload
+	addl	%ebx, 40(%esp)                  # 4-byte Folded Spill
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	108(%esp), %eax
+	adcl	36(%eax), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	setb	48(%esp)                        # 1-byte Folded Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	imull	%edi, %ecx
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, %edi
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, %ebx
+	movl	%ecx, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	addl	%ebp, %eax
+	movl	%eax, %ebp
+	adcl	%ebx, %edx
+	movl	%edx, %ebx
+	adcl	%edi, %esi
 	movl	%esi, %edi
-	shldl	$1, %eax, %edi
-	movl	32(%esp), %ebx
-	andl	(%ebx), %edi
-	movl	8(%ebx), %ebp
-	andl	%esi, %ebp
-	andl	4(%ebx), %esi
-	addl	%ecx, %edi
-	adcl	%edx, %esi
-	movl	20(%esp), %ecx
-	movl	%edi, (%ecx)
-	movl	%esi, 4(%ecx)
-	adcl	%eax, %ebp
-	movl	%ebp, 8(%ecx)
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	36(%esp), %esi                  # 4-byte Folded Reload
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movzbl	48(%esp), %eax                  # 1-byte Folded Reload
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	%eax, %edx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	addl	44(%esp), %eax                  # 4-byte Folded Reload
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	adcl	(%esp), %esi                    # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	108(%esp), %eax
+	adcl	40(%eax), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	setb	(%esp)                          # 1-byte Folded Spill
+	movl	52(%esp), %edi                  # 4-byte Reload
+	imull	%ebp, %edi
+	movl	%edi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%edi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%edi, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %ecx
+	addl	40(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	4(%esp), %edx                   # 4-byte Folded Reload
+	adcl	36(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movzbl	(%esp), %edi                    # 1-byte Folded Reload
+	adcl	52(%esp), %edi                  # 4-byte Folded Reload
+	movl	48(%esp), %eax                  # 4-byte Reload
+	addl	12(%esp), %eax                  # 4-byte Folded Reload
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	20(%esp), %edx                  # 4-byte Folded Reload
+	adcl	24(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	32(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	28(%esp), %esi                  # 4-byte Folded Reload
+	movl	108(%esp), %eax
+	adcl	44(%eax), %edi
+	xorl	%eax, %eax
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	subl	56(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	%edx, %ecx
+	sbbl	60(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	sbbl	64(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebx, 64(%esp)                  # 4-byte Spill
+	sbbl	68(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%esi, 68(%esp)                  # 4-byte Spill
+	movl	%esi, %ecx
+	sbbl	76(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%edi, %esi
+	sbbl	72(%esp), %esi                  # 4-byte Folded Reload
+	sbbl	%eax, %eax
+	testb	$1, %al
+	jne	.LBB11_1
+# %bb.2:
+	movl	104(%esp), %eax
+	movl	%esi, 20(%eax)
+	jne	.LBB11_3
+.LBB11_4:
+	movl	%ecx, 16(%eax)
+	movl	60(%esp), %esi                  # 4-byte Reload
+	jne	.LBB11_5
+.LBB11_6:
+	movl	%ebx, 12(%eax)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB11_7
+.LBB11_8:
+	movl	%ebp, 8(%eax)
+	jne	.LBB11_9
+.LBB11_10:
+	movl	%esi, 4(%eax)
+	je	.LBB11_12
+.LBB11_11:
+	movl	52(%esp), %ecx                  # 4-byte Reload
+.LBB11_12:
+	movl	%ecx, (%eax)
+	addl	$84, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end47:
-	.size	mcl_fp_subNF3L, .Lfunc_end47-mcl_fp_subNF3L
-
-	.globl	mcl_fpDbl_add3L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add3L,@function
-mcl_fpDbl_add3L:                        # @mcl_fpDbl_add3L
-# BB#0:
+.LBB11_1:
+	movl	%edi, %esi
+	movl	104(%esp), %eax
+	movl	%esi, 20(%eax)
+	je	.LBB11_4
+.LBB11_3:
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	60(%esp), %esi                  # 4-byte Reload
+	je	.LBB11_6
+.LBB11_5:
+	movl	64(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 12(%eax)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	je	.LBB11_8
+.LBB11_7:
+	movl	(%esp), %ebp                    # 4-byte Reload
+	movl	%ebp, 8(%eax)
+	je	.LBB11_10
+.LBB11_9:
+	movl	%edx, %esi
+	movl	%esi, 4(%eax)
+	jne	.LBB11_11
+	jmp	.LBB11_12
+.Lfunc_end11:
+	.size	mcl_fp_montRed6L, .Lfunc_end11-mcl_fp_montRed6L
+                                        # -- End function
+	.globl	mcl_fp_montRedNF6L              # -- Begin function mcl_fp_montRedNF6L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF6L,@function
+mcl_fp_montRedNF6L:                     # @mcl_fp_montRedNF6L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	pushl	%eax
-	movl	32(%esp), %esi
-	movl	20(%esi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	16(%esi), %edi
-	movl	12(%esi), %ebx
-	movl	(%esi), %edx
-	movl	28(%esp), %eax
-	addl	(%eax), %edx
-	movl	24(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%esi), %edx
-	movl	4(%esi), %esi
-	adcl	4(%eax), %esi
-	adcl	8(%eax), %edx
-	movl	%esi, 4(%ecx)
-	movl	20(%eax), %ebp
-	movl	%edx, 8(%ecx)
-	movl	12(%eax), %esi
-	movl	16(%eax), %edx
-	adcl	%ebx, %esi
-	adcl	%edi, %edx
-	adcl	(%esp), %ebp            # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	36(%esp), %ecx
-	movl	%esi, %ebx
-	subl	(%ecx), %ebx
+	subl	$84, %esp
+	movl	112(%esp), %ecx
+	movl	-4(%ecx), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	108(%esp), %edx
+	movl	(%edx), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	imull	%eax, %edi
+	movl	20(%ecx), %edx
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%edx
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	16(%ecx), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%edx
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	12(%ecx), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%edx
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%edx, %ebx
+	movl	8(%ecx), %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%edx
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	(%ecx), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%ecx
+	movl	%edx, %ebp
+	movl	%eax, %ecx
+	movl	%edi, %eax
+	mull	%esi
+	movl	%eax, %esi
 	movl	%edx, %edi
-	sbbl	4(%ecx), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
+	addl	%ecx, %edi
+	adcl	(%esp), %ebp                    # 4-byte Folded Reload
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	$0, %ecx
+	addl	28(%esp), %esi                  # 4-byte Folded Reload
+	movl	108(%esp), %esi
+	adcl	4(%esi), %edi
+	adcl	8(%esi), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	adcl	12(%esi), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	16(%esi), %ebx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	adcl	20(%esi), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	24(%esi), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	setb	(%esp)                          # 1-byte Folded Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	imull	%edi, %ecx
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, %esi
+	movl	%ecx, %eax
+	mull	52(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	addl	%ebx, %eax
+	movl	%eax, %ebx
+	adcl	%esi, %edx
+	movl	%edx, %esi
 	movl	%ebp, %ecx
-	movl	36(%esp), %edi
-	sbbl	8(%edi), %ecx
-	sbbl	$0, %eax
-	andl	$1, %eax
-	jne	.LBB48_2
-# BB#1:
-	movl	%ecx, %ebp
-.LBB48_2:
-	testb	%al, %al
-	jne	.LBB48_4
-# BB#3:
-	movl	%ebx, %esi
-.LBB48_4:
-	movl	24(%esp), %eax
-	movl	%esi, 12(%eax)
-	jne	.LBB48_6
-# BB#5:
-	movl	(%esp), %edx            # 4-byte Reload
-.LBB48_6:
-	movl	%edx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	addl	$4, %esp
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	36(%esp), %ebp                  # 4-byte Folded Reload
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	40(%esp), %eax                  # 4-byte Folded Reload
+	movzbl	(%esp), %edx                    # 1-byte Folded Reload
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	addl	%edi, 68(%esp)                  # 4-byte Folded Spill
+	adcl	4(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	108(%esp), %eax
+	adcl	28(%eax), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	imull	%ebx, %esi
+	movl	%esi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, %edi
+	movl	%esi, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%esi, %eax
+	mull	52(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %ebp
+	addl	80(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	%ebx, %edx
+	movl	%edx, %ebx
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	68(%esp), %esi                  # 4-byte Folded Reload
+	adcl	%edi, %ecx
+	movl	%ecx, %edi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movzbl	44(%esp), %eax                  # 1-byte Folded Reload
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	%eax, %edx
+	movl	32(%esp), %eax                  # 4-byte Reload
+	addl	40(%esp), %eax                  # 4-byte Folded Reload
+	adcl	(%esp), %ebp                    # 4-byte Folded Reload
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 44(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	adcl	28(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	108(%esp), %eax
+	adcl	32(%eax), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	setb	40(%esp)                        # 1-byte Folded Spill
+	movl	48(%esp), %ebx                  # 4-byte Reload
+	imull	%ebp, %ebx
+	movl	%ebx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, %esi
+	movl	%ebx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, %ecx
+	movl	%ebx, %eax
+	mull	52(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, %edi
+	movl	%ebx, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	addl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	%ecx, %edx
+	movl	%edx, %ecx
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	68(%esp), %ebx                  # 4-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	%esi, %eax
+	movzbl	40(%esp), %esi                  # 1-byte Folded Reload
+	adcl	36(%esp), %esi                  # 4-byte Folded Reload
+	addl	%ebp, %edi
+	movl	(%esp), %edi                    # 4-byte Reload
+	adcl	44(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, (%esp)                    # 4-byte Spill
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	108(%esp), %eax
+	adcl	36(%eax), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	setb	36(%esp)                        # 1-byte Folded Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	imull	%edi, %ecx
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, %edi
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, %ebp
+	movl	%ecx, %eax
+	mull	52(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%ecx, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	addl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	%ebp, %edx
+	movl	%edx, %ebp
+	adcl	%edi, %esi
+	movl	%esi, %edi
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	68(%esp), %esi                  # 4-byte Folded Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movzbl	36(%esp), %eax                  # 1-byte Folded Reload
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	%eax, %edx
+	addl	(%esp), %ebx                    # 4-byte Folded Reload
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	108(%esp), %eax
+	adcl	40(%eax), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	setb	8(%esp)                         # 1-byte Folded Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	imull	%ebx, %ecx
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, %ebp
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	52(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	56(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %ecx
+	addl	40(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	adcl	36(%esp), %esi                  # 4-byte Folded Reload
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	%ebp, %edi
+	movzbl	8(%esp), %ebp                   # 1-byte Folded Reload
+	adcl	48(%esp), %ebp                  # 4-byte Folded Reload
+	movl	(%esp), %eax                    # 4-byte Reload
+	addl	4(%esp), %eax                   # 4-byte Folded Reload
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movl	108(%esp), %eax
+	adcl	44(%eax), %ebp
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	subl	52(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	%edx, %eax
+	sbbl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	sbbl	60(%esp), %esi                  # 4-byte Folded Reload
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	sbbl	64(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	movl	%edi, %ecx
+	sbbl	76(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ebp, %eax
+	sbbl	72(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %edi
+	sarl	$31, %edi
+	testl	%edi, %edi
+	js	.LBB12_1
+# %bb.2:
+	movl	104(%esp), %edi
+	movl	%eax, 20(%edi)
+	js	.LBB12_3
+.LBB12_4:
+	movl	%ecx, 16(%edi)
+	movl	52(%esp), %eax                  # 4-byte Reload
+	js	.LBB12_5
+.LBB12_6:
+	movl	%ebx, 12(%edi)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	js	.LBB12_7
+.LBB12_8:
+	movl	%esi, 8(%edi)
+	js	.LBB12_9
+.LBB12_10:
+	movl	%ecx, 4(%edi)
+	jns	.LBB12_12
+.LBB12_11:
+	movl	48(%esp), %eax                  # 4-byte Reload
+.LBB12_12:
+	movl	%eax, (%edi)
+	addl	$84, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end48:
-	.size	mcl_fpDbl_add3L, .Lfunc_end48-mcl_fpDbl_add3L
-
-	.globl	mcl_fpDbl_sub3L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub3L,@function
-mcl_fpDbl_sub3L:                        # @mcl_fpDbl_sub3L
-# BB#0:
+.LBB12_1:
+	movl	%ebp, %eax
+	movl	104(%esp), %edi
+	movl	%eax, 20(%edi)
+	jns	.LBB12_4
+.LBB12_3:
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%edi)
+	movl	52(%esp), %eax                  # 4-byte Reload
+	jns	.LBB12_6
+.LBB12_5:
+	movl	60(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 12(%edi)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB12_8
+.LBB12_7:
+	movl	8(%esp), %esi                   # 4-byte Reload
+	movl	%esi, 8(%edi)
+	jns	.LBB12_10
+.LBB12_9:
+	movl	%edx, %ecx
+	movl	%ecx, 4(%edi)
+	js	.LBB12_11
+	jmp	.LBB12_12
+.Lfunc_end12:
+	.size	mcl_fp_montRedNF6L, .Lfunc_end12-mcl_fp_montRedNF6L
+                                        # -- End function
+	.globl	mcl_fp_addPre6L                 # -- Begin function mcl_fp_addPre6L
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre6L,@function
+mcl_fp_addPre6L:                        # @mcl_fp_addPre6L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %esi
-	movl	28(%esp), %ebx
-	subl	(%ebx), %edx
-	sbbl	4(%ebx), %esi
-	movl	8(%ecx), %ebp
-	sbbl	8(%ebx), %ebp
-	movl	20(%esp), %eax
-	movl	%edx, (%eax)
-	movl	12(%ecx), %edi
-	sbbl	12(%ebx), %edi
-	movl	%esi, 4(%eax)
-	movl	16(%ecx), %esi
-	sbbl	16(%ebx), %esi
-	movl	20(%ebx), %ebx
-	movl	20(%ecx), %edx
-	movl	%ebp, 8(%eax)
-	sbbl	%ebx, %edx
-	movl	$0, %ecx
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	movl	32(%esp), %ebp
-	jne	.LBB49_1
-# BB#2:
-	xorl	%ebx, %ebx
-	jmp	.LBB49_3
-.LBB49_1:
-	movl	8(%ebp), %ebx
-.LBB49_3:
-	testb	%cl, %cl
-	movl	$0, %eax
-	jne	.LBB49_4
-# BB#5:
-	xorl	%ecx, %ecx
-	jmp	.LBB49_6
-.LBB49_4:
-	movl	(%ebp), %ecx
-	movl	4(%ebp), %eax
-.LBB49_6:
-	addl	%edi, %ecx
-	adcl	%esi, %eax
+	movl	24(%esp), %eax
+	movl	(%eax), %ecx
+	movl	4(%eax), %edx
+	movl	28(%esp), %esi
+	addl	(%esi), %ecx
+	adcl	4(%esi), %edx
+	movl	20(%eax), %edi
+	movl	16(%eax), %ebx
+	movl	12(%eax), %ebp
+	movl	8(%eax), %eax
+	adcl	8(%esi), %eax
+	adcl	12(%esi), %ebp
+	adcl	16(%esi), %ebx
+	adcl	20(%esi), %edi
 	movl	20(%esp), %esi
-	movl	%ecx, 12(%esi)
-	movl	%eax, 16(%esi)
-	adcl	%edx, %ebx
-	movl	%ebx, 20(%esi)
+	movl	%ebx, 16(%esi)
+	movl	%ebp, 12(%esi)
+	movl	%eax, 8(%esi)
+	movl	%edi, 20(%esi)
+	movl	%edx, 4(%esi)
+	movl	%ecx, (%esi)
+	setb	%al
+	movzbl	%al, %eax
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end49:
-	.size	mcl_fpDbl_sub3L, .Lfunc_end49-mcl_fpDbl_sub3L
-
-	.globl	mcl_fp_mulUnitPre4L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre4L,@function
-mcl_fp_mulUnitPre4L:                    # @mcl_fp_mulUnitPre4L
-# BB#0:
+.Lfunc_end13:
+	.size	mcl_fp_addPre6L, .Lfunc_end13-mcl_fp_addPre6L
+                                        # -- End function
+	.globl	mcl_fp_subPre6L                 # -- Begin function mcl_fp_subPre6L
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre6L,@function
+mcl_fp_subPre6L:                        # @mcl_fp_subPre6L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$12, %esp
-	movl	40(%esp), %ecx
-	movl	36(%esp), %ebp
-	movl	%ecx, %eax
-	mull	12(%ebp)
-	movl	%edx, %esi
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ecx, %eax
-	mull	8(%ebp)
-	movl	%edx, %ebx
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ecx, %eax
-	mull	4(%ebp)
-	movl	%edx, %edi
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ecx, %eax
-	mull	(%ebp)
-	movl	32(%esp), %ecx
-	movl	%eax, (%ecx)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%ecx)
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 8(%ecx)
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 12(%ecx)
-	adcl	$0, %esi
-	movl	%esi, 16(%ecx)
-	addl	$12, %esp
+	pushl	%eax
+	movl	28(%esp), %edx
+	movl	(%edx), %ecx
+	movl	4(%edx), %esi
+	xorl	%eax, %eax
+	movl	32(%esp), %edi
+	subl	(%edi), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	sbbl	4(%edi), %esi
+	movl	20(%edx), %ebx
+	movl	16(%edx), %ebp
+	movl	12(%edx), %ecx
+	movl	8(%edx), %edx
+	sbbl	8(%edi), %edx
+	sbbl	12(%edi), %ecx
+	sbbl	16(%edi), %ebp
+	sbbl	20(%edi), %ebx
+	movl	24(%esp), %edi
+	movl	%ebp, 16(%edi)
+	movl	%ecx, 12(%edi)
+	movl	%edx, 8(%edi)
+	movl	%esi, 4(%edi)
+	movl	%ebx, 20(%edi)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, (%edi)
+	sbbl	%eax, %eax
+	andl	$1, %eax
+	addl	$4, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end50:
-	.size	mcl_fp_mulUnitPre4L, .Lfunc_end50-mcl_fp_mulUnitPre4L
-
-	.globl	mcl_fpDbl_mulPre4L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre4L,@function
-mcl_fpDbl_mulPre4L:                     # @mcl_fpDbl_mulPre4L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
+.Lfunc_end14:
+	.size	mcl_fp_subPre6L, .Lfunc_end14-mcl_fp_subPre6L
+                                        # -- End function
+	.globl	mcl_fp_shr1_6L                  # -- Begin function mcl_fp_shr1_6L
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_6L,@function
+mcl_fp_shr1_6L:                         # @mcl_fp_shr1_6L
+# %bb.0:
 	pushl	%esi
-	subl	$56, %esp
-	movl	80(%esp), %edi
-	movl	(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	%ecx, %ebp
-	mull	%esi
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	4(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	8(%edi), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	12(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	4(%ebp), %ecx
-	movl	%eax, %ebp
-	mull	%ecx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%ecx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, %eax
-	mull	%ecx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%esi
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	%ebx, %eax
-	mull	%esi
-	movl	%edx, %ecx
-	movl	%eax, %ebx
-	movl	%edi, %eax
-	mull	%esi
-	movl	%edx, %edi
-	addl	12(%esp), %eax          # 4-byte Folded Reload
-	adcl	%ebx, %edi
-	adcl	%ebp, %ecx
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	addl	(%esp), %eax            # 4-byte Folded Reload
-	movl	76(%esp), %edx
-	movl	%eax, 4(%edx)
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	8(%esp), %edi           # 4-byte Folded Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	8(%eax), %esi
-	movl	20(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	40(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	addl	%edi, %eax
-	movl	76(%esp), %edx
-	movl	%eax, 8(%edx)
-	adcl	%ecx, %ebp
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax
-	movl	12(%eax), %esi
-	sbbl	%ecx, %ecx
-	movl	%esi, %eax
-	movl	80(%esp), %edi
-	mull	12(%edi)
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	8(%edi)
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	4(%edi)
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%esi, %eax
-	movl	80(%esp), %edx
-	mull	(%edx)
-	movl	%eax, %esi
-	andl	$1, %ecx
-	addl	28(%esp), %ebp          # 4-byte Folded Reload
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	addl	%esi, %ebp
-	movl	76(%esp), %esi
-	movl	%ebp, 12(%esi)
-	adcl	%edi, %ebx
-	movl	%eax, %edi
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	%edx, %ebx
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, %edx
-	movl	%ebx, 16(%edx)
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, 20(%edx)
-	movl	%ecx, 24(%edx)
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%edx)
-	addl	$56, %esp
+	movl	12(%esp), %eax
+	movl	20(%eax), %ecx
+	movl	%ecx, %edx
+	shrl	%edx
+	movl	8(%esp), %esi
+	movl	%edx, 20(%esi)
+	movl	16(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 16(%esi)
+	movl	12(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 12(%esi)
+	movl	8(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 8(%esi)
+	movl	4(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 4(%esi)
+	movl	(%eax), %eax
+	shrdl	$1, %ecx, %eax
+	movl	%eax, (%esi)
+	popl	%esi
+	retl
+.Lfunc_end15:
+	.size	mcl_fp_shr1_6L, .Lfunc_end15-mcl_fp_shr1_6L
+                                        # -- End function
+	.globl	mcl_fp_add6L                    # -- Begin function mcl_fp_add6L
+	.p2align	4, 0x90
+	.type	mcl_fp_add6L,@function
+mcl_fp_add6L:                           # @mcl_fp_add6L
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$8, %esp
+	movl	32(%esp), %ebx
+	movl	(%ebx), %eax
+	movl	4(%ebx), %ecx
+	movl	36(%esp), %ebp
+	addl	(%ebp), %eax
+	adcl	4(%ebp), %ecx
+	movl	20(%ebx), %edx
+	movl	16(%ebx), %esi
+	movl	12(%ebx), %edi
+	movl	8(%ebx), %ebx
+	adcl	8(%ebp), %ebx
+	adcl	12(%ebp), %edi
+	adcl	16(%ebp), %esi
+	adcl	20(%ebp), %edx
+	movl	28(%esp), %ebp
+	movl	%edx, 20(%ebp)
+	movl	%esi, 16(%ebp)
+	movl	%edi, 12(%ebp)
+	movl	%ebx, 8(%ebp)
+	movl	%ecx, 4(%ebp)
+	movl	%eax, (%ebp)
+	setb	3(%esp)                         # 1-byte Folded Spill
+	movl	40(%esp), %ebp
+	subl	(%ebp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	sbbl	4(%ebp), %ecx
+	sbbl	8(%ebp), %ebx
+	sbbl	12(%ebp), %edi
+	sbbl	16(%ebp), %esi
+	sbbl	20(%ebp), %edx
+	movzbl	3(%esp), %eax                   # 1-byte Folded Reload
+	sbbl	$0, %eax
+	testb	$1, %al
+	jne	.LBB16_2
+# %bb.1:                                # %nocarry
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	28(%esp), %ebp
+	movl	%eax, (%ebp)
+	movl	%ecx, 4(%ebp)
+	movl	%ebx, 8(%ebp)
+	movl	%edi, 12(%ebp)
+	movl	%esi, 16(%ebp)
+	movl	%edx, 20(%ebp)
+.LBB16_2:                               # %carry
+	addl	$8, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end51:
-	.size	mcl_fpDbl_mulPre4L, .Lfunc_end51-mcl_fpDbl_mulPre4L
-
-	.globl	mcl_fpDbl_sqrPre4L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre4L,@function
-mcl_fpDbl_sqrPre4L:                     # @mcl_fpDbl_sqrPre4L
-# BB#0:
+.Lfunc_end16:
+	.size	mcl_fp_add6L, .Lfunc_end16-mcl_fp_add6L
+                                        # -- End function
+	.globl	mcl_fp_addNF6L                  # -- Begin function mcl_fp_addNF6L
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF6L,@function
+mcl_fp_addNF6L:                         # @mcl_fp_addNF6L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$40, %esp
-	movl	64(%esp), %ecx
-	movl	12(%ecx), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
+	subl	$28, %esp
+	movl	56(%esp), %ecx
 	movl	(%ecx), %ebx
-	movl	4(%ecx), %esi
-	movl	%ebp, %eax
-	mull	%esi
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	8(%ecx), %edi
-	movl	%edi, %eax
-	mull	%esi
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%ebx
-	movl	%edx, %ebp
-	movl	%eax, %ecx
-	movl	%edi, %eax
-	mull	%ebx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	%esi
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	%ebx
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	%eax, %esi
-	movl	%ebx, %eax
-	mull	%ebx
-	movl	60(%esp), %ebx
-	movl	%eax, (%ebx)
-	addl	%esi, %edx
-	movl	(%esp), %eax            # 4-byte Reload
+	movl	4(%ecx), %ebp
+	movl	52(%esp), %edx
+	addl	(%edx), %ebx
+	adcl	4(%edx), %ebp
+	movl	20(%ecx), %esi
+	movl	16(%ecx), %edi
+	movl	12(%ecx), %eax
+	movl	8(%ecx), %ecx
+	adcl	8(%edx), %ecx
+	adcl	12(%edx), %eax
+	adcl	16(%edx), %edi
+	adcl	20(%edx), %esi
+	movl	60(%esp), %edx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	subl	(%edx), %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	sbbl	4(%edx), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	sbbl	8(%edx), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
 	movl	%eax, %ebx
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	adcl	$0, %ebp
-	addl	%esi, %edx
-	movl	60(%esp), %esi
-	movl	%edx, 4(%esi)
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	addl	%eax, %ebx
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	mull	%edi
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
 	movl	%edi, %eax
-	mull	%edi
-	movl	%eax, %edi
-	addl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	60(%esp), %eax
-	movl	%ebx, 8(%eax)
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	adcl	%ebp, %edi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	%esi, %eax
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	addl	28(%esp), %ecx          # 4-byte Folded Reload
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	%edx, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi
-	movl	12(%esi), %ebp
-	movl	%ebp, %eax
-	mull	8(%esi)
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	%ebp, %eax
-	mull	4(%esi)
-	movl	%esi, %edi
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	%ebp, %eax
-	mull	(%edi)
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ebp, %eax
-	mull	%ebp
-	addl	%ecx, %edi
-	movl	60(%esp), %ebp
-	movl	%edi, 12(%ebp)
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	addl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, %edi
-	movl	%esi, 16(%edi)
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebx, 20(%edi)
-	movl	%eax, 24(%edi)
-	adcl	%edx, %ecx
-	movl	%ecx, 28(%edi)
-	addl	$40, %esp
+	sbbl	12(%edx), %ebx
+	movl	%edi, %ebp
+	sbbl	16(%edx), %ebp
+	movl	%esi, %ecx
+	sbbl	20(%edx), %ecx
+	movl	%ecx, %edx
+	sarl	$31, %edx
+	testl	%edx, %edx
+	js	.LBB17_1
+# %bb.2:
+	movl	48(%esp), %edi
+	movl	%ecx, 20(%edi)
+	js	.LBB17_3
+.LBB17_4:
+	movl	%ebp, 16(%edi)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	js	.LBB17_5
+.LBB17_6:
+	movl	%ebx, 12(%edi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	js	.LBB17_7
+.LBB17_8:
+	movl	%edx, 8(%edi)
+	js	.LBB17_9
+.LBB17_10:
+	movl	%ecx, 4(%edi)
+	jns	.LBB17_12
+.LBB17_11:
+	movl	12(%esp), %eax                  # 4-byte Reload
+.LBB17_12:
+	movl	%eax, (%edi)
+	addl	$28, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end52:
-	.size	mcl_fpDbl_sqrPre4L, .Lfunc_end52-mcl_fpDbl_sqrPre4L
-
-	.globl	mcl_fp_mont4L
-	.align	16, 0x90
-	.type	mcl_fp_mont4L,@function
-mcl_fp_mont4L:                          # @mcl_fp_mont4L
-# BB#0:
+.LBB17_1:
+	movl	%esi, %ecx
+	movl	48(%esp), %edi
+	movl	%ecx, 20(%edi)
+	jns	.LBB17_4
+.LBB17_3:
+	movl	%eax, %ebp
+	movl	%ebp, 16(%edi)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	jns	.LBB17_6
+.LBB17_5:
+	movl	(%esp), %ebx                    # 4-byte Reload
+	movl	%ebx, 12(%edi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	jns	.LBB17_8
+.LBB17_7:
+	movl	4(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 8(%edi)
+	jns	.LBB17_10
+.LBB17_9:
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	js	.LBB17_11
+	jmp	.LBB17_12
+.Lfunc_end17:
+	.size	mcl_fp_addNF6L, .Lfunc_end17-mcl_fp_addNF6L
+                                        # -- End function
+	.globl	mcl_fp_sub6L                    # -- Begin function mcl_fp_sub6L
+	.p2align	4, 0x90
+	.type	mcl_fp_sub6L,@function
+mcl_fp_sub6L:                           # @mcl_fp_sub6L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$88, %esp
-	movl	112(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	%ecx, %ebp
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	116(%esp), %edx
-	movl	(%edx), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	mull	%edx
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	120(%esp), %edi
-	movl	-4(%edi), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	imull	%edx, %ebx
-	movl	(%edi), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	12(%edi), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	8(%edi), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	4(%edi), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	%ebp, %edi
-	movl	4(%edi), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	12(%edi), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	8(%edi), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%esi
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%ecx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	movl	28(%esp), %esi          # 4-byte Reload
-	mull	%esi
-	movl	%edx, %ecx
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	mull	%esi
-	movl	%edx, %ebp
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, %ebx
-	movl	%eax, %edi
-	addl	32(%esp), %edi          # 4-byte Folded Reload
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	addl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	20(%esp), %ecx          # 4-byte Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	12(%esp), %ebp          # 4-byte Reload
-	addl	48(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%edi, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	%ebx, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebx
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	andl	$1, %ebp
-	movl	116(%esp), %eax
-	movl	4(%eax), %esi
-	movl	%esi, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	56(%esp)                # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	addl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%ecx, %edx
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	adcl	$0, %edi
-	addl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	%ebx, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	%ebp, %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%eax, %ecx
-	imull	80(%esp), %ecx          # 4-byte Folded Reload
-	andl	$1, %esi
-	movl	%ecx, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	68(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	addl	16(%esp), %edi          # 4-byte Folded Reload
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	28(%esp), %eax          # 4-byte Folded Reload
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax
-	movl	8(%eax), %esi
-	movl	%esi, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	56(%esp)                # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	addl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, %eax
-	adcl	$0, %eax
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	%edi, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	%ebp, 32(%esp)          # 4-byte Folded Spill
-	adcl	%ebx, %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
+	subl	$20, %esp
+	movl	44(%esp), %edx
+	movl	(%edx), %ecx
+	movl	4(%edx), %esi
+	xorl	%eax, %eax
+	movl	48(%esp), %ebp
+	subl	(%ebp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	sbbl	4(%ebp), %esi
+	movl	20(%edx), %ecx
+	movl	16(%edx), %ebx
+	movl	12(%edx), %edi
+	movl	8(%edx), %edx
+	sbbl	8(%ebp), %edx
+	sbbl	12(%ebp), %edi
+	sbbl	16(%ebp), %ebx
+	sbbl	20(%ebp), %ecx
 	sbbl	%eax, %eax
-	movl	%ecx, %esi
-	imull	80(%esp), %esi          # 4-byte Folded Reload
-	andl	$1, %eax
-	movl	%eax, %ecx
-	movl	%esi, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	68(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	%eax, %edi
-	movl	%esi, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	addl	%edi, %esi
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	40(%esp), %eax          # 4-byte Folded Reload
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax
-	movl	12(%eax), %ebp
-	movl	%ebp, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	56(%esp)                # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	addl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edi, %eax
-	adcl	$0, %eax
-	movl	64(%esp), %edi          # 4-byte Reload
-	addl	%esi, %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	%ebx, %ebp
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	sbbl	%ebx, %ebx
-	movl	80(%esp), %esi          # 4-byte Reload
-	imull	%edi, %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	andl	$1, %ebx
-	movl	%esi, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	movl	%esi, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	mull	68(%esp)                # 4-byte Folded Reload
-	addl	44(%esp), %eax          # 4-byte Folded Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	adcl	$0, %edi
-	addl	64(%esp), %ecx          # 4-byte Folded Reload
-	adcl	%ebp, %eax
-	adcl	60(%esp), %edx          # 4-byte Folded Reload
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	adcl	$0, %ebx
-	movl	%eax, %ebp
-	subl	84(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edx, %ecx
-	sbbl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	%esi, %ecx
-	sbbl	72(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	%edi, %ecx
-	sbbl	76(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB53_2
-# BB#1:
-	movl	%ebp, %eax
-.LBB53_2:
-	movl	108(%esp), %ebp
-	movl	%eax, (%ebp)
-	testb	%bl, %bl
-	jne	.LBB53_4
-# BB#3:
-	movl	80(%esp), %edx          # 4-byte Reload
-.LBB53_4:
-	movl	%edx, 4(%ebp)
-	jne	.LBB53_6
-# BB#5:
-	movl	84(%esp), %esi          # 4-byte Reload
-.LBB53_6:
-	movl	%esi, 8(%ebp)
-	jne	.LBB53_8
-# BB#7:
-	movl	%ecx, %edi
-.LBB53_8:
-	movl	%edi, 12(%ebp)
-	addl	$88, %esp
+	testb	$1, %al
+	movl	40(%esp), %eax
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, 20(%eax)
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	%ebx, 16(%eax)
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	%edi, 12(%eax)
+	movl	%edx, 8(%eax)
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	%esi, 4(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, (%eax)
+	je	.LBB18_2
+# %bb.1:                                # %carry
+	movl	%ecx, %ebp
+	movl	52(%esp), %ecx
+	addl	(%ecx), %ebp
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	4(%ecx), %ebp
+	adcl	8(%ecx), %edx
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	12(%ecx), %edi
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	16(%ecx), %ebx
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	52(%esp), %esi
+	adcl	20(%esi), %ecx
+	movl	%ecx, 20(%eax)
+	movl	%ebx, 16(%eax)
+	movl	%edi, 12(%eax)
+	movl	%edx, 8(%eax)
+	movl	%ebp, 4(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, (%eax)
+.LBB18_2:                               # %nocarry
+	addl	$20, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end53:
-	.size	mcl_fp_mont4L, .Lfunc_end53-mcl_fp_mont4L
-
-	.globl	mcl_fp_montNF4L
-	.align	16, 0x90
-	.type	mcl_fp_montNF4L,@function
-mcl_fp_montNF4L:                        # @mcl_fp_montNF4L
-# BB#0:
+.Lfunc_end18:
+	.size	mcl_fp_sub6L, .Lfunc_end18-mcl_fp_sub6L
+                                        # -- End function
+	.globl	mcl_fp_subNF6L                  # -- Begin function mcl_fp_subNF6L
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF6L,@function
+mcl_fp_subNF6L:                         # @mcl_fp_subNF6L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$84, %esp
-	movl	108(%esp), %ecx
-	movl	(%ecx), %eax
+	subl	$24, %esp
+	movl	48(%esp), %ebx
+	movl	(%ebx), %ecx
+	movl	4(%ebx), %eax
+	movl	52(%esp), %ebp
+	subl	(%ebp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	sbbl	4(%ebp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%ebx), %ecx
+	movl	16(%ebx), %eax
+	movl	12(%ebx), %edx
+	movl	8(%ebx), %esi
+	sbbl	8(%ebp), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	sbbl	12(%ebp), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	sbbl	16(%ebp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	sbbl	20(%ebp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
 	movl	%ecx, %ebp
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	112(%esp), %ecx
-	movl	(%ecx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %esi
-	movl	-4(%esi), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	imull	%edx, %ecx
-	movl	(%esi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	12(%esi), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	8(%esi), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	4(%esi), %ebx
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	movl	4(%eax), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	12(%eax), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	8(%eax), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%ebx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ecx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ebp, %eax
-	movl	36(%esp), %ebx          # 4-byte Reload
-	mull	%ebx
-	movl	%edx, %ecx
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	%ebx
-	movl	%ebx, %esi
-	movl	%edx, %ebx
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, %esi
-	movl	%eax, %ebp
-	addl	76(%esp), %ebp          # 4-byte Folded Reload
-	adcl	(%esp), %esi            # 4-byte Folded Reload
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	adcl	$0, %ecx
-	addl	40(%esp), %edi          # 4-byte Folded Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	adcl	$0, %ecx
-	addl	12(%esp), %ebp          # 4-byte Folded Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	4(%eax), %edi
-	movl	%edi, %eax
-	mull	44(%esp)                # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	48(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	addl	20(%esp), %edi          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	%ebp, 32(%esp)          # 4-byte Folded Spill
-	adcl	%esi, %edi
-	adcl	%ebx, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	movl	%esi, %ecx
-	imull	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	68(%esp)                # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	%ecx, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	%ecx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	addl	%esi, %eax
-	adcl	%edi, %ebx
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	%edx, %ebx
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	8(%eax), %ecx
-	movl	%ecx, %eax
-	mull	44(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	48(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, %edi
-	movl	%ecx, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edx, %ecx
-	addl	%edi, %ecx
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	adcl	$0, %esi
-	movl	32(%esp), %edx          # 4-byte Reload
-	addl	%ebx, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%edx, %ebx
-	imull	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	68(%esp)                # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ebx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	addl	32(%esp), %eax          # 4-byte Folded Reload
-	adcl	%ecx, %edi
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	%ebp, %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	adcl	$0, %esi
-	addl	%edx, %edi
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	112(%esp), %eax
-	movl	12(%eax), %ecx
-	movl	%ecx, %eax
-	mull	44(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	48(%esp)                # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%edx, %ecx
-	addl	32(%esp), %ecx          # 4-byte Folded Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	adcl	$0, %ebx
-	movl	60(%esp), %edx          # 4-byte Reload
-	addl	%edi, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	adcl	%esi, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %ebx
-	movl	56(%esp), %edi          # 4-byte Reload
-	imull	%edx, %edi
-	movl	%edi, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	%edi, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	movl	%edi, %ebp
-	mull	68(%esp)                # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, %edi
+	sarl	$31, %ebp
 	movl	%ebp, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
-	movl	44(%esp), %ebp          # 4-byte Reload
-	addl	60(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%ecx, %eax
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	adcl	$0, %ebx
-	addl	56(%esp), %eax          # 4-byte Folded Reload
-	adcl	%edx, %edi
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	adcl	76(%esp), %ebx          # 4-byte Folded Reload
-	movl	%eax, %edx
-	subl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%edi, %ebp
-	sbbl	64(%esp), %ebp          # 4-byte Folded Reload
-	movl	%esi, %ecx
-	sbbl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	%ebx, %ecx
-	sbbl	72(%esp), %ecx          # 4-byte Folded Reload
-	testl	%ecx, %ecx
-	js	.LBB54_2
-# BB#1:
-	movl	%edx, %eax
-.LBB54_2:
-	movl	104(%esp), %edx
-	movl	%eax, (%edx)
-	js	.LBB54_4
-# BB#3:
-	movl	%ebp, %edi
-.LBB54_4:
-	movl	%edi, 4(%edx)
-	js	.LBB54_6
-# BB#5:
-	movl	80(%esp), %esi          # 4-byte Reload
-.LBB54_6:
-	movl	%esi, 8(%edx)
-	js	.LBB54_8
-# BB#7:
-	movl	%ecx, %ebx
-.LBB54_8:
-	movl	%ebx, 12(%edx)
-	addl	$84, %esp
+	shldl	$1, %ecx, %eax
+	movl	56(%esp), %ebx
+	andl	(%ebx), %eax
+	movl	20(%ebx), %edi
+	andl	%ebp, %edi
+	movl	16(%ebx), %esi
+	andl	%ebp, %esi
+	movl	12(%ebx), %edx
+	andl	%ebp, %edx
+	movl	8(%ebx), %ecx
+	andl	%ebp, %ecx
+	andl	4(%ebx), %ebp
+	addl	12(%esp), %eax                  # 4-byte Folded Reload
+	adcl	16(%esp), %ebp                  # 4-byte Folded Reload
+	movl	44(%esp), %ebx
+	movl	%eax, (%ebx)
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ebp, 4(%ebx)
+	adcl	8(%esp), %edx                   # 4-byte Folded Reload
+	movl	%ecx, 8(%ebx)
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edx, 12(%ebx)
+	movl	%esi, 16(%ebx)
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 20(%ebx)
+	addl	$24, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end54:
-	.size	mcl_fp_montNF4L, .Lfunc_end54-mcl_fp_montNF4L
-
-	.globl	mcl_fp_montRed4L
-	.align	16, 0x90
-	.type	mcl_fp_montRed4L,@function
-mcl_fp_montRed4L:                       # @mcl_fp_montRed4L
-# BB#0:
+.Lfunc_end19:
+	.size	mcl_fp_subNF6L, .Lfunc_end19-mcl_fp_subNF6L
+                                        # -- End function
+	.globl	mcl_fpDbl_add6L                 # -- Begin function mcl_fpDbl_add6L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add6L,@function
+mcl_fpDbl_add6L:                        # @mcl_fpDbl_add6L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$64, %esp
-	movl	92(%esp), %eax
-	movl	-4(%eax), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	(%eax), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	imull	%edx, %esi
-	movl	12(%eax), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	8(%eax), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	4(%eax), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	%edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	%ebx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	%ebp
-	movl	%edx, %ebp
-	movl	%eax, %ebx
-	movl	%esi, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	addl	%ebx, %edi
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	40(%esp), %eax          # 4-byte Folded Reload
-	adcl	4(%ecx), %edi
-	adcl	8(%ecx), %ebp
-	adcl	12(%ecx), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	16(%ecx), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	28(%ecx), %eax
-	movl	24(%ecx), %edx
-	movl	20(%ecx), %ecx
-	adcl	$0, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%edi, %ebx
-	imull	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, %eax
-	mull	44(%esp)                # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	48(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ebx, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	addl	(%esp), %esi            # 4-byte Folded Reload
-	movl	%ecx, %ebx
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	%edi, %eax
-	adcl	%ebp, %esi
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	8(%esp), %edi           # 4-byte Reload
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	$0, 20(%esp)            # 4-byte Folded Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%esi, %ebp
-	imull	56(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, %eax
-	mull	44(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	%ebp, %eax
-	mull	48(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebp, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebp
-	addl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	%ebx, %ecx
-	movl	%ecx, %ebx
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	%esi, %eax
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	56(%esp), %esi          # 4-byte Reload
-	imull	%ebp, %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	44(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	56(%esp), %eax          # 4-byte Reload
-	mull	48(%esp)                # 4-byte Folded Reload
-	addl	16(%esp), %eax          # 4-byte Folded Reload
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	adcl	$0, %ecx
-	addl	%ebp, %esi
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	adcl	$0, %edi
-	movl	%eax, %ebp
-	subl	60(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edx, %esi
-	sbbl	48(%esp), %esi          # 4-byte Folded Reload
-	sbbl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %ebx
-	sbbl	44(%esp), %ebx          # 4-byte Folded Reload
-	sbbl	$0, %edi
-	andl	$1, %edi
-	jne	.LBB55_2
-# BB#1:
-	movl	%ebp, %eax
-.LBB55_2:
-	movl	84(%esp), %ebp
-	movl	%eax, (%ebp)
-	movl	%edi, %eax
-	testb	%al, %al
-	jne	.LBB55_4
-# BB#3:
-	movl	%esi, %edx
-.LBB55_4:
-	movl	%edx, 4(%ebp)
-	movl	56(%esp), %eax          # 4-byte Reload
-	jne	.LBB55_6
-# BB#5:
-	movl	60(%esp), %eax          # 4-byte Reload
-.LBB55_6:
-	movl	%eax, 8(%ebp)
-	jne	.LBB55_8
-# BB#7:
+	subl	$32, %esp
+	movl	56(%esp), %esi
+	movl	(%esi), %eax
+	movl	4(%esi), %ecx
+	movl	60(%esp), %edx
+	addl	(%edx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	4(%edx), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	44(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	40(%esi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	36(%esi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	32(%esi), %ecx
+	movl	28(%esi), %ebx
+	movl	24(%esi), %edi
+	movl	20(%esi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	16(%esi), %ebp
+	movl	12(%esi), %eax
+	movl	8(%esi), %esi
+	adcl	8(%edx), %esi
+	adcl	12(%edx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	16(%edx), %ebp
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	20(%edx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	24(%edx), %edi
+	adcl	28(%edx), %ebx
+	adcl	32(%edx), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	36(%edx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	40(%edx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	44(%edx), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edx
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 20(%edx)
+	movl	%ebp, 16(%edx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edx)
+	movl	%esi, 8(%edx)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%edx)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%edx)
+	setb	20(%esp)                        # 1-byte Folded Spill
+	movl	64(%esp), %eax
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	subl	(%eax), %edi
+	movl	%edi, (%esp)                    # 4-byte Spill
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
 	movl	%ebx, %ecx
-.LBB55_8:
-	movl	%ecx, 12(%ebp)
-	addl	$64, %esp
+	sbbl	4(%eax), %ecx
+	movl	28(%esp), %edx                  # 4-byte Reload
+	sbbl	8(%eax), %edx
+	movl	8(%esp), %esi                   # 4-byte Reload
+	sbbl	12(%eax), %esi
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	sbbl	16(%eax), %ebx
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, %edi
+	sbbl	20(%eax), %ebp
+	movzbl	20(%esp), %eax                  # 1-byte Folded Reload
+	sbbl	$0, %eax
+	testb	$1, %al
+	jne	.LBB20_1
+# %bb.2:
+	movl	52(%esp), %eax
+	movl	%ebp, 44(%eax)
+	jne	.LBB20_3
+.LBB20_4:
+	movl	%ebx, 40(%eax)
+	jne	.LBB20_5
+.LBB20_6:
+	movl	%esi, 36(%eax)
+	jne	.LBB20_7
+.LBB20_8:
+	movl	%edx, 32(%eax)
+	jne	.LBB20_9
+.LBB20_10:
+	movl	%ecx, 28(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	je	.LBB20_12
+.LBB20_11:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+.LBB20_12:
+	movl	%ecx, 24(%eax)
+	addl	$32, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end55:
-	.size	mcl_fp_montRed4L, .Lfunc_end55-mcl_fp_montRed4L
-
-	.globl	mcl_fp_addPre4L
-	.align	16, 0x90
-	.type	mcl_fp_addPre4L,@function
-mcl_fp_addPre4L:                        # @mcl_fp_addPre4L
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	20(%esp), %esi
-	addl	(%esi), %ecx
-	adcl	4(%esi), %edx
-	movl	12(%eax), %edi
-	movl	8(%eax), %eax
-	adcl	8(%esi), %eax
-	movl	12(%esi), %esi
-	movl	16(%esp), %ebx
-	movl	%ecx, (%ebx)
-	movl	%edx, 4(%ebx)
-	movl	%eax, 8(%ebx)
-	adcl	%edi, %esi
-	movl	%esi, 12(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end56:
-	.size	mcl_fp_addPre4L, .Lfunc_end56-mcl_fp_addPre4L
-
-	.globl	mcl_fp_subPre4L
-	.align	16, 0x90
-	.type	mcl_fp_subPre4L,@function
-mcl_fp_subPre4L:                        # @mcl_fp_subPre4L
-# BB#0:
+.LBB20_1:
+	movl	%edi, %ebp
+	movl	52(%esp), %eax
+	movl	%ebp, 44(%eax)
+	je	.LBB20_4
+.LBB20_3:
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	movl	%ebx, 40(%eax)
+	je	.LBB20_6
+.LBB20_5:
+	movl	8(%esp), %esi                   # 4-byte Reload
+	movl	%esi, 36(%eax)
+	je	.LBB20_8
+.LBB20_7:
+	movl	28(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 32(%eax)
+	je	.LBB20_10
+.LBB20_9:
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	jne	.LBB20_11
+	jmp	.LBB20_12
+.Lfunc_end20:
+	.size	mcl_fpDbl_add6L, .Lfunc_end20-mcl_fpDbl_add6L
+                                        # -- End function
+	.globl	mcl_fpDbl_sub6L                 # -- Begin function mcl_fpDbl_sub6L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub6L,@function
+mcl_fpDbl_sub6L:                        # @mcl_fpDbl_sub6L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %esi
-	xorl	%eax, %eax
-	movl	28(%esp), %edi
-	subl	(%edi), %edx
-	sbbl	4(%edi), %esi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edi), %ebx
-	movl	12(%edi), %edi
-	movl	12(%ecx), %ecx
-	movl	20(%esp), %ebp
-	movl	%edx, (%ebp)
-	movl	%esi, 4(%ebp)
-	movl	%ebx, 8(%ebp)
-	sbbl	%edi, %ecx
-	movl	%ecx, 12(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
+	subl	$36, %esp
+	movl	60(%esp), %eax
+	movl	(%eax), %esi
+	movl	4(%eax), %edi
+	xorl	%edx, %edx
+	movl	64(%esp), %ebx
+	subl	(%ebx), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	sbbl	4(%ebx), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	44(%eax), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	40(%eax), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	36(%eax), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	32(%eax), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	28(%eax), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	24(%eax), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	20(%eax), %esi
+	movl	16(%eax), %edi
+	movl	12(%eax), %ecx
+	movl	8(%eax), %ebp
+	sbbl	8(%ebx), %ebp
+	sbbl	12(%ebx), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	sbbl	16(%ebx), %edi
+	sbbl	20(%ebx), %esi
+	movl	%esi, %ecx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	24(%ebx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	28(%ebx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	32(%ebx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%ebx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%ebx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	sbbl	44(%ebx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	56(%esp), %esi
+	movl	%ecx, 20(%esi)
+	movl	%edi, 16(%esi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%esi)
+	movl	%ebp, 8(%esi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%esi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%esi)
+	sbbl	%edx, %edx
+	andl	$1, %edx
+	negl	%edx
+	movl	68(%esp), %eax
+	movl	20(%eax), %ecx
+	andl	%edx, %ecx
+	movl	16(%eax), %edi
+	andl	%edx, %edi
+	movl	12(%eax), %ebx
+	andl	%edx, %ebx
+	movl	8(%eax), %ebp
+	andl	%edx, %ebp
+	movl	68(%esp), %eax
+	movl	4(%eax), %eax
+	andl	%edx, %eax
+	movl	68(%esp), %esi
+	andl	(%esi), %edx
+	addl	4(%esp), %edx                   # 4-byte Folded Reload
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	56(%esp), %esi
+	movl	%edx, 24(%esi)
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%eax, 28(%esi)
+	adcl	16(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebp, 32(%esi)
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	movl	%ebx, 36(%esi)
+	movl	%edi, 40(%esi)
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 44(%esi)
+	addl	$36, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end57:
-	.size	mcl_fp_subPre4L, .Lfunc_end57-mcl_fp_subPre4L
-
-	.globl	mcl_fp_shr1_4L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_4L,@function
-mcl_fp_shr1_4L:                         # @mcl_fp_shr1_4L
-# BB#0:
+.Lfunc_end21:
+	.size	mcl_fpDbl_sub6L, .Lfunc_end21-mcl_fpDbl_sub6L
+                                        # -- End function
+	.globl	mulPv224x32                     # -- Begin function mulPv224x32
+	.p2align	4, 0x90
+	.type	mulPv224x32,@function
+mulPv224x32:                            # @mulPv224x32
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	16(%esp), %eax
-	movl	12(%eax), %ecx
-	movl	8(%eax), %edx
-	movl	(%eax), %esi
-	movl	4(%eax), %eax
-	shrdl	$1, %eax, %esi
-	movl	12(%esp), %edi
-	movl	%esi, (%edi)
-	shrdl	$1, %edx, %eax
-	movl	%eax, 4(%edi)
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 8(%edi)
-	shrl	%ecx
-	movl	%ecx, 12(%edi)
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end58:
-	.size	mcl_fp_shr1_4L, .Lfunc_end58-mcl_fp_shr1_4L
-
-	.globl	mcl_fp_add4L
-	.align	16, 0x90
-	.type	mcl_fp_add4L,@function
-mcl_fp_add4L:                           # @mcl_fp_add4L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	28(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %ecx
-	movl	24(%esp), %esi
-	addl	(%esi), %eax
-	adcl	4(%esi), %ecx
-	movl	8(%edi), %edx
-	adcl	8(%esi), %edx
-	movl	12(%esi), %esi
-	adcl	12(%edi), %esi
-	movl	20(%esp), %edi
-	movl	%eax, (%edi)
-	movl	%ecx, 4(%edi)
-	movl	%edx, 8(%edi)
-	movl	%esi, 12(%edi)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	32(%esp), %ebp
-	subl	(%ebp), %eax
-	sbbl	4(%ebp), %ecx
-	sbbl	8(%ebp), %edx
-	sbbl	12(%ebp), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB59_2
-# BB#1:                                 # %nocarry
-	movl	%eax, (%edi)
-	movl	%ecx, 4(%edi)
-	movl	%edx, 8(%edi)
-	movl	%esi, 12(%edi)
-.LBB59_2:                               # %carry
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end59:
-	.size	mcl_fp_add4L, .Lfunc_end59-mcl_fp_add4L
-
-	.globl	mcl_fp_addNF4L
-	.align	16, 0x90
-	.type	mcl_fp_addNF4L,@function
-mcl_fp_addNF4L:                         # @mcl_fp_addNF4L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$8, %esp
-	movl	36(%esp), %edx
-	movl	(%edx), %esi
-	movl	4(%edx), %ecx
-	movl	32(%esp), %edi
-	addl	(%edi), %esi
-	adcl	4(%edi), %ecx
-	movl	12(%edx), %ebp
-	movl	8(%edx), %edx
-	adcl	8(%edi), %edx
-	adcl	12(%edi), %ebp
-	movl	40(%esp), %eax
-	movl	%esi, %ebx
-	subl	(%eax), %ebx
-	movl	%ecx, %edi
-	sbbl	4(%eax), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
+	subl	$36, %esp
+	movl	64(%esp), %esi
+	movl	60(%esp), %ebx
+	movl	%esi, %eax
+	mull	24(%ebx)
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	20(%ebx)
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	16(%ebx)
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	12(%ebx)
+	movl	%edx, %ebp
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%esi, %eax
+	mull	8(%ebx)
+	movl	%edx, %ecx
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%esi, %eax
+	mull	4(%ebx)
 	movl	%edx, %edi
-	movl	40(%esp), %eax
-	sbbl	8(%eax), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%ebp, %edi
-	movl	40(%esp), %eax
-	sbbl	12(%eax), %edi
-	testl	%edi, %edi
-	js	.LBB60_2
-# BB#1:
-	movl	%ebx, %esi
-.LBB60_2:
-	movl	28(%esp), %ebx
-	movl	%esi, (%ebx)
-	js	.LBB60_4
-# BB#3:
-	movl	(%esp), %ecx            # 4-byte Reload
-.LBB60_4:
-	movl	%ecx, 4(%ebx)
-	js	.LBB60_6
-# BB#5:
-	movl	4(%esp), %edx           # 4-byte Reload
-.LBB60_6:
-	movl	%edx, 8(%ebx)
-	js	.LBB60_8
-# BB#7:
-	movl	%edi, %ebp
-.LBB60_8:
-	movl	%ebp, 12(%ebx)
-	addl	$8, %esp
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%esi, %eax
+	mull	(%ebx)
+	movl	56(%esp), %esi
+	movl	%eax, (%esi)
+	addl	(%esp), %edx                    # 4-byte Folded Reload
+	movl	%edx, 4(%esi)
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 8(%esi)
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 12(%esi)
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 16(%esi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 20(%esi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 24(%esi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 28(%esi)
+	movl	%esi, %eax
+	addl	$36, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
-	retl
-.Lfunc_end60:
-	.size	mcl_fp_addNF4L, .Lfunc_end60-mcl_fp_addNF4L
-
-	.globl	mcl_fp_sub4L
-	.align	16, 0x90
-	.type	mcl_fp_sub4L,@function
-mcl_fp_sub4L:                           # @mcl_fp_sub4L
-# BB#0:
+	retl	$4
+.Lfunc_end22:
+	.size	mulPv224x32, .Lfunc_end22-mulPv224x32
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre7L             # -- Begin function mcl_fp_mulUnitPre7L
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre7L,@function
+mcl_fp_mulUnitPre7L:                    # @mcl_fp_mulUnitPre7L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	28(%esp), %edi
-	subl	(%edi), %eax
-	sbbl	4(%edi), %ecx
-	movl	8(%esi), %edx
-	sbbl	8(%edi), %edx
-	movl	12(%esi), %esi
-	sbbl	12(%edi), %esi
-	movl	20(%esp), %edi
-	movl	%eax, (%edi)
-	movl	%ecx, 4(%edi)
-	movl	%edx, 8(%edi)
-	movl	%esi, 12(%edi)
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	je	.LBB61_2
-# BB#1:                                 # %carry
-	movl	32(%esp), %ebx
-	addl	(%ebx), %eax
-	movl	8(%ebx), %ebp
-	adcl	4(%ebx), %ecx
-	movl	12(%ebx), %ebx
-	movl	%eax, (%edi)
-	movl	%ecx, 4(%edi)
-	adcl	%edx, %ebp
-	movl	%ebp, 8(%edi)
-	adcl	%esi, %ebx
-	movl	%ebx, 12(%edi)
-.LBB61_2:                               # %nocarry
+	subl	$36, %esp
+	movl	64(%esp), %esi
+	movl	60(%esp), %ebx
+	movl	%esi, %eax
+	mull	24(%ebx)
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	20(%ebx)
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	16(%ebx)
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	12(%ebx)
+	movl	%edx, %ebp
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%esi, %eax
+	mull	8(%ebx)
+	movl	%edx, %ecx
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%esi, %eax
+	mull	4(%ebx)
+	movl	%edx, %edi
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%esi, %eax
+	mull	(%ebx)
+	movl	56(%esp), %esi
+	movl	%eax, (%esi)
+	addl	(%esp), %edx                    # 4-byte Folded Reload
+	movl	%edx, 4(%esi)
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 8(%esi)
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 12(%esi)
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 16(%esi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 20(%esi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 24(%esi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 28(%esi)
+	addl	$36, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end61:
-	.size	mcl_fp_sub4L, .Lfunc_end61-mcl_fp_sub4L
-
-	.globl	mcl_fp_subNF4L
-	.align	16, 0x90
-	.type	mcl_fp_subNF4L,@function
-mcl_fp_subNF4L:                         # @mcl_fp_subNF4L
-# BB#0:
+.Lfunc_end23:
+	.size	mcl_fp_mulUnitPre7L, .Lfunc_end23-mcl_fp_mulUnitPre7L
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre7L              # -- Begin function mcl_fpDbl_mulPre7L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre7L,@function
+mcl_fpDbl_mulPre7L:                     # @mcl_fpDbl_mulPre7L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$8, %esp
-	movl	32(%esp), %eax
-	movl	(%eax), %edx
+	subl	$112, %esp
+	movl	136(%esp), %ecx
+	movl	(%ecx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	140(%esp), %edx
+	movl	(%edx), %edi
+	mull	%edi
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	132(%esp), %edx
+	movl	%eax, (%edx)
+	movl	4(%ecx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	8(%ecx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	12(%ecx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %esi
+	movl	20(%ecx), %ebx
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	movl	24(%ecx), %ebp
+	movl	140(%esp), %eax
 	movl	4(%eax), %ecx
-	movl	36(%esp), %esi
-	subl	(%esi), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	sbbl	4(%esi), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	12(%eax), %edi
-	movl	8(%eax), %edx
-	sbbl	8(%esi), %edx
-	sbbl	12(%esi), %edi
-	movl	%edi, %esi
-	sarl	$31, %esi
-	movl	40(%esp), %eax
-	movl	12(%eax), %ebp
-	andl	%esi, %ebp
-	movl	8(%eax), %ecx
-	andl	%esi, %ecx
-	movl	40(%esp), %eax
-	movl	4(%eax), %eax
-	andl	%esi, %eax
-	movl	40(%esp), %ebx
-	andl	(%ebx), %esi
-	addl	(%esp), %esi            # 4-byte Folded Reload
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	28(%esp), %ebx
-	movl	%esi, (%ebx)
-	adcl	%edx, %ecx
-	movl	%eax, 4(%ebx)
-	movl	%ecx, 8(%ebx)
-	adcl	%edi, %ebp
-	movl	%ebp, 12(%ebx)
-	addl	$8, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end62:
-	.size	mcl_fp_subNF4L, .Lfunc_end62-mcl_fp_subNF4L
-
-	.globl	mcl_fpDbl_add4L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add4L,@function
-mcl_fpDbl_add4L:                        # @mcl_fpDbl_add4L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$12, %esp
-	movl	40(%esp), %eax
-	movl	(%eax), %edi
-	movl	4(%eax), %edx
-	movl	36(%esp), %esi
-	addl	(%esi), %edi
-	adcl	4(%esi), %edx
-	movl	8(%eax), %ebx
-	adcl	8(%esi), %ebx
-	movl	12(%esi), %ebp
-	movl	32(%esp), %ecx
-	movl	%edi, (%ecx)
-	movl	16(%esi), %edi
-	adcl	12(%eax), %ebp
-	adcl	16(%eax), %edi
-	movl	%edx, 4(%ecx)
-	movl	28(%eax), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%ebx, 8(%ecx)
-	movl	24(%eax), %ebx
-	movl	20(%eax), %eax
-	movl	%ebp, 12(%ecx)
-	movl	20(%esi), %edx
-	adcl	%eax, %edx
-	movl	28(%esi), %ecx
-	movl	24(%esi), %ebp
-	adcl	%ebx, %ebp
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	44(%esp), %eax
-	movl	%edi, %esi
-	subl	(%eax), %esi
-	movl	%esi, (%esp)            # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%ebp
+	movl	%ebp, 72(%esp)                  # 4-byte Spill
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%ebx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%esi
+	movl	%esi, 80(%esp)                  # 4-byte Spill
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	%ebp, %eax
+	mull	%edi
+	movl	%edx, %ebp
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	mull	%edi
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	%esi, %eax
+	mull	%edi
+	movl	%edx, %ebx
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	mull	%edi
 	movl	%edx, %esi
-	sbbl	4(%eax), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	%ebp, %esi
-	sbbl	8(%eax), %esi
-	sbbl	12(%eax), %ecx
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB63_2
-# BB#1:
-	movl	%esi, %ebp
-.LBB63_2:
-	testb	%bl, %bl
-	jne	.LBB63_4
-# BB#3:
-	movl	(%esp), %edi            # 4-byte Reload
-.LBB63_4:
-	movl	32(%esp), %eax
-	movl	%edi, 16(%eax)
-	jne	.LBB63_6
-# BB#5:
-	movl	4(%esp), %edx           # 4-byte Reload
-.LBB63_6:
-	movl	%edx, 20(%eax)
-	movl	%ebp, 24(%eax)
-	movl	8(%esp), %edx           # 4-byte Reload
-	jne	.LBB63_8
-# BB#7:
-	movl	%ecx, %edx
-.LBB63_8:
-	movl	%edx, 28(%eax)
-	addl	$12, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end63:
-	.size	mcl_fpDbl_add4L, .Lfunc_end63-mcl_fpDbl_add4L
-
-	.globl	mcl_fpDbl_sub4L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub4L,@function
-mcl_fpDbl_sub4L:                        # @mcl_fpDbl_sub4L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	pushl	%eax
-	movl	28(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	32(%esp), %ebp
-	subl	(%ebp), %edx
-	sbbl	4(%ebp), %esi
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	84(%esp), %eax                  # 4-byte Reload
+	mull	%edi
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, %ecx
+	movl	64(%esp), %eax                  # 4-byte Reload
+	mull	%edi
+	addl	8(%esp), %eax                   # 4-byte Folded Reload
+	adcl	%ecx, %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	92(%esp), %edi                  # 4-byte Folded Reload
+	adcl	96(%esp), %esi                  # 4-byte Folded Reload
+	adcl	100(%esp), %ebx                 # 4-byte Folded Reload
+	movl	104(%esp), %ecx                 # 4-byte Reload
+	adcl	%ecx, 20(%esp)                  # 4-byte Folded Spill
+	movl	%ebp, %ecx
+	adcl	$0, %ecx
+	addl	108(%esp), %eax                 # 4-byte Folded Reload
+	movl	132(%esp), %edx
+	movl	%eax, 4(%edx)
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	76(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	adcl	56(%esp), %esi                  # 4-byte Folded Reload
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	setb	%al
+	addl	52(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	44(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	adcl	(%esp), %edx                    # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movzbl	%al, %eax
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	140(%esp), %eax
 	movl	8(%eax), %ebx
-	sbbl	8(%ebp), %ebx
-	movl	24(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	12(%eax), %edx
-	sbbl	12(%ebp), %edx
-	movl	%esi, 4(%ecx)
-	movl	16(%eax), %edi
-	sbbl	16(%ebp), %edi
+	movl	%ebx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, %ecx
+	movl	%ebx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %ebx
+	addl	%ecx, %edx
+	movl	%edx, %ecx
+	adcl	12(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	44(%esp), %eax                  # 4-byte Folded Reload
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	$0, %edx
+	addl	%ebp, %ebx
+	adcl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	132(%esp), %ecx
 	movl	%ebx, 8(%ecx)
-	movl	20(%ebp), %esi
-	movl	%edx, 12(%ecx)
-	movl	20(%eax), %ebx
-	sbbl	%esi, %ebx
-	movl	24(%ebp), %edx
-	movl	24(%eax), %esi
-	sbbl	%edx, %esi
-	movl	28(%ebp), %edx
-	movl	28(%eax), %eax
-	sbbl	%edx, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	$0, %edx
-	sbbl	$0, %edx
-	andl	$1, %edx
-	movl	36(%esp), %ecx
-	movl	(%ecx), %eax
-	jne	.LBB64_1
-# BB#2:
-	xorl	%ebp, %ebp
-	jmp	.LBB64_3
-.LBB64_1:
-	movl	4(%ecx), %ebp
-.LBB64_3:
-	testb	%dl, %dl
-	jne	.LBB64_5
-# BB#4:
-	movl	$0, %eax
-.LBB64_5:
-	jne	.LBB64_6
-# BB#7:
-	movl	$0, %edx
-	jmp	.LBB64_8
-.LBB64_6:
-	movl	12(%ecx), %edx
-.LBB64_8:
-	jne	.LBB64_9
-# BB#10:
-	xorl	%ecx, %ecx
-	jmp	.LBB64_11
-.LBB64_9:
-	movl	8(%ecx), %ecx
-.LBB64_11:
-	addl	%edi, %eax
-	adcl	%ebx, %ebp
-	movl	24(%esp), %edi
-	movl	%eax, 16(%edi)
-	adcl	%esi, %ecx
-	movl	%ebp, 20(%edi)
-	movl	%ecx, 24(%edi)
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 28(%edi)
-	addl	$4, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end64:
-	.size	mcl_fpDbl_sub4L, .Lfunc_end64-mcl_fpDbl_sub4L
-
-	.globl	mcl_fp_mulUnitPre5L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre5L,@function
-mcl_fp_mulUnitPre5L:                    # @mcl_fp_mulUnitPre5L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$20, %esp
-	movl	48(%esp), %esi
-	movl	44(%esp), %ecx
-	movl	%esi, %eax
-	mull	16(%ecx)
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	12(%ecx)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	adcl	56(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 16(%esp)                  # 4-byte Folded Spill
+	adcl	(%esp), %esi                    # 4-byte Folded Reload
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	$0, %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	140(%esp), %eax
+	movl	12(%eax), %ecx
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %ebx
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	8(%ecx)
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %edi
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	4(%ecx)
+	movl	%eax, %ebp
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	addl	%ebp, %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	adcl	76(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, %ebp
+	adcl	48(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, %ebx
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	52(%esp), %ecx                  # 4-byte Folded Reload
+	movl	(%esp), %edi                    # 4-byte Reload
+	adcl	56(%esp), %edi                  # 4-byte Folded Reload
+	movl	40(%esp), %esi                  # 4-byte Reload
+	adcl	$0, %esi
+	addl	44(%esp), %eax                  # 4-byte Folded Reload
+	movl	132(%esp), %edx
+	movl	%eax, 12(%edx)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 8(%esp)                   # 4-byte Folded Spill
+	adcl	16(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, (%esp)                    # 4-byte Spill
+	adcl	$0, %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	movl	140(%esp), %eax
+	movl	16(%eax), %ebx
+	movl	%ebx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%ebx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, %edi
+	movl	%ebx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %ebp
+	addl	%edi, %edx
+	movl	%edx, %edi
+	adcl	44(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	76(%esp), %esi                  # 4-byte Folded Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, %ebx
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	$0, %edx
+	addl	20(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	132(%esp), %edi
+	movl	%ebp, 16(%edi)
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	%edi, 44(%esp)                  # 4-byte Folded Spill
+	adcl	56(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	40(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	$0, %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	140(%esp), %eax
+	movl	20(%eax), %ecx
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, %ebp
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %ecx
+	addl	%ebp, %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	76(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, %eax
+	adcl	(%esp), %esi                    # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	adcl	52(%esp), %ebp                  # 4-byte Folded Reload
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	56(%esp), %edx                  # 4-byte Folded Reload
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	$0, %edi
+	addl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	132(%esp), %ecx
+	movl	4(%esp), %esi                   # 4-byte Reload
+	movl	%esi, 20(%ecx)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	adcl	$0, %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	140(%esp), %eax
+	movl	24(%eax), %edi
+	movl	%edi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	60(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	80(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %ebp
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%esi, %eax
-	mull	(%ecx)
-	movl	40(%esp), %ecx
-	movl	%eax, (%ecx)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%ecx)
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 8(%ecx)
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 12(%ecx)
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%ecx)
-	movl	16(%esp), %eax          # 4-byte Reload
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	64(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, %ebx
+	movl	%edi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	addl	16(%esp), %esi                  # 4-byte Folded Reload
+	adcl	64(%esp), %eax                  # 4-byte Folded Reload
+	adcl	68(%esp), %edx                  # 4-byte Folded Reload
+	adcl	80(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	%edi, 60(%esp)                  # 4-byte Folded Spill
+	adcl	$0, 72(%esp)                    # 4-byte Folded Spill
+	addl	20(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	132(%esp), %edi
+	movl	%ebx, 24(%edi)
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%esi, 28(%edi)
+	adcl	(%esp), %edx                    # 4-byte Folded Reload
+	movl	%eax, 32(%edi)
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%edx, 36(%edi)
+	adcl	40(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ecx, 40(%edi)
+	movl	%ebp, 44(%edi)
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 48(%edi)
+	movl	72(%esp), %eax                  # 4-byte Reload
 	adcl	$0, %eax
-	movl	%eax, 20(%ecx)
-	addl	$20, %esp
+	movl	%eax, 52(%edi)
+	addl	$112, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end65:
-	.size	mcl_fp_mulUnitPre5L, .Lfunc_end65-mcl_fp_mulUnitPre5L
-
-	.globl	mcl_fpDbl_mulPre5L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre5L,@function
-mcl_fpDbl_mulPre5L:                     # @mcl_fpDbl_mulPre5L
-# BB#0:
+.Lfunc_end24:
+	.size	mcl_fpDbl_mulPre7L, .Lfunc_end24-mcl_fpDbl_mulPre7L
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre7L              # -- Begin function mcl_fpDbl_sqrPre7L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre7L,@function
+mcl_fpDbl_sqrPre7L:                     # @mcl_fpDbl_sqrPre7L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$64, %esp
-	movl	88(%esp), %esi
-	movl	(%esi), %ebp
-	movl	92(%esp), %eax
-	movl	(%eax), %ebx
-	movl	%eax, %edi
+	subl	$244, %esp
+	movl	268(%esp), %edi
+	movl	24(%edi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	(%edi), %ecx
+	mull	%ecx
+	movl	%eax, 216(%esp)                 # 4-byte Spill
+	movl	%edx, 212(%esp)                 # 4-byte Spill
+	movl	20(%edi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	mull	%ecx
+	movl	%eax, 208(%esp)                 # 4-byte Spill
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	16(%edi), %ebp
 	movl	%ebp, %eax
-	mull	%ebx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx
-	movl	%eax, (%edx)
-	movl	%esi, %eax
-	movl	4(%eax), %esi
-	movl	8(%eax), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	12(%eax), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	16(%eax), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	4(%edi), %edi
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	mull	%ecx
+	movl	%eax, 192(%esp)                 # 4-byte Spill
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	12(%edi), %esi
 	movl	%esi, %eax
-	mull	%edi
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	mull	%ecx
+	movl	%eax, 184(%esp)                 # 4-byte Spill
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	4(%edi), %ebx
+	movl	8(%edi), %edi
+	movl	%edi, %eax
+	mull	%ecx
+	movl	%edx, 236(%esp)                 # 4-byte Spill
+	movl	%eax, 160(%esp)                 # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	mull	%ebx
+	movl	%edx, 204(%esp)                 # 4-byte Spill
+	movl	%eax, 200(%esp)                 # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	mull	%ebx
+	movl	%edx, 196(%esp)                 # 4-byte Spill
+	movl	%eax, 188(%esp)                 # 4-byte Spill
 	movl	%ebp, %eax
-	mull	%edi
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%esi, %eax
 	mull	%ebx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	(%esp), %esi            # 4-byte Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 180(%esp)                 # 4-byte Spill
 	movl	%esi, %eax
-	mull	%edi
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	8(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
 	mull	%ebx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 148(%esp)                 # 4-byte Spill
+	movl	%edi, %eax
 	mull	%ebx
-	movl	%edx, %esi
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ecx, %eax
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
 	mull	%ebx
-	movl	%eax, %edi
-	movl	%edx, %ebx
-	addl	36(%esp), %ebp          # 4-byte Folded Reload
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	84(%esp), %eax
-	movl	%ebp, 4(%eax)
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	adcl	16(%esp), %ebx          # 4-byte Folded Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	%ebp, %ebp
-	andl	$1, %ebp
-	addl	52(%esp), %edi          # 4-byte Folded Reload
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	88(%esp), %eax
-	movl	%eax, %esi
-	movl	16(%esi), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	8(%eax), %ecx
-	movl	%edx, %eax
-	mull	%ecx
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
 	mull	%ecx
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	movl	8(%edx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 140(%esp)                 # 4-byte Spill
+	movl	%ecx, %eax
 	mull	%ecx
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edx, 12(%esp)          # 4-byte Spill
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	264(%esp), %ecx
+	movl	%eax, (%ecx)
+	movl	16(%esp), %esi                  # 4-byte Reload
 	movl	%esi, %eax
-	movl	(%eax), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	4(%eax), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	mull	%ebp
+	movl	%edx, 164(%esp)                 # 4-byte Spill
+	movl	%eax, 172(%esp)                 # 4-byte Spill
 	movl	%esi, %eax
+	movl	40(%esp), %ebx                  # 4-byte Reload
+	mull	%ebx
+	movl	%edx, 168(%esp)                 # 4-byte Spill
+	movl	%eax, 156(%esp)                 # 4-byte Spill
+	movl	%esi, %eax
+	movl	36(%esp), %ecx                  # 4-byte Reload
 	mull	%ecx
-	addl	%edi, %eax
-	movl	84(%esp), %ecx
-	movl	%eax, 8(%ecx)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	%ebx, %ecx
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	%ebp, %eax
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	addl	%edx, %ecx
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	12(%eax), %ebp
-	movl	44(%esp), %eax          # 4-byte Reload
-	mull	%ebp
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	mull	%ebp
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	mull	%ebp
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	24(%esp), %eax          # 4-byte Reload
-	mull	%ebp
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	28(%esp), %eax          # 4-byte Reload
-	mull	%ebp
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	addl	%ecx, %eax
-	movl	84(%esp), %edx
-	movl	%eax, 12(%edx)
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 60(%esp)          # 4-byte Folded Spill
-	movl	92(%esp), %eax
-	movl	16(%eax), %ebp
-	sbbl	%ecx, %ecx
-	movl	%ebp, %eax
-	movl	88(%esp), %esi
-	mull	16(%esi)
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
+	movl	%edx, 88(%esp)                  # 4-byte Spill
+	movl	%eax, 152(%esp)                 # 4-byte Spill
+	movl	%esi, %eax
+	mull	%edi
+	movl	%edx, 84(%esp)                  # 4-byte Spill
+	movl	%eax, 144(%esp)                 # 4-byte Spill
+	movl	%esi, %eax
+	mull	%esi
+	movl	%edx, 240(%esp)                 # 4-byte Spill
+	movl	%eax, 176(%esp)                 # 4-byte Spill
 	movl	%ebp, %eax
-	mull	12(%esi)
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
+	mull	%ebx
+	movl	%edx, 136(%esp)                 # 4-byte Spill
+	movl	%eax, 120(%esp)                 # 4-byte Spill
 	movl	%ebp, %eax
-	mull	8(%esi)
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
+	mull	%ecx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 132(%esp)                 # 4-byte Spill
 	movl	%ebp, %eax
-	mull	4(%esi)
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
+	mull	%edi
+	movl	%edx, 128(%esp)                 # 4-byte Spill
+	movl	%eax, 124(%esp)                 # 4-byte Spill
 	movl	%ebp, %eax
-	mull	(%esi)
-	movl	%eax, %ebp
-	andl	$1, %ecx
-	addl	24(%esp), %edi          # 4-byte Folded Reload
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	4(%esp), %esi           # 4-byte Reload
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	addl	%ebp, %edi
-	movl	84(%esp), %ebp
-	movl	%edi, 16(%ebp)
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%eax, %edi
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	%edx, %ebx
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%ebp, %edx
-	movl	%ebx, 20(%edx)
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 24(%edx)
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, 28(%edx)
-	movl	%ecx, 32(%edx)
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%edx)
-	addl	$64, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end66:
-	.size	mcl_fpDbl_mulPre5L, .Lfunc_end66-mcl_fpDbl_mulPre5L
-
-	.globl	mcl_fpDbl_sqrPre5L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre5L,@function
-mcl_fpDbl_sqrPre5L:                     # @mcl_fpDbl_sqrPre5L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$60, %esp
-	movl	84(%esp), %ebx
-	movl	16(%ebx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	(%ebx), %edi
-	movl	4(%ebx), %ecx
-	mull	%ecx
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	12(%ebx), %esi
-	movl	%esi, %eax
-	mull	%ecx
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	8(%ebx), %ebx
+	mull	%ebp
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	movl	%eax, 76(%esp)                  # 4-byte Spill
 	movl	%ebx, %eax
 	mull	%ecx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	mull	%edi
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%esi, %eax
+	movl	%edx, 108(%esp)                 # 4-byte Spill
+	movl	%eax, 116(%esp)                 # 4-byte Spill
+	movl	%ebx, %eax
 	mull	%edi
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 112(%esp)                 # 4-byte Spill
 	movl	%ebx, %eax
+	mull	%ebx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
 	mull	%edi
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
+	movl	%edx, 104(%esp)                 # 4-byte Spill
+	movl	%eax, 100(%esp)                 # 4-byte Spill
 	movl	%ecx, %eax
 	mull	%ecx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	%eax, %ecx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
 	movl	%edi, %eax
 	mull	%edi
-	movl	80(%esp), %edi
-	movl	%eax, (%edi)
-	addl	%ecx, %edx
-	adcl	%esi, %ebp
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	52(%esp), %eax          # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	addl	%ecx, 44(%esp)                  # 4-byte Folded Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	148(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 228(%esp)                 # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	180(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 224(%esp)                 # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	188(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 220(%esp)                 # 4-byte Spill
+	movl	196(%esp), %eax                 # 4-byte Reload
+	adcl	200(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 232(%esp)                 # 4-byte Spill
+	movl	204(%esp), %eax                 # 4-byte Reload
 	adcl	$0, %eax
-	addl	%ecx, %edx
-	movl	80(%esp), %ecx
-	movl	%edx, 4(%ecx)
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%esi, %ecx
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	addl	(%esp), %ebp            # 4-byte Folded Reload
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	44(%esp), %eax          # 4-byte Reload
-	mull	%ebx
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx
-	movl	12(%ecx), %edi
-	movl	%edi, %eax
-	mull	%ebx
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	(%eax), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	4(%eax), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	mull	%ebx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%ebx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%eax, %ecx
-	movl	%ebx, %eax
-	mull	%ebx
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	addl	%ebp, %ecx
-	movl	80(%esp), %eax
-	movl	%ecx, 8(%eax)
-	movl	32(%esp), %ebx          # 4-byte Reload
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	%esi, %eax
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	addl	8(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	adcl	%edx, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	8(%eax), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	16(%eax), %ebx
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	addl	140(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	160(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	236(%esp), %ebx                 # 4-byte Reload
 	movl	%ebx, %eax
-	mull	%edi
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	28(%esp), %eax          # 4-byte Reload
-	mull	%edi
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	24(%esp), %eax          # 4-byte Reload
-	mull	%edi
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	movl	%edi, %eax
-	mull	%edi
+	adcl	184(%esp), %eax                 # 4-byte Folded Reload
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	192(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	208(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, %edx
+	movl	60(%esp), %esi                  # 4-byte Reload
+	adcl	216(%esp), %esi                 # 4-byte Folded Reload
+	movl	212(%esp), %edi                 # 4-byte Reload
+	adcl	$0, %edi
+	movl	(%esp), %ebp                    # 4-byte Reload
+	addl	140(%esp), %ebp                 # 4-byte Folded Reload
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %ebp                    # 4-byte Reload
+	movl	264(%esp), %ecx
+	movl	%ebp, 4(%ecx)
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	228(%esp), %ecx                 # 4-byte Reload
+	adcl	%ecx, 92(%esp)                  # 4-byte Folded Spill
+	adcl	224(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	adcl	220(%esp), %esi                 # 4-byte Folded Reload
+	adcl	232(%esp), %edi                 # 4-byte Folded Reload
+	adcl	$0, 96(%esp)                    # 4-byte Folded Spill
+	addl	24(%esp), %ebx                  # 4-byte Folded Reload
+	movl	64(%esp), %ebp                  # 4-byte Reload
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	100(%esp), %ecx                 # 4-byte Folded Reload
+	movl	104(%esp), %edx                 # 4-byte Reload
+	adcl	112(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	124(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	128(%esp), %edx                 # 4-byte Reload
+	adcl	144(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	84(%esp), %edx                  # 4-byte Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	adcl	$0, (%esp)                      # 4-byte Folded Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	addl	160(%esp), %edx                 # 4-byte Folded Reload
+	adcl	%eax, %ebx
+	movl	264(%esp), %eax
+	movl	%edx, 8(%eax)
+	adcl	92(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 64(%esp)                  # 4-byte Spill
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	%esi, 24(%esp)                  # 4-byte Folded Spill
+	adcl	%edi, 28(%esp)                  # 4-byte Folded Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	96(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	$0, (%esp)                      # 4-byte Folded Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	148(%esp), %edx                 # 4-byte Folded Reload
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	100(%esp), %esi                 # 4-byte Folded Reload
+	movl	68(%esp), %ebp                  # 4-byte Reload
+	adcl	104(%esp), %ebp                 # 4-byte Folded Reload
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	116(%esp), %ecx                 # 4-byte Folded Reload
+	movl	108(%esp), %eax                 # 4-byte Reload
+	adcl	132(%esp), %eax                 # 4-byte Folded Reload
 	movl	%eax, %edi
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	addl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	80(%esp), %eax
-	movl	%ecx, 12(%eax)
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	sbbl	%ecx, %ecx
-	movl	%ebx, %eax
-	movl	84(%esp), %edx
-	mull	12(%edx)
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	movl	84(%esp), %edx
-	mull	4(%edx)
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	movl	84(%esp), %edx
-	mull	(%edx)
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%ebx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	mull	%ebx
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	andl	$1, %ecx
-	addl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	addl	(%esp), %ebp            # 4-byte Folded Reload
-	movl	80(%esp), %ebx
-	movl	%ebp, 16(%ebx)
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%eax, %ebp
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	12(%esp), %esi          # 4-byte Folded Reload
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 20(%ebx)
-	adcl	%edx, %ebp
-	movl	%edi, 24(%ebx)
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebp, 28(%ebx)
-	movl	%ecx, 32(%ebx)
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%ebx)
-	addl	$60, %esp
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	152(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	88(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	addl	184(%esp), %ebx                 # 4-byte Folded Reload
+	adcl	64(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	264(%esp), %edx
+	movl	%ebx, 12(%edx)
+	adcl	32(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 68(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	%ecx, 4(%esp)                   # 4-byte Folded Spill
+	adcl	$0, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	56(%esp), %ebx                  # 4-byte Reload
+	addl	180(%esp), %ebx                 # 4-byte Folded Reload
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	adcl	112(%esp), %ebp                 # 4-byte Folded Reload
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	116(%esp), %edi                 # 4-byte Folded Reload
+	movl	72(%esp), %edx                  # 4-byte Reload
+	adcl	108(%esp), %edx                 # 4-byte Folded Reload
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	120(%esp), %ecx                 # 4-byte Folded Reload
+	movl	136(%esp), %esi                 # 4-byte Reload
+	adcl	156(%esp), %esi                 # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	168(%esp), %esi                 # 4-byte Reload
+	adcl	$0, %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	addl	192(%esp), %eax                 # 4-byte Folded Reload
+	adcl	48(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	movl	264(%esp), %ebx
+	movl	%eax, 16(%ebx)
+	adcl	68(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	%eax, (%esp)                    # 4-byte Folded Spill
+	adcl	$0, %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	60(%esp), %edi                  # 4-byte Reload
+	addl	188(%esp), %edi                 # 4-byte Folded Reload
+	movl	196(%esp), %ecx                 # 4-byte Reload
+	adcl	124(%esp), %ecx                 # 4-byte Folded Reload
+	movl	128(%esp), %ebx                 # 4-byte Reload
+	adcl	132(%esp), %ebx                 # 4-byte Folded Reload
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	120(%esp), %ebp                 # 4-byte Folded Reload
+	movl	76(%esp), %esi                  # 4-byte Reload
+	adcl	136(%esp), %esi                 # 4-byte Folded Reload
+	movl	80(%esp), %edx                  # 4-byte Reload
+	adcl	172(%esp), %edx                 # 4-byte Folded Reload
+	movl	164(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	addl	208(%esp), %eax                 # 4-byte Folded Reload
+	adcl	52(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	264(%esp), %edi
+	movl	%eax, 20(%edi)
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, %edi
+	adcl	72(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	40(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	(%esp), %esi                    # 4-byte Folded Reload
+	movl	%esi, 76(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	adcl	$0, 4(%esp)                     # 4-byte Folded Spill
+	movl	212(%esp), %ecx                 # 4-byte Reload
+	addl	200(%esp), %ecx                 # 4-byte Folded Reload
+	movl	204(%esp), %esi                 # 4-byte Reload
+	adcl	144(%esp), %esi                 # 4-byte Folded Reload
+	movl	152(%esp), %eax                 # 4-byte Reload
+	adcl	%eax, 84(%esp)                  # 4-byte Folded Spill
+	movl	156(%esp), %eax                 # 4-byte Reload
+	adcl	%eax, 88(%esp)                  # 4-byte Folded Spill
+	movl	168(%esp), %ebp                 # 4-byte Reload
+	adcl	172(%esp), %ebp                 # 4-byte Folded Reload
+	movl	164(%esp), %eax                 # 4-byte Reload
+	adcl	%eax, 176(%esp)                 # 4-byte Folded Spill
+	movl	240(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	60(%esp), %edx                  # 4-byte Reload
+	addl	216(%esp), %edx                 # 4-byte Folded Reload
+	adcl	%edi, %ecx
+	movl	264(%esp), %edi
+	movl	%edx, 24(%edi)
+	movl	%esi, %edx
+	adcl	%ebx, %edx
+	movl	%ecx, 28(%edi)
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%edx, 32(%edi)
+	movl	88(%esp), %edx                  # 4-byte Reload
+	adcl	76(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 36(%edi)
+	movl	%ebp, %ecx
+	adcl	80(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%edx, 40(%edi)
+	movl	%ecx, 44(%edi)
+	movl	176(%esp), %ecx                 # 4-byte Reload
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 48(%edi)
+	adcl	$0, %eax
+	movl	%eax, 52(%edi)
+	addl	$244, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end67:
-	.size	mcl_fpDbl_sqrPre5L, .Lfunc_end67-mcl_fpDbl_sqrPre5L
-
-	.globl	mcl_fp_mont5L
-	.align	16, 0x90
-	.type	mcl_fp_mont5L,@function
-mcl_fp_mont5L:                          # @mcl_fp_mont5L
-# BB#0:
+.Lfunc_end25:
+	.size	mcl_fpDbl_sqrPre7L, .Lfunc_end25-mcl_fpDbl_sqrPre7L
+                                        # -- End function
+	.globl	mcl_fp_mont7L                   # -- Begin function mcl_fp_mont7L
+	.p2align	4, 0x90
+	.type	mcl_fp_mont7L,@function
+mcl_fp_mont7L:                          # @mcl_fp_mont7L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$112, %esp
-	movl	136(%esp), %ebx
-	movl	(%ebx), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	140(%esp), %ecx
-	movl	(%ecx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	144(%esp), %esi
-	movl	-4(%esi), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	imull	%edx, %ecx
-	movl	(%esi), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	16(%esi), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	8(%esi), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	4(%esi), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	4(%ebx), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	16(%ebx), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	12(%ebx), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	8(%ebx), %ebx
-	movl	%ebx, 76(%esp)          # 4-byte Spill
+	subl	$152, %esp
+	movl	176(%esp), %edi
+	movl	(%edi), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	180(%esp), %ecx
+	movl	(%ecx), %esi
+	mull	%esi
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	184(%esp), %ebx
+	movl	-4(%ebx), %ecx
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	imull	%eax, %ecx
+	movl	24(%ebx), %edx
+	movl	%edx, 132(%esp)                 # 4-byte Spill
 	movl	%ecx, %eax
 	mull	%edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	20(%ebx), %edx
+	movl	%edx, 128(%esp)                 # 4-byte Spill
 	movl	%ecx, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
+	mull	%edx
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	16(%ebx), %edx
+	movl	%edx, 124(%esp)                 # 4-byte Spill
 	movl	%ecx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
+	mull	%edx
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	12(%ebx), %edx
+	movl	%edx, 120(%esp)                 # 4-byte Spill
 	movl	%ecx, %eax
-	mull	%esi
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
+	mull	%edx
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	8(%ebx), %edx
+	movl	%edx, 112(%esp)                 # 4-byte Spill
 	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	movl	32(%esp), %ecx          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	mull	%ecx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%ecx
+	mull	%edx
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	(%ebx), %ebp
+	movl	%ebp, 108(%esp)                 # 4-byte Spill
+	movl	4(%ebx), %edx
+	movl	%edx, 116(%esp)                 # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%ebp
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%edi, %ebx
+	movl	24(%edi), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	mull	%esi
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	20(%edi), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	mull	%esi
+	movl	%eax, 148(%esp)                 # 4-byte Spill
+	movl	%edx, %edi
+	movl	16(%ebx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	mull	%esi
+	movl	%eax, 144(%esp)                 # 4-byte Spill
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	12(%ebx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	mull	%esi
+	movl	%eax, 140(%esp)                 # 4-byte Spill
 	movl	%edx, %ebp
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	mull	%ecx
+	movl	4(%ebx), %ecx
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	movl	8(%ebx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	mull	%esi
 	movl	%edx, %ebx
-	movl	%eax, %edi
-	addl	40(%esp), %edi          # 4-byte Folded Reload
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 60(%esp)          # 4-byte Folded Spill
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
-	addl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	16(%esp), %ebp          # 4-byte Reload
-	addl	36(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%edi, 40(%esp)          # 4-byte Folded Spill
-	adcl	%ebx, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	140(%esp), %eax
+	movl	%eax, 136(%esp)                 # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%esi
+	addl	36(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	136(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, 136(%esp)                 # 4-byte Spill
+	adcl	140(%esp), %ebx                 # 4-byte Folded Reload
+	movl	%ebx, 140(%esp)                 # 4-byte Spill
+	adcl	144(%esp), %ebp                 # 4-byte Folded Reload
+	movl	%ebp, 144(%esp)                 # 4-byte Spill
+	movl	148(%esp), %eax                 # 4-byte Reload
+	adcl	%eax, (%esp)                    # 4-byte Folded Spill
+	adcl	88(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 88(%esp)                  # 4-byte Spill
+	adcl	$0, 8(%esp)                     # 4-byte Folded Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	addl	68(%esp), %ebp                  # 4-byte Folded Reload
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	56(%esp), %edi                  # 4-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	48(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	adcl	64(%esp), %ebx                  # 4-byte Folded Reload
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	movl	60(%esp), %eax                  # 4-byte Reload
+	addl	24(%esp), %eax                  # 4-byte Folded Reload
+	adcl	36(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	adcl	136(%esp), %edi                 # 4-byte Folded Reload
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	adcl	140(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	144(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	adcl	88(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	setb	64(%esp)                        # 1-byte Folded Spill
+	movl	180(%esp), %eax
 	movl	4(%eax), %edi
 	movl	%edi, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
 	movl	%edi, %eax
-	mull	68(%esp)                # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 44(%esp)                  # 4-byte Spill
 	movl	%edi, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 24(%esp)          # 4-byte Spill
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, %ecx
 	movl	%edi, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
+	mull	96(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %esi
 	movl	%eax, %ebp
 	movl	%edi, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%eax, %ebx
+	mull	84(%esp)                        # 4-byte Folded Reload
 	addl	%ebp, %edx
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%ebx, %ebp
-	imull	96(%esp), %ebp          # 4-byte Folded Reload
-	andl	$1, %eax
-	movl	%eax, %ebx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	adcl	%ecx, %esi
+	movl	%esi, %ebp
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	56(%esp), %edi                  # 4-byte Folded Reload
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	60(%esp), %esi                  # 4-byte Folded Reload
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	adcl	36(%esp), %ebx                  # 4-byte Folded Reload
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	$0, %ecx
+	addl	40(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 48(%esp)                  # 4-byte Folded Spill
+	adcl	16(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movzbl	64(%esp), %eax                  # 1-byte Folded Reload
+	adcl	%eax, %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	setb	32(%esp)                        # 1-byte Folded Spill
+	movl	92(%esp), %ebp                  # 4-byte Reload
+	imull	36(%esp), %ebp                  # 4-byte Folded Reload
 	movl	%ebp, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
+	mull	132(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
 	movl	%ebp, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 32(%esp)          # 4-byte Spill
+	mull	128(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 60(%esp)                  # 4-byte Spill
 	movl	%ebp, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
+	mull	124(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	120(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
 	movl	%edx, %esi
-	movl	%eax, 28(%esp)          # 4-byte Spill
+	movl	%eax, 88(%esp)                  # 4-byte Spill
 	movl	%ebp, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 24(%esp)          # 4-byte Spill
+	mull	116(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, %ecx
 	movl	%ebp, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebp
-	addl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%ecx, %edx
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	52(%esp), %ecx          # 4-byte Reload
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%eax, %edi
+	addl	%ecx, %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	88(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	68(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, %ebp
+	movl	64(%esp), %esi                  # 4-byte Reload
+	adcl	56(%esp), %esi                  # 4-byte Folded Reload
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	60(%esp), %eax                  # 4-byte Folded Reload
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	28(%esp), %edx                  # 4-byte Folded Reload
+	movl	16(%esp), %ecx                  # 4-byte Reload
 	adcl	$0, %ecx
-	addl	36(%esp), %eax          # 4-byte Folded Reload
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	$0, %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	140(%esp), %eax
-	movl	8(%eax), %ebx
-	movl	%ebx, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	68(%esp)                # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%eax, %esi
-	addl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	64(%esp), %ebx          # 4-byte Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	%ebp, %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	%edi, 48(%esp)          # 4-byte Folded Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%esi, %ebp
-	imull	96(%esp), %ebp          # 4-byte Folded Reload
-	andl	$1, %eax
-	movl	%eax, %ebx
-	movl	%ebp, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebp
-	addl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%edi, %edx
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	addl	40(%esp), %eax          # 4-byte Folded Reload
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	$0, %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	140(%esp), %eax
-	movl	12(%eax), %edi
-	movl	%edi, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	68(%esp)                # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%eax, %ebx
-	addl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	%ebp, %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	adcl	%esi, 48(%esp)          # 4-byte Folded Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%ebx, %ebp
-	imull	96(%esp), %ebp          # 4-byte Folded Reload
-	andl	$1, %eax
-	movl	%eax, %ebx
-	movl	%ebp, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebp
-	addl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%ecx, %edx
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	40(%esp), %eax          # 4-byte Folded Reload
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	$0, %ebx
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	movl	140(%esp), %eax
-	movl	16(%eax), %ebx
-	movl	%ebx, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	68(%esp)                # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	addl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	76(%esp), %ebx          # 4-byte Reload
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, %eax
+	addl	36(%esp), %edi                  # 4-byte Folded Reload
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 64(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movzbl	32(%esp), %eax                  # 1-byte Folded Reload
 	adcl	$0, %eax
-	movl	84(%esp), %esi          # 4-byte Reload
-	addl	%ebp, %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	adcl	%edi, 80(%esp)          # 4-byte Folded Spill
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 76(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	sbbl	%ebx, %ebx
-	movl	96(%esp), %ecx          # 4-byte Reload
-	imull	%esi, %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	andl	$1, %ebx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	180(%esp), %eax
+	movl	8(%eax), %ecx
 	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 60(%esp)          # 4-byte Spill
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, 52(%esp)          # 4-byte Spill
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	mull	88(%esp)                # 4-byte Folded Reload
-	addl	48(%esp), %eax          # 4-byte Folded Reload
-	adcl	%ecx, %edx
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	adcl	60(%esp), %ebp          # 4-byte Folded Reload
-	adcl	$0, %edi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	84(%esp), %ecx          # 4-byte Folded Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	adcl	76(%esp), %esi          # 4-byte Folded Reload
-	adcl	72(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edi          # 4-byte Folded Reload
-	adcl	$0, %ebx
-	movl	%eax, %ecx
-	subl	100(%esp), %ecx         # 4-byte Folded Reload
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	%edx, %ecx
-	sbbl	88(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	%esi, %ecx
-	sbbl	104(%esp), %ecx         # 4-byte Folded Reload
-	sbbl	108(%esp), %ebp         # 4-byte Folded Reload
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	%edi, %ebp
-	sbbl	92(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB68_2
-# BB#1:
-	movl	88(%esp), %edx          # 4-byte Reload
-.LBB68_2:
-	testb	%bl, %bl
-	jne	.LBB68_4
-# BB#3:
-	movl	100(%esp), %eax         # 4-byte Reload
-.LBB68_4:
-	movl	132(%esp), %ebx
-	movl	%eax, (%ebx)
-	movl	%edx, 4(%ebx)
-	jne	.LBB68_6
-# BB#5:
-	movl	%ecx, %esi
-.LBB68_6:
-	movl	%esi, 8(%ebx)
-	movl	96(%esp), %eax          # 4-byte Reload
-	jne	.LBB68_8
-# BB#7:
-	movl	108(%esp), %eax         # 4-byte Reload
-.LBB68_8:
-	movl	%eax, 12(%ebx)
-	jne	.LBB68_10
-# BB#9:
-	movl	%ebp, %edi
-.LBB68_10:
-	movl	%edi, 16(%ebx)
-	addl	$112, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end68:
-	.size	mcl_fp_mont5L, .Lfunc_end68-mcl_fp_mont5L
-
-	.globl	mcl_fp_montNF5L
-	.align	16, 0x90
-	.type	mcl_fp_montNF5L,@function
-mcl_fp_montNF5L:                        # @mcl_fp_montNF5L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$104, %esp
-	movl	128(%esp), %ebx
-	movl	(%ebx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	132(%esp), %ecx
-	movl	(%ecx), %ecx
-	mull	%ecx
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	136(%esp), %esi
-	movl	-4(%esi), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	imull	%edx, %edi
-	movl	(%esi), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	16(%esi), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	4(%esi), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	4(%ebx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	16(%ebx), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	12(%ebx), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	8(%ebx), %ebx
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	%edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%ecx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	%ecx
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
 	movl	%edx, %edi
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%ecx
-	movl	%edx, %ebp
-	movl	%eax, %ebx
-	movl	76(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, %ecx
+	movl	%eax, %ebp
+	movl	%ecx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
 	movl	%eax, %esi
-	addl	68(%esp), %esi          # 4-byte Folded Reload
-	adcl	%ebx, %ecx
-	adcl	(%esp), %ebp            # 4-byte Folded Reload
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	8(%esp), %edx           # 4-byte Reload
-	addl	48(%esp), %edx          # 4-byte Folded Reload
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	adcl	$0, %eax
-	addl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	4(%eax), %ebx
-	movl	%ebx, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	56(%esp)                # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	addl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	36(%esp), %ebx          # 4-byte Reload
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	%esi, 28(%esp)          # 4-byte Folded Spill
-	adcl	%ecx, 32(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	%ebp, %ecx
-	adcl	%edi, %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	addl	%esi, %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	adcl	%ebp, %ebx
+	movl	%ebx, %ebp
+	adcl	44(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	68(%esp), %esi                  # 4-byte Folded Reload
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	56(%esp), %edx                  # 4-byte Folded Reload
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	28(%esp), %esi          # 4-byte Reload
-	movl	%esi, %edi
-	imull	84(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
+	movl	48(%esp), %edi                  # 4-byte Reload
+	addl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, 36(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, 44(%esp)                  # 4-byte Folded Spill
+	adcl	40(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	setb	40(%esp)                        # 1-byte Folded Spill
+	movl	92(%esp), %esi                  # 4-byte Reload
+	imull	%edi, %esi
+	movl	%esi, %eax
+	mull	132(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	128(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	124(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	120(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
 	movl	%eax, %ebp
-	movl	%edi, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
+	movl	%esi, %eax
+	mull	116(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, %ecx
+	movl	%esi, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
 	movl	%eax, %ebx
-	movl	%edi, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	addl	%esi, %eax
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	adcl	%ecx, %ebp
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	68(%esp), %eax          # 4-byte Reload
+	addl	%ecx, %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	%ebp, %edi
+	movl	%edi, %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	88(%esp), %eax                  # 4-byte Folded Reload
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	68(%esp), %edi                  # 4-byte Folded Reload
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	56(%esp), %esi                  # 4-byte Folded Reload
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	64(%esp), %edx                  # 4-byte Folded Reload
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	addl	48(%esp), %ebx                  # 4-byte Folded Reload
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	36(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	adcl	60(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	adcl	44(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movzbl	40(%esp), %eax                  # 1-byte Folded Reload
 	adcl	$0, %eax
-	addl	%edx, %ebx
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	8(%eax), %ecx
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	180(%esp), %eax
+	movl	12(%eax), %ecx
 	movl	%ecx, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	56(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 20(%esp)          # 4-byte Spill
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
 	movl	%edx, %edi
-	movl	%eax, 16(%esp)          # 4-byte Spill
+	movl	%eax, %ebp
 	movl	%ecx, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	addl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	32(%esp), %edx          # 4-byte Reload
-	addl	%ebx, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
+	mull	96(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %ebx
-	imull	84(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	movl	%ebx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	addl	32(%esp), %eax          # 4-byte Folded Reload
-	adcl	%ebp, %ecx
-	movl	40(%esp), %ebx          # 4-byte Reload
-	adcl	%edi, %ebx
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	%esi, %edi
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, %esi
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	addl	%esi, %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	adcl	%ebp, %ebx
+	movl	%ebx, %ebp
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	68(%esp), %esi                  # 4-byte Folded Reload
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	56(%esp), %edx                  # 4-byte Folded Reload
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	$0, %eax
-	addl	%edx, %ecx
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	12(%eax), %edi
-	movl	%edi, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	56(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebp
-	addl	20(%esp), %ebp          # 4-byte Folded Reload
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	addl	%ecx, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	addl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, 64(%esp)                  # 4-byte Folded Spill
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, 48(%esp)                  # 4-byte Folded Spill
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	20(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	60(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	setb	36(%esp)                        # 1-byte Folded Spill
+	movl	92(%esp), %esi                  # 4-byte Reload
+	imull	%edi, %esi
+	movl	%esi, %eax
+	mull	132(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	128(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	124(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	120(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, %ebp
+	movl	%esi, %eax
+	mull	116(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %edi
 	movl	%eax, %ecx
-	imull	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
+	movl	%esi, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%eax, %ebx
+	addl	%ecx, %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	%ebp, %edi
+	movl	%edi, %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	88(%esp), %eax                  # 4-byte Folded Reload
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	68(%esp), %edi                  # 4-byte Folded Reload
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	56(%esp), %esi                  # 4-byte Folded Reload
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	60(%esp), %edx                  # 4-byte Folded Reload
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	addl	40(%esp), %ebx                  # 4-byte Folded Reload
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	64(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	adcl	44(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	adcl	48(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movzbl	36(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	180(%esp), %eax
+	movl	16(%eax), %ecx
 	movl	%ecx, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, %edi
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	addl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%edi, %edx
-	adcl	%ebp, %edx
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	%ebx, %edi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	%esi, %ecx
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	16(%eax), %ecx
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	52(%esp)                # 4-byte Folded Reload
+	mull	104(%esp)                       # 4-byte Folded Reload
 	movl	%edx, %edi
-	movl	%eax, 52(%esp)          # 4-byte Spill
+	movl	%eax, %ebp
 	movl	%ecx, %eax
-	mull	56(%esp)                # 4-byte Folded Reload
+	mull	96(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %esi
-	movl	%eax, 56(%esp)          # 4-byte Spill
+	movl	%eax, %ebx
 	movl	%ecx, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	%eax, 60(%esp)          # 4-byte Spill
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	addl	%ebx, %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	adcl	%ebp, %esi
+	movl	%esi, %ebp
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	68(%esp), %esi                  # 4-byte Folded Reload
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	56(%esp), %edx                  # 4-byte Folded Reload
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	40(%esp), %edi                  # 4-byte Reload
+	addl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, 64(%esp)                  # 4-byte Folded Spill
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, 48(%esp)                  # 4-byte Folded Spill
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	20(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	60(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	setb	52(%esp)                        # 1-byte Folded Spill
+	movl	92(%esp), %ecx                  # 4-byte Reload
+	imull	%edi, %ecx
 	movl	%ecx, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, 76(%esp)          # 4-byte Spill
+	mull	132(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
-	addl	76(%esp), %edx          # 4-byte Folded Reload
-	adcl	60(%esp), %ebp          # 4-byte Folded Reload
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	adcl	$0, %edi
-	addl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	68(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	84(%esp), %ecx          # 4-byte Reload
-	imull	%eax, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
+	mull	128(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 60(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
+	mull	124(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, %esi
+	mull	120(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 68(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
 	movl	%eax, %ebx
 	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	mull	88(%esp)                # 4-byte Folded Reload
-	addl	40(%esp), %ebx          # 4-byte Folded Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	adcl	$0, %edi
-	addl	52(%esp), %eax          # 4-byte Folded Reload
-	adcl	%edx, %ecx
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ebp          # 4-byte Folded Reload
-	adcl	76(%esp), %edi          # 4-byte Folded Reload
-	movl	%eax, %ebx
-	subl	100(%esp), %ebx         # 4-byte Folded Reload
-	movl	%ecx, %edx
-	sbbl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	sbbl	92(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	%ebp, %edx
-	sbbl	96(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	%edi, %edx
-	movl	%edi, %esi
-	sbbl	80(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, %edx
-	sarl	$31, %edx
-	testl	%edx, %edx
-	js	.LBB69_2
-# BB#1:
+	mull	116(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, %esi
+	movl	%ecx, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	addl	%esi, %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%edi, %ecx
+	adcl	%ebx, %ecx
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	68(%esp), %edi                  # 4-byte Folded Reload
+	adcl	56(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, %ebx
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	60(%esp), %ebp                  # 4-byte Folded Reload
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	44(%esp), %esi                  # 4-byte Folded Reload
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	$0, %edx
+	addl	40(%esp), %eax                  # 4-byte Folded Reload
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movzbl	52(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	180(%esp), %eax
+	movl	20(%eax), %esi
+	movl	%esi, %eax
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%esi, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, %ebx
+	movl	%esi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	addl	%ebx, %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	56(%esp), %edi                  # 4-byte Folded Reload
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	60(%esp), %esi                  # 4-byte Folded Reload
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	36(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ebp, %ecx
+	adcl	$0, %ecx
+	addl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	%ebp, 52(%esp)                  # 4-byte Folded Spill
+	movl	(%esp), %ebp                    # 4-byte Reload
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	adcl	64(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	setb	40(%esp)                        # 1-byte Folded Spill
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	imull	%eax, %ebx
 	movl	%ebx, %eax
-.LBB69_2:
-	movl	124(%esp), %edx
-	movl	%eax, (%edx)
-	js	.LBB69_4
-# BB#3:
-	movl	88(%esp), %ecx          # 4-byte Reload
-.LBB69_4:
-	movl	%ecx, 4(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	js	.LBB69_6
-# BB#5:
-	movl	92(%esp), %eax          # 4-byte Reload
-.LBB69_6:
-	movl	%eax, 8(%edx)
-	js	.LBB69_8
-# BB#7:
-	movl	100(%esp), %ebp         # 4-byte Reload
-.LBB69_8:
-	movl	%ebp, 12(%edx)
-	js	.LBB69_10
-# BB#9:
-	movl	%edi, %esi
-.LBB69_10:
-	movl	%esi, 16(%edx)
-	addl	$104, %esp
+	mull	132(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	128(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	124(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	120(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	116(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, %ecx
+	movl	%ebx, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%eax, %edi
+	addl	%ecx, %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	88(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	68(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, %ebx
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	60(%esp), %esi                  # 4-byte Folded Reload
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	addl	36(%esp), %edi                  # 4-byte Folded Reload
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	52(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	adcl	(%esp), %ebp                    # 4-byte Folded Reload
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	adcl	48(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 52(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	32(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	64(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movzbl	40(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	180(%esp), %eax
+	movl	24(%eax), %ecx
+	movl	%ecx, %eax
+	mull	80(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 100(%esp)                 # 4-byte Spill
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, %ebp
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	addl	%ebp, %edx
+	movl	%edx, 104(%esp)                 # 4-byte Spill
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	72(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, %ebp
+	movl	100(%esp), %edi                 # 4-byte Reload
+	adcl	64(%esp), %edi                  # 4-byte Folded Reload
+	movl	76(%esp), %edx                  # 4-byte Reload
+	adcl	40(%esp), %edx                  # 4-byte Folded Reload
+	movl	80(%esp), %ecx                  # 4-byte Reload
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ebx, %eax
+	adcl	$0, %eax
+	movl	84(%esp), %esi                  # 4-byte Reload
+	addl	4(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 84(%esp)                  # 4-byte Spill
+	movl	(%esp), %ebx                    # 4-byte Reload
+	adcl	%ebx, 104(%esp)                 # 4-byte Folded Spill
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	adcl	%ebx, 12(%esp)                  # 4-byte Folded Spill
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 100(%esp)                 # 4-byte Spill
+	adcl	20(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 80(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	setb	4(%esp)                         # 1-byte Folded Spill
+	movl	92(%esp), %edi                  # 4-byte Reload
+	imull	%esi, %edi
+	movl	%edi, %eax
+	mull	132(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 96(%esp)                  # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%edi, %eax
+	mull	128(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 92(%esp)                  # 4-byte Spill
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	124(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%edi, %eax
+	mull	120(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	116(%esp)                       # 4-byte Folded Reload
+	movl	%eax, %edi
+	addl	%ecx, %edi
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	adcl	72(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 72(%esp)                  # 4-byte Spill
+	movl	92(%esp), %ecx                  # 4-byte Reload
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	96(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	24(%esp), %esi                  # 4-byte Reload
+	addl	84(%esp), %esi                  # 4-byte Folded Reload
+	adcl	104(%esp), %edi                 # 4-byte Folded Reload
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	100(%esp), %ebp                 # 4-byte Folded Reload
+	movl	72(%esp), %esi                  # 4-byte Reload
+	adcl	76(%esp), %esi                  # 4-byte Folded Reload
+	adcl	80(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, %ecx
+	movzbl	4(%esp), %eax                   # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%edi, 84(%esp)                  # 4-byte Spill
+	subl	108(%esp), %edi                 # 4-byte Folded Reload
+	movl	%edi, 108(%esp)                 # 4-byte Spill
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	sbbl	116(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, 116(%esp)                 # 4-byte Spill
+	movl	%ebx, 80(%esp)                  # 4-byte Spill
+	movl	%ebx, %edx
+	sbbl	112(%esp), %edx                 # 4-byte Folded Reload
+	movl	%ebp, 112(%esp)                 # 4-byte Spill
+	movl	%ebp, %edi
+	sbbl	120(%esp), %edi                 # 4-byte Folded Reload
+	movl	%esi, %ebp
+	movl	%esi, 72(%esp)                  # 4-byte Spill
+	sbbl	124(%esp), %ebp                 # 4-byte Folded Reload
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	sbbl	128(%esp), %ebx                 # 4-byte Folded Reload
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	sbbl	132(%esp), %ecx                 # 4-byte Folded Reload
+	sbbl	$0, %eax
+	testb	$1, %al
+	jne	.LBB26_1
+# %bb.2:
+	movl	172(%esp), %eax
+	movl	%ecx, 24(%eax)
+	jne	.LBB26_3
+.LBB26_4:
+	movl	%ebx, 20(%eax)
+	movl	108(%esp), %ecx                 # 4-byte Reload
+	jne	.LBB26_5
+.LBB26_6:
+	movl	%ebp, 16(%eax)
+	jne	.LBB26_7
+.LBB26_8:
+	movl	%edi, 12(%eax)
+	jne	.LBB26_9
+.LBB26_10:
+	movl	%edx, 8(%eax)
+	movl	116(%esp), %edx                 # 4-byte Reload
+	jne	.LBB26_11
+.LBB26_12:
+	movl	%edx, 4(%eax)
+	je	.LBB26_14
+.LBB26_13:
+	movl	84(%esp), %ecx                  # 4-byte Reload
+.LBB26_14:
+	movl	%ecx, (%eax)
+	addl	$152, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end69:
-	.size	mcl_fp_montNF5L, .Lfunc_end69-mcl_fp_montNF5L
-
-	.globl	mcl_fp_montRed5L
-	.align	16, 0x90
-	.type	mcl_fp_montRed5L,@function
-mcl_fp_montRed5L:                       # @mcl_fp_montRed5L
-# BB#0:
+.LBB26_1:
+	movl	96(%esp), %ecx                  # 4-byte Reload
+	movl	172(%esp), %eax
+	movl	%ecx, 24(%eax)
+	je	.LBB26_4
+.LBB26_3:
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 20(%eax)
+	movl	108(%esp), %ecx                 # 4-byte Reload
+	je	.LBB26_6
+.LBB26_5:
+	movl	72(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 16(%eax)
+	je	.LBB26_8
+.LBB26_7:
+	movl	112(%esp), %edi                 # 4-byte Reload
+	movl	%edi, 12(%eax)
+	je	.LBB26_10
+.LBB26_9:
+	movl	80(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%eax)
+	movl	116(%esp), %edx                 # 4-byte Reload
+	je	.LBB26_12
+.LBB26_11:
+	movl	76(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%eax)
+	jne	.LBB26_13
+	jmp	.LBB26_14
+.Lfunc_end26:
+	.size	mcl_fp_mont7L, .Lfunc_end26-mcl_fp_mont7L
+                                        # -- End function
+	.globl	mcl_fp_montNF7L                 # -- Begin function mcl_fp_montNF7L
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF7L,@function
+mcl_fp_montNF7L:                        # @mcl_fp_montNF7L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$88, %esp
-	movl	116(%esp), %eax
-	movl	-4(%eax), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	(%eax), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	112(%esp), %esi
-	movl	(%esi), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	imull	%edx, %ecx
-	movl	16(%eax), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	12(%eax), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	8(%eax), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	4(%eax), %ebx
-	movl	%ebx, 60(%esp)          # 4-byte Spill
+	subl	$156, %esp
+	movl	180(%esp), %edi
+	movl	(%edi), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	184(%esp), %ecx
+	movl	(%ecx), %esi
+	mull	%esi
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	188(%esp), %ebx
+	movl	-4(%ebx), %ecx
+	movl	%ecx, 80(%esp)                  # 4-byte Spill
+	imull	%eax, %ecx
+	movl	24(%ebx), %edx
+	movl	%edx, 124(%esp)                 # 4-byte Spill
 	movl	%ecx, %eax
 	mull	%edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	20(%ebx), %edx
+	movl	%edx, 120(%esp)                 # 4-byte Spill
 	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 52(%esp)          # 4-byte Spill
+	mull	%edx
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	16(%ebx), %edx
+	movl	%edx, 116(%esp)                 # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%edx
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	12(%ebx), %edx
+	movl	%edx, 112(%esp)                 # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%edx
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	8(%ebx), %edx
+	movl	%edx, 104(%esp)                 # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%edx
+	movl	%eax, 128(%esp)                 # 4-byte Spill
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	(%ebx), %ebp
+	movl	%ebp, 108(%esp)                 # 4-byte Spill
+	movl	4(%ebx), %edx
+	movl	%edx, 100(%esp)                 # 4-byte Spill
+	movl	%ecx, %eax
+	mull	%edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 152(%esp)                 # 4-byte Spill
 	movl	%ecx, %eax
 	mull	%ebp
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
+	movl	%eax, 148(%esp)                 # 4-byte Spill
+	movl	%edx, 132(%esp)                 # 4-byte Spill
+	movl	24(%edi), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	mull	%esi
+	movl	%eax, %ebp
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	20(%edi), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	mull	%esi
+	movl	%eax, 144(%esp)                 # 4-byte Spill
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	16(%edi), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	mull	%esi
+	movl	%eax, 140(%esp)                 # 4-byte Spill
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	12(%edi), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	mull	%esi
+	movl	%eax, 136(%esp)                 # 4-byte Spill
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	4(%edi), %ecx
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	movl	8(%edi), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	mull	%esi
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, %ebx
 	movl	%ecx, %eax
-	mull	%ebx
-	movl	%edx, %ebp
-	movl	%eax, %edi
+	mull	%esi
+	addl	44(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	%ebx, %edx
+	movl	%edx, %edi
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	136(%esp), %esi                 # 4-byte Folded Reload
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	adcl	140(%esp), %ebx                 # 4-byte Folded Reload
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	144(%esp), %edx                 # 4-byte Folded Reload
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	%ebp, %ecx
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	148(%esp), %ebp                 # 4-byte Reload
+	addl	60(%esp), %ebp                  # 4-byte Folded Reload
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	152(%esp), %ebp                 # 4-byte Folded Reload
+	adcl	128(%esp), %edi                 # 4-byte Folded Reload
+	adcl	64(%esp), %esi                  # 4-byte Folded Reload
+	adcl	52(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	adcl	$0, %eax
+	addl	132(%esp), %ebp                 # 4-byte Folded Reload
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	40(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	adcl	(%esp), %edx                    # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	184(%esp), %eax
+	movl	4(%eax), %ecx
 	movl	%ecx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
+	mull	76(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %ebx
-	addl	%edi, %ebx
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	80(%esp), %eax          # 4-byte Folded Reload
-	adcl	4(%esi), %ebx
-	adcl	8(%esi), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	adcl	12(%esi), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	adcl	16(%esi), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	20(%esi), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	36(%esi), %eax
-	movl	32(%esi), %ecx
-	movl	28(%esi), %edx
-	movl	24(%esi), %esi
-	adcl	$0, %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ebx, %ecx
-	imull	76(%esp), %ecx          # 4-byte Folded Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	68(%esp)                # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)            # 4-byte Spill
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, %esi
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 40(%esp)                  # 4-byte Spill
 	movl	%ecx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	addl	8(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	adcl	40(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	adcl	44(%esp), %edi                  # 4-byte Folded Reload
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	56(%esp), %ecx                  # 4-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	60(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ebx, %eax
+	adcl	$0, %eax
+	movl	48(%esp), %ebx                  # 4-byte Reload
+	addl	%ebp, %ebx
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	%ebp, 8(%esp)                   # 4-byte Folded Spill
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	%ebp, 40(%esp)                  # 4-byte Folded Spill
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	36(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	80(%esp), %edi                  # 4-byte Reload
+	imull	%ebx, %edi
+	movl	%edi, %eax
+	mull	124(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	120(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	116(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, %ebp
+	movl	%edi, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, %ecx
+	movl	%edi, %eax
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	%eax, %esi
+	movl	%edi, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	addl	%ebx, %eax
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	adcl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, %edx
+	adcl	60(%esp), %ebp                  # 4-byte Folded Reload
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	movl	24(%esp), %edi                  # 4-byte Reload
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	addl	56(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	adcl	64(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	48(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	adcl	52(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	184(%esp), %eax
+	movl	8(%eax), %ebp
+	movl	%ebp, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
 	movl	%eax, %edi
-	addl	%esi, %edx
+	movl	%ebp, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, %esi
+	movl	%ebp, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
 	movl	%edx, %ebp
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	%ebx, %edi
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	$0, 20(%esp)            # 4-byte Folded Spill
-	adcl	$0, 52(%esp)            # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ebp, %esi
-	imull	76(%esp), %esi          # 4-byte Folded Reload
+	addl	%esi, %ebp
+	adcl	%edi, %ebx
+	movl	%ebx, %edi
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	60(%esp), %esi                  # 4-byte Reload
+	addl	56(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 60(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	80(%esp), %esi                  # 4-byte Reload
+	movl	60(%esp), %edi                  # 4-byte Reload
+	imull	%edi, %esi
 	movl	%esi, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
+	mull	124(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-	mull	68(%esp)                # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
+	mull	120(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 8(%esp)           # 4-byte Spill
+	mull	116(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, %ebx
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 16(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebp
-	addl	%ebx, %ebp
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	16(%esp), %eax          # 4-byte Folded Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	$0, 52(%esp)            # 4-byte Folded Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ebp, %edi
-	imull	76(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	68(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	60(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%edx, %ebx
-	addl	8(%esp), %ebx           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	%ebp, 16(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	imull	%ebx, %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%esi, %eax
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 132(%esp)                 # 4-byte Spill
+	movl	%eax, %ecx
+	movl	%esi, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 128(%esp)                 # 4-byte Spill
+	addl	%edi, %eax
+	adcl	%ebp, %ecx
+	movl	%ecx, %ebx
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	addl	128(%esp), %ebx                 # 4-byte Folded Reload
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	adcl	132(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	64(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	adcl	44(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	184(%esp), %eax
+	movl	12(%eax), %esi
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-	mull	72(%esp)                # 4-byte Folded Reload
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %ebp
-	movl	%eax, 52(%esp)          # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-	mull	68(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 48(%esp)          # 4-byte Spill
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-	mull	64(%esp)                # 4-byte Folded Reload
+	mull	84(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %ecx
-	movl	%eax, 44(%esp)          # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
 	movl	%esi, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	mull	60(%esp)                # 4-byte Folded Reload
-	addl	24(%esp), %eax          # 4-byte Folded Reload
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	adcl	$0, %ebp
-	addl	%ebx, %esi
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebx          # 4-byte Reload
-	adcl	$0, %ebx
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, %ebx
+	movl	%esi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
 	movl	%eax, %esi
-	subl	84(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	sbbl	60(%esp), %esi          # 4-byte Folded Reload
-	sbbl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	sbbl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	movl	%edi, %ecx
-	sbbl	72(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	movl	%ebx, 80(%esp)          # 4-byte Spill
-	jne	.LBB70_2
-# BB#1:
-	movl	%esi, %edx
-.LBB70_2:
-	movl	80(%esp), %ebx          # 4-byte Reload
-	testb	%bl, %bl
-	jne	.LBB70_4
-# BB#3:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB70_4:
-	movl	108(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%edx, 4(%ecx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	jne	.LBB70_6
-# BB#5:
-	movl	%ebp, %eax
-.LBB70_6:
-	movl	%eax, 8(%ecx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	jne	.LBB70_8
-# BB#7:
-	movl	68(%esp), %eax          # 4-byte Reload
-.LBB70_8:
-	movl	%eax, 12(%ecx)
-	jne	.LBB70_10
-# BB#9:
-	movl	84(%esp), %edi          # 4-byte Reload
-.LBB70_10:
-	movl	%edi, 16(%ecx)
-	addl	$88, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end70:
-	.size	mcl_fp_montRed5L, .Lfunc_end70-mcl_fp_montRed5L
-
-	.globl	mcl_fp_addPre5L
-	.align	16, 0x90
-	.type	mcl_fp_addPre5L,@function
-mcl_fp_addPre5L:                        # @mcl_fp_addPre5L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	28(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	24(%esp), %esi
-	addl	(%esi), %ecx
-	adcl	4(%esi), %edx
-	movl	8(%eax), %edi
-	adcl	8(%esi), %edi
-	movl	12(%esi), %ebx
-	movl	16(%esi), %esi
-	adcl	12(%eax), %ebx
-	movl	16(%eax), %eax
-	movl	20(%esp), %ebp
-	movl	%ecx, (%ebp)
-	movl	%edx, 4(%ebp)
-	movl	%edi, 8(%ebp)
-	movl	%ebx, 12(%ebp)
-	adcl	%esi, %eax
-	movl	%eax, 16(%ebp)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end71:
-	.size	mcl_fp_addPre5L, .Lfunc_end71-mcl_fp_addPre5L
-
-	.globl	mcl_fp_subPre5L
-	.align	16, 0x90
-	.type	mcl_fp_subPre5L,@function
-mcl_fp_subPre5L:                        # @mcl_fp_subPre5L
-# BB#0:
-	pushl	%edi
-	pushl	%esi
-	movl	16(%esp), %ecx
-	movl	(%ecx), %edx
-	xorl	%eax, %eax
-	movl	20(%esp), %esi
-	subl	(%esi), %edx
-	movl	12(%esp), %edi
-	movl	%edx, (%edi)
-	movl	4(%ecx), %edx
-	sbbl	4(%esi), %edx
-	movl	%edx, 4(%edi)
-	movl	8(%ecx), %edx
-	sbbl	8(%esi), %edx
-	movl	%edx, 8(%edi)
-	movl	12(%ecx), %edx
-	sbbl	12(%esi), %edx
-	movl	%edx, 12(%edi)
-	movl	16(%esi), %edx
-	movl	16(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 16(%edi)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end72:
-	.size	mcl_fp_subPre5L, .Lfunc_end72-mcl_fp_subPre5L
-
-	.globl	mcl_fp_shr1_5L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_5L,@function
-mcl_fp_shr1_5L:                         # @mcl_fp_shr1_5L
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	20(%esp), %eax
-	movl	16(%eax), %ecx
-	movl	12(%eax), %edx
-	movl	8(%eax), %esi
-	movl	(%eax), %edi
-	movl	4(%eax), %eax
-	shrdl	$1, %eax, %edi
-	movl	16(%esp), %ebx
-	movl	%edi, (%ebx)
-	shrdl	$1, %esi, %eax
-	movl	%eax, 4(%ebx)
-	shrdl	$1, %edx, %esi
-	movl	%esi, 8(%ebx)
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 12(%ebx)
-	shrl	%ecx
-	movl	%ecx, 16(%ebx)
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end73:
-	.size	mcl_fp_shr1_5L, .Lfunc_end73-mcl_fp_shr1_5L
-
-	.globl	mcl_fp_add5L
-	.align	16, 0x90
-	.type	mcl_fp_add5L,@function
-mcl_fp_add5L:                           # @mcl_fp_add5L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	28(%esp), %ebx
-	movl	(%ebx), %eax
-	movl	4(%ebx), %ecx
-	movl	24(%esp), %edi
-	addl	(%edi), %eax
-	adcl	4(%edi), %ecx
-	movl	8(%ebx), %edx
-	adcl	8(%edi), %edx
-	movl	12(%edi), %esi
-	movl	16(%edi), %edi
-	adcl	12(%ebx), %esi
-	adcl	16(%ebx), %edi
-	movl	20(%esp), %ebx
-	movl	%eax, (%ebx)
-	movl	%ecx, 4(%ebx)
-	movl	%edx, 8(%ebx)
-	movl	%esi, 12(%ebx)
-	movl	%edi, 16(%ebx)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	32(%esp), %ebp
-	subl	(%ebp), %eax
-	sbbl	4(%ebp), %ecx
-	sbbl	8(%ebp), %edx
-	sbbl	12(%ebp), %esi
-	sbbl	16(%ebp), %edi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB74_2
-# BB#1:                                 # %nocarry
-	movl	20(%esp), %ebx
-	movl	%eax, (%ebx)
-	movl	%ecx, 4(%ebx)
-	movl	%edx, 8(%ebx)
-	movl	%esi, 12(%ebx)
-	movl	%edi, 16(%ebx)
-.LBB74_2:                               # %carry
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end74:
-	.size	mcl_fp_add5L, .Lfunc_end74-mcl_fp_add5L
-
-	.globl	mcl_fp_addNF5L
-	.align	16, 0x90
-	.type	mcl_fp_addNF5L,@function
-mcl_fp_addNF5L:                         # @mcl_fp_addNF5L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$20, %esp
-	movl	48(%esp), %esi
-	movl	(%esi), %ebx
-	movl	4(%esi), %eax
-	movl	44(%esp), %edi
-	addl	(%edi), %ebx
-	adcl	4(%edi), %eax
-	movl	16(%esi), %ecx
-	movl	12(%esi), %edx
-	movl	8(%esi), %ebp
-	adcl	8(%edi), %ebp
-	adcl	12(%edi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	16(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi
-	movl	%ebx, %esi
-	subl	(%edi), %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	%eax, %esi
-	sbbl	4(%edi), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	%ebp, %esi
-	sbbl	8(%edi), %esi
-	sbbl	12(%edi), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
+	addl	%ebx, %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	movl	%edi, (%esp)                    # 4-byte Spill
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, %ebx
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	52(%esp), %ebp                  # 4-byte Folded Reload
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	56(%esp), %edx                  # 4-byte Folded Reload
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	addl	60(%esp), %esi                  # 4-byte Folded Reload
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	%edi, 12(%esp)                  # 4-byte Folded Spill
+	movl	(%esp), %edi                    # 4-byte Reload
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, (%esp)                    # 4-byte Spill
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	80(%esp), %ebx                  # 4-byte Reload
+	imull	%esi, %ebx
+	movl	%ebx, %eax
+	mull	124(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	120(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	116(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, %edi
+	movl	%ebx, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, %ebp
+	movl	%ebx, %eax
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	%eax, %ecx
+	movl	%ebx, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	addl	%esi, %eax
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
 	movl	%ecx, %edx
-	sbbl	16(%edi), %edx
+	movl	%ebp, %ebx
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	adcl	60(%esp), %edi                  # 4-byte Folded Reload
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	40(%esp), %esi                  # 4-byte Folded Reload
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	addl	56(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	adcl	64(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	44(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	52(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	184(%esp), %eax
+	movl	16(%eax), %esi
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, %ebx
+	movl	%esi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%edx, %edi
+	addl	%ebx, %edi
+	adcl	12(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	adcl	64(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, %ebp
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	(%esp), %ebx                    # 4-byte Reload
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	40(%esp), %esi                  # 4-byte Reload
+	addl	56(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	adcl	60(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	80(%esp), %ebx                  # 4-byte Reload
+	movl	40(%esp), %edi                  # 4-byte Reload
+	imull	%edi, %ebx
+	movl	%ebx, %eax
+	mull	124(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	120(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	116(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, %esi
+	movl	%ebx, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 132(%esp)                 # 4-byte Spill
+	movl	%eax, %ebp
+	movl	%ebx, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 128(%esp)                 # 4-byte Spill
+	addl	%edi, %eax
+	adcl	56(%esp), %ebp                  # 4-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	adcl	60(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%esi, %edi
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	(%esp), %esi                    # 4-byte Folded Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	addl	128(%esp), %ebp                 # 4-byte Folded Reload
+	movl	%ebp, 56(%esp)                  # 4-byte Spill
+	adcl	132(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	adcl	64(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	adcl	48(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	adcl	44(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	adcl	52(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	184(%esp), %eax
+	movl	20(%eax), %esi
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%esi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, %ebx
+	movl	%esi, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
 	movl	%edx, %edi
+	movl	%eax, %ebp
+	movl	%esi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %esi
+	addl	%ebp, %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	adcl	%ebx, %edi
+	movl	%edi, %ebx
+	adcl	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	44(%esp), %ebp                  # 4-byte Folded Reload
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	addl	56(%esp), %esi                  # 4-byte Folded Reload
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	%edi, 12(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	60(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	adcl	36(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	80(%esp), %ebp                  # 4-byte Reload
+	imull	%esi, %ebp
+	movl	%ebp, %eax
+	mull	124(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	120(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	116(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, %edi
+	movl	%ebp, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, %ecx
+	movl	%ebp, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	addl	%esi, %eax
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, %edx
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	%ebx, %ebp
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	$0, %esi
+	addl	64(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	48(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	52(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	adcl	60(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	movl	184(%esp), %eax
+	movl	24(%eax), %edi
+	movl	%edi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%edi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%edi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, %ecx
+	movl	%edi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	addl	%ecx, %edx
+	movl	%edx, 96(%esp)                  # 4-byte Spill
+	movl	%ebp, %edx
+	adcl	84(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ebx, %ecx
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%esi, %edi
+	adcl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	68(%esp), %ebp                  # 4-byte Reload
+	adcl	8(%esp), %ebp                   # 4-byte Folded Reload
+	movl	72(%esp), %ebx                  # 4-byte Reload
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	movl	76(%esp), %esi                  # 4-byte Reload
+	adcl	$0, %esi
+	addl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 96(%esp)                  # 4-byte Folded Spill
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 68(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 72(%esp)                  # 4-byte Spill
+	adcl	$0, %esi
+	movl	%esi, 76(%esp)                  # 4-byte Spill
+	movl	80(%esp), %ecx                  # 4-byte Reload
+	imull	92(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, %eax
+	mull	124(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 84(%esp)                  # 4-byte Spill
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	120(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	116(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, %ebp
+	movl	%ecx, %eax
+	mull	112(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%ecx, %eax
+	mull	108(%esp)                       # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, %esi
+	movl	%ecx, %eax
+	mull	104(%esp)                       # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, %edi
+	movl	%ecx, %eax
+	mull	100(%esp)                       # 4-byte Folded Reload
+	movl	%eax, %ecx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	addl	92(%esp), %esi                  # 4-byte Folded Reload
+	adcl	96(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	20(%esp), %edi                  # 4-byte Folded Reload
+	adcl	24(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	88(%esp), %edx                  # 4-byte Reload
+	adcl	68(%esp), %edx                  # 4-byte Folded Reload
+	movl	80(%esp), %eax                  # 4-byte Reload
+	adcl	72(%esp), %eax                  # 4-byte Folded Reload
+	movl	76(%esp), %esi                  # 4-byte Reload
+	adcl	$0, %esi
+	addl	4(%esp), %ecx                   # 4-byte Folded Reload
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	adcl	16(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	32(%esp), %edx                  # 4-byte Folded Reload
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	adcl	84(%esp), %esi                  # 4-byte Folded Reload
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	subl	108(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, 108(%esp)                 # 4-byte Spill
+	movl	%edi, 68(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	sbbl	100(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ebx, 100(%esp)                 # 4-byte Spill
+	sbbl	104(%esp), %ebx                 # 4-byte Folded Reload
+	movl	%ebp, 104(%esp)                 # 4-byte Spill
+	sbbl	112(%esp), %ebp                 # 4-byte Folded Reload
+	movl	%edx, 88(%esp)                  # 4-byte Spill
+	sbbl	116(%esp), %edx                 # 4-byte Folded Reload
+	movl	80(%esp), %ecx                  # 4-byte Reload
+	sbbl	120(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%esi, 76(%esp)                  # 4-byte Spill
+	sbbl	124(%esp), %esi                 # 4-byte Folded Reload
+	movl	%esi, %edi
 	sarl	$31, %edi
 	testl	%edi, %edi
-	js	.LBB75_2
-# BB#1:
-	movl	(%esp), %ebx            # 4-byte Reload
-.LBB75_2:
-	movl	40(%esp), %edi
-	movl	%ebx, (%edi)
-	js	.LBB75_4
-# BB#3:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB75_4:
+	js	.LBB27_1
+# %bb.2:
+	movl	176(%esp), %edi
+	movl	%esi, 24(%edi)
+	js	.LBB27_3
+.LBB27_4:
+	movl	%ecx, 20(%edi)
+	js	.LBB27_5
+.LBB27_6:
+	movl	%edx, 16(%edi)
+	js	.LBB27_7
+.LBB27_8:
+	movl	%ebp, 12(%edi)
+	js	.LBB27_9
+.LBB27_10:
+	movl	%ebx, 8(%edi)
+	js	.LBB27_11
+.LBB27_12:
 	movl	%eax, 4(%edi)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	js	.LBB75_6
-# BB#5:
-	movl	%esi, %ebp
-.LBB75_6:
-	movl	%ebp, 8(%edi)
-	movl	16(%esp), %eax          # 4-byte Reload
-	js	.LBB75_8
-# BB#7:
-	movl	8(%esp), %ecx           # 4-byte Reload
-.LBB75_8:
-	movl	%ecx, 12(%edi)
-	js	.LBB75_10
-# BB#9:
-	movl	%edx, %eax
-.LBB75_10:
-	movl	%eax, 16(%edi)
-	addl	$20, %esp
+	movl	108(%esp), %eax                 # 4-byte Reload
+	jns	.LBB27_14
+.LBB27_13:
+	movl	72(%esp), %eax                  # 4-byte Reload
+.LBB27_14:
+	movl	%eax, (%edi)
+	addl	$156, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end75:
-	.size	mcl_fp_addNF5L, .Lfunc_end75-mcl_fp_addNF5L
-
-	.globl	mcl_fp_sub5L
-	.align	16, 0x90
-	.type	mcl_fp_sub5L,@function
-mcl_fp_sub5L:                           # @mcl_fp_sub5L
-# BB#0:
+.LBB27_1:
+	movl	76(%esp), %esi                  # 4-byte Reload
+	movl	176(%esp), %edi
+	movl	%esi, 24(%edi)
+	jns	.LBB27_4
+.LBB27_3:
+	movl	80(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%edi)
+	jns	.LBB27_6
+.LBB27_5:
+	movl	88(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 16(%edi)
+	jns	.LBB27_8
+.LBB27_7:
+	movl	104(%esp), %ebp                 # 4-byte Reload
+	movl	%ebp, 12(%edi)
+	jns	.LBB27_10
+.LBB27_9:
+	movl	100(%esp), %ebx                 # 4-byte Reload
+	movl	%ebx, 8(%edi)
+	jns	.LBB27_12
+.LBB27_11:
+	movl	68(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%edi)
+	movl	108(%esp), %eax                 # 4-byte Reload
+	js	.LBB27_13
+	jmp	.LBB27_14
+.Lfunc_end27:
+	.size	mcl_fp_montNF7L, .Lfunc_end27-mcl_fp_montNF7L
+                                        # -- End function
+	.globl	mcl_fp_montRed7L                # -- Begin function mcl_fp_montRed7L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed7L,@function
+mcl_fp_montRed7L:                       # @mcl_fp_montRed7L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %ecx
-	xorl	%ebx, %ebx
-	movl	28(%esp), %ebp
-	subl	(%ebp), %eax
-	sbbl	4(%ebp), %ecx
-	movl	8(%edi), %edx
-	sbbl	8(%ebp), %edx
-	movl	12(%edi), %esi
-	sbbl	12(%ebp), %esi
-	movl	16(%edi), %edi
-	sbbl	16(%ebp), %edi
-	movl	20(%esp), %ebp
-	movl	%eax, (%ebp)
-	movl	%ecx, 4(%ebp)
-	movl	%edx, 8(%ebp)
-	movl	%esi, 12(%ebp)
-	movl	%edi, 16(%ebp)
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	je	.LBB76_2
-# BB#1:                                 # %carry
-	movl	32(%esp), %ebx
-	addl	(%ebx), %eax
-	movl	%eax, (%ebp)
-	adcl	4(%ebx), %ecx
-	movl	%ecx, 4(%ebp)
-	adcl	8(%ebx), %edx
-	movl	%edx, 8(%ebp)
-	movl	12(%ebx), %eax
+	subl	$100, %esp
+	movl	128(%esp), %ebx
+	movl	-4(%ebx), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	124(%esp), %ecx
+	movl	(%ecx), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	imull	%eax, %edi
+	movl	24(%ebx), %ecx
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%ecx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	20(%ebx), %ecx
+	movl	%ecx, 84(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%ecx
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	16(%ebx), %ecx
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%ecx
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	12(%ebx), %ecx
+	movl	%ecx, 88(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%ecx
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	8(%ebx), %ecx
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%ecx
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%edx, %ecx
+	movl	(%ebx), %ebp
+	movl	%ebp, 68(%esp)                  # 4-byte Spill
+	movl	4(%ebx), %edx
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%edx
+	movl	%edx, %esi
+	movl	%eax, %ebx
+	movl	%edi, %eax
+	mull	%ebp
+	movl	%eax, %ebp
+	addl	%ebx, %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	20(%esp), %esi                  # 4-byte Folded Reload
+	movl	%ecx, %eax
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movl	40(%esp), %ebx                  # 4-byte Reload
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	addl	12(%esp), %ebp                  # 4-byte Folded Reload
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	movl	124(%esp), %edx
+	adcl	4(%edx), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	movl	124(%esp), %ebp
+	adcl	8(%ebp), %esi
+	movl	%esi, 60(%esp)                  # 4-byte Spill
+	adcl	12(%ebp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	16(%ebp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	adcl	20(%ebp), %ebx
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	24(%ebp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	28(%ebp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	setb	56(%esp)                        # 1-byte Folded Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	4(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, %edi
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	addl	%ebp, %eax
+	movl	%eax, %ebx
+	adcl	20(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	%edi, %esi
+	movl	%esi, %edi
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	64(%esp), %esi                  # 4-byte Folded Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	52(%esp), %ebp                  # 4-byte Folded Reload
+	movzbl	56(%esp), %eax                  # 1-byte Folded Reload
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	%eax, %edx
+	movl	80(%esp), %eax                  # 4-byte Reload
+	addl	4(%esp), %eax                   # 4-byte Folded Reload
+	adcl	60(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 20(%esp)                  # 4-byte Folded Spill
+	adcl	36(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	adcl	40(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	124(%esp), %eax
+	adcl	32(%eax), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	imull	%ebx, %edi
+	movl	%edi, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, %esi
+	movl	%edi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%edi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	addl	%ecx, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	%ebx, %edx
+	movl	%edx, %ebx
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	80(%esp), %edi                  # 4-byte Folded Reload
+	movl	%ebp, %ecx
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	%esi, %eax
-	movl	%eax, 12(%ebp)
-	movl	16(%ebx), %eax
+	movzbl	16(%esp), %esi                  # 1-byte Folded Reload
+	adcl	56(%esp), %esi                  # 4-byte Folded Reload
+	movl	64(%esp), %ebp                  # 4-byte Reload
+	addl	60(%esp), %ebp                  # 4-byte Folded Reload
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	124(%esp), %eax
+	adcl	36(%eax), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	setb	56(%esp)                        # 1-byte Folded Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%ebp, %ecx
+	movl	%ecx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %ebp
+	addl	%esi, %ebp
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	%ebx, %edi
+	movl	%edi, %ebx
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	64(%esp), %edi                  # 4-byte Folded Reload
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	52(%esp), %esi                  # 4-byte Folded Reload
+	movzbl	56(%esp), %eax                  # 1-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	%eax, %edx
+	movl	80(%esp), %eax                  # 4-byte Reload
+	addl	32(%esp), %eax                  # 4-byte Folded Reload
+	adcl	60(%esp), %ebp                  # 4-byte Folded Reload
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 24(%esp)                  # 4-byte Folded Spill
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	adcl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	124(%esp), %eax
+	adcl	40(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	setb	20(%esp)                        # 1-byte Folded Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	imull	%ebp, %esi
+	movl	%esi, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, %edi
+	movl	%esi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, %ecx
+	movl	%esi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	addl	%ebp, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	%ecx, %edx
+	movl	%edx, %ecx
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	80(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	48(%esp), %ebx                  # 4-byte Folded Reload
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	%edi, %eax
-	movl	%eax, 16(%ebp)
-.LBB76_2:                               # %nocarry
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end76:
-	.size	mcl_fp_sub5L, .Lfunc_end76-mcl_fp_sub5L
-
-	.globl	mcl_fp_subNF5L
-	.align	16, 0x90
-	.type	mcl_fp_subNF5L,@function
-mcl_fp_subNF5L:                         # @mcl_fp_subNF5L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$16, %esp
-	movl	40(%esp), %edi
-	movl	(%edi), %ecx
-	movl	4(%edi), %eax
-	movl	44(%esp), %ebx
-	subl	(%ebx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	sbbl	4(%ebx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	16(%edi), %esi
-	movl	12(%edi), %eax
-	movl	8(%edi), %ecx
-	sbbl	8(%ebx), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	sbbl	12(%ebx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	sbbl	16(%ebx), %esi
-	movl	%esi, %ebx
-	sarl	$31, %ebx
-	movl	%ebx, %ebp
-	shldl	$1, %esi, %ebp
-	movl	48(%esp), %edi
-	movl	4(%edi), %ecx
-	andl	%ebp, %ecx
-	andl	(%edi), %ebp
-	movl	16(%edi), %edx
-	andl	%ebx, %edx
-	movl	12(%edi), %eax
-	andl	%ebx, %eax
-	roll	%ebx
-	andl	8(%edi), %ebx
-	addl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	36(%esp), %edi
-	movl	%ebp, (%edi)
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
+	movzbl	20(%esp), %edi                  # 1-byte Folded Reload
+	adcl	56(%esp), %edi                  # 4-byte Folded Reload
+	movl	64(%esp), %esi                  # 4-byte Reload
+	addl	60(%esp), %esi                  # 4-byte Folded Reload
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	24(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	adcl	4(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	124(%esp), %eax
+	adcl	44(%eax), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	setb	60(%esp)                        # 1-byte Folded Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%esi, %ecx
+	movl	%ecx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	%eax, %esi
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	addl	%ebx, %eax
+	movl	%eax, %ebx
+	adcl	28(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	adcl	%esi, %ebp
+	movl	%ebp, %esi
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	52(%esp), %edi                  # 4-byte Folded Reload
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	56(%esp), %ebp                  # 4-byte Folded Reload
+	movzbl	60(%esp), %eax                  # 1-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	%eax, %edx
+	movl	64(%esp), %eax                  # 4-byte Reload
+	addl	36(%esp), %eax                  # 4-byte Folded Reload
+	adcl	32(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 28(%esp)                  # 4-byte Folded Spill
+	adcl	24(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	movl	%edi, (%esp)                    # 4-byte Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	movl	124(%esp), %eax
+	adcl	48(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	setb	12(%esp)                        # 1-byte Folded Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	imull	%ebx, %esi
+	movl	%esi, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	addl	56(%esp), %eax                  # 4-byte Folded Reload
+	adcl	48(%esp), %edx                  # 4-byte Folded Reload
+	adcl	52(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	60(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	44(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	%edi, %esi
+	adcl	32(%esp), %esi                  # 4-byte Folded Reload
+	movzbl	12(%esp), %edi                  # 1-byte Folded Reload
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	addl	36(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	adcl	40(%esp), %edx                  # 4-byte Folded Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	adcl	(%esp), %ebx                    # 4-byte Folded Reload
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	124(%esp), %esi
+	adcl	52(%esi), %edi
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	subl	68(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	sbbl	72(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	sbbl	76(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	%ebx, %edx
+	movl	(%esp), %esi                    # 4-byte Reload
+	sbbl	88(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ebp, %ecx
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	sbbl	92(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%esi, %ebp
+	sbbl	84(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edi, %eax
+	sbbl	96(%esp), %eax                  # 4-byte Folded Reload
+	movl	$0, %ebx
+	sbbl	%ebx, %ebx
+	testb	$1, %bl
+	jne	.LBB28_1
+# %bb.2:
+	movl	120(%esp), %edi
+	movl	%eax, 24(%edi)
+	jne	.LBB28_3
+.LBB28_4:
+	movl	%esi, 20(%edi)
+	movl	68(%esp), %eax                  # 4-byte Reload
+	jne	.LBB28_5
+.LBB28_6:
+	movl	%ecx, 16(%edi)
+	jne	.LBB28_7
+.LBB28_8:
+	movl	%edx, 12(%edi)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	76(%esp), %edx                  # 4-byte Reload
+	jne	.LBB28_9
+.LBB28_10:
+	movl	%edx, 8(%edi)
+	jne	.LBB28_11
+.LBB28_12:
 	movl	%ecx, 4(%edi)
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%ebx, 8(%edi)
-	movl	%eax, 12(%edi)
-	adcl	%esi, %edx
-	movl	%edx, 16(%edi)
-	addl	$16, %esp
+	je	.LBB28_14
+.LBB28_13:
+	movl	4(%esp), %eax                   # 4-byte Reload
+.LBB28_14:
+	movl	%eax, (%edi)
+	addl	$100, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end77:
-	.size	mcl_fp_subNF5L, .Lfunc_end77-mcl_fp_subNF5L
-
-	.globl	mcl_fpDbl_add5L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add5L,@function
-mcl_fpDbl_add5L:                        # @mcl_fpDbl_add5L
-# BB#0:
+.LBB28_1:
+	movl	%edi, %eax
+	movl	120(%esp), %edi
+	movl	%eax, 24(%edi)
+	je	.LBB28_4
+.LBB28_3:
+	movl	%ebp, %esi
+	movl	%esi, 20(%edi)
+	movl	68(%esp), %eax                  # 4-byte Reload
+	je	.LBB28_6
+.LBB28_5:
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%edi)
+	je	.LBB28_8
+.LBB28_7:
+	movl	8(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 12(%edi)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	76(%esp), %edx                  # 4-byte Reload
+	je	.LBB28_10
+.LBB28_9:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%edi)
+	je	.LBB28_12
+.LBB28_11:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	jne	.LBB28_13
+	jmp	.LBB28_14
+.Lfunc_end28:
+	.size	mcl_fp_montRed7L, .Lfunc_end28-mcl_fp_montRed7L
+                                        # -- End function
+	.globl	mcl_fp_montRedNF7L              # -- Begin function mcl_fp_montRedNF7L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF7L,@function
+mcl_fp_montRedNF7L:                     # @mcl_fp_montRedNF7L: writes 7 words to arg0 from the 14 words at arg1, using the 7 words at arg2 and the word at arg2[-1] (Montgomery reduction, going by the name - confirm against the generator)
+# %bb.0: after the prologue below: arg0 = 120(%esp), arg1 = 124(%esp), arg2 = 128(%esp)
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$28, %esp
-	movl	56(%esp), %edx
-	movl	52(%esp), %ecx
-	movl	12(%ecx), %ebx
-	movl	16(%ecx), %ebp
-	movl	8(%edx), %esi
-	movl	(%edx), %edi
-	addl	(%ecx), %edi
-	movl	48(%esp), %eax
-	movl	%edi, (%eax)
-	movl	4(%edx), %edi
-	adcl	4(%ecx), %edi
-	adcl	8(%ecx), %esi
-	adcl	12(%edx), %ebx
-	adcl	16(%edx), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	%edi, 4(%eax)
-	movl	28(%edx), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	%esi, 8(%eax)
-	movl	20(%edx), %esi
-	movl	%ebx, 12(%eax)
-	movl	20(%ecx), %ebp
-	adcl	%esi, %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	24(%edx), %esi
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%eax)
-	movl	24(%ecx), %ebx
-	adcl	%esi, %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	28(%ecx), %edi
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	32(%edx), %eax
-	movl	32(%ecx), %esi
-	adcl	%eax, %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	36(%edx), %eax
-	movl	36(%ecx), %edx
-	adcl	%eax, %edx
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%ebp, %ecx
-	movl	60(%esp), %ebp
-	subl	(%ebp), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	sbbl	4(%ebp), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	sbbl	8(%ebp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	%esi, %ebx
-	movl	%edx, %esi
-	sbbl	12(%ebp), %ebx
-	sbbl	16(%ebp), %edx
-	sbbl	$0, %eax
-	andl	$1, %eax
-	jne	.LBB78_2
-# BB#1:
-	movl	%edx, %esi
-.LBB78_2:
-	testb	%al, %al
-	movl	12(%esp), %ebp          # 4-byte Reload
-	jne	.LBB78_4
-# BB#3:
-	movl	(%esp), %ebp            # 4-byte Reload
-.LBB78_4:
-	movl	48(%esp), %eax
-	movl	%ebp, 20(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	20(%esp), %edx          # 4-byte Reload
-	movl	16(%esp), %edi          # 4-byte Reload
-	jne	.LBB78_6
-# BB#5:
-	movl	4(%esp), %edi           # 4-byte Reload
-.LBB78_6:
-	movl	%edi, 24(%eax)
-	jne	.LBB78_8
-# BB#7:
-	movl	8(%esp), %edx           # 4-byte Reload
-.LBB78_8:
-	movl	%edx, 28(%eax)
-	jne	.LBB78_10
-# BB#9:
-	movl	%ebx, %ecx
-.LBB78_10:
-	movl	%ecx, 32(%eax)
-	movl	%esi, 36(%eax)
-	addl	$28, %esp
+	subl	$100, %esp                      # reserve 100 bytes of spill space
+	movl	128(%esp), %ebx                 # %ebx = arg2 (pointer to the 7-word array)
+	movl	-4(%ebx), %eax                  # word stored just below arg2[0]; used as the per-round multiplier constant
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	124(%esp), %ecx                 # %ecx = arg1 (pointer to the 14 input words)
+	movl	(%ecx), %edi                    # input word 0
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	imull	%eax, %edi                      # round 1 multiplier q = low 32 bits of (input word 0 * constant)
+	movl	24(%ebx), %ecx                  # arg2 word 6 (kept at 88(%esp) for later rounds)
+	movl	%ecx, 88(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%ecx                            # q * arg2[6] -> %edx:%eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	20(%ebx), %ecx                  # arg2 word 5 (kept at 96(%esp))
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%ecx
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	16(%ebx), %ecx                  # arg2 word 4 (kept at 92(%esp))
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%ecx
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	12(%ebx), %ecx                  # arg2 word 3 (kept at 84(%esp))
+	movl	%ecx, 84(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%ecx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	8(%ebx), %ecx                   # arg2 word 2 (kept at 76(%esp))
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%ecx
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%edx, %ecx
+	movl	(%ebx), %ebp                    # arg2 word 0 (kept at 68(%esp))
+	movl	%ebp, 68(%esp)                  # 4-byte Spill
+	movl	4(%ebx), %edx                   # arg2 word 1 (kept at 72(%esp))
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	%edx
+	movl	%edx, %esi
+	movl	%eax, %ebx
+	movl	%edi, %eax
+	mull	%ebp
+	movl	%eax, %ebp
+	addl	%ebx, %edx                      # start chaining the partial products of q * arg2[0..6]
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%esi, %edi
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	28(%esp), %esi                  # 4-byte Folded Reload
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	adcl	36(%esp), %ebx                  # 4-byte Folded Reload
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	addl	16(%esp), %ebp                  # 4-byte Folded Reload
+	movl	(%esp), %ebp                    # 4-byte Reload
+	movl	124(%esp), %eax
+	adcl	4(%eax), %ebp                   # add input word 1 (the sum in %ebp above was only needed for its carry)
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	movl	124(%esp), %ebp
+	adcl	8(%ebp), %edi                   # add input word 2
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	12(%ebp), %eax                  # add input word 3
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	16(%ebp), %esi                  # add input word 4
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	20(%ebp), %ebx                  # add input word 5
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	adcl	24(%ebp), %edx                  # add input word 6
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	adcl	28(%ebp), %ecx                  # add input word 7
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	setb	56(%esp)                        # 1-byte Folded Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, %edi
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	addl	%ebp, %eax
+	movl	%eax, %ebx
+	adcl	4(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	%edi, %esi
+	movl	%esi, %edi
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	64(%esp), %esi                  # 4-byte Folded Reload
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	52(%esp), %ebp                  # 4-byte Folded Reload
+	movzbl	56(%esp), %eax                  # 1-byte Folded Reload
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	%eax, %edx                      # fold in the carry saved by the previous round's setb
+	movl	80(%esp), %eax                  # 4-byte Reload
+	addl	(%esp), %eax                    # 4-byte Folded Reload
+	adcl	60(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	%eax, 4(%esp)                   # 4-byte Folded Spill
+	adcl	32(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	124(%esp), %eax
+	adcl	32(%eax), %edx                  # add input word 8
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	setb	20(%esp)                        # 1-byte Folded Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	imull	%ebx, %edi                      # round 3 multiplier from the new lowest word
+	movl	%edi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, %esi
+	movl	%edi, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%edi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	addl	%ecx, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	%ebx, %edx
+	movl	%edx, %ebx
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	80(%esp), %edi                  # 4-byte Folded Reload
+	movl	%ebp, %ecx
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	%esi, %eax
+	movzbl	20(%esp), %esi                  # 1-byte Folded Reload
+	adcl	24(%esp), %esi                  # 4-byte Folded Reload
+	movl	64(%esp), %ebp                  # 4-byte Reload
+	addl	56(%esp), %ebp                  # 4-byte Folded Reload
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	adcl	60(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	adcl	28(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	124(%esp), %eax
+	adcl	36(%eax), %esi                  # add input word 9
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	setb	56(%esp)                        # 1-byte Folded Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%ebp, %ecx                      # round 4 multiplier from the new lowest word
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, %ebx
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%eax, %ebp
+	addl	%esi, %ebp
+	adcl	4(%esp), %edx                   # 4-byte Folded Reload
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	adcl	%ebx, %edi
+	movl	%edi, %ebx
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	64(%esp), %edi                  # 4-byte Folded Reload
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	52(%esp), %esi                  # 4-byte Folded Reload
+	movzbl	56(%esp), %eax                  # 1-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	%eax, %edx
+	movl	80(%esp), %eax                  # 4-byte Reload
+	addl	8(%esp), %eax                   # 4-byte Folded Reload
+	adcl	60(%esp), %ebp                  # 4-byte Folded Reload
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 4(%esp)                   # 4-byte Folded Spill
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	adcl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	adcl	(%esp), %ecx                    # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	124(%esp), %eax
+	adcl	40(%eax), %edx                  # add input word 10
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	setb	40(%esp)                        # 1-byte Folded Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	imull	%ebp, %esi                      # round 5 multiplier from the new lowest word
+	movl	%esi, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, %edi
+	movl	%esi, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, %ecx
+	movl	%esi, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	addl	%ebp, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	%ecx, %edx
+	movl	%edx, %ecx
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	80(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	48(%esp), %ebx                  # 4-byte Folded Reload
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	%edi, %eax
+	movzbl	40(%esp), %edi                  # 1-byte Folded Reload
+	adcl	56(%esp), %edi                  # 4-byte Folded Reload
+	movl	64(%esp), %esi                  # 4-byte Reload
+	addl	60(%esp), %esi                  # 4-byte Folded Reload
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	4(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	36(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	adcl	24(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	adcl	20(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	124(%esp), %eax
+	adcl	44(%eax), %edi                  # add input word 11
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	setb	60(%esp)                        # 1-byte Folded Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%esi, %ecx                      # round 6 multiplier from the new lowest word
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, %esi
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	addl	%ebx, %eax
+	movl	%eax, %ebx
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	%esi, %ebp
+	movl	%ebp, %esi
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	52(%esp), %edi                  # 4-byte Folded Reload
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	56(%esp), %ebp                  # 4-byte Folded Reload
+	movzbl	60(%esp), %eax                  # 1-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	%eax, %edx
+	movl	64(%esp), %eax                  # 4-byte Reload
+	addl	28(%esp), %eax                  # 4-byte Folded Reload
+	adcl	8(%esp), %ebx                   # 4-byte Folded Reload
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	%eax, 24(%esp)                  # 4-byte Folded Spill
+	adcl	36(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	movl	%edi, (%esp)                    # 4-byte Spill
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	124(%esp), %eax
+	adcl	48(%eax), %edx                  # add input word 12
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	setb	12(%esp)                        # 1-byte Folded Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%ebx, %ecx                      # round 7 (last) multiplier from the new lowest word
+	movl	%ecx, %eax
+	mull	88(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	96(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebp
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	92(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ebx
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	68(%esp)                        # 4-byte Folded Reload
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, %eax
+	mull	84(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %edi
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	76(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %esi
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	mull	72(%esp)                        # 4-byte Folded Reload
+	movl	%edx, %ecx
+	movl	%eax, %edx
+	addl	56(%esp), %edx                  # 4-byte Folded Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	52(%esp), %esi                  # 4-byte Folded Reload
+	adcl	60(%esp), %edi                  # 4-byte Folded Reload
+	adcl	44(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 44(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movzbl	12(%esp), %ebp                  # 1-byte Folded Reload
+	adcl	32(%esp), %ebp                  # 4-byte Folded Reload
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	addl	28(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	adcl	40(%esp), %esi                  # 4-byte Folded Reload
+	adcl	(%esp), %edi                    # 4-byte Folded Reload
+	movl	44(%esp), %ebx                  # 4-byte Reload
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	124(%esp), %eax
+	adcl	52(%eax), %ebp                  # add input word 13 (the last one read)
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	subl	68(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	sbbl	72(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	%esi, %edx
+	sbbl	76(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edi, 76(%esp)                  # 4-byte Spill
+	movl	%edi, %esi
+	sbbl	84(%esp), %esi                  # 4-byte Folded Reload
+	movl	%ebx, %ecx
+	movl	%ebx, 44(%esp)                  # 4-byte Spill
+	sbbl	92(%esp), %ecx                  # 4-byte Folded Reload
+	movl	(%esp), %ebx                    # 4-byte Reload
+	sbbl	96(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebp, %edi
+	sbbl	88(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, %eax
+	sarl	$31, %eax                       # broadcast the sign bit of the top word of (value - arg2[0..6])
+	testl	%eax, %eax                      # sets SF; the movl stores below leave flags untouched, so every js/jns reuses it
+	js	.LBB29_1                        # sign set: keep the unsubtracted words instead of the difference
+# %bb.2:
+	movl	120(%esp), %eax                 # %eax = arg0 (destination pointer)
+	movl	%edi, 24(%eax)                  # result word 6
+	js	.LBB29_3
+.LBB29_4:
+	movl	%ebx, 20(%eax)                  # result word 5
+	js	.LBB29_5
+.LBB29_6:
+	movl	%ecx, 16(%eax)                  # result word 4
+	js	.LBB29_7
+.LBB29_8:
+	movl	%esi, 12(%eax)                  # result word 3
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	js	.LBB29_9
+.LBB29_10:
+	movl	%edx, 8(%eax)                   # result word 2
+	movl	72(%esp), %edx                  # 4-byte Reload
+	js	.LBB29_11
+.LBB29_12:
+	movl	%edx, 4(%eax)                   # result word 1
+	jns	.LBB29_14
+.LBB29_13:
+	movl	20(%esp), %ecx                  # 4-byte Reload
+.LBB29_14:
+	movl	%ecx, (%eax)                    # result word 0
+	addl	$100, %esp                      # release the spill area
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end78:
-	.size	mcl_fpDbl_add5L, .Lfunc_end78-mcl_fpDbl_add5L
-
-	.globl	mcl_fpDbl_sub5L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub5L,@function
-mcl_fpDbl_sub5L:                        # @mcl_fpDbl_sub5L
-# BB#0:
+.LBB29_1:
+	movl	%ebp, %edi                      # unsubtracted top word replaces the difference
+	movl	120(%esp), %eax                 # %eax = arg0 (destination pointer)
+	movl	%edi, 24(%eax)
+	jns	.LBB29_4
+.LBB29_3:
+	movl	(%esp), %ebx                    # 4-byte Reload
+	movl	%ebx, 20(%eax)
+	jns	.LBB29_6
+.LBB29_5:
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	jns	.LBB29_8
+.LBB29_7:
+	movl	76(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 12(%eax)
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB29_10
+.LBB29_9:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%eax)
+	movl	72(%esp), %edx                  # 4-byte Reload
+	jns	.LBB29_12
+.LBB29_11:
+	movl	16(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%eax)
+	js	.LBB29_13
+	jmp	.LBB29_14
+.Lfunc_end29:
+	.size	mcl_fp_montRedNF7L, .Lfunc_end29-mcl_fp_montRedNF7L
+                                        # -- End function
+	.globl	mcl_fp_addPre7L                 # -- Begin function mcl_fp_addPre7L
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre7L,@function
+mcl_fp_addPre7L:                        # @mcl_fp_addPre7L: arg0[0..6] = arg1[0..6] + arg2[0..6] as one 7-word add; returns the carry out (0 or 1) in %eax
+# %bb.0: after the prologue below: arg0 = 24(%esp), arg1 = 28(%esp), arg2 = 32(%esp)
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$16, %esp
-	movl	40(%esp), %eax
-	movl	(%eax), %esi
-	movl	4(%eax), %edi
-	movl	44(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%eax), %ebx
-	sbbl	8(%edx), %ebx
-	movl	36(%esp), %ecx
-	movl	%esi, (%ecx)
-	movl	12(%eax), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ecx)
-	movl	16(%eax), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ecx)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	24(%edx), %esi
-	movl	%edi, 16(%ecx)
-	movl	24(%eax), %ebp
-	sbbl	%esi, %ebp
-	movl	28(%edx), %esi
-	movl	28(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	movl	32(%edx), %esi
-	movl	32(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	36(%edx), %edx
-	movl	36(%eax), %eax
-	sbbl	%edx, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	$0, %edx
-	sbbl	$0, %edx
-	andl	$1, %edx
-	movl	48(%esp), %ebx
-	jne	.LBB79_1
-# BB#2:
-	xorl	%eax, %eax
-	jmp	.LBB79_3
-.LBB79_1:
-	movl	16(%ebx), %eax
-.LBB79_3:
-	testb	%dl, %dl
-	jne	.LBB79_4
-# BB#5:
-	movl	$0, %edx
-	movl	$0, %esi
-	jmp	.LBB79_6
-.LBB79_4:
-	movl	(%ebx), %esi
-	movl	4(%ebx), %edx
-.LBB79_6:
-	jne	.LBB79_7
-# BB#8:
-	movl	$0, %edi
-	jmp	.LBB79_9
-.LBB79_7:
-	movl	12(%ebx), %edi
-.LBB79_9:
-	jne	.LBB79_10
-# BB#11:
-	xorl	%ebx, %ebx
-	jmp	.LBB79_12
-.LBB79_10:
-	movl	8(%ebx), %ebx
-.LBB79_12:
-	addl	4(%esp), %esi           # 4-byte Folded Reload
-	adcl	%ebp, %edx
-	movl	%esi, 20(%ecx)
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	movl	%edx, 24(%ecx)
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%ebx, 28(%ecx)
-	movl	%edi, 32(%ecx)
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%ecx)
-	addl	$16, %esp
+	pushl	%eax                            # reserves one 4-byte spill slot; the pushed value is overwritten before it is read
+	movl	28(%esp), %eax                  # %eax = arg1 (first source pointer)
+	movl	(%eax), %ecx                    # arg1 word 0
+	movl	4(%eax), %edx                   # arg1 word 1
+	movl	32(%esp), %esi                  # %esi = arg2 (second source pointer)
+	addl	(%esi), %ecx                    # word 0 sum; starts the carry chain
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	4(%esi), %edx                   # word 1 sum with carry
+	movl	24(%eax), %edi                  # arg1 words 6..2 are loaded next; movl does not disturb CF
+	movl	20(%eax), %ebx
+	movl	16(%eax), %ebp
+	movl	12(%eax), %ecx
+	movl	8(%eax), %eax                   # word 2 overwrites the arg1 pointer, which is no longer needed
+	adcl	8(%esi), %eax                   # word 2 sum
+	adcl	12(%esi), %ecx                  # word 3 sum
+	adcl	16(%esi), %ebp                  # word 4 sum
+	adcl	20(%esi), %ebx                  # word 5 sum
+	adcl	24(%esi), %edi                  # word 6 sum; CF now holds the carry out of the whole add
+	movl	24(%esp), %esi                  # %esi = arg0 (destination pointer)
+	movl	%ebx, 20(%esi)
+	movl	%ebp, 16(%esi)
+	movl	%ecx, 12(%esi)
+	movl	%eax, 8(%esi)
+	movl	%edi, 24(%esi)
+	movl	%edx, 4(%esi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, (%esi)
+	setb	%al                             # CF from the last adcl is still live (only movl ran in between)
+	movzbl	%al, %eax                       # return value: carry out as 0 or 1
+	addl	$4, %esp                        # drop the spill slot
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end79:
-	.size	mcl_fpDbl_sub5L, .Lfunc_end79-mcl_fpDbl_sub5L
-
-	.globl	mcl_fp_mulUnitPre6L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre6L,@function
-mcl_fp_mulUnitPre6L:                    # @mcl_fp_mulUnitPre6L
-# BB#0:
+.Lfunc_end30:
+	.size	mcl_fp_addPre7L, .Lfunc_end30-mcl_fp_addPre7L
+                                        # -- End function
+	.globl	mcl_fp_subPre7L                 # -- Begin function mcl_fp_subPre7L
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre7L,@function
+mcl_fp_subPre7L:                        # @mcl_fp_subPre7L: arg0[0..6] = arg1[0..6] - arg2[0..6] as one 7-word subtract; returns the borrow out (0 or 1) in %eax
+# %bb.0: after the prologue below: arg0 = 28(%esp), arg1 = 32(%esp), arg2 = 36(%esp)
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$28, %esp
-	movl	56(%esp), %ebx
-	movl	52(%esp), %edi
-	movl	%ebx, %eax
-	mull	20(%edi)
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	16(%edi)
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	12(%edi)
-	movl	%edx, %esi
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	8(%edi)
-	movl	%edx, %ebp
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	4(%edi)
-	movl	%edx, %ecx
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ebx, %eax
-	mull	(%edi)
-	movl	48(%esp), %edi
-	movl	%eax, (%edi)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%edi)
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 8(%edi)
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 12(%edi)
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%edi)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%edi)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 24(%edi)
-	addl	$28, %esp
-	popl	%esi
-	popl	%edi
+	subl	$8, %esp                        # two 4-byte spill slots
+	movl	32(%esp), %ecx                  # %ecx = arg1 (minuend pointer)
+	movl	(%ecx), %edx                    # arg1 word 0
+	movl	4(%ecx), %esi                   # arg1 word 1
+	xorl	%eax, %eax                      # clear %eax; done before the subl so it cannot disturb the borrow chain
+	movl	36(%esp), %edi                  # %edi = arg2 (subtrahend pointer)
+	subl	(%edi), %edx                    # word 0 difference; starts the borrow chain
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	sbbl	4(%edi), %esi                   # word 1 difference with borrow
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	24(%ecx), %ebx                  # arg1 words 6..2 are loaded next; movl does not disturb CF
+	movl	20(%ecx), %ebp
+	movl	16(%ecx), %edx
+	movl	12(%ecx), %esi
+	movl	8(%ecx), %ecx                   # word 2 overwrites the arg1 pointer, which is no longer needed
+	sbbl	8(%edi), %ecx                   # word 2 difference
+	sbbl	12(%edi), %esi                  # word 3 difference
+	sbbl	16(%edi), %edx                  # word 4 difference
+	sbbl	20(%edi), %ebp                  # word 5 difference
+	sbbl	24(%edi), %ebx                  # word 6 difference; CF now holds the borrow out of the whole subtract
+	movl	28(%esp), %edi                  # %edi = arg0 (destination pointer)
+	movl	%ebp, 20(%edi)
+	movl	%edx, 16(%edi)
+	movl	%esi, 12(%edi)
+	movl	%ecx, 8(%edi)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	movl	%ebx, 24(%edi)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, (%edi)
+	sbbl	%eax, %eax                      # %eax = 0 or -1 from the final borrow (CF survived the movl stores)
+	andl	$1, %eax                        # return value: borrow out as 0 or 1
+	addl	$8, %esp                        # release the spill slots
+	popl	%esi
+	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end80:
-	.size	mcl_fp_mulUnitPre6L, .Lfunc_end80-mcl_fp_mulUnitPre6L
-
-	.globl	mcl_fpDbl_mulPre6L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre6L,@function
-mcl_fpDbl_mulPre6L:                     # @mcl_fpDbl_mulPre6L
-# BB#0:
+.Lfunc_end31:
+	.size	mcl_fp_subPre7L, .Lfunc_end31-mcl_fp_subPre7L
+                                        # -- End function
+	.globl	mcl_fp_shr1_7L                  # -- Begin function mcl_fp_shr1_7L
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_7L,@function
+mcl_fp_shr1_7L:                         # @mcl_fp_shr1_7L
+# %bb.0:
+	pushl	%esi
+	movl	12(%esp), %eax
+	movl	24(%eax), %ecx
+	movl	%ecx, %edx
+	shrl	%edx
+	movl	8(%esp), %esi
+	movl	%edx, 24(%esi)
+	movl	20(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 20(%esi)
+	movl	16(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 16(%esi)
+	movl	12(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 12(%esi)
+	movl	8(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 8(%esi)
+	movl	4(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 4(%esi)
+	movl	(%eax), %eax
+	shrdl	$1, %edx, %eax
+	movl	%eax, (%esi)
+	popl	%esi
+	retl
+.Lfunc_end32:
+	.size	mcl_fp_shr1_7L, .Lfunc_end32-mcl_fp_shr1_7L
+                                        # -- End function
+	.globl	mcl_fp_add7L                    # -- Begin function mcl_fp_add7L
+	.p2align	4, 0x90
+	.type	mcl_fp_add7L,@function
+mcl_fp_add7L:                           # @mcl_fp_add7L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$84, %esp
-	movl	108(%esp), %esi
-	movl	(%esi), %ebp
-	movl	112(%esp), %eax
-	movl	(%eax), %edi
-	movl	%ebp, %eax
-	mull	%edi
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	104(%esp), %edx
-	movl	%eax, (%edx)
-	movl	4(%esi), %ebx
-	movl	8(%esi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	20(%esi), %ecx
-	movl	112(%esp), %eax
+	subl	$20, %esp
+	movl	44(%esp), %eax
+	movl	(%eax), %ebp
 	movl	4(%eax), %esi
-	movl	%ebx, %eax
-	mull	%esi
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%esi
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%edi
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	%ecx, %eax
-	mull	%esi
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, %eax
-	mull	%esi
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	mull	%edi
-	movl	%edx, %ecx
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	mull	%edi
-	movl	%edx, %esi
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%edi
-	movl	%eax, %ebx
-	movl	%edx, %edi
-	addl	40(%esp), %ebp          # 4-byte Folded Reload
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	adcl	80(%esp), %edi          # 4-byte Folded Reload
-	adcl	76(%esp), %esi          # 4-byte Folded Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	104(%esp), %eax
-	movl	%ebp, 4(%eax)
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%ecx, %eax
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, %ecx
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	addl	60(%esp), %ebx          # 4-byte Folded Reload
-	adcl	72(%esp), %edi          # 4-byte Folded Reload
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	108(%esp), %ebp
-	movl	20(%ebp), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	8(%eax), %ecx
-	movl	%edx, %eax
-	mull	%ecx
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	16(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	12(%ebp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	8(%ebp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	(%ebp), %esi
-	movl	4(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	mull	%ecx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	%esi, %eax
-	mull	%ecx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	addl	%ebx, %eax
-	movl	104(%esp), %ecx
-	movl	%eax, 8(%ecx)
-	adcl	%edi, %ebp
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 72(%esp)          # 4-byte Folded Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 76(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %ebx          # 4-byte Reload
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 80(%esp)          # 4-byte Folded Spill
-	sbbl	%edi, %edi
-	movl	112(%esp), %eax
+	movl	48(%esp), %ecx
+	addl	(%ecx), %ebp
+	adcl	4(%ecx), %esi
+	movl	24(%eax), %edi
+	movl	20(%eax), %ebx
+	movl	16(%eax), %edx
 	movl	12(%eax), %ecx
-	movl	20(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	8(%esp), %eax           # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	%ecx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	andl	$1, %edi
-	addl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebx, %ecx
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	addl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	104(%esp), %ebx
-	movl	%ebp, 12(%ebx)
-	movl	%esi, %ebx
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edx, %esi
-	adcl	(%esp), %esi            # 4-byte Folded Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	addl	16(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 72(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax
-	movl	%eax, %ecx
-	movl	20(%ecx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	16(%ecx), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	12(%ecx), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	8(%ecx), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	(%ecx), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	4(%ecx), %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	movl	112(%esp), %esi
-	movl	16(%esi), %ecx
-	mull	%ecx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	56(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	%ecx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ebx, %eax
-	mull	%ecx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	%ebp, %eax
-	mull	%ecx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	addl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	104(%esp), %ecx
-	movl	%eax, 16(%ecx)
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	adcl	80(%esp), %edi          # 4-byte Folded Reload
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 64(%esp)          # 4-byte Folded Spill
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	20(%eax), %ecx
-	sbbl	%esi, %esi
-	movl	20(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	4(%esp), %eax           # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	andl	$1, %esi
-	addl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	8(%esp), %ecx           # 4-byte Reload
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	addl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	104(%esp), %edx
-	movl	%ebp, 20(%edx)
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %edx
-	movl	%ecx, %ebp
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	60(%esp), %ebx          # 4-byte Folded Reload
-	adcl	72(%esp), %edi          # 4-byte Folded Reload
-	movl	104(%esp), %ecx
-	movl	%ebx, 24(%ecx)
-	movl	%edx, %ebx
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edi, 28(%ecx)
-	movl	%ebp, %edx
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebx, 32(%ecx)
-	adcl	76(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 36(%ecx)
-	movl	%esi, 40(%ecx)
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	addl	$84, %esp
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	8(%eax), %eax
+	movl	48(%esp), %ecx
+	adcl	8(%ecx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	12(%ecx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	16(%ecx), %edx
+	adcl	20(%ecx), %ebx
+	adcl	24(%ecx), %edi
+	movl	40(%esp), %ecx
+	movl	%edi, 24(%ecx)
+	movl	%ebx, 20(%ecx)
+	movl	%edx, 16(%ecx)
+	movl	%eax, 12(%ecx)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ecx)
+	movl	%esi, 4(%ecx)
+	movl	%ebp, (%ecx)
+	setb	3(%esp)                         # 1-byte Folded Spill
+	movl	52(%esp), %ecx
+	subl	(%ecx), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	sbbl	4(%ecx), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	sbbl	8(%ecx), %eax
+	sbbl	12(%ecx), %ebp
+	sbbl	16(%ecx), %edx
+	sbbl	20(%ecx), %ebx
+	sbbl	24(%ecx), %edi
+	movzbl	3(%esp), %ecx                   # 1-byte Folded Reload
+	sbbl	$0, %ecx
+	testb	$1, %cl
+	jne	.LBB33_2
+# %bb.1:                                # %nocarry
+	movl	%ebp, %esi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	40(%esp), %ebp
+	movl	%ecx, (%ebp)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 4(%ebp)
+	movl	%eax, 8(%ebp)
+	movl	%esi, 12(%ebp)
+	movl	%edx, 16(%ebp)
+	movl	%ebx, 20(%ebp)
+	movl	%edi, 24(%ebp)
+.LBB33_2:                               # %carry
+	addl	$20, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end81:
-	.size	mcl_fpDbl_mulPre6L, .Lfunc_end81-mcl_fpDbl_mulPre6L
-
-	.globl	mcl_fpDbl_sqrPre6L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre6L,@function
-mcl_fpDbl_sqrPre6L:                     # @mcl_fpDbl_sqrPre6L
-# BB#0:
+.Lfunc_end33:
+	.size	mcl_fp_add7L, .Lfunc_end33-mcl_fp_add7L
+                                        # -- End function
+	.globl	mcl_fp_addNF7L                  # -- Begin function mcl_fp_addNF7L
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF7L,@function
+mcl_fp_addNF7L:                         # @mcl_fp_addNF7L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$100, %esp
-	movl	124(%esp), %esi
-	movl	20(%esi), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	(%esi), %ebp
-	movl	4(%esi), %ebx
-	mull	%ebx
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	16(%esi), %ecx
-	movl	%ecx, %eax
-	mull	%ebx
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	12(%esi), %edi
-	movl	%edi, %eax
-	mull	%ebx
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax
-	movl	8(%eax), %esi
-	movl	%esi, %eax
-	mull	%ebx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	mull	%ebp
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%ebp
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	%ebp
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	%ebp
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	movl	%ebx, %eax
-	mull	%ebx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%ebp
-	movl	%edx, %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ebp, %eax
-	mull	%ebp
-	movl	120(%esp), %ebx
-	movl	%eax, (%ebx)
-	addl	%edi, %edx
-	adcl	%esi, %ecx
-	movl	%ecx, %ebx
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	addl	%edi, %edx
-	movl	120(%esp), %edi
-	movl	%edx, 4(%edi)
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%esi, %edx
-	adcl	60(%esp), %edx          # 4-byte Folded Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %edi
-	adcl	72(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%ebx, %esi
-	addl	32(%esp), %esi          # 4-byte Folded Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	adcl	76(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	adcl	80(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, %edi
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	124(%esp), %ebx
-	movl	20(%ebx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	8(%ebx), %ebp
-	mull	%ebp
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	16(%ebx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	mull	%ebp
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	12(%ebx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	mull	%ebp
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	(%ebx), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	4(%ebx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mull	%ebp
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%ebp
-	movl	%edx, 32(%esp)          # 4-byte Spill
+	subl	$40, %esp
+	movl	68(%esp), %ecx
+	movl	(%ecx), %ebx
+	movl	4(%ecx), %ebp
+	movl	64(%esp), %esi
+	addl	(%esi), %ebx
+	adcl	4(%esi), %ebp
+	movl	24(%ecx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%ecx), %edx
+	movl	16(%ecx), %edi
+	movl	12(%ecx), %eax
+	movl	8(%ecx), %ecx
+	adcl	8(%esi), %ecx
+	adcl	12(%esi), %eax
+	adcl	16(%esi), %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	adcl	20(%esi), %edx
+	movl	(%esp), %edi                    # 4-byte Reload
+	adcl	24(%esi), %edi
+	movl	%edi, (%esp)                    # 4-byte Spill
+	movl	72(%esp), %esi
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	subl	(%esi), %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	sbbl	4(%esi), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	sbbl	8(%esi), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
 	movl	%eax, %ebx
-	movl	%ebp, %eax
-	mull	%ebp
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	(%esp), %edi                    # 4-byte Reload
+	sbbl	12(%esi), %ebx
 	movl	%eax, %ebp
-	addl	%esi, %ebx
-	movl	120(%esp), %eax
-	movl	%ebx, 8(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	88(%esp), %ecx          # 4-byte Folded Reload
-	adcl	92(%esp), %ebp          # 4-byte Folded Reload
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	80(%esp), %ebx          # 4-byte Reload
-	adcl	%edi, %ebx
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	sbbl	%edi, %edi
-	andl	$1, %edi
-	addl	32(%esp), %ecx          # 4-byte Folded Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	adcl	%edx, %eax
+	sbbl	16(%esi), %ebp
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	%edx, %ecx
+	sbbl	20(%esi), %ecx
+	movl	%edi, %edx
+	sbbl	24(%esi), %edx
+	movl	%edx, %esi
+	sarl	$31, %esi
+	testl	%esi, %esi
+	js	.LBB34_1
+# %bb.2:
+	movl	60(%esp), %edi
+	movl	%edx, 24(%edi)
+	js	.LBB34_3
+.LBB34_4:
+	movl	%ecx, 20(%edi)
+	movl	28(%esp), %edx                  # 4-byte Reload
+	js	.LBB34_5
+.LBB34_6:
+	movl	%ebp, 16(%edi)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	js	.LBB34_7
+.LBB34_8:
+	movl	%ebx, 12(%edi)
+	js	.LBB34_9
+.LBB34_10:
+	movl	%edx, 8(%edi)
+	js	.LBB34_11
+.LBB34_12:
+	movl	%ecx, 4(%edi)
+	jns	.LBB34_14
+.LBB34_13:
+	movl	20(%esp), %eax                  # 4-byte Reload
+.LBB34_14:
+	movl	%eax, (%edi)
+	addl	$40, %esp
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+	retl
+.LBB34_1:
+	movl	%edi, %edx
+	movl	60(%esp), %edi
+	movl	%edx, 24(%edi)
+	jns	.LBB34_4
+.LBB34_3:
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 20(%edi)
+	movl	28(%esp), %edx                  # 4-byte Reload
+	jns	.LBB34_6
+.LBB34_5:
 	movl	%eax, %ebp
-	adcl	76(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 80(%esp)          # 4-byte Spill
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	movl	36(%esp), %edi          # 4-byte Reload
-	mull	%edi
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	mull	%edi
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	mull	%edi
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	56(%esp), %eax          # 4-byte Reload
-	mull	%edi
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	%edi, %eax
-	mull	%edi
-	movl	%eax, %edi
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	addl	%ecx, %esi
-	movl	120(%esp), %eax
-	movl	%esi, 12(%eax)
-	adcl	96(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 72(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	adcl	80(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 88(%esp)          # 4-byte Folded Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 92(%esp)          # 4-byte Folded Spill
+	movl	%ebp, 16(%edi)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB34_8
+.LBB34_7:
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	movl	%ebx, 12(%edi)
+	jns	.LBB34_10
+.LBB34_9:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%edi)
+	jns	.LBB34_12
+.LBB34_11:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	js	.LBB34_13
+	jmp	.LBB34_14
+.Lfunc_end34:
+	.size	mcl_fp_addNF7L, .Lfunc_end34-mcl_fp_addNF7L
+                                        # -- End function
+	.globl	mcl_fp_sub7L                    # -- Begin function mcl_fp_sub7L
+	.p2align	4, 0x90
+	.type	mcl_fp_sub7L,@function
+mcl_fp_sub7L:                           # @mcl_fp_sub7L
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$24, %esp
+	movl	48(%esp), %edx
+	movl	(%edx), %ecx
+	movl	4(%edx), %edi
+	movl	52(%esp), %eax
+	subl	(%eax), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	sbbl	4(%eax), %edi
+	movl	24(%edx), %esi
+	movl	20(%edx), %ecx
+	movl	16(%edx), %ebp
+	movl	12(%edx), %ebx
+	movl	8(%edx), %edx
+	sbbl	8(%eax), %edx
+	sbbl	12(%eax), %ebx
+	sbbl	16(%eax), %ebp
+	sbbl	20(%eax), %ecx
+	sbbl	24(%eax), %esi
+	movl	$0, %eax
 	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	124(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	4(%ecx), %edi
-	movl	20(%ecx), %ebp
-	movl	%edi, %eax
-	mull	%ebp
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%ebp
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	16(%ecx), %esi
-	movl	%edi, %eax
-	mull	%esi
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%esi
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	%eax, 72(%esp)          # 4-byte Folded Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 96(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 88(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 92(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax
-	movl	12(%eax), %edi
-	movl	8(%eax), %ebx
-	movl	%edi, %eax
-	mull	%ebp
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	%esi
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%ebp
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%esi
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ebp, %eax
-	mull	%esi
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%ebp
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	%esi
-	movl	%eax, %ebx
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	addl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	120(%esp), %ebp
-	movl	%eax, 16(%ebp)
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	adcl	%ecx, %edi
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	88(%esp), %ecx          # 4-byte Folded Reload
-	adcl	92(%esp), %ebx          # 4-byte Folded Reload
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	76(%esp), %edx          # 4-byte Folded Reload
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	addl	28(%esp), %eax          # 4-byte Folded Reload
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	adcl	16(%esp), %ebx          # 4-byte Folded Reload
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	adcl	80(%esp), %esi          # 4-byte Folded Reload
-	addl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	120(%esp), %ebp
-	movl	%eax, 20(%ebp)
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edx, %eax
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	addl	64(%esp), %edi          # 4-byte Folded Reload
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	120(%esp), %ebp
-	movl	%edi, 24(%ebp)
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ecx, 28(%ebp)
-	movl	%eax, %edi
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 32(%ebp)
-	adcl	80(%esp), %esi          # 4-byte Folded Reload
-	movl	%edi, 36(%ebp)
-	movl	%esi, 40(%ebp)
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%ebp)
-	addl	$100, %esp
+	testb	$1, %al
+	movl	44(%esp), %eax
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	%esi, 24(%eax)
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	%ecx, 20(%eax)
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	%ebp, 16(%eax)
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	movl	%ebx, 12(%eax)
+	movl	%edx, 8(%eax)
+	movl	%edi, (%esp)                    # 4-byte Spill
+	movl	%edi, 4(%eax)
+	movl	4(%esp), %esi                   # 4-byte Reload
+	movl	%esi, (%eax)
+	je	.LBB35_2
+# %bb.1:                                # %carry
+	movl	%esi, %ecx
+	movl	56(%esp), %esi
+	addl	(%esi), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	4(%esi), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	8(%esi), %edx
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	adcl	12(%esi), %ebx
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	16(%esi), %ebp
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	20(%esi), %ecx
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	24(%esi), %edi
+	movl	%edi, 24(%eax)
+	movl	%ecx, 20(%eax)
+	movl	%ebp, 16(%eax)
+	movl	%ebx, 12(%eax)
+	movl	%edx, 8(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, (%eax)
+.LBB35_2:                               # %nocarry
+	addl	$24, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end82:
-	.size	mcl_fpDbl_sqrPre6L, .Lfunc_end82-mcl_fpDbl_sqrPre6L
-
-	.globl	mcl_fp_mont6L
-	.align	16, 0x90
-	.type	mcl_fp_mont6L,@function
-mcl_fp_mont6L:                          # @mcl_fp_mont6L
-# BB#0:
+.Lfunc_end35:
+	.size	mcl_fp_sub7L, .Lfunc_end35-mcl_fp_sub7L
+                                        # -- End function
+	.globl	mcl_fp_subNF7L                  # -- Begin function mcl_fp_subNF7L
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF7L,@function
+mcl_fp_subNF7L:                         # @mcl_fp_subNF7L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$132, %esp
-	movl	156(%esp), %edi
-	movl	(%edi), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	160(%esp), %ecx
-	movl	(%ecx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	164(%esp), %edx
-	movl	-4(%edx), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	%eax, %ebp
-	imull	%ecx, %ebp
-	movl	(%edx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	20(%edx), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	16(%edx), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	12(%edx), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	8(%edx), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	4(%edx), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	4(%edi), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	movl	20(%eax), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	16(%eax), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
+	subl	$32, %esp
+	movl	56(%esp), %eax
+	movl	(%eax), %esi
+	movl	4(%eax), %edx
+	movl	60(%esp), %ecx
+	subl	(%ecx), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	sbbl	4(%ecx), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	24(%eax), %edx
+	movl	20(%eax), %esi
+	movl	16(%eax), %edi
 	movl	12(%eax), %ebx
-	movl	%ebx, 80(%esp)          # 4-byte Spill
 	movl	8(%eax), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%esi
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	movl	40(%esp), %ebp          # 4-byte Reload
-	mull	%ebp
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%ebp
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%ebp
-	movl	%edx, %ebx
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	mull	%ebp
-	movl	%ebp, %ecx
-	movl	%edx, %ebp
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	mull	%ecx
-	movl	%eax, %edi
-	addl	64(%esp), %edi          # 4-byte Folded Reload
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 72(%esp)          # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	addl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	movl	20(%esp), %ebx          # 4-byte Reload
-	addl	68(%esp), %ebx          # 4-byte Folded Reload
-	adcl	%edi, 40(%esp)          # 4-byte Folded Spill
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	4(%eax), %ecx
-	movl	%ecx, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ecx, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, %esi
+	sbbl	8(%ecx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	sbbl	12(%ecx), %ebx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	sbbl	16(%ecx), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	sbbl	20(%ecx), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	%edx, %eax
+	sbbl	24(%ecx), %eax
+	movl	%eax, %ecx
+	movl	%eax, %edx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	sarl	$31, %ecx
 	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	addl	%esi, %edx
-	movl	%edx, %esi
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	%edi, %ebx
-	movl	%ebx, %edi
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	64(%esp), %ebx          # 4-byte Reload
-	addl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%ebx, %esi
-	imull	112(%esp), %esi         # 4-byte Folded Reload
-	andl	$1, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
+	shldl	$1, %edx, %eax
+	movl	64(%esp), %edx
+	andl	(%edx), %eax
+	movl	24(%edx), %esi
+	andl	%ecx, %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	20(%edx), %ebx
+	andl	%ecx, %ebx
+	movl	16(%edx), %edi
+	andl	%ecx, %edi
+	movl	12(%edx), %esi
+	andl	%ecx, %esi
+	movl	64(%esp), %edx
+	movl	8(%edx), %edx
+	andl	%ecx, %edx
+	movl	64(%esp), %ebp
+	andl	4(%ebp), %ecx
+	addl	20(%esp), %eax                  # 4-byte Folded Reload
+	adcl	24(%esp), %ecx                  # 4-byte Folded Reload
+	movl	52(%esp), %ebp
+	movl	%eax, (%ebp)
+	adcl	4(%esp), %edx                   # 4-byte Folded Reload
+	movl	%ecx, 4(%ebp)
+	adcl	12(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edx, 8(%ebp)
+	adcl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	%esi, 12(%ebp)
+	adcl	28(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%edi, 16(%ebp)
+	movl	%ebx, 20(%ebp)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%ecx, 24(%ebp)
+	addl	$32, %esp
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+	retl
+.Lfunc_end36:
+	.size	mcl_fp_subNF7L, .Lfunc_end36-mcl_fp_subNF7L
+                                        # -- End function
+	.globl	mcl_fpDbl_add7L                 # -- Begin function mcl_fpDbl_add7L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add7L,@function
+mcl_fpDbl_add7L:                        # @mcl_fpDbl_add7L
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$44, %esp
+	movl	68(%esp), %eax
+	movl	(%eax), %ecx
+	movl	4(%eax), %edx
+	movl	72(%esp), %esi
+	addl	(%esi), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	4(%esi), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	52(%eax), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%eax), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	44(%eax), %ebx
+	movl	40(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	32(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	28(%eax), %ebp
+	movl	24(%eax), %edi
+	movl	20(%eax), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	16(%eax), %edx
+	movl	12(%eax), %ecx
+	movl	8(%eax), %eax
+	adcl	8(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	12(%esi), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	16(%esi), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	20(%esi), %ecx
+	adcl	24(%esi), %edi
+	movl	%edi, %edx
+	adcl	28(%esi), %ebp
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	32(%esi), %edi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	36(%esi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	40(%esi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	44(%esi), %ebx
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	48(%esi), %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	52(%esi), %ebx
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	movl	64(%esp), %ebx
+	movl	%edx, 24(%ebx)
+	movl	%ecx, 20(%ebx)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%ebx)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ebx)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%ebx)
+	setb	24(%esp)                        # 1-byte Folded Spill
+	movl	76(%esp), %edx
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	subl	(%edx), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	sbbl	4(%edx), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	sbbl	8(%edx), %esi
+	movl	12(%esp), %edi                  # 4-byte Reload
+	sbbl	12(%edx), %edi
+	movl	(%esp), %ebp                    # 4-byte Reload
+	sbbl	16(%edx), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	20(%edx), %eax
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	sbbl	24(%edx), %ecx
+	movzbl	24(%esp), %edx                  # 1-byte Folded Reload
+	sbbl	$0, %edx
+	testb	$1, %dl
+	jne	.LBB37_1
+# %bb.2:
+	movl	%ecx, 52(%ebx)
+	jne	.LBB37_3
+.LBB37_4:
+	movl	%eax, 48(%ebx)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB37_5
+.LBB37_6:
+	movl	%ebp, 44(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	jne	.LBB37_7
+.LBB37_8:
+	movl	%edi, 40(%ebx)
+	jne	.LBB37_9
+.LBB37_10:
+	movl	%esi, 36(%ebx)
+	jne	.LBB37_11
+.LBB37_12:
+	movl	%ecx, 32(%ebx)
+	je	.LBB37_14
+.LBB37_13:
+	movl	20(%esp), %eax                  # 4-byte Reload
+.LBB37_14:
+	movl	%eax, 28(%ebx)
+	addl	$44, %esp
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+	retl
+.LBB37_1:
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 52(%ebx)
+	je	.LBB37_4
+.LBB37_3:
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 48(%ebx)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	je	.LBB37_6
+.LBB37_5:
+	movl	(%esp), %ebp                    # 4-byte Reload
+	movl	%ebp, 44(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	je	.LBB37_8
+.LBB37_7:
+	movl	12(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 40(%ebx)
+	je	.LBB37_10
+.LBB37_9:
+	movl	16(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 36(%ebx)
+	je	.LBB37_12
+.LBB37_11:
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 32(%ebx)
+	jne	.LBB37_13
+	jmp	.LBB37_14
+.Lfunc_end37:
+	.size	mcl_fpDbl_add7L, .Lfunc_end37-mcl_fpDbl_add7L
+                                        # -- End function
+	.globl	mcl_fpDbl_sub7L                 # -- Begin function mcl_fpDbl_sub7L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub7L,@function
+mcl_fpDbl_sub7L:                        # @mcl_fpDbl_sub7L
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$44, %esp
+	movl	68(%esp), %eax
+	movl	(%eax), %edx
+	movl	4(%eax), %edi
+	xorl	%esi, %esi
+	movl	72(%esp), %ecx
+	subl	(%ecx), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	sbbl	4(%ecx), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	52(%eax), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	48(%eax), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	44(%eax), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	40(%eax), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	36(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	32(%eax), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	28(%eax), %edi
+	movl	24(%eax), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	20(%eax), %edx
+	movl	16(%eax), %ebx
+	movl	12(%eax), %ebp
+	movl	8(%eax), %eax
+	sbbl	8(%ecx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	sbbl	12(%ecx), %ebp
+	sbbl	16(%ecx), %ebx
+	sbbl	20(%ecx), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	sbbl	24(%ecx), %edx
+	sbbl	28(%ecx), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	32(%ecx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%ecx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	44(%ecx), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	48(%ecx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	52(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	64(%esp), %ecx
+	movl	%edx, 24(%ecx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 20(%ecx)
+	movl	%ebx, 16(%ecx)
+	movl	%ebp, 12(%ecx)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ecx)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%ecx)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, (%ecx)
+	sbbl	%esi, %esi
+	andl	$1, %esi
+	negl	%esi
+	movl	76(%esp), %ecx
+	movl	24(%ecx), %eax
+	andl	%esi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%ecx), %edx
+	andl	%esi, %edx
+	movl	16(%ecx), %ebx
+	andl	%esi, %ebx
+	movl	12(%ecx), %ebp
+	andl	%esi, %ebp
+	movl	8(%ecx), %ecx
+	andl	%esi, %ecx
+	movl	76(%esp), %eax
+	movl	4(%eax), %eax
+	andl	%esi, %eax
+	movl	76(%esp), %edi
+	andl	(%edi), %esi
+	addl	4(%esp), %esi                   # 4-byte Folded Reload
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	64(%esp), %edi
+	movl	%esi, 28(%edi)
+	adcl	16(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 32(%edi)
+	adcl	20(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ecx, 36(%edi)
+	adcl	24(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebp, 40(%edi)
+	adcl	28(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ebx, 44(%edi)
+	movl	%edx, 48(%edi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
+	movl	%eax, 52(%edi)
+	addl	$44, %esp
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+	retl
+.Lfunc_end38:
+	.size	mcl_fpDbl_sub7L, .Lfunc_end38-mcl_fpDbl_sub7L
+                                        # -- End function
+	.globl	mulPv256x32                     # -- Begin function mulPv256x32
+	.p2align	4, 0x90
+	.type	mulPv256x32,@function
+mulPv256x32:                            # @mulPv256x32
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$44, %esp
+	movl	72(%esp), %esi
+	movl	68(%esp), %ecx
 	movl	%esi, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
+	mull	28(%ecx)
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
+	mull	24(%ecx)
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
+	mull	20(%ecx)
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, %ecx
+	mull	16(%ecx)
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
 	movl	%esi, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%eax, %edi
+	mull	12(%ecx)
 	movl	%edx, %ebx
-	addl	%ecx, %ebx
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	64(%esp), %edi          # 4-byte Folded Reload
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	76(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	160(%esp), %eax
-	movl	8(%eax), %ecx
-	movl	%ecx, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	addl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	adcl	$0, %edi
-	addl	%ebx, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	%ebp, 44(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 40(%esp)          # 4-byte Folded Spill
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	sbbl	%ecx, %ecx
-	movl	%eax, %esi
-	imull	112(%esp), %esi         # 4-byte Folded Reload
-	andl	$1, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
+	movl	%eax, 8(%esp)                   # 4-byte Spill
 	movl	%esi, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 20(%esp)          # 4-byte Spill
+	mull	8(%ecx)
+	movl	%edx, %edi
+	movl	%eax, 4(%esp)                   # 4-byte Spill
 	movl	%esi, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
+	mull	4(%ecx)
 	movl	%edx, %ebp
-	movl	%eax, %edi
-	movl	%esi, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%eax, %esi
-	movl	%edx, %ebx
-	addl	%edi, %ebx
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ecx, %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	32(%esp), %esi          # 4-byte Folded Reload
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	72(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	160(%esp), %eax
-	movl	12(%eax), %edi
-	movl	%edi, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	addl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %esi
-	addl	%ebx, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	%ebp, 52(%esp)          # 4-byte Folded Spill
-	movl	44(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 48(%esp)          # 4-byte Folded Spill
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	sbbl	%ecx, %ecx
-	movl	%eax, %esi
-	imull	112(%esp), %esi         # 4-byte Folded Reload
-	andl	$1, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
 	movl	%esi, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, %edi
+	mull	(%ecx)
+	movl	64(%esp), %esi
+	movl	%eax, (%esi)
+	addl	(%esp), %edx                    # 4-byte Folded Reload
+	movl	%edx, 4(%esi)
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 8(%esi)
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 12(%esi)
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 16(%esi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	20(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 20(%esi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 24(%esi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 28(%esi)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 32(%esi)
 	movl	%esi, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%eax, %esi
-	movl	%edx, %ebx
-	addl	%edi, %ebx
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ecx, %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	32(%esp), %esi          # 4-byte Folded Reload
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	72(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	160(%esp), %eax
-	movl	16(%eax), %edi
-	movl	%edi, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	addl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %esi
-	addl	%ebx, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	%ebp, 48(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 44(%esp)          # 4-byte Folded Spill
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	sbbl	%ecx, %ecx
-	movl	%eax, %esi
-	imull	112(%esp), %esi         # 4-byte Folded Reload
-	andl	$1, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
+	addl	$44, %esp
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+	retl	$4
+.Lfunc_end39:
+	.size	mulPv256x32, .Lfunc_end39-mulPv256x32
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre8L             # -- Begin function mcl_fp_mulUnitPre8L
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre8L,@function
+mcl_fp_mulUnitPre8L:                    # @mcl_fp_mulUnitPre8L
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$60, %esp
+	calll	.L40$pb
+.L40$pb:
+	popl	%ebx
+.Ltmp2:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.L40$pb), %ebx
+	subl	$4, %esp
+	movl	92(%esp), %eax
+	movl	88(%esp), %ecx
+	leal	28(%esp), %edx
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	24(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi
+	movl	40(%esp), %edi
+	movl	44(%esp), %ebx
+	movl	48(%esp), %ebp
+	movl	52(%esp), %edx
+	movl	56(%esp), %ecx
+	movl	80(%esp), %eax
+	movl	%ecx, 32(%eax)
+	movl	%edx, 28(%eax)
+	movl	%ebp, 24(%eax)
+	movl	%ebx, 20(%eax)
+	movl	%edi, 16(%eax)
+	movl	%esi, 12(%eax)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 8(%eax)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%eax)
+	addl	$60, %esp
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+	retl
+.Lfunc_end40:
+	.size	mcl_fp_mulUnitPre8L, .Lfunc_end40-mcl_fp_mulUnitPre8L
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre8L              # -- Begin function mcl_fpDbl_mulPre8L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre8L,@function
+mcl_fpDbl_mulPre8L:                     # @mcl_fpDbl_mulPre8L
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$348, %esp                      # imm = 0x15C
+	calll	.L41$pb
+.L41$pb:
+	popl	%ebx
+.Ltmp3:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L41$pb), %ebx
+	movl	376(%esp), %ecx
+	subl	$4, %esp
+	leal	316(%esp), %eax
+	pushl	(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	344(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	340(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	336(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	332(%esp), %esi
+	movl	328(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	324(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	320(%esp), %edi
+	movl	312(%esp), %eax
+	movl	316(%esp), %ebp
+	movl	368(%esp), %ecx
+	movl	%eax, (%ecx)
+	subl	$4, %esp
+	leal	276(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	4(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	304(%esp), %eax
+	movl	%ebp, %edx
+	addl	272(%esp), %edx
+	adcl	276(%esp), %edi
+	movl	%edi, %ebp
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	280(%esp), %edi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	284(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	288(%esp), %esi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	292(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	296(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	300(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	368(%esp), %ecx
+	movl	%edx, 4(%ecx)
+	adcl	$0, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	236(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	8(%ecx)
+	movl	380(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	264(%esp), %eax
+	movl	%ebp, %edx
+	addl	232(%esp), %edx
+	adcl	236(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	240(%esp), %edi
+	adcl	244(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	248(%esp), %ebp
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	252(%esp), %esi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	256(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	260(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	368(%esp), %ecx
+	movl	%edx, 8(%ecx)
+	adcl	$0, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	leal	196(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	12(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	224(%esp), %eax
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	192(%esp), %edx
+	adcl	196(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	200(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	204(%esp), %ebp
+	adcl	208(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	212(%esp), %esi
+	movl	24(%esp), %edi                  # 4-byte Reload
+	adcl	216(%esp), %edi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	220(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	368(%esp), %ecx
+	movl	%edx, 12(%ecx)
+	adcl	$0, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	156(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	16(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	184(%esp), %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	addl	152(%esp), %ecx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	156(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	160(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	164(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	168(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	172(%esp), %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	176(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	180(%esp), %edi
+	movl	368(%esp), %eax
+	movl	%ecx, 16(%eax)
+	adcl	$0, %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	116(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	20(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	144(%esp), %edx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	addl	112(%esp), %eax
+	movl	%ebp, %esi
+	adcl	116(%esp), %esi
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	120(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	124(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	128(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	132(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	136(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	140(%esp), %edi
+	movl	368(%esp), %ecx
+	movl	%eax, 20(%ecx)
+	adcl	$0, %edx
 	movl	%edx, %ebp
-	movl	%eax, %edi
+	subl	$4, %esp
+	leal	76(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	24(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
 	movl	%esi, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%eax, %ecx
-	movl	%edx, %ebx
-	addl	%edi, %ebx
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	64(%esp), %edx          # 4-byte Reload
+	addl	72(%esp), %eax
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	76(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	80(%esp), %esi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	84(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	88(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	92(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	96(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	100(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	104(%esp), %edx
+	movl	368(%esp), %ecx
+	movl	%eax, 24(%ecx)
 	adcl	$0, %edx
-	addl	32(%esp), %ecx          # 4-byte Folded Reload
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	72(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	adcl	68(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	160(%esp), %eax
-	movl	20(%eax), %edi
-	movl	%edi, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	addl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	72(%esp), %edi          # 4-byte Folded Reload
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	76(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, %eax
-	adcl	$0, %eax
-	movl	100(%esp), %esi         # 4-byte Reload
-	addl	%ebx, %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	adcl	%ebp, 92(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 80(%esp)          # 4-byte Folded Spill
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	112(%esp), %ecx         # 4-byte Reload
-	imull	%esi, %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	andl	$1, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
 	movl	%edx, %edi
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	%ecx, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	mull	104(%esp)               # 4-byte Folded Reload
-	addl	56(%esp), %eax          # 4-byte Folded Reload
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	adcl	$0, %edi
-	addl	100(%esp), %esi         # 4-byte Folded Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	adcl	80(%esp), %edx          # 4-byte Folded Reload
-	adcl	96(%esp), %ecx          # 4-byte Folded Reload
-	adcl	88(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 112(%esp)         # 4-byte Spill
-	adcl	84(%esp), %ebp          # 4-byte Folded Reload
-	adcl	76(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
+	subl	$4, %esp
+	leal	36(%esp), %eax
+	movl	380(%esp), %ecx
+	pushl	28(%ecx)
+	pushl	380(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	addl	32(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	36(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	40(%esp), %ebp
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	44(%esp), %ebx
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	48(%esp), %esi
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	52(%esp), %edx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	56(%esp), %eax
+	adcl	60(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	64(%esp), %edi
+	movl	368(%esp), %ecx
+	movl	%eax, 52(%ecx)
+	movl	%edx, 48(%ecx)
+	movl	%esi, 44(%ecx)
+	movl	%ebx, 40(%ecx)
+	movl	%ebp, 36(%ecx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 32(%ecx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 56(%ecx)
+	movl	8(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 28(%ecx)
 	adcl	$0, %edi
-	movl	%eax, %esi
-	subl	108(%esp), %esi         # 4-byte Folded Reload
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	%edx, %esi
-	sbbl	104(%esp), %esi         # 4-byte Folded Reload
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	%ecx, %esi
-	sbbl	116(%esp), %esi         # 4-byte Folded Reload
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	%ebx, %esi
-	movl	%edi, %ebx
-	sbbl	120(%esp), %esi         # 4-byte Folded Reload
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	%ebp, %esi
-	movl	%ebp, %edi
-	sbbl	124(%esp), %esi         # 4-byte Folded Reload
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, %esi
-	sbbl	128(%esp), %esi         # 4-byte Folded Reload
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB83_2
-# BB#1:
-	movl	104(%esp), %edx         # 4-byte Reload
-.LBB83_2:
-	testb	%bl, %bl
-	jne	.LBB83_4
-# BB#3:
-	movl	108(%esp), %eax         # 4-byte Reload
-.LBB83_4:
-	movl	152(%esp), %ebx
-	movl	%eax, (%ebx)
-	movl	%edx, 4(%ebx)
-	jne	.LBB83_6
-# BB#5:
-	movl	116(%esp), %ecx         # 4-byte Reload
-.LBB83_6:
-	movl	%ecx, 8(%ebx)
-	movl	112(%esp), %eax         # 4-byte Reload
-	jne	.LBB83_8
-# BB#7:
-	movl	120(%esp), %eax         # 4-byte Reload
-.LBB83_8:
-	movl	%eax, 12(%ebx)
-	jne	.LBB83_10
-# BB#9:
-	movl	124(%esp), %edi         # 4-byte Reload
-.LBB83_10:
-	movl	%edi, 16(%ebx)
-	jne	.LBB83_12
-# BB#11:
-	movl	128(%esp), %ebp         # 4-byte Reload
-.LBB83_12:
-	movl	%ebp, 20(%ebx)
-	addl	$132, %esp
+	movl	%edi, 60(%ecx)
+	addl	$348, %esp                      # imm = 0x15C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end83:
-	.size	mcl_fp_mont6L, .Lfunc_end83-mcl_fp_mont6L
-
-	.globl	mcl_fp_montNF6L
-	.align	16, 0x90
-	.type	mcl_fp_montNF6L,@function
-mcl_fp_montNF6L:                        # @mcl_fp_montNF6L
-# BB#0:
+.Lfunc_end41:
+	.size	mcl_fpDbl_mulPre8L, .Lfunc_end41-mcl_fpDbl_mulPre8L
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre8L              # -- Begin function mcl_fpDbl_sqrPre8L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre8L,@function
+mcl_fpDbl_sqrPre8L:                     # @mcl_fpDbl_sqrPre8L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$132, %esp
-	movl	156(%esp), %ebx
-	movl	(%ebx), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	160(%esp), %ecx
-	movl	(%ecx), %edi
-	mull	%edi
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	164(%esp), %esi
-	movl	-4(%esi), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	imull	%edx, %ecx
-	movl	(%esi), %edx
-	movl	%edx, 128(%esp)         # 4-byte Spill
-	movl	20(%esi), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	16(%esi), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	12(%esi), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	8(%esi), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	4(%esi), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	4(%ebx), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	20(%ebx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	16(%ebx), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	12(%ebx), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	8(%ebx), %ebx
-	movl	%ebx, 88(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	mull	%edi
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	%edi
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%edi
-	movl	%edx, %ecx
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%edi
-	movl	%edx, %ebx
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	mull	%edi
-	movl	%edx, %ebp
-	movl	%eax, %esi
-	addl	64(%esp), %esi          # 4-byte Folded Reload
-	adcl	(%esp), %ebp            # 4-byte Folded Reload
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	12(%esp), %edx          # 4-byte Folded Reload
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	16(%esp), %edi          # 4-byte Reload
-	addl	72(%esp), %edi          # 4-byte Folded Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	adcl	$0, %eax
-	addl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	4(%eax), %edi
-	movl	%edi, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	addl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	%esi, 40(%esp)          # 4-byte Folded Spill
-	adcl	%ebp, 44(%esp)          # 4-byte Folded Spill
-	adcl	%ebx, 48(%esp)          # 4-byte Folded Spill
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	adcl	76(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %ebp
-	imull	96(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	%ebp, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	%ebp, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ebp, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	addl	%ecx, %eax
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, %ebp
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	%edx, %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	8(%eax), %ecx
-	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ecx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	%eax, %esi
-	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	addl	%esi, %ebp
-	adcl	%edi, %ebx
-	movl	%ebx, %edi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	76(%esp), %eax          # 4-byte Reload
+	subl	$348, %esp                      # imm = 0x15C
+	calll	.L42$pb
+.L42$pb:
+	popl	%ebx
+.Ltmp4:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.L42$pb), %ebx
+	movl	372(%esp), %ecx
+	subl	$4, %esp
+	leal	316(%esp), %eax
+	pushl	(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	344(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	340(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	336(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	332(%esp), %ebp
+	movl	328(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	324(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	320(%esp), %edi
+	movl	312(%esp), %eax
+	movl	316(%esp), %esi
+	movl	368(%esp), %ecx
+	movl	%eax, (%ecx)
+	subl	$4, %esp
+	leal	276(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	4(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	304(%esp), %eax
+	movl	%esi, %edx
+	addl	272(%esp), %edx
+	adcl	276(%esp), %edi
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	280(%esp), %esi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	284(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	288(%esp), %ebp
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	292(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	296(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	300(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	368(%esp), %ecx
+	movl	%edx, 4(%ecx)
 	adcl	$0, %eax
-	movl	24(%esp), %ebx          # 4-byte Reload
-	addl	40(%esp), %ebx          # 4-byte Folded Reload
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	68(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	236(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	264(%esp), %eax
+	movl	%edi, %edx
+	addl	232(%esp), %edx
+	adcl	236(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	240(%esp), %esi
+	adcl	244(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	248(%esp), %ebp
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	252(%esp), %edi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	256(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	260(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	368(%esp), %ecx
+	movl	%edx, 8(%ecx)
 	adcl	$0, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ebx, %edi
-	movl	%ebx, %ecx
-	imull	96(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	%edi, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	%edi, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	addl	%ecx, %eax
-	adcl	%ebp, %esi
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	leal	196(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	12(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	224(%esp), %eax
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	192(%esp), %edx
+	adcl	196(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	200(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	204(%esp), %ebp
+	adcl	208(%esp), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	212(%esp), %esi
+	movl	24(%esp), %edi                  # 4-byte Reload
+	adcl	216(%esp), %edi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	220(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	368(%esp), %ecx
+	movl	%edx, 12(%ecx)
 	adcl	$0, %eax
-	addl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	12(%eax), %ecx
-	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	%ecx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, %ebx
-	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	156(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	16(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	184(%esp), %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	addl	152(%esp), %ecx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	156(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	160(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	164(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	168(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	172(%esp), %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	176(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	180(%esp), %edi
+	movl	368(%esp), %eax
+	movl	%ecx, 16(%eax)
+	adcl	$0, %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	116(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	20(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	144(%esp), %edx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	addl	112(%esp), %eax
+	movl	%ebp, %esi
+	adcl	116(%esp), %esi
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	120(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	124(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	128(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	132(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	136(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	140(%esp), %edi
+	movl	368(%esp), %ecx
+	movl	%eax, 20(%ecx)
+	adcl	$0, %edx
 	movl	%edx, %ebp
-	addl	%ebx, %ebp
-	adcl	%esi, %edi
-	movl	%edi, %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	52(%esp), %ebx          # 4-byte Reload
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	24(%esp), %edi          # 4-byte Reload
-	addl	40(%esp), %edi          # 4-byte Folded Reload
-	adcl	60(%esp), %ebp          # 4-byte Folded Reload
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	adcl	76(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%edi, %esi
-	movl	%edi, %ecx
-	imull	96(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%esi, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
+	subl	$4, %esp
+	leal	76(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	24(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	72(%esp), %esi
 	movl	%esi, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	addl	%ecx, %eax
-	adcl	%ebp, %ebx
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	16(%eax), %ecx
-	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ecx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %ebp
-	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%edx, %ebx
-	addl	%ebp, %ebx
-	adcl	%edi, %esi
-	movl	%esi, %edi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	24(%esp), %esi          # 4-byte Reload
-	addl	40(%esp), %esi          # 4-byte Folded Reload
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%esi, %ebx
-	movl	%esi, %ecx
-	imull	96(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	%ebx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	%ebx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	addl	%ecx, %eax
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	64(%esp), %ebx          # 4-byte Reload
-	adcl	%edi, %ebx
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	%edx, %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	20(%eax), %ecx
-	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %edi
-	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%eax, %ebp
-	movl	%edx, %ebx
-	addl	%edi, %ebx
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, %edi
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	addl	52(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebx          # 4-byte Reload
-	imull	%ebp, %ebx
-	movl	%ebx, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	%ebx, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	%ebx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	%ebx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ebx, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	movl	%ebx, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	addl	52(%esp), %edi          # 4-byte Folded Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	adcl	88(%esp), %esi          # 4-byte Folded Reload
-	adcl	84(%esp), %ebp          # 4-byte Folded Reload
-	movl	104(%esp), %ebx         # 4-byte Reload
-	adcl	100(%esp), %ebx         # 4-byte Folded Reload
-	movl	60(%esp), %edi          # 4-byte Reload
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	76(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	80(%esp), %esi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	84(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	88(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	92(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	96(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	100(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	104(%esp), %edx
+	movl	368(%esp), %ecx
+	movl	%eax, 24(%ecx)
+	adcl	$0, %edx
+	movl	%edx, %edi
+	subl	$4, %esp
+	leal	36(%esp), %eax
+	movl	376(%esp), %ecx
+	pushl	28(%ecx)
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	addl	32(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	36(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	40(%esp), %ebp
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	44(%esp), %ebx
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	48(%esp), %esi
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	52(%esp), %edx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	56(%esp), %eax
+	adcl	60(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	64(%esp), %edi
+	movl	368(%esp), %ecx
+	movl	%eax, 52(%ecx)
+	movl	%edx, 48(%ecx)
+	movl	%esi, 44(%ecx)
+	movl	%ebx, 40(%ecx)
+	movl	%ebp, 36(%ecx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 32(%ecx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 56(%ecx)
+	movl	8(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 28(%ecx)
 	adcl	$0, %edi
-	addl	72(%esp), %eax          # 4-byte Folded Reload
-	adcl	%edx, %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	68(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	adcl	92(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 104(%esp)         # 4-byte Spill
-	adcl	96(%esp), %edi          # 4-byte Folded Reload
-	movl	%eax, %edx
-	subl	128(%esp), %edx         # 4-byte Folded Reload
-	sbbl	112(%esp), %ecx         # 4-byte Folded Reload
-	movl	%ecx, %ebx
-	sbbl	116(%esp), %esi         # 4-byte Folded Reload
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	%ebp, %ecx
-	sbbl	120(%esp), %ecx         # 4-byte Folded Reload
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	sbbl	124(%esp), %esi         # 4-byte Folded Reload
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	%edi, %ecx
-	movl	%edi, %esi
-	movl	%ecx, %edi
-	sbbl	108(%esp), %edi         # 4-byte Folded Reload
-	movl	%edi, %ecx
-	sarl	$31, %ecx
-	testl	%ecx, %ecx
-	js	.LBB84_2
-# BB#1:
-	movl	%edx, %eax
-.LBB84_2:
-	movl	152(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	88(%esp), %eax          # 4-byte Reload
-	js	.LBB84_4
-# BB#3:
-	movl	%ebx, %eax
-.LBB84_4:
-	movl	%eax, 4(%ecx)
-	movl	%ecx, %ebx
-	movl	%esi, %eax
-	movl	104(%esp), %ecx         # 4-byte Reload
-	movl	100(%esp), %edx         # 4-byte Reload
-	js	.LBB84_6
-# BB#5:
-	movl	116(%esp), %edx         # 4-byte Reload
-.LBB84_6:
-	movl	%edx, 8(%ebx)
-	movl	%ebx, %edx
-	js	.LBB84_8
-# BB#7:
-	movl	120(%esp), %ebp         # 4-byte Reload
-.LBB84_8:
-	movl	%ebp, 12(%edx)
-	js	.LBB84_10
-# BB#9:
-	movl	128(%esp), %ecx         # 4-byte Reload
-.LBB84_10:
-	movl	%ecx, 16(%edx)
-	js	.LBB84_12
-# BB#11:
-	movl	%edi, %eax
-.LBB84_12:
-	movl	%eax, 20(%edx)
-	addl	$132, %esp
+	movl	%edi, 60(%ecx)
+	addl	$348, %esp                      # imm = 0x15C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end84:
-	.size	mcl_fp_montNF6L, .Lfunc_end84-mcl_fp_montNF6L
-
-	.globl	mcl_fp_montRed6L
-	.align	16, 0x90
-	.type	mcl_fp_montRed6L,@function
-mcl_fp_montRed6L:                       # @mcl_fp_montRed6L
-# BB#0:
+.Lfunc_end42:
+	.size	mcl_fpDbl_sqrPre8L, .Lfunc_end42-mcl_fpDbl_sqrPre8L
+                                        # -- End function
+	.globl	mcl_fp_mont8L                   # -- Begin function mcl_fp_mont8L
+	.p2align	4, 0x90
+	.type	mcl_fp_mont8L,@function
+mcl_fp_mont8L:                          # @mcl_fp_mont8L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$104, %esp
-	movl	132(%esp), %eax
-	movl	-4(%eax), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	(%eax), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	128(%esp), %ebp
-	movl	(%ebp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	imull	%edx, %ecx
-	movl	20(%eax), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	16(%eax), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	12(%eax), %ebx
-	movl	%ebx, 88(%esp)          # 4-byte Spill
-	movl	8(%eax), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	4(%eax), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%ebx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%esi
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, %esi
+	subl	$684, %esp                      # imm = 0x2AC
+	calll	.L43$pb
+.L43$pb:
+	popl	%ebx
+.Ltmp5:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp5-.L43$pb), %ebx
+	movl	716(%esp), %eax
+	movl	-4(%eax), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	712(%esp), %ecx
+	subl	$4, %esp
+	leal	652(%esp), %eax
+	pushl	(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	648(%esp), %edi
+	movl	652(%esp), %esi
+	movl	%ebp, %eax
+	imull	%edi, %eax
+	movl	680(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	676(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	672(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	668(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	664(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	660(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	656(%esp), %ebp
+	subl	$4, %esp
+	leal	612(%esp), %ecx
+	pushl	%eax
+	pushl	724(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	608(%esp), %edi
+	adcl	612(%esp), %esi
+	adcl	616(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	620(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	624(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	628(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	632(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	636(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	640(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	572(%esp), %ecx
+	movzbl	%al, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	716(%esp), %eax
+	pushl	4(%eax)
+	pushl	716(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	568(%esp), %esi
+	adcl	572(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	576(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	580(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	584(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	588(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	592(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	596(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	600(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	532(%esp), %ecx
+	movl	48(%esp), %edx                  # 4-byte Reload
+	imull	%esi, %edx
+	movzbl	%al, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	724(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	528(%esp), %esi
+	adcl	532(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	536(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	540(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	544(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	552(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	556(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	560(%esp), %esi
+	adcl	$0, 24(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	492(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	488(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	492(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	496(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	500(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	504(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	508(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	512(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	516(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	520(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	452(%esp), %ecx
+	movl	48(%esp), %edx                  # 4-byte Reload
+	imull	%ebp, %edx
+	movzbl	%al, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	724(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	448(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	452(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	456(%esp), %edi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	464(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	468(%esp), %ebp
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	472(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	476(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	480(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	$0, 32(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	412(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	12(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	addl	408(%esp), %eax
+	adcl	412(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	416(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	420(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	424(%esp), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	adcl	428(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	432(%esp), %esi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	436(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	440(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	setb	%cl
+	subl	$4, %esp
+	leal	372(%esp), %ebp
+	movl	48(%esp), %edx                  # 4-byte Reload
 	movl	%eax, %edi
-	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%eax, %ebx
-	addl	%edi, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, %eax
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	64(%esp), %edi          # 4-byte Folded Reload
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	68(%esp), %esi          # 4-byte Folded Reload
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	36(%esp), %ebx          # 4-byte Reload
-	adcl	4(%ebp), %ebx
-	adcl	8(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	adcl	12(%ebp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	16(%ebp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	20(%ebp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	24(%ebp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	44(%ebp), %eax
-	movl	40(%ebp), %edx
-	movl	36(%ebp), %esi
-	movl	32(%ebp), %edi
-	movl	28(%ebp), %ecx
-	adcl	$0, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ebx, %esi
-	imull	96(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%esi, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	%esi, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
+	imull	%eax, %edx
+	movzbl	%cl, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	pushl	%edx
+	movl	724(%esp), %eax
+	pushl	%eax
+	pushl	%ebp
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	368(%esp), %edi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	372(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	adcl	376(%esp), %ebp
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	380(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	384(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	388(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	392(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	396(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	400(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	$0, 8(%esp)                     # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	332(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	16(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	addl	328(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	332(%esp), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	adcl	336(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	340(%esp), %ebp
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	344(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	348(%esp), %edi
+	adcl	352(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	356(%esp), %esi
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	360(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	setb	%cl
+	subl	$4, %esp
+	movl	48(%esp), %edx                  # 4-byte Reload
+	imull	%eax, %edx
+	movzbl	%cl, %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	724(%esp)
+	leal	300(%esp), %eax
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	addl	288(%esp), %eax
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	292(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	296(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	300(%esp), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	304(%esp), %ebp
+	adcl	308(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	316(%esp), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	320(%esp), %edi
+	adcl	$0, 40(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	252(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	20(%ecx)
+	movl	716(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	addl	248(%esp), %eax
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	252(%esp), %esi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	256(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	260(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	264(%esp), %ebp
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	268(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	272(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	276(%esp), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	280(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	setb	%cl
+	subl	$4, %esp
+	movl	48(%esp), %edx                  # 4-byte Reload
+	imull	%eax, %edx
 	movl	%eax, %edi
-	movl	%esi, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%eax, %esi
-	addl	%edi, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	%ebp, %ecx
-	movl	%ecx, %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	32(%esp), %edi          # 4-byte Reload
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	%ebx, %esi
-	movl	24(%esp), %esi          # 4-byte Reload
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	$0, 20(%esp)            # 4-byte Folded Spill
-	adcl	$0, 60(%esp)            # 4-byte Folded Spill
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
-	adcl	$0, 72(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%esi, %ebx
-	imull	96(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, %ecx
-	movl	%ebx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebp
-	addl	%ecx, %ebp
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	48(%esp), %ebx          # 4-byte Reload
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	24(%esp), %eax          # 4-byte Folded Reload
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	$0, 60(%esp)            # 4-byte Folded Spill
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
-	adcl	$0, 72(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ebp, %ecx
-	imull	96(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ecx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	%ecx, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
+	movzbl	%cl, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	724(%esp)
+	leal	220(%esp), %eax
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	208(%esp), %edi
+	adcl	212(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	216(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	220(%esp), %esi
+	adcl	224(%esp), %ebp
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	228(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	240(%esp), %edi
+	adcl	$0, 12(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	716(%esp), %eax
+	pushl	24(%eax)
+	movl	716(%esp), %eax
+	pushl	%eax
+	leal	180(%esp), %eax
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	168(%esp), %edx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	176(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	180(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	184(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	188(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	192(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	196(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	200(%esp), %ebp
+	setb	%al
+	subl	$4, %esp
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
 	movl	%edx, %esi
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%eax, %edi
-	addl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	%ebx, %esi
-	movl	%esi, %ebx
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	40(%esp), %esi          # 4-byte Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	%ebp, %edi
-	movl	28(%esp), %edi          # 4-byte Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
-	adcl	$0, 72(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%edi, %esi
-	imull	96(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	80(%esp)                # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	movl	%esi, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%eax, %ebx
-	addl	%ecx, %edx
-	movl	%edx, %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	28(%esp), %ebx          # 4-byte Folded Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	$0, 72(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	96(%esp), %ebx          # 4-byte Reload
-	imull	%ebp, %ebx
-	movl	%ebx, 96(%esp)          # 4-byte Spill
-	movl	%ebp, %esi
-	movl	%ebx, %eax
-	mull	76(%esp)                # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
+	movzbl	%al, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	movl	724(%esp), %eax
+	pushl	%eax
+	leal	140(%esp), %eax
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	128(%esp), %esi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	132(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	136(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	140(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	144(%esp), %esi
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	148(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	152(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	156(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	160(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	$0, %ebp
+	subl	$4, %esp
+	leal	92(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	28(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	4(%esp), %edx                   # 4-byte Reload
+	addl	88(%esp), %edx
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	92(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	96(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	100(%esp), %esi
+	adcl	104(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	108(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	112(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	116(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	120(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	setb	4(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	leal	52(%esp), %eax
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
 	movl	%edx, %ebp
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	88(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	84(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	mull	80(%esp)                # 4-byte Folded Reload
-	addl	%ebx, %eax
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	movl	64(%esp), %ebx          # 4-byte Reload
-	adcl	$0, %ebx
-	addl	%esi, 28(%esp)          # 4-byte Folded Spill
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebx          # 4-byte Reload
-	adcl	$0, %ebx
-	movl	%eax, %esi
-	subl	92(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	sbbl	80(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	sbbl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	sbbl	88(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	sbbl	100(%esp), %ebp         # 4-byte Folded Reload
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	movl	%edi, %esi
-	sbbl	76(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB85_2
-# BB#1:
-	movl	80(%esp), %edx          # 4-byte Reload
-.LBB85_2:
-	testb	%bl, %bl
-	jne	.LBB85_4
-# BB#3:
-	movl	72(%esp), %eax          # 4-byte Reload
-.LBB85_4:
-	movl	124(%esp), %ebx
-	movl	%eax, (%ebx)
-	movl	%edx, 4(%ebx)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	jne	.LBB85_6
-# BB#5:
-	movl	84(%esp), %ecx          # 4-byte Reload
-.LBB85_6:
-	movl	%ecx, 8(%ebx)
-	movl	%edi, %ecx
-	movl	60(%esp), %edi          # 4-byte Reload
-	movl	96(%esp), %esi          # 4-byte Reload
-	jne	.LBB85_8
-# BB#7:
-	movl	88(%esp), %esi          # 4-byte Reload
-.LBB85_8:
-	movl	%esi, 12(%ebx)
-	jne	.LBB85_10
-# BB#9:
-	movl	92(%esp), %edi          # 4-byte Reload
-.LBB85_10:
-	movl	%edi, 16(%ebx)
-	jne	.LBB85_12
-# BB#11:
-	movl	100(%esp), %ecx         # 4-byte Reload
-.LBB85_12:
-	movl	%ecx, 20(%ebx)
-	addl	$104, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end85:
-	.size	mcl_fp_montRed6L, .Lfunc_end85-mcl_fp_montRed6L
-
-	.globl	mcl_fp_addPre6L
-	.align	16, 0x90
-	.type	mcl_fp_addPre6L,@function
-mcl_fp_addPre6L:                        # @mcl_fp_addPre6L
-# BB#0:
-	pushl	%esi
-	movl	16(%esp), %eax
-	movl	(%eax), %ecx
-	movl	12(%esp), %edx
-	addl	(%edx), %ecx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	4(%eax), %ecx
-	adcl	4(%edx), %ecx
-	movl	%ecx, 4(%esi)
-	movl	8(%eax), %ecx
-	adcl	8(%edx), %ecx
-	movl	%ecx, 8(%esi)
-	movl	12(%edx), %ecx
-	adcl	12(%eax), %ecx
-	movl	%ecx, 12(%esi)
-	movl	16(%edx), %ecx
-	adcl	16(%eax), %ecx
-	movl	%ecx, 16(%esi)
-	movl	20(%eax), %eax
-	movl	20(%edx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 20(%esi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	retl
-.Lfunc_end86:
-	.size	mcl_fp_addPre6L, .Lfunc_end86-mcl_fp_addPre6L
-
-	.globl	mcl_fp_subPre6L
-	.align	16, 0x90
-	.type	mcl_fp_subPre6L,@function
-mcl_fp_subPre6L:                        # @mcl_fp_subPre6L
-# BB#0:
-	pushl	%edi
-	pushl	%esi
-	movl	16(%esp), %ecx
-	movl	(%ecx), %edx
-	xorl	%eax, %eax
-	movl	20(%esp), %esi
-	subl	(%esi), %edx
-	movl	12(%esp), %edi
-	movl	%edx, (%edi)
-	movl	4(%ecx), %edx
-	sbbl	4(%esi), %edx
-	movl	%edx, 4(%edi)
-	movl	8(%ecx), %edx
-	sbbl	8(%esi), %edx
-	movl	%edx, 8(%edi)
-	movl	12(%ecx), %edx
-	sbbl	12(%esi), %edx
-	movl	%edx, 12(%edi)
-	movl	16(%ecx), %edx
-	sbbl	16(%esi), %edx
-	movl	%edx, 16(%edi)
-	movl	20(%esi), %edx
-	movl	20(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 20(%edi)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	retl
-.Lfunc_end87:
-	.size	mcl_fp_subPre6L, .Lfunc_end87-mcl_fp_subPre6L
-
-	.globl	mcl_fp_shr1_6L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_6L,@function
-mcl_fp_shr1_6L:                         # @mcl_fp_shr1_6L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	20(%eax), %ecx
-	movl	16(%eax), %edx
-	movl	12(%eax), %esi
-	movl	8(%eax), %edi
-	movl	(%eax), %ebx
-	movl	4(%eax), %eax
-	shrdl	$1, %eax, %ebx
-	movl	20(%esp), %ebp
-	movl	%ebx, (%ebp)
-	shrdl	$1, %edi, %eax
-	movl	%eax, 4(%ebp)
-	shrdl	$1, %esi, %edi
-	movl	%edi, 8(%ebp)
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ebp)
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 16(%ebp)
-	shrl	%ecx
-	movl	%ecx, 20(%ebp)
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end88:
-	.size	mcl_fp_shr1_6L, .Lfunc_end88-mcl_fp_shr1_6L
-
-	.globl	mcl_fp_add6L
-	.align	16, 0x90
-	.type	mcl_fp_add6L,@function
-mcl_fp_add6L:                           # @mcl_fp_add6L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$12, %esp
-	movl	40(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %ebp
-	movl	36(%esp), %ebx
-	addl	(%ebx), %edx
-	adcl	4(%ebx), %ebp
-	movl	8(%eax), %ecx
-	adcl	8(%ebx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	%ecx, %esi
-	movl	12(%ebx), %ecx
-	movl	16(%ebx), %edi
-	adcl	12(%eax), %ecx
-	adcl	16(%eax), %edi
-	movl	20(%ebx), %ebx
-	adcl	20(%eax), %ebx
-	movl	32(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ebp, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%ecx, 12(%eax)
-	movl	%edi, 16(%eax)
-	movl	%ebx, 20(%eax)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	44(%esp), %esi
-	subl	(%esi), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	8(%esp), %edx           # 4-byte Reload
-	movl	44(%esp), %esi
-	sbbl	4(%esi), %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	movl	%ecx, %ebp
-	sbbl	8(%esi), %edx
-	sbbl	12(%esi), %ebp
-	sbbl	16(%esi), %edi
-	sbbl	20(%esi), %ebx
+	pushl	%ecx
+	movl	724(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	48(%esp), %ebp
+	movzbl	4(%esp), %eax                   # 1-byte Folded Reload
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	52(%esp), %ecx
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	56(%esp), %edx
+	adcl	60(%esp), %esi
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	adcl	64(%esp), %ebx
+	movl	%edi, %ebp
+	adcl	68(%esp), %ebp
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	72(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	76(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	80(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	%ecx, %edi
+	movl	716(%esp), %ecx
+	subl	(%ecx), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	sbbl	4(%ecx), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	%esi, %edx
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	sbbl	8(%ecx), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	sbbl	12(%ecx), %ebx
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	sbbl	16(%ecx), %ebp
+	movl	%ecx, %edx
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	sbbl	20(%edx), %ecx
+	movl	12(%esp), %esi                  # 4-byte Reload
+	sbbl	24(%edx), %esi
+	movl	20(%esp), %edi                  # 4-byte Reload
+	sbbl	28(%edx), %edi
 	sbbl	$0, %eax
 	testb	$1, %al
-	jne	.LBB89_2
-# BB#1:                                 # %nocarry
-	movl	(%esp), %eax            # 4-byte Reload
-	movl	32(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, 4(%ecx)
-	movl	%edx, 8(%ecx)
-	movl	%ebp, 12(%ecx)
-	movl	%edi, 16(%ecx)
-	movl	%ebx, 20(%ecx)
-.LBB89_2:                               # %carry
-	addl	$12, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end89:
-	.size	mcl_fp_add6L, .Lfunc_end89-mcl_fp_add6L
-
-	.globl	mcl_fp_addNF6L
-	.align	16, 0x90
-	.type	mcl_fp_addNF6L,@function
-mcl_fp_addNF6L:                         # @mcl_fp_addNF6L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$40, %esp
-	movl	68(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %ecx
-	movl	64(%esp), %ebp
-	addl	(%ebp), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%edx, %ebx
-	adcl	4(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	20(%eax), %edx
-	movl	16(%eax), %esi
-	movl	12(%eax), %edi
-	movl	8(%eax), %eax
-	adcl	8(%ebp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	12(%ebp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	16(%ebp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	20(%ebp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%ebx, %ebp
-	movl	72(%esp), %ebx
-	subl	(%ebx), %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	movl	%ecx, %ebp
-	movl	72(%esp), %ecx
-	sbbl	4(%ecx), %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	sbbl	8(%ecx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	sbbl	12(%ecx), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	%esi, %edi
-	sbbl	16(%ecx), %edi
-	movl	%edx, %esi
-	sbbl	20(%ecx), %esi
-	movl	%esi, %ebx
-	sarl	$31, %ebx
-	testl	%ebx, %ebx
-	js	.LBB90_2
-# BB#1:
-	movl	(%esp), %eax            # 4-byte Reload
-.LBB90_2:
-	movl	60(%esp), %ebx
-	movl	%eax, (%ebx)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	js	.LBB90_4
-# BB#3:
-	movl	4(%esp), %ecx           # 4-byte Reload
-.LBB90_4:
-	movl	%ecx, 4(%ebx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	28(%esp), %edx          # 4-byte Reload
-	movl	24(%esp), %ecx          # 4-byte Reload
-	js	.LBB90_6
-# BB#5:
-	movl	8(%esp), %ecx           # 4-byte Reload
-.LBB90_6:
-	movl	%ecx, 8(%ebx)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	js	.LBB90_8
-# BB#7:
-	movl	12(%esp), %edx          # 4-byte Reload
-.LBB90_8:
-	movl	%edx, 12(%ebx)
-	js	.LBB90_10
-# BB#9:
-	movl	%edi, %ecx
-.LBB90_10:
-	movl	%ecx, 16(%ebx)
-	js	.LBB90_12
-# BB#11:
-	movl	%esi, %eax
-.LBB90_12:
-	movl	%eax, 20(%ebx)
-	addl	$40, %esp
+	jne	.LBB43_1
+# %bb.2:
+	movl	704(%esp), %eax
+	movl	%edi, 28(%eax)
+	jne	.LBB43_3
+.LBB43_4:
+	movl	%esi, 24(%eax)
+	movl	36(%esp), %edx                  # 4-byte Reload
+	jne	.LBB43_5
+.LBB43_6:
+	movl	%ecx, 20(%eax)
+	movl	44(%esp), %esi                  # 4-byte Reload
+	jne	.LBB43_7
+.LBB43_8:
+	movl	%ebp, 16(%eax)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	jne	.LBB43_9
+.LBB43_10:
+	movl	%ebx, 12(%eax)
+	jne	.LBB43_11
+.LBB43_12:
+	movl	%esi, 8(%eax)
+	jne	.LBB43_13
+.LBB43_14:
+	movl	%edx, 4(%eax)
+	je	.LBB43_16
+.LBB43_15:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+.LBB43_16:
+	movl	%ecx, (%eax)
+	addl	$684, %esp                      # imm = 0x2AC
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end90:
-	.size	mcl_fp_addNF6L, .Lfunc_end90-mcl_fp_addNF6L
-
-	.globl	mcl_fp_sub6L
-	.align	16, 0x90
-	.type	mcl_fp_sub6L,@function
-mcl_fp_sub6L:                           # @mcl_fp_sub6L
-# BB#0:
+.LBB43_1:
+	movl	20(%esp), %edi                  # 4-byte Reload
+	movl	704(%esp), %eax
+	movl	%edi, 28(%eax)
+	je	.LBB43_4
+.LBB43_3:
+	movl	12(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 24(%eax)
+	movl	36(%esp), %edx                  # 4-byte Reload
+	je	.LBB43_6
+.LBB43_5:
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%eax)
+	movl	44(%esp), %esi                  # 4-byte Reload
+	je	.LBB43_8
+.LBB43_7:
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	movl	%ebp, 16(%eax)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	je	.LBB43_10
+.LBB43_9:
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 12(%eax)
+	je	.LBB43_12
+.LBB43_11:
+	movl	24(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%eax)
+	je	.LBB43_14
+.LBB43_13:
+	movl	28(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%eax)
+	jne	.LBB43_15
+	jmp	.LBB43_16
+.Lfunc_end43:
+	.size	mcl_fp_mont8L, .Lfunc_end43-mcl_fp_mont8L
+                                        # -- End function
+	.globl	mcl_fp_montNF8L                 # -- Begin function mcl_fp_montNF8L
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF8L,@function
+mcl_fp_montNF8L:                        # @mcl_fp_montNF8L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$16, %esp
-	movl	40(%esp), %ebx
-	movl	(%ebx), %esi
-	movl	4(%ebx), %edi
-	movl	44(%esp), %ecx
-	subl	(%ecx), %esi
-	sbbl	4(%ecx), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	movl	8(%ebx), %eax
-	sbbl	8(%ecx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	12(%ebx), %eax
-	sbbl	12(%ecx), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	16(%ebx), %ebp
-	sbbl	16(%ecx), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	20(%ebx), %edx
-	sbbl	20(%ecx), %edx
-	movl	$0, %ecx
-	sbbl	$0, %ecx
-	testb	$1, %cl
-	movl	36(%esp), %ebx
-	movl	%esi, (%ebx)
-	movl	%edi, 4(%ebx)
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	%eax, 12(%ebx)
-	movl	%ebp, 16(%ebx)
-	movl	%edx, 20(%ebx)
-	je	.LBB91_2
-# BB#1:                                 # %carry
-	movl	48(%esp), %ecx
-	addl	(%ecx), %esi
-	movl	%esi, (%ebx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	4(%ecx), %eax
-	adcl	8(%ecx), %edi
-	movl	%eax, 4(%ebx)
-	movl	12(%ecx), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%edi, 8(%ebx)
-	movl	%eax, 12(%ebx)
-	movl	16(%ecx), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 16(%ebx)
-	movl	20(%ecx), %eax
-	adcl	%edx, %eax
-	movl	%eax, 20(%ebx)
-.LBB91_2:                               # %nocarry
-	addl	$16, %esp
-	popl	%esi
-	popl	%edi
+	subl	$684, %esp                      # imm = 0x2AC
+	calll	.L44$pb
+.L44$pb:
 	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end91:
-	.size	mcl_fp_sub6L, .Lfunc_end91-mcl_fp_sub6L
-
-	.globl	mcl_fp_subNF6L
-	.align	16, 0x90
-	.type	mcl_fp_subNF6L,@function
-mcl_fp_subNF6L:                         # @mcl_fp_subNF6L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$24, %esp
-	movl	48(%esp), %ebx
-	movl	20(%ebx), %esi
-	movl	(%ebx), %ecx
-	movl	4(%ebx), %eax
-	movl	52(%esp), %ebp
-	subl	(%ebp), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	4(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	16(%ebx), %eax
-	movl	12(%ebx), %ecx
-	movl	8(%ebx), %edx
-	sbbl	8(%ebp), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	sbbl	16(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%esi, %edx
-	sbbl	20(%ebp), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	%edx, %ebp
-	sarl	$31, %ebp
-	movl	%ebp, %ecx
-	addl	%ecx, %ecx
-	movl	%ebp, %eax
-	adcl	%eax, %eax
-	shrl	$31, %edx
-	orl	%ecx, %edx
-	movl	56(%esp), %ebx
-	andl	4(%ebx), %eax
-	andl	(%ebx), %edx
-	movl	20(%ebx), %edi
-	andl	%ebp, %edi
-	movl	16(%ebx), %esi
-	andl	%ebp, %esi
-	movl	12(%ebx), %ecx
-	andl	%ebp, %ecx
-	andl	8(%ebx), %ebp
-	addl	8(%esp), %edx           # 4-byte Folded Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	44(%esp), %ebx
-	movl	%edx, (%ebx)
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%eax, 4(%ebx)
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebp, 8(%ebx)
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%ecx, 12(%ebx)
-	movl	%esi, 16(%ebx)
-	adcl	(%esp), %edi            # 4-byte Folded Reload
+.Ltmp6:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp6-.L44$pb), %ebx
+	movl	716(%esp), %eax
+	movl	-4(%eax), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	movl	712(%esp), %ecx
+	subl	$4, %esp
+	leal	652(%esp), %eax
+	pushl	(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	648(%esp), %ebp
+	movl	652(%esp), %edi
+	movl	%esi, %eax
+	imull	%ebp, %eax
+	movl	680(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	676(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	672(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	668(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	664(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	660(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	656(%esp), %esi
+	subl	$4, %esp
+	leal	612(%esp), %ecx
+	pushl	%eax
+	pushl	724(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	608(%esp), %ebp
+	adcl	612(%esp), %edi
+	adcl	616(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	620(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	624(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	628(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	632(%esp), %ebp
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	636(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	640(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	572(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	4(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	600(%esp), %eax
+	addl	568(%esp), %edi
+	adcl	572(%esp), %esi
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	576(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	580(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	584(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	588(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	592(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	596(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, %ebp
+	subl	$4, %esp
+	leal	532(%esp), %eax
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%edi, %ecx
+	pushl	%ecx
+	movl	724(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	528(%esp), %edi
+	adcl	532(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	536(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	540(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	544(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	552(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %edi                  # 4-byte Reload
+	adcl	556(%esp), %edi
+	adcl	560(%esp), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	492(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	520(%esp), %ecx
+	addl	488(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	492(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	496(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	500(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	504(%esp), %ebp
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	508(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	512(%esp), %edi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	516(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	$0, %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	452(%esp), %eax
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%esi, %ecx
+	pushl	%ecx
+	pushl	724(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	448(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	452(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	456(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	464(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	468(%esp), %esi
+	adcl	472(%esp), %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	476(%esp), %edi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	480(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	412(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	12(%ecx)
+	movl	716(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	440(%esp), %ecx
+	movl	8(%esp), %edx                   # 4-byte Reload
+	addl	408(%esp), %edx
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	412(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	416(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	420(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	424(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	428(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	432(%esp), %edi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	436(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	$0, %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	leal	372(%esp), %eax
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	724(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	368(%esp), %esi
+	adcl	372(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	376(%esp), %esi
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	380(%esp), %ebp
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	384(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	388(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	392(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	396(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	400(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	leal	332(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	16(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	360(%esp), %eax
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	328(%esp), %edx
+	adcl	332(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	336(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	340(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	344(%esp), %esi
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	348(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	352(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	356(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, %ebp
+	subl	$4, %esp
+	leal	292(%esp), %eax
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %edi
+	pushl	%ecx
+	pushl	724(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	288(%esp), %edi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	292(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	296(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	300(%esp), %edi
+	adcl	304(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	308(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	316(%esp), %esi
+	adcl	320(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	252(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	20(%ecx)
+	movl	716(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	leal	208(%esp), %eax
+	movl	280(%esp), %ebp
+	movl	12(%esp), %edx                  # 4-byte Reload
+	addl	248(%esp), %edx
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	252(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	256(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	260(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	264(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	268(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	272(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	276(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	$0, %ebp
+	subl	$4, %esp
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	724(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	208(%esp), %esi
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	212(%esp), %edi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	216(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	220(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	224(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	228(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	240(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	716(%esp), %eax
+	pushl	24(%eax)
+	pushl	716(%esp)
+	leal	180(%esp), %eax
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	leal	128(%esp), %eax
+	movl	200(%esp), %ebp
+	movl	%edi, %edx
+	addl	168(%esp), %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	172(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %edi                  # 4-byte Reload
+	adcl	176(%esp), %edi
+	adcl	180(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	184(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	188(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	192(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	196(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	$0, %ebp
+	subl	$4, %esp
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	724(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	128(%esp), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	132(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	136(%esp), %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	140(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	144(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	148(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	152(%esp), %edi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	156(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	160(%esp), %ebp
+	subl	$4, %esp
+	leal	92(%esp), %eax
+	movl	716(%esp), %ecx
+	pushl	28(%ecx)
+	pushl	716(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	movl	16(%esp), %edx                  # 4-byte Reload
+	addl	88(%esp), %edx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	92(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	96(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	100(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	104(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	108(%esp), %edi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	112(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	116(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	120(%esp), %ebp
+	adcl	$0, %ebp
+	subl	$4, %esp
+	leal	52(%esp), %eax
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	movl	724(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	48(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	52(%esp), %eax
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	56(%esp), %ecx
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	60(%esp), %edx
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	64(%esp), %esi
+	adcl	68(%esp), %edi
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	72(%esp), %ebx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	76(%esp), %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	adcl	80(%esp), %ebp
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	716(%esp), %ebx
+	subl	(%ebx), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	sbbl	4(%ebx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	sbbl	8(%ebx), %edx
+	movl	%esi, %eax
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	sbbl	12(%ebx), %eax
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	%edi, %ecx
+	sbbl	16(%ebx), %ecx
+	movl	12(%esp), %edi                  # 4-byte Reload
+	sbbl	20(%ebx), %edi
+	movl	16(%esp), %esi                  # 4-byte Reload
+	sbbl	24(%ebx), %esi
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	716(%esp), %ebx
+	sbbl	28(%ebx), %ebp
+	testl	%ebp, %ebp
+	js	.LBB44_1
+# %bb.2:
+	movl	704(%esp), %ebx
+	movl	%ebp, 28(%ebx)
+	js	.LBB44_3
+.LBB44_4:
+	movl	%esi, 24(%ebx)
+	js	.LBB44_5
+.LBB44_6:
 	movl	%edi, 20(%ebx)
-	addl	$24, %esp
+	js	.LBB44_7
+.LBB44_8:
+	movl	%ecx, 16(%ebx)
+	js	.LBB44_9
+.LBB44_10:
+	movl	%eax, 12(%ebx)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	js	.LBB44_11
+.LBB44_12:
+	movl	%edx, 8(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	js	.LBB44_13
+.LBB44_14:
+	movl	%ecx, 4(%ebx)
+	jns	.LBB44_16
+.LBB44_15:
+	movl	24(%esp), %eax                  # 4-byte Reload
+.LBB44_16:
+	movl	%eax, (%ebx)
+	addl	$684, %esp                      # imm = 0x2AC
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end92:
-	.size	mcl_fp_subNF6L, .Lfunc_end92-mcl_fp_subNF6L
-
-	.globl	mcl_fpDbl_add6L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add6L,@function
-mcl_fpDbl_add6L:                        # @mcl_fpDbl_add6L
-# BB#0:
+.LBB44_1:
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	movl	704(%esp), %ebx
+	movl	%ebp, 28(%ebx)
+	jns	.LBB44_4
+.LBB44_3:
+	movl	16(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 24(%ebx)
+	jns	.LBB44_6
+.LBB44_5:
+	movl	12(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 20(%ebx)
+	jns	.LBB44_8
+.LBB44_7:
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%ebx)
+	jns	.LBB44_10
+.LBB44_9:
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 12(%ebx)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB44_12
+.LBB44_11:
+	movl	28(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	jns	.LBB44_14
+.LBB44_13:
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%ebx)
+	js	.LBB44_15
+	jmp	.LBB44_16
+.Lfunc_end44:
+	.size	mcl_fp_montNF8L, .Lfunc_end44-mcl_fp_montNF8L
+                                        # -- End function
+	.globl	mcl_fp_montRed8L                # -- Begin function mcl_fp_montRed8L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed8L,@function
+mcl_fp_montRed8L:                       # @mcl_fp_montRed8L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$36, %esp
-	movl	64(%esp), %edx
-	movl	60(%esp), %ecx
-	movl	12(%ecx), %esi
-	movl	16(%ecx), %eax
-	movl	8(%edx), %edi
-	movl	(%edx), %ebx
-	addl	(%ecx), %ebx
-	movl	56(%esp), %ebp
-	movl	%ebx, (%ebp)
-	movl	4(%edx), %ebx
-	adcl	4(%ecx), %ebx
-	adcl	8(%ecx), %edi
-	adcl	12(%edx), %esi
-	adcl	16(%edx), %eax
-	movl	%ebx, 4(%ebp)
-	movl	%edx, %ebx
-	movl	32(%ebx), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%edi, 8(%ebp)
-	movl	20(%ebx), %edi
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	adcl	%edi, %esi
-	movl	24(%ebx), %edi
-	movl	%eax, 16(%ebp)
-	movl	24(%ecx), %edx
-	adcl	%edi, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	28(%ebx), %edi
-	movl	%esi, 20(%ebp)
+	subl	$380, %esp                      # imm = 0x17C
+	calll	.L45$pb
+.L45$pb:
+	popl	%ebx
+.Ltmp7:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp7-.L45$pb), %ebx
+	movl	408(%esp), %ecx
 	movl	28(%ecx), %eax
-	adcl	%edi, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	32(%ecx), %ebp
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	36(%ebx), %esi
-	movl	%ebx, %edi
-	movl	36(%ecx), %ebx
-	adcl	%esi, %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	40(%edi), %esi
-	movl	40(%ecx), %edi
-	adcl	%esi, %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi
-	movl	44(%esi), %esi
-	movl	44(%ecx), %ecx
-	adcl	%esi, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	68(%esp), %esi
-	subl	(%esi), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	68(%esp), %edx
-	sbbl	4(%edx), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	sbbl	8(%edx), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	%ebx, %ebp
-	sbbl	12(%edx), %ebp
-	movl	%edi, %ebx
-	movl	12(%esp), %edi          # 4-byte Reload
-	sbbl	16(%edx), %ebx
-	movl	%edi, %eax
-	sbbl	20(%edx), %eax
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB93_2
-# BB#1:
-	movl	%eax, %edi
-.LBB93_2:
-	testb	%cl, %cl
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	16(%esp), %edx          # 4-byte Reload
-	jne	.LBB93_4
-# BB#3:
-	movl	(%esp), %edx            # 4-byte Reload
-	movl	4(%esp), %ecx           # 4-byte Reload
-.LBB93_4:
-	movl	56(%esp), %eax
-	movl	%ecx, 24(%eax)
-	movl	%edx, 28(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	24(%esp), %edx          # 4-byte Reload
-	jne	.LBB93_6
-# BB#5:
-	movl	8(%esp), %edx           # 4-byte Reload
-.LBB93_6:
-	movl	%edx, 32(%eax)
-	movl	28(%esp), %edx          # 4-byte Reload
-	jne	.LBB93_8
-# BB#7:
-	movl	%ebp, %edx
-.LBB93_8:
-	movl	%edx, 36(%eax)
-	jne	.LBB93_10
-# BB#9:
-	movl	%ebx, %ecx
-.LBB93_10:
-	movl	%ecx, 40(%eax)
-	movl	%edi, 44(%eax)
-	addl	$36, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end93:
-	.size	mcl_fpDbl_add6L, .Lfunc_end93-mcl_fpDbl_add6L
-
-	.globl	mcl_fpDbl_sub6L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub6L,@function
-mcl_fpDbl_sub6L:                        # @mcl_fpDbl_sub6L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$24, %esp
-	movl	48(%esp), %edx
-	movl	(%edx), %eax
-	movl	4(%edx), %edi
-	movl	52(%esp), %esi
-	subl	(%esi), %eax
-	sbbl	4(%esi), %edi
-	movl	8(%edx), %ebx
-	sbbl	8(%esi), %ebx
-	movl	44(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%edx), %eax
-	sbbl	12(%esi), %eax
-	movl	%edi, 4(%ecx)
-	movl	16(%edx), %edi
-	sbbl	16(%esi), %edi
-	movl	%ebx, 8(%ecx)
-	movl	20(%esi), %ebx
-	movl	%eax, 12(%ecx)
-	movl	20(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	24(%esi), %ebx
-	movl	%edi, 16(%ecx)
-	movl	24(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	28(%esi), %edi
-	movl	%eax, 20(%ecx)
-	movl	28(%edx), %eax
-	sbbl	%edi, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	32(%esi), %edi
-	movl	32(%edx), %eax
-	sbbl	%edi, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	36(%esi), %edi
-	movl	36(%edx), %eax
-	sbbl	%edi, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	40(%esi), %edi
-	movl	40(%edx), %eax
-	sbbl	%edi, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	44(%esi), %esi
-	movl	44(%edx), %eax
-	sbbl	%esi, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	$0, %ebx
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	movl	56(%esp), %eax
-	jne	.LBB94_1
-# BB#2:
-	xorl	%edx, %edx
-	jmp	.LBB94_3
-.LBB94_1:
-	movl	20(%eax), %edx
-.LBB94_3:
-	testb	%bl, %bl
-	jne	.LBB94_4
-# BB#5:
-	movl	$0, %esi
-	movl	$0, %edi
-	jmp	.LBB94_6
-.LBB94_4:
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	24(%ecx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	20(%ecx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	12(%ecx), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	8(%ecx), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %eax
+	movl	%ecx, %edx
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	404(%esp), %eax
+	movl	28(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	16(%eax), %esi
+	movl	12(%eax), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	8(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
 	movl	(%eax), %edi
-	movl	4(%eax), %esi
-.LBB94_6:
-	jne	.LBB94_7
-# BB#8:
-	movl	$0, %ebx
-	jmp	.LBB94_9
-.LBB94_7:
-	movl	16(%eax), %ebx
-.LBB94_9:
-	jne	.LBB94_10
-# BB#11:
-	movl	$0, %ebp
-	jmp	.LBB94_12
-.LBB94_10:
-	movl	12(%eax), %ebp
-.LBB94_12:
-	jne	.LBB94_13
-# BB#14:
-	xorl	%eax, %eax
-	jmp	.LBB94_15
-.LBB94_13:
-	movl	8(%eax), %eax
-.LBB94_15:
-	addl	8(%esp), %edi           # 4-byte Folded Reload
-	adcl	(%esp), %esi            # 4-byte Folded Reload
-	movl	%edi, 24(%ecx)
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%esi, 28(%ecx)
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	adcl	16(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 36(%ecx)
-	movl	%ebx, 40(%ecx)
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%ecx)
-	addl	$24, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end94:
-	.size	mcl_fpDbl_sub6L, .Lfunc_end94-mcl_fpDbl_sub6L
-
-	.globl	mcl_fp_mulUnitPre7L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre7L,@function
-mcl_fp_mulUnitPre7L:                    # @mcl_fp_mulUnitPre7L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$36, %esp
-	movl	64(%esp), %esi
-	movl	60(%esp), %ebx
-	movl	%esi, %eax
-	mull	24(%ebx)
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	20(%ebx)
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	16(%ebx)
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	12(%ebx)
-	movl	%edx, %ebp
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	8(%ebx)
-	movl	%edx, %ecx
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	4(%ebx)
+	movl	4(%eax), %ebp
+	movl	-4(%edx), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	(%edx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	%edi, %eax
+	imull	%ecx, %eax
+	leal	348(%esp), %ecx
+	pushl	%eax
+	pushl	%edx
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	344(%esp), %edi
+	adcl	348(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	356(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	360(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	364(%esp), %esi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	368(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	372(%esp), %edi
+	movl	404(%esp), %eax
+	movl	32(%eax), %eax
+	adcl	376(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	imull	%ebp, %eax
+	leal	308(%esp), %ecx
+	pushl	%eax
+	pushl	416(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, 16(%esp)                  # 1-byte Folded Spill
+	movl	336(%esp), %eax
+	adcl	$0, %eax
+	addl	304(%esp), %ebp
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	308(%esp), %edx
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	312(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	316(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	320(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	324(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	328(%esp), %edi
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	332(%esp), %ebp
+	movl	404(%esp), %ecx
+	adcl	36(%ecx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	setb	20(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %esi
+	leal	268(%esp), %ecx
+	pushl	%eax
+	pushl	416(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, 20(%esp)                  # 1-byte Folded Spill
+	movl	296(%esp), %eax
+	adcl	$0, %eax
+	addl	264(%esp), %esi
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	268(%esp), %edx
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	272(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	276(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	280(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	284(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	288(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	292(%esp), %ebp
+	movl	404(%esp), %ecx
+	adcl	40(%ecx), %eax
+	movl	%eax, %esi
+	setb	(%esp)                          # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
 	movl	%edx, %edi
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%esi, %eax
-	mull	(%ebx)
-	movl	56(%esp), %esi
-	movl	%eax, (%esi)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%esi)
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 8(%esi)
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 12(%esi)
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 16(%esi)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%esi)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%esi)
-	movl	32(%esp), %eax          # 4-byte Reload
+	leal	228(%esp), %ecx
+	pushl	%eax
+	pushl	416(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, (%esp)                    # 1-byte Folded Spill
+	movl	256(%esp), %eax
 	adcl	$0, %eax
-	movl	%eax, 28(%esi)
-	addl	$36, %esp
+	movl	%eax, %edx
+	addl	224(%esp), %edi
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	228(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	236(%esp), %edi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	240(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	244(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	248(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	adcl	252(%esp), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	404(%esp), %eax
+	adcl	44(%eax), %edx
+	movl	%edx, %esi
+	setb	4(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	leal	188(%esp), %ecx
+	pushl	%eax
+	pushl	416(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, 4(%esp)                   # 1-byte Folded Spill
+	movl	216(%esp), %ebp
+	adcl	$0, %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	addl	184(%esp), %eax
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	188(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	192(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	196(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	200(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	204(%esp), %edi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	208(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	212(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	404(%esp), %eax
+	adcl	48(%eax), %ebp
+	setb	12(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	leal	148(%esp), %ecx
+	pushl	%eax
+	pushl	416(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, 12(%esp)                  # 1-byte Folded Spill
+	movl	176(%esp), %esi
+	adcl	$0, %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	addl	144(%esp), %eax
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	148(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	152(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	156(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	160(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	164(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	168(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	172(%esp), %ebp
+	movl	404(%esp), %eax
+	adcl	52(%eax), %esi
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	leal	108(%esp), %ecx
+	pushl	%eax
+	pushl	416(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	136(%esp), %edi
+	adcl	$0, %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	addl	104(%esp), %eax
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	108(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	112(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	116(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	120(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	124(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	128(%esp), %ebp
+	adcl	132(%esp), %esi
+	movl	404(%esp), %ecx
+	adcl	56(%ecx), %edi
+	setb	4(%esp)                         # 1-byte Folded Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	imull	%eax, %ecx
+	subl	$4, %esp
+	leal	68(%esp), %eax
+	pushl	%ecx
+	pushl	416(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, 4(%esp)                   # 1-byte Folded Spill
+	movl	96(%esp), %eax
+	adcl	$0, %eax
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	addl	64(%esp), %ecx
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	68(%esp), %ecx
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	72(%esp), %ebx
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	76(%esp), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	80(%esp), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	adcl	84(%esp), %ebp
+	adcl	88(%esp), %esi
+	adcl	92(%esp), %edi
+	movl	404(%esp), %edx
+	adcl	60(%edx), %eax
+	xorl	%edx, %edx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	subl	40(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	%ebx, %ecx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	sbbl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	sbbl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	sbbl	32(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	%ebp, %ecx
+	sbbl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	sbbl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edi, %ebp
+	sbbl	56(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%eax, %ebx
+	sbbl	60(%esp), %ebx                  # 4-byte Folded Reload
+	sbbl	%edx, %edx
+	testb	$1, %dl
+	jne	.LBB45_1
+# %bb.2:
+	movl	400(%esp), %eax
+	movl	%ebx, 28(%eax)
+	jne	.LBB45_3
+.LBB45_4:
+	movl	%ebp, 24(%eax)
+	movl	16(%esp), %edx                  # 4-byte Reload
+	jne	.LBB45_5
+.LBB45_6:
+	movl	%esi, 20(%eax)
+	jne	.LBB45_7
+.LBB45_8:
+	movl	%ecx, 16(%eax)
+	movl	24(%esp), %esi                  # 4-byte Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB45_9
+.LBB45_10:
+	movl	%ecx, 12(%eax)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	jne	.LBB45_11
+.LBB45_12:
+	movl	%esi, 8(%eax)
+	jne	.LBB45_13
+.LBB45_14:
+	movl	%edx, 4(%eax)
+	je	.LBB45_16
+.LBB45_15:
+	movl	20(%esp), %ecx                  # 4-byte Reload
+.LBB45_16:
+	movl	%ecx, (%eax)
+	addl	$380, %esp                      # imm = 0x17C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end95:
-	.size	mcl_fp_mulUnitPre7L, .Lfunc_end95-mcl_fp_mulUnitPre7L
-
-	.globl	mcl_fpDbl_mulPre7L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre7L,@function
-mcl_fpDbl_mulPre7L:                     # @mcl_fpDbl_mulPre7L
-# BB#0:
+.LBB45_1:
+	movl	%eax, %ebx
+	movl	400(%esp), %eax
+	movl	%ebx, 28(%eax)
+	je	.LBB45_4
+.LBB45_3:
+	movl	%edi, %ebp
+	movl	%ebp, 24(%eax)
+	movl	16(%esp), %edx                  # 4-byte Reload
+	je	.LBB45_6
+.LBB45_5:
+	movl	36(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 20(%eax)
+	je	.LBB45_8
+.LBB45_7:
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	24(%esp), %esi                  # 4-byte Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	je	.LBB45_10
+.LBB45_9:
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 12(%eax)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	je	.LBB45_12
+.LBB45_11:
+	movl	(%esp), %esi                    # 4-byte Reload
+	movl	%esi, 8(%eax)
+	je	.LBB45_14
+.LBB45_13:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%eax)
+	jne	.LBB45_15
+	jmp	.LBB45_16
+.Lfunc_end45:
+	.size	mcl_fp_montRed8L, .Lfunc_end45-mcl_fp_montRed8L
+                                        # -- End function
+	.globl	mcl_fp_montRedNF8L              # -- Begin function mcl_fp_montRedNF8L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF8L,@function
+mcl_fp_montRedNF8L:                     # @mcl_fp_montRedNF8L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$100, %esp
-	movl	124(%esp), %ebx
-	movl	(%ebx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	128(%esp), %ecx
-	movl	(%ecx), %edi
-	movl	%ecx, %ebp
-	mull	%edi
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	120(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	4(%ebx), %ecx
-	movl	8(%ebx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	12(%ebx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	16(%ebx), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	20(%ebx), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	24(%ebx), %ebx
-	movl	%ebx, 80(%esp)          # 4-byte Spill
-	movl	4(%ebp), %ebp
-	movl	%ecx, %eax
-	mull	%ebp
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	mull	%ebp
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%ebp
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	%ebp
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	4(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, %eax
-	mull	%ebp
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	8(%esp), %esi           # 4-byte Reload
-	movl	%esi, %eax
-	mull	%ebp
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %eax
-	mull	%ebp
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	mull	%edi
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	mull	%edi
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%edi
-	movl	%edx, %ebx
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	%edi
-	movl	%edx, %ebp
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, %ecx
-	movl	24(%esp), %esi          # 4-byte Reload
-	addl	96(%esp), %esi          # 4-byte Folded Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebx, %edi
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	80(%esp), %ebx          # 4-byte Reload
-	adcl	$0, %ebx
-	addl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	120(%esp), %eax
-	movl	%esi, 4(%eax)
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, %ecx
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %esi
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	96(%esp), %ebp          # 4-byte Reload
-	addl	84(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	68(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 80(%esp)          # 4-byte Spill
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	124(%esp), %esi
-	movl	24(%esi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	128(%esp), %eax
-	movl	8(%eax), %edi
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	mull	%edi
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	mull	%edi
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	mull	%edi
-	movl	%eax, %ebp
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	mull	%edi
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	(%esi), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	4(%esi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	mull	%edi
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	addl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	120(%esp), %edx
-	movl	%eax, 8(%edx)
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 88(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 92(%esp)          # 4-byte Folded Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 84(%esp)          # 4-byte Folded Spill
-	sbbl	%edi, %edi
-	movl	128(%esp), %eax
-	movl	12(%eax), %ecx
-	movl	16(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	8(%esp), %eax           # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	(%esp), %eax            # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	4(%esp), %eax           # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	mull	%ecx
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	andl	$1, %edi
-	addl	28(%esp), %ebx          # 4-byte Folded Reload
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	%ebp, %esi
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	addl	4(%esp), %ebx           # 4-byte Folded Reload
-	movl	120(%esp), %ebp
-	movl	%ebx, 12(%ebp)
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	adcl	(%esp), %esi            # 4-byte Folded Reload
-	movl	%esi, %ebx
-	adcl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, %esi
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %edx
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	20(%esp), %ebp          # 4-byte Folded Reload
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	80(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	124(%esp), %ebx
-	movl	24(%ebx), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	128(%esp), %eax
-	movl	16(%eax), %ecx
-	movl	%edx, %eax
-	mull	%ecx
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	20(%ebx), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	16(%ebx), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	12(%ebx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	8(%ebx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	(%ebx), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	4(%ebx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	%eax, %esi
-	movl	%edi, %eax
-	mull	%ecx
-	movl	%edx, (%esp)            # 4-byte Spill
-	addl	%ebp, %eax
-	movl	120(%esp), %ecx
-	movl	%eax, 16(%ecx)
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, %edi
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	88(%esp), %esi          # 4-byte Folded Reload
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	92(%esp), %edx          # 4-byte Folded Reload
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	addl	(%esp), %edi            # 4-byte Folded Reload
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 92(%esp)          # 4-byte Spill
-	movl	128(%esp), %eax
-	movl	20(%eax), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	40(%esp), %eax          # 4-byte Reload
-	mull	%esi
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	addl	%edi, %eax
-	movl	120(%esp), %edx
-	movl	%eax, 20(%edx)
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 84(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 88(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 96(%esp)          # 4-byte Folded Spill
-	adcl	92(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	128(%esp), %eax
-	movl	24(%eax), %ecx
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	124(%esp), %edi
-	mull	24(%edi)
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	20(%edi)
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	16(%edi)
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	12(%edi)
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	8(%edi)
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ecx, %eax
-	mull	4(%edi)
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ecx, %eax
-	mull	(%edi)
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	andl	$1, %esi
-	addl	40(%esp), %ebx          # 4-byte Folded Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	addl	(%esp), %ebx            # 4-byte Folded Reload
-	movl	120(%esp), %ecx
-	movl	%ebx, 24(%ecx)
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, %ebx
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, %edx
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	addl	12(%esp), %ebp          # 4-byte Folded Reload
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 28(%ecx)
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebx, 32(%ecx)
-	movl	96(%esp), %ebx          # 4-byte Reload
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edx, 36(%ecx)
-	movl	%edi, %edx
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebx, 40(%ecx)
-	adcl	76(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 44(%ecx)
-	movl	%esi, 48(%ecx)
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%ecx)
-	addl	$100, %esp
-	popl	%esi
-	popl	%edi
+	subl	$396, %esp                      # imm = 0x18C
+	calll	.L46$pb
+.L46$pb:
 	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end96:
-	.size	mcl_fpDbl_mulPre7L, .Lfunc_end96-mcl_fpDbl_mulPre7L
-
-	.globl	mcl_fpDbl_sqrPre7L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre7L,@function
-mcl_fpDbl_sqrPre7L:                     # @mcl_fpDbl_sqrPre7L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$100, %esp
-	movl	124(%esp), %esi
-	movl	24(%esi), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	(%esi), %ebx
-	movl	4(%esi), %edi
-	mull	%edi
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	mull	%edi
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	16(%esi), %ecx
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	12(%esi), %esi
-	movl	%esi, %eax
-	mull	%edi
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax
-	movl	8(%eax), %ebp
+.Ltmp8:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp8-.L46$pb), %ebx
+	movl	424(%esp), %ecx
+	movl	28(%ecx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	24(%ecx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	20(%ecx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%ecx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	8(%ecx), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %eax
+	movl	%ecx, %edx
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	420(%esp), %eax
+	movl	28(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	20(%eax), %esi
+	movl	16(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	12(%eax), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	8(%eax), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	(%eax), %ebp
+	movl	4(%eax), %edi
+	movl	-4(%edx), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	(%edx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	subl	$4, %esp
 	movl	%ebp, %eax
-	mull	%edi
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	mull	%ebx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	mull	%ebx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%ebx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	%ebx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
+	imull	%ecx, %eax
+	leal	364(%esp), %ecx
+	pushl	%eax
+	pushl	%edx
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addl	360(%esp), %ebp
+	adcl	364(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	368(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	372(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	376(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	380(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	384(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	388(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	420(%esp), %eax
+	movl	32(%eax), %eax
+	adcl	392(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	setb	28(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	44(%esp), %ebp                  # 4-byte Reload
 	movl	%ebp, %eax
-	mull	%ebx
+	imull	%edi, %eax
+	leal	324(%esp), %ecx
+	pushl	%eax
+	pushl	432(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, 28(%esp)                  # 1-byte Folded Spill
+	movl	352(%esp), %eax
+	adcl	$0, %eax
+	addl	320(%esp), %edi
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	324(%esp), %edx
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	328(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	332(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	336(%esp), %esi
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	340(%esp), %edi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	344(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	348(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	420(%esp), %ecx
+	adcl	36(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	12(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	%ebp, %eax
+	imull	%edx, %eax
 	movl	%edx, %ebp
-	movl	%eax, %ecx
-	movl	%edi, %eax
-	mull	%edi
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	%ebx
-	movl	%edx, %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ebx, %eax
-	mull	%ebx
-	movl	120(%esp), %ebx
-	movl	%eax, (%ebx)
-	addl	%edi, %edx
-	adcl	%esi, %ecx
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, %esi
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 92(%esp)          # 4-byte Folded Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 96(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	88(%esp), %eax          # 4-byte Reload
+	leal	284(%esp), %ecx
+	pushl	%eax
+	pushl	432(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, 12(%esp)                  # 1-byte Folded Spill
+	movl	312(%esp), %eax
 	adcl	$0, %eax
-	addl	%edi, %edx
-	movl	%edx, 4(%ebx)
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebx
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, %edi
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	adcl	60(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, %esi
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	sbbl	%ebp, %ebp
-	andl	$1, %ebp
-	addl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	72(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	80(%esp), %ebp          # 4-byte Folded Reload
-	movl	124(%esp), %edi
-	movl	24(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	8(%edi), %esi
-	mull	%esi
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	20(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	mull	%esi
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	16(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	mull	%esi
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	12(%edi), %ebx
-	movl	%ebx, %eax
-	mull	%esi
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	(%edi), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	4(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mull	%esi
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%esi
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%eax, %edi
-	movl	%esi, %eax
-	mull	%esi
-	movl	%eax, %ecx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	addl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	120(%esp), %eax
-	movl	%edi, 8(%eax)
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	adcl	92(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	84(%esp), %esi          # 4-byte Folded Reload
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	88(%esp), %edi          # 4-byte Folded Reload
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%ebp, %eax
-	sbbl	%ebp, %ebp
-	andl	$1, %ebp
-	addl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	12(%esp), %edx          # 4-byte Reload
-	adcl	%edx, 56(%esp)          # 4-byte Folded Spill
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	52(%esp), %eax          # 4-byte Reload
-	mull	%ebx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	mull	%ebx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	mull	%ebx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	24(%esp), %eax          # 4-byte Reload
-	mull	%ebx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	movl	20(%esp), %eax          # 4-byte Reload
-	mull	%ebx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ebx, %eax
-	mull	%ebx
-	movl	%eax, %ebx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	addl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	120(%esp), %eax
-	movl	%edi, 12(%eax)
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	adcl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 96(%esp)          # 4-byte Spill
-	adcl	76(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 88(%esp)          # 4-byte Folded Spill
-	adcl	%ebp, 92(%esp)          # 4-byte Folded Spill
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax
-	movl	(%eax), %esi
-	movl	4(%eax), %edi
-	movl	20(%eax), %ebx
-	movl	%edi, %eax
-	mull	%ebx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	%ebx
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax
-	movl	16(%eax), %ebp
-	movl	%edi, %eax
-	mull	%ebp
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	%ebp
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	addl	%eax, 56(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 60(%esp)          # 4-byte Folded Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 88(%esp)          # 4-byte Folded Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 92(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	124(%esp), %esi
-	movl	24(%esi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	mull	%ebp
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%ebp
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	mull	%ebp
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	mull	%ebp
-	movl	%edx, 16(%esp)          # 4-byte Spill
+	addl	280(%esp), %ebp
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	284(%esp), %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	288(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	292(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	296(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	300(%esp), %ebp
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	304(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	308(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	420(%esp), %ecx
+	adcl	40(%ecx), %eax
 	movl	%eax, %edi
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	44(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %esi
+	leal	244(%esp), %ecx
+	pushl	%eax
+	pushl	432(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	272(%esp), %eax
+	adcl	$0, %eax
+	addl	240(%esp), %esi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	244(%esp), %ecx
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	248(%esp), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	252(%esp), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	adcl	256(%esp), %ebp
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	260(%esp), %esi
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	264(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	268(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	420(%esp), %edx
+	adcl	44(%edx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	44(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %edi
+	leal	204(%esp), %ecx
+	pushl	%eax
+	movl	432(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	232(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	200(%esp), %edi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	204(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	208(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	212(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	adcl	216(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	220(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	224(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	228(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	420(%esp), %eax
+	adcl	48(%eax), %edx
+	movl	%edx, %edi
+	setb	20(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	44(%esp), %ebp                  # 4-byte Reload
 	movl	%ebp, %eax
-	mull	%ebp
-	movl	%eax, %esi
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	addl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	120(%esp), %eax
-	movl	%ebp, 16(%eax)
-	movl	%ecx, %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%edi, %ebp
-	adcl	96(%esp), %ebp          # 4-byte Folded Reload
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	adcl	88(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	84(%esp), %edx          # 4-byte Folded Reload
-	sbbl	%edi, %edi
-	andl	$1, %edi
-	addl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	80(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	adcl	(%esp), %esi            # 4-byte Folded Reload
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	32(%esp), %eax          # 4-byte Reload
-	mull	%ebx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	mull	%ebx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	mull	%ebx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
+	imull	%ecx, %eax
+	leal	164(%esp), %ecx
+	pushl	%eax
+	pushl	432(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, 20(%esp)                  # 1-byte Folded Spill
+	movl	192(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	addl	160(%esp), %eax
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	164(%esp), %ecx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	168(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	176(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	180(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	184(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	188(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	420(%esp), %eax
+	adcl	52(%eax), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	setb	47(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	imull	%ecx, %ebp
+	movl	%ecx, %esi
+	leal	124(%esp), %ecx
+	pushl	%ebp
+	pushl	432(%esp)
+	pushl	%ecx
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, 47(%esp)                  # 1-byte Folded Spill
+	movl	152(%esp), %edi
+	adcl	$0, %edi
+	addl	120(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	124(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	adcl	128(%esp), %ebp
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	132(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	136(%esp), %esi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	140(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	144(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	148(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	420(%esp), %ecx
+	adcl	56(%ecx), %edi
+	setb	36(%esp)                        # 1-byte Folded Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	imull	%eax, %ecx
+	subl	$4, %esp
+	leal	84(%esp), %eax
+	pushl	%ecx
+	pushl	432(%esp)
+	pushl	%eax
+	calll	mulPv256x32@PLT
+	addl	$12, %esp
+	addb	$255, 36(%esp)                  # 1-byte Folded Spill
+	movl	112(%esp), %edx
+	adcl	$0, %edx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	addl	80(%esp), %eax
+	movl	%ebp, %eax
+	adcl	84(%esp), %eax
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	88(%esp), %ecx
+	adcl	92(%esp), %esi
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	96(%esp), %ebx
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	100(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	104(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	adcl	108(%esp), %edi
+	movl	420(%esp), %ebp
+	adcl	60(%ebp), %edx
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	subl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	sbbl	60(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	sbbl	48(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
 	movl	%ebx, %eax
-	mull	%ebx
-	movl	%eax, %esi
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	addl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	120(%esp), %edx
-	movl	%eax, 20(%edx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, %edx
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	84(%esp), %ebx          # 4-byte Reload
-	adcl	%ebp, %ebx
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	adcl	76(%esp), %esi          # 4-byte Folded Reload
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	%edi, %ebp
-	sbbl	%edi, %edi
-	andl	$1, %edi
-	addl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 84(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	68(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	124(%esp), %esi
-	movl	24(%esi), %ecx
-	movl	%ecx, %eax
-	mull	20(%esi)
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	16(%esi)
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	12(%esi)
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	%ecx, %eax
-	mull	8(%esi)
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	%ecx, %eax
-	mull	4(%esi)
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ecx, %eax
-	mull	(%esi)
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	%ecx, %eax
-	mull	%ecx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	addl	80(%esp), %esi          # 4-byte Folded Reload
-	movl	120(%esp), %edx
-	movl	%esi, 24(%edx)
-	movl	%edx, %esi
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	adcl	84(%esp), %ebp          # 4-byte Folded Reload
-	adcl	92(%esp), %ebx          # 4-byte Folded Reload
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	%ecx, 96(%esp)          # 4-byte Folded Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	addl	36(%esp), %edi          # 4-byte Folded Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edi, 28(%esi)
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 32(%esi)
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 36(%esi)
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%edi, 40(%esi)
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 44(%esi)
-	movl	%eax, 48(%esi)
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esi)
-	addl	$100, %esp
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	sbbl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	sbbl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	sbbl	52(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	sbbl	72(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edx, %ebx
+	sbbl	76(%esp), %ebx                  # 4-byte Folded Reload
+	testl	%ebx, %ebx
+	js	.LBB46_1
+# %bb.2:
+	movl	416(%esp), %edx
+	movl	%ebx, 28(%edx)
+	js	.LBB46_3
+.LBB46_4:
+	movl	%edi, 24(%edx)
+	js	.LBB46_5
+.LBB46_6:
+	movl	%ebp, 20(%edx)
+	js	.LBB46_7
+.LBB46_8:
+	movl	%ecx, 16(%edx)
+	js	.LBB46_9
+.LBB46_10:
+	movl	%eax, 12(%edx)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	48(%esp), %eax                  # 4-byte Reload
+	js	.LBB46_11
+.LBB46_12:
+	movl	%eax, 8(%edx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	js	.LBB46_13
+.LBB46_14:
+	movl	%ecx, 4(%edx)
+	jns	.LBB46_16
+.LBB46_15:
+	movl	36(%esp), %eax                  # 4-byte Reload
+.LBB46_16:
+	movl	%eax, (%edx)
+	addl	$396, %esp                      # imm = 0x18C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end97:
-	.size	mcl_fpDbl_sqrPre7L, .Lfunc_end97-mcl_fpDbl_sqrPre7L
-
-	.globl	mcl_fp_mont7L
-	.align	16, 0x90
-	.type	mcl_fp_mont7L,@function
-mcl_fp_mont7L:                          # @mcl_fp_mont7L
-# BB#0:
+.LBB46_1:
+	movl	%edx, %ebx
+	movl	416(%esp), %edx
+	movl	%ebx, 28(%edx)
+	jns	.LBB46_4
+.LBB46_3:
+	movl	52(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 24(%edx)
+	jns	.LBB46_6
+.LBB46_5:
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%edx)
+	jns	.LBB46_8
+.LBB46_7:
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%edx)
+	jns	.LBB46_10
+.LBB46_9:
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edx)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	48(%esp), %eax                  # 4-byte Reload
+	jns	.LBB46_12
+.LBB46_11:
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%edx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	jns	.LBB46_14
+.LBB46_13:
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%edx)
+	js	.LBB46_15
+	jmp	.LBB46_16
+.Lfunc_end46:
+	.size	mcl_fp_montRedNF8L, .Lfunc_end46-mcl_fp_montRedNF8L
+                                        # -- End function
+	.globl	mcl_fp_addPre8L                 # -- Begin function mcl_fp_addPre8L
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre8L,@function
+mcl_fp_addPre8L:                        # @mcl_fp_addPre8L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$152, %esp
-	movl	176(%esp), %esi
-	movl	(%esi), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	180(%esp), %edx
-	movl	(%edx), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	mull	%ecx
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	184(%esp), %ecx
-	movl	-4(%ecx), %edx
-	movl	%edx, 132(%esp)         # 4-byte Spill
-	movl	%eax, %ebx
-	imull	%edx, %ebx
+	subl	$8, %esp
+	movl	32(%esp), %ecx
 	movl	(%ecx), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	24(%ecx), %edx
-	movl	%edx, 120(%esp)         # 4-byte Spill
-	movl	20(%ecx), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
+	movl	4(%ecx), %edx
+	movl	36(%esp), %esi
+	addl	(%esi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	4(%esi), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	28(%ecx), %edi
+	movl	24(%ecx), %ebx
+	movl	20(%ecx), %ebp
 	movl	16(%ecx), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	12(%ecx), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	8(%ecx), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	4(%ecx), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	4(%esi), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	%esi, %eax
-	movl	24(%eax), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	20(%eax), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	16(%eax), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	12(%eax), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	8(%eax), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	72(%esp), %ecx          # 4-byte Reload
-	mull	%ecx
-	movl	%edx, %esi
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	%ecx
-	movl	%edx, %ebp
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	mull	%ecx
-	movl	%edx, %edi
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	mull	%ecx
-	movl	%edx, %ebx
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	mull	%ecx
-	addl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	adcl	(%esp), %ebx            # 4-byte Folded Reload
-	movl	%ebx, (%esp)            # 4-byte Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 84(%esp)          # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	addl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebx          # 4-byte Reload
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	28(%esp), %ebp          # 4-byte Reload
-	addl	80(%esp), %ebp          # 4-byte Folded Reload
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	%ebp, 88(%esp)          # 4-byte Folded Spill
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	adcl	(%esp), %edi            # 4-byte Folded Reload
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	72(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	4(%eax), %ecx
-	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %edi
-	movl	%ecx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebx
-	addl	%edi, %ebx
-	adcl	84(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	addl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	88(%esp), %ecx          # 4-byte Reload
-	imull	132(%esp), %ecx         # 4-byte Folded Reload
-	andl	$1, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %ebx
-	movl	%ecx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%eax, %ebp
-	addl	%ebx, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, %eax
-	movl	52(%esp), %ebx          # 4-byte Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	addl	88(%esp), %ebp          # 4-byte Folded Reload
-	movl	20(%esp), %ebp          # 4-byte Reload
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	adcl	84(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	80(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	adcl	76(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	180(%esp), %eax
-	movl	8(%eax), %ebx
-	movl	%ebx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, %edi
-	movl	%ebx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	addl	%edi, %edx
-	movl	%edx, %edi
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	84(%esp), %ebx          # 4-byte Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	72(%esp), %esi          # 4-byte Reload
-	addl	%ebp, %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 44(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 48(%esp)          # 4-byte Folded Spill
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 84(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%esi, %ecx
-	imull	132(%esp), %ecx         # 4-byte Folded Reload
-	andl	$1, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %ebx
-	movl	%ecx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%eax, %ebp
-	addl	%ebx, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, %eax
-	movl	56(%esp), %ebx          # 4-byte Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	72(%esp), %ebp          # 4-byte Folded Reload
-	movl	20(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	88(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	adcl	84(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	80(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	180(%esp), %eax
-	movl	12(%eax), %ebx
-	movl	%ebx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, %edi
-	movl	%ebx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	addl	%edi, %edx
-	movl	%edx, %edi
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	84(%esp), %ebx          # 4-byte Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	72(%esp), %esi          # 4-byte Reload
-	addl	%ebp, %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 48(%esp)          # 4-byte Folded Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 52(%esp)          # 4-byte Folded Spill
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 84(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%esi, %ecx
-	imull	132(%esp), %ecx         # 4-byte Folded Reload
-	andl	$1, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %ebx
-	movl	%ecx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%eax, %ebp
-	addl	%ebx, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, %eax
-	movl	56(%esp), %ebx          # 4-byte Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	72(%esp), %ebp          # 4-byte Folded Reload
-	movl	20(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	88(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	adcl	84(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	80(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	180(%esp), %eax
-	movl	16(%eax), %ebx
-	movl	%ebx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, %edi
-	movl	%ebx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	addl	%edi, %edx
-	movl	%edx, %edi
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	84(%esp), %ebx          # 4-byte Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	72(%esp), %esi          # 4-byte Reload
-	addl	%ebp, %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 48(%esp)          # 4-byte Folded Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 52(%esp)          # 4-byte Folded Spill
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 84(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%esi, %ecx
-	imull	132(%esp), %ecx         # 4-byte Folded Reload
-	andl	$1, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %ebx
-	movl	%ecx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%eax, %ebp
-	addl	%ebx, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, %eax
-	movl	56(%esp), %ebx          # 4-byte Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	72(%esp), %ebp          # 4-byte Folded Reload
-	movl	20(%esp), %ebp          # 4-byte Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	88(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	adcl	84(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	80(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	180(%esp), %eax
-	movl	20(%eax), %ebx
-	movl	%ebx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, %edi
-	movl	%ebx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	addl	%edi, %edx
-	movl	%edx, %edi
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	84(%esp), %ebx          # 4-byte Reload
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	72(%esp), %esi          # 4-byte Reload
-	addl	%ebp, %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 48(%esp)          # 4-byte Folded Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 52(%esp)          # 4-byte Folded Spill
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 84(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%esi, %ecx
-	imull	132(%esp), %ecx         # 4-byte Folded Reload
-	andl	$1, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %ebp
-	movl	%ecx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%eax, %ebx
-	addl	%ebp, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	20(%esp), %ebx          # 4-byte Reload
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	88(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	84(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	80(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	180(%esp), %eax
-	movl	24(%eax), %ebp
-	movl	%ebp, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%ebp, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, %edi
-	movl	%ebp, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	addl	%edi, %edx
-	movl	%edx, %edi
-	adcl	112(%esp), %ecx         # 4-byte Folded Reload
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	adcl	92(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	96(%esp), %ecx          # 4-byte Folded Reload
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	116(%esp), %esi         # 4-byte Reload
-	addl	%ebx, %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 112(%esp)         # 4-byte Folded Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 104(%esp)         # 4-byte Folded Spill
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	64(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	132(%esp), %ecx         # 4-byte Reload
-	imull	%esi, %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	andl	$1, %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, %edi
-	movl	132(%esp), %eax         # 4-byte Reload
-	mull	124(%esp)               # 4-byte Folded Reload
-	addl	56(%esp), %eax          # 4-byte Folded Reload
-	adcl	%edi, %edx
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	adcl	72(%esp), %ebp          # 4-byte Folded Reload
-	adcl	76(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 132(%esp)         # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	64(%esp), %esi          # 4-byte Reload
-	addl	116(%esp), %esi         # 4-byte Folded Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	adcl	112(%esp), %edx         # 4-byte Folded Reload
-	adcl	104(%esp), %ecx         # 4-byte Folded Reload
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	108(%esp), %ebx         # 4-byte Folded Reload
-	movl	%ebx, 116(%esp)         # 4-byte Spill
-	adcl	100(%esp), %ebp         # 4-byte Folded Reload
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %esi         # 4-byte Reload
-	adcl	88(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 132(%esp)         # 4-byte Spill
-	adcl	84(%esp), %edi          # 4-byte Folded Reload
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	movl	%eax, %esi
-	subl	128(%esp), %esi         # 4-byte Folded Reload
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	%edx, %esi
-	sbbl	124(%esp), %esi         # 4-byte Folded Reload
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	sbbl	136(%esp), %ecx         # 4-byte Folded Reload
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	sbbl	140(%esp), %ebx         # 4-byte Folded Reload
-	movl	%ebx, 136(%esp)         # 4-byte Spill
-	sbbl	144(%esp), %ebp         # 4-byte Folded Reload
-	movl	%ebp, 140(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, %ebx
-	movl	%ecx, %ebp
-	sbbl	148(%esp), %ebx         # 4-byte Folded Reload
-	movl	%ebx, 144(%esp)         # 4-byte Spill
-	movl	%edi, %ebx
-	sbbl	120(%esp), %ebx         # 4-byte Folded Reload
-	movl	%ebx, 148(%esp)         # 4-byte Spill
-	movl	96(%esp), %ebx          # 4-byte Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB98_2
-# BB#1:
-	movl	108(%esp), %eax         # 4-byte Reload
-.LBB98_2:
-	movl	172(%esp), %esi
-	movl	%eax, (%esi)
-	testb	%bl, %bl
-	jne	.LBB98_4
-# BB#3:
-	movl	124(%esp), %edx         # 4-byte Reload
-.LBB98_4:
-	movl	%edx, 4(%esi)
-	movl	104(%esp), %ecx         # 4-byte Reload
-	jne	.LBB98_6
-# BB#5:
-	movl	128(%esp), %ecx         # 4-byte Reload
-.LBB98_6:
-	movl	%ecx, 8(%esi)
-	movl	112(%esp), %ecx         # 4-byte Reload
-	movl	116(%esp), %eax         # 4-byte Reload
-	jne	.LBB98_8
-# BB#7:
-	movl	136(%esp), %eax         # 4-byte Reload
-.LBB98_8:
-	movl	%eax, 12(%esi)
-	jne	.LBB98_10
-# BB#9:
-	movl	140(%esp), %ecx         # 4-byte Reload
-.LBB98_10:
-	movl	%ecx, 16(%esi)
-	jne	.LBB98_12
-# BB#11:
-	movl	144(%esp), %ebp         # 4-byte Reload
-.LBB98_12:
+	movl	12(%ecx), %edx
+	movl	8(%ecx), %ecx
+	adcl	8(%esi), %ecx
+	adcl	12(%esi), %edx
+	adcl	16(%esi), %eax
+	adcl	20(%esi), %ebp
+	adcl	24(%esi), %ebx
+	adcl	28(%esi), %edi
+	movl	28(%esp), %esi
+	movl	%ebx, 24(%esi)
 	movl	%ebp, 20(%esi)
-	jne	.LBB98_14
-# BB#13:
-	movl	148(%esp), %edi         # 4-byte Reload
-.LBB98_14:
-	movl	%edi, 24(%esi)
-	addl	$152, %esp
+	movl	%eax, 16(%esi)
+	movl	%edx, 12(%esi)
+	movl	%ecx, 8(%esi)
+	movl	%edi, 28(%esi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 4(%esi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, (%esi)
+	setb	%al
+	movzbl	%al, %eax
+	addl	$8, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end98:
-	.size	mcl_fp_mont7L, .Lfunc_end98-mcl_fp_mont7L
-
-	.globl	mcl_fp_montNF7L
-	.align	16, 0x90
-	.type	mcl_fp_montNF7L,@function
-mcl_fp_montNF7L:                        # @mcl_fp_montNF7L
-# BB#0:
+.Lfunc_end47:
+	.size	mcl_fp_addPre8L, .Lfunc_end47-mcl_fp_addPre8L
+                                        # -- End function
+	.globl	mcl_fp_subPre8L                 # -- Begin function mcl_fp_subPre8L
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre8L,@function
+mcl_fp_subPre8L:                        # @mcl_fp_subPre8L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$152, %esp
-	movl	176(%esp), %ebp
-	movl	(%ebp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	180(%esp), %ecx
-	movl	(%ecx), %ecx
-	mull	%ecx
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	184(%esp), %esi
-	movl	-4(%esi), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	%eax, %edi
-	imull	%edx, %edi
-	movl	(%esi), %edx
-	movl	%edx, 148(%esp)         # 4-byte Spill
-	movl	24(%esi), %edx
-	movl	%edx, 124(%esp)         # 4-byte Spill
-	movl	20(%esi), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	16(%esi), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	12(%esi), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	8(%esi), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	4(%esi), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	4(%ebp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	24(%ebp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	20(%ebp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	16(%ebp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	12(%ebp), %ebx
-	movl	%ebx, 96(%esp)          # 4-byte Spill
-	movl	8(%ebp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	mull	%edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	132(%esp)               # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	mull	%ecx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	%ecx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	%ecx
-	movl	%edx, %esi
-	movl	%eax, %ebx
-	movl	%ebp, %eax
-	mull	%ecx
-	movl	%edx, %ebp
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	mull	%ecx
-	movl	%eax, %edi
-	addl	84(%esp), %edi          # 4-byte Folded Reload
-	adcl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	adcl	%ebx, %ebp
-	movl	%esi, %edx
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	16(%esp), %ebx          # 4-byte Reload
-	addl	88(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edi, %ebx
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	adcl	$0, %eax
-	addl	28(%esp), %ebx          # 4-byte Folded Reload
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	4(%eax), %ecx
-	movl	%ecx, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	addl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	addl	%ebx, 48(%esp)          # 4-byte Folded Spill
-	adcl	%edi, 52(%esp)          # 4-byte Folded Spill
-	movl	%ebp, %edi
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	44(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 56(%esp)          # 4-byte Folded Spill
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	76(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, %ecx
-	imull	108(%esp), %ecx         # 4-byte Folded Reload
-	movl	%ecx, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	132(%esp)               # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	%ecx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	%ecx, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	addl	%ebp, %eax
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	adcl	%edi, %esi
-	movl	%esi, %edi
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	64(%esp), %ebp          # 4-byte Folded Reload
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	88(%esp), %ecx          # 4-byte Folded Reload
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	8(%eax), %ebp
-	movl	%ebp, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ebp, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %ebx
-	movl	%ebp, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	addl	%ebx, %ebp
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	%edi, %esi
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	44(%esp), %edi          # 4-byte Reload
-	addl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebp          # 4-byte Folded Reload
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	80(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	adcl	84(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, %ebx
-	imull	108(%esp), %ebx         # 4-byte Folded Reload
-	movl	%ebx, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	132(%esp)               # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	movl	%ebx, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	addl	%edi, %eax
-	adcl	%ebp, %ecx
-	movl	%ecx, %esi
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	72(%esp), %ebx          # 4-byte Reload
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 72(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	12(%eax), %edi
-	movl	%edi, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %ebp
-	movl	%edi, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%edx, %ebx
-	addl	%ebp, %ebx
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, %edi
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	44(%esp), %esi          # 4-byte Reload
-	addl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	72(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	80(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, %ebp
-	imull	108(%esp), %ebp         # 4-byte Folded Reload
-	movl	%ebp, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	132(%esp)               # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	%ebp, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	addl	%edi, %eax
-	adcl	%ebx, %esi
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	72(%esp), %ebx          # 4-byte Reload
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 72(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	16(%eax), %ebp
-	movl	%ebp, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, %edi
-	movl	%ebp, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %ebx
-	movl	%ebp, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	addl	%ebx, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	%edi, %esi
-	movl	%esi, %edi
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	56(%esp), %ebx          # 4-byte Reload
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	24(%esp), %ecx          # 4-byte Reload
-	addl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	72(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	80(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	adcl	84(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	imull	108(%esp), %esi         # 4-byte Folded Reload
-	movl	%esi, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	%esi, %eax
-	mull	132(%esp)               # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	%esi, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	addl	%ecx, %eax
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	%edi, %edx
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	20(%eax), %ebp
-	movl	%ebp, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ebp, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, %ebx
-	movl	%ebp, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	addl	%ebx, %ebp
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, %ebx
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	%edi, %edx
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	52(%esp), %edi          # 4-byte Reload
-	addl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	adcl	76(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %esi
-	adcl	88(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	movl	%edi, %ebx
-	imull	108(%esp), %ebx         # 4-byte Folded Reload
-	movl	%ebx, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	132(%esp)               # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	movl	%ebx, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	addl	%edi, %eax
-	adcl	%ebp, %ecx
-	movl	%ecx, %edx
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	76(%esp), %ebx          # 4-byte Reload
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	%esi, %edi
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	addl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 76(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	24(%eax), %edi
-	movl	%edi, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	116(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	%eax, %esi
-	movl	%edi, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, %ebp
-	movl	%edi, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%eax, %ebx
-	addl	%ebp, %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	%ecx, %edi
-	adcl	%esi, %edi
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx         # 4-byte Folded Reload
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	addl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 104(%esp)         # 4-byte Folded Spill
-	adcl	72(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	76(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	108(%esp), %edi         # 4-byte Reload
-	movl	48(%esp), %ecx          # 4-byte Reload
-	imull	%ecx, %edi
-	movl	%edi, %eax
-	mull	124(%esp)               # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	144(%esp)               # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	mull	140(%esp)               # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	mull	136(%esp)               # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	mull	148(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	movl	%edi, %eax
-	mull	132(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	movl	%edi, %eax
-	mull	128(%esp)               # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	addl	%ecx, %ebp
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, %edx
-	adcl	72(%esp), %esi          # 4-byte Folded Reload
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	100(%esp), %ecx         # 4-byte Folded Reload
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	108(%esp), %ebx         # 4-byte Reload
-	adcl	92(%esp), %ebx          # 4-byte Folded Reload
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	64(%esp), %ebp          # 4-byte Folded Reload
-	movl	112(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	addl	60(%esp), %edx          # 4-byte Folded Reload
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	80(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 108(%esp)         # 4-byte Spill
-	adcl	84(%esp), %ebp          # 4-byte Folded Reload
-	adcl	88(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 112(%esp)         # 4-byte Spill
-	movl	%edx, %eax
-	subl	148(%esp), %eax         # 4-byte Folded Reload
-	sbbl	128(%esp), %esi         # 4-byte Folded Reload
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	sbbl	132(%esp), %ecx         # 4-byte Folded Reload
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	movl	%edx, %esi
-	sbbl	136(%esp), %ecx         # 4-byte Folded Reload
-	movl	%ecx, 136(%esp)         # 4-byte Spill
-	sbbl	140(%esp), %ebx         # 4-byte Folded Reload
-	movl	%ebx, 148(%esp)         # 4-byte Spill
-	movl	%ebp, %ebx
-	movl	%ebp, %ecx
-	movl	%ebx, %ebp
-	sbbl	144(%esp), %ebp         # 4-byte Folded Reload
-	movl	%edi, %ebx
-	sbbl	124(%esp), %ebx         # 4-byte Folded Reload
-	movl	%ebx, %edi
-	sarl	$31, %edi
-	testl	%edi, %edi
-	js	.LBB99_2
-# BB#1:
-	movl	%eax, %esi
-.LBB99_2:
-	movl	172(%esp), %edx
-	movl	%esi, (%edx)
-	movl	104(%esp), %eax         # 4-byte Reload
-	js	.LBB99_4
-# BB#3:
-	movl	128(%esp), %eax         # 4-byte Reload
-.LBB99_4:
-	movl	%eax, 4(%edx)
-	movl	%ecx, %eax
-	movl	116(%esp), %ecx         # 4-byte Reload
-	js	.LBB99_6
-# BB#5:
-	movl	132(%esp), %ecx         # 4-byte Reload
-.LBB99_6:
-	movl	%ecx, 8(%edx)
-	movl	108(%esp), %esi         # 4-byte Reload
-	movl	120(%esp), %ecx         # 4-byte Reload
-	js	.LBB99_8
-# BB#7:
-	movl	136(%esp), %ecx         # 4-byte Reload
-.LBB99_8:
-	movl	%ecx, 12(%edx)
-	js	.LBB99_10
-# BB#9:
-	movl	148(%esp), %esi         # 4-byte Reload
-.LBB99_10:
-	movl	%esi, 16(%edx)
-	js	.LBB99_12
-# BB#11:
-	movl	%ebp, %eax
-.LBB99_12:
-	movl	%eax, 20(%edx)
-	js	.LBB99_14
-# BB#13:
-	movl	%ebx, 112(%esp)         # 4-byte Spill
-.LBB99_14:
-	movl	112(%esp), %eax         # 4-byte Reload
-	movl	%eax, 24(%edx)
-	addl	$152, %esp
+	subl	$16, %esp
+	movl	40(%esp), %edx
+	movl	(%edx), %ecx
+	movl	4(%edx), %esi
+	xorl	%eax, %eax
+	movl	44(%esp), %edi
+	subl	(%edi), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	sbbl	4(%edi), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	28(%edx), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	24(%edx), %ebp
+	movl	20(%edx), %ecx
+	movl	16(%edx), %esi
+	movl	12(%edx), %ebx
+	movl	8(%edx), %edx
+	sbbl	8(%edi), %edx
+	sbbl	12(%edi), %ebx
+	sbbl	16(%edi), %esi
+	sbbl	20(%edi), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	sbbl	24(%edi), %ebp
+	movl	(%esp), %ecx                    # 4-byte Reload
+	sbbl	28(%edi), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	36(%esp), %edi
+	movl	%ebp, 24(%edi)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 20(%edi)
+	movl	%esi, 16(%edi)
+	movl	%ebx, 12(%edi)
+	movl	%edx, 8(%edi)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 28(%edi)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%edi)
+	sbbl	%eax, %eax
+	andl	$1, %eax
+	addl	$16, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end99:
-	.size	mcl_fp_montNF7L, .Lfunc_end99-mcl_fp_montNF7L
-
-	.globl	mcl_fp_montRed7L
-	.align	16, 0x90
-	.type	mcl_fp_montRed7L,@function
-mcl_fp_montRed7L:                       # @mcl_fp_montRed7L
-# BB#0:
+.Lfunc_end48:
+	.size	mcl_fp_subPre8L, .Lfunc_end48-mcl_fp_subPre8L
+                                        # -- End function
+	.globl	mcl_fp_shr1_8L                  # -- Begin function mcl_fp_shr1_8L
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_8L,@function
+mcl_fp_shr1_8L:                         # @mcl_fp_shr1_8L
+# %bb.0:
+	pushl	%esi
+	movl	12(%esp), %eax
+	movl	28(%eax), %ecx
+	movl	%ecx, %edx
+	shrl	%edx
+	movl	8(%esp), %esi
+	movl	%edx, 28(%esi)
+	movl	24(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 24(%esi)
+	movl	20(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 20(%esi)
+	movl	16(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 16(%esi)
+	movl	12(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 12(%esi)
+	movl	8(%eax), %edx
+	shldl	$31, %edx, %ecx
+	movl	%ecx, 8(%esi)
+	movl	4(%eax), %ecx
+	shldl	$31, %ecx, %edx
+	movl	%edx, 4(%esi)
+	movl	(%eax), %eax
+	shrdl	$1, %ecx, %eax
+	movl	%eax, (%esi)
+	popl	%esi
+	retl
+.Lfunc_end49:
+	.size	mcl_fp_shr1_8L, .Lfunc_end49-mcl_fp_shr1_8L
+                                        # -- End function
+	.globl	mcl_fp_add8L                    # -- Begin function mcl_fp_add8L
+	.p2align	4, 0x90
+	.type	mcl_fp_add8L,@function
+mcl_fp_add8L:                           # @mcl_fp_add8L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$124, %esp
-	movl	152(%esp), %eax
-	movl	-4(%eax), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	(%eax), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	148(%esp), %ecx
-	movl	(%ecx), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	imull	%edx, %ecx
-	movl	24(%eax), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	20(%eax), %edx
-	movl	%edx, 120(%esp)         # 4-byte Spill
-	movl	16(%eax), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	12(%eax), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	8(%eax), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	4(%eax), %ebx
-	movl	%ebx, 96(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%edi
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	movl	%ecx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%ebp
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%esi
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	%ebx
-	movl	%edx, %esi
-	movl	%eax, %ebx
-	movl	%ecx, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	addl	%ebx, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	adcl	68(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	76(%esp), %edx          # 4-byte Folded Reload
-	adcl	80(%esp), %ebp          # 4-byte Folded Reload
-	movl	56(%esp), %ebx          # 4-byte Reload
-	adcl	84(%esp), %ebx          # 4-byte Folded Reload
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	%edi, %esi
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	addl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	148(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	4(%ecx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	8(%ecx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	12(%ecx), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	16(%ecx), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	adcl	20(%ecx), %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	adcl	24(%ecx), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	28(%ecx), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	52(%ecx), %esi
-	movl	48(%ecx), %edi
-	movl	44(%ecx), %edx
-	movl	40(%ecx), %ebx
-	movl	36(%ecx), %ebp
-	movl	32(%ecx), %eax
-	adcl	$0, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	adcl	$0, %ebx
-	movl	%ebx, 68(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	imull	88(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ecx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ecx, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %ebp
-	movl	%ecx, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, %ebx
-	movl	%ecx, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%eax, %ecx
-	addl	%ebx, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	%ebp, %edi
-	movl	%esi, %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	44(%esp), %ebx          # 4-byte Reload
-	adcl	16(%esp), %ebx          # 4-byte Folded Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	72(%esp), %ecx          # 4-byte Folded Reload
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %edi
-	imull	88(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, %ebx
-	movl	%edi, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, %esi
-	movl	%edi, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%eax, %edi
-	addl	%esi, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	%ebx, %ebp
-	movl	%ebp, %eax
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	60(%esp), %ebx          # 4-byte Reload
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	32(%esp), %edi          # 4-byte Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	%edi, %esi
-	imull	88(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, %ecx
-	movl	%esi, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	%eax, %ebp
-	movl	%esi, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%eax, %edi
-	addl	%ebp, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	%ecx, %ebx
-	movl	%ebx, %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	40(%esp), %ebx          # 4-byte Reload
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	addl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	28(%esp), %edi          # 4-byte Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	%edi, %ebp
-	imull	88(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %ecx
-	movl	%ebp, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, %ebx
-	movl	%ebp, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%eax, %ebp
-	addl	%ebx, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	%ecx, %edi
-	movl	%edi, %ecx
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	64(%esp), %ebx          # 4-byte Reload
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	$0, %edx
-	addl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	32(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	adcl	76(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	%ebp, %edi
-	imull	88(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ecx
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	movl	%eax, %ebx
-	movl	%edi, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%eax, %ebp
-	addl	%ebx, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, %eax
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	52(%esp), %ebx          # 4-byte Reload
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	addl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	80(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	imull	%ebp, %ecx
-	movl	%ecx, %eax
-	mull	92(%esp)                # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	120(%esp)               # 4-byte Folded Reload
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	104(%esp)               # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	108(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebp
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	100(%esp)               # 4-byte Folded Reload
-	movl	%edx, %ebx
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	112(%esp)               # 4-byte Folded Reload
-	movl	%edx, %edi
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	mull	96(%esp)                # 4-byte Folded Reload
-	movl	%edx, %esi
-	addl	%edi, %eax
-	movl	%eax, %edi
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	80(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 80(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	movl	28(%esp), %ebx          # 4-byte Reload
-	addl	44(%esp), %ebx          # 4-byte Folded Reload
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebx          # 4-byte Reload
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 80(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	76(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, %eax
-	adcl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	116(%esp), %ebx         # 4-byte Reload
-	adcl	$0, %ebx
-	movl	%edi, %edx
-	movl	%edx, %ecx
-	subl	112(%esp), %ecx         # 4-byte Folded Reload
-	sbbl	96(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	sbbl	100(%esp), %edi         # 4-byte Folded Reload
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	sbbl	108(%esp), %ebp         # 4-byte Folded Reload
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	sbbl	104(%esp), %esi         # 4-byte Folded Reload
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	%eax, %esi
-	movl	%esi, %ebp
-	sbbl	120(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	movl	%esi, %eax
-	sbbl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	movl	%ebx, 116(%esp)         # 4-byte Spill
-	jne	.LBB100_2
-# BB#1:
-	movl	%ecx, %edx
-.LBB100_2:
-	movl	144(%esp), %edi
-	movl	%edx, (%edi)
-	movl	116(%esp), %eax         # 4-byte Reload
-	testb	%al, %al
-	movl	64(%esp), %eax          # 4-byte Reload
-	jne	.LBB100_4
-# BB#3:
-	movl	84(%esp), %eax          # 4-byte Reload
-.LBB100_4:
-	movl	%eax, 4(%edi)
-	movl	80(%esp), %eax          # 4-byte Reload
-	jne	.LBB100_6
-# BB#5:
-	movl	96(%esp), %eax          # 4-byte Reload
-.LBB100_6:
-	movl	%eax, 8(%edi)
-	movl	88(%esp), %eax          # 4-byte Reload
-	movl	68(%esp), %ecx          # 4-byte Reload
-	jne	.LBB100_8
-# BB#7:
-	movl	100(%esp), %ecx         # 4-byte Reload
-.LBB100_8:
-	movl	%ecx, 12(%edi)
-	jne	.LBB100_10
-# BB#9:
-	movl	108(%esp), %eax         # 4-byte Reload
-.LBB100_10:
-	movl	%eax, 16(%edi)
-	jne	.LBB100_12
-# BB#11:
-	movl	112(%esp), %ebp         # 4-byte Reload
-.LBB100_12:
-	movl	%ebp, 20(%edi)
-	jne	.LBB100_14
-# BB#13:
-	movl	120(%esp), %esi         # 4-byte Reload
-.LBB100_14:
-	movl	%esi, 24(%edi)
-	addl	$124, %esp
+	subl	$32, %esp
+	movl	56(%esp), %eax
+	movl	(%eax), %ebx
+	movl	4(%eax), %ecx
+	movl	60(%esp), %edx
+	addl	(%edx), %ebx
+	adcl	4(%edx), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	28(%eax), %ebp
+	movl	24(%eax), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	16(%eax), %esi
+	movl	12(%eax), %edi
+	movl	8(%eax), %edx
+	movl	60(%esp), %eax
+	adcl	8(%eax), %edx
+	adcl	12(%eax), %edi
+	adcl	16(%eax), %esi
+	adcl	20(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	60(%esp), %ecx
+	adcl	24(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	28(%ecx), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx
+	movl	%ebp, 28(%ecx)
+	movl	%eax, 24(%ecx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 20(%ecx)
+	movl	%esi, 16(%ecx)
+	movl	%edi, 12(%ecx)
+	movl	%edx, 8(%ecx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 4(%ecx)
+	movl	%ebx, (%ecx)
+	setb	3(%esp)                         # 1-byte Folded Spill
+	movl	64(%esp), %ebp
+	subl	(%ebp), %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	sbbl	4(%ebp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	sbbl	8(%ebp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	sbbl	12(%ebp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	sbbl	16(%ebp), %esi
+	movl	%esi, %edi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	sbbl	20(%ebp), %ecx
+	movl	8(%esp), %esi                   # 4-byte Reload
+	sbbl	24(%ebp), %esi
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	sbbl	28(%ebp), %ebx
+	movzbl	3(%esp), %edx                   # 1-byte Folded Reload
+	sbbl	$0, %edx
+	testb	$1, %dl
+	jne	.LBB50_2
+# %bb.1:                                # %nocarry
+	movl	24(%esp), %edx                  # 4-byte Reload
+	movl	52(%esp), %ebp
+	movl	%edx, (%ebp)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 4(%ebp)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ebp)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebp)
+	movl	%edi, 16(%ebp)
+	movl	%ecx, 20(%ebp)
+	movl	%esi, 24(%ebp)
+	movl	%ebx, 28(%ebp)
+.LBB50_2:                               # %carry
+	addl	$32, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end100:
-	.size	mcl_fp_montRed7L, .Lfunc_end100-mcl_fp_montRed7L
-
-	.globl	mcl_fp_addPre7L
-	.align	16, 0x90
-	.type	mcl_fp_addPre7L,@function
-mcl_fp_addPre7L:                        # @mcl_fp_addPre7L
-# BB#0:
+.Lfunc_end50:
+	.size	mcl_fp_add8L, .Lfunc_end50-mcl_fp_add8L
+                                        # -- End function
+	.globl	mcl_fp_addNF8L                  # -- Begin function mcl_fp_addNF8L
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF8L,@function
+mcl_fp_addNF8L:                         # @mcl_fp_addNF8L
+# %bb.0:
+	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	20(%esp), %esi
-	addl	(%esi), %ecx
-	adcl	4(%esi), %edx
-	movl	8(%eax), %edi
-	adcl	8(%esi), %edi
-	movl	16(%esp), %ebx
-	movl	%ecx, (%ebx)
-	movl	12(%esi), %ecx
-	movl	%edx, 4(%ebx)
-	movl	16(%esi), %edx
-	adcl	12(%eax), %ecx
-	adcl	16(%eax), %edx
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%ecx, 12(%ebx)
-	movl	20(%esi), %ecx
-	adcl	%edi, %ecx
-	movl	%edx, 16(%ebx)
+	subl	$44, %esp
+	movl	72(%esp), %eax
+	movl	(%eax), %edx
+	movl	4(%eax), %ecx
+	movl	68(%esp), %esi
+	addl	(%esi), %edx
+	adcl	4(%esi), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	28(%eax), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	16(%eax), %ebx
+	movl	12(%eax), %ebp
+	movl	8(%eax), %eax
+	adcl	8(%esi), %eax
+	adcl	12(%esi), %ebp
+	adcl	16(%esi), %ebx
+	adcl	20(%esi), %ecx
+	movl	(%esp), %esi                    # 4-byte Reload
+	movl	68(%esp), %edi
+	adcl	24(%edi), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	movl	68(%esp), %edi
+	adcl	28(%edi), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	76(%esp), %esi
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	subl	(%esi), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edi                   # 4-byte Reload
+	sbbl	4(%esi), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	sbbl	8(%esi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	%ebp, %edi
+	sbbl	12(%esi), %edi
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	movl	%ebx, %ebp
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	sbbl	16(%esi), %ebp
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	sbbl	20(%esi), %ecx
+	movl	(%esp), %eax                    # 4-byte Reload
+	sbbl	24(%esi), %eax
+	movl	%ebx, %edx
+	sbbl	28(%esi), %edx
+	testl	%edx, %edx
+	js	.LBB51_1
+# %bb.2:
+	movl	64(%esp), %ebx
+	movl	%edx, 28(%ebx)
+	js	.LBB51_3
+.LBB51_4:
+	movl	%eax, 24(%ebx)
+	movl	32(%esp), %edx                  # 4-byte Reload
+	js	.LBB51_5
+.LBB51_6:
 	movl	%ecx, 20(%ebx)
-	movl	24(%eax), %eax
-	movl	24(%esi), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 24(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
+	movl	40(%esp), %eax                  # 4-byte Reload
+	js	.LBB51_7
+.LBB51_8:
+	movl	%ebp, 16(%ebx)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	js	.LBB51_9
+.LBB51_10:
+	movl	%edi, 12(%ebx)
+	js	.LBB51_11
+.LBB51_12:
+	movl	%edx, 8(%ebx)
+	js	.LBB51_13
+.LBB51_14:
+	movl	%ecx, 4(%ebx)
+	jns	.LBB51_16
+.LBB51_15:
+	movl	28(%esp), %eax                  # 4-byte Reload
+.LBB51_16:
+	movl	%eax, (%ebx)
+	addl	$44, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
+	popl	%ebp
 	retl
-.Lfunc_end101:
-	.size	mcl_fp_addPre7L, .Lfunc_end101-mcl_fp_addPre7L
-
-	.globl	mcl_fp_subPre7L
-	.align	16, 0x90
-	.type	mcl_fp_subPre7L,@function
-mcl_fp_subPre7L:                        # @mcl_fp_subPre7L
-# BB#0:
+.LBB51_1:
+	movl	%ebx, %edx
+	movl	64(%esp), %ebx
+	movl	%edx, 28(%ebx)
+	jns	.LBB51_4
+.LBB51_3:
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 24(%ebx)
+	movl	32(%esp), %edx                  # 4-byte Reload
+	jns	.LBB51_6
+.LBB51_5:
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%ebx)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	jns	.LBB51_8
+.LBB51_7:
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 16(%ebx)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB51_10
+.LBB51_9:
+	movl	20(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%ebx)
+	jns	.LBB51_12
+.LBB51_11:
+	movl	24(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%ebx)
+	jns	.LBB51_14
+.LBB51_13:
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 4(%ebx)
+	js	.LBB51_15
+	jmp	.LBB51_16
+.Lfunc_end51:
+	.size	mcl_fp_addNF8L, .Lfunc_end51-mcl_fp_addNF8L
+                                        # -- End function
+	.globl	mcl_fp_sub8L                    # -- Begin function mcl_fp_sub8L
+	.p2align	4, 0x90
+	.type	mcl_fp_sub8L,@function
+mcl_fp_sub8L:                           # @mcl_fp_sub8L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %esi
-	xorl	%eax, %eax
-	movl	28(%esp), %edi
-	subl	(%edi), %edx
-	sbbl	4(%edi), %esi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edi), %ebx
-	movl	20(%esp), %ebp
-	movl	%edx, (%ebp)
-	movl	12(%ecx), %edx
-	sbbl	12(%edi), %edx
-	movl	%esi, 4(%ebp)
-	movl	16(%ecx), %esi
-	sbbl	16(%edi), %esi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edi), %ebx
-	movl	%edx, 12(%ebp)
-	movl	20(%ecx), %edx
-	sbbl	%ebx, %edx
-	movl	%esi, 16(%ebp)
-	movl	%edx, 20(%ebp)
-	movl	24(%edi), %edx
-	movl	24(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 24(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
+	subl	$28, %esp
+	movl	52(%esp), %edx
+	movl	(%edx), %esi
+	movl	4(%edx), %edi
+	movl	56(%esp), %eax
+	subl	(%eax), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	sbbl	4(%eax), %edi
+	movl	%edi, (%esp)                    # 4-byte Spill
+	movl	28(%edx), %ecx
+	movl	24(%edx), %edi
+	movl	20(%edx), %esi
+	movl	16(%edx), %ebp
+	movl	12(%edx), %ebx
+	movl	8(%edx), %edx
+	sbbl	8(%eax), %edx
+	sbbl	12(%eax), %ebx
+	sbbl	16(%eax), %ebp
+	sbbl	20(%eax), %esi
+	sbbl	24(%eax), %edi
+	sbbl	28(%eax), %ecx
+	movl	%ecx, %eax
+	movl	$0, %ecx
+	sbbl	%ecx, %ecx
+	testb	$1, %cl
+	movl	48(%esp), %ecx
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%ecx)
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	%edi, 24(%ecx)
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	%esi, 20(%ecx)
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	%ebp, 16(%ecx)
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	%ebx, 12(%ecx)
+	movl	%edx, 8(%ecx)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 4(%ecx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, (%ecx)
+	je	.LBB52_2
+# %bb.1:                                # %carry
+	movl	60(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	addl	(%edi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	4(%edi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	8(%edi), %edx
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	12(%edi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	16(%edi), %ebp
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	20(%edi), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	24(%edi), %eax
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	28(%edi), %ebx
+	movl	%ebx, 28(%ecx)
+	movl	%eax, 24(%ecx)
+	movl	%esi, 20(%ecx)
+	movl	%ebp, 16(%ecx)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 12(%ecx)
+	movl	%edx, 8(%ecx)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 4(%ecx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, (%ecx)
+.LBB52_2:                               # %nocarry
+	addl	$28, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end102:
-	.size	mcl_fp_subPre7L, .Lfunc_end102-mcl_fp_subPre7L
-
-	.globl	mcl_fp_shr1_7L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_7L,@function
-mcl_fp_shr1_7L:                         # @mcl_fp_shr1_7L
-# BB#0:
+.Lfunc_end52:
+	.size	mcl_fp_sub8L, .Lfunc_end52-mcl_fp_sub8L
+                                        # -- End function
+	.globl	mcl_fp_subNF8L                  # -- Begin function mcl_fp_subNF8L
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF8L,@function
+mcl_fp_subNF8L:                         # @mcl_fp_subNF8L
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
 	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
+	subl	$40, %esp
+	movl	64(%esp), %eax
+	movl	(%eax), %esi
 	movl	4(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	8(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 4(%esi)
-	movl	12(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 8(%esi)
-	movl	16(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 12(%esi)
-	movl	20(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 16(%esi)
-	movl	24(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	%edx, 20(%esi)
-	shrl	%eax
-	movl	%eax, 24(%esi)
+	movl	68(%esp), %ecx
+	subl	(%ecx), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	sbbl	4(%ecx), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	28(%eax), %edx
+	movl	24(%eax), %esi
+	movl	20(%eax), %edi
+	movl	16(%eax), %ebx
+	movl	12(%eax), %ebp
+	movl	8(%eax), %eax
+	sbbl	8(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	sbbl	12(%ecx), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	sbbl	16(%ecx), %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	sbbl	20(%ecx), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	sbbl	24(%ecx), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	movl	%edx, %edi
+	sbbl	28(%ecx), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	sarl	$31, %edi
+	movl	72(%esp), %ebp
+	movl	28(%ebp), %eax
+	andl	%edi, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%ebp), %eax
+	andl	%edi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%ebp), %ebx
+	andl	%edi, %ebx
+	movl	16(%ebp), %esi
+	andl	%edi, %esi
+	movl	12(%ebp), %edx
+	andl	%edi, %edx
+	movl	8(%ebp), %ecx
+	andl	%edi, %ecx
+	movl	4(%ebp), %eax
+	andl	%edi, %eax
+	andl	(%ebp), %edi
+	addl	24(%esp), %edi                  # 4-byte Folded Reload
+	adcl	28(%esp), %eax                  # 4-byte Folded Reload
+	movl	60(%esp), %ebp
+	movl	%edi, (%ebp)
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%eax, 4(%ebp)
+	adcl	12(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 8(%ebp)
+	adcl	16(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edx, 12(%ebp)
+	adcl	20(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%esi, 16(%ebp)
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ebx, 20(%ebp)
+	movl	%eax, 24(%ebp)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 28(%ebp)
+	addl	$40, %esp
 	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
 	retl
-.Lfunc_end103:
-	.size	mcl_fp_shr1_7L, .Lfunc_end103-mcl_fp_shr1_7L
-
-	.globl	mcl_fp_add7L
-	.align	16, 0x90
-	.type	mcl_fp_add7L,@function
-mcl_fp_add7L:                           # @mcl_fp_add7L
-# BB#0:
+.Lfunc_end53:
+	.size	mcl_fp_subNF8L, .Lfunc_end53-mcl_fp_subNF8L
+                                        # -- End function
+	.globl	mcl_fpDbl_add8L                 # -- Begin function mcl_fpDbl_add8L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add8L,@function
+mcl_fpDbl_add8L:                        # @mcl_fpDbl_add8L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$20, %esp
-	movl	48(%esp), %ebp
-	movl	(%ebp), %eax
-	movl	4(%ebp), %edi
-	movl	44(%esp), %ecx
-	addl	(%ecx), %eax
-	adcl	4(%ecx), %edi
-	movl	8(%ebp), %esi
-	adcl	8(%ecx), %esi
-	movl	12(%ecx), %edx
-	movl	16(%ecx), %ebx
-	adcl	12(%ebp), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	16(%ebp), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	%ebp, %ebx
-	movl	20(%ecx), %ebp
-	adcl	20(%ebx), %ebp
-	movl	24(%ecx), %edx
-	adcl	24(%ebx), %edx
-	movl	40(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%edi, 4(%ecx)
-	movl	%esi, 8(%ecx)
-	movl	16(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 12(%ecx)
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 16(%ecx)
-	movl	%ebp, 20(%ecx)
-	movl	%edx, 24(%ecx)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	52(%esp), %ecx
-	subl	(%ecx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	52(%esp), %eax
-	sbbl	4(%eax), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%eax, %edi
-	sbbl	8(%edi), %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
+	subl	$52, %esp
+	movl	76(%esp), %eax
+	movl	(%eax), %ecx
+	movl	4(%eax), %edx
+	movl	80(%esp), %edi
+	addl	(%edi), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	4(%edi), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	60(%eax), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	56(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	52(%eax), %ebp
+	movl	48(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	44(%eax), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	40(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	32(%eax), %ebx
+	movl	28(%eax), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	24(%eax), %esi
+	movl	20(%eax), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	16(%eax), %edx
+	movl	12(%eax), %ecx
+	movl	8(%eax), %eax
+	adcl	8(%edi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	12(%edi), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	16(%edi), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	20(%edi), %edx
+	adcl	24(%edi), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	28(%edi), %ecx
+	adcl	32(%edi), %ebx
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	36(%edi), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	40(%edi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	44(%edi), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	48(%edi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	52(%edi), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	56(%edi), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	60(%edi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	72(%esp), %ebp
+	movl	%ecx, 28(%ebp)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 24(%ebp)
+	movl	%edx, 20(%ebp)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%ebp)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebp)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ebp)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%ebp)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%ebp)
+	setb	28(%esp)                        # 1-byte Folded Spill
+	movl	84(%esp), %edi
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	subl	(%edi), %ebx
+	movl	%ebx, 44(%esp)                  # 4-byte Spill
+	movl	%esi, %edx
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	sbbl	4(%edi), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	8(%edi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	sbbl	12(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %ecx
-	movl	%ecx, %esi
-	sbbl	20(%edi), %ebp
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	16(%edi), %eax
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	sbbl	20(%edi), %ecx
+	movl	12(%esp), %edx                  # 4-byte Reload
 	sbbl	24(%edi), %edx
+	movl	8(%esp), %esi                   # 4-byte Reload
+	sbbl	28(%edi), %esi
+	movzbl	28(%esp), %ebx                  # 1-byte Folded Reload
 	sbbl	$0, %ebx
 	testb	$1, %bl
-	jne	.LBB104_2
-# BB#1:                                 # %nocarry
-	movl	8(%esp), %ecx           # 4-byte Reload
-	movl	40(%esp), %eax
-	movl	%eax, %ebx
-	movl	%ecx, (%ebx)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 4(%ebx)
-	movl	(%esp), %eax            # 4-byte Reload
-	movl	%eax, 8(%ebx)
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 12(%ebx)
-	movl	%esi, 16(%ebx)
-	movl	%ebp, 20(%ebx)
-	movl	%edx, 24(%ebx)
-.LBB104_2:                              # %carry
-	addl	$20, %esp
+	jne	.LBB54_1
+# %bb.2:
+	movl	%esi, 60(%ebp)
+	jne	.LBB54_3
+.LBB54_4:
+	movl	%edx, 56(%ebp)
+	jne	.LBB54_5
+.LBB54_6:
+	movl	%ecx, 52(%ebp)
+	movl	36(%esp), %edx                  # 4-byte Reload
+	jne	.LBB54_7
+.LBB54_8:
+	movl	%eax, 48(%ebp)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	32(%esp), %eax                  # 4-byte Reload
+	jne	.LBB54_9
+.LBB54_10:
+	movl	%eax, 44(%ebp)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	jne	.LBB54_11
+.LBB54_12:
+	movl	%edx, 40(%ebp)
+	jne	.LBB54_13
+.LBB54_14:
+	movl	%ecx, 36(%ebp)
+	je	.LBB54_16
+.LBB54_15:
+	movl	(%esp), %eax                    # 4-byte Reload
+.LBB54_16:
+	movl	%eax, 32(%ebp)
+	addl	$52, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end104:
-	.size	mcl_fp_add7L, .Lfunc_end104-mcl_fp_add7L
-
-	.globl	mcl_fp_addNF7L
-	.align	16, 0x90
-	.type	mcl_fp_addNF7L,@function
-mcl_fp_addNF7L:                         # @mcl_fp_addNF7L
-# BB#0:
+.LBB54_1:
+	movl	8(%esp), %esi                   # 4-byte Reload
+	movl	%esi, 60(%ebp)
+	je	.LBB54_4
+.LBB54_3:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 56(%ebp)
+	je	.LBB54_6
+.LBB54_5:
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 52(%ebp)
+	movl	36(%esp), %edx                  # 4-byte Reload
+	je	.LBB54_8
+.LBB54_7:
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%ebp)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	32(%esp), %eax                  # 4-byte Reload
+	je	.LBB54_10
+.LBB54_9:
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 44(%ebp)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	je	.LBB54_12
+.LBB54_11:
+	movl	24(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 40(%ebp)
+	je	.LBB54_14
+.LBB54_13:
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 36(%ebp)
+	jne	.LBB54_15
+	jmp	.LBB54_16
+.Lfunc_end54:
+	.size	mcl_fpDbl_add8L, .Lfunc_end54-mcl_fpDbl_add8L
+                                        # -- End function
+	.globl	mcl_fpDbl_sub8L                 # -- Begin function mcl_fpDbl_sub8L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub8L,@function
+mcl_fpDbl_sub8L:                        # @mcl_fpDbl_sub8L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$52, %esp
-	movl	80(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	76(%esp), %esi
-	addl	(%esi), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	4(%esi), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	20(%eax), %ebx
-	movl	16(%eax), %edi
-	movl	12(%eax), %ebp
-	movl	8(%eax), %ecx
-	adcl	8(%esi), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	12(%esi), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	adcl	16(%esi), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	20(%esi), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	24(%esi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	44(%esp), %esi          # 4-byte Reload
-	subl	(%eax), %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	sbbl	4(%eax), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	8(%eax), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	12(%eax), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	sbbl	16(%eax), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	sbbl	20(%eax), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	sbbl	24(%eax), %edi
-	movl	%edi, %ecx
-	sarl	$31, %ecx
-	testl	%ecx, %ecx
-	js	.LBB105_2
-# BB#1:
-	movl	(%esp), %esi            # 4-byte Reload
-.LBB105_2:
-	movl	72(%esp), %ecx
-	movl	%esi, (%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	js	.LBB105_4
-# BB#3:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB105_4:
+	subl	$56, %esp
+	movl	80(%esp), %ecx
+	movl	(%ecx), %edx
+	movl	4(%ecx), %edi
+	xorl	%esi, %esi
+	movl	84(%esp), %ebp
+	subl	(%ebp), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	sbbl	4(%ebp), %edi
+	movl	%edi, (%esp)                    # 4-byte Spill
+	movl	60(%ecx), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	56(%ecx), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	52(%ecx), %ebx
+	movl	48(%ecx), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	44(%ecx), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	40(%ecx), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	36(%ecx), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	32(%ecx), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	28(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	24(%ecx), %edx
+	movl	20(%ecx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %edi
+	movl	12(%ecx), %eax
+	movl	8(%ecx), %ecx
+	sbbl	8(%ebp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	sbbl	12(%ebp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	sbbl	16(%ebp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	sbbl	20(%ebp), %edi
+	sbbl	24(%ebp), %edx
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	28(%ebp), %eax
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	sbbl	32(%ebp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	sbbl	36(%ebp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	sbbl	40(%ebp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	sbbl	44(%ebp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	sbbl	48(%ebp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	sbbl	52(%ebp), %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	sbbl	56(%ebp), %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	sbbl	60(%ebp), %ebx
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	movl	76(%esp), %ecx
+	movl	%eax, 28(%ecx)
+	movl	%edx, 24(%ecx)
+	movl	%edi, 20(%ecx)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%ecx)
+	movl	48(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ecx)
+	movl	52(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ecx)
+	movl	(%esp), %eax                    # 4-byte Reload
 	movl	%eax, 4(%ecx)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	36(%esp), %edx          # 4-byte Reload
-	movl	32(%esp), %esi          # 4-byte Reload
-	movl	24(%esp), %ebx          # 4-byte Reload
-	js	.LBB105_6
-# BB#5:
-	movl	8(%esp), %ebx           # 4-byte Reload
-.LBB105_6:
-	movl	72(%esp), %eax
-	movl	%ebx, 8(%eax)
-	movl	%eax, %ebx
-	js	.LBB105_8
-# BB#7:
-	movl	16(%esp), %esi          # 4-byte Reload
-.LBB105_8:
-	movl	%esi, 12(%ebx)
-	js	.LBB105_10
-# BB#9:
-	movl	20(%esp), %edx          # 4-byte Reload
-.LBB105_10:
-	movl	%edx, 16(%ebx)
-	js	.LBB105_12
-# BB#11:
-	movl	12(%esp), %ecx          # 4-byte Reload
-.LBB105_12:
-	movl	%ecx, 20(%ebx)
-	js	.LBB105_14
-# BB#13:
-	movl	%edi, %ebp
-.LBB105_14:
-	movl	%ebp, 24(%ebx)
-	addl	$52, %esp
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, (%ecx)
+	sbbl	%esi, %esi
+	andl	$1, %esi
+	negl	%esi
+	movl	88(%esp), %ecx
+	movl	28(%ecx), %eax
+	andl	%esi, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%ecx), %eax
+	andl	%esi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%ecx), %edi
+	andl	%esi, %edi
+	movl	16(%ecx), %ebp
+	andl	%esi, %ebp
+	movl	12(%ecx), %edx
+	andl	%esi, %edx
+	movl	8(%ecx), %ecx
+	andl	%esi, %ecx
+	movl	88(%esp), %eax
+	movl	4(%eax), %eax
+	andl	%esi, %eax
+	movl	88(%esp), %ebx
+	andl	(%ebx), %esi
+	addl	12(%esp), %esi                  # 4-byte Folded Reload
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	76(%esp), %ebx
+	movl	%esi, 32(%ebx)
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 36(%ebx)
+	adcl	24(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 40(%ebx)
+	adcl	28(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%edx, 44(%ebx)
+	adcl	8(%esp), %edi                   # 4-byte Folded Reload
+	movl	%ebp, 48(%ebx)
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	36(%esp), %eax                  # 4-byte Folded Reload
+	movl	%edi, 52(%ebx)
+	movl	%eax, 56(%ebx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 60(%ebx)
+	addl	$56, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end105:
-	.size	mcl_fp_addNF7L, .Lfunc_end105-mcl_fp_addNF7L
-
-	.globl	mcl_fp_sub7L
-	.align	16, 0x90
-	.type	mcl_fp_sub7L,@function
-mcl_fp_sub7L:                           # @mcl_fp_sub7L
-# BB#0:
+.Lfunc_end55:
+	.size	mcl_fpDbl_sub8L, .Lfunc_end55-mcl_fpDbl_sub8L
+                                        # -- End function
+	.globl	mulPv384x32                     # -- Begin function mulPv384x32
+	.p2align	4, 0x90
+	.type	mulPv384x32,@function
+mulPv384x32:                            # @mulPv384x32
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$24, %esp
-	movl	48(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %ecx
-	xorl	%ebx, %ebx
-	movl	52(%esp), %esi
-	subl	(%esi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	sbbl	4(%esi), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	8(%edi), %edx
-	sbbl	8(%esi), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	12(%edi), %ecx
-	sbbl	12(%esi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	16(%edi), %eax
-	sbbl	16(%esi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	20(%edi), %ebp
-	sbbl	20(%esi), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	24(%edi), %edi
-	sbbl	24(%esi), %edi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	44(%esp), %ebx
-	movl	16(%esp), %esi          # 4-byte Reload
-	movl	%esi, (%ebx)
-	movl	20(%esp), %esi          # 4-byte Reload
-	movl	%esi, 4(%ebx)
-	movl	%edx, 8(%ebx)
-	movl	%ecx, 12(%ebx)
-	movl	%eax, 16(%ebx)
-	movl	%ebp, 20(%ebx)
-	movl	%edi, 24(%ebx)
-	je	.LBB106_2
-# BB#1:                                 # %carry
-	movl	56(%esp), %ebp
-	movl	16(%esp), %ecx          # 4-byte Reload
-	addl	(%ebp), %ecx
-	movl	%ecx, (%ebx)
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	4(%ebp), %edx
-	movl	%edx, 4(%ebx)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	8(%ebp), %ecx
-	movl	12(%ebp), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 8(%ebx)
-	movl	16(%ebp), %ecx
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	%ecx, 16(%ebx)
-	movl	20(%ebp), %eax
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	24(%ebp), %eax
-	adcl	%edi, %eax
-	movl	%eax, 24(%ebx)
-.LBB106_2:                              # %nocarry
-	addl	$24, %esp
+	subl	$76, %esp
+	movl	104(%esp), %edi
+	movl	100(%esp), %ecx
+	movl	%edi, %eax
+	mull	44(%ecx)
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	40(%ecx)
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	36(%ecx)
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	32(%ecx)
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	28(%ecx)
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	24(%ecx)
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	20(%ecx)
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	16(%ecx)
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	mull	12(%ecx)
+	movl	%edx, %ebx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%edi, %eax
+	mull	8(%ecx)
+	movl	%edx, %esi
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%edi, %eax
+	mull	4(%ecx)
+	movl	%edx, %ebp
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%edi, %eax
+	mull	(%ecx)
+	movl	%eax, %edi
+	movl	96(%esp), %eax
+	movl	%edi, (%eax)
+	addl	(%esp), %edx                    # 4-byte Folded Reload
+	movl	%edx, 4(%eax)
+	adcl	4(%esp), %ebp                   # 4-byte Folded Reload
+	movl	%ebp, 8(%eax)
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 12(%eax)
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 16(%eax)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 20(%eax)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%eax)
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%eax)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 32(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	52(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 36(%eax)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	60(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 40(%eax)
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 44(%eax)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	movl	%ecx, 48(%eax)
+	addl	$76, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
-	retl
-.Lfunc_end106:
-	.size	mcl_fp_sub7L, .Lfunc_end106-mcl_fp_sub7L
-
-	.globl	mcl_fp_subNF7L
-	.align	16, 0x90
-	.type	mcl_fp_subNF7L,@function
-mcl_fp_subNF7L:                         # @mcl_fp_subNF7L
-# BB#0:
+	retl	$4
+.Lfunc_end56:
+	.size	mulPv384x32, .Lfunc_end56-mulPv384x32
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre12L            # -- Begin function mcl_fp_mulUnitPre12L
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre12L,@function
+mcl_fp_mulUnitPre12L:                   # @mcl_fp_mulUnitPre12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$32, %esp
+	subl	$92, %esp
+	calll	.L57$pb
+.L57$pb:
+	popl	%ebx
+.Ltmp9:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp9-.L57$pb), %ebx
+	subl	$4, %esp
+	movl	124(%esp), %eax
+	movl	120(%esp), %ecx
+	leal	44(%esp), %edx
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	40(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
 	movl	56(%esp), %eax
-	movl	(%eax), %esi
-	movl	4(%eax), %edx
-	movl	60(%esp), %ecx
-	subl	(%ecx), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	sbbl	4(%ecx), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	24(%eax), %edx
-	movl	20(%eax), %esi
-	movl	16(%eax), %edi
-	movl	12(%eax), %ebx
-	movl	8(%eax), %eax
-	sbbl	8(%ecx), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	sbbl	12(%ecx), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	sbbl	16(%ecx), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	sbbl	20(%ecx), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	sbbl	24(%ecx), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	%edx, %ecx
-	sarl	$31, %ecx
-	movl	%ecx, %eax
-	shldl	$1, %edx, %eax
-	movl	64(%esp), %edx
-	andl	(%edx), %eax
-	movl	24(%edx), %esi
-	andl	%ecx, %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	20(%edx), %ebx
-	andl	%ecx, %ebx
-	movl	16(%edx), %edi
-	andl	%ecx, %edi
-	movl	12(%edx), %esi
-	andl	%ecx, %esi
-	movl	64(%esp), %edx
-	movl	8(%edx), %edx
-	andl	%ecx, %edx
-	movl	64(%esp), %ebp
-	andl	4(%ebp), %ecx
-	addl	20(%esp), %eax          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	52(%esp), %ebp
-	movl	%eax, (%ebp)
-	adcl	4(%esp), %edx           # 4-byte Folded Reload
-	movl	%ebp, %eax
-	movl	%ecx, 4(%eax)
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 8(%eax)
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 12(%eax)
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edi, 16(%eax)
-	movl	%ebx, 20(%eax)
-	movl	(%esp), %ecx            # 4-byte Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ebp
+	movl	72(%esp), %ebx
+	movl	76(%esp), %edi
+	movl	80(%esp), %esi
+	movl	84(%esp), %edx
+	movl	88(%esp), %ecx
+	movl	112(%esp), %eax
+	movl	%ecx, 48(%eax)
+	movl	%edx, 44(%eax)
+	movl	%esi, 40(%eax)
+	movl	%edi, 36(%eax)
+	movl	%ebx, 32(%eax)
+	movl	%ebp, 28(%eax)
+	movl	12(%esp), %ecx                  # 4-byte Reload
 	movl	%ecx, 24(%eax)
-	addl	$32, %esp
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%eax)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%eax)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 8(%eax)
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%eax)
+	addl	$92, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end107:
-	.size	mcl_fp_subNF7L, .Lfunc_end107-mcl_fp_subNF7L
-
-	.globl	mcl_fpDbl_add7L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add7L,@function
-mcl_fpDbl_add7L:                        # @mcl_fpDbl_add7L
-# BB#0:
+.Lfunc_end57:
+	.size	mcl_fp_mulUnitPre12L, .Lfunc_end57-mcl_fp_mulUnitPre12L
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre12L             # -- Begin function mcl_fpDbl_mulPre12L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre12L,@function
+mcl_fpDbl_mulPre12L:                    # @mcl_fpDbl_mulPre12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$44, %esp
-	movl	72(%esp), %esi
-	movl	68(%esp), %edx
-	movl	12(%edx), %edi
-	movl	16(%edx), %ecx
-	movl	8(%esi), %eax
-	movl	(%esi), %ebx
-	addl	(%edx), %ebx
-	movl	64(%esp), %ebp
-	movl	%ebx, (%ebp)
-	movl	4(%esi), %ebx
-	adcl	4(%edx), %ebx
-	adcl	8(%edx), %eax
-	adcl	12(%esi), %edi
-	adcl	16(%esi), %ecx
-	movl	%ebx, 4(%ebp)
-	movl	%esi, %ebx
-	movl	36(%ebx), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	%eax, 8(%ebp)
-	movl	20(%ebx), %eax
-	movl	%edi, 12(%ebp)
-	movl	20(%edx), %edi
-	adcl	%eax, %edi
-	movl	24(%ebx), %eax
-	movl	%ecx, 16(%ebp)
-	movl	24(%edx), %ecx
-	adcl	%eax, %ecx
-	movl	28(%ebx), %eax
-	movl	%edi, 20(%ebp)
-	movl	28(%edx), %edi
-	adcl	%eax, %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	32(%ebx), %eax
-	movl	%ecx, 24(%ebp)
-	movl	32(%edx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%edx), %esi
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	40(%ebx), %ecx
-	movl	40(%edx), %eax
-	adcl	%ecx, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%ebx), %ebp
-	movl	44(%edx), %ecx
-	adcl	%ebp, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	48(%ebx), %ebp
-	movl	%ebx, %eax
-	movl	48(%edx), %ebx
-	adcl	%ebp, %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	52(%eax), %eax
-	movl	52(%edx), %ebp
-	adcl	%eax, %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	76(%esp), %eax
-	subl	(%eax), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	sbbl	4(%eax), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	movl	76(%esp), %edi
-	sbbl	8(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	sbbl	12(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %ebx
-	sbbl	24(%edi), %ebp
-	sbbl	$0, %edx
-	andl	$1, %edx
-	jne	.LBB108_2
-# BB#1:
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-.LBB108_2:
-	testb	%dl, %dl
-	movl	20(%esp), %ecx          # 4-byte Reload
-	jne	.LBB108_4
-# BB#3:
-	movl	(%esp), %esi            # 4-byte Reload
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	8(%esp), %ecx           # 4-byte Reload
-.LBB108_4:
-	movl	64(%esp), %eax
-	movl	%ecx, 28(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	%esi, 36(%eax)
-	movl	24(%esp), %edx          # 4-byte Reload
-	movl	36(%esp), %ecx          # 4-byte Reload
-	jne	.LBB108_6
-# BB#5:
-	movl	12(%esp), %ecx          # 4-byte Reload
-.LBB108_6:
-	movl	%ecx, 40(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	jne	.LBB108_8
-# BB#7:
-	movl	16(%esp), %edx          # 4-byte Reload
-.LBB108_8:
-	movl	%edx, 44(%eax)
-	jne	.LBB108_10
-# BB#9:
-	movl	%ebx, %ecx
-.LBB108_10:
-	movl	%ecx, 48(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	addl	$44, %esp
-	popl	%esi
+	subl	$220, %esp
+	calll	.L58$pb
+.L58$pb:
 	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end108:
-	.size	mcl_fpDbl_add7L, .Lfunc_end108-mcl_fpDbl_add7L
-
-	.globl	mcl_fpDbl_sub7L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub7L,@function
-mcl_fpDbl_sub7L:                        # @mcl_fpDbl_sub7L
-# BB#0:
+.Ltmp10:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp10-.L58$pb), %edi
+	subl	$4, %esp
+	movl	252(%esp), %ebp
+	movl	248(%esp), %esi
+	movl	%edi, %ebx
+	movl	%edi, 68(%esp)                  # 4-byte Spill
 	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
 	pushl	%esi
-	subl	$32, %esp
-	movl	56(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %edx
-	movl	60(%esp), %edi
+	pushl	252(%esp)
+	calll	mcl_fpDbl_mulPre6L@PLT
+	addl	$12, %esp
+	leal	24(%ebp), %eax
+	leal	24(%esi), %ecx
+	movl	244(%esp), %edx
+	addl	$48, %edx
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+	calll	mcl_fpDbl_mulPre6L@PLT
+	addl	$16, %esp
+	movl	40(%esi), %edx
+	movl	36(%esi), %ebx
+	movl	32(%esi), %eax
+	movl	24(%esi), %edi
+	movl	28(%esi), %ecx
+	addl	(%esi), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	adcl	4(%esi), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	8(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	12(%esi), %ebx
+	movl	%ebx, %edi
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	adcl	16(%esi), %edx
+	movl	%edx, %ecx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esi), %eax
+	adcl	20(%esi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	setb	48(%esp)                        # 1-byte Folded Spill
+	movl	24(%ebp), %ebx
+	addl	(%ebp), %ebx
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	movl	28(%ebp), %edx
+	adcl	4(%ebp), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	32(%ebp), %edx
+	adcl	8(%ebp), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	36(%ebp), %edx
+	adcl	12(%ebp), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	40(%ebp), %edx
+	adcl	16(%ebp), %edx
+	movl	%edx, %esi
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	44(%ebp), %edx
+	adcl	20(%ebp), %edx
+	movl	%eax, 168(%esp)
+	movl	%ecx, 164(%esp)
+	movl	%edi, 160(%esp)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 156(%esp)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 152(%esp)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 148(%esp)
+	movl	%edx, 144(%esp)
+	movl	%esi, 140(%esp)
+	movl	36(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 136(%esp)
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 132(%esp)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 128(%esp)
+	movl	%ebx, 124(%esp)
+	setb	60(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movzbl	52(%esp), %ecx                  # 1-byte Folded Reload
+	movl	%ecx, %esi
+	negl	%esi
+	movl	%ecx, %ebx
+	shll	$31, %ebx
+	shrdl	$31, %esi, %ebx
+	andl	60(%esp), %ebx                  # 4-byte Folded Reload
+	andl	%esi, 56(%esp)                  # 4-byte Folded Spill
+	andl	%esi, %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	andl	%esi, %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	andl	%esi, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	andl	%edx, %esi
+	movzbl	64(%esp), %ebp                  # 1-byte Folded Reload
+	andl	%ebp, %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	%ebp, %edx
+	negl	%edx
+	andl	%edx, 48(%esp)                  # 4-byte Folded Spill
+	andl	%edx, 44(%esp)                  # 4-byte Folded Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	andl	%edx, %edi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	andl	%edx, %ecx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	andl	%edx, %eax
+	shll	$31, %ebp
+	shrdl	$31, %edx, %ebp
+	andl	16(%esp), %ebp                  # 4-byte Folded Reload
+	addl	%ebx, %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	leal	176(%esp), %ecx
+	adcl	40(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	56(%esp), %edi                  # 4-byte Folded Reload
+	leal	128(%esp), %edx
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	%esi, %ebp
+	setb	%al
+	movzbl	%al, %esi
+	movl	68(%esp), %ebx                  # 4-byte Reload
+	pushl	%edx
+	leal	156(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mcl_fpDbl_mulPre6L@PLT
+	addl	$16, %esp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	addl	196(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	200(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	204(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	208(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	212(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	adcl	216(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	%esi, %ebp
+	adcl	48(%esp), %ebp                  # 4-byte Folded Reload
+	movl	172(%esp), %eax
+	movl	240(%esp), %edi
 	subl	(%edi), %eax
-	sbbl	4(%edi), %edx
-	movl	8(%esi), %ebx
-	sbbl	8(%edi), %ebx
-	movl	52(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%edx, 4(%ecx)
-	movl	16(%esi), %edx
-	sbbl	16(%edi), %edx
-	movl	%ebx, 8(%ecx)
-	movl	20(%edi), %ebx
-	movl	%eax, 12(%ecx)
-	movl	20(%esi), %eax
-	sbbl	%ebx, %eax
-	movl	24(%edi), %ebx
-	movl	%edx, 16(%ecx)
-	movl	24(%esi), %edx
-	sbbl	%ebx, %edx
-	movl	28(%edi), %ebx
-	movl	%eax, 20(%ecx)
-	movl	28(%esi), %eax
-	sbbl	%ebx, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	176(%esp), %eax
+	sbbl	4(%edi), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	180(%esp), %eax
+	sbbl	8(%edi), %eax
+	movl	%eax, %ecx
+	movl	184(%esp), %esi
+	sbbl	12(%edi), %esi
+	movl	188(%esp), %ebx
+	sbbl	16(%edi), %ebx
+	movl	192(%esp), %eax
+	sbbl	20(%edi), %eax
+	movl	%eax, %edx
+	movl	24(%edi), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	sbbl	%eax, 12(%esp)                  # 4-byte Folded Spill
+	movl	28(%edi), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	sbbl	%eax, 20(%esp)                  # 4-byte Folded Spill
 	movl	32(%edi), %eax
-	movl	%edx, 24(%ecx)
-	movl	32(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	sbbl	%eax, 24(%esp)                  # 4-byte Folded Spill
 	movl	36(%edi), %eax
-	movl	36(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	sbbl	%eax, 16(%esp)                  # 4-byte Folded Spill
 	movl	40(%edi), %eax
-	movl	40(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	sbbl	%eax, 40(%esp)                  # 4-byte Folded Spill
 	movl	44(%edi), %eax
-	movl	44(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	sbbl	%eax, 44(%esp)                  # 4-byte Folded Spill
+	sbbl	$0, %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
 	movl	48(%edi), %eax
-	movl	48(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	subl	%eax, 32(%esp)                  # 4-byte Folded Spill
 	movl	52(%edi), %eax
-	movl	52(%esi), %edx
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	sbbl	%eax, 28(%esp)                  # 4-byte Folded Spill
+	movl	56(%edi), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	sbbl	%eax, %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	60(%edi), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	sbbl	%eax, %esi
+	movl	64(%edi), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	sbbl	%eax, %ebx
+	movl	68(%edi), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
 	sbbl	%eax, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	64(%esp), %esi
-	jne	.LBB109_1
-# BB#2:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB109_3
-.LBB109_1:
-	movl	24(%esi), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-.LBB109_3:
-	testb	%al, %al
-	jne	.LBB109_4
-# BB#5:
-	movl	$0, %edi
-	movl	$0, %eax
-	jmp	.LBB109_6
-.LBB109_4:
-	movl	(%esi), %eax
-	movl	4(%esi), %edi
-.LBB109_6:
-	jne	.LBB109_7
-# BB#8:
-	movl	$0, %ebx
-	jmp	.LBB109_9
-.LBB109_7:
-	movl	20(%esi), %ebx
-.LBB109_9:
-	jne	.LBB109_10
-# BB#11:
-	movl	$0, %ebp
-	jmp	.LBB109_12
-.LBB109_10:
-	movl	16(%esi), %ebp
-.LBB109_12:
-	jne	.LBB109_13
-# BB#14:
-	movl	$0, %edx
-	jmp	.LBB109_15
-.LBB109_13:
-	movl	12(%esi), %edx
-.LBB109_15:
-	jne	.LBB109_16
-# BB#17:
-	xorl	%esi, %esi
-	jmp	.LBB109_18
-.LBB109_16:
-	movl	8(%esi), %esi
-.LBB109_18:
-	addl	12(%esp), %eax          # 4-byte Folded Reload
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%eax, 28(%ecx)
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%edi, 32(%ecx)
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, 36(%ecx)
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edx, 40(%ecx)
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 44(%ecx)
-	movl	%ebx, 48(%ecx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%ecx)
-	addl	$32, %esp
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	72(%edi), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	sbbl	%eax, 12(%esp)                  # 4-byte Folded Spill
+	movl	76(%edi), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	sbbl	%eax, 20(%esp)                  # 4-byte Folded Spill
+	movl	80(%edi), %eax
+	movl	%eax, 120(%esp)                 # 4-byte Spill
+	sbbl	%eax, 24(%esp)                  # 4-byte Folded Spill
+	movl	84(%edi), %eax
+	movl	%eax, 116(%esp)                 # 4-byte Spill
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	sbbl	%eax, %ebp
+	movl	88(%edi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	sbbl	%eax, 40(%esp)                  # 4-byte Folded Spill
+	movl	92(%edi), %eax
+	movl	%eax, 112(%esp)                 # 4-byte Spill
+	sbbl	%eax, 44(%esp)                  # 4-byte Folded Spill
+	sbbl	$0, 36(%esp)                    # 4-byte Folded Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	addl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	60(%esp), %eax                  # 4-byte Folded Reload
+	adcl	104(%esp), %esi                 # 4-byte Folded Reload
+	adcl	76(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%esi, 36(%edi)
+	movl	%eax, 32(%edi)
+	movl	%ecx, 28(%edi)
+	movl	%edx, 24(%edi)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	adcl	64(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ebx, 40(%edi)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	100(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 44(%edi)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	108(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%eax, 48(%edi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	96(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 52(%edi)
+	adcl	92(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%eax, 56(%edi)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	88(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ebp, 60(%edi)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	84(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 64(%edi)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	80(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 68(%edi)
+	movl	%eax, 72(%edi)
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 76(%edi)
+	movl	120(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 80(%edi)
+	movl	116(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 84(%edi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 88(%edi)
+	movl	112(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 92(%edi)
+	addl	$220, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end109:
-	.size	mcl_fpDbl_sub7L, .Lfunc_end109-mcl_fpDbl_sub7L
-
-	.align	16, 0x90
-	.type	.LmulPv256x32,@function
-.LmulPv256x32:                          # @mulPv256x32
-# BB#0:
+.Lfunc_end58:
+	.size	mcl_fpDbl_mulPre12L, .Lfunc_end58-mcl_fpDbl_mulPre12L
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre12L             # -- Begin function mcl_fpDbl_sqrPre12L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre12L,@function
+mcl_fpDbl_sqrPre12L:                    # @mcl_fpDbl_sqrPre12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$48, %esp
-	movl	%edx, %esi
-	movl	68(%esp), %ebx
-	movl	%ebx, %eax
-	mull	28(%esi)
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	24(%esi)
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	20(%esi)
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	16(%esi)
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	12(%esi)
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	8(%esi)
-	movl	%edx, %ebp
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	4(%esi)
-	movl	%edx, %edi
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ebx, %eax
-	mull	(%esi)
-	movl	%eax, (%ecx)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%ecx)
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 8(%ecx)
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 12(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 32(%ecx)
-	movl	%ecx, %eax
-	addl	$48, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
+	subl	$220, %esp
+	calll	.L59$pb
+.L59$pb:
 	popl	%ebp
-	retl
-.Lfunc_end110:
-	.size	.LmulPv256x32, .Lfunc_end110-.LmulPv256x32
-
-	.globl	mcl_fp_mulUnitPre8L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre8L,@function
-mcl_fp_mulUnitPre8L:                    # @mcl_fp_mulUnitPre8L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
+.Ltmp11:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp11-.L59$pb), %ebp
+	subl	$4, %esp
+	movl	248(%esp), %esi
+	movl	244(%esp), %edi
+	movl	%ebp, %ebx
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
 	pushl	%esi
-	subl	$60, %esp
-	calll	.L111$pb
-.L111$pb:
-	popl	%ebx
-.Ltmp2:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.L111$pb), %ebx
-	movl	88(%esp), %eax
-	movl	%eax, (%esp)
-	leal	24(%esp), %ecx
-	movl	84(%esp), %edx
-	calll	.LmulPv256x32
-	movl	56(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi
-	movl	40(%esp), %edi
-	movl	36(%esp), %ebx
-	movl	32(%esp), %ebp
-	movl	24(%esp), %edx
-	movl	28(%esp), %ecx
-	movl	80(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%ebp, 8(%eax)
-	movl	%ebx, 12(%eax)
-	movl	%edi, 16(%eax)
-	movl	%esi, 20(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	addl	$60, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end111:
-	.size	mcl_fp_mulUnitPre8L, .Lfunc_end111-mcl_fp_mulUnitPre8L
-
-	.globl	mcl_fpDbl_mulPre8L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre8L,@function
-mcl_fpDbl_mulPre8L:                     # @mcl_fpDbl_mulPre8L
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
 	pushl	%esi
-	subl	$156, %esp
-	calll	.L112$pb
-.L112$pb:
-	popl	%ebx
-.Ltmp3:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L112$pb), %ebx
-	movl	%ebx, -96(%ebp)         # 4-byte Spill
-	movl	16(%ebp), %esi
-	movl	%esi, 8(%esp)
-	movl	12(%ebp), %edi
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre4L@PLT
-	leal	16(%esi), %eax
-	movl	%eax, 8(%esp)
-	leal	16(%edi), %eax
-	movl	%eax, 4(%esp)
-	movl	8(%ebp), %eax
-	leal	32(%eax), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre4L@PLT
-	movl	24(%edi), %esi
-	movl	(%edi), %ebx
-	movl	4(%edi), %eax
-	addl	16(%edi), %ebx
-	movl	%ebx, -120(%ebp)        # 4-byte Spill
-	adcl	20(%edi), %eax
-	movl	%eax, -100(%ebp)        # 4-byte Spill
-	adcl	8(%edi), %esi
-	movl	%esi, -108(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -80(%ebp)         # 4-byte Spill
-	movl	16(%ebp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %ecx
-	addl	16(%edi), %eax
-	adcl	20(%edi), %ecx
-	movl	%ecx, -124(%ebp)        # 4-byte Spill
-	movl	24(%edi), %edx
-	adcl	8(%edi), %edx
-	movl	28(%edi), %ecx
-	adcl	12(%edi), %ecx
+	pushl	%edi
+	calll	mcl_fpDbl_mulPre6L@PLT
+	addl	$12, %esp
+	leal	24(%esi), %eax
+	leal	48(%edi), %ecx
 	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -128(%ebp)        # 4-byte Spill
-	jb	.LBB112_2
-# BB#1:
-	xorl	%esi, %esi
-	xorl	%ebx, %ebx
-.LBB112_2:
-	movl	%ebx, -112(%ebp)        # 4-byte Spill
-	movl	%esi, -104(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %esi
-	movl	28(%esi), %edi
-	movl	-80(%ebp), %ebx         # 4-byte Reload
 	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	12(%esi), %edi
-	movl	%edi, -116(%ebp)        # 4-byte Spill
-	movl	%ecx, -84(%ebp)         # 4-byte Spill
-	movl	%edx, %edi
-	movl	-124(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -80(%ebp)         # 4-byte Spill
-	movl	%eax, -92(%ebp)         # 4-byte Spill
-	jb	.LBB112_4
-# BB#3:
-	movl	$0, -84(%ebp)           # 4-byte Folded Spill
-	movl	$0, %edi
-	movl	$0, -80(%ebp)           # 4-byte Folded Spill
-	movl	$0, -92(%ebp)           # 4-byte Folded Spill
-.LBB112_4:
-	movl	%edi, -88(%ebp)         # 4-byte Spill
-	movl	-120(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -60(%ebp)
-	movl	-100(%ebp), %edi        # 4-byte Reload
-	movl	%edi, -56(%ebp)
-	movl	-108(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -52(%ebp)
-	movl	%eax, -76(%ebp)
-	movl	%ebx, -72(%ebp)
-	movl	%edx, -68(%ebp)
-	movl	%ecx, -64(%ebp)
-	sbbl	%edx, %edx
-	movl	-116(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -48(%ebp)
-	movl	-128(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB112_6
-# BB#5:
-	movl	$0, %esi
-	movl	$0, %edi
-.LBB112_6:
-	sbbl	%eax, %eax
-	leal	-76(%ebp), %ecx
-	movl	%ecx, 8(%esp)
-	leal	-60(%ebp), %ecx
-	movl	%ecx, 4(%esp)
-	leal	-44(%ebp), %ecx
-	movl	%ecx, (%esp)
-	andl	%eax, %edx
-	movl	%edi, %eax
-	movl	-92(%ebp), %edi         # 4-byte Reload
-	addl	-112(%ebp), %edi        # 4-byte Folded Reload
-	adcl	%eax, -80(%ebp)         # 4-byte Folded Spill
-	movl	-104(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -88(%ebp)         # 4-byte Folded Spill
-	adcl	%esi, -84(%ebp)         # 4-byte Folded Spill
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	andl	$1, %edx
-	movl	%edx, -92(%ebp)         # 4-byte Spill
-	movl	-96(%ebp), %ebx         # 4-byte Reload
-	calll	mcl_fpDbl_mulPre4L@PLT
-	addl	-28(%ebp), %edi
-	movl	-80(%ebp), %eax         # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -80(%ebp)         # 4-byte Spill
-	movl	-88(%ebp), %eax         # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -88(%ebp)         # 4-byte Spill
-	movl	-84(%ebp), %eax         # 4-byte Reload
-	adcl	-16(%ebp), %eax
-	movl	%eax, -84(%ebp)         # 4-byte Spill
-	adcl	%esi, -92(%ebp)         # 4-byte Folded Spill
-	movl	-44(%ebp), %eax
-	movl	8(%ebp), %esi
-	subl	(%esi), %eax
-	movl	%eax, -116(%ebp)        # 4-byte Spill
-	movl	-40(%ebp), %ebx
-	sbbl	4(%esi), %ebx
-	movl	-36(%ebp), %eax
-	sbbl	8(%esi), %eax
-	movl	%eax, -96(%ebp)         # 4-byte Spill
-	movl	-32(%ebp), %edx
-	sbbl	12(%esi), %edx
-	movl	16(%esi), %eax
-	movl	%eax, -100(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edi
-	movl	20(%esi), %eax
-	movl	%eax, -112(%ebp)        # 4-byte Spill
-	sbbl	%eax, -80(%ebp)         # 4-byte Folded Spill
-	movl	24(%esi), %eax
-	movl	%eax, -104(%ebp)        # 4-byte Spill
-	sbbl	%eax, -88(%ebp)         # 4-byte Folded Spill
-	movl	28(%esi), %eax
-	movl	%eax, -108(%ebp)        # 4-byte Spill
-	sbbl	%eax, -84(%ebp)         # 4-byte Folded Spill
-	sbbl	$0, -92(%ebp)           # 4-byte Folded Spill
-	movl	32(%esi), %ecx
-	movl	%ecx, -132(%ebp)        # 4-byte Spill
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	subl	%ecx, %eax
-	movl	36(%esi), %ecx
-	movl	%ecx, -136(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %ebx
+	pushl	%ecx
+	calll	mcl_fpDbl_mulPre6L@PLT
+	addl	$16, %esp
+	movl	44(%esi), %edi
 	movl	40(%esi), %ecx
-	movl	%ecx, -128(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -96(%ebp)         # 4-byte Folded Spill
-	movl	44(%esi), %ecx
-	movl	%ecx, -140(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edx
-	movl	48(%esi), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edi
-	movl	52(%esi), %ecx
-	movl	%ecx, -116(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -80(%ebp)         # 4-byte Folded Spill
-	movl	56(%esi), %ecx
-	movl	%ecx, -120(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -88(%ebp)         # 4-byte Folded Spill
-	movl	60(%esi), %ecx
-	movl	%ecx, -124(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -84(%ebp)         # 4-byte Folded Spill
-	sbbl	$0, -92(%ebp)           # 4-byte Folded Spill
-	addl	-100(%ebp), %eax        # 4-byte Folded Reload
-	adcl	-112(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%eax, 16(%esi)
-	movl	-96(%ebp), %eax         # 4-byte Reload
-	adcl	-104(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ebx, 20(%esi)
-	adcl	-108(%ebp), %edx        # 4-byte Folded Reload
-	movl	%eax, 24(%esi)
-	adcl	-132(%ebp), %edi        # 4-byte Folded Reload
-	movl	%edx, 28(%esi)
-	movl	-80(%ebp), %eax         # 4-byte Reload
-	adcl	-136(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edi, 32(%esi)
-	movl	-88(%ebp), %ecx         # 4-byte Reload
-	adcl	-128(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 36(%esi)
-	movl	-84(%ebp), %eax         # 4-byte Reload
-	adcl	-140(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 40(%esi)
-	movl	-92(%ebp), %ecx         # 4-byte Reload
-	adcl	-144(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 44(%esi)
-	movl	%ecx, 48(%esi)
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 52(%esi)
-	movl	-120(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 56(%esi)
-	movl	-124(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 60(%esi)
-	addl	$156, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end112:
-	.size	mcl_fpDbl_mulPre8L, .Lfunc_end112-mcl_fpDbl_mulPre8L
-
-	.globl	mcl_fpDbl_sqrPre8L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre8L,@function
-mcl_fpDbl_sqrPre8L:                     # @mcl_fpDbl_sqrPre8L
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$156, %esp
-	calll	.L113$pb
-.L113$pb:
-	popl	%ebx
-.Ltmp4:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.L113$pb), %ebx
-	movl	%ebx, -96(%ebp)         # 4-byte Spill
-	movl	12(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %esi
-	movl	%esi, (%esp)
-	calll	mcl_fpDbl_mulPre4L@PLT
-	leal	16(%edi), %eax
-	movl	%eax, 8(%esp)
-	movl	%eax, 4(%esp)
-	leal	32(%esi), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre4L@PLT
-	movl	(%edi), %esi
-	movl	4(%edi), %ecx
-	addl	16(%edi), %esi
-	movl	%esi, -108(%ebp)        # 4-byte Spill
-	adcl	20(%edi), %ecx
-	seto	%al
-	lahf
-	movl	%eax, %edx
+	movl	36(%esi), %ebx
+	movl	32(%esi), %eax
+	movl	24(%esi), %ebp
+	movl	28(%esi), %edx
+	addl	(%esi), %ebp
+	adcl	4(%esi), %edx
+	adcl	8(%esi), %eax
+	adcl	12(%esi), %ebx
+	adcl	16(%esi), %ecx
+	adcl	20(%esi), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	%edi, 168(%esp)
+	movl	%ecx, 164(%esp)
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	%ebx, 160(%esp)
+	movl	%eax, 156(%esp)
+	movl	%edx, 152(%esp)
+	movl	%ebp, 148(%esp)
+	movl	%edi, 144(%esp)
+	movl	%ecx, 140(%esp)
+	movl	%ebx, 136(%esp)
+	movl	%eax, 132(%esp)
+	movl	%edx, 128(%esp)
+	movl	%ebp, 124(%esp)
+	setb	%bl
+	subl	$4, %esp
+	movzbl	%bl, %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	%edi, %ebx
+	shll	$31, %ebx
+	negl	%edi
+	shrdl	$31, %edi, %ebx
+	andl	%ebp, %ebx
+	movl	%ebx, %esi
+	andl	%edi, %edx
+	andl	%edi, %eax
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	andl	%edi, %ebx
+	andl	%edi, %ecx
+	andl	12(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, %ebp
+	shldl	$1, %ecx, %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	shldl	$1, %ebx, %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	shldl	$1, %eax, %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	shldl	$1, %edx, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	shldl	$1, %esi, %edx
+	movl	%edx, %ebp
+	shrl	$31, %edi
 	addl	%esi, %esi
-	movl	%esi, -84(%ebp)         # 4-byte Spill
-	movl	%ecx, %esi
-	adcl	%esi, %esi
-	movl	%esi, -80(%ebp)         # 4-byte Spill
+	leal	128(%esp), %eax
+	movl	32(%esp), %ebx                  # 4-byte Reload
 	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %esi
-	popl	%eax
-	movl	%esi, -88(%ebp)         # 4-byte Spill
-	movl	24(%edi), %esi
-	pushl	%eax
-	movl	%edx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	8(%edi), %esi
-	movl	28(%edi), %edx
-	adcl	12(%edi), %edx
+	leal	156(%esp), %eax
 	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -100(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -104(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %ebx
-	sbbl	%edi, %edi
-	movl	%edi, -92(%ebp)         # 4-byte Spill
+	leal	184(%esp), %eax
 	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB113_2
-# BB#1:
-	movl	$0, -80(%ebp)           # 4-byte Folded Spill
-	movl	$0, -84(%ebp)           # 4-byte Folded Spill
-.LBB113_2:
-	movl	%esi, %ebx
-	movl	-88(%ebp), %edi         # 4-byte Reload
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	adcl	%ebx, %ebx
-	movl	%edx, %edi
-	adcl	%edi, %edi
-	movl	-104(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB113_4
-# BB#3:
-	xorl	%edi, %edi
-	xorl	%ebx, %ebx
-.LBB113_4:
-	movl	%ebx, -88(%ebp)         # 4-byte Spill
-	movl	-108(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -60(%ebp)
-	movl	%ecx, -56(%ebp)
-	movl	%esi, -52(%ebp)
-	movl	%edx, -48(%ebp)
-	movl	%eax, -76(%ebp)
-	movl	%ecx, -72(%ebp)
-	movl	%esi, -68(%ebp)
-	movl	%edx, -64(%ebp)
-	movl	-100(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB113_5
-# BB#6:
-	movl	$0, -100(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB113_7
-.LBB113_5:
-	shrl	$31, %edx
-	movl	%edx, -100(%ebp)        # 4-byte Spill
-.LBB113_7:
-	leal	-76(%ebp), %eax
-	movl	%eax, 8(%esp)
-	leal	-60(%ebp), %eax
-	movl	%eax, 4(%esp)
-	leal	-44(%ebp), %eax
-	movl	%eax, (%esp)
-	movl	-92(%ebp), %esi         # 4-byte Reload
-	andl	$1, %esi
-	movl	-96(%ebp), %ebx         # 4-byte Reload
-	calll	mcl_fpDbl_mulPre4L@PLT
-	movl	-84(%ebp), %eax         # 4-byte Reload
-	addl	-28(%ebp), %eax
-	movl	%eax, -84(%ebp)         # 4-byte Spill
-	movl	-80(%ebp), %eax         # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -80(%ebp)         # 4-byte Spill
-	movl	-88(%ebp), %eax         # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -88(%ebp)         # 4-byte Spill
-	adcl	-16(%ebp), %edi
-	movl	%edi, -92(%ebp)         # 4-byte Spill
-	adcl	-100(%ebp), %esi        # 4-byte Folded Reload
-	movl	-44(%ebp), %eax
-	movl	8(%ebp), %edi
-	subl	(%edi), %eax
-	movl	%eax, -116(%ebp)        # 4-byte Spill
-	movl	-40(%ebp), %ebx
-	sbbl	4(%edi), %ebx
-	movl	-36(%ebp), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, -96(%ebp)         # 4-byte Spill
-	movl	-32(%ebp), %edx
-	sbbl	12(%edi), %edx
-	movl	16(%edi), %eax
-	movl	%eax, -100(%ebp)        # 4-byte Spill
-	sbbl	%eax, -84(%ebp)         # 4-byte Folded Spill
-	movl	20(%edi), %eax
-	movl	%eax, -112(%ebp)        # 4-byte Spill
-	sbbl	%eax, -80(%ebp)         # 4-byte Folded Spill
-	movl	24(%edi), %eax
-	movl	%eax, -104(%ebp)        # 4-byte Spill
-	sbbl	%eax, -88(%ebp)         # 4-byte Folded Spill
-	movl	28(%edi), %eax
-	movl	%eax, -108(%ebp)        # 4-byte Spill
-	sbbl	%eax, -92(%ebp)         # 4-byte Folded Spill
-	sbbl	$0, %esi
-	movl	32(%edi), %ecx
-	movl	%ecx, -132(%ebp)        # 4-byte Spill
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	subl	%ecx, %eax
-	movl	36(%edi), %ecx
-	movl	%ecx, -136(%ebp)        # 4-byte Spill
+	calll	mcl_fpDbl_mulPre6L@PLT
+	addl	$16, %esp
+	addl	196(%esp), %esi
+	adcl	200(%esp), %ebp
+	movl	%ebp, %edx
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	204(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	208(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	212(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	216(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	24(%esp), %edi                  # 4-byte Folded Reload
+	movl	172(%esp), %ecx
+	movl	240(%esp), %eax
+	subl	(%eax), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	176(%esp), %ebx
+	sbbl	4(%eax), %ebx
+	movl	180(%esp), %ecx
+	sbbl	8(%eax), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	184(%esp), %ebp
+	sbbl	12(%eax), %ebp
+	movl	188(%esp), %ecx
+	sbbl	16(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	192(%esp), %ecx
+	sbbl	20(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, 120(%esp)                 # 4-byte Spill
+	sbbl	%ecx, %esi
+	movl	28(%eax), %ecx
+	movl	%ecx, 112(%esp)                 # 4-byte Spill
+	sbbl	%ecx, %edx
+	movl	32(%eax), %ecx
+	movl	%ecx, 108(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 16(%esp)                  # 4-byte Folded Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	movl	40(%eax), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 20(%esp)                  # 4-byte Folded Spill
+	movl	44(%eax), %ecx
+	movl	%ecx, 116(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 8(%esp)                   # 4-byte Folded Spill
+	sbbl	$0, %edi
+	movl	48(%eax), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	subl	%ecx, 28(%esp)                  # 4-byte Folded Spill
+	movl	52(%eax), %ecx
+	movl	%ecx, 84(%esp)                  # 4-byte Spill
 	sbbl	%ecx, %ebx
-	movl	40(%edi), %ecx
-	movl	%ecx, -128(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -96(%ebp)         # 4-byte Folded Spill
-	movl	44(%edi), %ecx
-	movl	%ecx, -140(%ebp)        # 4-byte Spill
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	movl	56(%eax), %ecx
+	movl	%ecx, 80(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 32(%esp)                  # 4-byte Folded Spill
+	movl	60(%eax), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	sbbl	%ecx, %ebp
+	movl	64(%eax), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 36(%esp)                  # 4-byte Folded Spill
+	movl	68(%eax), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 24(%esp)                  # 4-byte Folded Spill
+	movl	72(%eax), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	sbbl	%ecx, %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	76(%eax), %ecx
+	movl	%ecx, 104(%esp)                 # 4-byte Spill
 	sbbl	%ecx, %edx
-	movl	48(%edi), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -84(%ebp)         # 4-byte Folded Spill
-	movl	52(%edi), %ecx
-	movl	%ecx, -116(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -80(%ebp)         # 4-byte Folded Spill
-	movl	56(%edi), %ecx
-	movl	%ecx, -120(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -88(%ebp)         # 4-byte Folded Spill
-	movl	60(%edi), %ecx
-	movl	%ecx, -124(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -92(%ebp)         # 4-byte Folded Spill
-	sbbl	$0, %esi
-	addl	-100(%ebp), %eax        # 4-byte Folded Reload
-	adcl	-112(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%eax, 16(%edi)
-	movl	-96(%ebp), %eax         # 4-byte Reload
-	adcl	-104(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ebx, 20(%edi)
-	adcl	-108(%ebp), %edx        # 4-byte Folded Reload
-	movl	%eax, 24(%edi)
-	movl	-84(%ebp), %eax         # 4-byte Reload
-	adcl	-132(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edx, 28(%edi)
-	movl	-80(%ebp), %ecx         # 4-byte Reload
-	adcl	-136(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 32(%edi)
-	movl	-88(%ebp), %eax         # 4-byte Reload
-	adcl	-128(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 36(%edi)
-	movl	-92(%ebp), %ecx         # 4-byte Reload
-	adcl	-140(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 40(%edi)
-	adcl	-144(%ebp), %esi        # 4-byte Folded Reload
-	movl	%ecx, 44(%edi)
-	movl	%esi, 48(%edi)
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 52(%edi)
-	movl	-120(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 56(%edi)
-	movl	-124(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 60(%edi)
-	addl	$156, %esp
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	80(%eax), %ecx
+	movl	%ecx, 100(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 16(%esp)                  # 4-byte Folded Spill
+	movl	84(%eax), %ecx
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	movl	88(%eax), %ecx
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 20(%esp)                  # 4-byte Folded Spill
+	movl	92(%eax), %ecx
+	movl	%ecx, 88(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 8(%esp)                   # 4-byte Folded Spill
+	sbbl	$0, %edi
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	addl	120(%esp), %ebx                 # 4-byte Folded Reload
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	112(%esp), %edx                 # 4-byte Folded Reload
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	108(%esp), %ecx                 # 4-byte Folded Reload
+	adcl	76(%esp), %ebp                  # 4-byte Folded Reload
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	52(%esp), %esi                  # 4-byte Folded Reload
+	movl	%ebp, 36(%eax)
+	movl	%ecx, 32(%eax)
+	movl	%edx, 28(%eax)
+	movl	%ebx, 24(%eax)
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	116(%esp), %edx                 # 4-byte Folded Reload
+	movl	%esi, 40(%eax)
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	72(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edx, 44(%eax)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	84(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%esi, 48(%eax)
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	80(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 52(%eax)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%edx, 56(%eax)
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	64(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 60(%eax)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	60(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%edx, 64(%eax)
+	adcl	56(%esp), %edi                  # 4-byte Folded Reload
+	movl	%ecx, 68(%eax)
+	movl	%edi, 72(%eax)
+	movl	104(%esp), %ecx                 # 4-byte Reload
+	adcl	$0, %ecx
+	movl	%ecx, 76(%eax)
+	movl	100(%esp), %ecx                 # 4-byte Reload
+	adcl	$0, %ecx
+	movl	%ecx, 80(%eax)
+	movl	96(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	movl	%ecx, 84(%eax)
+	movl	92(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	movl	%ecx, 88(%eax)
+	movl	88(%esp), %ecx                  # 4-byte Reload
+	adcl	$0, %ecx
+	movl	%ecx, 92(%eax)
+	addl	$220, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end113:
-	.size	mcl_fpDbl_sqrPre8L, .Lfunc_end113-mcl_fpDbl_sqrPre8L
-
-	.globl	mcl_fp_mont8L
-	.align	16, 0x90
-	.type	mcl_fp_mont8L,@function
-mcl_fp_mont8L:                          # @mcl_fp_mont8L
-# BB#0:
+.Lfunc_end59:
+	.size	mcl_fpDbl_sqrPre12L, .Lfunc_end59-mcl_fpDbl_sqrPre12L
+                                        # -- End function
+	.globl	mcl_fp_mont12L                  # -- Begin function mcl_fp_mont12L
+	.p2align	4, 0x90
+	.type	mcl_fp_mont12L,@function
+mcl_fp_mont12L:                         # @mcl_fp_mont12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$700, %esp              # imm = 0x2BC
-	calll	.L114$pb
-.L114$pb:
+	subl	$1420, %esp                     # imm = 0x58C
+	calll	.L60$pb
+.L60$pb:
 	popl	%ebx
-.Ltmp5:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp5-.L114$pb), %ebx
-	movl	732(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	664(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	664(%esp), %ebp
-	movl	668(%esp), %edi
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	696(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	692(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	688(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	684(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	680(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	676(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	672(%esp), %esi
-	movl	%eax, (%esp)
-	leal	624(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	624(%esp), %ebp
-	adcl	628(%esp), %edi
-	adcl	632(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	656(%esp), %ebp
-	sbbl	%eax, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	584(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	60(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	584(%esp), %edi
-	adcl	588(%esp), %esi
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	592(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	596(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	600(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	604(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	608(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	612(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	616(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
+.Ltmp12:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp12-.L60$pb), %ebx
+	movl	1452(%esp), %eax
+	movl	-4(%eax), %edi
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	movl	1448(%esp), %ecx
+	subl	$4, %esp
+	leal	1372(%esp), %eax
+	pushl	(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	1368(%esp), %esi
+	movl	1372(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
 	movl	%edi, %eax
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	544(%esp), %ecx
-	movl	732(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv256x32
-	andl	$1, %ebp
-	addl	544(%esp), %edi
-	adcl	548(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	568(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
+	imull	%esi, %eax
+	movl	1416(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	1412(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	1408(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	1404(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	1400(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	1396(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	1392(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	1388(%esp), %ebp
+	movl	1384(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	1380(%esp), %edi
+	movl	1376(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	1316(%esp), %ecx
+	pushl	%eax
+	pushl	1460(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	1312(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1316(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1320(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	1324(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	1328(%esp), %esi
+	adcl	1332(%esp), %ebp
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1336(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1340(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1344(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1348(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	1352(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	1356(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1360(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	1260(%esp), %ecx
+	movzbl	%al, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	1452(%esp), %eax
+	pushl	4(%eax)
+	pushl	1452(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	addl	1256(%esp), %ecx
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1260(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1264(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	1268(%esp), %esi
+	adcl	1272(%esp), %ebp
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1276(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1280(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1284(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1288(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	1292(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	1296(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1300(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1304(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %edx                  # 4-byte Reload
+	movl	%ecx, %edi
+	imull	%ecx, %edx
+	movzbl	%al, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	1460(%esp)
+	leal	1212(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	1200(%esp), %edi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1204(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1208(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	1212(%esp), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	adcl	1216(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	1220(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1224(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1228(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1232(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %edi                    # 4-byte Reload
+	adcl	1236(%esp), %edi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1240(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	1244(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1248(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	$0, 32(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	1148(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	addl	1144(%esp), %ecx
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1148(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1152(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1156(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	1160(%esp), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	1164(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1168(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1172(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	1176(%esp), %edi
+	movl	%edi, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	1180(%esp), %edi
+	adcl	1184(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1188(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1192(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	movl	%ecx, %ebp
+	movzbl	%al, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	pushl	%edx
+	movl	1460(%esp), %eax
+	pushl	%eax
+	leal	1100(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	1088(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1092(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1096(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1100(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1104(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	1108(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1112(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1116(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	1120(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	1124(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1128(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	adcl	1132(%esp), %ebp
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	1136(%esp), %esi
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	$0, %edi
+	subl	$4, %esp
+	leal	1036(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	12(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	addl	1032(%esp), %ecx
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1036(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1040(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1044(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1048(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1052(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1056(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	1060(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1064(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1068(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	1072(%esp), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	adcl	1076(%esp), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	1080(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	980(%esp), %edi
+	movl	56(%esp), %edx                  # 4-byte Reload
+	movl	%ecx, %esi
+	imull	%ecx, %edx
+	movzbl	%al, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	1460(%esp)
+	pushl	%edi
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	976(%esp), %esi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	980(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	984(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	988(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	992(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	996(%esp), %ebp
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	1000(%esp), %esi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	1004(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	1008(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1012(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1016(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1020(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1024(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	$0, 12(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	924(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	16(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	addl	920(%esp), %ecx
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	924(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	928(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	932(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	936(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	adcl	940(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	944(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	948(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	952(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	956(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	960(%esp), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	964(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	968(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %edx                  # 4-byte Reload
+	movl	%ecx, %ebp
+	imull	%ecx, %edx
+	movzbl	%al, %edi
+	pushl	%edx
+	pushl	1460(%esp)
+	leal	876(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	864(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	868(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	872(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	876(%esp), %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	880(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	884(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	888(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	892(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	896(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	900(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	904(%esp), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	908(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	912(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	$0, %edi
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	20(%eax)
+	pushl	1452(%esp)
+	leal	820(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	40(%esp), %edx                  # 4-byte Reload
+	addl	808(%esp), %edx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	812(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	816(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	820(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	824(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	828(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	832(%esp), %ebp
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	836(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	840(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	844(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	848(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	852(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	856(%esp), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %edi
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	764(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	752(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	756(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	760(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	764(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	768(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	772(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	776(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	780(%esp), %edi
+	adcl	784(%esp), %esi
+	movl	%esi, %ebp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	788(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	792(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	796(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	800(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	$0, 40(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	24(%eax)
+	movl	1452(%esp), %eax
+	pushl	%eax
+	leal	708(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	4(%esp), %edx                   # 4-byte Reload
+	addl	696(%esp), %edx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	700(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	704(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	708(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	712(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	716(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	720(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	adcl	724(%esp), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	728(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	732(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	736(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edi                  # 4-byte Reload
+	adcl	740(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	744(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %ebp
+	movzbl	%al, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	652(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	640(%esp), %ebp
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	644(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	648(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	652(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	656(%esp), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	660(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	664(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	668(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	672(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	676(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	680(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	684(%esp), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	688(%esp), %ebp
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	$0, %edi
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	28(%eax)
+	pushl	1452(%esp)
+	leal	596(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	24(%esp), %edx                  # 4-byte Reload
+	addl	584(%esp), %edx
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	588(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	592(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	596(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	600(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	604(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	608(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	612(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	616(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	620(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	624(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	628(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	adcl	632(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %edi
+	movzbl	%al, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	540(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	528(%esp), %edi
+	adcl	532(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	536(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	540(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	544(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	552(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	556(%esp), %esi
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	560(%esp), %ebp
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	564(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	568(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	572(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	576(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	728(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	addl	504(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	$0, 24(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	32(%eax)
+	pushl	1452(%esp)
+	leal	484(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	28(%esp), %edx                  # 4-byte Reload
+	addl	472(%esp), %edx
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	476(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	480(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	484(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	488(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	492(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	496(%esp), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	500(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	adcl	504(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	508(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	512(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	516(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	520(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	524(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	536(%esp), %ebp
-	sbbl	%edi, %edi
-	movl	%esi, %eax
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	464(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	movl	%edi, %eax
-	andl	$1, %eax
-	addl	464(%esp), %esi
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	468(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	472(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	480(%esp), %edi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	484(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	488(%esp), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	492(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	496(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	724(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv256x32
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	424(%esp), %ecx
-	movl	28(%esp), %ebp          # 4-byte Reload
-	adcl	428(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	428(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	416(%esp), %esi
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	420(%esp), %edi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	424(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	428(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	432(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	436(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	436(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	440(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
 	adcl	444(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	448(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	448(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	452(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	456(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	384(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	384(%esp), %esi
-	adcl	388(%esp), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	392(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	396(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	400(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	404(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	408(%esp), %edi
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	412(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	416(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	28(%esp), %ecx          # 4-byte Reload
-	addl	344(%esp), %ecx
-	adcl	348(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	352(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	364(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	372(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	464(%esp), %ebp
+	adcl	$0, 28(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	36(%eax)
+	pushl	1452(%esp)
+	leal	372(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	%edi, %edx
+	addl	360(%esp), %edx
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	364(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	368(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	372(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	376(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	movl	%ebp, %eax
-	andl	$1, %eax
-	addl	304(%esp), %edi
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	308(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	312(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	316(%esp), %ebp
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	320(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	324(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	328(%esp), %esi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	332(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	336(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	264(%esp), %ecx
-	movl	48(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	380(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	384(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	388(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	392(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	396(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	400(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	404(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	408(%esp), %ebp
+	setb	%al
+	subl	$4, %esp
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	316(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	304(%esp), %esi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	308(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	312(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	316(%esp), %esi
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	320(%esp), %edi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	324(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	328(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	332(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	336(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	340(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	352(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	$0, %ebp
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	40(%eax)
+	movl	1452(%esp), %eax
+	pushl	%eax
+	leal	260(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	(%esp), %ecx                    # 4-byte Reload
+	addl	248(%esp), %ecx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	252(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	256(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	260(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	264(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	268(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	272(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	276(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	284(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	272(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	276(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	280(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	284(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	288(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	292(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	292(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	296(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	setb	(%esp)                          # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	56(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
 	movl	%ecx, %esi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	movl	%edi, %eax
-	andl	$1, %eax
-	addl	224(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	228(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	232(%esp), %esi
-	adcl	236(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	240(%esp), %edi
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	244(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	248(%esp), %ebp
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	252(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	256(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	184(%esp), %ecx
-	adcl	188(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	196(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
+	pushl	%eax
+	pushl	1460(%esp)
+	leal	204(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	192(%esp), %esi
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	196(%esp), %ebp
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	200(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	204(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	204(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	208(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ebp          # 4-byte Reload
-	adcl	212(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	212(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	216(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	144(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	movl	%edi, %ecx
-	andl	$1, %ecx
-	addl	144(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	220(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	224(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	228(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	240(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movzbl	(%esp), %eax                    # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	subl	$4, %esp
+	leal	140(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	44(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	%ebp, %edx
+	addl	136(%esp), %edx
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	140(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	144(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	148(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	152(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	32(%esp), %edi          # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	152(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	156(%esp), %ebp
+	movl	48(%esp), %edi                  # 4-byte Reload
 	adcl	160(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	164(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	172(%esp), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	adcl	176(%esp), %ebp
-	adcl	$0, %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	104(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	104(%esp), %ecx
-	adcl	108(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	116(%esp), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	120(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %edi          # 4-byte Reload
-	adcl	128(%esp), %edi
-	adcl	132(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	136(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	24(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %ebp
-	movl	%eax, (%esp)
-	leal	64(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	andl	$1, %esi
-	addl	64(%esp), %ebp
-	movl	32(%esp), %ebx          # 4-byte Reload
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	72(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ebx
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	80(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	84(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	88(%esp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	92(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	168(%esp), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	176(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	180(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	184(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	leal	84(%esp), %eax
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	1460(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	80(%esp), %esi
+	movzbl	44(%esp), %eax                  # 1-byte Folded Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	84(%esp), %eax
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	88(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	92(%esp), %esi
+	movl	16(%esp), %edx                  # 4-byte Reload
 	adcl	96(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%eax, %edx
-	movl	732(%esp), %ebp
-	subl	(%ebp), %edx
-	movl	%ecx, %eax
-	sbbl	4(%ebp), %eax
-	movl	%ebx, %ecx
-	sbbl	8(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	sbbl	20(%ebp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%ebp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%ebp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	sbbl	$0, %esi
-	andl	$1, %esi
-	movl	%esi, %ecx
-	jne	.LBB114_2
-# BB#1:
-	movl	%edx, %ebp
-.LBB114_2:
-	movl	720(%esp), %edx
-	movl	%ebp, (%edx)
-	testb	%cl, %cl
-	movl	60(%esp), %ebp          # 4-byte Reload
-	jne	.LBB114_4
-# BB#3:
-	movl	%eax, %ebp
-.LBB114_4:
-	movl	%ebp, 4(%edx)
-	jne	.LBB114_6
-# BB#5:
-	movl	12(%esp), %ebx          # 4-byte Reload
-.LBB114_6:
-	movl	%ebx, 8(%edx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	jne	.LBB114_8
-# BB#7:
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-.LBB114_8:
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%edx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	jne	.LBB114_10
-# BB#9:
-	movl	20(%esp), %edi          # 4-byte Reload
-.LBB114_10:
-	movl	%edi, 16(%edx)
-	jne	.LBB114_12
-# BB#11:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB114_12:
-	movl	%eax, 20(%edx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	jne	.LBB114_14
-# BB#13:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB114_14:
-	movl	%eax, 24(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	jne	.LBB114_16
-# BB#15:
-	movl	52(%esp), %eax          # 4-byte Reload
-.LBB114_16:
-	movl	%eax, 28(%edx)
-	addl	$700, %esp              # imm = 0x2BC
+	adcl	100(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	adcl	104(%esp), %edi
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	108(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	112(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	adcl	116(%esp), %ebx
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	120(%esp), %ebp
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	124(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	128(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	adcl	$0, 44(%esp)                    # 4-byte Folded Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	1452(%esp), %ecx
+	subl	(%ecx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	sbbl	4(%ecx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	sbbl	8(%ecx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	%edx, %eax
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	sbbl	12(%ecx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	16(%ecx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	sbbl	20(%ecx), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%ecx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	28(%ecx), %eax
+	movl	%ebx, %edi
+	movl	(%esp), %ebx                    # 4-byte Reload
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	sbbl	32(%ecx), %edi
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	sbbl	36(%ecx), %ebp
+	movl	%ecx, %edx
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	sbbl	40(%edx), %ecx
+	movl	%ebx, %esi
+	sbbl	44(%edx), %esi
+	movl	44(%esp), %edx                  # 4-byte Reload
+	sbbl	$0, %edx
+	testb	$1, %dl
+	jne	.LBB60_1
+# %bb.2:
+	movl	1440(%esp), %ebx
+	movl	%esi, 44(%ebx)
+	jne	.LBB60_3
+.LBB60_4:
+	movl	%ecx, 40(%ebx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	jne	.LBB60_5
+.LBB60_6:
+	movl	%ebp, 36(%ebx)
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB60_7
+.LBB60_8:
+	movl	%edi, 32(%ebx)
+	jne	.LBB60_9
+.LBB60_10:
+	movl	%eax, 28(%ebx)
+	movl	64(%esp), %edi                  # 4-byte Reload
+	movl	56(%esp), %eax                  # 4-byte Reload
+	jne	.LBB60_11
+.LBB60_12:
+	movl	%eax, 24(%ebx)
+	movl	52(%esp), %eax                  # 4-byte Reload
+	movl	60(%esp), %edx                  # 4-byte Reload
+	jne	.LBB60_13
+.LBB60_14:
+	movl	%edx, 20(%ebx)
+	movl	72(%esp), %edx                  # 4-byte Reload
+	jne	.LBB60_15
+.LBB60_16:
+	movl	%edi, 16(%ebx)
+	jne	.LBB60_17
+.LBB60_18:
+	movl	%esi, 12(%ebx)
+	jne	.LBB60_19
+.LBB60_20:
+	movl	%edx, 8(%ebx)
+	jne	.LBB60_21
+.LBB60_22:
+	movl	%ecx, 4(%ebx)
+	je	.LBB60_24
+.LBB60_23:
+	movl	20(%esp), %eax                  # 4-byte Reload
+.LBB60_24:
+	movl	%eax, (%ebx)
+	addl	$1420, %esp                     # imm = 0x58C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end114:
-	.size	mcl_fp_mont8L, .Lfunc_end114-mcl_fp_mont8L
-
-	.globl	mcl_fp_montNF8L
-	.align	16, 0x90
-	.type	mcl_fp_montNF8L,@function
-mcl_fp_montNF8L:                        # @mcl_fp_montNF8L
-# BB#0:
+.LBB60_1:
+	movl	%ebx, %esi
+	movl	1440(%esp), %ebx
+	movl	%esi, 44(%ebx)
+	je	.LBB60_4
+.LBB60_3:
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 40(%ebx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	je	.LBB60_6
+.LBB60_5:
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 36(%ebx)
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	je	.LBB60_8
+.LBB60_7:
+	movl	24(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 32(%ebx)
+	je	.LBB60_10
+.LBB60_9:
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 28(%ebx)
+	movl	64(%esp), %edi                  # 4-byte Reload
+	movl	56(%esp), %eax                  # 4-byte Reload
+	je	.LBB60_12
+.LBB60_11:
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%ebx)
+	movl	52(%esp), %eax                  # 4-byte Reload
+	movl	60(%esp), %edx                  # 4-byte Reload
+	je	.LBB60_14
+.LBB60_13:
+	movl	48(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 20(%ebx)
+	movl	72(%esp), %edx                  # 4-byte Reload
+	je	.LBB60_16
+.LBB60_15:
+	movl	12(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 16(%ebx)
+	je	.LBB60_18
+.LBB60_17:
+	movl	16(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 12(%ebx)
+	je	.LBB60_20
+.LBB60_19:
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%ebx)
+	je	.LBB60_22
+.LBB60_21:
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%ebx)
+	jne	.LBB60_23
+	jmp	.LBB60_24
+.Lfunc_end60:
+	.size	mcl_fp_mont12L, .Lfunc_end60-mcl_fp_mont12L
+                                        # -- End function
+	.globl	mcl_fp_montNF12L                # -- Begin function mcl_fp_montNF12L
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF12L,@function
+mcl_fp_montNF12L:                       # @mcl_fp_montNF12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$700, %esp              # imm = 0x2BC
-	calll	.L115$pb
-.L115$pb:
+	subl	$1420, %esp                     # imm = 0x58C
+	calll	.L61$pb
+.L61$pb:
 	popl	%ebx
-.Ltmp6:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp6-.L115$pb), %ebx
-	movl	732(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	664(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	664(%esp), %ebp
-	movl	668(%esp), %edi
-	movl	%ebp, %eax
+.Ltmp13:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp13-.L61$pb), %ebx
+	movl	1452(%esp), %eax
+	movl	-4(%eax), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	1448(%esp), %ecx
+	subl	$4, %esp
+	leal	1372(%esp), %eax
+	pushl	(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	1368(%esp), %esi
+	movl	1372(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
 	imull	%esi, %eax
-	movl	696(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	692(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	688(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	684(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	680(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	676(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	672(%esp), %esi
-	movl	%eax, (%esp)
-	leal	624(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	624(%esp), %ebp
-	adcl	628(%esp), %edi
-	adcl	632(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	adcl	640(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	584(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	616(%esp), %ecx
-	addl	584(%esp), %edi
-	adcl	588(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	596(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	604(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	544(%esp), %ecx
-	movl	732(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv256x32
-	addl	544(%esp), %edi
-	adcl	548(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	564(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	572(%esp), %edi
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	576(%esp), %ebp
-	movl	728(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	536(%esp), %ecx
-	addl	504(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	528(%esp), %edi
-	adcl	532(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	464(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	464(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	adcl	472(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	484(%esp), %esi
-	adcl	488(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %edi          # 4-byte Reload
-	adcl	496(%esp), %edi
-	movl	728(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	456(%esp), %eax
-	movl	44(%esp), %edx          # 4-byte Reload
-	addl	424(%esp), %edx
-	adcl	428(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	432(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	436(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	440(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	448(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	452(%esp), %edi
-	movl	%edi, %ebp
+	movl	1416(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	1412(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	1408(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	1404(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	1400(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	1396(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	1392(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	1388(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	1384(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	1380(%esp), %ebp
+	movl	1376(%esp), %edi
+	subl	$4, %esp
+	leal	1316(%esp), %ecx
+	pushl	%eax
+	pushl	1460(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	1312(%esp), %esi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1316(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	1320(%esp), %edi
+	adcl	1324(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1328(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1332(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1336(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1340(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1344(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1348(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	1352(%esp), %esi
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	1356(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1360(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	1260(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	4(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	1304(%esp), %eax
+	movl	36(%esp), %edx                  # 4-byte Reload
+	addl	1256(%esp), %edx
+	adcl	1260(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1264(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1268(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1272(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1276(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1280(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	1284(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1288(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	1292(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	adcl	1296(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	1300(%esp), %ebp
+	adcl	$0, %eax
 	movl	%eax, %edi
-	adcl	$0, %edi
-	movl	%edx, %eax
+	subl	$4, %esp
+	leal	1204(%esp), %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
 	movl	%edx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	384(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	384(%esp), %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	396(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	412(%esp), %ebp
-	adcl	416(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	376(%esp), %edx
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	344(%esp), %ecx
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	352(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	360(%esp), %esi
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	364(%esp), %edi
-	adcl	368(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	304(%esp), %ebp
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	308(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	320(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	adcl	324(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	328(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	336(%esp), %edi
-	movl	728(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	724(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv256x32
-	movl	296(%esp), %edx
-	movl	%ebp, %ecx
-	addl	264(%esp), %ecx
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	280(%esp), %ebp
-	adcl	284(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	292(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	adcl	$0, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	224(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	236(%esp), %esi
-	adcl	240(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	256(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	216(%esp), %ebp
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	184(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	192(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	196(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	144(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	144(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	152(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	156(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	160(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	176(%esp), %ebp
-	movl	728(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	104(%esp), %ecx
-	movl	724(%esp), %edx
-	calll	.LmulPv256x32
-	movl	136(%esp), %edi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	104(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	116(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	120(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	128(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	132(%esp), %ebp
-	adcl	$0, %edi
-	movl	28(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	64(%esp), %ecx
-	movl	732(%esp), %edx
-	calll	.LmulPv256x32
-	addl	64(%esp), %esi
-	movl	32(%esp), %esi          # 4-byte Reload
-	movl	56(%esp), %eax          # 4-byte Reload
-	movl	44(%esp), %ebx          # 4-byte Reload
-	adcl	68(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	72(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	76(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	80(%esp), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	84(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	92(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	adcl	96(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	732(%esp), %eax
-	subl	(%eax), %edx
-	sbbl	4(%eax), %ecx
-	sbbl	8(%eax), %esi
-	sbbl	12(%eax), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebx          # 4-byte Reload
-	sbbl	16(%eax), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%eax), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	sbbl	24(%eax), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	sbbl	28(%eax), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	testl	%edi, %edi
-	js	.LBB115_2
-# BB#1:
-	movl	%edx, 56(%esp)          # 4-byte Spill
-.LBB115_2:
-	movl	720(%esp), %edx
-	movl	56(%esp), %eax          # 4-byte Reload
-	movl	%eax, (%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB115_4
-# BB#3:
-	movl	%ecx, %eax
-.LBB115_4:
-	movl	%eax, 4(%edx)
-	js	.LBB115_6
-# BB#5:
-	movl	%esi, 32(%esp)          # 4-byte Spill
-.LBB115_6:
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 8(%edx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	60(%esp), %eax          # 4-byte Reload
-	movl	48(%esp), %ecx          # 4-byte Reload
-	js	.LBB115_8
-# BB#7:
-	movl	12(%esp), %esi          # 4-byte Reload
-	movl	%esi, 44(%esp)          # 4-byte Spill
-.LBB115_8:
-	movl	44(%esp), %esi          # 4-byte Reload
-	movl	%esi, 12(%edx)
-	js	.LBB115_10
-# BB#9:
-	movl	16(%esp), %edi          # 4-byte Reload
-.LBB115_10:
-	movl	%edi, 16(%edx)
-	js	.LBB115_12
-# BB#11:
-	movl	20(%esp), %ebp          # 4-byte Reload
-.LBB115_12:
-	movl	%ebp, 20(%edx)
-	js	.LBB115_14
-# BB#13:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB115_14:
-	movl	%eax, 24(%edx)
-	js	.LBB115_16
-# BB#15:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB115_16:
-	movl	%ecx, 28(%edx)
-	addl	$700, %esp              # imm = 0x2BC
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end115:
-	.size	mcl_fp_montNF8L, .Lfunc_end115-mcl_fp_montNF8L
-
-	.globl	mcl_fp_montRed8L
-	.align	16, 0x90
-	.type	mcl_fp_montRed8L,@function
-mcl_fp_montRed8L:                       # @mcl_fp_montRed8L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$428, %esp              # imm = 0x1AC
-	calll	.L116$pb
-.L116$pb:
-	popl	%ebx
-.Ltmp7:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp7-.L116$pb), %ebx
-	movl	456(%esp), %edx
-	movl	-4(%edx), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	452(%esp), %eax
-	movl	(%eax), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	4(%eax), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	%esi, %ecx
-	imull	%edi, %ecx
-	movl	60(%eax), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	56(%eax), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	52(%eax), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	48(%eax), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	44(%eax), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	40(%eax), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	36(%eax), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	32(%eax), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	28(%eax), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	24(%eax), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	20(%eax), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	16(%eax), %ebp
-	movl	12(%eax), %edi
-	movl	8(%eax), %esi
-	movl	(%edx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%edx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	24(%edx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	20(%edx), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	16(%edx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	12(%edx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	8(%edx), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	4(%edx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	%ecx, (%esp)
-	leal	392(%esp), %ecx
-	calll	.LmulPv256x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	392(%esp), %eax
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	396(%esp), %ecx
-	adcl	400(%esp), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	404(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	408(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	sbbl	%eax, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	movl	64(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	352(%esp), %edi
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	356(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	360(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	364(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	368(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	372(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	376(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	380(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	384(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
+	pushl	%ecx
+	pushl	1460(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	1200(%esp), %esi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1204(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1208(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1212(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1216(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1220(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1224(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1228(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1232(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1236(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1240(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	1244(%esp), %ebp
+	adcl	1248(%esp), %edi
+	subl	$4, %esp
+	leal	1148(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	1192(%esp), %eax
+	movl	36(%esp), %edx                  # 4-byte Reload
+	addl	1144(%esp), %edx
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1148(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1152(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1156(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1160(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1164(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	1168(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1172(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	1176(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1180(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	1184(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	adcl	1188(%esp), %edi
+	movl	%edi, %esi
+	movl	%eax, %ebp
 	adcl	$0, %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
+	subl	$4, %esp
+	leal	1092(%esp), %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
 	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	addl	312(%esp), %edi
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	316(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	addl	272(%esp), %edi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	276(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
+	pushl	%ecx
+	movl	1460(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	1088(%esp), %edi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1092(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1096(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1100(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1104(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1108(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1112(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1116(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1120(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	1124(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1128(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	1132(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	adcl	1136(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	1036(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	12(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	1080(%esp), %eax
+	movl	44(%esp), %edx                  # 4-byte Reload
+	addl	1032(%esp), %edx
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1036(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1040(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1044(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1048(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	1052(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	1056(%esp), %esi
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	1060(%esp), %ebp
+	adcl	1064(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1068(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1072(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	1076(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	980(%esp), %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %edi
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	1460(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	976(%esp), %edi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	980(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	984(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	988(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	992(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	996(%esp), %edi
+	adcl	1000(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	1004(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	1008(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1012(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1016(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	1020(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1024(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	924(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	16(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	968(%esp), %ecx
+	movl	32(%esp), %eax                  # 4-byte Reload
+	addl	920(%esp), %eax
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	924(%esp), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	928(%esp), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	932(%esp), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	adcl	936(%esp), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	940(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	944(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	948(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	952(%esp), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	956(%esp), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	adcl	960(%esp), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	964(%esp), %edi
+	movl	%ecx, %ebp
 	adcl	$0, %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	movl	%eax, %esi
+	imull	%eax, %ecx
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	876(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	864(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	868(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
+	adcl	872(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	876(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	880(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	884(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	888(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	892(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	896(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	900(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	904(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	908(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	adcl	912(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	20(%eax)
+	movl	1452(%esp), %eax
+	pushl	%eax
+	leal	820(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	856(%esp), %eax
+	movl	24(%esp), %edx                  # 4-byte Reload
+	addl	808(%esp), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	812(%esp), %esi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	816(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	820(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	824(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	828(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	832(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	836(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	840(%esp), %edi
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	844(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	848(%esp), %ebp
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	852(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	764(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	24(%esp), %eax                  # 4-byte Reload
+	addl	752(%esp), %eax
+	adcl	756(%esp), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	760(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	764(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	768(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	772(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	776(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	780(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	784(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	788(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	792(%esp), %ebp
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	796(%esp), %edi
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	800(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	24(%eax)
+	pushl	1452(%esp)
+	leal	708(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	744(%esp), %ecx
+	movl	52(%esp), %eax                  # 4-byte Reload
+	addl	696(%esp), %eax
+	adcl	700(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	704(%esp), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	708(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	712(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	716(%esp), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	720(%esp), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	724(%esp), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	728(%esp), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	adcl	732(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	adcl	736(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	740(%esp), %esi
+	adcl	$0, %ecx
 	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	addl	232(%esp), %ebp
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	236(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	252(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
+	subl	$4, %esp
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	movl	%eax, %edi
+	imull	%eax, %ecx
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	652(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	640(%esp), %edi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	644(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	648(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	652(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	656(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	660(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	664(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	668(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	672(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	676(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	680(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	684(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	adcl	688(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	28(%eax)
+	pushl	1452(%esp)
+	leal	596(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	632(%esp), %eax
+	movl	12(%esp), %edx                  # 4-byte Reload
+	addl	584(%esp), %edx
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	588(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	592(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	596(%esp), %esi
+	adcl	600(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	604(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	608(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	612(%esp), %ebp
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	616(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	620(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	624(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	628(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %edi
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	540(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	528(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	532(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	536(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	540(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	544(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	548(%esp), %edi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	552(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	556(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	560(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	564(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	568(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	572(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	576(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	32(%eax)
+	pushl	1452(%esp)
+	leal	484(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	520(%esp), %ecx
+	movl	8(%esp), %edx                   # 4-byte Reload
+	addl	472(%esp), %edx
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	476(%esp), %ebp
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	480(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	484(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	488(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	492(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	496(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	500(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	504(%esp), %esi
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	508(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	512(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	516(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	$0, %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %edi
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	428(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	416(%esp), %edi
+	adcl	420(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	424(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	428(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	432(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	436(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edi                  # 4-byte Reload
+	adcl	440(%esp), %edi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	444(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	448(%esp), %esi
+	movl	56(%esp), %ebp                  # 4-byte Reload
+	adcl	452(%esp), %ebp
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	456(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	464(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	movl	1452(%esp), %eax
+	pushl	36(%eax)
+	pushl	1452(%esp)
+	leal	372(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	408(%esp), %eax
+	movl	16(%esp), %edx                  # 4-byte Reload
+	addl	360(%esp), %edx
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	364(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	368(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	372(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	376(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	380(%esp), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	384(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	388(%esp), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	adcl	392(%esp), %ebp
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	396(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	400(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	404(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	1460(%esp)
+	leal	316(%esp), %eax
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	304(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	308(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %esi                  # 4-byte Reload
+	adcl	316(%esp), %esi
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	320(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	324(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	328(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	332(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	336(%esp), %ebp
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	340(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	252(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	40(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	296(%esp), %ecx
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	248(%esp), %edx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	252(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	256(%esp), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	adcl	260(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	264(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	192(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	addl	192(%esp), %edi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	196(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	204(%esp), %edi
-	adcl	208(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	212(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	152(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	addl	152(%esp), %esi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	156(%esp), %ecx
-	adcl	160(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	268(%esp), %edi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	272(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	276(%esp), %ebp
+	movl	%ebp, 56(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	280(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	284(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	288(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	292(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	$0, %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	196(%esp), %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	1460(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	192(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	196(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	200(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	204(%esp), %esi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	208(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	212(%esp), %edi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	216(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	220(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	224(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	228(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	240(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	140(%esp), %eax
+	movl	1452(%esp), %ecx
+	pushl	44(%ecx)
+	pushl	1452(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	movl	28(%esp), %edx                  # 4-byte Reload
+	addl	136(%esp), %edx
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	140(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	144(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	148(%esp), %esi
+	adcl	152(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	156(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	160(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	164(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
 	adcl	168(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	172(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	176(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	180(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 64(%esp)            # 4-byte Folded Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	456(%esp), %edx
-	calll	.LmulPv256x32
-	addl	112(%esp), %esi
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	116(%esp), %ecx
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	120(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	128(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	%edi, %ebx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	132(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	%eax, %esi
-	adcl	136(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	144(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	%ecx, %edx
-	subl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	104(%esp), %ebp         # 4-byte Reload
-	sbbl	28(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	32(%esp), %ebx          # 4-byte Folded Reload
-	sbbl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	sbbl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	sbbl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	sbbl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	sbbl	$0, %edi
-	andl	$1, %edi
-	jne	.LBB116_2
-# BB#1:
-	movl	%edx, %ecx
-.LBB116_2:
-	movl	448(%esp), %edx
-	movl	%ecx, (%edx)
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	180(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	184(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	84(%esp), %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %edi
+	pushl	%ecx
+	pushl	1460(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	80(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	84(%esp), %eax
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	88(%esp), %ecx
+	adcl	92(%esp), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	96(%esp), %esi
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	100(%esp), %edi
+	movl	56(%esp), %ebx                  # 4-byte Reload
+	adcl	104(%esp), %ebx
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	108(%esp), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	adcl	112(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	116(%esp), %ebp
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	120(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	124(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	128(%esp), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	%eax, %edx
+	movl	1452(%esp), %eax
+	subl	(%eax), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	sbbl	4(%eax), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	sbbl	8(%eax), %ecx
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
+	movl	%esi, %ecx
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	sbbl	12(%eax), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
 	movl	%edi, %ecx
-	testb	%cl, %cl
-	jne	.LBB116_4
-# BB#3:
-	movl	%eax, 108(%esp)         # 4-byte Spill
-.LBB116_4:
-	movl	108(%esp), %eax         # 4-byte Reload
-	movl	%eax, 4(%edx)
-	movl	104(%esp), %eax         # 4-byte Reload
-	jne	.LBB116_6
-# BB#5:
-	movl	%ebp, %eax
-.LBB116_6:
-	movl	%eax, 8(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	movl	76(%esp), %ebp          # 4-byte Reload
-	jne	.LBB116_8
-# BB#7:
-	movl	%ebx, %ebp
-.LBB116_8:
-	movl	%ebp, 12(%edx)
-	movl	100(%esp), %ebx         # 4-byte Reload
-	jne	.LBB116_10
-# BB#9:
-	movl	68(%esp), %ebx          # 4-byte Reload
-.LBB116_10:
-	movl	%ebx, 16(%edx)
-	movl	80(%esp), %edi          # 4-byte Reload
-	jne	.LBB116_12
-# BB#11:
-	movl	72(%esp), %edi          # 4-byte Reload
-.LBB116_12:
-	movl	%edi, 20(%edx)
-	movl	88(%esp), %esi          # 4-byte Reload
-	jne	.LBB116_14
-# BB#13:
-	movl	92(%esp), %esi          # 4-byte Reload
-.LBB116_14:
-	movl	%esi, 24(%edx)
-	jne	.LBB116_16
-# BB#15:
-	movl	96(%esp), %eax          # 4-byte Reload
-.LBB116_16:
-	movl	%eax, 28(%edx)
-	addl	$428, %esp              # imm = 0x1AC
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	sbbl	16(%eax), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	%ebx, %ecx
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	sbbl	20(%eax), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	sbbl	24(%eax), %ebx
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	sbbl	28(%eax), %ecx
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	%ebp, %edx
+	sbbl	32(%eax), %edx
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	sbbl	36(%eax), %ebp
+	movl	20(%esp), %esi                  # 4-byte Reload
+	sbbl	40(%eax), %esi
+	movl	28(%esp), %edi                  # 4-byte Reload
+	sbbl	44(%eax), %edi
+	movl	%edi, %eax
+	sarl	$31, %eax
+	testl	%eax, %eax
+	js	.LBB61_1
+# %bb.2:
+	movl	1440(%esp), %eax
+	movl	%edi, 44(%eax)
+	js	.LBB61_3
+.LBB61_4:
+	movl	%esi, 40(%eax)
+	movl	72(%esp), %edi                  # 4-byte Reload
+	js	.LBB61_5
+.LBB61_6:
+	movl	%ebp, 36(%eax)
+	movl	76(%esp), %esi                  # 4-byte Reload
+	js	.LBB61_7
+.LBB61_8:
+	movl	%edx, 32(%eax)
+	js	.LBB61_9
+.LBB61_10:
+	movl	%ecx, 28(%eax)
+	movl	60(%esp), %edx                  # 4-byte Reload
+	js	.LBB61_11
+.LBB61_12:
+	movl	%ebx, 24(%eax)
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	movl	64(%esp), %ebx                  # 4-byte Reload
+	js	.LBB61_13
+.LBB61_14:
+	movl	%ebx, 20(%eax)
+	movl	68(%esp), %ebx                  # 4-byte Reload
+	js	.LBB61_15
+.LBB61_16:
+	movl	%ebx, 16(%eax)
+	js	.LBB61_17
+.LBB61_18:
+	movl	%edi, 12(%eax)
+	js	.LBB61_19
+.LBB61_20:
+	movl	%esi, 8(%eax)
+	js	.LBB61_21
+.LBB61_22:
+	movl	%edx, 4(%eax)
+	jns	.LBB61_24
+.LBB61_23:
+	movl	40(%esp), %ecx                  # 4-byte Reload
+.LBB61_24:
+	movl	%ecx, (%eax)
+	addl	$1420, %esp                     # imm = 0x58C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end116:
-	.size	mcl_fp_montRed8L, .Lfunc_end116-mcl_fp_montRed8L
-
-	.globl	mcl_fp_addPre8L
-	.align	16, 0x90
-	.type	mcl_fp_addPre8L,@function
-mcl_fp_addPre8L:                        # @mcl_fp_addPre8L
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	movl	20(%esp), %esi
-	addl	(%esi), %ecx
-	adcl	4(%esi), %edx
-	movl	8(%eax), %edi
-	adcl	8(%esi), %edi
-	movl	16(%esp), %ebx
-	movl	%ecx, (%ebx)
-	movl	12(%esi), %ecx
-	movl	%edx, 4(%ebx)
-	movl	16(%esi), %edx
-	adcl	12(%eax), %ecx
-	adcl	16(%eax), %edx
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%ecx, 12(%ebx)
-	movl	20(%esi), %ecx
-	adcl	%edi, %ecx
-	movl	24(%eax), %edi
-	movl	%edx, 16(%ebx)
-	movl	24(%esi), %edx
-	adcl	%edi, %edx
-	movl	%ecx, 20(%ebx)
-	movl	%edx, 24(%ebx)
-	movl	28(%eax), %eax
-	movl	28(%esi), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 28(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end117:
-	.size	mcl_fp_addPre8L, .Lfunc_end117-mcl_fp_addPre8L
-
-	.globl	mcl_fp_subPre8L
-	.align	16, 0x90
-	.type	mcl_fp_subPre8L,@function
-mcl_fp_subPre8L:                        # @mcl_fp_subPre8L
-# BB#0:
+.LBB61_1:
+	movl	28(%esp), %edi                  # 4-byte Reload
+	movl	1440(%esp), %eax
+	movl	%edi, 44(%eax)
+	jns	.LBB61_4
+.LBB61_3:
+	movl	20(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 40(%eax)
+	movl	72(%esp), %edi                  # 4-byte Reload
+	jns	.LBB61_6
+.LBB61_5:
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 36(%eax)
+	movl	76(%esp), %esi                  # 4-byte Reload
+	jns	.LBB61_8
+.LBB61_7:
+	movl	8(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 32(%eax)
+	jns	.LBB61_10
+.LBB61_9:
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%eax)
+	movl	60(%esp), %edx                  # 4-byte Reload
+	jns	.LBB61_12
+.LBB61_11:
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 24(%eax)
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	movl	64(%esp), %ebx                  # 4-byte Reload
+	jns	.LBB61_14
+.LBB61_13:
+	movl	56(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 20(%eax)
+	movl	68(%esp), %ebx                  # 4-byte Reload
+	jns	.LBB61_16
+.LBB61_15:
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 16(%eax)
+	jns	.LBB61_18
+.LBB61_17:
+	movl	44(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%eax)
+	jns	.LBB61_20
+.LBB61_19:
+	movl	48(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%eax)
+	jns	.LBB61_22
+.LBB61_21:
+	movl	36(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%eax)
+	js	.LBB61_23
+	jmp	.LBB61_24
+.Lfunc_end61:
+	.size	mcl_fp_montNF12L, .Lfunc_end61-mcl_fp_montNF12L
+                                        # -- End function
+	.globl	mcl_fp_montRed12L               # -- Begin function mcl_fp_montRed12L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed12L,@function
+mcl_fp_montRed12L:                      # @mcl_fp_montRed12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %esi
-	xorl	%eax, %eax
-	movl	28(%esp), %edi
-	subl	(%edi), %edx
-	sbbl	4(%edi), %esi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edi), %ebx
-	movl	20(%esp), %ebp
-	movl	%edx, (%ebp)
-	movl	12(%ecx), %edx
-	sbbl	12(%edi), %edx
-	movl	%esi, 4(%ebp)
-	movl	16(%ecx), %esi
-	sbbl	16(%edi), %esi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edi), %ebx
-	movl	%edx, 12(%ebp)
-	movl	20(%ecx), %edx
-	sbbl	%ebx, %edx
-	movl	24(%edi), %ebx
-	movl	%esi, 16(%ebp)
-	movl	24(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	%edx, 20(%ebp)
-	movl	%esi, 24(%ebp)
-	movl	28(%edi), %edx
-	movl	28(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 28(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
+	subl	$780, %esp                      # imm = 0x30C
+	calll	.L62$pb
+.L62$pb:
 	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end118:
-	.size	mcl_fp_subPre8L, .Lfunc_end118-mcl_fp_subPre8L
-
-	.globl	mcl_fp_shr1_8L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_8L,@function
-mcl_fp_shr1_8L:                         # @mcl_fp_shr1_8L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	8(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 4(%esi)
-	movl	12(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 8(%esi)
-	movl	16(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 12(%esi)
-	movl	20(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 16(%esi)
+.Ltmp14:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp14-.L62$pb), %ebx
+	movl	808(%esp), %ecx
+	movl	44(%ecx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	40(%ecx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	36(%ecx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	32(%ecx), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	28(%ecx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	24(%ecx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	20(%ecx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	12(%ecx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	8(%ecx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %eax
+	movl	%ecx, %edx
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	movl	44(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	40(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	32(%eax), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	28(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
 	movl	24(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 20(%esi)
-	movl	28(%eax), %eax
-	shrdl	$1, %eax, %ecx
-	movl	%ecx, 24(%esi)
-	shrl	%eax
-	movl	%eax, 28(%esi)
-	popl	%esi
-	retl
-.Lfunc_end119:
-	.size	mcl_fp_shr1_8L, .Lfunc_end119-mcl_fp_shr1_8L
-
-	.globl	mcl_fp_add8L
-	.align	16, 0x90
-	.type	mcl_fp_add8L,@function
-mcl_fp_add8L:                           # @mcl_fp_add8L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$20, %esp
-	movl	48(%esp), %edi
-	movl	(%edi), %ecx
-	movl	4(%edi), %eax
-	movl	44(%esp), %edx
-	addl	(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	%ecx, %ebp
-	adcl	4(%edx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	8(%edi), %eax
-	adcl	8(%edx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	12(%edx), %esi
-	movl	16(%edx), %eax
-	adcl	12(%edi), %esi
-	adcl	16(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	20(%edx), %ecx
-	adcl	20(%edi), %ecx
-	movl	24(%edx), %ebx
-	adcl	24(%edi), %ebx
-	movl	28(%edx), %edi
-	movl	48(%esp), %edx
-	adcl	28(%edx), %edi
-	movl	40(%esp), %edx
-	movl	%ebp, (%edx)
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%edx)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%edx)
-	movl	%esi, 12(%edx)
-	movl	%eax, 16(%edx)
-	movl	%ecx, 20(%edx)
-	movl	%ebx, 24(%edx)
-	movl	%edi, 28(%edx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	52(%esp), %edx
-	movl	8(%esp), %ebp           # 4-byte Reload
-	subl	(%edx), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	52(%esp), %edx
-	sbbl	4(%edx), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	52(%esp), %edx
-	sbbl	8(%edx), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp
-	sbbl	12(%ebp), %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	4(%esp), %edx           # 4-byte Reload
-	sbbl	16(%ebp), %edx
-	movl	%edx, %esi
-	sbbl	20(%ebp), %ecx
-	sbbl	24(%ebp), %ebx
-	sbbl	28(%ebp), %edi
-	sbbl	$0, %eax
-	testb	$1, %al
-	jne	.LBB120_2
-# BB#1:                                 # %nocarry
-	movl	8(%esp), %edx           # 4-byte Reload
-	movl	40(%esp), %ebp
-	movl	%edx, (%ebp)
-	movl	16(%esp), %edx          # 4-byte Reload
-	movl	%edx, 4(%ebp)
-	movl	12(%esp), %edx          # 4-byte Reload
-	movl	%edx, 8(%ebp)
-	movl	(%esp), %eax            # 4-byte Reload
-	movl	%eax, 12(%ebp)
-	movl	%esi, 16(%ebp)
-	movl	%ecx, 20(%ebp)
-	movl	%ebx, 24(%ebp)
-	movl	%edi, 28(%ebp)
-.LBB120_2:                              # %carry
-	addl	$20, %esp
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	16(%eax), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	12(%eax), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	8(%eax), %esi
+	movl	(%eax), %ebp
+	movl	4(%eax), %edi
+	movl	-4(%edx), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	(%edx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	%ebp, %eax
+	imull	%ecx, %eax
+	leal	732(%esp), %ecx
+	pushl	%eax
+	pushl	%edx
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	728(%esp), %ebp
+	adcl	732(%esp), %edi
+	adcl	736(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	740(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	744(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	748(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	752(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	756(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	760(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	764(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	768(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	772(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	movl	48(%eax), %eax
+	adcl	776(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edi, %eax
+	leal	676(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 16(%esp)                  # 1-byte Folded Spill
+	movl	720(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %ecx
+	addl	672(%esp), %edi
+	adcl	676(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	680(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	684(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	688(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	692(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	696(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	700(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	704(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	708(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	712(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	716(%esp), %edi
+	movl	804(%esp), %eax
+	adcl	52(%eax), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	620(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	664(%esp), %eax
+	adcl	$0, %eax
+	addl	616(%esp), %esi
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	620(%esp), %edx
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	624(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	628(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	632(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	636(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	640(%esp), %ebp
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	644(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	648(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	652(%esp), %esi
+	adcl	656(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	660(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	56(%ecx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	564(%esp), %ecx
+	pushl	%eax
+	movl	816(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	608(%esp), %eax
+	adcl	$0, %eax
+	addl	560(%esp), %edi
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	564(%esp), %edi
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	568(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	572(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	576(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	580(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	584(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	588(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	592(%esp), %esi
+	movl	%esi, %ebp
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	596(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
+	adcl	600(%esp), %esi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	604(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	60(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edi, %eax
+	leal	508(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	552(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %ecx
+	addl	504(%esp), %edi
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	508(%esp), %edx
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	512(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	516(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	520(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	524(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	528(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	532(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	536(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	540(%esp), %esi
+	movl	%esi, %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	544(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	64(%eax), %ecx
+	movl	%ecx, %edi
+	setb	52(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %esi
+	leal	452(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 52(%esp)                  # 1-byte Folded Spill
+	movl	496(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %ecx
+	addl	448(%esp), %esi
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	452(%esp), %edx
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	456(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	464(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	468(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	472(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	476(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	480(%esp), %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	484(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	488(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	492(%esp), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	68(%eax), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	setb	36(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	396(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 36(%esp)                  # 1-byte Folded Spill
+	movl	440(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	392(%esp), %edi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	396(%esp), %ecx
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	400(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	404(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	408(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	412(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	416(%esp), %edi
+	adcl	420(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	424(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	428(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	432(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	436(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	72(%eax), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %ebp
+	leal	340(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	384(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	336(%esp), %ebp
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	340(%esp), %ecx
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	356(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	adcl	360(%esp), %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	364(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	368(%esp), %edi
+	adcl	372(%esp), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	376(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	380(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	76(%eax), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	setb	20(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %esi
+	leal	284(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 20(%esp)                  # 1-byte Folded Spill
+	movl	328(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	280(%esp), %esi
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	284(%esp), %esi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	288(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	292(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	296(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	300(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	304(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	308(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	316(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	320(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	324(%esp), %edi
+	movl	804(%esp), %eax
+	adcl	80(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	setb	40(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, %eax
+	imull	%esi, %eax
+	leal	228(%esp), %ecx
+	pushl	%eax
+	movl	816(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 40(%esp)                  # 1-byte Folded Spill
+	movl	272(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	224(%esp), %esi
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	228(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	240(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	244(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	248(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	252(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	256(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	260(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	264(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	268(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	84(%eax), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	setb	63(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	%ebp, %eax
+	movl	%ebp, %edi
+	imull	%esi, %eax
+	leal	172(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 63(%esp)                  # 1-byte Folded Spill
+	movl	216(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	168(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	176(%esp), %ebp
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	180(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	184(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	188(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	192(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	196(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	200(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	204(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	208(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	212(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	88(%ecx), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	movl	%edi, %ecx
+	imull	%eax, %ecx
+	movl	%eax, %edi
+	subl	$4, %esp
+	leal	116(%esp), %eax
+	pushl	%ecx
+	pushl	816(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	160(%esp), %esi
+	adcl	$0, %esi
+	addl	112(%esp), %edi
+	movl	%ebp, %eax
+	adcl	116(%esp), %eax
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	120(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	124(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	128(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	132(%esp), %edx
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	136(%esp), %edi
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	140(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	144(%esp), %ebp
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	148(%esp), %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	adcl	152(%esp), %ebx
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	156(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	92(%ecx), %esi
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	subl	88(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	sbbl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	68(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	72(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	%edx, %eax
+	movl	%esi, %edx
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	sbbl	76(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	sbbl	80(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 80(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	84(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	%ebp, %ecx
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	sbbl	92(%esp), %ecx                  # 4-byte Folded Reload
+	movl	16(%esp), %esi                  # 4-byte Reload
+	sbbl	96(%esp), %esi                  # 4-byte Folded Reload
+	movl	%ebx, %edi
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	sbbl	100(%esp), %edi                 # 4-byte Folded Reload
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	sbbl	104(%esp), %ebp                 # 4-byte Folded Reload
+	movl	%edx, %eax
+	sbbl	108(%esp), %eax                 # 4-byte Folded Reload
+	movl	$0, %ebx
+	sbbl	%ebx, %ebx
+	testb	$1, %bl
+	jne	.LBB62_1
+# %bb.2:
+	movl	800(%esp), %edx
+	movl	%eax, 44(%edx)
+	jne	.LBB62_3
+.LBB62_4:
+	movl	%ebp, 40(%edx)
+	movl	56(%esp), %eax                  # 4-byte Reload
+	jne	.LBB62_5
+.LBB62_6:
+	movl	%edi, 36(%edx)
+	movl	76(%esp), %ebx                  # 4-byte Reload
+	movl	80(%esp), %ebp                  # 4-byte Reload
+	jne	.LBB62_7
+.LBB62_8:
+	movl	%esi, 32(%edx)
+	movl	72(%esp), %edi                  # 4-byte Reload
+	jne	.LBB62_9
+.LBB62_10:
+	movl	%ecx, 28(%edx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB62_11
+.LBB62_12:
+	movl	%ecx, 24(%edx)
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB62_13
+.LBB62_14:
+	movl	%ebp, 20(%edx)
+	jne	.LBB62_15
+.LBB62_16:
+	movl	%ebx, 16(%edx)
+	jne	.LBB62_17
+.LBB62_18:
+	movl	%edi, 12(%edx)
+	jne	.LBB62_19
+.LBB62_20:
+	movl	%esi, 8(%edx)
+	jne	.LBB62_21
+.LBB62_22:
+	movl	%ecx, 4(%edx)
+	je	.LBB62_24
+.LBB62_23:
+	movl	44(%esp), %eax                  # 4-byte Reload
+.LBB62_24:
+	movl	%eax, (%edx)
+	addl	$780, %esp                      # imm = 0x30C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end120:
-	.size	mcl_fp_add8L, .Lfunc_end120-mcl_fp_add8L
-
-	.globl	mcl_fp_addNF8L
-	.align	16, 0x90
-	.type	mcl_fp_addNF8L,@function
-mcl_fp_addNF8L:                         # @mcl_fp_addNF8L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$56, %esp
-	movl	84(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edi
-	movl	80(%esp), %ebx
-	addl	(%ebx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	4(%ebx), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	28(%eax), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	20(%eax), %ebp
-	movl	16(%eax), %esi
-	movl	12(%eax), %edx
-	movl	8(%eax), %ecx
-	adcl	8(%ebx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	12(%ebx), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	16(%ebx), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	20(%ebx), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	24(%ebx), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	28(%ebx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx
-	movl	24(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, %eax
-	subl	(%ebx), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	sbbl	4(%ebx), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%ebp, %eax
-	sbbl	8(%ebx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	12(%ebx), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	sbbl	16(%ebx), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	20(%ebx), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	sbbl	24(%ebx), %ebp
-	movl	48(%esp), %esi          # 4-byte Reload
-	sbbl	28(%ebx), %esi
-	testl	%esi, %esi
-	js	.LBB121_2
-# BB#1:
-	movl	(%esp), %eax            # 4-byte Reload
-.LBB121_2:
-	movl	76(%esp), %ebx
-	movl	%eax, (%ebx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	js	.LBB121_4
-# BB#3:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB121_4:
-	movl	%eax, 4(%ebx)
-	movl	40(%esp), %edx          # 4-byte Reload
-	movl	28(%esp), %edi          # 4-byte Reload
-	js	.LBB121_6
-# BB#5:
-	movl	8(%esp), %edi           # 4-byte Reload
-.LBB121_6:
-	movl	%edi, 8(%ebx)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	36(%esp), %eax          # 4-byte Reload
-	js	.LBB121_8
-# BB#7:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB121_8:
-	movl	%eax, 12(%ebx)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB121_10
-# BB#9:
-	movl	16(%esp), %edx          # 4-byte Reload
-.LBB121_10:
-	movl	%edx, 16(%ebx)
-	js	.LBB121_12
-# BB#11:
-	movl	20(%esp), %ecx          # 4-byte Reload
-.LBB121_12:
-	movl	%ecx, 20(%ebx)
-	js	.LBB121_14
-# BB#13:
-	movl	%ebp, %eax
-.LBB121_14:
-	movl	%eax, 24(%ebx)
-	js	.LBB121_16
-# BB#15:
-	movl	%esi, %edi
-.LBB121_16:
-	movl	%edi, 28(%ebx)
-	addl	$56, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end121:
-	.size	mcl_fp_addNF8L, .Lfunc_end121-mcl_fp_addNF8L
-
-	.globl	mcl_fp_sub8L
-	.align	16, 0x90
-	.type	mcl_fp_sub8L,@function
-mcl_fp_sub8L:                           # @mcl_fp_sub8L
-# BB#0:
+.LBB62_1:
+	movl	%edx, %eax
+	movl	800(%esp), %edx
+	movl	%eax, 44(%edx)
+	je	.LBB62_4
+.LBB62_3:
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 40(%edx)
+	movl	56(%esp), %eax                  # 4-byte Reload
+	je	.LBB62_6
+.LBB62_5:
+	movl	36(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 36(%edx)
+	movl	76(%esp), %ebx                  # 4-byte Reload
+	movl	80(%esp), %ebp                  # 4-byte Reload
+	je	.LBB62_8
+.LBB62_7:
+	movl	16(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 32(%edx)
+	movl	72(%esp), %edi                  # 4-byte Reload
+	je	.LBB62_10
+.LBB62_9:
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%edx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	je	.LBB62_12
+.LBB62_11:
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%edx)
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	je	.LBB62_14
+.LBB62_13:
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%edx)
+	je	.LBB62_16
+.LBB62_15:
+	movl	48(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 16(%edx)
+	je	.LBB62_18
+.LBB62_17:
+	movl	20(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%edx)
+	je	.LBB62_20
+.LBB62_19:
+	movl	28(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%edx)
+	je	.LBB62_22
+.LBB62_21:
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%edx)
+	jne	.LBB62_23
+	jmp	.LBB62_24
+.Lfunc_end62:
+	.size	mcl_fp_montRed12L, .Lfunc_end62-mcl_fp_montRed12L
+                                        # -- End function
+	.globl	mcl_fp_montRedNF12L             # -- Begin function mcl_fp_montRedNF12L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF12L,@function
+mcl_fp_montRedNF12L:                    # @mcl_fp_montRedNF12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$28, %esp
-	movl	52(%esp), %esi
-	movl	(%esi), %ecx
-	movl	4(%esi), %eax
-	xorl	%ebx, %ebx
-	movl	56(%esp), %ebp
-	subl	(%ebp), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	sbbl	4(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	8(%esi), %edx
-	sbbl	8(%ebp), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%ebp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	16(%esi), %ecx
-	sbbl	16(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	24(%esi), %edi
-	sbbl	24(%ebp), %edi
-	movl	28(%esi), %esi
-	sbbl	28(%ebp), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	48(%esp), %ebx
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, (%ebx)
-	movl	20(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%ebx)
-	movl	%edx, 8(%ebx)
-	movl	24(%esp), %edx          # 4-byte Reload
-	movl	%edx, 12(%ebx)
-	movl	%ecx, 16(%ebx)
-	movl	%eax, 20(%ebx)
-	movl	%edi, 24(%ebx)
-	movl	%esi, 28(%ebx)
-	je	.LBB122_2
-# BB#1:                                 # %carry
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	60(%esp), %esi
-	movl	16(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	adcl	8(%esi), %ebp
-	movl	12(%esi), %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	%eax, 20(%ebx)
-	movl	24(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 24(%ebx)
-	movl	28(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-.LBB122_2:                              # %nocarry
-	addl	$28, %esp
-	popl	%esi
-	popl	%edi
+	subl	$780, %esp                      # imm = 0x30C
+	calll	.L63$pb
+.L63$pb:
 	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end122:
-	.size	mcl_fp_sub8L, .Lfunc_end122-mcl_fp_sub8L
-
-	.globl	mcl_fp_subNF8L
-	.align	16, 0x90
-	.type	mcl_fp_subNF8L,@function
-mcl_fp_subNF8L:                         # @mcl_fp_subNF8L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$40, %esp
-	movl	64(%esp), %eax
-	movl	(%eax), %esi
-	movl	4(%eax), %edx
-	movl	68(%esp), %ecx
-	subl	(%ecx), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	sbbl	4(%ecx), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	28(%eax), %edx
-	movl	24(%eax), %esi
-	movl	20(%eax), %edi
-	movl	16(%eax), %ebx
-	movl	12(%eax), %ebp
-	movl	8(%eax), %eax
-	sbbl	8(%ecx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	sbbl	12(%ecx), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	sbbl	16(%ecx), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	sbbl	20(%ecx), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	sbbl	24(%ecx), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
+.Ltmp15:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp15-.L63$pb), %ebx
+	movl	808(%esp), %ecx
+	movl	44(%ecx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	40(%ecx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	36(%ecx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	32(%ecx), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	28(%ecx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	24(%ecx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	20(%ecx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	12(%ecx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	8(%ecx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %eax
+	movl	%ecx, %edx
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	movl	44(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	40(%eax), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	28(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	16(%eax), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	12(%eax), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	8(%eax), %esi
+	movl	(%eax), %ebp
+	movl	4(%eax), %edi
+	movl	-4(%edx), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	(%edx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	%ebp, %eax
+	imull	%ecx, %eax
+	leal	732(%esp), %ecx
+	pushl	%eax
+	pushl	%edx
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addl	728(%esp), %ebp
+	adcl	732(%esp), %edi
+	adcl	736(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	740(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	744(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	748(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	752(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	756(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	760(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	764(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	768(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	772(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	movl	48(%eax), %eax
+	adcl	776(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	setb	32(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edi, %eax
+	leal	676(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 32(%esp)                  # 1-byte Folded Spill
+	movl	720(%esp), %eax
+	adcl	$0, %eax
+	addl	672(%esp), %edi
+	adcl	676(%esp), %esi
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	680(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	684(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	688(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	692(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	696(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	700(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	704(%esp), %edi
+	adcl	708(%esp), %ebp
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	712(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	716(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	52(%ecx), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	setb	48(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	620(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 48(%esp)                  # 1-byte Folded Spill
+	movl	664(%esp), %eax
+	adcl	$0, %eax
+	addl	616(%esp), %esi
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	620(%esp), %edx
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	624(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	628(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	632(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	636(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	640(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	644(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	adcl	648(%esp), %ebp
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	652(%esp), %esi
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	656(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	660(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	56(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	36(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
 	movl	%edx, %edi
-	sbbl	28(%ecx), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	sarl	$31, %edi
-	movl	72(%esp), %ebp
-	movl	28(%ebp), %eax
-	andl	%edi, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	24(%ebp), %eax
-	andl	%edi, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	20(%ebp), %ebx
-	andl	%edi, %ebx
-	movl	16(%ebp), %esi
-	andl	%edi, %esi
-	movl	12(%ebp), %edx
-	andl	%edi, %edx
-	movl	8(%ebp), %ecx
-	andl	%edi, %ecx
-	movl	4(%ebp), %eax
-	andl	%edi, %eax
-	andl	(%ebp), %edi
-	addl	24(%esp), %edi          # 4-byte Folded Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	60(%esp), %ebp
-	movl	%edi, (%ebp)
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 4(%ebp)
-	adcl	12(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 8(%ebp)
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 12(%ebp)
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	%esi, 16(%ebp)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebx, 20(%ebp)
-	movl	%eax, 24(%ebp)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%ebp)
-	addl	$40, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end123:
-	.size	mcl_fp_subNF8L, .Lfunc_end123-mcl_fp_subNF8L
-
-	.globl	mcl_fpDbl_add8L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add8L,@function
-mcl_fpDbl_add8L:                        # @mcl_fpDbl_add8L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$56, %esp
-	movl	84(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edx
-	movl	80(%esp), %ebp
-	addl	(%ebp), %esi
-	adcl	4(%ebp), %edx
-	movl	8(%ecx), %edi
-	adcl	8(%ebp), %edi
-	movl	12(%ebp), %ebx
-	movl	76(%esp), %eax
-	movl	%esi, (%eax)
-	movl	16(%ebp), %esi
-	adcl	12(%ecx), %ebx
-	adcl	16(%ecx), %esi
-	movl	%edx, 4(%eax)
-	movl	40(%ecx), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%edi, 8(%eax)
-	movl	20(%ecx), %edx
-	movl	%ebx, 12(%eax)
-	movl	20(%ebp), %edi
-	adcl	%edx, %edi
-	movl	24(%ecx), %edx
-	movl	%esi, 16(%eax)
-	movl	24(%ebp), %esi
-	adcl	%edx, %esi
-	movl	28(%ecx), %edx
-	movl	%edi, 20(%eax)
-	movl	28(%ebp), %ebx
-	adcl	%edx, %ebx
-	movl	32(%ecx), %edx
-	movl	%esi, 24(%eax)
-	movl	32(%ebp), %esi
-	adcl	%edx, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	36(%ecx), %edx
-	movl	%ebx, 28(%eax)
-	movl	36(%ebp), %ebx
-	adcl	%edx, %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	40(%ebp), %eax
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%ecx), %edx
-	movl	44(%ebp), %edi
-	adcl	%edx, %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	48(%ecx), %edx
-	movl	48(%ebp), %eax
-	adcl	%edx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	52(%ecx), %edx
-	movl	52(%ebp), %esi
-	adcl	%edx, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	56(%ecx), %edx
-	movl	56(%ebp), %eax
-	adcl	%edx, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%ecx), %ecx
-	movl	60(%ebp), %ebp
-	adcl	%ecx, %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	44(%esp), %eax          # 4-byte Reload
-	movl	88(%esp), %edx
-	subl	(%edx), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	88(%esp), %eax
-	sbbl	4(%eax), %ebx
+	leal	564(%esp), %ecx
+	pushl	%eax
+	movl	816(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 36(%esp)                  # 1-byte Folded Spill
+	movl	608(%esp), %eax
+	adcl	$0, %eax
+	addl	560(%esp), %edi
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	564(%esp), %edi
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	568(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	572(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	576(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	580(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	584(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	588(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	adcl	592(%esp), %esi
+	movl	%esi, %ebp
+	movl	40(%esp), %esi                  # 4-byte Reload
+	adcl	596(%esp), %esi
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	600(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	604(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	60(%ecx), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	setb	40(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edi, %eax
+	leal	508(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 40(%esp)                  # 1-byte Folded Spill
+	movl	552(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %ecx
+	addl	504(%esp), %edi
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	508(%esp), %edx
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	512(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	516(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	520(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	524(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	528(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	532(%esp), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	adcl	536(%esp), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	540(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	544(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	64(%eax), %ecx
+	movl	%ecx, %ebp
+	setb	32(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	452(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 32(%esp)                  # 1-byte Folded Spill
+	movl	496(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	448(%esp), %edi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	452(%esp), %ecx
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	456(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	464(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	468(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	472(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	476(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	480(%esp), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	484(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	488(%esp), %esi
+	adcl	492(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	68(%eax), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	setb	28(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %ebp
+	leal	396(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 28(%esp)                  # 1-byte Folded Spill
+	movl	440(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	392(%esp), %ebp
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	396(%esp), %ecx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	400(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	404(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	408(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	412(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	416(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	420(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	424(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	428(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	432(%esp), %edi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	436(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	72(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	setb	12(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, %eax
+	imull	%ecx, %eax
+	movl	%ecx, %esi
+	leal	340(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 12(%esp)                  # 1-byte Folded Spill
+	movl	384(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	336(%esp), %esi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	340(%esp), %ecx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	356(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	360(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	364(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	368(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	372(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	376(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	380(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	76(%eax), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	setb	63(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	imull	%ecx, %ebp
+	movl	%ecx, %esi
+	leal	284(%esp), %ecx
+	pushl	%ebp
+	movl	816(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 63(%esp)                  # 1-byte Folded Spill
+	movl	328(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	280(%esp), %esi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	284(%esp), %ecx
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	288(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	292(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	296(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	300(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	304(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	308(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	316(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	320(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	324(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	80(%eax), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	setb	32(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %esi
+	leal	228(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 32(%esp)                  # 1-byte Folded Spill
+	movl	272(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	224(%esp), %esi
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	228(%esp), %ecx
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	232(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	240(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	244(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	248(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	252(%esp), %esi
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	256(%esp), %edi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	260(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	264(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	268(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	804(%esp), %eax
+	adcl	84(%eax), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	setb	12(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %ebp
+	leal	172(%esp), %ecx
+	pushl	%eax
+	pushl	816(%esp)
+	pushl	%ecx
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 12(%esp)                  # 1-byte Folded Spill
+	movl	216(%esp), %eax
+	adcl	$0, %eax
 	movl	%eax, %edx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
+	addl	168(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	176(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	180(%esp), %ebp
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	184(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	188(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	192(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	adcl	196(%esp), %edi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	200(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	204(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	208(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	212(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ecx
+	adcl	88(%ecx), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	setb	32(%esp)                        # 1-byte Folded Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	imull	%eax, %ecx
+	movl	%eax, %esi
+	subl	$4, %esp
+	leal	116(%esp), %eax
+	pushl	%ecx
+	pushl	816(%esp)
+	pushl	%eax
+	calll	mulPv384x32@PLT
+	addl	$12, %esp
+	addb	$255, 32(%esp)                  # 1-byte Folded Spill
+	movl	160(%esp), %edx
+	adcl	$0, %edx
+	addl	112(%esp), %esi
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	116(%esp), %ecx
+	adcl	120(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	124(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	128(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	132(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	136(%esp), %edi
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	140(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	144(%esp), %ebp
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	adcl	148(%esp), %ebx
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	152(%esp), %eax
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	adcl	156(%esp), %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	804(%esp), %ebx
+	adcl	92(%ebx), %edx
+	movl	%eax, %ebx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	subl	88(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	sbbl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	68(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	sbbl	72(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	76(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	sbbl	80(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 80(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	84(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	%ebp, %ecx
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	sbbl	92(%esp), %ecx                  # 4-byte Folded Reload
+	movl	28(%esp), %esi                  # 4-byte Reload
+	sbbl	96(%esp), %esi                  # 4-byte Folded Reload
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	movl	%ebx, %edi
+	sbbl	100(%esp), %edi                 # 4-byte Folded Reload
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	sbbl	104(%esp), %ebp                 # 4-byte Folded Reload
 	movl	%edx, %ebx
-	sbbl	8(%ebx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	movl	24(%esp), %edi          # 4-byte Reload
-	sbbl	12(%ebx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	sbbl	16(%ebx), %eax
-	sbbl	20(%ebx), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	sbbl	24(%ebx), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	sbbl	28(%ebx), %ebp
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB124_2
-# BB#1:
-	movl	%eax, %edi
-.LBB124_2:
-	testb	%cl, %cl
-	movl	44(%esp), %ecx          # 4-byte Reload
-	jne	.LBB124_4
-# BB#3:
-	movl	(%esp), %ecx            # 4-byte Reload
-.LBB124_4:
-	movl	76(%esp), %eax
-	movl	%ecx, 32(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	32(%esp), %edx          # 4-byte Reload
-	movl	48(%esp), %esi          # 4-byte Reload
-	movl	28(%esp), %ebx          # 4-byte Reload
-	jne	.LBB124_6
-# BB#5:
-	movl	4(%esp), %ebx           # 4-byte Reload
-.LBB124_6:
-	movl	%ebx, 36(%eax)
-	jne	.LBB124_8
-# BB#7:
-	movl	8(%esp), %esi           # 4-byte Reload
-.LBB124_8:
-	movl	%esi, 40(%eax)
-	movl	36(%esp), %esi          # 4-byte Reload
-	jne	.LBB124_10
-# BB#9:
-	movl	12(%esp), %edx          # 4-byte Reload
-.LBB124_10:
+	sbbl	108(%esp), %edx                 # 4-byte Folded Reload
+	movl	%edx, %eax
+	sarl	$31, %eax
+	testl	%eax, %eax
+	js	.LBB63_1
+# %bb.2:
+	movl	800(%esp), %eax
 	movl	%edx, 44(%eax)
-	movl	%edi, 48(%eax)
-	movl	52(%esp), %edx          # 4-byte Reload
-	jne	.LBB124_12
-# BB#11:
-	movl	16(%esp), %esi          # 4-byte Reload
-.LBB124_12:
-	movl	%esi, 52(%eax)
-	jne	.LBB124_14
-# BB#13:
-	movl	20(%esp), %edx          # 4-byte Reload
-.LBB124_14:
-	movl	%edx, 56(%eax)
-	jne	.LBB124_16
-# BB#15:
-	movl	%ebp, %ecx
-.LBB124_16:
-	movl	%ecx, 60(%eax)
-	addl	$56, %esp
+	js	.LBB63_3
+.LBB63_4:
+	movl	%ebp, 40(%eax)
+	movl	64(%esp), %edx                  # 4-byte Reload
+	js	.LBB63_5
+.LBB63_6:
+	movl	%edi, 36(%eax)
+	movl	80(%esp), %ebp                  # 4-byte Reload
+	js	.LBB63_7
+.LBB63_8:
+	movl	%esi, 32(%eax)
+	movl	72(%esp), %edi                  # 4-byte Reload
+	movl	76(%esp), %ebx                  # 4-byte Reload
+	js	.LBB63_9
+.LBB63_10:
+	movl	%ecx, 28(%eax)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	js	.LBB63_11
+.LBB63_12:
+	movl	%ecx, 24(%eax)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	js	.LBB63_13
+.LBB63_14:
+	movl	%ebp, 20(%eax)
+	js	.LBB63_15
+.LBB63_16:
+	movl	%ebx, 16(%eax)
+	js	.LBB63_17
+.LBB63_18:
+	movl	%edi, 12(%eax)
+	js	.LBB63_19
+.LBB63_20:
+	movl	%esi, 8(%eax)
+	js	.LBB63_21
+.LBB63_22:
+	movl	%edx, 4(%eax)
+	jns	.LBB63_24
+.LBB63_23:
+	movl	40(%esp), %ecx                  # 4-byte Reload
+.LBB63_24:
+	movl	%ecx, (%eax)
+	addl	$780, %esp                      # imm = 0x30C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end124:
-	.size	mcl_fpDbl_add8L, .Lfunc_end124-mcl_fpDbl_add8L
-
-	.globl	mcl_fpDbl_sub8L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub8L,@function
-mcl_fpDbl_sub8L:                        # @mcl_fpDbl_sub8L
-# BB#0:
+.LBB63_1:
+	movl	%ebx, %edx
+	movl	800(%esp), %eax
+	movl	%edx, 44(%eax)
+	jns	.LBB63_4
+.LBB63_3:
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 40(%eax)
+	movl	64(%esp), %edx                  # 4-byte Reload
+	jns	.LBB63_6
+.LBB63_5:
+	movl	48(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 36(%eax)
+	movl	80(%esp), %ebp                  # 4-byte Reload
+	jns	.LBB63_8
+.LBB63_7:
+	movl	28(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 32(%eax)
+	movl	72(%esp), %edi                  # 4-byte Reload
+	movl	76(%esp), %ebx                  # 4-byte Reload
+	jns	.LBB63_10
+.LBB63_9:
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%eax)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB63_12
+.LBB63_11:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%eax)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB63_14
+.LBB63_13:
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%eax)
+	jns	.LBB63_16
+.LBB63_15:
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 16(%eax)
+	jns	.LBB63_18
+.LBB63_17:
+	movl	44(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%eax)
+	jns	.LBB63_20
+.LBB63_19:
+	movl	20(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%eax)
+	jns	.LBB63_22
+.LBB63_21:
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%eax)
+	js	.LBB63_23
+	jmp	.LBB63_24
+.Lfunc_end63:
+	.size	mcl_fp_montRedNF12L, .Lfunc_end63-mcl_fp_montRedNF12L
+                                        # -- End function
+	.globl	mcl_fp_addPre12L                # -- Begin function mcl_fp_addPre12L
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre12L,@function
+mcl_fp_addPre12L:                       # @mcl_fp_addPre12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$40, %esp
-	movl	64(%esp), %edi
+	subl	$36, %esp
+	movl	60(%esp), %edi
 	movl	(%edi), %eax
-	movl	4(%edi), %edx
-	movl	68(%esp), %ebx
-	subl	(%ebx), %eax
-	sbbl	4(%ebx), %edx
-	movl	8(%edi), %esi
-	sbbl	8(%ebx), %esi
-	movl	60(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%edi), %eax
-	sbbl	12(%ebx), %eax
-	movl	%edx, 4(%ecx)
-	movl	16(%edi), %edx
-	sbbl	16(%ebx), %edx
-	movl	%esi, 8(%ecx)
-	movl	20(%ebx), %esi
-	movl	%eax, 12(%ecx)
-	movl	20(%edi), %eax
-	sbbl	%esi, %eax
-	movl	24(%ebx), %esi
-	movl	%edx, 16(%ecx)
-	movl	24(%edi), %edx
-	sbbl	%esi, %edx
-	movl	28(%ebx), %esi
-	movl	%eax, 20(%ecx)
+	movl	4(%edi), %ecx
+	movl	64(%esp), %esi
+	addl	(%esi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	4(%esi), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	44(%edi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	40(%edi), %ebx
+	movl	36(%edi), %ebp
+	movl	32(%edi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
 	movl	28(%edi), %eax
-	sbbl	%esi, %eax
-	movl	32(%ebx), %esi
-	movl	%edx, 24(%ecx)
-	movl	32(%edi), %edx
-	sbbl	%esi, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	36(%ebx), %edx
-	movl	%eax, 28(%ecx)
-	movl	36(%edi), %eax
-	sbbl	%edx, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	40(%ebx), %eax
-	movl	40(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	44(%ebx), %eax
-	movl	44(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	48(%ebx), %eax
-	movl	48(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	52(%ebx), %eax
-	movl	52(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	56(%ebx), %eax
-	movl	56(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	60(%ebx), %eax
-	movl	60(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	72(%esp), %ebx
-	jne	.LBB125_1
-# BB#2:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB125_3
-.LBB125_1:
-	movl	28(%ebx), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-.LBB125_3:
-	testb	%al, %al
-	jne	.LBB125_4
-# BB#5:
-	movl	$0, %ebp
-	movl	$0, %eax
-	jmp	.LBB125_6
-.LBB125_4:
-	movl	(%ebx), %eax
-	movl	4(%ebx), %ebp
-.LBB125_6:
-	jne	.LBB125_7
-# BB#8:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB125_9
-.LBB125_7:
-	movl	24(%ebx), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-.LBB125_9:
-	jne	.LBB125_10
-# BB#11:
-	movl	$0, %edx
-	jmp	.LBB125_12
-.LBB125_10:
-	movl	20(%ebx), %edx
-.LBB125_12:
-	jne	.LBB125_13
-# BB#14:
-	movl	$0, %esi
-	jmp	.LBB125_15
-.LBB125_13:
-	movl	16(%ebx), %esi
-.LBB125_15:
-	jne	.LBB125_16
-# BB#17:
-	movl	$0, %edi
-	jmp	.LBB125_18
-.LBB125_16:
-	movl	12(%ebx), %edi
-.LBB125_18:
-	jne	.LBB125_19
-# BB#20:
-	xorl	%ebx, %ebx
-	jmp	.LBB125_21
-.LBB125_19:
-	movl	8(%ebx), %ebx
-.LBB125_21:
-	addl	16(%esp), %eax          # 4-byte Folded Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	adcl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 36(%ecx)
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 40(%ecx)
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%edi, 44(%ecx)
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, 48(%ecx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 52(%ecx)
-	movl	%eax, 56(%ecx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%ecx)
-	addl	$40, %esp
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%edi), %edx
+	movl	20(%edi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	16(%edi), %ecx
+	movl	12(%edi), %eax
+	movl	8(%edi), %edi
+	adcl	8(%esi), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	adcl	12(%esi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	16(%esi), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	(%esp), %edi                    # 4-byte Reload
+	adcl	20(%esi), %edi
+	adcl	24(%esi), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	28(%esi), %edx
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	32(%esi), %eax
+	adcl	36(%esi), %ebp
+	adcl	40(%esi), %ebx
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	44(%esi), %ecx
+	movl	56(%esp), %esi
+	movl	%ebx, 40(%esi)
+	movl	%ebp, 36(%esi)
+	movl	%eax, 32(%esi)
+	movl	%edx, 28(%esi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 24(%esi)
+	movl	%edi, 20(%esi)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%esi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%esi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%esi)
+	movl	%ecx, 44(%esi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%esi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%esi)
+	setb	%al
+	movzbl	%al, %eax
+	addl	$36, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end125:
-	.size	mcl_fpDbl_sub8L, .Lfunc_end125-mcl_fpDbl_sub8L
-
-	.align	16, 0x90
-	.type	.LmulPv288x32,@function
-.LmulPv288x32:                          # @mulPv288x32
-# BB#0:
+.Lfunc_end64:
+	.size	mcl_fp_addPre12L, .Lfunc_end64-mcl_fp_addPre12L
+                                        # -- End function
+	.globl	mcl_fp_subPre12L                # -- Begin function mcl_fp_subPre12L
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre12L,@function
+mcl_fp_subPre12L:                       # @mcl_fp_subPre12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$56, %esp
-	movl	%edx, %esi
-	movl	76(%esp), %edi
-	movl	%edi, %eax
-	mull	32(%esi)
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	28(%esi)
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	24(%esi)
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	20(%esi)
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	16(%esi)
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	12(%esi)
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	mull	8(%esi)
-	movl	%edx, %ebx
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	mull	4(%esi)
-	movl	%edx, %ebp
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%edi, %eax
-	mull	(%esi)
-	movl	%eax, (%ecx)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%ecx)
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 8(%ecx)
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 12(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 36(%ecx)
-	movl	%ecx, %eax
-	addl	$56, %esp
+	subl	$44, %esp
+	movl	68(%esp), %ebx
+	movl	(%ebx), %ecx
+	movl	4(%ebx), %eax
+	movl	72(%esp), %edi
+	subl	(%edi), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	sbbl	4(%edi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%ebx), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	40(%ebx), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	36(%ebx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	32(%ebx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%ebx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%ebx), %edx
+	movl	20(%ebx), %ebp
+	movl	16(%ebx), %ecx
+	movl	12(%ebx), %eax
+	movl	8(%ebx), %esi
+	sbbl	8(%edi), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	sbbl	12(%edi), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	sbbl	16(%edi), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	sbbl	20(%edi), %ebp
+	sbbl	24(%edi), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	sbbl	28(%edi), %ebx
+	movl	8(%esp), %edx                   # 4-byte Reload
+	sbbl	32(%edi), %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	sbbl	36(%edi), %ecx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%edi), %eax
+	movl	32(%esp), %esi                  # 4-byte Reload
+	sbbl	44(%edi), %esi
+	movl	64(%esp), %edi
+	movl	%eax, 40(%edi)
+	movl	%ecx, 36(%edi)
+	movl	%edx, 32(%edi)
+	movl	%ebx, 28(%edi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 24(%edi)
+	movl	%ebp, 20(%edi)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%edi)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	movl	%esi, 44(%edi)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%edi)
+	movl	$0, %eax
+	sbbl	%eax, %eax
+	andl	$1, %eax
+	addl	$44, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end126:
-	.size	.LmulPv288x32, .Lfunc_end126-.LmulPv288x32
-
-	.globl	mcl_fp_mulUnitPre9L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre9L,@function
-mcl_fp_mulUnitPre9L:                    # @mcl_fp_mulUnitPre9L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
+.Lfunc_end65:
+	.size	mcl_fp_subPre12L, .Lfunc_end65-mcl_fp_subPre12L
+                                        # -- End function
+	.globl	mcl_fp_shr1_12L                 # -- Begin function mcl_fp_shr1_12L
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_12L,@function
+mcl_fp_shr1_12L:                        # @mcl_fp_shr1_12L
+# %bb.0:
 	pushl	%esi
-	subl	$76, %esp
-	calll	.L127$pb
-.L127$pb:
-	popl	%ebx
-.Ltmp8:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp8-.L127$pb), %ebx
-	movl	104(%esp), %eax
-	movl	%eax, (%esp)
-	leal	32(%esp), %ecx
-	movl	100(%esp), %edx
-	calll	.LmulPv288x32
-	movl	68(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi
-	movl	48(%esp), %ebx
-	movl	44(%esp), %ebp
-	movl	40(%esp), %esi
-	movl	32(%esp), %edx
-	movl	36(%esp), %ecx
-	movl	96(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%ebp, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%edi, 20(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	addl	$76, %esp
+	movl	12(%esp), %eax
+	movl	44(%eax), %edx
+	movl	%edx, %esi
+	shrl	%esi
+	movl	8(%esp), %ecx
+	movl	%esi, 44(%ecx)
+	movl	40(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 40(%ecx)
+	movl	36(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 36(%ecx)
+	movl	32(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 32(%ecx)
+	movl	28(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 28(%ecx)
+	movl	24(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 24(%ecx)
+	movl	20(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 20(%ecx)
+	movl	16(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 16(%ecx)
+	movl	12(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 12(%ecx)
+	movl	8(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 8(%ecx)
+	movl	4(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 4(%ecx)
+	movl	(%eax), %eax
+	shrdl	$1, %edx, %eax
+	movl	%eax, (%ecx)
 	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
 	retl
-.Lfunc_end127:
-	.size	mcl_fp_mulUnitPre9L, .Lfunc_end127-mcl_fp_mulUnitPre9L
-
-	.globl	mcl_fpDbl_mulPre9L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre9L,@function
-mcl_fpDbl_mulPre9L:                     # @mcl_fpDbl_mulPre9L
-# BB#0:
+.Lfunc_end66:
+	.size	mcl_fp_shr1_12L, .Lfunc_end66-mcl_fp_shr1_12L
+                                        # -- End function
+	.globl	mcl_fp_add12L                   # -- Begin function mcl_fp_add12L
+	.p2align	4, 0x90
+	.type	mcl_fp_add12L,@function
+mcl_fp_add12L:                          # @mcl_fp_add12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$428, %esp              # imm = 0x1AC
-	calll	.L128$pb
-.L128$pb:
-	popl	%esi
-.Ltmp9:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp9-.L128$pb), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	456(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	384(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	%edx, %ebp
-	movl	%esi, %ebx
-	calll	.LmulPv288x32
-	movl	420(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	416(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	404(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	400(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	396(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	392(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	388(%esp), %edi
-	movl	448(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	456(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	%ebp, %edx
-	movl	%esi, %ebx
-	calll	.LmulPv288x32
-	addl	344(%esp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	380(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	376(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	364(%esp), %ebx
-	movl	360(%esp), %edi
-	movl	356(%esp), %esi
-	movl	348(%esp), %ecx
-	movl	352(%esp), %edx
-	movl	448(%esp), %eax
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 4(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	304(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	340(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	336(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	332(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	328(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	324(%esp), %edi
-	movl	320(%esp), %ebp
-	movl	316(%esp), %esi
-	movl	308(%esp), %ecx
-	movl	312(%esp), %edx
-	movl	448(%esp), %eax
-	movl	40(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	264(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	300(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	296(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	292(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	288(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	284(%esp), %ebx
-	movl	280(%esp), %edi
-	movl	276(%esp), %esi
-	movl	268(%esp), %ecx
-	movl	272(%esp), %edx
-	movl	448(%esp), %eax
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	addl	224(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	252(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	248(%esp), %ebx
-	movl	244(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	240(%esp), %edi
-	movl	236(%esp), %ebp
-	movl	228(%esp), %ecx
-	movl	232(%esp), %edx
-	movl	448(%esp), %eax
-	movl	44(%esp), %esi          # 4-byte Reload
-	movl	%esi, 16(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	44(%esp), %eax          # 4-byte Reload
-	addl	184(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	220(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	208(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	204(%esp), %edi
-	movl	200(%esp), %ebx
-	movl	196(%esp), %esi
-	movl	188(%esp), %ecx
-	movl	192(%esp), %edx
-	movl	448(%esp), %eax
-	movl	44(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 20(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	144(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	addl	144(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	164(%esp), %ebx
-	movl	160(%esp), %edi
-	movl	156(%esp), %esi
-	movl	148(%esp), %ecx
-	movl	152(%esp), %edx
-	movl	448(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	104(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	12(%esp), %esi          # 4-byte Reload
-	addl	104(%esp), %esi
-	movl	140(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	136(%esp), %ebp
-	movl	132(%esp), %edi
-	movl	128(%esp), %ebx
-	movl	124(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	116(%esp), %edx
-	movl	108(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	112(%esp), %ecx
-	movl	448(%esp), %eax
-	movl	%esi, 28(%eax)
-	movl	12(%esp), %esi          # 4-byte Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	456(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	64(%esp), %ecx
-	movl	452(%esp), %edx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	%esi, %ebp
-	addl	64(%esp), %ebp
-	movl	24(%esp), %edx          # 4-byte Reload
-	adcl	68(%esp), %edx
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi
-	movl	84(%esp), %ebx
-	movl	80(%esp), %esi
-	movl	76(%esp), %eax
-	movl	448(%esp), %ecx
-	movl	%ebp, 32(%ecx)
-	movl	%edx, 36(%ecx)
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	52(%esp), %edx          # 4-byte Reload
-	movl	%edx, 40(%ecx)
-	adcl	48(%esp), %esi          # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%esi, 48(%ecx)
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 52(%ecx)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%edi, 56(%ecx)
-	movl	%eax, 60(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 68(%ecx)
-	addl	$428, %esp              # imm = 0x1AC
+	subl	$48, %esp
+	movl	72(%esp), %edi
+	movl	(%edi), %ecx
+	movl	4(%edi), %eax
+	movl	76(%esp), %ebx
+	addl	(%ebx), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	4(%ebx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	44(%edi), %ebp
+	movl	40(%edi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	36(%edi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	32(%edi), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	28(%edi), %eax
+	movl	24(%edi), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	20(%edi), %ecx
+	movl	16(%edi), %esi
+	movl	12(%edi), %edx
+	movl	8(%edi), %edi
+	adcl	8(%ebx), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	12(%ebx), %edx
+	adcl	16(%ebx), %esi
+	adcl	20(%ebx), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	24(%ebx), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	28(%ebx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	32(%ebx), %edi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	36(%ebx), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	40(%ebx), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	44(%ebx), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ebx
+	movl	%ebp, 44(%ebx)
+	movl	%ecx, 40(%ebx)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%eax, 36(%ebx)
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	%edi, 32(%ebx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 28(%ebx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 24(%ebx)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 20(%ebx)
+	movl	%esi, 16(%ebx)
+	movl	%edx, 12(%ebx)
+	movl	%ecx, 8(%ebx)
+	movl	20(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 4(%ebx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%ebx)
+	setb	3(%esp)                         # 1-byte Folded Spill
+	movl	80(%esp), %ebp
+	subl	(%ebp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	sbbl	4(%ebp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	sbbl	8(%ebp), %ecx
+	movl	%ecx, %edi
+	sbbl	12(%ebp), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	sbbl	16(%ebp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	20(%ebp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	24(%ebp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%ebp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %esi                  # 4-byte Reload
+	sbbl	32(%ebp), %esi
+	movl	32(%esp), %edx                  # 4-byte Reload
+	sbbl	36(%ebp), %edx
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	sbbl	40(%ebp), %ecx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	sbbl	44(%ebp), %eax
+	movl	%eax, %ebp
+	movzbl	3(%esp), %eax                   # 1-byte Folded Reload
+	sbbl	$0, %eax
+	testb	$1, %al
+	jne	.LBB67_2
+# %bb.1:                                # %nocarry
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%ebx)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%ebx)
+	movl	%edi, 8(%ebx)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebx)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%ebx)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 20(%ebx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 24(%ebx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 28(%ebx)
+	movl	%esi, 32(%ebx)
+	movl	%edx, 36(%ebx)
+	movl	%ecx, 40(%ebx)
+	movl	%ebp, 44(%ebx)
+.LBB67_2:                               # %carry
+	addl	$48, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end128:
-	.size	mcl_fpDbl_mulPre9L, .Lfunc_end128-mcl_fpDbl_mulPre9L
-
-	.globl	mcl_fpDbl_sqrPre9L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre9L,@function
-mcl_fpDbl_sqrPre9L:                     # @mcl_fpDbl_sqrPre9L
-# BB#0:
+.Lfunc_end67:
+	.size	mcl_fp_add12L, .Lfunc_end67-mcl_fp_add12L
+                                        # -- End function
+	.globl	mcl_fp_addNF12L                 # -- Begin function mcl_fp_addNF12L
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF12L,@function
+mcl_fp_addNF12L:                        # @mcl_fp_addNF12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$428, %esp              # imm = 0x1AC
-	calll	.L129$pb
-.L129$pb:
-	popl	%ebx
-.Ltmp10:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp10-.L129$pb), %ebx
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	452(%esp), %edx
-	movl	(%edx), %eax
-	movl	%eax, (%esp)
-	leal	384(%esp), %ecx
-	movl	%edx, %esi
-	movl	%ebx, %edi
-	calll	.LmulPv288x32
-	movl	420(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	416(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	404(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	400(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	396(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	392(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	388(%esp), %ebp
-	movl	448(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	4(%esi), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	%esi, %edx
-	movl	%edi, %ebx
-	calll	.LmulPv288x32
-	addl	344(%esp), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	380(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	376(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	364(%esp), %ebx
-	movl	360(%esp), %edi
-	movl	356(%esp), %esi
-	movl	348(%esp), %ecx
-	movl	352(%esp), %edx
-	movl	448(%esp), %eax
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 4(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	adcl	24(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	8(%edx), %eax
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	304(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	340(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	336(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	332(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	328(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	324(%esp), %edi
-	movl	320(%esp), %ebp
-	movl	316(%esp), %esi
-	movl	308(%esp), %ecx
-	movl	312(%esp), %edx
-	movl	448(%esp), %eax
-	movl	40(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	12(%edx), %eax
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	264(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	300(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	296(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	292(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	288(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	284(%esp), %ebx
-	movl	280(%esp), %edi
-	movl	276(%esp), %esi
-	movl	268(%esp), %ecx
-	movl	272(%esp), %edx
-	movl	448(%esp), %eax
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 20(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	16(%edx), %eax
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	addl	224(%esp), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	252(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	248(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	244(%esp), %edi
-	movl	240(%esp), %ebp
-	movl	236(%esp), %esi
-	movl	228(%esp), %ecx
-	movl	232(%esp), %edx
-	movl	448(%esp), %eax
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 16(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	20(%edx), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	184(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	220(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	208(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	204(%esp), %ebx
-	movl	200(%esp), %edi
-	movl	196(%esp), %esi
-	movl	188(%esp), %ecx
-	movl	192(%esp), %edx
-	movl	448(%esp), %eax
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	24(%edx), %eax
-	movl	%eax, (%esp)
-	leal	144(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	addl	144(%esp), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	164(%esp), %edi
-	movl	160(%esp), %ebp
-	movl	156(%esp), %esi
-	movl	148(%esp), %ecx
-	movl	152(%esp), %edx
-	movl	448(%esp), %eax
-	movl	24(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 24(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	28(%edx), %eax
-	movl	%eax, (%esp)
-	leal	104(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	4(%esp), %esi           # 4-byte Reload
-	addl	104(%esp), %esi
-	movl	140(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	136(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	132(%esp), %ebp
-	movl	128(%esp), %ebx
-	movl	124(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	120(%esp), %edi
-	movl	116(%esp), %edx
-	movl	108(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	112(%esp), %ecx
-	movl	448(%esp), %eax
-	movl	%esi, 28(%eax)
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	452(%esp), %edx
-	movl	32(%edx), %eax
-	movl	%eax, (%esp)
-	leal	64(%esp), %ecx
-	movl	60(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv288x32
-	movl	%esi, %ebp
-	addl	64(%esp), %ebp
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	68(%esp), %edx
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
+	subl	$72, %esp
+	movl	100(%esp), %ecx
+	movl	(%ecx), %edx
+	movl	4(%ecx), %esi
 	movl	96(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi
-	movl	84(%esp), %ebx
-	movl	80(%esp), %esi
-	movl	76(%esp), %eax
-	movl	448(%esp), %ecx
-	movl	%ebp, 32(%ecx)
-	movl	%edx, 36(%ecx)
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	movl	%edx, 40(%ecx)
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%esi, 48(%ecx)
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 52(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%edi, 56(%ecx)
-	movl	%eax, 60(%ecx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 68(%ecx)
-	addl	$428, %esp              # imm = 0x1AC
+	addl	(%eax), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	adcl	4(%eax), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	44(%ecx), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	40(%ecx), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	36(%ecx), %esi
+	movl	32(%ecx), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	movl	28(%ecx), %edi
+	movl	24(%ecx), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	20(%ecx), %edx
+	movl	16(%ecx), %ebp
+	movl	12(%ecx), %ebx
+	movl	8(%ecx), %ecx
+	adcl	8(%eax), %ecx
+	adcl	12(%eax), %ebx
+	adcl	16(%eax), %ebp
+	adcl	20(%eax), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	24(%eax), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	adcl	28(%eax), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edi                   # 4-byte Reload
+	adcl	32(%eax), %edi
+	movl	%edi, 8(%esp)                   # 4-byte Spill
+	adcl	36(%eax), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	40(%eax), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	44(%eax), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	104(%esp), %esi
+	movl	32(%esp), %edx                  # 4-byte Reload
+	subl	(%esi), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	sbbl	4(%esi), %edi
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	sbbl	8(%esi), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	sbbl	12(%esi), %ebx
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	sbbl	16(%esi), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	20(%esi), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	sbbl	24(%esi), %edi
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	sbbl	28(%esi), %ebx
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	32(%esi), %eax
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	sbbl	36(%esi), %ecx
+	movl	4(%esp), %edx                   # 4-byte Reload
+	sbbl	40(%esi), %edx
+	movl	(%esp), %ebp                    # 4-byte Reload
+	sbbl	44(%esi), %ebp
+	movl	%ebp, %esi
+	sarl	$31, %esi
+	testl	%esi, %esi
+	js	.LBB68_1
+# %bb.2:
+	movl	92(%esp), %esi
+	movl	%ebp, 44(%esi)
+	js	.LBB68_3
+.LBB68_4:
+	movl	%edx, 40(%esi)
+	js	.LBB68_5
+.LBB68_6:
+	movl	%ecx, 36(%esi)
+	movl	60(%esp), %edx                  # 4-byte Reload
+	js	.LBB68_7
+.LBB68_8:
+	movl	%eax, 32(%esi)
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	js	.LBB68_9
+.LBB68_10:
+	movl	%ebx, 28(%esi)
+	movl	68(%esp), %eax                  # 4-byte Reload
+	js	.LBB68_11
+.LBB68_12:
+	movl	%edi, 24(%esi)
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	movl	48(%esp), %edi                  # 4-byte Reload
+	js	.LBB68_13
+.LBB68_14:
+	movl	%edi, 20(%esi)
+	movl	56(%esp), %edi                  # 4-byte Reload
+	js	.LBB68_15
+.LBB68_16:
+	movl	%ebx, 16(%esi)
+	js	.LBB68_17
+.LBB68_18:
+	movl	%edi, 12(%esi)
+	js	.LBB68_19
+.LBB68_20:
+	movl	%edx, 8(%esi)
+	js	.LBB68_21
+.LBB68_22:
+	movl	%ecx, 4(%esi)
+	jns	.LBB68_24
+.LBB68_23:
+	movl	32(%esp), %eax                  # 4-byte Reload
+.LBB68_24:
+	movl	%eax, (%esi)
+	addl	$72, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end129:
-	.size	mcl_fpDbl_sqrPre9L, .Lfunc_end129-mcl_fpDbl_sqrPre9L
-
-	.globl	mcl_fp_mont9L
-	.align	16, 0x90
-	.type	mcl_fp_mont9L,@function
-mcl_fp_mont9L:                          # @mcl_fp_mont9L
-# BB#0:
+.LBB68_1:
+	movl	(%esp), %ebp                    # 4-byte Reload
+	movl	92(%esp), %esi
+	movl	%ebp, 44(%esi)
+	jns	.LBB68_4
+.LBB68_3:
+	movl	4(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 40(%esi)
+	jns	.LBB68_6
+.LBB68_5:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 36(%esi)
+	movl	60(%esp), %edx                  # 4-byte Reload
+	jns	.LBB68_8
+.LBB68_7:
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 32(%esi)
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB68_10
+.LBB68_9:
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 28(%esi)
+	movl	68(%esp), %eax                  # 4-byte Reload
+	jns	.LBB68_12
+.LBB68_11:
+	movl	12(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 24(%esi)
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	movl	48(%esp), %edi                  # 4-byte Reload
+	jns	.LBB68_14
+.LBB68_13:
+	movl	24(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 20(%esi)
+	movl	56(%esp), %edi                  # 4-byte Reload
+	jns	.LBB68_16
+.LBB68_15:
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 16(%esi)
+	jns	.LBB68_18
+.LBB68_17:
+	movl	40(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%esi)
+	jns	.LBB68_20
+.LBB68_19:
+	movl	44(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%esi)
+	jns	.LBB68_22
+.LBB68_21:
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%esi)
+	js	.LBB68_23
+	jmp	.LBB68_24
+.Lfunc_end68:
+	.size	mcl_fp_addNF12L, .Lfunc_end68-mcl_fp_addNF12L
+                                        # -- End function
+	.globl	mcl_fp_sub12L                   # -- Begin function mcl_fp_sub12L
+	.p2align	4, 0x90
+	.type	mcl_fp_sub12L,@function
+mcl_fp_sub12L:                          # @mcl_fp_sub12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$796, %esp              # imm = 0x31C
-	calll	.L130$pb
-.L130$pb:
-	popl	%ebx
-.Ltmp11:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp11-.L130$pb), %ebx
-	movl	828(%esp), %eax
-	movl	-4(%eax), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	752(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	752(%esp), %ebp
-	movl	756(%esp), %esi
-	movl	%ebp, %eax
-	imull	%edi, %eax
-	movl	788(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	784(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	780(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	776(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	772(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	768(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	764(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	760(%esp), %edi
-	movl	%eax, (%esp)
-	leal	712(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	712(%esp), %ebp
-	adcl	716(%esp), %esi
-	adcl	720(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	748(%esp), %ebp
+	subl	$48, %esp
+	movl	72(%esp), %edx
+	movl	(%edx), %ecx
+	movl	4(%edx), %esi
+	movl	76(%esp), %eax
+	subl	(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	sbbl	4(%eax), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	44(%edx), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	40(%edx), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	36(%edx), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	32(%edx), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%edx), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	24(%edx), %esi
+	movl	20(%edx), %ebp
+	movl	16(%edx), %ebx
+	movl	12(%edx), %edi
+	movl	8(%edx), %ecx
+	sbbl	8(%eax), %ecx
+	sbbl	12(%eax), %edi
+	sbbl	16(%eax), %ebx
+	sbbl	20(%eax), %ebp
+	sbbl	24(%eax), %esi
+	movl	16(%esp), %edx                  # 4-byte Reload
+	sbbl	28(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	sbbl	32(%eax), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edx                   # 4-byte Reload
+	sbbl	36(%eax), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	sbbl	40(%eax), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	sbbl	44(%eax), %edx
+	movl	$0, %eax
 	sbbl	%eax, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	672(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	64(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	672(%esp), %esi
-	adcl	676(%esp), %edi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	680(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	684(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	688(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	692(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	696(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	704(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	adcl	708(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	828(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	andl	$1, %ebp
-	addl	632(%esp), %esi
-	adcl	636(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	660(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	824(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	592(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	addl	592(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	616(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	624(%esp), %esi
-	adcl	628(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%edi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	andl	$1, %ebp
-	addl	552(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	580(%esp), %edi
-	adcl	584(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	824(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	512(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	512(%esp), %ecx
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	524(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	536(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	544(%esp), %edi
-	adcl	548(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	472(%esp), %ebp
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	480(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	484(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	488(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	492(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	496(%esp), %ebp
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	500(%esp), %esi
-	adcl	504(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	508(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	432(%esp), %ecx
-	movl	820(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	movl	32(%esp), %ecx          # 4-byte Reload
-	addl	432(%esp), %ecx
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	444(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	452(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	adcl	456(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	392(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	movl	%esi, %eax
-	andl	$1, %eax
-	addl	392(%esp), %ebp
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	396(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	400(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	404(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	408(%esp), %ebp
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	412(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	416(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	420(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	424(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	428(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	addl	352(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	364(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	372(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	384(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	movl	%edi, %eax
-	andl	$1, %eax
-	addl	312(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	316(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	320(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	324(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	328(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	332(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	336(%esp), %esi
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	340(%esp), %edi
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	344(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	348(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %ebp
-	movl	824(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	272(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	292(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	296(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	308(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	movl	%edi, %ecx
-	andl	$1, %ecx
-	addl	232(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	240(%esp), %esi
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	244(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	260(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	192(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	192(%esp), %ecx
-	adcl	196(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	adcl	200(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	212(%esp), %esi
-	adcl	216(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	152(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	andl	$1, %ebp
-	addl	152(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	164(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	172(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	180(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	112(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	120(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	128(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	132(%esp), %ebp
-	adcl	136(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	144(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	28(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	72(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	andl	$1, %edi
-	addl	72(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	80(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	84(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	92(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	108(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	828(%esp), %ebx
-	subl	(%ebx), %eax
+	testb	$1, %al
+	movl	68(%esp), %eax
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%edx, 44(%eax)
+	movl	4(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 40(%eax)
+	movl	8(%esp), %edx                   # 4-byte Reload
+	movl	%edx, 36(%eax)
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 32(%eax)
+	movl	16(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 28(%eax)
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	%esi, 24(%eax)
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	%ebp, 20(%eax)
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	movl	%ebx, 16(%eax)
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	%edi, 12(%eax)
 	movl	%ecx, %edx
-	sbbl	4(%ebx), %edx
+	movl	%ecx, 8(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	36(%esp), %esi                  # 4-byte Reload
+	movl	%esi, (%eax)
+	je	.LBB69_2
+# %bb.1:                                # %carry
 	movl	%esi, %ecx
-	sbbl	8(%ebx), %ecx
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	12(%ebx), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	sbbl	16(%ebx), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	sbbl	20(%ebx), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	sbbl	24(%ebx), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	sbbl	28(%ebx), %esi
-	movl	60(%esp), %ebp          # 4-byte Reload
-	sbbl	32(%ebx), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	sbbl	$0, %edi
-	andl	$1, %edi
-	movl	%edi, %ebx
-	jne	.LBB130_2
-# BB#1:
-	movl	%esi, 32(%esp)          # 4-byte Spill
-.LBB130_2:
-	testb	%bl, %bl
-	movl	68(%esp), %esi          # 4-byte Reload
-	jne	.LBB130_4
-# BB#3:
-	movl	%eax, %esi
-.LBB130_4:
-	movl	816(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	64(%esp), %eax          # 4-byte Reload
-	jne	.LBB130_6
-# BB#5:
-	movl	%edx, %eax
-.LBB130_6:
-	movl	%eax, 4(%ebp)
-	movl	52(%esp), %eax          # 4-byte Reload
-	jne	.LBB130_8
-# BB#7:
-	movl	%ecx, %eax
-.LBB130_8:
-	movl	%eax, 8(%ebp)
-	movl	44(%esp), %eax          # 4-byte Reload
-	jne	.LBB130_10
-# BB#9:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB130_10:
-	movl	%eax, 12(%ebp)
-	jne	.LBB130_12
-# BB#11:
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-.LBB130_12:
-	movl	40(%esp), %eax          # 4-byte Reload
-	movl	%eax, 16(%ebp)
-	movl	36(%esp), %eax          # 4-byte Reload
-	jne	.LBB130_14
-# BB#13:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB130_14:
-	movl	%eax, 20(%ebp)
-	movl	48(%esp), %eax          # 4-byte Reload
-	jne	.LBB130_16
-# BB#15:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB130_16:
-	movl	%eax, 24(%ebp)
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 28(%ebp)
-	movl	60(%esp), %eax          # 4-byte Reload
-	jne	.LBB130_18
-# BB#17:
-	movl	56(%esp), %eax          # 4-byte Reload
-.LBB130_18:
-	movl	%eax, 32(%ebp)
-	addl	$796, %esp              # imm = 0x31C
+	movl	80(%esp), %ecx
+	addl	(%ecx), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	4(%ecx), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	adcl	8(%ecx), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	12(%ecx), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	16(%ecx), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	20(%ecx), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	24(%ecx), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	28(%ecx), %ebx
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	32(%ecx), %edi
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	36(%ecx), %esi
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	40(%ecx), %edx
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	44(%ecx), %ebp
+	movl	%ebp, 44(%eax)
+	movl	%edx, 40(%eax)
+	movl	%esi, 36(%eax)
+	movl	%edi, 32(%eax)
+	movl	%ebx, 28(%eax)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%eax)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%eax)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%eax)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 8(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%eax)
+.LBB69_2:                               # %nocarry
+	addl	$48, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end130:
-	.size	mcl_fp_mont9L, .Lfunc_end130-mcl_fp_mont9L
-
-	.globl	mcl_fp_montNF9L
-	.align	16, 0x90
-	.type	mcl_fp_montNF9L,@function
-mcl_fp_montNF9L:                        # @mcl_fp_montNF9L
-# BB#0:
+.Lfunc_end69:
+	.size	mcl_fp_sub12L, .Lfunc_end69-mcl_fp_sub12L
+                                        # -- End function
+	.globl	mcl_fp_subNF12L                 # -- Begin function mcl_fp_subNF12L
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF12L,@function
+mcl_fp_subNF12L:                        # @mcl_fp_subNF12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$796, %esp              # imm = 0x31C
-	calll	.L131$pb
-.L131$pb:
-	popl	%ebx
-.Ltmp12:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp12-.L131$pb), %ebx
-	movl	828(%esp), %eax
-	movl	-4(%eax), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	752(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	752(%esp), %esi
-	movl	756(%esp), %ebp
-	movl	%esi, %eax
-	imull	%edi, %eax
-	movl	788(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	784(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	780(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	776(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	772(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	768(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	764(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	760(%esp), %edi
-	movl	%eax, (%esp)
-	leal	712(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	712(%esp), %esi
-	adcl	716(%esp), %ebp
-	adcl	720(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	740(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	672(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	708(%esp), %eax
-	addl	672(%esp), %ebp
-	adcl	676(%esp), %edi
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	680(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	684(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	688(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	692(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	696(%esp), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	704(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	632(%esp), %ebp
-	adcl	636(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	656(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	664(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	592(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	628(%esp), %eax
-	addl	592(%esp), %edi
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	596(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	600(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	604(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	608(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	612(%esp), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	616(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	620(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	624(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	adcl	$0, %ebp
-	movl	%edi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	828(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	addl	552(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	572(%esp), %esi
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	576(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	588(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	512(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	548(%esp), %eax
-	movl	32(%esp), %edx          # 4-byte Reload
-	addl	512(%esp), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	516(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	520(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	524(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	528(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	532(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	536(%esp), %ebp
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	540(%esp), %edi
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	544(%esp), %esi
-	adcl	$0, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	movl	32(%esp), %eax          # 4-byte Reload
-	addl	472(%esp), %eax
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	496(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	adcl	500(%esp), %edi
-	movl	%edi, %ebp
-	adcl	504(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	432(%esp), %ecx
-	movl	820(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	movl	468(%esp), %eax
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	432(%esp), %ecx
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	436(%esp), %esi
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	440(%esp), %edi
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	444(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	448(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	452(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	456(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	460(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	464(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	392(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	392(%esp), %ebp
-	adcl	396(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	400(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	412(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	424(%esp), %edi
-	movl	40(%esp), %esi          # 4-byte Reload
-	adcl	428(%esp), %esi
-	movl	824(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	388(%esp), %eax
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	352(%esp), %ecx
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	356(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	360(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	364(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	368(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	372(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	376(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	380(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	384(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	adcl	$0, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	312(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	324(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	340(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	348(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	308(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	272(%esp), %ecx
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	280(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	292(%esp), %ebp
-	adcl	296(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	232(%esp), %edi
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	236(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebp, %edi
-	adcl	252(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	192(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	228(%esp), %ebp
-	movl	%esi, %ecx
-	addl	192(%esp), %ecx
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	196(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	208(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	152(%esp), %ecx
-	movl	828(%esp), %edx
-	calll	.LmulPv288x32
-	addl	152(%esp), %edi
-	adcl	156(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	160(%esp), %edi
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	164(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	188(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	820(%esp), %edx
-	calll	.LmulPv288x32
-	movl	148(%esp), %ebp
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	112(%esp), %ecx
-	adcl	116(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	adcl	120(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	128(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %esi          # 4-byte Reload
-	adcl	132(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	136(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	144(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	72(%esp), %ecx
-	movl	828(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	addl	72(%esp), %edi
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebx          # 4-byte Reload
-	adcl	80(%esp), %ebx
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	84(%esp), %edi
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	92(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	108(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	828(%esp), %eax
-	subl	(%eax), %edx
-	sbbl	4(%eax), %ebx
-	movl	%edi, %ecx
-	sbbl	8(%eax), %ecx
-	movl	52(%esp), %esi          # 4-byte Reload
-	sbbl	12(%eax), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	40(%esp), %esi          # 4-byte Reload
-	sbbl	16(%eax), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	sbbl	20(%eax), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	sbbl	24(%eax), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	sbbl	28(%eax), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	sbbl	32(%eax), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	sarl	$31, %ebp
-	testl	%ebp, %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	js	.LBB131_2
-# BB#1:
-	movl	%edx, %eax
-.LBB131_2:
-	movl	816(%esp), %edx
-	movl	%eax, (%edx)
-	movl	64(%esp), %esi          # 4-byte Reload
-	js	.LBB131_4
-# BB#3:
-	movl	%ebx, %esi
-.LBB131_4:
-	movl	%esi, 4(%edx)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB131_6
-# BB#5:
-	movl	%ecx, %edi
-.LBB131_6:
-	movl	%edi, 8(%edx)
-	js	.LBB131_8
-# BB#7:
-	movl	16(%esp), %ebp          # 4-byte Reload
-.LBB131_8:
-	movl	%ebp, 12(%edx)
-	js	.LBB131_10
-# BB#9:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB131_10:
-	movl	%eax, 16(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB131_12
-# BB#11:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB131_12:
-	movl	%eax, 20(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB131_14
-# BB#13:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB131_14:
-	movl	%eax, 24(%edx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	js	.LBB131_16
-# BB#15:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB131_16:
-	movl	%eax, 28(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB131_18
-# BB#17:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB131_18:
-	movl	%eax, 32(%edx)
-	addl	$796, %esp              # imm = 0x31C
+	subl	$72, %esp
+	movl	96(%esp), %ecx
+	movl	(%ecx), %edx
+	movl	4(%ecx), %eax
+	movl	100(%esp), %edi
+	subl	(%edi), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	sbbl	4(%edi), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	44(%ecx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	40(%ecx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	32(%ecx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	28(%ecx), %ebp
+	movl	24(%ecx), %ebx
+	movl	20(%ecx), %esi
+	movl	16(%ecx), %edx
+	movl	12(%ecx), %eax
+	movl	8(%ecx), %ecx
+	sbbl	8(%edi), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	sbbl	12(%edi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	sbbl	16(%edi), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	sbbl	20(%edi), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	sbbl	24(%edi), %ebx
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	sbbl	28(%edi), %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	32(%edi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	36(%edi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%edi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	sbbl	44(%edi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%eax, %edi
+	sarl	$31, %edi
+	movl	%edi, %edx
+	shldl	$1, %eax, %edx
+	movl	104(%esp), %ebx
+	andl	(%ebx), %edx
+	movl	44(%ebx), %eax
+	andl	%edi, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	40(%ebx), %eax
+	andl	%edi, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	36(%ebx), %eax
+	andl	%edi, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	32(%ebx), %eax
+	andl	%edi, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%ebx), %eax
+	andl	%edi, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	24(%ebx), %eax
+	andl	%edi, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	20(%ebx), %ebp
+	andl	%edi, %ebp
+	movl	16(%ebx), %esi
+	andl	%edi, %esi
+	movl	12(%ebx), %ecx
+	andl	%edi, %ecx
+	movl	8(%ebx), %eax
+	andl	%edi, %eax
+	andl	4(%ebx), %edi
+	addl	64(%esp), %edx                  # 4-byte Folded Reload
+	adcl	68(%esp), %edi                  # 4-byte Folded Reload
+	movl	92(%esp), %ebx
+	movl	%edx, (%ebx)
+	adcl	40(%esp), %eax                  # 4-byte Folded Reload
+	movl	%edi, 4(%ebx)
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 8(%ebx)
+	adcl	48(%esp), %esi                  # 4-byte Folded Reload
+	movl	%ecx, 12(%ebx)
+	adcl	52(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%esi, 16(%ebx)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ebp, 20(%ebx)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	60(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 24(%ebx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%ecx, 28(%ebx)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	8(%esp), %ecx                   # 4-byte Folded Reload
+	movl	%eax, 32(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	12(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 36(%ebx)
+	movl	%eax, 40(%ebx)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%eax, 44(%ebx)
+	addl	$72, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end131:
-	.size	mcl_fp_montNF9L, .Lfunc_end131-mcl_fp_montNF9L
-
-	.globl	mcl_fp_montRed9L
-	.align	16, 0x90
-	.type	mcl_fp_montRed9L,@function
-mcl_fp_montRed9L:                       # @mcl_fp_montRed9L
-# BB#0:
+.Lfunc_end70:
+	.size	mcl_fp_subNF12L, .Lfunc_end70-mcl_fp_subNF12L
+                                        # -- End function
+	.globl	mcl_fpDbl_add12L                # -- Begin function mcl_fpDbl_add12L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add12L,@function
+mcl_fpDbl_add12L:                       # @mcl_fpDbl_add12L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$492, %esp              # imm = 0x1EC
-	calll	.L132$pb
-.L132$pb:
+	subl	$92, %esp
+	movl	116(%esp), %esi
+	movl	(%esi), %eax
+	movl	4(%esi), %ecx
+	movl	120(%esp), %edx
+	addl	(%edx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	adcl	4(%edx), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	movl	92(%esi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	88(%esi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	84(%esi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	80(%esi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	76(%esi), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	72(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	68(%esi), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	64(%esi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	56(%esi), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	52(%esi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	48(%esi), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	44(%esi), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	40(%esi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	36(%esi), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	32(%esi), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	28(%esi), %edi
+	movl	24(%esi), %ecx
+	movl	20(%esi), %ebx
+	movl	16(%esi), %ebp
+	movl	12(%esi), %eax
+	movl	8(%esi), %esi
+	adcl	8(%edx), %esi
+	movl	%esi, 68(%esp)                  # 4-byte Spill
+	adcl	12(%edx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	adcl	16(%edx), %ebp
+	adcl	20(%edx), %ebx
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	adcl	24(%edx), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	adcl	28(%edx), %edi
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	movl	84(%esp), %edi                  # 4-byte Reload
+	adcl	32(%edx), %edi
+	movl	88(%esp), %ecx                  # 4-byte Reload
+	adcl	36(%edx), %ecx
+	movl	(%esp), %esi                    # 4-byte Reload
+	adcl	40(%edx), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	44(%edx), %eax
+	movl	80(%esp), %ebx                  # 4-byte Reload
+	adcl	48(%edx), %ebx
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	52(%edx), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	40(%esp), %esi                  # 4-byte Reload
+	adcl	56(%edx), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	60(%edx), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	64(%edx), %esi
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	68(%edx), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	72(%edx), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	76(%edx), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	80(%edx), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	84(%edx), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	88(%edx), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	92(%edx), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	112(%esp), %edx
+	movl	%eax, 44(%edx)
+	movl	(%esp), %esi                    # 4-byte Reload
+	movl	%esi, 40(%edx)
+	movl	%ecx, 36(%edx)
+	movl	%edi, 32(%edx)
+	movl	52(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 28(%edx)
+	movl	56(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%edx)
+	movl	60(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 20(%edx)
+	movl	%ebp, 16(%edx)
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edx)
+	movl	68(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%edx)
+	movl	72(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 4(%edx)
+	movl	76(%esp), %esi                  # 4-byte Reload
+	movl	%esi, (%edx)
+	setb	48(%esp)                        # 1-byte Folded Spill
+	movl	124(%esp), %ebp
+	movl	%ebx, %eax
+	movl	%ebx, 80(%esp)                  # 4-byte Spill
+	subl	(%ebp), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	sbbl	4(%ebp), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	sbbl	8(%ebp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	sbbl	12(%ebp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	sbbl	16(%ebp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	20(%ebp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%ebp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%ebp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	sbbl	32(%ebp), %ecx
+	movl	12(%esp), %esi                  # 4-byte Reload
+	sbbl	36(%ebp), %esi
+	movl	8(%esp), %edi                   # 4-byte Reload
+	sbbl	40(%ebp), %edi
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	sbbl	44(%ebp), %ebx
+	movzbl	48(%esp), %eax                  # 1-byte Folded Reload
+	sbbl	$0, %eax
+	testb	$1, %al
+	jne	.LBB71_1
+# %bb.2:
+	movl	%ebx, 92(%edx)
+	jne	.LBB71_3
+.LBB71_4:
+	movl	%edi, 88(%edx)
+	movl	60(%esp), %ebx                  # 4-byte Reload
+	movl	56(%esp), %ebp                  # 4-byte Reload
+	jne	.LBB71_5
+.LBB71_6:
+	movl	%esi, 84(%edx)
+	movl	64(%esp), %edi                  # 4-byte Reload
+	jne	.LBB71_7
+.LBB71_8:
+	movl	%ecx, 80(%edx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	52(%esp), %eax                  # 4-byte Reload
+	jne	.LBB71_9
+.LBB71_10:
+	movl	%eax, 76(%edx)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB71_11
+.LBB71_12:
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 72(%edx)
+	jne	.LBB71_13
+.LBB71_14:
+	movl	%ebp, 68(%edx)
+	jne	.LBB71_15
+.LBB71_16:
+	movl	%ebx, 64(%edx)
+	movl	76(%esp), %eax                  # 4-byte Reload
+	jne	.LBB71_17
+.LBB71_18:
+	movl	%edi, 60(%edx)
+	jne	.LBB71_19
+.LBB71_20:
+	movl	%esi, 56(%edx)
+	jne	.LBB71_21
+.LBB71_22:
+	movl	%ecx, 52(%edx)
+	je	.LBB71_24
+.LBB71_23:
+	movl	80(%esp), %eax                  # 4-byte Reload
+.LBB71_24:
+	movl	%eax, 48(%edx)
+	addl	$92, %esp
+	popl	%esi
+	popl	%edi
 	popl	%ebx
-.Ltmp13:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp13-.L132$pb), %ebx
-	movl	520(%esp), %edx
-	movl	-4(%edx), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	516(%esp), %eax
-	movl	(%eax), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	4(%eax), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	%esi, %ecx
-	imull	%edi, %ecx
-	movl	68(%eax), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	64(%eax), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	60(%eax), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	56(%eax), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	52(%eax), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	48(%eax), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	44(%eax), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	40(%eax), %edi
-	movl	%edi, 124(%esp)         # 4-byte Spill
-	movl	36(%eax), %edi
-	movl	%edi, 120(%esp)         # 4-byte Spill
-	movl	32(%eax), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	28(%eax), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	24(%eax), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	20(%eax), %ebp
-	movl	16(%eax), %edi
-	movl	12(%eax), %esi
-	movl	8(%eax), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	(%edx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	32(%edx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	28(%edx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%edx), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	20(%edx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	16(%edx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	12(%edx), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	8(%edx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	4(%edx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ecx, (%esp)
-	leal	448(%esp), %ecx
-	calll	.LmulPv288x32
-	movl	76(%esp), %eax          # 4-byte Reload
-	addl	448(%esp), %eax
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	452(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	460(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	464(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	468(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	sbbl	%eax, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	408(%esp), %ecx
-	movl	520(%esp), %edx
-	calll	.LmulPv288x32
-	movl	76(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	408(%esp), %ecx
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	412(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	416(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	420(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	424(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	428(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	432(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	436(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	440(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ebp
-	adcl	$0, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	520(%esp), %edx
-	calll	.LmulPv288x32
-	addl	368(%esp), %edi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	372(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	404(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	520(%esp), %edx
-	calll	.LmulPv288x32
-	addl	328(%esp), %ebp
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	332(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	364(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	288(%esp), %ecx
-	movl	520(%esp), %edx
-	calll	.LmulPv288x32
-	movl	64(%esp), %eax          # 4-byte Reload
-	addl	288(%esp), %eax
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	292(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	248(%esp), %ecx
-	movl	520(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	addl	248(%esp), %esi
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	252(%esp), %ecx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	264(%esp), %ebp
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%edi, %esi
-	adcl	$0, %esi
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	208(%esp), %ecx
-	movl	520(%esp), %edx
-	calll	.LmulPv288x32
-	addl	208(%esp), %edi
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	212(%esp), %ecx
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	220(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	168(%esp), %ecx
-	movl	520(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv288x32
-	addl	168(%esp), %ebp
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	172(%esp), %ecx
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %ebp         # 4-byte Reload
-	adcl	180(%esp), %ebp
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	184(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	128(%esp), %ecx
-	movl	520(%esp), %edx
-	calll	.LmulPv288x32
-	addl	128(%esp), %edi
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	132(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	%eax, %edi
-	adcl	136(%esp), %ebp
-	movl	%ebp, 124(%esp)         # 4-byte Spill
-	adcl	140(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	144(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	%eax, %ebx
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	152(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	164(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	subl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	124(%esp), %eax         # 4-byte Reload
-	sbbl	16(%esp), %eax          # 4-byte Folded Reload
-	sbbl	24(%esp), %esi          # 4-byte Folded Reload
-	sbbl	28(%esp), %ecx          # 4-byte Folded Reload
-	sbbl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebx         # 4-byte Reload
-	sbbl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebx         # 4-byte Reload
-	sbbl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx          # 4-byte Reload
-	sbbl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 92(%esp)          # 4-byte Spill
-	movl	%edx, %ebx
-	movl	%ebp, %edx
-	sbbl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 104(%esp)         # 4-byte Spill
-	sbbl	$0, %edx
-	andl	$1, %edx
-	jne	.LBB132_2
-# BB#1:
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-.LBB132_2:
-	testb	%dl, %dl
-	movl	120(%esp), %ecx         # 4-byte Reload
-	jne	.LBB132_4
-# BB#3:
-	movl	%edi, %ecx
-.LBB132_4:
-	movl	512(%esp), %edi
-	movl	%ecx, (%edi)
-	movl	88(%esp), %ecx          # 4-byte Reload
-	jne	.LBB132_6
-# BB#5:
-	movl	%eax, 124(%esp)         # 4-byte Spill
-.LBB132_6:
-	movl	124(%esp), %eax         # 4-byte Reload
-	movl	%eax, 4(%edi)
-	movl	96(%esp), %eax          # 4-byte Reload
-	jne	.LBB132_8
-# BB#7:
-	movl	%esi, %eax
-.LBB132_8:
-	movl	%eax, 8(%edi)
-	movl	116(%esp), %eax         # 4-byte Reload
-	movl	%eax, 12(%edi)
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	108(%esp), %ebp         # 4-byte Reload
-	jne	.LBB132_10
-# BB#9:
-	movl	72(%esp), %ebp          # 4-byte Reload
-.LBB132_10:
-	movl	%ebp, 16(%edi)
-	movl	112(%esp), %ebx         # 4-byte Reload
-	jne	.LBB132_12
-# BB#11:
-	movl	76(%esp), %ebx          # 4-byte Reload
-.LBB132_12:
-	movl	%ebx, 20(%edi)
-	movl	100(%esp), %esi         # 4-byte Reload
-	jne	.LBB132_14
-# BB#13:
-	movl	84(%esp), %esi          # 4-byte Reload
-.LBB132_14:
-	movl	%esi, 24(%edi)
-	jne	.LBB132_16
-# BB#15:
-	movl	92(%esp), %ecx          # 4-byte Reload
-.LBB132_16:
+	popl	%ebp
+	retl
+.LBB71_1:
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	movl	%ebx, 92(%edx)
+	je	.LBB71_4
+.LBB71_3:
+	movl	8(%esp), %edi                   # 4-byte Reload
+	movl	%edi, 88(%edx)
+	movl	60(%esp), %ebx                  # 4-byte Reload
+	movl	56(%esp), %ebp                  # 4-byte Reload
+	je	.LBB71_6
+.LBB71_5:
+	movl	12(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 84(%edx)
+	movl	64(%esp), %edi                  # 4-byte Reload
+	je	.LBB71_8
+.LBB71_7:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 80(%edx)
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	52(%esp), %eax                  # 4-byte Reload
+	je	.LBB71_10
+.LBB71_9:
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 76(%edx)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	je	.LBB71_12
+.LBB71_11:
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 72(%edx)
+	je	.LBB71_14
+.LBB71_13:
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 68(%edx)
+	je	.LBB71_16
+.LBB71_15:
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 64(%edx)
+	movl	76(%esp), %eax                  # 4-byte Reload
+	je	.LBB71_18
+.LBB71_17:
+	movl	36(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 60(%edx)
+	je	.LBB71_20
+.LBB71_19:
+	movl	40(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 56(%edx)
+	je	.LBB71_22
+.LBB71_21:
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 52(%edx)
+	jne	.LBB71_23
+	jmp	.LBB71_24
+.Lfunc_end71:
+	.size	mcl_fpDbl_add12L, .Lfunc_end71-mcl_fpDbl_add12L
+                                        # -- End function
+	.globl	mcl_fpDbl_sub12L                # -- Begin function mcl_fpDbl_sub12L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub12L,@function
+mcl_fpDbl_sub12L:                       # @mcl_fpDbl_sub12L
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$92, %esp
+	movl	116(%esp), %esi
+	movl	(%esi), %ecx
+	movl	4(%esi), %eax
+	xorl	%ebp, %ebp
+	movl	120(%esp), %edx
+	subl	(%edx), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	sbbl	4(%edx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	92(%esi), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	88(%esi), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	movl	84(%esi), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	80(%esi), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	76(%esi), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	72(%esi), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	68(%esi), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	64(%esi), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	60(%esi), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	56(%esi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	52(%esi), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	48(%esi), %ebx
+	movl	44(%esi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	40(%esi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	36(%esi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	32(%esi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esi), %edi
+	movl	20(%esi), %ecx
+	movl	16(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%esi), %eax
+	movl	8(%esi), %esi
+	sbbl	8(%edx), %esi
+	movl	%esi, 88(%esp)                  # 4-byte Spill
+	sbbl	12(%edx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	16(%edx), %eax
+	sbbl	20(%edx), %ecx
+	movl	%ecx, 80(%esp)                  # 4-byte Spill
+	sbbl	24(%edx), %edi
+	movl	%edi, 76(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	sbbl	28(%edx), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	sbbl	32(%edx), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	sbbl	36(%edx), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	sbbl	40(%edx), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	sbbl	44(%edx), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	sbbl	48(%edx), %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	sbbl	52(%edx), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	sbbl	56(%edx), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	sbbl	60(%edx), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	sbbl	64(%edx), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
+	sbbl	68(%edx), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	sbbl	72(%edx), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	60(%esp), %esi                  # 4-byte Reload
+	sbbl	76(%edx), %esi
+	movl	%esi, 60(%esp)                  # 4-byte Spill
+	movl	64(%esp), %esi                  # 4-byte Reload
+	sbbl	80(%edx), %esi
+	movl	%esi, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %esi                  # 4-byte Reload
+	sbbl	84(%edx), %esi
+	movl	%esi, 68(%esp)                  # 4-byte Spill
+	movl	72(%esp), %esi                  # 4-byte Reload
+	sbbl	88(%edx), %esi
+	movl	%esi, 72(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	sbbl	92(%edx), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	112(%esp), %edi
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 44(%edi)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 40(%edi)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 36(%edi)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 32(%edi)
+	movl	16(%esp), %ecx                  # 4-byte Reload
 	movl	%ecx, 28(%edi)
-	jne	.LBB132_18
-# BB#17:
-	movl	104(%esp), %eax         # 4-byte Reload
-.LBB132_18:
-	movl	%eax, 32(%edi)
-	addl	$492, %esp              # imm = 0x1EC
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%edi)
+	movl	80(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%edi)
+	movl	%eax, 16(%edi)
+	movl	84(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edi)
+	movl	88(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%edi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%edi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%edi)
+	sbbl	%ebp, %ebp
+	andl	$1, %ebp
+	negl	%ebp
+	movl	124(%esp), %ebx
+	movl	44(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	32(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	28(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	24(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	20(%ebx), %eax
+	andl	%ebp, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	16(%ebx), %esi
+	andl	%ebp, %esi
+	movl	12(%ebx), %edx
+	andl	%ebp, %edx
+	movl	8(%ebx), %ecx
+	andl	%ebp, %ecx
+	movl	4(%ebx), %eax
+	andl	%ebp, %eax
+	andl	(%ebx), %ebp
+	addl	24(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	32(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ebp, 48(%edi)
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 52(%edi)
+	adcl	44(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 56(%edi)
+	adcl	48(%esp), %esi                  # 4-byte Folded Reload
+	movl	%edx, 60(%edi)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	52(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%esi, 64(%edi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 68(%edi)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	60(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 72(%edi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 76(%edi)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 80(%edi)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	72(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 84(%edi)
+	movl	%eax, 88(%edi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	40(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 92(%edi)
+	addl	$92, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end132:
-	.size	mcl_fp_montRed9L, .Lfunc_end132-mcl_fp_montRed9L
-
-	.globl	mcl_fp_addPre9L
-	.align	16, 0x90
-	.type	mcl_fp_addPre9L,@function
-mcl_fp_addPre9L:                        # @mcl_fp_addPre9L
-# BB#0:
+.Lfunc_end72:
+	.size	mcl_fpDbl_sub12L, .Lfunc_end72-mcl_fpDbl_sub12L
+                                        # -- End function
+	.globl	mulPv512x32                     # -- Begin function mulPv512x32
+	.p2align	4, 0x90
+	.type	mulPv512x32,@function
+mulPv512x32:                            # @mulPv512x32 -- multiplies 16 32-bit words by one 32-bit word, stores 17 words
+# %bb.0:
+	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %edi
-	adcl	8(%ecx), %edi
-	movl	16(%esp), %ebx
-	movl	%edx, (%ebx)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%ebx)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ebx)
-	movl	20(%ecx), %edx
-	adcl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ebx)
-	movl	24(%ecx), %esi
-	adcl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ebx)
-	movl	28(%ecx), %edx
-	adcl	%edi, %edx
-	movl	%esi, 24(%ebx)
-	movl	%edx, 28(%ebx)
-	movl	32(%eax), %eax
-	movl	32(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 32(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end133:
-	.size	mcl_fp_addPre9L, .Lfunc_end133-mcl_fp_addPre9L
-
-	.globl	mcl_fp_subPre9L
-	.align	16, 0x90
-	.type	mcl_fp_subPre9L,@function
-mcl_fp_subPre9L:                        # @mcl_fp_subPre9L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edx), %ebx
-	movl	20(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebp)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ebp)
-	movl	24(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ebp)
-	movl	28(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	%edi, 24(%ebp)
-	movl	%esi, 28(%ebp)
-	movl	32(%edx), %edx
-	movl	32(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 32(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
+	subl	$108, %esp                      # spill area for the partial products (slots 0..104)
+	movl	136(%esp), %ebp                 # ebp = 32-bit multiplier (third stack slot above the return address)
+	movl	132(%esp), %ecx                 # ecx = pointer to the 16-word multiplicand (second stack slot)
+	movl	%ebp, %eax                      # mull uses eax as implicit operand and overwrites it, so reload each time
+	mull	60(%ecx)                        # edx:eax = multiplier * word 15 (highest word first)
+	movl	%edx, 104(%esp)                 # 4-byte Spill
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	%ebp, %eax
+	mull	56(%ecx)
+	movl	%edx, 96(%esp)                  # 4-byte Spill
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	52(%ecx)
+	movl	%edx, 88(%esp)                  # 4-byte Spill
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	48(%ecx)
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	44(%ecx)
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	40(%ecx)
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	36(%ecx)
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	32(%ecx)
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	28(%ecx)
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	24(%ecx)
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	20(%ecx)
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	16(%ecx)
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%ebp, %eax
+	mull	12(%ecx)
+	movl	%edx, %ebx                      # high half of the word-3 product stays in ebx
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	%ebp, %eax
+	mull	8(%ecx)
+	movl	%edx, %esi                      # high half of the word-2 product stays in esi
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	%ebp, %eax
+	mull	4(%ecx)
+	movl	%edx, %edi                      # high half of the word-1 product stays in edi
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	%ebp, %eax
+	mull	(%ecx)                          # edx:eax = multiplier * word 0
+	movl	%eax, %ebp                      # keep the low half; eax is about to be reused
+	movl	128(%esp), %eax                 # eax = destination pointer (first stack slot above the return address)
+	movl	%ebp, (%eax)                    # result word 0 = low half of the word-0 product
+	addl	(%esp), %edx                    # 4-byte Folded Reload
+	movl	%edx, 4(%eax)                   # result word 1 = high(word 0) + low(word 1); carry chain starts here
+	adcl	4(%esp), %edi                   # 4-byte Folded Reload
+	movl	%edi, 8(%eax)
+	adcl	8(%esp), %esi                   # 4-byte Folded Reload
+	movl	%esi, 12(%eax)
+	adcl	12(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 16(%eax)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 20(%eax)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 24(%eax)
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	36(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 28(%eax)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	44(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 32(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	52(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 36(%eax)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	60(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 40(%eax)
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 44(%eax)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	adcl	76(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 48(%eax)
+	movl	80(%esp), %ecx                  # 4-byte Reload
+	adcl	84(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 52(%eax)
+	movl	88(%esp), %ecx                  # 4-byte Reload
+	adcl	92(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 56(%eax)
+	movl	96(%esp), %ecx                  # 4-byte Reload
+	adcl	100(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, 60(%eax)
+	movl	104(%esp), %ecx                 # 4-byte Reload
+	adcl	$0, %ecx                        # fold the last carry into the high half of the word-15 product
+	movl	%ecx, 64(%eax)                  # result word 16 (17th and final word)
+	addl	$108, %esp                      # release the spill area
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
-	retl
-.Lfunc_end134:
-	.size	mcl_fp_subPre9L, .Lfunc_end134-mcl_fp_subPre9L
-
-	.globl	mcl_fp_shr1_9L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_9L,@function
-mcl_fp_shr1_9L:                         # @mcl_fp_shr1_9L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	8(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 4(%esi)
-	movl	12(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 8(%esi)
-	movl	16(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 12(%esi)
-	movl	20(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 16(%esi)
-	movl	24(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 20(%esi)
-	movl	28(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 24(%esi)
-	movl	32(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	%edx, 28(%esi)
-	shrl	%eax
-	movl	%eax, 32(%esi)
-	popl	%esi
-	retl
-.Lfunc_end135:
-	.size	mcl_fp_shr1_9L, .Lfunc_end135-mcl_fp_shr1_9L
-
-	.globl	mcl_fp_add9L
-	.align	16, 0x90
-	.type	mcl_fp_add9L,@function
-mcl_fp_add9L:                           # @mcl_fp_add9L
-# BB#0:
+	retl	$4                              # return and drop 4 bytes of caller-pushed stack; eax still holds the destination pointer
+.Lfunc_end73:
+	.size	mulPv512x32, .Lfunc_end73-mulPv512x32
+                                        # -- End function
+	.globl	mcl_fp_mulUnitPre16L            # -- Begin function mcl_fp_mulUnitPre16L
+	.p2align	4, 0x90
+	.type	mcl_fp_mulUnitPre16L,@function
+mcl_fp_mulUnitPre16L:                   # @mcl_fp_mulUnitPre16L -- calls mulPv512x32 into a stack buffer, then copies 17 words out
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$20, %esp
-	movl	48(%esp), %edi
-	movl	(%edi), %ecx
-	movl	4(%edi), %eax
-	movl	44(%esp), %ebx
-	addl	(%ebx), %ecx
-	movl	%ecx, %ebp
-	adcl	4(%ebx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	8(%edi), %eax
-	adcl	8(%ebx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	12(%ebx), %ecx
-	movl	16(%ebx), %eax
-	adcl	12(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	16(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	20(%ebx), %esi
-	adcl	20(%edi), %esi
-	movl	24(%ebx), %edx
-	adcl	24(%edi), %edx
-	movl	28(%ebx), %ecx
-	adcl	28(%edi), %ecx
-	movl	32(%ebx), %eax
-	adcl	32(%edi), %eax
-	movl	40(%esp), %edi
-	movl	%ebp, (%edi)
-	movl	16(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 4(%edi)
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%edi)
-	movl	8(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, 12(%edi)
-	movl	4(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, 16(%edi)
-	movl	%esi, 20(%edi)
-	movl	%edx, 24(%edi)
-	movl	%ecx, 28(%edi)
-	movl	%eax, 32(%edi)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	52(%esp), %edi
-	subl	(%edi), %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	movl	16(%esp), %ebp          # 4-byte Reload
-	sbbl	4(%edi), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebp          # 4-byte Reload
-	sbbl	8(%edi), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %ebp           # 4-byte Reload
-	sbbl	12(%edi), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	4(%esp), %ebp           # 4-byte Reload
-	sbbl	16(%edi), %ebp
-	sbbl	20(%edi), %esi
-	sbbl	24(%edi), %edx
-	sbbl	28(%edi), %ecx
-	sbbl	32(%edi), %eax
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB136_2
-# BB#1:                                 # %nocarry
-	movl	(%esp), %edi            # 4-byte Reload
-	movl	40(%esp), %ebx
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	%ebp, 16(%ebx)
-	movl	%esi, 20(%ebx)
-	movl	%edx, 24(%ebx)
-	movl	%ecx, 28(%ebx)
-	movl	%eax, 32(%ebx)
-.LBB136_2:                              # %carry
-	addl	$20, %esp
+	subl	$124, %esp                      # locals: 17-word product buffer plus spill slots
+	calll	.L74$pb                         # position-independent code: push the address of .L74$pb ...
+.L74$pb:
+	popl	%ebx                            # ... and pop it into ebx
+.Ltmp16:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp16-.L74$pb), %ebx # ebx = GOT address, set up before the @PLT call below
+	subl	$4, %esp                        # 4 bytes of padding; presumably for call-site stack alignment (4 + 3 pushes = 16)
+	movl	156(%esp), %eax                 # eax = 32-bit multiplier (third stack argument)
+	movl	152(%esp), %ecx                 # ecx = source pointer (second stack argument)
+	leal	60(%esp), %edx                  # edx = address of the local product buffer
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp                       # only 12 of the 16 bytes: mulPv512x32 returns with retl $4
+	movl	56(%esp), %eax                  # product word 0 (the buffer is at 56(%esp) now that esp is restored)
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	72(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	76(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	80(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	84(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	88(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	92(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	96(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	100(%esp), %ebp                 # product words 11..16 stay in registers
+	movl	104(%esp), %ebx                 # overwrites the GOT address; no further @PLT call follows
+	movl	108(%esp), %edi
+	movl	112(%esp), %esi
+	movl	116(%esp), %edx
+	movl	120(%esp), %ecx                 # product word 16 (last word of the buffer)
+	movl	144(%esp), %eax                 # eax = destination pointer (first stack argument)
+	movl	%ecx, 64(%eax)                  # store all 17 words, highest offset first
+	movl	%edx, 60(%eax)
+	movl	%esi, 56(%eax)
+	movl	%edi, 52(%eax)
+	movl	%ebx, 48(%eax)
+	movl	%ebp, 44(%eax)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 40(%eax)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 36(%eax)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 32(%eax)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%eax)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%eax)
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%eax)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%eax)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 8(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%eax)
+	addl	$124, %esp                      # release the locals
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end136:
-	.size	mcl_fp_add9L, .Lfunc_end136-mcl_fp_add9L
-
-	.globl	mcl_fp_addNF9L
-	.align	16, 0x90
-	.type	mcl_fp_addNF9L,@function
-mcl_fp_addNF9L:                         # @mcl_fp_addNF9L
-# BB#0:
+.Lfunc_end74:
+	.size	mcl_fp_mulUnitPre16L, .Lfunc_end74-mcl_fp_mulUnitPre16L
+                                        # -- End function
+	.globl	mcl_fpDbl_mulPre16L             # -- Begin function mcl_fpDbl_mulPre16L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_mulPre16L,@function
+mcl_fpDbl_mulPre16L:                    # @mcl_fpDbl_mulPre16L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$72, %esp
-	movl	100(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edi
-	movl	96(%esp), %esi
-	addl	(%esi), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	4(%esi), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	32(%eax), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	28(%eax), %ebp
-	movl	24(%eax), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	20(%eax), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	16(%eax), %ebx
-	movl	12(%eax), %edx
-	movl	8(%eax), %ecx
-	adcl	8(%esi), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	adcl	12(%esi), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	16(%esi), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	20(%esi), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	24(%esi), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	28(%esi), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	32(%esi), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	104(%esp), %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ebp
-	subl	(%esi), %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	sbbl	4(%esi), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	sbbl	8(%esi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	12(%esi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	sbbl	16(%esi), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%esi), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	sbbl	24(%esi), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%esi), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %edx
-	movl	%ecx, %ebp
-	sbbl	32(%esi), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	sarl	$31, %esi
-	testl	%esi, %esi
-	js	.LBB137_2
-# BB#1:
-	movl	(%esp), %eax            # 4-byte Reload
-.LBB137_2:
-	movl	92(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	js	.LBB137_4
-# BB#3:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB137_4:
-	movl	%eax, 4(%ecx)
-	movl	68(%esp), %esi          # 4-byte Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB137_6
-# BB#5:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB137_6:
-	movl	%eax, 8(%ecx)
-	movl	%ebp, %eax
-	js	.LBB137_8
-# BB#7:
-	movl	12(%esp), %edx          # 4-byte Reload
-.LBB137_8:
-	movl	%edx, 12(%ecx)
-	movl	56(%esp), %edx          # 4-byte Reload
-	js	.LBB137_10
-# BB#9:
-	movl	16(%esp), %ebx          # 4-byte Reload
-.LBB137_10:
-	movl	%ebx, 16(%ecx)
-	js	.LBB137_12
-# BB#11:
-	movl	20(%esp), %edi          # 4-byte Reload
-.LBB137_12:
-	movl	%edi, 20(%ecx)
-	js	.LBB137_14
-# BB#13:
-	movl	24(%esp), %esi          # 4-byte Reload
-.LBB137_14:
-	movl	%esi, 24(%ecx)
-	js	.LBB137_16
-# BB#15:
-	movl	28(%esp), %edx          # 4-byte Reload
-.LBB137_16:
-	movl	%edx, 28(%ecx)
-	js	.LBB137_18
-# BB#17:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB137_18:
-	movl	%eax, 32(%ecx)
-	addl	$72, %esp
+	subl	$300, %esp                      # imm = 0x12C
+	calll	.L75$pb
+.L75$pb:
 	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end137:
-	.size	mcl_fp_addNF9L, .Lfunc_end137-mcl_fp_addNF9L
-
-	.globl	mcl_fp_sub9L
-	.align	16, 0x90
-	.type	mcl_fp_sub9L,@function
-mcl_fp_sub9L:                           # @mcl_fp_sub9L
-# BB#0:
+.Ltmp17:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp17-.L75$pb), %esi
+	subl	$4, %esp
+	movl	332(%esp), %ebp
+	movl	328(%esp), %edi
+	movl	%esi, %ebx
+	movl	%esi, 88(%esp)                  # 4-byte Spill
 	pushl	%ebp
-	pushl	%ebx
 	pushl	%edi
-	pushl	%esi
-	subl	$28, %esp
-	movl	52(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	56(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	16(%esi), %edx
-	sbbl	16(%edi), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	20(%esi), %ecx
-	sbbl	20(%edi), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	28(%esi), %ebp
-	sbbl	28(%edi), %ebp
-	movl	32(%esi), %esi
-	sbbl	32(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	48(%esp), %ebx
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	%edx, 16(%ebx)
-	movl	%ecx, 20(%ebx)
-	movl	%eax, 24(%ebx)
-	movl	%ebp, 28(%ebx)
-	movl	%esi, 32(%ebx)
-	je	.LBB138_2
-# BB#1:                                 # %carry
+	pushl	332(%esp)
+	calll	mcl_fpDbl_mulPre8L@PLT
+	addl	$12, %esp
+	leal	32(%ebp), %eax
+	leal	32(%edi), %ecx
+	movl	324(%esp), %edx
+	addl	$64, %edx
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+	calll	mcl_fpDbl_mulPre8L@PLT
+	addl	$16, %esp
+	movl	48(%edi), %eax
+	movl	44(%edi), %ecx
+	movl	40(%edi), %edx
+	movl	32(%edi), %esi
+	movl	36(%edi), %ebx
+	addl	(%edi), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	adcl	4(%edi), %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	adcl	8(%edi), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	adcl	12(%edi), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	16(%edi), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	52(%edi), %eax
+	adcl	20(%edi), %eax
+	movl	%eax, %edx
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	56(%edi), %eax
+	adcl	24(%edi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	60(%edi), %eax
+	adcl	28(%edi), %eax
+	movl	%eax, %ecx
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	setb	80(%esp)                        # 1-byte Folded Spill
+	movl	32(%ebp), %eax
+	addl	(%ebp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%ebp), %eax
+	adcl	4(%ebp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%ebp), %eax
+	adcl	8(%ebp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	44(%ebp), %eax
+	adcl	12(%ebp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	48(%ebp), %esi
+	adcl	16(%ebp), %esi
+	movl	%esi, 64(%esp)                  # 4-byte Spill
+	movl	52(%ebp), %eax
+	adcl	20(%ebp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%ebp), %eax
+	adcl	24(%ebp), %eax
+	movl	%eax, %edi
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	60(%ebp), %ebx
+	adcl	28(%ebp), %ebx
+	movl	%ebx, 76(%esp)                  # 4-byte Spill
+	movl	%ecx, 232(%esp)
+	movl	44(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 228(%esp)
+	movl	%edx, 224(%esp)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 220(%esp)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 216(%esp)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 212(%esp)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 208(%esp)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 204(%esp)
+	movl	%ebx, 200(%esp)
+	movl	%edi, 196(%esp)
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 192(%esp)
+	movl	%esi, 188(%esp)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 184(%esp)
+	movl	56(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 180(%esp)
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 176(%esp)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 172(%esp)
+	setb	72(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movzbl	84(%esp), %esi                  # 1-byte Folded Reload
+	movl	%esi, %ecx
+	negl	%ecx
 	movl	%esi, %edi
-	movl	60(%esp), %esi
-	movl	12(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	adcl	8(%esi), %ecx
-	movl	12(%esi), %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	%ecx, 24(%ebx)
-	movl	28(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 28(%ebx)
-	movl	32(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 32(%ebx)
-.LBB138_2:                              # %nocarry
-	addl	$28, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end138:
-	.size	mcl_fp_sub9L, .Lfunc_end138-mcl_fp_sub9L
-
-	.globl	mcl_fp_subNF9L
-	.align	16, 0x90
-	.type	mcl_fp_subNF9L,@function
-mcl_fp_subNF9L:                         # @mcl_fp_subNF9L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$48, %esp
-	movl	72(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %eax
-	movl	76(%esp), %esi
-	subl	(%esi), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	sbbl	4(%esi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%ecx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	28(%ecx), %edx
-	movl	24(%ecx), %edi
-	movl	20(%ecx), %ebx
-	movl	16(%ecx), %ebp
-	movl	12(%ecx), %eax
-	movl	8(%ecx), %ecx
-	movl	76(%esp), %esi
-	sbbl	8(%esi), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx
-	sbbl	12(%ecx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	sbbl	16(%ecx), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	sbbl	20(%ecx), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	sbbl	24(%ecx), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	sbbl	28(%ecx), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	sbbl	32(%ecx), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%edx, %ecx
-	sarl	$31, %ecx
-	movl	%ecx, %eax
-	shldl	$1, %edx, %eax
-	movl	80(%esp), %ebp
-	movl	12(%ebp), %edx
-	andl	%eax, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	4(%ebp), %edi
-	andl	%eax, %edi
-	andl	(%ebp), %eax
-	movl	32(%ebp), %edx
-	andl	%ecx, %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	28(%ebp), %edx
+	shll	$31, %edi
+	shrdl	$31, %ecx, %edi
+	andl	52(%esp), %edi                  # 4-byte Folded Reload
+	andl	%ecx, 72(%esp)                  # 4-byte Folded Spill
+	andl	%ecx, %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	andl	%ecx, 68(%esp)                  # 4-byte Folded Spill
+	andl	%ecx, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
 	andl	%ecx, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	roll	%ecx
-	movl	24(%ebp), %ebx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
 	andl	%ecx, %ebx
-	movl	20(%ebp), %esi
-	andl	%ecx, %esi
-	movl	16(%ebp), %edx
-	andl	%ecx, %edx
-	andl	8(%ebp), %ecx
-	addl	32(%esp), %eax          # 4-byte Folded Reload
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	68(%esp), %ebp
-	movl	%eax, (%ebp)
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, 4(%ebp)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 8(%ebp)
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebp)
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 16(%ebp)
-	adcl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	%esi, 20(%ebp)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebx, 24(%ebp)
-	movl	%eax, 28(%ebp)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%ebp)
-	addl	$48, %esp
+	movl	%ebx, 56(%esp)                  # 4-byte Spill
+	andl	80(%esp), %ecx                  # 4-byte Folded Reload
+	movzbl	76(%esp), %ebp                  # 1-byte Folded Reload
+	andl	%ebp, %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	%ebp, %edx
+	negl	%edx
+	andl	%edx, 64(%esp)                  # 4-byte Folded Spill
+	andl	%edx, 48(%esp)                  # 4-byte Folded Spill
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	andl	%edx, %ebx
+	andl	%edx, 44(%esp)                  # 4-byte Folded Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	andl	%edx, %esi
+	andl	%edx, 40(%esp)                  # 4-byte Folded Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	andl	%edx, %eax
+	shll	$31, %ebp
+	shrdl	$31, %edx, %ebp
+	andl	20(%esp), %ebp                  # 4-byte Folded Reload
+	addl	%edi, %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	adcl	56(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	60(%esp), %ebp                  # 4-byte Folded Reload
+	adcl	32(%esp), %esi                  # 4-byte Folded Reload
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	68(%esp), %esi                  # 4-byte Folded Reload
+	adcl	16(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edi                  # 4-byte Reload
+	adcl	72(%esp), %edi                  # 4-byte Folded Reload
+	leal	176(%esp), %edx
+	adcl	%ecx, 64(%esp)                  # 4-byte Folded Spill
+	setb	%al
+	movzbl	%al, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	88(%esp), %ebx                  # 4-byte Reload
+	pushl	%edx
+	leal	212(%esp), %eax
+	pushl	%eax
+	leal	248(%esp), %eax
+	pushl	%eax
+	calll	mcl_fpDbl_mulPre8L@PLT
+	addl	$16, %esp
+	movl	16(%esp), %eax                  # 4-byte Reload
+	addl	268(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	272(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	276(%esp), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	280(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	284(%esp), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	288(%esp), %eax
+	adcl	292(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	adcl	296(%esp), %ebp
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	movl	236(%esp), %ecx
+	movl	320(%esp), %esi
+	subl	(%esi), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	240(%esp), %ebx
+	sbbl	4(%esi), %ebx
+	movl	244(%esp), %ecx
+	sbbl	8(%esi), %ecx
+	movl	248(%esp), %edx
+	sbbl	12(%esi), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	252(%esp), %edx
+	sbbl	16(%esi), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	256(%esp), %edx
+	sbbl	20(%esi), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	260(%esp), %edx
+	sbbl	24(%esi), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	264(%esp), %edx
+	sbbl	28(%esi), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	32(%esi), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	sbbl	%edx, 16(%esp)                  # 4-byte Folded Spill
+	movl	36(%esi), %edx
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	sbbl	%edx, 24(%esp)                  # 4-byte Folded Spill
+	movl	40(%esi), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	sbbl	%edx, 36(%esp)                  # 4-byte Folded Spill
+	movl	44(%esi), %edx
+	movl	%edx, 136(%esp)                 # 4-byte Spill
+	sbbl	%edx, 20(%esp)                  # 4-byte Folded Spill
+	movl	48(%esi), %edx
+	movl	%edx, 104(%esp)                 # 4-byte Spill
+	sbbl	%edx, 40(%esp)                  # 4-byte Folded Spill
+	movl	52(%esi), %edx
+	movl	%edx, 100(%esp)                 # 4-byte Spill
+	sbbl	%edx, %eax
+	movl	%eax, %edi
+	movl	56(%esi), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	sbbl	%eax, 44(%esp)                  # 4-byte Folded Spill
+	movl	60(%esi), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	sbbl	%eax, %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	$0, %eax
+	movl	64(%esi), %ebp
+	movl	%ebp, 132(%esp)                 # 4-byte Spill
+	subl	%ebp, 56(%esp)                  # 4-byte Folded Spill
+	movl	68(%esi), %ebp
+	movl	%ebp, 144(%esp)                 # 4-byte Spill
+	sbbl	%ebp, %ebx
+	movl	%ebx, 92(%esp)                  # 4-byte Spill
+	movl	72(%esi), %ebx
+	movl	%ebx, 140(%esp)                 # 4-byte Spill
+	sbbl	%ebx, %ecx
+	movl	%ecx, 88(%esp)                  # 4-byte Spill
+	movl	76(%esi), %ecx
+	movl	%ecx, 128(%esp)                 # 4-byte Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	sbbl	%ecx, %edx
+	movl	80(%esi), %ebx
+	movl	%ebx, 124(%esp)                 # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	sbbl	%ebx, %ecx
+	movl	84(%esi), %ebx
+	movl	%ebx, 120(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 68(%esp)                  # 4-byte Folded Spill
+	movl	88(%esi), %ebx
+	movl	%ebx, 116(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 64(%esp)                  # 4-byte Folded Spill
+	movl	92(%esi), %ebx
+	movl	%ebx, 112(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 52(%esp)                  # 4-byte Folded Spill
+	movl	96(%esi), %ebx
+	movl	%ebx, 108(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 16(%esp)                  # 4-byte Folded Spill
+	movl	100(%esi), %ebx
+	movl	%ebx, 164(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 24(%esp)                  # 4-byte Folded Spill
+	movl	104(%esi), %ebx
+	movl	%ebx, 160(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 36(%esp)                  # 4-byte Folded Spill
+	movl	108(%esi), %ebx
+	movl	%ebx, 156(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 20(%esp)                  # 4-byte Folded Spill
+	movl	112(%esi), %ebx
+	movl	%ebx, 152(%esp)                 # 4-byte Spill
+	sbbl	%ebx, 40(%esp)                  # 4-byte Folded Spill
+	movl	116(%esi), %ebx
+	movl	%ebx, 72(%esp)                  # 4-byte Spill
+	sbbl	%ebx, %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	120(%esi), %edi
+	movl	%edi, 148(%esp)                 # 4-byte Spill
+	sbbl	%edi, 44(%esp)                  # 4-byte Folded Spill
+	movl	124(%esi), %edi
+	movl	%edi, 168(%esp)                 # 4-byte Spill
+	sbbl	%edi, 60(%esp)                  # 4-byte Folded Spill
+	sbbl	$0, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ebp                  # 4-byte Reload
+	addl	48(%esp), %ebp                  # 4-byte Folded Reload
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	adcl	80(%esp), %ebx                  # 4-byte Folded Reload
+	movl	88(%esp), %edi                  # 4-byte Reload
+	adcl	76(%esp), %edi                  # 4-byte Folded Reload
+	adcl	136(%esp), %edx                 # 4-byte Folded Reload
+	adcl	104(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	100(%esp), %eax                 # 4-byte Folded Reload
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	96(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 52(%esi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%esi)
+	movl	%edx, 44(%esi)
+	movl	%edi, 40(%esi)
+	movl	%ebx, 36(%esi)
+	movl	%ebp, 32(%esi)
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	84(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 56(%esi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	132(%esp), %eax                 # 4-byte Folded Reload
+	movl	%edx, 60(%esi)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	144(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%eax, 64(%esi)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	140(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 68(%esi)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	128(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%eax, 72(%esi)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	124(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 76(%esi)
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	120(%esp), %edx                 # 4-byte Folded Reload
+	movl	%eax, 80(%esi)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	116(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%edx, 84(%esi)
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	112(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 88(%esi)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	108(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%eax, 92(%esi)
+	movl	%ecx, 96(%esi)
+	movl	164(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 100(%esi)
+	movl	160(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 104(%esi)
+	movl	156(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 108(%esi)
+	movl	152(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 112(%esi)
+	movl	72(%esp), %eax                  # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 116(%esi)
+	movl	148(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 120(%esi)
+	movl	168(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 124(%esi)
+	addl	$300, %esp                      # imm = 0x12C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end139:
-	.size	mcl_fp_subNF9L, .Lfunc_end139-mcl_fp_subNF9L
-
-	.globl	mcl_fpDbl_add9L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add9L,@function
-mcl_fpDbl_add9L:                        # @mcl_fpDbl_add9L
-# BB#0:
+.Lfunc_end75:
+	.size	mcl_fpDbl_mulPre16L, .Lfunc_end75-mcl_fpDbl_mulPre16L
+                                        # -- End function
+	.globl	mcl_fpDbl_sqrPre16L             # -- Begin function mcl_fpDbl_sqrPre16L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sqrPre16L,@function
+mcl_fpDbl_sqrPre16L:                    # @mcl_fpDbl_sqrPre16L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$68, %esp
-	movl	96(%esp), %edx
-	movl	92(%esp), %edi
-	movl	12(%edi), %esi
-	movl	16(%edi), %ecx
-	movl	8(%edx), %ebx
-	movl	(%edx), %ebp
-	addl	(%edi), %ebp
-	movl	88(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%edx), %ebp
-	adcl	4(%edi), %ebp
-	adcl	8(%edi), %ebx
-	adcl	12(%edx), %esi
-	adcl	16(%edx), %ecx
-	movl	%ebp, 4(%eax)
-	movl	44(%edx), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	%ebx, 8(%eax)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%eax)
-	movl	20(%edi), %esi
-	adcl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%ecx, 16(%eax)
-	movl	24(%edi), %ecx
-	adcl	%ebx, %ecx
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%eax)
-	movl	28(%edi), %esi
-	adcl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%ecx, 24(%eax)
-	movl	32(%edi), %ecx
-	adcl	%ebx, %ecx
-	movl	36(%edx), %ebp
-	movl	%esi, 28(%eax)
-	movl	36(%edi), %esi
-	adcl	%ebp, %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	40(%edx), %esi
-	movl	%ecx, 32(%eax)
-	movl	40(%edi), %eax
-	adcl	%esi, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%edi), %eax
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%edx), %ecx
-	movl	48(%edi), %ebx
-	adcl	%ecx, %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	52(%edx), %eax
-	movl	52(%edi), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	56(%edx), %esi
-	movl	56(%edi), %eax
-	adcl	%esi, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%edx), %ebp
-	movl	60(%edi), %esi
-	adcl	%ebp, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	64(%edx), %eax
-	movl	64(%edi), %ebp
-	adcl	%eax, %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	68(%edx), %edx
-	movl	68(%edi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	100(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	subl	(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	sbbl	4(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	sbbl	8(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	sbbl	12(%edi), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	sbbl	16(%edi), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	sbbl	20(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	movl	32(%esp), %ebp          # 4-byte Reload
-	sbbl	28(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %ebx
-	sbbl	32(%edi), %ebx
-	sbbl	$0, %edx
-	andl	$1, %edx
-	jne	.LBB140_2
-# BB#1:
-	movl	%ebx, %ebp
-.LBB140_2:
-	testb	%dl, %dl
-	movl	60(%esp), %edx          # 4-byte Reload
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	36(%esp), %esi          # 4-byte Reload
-	movl	56(%esp), %edi          # 4-byte Reload
-	movl	52(%esp), %ebx          # 4-byte Reload
-	jne	.LBB140_4
-# BB#3:
-	movl	(%esp), %ecx            # 4-byte Reload
-	movl	4(%esp), %esi           # 4-byte Reload
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	16(%esp), %edx          # 4-byte Reload
-.LBB140_4:
-	movl	88(%esp), %eax
-	movl	%edx, 36(%eax)
-	movl	%ebx, 40(%eax)
-	movl	%edi, 44(%eax)
-	movl	%esi, 48(%eax)
-	movl	%ecx, 52(%eax)
-	movl	44(%esp), %edx          # 4-byte Reload
-	movl	64(%esp), %ecx          # 4-byte Reload
-	jne	.LBB140_6
-# BB#5:
-	movl	20(%esp), %ecx          # 4-byte Reload
-.LBB140_6:
-	movl	%ecx, 56(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	jne	.LBB140_8
-# BB#7:
-	movl	24(%esp), %edx          # 4-byte Reload
-.LBB140_8:
-	movl	%edx, 60(%eax)
-	jne	.LBB140_10
-# BB#9:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB140_10:
-	movl	%ecx, 64(%eax)
-	movl	%ebp, 68(%eax)
-	addl	$68, %esp
+	subl	$284, %esp                      # imm = 0x11C
+	calll	.L76$pb
+.L76$pb:
 	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end140:
-	.size	mcl_fpDbl_add9L, .Lfunc_end140-mcl_fpDbl_add9L
-
-	.globl	mcl_fpDbl_sub9L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub9L,@function
-mcl_fpDbl_sub9L:                        # @mcl_fpDbl_sub9L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
+.Ltmp18:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp18-.L76$pb), %esi
+	subl	$4, %esp
+	movl	312(%esp), %edi
+	movl	308(%esp), %ebp
+	movl	%esi, %ebx
+	movl	%esi, 20(%esp)                  # 4-byte Spill
 	pushl	%edi
-	pushl	%esi
-	subl	$52, %esp
-	movl	76(%esp), %ebx
-	movl	(%ebx), %eax
-	movl	4(%ebx), %edx
-	movl	80(%esp), %ebp
-	subl	(%ebp), %eax
-	sbbl	4(%ebp), %edx
-	movl	8(%ebx), %esi
-	sbbl	8(%ebp), %esi
-	movl	72(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%ebx), %eax
-	sbbl	12(%ebp), %eax
-	movl	%edx, 4(%ecx)
-	movl	16(%ebx), %edx
-	sbbl	16(%ebp), %edx
-	movl	%esi, 8(%ecx)
-	movl	20(%ebp), %esi
-	movl	%eax, 12(%ecx)
-	movl	20(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	24(%ebp), %esi
-	movl	%edx, 16(%ecx)
-	movl	24(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	28(%ebp), %esi
-	movl	%eax, 20(%ecx)
-	movl	28(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	32(%ebp), %esi
-	movl	%edx, 24(%ecx)
-	movl	32(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	36(%ebp), %esi
-	movl	%eax, 28(%ecx)
-	movl	36(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
+	pushl	%edi
+	pushl	%ebp
+	calll	mcl_fpDbl_mulPre8L@PLT
+	addl	$12, %esp
+	leal	32(%edi), %eax
+	leal	64(%ebp), %ecx
+	pushl	%eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mcl_fpDbl_mulPre8L@PLT
+	addl	$16, %esp
+	movl	52(%edi), %ecx
+	movl	48(%edi), %ebx
+	movl	44(%edi), %ebp
+	movl	40(%edi), %esi
+	movl	32(%edi), %eax
+	movl	36(%edi), %edx
+	addl	(%edi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	4(%edi), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	adcl	8(%edi), %esi
+	adcl	12(%edi), %ebp
+	adcl	16(%edi), %ebx
+	adcl	20(%edi), %ecx
+	movl	%ecx, %edx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	56(%edi), %ecx
+	adcl	24(%edi), %ecx
+	movl	60(%edi), %eax
+	adcl	28(%edi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	%eax, 216(%esp)
+	movl	%ecx, 212(%esp)
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	%edx, 208(%esp)
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	movl	%ebx, 204(%esp)
+	movl	%ebp, %edi
+	movl	%ebp, 200(%esp)
+	movl	%esi, 196(%esp)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 192(%esp)
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	movl	%ebp, 188(%esp)
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 184(%esp)
+	movl	%ecx, 180(%esp)
+	movl	%edx, 176(%esp)
+	movl	%ebx, 172(%esp)
+	movl	%edi, 168(%esp)
+	movl	%edi, %ebp
+	movl	%esi, 164(%esp)
+	movl	%esi, %ecx
+	movl	%eax, 160(%esp)
+	movl	%eax, %edx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 156(%esp)
+	setb	%bl
+	subl	$4, %esp
+	movzbl	%bl, %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	%edi, %esi
+	shll	$31, %esi
+	negl	%edi
+	shrdl	$31, %edi, %esi
+	andl	%eax, %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	andl	%edi, %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	andl	%edi, %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	andl	%edi, %ebp
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	andl	%edi, %ebx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	andl	%edi, %eax
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	andl	%edi, %ecx
+	andl	16(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, %edx
+	shldl	$1, %ecx, %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	shldl	$1, %eax, %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	shldl	$1, %ebx, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	shldl	$1, %ebp, %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	shldl	$1, %eax, %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	shldl	$1, %ecx, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	shldl	$1, %esi, %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	shrl	$31, %edi
+	addl	%esi, %esi
+	leal	160(%esp), %eax
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	pushl	%eax
+	leal	196(%esp), %eax
+	pushl	%eax
+	leal	232(%esp), %eax
+	pushl	%eax
+	calll	mcl_fpDbl_mulPre8L@PLT
+	addl	$16, %esp
+	addl	252(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	256(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	260(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	264(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	268(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	272(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	276(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	280(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	32(%esp), %edi                  # 4-byte Folded Reload
+	movl	220(%esp), %ebx
+	movl	304(%esp), %ebp
+	subl	(%ebp), %ebx
+	movl	224(%esp), %eax
+	sbbl	4(%ebp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	228(%esp), %ecx
+	sbbl	8(%ebp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	232(%esp), %ecx
+	sbbl	12(%ebp), %ecx
+	movl	236(%esp), %eax
+	sbbl	16(%ebp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	240(%esp), %edx
+	sbbl	20(%ebp), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	244(%esp), %edx
+	sbbl	24(%ebp), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	248(%esp), %eax
+	sbbl	28(%ebp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	32(%ebp), %eax
+	movl	%eax, 152(%esp)                 # 4-byte Spill
+	sbbl	%eax, %esi
+	movl	36(%ebp), %eax
+	movl	%eax, 144(%esp)                 # 4-byte Spill
+	sbbl	%eax, 8(%esp)                   # 4-byte Folded Spill
 	movl	40(%ebp), %eax
-	movl	%edx, 32(%ecx)
-	movl	40(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
+	movl	%eax, 140(%esp)                 # 4-byte Spill
+	sbbl	%eax, 4(%esp)                   # 4-byte Folded Spill
 	movl	44(%ebp), %eax
-	movl	44(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	sbbl	%eax, 36(%esp)                  # 4-byte Folded Spill
 	movl	48(%ebp), %eax
-	movl	48(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	sbbl	%eax, 20(%esp)                  # 4-byte Folded Spill
 	movl	52(%ebp), %eax
-	movl	52(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	sbbl	%eax, 24(%esp)                  # 4-byte Folded Spill
 	movl	56(%ebp), %eax
-	movl	56(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	sbbl	%eax, 28(%esp)                  # 4-byte Folded Spill
 	movl	60(%ebp), %eax
-	movl	60(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
+	movl	%eax, 148(%esp)                 # 4-byte Spill
+	sbbl	%eax, 12(%esp)                  # 4-byte Folded Spill
+	sbbl	$0, %edi
 	movl	64(%ebp), %eax
-	movl	64(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	subl	%eax, %ebx
 	movl	68(%ebp), %eax
-	movl	68(%ebx), %edx
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
 	sbbl	%eax, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	84(%esp), %ebp
-	jne	.LBB141_1
-# BB#2:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB141_3
-.LBB141_1:
-	movl	32(%ebp), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-.LBB141_3:
-	testb	%al, %al
-	jne	.LBB141_4
-# BB#5:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	movl	$0, %esi
-	jmp	.LBB141_6
-.LBB141_4:
-	movl	(%ebp), %esi
-	movl	4(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB141_6:
-	jne	.LBB141_7
-# BB#8:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB141_9
-.LBB141_7:
-	movl	28(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB141_9:
-	jne	.LBB141_10
-# BB#11:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB141_12
-.LBB141_10:
-	movl	24(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB141_12:
-	jne	.LBB141_13
-# BB#14:
-	movl	$0, %edi
-	jmp	.LBB141_15
-.LBB141_13:
-	movl	20(%ebp), %edi
-.LBB141_15:
-	jne	.LBB141_16
-# BB#17:
-	movl	$0, %ebx
-	jmp	.LBB141_18
-.LBB141_16:
-	movl	16(%ebp), %ebx
-.LBB141_18:
-	jne	.LBB141_19
-# BB#20:
-	movl	%ebp, %eax
-	movl	$0, %ebp
-	jmp	.LBB141_21
-.LBB141_19:
-	movl	%ebp, %eax
-	movl	12(%eax), %ebp
-.LBB141_21:
-	jne	.LBB141_22
-# BB#23:
-	xorl	%eax, %eax
-	jmp	.LBB141_24
-.LBB141_22:
-	movl	8(%eax), %eax
-.LBB141_24:
-	addl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	4(%esp), %edx           # 4-byte Reload
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, 36(%ecx)
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 40(%ecx)
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 48(%ecx)
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 52(%ecx)
-	movl	(%esp), %edx            # 4-byte Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edi, 56(%ecx)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 60(%ecx)
-	movl	%eax, 64(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%ecx)
-	addl	$52, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end141:
-	.size	mcl_fpDbl_sub9L, .Lfunc_end141-mcl_fpDbl_sub9L
-
-	.align	16, 0x90
-	.type	.LmulPv320x32,@function
-.LmulPv320x32:                          # @mulPv320x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$64, %esp
+	movl	72(%ebp), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	sbbl	%eax, 56(%esp)                  # 4-byte Folded Spill
+	movl	76(%ebp), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	sbbl	%eax, %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	80(%ebp), %ecx
+	movl	%ecx, 80(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	sbbl	%ecx, %eax
+	movl	84(%ebp), %ecx
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 52(%esp)                  # 4-byte Folded Spill
+	movl	88(%ebp), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	sbbl	%ecx, 48(%esp)                  # 4-byte Folded Spill
+	movl	92(%ebp), %ecx
+	movl	%ecx, 100(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 44(%esp)                  # 4-byte Folded Spill
+	movl	96(%ebp), %ecx
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	sbbl	%ecx, %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	movl	100(%ebp), %ecx
+	movl	%ecx, 136(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 8(%esp)                   # 4-byte Folded Spill
+	movl	104(%ebp), %ecx
+	movl	%ecx, 132(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 4(%esp)                   # 4-byte Folded Spill
+	movl	108(%ebp), %ecx
+	movl	%ecx, 128(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 36(%esp)                  # 4-byte Folded Spill
+	movl	112(%ebp), %ecx
+	movl	%ecx, 124(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 20(%esp)                  # 4-byte Folded Spill
+	movl	116(%ebp), %ecx
+	movl	%ecx, 120(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 24(%esp)                  # 4-byte Folded Spill
+	movl	120(%ebp), %ecx
+	movl	%ecx, 116(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 28(%esp)                  # 4-byte Folded Spill
+	movl	124(%ebp), %ecx
+	movl	%ecx, 112(%esp)                 # 4-byte Spill
+	sbbl	%ecx, 12(%esp)                  # 4-byte Folded Spill
+	sbbl	$0, %edi
+	addl	152(%esp), %ebx                 # 4-byte Folded Reload
 	movl	%edx, %esi
-	movl	84(%esp), %edi
-	movl	%edi, %eax
-	mull	36(%esi)
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	32(%esi)
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	28(%esi)
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	24(%esi)
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	20(%esi)
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	16(%esi)
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	12(%esi)
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	mull	8(%esi)
-	movl	%edx, %ebp
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	mull	4(%esi)
-	movl	%edx, %ebx
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%edi, %eax
-	mull	(%esi)
-	movl	%eax, (%ecx)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%ecx)
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 8(%ecx)
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 12(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
+	adcl	144(%esp), %esi                 # 4-byte Folded Reload
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	140(%esp), %ecx                 # 4-byte Folded Reload
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	92(%esp), %edx                  # 4-byte Folded Reload
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	adcl	68(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	60(%esp), %edx                  # 4-byte Folded Reload
+	movl	%eax, 52(%ebp)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%ebp)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 44(%ebp)
+	movl	%ecx, 40(%ebp)
+	movl	%esi, 36(%ebp)
+	movl	%ebx, 32(%ebp)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	148(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%edx, 56(%ebp)
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	88(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 60(%ebp)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	108(%esp), %eax                 # 4-byte Folded Reload
+	movl	%edx, 64(%ebp)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	84(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 68(%ebp)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	104(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 72(%ebp)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	80(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 76(%ebp)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	76(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 80(%ebp)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	72(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 84(%ebp)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	100(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 88(%ebp)
+	adcl	96(%esp), %edi                  # 4-byte Folded Reload
+	movl	%eax, 92(%ebp)
+	movl	%edi, 96(%ebp)
+	movl	136(%esp), %eax                 # 4-byte Reload
 	adcl	$0, %eax
-	movl	%eax, 40(%ecx)
-	movl	%ecx, %eax
-	addl	$64, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end142:
-	.size	.LmulPv320x32, .Lfunc_end142-.LmulPv320x32
-
-	.globl	mcl_fp_mulUnitPre10L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre10L,@function
-mcl_fp_mulUnitPre10L:                   # @mcl_fp_mulUnitPre10L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$76, %esp
-	calll	.L143$pb
-.L143$pb:
-	popl	%ebx
-.Ltmp14:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp14-.L143$pb), %ebx
-	movl	104(%esp), %eax
-	movl	%eax, (%esp)
-	leal	32(%esp), %ecx
-	movl	100(%esp), %edx
-	calll	.LmulPv320x32
-	movl	72(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebx
-	movl	48(%esp), %ebp
-	movl	44(%esp), %edi
-	movl	40(%esp), %esi
-	movl	32(%esp), %edx
-	movl	36(%esp), %ecx
-	movl	96(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebp, 16(%eax)
-	movl	%ebx, 20(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	addl	$76, %esp
+	movl	%eax, 100(%ebp)
+	movl	132(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 104(%ebp)
+	movl	128(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 108(%ebp)
+	movl	124(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 112(%ebp)
+	movl	120(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 116(%ebp)
+	movl	116(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 120(%ebp)
+	movl	112(%esp), %eax                 # 4-byte Reload
+	adcl	$0, %eax
+	movl	%eax, 124(%ebp)
+	addl	$284, %esp                      # imm = 0x11C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end143:
-	.size	mcl_fp_mulUnitPre10L, .Lfunc_end143-mcl_fp_mulUnitPre10L
-
-	.globl	mcl_fpDbl_mulPre10L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre10L,@function
-mcl_fpDbl_mulPre10L:                    # @mcl_fpDbl_mulPre10L
-# BB#0:
+.Lfunc_end76:
+	.size	mcl_fpDbl_sqrPre16L, .Lfunc_end76-mcl_fpDbl_sqrPre16L
+                                        # -- End function
+	.globl	mcl_fp_mont16L                  # -- Begin function mcl_fp_mont16L
+	.p2align	4, 0x90
+	.type	mcl_fp_mont16L,@function
+mcl_fp_mont16L:                         # @mcl_fp_mont16L
+# %bb.0:
 	pushl	%ebp
-	movl	%esp, %ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$188, %esp
-	calll	.L144$pb
-.L144$pb:
+	subl	$2412, %esp                     # imm = 0x96C
+	calll	.L77$pb
+.L77$pb:
 	popl	%ebx
-.Ltmp15:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp15-.L144$pb), %ebx
-	movl	%ebx, -128(%ebp)        # 4-byte Spill
-	movl	16(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	12(%ebp), %esi
-	movl	%esi, 4(%esp)
-	movl	8(%ebp), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre5L@PLT
-	leal	20(%edi), %eax
-	movl	%eax, 8(%esp)
-	leal	20(%esi), %eax
-	movl	%eax, 4(%esp)
-	movl	8(%ebp), %eax
-	leal	40(%eax), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre5L@PLT
-	movl	28(%esi), %edi
-	movl	(%esi), %ebx
-	movl	4(%esi), %eax
-	addl	20(%esi), %ebx
-	movl	%ebx, -148(%ebp)        # 4-byte Spill
-	adcl	24(%esi), %eax
-	movl	%eax, -132(%ebp)        # 4-byte Spill
-	adcl	8(%esi), %edi
-	movl	%edi, -140(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -96(%ebp)         # 4-byte Spill
-	movl	16(%ebp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	addl	20(%esi), %eax
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-	adcl	24(%esi), %ecx
-	movl	%ecx, -120(%ebp)        # 4-byte Spill
-	movl	28(%esi), %eax
-	adcl	8(%esi), %eax
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	movl	32(%esi), %eax
-	adcl	12(%esi), %eax
-	movl	36(%esi), %ecx
-	adcl	16(%esi), %ecx
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %esi
-	popl	%eax
-	movl	%esi, -156(%ebp)        # 4-byte Spill
-	movl	%ebx, -124(%ebp)        # 4-byte Spill
-	jb	.LBB144_2
-# BB#1:
-	xorl	%edi, %edi
-	movl	$0, -124(%ebp)          # 4-byte Folded Spill
-.LBB144_2:
-	movl	%edi, -136(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %esi
-	movl	%esi, %ebx
-	movl	36(%ebx), %esi
-	movl	32(%ebx), %edi
-	movl	-96(%ebp), %edx         # 4-byte Reload
+.Ltmp19:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp19-.L77$pb), %ebx
+	movl	2444(%esp), %eax
+	movl	-4(%eax), %esi
+	movl	%esi, 72(%esp)                  # 4-byte Spill
+	movl	2440(%esp), %ecx
+	subl	$4, %esp
+	leal	2348(%esp), %eax
+	pushl	(%ecx)
+	pushl	2444(%esp)
 	pushl	%eax
-	movl	%edx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	12(%ebx), %edi
-	movl	%edi, -116(%ebp)        # 4-byte Spill
-	adcl	16(%ebx), %esi
-	movl	%esi, -144(%ebp)        # 4-byte Spill
-	movl	%ecx, -112(%ebp)        # 4-byte Spill
-	movl	%eax, -104(%ebp)        # 4-byte Spill
-	movl	-160(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -108(%ebp)        # 4-byte Spill
-	movl	-120(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -96(%ebp)         # 4-byte Spill
-	movl	-152(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -100(%ebp)        # 4-byte Spill
-	jb	.LBB144_4
-# BB#3:
-	movl	$0, -112(%ebp)          # 4-byte Folded Spill
-	movl	$0, -104(%ebp)          # 4-byte Folded Spill
-	movl	$0, -108(%ebp)          # 4-byte Folded Spill
-	movl	$0, -96(%ebp)           # 4-byte Folded Spill
-	movl	$0, -100(%ebp)          # 4-byte Folded Spill
-.LBB144_4:
-	movl	-148(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -72(%ebp)
-	movl	-132(%ebp), %edi        # 4-byte Reload
-	movl	%edi, -68(%ebp)
-	movl	-140(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -64(%ebp)
-	movl	%ebx, -92(%ebp)
-	movl	-120(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -88(%ebp)
-	movl	%edx, -84(%ebp)
-	movl	%eax, -80(%ebp)
-	movl	%ecx, -76(%ebp)
-	sbbl	%edx, %edx
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -60(%ebp)
-	movl	-144(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -56(%ebp)
-	movl	-156(%ebp), %ecx        # 4-byte Reload
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	2344(%esp), %edi
+	movl	2348(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	imull	%edi, %eax
+	movl	2408(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	2404(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	2400(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	2396(%esp), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	2392(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	2388(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	2384(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	2380(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	2376(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	2372(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	2368(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	2364(%esp), %ebp
+	movl	2360(%esp), %esi
+	movl	2356(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	2352(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	2276(%esp), %ecx
 	pushl	%eax
-	movl	%ecx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB144_6
-# BB#5:
-	movl	$0, %ebx
-	movl	$0, %eax
-	movl	$0, %edi
-.LBB144_6:
-	movl	%eax, -116(%ebp)        # 4-byte Spill
-	sbbl	%eax, %eax
-	leal	-92(%ebp), %ecx
-	movl	%ecx, 8(%esp)
-	leal	-72(%ebp), %ecx
-	movl	%ecx, 4(%esp)
-	leal	-52(%ebp), %ecx
-	movl	%ecx, (%esp)
-	andl	%eax, %edx
-	movl	-124(%ebp), %eax        # 4-byte Reload
-	addl	%eax, -100(%ebp)        # 4-byte Folded Spill
-	adcl	%edi, -96(%ebp)         # 4-byte Folded Spill
-	movl	-108(%ebp), %esi        # 4-byte Reload
-	adcl	-136(%ebp), %esi        # 4-byte Folded Reload
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -104(%ebp)        # 4-byte Folded Spill
-	movl	-112(%ebp), %edi        # 4-byte Reload
-	adcl	%ebx, %edi
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	%eax, -120(%ebp)        # 4-byte Spill
-	andl	$1, %edx
-	movl	%edx, -116(%ebp)        # 4-byte Spill
-	movl	-128(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre5L@PLT
-	movl	-100(%ebp), %eax        # 4-byte Reload
-	addl	-32(%ebp), %eax
-	movl	%eax, -100(%ebp)        # 4-byte Spill
-	movl	-96(%ebp), %eax         # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -96(%ebp)         # 4-byte Spill
-	adcl	-24(%ebp), %esi
-	movl	%esi, -108(%ebp)        # 4-byte Spill
-	movl	-104(%ebp), %eax        # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -104(%ebp)        # 4-byte Spill
-	adcl	-16(%ebp), %edi
-	movl	%edi, -112(%ebp)        # 4-byte Spill
-	movl	-120(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -116(%ebp)        # 4-byte Folded Spill
-	movl	-52(%ebp), %ecx
-	movl	8(%ebp), %esi
-	subl	(%esi), %ecx
-	movl	-48(%ebp), %ebx
-	sbbl	4(%esi), %ebx
-	movl	-44(%ebp), %eax
-	sbbl	8(%esi), %eax
-	movl	%eax, -120(%ebp)        # 4-byte Spill
-	movl	-40(%ebp), %edx
-	sbbl	12(%esi), %edx
-	movl	-36(%ebp), %edi
-	sbbl	16(%esi), %edi
-	movl	20(%esi), %eax
-	movl	%eax, -124(%ebp)        # 4-byte Spill
-	sbbl	%eax, -100(%ebp)        # 4-byte Folded Spill
-	movl	24(%esi), %eax
-	movl	%eax, -128(%ebp)        # 4-byte Spill
-	sbbl	%eax, -96(%ebp)         # 4-byte Folded Spill
-	movl	28(%esi), %eax
-	movl	%eax, -132(%ebp)        # 4-byte Spill
-	sbbl	%eax, -108(%ebp)        # 4-byte Folded Spill
-	movl	32(%esi), %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-	sbbl	%eax, -104(%ebp)        # 4-byte Folded Spill
-	movl	36(%esi), %eax
-	movl	%eax, -140(%ebp)        # 4-byte Spill
-	sbbl	%eax, -112(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, -116(%ebp)          # 4-byte Folded Spill
-	movl	40(%esi), %eax
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	subl	%eax, %ecx
-	movl	44(%esi), %eax
-	movl	%eax, -164(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ebx
-	movl	48(%esi), %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	sbbl	%eax, -120(%ebp)        # 4-byte Folded Spill
-	movl	52(%esi), %eax
-	movl	%eax, -172(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edx
-	movl	56(%esi), %eax
-	movl	%eax, -176(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edi
-	movl	60(%esi), %eax
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	sbbl	%eax, -100(%ebp)        # 4-byte Folded Spill
-	movl	64(%esi), %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	sbbl	%eax, -96(%ebp)         # 4-byte Folded Spill
-	movl	68(%esi), %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	sbbl	%eax, -108(%ebp)        # 4-byte Folded Spill
-	movl	72(%esi), %eax
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-	sbbl	%eax, -104(%ebp)        # 4-byte Folded Spill
-	movl	76(%esi), %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	sbbl	%eax, -112(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, -116(%ebp)          # 4-byte Folded Spill
-	addl	-124(%ebp), %ecx        # 4-byte Folded Reload
-	adcl	-128(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%ecx, 20(%esi)
-	movl	-120(%ebp), %eax        # 4-byte Reload
-	adcl	-132(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ebx, 24(%esi)
-	adcl	-136(%ebp), %edx        # 4-byte Folded Reload
-	movl	%eax, 28(%esi)
-	adcl	-140(%ebp), %edi        # 4-byte Folded Reload
-	movl	%edx, 32(%esi)
-	movl	-100(%ebp), %eax        # 4-byte Reload
-	adcl	-160(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edi, 36(%esi)
-	movl	-96(%ebp), %ecx         # 4-byte Reload
-	adcl	-164(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 40(%esi)
-	movl	-108(%ebp), %eax        # 4-byte Reload
-	adcl	-168(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 44(%esi)
-	movl	-104(%ebp), %ecx        # 4-byte Reload
-	adcl	-172(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 48(%esi)
-	movl	-112(%ebp), %edx        # 4-byte Reload
-	adcl	-176(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 52(%esi)
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	adcl	-180(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edx, 56(%esi)
-	movl	%eax, 60(%esi)
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 64(%esi)
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 68(%esi)
-	movl	-152(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 72(%esi)
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 76(%esi)
-	addl	$188, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end144:
-	.size	mcl_fpDbl_mulPre10L, .Lfunc_end144-mcl_fpDbl_mulPre10L
-
-	.globl	mcl_fpDbl_sqrPre10L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre10L,@function
-mcl_fpDbl_sqrPre10L:                    # @mcl_fpDbl_sqrPre10L
-# BB#0:
+	pushl	2452(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	2272(%esp), %edi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	2276(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	2280(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	2284(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	2288(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	adcl	2292(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	2296(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	2300(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2304(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %edi                  # 4-byte Reload
+	adcl	2308(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2312(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	2316(%esp), %ebp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	2320(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	2324(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	2328(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	2332(%esp), %esi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	2336(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	2204(%esp), %ecx
+	movzbl	%al, %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	2444(%esp), %eax
+	pushl	4(%eax)
+	pushl	2444(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	addl	2200(%esp), %ecx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	2204(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	2208(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	2212(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	2216(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	2220(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	2224(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2228(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	2232(%esp), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2236(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	2240(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edi                  # 4-byte Reload
+	adcl	2244(%esp), %edi
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	2248(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	2252(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	2256(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	2260(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	2264(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	leal	2132(%esp), %ebp
+	movl	76(%esp), %edx                  # 4-byte Reload
+	movl	%ecx, %esi
+	imull	%ecx, %edx
+	movzbl	%al, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	2452(%esp)
 	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$188, %esp
-	calll	.L145$pb
-.L145$pb:
-	popl	%ebx
-.Ltmp16:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp16-.L145$pb), %ebx
-	movl	%ebx, -120(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %esi
-	movl	%esi, (%esp)
-	calll	mcl_fpDbl_mulPre5L@PLT
-	leal	20(%edi), %eax
-	movl	%eax, 8(%esp)
-	movl	%eax, 4(%esp)
-	leal	40(%esi), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre5L@PLT
-	movl	36(%edi), %eax
-	movl	32(%edi), %ebx
-	movl	28(%edi), %esi
-	movl	(%edi), %ecx
-	movl	4(%edi), %edx
-	addl	20(%edi), %ecx
-	adcl	24(%edi), %edx
-	adcl	8(%edi), %esi
-	adcl	12(%edi), %ebx
-	movl	%ebx, -124(%ebp)        # 4-byte Spill
-	adcl	16(%edi), %eax
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -128(%ebp)        # 4-byte Spill
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -108(%ebp)        # 4-byte Spill
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -104(%ebp)        # 4-byte Spill
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	2128(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	2132(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	2136(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	2140(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	2144(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	2148(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	2152(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2156(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	2160(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2164(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	2168(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	2172(%esp), %edi
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	2176(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	2180(%esp), %esi
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	2184(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	2188(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	2192(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	$0, 36(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	leal	2060(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	2444(%esp)
 	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -100(%ebp)        # 4-byte Spill
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	addl	2056(%esp), %ecx
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	2060(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	2064(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	2068(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	2072(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	2076(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2080(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	2084(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2088(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	2092(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	2096(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ebp                  # 4-byte Reload
+	adcl	2100(%esp), %ebp
+	adcl	2104(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	2108(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	2112(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	2116(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	2120(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %edx                  # 4-byte Reload
+	imull	%ecx, %edx
+	movl	%ecx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	pushl	%edx
+	pushl	2452(%esp)
+	leal	1996(%esp), %eax
 	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -96(%ebp)         # 4-byte Spill
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1984(%esp), %esi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1988(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1992(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1996(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	2000(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	2004(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2008(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	2012(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2016(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	2020(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	2024(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	adcl	2028(%esp), %ebp
+	movl	%ebp, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	2032(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	2036(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	2040(%esp), %edi
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	2044(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	2048(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	$0, %esi
+	subl	$4, %esp
+	leal	1916(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	12(%ecx)
+	pushl	2444(%esp)
 	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	sbbl	%ebx, %ebx
-	movl	%ebx, -116(%ebp)        # 4-byte Spill
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	addl	1912(%esp), %ecx
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1916(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1920(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1924(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1928(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	1932(%esp), %edi
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1936(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1940(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1944(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1948(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1952(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1956(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1960(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1964(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	adcl	1968(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1972(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	1976(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	setb	%al
+	leal	1840(%esp), %ebp
+	subl	$4, %esp
+	movl	76(%esp), %edx                  # 4-byte Reload
+	movl	%ecx, %esi
+	imull	%ecx, %edx
+	movzbl	%al, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	pushl	%edx
+	movl	2452(%esp), %eax
 	pushl	%eax
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB145_1
-# BB#2:
-	movl	$0, -112(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB145_3
-.LBB145_1:
-	leal	(%ecx,%ecx), %edi
-	movl	%edi, -112(%ebp)        # 4-byte Spill
-.LBB145_3:
-	movl	-96(%ebp), %edi         # 4-byte Reload
+	pushl	%ebp
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1840(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1844(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1848(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1852(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	1856(%esp), %ebp
+	adcl	1860(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1864(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	1868(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1872(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1876(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1880(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1884(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1888(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1892(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1896(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	1900(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1904(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	$0, 8(%esp)                     # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	16(%eax)
+	pushl	2444(%esp)
+	leal	1780(%esp), %eax
 	pushl	%eax
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	movl	-124(%ebp), %edi        # 4-byte Reload
-	jb	.LBB145_4
-# BB#5:
-	movl	$0, -96(%ebp)           # 4-byte Folded Spill
-	jmp	.LBB145_6
-.LBB145_4:
-	movl	%edx, %ebx
-	shldl	$1, %ecx, %ebx
-	movl	%ebx, -96(%ebp)         # 4-byte Spill
-.LBB145_6:
-	movl	-100(%ebp), %ebx        # 4-byte Reload
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	28(%esp), %edx                  # 4-byte Reload
+	addl	1768(%esp), %edx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1772(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1776(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	1780(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1784(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1788(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	1792(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1796(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ebp                  # 4-byte Reload
+	adcl	1800(%esp), %ebp
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1804(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1808(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %edi                  # 4-byte Reload
+	adcl	1812(%esp), %edi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1816(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1820(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	1824(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1828(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1832(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1708(%esp), %eax
 	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB145_7
-# BB#8:
-	movl	$0, -100(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB145_9
-.LBB145_7:
-	movl	%esi, %ebx
-	shldl	$1, %edx, %ebx
-	movl	%ebx, -100(%ebp)        # 4-byte Spill
-.LBB145_9:
-	movl	-104(%ebp), %ebx        # 4-byte Reload
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1696(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1700(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1704(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1708(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1712(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1716(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1720(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1724(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	1728(%esp), %ebp
+	movl	%ebp, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1732(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	1736(%esp), %esi
+	adcl	1740(%esp), %edi
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1744(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1748(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1752(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1756(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1760(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	$0, 28(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	20(%eax)
+	pushl	2444(%esp)
+	leal	1636(%esp), %eax
 	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB145_10
-# BB#11:
-	movl	$0, -104(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB145_12
-.LBB145_10:
-	movl	%edi, %ebx
-	shldl	$1, %esi, %ebx
-	movl	%ebx, -104(%ebp)        # 4-byte Spill
-.LBB145_12:
-	movl	-108(%ebp), %ebx        # 4-byte Reload
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	24(%esp), %edx                  # 4-byte Reload
+	addl	1624(%esp), %edx
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1628(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1632(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1636(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	adcl	1640(%esp), %ebp
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1644(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	1648(%esp), %edi
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1652(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1656(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	adcl	1660(%esp), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1664(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1668(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1672(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1676(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1680(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1684(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1688(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1564(%esp), %eax
 	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB145_13
-# BB#14:
-	movl	$0, -108(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB145_15
-.LBB145_13:
-	movl	%eax, %ebx
-	shldl	$1, %edi, %ebx
-	movl	%ebx, -108(%ebp)        # 4-byte Spill
-.LBB145_15:
-	movl	%ecx, -72(%ebp)
-	movl	%edx, -68(%ebp)
-	movl	%esi, -64(%ebp)
-	movl	%edi, -60(%ebp)
-	movl	%eax, -56(%ebp)
-	movl	%ecx, -92(%ebp)
-	movl	%edx, -88(%ebp)
-	movl	%esi, -84(%ebp)
-	movl	%edi, -80(%ebp)
-	movl	%eax, -76(%ebp)
-	movl	-128(%ebp), %ecx        # 4-byte Reload
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1552(%esp), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1556(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1560(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1564(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	1568(%esp), %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1572(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	1576(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1580(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %esi                  # 4-byte Reload
+	adcl	1584(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1588(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1592(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1596(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1600(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1604(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1608(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	1612(%esp), %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1616(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	$0, 24(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	24(%eax)
+	pushl	2444(%esp)
+	leal	1492(%esp), %eax
 	pushl	%eax
-	movl	%ecx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB145_16
-# BB#17:
-	movl	$0, -124(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB145_18
-.LBB145_16:
-	shrl	$31, %eax
-	movl	%eax, -124(%ebp)        # 4-byte Spill
-.LBB145_18:
-	leal	-52(%ebp), %eax
-	movl	%eax, (%esp)
-	leal	-72(%ebp), %eax
-	movl	%eax, 4(%esp)
-	leal	-92(%ebp), %eax
-	movl	%eax, 8(%esp)
-	movl	-116(%ebp), %esi        # 4-byte Reload
-	andl	$1, %esi
-	movl	-120(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre5L@PLT
-	movl	-112(%ebp), %edi        # 4-byte Reload
-	addl	-32(%ebp), %edi
-	movl	-96(%ebp), %eax         # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -96(%ebp)         # 4-byte Spill
-	movl	-100(%ebp), %eax        # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -100(%ebp)        # 4-byte Spill
-	movl	-104(%ebp), %eax        # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -104(%ebp)        # 4-byte Spill
-	movl	-108(%ebp), %eax        # 4-byte Reload
-	adcl	-16(%ebp), %eax
-	movl	%eax, -108(%ebp)        # 4-byte Spill
-	adcl	-124(%ebp), %esi        # 4-byte Folded Reload
-	movl	-52(%ebp), %edx
-	movl	8(%ebp), %eax
-	subl	(%eax), %edx
-	movl	-48(%ebp), %ebx
-	sbbl	4(%eax), %ebx
-	movl	-44(%ebp), %ecx
-	sbbl	8(%eax), %ecx
-	movl	%ecx, -116(%ebp)        # 4-byte Spill
-	movl	-40(%ebp), %ecx
-	sbbl	12(%eax), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	movl	-36(%ebp), %ecx
-	sbbl	16(%eax), %ecx
-	movl	%ecx, -120(%ebp)        # 4-byte Spill
-	movl	20(%eax), %ecx
-	movl	%ecx, -124(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edi
-	movl	%edi, -112(%ebp)        # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, -128(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -96(%ebp)         # 4-byte Folded Spill
-	movl	28(%eax), %ecx
-	movl	%ecx, -132(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -100(%ebp)        # 4-byte Folded Spill
-	movl	32(%eax), %ecx
-	movl	%ecx, -136(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -104(%ebp)        # 4-byte Folded Spill
-	movl	36(%eax), %ecx
-	movl	%ecx, -140(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -108(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, %esi
-	movl	40(%eax), %ecx
-	movl	%ecx, -160(%ebp)        # 4-byte Spill
-	subl	%ecx, %edx
-	movl	44(%eax), %ecx
-	movl	%ecx, -164(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %ebx
-	movl	48(%eax), %ecx
-	movl	%ecx, -168(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -116(%ebp)        # 4-byte Folded Spill
-	movl	52(%eax), %ecx
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	movl	-144(%ebp), %edi        # 4-byte Reload
-	sbbl	%ecx, %edi
-	movl	56(%eax), %ecx
-	movl	%ecx, -176(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -120(%ebp)        # 4-byte Folded Spill
-	movl	60(%eax), %ecx
-	movl	%ecx, -180(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -112(%ebp)        # 4-byte Folded Spill
-	movl	64(%eax), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -96(%ebp)         # 4-byte Folded Spill
-	movl	68(%eax), %ecx
-	movl	%ecx, -148(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -100(%ebp)        # 4-byte Folded Spill
-	movl	72(%eax), %ecx
-	movl	%ecx, -152(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -104(%ebp)        # 4-byte Folded Spill
-	movl	76(%eax), %ecx
-	movl	%ecx, -156(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -108(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, %esi
-	addl	-124(%ebp), %edx        # 4-byte Folded Reload
-	adcl	-128(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%edx, 20(%eax)
-	movl	-116(%ebp), %ecx        # 4-byte Reload
-	adcl	-132(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%ebx, 24(%eax)
-	adcl	-136(%ebp), %edi        # 4-byte Folded Reload
-	movl	%ecx, 28(%eax)
-	movl	-120(%ebp), %edx        # 4-byte Reload
-	adcl	-140(%ebp), %edx        # 4-byte Folded Reload
-	movl	%edi, 32(%eax)
-	movl	-112(%ebp), %ecx        # 4-byte Reload
-	adcl	-160(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 36(%eax)
-	movl	-96(%ebp), %edx         # 4-byte Reload
-	adcl	-164(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 40(%eax)
-	movl	-100(%ebp), %ecx        # 4-byte Reload
-	adcl	-168(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 44(%eax)
-	movl	-104(%ebp), %edx        # 4-byte Reload
-	adcl	-172(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 48(%eax)
-	movl	-108(%ebp), %ecx        # 4-byte Reload
-	adcl	-176(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 52(%eax)
-	adcl	-180(%ebp), %esi        # 4-byte Folded Reload
-	movl	%ecx, 56(%eax)
-	movl	%esi, 60(%eax)
-	movl	-144(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 64(%eax)
-	movl	-148(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 68(%eax)
-	movl	-152(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 72(%eax)
-	movl	-156(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 76(%eax)
-	addl	$188, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end145:
-	.size	mcl_fpDbl_sqrPre10L, .Lfunc_end145-mcl_fpDbl_sqrPre10L
-
-	.globl	mcl_fp_mont10L
-	.align	16, 0x90
-	.type	mcl_fp_mont10L,@function
-mcl_fp_mont10L:                         # @mcl_fp_mont10L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1036, %esp             # imm = 0x40C
-	calll	.L146$pb
-.L146$pb:
-	popl	%ebx
-.Ltmp17:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp17-.L146$pb), %ebx
-	movl	1068(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	992(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	992(%esp), %edi
-	movl	996(%esp), %ebp
-	movl	%edi, %eax
-	imull	%esi, %eax
-	movl	1032(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1028(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	1024(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	1020(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1016(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1012(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1008(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1004(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1000(%esp), %esi
-	movl	%eax, (%esp)
-	leal	944(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	addl	944(%esp), %edi
-	adcl	948(%esp), %ebp
-	adcl	952(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	16(%esp), %edx                  # 4-byte Reload
+	addl	1480(%esp), %edx
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1484(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	1488(%esp), %edi
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1492(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1496(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1500(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1504(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	adcl	1508(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1512(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1516(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1520(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1524(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1528(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1532(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	1536(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1540(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1544(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %ebp
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1420(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1408(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1412(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	1416(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	adcl	1420(%esp), %ebp
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1424(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1428(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edi                  # 4-byte Reload
+	adcl	1432(%esp), %edi
+	adcl	1436(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1440(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1444(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1448(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1452(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1456(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1460(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1464(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1468(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1472(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	$0, 16(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	28(%eax)
+	pushl	2444(%esp)
+	leal	1348(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	40(%esp), %edx                  # 4-byte Reload
+	addl	1336(%esp), %edx
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1340(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	1344(%esp), %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1348(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1352(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	1356(%esp), %edi
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	adcl	1360(%esp), %esi
+	movl	%esi, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1364(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1368(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1372(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1376(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1380(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	1384(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1388(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1392(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	1396(%esp), %ebp
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1400(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1276(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1264(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1268(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1272(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1276(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1280(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %esi                  # 4-byte Reload
+	adcl	1284(%esp), %esi
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1288(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1292(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1296(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1300(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1304(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1308(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	1312(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1316(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1320(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	1324(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1328(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	$0, 40(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	32(%eax)
+	pushl	2444(%esp)
+	leal	1204(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	32(%esp), %edx                  # 4-byte Reload
+	addl	1192(%esp), %edx
+	movl	60(%esp), %edi                  # 4-byte Reload
+	adcl	1196(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1200(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1204(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	1208(%esp), %esi
+	movl	%esi, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1212(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1216(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1220(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	adcl	1224(%esp), %ebp
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1228(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1232(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1236(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1240(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1244(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1248(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1252(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1256(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1132(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1120(%esp), %esi
+	adcl	1124(%esp), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1128(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1132(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1136(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1140(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1144(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1148(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	1152(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	1156(%esp), %esi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1160(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1164(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1168(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1172(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1176(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1180(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	1184(%esp), %edi
+	adcl	$0, 32(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	36(%eax)
+	pushl	2444(%esp)
+	leal	1060(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	60(%esp), %edx                  # 4-byte Reload
+	addl	1048(%esp), %edx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1052(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1056(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ebp                  # 4-byte Reload
+	adcl	1060(%esp), %ebp
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1064(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1068(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1072(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1076(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	adcl	1080(%esp), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1084(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1088(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1092(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1096(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1100(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1104(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	1108(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1112(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	988(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	976(%esp), %esi
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	980(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	1064(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	896(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %edi
-	addl	896(%esp), %ebp
-	adcl	900(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	984(%esp), %edi
+	adcl	988(%esp), %ebp
+	movl	%ebp, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	992(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	996(%esp), %ebp
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1000(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1004(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1008(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1012(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1016(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1020(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %esi                  # 4-byte Reload
+	adcl	1024(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1028(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1032(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1036(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1040(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	$0, 60(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	40(%eax)
+	movl	2444(%esp), %eax
+	pushl	%eax
+	leal	916(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	4(%esp), %edx                   # 4-byte Reload
+	addl	904(%esp), %edx
+	adcl	908(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	912(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
 	adcl	916(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	adcl	920(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	924(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	928(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	932(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
 	adcl	936(%esp), %edi
-	sbbl	%eax, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	848(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	movl	64(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	848(%esp), %ebp
-	adcl	852(%esp), %esi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	856(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	860(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	864(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	868(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	872(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	876(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	880(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	884(%esp), %ebp
-	adcl	888(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	800(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	addl	800(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	832(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	adcl	836(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	940(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	944(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	948(%esp), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	952(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	956(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	960(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	964(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	968(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	844(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	832(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	836(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	840(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%esi, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	752(%esp), %ecx
-	movl	1068(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv320x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	752(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	768(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	780(%esp), %esi
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	784(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	704(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	704(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	716(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	728(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	732(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	736(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	744(%esp), %edi
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	656(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	movl	44(%esp), %eax          # 4-byte Reload
-	addl	656(%esp), %eax
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	676(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	688(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	696(%esp), %edi
-	adcl	$0, %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	608(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	608(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	624(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	636(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	640(%esp), %esi
-	adcl	644(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	648(%esp), %edi
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	560(%esp), %eax
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	572(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	592(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	600(%esp), %edi
-	adcl	$0, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	512(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	512(%esp), %ecx
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebp, %esi
-	adcl	520(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	548(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	464(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %ebp
-	movl	%ebp, %eax
-	addl	464(%esp), %edi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	468(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	472(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	480(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	484(%esp), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	488(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	492(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	496(%esp), %ebp
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	500(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	504(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	1060(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv320x32
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	416(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	432(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	444(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	452(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	368(%esp), %esi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	380(%esp), %esi
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	384(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	400(%esp), %edi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	320(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	320(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	328(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	332(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	348(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	360(%esp), %ebp
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	272(%esp), %esi
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	276(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	288(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	312(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %ebp
-	adcl	$0, %ebp
-	movl	1064(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	%edi, %ecx
-	addl	224(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	236(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	240(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	264(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	176(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %ebp
-	addl	176(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%edi, %esi
-	adcl	192(%esp), %esi
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	196(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	1064(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	128(%esp), %ecx
-	movl	1060(%esp), %edx
-	calll	.LmulPv320x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	128(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	132(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	136(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	140(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	adcl	144(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	152(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	168(%esp), %ebp
-	sbbl	%esi, %esi
-	movl	32(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	80(%esp), %ecx
-	movl	1068(%esp), %edx
-	calll	.LmulPv320x32
-	andl	$1, %esi
-	addl	80(%esp), %edi
-	movl	76(%esp), %eax          # 4-byte Reload
-	movl	64(%esp), %ebx          # 4-byte Reload
-	adcl	84(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	88(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	92(%esp), %ebx
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	108(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	112(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	120(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	1068(%esp), %edx
-	subl	(%edx), %eax
-	sbbl	4(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	%ebx, %ecx
-	sbbl	8(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	sbbl	12(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	sbbl	20(%edx), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%edx), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%edx), %ecx
-	movl	68(%esp), %edi          # 4-byte Reload
-	sbbl	32(%edx), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	sbbl	36(%edx), %ebp
-	movl	%ebp, %edx
-	sbbl	$0, %esi
-	andl	$1, %esi
-	jne	.LBB146_2
-# BB#1:
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-.LBB146_2:
-	movl	%esi, %ecx
-	testb	%cl, %cl
-	movl	76(%esp), %esi          # 4-byte Reload
-	jne	.LBB146_4
-# BB#3:
-	movl	%eax, %esi
-.LBB146_4:
-	movl	1056(%esp), %eax
-	movl	%esi, (%eax)
-	movl	60(%esp), %edi          # 4-byte Reload
-	jne	.LBB146_6
-# BB#5:
-	movl	16(%esp), %edi          # 4-byte Reload
-.LBB146_6:
-	movl	%edi, 4(%eax)
-	jne	.LBB146_8
-# BB#7:
-	movl	20(%esp), %ebx          # 4-byte Reload
-.LBB146_8:
-	movl	%ebx, 8(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	44(%esp), %ecx          # 4-byte Reload
-	jne	.LBB146_10
-# BB#9:
-	movl	24(%esp), %ebp          # 4-byte Reload
-.LBB146_10:
-	movl	%ebp, 12(%eax)
-	jne	.LBB146_12
-# BB#11:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB146_12:
-	movl	%ecx, 16(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	jne	.LBB146_14
-# BB#13:
-	movl	32(%esp), %ecx          # 4-byte Reload
-.LBB146_14:
-	movl	%ecx, 20(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	jne	.LBB146_16
-# BB#15:
-	movl	56(%esp), %ecx          # 4-byte Reload
-.LBB146_16:
-	movl	%ecx, 24(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	68(%esp), %ecx          # 4-byte Reload
-	jne	.LBB146_18
-# BB#17:
-	movl	64(%esp), %ecx          # 4-byte Reload
-.LBB146_18:
-	movl	%ecx, 32(%eax)
-	movl	72(%esp), %ecx          # 4-byte Reload
-	jne	.LBB146_20
-# BB#19:
-	movl	%edx, %ecx
-.LBB146_20:
-	movl	%ecx, 36(%eax)
-	addl	$1036, %esp             # imm = 0x40C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end146:
-	.size	mcl_fp_mont10L, .Lfunc_end146-mcl_fp_mont10L
-
-	.globl	mcl_fp_montNF10L
-	.align	16, 0x90
-	.type	mcl_fp_montNF10L,@function
-mcl_fp_montNF10L:                       # @mcl_fp_montNF10L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1020, %esp             # imm = 0x3FC
-	calll	.L147$pb
-.L147$pb:
-	popl	%ebx
-.Ltmp18:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp18-.L147$pb), %ebx
-	movl	1052(%esp), %eax
-	movl	-4(%eax), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	976(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	976(%esp), %edi
-	movl	980(%esp), %esi
-	movl	%edi, %eax
-	imull	%ebp, %eax
-	movl	1016(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1012(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1008(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1004(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1000(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	996(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	992(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	988(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	984(%esp), %ebp
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	928(%esp), %edi
-	adcl	932(%esp), %esi
-	adcl	936(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	952(%esp), %edi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	880(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	920(%esp), %ecx
-	addl	880(%esp), %esi
-	adcl	884(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	900(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	832(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	832(%esp), %esi
-	adcl	836(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
 	adcl	844(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	848(%esp), %edi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	848(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	852(%esp), %esi
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	856(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	860(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	872(%esp), %esi
-	movl	1048(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	784(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	824(%esp), %ecx
-	addl	784(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	864(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	adcl	868(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	872(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	876(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	880(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	884(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	888(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	892(%esp), %ebp
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	896(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	$0, 4(%esp)                     # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	44(%eax)
+	pushl	2444(%esp)
+	leal	772(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	760(%esp), %edx
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	764(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	768(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	772(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	776(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	780(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	784(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	788(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	792(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	796(%esp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	808(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	820(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	736(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	736(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	760(%esp), %edi
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	764(%esp), %ebp
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	768(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	688(%esp), %ecx
-	movl	1044(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv320x32
-	movl	728(%esp), %eax
-	movl	28(%esp), %edx          # 4-byte Reload
-	addl	688(%esp), %edx
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	692(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	696(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	704(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	708(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	712(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	716(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	720(%esp), %ebp
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	724(%esp), %esi
-	adcl	$0, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %edi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	640(%esp), %ecx
-	movl	1052(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv320x32
-	addl	640(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	656(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	672(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	676(%esp), %esi
-	movl	%esi, %ebp
-	movl	28(%esp), %esi          # 4-byte Reload
-	adcl	680(%esp), %esi
-	movl	1048(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	592(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	632(%esp), %edx
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	592(%esp), %ecx
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	604(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	624(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	adcl	628(%esp), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	544(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	544(%esp), %esi
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	548(%esp), %edi
-	movl	40(%esp), %esi          # 4-byte Reload
-	adcl	552(%esp), %esi
-	movl	36(%esp), %ebp          # 4-byte Reload
-	adcl	556(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	536(%esp), %edx
-	addl	496(%esp), %edi
-	adcl	500(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	adcl	504(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %esi          # 4-byte Reload
-	adcl	528(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	448(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	448(%esp), %edi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	456(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	464(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	480(%esp), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %esi          # 4-byte Reload
-	adcl	488(%esp), %esi
-	movl	1048(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	400(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	440(%esp), %eax
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	400(%esp), %ecx
-	adcl	404(%esp), %ebp
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	408(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	412(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	416(%esp), %edi
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	420(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %edx          # 4-byte Reload
-	adcl	424(%esp), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	428(%esp), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	432(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	436(%esp), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	352(%esp), %esi
-	adcl	356(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	360(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%edi, %esi
-	adcl	368(%esp), %esi
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	372(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	1044(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv320x32
-	movl	344(%esp), %edx
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	304(%esp), %ecx
-	adcl	308(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	316(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	320(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	adcl	324(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	332(%esp), %esi
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	256(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	256(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	272(%esp), %edi
-	adcl	276(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	284(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %esi          # 4-byte Reload
-	adcl	288(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	208(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	248(%esp), %edx
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	208(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	220(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	224(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	236(%esp), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	160(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	160(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%ebp, %edi
-	adcl	176(%esp), %edi
-	movl	28(%esp), %esi          # 4-byte Reload
-	adcl	180(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	192(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	1044(%esp), %edx
-	calll	.LmulPv320x32
-	movl	152(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	112(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	120(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	124(%esp), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	128(%esp), %esi
-	movl	%esi, %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	132(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	136(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	140(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	144(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %ebp
-	movl	%eax, (%esp)
-	leal	64(%esp), %ecx
-	movl	1052(%esp), %edx
-	calll	.LmulPv320x32
-	addl	64(%esp), %ebp
-	movl	%edi, %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	movl	32(%esp), %ebx          # 4-byte Reload
-	adcl	68(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	72(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	76(%esp), %ebx
-	adcl	80(%esp), %ebp
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	84(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	24(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	92(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	96(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	1052(%esp), %edi
-	subl	(%edi), %edx
-	sbbl	4(%edi), %ecx
-	movl	%ebx, %eax
-	sbbl	8(%edi), %eax
-	movl	%ebp, %esi
-	sbbl	12(%edi), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	16(%edi), %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	24(%esp), %esi          # 4-byte Reload
-	sbbl	20(%edi), %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	40(%esp), %esi          # 4-byte Reload
-	sbbl	24(%edi), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	sbbl	28(%edi), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	sbbl	32(%edi), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	sbbl	36(%edi), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	%esi, %edi
-	sarl	$31, %edi
-	testl	%edi, %edi
-	movl	60(%esp), %edi          # 4-byte Reload
-	js	.LBB147_2
-# BB#1:
-	movl	%edx, %edi
-.LBB147_2:
-	movl	1040(%esp), %edx
-	movl	%edi, (%edx)
-	movl	52(%esp), %edi          # 4-byte Reload
-	js	.LBB147_4
-# BB#3:
-	movl	%ecx, %edi
-.LBB147_4:
-	movl	%edi, 4(%edx)
-	js	.LBB147_6
-# BB#5:
-	movl	%eax, %ebx
-.LBB147_6:
-	movl	%ebx, 8(%edx)
-	js	.LBB147_8
-# BB#7:
-	movl	4(%esp), %ebp           # 4-byte Reload
-.LBB147_8:
-	movl	%ebp, 12(%edx)
-	movl	44(%esp), %esi          # 4-byte Reload
-	movl	24(%esp), %eax          # 4-byte Reload
-	js	.LBB147_10
-# BB#9:
-	movl	8(%esp), %esi           # 4-byte Reload
-.LBB147_10:
-	movl	%esi, 16(%edx)
-	js	.LBB147_12
-# BB#11:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB147_12:
-	movl	%eax, 20(%edx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB147_14
-# BB#13:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB147_14:
-	movl	%eax, 24(%edx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	js	.LBB147_16
-# BB#15:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB147_16:
-	movl	%eax, 28(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB147_18
-# BB#17:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB147_18:
-	movl	%eax, 32(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB147_20
-# BB#19:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB147_20:
-	movl	%eax, 36(%edx)
-	addl	$1020, %esp             # imm = 0x3FC
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end147:
-	.size	mcl_fp_montNF10L, .Lfunc_end147-mcl_fp_montNF10L
-
-	.globl	mcl_fp_montRed10L
-	.align	16, 0x90
-	.type	mcl_fp_montRed10L,@function
-mcl_fp_montRed10L:                      # @mcl_fp_montRed10L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$604, %esp              # imm = 0x25C
-	calll	.L148$pb
-.L148$pb:
-	popl	%eax
-.Ltmp19:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp19-.L148$pb), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	632(%esp), %edx
-	movl	-4(%edx), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	628(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	imull	%esi, %ebx
-	movl	76(%ecx), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	72(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%ecx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	64(%ecx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	60(%ecx), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	56(%ecx), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	52(%ecx), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	48(%ecx), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	44(%ecx), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	40(%ecx), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	36(%ecx), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	32(%ecx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	28(%ecx), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	24(%ecx), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	20(%ecx), %ebp
-	movl	16(%ecx), %edi
-	movl	12(%ecx), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	8(%ecx), %esi
-	movl	(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	560(%esp), %ecx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	560(%esp), %eax
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	564(%esp), %ecx
-	adcl	568(%esp), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	576(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	580(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	sbbl	%eax, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	512(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	movl	68(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	512(%esp), %esi
-	movl	4(%esp), %edx           # 4-byte Reload
-	adcl	516(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	520(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	524(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	528(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	532(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	536(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	540(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	544(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	548(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	552(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	464(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	addl	464(%esp), %esi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	468(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	492(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	movl	52(%esp), %eax          # 4-byte Reload
-	addl	416(%esp), %eax
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	420(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	440(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	444(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	movl	60(%esp), %edi          # 4-byte Reload
-	imull	%edi, %eax
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	632(%esp), %eax
-	movl	%eax, %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	addl	368(%esp), %ebp
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	372(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	404(%esp), %ebp
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	%edi, %eax
-	movl	%eax, (%esp)
-	leal	320(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	addl	320(%esp), %esi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	324(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %edi         # 4-byte Reload
-	adcl	344(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	352(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	360(%esp), %esi
-	adcl	$0, 88(%esp)            # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	addl	272(%esp), %ebp
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	276(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	288(%esp), %ebp
-	adcl	292(%esp), %edi
-	movl	%edi, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	296(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	308(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	312(%esp), %esi
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, 68(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	movl	96(%esp), %eax          # 4-byte Reload
-	addl	224(%esp), %eax
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	232(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	236(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	240(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	adcl	244(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	248(%esp), %ebp
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	252(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	256(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	260(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	264(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	%eax, %edi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	176(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	addl	176(%esp), %edi
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	180(%esp), %ecx
-	movl	112(%esp), %edi         # 4-byte Reload
-	adcl	184(%esp), %edi
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	196(%esp), %ebp
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	128(%esp), %ecx
-	movl	632(%esp), %edx
-	movl	64(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv320x32
-	addl	128(%esp), %esi
-	movl	%edi, %eax
-	adcl	132(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%eax, %edi
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	136(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	adcl	140(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	adcl	144(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	%ebp, %edx
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	152(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	76(%esp), %ebx          # 4-byte Reload
-	adcl	164(%esp), %ebx
-	movl	%ebx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	subl	12(%esp), %edi          # 4-byte Folded Reload
-	sbbl	8(%esp), %ecx           # 4-byte Folded Reload
-	sbbl	16(%esp), %esi          # 4-byte Folded Reload
-	sbbl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	120(%esp), %eax         # 4-byte Reload
-	sbbl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	sbbl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	sbbl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	sbbl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %ebx          # 4-byte Reload
-	sbbl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 96(%esp)          # 4-byte Spill
-	sbbl	$0, %eax
-	andl	$1, %eax
-	jne	.LBB148_2
-# BB#1:
-	movl	%edx, 80(%esp)          # 4-byte Spill
-.LBB148_2:
-	testb	%al, %al
-	movl	112(%esp), %edx         # 4-byte Reload
-	jne	.LBB148_4
-# BB#3:
-	movl	%edi, %edx
-.LBB148_4:
-	movl	624(%esp), %edi
-	movl	%edx, (%edi)
-	movl	108(%esp), %edx         # 4-byte Reload
-	jne	.LBB148_6
-# BB#5:
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-.LBB148_6:
-	movl	124(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 4(%edi)
-	movl	116(%esp), %ecx         # 4-byte Reload
-	jne	.LBB148_8
-# BB#7:
-	movl	%esi, %ecx
-.LBB148_8:
-	movl	%ecx, 8(%edi)
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	%eax, 12(%edi)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	movl	120(%esp), %eax         # 4-byte Reload
-	jne	.LBB148_10
-# BB#9:
-	movl	64(%esp), %eax          # 4-byte Reload
-.LBB148_10:
-	movl	%eax, 16(%edi)
-	movl	84(%esp), %eax          # 4-byte Reload
-	movl	104(%esp), %ebp         # 4-byte Reload
-	jne	.LBB148_12
-# BB#11:
-	movl	68(%esp), %ebp          # 4-byte Reload
-.LBB148_12:
-	movl	%ebp, 20(%edi)
-	movl	88(%esp), %ebx          # 4-byte Reload
-	jne	.LBB148_14
-# BB#13:
-	movl	72(%esp), %ebx          # 4-byte Reload
-.LBB148_14:
-	movl	%ebx, 24(%edi)
-	jne	.LBB148_16
-# BB#15:
-	movl	92(%esp), %edx          # 4-byte Reload
-.LBB148_16:
-	movl	%edx, 28(%edi)
-	jne	.LBB148_18
-# BB#17:
-	movl	100(%esp), %ecx         # 4-byte Reload
-.LBB148_18:
-	movl	%ecx, 32(%edi)
-	jne	.LBB148_20
-# BB#19:
-	movl	96(%esp), %eax          # 4-byte Reload
-.LBB148_20:
-	movl	%eax, 36(%edi)
-	addl	$604, %esp              # imm = 0x25C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end148:
-	.size	mcl_fp_montRed10L, .Lfunc_end148-mcl_fp_montRed10L
-
-	.globl	mcl_fp_addPre10L
-	.align	16, 0x90
-	.type	mcl_fp_addPre10L,@function
-mcl_fp_addPre10L:                       # @mcl_fp_addPre10L
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %edi
-	adcl	8(%ecx), %edi
-	movl	16(%esp), %ebx
-	movl	%edx, (%ebx)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%ebx)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ebx)
-	movl	20(%ecx), %edx
-	adcl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ebx)
-	movl	24(%ecx), %esi
-	adcl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ebx)
-	movl	28(%ecx), %edx
-	adcl	%edi, %edx
-	movl	32(%eax), %edi
-	movl	%esi, 24(%ebx)
-	movl	32(%ecx), %esi
-	adcl	%edi, %esi
-	movl	%edx, 28(%ebx)
-	movl	%esi, 32(%ebx)
-	movl	36(%eax), %eax
-	movl	36(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 36(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end149:
-	.size	mcl_fp_addPre10L, .Lfunc_end149-mcl_fp_addPre10L
-
-	.globl	mcl_fp_subPre10L
-	.align	16, 0x90
-	.type	mcl_fp_subPre10L,@function
-mcl_fp_subPre10L:                       # @mcl_fp_subPre10L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edx), %ebx
-	movl	20(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebp)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ebp)
-	movl	24(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ebp)
-	movl	28(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%edi, 24(%ebp)
-	movl	32(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	%esi, 28(%ebp)
-	movl	%edi, 32(%ebp)
-	movl	36(%edx), %edx
-	movl	36(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 36(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end150:
-	.size	mcl_fp_subPre10L, .Lfunc_end150-mcl_fp_subPre10L
-
-	.globl	mcl_fp_shr1_10L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_10L,@function
-mcl_fp_shr1_10L:                        # @mcl_fp_shr1_10L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	8(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 4(%esi)
-	movl	12(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 8(%esi)
-	movl	16(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 12(%esi)
-	movl	20(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 16(%esi)
-	movl	24(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 20(%esi)
-	movl	28(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 24(%esi)
-	movl	32(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 28(%esi)
-	movl	36(%eax), %eax
-	shrdl	$1, %eax, %ecx
-	movl	%ecx, 32(%esi)
-	shrl	%eax
-	movl	%eax, 36(%esi)
-	popl	%esi
-	retl
-.Lfunc_end151:
-	.size	mcl_fp_shr1_10L, .Lfunc_end151-mcl_fp_shr1_10L
-
-	.globl	mcl_fp_add10L
-	.align	16, 0x90
-	.type	mcl_fp_add10L,@function
-mcl_fp_add10L:                          # @mcl_fp_add10L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$24, %esp
-	movl	52(%esp), %edi
-	movl	(%edi), %ecx
-	movl	4(%edi), %eax
-	movl	48(%esp), %ebx
-	addl	(%ebx), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	adcl	4(%ebx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	8(%edi), %eax
-	adcl	8(%ebx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	12(%ebx), %ecx
-	movl	16(%ebx), %eax
-	adcl	12(%edi), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	16(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	20(%ebx), %eax
-	adcl	20(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	24(%ebx), %esi
-	adcl	24(%edi), %esi
-	movl	28(%ebx), %ebp
-	adcl	28(%edi), %ebp
-	movl	32(%ebx), %edx
-	adcl	32(%edi), %edx
-	movl	36(%ebx), %ecx
-	adcl	36(%edi), %ecx
-	movl	44(%esp), %edi
-	movl	(%esp), %ebx            # 4-byte Reload
-	movl	%ebx, (%edi)
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 4(%edi)
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 8(%edi)
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 12(%edi)
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 16(%edi)
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, 20(%edi)
-	movl	%esi, 24(%edi)
-	movl	%ebp, 28(%edi)
-	movl	%edx, 32(%edi)
-	movl	%ecx, 36(%edi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	56(%esp), %edi
-	subl	(%edi), %ebx
-	movl	%ebx, (%esp)            # 4-byte Spill
-	movl	20(%esp), %ebx          # 4-byte Reload
-	sbbl	4(%edi), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %ebx          # 4-byte Reload
-	sbbl	8(%edi), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebx          # 4-byte Reload
-	sbbl	12(%edi), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %ebx           # 4-byte Reload
-	sbbl	16(%edi), %ebx
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	4(%esp), %ebx           # 4-byte Reload
-	sbbl	20(%edi), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	sbbl	24(%edi), %esi
-	sbbl	28(%edi), %ebp
-	sbbl	32(%edi), %edx
-	sbbl	36(%edi), %ecx
-	sbbl	$0, %eax
-	testb	$1, %al
-	jne	.LBB152_2
-# BB#1:                                 # %nocarry
-	movl	(%esp), %edi            # 4-byte Reload
-	movl	44(%esp), %ebx
-	movl	%edi, (%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, 20(%ebx)
-	movl	%esi, 24(%ebx)
-	movl	%ebp, 28(%ebx)
-	movl	%edx, 32(%ebx)
-	movl	%ecx, 36(%ebx)
-.LBB152_2:                              # %carry
-	addl	$24, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end152:
-	.size	mcl_fp_add10L, .Lfunc_end152-mcl_fp_add10L
-
-	.globl	mcl_fp_addNF10L
-	.align	16, 0x90
-	.type	mcl_fp_addNF10L,@function
-mcl_fp_addNF10L:                        # @mcl_fp_addNF10L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$72, %esp
-	movl	100(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	4(%ecx), %esi
-	movl	96(%esp), %edx
-	addl	(%edx), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	4(%edx), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	36(%ecx), %edi
-	movl	32(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	28(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	24(%ecx), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	20(%ecx), %ebp
-	movl	16(%ecx), %ebx
-	movl	12(%ecx), %eax
-	movl	8(%ecx), %esi
-	adcl	8(%edx), %esi
-	adcl	12(%edx), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	16(%edx), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	adcl	20(%edx), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	24(%edx), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	28(%edx), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	32(%edx), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	%esi, %ecx
-	adcl	36(%edx), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	104(%esp), %edi
-	movl	52(%esp), %edx          # 4-byte Reload
-	subl	(%edi), %edx
-	movl	56(%esp), %esi          # 4-byte Reload
-	sbbl	4(%edi), %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	%ecx, %esi
-	sbbl	8(%edi), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	sbbl	12(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	sbbl	16(%edi), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	movl	%esi, %eax
-	movl	%esi, %ebp
-	sbbl	24(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	sbbl	28(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	movl	%esi, %eax
-	movl	%esi, %ebx
-	sbbl	32(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	%eax, %esi
-	sbbl	36(%edi), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	%esi, %edi
-	movl	52(%esp), %esi          # 4-byte Reload
-	sarl	$31, %edi
-	testl	%edi, %edi
-	js	.LBB153_2
-# BB#1:
-	movl	%edx, %esi
-.LBB153_2:
-	movl	92(%esp), %edx
-	movl	%esi, (%edx)
-	movl	56(%esp), %esi          # 4-byte Reload
-	js	.LBB153_4
-# BB#3:
-	movl	(%esp), %esi            # 4-byte Reload
-.LBB153_4:
-	movl	%esi, 4(%edx)
-	movl	%ebp, %edi
-	movl	40(%esp), %esi          # 4-byte Reload
-	js	.LBB153_6
-# BB#5:
-	movl	4(%esp), %ecx           # 4-byte Reload
-.LBB153_6:
-	movl	%ecx, 8(%edx)
-	movl	%ebx, %ecx
-	movl	44(%esp), %ebp          # 4-byte Reload
-	js	.LBB153_8
-# BB#7:
-	movl	8(%esp), %esi           # 4-byte Reload
-.LBB153_8:
-	movl	%esi, 12(%edx)
-	movl	68(%esp), %esi          # 4-byte Reload
-	movl	48(%esp), %ebx          # 4-byte Reload
-	js	.LBB153_10
-# BB#9:
-	movl	12(%esp), %ebp          # 4-byte Reload
-.LBB153_10:
-	movl	%ebp, 16(%edx)
-	js	.LBB153_12
-# BB#11:
-	movl	16(%esp), %ebx          # 4-byte Reload
-.LBB153_12:
-	movl	%ebx, 20(%edx)
-	js	.LBB153_14
-# BB#13:
-	movl	20(%esp), %edi          # 4-byte Reload
-.LBB153_14:
-	movl	%edi, 24(%edx)
-	js	.LBB153_16
-# BB#15:
-	movl	24(%esp), %esi          # 4-byte Reload
-.LBB153_16:
-	movl	%esi, 28(%edx)
-	js	.LBB153_18
-# BB#17:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB153_18:
-	movl	%ecx, 32(%edx)
-	js	.LBB153_20
-# BB#19:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB153_20:
-	movl	%eax, 36(%edx)
-	addl	$72, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end153:
-	.size	mcl_fp_addNF10L, .Lfunc_end153-mcl_fp_addNF10L
-
-	.globl	mcl_fp_sub10L
-	.align	16, 0x90
-	.type	mcl_fp_sub10L,@function
-mcl_fp_sub10L:                          # @mcl_fp_sub10L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$32, %esp
-	movl	56(%esp), %esi
-	movl	(%esi), %ecx
-	movl	4(%esi), %eax
-	xorl	%ebx, %ebx
-	movl	60(%esp), %edi
-	subl	(%edi), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	20(%esi), %edx
-	sbbl	20(%edi), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	24(%esi), %ecx
-	sbbl	24(%edi), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	28(%esi), %eax
-	sbbl	28(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	32(%esi), %ebp
-	sbbl	32(%edi), %ebp
-	movl	36(%esi), %esi
-	sbbl	36(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	52(%esp), %ebx
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	%edx, 20(%ebx)
-	movl	%ecx, 24(%ebx)
-	movl	%eax, 28(%ebx)
-	movl	%ebp, 32(%ebx)
-	movl	%esi, 36(%ebx)
-	je	.LBB154_2
-# BB#1:                                 # %carry
-	movl	%esi, %edi
-	movl	64(%esp), %esi
-	movl	12(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	8(%esi), %ecx
-	movl	12(%esi), %eax
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	%eax, 28(%ebx)
-	movl	32(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 32(%ebx)
-	movl	36(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 36(%ebx)
-.LBB154_2:                              # %nocarry
-	addl	$32, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end154:
-	.size	mcl_fp_sub10L, .Lfunc_end154-mcl_fp_sub10L
-
-	.globl	mcl_fp_subNF10L
-	.align	16, 0x90
-	.type	mcl_fp_subNF10L,@function
-mcl_fp_subNF10L:                        # @mcl_fp_subNF10L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$56, %esp
-	movl	80(%esp), %eax
-	movl	36(%eax), %esi
-	movl	(%eax), %edi
-	movl	4(%eax), %edx
-	movl	84(%esp), %ecx
-	subl	(%ecx), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	sbbl	4(%ecx), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	32(%eax), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	28(%eax), %edi
-	movl	24(%eax), %ebx
-	movl	20(%eax), %ebp
-	movl	16(%eax), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	12(%eax), %edx
-	movl	8(%eax), %eax
-	sbbl	8(%ecx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	sbbl	12(%ecx), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	sbbl	16(%ecx), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	sbbl	20(%ecx), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	sbbl	24(%ecx), %ebx
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	sbbl	28(%ecx), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	sbbl	32(%ecx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	sbbl	36(%ecx), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	sarl	$31, %eax
-	movl	%eax, %edx
-	addl	%edx, %edx
-	movl	%eax, %ecx
-	adcl	%ecx, %ecx
-	movl	%esi, %ebx
-	shrl	$31, %ebx
-	orl	%edx, %ebx
-	movl	88(%esp), %edi
-	movl	20(%edi), %edx
-	andl	%ecx, %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	12(%edi), %edx
-	andl	%ecx, %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	andl	4(%edi), %ecx
-	movl	16(%edi), %edx
-	andl	%ebx, %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	8(%edi), %edx
-	andl	%ebx, %edx
-	andl	(%edi), %ebx
-	movl	36(%edi), %esi
-	andl	%eax, %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	32(%edi), %ebp
-	andl	%eax, %ebp
-	movl	28(%edi), %esi
-	andl	%eax, %esi
-	andl	24(%edi), %eax
-	addl	36(%esp), %ebx          # 4-byte Folded Reload
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	76(%esp), %edi
-	movl	%ebx, (%edi)
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 4(%edi)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 8(%edi)
-	movl	(%esp), %edx            # 4-byte Reload
-	adcl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 12(%edi)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 16(%edi)
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 20(%edi)
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%eax, 24(%edi)
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	%esi, 28(%edi)
-	movl	%ebp, 32(%edi)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%edi)
-	addl	$56, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end155:
-	.size	mcl_fp_subNF10L, .Lfunc_end155-mcl_fp_subNF10L
-
-	.globl	mcl_fpDbl_add10L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add10L,@function
-mcl_fpDbl_add10L:                       # @mcl_fpDbl_add10L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$72, %esp
-	movl	100(%esp), %edx
-	movl	96(%esp), %edi
-	movl	12(%edi), %esi
-	movl	16(%edi), %ecx
-	movl	8(%edx), %ebx
-	movl	(%edx), %ebp
-	addl	(%edi), %ebp
-	movl	92(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%edx), %ebp
-	adcl	4(%edi), %ebp
-	adcl	8(%edi), %ebx
-	adcl	12(%edx), %esi
-	adcl	16(%edx), %ecx
-	movl	%ebp, 4(%eax)
-	movl	48(%edx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%eax)
-	movl	20(%edi), %esi
-	adcl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%ecx, 16(%eax)
-	movl	24(%edi), %ecx
-	adcl	%ebx, %ecx
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%eax)
-	movl	28(%edi), %esi
-	adcl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%ecx, 24(%eax)
-	movl	32(%edi), %ecx
-	adcl	%ebx, %ecx
-	movl	36(%edx), %ebx
-	movl	%esi, 28(%eax)
-	movl	36(%edi), %esi
-	adcl	%ebx, %esi
-	movl	40(%edx), %ebx
-	movl	%ecx, 32(%eax)
-	movl	40(%edi), %ecx
-	adcl	%ebx, %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	44(%edx), %ebx
-	movl	%esi, 36(%eax)
-	movl	44(%edi), %eax
-	adcl	%ebx, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	48(%edi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	52(%edx), %eax
-	movl	52(%edi), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	56(%edx), %eax
-	movl	56(%edi), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	60(%edx), %eax
-	movl	60(%edi), %ecx
-	adcl	%eax, %ecx
-	movl	64(%edx), %esi
-	movl	64(%edi), %eax
-	adcl	%esi, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	68(%edx), %ebx
-	movl	68(%edi), %esi
-	adcl	%ebx, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	72(%edx), %ebx
-	movl	72(%edi), %ebp
-	adcl	%ebx, %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	76(%edx), %edx
-	movl	76(%edi), %edi
-	adcl	%edx, %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	104(%esp), %ebx
-	movl	64(%esp), %edi          # 4-byte Reload
-	subl	(%ebx), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	sbbl	4(%ebx), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	sbbl	8(%ebx), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	sbbl	12(%ebx), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	sbbl	16(%ebx), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%ecx, %edi
-	sbbl	20(%ebx), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	sbbl	24(%ebx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	sbbl	28(%ebx), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	movl	36(%esp), %ebp          # 4-byte Reload
-	sbbl	32(%ebx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebp, %edi
-	sbbl	36(%ebx), %edi
-	sbbl	$0, %edx
-	andl	$1, %edx
-	jne	.LBB156_2
-# BB#1:
-	movl	%edi, %ebp
-.LBB156_2:
-	testb	%dl, %dl
-	movl	64(%esp), %edx          # 4-byte Reload
-	movl	60(%esp), %esi          # 4-byte Reload
-	movl	56(%esp), %edi          # 4-byte Reload
-	movl	52(%esp), %ebx          # 4-byte Reload
-	jne	.LBB156_4
-# BB#3:
-	movl	(%esp), %ecx            # 4-byte Reload
-	movl	4(%esp), %esi           # 4-byte Reload
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	20(%esp), %edx          # 4-byte Reload
-.LBB156_4:
-	movl	92(%esp), %eax
-	movl	%edx, 40(%eax)
-	movl	68(%esp), %edx          # 4-byte Reload
-	movl	%edx, 44(%eax)
-	movl	%ebx, 48(%eax)
-	movl	%edi, 52(%eax)
-	movl	%esi, 56(%eax)
-	movl	%ecx, 60(%eax)
-	movl	44(%esp), %edx          # 4-byte Reload
-	movl	40(%esp), %ecx          # 4-byte Reload
-	jne	.LBB156_6
-# BB#5:
-	movl	24(%esp), %ecx          # 4-byte Reload
-.LBB156_6:
-	movl	%ecx, 64(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	jne	.LBB156_8
-# BB#7:
-	movl	28(%esp), %edx          # 4-byte Reload
-.LBB156_8:
-	movl	%edx, 68(%eax)
-	jne	.LBB156_10
-# BB#9:
-	movl	32(%esp), %ecx          # 4-byte Reload
-.LBB156_10:
-	movl	%ecx, 72(%eax)
-	movl	%ebp, 76(%eax)
-	addl	$72, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end156:
-	.size	mcl_fpDbl_add10L, .Lfunc_end156-mcl_fpDbl_add10L
-
-	.globl	mcl_fpDbl_sub10L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub10L,@function
-mcl_fpDbl_sub10L:                       # @mcl_fpDbl_sub10L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$60, %esp
-	movl	84(%esp), %ebp
-	movl	(%ebp), %edx
-	movl	4(%ebp), %esi
-	movl	88(%esp), %eax
-	subl	(%eax), %edx
-	sbbl	4(%eax), %esi
-	movl	8(%ebp), %edi
-	sbbl	8(%eax), %edi
-	movl	80(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	12(%ebp), %edx
-	sbbl	12(%eax), %edx
-	movl	%esi, 4(%ecx)
-	movl	16(%ebp), %esi
-	sbbl	16(%eax), %esi
-	movl	%edi, 8(%ecx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ecx)
-	movl	20(%ebp), %edx
-	sbbl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ecx)
-	movl	24(%ebp), %esi
-	sbbl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ecx)
-	movl	28(%ebp), %edx
-	sbbl	%edi, %edx
-	movl	32(%eax), %edi
-	movl	%esi, 24(%ecx)
-	movl	32(%ebp), %esi
-	sbbl	%edi, %esi
-	movl	36(%eax), %edi
-	movl	%edx, 28(%ecx)
-	movl	36(%ebp), %edx
-	sbbl	%edi, %edx
-	movl	40(%eax), %edi
-	movl	%esi, 32(%ecx)
-	movl	40(%ebp), %esi
-	sbbl	%edi, %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	44(%eax), %esi
-	movl	%edx, 36(%ecx)
-	movl	44(%ebp), %edx
-	sbbl	%esi, %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	48(%eax), %edx
-	movl	48(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	52(%eax), %edx
-	movl	52(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	56(%eax), %edx
-	movl	56(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	60(%eax), %edx
-	movl	60(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	64(%eax), %edx
-	movl	64(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	68(%eax), %edx
-	movl	68(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	72(%eax), %edx
-	movl	72(%ebp), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	76(%eax), %eax
-	movl	76(%ebp), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	92(%esp), %esi
-	jne	.LBB157_1
-# BB#2:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	jmp	.LBB157_3
-.LBB157_1:
-	movl	36(%esi), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-.LBB157_3:
-	testb	%al, %al
-	jne	.LBB157_4
-# BB#5:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	movl	$0, %ebx
-	jmp	.LBB157_6
-.LBB157_4:
-	movl	(%esi), %ebx
-	movl	4(%esi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB157_6:
-	jne	.LBB157_7
-# BB#8:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB157_9
-.LBB157_7:
-	movl	32(%esi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB157_9:
-	jne	.LBB157_10
-# BB#11:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB157_12
-.LBB157_10:
-	movl	28(%esi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB157_12:
-	jne	.LBB157_13
-# BB#14:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB157_15
-.LBB157_13:
-	movl	24(%esi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB157_15:
-	jne	.LBB157_16
-# BB#17:
-	movl	$0, %ebp
-	jmp	.LBB157_18
-.LBB157_16:
-	movl	20(%esi), %ebp
-.LBB157_18:
-	jne	.LBB157_19
-# BB#20:
-	movl	$0, %eax
-	jmp	.LBB157_21
-.LBB157_19:
-	movl	16(%esi), %eax
-.LBB157_21:
-	jne	.LBB157_22
-# BB#23:
-	movl	$0, %edx
-	jmp	.LBB157_24
-.LBB157_22:
-	movl	12(%esi), %edx
-.LBB157_24:
-	jne	.LBB157_25
-# BB#26:
-	xorl	%esi, %esi
-	jmp	.LBB157_27
-.LBB157_25:
-	movl	8(%esi), %esi
-.LBB157_27:
-	addl	28(%esp), %ebx          # 4-byte Folded Reload
-	movl	8(%esp), %edi           # 4-byte Reload
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 40(%ecx)
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%edi, 44(%ecx)
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, 48(%ecx)
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 52(%ecx)
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 56(%ecx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 60(%ecx)
-	movl	4(%esp), %edx           # 4-byte Reload
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 64(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 68(%ecx)
-	movl	%eax, 72(%ecx)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%ecx)
-	addl	$60, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end157:
-	.size	mcl_fpDbl_sub10L, .Lfunc_end157-mcl_fpDbl_sub10L
-
-	.align	16, 0x90
-	.type	.LmulPv352x32,@function
-.LmulPv352x32:                          # @mulPv352x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$72, %esp
-	movl	%edx, %ebx
-	movl	92(%esp), %edi
-	movl	%edi, %eax
-	mull	40(%ebx)
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	36(%ebx)
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	32(%ebx)
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	28(%ebx)
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	24(%ebx)
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	20(%ebx)
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	16(%ebx)
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	mull	12(%ebx)
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	mull	8(%ebx)
-	movl	%edx, %esi
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%edi, %eax
-	mull	4(%ebx)
-	movl	%edx, %ebp
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%edi, %eax
-	mull	(%ebx)
-	movl	%eax, (%ecx)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%ecx)
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 8(%ecx)
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 12(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%ecx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 44(%ecx)
-	movl	%ecx, %eax
-	addl	$72, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end158:
-	.size	.LmulPv352x32, .Lfunc_end158-.LmulPv352x32
-
-	.globl	mcl_fp_mulUnitPre11L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre11L,@function
-mcl_fp_mulUnitPre11L:                   # @mcl_fp_mulUnitPre11L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$92, %esp
-	calll	.L159$pb
-.L159$pb:
-	popl	%ebx
-.Ltmp20:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp20-.L159$pb), %ebx
-	movl	120(%esp), %eax
-	movl	%eax, (%esp)
-	leal	40(%esp), %ecx
-	movl	116(%esp), %edx
-	calll	.LmulPv352x32
-	movl	84(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp
-	movl	56(%esp), %ebx
-	movl	52(%esp), %edi
-	movl	48(%esp), %esi
-	movl	40(%esp), %edx
-	movl	44(%esp), %ecx
-	movl	112(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	addl	$92, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end159:
-	.size	mcl_fp_mulUnitPre11L, .Lfunc_end159-mcl_fp_mulUnitPre11L
-
-	.globl	mcl_fpDbl_mulPre11L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre11L,@function
-mcl_fpDbl_mulPre11L:                    # @mcl_fpDbl_mulPre11L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$620, %esp              # imm = 0x26C
-	calll	.L160$pb
-.L160$pb:
-	popl	%eax
-.Ltmp21:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp21-.L160$pb), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	movl	648(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	%edx, %ebp
-	movl	%ebx, %edi
-	calll	.LmulPv352x32
-	movl	612(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	596(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	592(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	588(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	584(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	580(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	576(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	568(%esp), %eax
-	movl	572(%esp), %esi
-	movl	640(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	648(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	520(%esp), %ecx
-	movl	%ebp, %edx
-	movl	%edi, %ebx
-	calll	.LmulPv352x32
-	addl	520(%esp), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	564(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	560(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	540(%esp), %ebx
-	movl	536(%esp), %edi
-	movl	532(%esp), %esi
-	movl	524(%esp), %ecx
-	movl	528(%esp), %edx
-	movl	640(%esp), %eax
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%eax)
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	472(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	516(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	512(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	508(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	504(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	500(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	496(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	492(%esp), %ebp
-	movl	488(%esp), %edi
-	movl	484(%esp), %esi
-	movl	476(%esp), %ecx
-	movl	480(%esp), %edx
-	movl	640(%esp), %eax
-	movl	56(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%eax)
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	424(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	468(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	464(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	460(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	456(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	452(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	448(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	444(%esp), %ebx
-	movl	440(%esp), %edi
-	movl	436(%esp), %esi
-	movl	428(%esp), %ecx
-	movl	432(%esp), %edx
-	movl	640(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	376(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	420(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	416(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	404(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	400(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	396(%esp), %ebp
-	movl	392(%esp), %edi
-	movl	388(%esp), %esi
-	movl	380(%esp), %ecx
-	movl	384(%esp), %edx
-	movl	640(%esp), %eax
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 16(%eax)
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	328(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	364(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	360(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	356(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	352(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	348(%esp), %ebx
-	movl	344(%esp), %edi
-	movl	340(%esp), %esi
-	movl	332(%esp), %ecx
-	movl	336(%esp), %edx
-	movl	640(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	280(%esp), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	324(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	320(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	316(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	312(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	304(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	300(%esp), %ebp
-	movl	296(%esp), %edi
-	movl	292(%esp), %esi
-	movl	284(%esp), %ecx
-	movl	288(%esp), %edx
-	movl	640(%esp), %eax
-	movl	16(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 24(%eax)
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	232(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	276(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	272(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	268(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	264(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	252(%esp), %ebx
-	movl	248(%esp), %edi
-	movl	244(%esp), %esi
-	movl	236(%esp), %ecx
-	movl	240(%esp), %edx
-	movl	640(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, %ebp
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	184(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	228(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	224(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	220(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	208(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	204(%esp), %ebp
-	movl	200(%esp), %edi
-	movl	196(%esp), %esi
-	movl	188(%esp), %ecx
-	movl	192(%esp), %edx
-	movl	640(%esp), %eax
-	movl	32(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 32(%eax)
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %edi
-	movl	36(%edi), %eax
-	movl	%eax, (%esp)
-	leal	136(%esp), %ecx
-	movl	644(%esp), %eax
-	movl	%eax, %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	136(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	164(%esp), %ebp
-	movl	160(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	156(%esp), %edi
-	movl	152(%esp), %esi
-	movl	148(%esp), %edx
-	movl	140(%esp), %ecx
-	movl	144(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	640(%esp), %eax
-	movl	56(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 36(%eax)
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	60(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	648(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	88(%esp), %ecx
-	movl	644(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	52(%esp), %eax          # 4-byte Reload
-	addl	88(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	92(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	128(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	112(%esp), %edi
-	movl	108(%esp), %esi
-	movl	104(%esp), %edx
-	movl	100(%esp), %ecx
-	movl	640(%esp), %eax
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 40(%eax)
-	movl	%ebp, 44(%eax)
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 48(%eax)
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 52(%eax)
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 56(%eax)
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 60(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	72(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, 64(%eax)
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 68(%eax)
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 72(%eax)
-	movl	%ecx, 76(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 80(%eax)
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 84(%eax)
-	addl	$620, %esp              # imm = 0x26C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end160:
-	.size	mcl_fpDbl_mulPre11L, .Lfunc_end160-mcl_fpDbl_mulPre11L
-
-	.globl	mcl_fpDbl_sqrPre11L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre11L,@function
-mcl_fpDbl_sqrPre11L:                    # @mcl_fpDbl_sqrPre11L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$620, %esp              # imm = 0x26C
-	calll	.L161$pb
-.L161$pb:
-	popl	%ebx
-.Ltmp22:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp22-.L161$pb), %ebx
-	movl	%ebx, 84(%esp)          # 4-byte Spill
-	movl	644(%esp), %edx
-	movl	(%edx), %eax
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	%edx, %esi
-	movl	%ebx, %edi
-	calll	.LmulPv352x32
-	movl	612(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	596(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	592(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	588(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	584(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	580(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	576(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	568(%esp), %eax
-	movl	572(%esp), %ebp
-	movl	640(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%esi, %edx
-	movl	4(%edx), %eax
-	movl	%eax, (%esp)
-	leal	520(%esp), %ecx
-	movl	%edi, %ebx
-	calll	.LmulPv352x32
-	addl	520(%esp), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	564(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	560(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	540(%esp), %ebx
-	movl	536(%esp), %edi
-	movl	532(%esp), %esi
-	movl	524(%esp), %ecx
-	movl	528(%esp), %edx
-	movl	640(%esp), %eax
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 4(%eax)
-	adcl	60(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	8(%edx), %eax
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	472(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	516(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	512(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	508(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	504(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	500(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	496(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	492(%esp), %ebp
-	movl	488(%esp), %edi
-	movl	484(%esp), %esi
-	movl	476(%esp), %ecx
-	movl	480(%esp), %edx
-	movl	640(%esp), %eax
-	movl	60(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	12(%edx), %eax
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	48(%esp), %eax          # 4-byte Reload
-	addl	424(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	468(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	464(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	460(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	456(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	452(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	448(%esp), %ebx
-	movl	444(%esp), %edi
-	movl	440(%esp), %esi
-	movl	436(%esp), %edx
-	movl	428(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	432(%esp), %ecx
-	movl	640(%esp), %eax
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 80(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	16(%edx), %eax
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	80(%esp), %eax          # 4-byte Reload
-	addl	376(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	420(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	416(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	404(%esp), %ebx
-	movl	400(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	396(%esp), %edi
-	movl	392(%esp), %esi
-	movl	388(%esp), %edx
-	movl	380(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	384(%esp), %ecx
-	movl	640(%esp), %eax
-	movl	80(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	20(%esp), %ebp          # 4-byte Folded Reload
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	20(%edx), %eax
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	328(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	364(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	360(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	356(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	352(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	348(%esp), %ebp
-	movl	344(%esp), %edi
-	movl	340(%esp), %esi
-	movl	332(%esp), %ecx
-	movl	336(%esp), %edx
-	movl	640(%esp), %eax
-	movl	48(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 20(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	adcl	80(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	24(%edx), %eax
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	48(%esp), %eax          # 4-byte Reload
-	addl	280(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	324(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	320(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	316(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	312(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	304(%esp), %ebx
-	movl	300(%esp), %edi
-	movl	296(%esp), %esi
-	movl	292(%esp), %edx
-	movl	284(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	288(%esp), %ecx
-	movl	640(%esp), %eax
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 80(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	28(%edx), %eax
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	80(%esp), %eax          # 4-byte Reload
-	addl	232(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	276(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	272(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	268(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	264(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	260(%esp), %ebx
-	movl	256(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	252(%esp), %edi
-	movl	248(%esp), %esi
-	movl	244(%esp), %edx
-	movl	236(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	240(%esp), %ecx
-	movl	640(%esp), %eax
-	movl	80(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	32(%edx), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	184(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	228(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	224(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	220(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	208(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	204(%esp), %ebp
-	movl	200(%esp), %edi
-	movl	196(%esp), %esi
-	movl	188(%esp), %ecx
-	movl	192(%esp), %edx
-	movl	640(%esp), %eax
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 32(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	36(%edx), %eax
-	movl	%eax, (%esp)
-	leal	136(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	52(%esp), %eax          # 4-byte Reload
-	addl	136(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	164(%esp), %ebp
-	movl	160(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	156(%esp), %edi
-	movl	152(%esp), %esi
-	movl	148(%esp), %edx
-	movl	140(%esp), %ecx
-	movl	144(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	640(%esp), %eax
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 36(%eax)
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	12(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	64(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	644(%esp), %edx
-	movl	40(%edx), %eax
-	movl	%eax, (%esp)
-	leal	88(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	52(%esp), %eax          # 4-byte Reload
-	addl	88(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	92(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	132(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	128(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	112(%esp), %edi
-	movl	108(%esp), %esi
-	movl	104(%esp), %edx
-	movl	100(%esp), %ecx
-	movl	640(%esp), %eax
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 40(%eax)
-	movl	%ebp, 44(%eax)
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 48(%eax)
-	adcl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 52(%eax)
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 56(%eax)
-	adcl	64(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 60(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, 64(%eax)
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	60(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 68(%eax)
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 72(%eax)
-	movl	%ecx, 76(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 80(%eax)
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 84(%eax)
-	addl	$620, %esp              # imm = 0x26C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end161:
-	.size	mcl_fpDbl_sqrPre11L, .Lfunc_end161-mcl_fpDbl_sqrPre11L
-
-	.globl	mcl_fp_mont11L
-	.align	16, 0x90
-	.type	mcl_fp_mont11L,@function
-mcl_fp_mont11L:                         # @mcl_fp_mont11L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1132, %esp             # imm = 0x46C
-	calll	.L162$pb
-.L162$pb:
-	popl	%ebx
-.Ltmp23:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp23-.L162$pb), %ebx
-	movl	1164(%esp), %eax
-	movl	-4(%eax), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1080(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	1080(%esp), %edi
-	movl	1084(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	imull	%ebp, %eax
-	movl	1124(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	1120(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	1116(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1112(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1108(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	1100(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	1096(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1092(%esp), %esi
-	movl	1088(%esp), %ebp
-	movl	%eax, (%esp)
-	leal	1032(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	1032(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1040(%esp), %ebp
-	adcl	1044(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	1076(%esp), %esi
-	sbbl	%edi, %edi
-	movl	1160(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	984(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	andl	$1, %edi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	984(%esp), %ecx
-	adcl	988(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1024(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	adcl	1028(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	936(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	936(%esp), %esi
-	adcl	940(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	964(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	980(%esp), %esi
-	adcl	$0, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	888(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	addl	888(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	912(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	928(%esp), %esi
-	movl	%esi, %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	932(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ebp, %eax
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	840(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	movl	%esi, %eax
-	andl	$1, %eax
-	addl	840(%esp), %ebp
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	844(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	848(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	852(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	856(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	860(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	864(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	868(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	872(%esp), %ebp
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	876(%esp), %esi
-	adcl	880(%esp), %edi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	884(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	792(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	792(%esp), %ecx
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	820(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	adcl	824(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	adcl	828(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	836(%esp), %esi
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	744(%esp), %ecx
-	movl	1164(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv352x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	744(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	768(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	776(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	788(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	1156(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv352x32
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	696(%esp), %ecx
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	716(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	724(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	728(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	648(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	648(%esp), %ebp
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %ebp          # 4-byte Reload
-	adcl	656(%esp), %ebp
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	676(%esp), %edi
-	adcl	680(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %esi          # 4-byte Reload
-	adcl	692(%esp), %esi
-	adcl	$0, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	600(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	24(%esp), %ecx          # 4-byte Reload
-	addl	600(%esp), %ecx
-	adcl	604(%esp), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	adcl	608(%esp), %ebp
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	624(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	640(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	andl	$1, %esi
-	movl	%esi, %eax
-	addl	552(%esp), %edi
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	556(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	%ebp, %edi
-	adcl	560(%esp), %edi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	564(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	568(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	572(%esp), %esi
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	576(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	580(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	584(%esp), %ebp
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	588(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	592(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	596(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	28(%esp), %ecx          # 4-byte Reload
-	addl	504(%esp), %ecx
-	adcl	508(%esp), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	520(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	532(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	536(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	456(%esp), %ecx
-	movl	1164(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv352x32
-	andl	$1, %ebp
-	movl	%ebp, %eax
-	addl	456(%esp), %edi
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	460(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	464(%esp), %ebp
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	468(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	472(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	480(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	484(%esp), %edi
-	adcl	488(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	492(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	496(%esp), %esi
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	500(%esp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	408(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	32(%esp), %ecx          # 4-byte Reload
-	addl	408(%esp), %ecx
-	adcl	412(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	428(%esp), %ebp
-	adcl	432(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	444(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	360(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	360(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	368(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	380(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	384(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	392(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	312(%esp), %ecx
-	adcl	316(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	332(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	340(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	348(%esp), %edi
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	264(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	276(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	284(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	300(%esp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %edi          # 4-byte Reload
-	adcl	304(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	216(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	216(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	224(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	232(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	adcl	252(%esp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	168(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	movl	%esi, %ecx
-	andl	$1, %ecx
-	addl	168(%esp), %ebp
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	172(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	184(%esp), %ebp
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	188(%esp), %edi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	120(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	%esi, %ecx
-	addl	120(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	128(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	132(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	136(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	144(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	152(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	adcl	156(%esp), %ebp
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	20(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	72(%esp), %ecx
-	movl	1164(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv352x32
-	andl	$1, %esi
-	addl	72(%esp), %edi
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	80(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	84(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	88(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	92(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	24(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	108(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	112(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	1164(%esp), %ebp
-	subl	(%ebp), %eax
-	movl	%ecx, %edx
-	sbbl	4(%ebp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	sbbl	8(%ebp), %ecx
-	sbbl	12(%ebp), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	sbbl	16(%ebp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	sbbl	20(%ebp), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	24(%esp), %edi          # 4-byte Reload
-	sbbl	24(%ebp), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	28(%esp), %ebx          # 4-byte Reload
-	sbbl	28(%ebp), %ebx
-	movl	32(%esp), %edi          # 4-byte Reload
-	sbbl	32(%ebp), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	sbbl	36(%ebp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	sbbl	40(%ebp), %edi
-	movl	%edi, %ebp
-	sbbl	$0, %esi
-	andl	$1, %esi
-	jne	.LBB162_2
-# BB#1:
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-.LBB162_2:
-	movl	%esi, %ebx
-	testb	%bl, %bl
-	movl	68(%esp), %ebx          # 4-byte Reload
-	jne	.LBB162_4
-# BB#3:
-	movl	%eax, %ebx
-.LBB162_4:
-	movl	1152(%esp), %eax
-	movl	%ebx, (%eax)
-	movl	56(%esp), %edi          # 4-byte Reload
-	jne	.LBB162_6
-# BB#5:
-	movl	%edx, %edi
-.LBB162_6:
-	movl	%edi, 4(%eax)
-	movl	52(%esp), %edx          # 4-byte Reload
-	jne	.LBB162_8
-# BB#7:
-	movl	%ecx, %edx
-.LBB162_8:
-	movl	%edx, 8(%eax)
-	jne	.LBB162_10
-# BB#9:
-	movl	4(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-.LBB162_10:
-	movl	48(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	jne	.LBB162_12
-# BB#11:
-	movl	8(%esp), %ecx           # 4-byte Reload
-.LBB162_12:
-	movl	%ecx, 16(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	jne	.LBB162_14
-# BB#13:
-	movl	12(%esp), %ecx          # 4-byte Reload
-.LBB162_14:
-	movl	%ecx, 20(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	jne	.LBB162_16
-# BB#15:
-	movl	16(%esp), %ecx          # 4-byte Reload
-.LBB162_16:
-	movl	%ecx, 24(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	jne	.LBB162_18
-# BB#17:
-	movl	20(%esp), %ecx          # 4-byte Reload
-.LBB162_18:
-	movl	%ecx, 32(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	jne	.LBB162_20
-# BB#19:
-	movl	60(%esp), %ecx          # 4-byte Reload
-.LBB162_20:
-	movl	%ecx, 36(%eax)
-	movl	64(%esp), %ecx          # 4-byte Reload
-	jne	.LBB162_22
-# BB#21:
-	movl	%ebp, %ecx
-.LBB162_22:
-	movl	%ecx, 40(%eax)
-	addl	$1132, %esp             # imm = 0x46C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end162:
-	.size	mcl_fp_mont11L, .Lfunc_end162-mcl_fp_mont11L
-
-	.globl	mcl_fp_montNF11L
-	.align	16, 0x90
-	.type	mcl_fp_montNF11L,@function
-mcl_fp_montNF11L:                       # @mcl_fp_montNF11L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1132, %esp             # imm = 0x46C
-	calll	.L163$pb
-.L163$pb:
-	popl	%ebx
-.Ltmp24:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp24-.L163$pb), %ebx
-	movl	1164(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1080(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	1080(%esp), %ebp
-	movl	1084(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	1124(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	1120(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	1116(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1112(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1108(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	1100(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	1096(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1092(%esp), %esi
-	movl	1088(%esp), %edi
-	movl	%eax, (%esp)
-	leal	1032(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	1032(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1040(%esp), %edi
-	adcl	1044(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	1048(%esp), %ebp
-	movl	28(%esp), %esi          # 4-byte Reload
-	adcl	1052(%esp), %esi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	984(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	1028(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	984(%esp), %ecx
-	adcl	988(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	996(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	adcl	1000(%esp), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	1004(%esp), %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	936(%esp), %ecx
-	movl	1164(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv352x32
-	addl	936(%esp), %ebp
-	adcl	940(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	956(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	960(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	980(%esp), %ebp
-	movl	1160(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	888(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	932(%esp), %eax
-	addl	888(%esp), %edi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	892(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	896(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	900(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	904(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	adcl	908(%esp), %esi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	912(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	916(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	920(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	924(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	928(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	adcl	$0, %ebp
-	movl	%edi, %eax
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	840(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	840(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	856(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	860(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	872(%esp), %edi
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	876(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	884(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	792(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	836(%esp), %eax
-	movl	44(%esp), %edx          # 4-byte Reload
-	addl	792(%esp), %edx
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	796(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	800(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	804(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	808(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	812(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	816(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	820(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	824(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	828(%esp), %ebp
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	832(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	744(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	744(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	764(%esp), %esi
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	768(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	780(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	784(%esp), %ebp
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	740(%esp), %edx
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	696(%esp), %eax
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	704(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	708(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	712(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	716(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	720(%esp), %edi
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	724(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	728(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	732(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	736(%esp), %esi
-	adcl	$0, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	648(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	648(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	adcl	656(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	672(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	688(%esp), %esi
-	movl	%esi, %edi
-	movl	40(%esp), %esi          # 4-byte Reload
-	adcl	692(%esp), %esi
-	movl	1160(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	600(%esp), %ecx
-	movl	1156(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv352x32
-	movl	644(%esp), %eax
-	movl	28(%esp), %ecx          # 4-byte Reload
-	addl	600(%esp), %ecx
-	adcl	604(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	608(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	612(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	616(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	620(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	624(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	628(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	632(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	adcl	636(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	adcl	640(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	adcl	$0, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	552(%esp), %esi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	560(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	576(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	596(%esp), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	1160(%esp), %ecx
-	movl	%ecx, %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	548(%esp), %edx
-	movl	32(%esp), %eax          # 4-byte Reload
-	addl	504(%esp), %eax
-	adcl	508(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	512(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	516(%esp), %ebp
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	520(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	524(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	528(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	532(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	536(%esp), %edi
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	540(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	544(%esp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	456(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	456(%esp), %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	468(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	480(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	488(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %edi          # 4-byte Reload
-	adcl	496(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	408(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	452(%esp), %edx
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	408(%esp), %ecx
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	412(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	428(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	444(%esp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %edi          # 4-byte Reload
-	adcl	448(%esp), %edi
-	adcl	$0, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	360(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	360(%esp), %esi
-	adcl	364(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	372(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	392(%esp), %ebp
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	adcl	400(%esp), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	356(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	312(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	320(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	332(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	340(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	264(%esp), %esi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	276(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%edi, %esi
-	adcl	284(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	292(%esp), %edi
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	216(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	260(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	216(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	224(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	232(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	240(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %ebp          # 4-byte Reload
-	adcl	244(%esp), %ebp
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	168(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	168(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	176(%esp), %esi
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	180(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	196(%esp), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	adcl	204(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1160(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	120(%esp), %ecx
-	movl	1156(%esp), %edx
-	calll	.LmulPv352x32
-	movl	164(%esp), %edx
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	120(%esp), %ecx
-	adcl	124(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	adcl	128(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	132(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	136(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	144(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	152(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	156(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	72(%esp), %ecx
-	movl	1164(%esp), %edx
-	calll	.LmulPv352x32
-	addl	72(%esp), %edi
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	80(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	84(%esp), %edi
-	adcl	88(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	92(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	32(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	108(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	112(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	1164(%esp), %ebx
-	subl	(%ebx), %edx
-	movl	%ecx, %esi
-	sbbl	4(%ebx), %esi
-	movl	%edi, %ecx
-	sbbl	8(%ebx), %ecx
-	movl	44(%esp), %eax          # 4-byte Reload
-	sbbl	12(%ebx), %eax
-	movl	40(%esp), %ebp          # 4-byte Reload
-	sbbl	16(%ebx), %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	movl	28(%esp), %ebp          # 4-byte Reload
-	sbbl	20(%ebx), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	sbbl	24(%ebx), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	sbbl	28(%ebx), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	sbbl	32(%ebx), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	sbbl	36(%ebx), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	sbbl	40(%ebx), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	%ebp, %ebx
-	sarl	$31, %ebx
-	testl	%ebx, %ebx
-	movl	68(%esp), %ebx          # 4-byte Reload
-	js	.LBB163_2
-# BB#1:
-	movl	%edx, %ebx
-.LBB163_2:
-	movl	1152(%esp), %edx
-	movl	%ebx, (%edx)
-	movl	60(%esp), %ebp          # 4-byte Reload
-	js	.LBB163_4
-# BB#3:
-	movl	%esi, %ebp
-.LBB163_4:
-	movl	%ebp, 4(%edx)
-	js	.LBB163_6
-# BB#5:
-	movl	%ecx, %edi
-.LBB163_6:
-	movl	%edi, 8(%edx)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	js	.LBB163_8
-# BB#7:
-	movl	%eax, %ecx
-.LBB163_8:
-	movl	%ecx, 12(%edx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB163_10
-# BB#9:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB163_10:
-	movl	%eax, 16(%edx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	js	.LBB163_12
-# BB#11:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB163_12:
-	movl	%eax, 20(%edx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	js	.LBB163_14
-# BB#13:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB163_14:
-	movl	%eax, 24(%edx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	js	.LBB163_16
-# BB#15:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB163_16:
-	movl	%eax, 28(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB163_18
-# BB#17:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB163_18:
-	movl	%eax, 32(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB163_20
-# BB#19:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB163_20:
-	movl	%eax, 36(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB163_22
-# BB#21:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB163_22:
-	movl	%eax, 40(%edx)
-	addl	$1132, %esp             # imm = 0x46C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end163:
-	.size	mcl_fp_montNF11L, .Lfunc_end163-mcl_fp_montNF11L
-
-	.globl	mcl_fp_montRed11L
-	.align	16, 0x90
-	.type	mcl_fp_montRed11L,@function
-mcl_fp_montRed11L:                      # @mcl_fp_montRed11L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$668, %esp              # imm = 0x29C
-	calll	.L164$pb
-.L164$pb:
-	popl	%eax
-.Ltmp25:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp25-.L164$pb), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	696(%esp), %edx
-	movl	-4(%edx), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	692(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	4(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	imull	%esi, %ebx
-	movl	84(%ecx), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	80(%ecx), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	76(%ecx), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	72(%ecx), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	68(%ecx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	64(%ecx), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	60(%ecx), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	56(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	52(%ecx), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	48(%ecx), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	44(%ecx), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	40(%ecx), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	32(%ecx), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	28(%ecx), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	24(%ecx), %ebp
-	movl	20(%ecx), %edi
-	movl	16(%ecx), %esi
-	movl	12(%ecx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	8(%ecx), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	616(%esp), %ecx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	616(%esp), %eax
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	620(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	632(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	636(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	640(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	568(%esp), %esi
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	572(%esp), %edx
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	600(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	520(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	520(%esp), %ebp
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	524(%esp), %ecx
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	548(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	124(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	movl	120(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	472(%esp), %esi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 124(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 120(%esp)         # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	424(%esp), %ebp
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	428(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	444(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	464(%esp), %ebp
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	376(%esp), %esi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	380(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %esi         # 4-byte Reload
-	adcl	404(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	412(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	328(%esp), %edi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	332(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	344(%esp), %edi
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	352(%esp), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	adcl	356(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	696(%esp), %eax
-	movl	%eax, %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	280(%esp), %ebp
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	284(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	292(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %edi         # 4-byte Reload
-	adcl	296(%esp), %edi
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	304(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	%ebp, %eax
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	232(%esp), %ebp
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	236(%esp), %ebp
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	244(%esp), %edi
-	movl	%edi, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	276(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	%ebp, %eax
-	imull	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	184(%esp), %ebp
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	188(%esp), %ecx
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	136(%esp), %ecx
-	movl	696(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv352x32
-	addl	136(%esp), %esi
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	%eax, %edi
-	movl	128(%esp), %edx         # 4-byte Reload
-	adcl	144(%esp), %edx
-	movl	%edx, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	148(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	152(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebx          # 4-byte Reload
-	adcl	180(%esp), %ebx
-	movl	%ebx, 80(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	subl	12(%esp), %edi          # 4-byte Folded Reload
-	sbbl	4(%esp), %edx           # 4-byte Folded Reload
-	sbbl	8(%esp), %ecx           # 4-byte Folded Reload
-	sbbl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	124(%esp), %eax         # 4-byte Reload
-	sbbl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	sbbl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	sbbl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	sbbl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	sbbl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	movl	%ebp, %ebx
-	sbbl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB164_2
-# BB#1:
-	movl	%esi, 112(%esp)         # 4-byte Spill
-.LBB164_2:
-	testb	%bl, %bl
-	movl	132(%esp), %esi         # 4-byte Reload
-	jne	.LBB164_4
-# BB#3:
-	movl	%edi, %esi
-.LBB164_4:
-	movl	688(%esp), %edi
-	movl	%esi, (%edi)
-	movl	104(%esp), %esi         # 4-byte Reload
-	jne	.LBB164_6
-# BB#5:
-	movl	%edx, 128(%esp)         # 4-byte Spill
-.LBB164_6:
-	movl	128(%esp), %edx         # 4-byte Reload
-	movl	%edx, 4(%edi)
-	movl	116(%esp), %edx         # 4-byte Reload
-	jne	.LBB164_8
-# BB#7:
-	movl	%ecx, %edx
-.LBB164_8:
-	movl	%edx, 8(%edi)
-	movl	112(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 12(%edi)
-	movl	92(%esp), %edx          # 4-byte Reload
-	movl	124(%esp), %ecx         # 4-byte Reload
-	jne	.LBB164_10
-# BB#9:
-	movl	64(%esp), %ecx          # 4-byte Reload
-.LBB164_10:
-	movl	%ecx, 16(%edi)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	movl	120(%esp), %eax         # 4-byte Reload
-	jne	.LBB164_12
-# BB#11:
-	movl	68(%esp), %eax          # 4-byte Reload
-.LBB164_12:
-	movl	%eax, 20(%edi)
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	108(%esp), %ebp         # 4-byte Reload
-	jne	.LBB164_14
-# BB#13:
-	movl	72(%esp), %ebp          # 4-byte Reload
-.LBB164_14:
-	movl	%ebp, 24(%edi)
-	jne	.LBB164_16
-# BB#15:
-	movl	76(%esp), %esi          # 4-byte Reload
-.LBB164_16:
-	movl	%esi, 28(%edi)
-	jne	.LBB164_18
-# BB#17:
-	movl	84(%esp), %edx          # 4-byte Reload
-.LBB164_18:
-	movl	%edx, 32(%edi)
-	jne	.LBB164_20
-# BB#19:
-	movl	88(%esp), %ecx          # 4-byte Reload
-.LBB164_20:
-	movl	%ecx, 36(%edi)
-	jne	.LBB164_22
-# BB#21:
-	movl	100(%esp), %eax         # 4-byte Reload
-.LBB164_22:
-	movl	%eax, 40(%edi)
-	addl	$668, %esp              # imm = 0x29C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end164:
-	.size	mcl_fp_montRed11L, .Lfunc_end164-mcl_fp_montRed11L
-
-	.globl	mcl_fp_addPre11L
-	.align	16, 0x90
-	.type	mcl_fp_addPre11L,@function
-mcl_fp_addPre11L:                       # @mcl_fp_addPre11L
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %edi
-	adcl	8(%ecx), %edi
-	movl	16(%esp), %ebx
-	movl	%edx, (%ebx)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%ebx)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ebx)
-	movl	20(%ecx), %edx
-	adcl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ebx)
-	movl	24(%ecx), %esi
-	adcl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ebx)
-	movl	28(%ecx), %edx
-	adcl	%edi, %edx
-	movl	32(%eax), %edi
-	movl	%esi, 24(%ebx)
-	movl	32(%ecx), %esi
-	adcl	%edi, %esi
-	movl	36(%eax), %edi
-	movl	%edx, 28(%ebx)
-	movl	36(%ecx), %edx
-	adcl	%edi, %edx
-	movl	%esi, 32(%ebx)
-	movl	%edx, 36(%ebx)
-	movl	40(%eax), %eax
-	movl	40(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 40(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end165:
-	.size	mcl_fp_addPre11L, .Lfunc_end165-mcl_fp_addPre11L
-
-	.globl	mcl_fp_subPre11L
-	.align	16, 0x90
-	.type	mcl_fp_subPre11L,@function
-mcl_fp_subPre11L:                       # @mcl_fp_subPre11L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edx), %ebx
-	movl	20(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebp)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ebp)
-	movl	24(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ebp)
-	movl	28(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%edi, 24(%ebp)
-	movl	32(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	36(%edx), %ebx
-	movl	%esi, 28(%ebp)
-	movl	36(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	%edi, 32(%ebp)
-	movl	%esi, 36(%ebp)
-	movl	40(%edx), %edx
-	movl	40(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 40(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end166:
-	.size	mcl_fp_subPre11L, .Lfunc_end166-mcl_fp_subPre11L
-
-	.globl	mcl_fp_shr1_11L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_11L,@function
-mcl_fp_shr1_11L:                        # @mcl_fp_shr1_11L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %ecx
-	movl	4(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	8(%esp), %esi
-	movl	%ecx, (%esi)
-	movl	8(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 4(%esi)
-	movl	12(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 8(%esi)
-	movl	16(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 12(%esi)
-	movl	20(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 16(%esi)
-	movl	24(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 20(%esi)
-	movl	28(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 24(%esi)
-	movl	32(%eax), %ecx
-	shrdl	$1, %ecx, %edx
-	movl	%edx, 28(%esi)
-	movl	36(%eax), %edx
-	shrdl	$1, %edx, %ecx
-	movl	%ecx, 32(%esi)
-	movl	40(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	%edx, 36(%esi)
-	shrl	%eax
-	movl	%eax, 40(%esi)
-	popl	%esi
-	retl
-.Lfunc_end167:
-	.size	mcl_fp_shr1_11L, .Lfunc_end167-mcl_fp_shr1_11L
-
-	.globl	mcl_fp_add11L
-	.align	16, 0x90
-	.type	mcl_fp_add11L,@function
-mcl_fp_add11L:                          # @mcl_fp_add11L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$32, %esp
-	movl	60(%esp), %edi
-	movl	(%edi), %ecx
-	movl	4(%edi), %eax
-	movl	56(%esp), %esi
-	addl	(%esi), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%ecx, %ebp
-	adcl	4(%esi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	8(%edi), %eax
-	adcl	8(%esi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	movl	16(%esi), %ecx
-	adcl	12(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	adcl	16(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	adcl	20(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	adcl	24(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	28(%esi), %ebx
-	adcl	28(%edi), %ebx
-	movl	%ebx, (%esp)            # 4-byte Spill
-	movl	32(%esi), %ecx
-	adcl	32(%edi), %ecx
-	movl	36(%esi), %eax
-	adcl	36(%edi), %eax
-	movl	40(%esi), %edx
-	adcl	40(%edi), %edx
-	movl	52(%esp), %esi
-	movl	%ebp, (%esi)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%esi)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%esi)
-	movl	20(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%esi)
-	movl	16(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%esi)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%esi)
-	movl	8(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, 24(%esi)
-	movl	%ebx, 28(%esi)
-	movl	%ecx, 32(%esi)
-	movl	%eax, 36(%esi)
-	movl	%edx, 40(%esi)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	64(%esp), %ebp
-	movl	4(%esp), %edi           # 4-byte Reload
-	subl	(%ebp), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	28(%esp), %edi          # 4-byte Reload
-	sbbl	4(%ebp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %edi          # 4-byte Reload
-	sbbl	8(%ebp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %edi          # 4-byte Reload
-	sbbl	12(%ebp), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %edi          # 4-byte Reload
-	sbbl	16(%ebp), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %edi          # 4-byte Reload
-	sbbl	20(%ebp), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %edi           # 4-byte Reload
-	sbbl	24(%ebp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	(%esp), %edi            # 4-byte Reload
-	sbbl	28(%ebp), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	sbbl	32(%ebp), %ecx
-	sbbl	36(%ebp), %eax
-	sbbl	40(%ebp), %edx
-	movl	%edx, %edi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB168_2
-# BB#1:                                 # %nocarry
-	movl	4(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, (%esi)
-	movl	28(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 4(%esi)
-	movl	24(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%esi)
-	movl	20(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 12(%esi)
-	movl	16(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 16(%esi)
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 20(%esi)
-	movl	8(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, 24(%esi)
-	movl	(%esp), %edx            # 4-byte Reload
-	movl	%edx, 28(%esi)
-	movl	%ecx, 32(%esi)
-	movl	%eax, 36(%esi)
-	movl	%edi, 40(%esi)
-.LBB168_2:                              # %carry
-	addl	$32, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end168:
-	.size	mcl_fp_add11L, .Lfunc_end168-mcl_fp_add11L
-
-	.globl	mcl_fp_addNF11L
-	.align	16, 0x90
-	.type	mcl_fp_addNF11L,@function
-mcl_fp_addNF11L:                        # @mcl_fp_addNF11L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$80, %esp
-	movl	108(%esp), %edx
-	movl	(%edx), %eax
-	movl	4(%edx), %ecx
-	movl	104(%esp), %esi
-	addl	(%esi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	4(%esi), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%edx), %ebx
-	movl	36(%edx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	32(%edx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	28(%edx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	24(%edx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	20(%edx), %ebp
-	movl	16(%edx), %edi
-	movl	12(%edx), %eax
-	movl	8(%edx), %ecx
-	adcl	8(%esi), %ecx
-	adcl	12(%esi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	16(%esi), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	20(%esi), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	24(%esi), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	28(%esi), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	32(%esi), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	36(%esi), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %edx
-	adcl	40(%esi), %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebx
-	movl	52(%esp), %esi          # 4-byte Reload
-	subl	(%ebx), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	sbbl	4(%ebx), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	%edx, %ecx
-	sbbl	8(%ebx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	sbbl	12(%ebx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	sbbl	16(%ebx), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	sbbl	20(%ebx), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ecx
-	sbbl	24(%ebx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%ebx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%ebx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	movl	%edi, %ecx
-	movl	%edi, %ebp
-	sbbl	36(%ebx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %edi
-	sbbl	40(%ebx), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	%edi, %ebx
-	movl	52(%esp), %edi          # 4-byte Reload
-	sarl	$31, %ebx
-	testl	%ebx, %ebx
-	js	.LBB169_2
-# BB#1:
-	movl	%esi, %edi
-.LBB169_2:
-	movl	100(%esp), %esi
-	movl	%edi, (%esi)
-	movl	60(%esp), %edi          # 4-byte Reload
-	js	.LBB169_4
-# BB#3:
-	movl	(%esp), %edi            # 4-byte Reload
-.LBB169_4:
-	movl	%edi, 4(%esi)
-	movl	%eax, %edi
-	js	.LBB169_6
-# BB#5:
-	movl	4(%esp), %edx           # 4-byte Reload
-.LBB169_6:
-	movl	%edx, 8(%esi)
-	movl	%ebp, %ecx
-	movl	72(%esp), %edx          # 4-byte Reload
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB169_8
-# BB#7:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB169_8:
-	movl	%eax, 12(%esi)
-	movl	76(%esp), %eax          # 4-byte Reload
-	movl	44(%esp), %ebp          # 4-byte Reload
-	js	.LBB169_10
-# BB#9:
-	movl	12(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-.LBB169_10:
-	movl	48(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 16(%esi)
-	js	.LBB169_12
-# BB#11:
-	movl	16(%esp), %ebp          # 4-byte Reload
-.LBB169_12:
-	movl	%ebp, 20(%esi)
-	js	.LBB169_14
-# BB#13:
-	movl	20(%esp), %edi          # 4-byte Reload
-.LBB169_14:
-	movl	%edi, 24(%esi)
-	js	.LBB169_16
-# BB#15:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB169_16:
-	movl	%eax, 28(%esi)
-	js	.LBB169_18
-# BB#17:
-	movl	28(%esp), %edx          # 4-byte Reload
-.LBB169_18:
-	movl	%edx, 32(%esi)
-	js	.LBB169_20
-# BB#19:
-	movl	32(%esp), %ecx          # 4-byte Reload
-.LBB169_20:
-	movl	%ecx, 36(%esi)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB169_22
-# BB#21:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB169_22:
-	movl	%eax, 40(%esi)
-	addl	$80, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end169:
-	.size	mcl_fp_addNF11L, .Lfunc_end169-mcl_fp_addNF11L
-
-	.globl	mcl_fp_sub11L
-	.align	16, 0x90
-	.type	mcl_fp_sub11L,@function
-mcl_fp_sub11L:                          # @mcl_fp_sub11L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$40, %esp
-	movl	64(%esp), %ebp
-	movl	(%ebp), %ecx
-	movl	4(%ebp), %eax
-	movl	68(%esp), %edi
-	subl	(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	8(%ebp), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	12(%ebp), %ebx
-	sbbl	12(%edi), %ebx
-	movl	16(%ebp), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	20(%ebp), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	24(%ebp), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	28(%ebp), %edx
-	sbbl	28(%edi), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	movl	32(%ebp), %ecx
-	sbbl	32(%edi), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	36(%ebp), %eax
-	sbbl	36(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	40(%ebp), %eax
-	sbbl	40(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebx, %ebp
-	movl	16(%esp), %esi          # 4-byte Reload
-	movl	$0, %ebx
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	60(%esp), %ebx
-	movl	%esi, (%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	%ebp, 12(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	%edx, 28(%ebx)
-	movl	%ecx, 32(%ebx)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%ebx)
-	movl	%ecx, %edi
-	movl	%eax, 40(%ebx)
-	je	.LBB170_2
-# BB#1:                                 # %carry
-	movl	72(%esp), %eax
-	addl	(%eax), %esi
-	movl	%esi, (%ebx)
-	movl	28(%esp), %edx          # 4-byte Reload
-	movl	%eax, %esi
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	8(%esi), %ecx
-	movl	12(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%ecx, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	%ecx, 32(%ebx)
-	movl	36(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 36(%ebx)
-	movl	40(%esi), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, 40(%ebx)
-.LBB170_2:                              # %nocarry
-	addl	$40, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end170:
-	.size	mcl_fp_sub11L, .Lfunc_end170-mcl_fp_sub11L
-
-	.globl	mcl_fp_subNF11L
-	.align	16, 0x90
-	.type	mcl_fp_subNF11L,@function
-mcl_fp_subNF11L:                        # @mcl_fp_subNF11L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$64, %esp
-	movl	88(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %ecx
-	movl	92(%esp), %edi
-	subl	(%edi), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	40(%eax), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	36(%eax), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	32(%eax), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	28(%eax), %ebx
-	movl	24(%eax), %ebp
-	movl	20(%eax), %esi
-	movl	16(%eax), %edx
-	movl	12(%eax), %ecx
-	movl	8(%eax), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	sbbl	12(%edi), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	sbbl	28(%edi), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	sbbl	40(%edi), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	sarl	$31, %esi
-	movl	%esi, %eax
-	shldl	$1, %edx, %eax
-	movl	96(%esp), %edx
-	movl	4(%edx), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, %ebx
-	andl	(%edx), %eax
-	movl	40(%edx), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	28(%edx), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	24(%edx), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	20(%edx), %ebp
-	andl	%esi, %ebp
-	roll	%esi
-	movl	16(%edx), %edi
-	andl	%esi, %edi
-	movl	12(%edx), %ecx
-	andl	%esi, %ecx
-	andl	8(%edx), %esi
-	addl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebx, %edx
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	84(%esp), %ebx
-	movl	%eax, (%ebx)
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 4(%ebx)
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%esi, 8(%ebx)
-	adcl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%ecx, 12(%ebx)
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edi, 16(%ebx)
-	movl	(%esp), %ecx            # 4-byte Reload
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebp, 20(%ebx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	%eax, 36(%ebx)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%ebx)
-	addl	$64, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end171:
-	.size	mcl_fp_subNF11L, .Lfunc_end171-mcl_fp_subNF11L
-
-	.globl	mcl_fpDbl_add11L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add11L,@function
-mcl_fpDbl_add11L:                       # @mcl_fpDbl_add11L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$80, %esp
-	movl	108(%esp), %ecx
-	movl	104(%esp), %edi
-	movl	12(%edi), %esi
-	movl	16(%edi), %edx
-	movl	8(%ecx), %ebx
-	movl	(%ecx), %ebp
-	addl	(%edi), %ebp
-	movl	100(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%ecx), %ebp
-	adcl	4(%edi), %ebp
-	adcl	8(%edi), %ebx
-	adcl	12(%ecx), %esi
-	adcl	16(%ecx), %edx
-	movl	%ebp, 4(%eax)
-	movl	52(%ecx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%ecx), %ebx
-	movl	%esi, 12(%eax)
-	movl	20(%edi), %esi
-	adcl	%ebx, %esi
-	movl	24(%ecx), %ebx
-	movl	%edx, 16(%eax)
-	movl	24(%edi), %edx
-	adcl	%ebx, %edx
-	movl	28(%ecx), %ebx
-	movl	%esi, 20(%eax)
-	movl	28(%edi), %esi
-	adcl	%ebx, %esi
-	movl	32(%ecx), %ebx
-	movl	%edx, 24(%eax)
-	movl	32(%edi), %edx
-	adcl	%ebx, %edx
-	movl	36(%ecx), %ebx
-	movl	%esi, 28(%eax)
-	movl	36(%edi), %esi
-	adcl	%ebx, %esi
-	movl	40(%ecx), %ebx
-	movl	%edx, 32(%eax)
-	movl	40(%edi), %edx
-	adcl	%ebx, %edx
-	movl	44(%ecx), %ebx
-	movl	%esi, 36(%eax)
-	movl	44(%edi), %esi
-	adcl	%ebx, %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	48(%ecx), %esi
-	movl	%edx, 40(%eax)
-	movl	48(%edi), %eax
-	adcl	%esi, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	52(%edi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	56(%ecx), %edx
-	movl	56(%edi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%ecx), %edx
-	movl	60(%edi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%ecx), %edx
-	movl	64(%edi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%ecx), %eax
-	movl	68(%edi), %edx
-	adcl	%eax, %edx
-	movl	72(%ecx), %esi
-	movl	72(%edi), %eax
-	adcl	%esi, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	76(%ecx), %ebx
-	movl	76(%edi), %esi
-	adcl	%ebx, %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	80(%ecx), %ebp
-	movl	80(%edi), %ebx
-	adcl	%ebp, %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	84(%ecx), %ecx
-	movl	84(%edi), %edi
-	adcl	%ecx, %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	112(%esp), %ebp
-	movl	68(%esp), %edi          # 4-byte Reload
-	subl	(%ebp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	sbbl	4(%ebp), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	sbbl	8(%ebp), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	sbbl	12(%ebp), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	sbbl	16(%ebp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	sbbl	20(%ebp), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%edx, %edi
-	sbbl	24(%ebp), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	sbbl	28(%ebp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	sbbl	32(%ebp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	movl	40(%esp), %ebx          # 4-byte Reload
-	sbbl	36(%ebp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%ebx, %edi
-	sbbl	40(%ebp), %edi
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB172_2
-# BB#1:
-	movl	%edi, %ebx
-.LBB172_2:
-	testb	%cl, %cl
-	movl	68(%esp), %ecx          # 4-byte Reload
-	movl	64(%esp), %esi          # 4-byte Reload
-	movl	60(%esp), %edi          # 4-byte Reload
-	movl	56(%esp), %ebp          # 4-byte Reload
-	jne	.LBB172_4
-# BB#3:
-	movl	(%esp), %edx            # 4-byte Reload
-	movl	4(%esp), %esi           # 4-byte Reload
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	24(%esp), %ecx          # 4-byte Reload
-.LBB172_4:
-	movl	100(%esp), %eax
-	movl	%ecx, 44(%eax)
-	movl	72(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	movl	%ebp, 56(%eax)
-	movl	%edi, 60(%eax)
-	movl	%esi, 64(%eax)
-	movl	%edx, 68(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	movl	44(%esp), %edx          # 4-byte Reload
-	jne	.LBB172_6
-# BB#5:
-	movl	28(%esp), %edx          # 4-byte Reload
-.LBB172_6:
-	movl	%edx, 72(%eax)
-	movl	48(%esp), %edx          # 4-byte Reload
-	jne	.LBB172_8
-# BB#7:
-	movl	32(%esp), %edx          # 4-byte Reload
-.LBB172_8:
-	movl	%edx, 76(%eax)
-	jne	.LBB172_10
-# BB#9:
-	movl	36(%esp), %ecx          # 4-byte Reload
-.LBB172_10:
-	movl	%ecx, 80(%eax)
-	movl	%ebx, 84(%eax)
-	addl	$80, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end172:
-	.size	mcl_fpDbl_add11L, .Lfunc_end172-mcl_fpDbl_add11L
-
-	.globl	mcl_fpDbl_sub11L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub11L,@function
-mcl_fpDbl_sub11L:                       # @mcl_fpDbl_sub11L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$72, %esp
-	movl	96(%esp), %edx
-	movl	(%edx), %eax
-	movl	4(%edx), %esi
-	movl	100(%esp), %ebp
-	subl	(%ebp), %eax
-	sbbl	4(%ebp), %esi
-	movl	8(%edx), %edi
-	sbbl	8(%ebp), %edi
-	movl	92(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%edx), %eax
-	sbbl	12(%ebp), %eax
-	movl	%esi, 4(%ecx)
-	movl	16(%edx), %esi
-	sbbl	16(%ebp), %esi
-	movl	%edi, 8(%ecx)
-	movl	20(%ebp), %edi
-	movl	%eax, 12(%ecx)
-	movl	20(%edx), %eax
-	sbbl	%edi, %eax
-	movl	24(%ebp), %edi
-	movl	%esi, 16(%ecx)
-	movl	24(%edx), %esi
-	sbbl	%edi, %esi
-	movl	28(%ebp), %edi
-	movl	%eax, 20(%ecx)
-	movl	28(%edx), %eax
-	sbbl	%edi, %eax
-	movl	32(%ebp), %edi
-	movl	%esi, 24(%ecx)
-	movl	32(%edx), %esi
-	sbbl	%edi, %esi
-	movl	36(%ebp), %edi
-	movl	%eax, 28(%ecx)
-	movl	36(%edx), %eax
-	sbbl	%edi, %eax
-	movl	40(%ebp), %edi
-	movl	%esi, 32(%ecx)
-	movl	40(%edx), %esi
-	sbbl	%edi, %esi
-	movl	44(%ebp), %edi
-	movl	%eax, 36(%ecx)
-	movl	44(%edx), %eax
-	sbbl	%edi, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	48(%ebp), %eax
-	movl	%esi, 40(%ecx)
-	movl	48(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	52(%ebp), %eax
-	movl	52(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	56(%ebp), %eax
-	movl	56(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	60(%ebp), %eax
-	movl	60(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	64(%ebp), %eax
-	movl	64(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	68(%ebp), %eax
-	movl	68(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	72(%ebp), %eax
-	movl	72(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	76(%ebp), %eax
-	movl	76(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	80(%ebp), %eax
-	movl	80(%edx), %esi
-	sbbl	%eax, %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	84(%ebp), %eax
-	movl	84(%edx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	104(%esp), %ebp
-	jne	.LBB173_1
-# BB#2:
-	movl	$0, 28(%esp)            # 4-byte Folded Spill
-	jmp	.LBB173_3
-.LBB173_1:
-	movl	40(%ebp), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-.LBB173_3:
-	testb	%al, %al
-	jne	.LBB173_4
-# BB#5:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB173_6
-.LBB173_4:
-	movl	(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	4(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB173_6:
-	jne	.LBB173_7
-# BB#8:
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB173_9
-.LBB173_7:
-	movl	36(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-.LBB173_9:
-	jne	.LBB173_10
-# BB#11:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB173_12
-.LBB173_10:
-	movl	32(%ebp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB173_12:
-	jne	.LBB173_13
-# BB#14:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB173_15
-.LBB173_13:
-	movl	28(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB173_15:
-	jne	.LBB173_16
-# BB#17:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB173_18
-.LBB173_16:
-	movl	24(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB173_18:
-	jne	.LBB173_19
-# BB#20:
-	movl	$0, %edx
-	jmp	.LBB173_21
-.LBB173_19:
-	movl	20(%ebp), %edx
-.LBB173_21:
-	jne	.LBB173_22
-# BB#23:
-	movl	$0, %edi
-	jmp	.LBB173_24
-.LBB173_22:
-	movl	16(%ebp), %edi
-.LBB173_24:
-	jne	.LBB173_25
-# BB#26:
-	movl	$0, %ebx
-	jmp	.LBB173_27
-.LBB173_25:
-	movl	12(%ebp), %ebx
-.LBB173_27:
-	jne	.LBB173_28
-# BB#29:
-	xorl	%ebp, %ebp
-	jmp	.LBB173_30
-.LBB173_28:
-	movl	8(%ebp), %ebp
-.LBB173_30:
-	movl	8(%esp), %esi           # 4-byte Reload
-	addl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, 44(%ecx)
-	adcl	32(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 48(%ecx)
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 52(%ecx)
-	adcl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 56(%ecx)
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edi, 60(%ecx)
-	movl	(%esp), %esi            # 4-byte Reload
-	adcl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 64(%ecx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, 68(%ecx)
-	movl	12(%esp), %edx          # 4-byte Reload
-	adcl	60(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 72(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 76(%ecx)
-	movl	%eax, 80(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%ecx)
-	addl	$72, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end173:
-	.size	mcl_fpDbl_sub11L, .Lfunc_end173-mcl_fpDbl_sub11L
-
-	.align	16, 0x90
-	.type	.LmulPv384x32,@function
-.LmulPv384x32:                          # @mulPv384x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$80, %esp
-	movl	%edx, %ebx
-	movl	100(%esp), %ebp
-	movl	%ebp, %eax
-	mull	44(%ebx)
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	40(%ebx)
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	36(%ebx)
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	32(%ebx)
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	28(%ebx)
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	24(%ebx)
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	20(%ebx)
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	16(%ebx)
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	12(%ebx)
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebp, %eax
-	mull	8(%ebx)
-	movl	%edx, %edi
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ebp, %eax
-	mull	4(%ebx)
-	movl	%edx, %esi
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ebp, %eax
-	mull	(%ebx)
-	movl	%eax, (%ecx)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%ecx)
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 8(%ecx)
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 12(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%ecx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 48(%ecx)
-	movl	%ecx, %eax
-	addl	$80, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end174:
-	.size	.LmulPv384x32, .Lfunc_end174-.LmulPv384x32
-
-	.globl	mcl_fp_mulUnitPre12L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre12L,@function
-mcl_fp_mulUnitPre12L:                   # @mcl_fp_mulUnitPre12L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$92, %esp
-	calll	.L175$pb
-.L175$pb:
-	popl	%ebx
-.Ltmp26:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp26-.L175$pb), %ebx
-	movl	120(%esp), %eax
-	movl	%eax, (%esp)
-	leal	40(%esp), %ecx
-	movl	116(%esp), %edx
-	calll	.LmulPv384x32
-	movl	88(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp
-	movl	56(%esp), %ebx
-	movl	52(%esp), %edi
-	movl	48(%esp), %esi
-	movl	40(%esp), %edx
-	movl	44(%esp), %ecx
-	movl	112(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	addl	$92, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end175:
-	.size	mcl_fp_mulUnitPre12L, .Lfunc_end175-mcl_fp_mulUnitPre12L
-
-	.globl	mcl_fpDbl_mulPre12L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre12L,@function
-mcl_fpDbl_mulPre12L:                    # @mcl_fpDbl_mulPre12L
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$220, %esp
-	calll	.L176$pb
-.L176$pb:
-	popl	%ebx
-.Ltmp27:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp27-.L176$pb), %ebx
-	movl	%ebx, -164(%ebp)        # 4-byte Spill
-	movl	16(%ebp), %esi
-	movl	%esi, 8(%esp)
-	movl	12(%ebp), %edi
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre6L@PLT
-	leal	24(%esi), %eax
-	movl	%eax, 8(%esp)
-	leal	24(%edi), %eax
-	movl	%eax, 4(%esp)
-	movl	8(%ebp), %eax
-	leal	48(%eax), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre6L@PLT
-	movl	40(%edi), %ebx
-	movl	36(%edi), %eax
-	movl	32(%edi), %edx
-	movl	(%edi), %esi
-	movl	4(%edi), %ecx
-	addl	24(%edi), %esi
-	adcl	28(%edi), %ecx
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	adcl	8(%edi), %edx
-	movl	%edx, -188(%ebp)        # 4-byte Spill
-	adcl	12(%edi), %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	adcl	16(%edi), %ebx
-	movl	%ebx, -180(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -112(%ebp)        # 4-byte Spill
-	movl	16(%ebp), %edi
-	movl	(%edi), %eax
-	addl	24(%edi), %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-	movl	4(%edi), %eax
-	adcl	28(%edi), %eax
-	movl	%eax, -140(%ebp)        # 4-byte Spill
-	movl	32(%edi), %eax
-	adcl	8(%edi), %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	movl	36(%edi), %eax
-	adcl	12(%edi), %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	movl	40(%edi), %ecx
-	adcl	16(%edi), %ecx
-	movl	44(%edi), %eax
-	adcl	20(%edi), %eax
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	popl	%eax
-	movl	%edi, -184(%ebp)        # 4-byte Spill
-	movl	%ebx, %edi
-	movl	%edx, -156(%ebp)        # 4-byte Spill
-	movl	%esi, -160(%ebp)        # 4-byte Spill
-	movl	%esi, %edx
-	jb	.LBB176_2
-# BB#1:
-	xorl	%edi, %edi
-	movl	$0, -156(%ebp)          # 4-byte Folded Spill
-	movl	$0, -160(%ebp)          # 4-byte Folded Spill
-.LBB176_2:
-	movl	%edi, -176(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %esi
-	movl	44(%esi), %edi
-	movl	-112(%ebp), %ebx        # 4-byte Reload
-	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	20(%esi), %edi
-	movl	%edi, -132(%ebp)        # 4-byte Spill
-	movl	%eax, -124(%ebp)        # 4-byte Spill
-	movl	%ecx, -112(%ebp)        # 4-byte Spill
-	movl	-148(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -116(%ebp)        # 4-byte Spill
-	movl	-144(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -120(%ebp)        # 4-byte Spill
-	movl	-140(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -128(%ebp)        # 4-byte Spill
-	movl	-136(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -152(%ebp)        # 4-byte Spill
-	jb	.LBB176_4
-# BB#3:
-	movl	$0, -124(%ebp)          # 4-byte Folded Spill
-	movl	$0, -112(%ebp)          # 4-byte Folded Spill
-	movl	$0, -116(%ebp)          # 4-byte Folded Spill
-	movl	$0, -120(%ebp)          # 4-byte Folded Spill
-	movl	$0, -128(%ebp)          # 4-byte Folded Spill
-	movl	$0, -152(%ebp)          # 4-byte Folded Spill
-.LBB176_4:
-	movl	%edx, -84(%ebp)
-	movl	-172(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -80(%ebp)
-	movl	-188(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -76(%ebp)
-	movl	-168(%ebp), %edi        # 4-byte Reload
-	movl	%edi, -72(%ebp)
-	movl	-180(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -68(%ebp)
-	movl	-136(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -108(%ebp)
-	movl	-140(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -104(%ebp)
-	movl	-144(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -100(%ebp)
-	movl	-148(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -96(%ebp)
-	movl	%ecx, -92(%ebp)
-	movl	%eax, -88(%ebp)
-	movl	%edi, %ebx
-	sbbl	%edx, %edx
-	movl	-132(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -64(%ebp)
-	movl	-184(%ebp), %ecx        # 4-byte Reload
-	pushl	%eax
-	movl	%ecx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB176_6
-# BB#5:
-	movl	$0, %eax
-	movl	$0, %ebx
-	movl	$0, %esi
-.LBB176_6:
-	movl	%eax, -132(%ebp)        # 4-byte Spill
-	sbbl	%eax, %eax
-	leal	-108(%ebp), %ecx
-	movl	%ecx, 8(%esp)
-	leal	-84(%ebp), %ecx
-	movl	%ecx, 4(%esp)
-	leal	-60(%ebp), %ecx
-	movl	%ecx, (%esp)
-	andl	%eax, %edx
-	movl	-152(%ebp), %edi        # 4-byte Reload
-	addl	-160(%ebp), %edi        # 4-byte Folded Reload
-	adcl	%esi, -128(%ebp)        # 4-byte Folded Spill
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -120(%ebp)        # 4-byte Folded Spill
-	adcl	%ebx, -116(%ebp)        # 4-byte Folded Spill
-	movl	-176(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -112(%ebp)        # 4-byte Folded Spill
-	movl	-132(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -124(%ebp)        # 4-byte Folded Spill
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	andl	$1, %edx
-	movl	%edx, -132(%ebp)        # 4-byte Spill
-	movl	-164(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre6L@PLT
-	addl	-36(%ebp), %edi
-	movl	-128(%ebp), %eax        # 4-byte Reload
-	adcl	-32(%ebp), %eax
-	movl	%eax, -128(%ebp)        # 4-byte Spill
-	movl	-120(%ebp), %eax        # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -120(%ebp)        # 4-byte Spill
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -116(%ebp)        # 4-byte Spill
-	movl	-112(%ebp), %eax        # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -112(%ebp)        # 4-byte Spill
-	movl	-124(%ebp), %eax        # 4-byte Reload
-	adcl	-16(%ebp), %eax
-	movl	%eax, -124(%ebp)        # 4-byte Spill
-	adcl	%esi, -132(%ebp)        # 4-byte Folded Spill
-	movl	-60(%ebp), %ecx
-	movl	8(%ebp), %eax
-	subl	(%eax), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	movl	-56(%ebp), %esi
-	sbbl	4(%eax), %esi
-	movl	-52(%ebp), %ecx
-	sbbl	8(%eax), %ecx
-	movl	%ecx, -136(%ebp)        # 4-byte Spill
-	movl	-48(%ebp), %edx
-	sbbl	12(%eax), %edx
-	movl	-44(%ebp), %ebx
-	sbbl	16(%eax), %ebx
-	movl	-40(%ebp), %ecx
-	sbbl	20(%eax), %ecx
-	movl	%ecx, -140(%ebp)        # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, -148(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edi
-	movl	28(%eax), %ecx
-	movl	%ecx, -152(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -128(%ebp)        # 4-byte Folded Spill
-	movl	32(%eax), %ecx
-	movl	%ecx, -156(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -120(%ebp)        # 4-byte Folded Spill
-	movl	36(%eax), %ecx
-	movl	%ecx, -160(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -116(%ebp)        # 4-byte Folded Spill
-	movl	40(%eax), %ecx
-	movl	%ecx, -164(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -112(%ebp)        # 4-byte Folded Spill
-	movl	44(%eax), %ecx
-	movl	%ecx, -168(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -124(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, -132(%ebp)          # 4-byte Folded Spill
-	movl	48(%eax), %ecx
-	movl	%ecx, -192(%ebp)        # 4-byte Spill
-	subl	%ecx, -144(%ebp)        # 4-byte Folded Spill
-	movl	52(%eax), %ecx
-	movl	%ecx, -196(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %esi
-	movl	56(%eax), %ecx
-	movl	%ecx, -200(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -136(%ebp)        # 4-byte Folded Spill
-	movl	60(%eax), %ecx
-	movl	%ecx, -204(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edx
-	movl	64(%eax), %ecx
-	movl	%ecx, -208(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %ebx
-	movl	68(%eax), %ecx
-	movl	%ecx, -212(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -140(%ebp)        # 4-byte Folded Spill
-	movl	72(%eax), %ecx
-	movl	%ecx, -216(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edi
-	movl	76(%eax), %ecx
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -128(%ebp)        # 4-byte Folded Spill
-	movl	80(%eax), %ecx
-	movl	%ecx, -176(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -120(%ebp)        # 4-byte Folded Spill
-	movl	84(%eax), %ecx
-	movl	%ecx, -180(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -116(%ebp)        # 4-byte Folded Spill
-	movl	88(%eax), %ecx
-	movl	%ecx, -184(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -112(%ebp)        # 4-byte Folded Spill
-	movl	92(%eax), %ecx
-	movl	%ecx, -188(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -124(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, -132(%ebp)          # 4-byte Folded Spill
-	movl	-144(%ebp), %ecx        # 4-byte Reload
-	addl	-148(%ebp), %ecx        # 4-byte Folded Reload
-	adcl	-152(%ebp), %esi        # 4-byte Folded Reload
-	movl	%ecx, 24(%eax)
-	movl	-136(%ebp), %ecx        # 4-byte Reload
-	adcl	-156(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%esi, 28(%eax)
-	adcl	-160(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 32(%eax)
-	adcl	-164(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%edx, 36(%eax)
-	movl	-140(%ebp), %ecx        # 4-byte Reload
-	adcl	-168(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%ebx, 40(%eax)
-	adcl	-192(%ebp), %edi        # 4-byte Folded Reload
-	movl	%ecx, 44(%eax)
-	movl	-128(%ebp), %ecx        # 4-byte Reload
-	adcl	-196(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edi, 48(%eax)
-	movl	-120(%ebp), %edx        # 4-byte Reload
-	adcl	-200(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 52(%eax)
-	movl	-116(%ebp), %ecx        # 4-byte Reload
-	adcl	-204(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 56(%eax)
-	movl	-112(%ebp), %edx        # 4-byte Reload
-	adcl	-208(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 60(%eax)
-	movl	-124(%ebp), %ecx        # 4-byte Reload
-	adcl	-212(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 64(%eax)
-	movl	-132(%ebp), %edx        # 4-byte Reload
-	adcl	-216(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 68(%eax)
-	movl	%edx, 72(%eax)
-	movl	-172(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 76(%eax)
-	movl	-176(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 80(%eax)
-	movl	-180(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 84(%eax)
-	movl	-184(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 88(%eax)
-	movl	-188(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 92(%eax)
-	addl	$220, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end176:
-	.size	mcl_fpDbl_mulPre12L, .Lfunc_end176-mcl_fpDbl_mulPre12L
-
-	.globl	mcl_fpDbl_sqrPre12L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre12L,@function
-mcl_fpDbl_sqrPre12L:                    # @mcl_fpDbl_sqrPre12L
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$220, %esp
-	calll	.L177$pb
-.L177$pb:
-	popl	%ebx
-.Ltmp28:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp28-.L177$pb), %ebx
-	movl	%ebx, -152(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %esi
-	movl	%esi, (%esp)
-	calll	mcl_fpDbl_mulPre6L@PLT
-	leal	24(%edi), %eax
-	movl	%eax, 8(%esp)
-	movl	%eax, 4(%esp)
-	leal	48(%esi), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre6L@PLT
-	movl	44(%edi), %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-	movl	40(%edi), %edx
-	movl	36(%edi), %eax
-	movl	(%edi), %ebx
-	movl	4(%edi), %esi
-	addl	24(%edi), %ebx
-	adcl	28(%edi), %esi
-	movl	32(%edi), %ecx
-	adcl	8(%edi), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	adcl	12(%edi), %eax
-	movl	%eax, -140(%ebp)        # 4-byte Spill
-	adcl	16(%edi), %edx
-	movl	%edx, %ecx
-	movl	-136(%ebp), %eax        # 4-byte Reload
-	adcl	20(%edi), %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %edx
-	movl	%edx, -156(%ebp)        # 4-byte Spill
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %edx
-	popl	%eax
-	movl	%edx, -124(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -120(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %edx
-	sbbl	%edi, %edi
-	movl	%edi, -148(%ebp)        # 4-byte Spill
-	movl	%ebx, %edi
-	addl	%edi, %edi
-	movl	%edi, -112(%ebp)        # 4-byte Spill
-	movl	%esi, %edi
-	movl	%esi, %eax
-	adcl	%edi, %edi
-	movl	%edi, -132(%ebp)        # 4-byte Spill
-	pushl	%eax
-	movl	%edx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB177_2
-# BB#1:
-	movl	$0, -132(%ebp)          # 4-byte Folded Spill
-	movl	$0, -112(%ebp)          # 4-byte Folded Spill
-.LBB177_2:
-	movl	-144(%ebp), %esi        # 4-byte Reload
-	addl	%esi, %esi
-	movl	-140(%ebp), %edx        # 4-byte Reload
-	adcl	%edx, %edx
-	movl	%edx, -116(%ebp)        # 4-byte Spill
-	movl	-120(%ebp), %edx        # 4-byte Reload
-	pushl	%eax
-	movl	%edx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB177_3
-# BB#4:
-	movl	$0, -116(%ebp)          # 4-byte Folded Spill
-	movl	$0, -120(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB177_5
-.LBB177_3:
-	movl	%eax, %edx
-	shrl	$31, %edx
-	orl	%esi, %edx
-	movl	%edx, -120(%ebp)        # 4-byte Spill
-.LBB177_5:
-	movl	-136(%ebp), %edx        # 4-byte Reload
-	movl	%ecx, %esi
-	addl	%esi, %esi
-	adcl	%edx, %edx
-	movl	-124(%ebp), %edi        # 4-byte Reload
-	pushl	%eax
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB177_6
-# BB#7:
-	xorl	%edx, %edx
-	movl	$0, -128(%ebp)          # 4-byte Folded Spill
-	movl	-140(%ebp), %edi        # 4-byte Reload
-	jmp	.LBB177_8
-.LBB177_6:
-	movl	%ecx, -124(%ebp)        # 4-byte Spill
-	movl	-140(%ebp), %edi        # 4-byte Reload
-	movl	%edi, %ecx
-	shrl	$31, %ecx
-	orl	%esi, %ecx
-	movl	%ecx, -128(%ebp)        # 4-byte Spill
-	movl	-124(%ebp), %ecx        # 4-byte Reload
-.LBB177_8:
-	movl	%edx, -124(%ebp)        # 4-byte Spill
-	movl	%ebx, -84(%ebp)
-	movl	%eax, -80(%ebp)
-	movl	-144(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -76(%ebp)
-	movl	%edi, -72(%ebp)
-	movl	%ecx, -68(%ebp)
-	movl	-136(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -64(%ebp)
-	movl	%ebx, -108(%ebp)
-	movl	%eax, -104(%ebp)
-	movl	%esi, -100(%ebp)
-	movl	%edi, -96(%ebp)
-	movl	%ecx, -92(%ebp)
-	movl	%edx, -88(%ebp)
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB177_9
-# BB#10:
-	movl	$0, -136(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB177_11
-.LBB177_9:
-	shrl	$31, %edx
-	movl	%edx, -136(%ebp)        # 4-byte Spill
-.LBB177_11:
-	leal	-108(%ebp), %eax
-	movl	%eax, 8(%esp)
-	leal	-84(%ebp), %eax
-	movl	%eax, 4(%esp)
-	leal	-60(%ebp), %eax
-	movl	%eax, (%esp)
-	movl	-148(%ebp), %esi        # 4-byte Reload
-	andl	$1, %esi
-	movl	-152(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre6L@PLT
-	movl	-112(%ebp), %eax        # 4-byte Reload
-	addl	-36(%ebp), %eax
-	movl	%eax, -112(%ebp)        # 4-byte Spill
-	movl	-132(%ebp), %edi        # 4-byte Reload
-	adcl	-32(%ebp), %edi
-	movl	-120(%ebp), %eax        # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -120(%ebp)        # 4-byte Spill
-	movl	-116(%ebp), %eax        # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -116(%ebp)        # 4-byte Spill
-	movl	-128(%ebp), %eax        # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -128(%ebp)        # 4-byte Spill
-	movl	-124(%ebp), %eax        # 4-byte Reload
-	adcl	-16(%ebp), %eax
-	movl	%eax, -124(%ebp)        # 4-byte Spill
-	adcl	-136(%ebp), %esi        # 4-byte Folded Reload
-	movl	-60(%ebp), %edx
-	movl	8(%ebp), %eax
-	subl	(%eax), %edx
-	movl	-56(%ebp), %ebx
-	sbbl	4(%eax), %ebx
-	movl	-52(%ebp), %ecx
-	sbbl	8(%eax), %ecx
-	movl	%ecx, -136(%ebp)        # 4-byte Spill
-	movl	-48(%ebp), %ecx
-	sbbl	12(%eax), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	movl	-44(%ebp), %ecx
-	sbbl	16(%eax), %ecx
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	movl	-40(%ebp), %ecx
-	sbbl	20(%eax), %ecx
-	movl	%ecx, -140(%ebp)        # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, -148(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -112(%ebp)        # 4-byte Folded Spill
-	movl	28(%eax), %ecx
-	movl	%ecx, -152(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edi
-	movl	%edi, -132(%ebp)        # 4-byte Spill
-	movl	32(%eax), %ecx
-	movl	%ecx, -156(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -120(%ebp)        # 4-byte Folded Spill
-	movl	36(%eax), %ecx
-	movl	%ecx, -160(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -116(%ebp)        # 4-byte Folded Spill
-	movl	40(%eax), %ecx
-	movl	%ecx, -164(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -128(%ebp)        # 4-byte Folded Spill
-	movl	44(%eax), %ecx
-	movl	%ecx, -168(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -124(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, %esi
-	movl	48(%eax), %ecx
-	movl	%ecx, -192(%ebp)        # 4-byte Spill
-	subl	%ecx, %edx
-	movl	52(%eax), %ecx
-	movl	%ecx, -196(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %ebx
-	movl	56(%eax), %ecx
-	movl	%ecx, -200(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -136(%ebp)        # 4-byte Folded Spill
-	movl	60(%eax), %ecx
-	movl	%ecx, -204(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -144(%ebp)        # 4-byte Folded Spill
-	movl	64(%eax), %ecx
-	movl	%ecx, -208(%ebp)        # 4-byte Spill
-	movl	-172(%ebp), %edi        # 4-byte Reload
-	sbbl	%ecx, %edi
-	movl	68(%eax), %ecx
-	movl	%ecx, -212(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -140(%ebp)        # 4-byte Folded Spill
-	movl	72(%eax), %ecx
-	movl	%ecx, -216(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -112(%ebp)        # 4-byte Folded Spill
-	movl	76(%eax), %ecx
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -132(%ebp)        # 4-byte Folded Spill
-	movl	80(%eax), %ecx
-	movl	%ecx, -176(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -120(%ebp)        # 4-byte Folded Spill
-	movl	84(%eax), %ecx
-	movl	%ecx, -180(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -116(%ebp)        # 4-byte Folded Spill
-	movl	88(%eax), %ecx
-	movl	%ecx, -184(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -128(%ebp)        # 4-byte Folded Spill
-	movl	92(%eax), %ecx
-	movl	%ecx, -188(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -124(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, %esi
-	addl	-148(%ebp), %edx        # 4-byte Folded Reload
-	adcl	-152(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%edx, 24(%eax)
-	movl	-136(%ebp), %ecx        # 4-byte Reload
-	adcl	-156(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%ebx, 28(%eax)
-	movl	-144(%ebp), %edx        # 4-byte Reload
-	adcl	-160(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 32(%eax)
-	adcl	-164(%ebp), %edi        # 4-byte Folded Reload
-	movl	%edx, 36(%eax)
-	movl	-140(%ebp), %edx        # 4-byte Reload
-	adcl	-168(%ebp), %edx        # 4-byte Folded Reload
-	movl	%edi, 40(%eax)
-	movl	-112(%ebp), %ecx        # 4-byte Reload
-	adcl	-192(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 44(%eax)
-	movl	-132(%ebp), %edi        # 4-byte Reload
-	adcl	-196(%ebp), %edi        # 4-byte Folded Reload
-	movl	%ecx, 48(%eax)
-	movl	-120(%ebp), %edx        # 4-byte Reload
-	adcl	-200(%ebp), %edx        # 4-byte Folded Reload
-	movl	%edi, 52(%eax)
-	movl	-116(%ebp), %ecx        # 4-byte Reload
-	adcl	-204(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 56(%eax)
-	movl	-128(%ebp), %edx        # 4-byte Reload
-	adcl	-208(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 60(%eax)
-	movl	-124(%ebp), %ecx        # 4-byte Reload
-	adcl	-212(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 64(%eax)
-	adcl	-216(%ebp), %esi        # 4-byte Folded Reload
-	movl	%ecx, 68(%eax)
-	movl	%esi, 72(%eax)
-	movl	-172(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 76(%eax)
-	movl	-176(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 80(%eax)
-	movl	-180(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 84(%eax)
-	movl	-184(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 88(%eax)
-	movl	-188(%ebp), %ecx        # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 92(%eax)
-	addl	$220, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end177:
-	.size	mcl_fpDbl_sqrPre12L, .Lfunc_end177-mcl_fpDbl_sqrPre12L
-
-	.globl	mcl_fp_mont12L
-	.align	16, 0x90
-	.type	mcl_fp_mont12L,@function
-mcl_fp_mont12L:                         # @mcl_fp_mont12L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1436, %esp             # imm = 0x59C
-	calll	.L178$pb
-.L178$pb:
-	popl	%ebx
-.Ltmp29:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp29-.L178$pb), %ebx
-	movl	1468(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1384(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	1384(%esp), %ebp
-	movl	1388(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	1432(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	1428(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	1424(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	1420(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1416(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1412(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1408(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1404(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1400(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1396(%esp), %edi
-	movl	1392(%esp), %esi
-	movl	%eax, (%esp)
-	leal	1328(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	addl	1328(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1336(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	adcl	1340(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1352(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1356(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1360(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1364(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	1372(%esp), %esi
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1376(%esp), %ebp
-	sbbl	%edi, %edi
-	movl	1464(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1272(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %edi
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	1272(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1288(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1300(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1308(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1312(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	1316(%esp), %ebp
-	adcl	1320(%esp), %edi
-	sbbl	%eax, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1216(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	movl	84(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1216(%esp), %esi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1220(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	1224(%esp), %esi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1228(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1232(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1236(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1240(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1244(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1248(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1252(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1256(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	1260(%esp), %ebp
-	adcl	1264(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1160(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	1160(%esp), %ecx
-	adcl	1164(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1188(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	1200(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	adcl	1204(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1208(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1104(%esp), %ecx
-	movl	1468(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv384x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	1104(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1116(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	1140(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1144(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1148(%esp), %edi
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	1152(%esp), %ebp
-	adcl	$0, %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1048(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	68(%esp), %ecx          # 4-byte Reload
-	addl	1048(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1080(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1084(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1088(%esp), %edi
-	adcl	1092(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	992(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %esi
-	movl	%esi, %eax
-	addl	992(%esp), %ebp
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	996(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1000(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	1004(%esp), %ebp
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1008(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1012(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1016(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1020(%esp), %esi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1024(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1028(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	1032(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1036(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1040(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	adcl	$0, %edi
-	movl	1464(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	936(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	936(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	944(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	948(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	960(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	980(%esp), %esi
-	adcl	984(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	880(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	880(%esp), %eax
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	892(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	912(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	924(%esp), %esi
-	movl	%esi, %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	928(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	824(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	824(%esp), %ecx
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	840(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	852(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	856(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	864(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	768(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	768(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	780(%esp), %ebp
-	adcl	784(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	800(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	808(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	712(%esp), %ecx
-	movl	1460(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv384x32
-	movl	44(%esp), %eax          # 4-byte Reload
-	addl	712(%esp), %eax
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	716(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	720(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	724(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	728(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	732(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	736(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	740(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	744(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	748(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	752(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	756(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	760(%esp), %edi
-	sbbl	%ebp, %ebp
-	movl	%eax, %esi
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	656(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %ebp
-	movl	%ebp, %eax
-	addl	656(%esp), %esi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	660(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	664(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	668(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	672(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	676(%esp), %ebp
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	680(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	684(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	688(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	692(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	696(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	704(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	adcl	$0, %edi
-	movl	1464(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	600(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	600(%esp), %ecx
-	adcl	604(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	616(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	620(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	636(%esp), %esi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	648(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	544(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	movl	44(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	544(%esp), %edi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	548(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	552(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	556(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	560(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	564(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	568(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	572(%esp), %edi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	576(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	580(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	584(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	588(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	592(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	488(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	488(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	512(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	524(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	532(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	536(%esp), %ebp
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	432(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	432(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	440(%esp), %edi
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	444(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	480(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	376(%esp), %ecx
-	adcl	380(%esp), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	adcl	384(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	392(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	416(%esp), %ebp
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	320(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	movl	76(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	320(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	332(%esp), %esi
-	adcl	336(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	344(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	360(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	88(%esp), %ecx          # 4-byte Reload
-	addl	264(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	272(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	284(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	288(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	296(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	208(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	movl	88(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	208(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	224(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	232(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	236(%esp), %edi
-	adcl	240(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	248(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	1464(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	152(%esp), %ecx
-	movl	1460(%esp), %edx
-	calll	.LmulPv384x32
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	152(%esp), %ecx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	164(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	176(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	188(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	196(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	96(%esp), %ecx
-	movl	1468(%esp), %edx
-	calll	.LmulPv384x32
-	andl	$1, %esi
-	addl	96(%esp), %edi
-	movl	84(%esp), %ebx          # 4-byte Reload
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	100(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	adcl	108(%esp), %ebx
-	adcl	112(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	120(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	124(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	128(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	132(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	136(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	140(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	144(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	adcl	$0, %esi
-	movl	1468(%esp), %edx
-	subl	(%edx), %eax
-	sbbl	4(%edx), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	%ebx, %edi
-	sbbl	8(%edx), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	sbbl	12(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	sbbl	20(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%edx), %ecx
-	movl	44(%esp), %edi          # 4-byte Reload
-	sbbl	32(%edx), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	sbbl	36(%edx), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	sbbl	40(%edx), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	sbbl	44(%edx), %ebp
-	movl	%ebp, %edx
-	sbbl	$0, %esi
-	andl	$1, %esi
-	jne	.LBB178_2
-# BB#1:
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-.LBB178_2:
-	movl	%esi, %ecx
-	testb	%cl, %cl
-	movl	92(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_4
-# BB#3:
-	movl	%eax, %ecx
-.LBB178_4:
-	movl	1456(%esp), %eax
-	movl	%ecx, (%eax)
-	movl	68(%esp), %edi          # 4-byte Reload
-	jne	.LBB178_6
-# BB#5:
-	movl	16(%esp), %edi          # 4-byte Reload
-.LBB178_6:
-	movl	%edi, 4(%eax)
-	movl	64(%esp), %ebp          # 4-byte Reload
-	jne	.LBB178_8
-# BB#7:
-	movl	20(%esp), %ebx          # 4-byte Reload
-.LBB178_8:
-	movl	%ebx, 8(%eax)
-	jne	.LBB178_10
-# BB#9:
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-.LBB178_10:
-	movl	72(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	jne	.LBB178_12
-# BB#11:
-	movl	28(%esp), %ebp          # 4-byte Reload
-.LBB178_12:
-	movl	%ebp, 16(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_14
-# BB#13:
-	movl	32(%esp), %ecx          # 4-byte Reload
-.LBB178_14:
-	movl	%ecx, 20(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_16
-# BB#15:
-	movl	36(%esp), %ecx          # 4-byte Reload
-.LBB178_16:
-	movl	%ecx, 24(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_18
-# BB#17:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB178_18:
-	movl	%ecx, 32(%eax)
-	movl	60(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_20
-# BB#19:
-	movl	80(%esp), %ecx          # 4-byte Reload
-.LBB178_20:
-	movl	%ecx, 36(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_22
-# BB#21:
-	movl	84(%esp), %ecx          # 4-byte Reload
-.LBB178_22:
-	movl	%ecx, 40(%eax)
-	movl	88(%esp), %ecx          # 4-byte Reload
-	jne	.LBB178_24
-# BB#23:
-	movl	%edx, %ecx
-.LBB178_24:
-	movl	%ecx, 44(%eax)
-	addl	$1436, %esp             # imm = 0x59C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end178:
-	.size	mcl_fp_mont12L, .Lfunc_end178-mcl_fp_mont12L
-
-	.globl	mcl_fp_montNF12L
-	.align	16, 0x90
-	.type	mcl_fp_montNF12L,@function
-mcl_fp_montNF12L:                       # @mcl_fp_montNF12L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1420, %esp             # imm = 0x58C
-	calll	.L179$pb
-.L179$pb:
-	popl	%ebx
-.Ltmp30:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp30-.L179$pb), %ebx
-	movl	1452(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	1448(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1368(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	1368(%esp), %ebp
-	movl	1372(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	1416(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1412(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1408(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	1404(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	1400(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1396(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1392(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	1388(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1384(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1380(%esp), %edi
-	movl	1376(%esp), %esi
-	movl	%eax, (%esp)
-	leal	1312(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	1312(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1320(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	adcl	1324(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1328(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1336(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	1344(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1352(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	1356(%esp), %esi
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	1360(%esp), %ebp
-	movl	1448(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1256(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	1304(%esp), %eax
-	movl	56(%esp), %edx          # 4-byte Reload
-	addl	1256(%esp), %edx
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1260(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1264(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1268(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1272(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	1276(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1280(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	1284(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1288(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1292(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	1296(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	1300(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, %edi
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1200(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	1200(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1204(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	1208(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1212(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1216(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1220(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1228(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1232(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1236(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1240(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1244(%esp), %ebp
-	adcl	1248(%esp), %edi
-	movl	1448(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1144(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	1192(%esp), %eax
-	movl	68(%esp), %edx          # 4-byte Reload
-	addl	1144(%esp), %edx
-	adcl	1148(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1152(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1156(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	1160(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1164(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1168(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1172(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1176(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1180(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	1184(%esp), %ebp
-	adcl	1188(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1088(%esp), %ecx
-	movl	1452(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv384x32
-	addl	1088(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1092(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	%esi, %edi
-	adcl	1104(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1116(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	1124(%esp), %esi
-	adcl	1128(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	1136(%esp), %ebp
-	movl	1448(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1032(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	1080(%esp), %eax
-	movl	52(%esp), %edx          # 4-byte Reload
-	addl	1032(%esp), %edx
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1036(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1040(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	1044(%esp), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1048(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1052(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1056(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1060(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	1064(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1068(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1072(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	1076(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %edi
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	976(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	976(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	1004(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	1012(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1024(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	1448(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	920(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	968(%esp), %eax
-	movl	40(%esp), %edx          # 4-byte Reload
-	addl	920(%esp), %edx
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	924(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	928(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	932(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	936(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	940(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	944(%esp), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	948(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	952(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	956(%esp), %edi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	960(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	964(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	864(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	864(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	884(%esp), %ebp
-	adcl	888(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	900(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	912(%esp), %edi
-	movl	1448(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	808(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	856(%esp), %edx
-	movl	36(%esp), %ecx          # 4-byte Reload
-	addl	808(%esp), %ecx
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	824(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	adcl	828(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	832(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	852(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	752(%esp), %ecx
-	movl	1452(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv384x32
-	addl	752(%esp), %esi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	760(%esp), %edi
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	764(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	776(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	792(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1448(%esp), %ecx
-	movl	%ecx, %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	1444(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv384x32
-	movl	744(%esp), %ecx
-	movl	32(%esp), %eax          # 4-byte Reload
-	addl	696(%esp), %eax
-	adcl	700(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	704(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	708(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	712(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	716(%esp), %esi
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	720(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	724(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	728(%esp), %edi
-	adcl	732(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	736(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	740(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	640(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	640(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	648(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	660(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	668(%esp), %esi
-	adcl	672(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	676(%esp), %edi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1448(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	584(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	632(%esp), %edx
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	584(%esp), %ecx
-	adcl	588(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	596(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	608(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	616(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	620(%esp), %edi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	528(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	528(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	540(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	564(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	568(%esp), %edi
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	572(%esp), %esi
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	576(%esp), %ebp
-	movl	1448(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	520(%esp), %edx
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	472(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	508(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	512(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	516(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	416(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	432(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	440(%esp), %ebp
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	444(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1448(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	360(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	408(%esp), %edx
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	360(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	372(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	380(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	adcl	384(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	304(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	312(%esp), %edi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	320(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	328(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %esi          # 4-byte Reload
-	adcl	332(%esp), %esi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1448(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	248(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	296(%esp), %edx
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	248(%esp), %ecx
-	adcl	252(%esp), %edi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	260(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%esi, %ebp
-	adcl	272(%esp), %ebp
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	192(%esp), %ecx
-	movl	1452(%esp), %edx
-	calll	.LmulPv384x32
-	addl	192(%esp), %esi
-	adcl	196(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	200(%esp), %edi
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	204(%esp), %esi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	216(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	adcl	224(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	1448(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	136(%esp), %ecx
-	movl	1444(%esp), %edx
-	calll	.LmulPv384x32
-	movl	184(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	136(%esp), %ecx
-	adcl	140(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	adcl	144(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	152(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	160(%esp), %edi
-	adcl	164(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	168(%esp), %ebp
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	80(%esp), %ecx
-	movl	1452(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv384x32
-	addl	80(%esp), %esi
-	movl	56(%esp), %esi          # 4-byte Reload
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	88(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	92(%esp), %esi
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	100(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	104(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %edx          # 4-byte Reload
-	adcl	108(%esp), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	112(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	120(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	124(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	128(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	1452(%esp), %ebp
-	subl	(%ebp), %edx
-	movl	%ecx, %eax
-	sbbl	4(%ebp), %eax
-	movl	%esi, %ebx
-	sbbl	8(%ebp), %ebx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	sbbl	12(%ebp), %ecx
-	movl	40(%esp), %edi          # 4-byte Reload
-	sbbl	16(%ebp), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	sbbl	20(%ebp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	32(%esp), %edi          # 4-byte Reload
-	sbbl	24(%ebp), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	sbbl	28(%ebp), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	sbbl	32(%ebp), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	sbbl	36(%ebp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	sbbl	40(%ebp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	sbbl	44(%ebp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	%edi, %ebp
-	sarl	$31, %ebp
-	testl	%ebp, %ebp
-	movl	76(%esp), %ebp          # 4-byte Reload
-	js	.LBB179_2
-# BB#1:
-	movl	%edx, %ebp
-.LBB179_2:
-	movl	1440(%esp), %edx
-	movl	%ebp, (%edx)
-	movl	68(%esp), %edi          # 4-byte Reload
-	js	.LBB179_4
-# BB#3:
-	movl	%eax, %edi
-.LBB179_4:
-	movl	%edi, 4(%edx)
-	js	.LBB179_6
-# BB#5:
-	movl	%ebx, %esi
-.LBB179_6:
-	movl	%esi, 8(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB179_8
-# BB#7:
-	movl	%ecx, %eax
-.LBB179_8:
-	movl	%eax, 12(%edx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB179_10
-# BB#9:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB179_10:
-	movl	%eax, 16(%edx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	js	.LBB179_12
-# BB#11:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB179_12:
-	movl	%eax, 20(%edx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	js	.LBB179_14
-# BB#13:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB179_14:
-	movl	%eax, 24(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB179_16
-# BB#15:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB179_16:
-	movl	%eax, 28(%edx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	js	.LBB179_18
-# BB#17:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB179_18:
-	movl	%eax, 32(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	js	.LBB179_20
-# BB#19:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB179_20:
-	movl	%eax, 36(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB179_22
-# BB#21:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB179_22:
-	movl	%eax, 40(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB179_24
-# BB#23:
-	movl	56(%esp), %eax          # 4-byte Reload
-.LBB179_24:
-	movl	%eax, 44(%edx)
-	addl	$1420, %esp             # imm = 0x58C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end179:
-	.size	mcl_fp_montNF12L, .Lfunc_end179-mcl_fp_montNF12L
-
-	.globl	mcl_fp_montRed12L
-	.align	16, 0x90
-	.type	mcl_fp_montRed12L,@function
-mcl_fp_montRed12L:                      # @mcl_fp_montRed12L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$828, %esp              # imm = 0x33C
-	calll	.L180$pb
-.L180$pb:
-	popl	%eax
-.Ltmp31:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp31-.L180$pb), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	856(%esp), %edx
-	movl	-4(%edx), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	852(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 88(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	imull	%esi, %ebx
-	movl	92(%ecx), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	88(%ecx), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	84(%ecx), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	80(%ecx), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	76(%ecx), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	72(%ecx), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	68(%ecx), %edi
-	movl	%edi, 140(%esp)         # 4-byte Spill
-	movl	64(%ecx), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	60(%ecx), %esi
-	movl	%esi, 148(%esp)         # 4-byte Spill
-	movl	56(%ecx), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	52(%ecx), %esi
-	movl	%esi, 156(%esp)         # 4-byte Spill
-	movl	48(%ecx), %edi
-	movl	%edi, 152(%esp)         # 4-byte Spill
-	movl	44(%ecx), %edi
-	movl	%edi, 132(%esp)         # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	32(%ecx), %edi
-	movl	28(%ecx), %esi
-	movl	24(%ecx), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	20(%ecx), %ebp
-	movl	16(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	12(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	8(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	44(%edx), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	776(%esp), %ecx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	movl	88(%esp), %eax          # 4-byte Reload
-	addl	776(%esp), %eax
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	780(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	796(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	804(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	adcl	808(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	movl	136(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	720(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	720(%esp), %esi
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	724(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	752(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	664(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	664(%esp), %ebp
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	668(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	692(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	696(%esp), %ebp
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	712(%esp), %edi
-	movl	%edi, 136(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	movl	144(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	608(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	608(%esp), %esi
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	612(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	636(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 144(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	552(%esp), %esi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	556(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, %esi
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	496(%esp), %edi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	500(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %ebp         # 4-byte Reload
-	adcl	528(%esp), %ebp
-	movl	136(%esp), %edi         # 4-byte Reload
-	adcl	532(%esp), %edi
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	440(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	440(%esp), %esi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	468(%esp), %ebp
-	movl	%ebp, 156(%esp)         # 4-byte Spill
-	adcl	472(%esp), %edi
-	movl	%edi, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %esi         # 4-byte Reload
-	adcl	476(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	384(%esp), %ecx
-	movl	856(%esp), %eax
-	movl	%eax, %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	384(%esp), %edi
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	388(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %ebp         # 4-byte Reload
-	adcl	400(%esp), %ebp
-	movl	152(%esp), %edi         # 4-byte Reload
-	adcl	404(%esp), %edi
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	adcl	416(%esp), %esi
-	movl	%esi, 148(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	140(%esp), %esi         # 4-byte Reload
-	adcl	424(%esp), %esi
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 76(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	movl	100(%esp), %eax         # 4-byte Reload
-	addl	328(%esp), %eax
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	336(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	adcl	340(%esp), %ebp
-	movl	%ebp, 132(%esp)         # 4-byte Spill
-	adcl	344(%esp), %edi
-	movl	%edi, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	348(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %ecx         # 4-byte Reload
-	adcl	352(%esp), %ecx
-	movl	%ecx, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %ecx         # 4-byte Reload
-	adcl	356(%esp), %ecx
-	movl	%ecx, 148(%esp)         # 4-byte Spill
-	movl	116(%esp), %ebp         # 4-byte Reload
-	adcl	360(%esp), %ebp
-	adcl	364(%esp), %esi
-	movl	%esi, 140(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	368(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	144(%esp), %ecx         # 4-byte Reload
-	adcl	372(%esp), %ecx
-	movl	%ecx, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	376(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	$0, %edi
-	movl	%eax, %esi
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	272(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	280(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	284(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	288(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %ecx         # 4-byte Reload
-	adcl	292(%esp), %ecx
-	movl	%ecx, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %ecx         # 4-byte Reload
-	adcl	296(%esp), %ecx
-	movl	%ecx, 148(%esp)         # 4-byte Spill
-	movl	%ebp, %esi
-	adcl	300(%esp), %esi
-	movl	140(%esp), %ecx         # 4-byte Reload
-	adcl	304(%esp), %ecx
-	movl	%ecx, 140(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	308(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	144(%esp), %ecx         # 4-byte Reload
-	adcl	312(%esp), %ecx
-	movl	%ecx, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	316(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	320(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, %ebp
-	movl	%eax, %edi
-	imull	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	216(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	216(%esp), %edi
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	220(%esp), %ecx
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	240(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	124(%esp), %esi         # 4-byte Reload
-	adcl	248(%esp), %esi
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	160(%esp), %ecx
-	movl	856(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv384x32
-	addl	160(%esp), %edi
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	%eax, %edi
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	136(%esp), %edx         # 4-byte Reload
-	adcl	172(%esp), %edx
-	movl	%edx, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %ebx         # 4-byte Reload
-	adcl	176(%esp), %ebx
-	movl	%ebx, 148(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	180(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	adcl	188(%esp), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, %ebp
-	subl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	156(%esp), %esi         # 4-byte Reload
-	sbbl	16(%esp), %esi          # 4-byte Folded Reload
-	sbbl	20(%esp), %edx          # 4-byte Folded Reload
-	sbbl	28(%esp), %ebx          # 4-byte Folded Reload
-	sbbl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	140(%esp), %eax         # 4-byte Reload
-	sbbl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	sbbl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	sbbl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	sbbl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	sbbl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	sbbl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	sbbl	$0, %ebp
-	andl	$1, %ebp
-	jne	.LBB180_2
-# BB#1:
-	movl	%ebx, 148(%esp)         # 4-byte Spill
-.LBB180_2:
-	movl	%ebp, %ebx
-	testb	%bl, %bl
-	movl	152(%esp), %ebx         # 4-byte Reload
-	jne	.LBB180_4
-# BB#3:
-	movl	%edi, %ebx
-.LBB180_4:
-	movl	848(%esp), %edi
-	movl	%ebx, (%edi)
-	movl	144(%esp), %ebx         # 4-byte Reload
-	jne	.LBB180_6
-# BB#5:
-	movl	%esi, 156(%esp)         # 4-byte Spill
-.LBB180_6:
-	movl	156(%esp), %esi         # 4-byte Reload
-	movl	%esi, 4(%edi)
-	movl	136(%esp), %esi         # 4-byte Reload
-	jne	.LBB180_8
-# BB#7:
-	movl	%edx, %esi
-.LBB180_8:
-	movl	%esi, 8(%edi)
-	movl	148(%esp), %edx         # 4-byte Reload
-	movl	%edx, 12(%edi)
-	movl	128(%esp), %esi         # 4-byte Reload
-	movl	116(%esp), %edx         # 4-byte Reload
-	jne	.LBB180_10
-# BB#9:
-	movl	%ecx, %edx
-.LBB180_10:
-	movl	%edx, 16(%edi)
-	movl	120(%esp), %edx         # 4-byte Reload
-	movl	140(%esp), %ecx         # 4-byte Reload
-	jne	.LBB180_12
-# BB#11:
-	movl	84(%esp), %ecx          # 4-byte Reload
-.LBB180_12:
-	movl	%ecx, 20(%edi)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	movl	124(%esp), %eax         # 4-byte Reload
-	jne	.LBB180_14
-# BB#13:
-	movl	88(%esp), %eax          # 4-byte Reload
-.LBB180_14:
-	movl	%eax, 24(%edi)
-	movl	104(%esp), %eax         # 4-byte Reload
-	jne	.LBB180_16
-# BB#15:
-	movl	92(%esp), %ebx          # 4-byte Reload
-.LBB180_16:
-	movl	%ebx, 28(%edi)
-	jne	.LBB180_18
-# BB#17:
-	movl	96(%esp), %esi          # 4-byte Reload
-.LBB180_18:
-	movl	%esi, 32(%edi)
-	jne	.LBB180_20
-# BB#19:
-	movl	100(%esp), %edx         # 4-byte Reload
-.LBB180_20:
-	movl	%edx, 36(%edi)
-	jne	.LBB180_22
-# BB#21:
-	movl	112(%esp), %ecx         # 4-byte Reload
-.LBB180_22:
-	movl	%ecx, 40(%edi)
-	jne	.LBB180_24
-# BB#23:
-	movl	132(%esp), %eax         # 4-byte Reload
-.LBB180_24:
-	movl	%eax, 44(%edi)
-	addl	$828, %esp              # imm = 0x33C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end180:
-	.size	mcl_fp_montRed12L, .Lfunc_end180-mcl_fp_montRed12L
-
-	.globl	mcl_fp_addPre12L
-	.align	16, 0x90
-	.type	mcl_fp_addPre12L,@function
-mcl_fp_addPre12L:                       # @mcl_fp_addPre12L
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %edi
-	adcl	8(%ecx), %edi
-	movl	16(%esp), %ebx
-	movl	%edx, (%ebx)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%ebx)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ebx)
-	movl	20(%ecx), %edx
-	adcl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ebx)
-	movl	24(%ecx), %esi
-	adcl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ebx)
-	movl	28(%ecx), %edx
-	adcl	%edi, %edx
-	movl	32(%eax), %edi
-	movl	%esi, 24(%ebx)
-	movl	32(%ecx), %esi
-	adcl	%edi, %esi
-	movl	36(%eax), %edi
-	movl	%edx, 28(%ebx)
-	movl	36(%ecx), %edx
-	adcl	%edi, %edx
-	movl	40(%eax), %edi
-	movl	%esi, 32(%ebx)
-	movl	40(%ecx), %esi
-	adcl	%edi, %esi
-	movl	%edx, 36(%ebx)
-	movl	%esi, 40(%ebx)
-	movl	44(%eax), %eax
-	movl	44(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 44(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end181:
-	.size	mcl_fp_addPre12L, .Lfunc_end181-mcl_fp_addPre12L
-
-	.globl	mcl_fp_subPre12L
-	.align	16, 0x90
-	.type	mcl_fp_subPre12L,@function
-mcl_fp_subPre12L:                       # @mcl_fp_subPre12L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edx), %ebx
-	movl	20(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebp)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ebp)
-	movl	24(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ebp)
-	movl	28(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%edi, 24(%ebp)
-	movl	32(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	36(%edx), %ebx
-	movl	%esi, 28(%ebp)
-	movl	36(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	40(%edx), %ebx
-	movl	%edi, 32(%ebp)
-	movl	40(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	%esi, 36(%ebp)
-	movl	%edi, 40(%ebp)
-	movl	44(%edx), %edx
-	movl	44(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 44(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end182:
-	.size	mcl_fp_subPre12L, .Lfunc_end182-mcl_fp_subPre12L
-
-	.globl	mcl_fp_shr1_12L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_12L,@function
-mcl_fp_shr1_12L:                        # @mcl_fp_shr1_12L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	8(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 4(%ecx)
-	movl	12(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 8(%ecx)
-	movl	16(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 16(%ecx)
-	movl	24(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 24(%ecx)
-	movl	32(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 32(%ecx)
-	movl	40(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	%edx, 40(%ecx)
-	shrl	%eax
-	movl	%eax, 44(%ecx)
-	popl	%esi
-	retl
-.Lfunc_end183:
-	.size	mcl_fp_shr1_12L, .Lfunc_end183-mcl_fp_shr1_12L
-
-	.globl	mcl_fp_add12L
-	.align	16, 0x90
-	.type	mcl_fp_add12L,@function
-mcl_fp_add12L:                          # @mcl_fp_add12L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$36, %esp
-	movl	64(%esp), %ebx
-	movl	(%ebx), %edx
-	movl	4(%ebx), %ecx
-	movl	60(%esp), %eax
-	addl	(%eax), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	4(%eax), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	8(%ebx), %ecx
-	adcl	8(%eax), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	12(%eax), %edx
-	movl	16(%eax), %ecx
-	adcl	12(%ebx), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	16(%ebx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	20(%eax), %ecx
-	adcl	20(%ebx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	24(%eax), %ecx
-	adcl	24(%ebx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	28(%eax), %ecx
-	adcl	28(%ebx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	32(%eax), %ebp
-	adcl	32(%ebx), %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	movl	36(%eax), %edi
-	adcl	36(%ebx), %edi
-	movl	40(%eax), %esi
-	adcl	40(%ebx), %esi
-	movl	44(%eax), %edx
-	adcl	44(%ebx), %edx
-	movl	56(%esp), %ebx
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, (%ebx)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 4(%ebx)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 8(%ebx)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%ebx)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 16(%ebx)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 20(%ebx)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%ebx)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 28(%ebx)
-	movl	%ebp, 32(%ebx)
-	movl	%edi, 36(%ebx)
-	movl	%esi, 40(%ebx)
-	movl	%edx, 44(%ebx)
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	68(%esp), %ebp
-	subl	(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	sbbl	4(%ebp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	sbbl	8(%ebp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	sbbl	12(%ebp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	sbbl	16(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	sbbl	20(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	sbbl	24(%ebp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %eax           # 4-byte Reload
-	sbbl	28(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	(%esp), %eax            # 4-byte Reload
-	sbbl	32(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	sbbl	36(%ebp), %edi
-	sbbl	40(%ebp), %esi
-	sbbl	44(%ebp), %edx
-	sbbl	$0, %ecx
-	testb	$1, %cl
-	jne	.LBB184_2
-# BB#1:                                 # %nocarry
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, (%ebx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 4(%ebx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 8(%ebx)
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 16(%ebx)
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 20(%ebx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 24(%ebx)
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 28(%ebx)
-	movl	(%esp), %eax            # 4-byte Reload
-	movl	%eax, 32(%ebx)
-	movl	%edi, 36(%ebx)
-	movl	%esi, 40(%ebx)
-	movl	%edx, 44(%ebx)
-.LBB184_2:                              # %carry
-	addl	$36, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end184:
-	.size	mcl_fp_add12L, .Lfunc_end184-mcl_fp_add12L
-
-	.globl	mcl_fp_addNF12L
-	.align	16, 0x90
-	.type	mcl_fp_addNF12L,@function
-mcl_fp_addNF12L:                        # @mcl_fp_addNF12L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$88, %esp
-	movl	116(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	movl	112(%esp), %edx
-	addl	(%edx), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	4(%edx), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	44(%esi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	40(%esi), %ebp
-	movl	36(%esi), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	32(%esi), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	28(%esi), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	20(%esi), %ebx
-	movl	16(%esi), %edi
-	movl	12(%esi), %ecx
-	movl	8(%esi), %eax
-	adcl	8(%edx), %eax
-	adcl	12(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	16(%edx), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	adcl	20(%edx), %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	24(%edx), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	28(%edx), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	32(%edx), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	36(%edx), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	adcl	40(%edx), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	44(%edx), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	120(%esp), %ebp
-	movl	60(%esp), %edx          # 4-byte Reload
-	subl	(%ebp), %edx
-	movl	64(%esp), %eax          # 4-byte Reload
-	sbbl	4(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%esi, %eax
-	sbbl	8(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	16(%ebp), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	sbbl	20(%ebp), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ecx
-	sbbl	24(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%ebp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%ebp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	sbbl	36(%ebp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %edi
-	sbbl	40(%ebp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	sbbl	44(%ebp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	%edi, %ebp
-	movl	60(%esp), %edi          # 4-byte Reload
-	sarl	$31, %ebp
-	testl	%ebp, %ebp
-	js	.LBB185_2
-# BB#1:
-	movl	%edx, %edi
-.LBB185_2:
-	movl	108(%esp), %edx
-	movl	%edi, (%edx)
-	movl	64(%esp), %edi          # 4-byte Reload
-	js	.LBB185_4
-# BB#3:
-	movl	(%esp), %edi            # 4-byte Reload
-.LBB185_4:
-	movl	%edi, 4(%edx)
-	movl	%eax, %ebp
-	js	.LBB185_6
-# BB#5:
-	movl	4(%esp), %esi           # 4-byte Reload
-.LBB185_6:
-	movl	%esi, 8(%edx)
-	movl	%ecx, %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	movl	48(%esp), %ecx          # 4-byte Reload
-	js	.LBB185_8
-# BB#7:
-	movl	8(%esp), %ecx           # 4-byte Reload
-.LBB185_8:
-	movl	%ecx, 12(%edx)
-	movl	76(%esp), %ebx          # 4-byte Reload
-	movl	84(%esp), %edi          # 4-byte Reload
-	js	.LBB185_10
-# BB#9:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB185_10:
-	movl	%eax, 16(%edx)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	js	.LBB185_12
-# BB#11:
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-.LBB185_12:
-	movl	56(%esp), %eax          # 4-byte Reload
-	movl	%eax, 20(%edx)
-	js	.LBB185_14
-# BB#13:
-	movl	20(%esp), %ebp          # 4-byte Reload
-.LBB185_14:
-	movl	%ebp, 24(%edx)
-	js	.LBB185_16
-# BB#15:
-	movl	24(%esp), %edi          # 4-byte Reload
-.LBB185_16:
-	movl	%edi, 28(%edx)
-	js	.LBB185_18
-# BB#17:
-	movl	28(%esp), %ebx          # 4-byte Reload
-.LBB185_18:
-	movl	%ebx, 32(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	js	.LBB185_20
-# BB#19:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB185_20:
-	movl	%eax, 36(%edx)
-	js	.LBB185_22
-# BB#21:
-	movl	36(%esp), %esi          # 4-byte Reload
-.LBB185_22:
-	movl	%esi, 40(%edx)
-	js	.LBB185_24
-# BB#23:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB185_24:
-	movl	%ecx, 44(%edx)
-	addl	$88, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end185:
-	.size	mcl_fp_addNF12L, .Lfunc_end185-mcl_fp_addNF12L
-
-	.globl	mcl_fp_sub12L
-	.align	16, 0x90
-	.type	mcl_fp_sub12L,@function
-mcl_fp_sub12L:                          # @mcl_fp_sub12L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$40, %esp
-	movl	64(%esp), %esi
-	movl	(%esi), %ecx
-	movl	4(%esi), %eax
-	xorl	%ebx, %ebx
-	movl	68(%esp), %edi
-	subl	(%edi), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	28(%esi), %edx
-	sbbl	28(%edi), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	32(%esi), %ecx
-	sbbl	32(%edi), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	36(%esi), %eax
-	sbbl	36(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	40(%esi), %ebp
-	sbbl	40(%edi), %ebp
-	movl	44(%esi), %esi
-	sbbl	44(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	60(%esp), %ebx
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	%edx, 28(%ebx)
-	movl	%ecx, 32(%ebx)
-	movl	%eax, 36(%ebx)
-	movl	%ebp, 40(%ebx)
-	movl	%esi, 44(%ebx)
-	je	.LBB186_2
-# BB#1:                                 # %carry
-	movl	%esi, %edi
-	movl	72(%esp), %esi
-	movl	12(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	24(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	8(%esi), %ecx
-	movl	12(%esi), %eax
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	36(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	%eax, 36(%ebx)
-	movl	40(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 40(%ebx)
-	movl	44(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 44(%ebx)
-.LBB186_2:                              # %nocarry
-	addl	$40, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end186:
-	.size	mcl_fp_sub12L, .Lfunc_end186-mcl_fp_sub12L
-
-	.globl	mcl_fp_subNF12L
-	.align	16, 0x90
-	.type	mcl_fp_subNF12L,@function
-mcl_fp_subNF12L:                        # @mcl_fp_subNF12L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$72, %esp
-	movl	96(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %eax
-	movl	100(%esp), %edi
-	subl	(%edi), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	44(%ecx), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	32(%ecx), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	28(%ecx), %ebp
-	movl	24(%ecx), %ebx
-	movl	20(%ecx), %esi
-	movl	16(%ecx), %edx
-	movl	12(%ecx), %eax
-	movl	8(%ecx), %ecx
-	sbbl	8(%edi), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	sbbl	12(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	sbbl	28(%edi), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	sarl	$31, %eax
-	movl	%eax, %edx
-	addl	%edx, %edx
-	movl	%eax, %edi
-	adcl	%edi, %edi
-	movl	%eax, %ebp
-	adcl	%ebp, %ebp
-	movl	%eax, %esi
-	adcl	%esi, %esi
-	shrl	$31, %ecx
-	orl	%edx, %ecx
-	movl	104(%esp), %edx
-	andl	12(%edx), %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	andl	8(%edx), %ebp
-	andl	4(%edx), %edi
-	andl	(%edx), %ecx
-	movl	44(%edx), %esi
-	andl	%eax, %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	40(%edx), %esi
-	andl	%eax, %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	36(%edx), %esi
-	andl	%eax, %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	32(%edx), %esi
-	andl	%eax, %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	28(%edx), %esi
-	andl	%eax, %esi
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	24(%edx), %ebx
-	andl	%eax, %ebx
-	movl	20(%edx), %esi
-	andl	%eax, %esi
-	andl	16(%edx), %eax
-	addl	48(%esp), %ecx          # 4-byte Folded Reload
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	92(%esp), %edx
-	movl	%ecx, (%edx)
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edi, 4(%edx)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebp, 8(%edx)
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 12(%edx)
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%eax, 16(%edx)
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%esi, 20(%edx)
-	movl	(%esp), %ecx            # 4-byte Reload
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebx, 24(%edx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 28(%edx)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	adcl	64(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 32(%edx)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 36(%edx)
-	movl	%eax, 40(%edx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%edx)
-	addl	$72, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end187:
-	.size	mcl_fp_subNF12L, .Lfunc_end187-mcl_fp_subNF12L
-
-	.globl	mcl_fpDbl_add12L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add12L,@function
-mcl_fpDbl_add12L:                       # @mcl_fpDbl_add12L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$88, %esp
-	movl	116(%esp), %ecx
-	movl	112(%esp), %edi
-	movl	12(%edi), %esi
-	movl	16(%edi), %edx
-	movl	8(%ecx), %ebx
-	movl	(%ecx), %ebp
-	addl	(%edi), %ebp
-	movl	108(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%ecx), %ebp
-	adcl	4(%edi), %ebp
-	adcl	8(%edi), %ebx
-	adcl	12(%ecx), %esi
-	adcl	16(%ecx), %edx
-	movl	%ebp, 4(%eax)
-	movl	56(%ecx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%ecx), %ebx
-	movl	%esi, 12(%eax)
-	movl	20(%edi), %esi
-	adcl	%ebx, %esi
-	movl	24(%ecx), %ebx
-	movl	%edx, 16(%eax)
-	movl	24(%edi), %edx
-	adcl	%ebx, %edx
-	movl	28(%ecx), %ebx
-	movl	%esi, 20(%eax)
-	movl	28(%edi), %esi
-	adcl	%ebx, %esi
-	movl	32(%ecx), %ebx
-	movl	%edx, 24(%eax)
-	movl	32(%edi), %edx
-	adcl	%ebx, %edx
-	movl	36(%ecx), %ebx
-	movl	%esi, 28(%eax)
-	movl	36(%edi), %esi
-	adcl	%ebx, %esi
-	movl	40(%ecx), %ebx
-	movl	%edx, 32(%eax)
-	movl	40(%edi), %edx
-	adcl	%ebx, %edx
-	movl	44(%ecx), %ebx
-	movl	%esi, 36(%eax)
-	movl	44(%edi), %esi
-	adcl	%ebx, %esi
-	movl	48(%ecx), %ebx
-	movl	%edx, 40(%eax)
-	movl	48(%edi), %edx
-	adcl	%ebx, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	52(%ecx), %ebx
-	movl	%esi, 44(%eax)
-	movl	52(%edi), %eax
-	adcl	%ebx, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	56(%edi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%ecx), %eax
-	movl	60(%edi), %edx
-	adcl	%eax, %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	64(%ecx), %eax
-	movl	64(%edi), %edx
-	adcl	%eax, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	68(%ecx), %eax
-	movl	68(%edi), %edx
-	adcl	%eax, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	72(%ecx), %eax
-	movl	72(%edi), %edx
-	adcl	%eax, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	76(%ecx), %eax
-	movl	76(%edi), %edx
-	adcl	%eax, %edx
-	movl	80(%ecx), %esi
-	movl	80(%edi), %eax
-	adcl	%esi, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	84(%ecx), %ebx
-	movl	84(%edi), %esi
-	adcl	%ebx, %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	88(%ecx), %ebp
-	movl	88(%edi), %ebx
-	adcl	%ebp, %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	92(%ecx), %ecx
-	movl	92(%edi), %edi
-	adcl	%ecx, %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	120(%esp), %ebp
-	movl	72(%esp), %edi          # 4-byte Reload
-	subl	(%ebp), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	sbbl	4(%ebp), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	sbbl	8(%ebp), %edi
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	sbbl	12(%ebp), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	sbbl	16(%ebp), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	sbbl	20(%ebp), %edi
-	movl	%edi, 8(%esp)           # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	sbbl	24(%ebp), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	%edx, %edi
-	sbbl	28(%ebp), %edi
-	movl	%edi, (%esp)            # 4-byte Spill
-	sbbl	32(%ebp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	sbbl	36(%ebp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	movl	44(%esp), %ebx          # 4-byte Reload
-	sbbl	40(%ebp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %edi
-	sbbl	44(%ebp), %edi
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB188_2
-# BB#1:
-	movl	%edi, %ebx
-.LBB188_2:
-	testb	%cl, %cl
-	movl	72(%esp), %ecx          # 4-byte Reload
-	movl	68(%esp), %esi          # 4-byte Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	movl	60(%esp), %ebp          # 4-byte Reload
-	jne	.LBB188_4
-# BB#3:
-	movl	(%esp), %edx            # 4-byte Reload
-	movl	4(%esp), %esi           # 4-byte Reload
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB188_4:
-	movl	108(%esp), %eax
-	movl	%ecx, 48(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%eax)
-	movl	84(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 60(%eax)
-	movl	%ebp, 64(%eax)
-	movl	%edi, 68(%eax)
-	movl	%esi, 72(%eax)
-	movl	%edx, 76(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	movl	48(%esp), %edx          # 4-byte Reload
-	jne	.LBB188_6
-# BB#5:
-	movl	32(%esp), %edx          # 4-byte Reload
-.LBB188_6:
-	movl	%edx, 80(%eax)
-	movl	52(%esp), %edx          # 4-byte Reload
-	jne	.LBB188_8
-# BB#7:
-	movl	36(%esp), %edx          # 4-byte Reload
-.LBB188_8:
-	movl	%edx, 84(%eax)
-	jne	.LBB188_10
-# BB#9:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB188_10:
-	movl	%ecx, 88(%eax)
-	movl	%ebx, 92(%eax)
-	addl	$88, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end188:
-	.size	mcl_fpDbl_add12L, .Lfunc_end188-mcl_fpDbl_add12L
-
-	.globl	mcl_fpDbl_sub12L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub12L,@function
-mcl_fpDbl_sub12L:                       # @mcl_fpDbl_sub12L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$76, %esp
-	movl	100(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %edx
-	movl	104(%esp), %ebx
-	subl	(%ebx), %eax
-	sbbl	4(%ebx), %edx
-	movl	8(%esi), %edi
-	sbbl	8(%ebx), %edi
-	movl	96(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%esi), %eax
-	sbbl	12(%ebx), %eax
-	movl	%edx, 4(%ecx)
-	movl	16(%esi), %edx
-	sbbl	16(%ebx), %edx
-	movl	%edi, 8(%ecx)
-	movl	20(%ebx), %edi
-	movl	%eax, 12(%ecx)
-	movl	20(%esi), %eax
-	sbbl	%edi, %eax
-	movl	24(%ebx), %edi
-	movl	%edx, 16(%ecx)
-	movl	24(%esi), %edx
-	sbbl	%edi, %edx
-	movl	28(%ebx), %edi
-	movl	%eax, 20(%ecx)
-	movl	28(%esi), %eax
-	sbbl	%edi, %eax
-	movl	32(%ebx), %edi
-	movl	%edx, 24(%ecx)
-	movl	32(%esi), %edx
-	sbbl	%edi, %edx
-	movl	36(%ebx), %edi
-	movl	%eax, 28(%ecx)
-	movl	36(%esi), %eax
-	sbbl	%edi, %eax
-	movl	40(%ebx), %edi
-	movl	%edx, 32(%ecx)
-	movl	40(%esi), %edx
-	sbbl	%edi, %edx
-	movl	44(%ebx), %edi
-	movl	%eax, 36(%ecx)
-	movl	44(%esi), %eax
-	sbbl	%edi, %eax
-	movl	48(%ebx), %edi
-	movl	%edx, 40(%ecx)
-	movl	48(%esi), %edx
-	sbbl	%edi, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	52(%ebx), %edx
-	movl	%eax, 44(%ecx)
-	movl	52(%esi), %eax
-	sbbl	%edx, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	56(%ebx), %eax
-	movl	56(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	60(%ebx), %eax
-	movl	60(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	64(%ebx), %eax
-	movl	64(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	68(%ebx), %eax
-	movl	68(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	72(%ebx), %eax
-	movl	72(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	76(%ebx), %eax
-	movl	76(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	80(%ebx), %eax
-	movl	80(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	84(%ebx), %eax
-	movl	84(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	88(%ebx), %eax
-	movl	88(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	92(%ebx), %eax
-	movl	92(%esi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	108(%esp), %ebp
-	jne	.LBB189_1
-# BB#2:
-	movl	$0, 36(%esp)            # 4-byte Folded Spill
-	jmp	.LBB189_3
-.LBB189_1:
-	movl	44(%ebp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-.LBB189_3:
-	testb	%al, %al
-	jne	.LBB189_4
-# BB#5:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	movl	$0, %esi
-	jmp	.LBB189_6
-.LBB189_4:
-	movl	(%ebp), %esi
-	movl	4(%ebp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB189_6:
-	jne	.LBB189_7
-# BB#8:
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB189_9
-.LBB189_7:
-	movl	40(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-.LBB189_9:
-	jne	.LBB189_10
-# BB#11:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	jmp	.LBB189_12
-.LBB189_10:
-	movl	36(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB189_12:
-	jne	.LBB189_13
-# BB#14:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB189_15
-.LBB189_13:
-	movl	32(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB189_15:
-	jne	.LBB189_16
-# BB#17:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB189_18
-.LBB189_16:
-	movl	28(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB189_18:
-	jne	.LBB189_19
-# BB#20:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB189_21
-.LBB189_19:
-	movl	24(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB189_21:
-	jne	.LBB189_22
-# BB#23:
-	movl	$0, %ebx
-	jmp	.LBB189_24
-.LBB189_22:
-	movl	20(%ebp), %ebx
-.LBB189_24:
-	jne	.LBB189_25
-# BB#26:
-	movl	$0, %eax
-	jmp	.LBB189_27
-.LBB189_25:
-	movl	16(%ebp), %eax
-.LBB189_27:
-	jne	.LBB189_28
-# BB#29:
-	movl	%ebp, %edx
-	movl	$0, %ebp
-	jmp	.LBB189_30
-.LBB189_28:
-	movl	%ebp, %edx
-	movl	12(%edx), %ebp
-.LBB189_30:
-	jne	.LBB189_31
-# BB#32:
-	xorl	%edx, %edx
-	jmp	.LBB189_33
-.LBB189_31:
-	movl	8(%edx), %edx
-.LBB189_33:
-	addl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	12(%esp), %edi          # 4-byte Reload
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 48(%ecx)
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edi, 52(%ecx)
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edx, 56(%ecx)
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 60(%ecx)
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%eax, 64(%ecx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebx, 68(%ecx)
-	movl	4(%esp), %edx           # 4-byte Reload
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 72(%ecx)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 76(%ecx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 80(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 84(%ecx)
-	movl	%eax, 88(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%ecx)
-	addl	$76, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end189:
-	.size	mcl_fpDbl_sub12L, .Lfunc_end189-mcl_fpDbl_sub12L
-
-	.align	16, 0x90
-	.type	.LmulPv416x32,@function
-.LmulPv416x32:                          # @mulPv416x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$88, %esp
-	movl	%edx, %edi
-	movl	108(%esp), %ebp
-	movl	%ebp, %eax
-	mull	48(%edi)
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	44(%edi)
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	40(%edi)
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	36(%edi)
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	32(%edi)
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	28(%edi)
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	24(%edi)
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	20(%edi)
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	16(%edi)
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	mull	12(%edi)
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebp, %eax
-	mull	8(%edi)
-	movl	%edx, %esi
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ebp, %eax
-	mull	4(%edi)
-	movl	%edx, %ebx
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ebp, %eax
-	mull	(%edi)
-	movl	%eax, (%ecx)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%ecx)
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 8(%ecx)
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 12(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%ecx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%ecx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 52(%ecx)
-	movl	%ecx, %eax
-	addl	$88, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end190:
-	.size	.LmulPv416x32, .Lfunc_end190-.LmulPv416x32
-
-	.globl	mcl_fp_mulUnitPre13L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre13L,@function
-mcl_fp_mulUnitPre13L:                   # @mcl_fp_mulUnitPre13L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$108, %esp
-	calll	.L191$pb
-.L191$pb:
-	popl	%ebx
-.Ltmp32:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp32-.L191$pb), %ebx
-	movl	136(%esp), %eax
-	movl	%eax, (%esp)
-	leal	48(%esp), %ecx
-	movl	132(%esp), %edx
-	calll	.LmulPv416x32
-	movl	100(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp
-	movl	64(%esp), %ebx
-	movl	60(%esp), %edi
-	movl	56(%esp), %esi
-	movl	48(%esp), %edx
-	movl	52(%esp), %ecx
-	movl	128(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	addl	$108, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end191:
-	.size	mcl_fp_mulUnitPre13L, .Lfunc_end191-mcl_fp_mulUnitPre13L
-
-	.globl	mcl_fpDbl_mulPre13L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre13L,@function
-mcl_fpDbl_mulPre13L:                    # @mcl_fpDbl_mulPre13L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$844, %esp              # imm = 0x34C
-	calll	.L192$pb
-.L192$pb:
-	popl	%edi
-.Ltmp33:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp33-.L192$pb), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	872(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	784(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	%edx, %esi
-	movl	%edi, %ebx
-	calll	.LmulPv416x32
-	movl	836(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	832(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	828(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	820(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	816(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	812(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	808(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	804(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	800(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	796(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	792(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	784(%esp), %eax
-	movl	788(%esp), %ebp
-	movl	864(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	872(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	728(%esp), %ecx
-	movl	%esi, %edx
-	movl	%edi, %ebx
-	calll	.LmulPv416x32
-	addl	728(%esp), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	780(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	776(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	772(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	768(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	764(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	760(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	756(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	752(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	748(%esp), %edi
-	movl	744(%esp), %esi
-	movl	740(%esp), %edx
-	movl	732(%esp), %eax
-	movl	736(%esp), %ecx
-	movl	864(%esp), %ebp
-	movl	24(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 4(%ebp)
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	672(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	672(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	724(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	720(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	716(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	712(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	708(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	704(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	700(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	696(%esp), %ebx
-	movl	692(%esp), %edi
-	movl	688(%esp), %esi
-	movl	684(%esp), %edx
-	movl	676(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	680(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	60(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%eax)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	616(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	616(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	668(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	664(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	660(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	656(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	652(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	648(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	644(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	640(%esp), %ebx
-	movl	636(%esp), %edi
-	movl	632(%esp), %esi
-	movl	628(%esp), %edx
-	movl	620(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	624(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	560(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	612(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	596(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	592(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	588(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	584(%esp), %ebx
-	movl	580(%esp), %edi
-	movl	576(%esp), %esi
-	movl	572(%esp), %edx
-	movl	564(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	568(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	504(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	540(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	536(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	532(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	528(%esp), %ebx
-	movl	524(%esp), %edi
-	movl	520(%esp), %esi
-	movl	516(%esp), %edx
-	movl	508(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	512(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	448(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	448(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	500(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	496(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	492(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	488(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	484(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	480(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	476(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	472(%esp), %ebp
-	movl	468(%esp), %edi
-	movl	464(%esp), %esi
-	movl	460(%esp), %edx
-	movl	452(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	456(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	56(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 24(%eax)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	392(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	392(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	444(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	440(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	436(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	432(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	428(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	424(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	420(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	416(%esp), %ebx
-	movl	412(%esp), %edi
-	movl	408(%esp), %esi
-	movl	404(%esp), %edx
-	movl	396(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	400(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	336(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	336(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	388(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	380(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	376(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	364(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	360(%esp), %ebp
-	movl	356(%esp), %edi
-	movl	352(%esp), %esi
-	movl	348(%esp), %edx
-	movl	340(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	344(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	60(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 32(%eax)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	280(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	332(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	328(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	324(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	320(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	316(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	312(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	304(%esp), %ebx
-	movl	300(%esp), %edi
-	movl	296(%esp), %esi
-	movl	292(%esp), %edx
-	movl	284(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	288(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 36(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	224(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	276(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	272(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	268(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	264(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	252(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	248(%esp), %ebx
-	movl	244(%esp), %edi
-	movl	240(%esp), %esi
-	movl	236(%esp), %edx
-	movl	228(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	232(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 40(%eax)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %edi
-	movl	44(%edi), %eax
-	movl	%eax, (%esp)
-	leal	168(%esp), %ecx
-	movl	868(%esp), %eax
-	movl	%eax, %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	60(%esp), %esi          # 4-byte Reload
-	addl	168(%esp), %esi
-	movl	220(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	208(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	204(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	200(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	196(%esp), %ebp
-	movl	192(%esp), %ebx
-	movl	188(%esp), %edi
-	movl	184(%esp), %edx
-	movl	180(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	176(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	%esi, 44(%eax)
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	104(%esp), %ebp         # 4-byte Folded Reload
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 52(%esp)            # 4-byte Folded Spill
-	movl	872(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	868(%esp), %edx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	112(%esp), %esi
-	movl	%esi, %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	120(%esp), %edi
-	movl	164(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	156(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	152(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	148(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	144(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	140(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	136(%esp), %ebx
-	movl	132(%esp), %esi
-	movl	128(%esp), %edx
-	movl	124(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	%ebp, 48(%eax)
-	movl	68(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 52(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, 56(%eax)
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 60(%eax)
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 64(%eax)
-	adcl	104(%esp), %ebx         # 4-byte Folded Reload
-	movl	%esi, 68(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebx, 72(%eax)
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 76(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	88(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 80(%eax)
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 84(%eax)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	100(%esp), %ecx         # 4-byte Folded Reload
-	movl	%edx, 88(%eax)
-	movl	%ecx, 92(%eax)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 96(%eax)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 100(%eax)
-	addl	$844, %esp              # imm = 0x34C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end192:
-	.size	mcl_fpDbl_mulPre13L, .Lfunc_end192-mcl_fpDbl_mulPre13L
-
-	.globl	mcl_fpDbl_sqrPre13L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre13L,@function
-mcl_fpDbl_sqrPre13L:                    # @mcl_fpDbl_sqrPre13L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$844, %esp              # imm = 0x34C
-	calll	.L193$pb
-.L193$pb:
-	popl	%ebx
-.Ltmp34:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp34-.L193$pb), %ebx
-	movl	%ebx, 108(%esp)         # 4-byte Spill
-	movl	868(%esp), %edx
-	movl	(%edx), %eax
-	movl	%eax, (%esp)
-	leal	784(%esp), %ecx
-	movl	%edx, %edi
-	movl	%ebx, %esi
-	calll	.LmulPv416x32
-	movl	836(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	832(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	828(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	820(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	816(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	812(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	808(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	804(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	800(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	796(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	792(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	784(%esp), %eax
-	movl	788(%esp), %ebp
-	movl	864(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%edi, %edx
-	movl	4(%edx), %eax
-	movl	%eax, (%esp)
-	leal	728(%esp), %ecx
-	movl	%esi, %ebx
-	calll	.LmulPv416x32
-	addl	728(%esp), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	780(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	776(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	772(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	768(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	764(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	760(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	756(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	752(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	748(%esp), %edi
-	movl	744(%esp), %esi
-	movl	740(%esp), %edx
-	movl	732(%esp), %eax
-	movl	736(%esp), %ecx
-	movl	864(%esp), %ebp
-	movl	24(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 4(%ebp)
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	8(%edx), %eax
-	movl	%eax, (%esp)
-	leal	672(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	672(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	724(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	720(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	716(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	712(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	708(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	704(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	700(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	696(%esp), %ebx
-	movl	692(%esp), %edi
-	movl	688(%esp), %esi
-	movl	684(%esp), %edx
-	movl	676(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	680(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	60(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%eax)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	12(%edx), %eax
-	movl	%eax, (%esp)
-	leal	616(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	616(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	668(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	664(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	660(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	656(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	652(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	648(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	644(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	640(%esp), %ebx
-	movl	636(%esp), %edi
-	movl	632(%esp), %esi
-	movl	628(%esp), %edx
-	movl	620(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	624(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	16(%edx), %eax
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	560(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	612(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	596(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	592(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	588(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	584(%esp), %ebx
-	movl	580(%esp), %edi
-	movl	576(%esp), %esi
-	movl	572(%esp), %edx
-	movl	564(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	568(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	20(%edx), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	504(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	540(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	536(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	532(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	528(%esp), %ebx
-	movl	524(%esp), %edi
-	movl	520(%esp), %esi
-	movl	516(%esp), %edx
-	movl	508(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	512(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	16(%esp), %ebp          # 4-byte Folded Reload
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	24(%edx), %eax
-	movl	%eax, (%esp)
-	leal	448(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	448(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	500(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	496(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	492(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	488(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	484(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	480(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	476(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	472(%esp), %ebp
-	movl	468(%esp), %edi
-	movl	464(%esp), %esi
-	movl	460(%esp), %edx
-	movl	452(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	456(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	56(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 24(%eax)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	28(%edx), %eax
-	movl	%eax, (%esp)
-	leal	392(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	392(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	444(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	440(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	436(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	432(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	428(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	424(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	420(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	416(%esp), %ebx
-	movl	412(%esp), %edi
-	movl	408(%esp), %esi
-	movl	404(%esp), %edx
-	movl	396(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	400(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	32(%edx), %eax
-	movl	%eax, (%esp)
-	leal	336(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	336(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	388(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	380(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	376(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	364(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	360(%esp), %ebp
-	movl	356(%esp), %edi
-	movl	352(%esp), %esi
-	movl	348(%esp), %edx
-	movl	340(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	344(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	60(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 32(%eax)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 104(%esp)         # 4-byte Folded Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	36(%edx), %eax
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	280(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	332(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	328(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	324(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	320(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	316(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	312(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	304(%esp), %ebx
-	movl	300(%esp), %edi
-	movl	296(%esp), %esi
-	movl	292(%esp), %edx
-	movl	284(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	288(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	104(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 36(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	24(%esp), %ebp          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	40(%edx), %eax
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	224(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	276(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	272(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	268(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	264(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	252(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	248(%esp), %ebx
-	movl	244(%esp), %edi
-	movl	240(%esp), %esi
-	movl	236(%esp), %edx
-	movl	228(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	232(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 40(%eax)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	44(%edx), %eax
-	movl	%eax, (%esp)
-	leal	168(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	60(%esp), %esi          # 4-byte Reload
-	addl	168(%esp), %esi
-	movl	220(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	208(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	204(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	200(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	196(%esp), %ebp
-	movl	192(%esp), %ebx
-	movl	188(%esp), %edi
-	movl	184(%esp), %edx
-	movl	180(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	176(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	%esi, 44(%eax)
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	104(%esp), %ebp         # 4-byte Folded Reload
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 52(%esp)            # 4-byte Folded Spill
-	movl	868(%esp), %edx
-	movl	48(%edx), %eax
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	108(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	112(%esp), %esi
-	movl	%esi, %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	120(%esp), %edi
-	movl	164(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	156(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	152(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	148(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	144(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	140(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	136(%esp), %ebx
-	movl	132(%esp), %esi
-	movl	128(%esp), %edx
-	movl	124(%esp), %ecx
-	movl	864(%esp), %eax
-	movl	%ebp, 48(%eax)
-	movl	68(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 52(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, 56(%eax)
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 60(%eax)
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 64(%eax)
-	adcl	104(%esp), %ebx         # 4-byte Folded Reload
-	movl	%esi, 68(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebx, 72(%eax)
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 76(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	88(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 80(%eax)
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 84(%eax)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	100(%esp), %ecx         # 4-byte Folded Reload
-	movl	%edx, 88(%eax)
-	movl	%ecx, 92(%eax)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 96(%eax)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 100(%eax)
-	addl	$844, %esp              # imm = 0x34C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end193:
-	.size	mcl_fpDbl_sqrPre13L, .Lfunc_end193-mcl_fpDbl_sqrPre13L
-
-	.globl	mcl_fp_mont13L
-	.align	16, 0x90
-	.type	mcl_fp_mont13L,@function
-mcl_fp_mont13L:                         # @mcl_fp_mont13L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1548, %esp             # imm = 0x60C
-	calll	.L194$pb
-.L194$pb:
-	popl	%ebx
-.Ltmp35:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp35-.L194$pb), %ebx
-	movl	1580(%esp), %eax
-	movl	-4(%eax), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1488(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	1488(%esp), %esi
-	movl	1492(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	imull	%edi, %eax
-	movl	1540(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	1536(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	1532(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	1528(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1524(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1520(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1516(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1512(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	1508(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1504(%esp), %edi
-	movl	1500(%esp), %ebp
-	movl	1496(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	1432(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	1432(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1436(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1440(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1444(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	adcl	1448(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1452(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1456(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1460(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1464(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1468(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1472(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1476(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1480(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1484(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	1576(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1376(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	1376(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1380(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1384(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1388(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1400(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	1404(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1408(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1412(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1416(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	1420(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1424(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	1428(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1320(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %esi
-	movl	%esi, %eax
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	1320(%esp), %ecx
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1324(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1328(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1332(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1336(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %esi          # 4-byte Reload
-	adcl	1340(%esp), %esi
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1344(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	adcl	1348(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1352(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1356(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1360(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	1364(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	1368(%esp), %ebp
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	1372(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1264(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	1264(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1272(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	1280(%esp), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1288(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	1300(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1308(%esp), %ebp
-	adcl	1312(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1208(%esp), %ecx
-	movl	1580(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv416x32
-	movl	84(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1208(%esp), %edi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1212(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1216(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1220(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	1224(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1228(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1232(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1236(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1240(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	1244(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	1248(%esp), %edi
-	adcl	1252(%esp), %ebp
-	movl	%ebp, %esi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1256(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	1260(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1152(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	1152(%esp), %ecx
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1188(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	1192(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1200(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1204(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1096(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %ebp
-	movl	%ebp, %eax
-	addl	1096(%esp), %esi
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1100(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1104(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	1108(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1112(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1116(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1120(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1124(%esp), %ebp
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	1128(%esp), %edi
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	1132(%esp), %esi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1136(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1140(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1144(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1148(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1040(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	1040(%esp), %ecx
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1044(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1064(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	1068(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	adcl	1072(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1080(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1084(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1088(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1092(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	984(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %edi
-	addl	984(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	adcl	996(%esp), %ebp
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1028(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	1576(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	928(%esp), %ecx
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	932(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	adcl	936(%esp), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	944(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	980(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	872(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %edi
-	addl	872(%esp), %ebp
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebp          # 4-byte Reload
-	adcl	884(%esp), %ebp
-	adcl	888(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	904(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	816(%esp), %ecx
-	movl	1572(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv416x32
-	movl	32(%esp), %ecx          # 4-byte Reload
-	addl	816(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	824(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	844(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	848(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	856(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %esi
-	movl	%esi, %eax
-	movl	32(%esp), %ecx          # 4-byte Reload
-	addl	760(%esp), %ecx
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	764(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	768(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	772(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	776(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	780(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	784(%esp), %esi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	788(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	792(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	796(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	800(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	804(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	808(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	812(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	704(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	36(%esp), %eax          # 4-byte Reload
-	addl	704(%esp), %eax
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	708(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	712(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	716(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	720(%esp), %ebp
-	adcl	724(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	728(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	732(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	736(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	740(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	744(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	748(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	752(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	756(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%eax, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	648(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	movl	%edi, %eax
-	andl	$1, %eax
-	addl	648(%esp), %esi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	652(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	656(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	660(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	664(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	668(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	672(%esp), %edi
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	676(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	680(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	684(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	688(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	692(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	696(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	592(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	592(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	600(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	612(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	adcl	616(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	620(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	536(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	movl	44(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	536(%esp), %esi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	544(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	560(%esp), %esi
-	adcl	564(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	572(%esp), %edi
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	576(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	480(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	480(%esp), %ecx
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	500(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	512(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	516(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	movl	%edi, %ecx
-	andl	$1, %ecx
-	addl	424(%esp), %esi
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	428(%esp), %ebp
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	432(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	444(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	addl	368(%esp), %ebp
-	adcl	372(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	376(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	384(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	392(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	movl	52(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	312(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	320(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	328(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	336(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	340(%esp), %edi
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	344(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	256(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	68(%esp), %ecx          # 4-byte Reload
-	addl	256(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	268(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	280(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	284(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	288(%esp), %edi
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	200(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	200(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	208(%esp), %ebp
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	212(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	232(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %edi          # 4-byte Reload
-	adcl	236(%esp), %edi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	144(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	144(%esp), %ecx
-	adcl	148(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	adcl	152(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	156(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	168(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	176(%esp), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	28(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	88(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	andl	$1, %edi
-	addl	88(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	92(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	96(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	100(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	adcl	104(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	108(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebx          # 4-byte Reload
-	adcl	112(%esp), %ebx
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebx          # 4-byte Reload
-	adcl	116(%esp), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebx          # 4-byte Reload
-	adcl	120(%esp), %ebx
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebx          # 4-byte Reload
-	adcl	124(%esp), %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ebx          # 4-byte Reload
-	adcl	128(%esp), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebx          # 4-byte Reload
-	adcl	132(%esp), %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebx          # 4-byte Reload
-	adcl	136(%esp), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebx          # 4-byte Reload
-	adcl	140(%esp), %ebx
-	movl	%ebx, 68(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	1580(%esp), %ebx
-	subl	(%ebx), %eax
-	sbbl	4(%ebx), %ecx
-	sbbl	8(%ebx), %ebp
-	sbbl	12(%ebx), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	sbbl	16(%ebx), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	sbbl	20(%ebx), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	sbbl	24(%ebx), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	32(%esp), %edx          # 4-byte Reload
-	sbbl	28(%ebx), %edx
-	movl	36(%esp), %esi          # 4-byte Reload
-	sbbl	32(%ebx), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	36(%ebx), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	sbbl	40(%ebx), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	sbbl	44(%ebx), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	sbbl	48(%ebx), %esi
-	movl	%esi, %ebx
-	sbbl	$0, %edi
-	andl	$1, %edi
-	jne	.LBB194_2
-# BB#1:
-	movl	%edx, 32(%esp)          # 4-byte Spill
-.LBB194_2:
-	movl	%edi, %edx
-	testb	%dl, %dl
-	movl	80(%esp), %edx          # 4-byte Reload
-	jne	.LBB194_4
-# BB#3:
-	movl	%eax, %edx
-.LBB194_4:
-	movl	1568(%esp), %eax
-	movl	%edx, (%eax)
-	movl	64(%esp), %esi          # 4-byte Reload
-	jne	.LBB194_6
-# BB#5:
-	movl	%ecx, %esi
-.LBB194_6:
-	movl	%esi, 4(%eax)
-	jne	.LBB194_8
-# BB#7:
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-.LBB194_8:
-	movl	76(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 8(%eax)
-	movl	60(%esp), %ebp          # 4-byte Reload
-	jne	.LBB194_10
-# BB#9:
-	movl	4(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-.LBB194_10:
-	movl	84(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	jne	.LBB194_12
-# BB#11:
-	movl	8(%esp), %ebp           # 4-byte Reload
-.LBB194_12:
-	movl	%ebp, 16(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_14
-# BB#13:
-	movl	12(%esp), %ecx          # 4-byte Reload
-.LBB194_14:
-	movl	%ecx, 20(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_16
-# BB#15:
-	movl	16(%esp), %ecx          # 4-byte Reload
-.LBB194_16:
-	movl	%ecx, 24(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_18
-# BB#17:
-	movl	20(%esp), %ecx          # 4-byte Reload
-.LBB194_18:
-	movl	%ecx, 32(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_20
-# BB#19:
-	movl	24(%esp), %ecx          # 4-byte Reload
-.LBB194_20:
-	movl	%ecx, 36(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_22
-# BB#21:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB194_22:
-	movl	%ecx, 40(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_24
-# BB#23:
-	movl	72(%esp), %ecx          # 4-byte Reload
-.LBB194_24:
-	movl	%ecx, 44(%eax)
-	movl	68(%esp), %ecx          # 4-byte Reload
-	jne	.LBB194_26
-# BB#25:
-	movl	%ebx, %ecx
-.LBB194_26:
-	movl	%ecx, 48(%eax)
-	addl	$1548, %esp             # imm = 0x60C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end194:
-	.size	mcl_fp_mont13L, .Lfunc_end194-mcl_fp_mont13L
-
-	.globl	mcl_fp_montNF13L
-	.align	16, 0x90
-	.type	mcl_fp_montNF13L,@function
-mcl_fp_montNF13L:                       # @mcl_fp_montNF13L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1548, %esp             # imm = 0x60C
-	calll	.L195$pb
-.L195$pb:
-	popl	%ebx
-.Ltmp36:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp36-.L195$pb), %ebx
-	movl	1580(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1488(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	1488(%esp), %edi
-	movl	1492(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	imull	%esi, %eax
-	movl	1540(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	1536(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1532(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	1528(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1524(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1520(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1516(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1512(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	1508(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1504(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1500(%esp), %esi
-	movl	1496(%esp), %ebp
-	movl	%eax, (%esp)
-	leal	1432(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	1432(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1436(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	1440(%esp), %ebp
-	adcl	1444(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1448(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1452(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1456(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1460(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1464(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1468(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	1472(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1476(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1480(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	1484(%esp), %edi
-	movl	1576(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1376(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	1428(%esp), %ecx
-	movl	80(%esp), %edx          # 4-byte Reload
-	addl	1376(%esp), %edx
-	adcl	1380(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1384(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1400(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1404(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1408(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1412(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1416(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1424(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1320(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	1320(%esp), %esi
-	adcl	1324(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1328(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1336(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	adcl	1340(%esp), %esi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1352(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1356(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	1360(%esp), %edi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1364(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1372(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1264(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	1316(%esp), %eax
-	addl	1264(%esp), %ebp
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1268(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1272(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1276(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	1280(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	adcl	1284(%esp), %esi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1288(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1292(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1296(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	1300(%esp), %edi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1304(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1308(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1312(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1208(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	1208(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1212(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1216(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1220(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	adcl	1228(%esp), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1232(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1236(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1240(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1244(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1248(%esp), %esi
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	1252(%esp), %edi
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	1256(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1260(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1152(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	1204(%esp), %eax
-	movl	64(%esp), %edx          # 4-byte Reload
-	addl	1152(%esp), %edx
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1156(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1160(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1164(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1168(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1172(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1176(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1180(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1184(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	1188(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	1192(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	adcl	1196(%esp), %ebp
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1200(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1096(%esp), %ecx
-	movl	1580(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv416x32
-	addl	1096(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1104(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	1116(%esp), %esi
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	1120(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1140(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1144(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	1148(%esp), %ebp
-	movl	1576(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1040(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	1092(%esp), %eax
-	movl	40(%esp), %edx          # 4-byte Reload
-	addl	1040(%esp), %edx
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1044(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	1048(%esp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1052(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	1056(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	1060(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1064(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1068(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1072(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1076(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1080(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1084(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	1088(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	adcl	$0, %esi
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	984(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	984(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	996(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	1008(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1036(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	980(%esp), %eax
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	928(%esp), %ecx
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	932(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	936(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	940(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	944(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	948(%esp), %ebp
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	952(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	956(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	960(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	964(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	968(%esp), %esi
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	972(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	976(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	872(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	872(%esp), %edi
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	876(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	892(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	912(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	816(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	868(%esp), %edx
-	addl	816(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	832(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	836(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	856(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	860(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	760(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	780(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	784(%esp), %esi
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	788(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	804(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	704(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	756(%esp), %eax
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	704(%esp), %ecx
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	708(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	712(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	716(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	720(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	adcl	724(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	728(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	732(%esp), %esi
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	736(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	740(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	744(%esp), %ebp
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	748(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	752(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	648(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	648(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	676(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	688(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	696(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	592(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	644(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	592(%esp), %ecx
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	596(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	624(%esp), %ebp
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	636(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	536(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	536(%esp), %edi
-	adcl	540(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	556(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	564(%esp), %esi
-	adcl	568(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	572(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	480(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	532(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	480(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	496(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	504(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	508(%esp), %edi
-	adcl	512(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	528(%esp), %ebp
-	adcl	$0, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	424(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	444(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edi, %esi
-	adcl	452(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	460(%esp), %edi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	472(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	420(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	368(%esp), %ecx
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	372(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	392(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	adcl	400(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	404(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	312(%esp), %esi
-	adcl	316(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	320(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	332(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	348(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	256(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	308(%esp), %edx
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	256(%esp), %ecx
-	adcl	260(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	272(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	288(%esp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	200(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	200(%esp), %esi
-	adcl	204(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	212(%esp), %ebp
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	216(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	228(%esp), %edi
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1576(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	144(%esp), %ecx
-	movl	1572(%esp), %edx
-	calll	.LmulPv416x32
-	movl	196(%esp), %edx
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	144(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	148(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	152(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	adcl	156(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	164(%esp), %ebp
-	adcl	168(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	88(%esp), %ecx
-	movl	1580(%esp), %edx
-	calll	.LmulPv416x32
-	addl	88(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	92(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	96(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	100(%esp), %edi
-	movl	64(%esp), %ebx          # 4-byte Reload
-	adcl	104(%esp), %ebx
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	adcl	108(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	%ebp, %esi
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	112(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	120(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	124(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	128(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	132(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	136(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	140(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	1580(%esp), %eax
-	subl	(%eax), %edx
-	movl	%ecx, %ebp
-	sbbl	4(%eax), %ebp
-	movl	%edi, %ecx
-	sbbl	8(%eax), %ecx
-	sbbl	12(%eax), %ebx
-	sbbl	16(%eax), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	sbbl	20(%eax), %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	36(%esp), %esi          # 4-byte Reload
-	sbbl	24(%eax), %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	28(%eax), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	sbbl	32(%eax), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	sbbl	36(%eax), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	sbbl	40(%eax), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	sbbl	44(%eax), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	sbbl	48(%eax), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	sarl	$31, %eax
-	testl	%eax, %eax
-	movl	84(%esp), %eax          # 4-byte Reload
-	js	.LBB195_2
-# BB#1:
-	movl	%edx, %eax
-.LBB195_2:
-	movl	1568(%esp), %edx
-	movl	%eax, (%edx)
-	movl	80(%esp), %esi          # 4-byte Reload
-	js	.LBB195_4
-# BB#3:
-	movl	%ebp, %esi
-.LBB195_4:
-	movl	%esi, 4(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB195_6
-# BB#5:
-	movl	%ecx, %edi
-.LBB195_6:
-	movl	%edi, 8(%edx)
-	js	.LBB195_8
-# BB#7:
-	movl	%ebx, %eax
-.LBB195_8:
-	movl	%eax, 12(%edx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB195_10
-# BB#9:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB195_10:
-	movl	%eax, 16(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB195_12
-# BB#11:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB195_12:
-	movl	%eax, 20(%edx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	js	.LBB195_14
-# BB#13:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB195_14:
-	movl	%eax, 24(%edx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	js	.LBB195_16
-# BB#15:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB195_16:
-	movl	%eax, 28(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB195_18
-# BB#17:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB195_18:
-	movl	%eax, 32(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB195_20
-# BB#19:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB195_20:
-	movl	%eax, 36(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB195_22
-# BB#21:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB195_22:
-	movl	%eax, 40(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	js	.LBB195_24
-# BB#23:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB195_24:
-	movl	%eax, 44(%edx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	js	.LBB195_26
-# BB#25:
-	movl	68(%esp), %eax          # 4-byte Reload
-.LBB195_26:
-	movl	%eax, 48(%edx)
-	addl	$1548, %esp             # imm = 0x60C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end195:
-	.size	mcl_fp_montNF13L, .Lfunc_end195-mcl_fp_montNF13L
-
-	.globl	mcl_fp_montRed13L
-	.align	16, 0x90
-	.type	mcl_fp_montRed13L,@function
-mcl_fp_montRed13L:                      # @mcl_fp_montRed13L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$892, %esp              # imm = 0x37C
-	calll	.L196$pb
-.L196$pb:
-	popl	%eax
-.Ltmp37:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp37-.L196$pb), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	920(%esp), %edx
-	movl	-4(%edx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	916(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 76(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	imull	%eax, %ebx
-	movl	100(%ecx), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	96(%ecx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%ecx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	88(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%ecx), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	80(%ecx), %esi
-	movl	%esi, 136(%esp)         # 4-byte Spill
-	movl	76(%ecx), %esi
-	movl	%esi, 144(%esp)         # 4-byte Spill
-	movl	72(%ecx), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	68(%ecx), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	64(%ecx), %esi
-	movl	%esi, 148(%esp)         # 4-byte Spill
-	movl	60(%ecx), %esi
-	movl	%esi, 152(%esp)         # 4-byte Spill
-	movl	56(%ecx), %esi
-	movl	%esi, 140(%esp)         # 4-byte Spill
-	movl	52(%ecx), %esi
-	movl	%esi, 156(%esp)         # 4-byte Spill
-	movl	48(%ecx), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	44(%ecx), %ebp
-	movl	%ebp, 124(%esp)         # 4-byte Spill
-	movl	40(%ecx), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	36(%ecx), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	32(%ecx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	28(%ecx), %ebp
-	movl	24(%ecx), %edi
-	movl	20(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	16(%ecx), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	12(%ecx), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	8(%ecx), %esi
-	movl	(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	48(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	44(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	832(%esp), %ecx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	76(%esp), %eax          # 4-byte Reload
-	addl	832(%esp), %eax
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	836(%esp), %ecx
-	adcl	840(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	856(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	adcl	860(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	776(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	andl	$1, %esi
-	addl	776(%esp), %edi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	780(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, %edi
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	720(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	720(%esp), %esi
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	724(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 132(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	664(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	664(%esp), %esi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	668(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	608(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	608(%esp), %edi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	612(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	movl	144(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	552(%esp), %esi
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	556(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	496(%esp), %edi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	500(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %ebp         # 4-byte Reload
-	adcl	532(%esp), %ebp
-	movl	148(%esp), %edi         # 4-byte Reload
-	adcl	536(%esp), %edi
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 136(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	440(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	440(%esp), %esi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	adcl	472(%esp), %ebp
-	movl	%ebp, 152(%esp)         # 4-byte Spill
-	adcl	476(%esp), %edi
-	movl	%edi, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	384(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	384(%esp), %esi
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	388(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %ebp         # 4-byte Reload
-	adcl	404(%esp), %ebp
-	movl	140(%esp), %edi         # 4-byte Reload
-	adcl	408(%esp), %edi
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %esi         # 4-byte Reload
-	adcl	420(%esp), %esi
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	imull	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	920(%esp), %eax
-	movl	%eax, %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	movl	104(%esp), %eax         # 4-byte Reload
-	addl	328(%esp), %eax
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	332(%esp), %ecx
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	344(%esp), %ebp
-	movl	%ebp, 156(%esp)         # 4-byte Spill
-	adcl	348(%esp), %edi
-	movl	%edi, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	360(%esp), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, 96(%esp)            # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	movl	72(%esp), %esi          # 4-byte Reload
-	imull	%esi, %eax
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	272(%esp), %edi
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	120(%esp), %edi         # 4-byte Reload
-	adcl	280(%esp), %edi
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	284(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %ecx         # 4-byte Reload
-	adcl	288(%esp), %ecx
-	movl	%ecx, 140(%esp)         # 4-byte Spill
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	292(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %ecx         # 4-byte Reload
-	adcl	296(%esp), %ecx
-	movl	%ecx, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	300(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	304(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %ecx         # 4-byte Reload
-	adcl	308(%esp), %ecx
-	movl	%ecx, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %ecx         # 4-byte Reload
-	adcl	312(%esp), %ecx
-	movl	%ecx, 136(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	316(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	320(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	324(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	imull	%esi, %eax
-	movl	%eax, (%esp)
-	leal	216(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	216(%esp), %ebp
-	movl	%edi, %ecx
-	adcl	220(%esp), %ecx
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	140(%esp), %ebp         # 4-byte Reload
-	adcl	228(%esp), %ebp
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %edi         # 4-byte Reload
-	adcl	244(%esp), %edi
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 92(%esp)            # 4-byte Folded Spill
-	adcl	$0, 80(%esp)            # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	160(%esp), %ecx
-	movl	920(%esp), %edx
-	movl	84(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv416x32
-	addl	160(%esp), %esi
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	164(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	168(%esp), %ebp
-	movl	%ebp, 140(%esp)         # 4-byte Spill
-	movl	%ebp, %ebx
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	172(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	148(%esp), %ebp         # 4-byte Reload
-	adcl	176(%esp), %ebp
-	movl	%ebp, 148(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	%eax, %edx
-	movl	%edi, %eax
-	adcl	184(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	156(%esp), %edi         # 4-byte Reload
-	subl	12(%esp), %edi          # 4-byte Folded Reload
-	sbbl	4(%esp), %ebx           # 4-byte Folded Reload
-	sbbl	8(%esp), %ecx           # 4-byte Folded Reload
-	sbbl	16(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	20(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	132(%esp), %edx         # 4-byte Reload
-	sbbl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	144(%esp), %edx         # 4-byte Reload
-	sbbl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	136(%esp), %edx         # 4-byte Reload
-	sbbl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	116(%esp), %edx         # 4-byte Reload
-	sbbl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	112(%esp), %edx         # 4-byte Reload
-	sbbl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	sbbl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	sbbl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 120(%esp)         # 4-byte Spill
-	movl	%eax, %edx
-	movl	%esi, %eax
-	sbbl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 124(%esp)         # 4-byte Spill
-	sbbl	$0, %eax
-	andl	$1, %eax
-	jne	.LBB196_2
-# BB#1:
-	movl	%ebp, 148(%esp)         # 4-byte Spill
-.LBB196_2:
-	testb	%al, %al
-	movl	156(%esp), %ebp         # 4-byte Reload
-	jne	.LBB196_4
-# BB#3:
-	movl	%edi, %ebp
-.LBB196_4:
-	movl	912(%esp), %edi
-	movl	%ebp, (%edi)
-	movl	140(%esp), %ebp         # 4-byte Reload
-	jne	.LBB196_6
-# BB#5:
-	movl	%ebx, %ebp
-.LBB196_6:
-	movl	%ebp, 4(%edi)
-	movl	152(%esp), %ebx         # 4-byte Reload
-	jne	.LBB196_8
-# BB#7:
-	movl	%ecx, %ebx
-.LBB196_8:
-	movl	%ebx, 8(%edi)
-	movl	148(%esp), %esi         # 4-byte Reload
-	movl	%esi, 12(%edi)
-	movl	116(%esp), %ebx         # 4-byte Reload
-	movl	128(%esp), %esi         # 4-byte Reload
-	jne	.LBB196_10
-# BB#9:
-	movl	72(%esp), %esi          # 4-byte Reload
-.LBB196_10:
-	movl	%esi, 16(%edi)
-	movl	112(%esp), %esi         # 4-byte Reload
-	movl	132(%esp), %edx         # 4-byte Reload
-	jne	.LBB196_12
-# BB#11:
-	movl	76(%esp), %edx          # 4-byte Reload
-.LBB196_12:
-	movl	%edx, 20(%edi)
-	movl	96(%esp), %edx          # 4-byte Reload
-	movl	144(%esp), %ecx         # 4-byte Reload
-	jne	.LBB196_14
-# BB#13:
-	movl	80(%esp), %ecx          # 4-byte Reload
-.LBB196_14:
-	movl	%ecx, 24(%edi)
-	movl	100(%esp), %ecx         # 4-byte Reload
-	movl	136(%esp), %eax         # 4-byte Reload
-	jne	.LBB196_16
-# BB#15:
-	movl	84(%esp), %eax          # 4-byte Reload
-.LBB196_16:
-	movl	%eax, 28(%edi)
-	movl	92(%esp), %eax          # 4-byte Reload
-	jne	.LBB196_18
-# BB#17:
-	movl	88(%esp), %ebx          # 4-byte Reload
-.LBB196_18:
-	movl	%ebx, 32(%edi)
-	jne	.LBB196_20
-# BB#19:
-	movl	104(%esp), %esi         # 4-byte Reload
-.LBB196_20:
-	movl	%esi, 36(%edi)
-	jne	.LBB196_22
-# BB#21:
-	movl	108(%esp), %edx         # 4-byte Reload
-.LBB196_22:
-	movl	%edx, 40(%edi)
-	jne	.LBB196_24
-# BB#23:
-	movl	120(%esp), %ecx         # 4-byte Reload
-.LBB196_24:
-	movl	%ecx, 44(%edi)
-	jne	.LBB196_26
-# BB#25:
-	movl	124(%esp), %eax         # 4-byte Reload
-.LBB196_26:
-	movl	%eax, 48(%edi)
-	addl	$892, %esp              # imm = 0x37C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end196:
-	.size	mcl_fp_montRed13L, .Lfunc_end196-mcl_fp_montRed13L
-
-	.globl	mcl_fp_addPre13L
-	.align	16, 0x90
-	.type	mcl_fp_addPre13L,@function
-mcl_fp_addPre13L:                       # @mcl_fp_addPre13L
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %edi
-	adcl	8(%ecx), %edi
-	movl	16(%esp), %ebx
-	movl	%edx, (%ebx)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%ebx)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ebx)
-	movl	20(%ecx), %edx
-	adcl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ebx)
-	movl	24(%ecx), %esi
-	adcl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ebx)
-	movl	28(%ecx), %edx
-	adcl	%edi, %edx
-	movl	32(%eax), %edi
-	movl	%esi, 24(%ebx)
-	movl	32(%ecx), %esi
-	adcl	%edi, %esi
-	movl	36(%eax), %edi
-	movl	%edx, 28(%ebx)
-	movl	36(%ecx), %edx
-	adcl	%edi, %edx
-	movl	40(%eax), %edi
-	movl	%esi, 32(%ebx)
-	movl	40(%ecx), %esi
-	adcl	%edi, %esi
-	movl	44(%eax), %edi
-	movl	%edx, 36(%ebx)
-	movl	44(%ecx), %edx
-	adcl	%edi, %edx
-	movl	%esi, 40(%ebx)
-	movl	%edx, 44(%ebx)
-	movl	48(%eax), %eax
-	movl	48(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 48(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end197:
-	.size	mcl_fp_addPre13L, .Lfunc_end197-mcl_fp_addPre13L
-
-	.globl	mcl_fp_subPre13L
-	.align	16, 0x90
-	.type	mcl_fp_subPre13L,@function
-mcl_fp_subPre13L:                       # @mcl_fp_subPre13L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edx), %ebx
-	movl	20(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebp)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ebp)
-	movl	24(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ebp)
-	movl	28(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%edi, 24(%ebp)
-	movl	32(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	36(%edx), %ebx
-	movl	%esi, 28(%ebp)
-	movl	36(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	40(%edx), %ebx
-	movl	%edi, 32(%ebp)
-	movl	40(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	44(%edx), %ebx
-	movl	%esi, 36(%ebp)
-	movl	44(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	%edi, 40(%ebp)
-	movl	%esi, 44(%ebp)
-	movl	48(%edx), %edx
-	movl	48(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 48(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end198:
-	.size	mcl_fp_subPre13L, .Lfunc_end198-mcl_fp_subPre13L
-
-	.globl	mcl_fp_shr1_13L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_13L,@function
-mcl_fp_shr1_13L:                        # @mcl_fp_shr1_13L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	8(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 4(%ecx)
-	movl	12(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 8(%ecx)
-	movl	16(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 16(%ecx)
-	movl	24(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 24(%ecx)
-	movl	32(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 32(%ecx)
-	movl	40(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 40(%ecx)
-	movl	48(%eax), %eax
-	shrdl	$1, %eax, %esi
-	movl	%esi, 44(%ecx)
-	shrl	%eax
-	movl	%eax, 48(%ecx)
-	popl	%esi
-	retl
-.Lfunc_end199:
-	.size	mcl_fp_shr1_13L, .Lfunc_end199-mcl_fp_shr1_13L
-
-	.globl	mcl_fp_add13L
-	.align	16, 0x90
-	.type	mcl_fp_add13L,@function
-mcl_fp_add13L:                          # @mcl_fp_add13L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$40, %esp
-	movl	68(%esp), %ebp
-	movl	(%ebp), %ecx
-	movl	4(%ebp), %eax
-	movl	64(%esp), %ebx
-	addl	(%ebx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	4(%ebx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	8(%ebp), %eax
-	adcl	8(%ebx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	12(%ebx), %ecx
-	movl	16(%ebx), %eax
-	adcl	12(%ebp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	16(%ebp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	20(%ebx), %eax
-	adcl	20(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	24(%ebx), %eax
-	adcl	24(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	28(%ebx), %eax
-	adcl	28(%ebp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	32(%ebx), %eax
-	adcl	32(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	36(%ebx), %ecx
-	adcl	36(%ebp), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	40(%ebx), %edi
-	adcl	40(%ebp), %edi
-	movl	44(%ebx), %edx
-	adcl	44(%ebp), %edx
-	movl	48(%ebx), %esi
-	adcl	48(%ebp), %esi
-	movl	60(%esp), %ebp
-	movl	4(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, (%ebp)
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	%eax, 4(%ebp)
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 8(%ebp)
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 12(%ebp)
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 16(%ebp)
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 20(%ebp)
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 24(%ebp)
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 28(%ebp)
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 32(%ebp)
-	movl	%ecx, 36(%ebp)
-	movl	%edi, 40(%ebp)
-	movl	%edx, 44(%ebp)
-	movl	%esi, 48(%ebp)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	movl	72(%esp), %ecx
-	subl	(%ecx), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	36(%esp), %ebx          # 4-byte Reload
-	sbbl	4(%ecx), %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebx          # 4-byte Reload
-	sbbl	8(%ecx), %ebx
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %ebx          # 4-byte Reload
-	sbbl	12(%ecx), %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %ebx          # 4-byte Reload
-	sbbl	16(%ecx), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%ecx), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %ebx          # 4-byte Reload
-	sbbl	24(%ecx), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebx          # 4-byte Reload
-	sbbl	28(%ecx), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %ebx           # 4-byte Reload
-	sbbl	32(%ecx), %ebx
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	(%esp), %ebx            # 4-byte Reload
-	sbbl	36(%ecx), %ebx
-	sbbl	40(%ecx), %edi
-	sbbl	44(%ecx), %edx
-	sbbl	48(%ecx), %esi
-	sbbl	$0, %eax
-	testb	$1, %al
-	jne	.LBB200_2
-# BB#1:                                 # %nocarry
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, (%ebp)
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	%eax, 4(%ebp)
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 8(%ebp)
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 12(%ebp)
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 16(%ebp)
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 20(%ebp)
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 24(%ebp)
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 28(%ebp)
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 32(%ebp)
-	movl	%ebx, 36(%ebp)
-	movl	%edi, 40(%ebp)
-	movl	%edx, 44(%ebp)
-	movl	%esi, 48(%ebp)
-.LBB200_2:                              # %carry
-	addl	$40, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end200:
-	.size	mcl_fp_add13L, .Lfunc_end200-mcl_fp_add13L
-
-	.globl	mcl_fp_addNF13L
-	.align	16, 0x90
-	.type	mcl_fp_addNF13L,@function
-mcl_fp_addNF13L:                        # @mcl_fp_addNF13L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$100, %esp
-	movl	128(%esp), %esi
-	movl	(%esi), %ecx
-	movl	4(%esi), %eax
-	movl	124(%esp), %edx
-	addl	(%edx), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	4(%edx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	48(%esi), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	44(%esi), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	40(%esi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	36(%esi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	32(%esi), %ebp
-	movl	28(%esi), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	movl	20(%esi), %ebx
-	movl	16(%esi), %edi
-	movl	12(%esi), %ecx
-	movl	8(%esi), %esi
-	adcl	8(%edx), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	12(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	16(%edx), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	adcl	20(%edx), %ebx
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	adcl	24(%edx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	28(%edx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	32(%edx), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	36(%edx), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	40(%edx), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%edx), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	48(%edx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	132(%esp), %edx
-	movl	64(%esp), %eax          # 4-byte Reload
-	subl	(%edx), %eax
-	movl	68(%esp), %ebp          # 4-byte Reload
-	sbbl	4(%edx), %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	sbbl	8(%edx), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	sbbl	12(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	16(%edx), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	sbbl	20(%edx), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	sbbl	24(%edx), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	movl	%esi, %ecx
-	movl	%esi, %ebp
-	sbbl	36(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	movl	%esi, %ecx
-	movl	%esi, %edi
-	sbbl	40(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx          # 4-byte Reload
-	sbbl	48(%edx), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	sarl	$31, %ebx
-	testl	%ebx, %ebx
-	movl	64(%esp), %edx          # 4-byte Reload
-	js	.LBB201_2
-# BB#1:
-	movl	%eax, %edx
-.LBB201_2:
-	movl	120(%esp), %esi
-	movl	%edx, (%esi)
-	movl	68(%esp), %edx          # 4-byte Reload
-	js	.LBB201_4
-# BB#3:
-	movl	(%esp), %edx            # 4-byte Reload
-.LBB201_4:
-	movl	%edx, 4(%esi)
-	movl	%edi, %edx
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB201_6
-# BB#5:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB201_6:
-	movl	%eax, 8(%esi)
-	movl	%ebp, %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB201_8
-# BB#7:
-	movl	8(%esp), %ebx           # 4-byte Reload
-.LBB201_8:
-	movl	%ebx, 12(%esi)
-	movl	96(%esp), %ebp          # 4-byte Reload
-	movl	56(%esp), %ecx          # 4-byte Reload
-	js	.LBB201_10
-# BB#9:
-	movl	12(%esp), %ecx          # 4-byte Reload
-.LBB201_10:
-	movl	%ecx, 16(%esi)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	js	.LBB201_12
-# BB#11:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB201_12:
-	movl	%eax, 20(%esi)
-	movl	72(%esp), %ebx          # 4-byte Reload
-	js	.LBB201_14
-# BB#13:
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-.LBB201_14:
-	movl	76(%esp), %eax          # 4-byte Reload
-	movl	%eax, 24(%esi)
-	js	.LBB201_16
-# BB#15:
-	movl	24(%esp), %ebp          # 4-byte Reload
-.LBB201_16:
-	movl	%ebp, 28(%esi)
-	js	.LBB201_18
-# BB#17:
-	movl	28(%esp), %ebx          # 4-byte Reload
-.LBB201_18:
-	movl	%ebx, 32(%esi)
-	js	.LBB201_20
-# BB#19:
-	movl	32(%esp), %edi          # 4-byte Reload
-.LBB201_20:
-	movl	%edi, 36(%esi)
-	js	.LBB201_22
-# BB#21:
-	movl	36(%esp), %edx          # 4-byte Reload
-.LBB201_22:
-	movl	%edx, 40(%esi)
-	js	.LBB201_24
-# BB#23:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB201_24:
-	movl	%ecx, 44(%esi)
-	movl	88(%esp), %eax          # 4-byte Reload
-	js	.LBB201_26
-# BB#25:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB201_26:
-	movl	%eax, 48(%esi)
-	addl	$100, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end201:
-	.size	mcl_fp_addNF13L, .Lfunc_end201-mcl_fp_addNF13L
-
-	.globl	mcl_fp_sub13L
-	.align	16, 0x90
-	.type	mcl_fp_sub13L,@function
-mcl_fp_sub13L:                          # @mcl_fp_sub13L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$44, %esp
-	movl	68(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	72(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	28(%esi), %eax
-	sbbl	28(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	32(%esi), %edx
-	sbbl	32(%edi), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	36(%esi), %ecx
-	sbbl	36(%edi), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	40(%esi), %eax
-	sbbl	40(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	44(%esi), %ebp
-	sbbl	44(%edi), %ebp
-	movl	48(%esi), %esi
-	sbbl	48(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	64(%esp), %ebx
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%ebx)
-	movl	%edx, 32(%ebx)
-	movl	%ecx, 36(%ebx)
-	movl	%eax, 40(%ebx)
-	movl	%ebp, 44(%ebx)
-	movl	%esi, 48(%ebx)
-	je	.LBB202_2
-# BB#1:                                 # %carry
-	movl	%esi, %edi
-	movl	76(%esp), %esi
-	movl	12(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	adcl	8(%esi), %ecx
-	movl	12(%esi), %eax
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	36(%esi), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	40(%esi), %ecx
-	adcl	(%esp), %ecx            # 4-byte Folded Reload
-	movl	%eax, 36(%ebx)
-	movl	%ecx, 40(%ebx)
-	movl	44(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 44(%ebx)
-	movl	48(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 48(%ebx)
-.LBB202_2:                              # %nocarry
-	addl	$44, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end202:
-	.size	mcl_fp_sub13L, .Lfunc_end202-mcl_fp_sub13L
-
-	.globl	mcl_fp_subNF13L
-	.align	16, 0x90
-	.type	mcl_fp_subNF13L,@function
-mcl_fp_subNF13L:                        # @mcl_fp_subNF13L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$84, %esp
-	movl	108(%esp), %ecx
-	movl	(%ecx), %edx
-	movl	4(%ecx), %eax
-	movl	112(%esp), %edi
-	subl	(%edi), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	44(%ecx), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	32(%ecx), %ebp
-	movl	28(%ecx), %ebx
-	movl	24(%ecx), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	20(%ecx), %esi
-	movl	16(%ecx), %edx
-	movl	12(%ecx), %eax
-	movl	8(%ecx), %ecx
-	sbbl	8(%edi), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	sbbl	12(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%edi), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	sbbl	28(%edi), %ebx
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	sbbl	32(%edi), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	sbbl	48(%edi), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edx, %eax
-	sarl	$31, %edi
-	movl	%edi, %edx
-	shldl	$1, %eax, %edx
-	movl	116(%esp), %esi
-	movl	4(%esi), %eax
-	andl	%edx, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	andl	(%esi), %edx
-	movl	48(%esi), %eax
-	andl	%edi, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%esi), %eax
-	andl	%edi, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esi), %eax
-	andl	%edi, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	36(%esi), %eax
-	andl	%edi, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	32(%esi), %eax
-	andl	%edi, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	28(%esi), %eax
-	andl	%edi, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	24(%esi), %eax
-	andl	%edi, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	20(%esi), %ebp
-	andl	%edi, %ebp
-	movl	16(%esi), %ebx
-	andl	%edi, %ebx
-	movl	12(%esi), %ecx
-	andl	%edi, %ecx
-	roll	%edi
-	andl	8(%esi), %edi
-	addl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	104(%esp), %esi
-	movl	%edx, (%esi)
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%eax, 4(%esi)
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edi, 8(%esi)
-	adcl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ecx, 12(%esi)
-	adcl	44(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebx, 16(%esi)
-	movl	(%esp), %ecx            # 4-byte Reload
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebp, 20(%esi)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%esi)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%esi)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%esi)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	adcl	72(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 36(%esi)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 40(%esi)
-	movl	%eax, 44(%esi)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%esi)
-	addl	$84, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end203:
-	.size	mcl_fp_subNF13L, .Lfunc_end203-mcl_fp_subNF13L
-
-	.globl	mcl_fpDbl_add13L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add13L,@function
-mcl_fpDbl_add13L:                       # @mcl_fpDbl_add13L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$96, %esp
-	movl	124(%esp), %ecx
-	movl	120(%esp), %esi
-	movl	12(%esi), %edi
-	movl	16(%esi), %edx
-	movl	8(%ecx), %ebx
-	movl	(%ecx), %ebp
-	addl	(%esi), %ebp
-	movl	116(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%ecx), %ebp
-	adcl	4(%esi), %ebp
-	adcl	8(%esi), %ebx
-	adcl	12(%ecx), %edi
-	adcl	16(%ecx), %edx
-	movl	%ebp, 4(%eax)
-	movl	60(%ecx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%ecx), %ebx
-	movl	%edi, 12(%eax)
-	movl	20(%esi), %edi
-	adcl	%ebx, %edi
-	movl	24(%ecx), %ebx
-	movl	%edx, 16(%eax)
-	movl	24(%esi), %edx
-	adcl	%ebx, %edx
-	movl	28(%ecx), %ebx
-	movl	%edi, 20(%eax)
-	movl	28(%esi), %edi
-	adcl	%ebx, %edi
-	movl	32(%ecx), %ebx
-	movl	%edx, 24(%eax)
-	movl	32(%esi), %edx
-	adcl	%ebx, %edx
-	movl	36(%ecx), %ebx
-	movl	%edi, 28(%eax)
-	movl	36(%esi), %edi
-	adcl	%ebx, %edi
-	movl	40(%ecx), %ebx
-	movl	%edx, 32(%eax)
-	movl	40(%esi), %edx
-	adcl	%ebx, %edx
-	movl	44(%ecx), %ebx
-	movl	%edi, 36(%eax)
-	movl	44(%esi), %edi
-	adcl	%ebx, %edi
-	movl	48(%ecx), %ebx
-	movl	%edx, 40(%eax)
-	movl	48(%esi), %edx
-	adcl	%ebx, %edx
-	movl	52(%ecx), %ebx
-	movl	%edi, 44(%eax)
-	movl	52(%esi), %edi
-	adcl	%ebx, %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	56(%ecx), %edi
-	movl	%edx, 48(%eax)
-	movl	56(%esi), %eax
-	adcl	%edi, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	64(%ecx), %edx
-	movl	64(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	68(%ecx), %edx
-	movl	68(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	72(%ecx), %edx
-	movl	72(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%ecx), %edx
-	movl	76(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%ecx), %edx
-	movl	80(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	84(%ecx), %edx
-	movl	84(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%ecx), %edx
-	movl	88(%esi), %edi
-	adcl	%edx, %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	92(%ecx), %edx
-	movl	92(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	96(%ecx), %edx
-	movl	96(%esi), %ebx
-	adcl	%edx, %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	100(%ecx), %ecx
-	movl	100(%esi), %esi
-	adcl	%ecx, %esi
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	128(%esp), %ebp
-	movl	76(%esp), %ecx          # 4-byte Reload
-	subl	(%ebp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	sbbl	4(%ebp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	8(%ebp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%ebp), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	sbbl	20(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%ebp), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%ebp), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%ebp), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	sbbl	36(%ebp), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	sbbl	40(%ebp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	movl	%esi, %ebx
-	sbbl	44(%ebp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ebx, %ecx
-	sbbl	48(%ebp), %ecx
-	sbbl	$0, %edx
-	andl	$1, %edx
-	jne	.LBB204_2
-# BB#1:
-	movl	%ecx, %ebx
-.LBB204_2:
-	testb	%dl, %dl
-	movl	76(%esp), %ecx          # 4-byte Reload
-	movl	72(%esp), %edx          # 4-byte Reload
-	movl	68(%esp), %esi          # 4-byte Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	movl	60(%esp), %ebp          # 4-byte Reload
-	jne	.LBB204_4
-# BB#3:
-	movl	(%esp), %edx            # 4-byte Reload
-	movl	4(%esp), %esi           # 4-byte Reload
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	32(%esp), %ecx          # 4-byte Reload
-.LBB204_4:
-	movl	116(%esp), %eax
-	movl	%ecx, 52(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%eax)
-	movl	84(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 60(%eax)
-	movl	88(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 64(%eax)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 68(%eax)
-	movl	%ebp, 72(%eax)
-	movl	%edi, 76(%eax)
-	movl	%esi, 80(%eax)
-	movl	%edx, 84(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	movl	52(%esp), %edx          # 4-byte Reload
-	movl	48(%esp), %esi          # 4-byte Reload
-	jne	.LBB204_6
-# BB#5:
-	movl	36(%esp), %esi          # 4-byte Reload
-.LBB204_6:
-	movl	%esi, 88(%eax)
-	jne	.LBB204_8
-# BB#7:
-	movl	40(%esp), %edx          # 4-byte Reload
-.LBB204_8:
-	movl	%edx, 92(%eax)
-	jne	.LBB204_10
-# BB#9:
-	movl	44(%esp), %ecx          # 4-byte Reload
-.LBB204_10:
-	movl	%ecx, 96(%eax)
-	movl	%ebx, 100(%eax)
-	addl	$96, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end204:
-	.size	mcl_fpDbl_add13L, .Lfunc_end204-mcl_fpDbl_add13L
-
-	.globl	mcl_fpDbl_sub13L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub13L,@function
-mcl_fpDbl_sub13L:                       # @mcl_fpDbl_sub13L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$84, %esp
-	movl	108(%esp), %edi
-	movl	(%edi), %eax
-	movl	4(%edi), %edx
-	movl	112(%esp), %ebx
-	subl	(%ebx), %eax
-	sbbl	4(%ebx), %edx
-	movl	8(%edi), %esi
-	sbbl	8(%ebx), %esi
-	movl	104(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%edi), %eax
-	sbbl	12(%ebx), %eax
-	movl	%edx, 4(%ecx)
-	movl	16(%edi), %edx
-	sbbl	16(%ebx), %edx
-	movl	%esi, 8(%ecx)
-	movl	20(%ebx), %esi
-	movl	%eax, 12(%ecx)
-	movl	20(%edi), %eax
-	sbbl	%esi, %eax
-	movl	24(%ebx), %esi
-	movl	%edx, 16(%ecx)
-	movl	24(%edi), %edx
-	sbbl	%esi, %edx
-	movl	28(%ebx), %esi
-	movl	%eax, 20(%ecx)
-	movl	28(%edi), %eax
-	sbbl	%esi, %eax
-	movl	32(%ebx), %esi
-	movl	%edx, 24(%ecx)
-	movl	32(%edi), %edx
-	sbbl	%esi, %edx
-	movl	36(%ebx), %esi
-	movl	%eax, 28(%ecx)
-	movl	36(%edi), %eax
-	sbbl	%esi, %eax
-	movl	40(%ebx), %esi
-	movl	%edx, 32(%ecx)
-	movl	40(%edi), %edx
-	sbbl	%esi, %edx
-	movl	44(%ebx), %esi
-	movl	%eax, 36(%ecx)
-	movl	44(%edi), %eax
-	sbbl	%esi, %eax
-	movl	48(%ebx), %esi
-	movl	%edx, 40(%ecx)
-	movl	48(%edi), %edx
-	sbbl	%esi, %edx
-	movl	52(%ebx), %esi
-	movl	%eax, 44(%ecx)
-	movl	52(%edi), %eax
-	sbbl	%esi, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	56(%ebx), %eax
-	movl	%edx, 48(%ecx)
-	movl	56(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	60(%ebx), %eax
-	movl	60(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	64(%ebx), %eax
-	movl	64(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	68(%ebx), %eax
-	movl	68(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	72(%ebx), %eax
-	movl	72(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	76(%ebx), %eax
-	movl	76(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	80(%ebx), %eax
-	movl	80(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	84(%ebx), %eax
-	movl	84(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	88(%ebx), %eax
-	movl	88(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	92(%ebx), %eax
-	movl	92(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	96(%ebx), %eax
-	movl	96(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	100(%ebx), %eax
-	movl	100(%edi), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	116(%esp), %edi
-	jne	.LBB205_1
-# BB#2:
-	movl	$0, 44(%esp)            # 4-byte Folded Spill
-	jmp	.LBB205_3
-.LBB205_1:
-	movl	48(%edi), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-.LBB205_3:
-	testb	%al, %al
-	jne	.LBB205_4
-# BB#5:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	movl	$0, %ebx
-	jmp	.LBB205_6
-.LBB205_4:
-	movl	(%edi), %ebx
-	movl	4(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB205_6:
-	jne	.LBB205_7
-# BB#8:
-	movl	$0, 24(%esp)            # 4-byte Folded Spill
-	jmp	.LBB205_9
-.LBB205_7:
-	movl	44(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-.LBB205_9:
-	jne	.LBB205_10
-# BB#11:
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB205_12
-.LBB205_10:
-	movl	40(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-.LBB205_12:
-	jne	.LBB205_13
-# BB#14:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB205_15
-.LBB205_13:
-	movl	36(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB205_15:
-	jne	.LBB205_16
-# BB#17:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB205_18
-.LBB205_16:
-	movl	32(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB205_18:
-	jne	.LBB205_19
-# BB#20:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB205_21
-.LBB205_19:
-	movl	28(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB205_21:
-	jne	.LBB205_22
-# BB#23:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB205_24
-.LBB205_22:
-	movl	24(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB205_24:
-	jne	.LBB205_25
-# BB#26:
-	movl	$0, %eax
-	jmp	.LBB205_27
-.LBB205_25:
-	movl	20(%edi), %eax
-.LBB205_27:
-	jne	.LBB205_28
-# BB#29:
-	movl	$0, %edx
-	jmp	.LBB205_30
-.LBB205_28:
-	movl	16(%edi), %edx
-.LBB205_30:
-	jne	.LBB205_31
-# BB#32:
-	movl	$0, %esi
-	jmp	.LBB205_33
-.LBB205_31:
-	movl	12(%edi), %esi
-.LBB205_33:
-	jne	.LBB205_34
-# BB#35:
-	xorl	%edi, %edi
-	jmp	.LBB205_36
-.LBB205_34:
-	movl	8(%edi), %edi
-.LBB205_36:
-	addl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	16(%esp), %ebp          # 4-byte Reload
-	adcl	28(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebx, 52(%ecx)
-	adcl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebp, 56(%ecx)
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%edi, 60(%ecx)
-	adcl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, 64(%ecx)
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 68(%ecx)
-	movl	(%esp), %edx            # 4-byte Reload
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 72(%ecx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 76(%ecx)
-	movl	8(%esp), %edx           # 4-byte Reload
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 80(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 84(%ecx)
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 88(%ecx)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 92(%ecx)
-	movl	%eax, 96(%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%ecx)
-	addl	$84, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end205:
-	.size	mcl_fpDbl_sub13L, .Lfunc_end205-mcl_fpDbl_sub13L
-
-	.align	16, 0x90
-	.type	.LmulPv448x32,@function
-.LmulPv448x32:                          # @mulPv448x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$96, %esp
-	movl	%edx, %edi
-	movl	116(%esp), %esi
-	movl	%esi, %eax
-	mull	52(%edi)
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	48(%edi)
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	44(%edi)
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	40(%edi)
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	36(%edi)
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	32(%edi)
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	28(%edi)
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	24(%edi)
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	20(%edi)
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	16(%edi)
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	12(%edi)
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	8(%edi)
-	movl	%edx, %ebx
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	4(%edi)
-	movl	%edx, %ebp
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%esi, %eax
-	mull	(%edi)
-	movl	%eax, (%ecx)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%ecx)
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 8(%ecx)
-	adcl	8(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 12(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%ecx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%ecx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%ecx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 56(%ecx)
-	movl	%ecx, %eax
-	addl	$96, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end206:
-	.size	.LmulPv448x32, .Lfunc_end206-.LmulPv448x32
-
-	.globl	mcl_fp_mulUnitPre14L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre14L,@function
-mcl_fp_mulUnitPre14L:                   # @mcl_fp_mulUnitPre14L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$108, %esp
-	calll	.L207$pb
-.L207$pb:
-	popl	%ebx
-.Ltmp38:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp38-.L207$pb), %ebx
-	movl	136(%esp), %eax
-	movl	%eax, (%esp)
-	leal	48(%esp), %ecx
-	movl	132(%esp), %edx
-	calll	.LmulPv448x32
-	movl	104(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp
-	movl	64(%esp), %ebx
-	movl	60(%esp), %edi
-	movl	56(%esp), %esi
-	movl	48(%esp), %edx
-	movl	52(%esp), %ecx
-	movl	128(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%eax)
-	addl	$108, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end207:
-	.size	mcl_fp_mulUnitPre14L, .Lfunc_end207-mcl_fp_mulUnitPre14L
-
-	.globl	mcl_fpDbl_mulPre14L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre14L,@function
-mcl_fpDbl_mulPre14L:                    # @mcl_fpDbl_mulPre14L
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$268, %esp              # imm = 0x10C
-	calll	.L208$pb
-.L208$pb:
-	popl	%ebx
-.Ltmp39:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp39-.L208$pb), %ebx
-	movl	%ebx, -192(%ebp)        # 4-byte Spill
-	movl	16(%ebp), %esi
-	movl	%esi, 8(%esp)
-	movl	12(%ebp), %edi
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre7L@PLT
-	leal	28(%esi), %eax
-	movl	%eax, 8(%esp)
-	leal	28(%edi), %eax
-	movl	%eax, 4(%esp)
-	movl	8(%ebp), %eax
-	leal	56(%eax), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre7L@PLT
-	movl	44(%edi), %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	movl	40(%edi), %eax
-	movl	36(%edi), %edx
-	movl	(%edi), %edi
-	movl	12(%ebp), %ecx
-	movl	4(%ecx), %ecx
-	movl	12(%ebp), %ebx
-	addl	28(%ebx), %edi
-	movl	%edi, -180(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %edi
-	adcl	32(%edi), %ecx
-	movl	%ecx, -200(%ebp)        # 4-byte Spill
-	adcl	8(%edi), %edx
-	movl	%edx, -212(%ebp)        # 4-byte Spill
-	adcl	12(%edi), %eax
-	movl	%eax, -196(%ebp)        # 4-byte Spill
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	16(%edi), %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	movl	%eax, %ebx
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -128(%ebp)        # 4-byte Spill
-	movl	(%esi), %eax
-	addl	28(%esi), %eax
-	movl	%eax, -216(%ebp)        # 4-byte Spill
-	movl	4(%esi), %eax
-	adcl	32(%esi), %eax
-	movl	%eax, -164(%ebp)        # 4-byte Spill
-	movl	36(%esi), %eax
-	adcl	8(%esi), %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	movl	40(%esi), %eax
-	adcl	12(%esi), %eax
-	movl	%eax, -172(%ebp)        # 4-byte Spill
-	movl	44(%esi), %eax
-	adcl	16(%esi), %eax
-	movl	%eax, -176(%ebp)        # 4-byte Spill
-	movl	48(%esi), %ecx
-	adcl	20(%esi), %ecx
-	movl	52(%esi), %eax
-	adcl	24(%esi), %eax
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %esi
-	popl	%eax
-	movl	%esi, -220(%ebp)        # 4-byte Spill
-	movl	%ebx, %esi
-	movl	%edx, -184(%ebp)        # 4-byte Spill
-	movl	-180(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -188(%ebp)        # 4-byte Spill
-	jb	.LBB208_2
-# BB#1:
-	xorl	%esi, %esi
-	movl	$0, -184(%ebp)          # 4-byte Folded Spill
-	movl	$0, -188(%ebp)          # 4-byte Folded Spill
-.LBB208_2:
-	movl	%esi, -204(%ebp)        # 4-byte Spill
-	movl	52(%edi), %esi
-	movl	48(%edi), %ebx
-	movl	-128(%ebp), %edx        # 4-byte Reload
-	pushl	%eax
-	movl	%edx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	20(%edi), %ebx
-	movl	%ebx, -160(%ebp)        # 4-byte Spill
-	adcl	24(%edi), %esi
-	movl	%esi, -208(%ebp)        # 4-byte Spill
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	movl	%ecx, -152(%ebp)        # 4-byte Spill
-	movl	-176(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -128(%ebp)        # 4-byte Spill
-	movl	-172(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -132(%ebp)        # 4-byte Spill
-	movl	-168(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -136(%ebp)        # 4-byte Spill
-	movl	-164(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -140(%ebp)        # 4-byte Spill
-	movl	-216(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -144(%ebp)        # 4-byte Spill
-	jb	.LBB208_4
-# BB#3:
-	movl	$0, -148(%ebp)          # 4-byte Folded Spill
-	movl	$0, -152(%ebp)          # 4-byte Folded Spill
-	movl	$0, -128(%ebp)          # 4-byte Folded Spill
-	movl	$0, -132(%ebp)          # 4-byte Folded Spill
-	movl	$0, -136(%ebp)          # 4-byte Folded Spill
-	movl	$0, -140(%ebp)          # 4-byte Folded Spill
-	movl	$0, -144(%ebp)          # 4-byte Folded Spill
-.LBB208_4:
-	movl	-180(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -96(%ebp)
-	movl	-200(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -92(%ebp)
-	movl	-212(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -88(%ebp)
-	movl	-196(%ebp), %edi        # 4-byte Reload
-	movl	%edi, -84(%ebp)
-	movl	-156(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -80(%ebp)
-	movl	%ebx, -124(%ebp)
-	movl	-164(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -120(%ebp)
-	movl	-168(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -116(%ebp)
-	movl	-172(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -112(%ebp)
-	movl	-176(%ebp), %edx        # 4-byte Reload
-	movl	%edx, -108(%ebp)
-	movl	%ecx, -104(%ebp)
-	movl	%edi, %ebx
-	movl	%esi, %edi
-	movl	%eax, -100(%ebp)
-	sbbl	%edx, %edx
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -76(%ebp)
-	movl	-208(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -72(%ebp)
-	movl	-220(%ebp), %ecx        # 4-byte Reload
-	pushl	%eax
-	movl	%ecx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB208_6
-# BB#5:
-	movl	$0, %esi
-	movl	$0, %eax
-	movl	$0, %ebx
-	movl	$0, %edi
-.LBB208_6:
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	sbbl	%eax, %eax
-	leal	-124(%ebp), %ecx
-	movl	%ecx, 8(%esp)
-	leal	-96(%ebp), %ecx
-	movl	%ecx, 4(%esp)
-	leal	-68(%ebp), %ecx
-	movl	%ecx, (%esp)
-	andl	%eax, %edx
-	movl	-188(%ebp), %eax        # 4-byte Reload
-	addl	%eax, -144(%ebp)        # 4-byte Folded Spill
-	adcl	%edi, -140(%ebp)        # 4-byte Folded Spill
-	movl	-184(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -136(%ebp)        # 4-byte Folded Spill
-	adcl	%ebx, -132(%ebp)        # 4-byte Folded Spill
-	movl	-204(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -128(%ebp)        # 4-byte Folded Spill
-	movl	-152(%ebp), %edi        # 4-byte Reload
-	adcl	-160(%ebp), %edi        # 4-byte Folded Reload
-	adcl	%esi, -148(%ebp)        # 4-byte Folded Spill
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	andl	$1, %edx
-	movl	%edx, -156(%ebp)        # 4-byte Spill
-	movl	-192(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre7L@PLT
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	addl	-40(%ebp), %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	movl	-140(%ebp), %eax        # 4-byte Reload
-	adcl	-36(%ebp), %eax
-	movl	%eax, -140(%ebp)        # 4-byte Spill
-	movl	-136(%ebp), %eax        # 4-byte Reload
-	adcl	-32(%ebp), %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-	movl	-132(%ebp), %eax        # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -132(%ebp)        # 4-byte Spill
-	movl	-128(%ebp), %eax        # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -128(%ebp)        # 4-byte Spill
-	adcl	-20(%ebp), %edi
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	-16(%ebp), %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	adcl	%esi, -156(%ebp)        # 4-byte Folded Spill
-	movl	-68(%ebp), %eax
-	movl	8(%ebp), %esi
-	subl	(%esi), %eax
-	movl	%eax, -172(%ebp)        # 4-byte Spill
-	movl	-64(%ebp), %ecx
-	sbbl	4(%esi), %ecx
-	movl	-60(%ebp), %eax
-	sbbl	8(%esi), %eax
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	movl	-56(%ebp), %edx
-	sbbl	12(%esi), %edx
-	movl	-52(%ebp), %ebx
-	sbbl	16(%esi), %ebx
-	movl	-48(%ebp), %eax
-	sbbl	20(%esi), %eax
-	movl	%eax, -164(%ebp)        # 4-byte Spill
-	movl	-44(%ebp), %eax
-	sbbl	24(%esi), %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	movl	28(%esi), %eax
-	movl	%eax, -176(%ebp)        # 4-byte Spill
-	sbbl	%eax, -144(%ebp)        # 4-byte Folded Spill
-	movl	32(%esi), %eax
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	sbbl	%eax, -140(%ebp)        # 4-byte Folded Spill
-	movl	36(%esi), %eax
-	movl	%eax, -184(%ebp)        # 4-byte Spill
-	sbbl	%eax, -136(%ebp)        # 4-byte Folded Spill
-	movl	40(%esi), %eax
-	movl	%eax, -188(%ebp)        # 4-byte Spill
-	sbbl	%eax, -132(%ebp)        # 4-byte Folded Spill
-	movl	44(%esi), %eax
-	movl	%eax, -192(%ebp)        # 4-byte Spill
-	sbbl	%eax, -128(%ebp)        # 4-byte Folded Spill
-	movl	48(%esi), %eax
-	movl	%eax, -196(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edi
-	movl	%edi, -152(%ebp)        # 4-byte Spill
-	movl	52(%esi), %eax
-	movl	%eax, -200(%ebp)        # 4-byte Spill
-	movl	-148(%ebp), %edi        # 4-byte Reload
-	sbbl	%eax, %edi
-	sbbl	$0, -156(%ebp)          # 4-byte Folded Spill
-	movl	56(%esi), %eax
-	movl	%eax, -228(%ebp)        # 4-byte Spill
-	subl	%eax, -172(%ebp)        # 4-byte Folded Spill
-	movl	60(%esi), %eax
-	movl	%eax, -232(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ecx
-	movl	64(%esi), %eax
-	movl	%eax, -236(%ebp)        # 4-byte Spill
-	sbbl	%eax, -160(%ebp)        # 4-byte Folded Spill
-	movl	68(%esi), %eax
-	movl	%eax, -240(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edx
-	movl	72(%esi), %eax
-	movl	%eax, -244(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ebx
-	movl	76(%esi), %eax
-	movl	%eax, -248(%ebp)        # 4-byte Spill
-	sbbl	%eax, -164(%ebp)        # 4-byte Folded Spill
-	movl	80(%esi), %eax
-	movl	%eax, -252(%ebp)        # 4-byte Spill
-	sbbl	%eax, -168(%ebp)        # 4-byte Folded Spill
-	movl	84(%esi), %eax
-	movl	%eax, -256(%ebp)        # 4-byte Spill
-	sbbl	%eax, -144(%ebp)        # 4-byte Folded Spill
-	movl	88(%esi), %eax
-	movl	%eax, -208(%ebp)        # 4-byte Spill
-	sbbl	%eax, -140(%ebp)        # 4-byte Folded Spill
-	movl	92(%esi), %eax
-	movl	%eax, -212(%ebp)        # 4-byte Spill
-	sbbl	%eax, -136(%ebp)        # 4-byte Folded Spill
-	movl	96(%esi), %eax
-	movl	%eax, -216(%ebp)        # 4-byte Spill
-	sbbl	%eax, -132(%ebp)        # 4-byte Folded Spill
-	movl	100(%esi), %eax
-	movl	%eax, -220(%ebp)        # 4-byte Spill
-	sbbl	%eax, -128(%ebp)        # 4-byte Folded Spill
-	movl	104(%esi), %eax
-	movl	%eax, -224(%ebp)        # 4-byte Spill
-	sbbl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	movl	108(%esi), %eax
-	movl	%eax, -204(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edi
-	movl	%edi, -148(%ebp)        # 4-byte Spill
-	movl	-156(%ebp), %edi        # 4-byte Reload
-	sbbl	$0, %edi
-	movl	-172(%ebp), %eax        # 4-byte Reload
-	addl	-176(%ebp), %eax        # 4-byte Folded Reload
-	adcl	-180(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 28(%esi)
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	adcl	-184(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 32(%esi)
-	adcl	-188(%ebp), %edx        # 4-byte Folded Reload
-	movl	%eax, 36(%esi)
-	adcl	-192(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%edx, 40(%esi)
-	movl	-164(%ebp), %eax        # 4-byte Reload
-	adcl	-196(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ebx, 44(%esi)
-	movl	-168(%ebp), %ecx        # 4-byte Reload
-	adcl	-200(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 48(%esi)
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	adcl	-228(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 52(%esi)
-	movl	-140(%ebp), %ecx        # 4-byte Reload
-	adcl	-232(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 56(%esi)
-	movl	-136(%ebp), %eax        # 4-byte Reload
-	adcl	-236(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 60(%esi)
-	movl	-132(%ebp), %ecx        # 4-byte Reload
-	adcl	-240(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 64(%esi)
-	movl	-128(%ebp), %eax        # 4-byte Reload
-	adcl	-244(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 68(%esi)
-	movl	-152(%ebp), %ecx        # 4-byte Reload
-	adcl	-248(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 72(%esi)
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	-252(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 76(%esi)
-	adcl	-256(%ebp), %edi        # 4-byte Folded Reload
-	movl	%eax, 80(%esi)
-	movl	%edi, 84(%esi)
-	movl	-208(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 88(%esi)
-	movl	-212(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 92(%esi)
-	movl	-216(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 96(%esi)
-	movl	-220(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 100(%esi)
-	movl	-224(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 104(%esi)
-	movl	-204(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 108(%esi)
-	addl	$268, %esp              # imm = 0x10C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end208:
-	.size	mcl_fpDbl_mulPre14L, .Lfunc_end208-mcl_fpDbl_mulPre14L
-
-	.globl	mcl_fpDbl_sqrPre14L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre14L,@function
-mcl_fpDbl_sqrPre14L:                    # @mcl_fpDbl_sqrPre14L
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$268, %esp              # imm = 0x10C
-	calll	.L209$pb
-.L209$pb:
-	popl	%ebx
-.Ltmp40:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp40-.L209$pb), %ebx
-	movl	%ebx, -172(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %esi
-	movl	%esi, (%esp)
-	calll	mcl_fpDbl_mulPre7L@PLT
-	leal	28(%edi), %eax
-	movl	%eax, 8(%esp)
-	movl	%eax, 4(%esp)
-	leal	56(%esi), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre7L@PLT
-	movl	48(%edi), %eax
-	movl	44(%edi), %ecx
-	movl	36(%edi), %edx
-	movl	(%edi), %esi
-	movl	4(%edi), %ebx
-	addl	28(%edi), %esi
-	adcl	32(%edi), %ebx
-	movl	%ebx, -164(%ebp)        # 4-byte Spill
-	adcl	8(%edi), %edx
-	movl	%edx, -160(%ebp)        # 4-byte Spill
-	movl	40(%edi), %edx
-	adcl	12(%edi), %edx
-	adcl	16(%edi), %ecx
-	movl	%ecx, -180(%ebp)        # 4-byte Spill
-	adcl	20(%edi), %eax
-	movl	%eax, -176(%ebp)        # 4-byte Spill
-	movl	52(%edi), %ecx
-	adcl	24(%edi), %ecx
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -184(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -140(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	sbbl	%ebx, %ebx
-	movl	%ebx, -128(%ebp)        # 4-byte Spill
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_1
-# BB#2:
-	movl	%esi, -168(%ebp)        # 4-byte Spill
-	movl	$0, -132(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_3
-.LBB209_1:
-	leal	(%esi,%esi), %eax
-	movl	%esi, -168(%ebp)        # 4-byte Spill
-	movl	%eax, -132(%ebp)        # 4-byte Spill
-.LBB209_3:
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	movl	-180(%ebp), %ebx        # 4-byte Reload
-	jb	.LBB209_4
-# BB#5:
-	movl	$0, -156(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_6
-.LBB209_4:
-	movl	-164(%ebp), %eax        # 4-byte Reload
-	movl	-168(%ebp), %esi        # 4-byte Reload
-	shldl	$1, %esi, %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-.LBB209_6:
-	movl	-176(%ebp), %edi        # 4-byte Reload
-	movl	-136(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_7
-# BB#8:
-	movl	$0, -136(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_9
-.LBB209_7:
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	movl	-164(%ebp), %esi        # 4-byte Reload
-	shldl	$1, %esi, %eax
-	movl	%eax, -136(%ebp)        # 4-byte Spill
-.LBB209_9:
-	movl	%ebx, %esi
-	movl	-140(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_10
-# BB#11:
-	movl	$0, -140(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_12
-.LBB209_10:
-	movl	%edx, %eax
-	movl	-160(%ebp), %ebx        # 4-byte Reload
-	shldl	$1, %ebx, %eax
-	movl	%eax, -140(%ebp)        # 4-byte Spill
-.LBB209_12:
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_13
-# BB#14:
-	movl	$0, -144(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_15
-.LBB209_13:
-	movl	%esi, %eax
-	shldl	$1, %edx, %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-.LBB209_15:
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_16
-# BB#17:
-	movl	$0, -148(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_18
-.LBB209_16:
-	movl	%edi, %eax
-	shldl	$1, %esi, %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-.LBB209_18:
-	movl	-152(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_19
-# BB#20:
-	movl	$0, -152(%ebp)          # 4-byte Folded Spill
-	jmp	.LBB209_21
-.LBB209_19:
-	movl	%ecx, %eax
-	shldl	$1, %edi, %eax
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-.LBB209_21:
-	movl	-168(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -96(%ebp)
-	movl	%eax, -124(%ebp)
-	movl	-164(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -92(%ebp)
-	movl	%eax, -120(%ebp)
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -88(%ebp)
-	movl	%eax, -116(%ebp)
-	movl	%edx, -84(%ebp)
-	movl	%edx, -112(%ebp)
-	movl	%esi, -80(%ebp)
-	movl	%esi, -108(%ebp)
-	movl	%edi, -76(%ebp)
-	movl	%edi, -104(%ebp)
-	movl	%ecx, -72(%ebp)
-	movl	%ecx, -100(%ebp)
-	movl	-184(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB209_22
-# BB#23:
-	xorl	%edi, %edi
-	jmp	.LBB209_24
-.LBB209_22:
-	shrl	$31, %ecx
-	movl	%ecx, %edi
-.LBB209_24:
-	leal	-68(%ebp), %eax
-	movl	%eax, (%esp)
-	leal	-96(%ebp), %eax
-	movl	%eax, 4(%esp)
-	leal	-124(%ebp), %eax
-	movl	%eax, 8(%esp)
-	movl	-128(%ebp), %esi        # 4-byte Reload
-	andl	$1, %esi
-	movl	-172(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre7L@PLT
-	movl	-132(%ebp), %eax        # 4-byte Reload
-	addl	-40(%ebp), %eax
-	movl	%eax, -132(%ebp)        # 4-byte Spill
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	-36(%ebp), %eax
-	movl	-136(%ebp), %ecx        # 4-byte Reload
-	adcl	-32(%ebp), %ecx
-	movl	%ecx, -136(%ebp)        # 4-byte Spill
-	movl	-140(%ebp), %ecx        # 4-byte Reload
-	adcl	-28(%ebp), %ecx
-	movl	%ecx, -140(%ebp)        # 4-byte Spill
-	movl	-144(%ebp), %ecx        # 4-byte Reload
-	adcl	-24(%ebp), %ecx
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	movl	-148(%ebp), %ecx        # 4-byte Reload
-	adcl	-20(%ebp), %ecx
-	movl	%ecx, -148(%ebp)        # 4-byte Spill
-	movl	-152(%ebp), %ecx        # 4-byte Reload
-	adcl	-16(%ebp), %ecx
-	movl	%ecx, -152(%ebp)        # 4-byte Spill
-	adcl	%edi, %esi
-	movl	%esi, -128(%ebp)        # 4-byte Spill
-	movl	-68(%ebp), %ecx
-	movl	8(%ebp), %esi
-	subl	(%esi), %ecx
-	movl	%ecx, -204(%ebp)        # 4-byte Spill
-	movl	-64(%ebp), %edi
-	sbbl	4(%esi), %edi
-	movl	-60(%ebp), %edx
-	sbbl	8(%esi), %edx
-	movl	%edx, -160(%ebp)        # 4-byte Spill
-	movl	-56(%ebp), %edx
-	sbbl	12(%esi), %edx
-	movl	%edx, -168(%ebp)        # 4-byte Spill
-	movl	-52(%ebp), %ebx
-	sbbl	16(%esi), %ebx
-	movl	-48(%ebp), %ecx
-	sbbl	20(%esi), %ecx
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	movl	-44(%ebp), %edx
-	sbbl	24(%esi), %edx
-	movl	%edx, -164(%ebp)        # 4-byte Spill
-	movl	28(%esi), %edx
-	movl	%edx, -176(%ebp)        # 4-byte Spill
-	sbbl	%edx, -132(%ebp)        # 4-byte Folded Spill
-	movl	32(%esi), %ecx
-	movl	%ecx, -180(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	movl	36(%esi), %eax
-	movl	%eax, -184(%ebp)        # 4-byte Spill
-	sbbl	%eax, -136(%ebp)        # 4-byte Folded Spill
-	movl	40(%esi), %eax
-	movl	%eax, -188(%ebp)        # 4-byte Spill
-	sbbl	%eax, -140(%ebp)        # 4-byte Folded Spill
-	movl	44(%esi), %eax
-	movl	%eax, -192(%ebp)        # 4-byte Spill
-	sbbl	%eax, -144(%ebp)        # 4-byte Folded Spill
-	movl	48(%esi), %eax
-	movl	%eax, -196(%ebp)        # 4-byte Spill
-	sbbl	%eax, -148(%ebp)        # 4-byte Folded Spill
-	movl	52(%esi), %eax
-	movl	%eax, -200(%ebp)        # 4-byte Spill
-	sbbl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	movl	-128(%ebp), %ecx        # 4-byte Reload
-	sbbl	$0, %ecx
-	movl	56(%esi), %eax
-	movl	%eax, -228(%ebp)        # 4-byte Spill
-	movl	-204(%ebp), %edx        # 4-byte Reload
-	subl	%eax, %edx
-	movl	60(%esi), %eax
-	movl	%eax, -232(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edi
-	movl	64(%esi), %eax
-	movl	%eax, -236(%ebp)        # 4-byte Spill
-	sbbl	%eax, -160(%ebp)        # 4-byte Folded Spill
-	movl	68(%esi), %eax
-	movl	%eax, -240(%ebp)        # 4-byte Spill
-	sbbl	%eax, -168(%ebp)        # 4-byte Folded Spill
-	movl	72(%esi), %eax
-	movl	%eax, -244(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ebx
-	movl	76(%esi), %eax
-	movl	%eax, -248(%ebp)        # 4-byte Spill
-	sbbl	%eax, -172(%ebp)        # 4-byte Folded Spill
-	movl	80(%esi), %eax
-	movl	%eax, -252(%ebp)        # 4-byte Spill
-	sbbl	%eax, -164(%ebp)        # 4-byte Folded Spill
-	movl	84(%esi), %eax
-	movl	%eax, -256(%ebp)        # 4-byte Spill
-	sbbl	%eax, -132(%ebp)        # 4-byte Folded Spill
-	movl	88(%esi), %eax
-	movl	%eax, -204(%ebp)        # 4-byte Spill
-	sbbl	%eax, -156(%ebp)        # 4-byte Folded Spill
-	movl	92(%esi), %eax
-	movl	%eax, -208(%ebp)        # 4-byte Spill
-	sbbl	%eax, -136(%ebp)        # 4-byte Folded Spill
-	movl	96(%esi), %eax
-	movl	%eax, -212(%ebp)        # 4-byte Spill
-	sbbl	%eax, -140(%ebp)        # 4-byte Folded Spill
-	movl	100(%esi), %eax
-	movl	%eax, -216(%ebp)        # 4-byte Spill
-	sbbl	%eax, -144(%ebp)        # 4-byte Folded Spill
-	movl	104(%esi), %eax
-	movl	%eax, -220(%ebp)        # 4-byte Spill
-	sbbl	%eax, -148(%ebp)        # 4-byte Folded Spill
-	movl	108(%esi), %eax
-	movl	%eax, -224(%ebp)        # 4-byte Spill
-	sbbl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, %ecx
-	movl	%ecx, -128(%ebp)        # 4-byte Spill
-	movl	%edx, %eax
-	addl	-176(%ebp), %eax        # 4-byte Folded Reload
-	adcl	-180(%ebp), %edi        # 4-byte Folded Reload
-	movl	%eax, 28(%esi)
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	adcl	-184(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edi, 32(%esi)
-	movl	-168(%ebp), %ecx        # 4-byte Reload
-	adcl	-188(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 36(%esi)
-	adcl	-192(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%ecx, 40(%esi)
-	movl	-172(%ebp), %eax        # 4-byte Reload
-	adcl	-196(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ebx, 44(%esi)
-	movl	-164(%ebp), %ecx        # 4-byte Reload
-	adcl	-200(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 48(%esi)
-	movl	-132(%ebp), %eax        # 4-byte Reload
-	adcl	-228(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 52(%esi)
-	movl	-156(%ebp), %edx        # 4-byte Reload
-	adcl	-232(%ebp), %edx        # 4-byte Folded Reload
-	movl	%eax, 56(%esi)
-	movl	-136(%ebp), %ecx        # 4-byte Reload
-	adcl	-236(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%edx, 60(%esi)
-	movl	-140(%ebp), %eax        # 4-byte Reload
-	adcl	-240(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 64(%esi)
-	movl	-144(%ebp), %ecx        # 4-byte Reload
-	adcl	-244(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 68(%esi)
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	-248(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 72(%esi)
-	movl	-152(%ebp), %ecx        # 4-byte Reload
-	adcl	-252(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 76(%esi)
-	movl	-128(%ebp), %eax        # 4-byte Reload
-	adcl	-256(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 80(%esi)
-	movl	%eax, 84(%esi)
-	movl	-204(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 88(%esi)
-	movl	-208(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 92(%esi)
-	movl	-212(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 96(%esi)
-	movl	-216(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 100(%esi)
-	movl	-220(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 104(%esi)
-	movl	-224(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 108(%esi)
-	addl	$268, %esp              # imm = 0x10C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end209:
-	.size	mcl_fpDbl_sqrPre14L, .Lfunc_end209-mcl_fpDbl_sqrPre14L
-
-	.globl	mcl_fp_mont14L
-	.align	16, 0x90
-	.type	mcl_fp_mont14L,@function
-mcl_fp_mont14L:                         # @mcl_fp_mont14L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1900, %esp             # imm = 0x76C
-	calll	.L210$pb
-.L210$pb:
-	popl	%ebx
-.Ltmp41:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp41-.L210$pb), %ebx
-	movl	1932(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1840(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1840(%esp), %edi
-	movl	1844(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	imull	%esi, %eax
-	movl	1896(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	1892(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	1888(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	1884(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	1880(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	1876(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	1872(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1868(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1864(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1860(%esp), %esi
-	movl	1856(%esp), %ebp
-	movl	1852(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	1848(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	1776(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	addl	1776(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1780(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1784(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1788(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1792(%esp), %ebp
-	adcl	1796(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1804(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1808(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1812(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1816(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1820(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1824(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1828(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1832(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	1928(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1712(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %edi
-	movl	%edi, %edx
-	movl	100(%esp), %ecx         # 4-byte Reload
-	addl	1712(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1716(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1720(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1724(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	adcl	1728(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	1732(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1736(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1740(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	1744(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1748(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1752(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1756(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1760(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1764(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1768(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1648(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	movl	100(%esp), %eax         # 4-byte Reload
-	andl	$1, %eax
-	addl	1648(%esp), %ebp
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1652(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1656(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1660(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1664(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	adcl	1668(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1672(%esp), %ebp
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1676(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	1680(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1684(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1688(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1692(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1696(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1700(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	1704(%esp), %esi
-	adcl	$0, %eax
-	movl	%eax, %edi
-	movl	1928(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1584(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	1584(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1588(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1592(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1596(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1600(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1604(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	1608(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1612(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1616(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1620(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1624(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1628(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1632(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1636(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	adcl	1640(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1520(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	1520(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1524(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1528(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1532(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1536(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1540(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	1544(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1548(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1552(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1556(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1560(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	1564(%esp), %ebp
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	1568(%esp), %esi
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	1572(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1576(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1456(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	1456(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1460(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1464(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1468(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1472(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1476(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1480(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1484(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1488(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1492(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	1496(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	adcl	1500(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	adcl	1504(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1508(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1512(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1392(%esp), %ecx
-	movl	1932(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv448x32
-	andl	$1, %edi
-	movl	%edi, %eax
-	addl	1392(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1396(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1400(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1404(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1408(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1412(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1416(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	1420(%esp), %esi
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	1424(%esp), %ebp
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	1428(%esp), %edi
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1432(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1436(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1440(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1444(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1448(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1328(%esp), %ecx
-	movl	1924(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv448x32
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	1328(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1336(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1352(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	1356(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	adcl	1360(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1364(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1372(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1376(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1380(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	1384(%esp), %edi
-	sbbl	%esi, %esi
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1264(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	1264(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1272(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	1284(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1288(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1300(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1308(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1312(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	1316(%esp), %esi
-	adcl	1320(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1200(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	68(%esp), %eax          # 4-byte Reload
-	addl	1200(%esp), %eax
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1204(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1208(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	1212(%esp), %edi
-	adcl	1216(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1220(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1224(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1228(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1232(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1236(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1240(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1244(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	adcl	1248(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1252(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1256(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%eax, %ebp
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1136(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	1136(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1144(%esp), %ebp
-	adcl	1148(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	1172(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	1180(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1188(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1072(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	1072(%esp), %eax
-	adcl	1076(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1080(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1084(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1088(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1092(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1096(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1100(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	1104(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1108(%esp), %ebp
-	adcl	1112(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1116(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1120(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1124(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1128(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%eax, %edi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1008(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	1008(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	1020(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	1036(%esp), %edi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1044(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	1052(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	944(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	52(%esp), %eax          # 4-byte Reload
-	addl	944(%esp), %eax
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	948(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	952(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	956(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	960(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	964(%esp), %esi
-	adcl	968(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	972(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	976(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	980(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	adcl	984(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	988(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	992(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	996(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1000(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%eax, %edi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	880(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %ebp
-	addl	880(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	896(%esp), %edi
-	adcl	900(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	924(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	928(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	932(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	936(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	816(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	816(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	824(%esp), %ebp
-	adcl	828(%esp), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	856(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	adcl	872(%esp), %esi
-	sbbl	%eax, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	752(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	movl	56(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	752(%esp), %edi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	756(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	760(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	764(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	768(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	772(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	776(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	780(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	784(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	788(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	792(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	796(%esp), %ebp
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	800(%esp), %edi
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	804(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	808(%esp), %esi
-	adcl	$0, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	688(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	688(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	728(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	adcl	732(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	740(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	624(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	movl	%edi, %ecx
-	andl	$1, %ecx
-	addl	624(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	636(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	648(%esp), %esi
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	652(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	560(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	568(%esp), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	580(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	adcl	584(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	592(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	496(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	508(%esp), %edi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	520(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	528(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	540(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	432(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	88(%esp), %ecx          # 4-byte Reload
-	addl	432(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	440(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	444(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	452(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	472(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	368(%esp), %ebp
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	376(%esp), %esi
-	adcl	380(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	392(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	304(%esp), %ecx
-	adcl	308(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	adcl	312(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	324(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	328(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	240(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	movl	96(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	240(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	248(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	252(%esp), %edi
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	256(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	264(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	268(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	1928(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	176(%esp), %ecx
-	movl	1924(%esp), %edx
-	calll	.LmulPv448x32
-	movl	104(%esp), %ecx         # 4-byte Reload
-	addl	176(%esp), %ecx
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	184(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	adcl	188(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	192(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	200(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	1932(%esp), %edx
-	calll	.LmulPv448x32
-	andl	$1, %ebp
-	addl	112(%esp), %esi
-	movl	100(%esp), %esi         # 4-byte Reload
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	120(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	adcl	124(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	adcl	128(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	132(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %ebx
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	136(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	140(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	144(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	148(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	152(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	156(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	160(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	164(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	168(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	1932(%esp), %ecx
-	subl	(%ecx), %eax
-	sbbl	4(%ecx), %edx
-	sbbl	8(%ecx), %esi
-	sbbl	12(%ecx), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	sbbl	16(%ecx), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	sbbl	20(%ecx), %edi
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	sbbl	24(%ecx), %edi
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebx          # 4-byte Reload
-	sbbl	28(%ecx), %ebx
-	movl	52(%esp), %edi          # 4-byte Reload
-	sbbl	32(%ecx), %edi
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	sbbl	36(%ecx), %edi
-	movl	%edi, 36(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	sbbl	40(%ecx), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	sbbl	44(%ecx), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	sbbl	48(%ecx), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	sbbl	52(%ecx), %edi
-	movl	%ebp, %ecx
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB210_2
-# BB#1:
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-.LBB210_2:
-	testb	%cl, %cl
-	movl	108(%esp), %ebx         # 4-byte Reload
-	jne	.LBB210_4
-# BB#3:
-	movl	%eax, %ebx
-.LBB210_4:
-	movl	1920(%esp), %eax
-	movl	%ebx, (%eax)
-	movl	92(%esp), %edi          # 4-byte Reload
-	movl	72(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_6
-# BB#5:
-	movl	%edx, %edi
-.LBB210_6:
-	movl	%edi, 4(%eax)
-	jne	.LBB210_8
-# BB#7:
-	movl	%esi, 100(%esp)         # 4-byte Spill
-.LBB210_8:
-	movl	100(%esp), %edx         # 4-byte Reload
-	movl	%edx, 8(%eax)
-	jne	.LBB210_10
-# BB#9:
-	movl	16(%esp), %edx          # 4-byte Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-.LBB210_10:
-	movl	84(%esp), %edx          # 4-byte Reload
-	movl	%edx, 12(%eax)
-	jne	.LBB210_12
-# BB#11:
-	movl	20(%esp), %ecx          # 4-byte Reload
-.LBB210_12:
-	movl	%ecx, 16(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_14
-# BB#13:
-	movl	24(%esp), %ecx          # 4-byte Reload
-.LBB210_14:
-	movl	%ecx, 20(%eax)
-	movl	68(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_16
-# BB#15:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB210_16:
-	movl	%ecx, 24(%eax)
-	movl	60(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_18
-# BB#17:
-	movl	32(%esp), %ecx          # 4-byte Reload
-.LBB210_18:
-	movl	%ecx, 32(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_20
-# BB#19:
-	movl	36(%esp), %ecx          # 4-byte Reload
-.LBB210_20:
-	movl	%ecx, 36(%eax)
-	movl	64(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_22
-# BB#21:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB210_22:
-	movl	%ecx, 40(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_24
-# BB#23:
-	movl	44(%esp), %ecx          # 4-byte Reload
-.LBB210_24:
-	movl	%ecx, 44(%eax)
-	movl	88(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_26
-# BB#25:
-	movl	48(%esp), %ecx          # 4-byte Reload
-.LBB210_26:
-	movl	%ecx, 48(%eax)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	jne	.LBB210_28
-# BB#27:
-	movl	104(%esp), %ecx         # 4-byte Reload
-.LBB210_28:
-	movl	%ecx, 52(%eax)
-	addl	$1900, %esp             # imm = 0x76C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end210:
-	.size	mcl_fp_mont14L, .Lfunc_end210-mcl_fp_mont14L
-
-	.globl	mcl_fp_montNF14L
-	.align	16, 0x90
-	.type	mcl_fp_montNF14L,@function
-mcl_fp_montNF14L:                       # @mcl_fp_montNF14L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1884, %esp             # imm = 0x75C
-	calll	.L211$pb
-.L211$pb:
-	popl	%ebx
-.Ltmp42:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp42-.L211$pb), %ebx
-	movl	1916(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1824(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1824(%esp), %edi
-	movl	1828(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	imull	%esi, %eax
-	movl	1880(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	1876(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	1872(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	1868(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	1864(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1860(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1856(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1852(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1848(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	1844(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1840(%esp), %esi
-	movl	1836(%esp), %ebp
-	movl	1832(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	1760(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	1760(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1764(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1768(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1772(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	adcl	1776(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1780(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1784(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1788(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1792(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1796(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1804(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	1808(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1812(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1816(%esp), %ebp
-	movl	1912(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1696(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1752(%esp), %eax
-	movl	68(%esp), %edx          # 4-byte Reload
-	addl	1696(%esp), %edx
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1700(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1704(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	1708(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1712(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1716(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1720(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1724(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1728(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1732(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1736(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	1740(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	1744(%esp), %edi
-	adcl	1748(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %ebp
-	movl	%edx, %eax
-	movl	%edx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1632(%esp), %ecx
-	movl	1916(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv448x32
-	addl	1632(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1636(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1640(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1644(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1648(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1652(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1656(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1660(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	1664(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1668(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1672(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1676(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1680(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1684(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1688(%esp), %ebp
-	movl	1912(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1568(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1624(%esp), %eax
-	movl	88(%esp), %edx          # 4-byte Reload
-	addl	1568(%esp), %edx
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1572(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1576(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1580(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1584(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1588(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1592(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	1596(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1600(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1604(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1608(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	1612(%esp), %edi
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1616(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	1620(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %ebp
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1504(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	1504(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1508(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1512(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1516(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1520(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1524(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1528(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1532(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1536(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1540(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1544(%esp), %esi
-	adcl	1548(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1552(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1556(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1560(%esp), %ebp
-	movl	1912(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1440(%esp), %ecx
-	movl	1908(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv448x32
-	movl	1496(%esp), %eax
-	movl	72(%esp), %edx          # 4-byte Reload
-	addl	1440(%esp), %edx
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1444(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1448(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1452(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1456(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1460(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	1464(%esp), %edi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1468(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1472(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	1476(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1480(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1484(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1488(%esp), %esi
-	adcl	1492(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %ebp
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1376(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	1376(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1380(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1384(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	1400(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1404(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1408(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1412(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1416(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1424(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1428(%esp), %eax
-	movl	%eax, %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1432(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1312(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1368(%esp), %eax
-	movl	68(%esp), %edx          # 4-byte Reload
-	addl	1312(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1316(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1320(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1324(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	1328(%esp), %edi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1332(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1336(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	1340(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1344(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1348(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1352(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1356(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	1360(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1364(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1248(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	1248(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1252(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1256(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1260(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	1264(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1272(%esp), %ebp
-	adcl	1276(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	1284(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1288(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	1300(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1184(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1240(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	1184(%esp), %ecx
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1188(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1200(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1204(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1208(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1212(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1216(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1220(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1228(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1232(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1236(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1120(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	1120(%esp), %esi
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	1124(%esp), %ebp
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	1128(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1144(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1148(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	1156(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1056(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	1112(%esp), %eax
-	movl	%ebp, %ecx
-	addl	1056(%esp), %ecx
-	adcl	1060(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	1064(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	1068(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	1072(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	1076(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	1080(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	1084(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	adcl	1088(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	1092(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	1096(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	1100(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	1104(%esp), %ebp
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	1108(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %esi
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	992(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	992(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	1008(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1040(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	1044(%esp), %ebp
-	adcl	1048(%esp), %esi
-	movl	1912(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	984(%esp), %eax
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	928(%esp), %ecx
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	932(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	936(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	940(%esp), %edi
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	944(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	948(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	952(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	956(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	960(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	964(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	968(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	972(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	976(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	adcl	980(%esp), %esi
-	movl	%esi, %ebp
-	adcl	$0, %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	864(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	864(%esp), %esi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	876(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	884(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	916(%esp), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	920(%esp), %ebp
-	movl	1912(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	800(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	856(%esp), %edx
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	800(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	808(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	816(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	828(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	852(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	736(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	736(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	764(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	768(%esp), %ebp
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	772(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	780(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	672(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	728(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	672(%esp), %ecx
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	700(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	adcl	704(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	712(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	608(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	608(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	616(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	624(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	644(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	544(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	600(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	544(%esp), %ecx
-	adcl	548(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	556(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	568(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	576(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	480(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	480(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	488(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	496(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	504(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	532(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	472(%esp), %edx
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	416(%esp), %ecx
-	adcl	420(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	424(%esp), %edi
-	adcl	428(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	444(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	464(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	352(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	360(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	364(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	388(%esp), %edi
-	movl	40(%esp), %ebp          # 4-byte Reload
-	adcl	392(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	288(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	344(%esp), %edx
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	288(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	296(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	320(%esp), %edi
-	adcl	324(%esp), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	328(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	224(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	224(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	232(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	256(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	260(%esp), %edi
-	adcl	264(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1912(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	160(%esp), %ecx
-	movl	1908(%esp), %edx
-	calll	.LmulPv448x32
-	movl	216(%esp), %edx
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	160(%esp), %ecx
-	adcl	164(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	168(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	172(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	176(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	188(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	adcl	192(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	96(%esp), %ecx
-	movl	1916(%esp), %edx
-	calll	.LmulPv448x32
-	addl	96(%esp), %esi
-	movl	64(%esp), %esi          # 4-byte Reload
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	104(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	%ebp, %ebx
-	adcl	108(%esp), %esi
-	adcl	112(%esp), %edi
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	116(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	120(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	124(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %edx          # 4-byte Reload
-	adcl	128(%esp), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	132(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	136(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	140(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	144(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	148(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	152(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	1916(%esp), %ebp
-	subl	(%ebp), %edx
-	sbbl	4(%ebp), %ebx
-	movl	%esi, %eax
-	sbbl	8(%ebp), %eax
-	movl	%edi, %ecx
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%ebp), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	sbbl	20(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%ebp), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%ebp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	sbbl	36(%ebp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	sbbl	40(%ebp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%ebp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	sbbl	48(%ebp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	sbbl	52(%ebp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	sarl	$31, %ecx
-	testl	%ecx, %ecx
-	movl	92(%esp), %ebp          # 4-byte Reload
-	js	.LBB211_2
-# BB#1:
-	movl	%edx, %ebp
-.LBB211_2:
-	movl	1904(%esp), %edx
-	movl	%ebp, (%edx)
-	movl	88(%esp), %ebp          # 4-byte Reload
-	js	.LBB211_4
-# BB#3:
-	movl	%ebx, %ebp
-.LBB211_4:
-	movl	%ebp, 4(%edx)
-	js	.LBB211_6
-# BB#5:
-	movl	%eax, %esi
-.LBB211_6:
-	movl	%esi, 8(%edx)
-	js	.LBB211_8
-# BB#7:
-	movl	4(%esp), %edi           # 4-byte Reload
-.LBB211_8:
-	movl	%edi, 12(%edx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	js	.LBB211_10
-# BB#9:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB211_10:
-	movl	%eax, 16(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB211_12
-# BB#11:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB211_12:
-	movl	%eax, 20(%edx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	js	.LBB211_14
-# BB#13:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB211_14:
-	movl	%eax, 24(%edx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	js	.LBB211_16
-# BB#15:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB211_16:
-	movl	%eax, 28(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB211_18
-# BB#17:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB211_18:
-	movl	%eax, 32(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB211_20
-# BB#19:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB211_20:
-	movl	%eax, 36(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB211_22
-# BB#21:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB211_22:
-	movl	%eax, 40(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	js	.LBB211_24
-# BB#23:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB211_24:
-	movl	%eax, 44(%edx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	js	.LBB211_26
-# BB#25:
-	movl	64(%esp), %eax          # 4-byte Reload
-.LBB211_26:
-	movl	%eax, 48(%edx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	js	.LBB211_28
-# BB#27:
-	movl	72(%esp), %eax          # 4-byte Reload
-.LBB211_28:
-	movl	%eax, 52(%edx)
-	addl	$1884, %esp             # imm = 0x75C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end211:
-	.size	mcl_fp_montNF14L, .Lfunc_end211-mcl_fp_montNF14L
-
-	.globl	mcl_fp_montRed14L
-	.align	16, 0x90
-	.type	mcl_fp_montRed14L,@function
-mcl_fp_montRed14L:                      # @mcl_fp_montRed14L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1068, %esp             # imm = 0x42C
-	calll	.L212$pb
-.L212$pb:
-	popl	%eax
-.Ltmp43:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp43-.L212$pb), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1096(%esp), %edx
-	movl	-4(%edx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1092(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 92(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	imull	%eax, %ebx
-	movl	108(%ecx), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	104(%ecx), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	100(%ecx), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	96(%ecx), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	92(%ecx), %esi
-	movl	%esi, 140(%esp)         # 4-byte Spill
-	movl	88(%ecx), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	84(%ecx), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	80(%ecx), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	76(%ecx), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	72(%ecx), %esi
-	movl	%esi, 136(%esp)         # 4-byte Spill
-	movl	68(%ecx), %esi
-	movl	%esi, 168(%esp)         # 4-byte Spill
-	movl	64(%ecx), %esi
-	movl	%esi, 164(%esp)         # 4-byte Spill
-	movl	60(%ecx), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	56(%ecx), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	52(%ecx), %edi
-	movl	%edi, 144(%esp)         # 4-byte Spill
-	movl	48(%ecx), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	44(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	40(%ecx), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	36(%ecx), %ebp
-	movl	32(%ecx), %edi
-	movl	28(%ecx), %esi
-	movl	24(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	20(%ecx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	16(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	12(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	8(%ecx), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	52(%edx), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	48(%edx), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	44(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	1008(%esp), %ecx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	movl	92(%esp), %eax          # 4-byte Reload
-	addl	1008(%esp), %eax
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1012(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1036(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	1040(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	adcl	1044(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	1052(%esp), %ebp
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 164(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	944(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	movl	%edi, %ecx
-	andl	$1, %ecx
-	addl	944(%esp), %esi
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	948(%esp), %edx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	976(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	984(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %esi         # 4-byte Reload
-	adcl	1000(%esp), %esi
-	adcl	$0, 164(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %ebp
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	880(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	880(%esp), %ebp
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	884(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	908(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %edi         # 4-byte Reload
-	adcl	920(%esp), %edi
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	928(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	adcl	932(%esp), %esi
-	movl	%esi, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	936(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	movl	152(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	816(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	816(%esp), %esi
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	820(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	852(%esp), %edi
-	movl	%edi, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	856(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 152(%esp)         # 4-byte Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	movl	128(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	752(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	752(%esp), %ebp
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	756(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	movl	156(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 128(%esp)         # 4-byte Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	688(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	688(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	692(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 156(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	movl	140(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	624(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	624(%esp), %ebp
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	628(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %ebp         # 4-byte Reload
-	adcl	664(%esp), %ebp
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 140(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	560(%esp), %esi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	564(%esp), %ecx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	adcl	596(%esp), %ebp
-	movl	%ebp, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %edi         # 4-byte Reload
-	adcl	600(%esp), %edi
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	movl	120(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	1096(%esp), %eax
-	movl	%eax, %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	496(%esp), %ebp
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	500(%esp), %ecx
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %ebp         # 4-byte Reload
-	adcl	516(%esp), %ebp
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	adcl	532(%esp), %edi
-	movl	%edi, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %edi         # 4-byte Reload
-	adcl	536(%esp), %edi
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	$0, 140(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	432(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	432(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	440(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	adcl	448(%esp), %ebp
-	movl	%ebp, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %ecx         # 4-byte Reload
-	adcl	452(%esp), %ecx
-	movl	%ecx, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %ebp         # 4-byte Reload
-	adcl	456(%esp), %ebp
-	movl	164(%esp), %ecx         # 4-byte Reload
-	adcl	460(%esp), %ecx
-	movl	%ecx, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %ecx         # 4-byte Reload
-	adcl	464(%esp), %ecx
-	movl	%ecx, 168(%esp)         # 4-byte Spill
-	adcl	468(%esp), %edi
-	movl	%edi, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %ecx         # 4-byte Reload
-	adcl	472(%esp), %ecx
-	movl	%ecx, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	480(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	484(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	140(%esp), %ecx         # 4-byte Reload
-	adcl	488(%esp), %ecx
-	movl	%ecx, 140(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%eax, %esi
-	movl	88(%esp), %edi          # 4-byte Reload
-	imull	%edi, %eax
-	movl	%eax, (%esp)
-	leal	368(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	368(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	376(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	144(%esp), %ecx         # 4-byte Reload
-	adcl	380(%esp), %ecx
-	movl	%ecx, 144(%esp)         # 4-byte Spill
-	movl	172(%esp), %esi         # 4-byte Reload
-	adcl	384(%esp), %esi
-	adcl	388(%esp), %ebp
-	movl	%ebp, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %ecx         # 4-byte Reload
-	adcl	392(%esp), %ecx
-	movl	%ecx, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %ecx         # 4-byte Reload
-	adcl	396(%esp), %ecx
-	movl	%ecx, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %ecx         # 4-byte Reload
-	adcl	400(%esp), %ecx
-	movl	%ecx, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %ecx         # 4-byte Reload
-	adcl	404(%esp), %ecx
-	movl	%ecx, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	408(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	412(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	416(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	140(%esp), %ecx         # 4-byte Reload
-	adcl	420(%esp), %ecx
-	movl	%ecx, 140(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	424(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	adcl	$0, 120(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%eax, %ebp
-	imull	%edi, %eax
-	movl	%eax, (%esp)
-	leal	304(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	304(%esp), %ebp
-	movl	132(%esp), %edi         # 4-byte Reload
-	adcl	308(%esp), %edi
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	%esi, %ebp
-	adcl	316(%esp), %ebp
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %esi         # 4-byte Reload
-	adcl	332(%esp), %esi
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%edi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	240(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	240(%esp), %edi
-	movl	144(%esp), %ecx         # 4-byte Reload
-	adcl	244(%esp), %ecx
-	adcl	248(%esp), %ebp
-	movl	%ebp, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	adcl	264(%esp), %esi
-	movl	%esi, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %edi         # 4-byte Reload
-	adcl	268(%esp), %edi
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	124(%esp), %ebp         # 4-byte Reload
-	adcl	280(%esp), %ebp
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	176(%esp), %ecx
-	movl	1096(%esp), %edx
-	movl	100(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv448x32
-	addl	176(%esp), %esi
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %ebx         # 4-byte Reload
-	adcl	188(%esp), %ebx
-	movl	%ebx, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	136(%esp), %edx         # 4-byte Reload
-	adcl	196(%esp), %edx
-	movl	%edx, 136(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	adcl	200(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	212(%esp), %ebp
-	movl	%ebp, 124(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	232(%esp), %ecx
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	172(%esp), %edi         # 4-byte Reload
-	subl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	160(%esp), %ebp         # 4-byte Reload
-	sbbl	8(%esp), %ebp           # 4-byte Folded Reload
-	sbbl	12(%esp), %ebx          # 4-byte Folded Reload
-	movl	168(%esp), %eax         # 4-byte Reload
-	sbbl	20(%esp), %eax          # 4-byte Folded Reload
-	sbbl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	148(%esp), %edx         # 4-byte Reload
-	sbbl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	156(%esp), %edx         # 4-byte Reload
-	sbbl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	152(%esp), %edx         # 4-byte Reload
-	sbbl	36(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	124(%esp), %edx         # 4-byte Reload
-	sbbl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	140(%esp), %edx         # 4-byte Reload
-	sbbl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	128(%esp), %edx         # 4-byte Reload
-	sbbl	48(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %edx         # 4-byte Reload
-	sbbl	52(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %edx         # 4-byte Reload
-	sbbl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 132(%esp)         # 4-byte Spill
-	movl	%ecx, %edx
-	sbbl	60(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 144(%esp)         # 4-byte Spill
-	sbbl	$0, %esi
-	andl	$1, %esi
-	jne	.LBB212_2
-# BB#1:
-	movl	%eax, 168(%esp)         # 4-byte Spill
-.LBB212_2:
-	movl	%esi, %edx
-	testb	%dl, %dl
-	movl	172(%esp), %eax         # 4-byte Reload
-	jne	.LBB212_4
-# BB#3:
-	movl	%edi, %eax
-.LBB212_4:
-	movl	1088(%esp), %edi
-	movl	%eax, (%edi)
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	movl	160(%esp), %ecx         # 4-byte Reload
-	jne	.LBB212_6
-# BB#5:
-	movl	%ebp, %ecx
-.LBB212_6:
-	movl	%ecx, 4(%edi)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	movl	164(%esp), %ebp         # 4-byte Reload
-	jne	.LBB212_8
-# BB#7:
-	movl	%ebx, %ebp
-.LBB212_8:
-	movl	%ebp, 8(%edi)
-	movl	168(%esp), %ebx         # 4-byte Reload
-	movl	%ebx, 12(%edi)
-	movl	124(%esp), %ebp         # 4-byte Reload
-	movl	136(%esp), %ebx         # 4-byte Reload
-	jne	.LBB212_10
-# BB#9:
-	movl	80(%esp), %ebx          # 4-byte Reload
-.LBB212_10:
-	movl	%ebx, 16(%edi)
-	movl	140(%esp), %ebx         # 4-byte Reload
-	movl	148(%esp), %esi         # 4-byte Reload
-	jne	.LBB212_12
-# BB#11:
-	movl	84(%esp), %esi          # 4-byte Reload
-.LBB212_12:
-	movl	%esi, 20(%edi)
-	movl	128(%esp), %esi         # 4-byte Reload
-	jne	.LBB212_14
-# BB#13:
-	movl	88(%esp), %eax          # 4-byte Reload
-.LBB212_14:
-	movl	%eax, 24(%edi)
-	movl	120(%esp), %edx         # 4-byte Reload
-	jne	.LBB212_16
-# BB#15:
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	%eax, 152(%esp)         # 4-byte Spill
-.LBB212_16:
-	movl	152(%esp), %eax         # 4-byte Reload
-	movl	%eax, 28(%edi)
-	jne	.LBB212_18
-# BB#17:
-	movl	96(%esp), %ebp          # 4-byte Reload
-.LBB212_18:
-	movl	%ebp, 32(%edi)
-	jne	.LBB212_20
-# BB#19:
-	movl	100(%esp), %ebx         # 4-byte Reload
-.LBB212_20:
-	movl	%ebx, 36(%edi)
-	jne	.LBB212_22
-# BB#21:
-	movl	112(%esp), %esi         # 4-byte Reload
-.LBB212_22:
-	movl	%esi, 40(%edi)
-	jne	.LBB212_24
-# BB#23:
-	movl	116(%esp), %edx         # 4-byte Reload
-.LBB212_24:
-	movl	%edx, 44(%edi)
-	jne	.LBB212_26
-# BB#25:
-	movl	132(%esp), %ecx         # 4-byte Reload
-.LBB212_26:
-	movl	%ecx, 48(%edi)
-	movl	104(%esp), %eax         # 4-byte Reload
-	jne	.LBB212_28
-# BB#27:
-	movl	144(%esp), %eax         # 4-byte Reload
-.LBB212_28:
-	movl	%eax, 52(%edi)
-	addl	$1068, %esp             # imm = 0x42C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end212:
-	.size	mcl_fp_montRed14L, .Lfunc_end212-mcl_fp_montRed14L
-
-	.globl	mcl_fp_addPre14L
-	.align	16, 0x90
-	.type	mcl_fp_addPre14L,@function
-mcl_fp_addPre14L:                       # @mcl_fp_addPre14L
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %edi
-	adcl	8(%ecx), %edi
-	movl	16(%esp), %ebx
-	movl	%edx, (%ebx)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%ebx)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%edi, 8(%ebx)
-	movl	20(%eax), %edi
-	movl	%edx, 12(%ebx)
-	movl	20(%ecx), %edx
-	adcl	%edi, %edx
-	movl	24(%eax), %edi
-	movl	%esi, 16(%ebx)
-	movl	24(%ecx), %esi
-	adcl	%edi, %esi
-	movl	28(%eax), %edi
-	movl	%edx, 20(%ebx)
-	movl	28(%ecx), %edx
-	adcl	%edi, %edx
-	movl	32(%eax), %edi
-	movl	%esi, 24(%ebx)
-	movl	32(%ecx), %esi
-	adcl	%edi, %esi
-	movl	36(%eax), %edi
-	movl	%edx, 28(%ebx)
-	movl	36(%ecx), %edx
-	adcl	%edi, %edx
-	movl	40(%eax), %edi
-	movl	%esi, 32(%ebx)
-	movl	40(%ecx), %esi
-	adcl	%edi, %esi
-	movl	44(%eax), %edi
-	movl	%edx, 36(%ebx)
-	movl	44(%ecx), %edx
-	adcl	%edi, %edx
-	movl	48(%eax), %edi
-	movl	%esi, 40(%ebx)
-	movl	48(%ecx), %esi
-	adcl	%edi, %esi
-	movl	%edx, 44(%ebx)
-	movl	%esi, 48(%ebx)
-	movl	52(%eax), %eax
-	movl	52(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 52(%ebx)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end213:
-	.size	mcl_fp_addPre14L, .Lfunc_end213-mcl_fp_addPre14L
-
-	.globl	mcl_fp_subPre14L
-	.align	16, 0x90
-	.type	mcl_fp_subPre14L,@function
-mcl_fp_subPre14L:                       # @mcl_fp_subPre14L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebx
-	sbbl	8(%edx), %ebx
-	movl	20(%esp), %ebp
-	movl	%esi, (%ebp)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebp)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ebp)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ebp)
-	movl	20(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ebp)
-	movl	24(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ebp)
-	movl	28(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%edi, 24(%ebp)
-	movl	32(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	36(%edx), %ebx
-	movl	%esi, 28(%ebp)
-	movl	36(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	40(%edx), %ebx
-	movl	%edi, 32(%ebp)
-	movl	40(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	44(%edx), %ebx
-	movl	%esi, 36(%ebp)
-	movl	44(%ecx), %esi
-	sbbl	%ebx, %esi
-	movl	48(%edx), %ebx
-	movl	%edi, 40(%ebp)
-	movl	48(%ecx), %edi
-	sbbl	%ebx, %edi
-	movl	%esi, 44(%ebp)
-	movl	%edi, 48(%ebp)
-	movl	52(%edx), %edx
-	movl	52(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 52(%ebp)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end214:
-	.size	mcl_fp_subPre14L, .Lfunc_end214-mcl_fp_subPre14L
-
-	.globl	mcl_fp_shr1_14L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_14L,@function
-mcl_fp_shr1_14L:                        # @mcl_fp_shr1_14L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	8(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 4(%ecx)
-	movl	12(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 8(%ecx)
-	movl	16(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 16(%ecx)
-	movl	24(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 24(%ecx)
-	movl	32(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 32(%ecx)
-	movl	40(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 40(%ecx)
-	movl	48(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 44(%ecx)
-	movl	52(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	%edx, 48(%ecx)
-	shrl	%eax
-	movl	%eax, 52(%ecx)
-	popl	%esi
-	retl
-.Lfunc_end215:
-	.size	mcl_fp_shr1_14L, .Lfunc_end215-mcl_fp_shr1_14L
-
-	.globl	mcl_fp_add14L
-	.align	16, 0x90
-	.type	mcl_fp_add14L,@function
-mcl_fp_add14L:                          # @mcl_fp_add14L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$44, %esp
-	movl	72(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %ecx
-	movl	68(%esp), %ebp
-	addl	(%ebp), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	adcl	4(%ebp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	8(%eax), %ecx
-	adcl	8(%ebp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	12(%ebp), %edx
-	movl	16(%ebp), %ecx
-	adcl	12(%eax), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	adcl	16(%eax), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	20(%ebp), %ecx
-	adcl	20(%eax), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	24(%ebp), %ecx
-	adcl	24(%eax), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	28(%ebp), %ecx
-	adcl	28(%eax), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	32(%ebp), %ecx
-	adcl	32(%eax), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	36(%ebp), %ecx
-	adcl	36(%eax), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	40(%ebp), %edx
-	adcl	40(%eax), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	movl	44(%ebp), %ebx
-	adcl	44(%eax), %ebx
-	movl	48(%ebp), %esi
-	adcl	48(%eax), %esi
-	movl	52(%ebp), %edi
-	adcl	52(%eax), %edi
-	movl	64(%esp), %eax
-	movl	4(%esp), %ebp           # 4-byte Reload
-	movl	%ebp, (%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 4(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 8(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 16(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 20(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	%edx, 40(%eax)
-	movl	%ebx, 44(%eax)
-	movl	%esi, 48(%eax)
-	movl	%edi, 52(%eax)
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	76(%esp), %edx
-	subl	(%edx), %ebp
-	movl	%ebp, 4(%esp)           # 4-byte Spill
-	movl	40(%esp), %ebp          # 4-byte Reload
-	sbbl	4(%edx), %ebp
-	movl	%ebp, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %ebp          # 4-byte Reload
-	sbbl	8(%edx), %ebp
-	movl	%ebp, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %ebp          # 4-byte Reload
-	sbbl	12(%edx), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %ebp          # 4-byte Reload
-	sbbl	16(%edx), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %ebp          # 4-byte Reload
-	sbbl	20(%edx), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %ebp          # 4-byte Reload
-	sbbl	24(%edx), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %ebp          # 4-byte Reload
-	sbbl	28(%edx), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebp          # 4-byte Reload
-	sbbl	32(%edx), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %ebp           # 4-byte Reload
-	sbbl	36(%edx), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	(%esp), %ebp            # 4-byte Reload
-	sbbl	40(%edx), %ebp
-	sbbl	44(%edx), %ebx
-	sbbl	48(%edx), %esi
-	sbbl	52(%edx), %edi
-	sbbl	$0, %ecx
-	testb	$1, %cl
-	jne	.LBB216_2
-# BB#1:                                 # %nocarry
-	movl	4(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, (%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 4(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 8(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 16(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 20(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	%ebp, 40(%eax)
-	movl	%ebx, 44(%eax)
-	movl	%esi, 48(%eax)
-	movl	%edi, 52(%eax)
-.LBB216_2:                              # %carry
-	addl	$44, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end216:
-	.size	mcl_fp_add14L, .Lfunc_end216-mcl_fp_add14L
-
-	.globl	mcl_fp_addNF14L
-	.align	16, 0x90
-	.type	mcl_fp_addNF14L,@function
-mcl_fp_addNF14L:                        # @mcl_fp_addNF14L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$112, %esp
-	movl	140(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	136(%esp), %ecx
-	addl	(%ecx), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	adcl	4(%ecx), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	52(%eax), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	48(%eax), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	44(%eax), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	40(%eax), %ebp
-	movl	36(%eax), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	32(%eax), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	28(%eax), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	24(%eax), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	20(%eax), %ebx
-	movl	16(%eax), %edi
-	movl	12(%eax), %esi
-	movl	8(%eax), %edx
-	adcl	8(%ecx), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	12(%ecx), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	adcl	16(%ecx), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	adcl	20(%ecx), %ebx
-	movl	%ebx, 68(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	24(%ecx), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	28(%ecx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	32(%ecx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	36(%ecx), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	40(%ecx), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	44(%ecx), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	48(%ecx), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	52(%ecx), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	144(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	subl	(%ecx), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	sbbl	4(%ecx), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	sbbl	8(%ecx), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	sbbl	12(%ecx), %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	sbbl	16(%ecx), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	sbbl	20(%ecx), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	movl	%edx, %eax
-	sbbl	24(%ecx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	28(%ecx), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	sbbl	32(%ecx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	sbbl	36(%ecx), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	sbbl	40(%ecx), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	movl	%eax, %esi
-	movl	%eax, %ebp
-	sbbl	44(%ecx), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	movl	%eax, %esi
-	sbbl	48(%ecx), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	%eax, %edi
-	sbbl	52(%ecx), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	%edi, %ecx
-	sarl	$31, %ecx
-	testl	%ecx, %ecx
-	movl	72(%esp), %ecx          # 4-byte Reload
-	js	.LBB217_2
-# BB#1:
-	movl	(%esp), %ecx            # 4-byte Reload
-.LBB217_2:
-	movl	132(%esp), %edi
-	movl	%ecx, (%edi)
-	movl	76(%esp), %eax          # 4-byte Reload
-	js	.LBB217_4
-# BB#3:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB217_4:
-	movl	%eax, 4(%edi)
-	movl	%edx, %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	movl	56(%esp), %edx          # 4-byte Reload
-	js	.LBB217_6
-# BB#5:
-	movl	8(%esp), %edx           # 4-byte Reload
-.LBB217_6:
-	movl	%edx, 8(%edi)
-	movl	%ebp, %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	movl	60(%esp), %ebp          # 4-byte Reload
-	js	.LBB217_8
-# BB#7:
-	movl	12(%esp), %ebp          # 4-byte Reload
-.LBB217_8:
-	movl	%ebp, 12(%edi)
-	movl	100(%esp), %ebp         # 4-byte Reload
-	js	.LBB217_10
-# BB#9:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB217_10:
-	movl	%eax, 16(%edi)
-	movl	80(%esp), %esi          # 4-byte Reload
-	js	.LBB217_12
-# BB#11:
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-.LBB217_12:
-	movl	68(%esp), %eax          # 4-byte Reload
-	movl	%eax, 20(%edi)
-	js	.LBB217_14
-# BB#13:
-	movl	24(%esp), %ecx          # 4-byte Reload
-.LBB217_14:
-	movl	%ecx, 24(%edi)
-	js	.LBB217_16
-# BB#15:
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-.LBB217_16:
-	movl	108(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 28(%edi)
-	js	.LBB217_18
-# BB#17:
-	movl	32(%esp), %ebp          # 4-byte Reload
-.LBB217_18:
-	movl	%ebp, 32(%edi)
-	js	.LBB217_20
-# BB#19:
-	movl	36(%esp), %ebx          # 4-byte Reload
-.LBB217_20:
-	movl	%ebx, 36(%edi)
-	js	.LBB217_22
-# BB#21:
-	movl	40(%esp), %esi          # 4-byte Reload
-.LBB217_22:
-	movl	%esi, 40(%edi)
-	movl	96(%esp), %eax          # 4-byte Reload
-	js	.LBB217_24
-# BB#23:
-	movl	44(%esp), %edx          # 4-byte Reload
-.LBB217_24:
-	movl	%edx, 44(%edi)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	js	.LBB217_26
-# BB#25:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB217_26:
-	movl	%eax, 48(%edi)
-	js	.LBB217_28
-# BB#27:
-	movl	52(%esp), %ecx          # 4-byte Reload
-.LBB217_28:
-	movl	%ecx, 52(%edi)
-	addl	$112, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end217:
-	.size	mcl_fp_addNF14L, .Lfunc_end217-mcl_fp_addNF14L
-
-	.globl	mcl_fp_sub14L
-	.align	16, 0x90
-	.type	mcl_fp_sub14L,@function
-mcl_fp_sub14L:                          # @mcl_fp_sub14L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$52, %esp
-	movl	76(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	80(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	28(%esi), %eax
-	sbbl	28(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	32(%esi), %eax
-	sbbl	32(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	36(%esi), %edx
-	sbbl	36(%edi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	40(%esi), %ecx
-	sbbl	40(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	44(%esi), %eax
-	sbbl	44(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	48(%esi), %ebp
-	sbbl	48(%edi), %ebp
-	movl	52(%esi), %esi
-	sbbl	52(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	72(%esp), %ebx
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%ebx)
-	movl	%edx, 36(%ebx)
-	movl	%ecx, 40(%ebx)
-	movl	%eax, 44(%ebx)
-	movl	%ebp, 48(%ebx)
-	movl	%esi, 52(%ebx)
-	je	.LBB218_2
-# BB#1:                                 # %carry
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	84(%esp), %esi
-	movl	44(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	adcl	8(%esi), %edi
-	movl	12(%esi), %eax
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%edi, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	36(%esi), %eax
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	40(%esi), %ecx
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 36(%ebx)
-	movl	44(%esi), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 40(%ebx)
-	movl	%eax, 44(%ebx)
-	movl	48(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 48(%ebx)
-	movl	52(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, 52(%ebx)
-.LBB218_2:                              # %nocarry
-	addl	$52, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end218:
-	.size	mcl_fp_sub14L, .Lfunc_end218-mcl_fp_sub14L
-
-	.globl	mcl_fp_subNF14L
-	.align	16, 0x90
-	.type	mcl_fp_subNF14L,@function
-mcl_fp_subNF14L:                        # @mcl_fp_subNF14L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$88, %esp
-	movl	112(%esp), %ecx
-	movl	52(%ecx), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	(%ecx), %edx
-	movl	4(%ecx), %eax
-	movl	116(%esp), %edi
-	subl	(%edi), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%ecx), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	44(%ecx), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	32(%ecx), %ebp
-	movl	28(%ecx), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	24(%ecx), %ebx
-	movl	20(%ecx), %esi
-	movl	16(%ecx), %edx
-	movl	12(%ecx), %eax
-	movl	8(%ecx), %ecx
-	sbbl	8(%edi), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	sbbl	12(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %ebx
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%edi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	sbbl	32(%edi), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	sbbl	48(%edi), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	sbbl	52(%edi), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	sarl	$31, %esi
-	movl	%esi, %ecx
-	addl	%ecx, %ecx
-	movl	%esi, %ebp
-	adcl	%ebp, %ebp
-	shrl	$31, %eax
-	orl	%ecx, %eax
-	movl	120(%esp), %edi
-	andl	4(%edi), %ebp
-	andl	(%edi), %eax
-	movl	52(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	48(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	44(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	40(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	36(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	32(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	28(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	24(%edi), %ecx
-	andl	%esi, %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	20(%edi), %ebx
-	andl	%esi, %ebx
-	movl	16(%edi), %edx
-	andl	%esi, %edx
-	movl	12(%edi), %ecx
-	andl	%esi, %ecx
-	andl	8(%edi), %esi
-	addl	56(%esp), %eax          # 4-byte Folded Reload
-	adcl	60(%esp), %ebp          # 4-byte Folded Reload
-	movl	108(%esp), %edi
-	movl	%eax, (%edi)
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%ebp, 4(%edi)
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%esi, 8(%edi)
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 12(%edi)
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edx, 16(%edi)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebx, 20(%edi)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 24(%edi)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 28(%edi)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 32(%edi)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 36(%edi)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 40(%edi)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 44(%edi)
-	movl	%eax, 48(%edi)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%edi)
-	addl	$88, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end219:
-	.size	mcl_fp_subNF14L, .Lfunc_end219-mcl_fp_subNF14L
-
-	.globl	mcl_fpDbl_add14L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add14L,@function
-mcl_fpDbl_add14L:                       # @mcl_fpDbl_add14L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$100, %esp
-	movl	128(%esp), %ecx
-	movl	124(%esp), %esi
-	movl	12(%esi), %edi
-	movl	16(%esi), %edx
-	movl	8(%ecx), %ebx
-	movl	(%ecx), %ebp
-	addl	(%esi), %ebp
-	movl	120(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%ecx), %ebp
-	adcl	4(%esi), %ebp
-	adcl	8(%esi), %ebx
-	adcl	12(%ecx), %edi
-	adcl	16(%ecx), %edx
-	movl	%ebp, 4(%eax)
-	movl	64(%ecx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%ecx), %ebx
-	movl	%edi, 12(%eax)
-	movl	20(%esi), %edi
-	adcl	%ebx, %edi
-	movl	24(%ecx), %ebx
-	movl	%edx, 16(%eax)
-	movl	24(%esi), %edx
-	adcl	%ebx, %edx
-	movl	28(%ecx), %ebx
-	movl	%edi, 20(%eax)
-	movl	28(%esi), %edi
-	adcl	%ebx, %edi
-	movl	32(%ecx), %ebx
-	movl	%edx, 24(%eax)
-	movl	32(%esi), %edx
-	adcl	%ebx, %edx
-	movl	36(%ecx), %ebx
-	movl	%edi, 28(%eax)
-	movl	36(%esi), %edi
-	adcl	%ebx, %edi
-	movl	40(%ecx), %ebx
-	movl	%edx, 32(%eax)
-	movl	40(%esi), %edx
-	adcl	%ebx, %edx
-	movl	44(%ecx), %ebx
-	movl	%edi, 36(%eax)
-	movl	44(%esi), %edi
-	adcl	%ebx, %edi
-	movl	48(%ecx), %ebx
-	movl	%edx, 40(%eax)
-	movl	48(%esi), %edx
-	adcl	%ebx, %edx
-	movl	52(%ecx), %ebx
-	movl	%edi, 44(%eax)
-	movl	52(%esi), %edi
-	adcl	%ebx, %edi
-	movl	56(%ecx), %ebx
-	movl	%edx, 48(%eax)
-	movl	56(%esi), %edx
-	adcl	%ebx, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	60(%ecx), %edx
-	movl	%edi, 52(%eax)
-	movl	60(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	64(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%ecx), %edx
-	movl	68(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%ecx), %edx
-	movl	72(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	76(%ecx), %edx
-	movl	76(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%ecx), %edx
-	movl	80(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%ecx), %edx
-	movl	84(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	88(%ecx), %edx
-	movl	88(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	92(%ecx), %edx
-	movl	92(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	96(%ecx), %edx
-	movl	96(%esi), %eax
-	adcl	%edx, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	100(%ecx), %edx
-	movl	100(%esi), %edi
-	adcl	%edx, %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	104(%ecx), %edx
-	movl	104(%esi), %ebx
-	adcl	%edx, %ebx
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	108(%ecx), %ecx
-	movl	108(%esi), %esi
-	adcl	%ecx, %esi
-	sbbl	%edx, %edx
-	andl	$1, %edx
-	movl	132(%esp), %ebp
-	movl	72(%esp), %ecx          # 4-byte Reload
-	subl	(%ebp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	sbbl	4(%ebp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	sbbl	8(%ebp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%ebp), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	20(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	sbbl	24(%ebp), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	sbbl	28(%ebp), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%ebp), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	sbbl	36(%ebp), %ecx
-	sbbl	40(%ebp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	44(%ebp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	movl	%esi, %ebx
-	sbbl	48(%ebp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	sbbl	52(%ebp), %esi
-	sbbl	$0, %edx
-	andl	$1, %edx
-	jne	.LBB220_2
-# BB#1:
-	movl	%esi, %ebx
-.LBB220_2:
-	testb	%dl, %dl
-	movl	72(%esp), %eax          # 4-byte Reload
-	movl	68(%esp), %edx          # 4-byte Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	movl	60(%esp), %ebp          # 4-byte Reload
-	jne	.LBB220_4
-# BB#3:
-	movl	%ecx, %edx
-	movl	(%esp), %edi            # 4-byte Reload
-	movl	4(%esp), %ebp           # 4-byte Reload
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB220_4:
-	movl	120(%esp), %esi
-	movl	%eax, 56(%esi)
-	movl	76(%esp), %eax          # 4-byte Reload
-	movl	%eax, 60(%esi)
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	%eax, 64(%esi)
-	movl	84(%esp), %eax          # 4-byte Reload
-	movl	%eax, 68(%esi)
-	movl	88(%esp), %eax          # 4-byte Reload
-	movl	%eax, 72(%esi)
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	%eax, 76(%esi)
-	movl	96(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%esi)
-	movl	%ebp, 84(%esi)
-	movl	%edi, 88(%esi)
-	movl	%edx, 92(%esi)
-	movl	52(%esp), %edx          # 4-byte Reload
-	movl	48(%esp), %eax          # 4-byte Reload
-	jne	.LBB220_6
-# BB#5:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB220_6:
-	movl	%eax, 96(%esi)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	jne	.LBB220_8
-# BB#7:
-	movl	40(%esp), %edx          # 4-byte Reload
-.LBB220_8:
-	movl	%edx, 100(%esi)
-	jne	.LBB220_10
-# BB#9:
-	movl	44(%esp), %ecx          # 4-byte Reload
-.LBB220_10:
-	movl	%ecx, 104(%esi)
-	movl	%ebx, 108(%esi)
-	addl	$100, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end220:
-	.size	mcl_fpDbl_add14L, .Lfunc_end220-mcl_fpDbl_add14L
-
-	.globl	mcl_fpDbl_sub14L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub14L,@function
-mcl_fpDbl_sub14L:                       # @mcl_fpDbl_sub14L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$96, %esp
-	movl	120(%esp), %ebx
-	movl	(%ebx), %eax
-	movl	4(%ebx), %edx
-	movl	124(%esp), %ebp
-	subl	(%ebp), %eax
-	sbbl	4(%ebp), %edx
-	movl	8(%ebx), %esi
-	sbbl	8(%ebp), %esi
-	movl	116(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%ebx), %eax
-	sbbl	12(%ebp), %eax
-	movl	%edx, 4(%ecx)
-	movl	16(%ebx), %edx
-	sbbl	16(%ebp), %edx
-	movl	%esi, 8(%ecx)
-	movl	20(%ebp), %esi
-	movl	%eax, 12(%ecx)
-	movl	20(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	24(%ebp), %esi
-	movl	%edx, 16(%ecx)
-	movl	24(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	28(%ebp), %esi
-	movl	%eax, 20(%ecx)
-	movl	28(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	32(%ebp), %esi
-	movl	%edx, 24(%ecx)
-	movl	32(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	36(%ebp), %esi
-	movl	%eax, 28(%ecx)
-	movl	36(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	40(%ebp), %esi
-	movl	%edx, 32(%ecx)
-	movl	40(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	44(%ebp), %esi
-	movl	%eax, 36(%ecx)
-	movl	44(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	48(%ebp), %esi
-	movl	%edx, 40(%ecx)
-	movl	48(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	52(%ebp), %esi
-	movl	%eax, 44(%ecx)
-	movl	52(%ebx), %eax
-	sbbl	%esi, %eax
-	movl	56(%ebp), %esi
-	movl	%edx, 48(%ecx)
-	movl	56(%ebx), %edx
-	sbbl	%esi, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	60(%ebp), %edx
-	movl	%eax, 52(%ecx)
-	movl	60(%ebx), %eax
-	sbbl	%edx, %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	64(%ebp), %eax
-	movl	64(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	68(%ebp), %eax
-	movl	68(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	72(%ebp), %eax
-	movl	72(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	76(%ebp), %eax
-	movl	76(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	80(%ebp), %eax
-	movl	80(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	84(%ebp), %eax
-	movl	84(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	88(%ebp), %eax
-	movl	88(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	92(%ebp), %eax
-	movl	92(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	96(%ebp), %eax
-	movl	96(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	100(%ebp), %eax
-	movl	100(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	104(%ebp), %eax
-	movl	104(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	108(%ebp), %eax
-	movl	108(%ebx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	128(%esp), %ebp
-	jne	.LBB221_1
-# BB#2:
-	movl	$0, 56(%esp)            # 4-byte Folded Spill
-	jmp	.LBB221_3
-.LBB221_1:
-	movl	52(%ebp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-.LBB221_3:
-	testb	%al, %al
-	jne	.LBB221_4
-# BB#5:
-	movl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB221_6
-.LBB221_4:
-	movl	(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	4(%ebp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-.LBB221_6:
-	jne	.LBB221_7
-# BB#8:
-	movl	$0, 32(%esp)            # 4-byte Folded Spill
-	jmp	.LBB221_9
-.LBB221_7:
-	movl	48(%ebp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-.LBB221_9:
-	jne	.LBB221_10
-# BB#11:
-	movl	$0, 28(%esp)            # 4-byte Folded Spill
-	jmp	.LBB221_12
-.LBB221_10:
-	movl	44(%ebp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-.LBB221_12:
-	jne	.LBB221_13
-# BB#14:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	jmp	.LBB221_15
-.LBB221_13:
-	movl	40(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB221_15:
-	jne	.LBB221_16
-# BB#17:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB221_18
-.LBB221_16:
-	movl	36(%ebp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB221_18:
-	jne	.LBB221_19
-# BB#20:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB221_21
-.LBB221_19:
-	movl	32(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB221_21:
-	jne	.LBB221_22
-# BB#23:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB221_24
-.LBB221_22:
-	movl	28(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB221_24:
-	jne	.LBB221_25
-# BB#26:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB221_27
-.LBB221_25:
-	movl	24(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB221_27:
-	jne	.LBB221_28
-# BB#29:
-	movl	$0, %esi
-	jmp	.LBB221_30
-.LBB221_28:
-	movl	20(%ebp), %esi
-.LBB221_30:
-	jne	.LBB221_31
-# BB#32:
-	movl	$0, %edi
-	jmp	.LBB221_33
-.LBB221_31:
-	movl	16(%ebp), %edi
-.LBB221_33:
-	jne	.LBB221_34
-# BB#35:
-	movl	$0, %ebx
-	jmp	.LBB221_36
-.LBB221_34:
-	movl	12(%ebp), %ebx
-.LBB221_36:
-	jne	.LBB221_37
-# BB#38:
-	xorl	%ebp, %ebp
-	jmp	.LBB221_39
-.LBB221_37:
-	movl	8(%ebp), %ebp
-.LBB221_39:
-	movl	20(%esp), %edx          # 4-byte Reload
-	addl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 56(%ecx)
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 60(%ecx)
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 64(%ecx)
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 68(%ecx)
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%edi, 72(%ecx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, 76(%ecx)
-	movl	4(%esp), %edx           # 4-byte Reload
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 80(%ecx)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 84(%ecx)
-	movl	12(%esp), %edx          # 4-byte Reload
-	adcl	76(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 88(%ecx)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 92(%ecx)
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	84(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 96(%ecx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 100(%ecx)
-	movl	%eax, 104(%ecx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%ecx)
-	addl	$96, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end221:
-	.size	mcl_fpDbl_sub14L, .Lfunc_end221-mcl_fpDbl_sub14L
-
-	.align	16, 0x90
-	.type	.LmulPv480x32,@function
-.LmulPv480x32:                          # @mulPv480x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$104, %esp
-	movl	%edx, %ebp
-	movl	124(%esp), %esi
-	movl	%esi, %eax
-	mull	56(%ebp)
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	52(%ebp)
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	48(%ebp)
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	44(%ebp)
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	40(%ebp)
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	36(%ebp)
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	32(%ebp)
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	28(%ebp)
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	24(%ebp)
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	20(%ebp)
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	16(%ebp)
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	mull	12(%ebp)
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	8(%ebp)
-	movl	%edx, %edi
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%esi, %eax
-	mull	4(%ebp)
-	movl	%edx, %ebx
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%esi, %eax
-	mull	(%ebp)
-	movl	%eax, (%ecx)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%ecx)
-	adcl	4(%esp), %ebx           # 4-byte Folded Reload
-	movl	%ebx, 8(%ecx)
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 12(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%ecx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%ecx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%ecx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%ecx)
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 60(%ecx)
-	movl	%ecx, %eax
-	addl	$104, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end222:
-	.size	.LmulPv480x32, .Lfunc_end222-.LmulPv480x32
-
-	.globl	mcl_fp_mulUnitPre15L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre15L,@function
-mcl_fp_mulUnitPre15L:                   # @mcl_fp_mulUnitPre15L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$124, %esp
-	calll	.L223$pb
-.L223$pb:
-	popl	%ebx
-.Ltmp44:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp44-.L223$pb), %ebx
-	movl	152(%esp), %eax
-	movl	%eax, (%esp)
-	leal	56(%esp), %ecx
-	movl	148(%esp), %edx
-	calll	.LmulPv480x32
-	movl	116(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp
-	movl	72(%esp), %ebx
-	movl	68(%esp), %edi
-	movl	64(%esp), %esi
-	movl	56(%esp), %edx
-	movl	60(%esp), %ecx
-	movl	144(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 60(%eax)
-	addl	$124, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end223:
-	.size	mcl_fp_mulUnitPre15L, .Lfunc_end223-mcl_fp_mulUnitPre15L
-
-	.globl	mcl_fpDbl_mulPre15L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre15L,@function
-mcl_fpDbl_mulPre15L:                    # @mcl_fpDbl_mulPre15L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1084, %esp             # imm = 0x43C
-	calll	.L224$pb
-.L224$pb:
-	popl	%esi
-.Ltmp45:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp45-.L224$pb), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	1112(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1016(%esp), %ecx
-	movl	1108(%esp), %edi
-	movl	%edi, %edx
-	movl	%esi, %ebx
-	calll	.LmulPv480x32
-	movl	1076(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	1072(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1068(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1060(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1056(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1052(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1044(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1040(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1036(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1032(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	1028(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	1024(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1016(%esp), %eax
-	movl	1020(%esp), %ebp
-	movl	1104(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	1112(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	952(%esp), %ecx
-	movl	%edi, %edx
-	movl	%esi, %ebx
-	calll	.LmulPv480x32
-	addl	952(%esp), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	1012(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1008(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1004(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1000(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	996(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	992(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	988(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	984(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	980(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	976(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	972(%esp), %edi
-	movl	968(%esp), %esi
-	movl	964(%esp), %edx
-	movl	956(%esp), %eax
-	movl	960(%esp), %ecx
-	movl	1104(%esp), %ebp
-	movl	16(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 4(%ebp)
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	888(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	72(%esp), %eax          # 4-byte Reload
-	addl	888(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	948(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	944(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	940(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	936(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	932(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	928(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	924(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	920(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	916(%esp), %ebx
-	movl	912(%esp), %edi
-	movl	908(%esp), %esi
-	movl	904(%esp), %edx
-	movl	900(%esp), %ecx
-	movl	892(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	896(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	72(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%eax)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 112(%esp)         # 4-byte Folded Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	68(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	824(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	824(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	884(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	880(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	876(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	872(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	868(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	864(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	860(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	856(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	852(%esp), %ebx
-	movl	848(%esp), %edi
-	movl	844(%esp), %esi
-	movl	840(%esp), %edx
-	movl	836(%esp), %ecx
-	movl	828(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	832(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	760(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	820(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	816(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	812(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	808(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	804(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	800(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	796(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	792(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	788(%esp), %ebx
-	movl	784(%esp), %edi
-	movl	780(%esp), %esi
-	movl	776(%esp), %edx
-	movl	772(%esp), %ecx
-	movl	764(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	768(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	60(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	696(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	756(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	752(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	748(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	744(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	740(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	736(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	732(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	724(%esp), %ebx
-	movl	720(%esp), %edi
-	movl	716(%esp), %esi
-	movl	712(%esp), %edx
-	movl	708(%esp), %ecx
-	movl	700(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	704(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	632(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	692(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	688(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	684(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	680(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	676(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	672(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	668(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	664(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	660(%esp), %ebx
-	movl	656(%esp), %edi
-	movl	652(%esp), %esi
-	movl	648(%esp), %edx
-	movl	644(%esp), %ecx
-	movl	636(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	640(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	568(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	628(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	624(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	620(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	616(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	612(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	596(%esp), %ebx
-	movl	592(%esp), %edi
-	movl	588(%esp), %esi
-	movl	584(%esp), %edx
-	movl	580(%esp), %ecx
-	movl	572(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	576(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	504(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	564(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	560(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	540(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	536(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	532(%esp), %ebx
-	movl	528(%esp), %edi
-	movl	524(%esp), %esi
-	movl	520(%esp), %edx
-	movl	516(%esp), %ecx
-	movl	508(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	512(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 32(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	440(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	440(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	500(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	496(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	492(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	488(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	484(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	480(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	476(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	472(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	468(%esp), %ebx
-	movl	464(%esp), %edi
-	movl	460(%esp), %esi
-	movl	456(%esp), %edx
-	movl	452(%esp), %ecx
-	movl	444(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	448(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 36(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	376(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	436(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	432(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	428(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	424(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	420(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	416(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	404(%esp), %ebx
-	movl	400(%esp), %edi
-	movl	396(%esp), %esi
-	movl	392(%esp), %edx
-	movl	388(%esp), %ecx
-	movl	380(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 40(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	312(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	364(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	360(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	356(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	352(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	348(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	344(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	340(%esp), %ebx
-	movl	336(%esp), %edi
-	movl	332(%esp), %esi
-	movl	328(%esp), %edx
-	movl	324(%esp), %ecx
-	movl	316(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	320(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 44(%eax)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 108(%esp)         # 4-byte Folded Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 48(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	248(%esp), %ecx
-	movl	1108(%esp), %eax
-	movl	%eax, %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	108(%esp), %eax         # 4-byte Reload
-	addl	248(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	304(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	300(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	296(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	292(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	288(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	284(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	280(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	276(%esp), %ebx
-	movl	272(%esp), %edi
-	movl	268(%esp), %edx
-	movl	264(%esp), %ecx
-	movl	260(%esp), %eax
-	movl	252(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	256(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	movl	1104(%esp), %ebp
-	movl	%esi, 48(%ebp)
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	20(%esp), %esi          # 4-byte Reload
-	adcl	%esi, 36(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	184(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	244(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	240(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	236(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	232(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	228(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	224(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	220(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	212(%esp), %ebx
-	movl	208(%esp), %edx
-	movl	204(%esp), %ecx
-	movl	200(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	196(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	188(%esp), %eax
-	movl	192(%esp), %esi
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	1104(%esp), %edi
-	movl	%ebp, 52(%edi)
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%esi, %ebp
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 40(%esp)          # 4-byte Folded Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 64(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	1112(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	120(%esp), %ecx
-	movl	1108(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	120(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	124(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	adcl	128(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	164(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	156(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	152(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	148(%esp), %ebp
-	movl	144(%esp), %edi
-	movl	140(%esp), %esi
-	movl	136(%esp), %edx
-	movl	132(%esp), %ecx
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebx         # 4-byte Reload
-	movl	%ebx, 56(%eax)
-	movl	32(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 60(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	72(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 64(%eax)
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 68(%eax)
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 72(%eax)
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 76(%eax)
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edi, 80(%eax)
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebp, 84(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 88(%eax)
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	92(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 92(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	104(%esp), %ecx         # 4-byte Folded Reload
-	movl	%edx, 96(%eax)
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	108(%esp), %edx         # 4-byte Folded Reload
-	movl	%ecx, 100(%eax)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 104(%eax)
-	movl	%ecx, 108(%eax)
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 112(%eax)
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 116(%eax)
-	addl	$1084, %esp             # imm = 0x43C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end224:
-	.size	mcl_fpDbl_mulPre15L, .Lfunc_end224-mcl_fpDbl_mulPre15L
-
-	.globl	mcl_fpDbl_sqrPre15L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre15L,@function
-mcl_fpDbl_sqrPre15L:                    # @mcl_fpDbl_sqrPre15L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1084, %esp             # imm = 0x43C
-	calll	.L225$pb
-.L225$pb:
-	popl	%ebx
-.Ltmp46:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp46-.L225$pb), %ebx
-	movl	%ebx, 116(%esp)         # 4-byte Spill
-	movl	1108(%esp), %edx
-	movl	(%edx), %eax
-	movl	%eax, (%esp)
-	leal	1016(%esp), %ecx
-	movl	%edx, %edi
-	movl	%ebx, %esi
-	calll	.LmulPv480x32
-	movl	1076(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	1072(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1068(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	1064(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1060(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1056(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1052(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1044(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1040(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1036(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1032(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	1028(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	1024(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1016(%esp), %eax
-	movl	1020(%esp), %ebp
-	movl	1104(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%edi, %edx
-	movl	4(%edx), %eax
-	movl	%eax, (%esp)
-	leal	952(%esp), %ecx
-	movl	%esi, %ebx
-	calll	.LmulPv480x32
-	addl	952(%esp), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	1012(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1008(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1004(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1000(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	996(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	992(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	988(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	984(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	980(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	976(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	972(%esp), %edi
-	movl	968(%esp), %esi
-	movl	964(%esp), %edx
-	movl	956(%esp), %eax
-	movl	960(%esp), %ecx
-	movl	1104(%esp), %ebp
-	movl	16(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 4(%ebp)
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	8(%edx), %eax
-	movl	%eax, (%esp)
-	leal	888(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	72(%esp), %eax          # 4-byte Reload
-	addl	888(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	948(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	944(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	940(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	936(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	932(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	928(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	924(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	920(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	916(%esp), %ebx
-	movl	912(%esp), %edi
-	movl	908(%esp), %esi
-	movl	904(%esp), %edx
-	movl	900(%esp), %ecx
-	movl	892(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	896(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	72(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%eax)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 112(%esp)         # 4-byte Folded Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	68(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	12(%edx), %eax
-	movl	%eax, (%esp)
-	leal	824(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	824(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	884(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	880(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	876(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	872(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	868(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	864(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	860(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	856(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	852(%esp), %ebx
-	movl	848(%esp), %edi
-	movl	844(%esp), %esi
-	movl	840(%esp), %edx
-	movl	836(%esp), %ecx
-	movl	828(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	832(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	16(%edx), %eax
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	760(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	820(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	816(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	812(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	808(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	804(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	800(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	796(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	792(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	788(%esp), %ebx
-	movl	784(%esp), %edi
-	movl	780(%esp), %esi
-	movl	776(%esp), %edx
-	movl	772(%esp), %ecx
-	movl	764(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	768(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	60(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	20(%edx), %eax
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	696(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	756(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	752(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	748(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	744(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	740(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	736(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	732(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	728(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	724(%esp), %ebx
-	movl	720(%esp), %edi
-	movl	716(%esp), %esi
-	movl	712(%esp), %edx
-	movl	708(%esp), %ecx
-	movl	700(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	704(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	24(%edx), %eax
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	632(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	692(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	688(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	684(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	680(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	676(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	672(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	668(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	664(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	660(%esp), %ebx
-	movl	656(%esp), %edi
-	movl	652(%esp), %esi
-	movl	648(%esp), %edx
-	movl	644(%esp), %ecx
-	movl	636(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	640(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	64(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 68(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	28(%edx), %eax
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	568(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	628(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	624(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	620(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	616(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	612(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	596(%esp), %ebx
-	movl	592(%esp), %edi
-	movl	588(%esp), %esi
-	movl	584(%esp), %edx
-	movl	580(%esp), %ecx
-	movl	572(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	576(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	68(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	32(%edx), %eax
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	504(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	564(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	560(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	540(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	536(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	532(%esp), %ebx
-	movl	528(%esp), %edi
-	movl	524(%esp), %esi
-	movl	520(%esp), %edx
-	movl	516(%esp), %ecx
-	movl	508(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	512(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 32(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	36(%edx), %eax
-	movl	%eax, (%esp)
-	leal	440(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	440(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	500(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	496(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	492(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	488(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	484(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	480(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	476(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	472(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	468(%esp), %ebx
-	movl	464(%esp), %edi
-	movl	460(%esp), %esi
-	movl	456(%esp), %edx
-	movl	452(%esp), %ecx
-	movl	444(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	448(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 36(%eax)
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	40(%edx), %eax
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	376(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	436(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	432(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	428(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	424(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	420(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	416(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	404(%esp), %ebx
-	movl	400(%esp), %edi
-	movl	396(%esp), %esi
-	movl	392(%esp), %edx
-	movl	388(%esp), %ecx
-	movl	380(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	56(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 40(%eax)
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	12(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	44(%edx), %eax
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	312(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	372(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	368(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	364(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	360(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	356(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	352(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	348(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	344(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	340(%esp), %ebx
-	movl	336(%esp), %edi
-	movl	332(%esp), %esi
-	movl	328(%esp), %edx
-	movl	324(%esp), %ecx
-	movl	316(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	320(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 44(%eax)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 108(%esp)         # 4-byte Folded Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 32(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 56(%esp)          # 4-byte Folded Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, 48(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	48(%edx), %eax
-	movl	%eax, (%esp)
-	leal	248(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	108(%esp), %eax         # 4-byte Reload
-	addl	248(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	304(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	300(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	296(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	292(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	288(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	284(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	280(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	276(%esp), %ebx
-	movl	272(%esp), %edi
-	movl	268(%esp), %edx
-	movl	264(%esp), %ecx
-	movl	260(%esp), %eax
-	movl	252(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	256(%esp), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	movl	1104(%esp), %ebp
-	movl	%esi, 48(%ebp)
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	20(%esp), %esi          # 4-byte Reload
-	adcl	%esi, 36(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	adcl	16(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 28(%esp)          # 4-byte Spill
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 52(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 44(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	52(%edx), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	184(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	244(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	240(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	236(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	232(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	228(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	224(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	220(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	216(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	212(%esp), %ebx
-	movl	208(%esp), %edx
-	movl	204(%esp), %ecx
-	movl	200(%esp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	196(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	188(%esp), %eax
-	movl	192(%esp), %esi
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	1104(%esp), %edi
-	movl	%ebp, 52(%edi)
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%esi, %ebp
-	adcl	12(%esp), %ebp          # 4-byte Folded Reload
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	%edi, 40(%esp)          # 4-byte Folded Spill
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebx          # 4-byte Reload
-	adcl	%ebx, 64(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	1108(%esp), %edx
-	movl	56(%edx), %eax
-	movl	%eax, (%esp)
-	leal	120(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	120(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	124(%esp), %ebp
-	movl	%ebp, 32(%esp)          # 4-byte Spill
-	adcl	128(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	164(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	156(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	152(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	148(%esp), %ebp
-	movl	144(%esp), %edi
-	movl	140(%esp), %esi
-	movl	136(%esp), %edx
-	movl	132(%esp), %ecx
-	movl	1104(%esp), %eax
-	movl	112(%esp), %ebx         # 4-byte Reload
-	movl	%ebx, 56(%eax)
-	movl	32(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 60(%eax)
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	72(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 64(%eax)
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 68(%eax)
-	adcl	36(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 72(%eax)
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 76(%eax)
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edi, 80(%eax)
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	64(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebp, 84(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 88(%eax)
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	92(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 92(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	104(%esp), %ecx         # 4-byte Folded Reload
-	movl	%edx, 96(%eax)
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	108(%esp), %edx         # 4-byte Folded Reload
-	movl	%ecx, 100(%eax)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 104(%eax)
-	movl	%ecx, 108(%eax)
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 112(%eax)
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 116(%eax)
-	addl	$1084, %esp             # imm = 0x43C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end225:
-	.size	mcl_fpDbl_sqrPre15L, .Lfunc_end225-mcl_fpDbl_sqrPre15L
-
-	.globl	mcl_fp_mont15L
-	.align	16, 0x90
-	.type	mcl_fp_mont15L,@function
-mcl_fp_mont15L:                         # @mcl_fp_mont15L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$2044, %esp             # imm = 0x7FC
-	calll	.L226$pb
-.L226$pb:
-	popl	%ebx
-.Ltmp47:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp47-.L226$pb), %ebx
-	movl	2076(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1976(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1976(%esp), %ebp
-	movl	1980(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	2036(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	2032(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	2028(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	2024(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2020(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2016(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	2012(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	2008(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	2004(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	2000(%esp), %edi
-	movl	1996(%esp), %esi
-	movl	1992(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	1988(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	1984(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	1912(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	addl	1912(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1916(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1920(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1924(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1928(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1932(%esp), %esi
-	adcl	1936(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1940(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1944(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1948(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1952(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1956(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1960(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1964(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1968(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	1972(%esp), %ebp
-	sbbl	%eax, %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1848(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	116(%esp), %eax         # 4-byte Reload
-	andl	$1, %eax
-	movl	88(%esp), %edx          # 4-byte Reload
-	addl	1848(%esp), %edx
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1852(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1856(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1860(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	1864(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	1868(%esp), %edi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1872(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1876(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1880(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1884(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1888(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1892(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1896(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1900(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	1904(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	adcl	1908(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%edx, %eax
-	movl	%edx, %esi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1784(%esp), %ecx
-	movl	2076(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv480x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	1784(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1788(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1792(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1796(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	1804(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1808(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	1812(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1816(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1820(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1824(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1828(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1832(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	1836(%esp), %esi
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	1840(%esp), %ebp
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1844(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1720(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	1720(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1724(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1728(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1732(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1736(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1740(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1744(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1748(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1752(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	1756(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1760(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1764(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	1768(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	adcl	1772(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1776(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1780(%esp), %esi
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1656(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	addl	1656(%esp), %eax
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1660(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1664(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1668(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1672(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1676(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1680(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1684(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	1688(%esp), %ebp
-	adcl	1692(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1696(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1700(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1704(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1708(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	1712(%esp), %edi
-	adcl	1716(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1592(%esp), %ecx
-	movl	2068(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv480x32
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	1592(%esp), %ecx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1596(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1600(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1604(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1608(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1612(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1616(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1620(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1624(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	1628(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1632(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1636(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1640(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1644(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1648(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1652(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1528(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	1528(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1532(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1536(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1540(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	1544(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1548(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1552(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1556(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1560(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1564(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %edi         # 4-byte Reload
-	adcl	1568(%esp), %edi
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	1572(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1576(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1580(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1584(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1588(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1464(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	92(%esp), %ecx          # 4-byte Reload
-	addl	1464(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1468(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1472(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1476(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1480(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	1484(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1488(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1492(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1496(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	1500(%esp), %edi
-	movl	%edi, 112(%esp)         # 4-byte Spill
-	adcl	1504(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1508(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	adcl	1512(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1516(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1520(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1524(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1400(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	movl	92(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1400(%esp), %edi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1404(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1408(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1412(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1416(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	1420(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1424(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1428(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	1432(%esp), %edi
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1436(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1440(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1444(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	1448(%esp), %esi
-	movl	%esi, %ebp
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1452(%esp), %esi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1456(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1460(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1336(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	1336(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1352(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1356(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1360(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1364(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1372(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1376(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1380(%esp), %ebp
-	movl	%ebp, 116(%esp)         # 4-byte Spill
-	adcl	1384(%esp), %esi
-	movl	%esi, %ebp
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	1392(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1272(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	movl	80(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1272(%esp), %edi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1276(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1280(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1284(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1288(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1292(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1296(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1300(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1304(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1308(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1312(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1316(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	adcl	1320(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1324(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	1328(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1332(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %edi
-	movl	2072(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1208(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	1208(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1212(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1216(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1220(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1228(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	1232(%esp), %ebp
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1236(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1240(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	1244(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1248(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1252(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1256(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1260(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1264(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1268(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1144(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	addl	1144(%esp), %eax
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1148(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	1156(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1168(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1180(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1188(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	1196(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1200(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1204(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1080(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	1080(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1084(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1088(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	1092(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1104(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1116(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	1128(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1016(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	1016(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1028(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	1032(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	1044(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	1060(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	952(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	952(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	964(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	976(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	992(%esp), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	888(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	movl	%ebp, %eax
-	andl	$1, %eax
-	addl	888(%esp), %esi
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	892(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	896(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	900(%esp), %edi
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	904(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	908(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	912(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	916(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	920(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	924(%esp), %ebp
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	928(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	932(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	936(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	940(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	944(%esp), %esi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	948(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	824(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	68(%esp), %ecx          # 4-byte Reload
-	addl	824(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	832(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	856(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	864(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	872(%esp), %edi
-	adcl	876(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	movl	68(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	760(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	776(%esp), %esi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	800(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	804(%esp), %ebp
-	adcl	808(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	816(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	696(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	708(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	736(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	748(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	752(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	632(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	656(%esp), %edi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	672(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	688(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	568(%esp), %ecx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	588(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	596(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	604(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	504(%esp), %esi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	516(%esp), %edi
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	520(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	532(%esp), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	560(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	440(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	100(%esp), %ecx         # 4-byte Reload
-	addl	440(%esp), %ecx
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	444(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	448(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	adcl	452(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	460(%esp), %edi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	492(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	376(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	388(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	396(%esp), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	404(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	416(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	112(%esp), %ecx         # 4-byte Reload
-	addl	312(%esp), %ecx
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	320(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	336(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	348(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	352(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	248(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	248(%esp), %edi
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	252(%esp), %esi
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	256(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	288(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	2072(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	2068(%esp), %edx
-	calll	.LmulPv480x32
-	movl	%esi, %ecx
-	movl	96(%esp), %esi          # 4-byte Reload
-	addl	184(%esp), %ecx
-	adcl	188(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	196(%esp), %ebp
-	adcl	200(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	120(%esp), %ecx
-	movl	2076(%esp), %edx
-	calll	.LmulPv480x32
-	movl	104(%esp), %ebx         # 4-byte Reload
-	andl	$1, %ebx
-	addl	120(%esp), %edi
-	movl	%ebp, %edi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	128(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	adcl	132(%esp), %edi
-	adcl	136(%esp), %esi
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	140(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	144(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	148(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	152(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	156(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	160(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	164(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	168(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	172(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	176(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %edx         # 4-byte Reload
-	adcl	180(%esp), %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	adcl	$0, %ebx
-	movl	%ebx, 104(%esp)         # 4-byte Spill
-	movl	%eax, %edx
-	movl	2076(%esp), %ebp
-	subl	(%ebp), %edx
-	sbbl	4(%ebp), %ecx
-	movl	%edi, %eax
-	sbbl	8(%ebp), %eax
-	movl	%esi, %ebx
-	sbbl	12(%ebp), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebx          # 4-byte Reload
-	sbbl	16(%ebp), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%ebp), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebx          # 4-byte Reload
-	sbbl	24(%ebp), %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebx          # 4-byte Reload
-	sbbl	28(%ebp), %ebx
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebx          # 4-byte Reload
-	sbbl	32(%ebp), %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebx          # 4-byte Reload
-	sbbl	36(%ebp), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebx          # 4-byte Reload
-	sbbl	40(%ebp), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebx          # 4-byte Reload
-	sbbl	44(%ebp), %ebx
-	movl	%ebx, 48(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebx          # 4-byte Reload
-	sbbl	48(%ebp), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebx         # 4-byte Reload
-	sbbl	52(%ebp), %ebx
-	movl	%ebx, 88(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebx         # 4-byte Reload
-	sbbl	56(%ebp), %ebx
-	movl	%ebx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebx         # 4-byte Reload
-	movl	108(%esp), %ebp         # 4-byte Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB226_2
-# BB#1:
-	movl	%edx, %ebp
-.LBB226_2:
-	movl	2064(%esp), %edx
-	movl	%ebp, (%edx)
-	testb	%bl, %bl
-	movl	116(%esp), %ebp         # 4-byte Reload
-	jne	.LBB226_4
-# BB#3:
-	movl	%ecx, %ebp
-.LBB226_4:
-	movl	%ebp, 4(%edx)
-	jne	.LBB226_6
-# BB#5:
-	movl	%eax, %edi
-.LBB226_6:
-	movl	%edi, 8(%edx)
-	jne	.LBB226_8
-# BB#7:
-	movl	16(%esp), %esi          # 4-byte Reload
-.LBB226_8:
-	movl	%esi, 12(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_10
-# BB#9:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB226_10:
-	movl	%eax, 16(%edx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_12
-# BB#11:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB226_12:
-	movl	%eax, 20(%edx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_14
-# BB#13:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB226_14:
-	movl	%eax, 24(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_16
-# BB#15:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB226_16:
-	movl	%eax, 28(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_18
-# BB#17:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB226_18:
-	movl	%eax, 32(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_20
-# BB#19:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB226_20:
-	movl	%eax, 36(%edx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_22
-# BB#21:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB226_22:
-	movl	%eax, 40(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_24
-# BB#23:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB226_24:
-	movl	%eax, 44(%edx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	jne	.LBB226_26
-# BB#25:
-	movl	52(%esp), %eax          # 4-byte Reload
-.LBB226_26:
-	movl	%eax, 48(%edx)
-	movl	100(%esp), %eax         # 4-byte Reload
-	jne	.LBB226_28
-# BB#27:
-	movl	88(%esp), %eax          # 4-byte Reload
-.LBB226_28:
-	movl	%eax, 52(%edx)
-	movl	112(%esp), %eax         # 4-byte Reload
-	jne	.LBB226_30
-# BB#29:
-	movl	96(%esp), %eax          # 4-byte Reload
-.LBB226_30:
-	movl	%eax, 56(%edx)
-	addl	$2044, %esp             # imm = 0x7FC
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end226:
-	.size	mcl_fp_mont15L, .Lfunc_end226-mcl_fp_mont15L
-
-	.globl	mcl_fp_montNF15L
-	.align	16, 0x90
-	.type	mcl_fp_montNF15L,@function
-mcl_fp_montNF15L:                       # @mcl_fp_montNF15L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$2028, %esp             # imm = 0x7EC
-	calll	.L227$pb
-.L227$pb:
-	popl	%ebx
-.Ltmp48:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp48-.L227$pb), %ebx
-	movl	2060(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1960(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1960(%esp), %ebp
-	movl	1964(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	2020(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2016(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2012(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2008(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	2004(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2000(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	1996(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	1992(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	1988(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	1984(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	1980(%esp), %esi
-	movl	1976(%esp), %edi
-	movl	1972(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	1968(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	1896(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	1896(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1900(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1904(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1908(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1912(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	1916(%esp), %esi
-	movl	%esi, %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1920(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1924(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1928(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1932(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1936(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1940(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	1944(%esp), %ebp
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1948(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1952(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1956(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1832(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1892(%esp), %eax
-	movl	92(%esp), %edx          # 4-byte Reload
-	addl	1832(%esp), %edx
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1836(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1840(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1844(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	1848(%esp), %edi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1852(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1856(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1860(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1864(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1868(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1872(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	1876(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	adcl	1880(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	1884(%esp), %ebp
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1888(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1768(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	1768(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1772(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1776(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1780(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1784(%esp), %edi
-	movl	%edi, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1788(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1792(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1796(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	1804(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1808(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1812(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1816(%esp), %eax
-	movl	%eax, %esi
-	adcl	1820(%esp), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	1824(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1828(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1704(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1764(%esp), %eax
-	movl	68(%esp), %edx          # 4-byte Reload
-	addl	1704(%esp), %edx
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1708(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1712(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1716(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1720(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1724(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1728(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1732(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	1736(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1740(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	1744(%esp), %edi
-	adcl	1748(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1752(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	1756(%esp), %ebp
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1760(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1640(%esp), %ecx
-	movl	2060(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv480x32
-	addl	1640(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1644(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1648(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1652(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1656(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1660(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1664(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1668(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1672(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1676(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1680(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1684(%esp), %eax
-	movl	%eax, %esi
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	1688(%esp), %edi
-	adcl	1692(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1696(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	1700(%esp), %ebp
-	movl	2056(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1576(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1636(%esp), %eax
-	movl	88(%esp), %edx          # 4-byte Reload
-	addl	1576(%esp), %edx
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1580(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1584(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1588(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1592(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1596(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1600(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1604(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1608(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1612(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	1616(%esp), %esi
-	adcl	1620(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1624(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1628(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	1632(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1512(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	1512(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1516(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1520(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1524(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1528(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	1532(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1536(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1540(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1544(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	1548(%esp), %ebp
-	adcl	1552(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1556(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1560(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1564(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1568(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1572(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1448(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1508(%esp), %eax
-	movl	72(%esp), %edx          # 4-byte Reload
-	addl	1448(%esp), %edx
-	movl	40(%esp), %ecx          # 4-byte Reload
-	adcl	1452(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1456(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1460(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	1464(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1468(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	1472(%esp), %edi
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1476(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	1480(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	adcl	1484(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1488(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1492(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1496(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1500(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	1504(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1384(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	1384(%esp), %esi
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1400(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1404(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1408(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1412(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1416(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1424(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1428(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1432(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1436(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1440(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1444(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1320(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1380(%esp), %edx
-	movl	40(%esp), %ecx          # 4-byte Reload
-	addl	1320(%esp), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	1324(%esp), %ebp
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	1328(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1336(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	1352(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1356(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1360(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1364(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	1368(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1372(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1376(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1256(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	movl	40(%esp), %eax          # 4-byte Reload
-	addl	1256(%esp), %eax
-	adcl	1260(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	adcl	1264(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	1272(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1288(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	1296(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1300(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1304(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1308(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	1312(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1192(%esp), %ecx
-	movl	2052(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv480x32
-	movl	1252(%esp), %eax
-	movl	48(%esp), %edx          # 4-byte Reload
-	addl	1192(%esp), %edx
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	1196(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1200(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	1204(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1208(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	1212(%esp), %ebp
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1216(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1220(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1224(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	1228(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1232(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1236(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1240(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	1244(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	1248(%esp), %esi
-	adcl	$0, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1128(%esp), %ecx
-	movl	2060(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv480x32
-	addl	1128(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	1140(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1144(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1148(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1168(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1184(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	1188(%esp), %esi
-	movl	2056(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1064(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	1124(%esp), %eax
-	movl	44(%esp), %edx          # 4-byte Reload
-	addl	1064(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1068(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	1072(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1076(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1080(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	1084(%esp), %edi
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1088(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1092(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1096(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	adcl	1100(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1104(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1108(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1112(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1116(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	1120(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %esi
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1000(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	1000(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	1012(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1020(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	1028(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1044(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	1060(%esp), %esi
-	movl	2056(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	936(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	996(%esp), %eax
-	movl	52(%esp), %edx          # 4-byte Reload
-	addl	936(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	940(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	944(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	948(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	952(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	956(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	960(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	964(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	968(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	972(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	976(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	980(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	984(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	988(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	adcl	992(%esp), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %esi
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	872(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	872(%esp), %edi
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	876(%esp), %ebp
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	880(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	928(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	932(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	808(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	868(%esp), %eax
-	movl	%ebp, %ecx
-	addl	808(%esp), %ecx
-	adcl	812(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	816(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	820(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	824(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	828(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	832(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	836(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	840(%esp), %edi
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	844(%esp), %esi
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	848(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	852(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	856(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	860(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	864(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	744(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	744(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	768(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	776(%esp), %edi
-	adcl	780(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	792(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	680(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	740(%esp), %eax
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	680(%esp), %ecx
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	684(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	688(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	692(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	696(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	adcl	700(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	704(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	adcl	708(%esp), %edi
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	712(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	716(%esp), %ebp
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	720(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	724(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	728(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	732(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %edx          # 4-byte Reload
-	adcl	736(%esp), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	616(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	616(%esp), %esi
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	620(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	644(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	648(%esp), %edi
-	adcl	652(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	656(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	612(%esp), %edx
-	movl	%esi, %ecx
-	addl	552(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	572(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	580(%esp), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	588(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	488(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	488(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	%esi, %ebp
-	adcl	508(%esp), %ebp
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	512(%esp), %edi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	528(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	484(%esp), %edx
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	424(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	440(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	adcl	444(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%esi, %edi
-	adcl	460(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	360(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	360(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	368(%esp), %esi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	376(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	396(%esp), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	400(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	296(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	356(%esp), %edx
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	296(%esp), %ecx
-	adcl	300(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	308(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	332(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	336(%esp), %edi
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	232(%esp), %ecx
-	movl	2060(%esp), %edx
-	calll	.LmulPv480x32
-	addl	232(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	240(%esp), %ebp
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	244(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	272(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	276(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	2056(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	168(%esp), %ecx
-	movl	2052(%esp), %edx
-	calll	.LmulPv480x32
-	movl	228(%esp), %edx
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	168(%esp), %ecx
-	adcl	172(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	adcl	176(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	180(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	184(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	188(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	192(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	208(%esp), %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	212(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %edi
-	movl	%eax, (%esp)
-	leal	104(%esp), %ecx
-	movl	2060(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv480x32
-	addl	104(%esp), %edi
-	movl	68(%esp), %edi          # 4-byte Reload
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	108(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	112(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	%ecx, %ebx
-	adcl	116(%esp), %edi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	120(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	124(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	128(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	132(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	136(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	140(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	44(%esp), %ecx          # 4-byte Reload
-	adcl	144(%esp), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	adcl	148(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	152(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	156(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	160(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	164(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	%eax, %edx
-	movl	2060(%esp), %ecx
-	subl	(%ecx), %edx
-	movl	%ebx, %ebp
-	sbbl	4(%ecx), %ebp
-	movl	%edi, %ebx
-	sbbl	8(%ecx), %ebx
-	movl	88(%esp), %eax          # 4-byte Reload
-	sbbl	12(%ecx), %eax
-	sbbl	16(%ecx), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	sbbl	20(%ecx), %esi
-	movl	%esi, 8(%esp)           # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	sbbl	24(%ecx), %esi
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	movl	52(%esp), %esi          # 4-byte Reload
-	sbbl	28(%ecx), %esi
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	movl	56(%esp), %esi          # 4-byte Reload
-	sbbl	32(%ecx), %esi
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	movl	44(%esp), %esi          # 4-byte Reload
-	sbbl	36(%ecx), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	sbbl	40(%ecx), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	sbbl	44(%ecx), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	sbbl	48(%ecx), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	sbbl	52(%ecx), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	sbbl	56(%ecx), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	%esi, %ecx
-	sarl	$31, %ecx
-	testl	%ecx, %ecx
-	movl	100(%esp), %ecx         # 4-byte Reload
-	js	.LBB227_2
-# BB#1:
-	movl	%edx, %ecx
-.LBB227_2:
-	movl	2048(%esp), %edx
-	movl	%ecx, (%edx)
-	movl	92(%esp), %esi          # 4-byte Reload
-	js	.LBB227_4
-# BB#3:
-	movl	%ebp, %esi
-.LBB227_4:
-	movl	%esi, 4(%edx)
-	movl	88(%esp), %ecx          # 4-byte Reload
-	js	.LBB227_6
-# BB#5:
-	movl	%ebx, %edi
-.LBB227_6:
-	movl	%edi, 8(%edx)
-	js	.LBB227_8
-# BB#7:
-	movl	%eax, %ecx
-.LBB227_8:
-	movl	%ecx, 12(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	js	.LBB227_10
-# BB#9:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB227_10:
-	movl	%eax, 16(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB227_12
-# BB#11:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB227_12:
-	movl	%eax, 20(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB227_14
-# BB#13:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB227_14:
-	movl	%eax, 24(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB227_16
-# BB#15:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB227_16:
-	movl	%eax, 28(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB227_18
-# BB#17:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB227_18:
-	movl	%eax, 32(%edx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	js	.LBB227_20
-# BB#19:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB227_20:
-	movl	%eax, 36(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB227_22
-# BB#21:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB227_22:
-	movl	%eax, 40(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	js	.LBB227_24
-# BB#23:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB227_24:
-	movl	%eax, 44(%edx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	js	.LBB227_26
-# BB#25:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB227_26:
-	movl	%eax, 48(%edx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	js	.LBB227_28
-# BB#27:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB227_28:
-	movl	%eax, 52(%edx)
-	movl	96(%esp), %eax          # 4-byte Reload
-	js	.LBB227_30
-# BB#29:
-	movl	68(%esp), %eax          # 4-byte Reload
-.LBB227_30:
-	movl	%eax, 56(%edx)
-	addl	$2028, %esp             # imm = 0x7EC
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end227:
-	.size	mcl_fp_montNF15L, .Lfunc_end227-mcl_fp_montNF15L
-
-	.globl	mcl_fp_montRed15L
-	.align	16, 0x90
-	.type	mcl_fp_montRed15L,@function
-mcl_fp_montRed15L:                      # @mcl_fp_montRed15L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1148, %esp             # imm = 0x47C
-	calll	.L228$pb
-.L228$pb:
-	popl	%eax
-.Ltmp49:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp49-.L228$pb), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1176(%esp), %edx
-	movl	-4(%edx), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	1172(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 80(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	imull	%esi, %ebx
-	movl	116(%ecx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%ecx), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%ecx), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	104(%ecx), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	100(%ecx), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	96(%ecx), %esi
-	movl	%esi, 152(%esp)         # 4-byte Spill
-	movl	92(%ecx), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	88(%ecx), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	84(%ecx), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	80(%ecx), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	76(%ecx), %esi
-	movl	%esi, 168(%esp)         # 4-byte Spill
-	movl	72(%ecx), %esi
-	movl	%esi, 164(%esp)         # 4-byte Spill
-	movl	68(%ecx), %esi
-	movl	%esi, 176(%esp)         # 4-byte Spill
-	movl	64(%ecx), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	60(%ecx), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	56(%ecx), %esi
-	movl	%esi, 156(%esp)         # 4-byte Spill
-	movl	52(%ecx), %esi
-	movl	%esi, 140(%esp)         # 4-byte Spill
-	movl	48(%ecx), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	44(%ecx), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	36(%ecx), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	32(%ecx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	28(%ecx), %ebp
-	movl	24(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	20(%ecx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	16(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	12(%ecx), %edi
-	movl	8(%ecx), %esi
-	movl	(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	56(%edx), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%edx), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	44(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	1080(%esp), %ecx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	movl	80(%esp), %eax          # 4-byte Reload
-	addl	1080(%esp), %eax
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1084(%esp), %ecx
-	adcl	1088(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	adcl	1092(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1104(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1108(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1116(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	adcl	$0, 180(%esp)           # 4-byte Folded Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, 164(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	movl	148(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1016(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	1016(%esp), %esi
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	1020(%esp), %edx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1044(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	1060(%esp), %ebp
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, 164(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, %esi
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 148(%esp)         # 4-byte Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	952(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	952(%esp), %edi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	956(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	992(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %ebp         # 4-byte Reload
-	adcl	1004(%esp), %ebp
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	adcl	$0, 164(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	888(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	888(%esp), %esi
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	892(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	928(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	932(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	936(%esp), %ebp
-	movl	%ebp, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 160(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	movl	132(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	824(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	824(%esp), %esi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	828(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	856(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 132(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	760(%esp), %esi
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	764(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	movl	152(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	movl	128(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	696(%esp), %esi
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	700(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 152(%esp)         # 4-byte Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 128(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	632(%esp), %edi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	636(%esp), %ecx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %ebp         # 4-byte Reload
-	adcl	672(%esp), %ebp
-	movl	164(%esp), %edi         # 4-byte Reload
-	adcl	676(%esp), %edi
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	$0, 136(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	568(%esp), %esi
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	572(%esp), %ecx
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	adcl	604(%esp), %ebp
-	movl	%ebp, 176(%esp)         # 4-byte Spill
-	adcl	608(%esp), %edi
-	movl	%edi, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %edi         # 4-byte Reload
-	adcl	616(%esp), %edi
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	$0, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	504(%esp), %ecx
-	movl	1176(%esp), %eax
-	movl	%eax, %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	504(%esp), %esi
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	508(%esp), %ecx
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %esi         # 4-byte Reload
-	adcl	524(%esp), %esi
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	adcl	548(%esp), %edi
-	movl	%edi, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	440(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	440(%esp), %edi
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %edi         # 4-byte Reload
-	adcl	452(%esp), %edi
-	adcl	456(%esp), %esi
-	movl	%esi, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %esi         # 4-byte Reload
-	adcl	464(%esp), %esi
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	376(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	376(%esp), %ebp
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	380(%esp), %ebp
-	adcl	384(%esp), %edi
-	movl	%edi, 140(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	172(%esp), %edi         # 4-byte Reload
-	adcl	392(%esp), %edi
-	adcl	396(%esp), %esi
-	movl	%esi, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %esi         # 4-byte Reload
-	adcl	412(%esp), %esi
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, 84(%esp)            # 4-byte Folded Spill
-	movl	%ebp, %eax
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	312(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	312(%esp), %ebp
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	320(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	adcl	324(%esp), %edi
-	movl	%edi, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %ecx         # 4-byte Reload
-	adcl	328(%esp), %ecx
-	movl	%ecx, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %ecx         # 4-byte Reload
-	adcl	332(%esp), %ecx
-	movl	%ecx, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %ecx         # 4-byte Reload
-	adcl	336(%esp), %ecx
-	movl	%ecx, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %ecx         # 4-byte Reload
-	adcl	340(%esp), %ecx
-	movl	%ecx, 168(%esp)         # 4-byte Spill
-	adcl	344(%esp), %esi
-	movl	%esi, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %ecx         # 4-byte Reload
-	adcl	348(%esp), %ecx
-	movl	%ecx, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %ebp         # 4-byte Reload
-	adcl	352(%esp), %ebp
-	movl	136(%esp), %ecx         # 4-byte Reload
-	adcl	356(%esp), %ecx
-	movl	%ecx, 136(%esp)         # 4-byte Spill
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	360(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	364(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	368(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	372(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	adcl	$0, 104(%esp)           # 4-byte Folded Spill
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	$0, %esi
-	movl	%eax, %edi
-	imull	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	248(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	248(%esp), %edi
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	252(%esp), %ecx
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	adcl	284(%esp), %ebp
-	movl	%ebp, 148(%esp)         # 4-byte Spill
-	movl	136(%esp), %edi         # 4-byte Reload
-	adcl	288(%esp), %edi
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	308(%esp), %ebp
-	adcl	$0, 108(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	1176(%esp), %edx
-	movl	92(%esp), %ebx          # 4-byte Reload
-	calll	.LmulPv480x32
-	addl	184(%esp), %esi
-	movl	172(%esp), %edx         # 4-byte Reload
-	adcl	188(%esp), %edx
-	movl	%edx, 172(%esp)         # 4-byte Spill
-	movl	180(%esp), %ecx         # 4-byte Reload
-	adcl	192(%esp), %ecx
-	movl	%ecx, 180(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	168(%esp), %esi         # 4-byte Reload
-	adcl	204(%esp), %esi
-	movl	%esi, 168(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	220(%esp), %edi
-	movl	%edi, 136(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	240(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %ebx          # 4-byte Reload
-	adcl	$0, %ebx
-	movl	%edx, %eax
-	subl	16(%esp), %edx          # 4-byte Folded Reload
-	sbbl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	176(%esp), %eax         # 4-byte Reload
-	sbbl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	164(%esp), %ebp         # 4-byte Reload
-	sbbl	12(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	144(%esp), %edi         # 4-byte Reload
-	sbbl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	160(%esp), %edi         # 4-byte Reload
-	sbbl	28(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	148(%esp), %edi         # 4-byte Reload
-	sbbl	32(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	136(%esp), %edi         # 4-byte Reload
-	sbbl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	152(%esp), %edi         # 4-byte Reload
-	sbbl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	132(%esp), %edi         # 4-byte Reload
-	sbbl	44(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 112(%esp)         # 4-byte Spill
-	movl	128(%esp), %edi         # 4-byte Reload
-	sbbl	48(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	sbbl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 124(%esp)         # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	sbbl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 140(%esp)         # 4-byte Spill
-	movl	108(%esp), %edi         # 4-byte Reload
-	sbbl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 156(%esp)         # 4-byte Spill
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	movl	%ebx, %edi
-	jne	.LBB228_2
-# BB#1:
-	movl	%edx, 172(%esp)         # 4-byte Spill
-.LBB228_2:
-	movl	1168(%esp), %edx
-	movl	172(%esp), %ebx         # 4-byte Reload
-	movl	%ebx, (%edx)
-	movl	%edi, %ebx
-	testb	%bl, %bl
-	jne	.LBB228_4
-# BB#3:
-	movl	%ecx, 180(%esp)         # 4-byte Spill
-.LBB228_4:
-	movl	180(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 4(%edx)
-	movl	176(%esp), %ecx         # 4-byte Reload
-	jne	.LBB228_6
-# BB#5:
-	movl	%eax, %ecx
-.LBB228_6:
-	movl	%ecx, 8(%edx)
-	movl	164(%esp), %eax         # 4-byte Reload
-	jne	.LBB228_8
-# BB#7:
-	movl	%ebp, %eax
-.LBB228_8:
-	movl	%eax, 12(%edx)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	movl	148(%esp), %eax         # 4-byte Reload
-	movl	168(%esp), %ebp         # 4-byte Reload
-	jne	.LBB228_10
-# BB#9:
-	movl	%esi, %ebp
-.LBB228_10:
-	movl	%ebp, 16(%edx)
-	movl	152(%esp), %ebp         # 4-byte Reload
-	movl	144(%esp), %ebx         # 4-byte Reload
-	jne	.LBB228_12
-# BB#11:
-	movl	84(%esp), %ebx          # 4-byte Reload
-.LBB228_12:
-	movl	%ebx, 20(%edx)
-	movl	132(%esp), %ebx         # 4-byte Reload
-	movl	160(%esp), %edi         # 4-byte Reload
-	jne	.LBB228_14
-# BB#13:
-	movl	88(%esp), %edi          # 4-byte Reload
-.LBB228_14:
-	movl	%edi, 24(%edx)
-	movl	128(%esp), %edi         # 4-byte Reload
-	jne	.LBB228_16
-# BB#15:
-	movl	92(%esp), %eax          # 4-byte Reload
-.LBB228_16:
-	movl	%eax, 28(%edx)
-	movl	116(%esp), %esi         # 4-byte Reload
-	jne	.LBB228_18
-# BB#17:
-	movl	96(%esp), %eax          # 4-byte Reload
-	movl	%eax, 136(%esp)         # 4-byte Spill
-.LBB228_18:
-	movl	136(%esp), %eax         # 4-byte Reload
-	movl	%eax, 32(%edx)
-	jne	.LBB228_20
-# BB#19:
-	movl	100(%esp), %ebp         # 4-byte Reload
-.LBB228_20:
-	movl	%ebp, 36(%edx)
-	movl	104(%esp), %eax         # 4-byte Reload
-	jne	.LBB228_22
-# BB#21:
-	movl	112(%esp), %ebx         # 4-byte Reload
-.LBB228_22:
-	movl	%ebx, 40(%edx)
-	jne	.LBB228_24
-# BB#23:
-	movl	120(%esp), %edi         # 4-byte Reload
-.LBB228_24:
-	movl	%edi, 44(%edx)
-	jne	.LBB228_26
-# BB#25:
-	movl	124(%esp), %esi         # 4-byte Reload
-.LBB228_26:
-	movl	%esi, 48(%edx)
-	jne	.LBB228_28
-# BB#27:
-	movl	140(%esp), %eax         # 4-byte Reload
-.LBB228_28:
-	movl	%eax, 52(%edx)
-	jne	.LBB228_30
-# BB#29:
-	movl	156(%esp), %ecx         # 4-byte Reload
-.LBB228_30:
-	movl	%ecx, 56(%edx)
-	addl	$1148, %esp             # imm = 0x47C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end228:
-	.size	mcl_fp_montRed15L, .Lfunc_end228-mcl_fp_montRed15L
-
-	.globl	mcl_fp_addPre15L
-	.align	16, 0x90
-	.type	mcl_fp_addPre15L,@function
-mcl_fp_addPre15L:                       # @mcl_fp_addPre15L
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %ebx
-	adcl	8(%ecx), %ebx
-	movl	16(%esp), %edi
-	movl	%edx, (%edi)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%edi)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%ebx, 8(%edi)
-	movl	20(%eax), %ebx
-	movl	%edx, 12(%edi)
-	movl	20(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	24(%eax), %ebx
-	movl	%esi, 16(%edi)
-	movl	24(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	28(%eax), %ebx
-	movl	%edx, 20(%edi)
-	movl	28(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	32(%eax), %ebx
-	movl	%esi, 24(%edi)
-	movl	32(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	36(%eax), %ebx
-	movl	%edx, 28(%edi)
-	movl	36(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	40(%eax), %ebx
-	movl	%esi, 32(%edi)
-	movl	40(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	44(%eax), %ebx
-	movl	%edx, 36(%edi)
-	movl	44(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	48(%eax), %ebx
-	movl	%esi, 40(%edi)
-	movl	48(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	52(%eax), %ebx
-	movl	%edx, 44(%edi)
-	movl	52(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	%esi, 48(%edi)
-	movl	%edx, 52(%edi)
-	movl	56(%eax), %eax
-	movl	56(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 56(%edi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end229:
-	.size	mcl_fp_addPre15L, .Lfunc_end229-mcl_fp_addPre15L
-
-	.globl	mcl_fp_subPre15L
-	.align	16, 0x90
-	.type	mcl_fp_subPre15L,@function
-mcl_fp_subPre15L:                       # @mcl_fp_subPre15L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebp
-	sbbl	8(%edx), %ebp
-	movl	20(%esp), %ebx
-	movl	%esi, (%ebx)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebx)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebp, 8(%ebx)
-	movl	20(%edx), %ebp
-	movl	%esi, 12(%ebx)
-	movl	20(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	24(%edx), %ebp
-	movl	%edi, 16(%ebx)
-	movl	24(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	28(%edx), %ebp
-	movl	%esi, 20(%ebx)
-	movl	28(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	32(%edx), %ebp
-	movl	%edi, 24(%ebx)
-	movl	32(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	36(%edx), %ebp
-	movl	%esi, 28(%ebx)
-	movl	36(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	40(%edx), %ebp
-	movl	%edi, 32(%ebx)
-	movl	40(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	44(%edx), %ebp
-	movl	%esi, 36(%ebx)
-	movl	44(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	48(%edx), %ebp
-	movl	%edi, 40(%ebx)
-	movl	48(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	52(%edx), %ebp
-	movl	%esi, 44(%ebx)
-	movl	52(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	%edi, 48(%ebx)
-	movl	%esi, 52(%ebx)
-	movl	56(%edx), %edx
-	movl	56(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 56(%ebx)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end230:
-	.size	mcl_fp_subPre15L, .Lfunc_end230-mcl_fp_subPre15L
-
-	.globl	mcl_fp_shr1_15L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_15L,@function
-mcl_fp_shr1_15L:                        # @mcl_fp_shr1_15L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	8(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 4(%ecx)
-	movl	12(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 8(%ecx)
-	movl	16(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 16(%ecx)
-	movl	24(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 24(%ecx)
-	movl	32(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 32(%ecx)
-	movl	40(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 40(%ecx)
-	movl	48(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 44(%ecx)
-	movl	52(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 48(%ecx)
-	movl	56(%eax), %eax
-	shrdl	$1, %eax, %esi
-	movl	%esi, 52(%ecx)
-	shrl	%eax
-	movl	%eax, 56(%ecx)
-	popl	%esi
-	retl
-.Lfunc_end231:
-	.size	mcl_fp_shr1_15L, .Lfunc_end231-mcl_fp_shr1_15L
-
-	.globl	mcl_fp_add15L
-	.align	16, 0x90
-	.type	mcl_fp_add15L,@function
-mcl_fp_add15L:                          # @mcl_fp_add15L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$48, %esp
-	movl	76(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edx
-	movl	72(%esp), %eax
-	addl	(%eax), %esi
-	movl	%esi, 4(%esp)           # 4-byte Spill
-	adcl	4(%eax), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	8(%ecx), %edx
-	adcl	8(%eax), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	12(%eax), %esi
-	movl	16(%eax), %edx
-	adcl	12(%ecx), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	adcl	16(%ecx), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	20(%eax), %edx
-	adcl	20(%ecx), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	24(%eax), %edx
-	adcl	24(%ecx), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	28(%eax), %edx
-	adcl	28(%ecx), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	32(%eax), %edx
-	adcl	32(%ecx), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	36(%eax), %edx
-	adcl	36(%ecx), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	40(%eax), %edx
-	adcl	40(%ecx), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	44(%eax), %ebx
-	adcl	44(%ecx), %ebx
-	movl	%ebx, (%esp)            # 4-byte Spill
-	movl	48(%eax), %ebp
-	adcl	48(%ecx), %ebp
-	movl	52(%eax), %edi
-	adcl	52(%ecx), %edi
-	movl	56(%eax), %edx
-	adcl	56(%ecx), %edx
-	movl	68(%esp), %ecx
-	movl	4(%esp), %eax           # 4-byte Reload
-	movl	%eax, (%ecx)
-	movl	44(%esp), %esi          # 4-byte Reload
-	movl	%esi, 4(%ecx)
-	movl	40(%esp), %esi          # 4-byte Reload
-	movl	%esi, 8(%ecx)
-	movl	36(%esp), %esi          # 4-byte Reload
-	movl	%esi, 12(%ecx)
-	movl	32(%esp), %esi          # 4-byte Reload
-	movl	%esi, 16(%ecx)
-	movl	28(%esp), %esi          # 4-byte Reload
-	movl	%esi, 20(%ecx)
-	movl	24(%esp), %esi          # 4-byte Reload
-	movl	%esi, 24(%ecx)
-	movl	20(%esp), %esi          # 4-byte Reload
-	movl	%esi, 28(%ecx)
-	movl	16(%esp), %esi          # 4-byte Reload
-	movl	%esi, 32(%ecx)
-	movl	12(%esp), %esi          # 4-byte Reload
-	movl	%esi, 36(%ecx)
-	movl	8(%esp), %esi           # 4-byte Reload
-	movl	%esi, 40(%ecx)
-	movl	%ebx, 44(%ecx)
-	movl	%ebp, 48(%ecx)
-	movl	%edi, 52(%ecx)
-	movl	%edx, 56(%ecx)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	80(%esp), %esi
-	subl	(%esi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	sbbl	4(%esi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	40(%esp), %edx          # 4-byte Reload
-	sbbl	8(%esi), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %edx          # 4-byte Reload
-	sbbl	12(%esi), %edx
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %edx          # 4-byte Reload
-	sbbl	16(%esi), %edx
-	movl	%edx, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %edx          # 4-byte Reload
-	sbbl	20(%esi), %edx
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %edx          # 4-byte Reload
-	sbbl	24(%esi), %edx
-	movl	%edx, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %edx          # 4-byte Reload
-	sbbl	28(%esi), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %edx          # 4-byte Reload
-	sbbl	32(%esi), %edx
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %edx          # 4-byte Reload
-	sbbl	36(%esi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	8(%esp), %edx           # 4-byte Reload
-	sbbl	40(%esi), %edx
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	(%esp), %edx            # 4-byte Reload
-	sbbl	44(%esi), %edx
-	movl	%edx, (%esp)            # 4-byte Spill
-	sbbl	48(%esi), %ebp
-	sbbl	52(%esi), %edi
-	sbbl	56(%esi), %eax
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB232_2
-# BB#1:                                 # %nocarry
-	movl	4(%esp), %edx           # 4-byte Reload
-	movl	%edx, (%ecx)
-	movl	44(%esp), %edx          # 4-byte Reload
-	movl	%edx, 4(%ecx)
-	movl	40(%esp), %edx          # 4-byte Reload
-	movl	%edx, 8(%ecx)
-	movl	36(%esp), %edx          # 4-byte Reload
-	movl	%edx, 12(%ecx)
-	movl	32(%esp), %edx          # 4-byte Reload
-	movl	%edx, 16(%ecx)
-	movl	28(%esp), %edx          # 4-byte Reload
-	movl	%edx, 20(%ecx)
-	movl	24(%esp), %edx          # 4-byte Reload
-	movl	%edx, 24(%ecx)
-	movl	20(%esp), %edx          # 4-byte Reload
-	movl	%edx, 28(%ecx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	movl	%edx, 32(%ecx)
-	movl	12(%esp), %edx          # 4-byte Reload
-	movl	%edx, 36(%ecx)
-	movl	8(%esp), %edx           # 4-byte Reload
-	movl	%edx, 40(%ecx)
-	movl	(%esp), %edx            # 4-byte Reload
-	movl	%edx, 44(%ecx)
-	movl	%ebp, 48(%ecx)
-	movl	%edi, 52(%ecx)
-	movl	%eax, 56(%ecx)
-.LBB232_2:                              # %carry
-	addl	$48, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end232:
-	.size	mcl_fp_add15L, .Lfunc_end232-mcl_fp_add15L
-
-	.globl	mcl_fp_addNF15L
-	.align	16, 0x90
-	.type	mcl_fp_addNF15L,@function
-mcl_fp_addNF15L:                        # @mcl_fp_addNF15L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$120, %esp
-	movl	148(%esp), %ecx
-	movl	(%ecx), %eax
-	movl	4(%ecx), %edx
-	movl	144(%esp), %esi
-	addl	(%esi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	4(%esi), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	56(%ecx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	52(%ecx), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	48(%ecx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	44(%ecx), %ebp
-	movl	40(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	32(%ecx), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	28(%ecx), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	24(%ecx), %eax
-	movl	20(%ecx), %ebx
-	movl	16(%ecx), %edi
-	movl	12(%ecx), %edx
-	movl	8(%ecx), %ecx
-	adcl	8(%esi), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	12(%esi), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	16(%esi), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	adcl	20(%esi), %ebx
-	movl	%ebx, 72(%esp)          # 4-byte Spill
-	adcl	24(%esi), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	28(%esi), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	32(%esi), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esi), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	40(%esi), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	44(%esi), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	48(%esi), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	52(%esi), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	56(%esi), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	152(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	subl	(%esi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	sbbl	4(%esi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	sbbl	8(%esi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	12(%esi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	sbbl	16(%esi), %edi
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	sbbl	20(%esi), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx          # 4-byte Reload
-	sbbl	24(%esi), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	sbbl	28(%esi), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	sbbl	32(%esi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	36(%esi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	sbbl	40(%esi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	movl	%edx, %eax
-	sbbl	44(%esi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	movl	%eax, %edi
-	sbbl	48(%esi), %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %edi
-	movl	%ecx, %ebx
-	sbbl	52(%esi), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, %edi
-	sbbl	56(%esi), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	%edi, %esi
-	sarl	$31, %esi
-	testl	%esi, %esi
-	movl	80(%esp), %esi          # 4-byte Reload
-	js	.LBB233_2
-# BB#1:
-	movl	(%esp), %esi            # 4-byte Reload
-.LBB233_2:
-	movl	140(%esp), %edi
-	movl	%esi, (%edi)
-	movl	84(%esp), %ecx          # 4-byte Reload
-	js	.LBB233_4
-# BB#3:
-	movl	4(%esp), %ecx           # 4-byte Reload
-.LBB233_4:
-	movl	%ecx, 4(%edi)
-	movl	104(%esp), %ecx         # 4-byte Reload
-	movl	72(%esp), %esi          # 4-byte Reload
-	js	.LBB233_6
-# BB#5:
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-.LBB233_6:
-	movl	76(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 8(%edi)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB233_8
-# BB#7:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB233_8:
-	movl	%eax, 12(%edi)
-	movl	%ebx, %ebp
-	movl	%edx, %eax
-	movl	68(%esp), %edx          # 4-byte Reload
-	js	.LBB233_10
-# BB#9:
-	movl	16(%esp), %edx          # 4-byte Reload
-.LBB233_10:
-	movl	%edx, 16(%edi)
-	movl	112(%esp), %edx         # 4-byte Reload
-	movl	108(%esp), %ebx         # 4-byte Reload
-	js	.LBB233_12
-# BB#11:
-	movl	20(%esp), %esi          # 4-byte Reload
-.LBB233_12:
-	movl	%esi, 20(%edi)
-	js	.LBB233_14
-# BB#13:
-	movl	24(%esp), %esi          # 4-byte Reload
-	movl	%esi, 88(%esp)          # 4-byte Spill
-.LBB233_14:
-	movl	88(%esp), %esi          # 4-byte Reload
-	movl	%esi, 24(%edi)
-	js	.LBB233_16
-# BB#15:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB233_16:
-	movl	%ecx, 28(%edi)
-	js	.LBB233_18
-# BB#17:
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-.LBB233_18:
-	movl	116(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 32(%edi)
-	js	.LBB233_20
-# BB#19:
-	movl	36(%esp), %ebx          # 4-byte Reload
-.LBB233_20:
-	movl	%ebx, 36(%edi)
-	js	.LBB233_22
-# BB#21:
-	movl	40(%esp), %edx          # 4-byte Reload
-.LBB233_22:
-	movl	%edx, 40(%edi)
-	js	.LBB233_24
-# BB#23:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB233_24:
-	movl	%eax, 44(%edi)
-	movl	96(%esp), %eax          # 4-byte Reload
-	js	.LBB233_26
-# BB#25:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB233_26:
-	movl	%eax, 48(%edi)
-	js	.LBB233_28
-# BB#27:
-	movl	52(%esp), %ebp          # 4-byte Reload
-.LBB233_28:
-	movl	%ebp, 52(%edi)
-	movl	100(%esp), %eax         # 4-byte Reload
-	js	.LBB233_30
-# BB#29:
-	movl	56(%esp), %eax          # 4-byte Reload
-.LBB233_30:
-	movl	%eax, 56(%edi)
-	addl	$120, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end233:
-	.size	mcl_fp_addNF15L, .Lfunc_end233-mcl_fp_addNF15L
-
-	.globl	mcl_fp_sub15L
-	.align	16, 0x90
-	.type	mcl_fp_sub15L,@function
-mcl_fp_sub15L:                          # @mcl_fp_sub15L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$56, %esp
-	movl	80(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	84(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esi), %eax
-	sbbl	28(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	32(%esi), %eax
-	sbbl	32(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	36(%esi), %eax
-	sbbl	36(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	40(%esi), %edx
-	sbbl	40(%edi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	44(%esi), %ecx
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	48(%esi), %eax
-	sbbl	48(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	52(%esi), %ebp
-	sbbl	52(%edi), %ebp
-	movl	56(%esi), %esi
-	sbbl	56(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	76(%esp), %ebx
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	52(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 36(%ebx)
-	movl	%edx, 40(%ebx)
-	movl	%ecx, 44(%ebx)
-	movl	%eax, 48(%ebx)
-	movl	%ebp, 52(%ebx)
-	movl	%esi, 56(%ebx)
-	je	.LBB234_2
-# BB#1:                                 # %carry
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	88(%esp), %esi
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	adcl	8(%esi), %edi
-	movl	12(%esi), %eax
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%edi, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	36(%esi), %eax
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	40(%esi), %ecx
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 36(%ebx)
-	movl	44(%esi), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 40(%ebx)
-	movl	48(%esi), %ecx
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 44(%ebx)
-	movl	%ecx, 48(%ebx)
-	movl	52(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 52(%ebx)
-	movl	56(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, 56(%ebx)
-.LBB234_2:                              # %nocarry
-	addl	$56, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end234:
-	.size	mcl_fp_sub15L, .Lfunc_end234-mcl_fp_sub15L
-
-	.globl	mcl_fp_subNF15L
-	.align	16, 0x90
-	.type	mcl_fp_subNF15L,@function
-mcl_fp_subNF15L:                        # @mcl_fp_subNF15L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$96, %esp
-	movl	120(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edx
-	movl	124(%esp), %edi
-	subl	(%edi), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	56(%ecx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	52(%ecx), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	48(%ecx), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	44(%ecx), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	36(%ecx), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	32(%ecx), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	28(%ecx), %ebp
-	movl	24(%ecx), %ebx
-	movl	20(%ecx), %esi
-	movl	16(%ecx), %edx
-	movl	12(%ecx), %eax
-	movl	8(%ecx), %ecx
-	sbbl	8(%edi), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	sbbl	12(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %edx
-	movl	%edx, 40(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	sbbl	28(%edi), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%edi), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	48(%edi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	sbbl	52(%edi), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	sbbl	56(%edi), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%eax, %ebp
-	sarl	$31, %ebp
-	movl	%ebp, %edi
-	shldl	$1, %eax, %edi
-	movl	128(%esp), %edx
-	andl	(%edx), %edi
-	movl	56(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	52(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	48(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	44(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	40(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	36(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	32(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	28(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	24(%edx), %eax
-	andl	%ebp, %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	20(%edx), %ebx
-	andl	%ebp, %ebx
-	movl	16(%edx), %esi
-	andl	%ebp, %esi
-	movl	12(%edx), %ecx
-	andl	%ebp, %ecx
-	movl	8(%edx), %eax
-	andl	%ebp, %eax
-	andl	4(%edx), %ebp
-	addl	60(%esp), %edi          # 4-byte Folded Reload
-	adcl	64(%esp), %ebp          # 4-byte Folded Reload
-	movl	116(%esp), %edx
-	movl	%edi, (%edx)
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 4(%edx)
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 8(%edx)
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%ecx, 12(%edx)
-	adcl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%esi, 16(%edx)
-	movl	(%esp), %ecx            # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ebx, 20(%edx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%edx)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	adcl	92(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%edx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%edx)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 36(%edx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 40(%edx)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	adcl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 44(%edx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 48(%edx)
-	movl	%eax, 52(%edx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%edx)
-	addl	$96, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end235:
-	.size	mcl_fp_subNF15L, .Lfunc_end235-mcl_fp_subNF15L
-
-	.globl	mcl_fpDbl_add15L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add15L,@function
-mcl_fpDbl_add15L:                       # @mcl_fpDbl_add15L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$108, %esp
-	movl	136(%esp), %ecx
-	movl	132(%esp), %edx
-	movl	12(%edx), %edi
-	movl	16(%edx), %esi
-	movl	8(%ecx), %ebx
-	movl	(%ecx), %ebp
-	addl	(%edx), %ebp
-	movl	128(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%ecx), %ebp
-	adcl	4(%edx), %ebp
-	adcl	8(%edx), %ebx
-	adcl	12(%ecx), %edi
-	adcl	16(%ecx), %esi
-	movl	%ebp, 4(%eax)
-	movl	68(%ecx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%ecx), %ebx
-	movl	%edi, 12(%eax)
-	movl	20(%edx), %edi
-	adcl	%ebx, %edi
-	movl	24(%ecx), %ebx
-	movl	%esi, 16(%eax)
-	movl	24(%edx), %esi
-	adcl	%ebx, %esi
-	movl	28(%ecx), %ebx
-	movl	%edi, 20(%eax)
-	movl	28(%edx), %edi
-	adcl	%ebx, %edi
-	movl	32(%ecx), %ebx
-	movl	%esi, 24(%eax)
-	movl	32(%edx), %esi
-	adcl	%ebx, %esi
-	movl	36(%ecx), %ebx
-	movl	%edi, 28(%eax)
-	movl	36(%edx), %edi
-	adcl	%ebx, %edi
-	movl	40(%ecx), %ebx
-	movl	%esi, 32(%eax)
-	movl	40(%edx), %esi
-	adcl	%ebx, %esi
-	movl	44(%ecx), %ebx
-	movl	%edi, 36(%eax)
-	movl	44(%edx), %edi
-	adcl	%ebx, %edi
-	movl	48(%ecx), %ebx
-	movl	%esi, 40(%eax)
-	movl	48(%edx), %esi
-	adcl	%ebx, %esi
-	movl	52(%ecx), %ebx
-	movl	%edi, 44(%eax)
-	movl	52(%edx), %edi
-	adcl	%ebx, %edi
-	movl	56(%ecx), %ebx
-	movl	%esi, 48(%eax)
-	movl	56(%edx), %esi
-	adcl	%ebx, %esi
-	movl	60(%ecx), %ebx
-	movl	%edi, 52(%eax)
-	movl	60(%edx), %edi
-	adcl	%ebx, %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	64(%ecx), %edi
-	movl	%esi, 56(%eax)
-	movl	64(%edx), %eax
-	adcl	%edi, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%edx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	72(%ecx), %esi
-	movl	72(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	76(%ecx), %esi
-	movl	76(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	80(%ecx), %esi
-	movl	80(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	84(%ecx), %esi
-	movl	84(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%ecx), %esi
-	movl	88(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	92(%ecx), %esi
-	movl	92(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	96(%ecx), %esi
-	movl	96(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	100(%ecx), %esi
-	movl	100(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	104(%ecx), %eax
-	movl	104(%edx), %esi
-	adcl	%eax, %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	108(%ecx), %edi
-	movl	108(%edx), %eax
-	adcl	%edi, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	112(%ecx), %ebx
-	movl	112(%edx), %edi
-	adcl	%ebx, %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	116(%ecx), %ecx
-	movl	116(%edx), %edx
-	adcl	%ecx, %edx
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	140(%esp), %ebp
-	movl	76(%esp), %ecx          # 4-byte Reload
-	subl	(%ebp), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	sbbl	4(%ebp), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	8(%ebp), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	sbbl	12(%ebp), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	16(%ebp), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	sbbl	20(%ebp), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	sbbl	24(%ebp), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	sbbl	28(%ebp), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	sbbl	32(%ebp), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	sbbl	36(%ebp), %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	sbbl	40(%ebp), %ecx
-	sbbl	44(%ebp), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	sbbl	48(%ebp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	%edi, %eax
-	movl	%edx, %edi
-	sbbl	52(%ebp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%edi, %esi
-	sbbl	56(%ebp), %esi
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB236_2
-# BB#1:
-	movl	%esi, %edi
-.LBB236_2:
-	testb	%bl, %bl
-	movl	76(%esp), %eax          # 4-byte Reload
-	movl	72(%esp), %esi          # 4-byte Reload
-	movl	68(%esp), %ebx          # 4-byte Reload
-	movl	64(%esp), %ebp          # 4-byte Reload
-	jne	.LBB236_4
-# BB#3:
-	movl	%ecx, %esi
-	movl	(%esp), %ebx            # 4-byte Reload
-	movl	4(%esp), %ebp           # 4-byte Reload
-	movl	8(%esp), %eax           # 4-byte Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB236_4:
-	movl	128(%esp), %edx
-	movl	%eax, 60(%edx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	%eax, 64(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	movl	%eax, 68(%edx)
-	movl	88(%esp), %eax          # 4-byte Reload
-	movl	%eax, 72(%edx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	%eax, 76(%edx)
-	movl	96(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%edx)
-	movl	100(%esp), %eax         # 4-byte Reload
-	movl	%eax, 84(%edx)
-	movl	104(%esp), %eax         # 4-byte Reload
-	movl	%eax, 88(%edx)
-	movl	%ebp, 92(%edx)
-	movl	%ebx, 96(%edx)
-	movl	%esi, 100(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	jne	.LBB236_6
-# BB#5:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB236_6:
-	movl	%eax, 104(%edx)
-	movl	60(%esp), %ecx          # 4-byte Reload
-	movl	56(%esp), %eax          # 4-byte Reload
-	jne	.LBB236_8
-# BB#7:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB236_8:
-	movl	%eax, 108(%edx)
-	jne	.LBB236_10
-# BB#9:
-	movl	48(%esp), %ecx          # 4-byte Reload
-.LBB236_10:
-	movl	%ecx, 112(%edx)
-	movl	%edi, 116(%edx)
-	addl	$108, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end236:
-	.size	mcl_fpDbl_add15L, .Lfunc_end236-mcl_fpDbl_add15L
-
-	.globl	mcl_fpDbl_sub15L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub15L,@function
-mcl_fpDbl_sub15L:                       # @mcl_fpDbl_sub15L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$100, %esp
-	movl	124(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	128(%esp), %ebp
-	subl	(%ebp), %edx
-	sbbl	4(%ebp), %esi
-	movl	8(%eax), %edi
-	sbbl	8(%ebp), %edi
-	movl	120(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	12(%eax), %edx
-	sbbl	12(%ebp), %edx
-	movl	%esi, 4(%ecx)
-	movl	16(%eax), %esi
-	sbbl	16(%ebp), %esi
-	movl	%edi, 8(%ecx)
-	movl	20(%ebp), %edi
-	movl	%edx, 12(%ecx)
-	movl	20(%eax), %edx
-	sbbl	%edi, %edx
-	movl	24(%ebp), %edi
-	movl	%esi, 16(%ecx)
-	movl	24(%eax), %esi
-	sbbl	%edi, %esi
-	movl	28(%ebp), %edi
-	movl	%edx, 20(%ecx)
-	movl	28(%eax), %edx
-	sbbl	%edi, %edx
-	movl	32(%ebp), %edi
-	movl	%esi, 24(%ecx)
-	movl	32(%eax), %esi
-	sbbl	%edi, %esi
-	movl	36(%ebp), %edi
-	movl	%edx, 28(%ecx)
-	movl	36(%eax), %edx
-	sbbl	%edi, %edx
-	movl	40(%ebp), %edi
-	movl	%esi, 32(%ecx)
-	movl	40(%eax), %esi
-	sbbl	%edi, %esi
-	movl	44(%ebp), %edi
-	movl	%edx, 36(%ecx)
-	movl	44(%eax), %edx
-	sbbl	%edi, %edx
-	movl	48(%ebp), %edi
-	movl	%esi, 40(%ecx)
-	movl	48(%eax), %esi
-	sbbl	%edi, %esi
-	movl	52(%ebp), %edi
-	movl	%edx, 44(%ecx)
-	movl	52(%eax), %edx
-	sbbl	%edi, %edx
-	movl	56(%ebp), %edi
-	movl	%esi, 48(%ecx)
-	movl	56(%eax), %esi
-	sbbl	%edi, %esi
-	movl	60(%ebp), %edi
-	movl	%edx, 52(%ecx)
-	movl	60(%eax), %edx
-	sbbl	%edi, %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	64(%ebp), %edx
-	movl	%esi, 56(%ecx)
-	movl	64(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	68(%ebp), %edx
-	movl	68(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	72(%ebp), %edx
-	movl	72(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	76(%ebp), %edx
-	movl	76(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	80(%ebp), %edx
-	movl	80(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	84(%ebp), %edx
-	movl	84(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	88(%ebp), %edx
-	movl	88(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	92(%ebp), %edx
-	movl	92(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	96(%ebp), %edx
-	movl	96(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	100(%ebp), %edx
-	movl	100(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	104(%ebp), %edx
-	movl	104(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	108(%ebp), %edx
-	movl	108(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	112(%ebp), %edx
-	movl	112(%eax), %esi
-	sbbl	%edx, %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	116(%ebp), %edx
-	movl	116(%eax), %eax
-	sbbl	%edx, %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	132(%esp), %esi
-	jne	.LBB237_1
-# BB#2:
-	movl	$0, 60(%esp)            # 4-byte Folded Spill
-	jmp	.LBB237_3
-.LBB237_1:
-	movl	56(%esi), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-.LBB237_3:
-	testb	%al, %al
-	jne	.LBB237_4
-# BB#5:
-	movl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	$0, %ebx
-	jmp	.LBB237_6
-.LBB237_4:
-	movl	(%esi), %ebx
-	movl	4(%esi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-.LBB237_6:
-	jne	.LBB237_7
-# BB#8:
-	movl	$0, 32(%esp)            # 4-byte Folded Spill
-	jmp	.LBB237_9
-.LBB237_7:
-	movl	52(%esi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-.LBB237_9:
-	jne	.LBB237_10
-# BB#11:
-	movl	$0, 28(%esp)            # 4-byte Folded Spill
-	jmp	.LBB237_12
-.LBB237_10:
-	movl	48(%esi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-.LBB237_12:
-	jne	.LBB237_13
-# BB#14:
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB237_15
-.LBB237_13:
-	movl	44(%esi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-.LBB237_15:
-	jne	.LBB237_16
-# BB#17:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	jmp	.LBB237_18
-.LBB237_16:
-	movl	40(%esi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB237_18:
-	jne	.LBB237_19
-# BB#20:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB237_21
-.LBB237_19:
-	movl	36(%esi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB237_21:
-	jne	.LBB237_22
-# BB#23:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB237_24
-.LBB237_22:
-	movl	32(%esi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB237_24:
-	jne	.LBB237_25
-# BB#26:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB237_27
-.LBB237_25:
-	movl	28(%esi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB237_27:
-	jne	.LBB237_28
-# BB#29:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB237_30
-.LBB237_28:
-	movl	24(%esi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB237_30:
-	jne	.LBB237_31
-# BB#32:
-	movl	$0, %edx
-	jmp	.LBB237_33
-.LBB237_31:
-	movl	20(%esi), %edx
-.LBB237_33:
-	jne	.LBB237_34
-# BB#35:
-	movl	$0, %ebp
-	jmp	.LBB237_36
-.LBB237_34:
-	movl	16(%esi), %ebp
-.LBB237_36:
-	jne	.LBB237_37
-# BB#38:
-	movl	$0, %eax
-	jmp	.LBB237_39
-.LBB237_37:
-	movl	12(%esi), %eax
-.LBB237_39:
-	jne	.LBB237_40
-# BB#41:
-	xorl	%esi, %esi
-	jmp	.LBB237_42
-.LBB237_40:
-	movl	8(%esi), %esi
-.LBB237_42:
-	addl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	24(%esp), %edi          # 4-byte Reload
-	adcl	36(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 60(%ecx)
-	adcl	40(%esp), %esi          # 4-byte Folded Reload
-	movl	%edi, 64(%ecx)
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, 68(%ecx)
-	adcl	52(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 72(%ecx)
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebp, 76(%ecx)
-	movl	(%esp), %esi            # 4-byte Reload
-	adcl	64(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 80(%ecx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, 84(%ecx)
-	movl	8(%esp), %edx           # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 88(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 92(%ecx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 96(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 100(%ecx)
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 104(%ecx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 108(%ecx)
-	movl	%eax, 112(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%ecx)
-	addl	$100, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end237:
-	.size	mcl_fpDbl_sub15L, .Lfunc_end237-mcl_fpDbl_sub15L
-
-	.align	16, 0x90
-	.type	.LmulPv512x32,@function
-.LmulPv512x32:                          # @mulPv512x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$112, %esp
-	movl	%edx, %ebp
-	movl	132(%esp), %ebx
-	movl	%ebx, %eax
-	mull	60(%ebp)
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	%ebx, %eax
-	mull	56(%ebp)
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	52(%ebp)
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	48(%ebp)
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	44(%ebp)
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	40(%ebp)
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	36(%ebp)
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	32(%ebp)
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	28(%ebp)
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	24(%ebp)
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	20(%ebp)
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	16(%ebp)
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	12(%ebp)
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	8(%ebp)
-	movl	%edx, %esi
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	4(%ebp)
-	movl	%edx, %edi
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ebx, %eax
-	mull	(%ebp)
-	movl	%eax, (%ecx)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%ecx)
-	adcl	4(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 8(%ecx)
-	adcl	8(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 12(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%ecx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%ecx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%ecx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%ecx)
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%ecx)
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 64(%ecx)
-	movl	%ecx, %eax
-	addl	$112, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end238:
-	.size	.LmulPv512x32, .Lfunc_end238-.LmulPv512x32
-
-	.globl	mcl_fp_mulUnitPre16L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre16L,@function
-mcl_fp_mulUnitPre16L:                   # @mcl_fp_mulUnitPre16L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$124, %esp
-	calll	.L239$pb
-.L239$pb:
-	popl	%ebx
-.Ltmp50:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp50-.L239$pb), %ebx
-	movl	152(%esp), %eax
-	movl	%eax, (%esp)
-	leal	56(%esp), %ecx
-	movl	148(%esp), %edx
-	calll	.LmulPv512x32
-	movl	120(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp
-	movl	72(%esp), %ebx
-	movl	68(%esp), %edi
-	movl	64(%esp), %esi
-	movl	56(%esp), %edx
-	movl	60(%esp), %ecx
-	movl	144(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 60(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 64(%eax)
-	addl	$124, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end239:
-	.size	mcl_fp_mulUnitPre16L, .Lfunc_end239-mcl_fp_mulUnitPre16L
-
-	.globl	mcl_fpDbl_mulPre16L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre16L,@function
-mcl_fpDbl_mulPre16L:                    # @mcl_fpDbl_mulPre16L
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$300, %esp              # imm = 0x12C
-	calll	.L240$pb
-.L240$pb:
-	popl	%ebx
-.Ltmp51:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp51-.L240$pb), %ebx
-	movl	%ebx, -224(%ebp)        # 4-byte Spill
-	movl	16(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	12(%ebp), %esi
-	movl	%esi, 4(%esp)
-	movl	8(%ebp), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre8L@PLT
-	leal	32(%edi), %eax
-	movl	%eax, 8(%esp)
-	leal	32(%esi), %eax
-	movl	%eax, 4(%esp)
-	movl	8(%ebp), %eax
-	leal	64(%eax), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre8L@PLT
-	movl	52(%esi), %ebx
-	movl	48(%esi), %eax
-	movl	44(%esi), %ecx
-	movl	40(%esi), %edx
-	movl	%edx, -176(%ebp)        # 4-byte Spill
-	movl	(%esi), %edi
-	movl	4(%esi), %edx
-	addl	32(%esi), %edi
-	movl	%edi, -184(%ebp)        # 4-byte Spill
-	movl	%esi, %edi
-	adcl	36(%edi), %edx
-	movl	%edx, -236(%ebp)        # 4-byte Spill
-	movl	-176(%ebp), %edx        # 4-byte Reload
-	adcl	8(%edi), %edx
-	movl	%edx, -176(%ebp)        # 4-byte Spill
-	adcl	12(%edi), %ecx
-	movl	%ecx, -232(%ebp)        # 4-byte Spill
-	adcl	16(%edi), %eax
-	movl	%eax, -212(%ebp)        # 4-byte Spill
-	adcl	20(%edi), %ebx
-	movl	%ebx, -228(%ebp)        # 4-byte Spill
-	movl	56(%edi), %eax
-	adcl	24(%edi), %eax
-	movl	%eax, -248(%ebp)        # 4-byte Spill
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %ecx
-	popl	%eax
-	movl	%ecx, -144(%ebp)        # 4-byte Spill
-	movl	16(%ebp), %esi
-	movl	(%esi), %ecx
-	addl	32(%esi), %ecx
-	movl	%ecx, -188(%ebp)        # 4-byte Spill
-	movl	4(%esi), %ecx
-	adcl	36(%esi), %ecx
-	movl	%ecx, -192(%ebp)        # 4-byte Spill
-	movl	40(%esi), %ecx
-	adcl	8(%esi), %ecx
-	movl	%ecx, -196(%ebp)        # 4-byte Spill
-	movl	44(%esi), %ecx
-	adcl	12(%esi), %ecx
-	movl	%ecx, -200(%ebp)        # 4-byte Spill
-	movl	48(%esi), %ecx
-	adcl	16(%esi), %ecx
-	movl	%ecx, -204(%ebp)        # 4-byte Spill
-	movl	52(%esi), %ecx
-	adcl	20(%esi), %ecx
-	movl	%ecx, -208(%ebp)        # 4-byte Spill
-	movl	56(%esi), %edx
-	adcl	24(%esi), %edx
-	movl	60(%esi), %ecx
-	adcl	28(%esi), %ecx
-	pushl	%eax
-	seto	%al
-	lahf
-	movl	%eax, %ebx
-	popl	%eax
-	movl	%ebx, -252(%ebp)        # 4-byte Spill
-	movl	-212(%ebp), %ebx        # 4-byte Reload
-	movl	-176(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -216(%ebp)        # 4-byte Spill
-	movl	-184(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -220(%ebp)        # 4-byte Spill
-	jb	.LBB240_2
-# BB#1:
-	xorl	%eax, %eax
-	xorl	%ebx, %ebx
-	movl	$0, -216(%ebp)          # 4-byte Folded Spill
-	movl	$0, -220(%ebp)          # 4-byte Folded Spill
-.LBB240_2:
-	movl	%ebx, -244(%ebp)        # 4-byte Spill
-	movl	%eax, -240(%ebp)        # 4-byte Spill
-	movl	60(%edi), %eax
-	movl	-144(%ebp), %ebx        # 4-byte Reload
-	pushl	%eax
-	movl	%ebx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	28(%edi), %eax
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	movl	%ecx, -172(%ebp)        # 4-byte Spill
-	movl	%edx, -144(%ebp)        # 4-byte Spill
-	movl	-208(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	movl	-204(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-	movl	-200(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	movl	-196(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	movl	-192(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -164(%ebp)        # 4-byte Spill
-	movl	-188(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	jb	.LBB240_4
-# BB#3:
-	movl	$0, -172(%ebp)          # 4-byte Folded Spill
-	movl	$0, -144(%ebp)          # 4-byte Folded Spill
-	movl	$0, -148(%ebp)          # 4-byte Folded Spill
-	movl	$0, -152(%ebp)          # 4-byte Folded Spill
-	movl	$0, -156(%ebp)          # 4-byte Folded Spill
-	movl	$0, -160(%ebp)          # 4-byte Folded Spill
-	movl	$0, -164(%ebp)          # 4-byte Folded Spill
-	movl	$0, -168(%ebp)          # 4-byte Folded Spill
-.LBB240_4:
-	movl	-184(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -108(%ebp)
-	movl	-236(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -104(%ebp)
-	movl	-176(%ebp), %edi        # 4-byte Reload
-	movl	%edi, -100(%ebp)
-	movl	-232(%ebp), %edi        # 4-byte Reload
-	movl	%edi, -96(%ebp)
-	movl	-212(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -92(%ebp)
-	movl	-228(%ebp), %esi        # 4-byte Reload
-	movl	%esi, -88(%ebp)
-	movl	-248(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -84(%ebp)
-	movl	-188(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -140(%ebp)
-	movl	-192(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -136(%ebp)
-	movl	-196(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -132(%ebp)
-	movl	-200(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -128(%ebp)
-	movl	-204(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -124(%ebp)
-	movl	-208(%ebp), %ebx        # 4-byte Reload
-	movl	%ebx, -120(%ebp)
-	movl	%esi, %ebx
-	movl	%edi, %esi
-	movl	%eax, %edi
-	movl	%edx, -116(%ebp)
-	movl	%ecx, -112(%ebp)
-	sbbl	%edx, %edx
-	movl	-180(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -80(%ebp)
-	movl	-252(%ebp), %ecx        # 4-byte Reload
-	pushl	%eax
-	movl	%ecx, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB240_6
-# BB#5:
-	movl	$0, %eax
-	movl	$0, %ebx
-	movl	$0, %esi
-	movl	$0, %edi
-.LBB240_6:
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	sbbl	%eax, %eax
-	leal	-140(%ebp), %ecx
-	movl	%ecx, 8(%esp)
-	leal	-108(%ebp), %ecx
-	movl	%ecx, 4(%esp)
-	leal	-76(%ebp), %ecx
-	movl	%ecx, (%esp)
-	andl	%eax, %edx
-	movl	-220(%ebp), %eax        # 4-byte Reload
-	addl	%eax, -168(%ebp)        # 4-byte Folded Spill
-	adcl	%edi, -164(%ebp)        # 4-byte Folded Spill
-	movl	-216(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -160(%ebp)        # 4-byte Folded Spill
-	adcl	%esi, -156(%ebp)        # 4-byte Folded Spill
-	movl	-244(%ebp), %eax        # 4-byte Reload
-	adcl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	adcl	%ebx, -148(%ebp)        # 4-byte Folded Spill
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	adcl	-240(%ebp), %eax        # 4-byte Folded Reload
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	movl	-172(%ebp), %edi        # 4-byte Reload
-	adcl	-180(%ebp), %edi        # 4-byte Folded Reload
-	sbbl	%esi, %esi
-	andl	$1, %esi
-	andl	$1, %edx
-	movl	%edx, -176(%ebp)        # 4-byte Spill
-	movl	-224(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre8L@PLT
-	movl	-168(%ebp), %eax        # 4-byte Reload
-	addl	-44(%ebp), %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	movl	-164(%ebp), %eax        # 4-byte Reload
-	adcl	-40(%ebp), %eax
-	movl	%eax, -164(%ebp)        # 4-byte Spill
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	adcl	-36(%ebp), %eax
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	-32(%ebp), %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	movl	-152(%ebp), %eax        # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	adcl	-16(%ebp), %edi
-	movl	%edi, -172(%ebp)        # 4-byte Spill
-	adcl	%esi, -176(%ebp)        # 4-byte Folded Spill
-	movl	-76(%ebp), %eax
-	movl	8(%ebp), %esi
-	subl	(%esi), %eax
-	movl	%eax, -196(%ebp)        # 4-byte Spill
-	movl	-72(%ebp), %ecx
-	sbbl	4(%esi), %ecx
-	movl	-68(%ebp), %eax
-	sbbl	8(%esi), %eax
-	movl	%eax, -192(%ebp)        # 4-byte Spill
-	movl	-64(%ebp), %edx
-	sbbl	12(%esi), %edx
-	movl	-60(%ebp), %ebx
-	sbbl	16(%esi), %ebx
-	movl	-56(%ebp), %eax
-	sbbl	20(%esi), %eax
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	movl	-52(%ebp), %eax
-	sbbl	24(%esi), %eax
-	movl	%eax, -184(%ebp)        # 4-byte Spill
-	movl	-48(%ebp), %eax
-	sbbl	28(%esi), %eax
-	movl	%eax, -188(%ebp)        # 4-byte Spill
-	movl	32(%esi), %eax
-	movl	%eax, -200(%ebp)        # 4-byte Spill
-	sbbl	%eax, -168(%ebp)        # 4-byte Folded Spill
-	movl	36(%esi), %eax
-	movl	%eax, -204(%ebp)        # 4-byte Spill
-	sbbl	%eax, -164(%ebp)        # 4-byte Folded Spill
-	movl	40(%esi), %eax
-	movl	%eax, -208(%ebp)        # 4-byte Spill
-	sbbl	%eax, -160(%ebp)        # 4-byte Folded Spill
-	movl	44(%esi), %eax
-	movl	%eax, -212(%ebp)        # 4-byte Spill
-	sbbl	%eax, -156(%ebp)        # 4-byte Folded Spill
-	movl	48(%esi), %eax
-	movl	%eax, -216(%ebp)        # 4-byte Spill
-	sbbl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	movl	52(%esi), %eax
-	movl	%eax, -220(%ebp)        # 4-byte Spill
-	sbbl	%eax, -148(%ebp)        # 4-byte Folded Spill
-	movl	56(%esi), %eax
-	movl	%eax, -224(%ebp)        # 4-byte Spill
-	movl	-144(%ebp), %edi        # 4-byte Reload
-	sbbl	%eax, %edi
-	movl	60(%esi), %eax
-	movl	%eax, -228(%ebp)        # 4-byte Spill
-	sbbl	%eax, -172(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, -176(%ebp)          # 4-byte Folded Spill
-	movl	64(%esi), %eax
-	movl	%eax, -260(%ebp)        # 4-byte Spill
-	subl	%eax, -196(%ebp)        # 4-byte Folded Spill
-	movl	68(%esi), %eax
-	movl	%eax, -264(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ecx
-	movl	72(%esi), %eax
-	movl	%eax, -268(%ebp)        # 4-byte Spill
-	sbbl	%eax, -192(%ebp)        # 4-byte Folded Spill
-	movl	76(%esi), %eax
-	movl	%eax, -272(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edx
-	movl	80(%esi), %eax
-	movl	%eax, -276(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ebx
-	movl	84(%esi), %eax
-	movl	%eax, -280(%ebp)        # 4-byte Spill
-	sbbl	%eax, -180(%ebp)        # 4-byte Folded Spill
-	movl	88(%esi), %eax
-	movl	%eax, -284(%ebp)        # 4-byte Spill
-	sbbl	%eax, -184(%ebp)        # 4-byte Folded Spill
-	movl	92(%esi), %eax
-	movl	%eax, -288(%ebp)        # 4-byte Spill
-	sbbl	%eax, -188(%ebp)        # 4-byte Folded Spill
-	movl	96(%esi), %eax
-	movl	%eax, -292(%ebp)        # 4-byte Spill
-	sbbl	%eax, -168(%ebp)        # 4-byte Folded Spill
-	movl	100(%esi), %eax
-	movl	%eax, -236(%ebp)        # 4-byte Spill
-	sbbl	%eax, -164(%ebp)        # 4-byte Folded Spill
-	movl	104(%esi), %eax
-	movl	%eax, -240(%ebp)        # 4-byte Spill
-	sbbl	%eax, -160(%ebp)        # 4-byte Folded Spill
-	movl	108(%esi), %eax
-	movl	%eax, -244(%ebp)        # 4-byte Spill
-	sbbl	%eax, -156(%ebp)        # 4-byte Folded Spill
-	movl	112(%esi), %eax
-	movl	%eax, -248(%ebp)        # 4-byte Spill
-	sbbl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	movl	116(%esi), %eax
-	movl	%eax, -252(%ebp)        # 4-byte Spill
-	sbbl	%eax, -148(%ebp)        # 4-byte Folded Spill
-	movl	120(%esi), %eax
-	movl	%eax, -232(%ebp)        # 4-byte Spill
-	sbbl	%eax, %edi
-	movl	%edi, -144(%ebp)        # 4-byte Spill
-	movl	124(%esi), %eax
-	movl	%eax, -256(%ebp)        # 4-byte Spill
-	sbbl	%eax, -172(%ebp)        # 4-byte Folded Spill
-	movl	-176(%ebp), %edi        # 4-byte Reload
-	sbbl	$0, %edi
-	movl	-196(%ebp), %eax        # 4-byte Reload
-	addl	-200(%ebp), %eax        # 4-byte Folded Reload
-	adcl	-204(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 32(%esi)
-	movl	-192(%ebp), %eax        # 4-byte Reload
-	adcl	-208(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 36(%esi)
-	adcl	-212(%ebp), %edx        # 4-byte Folded Reload
-	movl	%eax, 40(%esi)
-	adcl	-216(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%edx, 44(%esi)
-	movl	-180(%ebp), %eax        # 4-byte Reload
-	adcl	-220(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ebx, 48(%esi)
-	movl	-184(%ebp), %ecx        # 4-byte Reload
-	adcl	-224(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 52(%esi)
-	movl	-188(%ebp), %edx        # 4-byte Reload
-	adcl	-228(%ebp), %edx        # 4-byte Folded Reload
-	movl	%ecx, 56(%esi)
-	movl	-168(%ebp), %eax        # 4-byte Reload
-	adcl	-260(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edx, 60(%esi)
-	movl	-164(%ebp), %ecx        # 4-byte Reload
-	adcl	-264(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 64(%esi)
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	adcl	-268(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 68(%esi)
-	movl	-156(%ebp), %ecx        # 4-byte Reload
-	adcl	-272(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 72(%esi)
-	movl	-152(%ebp), %eax        # 4-byte Reload
-	adcl	-276(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 76(%esi)
-	movl	-148(%ebp), %ecx        # 4-byte Reload
-	adcl	-280(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 80(%esi)
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	adcl	-284(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 84(%esi)
-	movl	-172(%ebp), %ecx        # 4-byte Reload
-	adcl	-288(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 88(%esi)
-	adcl	-292(%ebp), %edi        # 4-byte Folded Reload
-	movl	%ecx, 92(%esi)
-	movl	%edi, 96(%esi)
-	movl	-236(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 100(%esi)
-	movl	-240(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 104(%esi)
-	movl	-244(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 108(%esi)
-	movl	-248(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 112(%esi)
-	movl	-252(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 116(%esi)
-	movl	-232(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 120(%esi)
-	movl	-256(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 124(%esi)
-	addl	$300, %esp              # imm = 0x12C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end240:
-	.size	mcl_fpDbl_mulPre16L, .Lfunc_end240-mcl_fpDbl_mulPre16L
-
-	.globl	mcl_fpDbl_sqrPre16L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre16L,@function
-mcl_fpDbl_sqrPre16L:                    # @mcl_fpDbl_sqrPre16L
-# BB#0:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$300, %esp              # imm = 0x12C
-	calll	.L241$pb
-.L241$pb:
-	popl	%ebx
-.Ltmp52:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp52-.L241$pb), %ebx
-	movl	%ebx, -184(%ebp)        # 4-byte Spill
-	movl	12(%ebp), %edi
-	movl	%edi, 8(%esp)
-	movl	%edi, 4(%esp)
-	movl	8(%ebp), %esi
-	movl	%esi, (%esp)
-	calll	mcl_fpDbl_mulPre8L@PLT
-	leal	32(%edi), %eax
-	movl	%eax, 8(%esp)
-	movl	%eax, 4(%esp)
-	leal	64(%esi), %eax
-	movl	%eax, (%esp)
-	calll	mcl_fpDbl_mulPre8L@PLT
-	movl	52(%edi), %eax
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	movl	48(%edi), %eax
-	movl	44(%edi), %ebx
-	movl	40(%edi), %esi
-	movl	(%edi), %ecx
-	movl	4(%edi), %edx
-	addl	32(%edi), %ecx
-	movl	%ecx, -192(%ebp)        # 4-byte Spill
-	adcl	36(%edi), %edx
-	movl	%edx, -196(%ebp)        # 4-byte Spill
-	adcl	8(%edi), %esi
-	movl	%esi, -188(%ebp)        # 4-byte Spill
-	adcl	12(%edi), %ebx
-	adcl	16(%edi), %eax
-	movl	%eax, -208(%ebp)        # 4-byte Spill
-	movl	-180(%ebp), %eax        # 4-byte Reload
-	adcl	20(%edi), %eax
-	movl	%eax, -180(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	addl	%ecx, %ecx
-	movl	%ecx, -164(%ebp)        # 4-byte Spill
-	adcl	%edx, %edx
-	movl	%edx, -160(%ebp)        # 4-byte Spill
-	adcl	%esi, %esi
-	movl	%esi, -156(%ebp)        # 4-byte Spill
-	movl	%ebx, %edx
-	movl	%ebx, %esi
-	adcl	%edx, %edx
-	movl	%edx, -152(%ebp)        # 4-byte Spill
-	movl	-208(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %edx
-	movl	%eax, %ebx
-	adcl	%edx, %edx
-	movl	%edx, -148(%ebp)        # 4-byte Spill
-	movl	-180(%ebp), %edx        # 4-byte Reload
-	adcl	%edx, %edx
-	movl	%edx, -144(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -172(%ebp)        # 4-byte Spill
-	movl	56(%edi), %edx
-	movl	-168(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	adcl	24(%edi), %edx
-	movl	60(%edi), %ecx
-	adcl	28(%edi), %ecx
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -200(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %eax
-	movl	%eax, -204(%ebp)        # 4-byte Spill
-	seto	%al
-	lahf
-	movl	%eax, %edi
-	sbbl	%eax, %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB241_2
-# BB#1:
-	movl	$0, -144(%ebp)          # 4-byte Folded Spill
-	movl	$0, -148(%ebp)          # 4-byte Folded Spill
-	movl	$0, -152(%ebp)          # 4-byte Folded Spill
-	movl	$0, -156(%ebp)          # 4-byte Folded Spill
-	movl	$0, -160(%ebp)          # 4-byte Folded Spill
-	movl	$0, -164(%ebp)          # 4-byte Folded Spill
-.LBB241_2:
-	movl	%edx, %eax
-	movl	-172(%ebp), %edi        # 4-byte Reload
-	pushl	%eax
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	adcl	%eax, %eax
-	movl	%ecx, %edi
-	adcl	%edi, %edi
-	movl	%edi, -176(%ebp)        # 4-byte Spill
-	movl	-204(%ebp), %edi        # 4-byte Reload
-	pushl	%eax
-	movl	%edi, %eax
-	addb	$127, %al
-	sahf
-	popl	%eax
-	jb	.LBB241_4
-# BB#3:
-	movl	$0, -176(%ebp)          # 4-byte Folded Spill
-	xorl	%eax, %eax
-.LBB241_4:
-	movl	%eax, -172(%ebp)        # 4-byte Spill
-	movl	-192(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -108(%ebp)
-	movl	%eax, -140(%ebp)
-	movl	-196(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -104(%ebp)
-	movl	%eax, -136(%ebp)
-	movl	-188(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -100(%ebp)
-	movl	%eax, -132(%ebp)
-	movl	%esi, -96(%ebp)
-	movl	%esi, -128(%ebp)
-	movl	%ebx, -92(%ebp)
-	movl	%ebx, -124(%ebp)
-	movl	-180(%ebp), %eax        # 4-byte Reload
-	movl	%eax, -88(%ebp)
-	movl	%eax, -120(%ebp)
-	movl	%edx, -84(%ebp)
-	movl	%edx, -116(%ebp)
-	movl	%ecx, -80(%ebp)
-	movl	%ecx, -112(%ebp)
-	movl	-200(%ebp), %eax        # 4-byte Reload
-	movl	%eax, %eax
-	addb	$127, %al
-	sahf
-	jb	.LBB241_5
-# BB#6:
-	xorl	%edi, %edi
-	jmp	.LBB241_7
-.LBB241_5:
-	shrl	$31, %ecx
-	movl	%ecx, %edi
-.LBB241_7:
-	leal	-140(%ebp), %eax
-	movl	%eax, 8(%esp)
-	leal	-108(%ebp), %eax
-	movl	%eax, 4(%esp)
-	leal	-76(%ebp), %eax
-	movl	%eax, (%esp)
-	movl	-168(%ebp), %esi        # 4-byte Reload
-	andl	$1, %esi
-	movl	-184(%ebp), %ebx        # 4-byte Reload
-	calll	mcl_fpDbl_mulPre8L@PLT
-	movl	-164(%ebp), %eax        # 4-byte Reload
-	addl	-44(%ebp), %eax
-	movl	%eax, -164(%ebp)        # 4-byte Spill
-	movl	-160(%ebp), %eax        # 4-byte Reload
-	adcl	-40(%ebp), %eax
-	movl	%eax, -160(%ebp)        # 4-byte Spill
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	-36(%ebp), %eax
-	movl	%eax, -156(%ebp)        # 4-byte Spill
-	movl	-152(%ebp), %eax        # 4-byte Reload
-	adcl	-32(%ebp), %eax
-	movl	%eax, -152(%ebp)        # 4-byte Spill
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	-28(%ebp), %eax
-	movl	%eax, -148(%ebp)        # 4-byte Spill
-	movl	-144(%ebp), %eax        # 4-byte Reload
-	adcl	-24(%ebp), %eax
-	movl	%eax, -144(%ebp)        # 4-byte Spill
-	movl	-172(%ebp), %eax        # 4-byte Reload
-	adcl	-20(%ebp), %eax
-	movl	%eax, -172(%ebp)        # 4-byte Spill
-	movl	-176(%ebp), %eax        # 4-byte Reload
-	adcl	-16(%ebp), %eax
-	adcl	%edi, %esi
-	movl	%esi, -168(%ebp)        # 4-byte Spill
-	movl	-76(%ebp), %ecx
-	movl	8(%ebp), %esi
-	subl	(%esi), %ecx
-	movl	%ecx, -180(%ebp)        # 4-byte Spill
-	movl	-72(%ebp), %edi
-	sbbl	4(%esi), %edi
-	movl	-68(%ebp), %edx
-	sbbl	8(%esi), %edx
-	movl	%edx, -184(%ebp)        # 4-byte Spill
-	movl	-64(%ebp), %edx
-	sbbl	12(%esi), %edx
-	movl	%edx, -192(%ebp)        # 4-byte Spill
-	movl	-60(%ebp), %ebx
-	sbbl	16(%esi), %ebx
-	movl	%eax, %ecx
-	movl	-56(%ebp), %eax
-	sbbl	20(%esi), %eax
-	movl	%eax, -196(%ebp)        # 4-byte Spill
-	movl	-52(%ebp), %edx
-	sbbl	24(%esi), %edx
-	movl	%edx, -188(%ebp)        # 4-byte Spill
-	movl	-48(%ebp), %edx
-	sbbl	28(%esi), %edx
-	movl	32(%esi), %eax
-	movl	%eax, -200(%ebp)        # 4-byte Spill
-	sbbl	%eax, -164(%ebp)        # 4-byte Folded Spill
-	movl	36(%esi), %eax
-	movl	%eax, -204(%ebp)        # 4-byte Spill
-	sbbl	%eax, -160(%ebp)        # 4-byte Folded Spill
-	movl	40(%esi), %eax
-	movl	%eax, -208(%ebp)        # 4-byte Spill
-	sbbl	%eax, -156(%ebp)        # 4-byte Folded Spill
-	movl	44(%esi), %eax
-	movl	%eax, -212(%ebp)        # 4-byte Spill
-	sbbl	%eax, -152(%ebp)        # 4-byte Folded Spill
-	movl	48(%esi), %eax
-	movl	%eax, -216(%ebp)        # 4-byte Spill
-	sbbl	%eax, -148(%ebp)        # 4-byte Folded Spill
-	movl	52(%esi), %eax
-	movl	%eax, -220(%ebp)        # 4-byte Spill
-	sbbl	%eax, -144(%ebp)        # 4-byte Folded Spill
-	movl	56(%esi), %eax
-	movl	%eax, -224(%ebp)        # 4-byte Spill
-	sbbl	%eax, -172(%ebp)        # 4-byte Folded Spill
-	movl	60(%esi), %eax
-	movl	%eax, -228(%ebp)        # 4-byte Spill
-	sbbl	%eax, %ecx
-	movl	%ecx, -176(%ebp)        # 4-byte Spill
-	movl	-168(%ebp), %eax        # 4-byte Reload
-	sbbl	$0, %eax
-	movl	64(%esi), %ecx
-	movl	%ecx, -260(%ebp)        # 4-byte Spill
-	subl	%ecx, -180(%ebp)        # 4-byte Folded Spill
-	movl	68(%esi), %ecx
-	movl	%ecx, -264(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edi
-	movl	72(%esi), %ecx
-	movl	%ecx, -268(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -184(%ebp)        # 4-byte Folded Spill
-	movl	76(%esi), %ecx
-	movl	%ecx, -272(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -192(%ebp)        # 4-byte Folded Spill
-	movl	80(%esi), %ecx
-	movl	%ecx, -276(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %ebx
-	movl	84(%esi), %ecx
-	movl	%ecx, -280(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -196(%ebp)        # 4-byte Folded Spill
-	movl	88(%esi), %ecx
-	movl	%ecx, -284(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -188(%ebp)        # 4-byte Folded Spill
-	movl	92(%esi), %ecx
-	movl	%ecx, -288(%ebp)        # 4-byte Spill
-	sbbl	%ecx, %edx
-	movl	96(%esi), %ecx
-	movl	%ecx, -292(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -164(%ebp)        # 4-byte Folded Spill
-	movl	100(%esi), %ecx
-	movl	%ecx, -232(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -160(%ebp)        # 4-byte Folded Spill
-	movl	104(%esi), %ecx
-	movl	%ecx, -236(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -156(%ebp)        # 4-byte Folded Spill
-	movl	108(%esi), %ecx
-	movl	%ecx, -240(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -152(%ebp)        # 4-byte Folded Spill
-	movl	112(%esi), %ecx
-	movl	%ecx, -244(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -148(%ebp)        # 4-byte Folded Spill
-	movl	116(%esi), %ecx
-	movl	%ecx, -248(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -144(%ebp)        # 4-byte Folded Spill
-	movl	120(%esi), %ecx
-	movl	%ecx, -252(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -172(%ebp)        # 4-byte Folded Spill
-	movl	124(%esi), %ecx
-	movl	%ecx, -256(%ebp)        # 4-byte Spill
-	sbbl	%ecx, -176(%ebp)        # 4-byte Folded Spill
-	sbbl	$0, %eax
-	movl	%eax, -168(%ebp)        # 4-byte Spill
-	movl	-180(%ebp), %eax        # 4-byte Reload
-	addl	-200(%ebp), %eax        # 4-byte Folded Reload
-	adcl	-204(%ebp), %edi        # 4-byte Folded Reload
-	movl	%eax, 32(%esi)
-	movl	-184(%ebp), %eax        # 4-byte Reload
-	adcl	-208(%ebp), %eax        # 4-byte Folded Reload
-	movl	%edi, 36(%esi)
-	movl	-192(%ebp), %ecx        # 4-byte Reload
-	adcl	-212(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 40(%esi)
-	adcl	-216(%ebp), %ebx        # 4-byte Folded Reload
-	movl	%ecx, 44(%esi)
-	movl	-196(%ebp), %ecx        # 4-byte Reload
-	adcl	-220(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%ebx, 48(%esi)
-	movl	-188(%ebp), %eax        # 4-byte Reload
-	adcl	-224(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 52(%esi)
-	movl	%edx, %ecx
-	adcl	-228(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 56(%esi)
-	movl	-164(%ebp), %eax        # 4-byte Reload
-	adcl	-260(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 60(%esi)
-	movl	-160(%ebp), %ecx        # 4-byte Reload
-	adcl	-264(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 64(%esi)
-	movl	-156(%ebp), %eax        # 4-byte Reload
-	adcl	-268(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 68(%esi)
-	movl	-152(%ebp), %ecx        # 4-byte Reload
-	adcl	-272(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 72(%esi)
-	movl	-148(%ebp), %eax        # 4-byte Reload
-	adcl	-276(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 76(%esi)
-	movl	-144(%ebp), %ecx        # 4-byte Reload
-	adcl	-280(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 80(%esi)
-	movl	-172(%ebp), %eax        # 4-byte Reload
-	adcl	-284(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 84(%esi)
-	movl	-176(%ebp), %ecx        # 4-byte Reload
-	adcl	-288(%ebp), %ecx        # 4-byte Folded Reload
-	movl	%eax, 88(%esi)
-	movl	-168(%ebp), %eax        # 4-byte Reload
-	adcl	-292(%ebp), %eax        # 4-byte Folded Reload
-	movl	%ecx, 92(%esi)
-	movl	%eax, 96(%esi)
-	movl	-232(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 100(%esi)
-	movl	-236(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 104(%esi)
-	movl	-240(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 108(%esi)
-	movl	-244(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 112(%esi)
-	movl	-248(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 116(%esi)
-	movl	-252(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 120(%esi)
-	movl	-256(%ebp), %eax        # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 124(%esi)
-	addl	$300, %esp              # imm = 0x12C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end241:
-	.size	mcl_fpDbl_sqrPre16L, .Lfunc_end241-mcl_fpDbl_sqrPre16L
-
-	.globl	mcl_fp_mont16L
-	.align	16, 0x90
-	.type	mcl_fp_mont16L,@function
-mcl_fp_mont16L:                         # @mcl_fp_mont16L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$2428, %esp             # imm = 0x97C
-	calll	.L242$pb
-.L242$pb:
-	popl	%ebx
-.Ltmp53:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp53-.L242$pb), %ebx
-	movl	2460(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2360(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	2360(%esp), %ebp
-	movl	2364(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	2424(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	2420(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	2416(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	2412(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2408(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	2404(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2400(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2396(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	2392(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	2388(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	2384(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	2380(%esp), %edi
-	movl	2376(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2372(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2368(%esp), %esi
-	movl	%eax, (%esp)
-	leal	2288(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	addl	2288(%esp), %ebp
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2292(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	2296(%esp), %esi
-	movl	%esi, %ebp
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2300(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2304(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	2308(%esp), %edi
-	movl	%edi, %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2312(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2316(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2320(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2328(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2332(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2336(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2340(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2344(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2348(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	2352(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	2456(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2216(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %edi
-	movl	112(%esp), %ecx         # 4-byte Reload
-	addl	2216(%esp), %ecx
-	adcl	2220(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2224(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2228(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	2232(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	adcl	2236(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2240(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2244(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2248(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2252(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2256(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2260(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2264(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2268(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2272(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	2276(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	2280(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2144(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %edi
-	addl	2144(%esp), %ebp
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2148(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2152(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2156(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2160(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	2164(%esp), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	2168(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2172(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2176(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2180(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2184(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2188(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2192(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2196(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2200(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	2204(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	2208(%esp), %esi
-	adcl	$0, %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2072(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	112(%esp), %ecx         # 4-byte Reload
-	addl	2072(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2076(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2080(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2084(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2088(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	2092(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2096(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2100(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2104(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	2108(%esp), %ebp
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2112(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	2116(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2120(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2124(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	2128(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	2132(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2136(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2000(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %esi
-	movl	%esi, %eax
-	movl	112(%esp), %ecx         # 4-byte Reload
-	addl	2000(%esp), %ecx
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	2004(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	2008(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	2012(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	2016(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	2020(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	2024(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	2028(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	2032(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	2036(%esp), %ebp
-	movl	%ebp, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	2040(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	2044(%esp), %edi
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	2048(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	2052(%esp), %ebp
-	movl	124(%esp), %esi         # 4-byte Reload
-	adcl	2056(%esp), %esi
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	2060(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	2064(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1928(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	100(%esp), %ecx         # 4-byte Reload
-	addl	1928(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1932(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1936(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1940(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1944(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1948(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1952(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1956(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1960(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1964(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1968(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1972(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	1976(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	adcl	1980(%esp), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1984(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1988(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1992(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1856(%esp), %ecx
-	movl	2460(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	andl	$1, %ebp
-	movl	%ebp, %eax
-	addl	1856(%esp), %esi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1860(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1864(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1868(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1872(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1876(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1880(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1884(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1888(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	1892(%esp), %esi
-	adcl	1896(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %ebp         # 4-byte Reload
-	adcl	1900(%esp), %ebp
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1904(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1908(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1912(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1916(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1920(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1784(%esp), %ecx
-	movl	2452(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	movl	88(%esp), %ecx          # 4-byte Reload
-	addl	1784(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1788(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1792(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1796(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	1804(%esp), %edi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1808(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1812(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1816(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1820(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	1824(%esp), %ebp
-	movl	%ebp, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1828(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1832(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1836(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1840(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1844(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1848(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1712(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	1712(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1716(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1720(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1724(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1728(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1732(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1736(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1740(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	1744(%esp), %edi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1748(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1752(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1756(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1760(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	1764(%esp), %ebp
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	1768(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1772(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1776(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1640(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	1640(%esp), %ecx
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1644(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1648(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1652(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1656(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1660(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1664(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1668(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1672(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1676(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1680(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1684(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	1688(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	adcl	1692(%esp), %esi
-	movl	%esi, %edi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1696(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1700(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1704(%esp), %esi
-	sbbl	%eax, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1568(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	movl	80(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1568(%esp), %ebp
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1572(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1576(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1580(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1584(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	1588(%esp), %ebp
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1592(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1596(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1600(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1604(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1608(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1612(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1616(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	1620(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1624(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1628(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	adcl	1632(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1496(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	1496(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1500(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	1504(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1508(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1512(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	1516(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1520(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1524(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1528(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1532(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1536(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1540(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1544(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1548(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1552(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1556(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1560(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1424(%esp), %ecx
-	movl	2460(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	andl	$1, %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	addl	1424(%esp), %eax
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1428(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1432(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1436(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1440(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1444(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1448(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1452(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1456(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1460(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1464(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1468(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	1472(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1476(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	1480(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1484(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1488(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %ebp
-	movl	2456(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1352(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	1352(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1356(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1360(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1364(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1372(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1376(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1380(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1384(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1396(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1400(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	1404(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1408(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	1412(%esp), %esi
-	adcl	1416(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1280(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %ebp
-	movl	%ebp, %eax
-	addl	1280(%esp), %edi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1284(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	1288(%esp), %ebp
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1292(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1296(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1300(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1304(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1308(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1312(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1316(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1320(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1324(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1328(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1332(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1336(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	1340(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1344(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, %edi
-	movl	2456(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1208(%esp), %ecx
-	movl	2452(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	1208(%esp), %ecx
-	adcl	1212(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1216(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1220(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1228(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1232(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1236(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1240(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1244(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1248(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1252(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1256(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1260(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1264(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1272(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1136(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	1136(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1144(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1148(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %edi         # 4-byte Reload
-	adcl	1164(%esp), %edi
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	1188(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	1192(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1200(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1064(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	68(%esp), %ecx          # 4-byte Reload
-	addl	1064(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1080(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1084(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	1088(%esp), %edi
-	movl	%edi, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %esi         # 4-byte Reload
-	adcl	1092(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1104(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1116(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	992(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	movl	%edi, %eax
-	andl	$1, %eax
-	addl	992(%esp), %ebp
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	996(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1000(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1004(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	1008(%esp), %edi
-	movl	116(%esp), %ebp         # 4-byte Reload
-	adcl	1012(%esp), %ebp
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1016(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	adcl	1020(%esp), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1024(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1028(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	1032(%esp), %esi
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1036(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1040(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1044(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1048(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1052(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1056(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	920(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	920(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	928(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	932(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	adcl	936(%esp), %ebp
-	movl	%ebp, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	956(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	968(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	848(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	848(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	856(%esp), %edi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %esi         # 4-byte Reload
-	adcl	868(%esp), %esi
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	896(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	776(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	776(%esp), %ecx
-	adcl	780(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	784(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	792(%esp), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	800(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	704(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	movl	84(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	704(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	712(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	%ebp, %esi
-	adcl	728(%esp), %esi
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	732(%esp), %ebp
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	752(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	92(%esp), %ecx          # 4-byte Reload
-	addl	632(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	640(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	652(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	adcl	656(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	664(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	676(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	680(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	movl	92(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	560(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %esi         # 4-byte Reload
-	adcl	576(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	592(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	608(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	612(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	488(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	488(%esp), %ecx
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	500(%esp), %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	508(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	516(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	520(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	536(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	movl	96(%esp), %ecx          # 4-byte Reload
-	andl	$1, %ecx
-	addl	416(%esp), %edi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	432(%esp), %edi
-	adcl	436(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	440(%esp), %esi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	444(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	448(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	472(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	116(%esp), %ecx         # 4-byte Reload
-	addl	344(%esp), %ecx
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	348(%esp), %ebp
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	356(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	360(%esp), %edi
-	adcl	364(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	2460(%esp), %edx
-	calll	.LmulPv512x32
-	movl	116(%esp), %ecx         # 4-byte Reload
-	andl	$1, %ecx
-	addl	272(%esp), %esi
-	adcl	276(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	280(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	288(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	296(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	308(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	2456(%esp), %eax
-	movl	60(%eax), %eax
-	movl	%eax, (%esp)
-	leal	200(%esp), %ecx
-	movl	2452(%esp), %edx
-	calll	.LmulPv512x32
-	movl	120(%esp), %ecx         # 4-byte Reload
-	addl	200(%esp), %ecx
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	212(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	220(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	232(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	244(%esp), %edi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	128(%esp), %ecx
-	movl	2460(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	andl	$1, %ebp
-	addl	128(%esp), %esi
-	movl	104(%esp), %ebx         # 4-byte Reload
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	132(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	136(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	140(%esp), %ebx
-	movl	%ebx, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %edx         # 4-byte Reload
-	adcl	144(%esp), %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	148(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	152(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	156(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	160(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	164(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	168(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	172(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	176(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	180(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	184(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	188(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	116(%esp), %edx         # 4-byte Reload
-	adcl	192(%esp), %edx
-	movl	%edx, 116(%esp)         # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%eax, %edx
-	movl	2460(%esp), %edi
-	subl	(%edi), %edx
-	movl	%ecx, %eax
-	sbbl	4(%edi), %eax
-	movl	%ebx, %ecx
-	sbbl	8(%edi), %ecx
-	movl	112(%esp), %ebx         # 4-byte Reload
-	sbbl	12(%edi), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebx         # 4-byte Reload
-	sbbl	16(%edi), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%edi), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	sbbl	28(%edi), %esi
-	movl	%esi, 28(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	sbbl	32(%edi), %esi
-	movl	%esi, 32(%esp)          # 4-byte Spill
-	movl	60(%esp), %esi          # 4-byte Reload
-	sbbl	36(%edi), %esi
-	movl	%esi, 36(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	sbbl	40(%edi), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	sbbl	44(%edi), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	sbbl	48(%edi), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	sbbl	52(%edi), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	sbbl	56(%edi), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	sbbl	60(%edi), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	124(%esp), %edi         # 4-byte Reload
-	sbbl	$0, %ebp
-	andl	$1, %ebp
-	movl	%ebp, %ebx
-	jne	.LBB242_2
-# BB#1:
-	movl	%edx, %edi
-.LBB242_2:
-	movl	2448(%esp), %edx
-	movl	%edi, (%edx)
-	testb	%bl, %bl
-	movl	108(%esp), %edi         # 4-byte Reload
-	jne	.LBB242_4
-# BB#3:
-	movl	%eax, %edi
-.LBB242_4:
-	movl	%edi, 4(%edx)
-	jne	.LBB242_6
-# BB#5:
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-.LBB242_6:
-	movl	104(%esp), %eax         # 4-byte Reload
-	movl	%eax, 8(%edx)
-	jne	.LBB242_8
-# BB#7:
-	movl	12(%esp), %eax          # 4-byte Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-.LBB242_8:
-	movl	112(%esp), %eax         # 4-byte Reload
-	movl	%eax, 12(%edx)
-	movl	100(%esp), %eax         # 4-byte Reload
-	jne	.LBB242_10
-# BB#9:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB242_10:
-	movl	%eax, 16(%edx)
-	movl	88(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_12
-# BB#11:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB242_12:
-	movl	%eax, 20(%edx)
-	jne	.LBB242_14
-# BB#13:
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-.LBB242_14:
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	%eax, 24(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_16
-# BB#15:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB242_16:
-	movl	%eax, 28(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_18
-# BB#17:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB242_18:
-	movl	%eax, 32(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_20
-# BB#19:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB242_20:
-	movl	%eax, 36(%edx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_22
-# BB#21:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB242_22:
-	movl	%eax, 40(%edx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_24
-# BB#23:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB242_24:
-	movl	%eax, 44(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_26
-# BB#25:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB242_26:
-	movl	%eax, 48(%edx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_28
-# BB#27:
-	movl	52(%esp), %eax          # 4-byte Reload
-.LBB242_28:
-	movl	%eax, 52(%edx)
-	movl	96(%esp), %eax          # 4-byte Reload
-	jne	.LBB242_30
-# BB#29:
-	movl	56(%esp), %eax          # 4-byte Reload
-.LBB242_30:
-	movl	%eax, 56(%edx)
-	movl	116(%esp), %eax         # 4-byte Reload
-	jne	.LBB242_32
-# BB#31:
-	movl	120(%esp), %eax         # 4-byte Reload
-.LBB242_32:
-	movl	%eax, 60(%edx)
-	addl	$2428, %esp             # imm = 0x97C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end242:
-	.size	mcl_fp_mont16L, .Lfunc_end242-mcl_fp_mont16L
-
-	.globl	mcl_fp_montNF16L
-	.align	16, 0x90
-	.type	mcl_fp_montNF16L,@function
-mcl_fp_montNF16L:                       # @mcl_fp_montNF16L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$2412, %esp             # imm = 0x96C
-	calll	.L243$pb
-.L243$pb:
-	popl	%ebx
-.Ltmp54:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp54-.L243$pb), %ebx
-	movl	2444(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2344(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	2344(%esp), %edi
-	movl	2348(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	imull	%esi, %eax
-	movl	2408(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2404(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	2400(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	2396(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2392(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2388(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	2384(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	2380(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	2376(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	2372(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	2368(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	2364(%esp), %ebp
-	movl	2360(%esp), %esi
-	movl	2356(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2352(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	2272(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	2272(%esp), %edi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2276(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2280(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2284(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	2288(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	2292(%esp), %ebp
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2296(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2300(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	2304(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	2308(%esp), %edi
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	2312(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2316(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2320(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2324(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	2328(%esp), %esi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2332(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2336(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2200(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	2264(%esp), %edx
-	movl	108(%esp), %ecx         # 4-byte Reload
-	addl	2200(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2204(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2208(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2212(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	2216(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2220(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2224(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	2228(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	2232(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	2236(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2240(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2244(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2248(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	2252(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2256(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	2260(%esp), %esi
-	adcl	$0, %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2128(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	2128(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2132(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2136(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2140(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2144(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2148(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2152(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	2156(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	2160(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	2164(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2168(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2172(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2176(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2180(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2184(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	2188(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	2192(%esp), %esi
-	movl	2440(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2056(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	2120(%esp), %eax
-	movl	84(%esp), %edx          # 4-byte Reload
-	addl	2056(%esp), %edx
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	2060(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	2064(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	2068(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	2072(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	2076(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	adcl	2080(%esp), %edi
-	movl	%edi, %ebp
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	2084(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	2088(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	2092(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	2096(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	2100(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	2104(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	2108(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	2112(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	adcl	2116(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1984(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	1984(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1988(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1992(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1996(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2000(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	2004(%esp), %edi
-	adcl	2008(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	2012(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	2016(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2020(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2024(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2028(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2032(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	2036(%esp), %ebp
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	2040(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2044(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2048(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1912(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	1976(%esp), %eax
-	movl	76(%esp), %edx          # 4-byte Reload
-	addl	1912(%esp), %edx
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1916(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1920(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1924(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	1928(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	1932(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1936(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1940(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1944(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	1948(%esp), %edi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1952(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1956(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	1960(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	adcl	1964(%esp), %esi
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1968(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1972(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1840(%esp), %ecx
-	movl	2444(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	addl	1840(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1844(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1848(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1852(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1856(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1860(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1864(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1868(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1872(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1876(%esp), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	1880(%esp), %edi
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1884(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1888(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1892(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1896(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1900(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1904(%esp), %esi
-	movl	2440(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1768(%esp), %ecx
-	movl	2436(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv512x32
-	movl	1832(%esp), %edx
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	1768(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1772(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1776(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1780(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1784(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1788(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1792(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1796(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1804(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	adcl	1808(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1812(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1816(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1820(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1824(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1828(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1696(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	1696(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1700(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1704(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1708(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1712(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1716(%esp), %ebp
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	1720(%esp), %edi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1724(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1728(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1732(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1736(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	adcl	1740(%esp), %esi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1744(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1748(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1752(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1756(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1760(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1624(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	1688(%esp), %edx
-	movl	68(%esp), %ecx          # 4-byte Reload
-	addl	1624(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1628(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1632(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1636(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	1640(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	1644(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	1648(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1652(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1656(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1660(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1664(%esp), %esi
-	movl	%esi, %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1668(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1672(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1676(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1680(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1684(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1552(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	1552(%esp), %esi
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	1556(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1560(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1564(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1568(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1572(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1576(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1580(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1584(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1588(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	1592(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1596(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	1600(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1604(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1608(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1612(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	1616(%esp), %edi
-	movl	2440(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1480(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	1544(%esp), %eax
-	addl	1480(%esp), %esi
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	1484(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	1488(%esp), %edx
-	movl	%edx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	1492(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	1496(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	1500(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	1504(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	1508(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	1512(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	1516(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	1520(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	adcl	1524(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	1528(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	1532(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	1536(%esp), %ebp
-	adcl	1540(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	%eax, %edi
-	adcl	$0, %edi
-	movl	%esi, %eax
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1408(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	1408(%esp), %esi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1412(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %esi          # 4-byte Reload
-	adcl	1416(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1424(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1428(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1432(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1436(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1440(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1444(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1448(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1452(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1456(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1460(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1464(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	1468(%esp), %ebp
-	adcl	1472(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1336(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	1400(%esp), %eax
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	1336(%esp), %ecx
-	adcl	1340(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	1344(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	1348(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	1352(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	1356(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	1360(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	1364(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	1368(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	1372(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %edx         # 4-byte Reload
-	adcl	1376(%esp), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	1380(%esp), %edi
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1384(%esp), %esi
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	1388(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	adcl	1392(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	1396(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1264(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	1264(%esp), %ebp
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1272(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1288(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1300(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1308(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	adcl	1312(%esp), %esi
-	movl	%esi, %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1320(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1324(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1328(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1192(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	1256(%esp), %eax
-	movl	48(%esp), %ecx          # 4-byte Reload
-	addl	1192(%esp), %ecx
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	1196(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	1200(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	1204(%esp), %esi
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	1208(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	1212(%esp), %edi
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	1216(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	1220(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	1224(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %edx         # 4-byte Reload
-	adcl	1228(%esp), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	1232(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	adcl	1236(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	1240(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	1244(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	1248(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	1252(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1120(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	1120(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	1132(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	1140(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1144(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1148(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	1176(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1048(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	1112(%esp), %edx
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	1048(%esp), %ecx
-	movl	56(%esp), %esi          # 4-byte Reload
-	adcl	1052(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	1068(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1072(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	1076(%esp), %ebp
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1080(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1084(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1088(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1092(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1100(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1104(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	976(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	976(%esp), %edi
-	adcl	980(%esp), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	1000(%esp), %edi
-	adcl	1004(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	1008(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	1016(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	904(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	968(%esp), %ecx
-	movl	56(%esp), %eax          # 4-byte Reload
-	addl	904(%esp), %eax
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	908(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	912(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	916(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	920(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	adcl	924(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	928(%esp), %edi
-	adcl	932(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	936(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	adcl	940(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	944(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	948(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	952(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	956(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	960(%esp), %ebp
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	964(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	832(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	832(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	856(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	872(%esp), %esi
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	876(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	888(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	892(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	760(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	824(%esp), %edx
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	760(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	796(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	800(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	808(%esp), %edi
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	adcl	816(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	688(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	688(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	716(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	732(%esp), %ebp
-	adcl	736(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	616(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	680(%esp), %edx
-	movl	88(%esp), %ecx          # 4-byte Reload
-	addl	616(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	624(%esp), %edi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	640(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	656(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	672(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	544(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	544(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	552(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	560(%esp), %edi
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	564(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	588(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	592(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	596(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	600(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	472(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	536(%esp), %edx
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	472(%esp), %ecx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	484(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	adcl	488(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %ebp          # 4-byte Reload
-	adcl	496(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	400(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	400(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	412(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	420(%esp), %edi
-	adcl	424(%esp), %ebp
-	movl	%ebp, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	440(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	444(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	452(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	460(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	464(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	328(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	392(%esp), %edx
-	movl	92(%esp), %ecx          # 4-byte Reload
-	addl	328(%esp), %ecx
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	336(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	344(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	352(%esp), %esi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	356(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	368(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	256(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	256(%esp), %ebp
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	260(%esp), %edi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	268(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	280(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	284(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	316(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	2440(%esp), %eax
-	movl	60(%eax), %eax
-	movl	%eax, (%esp)
-	leal	184(%esp), %ecx
-	movl	2436(%esp), %edx
-	calll	.LmulPv512x32
-	movl	248(%esp), %edx
-	movl	%edi, %ecx
-	addl	184(%esp), %ecx
-	movl	100(%esp), %edi         # 4-byte Reload
-	adcl	188(%esp), %edi
-	adcl	192(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	84(%esp), %ebp          # 4-byte Reload
-	adcl	196(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	204(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	208(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	112(%esp), %ecx
-	movl	2444(%esp), %edx
-	calll	.LmulPv512x32
-	addl	112(%esp), %esi
-	movl	%edi, %eax
-	adcl	116(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	120(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	adcl	124(%esp), %ebp
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	128(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %ebx
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	132(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	136(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	140(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	144(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	48(%esp), %ecx          # 4-byte Reload
-	adcl	148(%esp), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	152(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	156(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	160(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	164(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	168(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	172(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	176(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	%eax, %edx
-	movl	2444(%esp), %esi
-	subl	(%esi), %edx
-	sbbl	4(%esi), %edi
-	movl	%ebp, %ecx
-	sbbl	8(%esi), %ecx
-	movl	%ebx, %eax
-	sbbl	12(%esi), %eax
-	movl	80(%esp), %ebx          # 4-byte Reload
-	sbbl	16(%esi), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	68(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%esi), %ebx
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	64(%esp), %ebx          # 4-byte Reload
-	sbbl	24(%esi), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebx          # 4-byte Reload
-	sbbl	28(%esi), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	48(%esp), %ebx          # 4-byte Reload
-	sbbl	32(%esi), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebx          # 4-byte Reload
-	sbbl	36(%esi), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebx          # 4-byte Reload
-	sbbl	40(%esi), %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebx          # 4-byte Reload
-	sbbl	44(%esi), %ebx
-	movl	%ebx, 32(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx          # 4-byte Reload
-	sbbl	48(%esi), %ebx
-	movl	%ebx, 36(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebx          # 4-byte Reload
-	sbbl	52(%esi), %ebx
-	movl	%ebx, 40(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebx          # 4-byte Reload
-	sbbl	56(%esi), %ebx
-	movl	%ebx, 44(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebx         # 4-byte Reload
-	sbbl	60(%esi), %ebx
-	movl	%ebx, 84(%esp)          # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	testl	%ebx, %ebx
-	js	.LBB243_2
-# BB#1:
-	movl	%edx, %esi
-.LBB243_2:
-	movl	2432(%esp), %edx
-	movl	%esi, (%edx)
-	movl	108(%esp), %esi         # 4-byte Reload
-	js	.LBB243_4
-# BB#3:
-	movl	%edi, %esi
-.LBB243_4:
-	movl	%esi, 4(%edx)
-	js	.LBB243_6
-# BB#5:
-	movl	%ecx, %ebp
-.LBB243_6:
-	movl	%ebp, 8(%edx)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	js	.LBB243_8
-# BB#7:
-	movl	%eax, %ecx
-.LBB243_8:
-	movl	%ecx, 12(%edx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	js	.LBB243_10
-# BB#9:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB243_10:
-	movl	%eax, 16(%edx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	js	.LBB243_12
-# BB#11:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB243_12:
-	movl	%eax, 20(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB243_14
-# BB#13:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB243_14:
-	movl	%eax, 24(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB243_16
-# BB#15:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB243_16:
-	movl	%eax, 28(%edx)
-	movl	48(%esp), %eax          # 4-byte Reload
-	js	.LBB243_18
-# BB#17:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB243_18:
-	movl	%eax, 32(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB243_20
-# BB#19:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB243_20:
-	movl	%eax, 36(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB243_22
-# BB#21:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB243_22:
-	movl	%eax, 40(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	js	.LBB243_24
-# BB#23:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB243_24:
-	movl	%eax, 44(%edx)
-	movl	88(%esp), %eax          # 4-byte Reload
-	js	.LBB243_26
-# BB#25:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB243_26:
-	movl	%eax, 48(%edx)
-	movl	96(%esp), %eax          # 4-byte Reload
-	js	.LBB243_28
-# BB#27:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB243_28:
-	movl	%eax, 52(%edx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	js	.LBB243_30
-# BB#29:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB243_30:
-	movl	%eax, 56(%edx)
-	movl	104(%esp), %eax         # 4-byte Reload
-	js	.LBB243_32
-# BB#31:
-	movl	84(%esp), %eax          # 4-byte Reload
-.LBB243_32:
-	movl	%eax, 60(%edx)
-	addl	$2412, %esp             # imm = 0x96C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end243:
-	.size	mcl_fp_montNF16L, .Lfunc_end243-mcl_fp_montNF16L
-
-	.globl	mcl_fp_montRed16L
-	.align	16, 0x90
-	.type	mcl_fp_montRed16L,@function
-mcl_fp_montRed16L:                      # @mcl_fp_montRed16L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1356, %esp             # imm = 0x54C
-	calll	.L244$pb
-.L244$pb:
-	popl	%eax
-.Ltmp55:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp55-.L244$pb), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	1384(%esp), %edx
-	movl	-4(%edx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1380(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 112(%esp)         # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 120(%esp)         # 4-byte Spill
-	imull	%eax, %ebx
-	movl	124(%ecx), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	120(%ecx), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%ecx), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	112(%ecx), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	108(%ecx), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	104(%ecx), %esi
-	movl	%esi, 152(%esp)         # 4-byte Spill
-	movl	100(%ecx), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	96(%ecx), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	92(%ecx), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	88(%ecx), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	84(%ecx), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	80(%ecx), %edi
-	movl	%edi, 148(%esp)         # 4-byte Spill
-	movl	76(%ecx), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	72(%ecx), %esi
-	movl	%esi, 192(%esp)         # 4-byte Spill
-	movl	68(%ecx), %edi
-	movl	%edi, 204(%esp)         # 4-byte Spill
-	movl	64(%ecx), %esi
-	movl	%esi, 200(%esp)         # 4-byte Spill
-	movl	60(%ecx), %edi
-	movl	%edi, 180(%esp)         # 4-byte Spill
-	movl	56(%ecx), %edi
-	movl	%edi, 164(%esp)         # 4-byte Spill
-	movl	52(%ecx), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	48(%ecx), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	44(%ecx), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	40(%ecx), %ebp
-	movl	36(%ecx), %edi
-	movl	32(%ecx), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	28(%ecx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	24(%ecx), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	20(%ecx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	16(%ecx), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	12(%ecx), %esi
-	movl	8(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	60(%edx), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	56(%edx), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	52(%edx), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	48(%edx), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	44(%edx), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	1288(%esp), %ecx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	movl	112(%esp), %eax         # 4-byte Reload
-	addl	1288(%esp), %eax
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1292(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1300(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1308(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1312(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1320(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1324(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	adcl	1328(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	1336(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1352(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	adcl	$0, 204(%esp)           # 4-byte Folded Spill
-	adcl	$0, 192(%esp)           # 4-byte Folded Spill
-	movl	196(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	movl	132(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	sbbl	%eax, %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1216(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	movl	112(%esp), %ecx         # 4-byte Reload
-	andl	$1, %ecx
-	addl	1216(%esp), %esi
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	1220(%esp), %edx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1228(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1232(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1236(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1240(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1244(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1248(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1252(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1256(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %esi         # 4-byte Reload
-	adcl	1260(%esp), %esi
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	1264(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1272(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	adcl	$0, 192(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 196(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	movl	156(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 132(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1144(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	1144(%esp), %ebp
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1148(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	adcl	1184(%esp), %esi
-	movl	%esi, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	1188(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1200(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1204(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1208(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	adcl	$0, 196(%esp)           # 4-byte Folded Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	movl	168(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 156(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	movl	128(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1072(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	1072(%esp), %esi
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1076(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1080(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1084(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1088(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1092(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1104(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1116(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	adcl	$0, 148(%esp)           # 4-byte Folded Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 168(%esp)         # 4-byte Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 128(%esp)         # 4-byte Spill
-	movl	124(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1000(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	1000(%esp), %edi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1004(%esp), %ecx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1044(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	movl	188(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	movl	172(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	928(%esp), %esi
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	932(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	936(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	adcl	$0, %ebp
-	movl	%ebp, 188(%esp)         # 4-byte Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 176(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 172(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	movl	144(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	movl	100(%esp), %ebp         # 4-byte Reload
-	imull	%ebp, %eax
-	movl	%eax, (%esp)
-	leal	856(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	856(%esp), %edi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	860(%esp), %ecx
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	movl	176(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 144(%esp)         # 4-byte Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	%ebp, %eax
-	movl	%eax, (%esp)
-	leal	784(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	784(%esp), %esi
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	788(%esp), %ecx
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %ebp         # 4-byte Reload
-	adcl	828(%esp), %ebp
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	movl	%edi, 176(%esp)         # 4-byte Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	movl	156(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	712(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	712(%esp), %edi
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	716(%esp), %ecx
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	adcl	752(%esp), %ebp
-	movl	%ebp, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %edi         # 4-byte Reload
-	adcl	756(%esp), %edi
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 156(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	640(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	640(%esp), %esi
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	644(%esp), %ecx
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	660(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %esi         # 4-byte Reload
-	adcl	668(%esp), %esi
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	adcl	680(%esp), %edi
-	movl	%edi, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	692(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	152(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	1384(%esp), %eax
-	movl	%eax, %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	568(%esp), %ebp
-	movl	140(%esp), %ecx         # 4-byte Reload
-	adcl	572(%esp), %ecx
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	576(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	584(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %ebp         # 4-byte Reload
-	adcl	588(%esp), %ebp
-	adcl	592(%esp), %esi
-	movl	%esi, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %esi         # 4-byte Reload
-	adcl	596(%esp), %esi
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	600(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	604(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	608(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	adcl	632(%esp), %edi
-	movl	%edi, 152(%esp)         # 4-byte Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	496(%esp), %edi
-	movl	136(%esp), %ecx         # 4-byte Reload
-	adcl	500(%esp), %ecx
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	164(%esp), %edi         # 4-byte Reload
-	adcl	508(%esp), %edi
-	adcl	512(%esp), %ebp
-	movl	%ebp, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	adcl	520(%esp), %esi
-	movl	%esi, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	524(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	528(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %ebp         # 4-byte Reload
-	adcl	532(%esp), %ebp
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	424(%esp), %esi
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	428(%esp), %eax
-	adcl	432(%esp), %edi
-	movl	%edi, 164(%esp)         # 4-byte Spill
-	movl	180(%esp), %ecx         # 4-byte Reload
-	adcl	436(%esp), %ecx
-	movl	%ecx, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %ecx         # 4-byte Reload
-	adcl	440(%esp), %ecx
-	movl	%ecx, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %ecx         # 4-byte Reload
-	adcl	444(%esp), %ecx
-	movl	%ecx, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %ecx         # 4-byte Reload
-	adcl	448(%esp), %ecx
-	movl	%ecx, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %ecx         # 4-byte Reload
-	adcl	452(%esp), %ecx
-	movl	%ecx, 196(%esp)         # 4-byte Spill
-	adcl	456(%esp), %ebp
-	movl	184(%esp), %ecx         # 4-byte Reload
-	adcl	460(%esp), %ecx
-	movl	%ecx, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %ecx         # 4-byte Reload
-	adcl	464(%esp), %ecx
-	movl	%ecx, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %ecx         # 4-byte Reload
-	adcl	468(%esp), %ecx
-	movl	%ecx, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %ecx         # 4-byte Reload
-	adcl	472(%esp), %ecx
-	movl	%ecx, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %ecx         # 4-byte Reload
-	adcl	476(%esp), %ecx
-	movl	%ecx, 172(%esp)         # 4-byte Spill
-	movl	152(%esp), %ecx         # 4-byte Reload
-	adcl	480(%esp), %ecx
-	movl	%ecx, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %ecx         # 4-byte Reload
-	adcl	484(%esp), %ecx
-	movl	%ecx, 156(%esp)         # 4-byte Spill
-	movl	144(%esp), %ecx         # 4-byte Reload
-	adcl	488(%esp), %ecx
-	movl	%ecx, 144(%esp)         # 4-byte Spill
-	movl	132(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%eax, %esi
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	352(%esp), %esi
-	movl	164(%esp), %esi         # 4-byte Reload
-	adcl	356(%esp), %esi
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	360(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	364(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	368(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	372(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	adcl	380(%esp), %ebp
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	384(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	388(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	404(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	adcl	416(%esp), %edi
-	movl	%edi, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	280(%esp), %esi
-	movl	180(%esp), %ecx         # 4-byte Reload
-	adcl	284(%esp), %ecx
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	296(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	adcl	304(%esp), %ebp
-	movl	%ebp, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	308(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	312(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %esi         # 4-byte Reload
-	adcl	316(%esp), %esi
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	344(%esp), %edi
-	movl	%edi, 128(%esp)         # 4-byte Spill
-	movl	124(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 112(%esp)           # 4-byte Folded Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %ebp
-	movl	%eax, (%esp)
-	leal	208(%esp), %ecx
-	movl	1384(%esp), %edx
-	movl	116(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv512x32
-	addl	208(%esp), %ebp
-	movl	200(%esp), %edx         # 4-byte Reload
-	adcl	212(%esp), %edx
-	movl	%edx, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %ecx         # 4-byte Reload
-	adcl	220(%esp), %ecx
-	movl	%ecx, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	148(%esp), %ebp         # 4-byte Reload
-	adcl	228(%esp), %ebp
-	movl	%ebp, 148(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	%eax, %ebx
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	adcl	240(%esp), %esi
-	movl	%esi, 168(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	272(%esp), %edi
-	movl	%edi, 124(%esp)         # 4-byte Spill
-	movl	112(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	movl	%edx, %eax
-	subl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	204(%esp), %esi         # 4-byte Reload
-	sbbl	12(%esp), %esi          # 4-byte Folded Reload
-	sbbl	16(%esp), %ecx          # 4-byte Folded Reload
-	movl	196(%esp), %eax         # 4-byte Reload
-	sbbl	20(%esp), %eax          # 4-byte Folded Reload
-	sbbl	28(%esp), %ebp          # 4-byte Folded Reload
-	sbbl	32(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 100(%esp)         # 4-byte Spill
-	movl	188(%esp), %ebx         # 4-byte Reload
-	sbbl	36(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 104(%esp)         # 4-byte Spill
-	movl	168(%esp), %ebx         # 4-byte Reload
-	sbbl	40(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 108(%esp)         # 4-byte Spill
-	movl	176(%esp), %ebx         # 4-byte Reload
-	sbbl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 112(%esp)         # 4-byte Spill
-	movl	172(%esp), %ebx         # 4-byte Reload
-	sbbl	48(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 116(%esp)         # 4-byte Spill
-	movl	152(%esp), %ebx         # 4-byte Reload
-	sbbl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 120(%esp)         # 4-byte Spill
-	movl	156(%esp), %ebx         # 4-byte Reload
-	sbbl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 136(%esp)         # 4-byte Spill
-	movl	144(%esp), %ebx         # 4-byte Reload
-	sbbl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 140(%esp)         # 4-byte Spill
-	movl	132(%esp), %ebx         # 4-byte Reload
-	sbbl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 160(%esp)         # 4-byte Spill
-	movl	128(%esp), %ebx         # 4-byte Reload
-	sbbl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 164(%esp)         # 4-byte Spill
-	movl	124(%esp), %ebx         # 4-byte Reload
-	sbbl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 180(%esp)         # 4-byte Spill
-	sbbl	$0, %edi
-	andl	$1, %edi
-	movl	%edi, %ebx
-	jne	.LBB244_2
-# BB#1:
-	movl	%edx, 200(%esp)         # 4-byte Spill
-.LBB244_2:
-	movl	1376(%esp), %edx
-	movl	200(%esp), %edi         # 4-byte Reload
-	movl	%edi, (%edx)
-	testb	%bl, %bl
-	jne	.LBB244_4
-# BB#3:
-	movl	%esi, 204(%esp)         # 4-byte Spill
-.LBB244_4:
-	movl	204(%esp), %esi         # 4-byte Reload
-	movl	%esi, 4(%edx)
-	movl	192(%esp), %esi         # 4-byte Reload
-	jne	.LBB244_6
-# BB#5:
-	movl	%ecx, %esi
-.LBB244_6:
-	movl	%esi, 8(%edx)
-	movl	196(%esp), %ecx         # 4-byte Reload
-	jne	.LBB244_8
-# BB#7:
-	movl	%eax, %ecx
-.LBB244_8:
-	movl	%ecx, 12(%edx)
-	movl	128(%esp), %esi         # 4-byte Reload
-	movl	148(%esp), %eax         # 4-byte Reload
-	jne	.LBB244_10
-# BB#9:
-	movl	%ebp, %eax
-.LBB244_10:
-	movl	%eax, 16(%edx)
-	movl	124(%esp), %ecx         # 4-byte Reload
-	movl	176(%esp), %eax         # 4-byte Reload
-	movl	184(%esp), %ebp         # 4-byte Reload
-	jne	.LBB244_12
-# BB#11:
-	movl	100(%esp), %ebp         # 4-byte Reload
-.LBB244_12:
-	movl	%ebp, 20(%edx)
-	movl	152(%esp), %ebp         # 4-byte Reload
-	movl	188(%esp), %ebx         # 4-byte Reload
-	jne	.LBB244_14
-# BB#13:
-	movl	104(%esp), %ebx         # 4-byte Reload
-.LBB244_14:
-	movl	%ebx, 24(%edx)
-	movl	156(%esp), %ebx         # 4-byte Reload
-	movl	168(%esp), %edi         # 4-byte Reload
-	jne	.LBB244_16
-# BB#15:
-	movl	108(%esp), %edi         # 4-byte Reload
-.LBB244_16:
-	movl	%edi, 28(%edx)
-	movl	144(%esp), %edi         # 4-byte Reload
-	jne	.LBB244_18
-# BB#17:
-	movl	112(%esp), %eax         # 4-byte Reload
-.LBB244_18:
-	movl	%eax, 32(%edx)
-	jne	.LBB244_20
-# BB#19:
-	movl	116(%esp), %eax         # 4-byte Reload
-	movl	%eax, 172(%esp)         # 4-byte Spill
-.LBB244_20:
-	movl	172(%esp), %eax         # 4-byte Reload
-	movl	%eax, 36(%edx)
-	jne	.LBB244_22
-# BB#21:
-	movl	120(%esp), %ebp         # 4-byte Reload
-.LBB244_22:
-	movl	%ebp, 40(%edx)
-	movl	132(%esp), %eax         # 4-byte Reload
-	jne	.LBB244_24
-# BB#23:
-	movl	136(%esp), %ebx         # 4-byte Reload
-.LBB244_24:
-	movl	%ebx, 44(%edx)
-	jne	.LBB244_26
-# BB#25:
-	movl	140(%esp), %edi         # 4-byte Reload
-.LBB244_26:
-	movl	%edi, 48(%edx)
-	jne	.LBB244_28
-# BB#27:
-	movl	160(%esp), %eax         # 4-byte Reload
-.LBB244_28:
-	movl	%eax, 52(%edx)
-	jne	.LBB244_30
-# BB#29:
-	movl	164(%esp), %esi         # 4-byte Reload
-.LBB244_30:
-	movl	%esi, 56(%edx)
-	jne	.LBB244_32
-# BB#31:
-	movl	180(%esp), %ecx         # 4-byte Reload
-.LBB244_32:
-	movl	%ecx, 60(%edx)
-	addl	$1356, %esp             # imm = 0x54C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end244:
-	.size	mcl_fp_montRed16L, .Lfunc_end244-mcl_fp_montRed16L
-
-	.globl	mcl_fp_addPre16L
-	.align	16, 0x90
-	.type	mcl_fp_addPre16L,@function
-mcl_fp_addPre16L:                       # @mcl_fp_addPre16L
-# BB#0:
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %ebx
-	adcl	8(%ecx), %ebx
-	movl	16(%esp), %edi
-	movl	%edx, (%edi)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%edi)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%ebx, 8(%edi)
-	movl	20(%eax), %ebx
-	movl	%edx, 12(%edi)
-	movl	20(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	24(%eax), %ebx
-	movl	%esi, 16(%edi)
-	movl	24(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	28(%eax), %ebx
-	movl	%edx, 20(%edi)
-	movl	28(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	32(%eax), %ebx
-	movl	%esi, 24(%edi)
-	movl	32(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	36(%eax), %ebx
-	movl	%edx, 28(%edi)
-	movl	36(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	40(%eax), %ebx
-	movl	%esi, 32(%edi)
-	movl	40(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	44(%eax), %ebx
-	movl	%edx, 36(%edi)
-	movl	44(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	48(%eax), %ebx
-	movl	%esi, 40(%edi)
-	movl	48(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	52(%eax), %ebx
-	movl	%edx, 44(%edi)
-	movl	52(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	56(%eax), %ebx
-	movl	%esi, 48(%edi)
-	movl	56(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	%edx, 52(%edi)
-	movl	%esi, 56(%edi)
-	movl	60(%eax), %eax
-	movl	60(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 60(%edi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	retl
-.Lfunc_end245:
-	.size	mcl_fp_addPre16L, .Lfunc_end245-mcl_fp_addPre16L
-
-	.globl	mcl_fp_subPre16L
-	.align	16, 0x90
-	.type	mcl_fp_subPre16L,@function
-mcl_fp_subPre16L:                       # @mcl_fp_subPre16L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebp
-	sbbl	8(%edx), %ebp
-	movl	20(%esp), %ebx
-	movl	%esi, (%ebx)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebx)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebp, 8(%ebx)
-	movl	20(%edx), %ebp
-	movl	%esi, 12(%ebx)
-	movl	20(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	24(%edx), %ebp
-	movl	%edi, 16(%ebx)
-	movl	24(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	28(%edx), %ebp
-	movl	%esi, 20(%ebx)
-	movl	28(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	32(%edx), %ebp
-	movl	%edi, 24(%ebx)
-	movl	32(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	36(%edx), %ebp
-	movl	%esi, 28(%ebx)
-	movl	36(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	40(%edx), %ebp
-	movl	%edi, 32(%ebx)
-	movl	40(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	44(%edx), %ebp
-	movl	%esi, 36(%ebx)
-	movl	44(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	48(%edx), %ebp
-	movl	%edi, 40(%ebx)
-	movl	48(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	52(%edx), %ebp
-	movl	%esi, 44(%ebx)
-	movl	52(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	56(%edx), %ebp
-	movl	%edi, 48(%ebx)
-	movl	56(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	%esi, 52(%ebx)
-	movl	%edi, 56(%ebx)
-	movl	60(%edx), %edx
-	movl	60(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 60(%ebx)
-	sbbl	$0, %eax
-	andl	$1, %eax
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end246:
-	.size	mcl_fp_subPre16L, .Lfunc_end246-mcl_fp_subPre16L
-
-	.globl	mcl_fp_shr1_16L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_16L,@function
-mcl_fp_shr1_16L:                        # @mcl_fp_shr1_16L
-# BB#0:
-	pushl	%esi
-	movl	12(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	8(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 4(%ecx)
-	movl	12(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 8(%ecx)
-	movl	16(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 16(%ecx)
-	movl	24(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 24(%ecx)
-	movl	32(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 32(%ecx)
-	movl	40(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 40(%ecx)
-	movl	48(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 44(%ecx)
-	movl	52(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 48(%ecx)
-	movl	56(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 52(%ecx)
-	movl	60(%eax), %eax
-	shrdl	$1, %eax, %edx
-	movl	%edx, 56(%ecx)
-	shrl	%eax
-	movl	%eax, 60(%ecx)
-	popl	%esi
-	retl
-.Lfunc_end247:
-	.size	mcl_fp_shr1_16L, .Lfunc_end247-mcl_fp_shr1_16L
-
-	.globl	mcl_fp_add16L
-	.align	16, 0x90
-	.type	mcl_fp_add16L,@function
-mcl_fp_add16L:                          # @mcl_fp_add16L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$56, %esp
-	movl	84(%esp), %edx
-	movl	(%edx), %esi
-	movl	4(%edx), %ebp
-	movl	80(%esp), %ecx
-	addl	(%ecx), %esi
-	movl	%esi, %ebx
-	adcl	4(%ecx), %ebp
-	movl	8(%edx), %eax
-	adcl	8(%ecx), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	12(%ecx), %esi
-	movl	16(%ecx), %edi
-	adcl	12(%edx), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	adcl	16(%edx), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	movl	20(%ecx), %eax
-	adcl	20(%edx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%ecx), %eax
-	adcl	24(%edx), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%ecx), %eax
-	adcl	28(%edx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%ecx), %eax
-	adcl	32(%edx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%ecx), %eax
-	adcl	36(%edx), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	40(%ecx), %eax
-	adcl	40(%edx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	44(%ecx), %eax
-	adcl	44(%edx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	48(%ecx), %eax
-	adcl	48(%edx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	52(%ecx), %eax
-	adcl	52(%edx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	56(%ecx), %esi
-	adcl	56(%edx), %esi
-	movl	60(%ecx), %ecx
-	adcl	60(%edx), %ecx
-	movl	76(%esp), %edx
-	movl	%ebx, (%edx)
-	movl	%ebx, %eax
-	movl	%ebp, 4(%edx)
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%edx)
-	movl	48(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 12(%edx)
-	movl	%edi, 16(%edx)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%edx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%edx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%edx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%edx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 36(%edx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 40(%edx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 44(%edx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 48(%edx)
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	%edi, 52(%edx)
-	movl	%esi, 56(%edx)
-	movl	%ecx, 60(%edx)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	88(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	sbbl	4(%edi), %ebp
-	movl	%ebp, (%esp)            # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	sbbl	8(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	sbbl	12(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	sbbl	16(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	sbbl	20(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	sbbl	24(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	sbbl	28(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	sbbl	44(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	sbbl	48(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	8(%esp), %eax           # 4-byte Reload
-	sbbl	52(%edi), %eax
-	movl	%eax, %ebp
-	sbbl	56(%edi), %esi
-	sbbl	60(%edi), %ecx
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	jne	.LBB248_2
-# BB#1:                                 # %nocarry
-	movl	4(%esp), %edi           # 4-byte Reload
-	movl	%edi, (%edx)
-	movl	(%esp), %edi            # 4-byte Reload
-	movl	%edi, 4(%edx)
-	movl	52(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%edx)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%edx)
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%edx)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%edx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%edx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%edx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%edx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 36(%edx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 40(%edx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 44(%edx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 48(%edx)
-	movl	%ebp, 52(%edx)
-	movl	%esi, 56(%edx)
-	movl	%ecx, 60(%edx)
-.LBB248_2:                              # %carry
-	addl	$56, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end248:
-	.size	mcl_fp_add16L, .Lfunc_end248-mcl_fp_add16L
-
-	.globl	mcl_fp_addNF16L
-	.align	16, 0x90
-	.type	mcl_fp_addNF16L,@function
-mcl_fp_addNF16L:                        # @mcl_fp_addNF16L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$124, %esp
-	movl	152(%esp), %edx
-	movl	(%edx), %eax
-	movl	4(%edx), %ecx
-	movl	148(%esp), %esi
-	addl	(%esi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	4(%esi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	60(%edx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	56(%edx), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	52(%edx), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	48(%edx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	44(%edx), %edi
-	movl	40(%edx), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	36(%edx), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	32(%edx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	28(%edx), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	24(%edx), %eax
-	movl	20(%edx), %ebp
-	movl	16(%edx), %ebx
-	movl	12(%edx), %ecx
-	movl	8(%edx), %edx
-	adcl	8(%esi), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	adcl	12(%esi), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	16(%esi), %ebx
-	movl	%ebx, 68(%esp)          # 4-byte Spill
-	adcl	20(%esi), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	adcl	24(%esi), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	28(%esi), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	32(%esi), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	36(%esi), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esi), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	44(%esi), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	48(%esi), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	52(%esi), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	56(%esi), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	60(%esi), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	156(%esp), %edi
-	movl	80(%esp), %esi          # 4-byte Reload
-	subl	(%edi), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	sbbl	4(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	sbbl	8(%edi), %edx
-	movl	%edx, 4(%esp)           # 4-byte Spill
-	sbbl	12(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	16(%edi), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %ebp
-	movl	%ebp, 16(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	sbbl	24(%edi), %ebp
-	movl	%ebp, 20(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	sbbl	28(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ecx
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	sbbl	48(%edi), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	52(%edi), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, %ebx
-	sbbl	56(%edi), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebx         # 4-byte Reload
-	sbbl	60(%edi), %ebx
-	movl	80(%esp), %edi          # 4-byte Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	testl	%ebx, %ebx
-	js	.LBB249_2
-# BB#1:
-	movl	%esi, %edi
-.LBB249_2:
-	movl	144(%esp), %ebx
-	movl	%edi, (%ebx)
-	movl	84(%esp), %edx          # 4-byte Reload
-	js	.LBB249_4
-# BB#3:
-	movl	(%esp), %edx            # 4-byte Reload
-.LBB249_4:
-	movl	%edx, 4(%ebx)
-	movl	68(%esp), %edx          # 4-byte Reload
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB249_6
-# BB#5:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB249_6:
-	movl	%eax, 8(%ebx)
-	movl	100(%esp), %eax         # 4-byte Reload
-	movl	88(%esp), %ecx          # 4-byte Reload
-	movl	64(%esp), %esi          # 4-byte Reload
-	js	.LBB249_8
-# BB#7:
-	movl	8(%esp), %esi           # 4-byte Reload
-.LBB249_8:
-	movl	%esi, 12(%ebx)
-	movl	108(%esp), %esi         # 4-byte Reload
-	js	.LBB249_10
-# BB#9:
-	movl	12(%esp), %edx          # 4-byte Reload
-.LBB249_10:
-	movl	%edx, 16(%ebx)
-	movl	112(%esp), %edi         # 4-byte Reload
-	movl	104(%esp), %ebp         # 4-byte Reload
-	js	.LBB249_12
-# BB#11:
-	movl	16(%esp), %edx          # 4-byte Reload
-	movl	%edx, 72(%esp)          # 4-byte Spill
-.LBB249_12:
-	movl	72(%esp), %edx          # 4-byte Reload
-	movl	%edx, 20(%ebx)
-	js	.LBB249_14
-# BB#13:
-	movl	20(%esp), %ecx          # 4-byte Reload
-.LBB249_14:
-	movl	%ecx, 24(%ebx)
-	js	.LBB249_16
-# BB#15:
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-.LBB249_16:
-	movl	116(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 28(%ebx)
-	js	.LBB249_18
-# BB#17:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB249_18:
-	movl	%eax, 32(%ebx)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	js	.LBB249_20
-# BB#19:
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 120(%esp)         # 4-byte Spill
-.LBB249_20:
-	movl	120(%esp), %eax         # 4-byte Reload
-	movl	%eax, 36(%ebx)
-	js	.LBB249_22
-# BB#21:
-	movl	36(%esp), %ebp          # 4-byte Reload
-.LBB249_22:
-	movl	%ebp, 40(%ebx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	js	.LBB249_24
-# BB#23:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB249_24:
-	movl	%eax, 44(%ebx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	js	.LBB249_26
-# BB#25:
-	movl	44(%esp), %esi          # 4-byte Reload
-.LBB249_26:
-	movl	%esi, 48(%ebx)
-	js	.LBB249_28
-# BB#27:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB249_28:
-	movl	%eax, 52(%ebx)
-	js	.LBB249_30
-# BB#29:
-	movl	52(%esp), %ecx          # 4-byte Reload
-.LBB249_30:
-	movl	%ecx, 56(%ebx)
-	js	.LBB249_32
-# BB#31:
-	movl	56(%esp), %edi          # 4-byte Reload
-.LBB249_32:
-	movl	%edi, 60(%ebx)
-	addl	$124, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end249:
-	.size	mcl_fp_addNF16L, .Lfunc_end249-mcl_fp_addNF16L
-
-	.globl	mcl_fp_sub16L
-	.align	16, 0x90
-	.type	mcl_fp_sub16L,@function
-mcl_fp_sub16L:                          # @mcl_fp_sub16L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$60, %esp
-	movl	84(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	88(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	28(%esi), %eax
-	sbbl	28(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	32(%esi), %eax
-	sbbl	32(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	36(%esi), %eax
-	sbbl	36(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	40(%esi), %eax
-	sbbl	40(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	44(%esi), %edx
-	sbbl	44(%edi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	48(%esi), %ecx
-	sbbl	48(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	52(%esi), %eax
-	sbbl	52(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	56(%esi), %ebp
-	sbbl	56(%edi), %ebp
-	movl	60(%esi), %esi
-	sbbl	60(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	80(%esp), %ebx
-	movl	52(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	56(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 36(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 40(%ebx)
-	movl	%edx, 44(%ebx)
-	movl	%ecx, 48(%ebx)
-	movl	%eax, 52(%ebx)
-	movl	%ebp, 56(%ebx)
-	movl	%esi, 60(%ebx)
-	je	.LBB250_2
-# BB#1:                                 # %carry
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	92(%esp), %esi
-	movl	52(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	44(%esp), %edi          # 4-byte Reload
-	adcl	8(%esi), %edi
-	movl	12(%esi), %eax
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%edi, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	48(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	28(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	36(%esi), %eax
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	40(%esi), %ecx
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 36(%ebx)
-	movl	44(%esi), %eax
-	adcl	12(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 40(%ebx)
-	movl	48(%esi), %ecx
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 44(%ebx)
-	movl	52(%esi), %eax
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 48(%ebx)
-	movl	%eax, 52(%ebx)
-	movl	56(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 56(%ebx)
-	movl	60(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, 60(%ebx)
-.LBB250_2:                              # %nocarry
-	addl	$60, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end250:
-	.size	mcl_fp_sub16L, .Lfunc_end250-mcl_fp_sub16L
-
-	.globl	mcl_fp_subNF16L
-	.align	16, 0x90
-	.type	mcl_fp_subNF16L,@function
-mcl_fp_subNF16L:                        # @mcl_fp_subNF16L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$104, %esp
-	movl	128(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edx
-	movl	132(%esp), %edi
-	subl	(%edi), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	60(%ecx), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	56(%ecx), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	52(%ecx), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	48(%ecx), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	44(%ecx), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	40(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	36(%ecx), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	32(%ecx), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	28(%ecx), %ebp
-	movl	24(%ecx), %ebx
-	movl	20(%ecx), %esi
-	movl	16(%ecx), %edx
-	movl	12(%ecx), %eax
-	movl	8(%ecx), %ecx
-	sbbl	8(%edi), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	sbbl	12(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	16(%edi), %edx
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	sbbl	20(%edi), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	sbbl	24(%edi), %ebx
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	sbbl	28(%edi), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	sbbl	36(%edi), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	44(%edi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	sbbl	48(%edi), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	sbbl	52(%edi), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	sbbl	56(%edi), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	sbbl	60(%edi), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	sarl	$31, %eax
-	movl	136(%esp), %esi
-	movl	60(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	56(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	52(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	48(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	44(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	40(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	36(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	32(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	28(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	24(%esi), %ecx
-	andl	%eax, %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	20(%esi), %ebp
-	andl	%eax, %ebp
-	movl	16(%esi), %ebx
-	andl	%eax, %ebx
-	movl	12(%esi), %edi
-	andl	%eax, %edi
-	movl	8(%esi), %edx
-	andl	%eax, %edx
-	movl	4(%esi), %ecx
-	andl	%eax, %ecx
-	andl	(%esi), %eax
-	addl	64(%esp), %eax          # 4-byte Folded Reload
-	adcl	68(%esp), %ecx          # 4-byte Folded Reload
-	movl	124(%esp), %esi
-	movl	%eax, (%esi)
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 4(%esi)
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edx, 8(%esi)
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edi, 12(%esi)
-	adcl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebx, 16(%esi)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 20(%esi)
-	movl	4(%esp), %ecx           # 4-byte Reload
-	adcl	56(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 24(%esi)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 28(%esi)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	adcl	100(%esp), %ecx         # 4-byte Folded Reload
-	movl	%eax, 32(%esi)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 36(%esi)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	adcl	84(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 40(%esi)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 44(%esi)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	adcl	92(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 48(%esi)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 52(%esi)
-	movl	%eax, 56(%esi)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esi)
-	addl	$104, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end251:
-	.size	mcl_fp_subNF16L, .Lfunc_end251-mcl_fp_subNF16L
-
-	.globl	mcl_fpDbl_add16L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add16L,@function
-mcl_fpDbl_add16L:                       # @mcl_fpDbl_add16L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$116, %esp
-	movl	144(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edx
-	movl	140(%esp), %ebx
-	addl	(%ebx), %esi
-	adcl	4(%ebx), %edx
-	movl	8(%ecx), %edi
-	adcl	8(%ebx), %edi
-	movl	12(%ebx), %ebp
-	movl	136(%esp), %eax
-	movl	%esi, (%eax)
-	movl	16(%ebx), %esi
-	adcl	12(%ecx), %ebp
-	adcl	16(%ecx), %esi
-	movl	%edx, 4(%eax)
-	movl	72(%ecx), %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	%edi, 8(%eax)
-	movl	20(%ecx), %edx
-	movl	%ebp, 12(%eax)
-	movl	20(%ebx), %edi
-	adcl	%edx, %edi
-	movl	24(%ecx), %edx
-	movl	%esi, 16(%eax)
-	movl	24(%ebx), %esi
-	adcl	%edx, %esi
-	movl	28(%ecx), %edx
-	movl	%edi, 20(%eax)
-	movl	28(%ebx), %edi
-	adcl	%edx, %edi
-	movl	32(%ecx), %edx
-	movl	%esi, 24(%eax)
-	movl	32(%ebx), %esi
-	adcl	%edx, %esi
-	movl	36(%ecx), %edx
-	movl	%edi, 28(%eax)
-	movl	36(%ebx), %edi
-	adcl	%edx, %edi
-	movl	40(%ecx), %edx
-	movl	%esi, 32(%eax)
-	movl	40(%ebx), %esi
-	adcl	%edx, %esi
-	movl	44(%ecx), %edx
-	movl	%edi, 36(%eax)
-	movl	44(%ebx), %edi
-	adcl	%edx, %edi
-	movl	48(%ecx), %edx
-	movl	%esi, 40(%eax)
-	movl	48(%ebx), %esi
-	adcl	%edx, %esi
-	movl	52(%ecx), %edx
-	movl	%edi, 44(%eax)
-	movl	52(%ebx), %edi
-	adcl	%edx, %edi
-	movl	56(%ecx), %edx
-	movl	%esi, 48(%eax)
-	movl	56(%ebx), %esi
-	adcl	%edx, %esi
-	movl	60(%ecx), %edx
-	movl	%edi, 52(%eax)
-	movl	60(%ebx), %ebp
-	adcl	%edx, %ebp
-	movl	64(%ecx), %edx
-	movl	%esi, 56(%eax)
-	movl	64(%ebx), %esi
-	adcl	%edx, %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	68(%ecx), %edx
-	movl	%ebp, 60(%eax)
-	movl	68(%ebx), %eax
-	adcl	%edx, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	72(%ebx), %eax
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	76(%ecx), %ebp
-	movl	76(%ebx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%ecx), %ebp
-	movl	80(%ebx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	84(%ecx), %ebp
-	movl	84(%ebx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	88(%ecx), %ebp
-	movl	88(%ebx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	92(%ecx), %ebp
-	movl	92(%ebx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	96(%ecx), %ebp
-	movl	96(%ebx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	100(%ecx), %ebp
-	movl	100(%ebx), %edx
-	adcl	%ebp, %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	104(%ecx), %ebp
-	movl	104(%ebx), %edx
-	adcl	%ebp, %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	108(%ecx), %ebp
-	movl	108(%ebx), %edx
-	adcl	%ebp, %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	112(%ecx), %edx
-	movl	112(%ebx), %ebp
-	adcl	%edx, %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	116(%ecx), %edx
-	movl	116(%ebx), %esi
-	adcl	%edx, %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	120(%ecx), %edx
-	movl	120(%ebx), %edi
-	adcl	%edx, %edi
-	movl	124(%ecx), %ecx
-	movl	124(%ebx), %esi
-	adcl	%ecx, %esi
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	148(%esp), %edx
-	movl	72(%esp), %ebx          # 4-byte Reload
-	subl	(%edx), %ebx
-	movl	%ebx, (%esp)            # 4-byte Spill
-	movl	76(%esp), %ebx          # 4-byte Reload
-	sbbl	4(%edx), %ebx
-	movl	%ebx, 4(%esp)           # 4-byte Spill
-	movl	80(%esp), %ebx          # 4-byte Reload
-	sbbl	8(%edx), %ebx
-	movl	%ebx, 8(%esp)           # 4-byte Spill
-	movl	84(%esp), %ebx          # 4-byte Reload
-	sbbl	12(%edx), %ebx
-	movl	%ebx, 12(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebx         # 4-byte Reload
-	sbbl	16(%edx), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebx          # 4-byte Reload
-	sbbl	20(%edx), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	108(%esp), %ebx         # 4-byte Reload
-	sbbl	24(%edx), %ebx
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebx          # 4-byte Reload
-	sbbl	28(%edx), %ebx
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	%eax, %ebx
-	sbbl	32(%edx), %ebx
-	movl	112(%esp), %eax         # 4-byte Reload
-	sbbl	36(%edx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	sbbl	44(%edx), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	sbbl	48(%edx), %ebp
-	movl	%ebp, 44(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ebp
-	sbbl	52(%edx), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	%edi, %ebp
-	sbbl	56(%edx), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	%esi, %ebp
-	sbbl	60(%edx), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB252_2
-# BB#1:
-	movl	%ebx, 64(%esp)          # 4-byte Spill
-.LBB252_2:
-	testb	%cl, %cl
-	movl	72(%esp), %ecx          # 4-byte Reload
-	jne	.LBB252_4
-# BB#3:
-	movl	(%esp), %ecx            # 4-byte Reload
-.LBB252_4:
-	movl	136(%esp), %ebx
-	movl	%ecx, 64(%ebx)
-	movl	%esi, %ebp
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	movl	92(%esp), %ecx          # 4-byte Reload
-	movl	88(%esp), %edx          # 4-byte Reload
-	movl	76(%esp), %esi          # 4-byte Reload
-	jne	.LBB252_6
-# BB#5:
-	movl	4(%esp), %esi           # 4-byte Reload
-.LBB252_6:
-	movl	%esi, 68(%ebx)
-	movl	84(%esp), %esi          # 4-byte Reload
-	movl	80(%esp), %eax          # 4-byte Reload
-	jne	.LBB252_8
-# BB#7:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB252_8:
-	movl	%eax, 72(%ebx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	jne	.LBB252_10
-# BB#9:
-	movl	12(%esp), %esi          # 4-byte Reload
-.LBB252_10:
-	movl	%esi, 76(%ebx)
-	jne	.LBB252_12
-# BB#11:
-	movl	16(%esp), %esi          # 4-byte Reload
-	movl	%esi, 104(%esp)         # 4-byte Spill
-.LBB252_12:
-	movl	104(%esp), %esi         # 4-byte Reload
-	movl	%esi, 80(%ebx)
-	jne	.LBB252_14
-# BB#13:
-	movl	20(%esp), %edx          # 4-byte Reload
-.LBB252_14:
-	movl	%edx, 84(%ebx)
-	jne	.LBB252_16
-# BB#15:
-	movl	24(%esp), %edx          # 4-byte Reload
-	movl	%edx, 108(%esp)         # 4-byte Spill
-.LBB252_16:
-	movl	108(%esp), %edx         # 4-byte Reload
-	movl	%edx, 88(%ebx)
-	jne	.LBB252_18
-# BB#17:
-	movl	28(%esp), %ecx          # 4-byte Reload
-.LBB252_18:
-	movl	%ecx, 92(%ebx)
-	movl	64(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 96(%ebx)
-	jne	.LBB252_20
-# BB#19:
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-.LBB252_20:
-	movl	112(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 100(%ebx)
-	jne	.LBB252_22
-# BB#21:
-	movl	36(%esp), %edi          # 4-byte Reload
-.LBB252_22:
-	movl	%edi, 104(%ebx)
-	movl	100(%esp), %ecx         # 4-byte Reload
-	jne	.LBB252_24
-# BB#23:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB252_24:
-	movl	%ecx, 108(%ebx)
-	movl	72(%esp), %ecx          # 4-byte Reload
-	jne	.LBB252_26
-# BB#25:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB252_26:
-	movl	%eax, 112(%ebx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	jne	.LBB252_28
-# BB#27:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB252_28:
-	movl	%eax, 116(%ebx)
-	jne	.LBB252_30
-# BB#29:
-	movl	52(%esp), %ecx          # 4-byte Reload
-.LBB252_30:
-	movl	%ecx, 120(%ebx)
-	jne	.LBB252_32
-# BB#31:
-	movl	56(%esp), %ebp          # 4-byte Reload
-.LBB252_32:
-	movl	%ebp, 124(%ebx)
-	addl	$116, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end252:
-	.size	mcl_fpDbl_add16L, .Lfunc_end252-mcl_fpDbl_add16L
-
-	.globl	mcl_fpDbl_sub16L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub16L,@function
-mcl_fpDbl_sub16L:                       # @mcl_fpDbl_sub16L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$108, %esp
-	movl	132(%esp), %eax
-	movl	(%eax), %esi
-	movl	4(%eax), %edi
-	movl	136(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%eax), %ebx
-	sbbl	8(%edx), %ebx
-	movl	128(%esp), %ecx
-	movl	%esi, (%ecx)
-	movl	12(%eax), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ecx)
-	movl	16(%eax), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebx, 8(%ecx)
-	movl	20(%edx), %ebx
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	24(%edx), %ebx
-	movl	%edi, 16(%ecx)
-	movl	24(%eax), %edi
-	sbbl	%ebx, %edi
-	movl	28(%edx), %ebx
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	32(%edx), %ebx
-	movl	%edi, 24(%ecx)
-	movl	32(%eax), %edi
-	sbbl	%ebx, %edi
-	movl	36(%edx), %ebx
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	40(%edx), %ebx
-	movl	%edi, 32(%ecx)
-	movl	40(%eax), %edi
-	sbbl	%ebx, %edi
-	movl	44(%edx), %ebx
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	48(%edx), %ebx
-	movl	%edi, 40(%ecx)
-	movl	48(%eax), %edi
-	sbbl	%ebx, %edi
-	movl	52(%edx), %ebx
-	movl	%esi, 44(%ecx)
-	movl	52(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	56(%edx), %ebx
-	movl	%edi, 48(%ecx)
-	movl	56(%eax), %edi
-	sbbl	%ebx, %edi
-	movl	60(%edx), %ebx
-	movl	%esi, 52(%ecx)
-	movl	60(%eax), %esi
-	sbbl	%ebx, %esi
-	movl	64(%edx), %ebx
-	movl	%edi, 56(%ecx)
-	movl	64(%eax), %edi
-	sbbl	%ebx, %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	68(%edx), %edi
-	movl	%esi, 60(%ecx)
-	movl	68(%eax), %esi
-	sbbl	%edi, %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	72(%edx), %esi
-	movl	72(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	76(%edx), %esi
-	movl	76(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	80(%edx), %esi
-	movl	80(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	84(%edx), %esi
-	movl	84(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	88(%edx), %esi
-	movl	88(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	92(%edx), %esi
-	movl	92(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	96(%edx), %esi
-	movl	96(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	100(%edx), %esi
-	movl	100(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	104(%edx), %esi
-	movl	104(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	108(%edx), %esi
-	movl	108(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	112(%edx), %esi
-	movl	112(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	116(%edx), %esi
-	movl	116(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	120(%edx), %esi
-	movl	120(%eax), %edi
-	sbbl	%esi, %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	124(%edx), %edx
-	movl	124(%eax), %eax
-	sbbl	%edx, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	140(%esp), %ebx
-	jne	.LBB253_1
-# BB#2:
-	movl	$0, 68(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_3
-.LBB253_1:
-	movl	60(%ebx), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-.LBB253_3:
-	testb	%al, %al
-	jne	.LBB253_4
-# BB#5:
-	movl	$0, 24(%esp)            # 4-byte Folded Spill
-	movl	$0, %ebp
-	jmp	.LBB253_6
-.LBB253_4:
-	movl	(%ebx), %ebp
-	movl	4(%ebx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-.LBB253_6:
-	jne	.LBB253_7
-# BB#8:
-	movl	$0, 36(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_9
-.LBB253_7:
-	movl	56(%ebx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-.LBB253_9:
-	jne	.LBB253_10
-# BB#11:
-	movl	$0, 32(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_12
-.LBB253_10:
-	movl	52(%ebx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-.LBB253_12:
-	jne	.LBB253_13
-# BB#14:
-	movl	$0, 28(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_15
-.LBB253_13:
-	movl	48(%ebx), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-.LBB253_15:
-	jne	.LBB253_16
-# BB#17:
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_18
-.LBB253_16:
-	movl	44(%ebx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-.LBB253_18:
-	jne	.LBB253_19
-# BB#20:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_21
-.LBB253_19:
-	movl	40(%ebx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB253_21:
-	jne	.LBB253_22
-# BB#23:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB253_24
-.LBB253_22:
-	movl	36(%ebx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB253_24:
-	jne	.LBB253_25
-# BB#26:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB253_27
-.LBB253_25:
-	movl	32(%ebx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB253_27:
-	jne	.LBB253_28
-# BB#29:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB253_30
-.LBB253_28:
-	movl	28(%ebx), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB253_30:
-	jne	.LBB253_31
-# BB#32:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB253_33
-.LBB253_31:
-	movl	24(%ebx), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB253_33:
-	jne	.LBB253_34
-# BB#35:
-	movl	$0, %esi
-	jmp	.LBB253_36
-.LBB253_34:
-	movl	20(%ebx), %esi
-.LBB253_36:
-	jne	.LBB253_37
-# BB#38:
-	movl	$0, %edx
-	jmp	.LBB253_39
-.LBB253_37:
-	movl	16(%ebx), %edx
-.LBB253_39:
-	jne	.LBB253_40
-# BB#41:
-	movl	$0, %edi
-	jmp	.LBB253_42
-.LBB253_40:
-	movl	12(%ebx), %edi
-.LBB253_42:
-	jne	.LBB253_43
-# BB#44:
-	xorl	%ebx, %ebx
-	jmp	.LBB253_45
-.LBB253_43:
-	movl	8(%ebx), %ebx
-.LBB253_45:
-	addl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, %eax
-	movl	24(%esp), %ebp          # 4-byte Reload
-	adcl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 64(%ecx)
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebp, 68(%ecx)
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebx, 72(%ecx)
-	adcl	56(%esp), %edx          # 4-byte Folded Reload
-	movl	%edi, 76(%ecx)
-	adcl	60(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 80(%ecx)
-	movl	(%esp), %eax            # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%esi, 84(%ecx)
-	movl	4(%esp), %edx           # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 88(%ecx)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 92(%ecx)
-	movl	12(%esp), %edx          # 4-byte Reload
-	adcl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 96(%ecx)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 100(%ecx)
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 104(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 108(%ecx)
-	movl	32(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 112(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%edx, 116(%ecx)
-	movl	%eax, 120(%ecx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 124(%ecx)
-	addl	$108, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end253:
-	.size	mcl_fpDbl_sub16L, .Lfunc_end253-mcl_fpDbl_sub16L
-
-	.align	16, 0x90
-	.type	.LmulPv544x32,@function
-.LmulPv544x32:                          # @mulPv544x32
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$120, %esp
-	movl	%edx, %ebp
-	movl	140(%esp), %ebx
-	movl	%ebx, %eax
-	mull	64(%ebp)
-	movl	%edx, 116(%esp)         # 4-byte Spill
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%ebx, %eax
-	mull	60(%ebp)
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	%ebx, %eax
-	mull	56(%ebp)
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	52(%ebp)
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	48(%ebp)
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	44(%ebp)
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	40(%ebp)
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	36(%ebp)
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	32(%ebp)
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	28(%ebp)
-	movl	%edx, 44(%esp)          # 4-byte Spill
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	24(%ebp)
-	movl	%edx, 36(%esp)          # 4-byte Spill
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	20(%ebp)
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	16(%ebp)
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	%ebx, %eax
-	mull	12(%ebp)
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	8(%ebp)
-	movl	%edx, %edi
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	%ebx, %eax
-	mull	4(%ebp)
-	movl	%edx, %esi
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	%ebx, %eax
-	mull	(%ebp)
-	movl	%eax, (%ecx)
-	addl	(%esp), %edx            # 4-byte Folded Reload
-	movl	%edx, 4(%ecx)
-	adcl	4(%esp), %esi           # 4-byte Folded Reload
-	movl	%esi, 8(%ecx)
-	adcl	8(%esp), %edi           # 4-byte Folded Reload
-	movl	%edi, 12(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 16(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 20(%ecx)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 24(%ecx)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 28(%ecx)
-	movl	44(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 32(%ecx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 36(%ecx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 40(%ecx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 44(%ecx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 48(%ecx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%ecx)
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%ecx)
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 60(%ecx)
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%ecx)
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, 68(%ecx)
-	movl	%ecx, %eax
-	addl	$120, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end254:
-	.size	.LmulPv544x32, .Lfunc_end254-.LmulPv544x32
-
-	.globl	mcl_fp_mulUnitPre17L
-	.align	16, 0x90
-	.type	mcl_fp_mulUnitPre17L,@function
-mcl_fp_mulUnitPre17L:                   # @mcl_fp_mulUnitPre17L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$140, %esp
-	calll	.L255$pb
-.L255$pb:
-	popl	%ebx
-.Ltmp56:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp56-.L255$pb), %ebx
-	movl	168(%esp), %eax
-	movl	%eax, (%esp)
-	leal	64(%esp), %ecx
-	movl	164(%esp), %edx
-	calll	.LmulPv544x32
-	movl	132(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	128(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	84(%esp), %ebp
-	movl	80(%esp), %ebx
-	movl	76(%esp), %edi
-	movl	72(%esp), %esi
-	movl	64(%esp), %edx
-	movl	68(%esp), %ecx
-	movl	160(%esp), %eax
-	movl	%edx, (%eax)
-	movl	%ecx, 4(%eax)
-	movl	%esi, 8(%eax)
-	movl	%edi, 12(%eax)
-	movl	%ebx, 16(%eax)
-	movl	%ebp, 20(%eax)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 24(%eax)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 28(%eax)
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 32(%eax)
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 36(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 40(%eax)
-	movl	36(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 44(%eax)
-	movl	40(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 48(%eax)
-	movl	44(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%eax)
-	movl	48(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%eax)
-	movl	52(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 60(%eax)
-	movl	56(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 64(%eax)
-	movl	60(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 68(%eax)
-	addl	$140, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end255:
-	.size	mcl_fp_mulUnitPre17L, .Lfunc_end255-mcl_fp_mulUnitPre17L
-
-	.globl	mcl_fpDbl_mulPre17L
-	.align	16, 0x90
-	.type	mcl_fpDbl_mulPre17L,@function
-mcl_fpDbl_mulPre17L:                    # @mcl_fpDbl_mulPre17L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1356, %esp             # imm = 0x54C
-	calll	.L256$pb
-.L256$pb:
-	popl	%edi
-.Ltmp57:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp57-.L256$pb), %edi
-	movl	%edi, 124(%esp)         # 4-byte Spill
-	movl	1384(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1280(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	%edx, %esi
-	movl	%edi, %ebx
-	calll	.LmulPv544x32
-	movl	1348(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	1344(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1340(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1336(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1332(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1328(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1320(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1316(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1312(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1308(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1304(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1300(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	1296(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	1292(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	1288(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	1280(%esp), %eax
-	movl	1284(%esp), %ebp
-	movl	1376(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	1384(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1208(%esp), %ecx
-	movl	%esi, %edx
-	movl	%edi, %ebx
-	calll	.LmulPv544x32
-	addl	1208(%esp), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	1276(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1272(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	1268(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1264(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	1260(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1256(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	1252(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1248(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1244(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1240(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1236(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1232(%esp), %edi
-	movl	1228(%esp), %esi
-	movl	1224(%esp), %edx
-	movl	1220(%esp), %ecx
-	movl	1212(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1216(%esp), %eax
-	movl	1376(%esp), %ebp
-	movl	8(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, 4(%ebp)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	adcl	%ebp, 120(%esp)         # 4-byte Folded Spill
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 64(%esp)          # 4-byte Folded Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1136(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	1136(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1204(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1200(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1196(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1192(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1188(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1184(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1180(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	1176(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1168(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1164(%esp), %ebx
-	movl	1160(%esp), %edi
-	movl	1156(%esp), %esi
-	movl	1152(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1148(%esp), %edx
-	movl	1140(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1144(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 8(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	80(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1064(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	1064(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	1132(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1128(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1124(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	1120(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1116(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1112(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1108(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1100(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	1096(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1092(%esp), %ebx
-	movl	1088(%esp), %edi
-	movl	1084(%esp), %esi
-	movl	1080(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1076(%esp), %edx
-	movl	1068(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1072(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	992(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	992(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1060(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1056(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1052(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1044(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1040(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1036(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1032(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1028(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1024(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1020(%esp), %ebx
-	movl	1016(%esp), %edi
-	movl	1012(%esp), %esi
-	movl	1008(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	1004(%esp), %edx
-	movl	996(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1000(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 24(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	920(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	920(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	988(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	984(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	980(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	976(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	972(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	968(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	964(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	960(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	956(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	952(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	948(%esp), %ebx
-	movl	944(%esp), %edi
-	movl	940(%esp), %esi
-	movl	936(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	932(%esp), %edx
-	movl	924(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	928(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	848(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	848(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	916(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	912(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	908(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	904(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	900(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	896(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	892(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	888(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	884(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	880(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	876(%esp), %ebx
-	movl	872(%esp), %edi
-	movl	868(%esp), %esi
-	movl	864(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	860(%esp), %edx
-	movl	852(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	856(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	776(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	776(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	844(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	840(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	836(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	832(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	828(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	820(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	816(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	812(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	808(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	804(%esp), %ebx
-	movl	800(%esp), %edi
-	movl	796(%esp), %esi
-	movl	792(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	788(%esp), %edx
-	movl	780(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	784(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	704(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	704(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	772(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	768(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	764(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	760(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	756(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	752(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	748(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	744(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	740(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	736(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	732(%esp), %ebx
-	movl	728(%esp), %edi
-	movl	724(%esp), %esi
-	movl	720(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	716(%esp), %edx
-	movl	708(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	712(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 32(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	632(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	700(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	696(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	692(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	688(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	684(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	680(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	676(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	672(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	668(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	664(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	660(%esp), %ebx
-	movl	656(%esp), %edi
-	movl	652(%esp), %esi
-	movl	648(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	644(%esp), %edx
-	movl	636(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	640(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 36(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	560(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	628(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	624(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	620(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	616(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	612(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	596(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	592(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	588(%esp), %ebx
-	movl	584(%esp), %edi
-	movl	580(%esp), %esi
-	movl	576(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	572(%esp), %edx
-	movl	564(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	568(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 40(%eax)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 24(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	488(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	488(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	540(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	536(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	532(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	528(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	524(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	520(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	516(%esp), %ebx
-	movl	512(%esp), %edi
-	movl	508(%esp), %esi
-	movl	504(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	500(%esp), %edx
-	movl	492(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	496(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 44(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	416(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	484(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	480(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	476(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	472(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	468(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	464(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	460(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	456(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	452(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	448(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	444(%esp), %ebx
-	movl	440(%esp), %edi
-	movl	436(%esp), %esi
-	movl	432(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	428(%esp), %edx
-	movl	420(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	424(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 48(%eax)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	344(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	404(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	400(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	396(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	392(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	388(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	380(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	376(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	372(%esp), %ebx
-	movl	368(%esp), %edi
-	movl	364(%esp), %esi
-	movl	360(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	356(%esp), %edx
-	movl	348(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	352(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 52(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	1380(%esp), %eax
-	movl	%eax, %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	272(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	340(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	336(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	332(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	328(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	324(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	320(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	316(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	312(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	304(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	300(%esp), %ebx
-	movl	296(%esp), %edi
-	movl	292(%esp), %edx
-	movl	288(%esp), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	284(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	276(%esp), %eax
-	movl	280(%esp), %ecx
-	movl	120(%esp), %esi         # 4-byte Reload
-	movl	1376(%esp), %ebp
-	movl	%esi, 56(%ebp)
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, %ebp
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 24(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %ecx
-	movl	%ecx, %eax
-	movl	60(%eax), %eax
-	movl	%eax, (%esp)
-	leal	200(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	200(%esp), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	268(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	264(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	252(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	248(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	244(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	240(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	236(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	232(%esp), %edi
-	movl	228(%esp), %esi
-	movl	224(%esp), %edx
-	movl	220(%esp), %ecx
-	movl	216(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	204(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	208(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	1376(%esp), %ebx
-	movl	%ebp, 60(%ebx)
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	adcl	$0, 52(%esp)            # 4-byte Folded Spill
-	movl	1384(%esp), %eax
-	movl	64(%eax), %eax
-	movl	%eax, (%esp)
-	leal	128(%esp), %ecx
-	movl	1380(%esp), %edx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	128(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	132(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	136(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	196(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	164(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	156(%esp), %ebx
-	movl	152(%esp), %edi
-	movl	148(%esp), %esi
-	movl	144(%esp), %edx
-	movl	140(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 64(%eax)
-	movl	64(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 68(%eax)
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	76(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 72(%eax)
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 76(%eax)
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 80(%eax)
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 84(%eax)
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edi, 88(%eax)
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebx, 92(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 96(%eax)
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 100(%eax)
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	96(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 104(%eax)
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx         # 4-byte Folded Reload
-	movl	%ecx, 108(%eax)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	116(%esp), %ecx         # 4-byte Folded Reload
-	movl	%edx, 112(%eax)
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 116(%eax)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 120(%eax)
-	movl	%ecx, 124(%eax)
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 128(%eax)
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	$0, %ecx
-	movl	%ecx, 132(%eax)
-	addl	$1356, %esp             # imm = 0x54C
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	retl
-.Lfunc_end256:
-	.size	mcl_fpDbl_mulPre17L, .Lfunc_end256-mcl_fpDbl_mulPre17L
-
-	.globl	mcl_fpDbl_sqrPre17L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sqrPre17L,@function
-mcl_fpDbl_sqrPre17L:                    # @mcl_fpDbl_sqrPre17L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$1356, %esp             # imm = 0x54C
-	calll	.L257$pb
-.L257$pb:
-	popl	%ebx
-.Ltmp58:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp58-.L257$pb), %ebx
-	movl	%ebx, 124(%esp)         # 4-byte Spill
-	movl	1380(%esp), %edx
-	movl	(%edx), %eax
-	movl	%eax, (%esp)
-	leal	1280(%esp), %ecx
-	movl	%edx, %edi
-	movl	%ebx, %esi
-	calll	.LmulPv544x32
-	movl	1348(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	1344(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1340(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1336(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1332(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1328(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1320(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1316(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1312(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1308(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1304(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1300(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	1296(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	1292(%esp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	1288(%esp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	1280(%esp), %eax
-	movl	1284(%esp), %ebp
-	movl	1376(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	%edi, %edx
-	movl	4(%edx), %eax
-	movl	%eax, (%esp)
-	leal	1208(%esp), %ecx
-	movl	%esi, %ebx
-	calll	.LmulPv544x32
-	addl	1208(%esp), %ebp
-	movl	%ebp, 8(%esp)           # 4-byte Spill
-	movl	1276(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1272(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	1268(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1264(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	1260(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1256(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	1252(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1248(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1244(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1240(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1236(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1232(%esp), %edi
-	movl	1228(%esp), %esi
-	movl	1224(%esp), %edx
-	movl	1220(%esp), %ecx
-	movl	1212(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1216(%esp), %eax
-	movl	1376(%esp), %ebp
-	movl	8(%esp), %ebx           # 4-byte Reload
-	movl	%ebx, 4(%ebp)
-	movl	12(%esp), %ebp          # 4-byte Reload
-	adcl	%ebp, 120(%esp)         # 4-byte Folded Spill
-	adcl	16(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 20(%esp)          # 4-byte Spill
-	adcl	40(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 24(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	52(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 64(%esp)          # 4-byte Folded Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, 56(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	8(%edx), %eax
-	movl	%eax, (%esp)
-	leal	1136(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	1136(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1204(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1200(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1196(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1192(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1188(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1184(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1180(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	1176(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1172(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1168(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1164(%esp), %ebx
-	movl	1160(%esp), %edi
-	movl	1156(%esp), %esi
-	movl	1152(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1148(%esp), %edx
-	movl	1140(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1144(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 8(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	80(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	72(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	56(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	12(%edx), %eax
-	movl	%eax, (%esp)
-	leal	1064(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	1064(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	1132(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	1128(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1124(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	1120(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	1116(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	1112(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	1108(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	1104(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	1100(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	1096(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	1092(%esp), %ebx
-	movl	1088(%esp), %edi
-	movl	1084(%esp), %esi
-	movl	1080(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	1076(%esp), %edx
-	movl	1068(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1072(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 12(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	16(%edx), %eax
-	movl	%eax, (%esp)
-	leal	992(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	992(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	1060(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	1056(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	1052(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	1048(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	1044(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	1040(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	1036(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	1032(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	1028(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	1024(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	1020(%esp), %ebx
-	movl	1016(%esp), %edi
-	movl	1012(%esp), %esi
-	movl	1008(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	1004(%esp), %edx
-	movl	996(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	1000(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 16(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	20(%edx), %eax
-	movl	%eax, (%esp)
-	leal	920(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	920(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	988(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	984(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	980(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	976(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	972(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	968(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	964(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	960(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	956(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	952(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	948(%esp), %ebx
-	movl	944(%esp), %edi
-	movl	940(%esp), %esi
-	movl	936(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	932(%esp), %edx
-	movl	924(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	928(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 20(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	24(%edx), %eax
-	movl	%eax, (%esp)
-	leal	848(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	848(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	916(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	912(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	908(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	904(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	900(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	896(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	892(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	888(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	884(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	880(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	876(%esp), %ebx
-	movl	872(%esp), %edi
-	movl	868(%esp), %esi
-	movl	864(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	860(%esp), %edx
-	movl	852(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	856(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 24(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	28(%edx), %eax
-	movl	%eax, (%esp)
-	leal	776(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	776(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	844(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	840(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	836(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	832(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	828(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	824(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	820(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	816(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	812(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	808(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	804(%esp), %ebx
-	movl	800(%esp), %edi
-	movl	796(%esp), %esi
-	movl	792(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	788(%esp), %edx
-	movl	780(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	784(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 28(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	32(%edx), %eax
-	movl	%eax, (%esp)
-	leal	704(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	704(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	772(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	768(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	764(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	760(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	756(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	752(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	748(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	744(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	740(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	736(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	732(%esp), %ebx
-	movl	728(%esp), %edi
-	movl	724(%esp), %esi
-	movl	720(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	716(%esp), %edx
-	movl	708(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	712(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 32(%eax)
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 32(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 24(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 40(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	36(%edx), %eax
-	movl	%eax, (%esp)
-	leal	632(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	632(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	700(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	696(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	692(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	688(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	684(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	680(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	676(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	672(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	668(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	664(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	660(%esp), %ebx
-	movl	656(%esp), %edi
-	movl	652(%esp), %esi
-	movl	648(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	644(%esp), %edx
-	movl	636(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	640(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	52(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 36(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	32(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	24(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	40(%edx), %eax
-	movl	%eax, (%esp)
-	leal	560(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	560(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	628(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	624(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	620(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	616(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	612(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	608(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	604(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	600(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	596(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	592(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	588(%esp), %ebx
-	movl	584(%esp), %edi
-	movl	580(%esp), %esi
-	movl	576(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	572(%esp), %edx
-	movl	564(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	568(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 40(%eax)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 24(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 16(%esp)          # 4-byte Spill
-	adcl	52(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	44(%edx), %eax
-	movl	%eax, (%esp)
-	leal	488(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	488(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	556(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	552(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	548(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	544(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	540(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	536(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	532(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	528(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	524(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	520(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	516(%esp), %ebx
-	movl	512(%esp), %edi
-	movl	508(%esp), %esi
-	movl	504(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	500(%esp), %edx
-	movl	492(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	496(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 44(%eax)
-	movl	8(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	48(%edx), %eax
-	movl	%eax, (%esp)
-	leal	416(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	416(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	484(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	480(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	476(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	472(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	468(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	464(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	460(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	456(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	452(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	448(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	444(%esp), %ebx
-	movl	440(%esp), %edi
-	movl	436(%esp), %esi
-	movl	432(%esp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	428(%esp), %edx
-	movl	420(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	424(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 48(%eax)
-	movl	48(%esp), %ebp          # 4-byte Reload
-	adcl	4(%esp), %ebp           # 4-byte Folded Reload
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 24(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	52(%edx), %eax
-	movl	%eax, (%esp)
-	leal	344(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	344(%esp), %ebp
-	movl	%ebp, 48(%esp)          # 4-byte Spill
-	movl	412(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	408(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	404(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	400(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	396(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	392(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	388(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	384(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	380(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	376(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	372(%esp), %ebx
-	movl	368(%esp), %edi
-	movl	364(%esp), %esi
-	movl	360(%esp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	356(%esp), %edx
-	movl	348(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	352(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	48(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 52(%eax)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	%eax, 120(%esp)         # 4-byte Folded Spill
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 8(%esp)           # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 28(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 12(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 16(%esp)          # 4-byte Spill
-	adcl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	76(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	40(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	32(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, 36(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	56(%edx), %eax
-	movl	%eax, (%esp)
-	leal	272(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	272(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	340(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	336(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	332(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	328(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	324(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	320(%esp), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	316(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	312(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	308(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	304(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	300(%esp), %ebx
-	movl	296(%esp), %edi
-	movl	292(%esp), %edx
-	movl	288(%esp), %esi
-	movl	%esi, 24(%esp)          # 4-byte Spill
-	movl	284(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	276(%esp), %eax
-	movl	280(%esp), %ecx
-	movl	120(%esp), %esi         # 4-byte Reload
-	movl	1376(%esp), %ebp
-	movl	%esi, 56(%ebp)
-	adcl	4(%esp), %eax           # 4-byte Folded Reload
-	movl	%eax, %ebp
-	adcl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 24(%esp)          # 4-byte Folded Spill
-	adcl	16(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 16(%esp)          # 4-byte Spill
-	adcl	20(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 20(%esp)          # 4-byte Spill
-	adcl	52(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 28(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	68(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	adcl	44(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 48(%esp)          # 4-byte Folded Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, 32(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	60(%edx), %eax
-	movl	%eax, (%esp)
-	leal	200(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	200(%esp), %ebp
-	movl	%ebp, 12(%esp)          # 4-byte Spill
-	movl	268(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	264(%esp), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	260(%esp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	256(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	252(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	248(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	244(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	240(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	236(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	232(%esp), %edi
-	movl	228(%esp), %esi
-	movl	224(%esp), %edx
-	movl	220(%esp), %ecx
-	movl	216(%esp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	212(%esp), %eax
-	movl	204(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	208(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	1376(%esp), %ebx
-	movl	%ebp, 60(%ebx)
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	8(%esp), %ebp           # 4-byte Folded Reload
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	76(%esp), %ebp          # 4-byte Folded Reload
-	adcl	24(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 36(%esp)          # 4-byte Folded Spill
-	adcl	20(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	adcl	28(%esp), %edx          # 4-byte Folded Reload
-	movl	%edx, 28(%esp)          # 4-byte Spill
-	adcl	56(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	116(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	%eax, 40(%esp)          # 4-byte Folded Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	%eax, 44(%esp)          # 4-byte Folded Spill
-	adcl	$0, 52(%esp)            # 4-byte Folded Spill
-	movl	1380(%esp), %edx
-	movl	64(%edx), %eax
-	movl	%eax, (%esp)
-	leal	128(%esp), %ecx
-	movl	124(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	120(%esp), %eax         # 4-byte Reload
-	addl	128(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	132(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	136(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	196(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	176(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	172(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	168(%esp), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	164(%esp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	160(%esp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	156(%esp), %ebx
-	movl	152(%esp), %edi
-	movl	148(%esp), %esi
-	movl	144(%esp), %edx
-	movl	140(%esp), %ecx
-	movl	1376(%esp), %eax
-	movl	120(%esp), %ebp         # 4-byte Reload
-	movl	%ebp, 64(%eax)
-	movl	68(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 68(%eax)
-	adcl	36(%esp), %ecx          # 4-byte Folded Reload
-	movl	76(%esp), %ebp          # 4-byte Reload
-	movl	%ebp, 72(%eax)
-	adcl	24(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 76(%eax)
-	adcl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%edx, 80(%eax)
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%esi, 84(%eax)
-	adcl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edi, 88(%eax)
-	movl	20(%esp), %edx          # 4-byte Reload
-	adcl	72(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebx, 92(%eax)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	80(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 96(%eax)
-	movl	48(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 100(%eax)
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	96(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 104(%eax)
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx         # 4-byte Folded Reload
-	movl	%ecx, 108(%eax)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	112(%esp), %ecx         # 4-byte Folded Reload
-	movl	%edx, 112(%eax)
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	40(%esp), %edx          # 4-byte Folded Reload
-	movl	%ecx, 116(%eax)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	44(%esp), %ecx          # 4-byte Folded Reload
-	movl	%edx, 120(%eax)
-	movl	%ecx, 124(%eax)
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%ecx, 128(%eax)
-	movl	124(%esp), %ecx         # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	796(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	800(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	804(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	808(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	812(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	816(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %edi                  # 4-byte Reload
+	adcl	820(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	824(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	movzbl	%al, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	700(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	688(%esp), %esi
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	692(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	696(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	700(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	704(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	708(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	712(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	716(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	720(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	724(%esp), %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	728(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	732(%esp), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	736(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	740(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	744(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	748(%esp), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	752(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	$0, 20(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	48(%eax)
+	pushl	2444(%esp)
+	leal	628(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	64(%esp), %edx                  # 4-byte Reload
+	addl	616(%esp), %edx
+	movl	68(%esp), %edi                  # 4-byte Reload
+	adcl	620(%esp), %edi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	624(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	628(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	632(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	636(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	640(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	644(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	648(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	652(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	adcl	656(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	660(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	664(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	668(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	672(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	676(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	680(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	556(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	544(%esp), %esi
+	adcl	548(%esp), %edi
+	movl	%edi, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	552(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	556(%esp), %esi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	560(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	564(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	568(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	572(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	576(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	580(%esp), %edi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	584(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	588(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	592(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	596(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	600(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	604(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	608(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	$0, 64(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	52(%eax)
+	pushl	2444(%esp)
+	leal	484(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	68(%esp), %edx                  # 4-byte Reload
+	addl	472(%esp), %edx
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	476(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	480(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	484(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	adcl	488(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	492(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	496(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	500(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	504(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	508(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	512(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	516(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	520(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	524(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	528(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	532(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	536(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	setb	%al
+	subl	$4, %esp
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	movzbl	%al, %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	412(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	400(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	404(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	408(%esp), %esi
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	412(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	416(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	420(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	424(%esp), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	428(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	432(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	436(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	440(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	444(%esp), %ebp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	448(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	452(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	456(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	460(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	464(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	adcl	$0, 68(%esp)                    # 4-byte Folded Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	56(%eax)
+	pushl	2444(%esp)
+	leal	340(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	12(%esp), %edx                  # 4-byte Reload
+	addl	328(%esp), %edx
+	adcl	332(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	adcl	336(%esp), %edi
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	340(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	356(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	360(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	364(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	%ebp, %edi
+	adcl	368(%esp), %edi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	372(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	376(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	380(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	384(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	388(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	392(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	setb	12(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	leal	260(%esp), %eax
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	256(%esp), %esi
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	260(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	264(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	268(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	272(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	276(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	280(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	284(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	288(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	292(%esp), %ebp
+	adcl	296(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	300(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	304(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	308(%esp), %edi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	316(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	320(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movzbl	12(%esp), %eax                  # 1-byte Folded Reload
+	adcl	$0, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	188(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	60(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	56(%esp), %edx                  # 4-byte Reload
+	addl	184(%esp), %edx
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	188(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	192(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	196(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	200(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	204(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	208(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	212(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	216(%esp), %ebp
+	movl	%ebp, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	220(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	224(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	228(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	232(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	240(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	244(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	248(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	setb	56(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	leal	116(%esp), %eax
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	112(%esp), %esi
+	movzbl	56(%esp), %ecx                  # 1-byte Folded Reload
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	116(%esp), %eax
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	120(%esp), %esi
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	adcl	124(%esp), %ebx
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	128(%esp), %ebp
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	132(%esp), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	136(%esp), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %edx                  # 4-byte Reload
+	adcl	140(%esp), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	144(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	148(%esp), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	152(%esp), %edi
+	movl	60(%esp), %edx                  # 4-byte Reload
+	adcl	156(%esp), %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	160(%esp), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	164(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edx                  # 4-byte Reload
+	adcl	168(%esp), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edx                  # 4-byte Reload
+	adcl	172(%esp), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	176(%esp), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
 	adcl	$0, %ecx
-	movl	%ecx, 132(%eax)
-	addl	$1356, %esp             # imm = 0x54C
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	2444(%esp), %edx
+	subl	(%edx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	%esi, %eax
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	sbbl	4(%edx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	%ebx, %eax
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	sbbl	8(%edx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	%ebp, %eax
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	sbbl	12(%edx), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	16(%edx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	20(%edx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%edx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%edx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	sbbl	32(%edx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	sbbl	36(%edx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%edx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	sbbl	44(%edx), %esi
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	sbbl	48(%edx), %ebx
+	movl	64(%esp), %eax                  # 4-byte Reload
+	sbbl	52(%edx), %eax
+	movl	%edx, %ebp
+	movl	68(%esp), %edx                  # 4-byte Reload
+	sbbl	56(%ebp), %edx
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	movl	2444(%esp), %edi
+	sbbl	60(%edi), %ebp
+	sbbl	$0, %ecx
+	testb	$1, %cl
+	jne	.LBB77_1
+# %bb.2:
+	movl	2432(%esp), %ecx
+	movl	%ebp, 60(%ecx)
+	jne	.LBB77_3
+.LBB77_4:
+	movl	%edx, 56(%ecx)
+	movl	88(%esp), %edx                  # 4-byte Reload
+	jne	.LBB77_5
+.LBB77_6:
+	movl	%eax, 52(%ecx)
+	jne	.LBB77_7
+.LBB77_8:
+	movl	%ebx, 48(%ecx)
+	movl	84(%esp), %eax                  # 4-byte Reload
+	jne	.LBB77_9
+.LBB77_10:
+	movl	%esi, 44(%ecx)
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	movl	76(%esp), %esi                  # 4-byte Reload
+	jne	.LBB77_11
+.LBB77_12:
+	movl	%esi, 40(%ecx)
+	movl	100(%esp), %esi                 # 4-byte Reload
+	movl	80(%esp), %edi                  # 4-byte Reload
+	jne	.LBB77_13
+.LBB77_14:
+	movl	%edi, 36(%ecx)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	jne	.LBB77_15
+.LBB77_16:
+	movl	72(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 32(%ecx)
+	jne	.LBB77_17
+.LBB77_18:
+	movl	%eax, 28(%ecx)
+	jne	.LBB77_19
+.LBB77_20:
+	movl	56(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%ecx)
+	jne	.LBB77_21
+.LBB77_22:
+	movl	%edx, 20(%ecx)
+	movl	108(%esp), %edx                 # 4-byte Reload
+	movl	104(%esp), %eax                 # 4-byte Reload
+	jne	.LBB77_23
+.LBB77_24:
+	movl	%ebx, 16(%ecx)
+	jne	.LBB77_25
+.LBB77_26:
+	movl	%edi, 12(%ecx)
+	jne	.LBB77_27
+.LBB77_28:
+	movl	%esi, 8(%ecx)
+	jne	.LBB77_29
+.LBB77_30:
+	movl	%eax, 4(%ecx)
+	je	.LBB77_32
+.LBB77_31:
+	movl	52(%esp), %edx                  # 4-byte Reload
+.LBB77_32:
+	movl	%edx, (%ecx)
+	addl	$2412, %esp                     # imm = 0x96C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end257:
-	.size	mcl_fpDbl_sqrPre17L, .Lfunc_end257-mcl_fpDbl_sqrPre17L
-
-	.globl	mcl_fp_mont17L
-	.align	16, 0x90
-	.type	mcl_fp_mont17L,@function
-mcl_fp_mont17L:                         # @mcl_fp_mont17L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$2588, %esp             # imm = 0xA1C
-	calll	.L258$pb
-.L258$pb:
-	popl	%ebx
-.Ltmp59:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp59-.L258$pb), %ebx
-	movl	2620(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2512(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	2512(%esp), %ebp
-	movl	2516(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	%ebp, %eax
-	imull	%esi, %eax
-	movl	2580(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	2576(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	2572(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	2568(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	2564(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	2560(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2556(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2552(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	2548(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	2544(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	2540(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2536(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	2532(%esp), %edi
-	movl	2528(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2524(%esp), %esi
-	movl	2520(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	2440(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	addl	2440(%esp), %ebp
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2444(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2448(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	2452(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2456(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	2460(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2464(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2468(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2472(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2476(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2480(%esp), %eax
-	movl	%eax, %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2484(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2488(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2492(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2496(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	2500(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	2504(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	2508(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	2616(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2368(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %ebp
-	movl	120(%esp), %ecx         # 4-byte Reload
-	addl	2368(%esp), %ecx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2372(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2376(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2380(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	2384(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2388(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	2392(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2396(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2400(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	2404(%esp), %esi
-	movl	%esi, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	2408(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2412(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2416(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2420(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	2424(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	2428(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	2432(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	2436(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2296(%esp), %ecx
-	movl	2620(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	movl	116(%esp), %eax         # 4-byte Reload
-	andl	$1, %eax
-	addl	2296(%esp), %ebp
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	2300(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	2304(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	2308(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	2312(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	2316(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	2320(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	2324(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	2328(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	2332(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	2336(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	2340(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	2344(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	2348(%esp), %esi
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	2352(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	2356(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	2360(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	2364(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, %edi
-	movl	2616(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2224(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	100(%esp), %ecx         # 4-byte Reload
-	addl	2224(%esp), %ecx
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2228(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2232(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2236(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2240(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2244(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2248(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2252(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2256(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2260(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2264(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2268(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	2272(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
+.LBB77_1:
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	movl	2432(%esp), %ecx
+	movl	%ebp, 60(%ecx)
+	je	.LBB77_4
+.LBB77_3:
+	movl	68(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 56(%ecx)
+	movl	88(%esp), %edx                  # 4-byte Reload
+	je	.LBB77_6
+.LBB77_5:
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 52(%ecx)
+	je	.LBB77_8
+.LBB77_7:
+	movl	20(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 48(%ecx)
+	movl	84(%esp), %eax                  # 4-byte Reload
+	je	.LBB77_10
+.LBB77_9:
+	movl	4(%esp), %esi                   # 4-byte Reload
+	movl	%esi, 44(%ecx)
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	movl	76(%esp), %esi                  # 4-byte Reload
+	je	.LBB77_12
+.LBB77_11:
+	movl	60(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 40(%ecx)
+	movl	100(%esp), %esi                 # 4-byte Reload
+	movl	80(%esp), %edi                  # 4-byte Reload
+	je	.LBB77_14
+.LBB77_13:
+	movl	32(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 36(%ecx)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	je	.LBB77_16
+.LBB77_15:
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 72(%esp)                  # 4-byte Spill
+	movl	72(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 32(%ecx)
+	je	.LBB77_18
+.LBB77_17:
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 28(%ecx)
+	je	.LBB77_20
+.LBB77_19:
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%ecx)
+	je	.LBB77_22
+.LBB77_21:
+	movl	28(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 20(%ecx)
+	movl	108(%esp), %edx                 # 4-byte Reload
+	movl	104(%esp), %eax                 # 4-byte Reload
+	je	.LBB77_24
+.LBB77_23:
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	movl	%ebx, 16(%ecx)
+	je	.LBB77_26
+.LBB77_25:
+	movl	44(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%ecx)
+	je	.LBB77_28
+.LBB77_27:
+	movl	36(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%ecx)
+	je	.LBB77_30
+.LBB77_29:
+	movl	48(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%ecx)
+	jne	.LBB77_31
+	jmp	.LBB77_32
+.Lfunc_end77:
+	.size	mcl_fp_mont16L, .Lfunc_end77-mcl_fp_mont16L
+                                        # -- End function
+	.globl	mcl_fp_montNF16L                # -- Begin function mcl_fp_montNF16L
+	.p2align	4, 0x90
+	.type	mcl_fp_montNF16L,@function
+mcl_fp_montNF16L:                       # @mcl_fp_montNF16L
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$2412, %esp                     # imm = 0x96C
+	calll	.L78$pb
+.L78$pb:
+	popl	%ebx
+.Ltmp20:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp20-.L78$pb), %ebx
+	movl	2444(%esp), %eax
+	movl	-4(%eax), %esi
+	movl	%esi, 68(%esp)                  # 4-byte Spill
+	movl	2440(%esp), %ecx
+	subl	$4, %esp
+	leal	2348(%esp), %eax
+	pushl	(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	2344(%esp), %ebp
+	movl	2348(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	imull	%ebp, %eax
+	movl	2408(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	2404(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	2400(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	2396(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	2392(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	2388(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	2384(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	2380(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	2376(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	2372(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	2368(%esp), %edi
+	movl	2364(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	2360(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	2356(%esp), %esi
+	movl	2352(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	2276(%esp), %ecx
+	pushl	%eax
+	pushl	2452(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	2272(%esp), %ebp
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	2276(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	2280(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	2284(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	2288(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	adcl	2292(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2152(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	2152(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	2284(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2288(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	2292(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	2296(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	2300(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	2304(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	2308(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2312(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	adcl	2316(%esp), %esi
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	2320(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	2324(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	adcl	2328(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	2332(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	2336(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	2204(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	4(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	2264(%esp), %eax
+	movl	64(%esp), %edx                  # 4-byte Reload
+	addl	2200(%esp), %edx
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	2204(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	2208(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	2212(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	2216(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	2220(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	2224(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	2228(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	2232(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	2236(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	2240(%esp), %esi
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	2244(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	2248(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	2252(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	2256(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	2260(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	%eax, %ebp
+	adcl	$0, %ebp
+	subl	$4, %esp
+	leal	2132(%esp), %eax
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %edi
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	2452(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	2128(%esp), %edi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	2132(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	2136(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	2140(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	2144(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	2148(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	2152(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	2156(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	2160(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	2164(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2168(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	2168(%esp), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	2172(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	2176(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	2180(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	2184(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2188(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2192(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2196(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2200(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ebp         # 4-byte Reload
-	adcl	2204(%esp), %ebp
-	movl	128(%esp), %edi         # 4-byte Reload
-	adcl	2208(%esp), %edi
-	movl	132(%esp), %esi         # 4-byte Reload
-	adcl	2212(%esp), %esi
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2216(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2220(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2080(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	112(%esp), %ecx         # 4-byte Reload
-	addl	2080(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2084(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2088(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2092(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2096(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2100(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2104(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2108(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2112(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2116(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2120(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2124(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	2128(%esp), %ebp
-	movl	%ebp, 124(%esp)         # 4-byte Spill
-	adcl	2132(%esp), %edi
-	movl	%edi, 128(%esp)         # 4-byte Spill
-	adcl	2136(%esp), %esi
-	movl	%esi, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	2140(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2144(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	2148(%esp), %esi
-	sbbl	%ebp, %ebp
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2008(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	%ebp, %eax
-	andl	$1, %eax
-	addl	2008(%esp), %edi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	2012(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	2016(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	2020(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	2024(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	2028(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	2032(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	2036(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	2040(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	2044(%esp), %edi
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	2048(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	2052(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	2056(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	2188(%esp), %esi
+	adcl	2192(%esp), %ebp
+	subl	$4, %esp
+	leal	2060(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	8(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	2120(%esp), %eax
+	movl	24(%esp), %edx                  # 4-byte Reload
+	addl	2056(%esp), %edx
+	movl	56(%esp), %ecx                  # 4-byte Reload
 	adcl	2060(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
 	adcl	2064(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
 	adcl	2068(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %ebp         # 4-byte Reload
-	adcl	2072(%esp), %ebp
-	adcl	2076(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	2072(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	2076(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	2080(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	2084(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	2088(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	2092(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	2096(%esp), %edi
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	2100(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	2104(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	2108(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	2112(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	adcl	2116(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
 	adcl	$0, %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1936(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	88(%esp), %ecx          # 4-byte Reload
-	addl	1936(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1940(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1944(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1948(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1952(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	1956(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1960(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1964(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1968(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1972(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1976(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1980(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1984(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
+	movl	%eax, %ebp
+	subl	$4, %esp
+	leal	1988(%esp), %eax
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	2452(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1984(%esp), %esi
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	1988(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	1992(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	1996(%esp), %ebp
-	movl	%ebp, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1996(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	2000(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	2004(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	sbbl	%ebp, %ebp
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1864(%esp), %ecx
-	movl	2620(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	andl	$1, %ebp
-	movl	%ebp, %ecx
-	addl	1864(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	2008(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	2012(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	2016(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	2020(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	2024(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	2028(%esp), %esi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	2032(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	2036(%esp), %edi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	2040(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	2044(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	2048(%esp), %ebp
+	movl	%ebp, 64(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	1916(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	12(%ecx)
+	movl	2444(%esp), %ecx
+	pushl	%ecx
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	1976(%esp), %eax
+	movl	56(%esp), %edx                  # 4-byte Reload
+	addl	1912(%esp), %edx
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1916(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	1920(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1924(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1928(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	1932(%esp), %ebp
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1936(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	1940(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	1944(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1948(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	1952(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1956(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	adcl	1960(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1964(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1968(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edi                  # 4-byte Reload
+	adcl	1972(%esp), %edi
+	adcl	$0, %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	movl	2452(%esp), %ecx
+	pushl	%ecx
+	leal	1852(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1840(%esp), %esi
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1844(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1848(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1852(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1856(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	1860(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1864(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	1868(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	1872(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	1876(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	1880(%esp), %edi
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1880(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
 	adcl	1884(%esp), %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1888(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	1888(%esp), %ebp
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	1892(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	1896(%esp), %esi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1900(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1904(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1908(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1912(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ebp         # 4-byte Reload
-	adcl	1916(%esp), %ebp
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1920(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1924(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1928(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1932(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1792(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	1792(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1796(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1800(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	1804(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1808(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1812(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1816(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	1820(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1824(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1828(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1832(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1836(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	1840(%esp), %ebp
-	movl	%ebp, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1844(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	1848(%esp), %edi
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	1852(%esp), %ebp
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1856(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1860(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1720(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %esi
-	movl	%esi, %eax
-	movl	80(%esp), %ecx          # 4-byte Reload
-	addl	1720(%esp), %ecx
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1724(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1728(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1732(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1736(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1740(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1744(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1748(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1896(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	1900(%esp), %edi
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %edi                  # 4-byte Reload
+	adcl	1904(%esp), %edi
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	16(%eax)
+	pushl	2444(%esp)
+	leal	1780(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	1832(%esp), %ecx
+	movl	32(%esp), %eax                  # 4-byte Reload
+	addl	1768(%esp), %eax
+	movl	60(%esp), %edx                  # 4-byte Reload
+	adcl	1772(%esp), %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	1776(%esp), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	1780(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	1784(%esp), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	1788(%esp), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	1792(%esp), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	1796(%esp), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	1800(%esp), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	1804(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	adcl	1808(%esp), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	adcl	1812(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edx                  # 4-byte Reload
+	adcl	1816(%esp), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	1820(%esp), %ebp
+	movl	64(%esp), %edx                  # 4-byte Reload
+	adcl	1824(%esp), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	adcl	1828(%esp), %edi
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	movl	%ecx, %edi
+	adcl	$0, %edi
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%eax, %ecx
+	movl	%eax, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1708(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1696(%esp), %esi
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1700(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1704(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1708(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1712(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1716(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	1720(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1724(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1728(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1732(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1736(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1740(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1744(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	1748(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %esi                  # 4-byte Reload
 	adcl	1752(%esp), %esi
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1756(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1760(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	1764(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	1768(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1772(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	adcl	1776(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	adcl	1780(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1784(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	1788(%esp), %ebp
-	adcl	$0, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1648(%esp), %ecx
-	movl	2612(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	movl	92(%esp), %eax          # 4-byte Reload
-	addl	1648(%esp), %eax
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	1652(%esp), %edi
-	movl	68(%esp), %ecx          # 4-byte Reload
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1756(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	1760(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	20(%eax)
+	pushl	2444(%esp)
+	leal	1636(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	1688(%esp), %eax
+	movl	60(%esp), %edx                  # 4-byte Reload
+	addl	1624(%esp), %edx
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1628(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1632(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	1636(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1640(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	1644(%esp), %ebp
+	movl	48(%esp), %edi                  # 4-byte Reload
+	adcl	1648(%esp), %edi
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1652(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
 	adcl	1656(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
 	adcl	1660(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
 	adcl	1664(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
 	adcl	1668(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
 	adcl	1672(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
 	adcl	1676(%esp), %esi
-	movl	%esi, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
+	movl	%esi, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
 	adcl	1680(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
 	adcl	1684(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	1688(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	1692(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1696(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1700(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1704(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1708(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	adcl	1712(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1716(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%eax, %ebp
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1576(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	1576(%esp), %ebp
-	adcl	1580(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1564(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1552(%esp), %esi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1556(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1560(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1564(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1568(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	1572(%esp), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	adcl	1576(%esp), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1580(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	1584(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	1588(%esp), %ebp
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	1592(%esp), %edi
-	movl	84(%esp), %esi          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1588(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1592(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
 	adcl	1596(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	1600(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	1604(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	1608(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	1612(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1616(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	1620(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1624(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1628(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1632(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1636(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1640(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1644(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1504(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	76(%esp), %ecx          # 4-byte Reload
-	addl	1504(%esp), %ecx
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1508(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	1512(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	adcl	1516(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	1520(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1524(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1528(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1532(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1536(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %edi         # 4-byte Reload
-	adcl	1540(%esp), %edi
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	1544(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1548(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1552(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1556(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1560(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1564(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	1568(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1572(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1432(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	76(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1432(%esp), %ebp
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1436(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1440(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1444(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1448(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1452(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1456(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ebp         # 4-byte Reload
-	adcl	1460(%esp), %ebp
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1464(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	adcl	1468(%esp), %edi
-	movl	%edi, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	1472(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %edi         # 4-byte Reload
-	adcl	1476(%esp), %edi
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1480(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %edi                  # 4-byte Reload
+	adcl	1616(%esp), %edi
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	24(%eax)
+	pushl	2444(%esp)
+	leal	1492(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	1544(%esp), %eax
+	movl	28(%esp), %edx                  # 4-byte Reload
+	addl	1480(%esp), %edx
+	movl	16(%esp), %ecx                  # 4-byte Reload
 	adcl	1484(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
 	adcl	1488(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1492(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	adcl	1496(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	1492(%esp), %ebp
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	1496(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
 	adcl	1500(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1504(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	1508(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1512(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1516(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	1520(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1524(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	1528(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	1532(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1536(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	1540(%esp), %edi
 	adcl	$0, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1360(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	68(%esp), %ecx          # 4-byte Reload
-	addl	1360(%esp), %ecx
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1364(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1372(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1376(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1380(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1384(%esp), %ebp
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	1400(%esp), %edi
-	movl	%edi, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1404(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	1408(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1420(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1408(%esp), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	1412(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	1416(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	1420(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	1424(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	1428(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1288(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	68(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1288(%esp), %edi
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1292(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1296(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1300(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1304(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1308(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	1312(%esp), %ebp
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1316(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	1320(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	1324(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1328(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1332(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	adcl	1336(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %edi         # 4-byte Reload
-	adcl	1340(%esp), %edi
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1344(%esp), %esi
-	movl	80(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %esi                  # 4-byte Reload
+	adcl	1432(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1436(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	adcl	1440(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1444(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1448(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1452(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1456(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1460(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1464(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	1468(%esp), %edi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1472(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	28(%eax)
+	pushl	2444(%esp)
+	leal	1348(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	1400(%esp), %eax
+	movl	16(%esp), %edx                  # 4-byte Reload
+	addl	1336(%esp), %edx
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	1340(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1344(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
 	adcl	1348(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
 	adcl	1352(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1356(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	adcl	1356(%esp), %esi
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	1360(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	1364(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1368(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1372(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1376(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	1380(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ebp                  # 4-byte Reload
+	adcl	1384(%esp), %ebp
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1388(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	1392(%esp), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1396(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
 	adcl	$0, %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1216(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	1216(%esp), %ecx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1220(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1228(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1232(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1236(%esp), %ebp
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ebp         # 4-byte Reload
-	adcl	1240(%esp), %ebp
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1244(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	1248(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1252(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1256(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1260(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	1264(%esp), %edi
-	movl	%edi, 112(%esp)         # 4-byte Spill
-	adcl	1268(%esp), %esi
-	movl	%esi, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	%edx, %edi
+	imull	%edx, %ecx
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1276(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1264(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1268(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	1272(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	1276(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	1280(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1144(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	%edi, %eax
-	andl	$1, %eax
-	addl	1144(%esp), %esi
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1148(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1152(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1156(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %edi         # 4-byte Reload
-	adcl	1160(%esp), %edi
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1164(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	1168(%esp), %ebp
-	movl	%ebp, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	1172(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ecx         # 4-byte Reload
-	adcl	1176(%esp), %ecx
-	movl	%ecx, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %esi         # 4-byte Reload
-	adcl	1180(%esp), %esi
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1184(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1188(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1192(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	1284(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1288(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1292(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	1296(%esp), %edi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1300(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1304(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1308(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	adcl	1312(%esp), %ebp
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1316(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1320(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1324(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1328(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	32(%eax)
+	pushl	2444(%esp)
+	leal	1204(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	1256(%esp), %eax
+	movl	8(%esp), %edx                   # 4-byte Reload
+	addl	1192(%esp), %edx
+	movl	12(%esp), %ecx                  # 4-byte Reload
 	adcl	1196(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
 	adcl	1200(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
 	adcl	1204(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	1208(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	adcl	1208(%esp), %esi
+	movl	%esi, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
 	adcl	1212(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1216(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	adcl	1220(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1224(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1228(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	1232(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	adcl	1236(%esp), %ebp
+	movl	%ebp, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	1240(%esp), %ebp
+	movl	60(%esp), %edi                  # 4-byte Reload
+	adcl	1244(%esp), %edi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1248(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1252(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
 	adcl	$0, %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1072(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	72(%esp), %ecx          # 4-byte Reload
-	addl	1072(%esp), %ecx
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1076(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	1080(%esp), %ebp
-	adcl	1084(%esp), %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1088(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	1092(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	1096(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	1100(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	1104(%esp), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1108(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	1112(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1116(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	1132(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1120(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	1124(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	1128(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	1132(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	1136(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %edi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1000(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	72(%esp), %eax          # 4-byte Reload
-	andl	$1, %eax
-	addl	1000(%esp), %edi
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1004(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	1008(%esp), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1012(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1016(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	1020(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	adcl	1024(%esp), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %ebp         # 4-byte Reload
-	adcl	1028(%esp), %ebp
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	1032(%esp), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	1036(%esp), %edi
-	adcl	1040(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1044(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1048(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	1140(%esp), %esi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1144(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1148(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1152(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1156(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	1160(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1164(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	1168(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	adcl	1172(%esp), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	1176(%esp), %edi
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1180(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	1184(%esp), %ebp
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	36(%eax)
+	movl	2444(%esp), %eax
+	pushl	%eax
+	leal	1060(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	1112(%esp), %eax
+	movl	12(%esp), %edx                  # 4-byte Reload
+	addl	1048(%esp), %edx
+	movl	4(%esp), %ecx                   # 4-byte Reload
 	adcl	1052(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	1056(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	1056(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
 	adcl	1060(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1064(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	1064(%esp), %esi
+	movl	52(%esp), %ecx                  # 4-byte Reload
 	adcl	1068(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1072(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1076(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1080(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	1084(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	1088(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1092(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	1096(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	adcl	1100(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1104(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	1108(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
 	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	84(%esp), %ecx          # 4-byte Reload
-	addl	928(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	932(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	936(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	952(%esp), %ebp
-	movl	%ebp, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	960(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	adcl	980(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	984(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %edi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	988(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	976(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	980(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	984(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	988(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	992(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	996(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	856(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	856(%esp), %ebp
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %ebp         # 4-byte Reload
-	adcl	888(%esp), %ebp
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	896(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	912(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	924(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2616(%esp), %ecx
-	movl	%ecx, %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	784(%esp), %ecx
-	movl	2612(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	784(%esp), %ecx
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	788(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	812(%esp), %ebp
-	movl	%ebp, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	820(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %edi          # 4-byte Reload
-	adcl	828(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	712(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	712(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	716(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %esi         # 4-byte Reload
-	adcl	728(%esp), %esi
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %ebp         # 4-byte Reload
-	adcl	736(%esp), %ebp
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	756(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	760(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	640(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	104(%esp), %ecx         # 4-byte Reload
-	addl	640(%esp), %ecx
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	652(%esp), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	660(%esp), %ebp
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	680(%esp), %edi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1000(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	1004(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	1008(%esp), %esi
+	movl	64(%esp), %ebp                  # 4-byte Reload
+	adcl	1012(%esp), %ebp
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1016(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1020(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	1024(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1028(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	1032(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	1036(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1040(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	40(%eax)
+	pushl	2444(%esp)
+	leal	916(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	968(%esp), %eax
+	movl	4(%esp), %edx                   # 4-byte Reload
+	addl	904(%esp), %edx
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	908(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	912(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	916(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	920(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	924(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	928(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	932(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	adcl	936(%esp), %ebp
+	movl	%ebp, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	940(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	944(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	948(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	952(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	956(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	960(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	964(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	844(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	832(%esp), %esi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	836(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	840(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	844(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	adcl	848(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	852(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edi                  # 4-byte Reload
+	adcl	856(%esp), %edi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	860(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %esi                  # 4-byte Reload
+	adcl	864(%esp), %esi
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	868(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	872(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	876(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	880(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	884(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	888(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	892(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	896(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	44(%eax)
+	pushl	2444(%esp)
+	leal	772(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	824(%esp), %eax
+	movl	48(%esp), %edx                  # 4-byte Reload
+	addl	760(%esp), %edx
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	764(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	768(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	adcl	772(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	776(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	780(%esp), %edi
+	movl	%edi, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	784(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	788(%esp), %esi
+	movl	%esi, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	792(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	796(%esp), %ebp
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	800(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	804(%esp), %edi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	808(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	812(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	816(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	820(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	700(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	688(%esp), %esi
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	692(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	696(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	696(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	700(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	704(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	708(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	104(%esp), %ecx         # 4-byte Reload
-	andl	$1, %ecx
-	addl	568(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	712(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	716(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	720(%esp), %esi
+	adcl	724(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	728(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	732(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	736(%esp), %edi
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	740(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	744(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	748(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	752(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	48(%eax)
+	pushl	2444(%esp)
+	leal	628(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	680(%esp), %eax
+	movl	36(%esp), %edx                  # 4-byte Reload
+	addl	616(%esp), %edx
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	620(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	624(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	628(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	632(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	636(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	640(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	adcl	644(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	648(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	652(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	656(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	adcl	660(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	adcl	664(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	668(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	672(%esp), %ebp
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	676(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	556(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	544(%esp), %esi
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	552(%esp), %edi
+	movl	40(%esp), %esi                  # 4-byte Reload
+	adcl	556(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	560(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	564(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	568(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	572(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	576(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
 	adcl	580(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	584(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	588(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	592(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	596(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	600(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	600(%esp), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	604(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	608(%esp), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	612(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	616(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	adcl	624(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	108(%esp), %ecx         # 4-byte Reload
-	addl	496(%esp), %ecx
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	500(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	516(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	520(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	524(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	528(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	532(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	536(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	540(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %edi          # 4-byte Reload
-	adcl	544(%esp), %edi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	sbbl	%eax, %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	movl	108(%esp), %ecx         # 4-byte Reload
-	andl	$1, %ecx
-	addl	424(%esp), %esi
-	movl	124(%esp), %eax         # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	608(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	2444(%esp), %eax
+	pushl	52(%eax)
+	pushl	2444(%esp)
+	leal	484(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	536(%esp), %eax
+	movl	20(%esp), %edx                  # 4-byte Reload
+	addl	472(%esp), %edx
+	adcl	476(%esp), %edi
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	adcl	480(%esp), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	484(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	488(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	492(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	496(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	500(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	adcl	504(%esp), %ebp
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	508(%esp), %edi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	512(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	516(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	520(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	524(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	528(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	532(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	$0, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	leal	412(%esp), %eax
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	400(%esp), %esi
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	404(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	408(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	412(%esp), %esi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	416(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	420(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	424(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	428(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	432(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	436(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %esi         # 4-byte Reload
-	adcl	440(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	432(%esp), %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	adcl	436(%esp), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	440(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	444(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	448(%esp), %edi
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	452(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
 	adcl	456(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	460(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	464(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	472(%esp), %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	476(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ebp          # 4-byte Reload
-	adcl	480(%esp), %ebp
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	60(%eax), %eax
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	124(%esp), %ecx         # 4-byte Reload
-	addl	352(%esp), %ecx
-	movl	128(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	332(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	56(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	52(%esp), %edx                  # 4-byte Reload
+	addl	328(%esp), %edx
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	332(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	336(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	340(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
 	adcl	356(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	360(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	364(%esp), %esi
-	movl	%esi, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	364(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	368(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %edi         # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
 	adcl	372(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	376(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	380(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	384(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	388(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	392(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	400(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	404(%esp), %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	sbbl	%esi, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	2620(%esp), %edx
-	calll	.LmulPv544x32
-	andl	$1, %esi
-	movl	%esi, %ecx
-	addl	280(%esp), %ebp
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %esi         # 4-byte Reload
-	adcl	288(%esp), %esi
-	movl	120(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	392(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	260(%esp), %eax
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	256(%esp), %esi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	260(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	264(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	268(%esp), %esi
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	272(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	276(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	280(%esp), %ebp
+	movl	60(%esp), %edi                  # 4-byte Reload
+	adcl	284(%esp), %edi
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	288(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	292(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	116(%esp), %ebp         # 4-byte Reload
-	adcl	296(%esp), %ebp
-	adcl	300(%esp), %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	296(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	300(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	304(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %edi          # 4-byte Reload
-	adcl	308(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	308(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	312(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	316(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	320(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	328(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	332(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	2616(%esp), %eax
-	movl	64(%eax), %eax
-	movl	%eax, (%esp)
-	leal	208(%esp), %ecx
-	movl	2612(%esp), %edx
-	calll	.LmulPv544x32
-	movl	128(%esp), %ecx         # 4-byte Reload
-	addl	208(%esp), %ecx
-	adcl	212(%esp), %esi
-	movl	%esi, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	220(%esp), %ebp
-	movl	%ebp, 116(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	224(%esp), %ebp
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	232(%esp), %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	264(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	sbbl	%edi, %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	136(%esp), %ecx
-	movl	2620(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	andl	$1, %edi
-	addl	136(%esp), %esi
-	movl	116(%esp), %edx         # 4-byte Reload
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	140(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	144(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	adcl	148(%esp), %edx
-	movl	%edx, 116(%esp)         # 4-byte Spill
-	adcl	152(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	156(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	160(%esp), %eax
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	164(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	168(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	188(%esp), %eax
+	movl	2444(%esp), %ecx
+	pushl	60(%ecx)
+	pushl	2444(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	movl	40(%esp), %edx                  # 4-byte Reload
+	addl	184(%esp), %edx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	188(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	192(%esp), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	adcl	196(%esp), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	200(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	204(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	adcl	208(%esp), %edi
+	movl	%edi, 60(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	212(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	216(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	220(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	224(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	228(%esp), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	232(%esp), %ebp
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	236(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	240(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	244(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	248(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	leal	116(%esp), %eax
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%edx, %ecx
+	movl	%edx, %esi
+	pushl	%ecx
+	pushl	2452(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	112(%esp), %esi
+	movl	64(%esp), %edx                  # 4-byte Reload
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	116(%esp), %eax
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	120(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	adcl	124(%esp), %edx
+	movl	56(%esp), %edi                  # 4-byte Reload
+	adcl	128(%esp), %edi
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	132(%esp), %esi
+	movl	60(%esp), %ebx                  # 4-byte Reload
+	adcl	136(%esp), %ebx
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	140(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	144(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	148(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	152(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	156(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	160(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	adcl	164(%esp), %ebp
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	168(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
 	adcl	172(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
 	adcl	176(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	180(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	184(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	188(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	192(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	196(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	200(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	124(%esp), %ecx         # 4-byte Reload
-	adcl	204(%esp), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	adcl	$0, %edi
-	movl	132(%esp), %ecx         # 4-byte Reload
-	movl	2620(%esp), %ebx
-	subl	(%ebx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	120(%esp), %ecx         # 4-byte Reload
-	sbbl	4(%ebx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	sbbl	8(%ebx), %edx
-	movl	%edx, 20(%esp)          # 4-byte Spill
-	sbbl	12(%ebx), %ebp
-	movl	%ebp, 24(%esp)          # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	movl	%eax, %edx
-	sbbl	16(%ebx), %ebp
-	movl	%ebp, 28(%esp)          # 4-byte Spill
-	sbbl	20(%ebx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	sbbl	24(%ebx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	sbbl	28(%ebx), %esi
-	movl	%esi, 40(%esp)          # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	sbbl	32(%ebx), %esi
-	movl	%esi, 44(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	sbbl	36(%ebx), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	sbbl	40(%ebx), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	movl	72(%esp), %esi          # 4-byte Reload
-	sbbl	44(%ebx), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	sbbl	48(%ebx), %esi
-	movl	%esi, 60(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	sbbl	52(%ebx), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	104(%esp), %esi         # 4-byte Reload
-	sbbl	56(%ebx), %esi
-	movl	%esi, 128(%esp)         # 4-byte Spill
-	movl	%ebx, %ebp
-	movl	108(%esp), %ebx         # 4-byte Reload
-	sbbl	60(%ebp), %ebx
-	movl	124(%esp), %esi         # 4-byte Reload
-	sbbl	64(%ebp), %esi
-	movl	%esi, %ebp
-	sbbl	$0, %edi
-	andl	$1, %edi
-	jne	.LBB258_2
-# BB#1:
-	movl	%ebx, 108(%esp)         # 4-byte Spill
-.LBB258_2:
-	movl	%edi, %ebx
-	testb	%bl, %bl
-	movl	132(%esp), %ebx         # 4-byte Reload
-	jne	.LBB258_4
-# BB#3:
-	movl	12(%esp), %ebx          # 4-byte Reload
-.LBB258_4:
-	movl	2608(%esp), %eax
-	movl	%ebx, (%eax)
-	movl	120(%esp), %ebx         # 4-byte Reload
-	jne	.LBB258_6
-# BB#5:
-	movl	16(%esp), %ebx          # 4-byte Reload
-.LBB258_6:
-	movl	%ebx, 4(%eax)
-	jne	.LBB258_8
-# BB#7:
-	movl	20(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-.LBB258_8:
-	movl	116(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 8(%eax)
-	jne	.LBB258_10
-# BB#9:
-	movl	24(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-.LBB258_10:
-	movl	100(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 12(%eax)
-	movl	112(%esp), %esi         # 4-byte Reload
-	jne	.LBB258_12
-# BB#11:
-	movl	28(%esp), %esi          # 4-byte Reload
-.LBB258_12:
-	movl	%esi, 16(%eax)
-	movl	80(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_14
-# BB#13:
-	movl	32(%esp), %edx          # 4-byte Reload
-.LBB258_14:
-	movl	%edx, 20(%eax)
-	jne	.LBB258_16
-# BB#15:
-	movl	36(%esp), %ecx          # 4-byte Reload
-.LBB258_16:
-	movl	%ecx, 24(%eax)
-	movl	92(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_18
-# BB#17:
-	movl	40(%esp), %ecx          # 4-byte Reload
-.LBB258_18:
-	movl	%ecx, 28(%eax)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_20
-# BB#19:
-	movl	44(%esp), %ecx          # 4-byte Reload
-.LBB258_20:
-	movl	%ecx, 32(%eax)
-	movl	68(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_22
-# BB#21:
-	movl	48(%esp), %ecx          # 4-byte Reload
-.LBB258_22:
-	movl	%ecx, 36(%eax)
-	movl	64(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_24
-# BB#23:
-	movl	52(%esp), %ecx          # 4-byte Reload
-.LBB258_24:
-	movl	%ecx, 40(%eax)
-	movl	72(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_26
-# BB#25:
-	movl	56(%esp), %ecx          # 4-byte Reload
-.LBB258_26:
-	movl	%ecx, 44(%eax)
-	movl	84(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_28
-# BB#27:
-	movl	60(%esp), %ecx          # 4-byte Reload
-.LBB258_28:
-	movl	%ecx, 48(%eax)
-	movl	96(%esp), %ecx          # 4-byte Reload
-	jne	.LBB258_30
-# BB#29:
-	movl	88(%esp), %ecx          # 4-byte Reload
-.LBB258_30:
-	movl	%ecx, 52(%eax)
-	movl	104(%esp), %ecx         # 4-byte Reload
-	jne	.LBB258_32
-# BB#31:
-	movl	128(%esp), %ecx         # 4-byte Reload
-.LBB258_32:
-	movl	%ecx, 56(%eax)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 60(%eax)
-	movl	124(%esp), %ecx         # 4-byte Reload
-	jne	.LBB258_34
-# BB#33:
-	movl	%ebp, %ecx
-.LBB258_34:
-	movl	%ecx, 64(%eax)
-	addl	$2588, %esp             # imm = 0xA1C
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	2444(%esp), %ecx
+	subl	(%ecx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	4(%ecx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	sbbl	8(%ecx), %edx
+	movl	%edx, 100(%esp)                 # 4-byte Spill
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	sbbl	12(%ecx), %edi
+	movl	%edi, 96(%esp)                  # 4-byte Spill
+	movl	%esi, 32(%esp)                  # 4-byte Spill
+	sbbl	16(%ecx), %esi
+	movl	%esi, 92(%esp)                  # 4-byte Spill
+	movl	%ebx, %eax
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	sbbl	20(%ecx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%ecx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%ecx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	32(%ecx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%ecx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	40(%ecx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	sbbl	44(%ecx), %eax
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	movl	%ebp, %edx
+	sbbl	48(%ecx), %edx
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	sbbl	52(%ecx), %ebp
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	sbbl	56(%ecx), %ebx
+	movl	40(%esp), %edi                  # 4-byte Reload
+	movl	%edi, %esi
+	sbbl	60(%ecx), %edi
+	testl	%edi, %edi
+	js	.LBB78_1
+# %bb.2:
+	movl	2432(%esp), %ecx
+	movl	%edi, 60(%ecx)
+	js	.LBB78_3
+.LBB78_4:
+	movl	%ebx, 56(%ecx)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	movl	84(%esp), %esi                  # 4-byte Reload
+	js	.LBB78_5
+.LBB78_6:
+	movl	%ebp, 52(%ecx)
+	js	.LBB78_7
+.LBB78_8:
+	movl	%edx, 48(%ecx)
+	movl	88(%esp), %ebp                  # 4-byte Reload
+	js	.LBB78_9
+.LBB78_10:
+	movl	%eax, 44(%ecx)
+	movl	104(%esp), %ebx                 # 4-byte Reload
+	movl	80(%esp), %eax                  # 4-byte Reload
+	js	.LBB78_11
+.LBB78_12:
+	movl	%eax, 40(%ecx)
+	movl	108(%esp), %edx                 # 4-byte Reload
+	js	.LBB78_13
+.LBB78_14:
+	movl	%edi, %eax
+	movl	72(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 36(%ecx)
+	js	.LBB78_15
+.LBB78_16:
+	movl	76(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 32(%ecx)
+	js	.LBB78_17
+.LBB78_18:
+	movl	%esi, 28(%ecx)
+	movl	%eax, %edi
+	js	.LBB78_19
+.LBB78_20:
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 24(%ecx)
+	movl	%edx, %eax
+	js	.LBB78_21
+.LBB78_22:
+	movl	%ebp, 20(%ecx)
+	movl	100(%esp), %esi                 # 4-byte Reload
+	movl	92(%esp), %edx                  # 4-byte Reload
+	js	.LBB78_23
+.LBB78_24:
+	movl	%edx, 16(%ecx)
+	js	.LBB78_25
+.LBB78_26:
+	movl	%edi, 12(%ecx)
+	js	.LBB78_27
+.LBB78_28:
+	movl	%esi, 8(%ecx)
+	js	.LBB78_29
+.LBB78_30:
+	movl	%ebx, 4(%ecx)
+	jns	.LBB78_32
+.LBB78_31:
+	movl	44(%esp), %eax                  # 4-byte Reload
+.LBB78_32:
+	movl	%eax, (%ecx)
+	addl	$2412, %esp                     # imm = 0x96C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end258:
-	.size	mcl_fp_mont17L, .Lfunc_end258-mcl_fp_mont17L
-
-	.globl	mcl_fp_montNF17L
-	.align	16, 0x90
-	.type	mcl_fp_montNF17L,@function
-mcl_fp_montNF17L:                       # @mcl_fp_montNF17L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$2572, %esp             # imm = 0xA0C
-	calll	.L259$pb
-.L259$pb:
-	popl	%ebx
-.Ltmp60:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp60-.L259$pb), %ebx
-	movl	2604(%esp), %eax
-	movl	-4(%eax), %esi
-	movl	%esi, 48(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2496(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	2496(%esp), %edi
-	movl	2500(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%edi, %eax
-	imull	%esi, %eax
-	movl	2564(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	2560(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	2556(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	2552(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	2548(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	2544(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	2540(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	2536(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	2532(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	2528(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	2524(%esp), %ebp
-	movl	2520(%esp), %esi
-	movl	2516(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	2512(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	2508(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	2504(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	%eax, (%esp)
-	leal	2424(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	2424(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2428(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2432(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2436(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2440(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2444(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	2448(%esp), %esi
-	movl	%esi, 80(%esp)          # 4-byte Spill
-	adcl	2452(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	2456(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	2460(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2464(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2468(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %esi          # 4-byte Reload
-	adcl	2472(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2476(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	2480(%esp), %edi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2484(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2488(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2492(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	4(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2352(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	2420(%esp), %ecx
-	movl	112(%esp), %edx         # 4-byte Reload
-	addl	2352(%esp), %edx
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2356(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2360(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2364(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2368(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2372(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2376(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	2380(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	2384(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2388(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2392(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	2396(%esp), %esi
-	movl	%esi, %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2400(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	2404(%esp), %edi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2408(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	2412(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2416(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	$0, %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2280(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	2280(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2284(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2288(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2292(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2296(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2300(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2304(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	2308(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	2312(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	2316(%esp), %esi
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2320(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	2324(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2328(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	2332(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	2336(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	2340(%esp), %ebp
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	2344(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2348(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	8(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2208(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	2276(%esp), %eax
-	movl	92(%esp), %edx          # 4-byte Reload
-	addl	2208(%esp), %edx
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	2212(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	2216(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	2220(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	2224(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	2228(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	2232(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	2236(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	2240(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	2244(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	2248(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	2252(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	2256(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	2260(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	2264(%esp), %ebp
-	adcl	2268(%esp), %edi
-	movl	%edi, %esi
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	2272(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	2136(%esp), %ecx
-	movl	2604(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	addl	2136(%esp), %edi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2140(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	2144(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2148(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2152(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2156(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	2160(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	2164(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2168(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2172(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2176(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2180(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2184(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %edi         # 4-byte Reload
-	adcl	2188(%esp), %edi
-	adcl	2192(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	adcl	2196(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	2200(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	2204(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	12(%eax), %eax
-	movl	%eax, (%esp)
-	leal	2064(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	2132(%esp), %eax
-	movl	104(%esp), %edx         # 4-byte Reload
-	addl	2064(%esp), %edx
-	movl	76(%esp), %ecx          # 4-byte Reload
-	adcl	2068(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	2072(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	2076(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	2080(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	2084(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ebp          # 4-byte Reload
-	adcl	2088(%esp), %ebp
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	2092(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	2096(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	2100(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	2104(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	2108(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	adcl	2112(%esp), %edi
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	2116(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	2120(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	adcl	2124(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	2128(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	%edx, %esi
-	movl	%esi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1992(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1992(%esp), %esi
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1996(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	2000(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	2004(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	2008(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	2012(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	adcl	2016(%esp), %ebp
-	movl	%ebp, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	2020(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	2024(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	2028(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	2032(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	2036(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	2040(%esp), %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	2044(%esp), %esi
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	2048(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	2052(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %edi          # 4-byte Reload
-	adcl	2056(%esp), %edi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	2060(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	16(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1920(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	1988(%esp), %eax
-	movl	76(%esp), %edx          # 4-byte Reload
-	addl	1920(%esp), %edx
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1924(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1928(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1932(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	1936(%esp), %ebp
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1940(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1944(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1948(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1952(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1956(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1960(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1964(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	adcl	1968(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1972(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	1976(%esp), %esi
-	adcl	1980(%esp), %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1984(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	%edx, %eax
-	movl	%edx, %edi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1848(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1848(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1852(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1856(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1860(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1864(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1868(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1872(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1876(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1880(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1884(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1888(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1892(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1896(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1900(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	1904(%esp), %esi
-	movl	%esi, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	1908(%esp), %ebp
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1912(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1916(%esp), %eax
+.LBB78_1:
+	movl	%esi, %edi
+	movl	2432(%esp), %ecx
+	movl	%edi, 60(%ecx)
+	jns	.LBB78_4
+.LBB78_3:
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 56(%ecx)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	movl	84(%esp), %esi                  # 4-byte Reload
+	jns	.LBB78_6
+.LBB78_5:
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 52(%ecx)
+	jns	.LBB78_8
+.LBB78_7:
+	movl	36(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 48(%ecx)
+	movl	88(%esp), %ebp                  # 4-byte Reload
+	jns	.LBB78_10
+.LBB78_9:
+	movl	48(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 44(%ecx)
+	movl	104(%esp), %ebx                 # 4-byte Reload
+	movl	80(%esp), %eax                  # 4-byte Reload
+	jns	.LBB78_12
+.LBB78_11:
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 40(%ecx)
+	movl	108(%esp), %edx                 # 4-byte Reload
+	jns	.LBB78_14
+.LBB78_13:
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	%edi, %eax
+	movl	72(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 36(%ecx)
+	jns	.LBB78_16
+.LBB78_15:
+	movl	8(%esp), %edi                   # 4-byte Reload
+	movl	%edi, 76(%esp)                  # 4-byte Spill
+	movl	76(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 32(%ecx)
+	jns	.LBB78_18
+.LBB78_17:
+	movl	16(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 28(%ecx)
 	movl	%eax, %edi
-	movl	2600(%esp), %eax
-	movl	20(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1776(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	1844(%esp), %eax
-	movl	84(%esp), %edx          # 4-byte Reload
-	addl	1776(%esp), %edx
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1780(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1784(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1788(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1792(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1796(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1800(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1804(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	1808(%esp), %esi
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1812(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1816(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1820(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1824(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1828(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	adcl	1832(%esp), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1836(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	1840(%esp), %edi
-	adcl	$0, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1704(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1704(%esp), %ebp
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1708(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1712(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1716(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1720(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1724(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1728(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1732(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	1736(%esp), %esi
-	movl	%esi, %ebp
-	movl	96(%esp), %esi          # 4-byte Reload
-	adcl	1740(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1744(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1748(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1752(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1756(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1760(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1764(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1768(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1772(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	24(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1632(%esp), %ecx
-	movl	2596(%esp), %eax
-	movl	%eax, %edx
-	calll	.LmulPv544x32
-	movl	1700(%esp), %eax
-	movl	80(%esp), %edx          # 4-byte Reload
-	addl	1632(%esp), %edx
-	movl	60(%esp), %ecx          # 4-byte Reload
-	adcl	1636(%esp), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ecx          # 4-byte Reload
-	adcl	1640(%esp), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1644(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ecx          # 4-byte Reload
-	adcl	1648(%esp), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1652(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1656(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	1660(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	adcl	1664(%esp), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1668(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	1672(%esp), %esi
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1676(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1680(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1684(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1688(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	adcl	1692(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1696(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	%edx, %edi
-	movl	%edi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1560(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1560(%esp), %edi
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1564(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1568(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1572(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1576(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %edi          # 4-byte Reload
-	adcl	1580(%esp), %edi
-	movl	64(%esp), %ebp          # 4-byte Reload
-	adcl	1584(%esp), %ebp
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1588(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1592(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1596(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	1600(%esp), %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1604(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %esi         # 4-byte Reload
-	adcl	1608(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1612(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1616(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1620(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1624(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1628(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	28(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1488(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	1556(%esp), %eax
-	movl	60(%esp), %ecx          # 4-byte Reload
-	addl	1488(%esp), %ecx
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	1492(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	1496(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	1500(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	1504(%esp), %edi
-	adcl	1508(%esp), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	1512(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	1516(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %edx         # 4-byte Reload
-	adcl	1520(%esp), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	1524(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %edx         # 4-byte Reload
-	adcl	1528(%esp), %edx
-	movl	%edx, 116(%esp)         # 4-byte Spill
-	adcl	1532(%esp), %esi
-	movl	%esi, %ebp
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	1536(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	1540(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	1544(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	1548(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	1552(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	%ecx, %esi
+	jns	.LBB78_20
+.LBB78_19:
+	movl	28(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 68(%esp)                  # 4-byte Spill
+	movl	68(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 24(%ecx)
+	movl	%edx, %eax
+	jns	.LBB78_22
+.LBB78_21:
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%ecx)
+	movl	100(%esp), %esi                 # 4-byte Reload
+	movl	92(%esp), %edx                  # 4-byte Reload
+	jns	.LBB78_24
+.LBB78_23:
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 16(%ecx)
+	jns	.LBB78_26
+.LBB78_25:
+	movl	56(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%ecx)
+	jns	.LBB78_28
+.LBB78_27:
+	movl	64(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%ecx)
+	jns	.LBB78_30
+.LBB78_29:
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 4(%ecx)
+	js	.LBB78_31
+	jmp	.LBB78_32
+.Lfunc_end78:
+	.size	mcl_fp_montNF16L, .Lfunc_end78-mcl_fp_montNF16L
+                                        # -- End function
+	.globl	mcl_fp_montRed16L               # -- Begin function mcl_fp_montRed16L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRed16L,@function
+mcl_fp_montRed16L:                      # @mcl_fp_montRed16L
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$1292, %esp                     # imm = 0x50C
+	calll	.L79$pb
+.L79$pb:
+	popl	%ebx
+.Ltmp21:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp21-.L79$pb), %ebx
+	movl	1320(%esp), %ecx
+	movl	60(%ecx), %eax
+	movl	%eax, 140(%esp)                 # 4-byte Spill
+	movl	56(%ecx), %eax
+	movl	%eax, 136(%esp)                 # 4-byte Spill
+	movl	52(%ecx), %eax
+	movl	%eax, 132(%esp)                 # 4-byte Spill
+	movl	48(%ecx), %eax
+	movl	%eax, 128(%esp)                 # 4-byte Spill
+	movl	44(%ecx), %eax
+	movl	%eax, 116(%esp)                 # 4-byte Spill
+	movl	40(%ecx), %eax
+	movl	%eax, 112(%esp)                 # 4-byte Spill
+	movl	36(%ecx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	32(%ecx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	28(%ecx), %eax
+	movl	%eax, 124(%esp)                 # 4-byte Spill
+	movl	24(%ecx), %eax
+	movl	%eax, 120(%esp)                 # 4-byte Spill
+	movl	20(%ecx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	16(%ecx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	12(%ecx), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	8(%ecx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %eax
+	movl	%ecx, %edx
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	movl	60(%eax), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	56(%eax), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	52(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	48(%eax), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	44(%eax), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	40(%eax), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	32(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	28(%eax), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	16(%eax), %edi
+	movl	12(%eax), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	8(%eax), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	(%eax), %esi
+	movl	4(%eax), %ebp
+	movl	-4(%edx), %ecx
+	movl	%ecx, 72(%esp)                  # 4-byte Spill
+	movl	(%edx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	subl	$4, %esp
 	movl	%esi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1416(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1416(%esp), %esi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1424(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %esi          # 4-byte Reload
-	adcl	1428(%esp), %esi
-	adcl	1432(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edi          # 4-byte Reload
-	adcl	1436(%esp), %edi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1440(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1444(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1448(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1452(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1456(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	1460(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1464(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1468(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1472(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1476(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1480(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
-	adcl	1484(%esp), %ebp
-	movl	2600(%esp), %eax
-	movl	32(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1344(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	1412(%esp), %eax
-	movl	52(%esp), %edx          # 4-byte Reload
-	addl	1344(%esp), %edx
-	movl	56(%esp), %ecx          # 4-byte Reload
-	adcl	1348(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	1352(%esp), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1356(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	adcl	1360(%esp), %edi
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1364(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1368(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1372(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1376(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1380(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	1384(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
-	adcl	1388(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	adcl	1392(%esp), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %esi          # 4-byte Reload
-	adcl	1396(%esp), %esi
-	movl	84(%esp), %ecx          # 4-byte Reload
-	adcl	1400(%esp), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1404(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	1408(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	adcl	$0, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1272(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1272(%esp), %ebp
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	1288(%esp), %edi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1292(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1300(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	1304(%esp), %ebp
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1308(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1312(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1320(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	1324(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1328(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1336(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	36(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1200(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	1268(%esp), %eax
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	1200(%esp), %ecx
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	1204(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	1208(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	adcl	1212(%esp), %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	1216(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	1220(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %esi         # 4-byte Reload
-	adcl	1224(%esp), %esi
+	imull	%ecx, %eax
+	leal	1228(%esp), %ecx
+	pushl	%eax
+	pushl	%edx
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1224(%esp), %esi
 	adcl	1228(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	1232(%esp), %edi
-	movl	112(%esp), %edx         # 4-byte Reload
-	adcl	1236(%esp), %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %edx          # 4-byte Reload
-	adcl	1240(%esp), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	1244(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	1248(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	1252(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	1256(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	1260(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	1264(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1232(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	1236(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	1240(%esp), %edi
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1244(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	1248(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	1252(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	1256(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1260(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	1264(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	1268(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	1272(%esp), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1276(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edi                  # 4-byte Reload
+	adcl	1280(%esp), %edi
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	1284(%esp), %esi
+	movl	1316(%esp), %eax
+	movl	64(%eax), %eax
+	adcl	1288(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%ebp, %eax
+	leal	1156(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	1216(%esp), %eax
 	adcl	$0, %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1128(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	1128(%esp), %ebp
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1144(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1148(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	1152(%esp), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	1160(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	1172(%esp), %ebp
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %esi          # 4-byte Reload
-	adcl	1180(%esp), %esi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %edi          # 4-byte Reload
-	adcl	1188(%esp), %edi
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	40(%eax), %eax
-	movl	%eax, (%esp)
-	leal	1056(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	1124(%esp), %edx
-	movl	68(%esp), %eax          # 4-byte Reload
-	addl	1056(%esp), %eax
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1060(%esp), %ecx
-	movl	%ecx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %ecx          # 4-byte Reload
-	adcl	1064(%esp), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
-	adcl	1068(%esp), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
-	adcl	1072(%esp), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	adcl	1076(%esp), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
-	adcl	1080(%esp), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	adcl	1084(%esp), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
+	addl	1152(%esp), %ebp
+	movl	56(%esp), %edx                  # 4-byte Reload
+	adcl	1156(%esp), %edx
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	1160(%esp), %ebp
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	1164(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1168(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1172(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	1176(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1180(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1184(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1188(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1192(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	adcl	1196(%esp), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1200(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	1204(%esp), %edi
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	adcl	1208(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %esi                  # 4-byte Reload
+	adcl	1212(%esp), %esi
+	movl	1316(%esp), %ecx
+	adcl	68(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	60(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	1084(%esp), %ecx
+	pushl	%eax
+	movl	1328(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 60(%esp)                  # 1-byte Folded Spill
+	movl	1144(%esp), %eax
+	adcl	$0, %eax
+	addl	1080(%esp), %edi
+	adcl	1084(%esp), %ebp
+	movl	%ebp, %edx
+	movl	52(%esp), %ecx                  # 4-byte Reload
 	adcl	1088(%esp), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
 	adcl	1092(%esp), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	adcl	1096(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1096(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
 	adcl	1100(%esp), %ecx
-	movl	%ecx, 76(%esp)          # 4-byte Spill
-	adcl	1104(%esp), %esi
-	movl	%esi, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1104(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
 	adcl	1108(%esp), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	adcl	1112(%esp), %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %edi          # 4-byte Reload
-	adcl	1116(%esp), %edi
-	movl	56(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1112(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1116(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ecx                  # 4-byte Reload
 	adcl	1120(%esp), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	%eax, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	984(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	984(%esp), %esi
-	movl	72(%esp), %esi          # 4-byte Reload
-	adcl	988(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %ebp          # 4-byte Reload
-	adcl	996(%esp), %ebp
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	1000(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1004(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	1044(%esp), %edi
-	movl	%edi, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	44(%eax), %eax
-	movl	%eax, (%esp)
-	leal	912(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	980(%esp), %eax
-	addl	912(%esp), %esi
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	916(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	adcl	920(%esp), %ebp
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edi          # 4-byte Reload
-	adcl	924(%esp), %edi
-	movl	108(%esp), %edx         # 4-byte Reload
-	adcl	928(%esp), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	932(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %edx         # 4-byte Reload
-	adcl	936(%esp), %edx
-	movl	%edx, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %edx         # 4-byte Reload
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	1124(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ebp                  # 4-byte Reload
+	adcl	1128(%esp), %ebp
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1132(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	1136(%esp), %esi
+	movl	%esi, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %esi                  # 4-byte Reload
+	adcl	1140(%esp), %esi
+	movl	1316(%esp), %ecx
+	adcl	72(%ecx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	setb	20(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %edi
+	imull	%edx, %eax
+	leal	1012(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 20(%esp)                  # 1-byte Folded Spill
+	movl	1072(%esp), %eax
+	adcl	$0, %eax
+	addl	1008(%esp), %edi
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	1012(%esp), %edx
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1016(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1020(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	1024(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1028(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1032(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1036(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1040(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	adcl	1044(%esp), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	1048(%esp), %edi
+	adcl	1052(%esp), %ebp
+	movl	%ebp, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1056(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	1060(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	adcl	1064(%esp), %esi
+	movl	%esi, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	1068(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %ecx
+	adcl	76(%ecx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %eax
+	leal	940(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 16(%esp)                  # 1-byte Folded Spill
+	movl	1000(%esp), %eax
+	adcl	$0, %eax
+	addl	936(%esp), %esi
+	movl	40(%esp), %edx                  # 4-byte Reload
 	adcl	940(%esp), %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	944(%esp), %ebp
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	948(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	952(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	956(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	960(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
-	adcl	964(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	968(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	972(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	976(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	944(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	948(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	952(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	956(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	960(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	964(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ebp                  # 4-byte Reload
+	adcl	968(%esp), %ebp
+	adcl	972(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	976(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	980(%esp), %esi
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	984(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	988(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	992(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	996(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %ecx
+	adcl	80(%ecx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	868(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	928(%esp), %eax
 	adcl	$0, %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	%esi, %eax
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	840(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	840(%esp), %esi
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	852(%esp), %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	856(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	860(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	864(%esp), %edi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	%ebp, %esi
-	adcl	872(%esp), %esi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %ebp          # 4-byte Reload
-	adcl	888(%esp), %ebp
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	48(%eax), %eax
-	movl	%eax, (%esp)
-	leal	768(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	836(%esp), %edx
-	movl	64(%esp), %ecx          # 4-byte Reload
-	addl	768(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	780(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	784(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	788(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %edi         # 4-byte Reload
-	adcl	792(%esp), %edi
-	adcl	796(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	adcl	812(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	828(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	696(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	696(%esp), %esi
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	712(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	adcl	716(%esp), %esi
-	adcl	720(%esp), %edi
-	movl	%edi, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
+	addl	864(%esp), %edi
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	868(%esp), %edx
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	872(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	876(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	880(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	884(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	888(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	892(%esp), %ebp
+	movl	%ebp, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	896(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	900(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	adcl	904(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %esi                  # 4-byte Reload
+	adcl	908(%esp), %esi
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	912(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	916(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	920(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	924(%esp), %ebp
+	movl	1316(%esp), %ecx
+	adcl	84(%ecx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	setb	60(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %edi
+	imull	%edx, %eax
+	leal	796(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 60(%esp)                  # 1-byte Folded Spill
+	movl	856(%esp), %eax
+	adcl	$0, %eax
+	addl	792(%esp), %edi
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	796(%esp), %edx
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	800(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	804(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	808(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	812(%esp), %edi
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	adcl	816(%esp), %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	820(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	824(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	828(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	832(%esp), %esi
+	movl	%esi, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	836(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	840(%esp), %esi
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	844(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	adcl	848(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	852(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %ecx
+	adcl	88(%ecx), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	setb	56(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %ebp
+	imull	%edx, %eax
+	leal	724(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 56(%esp)                  # 1-byte Folded Spill
+	movl	784(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	720(%esp), %ebp
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	724(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	728(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	732(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	732(%esp), %ebp
+	adcl	736(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
 	adcl	740(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	744(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edi                  # 4-byte Reload
+	adcl	748(%esp), %edi
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	752(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	adcl	756(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	756(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	760(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	52(%eax), %eax
-	movl	%eax, (%esp)
-	leal	624(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	692(%esp), %edx
-	movl	88(%esp), %ecx          # 4-byte Reload
-	addl	624(%esp), %ecx
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	636(%esp), %ebp
-	adcl	640(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	644(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	652(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	adcl	656(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	660(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	664(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	668(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	672(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	676(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	adcl	680(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	684(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	688(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	552(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	552(%esp), %esi
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	564(%esp), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	568(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	572(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	576(%esp), %esi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	764(%esp), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
+	adcl	768(%esp), %esi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	772(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	776(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	780(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	adcl	92(%eax), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	setb	52(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	leal	652(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 52(%esp)                  # 1-byte Folded Spill
+	movl	712(%esp), %eax
+	adcl	$0, %eax
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	addl	648(%esp), %ecx
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	652(%esp), %ecx
+	adcl	656(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	adcl	660(%esp), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edx                  # 4-byte Reload
+	adcl	664(%esp), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	668(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	adcl	672(%esp), %edi
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %edx                  # 4-byte Reload
+	adcl	676(%esp), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %edx                  # 4-byte Reload
+	adcl	680(%esp), %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	684(%esp), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %edx                  # 4-byte Reload
+	adcl	688(%esp), %edx
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	adcl	692(%esp), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	696(%esp), %ebp
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	700(%esp), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	704(%esp), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	76(%esp), %edx                  # 4-byte Reload
+	adcl	708(%esp), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %edx
+	adcl	96(%edx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	setb	40(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %esi                  # 4-byte Reload
+	movl	%esi, %eax
+	movl	%ecx, %edi
+	imull	%ecx, %eax
+	leal	580(%esp), %ecx
+	pushl	%eax
+	movl	1328(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 40(%esp)                  # 1-byte Folded Spill
+	movl	640(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	576(%esp), %edi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	580(%esp), %ecx
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	584(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edi                  # 4-byte Reload
 	adcl	588(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	592(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	596(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %ebp          # 4-byte Reload
-	adcl	600(%esp), %ebp
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	604(%esp), %edi
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	600(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	604(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	608(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	612(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	616(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	620(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	56(%eax), %eax
-	movl	%eax, (%esp)
-	leal	480(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	548(%esp), %edx
-	movl	96(%esp), %ecx          # 4-byte Reload
-	addl	480(%esp), %ecx
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	496(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	500(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	504(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	508(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	512(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	adcl	620(%esp), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	624(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	628(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	76(%esp), %ebp                  # 4-byte Reload
+	adcl	632(%esp), %ebp
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	636(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	adcl	100(%eax), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	%esi, %eax
+	movl	%ecx, %esi
+	imull	%ecx, %eax
+	leal	508(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	568(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	504(%esp), %esi
+	movl	32(%esp), %esi                  # 4-byte Reload
+	adcl	508(%esp), %esi
+	adcl	512(%esp), %edi
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	516(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	520(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	adcl	524(%esp), %ebp
-	movl	%ebp, 52(%esp)          # 4-byte Spill
-	adcl	528(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	524(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	adcl	528(%esp), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	532(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	536(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	540(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	544(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	408(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	408(%esp), %esi
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %esi         # 4-byte Reload
-	adcl	420(%esp), %esi
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	424(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	428(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %ebp         # 4-byte Reload
-	adcl	432(%esp), %ebp
-	movl	76(%esp), %edi          # 4-byte Reload
-	adcl	436(%esp), %edi
-	movl	84(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	552(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	adcl	556(%esp), %ebp
+	movl	%ebp, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	560(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	564(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	adcl	104(%eax), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	setb	24(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	436(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 24(%esp)                  # 1-byte Folded Spill
+	movl	496(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	432(%esp), %esi
+	movl	%edi, %esi
+	adcl	436(%esp), %esi
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	440(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	444(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	448(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	448(%esp), %ebp
+	movl	60(%esp), %eax                  # 4-byte Reload
 	adcl	452(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	456(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	456(%esp), %edi
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	460(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	464(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	468(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	472(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	476(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	60(%eax), %eax
-	movl	%eax, (%esp)
-	leal	336(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	404(%esp), %edx
-	movl	108(%esp), %ecx         # 4-byte Reload
-	addl	336(%esp), %ecx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	344(%esp), %esi
-	movl	%esi, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	352(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	adcl	356(%esp), %ebp
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	adcl	360(%esp), %edi
-	movl	%edi, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edi          # 4-byte Reload
-	adcl	364(%esp), %edi
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	76(%esp), %eax                  # 4-byte Reload
+	adcl	480(%esp), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	484(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	488(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	492(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	adcl	108(%eax), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	setb	20(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	364(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 20(%esp)                  # 1-byte Folded Spill
+	movl	424(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	360(%esp), %esi
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	364(%esp), %esi
+	movl	64(%esp), %eax                  # 4-byte Reload
 	adcl	368(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %ebp          # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
 	adcl	372(%esp), %ebp
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	adcl	380(%esp), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	adcl	376(%esp), %ebp
+	adcl	380(%esp), %edi
+	movl	%edi, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	384(%esp), %eax
-	movl	%eax, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	388(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	392(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	396(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	396(%esp), %edi
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	400(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	264(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	264(%esp), %esi
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	276(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%esp), %esi          # 4-byte Reload
-	adcl	280(%esp), %esi
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	284(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	288(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	adcl	292(%esp), %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	76(%esp), %eax                  # 4-byte Reload
+	adcl	404(%esp), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	408(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	412(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	416(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	420(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	adcl	112(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	setb	60(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	292(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 60(%esp)                  # 1-byte Folded Spill
+	movl	352(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	288(%esp), %esi
+	movl	64(%esp), %esi                  # 4-byte Reload
+	adcl	292(%esp), %esi
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	296(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
+	movl	%eax, 44(%esp)                  # 4-byte Spill
 	adcl	300(%esp), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	304(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edi          # 4-byte Reload
-	adcl	308(%esp), %edi
-	movl	68(%esp), %ebp          # 4-byte Reload
-	adcl	312(%esp), %ebp
-	movl	72(%esp), %eax          # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	308(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	312(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	316(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	320(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	324(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	adcl	320(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	324(%esp), %ebp
+	movl	76(%esp), %eax                  # 4-byte Reload
 	adcl	328(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	332(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	2600(%esp), %eax
-	movl	64(%eax), %eax
-	movl	%eax, (%esp)
-	leal	192(%esp), %ecx
-	movl	2596(%esp), %edx
-	calll	.LmulPv544x32
-	movl	260(%esp), %edx
-	movl	100(%esp), %ecx         # 4-byte Reload
-	addl	192(%esp), %ecx
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	196(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	200(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	adcl	204(%esp), %esi
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	208(%esp), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	216(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	220(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	336(%esp), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	340(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	344(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	348(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %eax
+	adcl	116(%eax), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	setb	68(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	76(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	220(%esp), %ecx
+	pushl	%eax
+	pushl	1328(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 68(%esp)                  # 1-byte Folded Spill
+	movl	280(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	216(%esp), %esi
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	220(%esp), %esi
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	224(%esp), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	228(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	56(%esp), %edi                  # 4-byte Reload
 	adcl	232(%esp), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	adcl	236(%esp), %ebp
-	movl	%ebp, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	adcl	$0, %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	imull	%ecx, %eax
-	movl	%ecx, %esi
-	movl	%eax, (%esp)
-	leal	120(%esp), %ecx
-	movl	2604(%esp), %edx
-	calll	.LmulPv544x32
-	addl	120(%esp), %esi
-	movl	92(%esp), %esi          # 4-byte Reload
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	124(%esp), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	112(%esp), %ebp         # 4-byte Reload
-	adcl	128(%esp), %ebp
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	adcl	132(%esp), %esi
-	movl	104(%esp), %edx         # 4-byte Reload
-	adcl	136(%esp), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
-	movl	76(%esp), %edx          # 4-byte Reload
-	adcl	140(%esp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-	movl	84(%esp), %edx          # 4-byte Reload
-	adcl	144(%esp), %edx
-	movl	%edx, 84(%esp)          # 4-byte Spill
-	movl	80(%esp), %edx          # 4-byte Reload
-	adcl	148(%esp), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
-	movl	60(%esp), %edx          # 4-byte Reload
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	236(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	240(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	244(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	adcl	248(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	adcl	252(%esp), %ecx
+	movl	%ecx, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	256(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	260(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	264(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	268(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	272(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	adcl	276(%esp), %ecx
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %ecx
+	adcl	120(%ecx), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	setb	56(%esp)                        # 1-byte Folded Spill
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	imull	%esi, %ecx
+	subl	$4, %esp
+	leal	148(%esp), %eax
+	pushl	%ecx
+	pushl	1328(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 56(%esp)                  # 1-byte Folded Spill
+	movl	208(%esp), %ebp
+	adcl	$0, %ebp
+	addl	144(%esp), %esi
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	148(%esp), %ecx
+	movl	20(%esp), %edx                  # 4-byte Reload
 	adcl	152(%esp), %edx
-	movl	%edx, 60(%esp)          # 4-byte Spill
-	movl	52(%esp), %edx          # 4-byte Reload
-	adcl	156(%esp), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
-	movl	56(%esp), %edx          # 4-byte Reload
-	adcl	160(%esp), %edx
-	movl	%edx, 56(%esp)          # 4-byte Spill
-	movl	68(%esp), %edx          # 4-byte Reload
-	adcl	164(%esp), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	movl	72(%esp), %edx          # 4-byte Reload
-	adcl	168(%esp), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	64(%esp), %edx          # 4-byte Reload
-	adcl	172(%esp), %edx
-	movl	%edx, 64(%esp)          # 4-byte Spill
-	movl	88(%esp), %edx          # 4-byte Reload
-	adcl	176(%esp), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	96(%esp), %edx          # 4-byte Reload
-	adcl	180(%esp), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
-	movl	108(%esp), %edx         # 4-byte Reload
-	adcl	184(%esp), %edx
-	movl	%edx, 108(%esp)         # 4-byte Spill
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	188(%esp), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
-	movl	%eax, %edx
-	movl	2604(%esp), %edi
-	subl	(%edi), %edx
-	sbbl	4(%edi), %ebp
-	movl	%esi, %ebx
-	sbbl	8(%edi), %ebx
-	movl	104(%esp), %ecx         # 4-byte Reload
-	sbbl	12(%edi), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	sbbl	16(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	sbbl	20(%edi), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	sbbl	24(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	60(%esp), %eax          # 4-byte Reload
-	sbbl	28(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	68(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	72(%esp), %eax          # 4-byte Reload
-	sbbl	44(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	64(%esp), %eax          # 4-byte Reload
-	sbbl	48(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	sbbl	52(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	sbbl	56(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	60(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	sbbl	64(%edi), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	sarl	$31, %eax
-	testl	%eax, %eax
-	movl	116(%esp), %edi         # 4-byte Reload
-	js	.LBB259_2
-# BB#1:
-	movl	%edx, %edi
-.LBB259_2:
-	movl	2592(%esp), %edx
-	movl	%edi, (%edx)
-	movl	112(%esp), %edi         # 4-byte Reload
-	js	.LBB259_4
-# BB#3:
-	movl	%ebp, %edi
-.LBB259_4:
-	movl	%edi, 4(%edx)
-	js	.LBB259_6
-# BB#5:
-	movl	%ebx, %esi
-.LBB259_6:
-	movl	%esi, 8(%edx)
-	movl	104(%esp), %esi         # 4-byte Reload
-	js	.LBB259_8
-# BB#7:
-	movl	%ecx, %esi
-.LBB259_8:
-	movl	%esi, 12(%edx)
-	movl	76(%esp), %ecx          # 4-byte Reload
-	js	.LBB259_10
-# BB#9:
-	movl	4(%esp), %ecx           # 4-byte Reload
-.LBB259_10:
-	movl	%ecx, 16(%edx)
-	movl	84(%esp), %eax          # 4-byte Reload
-	js	.LBB259_12
-# BB#11:
-	movl	8(%esp), %eax           # 4-byte Reload
-.LBB259_12:
-	movl	%eax, 20(%edx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	js	.LBB259_14
-# BB#13:
-	movl	12(%esp), %eax          # 4-byte Reload
-.LBB259_14:
-	movl	%eax, 24(%edx)
-	movl	60(%esp), %eax          # 4-byte Reload
-	js	.LBB259_16
-# BB#15:
-	movl	16(%esp), %eax          # 4-byte Reload
-.LBB259_16:
-	movl	%eax, 28(%edx)
-	movl	52(%esp), %eax          # 4-byte Reload
-	js	.LBB259_18
-# BB#17:
-	movl	20(%esp), %eax          # 4-byte Reload
-.LBB259_18:
-	movl	%eax, 32(%edx)
-	movl	56(%esp), %eax          # 4-byte Reload
-	js	.LBB259_20
-# BB#19:
-	movl	24(%esp), %eax          # 4-byte Reload
-.LBB259_20:
-	movl	%eax, 36(%edx)
-	movl	68(%esp), %eax          # 4-byte Reload
-	js	.LBB259_22
-# BB#21:
-	movl	28(%esp), %eax          # 4-byte Reload
-.LBB259_22:
-	movl	%eax, 40(%edx)
-	movl	72(%esp), %eax          # 4-byte Reload
-	js	.LBB259_24
-# BB#23:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB259_24:
-	movl	%eax, 44(%edx)
-	movl	64(%esp), %eax          # 4-byte Reload
-	js	.LBB259_26
-# BB#25:
-	movl	36(%esp), %eax          # 4-byte Reload
-.LBB259_26:
-	movl	%eax, 48(%edx)
-	movl	88(%esp), %eax          # 4-byte Reload
-	js	.LBB259_28
-# BB#27:
-	movl	40(%esp), %eax          # 4-byte Reload
-.LBB259_28:
-	movl	%eax, 52(%edx)
-	movl	96(%esp), %eax          # 4-byte Reload
-	js	.LBB259_30
-# BB#29:
-	movl	44(%esp), %eax          # 4-byte Reload
-.LBB259_30:
-	movl	%eax, 56(%edx)
-	movl	108(%esp), %eax         # 4-byte Reload
-	js	.LBB259_32
-# BB#31:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB259_32:
-	movl	%eax, 60(%edx)
-	movl	100(%esp), %eax         # 4-byte Reload
-	js	.LBB259_34
-# BB#33:
-	movl	92(%esp), %eax          # 4-byte Reload
-.LBB259_34:
-	movl	%eax, 64(%edx)
-	addl	$2572, %esp             # imm = 0xA0C
+	movl	%edi, %esi
+	adcl	156(%esp), %esi
+	movl	52(%esp), %edi                  # 4-byte Reload
+	adcl	160(%esp), %edi
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	164(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	168(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	172(%esp), %eax
+	movl	76(%esp), %ebx                  # 4-byte Reload
+	adcl	176(%esp), %ebx
+	movl	%ebx, 76(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	180(%esp), %ebx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	adcl	184(%esp), %ebx
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	adcl	188(%esp), %ebx
+	movl	%ebx, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebx                  # 4-byte Reload
+	adcl	192(%esp), %ebx
+	movl	%ebx, 24(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ebx                  # 4-byte Reload
+	adcl	196(%esp), %ebx
+	movl	%ebx, 16(%esp)                  # 4-byte Spill
+	movl	64(%esp), %ebx                  # 4-byte Reload
+	adcl	200(%esp), %ebx
+	movl	%ebx, 64(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ebx                  # 4-byte Reload
+	adcl	204(%esp), %ebx
+	movl	%ebx, 44(%esp)                  # 4-byte Spill
+	movl	1316(%esp), %ebx
+	adcl	124(%ebx), %ebp
+	xorl	%ebx, %ebx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	subl	84(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 84(%esp)                  # 4-byte Spill
+	movl	%edx, %ecx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	sbbl	88(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 88(%esp)                  # 4-byte Spill
+	movl	%esi, %ecx
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	sbbl	92(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	movl	%edi, %ecx
+	movl	%edi, 52(%esp)                  # 4-byte Spill
+	sbbl	96(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	sbbl	100(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, 100(%esp)                 # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	sbbl	104(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%ecx, 104(%esp)                 # 4-byte Spill
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	sbbl	120(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	76(%esp), %eax                  # 4-byte Reload
+	sbbl	124(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	108(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	80(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	sbbl	112(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 112(%esp)                 # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	116(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 116(%esp)                 # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	sbbl	128(%esp), %edx                 # 4-byte Folded Reload
+	movl	64(%esp), %esi                  # 4-byte Reload
+	sbbl	132(%esp), %esi                 # 4-byte Folded Reload
+	movl	44(%esp), %edi                  # 4-byte Reload
+	sbbl	136(%esp), %edi                 # 4-byte Folded Reload
+	movl	%ebp, %ecx
+	sbbl	140(%esp), %ecx                 # 4-byte Folded Reload
+	sbbl	%ebx, %ebx
+	testb	$1, %bl
+	je	.LBB79_2
+# %bb.1:
+	movl	%ebp, %ecx
+.LBB79_2:
+	movl	1312(%esp), %eax
+	movl	%ecx, 60(%eax)
+	je	.LBB79_4
+# %bb.3:
+	movl	44(%esp), %edi                  # 4-byte Reload
+.LBB79_4:
+	movl	%edi, 56(%eax)
+	movl	84(%esp), %ecx                  # 4-byte Reload
+	movl	100(%esp), %ebx                 # 4-byte Reload
+	movl	104(%esp), %ebp                 # 4-byte Reload
+	je	.LBB79_6
+# %bb.5:
+	movl	64(%esp), %esi                  # 4-byte Reload
+.LBB79_6:
+	movl	%esi, 52(%eax)
+	je	.LBB79_8
+# %bb.7:
+	movl	16(%esp), %edx                  # 4-byte Reload
+.LBB79_8:
+	movl	%edx, 48(%eax)
+	movl	92(%esp), %edi                  # 4-byte Reload
+	movl	112(%esp), %esi                 # 4-byte Reload
+	movl	116(%esp), %edx                 # 4-byte Reload
+	jne	.LBB79_9
+# %bb.10:
+	movl	%edx, 44(%eax)
+	movl	108(%esp), %edx                 # 4-byte Reload
+	jne	.LBB79_11
+.LBB79_12:
+	movl	%esi, 40(%eax)
+	jne	.LBB79_13
+.LBB79_14:
+	movl	80(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 36(%eax)
+	jne	.LBB79_15
+.LBB79_16:
+	movl	%edx, 32(%eax)
+	je	.LBB79_18
+.LBB79_17:
+	movl	76(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+.LBB79_18:
+	movl	%ecx, %edx
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%eax)
+	movl	%edi, %esi
+	jne	.LBB79_19
+# %bb.20:
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%eax)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	jne	.LBB79_21
+.LBB79_22:
+	movl	%ebp, 20(%eax)
+	movl	%edx, %ecx
+	jne	.LBB79_23
+.LBB79_24:
+	movl	%ebx, 16(%eax)
+	movl	88(%esp), %edx                  # 4-byte Reload
+	jne	.LBB79_25
+.LBB79_26:
+	movl	%edi, 12(%eax)
+	jne	.LBB79_27
+.LBB79_28:
+	movl	%esi, 8(%eax)
+	jne	.LBB79_29
+.LBB79_30:
+	movl	%edx, 4(%eax)
+	je	.LBB79_32
+.LBB79_31:
+	movl	60(%esp), %ecx                  # 4-byte Reload
+.LBB79_32:
+	movl	%ecx, (%eax)
+	addl	$1292, %esp                     # imm = 0x50C
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end259:
-	.size	mcl_fp_montNF17L, .Lfunc_end259-mcl_fp_montNF17L
-
-	.globl	mcl_fp_montRed17L
-	.align	16, 0x90
-	.type	mcl_fp_montRed17L,@function
-mcl_fp_montRed17L:                      # @mcl_fp_montRed17L
-# BB#0:
+.LBB79_9:
+	movl	24(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 44(%eax)
+	movl	108(%esp), %edx                 # 4-byte Reload
+	je	.LBB79_12
+.LBB79_11:
+	movl	32(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 40(%eax)
+	je	.LBB79_14
+.LBB79_13:
+	movl	28(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 80(%esp)                  # 4-byte Spill
+	movl	80(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 36(%eax)
+	je	.LBB79_16
+.LBB79_15:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 32(%eax)
+	jne	.LBB79_17
+	jmp	.LBB79_18
+.LBB79_19:
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	68(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%eax)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	je	.LBB79_22
+.LBB79_21:
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%eax)
+	movl	%edx, %ecx
+	je	.LBB79_24
+.LBB79_23:
+	movl	40(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 16(%eax)
+	movl	88(%esp), %edx                  # 4-byte Reload
+	je	.LBB79_26
+.LBB79_25:
+	movl	52(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 12(%eax)
+	je	.LBB79_28
+.LBB79_27:
+	movl	56(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 8(%eax)
+	je	.LBB79_30
+.LBB79_29:
+	movl	20(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 4(%eax)
+	jne	.LBB79_31
+	jmp	.LBB79_32
+.Lfunc_end79:
+	.size	mcl_fp_montRed16L, .Lfunc_end79-mcl_fp_montRed16L
+                                        # -- End function
+	.globl	mcl_fp_montRedNF16L             # -- Begin function mcl_fp_montRedNF16L
+	.p2align	4, 0x90
+	.type	mcl_fp_montRedNF16L,@function
+mcl_fp_montRedNF16L:                    # @mcl_fp_montRedNF16L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$1436, %esp             # imm = 0x59C
-	calll	.L260$pb
-.L260$pb:
-	popl	%eax
-.Ltmp61:
-	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp61-.L260$pb), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	1464(%esp), %edx
-	movl	-4(%edx), %esi
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	1460(%esp), %ecx
-	movl	(%ecx), %ebx
-	movl	%ebx, 76(%esp)          # 4-byte Spill
-	movl	4(%ecx), %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	imull	%esi, %ebx
-	movl	132(%ecx), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	128(%ecx), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	124(%ecx), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	120(%ecx), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	116(%ecx), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	112(%ecx), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	108(%ecx), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	104(%ecx), %esi
-	movl	%esi, 156(%esp)         # 4-byte Spill
-	movl	100(%ecx), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	96(%ecx), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	92(%ecx), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	88(%ecx), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	84(%ecx), %esi
-	movl	%esi, 180(%esp)         # 4-byte Spill
-	movl	80(%ecx), %edi
-	movl	%edi, 196(%esp)         # 4-byte Spill
-	movl	76(%ecx), %esi
-	movl	%esi, 192(%esp)         # 4-byte Spill
-	movl	72(%ecx), %esi
-	movl	%esi, 204(%esp)         # 4-byte Spill
-	movl	68(%ecx), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	64(%ecx), %ebp
-	movl	%ebp, 176(%esp)         # 4-byte Spill
-	movl	60(%ecx), %ebp
-	movl	%ebp, 164(%esp)         # 4-byte Spill
+	subl	$1276, %esp                     # imm = 0x4FC
+	calll	.L80$pb
+.L80$pb:
+	popl	%ebx
+.Ltmp22:
+	addl	$_GLOBAL_OFFSET_TABLE_+(.Ltmp22-.L80$pb), %ebx
+	movl	1304(%esp), %ecx
+	movl	60(%ecx), %eax
+	movl	%eax, 124(%esp)                 # 4-byte Spill
 	movl	56(%ecx), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
+	movl	%eax, 120(%esp)                 # 4-byte Spill
 	movl	52(%ecx), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
+	movl	%eax, 116(%esp)                 # 4-byte Spill
 	movl	48(%ecx), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
+	movl	%eax, 112(%esp)                 # 4-byte Spill
 	movl	44(%ecx), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
+	movl	%eax, 108(%esp)                 # 4-byte Spill
 	movl	40(%ecx), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
+	movl	%eax, 104(%esp)                 # 4-byte Spill
 	movl	36(%ecx), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
+	movl	%eax, 96(%esp)                  # 4-byte Spill
 	movl	32(%ecx), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
+	movl	%eax, 92(%esp)                  # 4-byte Spill
 	movl	28(%ecx), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
+	movl	%eax, 64(%esp)                  # 4-byte Spill
 	movl	24(%ecx), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	20(%ecx), %ebp
-	movl	16(%ecx), %esi
-	movl	12(%ecx), %edi
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	20(%ecx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	16(%ecx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	12(%ecx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
 	movl	8(%ecx), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	(%edx), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	64(%edx), %ecx
-	movl	%ecx, 68(%esp)          # 4-byte Spill
-	movl	60(%edx), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	movl	56(%edx), %ecx
-	movl	%ecx, 60(%esp)          # 4-byte Spill
-	movl	52(%edx), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	48(%edx), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	44(%edx), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	40(%edx), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	36(%edx), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	32(%edx), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	28(%edx), %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	24(%edx), %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	20(%edx), %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	16(%edx), %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	8(%edx), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	4(%edx), %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	%ebx, (%esp)
-	leal	1360(%esp), %ecx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	movl	76(%esp), %eax          # 4-byte Reload
-	addl	1360(%esp), %eax
-	movl	80(%esp), %ecx          # 4-byte Reload
-	adcl	1364(%esp), %ecx
-	movl	100(%esp), %eax         # 4-byte Reload
-	adcl	1368(%esp), %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	adcl	1372(%esp), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	1376(%esp), %esi
-	movl	%esi, 76(%esp)          # 4-byte Spill
-	adcl	1380(%esp), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1384(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1388(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1392(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1396(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1400(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1404(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1408(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	1412(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	1416(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1420(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	1424(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1428(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	adcl	$0, 204(%esp)           # 4-byte Folded Spill
-	adcl	$0, 192(%esp)           # 4-byte Folded Spill
-	adcl	$0, 196(%esp)           # 4-byte Folded Spill
-	adcl	$0, 180(%esp)           # 4-byte Folded Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	movl	128(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	sbbl	%edi, %edi
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1288(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	andl	$1, %edi
-	movl	%edi, %ecx
-	addl	1288(%esp), %esi
-	movl	100(%esp), %edx         # 4-byte Reload
-	adcl	1292(%esp), %edx
-	movl	72(%esp), %eax          # 4-byte Reload
-	adcl	1296(%esp), %eax
-	movl	%eax, 72(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1300(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1304(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1308(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1312(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1316(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1320(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1324(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1328(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1332(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %edi         # 4-byte Reload
-	adcl	1336(%esp), %edi
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	1340(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1344(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	1348(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1352(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1356(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	adcl	$0, 192(%esp)           # 4-byte Folded Spill
-	adcl	$0, 196(%esp)           # 4-byte Folded Spill
-	adcl	$0, 180(%esp)           # 4-byte Folded Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	$0, %eax
-	movl	%eax, %esi
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 128(%esp)         # 4-byte Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	%edx, %ebp
-	movl	%ebp, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1216(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	1216(%esp), %ebp
-	movl	72(%esp), %ecx          # 4-byte Reload
-	adcl	1220(%esp), %ecx
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	1224(%esp), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	4(%ecx), %eax
+	movl	%ecx, %edx
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %eax
+	movl	60(%eax), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	56(%eax), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	52(%eax), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	48(%eax), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	44(%eax), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	40(%eax), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	36(%eax), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	32(%eax), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%eax), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	24(%eax), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	20(%eax), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	16(%eax), %edi
+	movl	12(%eax), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	8(%eax), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	(%eax), %esi
+	movl	4(%eax), %ebp
+	movl	-4(%edx), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	(%edx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	subl	$4, %esp
+	movl	%esi, %eax
+	imull	%ecx, %eax
+	leal	1212(%esp), %ecx
+	pushl	%eax
+	pushl	%edx
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addl	1208(%esp), %esi
+	adcl	1212(%esp), %ebp
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	1216(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	adcl	1220(%esp), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	adcl	1224(%esp), %edi
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	1228(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	1232(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	1236(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	1240(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	1244(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	1248(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	1252(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	1256(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	adcl	1260(%esp), %edi
-	movl	%edi, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %edi         # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	1260(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edi                  # 4-byte Reload
 	adcl	1264(%esp), %edi
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1268(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	1268(%esp), %esi
+	movl	1300(%esp), %eax
+	movl	64(%eax), %eax
 	adcl	1272(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1276(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1280(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1284(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	adcl	$0, 196(%esp)           # 4-byte Folded Spill
-	adcl	$0, 180(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 184(%esp)         # 4-byte Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	movl	144(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1144(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	1144(%esp), %esi
-	movl	76(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	setb	8(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%ebp, %eax
+	leal	1140(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 8(%esp)                   # 1-byte Folded Spill
+	movl	1200(%esp), %eax
+	adcl	$0, %eax
+	addl	1136(%esp), %ebp
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	1140(%esp), %edx
+	movl	16(%esp), %ebp                  # 4-byte Reload
+	adcl	1144(%esp), %ebp
+	movl	56(%esp), %ecx                  # 4-byte Reload
 	adcl	1148(%esp), %ecx
-	movl	80(%esp), %eax          # 4-byte Reload
-	adcl	1152(%esp), %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	adcl	1156(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1160(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1164(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1168(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1172(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1176(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1180(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	1184(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1152(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	1156(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	1160(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	1164(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1168(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1172(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1176(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	1180(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	1184(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
 	adcl	1188(%esp), %edi
-	movl	%edi, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1192(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	1196(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1200(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1204(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1208(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	1212(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	adcl	$0, 180(%esp)           # 4-byte Folded Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	movl	188(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 144(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1072(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	1072(%esp), %esi
-	movl	80(%esp), %esi          # 4-byte Reload
-	adcl	1076(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	adcl	1192(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	1196(%esp), %esi
+	movl	1300(%esp), %ecx
+	adcl	68(%ecx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %edi
+	leal	1068(%esp), %ecx
+	pushl	%eax
+	movl	1312(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	1128(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	1064(%esp), %edi
+	adcl	1068(%esp), %ebp
+	movl	%ebp, %ecx
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	1072(%esp), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	1076(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	1080(%esp), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	1084(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	1088(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	1092(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	1096(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	1100(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	1104(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	1108(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	1112(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	1112(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	1116(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	1120(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1124(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1128(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1132(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	1136(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1140(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	adcl	$0, 184(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 188(%esp)         # 4-byte Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	movl	172(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	movl	152(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	1000(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	1000(%esp), %esi
-	movl	84(%esp), %ecx          # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	adcl	1120(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %esi                  # 4-byte Reload
+	adcl	1124(%esp), %esi
+	movl	1300(%esp), %eax
+	adcl	72(%eax), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%ecx, %edi
+	imull	%ecx, %eax
+	leal	996(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 16(%esp)                  # 1-byte Folded Spill
+	movl	1056(%esp), %eax
+	adcl	$0, %eax
+	addl	992(%esp), %edi
+	movl	56(%esp), %edx                  # 4-byte Reload
+	adcl	996(%esp), %edx
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	1000(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
 	adcl	1004(%esp), %ecx
-	movl	88(%esp), %eax          # 4-byte Reload
-	adcl	1008(%esp), %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	1012(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	1016(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	1020(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	1024(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	1028(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	1032(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	1036(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	1040(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	1044(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	1048(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	1052(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	1056(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	1060(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	1064(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	1068(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	adcl	$0, 188(%esp)           # 4-byte Folded Spill
-	adcl	$0, 168(%esp)           # 4-byte Folded Spill
-	adcl	$0, %ebp
-	movl	%ebp, 172(%esp)         # 4-byte Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 152(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	928(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	928(%esp), %esi
-	movl	88(%esp), %esi          # 4-byte Reload
-	adcl	932(%esp), %esi
-	movl	92(%esp), %eax          # 4-byte Reload
-	adcl	936(%esp), %eax
-	movl	%eax, 92(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	940(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	944(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	948(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	952(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	956(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	960(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	964(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	968(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	972(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	976(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	980(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	984(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	988(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	992(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	996(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %ebp         # 4-byte Reload
-	adcl	$0, %ebp
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	movl	160(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%esi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	856(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	856(%esp), %esi
-	movl	92(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	1008(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	1012(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	1016(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	1020(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	1024(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	1028(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %edi                  # 4-byte Reload
+	adcl	1032(%esp), %edi
+	adcl	1036(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	1040(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	1044(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	adcl	1048(%esp), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
+	adcl	1052(%esp), %esi
+	movl	1300(%esp), %ecx
+	adcl	76(%ecx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	setb	12(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %ebp
+	leal	924(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 12(%esp)                  # 1-byte Folded Spill
+	movl	984(%esp), %eax
+	adcl	$0, %eax
+	addl	920(%esp), %ebp
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	924(%esp), %edx
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	928(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	932(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	936(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	940(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	944(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	948(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	952(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	adcl	956(%esp), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	960(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	964(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	968(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	972(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	976(%esp), %esi
+	movl	%esi, %ebp
+	movl	56(%esp), %edi                  # 4-byte Reload
+	adcl	980(%esp), %edi
+	movl	1300(%esp), %ecx
+	adcl	80(%ecx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	setb	52(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %esi
+	imull	%edx, %eax
+	leal	852(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 52(%esp)                  # 1-byte Folded Spill
+	movl	912(%esp), %eax
+	adcl	$0, %eax
+	addl	848(%esp), %esi
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	852(%esp), %edx
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	856(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
 	adcl	860(%esp), %ecx
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	864(%esp), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	868(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	872(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	876(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	880(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	884(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	888(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	892(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	896(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	900(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	904(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	908(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	912(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	916(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	920(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	adcl	924(%esp), %ebp
-	movl	%ebp, 168(%esp)         # 4-byte Spill
-	adcl	$0, 172(%esp)           # 4-byte Folded Spill
-	adcl	$0, 156(%esp)           # 4-byte Folded Spill
-	adcl	$0, %edi
-	movl	%edi, 160(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	movl	124(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %edi
-	movl	%edi, %eax
-	movl	96(%esp), %ebp          # 4-byte Reload
-	imull	%ebp, %eax
-	movl	%eax, (%esp)
-	leal	784(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	784(%esp), %edi
-	movl	108(%esp), %ecx         # 4-byte Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	864(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	868(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	872(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	876(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	880(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	884(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %esi                   # 4-byte Reload
+	adcl	888(%esp), %esi
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	892(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	896(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	900(%esp), %ebp
+	movl	%ebp, 52(%esp)                  # 4-byte Spill
+	adcl	904(%esp), %edi
+	movl	%edi, %ebp
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	908(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %ecx
+	adcl	84(%ecx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	setb	8(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %edi
+	imull	%edx, %eax
+	leal	780(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 8(%esp)                   # 1-byte Folded Spill
+	movl	840(%esp), %eax
+	adcl	$0, %eax
+	addl	776(%esp), %edi
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	780(%esp), %edx
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	784(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
 	adcl	788(%esp), %ecx
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	792(%esp), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	796(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	800(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	804(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	808(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	812(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	816(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	820(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	824(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	828(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	832(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	836(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	840(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	844(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	848(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	852(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 124(%esp)         # 4-byte Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %eax
-	movl	%ecx, %esi
-	imull	%ebp, %eax
-	movl	%eax, (%esp)
-	leal	712(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	712(%esp), %esi
-	movl	112(%esp), %ecx         # 4-byte Reload
-	adcl	716(%esp), %ecx
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	720(%esp), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	724(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
-	adcl	728(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	732(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
-	adcl	736(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
-	adcl	740(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	744(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	748(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
-	adcl	752(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	756(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %ebp         # 4-byte Reload
-	adcl	760(%esp), %ebp
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	764(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	768(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	772(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	776(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	adcl	780(%esp), %edi
-	movl	%edi, 156(%esp)         # 4-byte Spill
-	adcl	$0, 160(%esp)           # 4-byte Folded Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %esi
-	movl	%esi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	640(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	640(%esp), %esi
-	movl	120(%esp), %ecx         # 4-byte Reload
-	adcl	644(%esp), %ecx
-	movl	140(%esp), %eax         # 4-byte Reload
-	adcl	648(%esp), %eax
-	movl	%eax, 140(%esp)         # 4-byte Spill
-	movl	136(%esp), %eax         # 4-byte Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	792(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %edi                  # 4-byte Reload
+	adcl	796(%esp), %edi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	800(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	804(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	808(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	adcl	812(%esp), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %esi                  # 4-byte Reload
+	adcl	816(%esp), %esi
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	820(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	824(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	adcl	828(%esp), %ebp
+	movl	%ebp, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	832(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	836(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %ecx
+	adcl	88(%ecx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	setb	44(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%edx, %ebp
+	imull	%edx, %eax
+	leal	708(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 44(%esp)                  # 1-byte Folded Spill
+	movl	768(%esp), %eax
+	adcl	$0, %eax
+	addl	704(%esp), %ebp
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	708(%esp), %edx
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	712(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	716(%esp), %ebp
+	adcl	720(%esp), %edi
+	movl	%edi, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %edi                   # 4-byte Reload
+	adcl	724(%esp), %edi
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	728(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	732(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	736(%esp), %ecx
+	movl	%ecx, 8(%esp)                   # 4-byte Spill
+	adcl	740(%esp), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	744(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	748(%esp), %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	752(%esp), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	756(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	760(%esp), %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	764(%esp), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	1300(%esp), %ecx
+	adcl	92(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	setb	4(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%edx, %eax
+	movl	%edx, %esi
+	leal	636(%esp), %ecx
+	pushl	%eax
+	movl	1312(%esp), %eax
+	pushl	%eax
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 4(%esp)                   # 1-byte Folded Spill
+	movl	696(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	632(%esp), %esi
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	636(%esp), %ecx
+	adcl	640(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	644(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	adcl	648(%esp), %edi
+	movl	%edi, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	652(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	656(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edi                  # 4-byte Reload
+	adcl	656(%esp), %edi
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	660(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %eax         # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	664(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %eax         # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	668(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %edi         # 4-byte Reload
-	adcl	672(%esp), %edi
-	movl	192(%esp), %esi         # 4-byte Reload
-	adcl	676(%esp), %esi
-	movl	196(%esp), %eax         # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	672(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ebp                  # 4-byte Reload
+	adcl	676(%esp), %ebp
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	680(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	adcl	684(%esp), %ebp
-	movl	%ebp, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	adcl	684(%esp), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	688(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	692(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	696(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	700(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	704(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	708(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	adcl	$0, 152(%esp)           # 4-byte Folded Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
-	movl	%ecx, %ebp
-	movl	%ebp, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	568(%esp), %ecx
-	movl	1464(%esp), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %eax
+	adcl	96(%eax), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	setb	48(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%ecx, %esi
+	imull	%ecx, %eax
+	leal	564(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 48(%esp)                  # 1-byte Folded Spill
+	movl	624(%esp), %eax
+	adcl	$0, %eax
 	movl	%eax, %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	568(%esp), %ebp
-	movl	140(%esp), %ecx         # 4-byte Reload
-	adcl	572(%esp), %ecx
-	movl	136(%esp), %eax         # 4-byte Reload
+	addl	560(%esp), %esi
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	564(%esp), %ecx
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	568(%esp), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %esi                   # 4-byte Reload
+	adcl	572(%esp), %esi
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	576(%esp), %eax
-	movl	%eax, 136(%esp)         # 4-byte Spill
-	movl	148(%esp), %eax         # 4-byte Reload
-	adcl	580(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	adcl	580(%esp), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	584(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	movl	176(%esp), %ebp         # 4-byte Reload
-	adcl	588(%esp), %ebp
-	movl	200(%esp), %eax         # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	588(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	592(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	adcl	596(%esp), %edi
-	movl	%edi, 204(%esp)         # 4-byte Spill
-	adcl	600(%esp), %esi
-	movl	%esi, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %esi         # 4-byte Reload
-	adcl	604(%esp), %esi
-	movl	180(%esp), %eax         # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	596(%esp), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	adcl	600(%esp), %ebp
+	movl	%ebp, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	604(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	608(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	612(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	616(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	616(%esp), %ebp
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	620(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	624(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	628(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	632(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	636(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	adcl	$0, 144(%esp)           # 4-byte Folded Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, 100(%esp)           # 4-byte Folded Spill
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %eax
+	adcl	100(%eax), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	setb	4(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
 	movl	%ecx, %edi
-	movl	%edi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	496(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	496(%esp), %edi
-	movl	136(%esp), %edi         # 4-byte Reload
-	adcl	500(%esp), %edi
-	movl	148(%esp), %eax         # 4-byte Reload
+	leal	492(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 4(%esp)                   # 1-byte Folded Spill
+	movl	552(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	488(%esp), %edi
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	492(%esp), %ecx
+	adcl	496(%esp), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	500(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
 	adcl	504(%esp), %eax
-	movl	%eax, 148(%esp)         # 4-byte Spill
-	movl	164(%esp), %eax         # 4-byte Reload
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
 	adcl	508(%esp), %eax
-	movl	%eax, 164(%esp)         # 4-byte Spill
-	adcl	512(%esp), %ebp
-	movl	%ebp, 176(%esp)         # 4-byte Spill
-	movl	200(%esp), %ebp         # 4-byte Reload
-	adcl	516(%esp), %ebp
-	movl	204(%esp), %eax         # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	512(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	516(%esp), %edi
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	520(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	524(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	adcl	528(%esp), %esi
-	movl	%esi, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	528(%esp), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	532(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	536(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	540(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	544(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	548(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	552(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	556(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	560(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	564(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	adcl	$0, 128(%esp)           # 4-byte Folded Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	movl	%edi, %eax
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	424(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	424(%esp), %edi
-	movl	148(%esp), %ecx         # 4-byte Reload
-	adcl	428(%esp), %ecx
-	movl	164(%esp), %edi         # 4-byte Reload
-	adcl	432(%esp), %edi
-	movl	176(%esp), %eax         # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	adcl	540(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	544(%esp), %ebp
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	548(%esp), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %eax
+	adcl	104(%eax), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
+	movl	%ecx, %esi
+	leal	420(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 16(%esp)                  # 1-byte Folded Spill
+	movl	480(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	416(%esp), %esi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	420(%esp), %ecx
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	424(%esp), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	428(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	432(%esp), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	436(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	adcl	440(%esp), %ebp
-	movl	204(%esp), %eax         # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	440(%esp), %edi
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	444(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	448(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
 	adcl	452(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	456(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %eax         # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	adcl	460(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	464(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	468(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	adcl	468(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	472(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	476(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	480(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	484(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	488(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	492(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	adcl	$0, 132(%esp)           # 4-byte Folded Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	adcl	$0, %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	%ecx, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %eax
+	adcl	108(%eax), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	setb	16(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%ecx, %eax
 	movl	%ecx, %esi
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	352(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	352(%esp), %esi
-	movl	%edi, %ecx
-	adcl	356(%esp), %ecx
-	movl	176(%esp), %eax         # 4-byte Reload
+	leal	348(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 16(%esp)                  # 1-byte Folded Spill
+	movl	408(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	344(%esp), %esi
+	movl	12(%esp), %esi                  # 4-byte Reload
+	adcl	348(%esp), %esi
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	352(%esp), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	356(%esp), %ebp
+	movl	44(%esp), %eax                  # 4-byte Reload
 	adcl	360(%esp), %eax
-	movl	%eax, 176(%esp)         # 4-byte Spill
-	adcl	364(%esp), %ebp
-	movl	%ebp, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	adcl	364(%esp), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
 	adcl	368(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	372(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	376(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	376(%esp), %edi
+	movl	36(%esp), %eax                  # 4-byte Reload
 	adcl	380(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	184(%esp), %edi         # 4-byte Reload
-	adcl	384(%esp), %edi
-	movl	188(%esp), %eax         # 4-byte Reload
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	384(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	388(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	392(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	396(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	400(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	404(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	408(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	412(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	416(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	420(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	adcl	$0, 124(%esp)           # 4-byte Folded Spill
-	adcl	$0, 116(%esp)           # 4-byte Folded Spill
-	movl	100(%esp), %esi         # 4-byte Reload
-	adcl	$0, %esi
-	movl	%ecx, %eax
-	movl	%ecx, %ebp
-	imull	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, (%esp)
-	leal	280(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	280(%esp), %ebp
-	movl	176(%esp), %ecx         # 4-byte Reload
-	adcl	284(%esp), %ecx
-	movl	200(%esp), %eax         # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	1300(%esp), %eax
+	adcl	112(%eax), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	setb	8(%esp)                         # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
+	imull	%esi, %eax
+	leal	276(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 8(%esp)                   # 1-byte Folded Spill
+	movl	336(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	272(%esp), %esi
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	276(%esp), %ecx
+	adcl	280(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	284(%esp), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
 	adcl	288(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %eax         # 4-byte Reload
-	adcl	292(%esp), %eax
-	movl	%eax, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %eax         # 4-byte Reload
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %esi                  # 4-byte Reload
+	adcl	292(%esp), %esi
+	movl	56(%esp), %eax                  # 4-byte Reload
 	adcl	296(%esp), %eax
-	movl	%eax, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	300(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	304(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	adcl	308(%esp), %edi
-	movl	%edi, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	adcl	300(%esp), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edi                  # 4-byte Reload
+	adcl	304(%esp), %edi
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	308(%esp), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
 	adcl	312(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
 	adcl	316(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
 	adcl	320(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
 	adcl	324(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
 	adcl	328(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
 	adcl	332(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	336(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	340(%esp), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	344(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	348(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	116(%esp), %edi         # 4-byte Reload
-	adcl	$0, %edi
-	adcl	$0, %esi
-	movl	96(%esp), %eax          # 4-byte Reload
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %eax
+	adcl	116(%eax), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	setb	52(%esp)                        # 1-byte Folded Spill
+	subl	$4, %esp
+	movl	64(%esp), %eax                  # 4-byte Reload
 	imull	%ecx, %eax
 	movl	%ecx, %ebp
-	movl	%eax, (%esp)
-	leal	208(%esp), %ecx
-	movl	1464(%esp), %edx
-	movl	104(%esp), %ebx         # 4-byte Reload
-	calll	.LmulPv544x32
-	addl	208(%esp), %ebp
-	movl	200(%esp), %eax         # 4-byte Reload
-	adcl	212(%esp), %eax
-	movl	%eax, 200(%esp)         # 4-byte Spill
-	movl	204(%esp), %edx         # 4-byte Reload
-	adcl	216(%esp), %edx
-	movl	%edx, 204(%esp)         # 4-byte Spill
-	movl	192(%esp), %ecx         # 4-byte Reload
+	leal	204(%esp), %ecx
+	pushl	%eax
+	pushl	1312(%esp)
+	pushl	%ecx
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, 52(%esp)                  # 1-byte Folded Spill
+	movl	264(%esp), %eax
+	adcl	$0, %eax
+	movl	%eax, %edx
+	addl	200(%esp), %ebp
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	204(%esp), %eax
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	208(%esp), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	212(%esp), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	adcl	216(%esp), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
 	adcl	220(%esp), %ecx
-	movl	%ecx, 192(%esp)         # 4-byte Spill
-	movl	196(%esp), %eax         # 4-byte Reload
-	adcl	224(%esp), %eax
-	movl	%eax, 196(%esp)         # 4-byte Spill
-	movl	180(%esp), %eax         # 4-byte Reload
-	adcl	228(%esp), %eax
-	movl	%eax, 180(%esp)         # 4-byte Spill
-	movl	%eax, %ebp
-	movl	184(%esp), %eax         # 4-byte Reload
-	adcl	232(%esp), %eax
-	movl	%eax, 184(%esp)         # 4-byte Spill
-	movl	188(%esp), %eax         # 4-byte Reload
-	adcl	236(%esp), %eax
-	movl	%eax, 188(%esp)         # 4-byte Spill
-	movl	168(%esp), %eax         # 4-byte Reload
-	adcl	240(%esp), %eax
-	movl	%eax, 168(%esp)         # 4-byte Spill
-	movl	172(%esp), %eax         # 4-byte Reload
-	adcl	244(%esp), %eax
-	movl	%eax, 172(%esp)         # 4-byte Spill
-	movl	156(%esp), %eax         # 4-byte Reload
-	adcl	248(%esp), %eax
-	movl	%eax, 156(%esp)         # 4-byte Spill
-	movl	160(%esp), %eax         # 4-byte Reload
-	adcl	252(%esp), %eax
-	movl	%eax, 160(%esp)         # 4-byte Spill
-	movl	152(%esp), %eax         # 4-byte Reload
-	adcl	256(%esp), %eax
-	movl	%eax, 152(%esp)         # 4-byte Spill
-	movl	144(%esp), %eax         # 4-byte Reload
-	adcl	260(%esp), %eax
-	movl	%eax, 144(%esp)         # 4-byte Spill
-	movl	128(%esp), %ebx         # 4-byte Reload
-	adcl	264(%esp), %ebx
-	movl	%ebx, 128(%esp)         # 4-byte Spill
-	movl	132(%esp), %eax         # 4-byte Reload
-	adcl	268(%esp), %eax
-	movl	%eax, 132(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	272(%esp), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	adcl	276(%esp), %edi
-	movl	%edi, 116(%esp)         # 4-byte Spill
-	adcl	$0, %esi
-	movl	%esi, 100(%esp)         # 4-byte Spill
-	movl	200(%esp), %edi         # 4-byte Reload
-	subl	16(%esp), %edi          # 4-byte Folded Reload
-	sbbl	4(%esp), %edx           # 4-byte Folded Reload
-	sbbl	8(%esp), %ecx           # 4-byte Folded Reload
-	movl	196(%esp), %eax         # 4-byte Reload
-	sbbl	12(%esp), %eax          # 4-byte Folded Reload
-	sbbl	20(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 88(%esp)          # 4-byte Spill
-	movl	184(%esp), %esi         # 4-byte Reload
-	sbbl	24(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 92(%esp)          # 4-byte Spill
-	movl	188(%esp), %esi         # 4-byte Reload
-	sbbl	28(%esp), %esi          # 4-byte Folded Reload
-	movl	%esi, 96(%esp)          # 4-byte Spill
-	movl	168(%esp), %esi         # 4-byte Reload
-	sbbl	32(%esp), %esi          # 4-byte Folded Reload
-	movl	172(%esp), %ebp         # 4-byte Reload
-	sbbl	36(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 104(%esp)         # 4-byte Spill
-	movl	156(%esp), %ebp         # 4-byte Reload
-	sbbl	40(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 108(%esp)         # 4-byte Spill
-	movl	160(%esp), %ebp         # 4-byte Reload
-	sbbl	44(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 112(%esp)         # 4-byte Spill
-	movl	152(%esp), %ebp         # 4-byte Reload
-	sbbl	48(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 120(%esp)         # 4-byte Spill
-	movl	144(%esp), %ebp         # 4-byte Reload
-	sbbl	52(%esp), %ebp          # 4-byte Folded Reload
-	movl	%ebp, 136(%esp)         # 4-byte Spill
-	sbbl	56(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 140(%esp)         # 4-byte Spill
-	movl	132(%esp), %ebx         # 4-byte Reload
-	sbbl	60(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 148(%esp)         # 4-byte Spill
-	movl	124(%esp), %ebx         # 4-byte Reload
-	sbbl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 164(%esp)         # 4-byte Spill
-	movl	116(%esp), %ebx         # 4-byte Reload
-	sbbl	68(%esp), %ebx          # 4-byte Folded Reload
-	movl	%ebx, 176(%esp)         # 4-byte Spill
-	movl	100(%esp), %ebx         # 4-byte Reload
-	sbbl	$0, %ebx
-	andl	$1, %ebx
-	jne	.LBB260_2
-# BB#1:
-	movl	%esi, 168(%esp)         # 4-byte Spill
-.LBB260_2:
-	testb	%bl, %bl
-	movl	200(%esp), %esi         # 4-byte Reload
-	jne	.LBB260_4
-# BB#3:
-	movl	%edi, %esi
-.LBB260_4:
-	movl	1456(%esp), %edi
-	movl	%esi, (%edi)
-	movl	156(%esp), %esi         # 4-byte Reload
-	movl	204(%esp), %ebx         # 4-byte Reload
-	jne	.LBB260_6
-# BB#5:
-	movl	%edx, %ebx
-.LBB260_6:
-	movl	%ebx, 4(%edi)
-	movl	144(%esp), %ebx         # 4-byte Reload
-	movl	192(%esp), %edx         # 4-byte Reload
-	jne	.LBB260_8
-# BB#7:
-	movl	%ecx, %edx
-.LBB260_8:
-	movl	%edx, 8(%edi)
-	movl	132(%esp), %edx         # 4-byte Reload
-	movl	196(%esp), %ecx         # 4-byte Reload
-	jne	.LBB260_10
-# BB#9:
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	adcl	224(%esp), %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	adcl	228(%esp), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %ebp                    # 4-byte Reload
+	adcl	232(%esp), %ebp
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	adcl	236(%esp), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	240(%esp), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	adcl	244(%esp), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	adcl	248(%esp), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	adcl	252(%esp), %ecx
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	adcl	256(%esp), %ecx
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	adcl	260(%esp), %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	1300(%esp), %ecx
+	adcl	120(%ecx), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	setb	(%esp)                          # 1-byte Folded Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	imull	%eax, %ecx
+	movl	%eax, %esi
+	subl	$4, %esp
+	leal	132(%esp), %eax
+	pushl	%ecx
+	pushl	1312(%esp)
+	pushl	%eax
+	calll	mulPv512x32@PLT
+	addl	$12, %esp
+	addb	$255, (%esp)                    # 1-byte Folded Spill
+	movl	192(%esp), %eax
+	adcl	$0, %eax
+	addl	128(%esp), %esi
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	132(%esp), %ecx
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	136(%esp), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	140(%esp), %edx
+	movl	56(%esp), %esi                  # 4-byte Reload
+	adcl	144(%esp), %esi
+	movl	40(%esp), %edi                  # 4-byte Reload
+	adcl	148(%esp), %edi
+	movl	36(%esp), %ebx                  # 4-byte Reload
+	adcl	152(%esp), %ebx
+	movl	%ebx, 36(%esp)                  # 4-byte Spill
+	movl	%eax, %ebx
+	adcl	156(%esp), %ebp
+	movl	%ebp, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	adcl	160(%esp), %ebp
+	movl	%ebp, 20(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	164(%esp), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	168(%esp), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	172(%esp), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	adcl	176(%esp), %ebp
+	movl	%ebp, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	180(%esp), %ebp
+	movl	%ebp, 12(%esp)                  # 4-byte Spill
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	adcl	184(%esp), %ebp
+	movl	%ebp, 48(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebp                   # 4-byte Reload
+	adcl	188(%esp), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	1300(%esp), %ebp
+	adcl	124(%ebp), %ebx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	subl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	72(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	%edx, %eax
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	sbbl	76(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	%esi, %eax
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	sbbl	80(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	sbbl	84(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edi, 84(%esp)                  # 4-byte Spill
+	movl	36(%esp), %eax                  # 4-byte Reload
+	sbbl	88(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	sbbl	100(%esp), %eax                 # 4-byte Folded Reload
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	92(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	96(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	32(%esp), %eax                  # 4-byte Reload
+	sbbl	104(%esp), %eax                 # 4-byte Folded Reload
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	sbbl	108(%esp), %ebp                 # 4-byte Folded Reload
+	movl	12(%esp), %edx                  # 4-byte Reload
+	sbbl	112(%esp), %edx                 # 4-byte Folded Reload
+	movl	48(%esp), %esi                  # 4-byte Reload
+	sbbl	116(%esp), %esi                 # 4-byte Folded Reload
+	movl	8(%esp), %edi                   # 4-byte Reload
+	sbbl	120(%esp), %edi                 # 4-byte Folded Reload
+	movl	%ebx, %ecx
+	sbbl	124(%esp), %ecx                 # 4-byte Folded Reload
+	testl	%ecx, %ecx
+	js	.LBB80_1
+# %bb.2:
+	movl	1296(%esp), %ebx
+	movl	%ecx, 60(%ebx)
+	js	.LBB80_3
+.LBB80_4:
+	movl	%edi, 56(%ebx)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	js	.LBB80_5
+.LBB80_6:
+	movl	%esi, 52(%ebx)
+	movl	84(%esp), %edi                  # 4-byte Reload
+	js	.LBB80_7
+.LBB80_8:
+	movl	%edx, 48(%ebx)
+	movl	80(%esp), %esi                  # 4-byte Reload
+	js	.LBB80_9
+.LBB80_10:
+	movl	%ebp, 44(%ebx)
+	js	.LBB80_11
+.LBB80_12:
+	movl	%eax, 40(%ebx)
+	movl	88(%esp), %ebp                  # 4-byte Reload
+	movl	96(%esp), %eax                  # 4-byte Reload
+	js	.LBB80_13
+.LBB80_14:
+	movl	%eax, 36(%ebx)
+	movl	68(%esp), %edx                  # 4-byte Reload
+	movl	92(%esp), %eax                  # 4-byte Reload
+	js	.LBB80_15
+.LBB80_16:
+	movl	%eax, 32(%ebx)
+	js	.LBB80_17
+.LBB80_18:
+	movl	%ecx, %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%ebx)
+	js	.LBB80_19
+.LBB80_20:
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%ebx)
+	js	.LBB80_21
+.LBB80_22:
+	movl	%ebp, 20(%ebx)
 	movl	%eax, %ecx
-.LBB260_10:
-	movl	%ecx, 12(%edi)
-	movl	124(%esp), %ecx         # 4-byte Reload
-	movl	180(%esp), %eax         # 4-byte Reload
-	jne	.LBB260_12
-# BB#11:
-	movl	88(%esp), %eax          # 4-byte Reload
-.LBB260_12:
-	movl	%eax, 16(%edi)
-	movl	188(%esp), %eax         # 4-byte Reload
-	movl	184(%esp), %ebp         # 4-byte Reload
-	jne	.LBB260_14
-# BB#13:
-	movl	92(%esp), %ebp          # 4-byte Reload
-.LBB260_14:
-	movl	%ebp, 20(%edi)
-	movl	152(%esp), %ebp         # 4-byte Reload
-	jne	.LBB260_16
-# BB#15:
-	movl	96(%esp), %eax          # 4-byte Reload
-.LBB260_16:
-	movl	%eax, 24(%edi)
-	movl	168(%esp), %eax         # 4-byte Reload
-	movl	%eax, 28(%edi)
-	jne	.LBB260_18
-# BB#17:
-	movl	104(%esp), %eax         # 4-byte Reload
-	movl	%eax, 172(%esp)         # 4-byte Spill
-.LBB260_18:
-	movl	172(%esp), %eax         # 4-byte Reload
-	movl	%eax, 32(%edi)
-	jne	.LBB260_20
-# BB#19:
-	movl	108(%esp), %esi         # 4-byte Reload
-.LBB260_20:
-	movl	%esi, 36(%edi)
-	jne	.LBB260_22
-# BB#21:
-	movl	112(%esp), %eax         # 4-byte Reload
-	movl	%eax, 160(%esp)         # 4-byte Spill
-.LBB260_22:
-	movl	160(%esp), %esi         # 4-byte Reload
-	movl	%esi, 40(%edi)
-	movl	128(%esp), %eax         # 4-byte Reload
-	jne	.LBB260_24
-# BB#23:
-	movl	120(%esp), %ebp         # 4-byte Reload
-.LBB260_24:
-	movl	%ebp, 44(%edi)
-	jne	.LBB260_26
-# BB#25:
-	movl	136(%esp), %ebx         # 4-byte Reload
-.LBB260_26:
-	movl	%ebx, 48(%edi)
-	jne	.LBB260_28
-# BB#27:
-	movl	140(%esp), %eax         # 4-byte Reload
-.LBB260_28:
-	movl	%eax, 52(%edi)
-	jne	.LBB260_30
-# BB#29:
-	movl	148(%esp), %edx         # 4-byte Reload
-.LBB260_30:
-	movl	%edx, 56(%edi)
-	movl	116(%esp), %eax         # 4-byte Reload
-	jne	.LBB260_32
-# BB#31:
-	movl	164(%esp), %ecx         # 4-byte Reload
-.LBB260_32:
-	movl	%ecx, 60(%edi)
-	jne	.LBB260_34
-# BB#33:
-	movl	176(%esp), %eax         # 4-byte Reload
-.LBB260_34:
-	movl	%eax, 64(%edi)
-	addl	$1436, %esp             # imm = 0x59C
+	js	.LBB80_23
+.LBB80_24:
+	movl	%edi, 16(%ebx)
+	movl	76(%esp), %eax                  # 4-byte Reload
+	js	.LBB80_25
+.LBB80_26:
+	movl	%esi, 12(%ebx)
+	js	.LBB80_27
+.LBB80_28:
+	movl	%eax, 8(%ebx)
+	js	.LBB80_29
+.LBB80_30:
+	movl	%ecx, 4(%ebx)
+	jns	.LBB80_32
+.LBB80_31:
+	movl	44(%esp), %edx                  # 4-byte Reload
+.LBB80_32:
+	movl	%edx, (%ebx)
+	addl	$1276, %esp                     # imm = 0x4FC
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end260:
-	.size	mcl_fp_montRed17L, .Lfunc_end260-mcl_fp_montRed17L
-
-	.globl	mcl_fp_addPre17L
-	.align	16, 0x90
-	.type	mcl_fp_addPre17L,@function
-mcl_fp_addPre17L:                       # @mcl_fp_addPre17L
-# BB#0:
+.LBB80_1:
+	movl	%ebx, %ecx
+	movl	1296(%esp), %ebx
+	movl	%ecx, 60(%ebx)
+	jns	.LBB80_4
+.LBB80_3:
+	movl	8(%esp), %edi                   # 4-byte Reload
+	movl	%edi, 56(%ebx)
+	movl	72(%esp), %ecx                  # 4-byte Reload
+	jns	.LBB80_6
+.LBB80_5:
+	movl	48(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 52(%ebx)
+	movl	84(%esp), %edi                  # 4-byte Reload
+	jns	.LBB80_8
+.LBB80_7:
+	movl	12(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 48(%ebx)
+	movl	80(%esp), %esi                  # 4-byte Reload
+	jns	.LBB80_10
+.LBB80_9:
+	movl	4(%esp), %ebp                   # 4-byte Reload
+	movl	%ebp, 44(%ebx)
+	jns	.LBB80_12
+.LBB80_11:
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 40(%ebx)
+	movl	88(%esp), %ebp                  # 4-byte Reload
+	movl	96(%esp), %eax                  # 4-byte Reload
+	jns	.LBB80_14
+.LBB80_13:
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 36(%ebx)
+	movl	68(%esp), %edx                  # 4-byte Reload
+	movl	92(%esp), %eax                  # 4-byte Reload
+	jns	.LBB80_16
+.LBB80_15:
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 32(%ebx)
+	jns	.LBB80_18
+.LBB80_17:
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	%ecx, %eax
+	movl	64(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%ebx)
+	jns	.LBB80_20
+.LBB80_19:
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%ebx)
+	jns	.LBB80_22
+.LBB80_21:
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%ebx)
+	movl	%eax, %ecx
+	jns	.LBB80_24
+.LBB80_23:
+	movl	40(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 16(%ebx)
+	movl	76(%esp), %eax                  # 4-byte Reload
+	jns	.LBB80_26
+.LBB80_25:
+	movl	56(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 12(%ebx)
+	jns	.LBB80_28
+.LBB80_27:
+	movl	52(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ebx)
+	jns	.LBB80_30
+.LBB80_29:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%ebx)
+	js	.LBB80_31
+	jmp	.LBB80_32
+.Lfunc_end80:
+	.size	mcl_fp_montRedNF16L, .Lfunc_end80-mcl_fp_montRedNF16L
+                                        # -- End function
+	.globl	mcl_fp_addPre16L                # -- Begin function mcl_fp_addPre16L
+	.p2align	4, 0x90
+	.type	mcl_fp_addPre16L,@function
+mcl_fp_addPre16L:                       # @mcl_fp_addPre16L
+# %bb.0:
+	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	movl	20(%esp), %ecx
-	addl	(%ecx), %edx
-	adcl	4(%ecx), %esi
-	movl	8(%eax), %ebx
-	adcl	8(%ecx), %ebx
-	movl	16(%esp), %edi
-	movl	%edx, (%edi)
-	movl	12(%ecx), %edx
-	movl	%esi, 4(%edi)
-	movl	16(%ecx), %esi
-	adcl	12(%eax), %edx
-	adcl	16(%eax), %esi
-	movl	%ebx, 8(%edi)
-	movl	20(%eax), %ebx
-	movl	%edx, 12(%edi)
-	movl	20(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	24(%eax), %ebx
-	movl	%esi, 16(%edi)
-	movl	24(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	28(%eax), %ebx
-	movl	%edx, 20(%edi)
-	movl	28(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	32(%eax), %ebx
-	movl	%esi, 24(%edi)
-	movl	32(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	36(%eax), %ebx
-	movl	%edx, 28(%edi)
-	movl	36(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	40(%eax), %ebx
-	movl	%esi, 32(%edi)
-	movl	40(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	44(%eax), %ebx
-	movl	%edx, 36(%edi)
-	movl	44(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	48(%eax), %ebx
-	movl	%esi, 40(%edi)
-	movl	48(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	52(%eax), %ebx
-	movl	%edx, 44(%edi)
-	movl	52(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	56(%eax), %ebx
-	movl	%esi, 48(%edi)
-	movl	56(%ecx), %esi
-	adcl	%ebx, %esi
-	movl	60(%eax), %ebx
-	movl	%edx, 52(%edi)
-	movl	60(%ecx), %edx
-	adcl	%ebx, %edx
-	movl	%esi, 56(%edi)
-	movl	%edx, 60(%edi)
-	movl	64(%eax), %eax
-	movl	64(%ecx), %ecx
-	adcl	%eax, %ecx
-	movl	%ecx, 64(%edi)
-	sbbl	%eax, %eax
-	andl	$1, %eax
+	subl	$64, %esp
+	movl	88(%esp), %edi
+	movl	(%edi), %eax
+	movl	4(%edi), %ecx
+	movl	92(%esp), %esi
+	addl	(%esi), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	adcl	4(%esi), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	60(%edi), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%edi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	52(%edi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%edi), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	44(%edi), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	40(%edi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%edi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	32(%edi), %edx
+	movl	28(%edi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	24(%edi), %ecx
+	movl	20(%edi), %eax
+	movl	16(%edi), %ebp
+	movl	12(%edi), %ebx
+	movl	8(%edi), %edi
+	adcl	8(%esi), %edi
+	movl	%edi, 48(%esp)                  # 4-byte Spill
+	adcl	12(%esi), %ebx
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	adcl	16(%esi), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	adcl	20(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	adcl	24(%esi), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	28(%esi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	adcl	32(%esi), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	adcl	36(%esi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	adcl	40(%esi), %ebp
+	movl	20(%esp), %edi                  # 4-byte Reload
+	adcl	44(%esi), %edi
+	movl	28(%esp), %edx                  # 4-byte Reload
+	adcl	48(%esi), %edx
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	52(%esi), %ecx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	adcl	56(%esi), %eax
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	adcl	60(%esi), %ebx
+	movl	84(%esp), %esi
+	movl	%eax, 56(%esi)
+	movl	%ecx, 52(%esi)
+	movl	%edx, 48(%esi)
+	movl	%edi, 44(%esi)
+	movl	%ebp, 40(%esi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 36(%esi)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 32(%esi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 28(%esi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%esi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 20(%esi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%esi)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%esi)
+	movl	48(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%esi)
+	movl	%ebx, 60(%esi)
+	movl	56(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%esi)
+	movl	60(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%esi)
+	setb	%al
+	movzbl	%al, %eax
+	addl	$64, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
+	popl	%ebp
 	retl
-.Lfunc_end261:
-	.size	mcl_fp_addPre17L, .Lfunc_end261-mcl_fp_addPre17L
-
-	.globl	mcl_fp_subPre17L
-	.align	16, 0x90
-	.type	mcl_fp_subPre17L,@function
-mcl_fp_subPre17L:                       # @mcl_fp_subPre17L
-# BB#0:
+.Lfunc_end81:
+	.size	mcl_fp_addPre16L, .Lfunc_end81-mcl_fp_addPre16L
+                                        # -- End function
+	.globl	mcl_fp_subPre16L                # -- Begin function mcl_fp_subPre16L
+	.p2align	4, 0x90
+	.type	mcl_fp_subPre16L,@function
+mcl_fp_subPre16L:                       # @mcl_fp_subPre16L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	movl	24(%esp), %ecx
-	movl	(%ecx), %esi
-	movl	4(%ecx), %edi
-	xorl	%eax, %eax
-	movl	28(%esp), %edx
-	subl	(%edx), %esi
-	sbbl	4(%edx), %edi
-	movl	8(%ecx), %ebp
-	sbbl	8(%edx), %ebp
-	movl	20(%esp), %ebx
-	movl	%esi, (%ebx)
-	movl	12(%ecx), %esi
-	sbbl	12(%edx), %esi
-	movl	%edi, 4(%ebx)
-	movl	16(%ecx), %edi
-	sbbl	16(%edx), %edi
-	movl	%ebp, 8(%ebx)
-	movl	20(%edx), %ebp
-	movl	%esi, 12(%ebx)
-	movl	20(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	24(%edx), %ebp
-	movl	%edi, 16(%ebx)
-	movl	24(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	28(%edx), %ebp
-	movl	%esi, 20(%ebx)
-	movl	28(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	32(%edx), %ebp
-	movl	%edi, 24(%ebx)
-	movl	32(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	36(%edx), %ebp
-	movl	%esi, 28(%ebx)
-	movl	36(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	40(%edx), %ebp
-	movl	%edi, 32(%ebx)
-	movl	40(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	44(%edx), %ebp
-	movl	%esi, 36(%ebx)
-	movl	44(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	48(%edx), %ebp
-	movl	%edi, 40(%ebx)
-	movl	48(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	52(%edx), %ebp
-	movl	%esi, 44(%ebx)
-	movl	52(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	56(%edx), %ebp
-	movl	%edi, 48(%ebx)
-	movl	56(%ecx), %edi
-	sbbl	%ebp, %edi
-	movl	60(%edx), %ebp
-	movl	%esi, 52(%ebx)
-	movl	60(%ecx), %esi
-	sbbl	%ebp, %esi
-	movl	%edi, 56(%ebx)
-	movl	%esi, 60(%ebx)
-	movl	64(%edx), %edx
-	movl	64(%ecx), %ecx
-	sbbl	%edx, %ecx
-	movl	%ecx, 64(%ebx)
-	sbbl	$0, %eax
+	subl	$64, %esp
+	movl	88(%esp), %ebx
+	movl	(%ebx), %ecx
+	movl	4(%ebx), %eax
+	movl	92(%esp), %edi
+	subl	(%edi), %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	sbbl	4(%edi), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	60(%ebx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%ebx), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	52(%ebx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	48(%ebx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	44(%ebx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	40(%ebx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	36(%ebx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	32(%ebx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	28(%ebx), %ebp
+	movl	24(%ebx), %esi
+	movl	20(%ebx), %edx
+	movl	16(%ebx), %ecx
+	movl	12(%ebx), %eax
+	movl	8(%ebx), %ebx
+	sbbl	8(%edi), %ebx
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	sbbl	12(%edi), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	sbbl	16(%edi), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	sbbl	20(%edi), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	sbbl	24(%edi), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	sbbl	28(%edi), %ebp
+	movl	%ebp, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	32(%edi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	sbbl	36(%edi), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	12(%esp), %ebp                  # 4-byte Reload
+	sbbl	40(%edi), %ebp
+	movl	20(%esp), %esi                  # 4-byte Reload
+	sbbl	44(%edi), %esi
+	movl	28(%esp), %edx                  # 4-byte Reload
+	sbbl	48(%edi), %edx
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	sbbl	52(%edi), %ecx
+	movl	44(%esp), %eax                  # 4-byte Reload
+	sbbl	56(%edi), %eax
+	movl	52(%esp), %ebx                  # 4-byte Reload
+	sbbl	60(%edi), %ebx
+	movl	84(%esp), %edi
+	movl	%eax, 56(%edi)
+	movl	%ecx, 52(%edi)
+	movl	%edx, 48(%edi)
+	movl	%esi, 44(%edi)
+	movl	%ebp, 40(%edi)
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 36(%edi)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 32(%edi)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 28(%edi)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%edi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 20(%edi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%edi)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%edi)
+	movl	48(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%edi)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%edi)
+	movl	%ebx, 60(%edi)
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%edi)
+	movl	$0, %eax
+	sbbl	%eax, %eax
 	andl	$1, %eax
+	addl	$64, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end262:
-	.size	mcl_fp_subPre17L, .Lfunc_end262-mcl_fp_subPre17L
-
-	.globl	mcl_fp_shr1_17L
-	.align	16, 0x90
-	.type	mcl_fp_shr1_17L,@function
-mcl_fp_shr1_17L:                        # @mcl_fp_shr1_17L
-# BB#0:
+.Lfunc_end82:
+	.size	mcl_fp_subPre16L, .Lfunc_end82-mcl_fp_subPre16L
+                                        # -- End function
+	.globl	mcl_fp_shr1_16L                 # -- Begin function mcl_fp_shr1_16L
+	.p2align	4, 0x90
+	.type	mcl_fp_shr1_16L,@function
+mcl_fp_shr1_16L:                        # @mcl_fp_shr1_16L
+# %bb.0:
 	pushl	%esi
 	movl	12(%esp), %eax
-	movl	(%eax), %edx
-	movl	4(%eax), %esi
-	shrdl	$1, %esi, %edx
+	movl	60(%eax), %edx
+	movl	%edx, %esi
+	shrl	%esi
 	movl	8(%esp), %ecx
-	movl	%edx, (%ecx)
-	movl	8(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 4(%ecx)
-	movl	12(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 8(%ecx)
-	movl	16(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 12(%ecx)
-	movl	20(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 16(%ecx)
-	movl	24(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 20(%ecx)
-	movl	28(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 24(%ecx)
-	movl	32(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 28(%ecx)
-	movl	36(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 32(%ecx)
-	movl	40(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 36(%ecx)
-	movl	44(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 40(%ecx)
-	movl	48(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 44(%ecx)
-	movl	52(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 48(%ecx)
-	movl	56(%eax), %edx
-	shrdl	$1, %edx, %esi
-	movl	%esi, 52(%ecx)
-	movl	60(%eax), %esi
-	shrdl	$1, %esi, %edx
-	movl	%edx, 56(%ecx)
-	movl	64(%eax), %eax
-	shrdl	$1, %eax, %esi
 	movl	%esi, 60(%ecx)
-	shrl	%eax
-	movl	%eax, 64(%ecx)
+	movl	56(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 56(%ecx)
+	movl	52(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 52(%ecx)
+	movl	48(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 48(%ecx)
+	movl	44(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 44(%ecx)
+	movl	40(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 40(%ecx)
+	movl	36(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 36(%ecx)
+	movl	32(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 32(%ecx)
+	movl	28(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 28(%ecx)
+	movl	24(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 24(%ecx)
+	movl	20(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 20(%ecx)
+	movl	16(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 16(%ecx)
+	movl	12(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 12(%ecx)
+	movl	8(%eax), %esi
+	shldl	$31, %esi, %edx
+	movl	%edx, 8(%ecx)
+	movl	4(%eax), %edx
+	shldl	$31, %edx, %esi
+	movl	%esi, 4(%ecx)
+	movl	(%eax), %eax
+	shrdl	$1, %edx, %eax
+	movl	%eax, (%ecx)
 	popl	%esi
 	retl
-.Lfunc_end263:
-	.size	mcl_fp_shr1_17L, .Lfunc_end263-mcl_fp_shr1_17L
-
-	.globl	mcl_fp_add17L
-	.align	16, 0x90
-	.type	mcl_fp_add17L,@function
-mcl_fp_add17L:                          # @mcl_fp_add17L
-# BB#0:
+.Lfunc_end83:
+	.size	mcl_fp_shr1_16L, .Lfunc_end83-mcl_fp_shr1_16L
+                                        # -- End function
+	.globl	mcl_fp_add16L                   # -- Begin function mcl_fp_add16L
+	.p2align	4, 0x90
+	.type	mcl_fp_add16L,@function
+mcl_fp_add16L:                          # @mcl_fp_add16L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$60, %esp
-	movl	88(%esp), %esi
-	movl	(%esi), %ecx
-	movl	4(%esi), %eax
-	movl	84(%esp), %edx
-	addl	(%edx), %ecx
-	movl	%ecx, %ebx
+	subl	$64, %esp
+	movl	88(%esp), %ecx
+	movl	(%ecx), %esi
+	movl	4(%ecx), %eax
+	movl	92(%esp), %edx
+	addl	(%edx), %esi
+	movl	%esi, 48(%esp)                  # 4-byte Spill
 	adcl	4(%edx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	8(%esi), %eax
-	adcl	8(%edx), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	12(%edx), %ecx
-	movl	16(%edx), %edi
-	adcl	12(%esi), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	adcl	16(%esi), %edi
-	movl	%edi, 4(%esp)           # 4-byte Spill
-	movl	20(%edx), %eax
-	adcl	20(%esi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	24(%edx), %eax
-	adcl	24(%esi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	28(%edx), %eax
-	adcl	28(%esi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	32(%edx), %eax
-	adcl	32(%esi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	36(%edx), %eax
-	adcl	36(%esi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	40(%edx), %eax
-	adcl	40(%esi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	44(%edx), %eax
-	adcl	44(%esi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	48(%edx), %eax
-	adcl	48(%esi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	52(%edx), %eax
-	adcl	52(%esi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	56(%edx), %eax
-	adcl	56(%esi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	60(%edx), %ebp
-	adcl	60(%esi), %ebp
-	movl	64(%edx), %edx
-	adcl	64(%esi), %edx
-	movl	80(%esp), %esi
-	movl	%ebx, (%esi)
-	movl	%ebx, %eax
-	movl	8(%esp), %ecx           # 4-byte Reload
-	movl	%ecx, 4(%esi)
-	movl	56(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 8(%esi)
-	movl	52(%esp), %ebx          # 4-byte Reload
-	movl	%ebx, 12(%esi)
-	movl	%edi, 16(%esi)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%esi)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%esi)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%esi)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%esi)
-	movl	32(%esp), %edi          # 4-byte Reload
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	60(%ecx), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	56(%ecx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	52(%ecx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	48(%ecx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	44(%ecx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	40(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	36(%ecx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	32(%ecx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	28(%ecx), %edi
+	movl	24(%ecx), %ebx
+	movl	20(%ecx), %esi
+	movl	16(%ecx), %eax
+	movl	12(%ecx), %ebp
+	movl	8(%ecx), %ecx
+	adcl	8(%edx), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	adcl	12(%edx), %ebp
+	adcl	16(%edx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	adcl	20(%edx), %esi
+	movl	%esi, 40(%esp)                  # 4-byte Spill
+	adcl	24(%edx), %ebx
+	movl	%ebx, 20(%esp)                  # 4-byte Spill
+	adcl	28(%edx), %edi
+	movl	%edi, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	32(%edx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %edi                  # 4-byte Reload
+	adcl	36(%edx), %edi
+	movl	%edi, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	adcl	40(%edx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	44(%edx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	adcl	48(%edx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	adcl	52(%edx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	adcl	56(%edx), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	60(%esp), %ebx                  # 4-byte Reload
+	adcl	60(%edx), %ebx
+	movl	%ebx, 60(%esp)                  # 4-byte Spill
+	movl	84(%esp), %esi
+	movl	%ebx, 60(%esi)
+	movl	%ecx, 56(%esi)
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	movl	%eax, 52(%esi)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%esi)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 44(%esi)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 40(%esi)
 	movl	%edi, 36(%esi)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 40(%esi)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 44(%esi)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 48(%esi)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 52(%esi)
-	movl	12(%esp), %edi          # 4-byte Reload
-	movl	%edi, 56(%esi)
-	movl	%ebp, 60(%esi)
-	movl	%edx, 64(%esi)
-	sbbl	%ebx, %ebx
-	andl	$1, %ebx
-	movl	92(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	56(%esp), %eax          # 4-byte Reload
-	sbbl	8(%edi), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	movl	52(%esp), %eax          # 4-byte Reload
-	sbbl	12(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	4(%esp), %eax           # 4-byte Reload
-	sbbl	16(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-	sbbl	20(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	sbbl	24(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	sbbl	28(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	sbbl	32(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	sbbl	36(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	sbbl	40(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	sbbl	44(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	sbbl	48(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	16(%esp), %eax          # 4-byte Reload
-	sbbl	52(%edi), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	12(%esp), %eax          # 4-byte Reload
-	sbbl	56(%edi), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	sbbl	60(%edi), %ebp
-	sbbl	64(%edi), %edx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 32(%esi)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 28(%esi)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 24(%esi)
+	movl	40(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 20(%esi)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%esi)
+	movl	%ebp, 12(%esi)
+	movl	%ebx, 8(%esi)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%esi)
+	movl	48(%esp), %edx                  # 4-byte Reload
+	movl	%edx, (%esi)
+	setb	3(%esp)                         # 1-byte Folded Spill
+	movl	96(%esp), %esi
+	subl	(%esi), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	sbbl	4(%esi), %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	sbbl	8(%esi), %ebx
+	movl	%ebx, %ecx
+	sbbl	12(%esi), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	sbbl	16(%esi), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	sbbl	20(%esi), %edi
+	movl	%edi, 40(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%esi), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%esi), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	32(%esi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%esi), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	40(%esi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	44(%esi), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	48(%esi), %eax
+	movl	52(%esp), %ebp                  # 4-byte Reload
+	sbbl	52(%esi), %ebp
+	movl	56(%esp), %edi                  # 4-byte Reload
+	sbbl	56(%esi), %edi
+	movl	60(%esp), %ebx                  # 4-byte Reload
+	sbbl	60(%esi), %ebx
+	movl	%ebx, %esi
+	movzbl	3(%esp), %ebx                   # 1-byte Folded Reload
 	sbbl	$0, %ebx
 	testb	$1, %bl
-	jne	.LBB264_2
-# BB#1:                                 # %nocarry
-	movl	(%esp), %edi            # 4-byte Reload
-	movl	%edi, (%esi)
-	movl	8(%esp), %edi           # 4-byte Reload
-	movl	%edi, 4(%esi)
-	movl	56(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%esi)
-	movl	52(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%esi)
-	movl	4(%esp), %edi           # 4-byte Reload
-	movl	%edi, 16(%esi)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%esi)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%esi)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%esi)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%esi)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 36(%esi)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 40(%esi)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 44(%esi)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 48(%esi)
-	movl	16(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 52(%esi)
-	movl	12(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 56(%esi)
-	movl	%ebp, 60(%esi)
-	movl	%edx, 64(%esi)
-.LBB264_2:                              # %carry
-	addl	$60, %esp
+	jne	.LBB84_2
+# %bb.1:                                # %nocarry
+	movl	48(%esp), %ebx                  # 4-byte Reload
+	movl	84(%esp), %edx
+	movl	%ebx, (%edx)
+	movl	44(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 4(%edx)
+	movl	%ecx, 8(%edx)
+	movl	32(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%edx)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%edx)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%edx)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%edx)
+	movl	24(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 28(%edx)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 32(%edx)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 36(%edx)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 40(%edx)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 44(%edx)
+	movl	%eax, 48(%edx)
+	movl	%ebp, 52(%edx)
+	movl	%edi, 56(%edx)
+	movl	%esi, 60(%edx)
+.LBB84_2:                               # %carry
+	addl	$64, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end264:
-	.size	mcl_fp_add17L, .Lfunc_end264-mcl_fp_add17L
-
-	.globl	mcl_fp_addNF17L
-	.align	16, 0x90
-	.type	mcl_fp_addNF17L,@function
-mcl_fp_addNF17L:                        # @mcl_fp_addNF17L
-# BB#0:
+.Lfunc_end84:
+	.size	mcl_fp_add16L, .Lfunc_end84-mcl_fp_add16L
+                                        # -- End function
+	.globl	mcl_fp_addNF16L                 # -- Begin function mcl_fp_addNF16L
+	.p2align	4, 0x90
+	.type	mcl_fp_addNF16L,@function
+mcl_fp_addNF16L:                        # @mcl_fp_addNF16L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$132, %esp
-	movl	160(%esp), %eax
-	movl	(%eax), %ecx
+	subl	$104, %esp
+	movl	132(%esp), %eax
+	movl	(%eax), %esi
 	movl	4(%eax), %edx
-	movl	156(%esp), %esi
-	addl	(%esi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	adcl	4(%esi), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	64(%eax), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	60(%eax), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	56(%eax), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	52(%eax), %ebp
-	movl	48(%eax), %ecx
-	movl	%ecx, 116(%esp)         # 4-byte Spill
-	movl	44(%eax), %ecx
-	movl	%ecx, 112(%esp)         # 4-byte Spill
-	movl	40(%eax), %ecx
-	movl	%ecx, 128(%esp)         # 4-byte Spill
-	movl	36(%eax), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	32(%eax), %ecx
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-	movl	28(%eax), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	24(%eax), %ecx
-	movl	%ecx, 120(%esp)         # 4-byte Spill
+	movl	128(%esp), %ecx
+	addl	(%ecx), %esi
+	movl	%esi, 56(%esp)                  # 4-byte Spill
+	adcl	4(%ecx), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	60(%eax), %esi
+	movl	%esi, (%esp)                    # 4-byte Spill
+	movl	56(%eax), %esi
+	movl	%esi, 4(%esp)                   # 4-byte Spill
+	movl	52(%eax), %esi
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	48(%eax), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	movl	44(%eax), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	40(%eax), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	36(%eax), %edx
+	movl	32(%eax), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	movl	28(%eax), %esi
+	movl	24(%eax), %edi
+	movl	%edi, 28(%esp)                  # 4-byte Spill
 	movl	20(%eax), %ebx
-	movl	16(%eax), %edi
-	movl	12(%eax), %edx
-	movl	8(%eax), %ecx
-	adcl	8(%esi), %ecx
-	movl	%ecx, 64(%esp)          # 4-byte Spill
-	adcl	12(%esi), %edx
-	movl	%edx, 68(%esp)          # 4-byte Spill
-	adcl	16(%esi), %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	adcl	20(%esi), %ebx
-	movl	%ebx, 76(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	adcl	24(%esi), %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	adcl	28(%esi), %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	adcl	32(%esi), %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	adcl	36(%esi), %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	128(%esp), %eax         # 4-byte Reload
-	adcl	40(%esi), %eax
-	movl	%eax, 128(%esp)         # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	adcl	44(%esi), %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	adcl	48(%esi), %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	adcl	52(%esi), %ebp
-	movl	%ebp, 80(%esp)          # 4-byte Spill
-	movl	92(%esp), %ebp          # 4-byte Reload
-	adcl	56(%esi), %ebp
-	movl	%ebp, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %ebp          # 4-byte Reload
-	adcl	60(%esi), %ebp
-	movl	%ebp, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ebp         # 4-byte Reload
-	adcl	64(%esi), %ebp
-	movl	%ebp, 100(%esp)         # 4-byte Spill
-	movl	164(%esp), %esi
-	movl	84(%esp), %eax          # 4-byte Reload
-	subl	(%esi), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	sbbl	4(%esi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	sbbl	8(%esi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	sbbl	12(%esi), %edx
-	sbbl	16(%esi), %edi
-	movl	%edi, 12(%esp)          # 4-byte Spill
-	sbbl	20(%esi), %ebx
-	movl	%ebx, 16(%esp)          # 4-byte Spill
-	movl	120(%esp), %ebx         # 4-byte Reload
-	sbbl	24(%esi), %ebx
-	movl	%ebx, 20(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	sbbl	28(%esi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	sbbl	32(%esi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	36(%esi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	128(%esp), %ecx         # 4-byte Reload
-	sbbl	40(%esi), %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	movl	112(%esp), %ecx         # 4-byte Reload
-	sbbl	44(%esi), %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	116(%esp), %ecx         # 4-byte Reload
-	sbbl	48(%esi), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ecx
-	sbbl	52(%esi), %ecx
-	movl	%ecx, 48(%esp)          # 4-byte Spill
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ecx
-	movl	%eax, %ebp
-	sbbl	56(%esi), %ecx
-	movl	%ecx, 52(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	movl	%eax, %ecx
-	sbbl	60(%esi), %ecx
-	movl	%ecx, 56(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	movl	%eax, %ebx
-	sbbl	64(%esi), %ebx
-	movl	%ebx, 60(%esp)          # 4-byte Spill
-	movl	%ebx, %esi
-	sarl	$31, %esi
-	testl	%esi, %esi
-	movl	84(%esp), %esi          # 4-byte Reload
-	js	.LBB265_2
-# BB#1:
-	movl	(%esp), %esi            # 4-byte Reload
-.LBB265_2:
-	movl	152(%esp), %ebx
-	movl	%esi, (%ebx)
-	movl	88(%esp), %eax          # 4-byte Reload
-	js	.LBB265_4
-# BB#3:
-	movl	4(%esp), %eax           # 4-byte Reload
-.LBB265_4:
-	movl	%eax, 4(%ebx)
-	movl	108(%esp), %eax         # 4-byte Reload
-	movl	76(%esp), %esi          # 4-byte Reload
-	movl	64(%esp), %edi          # 4-byte Reload
-	js	.LBB265_6
-# BB#5:
-	movl	8(%esp), %edi           # 4-byte Reload
-.LBB265_6:
-	movl	%edi, 8(%ebx)
-	movl	116(%esp), %edi         # 4-byte Reload
-	movl	68(%esp), %ecx          # 4-byte Reload
-	js	.LBB265_8
-# BB#7:
-	movl	%edx, %ecx
-.LBB265_8:
-	movl	%ecx, 12(%ebx)
-	movl	104(%esp), %ecx         # 4-byte Reload
-	movl	72(%esp), %edx          # 4-byte Reload
-	js	.LBB265_10
-# BB#9:
-	movl	12(%esp), %edx          # 4-byte Reload
-.LBB265_10:
-	movl	%edx, 16(%ebx)
-	movl	%ebp, %edx
-	js	.LBB265_12
-# BB#11:
-	movl	16(%esp), %esi          # 4-byte Reload
-.LBB265_12:
-	movl	%esi, 20(%ebx)
-	movl	112(%esp), %ebp         # 4-byte Reload
-	js	.LBB265_14
-# BB#13:
-	movl	20(%esp), %esi          # 4-byte Reload
-	movl	%esi, 120(%esp)         # 4-byte Spill
-.LBB265_14:
-	movl	120(%esp), %esi         # 4-byte Reload
-	movl	%esi, 24(%ebx)
-	js	.LBB265_16
-# BB#15:
-	movl	24(%esp), %ecx          # 4-byte Reload
-.LBB265_16:
-	movl	%ecx, 28(%ebx)
-	js	.LBB265_18
-# BB#17:
-	movl	28(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 124(%esp)         # 4-byte Spill
-.LBB265_18:
-	movl	124(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 32(%ebx)
-	js	.LBB265_20
-# BB#19:
-	movl	32(%esp), %eax          # 4-byte Reload
-.LBB265_20:
-	movl	%eax, 36(%ebx)
-	movl	100(%esp), %ecx         # 4-byte Reload
-	js	.LBB265_22
-# BB#21:
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	%eax, 128(%esp)         # 4-byte Spill
-.LBB265_22:
-	movl	128(%esp), %eax         # 4-byte Reload
-	movl	%eax, 40(%ebx)
-	js	.LBB265_24
-# BB#23:
-	movl	40(%esp), %ebp          # 4-byte Reload
-.LBB265_24:
-	movl	%ebp, 44(%ebx)
-	js	.LBB265_26
-# BB#25:
-	movl	44(%esp), %edi          # 4-byte Reload
-.LBB265_26:
-	movl	%edi, 48(%ebx)
-	movl	80(%esp), %eax          # 4-byte Reload
-	js	.LBB265_28
-# BB#27:
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB265_28:
-	movl	%eax, 52(%ebx)
-	js	.LBB265_30
-# BB#29:
-	movl	52(%esp), %edx          # 4-byte Reload
-.LBB265_30:
-	movl	%edx, 56(%ebx)
-	movl	96(%esp), %eax          # 4-byte Reload
-	js	.LBB265_32
-# BB#31:
-	movl	56(%esp), %eax          # 4-byte Reload
-.LBB265_32:
-	movl	%eax, 60(%ebx)
-	js	.LBB265_34
-# BB#33:
-	movl	60(%esp), %ecx          # 4-byte Reload
-.LBB265_34:
-	movl	%ecx, 64(%ebx)
-	addl	$132, %esp
+	movl	16(%eax), %ebp
+	movl	12(%eax), %edi
+	movl	8(%eax), %eax
+	adcl	8(%ecx), %eax
+	adcl	12(%ecx), %edi
+	adcl	16(%ecx), %ebp
+	adcl	20(%ecx), %ebx
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	adcl	24(%ecx), %ebx
+	movl	%ebx, 28(%esp)                  # 4-byte Spill
+	adcl	28(%ecx), %esi
+	movl	%esi, 44(%esp)                  # 4-byte Spill
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	32(%ecx), %esi
+	movl	%esi, 24(%esp)                  # 4-byte Spill
+	adcl	36(%ecx), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	40(%ecx), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	44(%ecx), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %ebx                  # 4-byte Reload
+	adcl	48(%ecx), %ebx
+	movl	%ebx, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %ebx                   # 4-byte Reload
+	adcl	52(%ecx), %ebx
+	movl	%ebx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %ebx                   # 4-byte Reload
+	adcl	56(%ecx), %ebx
+	movl	%ebx, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %ebx                    # 4-byte Reload
+	adcl	60(%ecx), %ebx
+	movl	%ebx, (%esp)                    # 4-byte Spill
+	movl	136(%esp), %ebx
+	movl	56(%esp), %edx                  # 4-byte Reload
+	subl	(%ebx), %edx
+	movl	%edx, 100(%esp)                 # 4-byte Spill
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	sbbl	4(%ebx), %ecx
+	movl	%ecx, 96(%esp)                  # 4-byte Spill
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	sbbl	8(%ebx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	%edi, 64(%esp)                  # 4-byte Spill
+	sbbl	12(%ebx), %edi
+	movl	%edi, 88(%esp)                  # 4-byte Spill
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	sbbl	16(%ebx), %ebp
+	movl	%ebp, 84(%esp)                  # 4-byte Spill
+	movl	48(%esp), %esi                  # 4-byte Reload
+	sbbl	20(%ebx), %esi
+	movl	%esi, 80(%esp)                  # 4-byte Spill
+	movl	28(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%ebx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%ebx), %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	32(%ebx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	40(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%ebx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	sbbl	40(%ebx), %ebp
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	sbbl	44(%ebx), %ecx
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	48(%ebx), %eax
+	movl	8(%esp), %esi                   # 4-byte Reload
+	sbbl	52(%ebx), %esi
+	movl	4(%esp), %edi                   # 4-byte Reload
+	sbbl	56(%ebx), %edi
+	movl	(%esp), %edx                    # 4-byte Reload
+	sbbl	60(%ebx), %edx
+	testl	%edx, %edx
+	js	.LBB85_1
+# %bb.2:
+	movl	124(%esp), %ebx
+	movl	%edx, 60(%ebx)
+	js	.LBB85_3
+.LBB85_4:
+	movl	%edi, 56(%ebx)
+	movl	92(%esp), %edx                  # 4-byte Reload
+	js	.LBB85_5
+.LBB85_6:
+	movl	%esi, 52(%ebx)
+	movl	84(%esp), %edi                  # 4-byte Reload
+	js	.LBB85_7
+.LBB85_8:
+	movl	%eax, 48(%ebx)
+	js	.LBB85_9
+.LBB85_10:
+	movl	%ecx, 44(%ebx)
+	movl	100(%esp), %ecx                 # 4-byte Reload
+	movl	76(%esp), %eax                  # 4-byte Reload
+	js	.LBB85_11
+.LBB85_12:
+	movl	%ebp, 40(%ebx)
+	movl	96(%esp), %esi                  # 4-byte Reload
+	movl	72(%esp), %ebp                  # 4-byte Reload
+	js	.LBB85_13
+.LBB85_14:
+	movl	%ebp, 36(%ebx)
+	movl	80(%esp), %ebp                  # 4-byte Reload
+	js	.LBB85_15
+.LBB85_16:
+	movl	%eax, 32(%ebx)
+	js	.LBB85_17
+.LBB85_18:
+	movl	%edx, %eax
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 28(%ebx)
+	js	.LBB85_19
+.LBB85_20:
+	movl	36(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 24(%ebx)
+	js	.LBB85_21
+.LBB85_22:
+	movl	%ebp, 20(%ebx)
+	movl	%eax, %edx
+	js	.LBB85_23
+.LBB85_24:
+	movl	%edi, 16(%ebx)
+	movl	88(%esp), %eax                  # 4-byte Reload
+	js	.LBB85_25
+.LBB85_26:
+	movl	%eax, 12(%ebx)
+	js	.LBB85_27
+.LBB85_28:
+	movl	%edx, 8(%ebx)
+	js	.LBB85_29
+.LBB85_30:
+	movl	%esi, 4(%ebx)
+	jns	.LBB85_32
+.LBB85_31:
+	movl	56(%esp), %ecx                  # 4-byte Reload
+.LBB85_32:
+	movl	%ecx, (%ebx)
+	addl	$104, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end265:
-	.size	mcl_fp_addNF17L, .Lfunc_end265-mcl_fp_addNF17L
-
-	.globl	mcl_fp_sub17L
-	.align	16, 0x90
-	.type	mcl_fp_sub17L,@function
-mcl_fp_sub17L:                          # @mcl_fp_sub17L
-# BB#0:
+.LBB85_1:
+	movl	(%esp), %edx                    # 4-byte Reload
+	movl	124(%esp), %ebx
+	movl	%edx, 60(%ebx)
+	jns	.LBB85_4
+.LBB85_3:
+	movl	4(%esp), %edi                   # 4-byte Reload
+	movl	%edi, 56(%ebx)
+	movl	92(%esp), %edx                  # 4-byte Reload
+	jns	.LBB85_6
+.LBB85_5:
+	movl	8(%esp), %esi                   # 4-byte Reload
+	movl	%esi, 52(%ebx)
+	movl	84(%esp), %edi                  # 4-byte Reload
+	jns	.LBB85_8
+.LBB85_7:
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%ebx)
+	jns	.LBB85_10
+.LBB85_9:
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 44(%ebx)
+	movl	100(%esp), %ecx                 # 4-byte Reload
+	movl	76(%esp), %eax                  # 4-byte Reload
+	jns	.LBB85_12
+.LBB85_11:
+	movl	20(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 40(%ebx)
+	movl	96(%esp), %esi                  # 4-byte Reload
+	movl	72(%esp), %ebp                  # 4-byte Reload
+	jns	.LBB85_14
+.LBB85_13:
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 36(%ebx)
+	movl	80(%esp), %ebp                  # 4-byte Reload
+	jns	.LBB85_16
+.LBB85_15:
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 32(%ebx)
+	jns	.LBB85_18
+.LBB85_17:
+	movl	44(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	%edx, %eax
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 28(%ebx)
+	jns	.LBB85_20
+.LBB85_19:
+	movl	28(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 24(%ebx)
+	jns	.LBB85_22
+.LBB85_21:
+	movl	48(%esp), %ebp                  # 4-byte Reload
+	movl	%ebp, 20(%ebx)
+	movl	%eax, %edx
+	jns	.LBB85_24
+.LBB85_23:
+	movl	60(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 16(%ebx)
+	movl	88(%esp), %eax                  # 4-byte Reload
+	jns	.LBB85_26
+.LBB85_25:
+	movl	64(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebx)
+	jns	.LBB85_28
+.LBB85_27:
+	movl	68(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 8(%ebx)
+	jns	.LBB85_30
+.LBB85_29:
+	movl	52(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 4(%ebx)
+	js	.LBB85_31
+	jmp	.LBB85_32
+.Lfunc_end85:
+	.size	mcl_fp_addNF16L, .Lfunc_end85-mcl_fp_addNF16L
+                                        # -- End function
+	.globl	mcl_fp_sub16L                   # -- Begin function mcl_fp_sub16L
+	.p2align	4, 0x90
+	.type	mcl_fp_sub16L,@function
+mcl_fp_sub16L:                          # @mcl_fp_sub16L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
 	subl	$64, %esp
-	movl	88(%esp), %esi
-	movl	(%esi), %eax
-	movl	4(%esi), %ecx
-	xorl	%ebx, %ebx
-	movl	92(%esp), %edi
-	subl	(%edi), %eax
-	movl	%eax, 56(%esp)          # 4-byte Spill
-	sbbl	4(%edi), %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	8(%esi), %eax
-	sbbl	8(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	12(%esi), %eax
-	sbbl	12(%edi), %eax
-	movl	%eax, 60(%esp)          # 4-byte Spill
-	movl	16(%esi), %eax
-	sbbl	16(%edi), %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	20(%esi), %eax
-	sbbl	20(%edi), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	24(%esi), %eax
-	sbbl	24(%edi), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	28(%esi), %eax
-	sbbl	28(%edi), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	32(%esi), %eax
-	sbbl	32(%edi), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	36(%esi), %eax
-	sbbl	36(%edi), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	40(%esi), %eax
-	sbbl	40(%edi), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	44(%esi), %eax
-	sbbl	44(%edi), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	48(%esi), %edx
-	sbbl	48(%edi), %edx
-	movl	%edx, 12(%esp)          # 4-byte Spill
-	movl	52(%esi), %ecx
-	sbbl	52(%edi), %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	56(%esi), %eax
-	sbbl	56(%edi), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	60(%esi), %ebp
-	sbbl	60(%edi), %ebp
-	movl	64(%esi), %esi
-	sbbl	64(%edi), %esi
-	sbbl	$0, %ebx
-	testb	$1, %bl
-	movl	84(%esp), %ebx
-	movl	56(%esp), %edi          # 4-byte Reload
-	movl	%edi, (%ebx)
-	movl	16(%esp), %edi          # 4-byte Reload
-	movl	%edi, 4(%ebx)
-	movl	48(%esp), %edi          # 4-byte Reload
-	movl	%edi, 8(%ebx)
-	movl	60(%esp), %edi          # 4-byte Reload
-	movl	%edi, 12(%ebx)
-	movl	52(%esp), %edi          # 4-byte Reload
-	movl	%edi, 16(%ebx)
-	movl	44(%esp), %edi          # 4-byte Reload
-	movl	%edi, 20(%ebx)
-	movl	40(%esp), %edi          # 4-byte Reload
-	movl	%edi, 24(%ebx)
-	movl	36(%esp), %edi          # 4-byte Reload
-	movl	%edi, 28(%ebx)
-	movl	32(%esp), %edi          # 4-byte Reload
-	movl	%edi, 32(%ebx)
-	movl	28(%esp), %edi          # 4-byte Reload
-	movl	%edi, 36(%ebx)
-	movl	24(%esp), %edi          # 4-byte Reload
-	movl	%edi, 40(%ebx)
-	movl	20(%esp), %edi          # 4-byte Reload
-	movl	%edi, 44(%ebx)
-	movl	%edx, 48(%ebx)
-	movl	%ecx, 52(%ebx)
-	movl	%eax, 56(%ebx)
-	movl	%ebp, 60(%ebx)
-	movl	%esi, 64(%ebx)
-	je	.LBB266_2
-# BB#1:                                 # %carry
-	movl	%esi, (%esp)            # 4-byte Spill
-	movl	96(%esp), %esi
-	movl	56(%esp), %ecx          # 4-byte Reload
-	addl	(%esi), %ecx
-	movl	%ecx, (%ebx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	4(%esi), %edx
-	movl	%edx, 4(%ebx)
-	movl	48(%esp), %edi          # 4-byte Reload
-	adcl	8(%esi), %edi
-	movl	12(%esi), %eax
-	adcl	60(%esp), %eax          # 4-byte Folded Reload
-	movl	%edi, 8(%ebx)
-	movl	16(%esi), %ecx
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%ebx)
-	movl	20(%esi), %eax
-	adcl	44(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 16(%ebx)
-	movl	24(%esi), %ecx
-	adcl	40(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 20(%ebx)
-	movl	28(%esi), %eax
-	adcl	36(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 24(%ebx)
-	movl	32(%esi), %ecx
-	adcl	32(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%ebx)
-	movl	36(%esi), %eax
-	adcl	28(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%ebx)
-	movl	40(%esi), %ecx
-	adcl	24(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 36(%ebx)
-	movl	44(%esi), %eax
-	adcl	20(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 40(%ebx)
-	movl	48(%esi), %ecx
-	adcl	12(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 44(%ebx)
-	movl	52(%esi), %eax
-	adcl	8(%esp), %eax           # 4-byte Folded Reload
-	movl	%ecx, 48(%ebx)
-	movl	56(%esi), %ecx
-	adcl	4(%esp), %ecx           # 4-byte Folded Reload
-	movl	%eax, 52(%ebx)
-	movl	%ecx, 56(%ebx)
-	movl	60(%esi), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 60(%ebx)
-	movl	64(%esi), %eax
-	adcl	(%esp), %eax            # 4-byte Folded Reload
-	movl	%eax, 64(%ebx)
-.LBB266_2:                              # %nocarry
+	movl	88(%esp), %edx
+	movl	(%edx), %ecx
+	movl	4(%edx), %esi
+	movl	92(%esp), %edi
+	subl	(%edi), %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	sbbl	4(%edi), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	movl	60(%edx), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	56(%edx), %ecx
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	52(%edx), %ecx
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	48(%edx), %ecx
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	44(%edx), %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	40(%edx), %ecx
+	movl	%ecx, (%esp)                    # 4-byte Spill
+	movl	36(%edx), %ebp
+	movl	32(%edx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	28(%edx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	24(%edx), %ebx
+	movl	20(%edx), %esi
+	movl	16(%edx), %ecx
+	movl	12(%edx), %eax
+	movl	8(%edx), %edx
+	sbbl	8(%edi), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	sbbl	12(%edi), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	sbbl	16(%edi), %ecx
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	sbbl	20(%edi), %esi
+	movl	%esi, 12(%esp)                  # 4-byte Spill
+	sbbl	24(%edi), %ebx
+	movl	8(%esp), %esi                   # 4-byte Reload
+	sbbl	28(%edi), %esi
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	sbbl	32(%edi), %ecx
+	sbbl	36(%edi), %ebp
+	movl	(%esp), %edx                    # 4-byte Reload
+	sbbl	40(%edi), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	32(%esp), %edx                  # 4-byte Reload
+	sbbl	44(%edi), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	movl	28(%esp), %edx                  # 4-byte Reload
+	sbbl	48(%edi), %edx
+	movl	%edx, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %edx                  # 4-byte Reload
+	sbbl	52(%edi), %edx
+	movl	%edx, 24(%esp)                  # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	sbbl	56(%edi), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	60(%esp), %edx                  # 4-byte Reload
+	sbbl	60(%edi), %edx
+	movl	$0, %eax
+	sbbl	%eax, %eax
+	testb	$1, %al
+	movl	84(%esp), %eax
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	%edx, 60(%eax)
+	movl	20(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 56(%eax)
+	movl	24(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 52(%eax)
+	movl	28(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 48(%eax)
+	movl	32(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 44(%eax)
+	movl	(%esp), %edx                    # 4-byte Reload
+	movl	%edx, 40(%eax)
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	movl	%ebp, 36(%eax)
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	%ecx, 32(%eax)
+	movl	%esi, %ecx
+	movl	%esi, 8(%esp)                   # 4-byte Spill
+	movl	%esi, 28(%eax)
+	movl	%ebx, 40(%esp)                  # 4-byte Spill
+	movl	%ebx, 24(%eax)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%eax)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, %edx
+	movl	%ecx, 12(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, %ebx
+	movl	%ecx, 8(%eax)
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	56(%esp), %edi                  # 4-byte Reload
+	movl	%edi, (%eax)
+	je	.LBB86_2
+# %bb.1:                                # %carry
+	movl	%ecx, %esi
+	movl	%edi, %ecx
+	movl	96(%esp), %ecx
+	addl	(%ecx), %edi
+	movl	%edi, 56(%esp)                  # 4-byte Spill
+	adcl	4(%ecx), %esi
+	movl	%esi, 52(%esp)                  # 4-byte Spill
+	adcl	8(%ecx), %ebx
+	movl	%ebx, 48(%esp)                  # 4-byte Spill
+	adcl	12(%ecx), %edx
+	movl	%edx, 44(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	16(%ecx), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	20(%ecx), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	adcl	24(%ecx), %edx
+	movl	%edx, 40(%esp)                  # 4-byte Spill
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	28(%ecx), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	32(%ecx), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	36(%esp), %edx                  # 4-byte Reload
+	adcl	36(%ecx), %edx
+	movl	%edx, 36(%esp)                  # 4-byte Spill
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	40(%ecx), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	32(%esp), %ebx                  # 4-byte Reload
+	adcl	44(%ecx), %ebx
+	movl	28(%esp), %edi                  # 4-byte Reload
+	adcl	48(%ecx), %edi
+	movl	24(%esp), %esi                  # 4-byte Reload
+	adcl	52(%ecx), %esi
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	56(%ecx), %edx
+	movl	60(%esp), %ebp                  # 4-byte Reload
+	adcl	60(%ecx), %ebp
+	movl	%ebp, 60(%eax)
+	movl	%edx, 56(%eax)
+	movl	%esi, 52(%eax)
+	movl	%edi, 48(%eax)
+	movl	%ebx, 44(%eax)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 40(%eax)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 36(%eax)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 32(%eax)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 28(%eax)
+	movl	40(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 24(%eax)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 20(%eax)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 16(%eax)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 12(%eax)
+	movl	48(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 8(%eax)
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%eax)
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, (%eax)
+.LBB86_2:                               # %nocarry
 	addl	$64, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end266:
-	.size	mcl_fp_sub17L, .Lfunc_end266-mcl_fp_sub17L
-
-	.globl	mcl_fp_subNF17L
-	.align	16, 0x90
-	.type	mcl_fp_subNF17L,@function
-mcl_fp_subNF17L:                        # @mcl_fp_subNF17L
-# BB#0:
+.Lfunc_end86:
+	.size	mcl_fp_sub16L, .Lfunc_end86-mcl_fp_sub16L
+                                        # -- End function
+	.globl	mcl_fp_subNF16L                 # -- Begin function mcl_fp_subNF16L
+	.p2align	4, 0x90
+	.type	mcl_fp_subNF16L,@function
+mcl_fp_subNF16L:                        # @mcl_fp_subNF16L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$112, %esp
-	movl	136(%esp), %ecx
+	subl	$104, %esp
+	movl	128(%esp), %ecx
 	movl	(%ecx), %esi
 	movl	4(%ecx), %edx
-	movl	140(%esp), %edi
+	movl	132(%esp), %edi
 	subl	(%edi), %esi
-	movl	%esi, 68(%esp)          # 4-byte Spill
+	movl	%esi, 96(%esp)                  # 4-byte Spill
 	sbbl	4(%edi), %edx
-	movl	%edx, 72(%esp)          # 4-byte Spill
-	movl	64(%ecx), %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	60(%ecx), %edx
-	movl	%edx, 104(%esp)         # 4-byte Spill
+	movl	%edx, 100(%esp)                 # 4-byte Spill
+	movl	60(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
 	movl	56(%ecx), %edx
-	movl	%edx, 100(%esp)         # 4-byte Spill
+	movl	%edx, 24(%esp)                  # 4-byte Spill
 	movl	52(%ecx), %edx
-	movl	%edx, 96(%esp)          # 4-byte Spill
+	movl	%edx, 20(%esp)                  # 4-byte Spill
 	movl	48(%ecx), %edx
-	movl	%edx, 92(%esp)          # 4-byte Spill
+	movl	%edx, 16(%esp)                  # 4-byte Spill
 	movl	44(%ecx), %edx
-	movl	%edx, 88(%esp)          # 4-byte Spill
-	movl	40(%ecx), %esi
-	movl	%esi, 108(%esp)         # 4-byte Spill
-	movl	36(%ecx), %edx
-	movl	%edx, 80(%esp)          # 4-byte Spill
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	40(%ecx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	36(%ecx), %esi
+	movl	%esi, 28(%esp)                  # 4-byte Spill
 	movl	32(%ecx), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
+	movl	%eax, (%esp)                    # 4-byte Spill
 	movl	28(%ecx), %ebp
 	movl	24(%ecx), %ebx
 	movl	20(%ecx), %esi
@@ -73025,761 +26669,804 @@ mcl_fp_subNF17L:                        # @mcl_fp_subNF17L
 	movl	12(%ecx), %eax
 	movl	8(%ecx), %ecx
 	sbbl	8(%edi), %ecx
-	movl	%ecx, 44(%esp)          # 4-byte Spill
+	movl	%ecx, 64(%esp)                  # 4-byte Spill
 	sbbl	12(%edi), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
+	movl	%eax, 72(%esp)                  # 4-byte Spill
 	sbbl	16(%edi), %edx
-	movl	%edx, 52(%esp)          # 4-byte Spill
+	movl	%edx, 76(%esp)                  # 4-byte Spill
 	sbbl	20(%edi), %esi
-	movl	%esi, 56(%esp)          # 4-byte Spill
+	movl	%esi, 80(%esp)                  # 4-byte Spill
 	sbbl	24(%edi), %ebx
-	movl	%ebx, 60(%esp)          # 4-byte Spill
+	movl	%ebx, 84(%esp)                  # 4-byte Spill
 	sbbl	28(%edi), %ebp
-	movl	%ebp, 64(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
+	movl	%ebp, 88(%esp)                  # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
 	sbbl	32(%edi), %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	80(%esp), %ecx          # 4-byte Reload
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	28(%esp), %ecx                  # 4-byte Reload
 	sbbl	36(%edi), %ecx
-	movl	%ecx, 80(%esp)          # 4-byte Spill
-	movl	108(%esp), %ecx         # 4-byte Reload
-	sbbl	40(%edi), %ecx
-	movl	%ecx, 108(%esp)         # 4-byte Spill
-	movl	88(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 28(%esp)                  # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	40(%edi), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	12(%esp), %ecx                  # 4-byte Reload
 	sbbl	44(%edi), %ecx
-	movl	%ecx, 88(%esp)          # 4-byte Spill
-	movl	92(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 12(%esp)                  # 4-byte Spill
+	movl	16(%esp), %ecx                  # 4-byte Reload
 	sbbl	48(%edi), %ecx
-	movl	%ecx, 92(%esp)          # 4-byte Spill
-	movl	96(%esp), %ecx          # 4-byte Reload
+	movl	%ecx, 16(%esp)                  # 4-byte Spill
+	movl	20(%esp), %ecx                  # 4-byte Reload
 	sbbl	52(%edi), %ecx
-	movl	%ecx, 96(%esp)          # 4-byte Spill
-	movl	100(%esp), %ecx         # 4-byte Reload
+	movl	%ecx, 20(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ecx                  # 4-byte Reload
 	sbbl	56(%edi), %ecx
-	movl	%ecx, 100(%esp)         # 4-byte Spill
-	movl	104(%esp), %ecx         # 4-byte Reload
-	sbbl	60(%edi), %ecx
-	movl	%ecx, 104(%esp)         # 4-byte Spill
-	movl	84(%esp), %ecx          # 4-byte Reload
-	sbbl	64(%edi), %ecx
-	movl	%ecx, 84(%esp)          # 4-byte Spill
-	movl	%ecx, %ebx
-	sarl	$31, %ebx
-	movl	%ebx, %edx
-	shldl	$1, %ecx, %edx
-	movl	144(%esp), %eax
-	movl	28(%eax), %ecx
-	andl	%edx, %ecx
-	movl	%ecx, 28(%esp)          # 4-byte Spill
-	movl	12(%eax), %ecx
-	andl	%edx, %ecx
-	movl	%ecx, 16(%esp)          # 4-byte Spill
-	movl	4(%eax), %ecx
-	andl	%edx, %ecx
-	movl	%ecx, %esi
-	andl	(%eax), %edx
-	movl	64(%eax), %ecx
-	andl	%ebx, %ecx
-	movl	%ecx, 40(%esp)          # 4-byte Spill
-	movl	60(%eax), %ecx
-	andl	%ebx, %ecx
-	movl	%ecx, 36(%esp)          # 4-byte Spill
-	roll	%ebx
-	movl	56(%eax), %ecx
-	andl	%ebx, %ecx
-	movl	%ecx, 32(%esp)          # 4-byte Spill
-	movl	52(%eax), %ecx
-	andl	%ebx, %ecx
-	movl	%ecx, 24(%esp)          # 4-byte Spill
-	movl	48(%eax), %ecx
-	andl	%ebx, %ecx
-	movl	%ecx, 20(%esp)          # 4-byte Spill
-	movl	44(%eax), %ecx
-	andl	%ebx, %ecx
-	movl	%ecx, 12(%esp)          # 4-byte Spill
-	movl	40(%eax), %ecx
-	andl	%ebx, %ecx
-	movl	%ecx, 8(%esp)           # 4-byte Spill
-	movl	36(%eax), %ecx
-	andl	%ebx, %ecx
-	movl	%ecx, 4(%esp)           # 4-byte Spill
-	movl	32(%eax), %ecx
-	andl	%ebx, %ecx
-	movl	%ecx, (%esp)            # 4-byte Spill
-	movl	24(%eax), %ebp
-	andl	%ebx, %ebp
-	movl	20(%eax), %edi
-	andl	%ebx, %edi
-	movl	16(%eax), %ecx
-	andl	%ebx, %ecx
-	andl	8(%eax), %ebx
-	addl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, %eax
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	132(%esp), %esi
-	movl	%edx, (%esi)
-	adcl	44(%esp), %ebx          # 4-byte Folded Reload
-	movl	%eax, 4(%esi)
-	movl	16(%esp), %eax          # 4-byte Reload
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebx, 8(%esi)
-	adcl	52(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 12(%esi)
-	adcl	56(%esp), %edi          # 4-byte Folded Reload
-	movl	%ecx, 16(%esi)
-	adcl	60(%esp), %ebp          # 4-byte Folded Reload
-	movl	%edi, 20(%esi)
-	movl	28(%esp), %eax          # 4-byte Reload
-	adcl	64(%esp), %eax          # 4-byte Folded Reload
-	movl	%ebp, 24(%esi)
-	movl	(%esp), %ecx            # 4-byte Reload
-	adcl	76(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 28(%esi)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	80(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 32(%esi)
-	movl	8(%esp), %ecx           # 4-byte Reload
-	adcl	108(%esp), %ecx         # 4-byte Folded Reload
-	movl	%eax, 36(%esi)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	88(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 40(%esi)
-	movl	20(%esp), %ecx          # 4-byte Reload
-	adcl	92(%esp), %ecx          # 4-byte Folded Reload
-	movl	%eax, 44(%esi)
-	movl	24(%esp), %eax          # 4-byte Reload
-	adcl	96(%esp), %eax          # 4-byte Folded Reload
-	movl	%ecx, 48(%esi)
-	movl	32(%esp), %ecx          # 4-byte Reload
-	adcl	100(%esp), %ecx         # 4-byte Folded Reload
-	movl	%eax, 52(%esi)
-	movl	36(%esp), %eax          # 4-byte Reload
-	adcl	104(%esp), %eax         # 4-byte Folded Reload
-	movl	%ecx, 56(%esi)
+	movl	%ecx, 24(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	60(%edi), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	sarl	$31, %eax
+	movl	136(%esp), %esi
+	movl	60(%esi), %ecx
+	andl	%eax, %ecx
+	movl	%ecx, 92(%esp)                  # 4-byte Spill
+	movl	56(%esi), %ecx
+	andl	%eax, %ecx
+	movl	%ecx, 68(%esp)                  # 4-byte Spill
+	movl	52(%esi), %ecx
+	andl	%eax, %ecx
+	movl	%ecx, 60(%esp)                  # 4-byte Spill
+	movl	48(%esi), %ecx
+	andl	%eax, %ecx
+	movl	%ecx, 56(%esp)                  # 4-byte Spill
+	movl	44(%esi), %ecx
+	andl	%eax, %ecx
+	movl	%ecx, 52(%esp)                  # 4-byte Spill
+	movl	40(%esi), %ecx
+	andl	%eax, %ecx
+	movl	%ecx, 48(%esp)                  # 4-byte Spill
+	movl	36(%esi), %ecx
+	andl	%eax, %ecx
+	movl	%ecx, 44(%esp)                  # 4-byte Spill
+	movl	32(%esi), %ecx
+	andl	%eax, %ecx
+	movl	%ecx, 40(%esp)                  # 4-byte Spill
+	movl	28(%esi), %ecx
+	andl	%eax, %ecx
+	movl	%ecx, 36(%esp)                  # 4-byte Spill
+	movl	24(%esi), %ecx
+	andl	%eax, %ecx
+	movl	%ecx, 32(%esp)                  # 4-byte Spill
+	movl	20(%esi), %ebp
+	andl	%eax, %ebp
+	movl	16(%esi), %ebx
+	andl	%eax, %ebx
+	movl	12(%esi), %edi
+	andl	%eax, %edi
+	movl	8(%esi), %edx
+	andl	%eax, %edx
+	movl	4(%esi), %ecx
+	andl	%eax, %ecx
+	andl	(%esi), %eax
+	addl	96(%esp), %eax                  # 4-byte Folded Reload
+	adcl	100(%esp), %ecx                 # 4-byte Folded Reload
+	movl	124(%esp), %esi
+	movl	%eax, (%esi)
+	adcl	64(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 4(%esi)
+	adcl	72(%esp), %edi                  # 4-byte Folded Reload
+	movl	%edx, 8(%esi)
+	adcl	76(%esp), %ebx                  # 4-byte Folded Reload
+	movl	%edi, 12(%esi)
+	adcl	80(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%ebx, 16(%esi)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	84(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ebp, 20(%esi)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	88(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 24(%esi)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	(%esp), %eax                    # 4-byte Folded Reload
+	movl	%ecx, 28(%esi)
+	movl	44(%esp), %ecx                  # 4-byte Reload
+	adcl	28(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 32(%esi)
+	movl	48(%esp), %eax                  # 4-byte Reload
+	adcl	4(%esp), %eax                   # 4-byte Folded Reload
+	movl	%ecx, 36(%esi)
+	movl	52(%esp), %ecx                  # 4-byte Reload
+	adcl	12(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 40(%esi)
+	movl	56(%esp), %eax                  # 4-byte Reload
+	adcl	16(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 44(%esi)
+	movl	60(%esp), %ecx                  # 4-byte Reload
+	adcl	20(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 48(%esi)
+	movl	68(%esp), %eax                  # 4-byte Reload
+	adcl	24(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 52(%esi)
+	movl	%eax, 56(%esi)
+	movl	92(%esp), %eax                  # 4-byte Reload
+	adcl	8(%esp), %eax                   # 4-byte Folded Reload
 	movl	%eax, 60(%esi)
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%eax, 64(%esi)
-	addl	$112, %esp
+	addl	$104, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end267:
-	.size	mcl_fp_subNF17L, .Lfunc_end267-mcl_fp_subNF17L
-
-	.globl	mcl_fpDbl_add17L
-	.align	16, 0x90
-	.type	mcl_fpDbl_add17L,@function
-mcl_fpDbl_add17L:                       # @mcl_fpDbl_add17L
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	subl	$128, %esp
-	movl	156(%esp), %ecx
-	movl	152(%esp), %edx
-	movl	12(%edx), %edi
-	movl	16(%edx), %esi
-	movl	8(%ecx), %ebx
-	movl	(%ecx), %ebp
-	addl	(%edx), %ebp
-	movl	148(%esp), %eax
-	movl	%ebp, (%eax)
-	movl	4(%ecx), %ebp
-	adcl	4(%edx), %ebp
-	adcl	8(%edx), %ebx
-	adcl	12(%ecx), %edi
-	adcl	16(%ecx), %esi
-	movl	%ebp, 4(%eax)
-	movl	76(%ecx), %ebp
-	movl	%ebx, 8(%eax)
-	movl	20(%ecx), %ebx
-	movl	%edi, 12(%eax)
-	movl	20(%edx), %edi
-	adcl	%ebx, %edi
-	movl	24(%ecx), %ebx
-	movl	%esi, 16(%eax)
-	movl	24(%edx), %esi
-	adcl	%ebx, %esi
-	movl	28(%ecx), %ebx
-	movl	%edi, 20(%eax)
-	movl	28(%edx), %edi
-	adcl	%ebx, %edi
-	movl	32(%ecx), %ebx
-	movl	%esi, 24(%eax)
-	movl	32(%edx), %esi
-	adcl	%ebx, %esi
-	movl	36(%ecx), %ebx
-	movl	%edi, 28(%eax)
-	movl	36(%edx), %edi
-	adcl	%ebx, %edi
-	movl	40(%ecx), %ebx
-	movl	%esi, 32(%eax)
-	movl	40(%edx), %esi
-	adcl	%ebx, %esi
-	movl	44(%ecx), %ebx
-	movl	%edi, 36(%eax)
-	movl	44(%edx), %edi
-	adcl	%ebx, %edi
-	movl	48(%ecx), %ebx
-	movl	%esi, 40(%eax)
-	movl	48(%edx), %esi
-	adcl	%ebx, %esi
-	movl	52(%ecx), %ebx
-	movl	%edi, 44(%eax)
-	movl	52(%edx), %edi
-	adcl	%ebx, %edi
-	movl	56(%ecx), %ebx
-	movl	%esi, 48(%eax)
-	movl	56(%edx), %esi
-	adcl	%ebx, %esi
-	movl	60(%ecx), %ebx
-	movl	%edi, 52(%eax)
-	movl	60(%edx), %edi
-	adcl	%ebx, %edi
-	movl	64(%ecx), %ebx
-	movl	%esi, 56(%eax)
-	movl	64(%edx), %esi
-	adcl	%ebx, %esi
-	movl	68(%ecx), %ebx
-	movl	%edi, 60(%eax)
-	movl	68(%edx), %edi
-	adcl	%ebx, %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	72(%ecx), %edi
-	movl	%esi, 64(%eax)
-	movl	72(%edx), %eax
-	adcl	%edi, %eax
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	76(%edx), %eax
-	adcl	%ebp, %eax
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	80(%ecx), %esi
-	movl	80(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	84(%ecx), %esi
-	movl	84(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	88(%ecx), %esi
-	movl	88(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	92(%ecx), %esi
-	movl	92(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	96(%ecx), %esi
-	movl	96(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	100(%ecx), %esi
-	movl	100(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	104(%ecx), %esi
-	movl	104(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 76(%esp)          # 4-byte Spill
-	movl	108(%ecx), %esi
-	movl	108(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 80(%esp)          # 4-byte Spill
-	movl	112(%ecx), %esi
-	movl	112(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 84(%esp)          # 4-byte Spill
-	movl	116(%ecx), %esi
+.Lfunc_end87:
+	.size	mcl_fp_subNF16L, .Lfunc_end87-mcl_fp_subNF16L
+                                        # -- End function
+	.globl	mcl_fpDbl_add16L                # -- Begin function mcl_fpDbl_add16L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_add16L,@function
+mcl_fpDbl_add16L:                       # @mcl_fpDbl_add16L
+# %bb.0:
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+	subl	$128, %esp
+	movl	152(%esp), %edx
+	movl	(%edx), %eax
+	movl	4(%edx), %esi
+	movl	156(%esp), %ecx
+	addl	(%ecx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	adcl	4(%ecx), %esi
+	movl	%esi, 96(%esp)                  # 4-byte Spill
+	movl	124(%edx), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	120(%edx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
 	movl	116(%edx), %eax
-	adcl	%esi, %eax
-	movl	%eax, 88(%esp)          # 4-byte Spill
-	movl	120(%ecx), %edi
-	movl	120(%edx), %esi
-	adcl	%edi, %esi
-	movl	%esi, 64(%esp)          # 4-byte Spill
-	movl	124(%ecx), %ebx
-	movl	124(%edx), %edi
-	adcl	%ebx, %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	128(%ecx), %ebx
-	movl	128(%edx), %ebp
-	adcl	%ebx, %ebp
-	movl	%ebp, 72(%esp)          # 4-byte Spill
-	movl	132(%ecx), %ecx
-	movl	132(%edx), %edx
-	adcl	%ecx, %edx
-	sbbl	%ecx, %ecx
-	andl	$1, %ecx
-	movl	160(%esp), %ebx
-	movl	92(%esp), %eax          # 4-byte Reload
-	subl	(%ebx), %eax
-	movl	%eax, 48(%esp)          # 4-byte Spill
-	movl	96(%esp), %eax          # 4-byte Reload
-	sbbl	4(%ebx), %eax
-	movl	%eax, 44(%esp)          # 4-byte Spill
-	movl	100(%esp), %eax         # 4-byte Reload
-	sbbl	8(%ebx), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-	movl	104(%esp), %eax         # 4-byte Reload
-	sbbl	12(%ebx), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-	movl	108(%esp), %eax         # 4-byte Reload
-	sbbl	16(%ebx), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-	movl	112(%esp), %eax         # 4-byte Reload
-	sbbl	20(%ebx), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-	movl	116(%esp), %eax         # 4-byte Reload
-	sbbl	24(%ebx), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-	movl	120(%esp), %eax         # 4-byte Reload
-	sbbl	28(%ebx), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-	movl	124(%esp), %eax         # 4-byte Reload
-	sbbl	32(%ebx), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-	movl	76(%esp), %eax          # 4-byte Reload
-	sbbl	36(%ebx), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-	movl	80(%esp), %eax          # 4-byte Reload
-	sbbl	40(%ebx), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-	movl	84(%esp), %eax          # 4-byte Reload
-	sbbl	44(%ebx), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-	movl	88(%esp), %eax          # 4-byte Reload
-	sbbl	48(%ebx), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-	sbbl	52(%ebx), %esi
-	movl	%esi, 52(%esp)          # 4-byte Spill
-	sbbl	56(%ebx), %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	sbbl	60(%ebx), %ebp
-	movl	%ebp, 60(%esp)          # 4-byte Spill
-	movl	%edx, %ebp
-	sbbl	64(%ebx), %ebp
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	112(%edx), %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	108(%edx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	104(%edx), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	100(%edx), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	96(%edx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	92(%edx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	88(%edx), %eax
+	movl	%eax, 60(%esp)                  # 4-byte Spill
+	movl	84(%edx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	80(%edx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	76(%edx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	72(%edx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	68(%edx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	64(%edx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	60(%edx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	56(%edx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	52(%edx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	48(%edx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	44(%edx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	40(%edx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	36(%edx), %eax
+	movl	%eax, 116(%esp)                 # 4-byte Spill
+	movl	32(%edx), %eax
+	movl	%eax, 112(%esp)                 # 4-byte Spill
+	movl	28(%edx), %ebp
+	movl	24(%edx), %ebx
+	movl	20(%edx), %edi
+	movl	16(%edx), %esi
+	movl	12(%edx), %eax
+	movl	8(%edx), %edx
+	adcl	8(%ecx), %edx
+	movl	%edx, 92(%esp)                  # 4-byte Spill
+	adcl	12(%ecx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	adcl	16(%ecx), %esi
+	movl	%esi, 84(%esp)                  # 4-byte Spill
+	adcl	20(%ecx), %edi
+	movl	%edi, 80(%esp)                  # 4-byte Spill
+	adcl	24(%ecx), %ebx
+	movl	%ebx, 124(%esp)                 # 4-byte Spill
+	adcl	28(%ecx), %ebp
+	movl	%ebp, 120(%esp)                 # 4-byte Spill
+	movl	112(%esp), %esi                 # 4-byte Reload
+	adcl	32(%ecx), %esi
+	movl	116(%esp), %eax                 # 4-byte Reload
+	adcl	36(%ecx), %eax
+	movl	8(%esp), %edx                   # 4-byte Reload
+	adcl	40(%ecx), %edx
+	movl	%edx, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %edx                   # 4-byte Reload
+	adcl	44(%ecx), %edx
+	movl	%edx, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %edx                    # 4-byte Reload
+	adcl	48(%ecx), %edx
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	20(%esp), %edx                  # 4-byte Reload
+	adcl	52(%ecx), %edx
+	movl	%edx, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %edx                  # 4-byte Reload
+	adcl	56(%ecx), %edx
+	movl	%edx, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %edx                  # 4-byte Reload
+	adcl	60(%ecx), %edx
+	movl	%edx, 12(%esp)                  # 4-byte Spill
+	movl	108(%esp), %ebx                 # 4-byte Reload
+	adcl	64(%ecx), %ebx
+	movl	104(%esp), %edi                 # 4-byte Reload
+	adcl	68(%ecx), %edi
+	movl	76(%esp), %edx                  # 4-byte Reload
+	adcl	72(%ecx), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	movl	72(%esp), %edx                  # 4-byte Reload
+	adcl	76(%ecx), %edx
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edx                  # 4-byte Reload
+	adcl	80(%ecx), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	64(%esp), %edx                  # 4-byte Reload
+	adcl	84(%ecx), %edx
+	movl	%edx, 64(%esp)                  # 4-byte Spill
+	movl	60(%esp), %edx                  # 4-byte Reload
+	adcl	88(%ecx), %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	56(%esp), %edx                  # 4-byte Reload
+	adcl	92(%ecx), %edx
+	movl	%edx, 56(%esp)                  # 4-byte Spill
+	movl	52(%esp), %edx                  # 4-byte Reload
+	adcl	96(%ecx), %edx
+	movl	%edx, 52(%esp)                  # 4-byte Spill
+	movl	48(%esp), %edx                  # 4-byte Reload
+	adcl	100(%ecx), %edx
+	movl	%edx, 48(%esp)                  # 4-byte Spill
+	movl	44(%esp), %ebp                  # 4-byte Reload
+	adcl	104(%ecx), %ebp
+	movl	%ebp, 44(%esp)                  # 4-byte Spill
+	movl	40(%esp), %ebp                  # 4-byte Reload
+	adcl	108(%ecx), %ebp
+	movl	%ebp, 40(%esp)                  # 4-byte Spill
+	movl	36(%esp), %ebp                  # 4-byte Reload
+	adcl	112(%ecx), %ebp
+	movl	%ebp, 36(%esp)                  # 4-byte Spill
+	movl	32(%esp), %ebp                  # 4-byte Reload
+	adcl	116(%ecx), %ebp
+	movl	%ebp, 32(%esp)                  # 4-byte Spill
+	movl	28(%esp), %ebp                  # 4-byte Reload
+	adcl	120(%ecx), %ebp
+	movl	%ebp, 28(%esp)                  # 4-byte Spill
+	movl	24(%esp), %ebp                  # 4-byte Reload
+	adcl	124(%ecx), %ebp
+	movl	%ebp, 24(%esp)                  # 4-byte Spill
+	movl	148(%esp), %ebp
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 60(%ebp)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 56(%ebp)
+	movl	20(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 52(%ebp)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 48(%ebp)
+	movl	4(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 44(%ebp)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	movl	%ecx, 40(%ebp)
+	movl	%eax, 36(%ebp)
+	movl	%esi, 32(%ebp)
+	movl	120(%esp), %eax                 # 4-byte Reload
+	movl	%eax, 28(%ebp)
+	movl	124(%esp), %eax                 # 4-byte Reload
+	movl	%eax, 24(%ebp)
+	movl	80(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 20(%ebp)
+	movl	84(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 16(%ebp)
+	movl	88(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebp)
+	movl	92(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ebp)
+	movl	96(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%ebp)
+	movl	100(%esp), %ecx                 # 4-byte Reload
+	movl	%ecx, (%ebp)
+	setb	80(%esp)                        # 1-byte Folded Spill
+	movl	160(%esp), %ecx
+	movl	%ebx, %eax
+	movl	%ebx, 108(%esp)                 # 4-byte Spill
+	subl	(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	%edi, %esi
+	movl	%edi, 104(%esp)                 # 4-byte Spill
+	sbbl	4(%ecx), %esi
+	movl	%esi, 16(%esp)                  # 4-byte Spill
+	movl	76(%esp), %edi                  # 4-byte Reload
+	sbbl	8(%ecx), %edi
+	movl	%edi, 12(%esp)                  # 4-byte Spill
+	movl	72(%esp), %eax                  # 4-byte Reload
+	sbbl	12(%ecx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	68(%esp), %eax                  # 4-byte Reload
+	sbbl	16(%ecx), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	sbbl	20(%ecx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	60(%esp), %eax                  # 4-byte Reload
+	sbbl	24(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	sbbl	28(%ecx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	sbbl	32(%ecx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%ecx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%ecx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	40(%esp), %edx                  # 4-byte Reload
+	sbbl	44(%ecx), %edx
+	movl	36(%esp), %esi                  # 4-byte Reload
+	sbbl	48(%ecx), %esi
+	movl	32(%esp), %edi                  # 4-byte Reload
+	sbbl	52(%ecx), %edi
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	sbbl	56(%ecx), %ebx
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	60(%ecx), %eax
+	movzbl	80(%esp), %ecx                  # 1-byte Folded Reload
 	sbbl	$0, %ecx
-	andl	$1, %ecx
-	jne	.LBB268_2
-# BB#1:
-	movl	%ebp, %edx
-.LBB268_2:
-	testb	%cl, %cl
-	movl	92(%esp), %eax          # 4-byte Reload
-	movl	88(%esp), %esi          # 4-byte Reload
-	movl	84(%esp), %edi          # 4-byte Reload
-	movl	80(%esp), %ebx          # 4-byte Reload
-	movl	76(%esp), %ebp          # 4-byte Reload
-	jne	.LBB268_4
-# BB#3:
-	movl	(%esp), %esi            # 4-byte Reload
-	movl	4(%esp), %edi           # 4-byte Reload
-	movl	8(%esp), %ebx           # 4-byte Reload
-	movl	12(%esp), %ebp          # 4-byte Reload
-	movl	16(%esp), %eax          # 4-byte Reload
-	movl	%eax, 124(%esp)         # 4-byte Spill
-	movl	20(%esp), %eax          # 4-byte Reload
-	movl	%eax, 120(%esp)         # 4-byte Spill
-	movl	24(%esp), %eax          # 4-byte Reload
-	movl	%eax, 116(%esp)         # 4-byte Spill
-	movl	28(%esp), %eax          # 4-byte Reload
-	movl	%eax, 112(%esp)         # 4-byte Spill
-	movl	32(%esp), %eax          # 4-byte Reload
-	movl	%eax, 108(%esp)         # 4-byte Spill
-	movl	36(%esp), %eax          # 4-byte Reload
-	movl	%eax, 104(%esp)         # 4-byte Spill
-	movl	40(%esp), %eax          # 4-byte Reload
-	movl	%eax, 100(%esp)         # 4-byte Spill
-	movl	44(%esp), %eax          # 4-byte Reload
-	movl	%eax, 96(%esp)          # 4-byte Spill
-	movl	48(%esp), %eax          # 4-byte Reload
-.LBB268_4:
-	movl	148(%esp), %ecx
-	movl	%eax, 68(%ecx)
-	movl	%ecx, %eax
-	movl	96(%esp), %ecx          # 4-byte Reload
-	movl	%ecx, 72(%eax)
-	movl	100(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 76(%eax)
-	movl	104(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 80(%eax)
-	movl	108(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 84(%eax)
-	movl	112(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 88(%eax)
-	movl	116(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 92(%eax)
-	movl	120(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 96(%eax)
-	movl	124(%esp), %ecx         # 4-byte Reload
-	movl	%ecx, 100(%eax)
-	movl	%ebp, 104(%eax)
-	movl	%ebx, 108(%eax)
-	movl	%edi, 112(%eax)
-	movl	%esi, 116(%eax)
-	movl	72(%esp), %ecx          # 4-byte Reload
-	movl	64(%esp), %esi          # 4-byte Reload
-	jne	.LBB268_6
-# BB#5:
-	movl	52(%esp), %esi          # 4-byte Reload
-.LBB268_6:
-	movl	%esi, 120(%eax)
-	movl	68(%esp), %esi          # 4-byte Reload
-	jne	.LBB268_8
-# BB#7:
-	movl	56(%esp), %esi          # 4-byte Reload
-.LBB268_8:
-	movl	%esi, 124(%eax)
-	jne	.LBB268_10
-# BB#9:
-	movl	60(%esp), %ecx          # 4-byte Reload
-.LBB268_10:
-	movl	%ecx, 128(%eax)
-	movl	%edx, 132(%eax)
+	testb	$1, %cl
+	jne	.LBB88_1
+# %bb.2:
+	movl	%eax, 124(%ebp)
+	jne	.LBB88_3
+.LBB88_4:
+	movl	%ebx, 120(%ebp)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB88_5
+.LBB88_6:
+	movl	%edi, 116(%ebp)
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	jne	.LBB88_7
+.LBB88_8:
+	movl	%esi, 112(%ebp)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	jne	.LBB88_9
+.LBB88_10:
+	movl	%edx, 108(%ebp)
+	movl	100(%esp), %esi                 # 4-byte Reload
+	movl	84(%esp), %edx                  # 4-byte Reload
+	jne	.LBB88_11
+.LBB88_12:
+	movl	%edx, 104(%ebp)
+	movl	88(%esp), %edx                  # 4-byte Reload
+	jne	.LBB88_13
+.LBB88_14:
+	movl	%edx, 100(%ebp)
+	jne	.LBB88_15
+.LBB88_16:
+	movl	%ecx, %edx
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 96(%ebp)
+	jne	.LBB88_17
+.LBB88_18:
+	movl	%eax, %ecx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 92(%ebp)
+	jne	.LBB88_19
+.LBB88_20:
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 88(%ebp)
+	jne	.LBB88_21
+.LBB88_22:
+	movl	%ebx, 84(%ebp)
+	movl	%ecx, %eax
+	jne	.LBB88_23
+.LBB88_24:
+	movl	%edi, 80(%ebp)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	jne	.LBB88_25
+.LBB88_26:
+	movl	%esi, 76(%ebp)
+	jne	.LBB88_27
+.LBB88_28:
+	movl	%ecx, 72(%ebp)
+	jne	.LBB88_29
+.LBB88_30:
+	movl	%edx, 68(%ebp)
+	je	.LBB88_32
+.LBB88_31:
+	movl	108(%esp), %eax                 # 4-byte Reload
+.LBB88_32:
+	movl	%eax, 64(%ebp)
 	addl	$128, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end268:
-	.size	mcl_fpDbl_add17L, .Lfunc_end268-mcl_fpDbl_add17L
-
-	.globl	mcl_fpDbl_sub17L
-	.align	16, 0x90
-	.type	mcl_fpDbl_sub17L,@function
-mcl_fpDbl_sub17L:                       # @mcl_fpDbl_sub17L
-# BB#0:
+.LBB88_1:
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 124(%ebp)
+	je	.LBB88_4
+.LBB88_3:
+	movl	28(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 120(%ebp)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	je	.LBB88_6
+.LBB88_5:
+	movl	32(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 116(%ebp)
+	movl	92(%esp), %ebx                  # 4-byte Reload
+	je	.LBB88_8
+.LBB88_7:
+	movl	36(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 112(%ebp)
+	movl	96(%esp), %edi                  # 4-byte Reload
+	je	.LBB88_10
+.LBB88_9:
+	movl	40(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 108(%ebp)
+	movl	100(%esp), %esi                 # 4-byte Reload
+	movl	84(%esp), %edx                  # 4-byte Reload
+	je	.LBB88_12
+.LBB88_11:
+	movl	44(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 104(%ebp)
+	movl	88(%esp), %edx                  # 4-byte Reload
+	je	.LBB88_14
+.LBB88_13:
+	movl	48(%esp), %edx                  # 4-byte Reload
+	movl	%edx, 100(%ebp)
+	je	.LBB88_16
+.LBB88_15:
+	movl	52(%esp), %edx                  # 4-byte Reload
+	movl	%edx, (%esp)                    # 4-byte Spill
+	movl	%ecx, %edx
+	movl	(%esp), %ecx                    # 4-byte Reload
+	movl	%ecx, 96(%ebp)
+	je	.LBB88_18
+.LBB88_17:
+	movl	56(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 4(%esp)                   # 4-byte Spill
+	movl	%eax, %ecx
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 92(%ebp)
+	je	.LBB88_20
+.LBB88_19:
+	movl	60(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 88(%ebp)
+	je	.LBB88_22
+.LBB88_21:
+	movl	64(%esp), %ebx                  # 4-byte Reload
+	movl	%ebx, 84(%ebp)
+	movl	%ecx, %eax
+	je	.LBB88_24
+.LBB88_23:
+	movl	68(%esp), %edi                  # 4-byte Reload
+	movl	%edi, 80(%ebp)
+	movl	12(%esp), %ecx                  # 4-byte Reload
+	je	.LBB88_26
+.LBB88_25:
+	movl	72(%esp), %esi                  # 4-byte Reload
+	movl	%esi, 76(%ebp)
+	je	.LBB88_28
+.LBB88_27:
+	movl	76(%esp), %ecx                  # 4-byte Reload
+	movl	%ecx, 72(%ebp)
+	je	.LBB88_30
+.LBB88_29:
+	movl	104(%esp), %edx                 # 4-byte Reload
+	movl	%edx, 68(%ebp)
+	jne	.LBB88_31
+	jmp	.LBB88_32
+.Lfunc_end88:
+	.size	mcl_fpDbl_add16L, .Lfunc_end88-mcl_fpDbl_add16L
+                                        # -- End function
+	.globl	mcl_fpDbl_sub16L                # -- Begin function mcl_fpDbl_sub16L
+	.p2align	4, 0x90
+	.type	mcl_fpDbl_sub16L,@function
+mcl_fpDbl_sub16L:                       # @mcl_fpDbl_sub16L
+# %bb.0:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
-	subl	$116, %esp
-	movl	140(%esp), %edx
+	subl	$128, %esp
+	movl	152(%esp), %edx
 	movl	(%edx), %eax
 	movl	4(%edx), %edi
-	movl	144(%esp), %esi
-	subl	(%esi), %eax
-	sbbl	4(%esi), %edi
-	movl	8(%edx), %ebx
-	sbbl	8(%esi), %ebx
-	movl	136(%esp), %ecx
-	movl	%eax, (%ecx)
-	movl	12(%edx), %eax
-	sbbl	12(%esi), %eax
-	movl	%edi, 4(%ecx)
-	movl	16(%edx), %edi
-	sbbl	16(%esi), %edi
-	movl	%ebx, 8(%ecx)
-	movl	20(%esi), %ebx
-	movl	%eax, 12(%ecx)
-	movl	20(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	24(%esi), %ebx
-	movl	%edi, 16(%ecx)
-	movl	24(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	28(%esi), %ebx
-	movl	%eax, 20(%ecx)
-	movl	28(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	32(%esi), %ebx
-	movl	%edi, 24(%ecx)
-	movl	32(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	36(%esi), %ebx
-	movl	%eax, 28(%ecx)
-	movl	36(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	40(%esi), %ebx
-	movl	%edi, 32(%ecx)
-	movl	40(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	44(%esi), %ebx
-	movl	%eax, 36(%ecx)
-	movl	44(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	48(%esi), %ebx
-	movl	%edi, 40(%ecx)
-	movl	48(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	52(%esi), %ebx
-	movl	%eax, 44(%ecx)
-	movl	52(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	56(%esi), %ebx
-	movl	%edi, 48(%ecx)
-	movl	56(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	60(%esi), %ebx
-	movl	%eax, 52(%ecx)
-	movl	60(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	64(%esi), %ebx
-	movl	%edi, 56(%ecx)
-	movl	64(%edx), %edi
-	sbbl	%ebx, %edi
-	movl	68(%esi), %ebx
-	movl	%eax, 60(%ecx)
+	xorl	%esi, %esi
+	movl	156(%esp), %ecx
+	subl	(%ecx), %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	sbbl	4(%ecx), %edi
+	movl	%edi, 36(%esp)                  # 4-byte Spill
+	movl	124(%edx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	120(%edx), %eax
+	movl	%eax, 104(%esp)                 # 4-byte Spill
+	movl	116(%edx), %eax
+	movl	%eax, 100(%esp)                 # 4-byte Spill
+	movl	112(%edx), %eax
+	movl	%eax, 96(%esp)                  # 4-byte Spill
+	movl	108(%edx), %eax
+	movl	%eax, 92(%esp)                  # 4-byte Spill
+	movl	104(%edx), %eax
+	movl	%eax, 88(%esp)                  # 4-byte Spill
+	movl	100(%edx), %eax
+	movl	%eax, 84(%esp)                  # 4-byte Spill
+	movl	96(%edx), %eax
+	movl	%eax, 80(%esp)                  # 4-byte Spill
+	movl	92(%edx), %eax
+	movl	%eax, 76(%esp)                  # 4-byte Spill
+	movl	88(%edx), %eax
+	movl	%eax, 72(%esp)                  # 4-byte Spill
+	movl	84(%edx), %eax
+	movl	%eax, 68(%esp)                  # 4-byte Spill
+	movl	80(%edx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	76(%edx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	72(%edx), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
 	movl	68(%edx), %eax
-	sbbl	%ebx, %eax
-	movl	%eax, 52(%esp)          # 4-byte Spill
-	movl	72(%esi), %eax
-	movl	%edi, 64(%ecx)
-	movl	72(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 44(%esp)          # 4-byte Spill
-	movl	76(%esi), %eax
-	movl	76(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 48(%esp)          # 4-byte Spill
-	movl	80(%esi), %eax
-	movl	80(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 56(%esp)          # 4-byte Spill
-	movl	84(%esi), %eax
-	movl	84(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 60(%esp)          # 4-byte Spill
-	movl	88(%esi), %eax
-	movl	88(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 64(%esp)          # 4-byte Spill
-	movl	92(%esi), %eax
-	movl	92(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 68(%esp)          # 4-byte Spill
-	movl	96(%esi), %eax
-	movl	96(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 72(%esp)          # 4-byte Spill
-	movl	100(%esi), %eax
-	movl	100(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 80(%esp)          # 4-byte Spill
-	movl	104(%esi), %eax
-	movl	104(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 84(%esp)          # 4-byte Spill
-	movl	108(%esi), %eax
-	movl	108(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 88(%esp)          # 4-byte Spill
-	movl	112(%esi), %eax
-	movl	112(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 92(%esp)          # 4-byte Spill
-	movl	116(%esi), %eax
-	movl	116(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 96(%esp)          # 4-byte Spill
-	movl	120(%esi), %eax
-	movl	120(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 100(%esp)         # 4-byte Spill
-	movl	124(%esi), %eax
-	movl	124(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 104(%esp)         # 4-byte Spill
-	movl	128(%esi), %eax
-	movl	128(%edx), %edi
-	sbbl	%eax, %edi
-	movl	%edi, 108(%esp)         # 4-byte Spill
-	movl	132(%esi), %eax
-	movl	132(%edx), %edx
-	sbbl	%eax, %edx
-	movl	%edx, 112(%esp)         # 4-byte Spill
-	movl	$0, %eax
-	sbbl	$0, %eax
-	andl	$1, %eax
-	movl	148(%esp), %ebp
-	jne	.LBB269_1
-# BB#2:
-	movl	$0, 76(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_3
-.LBB269_1:
-	movl	64(%ebp), %edx
-	movl	%edx, 76(%esp)          # 4-byte Spill
-.LBB269_3:
-	testb	%al, %al
-	jne	.LBB269_4
-# BB#5:
-	movl	$0, 28(%esp)            # 4-byte Folded Spill
-	movl	$0, %esi
-	jmp	.LBB269_6
-.LBB269_4:
-	movl	(%ebp), %esi
-	movl	4(%ebp), %eax
-	movl	%eax, 28(%esp)          # 4-byte Spill
-.LBB269_6:
-	jne	.LBB269_7
-# BB#8:
-	movl	$0, 40(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_9
-.LBB269_7:
-	movl	60(%ebp), %eax
-	movl	%eax, 40(%esp)          # 4-byte Spill
-.LBB269_9:
-	jne	.LBB269_10
-# BB#11:
-	movl	$0, 36(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_12
-.LBB269_10:
-	movl	56(%ebp), %eax
-	movl	%eax, 36(%esp)          # 4-byte Spill
-.LBB269_12:
-	jne	.LBB269_13
-# BB#14:
-	movl	$0, 32(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_15
-.LBB269_13:
-	movl	52(%ebp), %eax
-	movl	%eax, 32(%esp)          # 4-byte Spill
-.LBB269_15:
-	jne	.LBB269_16
-# BB#17:
-	movl	$0, 24(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_18
-.LBB269_16:
-	movl	48(%ebp), %eax
-	movl	%eax, 24(%esp)          # 4-byte Spill
-.LBB269_18:
-	jne	.LBB269_19
-# BB#20:
-	movl	$0, 20(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_21
-.LBB269_19:
-	movl	44(%ebp), %eax
-	movl	%eax, 20(%esp)          # 4-byte Spill
-.LBB269_21:
-	jne	.LBB269_22
-# BB#23:
-	movl	$0, 16(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_24
-.LBB269_22:
-	movl	40(%ebp), %eax
-	movl	%eax, 16(%esp)          # 4-byte Spill
-.LBB269_24:
-	jne	.LBB269_25
-# BB#26:
-	movl	$0, 12(%esp)            # 4-byte Folded Spill
-	jmp	.LBB269_27
-.LBB269_25:
-	movl	36(%ebp), %eax
-	movl	%eax, 12(%esp)          # 4-byte Spill
-.LBB269_27:
-	jne	.LBB269_28
-# BB#29:
-	movl	$0, 8(%esp)             # 4-byte Folded Spill
-	jmp	.LBB269_30
-.LBB269_28:
-	movl	32(%ebp), %eax
-	movl	%eax, 8(%esp)           # 4-byte Spill
-.LBB269_30:
-	jne	.LBB269_31
-# BB#32:
-	movl	$0, 4(%esp)             # 4-byte Folded Spill
-	jmp	.LBB269_33
-.LBB269_31:
-	movl	28(%ebp), %eax
-	movl	%eax, 4(%esp)           # 4-byte Spill
-.LBB269_33:
-	jne	.LBB269_34
-# BB#35:
-	movl	$0, (%esp)              # 4-byte Folded Spill
-	jmp	.LBB269_36
-.LBB269_34:
-	movl	24(%ebp), %eax
-	movl	%eax, (%esp)            # 4-byte Spill
-.LBB269_36:
-	jne	.LBB269_37
-# BB#38:
-	movl	$0, %ebx
-	jmp	.LBB269_39
-.LBB269_37:
-	movl	20(%ebp), %ebx
-.LBB269_39:
-	jne	.LBB269_40
-# BB#41:
-	movl	$0, %edi
-	jmp	.LBB269_42
-.LBB269_40:
-	movl	16(%ebp), %edi
-.LBB269_42:
-	jne	.LBB269_43
-# BB#44:
-	movl	%ebp, %eax
-	movl	$0, %ebp
-	jmp	.LBB269_45
-.LBB269_43:
-	movl	%ebp, %eax
-	movl	12(%eax), %ebp
-.LBB269_45:
-	jne	.LBB269_46
-# BB#47:
-	xorl	%eax, %eax
-	jmp	.LBB269_48
-.LBB269_46:
-	movl	8(%eax), %eax
-.LBB269_48:
-	addl	52(%esp), %esi          # 4-byte Folded Reload
-	movl	28(%esp), %edx          # 4-byte Reload
-	adcl	44(%esp), %edx          # 4-byte Folded Reload
-	movl	%esi, 68(%ecx)
-	adcl	48(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 72(%ecx)
-	adcl	56(%esp), %ebp          # 4-byte Folded Reload
-	movl	%eax, 76(%ecx)
-	adcl	60(%esp), %edi          # 4-byte Folded Reload
-	movl	%ebp, 80(%ecx)
-	adcl	64(%esp), %ebx          # 4-byte Folded Reload
-	movl	%edi, 84(%ecx)
-	movl	(%esp), %edx            # 4-byte Reload
-	adcl	68(%esp), %edx          # 4-byte Folded Reload
-	movl	%ebx, 88(%ecx)
-	movl	4(%esp), %eax           # 4-byte Reload
-	adcl	72(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 92(%ecx)
-	movl	8(%esp), %edx           # 4-byte Reload
-	adcl	80(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 96(%ecx)
-	movl	12(%esp), %eax          # 4-byte Reload
-	adcl	84(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 100(%ecx)
-	movl	16(%esp), %edx          # 4-byte Reload
-	adcl	88(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 104(%ecx)
-	movl	20(%esp), %eax          # 4-byte Reload
-	adcl	92(%esp), %eax          # 4-byte Folded Reload
-	movl	%edx, 108(%ecx)
-	movl	24(%esp), %edx          # 4-byte Reload
-	adcl	96(%esp), %edx          # 4-byte Folded Reload
-	movl	%eax, 112(%ecx)
-	movl	32(%esp), %eax          # 4-byte Reload
-	adcl	100(%esp), %eax         # 4-byte Folded Reload
-	movl	%edx, 116(%ecx)
-	movl	36(%esp), %edx          # 4-byte Reload
-	adcl	104(%esp), %edx         # 4-byte Folded Reload
-	movl	%eax, 120(%ecx)
-	movl	40(%esp), %eax          # 4-byte Reload
-	adcl	108(%esp), %eax         # 4-byte Folded Reload
-	movl	%edx, 124(%ecx)
-	movl	%eax, 128(%ecx)
-	movl	76(%esp), %eax          # 4-byte Reload
-	adcl	112(%esp), %eax         # 4-byte Folded Reload
-	movl	%eax, 132(%ecx)
-	addl	$116, %esp
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	64(%edx), %ebp
+	movl	%ebp, 60(%esp)                  # 4-byte Spill
+	movl	60(%edx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	56(%edx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	52(%edx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	48(%edx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	44(%edx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	40(%edx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	36(%edx), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	32(%edx), %eax
+	movl	%eax, 112(%esp)                 # 4-byte Spill
+	movl	28(%edx), %eax
+	movl	%eax, 108(%esp)                 # 4-byte Spill
+	movl	24(%edx), %ebp
+	movl	20(%edx), %ebx
+	movl	16(%edx), %edi
+	movl	12(%edx), %eax
+	movl	8(%edx), %edx
+	sbbl	8(%ecx), %edx
+	movl	%edx, 32(%esp)                  # 4-byte Spill
+	sbbl	12(%ecx), %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	sbbl	16(%ecx), %edi
+	movl	%edi, 124(%esp)                 # 4-byte Spill
+	sbbl	20(%ecx), %ebx
+	movl	%ebx, 120(%esp)                 # 4-byte Spill
+	sbbl	24(%ecx), %ebp
+	movl	%ebp, 116(%esp)                 # 4-byte Spill
+	movl	108(%esp), %ebp                 # 4-byte Reload
+	sbbl	28(%ecx), %ebp
+	movl	112(%esp), %edi                 # 4-byte Reload
+	sbbl	32(%ecx), %edi
+	movl	24(%esp), %eax                  # 4-byte Reload
+	sbbl	36(%ecx), %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	20(%esp), %eax                  # 4-byte Reload
+	sbbl	40(%ecx), %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	16(%esp), %eax                  # 4-byte Reload
+	sbbl	44(%ecx), %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	12(%esp), %eax                  # 4-byte Reload
+	sbbl	48(%ecx), %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	8(%esp), %eax                   # 4-byte Reload
+	sbbl	52(%ecx), %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	4(%esp), %eax                   # 4-byte Reload
+	sbbl	56(%ecx), %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	(%esp), %eax                    # 4-byte Reload
+	sbbl	60(%ecx), %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	60(%esp), %edx                  # 4-byte Reload
+	sbbl	64(%ecx), %edx
+	movl	%edx, 60(%esp)                  # 4-byte Spill
+	movl	44(%esp), %eax                  # 4-byte Reload
+	sbbl	68(%ecx), %eax
+	movl	%eax, 44(%esp)                  # 4-byte Spill
+	movl	48(%esp), %eax                  # 4-byte Reload
+	sbbl	72(%ecx), %eax
+	movl	%eax, 48(%esp)                  # 4-byte Spill
+	movl	52(%esp), %eax                  # 4-byte Reload
+	sbbl	76(%ecx), %eax
+	movl	%eax, 52(%esp)                  # 4-byte Spill
+	movl	56(%esp), %eax                  # 4-byte Reload
+	sbbl	80(%ecx), %eax
+	movl	%eax, 56(%esp)                  # 4-byte Spill
+	movl	68(%esp), %edx                  # 4-byte Reload
+	sbbl	84(%ecx), %edx
+	movl	%edx, 68(%esp)                  # 4-byte Spill
+	movl	72(%esp), %edx                  # 4-byte Reload
+	sbbl	88(%ecx), %edx
+	movl	%edx, 72(%esp)                  # 4-byte Spill
+	movl	76(%esp), %edx                  # 4-byte Reload
+	sbbl	92(%ecx), %edx
+	movl	%edx, 76(%esp)                  # 4-byte Spill
+	movl	80(%esp), %edx                  # 4-byte Reload
+	sbbl	96(%ecx), %edx
+	movl	%edx, 80(%esp)                  # 4-byte Spill
+	movl	84(%esp), %edx                  # 4-byte Reload
+	sbbl	100(%ecx), %edx
+	movl	%edx, 84(%esp)                  # 4-byte Spill
+	movl	88(%esp), %edx                  # 4-byte Reload
+	sbbl	104(%ecx), %edx
+	movl	%edx, 88(%esp)                  # 4-byte Spill
+	movl	92(%esp), %edx                  # 4-byte Reload
+	sbbl	108(%ecx), %edx
+	movl	%edx, 92(%esp)                  # 4-byte Spill
+	movl	96(%esp), %edx                  # 4-byte Reload
+	sbbl	112(%ecx), %edx
+	movl	%edx, 96(%esp)                  # 4-byte Spill
+	movl	100(%esp), %edx                 # 4-byte Reload
+	sbbl	116(%ecx), %edx
+	movl	%edx, 100(%esp)                 # 4-byte Spill
+	movl	104(%esp), %edx                 # 4-byte Reload
+	sbbl	120(%ecx), %edx
+	movl	%edx, 104(%esp)                 # 4-byte Spill
+	movl	64(%esp), %eax                  # 4-byte Reload
+	sbbl	124(%ecx), %eax
+	movl	%eax, 64(%esp)                  # 4-byte Spill
+	movl	148(%esp), %ebx
+	movl	(%esp), %eax                    # 4-byte Reload
+	movl	%eax, 60(%ebx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 56(%ebx)
+	movl	8(%esp), %eax                   # 4-byte Reload
+	movl	%eax, 52(%ebx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 48(%ebx)
+	movl	16(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 44(%ebx)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 40(%ebx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 36(%ebx)
+	movl	%edi, 32(%ebx)
+	movl	%ebp, 28(%ebx)
+	movl	116(%esp), %eax                 # 4-byte Reload
+	movl	%eax, 24(%ebx)
+	movl	120(%esp), %eax                 # 4-byte Reload
+	movl	%eax, 20(%ebx)
+	movl	124(%esp), %eax                 # 4-byte Reload
+	movl	%eax, 16(%ebx)
+	movl	28(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 12(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 8(%ebx)
+	movl	36(%esp), %eax                  # 4-byte Reload
+	movl	%eax, 4(%ebx)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	movl	%eax, (%ebx)
+	sbbl	%esi, %esi
+	andl	$1, %esi
+	negl	%esi
+	movl	160(%esp), %edi
+	movl	60(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 24(%esp)                  # 4-byte Spill
+	movl	56(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 20(%esp)                  # 4-byte Spill
+	movl	52(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 16(%esp)                  # 4-byte Spill
+	movl	48(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 12(%esp)                  # 4-byte Spill
+	movl	44(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 8(%esp)                   # 4-byte Spill
+	movl	40(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 4(%esp)                   # 4-byte Spill
+	movl	36(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, (%esp)                    # 4-byte Spill
+	movl	32(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 40(%esp)                  # 4-byte Spill
+	movl	28(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 36(%esp)                  # 4-byte Spill
+	movl	24(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 32(%esp)                  # 4-byte Spill
+	movl	20(%edi), %eax
+	andl	%esi, %eax
+	movl	%eax, 28(%esp)                  # 4-byte Spill
+	movl	16(%edi), %ebp
+	andl	%esi, %ebp
+	movl	12(%edi), %edx
+	andl	%esi, %edx
+	movl	8(%edi), %ecx
+	andl	%esi, %ecx
+	movl	4(%edi), %eax
+	andl	%esi, %eax
+	andl	(%edi), %esi
+	addl	60(%esp), %esi                  # 4-byte Folded Reload
+	adcl	44(%esp), %eax                  # 4-byte Folded Reload
+	movl	%esi, 64(%ebx)
+	adcl	48(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 68(%ebx)
+	adcl	52(%esp), %edx                  # 4-byte Folded Reload
+	movl	%ecx, 72(%ebx)
+	adcl	56(%esp), %ebp                  # 4-byte Folded Reload
+	movl	%edx, 76(%ebx)
+	movl	28(%esp), %ecx                  # 4-byte Reload
+	adcl	68(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%ebp, 80(%ebx)
+	movl	32(%esp), %eax                  # 4-byte Reload
+	adcl	72(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 84(%ebx)
+	movl	36(%esp), %ecx                  # 4-byte Reload
+	adcl	76(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 88(%ebx)
+	movl	40(%esp), %eax                  # 4-byte Reload
+	adcl	80(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 92(%ebx)
+	movl	(%esp), %ecx                    # 4-byte Reload
+	adcl	84(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 96(%ebx)
+	movl	4(%esp), %eax                   # 4-byte Reload
+	adcl	88(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 100(%ebx)
+	movl	8(%esp), %ecx                   # 4-byte Reload
+	adcl	92(%esp), %ecx                  # 4-byte Folded Reload
+	movl	%eax, 104(%ebx)
+	movl	12(%esp), %eax                  # 4-byte Reload
+	adcl	96(%esp), %eax                  # 4-byte Folded Reload
+	movl	%ecx, 108(%ebx)
+	movl	16(%esp), %ecx                  # 4-byte Reload
+	adcl	100(%esp), %ecx                 # 4-byte Folded Reload
+	movl	%eax, 112(%ebx)
+	movl	20(%esp), %eax                  # 4-byte Reload
+	adcl	104(%esp), %eax                 # 4-byte Folded Reload
+	movl	%ecx, 116(%ebx)
+	movl	%eax, 120(%ebx)
+	movl	24(%esp), %eax                  # 4-byte Reload
+	adcl	64(%esp), %eax                  # 4-byte Folded Reload
+	movl	%eax, 124(%ebx)
+	addl	$128, %esp
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 	retl
-.Lfunc_end269:
-	.size	mcl_fpDbl_sub17L, .Lfunc_end269-mcl_fpDbl_sub17L
-
-
+.Lfunc_end89:
+	.size	mcl_fpDbl_sub16L, .Lfunc_end89-mcl_fpDbl_sub16L
+                                        # -- End function
 	.section	".note.GNU-stack","",@progbits

From 014ecddf18c46b5fa9a6d80ca56086c536dd7c30 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 16:14:44 +0900
Subject: [PATCH 525/553] use montRed for sqrMont

---
 src/low_func.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/low_func.hpp b/src/low_func.hpp
index 77c3805b..580e6ef2 100644
--- a/src/low_func.hpp
+++ b/src/low_func.hpp
@@ -641,7 +641,7 @@ template<size_t N, bool isFullBit, class Tag = Gtag>
 struct SqrMont {
 	static inline void func(Unit *y, const Unit *x, const Unit *p)
 	{
-#if MCL_MAX_BIT_SIZE == 1024 || MCL_SIZEOF_UNIT == 4 // check speed
+#if 0 // #if MCL_MAX_BIT_SIZE == 1024 || MCL_SIZEOF_UNIT == 4 // check speed
 		Unit xx[N * 2];
 		SqrPre<N, Tag>::f(xx, x);
 		MontRed<N, isFullBit, Tag>::f(y, xx, p);

From fad6e7b3e3f3b5eefcc78cabf31a8c025302e74c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 17:37:35 +0900
Subject: [PATCH 526/553] set CFLAGS/LDFLAGS only if necessary

---
 common.mk | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/common.mk b/common.mk
index c42d1ca6..83b63375 100644
--- a/common.mk
+++ b/common.mk
@@ -20,12 +20,6 @@ ifeq ($(UNAME_S),Darwin)
     OS=mac-m1
   endif
   LIB_SUF=dylib
-  OPENSSL_DIR?=/usr/local/opt/openssl
-  CFLAGS+=-I$(OPENSSL_DIR)/include
-  LDFLAGS+=-L$(OPENSSL_DIR)/lib
-  GMP_DIR?=/usr/local/opt/gmp
-  CFLAGS+=-I$(GMP_DIR)/include
-  LDFLAGS+=-L$(GMP_DIR)/lib
   NASM_ELF_OPT=-fmacho64
 else
   LIB_SUF=so
@@ -130,9 +124,19 @@ ifeq ($(MCL_USE_OPENSSL),0)
 endif
 ifeq ($(MCL_USE_GMP),1)
   GMP_LIB=-lgmp -lgmpxx
+  ifeq ($(UNAME_S),Darwin)
+    GMP_DIR?=/usr/local/opt/gmp
+    CFLAGS+=-I$(GMP_DIR)/include
+    LDFLAGS+=-L$(GMP_DIR)/lib
+  endif
 endif
 ifeq ($(MCL_USE_OPENSSL),1)
   OPENSSL_LIB=-lcrypto
+  ifeq ($(UNAME_S),Darwin)
+    OPENSSL_DIR?=/usr/local/opt/openssl
+    CFLAGS+=-I$(OPENSSL_DIR)/include
+    LDFLAGS+=-L$(OPENSSL_DIR)/lib
+  endif
 endif
 ifeq ($(MCL_STATIC_CODE),1)
   MCL_USE_XBYAK=0

From 301051db5323d90ef8ff4de2bc2347f101c7acb3 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 17:38:20 +0900
Subject: [PATCH 527/553] update doc

---
 readme.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/readme.md b/readme.md
index b88f9c77..c41cb1bf 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,7 @@ mcl is a library for pairing-based cryptography,
 which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
 
 # News
+- improve M1 mac performance
 - set default `MCL_MAX_BIT_SIZE=512` so disable to support `NICT_P521`.
 - improve performance
 - support M1 mac

From b8b5c37e9e0330d3678a2b4b37caf8eb5c0456e5 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 17:38:39 +0900
Subject: [PATCH 528/553] v1.38

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 8c973a23..83c62a3f 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x137; /* 0xABC = A.BC */
+static const int version = 0x138; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From c4451d9c00aecf41017f5a9a4987b7041d407e55 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 18:10:23 +0900
Subject: [PATCH 529/553] [skip ci] disable ffi/java test

---
 .github/workflows/main.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 871a9b01..22bf95a9 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -17,5 +17,5 @@ jobs:
     - run: make test_ci DEBUG=1 -j4 CXX=clang++ || dmesg | tail
     - run: make clean
 #    - run: make test_go
-    - run: sudo apt install openjdk-8-jdk
-    - run: make -C ffi/java test JAVA_INC=-I/usr/lib/jvm/java-8-openjdk-amd64/include
+#    - run: sudo apt install openjdk-8-jdk
+#    - run: make -C ffi/java test JAVA_INC=-I/usr/lib/jvm/java-8-openjdk-amd64/include

From c2d843e6e0d62d5a00590cadadd3e2d17b260832 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 20:49:05 +0900
Subject: [PATCH 530/553] [cs] fix expected value of test for mapToG1

---
 ffi/cs/test/test.cs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/ffi/cs/test/test.cs b/ffi/cs/test/test.cs
index 832d9697..c9393e3b 100644
--- a/ffi/cs/test/test.cs
+++ b/ffi/cs/test/test.cs
@@ -228,9 +228,8 @@ static void TestETH_mapToG1()
             var tbl = new[] {
                 new {
                     msg = "asdf",
-                    dst = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_",
-                    x = "bc73d15443009a8ff2ddce864136d892274dd8365c60d0d2d44cc543387348e366a8f1e1401427e37743c29ed2c939a",
-                    y = "101e26428a1b78c05458cb1cc37d2d87876ad3437096d2827f376702d4451667fe1fa82e82795495d33d466133ed1862",
+                    x = "a72df17570d0eb81260042edbea415ad49bdb94a1bc1ce9d1bf147d0d48268170764bb513a3b994d662e1faba137106",
+                    y = "122b77eca1ed58795b7cd456576362f4f7bd7a572a29334b4817898a42414d31e9c0267f2dc481a4daf8bcf4a460322",
                 },
            };
             G1 P = new G1();

From 9ee82069bf0c12083ff5bbb6dbb3f9d2de59f0ab Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 21:13:04 +0900
Subject: [PATCH 531/553] [cs] add MulVec for G1

---
 ffi/cs/mcl/mcl.cs   | 12 ++++++++++--
 ffi/cs/test/test.cs | 14 ++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index 133186de..24fc55dd 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -1,4 +1,4 @@
-﻿using System;
+using System;
 using System.Text;
 using System.Runtime.InteropServices;
 
@@ -73,7 +73,7 @@ public class MCL {
         [DllImport(dllName)] public static extern void mclBnG1_add(ref G1 z, in G1 x, in G1 y);
         [DllImport(dllName)] public static extern void mclBnG1_sub(ref G1 z, in G1 x, in G1 y);
         [DllImport(dllName)] public static extern void mclBnG1_mul(ref G1 z, in G1 x, in Fr y);
-        [DllImport(dllName)] public static extern void mclBnG1_mulVec(ref G1 x, [In]G1[] vec1, [In]Fr[] vec2, long bufSize);
+        [DllImport(dllName)] public static extern void mclBnG1_mulVec(ref G1 z, [In]G1[] x, [In]Fr[] y, long n);
 
         [DllImport(dllName)] public static extern void mclBnG2_clear(ref G2 x);
         [DllImport(dllName)] public static extern int mclBnG2_setStr(ref G2 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
@@ -217,6 +217,14 @@ public static void Normalize(ref G1 y, in G1 x)
         {
             mclBnG1_normalize(ref y, x);
         }
+        public static void MulVec(ref G1 z, in G1[] x, in Fr[] y)
+        {
+            int n = x.Length;
+            if (n <= 0 || n != y.Length) {
+                throw new ArgumentException("bad length");
+            }
+            mclBnG1_mulVec(ref z, x, y, (long)n);
+        }
         public static void Add(ref G2 z, in G2 x, in G2 y)
         {
             mclBnG2_add(ref z, x, y);
diff --git a/ffi/cs/test/test.cs b/ffi/cs/test/test.cs
index c9393e3b..2972cc8e 100644
--- a/ffi/cs/test/test.cs
+++ b/ffi/cs/test/test.cs
@@ -164,6 +164,20 @@ static void TestG1()
                 Q.Deserialize(buf);
                 assert("P == Q", P.Equals(Q));
             }
+            {
+                const int n = 5;
+                G1[] xVec = new G1[n];
+                Fr[] yVec = new Fr[n];
+                P.Clear();
+                for (int i = 0; i < n; i++) {
+                    xVec[i].HashAndMapTo(i.ToString());
+                    yVec[i].SetByCSPRNG();
+                    Q.Mul(xVec[i], yVec[i]);
+                    P.Add(P, Q);
+                }
+                MulVec(ref Q, xVec, yVec);
+                assert("mulVecG1", P.Equals(Q));
+            }
         }
         static void TestG2()
         {

From 67cea40560814a411ac1284308748b28066f58d5 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Mon, 15 Mar 2021 21:13:19 +0900
Subject: [PATCH 532/553] [cs] add MulVec for G2

---
 ffi/cs/mcl/mcl.cs   |  9 +++++++++
 ffi/cs/test/test.cs | 14 ++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index 24fc55dd..3897bd93 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -88,6 +88,7 @@ public class MCL {
         [DllImport(dllName)] public static extern void mclBnG2_add(ref G2 z, in G2 x, in G2 y);
         [DllImport(dllName)] public static extern void mclBnG2_sub(ref G2 z, in G2 x, in G2 y);
         [DllImport(dllName)] public static extern void mclBnG2_mul(ref G2 z, in G2 x, in Fr y);
+        [DllImport(dllName)] public static extern void mclBnG2_mulVec(ref G2 z, [In] G2[] x, [In] Fr[] y, long n);
 
         [DllImport(dllName)] public static extern void mclBnGT_clear(ref GT x);
         [DllImport(dllName)] public static extern int mclBnGT_setStr(ref GT x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
@@ -249,6 +250,14 @@ public static void Normalize(ref G2 y, in G2 x)
         {
             mclBnG2_normalize(ref y, x);
         }
+        public static void MulVec(ref G2 z, in G2[] x, in Fr[] y)
+        {
+            int n = x.Length;
+            if (n <= 0 || n != y.Length) {
+                throw new ArgumentException("bad length");
+            }
+            mclBnG2_mulVec(ref z, x, y, (long)n);
+        }
         public static void Add(ref GT z, in GT x, in GT y)
         {
             mclBnGT_add(ref z, x, y);
diff --git a/ffi/cs/test/test.cs b/ffi/cs/test/test.cs
index 2972cc8e..19b0e2f1 100644
--- a/ffi/cs/test/test.cs
+++ b/ffi/cs/test/test.cs
@@ -210,6 +210,20 @@ static void TestG2()
                 Q.Deserialize(buf);
                 assert("P == Q", P.Equals(Q));
             }
+            {
+                const int n = 5;
+                G2[] xVec = new G2[n];
+                Fr[] yVec = new Fr[n];
+                P.Clear();
+                for (int i = 0; i < n; i++) {
+                    xVec[i].HashAndMapTo(i.ToString());
+                    yVec[i].SetByCSPRNG();
+                    Q.Mul(xVec[i], yVec[i]);
+                    P.Add(P, Q);
+                }
+                MulVec(ref Q, xVec, yVec);
+                assert("mulVecG2", P.Equals(Q));
+            }
         }
         static void TestPairing()
         {

From 1564151d1b01052f3bb6469989a4e293c52733fe Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 19 Mar 2021 17:11:46 +0900
Subject: [PATCH 533/553] use MCL_SIZEOF_UNIT instead of CYBOZU_OS_BIT

---
 src/fp.cpp            | 12 ++++++------
 src/low_func_llvm.hpp |  4 ++--
 src/proto.hpp         |  2 +-
 test/llvm_test.cpp    | 10 +++++-----
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/fp.cpp b/src/fp.cpp
index d8425a0b..6d94ef08 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -569,16 +569,16 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
 	}
 #endif
 	switch (N) {
-	case 192/CYBOZU_OS_BIT:  setOp<192/CYBOZU_OS_BIT>(*this, mode); break;
-#if CYBOZU_OS_BIT == 32
-	case 224/CYBOZU_OS_BIT:  setOp<224/CYBOZU_OS_BIT>(*this, mode); break;
+	case 192/(MCL_SIZEOF_UNIT * 8):  setOp<192/(MCL_SIZEOF_UNIT * 8)>(*this, mode); break;
+#if (MCL_SIZEOF_UNIT * 8) == 32
+	case 224/(MCL_SIZEOF_UNIT * 8):  setOp<224/(MCL_SIZEOF_UNIT * 8)>(*this, mode); break;
 #endif
-	case 256/CYBOZU_OS_BIT:  setOp<256/CYBOZU_OS_BIT>(*this, mode); break;
+	case 256/(MCL_SIZEOF_UNIT * 8):  setOp<256/(MCL_SIZEOF_UNIT * 8)>(*this, mode); break;
 #if MCL_MAX_BIT_SIZE >= 384
-	case 384/CYBOZU_OS_BIT:  setOp<384/CYBOZU_OS_BIT>(*this, mode); break;
+	case 384/(MCL_SIZEOF_UNIT * 8):  setOp<384/(MCL_SIZEOF_UNIT * 8)>(*this, mode); break;
 #endif
 #if MCL_MAX_BIT_SIZE >= 512
-	case 512/CYBOZU_OS_BIT:  setOp<512/CYBOZU_OS_BIT>(*this, mode); break;
+	case 512/(MCL_SIZEOF_UNIT * 8):  setOp<512/(MCL_SIZEOF_UNIT * 8)>(*this, mode); break;
 #endif
 	default:
 		return false;
diff --git a/src/low_func_llvm.hpp b/src/low_func_llvm.hpp
index e5567fed..f7f6cf1c 100644
--- a/src/low_func_llvm.hpp
+++ b/src/low_func_llvm.hpp
@@ -51,7 +51,7 @@ template<>const void4u DblSub<n, tag>::f = &mcl_fpDbl_sub ## n ## suf; \
 	MCL_DEF_LLVM_FUNC2(n, Ltag, L)
 #endif
 
-#if CYBOZU_OS_BIT == 32
+#if MCL_SIZEOF_UNIT == 4
 
 MCL_DEF_LLVM_FUNC(6)
 MCL_DEF_LLVM_FUNC(7)
@@ -63,7 +63,7 @@ MCL_DEF_LLVM_FUNC(12)
 MCL_DEF_LLVM_FUNC(16)
 #endif
 
-#else // 64
+#else
 
 MCL_DEF_LLVM_FUNC(3)
 MCL_DEF_LLVM_FUNC(4)
diff --git a/src/proto.hpp b/src/proto.hpp
index 70588f74..aa3a8ab6 100644
--- a/src/proto.hpp
+++ b/src/proto.hpp
@@ -38,7 +38,7 @@ void mcl_fpDbl_mod_NIST_P521 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, c
 
 extern "C" {
 
-#if CYBOZU_OS_BIT == 32
+#if MCL_SIZEOF_UNIT == 4
 
 MCL_FP_DEF_FUNC(6)
 MCL_FP_DEF_FUNC(7)
diff --git a/test/llvm_test.cpp b/test/llvm_test.cpp
index 876a2353..15072f9e 100644
--- a/test/llvm_test.cpp
+++ b/test/llvm_test.cpp
@@ -65,14 +65,14 @@ template<>void sqrPre<n>(Unit *z, const Unit *x) { mcl_fpDbl_sqrPre ## n ## suf(
 template<>void mod<n>(Unit *z, const Unit *x, const Unit *p) { mcl_fp_montRedNF ## n ## suf(z, x, p); } \
 template<>void mont<n>(Unit *z, const Unit *x, const Unit *y, const Unit *p) { mcl_fp_montNF ## n ## suf(z, x, y, p); }
 
-#if CYBOZU_OS_BIT == 64
+#if MCL_SIZEOF_UNIT == 8
 MCL_FP_DEF_FUNC_SUB(4, L)
 MCL_FP_DEF_FUNC_SUB(5, L)
 #endif
 MCL_FP_DEF_FUNC_SUB(6, L)
 //MCL_FP_DEF_FUNC_SUB(7, L)
 MCL_FP_DEF_FUNC_SUB(8, L)
-#if CYBOZU_OS_BIT == 32
+#if MCL_SIZEOF_UNIT == 4
 MCL_FP_DEF_FUNC_SUB(12, L)
 MCL_FP_DEF_FUNC_SUB(16, L)
 #endif
@@ -95,7 +95,7 @@ void bench(Unit *x, Unit *y, const Unit *p)
 {
 	printf("N=%zd\n", N);
 	Unit xx[N * 2], yy[N * 2];
-#if CYBOZU_OS_BIT == 64
+#if MCL_SIZEOF_UNIT == 8
 	const int C = 10000;
 #else
 	const int C = 1000;
@@ -115,14 +115,14 @@ int main()
 	setRand(x, maxN, rg);
 	setRand(y, maxN, rg);
 	setRand(p, maxN + 1, rg);
-#if CYBOZU_OS_BIT == 64
+#if MCL_SIZEOF_UNIT == 8
 	bench<4>(x, y, p + 1);
 	bench<5>(x, y, p + 1);
 #endif
 	bench<6>(x, y, p + 1);
 //	bench<7>(x, y, p + 1);
 	bench<8>(x, y, p + 1);
-#if CYBOZU_OS_BIT == 32
+#if MCL_SIZEOF_UNIT == 4
 	bench<12>(x, y, p + 1);
 	bench<16>(x, y, p + 1);
 #endif

From 6b2d38e2b66ffe711b97ce6fffaee353b6102767 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 19 Mar 2021 17:18:19 +0900
Subject: [PATCH 534/553] v1.39

---
 include/mcl/op.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 83c62a3f..954da950 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x138; /* 0xABC = A.BC */
+static const int version = 0x139; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()

From 5221e860f6149c5bdbdb584e485494a96980e968 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 20 Mar 2021 10:33:02 +0900
Subject: [PATCH 535/553] simplify generic add

---
 include/mcl/vint.hpp | 27 +++++++++++----------------
 src/fp.cpp           |  2 +-
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/include/mcl/vint.hpp b/include/mcl/vint.hpp
index 3bed5278..1e726002 100644
--- a/include/mcl/vint.hpp
+++ b/include/mcl/vint.hpp
@@ -197,14 +197,11 @@ T addN(T *z, const T *x, const T *y, size_t n)
 	T c = 0;
 	for (size_t i = 0; i < n; i++) {
 		T xc = x[i] + c;
-		if (xc < c) {
-			// x[i] = Unit(-1) and c = 1
-			z[i] = y[i];
-		} else {
-			xc += y[i];
-			c = y[i] > xc ? 1 : 0;
-			z[i] = xc;
-		}
+		c = xc < c;
+		T yi = y[i];
+		xc += yi;
+		c += xc < yi;
+		z[i] = xc;
 	}
 	return c;
 }
@@ -285,14 +282,12 @@ T subN(T *z, const T *x, const T *y, size_t n)
 	assert(n > 0);
 	T c = 0;
 	for (size_t i = 0; i < n; i++) {
-		T yc = y[i] + c;
-		if (yc < c) {
-			// y[i] = T(-1) and c = 1
-			z[i] = x[i];
-		} else {
-			c = x[i] < yc ? 1 : 0;
-			z[i] = x[i] - yc;
-		}
+		T yi = y[i];
+		yi += c;
+		c = yi < c;
+		T xi = x[i];
+		c += xi < yi;
+		z[i] = xi - yi;
 	}
 	return c;
 }
diff --git a/src/fp.cpp b/src/fp.cpp
index 6d94ef08..c43abebd 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -583,7 +583,7 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
 	default:
 		return false;
 	}
-#ifdef USE_WASM
+#if defined(USE_WASM) && MCL_SIZEOF_UNIT == 4
 	if (N == 8) {
 		setWasmOp<8>(*this);
 	} else if (N == 12) {

From 2a82e2531ab7a8912f6f589a18689eba5ce9aef0 Mon Sep 17 00:00:00 2001
From: Valdas Rakutis <valdas@rakutis.lt>
Date: Sat, 20 Mar 2021 13:35:21 +0200
Subject: [PATCH 536/553] [cs] add some arithmetic operator overloads for g2,
 static factory, fix serialization issue

---
 ffi/cs/mcl/mcl.cs   | 50 ++++++++++++++++++++++++++++++++++++++++++---
 ffi/cs/test/test.cs |  4 ++++
 2 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index 3897bd93..99687a3c 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -606,11 +606,20 @@ public struct Fp2 {
         [StructLayout(LayoutKind.Sequential)]
         public struct G1 {
             public Fp x, y, z;
+            public static G1 Zero()
+            {
+                var g1 = new G1();
+                g1.x.SetInt(1);
+                g1.y.SetInt(1);
+                g1.z.SetInt(0);
+
+                return g1;
+            }
             public void Clear()
             {
                 mclBnG1_clear(ref this);
             }
-            public void SetStr(String s, int ioMode)
+            public void SetStr(string s, int ioMode)
             {
                 if (mclBnG1_setStr(ref this, s, s.Length, ioMode) != 0) {
                     throw new ArgumentException("mclBnG1_setStr:" + s);
@@ -711,11 +720,22 @@ public void Mul(in G1 x, in Fr y)
         [StructLayout(LayoutKind.Sequential)]
         public struct G2 {
             public Fp2 x, y, z;
+            public static G2 Zero()
+            {
+                var g2 = new G2();
+                g2.x.a.SetInt(1);
+                g2.x.b.SetInt(0);
+                g2.y.a.SetInt(1);
+                g2.y.b.SetInt(0);
+                g2.x.a.SetInt(0);
+                g2.x.a.SetInt(0);
+                return g2;
+            }
             public void Clear()
             {
                 mclBnG2_clear(ref this);
             }
-            public void SetStr(String s, int ioMode)
+            public void SetStr(string s, int ioMode)
             {
                 if (mclBnG2_setStr(ref this, s, s.Length, ioMode) != 0) {
                     throw new ArgumentException("mclBnG2_setStr:" + s);
@@ -788,6 +808,30 @@ public void Mul(in G2 x, in Fr y)
             {
                 MCL.Mul(ref this, x, y);
             }
+            public static G2 operator -(in G2 x)
+            {
+                var result = new G2();
+                result.Neg(x);
+                return result;
+            }
+            public static G2 operator +(in G2 left, in G2 right)
+            {
+                var result = new G2();
+                result.Add(left, right);
+                return result;
+            }
+            public static G2 operator -(in G2 left, in G2 right)
+            {
+                var result = new G2();
+                result.Sub(left, right);
+                return result;
+            }
+            public static G2 operator *(in G2 left, in Fr right)
+            {
+                var result = new G2();
+                result.Mul(left, right);
+                return result;
+            }
         }
         [StructLayout(LayoutKind.Sequential)]
         public struct GT {
@@ -816,7 +860,7 @@ public bool IsOne()
             }
             public string GetStr(int ioMode)
             {
-                StringBuilder sb = new StringBuilder(1024);
+                StringBuilder sb = new StringBuilder(2048);
                 long size = mclBnGT_getStr(sb, sb.Capacity, this, ioMode);
                 if (size == 0) {
                     throw new InvalidOperationException("mclBnGT_getStr:");
diff --git a/ffi/cs/test/test.cs b/ffi/cs/test/test.cs
index 19b0e2f1..eb5318d2 100644
--- a/ffi/cs/test/test.cs
+++ b/ffi/cs/test/test.cs
@@ -178,6 +178,8 @@ static void TestG1()
                 MulVec(ref Q, xVec, yVec);
                 assert("mulVecG1", P.Equals(Q));
             }
+            G1 W = G1.Zero();
+            assert("W.IsZero", W.IsZero());
         }
         static void TestG2()
         {
@@ -224,6 +226,8 @@ static void TestG2()
                 MulVec(ref Q, xVec, yVec);
                 assert("mulVecG2", P.Equals(Q));
             }
+            G2 W = G2.Zero();
+            assert("W.IsZero", W.IsZero());
         }
         static void TestPairing()
         {

From d91c0fc266a6ba145e33e3755d1fcf1f98c90df6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Tue, 23 Mar 2021 10:33:17 +0900
Subject: [PATCH 537/553] [wasm] reduce cast operation

---
 src/low_func_wasm.hpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/low_func_wasm.hpp b/src/low_func_wasm.hpp
index 352d4469..bb61f664 100644
--- a/src/low_func_wasm.hpp
+++ b/src/low_func_wasm.hpp
@@ -97,15 +97,17 @@ uint32_t mulUnitT(uint32_t z[N], const uint32_t x[N], uint32_t y)
 template<size_t N>
 uint32_t addMulUnitT(uint32_t z[N], const uint32_t x[N], uint32_t y)
 {
-	uint32_t H = 0;
+	// reduce cast operation
+	uint64_t H = 0;
+	uint64_t yy = y;
 	for (size_t i = 0; i < N; i++) {
-		uint64_t v = uint64_t(x[i]) * y;
+		uint64_t v = x[i] * yy;
 		v += H;
 		v += z[i];
 		z[i] = uint32_t(v);
-		H = uint32_t(v >> 32);
+		H = v >> 32;
 	}
-	return H;
+	return uint32_t(H);
 }
 
 // z[N * 2] = x[N] * y[N]

From aa3e88e54e3e98c24a020c95dac3aae4e6fb164a Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 24 Mar 2021 10:36:14 +0900
Subject: [PATCH 538/553] [cs] check that default value is zero

---
 ffi/cs/test/test.cs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ffi/cs/test/test.cs b/ffi/cs/test/test.cs
index eb5318d2..d3c023b8 100644
--- a/ffi/cs/test/test.cs
+++ b/ffi/cs/test/test.cs
@@ -47,6 +47,7 @@ static void TestFr()
         {
             Console.WriteLine("TestFr");
             Fr x = new Fr();
+            assert("x.isZero", x.IsZero());
             x.Clear();
             assert("0", x.GetStr(10) == "0");
             assert("0.IzZero", x.IsZero());
@@ -92,6 +93,7 @@ static void TestFp()
         {
             Console.WriteLine("TestFp");
             Fp x = new Fp();
+            assert("x.isZero", x.IsZero());
             x.Clear();
             assert("0", x.GetStr(10) == "0");
             assert("0.IzZero", x.IsZero());
@@ -137,6 +139,7 @@ static void TestG1()
         {
             Console.WriteLine("TestG1");
             G1 P = new G1();
+            assert("P.isZero", P.IsZero());
             P.Clear();
             assert("P.IsValid", P.IsValid());
             assert("P.IsZero", P.IsZero());
@@ -185,6 +188,7 @@ static void TestG2()
         {
             Console.WriteLine("TestG2");
             G2 P = new G2();
+            assert("P.isZero", P.IsZero());
             P.Clear();
             assert("P is valid", P.IsValid());
             assert("P is zero", P.IsZero());

From 19aad60e2af9f277e18ed7661235bcdde089b976 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 24 Mar 2021 10:37:41 +0900
Subject: [PATCH 539/553] [cs] remove unnecessary set

---
 ffi/cs/mcl/mcl.cs | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index 99687a3c..e508b8b1 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -609,10 +609,6 @@ public struct G1 {
             public static G1 Zero()
             {
                 var g1 = new G1();
-                g1.x.SetInt(1);
-                g1.y.SetInt(1);
-                g1.z.SetInt(0);
-
                 return g1;
             }
             public void Clear()
@@ -723,12 +719,6 @@ public struct G2 {
             public static G2 Zero()
             {
                 var g2 = new G2();
-                g2.x.a.SetInt(1);
-                g2.x.b.SetInt(0);
-                g2.y.a.SetInt(1);
-                g2.y.b.SetInt(0);
-                g2.x.a.SetInt(0);
-                g2.x.a.SetInt(0);
                 return g2;
             }
             public void Clear()

From 1456a13f4c4572438dfd7a186309318c860db8d6 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 24 Mar 2021 10:38:01 +0900
Subject: [PATCH 540/553] [cs] rename variables

---
 ffi/cs/mcl/mcl.cs | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index e508b8b1..0202a664 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -312,10 +312,9 @@ public struct Fr {
             private U128 v0, v1;
             public static Fr One()
             {
-                var fr = new Fr();
-                fr.SetInt(1);
-
-                return fr;
+                var x = new Fr();
+                x.SetInt(1);
+                return x;
             }
             public static Fr Zero() => new Fr();
             public void Clear()
@@ -462,9 +461,9 @@ public struct Fp {
             private U128 v0, v1, v2;
             public static Fp One()
             {
-                var fp = new Fp();
-                fp.SetInt(1);
-                return fp;
+                var x = new Fp();
+                x.SetInt(1);
+                return x;
             }
             public static Fp Zero() => new Fp();
             public void Clear()
@@ -804,23 +803,23 @@ public void Mul(in G2 x, in Fr y)
                 result.Neg(x);
                 return result;
             }
-            public static G2 operator +(in G2 left, in G2 right)
+            public static G2 operator +(in G2 x, in G2 y)
             {
-                var result = new G2();
-                result.Add(left, right);
-                return result;
+                var z = new G2();
+                z.Add(x, y);
+                return z;
             }
-            public static G2 operator -(in G2 left, in G2 right)
+            public static G2 operator -(in G2 x, in G2 y)
             {
-                var result = new G2();
-                result.Sub(left, right);
-                return result;
+                var z = new G2();
+                z.Sub(x, y);
+                return z;
             }
-            public static G2 operator *(in G2 left, in Fr right)
+            public static G2 operator *(in G2 x, in Fr y)
             {
-                var result = new G2();
-                result.Mul(left, right);
-                return result;
+                var z = new G2();
+                z.Mul(x, y);
+                return z;
             }
         }
         [StructLayout(LayoutKind.Sequential)]

From c41e9e459d98f5ffc74d6c23520dd48c941b869c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 24 Mar 2021 16:20:06 +0900
Subject: [PATCH 541/553] disable JIT unless both mulx and adox are supported

---
 src/fp_generator.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index d4209809..e902ebd3 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -312,6 +312,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		if (!cpu.has(Xbyak::util::Cpu::tAVX)) return false;
 		useMulx_ = cpu.has(Xbyak::util::Cpu::tBMI2);
 		useAdx_ = cpu.has(Xbyak::util::Cpu::tADX);
+		if (!(useMulx_ && useAdx_)) return false;
 #endif
 		reset(); // reset jit code for reuse
 #ifndef MCL_DUMP_JIT

From ea8c103aa3f62a461164fd3fb1d0654f430c4b88 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 24 Mar 2021 16:21:55 +0900
Subject: [PATCH 542/553] test by valgrind

---
 .github/workflows/main.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 22bf95a9..ddd42a49 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -8,6 +8,9 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - run: lscpu
+    - run: sudo apt install valgrind
+    - run: make bin/pairing.exe -j4
+    - run: valgrind bin/pairing.exe
 #    - run: wget https://software.intel.com/content/dam/develop/external/us/en/documents/downloads/sde-external-8.63.0-2021-01-18-lin.tar.bz2
 #    - run: bzip2 -dc sde-external-8.63.0-2021-01-18-lin.tar.bz2 | tar xvf -
 #    - run: make bin/bn_test.exe DEBUG=1 -j4

From 05213a0fbb739eb7c60730167cae24afbd96d843 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 24 Mar 2021 16:31:00 +0900
Subject: [PATCH 543/553] remove dmesg in main.yml

---
 .github/workflows/main.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index ddd42a49..ff079910 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -11,13 +11,14 @@ jobs:
     - run: sudo apt install valgrind
     - run: make bin/pairing.exe -j4
     - run: valgrind bin/pairing.exe
+    - run: make clean
 #    - run: wget https://software.intel.com/content/dam/develop/external/us/en/documents/downloads/sde-external-8.63.0-2021-01-18-lin.tar.bz2
 #    - run: bzip2 -dc sde-external-8.63.0-2021-01-18-lin.tar.bz2 | tar xvf -
 #    - run: make bin/bn_test.exe DEBUG=1 -j4
 #    - run: sde-external-8.63.0-2021-01-18-lin/sde64 -hsw -- bin/bn_test.exe
-    - run: make test_ci DEBUG=1 -j4 || dmesg | tail
+    - run: make test_ci DEBUG=1 -j4
     - run: make clean
-    - run: make test_ci DEBUG=1 -j4 CXX=clang++ || dmesg | tail
+    - run: make test_ci DEBUG=1 -j4 CXX=clang++
     - run: make clean
 #    - run: make test_go
 #    - run: sudo apt install openjdk-8-jdk

From e9399112b76ad51a350b71e9a519764057fdb0f7 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 24 Mar 2021 16:43:03 +0900
Subject: [PATCH 544/553] v1.40

---
 include/mcl/op.hpp | 2 +-
 readme.md          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 954da950..7f15b998 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -26,7 +26,7 @@
 
 namespace mcl {
 
-static const int version = 0x139; /* 0xABC = A.BC */
+static const int version = 0x140; /* 0xABC = A.BC */
 
 /*
 	specifies available string format mode for X::setIoMode()
diff --git a/readme.md b/readme.md
index c41cb1bf..1c8dab5d 100644
--- a/readme.md
+++ b/readme.md
@@ -323,6 +323,7 @@ If `MCL_USE_OLD_MAPTO_FOR_BLS12` is defined, then the old function is used, but
 
 # History
 
+- 2021/Mar/24 v1.40 fix sigsegv in valgrind
 - 2021/Jan/28 v1.31 fix : call setOrder in init for isValidOrder
 - 2021/Jan/28 v1.30 a little optimization of Fp operations
 - 2020/Nov/14 v1.28 support M1 mac

From de148824a4fd7320e7cd42433a2b64ec6a991683 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Mar 2021 15:54:44 +0900
Subject: [PATCH 545/553] [skip ci][java] add Fr::inv and Fp::inv

---
 ffi/java/MclTest.java               |  3 ++
 ffi/java/com/herumi/mcl/Fp.java     |  4 ++
 ffi/java/com/herumi/mcl/Fr.java     |  4 ++
 ffi/java/com/herumi/mcl/Mcl.java    | 10 +++-
 ffi/java/com/herumi/mcl/MclJNI.java |  6 ++-
 ffi/java/mcl_impl.hpp               | 14 ++++++
 ffi/java/mcl_wrap.cxx               | 76 ++++++++++++++++++++++++++++-
 7 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/ffi/java/MclTest.java b/ffi/java/MclTest.java
index e250d977..38e83eac 100644
--- a/ffi/java/MclTest.java
+++ b/ffi/java/MclTest.java
@@ -45,6 +45,9 @@ public static void testCurve(int curveType, String name) {
 			assertEquals("x == 12", (new Fr("12")).toString(), "12");
 			assertEquals("x == 18", (new Fr("12", 16)).toString(), "18");
 			assertEquals("x == ff", (new Fr("255")).toString(16), "ff");
+			Mcl.inv(y, x);
+			Mcl.mul(x, y, x);
+			assertBool("x == 1", x.isOne());
 
 			{
 				byte[] b = x.serialize();
diff --git a/ffi/java/com/herumi/mcl/Fp.java b/ffi/java/com/herumi/mcl/Fp.java
index 5d50e988..2b39ffd6 100644
--- a/ffi/java/com/herumi/mcl/Fp.java
+++ b/ffi/java/com/herumi/mcl/Fp.java
@@ -64,6 +64,10 @@ public boolean isZero() {
     return MclJNI.Fp_isZero(swigCPtr, this);
   }
 
+  public boolean isOne() {
+    return MclJNI.Fp_isOne(swigCPtr, this);
+  }
+
   public void setStr(String str, int base) {
     MclJNI.Fp_setStr__SWIG_0(swigCPtr, this, str, base);
   }
diff --git a/ffi/java/com/herumi/mcl/Fr.java b/ffi/java/com/herumi/mcl/Fr.java
index 3b5f1c34..b26d0019 100644
--- a/ffi/java/com/herumi/mcl/Fr.java
+++ b/ffi/java/com/herumi/mcl/Fr.java
@@ -64,6 +64,10 @@ public boolean isZero() {
     return MclJNI.Fr_isZero(swigCPtr, this);
   }
 
+  public boolean isOne() {
+    return MclJNI.Fr_isOne(swigCPtr, this);
+  }
+
   public void setStr(String str, int base) {
     MclJNI.Fr_setStr__SWIG_0(swigCPtr, this, str, base);
   }
diff --git a/ffi/java/com/herumi/mcl/Mcl.java b/ffi/java/com/herumi/mcl/Mcl.java
index 8074e393..502a6614 100644
--- a/ffi/java/com/herumi/mcl/Mcl.java
+++ b/ffi/java/com/herumi/mcl/Mcl.java
@@ -17,6 +17,10 @@ public static void neg(Fr y, Fr x) {
     MclJNI.neg__SWIG_0(Fr.getCPtr(y), y, Fr.getCPtr(x), x);
   }
 
+  public static void inv(Fr y, Fr x) {
+    MclJNI.inv__SWIG_0(Fr.getCPtr(y), y, Fr.getCPtr(x), x);
+  }
+
   public static void add(Fr z, Fr x, Fr y) {
     MclJNI.add__SWIG_0(Fr.getCPtr(z), z, Fr.getCPtr(x), x, Fr.getCPtr(y), y);
   }
@@ -49,6 +53,10 @@ public static void neg(Fp y, Fp x) {
     MclJNI.neg__SWIG_1(Fp.getCPtr(y), y, Fp.getCPtr(x), x);
   }
 
+  public static void inv(Fp y, Fp x) {
+    MclJNI.inv__SWIG_1(Fp.getCPtr(y), y, Fp.getCPtr(x), x);
+  }
+
   public static void add(Fp z, Fp x, Fp y) {
     MclJNI.add__SWIG_1(Fp.getCPtr(z), z, Fp.getCPtr(x), x, Fp.getCPtr(y), y);
   }
@@ -114,7 +122,7 @@ public static void mul(GT z, GT x, GT y) {
   }
 
   public static void inv(GT y, GT x) {
-    MclJNI.inv(GT.getCPtr(y), y, GT.getCPtr(x), x);
+    MclJNI.inv__SWIG_2(GT.getCPtr(y), y, GT.getCPtr(x), x);
   }
 
 }
diff --git a/ffi/java/com/herumi/mcl/MclJNI.java b/ffi/java/com/herumi/mcl/MclJNI.java
index 4ba54c29..1037f682 100644
--- a/ffi/java/com/herumi/mcl/MclJNI.java
+++ b/ffi/java/com/herumi/mcl/MclJNI.java
@@ -11,6 +11,7 @@
 public class MclJNI {
   public final static native void SystemInit(int jarg1);
   public final static native void neg__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_);
+  public final static native void inv__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_);
   public final static native void add__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
   public final static native void sub__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
   public final static native void mul__SWIG_0(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_, long jarg3, Fr jarg3_);
@@ -25,6 +26,7 @@ public class MclJNI {
   public final static native long new_Fr__SWIG_4(String jarg1);
   public final static native boolean Fr_equals(long jarg1, Fr jarg1_, long jarg2, Fr jarg2_);
   public final static native boolean Fr_isZero(long jarg1, Fr jarg1_);
+  public final static native boolean Fr_isOne(long jarg1, Fr jarg1_);
   public final static native void Fr_setStr__SWIG_0(long jarg1, Fr jarg1_, String jarg2, int jarg3);
   public final static native void Fr_setStr__SWIG_1(long jarg1, Fr jarg1_, String jarg2);
   public final static native void Fr_setInt(long jarg1, Fr jarg1_, int jarg2);
@@ -38,6 +40,7 @@ public class MclJNI {
   public final static native byte[] Fr_serialize(long jarg1, Fr jarg1_);
   public final static native void delete_Fr(long jarg1);
   public final static native void neg__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_);
+  public final static native void inv__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_);
   public final static native void add__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
   public final static native void sub__SWIG_1(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
   public final static native void mul__SWIG_3(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_, long jarg3, Fp jarg3_);
@@ -49,6 +52,7 @@ public class MclJNI {
   public final static native long new_Fp__SWIG_4(String jarg1);
   public final static native boolean Fp_equals(long jarg1, Fp jarg1_, long jarg2, Fp jarg2_);
   public final static native boolean Fp_isZero(long jarg1, Fp jarg1_);
+  public final static native boolean Fp_isOne(long jarg1, Fp jarg1_);
   public final static native void Fp_setStr__SWIG_0(long jarg1, Fp jarg1_, String jarg2, int jarg3);
   public final static native void Fp_setStr__SWIG_1(long jarg1, Fp jarg1_, String jarg2);
   public final static native void Fp_setInt(long jarg1, Fp jarg1_, int jarg2);
@@ -100,7 +104,7 @@ public class MclJNI {
   public final static native byte[] G2_serialize(long jarg1, G2 jarg1_);
   public final static native void delete_G2(long jarg1);
   public final static native void mul__SWIG_4(long jarg1, GT jarg1_, long jarg2, GT jarg2_, long jarg3, GT jarg3_);
-  public final static native void inv(long jarg1, GT jarg1_, long jarg2, GT jarg2_);
+  public final static native void inv__SWIG_2(long jarg1, GT jarg1_, long jarg2, GT jarg2_);
   public final static native long new_GT__SWIG_0();
   public final static native long new_GT__SWIG_1(long jarg1, GT jarg1_);
   public final static native boolean GT_equals(long jarg1, GT jarg1_, long jarg2, GT jarg2_);
diff --git a/ffi/java/mcl_impl.hpp b/ffi/java/mcl_impl.hpp
index b78faf71..b29de7e9 100644
--- a/ffi/java/mcl_impl.hpp
+++ b/ffi/java/mcl_impl.hpp
@@ -61,6 +61,7 @@ class Fr {
 	friend class G2;
 	friend class GT;
 	friend void neg(Fr& y, const Fr& x);
+	friend void inv(Fr& y, const Fr& x);
 	friend void add(Fr& z, const Fr& x, const Fr& y);
 	friend void sub(Fr& z, const Fr& x, const Fr& y);
 	friend void mul(Fr& z, const Fr& x, const Fr& y);
@@ -76,6 +77,7 @@ class Fr {
 		: self_(str, base) {}
 	bool equals(const Fr& rhs) const { return self_ == rhs.self_; }
 	bool isZero() const { return self_.isZero(); }
+	bool isOne() const { return self_.isOne(); }
 	void setStr(const std::string& str, int base = 0) throw(std::exception)
 	{
 		self_.setStr(str, base);
@@ -119,6 +121,11 @@ void neg(Fr& y, const Fr& x)
 	mcl::bn::Fr::neg(y.self_, x.self_);
 }
 
+void inv(Fr& y, const Fr& x)
+{
+	mcl::bn::Fr::inv(y.self_, x.self_);
+}
+
 void add(Fr& z, const Fr& x, const Fr& y)
 {
 	mcl::bn::Fr::add(z.self_, x.self_, y.self_);
@@ -145,6 +152,7 @@ class Fp {
 	friend class G2;
 	friend class GT;
 	friend void neg(Fp& y, const Fp& x);
+	friend void inv(Fp& y, const Fp& x);
 	friend void add(Fp& z, const Fp& x, const Fp& y);
 	friend void sub(Fp& z, const Fp& x, const Fp& y);
 	friend void mul(Fp& z, const Fp& x, const Fp& y);
@@ -157,6 +165,7 @@ class Fp {
 		: self_(str, base) {}
 	bool equals(const Fp& rhs) const { return self_ == rhs.self_; }
 	bool isZero() const { return self_.isZero(); }
+	bool isOne() const { return self_.isOne(); }
 	void setStr(const std::string& str, int base = 0) throw(std::exception)
 	{
 		self_.setStr(str, base);
@@ -192,6 +201,11 @@ void neg(Fp& y, const Fp& x)
 	mcl::bn::Fp::neg(y.self_, x.self_);
 }
 
+void inv(Fp& y, const Fp& x)
+{
+	mcl::bn::Fp::inv(y.self_, x.self_);
+}
+
 void add(Fp& z, const Fp& x, const Fp& y)
 {
 	mcl::bn::Fp::add(z.self_, x.self_, y.self_);
diff --git a/ffi/java/mcl_wrap.cxx b/ffi/java/mcl_wrap.cxx
index 3c2d95f1..c364cd9f 100644
--- a/ffi/java/mcl_wrap.cxx
+++ b/ffi/java/mcl_wrap.cxx
@@ -286,6 +286,28 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_10(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_inv_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  Fr *arg1 = 0 ;
+  Fr *arg2 = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  arg1 = *(Fr **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr & reference is null");
+    return ;
+  } 
+  arg2 = *(Fr **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fr const & reference is null");
+    return ;
+  } 
+  inv(*arg1,(Fr const &)*arg2);
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fr *arg1 = 0 ;
   Fr *arg2 = 0 ;
@@ -627,6 +649,21 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fr_1isZero(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fr_1isOne(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jboolean jresult = 0 ;
+  Fr *arg1 = (Fr *) 0 ;
+  bool result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fr **)&jarg1; 
+  result = (bool)((Fr const *)arg1)->isOne();
+  jresult = (jboolean)result; 
+  return jresult;
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fr_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   Fr *arg1 = (Fr *) 0 ;
   std::string *arg2 = 0 ;
@@ -908,6 +945,28 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_neg_1_1SWIG_11(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_inv_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+  Fp *arg1 = 0 ;
+  Fp *arg2 = 0 ;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  (void)jarg2_;
+  arg1 = *(Fp **)&jarg1;
+  if (!arg1) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp & reference is null");
+    return ;
+  } 
+  arg2 = *(Fp **)&jarg2;
+  if (!arg2) {
+    SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "Fp const & reference is null");
+    return ;
+  } 
+  inv(*arg1,(Fp const &)*arg2);
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_add_1_1SWIG_11(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
   Fp *arg1 = 0 ;
   Fp *arg2 = 0 ;
@@ -1162,6 +1221,21 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fp_1isZero(JNIEnv *jenv,
 }
 
 
+SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_Fp_1isOne(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
+  jboolean jresult = 0 ;
+  Fp *arg1 = (Fp *) 0 ;
+  bool result;
+  
+  (void)jenv;
+  (void)jcls;
+  (void)jarg1_;
+  arg1 = *(Fp **)&jarg1; 
+  result = (bool)((Fp const *)arg1)->isOne();
+  jresult = (jboolean)result; 
+  return jresult;
+}
+
+
 SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_Fp_1setStr_1_1SWIG_10(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jstring jarg2, jint jarg3) {
   Fp *arg1 = (Fp *) 0 ;
   std::string *arg2 = 0 ;
@@ -2338,7 +2412,7 @@ SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_mul_1_1SWIG_14(JNIEnv *jenv,
 }
 
 
-SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_inv(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
+SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_inv_1_1SWIG_12(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_) {
   GT *arg1 = 0 ;
   GT *arg2 = 0 ;
   

From 65c8d5f86fd3f2039d57a150a091707441c719ee Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Mar 2021 20:37:37 +0900
Subject: [PATCH 546/553] add test for Lagrange

---
 ffi/cs/mcl/mcl.cs   | 24 +++++++++++++++++++++++-
 ffi/cs/test/test.cs | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 test/bench.hpp      | 34 +++++++++++++++++++++++-----------
 3 files changed, 90 insertions(+), 13 deletions(-)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index 0202a664..0ca1a391 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -19,7 +19,6 @@ public class MCL {
         [DllImport(dllName)] public static extern int mclBn_init(int curve, int compiledTimeVar);
         [DllImport(dllName)] public static extern void mclBn_setETHserialization(int enable);
         [DllImport(dllName)] public static extern int mclBn_setMapToMode(int mode);
-        [DllImport(dllName)] public static extern int mclBn_FrEvaluatePolynomial(ref Fr z, [In] Fr[] poly, long bufSize, in Fr y);
         [DllImport(dllName)] public static extern void mclBnFr_clear(ref Fr x);
         [DllImport(dllName)] public static extern void mclBnFr_setInt(ref Fr y, int x);
         [DllImport(dllName)] public static extern int mclBnFr_setStr(ref Fr x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
@@ -122,6 +121,8 @@ public class MCL {
         [DllImport(dllName)] public static extern ulong mclBnG1_deserialize(ref G1 x, [In]byte[] buf, ulong bufSize);
         [DllImport(dllName)] public static extern ulong mclBnG2_deserialize(ref G2 x, [In]byte[] buf, ulong bufSize);
 
+        [DllImport(dllName)] public static extern int mclBn_FrEvaluatePolynomial(ref Fr z, in Fr[] cVec, ulong cSize, in Fr x);
+        [DllImport(dllName)] public static extern int mclBn_FrLagrangeInterpolation(ref Fr z, in Fr[] xVec, in Fr[] yVec, ulong k);
         public static void Init(int curveType = BN254)
         {
             if (!System.Environment.Is64BitProcess) {
@@ -302,6 +303,27 @@ public static void MillerLoop(ref GT z, in G1 x, in G2 y)
         {
             mclBn_millerLoop(ref z, x, y);
         }
+        // y = f(x) with a polynomial f(t) = sum_i cVec[i] t^i
+        public static void Share(ref Fr y, in Fr[] cVec, in Fr x)
+        {
+            ulong k = (ulong)cVec.Length;
+            int ret = mclBn_FrEvaluatePolynomial(ref y, cVec, k, x);
+            if (ret != 0) {
+                throw new ArgumentException("mclBn_FrEvaluatePolynomial");
+            }
+        }
+        // recover z by Lagrange interpolation with {xVec[i], yVec[i]}
+        public static void Recover(ref Fr z, in Fr[] xVec, in Fr[] yVec)
+        {
+            if (xVec.Length != yVec.Length) {
+                throw new ArgumentException("bad length");
+            }
+            ulong k = (ulong)xVec.Length;
+            int ret = mclBn_FrLagrangeInterpolation(ref z, xVec, yVec, k);
+            if (ret != 0) {
+                throw new ArgumentException("mclBn_FrLagrangeInterpolation:" + ret.ToString());
+            }
+        }
         [StructLayout(LayoutKind.Sequential)]
         struct U128 {
             private ulong v0, v1;
diff --git a/ffi/cs/test/test.cs b/ffi/cs/test/test.cs
index d3c023b8..6d6e63fd 100644
--- a/ffi/cs/test/test.cs
+++ b/ffi/cs/test/test.cs
@@ -34,7 +34,6 @@ static void Main(string[] args)
         }
 
         static void TestCurve(int curveType)
-
         {
             Init(curveType);
             TestFr();
@@ -42,6 +41,7 @@ static void TestCurve(int curveType)
             TestG1();
             TestG2();
             TestPairing();
+            TestSS();
         }
         static void TestFr()
         {
@@ -286,5 +286,48 @@ static void TestETH()
         {
             TestETH_mapToG1();
         }
+        static void TestSS()
+        {
+            const int n = 5;
+            const int k = 3; // can't change because the following loop
+            Fr[] cVec = new Fr[k];
+            // init polynomial
+            for (int i = 0; i < k; i++) {
+                //                cVec[i].SetByCSPRNG();
+                cVec[i].SetInt(i + 1);
+                Console.WriteLine("cVec[" + i + "]=" + cVec[i].GetStr(10));
+            }
+
+            Fr[] xVec = new Fr[n]; // user id
+            Fr[] yVec = new Fr[n];
+            // share cVec[0] with yVec[0], ..., yVec[n-1]
+            for (int i = 0; i < n; i++) {
+                xVec[i].SetInt(i + 2); // non zero value
+                Console.WriteLine("x=" + xVec[i].GetStr(10));
+                MCL.Share(ref yVec[i], cVec, xVec[i]);
+                Console.WriteLine("y=" + yVec[i].GetStr(10));
+            }
+            // recover cVec[0] from xVecSubset and yVecSubset
+            Fr[] xVecSubset = new Fr[k];
+            Fr[] yVecSubset = new Fr[k];
+            Console.WriteLine("cVec[0]=" + cVec[0].GetStr(10));
+            for (int i0 = 0; i0 < n; i0++) {
+                xVecSubset[0] = xVec[i0];
+                yVecSubset[0] = yVec[i0];
+                for (int i1 = i0 + 1; i1 < n; i1++) {
+                    xVecSubset[1] = xVec[i1];
+                    yVecSubset[1] = yVec[i1];
+                    for (int i2 = i1 + 1; i2 < n; i2++) {
+                        xVecSubset[2] = xVec[i2];
+                        yVecSubset[2] = yVec[i2];
+                        Fr s = new Fr();
+                        MCL.Recover(ref s, xVecSubset, yVecSubset);
+                        Console.WriteLine("x=" + i0 + ", " + i1 + ", " + i2);
+                        Console.WriteLine("s=" + s.GetStr(10));
+                        assert("Recover", s.Equals(cVec[0]));
+                    }
+                }
+            }
+        }
     }
 }
diff --git a/test/bench.hpp b/test/bench.hpp
index d407bc75..cbe45e74 100644
--- a/test/bench.hpp
+++ b/test/bench.hpp
@@ -229,20 +229,32 @@ void testSquareRoot()
 void testLagrange()
 {
 	puts("testLagrange");
-	const int k = 7;
-	Fr c[k], x[k], y[k];
+	const int n = 5;
+	const int k = 3;
+	Fr c[k];
+	Fr x[n], y[n];
 	for (size_t i = 0; i < k; i++) {
 		c[i].setByCSPRNG();
-		x[i].setByCSPRNG();
 	}
-	for (size_t i = 0; i < k; i++) {
+	for (size_t i = 0; i < n; i++) {
+		x[i].setByCSPRNG();
 		mcl::evaluatePolynomial(y[i], c, k, x[i]);
 	}
-	Fr s;
-	mcl::LagrangeInterpolation(s, x, y, k);
-	CYBOZU_TEST_EQUAL(s, c[0]);
-	mcl::LagrangeInterpolation(s, x, y, 1);
-	CYBOZU_TEST_EQUAL(s, y[0]);
-	mcl::evaluatePolynomial(y[0], c, 1, x[0]);
-	CYBOZU_TEST_EQUAL(y[0], c[0]);
+	Fr xs[k], ys[k];
+	for (int i0 = 0; i0 < n; i0++) {
+		xs[0] = x[i0];
+		ys[0] = y[i0];
+		for (int i1 = i0 + 1; i1 < n; i1++) {
+			xs[1] = x[i1];
+			ys[1] = y[i1];
+			for (int i2 = i1 + 1; i2 < n; i2++) {
+				xs[2] = x[i2];
+				ys[2] = y[i2];
+				Fr s;
+				s.clear();
+				mcl::LagrangeInterpolation(s, xs, ys, k);
+				CYBOZU_TEST_EQUAL(s, c[0]);
+			}
+		}
+	}
 }

From b389732ba8af75aaf012b18bcfe95c384e053f42 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Fri, 26 Mar 2021 20:57:58 +0900
Subject: [PATCH 547/553] [cs][skip cs] pass test of Fr::Recover

---
 ffi/cs/mcl/mcl.cs   | 4 ++--
 ffi/cs/test/test.cs | 8 +-------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index 0ca1a391..704f9c13 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -121,8 +121,8 @@ public class MCL {
         [DllImport(dllName)] public static extern ulong mclBnG1_deserialize(ref G1 x, [In]byte[] buf, ulong bufSize);
         [DllImport(dllName)] public static extern ulong mclBnG2_deserialize(ref G2 x, [In]byte[] buf, ulong bufSize);
 
-        [DllImport(dllName)] public static extern int mclBn_FrEvaluatePolynomial(ref Fr z, in Fr[] cVec, ulong cSize, in Fr x);
-        [DllImport(dllName)] public static extern int mclBn_FrLagrangeInterpolation(ref Fr z, in Fr[] xVec, in Fr[] yVec, ulong k);
+        [DllImport(dllName)] public static extern int mclBn_FrEvaluatePolynomial(ref Fr z, [In]Fr[] cVec, ulong cSize, in Fr x);
+        [DllImport(dllName)] public static extern int mclBn_FrLagrangeInterpolation(ref Fr z, [In]Fr[] xVec, [In]Fr[] yVec, ulong k);
         public static void Init(int curveType = BN254)
         {
             if (!System.Environment.Is64BitProcess) {
diff --git a/ffi/cs/test/test.cs b/ffi/cs/test/test.cs
index 6d6e63fd..115cbafa 100644
--- a/ffi/cs/test/test.cs
+++ b/ffi/cs/test/test.cs
@@ -293,9 +293,8 @@ static void TestSS()
             Fr[] cVec = new Fr[k];
             // init polynomial
             for (int i = 0; i < k; i++) {
-                //                cVec[i].SetByCSPRNG();
+                cVec[i].SetByCSPRNG();
                 cVec[i].SetInt(i + 1);
-                Console.WriteLine("cVec[" + i + "]=" + cVec[i].GetStr(10));
             }
 
             Fr[] xVec = new Fr[n]; // user id
@@ -303,14 +302,11 @@ static void TestSS()
             // share cVec[0] with yVec[0], ..., yVec[n-1]
             for (int i = 0; i < n; i++) {
                 xVec[i].SetInt(i + 2); // non zero value
-                Console.WriteLine("x=" + xVec[i].GetStr(10));
                 MCL.Share(ref yVec[i], cVec, xVec[i]);
-                Console.WriteLine("y=" + yVec[i].GetStr(10));
             }
             // recover cVec[0] from xVecSubset and yVecSubset
             Fr[] xVecSubset = new Fr[k];
             Fr[] yVecSubset = new Fr[k];
-            Console.WriteLine("cVec[0]=" + cVec[0].GetStr(10));
             for (int i0 = 0; i0 < n; i0++) {
                 xVecSubset[0] = xVec[i0];
                 yVecSubset[0] = yVec[i0];
@@ -322,8 +318,6 @@ static void TestSS()
                         yVecSubset[2] = yVec[i2];
                         Fr s = new Fr();
                         MCL.Recover(ref s, xVecSubset, yVecSubset);
-                        Console.WriteLine("x=" + i0 + ", " + i1 + ", " + i2);
-                        Console.WriteLine("s=" + s.GetStr(10));
                         assert("Recover", s.Equals(cVec[0]));
                     }
                 }

From 5a24245d0eab4a3b5e0488d3b59fc3d567922301 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 27 Mar 2021 09:27:50 +0900
Subject: [PATCH 548/553] [cs][skip cs] add G1, G2::Recover/Share

---
 ffi/cs/mcl/mcl.cs   | 68 ++++++++++++++++++++++++++++-----
 ffi/cs/test/test.cs | 93 ++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 146 insertions(+), 15 deletions(-)

diff --git a/ffi/cs/mcl/mcl.cs b/ffi/cs/mcl/mcl.cs
index 704f9c13..cf04071b 100644
--- a/ffi/cs/mcl/mcl.cs
+++ b/ffi/cs/mcl/mcl.cs
@@ -72,7 +72,7 @@ public class MCL {
         [DllImport(dllName)] public static extern void mclBnG1_add(ref G1 z, in G1 x, in G1 y);
         [DllImport(dllName)] public static extern void mclBnG1_sub(ref G1 z, in G1 x, in G1 y);
         [DllImport(dllName)] public static extern void mclBnG1_mul(ref G1 z, in G1 x, in Fr y);
-        [DllImport(dllName)] public static extern void mclBnG1_mulVec(ref G1 z, [In]G1[] x, [In]Fr[] y, long n);
+        [DllImport(dllName)] public static extern void mclBnG1_mulVec(ref G1 z, [In] G1[] x, [In] Fr[] y, long n);
 
         [DllImport(dllName)] public static extern void mclBnG2_clear(ref G2 x);
         [DllImport(dllName)] public static extern int mclBnG2_setStr(ref G2 x, [In][MarshalAs(UnmanagedType.LPStr)] string buf, long bufSize, int ioMode);
@@ -114,15 +114,19 @@ public class MCL {
         [DllImport(dllName)] public static extern int mclBn_getFpByteSize();
         [DllImport(dllName)] public static extern ulong mclBnFp_serialize([Out] byte[] buf, ulong maxBufSize, in Fp x);
         [DllImport(dllName)] public static extern ulong mclBnFr_serialize([Out] byte[] buf, ulong maxBufSize, in Fr x);
-        [DllImport(dllName)] public static extern ulong mclBnG1_serialize([Out]byte[] buf, ulong maxBufSize, in G1 x);
-        [DllImport(dllName)] public static extern ulong mclBnG2_serialize([Out]byte[] buf, ulong maxBufSize, in G2 x);
-        [DllImport(dllName)] public static extern ulong mclBnFr_deserialize(ref Fr x, [In]byte[] buf, ulong bufSize);
-        [DllImport(dllName)] public static extern ulong mclBnFp_deserialize(ref Fp x, [In]byte[] buf, ulong bufSize);
-        [DllImport(dllName)] public static extern ulong mclBnG1_deserialize(ref G1 x, [In]byte[] buf, ulong bufSize);
-        [DllImport(dllName)] public static extern ulong mclBnG2_deserialize(ref G2 x, [In]byte[] buf, ulong bufSize);
+        [DllImport(dllName)] public static extern ulong mclBnG1_serialize([Out] byte[] buf, ulong maxBufSize, in G1 x);
+        [DllImport(dllName)] public static extern ulong mclBnG2_serialize([Out] byte[] buf, ulong maxBufSize, in G2 x);
+        [DllImport(dllName)] public static extern ulong mclBnFr_deserialize(ref Fr x, [In] byte[] buf, ulong bufSize);
+        [DllImport(dllName)] public static extern ulong mclBnFp_deserialize(ref Fp x, [In] byte[] buf, ulong bufSize);
+        [DllImport(dllName)] public static extern ulong mclBnG1_deserialize(ref G1 x, [In] byte[] buf, ulong bufSize);
+        [DllImport(dllName)] public static extern ulong mclBnG2_deserialize(ref G2 x, [In] byte[] buf, ulong bufSize);
 
-        [DllImport(dllName)] public static extern int mclBn_FrEvaluatePolynomial(ref Fr z, [In]Fr[] cVec, ulong cSize, in Fr x);
-        [DllImport(dllName)] public static extern int mclBn_FrLagrangeInterpolation(ref Fr z, [In]Fr[] xVec, [In]Fr[] yVec, ulong k);
+        [DllImport(dllName)] public static extern int mclBn_FrEvaluatePolynomial(ref Fr z, [In] Fr[] cVec, ulong cSize, in Fr x);
+        [DllImport(dllName)] public static extern int mclBn_G1EvaluatePolynomial(ref G1 z, [In] G1[] cVec, ulong cSize, in Fr x);
+        [DllImport(dllName)] public static extern int mclBn_G2EvaluatePolynomial(ref G2 z, [In] G2[] cVec, ulong cSize, in Fr x);
+        [DllImport(dllName)] public static extern int mclBn_FrLagrangeInterpolation(ref Fr z, [In] Fr[] xVec, [In] Fr[] yVec, ulong k);
+        [DllImport(dllName)] public static extern int mclBn_G1LagrangeInterpolation(ref G1 z, [In] Fr[] xVec, [In] G1[] yVec, ulong k);
+        [DllImport(dllName)] public static extern int mclBn_G2LagrangeInterpolation(ref G2 z, [In] Fr[] xVec, [In] G2[] yVec, ulong k);
         public static void Init(int curveType = BN254)
         {
             if (!System.Environment.Is64BitProcess) {
@@ -312,6 +316,22 @@ public static void Share(ref Fr y, in Fr[] cVec, in Fr x)
                 throw new ArgumentException("mclBn_FrEvaluatePolynomial");
             }
         }
+        public static void Share(ref G1 y, in G1[] cVec, in Fr x)
+        {
+            ulong k = (ulong)cVec.Length;
+            int ret = mclBn_G1EvaluatePolynomial(ref y, cVec, k, x);
+            if (ret != 0) {
+                throw new ArgumentException("mclBn_G1EvaluatePolynomial");
+            }
+        }
+        public static void Share(ref G2 y, in G2[] cVec, in Fr x)
+        {
+            ulong k = (ulong)cVec.Length;
+            int ret = mclBn_G2EvaluatePolynomial(ref y, cVec, k, x);
+            if (ret != 0) {
+                throw new ArgumentException("mclBn_G2EvaluatePolynomial");
+            }
+        }
         // recover z by Lagrange interpolation with {xVec[i], yVec[i]}
         public static void Recover(ref Fr z, in Fr[] xVec, in Fr[] yVec)
         {
@@ -324,6 +344,28 @@ public static void Recover(ref Fr z, in Fr[] xVec, in Fr[] yVec)
                 throw new ArgumentException("mclBn_FrLagrangeInterpolation:" + ret.ToString());
             }
         }
+        public static void Recover(ref G1 z, in Fr[] xVec, in G1[] yVec)
+        {
+            if (xVec.Length != yVec.Length) {
+                throw new ArgumentException("bad length");
+            }
+            ulong k = (ulong)xVec.Length;
+            int ret = mclBn_G1LagrangeInterpolation(ref z, xVec, yVec, k);
+            if (ret != 0) {
+                throw new ArgumentException("mclBn_G1LagrangeInterpolation:" + ret.ToString());
+            }
+        }
+        public static void Recover(ref G2 z, in Fr[] xVec, in G2[] yVec)
+        {
+            if (xVec.Length != yVec.Length) {
+                throw new ArgumentException("bad length");
+            }
+            ulong k = (ulong)xVec.Length;
+            int ret = mclBn_G2LagrangeInterpolation(ref z, xVec, yVec, k);
+            if (ret != 0) {
+                throw new ArgumentException("mclBn_G2LagrangeInterpolation:" + ret.ToString());
+            }
+        }
         [StructLayout(LayoutKind.Sequential)]
         struct U128 {
             private ulong v0, v1;
@@ -660,6 +702,10 @@ public void HashAndMapTo(String s)
                     throw new ArgumentException("mclBnG1_hashAndMapTo:" + s);
                 }
             }
+            public void SetHashOf(String s)
+            {
+                HashAndMapTo(s);
+            }
             public string GetStr(int ioMode)
             {
                 StringBuilder sb = new StringBuilder(1024);
@@ -770,6 +816,10 @@ public void HashAndMapTo(String s)
                     throw new ArgumentException("mclBnG2_hashAndMapTo:" + s);
                 }
             }
+            public void SetHashOf(String s)
+            {
+                HashAndMapTo(s);
+            }
             public string GetStr(int ioMode)
             {
                 StringBuilder sb = new StringBuilder(1024);
diff --git a/ffi/cs/test/test.cs b/ffi/cs/test/test.cs
index 115cbafa..b41aab42 100644
--- a/ffi/cs/test/test.cs
+++ b/ffi/cs/test/test.cs
@@ -10,7 +10,7 @@ static void assert(string msg, bool b)
             Console.WriteLine("ERR {0}", msg);
             err++;
         }
-        static void Main(string[] args)
+        static void Main()
         {
             err = 0;
             try {
@@ -286,22 +286,21 @@ static void TestETH()
         {
             TestETH_mapToG1();
         }
-        static void TestSS()
+        static void TestSS_Fr()
         {
             const int n = 5;
             const int k = 3; // can't change because the following loop
             Fr[] cVec = new Fr[k];
-            // init polynomial
+            // init polynomial coefficient
             for (int i = 0; i < k; i++) {
                 cVec[i].SetByCSPRNG();
-                cVec[i].SetInt(i + 1);
             }
 
-            Fr[] xVec = new Fr[n]; // user id
+            Fr[] xVec = new Fr[n];
             Fr[] yVec = new Fr[n];
             // share cVec[0] with yVec[0], ..., yVec[n-1]
             for (int i = 0; i < n; i++) {
-                xVec[i].SetInt(i + 2); // non zero value
+                xVec[i].SetHashOf(i.ToString());
                 MCL.Share(ref yVec[i], cVec, xVec[i]);
             }
             // recover cVec[0] from xVecSubset and yVecSubset
@@ -323,5 +322,87 @@ static void TestSS()
                 }
             }
         }
+        static void TestSS_G1()
+        {
+            const int n = 5;
+            const int k = 3; // cannot be changed: the nested i0/i1/i2 loops below pick subsets of exactly 3 shares
+            G1[] cVec = new G1[k];
+            // init polynomial coefficient
+            for (int i = 0; i < k; i++) {
+                Fr x = new Fr();
+                x.SetByCSPRNG();
+                cVec[i].SetHashOf(x.GetStr(16));
+            }
+
+            Fr[] xVec = new Fr[n];
+            G1[] yVec = new G1[n];
+            // share cVec[0] with yVec[0], ..., yVec[n-1]
+            for (int i = 0; i < n; i++) {
+                xVec[i].SetHashOf(i.ToString());
+                MCL.Share(ref yVec[i], cVec, xVec[i]);
+            }
+            // recover cVec[0] from xVecSubset and yVecSubset
+            Fr[] xVecSubset = new Fr[k];
+            G1[] yVecSubset = new G1[k];
+            for (int i0 = 0; i0 < n; i0++) {
+                xVecSubset[0] = xVec[i0];
+                yVecSubset[0] = yVec[i0];
+                for (int i1 = i0 + 1; i1 < n; i1++) {
+                    xVecSubset[1] = xVec[i1];
+                    yVecSubset[1] = yVec[i1];
+                    for (int i2 = i1 + 1; i2 < n; i2++) {
+                        xVecSubset[2] = xVec[i2];
+                        yVecSubset[2] = yVec[i2];
+                        G1 s = new G1();
+                        MCL.Recover(ref s, xVecSubset, yVecSubset);
+                        assert("Recover", s.Equals(cVec[0]));
+                    }
+                }
+            }
+        }
+        static void TestSS_G2()
+        {
+            const int n = 5;
+            const int k = 3; // cannot be changed: the nested i0/i1/i2 loops below pick subsets of exactly 3 shares
+            G2[] cVec = new G2[k];
+            // init polynomial coefficient
+            for (int i = 0; i < k; i++) {
+                Fr x = new Fr();
+                x.SetByCSPRNG();
+                cVec[i].SetHashOf(x.GetStr(16));
+            }
+
+            Fr[] xVec = new Fr[n];
+            G2[] yVec = new G2[n];
+            // share cVec[0] with yVec[0], ..., yVec[n-1]
+            for (int i = 0; i < n; i++) {
+                xVec[i].SetHashOf(i.ToString());
+                MCL.Share(ref yVec[i], cVec, xVec[i]);
+            }
+            // recover cVec[0] from xVecSubset and yVecSubset
+            Fr[] xVecSubset = new Fr[k];
+            G2[] yVecSubset = new G2[k];
+            for (int i0 = 0; i0 < n; i0++) {
+                xVecSubset[0] = xVec[i0];
+                yVecSubset[0] = yVec[i0];
+                for (int i1 = i0 + 1; i1 < n; i1++) {
+                    xVecSubset[1] = xVec[i1];
+                    yVecSubset[1] = yVec[i1];
+                    for (int i2 = i1 + 1; i2 < n; i2++) {
+                        xVecSubset[2] = xVec[i2];
+                        yVecSubset[2] = yVec[i2];
+                        G2 s = new G2();
+                        MCL.Recover(ref s, xVecSubset, yVecSubset);
+                        assert("Recover", s.Equals(cVec[0]));
+                    }
+                }
+            }
+        }
+        static void TestSS()
+        {
+            TestSS_Fr();
+            TestSS_G1();
+            TestSS_G2();
+        }
     }
 }

From e15ab2a649f3432468b2617baf176eeb402f4dfd Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 27 Mar 2021 12:47:44 +0900
Subject: [PATCH 549/553] [doc][skip cs] about C#

---
 readme.md | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/readme.md b/readme.md
index 1c8dab5d..193ac302 100644
--- a/readme.md
+++ b/readme.md
@@ -108,29 +108,36 @@ make ARCH=x86 CFLAGS_USER="-I <lib32>/include" LDFLAGS_USER="-L <lib32>/lib -Wl,
 
 # How to build on 64-bit Windows with Visual Studio
 
-Clone cybozulib\_ext,
-which provides compiled binaries of [MPIR](http://mpir.org/).
-
+Open a console window and run:
 ```
-mkdir work
-cd work
-git clone git://github.com/herumi/mcl
-git clone git://github.com/herumi/cybozulib_ext
-cd work
+git clone https://github.com/herumi/mcl
+cd mcl
+
 # static library
 mklib
 mk -s test\bls12_test.cpp && bin\bls12_test.exe
+
 # dynamic library
 mklib dll
 mk -d test\bls12_test.cpp && bin\bls12_test.exe
 ```
-
 (not maintenanced)
 Open mcl.sln and build or if you have msbuild.exe
 ```
 msbuild /p:Configuration=Release
 ```
 
+# C# test
+
+```
+cd mcl
+mklib dll
+cd ffi/cs
+dotnet build mcl.sln
+cd ../../bin
+../ffi/cs/test/bin/Debug/netcoreapp3.1/test.exe
+```
+
 # How to build with CMake
 
 For Linux, macOS, etc.

From 5304c1f5b38dae85094f917a710356e3d14b7c70 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 3 Apr 2021 09:23:08 +0900
Subject: [PATCH 550/553] remove unused header

---
 src/fp_generator.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index e902ebd3..598c8fdb 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -12,7 +12,6 @@
 #include <stdio.h>
 #include <assert.h>
 #include <cybozu/exception.hpp>
-#include <cybozu/array.hpp>
 
 #ifdef _MSC_VER
 	#pragma warning(push)

From 3f9cce874188e742eefb9c32435efc6f4d33ecf1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 3 Apr 2021 09:26:24 +0900
Subject: [PATCH 551/553] don't use exception in xbyak

---
 src/fp.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fp.cpp b/src/fp.cpp
index c43abebd..4273e3fe 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -12,6 +12,7 @@
 
 #ifdef MCL_USE_XBYAK
 	#define XBYAK_DISABLE_AVX512
+	#define XBYAK_NO_EXCEPTION
 #else
 	#define XBYAK_ONLY_CLASS_CPU
 #endif

From c4a356038acde549d8a58ddae532fac10c6f74a1 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Sat, 10 Apr 2021 18:02:23 +0900
Subject: [PATCH 552/553] remove non-mulx-code in jit

---
 src/fp_generator.hpp | 565 ++++++++++---------------------------------
 1 file changed, 134 insertions(+), 431 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 598c8fdb..2d3c51dd 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -588,58 +588,34 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			mov(ptr [pz + 8], rax);
 			return;
 		}
-		if (useMulx_) {
-			assert(wk.size() > 0 && wk.isReg(0));
-			const Reg64& t1 = wk.getReg(0);
-			// mulx(H, L, x) = [H:L] = x * rdx
-			mov(rdx, y);
-			mulx(t1, rax, ptr [px]); // [y:rax] = px * y
-			mov(ptr [pz], rax);
-			const Reg64 *pt0 = &t;
-			const Reg64 *pt1 = &t1;
-			for (size_t i = 1; i < n - 1; i++) {
-				mulx(*pt0, rax, ptr [px + i * 8]);
-				if (i == 1) {
-					add(rax, *pt1);
-				} else {
-					adc(rax, *pt1);
-				}
-				mov(ptr [pz + i * 8], rax);
-				std::swap(pt0, pt1);
-			}
-			mulx(rdx, rax, ptr [px + (n - 1) * 8]);
-			adc(rax, *pt1);
-			mov(ptr [pz + (n - 1) * 8], rax);
-			adc(rdx, 0);
-			return;
-		}
-		assert(wk.size() >= n - 1);
-		for (size_t i = 0; i < n; i++) {
-			mov(rax, ptr [px + i * 8]);
-			mul(y);
-			if (i < n - 1) {
-				mov(ptr [pz + i * 8], rax);
-				g_mov(wk[i], rdx);
-			}
-		}
+		assert(wk.size() > 0 && wk.isReg(0));
+		const Reg64& t1 = wk.getReg(0);
+		// mulx(H, L, x) = [H:L] = x * rdx
+		mov(rdx, y);
+		mulx(t1, rax, ptr [px]); // [t1:rax] = px[0] * y
+		mov(ptr [pz], rax);
+		const Reg64 *pt0 = &t;
+		const Reg64 *pt1 = &t1;
 		for (size_t i = 1; i < n - 1; i++) {
-			mov(t, ptr [pz + i * 8]);
+			mulx(*pt0, rax, ptr [px + i * 8]);
 			if (i == 1) {
-				g_add(t, wk[i - 1]);
+				add(rax, *pt1);
 			} else {
-				g_adc(t, wk[i - 1]);
+				adc(rax, *pt1);
 			}
-			mov(ptr [pz + i * 8], t);
+			mov(ptr [pz + i * 8], rax);
+			std::swap(pt0, pt1);
 		}
-		g_adc(rax, wk[n - 2]);
+		mulx(rdx, rax, ptr [px + (n - 1) * 8]);
+		adc(rax, *pt1);
 		mov(ptr [pz + (n - 1) * 8], rax);
 		adc(rdx, 0);
 	}
 	void gen_mulUnit()
 	{
 //		assert(pn_ >= 2);
-		const int regNum = useMulx_ ? 2 : (1 + (std::min)(pn_ - 1, 8));
-		const int stackSize = useMulx_ ? 0 : (pn_ - 1) * 8;
+		const int regNum = 2;
+		const int stackSize = 0;
 		StackFrame sf(this, 3, regNum | UseRDX, stackSize);
 		const Reg64& pz = sf.p[0];
 		const Reg64& px = sf.p[1];
@@ -870,7 +846,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			gen_montMul4();
 			return func;
 		}
-		if (pn_ == 6 && !isFullBit_ && useMulx_ && useAdx_) {
+		if (pn_ == 6 && !isFullBit_) {
 #if 1
 			// a little faster
 			gen_montMul6();
@@ -907,7 +883,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& t0 = sf.t[0];
 		const Reg64& t1 = sf.t[1];
 		const Reg64& t2 = sf.t[2];
-		const Reg64& t3 = sf.t[3];
 		const Reg64& t4 = sf.t[4];
 		const Reg64& t5 = sf.t[5];
 		const Reg64& t6 = sf.t[6];
@@ -925,7 +900,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(t7, a); // q
 
 		// [d:t7:t1] = p * q
-		mul2x1(t0, t7, t1, t8);
+		mul2x1(t0, t7, t1);
 
 		xor_(t8, t8);
 		if (isFullBit_) {
@@ -944,7 +919,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(t6, a); // q
 
 		// [d:t6:xy] = p * q
-		mul2x1(t0, t6, xy, t3);
+		mul2x1(t0, t6, xy);
 
 		add_rr(Pack(t8, t4, t7), Pack(d, t6, xy));
 		// [t8:t4]
@@ -994,7 +969,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(t7, a); // q
 
 		// [d:t7:t2:t1] = p * q
-		mul3x1(t0, t7, t4, t2, t1, t8);
+		mul3x1(t0, t7, t2, t1, t8);
 
 		xor_(t8, t8);
 		xor_(t9, t9);
@@ -1014,7 +989,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(t10, a); // q
 
 		// [d:t10:t6:xy] = p * q
-		mul3x1(t0, t10, t1, t6, xy, t3);
+		mul3x1(t0, t10, t6, xy, t3);
 
 		add_rr(Pack(t8, t4, t7, t2), Pack(d, t10, t6, xy));
 		adc(t9, 0); // [t9:t8:t4:t7]
@@ -1027,7 +1002,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(t10, a); // q
 
 		// [d:t10:xy:t6] = p * q
-		mul3x1(t0, t10, t1, xy, t6, t2);
+		mul3x1(t0, t10, xy, t6, t2);
 
 		add_rr(Pack(t9, t8, t4, t7), Pack(d, t10, xy, t6));
 		// [t9:t8:t4]
@@ -1095,7 +1070,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void gen_fpDbl_mod4(const Reg64& z, const Reg64& xy, const Pack& t)
 	{
-		if (!isFullBit_ && useMulx_ && useAdx_) {
+		if (!isFullBit_) {
 			gen_fpDbl_mod4NF(z, xy, t);
 			return;
 		}
@@ -1123,7 +1098,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(t7, a); // q
 
 		// [d:t7:t3:t2:t1] = p * q
-		mul4x1(t0, t7, t4, t3, t2, t1, t8);
+		mul4x1(t0, t7, t3, t2, t1);
 
 		xor_(t8, t8);
 		xor_(t9, t9);
@@ -1152,7 +1127,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 
 		vmovq(xm1, t10);
 		// [d:z:t5:t6:xy] = p * q
-		mul4x1(t0, z, t1, t5, t6, xy, t10);
+		mul4x1(t0, z, t5, t6, xy);
 		vmovq(t10, xm1);
 
 		add_rr(Pack(t8, t4, t7, t3, t2), Pack(d, z, t5, t6, xy));
@@ -1171,7 +1146,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(z, a); // q
 
 		// [d:z:t5:xy:t6] = p * q
-		mul4x1(t0, z, t1, t5, xy, t6, t2);
+		mul4x1(t0, z, t5, xy, t6);
 
 		add_rr(Pack(t9, t8, t4, t7, t3), Pack(d, z, t5, xy, t6));
 		adc(t10, 0); // c' = [t10:t9:t8:t4:t7]
@@ -1187,7 +1162,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(z, a); // q
 
 		// [d:z:t5:xy:t6] = p * q
-		mul4x1(t0, z, t1, t5, xy, t6, t2);
+		mul4x1(t0, z, t5, xy, t6);
 
 		add_rr(Pack(t10, t9, t8, t4, t7), Pack(d, z, t5, xy, t6));
 		// [t10:t9:t8:t4]
@@ -1242,7 +1217,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			ret();
 			return func;
 		}
-		if (pn_ == 6 && !isFullBit_ && useMulx_ && useAdx_) {
+		if (pn_ == 6 && !isFullBit_) {
 			StackFrame sf(this, 3, 10 | UseRDX, 0, false);
 			call(fpDbl_modL);
 			sf.close();
@@ -1270,7 +1245,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 			gen_montSqr3();
 			return func;
 		}
-		if (pn_ == 4 && useMulx_) {
+		if (pn_ == 4) {
 #if 0
 			// sqr(y, x) = mul(y, x, x)
 #ifdef XBYAK64_WIN
@@ -1290,7 +1265,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 #endif
 			return func;
 		}
-		if (pn_ == 6 && !isFullBit_ && useMulx_ && useAdx_) {
+		if (pn_ == 6 && !isFullBit_) {
 #if 1
 			StackFrame sf(this, 3, 10 | UseRDX);
 			Pack t = sf.t;
@@ -1352,27 +1327,25 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& t5 = sf.t[5];
 		const Reg64& t6 = sf.t[6];
 		const Reg64& t7 = sf.t[7];
-		const Reg64& t8 = sf.t[8];
-		const Reg64& t9 = sf.t[9];
 
 	L(fp_mulL);
 		vmovq(xm0, p0); // save p0
 		lea(p0, ptr[rip+pL_]);
 		vmovq(xm1, p2);
 		mov(p2, ptr [p2]);
-		montgomery4_1(rp_, t0, t7, t3, t2, t1, p1, p2, p0, t4, t5, t6, t8, t9, true, xm2);
+		montgomery4_1(rp_, t0, t7, t3, t2, t1, p1, p2, p0, t4, t5, t6, true, xm2);
 
 		vmovq(p2, xm1);
 		mov(p2, ptr [p2 + 8]);
-		montgomery4_1(rp_, t1, t0, t7, t3, t2, p1, p2, p0, t4, t5, t6, t8, t9, false, xm2);
+		montgomery4_1(rp_, t1, t0, t7, t3, t2, p1, p2, p0, t4, t5, t6, false, xm2);
 
 		vmovq(p2, xm1);
 		mov(p2, ptr [p2 + 16]);
-		montgomery4_1(rp_, t2, t1, t0, t7, t3, p1, p2, p0, t4, t5, t6, t8, t9, false, xm2);
+		montgomery4_1(rp_, t2, t1, t0, t7, t3, p1, p2, p0, t4, t5, t6, false, xm2);
 
 		vmovq(p2, xm1);
 		mov(p2, ptr [p2 + 24]);
-		montgomery4_1(rp_, t3, t2, t1, t0, t7, p1, p2, p0, t4, t5, t6, t8, t9, false, xm2);
+		montgomery4_1(rp_, t3, t2, t1, t0, t7, p1, p2, p0, t4, t5, t6, false, xm2);
 		// [t7:t3:t2:t1:t0]
 
 		mov(t4, t0);
@@ -1475,7 +1448,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void gen_montMul6()
 	{
-		assert(!isFullBit_ && useMulx_ && useAdx_);
+		assert(!isFullBit_);
 		StackFrame sf(this, 3, 10 | UseRDX, 0, false);
 		call(fp_mulL);
 		sf.close();
@@ -1543,12 +1516,12 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		lea(t7, ptr[rip+pL_]);
 		mov(t9, ptr [p2]);
 		//                c3, c2, c1, c0, px, y,  p,
-		montgomery3_1(rp_, t0, t3, t2, t1, p1, t9, t7, t4, t5, t6, t8, p0, true);
+		montgomery3_1(rp_, t0, t3, t2, t1, p1, t9, t7, t4, t5, t8, p0, true);
 		mov(t9, ptr [p2 + 8]);
-		montgomery3_1(rp_, t1, t0, t3, t2, p1, t9, t7, t4, t5, t6, t8, p0, false);
+		montgomery3_1(rp_, t1, t0, t3, t2, p1, t9, t7, t4, t5, t8, p0, false);
 
 		mov(t9, ptr [p2 + 16]);
-		montgomery3_1(rp_, t2, t1, t0, t3, p1, t9, t7, t4, t5, t6, t8, p0, false);
+		montgomery3_1(rp_, t2, t1, t0, t3, p1, t9, t7, t4, t5, t8, p0, false);
 
 		// [(t3):t2:t1:t0]
 		mov(t4, t0);
@@ -1590,19 +1563,19 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(t9, ptr [px]);
 		mul3x1_sqr1(px, t9, t3, t2, t1, t0);
 		mov(t0, rdx);
-		montgomery3_sub(rp_, t0, t9, t2, t1, px, t3, t7, t4, t5, t6, t8, pz, true);
+		montgomery3_sub(rp_, t0, t9, t2, t1, px, t3, t7, t4, t5, t8, pz, true);
 
 		mov(t3, ptr [px + 8]);
 		mul3x1_sqr2(px, t3, t6, t5, t4);
 		add_rr(Pack(t1, t0, t9, t2), Pack(rdx, rax, t5, t4));
 		if (isFullBit_) setc(pz.cvt8());
-		montgomery3_sub(rp_, t1, t3, t9, t2, px, t0, t7, t4, t5, t6, t8, pz, false);
+		montgomery3_sub(rp_, t1, t3, t9, t2, px, t0, t7, t4, t5, t8, pz, false);
 
 		mov(t0, ptr [px + 16]);
 		mul3x1_sqr3(t0, t5, t4);
 		add_rr(Pack(t2, t1, t3, t9), Pack(rdx, rax, t5, t4));
 		if (isFullBit_) setc(pz.cvt8());
-		montgomery3_sub(rp_, t2, t0, t3, t9, px, t1, t7, t4, t5, t6, t8, pz, false);
+		montgomery3_sub(rp_, t2, t0, t3, t9, px, t1, t7, t4, t5, t8, pz, false);
 
 		// [t9:t2:t0:t3]
 		mov(t4, t3);
@@ -1636,58 +1609,25 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& t9 = t[9];
 		const Reg64& t10 = t[10];
 
-		if (useMulx_) {
-			mov(d, ptr [px + 8 * 0]);
-			mulx(t0, a, d);
-			mov(ptr [py + 8 * 0], a);
+		mov(d, ptr [px + 8 * 0]);
+		mulx(t0, a, d);
+		mov(ptr [py + 8 * 0], a);
 
-			mov(t7, ptr [px + 8 * 1]);
-			mov(t9, ptr [px + 8 * 2]);
-			mulx(t2, t1, t7);
-			mulx(t4, t3, t9);
+		mov(t7, ptr [px + 8 * 1]);
+		mov(t9, ptr [px + 8 * 2]);
+		mulx(t2, t1, t7);
+		mulx(t4, t3, t9);
 
-			mov(t5, t2);
-			mov(t6, t4);
+		mov(t5, t2);
+		mov(t6, t4);
 
-			add(t0, t1);
-			adc(t5, t3);
-			adc(t6, 0); // [t6:t5:t0]
+		add(t0, t1);
+		adc(t5, t3);
+		adc(t6, 0); // [t6:t5:t0]
 
-			mov(d, t7);
-			mulx(t8, t7, d);
-			mulx(t10, t9, t9);
-		} else {
-			mov(t9, ptr [px + 8 * 0]);
-			mov(a, t9);
-			mul(t9);
-			mov(ptr [py + 8 * 0], a);
-			mov(t0, d);
-			mov(a, ptr [px + 8 * 1]);
-			mul(t9);
-			mov(t1, a);
-			mov(t2, d);
-			mov(a, ptr [px + 8 * 2]);
-			mul(t9);
-			mov(t3, a);
-			mov(t4, d);
-
-			mov(t5, t2);
-			mov(t6, t4);
-
-			add(t0, t1);
-			adc(t5, t3);
-			adc(t6, 0); // [t6:t5:t0]
-
-			mov(t9, ptr [px + 8 * 1]);
-			mov(a, t9);
-			mul(t9);
-			mov(t7, a);
-			mov(t8, d);
-			mov(a, ptr [px + 8 * 2]);
-			mul(t9);
-			mov(t9, a);
-			mov(t10, d);
-		}
+		mov(d, t7);
+		mulx(t8, t7, d);
+		mulx(t10, t9, t9);
 		add(t2, t7);
 		adc(t8, t9);
 		mov(t7, t10);
@@ -1738,21 +1678,10 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(d, ptr [px]);
 		mulx(pd[0], a, ptr [py + 8 * 0]);
 		mov(ptr [pz + 8 * 0], a);
-		if (useAdx_) {
-			xor_(a, a);
-			for (size_t i = 1; i < pd.size(); i++) {
-				mulx(pd[i], a, ptr [py + 8 * i]);
-				adcx(pd[i - 1], a);
-			}
-		} else {
-			for (size_t i = 1; i < pd.size(); i++) {
-				mulx(pd[i], a, ptr [py + 8 * i]);
-				if (i == 1) {
-					add(pd[i - 1], a);
-				} else {
-					adc(pd[i - 1], a);
-				}
-			}
+		xor_(a, a);
+		for (size_t i = 1; i < pd.size(); i++) {
+			mulx(pd[i], a, ptr [py + 8 * i]);
+			adcx(pd[i - 1], a);
 		}
 		adc(pd[pd.size() - 1], 0);
 	}
@@ -1823,58 +1752,34 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void mulPre3(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t)
 	{
-		const Reg64& a = rax;
 		const Reg64& d = rdx;
 		const Reg64& t0 = t[0];
 		const Reg64& t1 = t[1];
 		const Reg64& t2 = t[2];
-		const Reg64& t3 = t[3];
 		const Reg64& t4 = t[4];
 		const Reg64& t5 = t[5];
 		const Reg64& t6 = t[6];
-		const Reg64& t7 = t[7];
 		const Reg64& t8 = t[8];
 		const Reg64& t9 = t[9];
 
-		if (useMulx_) {
-			mulPack(pz, px, py, Pack(t2, t1, t0));
+		mulPack(pz, px, py, Pack(t2, t1, t0));
 #if 0 // a little slow
-			if (useAdx_) {
-				// [t2:t1:t0]
-				mulPackAdd(pz + 8 * 1, px + 8 * 1, py, t3, Pack(t2, t1, t0));
-				// [t3:t2:t1]
-				mulPackAdd(pz + 8 * 2, px + 8 * 2, py, t4, Pack(t3, t2, t1));
-				// [t4:t3:t2]
-				store_mr(pz + 8 * 3, Pack(t4, t3, t2));
-				return;
-			}
-#endif
-		} else {
-			mov(t5, ptr [px]);
-			mov(a, ptr [py + 8 * 0]);
-			mul(t5);
-			mov(ptr [pz + 8 * 0], a);
-			mov(t0, d);
-			mov(a, ptr [py + 8 * 1]);
-			mul(t5);
-			mov(t3, a);
-			mov(t1, d);
-			mov(a, ptr [py + 8 * 2]);
-			mul(t5);
-			mov(t4, a);
-			mov(t2, d);
-			add(t0, t3);
-			mov(t2, 0);
-			adc(t1, a);
-			adc(t2, d); // [t2:t1:t0:pz[0]] = px[0] * py[2..0]
+		if (useAdx_) {
+			// [t2:t1:t0]
+			mulPackAdd(pz + 8 * 1, px + 8 * 1, py, t3, Pack(t2, t1, t0));
+			// [t3:t2:t1]
+			mulPackAdd(pz + 8 * 2, px + 8 * 2, py, t4, Pack(t3, t2, t1));
+			// [t4:t3:t2]
+			store_mr(pz + 8 * 3, Pack(t4, t3, t2));
+			return;
 		}
-
+#endif
 		// here [t2:t1:t0]
 
 		mov(t9, ptr [px + 8]);
 
 		// [d:t9:t6:t5] = px[1] * py[2..0]
-		mul3x1(py, t9, t7, t6, t5, t4);
+		mul3x1(py, t9, t6, t5, t4);
 		add_rr(Pack(t2, t1, t0), Pack(t9, t6, t5));
 		adc(d, 0);
 		mov(t8, d);
@@ -1884,7 +1789,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(t9, ptr [px + 16]);
 
 		// [d:t9:t5:t4]
-		mul3x1(py, t9, t6, t5, t4, t0);
+		mul3x1(py, t9, t5, t4, t0);
 		add_rr(Pack(t8, t2, t1), Pack(t9, t5, t4));
 		adc(d, 0);
 		store_mr(pz + 8 * 2, Pack(d, t8, t2, t1));
@@ -1909,7 +1814,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void sqr2(const Reg64& y3, const Reg64& y2, const Reg64& y1, const Reg64& y0, const Reg64& x1, const Reg64& x0, const Reg64& t1, const Reg64& t0)
 	{
-		assert(useMulx_);
 		mov(rdx, x0);
 		mulx(y1, y0, x0); // x0^2
 		mov(rdx, x1);
@@ -1928,7 +1832,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void mul2x2(const RegExp& px, const RegExp& py, const Reg64& t4, const Reg64& t3, const Reg64& t2, const Reg64& t1, const Reg64& t0)
 	{
-		assert(useMulx_);
 #if 0
 		// # of add is less, but a little slower
 		mov(t4, ptr [py + 8 * 0]);
@@ -2043,58 +1946,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void sqrPre4(const Reg64& py, const Reg64& px, const Pack& t)
 	{
-#if 1
-		if (useMulx_ && useAdx_) {
-			sqrPre4NF(py, px, t);
-			return;
-		}
-#endif
-		const Reg64& t0 = t[0];
-		const Reg64& t1 = t[1];
-		const Reg64& t2 = t[2];
-		const Reg64& t3 = t[3];
-		const Reg64& t4 = t[4];
-		const Reg64& t5 = t[5];
-		const Reg64& t6 = t[6];
-		const Reg64& t7 = t[7];
-		const Reg64& t8 = t[8];
-		const Reg64& t9 = t[9];
-		const Reg64& t10 = t[10];
-		const Reg64& a = rax;
-		const Reg64& d = rdx;
-
-		/*
-			(aN + b)^2 = a^2 N^2 + 2ab N + b^2
-		*/
-		load_rm(Pack(t9, t8), px);
-		sqr2(t3, t2, t1, t0, t9, t8, t7, t6);
-		// [t3:t2:t1:t0] = b^2
-		store_mr(py, Pack(t1, t0));
-		vmovq(xm0, t2);
-		mul2x2(px, px + 2 * 8, t6, t5, t4, t1, t0);
-		// [t5:t4:t1:t0] = ab
-		xor_(t6, t6);
-		add_rr(Pack(t6, t5, t4, t1, t0), Pack(t6, t5, t4, t1, t0));
-		// [t6:t5:t4:t1:t0] = 2ab
-		load_rm(Pack(t8, t7), px + 2 * 8);
-		// free t10, t9, rax, rdx
-		/*
-			[d:t8:t10:t9] = [t8:t7]^2
-		*/
-		mov(d, t7);
-		mulx(t10, t9, t7); // [t10:t9] = t7^2
-		mulx(t7, t2, t8); // [t7:t2] = t7 t8
-		xor_(a, a);
-		add_rr(Pack(a, t7, t2), Pack(a, t7, t2));
-		// [a:t7:t2] = 2 t7 t8
-		mov(d, t8);
-		mulx(d, t8, t8); // [d:t8] = t8^2
-		add_rr(Pack(d, t8, t10), Pack(a, t7, t2));
-		// [d:t8:t10:t9] = [t8:t7]^2
-		vmovq(t2, xm0);
-		add_rr(Pack(t8, t10, t9, t3, t2), Pack(t6, t5, t4, t1, t0));
-		adc(d, 0);
-		store_mr(py + 2 * 8, Pack(d, t8, t10, t9, t3, t2));
+		sqrPre4NF(py, px, t);
 	}
 	/*
 		(5, 5)(4, 4)(3, 3)(2, 2)(1, 1)(0, 0)
@@ -2221,7 +2073,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void mulPre4(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t)
 	{
-		const Reg64& a = rax;
 		const Reg64& d = rdx;
 		const Reg64& t0 = t[0];
 		const Reg64& t1 = t[1];
@@ -2235,14 +2086,12 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		const Reg64& t9 = t[9];
 
 #if 0 // a little slower
-		if (useMulx_ && useAdx_) {
-			mulPack(pz, px, py, Pack(t3, t2, t1, t0));
-			mulPackAdd(pz + 8 * 1, px + 8 * 1, py, t4, Pack(t3, t2, t1, t0));
-			mulPackAdd(pz + 8 * 2, px + 8 * 2, py, t0, Pack(t4, t3, t2, t1));
-			mulPackAdd(pz + 8 * 3, px + 8 * 3, py, t1, Pack(t0, t4, t3, t2));
-			store_mr(pz + 8 * 4, Pack(t1, t0, t4, t3));
-			return;
-		}
+		mulPack(pz, px, py, Pack(t3, t2, t1, t0));
+		mulPackAdd(pz + 8 * 1, px + 8 * 1, py, t4, Pack(t3, t2, t1, t0));
+		mulPackAdd(pz + 8 * 2, px + 8 * 2, py, t0, Pack(t4, t3, t2, t1));
+		mulPackAdd(pz + 8 * 3, px + 8 * 3, py, t1, Pack(t0, t4, t3, t2));
+		store_mr(pz + 8 * 4, Pack(t1, t0, t4, t3));
+		return;
 #endif
 #if 0
 		// a little slower
@@ -2266,37 +2115,14 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		adc(t7, 0);
 		store_mr(pz + 8 * 2, Pack(t7, t4, t3, t2, t1, t0));
 #else
-		if (useMulx_) {
-			mulPack(pz, px, py, Pack(t3, t2, t1, t0));
-		} else {
-			mov(t5, ptr [px]);
-			mov(a, ptr [py + 8 * 0]);
-			mul(t5);
-			mov(ptr [pz + 8 * 0], a);
-			mov(t0, d);
-			mov(a, ptr [py + 8 * 1]);
-			mul(t5);
-			mov(t3, a);
-			mov(t1, d);
-			mov(a, ptr [py + 8 * 2]);
-			mul(t5);
-			mov(t4, a);
-			mov(t2, d);
-			mov(a, ptr [py + 8 * 3]);
-			mul(t5);
-			add(t0, t3);
-			mov(t3, 0);
-			adc(t1, t4);
-			adc(t2, a);
-			adc(t3, d); // [t3:t2:t1:t0:pz[0]] = px[0] * py[3..0]
-		}
+		mulPack(pz, px, py, Pack(t3, t2, t1, t0));
 
 		// here [t3:t2:t1:t0]
 
 		mov(t9, ptr [px + 8]);
 
 		// [d:t9:t7:t6:t5] = px[1] * py[3..0]
-		mul4x1(py, t9, t8, t7, t6, t5, t4);
+		mul4x1(py, t9, t7, t6, t5);
 		add_rr(Pack(t3, t2, t1, t0), Pack(t9, t7, t6, t5));
 		adc(d, 0);
 		mov(t8, d);
@@ -2306,7 +2132,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(t9, ptr [px + 16]);
 
 		// [d:t9:t6:t5:t4]
-		mul4x1(py, t9, t7, t6, t5, t4, t0);
+		mul4x1(py, t9, t6, t5, t4);
 		add_rr(Pack(t8, t3, t2, t1), Pack(t9, t6, t5, t4));
 		adc(d, 0);
 		mov(t7, d);
@@ -2315,7 +2141,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(t9, ptr [px + 24]);
 
 		// [d:t9:t5:t4:t1]
-		mul4x1(py, t9, t6, t5, t4, t1, t0);
+		mul4x1(py, t9, t5, t4, t1);
 		add_rr(Pack(t7, t8, t3, t2), Pack(t9, t5, t4, t1));
 		adc(d, 0);
 		store_mr(pz + 8 * 3, Pack(t7, t8, t3, t2));
@@ -2502,7 +2328,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void gen_fpDbl_sqrPre(void2u& f)
 	{
-		if (!(useMulx_ && useAdx_)) return;
 		void2u func = getCurr<void2u>();
 		switch (pn_) {
 		case 2:
@@ -2547,7 +2372,6 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	void gen_fpDbl_mulPre(void3u& f)
 	{
-		if (!useMulx_ || (pn_ == 6 && !useAdx_)) return;
 		void3u func = getCurr<void3u>();
 		switch (pn_) {
 		case 2:
@@ -3245,78 +3069,40 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	}
 	/*
 		[rdx:x:t0] <- py[1:0] * x
-		destroy x, t
+		destroy x, t0
 	*/
-	void mul2x1(const RegExp& py, const Reg64& x, const Reg64& t0, const Reg64& t)
+	void mul2x1(const RegExp& py, const Reg64& x, const Reg64& t0)
 	{
-		if (useMulx_) {
-			// mulx(H, L, x) = [H:L] = x * rdx
-			/*
-				rdx:x
-				   rax:t0
-			*/
-			mov(rdx, x);
-			mulx(rax, t0, ptr [py]); // [rax:t0] = py[0] * x
-			mulx(rdx, x, ptr [py + 8]); // [t:t1] = py[1] * x
-			add(x, rax);
-			adc(rdx, 0);
-		} else {
-			mov(rax, ptr [py]);
-			mul(x);
-			mov(t0, rax);
-			mov(t, rdx);
-			mov(rax, ptr [py + 8]);
-			mul(x);
-			/*
-				rdx:rax
-				     t:t0
-			*/
-			add(rax, t);
-			adc(rdx, 0);
-			mov(x, rax);
-		}
+		// mulx(H, L, x) = [H:L] = x * rdx
+		/*
+			rdx:x
+			   rax:t0
+		*/
+		mov(rdx, x);
+		mulx(rax, t0, ptr [py]); // [rax:t0] = py[0] * x
+		mulx(rdx, x, ptr [py + 8]); // [rdx:x] = py[1] * x
+		add(x, rax);
+		adc(rdx, 0);
 	}
 	/*
 		[rdx:x:t1:t0] <- py[2:1:0] * x
 		destroy x, t
 	*/
-	void mul3x1(const RegExp& py, const Reg64& x, const Reg64& t2, const Reg64& t1, const Reg64& t0, const Reg64& t)
+	void mul3x1(const RegExp& py, const Reg64& x, const Reg64& t1, const Reg64& t0, const Reg64& t)
 	{
-		if (useMulx_) {
-			// mulx(H, L, x) = [H:L] = x * rdx
-			/*
-				rdx:x
-				    t:t1
-				      rax:t0
-			*/
-			mov(rdx, x);
-			mulx(rax, t0, ptr [py]); // [rax:t0] = py[0] * x
-			mulx(t, t1, ptr [py + 8]); // [t:t1] = py[1] * x
-			add(t1, rax);
-			mulx(rdx, x, ptr [py + 8 * 2]);
-			adc(x, t);
-			adc(rdx, 0);
-		} else {
-			mov(rax, ptr [py]);
-			mul(x);
-			mov(t0, rax);
-			mov(t1, rdx);
-			mov(rax, ptr [py + 8]);
-			mul(x);
-			mov(t, rax);
-			mov(t2, rdx);
-			mov(rax, ptr [py + 8 * 2]);
-			mul(x);
-			/*
-				rdx:rax
-				     t2:t
-				        t1:t0
-			*/
-			add(t1, t);
-			adc(rax, t2);
-			adc(rdx, 0);
-			mov(x, rax);
-		}
+		// mulx(H, L, x) = [H:L] = x * rdx
+		/*
+			rdx:x
+			    t:t1
+			      rax:t0
+		*/
+		mov(rdx, x);
+		mulx(rax, t0, ptr [py]); // [rax:t0] = py[0] * x
+		mulx(t, t1, ptr [py + 8]); // [t:t1] = py[1] * x
+		add(t1, rax);
+		mulx(rdx, x, ptr [py + 8 * 2]);
+		adc(x, t);
+		adc(rdx, 0);
 	}
 	/*
 		[x2:x1:x0] * x0
@@ -3399,7 +3185,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void montgomery3_sub(uint64_t pp, const Reg64& c3, const Reg64& c2, const Reg64& c1, const Reg64& c0,
 		const Reg64& /*px*/, const Reg64& y, const Reg64& p,
-		const Reg64& t0, const Reg64& t1, const Reg64& t2, const Reg64& t3, const Reg64& t4, bool isFirst)
+		const Reg64& t0, const Reg64& t1, const Reg64& t3, const Reg64& t4, bool isFirst)
 	{
 		// input [c3:y:c1:0]
 		// [t4:c3:y:c1:c0]
@@ -3407,7 +3193,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(rax, pp);
 		mul(c0); // q = rax
 		mov(c2, rax);
-		mul3x1(p, c2, t2, t1, t0, t3);
+		mul3x1(p, c2, t1, t0, t3);
 		// [rdx:c2:t1:t0] = p * q
 		add(c0, t0); // always c0 is zero
 		adc(c1, t1);
@@ -3433,118 +3219,35 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void montgomery3_1(uint64_t pp, const Reg64& c3, const Reg64& c2, const Reg64& c1, const Reg64& c0,
 		const Reg64& px, const Reg64& y, const Reg64& p,
-		const Reg64& t0, const Reg64& t1, const Reg64& t2, const Reg64& t3, const Reg64& t4, bool isFirst)
+		const Reg64& t0, const Reg64& t1, const Reg64& t3, const Reg64& t4, bool isFirst) // t3: scratch for mul3x1, t4: receives the carry when isFullBit_
 	{
 		if (isFirst) {
-			mul3x1(px, y, c2, c1, c0, c3);
+			mul3x1(px, y, c1, c0, c3); // c3 is only scratch here; the top word comes back in rdx
 			mov(c3, rdx);
 			// [c3:y:c1:c0] = px[2..0] * y
 		} else {
-			mul3x1(px, y, t2, t1, t0, t3);
+			mul3x1(px, y, t1, t0, t3); // product lands in temporaries so it can be added to the accumulator
 			// [rdx:y:t1:t0] = px[2..0] * y
 			add_rr(Pack(c3, y, c1, c0), Pack(rdx, c2, t1, t0));
 			if (isFullBit_) setc(t4.cvt8());
 		}
-		montgomery3_sub(pp, c3, c2, c1, c0, px, y, p, t0, t1, t2, t3, t4, isFirst);
-	}
-	/*
-		pc[0..n] += x[0..n-1] * y ; pc[] = 0 if isFirst
-		pc[n + 1] is temporary used if isFullBit_
-		q = uint64_t(pc[0] * pp)
-		pc[] = (pc[] + q * p) >> 64
-		input : pc[], px[], y, p[], pw1[], pw2[]
-		output : pc[0..n]   ; if isFullBit_
-		         pc[0..n-1] ; if !isFullBit_
-		destroy y
-		use
-		pw1[0] if useMulx_
-		pw1[0..n-2] otherwise
-		pw2[0..n-1]
-	*/
-	void montgomeryN_1(uint64_t pp, int n, const RegExp& pc, const RegExp& px, const Reg64& y, const Reg64& p, const Reg64& t, const MixPack& pw1, const RegExp& pw2, bool isFirst)
-	{
-		// pc[] += x[] * y
-		if (isFirst) {
-			gen_raw_mulUnit(pc, px, y, pw1, t, n);
-			mov(ptr [pc + n * 8], rdx);
-		} else {
-			gen_raw_mulUnit(pw2, px, y, pw1, t, n);
-			mov(t, ptr [pw2 + 0 * 8]);
-			add(ptr [pc + 0 * 8], t);
-			for (int i = 1; i < n; i++) {
-				mov(t, ptr [pw2 + i * 8]);
-				adc(ptr [pc + i * 8], t);
-			}
-			adc(ptr [pc + n * 8], rdx);
-			if (isFullBit_) {
-				mov(t, 0);
-				adc(t, 0);
-				mov(qword [pc + (n + 1) * 8], t);
-			}
-		}
-		mov(rax, pp);
-		mul(qword [pc]);
-		mov(y, rax); // y = q
-		gen_raw_mulUnit(pw2, p, y, pw1, t, n);
-		// c[] = (c[] + pw2[]) >> 64
-		mov(t, ptr [pw2 + 0 * 8]);
-		add(t, ptr [pc + 0 * 8]);
-		for (int i = 1; i < n; i++) {
-			mov(t, ptr [pw2 + i * 8]);
-			adc(t, ptr [pc + i * 8]);
-			mov(ptr [pc + (i - 1) * 8], t);
-		}
-		adc(rdx, ptr [pc + n * 8]);
-		mov(ptr [pc + (n - 1) * 8], rdx);
-		if (isFullBit_) {
-			if (isFirst) {
-				mov(t, 0);
-			} else {
-				mov(t, ptr [pc + (n + 1) * 8]);
-			}
-			adc(t, 0);
-			mov(qword [pc + n * 8], t);
-		} else {
-			xor_(eax, eax);
-			mov(ptr [pc + n * 8], rax);
-		}
+		montgomery3_sub(pp, c3, c2, c1, c0, px, y, p, t0, t1, t3, t4, isFirst); // shared tail: computes q = pp * c0 and adds p * q
 	}
 	/*
 		[rdx:x:t2:t1:t0] <- py[3:2:1:0] * x
 		destroy x, t
 	*/
-	void mul4x1(const RegExp& py, const Reg64& x, const Reg64& t3, const Reg64& t2, const Reg64& t1, const Reg64& t0, const Reg64& t)
-	{
-		if (useMulx_) {
-			mov(rdx, x);
-			mulx(t1, t0, ptr [py + 8 * 0]);
-			mulx(t2, rax, ptr [py + 8 * 1]);
-			add(t1, rax);
-			mulx(x, rax, ptr [py + 8 * 2]);
-			adc(t2, rax);
-			mulx(rdx, rax, ptr [py + 8 * 3]);
-			adc(x, rax);
-			adc(rdx, 0);
-		} else {
-			mov(rax, ptr [py]);
-			mul(x);
-			mov(t0, rax);
-			mov(t1, rdx);
-			mov(rax, ptr [py + 8]);
-			mul(x);
-			mov(t, rax);
-			mov(t2, rdx);
-			mov(rax, ptr [py + 8 * 2]);
-			mul(x);
-			mov(t3, rax);
-			mov(rax, x);
-			mov(x, rdx);
-			mul(qword [py + 8 * 3]);
-			add(t1, t);
-			adc(t2, t3);
-			adc(x, rax);
-			adc(rdx, 0);
-		}
+	void mul4x1(const RegExp& py, const Reg64& x, const Reg64& t2, const Reg64& t1, const Reg64& t0) // no separate scratch parameter: rax is the temporary (clobbered); emits mulx
+	{
+		mov(rdx, x); // implicit multiplicand for the mulx sequence
+		mulx(t1, t0, ptr [py + 8 * 0]); // [t1:t0] = py[0] * x
+		mulx(t2, rax, ptr [py + 8 * 1]); // [t2:rax] = py[1] * x
+		add(t1, rax);
+		mulx(x, rax, ptr [py + 8 * 2]); // [x:rax] = py[2] * x; x is overwritten, rdx still holds the multiplicand
+		adc(t2, rax);
+		mulx(rdx, rax, ptr [py + 8 * 3]); // [rdx:rax] = py[3] * x
+		adc(x, rax);
+		adc(rdx, 0); // top word absorbs the last carry
 	}
 
 	/*
@@ -3560,14 +3263,14 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	*/
 	void montgomery4_1(uint64_t pp, const Reg64& c4, const Reg64& c3, const Reg64& c2, const Reg64& c1, const Reg64& c0,
 		const Reg64& px, const Reg64& y, const Reg64& p,
-		const Reg64& t0, const Reg64& t1, const Reg64& t2, const Reg64& t3, const Reg64& t4, bool isFirst, const Xmm& xt)
+		const Reg64& t0, const Reg64& t1, const Reg64& t2, bool isFirst, const Xmm& xt)
 	{
 		if (isFirst) {
-			mul4x1(px, y, c3, c2, c1, c0, c4);
+			mul4x1(px, y, c2, c1, c0);
 			mov(c4, rdx);
 			// [c4:y:c2:c1:c0] = px[3..0] * y
 		} else {
-			mul4x1(px, y, t3, t2, t1, t0, t4);
+			mul4x1(px, y, t2, t1, t0);
 			// [rdx:y:t2:t1:t0] = px[3..0] * y
 			if (isFullBit_) {
 				vmovq(xt, px);
@@ -3583,7 +3286,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 		mov(rax, pp);
 		mul(c0); // q = rax
 		mov(c3, rax);
-		mul4x1(p, c3, t3, t2, t1, t0, t4);
+		mul4x1(p, c3, t2, t1, t0);
 		add(c0, t0); // always c0 is zero
 		adc(c1, t1);
 		adc(c2, t2);
@@ -3601,7 +3304,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	void3u gen_fp2Dbl_mulPre()
 	{
 		if (isFullBit_) return 0;
-		if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0;
+		if (!(pn_ == 4 || pn_ == 6)) return 0;
 		void3u func = getCurr<void3u>();
 		bool embedded = pn_ == 4;
 
@@ -3881,7 +3584,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	void3u gen_fp2_mul()
 	{
 		if (isFullBit_) return 0;
-		if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0;
+		if (!(pn_ == 4 || pn_ == 6)) return 0;
 		void3u func = getCurr<void3u>();
 		int stackSize = 8 + FpByte_ * 4;
 		StackFrame sf(this, 3, 10 | UseRDX, stackSize);
@@ -3903,7 +3606,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
 	void2u gen_fp2_sqr()
 	{
 		if (isFullBit_) return 0;
-		if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0;
+		if (!(pn_ == 4 || pn_ == 6)) return 0;
 		bool nocarry = (p_[pn_ - 1] >> 62) == 0;
 		if (!nocarry) return 0;
 		void2u func = getCurr<void2u>();

From d32e456fc8b8479185f3dd722adb96b3bceff26e Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo <herumi@nifty.com>
Date: Wed, 14 Apr 2021 17:08:17 +0900
Subject: [PATCH 553/553] add sample of g1only she

---
 sample/she_g1only.cpp | 71 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 sample/she_g1only.cpp

diff --git a/sample/she_g1only.cpp b/sample/she_g1only.cpp
new file mode 100644
index 00000000..9dc54614
--- /dev/null
+++ b/sample/she_g1only.cpp
@@ -0,0 +1,71 @@
+#include <mcl/she.hpp>
+#include <mcl/elgamal.hpp>
+#include <cybozu/benchmark.hpp>
+
+using namespace mcl::she;
+
+const mcl::EcParam& g_para = mcl::ecparam::secp256k1; // curve parameters used by both main() and elgamal()
+//const mcl::EcParam& g_para = mcl::ecparam::secp160k1; // alternative: enable instead of the line above
+
+void elgamal() // demo: encrypt two ints with mcl::ElgamalT on the g_para curve, add the ciphertexts, print the decryption
+{
+	puts("elgamal");
+	struct TagZn; // tag type so that Zn is a different FpT instantiation from Fp
+	typedef mcl::FpT<> Fp;
+	typedef mcl::FpT<TagZn> Zn;
+	typedef mcl::EcT<Fp> Ec;
+	typedef mcl::ElgamalT<Ec, Zn> ElgamalEc;
+	Ec P;
+	mcl::initCurve<Ec, Zn>(g_para.curveType, &P); // NOTE(review): presumably initializes Fp/Zn/Ec and stores the base point in P - confirm in mcl headers
+	const size_t bitSize = Zn::getBitSize();
+	cybozu::RandomGenerator rg;
+
+	ElgamalEc::PrivateKey sec;
+	sec.init(P, bitSize, rg);
+	sec.setCache(0, 60000); // NOTE(review): presumably bounds the plaintext range dec() can recover - confirm in mcl/elgamal.hpp
+	const ElgamalEc::PublicKey& pub = sec.getPublicKey();
+
+	const int m1 = 123;
+	const int m2 = 654;
+	ElgamalEc::CipherText c1, c2;
+	pub.enc(c1, m1);
+	pub.enc(c2, m2);
+	c1.add(c2); // combine ciphertexts; presumably dec then yields m1 + m2 - the value is printed, not checked
+	Zn dec;
+	sec.dec(dec, c1);
+	std::cout << "dec=" << dec << std::endl;
+	CYBOZU_BENCH_C("enc", 1000, pub.enc, c1, m1); // timing only: re-encrypts m1 into c1, so it runs after the result is printed
+}
+
+int main() // function-try-block: a std::exception thrown anywhere in the body reaches the handler at the bottom
+	try
+{
+	// initialize system
+	initG1only(g_para, 1024); // NOTE(review): the meaning of 1024 is not visible here - confirm in mcl/she.hpp
+
+	SecretKey sec;
+	sec.setByCSPRNG();
+	PublicKey pub;
+	sec.getPublicKey(pub);
+	PrecomputedPublicKey ppub;
+	ppub.init(pub);
+
+	int m1 = 123;
+	int m2 = 654;
+
+	CipherTextG1 c1, c2;
+	ppub.enc(c1, m1);
+	ppub.enc(c2, m2);
+	CYBOZU_BENCH_C("pub.enc", 1000, pub.enc, c1, m1); // both benchmarks call enc(c1, m1) again,
+	CYBOZU_BENCH_C("ppub.enc", 1000, ppub.enc, c1, m1); // so c1 is still an encryption of m1 for the check below
+	add(c1, c1, c2);
+	int m = sec.dec(c1);
+	printf("Dec(Enc(%d) + Enc(%d)) = %d(%s)\n", m1, m2, m, m == m1 + m2 ? "ok" : "ng"); // "ok" only when the decrypted sum equals m1 + m2
+
+	elgamal();
+
+} catch (std::exception& e) {
+	printf("ERR %s\n", e.what());
+	return 1; // non-zero exit status when an exception was caught
+}
+